author     Dimitry Andric <dim@FreeBSD.org>  2019-08-20 20:50:12 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2019-08-20 20:50:12 +0000
commit     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree       599ab169a01f1c86eda9adc774edaedde2f2db5b
parent     1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
download   src-e6d1592492a3a379186bfb02bd0f4eda0669c0d5.tar.gz
           src-e6d1592492a3a379186bfb02bd0f4eda0669c0d5.zip
Vendor import of stripped llvm trunk r366426 (just before the release_90 branch point)
Tag: vendor/llvm/llvm-trunk-r366426
Notes:
    svn path=/vendor/llvm/dist/; revision=351278
    svn path=/vendor/llvm/llvm-trunk-r366426/; revision=351279; tag=vendor/llvm/llvm-trunk-r366426
-rw-r--r--LICENSE.TXT261
-rw-r--r--include/llvm-c/Analysis.h8
-rw-r--r--include/llvm-c/BitReader.h8
-rw-r--r--include/llvm-c/BitWriter.h8
-rw-r--r--include/llvm-c/Comdat.h8
-rw-r--r--include/llvm-c/Core.h223
-rw-r--r--include/llvm-c/DataTypes.h8
-rw-r--r--include/llvm-c/DebugInfo.h135
-rw-r--r--include/llvm-c/Disassembler.h8
-rw-r--r--include/llvm-c/DisassemblerTypes.h8
-rw-r--r--include/llvm-c/Error.h10
-rw-r--r--include/llvm-c/ErrorHandling.h8
-rw-r--r--include/llvm-c/ExecutionEngine.h8
-rw-r--r--include/llvm-c/IRReader.h8
-rw-r--r--include/llvm-c/Initialization.h8
-rw-r--r--include/llvm-c/LinkTimeOptimizer.h7
-rw-r--r--include/llvm-c/Linker.h8
-rw-r--r--include/llvm-c/Object.h163
-rw-r--r--include/llvm-c/OptRemarks.h204
-rw-r--r--include/llvm-c/OrcBindings.h8
-rw-r--r--include/llvm-c/Remarks.h329
-rw-r--r--include/llvm-c/Support.h8
-rw-r--r--include/llvm-c/Target.h12
-rw-r--r--include/llvm-c/TargetMachine.h8
-rw-r--r--include/llvm-c/Transforms/AggressiveInstCombine.h8
-rw-r--r--include/llvm-c/Transforms/Coroutines.h8
-rw-r--r--include/llvm-c/Transforms/IPO.h8
-rw-r--r--include/llvm-c/Transforms/InstCombine.h8
-rw-r--r--include/llvm-c/Transforms/PassManagerBuilder.h8
-rw-r--r--include/llvm-c/Transforms/Scalar.h8
-rw-r--r--include/llvm-c/Transforms/Utils.h11
-rw-r--r--include/llvm-c/Transforms/Vectorize.h8
-rw-r--r--include/llvm-c/Types.h13
-rw-r--r--include/llvm-c/lto.h52
-rw-r--r--include/llvm/ADT/APFloat.h18
-rw-r--r--include/llvm/ADT/APInt.h18
-rw-r--r--include/llvm/ADT/APSInt.h25
-rw-r--r--include/llvm/ADT/AllocatorList.h7
-rw-r--r--include/llvm/ADT/Any.h7
-rw-r--r--include/llvm/ADT/ArrayRef.h15
-rw-r--r--include/llvm/ADT/BitVector.h7
-rw-r--r--include/llvm/ADT/BitmaskEnum.h7
-rw-r--r--include/llvm/ADT/BreadthFirstIterator.h9
-rw-r--r--include/llvm/ADT/CachedHashString.h7
-rw-r--r--include/llvm/ADT/DAGDeltaAlgorithm.h7
-rw-r--r--include/llvm/ADT/DeltaAlgorithm.h7
-rw-r--r--include/llvm/ADT/DenseMap.h15
-rw-r--r--include/llvm/ADT/DenseMapInfo.h23
-rw-r--r--include/llvm/ADT/DenseSet.h9
-rw-r--r--include/llvm/ADT/DepthFirstIterator.h7
-rw-r--r--include/llvm/ADT/EpochTracker.h7
-rw-r--r--include/llvm/ADT/EquivalenceClasses.h7
-rw-r--r--include/llvm/ADT/FoldingSet.h7
-rw-r--r--include/llvm/ADT/FunctionExtras.h7
-rw-r--r--include/llvm/ADT/GraphTraits.h7
-rw-r--r--include/llvm/ADT/Hashing.h9
-rw-r--r--include/llvm/ADT/ImmutableList.h11
-rw-r--r--include/llvm/ADT/ImmutableMap.h7
-rw-r--r--include/llvm/ADT/ImmutableSet.h7
-rw-r--r--include/llvm/ADT/IndexedMap.h7
-rw-r--r--include/llvm/ADT/IntEqClasses.h7
-rw-r--r--include/llvm/ADT/IntervalMap.h7
-rw-r--r--include/llvm/ADT/IntrusiveRefCntPtr.h7
-rw-r--r--include/llvm/ADT/MapVector.h7
-rw-r--r--include/llvm/ADT/None.h7
-rw-r--r--include/llvm/ADT/Optional.h255
-rw-r--r--include/llvm/ADT/PackedVector.h7
-rw-r--r--include/llvm/ADT/PointerEmbeddedInt.h7
-rw-r--r--include/llvm/ADT/PointerIntPair.h27
-rw-r--r--include/llvm/ADT/PointerSumType.h7
-rw-r--r--include/llvm/ADT/PointerUnion.h482
-rw-r--r--include/llvm/ADT/PostOrderIterator.h7
-rw-r--r--include/llvm/ADT/PriorityQueue.h7
-rw-r--r--include/llvm/ADT/PriorityWorklist.h7
-rw-r--r--include/llvm/ADT/SCCIterator.h7
-rw-r--r--include/llvm/ADT/STLExtras.h108
-rw-r--r--include/llvm/ADT/ScopeExit.h7
-rw-r--r--include/llvm/ADT/ScopedHashTable.h7
-rw-r--r--include/llvm/ADT/Sequence.h7
-rw-r--r--include/llvm/ADT/SetOperations.h7
-rw-r--r--include/llvm/ADT/SetVector.h7
-rw-r--r--include/llvm/ADT/SmallBitVector.h7
-rw-r--r--include/llvm/ADT/SmallPtrSet.h7
-rw-r--r--include/llvm/ADT/SmallSet.h7
-rw-r--r--include/llvm/ADT/SmallString.h7
-rw-r--r--include/llvm/ADT/SmallVector.h48
-rw-r--r--include/llvm/ADT/SparseBitVector.h7
-rw-r--r--include/llvm/ADT/SparseMultiSet.h7
-rw-r--r--include/llvm/ADT/SparseSet.h7
-rw-r--r--include/llvm/ADT/Statistic.h7
-rw-r--r--include/llvm/ADT/StringExtras.h7
-rw-r--r--include/llvm/ADT/StringMap.h12
-rw-r--r--include/llvm/ADT/StringRef.h43
-rw-r--r--include/llvm/ADT/StringSet.h14
-rw-r--r--include/llvm/ADT/StringSwitch.h29
-rw-r--r--include/llvm/ADT/TinyPtrVector.h7
-rw-r--r--include/llvm/ADT/Triple.h85
-rw-r--r--include/llvm/ADT/Twine.h10
-rw-r--r--include/llvm/ADT/UniqueVector.h7
-rw-r--r--include/llvm/ADT/VariadicFunction.h9
-rw-r--r--include/llvm/ADT/bit.h17
-rw-r--r--include/llvm/ADT/edit_distance.h7
-rw-r--r--include/llvm/ADT/fallible_iterator.h243
-rw-r--r--include/llvm/ADT/ilist.h16
-rw-r--r--include/llvm/ADT/ilist_base.h7
-rw-r--r--include/llvm/ADT/ilist_iterator.h7
-rw-r--r--include/llvm/ADT/ilist_node.h7
-rw-r--r--include/llvm/ADT/ilist_node_base.h7
-rw-r--r--include/llvm/ADT/ilist_node_options.h7
-rw-r--r--include/llvm/ADT/iterator.h7
-rw-r--r--include/llvm/ADT/iterator_range.h7
-rw-r--r--include/llvm/ADT/simple_ilist.h7
-rw-r--r--include/llvm/Analysis/AliasAnalysis.h239
-rw-r--r--include/llvm/Analysis/AliasAnalysisEvaluator.h7
-rw-r--r--include/llvm/Analysis/AliasSetTracker.h28
-rw-r--r--include/llvm/Analysis/AssumptionCache.h15
-rw-r--r--include/llvm/Analysis/BasicAliasAnalysis.h34
-rw-r--r--include/llvm/Analysis/BlockFrequencyInfo.h10
-rw-r--r--include/llvm/Analysis/BlockFrequencyInfoImpl.h33
-rw-r--r--include/llvm/Analysis/BranchProbabilityInfo.h7
-rw-r--r--include/llvm/Analysis/CFG.h32
-rw-r--r--include/llvm/Analysis/CFGPrinter.h7
-rw-r--r--include/llvm/Analysis/CFLAliasAnalysisUtils.h7
-rw-r--r--include/llvm/Analysis/CFLAndersAliasAnalysis.h10
-rw-r--r--include/llvm/Analysis/CFLSteensAliasAnalysis.h14
-rw-r--r--include/llvm/Analysis/CGSCCPassManager.h396
-rw-r--r--include/llvm/Analysis/CallGraph.h24
-rw-r--r--include/llvm/Analysis/CallGraphSCCPass.h7
-rw-r--r--include/llvm/Analysis/CallPrinter.h7
-rw-r--r--include/llvm/Analysis/CaptureTracking.h7
-rw-r--r--include/llvm/Analysis/CmpInstAnalysis.h7
-rw-r--r--include/llvm/Analysis/CodeMetrics.h16
-rw-r--r--include/llvm/Analysis/ConstantFolding.h22
-rw-r--r--include/llvm/Analysis/DOTGraphTraitsPass.h7
-rw-r--r--include/llvm/Analysis/DemandedBits.h7
-rw-r--r--include/llvm/Analysis/DependenceAnalysis.h11
-rw-r--r--include/llvm/Analysis/DivergenceAnalysis.h7
-rw-r--r--include/llvm/Analysis/DomPrinter.h7
-rw-r--r--include/llvm/Analysis/DomTreeUpdater.h (renamed from include/llvm/IR/DomTreeUpdater.h)200
-rw-r--r--include/llvm/Analysis/DominanceFrontier.h7
-rw-r--r--include/llvm/Analysis/DominanceFrontierImpl.h7
-rw-r--r--include/llvm/Analysis/EHPersonalities.h7
-rw-r--r--include/llvm/Analysis/GlobalsModRef.h15
-rw-r--r--include/llvm/Analysis/GuardUtils.h30
-rw-r--r--include/llvm/Analysis/IVDescriptors.h28
-rw-r--r--include/llvm/Analysis/IVUsers.h7
-rw-r--r--include/llvm/Analysis/IndirectCallPromotionAnalysis.h7
-rw-r--r--include/llvm/Analysis/IndirectCallVisitor.h7
-rw-r--r--include/llvm/Analysis/InlineCost.h21
-rw-r--r--include/llvm/Analysis/InstructionPrecedenceTracking.h9
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h33
-rw-r--r--include/llvm/Analysis/Interval.h7
-rw-r--r--include/llvm/Analysis/IntervalIterator.h7
-rw-r--r--include/llvm/Analysis/IntervalPartition.h7
-rw-r--r--include/llvm/Analysis/IteratedDominanceFrontier.h154
-rw-r--r--include/llvm/Analysis/LazyBlockFrequencyInfo.h7
-rw-r--r--include/llvm/Analysis/LazyBranchProbabilityInfo.h7
-rw-r--r--include/llvm/Analysis/LazyCallGraph.h32
-rw-r--r--include/llvm/Analysis/LazyValueInfo.h7
-rw-r--r--include/llvm/Analysis/LegacyDivergenceAnalysis.h7
-rw-r--r--include/llvm/Analysis/Lint.h7
-rw-r--r--include/llvm/Analysis/Loads.h29
-rw-r--r--include/llvm/Analysis/LoopAccessAnalysis.h13
-rw-r--r--include/llvm/Analysis/LoopAnalysisManager.h10
-rw-r--r--include/llvm/Analysis/LoopInfo.h249
-rw-r--r--include/llvm/Analysis/LoopInfoImpl.h85
-rw-r--r--include/llvm/Analysis/LoopIterator.h7
-rw-r--r--include/llvm/Analysis/LoopPass.h7
-rw-r--r--include/llvm/Analysis/LoopUnrollAnalyzer.h7
-rw-r--r--include/llvm/Analysis/MemoryBuiltins.h50
-rw-r--r--include/llvm/Analysis/MemoryDependenceAnalysis.h26
-rw-r--r--include/llvm/Analysis/MemoryLocation.h7
-rw-r--r--include/llvm/Analysis/MemorySSA.h49
-rw-r--r--include/llvm/Analysis/MemorySSAUpdater.h42
-rw-r--r--include/llvm/Analysis/ModuleSummaryAnalysis.h7
-rw-r--r--include/llvm/Analysis/MustExecute.h7
-rw-r--r--include/llvm/Analysis/ObjCARCAliasAnalysis.h16
-rw-r--r--include/llvm/Analysis/ObjCARCAnalysisUtils.h7
-rw-r--r--include/llvm/Analysis/ObjCARCInstKind.h11
-rw-r--r--include/llvm/Analysis/OptimizationRemarkEmitter.h11
-rw-r--r--include/llvm/Analysis/OrderedBasicBlock.h15
-rw-r--r--include/llvm/Analysis/OrderedInstructions.h7
-rw-r--r--include/llvm/Analysis/PHITransAddr.h7
-rw-r--r--include/llvm/Analysis/Passes.h7
-rw-r--r--include/llvm/Analysis/PhiValues.h7
-rw-r--r--include/llvm/Analysis/PostDominators.h7
-rw-r--r--include/llvm/Analysis/ProfileSummaryInfo.h16
-rw-r--r--include/llvm/Analysis/PtrUseVisitor.h11
-rw-r--r--include/llvm/Analysis/RegionInfo.h7
-rw-r--r--include/llvm/Analysis/RegionInfoImpl.h7
-rw-r--r--include/llvm/Analysis/RegionIterator.h7
-rw-r--r--include/llvm/Analysis/RegionPass.h7
-rw-r--r--include/llvm/Analysis/RegionPrinter.h7
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h72
-rw-r--r--include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h10
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h17
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpressions.h156
-rw-r--r--include/llvm/Analysis/ScalarEvolutionNormalization.h7
-rw-r--r--include/llvm/Analysis/ScopedNoAliasAA.h16
-rw-r--r--include/llvm/Analysis/SparsePropagation.h15
-rw-r--r--include/llvm/Analysis/StackSafetyAnalysis.h7
-rw-r--r--include/llvm/Analysis/SyncDependenceAnalysis.h7
-rw-r--r--include/llvm/Analysis/SyntheticCountsUtils.h7
-rw-r--r--include/llvm/Analysis/TargetFolder.h11
-rw-r--r--include/llvm/Analysis/TargetLibraryInfo.def65
-rw-r--r--include/llvm/Analysis/TargetLibraryInfo.h14
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h235
-rw-r--r--include/llvm/Analysis/TargetTransformInfoImpl.h191
-rw-r--r--include/llvm/Analysis/Trace.h7
-rw-r--r--include/llvm/Analysis/TypeBasedAliasAnalysis.h19
-rw-r--r--include/llvm/Analysis/TypeMetadataUtils.h7
-rw-r--r--include/llvm/Analysis/Utils/Local.h7
-rw-r--r--include/llvm/Analysis/ValueLattice.h7
-rw-r--r--include/llvm/Analysis/ValueLatticeUtils.h7
-rw-r--r--include/llvm/Analysis/ValueTracking.h64
-rw-r--r--include/llvm/Analysis/VecFuncs.def250
-rw-r--r--include/llvm/Analysis/VectorUtils.h81
-rw-r--r--include/llvm/AsmParser/Parser.h7
-rw-r--r--include/llvm/AsmParser/SlotMapping.h7
-rw-r--r--include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h37
-rw-r--r--include/llvm/BinaryFormat/COFF.h14
-rw-r--r--include/llvm/BinaryFormat/Dwarf.def16
-rw-r--r--include/llvm/BinaryFormat/Dwarf.h11
-rw-r--r--include/llvm/BinaryFormat/DynamicTags.def28
-rw-r--r--include/llvm/BinaryFormat/ELF.h88
-rw-r--r--include/llvm/BinaryFormat/ELFRelocs/ARM.def3
-rw-r--r--include/llvm/BinaryFormat/ELFRelocs/PowerPC.def33
-rw-r--r--include/llvm/BinaryFormat/MachO.def7
-rw-r--r--include/llvm/BinaryFormat/MachO.h25
-rw-r--r--include/llvm/BinaryFormat/Magic.h10
-rw-r--r--include/llvm/BinaryFormat/Minidump.h203
-rw-r--r--include/llvm/BinaryFormat/MinidumpConstants.def107
-rw-r--r--include/llvm/BinaryFormat/MsgPack.def7
-rw-r--r--include/llvm/BinaryFormat/MsgPack.h7
-rw-r--r--include/llvm/BinaryFormat/MsgPackDocument.h385
-rw-r--r--include/llvm/BinaryFormat/MsgPackReader.h7
-rw-r--r--include/llvm/BinaryFormat/MsgPackTypes.h372
-rw-r--r--include/llvm/BinaryFormat/MsgPackWriter.h7
-rw-r--r--include/llvm/BinaryFormat/Wasm.h65
-rw-r--r--include/llvm/BinaryFormat/WasmRelocs.def24
-rw-r--r--include/llvm/BinaryFormat/XCOFF.h145
-rw-r--r--include/llvm/Bitcode/BitcodeAnalyzer.h103
-rw-r--r--include/llvm/Bitcode/BitcodeReader.h9
-rw-r--r--include/llvm/Bitcode/BitcodeWriter.h7
-rw-r--r--include/llvm/Bitcode/BitcodeWriterPass.h7
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h42
-rw-r--r--include/llvm/Bitstream/BitCodes.h (renamed from include/llvm/Bitcode/BitCodes.h)33
-rw-r--r--include/llvm/Bitstream/BitstreamReader.h (renamed from include/llvm/Bitcode/BitstreamReader.h)175
-rw-r--r--include/llvm/Bitstream/BitstreamWriter.h (renamed from include/llvm/Bitcode/BitstreamWriter.h)17
-rw-r--r--include/llvm/CodeGen/AccelTable.h31
-rw-r--r--include/llvm/CodeGen/Analysis.h27
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h61
-rw-r--r--include/llvm/CodeGen/AsmPrinterHandler.h7
-rw-r--r--include/llvm/CodeGen/AtomicExpandUtils.h7
-rw-r--r--include/llvm/CodeGen/BasicTTIImpl.h250
-rw-r--r--include/llvm/CodeGen/BuiltinGCs.h7
-rw-r--r--include/llvm/CodeGen/CSEConfigBase.h28
-rw-r--r--include/llvm/CodeGen/CalcSpillWeights.h7
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h11
-rw-r--r--include/llvm/CodeGen/CommandFlags.inc13
-rw-r--r--include/llvm/CodeGen/CostTable.h7
-rw-r--r--include/llvm/CodeGen/DAGCombine.h7
-rw-r--r--include/llvm/CodeGen/DFAPacketizer.h7
-rw-r--r--include/llvm/CodeGen/DIE.h59
-rw-r--r--include/llvm/CodeGen/DIEValue.def8
-rw-r--r--include/llvm/CodeGen/DbgEntityHistoryCalculator.h93
-rw-r--r--include/llvm/CodeGen/DebugHandlerBase.h9
-rw-r--r--include/llvm/CodeGen/DwarfStringPoolEntry.h7
-rw-r--r--include/llvm/CodeGen/EdgeBundles.h7
-rw-r--r--include/llvm/CodeGen/ExecutionDomainFix.h7
-rw-r--r--include/llvm/CodeGen/ExpandReductions.h7
-rw-r--r--include/llvm/CodeGen/FastISel.h9
-rw-r--r--include/llvm/CodeGen/FaultMaps.h7
-rw-r--r--include/llvm/CodeGen/FunctionLoweringInfo.h65
-rw-r--r--include/llvm/CodeGen/GCMetadata.h7
-rw-r--r--include/llvm/CodeGen/GCMetadataPrinter.h7
-rw-r--r--include/llvm/CodeGen/GCStrategy.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/CSEInfo.h41
-rw-r--r--include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/CallLowering.h137
-rw-r--r--include/llvm/CodeGen/GlobalISel/Combiner.h9
-rw-r--r--include/llvm/CodeGen/GlobalISel/CombinerHelper.h27
-rw-r--r--include/llvm/CodeGen/GlobalISel/CombinerInfo.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h18
-rw-r--r--include/llvm/CodeGen/GlobalISel/GISelWorkList.h50
-rw-r--r--include/llvm/CodeGen/GlobalISel/IRTranslator.h121
-rw-r--r--include/llvm/CodeGen/GlobalISel/InstructionSelect.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/InstructionSelector.h17
-rw-r--r--include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h60
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h278
-rw-r--r--include/llvm/CodeGen/GlobalISel/Legalizer.h14
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizerHelper.h120
-rw-r--r--include/llvm/CodeGen/GlobalISel/LegalizerInfo.h211
-rw-r--r--include/llvm/CodeGen/GlobalISel/Localizer.h22
-rw-r--r--include/llvm/CodeGen/GlobalISel/MIPatternMatch.h14
-rw-r--r--include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h361
-rw-r--r--include/llvm/CodeGen/GlobalISel/RegBankSelect.h16
-rw-r--r--include/llvm/CodeGen/GlobalISel/RegisterBank.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h53
-rw-r--r--include/llvm/CodeGen/GlobalISel/Types.h7
-rw-r--r--include/llvm/CodeGen/GlobalISel/Utils.h74
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h54
-rw-r--r--include/llvm/CodeGen/IntrinsicLowering.h11
-rw-r--r--include/llvm/CodeGen/LatencyPriorityQueue.h7
-rw-r--r--include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h7
-rw-r--r--include/llvm/CodeGen/LexicalScopes.h7
-rw-r--r--include/llvm/CodeGen/LinkAllAsmWriterComponents.h7
-rw-r--r--include/llvm/CodeGen/LinkAllCodegenComponents.h7
-rw-r--r--include/llvm/CodeGen/LiveInterval.h54
-rw-r--r--include/llvm/CodeGen/LiveIntervalUnion.h7
-rw-r--r--include/llvm/CodeGen/LiveIntervals.h16
-rw-r--r--include/llvm/CodeGen/LivePhysRegs.h7
-rw-r--r--include/llvm/CodeGen/LiveRangeEdit.h7
-rw-r--r--include/llvm/CodeGen/LiveRegMatrix.h7
-rw-r--r--include/llvm/CodeGen/LiveRegUnits.h7
-rw-r--r--include/llvm/CodeGen/LiveStacks.h7
-rw-r--r--include/llvm/CodeGen/LiveVariables.h7
-rw-r--r--include/llvm/CodeGen/LoopTraversal.h7
-rw-r--r--include/llvm/CodeGen/LowLevelType.h7
-rw-r--r--include/llvm/CodeGen/MIRParser/MIParser.h233
-rw-r--r--include/llvm/CodeGen/MIRParser/MIRParser.h7
-rw-r--r--include/llvm/CodeGen/MIRPrinter.h9
-rw-r--r--include/llvm/CodeGen/MIRYamlMapping.h108
-rw-r--r--include/llvm/CodeGen/MachORelocation.h7
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h28
-rw-r--r--include/llvm/CodeGen/MachineBlockFrequencyInfo.h7
-rw-r--r--include/llvm/CodeGen/MachineBranchProbabilityInfo.h7
-rw-r--r--include/llvm/CodeGen/MachineCombinerPattern.h7
-rw-r--r--include/llvm/CodeGen/MachineConstantPool.h7
-rw-r--r--include/llvm/CodeGen/MachineDominanceFrontier.h7
-rw-r--r--include/llvm/CodeGen/MachineDominators.h7
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h14
-rw-r--r--include/llvm/CodeGen/MachineFunction.h76
-rw-r--r--include/llvm/CodeGen/MachineFunctionPass.h7
-rw-r--r--include/llvm/CodeGen/MachineInstr.h81
-rw-r--r--include/llvm/CodeGen/MachineInstrBuilder.h15
-rw-r--r--include/llvm/CodeGen/MachineInstrBundle.h15
-rw-r--r--include/llvm/CodeGen/MachineInstrBundleIterator.h7
-rw-r--r--include/llvm/CodeGen/MachineJumpTableInfo.h7
-rw-r--r--include/llvm/CodeGen/MachineLoopInfo.h7
-rw-r--r--include/llvm/CodeGen/MachineMemOperand.h26
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h32
-rw-r--r--include/llvm/CodeGen/MachineModuleInfoImpls.h7
-rw-r--r--include/llvm/CodeGen/MachineOperand.h21
-rw-r--r--include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h21
-rw-r--r--include/llvm/CodeGen/MachineOutliner.h13
-rw-r--r--include/llvm/CodeGen/MachinePassRegistry.h7
-rw-r--r--include/llvm/CodeGen/MachinePipeliner.h85
-rw-r--r--include/llvm/CodeGen/MachinePostDominators.h9
-rw-r--r--include/llvm/CodeGen/MachineRegionInfo.h7
-rw-r--r--include/llvm/CodeGen/MachineRegisterInfo.h20
-rw-r--r--include/llvm/CodeGen/MachineSSAUpdater.h7
-rw-r--r--include/llvm/CodeGen/MachineScheduler.h35
-rw-r--r--include/llvm/CodeGen/MachineTraceMetrics.h7
-rw-r--r--include/llvm/CodeGen/MacroFusion.h7
-rw-r--r--include/llvm/CodeGen/PBQP/CostAllocator.h7
-rw-r--r--include/llvm/CodeGen/PBQP/Graph.h7
-rw-r--r--include/llvm/CodeGen/PBQP/Math.h7
-rw-r--r--include/llvm/CodeGen/PBQP/ReductionRules.h7
-rw-r--r--include/llvm/CodeGen/PBQP/Solution.h7
-rw-r--r--include/llvm/CodeGen/PBQPRAConstraint.h9
-rw-r--r--include/llvm/CodeGen/ParallelCG.h7
-rw-r--r--include/llvm/CodeGen/Passes.h15
-rw-r--r--include/llvm/CodeGen/PreISelIntrinsicLowering.h7
-rw-r--r--include/llvm/CodeGen/PseudoSourceValue.h10
-rw-r--r--include/llvm/CodeGen/ReachingDefAnalysis.h9
-rw-r--r--include/llvm/CodeGen/RegAllocPBQP.h7
-rw-r--r--include/llvm/CodeGen/RegAllocRegistry.h35
-rw-r--r--include/llvm/CodeGen/Register.h60
-rw-r--r--include/llvm/CodeGen/RegisterClassInfo.h7
-rw-r--r--include/llvm/CodeGen/RegisterPressure.h11
-rw-r--r--include/llvm/CodeGen/RegisterScavenging.h24
-rw-r--r--include/llvm/CodeGen/RegisterUsageInfo.h7
-rw-r--r--include/llvm/CodeGen/ResourcePriorityQueue.h7
-rw-r--r--include/llvm/CodeGen/RuntimeLibcalls.h7
-rw-r--r--include/llvm/CodeGen/SDNodeProperties.td7
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h31
-rw-r--r--include/llvm/CodeGen/ScheduleDAGInstrs.h23
-rw-r--r--include/llvm/CodeGen/ScheduleDAGMutation.h7
-rw-r--r--include/llvm/CodeGen/ScheduleDFS.h9
-rw-r--r--include/llvm/CodeGen/ScheduleHazardRecognizer.h7
-rw-r--r--include/llvm/CodeGen/SchedulerRegistry.h7
-rw-r--r--include/llvm/CodeGen/ScoreboardHazardRecognizer.h7
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h139
-rw-r--r--include/llvm/CodeGen/SelectionDAGAddressAnalysis.h46
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h15
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h185
-rw-r--r--include/llvm/CodeGen/SelectionDAGTargetInfo.h15
-rw-r--r--include/llvm/CodeGen/SlotIndexes.h87
-rw-r--r--include/llvm/CodeGen/StackMaps.h7
-rw-r--r--include/llvm/CodeGen/StackProtector.h13
-rw-r--r--include/llvm/CodeGen/SwiftErrorValueTracking.h110
-rw-r--r--include/llvm/CodeGen/SwitchLoweringUtils.h297
-rw-r--r--include/llvm/CodeGen/TailDuplicator.h7
-rw-r--r--include/llvm/CodeGen/TargetCallingConv.h23
-rw-r--r--include/llvm/CodeGen/TargetFrameLowering.h37
-rw-r--r--include/llvm/CodeGen/TargetInstrInfo.h46
-rw-r--r--include/llvm/CodeGen/TargetLowering.h375
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h7
-rw-r--r--include/llvm/CodeGen/TargetOpcodes.h7
-rw-r--r--include/llvm/CodeGen/TargetPassConfig.h37
-rw-r--r--include/llvm/CodeGen/TargetRegisterInfo.h14
-rw-r--r--include/llvm/CodeGen/TargetSchedule.h7
-rw-r--r--include/llvm/CodeGen/TargetSubtargetInfo.h33
-rw-r--r--include/llvm/CodeGen/UnreachableBlockElim.h7
-rw-r--r--include/llvm/CodeGen/ValueTypes.h7
-rw-r--r--include/llvm/CodeGen/ValueTypes.td200
-rw-r--r--include/llvm/CodeGen/VirtRegMap.h17
-rw-r--r--include/llvm/CodeGen/WasmEHFuncInfo.h29
-rw-r--r--include/llvm/CodeGen/WinEHFuncInfo.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/CVRecord.h38
-rw-r--r--include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/CVTypeVisitor.h11
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeView.h29
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewError.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h140
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewRegisters.def210
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewSymbols.def9
-rw-r--r--include/llvm/DebugInfo/CodeView/CodeViewTypes.def7
-rw-r--r--include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h9
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h9
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h14
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/EnumTables.h9
-rw-r--r--include/llvm/DebugInfo/CodeView/Formatters.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/FunctionId.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/GUID.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h29
-rw-r--r--include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/Line.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/RecordName.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/RecordSerialization.h10
-rw-r--r--include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/StringsAndChecksums.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolDeserializer.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolDumper.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolRecord.h66
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolSerializer.h11
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeCollection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeDeserializer.h17
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeHashing.h38
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeIndex.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeRecord.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeRecordMapping.h9
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeStreamMerger.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeTableCollection.h7
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h12
-rw-r--r--include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h7
-rw-r--r--include/llvm/DebugInfo/DIContext.h46
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h23
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAddressRange.h13
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFAttribute.h21
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFContext.h25
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h11
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugLine.h74
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h11
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h9
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h11
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFDie.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFExpression.h26
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFFormValue.h54
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFListTable.h11
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFObject.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFRelocMap.h14
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFSection.h12
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFUnit.h50
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h7
-rw-r--r--include/llvm/DebugInfo/DWARF/DWARFVerifier.h7
-rw-r--r--include/llvm/DebugInfo/GSYM/FileEntry.h68
-rw-r--r--include/llvm/DebugInfo/GSYM/FunctionInfo.h107
-rw-r--r--include/llvm/DebugInfo/GSYM/InlineInfo.h78
-rw-r--r--include/llvm/DebugInfo/GSYM/LineEntry.h48
-rw-r--r--include/llvm/DebugInfo/GSYM/Range.h87
-rw-r--r--include/llvm/DebugInfo/GSYM/StringTable.h54
-rw-r--r--include/llvm/DebugInfo/MSF/IMSFFile.h7
-rw-r--r--include/llvm/DebugInfo/MSF/MSFBuilder.h7
-rw-r--r--include/llvm/DebugInfo/MSF/MSFCommon.h7
-rw-r--r--include/llvm/DebugInfo/MSF/MSFError.h7
-rw-r--r--include/llvm/DebugInfo/MSF/MappedBlockStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIADataStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAError.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h9
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIASession.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIASupport.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIATable.h7
-rw-r--r--include/llvm/DebugInfo/PDB/DIA/DIAUtils.h7
-rw-r--r--include/llvm/DebugInfo/PDB/GenericError.h9
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBDataStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBEnumChildren.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBFrameData.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBInjectedSource.h13
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBLineNumber.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBRawSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBSectionContrib.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBSession.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBSourceFile.h7
-rw-r--r--include/llvm/DebugInfo/PDB/IPDBTable.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiModuleList.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiStream.h26
-rw-r--r--include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/EnumTables.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/Formatters.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/GlobalsStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/Hash.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/HashTable.h92
-rw-r--r--include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/InfoStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h44
-rw-r--r--include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h9
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h9
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h43
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeSession.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PDBFile.h23
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h9
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PDBStringTable.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/PublicsStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/RawConstants.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/RawError.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/RawTypes.h18
-rw-r--r--include/llvm/DebugInfo/PDB/Native/SymbolCache.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/SymbolStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/TpiHashing.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/TpiStream.h7
-rw-r--r--include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDB.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBContext.h16
-rw-r--r--include/llvm/DebugInfo/PDB/PDBExtras.h13
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymDumper.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolBlock.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolCustom.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolData.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolExe.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolFunc.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolLabel.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolThunk.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h7
-rw-r--r--include/llvm/DebugInfo/PDB/PDBTypes.h76
-rw-r--r--include/llvm/DebugInfo/PDB/UDTLayout.h7
-rw-r--r--include/llvm/DebugInfo/Symbolize/DIPrinter.h20
-rw-r--r--include/llvm/DebugInfo/Symbolize/SymbolizableModule.h20
-rw-r--r--include/llvm/DebugInfo/Symbolize/Symbolize.h56
-rw-r--r--include/llvm/Demangle/Compiler.h93
-rw-r--r--include/llvm/Demangle/Demangle.h15
-rw-r--r--include/llvm/Demangle/DemangleConfig.h99
-rw-r--r--include/llvm/Demangle/ItaniumDemangle.h108
-rw-r--r--include/llvm/Demangle/MicrosoftDemangle.h53
-rw-r--r--include/llvm/Demangle/MicrosoftDemangleNodes.h33
-rw-r--r--include/llvm/Demangle/README.txt52
-rw-r--r--include/llvm/Demangle/StringView.h21
-rw-r--r--include/llvm/Demangle/Utility.h18
-rw-r--r--include/llvm/ExecutionEngine/ExecutionEngine.h20
-rw-r--r--include/llvm/ExecutionEngine/GenericValue.h7
-rw-r--r--include/llvm/ExecutionEngine/Interpreter.h7
-rw-r--r--include/llvm/ExecutionEngine/JITEventListener.h7
-rw-r--r--include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h80
-rw-r--r--include/llvm/ExecutionEngine/JITLink/JITLink.h930
-rw-r--r--include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h99
-rw-r--r--include/llvm/ExecutionEngine/JITLink/MachO.h30
-rw-r--r--include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h63
-rw-r--r--include/llvm/ExecutionEngine/JITSymbol.h34
-rw-r--r--include/llvm/ExecutionEngine/MCJIT.h7
-rw-r--r--include/llvm/ExecutionEngine/OProfileWrapper.h7
-rw-r--r--include/llvm/ExecutionEngine/ObjectCache.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h52
-rw-r--r--include/llvm/ExecutionEngine/Orc/CompileUtils.h95
-rw-r--r--include/llvm/ExecutionEngine/Orc/Core.h228
-rw-r--r--include/llvm/ExecutionEngine/Orc/ExecutionUtils.h52
-rw-r--r--include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/IRCompileLayer.h27
-rw-r--r--include/llvm/ExecutionEngine/Orc/IRTransformLayer.h26
-rw-r--r--include/llvm/ExecutionEngine/Orc/IndirectionUtils.h11
-rw-r--r--include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/LLJIT.h230
-rw-r--r--include/llvm/ExecutionEngine/Orc/LambdaResolver.h34
-rw-r--r--include/llvm/ExecutionEngine/Orc/Layer.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h24
-rw-r--r--include/llvm/ExecutionEngine/Orc/LazyReexports.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/Legacy.h18
-rw-r--r--include/llvm/ExecutionEngine/Orc/NullResolver.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h165
-rw-r--r--include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h23
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcABISupport.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcError.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h13
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCSerialization.h93
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCUtils.h25
-rw-r--r--include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h50
-rw-r--r--include/llvm/ExecutionEngine/Orc/RawByteChannel.h7
-rw-r--r--include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h60
-rw-r--r--include/llvm/ExecutionEngine/Orc/SymbolStringPool.h85
-rw-r--r--include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h7
-rw-r--r--include/llvm/ExecutionEngine/OrcMCJITReplacement.h7
-rw-r--r--include/llvm/ExecutionEngine/OrcV1Deprecation.h22
-rw-r--r--include/llvm/ExecutionEngine/RTDyldMemoryManager.h7
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyld.h32
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyldChecker.h98
-rw-r--r--include/llvm/ExecutionEngine/SectionMemoryManager.h7
-rw-r--r--include/llvm/FuzzMutate/FuzzerCLI.h7
-rw-r--r--include/llvm/FuzzMutate/IRMutator.h7
-rw-r--r--include/llvm/FuzzMutate/OpDescriptor.h7
-rw-r--r--include/llvm/FuzzMutate/Operations.h7
-rw-r--r--include/llvm/FuzzMutate/Random.h7
-rw-r--r--include/llvm/FuzzMutate/RandomIRBuilder.h9
-rw-r--r--include/llvm/IR/Argument.h15
-rw-r--r--include/llvm/IR/AssemblyAnnotationWriter.h7
-rw-r--r--include/llvm/IR/Attributes.h27
-rw-r--r--include/llvm/IR/Attributes.td16
-rw-r--r--include/llvm/IR/AutoUpgrade.h13
-rw-r--r--include/llvm/IR/BasicBlock.h17
-rw-r--r--include/llvm/IR/CFG.h11
-rw-r--r--include/llvm/IR/CFGDiff.h7
-rw-r--r--include/llvm/IR/CallSite.h311
-rw-r--r--include/llvm/IR/CallingConv.h7
-rw-r--r--include/llvm/IR/Comdat.h7
-rw-r--r--include/llvm/IR/Constant.h11
-rw-r--r--include/llvm/IR/ConstantFolder.h11
-rw-r--r--include/llvm/IR/ConstantRange.h191
-rw-r--r--include/llvm/IR/Constants.h7
-rw-r--r--include/llvm/IR/DIBuilder.h17
-rw-r--r--include/llvm/IR/DataLayout.h47
-rw-r--r--include/llvm/IR/DebugInfo.h7
-rw-r--r--include/llvm/IR/DebugInfoFlags.def17
-rw-r--r--include/llvm/IR/DebugInfoMetadata.h386
-rw-r--r--include/llvm/IR/DebugLoc.h7
-rw-r--r--include/llvm/IR/DerivedTypes.h107
-rw-r--r--include/llvm/IR/DerivedUser.h7
-rw-r--r--include/llvm/IR/DiagnosticHandler.h9
-rw-r--r--include/llvm/IR/DiagnosticInfo.h20
-rw-r--r--include/llvm/IR/DiagnosticPrinter.h7
-rw-r--r--include/llvm/IR/Dominators.h7
-rw-r--r--include/llvm/IR/Function.h48
-rw-r--r--include/llvm/IR/GVMaterializer.h7
-rw-r--r--include/llvm/IR/GetElementPtrTypeIterator.h7
-rw-r--r--include/llvm/IR/GlobalAlias.h7
-rw-r--r--include/llvm/IR/GlobalIFunc.h7
-rw-r--r--include/llvm/IR/GlobalIndirectSymbol.h7
-rw-r--r--include/llvm/IR/GlobalObject.h7
-rw-r--r--include/llvm/IR/GlobalValue.h25
-rw-r--r--include/llvm/IR/GlobalVariable.h7
-rw-r--r--include/llvm/IR/IRBuilder.h340
-rw-r--r--include/llvm/IR/IRPrintingPasses.h7
-rw-r--r--include/llvm/IR/InlineAsm.h7
-rw-r--r--include/llvm/IR/InstIterator.h7
-rw-r--r--include/llvm/IR/InstVisitor.h20
-rw-r--r--include/llvm/IR/InstrTypes.h166
-rw-r--r--include/llvm/IR/Instruction.def146
-rw-r--r--include/llvm/IR/Instruction.h28
-rw-r--r--include/llvm/IR/Instructions.h487
-rw-r--r--include/llvm/IR/IntrinsicInst.h136
-rw-r--r--include/llvm/IR/Intrinsics.h37
-rw-r--r--include/llvm/IR/Intrinsics.td227
-rw-r--r--include/llvm/IR/IntrinsicsAArch64.td77
-rw-r--r--include/llvm/IR/IntrinsicsAMDGPU.td524
-rw-r--r--include/llvm/IR/IntrinsicsARM.td57
-rw-r--r--include/llvm/IR/IntrinsicsBPF.td7
-rw-r--r--include/llvm/IR/IntrinsicsHexagon.td513
-rw-r--r--include/llvm/IR/IntrinsicsMips.td277
-rw-r--r--include/llvm/IR/IntrinsicsNVVM.td465
-rw-r--r--include/llvm/IR/IntrinsicsPowerPC.td35
-rw-r--r--include/llvm/IR/IntrinsicsRISCV.td38
-rw-r--r--include/llvm/IR/IntrinsicsSystemZ.td66
-rw-r--r--include/llvm/IR/IntrinsicsWebAssembly.td45
-rw-r--r--include/llvm/IR/IntrinsicsX86.td1266
-rw-r--r--include/llvm/IR/IntrinsicsXCore.td7
-rw-r--r--include/llvm/IR/LLVMContext.h42
-rw-r--r--include/llvm/IR/LegacyPassManager.h7
-rw-r--r--include/llvm/IR/LegacyPassManagers.h7
-rw-r--r--include/llvm/IR/LegacyPassNameParser.h7
-rw-r--r--include/llvm/IR/MDBuilder.h18
-rw-r--r--include/llvm/IR/Mangler.h7
-rw-r--r--include/llvm/IR/Metadata.def8
-rw-r--r--include/llvm/IR/Metadata.h7
-rw-r--r--include/llvm/IR/Module.h50
-rw-r--r--include/llvm/IR/ModuleSlotTracker.h7
-rw-r--r--include/llvm/IR/ModuleSummaryIndex.h253
-rw-r--r--include/llvm/IR/ModuleSummaryIndexYAML.h15
-rw-r--r--include/llvm/IR/NoFolder.h11
-rw-r--r--include/llvm/IR/OperandTraits.h7
-rw-r--r--include/llvm/IR/Operator.h14
-rw-r--r--include/llvm/IR/OptBisect.h43
-rw-r--r--include/llvm/IR/PassInstrumentation.h7
-rw-r--r--include/llvm/IR/PassManager.h14
-rw-r--r--include/llvm/IR/PassManagerInternal.h7
-rw-r--r--include/llvm/IR/PassTimingInfo.h28
-rw-r--r--include/llvm/IR/PatternMatch.h91
-rw-r--r--include/llvm/IR/PredIteratorCache.h7
-rw-r--r--include/llvm/IR/ProfileSummary.h10
-rw-r--r--include/llvm/IR/RemarkStreamer.h96
-rw-r--r--include/llvm/IR/RuntimeLibcalls.def30
-rw-r--r--include/llvm/IR/SafepointIRVerifier.h19
-rw-r--r--include/llvm/IR/Statepoint.h149
-rw-r--r--include/llvm/IR/SymbolTableListTraits.h7
-rw-r--r--include/llvm/IR/TrackingMDRef.h7
-rw-r--r--include/llvm/IR/Type.h30
-rw-r--r--include/llvm/IR/TypeFinder.h7
-rw-r--r--include/llvm/IR/Use.h9
-rw-r--r--include/llvm/IR/UseListOrder.h7
-rw-r--r--include/llvm/IR/User.h7
-rw-r--r--include/llvm/IR/Value.def7
-rw-r--r--include/llvm/IR/Value.h69
-rw-r--r--include/llvm/IR/ValueHandle.h24
-rw-r--r--include/llvm/IR/ValueMap.h7
-rw-r--r--include/llvm/IR/ValueSymbolTable.h7
-rw-r--r--include/llvm/IR/Verifier.h7
-rw-r--r--include/llvm/IRReader/IRReader.h18
-rw-r--r--include/llvm/InitializePasses.h25
-rw-r--r--include/llvm/LTO/Caching.h11
-rw-r--r--include/llvm/LTO/Config.h35
-rw-r--r--include/llvm/LTO/LTO.h47
-rw-r--r--include/llvm/LTO/LTOBackend.h7
-rw-r--r--include/llvm/LTO/SummaryBasedOptimizations.h7
-rw-r--r--include/llvm/LTO/legacy/LTOCodeGenerator.h8
-rw-r--r--include/llvm/LTO/legacy/LTOModule.h18
-rw-r--r--include/llvm/LTO/legacy/ThinLTOCodeGenerator.h49
-rw-r--r--include/llvm/LTO/legacy/UpdateCompilerUsed.h7
-rw-r--r--include/llvm/LineEditor/LineEditor.h7
-rw-r--r--include/llvm/LinkAllIR.h7
-rw-r--r--include/llvm/LinkAllPasses.h13
-rw-r--r--include/llvm/Linker/IRMover.h7
-rw-r--r--include/llvm/Linker/Linker.h7
-rw-r--r--include/llvm/MC/ConstantPools.h9
-rw-r--r--include/llvm/MC/LaneBitmask.h7
-rw-r--r--include/llvm/MC/MCAsmBackend.h23
-rw-r--r--include/llvm/MC/MCAsmInfo.h24
-rw-r--r--include/llvm/MC/MCAsmInfoCOFF.h7
-rw-r--r--include/llvm/MC/MCAsmInfoDarwin.h7
-rw-r--r--include/llvm/MC/MCAsmInfoELF.h7
-rw-r--r--include/llvm/MC/MCAsmInfoWasm.h7
-rw-r--r--include/llvm/MC/MCAsmInfoXCOFF.h25
-rw-r--r--include/llvm/MC/MCAsmLayout.h7
-rw-r--r--include/llvm/MC/MCAsmMacro.h7
-rw-r--r--include/llvm/MC/MCAssembler.h7
-rw-r--r--include/llvm/MC/MCCodeEmitter.h7
-rw-r--r--include/llvm/MC/MCCodePadder.h9
-rw-r--r--include/llvm/MC/MCCodeView.h7
-rw-r--r--include/llvm/MC/MCContext.h54
-rw-r--r--include/llvm/MC/MCDirectives.h8
-rw-r--r--include/llvm/MC/MCDisassembler/MCDisassembler.h25
-rw-r--r--include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h7
-rw-r--r--include/llvm/MC/MCDisassembler/MCRelocationInfo.h7
-rw-r--r--include/llvm/MC/MCDisassembler/MCSymbolizer.h7
-rw-r--r--include/llvm/MC/MCDwarf.h91
-rw-r--r--include/llvm/MC/MCELFObjectWriter.h11
-rw-r--r--include/llvm/MC/MCELFStreamer.h7
-rw-r--r--include/llvm/MC/MCExpr.h28
-rw-r--r--include/llvm/MC/MCFixedLenDisassembler.h7
-rw-r--r--include/llvm/MC/MCFixup.h10
-rw-r--r--include/llvm/MC/MCFixupKindInfo.h7
-rw-r--r--include/llvm/MC/MCFragment.h7
-rw-r--r--include/llvm/MC/MCInst.h10
-rw-r--r--include/llvm/MC/MCInstBuilder.h7
-rw-r--r--include/llvm/MC/MCInstPrinter.h11
-rw-r--r--include/llvm/MC/MCInstrAnalysis.h7
-rw-r--r--include/llvm/MC/MCInstrDesc.h13
-rw-r--r--include/llvm/MC/MCInstrInfo.h7
-rw-r--r--include/llvm/MC/MCInstrItineraries.h7
-rw-r--r--include/llvm/MC/MCLabel.h7
-rw-r--r--include/llvm/MC/MCLinkerOptimizationHint.h7
-rw-r--r--include/llvm/MC/MCMachObjectWriter.h7
-rw-r--r--include/llvm/MC/MCObjectFileInfo.h14
-rw-r--r--include/llvm/MC/MCObjectStreamer.h10
-rw-r--r--include/llvm/MC/MCObjectWriter.h7
-rw-r--r--include/llvm/MC/MCParser/AsmCond.h7
-rw-r--r--include/llvm/MC/MCParser/AsmLexer.h7
-rw-r--r--include/llvm/MC/MCParser/MCAsmLexer.h7
-rw-r--r--include/llvm/MC/MCParser/MCAsmParser.h13
-rw-r--r--include/llvm/MC/MCParser/MCAsmParserExtension.h7
-rw-r--r--include/llvm/MC/MCParser/MCAsmParserUtils.h7
-rw-r--r--include/llvm/MC/MCParser/MCParsedAsmOperand.h7
-rw-r--r--include/llvm/MC/MCParser/MCTargetAsmParser.h27
-rw-r--r--include/llvm/MC/MCRegisterInfo.h7
-rw-r--r--include/llvm/MC/MCSchedule.h13
-rw-r--r--include/llvm/MC/MCSection.h9
-rw-r--r--include/llvm/MC/MCSectionCOFF.h9
-rw-r--r--include/llvm/MC/MCSectionELF.h9
-rw-r--r--include/llvm/MC/MCSectionMachO.h7
-rw-r--r--include/llvm/MC/MCSectionWasm.h27
-rw-r--r--include/llvm/MC/MCSectionXCOFF.h56
-rw-r--r--include/llvm/MC/MCStreamer.h34
-rw-r--r--include/llvm/MC/MCSubtargetInfo.h71
-rw-r--r--include/llvm/MC/MCSymbol.h36
-rw-r--r--include/llvm/MC/MCSymbolCOFF.h7
-rw-r--r--include/llvm/MC/MCSymbolELF.h7
-rw-r--r--include/llvm/MC/MCSymbolMachO.h12
-rw-r--r--include/llvm/MC/MCSymbolWasm.h40
-rw-r--r--include/llvm/MC/MCSymbolXCOFF.h26
-rw-r--r--include/llvm/MC/MCTargetOptions.h28
-rw-r--r--include/llvm/MC/MCTargetOptionsCommandFlags.inc18
-rw-r--r--include/llvm/MC/MCValue.h7
-rw-r--r--include/llvm/MC/MCWasmObjectWriter.h7
-rw-r--r--include/llvm/MC/MCWasmStreamer.h7
-rw-r--r--include/llvm/MC/MCWin64EH.h7
-rw-r--r--include/llvm/MC/MCWinCOFFObjectWriter.h7
-rw-r--r--include/llvm/MC/MCWinCOFFStreamer.h7
-rw-r--r--include/llvm/MC/MCWinEH.h7
-rw-r--r--include/llvm/MC/MCXCOFFObjectWriter.h41
-rw-r--r--include/llvm/MC/MCXCOFFStreamer.h33
-rw-r--r--include/llvm/MC/MachineLocation.h7
-rw-r--r--include/llvm/MC/SectionKind.h7
-rw-r--r--include/llvm/MC/StringTableBuilder.h7
-rw-r--r--include/llvm/MC/SubtargetFeature.h100
-rw-r--r--include/llvm/MCA/Context.h21
-rw-r--r--include/llvm/MCA/HWEventListener.h38
-rw-r--r--include/llvm/MCA/HardwareUnits/HardwareUnit.h7
-rw-r--r--include/llvm/MCA/HardwareUnits/LSUnit.h393
-rw-r--r--include/llvm/MCA/HardwareUnits/RegisterFile.h10
-rw-r--r--include/llvm/MCA/HardwareUnits/ResourceManager.h31
-rw-r--r--include/llvm/MCA/HardwareUnits/RetireControlUnit.h7
-rw-r--r--include/llvm/MCA/HardwareUnits/Scheduler.h138
-rw-r--r--include/llvm/MCA/InstrBuilder.h7
-rw-r--r--include/llvm/MCA/Instruction.h163
-rw-r--r--include/llvm/MCA/Pipeline.h7
-rw-r--r--include/llvm/MCA/SourceMgr.h7
-rw-r--r--include/llvm/MCA/Stages/DispatchStage.h13
-rw-r--r--include/llvm/MCA/Stages/EntryStage.h7
-rw-r--r--include/llvm/MCA/Stages/ExecuteStage.h20
-rw-r--r--include/llvm/MCA/Stages/InstructionTables.h7
-rw-r--r--include/llvm/MCA/Stages/MicroOpQueueStage.h88
-rw-r--r--include/llvm/MCA/Stages/RetireStage.h7
-rw-r--r--include/llvm/MCA/Stages/Stage.h7
-rw-r--r--include/llvm/MCA/Support.h33
-rw-r--r--include/llvm/Object/Archive.h40
-rw-r--r--include/llvm/Object/ArchiveWriter.h10
-rw-r--r--include/llvm/Object/Binary.h22
-rw-r--r--include/llvm/Object/COFF.h20
-rw-r--r--include/llvm/Object/COFFImportFile.h24
-rw-r--r--include/llvm/Object/COFFModuleDefinition.h7
-rw-r--r--include/llvm/Object/CVDebugRecord.h7
-rw-r--r--include/llvm/Object/Decompressor.h7
-rw-r--r--include/llvm/Object/ELF.h158
-rw-r--r--include/llvm/Object/ELFObjectFile.h133
-rw-r--r--include/llvm/Object/ELFTypes.h9
-rw-r--r--include/llvm/Object/Error.h7
-rw-r--r--include/llvm/Object/IRObjectFile.h10
-rw-r--r--include/llvm/Object/IRSymtab.h25
-rw-r--r--include/llvm/Object/MachO.h76
-rw-r--r--include/llvm/Object/MachOUniversal.h7
-rw-r--r--include/llvm/Object/Minidump.h165
-rw-r--r--include/llvm/Object/ModuleSymbolTable.h7
-rw-r--r--include/llvm/Object/ObjectFile.h90
-rw-r--r--include/llvm/Object/RelocVisitor.h351
-rw-r--r--include/llvm/Object/RelocationResolver.h42
-rw-r--r--include/llvm/Object/StackMapParser.h50
-rw-r--r--include/llvm/Object/SymbolSize.h7
-rw-r--r--include/llvm/Object/SymbolicFile.h14
-rw-r--r--include/llvm/Object/Wasm.h80
-rw-r--r--include/llvm/Object/WasmTraits.h7
-rw-r--r--include/llvm/Object/WindowsMachineFlag.h33
-rw-r--r--include/llvm/Object/WindowsResource.h47
-rw-r--r--include/llvm/Object/XCOFFObjectFile.h268
-rw-r--r--include/llvm/ObjectYAML/COFFYAML.h7
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h7
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLSymbols.h7
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h7
-rw-r--r--include/llvm/ObjectYAML/CodeViewYAMLTypes.h7
-rw-r--r--include/llvm/ObjectYAML/DWARFEmitter.h7
-rw-r--r--include/llvm/ObjectYAML/DWARFYAML.h7
-rw-r--r--include/llvm/ObjectYAML/ELFYAML.h152
-rw-r--r--include/llvm/ObjectYAML/MachOYAML.h7
-rw-r--r--include/llvm/ObjectYAML/MinidumpYAML.h239
-rw-r--r--include/llvm/ObjectYAML/ObjectYAML.h9
-rw-r--r--include/llvm/ObjectYAML/WasmYAML.h69
-rw-r--r--include/llvm/ObjectYAML/XCOFFYAML.h71
-rw-r--r--include/llvm/ObjectYAML/YAML.h10
-rw-r--r--include/llvm/Option/Arg.h31
-rw-r--r--include/llvm/Option/ArgList.h17
-rw-r--r--include/llvm/Option/OptParser.td7
-rw-r--r--include/llvm/Option/OptSpecifier.h7
-rw-r--r--include/llvm/Option/OptTable.h7
-rw-r--r--include/llvm/Option/Option.h12
-rw-r--r--include/llvm/Pass.h7
-rw-r--r--include/llvm/PassAnalysisSupport.h7
-rw-r--r--include/llvm/PassInfo.h7
-rw-r--r--include/llvm/PassRegistry.h7
-rw-r--r--include/llvm/PassSupport.h7
-rw-r--r--include/llvm/Passes/PassBuilder.h114
-rw-r--r--include/llvm/Passes/PassPlugin.h7
-rw-r--r--include/llvm/Passes/StandardInstrumentations.h9
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMapping.h7
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMappingReader.h17
-rw-r--r--include/llvm/ProfileData/Coverage/CoverageMappingWriter.h7
-rw-r--r--include/llvm/ProfileData/GCOV.h19
-rw-r--r--include/llvm/ProfileData/InstrProf.h109
-rw-r--r--include/llvm/ProfileData/InstrProfData.inc92
-rw-r--r--include/llvm/ProfileData/InstrProfReader.h56
-rw-r--r--include/llvm/ProfileData/InstrProfWriter.h38
-rw-r--r--include/llvm/ProfileData/ProfileCommon.h10
-rw-r--r--include/llvm/ProfileData/SampleProf.h50
-rw-r--r--include/llvm/ProfileData/SampleProfReader.h16
-rw-r--r--include/llvm/ProfileData/SampleProfWriter.h7
-rw-r--r--include/llvm/Remarks/Remark.h113
-rw-r--r--include/llvm/Remarks/RemarkFormat.h33
-rw-r--r--include/llvm/Remarks/RemarkParser.h77
-rw-r--r--include/llvm/Remarks/RemarkSerializer.h68
-rw-r--r--include/llvm/Remarks/RemarkStringTable.h59
-rw-r--r--include/llvm/Support/AArch64TargetParser.def109
-rw-r--r--include/llvm/Support/AArch64TargetParser.h12
-rw-r--r--include/llvm/Support/AMDGPUMetadata.h39
-rw-r--r--include/llvm/Support/AMDHSAKernelDescriptor.h33
-rw-r--r--include/llvm/Support/ARMAttributeParser.h9
-rw-r--r--include/llvm/Support/ARMBuildAttributes.h13
-rw-r--r--include/llvm/Support/ARMEHABI.h7
-rw-r--r--include/llvm/Support/ARMTargetParser.def22
-rw-r--r--include/llvm/Support/ARMTargetParser.h19
-rw-r--r--include/llvm/Support/ARMWinEH.h11
-rw-r--r--include/llvm/Support/AlignOf.h7
-rw-r--r--include/llvm/Support/Allocator.h7
-rw-r--r--include/llvm/Support/ArrayRecycler.h7
-rw-r--r--include/llvm/Support/Atomic.h7
-rw-r--r--include/llvm/Support/AtomicOrdering.h7
-rw-r--r--include/llvm/Support/BinaryByteStream.h7
-rw-r--r--include/llvm/Support/BinaryItemStream.h7
-rw-r--r--include/llvm/Support/BinaryStream.h7
-rw-r--r--include/llvm/Support/BinaryStreamArray.h7
-rw-r--r--include/llvm/Support/BinaryStreamError.h7
-rw-r--r--include/llvm/Support/BinaryStreamReader.h19
-rw-r--r--include/llvm/Support/BinaryStreamRef.h7
-rw-r--r--include/llvm/Support/BinaryStreamWriter.h21
-rw-r--r--include/llvm/Support/BlockFrequency.h7
-rw-r--r--include/llvm/Support/BranchProbability.h35
-rw-r--r--include/llvm/Support/BuryPointer.h7
-rw-r--r--include/llvm/Support/CBindingWrapping.h9
-rw-r--r--include/llvm/Support/CFGUpdate.h7
-rw-r--r--include/llvm/Support/COM.h7
-rw-r--r--include/llvm/Support/CRC.h25
-rw-r--r--include/llvm/Support/CachePruning.h7
-rw-r--r--include/llvm/Support/Capacity.h7
-rw-r--r--include/llvm/Support/Casting.h17
-rw-r--r--include/llvm/Support/CheckedArithmetic.h16
-rw-r--r--include/llvm/Support/Chrono.h13
-rw-r--r--include/llvm/Support/CodeGen.h20
-rw-r--r--include/llvm/Support/CodeGenCoverage.h7
-rw-r--r--include/llvm/Support/CommandLine.h137
-rw-r--r--include/llvm/Support/Compiler.h16
-rw-r--r--include/llvm/Support/Compression.h7
-rw-r--r--include/llvm/Support/ConvertUTF.h7
-rw-r--r--include/llvm/Support/CrashRecoveryContext.h7
-rw-r--r--include/llvm/Support/DJB.h7
-rw-r--r--include/llvm/Support/DOTGraphTraits.h9
-rw-r--r--include/llvm/Support/DataExtractor.h7
-rw-r--r--include/llvm/Support/DataTypes.h7
-rw-r--r--include/llvm/Support/Debug.h7
-rw-r--r--include/llvm/Support/DebugCounter.h7
-rw-r--r--include/llvm/Support/DynamicLibrary.h7
-rw-r--r--include/llvm/Support/Endian.h28
-rw-r--r--include/llvm/Support/EndianStream.h7
-rw-r--r--include/llvm/Support/Errc.h7
-rw-r--r--include/llvm/Support/Errno.h7
-rw-r--r--include/llvm/Support/Error.h60
-rw-r--r--include/llvm/Support/ErrorHandling.h7
-rw-r--r--include/llvm/Support/ErrorOr.h7
-rw-r--r--include/llvm/Support/FileCheck.h557
-rw-r--r--include/llvm/Support/FileOutputBuffer.h12
-rw-r--r--include/llvm/Support/FileSystem.h113
-rw-r--r--include/llvm/Support/FileUtilities.h7
-rw-r--r--include/llvm/Support/Format.h7
-rw-r--r--include/llvm/Support/FormatAdapters.h7
-rw-r--r--include/llvm/Support/FormatCommon.h9
-rw-r--r--include/llvm/Support/FormatProviders.h7
-rw-r--r--include/llvm/Support/FormatVariadic.h7
-rw-r--r--include/llvm/Support/FormatVariadicDetails.h7
-rw-r--r--include/llvm/Support/FormattedStream.h7
-rw-r--r--include/llvm/Support/GenericDomTree.h19
-rw-r--r--include/llvm/Support/GenericDomTreeConstruction.h333
-rw-r--r--include/llvm/Support/GenericIteratedDominanceFrontier.h209
-rw-r--r--include/llvm/Support/GlobPattern.h7
-rw-r--r--include/llvm/Support/GraphWriter.h7
-rw-r--r--include/llvm/Support/Host.h7
-rw-r--r--include/llvm/Support/InitLLVM.h12
-rw-r--r--include/llvm/Support/ItaniumManglingCanonicalizer.h7
-rw-r--r--include/llvm/Support/JSON.h180
-rw-r--r--include/llvm/Support/JamCRC.h7
-rw-r--r--include/llvm/Support/KnownBits.h40
-rw-r--r--include/llvm/Support/LEB128.h15
-rw-r--r--include/llvm/Support/LineIterator.h7
-rw-r--r--include/llvm/Support/LockFileManager.h7
-rw-r--r--include/llvm/Support/LowLevelTypeImpl.h53
-rw-r--r--include/llvm/Support/MSVCErrorWorkarounds.h7
-rw-r--r--include/llvm/Support/MachineValueType.h287
-rw-r--r--include/llvm/Support/ManagedStatic.h32
-rw-r--r--include/llvm/Support/MathExtras.h42
-rw-r--r--include/llvm/Support/MemAlloc.h31
-rw-r--r--include/llvm/Support/Memory.h46
-rw-r--r--include/llvm/Support/MemoryBuffer.h13
-rw-r--r--include/llvm/Support/MipsABIFlags.h7
-rw-r--r--include/llvm/Support/Mutex.h7
-rw-r--r--include/llvm/Support/MutexGuard.h7
-rw-r--r--include/llvm/Support/NativeFormatting.h7
-rw-r--r--include/llvm/Support/OnDiskHashTable.h7
-rw-r--r--include/llvm/Support/Options.h7
-rw-r--r--include/llvm/Support/Parallel.h11
-rw-r--r--include/llvm/Support/Path.h7
-rw-r--r--include/llvm/Support/PluginLoader.h7
-rw-r--r--include/llvm/Support/PointerLikeTypeTraits.h7
-rw-r--r--include/llvm/Support/PrettyStackTrace.h21
-rw-r--r--include/llvm/Support/Printable.h7
-rw-r--r--include/llvm/Support/Process.h28
-rw-r--r--include/llvm/Support/Program.h7
-rw-r--r--include/llvm/Support/RWMutex.h7
-rw-r--r--include/llvm/Support/RandomNumberGenerator.h7
-rw-r--r--include/llvm/Support/Recycler.h7
-rw-r--r--include/llvm/Support/RecyclingAllocator.h7
-rw-r--r--include/llvm/Support/Regex.h7
-rw-r--r--include/llvm/Support/Registry.h13
-rw-r--r--include/llvm/Support/SHA1.h7
-rw-r--r--include/llvm/Support/SMLoc.h7
-rw-r--r--include/llvm/Support/SMTAPI.h447
-rw-r--r--include/llvm/Support/SaveAndRestore.h7
-rw-r--r--include/llvm/Support/ScalableSize.h43
-rw-r--r--include/llvm/Support/ScaledNumber.h11
-rw-r--r--include/llvm/Support/ScopedPrinter.h9
-rw-r--r--include/llvm/Support/Signals.h25
-rw-r--r--include/llvm/Support/Signposts.h43
-rw-r--r--include/llvm/Support/SmallVectorMemoryBuffer.h7
-rw-r--r--include/llvm/Support/Solaris/sys/regset.h7
-rw-r--r--include/llvm/Support/SourceMgr.h9
-rw-r--r--include/llvm/Support/SpecialCaseList.h7
-rw-r--r--include/llvm/Support/StringPool.h7
-rw-r--r--include/llvm/Support/StringSaver.h7
-rw-r--r--include/llvm/Support/SwapByteOrder.h15
-rw-r--r--include/llvm/Support/SymbolRemappingReader.h7
-rw-r--r--include/llvm/Support/SystemUtils.h7
-rw-r--r--include/llvm/Support/TarWriter.h7
-rw-r--r--include/llvm/Support/TargetOpcodes.def70
-rw-r--r--include/llvm/Support/TargetParser.h14
-rw-r--r--include/llvm/Support/TargetRegistry.h18
-rw-r--r--include/llvm/Support/TargetSelect.h7
-rw-r--r--include/llvm/Support/TaskQueue.h7
-rw-r--r--include/llvm/Support/ThreadLocal.h7
-rw-r--r--include/llvm/Support/ThreadPool.h7
-rw-r--r--include/llvm/Support/Threading.h23
-rw-r--r--include/llvm/Support/TimeProfiler.h76
-rw-r--r--include/llvm/Support/Timer.h14
-rw-r--r--include/llvm/Support/ToolOutputFile.h7
-rw-r--r--include/llvm/Support/TrailingObjects.h7
-rw-r--r--include/llvm/Support/TrigramIndex.h7
-rw-r--r--include/llvm/Support/TypeName.h7
-rw-r--r--include/llvm/Support/Unicode.h7
-rw-r--r--include/llvm/Support/UnicodeCharRanges.h7
-rw-r--r--include/llvm/Support/UniqueLock.h7
-rw-r--r--include/llvm/Support/Valgrind.h7
-rw-r--r--include/llvm/Support/VersionTuple.h7
-rw-r--r--include/llvm/Support/VirtualFileSystem.h34
-rw-r--r--include/llvm/Support/Watchdog.h7
-rw-r--r--include/llvm/Support/Win64EH.h7
-rw-r--r--include/llvm/Support/WindowsError.h7
-rw-r--r--include/llvm/Support/WithColor.h7
-rw-r--r--include/llvm/Support/X86DisassemblerDecoderCommon.h48
-rw-r--r--include/llvm/Support/X86TargetParser.def16
-rw-r--r--include/llvm/Support/YAMLParser.h7
-rw-r--r--include/llvm/Support/YAMLTraits.h83
-rw-r--r--include/llvm/Support/circular_raw_ostream.h7
-rw-r--r--include/llvm/Support/raw_os_ostream.h7
-rw-r--r--include/llvm/Support/raw_ostream.h9
-rw-r--r--include/llvm/Support/raw_sha1_ostream.h7
-rw-r--r--include/llvm/Support/thread.h7
-rw-r--r--include/llvm/Support/type_traits.h114
-rw-r--r--include/llvm/TableGen/Error.h7
-rw-r--r--include/llvm/TableGen/Main.h7
-rw-r--r--include/llvm/TableGen/Record.h100
-rw-r--r--include/llvm/TableGen/SearchableTable.td7
-rw-r--r--include/llvm/TableGen/SetTheory.h7
-rw-r--r--include/llvm/TableGen/StringMatcher.h7
-rw-r--r--include/llvm/TableGen/StringToOffsetTable.h7
-rw-r--r--include/llvm/TableGen/TableGenBackend.h9
-rw-r--r--include/llvm/Target/CodeGenCWrappers.h7
-rw-r--r--include/llvm/Target/GenericOpcodes.td195
-rw-r--r--include/llvm/Target/GlobalISel/RegisterBank.td7
-rw-r--r--include/llvm/Target/GlobalISel/SelectionDAGCompat.td23
-rw-r--r--include/llvm/Target/GlobalISel/Target.td7
-rw-r--r--include/llvm/Target/Target.td109
-rw-r--r--include/llvm/Target/TargetCallingConv.td21
-rw-r--r--include/llvm/Target/TargetInstrPredicate.td7
-rw-r--r--include/llvm/Target/TargetIntrinsicInfo.h7
-rw-r--r--include/llvm/Target/TargetItinerary.td7
-rw-r--r--include/llvm/Target/TargetLoweringObjectFile.h12
-rw-r--r--include/llvm/Target/TargetMachine.h39
-rw-r--r--include/llvm/Target/TargetOptions.h13
-rw-r--r--include/llvm/Target/TargetPfmCounters.td7
-rw-r--r--include/llvm/Target/TargetSchedule.td9
-rw-r--r--include/llvm/Target/TargetSelectionDAG.td178
-rw-r--r--include/llvm/Testing/Support/Annotations.h90
-rw-r--r--include/llvm/Testing/Support/Error.h7
-rw-r--r--include/llvm/Testing/Support/SupportHelpers.h56
-rw-r--r--include/llvm/TextAPI/ELF/ELFStub.h7
-rw-r--r--include/llvm/TextAPI/ELF/TBEHandler.h7
-rw-r--r--include/llvm/TextAPI/MachO/Architecture.def38
-rw-r--r--include/llvm/TextAPI/MachO/Architecture.h47
-rw-r--r--include/llvm/TextAPI/MachO/ArchitectureSet.h159
-rw-r--r--include/llvm/TextAPI/MachO/InterfaceFile.h436
-rw-r--r--include/llvm/TextAPI/MachO/PackedVersion.h64
-rw-r--r--include/llvm/TextAPI/MachO/Symbol.h96
-rw-r--r--include/llvm/TextAPI/MachO/TextAPIReader.h34
-rw-r--r--include/llvm/TextAPI/MachO/TextAPIWriter.h29
-rw-r--r--include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h7
-rw-r--r--include/llvm/ToolDrivers/llvm-lib/LibDriver.h8
-rw-r--r--include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h7
-rw-r--r--include/llvm/Transforms/Coroutines.h7
-rw-r--r--include/llvm/Transforms/IPO.h11
-rw-r--r--include/llvm/Transforms/IPO/AlwaysInliner.h7
-rw-r--r--include/llvm/Transforms/IPO/ArgumentPromotion.h7
-rw-r--r--include/llvm/Transforms/IPO/Attributor.h789
-rw-r--r--include/llvm/Transforms/IPO/CalledValuePropagation.h7
-rw-r--r--include/llvm/Transforms/IPO/ConstantMerge.h7
-rw-r--r--include/llvm/Transforms/IPO/CrossDSOCFI.h7
-rw-r--r--include/llvm/Transforms/IPO/DeadArgumentElimination.h7
-rw-r--r--include/llvm/Transforms/IPO/ElimAvailExtern.h7
-rw-r--r--include/llvm/Transforms/IPO/ForceFunctionAttrs.h7
-rw-r--r--include/llvm/Transforms/IPO/FunctionAttrs.h7
-rw-r--r--include/llvm/Transforms/IPO/FunctionImport.h7
-rw-r--r--include/llvm/Transforms/IPO/GlobalDCE.h7
-rw-r--r--include/llvm/Transforms/IPO/GlobalOpt.h7
-rw-r--r--include/llvm/Transforms/IPO/GlobalSplit.h7
-rw-r--r--include/llvm/Transforms/IPO/HotColdSplitting.h7
-rw-r--r--include/llvm/Transforms/IPO/InferFunctionAttrs.h7
-rw-r--r--include/llvm/Transforms/IPO/Inliner.h7
-rw-r--r--include/llvm/Transforms/IPO/Internalize.h13
-rw-r--r--include/llvm/Transforms/IPO/LowerTypeTests.h7
-rw-r--r--include/llvm/Transforms/IPO/PartialInlining.h7
-rw-r--r--include/llvm/Transforms/IPO/PassManagerBuilder.h30
-rw-r--r--include/llvm/Transforms/IPO/SCCP.h7
-rw-r--r--include/llvm/Transforms/IPO/SampleProfile.h7
-rw-r--r--include/llvm/Transforms/IPO/StripDeadPrototypes.h7
-rw-r--r--include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h7
-rw-r--r--include/llvm/Transforms/IPO/WholeProgramDevirt.h7
-rw-r--r--include/llvm/Transforms/InstCombine/InstCombine.h7
-rw-r--r--include/llvm/Transforms/InstCombine/InstCombineWorklist.h7
-rw-r--r--include/llvm/Transforms/Instrumentation.h52
-rw-r--r--include/llvm/Transforms/Instrumentation/AddressSanitizer.h143
-rw-r--r--include/llvm/Transforms/Instrumentation/BoundsChecking.h7
-rw-r--r--include/llvm/Transforms/Instrumentation/CGProfile.h7
-rw-r--r--include/llvm/Transforms/Instrumentation/ControlHeightReduction.h7
-rw-r--r--include/llvm/Transforms/Instrumentation/GCOVProfiler.h7
-rw-r--r--include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h41
-rw-r--r--include/llvm/Transforms/Instrumentation/InstrOrderFile.h28
-rw-r--r--include/llvm/Transforms/Instrumentation/InstrProfiling.h15
-rw-r--r--include/llvm/Transforms/Instrumentation/MemorySanitizer.h30
-rw-r--r--include/llvm/Transforms/Instrumentation/PGOInstrumentation.h33
-rw-r--r--include/llvm/Transforms/Instrumentation/PoisonChecking.h25
-rw-r--r--include/llvm/Transforms/Instrumentation/ThreadSanitizer.h9
-rw-r--r--include/llvm/Transforms/ObjCARC.h7
-rw-r--r--include/llvm/Transforms/Scalar.h38
-rw-r--r--include/llvm/Transforms/Scalar/ADCE.h7
-rw-r--r--include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h7
-rw-r--r--include/llvm/Transforms/Scalar/BDCE.h7
-rw-r--r--include/llvm/Transforms/Scalar/CallSiteSplitting.h7
-rw-r--r--include/llvm/Transforms/Scalar/ConstantHoisting.h14
-rw-r--r--include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h7
-rw-r--r--include/llvm/Transforms/Scalar/DCE.h7
-rw-r--r--include/llvm/Transforms/Scalar/DeadStoreElimination.h7
-rw-r--r--include/llvm/Transforms/Scalar/DivRemPairs.h7
-rw-r--r--include/llvm/Transforms/Scalar/EarlyCSE.h7
-rw-r--r--include/llvm/Transforms/Scalar/Float2Int.h7
-rw-r--r--include/llvm/Transforms/Scalar/GVN.h7
-rw-r--r--include/llvm/Transforms/Scalar/GVNExpression.h7
-rw-r--r--include/llvm/Transforms/Scalar/GuardWidening.h11
-rw-r--r--include/llvm/Transforms/Scalar/IVUsersPrinter.h7
-rw-r--r--include/llvm/Transforms/Scalar/IndVarSimplify.h7
-rw-r--r--include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h7
-rw-r--r--include/llvm/Transforms/Scalar/InstSimplifyPass.h7
-rw-r--r--include/llvm/Transforms/Scalar/JumpThreading.h9
-rw-r--r--include/llvm/Transforms/Scalar/LICM.h19
-rw-r--r--include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopDataPrefetch.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopDeletion.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopDistribute.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopFuse.h30
-rw-r--r--include/llvm/Transforms/Scalar/LoopIdiomRecognize.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopInstSimplify.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopLoadElimination.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopPassManager.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopPredication.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopRotation.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopSimplifyCFG.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopSink.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopStrengthReduce.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h7
-rw-r--r--include/llvm/Transforms/Scalar/LoopUnrollPass.h31
-rw-r--r--include/llvm/Transforms/Scalar/LowerAtomic.h7
-rw-r--r--include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h7
-rw-r--r--include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h7
-rw-r--r--include/llvm/Transforms/Scalar/LowerWidenableCondition.h26
-rw-r--r--include/llvm/Transforms/Scalar/MakeGuardsExplicit.h7
-rw-r--r--include/llvm/Transforms/Scalar/MemCpyOptimizer.h7
-rw-r--r--include/llvm/Transforms/Scalar/MergeICmps.h25
-rw-r--r--include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h7
-rw-r--r--include/llvm/Transforms/Scalar/NaryReassociate.h7
-rw-r--r--include/llvm/Transforms/Scalar/NewGVN.h7
-rw-r--r--include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h7
-rw-r--r--include/llvm/Transforms/Scalar/Reassociate.h16
-rw-r--r--include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h7
-rw-r--r--include/llvm/Transforms/Scalar/SCCP.h7
-rw-r--r--include/llvm/Transforms/Scalar/SROA.h8
-rw-r--r--include/llvm/Transforms/Scalar/Scalarizer.h7
-rw-r--r--include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h7
-rw-r--r--include/llvm/Transforms/Scalar/SimplifyCFG.h7
-rw-r--r--include/llvm/Transforms/Scalar/Sink.h7
-rw-r--r--include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h7
-rw-r--r--include/llvm/Transforms/Scalar/SpeculativeExecution.h7
-rw-r--r--include/llvm/Transforms/Scalar/TailRecursionElimination.h7
-rw-r--r--include/llvm/Transforms/Scalar/WarnMissedTransforms.h7
-rw-r--r--include/llvm/Transforms/Utils.h7
-rw-r--r--include/llvm/Transforms/Utils/ASanStackFrameLayout.h7
-rw-r--r--include/llvm/Transforms/Utils/AddDiscriminators.h7
-rw-r--r--include/llvm/Transforms/Utils/BasicBlockUtils.h58
-rw-r--r--include/llvm/Transforms/Utils/BreakCriticalEdges.h7
-rw-r--r--include/llvm/Transforms/Utils/BuildLibCalls.h62
-rw-r--r--include/llvm/Transforms/Utils/BypassSlowDivision.h7
-rw-r--r--include/llvm/Transforms/Utils/CallPromotionUtils.h7
-rw-r--r--include/llvm/Transforms/Utils/CanonicalizeAliases.h7
-rw-r--r--include/llvm/Transforms/Utils/Cloning.h19
-rw-r--r--include/llvm/Transforms/Utils/CodeExtractor.h21
-rw-r--r--include/llvm/Transforms/Utils/CtorUtils.h7
-rw-r--r--include/llvm/Transforms/Utils/EntryExitInstrumenter.h7
-rw-r--r--include/llvm/Transforms/Utils/EscapeEnumerator.h7
-rw-r--r--include/llvm/Transforms/Utils/Evaluator.h7
-rw-r--r--include/llvm/Transforms/Utils/FunctionComparator.h7
-rw-r--r--include/llvm/Transforms/Utils/FunctionImportUtils.h12
-rw-r--r--include/llvm/Transforms/Utils/GlobalStatus.h7
-rw-r--r--include/llvm/Transforms/Utils/GuardUtils.h7
-rw-r--r--include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h9
-rw-r--r--include/llvm/Transforms/Utils/IntegerDivision.h7
-rw-r--r--include/llvm/Transforms/Utils/LCSSA.h7
-rw-r--r--include/llvm/Transforms/Utils/LibCallsShrinkWrap.h7
-rw-r--r--include/llvm/Transforms/Utils/Local.h45
-rw-r--r--include/llvm/Transforms/Utils/LoopRotationUtils.h7
-rw-r--r--include/llvm/Transforms/Utils/LoopSimplify.h15
-rw-r--r--include/llvm/Transforms/Utils/LoopUtils.h45
-rw-r--r--include/llvm/Transforms/Utils/LoopVersioning.h7
-rw-r--r--include/llvm/Transforms/Utils/LowerInvoke.h7
-rw-r--r--include/llvm/Transforms/Utils/LowerMemIntrinsics.h9
-rw-r--r--include/llvm/Transforms/Utils/Mem2Reg.h7
-rw-r--r--include/llvm/Transforms/Utils/ModuleUtils.h24
-rw-r--r--include/llvm/Transforms/Utils/NameAnonGlobals.h7
-rw-r--r--include/llvm/Transforms/Utils/PredicateInfo.h7
-rw-r--r--include/llvm/Transforms/Utils/PromoteMemToReg.h7
-rw-r--r--include/llvm/Transforms/Utils/SSAUpdater.h11
-rw-r--r--include/llvm/Transforms/Utils/SSAUpdaterBulk.h7
-rw-r--r--include/llvm/Transforms/Utils/SSAUpdaterImpl.h7
-rw-r--r--include/llvm/Transforms/Utils/SanitizerStats.h7
-rw-r--r--include/llvm/Transforms/Utils/SimplifyIndVar.h7
-rw-r--r--include/llvm/Transforms/Utils/SimplifyLibCalls.h45
-rw-r--r--include/llvm/Transforms/Utils/SizeOpts.h34
-rw-r--r--include/llvm/Transforms/Utils/SplitModule.h7
-rw-r--r--include/llvm/Transforms/Utils/SymbolRewriter.h7
-rw-r--r--include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h7
-rw-r--r--include/llvm/Transforms/Utils/UnrollLoop.h48
-rw-r--r--include/llvm/Transforms/Utils/VNCoercion.h7
-rw-r--r--include/llvm/Transforms/Utils/ValueMapper.h7
-rw-r--r--include/llvm/Transforms/Vectorize.h12
-rw-r--r--include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h7
-rw-r--r--include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h60
-rw-r--r--include/llvm/Transforms/Vectorize/LoopVectorize.h58
-rw-r--r--include/llvm/Transforms/Vectorize/SLPVectorizer.h9
-rw-r--r--include/llvm/WindowsManifest/WindowsManifestMerger.h7
-rw-r--r--include/llvm/WindowsResource/ResourceProcessor.h7
-rw-r--r--include/llvm/WindowsResource/ResourceScriptToken.h7
-rw-r--r--include/llvm/WindowsResource/ResourceScriptTokenList.h7
-rw-r--r--include/llvm/XRay/BlockIndexer.h7
-rw-r--r--include/llvm/XRay/BlockPrinter.h7
-rw-r--r--include/llvm/XRay/BlockVerifier.h7
-rw-r--r--include/llvm/XRay/FDRLogBuilder.h7
-rw-r--r--include/llvm/XRay/FDRRecordConsumer.h7
-rw-r--r--include/llvm/XRay/FDRRecordProducer.h7
-rw-r--r--include/llvm/XRay/FDRRecords.h7
-rw-r--r--include/llvm/XRay/FDRTraceExpander.h7
-rw-r--r--include/llvm/XRay/FDRTraceWriter.h7
-rw-r--r--include/llvm/XRay/FileHeaderReader.h7
-rw-r--r--include/llvm/XRay/Graph.h7
-rw-r--r--include/llvm/XRay/InstrumentationMap.h7
-rw-r--r--include/llvm/XRay/Profile.h7
-rw-r--r--include/llvm/XRay/RecordPrinter.h7
-rw-r--r--include/llvm/XRay/Trace.h7
-rw-r--r--include/llvm/XRay/XRayRecord.h7
-rw-r--r--include/llvm/XRay/YAMLXRayRecord.h7
-rw-r--r--include/llvm/module.modulemap5
-rw-r--r--lib/Analysis/AliasAnalysis.cpp136
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp7
-rw-r--r--lib/Analysis/AliasAnalysisSummary.cpp18
-rw-r--r--lib/Analysis/AliasAnalysisSummary.h22
-rw-r--r--lib/Analysis/AliasSetTracker.cpp131
-rw-r--r--lib/Analysis/Analysis.cpp7
-rw-r--r--lib/Analysis/AssumptionCache.cpp35
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp239
-rw-r--r--lib/Analysis/BlockFrequencyInfo.cpp12
-rw-r--r--lib/Analysis/BlockFrequencyInfoImpl.cpp18
-rw-r--r--lib/Analysis/BranchProbabilityInfo.cpp15
-rw-r--r--lib/Analysis/CFG.cpp83
-rw-r--r--lib/Analysis/CFGPrinter.cpp7
-rw-r--r--lib/Analysis/CFLAndersAliasAnalysis.cpp16
-rw-r--r--lib/Analysis/CFLGraph.h68
-rw-r--r--lib/Analysis/CFLSteensAliasAnalysis.cpp7
-rw-r--r--lib/Analysis/CGSCCPassManager.cpp13
-rw-r--r--lib/Analysis/CallGraph.cpp32
-rw-r--r--lib/Analysis/CallGraphSCCPass.cpp94
-rw-r--r--lib/Analysis/CallPrinter.cpp7
-rw-r--r--lib/Analysis/CaptureTracking.cpp39
-rw-r--r--lib/Analysis/CmpInstAnalysis.cpp7
-rw-r--r--lib/Analysis/CodeMetrics.cpp18
-rw-r--r--lib/Analysis/ConstantFolding.cpp1099
-rw-r--r--lib/Analysis/CostModel.cpp7
-rw-r--r--lib/Analysis/Delinearization.cpp7
-rw-r--r--lib/Analysis/DemandedBits.cpp35
-rw-r--r--lib/Analysis/DependenceAnalysis.cpp51
-rw-r--r--lib/Analysis/DivergenceAnalysis.cpp7
-rw-r--r--lib/Analysis/DomPrinter.cpp7
-rw-r--r--lib/Analysis/DomTreeUpdater.cpp (renamed from lib/IR/DomTreeUpdater.cpp)132
-rw-r--r--lib/Analysis/DominanceFrontier.cpp7
-rw-r--r--lib/Analysis/EHPersonalities.cpp7
-rw-r--r--lib/Analysis/GlobalsModRef.cpp39
-rw-r--r--lib/Analysis/GuardUtils.cpp36
-rw-r--r--lib/Analysis/IVDescriptors.cpp33
-rw-r--r--lib/Analysis/IVUsers.cpp7
-rw-r--r--lib/Analysis/IndirectCallPromotionAnalysis.cpp7
-rw-r--r--lib/Analysis/InlineCost.cpp424
-rw-r--r--lib/Analysis/InstCount.cpp7
-rw-r--r--lib/Analysis/InstructionPrecedenceTracking.cpp11
-rw-r--r--lib/Analysis/InstructionSimplify.cpp713
-rw-r--r--lib/Analysis/Interval.cpp7
-rw-r--r--lib/Analysis/IntervalPartition.cpp7
-rw-r--r--lib/Analysis/IteratedDominanceFrontier.cpp110
-rw-r--r--lib/Analysis/LazyBlockFrequencyInfo.cpp7
-rw-r--r--lib/Analysis/LazyBranchProbabilityInfo.cpp7
-rw-r--r--lib/Analysis/LazyCallGraph.cpp20
-rw-r--r--lib/Analysis/LazyValueInfo.cpp192
-rw-r--r--lib/Analysis/LegacyDivergenceAnalysis.cpp7
-rw-r--r--lib/Analysis/Lint.cpp15
-rw-r--r--lib/Analysis/Loads.cpp44
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp94
-rw-r--r--lib/Analysis/LoopAnalysisManager.cpp14
-rw-r--r--lib/Analysis/LoopInfo.cpp353
-rw-r--r--lib/Analysis/LoopPass.cpp20
-rw-r--r--lib/Analysis/LoopUnrollAnalyzer.cpp7
-rw-r--r--lib/Analysis/MemDepPrinter.cpp7
-rw-r--r--lib/Analysis/MemDerefPrinter.cpp12
-rw-r--r--lib/Analysis/MemoryBuiltins.cpp137
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp42
-rw-r--r--lib/Analysis/MemoryLocation.cpp7
-rw-r--r--lib/Analysis/MemorySSA.cpp315
-rw-r--r--lib/Analysis/MemorySSAUpdater.cpp239
-rw-r--r--lib/Analysis/ModuleDebugInfoPrinter.cpp7
-rw-r--r--lib/Analysis/ModuleSummaryAnalysis.cpp276
-rw-r--r--lib/Analysis/MustExecute.cpp16
-rw-r--r--lib/Analysis/ObjCARCAliasAnalysis.cpp32
-rw-r--r--lib/Analysis/ObjCARCAnalysisUtils.cpp7
-rw-r--r--lib/Analysis/ObjCARCInstKind.cpp42
-rw-r--r--lib/Analysis/OptimizationRemarkEmitter.cpp7
-rw-r--r--lib/Analysis/OrderedBasicBlock.cpp31
-rw-r--r--lib/Analysis/OrderedInstructions.cpp7
-rw-r--r--lib/Analysis/PHITransAddr.cpp7
-rw-r--r--lib/Analysis/PhiValues.cpp7
-rw-r--r--lib/Analysis/PostDominators.cpp7
-rw-r--r--lib/Analysis/ProfileSummaryInfo.cpp26
-rw-r--r--lib/Analysis/PtrUseVisitor.cpp15
-rw-r--r--lib/Analysis/RegionInfo.cpp7
-rw-r--r--lib/Analysis/RegionPass.cpp16
-rw-r--r--lib/Analysis/RegionPrinter.cpp7
-rw-r--r--lib/Analysis/ScalarEvolution.cpp794
-rw-r--r--lib/Analysis/ScalarEvolutionAliasAnalysis.cpp14
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp267
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp7
-rw-r--r--lib/Analysis/ScopedNoAliasAA.cpp28
-rw-r--r--lib/Analysis/StackSafetyAnalysis.cpp11
-rw-r--r--lib/Analysis/StratifiedSets.h7
-rw-r--r--lib/Analysis/SyncDependenceAnalysis.cpp35
-rw-r--r--lib/Analysis/SyntheticCountsUtils.cpp7
-rw-r--r--lib/Analysis/TargetLibraryInfo.cpp431
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp184
-rw-r--r--lib/Analysis/Trace.cpp7
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp35
-rw-r--r--lib/Analysis/TypeMetadataUtils.cpp7
-rw-r--r--lib/Analysis/ValueLattice.cpp7
-rw-r--r--lib/Analysis/ValueLatticeUtils.cpp7
-rw-r--r--lib/Analysis/ValueTracking.cpp1204
-rw-r--r--lib/Analysis/VectorUtils.cpp148
-rw-r--r--lib/AsmParser/LLLexer.cpp31
-rw-r--r--lib/AsmParser/LLLexer.h7
-rw-r--r--lib/AsmParser/LLParser.cpp711
-rw-r--r--lib/AsmParser/LLParser.h17
-rw-r--r--lib/AsmParser/LLToken.h20
-rw-r--r--lib/AsmParser/Parser.cpp7
-rw-r--r--lib/BinaryFormat/AMDGPUMetadataVerifier.cpp160
-rw-r--r--lib/BinaryFormat/Dwarf.cpp13
-rw-r--r--lib/BinaryFormat/Magic.cpp21
-rw-r--r--lib/BinaryFormat/Minidump.cpp14
-rw-r--r--lib/BinaryFormat/MsgPackDocument.cpp245
-rw-r--r--lib/BinaryFormat/MsgPackDocumentYAML.cpp249
-rw-r--r--lib/BinaryFormat/MsgPackReader.cpp7
-rw-r--r--lib/BinaryFormat/MsgPackTypes.cpp303
-rw-r--r--lib/BinaryFormat/MsgPackWriter.cpp7
-rw-r--r--lib/BinaryFormat/Wasm.cpp29
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp7
-rw-r--r--lib/Bitcode/Reader/BitcodeAnalyzer.cpp980
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp1261
-rw-r--r--lib/Bitcode/Reader/MetadataLoader.cpp269
-rw-r--r--lib/Bitcode/Reader/MetadataLoader.h7
-rw-r--r--lib/Bitcode/Reader/ValueList.cpp31
-rw-r--r--lib/Bitcode/Reader/ValueList.h44
-rw-r--r--lib/Bitcode/Writer/BitWriter.cpp7
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp244
-rw-r--r--lib/Bitcode/Writer/BitcodeWriterPass.cpp7
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp22
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h7
-rw-r--r--lib/Bitstream/Reader/BitstreamReader.cpp (renamed from lib/Bitcode/Reader/BitstreamReader.cpp)282
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp7
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h7
-rw-r--r--lib/CodeGen/AllocationOrder.cpp7
-rw-r--r--lib/CodeGen/AllocationOrder.h7
-rw-r--r--lib/CodeGen/Analysis.cpp52
-rw-r--r--lib/CodeGen/AntiDepBreaker.h7
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/AccelTable.cpp46
-rw-r--r--lib/CodeGen/AsmPrinter/AddressPool.cpp31
-rw-r--r--lib/CodeGen/AsmPrinter/AddressPool.h9
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp279
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp31
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp104
-rw-r--r--lib/CodeGen/AsmPrinter/ByteStreamer.h17
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.cpp371
-rw-r--r--lib/CodeGen/AsmPrinter/CodeViewDebug.h24
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp26
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.cpp10
-rw-r--r--lib/CodeGen/AsmPrinter/DIEHash.h7
-rw-r--r--lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp354
-rw-r--r--lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp74
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocEntry.h205
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocStream.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/DebugLocStream.h7
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp184
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.h29
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp668
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h92
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h7
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.cpp120
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfExpression.h86
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.cpp17
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfFile.h10
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfStringPool.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfStringPool.h7
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp197
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h38
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp20
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.h7
-rw-r--r--lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/WasmException.cpp11
-rw-r--r--lib/CodeGen/AsmPrinter/WasmException.h7
-rw-r--r--lib/CodeGen/AsmPrinter/WinCFGuard.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/WinCFGuard.h7
-rw-r--r--lib/CodeGen/AsmPrinter/WinException.cpp49
-rw-r--r--lib/CodeGen/AsmPrinter/WinException.h10
-rw-r--r--lib/CodeGen/AtomicExpandPass.cpp70
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp7
-rw-r--r--lib/CodeGen/BranchFolding.cpp72
-rw-r--r--lib/CodeGen/BranchFolding.h7
-rw-r--r--lib/CodeGen/BranchRelaxation.cpp7
-rw-r--r--lib/CodeGen/BreakFalseDeps.cpp7
-rw-r--r--lib/CodeGen/BuiltinGCs.cpp7
-rw-r--r--lib/CodeGen/CFIInstrInserter.cpp7
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp7
-rw-r--r--lib/CodeGen/CallingConvLower.cpp7
-rw-r--r--lib/CodeGen/CodeGen.cpp10
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp523
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp7
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h7
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp7
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp15
-rw-r--r--lib/CodeGen/DetectDeadLanes.cpp7
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp11
-rw-r--r--lib/CodeGen/EarlyIfConversion.cpp7
-rw-r--r--lib/CodeGen/EdgeBundles.cpp9
-rw-r--r--lib/CodeGen/ExecutionDomainFix.cpp16
-rw-r--r--lib/CodeGen/ExpandMemCmp.cpp68
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp7
-rw-r--r--lib/CodeGen/ExpandReductions.cpp59
-rw-r--r--lib/CodeGen/FEntryInserter.cpp7
-rw-r--r--lib/CodeGen/FaultMaps.cpp7
-rw-r--r--lib/CodeGen/FinalizeISel.cpp (renamed from lib/CodeGen/ExpandISelPseudos.cpp)36
-rw-r--r--lib/CodeGen/FuncletLayout.cpp7
-rw-r--r--lib/CodeGen/GCMetadata.cpp7
-rw-r--r--lib/CodeGen/GCMetadataPrinter.cpp7
-rw-r--r--lib/CodeGen/GCRootLowering.cpp9
-rw-r--r--lib/CodeGen/GCStrategy.cpp7
-rw-r--r--lib/CodeGen/GlobalISel/CSEInfo.cpp47
-rw-r--r--lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp21
-rw-r--r--lib/CodeGen/GlobalISel/CallLowering.cpp154
-rw-r--r--lib/CodeGen/GlobalISel/Combiner.cpp12
-rw-r--r--lib/CodeGen/GlobalISel/CombinerHelper.cpp220
-rw-r--r--lib/CodeGen/GlobalISel/GISelChangeObserver.cpp8
-rw-r--r--lib/CodeGen/GlobalISel/GlobalISel.cpp7
-rw-r--r--lib/CodeGen/GlobalISel/IRTranslator.cpp1284
-rw-r--r--lib/CodeGen/GlobalISel/InstructionSelect.cpp19
-rw-r--r--lib/CodeGen/GlobalISel/InstructionSelector.cpp19
-rw-r--r--lib/CodeGen/GlobalISel/LegalityPredicates.cpp86
-rw-r--r--lib/CodeGen/GlobalISel/LegalizeMutations.cpp54
-rw-r--r--lib/CodeGen/GlobalISel/Legalizer.cpp54
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerHelper.cpp2884
-rw-r--r--lib/CodeGen/GlobalISel/LegalizerInfo.cpp186
-rw-r--r--lib/CodeGen/GlobalISel/Localizer.cpp233
-rw-r--r--lib/CodeGen/GlobalISel/MachineIRBuilder.cpp429
-rw-r--r--lib/CodeGen/GlobalISel/RegBankSelect.cpp139
-rw-r--r--lib/CodeGen/GlobalISel/RegisterBank.cpp7
-rw-r--r--lib/CodeGen/GlobalISel/RegisterBankInfo.cpp115
-rw-r--r--lib/CodeGen/GlobalISel/Utils.cpp159
-rw-r--r--lib/CodeGen/GlobalMerge.cpp29
-rw-r--r--lib/CodeGen/HardwareLoops.cpp463
-rw-r--r--lib/CodeGen/IfConversion.cpp9
-rw-r--r--lib/CodeGen/ImplicitNullChecks.cpp25
-rw-r--r--lib/CodeGen/IndirectBrExpandPass.cpp15
-rw-r--r--lib/CodeGen/InlineSpiller.cpp52
-rw-r--r--lib/CodeGen/InterferenceCache.cpp7
-rw-r--r--lib/CodeGen/InterferenceCache.h7
-rw-r--r--lib/CodeGen/InterleavedAccessPass.cpp19
-rw-r--r--lib/CodeGen/InterleavedLoadCombinePass.cpp10
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp115
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp16
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp7
-rw-r--r--lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp7
-rw-r--r--lib/CodeGen/LexicalScopes.cpp7
-rw-r--r--lib/CodeGen/LiveDebugValues.cpp720
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp181
-rw-r--r--lib/CodeGen/LiveDebugVariables.h7
-rw-r--r--lib/CodeGen/LiveInterval.cpp64
-rw-r--r--lib/CodeGen/LiveIntervalUnion.cpp7
-rw-r--r--lib/CodeGen/LiveIntervals.cpp13
-rw-r--r--lib/CodeGen/LivePhysRegs.cpp7
-rw-r--r--lib/CodeGen/LiveRangeCalc.cpp16
-rw-r--r--lib/CodeGen/LiveRangeCalc.h7
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp9
-rw-r--r--lib/CodeGen/LiveRangeShrink.cpp7
-rw-r--r--lib/CodeGen/LiveRangeUtils.h7
-rw-r--r--lib/CodeGen/LiveRegMatrix.cpp7
-rw-r--r--lib/CodeGen/LiveRegUnits.cpp23
-rw-r--r--lib/CodeGen/LiveStacks.cpp7
-rw-r--r--lib/CodeGen/LiveVariables.cpp9
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp23
-rw-r--r--lib/CodeGen/LoopTraversal.cpp7
-rw-r--r--lib/CodeGen/LowLevelType.cpp7
-rw-r--r--lib/CodeGen/LowerEmuTLS.cpp7
-rw-r--r--lib/CodeGen/MIRCanonicalizerPass.cpp65
-rw-r--r--lib/CodeGen/MIRParser/MILexer.cpp8
-rw-r--r--lib/CodeGen/MIRParser/MILexer.h8
-rw-r--r--lib/CodeGen/MIRParser/MIParser.cpp574
-rw-r--r--lib/CodeGen/MIRParser/MIParser.h125
-rw-r--r--lib/CodeGen/MIRParser/MIRParser.cpp184
-rw-r--r--lib/CodeGen/MIRPrinter.cpp67
-rw-r--r--lib/CodeGen/MIRPrintingPass.cpp7
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp17
-rw-r--r--lib/CodeGen/MachineBlockFrequencyInfo.cpp7
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp398
-rw-r--r--lib/CodeGen/MachineBranchProbabilityInfo.cpp7
-rw-r--r--lib/CodeGen/MachineCSE.cpp181
-rw-r--r--lib/CodeGen/MachineCombiner.cpp26
-rw-r--r--lib/CodeGen/MachineCopyPropagation.cpp7
-rw-r--r--lib/CodeGen/MachineDominanceFrontier.cpp7
-rw-r--r--lib/CodeGen/MachineDominators.cpp7
-rw-r--r--lib/CodeGen/MachineFrameInfo.cpp18
-rw-r--r--lib/CodeGen/MachineFunction.cpp87
-rw-r--r--lib/CodeGen/MachineFunctionPass.cpp7
-rw-r--r--lib/CodeGen/MachineFunctionPrinterPass.cpp7
-rw-r--r--lib/CodeGen/MachineInstr.cpp128
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp7
-rw-r--r--lib/CodeGen/MachineLICM.cpp7
-rw-r--r--lib/CodeGen/MachineLoopInfo.cpp7
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp30
-rw-r--r--lib/CodeGen/MachineModuleInfoImpls.cpp7
-rw-r--r--lib/CodeGen/MachineOperand.cpp29
-rw-r--r--lib/CodeGen/MachineOptimizationRemarkEmitter.cpp7
-rw-r--r--lib/CodeGen/MachineOutliner.cpp42
-rw-r--r--lib/CodeGen/MachinePipeliner.cpp534
-rw-r--r--lib/CodeGen/MachinePostDominators.cpp7
-rw-r--r--lib/CodeGen/MachineRegionInfo.cpp7
-rw-r--r--lib/CodeGen/MachineRegisterInfo.cpp20
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp7
-rw-r--r--lib/CodeGen/MachineScheduler.cpp144
-rw-r--r--lib/CodeGen/MachineSink.cpp17
-rw-r--r--lib/CodeGen/MachineTraceMetrics.cpp7
-rw-r--r--lib/CodeGen/MachineVerifier.cpp510
-rw-r--r--lib/CodeGen/MacroFusion.cpp19
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp14
-rw-r--r--lib/CodeGen/PHIElimination.cpp7
-rw-r--r--lib/CodeGen/PHIEliminationUtils.cpp7
-rw-r--r--lib/CodeGen/PHIEliminationUtils.h7
-rw-r--r--lib/CodeGen/ParallelCG.cpp7
-rw-r--r--lib/CodeGen/PatchableFunction.cpp7
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp24
-rw-r--r--lib/CodeGen/PostRAHazardRecognizer.cpp7
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp7
-rw-r--r--lib/CodeGen/PreISelIntrinsicLowering.cpp13
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp7
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp198
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp7
-rw-r--r--lib/CodeGen/ReachingDefAnalysis.cpp7
-rw-r--r--lib/CodeGen/RegAllocBase.cpp23
-rw-r--r--lib/CodeGen/RegAllocBase.h7
-rw-r--r--lib/CodeGen/RegAllocBasic.cpp7
-rw-r--r--lib/CodeGen/RegAllocFast.cpp240
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp65
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp7
-rw-r--r--lib/CodeGen/RegUsageInfoCollector.cpp90
-rw-r--r--lib/CodeGen/RegUsageInfoPropagate.cpp7
-rw-r--r--lib/CodeGen/RegisterClassInfo.cpp11
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp140
-rw-r--r--lib/CodeGen/RegisterCoalescer.h7
-rw-r--r--lib/CodeGen/RegisterPressure.cpp15
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp45
-rw-r--r--lib/CodeGen/RegisterUsageInfo.cpp7
-rw-r--r--lib/CodeGen/RenameIndependentSubregs.cpp7
-rw-r--r--lib/CodeGen/ResetMachineFunctionPass.cpp9
-rw-r--r--lib/CodeGen/SafeStack.cpp60
-rw-r--r--lib/CodeGen/SafeStackColoring.cpp7
-rw-r--r--lib/CodeGen/SafeStackColoring.h7
-rw-r--r--lib/CodeGen/SafeStackLayout.cpp7
-rw-r--r--lib/CodeGen/SafeStackLayout.h7
-rw-r--r--lib/CodeGen/ScalarizeMaskedMemIntrin.cpp306
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp47
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp66
-rw-r--r--lib/CodeGen/ScheduleDAGPrinter.cpp7
-rw-r--r--lib/CodeGen/ScoreboardHazardRecognizer.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp3271
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp81
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp83
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp92
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h14
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp472
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp168
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp447
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h50
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp181
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp646
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp11
-rw-r--r--lib/CodeGen/SelectionDAG/SDNodeDbgValue.h10
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp10
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp94
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp107
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h7
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp1429
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp139
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp2305
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h383
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp152
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp446
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp7
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp109
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.h14
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp1723
-rw-r--r--lib/CodeGen/ShadowStackGCLowering.cpp13
-rw-r--r--lib/CodeGen/ShrinkWrap.cpp16
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp31
-rw-r--r--lib/CodeGen/SlotIndexes.cpp24
-rw-r--r--lib/CodeGen/SpillPlacement.cpp7
-rw-r--r--lib/CodeGen/SpillPlacement.h7
-rw-r--r--lib/CodeGen/Spiller.h7
-rw-r--r--lib/CodeGen/SplitKit.cpp16
-rw-r--r--lib/CodeGen/SplitKit.h7
-rw-r--r--lib/CodeGen/StackColoring.cpp16
-rw-r--r--lib/CodeGen/StackMapLivenessAnalysis.cpp7
-rw-r--r--lib/CodeGen/StackMaps.cpp7
-rw-r--r--lib/CodeGen/StackProtector.cpp70
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp11
-rw-r--r--lib/CodeGen/SwiftErrorValueTracking.cpp312
-rw-r--r--lib/CodeGen/SwitchLoweringUtils.cpp489
-rw-r--r--lib/CodeGen/TailDuplication.cpp7
-rw-r--r--lib/CodeGen/TailDuplicator.cpp16
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp7
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp41
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp137
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp46
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp7
-rw-r--r--lib/CodeGen/TargetPassConfig.cpp106
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp13
-rw-r--r--lib/CodeGen/TargetSchedule.cpp7
-rw-r--r--lib/CodeGen/TargetSubtargetInfo.cpp69
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp12
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp43
-rw-r--r--lib/CodeGen/ValueTypes.cpp43
-rw-r--r--lib/CodeGen/VirtRegMap.cpp9
-rw-r--r--lib/CodeGen/WasmEHPrepare.cpp180
-rw-r--r--lib/CodeGen/WinEHPrepare.cpp20
-rw-r--r--lib/CodeGen/XRayInstrumentation.cpp9
-rw-r--r--lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp16
-rw-r--r--lib/DebugInfo/CodeView/CVSymbolVisitor.cpp11
-rw-r--r--lib/DebugInfo/CodeView/CVTypeVisitor.cpp26
-rw-r--r--lib/DebugInfo/CodeView/CodeViewError.cpp9
-rw-r--r--lib/DebugInfo/CodeView/CodeViewRecordIO.cpp173
-rw-r--r--lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp20
-rw-r--r--lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugLinesSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp9
-rw-r--r--lib/DebugInfo/CodeView/EnumTables.cpp28
-rw-r--r--lib/DebugInfo/CodeView/Formatters.cpp7
-rw-r--r--lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp16
-rw-r--r--lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp7
-rw-r--r--lib/DebugInfo/CodeView/Line.cpp7
-rw-r--r--lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp13
-rw-r--r--lib/DebugInfo/CodeView/RecordName.cpp7
-rw-r--r--lib/DebugInfo/CodeView/RecordSerialization.cpp7
-rw-r--r--lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp18
-rw-r--r--lib/DebugInfo/CodeView/StringsAndChecksums.cpp7
-rw-r--r--lib/DebugInfo/CodeView/SymbolDumper.cpp42
-rw-r--r--lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp7
-rw-r--r--lib/DebugInfo/CodeView/SymbolRecordMapping.cpp19
-rw-r--r--lib/DebugInfo/CodeView/SymbolSerializer.cpp7
-rw-r--r--lib/DebugInfo/CodeView/TypeDumpVisitor.cpp11
-rw-r--r--lib/DebugInfo/CodeView/TypeHashing.cpp15
-rw-r--r--lib/DebugInfo/CodeView/TypeIndex.cpp7
-rw-r--r--lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp13
-rw-r--r--lib/DebugInfo/CodeView/TypeRecordHelpers.cpp7
-rw-r--r--lib/DebugInfo/CodeView/TypeRecordMapping.cpp266
-rw-r--r--lib/DebugInfo/CodeView/TypeStreamMerger.cpp7
-rw-r--r--lib/DebugInfo/CodeView/TypeTableCollection.cpp13
-rw-r--r--lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp15
-rw-r--r--lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp21
-rw-r--r--lib/DebugInfo/DWARF/DWARFAddressRange.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFCompileUnit.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFContext.cpp250
-rw-r--r--lib/DebugInfo/DWARF/DWARFDataExtractor.cpp23
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp9
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAddr.cpp34
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugAranges.cpp26
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugFrame.cpp20
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugLine.cpp266
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugLoc.cpp34
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugMacro.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp9
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp17
-rw-r--r--lib/DebugInfo/DWARF/DWARFDie.cpp134
-rw-r--r--lib/DebugInfo/DWARF/DWARFExpression.cpp93
-rw-r--r--lib/DebugInfo/DWARF/DWARFFormValue.cpp84
-rw-r--r--lib/DebugInfo/DWARF/DWARFGdbIndex.cpp13
-rw-r--r--lib/DebugInfo/DWARF/DWARFListTable.cpp11
-rw-r--r--lib/DebugInfo/DWARF/DWARFTypeUnit.cpp7
-rw-r--r--lib/DebugInfo/DWARF/DWARFUnit.cpp147
-rw-r--r--lib/DebugInfo/DWARF/DWARFUnitIndex.cpp14
-rw-r--r--lib/DebugInfo/DWARF/DWARFVerifier.cpp125
-rw-r--r--lib/DebugInfo/GSYM/FunctionInfo.cpp22
-rw-r--r--lib/DebugInfo/GSYM/InlineInfo.cpp59
-rw-r--r--lib/DebugInfo/GSYM/Range.cpp55
-rw-r--r--lib/DebugInfo/MSF/MSFBuilder.cpp7
-rw-r--r--lib/DebugInfo/MSF/MSFCommon.cpp7
-rw-r--r--lib/DebugInfo/MSF/MSFError.cpp9
-rw-r--r--lib/DebugInfo/MSF/MappedBlockStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIADataStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAFrameData.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp11
-rw-r--r--lib/DebugInfo/PDB/DIA/DIALineNumber.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIASession.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIASourceFile.cpp7
-rw-r--r--lib/DebugInfo/PDB/DIA/DIATable.cpp7
-rw-r--r--lib/DebugInfo/PDB/GenericError.cpp9
-rw-r--r--lib/DebugInfo/PDB/IPDBSourceFile.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp13
-rw-r--r--lib/DebugInfo/PDB/Native/DbiModuleList.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStream.cpp108
-rw-r--r--lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/EnumTables.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp22
-rw-r--r--lib/DebugInfo/PDB/Native/GlobalsStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/Hash.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/HashTable.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/InfoStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp65
-rw-r--r--lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp23
-rw-r--r--lib/DebugInfo/PDB/Native/NamedStreamMap.cpp15
-rw-r--r--lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp120
-rw-r--r--lib/DebugInfo/PDB/Native/NativeEnumModules.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeSession.cpp20
-rw-r--r--lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeArray.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypePointer.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFile.cpp95
-rw-r--r--lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp12
-rw-r--r--lib/DebugInfo/PDB/Native/PDBStringTable.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp139
-rw-r--r--lib/DebugInfo/PDB/Native/PublicsStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/RawError.cpp2
-rw-r--r--lib/DebugInfo/PDB/Native/SymbolStream.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/TpiHashing.cpp7
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStream.cpp20
-rw-r--r--lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp16
-rw-r--r--lib/DebugInfo/PDB/PDB.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBContext.cpp31
-rw-r--r--lib/DebugInfo/PDB/PDBExtras.cpp47
-rw-r--r--lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymDumper.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbol.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolBlock.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolCompiland.cpp17
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolCustom.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolData.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolExe.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolFunc.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolLabel.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolThunk.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolUnknown.cpp7
-rw-r--r--lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp7
-rw-r--r--lib/DebugInfo/PDB/UDTLayout.cpp7
-rw-r--r--lib/DebugInfo/Symbolize/DIPrinter.cpp38
-rw-r--r--lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp124
-rw-r--r--lib/DebugInfo/Symbolize/SymbolizableObjectFile.h33
-rw-r--r--lib/DebugInfo/Symbolize/Symbolize.cpp223
-rw-r--r--lib/Demangle/Demangle.cpp36
-rw-r--r--lib/Demangle/ItaniumDemangle.cpp7
-rw-r--r--lib/Demangle/MicrosoftDemangle.cpp397
-rw-r--r--lib/Demangle/MicrosoftDemangleNodes.cpp35
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp60
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp7
-rw-r--r--lib/ExecutionEngine/GDBRegistrationListener.cpp7
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp21
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h7
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h7
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h7
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/jitprofiling.c7
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/jitprofiling.h7
-rw-r--r--lib/ExecutionEngine/Interpreter/Execution.cpp63
-rw-r--r--lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp7
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.cpp7
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.h8
-rw-r--r--lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h82
-rw-r--r--lib/ExecutionEngine/JITLink/EHFrameSupport.cpp544
-rw-r--r--lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h72
-rw-r--r--lib/ExecutionEngine/JITLink/JITLink.cpp172
-rw-r--r--lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp481
-rw-r--r--lib/ExecutionEngine/JITLink/JITLinkGeneric.h256
-rw-r--r--lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp105
-rw-r--r--lib/ExecutionEngine/JITLink/MachO.cpp78
-rw-r--r--lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp411
-rw-r--r--lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h138
-rw-r--r--lib/ExecutionEngine/JITLink/MachO_x86_64.cpp608
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp7
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h7
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp7
-rw-r--r--lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/CompileUtils.cpp86
-rw-r--r--lib/ExecutionEngine/Orc/Core.cpp1022
-rw-r--r--lib/ExecutionEngine/Orc/ExecutionUtils.cpp32
-rw-r--r--lib/ExecutionEngine/Orc/IRCompileLayer.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/IRTransformLayer.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/IndirectionUtils.cpp15
-rw-r--r--lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/LLJIT.cpp262
-rw-r--r--lib/ExecutionEngine/Orc/Layer.cpp17
-rw-r--r--lib/ExecutionEngine/Orc/LazyReexports.cpp20
-rw-r--r--lib/ExecutionEngine/Orc/Legacy.cpp10
-rw-r--r--lib/ExecutionEngine/Orc/NullResolver.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp483
-rw-r--r--lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/OrcABISupport.cpp17
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindings.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindingsStack.h64
-rw-r--r--lib/ExecutionEngine/Orc/OrcError.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp10
-rw-r--r--lib/ExecutionEngine/Orc/OrcMCJITReplacement.h32
-rw-r--r--lib/ExecutionEngine/Orc/RPCUtils.cpp7
-rw-r--r--lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp66
-rw-r--r--lib/ExecutionEngine/Orc/ThreadSafeModule.cpp7
-rw-r--r--lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp27
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp16
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp147
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp354
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h59
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp10
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h9
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h55
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h15
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h28
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h23
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h7
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h9
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h11
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h9
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h11
-rw-r--r--lib/ExecutionEngine/SectionMemoryManager.cpp34
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp7
-rw-r--r--lib/FuzzMutate/FuzzerCLI.cpp7
-rw-r--r--lib/FuzzMutate/IRMutator.cpp7
-rw-r--r--lib/FuzzMutate/OpDescriptor.cpp7
-rw-r--r--lib/FuzzMutate/Operations.cpp7
-rw-r--r--lib/FuzzMutate/RandomIRBuilder.cpp10
-rw-r--r--lib/IR/AbstractCallSite.cpp134
-rw-r--r--lib/IR/AsmWriter.cpp150
-rw-r--r--lib/IR/AttributeImpl.h51
-rw-r--r--lib/IR/Attributes.cpp150
-rw-r--r--lib/IR/AutoUpgrade.cpp406
-rw-r--r--lib/IR/BasicBlock.cpp63
-rw-r--r--lib/IR/Comdat.cpp7
-rw-r--r--lib/IR/ConstantFold.cpp134
-rw-r--r--lib/IR/ConstantFold.h8
-rw-r--r--lib/IR/ConstantRange.cpp892
-rw-r--r--lib/IR/Constants.cpp61
-rw-r--r--lib/IR/ConstantsContext.h7
-rw-r--r--lib/IR/Core.cpp158
-rw-r--r--lib/IR/DIBuilder.cpp18
-rw-r--r--lib/IR/DataLayout.cpp36
-rw-r--r--lib/IR/DebugInfo.cpp107
-rw-r--r--lib/IR/DebugInfoMetadata.cpp121
-rw-r--r--lib/IR/DebugLoc.cpp7
-rw-r--r--lib/IR/DiagnosticHandler.cpp7
-rw-r--r--lib/IR/DiagnosticInfo.cpp87
-rw-r--r--lib/IR/DiagnosticPrinter.cpp7
-rw-r--r--lib/IR/Dominators.cpp7
-rw-r--r--lib/IR/Function.cpp218
-rw-r--r--lib/IR/GVMaterializer.cpp7
-rw-r--r--lib/IR/Globals.cpp35
-rw-r--r--lib/IR/IRBuilder.cpp55
-rw-r--r--lib/IR/IRPrintingPasses.cpp7
-rw-r--r--lib/IR/InlineAsm.cpp7
-rw-r--r--lib/IR/Instruction.cpp76
-rw-r--r--lib/IR/Instructions.cpp341
-rw-r--r--lib/IR/IntrinsicInst.cpp116
-rw-r--r--lib/IR/LLVMContext.cpp35
-rw-r--r--lib/IR/LLVMContextImpl.cpp7
-rw-r--r--lib/IR/LLVMContextImpl.h48
-rw-r--r--lib/IR/LegacyPassManager.cpp26
-rw-r--r--lib/IR/MDBuilder.cpp57
-rw-r--r--lib/IR/Mangler.cpp13
-rw-r--r--lib/IR/Metadata.cpp12
-rw-r--r--lib/IR/MetadataImpl.h7
-rw-r--r--lib/IR/Module.cpp32
-rw-r--r--lib/IR/ModuleSummaryIndex.cpp192
-rw-r--r--lib/IR/Operator.cpp7
-rw-r--r--lib/IR/OptBisect.cpp83
-rw-r--r--lib/IR/Pass.cpp33
-rw-r--r--lib/IR/PassInstrumentation.cpp7
-rw-r--r--lib/IR/PassManager.cpp7
-rw-r--r--lib/IR/PassRegistry.cpp7
-rw-r--r--lib/IR/PassTimingInfo.cpp28
-rw-r--r--lib/IR/ProfileSummary.cpp13
-rw-r--r--lib/IR/RemarkStreamer.cpp154
-rw-r--r--lib/IR/SafepointIRVerifier.cpp18
-rw-r--r--lib/IR/Statepoint.cpp37
-rw-r--r--lib/IR/SymbolTableListTraitsImpl.h10
-rw-r--r--lib/IR/Type.cpp38
-rw-r--r--lib/IR/TypeFinder.cpp7
-rw-r--r--lib/IR/Use.cpp7
-rw-r--r--lib/IR/User.cpp7
-rw-r--r--lib/IR/Value.cpp85
-rw-r--r--lib/IR/ValueSymbolTable.cpp7
-rw-r--r--lib/IR/Verifier.cpp451
-rw-r--r--lib/IRReader/IRReader.cpp13
-rw-r--r--lib/LTO/Caching.cpp31
-rw-r--r--lib/LTO/LTO.cpp193
-rw-r--r--lib/LTO/LTOBackend.cpp41
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp60
-rw-r--r--lib/LTO/LTOModule.cpp38
-rw-r--r--lib/LTO/SummaryBasedOptimizations.cpp7
-rw-r--r--lib/LTO/ThinLTOCodeGenerator.cpp207
-rw-r--r--lib/LTO/UpdateCompilerUsed.cpp7
-rw-r--r--lib/LineEditor/LineEditor.cpp7
-rw-r--r--lib/Linker/IRMover.cpp74
-rw-r--r--lib/Linker/LinkDiagnosticInfo.h7
-rw-r--r--lib/Linker/LinkModules.cpp7
-rw-r--r--lib/MC/ConstantPools.cpp7
-rw-r--r--lib/MC/ELFObjectWriter.cpp59
-rw-r--r--lib/MC/MCAsmBackend.cpp12
-rw-r--r--lib/MC/MCAsmInfo.cpp11
-rw-r--r--lib/MC/MCAsmInfoCOFF.cpp7
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp7
-rw-r--r--lib/MC/MCAsmInfoELF.cpp7
-rw-r--r--lib/MC/MCAsmInfoWasm.cpp8
-rw-r--r--lib/MC/MCAsmInfoXCOFF.cpp18
-rw-r--r--lib/MC/MCAsmMacro.cpp7
-rw-r--r--lib/MC/MCAsmStreamer.cpp61
-rw-r--r--lib/MC/MCAssembler.cpp24
-rw-r--r--lib/MC/MCCodeEmitter.cpp7
-rw-r--r--lib/MC/MCCodePadder.cpp7
-rw-r--r--lib/MC/MCCodeView.cpp7
-rw-r--r--lib/MC/MCContext.cpp97
-rw-r--r--lib/MC/MCDisassembler/Disassembler.cpp35
-rw-r--r--lib/MC/MCDisassembler/Disassembler.h41
-rw-r--r--lib/MC/MCDisassembler/MCDisassembler.cpp16
-rw-r--r--lib/MC/MCDisassembler/MCExternalSymbolizer.cpp7
-rw-r--r--lib/MC/MCDisassembler/MCRelocationInfo.cpp7
-rw-r--r--lib/MC/MCDisassembler/MCSymbolizer.cpp7
-rw-r--r--lib/MC/MCDwarf.cpp108
-rw-r--r--lib/MC/MCELFObjectTargetWriter.cpp12
-rw-r--r--lib/MC/MCELFStreamer.cpp10
-rw-r--r--lib/MC/MCExpr.cpp47
-rw-r--r--lib/MC/MCFragment.cpp7
-rw-r--r--lib/MC/MCInst.cpp7
-rw-r--r--lib/MC/MCInstPrinter.cpp13
-rw-r--r--lib/MC/MCInstrAnalysis.cpp7
-rw-r--r--lib/MC/MCInstrDesc.cpp7
-rw-r--r--lib/MC/MCLabel.cpp7
-rw-r--r--lib/MC/MCLinkerOptimizationHint.cpp7
-rw-r--r--lib/MC/MCMachOStreamer.cpp11
-rw-r--r--lib/MC/MCMachObjectTargetWriter.cpp7
-rw-r--r--lib/MC/MCNullStreamer.cpp7
-rw-r--r--lib/MC/MCObjectFileInfo.cpp28
-rw-r--r--lib/MC/MCObjectStreamer.cpp9
-rw-r--r--lib/MC/MCObjectWriter.cpp7
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp32
-rw-r--r--lib/MC/MCParser/AsmParser.cpp69
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp7
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp9
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp11
-rw-r--r--lib/MC/MCParser/MCAsmLexer.cpp7
-rw-r--r--lib/MC/MCParser/MCAsmParser.cpp7
-rw-r--r--lib/MC/MCParser/MCAsmParserExtension.cpp7
-rw-r--r--lib/MC/MCParser/MCTargetAsmParser.cpp7
-rw-r--r--lib/MC/MCParser/WasmAsmParser.cpp174
-rw-r--r--lib/MC/MCRegisterInfo.cpp7
-rw-r--r--lib/MC/MCSchedule.cpp23
-rw-r--r--lib/MC/MCSection.cpp7
-rw-r--r--lib/MC/MCSectionCOFF.cpp9
-rw-r--r--lib/MC/MCSectionELF.cpp13
-rw-r--r--lib/MC/MCSectionMachO.cpp7
-rw-r--r--lib/MC/MCSectionWasm.cpp16
-rw-r--r--lib/MC/MCSectionXCOFF.cpp33
-rw-r--r--lib/MC/MCStreamer.cpp32
-rw-r--r--lib/MC/MCSubtargetInfo.cpp251
-rw-r--r--lib/MC/MCSymbol.cpp7
-rw-r--r--lib/MC/MCSymbolELF.cpp17
-rw-r--r--lib/MC/MCTargetOptions.cpp18
-rw-r--r--lib/MC/MCValue.cpp7
-rw-r--r--lib/MC/MCWasmObjectTargetWriter.cpp11
-rw-r--r--lib/MC/MCWasmStreamer.cpp26
-rw-r--r--lib/MC/MCWin64EH.cpp108
-rw-r--r--lib/MC/MCWinCOFFStreamer.cpp11
-rw-r--r--lib/MC/MCWinEH.cpp7
-rw-r--r--lib/MC/MCXCOFFObjectTargetWriter.cpp16
-rw-r--r--lib/MC/MCXCOFFStreamer.cpp59
-rw-r--r--lib/MC/MachObjectWriter.cpp22
-rw-r--r--lib/MC/StringTableBuilder.cpp14
-rw-r--r--lib/MC/SubtargetFeature.cpp206
-rw-r--r--lib/MC/WasmObjectWriter.cpp423
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp9
-rw-r--r--lib/MC/XCOFFObjectWriter.cpp94
-rw-r--r--lib/MCA/Context.cpp14
-rw-r--r--lib/MCA/HWEventListener.cpp7
-rw-r--r--lib/MCA/HardwareUnits/HardwareUnit.cpp7
-rw-r--r--lib/MCA/HardwareUnits/LSUnit.cpp256
-rw-r--r--lib/MCA/HardwareUnits/RegisterFile.cpp53
-rw-r--r--lib/MCA/HardwareUnits/ResourceManager.cpp98
-rw-r--r--lib/MCA/HardwareUnits/RetireControlUnit.cpp7
-rw-r--r--lib/MCA/HardwareUnits/Scheduler.cpp166
-rw-r--r--lib/MCA/InstrBuilder.cpp37
-rw-r--r--lib/MCA/Instruction.cpp117
-rw-r--r--lib/MCA/Pipeline.cpp12
-rw-r--r--lib/MCA/Stages/DispatchStage.cpp51
-rw-r--r--lib/MCA/Stages/EntryStage.cpp11
-rw-r--r--lib/MCA/Stages/ExecuteStage.cpp83
-rw-r--r--lib/MCA/Stages/InstructionTables.cpp7
-rw-r--r--lib/MCA/Stages/MicroOpQueueStage.cpp70
-rw-r--r--lib/MCA/Stages/RetireStage.cpp7
-rw-r--r--lib/MCA/Stages/Stage.cpp7
-rw-r--r--lib/MCA/Support.cpp28
-rw-r--r--lib/Object/Archive.cpp18
-rw-r--r--lib/Object/ArchiveWriter.cpp149
-rw-r--r--lib/Object/Binary.cpp12
-rw-r--r--lib/Object/COFFImportFile.cpp14
-rw-r--r--lib/Object/COFFModuleDefinition.cpp7
-rw-r--r--lib/Object/COFFObjectFile.cpp53
-rw-r--r--lib/Object/Decompressor.cpp7
-rw-r--r--lib/Object/ELF.cpp39
-rw-r--r--lib/Object/ELFObjectFile.cpp58
-rw-r--r--lib/Object/Error.cpp30
-rw-r--r--lib/Object/IRObjectFile.cpp22
-rw-r--r--lib/Object/IRSymtab.cpp25
-rw-r--r--lib/Object/MachOObjectFile.cpp445
-rw-r--r--lib/Object/MachOUniversal.cpp7
-rw-r--r--lib/Object/Minidump.cpp137
-rw-r--r--lib/Object/ModuleSymbolTable.cpp7
-rw-r--r--lib/Object/Object.cpp132
-rw-r--r--lib/Object/ObjectFile.cpp24
-rw-r--r--lib/Object/RecordStreamer.cpp9
-rw-r--r--lib/Object/RecordStreamer.h19
-rw-r--r--lib/Object/RelocationResolver.cpp550
-rw-r--r--lib/Object/SymbolSize.cpp7
-rw-r--r--lib/Object/SymbolicFile.cpp10
-rw-r--r--lib/Object/WasmObjectFile.cpp332
-rw-r--r--lib/Object/WindowsMachineFlag.cpp44
-rw-r--r--lib/Object/WindowsResource.cpp203
-rw-r--r--lib/Object/XCOFFObjectFile.cpp584
-rw-r--r--lib/ObjectYAML/COFFYAML.cpp16
-rw-r--r--lib/ObjectYAML/CodeViewYAMLDebugSections.cpp7
-rw-r--r--lib/ObjectYAML/CodeViewYAMLSymbols.cpp17
-rw-r--r--lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp7
-rw-r--r--lib/ObjectYAML/CodeViewYAMLTypes.cpp11
-rw-r--r--lib/ObjectYAML/DWARFEmitter.cpp7
-rw-r--r--lib/ObjectYAML/DWARFVisitor.cpp7
-rw-r--r--lib/ObjectYAML/DWARFVisitor.h7
-rw-r--r--lib/ObjectYAML/DWARFYAML.cpp7
-rw-r--r--lib/ObjectYAML/ELFYAML.cpp244
-rw-r--r--lib/ObjectYAML/MachOYAML.cpp7
-rw-r--r--lib/ObjectYAML/MinidumpYAML.cpp673
-rw-r--r--lib/ObjectYAML/ObjectYAML.cpp22
-rw-r--r--lib/ObjectYAML/WasmYAML.cpp80
-rw-r--r--lib/ObjectYAML/XCOFFYAML.cpp109
-rw-r--r--lib/ObjectYAML/YAML.cpp14
-rw-r--r--lib/OptRemarks/OptRemarksParser.cpp368
-rw-r--r--lib/Option/Arg.cpp10
-rw-r--r--lib/Option/ArgList.cpp22
-rw-r--r--lib/Option/OptTable.cpp81
-rw-r--r--lib/Option/Option.cpp120
-rw-r--r--lib/Passes/PassBuilder.cpp375
-rw-r--r--lib/Passes/PassPlugin.cpp7
-rw-r--r--lib/Passes/PassRegistry.def59
-rw-r--r--lib/Passes/StandardInstrumentations.cpp7
-rw-r--r--lib/ProfileData/Coverage/CoverageMapping.cpp18
-rw-r--r--lib/ProfileData/Coverage/CoverageMappingReader.cpp235
-rw-r--r--lib/ProfileData/Coverage/CoverageMappingWriter.cpp24
-rw-r--r--lib/ProfileData/GCOV.cpp26
-rw-r--r--lib/ProfileData/InstrProf.cpp293
-rw-r--r--lib/ProfileData/InstrProfReader.cpp47
-rw-r--r--lib/ProfileData/InstrProfWriter.cpp107
-rw-r--r--lib/ProfileData/ProfileSummaryBuilder.cpp20
-rw-r--r--lib/ProfileData/SampleProf.cpp7
-rw-r--r--lib/ProfileData/SampleProfReader.cpp11
-rw-r--r--lib/ProfileData/SampleProfWriter.cpp12
-rw-r--r--lib/Remarks/Remark.cpp132
-rw-r--r--lib/Remarks/RemarkFormat.cpp30
-rw-r--r--lib/Remarks/RemarkParser.cpp119
-rw-r--r--lib/Remarks/RemarkStringTable.cpp48
-rw-r--r--lib/Remarks/YAMLRemarkParser.cpp327
-rw-r--r--lib/Remarks/YAMLRemarkParser.h96
-rw-r--r--lib/Remarks/YAMLRemarkSerializer.cpp167
-rw-r--r--lib/Support/AArch64TargetParser.cpp17
-rw-r--r--lib/Support/AMDGPUMetadata.cpp23
-rw-r--r--lib/Support/APFloat.cpp51
-rw-r--r--lib/Support/APInt.cpp84
-rw-r--r--lib/Support/APSInt.cpp13
-rw-r--r--lib/Support/ARMAttributeParser.cpp32
-rw-r--r--lib/Support/ARMBuildAttrs.cpp8
-rw-r--r--lib/Support/ARMTargetParser.cpp265
-rw-r--r--lib/Support/ARMWinEH.cpp7
-rw-r--r--lib/Support/Allocator.cpp7
-rw-r--r--lib/Support/Atomic.cpp7
-rw-r--r--lib/Support/BinaryStreamError.cpp7
-rw-r--r--lib/Support/BinaryStreamReader.cpp40
-rw-r--r--lib/Support/BinaryStreamRef.cpp7
-rw-r--r--lib/Support/BinaryStreamWriter.cpp20
-rw-r--r--lib/Support/BlockFrequency.cpp7
-rw-r--r--lib/Support/BranchProbability.cpp11
-rw-r--r--lib/Support/BuryPointer.cpp7
-rw-r--r--lib/Support/COM.cpp7
-rw-r--r--lib/Support/CRC.cpp68
-rw-r--r--lib/Support/CachePruning.cpp18
-rw-r--r--lib/Support/Chrono.cpp7
-rw-r--r--lib/Support/CodeGenCoverage.cpp7
-rw-r--r--lib/Support/CommandLine.cpp445
-rw-r--r--lib/Support/Compression.cpp7
-rw-r--r--lib/Support/ConvertUTF.cpp7
-rw-r--r--lib/Support/ConvertUTFWrapper.cpp7
-rw-r--r--lib/Support/CrashRecoveryContext.cpp7
-rw-r--r--lib/Support/DAGDeltaAlgorithm.cpp7
-rw-r--r--lib/Support/DJB.cpp42
-rw-r--r--lib/Support/DataExtractor.cpp62
-rw-r--r--lib/Support/Debug.cpp7
-rw-r--r--lib/Support/DeltaAlgorithm.cpp7
-rw-r--r--lib/Support/DynamicLibrary.cpp7
-rw-r--r--lib/Support/Errno.cpp9
-rw-r--r--lib/Support/Error.cpp7
-rw-r--r--lib/Support/ErrorHandling.cpp29
-rw-r--r--lib/Support/FileCheck.cpp1122
-rw-r--r--lib/Support/FileOutputBuffer.cpp81
-rw-r--r--lib/Support/FileUtilities.cpp7
-rw-r--r--lib/Support/FoldingSet.cpp7
-rw-r--r--lib/Support/FormatVariadic.cpp7
-rw-r--r--lib/Support/FormattedStream.cpp7
-rw-r--r--lib/Support/GlobPattern.cpp7
-rw-r--r--lib/Support/GraphWriter.cpp7
-rw-r--r--lib/Support/Hashing.cpp7
-rw-r--r--lib/Support/Host.cpp102
-rw-r--r--lib/Support/InitLLVM.cpp8
-rw-r--r--lib/Support/IntEqClasses.cpp7
-rw-r--r--lib/Support/IntervalMap.cpp7
-rw-r--r--lib/Support/ItaniumManglingCanonicalizer.cpp8
-rw-r--r--lib/Support/JSON.cpp221
-rw-r--r--lib/Support/JamCRC.cpp7
-rw-r--r--lib/Support/KnownBits.cpp50
-rw-r--r--lib/Support/LEB128.cpp7
-rw-r--r--lib/Support/LineIterator.cpp7
-rw-r--r--lib/Support/LockFileManager.cpp7
-rw-r--r--lib/Support/LowLevelType.cpp11
-rw-r--r--lib/Support/ManagedStatic.cpp7
-rw-r--r--lib/Support/MathExtras.cpp7
-rw-r--r--lib/Support/Memory.cpp36
-rw-r--r--lib/Support/MemoryBuffer.cpp96
-rw-r--r--lib/Support/Mutex.cpp7
-rw-r--r--lib/Support/NativeFormatting.cpp7
-rw-r--r--lib/Support/Optional.cpp14
-rw-r--r--lib/Support/Options.cpp7
-rw-r--r--lib/Support/Parallel.cpp38
-rw-r--r--lib/Support/Path.cpp93
-rw-r--r--lib/Support/PluginLoader.cpp7
-rw-r--r--lib/Support/PrettyStackTrace.cpp92
-rw-r--r--lib/Support/Process.cpp7
-rw-r--r--lib/Support/Program.cpp7
-rw-r--r--lib/Support/RWMutex.cpp7
-rw-r--r--lib/Support/RandomNumberGenerator.cpp17
-rw-r--r--lib/Support/Regex.cpp7
-rw-r--r--lib/Support/SHA1.cpp7
-rw-r--r--lib/Support/ScaledNumber.cpp7
-rw-r--r--lib/Support/Signals.cpp11
-rw-r--r--lib/Support/Signposts.cpp119
-rw-r--r--lib/Support/SmallPtrSet.cpp7
-rw-r--r--lib/Support/SmallVector.cpp7
-rw-r--r--lib/Support/SourceMgr.cpp18
-rw-r--r--lib/Support/SpecialCaseList.cpp7
-rw-r--r--lib/Support/Statistic.cpp10
-rw-r--r--lib/Support/StringExtras.cpp7
-rw-r--r--lib/Support/StringMap.cpp7
-rw-r--r--lib/Support/StringPool.cpp7
-rw-r--r--lib/Support/StringRef.cpp7
-rw-r--r--lib/Support/StringSaver.cpp7
-rw-r--r--lib/Support/SymbolRemappingReader.cpp7
-rw-r--r--lib/Support/SystemUtils.cpp7
-rw-r--r--lib/Support/TarWriter.cpp7
-rw-r--r--lib/Support/TargetParser.cpp61
-rw-r--r--lib/Support/TargetRegistry.cpp7
-rw-r--r--lib/Support/ThreadLocal.cpp7
-rw-r--r--lib/Support/ThreadPool.cpp7
-rw-r--r--lib/Support/Threading.cpp7
-rw-r--r--lib/Support/TimeProfiler.cpp199
-rw-r--r--lib/Support/Timer.cpp30
-rw-r--r--lib/Support/ToolOutputFile.cpp7
-rw-r--r--lib/Support/TrigramIndex.cpp7
-rw-r--r--lib/Support/Triple.cpp36
-rw-r--r--lib/Support/Twine.cpp7
-rw-r--r--lib/Support/Unicode.cpp7
-rw-r--r--lib/Support/Unix/COM.inc7
-rw-r--r--lib/Support/Unix/DynamicLibrary.inc7
-rw-r--r--lib/Support/Unix/Host.inc24
-rw-r--r--lib/Support/Unix/Memory.inc81
-rw-r--r--lib/Support/Unix/Mutex.inc7
-rw-r--r--lib/Support/Unix/Path.inc219
-rw-r--r--lib/Support/Unix/Process.inc20
-rw-r--r--lib/Support/Unix/Program.inc23
-rw-r--r--lib/Support/Unix/RWMutex.inc7
-rw-r--r--lib/Support/Unix/Signals.inc81
-rw-r--r--lib/Support/Unix/ThreadLocal.inc7
-rw-r--r--lib/Support/Unix/Threading.inc52
-rw-r--r--lib/Support/Unix/Unix.h9
-rw-r--r--lib/Support/Unix/Watchdog.inc7
-rw-r--r--lib/Support/Valgrind.cpp7
-rw-r--r--lib/Support/VersionTuple.cpp7
-rw-r--r--lib/Support/VirtualFileSystem.cpp159
-rw-r--r--lib/Support/Watchdog.cpp7
-rw-r--r--lib/Support/Windows/COM.inc7
-rw-r--r--lib/Support/Windows/DynamicLibrary.inc7
-rw-r--r--lib/Support/Windows/Host.inc7
-rw-r--r--lib/Support/Windows/Memory.inc96
-rw-r--r--lib/Support/Windows/Mutex.inc7
-rw-r--r--lib/Support/Windows/Path.inc98
-rw-r--r--lib/Support/Windows/Process.inc9
-rw-r--r--lib/Support/Windows/Program.inc7
-rw-r--r--lib/Support/Windows/RWMutex.inc7
-rw-r--r--lib/Support/Windows/Signals.inc11
-rw-r--r--lib/Support/Windows/ThreadLocal.inc7
-rw-r--r--lib/Support/Windows/Threading.inc23
-rw-r--r--lib/Support/Windows/Watchdog.inc7
-rw-r--r--lib/Support/Windows/WindowsSupport.h7
-rw-r--r--lib/Support/WithColor.cpp7
-rw-r--r--lib/Support/YAMLParser.cpp7
-rw-r--r--lib/Support/YAMLTraits.cpp58
-rw-r--r--lib/Support/Z3Solver.cpp900
-rw-r--r--lib/Support/circular_raw_ostream.cpp7
-rw-r--r--lib/Support/raw_os_ostream.cpp7
-rw-r--r--lib/Support/raw_ostream.cpp9
-rw-r--r--lib/TableGen/Error.cpp7
-rw-r--r--lib/TableGen/JSONBackend.cpp7
-rw-r--r--lib/TableGen/Main.cpp7
-rw-r--r--lib/TableGen/Record.cpp204
-rw-r--r--lib/TableGen/SetTheory.cpp7
-rw-r--r--lib/TableGen/StringMatcher.cpp7
-rw-r--r--lib/TableGen/TGLexer.cpp28
-rw-r--r--lib/TableGen/TGLexer.h15
-rw-r--r--lib/TableGen/TGParser.cpp280
-rw-r--r--lib/TableGen/TGParser.h11
-rw-r--r--lib/TableGen/TableGenBackend.cpp7
-rw-r--r--lib/Target/AArch64/AArch64.h9
-rw-r--r--lib/Target/AArch64/AArch64.td65
-rw-r--r--lib/Target/AArch64/AArch64A53Fix835769.cpp7
-rw-r--r--lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp7
-rw-r--r--lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp7
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp281
-rw-r--r--lib/Target/AArch64/AArch64BranchTargets.cpp7
-rw-r--r--lib/Target/AArch64/AArch64CallLowering.cpp205
-rw-r--r--lib/Target/AArch64/AArch64CallLowering.h28
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.cpp134
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.h156
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td33
-rw-r--r--lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp7
-rw-r--r--lib/Target/AArch64/AArch64CollectLOH.cpp7
-rw-r--r--lib/Target/AArch64/AArch64CompressJumpTables.cpp10
-rw-r--r--lib/Target/AArch64/AArch64CondBrTuning.cpp7
-rw-r--r--lib/Target/AArch64/AArch64ConditionOptimizer.cpp7
-rw-r--r--lib/Target/AArch64/AArch64ConditionalCompares.cpp9
-rw-r--r--lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp108
-rw-r--r--lib/Target/AArch64/AArch64ExpandImm.cpp411
-rw-r--r--lib/Target/AArch64/AArch64ExpandImm.h35
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp619
-rw-r--r--lib/Target/AArch64/AArch64FalkorHWPFFix.cpp13
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp34
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp215
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h17
-rw-r--r--lib/Target/AArch64/AArch64GenRegisterBankInfo.def11
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp140
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp583
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h42
-rw-r--r--lib/Target/AArch64/AArch64InstrAtomics.td7
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td50
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp472
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h51
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td172
-rw-r--r--lib/Target/AArch64/AArch64InstructionSelector.cpp2773
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.cpp388
-rw-r--r--lib/Target/AArch64/AArch64LegalizerInfo.h13
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp13
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp7
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.h7
-rw-r--r--lib/Target/AArch64/AArch64MachineFunctionInfo.h28
-rw-r--r--lib/Target/AArch64/AArch64MacroFusion.cpp7
-rw-r--r--lib/Target/AArch64/AArch64MacroFusion.h7
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.cpp7
-rw-r--r--lib/Target/AArch64/AArch64PBQPRegAlloc.h7
-rw-r--r--lib/Target/AArch64/AArch64PerfectShuffle.h7
-rw-r--r--lib/Target/AArch64/AArch64PfmCounters.td7
-rw-r--r--lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp11
-rw-r--r--lib/Target/AArch64/AArch64PromoteConstant.cpp10
-rw-r--r--lib/Target/AArch64/AArch64RedundantCopyElimination.cpp11
-rw-r--r--lib/Target/AArch64/AArch64RegisterBankInfo.cpp238
-rw-r--r--lib/Target/AArch64/AArch64RegisterBankInfo.h20
-rw-r--r--lib/Target/AArch64/AArch64RegisterBanks.td7
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp49
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.h11
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td26
-rw-r--r--lib/Target/AArch64/AArch64SIMDInstrOpt.cpp7
-rw-r--r--lib/Target/AArch64/AArch64SVEInstrInfo.td426
-rw-r--r--lib/Target/AArch64/AArch64SchedA53.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedA57WriteRes.td7
-rw-r--r--lib/Target/AArch64/AArch64SchedCyclone.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedExynosM1.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedExynosM3.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedExynosM4.td45
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkor.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedFalkorDetails.td7
-rw-r--r--lib/Target/AArch64/AArch64SchedKryo.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedKryoDetails.td7
-rw-r--r--lib/Target/AArch64/AArch64SchedPredExynos.td18
-rw-r--r--lib/Target/AArch64/AArch64SchedPredicates.td60
-rw-r--r--lib/Target/AArch64/AArch64SchedThunderX.td9
-rw-r--r--lib/Target/AArch64/AArch64SchedThunderX2T99.td9
-rw-r--r--lib/Target/AArch64/AArch64Schedule.td7
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp95
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h11
-rw-r--r--lib/Target/AArch64/AArch64SpeculationHardening.cpp182
-rw-r--r--lib/Target/AArch64/AArch64StackTagging.cpp345
-rw-r--r--lib/Target/AArch64/AArch64StorePairSuppress.cpp9
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp8
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h40
-rw-r--r--lib/Target/AArch64/AArch64SystemOperands.td8
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp37
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h7
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.cpp7
-rw-r--r--lib/Target/AArch64/AArch64TargetObjectFile.h7
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp15
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.h11
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp102
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp49
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.h7
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp7
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp54
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp9
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp11
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp (renamed from lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp)13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h (renamed from lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h)13
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp11
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp14
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp10
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h9
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp203
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h14
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp17
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp8
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp7
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h7
-rw-r--r--lib/Target/AArch64/SVEInstrFormats.td1296
-rw-r--r--lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp33
-rw-r--r--lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h24
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp7
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h50
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h52
-rw-r--r--lib/Target/AMDGPU/AMDGPU.td570
-rw-r--r--lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp41
-rw-r--r--lib/Target/AMDGPU/AMDGPUAliasAnalysis.h13
-rw-r--r--lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp75
-rw-r--r--lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp8
-rw-r--r--lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp19
-rw-r--r--lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h43
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp339
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h17
-rw-r--r--lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp314
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.cpp362
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallLowering.h20
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallingConv.td49
-rw-r--r--lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp136
-rw-r--r--lib/Target/AMDGPU/AMDGPUFeatures.td18
-rw-r--r--lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUFrameLowering.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUFrameLowering.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPUGISel.td55
-rw-r--r--lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def113
-rw-r--r--lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp220
-rw-r--r--lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h41
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp802
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp363
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h73
-rw-r--r--lib/Target/AMDGPU/AMDGPUInline.cpp45
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.td46
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp1469
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructionSelector.h55
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstructions.td267
-rw-r--r--lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp103
-rw-r--r--lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h58
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp1357
-rw-r--r--lib/Target/AMDGPU/AMDGPULegalizerInfo.h50
-rw-r--r--lib/Target/AMDGPU/AMDGPULibCalls.cpp151
-rw-r--r--lib/Target/AMDGPU/AMDGPULibFunc.cpp62
-rw-r--r--lib/Target/AMDGPU/AMDGPULibFunc.h11
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp38
-rw-r--r--lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUMCInstLower.cpp48
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.cpp21
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineFunction.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp17
-rw-r--r--lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h80
-rw-r--r--lib/Target/AMDGPU/AMDGPUMacroFusion.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUMacroFusion.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp11
-rw-r--r--lib/Target/AMDGPU/AMDGPUPTNote.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp77
-rw-r--r--lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h17
-rw-r--r--lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp36
-rw-r--r--lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp336
-rw-r--r--lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp353
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp1782
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBankInfo.h52
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterBanks.td9
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.cpp27
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPURegisterInfo.td9
-rw-r--r--lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUSearchableTables.td60
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.cpp263
-rw-r--r--lib/Target/AMDGPU/AMDGPUSubtarget.h311
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp307
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.h21
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetObjectFile.h7
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp38
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h21
-rw-r--r--lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp18
-rw-r--r--lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDILCFGStructurizer.cpp7
-rw-r--r--lib/Target/AMDGPU/AMDKernelCodeT.h15
-rw-r--r--lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp2786
-rw-r--r--lib/Target/AMDGPU/BUFInstructions.td957
-rw-r--r--lib/Target/AMDGPU/CaymanInstructions.td7
-rw-r--r--lib/Target/AMDGPU/DSInstructions.td566
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp485
-rw-r--r--lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h32
-rw-r--r--lib/Target/AMDGPU/EvergreenInstructions.td7
-rw-r--r--lib/Target/AMDGPU/FLATInstructions.td527
-rw-r--r--lib/Target/AMDGPU/GCNDPPCombine.cpp259
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.cpp826
-rw-r--r--lib/Target/AMDGPU/GCNHazardRecognizer.h41
-rw-r--r--lib/Target/AMDGPU/GCNILPSched.cpp7
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.cpp7
-rw-r--r--lib/Target/AMDGPU/GCNIterativeScheduler.h7
-rw-r--r--lib/Target/AMDGPU/GCNMinRegStrategy.cpp7
-rw-r--r--lib/Target/AMDGPU/GCNNSAReassign.cpp343
-rw-r--r--lib/Target/AMDGPU/GCNProcessors.td114
-rw-r--r--lib/Target/AMDGPU/GCNRegBankReassign.cpp800
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.cpp22
-rw-r--r--lib/Target/AMDGPU/GCNRegPressure.h61
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.cpp35
-rw-r--r--lib/Target/AMDGPU/GCNSchedStrategy.h16
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp65
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp21
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp7
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h7
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h7
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (renamed from lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp)537
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (renamed from lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h)38
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp29
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h8
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp7
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h20
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp41
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h12
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp218
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h40
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp14
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp7
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp84
-rw-r--r--lib/Target/AMDGPU/MIMGInstructions.td484
-rw-r--r--lib/Target/AMDGPU/R600.td7
-rw-r--r--lib/Target/AMDGPU/R600AsmPrinter.cpp7
-rw-r--r--lib/Target/AMDGPU/R600AsmPrinter.h7
-rw-r--r--lib/Target/AMDGPU/R600ClauseMergePass.cpp7
-rw-r--r--lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp7
-rw-r--r--lib/Target/AMDGPU/R600Defines.h7
-rw-r--r--lib/Target/AMDGPU/R600EmitClauseMarkers.cpp7
-rw-r--r--lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp7
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.cpp7
-rw-r--r--lib/Target/AMDGPU/R600FrameLowering.h7
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.cpp37
-rw-r--r--lib/Target/AMDGPU/R600ISelLowering.h14
-rw-r--r--lib/Target/AMDGPU/R600InstrFormats.td7
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.cpp8
-rw-r--r--lib/Target/AMDGPU/R600InstrInfo.h7
-rw-r--r--lib/Target/AMDGPU/R600Instructions.td35
-rw-r--r--lib/Target/AMDGPU/R600MachineFunctionInfo.cpp7
-rw-r--r--lib/Target/AMDGPU/R600MachineFunctionInfo.h7
-rw-r--r--lib/Target/AMDGPU/R600MachineScheduler.cpp7
-rw-r--r--lib/Target/AMDGPU/R600MachineScheduler.h7
-rw-r--r--lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp7
-rw-r--r--lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp22
-rw-r--r--lib/Target/AMDGPU/R600Packetizer.cpp11
-rw-r--r--lib/Target/AMDGPU/R600Processors.td18
-rw-r--r--lib/Target/AMDGPU/R600RegisterInfo.cpp9
-rw-r--r--lib/Target/AMDGPU/R600RegisterInfo.h9
-rw-r--r--lib/Target/AMDGPU/R600Schedule.td7
-rw-r--r--lib/Target/AMDGPU/R700Instructions.td7
-rw-r--r--lib/Target/AMDGPU/SIAddIMGInit.cpp7
-rw-r--r--lib/Target/AMDGPU/SIAnnotateControlFlow.cpp64
-rw-r--r--lib/Target/AMDGPU/SIDebuggerInsertNops.cpp97
-rw-r--r--lib/Target/AMDGPU/SIDefines.h178
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp83
-rw-r--r--lib/Target/AMDGPU/SIFixVGPRCopies.cpp7
-rw-r--r--lib/Target/AMDGPU/SIFixWWMLiveness.cpp418
-rw-r--r--lib/Target/AMDGPU/SIFixupVectorISel.cpp12
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp363
-rw-r--r--lib/Target/AMDGPU/SIFormMemoryClauses.cpp22
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.cpp810
-rw-r--r--lib/Target/AMDGPU/SIFrameLowering.h28
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp1918
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h49
-rw-r--r--lib/Target/AMDGPU/SIInsertSkips.cpp76
-rw-r--r--lib/Target/AMDGPU/SIInsertWaitcnts.cpp417
-rw-r--r--lib/Target/AMDGPU/SIInstrFormats.td68
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp1415
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h125
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.td654
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td425
-rw-r--r--lib/Target/AMDGPU/SIIntrinsics.td19
-rw-r--r--lib/Target/AMDGPU/SILoadStoreOptimizer.cpp60
-rw-r--r--lib/Target/AMDGPU/SILowerControlFlow.cpp104
-rw-r--r--lib/Target/AMDGPU/SILowerI1Copies.cpp107
-rw-r--r--lib/Target/AMDGPU/SILowerSGPRSpills.cpp323
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp271
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h377
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp11
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.h7
-rw-r--r--lib/Target/AMDGPU/SIMemoryLegalizer.cpp322
-rw-r--r--lib/Target/AMDGPU/SIModeRegister.cpp9
-rw-r--r--lib/Target/AMDGPU/SIOptimizeExecMasking.cpp98
-rw-r--r--lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp155
-rw-r--r--lib/Target/AMDGPU/SIPeepholeSDWA.cpp36
-rw-r--r--lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp221
-rw-r--r--lib/Target/AMDGPU/SIProgramInfo.h21
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp660
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h78
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.td633
-rw-r--r--lib/Target/AMDGPU/SISchedule.td71
-rw-r--r--lib/Target/AMDGPU/SIShrinkInstructions.cpp140
-rw-r--r--lib/Target/AMDGPU/SIWholeQuadMode.cpp82
-rw-r--r--lib/Target/AMDGPU/SMInstructions.td359
-rw-r--r--lib/Target/AMDGPU/SOPInstructions.td666
-rw-r--r--lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp9
-rw-r--r--lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h29
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp36
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h14
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp410
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h203
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp723
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h135
-rw-r--r--lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h11
-rw-r--r--lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp7
-rw-r--r--lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h7
-rw-r--r--lib/Target/AMDGPU/VIInstrFormats.td7
-rw-r--r--lib/Target/AMDGPU/VIInstructions.td7
-rw-r--r--lib/Target/AMDGPU/VOP1Instructions.td487
-rw-r--r--lib/Target/AMDGPU/VOP2Instructions.td889
-rw-r--r--lib/Target/AMDGPU/VOP3Instructions.td501
-rw-r--r--lib/Target/AMDGPU/VOP3PInstructions.td220
-rw-r--r--lib/Target/AMDGPU/VOPCInstructions.td972
-rw-r--r--lib/Target/AMDGPU/VOPInstructions.td182
-rw-r--r--lib/Target/ARC/ARC.h8
-rw-r--r--lib/Target/ARC/ARC.td7
-rw-r--r--lib/Target/ARC/ARCAsmPrinter.cpp26
-rw-r--r--lib/Target/ARC/ARCBranchFinalize.cpp7
-rw-r--r--lib/Target/ARC/ARCCallingConv.td7
-rw-r--r--lib/Target/ARC/ARCExpandPseudos.cpp7
-rw-r--r--lib/Target/ARC/ARCFrameLowering.cpp59
-rw-r--r--lib/Target/ARC/ARCFrameLowering.h7
-rw-r--r--lib/Target/ARC/ARCISelDAGToDAG.cpp7
-rw-r--r--lib/Target/ARC/ARCISelLowering.cpp7
-rw-r--r--lib/Target/ARC/ARCISelLowering.h7
-rw-r--r--lib/Target/ARC/ARCInstrFormats.td71
-rw-r--r--lib/Target/ARC/ARCInstrInfo.cpp54
-rw-r--r--lib/Target/ARC/ARCInstrInfo.h17
-rw-r--r--lib/Target/ARC/ARCInstrInfo.td122
-rw-r--r--lib/Target/ARC/ARCMCInstLower.cpp7
-rw-r--r--lib/Target/ARC/ARCMCInstLower.h7
-rw-r--r--lib/Target/ARC/ARCMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/ARC/ARCMachineFunctionInfo.h7
-rw-r--r--lib/Target/ARC/ARCOptAddrMode.cpp507
-rw-r--r--lib/Target/ARC/ARCRegisterInfo.cpp15
-rw-r--r--lib/Target/ARC/ARCRegisterInfo.h9
-rw-r--r--lib/Target/ARC/ARCRegisterInfo.td7
-rw-r--r--lib/Target/ARC/ARCSubtarget.cpp7
-rw-r--r--lib/Target/ARC/ARCSubtarget.h7
-rw-r--r--lib/Target/ARC/ARCTargetMachine.cpp13
-rw-r--r--lib/Target/ARC/ARCTargetMachine.h7
-rw-r--r--lib/Target/ARC/ARCTargetStreamer.h7
-rw-r--r--lib/Target/ARC/ARCTargetTransformInfo.h7
-rw-r--r--lib/Target/ARC/Disassembler/ARCDisassembler.cpp8
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCInfo.h7
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp (renamed from lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp)7
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h (renamed from lib/Target/ARC/InstPrinter/ARCInstPrinter.h)7
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp7
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h7
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp11
-rw-r--r--lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h9
-rw-r--r--lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp9
-rw-r--r--lib/Target/ARC/TargetInfo/ARCTargetInfo.h20
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp7
-rw-r--r--lib/Target/ARM/ARM.h18
-rw-r--r--lib/Target/ARM/ARM.td185
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp153
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.h14
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp412
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h72
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp51
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h9
-rw-r--r--lib/Target/ARM/ARMBasicBlockInfo.cpp146
-rw-r--r--lib/Target/ARM/ARMBasicBlockInfo.h59
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp176
-rw-r--r--lib/Target/ARM/ARMCallLowering.h20
-rw-r--r--lib/Target/ARM/ARMCallingConv.cpp284
-rw-r--r--lib/Target/ARM/ARMCallingConv.h308
-rw-r--r--lib/Target/ARM/ARMCallingConv.td52
-rw-r--r--lib/Target/ARM/ARMCodeGenPrepare.cpp205
-rw-r--r--lib/Target/ARM/ARMComputeBlockSize.cpp81
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp246
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.cpp7
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h7
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp28
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp53
-rw-r--r--lib/Target/ARM/ARMFeatures.h7
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp117
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h7
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp7
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h7
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp213
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1556
-rw-r--r--lib/Target/ARM/ARMISelLowering.h101
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td115
-rw-r--r--lib/Target/ARM/ARMInstrInfo.cpp9
-rw-r--r--lib/Target/ARM/ARMInstrInfo.h7
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td380
-rw-r--r--lib/Target/ARM/ARMInstrMVE.td4591
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td1093
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td75
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td487
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td367
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp268
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp161
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.h7
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp149
-rw-r--r--lib/Target/ARM/ARMLowOverheadLoops.cpp384
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp7
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h16
-rw-r--r--lib/Target/ARM/ARMMacroFusion.cpp7
-rw-r--r--lib/Target/ARM/ARMMacroFusion.h7
-rw-r--r--lib/Target/ARM/ARMOptimizeBarriersPass.cpp7
-rw-r--r--lib/Target/ARM/ARMParallelDSP.cpp889
-rw-r--r--lib/Target/ARM/ARMPerfectShuffle.h7
-rw-r--r--lib/Target/ARM/ARMPredicates.td211
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp51
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.h7
-rw-r--r--lib/Target/ARM/ARMRegisterBanks.td7
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.h7
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td132
-rw-r--r--lib/Target/ARM/ARMSchedule.td9
-rw-r--r--lib/Target/ARM/ARMScheduleA57.td13
-rw-r--r--lib/Target/ARM/ARMScheduleA57WriteRes.td7
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td7
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td7
-rw-r--r--lib/Target/ARM/ARMScheduleM3.td21
-rw-r--r--lib/Target/ARM/ARMScheduleM4.td119
-rw-r--r--lib/Target/ARM/ARMScheduleR52.td7
-rw-r--r--lib/Target/ARM/ARMScheduleSwift.td7
-rw-r--r--lib/Target/ARM/ARMScheduleV6.td7
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp9
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h7
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp73
-rw-r--r--lib/Target/ARM/ARMSubtarget.h78
-rw-r--r--lib/Target/ARM/ARMSystemRegister.td7
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp43
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h7
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp7
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.h7
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp275
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h23
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp1739
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp1387
-rwxr-xr-xlib/Target/ARM/LICENSE.TXT47
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp142
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h9
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h18
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp15
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h16
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp (renamed from lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp)139
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h (renamed from lib/Target/ARM/InstPrinter/ARMInstPrinter.h)45
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp459
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp35
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp62
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp7
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp7
-rw-r--r--lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp9
-rw-r--r--lib/Target/ARM/TargetInfo/ARMTargetInfo.h23
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp120
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h7
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.cpp7
-rw-r--r--lib/Target/ARM/Thumb1InstrInfo.h7
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp221
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp58
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.h13
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp13
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.cpp75
-rw-r--r--lib/Target/ARM/ThumbRegisterInfo.h13
-rw-r--r--lib/Target/ARM/Utils/ARMBaseInfo.cpp7
-rw-r--r--lib/Target/ARM/Utils/ARMBaseInfo.h31
-rw-r--r--lib/Target/AVR/AVR.h7
-rw-r--r--lib/Target/AVR/AVR.td7
-rw-r--r--lib/Target/AVR/AVRAsmPrinter.cpp29
-rw-r--r--lib/Target/AVR/AVRCallingConv.td7
-rw-r--r--lib/Target/AVR/AVRExpandPseudoInsts.cpp17
-rw-r--r--lib/Target/AVR/AVRFrameLowering.cpp12
-rw-r--r--lib/Target/AVR/AVRFrameLowering.h7
-rw-r--r--lib/Target/AVR/AVRISelDAGToDAG.cpp7
-rw-r--r--lib/Target/AVR/AVRISelLowering.cpp55
-rw-r--r--lib/Target/AVR/AVRISelLowering.h20
-rw-r--r--lib/Target/AVR/AVRInstrFormats.td7
-rw-r--r--lib/Target/AVR/AVRInstrInfo.cpp10
-rw-r--r--lib/Target/AVR/AVRInstrInfo.h7
-rw-r--r--lib/Target/AVR/AVRInstrInfo.td53
-rw-r--r--lib/Target/AVR/AVRMCInstLower.cpp7
-rw-r--r--lib/Target/AVR/AVRMCInstLower.h7
-rw-r--r--lib/Target/AVR/AVRMachineFunctionInfo.h7
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.cpp30
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.h16
-rw-r--r--lib/Target/AVR/AVRRegisterInfo.td11
-rw-r--r--lib/Target/AVR/AVRRelaxMemOperations.cpp7
-rw-r--r--lib/Target/AVR/AVRSelectionDAGInfo.h7
-rw-r--r--lib/Target/AVR/AVRSubtarget.cpp19
-rw-r--r--lib/Target/AVR/AVRSubtarget.h12
-rw-r--r--lib/Target/AVR/AVRTargetMachine.cpp8
-rw-r--r--lib/Target/AVR/AVRTargetMachine.h7
-rw-r--r--lib/Target/AVR/AVRTargetObjectFile.cpp7
-rw-r--r--lib/Target/AVR/AVRTargetObjectFile.h7
-rw-r--r--lib/Target/AVR/AsmParser/AVRAsmParser.cpp24
-rw-r--r--lib/Target/AVR/Disassembler/AVRDisassembler.cpp8
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp (renamed from lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp)7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h (renamed from lib/Target/AVR/InstPrinter/AVRInstPrinter.h)7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp8
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCExpr.h7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp10
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h9
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp7
-rw-r--r--lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h7
-rw-r--r--lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp9
-rw-r--r--lib/Target/AVR/TargetInfo/AVRTargetInfo.h18
-rw-r--r--lib/Target/BPF/AsmParser/BPFAsmParser.cpp10
-rw-r--r--lib/Target/BPF/BPF.h12
-rw-r--r--lib/Target/BPF/BPF.td8
-rw-r--r--lib/Target/BPF/BPFAbstractMemberAccess.cpp482
-rw-r--r--lib/Target/BPF/BPFAsmPrinter.cpp42
-rw-r--r--lib/Target/BPF/BPFCORE.h24
-rw-r--r--lib/Target/BPF/BPFCallingConv.td7
-rw-r--r--lib/Target/BPF/BPFFrameLowering.cpp7
-rw-r--r--lib/Target/BPF/BPFFrameLowering.h7
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp7
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp64
-rw-r--r--lib/Target/BPF/BPFISelLowering.h11
-rw-r--r--lib/Target/BPF/BPFInstrFormats.td8
-rw-r--r--lib/Target/BPF/BPFInstrInfo.cpp7
-rw-r--r--lib/Target/BPF/BPFInstrInfo.h7
-rw-r--r--lib/Target/BPF/BPFInstrInfo.td111
-rw-r--r--lib/Target/BPF/BPFMCInstLower.cpp7
-rw-r--r--lib/Target/BPF/BPFMCInstLower.h7
-rw-r--r--lib/Target/BPF/BPFMIChecking.cpp104
-rw-r--r--lib/Target/BPF/BPFMIPeephole.cpp7
-rw-r--r--lib/Target/BPF/BPFMISimplifyPatchable.cpp163
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.cpp9
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.h9
-rw-r--r--lib/Target/BPF/BPFRegisterInfo.td7
-rw-r--r--lib/Target/BPF/BPFSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/BPF/BPFSelectionDAGInfo.h7
-rw-r--r--lib/Target/BPF/BPFSubtarget.cpp13
-rw-r--r--lib/Target/BPF/BPFSubtarget.h12
-rw-r--r--lib/Target/BPF/BPFTargetMachine.cpp20
-rw-r--r--lib/Target/BPF/BPFTargetMachine.h7
-rw-r--r--lib/Target/BPF/BTF.def9
-rw-r--r--lib/Target/BPF/BTF.h98
-rw-r--r--lib/Target/BPF/BTFDebug.cpp727
-rw-r--r--lib/Target/BPF/BTFDebug.h120
-rw-r--r--lib/Target/BPF/Disassembler/BPFDisassembler.cpp13
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp19
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp39
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp (renamed from lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp)9
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h (renamed from lib/Target/BPF/InstPrinter/BPFInstPrinter.h)11
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h7
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp14
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp11
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h11
-rw-r--r--lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp18
-rw-r--r--lib/Target/BPF/TargetInfo/BPFTargetInfo.h22
-rw-r--r--lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp29
-rw-r--r--lib/Target/Hexagon/BitTracker.cpp7
-rw-r--r--lib/Target/Hexagon/BitTracker.h7
-rw-r--r--lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp10
-rw-r--r--lib/Target/Hexagon/Hexagon.h7
-rw-r--r--lib/Target/Hexagon/Hexagon.td7
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp20
-rwxr-xr-xlib/Target/Hexagon/HexagonAsmPrinter.h14
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonBitTracker.h7
-rw-r--r--lib/Target/Hexagon/HexagonBlockRanges.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonBlockRanges.h7
-rw-r--r--lib/Target/Hexagon/HexagonBranchRelaxation.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td7
-rw-r--r--lib/Target/Hexagon/HexagonCommonGEP.cpp24
-rw-r--r--lib/Target/Hexagon/HexagonConstExtenders.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonConstPropagation.cpp186
-rw-r--r--lib/Target/Hexagon/HexagonCopyToCombine.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonDepArch.h7
-rw-r--r--lib/Target/Hexagon/HexagonDepArch.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepDecoders.inc (renamed from lib/Target/Hexagon/HexagonDepDecoders.h)7
-rw-r--r--lib/Target/Hexagon/HexagonDepIICHVX.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepIICScalar.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepITypes.h7
-rw-r--r--lib/Target/Hexagon/HexagonDepITypes.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepInstrFormats.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepInstrInfo.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepMappings.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepOperands.td7
-rw-r--r--lib/Target/Hexagon/HexagonDepTimingClasses.h7
-rw-r--r--lib/Target/Hexagon/HexagonEarlyIfConv.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonFixupHwLoops.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h7
-rw-r--r--lib/Target/Hexagon/HexagonGenExtract.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonGenInsert.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonGenMux.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonGenPredicate.cpp73
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonHazardRecognizer.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonHazardRecognizer.h7
-rw-r--r--lib/Target/Hexagon/HexagonIICHVX.td19
-rw-r--r--lib/Target/Hexagon/HexagonIICScalar.td7
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp12
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.h7
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp100
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h15
-rw-r--r--lib/Target/Hexagon/HexagonISelLoweringHVX.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td7
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV5.td7
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV60.td7
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV65.td7
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp62
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h21
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td7
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV5.td7
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV60.td7
-rw-r--r--lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp19
-rw-r--r--lib/Target/Hexagon/HexagonMCInstLower.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h7
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.h7
-rw-r--r--lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td7
-rw-r--r--lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td7
-rw-r--r--lib/Target/Hexagon/HexagonNewValueJump.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonOperands.td7
-rw-r--r--lib/Target/Hexagon/HexagonOptAddrMode.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonOptimizeSZextends.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td11
-rw-r--r--lib/Target/Hexagon/HexagonPatternsV65.td7
-rw-r--r--lib/Target/Hexagon/HexagonPeephole.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonPseudo.td12
-rw-r--r--lib/Target/Hexagon/HexagonRDFOpt.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp9
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h9
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td7
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td7
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV5.td7
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV55.td7
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV60.td7
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV62.td7
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV65.td7
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV66.td7
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h7
-rw-r--r--lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonSplitDouble.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonStoreWidening.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h7
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp8
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h7
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp14
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h7
-rw-r--r--lib/Target/Hexagon/HexagonTargetStreamer.h7
-rw-r--r--lib/Target/Hexagon/HexagonTargetTransformInfo.cpp12
-rw-r--r--lib/Target/Hexagon/HexagonTargetTransformInfo.h7
-rw-r--r--lib/Target/Hexagon/HexagonVExtract.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonVLIWPacketizer.h7
-rw-r--r--lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp222
-rw-r--r--lib/Target/Hexagon/HexagonVectorPrint.cpp7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp9
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp10
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h14
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp9
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h10
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h7
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp9
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp9
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h9
-rw-r--r--lib/Target/Hexagon/RDFCopy.cpp7
-rw-r--r--lib/Target/Hexagon/RDFCopy.h7
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.cpp7
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.h7
-rw-r--r--lib/Target/Hexagon/RDFGraph.cpp29
-rw-r--r--lib/Target/Hexagon/RDFGraph.h34
-rw-r--r--lib/Target/Hexagon/RDFLiveness.cpp8
-rw-r--r--lib/Target/Hexagon/RDFLiveness.h9
-rw-r--r--lib/Target/Hexagon/RDFRegisters.cpp7
-rw-r--r--lib/Target/Hexagon/RDFRegisters.h7
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp10
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h20
-rw-r--r--lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp10
-rw-r--r--lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp13
-rw-r--r--lib/Target/Lanai/Disassembler/LanaiDisassembler.h7
-rw-r--r--lib/Target/Lanai/Lanai.h15
-rw-r--r--lib/Target/Lanai/Lanai.td7
-rw-r--r--lib/Target/Lanai/LanaiAluCode.h7
-rw-r--r--lib/Target/Lanai/LanaiAsmPrinter.cpp19
-rw-r--r--lib/Target/Lanai/LanaiCallingConv.td7
-rw-r--r--lib/Target/Lanai/LanaiDelaySlotFiller.cpp7
-rw-r--r--lib/Target/Lanai/LanaiFrameLowering.cpp9
-rw-r--r--lib/Target/Lanai/LanaiFrameLowering.h8
-rw-r--r--lib/Target/Lanai/LanaiISelDAGToDAG.cpp9
-rw-r--r--lib/Target/Lanai/LanaiISelLowering.cpp7
-rw-r--r--lib/Target/Lanai/LanaiISelLowering.h7
-rw-r--r--lib/Target/Lanai/LanaiInstrFormats.td7
-rw-r--r--lib/Target/Lanai/LanaiInstrInfo.cpp24
-rw-r--r--lib/Target/Lanai/LanaiInstrInfo.h16
-rw-r--r--lib/Target/Lanai/LanaiInstrInfo.td7
-rw-r--r--lib/Target/Lanai/LanaiMCInstLower.cpp7
-rw-r--r--lib/Target/Lanai/LanaiMCInstLower.h7
-rw-r--r--lib/Target/Lanai/LanaiMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/Lanai/LanaiMachineFunctionInfo.h7
-rw-r--r--lib/Target/Lanai/LanaiMemAluCombiner.cpp12
-rw-r--r--lib/Target/Lanai/LanaiRegisterInfo.cpp17
-rw-r--r--lib/Target/Lanai/LanaiRegisterInfo.h11
-rw-r--r--lib/Target/Lanai/LanaiRegisterInfo.td7
-rw-r--r--lib/Target/Lanai/LanaiSchedule.td7
-rw-r--r--lib/Target/Lanai/LanaiSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/Lanai/LanaiSelectionDAGInfo.h7
-rw-r--r--lib/Target/Lanai/LanaiSubtarget.cpp7
-rw-r--r--lib/Target/Lanai/LanaiSubtarget.h7
-rw-r--r--lib/Target/Lanai/LanaiTargetMachine.cpp8
-rw-r--r--lib/Target/Lanai/LanaiTargetMachine.h7
-rw-r--r--lib/Target/Lanai/LanaiTargetObjectFile.cpp7
-rw-r--r--lib/Target/Lanai/LanaiTargetObjectFile.h7
-rw-r--r--lib/Target/Lanai/LanaiTargetTransformInfo.h7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp9
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp (renamed from lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp)14
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h (renamed from lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h)13
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp9
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h7
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp10
-rw-r--r--lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h9
-rw-r--r--lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp13
-rw-r--r--lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h20
-rw-r--r--lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp8
-rw-r--r--lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp8
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp (renamed from lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp)7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h (renamed from lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h)11
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp8
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp7
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp10
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h9
-rw-r--r--lib/Target/MSP430/MSP430.h7
-rw-r--r--lib/Target/MSP430/MSP430.td7
-rw-r--r--lib/Target/MSP430/MSP430AsmPrinter.cpp85
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp7
-rw-r--r--lib/Target/MSP430/MSP430CallingConv.td7
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp7
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h7
-rw-r--r--lib/Target/MSP430/MSP430ISelDAGToDAG.cpp7
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp7
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h7
-rw-r--r--lib/Target/MSP430/MSP430InstrFormats.td7
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp10
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h7
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.td7
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp7
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.h7
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.cpp7
-rw-r--r--lib/Target/MSP430/MSP430MachineFunctionInfo.h7
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp9
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h9
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.td7
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp7
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h7
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp8
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h7
-rw-r--r--lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp10
-rw-r--r--lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h20
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp409
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp17
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp14
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIInfo.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp9
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp9
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h10
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp (renamed from lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp)9
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h (renamed from lib/Target/Mips/InstPrinter/MipsInstPrinter.h)11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp15
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp21
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp83
-rw-r--r--lib/Target/Mips/MicroMips32r6InstrFormats.td7
-rw-r--r--lib/Target/Mips/MicroMips32r6InstrInfo.td32
-rw-r--r--lib/Target/Mips/MicroMipsDSPInstrFormats.td7
-rw-r--r--lib/Target/Mips/MicroMipsDSPInstrInfo.td7
-rw-r--r--lib/Target/Mips/MicroMipsInstrFPU.td19
-rw-r--r--lib/Target/Mips/MicroMipsInstrFormats.td7
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td36
-rw-r--r--lib/Target/Mips/MicroMipsSizeReduction.cpp7
-rw-r--r--lib/Target/Mips/Mips.h7
-rw-r--r--lib/Target/Mips/Mips.td13
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp7
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h7
-rw-r--r--lib/Target/Mips/Mips16HardFloat.cpp9
-rw-r--r--lib/Target/Mips/Mips16HardFloatInfo.cpp7
-rw-r--r--lib/Target/Mips/Mips16HardFloatInfo.h7
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp7
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.h7
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp17
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h8
-rw-r--r--lib/Target/Mips/Mips16InstrFormats.td7
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp7
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h7
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td15
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.cpp7
-rw-r--r--lib/Target/Mips/Mips16RegisterInfo.h7
-rw-r--r--lib/Target/Mips/Mips32r6InstrFormats.td7
-rw-r--r--lib/Target/Mips/Mips32r6InstrInfo.td12
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td92
-rw-r--r--lib/Target/Mips/Mips64r6InstrInfo.td10
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.cpp7
-rw-r--r--lib/Target/Mips/MipsAnalyzeImmediate.h7
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp63
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h13
-rw-r--r--lib/Target/Mips/MipsBranchExpansion.cpp7
-rw-r--r--lib/Target/Mips/MipsCCState.cpp7
-rw-r--r--lib/Target/Mips/MipsCCState.h7
-rw-r--r--lib/Target/Mips/MipsCallLowering.cpp265
-rw-r--r--lib/Target/Mips/MipsCallLowering.h31
-rw-r--r--lib/Target/Mips/MipsCallingConv.td7
-rw-r--r--lib/Target/Mips/MipsCondMov.td29
-rw-r--r--lib/Target/Mips/MipsConstantIslandPass.cpp15
-rw-r--r--lib/Target/Mips/MipsDSPInstrFormats.td7
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td12
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp45
-rw-r--r--lib/Target/Mips/MipsEVAInstrFormats.td7
-rw-r--r--lib/Target/Mips/MipsEVAInstrInfo.td7
-rw-r--r--lib/Target/Mips/MipsExpandPseudo.cpp7
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp55
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp7
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h7
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp7
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h7
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp175
-rw-r--r--lib/Target/Mips/MipsISelLowering.h21
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td26
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td8
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp23
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h7
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td114
-rw-r--r--lib/Target/Mips/MipsInstructionSelector.cpp447
-rw-r--r--lib/Target/Mips/MipsLegalizerInfo.cpp93
-rw-r--r--lib/Target/Mips/MipsLegalizerInfo.h7
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp9
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h7
-rw-r--r--lib/Target/Mips/MipsMSAInstrFormats.td7
-rw-r--r--lib/Target/Mips/MipsMSAInstrInfo.td90
-rw-r--r--lib/Target/Mips/MipsMTInstrFormats.td7
-rw-r--r--lib/Target/Mips/MipsMTInstrInfo.td7
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp105
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h14
-rw-r--r--lib/Target/Mips/MipsOptimizePICCall.cpp7
-rw-r--r--lib/Target/Mips/MipsOptionRecord.h7
-rw-r--r--lib/Target/Mips/MipsOs16.cpp7
-rw-r--r--lib/Target/Mips/MipsPreLegalizerCombiner.cpp18
-rw-r--r--lib/Target/Mips/MipsRegisterBankInfo.cpp598
-rw-r--r--lib/Target/Mips/MipsRegisterBankInfo.h132
-rw-r--r--lib/Target/Mips/MipsRegisterBanks.td9
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp40
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h9
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td54
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp7
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h7
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp113
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.h11
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp126
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h15
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp12
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h7
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.cpp7
-rw-r--r--lib/Target/Mips/MipsSERegisterInfo.h7
-rw-r--r--lib/Target/Mips/MipsSchedule.td7
-rw-r--r--lib/Target/Mips/MipsScheduleGeneric.td934
-rw-r--r--lib/Target/Mips/MipsScheduleP5600.td67
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp21
-rw-r--r--lib/Target/Mips/MipsSubtarget.h11
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp17
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h13
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp7
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.h7
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h11
-rw-r--r--lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp10
-rw-r--r--lib/Target/Mips/TargetInfo/MipsTargetInfo.h23
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h7
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp (renamed from lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp)23
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h (renamed from lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h)13
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp16
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h7
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp10
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h10
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp26
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h10
-rw-r--r--lib/Target/NVPTX/ManagedStringPool.h7
-rw-r--r--lib/Target/NVPTX/NVPTX.h20
-rw-r--r--lib/Target/NVPTX/NVPTX.td9
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXAllocaHoisting.h7
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp83
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.h18
-rw-r--r--lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h7
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h8
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp283
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h11
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXInstrFormats.td7
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h7
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td23
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td658
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.h7
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAlloca.cpp8
-rw-r--r--lib/Target/NVPTX/NVPTXLowerArgs.cpp11
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.h7
-rw-r--r--lib/Target/NVPTX/NVPTXMachineFunctionInfo.h7
-rw-r--r--lib/Target/NVPTX/NVPTXPeephole.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp11
-rw-r--r--lib/Target/NVPTX/NVPTXProxyRegErasure.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.cpp9
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.h9
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td7
-rw-r--r--lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp8
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h7
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp27
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h7
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h7
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp8
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h9
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.cpp8
-rw-r--r--lib/Target/NVPTX/NVPTXUtilities.h7
-rw-r--r--lib/Target/NVPTX/NVVMIntrRange.cpp7
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp7
-rw-r--r--lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp10
-rw-r--r--lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h21
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp15
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp22
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp117
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp (renamed from lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp)33
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h (renamed from lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h)11
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp13
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h17
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp9
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h14
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp67
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h14
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp29
-rw-r--r--lib/Target/PowerPC/P9InstrResources.td371
-rw-r--r--lib/Target/PowerPC/PPC.h22
-rw-r--r--lib/Target/PowerPC/PPC.td38
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp223
-rw-r--r--lib/Target/PowerPC/PPCBoolRetToInt.cpp7
-rw-r--r--lib/Target/PowerPC/PPCBranchCoalescing.cpp11
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp262
-rw-r--r--lib/Target/PowerPC/PPCCCState.cpp7
-rw-r--r--lib/Target/PowerPC/PPCCCState.h7
-rw-r--r--lib/Target/PowerPC/PPCCTRLoops.cpp585
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.cpp162
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.h36
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td50
-rw-r--r--lib/Target/PowerPC/PPCEarlyReturn.cpp19
-rw-r--r--lib/Target/PowerPC/PPCExpandISEL.cpp7
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp108
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp211
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h31
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp10
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h7
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp94
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1087
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h117
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td66
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td37
-rw-r--r--lib/Target/PowerPC/PPCInstrBuilder.h7
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td21
-rw-r--r--lib/Target/PowerPC/PPCInstrHTM.td49
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp388
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h100
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td84
-rw-r--r--lib/Target/PowerPC/PPCInstrQPX.td7
-rw-r--r--lib/Target/PowerPC/PPCInstrSPE.td19
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td531
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp15
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp17
-rw-r--r--lib/Target/PowerPC/PPCMIPeephole.cpp186
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h16
-rw-r--r--lib/Target/PowerPC/PPCMachineScheduler.cpp83
-rw-r--r--lib/Target/PowerPC/PPCMachineScheduler.h49
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h7
-rw-r--r--lib/Target/PowerPC/PPCPfmCounters.td7
-rw-r--r--lib/Target/PowerPC/PPCPreEmitPeephole.cpp7
-rw-r--r--lib/Target/PowerPC/PPCQPXLoadSplat.cpp11
-rw-r--r--lib/Target/PowerPC/PPCReduceCRLogicals.cpp52
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp217
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h18
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td9
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td8
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleE500.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleE500mc.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleE5500.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleP7.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleP8.td7
-rw-r--r--lib/Target/PowerPC/PPCScheduleP9.td77
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp29
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h28
-rw-r--r--lib/Target/PowerPC/PPCTLSDynamicCall.cpp11
-rw-r--r--lib/Target/PowerPC/PPCTOCRegDeps.cpp11
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp74
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h11
-rw-r--r--lib/Target/PowerPC/PPCTargetObjectFile.cpp7
-rw-r--r--lib/Target/PowerPC/PPCTargetObjectFile.h7
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h7
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.cpp449
-rw-r--r--lib/Target/PowerPC/PPCTargetTransformInfo.h21
-rw-r--r--lib/Target/PowerPC/PPCVSXCopy.cpp11
-rw-r--r--lib/Target/PowerPC/PPCVSXFMAMutate.cpp7
-rw-r--r--lib/Target/PowerPC/PPCVSXSwapRemoval.cpp12
-rw-r--r--lib/Target/PowerPC/README_P9.txt8
-rw-r--r--lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp10
-rw-r--r--lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h22
-rw-r--r--lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp393
-rw-r--r--lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp20
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp93
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h54
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp70
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp32
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h7
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h36
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp (renamed from lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp)7
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h (renamed from lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h)11
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp8
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h7
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp150
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp120
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h23
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp18
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h10
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp7
-rw-r--r--lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h7
-rw-r--r--lib/Target/RISCV/RISCV.h7
-rw-r--r--lib/Target/RISCV/RISCV.td25
-rw-r--r--lib/Target/RISCV/RISCVAsmPrinter.cpp65
-rw-r--r--lib/Target/RISCV/RISCVCallingConv.td18
-rw-r--r--lib/Target/RISCV/RISCVExpandPseudoInsts.cpp196
-rw-r--r--lib/Target/RISCV/RISCVFrameLowering.cpp80
-rw-r--r--lib/Target/RISCV/RISCVFrameLowering.h7
-rw-r--r--lib/Target/RISCV/RISCVISelDAGToDAG.cpp15
-rw-r--r--lib/Target/RISCV/RISCVISelLowering.cpp1185
-rw-r--r--lib/Target/RISCV/RISCVISelLowering.h86
-rw-r--r--lib/Target/RISCV/RISCVInstrFormats.td36
-rw-r--r--lib/Target/RISCV/RISCVInstrFormatsC.td7
-rw-r--r--lib/Target/RISCV/RISCVInstrInfo.cpp36
-rw-r--r--lib/Target/RISCV/RISCVInstrInfo.h9
-rw-r--r--lib/Target/RISCV/RISCVInstrInfo.td320
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoA.td89
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoC.td57
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoD.td41
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoF.td97
-rw-r--r--lib/Target/RISCV/RISCVInstrInfoM.td46
-rw-r--r--lib/Target/RISCV/RISCVMCInstLower.cpp37
-rw-r--r--lib/Target/RISCV/RISCVMachineFunctionInfo.h9
-rw-r--r--lib/Target/RISCV/RISCVMergeBaseOffset.cpp7
-rw-r--r--lib/Target/RISCV/RISCVRegisterInfo.cpp53
-rw-r--r--lib/Target/RISCV/RISCVRegisterInfo.h9
-rw-r--r--lib/Target/RISCV/RISCVRegisterInfo.td9
-rw-r--r--lib/Target/RISCV/RISCVSubtarget.cpp22
-rw-r--r--lib/Target/RISCV/RISCVSubtarget.h21
-rw-r--r--lib/Target/RISCV/RISCVSystemOperands.td27
-rw-r--r--lib/Target/RISCV/RISCVTargetMachine.cpp21
-rw-r--r--lib/Target/RISCV/RISCVTargetMachine.h9
-rw-r--r--lib/Target/RISCV/RISCVTargetObjectFile.cpp103
-rw-r--r--lib/Target/RISCV/RISCVTargetObjectFile.h31
-rw-r--r--lib/Target/RISCV/RISCVTargetTransformInfo.cpp92
-rw-r--r--lib/Target/RISCV/RISCVTargetTransformInfo.h52
-rw-r--r--lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp14
-rw-r--r--lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h21
-rw-r--r--lib/Target/RISCV/Utils/RISCVBaseInfo.cpp71
-rw-r--r--lib/Target/RISCV/Utils/RISCVBaseInfo.h44
-rw-r--r--lib/Target/RISCV/Utils/RISCVMatInt.cpp32
-rw-r--r--lib/Target/RISCV/Utils/RISCVMatInt.h16
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp11
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp7
-rw-r--r--lib/Target/Sparc/Disassembler/SparcDisassembler.cpp14
-rwxr-xr-xlib/Target/Sparc/LeonFeatures.td7
-rwxr-xr-xlib/Target/Sparc/LeonPasses.cpp7
-rwxr-xr-xlib/Target/Sparc/LeonPasses.h7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp (renamed from lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp)7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h (renamed from lib/Target/Sparc/InstPrinter/SparcInstPrinter.h)11
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp14
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h7
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp10
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h11
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp9
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h7
-rw-r--r--lib/Target/Sparc/Sparc.h7
-rw-r--r--lib/Target/Sparc/Sparc.td7
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp23
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td7
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp7
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h7
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp12
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp10
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h7
-rw-r--r--lib/Target/Sparc/SparcInstr64Bit.td7
-rw-r--r--lib/Target/Sparc/SparcInstrAliases.td7
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td7
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp7
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h7
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td7
-rw-r--r--lib/Target/Sparc/SparcInstrVIS.td7
-rw-r--r--lib/Target/Sparc/SparcMCInstLower.cpp7
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h7
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp15
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h9
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td7
-rwxr-xr-xlib/Target/Sparc/SparcSchedule.td7
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp7
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h7
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp12
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h7
-rw-r--r--lib/Target/Sparc/SparcTargetObjectFile.cpp7
-rw-r--r--lib/Target/Sparc/SparcTargetObjectFile.h7
-rw-r--r--lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp10
-rw-r--r--lib/Target/Sparc/TargetInfo/SparcTargetInfo.h22
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp35
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp8
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp (renamed from lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp)7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h (renamed from lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h)13
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp14
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h7
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp11
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp11
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h9
-rw-r--r--lib/Target/SystemZ/SystemZ.h8
-rw-r--r--lib/Target/SystemZ/SystemZ.td7
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp70
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.h13
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.h7
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td7
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZConstantPoolValue.h7
-rw-r--r--lib/Target/SystemZ/SystemZElimCompare.cpp16
-rw-r--r--lib/Target/SystemZ/SystemZExpandPseudo.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZFeatures.td58
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h7
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZHazardRecognizer.h7
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp109
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp816
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h44
-rw-r--r--lib/Target/SystemZ/SystemZInstrBuilder.h7
-rw-r--r--lib/Target/SystemZ/SystemZInstrDFP.td99
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td302
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td378
-rw-r--r--lib/Target/SystemZ/SystemZInstrHFP.td7
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp306
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h23
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td150
-rw-r--r--lib/Target/SystemZ/SystemZInstrSystem.td7
-rw-r--r--lib/Target/SystemZ/SystemZInstrVector.td555
-rw-r--r--lib/Target/SystemZ/SystemZLDCleanup.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZLongBranch.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZMCInstLower.h7
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h7
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZMachineScheduler.h7
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td27
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td105
-rw-r--r--lib/Target/SystemZ/SystemZPatterns.td7
-rw-r--r--lib/Target/SystemZ/SystemZPostRewrite.cpp124
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td9
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp123
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h9
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td14
-rw-r--r--lib/Target/SystemZ/SystemZSchedule.td8
-rw-r--r--lib/Target/SystemZ/SystemZScheduleArch13.td1695
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ13.td18
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ14.td18
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZ196.td7
-rw-r--r--lib/Target/SystemZ/SystemZScheduleZEC12.td7
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp25
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h7
-rw-r--r--lib/Target/SystemZ/SystemZShortenInst.cpp62
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp10
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h37
-rw-r--r--lib/Target/SystemZ/SystemZTDC.cpp11
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp22
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h7
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp39
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h7
-rw-r--r--lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp9
-rw-r--r--lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h20
-rw-r--r--lib/Target/Target.cpp7
-rw-r--r--lib/Target/TargetIntrinsicInfo.cpp7
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp8
-rw-r--r--lib/Target/TargetMachine.cpp22
-rw-r--r--lib/Target/TargetMachineC.cpp7
-rw-r--r--lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp278
-rw-r--r--lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp58
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp20
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h13
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp (renamed from lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp)104
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h (renamed from lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h)7
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp9
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h7
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp35
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp24
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h302
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp24
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h20
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp109
-rw-r--r--lib/Target/WebAssembly/README.txt2
-rw-r--r--lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp10
-rw-r--r--lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h26
-rw-r--r--lib/Target/WebAssembly/WebAssembly.h13
-rw-r--r--lib/Target/WebAssembly/WebAssembly.td29
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp89
-rw-r--r--lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp11
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp186
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.h16
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGSort.cpp54
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp931
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp37
-rw-r--r--lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyDebugValueManager.h7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp87
-rw-r--r--lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp21
-rw-r--r--lib/Target/WebAssembly/WebAssemblyExceptionInfo.h7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp55
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFastISel.cpp183
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp79
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp616
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp14
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.h7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISD.def14
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp168
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp556
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.h21
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrAtomics.td546
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td71
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrCall.td202
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrControl.td93
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrConv.td7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td27
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFloat.td7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFormats.td10
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp62
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.h16
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td129
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInteger.td14
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrMemory.td95
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrRef.td25
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrSIMD.td215
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp467
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp95
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp30
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp118
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.h12
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp40
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h47
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp23
-rw-r--r--lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp13
-rw-r--r--lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp17
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPeephole.cpp39
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp19
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegColoring.cpp31
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp9
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegStackify.cpp173
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp30
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.h9
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.td11
-rw-r--r--lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp143
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h11
-rw-r--r--lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp49
-rw-r--r--lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h22
-rw-r--r--lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp123
-rw-r--r--lib/Target/WebAssembly/WebAssemblySubtarget.cpp12
-rw-r--r--lib/Target/WebAssembly/WebAssemblySubtarget.h22
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp250
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.h18
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp9
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.cpp301
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.h27
-rw-r--r--lib/Target/WebAssembly/known_gcc_test_failures.txt27
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp1089
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.h68
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp447
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParserCommon.h7
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h58
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp217
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp19
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h14
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp202
-rw-r--r--lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp142
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp162
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp487
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h (renamed from lib/Target/X86/InstPrinter/X86ATTInstPrinter.h)48
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp82
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h94
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp38
-rw-r--r--lib/Target/X86/MCTargetDesc/X86FixupKinds.h7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86InstComments.cpp (renamed from lib/Target/X86/InstPrinter/X86InstComments.cpp)36
-rw-r--r--lib/Target/X86/MCTargetDesc/X86InstComments.h (renamed from lib/Target/X86/InstPrinter/X86InstComments.h)11
-rw-r--r--lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp362
-rw-r--r--lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h (renamed from lib/Target/X86/InstPrinter/X86InstPrinterCommon.h)19
-rw-r--r--lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp445
-rw-r--r--lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h (renamed from lib/Target/X86/InstPrinter/X86IntelInstPrinter.h)57
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp97
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCExpr.h9
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp22
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h10
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86TargetStreamer.h7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp7
-rw-r--r--lib/Target/X86/ShadowCallStack.cpp322
-rw-r--r--lib/Target/X86/TargetInfo/X86TargetInfo.cpp9
-rw-r--r--lib/Target/X86/TargetInfo/X86TargetInfo.h21
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp14
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h9
-rw-r--r--lib/Target/X86/X86.h15
-rw-r--r--lib/Target/X86/X86.td1226
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp274
-rw-r--r--lib/Target/X86/X86AsmPrinter.h25
-rw-r--r--lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp29
-rw-r--r--lib/Target/X86/X86CallFrameOptimization.cpp12
-rw-r--r--lib/Target/X86/X86CallLowering.cpp78
-rw-r--r--lib/Target/X86/X86CallLowering.h13
-rw-r--r--lib/Target/X86/X86CallingConv.cpp162
-rw-r--r--lib/Target/X86/X86CallingConv.h104
-rw-r--r--lib/Target/X86/X86CallingConv.td28
-rw-r--r--lib/Target/X86/X86CmovConversion.cpp35
-rw-r--r--lib/Target/X86/X86CondBrFolding.cpp26
-rw-r--r--lib/Target/X86/X86DiscriminateMemOps.cpp42
-rw-r--r--lib/Target/X86/X86DomainReassignment.cpp12
-rwxr-xr-xlib/Target/X86/X86EvexToVex.cpp21
-rw-r--r--lib/Target/X86/X86ExpandPseudo.cpp41
-rw-r--r--lib/Target/X86/X86FastISel.cpp264
-rw-r--r--lib/Target/X86/X86FixupBWInsts.cpp13
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp393
-rw-r--r--lib/Target/X86/X86FixupSetCC.cpp37
-rw-r--r--lib/Target/X86/X86FlagsCopyLowering.cpp56
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp28
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp80
-rw-r--r--lib/Target/X86/X86FrameLowering.h11
-rw-r--r--lib/Target/X86/X86GenRegisterBankInfo.def7
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp1590
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp9548
-rw-r--r--lib/Target/X86/X86ISelLowering.h216
-rw-r--r--lib/Target/X86/X86IndirectBranchTracking.cpp49
-rw-r--r--lib/Target/X86/X86InsertPrefetch.cpp10
-rw-r--r--lib/Target/X86/X86Instr3DNow.td11
-rw-r--r--lib/Target/X86/X86InstrAVX512.td3486
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td101
-rw-r--r--lib/Target/X86/X86InstrBuilder.h7
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td176
-rw-r--r--lib/Target/X86/X86InstrCompiler.td323
-rw-r--r--lib/Target/X86/X86InstrControl.td64
-rw-r--r--lib/Target/X86/X86InstrExtension.td11
-rw-r--r--lib/Target/X86/X86InstrFMA.td13
-rw-r--r--lib/Target/X86/X86InstrFMA3Info.cpp17
-rw-r--r--lib/Target/X86/X86InstrFMA3Info.h7
-rw-r--r--lib/Target/X86/X86InstrFPStack.td341
-rw-r--r--lib/Target/X86/X86InstrFoldTables.cpp186
-rw-r--r--lib/Target/X86/X86InstrFoldTables.h7
-rw-r--r--lib/Target/X86/X86InstrFormats.td33
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td368
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp1116
-rw-r--r--lib/Target/X86/X86InstrInfo.h79
-rw-r--r--lib/Target/X86/X86InstrInfo.td439
-rw-r--r--lib/Target/X86/X86InstrMMX.td13
-rw-r--r--lib/Target/X86/X86InstrMPX.td7
-rw-r--r--lib/Target/X86/X86InstrSGX.td7
-rw-r--r--lib/Target/X86/X86InstrSSE.td1917
-rw-r--r--lib/Target/X86/X86InstrSVM.td7
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td98
-rw-r--r--lib/Target/X86/X86InstrSystem.td26
-rw-r--r--lib/Target/X86/X86InstrTSX.td7
-rw-r--r--lib/Target/X86/X86InstrVMX.td7
-rw-r--r--lib/Target/X86/X86InstrVecCompiler.td104
-rw-r--r--lib/Target/X86/X86InstrXOP.td33
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp92
-rw-r--r--lib/Target/X86/X86InterleavedAccess.cpp27
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h781
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp30
-rw-r--r--lib/Target/X86/X86LegalizerInfo.h7
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp274
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.cpp7
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h7
-rw-r--r--lib/Target/X86/X86MacroFusion.cpp164
-rw-r--r--lib/Target/X86/X86MacroFusion.h7
-rw-r--r--lib/Target/X86/X86OptimizeLEAs.cpp14
-rw-r--r--lib/Target/X86/X86PadShortFunction.cpp16
-rw-r--r--lib/Target/X86/X86PfmCounters.td7
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.cpp24
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.h7
-rw-r--r--lib/Target/X86/X86RegisterBanks.td7
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp37
-rw-r--r--lib/Target/X86/X86RegisterInfo.h23
-rw-r--r--lib/Target/X86/X86RegisterInfo.td44
-rw-r--r--lib/Target/X86/X86RetpolineThunks.cpp7
-rwxr-xr-xlib/Target/X86/X86SchedBroadwell.td169
-rw-r--r--lib/Target/X86/X86SchedHaswell.td195
-rw-r--r--lib/Target/X86/X86SchedPredicates.td31
-rw-r--r--lib/Target/X86/X86SchedSandyBridge.td96
-rw-r--r--lib/Target/X86/X86SchedSkylakeClient.td193
-rwxr-xr-xlib/Target/X86/X86SchedSkylakeServer.td212
-rw-r--r--lib/Target/X86/X86Schedule.td14
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td12
-rw-r--r--lib/Target/X86/X86ScheduleBdVer2.td599
-rw-r--r--lib/Target/X86/X86ScheduleBtVer2.td45
-rw-r--r--lib/Target/X86/X86ScheduleSLM.td10
-rw-r--r--lib/Target/X86/X86ScheduleZnver1.td10
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp222
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h7
-rw-r--r--lib/Target/X86/X86ShuffleDecodeConstantPool.cpp7
-rw-r--r--lib/Target/X86/X86ShuffleDecodeConstantPool.h7
-rw-r--r--lib/Target/X86/X86SpeculativeLoadHardening.cpp41
-rw-r--r--lib/Target/X86/X86Subtarget.cpp22
-rw-r--r--lib/Target/X86/X86Subtarget.h47
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp33
-rw-r--r--lib/Target/X86/X86TargetMachine.h7
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp7
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h7
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp529
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h76
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp7
-rw-r--r--lib/Target/X86/X86WinAllocaExpander.cpp46
-rw-r--r--lib/Target/X86/X86WinEHState.cpp45
-rw-r--r--lib/Target/XCore/Disassembler/XCoreDisassembler.cpp12
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp (renamed from lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp)7
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h (renamed from lib/Target/XCore/InstPrinter/XCoreInstPrinter.h)13
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp7
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h7
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp10
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h9
-rw-r--r--lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp10
-rw-r--r--lib/Target/XCore/TargetInfo/XCoreTargetInfo.h20
-rw-r--r--lib/Target/XCore/XCore.h7
-rw-r--r--lib/Target/XCore/XCore.td7
-rw-r--r--lib/Target/XCore/XCoreAsmPrinter.cpp31
-rw-r--r--lib/Target/XCore/XCoreCallingConv.td7
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp7
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h7
-rw-r--r--lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp7
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp7
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp82
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h9
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td7
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp7
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h7
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td7
-rw-r--r--lib/Target/XCore/XCoreLowerThreadLocal.cpp7
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.cpp7
-rw-r--r--lib/Target/XCore/XCoreMCInstLower.h7
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.cpp7
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h7
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp11
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h9
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td7
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.h7
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp7
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h7
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp8
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h7
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.cpp7
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.h7
-rw-r--r--lib/Target/XCore/XCoreTargetStreamer.h7
-rw-r--r--lib/Target/XCore/XCoreTargetTransformInfo.h7
-rw-r--r--lib/Testing/Support/Annotations.cpp95
-rw-r--r--lib/Testing/Support/Error.cpp7
-rw-r--r--lib/TextAPI/ELF/ELFStub.cpp7
-rw-r--r--lib/TextAPI/ELF/TBEHandler.cpp7
-rw-r--r--lib/TextAPI/MachO/Architecture.cpp77
-rw-r--r--lib/TextAPI/MachO/ArchitectureSet.cpp69
-rw-r--r--lib/TextAPI/MachO/InterfaceFile.cpp81
-rw-r--r--lib/TextAPI/MachO/PackedVersion.cpp113
-rw-r--r--lib/TextAPI/MachO/Symbol.cpp49
-rw-r--r--lib/TextAPI/MachO/TextAPIContext.h33
-rw-r--r--lib/TextAPI/MachO/TextStub.cpp660
-rw-r--r--lib/TextAPI/MachO/TextStubCommon.cpp178
-rw-r--r--lib/TextAPI/MachO/TextStubCommon.h81
-rw-r--r--lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp10
-rw-r--r--lib/ToolDrivers/llvm-lib/LibDriver.cpp156
-rw-r--r--lib/ToolDrivers/llvm-lib/Options.td16
-rw-r--r--lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp7
-rw-r--r--lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h7
-rw-r--r--lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp7
-rw-r--r--lib/Transforms/Coroutines/CoroCleanup.cpp9
-rw-r--r--lib/Transforms/Coroutines/CoroEarly.cpp11
-rw-r--r--lib/Transforms/Coroutines/CoroElide.cpp7
-rw-r--r--lib/Transforms/Coroutines/CoroFrame.cpp62
-rw-r--r--lib/Transforms/Coroutines/CoroInstr.h7
-rw-r--r--lib/Transforms/Coroutines/CoroInternal.h7
-rw-r--r--lib/Transforms/Coroutines/CoroSplit.cpp30
-rw-r--r--lib/Transforms/Coroutines/Coroutines.cpp15
-rw-r--r--lib/Transforms/IPO/AlwaysInliner.cpp41
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp93
-rw-r--r--lib/Transforms/IPO/Attributor.cpp1690
-rw-r--r--lib/Transforms/IPO/BarrierNoopPass.cpp7
-rw-r--r--lib/Transforms/IPO/BlockExtractor.cpp122
-rw-r--r--lib/Transforms/IPO/CalledValuePropagation.cpp7
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp29
-rw-r--r--lib/Transforms/IPO/CrossDSOCFI.cpp17
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp9
-rw-r--r--lib/Transforms/IPO/ElimAvailExtern.cpp7
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp7
-rw-r--r--lib/Transforms/IPO/ForceFunctionAttrs.cpp8
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp73
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp57
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp7
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp144
-rw-r--r--lib/Transforms/IPO/GlobalSplit.cpp7
-rw-r--r--lib/Transforms/IPO/HotColdSplitting.cpp424
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp50
-rw-r--r--lib/Transforms/IPO/IPO.cpp8
-rw-r--r--lib/Transforms/IPO/InferFunctionAttrs.cpp9
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp13
-rw-r--r--lib/Transforms/IPO/Inliner.cpp19
-rw-r--r--lib/Transforms/IPO/Internalize.cpp30
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp14
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp41
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp70
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp75
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp206
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp18
-rw-r--r--lib/Transforms/IPO/SCCP.cpp1
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp66
-rw-r--r--lib/Transforms/IPO/StripDeadPrototypes.cpp7
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp7
-rw-r--r--lib/Transforms/IPO/SyntheticCountsPropagation.cpp7
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp50
-rw-r--r--lib/Transforms/IPO/WholeProgramDevirt.cpp53
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp193
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp120
-rw-r--r--lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp159
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp1158
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp90
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp643
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h101
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp62
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp103
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp15
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp288
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp98
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp112
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp321
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp348
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp775
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp12
-rw-r--r--lib/Transforms/Instrumentation/CFGMST.h16
-rw-r--r--lib/Transforms/Instrumentation/CGProfile.cpp7
-rw-r--r--lib/Transforms/Instrumentation/ControlHeightReduction.cpp70
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp171
-rw-r--r--lib/Transforms/Instrumentation/EfficiencySanitizer.cpp900
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp154
-rw-r--r--lib/Transforms/Instrumentation/HWAddressSanitizer.cpp594
-rw-r--r--lib/Transforms/Instrumentation/IndirectCallPromotion.cpp21
-rw-r--r--lib/Transforms/Instrumentation/InstrOrderFile.cpp211
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp213
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp25
-rw-r--r--lib/Transforms/Instrumentation/MaximumSpanningTree.h10
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp395
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp445
-rw-r--r--lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp9
-rw-r--r--lib/Transforms/Instrumentation/PoisonChecking.cpp357
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp199
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp141
-rw-r--r--lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h31
-rw-r--r--lib/Transforms/ObjCARC/BlotMapVector.h7
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp7
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.h7
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.cpp7
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.h7
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAPElim.cpp7
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp251
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCExpand.cpp7
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp80
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp7
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.h7
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp7
-rw-r--r--lib/Transforms/ObjCARC/PtrState.cpp7
-rw-r--r--lib/Transforms/ObjCARC/PtrState.h7
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp20
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp7
-rw-r--r--lib/Transforms/Scalar/BDCE.cpp17
-rw-r--r--lib/Transforms/Scalar/CallSiteSplitting.cpp12
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp40
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp7
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp305
-rw-r--r--lib/Transforms/Scalar/DCE.cpp7
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp102
-rw-r--r--lib/Transforms/Scalar/DivRemPairs.cpp7
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp239
-rw-r--r--lib/Transforms/Scalar/FlattenCFGPass.cpp7
-rw-r--r--lib/Transforms/Scalar/Float2Int.cpp29
-rw-r--r--lib/Transforms/Scalar/GVN.cpp104
-rw-r--r--lib/Transforms/Scalar/GVNHoist.cpp9
-rw-r--r--lib/Transforms/Scalar/GVNSink.cpp22
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp212
-rw-r--r--lib/Transforms/Scalar/IVUsersPrinter.cpp7
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp716
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp101
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp53
-rw-r--r--lib/Transforms/Scalar/InstSimplifyPass.cpp7
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp93
-rw-r--r--lib/Transforms/Scalar/LICM.cpp466
-rw-r--r--lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopDataPrefetch.cpp11
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopDistribute.cpp22
-rw-r--r--lib/Transforms/Scalar/LoopFuse.cpp1215
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp94
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp9
-rw-r--r--lib/Transforms/Scalar/LoopInterchange.cpp130
-rw-r--r--lib/Transforms/Scalar/LoopLoadElimination.cpp62
-rw-r--r--lib/Transforms/Scalar/LoopPassManager.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopPredication.cpp524
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp17
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopSimplifyCFG.cpp237
-rw-r--r--lib/Transforms/Scalar/LoopSink.cpp14
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp344
-rw-r--r--lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp10
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp97
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp26
-rw-r--r--lib/Transforms/Scalar/LoopVersioningLICM.cpp17
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp17
-rw-r--r--lib/Transforms/Scalar/LowerExpectIntrinsic.cpp7
-rw-r--r--lib/Transforms/Scalar/LowerGuardIntrinsic.cpp7
-rw-r--r--lib/Transforms/Scalar/LowerWidenableCondition.cpp85
-rw-r--r--lib/Transforms/Scalar/MakeGuardsExplicit.cpp7
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp24
-rw-r--r--lib/Transforms/Scalar/MergeICmps.cpp728
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp7
-rw-r--r--lib/Transforms/Scalar/NaryReassociate.cpp11
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp61
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp7
-rw-r--r--lib/Transforms/Scalar/PlaceSafepoints.cpp42
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp103
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp7
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp254
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp76
-rw-r--r--lib/Transforms/Scalar/SROA.cpp215
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp11
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp70
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp7
-rw-r--r--lib/Transforms/Scalar/SimpleLoopUnswitch.cpp130
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp7
-rw-r--r--lib/Transforms/Scalar/Sink.cpp7
-rw-r--r--lib/Transforms/Scalar/SpeculateAroundPHIs.cpp15
-rw-r--r--lib/Transforms/Scalar/SpeculativeExecution.cpp8
-rw-r--r--lib/Transforms/Scalar/StraightLineStrengthReduce.cpp15
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp47
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp13
-rw-r--r--lib/Transforms/Scalar/WarnMissedTransforms.cpp9
-rw-r--r--lib/Transforms/Utils/ASanStackFrameLayout.cpp9
-rw-r--r--lib/Transforms/Utils/AddDiscriminators.cpp11
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp141
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp34
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp339
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp7
-rw-r--r--lib/Transforms/Utils/CallPromotionUtils.cpp21
-rw-r--r--lib/Transforms/Utils/CanonicalizeAliases.cpp7
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp37
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp7
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp496
-rw-r--r--lib/Transforms/Utils/CtorUtils.cpp7
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp16
-rw-r--r--lib/Transforms/Utils/EntryExitInstrumenter.cpp11
-rw-r--r--lib/Transforms/Utils/EscapeEnumerator.cpp13
-rw-r--r--lib/Transforms/Utils/Evaluator.cpp113
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp7
-rw-r--r--lib/Transforms/Utils/FunctionComparator.cpp53
-rw-r--r--lib/Transforms/Utils/FunctionImportUtils.cpp44
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp7
-rw-r--r--lib/Transforms/Utils/GuardUtils.cpp7
-rw-r--r--lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp7
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp172
-rw-r--r--lib/Transforms/Utils/InstructionNamer.cpp7
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp7
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp47
-rw-r--r--lib/Transforms/Utils/LibCallsShrinkWrap.cpp7
-rw-r--r--lib/Transforms/Utils/Local.cpp387
-rw-r--r--lib/Transforms/Utils/LoopRotationUtils.cpp26
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp126
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp424
-rw-r--r--lib/Transforms/Utils/LoopUnrollAndJam.cpp17
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp210
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp61
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp106
-rw-r--r--lib/Transforms/Utils/LoopVersioning.cpp12
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp10
-rw-r--r--lib/Transforms/Utils/LowerMemIntrinsics.cpp29
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp218
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp7
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp7
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp88
-rw-r--r--lib/Transforms/Utils/NameAnonGlobals.cpp7
-rw-r--r--lib/Transforms/Utils/PredicateInfo.cpp18
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp66
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp10
-rw-r--r--lib/Transforms/Utils/SSAUpdaterBulk.cpp7
-rw-r--r--lib/Transforms/Utils/SanitizerStats.cpp15
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp203
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp200
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp510
-rw-r--r--lib/Transforms/Utils/SizeOpts.cpp37
-rw-r--r--lib/Transforms/Utils/SplitModule.cpp7
-rw-r--r--lib/Transforms/Utils/StripGCRelocates.cpp7
-rw-r--r--lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp7
-rw-r--r--lib/Transforms/Utils/SymbolRewriter.cpp7
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp7
-rw-r--r--lib/Transforms/Utils/Utils.cpp10
-rw-r--r--lib/Transforms/Utils/VNCoercion.cpp66
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp22
-rw-r--r--lib/Transforms/Vectorize/LoadStoreVectorizer.cpp21
-rw-r--r--lib/Transforms/Vectorize/LoopVectorizationLegality.cpp347
-rw-r--r--lib/Transforms/Vectorize/LoopVectorizationPlanner.h23
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp476
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp1362
-rw-r--r--lib/Transforms/Vectorize/VPRecipeBuilder.h14
-rw-r--r--lib/Transforms/Vectorize/VPlan.cpp23
-rw-r--r--lib/Transforms/Vectorize/VPlan.h60
-rw-r--r--lib/Transforms/Vectorize/VPlanDominatorTree.h7
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp11
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGBuilder.h7
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp7
-rw-r--r--lib/Transforms/Vectorize/VPlanHCFGTransforms.h7
-rw-r--r--lib/Transforms/Vectorize/VPlanLoopInfo.h7
-rw-r--r--lib/Transforms/Vectorize/VPlanPredicator.cpp248
-rw-r--r--lib/Transforms/Vectorize/VPlanPredicator.h74
-rw-r--r--lib/Transforms/Vectorize/VPlanSLP.cpp7
-rw-r--r--lib/Transforms/Vectorize/VPlanValue.h7
-rw-r--r--lib/Transforms/Vectorize/VPlanVerifier.cpp7
-rw-r--r--lib/Transforms/Vectorize/VPlanVerifier.h7
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp7
-rw-r--r--lib/WindowsManifest/WindowsManifestMerger.cpp7
-rw-r--r--lib/XRay/BlockIndexer.cpp7
-rw-r--r--lib/XRay/BlockPrinter.cpp7
-rw-r--r--lib/XRay/BlockVerifier.cpp7
-rw-r--r--lib/XRay/FDRRecordProducer.cpp7
-rw-r--r--lib/XRay/FDRRecords.cpp7
-rw-r--r--lib/XRay/FDRTraceExpander.cpp7
-rw-r--r--lib/XRay/FDRTraceWriter.cpp7
-rw-r--r--lib/XRay/FileHeaderReader.cpp7
-rw-r--r--lib/XRay/InstrumentationMap.cpp26
-rw-r--r--lib/XRay/LogBuilderConsumer.cpp7
-rw-r--r--lib/XRay/Profile.cpp18
-rw-r--r--lib/XRay/RecordInitializer.cpp7
-rw-r--r--lib/XRay/RecordPrinter.cpp7
-rw-r--r--lib/XRay/Trace.cpp26
-rw-r--r--tools/bugpoint/BugDriver.cpp7
-rw-r--r--tools/bugpoint/BugDriver.h7
-rw-r--r--tools/bugpoint/CrashDebugger.cpp7
-rw-r--r--tools/bugpoint/ExecutionDriver.cpp7
-rw-r--r--tools/bugpoint/ExtractFunction.cpp7
-rw-r--r--tools/bugpoint/FindBugs.cpp7
-rw-r--r--tools/bugpoint/ListReducer.h7
-rw-r--r--tools/bugpoint/Miscompilation.cpp27
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp7
-rw-r--r--tools/bugpoint/ToolRunner.cpp7
-rw-r--r--tools/bugpoint/ToolRunner.h7
-rw-r--r--tools/bugpoint/bugpoint.cpp7
-rw-r--r--tools/llc/llc.cpp62
-rw-r--r--tools/lli/RemoteJITUtils.h9
-rw-r--r--tools/lli/lli.cpp97
-rw-r--r--tools/llvm-ar/llvm-ar.cpp168
-rw-r--r--tools/llvm-as/llvm-as.cpp29
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp984
-rw-r--r--tools/llvm-cov/CodeCoverage.cpp20
-rw-r--r--tools/llvm-cov/CoverageExporter.h7
-rw-r--r--tools/llvm-cov/CoverageExporterJson.cpp69
-rw-r--r--tools/llvm-cov/CoverageExporterJson.h7
-rw-r--r--tools/llvm-cov/CoverageExporterLcov.cpp9
-rw-r--r--tools/llvm-cov/CoverageExporterLcov.h7
-rw-r--r--tools/llvm-cov/CoverageFilters.cpp7
-rw-r--r--tools/llvm-cov/CoverageFilters.h7
-rw-r--r--tools/llvm-cov/CoverageReport.cpp7
-rw-r--r--tools/llvm-cov/CoverageReport.h7
-rw-r--r--tools/llvm-cov/CoverageSummaryInfo.cpp7
-rw-r--r--tools/llvm-cov/CoverageSummaryInfo.h7
-rw-r--r--tools/llvm-cov/CoverageViewOptions.h9
-rw-r--r--tools/llvm-cov/RenderingSupport.h7
-rw-r--r--tools/llvm-cov/SourceCoverageView.cpp11
-rw-r--r--tools/llvm-cov/SourceCoverageView.h7
-rw-r--r--tools/llvm-cov/SourceCoverageViewHTML.cpp7
-rw-r--r--tools/llvm-cov/SourceCoverageViewHTML.h7
-rw-r--r--tools/llvm-cov/SourceCoverageViewText.cpp7
-rw-r--r--tools/llvm-cov/SourceCoverageViewText.h7
-rw-r--r--tools/llvm-cov/TestingSupport.cpp20
-rw-r--r--tools/llvm-cov/gcov.cpp15
-rw-r--r--tools/llvm-cov/llvm-cov.cpp7
-rw-r--r--tools/llvm-cxxdump/Error.cpp7
-rw-r--r--tools/llvm-cxxdump/Error.h7
-rw-r--r--tools/llvm-cxxdump/llvm-cxxdump.cpp24
-rw-r--r--tools/llvm-cxxdump/llvm-cxxdump.h7
-rw-r--r--tools/llvm-cxxfilt/llvm-cxxfilt.cpp93
-rw-r--r--tools/llvm-cxxmap/llvm-cxxmap.cpp7
-rw-r--r--tools/llvm-diff/DiffConsumer.cpp9
-rw-r--r--tools/llvm-diff/DiffConsumer.h7
-rw-r--r--tools/llvm-diff/DiffLog.cpp7
-rw-r--r--tools/llvm-diff/DiffLog.h7
-rw-r--r--tools/llvm-diff/DifferenceEngine.cpp9
-rw-r--r--tools/llvm-diff/DifferenceEngine.h7
-rw-r--r--tools/llvm-diff/llvm-diff.cpp7
-rw-r--r--tools/llvm-dis/llvm-dis.cpp7
-rw-r--r--tools/llvm-dwarfdump/Statistics.cpp178
-rw-r--r--tools/llvm-dwarfdump/llvm-dwarfdump.cpp111
-rw-r--r--tools/llvm-extract/llvm-extract.cpp131
-rw-r--r--tools/llvm-link/llvm-link.cpp7
-rw-r--r--tools/llvm-lto/llvm-lto.cpp118
-rw-r--r--tools/llvm-lto2/llvm-lto2.cpp55
-rw-r--r--tools/llvm-mc/Disassembler.cpp7
-rw-r--r--tools/llvm-mc/Disassembler.h7
-rw-r--r--tools/llvm-mc/llvm-mc.cpp21
-rw-r--r--tools/llvm-mca/CodeRegion.cpp114
-rw-r--r--tools/llvm-mca/CodeRegion.h40
-rw-r--r--tools/llvm-mca/CodeRegionGenerator.cpp19
-rw-r--r--tools/llvm-mca/CodeRegionGenerator.h7
-rw-r--r--tools/llvm-mca/PipelinePrinter.cpp7
-rw-r--r--tools/llvm-mca/PipelinePrinter.h7
-rw-r--r--tools/llvm-mca/Views/BottleneckAnalysis.cpp624
-rw-r--r--tools/llvm-mca/Views/BottleneckAnalysis.h341
-rw-r--r--tools/llvm-mca/Views/DispatchStatistics.cpp7
-rw-r--r--tools/llvm-mca/Views/DispatchStatistics.h7
-rw-r--r--tools/llvm-mca/Views/InstructionInfoView.cpp10
-rw-r--r--tools/llvm-mca/Views/InstructionInfoView.h7
-rw-r--r--tools/llvm-mca/Views/RegisterFileStatistics.cpp7
-rw-r--r--tools/llvm-mca/Views/RegisterFileStatistics.h7
-rw-r--r--tools/llvm-mca/Views/ResourcePressureView.cpp7
-rw-r--r--tools/llvm-mca/Views/ResourcePressureView.h7
-rw-r--r--tools/llvm-mca/Views/RetireControlUnitStatistics.cpp7
-rw-r--r--tools/llvm-mca/Views/RetireControlUnitStatistics.h7
-rw-r--r--tools/llvm-mca/Views/SchedulerStatistics.cpp37
-rw-r--r--tools/llvm-mca/Views/SchedulerStatistics.h11
-rw-r--r--tools/llvm-mca/Views/SummaryView.cpp25
-rw-r--r--tools/llvm-mca/Views/SummaryView.h13
-rw-r--r--tools/llvm-mca/Views/TimelineView.cpp7
-rw-r--r--tools/llvm-mca/Views/TimelineView.h7
-rw-r--r--tools/llvm-mca/Views/View.cpp7
-rw-r--r--tools/llvm-mca/Views/View.h7
-rw-r--r--tools/llvm-mca/llvm-mca.cpp56
-rw-r--r--tools/llvm-modextract/llvm-modextract.cpp7
-rw-r--r--tools/llvm-nm/llvm-nm.cpp517
-rw-r--r--tools/llvm-objcopy/Buffer.cpp50
-rw-r--r--tools/llvm-objcopy/Buffer.h16
-rw-r--r--tools/llvm-objcopy/COFF/COFFObjcopy.cpp158
-rw-r--r--tools/llvm-objcopy/COFF/COFFObjcopy.h12
-rw-r--r--tools/llvm-objcopy/COFF/Object.cpp91
-rw-r--r--tools/llvm-objcopy/COFF/Object.h79
-rw-r--r--tools/llvm-objcopy/COFF/Reader.cpp112
-rw-r--r--tools/llvm-objcopy/COFF/Reader.h9
-rw-r--r--tools/llvm-objcopy/COFF/Writer.cpp167
-rw-r--r--tools/llvm-objcopy/COFF/Writer.h10
-rw-r--r--tools/llvm-objcopy/CopyConfig.cpp661
-rw-r--r--tools/llvm-objcopy/CopyConfig.h130
-rw-r--r--tools/llvm-objcopy/ELF/ELFObjcopy.cpp684
-rw-r--r--tools/llvm-objcopy/ELF/ELFObjcopy.h18
-rw-r--r--tools/llvm-objcopy/ELF/Object.cpp1198
-rw-r--r--tools/llvm-objcopy/ELF/Object.h314
-rw-r--r--tools/llvm-objcopy/MachO/MachOObjcopy.cpp68
-rw-r--r--tools/llvm-objcopy/MachO/MachOObjcopy.h31
-rw-r--r--tools/llvm-objcopy/MachO/MachOReader.cpp241
-rw-r--r--tools/llvm-objcopy/MachO/MachOReader.h48
-rw-r--r--tools/llvm-objcopy/MachO/MachOWriter.cpp590
-rw-r--r--tools/llvm-objcopy/MachO/MachOWriter.h64
-rw-r--r--tools/llvm-objcopy/MachO/Object.cpp15
-rw-r--r--tools/llvm-objcopy/MachO/Object.h232
-rw-r--r--tools/llvm-objcopy/ObjcopyOpts.td166
-rw-r--r--tools/llvm-objcopy/StripOpts.td61
-rw-r--r--tools/llvm-objcopy/llvm-objcopy.cpp237
-rw-r--r--tools/llvm-objcopy/llvm-objcopy.h8
-rw-r--r--tools/llvm-objdump/COFFDump.cpp82
-rw-r--r--tools/llvm-objdump/ELFDump.cpp252
-rw-r--r--tools/llvm-objdump/MachODump.cpp978
-rw-r--r--tools/llvm-objdump/WasmDump.cpp40
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp2215
-rw-r--r--tools/llvm-objdump/llvm-objdump.h153
-rw-r--r--tools/llvm-pdbutil/BytesOutputStyle.cpp11
-rw-r--r--tools/llvm-pdbutil/BytesOutputStyle.h7
-rw-r--r--tools/llvm-pdbutil/DumpOutputStyle.cpp192
-rw-r--r--tools/llvm-pdbutil/DumpOutputStyle.h16
-rw-r--r--tools/llvm-pdbutil/ExplainOutputStyle.cpp7
-rw-r--r--tools/llvm-pdbutil/ExplainOutputStyle.h7
-rw-r--r--tools/llvm-pdbutil/FormatUtil.cpp7
-rw-r--r--tools/llvm-pdbutil/FormatUtil.h7
-rw-r--r--tools/llvm-pdbutil/InputFile.cpp16
-rw-r--r--tools/llvm-pdbutil/InputFile.h7
-rw-r--r--tools/llvm-pdbutil/LinePrinter.cpp10
-rw-r--r--tools/llvm-pdbutil/LinePrinter.h10
-rw-r--r--tools/llvm-pdbutil/MinimalSymbolDumper.cpp159
-rw-r--r--tools/llvm-pdbutil/MinimalSymbolDumper.h7
-rw-r--r--tools/llvm-pdbutil/MinimalTypeDumper.cpp29
-rw-r--r--tools/llvm-pdbutil/MinimalTypeDumper.h14
-rw-r--r--tools/llvm-pdbutil/OutputStyle.h7
-rw-r--r--tools/llvm-pdbutil/PdbYaml.cpp10
-rw-r--r--tools/llvm-pdbutil/PdbYaml.h7
-rw-r--r--tools/llvm-pdbutil/PrettyBuiltinDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyBuiltinDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyClassDefinitionDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyCompilandDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyCompilandDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyEnumDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyEnumDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyExternalSymbolDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyFunctionDumper.cpp14
-rw-r--r--tools/llvm-pdbutil/PrettyFunctionDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyTypeDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyTypeDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyTypedefDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyTypedefDumper.h7
-rw-r--r--tools/llvm-pdbutil/PrettyVariableDumper.cpp7
-rw-r--r--tools/llvm-pdbutil/PrettyVariableDumper.h7
-rw-r--r--tools/llvm-pdbutil/StreamUtil.cpp7
-rw-r--r--tools/llvm-pdbutil/StreamUtil.h7
-rw-r--r--tools/llvm-pdbutil/TypeReferenceTracker.cpp160
-rw-r--r--tools/llvm-pdbutil/TypeReferenceTracker.h69
-rw-r--r--tools/llvm-pdbutil/YAMLOutputStyle.cpp12
-rw-r--r--tools/llvm-pdbutil/YAMLOutputStyle.h7
-rw-r--r--tools/llvm-pdbutil/llvm-pdbutil.cpp43
-rw-r--r--tools/llvm-pdbutil/llvm-pdbutil.h9
-rw-r--r--tools/llvm-profdata/llvm-profdata.cpp153
-rw-r--r--tools/llvm-readobj/ARMEHABIPrinter.h17
-rw-r--r--tools/llvm-readobj/ARMWinEHPrinter.cpp21
-rw-r--r--tools/llvm-readobj/ARMWinEHPrinter.h9
-rw-r--r--tools/llvm-readobj/COFFDumper.cpp187
-rw-r--r--tools/llvm-readobj/COFFImportDumper.cpp9
-rw-r--r--tools/llvm-readobj/DwarfCFIEHPrinter.h7
-rw-r--r--tools/llvm-readobj/ELFDumper.cpp2259
-rw-r--r--tools/llvm-readobj/Error.cpp7
-rw-r--r--tools/llvm-readobj/Error.h7
-rw-r--r--tools/llvm-readobj/MachODumper.cpp43
-rw-r--r--tools/llvm-readobj/ObjDumper.cpp218
-rw-r--r--tools/llvm-readobj/ObjDumper.h56
-rw-r--r--tools/llvm-readobj/StackMapPrinter.h19
-rw-r--r--tools/llvm-readobj/WasmDumper.cpp62
-rw-r--r--tools/llvm-readobj/Win64EHDumper.cpp7
-rw-r--r--tools/llvm-readobj/Win64EHDumper.h7
-rw-r--r--tools/llvm-readobj/WindowsResourceDumper.cpp7
-rw-r--r--tools/llvm-readobj/WindowsResourceDumper.h7
-rw-r--r--tools/llvm-readobj/XCOFFDumper.cpp190
-rw-r--r--tools/llvm-readobj/llvm-readobj.cpp272
-rw-r--r--tools/llvm-readobj/llvm-readobj.h14
-rw-r--r--tools/llvm-rtdyld/llvm-rtdyld.cpp361
-rw-r--r--tools/llvm-stress/llvm-stress.cpp9
-rw-r--r--tools/llvm-symbolizer/llvm-symbolizer.cpp161
-rw-r--r--tools/llvm-xray/func-id-helper.cpp21
-rw-r--r--tools/llvm-xray/func-id-helper.h7
-rw-r--r--tools/llvm-xray/llvm-xray.cpp7
-rw-r--r--tools/llvm-xray/trie-node.h7
-rw-r--r--tools/llvm-xray/xray-account.cpp11
-rw-r--r--tools/llvm-xray/xray-account.h19
-rw-r--r--tools/llvm-xray/xray-color-helper.cpp7
-rw-r--r--tools/llvm-xray/xray-color-helper.h7
-rw-r--r--tools/llvm-xray/xray-converter.cpp120
-rw-r--r--tools/llvm-xray/xray-converter.h7
-rw-r--r--tools/llvm-xray/xray-extract.cpp11
-rw-r--r--tools/llvm-xray/xray-fdr-dump.cpp18
-rw-r--r--tools/llvm-xray/xray-graph-diff.cpp7
-rw-r--r--tools/llvm-xray/xray-graph-diff.h7
-rw-r--r--tools/llvm-xray/xray-graph.cpp11
-rw-r--r--tools/llvm-xray/xray-graph.h10
-rw-r--r--tools/llvm-xray/xray-registry.cpp7
-rw-r--r--tools/llvm-xray/xray-registry.h7
-rw-r--r--tools/llvm-xray/xray-stacks.cpp17
-rw-r--r--tools/opt/AnalysisWrappers.cpp7
-rw-r--r--tools/opt/BreakpointPrinter.cpp11
-rw-r--r--tools/opt/BreakpointPrinter.h7
-rw-r--r--tools/opt/Debugify.cpp7
-rw-r--r--tools/opt/Debugify.h7
-rw-r--r--tools/opt/GraphPrinters.cpp7
-rw-r--r--tools/opt/NewPMDriver.cpp58
-rw-r--r--tools/opt/NewPMDriver.h14
-rw-r--r--tools/opt/PassPrinters.cpp7
-rw-r--r--tools/opt/PassPrinters.h7
-rw-r--r--tools/opt/PrintSCC.cpp7
-rw-r--r--tools/opt/opt.cpp151
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp246
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp26
-rw-r--r--utils/TableGen/AsmWriterInst.cpp40
-rw-r--r--utils/TableGen/AsmWriterInst.h7
-rw-r--r--utils/TableGen/Attributes.cpp7
-rw-r--r--utils/TableGen/CTagsEmitter.cpp7
-rw-r--r--utils/TableGen/CallingConvEmitter.cpp51
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp106
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp110
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h25
-rw-r--r--utils/TableGen/CodeGenHwModes.cpp7
-rw-r--r--utils/TableGen/CodeGenHwModes.h7
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp79
-rw-r--r--utils/TableGen/CodeGenInstruction.h8
-rw-r--r--utils/TableGen/CodeGenIntrinsics.h20
-rw-r--r--utils/TableGen/CodeGenMapTable.cpp7
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp15
-rw-r--r--utils/TableGen/CodeGenRegisters.h7
-rw-r--r--utils/TableGen/CodeGenSchedule.cpp52
-rw-r--r--utils/TableGen/CodeGenSchedule.h7
-rw-r--r--utils/TableGen/CodeGenTarget.cpp100
-rw-r--r--utils/TableGen/CodeGenTarget.h7
-rw-r--r--utils/TableGen/DAGISelEmitter.cpp7
-rw-r--r--utils/TableGen/DAGISelMatcher.cpp31
-rw-r--r--utils/TableGen/DAGISelMatcher.h66
-rw-r--r--utils/TableGen/DAGISelMatcherEmitter.cpp27
-rw-r--r--utils/TableGen/DAGISelMatcherGen.cpp60
-rw-r--r--utils/TableGen/DAGISelMatcherOpt.cpp15
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.cpp7
-rw-r--r--utils/TableGen/DisassemblerEmitter.cpp7
-rw-r--r--utils/TableGen/ExegesisEmitter.cpp7
-rw-r--r--utils/TableGen/FastISelEmitter.cpp7
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.cpp147
-rw-r--r--utils/TableGen/GlobalISelEmitter.cpp157
-rw-r--r--utils/TableGen/InfoByHwMode.cpp14
-rw-r--r--utils/TableGen/InfoByHwMode.h13
-rw-r--r--utils/TableGen/InstrDocsEmitter.cpp7
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp89
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp120
-rw-r--r--utils/TableGen/OptParserEmitter.cpp7
-rw-r--r--utils/TableGen/PredicateExpander.cpp7
-rw-r--r--utils/TableGen/PredicateExpander.h7
-rw-r--r--utils/TableGen/PseudoLoweringEmitter.cpp7
-rw-r--r--utils/TableGen/RISCVCompressInstEmitter.cpp59
-rw-r--r--utils/TableGen/RegisterBankEmitter.cpp7
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp7
-rw-r--r--utils/TableGen/SDNodeProperties.cpp13
-rw-r--r--utils/TableGen/SDNodeProperties.h7
-rw-r--r--utils/TableGen/SearchableTableEmitter.cpp35
-rw-r--r--utils/TableGen/SequenceToOffsetTable.h7
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp140
-rw-r--r--utils/TableGen/SubtargetFeatureInfo.cpp27
-rw-r--r--utils/TableGen/SubtargetFeatureInfo.h16
-rw-r--r--utils/TableGen/TableGen.cpp18
-rw-r--r--utils/TableGen/TableGenBackends.h7
-rw-r--r--utils/TableGen/Types.cpp7
-rw-r--r--utils/TableGen/Types.h7
-rw-r--r--utils/TableGen/WebAssemblyDisassemblerEmitter.cpp38
-rw-r--r--utils/TableGen/WebAssemblyDisassemblerEmitter.h7
-rw-r--r--utils/TableGen/X86DisassemblerShared.h7
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp81
-rw-r--r--utils/TableGen/X86DisassemblerTables.h7
-rw-r--r--utils/TableGen/X86EVEX2VEXTablesEmitter.cpp64
-rw-r--r--utils/TableGen/X86FoldTablesEmitter.cpp71
-rw-r--r--utils/TableGen/X86ModRMFilters.cpp7
-rw-r--r--utils/TableGen/X86ModRMFilters.h7
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp128
-rw-r--r--utils/TableGen/X86RecognizableInstr.h24
4598 files changed, 218552 insertions, 97132 deletions
diff --git a/LICENSE.TXT b/LICENSE.TXT
index e4d67d16fea1..fa6ac5400070 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -1,5 +1,240 @@
==============================================================================
-LLVM Release License
+The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
+==============================================================================
+Software from third parties included in the LLVM Project:
+==============================================================================
+The LLVM Project contains third party software which is under different license
+terms. All such code will be identified clearly using at least one of two
+mechanisms:
+1) It will be in a separate directory tree with its own `LICENSE.txt` or
+ `LICENSE` file at the top containing the specific license and restrictions
+ which apply to that software, or
+2) It will contain specific license and restriction terms at the top of every
+ file.
+
+==============================================================================
+Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy):
==============================================================================
University of Illinois/NCSA
Open Source License
@@ -42,27 +277,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
SOFTWARE.
-==============================================================================
-Copyrights and Licenses for Third Party Software Distributed with LLVM:
-==============================================================================
-The LLVM software contains code written by third parties. Such software will
-have its own individual LICENSE.TXT file in the directory in which it appears.
-This file will describe the copyrights, license, and restrictions which apply
-to that code.
-
-The disclaimer of warranty in the University of Illinois Open Source License
-applies to all code in the LLVM Distribution, and nothing in any of the
-other licenses gives permission to use the names of the LLVM Team or the
-University of Illinois to endorse or promote products derived from this
-Software.
-
-The following pieces of software have additional or alternate copyrights,
-licenses, and/or restrictions:
-
-Program Directory
-------- ---------
-Google Test llvm/utils/unittest/googletest
-OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
-pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
-ARM contributions llvm/lib/Target/ARM/LICENSE.TXT
-md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h
index 36dcb89e0e08..cb9e8ece3c53 100644
--- a/include/llvm-c/Analysis.h
+++ b/include/llvm-c/Analysis.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Analysis.h - Analysis Library C Interface --------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h
index d1fc302767ba..b307ee979f8a 100644
--- a/include/llvm-c/BitReader.h
+++ b/include/llvm-c/BitReader.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/BitReader.h - BitReader Library C Interface ------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h
index 797d03179ab3..187051555b9a 100644
--- a/include/llvm-c/BitWriter.h
+++ b/include/llvm-c/BitWriter.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/BitWriter.h - BitWriter Library C Interface ------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Comdat.h b/include/llvm-c/Comdat.h
index 499996d68a53..81fee3fc9a6b 100644
--- a/include/llvm-c/Comdat.h
+++ b/include/llvm-c/Comdat.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Comdat.h - Module Comdat C Interface -------------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 06de058bdc58..cac2f297056d 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Core.h - Core Library C Interface ------------------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
@@ -65,6 +65,7 @@ typedef enum {
LLVMInvoke = 5,
/* removed 6 due to API changes */
LLVMUnreachable = 7,
+ LLVMCallBr = 67,
/* Standard Unary Operators */
LLVMFNeg = 66,
@@ -2402,6 +2403,13 @@ LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn);
void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn);
/**
+ * Obtain the intrinsic ID number which matches the given function name.
+ *
+ * @see llvm::Function::lookupIntrinsicID()
+ */
+unsigned LLVMLookupIntrinsicID(const char *Name, size_t NameLen);
+
+/**
* Obtain the ID number from a function instance.
*
* @see llvm::Function::getIntrinsicID()
@@ -2612,52 +2620,138 @@ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned Align);
*/
/**
- * @}
+ * @defgroup LLVMCCoreValueGlobalIFunc IFuncs
+ *
+ * Functions in this group relate to indirect functions.
+ *
+ * Functions in this group expect LLVMValueRef instances that correspond
+ * to llvm::GlobalIFunc instances.
+ *
+ * @{
*/
/**
- * @}
+ * Add a global indirect function to a module under a specified name.
+ *
+ * @see llvm::GlobalIFunc::create()
*/
+LLVMValueRef LLVMAddGlobalIFunc(LLVMModuleRef M,
+ const char *Name, size_t NameLen,
+ LLVMTypeRef Ty, unsigned AddrSpace,
+ LLVMValueRef Resolver);
/**
- * @}
+ * Obtain a GlobalIFunc value from a Module by its name.
+ *
+ * The returned value corresponds to a llvm::GlobalIFunc value.
+ *
+ * @see llvm::Module::getNamedIFunc()
*/
+LLVMValueRef LLVMGetNamedGlobalIFunc(LLVMModuleRef M,
+ const char *Name, size_t NameLen);
/**
- * @defgroup LLVMCCoreValueMetadata Metadata
+ * Obtain an iterator to the first GlobalIFunc in a Module.
*
- * @{
+ * @see llvm::Module::ifunc_begin()
*/
+LLVMValueRef LLVMGetFirstGlobalIFunc(LLVMModuleRef M);
/**
- * Obtain a MDString value from a context.
+ * Obtain an iterator to the last GlobalIFunc in a Module.
*
- * The returned instance corresponds to the llvm::MDString class.
+ * @see llvm::Module::ifunc_end()
+ */
+LLVMValueRef LLVMGetLastGlobalIFunc(LLVMModuleRef M);
+
+/**
+ * Advance a GlobalIFunc iterator to the next GlobalIFunc.
*
- * The instance is specified by string data of a specified length. The
- * string content is copied, so the backing memory can be freed after
- * this function returns.
+ * Returns NULL if the iterator was already at the end and there are no more
+ * global IFuncs.
*/
-LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
- unsigned SLen);
+LLVMValueRef LLVMGetNextGlobalIFunc(LLVMValueRef IFunc);
/**
- * Obtain a MDString value from the global context.
+ * Decrement a GlobalIFunc iterator to the previous GlobalIFunc.
+ *
+ * Returns NULL if the iterator was already at the beginning and there are
+ * no previous global IFuncs.
*/
-LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
+LLVMValueRef LLVMGetPreviousGlobalIFunc(LLVMValueRef IFunc);
+
+/**
+ * Retrieves the resolver function associated with this indirect function, or
+ * NULL if it does not exist.
+ *
+ * @see llvm::GlobalIFunc::getResolver()
+ */
+LLVMValueRef LLVMGetGlobalIFuncResolver(LLVMValueRef IFunc);
/**
- * Obtain a MDNode value from a context.
+ * Sets the resolver function associated with this indirect function.
*
- * The returned value corresponds to the llvm::MDNode class.
+ * @see llvm::GlobalIFunc::setResolver()
*/
-LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
- unsigned Count);
+void LLVMSetGlobalIFuncResolver(LLVMValueRef IFunc, LLVMValueRef Resolver);
/**
- * Obtain a MDNode value from the global context.
+ * Remove a global indirect function from its parent module and delete it.
+ *
+ * @see llvm::GlobalIFunc::eraseFromParent()
*/
-LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
+void LLVMEraseGlobalIFunc(LLVMValueRef IFunc);
+
+/**
+ * Remove a global indirect function from its parent module.
+ *
+ * This unlinks the global indirect function from its containing module but
+ * keeps it alive.
+ *
+ * @see llvm::GlobalIFunc::removeFromParent()
+ */
+void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc);
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueMetadata Metadata
+ *
+ * @{
+ */
+
+/**
+ * Create an MDString value from a given string value.
+ *
+ * The MDString value does not take ownership of the given string; it remains
+ * the responsibility of the caller to free it.
+ *
+ * @see llvm::MDString::get()
+ */
+LLVMMetadataRef LLVMMDStringInContext2(LLVMContextRef C, const char *Str,
+ size_t SLen);
+
+/**
+ * Create an MDNode value with the given array of operands.
+ *
+ * @see llvm::MDNode::get()
+ */
+LLVMMetadataRef LLVMMDNodeInContext2(LLVMContextRef C, LLVMMetadataRef *MDs,
+ size_t Count);
/**
* Obtain a Metadata as a Value.
@@ -2699,6 +2793,17 @@ unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V);
*/
void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest);
+/** Deprecated: Use LLVMMDStringInContext2 instead. */
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen);
+/** Deprecated: Use LLVMMDStringInContext2 instead. */
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
+/** Deprecated: Use LLVMMDNodeInContext2 instead. */
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count);
+/** Deprecated: Use LLVMMDNodeInContext2 instead. */
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
+
/**
* @}
*/
@@ -2812,6 +2917,24 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
/**
+ * Insert the given basic block after the insertion point of the given builder.
+ *
+ * The insertion point must be valid.
+ *
+ * @see llvm::Function::BasicBlockListType::insertAfter()
+ */
+void LLVMInsertExistingBasicBlockAfterInsertBlock(LLVMBuilderRef Builder,
+ LLVMBasicBlockRef BB);
+
+/**
+ * Append the given basic block to the basic block list of the given function.
+ *
+ * @see llvm::Function::BasicBlockListType::push_back()
+ */
+void LLVMAppendExistingBasicBlock(LLVMValueRef Fn,
+ LLVMBasicBlockRef BB);
+
+/**
* Create a new basic block without inserting it into a function.
*
* @see llvm::BasicBlock::Create()
@@ -3387,9 +3510,59 @@ void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr,
void LLVMDisposeBuilder(LLVMBuilderRef Builder);
/* Metadata */
+
+/**
+ * Get location information used by debugging information.
+ *
+ * @see llvm::IRBuilder::getCurrentDebugLocation()
+ */
+LLVMMetadataRef LLVMGetCurrentDebugLocation2(LLVMBuilderRef Builder);
+
+/**
+ * Set location information used by debugging information.
+ *
+ * To clear the location metadata of the given instruction, pass NULL to \p Loc.
+ *
+ * @see llvm::IRBuilder::SetCurrentDebugLocation()
+ */
+void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc);
+
+/**
+ * Attempts to set the debug location for the given instruction using the
+ * current debug location for the given builder. If the builder has no current
+ * debug location, this function is a no-op.
+ *
+ * @see llvm::IRBuilder::SetInstDebugLocation()
+ */
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst);
+
+/**
+ * Get the default floating-point math metadata for a given builder.
+ *
+ * @see llvm::IRBuilder::getDefaultFPMathTag()
+ */
+LLVMMetadataRef LLVMBuilderGetDefaultFPMathTag(LLVMBuilderRef Builder);
+
+/**
+ * Set the default floating-point math metadata for the given builder.
+ *
+ * To clear the metadata, pass NULL to \p FPMathTag.
+ *
+ * @see llvm::IRBuilder::setDefaultFPMathTag()
+ */
+void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder,
+ LLVMMetadataRef FPMathTag);
+
+/**
+ * Deprecated: Passing the NULL location will crash.
+ * Use LLVMGetCurrentDebugLocation2 instead.
+ */
void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L);
+/**
+ * Deprecated: Returning the NULL location will crash.
+ * Use LLVMGetCurrentDebugLocation2 instead.
+ */
LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder);
-void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst);
/* Terminators */
LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef);
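
The Core.h hunk above adds size_t-based metadata constructors (LLVMMDStringInContext2, LLVMMDNodeInContext2, returning LLVMMetadataRef directly) and intrinsic-ID lookup by name. A minimal usage sketch against this revision of the C API follows; it is illustrative only and not part of the imported diff:

/* Sketch: exercising the new metadata and intrinsic-lookup entry points. */
#include <llvm-c/Core.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();

  /* Unlike the deprecated LLVMMDStringInContext/LLVMMDNodeInContext, the
   * 2-suffixed variants take size_t lengths and return LLVMMetadataRef. */
  const char *Tag = "example.tag";
  LLVMMetadataRef Str = LLVMMDStringInContext2(Ctx, Tag, strlen(Tag));
  LLVMMetadataRef Node = LLVMMDNodeInContext2(Ctx, &Str, 1);
  (void)Node;

  /* Intrinsic IDs can now be resolved by name through the C API. */
  const char *Name = "llvm.memcpy";
  unsigned ID = LLVMLookupIntrinsicID(Name, strlen(Name));
  printf("intrinsic id for %s: %u\n", Name, ID);

  LLVMContextDispose(Ctx);
  return 0;
}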
diff --git a/include/llvm-c/DataTypes.h b/include/llvm-c/DataTypes.h
index 7081c83ffc2b..893b22b49ffc 100644
--- a/include/llvm-c/DataTypes.h
+++ b/include/llvm-c/DataTypes.h
@@ -1,9 +1,9 @@
/*===-- include/llvm-c/DataTypes.h - Define fixed size types ------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h
index 87a72034b0e8..33c8110a863c 100644
--- a/include/llvm-c/DebugInfo.h
+++ b/include/llvm-c/DebugInfo.h
@@ -1,9 +1,8 @@
//===------------ DebugInfo.h - LLVM C API Debug Info API -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -51,13 +50,12 @@ typedef enum {
LLVMDIFlagIntroducedVirtual = 1 << 18,
LLVMDIFlagBitField = 1 << 19,
LLVMDIFlagNoReturn = 1 << 20,
- LLVMDIFlagMainSubprogram = 1 << 21,
LLVMDIFlagTypePassByValue = 1 << 22,
LLVMDIFlagTypePassByReference = 1 << 23,
LLVMDIFlagEnumClass = 1 << 24,
LLVMDIFlagFixedEnum = LLVMDIFlagEnumClass, // Deprecated.
LLVMDIFlagThunk = 1 << 25,
- LLVMDIFlagTrivial = 1 << 26,
+ LLVMDIFlagNonTrivial = 1 << 26,
LLVMDIFlagBigEndian = 1 << 27,
LLVMDIFlagLittleEndian = 1 << 28,
LLVMDIFlagIndirectVirtualBase = (1 << 2) | (1 << 5),
@@ -161,7 +159,8 @@ enum {
LLVMDIObjCPropertyMetadataKind,
LLVMDIImportedEntityMetadataKind,
LLVMDIMacroMetadataKind,
- LLVMDIMacroFileMetadataKind
+ LLVMDIMacroFileMetadataKind,
+ LLVMDICommonBlockMetadataKind
};
typedef unsigned LLVMMetadataKind;
@@ -453,6 +452,49 @@ unsigned LLVMDILocationGetColumn(LLVMMetadataRef Location);
LLVMMetadataRef LLVMDILocationGetScope(LLVMMetadataRef Location);
/**
+ * Get the "inline at" location associated with this debug location.
+ * \param Location The debug location.
+ *
+ * @see DILocation::getInlinedAt()
+ */
+LLVMMetadataRef LLVMDILocationGetInlinedAt(LLVMMetadataRef Location);
+
+/**
+ * Get the metadata of the file associated with a given scope.
+ * \param Scope The scope object.
+ *
+ * @see DIScope::getFile()
+ */
+LLVMMetadataRef LLVMDIScopeGetFile(LLVMMetadataRef Scope);
+
+/**
+ * Get the directory of a given file.
+ * \param File The file object.
+ * \param Len The length of the returned string.
+ *
+ * @see DIFile::getDirectory()
+ */
+const char *LLVMDIFileGetDirectory(LLVMMetadataRef File, unsigned *Len);
+
+/**
+ * Get the name of a given file.
+ * \param File The file object.
+ * \param Len The length of the returned string.
+ *
+ * @see DIFile::getFilename()
+ */
+const char *LLVMDIFileGetFilename(LLVMMetadataRef File, unsigned *Len);
+
+/**
+ * Get the source of a given file.
+ * \param File The file object.
+ * \param Len The length of the returned string.
+ *
+ * @see DIFile::getSource()
+ */
+const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len);
+
+/**
* Create a type array.
* \param Builder The DIBuilder.
* \param Data The type elements.
@@ -480,6 +522,19 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Builder,
LLVMDIFlags Flags);
/**
+ * Create debugging information entry for an enumerator.
+ * @param Builder The DIBuilder.
+ * @param Name Enumerator name.
+ * @param NameLen Length of enumerator name.
+ * @param Value Enumerator value.
+ * @param IsUnsigned True if the value is unsigned.
+ */
+LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder,
+ const char *Name, size_t NameLen,
+ int64_t Value,
+ LLVMBool IsUnsigned);
+
+/**
* Create debugging information entry for an enumeration.
* \param Builder The DIBuilder.
* \param Scope Scope in which this enumeration is defined.
@@ -1017,6 +1072,48 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
size_t NameLen, const char *Linkage, size_t LinkLen, LLVMMetadataRef File,
unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits);
+
+/**
+ * Retrieves the \c DIVariable associated with this global variable expression.
+ * \param GVE The global variable expression.
+ *
+ * @see llvm::DIGlobalVariableExpression::getVariable()
+ */
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetVariable(LLVMMetadataRef GVE);
+
+/**
+ * Retrieves the \c DIExpression associated with this global variable expression.
+ * \param GVE The global variable expression.
+ *
+ * @see llvm::DIGlobalVariableExpression::getExpression()
+ */
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetExpression(
+ LLVMMetadataRef GVE);
+
+/**
+ * Get the metadata of the file associated with a given variable.
+ * \param Var The variable object.
+ *
+ * @see DIVariable::getFile()
+ */
+LLVMMetadataRef LLVMDIVariableGetFile(LLVMMetadataRef Var);
+
+/**
+ * Get the metadata of the scope associated with a given variable.
+ * \param Var The variable object.
+ *
+ * @see DIVariable::getScope()
+ */
+LLVMMetadataRef LLVMDIVariableGetScope(LLVMMetadataRef Var);
+
+/**
+ * Get the source line where this \c DIVariable is declared.
+ * \param Var The DIVariable.
+ *
+ * @see DIVariable::getLine()
+ */
+unsigned LLVMDIVariableGetLine(LLVMMetadataRef Var);
+
/**
* Create a new temporary \c MDNode. Suitable for use in constructing cyclic
* \c MDNode structures. A temporary \c MDNode is not uniqued, may be RAUW'd,
@@ -1181,6 +1278,30 @@ LLVMMetadataRef LLVMGetSubprogram(LLVMValueRef Func);
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP);
/**
+ * Get the line associated with a given subprogram.
+ * \param Subprogram The subprogram object.
+ *
+ * @see DISubprogram::getLine()
+ */
+unsigned LLVMDISubprogramGetLine(LLVMMetadataRef Subprogram);
+
+/**
+ * Get the debug location for the given instruction.
+ *
+ * @see llvm::Instruction::getDebugLoc()
+ */
+LLVMMetadataRef LLVMInstructionGetDebugLoc(LLVMValueRef Inst);
+
+/**
+ * Set the debug location for the given instruction.
+ *
+ * To clear the location metadata of the given instruction, pass NULL to \p Loc.
+ *
+ * @see llvm::Instruction::setDebugLoc()
+ */
+void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc);
+
+/**
* Obtain the enumerated type of a Metadata instance.
*
* @see llvm::Metadata::getMetadataID()
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
index 5e80b95848cf..3adcc3c47a3f 100644
--- a/include/llvm-c/Disassembler.h
+++ b/include/llvm-c/Disassembler.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/DisassemblerTypes.h b/include/llvm-c/DisassemblerTypes.h
index e8754ac77055..389e5ee454a8 100644
--- a/include/llvm-c/DisassemblerTypes.h
+++ b/include/llvm-c/DisassemblerTypes.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/DisassemblerTypedefs.h -----------------------------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*/
diff --git a/include/llvm-c/Error.h b/include/llvm-c/Error.h
index 71e84661222b..52943063c697 100644
--- a/include/llvm-c/Error.h
+++ b/include/llvm-c/Error.h
@@ -1,9 +1,9 @@
/*===------- llvm-c/Error.h - llvm::Error class C Interface -------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
@@ -60,7 +60,7 @@ void LLVMDisposeErrorMessage(char *ErrMsg);
/**
* Returns the type id for llvm StringError.
*/
-LLVMErrorTypeId LLVMGetStringErrorTypeId();
+LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
#ifdef __cplusplus
}
diff --git a/include/llvm-c/ErrorHandling.h b/include/llvm-c/ErrorHandling.h
index 2059b3aeb158..4927349d8983 100644
--- a/include/llvm-c/ErrorHandling.h
+++ b/include/llvm-c/ErrorHandling.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/ErrorHandling.h - Error Handling C Interface -------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index e8ebef9ab15d..ef714cd06384 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/ExecutionEngine.h - ExecutionEngine Lib C Iface --*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/IRReader.h b/include/llvm-c/IRReader.h
index 5b58d9921fb0..4d0b696e9583 100644
--- a/include/llvm-c/IRReader.h
+++ b/include/llvm-c/IRReader.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/IRReader.h - IR Reader C Interface -----------------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
index e45eafb139f2..36c41dbd8d31 100644
--- a/include/llvm-c/Initialization.h
+++ b/include/llvm-c/Initialization.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Initialization.h - Initialization C Interface ------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h
index 8bcf59969ccb..19b4f5cf7491 100644
--- a/include/llvm-c/LinkTimeOptimizer.h
+++ b/include/llvm-c/LinkTimeOptimizer.h
@@ -1,9 +1,8 @@
//===-- llvm/LinkTimeOptimizer.h - LTO Public C Interface -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm-c/Linker.h b/include/llvm-c/Linker.h
index d02c37f94c86..908513041661 100644
--- a/include/llvm-c/Linker.h
+++ b/include/llvm-c/Linker.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Linker.h - Module Linker C Interface -------------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
index a2980e89fe3d..1e9b703a68ff 100644
--- a/include/llvm-c/Object.h
+++ b/include/llvm-c/Object.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Object.h - Object Lib C Iface --------------------*- C++ -*-===*/
/* */
-/* The LLVM Compiler Infrastructure */
-/* */
-/* This file is distributed under the University of Illinois Open Source */
-/* License. See LICENSE.TXT for details. */
+/* Part of the LLVM Project, under the Apache License v2.0 with LLVM */
+/* Exceptions. */
+/* See https://llvm.org/LICENSE.txt for license information. */
+/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */
/* */
/*===----------------------------------------------------------------------===*/
/* */
@@ -34,29 +34,140 @@ extern "C" {
*/
// Opaque type wrappers
-typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef;
typedef struct LLVMOpaqueSymbolIterator *LLVMSymbolIteratorRef;
typedef struct LLVMOpaqueRelocationIterator *LLVMRelocationIteratorRef;
-// ObjectFile creation
-LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf);
-void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile);
+typedef enum {
+ LLVMBinaryTypeArchive, /**< Archive file. */
+ LLVMBinaryTypeMachOUniversalBinary, /**< Mach-O Universal Binary file. */
+ LLVMBinaryTypeCOFFImportFile, /**< COFF Import file. */
+ LLVMBinaryTypeIR, /**< LLVM IR. */
+ LLVMBinaryTypeWinRes, /**< Windows resource (.res) file. */
+ LLVMBinaryTypeCOFF, /**< COFF Object file. */
+ LLVMBinaryTypeELF32L, /**< ELF 32-bit, little endian. */
+ LLVMBinaryTypeELF32B, /**< ELF 32-bit, big endian. */
+ LLVMBinaryTypeELF64L, /**< ELF 64-bit, little endian. */
+ LLVMBinaryTypeELF64B, /**< ELF 64-bit, big endian. */
+ LLVMBinaryTypeMachO32L, /**< MachO 32-bit, little endian. */
+ LLVMBinaryTypeMachO32B, /**< MachO 32-bit, big endian. */
+ LLVMBinaryTypeMachO64L, /**< MachO 64-bit, little endian. */
+ LLVMBinaryTypeMachO64B, /**< MachO 64-bit, big endian. */
+ LLVMBinaryTypeWasm, /**< Web Assembly. */
+} LLVMBinaryType;
+
+/**
+ * Create a binary file from the given memory buffer.
+ *
+ * The exact type of the binary file will be inferred automatically, and the
+ * appropriate implementation selected. The context may be NULL except if
+ * the resulting file is an LLVM IR file.
+ *
+ * The memory buffer is not consumed by this function. It is the responsibility
+ * of the caller to free it with \c LLVMDisposeMemoryBuffer.
+ *
+ * If NULL is returned, the \p ErrorMessage parameter is populated with the
+ * error's description. It is then the caller's responsibility to free this
+ * message by calling \c LLVMDisposeMessage.
+ *
+ * @see llvm::object::createBinary
+ */
+LLVMBinaryRef LLVMCreateBinary(LLVMMemoryBufferRef MemBuf,
+ LLVMContextRef Context,
+ char **ErrorMessage);
+
+/**
+ * Dispose of a binary file.
+ *
+ * The binary file does not own its backing buffer. It is the responsibility
+ * of the caller to free it with \c LLVMDisposeMemoryBuffer.
+ */
+void LLVMDisposeBinary(LLVMBinaryRef BR);
+
+/**
+ * Retrieves a copy of the memory buffer associated with this object file.
+ *
+ * The returned buffer is merely a shallow copy and does not own the actual
+ * backing buffer of the binary. Nevertheless, it is the responsibility of the
+ * caller to free it with \c LLVMDisposeMemoryBuffer.
+ *
+ * @see llvm::object::getMemoryBufferRef
+ */
+LLVMMemoryBufferRef LLVMBinaryCopyMemoryBuffer(LLVMBinaryRef BR);
+
+/**
+ * Retrieve the specific type of a binary.
+ *
+ * @see llvm::object::Binary::getType
+ */
+LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR);
+
+/*
+ * For a Mach-O universal binary file, retrieves the object file corresponding
+ * to the given architecture if it is present as a slice.
+ *
+ * If NULL is returned, the \p ErrorMessage parameter is populated with the
+ * error's description. It is then the caller's responsibility to free this
+ * message by calling \c LLVMDisposeMessage.
+ *
+ * It is the responsibility of the caller to free the returned object file by
+ * calling \c LLVMDisposeBinary.
+ */
+LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
+ const char *Arch,
+ size_t ArchLen,
+ char **ErrorMessage);
+
+/**
+ * Retrieve a copy of the section iterator for this object file.
+ *
+ * If there are no sections, the result is NULL.
+ *
+ * The returned iterator is merely a shallow copy. Nevertheless, it is
+ * the responsibility of the caller to free it with
+ * \c LLVMDisposeSectionIterator.
+ *
+ * @see llvm::object::sections()
+ */
+LLVMSectionIteratorRef LLVMObjectFileCopySectionIterator(LLVMBinaryRef BR);
+
+/**
+ * Returns whether the given section iterator is at the end.
+ *
+ * @see llvm::object::section_end
+ */
+LLVMBool LLVMObjectFileIsSectionIteratorAtEnd(LLVMBinaryRef BR,
+ LLVMSectionIteratorRef SI);
+
+/**
+ * Retrieve a copy of the symbol iterator for this object file.
+ *
+ * If there are no symbols, the result is NULL.
+ *
+ * The returned iterator is merely a shallow copy. Nevertheless, it is
+ * the responsibility of the caller to free it with
+ * \c LLVMDisposeSymbolIterator.
+ *
+ * @see llvm::object::symbols()
+ */
+LLVMSymbolIteratorRef LLVMObjectFileCopySymbolIterator(LLVMBinaryRef BR);
+
+/**
+ * Returns whether the given symbol iterator is at the end.
+ *
+ * @see llvm::object::symbol_end
+ */
+LLVMBool LLVMObjectFileIsSymbolIteratorAtEnd(LLVMBinaryRef BR,
+ LLVMSymbolIteratorRef SI);
-// ObjectFile Section iterators
-LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile);
void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI);
-LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
- LLVMSectionIteratorRef SI);
+
void LLVMMoveToNextSection(LLVMSectionIteratorRef SI);
void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
LLVMSymbolIteratorRef Sym);
// ObjectFile Symbol iterators
-LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile);
void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI);
-LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
- LLVMSymbolIteratorRef SI);
void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI);
// SectionRef accessors
@@ -89,6 +200,28 @@ uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI);
const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI);
const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI);
+/** Deprecated: Use LLVMBinaryRef instead. */
+typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
+
+/** Deprecated: Use LLVMCreateBinary instead. */
+LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf);
+
+/** Deprecated: Use LLVMDisposeBinary instead. */
+void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile);
+
+/** Deprecated: Use LLVMObjectFileCopySectionIterator instead. */
+LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile);
+
+/** Deprecated: Use LLVMObjectFileIsSectionIteratorAtEnd instead. */
+LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSectionIteratorRef SI);
+
+/** Deprecated: Use LLVMObjectFileCopySymbolIterator instead. */
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile);
+
+/** Deprecated: Use LLVMObjectFileIsSymbolIteratorAtEnd instead. */
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSymbolIteratorRef SI);
/**
* @}
*/
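
A minimal sketch of how the LLVMBinaryRef-based iteration added above is meant to be used. It assumes LLVMCreateBinary (introduced alongside LLVMBinaryRef but not shown in this hunk) and the pre-existing SectionRef accessor LLVMGetSectionName; both names and the buffer-ownership note are assumptions, not taken from this excerpt, and error handling is kept minimal.

```
#include <stdio.h>
#include "llvm-c/Core.h"
#include "llvm-c/Object.h"

/* Print the name of every section in the object file backed by Buf. */
static void dumpSections(LLVMMemoryBufferRef Buf, LLVMContextRef Ctx) {
  char *Err = NULL;
  LLVMBinaryRef Bin = LLVMCreateBinary(Buf, Ctx, &Err); /* assumed signature */
  if (!Bin) {
    fprintf(stderr, "error: %s\n", Err);
    LLVMDisposeMessage(Err);
    return;
  }
  LLVMSectionIteratorRef SI = LLVMObjectFileCopySectionIterator(Bin);
  while (SI && !LLVMObjectFileIsSectionIteratorAtEnd(Bin, SI)) {
    printf("section: %s\n", LLVMGetSectionName(SI));
    LLVMMoveToNextSection(SI);
  }
  if (SI)
    LLVMDisposeSectionIterator(SI);
  LLVMDisposeBinary(Bin); /* Assumed: Buf itself stays owned by the caller. */
}
```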
diff --git a/include/llvm-c/OptRemarks.h b/include/llvm-c/OptRemarks.h
deleted file mode 100644
index 6a90394e711c..000000000000
--- a/include/llvm-c/OptRemarks.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*===-- llvm-c/OptRemarks.h - OptRemarks Public C Interface -------*- C -*-===*\
-|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header provides a public interface to an opt-remark library. *|
-|* LLVM provides an implementation of this interface. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_OPT_REMARKS_H
-#define LLVM_C_OPT_REMARKS_H
-
-#include "llvm-c/Core.h"
-#include "llvm-c/Types.h"
-#ifdef __cplusplus
-#include <cstddef>
-extern "C" {
-#else
-#include <stddef.h>
-#endif /* !defined(__cplusplus) */
-
-/**
- * @defgroup LLVMCOPTREMARKS OptRemarks
- * @ingroup LLVMC
- *
- * @{
- */
-
-#define OPT_REMARKS_API_VERSION 0
-
-/**
- * String containing a buffer and a length. The buffer is not guaranteed to be
- * zero-terminated.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
- const char *Str;
- uint32_t Len;
-} LLVMOptRemarkStringRef;
-
-/**
- * DebugLoc containing File, Line and Column.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
- // File:
- LLVMOptRemarkStringRef SourceFile;
- // Line:
- uint32_t SourceLineNumber;
- // Column:
- uint32_t SourceColumnNumber;
-} LLVMOptRemarkDebugLoc;
-
-/**
- * Element of the "Args" list. The key might give more information about what
- * are the semantics of the value, e.g. "Callee" will tell you that the value
- * is a symbol that names a function.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
- // e.g. "Callee"
- LLVMOptRemarkStringRef Key;
- // e.g. "malloc"
- LLVMOptRemarkStringRef Value;
-
- // "DebugLoc": Optional
- LLVMOptRemarkDebugLoc DebugLoc;
-} LLVMOptRemarkArg;
-
-/**
- * One remark entry.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-typedef struct {
- // e.g. !Missed, !Passed
- LLVMOptRemarkStringRef RemarkType;
- // "Pass": Required
- LLVMOptRemarkStringRef PassName;
- // "Name": Required
- LLVMOptRemarkStringRef RemarkName;
- // "Function": Required
- LLVMOptRemarkStringRef FunctionName;
-
- // "DebugLoc": Optional
- LLVMOptRemarkDebugLoc DebugLoc;
- // "Hotness": Optional
- uint32_t Hotness;
- // "Args": Optional. It is an array of `num_args` elements.
- uint32_t NumArgs;
- LLVMOptRemarkArg *Args;
-} LLVMOptRemarkEntry;
-
-typedef struct LLVMOptRemarkOpaqueParser *LLVMOptRemarkParserRef;
-
-/**
- * Creates a remark parser that can be used to read and parse the buffer located
- * in \p Buf of size \p Size.
- *
- * \p Buf cannot be NULL.
- *
- * This function should be paired with LLVMOptRemarkParserDispose() to avoid
- * leaking resources.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
- uint64_t Size);
-
-/**
- * Returns the next remark in the file.
- *
- * The value pointed to by the return value is invalidated by the next call to
- * LLVMOptRemarkParserGetNext().
- *
- * If the parser reaches the end of the buffer, the return value will be NULL.
- *
- * In the case of an error, the return value will be NULL, and:
- *
- * 1) LLVMOptRemarkParserHasError() will return `1`.
- *
- * 2) LLVMOptRemarkParserGetErrorMessage() will return a descriptive error
- * message.
- *
- * An error may occur if:
- *
- * 1) An argument is invalid.
- *
- * 2) There is a YAML parsing error. This type of error aborts parsing
- * immediately and returns `1`. It can occur on malformed YAML.
- *
- * 3) Remark parsing error. If this type of error occurs, the parser won't call
- * the handler and will continue to the next one. It can occur on malformed
- * remarks, like missing or extra fields in the file.
- *
- * Here is a quick example of the usage:
- *
- * ```
- * LLVMOptRemarkParserRef Parser = LLVMOptRemarkParserCreate(Buf, Size);
- * LLVMOptRemarkEntry *Remark = NULL;
- * while ((Remark == LLVMOptRemarkParserGetNext(Parser))) {
- * // use Remark
- * }
- * bool HasError = LLVMOptRemarkParserHasError(Parser);
- * LLVMOptRemarkParserDispose(Parser);
- * ```
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern LLVMOptRemarkEntry *
-LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser);
-
-/**
- * Returns `1` if the parser encountered an error while parsing the buffer.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser);
-
-/**
- * Returns a null-terminated string containing an error message.
- *
- * In case of no error, the result is `NULL`.
- *
- * The memory of the string is bound to the lifetime of \p Parser. If
- * LLVMOptRemarkParserDispose() is called, the memory of the string will be
- * released.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern const char *
-LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser);
-
-/**
- * Releases all the resources used by \p Parser.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser);
-
-/**
- * Returns the version of the opt-remarks dylib.
- *
- * \since OPT_REMARKS_API_VERSION=0
- */
-extern uint32_t LLVMOptRemarkVersion(void);
-
-/**
- * @} // endgoup LLVMCOPTREMARKS
- */
-
-#ifdef __cplusplus
-}
-#endif /* !defined(__cplusplus) */
-
-#endif /* LLVM_C_OPT_REMARKS_H */
diff --git a/include/llvm-c/OrcBindings.h b/include/llvm-c/OrcBindings.h
index 570db87fee94..9e92371b5a3a 100644
--- a/include/llvm-c/OrcBindings.h
+++ b/include/llvm-c/OrcBindings.h
@@ -1,9 +1,9 @@
/*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Remarks.h b/include/llvm-c/Remarks.h
new file mode 100644
index 000000000000..88eb5120c57c
--- /dev/null
+++ b/include/llvm-c/Remarks.h
@@ -0,0 +1,329 @@
+/*===-- llvm-c/Remarks.h - Remarks Public C Interface -------------*- C -*-===*\
+|* *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header provides a public interface to a remark diagnostics library. *|
+|* LLVM provides an implementation of this interface. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_REMARKS_H
+#define LLVM_C_REMARKS_H
+
+#include "llvm-c/Types.h"
+#ifdef __cplusplus
+#include <cstddef>
+extern "C" {
+#else
+#include <stddef.h>
+#endif /* !defined(__cplusplus) */
+
+/**
+ * @defgroup LLVMCREMARKS Remarks
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+#define REMARKS_API_VERSION 0
+
+/**
+ * The type of the emitted remark.
+ */
+enum LLVMRemarkType {
+ LLVMRemarkTypeUnknown,
+ LLVMRemarkTypePassed,
+ LLVMRemarkTypeMissed,
+ LLVMRemarkTypeAnalysis,
+ LLVMRemarkTypeAnalysisFPCommute,
+ LLVMRemarkTypeAnalysisAliasing,
+ LLVMRemarkTypeFailure
+};
+
+/**
+ * String containing a buffer and a length. The buffer is not guaranteed to be
+ * zero-terminated.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueString *LLVMRemarkStringRef;
+
+/**
+ * Returns the buffer holding the string.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern const char *LLVMRemarkStringGetData(LLVMRemarkStringRef String);
+
+/**
+ * Returns the size of the string.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkStringGetLen(LLVMRemarkStringRef String);
+
+/**
+ * DebugLoc containing File, Line and Column.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueDebugLoc *LLVMRemarkDebugLocRef;
+
+/**
+ * Return the path to the source file for a debug location.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkDebugLocGetSourceFilePath(LLVMRemarkDebugLocRef DL);
+
+/**
+ * Return the line in the source file for a debug location.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkDebugLocGetSourceLine(LLVMRemarkDebugLocRef DL);
+
+/**
+ * Return the column in the source file for a debug location.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkDebugLocGetSourceColumn(LLVMRemarkDebugLocRef DL);
+
+/**
+ * Element of the "Args" list. The key might give more information about what
+ * the semantics of the value are, e.g. "Callee" will tell you that the value
+ * is a symbol that names a function.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueArg *LLVMRemarkArgRef;
+
+/**
+ * Returns the key of an argument. The key defines what the value is, and the
+ * same key can appear multiple times in the list of arguments.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef LLVMRemarkArgGetKey(LLVMRemarkArgRef Arg);
+
+/**
+ * Returns the value of an argument. This is a string that can contain newlines.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef LLVMRemarkArgGetValue(LLVMRemarkArgRef Arg);
+
+/**
+ * Returns the debug location that is attached to the value of this argument.
+ *
+ * If there is no debug location, the return value will be `NULL`.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkDebugLocRef LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg);
+
+/**
+ * A remark emitted by the compiler.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+typedef struct LLVMRemarkOpaqueEntry *LLVMRemarkEntryRef;
+
+/**
+ * Free the resources used by the remark entry.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark);
+
+/**
+ * The type of the remark. For example, it can allow users to only keep the
+ * missed optimizations from the compiler.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern enum LLVMRemarkType LLVMRemarkEntryGetType(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get the name of the pass that emitted this remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkEntryGetPassName(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get an identifier of the remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkEntryGetRemarkName(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get the name of the function being processed when the remark was emitted.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkStringRef
+LLVMRemarkEntryGetFunctionName(LLVMRemarkEntryRef Remark);
+
+/**
+ * Returns the debug location that is attached to this remark.
+ *
+ * If there is no debug location, the return value will be `NULL`.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkDebugLocRef
+LLVMRemarkEntryGetDebugLoc(LLVMRemarkEntryRef Remark);
+
+/**
+ * Return the hotness of the remark.
+ *
+ * A hotness of `0` means this value is not set.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint64_t LLVMRemarkEntryGetHotness(LLVMRemarkEntryRef Remark);
+
+/**
+ * The number of arguments the remark holds.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkEntryGetNumArgs(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get a new iterator to iterate over a remark's arguments.
+ *
+ * If there are no arguments in \p Remark, the return value will be `NULL`.
+ *
+ * The lifetime of the returned value is bound to the lifetime of \p Remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkArgRef LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark);
+
+/**
+ * Get the next argument in \p Remark from the position of \p It.
+ *
+ * Returns `NULL` if there are no more arguments available.
+ *
+ * The lifetime of the returned value is bound to the lifetime of \p Remark.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkArgRef LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef It,
+ LLVMRemarkEntryRef Remark);
+
+typedef struct LLVMRemarkOpaqueParser *LLVMRemarkParserRef;
+
+/**
+ * Creates a remark parser that can be used to parse the buffer located in \p
+ * Buf of size \p Size bytes.
+ *
+ * \p Buf cannot be `NULL`.
+ *
+ * This function should be paired with LLVMRemarkParserDispose() to avoid
+ * leaking resources.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
+ uint64_t Size);
+
+/**
+ * Returns the next remark in the file.
+ *
+ * The value pointed to by the return value needs to be disposed using a call to
+ * LLVMRemarkEntryDispose().
+ *
+ * All the entries in the returned value that are of LLVMRemarkStringRef type
+ * will become invalidated once a call to LLVMRemarkParserDispose is made.
+ *
+ * If the parser reaches the end of the buffer, the return value will be `NULL`.
+ *
+ * In the case of an error, the return value will be `NULL`, and:
+ *
+ * 1) LLVMRemarkParserHasError() will return `1`.
+ *
+ * 2) LLVMRemarkParserGetErrorMessage() will return a descriptive error
+ * message.
+ *
+ * An error may occur if:
+ *
+ * 1) An argument is invalid.
+ *
+ * 2) There is a parsing error. This can occur on things like malformed YAML.
+ *
+ * 3) There is a Remark semantic error. This can occur on well-formed files with
+ * missing or extra fields.
+ *
+ * Here is a quick example of the usage:
+ *
+ * ```
+ * LLVMRemarkParserRef Parser = LLVMRemarkParserCreateYAML(Buf, Size);
+ * LLVMRemarkEntryRef Remark = NULL;
+ * while ((Remark = LLVMRemarkParserGetNext(Parser))) {
+ * // use Remark
+ * LLVMRemarkEntryDispose(Remark); // Release memory.
+ * }
+ * bool HasError = LLVMRemarkParserHasError(Parser);
+ * LLVMRemarkParserDispose(Parser);
+ * ```
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMRemarkEntryRef LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser);
+
+/**
+ * Returns `1` if the parser encountered an error while parsing the buffer.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser);
+
+/**
+ * Returns a null-terminated string containing an error message.
+ *
+ * In case of no error, the result is `NULL`.
+ *
+ * The memory of the string is bound to the lifetime of \p Parser. If
+ * LLVMRemarkParserDispose() is called, the memory of the string will be
+ * released.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern const char *LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser);
+
+/**
+ * Releases all the resources used by \p Parser.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser);
+
+/**
+ * Returns the version of the remarks library.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern uint32_t LLVMRemarkVersion(void);
+
+/**
+ * @} // endgroup LLVMCREMARKS
+ */
+
+#ifdef __cplusplus
+}
+#endif /* !defined(__cplusplus) */
+
+#endif /* LLVM_C_REMARKS_H */
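
The parser example above stops once an entry has been obtained. As a complement, here is a small sketch, using only functions declared in this header, of walking a remark's key/value arguments; remember that LLVMRemarkStringRef data is not guaranteed to be zero-terminated.

```
#include <stdio.h>
#include "llvm-c/Remarks.h"

/* Print every "Key = Value" argument attached to a remark entry. */
static void printRemarkArgs(LLVMRemarkEntryRef Remark) {
  for (LLVMRemarkArgRef Arg = LLVMRemarkEntryGetFirstArg(Remark); Arg;
       Arg = LLVMRemarkEntryGetNextArg(Arg, Remark)) {
    LLVMRemarkStringRef Key = LLVMRemarkArgGetKey(Arg);
    LLVMRemarkStringRef Val = LLVMRemarkArgGetValue(Arg);
    /* Not zero-terminated: always print with an explicit length. */
    printf("%.*s = %.*s\n",
           (int)LLVMRemarkStringGetLen(Key), LLVMRemarkStringGetData(Key),
           (int)LLVMRemarkStringGetLen(Val), LLVMRemarkStringGetData(Val));
  }
}
```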
diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h
index 37d5d72ff5dc..097f784246c5 100644
--- a/include/llvm-c/Support.h
+++ b/include/llvm-c/Support.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Support.h - Support C Interface --------------------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 03004ba5eec0..4ef641eaf232 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Target.h - Target Lib C Iface --------------------*- C++ -*-===*/
/* */
-/* The LLVM Compiler Infrastructure */
-/* */
-/* This file is distributed under the University of Illinois Open Source */
-/* License. See LICENSE.TXT for details. */
+/* Part of the LLVM Project, under the Apache License v2.0 with LLVM */
+/* Exceptions. */
+/* See https://llvm.org/LICENSE.txt for license information. */
+/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */
/* */
/*===----------------------------------------------------------------------===*/
/* */
@@ -22,10 +22,6 @@
#include "llvm-c/Types.h"
#include "llvm/Config/llvm-config.h"
-#if defined(_MSC_VER) && !defined(inline)
-#define inline __inline
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index c06e9edc9aaf..28d7c096871e 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/AggressiveInstCombine.h b/include/llvm-c/Transforms/AggressiveInstCombine.h
index 8756a22e917a..c0b0141c3da1 100644
--- a/include/llvm-c/Transforms/AggressiveInstCombine.h
+++ b/include/llvm-c/Transforms/AggressiveInstCombine.h
@@ -1,9 +1,9 @@
/*===-- AggressiveInstCombine.h ---------------------------------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/Coroutines.h b/include/llvm-c/Transforms/Coroutines.h
index 827e30fb2d7c..227e7cf0a360 100644
--- a/include/llvm-c/Transforms/Coroutines.h
+++ b/include/llvm-c/Transforms/Coroutines.h
@@ -1,9 +1,9 @@
/*===-- Coroutines.h - Coroutines Library C Interface -----------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 7705b1864dc3..7a82ed464141 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -1,9 +1,9 @@
/*===-- IPO.h - Interprocedural Transformations C Interface -----*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/InstCombine.h b/include/llvm-c/Transforms/InstCombine.h
index e1c1572d53dc..166f278d9a69 100644
--- a/include/llvm-c/Transforms/InstCombine.h
+++ b/include/llvm-c/Transforms/InstCombine.h
@@ -1,9 +1,9 @@
/*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h
index 69786b341ab4..d164c00d49c5 100644
--- a/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Transform/PassManagerBuilder.h - PMB C Interface ---*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index 3c3bb4eb9b82..031cf98b2df2 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -1,9 +1,9 @@
/*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Transforms/Utils.h b/include/llvm-c/Transforms/Utils.h
index f171f7fbbe3e..63594abfa460 100644
--- a/include/llvm-c/Transforms/Utils.h
+++ b/include/llvm-c/Transforms/Utils.h
@@ -1,9 +1,9 @@
/*===-- Utils.h - Transformation Utils Library C Interface ------*- C++ -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
@@ -38,6 +38,9 @@ void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM);
/** See llvm::createPromoteMemoryToRegisterPass function. */
void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM);
+/** See llvm::createAddDiscriminatorsPass function. */
+void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM);
+
/**
* @}
*/
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index e3f9961acfb1..e383481fe4f4 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -1,10 +1,10 @@
/*===---------------------------Vectorize.h --------------------- -*- C -*-===*\
|*===----------- Vectorization Transformation Library C Interface ---------===*|
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
diff --git a/include/llvm-c/Types.h b/include/llvm-c/Types.h
index ce1acf3e0421..612c7d3eff32 100644
--- a/include/llvm-c/Types.h
+++ b/include/llvm-c/Types.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/Support.h - C Interface Types declarations ---------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
@@ -164,6 +164,11 @@ typedef struct LLVMOpaqueModuleFlagEntry LLVMModuleFlagEntry;
typedef struct LLVMOpaqueJITEventListener *LLVMJITEventListenerRef;
/**
+ * @see llvm::object::Binary
+ */
+typedef struct LLVMOpaqueBinary *LLVMBinaryRef;
+
+/**
* @}
*/
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 090cd34af4e9..2467722b1954 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -1,9 +1,9 @@
/*===-- llvm-c/lto.h - LTO Public C Interface ---------------------*- C -*-===*\
|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
+|* Exceptions. *|
+|* See https://llvm.org/LICENSE.txt for license information. *|
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 23
+#define LTO_API_VERSION 24
/**
* \since prior to LTO_API_VERSION=3
@@ -846,7 +846,47 @@ thinlto_codegen_set_cache_size_megabytes(thinlto_code_gen_t cg,
extern void thinlto_codegen_set_cache_size_files(thinlto_code_gen_t cg,
unsigned max_size_files);
-
+/** Opaque reference to an LTO input file */
+typedef struct LLVMOpaqueLTOInput *lto_input_t;
+
+/**
+ * Creates an LTO input file from a buffer. The path
+ * argument is used for diagnostics as this function
+ * otherwise does not know which file the given buffer
+ * is associated with.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern lto_input_t lto_input_create(const void *buffer,
+ size_t buffer_size,
+ const char *path);
+
+/**
+ * Frees all memory internally allocated by the LTO input file.
+ * Upon return the lto_input_t is no longer valid.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern void lto_input_dispose(lto_input_t input);
+
+/**
+ * Returns the number of dependent library specifiers
+ * for the given LTO input file.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern unsigned lto_input_get_num_dependent_libraries(lto_input_t input);
+
+/**
+ * Returns the ith dependent library specifier
+ * for the given LTO input file. The returned
+ * string is not null-terminated.
+ *
+ * \since LTO_API_VERSION=24
+ */
+extern const char * lto_input_get_dependent_library(lto_input_t input,
+ size_t index,
+ size_t *size);
/**
* @} // endgroup LLVMCTLTO_CACHING
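
A short sketch of the lto_input_* entry points added under LTO_API_VERSION=24: create an input from a caller-supplied buffer, enumerate its dependent library specifiers, and dispose of it. Only functions declared above are used; the buffer and path are assumed to come from the caller.

```
#include <stdio.h>
#include "llvm-c/lto.h"

/* List the dependent library specifiers recorded in an LTO input file. */
static void listDependentLibraries(const void *buffer, size_t buffer_size,
                                   const char *path) {
  lto_input_t input = lto_input_create(buffer, buffer_size, path);
  if (!input)
    return;
  unsigned n = lto_input_get_num_dependent_libraries(input);
  for (unsigned i = 0; i != n; ++i) {
    size_t len = 0;
    /* The returned string is not null-terminated; pair it with len. */
    const char *lib = lto_input_get_dependent_library(input, i, &len);
    printf("%.*s\n", (int)len, lib);
  }
  lto_input_dispose(input);
}
```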
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index c6fa5ad674f6..a9648d35cf5d 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/APFloat.h - Arbitrary Precision Floating Point ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -148,6 +147,17 @@ struct APFloatBase {
/// \name Floating Point Semantics.
/// @{
+ enum Semantics {
+ S_IEEEhalf,
+ S_IEEEsingle,
+ S_IEEEdouble,
+ S_x87DoubleExtended,
+ S_IEEEquad,
+ S_PPCDoubleDouble
+ };
+
+ static const llvm::fltSemantics &EnumToSemantics(Semantics S);
+ static Semantics SemanticsToEnum(const llvm::fltSemantics &Sem);
static const fltSemantics &IEEEhalf() LLVM_READNONE;
static const fltSemantics &IEEEsingle() LLVM_READNONE;
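
A minimal sketch of the new Semantics enum: it gives a plain, serializable handle for a floating point format, and EnumToSemantics/SemanticsToEnum convert between that handle and the fltSemantics reference APFloat actually works with. APFloat::getZero is assumed from the existing APFloat API, not from this hunk.

```
#include <cassert>
#include "llvm/ADT/APFloat.h"
using namespace llvm;

APFloat makeIEEEDoubleZero() {
  const fltSemantics &Sem =
      APFloatBase::EnumToSemantics(APFloatBase::S_IEEEdouble);
  assert(APFloatBase::SemanticsToEnum(Sem) == APFloatBase::S_IEEEdouble);
  return APFloat::getZero(Sem); // +0.0 in IEEE double semantics.
}
```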
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 6e106ff8bf5d..2381b75e08b1 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -2213,6 +2212,15 @@ Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
// See friend declaration above. This additional declaration is required in
// order to compile LLVM with IBM xlC compiler.
hash_code hash_value(const APInt &Arg);
-} // End of llvm namespace
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes);
+
+} // namespace llvm
#endif
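
A small sketch of the two byte-store helpers now declared at the end of APInt.h: StoreIntToMemory serializes StoreBytes bytes of an APInt into a raw buffer, and LoadIntFromMemory fills a pre-zeroed, wide-enough APInt back from it.

```
#include <cstdint>
#include "llvm/ADT/APInt.h"
using namespace llvm;

bool roundTripThroughBytes() {
  APInt Src(64, 0x1122334455667788ULL);
  uint8_t Bytes[8] = {};
  StoreIntToMemory(Src, Bytes, sizeof(Bytes)); // Fill 8 bytes from Src.
  APInt Dst(64, 0);                            // Wide enough and zeroed, per the doc.
  LoadIntFromMemory(Dst, Bytes, sizeof(Bytes));
  return Dst == Src;
}
```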
diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h
index 7ee2c4c62fce..0f991826c457 100644
--- a/include/llvm/ADT/APSInt.h
+++ b/include/llvm/ADT/APSInt.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/APSInt.h - Arbitrary Precision Signed Int -----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,6 +42,24 @@ public:
/// \param Str the string to be interpreted.
explicit APSInt(StringRef Str);
+ /// Determine sign of this APSInt.
+ ///
+ /// \returns true if this APSInt is negative, false otherwise
+ bool isNegative() const { return isSigned() && APInt::isNegative(); }
+
+ /// Determine if this APSInt Value is non-negative (>= 0)
+ ///
+ /// \returns true if this APSInt is non-negative, false otherwise
+ bool isNonNegative() const { return !isNegative(); }
+
+ /// Determine if this APSInt Value is positive.
+ ///
+ /// This tests if the value of this APSInt is positive (> 0). Note
+ /// that 0 is not a positive value.
+ ///
+ /// \returns true if this APSInt is positive.
+ bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+
APSInt &operator=(APInt RHS) {
// Retain our current sign.
APInt::operator=(std::move(RHS));
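
A one-function sketch of the three sign predicates added to APSInt above; the point of isStrictlyPositive() is that zero counts as non-negative but not as positive.

```
#include "llvm/ADT/APSInt.h"
using namespace llvm;

void signPredicates() {
  APSInt Zero(APInt(32, 0), /*isUnsigned=*/false);
  bool Neg = Zero.isNegative();          // false
  bool NonNeg = Zero.isNonNegative();    // true
  bool Pos = Zero.isStrictlyPositive();  // false: zero is not positive
  (void)Neg; (void)NonNeg; (void)Pos;
}
```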
diff --git a/include/llvm/ADT/AllocatorList.h b/include/llvm/ADT/AllocatorList.h
index 178c6742a87b..405a2e4264df 100644
--- a/include/llvm/ADT/AllocatorList.h
+++ b/include/llvm/ADT/AllocatorList.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/AllocatorList.h - Custom allocator list ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/Any.h b/include/llvm/ADT/Any.h
index 7faa4c963d3d..5dcd6e73c54f 100644
--- a/include/llvm/ADT/Any.h
+++ b/include/llvm/ADT/Any.h
@@ -1,9 +1,8 @@
//===- Any.h - Generic type erased holder of any type -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 9cb25b09c6cb..773c88f7c9f9 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -1,9 +1,8 @@
//===- ArrayRef.h - Array Reference Wrapper ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -431,7 +430,7 @@ namespace llvm {
std::copy(Data.begin(), Data.end(), this->begin());
}
- OwningArrayRef(OwningArrayRef &&Other) { *this = Other; }
+ OwningArrayRef(OwningArrayRef &&Other) { *this = std::move(Other); }
OwningArrayRef &operator=(OwningArrayRef &&Other) {
delete[] this->data();
@@ -526,12 +525,6 @@ namespace llvm {
/// @}
- // ArrayRefs can be treated like a POD type.
- template <typename T> struct isPodLike;
- template <typename T> struct isPodLike<ArrayRef<T>> {
- static const bool value = true;
- };
-
template <typename T> hash_code hash_value(ArrayRef<T> S) {
return hash_combine_range(S.begin(), S.end());
}
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 9ab1da7c6913..fabf5d9cd348 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/BitVector.h - Bit vectors -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/BitmaskEnum.h b/include/llvm/ADT/BitmaskEnum.h
index 18c6ba5a3eb8..1a18bc721b21 100644
--- a/include/llvm/ADT/BitmaskEnum.h
+++ b/include/llvm/ADT/BitmaskEnum.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/BitmaskEnum.h ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/BreadthFirstIterator.h b/include/llvm/ADT/BreadthFirstIterator.h
index 6bc63c283b09..e97d76680db8 100644
--- a/include/llvm/ADT/BreadthFirstIterator.h
+++ b/include/llvm/ADT/BreadthFirstIterator.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/BreadthFirstIterator.h - Breadth First iterator -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,7 +124,7 @@ public:
const NodeRef &operator*() const { return VisitQueue.front()->first; }
- // This is a nonstandard operator-> that dereferenfces the pointer an extra
+ // This is a nonstandard operator-> that dereferences the pointer an extra
// time so that you can actually call methods on the node, because the
// contained type is a pointer.
NodeRef operator->() const { return **this; }
diff --git a/include/llvm/ADT/CachedHashString.h b/include/llvm/ADT/CachedHashString.h
index d8f0e7afdd49..80144fb87e0e 100644
--- a/include/llvm/ADT/CachedHashString.h
+++ b/include/llvm/ADT/CachedHashString.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
index 41fdd43efb8a..d4cdc3c86048 100644
--- a/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -1,9 +1,8 @@
//===- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DAGDELTAALGORITHM_H
diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h
index 6becb2a60104..114b95499530 100644
--- a/include/llvm/ADT/DeltaAlgorithm.h
+++ b/include/llvm/ADT/DeltaAlgorithm.h
@@ -1,9 +1,8 @@
//===- DeltaAlgorithm.h - A Set Minimization Algorithm ---------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_DELTAALGORITHM_H
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 1f50502fff92..a05cf8130d3c 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/DenseMap.h - Dense probed hash table ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,7 +63,7 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
template <typename AltPairT>
DenseMapPair(AltPairT &&AltPair,
typename std::enable_if<std::is_convertible<
- AltPairT, std::pair<KeyT, ValueT>>::value>::type * = 0)
+ AltPairT, std::pair<KeyT, ValueT>>::value>::type * = nullptr)
: std::pair<KeyT, ValueT>(std::forward<AltPairT>(AltPair)) {}
KeyT &getFirst() { return std::pair<KeyT, ValueT>::first; }
@@ -146,7 +145,8 @@ public:
}
const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
- if (isPodLike<KeyT>::value && isPodLike<ValueT>::value) {
+ if (is_trivially_copyable<KeyT>::value &&
+ is_trivially_copyable<ValueT>::value) {
// Use a simpler loop when these are trivial types.
for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P)
P->getFirst() = EmptyKey;
@@ -422,7 +422,8 @@ protected:
setNumEntries(other.getNumEntries());
setNumTombstones(other.getNumTombstones());
- if (isPodLike<KeyT>::value && isPodLike<ValueT>::value)
+ if (is_trivially_copyable<KeyT>::value &&
+ is_trivially_copyable<ValueT>::value)
memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(),
getNumBuckets() * sizeof(BucketT));
else
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 5d12b424fb37..5ef6f3ad1b04 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/DenseMapInfo.h - Type traits for DenseMap -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/ScalableSize.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -269,6 +269,21 @@ template <> struct DenseMapInfo<hash_code> {
static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
};
+template <> struct DenseMapInfo<ElementCount> {
+ static inline ElementCount getEmptyKey() { return {~0U, true}; }
+ static inline ElementCount getTombstoneKey() { return {~0U - 1, false}; }
+ static unsigned getHashValue(const ElementCount& EltCnt) {
+ if (EltCnt.Scalable)
+ return (EltCnt.Min * 37U) - 1U;
+
+ return EltCnt.Min * 37U;
+ }
+
+ static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) {
+ return LHS == RHS;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_ADT_DENSEMAPINFO_H
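
With the DenseMapInfo<ElementCount> specialization added above, ElementCount can be used directly as a DenseMap key. A sketch of caching a value per vector width follows; ElementCount itself comes from llvm/Support/ScalableSize.h (included above), and the map contents are purely illustrative.

```
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/ScalableSize.h"
using namespace llvm;

void perWidthCache() {
  DenseMap<ElementCount, unsigned> CostPerWidth;
  ElementCount Fixed4(4, /*Scalable=*/false);   // <4 x T>
  ElementCount Scalable4(4, /*Scalable=*/true); // <vscale x 4 x T>
  CostPerWidth[Fixed4] = 1;
  CostPerWidth[Scalable4] = 2;
  unsigned FixedCost = CostPerWidth.lookup(Fixed4);
  (void)FixedCost;
}
```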
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index e85a38587e41..9afb715ae1db 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/DenseSet.h - Dense probed hash table ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -131,7 +130,7 @@ public:
class ConstIterator {
typename MapTy::const_iterator I;
- friend class DenseSet;
+ friend class DenseSetImpl;
friend class Iterator;
public:
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 1f3766d3c9de..11967f5eefcc 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/DepthFirstIterator.h - Depth First iterator -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/EpochTracker.h b/include/llvm/ADT/EpochTracker.h
index 49ef192364e8..a782b4756898 100644
--- a/include/llvm/ADT/EpochTracker.h
+++ b/include/llvm/ADT/EpochTracker.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/EpochTracker.h - ADT epoch tracking --------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index e3f48433c69f..2cb7108c0794 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index e363e69d032a..d5837e51bcfc 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/FoldingSet.h - Uniquing Hash Set ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/FunctionExtras.h b/include/llvm/ADT/FunctionExtras.h
index 2b75dc6ac219..121aa527a5da 100644
--- a/include/llvm/ADT/FunctionExtras.h
+++ b/include/llvm/ADT/FunctionExtras.h
@@ -1,9 +1,8 @@
//===- FunctionExtras.h - Function type erasure utilities -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index d39b50fdc488..3ce91225d80d 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/GraphTraits.h - Graph traits template -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index 9175c545b7c9..008188bfa210 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -192,7 +191,7 @@ inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) {
uint8_t b = s[len >> 1];
uint8_t c = s[len - 1];
uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
- uint32_t z = len + (static_cast<uint32_t>(c) << 2);
+ uint32_t z = static_cast<uint32_t>(len) + (static_cast<uint32_t>(c) << 2);
return shift_mix(y * k2 ^ z * k3 ^ seed) * k2;
}
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 0541dc2566ed..c9ee494734e7 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -1,9 +1,8 @@
//==--- ImmutableList.h - Immutable (functional) list interface --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -242,10 +241,6 @@ template<typename T> struct DenseMapInfo<ImmutableList<T>> {
}
};
-template <typename T> struct isPodLike;
-template <typename T>
-struct isPodLike<ImmutableList<T>> { static const bool value = true; };
-
} // end namespace llvm
#endif // LLVM_ADT_IMMUTABLELIST_H
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index cbc27ff17ccf..86fd7fefaec3 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -1,9 +1,8 @@
//===--- ImmutableMap.h - Immutable (functional) map interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index b1d5f4ac42e4..587105431533 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -1,9 +1,8 @@
//===--- ImmutableSet.h - Immutable (functional) set interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h
index 2ee80d2cde63..b44f16b91d76 100644
--- a/include/llvm/ADT/IndexedMap.h
+++ b/include/llvm/ADT/IndexedMap.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/IndexedMap.h - An index map implementation ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/IntEqClasses.h b/include/llvm/ADT/IntEqClasses.h
index 0baee2f11a79..08f46a3079ef 100644
--- a/include/llvm/ADT/IntEqClasses.h
+++ b/include/llvm/ADT/IntEqClasses.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/IntEqClasses.h - Equiv. Classes of Integers ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 2af61049e5af..12828c4cfdab 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/IntervalMap.h - A sorted interval map -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index 430ef86afbd9..6d97fe15db8b 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -1,9 +1,8 @@
//==- llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h
index 47b4987f210a..1de1124f4ea2 100644
--- a/include/llvm/ADT/MapVector.h
+++ b/include/llvm/ADT/MapVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/MapVector.h - Map w/ deterministic value order --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/None.h b/include/llvm/ADT/None.h
index 4b6bc1e005b5..004ca0ac50ac 100644
--- a/include/llvm/ADT/None.h
+++ b/include/llvm/ADT/None.h
@@ -1,9 +1,8 @@
//===-- None.h - Simple null value for implicit construction ------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index 76937d632ae1..b45a74002e10 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -1,9 +1,8 @@
//===- Optional.h - Simple variant for passing optional values --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,94 +16,197 @@
#define LLVM_ADT_OPTIONAL_H
#include "llvm/ADT/None.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/type_traits.h"
-#include <algorithm>
#include <cassert>
+#include <memory>
#include <new>
#include <utility>
namespace llvm {
+class raw_ostream;
+
namespace optional_detail {
+
+struct in_place_t {};
+
/// Storage for any type.
-template <typename T, bool = isPodLike<T>::value> struct OptionalStorage {
- AlignedCharArrayUnion<T> storage;
- bool hasVal = false;
+template <typename T, bool = is_trivially_copyable<T>::value>
+class OptionalStorage {
+ union {
+ char empty;
+ T value;
+ };
+ bool hasVal;
- OptionalStorage() = default;
+public:
+ ~OptionalStorage() { reset(); }
- OptionalStorage(const T &y) : hasVal(true) { new (storage.buffer) T(y); }
- OptionalStorage(const OptionalStorage &O) : hasVal(O.hasVal) {
- if (hasVal)
- new (storage.buffer) T(*O.getPointer());
+ OptionalStorage() noexcept : empty(), hasVal(false) {}
+
+ OptionalStorage(OptionalStorage const &other) : OptionalStorage() {
+ if (other.hasValue()) {
+ emplace(other.value);
+ }
}
- OptionalStorage(T &&y) : hasVal(true) {
- new (storage.buffer) T(std::forward<T>(y));
+ OptionalStorage(OptionalStorage &&other) : OptionalStorage() {
+ if (other.hasValue()) {
+ emplace(std::move(other.value));
+ }
}
- OptionalStorage(OptionalStorage &&O) : hasVal(O.hasVal) {
- if (O.hasVal) {
- new (storage.buffer) T(std::move(*O.getPointer()));
+
+ template <class... Args>
+ explicit OptionalStorage(in_place_t, Args &&... args)
+ : value(std::forward<Args>(args)...), hasVal(true) {}
+
+ void reset() noexcept {
+ if (hasVal) {
+ value.~T();
+ hasVal = false;
}
}
- OptionalStorage &operator=(T &&y) {
- if (hasVal)
- *getPointer() = std::move(y);
- else {
- new (storage.buffer) T(std::move(y));
+ bool hasValue() const noexcept { return hasVal; }
+
+ T &getValue() LLVM_LVALUE_FUNCTION noexcept {
+ assert(hasVal);
+ return value;
+ }
+ T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
+ assert(hasVal);
+ return value;
+ }
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+ T &&getValue() && noexcept {
+ assert(hasVal);
+ return std::move(value);
+ }
+#endif
+
+ template <class... Args> void emplace(Args &&... args) {
+ reset();
+ ::new ((void *)std::addressof(value)) T(std::forward<Args>(args)...);
+ hasVal = true;
+ }
+
+ OptionalStorage &operator=(T const &y) {
+ if (hasValue()) {
+ value = y;
+ } else {
+ ::new ((void *)std::addressof(value)) T(y);
hasVal = true;
}
return *this;
}
- OptionalStorage &operator=(OptionalStorage &&O) {
- if (!O.hasVal)
- reset();
- else {
- *this = std::move(*O.getPointer());
+ OptionalStorage &operator=(T &&y) {
+ if (hasValue()) {
+ value = std::move(y);
+ } else {
+ ::new ((void *)std::addressof(value)) T(std::move(y));
+ hasVal = true;
}
return *this;
}
- // FIXME: these assignments (& the equivalent const T&/const Optional& ctors)
- // could be made more efficient by passing by value, possibly unifying them
- // with the rvalue versions above - but this could place a different set of
- // requirements (notably: the existence of a default ctor) when implemented
- // in that way. Careful SFINAE to avoid such pitfalls would be required.
- OptionalStorage &operator=(const T &y) {
- if (hasVal)
- *getPointer() = y;
- else {
- new (storage.buffer) T(y);
- hasVal = true;
+ OptionalStorage &operator=(OptionalStorage const &other) {
+ if (other.hasValue()) {
+ if (hasValue()) {
+ value = other.value;
+ } else {
+ ::new ((void *)std::addressof(value)) T(other.value);
+ hasVal = true;
+ }
+ } else {
+ reset();
}
return *this;
}
- OptionalStorage &operator=(const OptionalStorage &O) {
- if (!O.hasVal)
+
+ OptionalStorage &operator=(OptionalStorage &&other) {
+ if (other.hasValue()) {
+ if (hasValue()) {
+ value = std::move(other.value);
+ } else {
+ ::new ((void *)std::addressof(value)) T(std::move(other.value));
+ hasVal = true;
+ }
+ } else {
reset();
- else
- *this = *O.getPointer();
+ }
return *this;
}
+};
- ~OptionalStorage() { reset(); }
+template <typename T> class OptionalStorage<T, true> {
+ union {
+ char empty;
+ T value;
+ };
+ bool hasVal = false;
+
+public:
+ ~OptionalStorage() = default;
+
+ OptionalStorage() noexcept : empty{} {}
+
+ OptionalStorage(OptionalStorage const &other) = default;
+ OptionalStorage(OptionalStorage &&other) = default;
+
+ OptionalStorage &operator=(OptionalStorage const &other) = default;
+ OptionalStorage &operator=(OptionalStorage &&other) = default;
+
+ template <class... Args>
+ explicit OptionalStorage(in_place_t, Args &&... args)
+ : value(std::forward<Args>(args)...), hasVal(true) {}
- void reset() {
+ void reset() noexcept {
if (hasVal) {
- (*getPointer()).~T();
+ value.~T();
hasVal = false;
}
}
- T *getPointer() {
+ bool hasValue() const noexcept { return hasVal; }
+
+ T &getValue() LLVM_LVALUE_FUNCTION noexcept {
assert(hasVal);
- return reinterpret_cast<T *>(storage.buffer);
+ return value;
}
- const T *getPointer() const {
+ T const &getValue() const LLVM_LVALUE_FUNCTION noexcept {
assert(hasVal);
- return reinterpret_cast<const T *>(storage.buffer);
+ return value;
+ }
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+ T &&getValue() && noexcept {
+ assert(hasVal);
+ return std::move(value);
+ }
+#endif
+
+ template <class... Args> void emplace(Args &&... args) {
+ reset();
+ ::new ((void *)std::addressof(value)) T(std::forward<Args>(args)...);
+ hasVal = true;
+ }
+
+ OptionalStorage &operator=(T const &y) {
+ if (hasValue()) {
+ value = y;
+ } else {
+ ::new ((void *)std::addressof(value)) T(y);
+ hasVal = true;
+ }
+ return *this;
+ }
+ OptionalStorage &operator=(T &&y) {
+ if (hasValue()) {
+ value = std::move(y);
+ } else {
+ ::new ((void *)std::addressof(value)) T(std::move(y));
+ hasVal = true;
+ }
+ return *this;
}
};
@@ -119,10 +221,10 @@ public:
constexpr Optional() {}
constexpr Optional(NoneType) {}
- Optional(const T &y) : Storage(y) {}
+ Optional(const T &y) : Storage(optional_detail::in_place_t{}, y) {}
Optional(const Optional &O) = default;
- Optional(T &&y) : Storage(std::forward<T>(y)) {}
+ Optional(T &&y) : Storage(optional_detail::in_place_t{}, std::move(y)) {}
Optional(Optional &&O) = default;
Optional &operator=(T &&y) {
@@ -133,9 +235,7 @@ public:
/// Create a new object by constructing it in place with the given arguments.
template <typename... ArgTypes> void emplace(ArgTypes &&... Args) {
- reset();
- Storage.hasVal = true;
- new (getPointer()) T(std::forward<ArgTypes>(Args)...);
+ Storage.emplace(std::forward<ArgTypes>(Args)...);
}
static inline Optional create(const T *y) {
@@ -150,23 +250,17 @@ public:
void reset() { Storage.reset(); }
- const T *getPointer() const {
- assert(Storage.hasVal);
- return reinterpret_cast<const T *>(Storage.storage.buffer);
- }
- T *getPointer() {
- assert(Storage.hasVal);
- return reinterpret_cast<T *>(Storage.storage.buffer);
- }
- const T &getValue() const LLVM_LVALUE_FUNCTION { return *getPointer(); }
- T &getValue() LLVM_LVALUE_FUNCTION { return *getPointer(); }
+ const T *getPointer() const { return &Storage.getValue(); }
+ T *getPointer() { return &Storage.getValue(); }
+ const T &getValue() const LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
+ T &getValue() LLVM_LVALUE_FUNCTION { return Storage.getValue(); }
- explicit operator bool() const { return Storage.hasVal; }
- bool hasValue() const { return Storage.hasVal; }
+ explicit operator bool() const { return hasValue(); }
+ bool hasValue() const { return Storage.hasValue(); }
const T *operator->() const { return getPointer(); }
T *operator->() { return getPointer(); }
- const T &operator*() const LLVM_LVALUE_FUNCTION { return *getPointer(); }
- T &operator*() LLVM_LVALUE_FUNCTION { return *getPointer(); }
+ const T &operator*() const LLVM_LVALUE_FUNCTION { return getValue(); }
+ T &operator*() LLVM_LVALUE_FUNCTION { return getValue(); }
template <typename U>
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION {
@@ -174,8 +268,8 @@ public:
}
#if LLVM_HAS_RVALUE_REFERENCE_THIS
- T &&getValue() && { return std::move(*getPointer()); }
- T &&operator*() && { return std::move(*getPointer()); }
+ T &&getValue() && { return std::move(Storage.getValue()); }
+ T &&operator*() && { return std::move(Storage.getValue()); }
template <typename U>
T getValueOr(U &&value) && {
@@ -184,11 +278,6 @@ public:
#endif
};
-template <typename T> struct isPodLike<Optional<T>> {
- // An Optional<T> is pod-like if T is.
- static const bool value = isPodLike<T>::value;
-};
-
template <typename T, typename U>
bool operator==(const Optional<T> &X, const Optional<U> &Y) {
if (X && Y)
@@ -323,6 +412,18 @@ template <typename T> bool operator>=(const T &X, const Optional<T> &Y) {
return !(X < Y);
}
+raw_ostream &operator<<(raw_ostream &OS, NoneType);
+
+template <typename T, typename = decltype(std::declval<raw_ostream &>()
+ << std::declval<const T &>())>
+raw_ostream &operator<<(raw_ostream &OS, const Optional<T> &O) {
+ if (O)
+ OS << *O;
+ else
+ OS << None;
+ return OS;
+}
+
} // end namespace llvm
#endif // LLVM_ADT_OPTIONAL_H
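
A minimal usage sketch of the rewritten Optional, from the caller's side (editor's illustration, not part of the patch); it assumes the header above and a program linked against libSupport, which supplies the out-of-line printer for None:

#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::Optional<int> Width;            // default-constructed: empty (None)
  llvm::outs() << Width << "\n";        // uses the new operator<<; prints the None marker

  Width.emplace(80);                    // constructs the value in place via OptionalStorage::emplace
  if (Width)                            // explicit operator bool
    llvm::outs() << *Width << "\n";     // prints 80

  int W = Width.getValueOr(0);          // falls back to 0 when empty
  (void)W;
  return 0;
}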
diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h
index 3d53c49536d0..ae7f8cc85743 100644
--- a/include/llvm/ADT/PackedVector.h
+++ b/include/llvm/ADT/PackedVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PackedVector.h - Packed values vector -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/PointerEmbeddedInt.h b/include/llvm/ADT/PointerEmbeddedInt.h
index ab4e1048a5bc..3eb6edb03430 100644
--- a/include/llvm/ADT/PointerEmbeddedInt.h
+++ b/include/llvm/ADT/PointerEmbeddedInt.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PointerEmbeddedInt.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 6d1b53a90ad2..24a2bb67a36e 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#define LLVM_ADT_POINTERINTPAIR_H
#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
#include <cassert>
#include <cstdint>
#include <limits>
@@ -126,6 +126,19 @@ public:
}
};
+// Specialize is_trivially_copyable to avoid a limitation of llvm::is_trivially_copyable
+// when compiled with gcc 4.9.
+template <typename PointerTy, unsigned IntBits, typename IntType,
+ typename PtrTraits,
+ typename Info>
+struct is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>> : std::true_type {
+#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
+ static_assert(std::is_trivially_copyable<PointerIntPair<PointerTy, IntBits, IntType, PtrTraits, Info>>::value,
+ "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
+#endif
+};
+
+
template <typename PointerT, unsigned IntBits, typename PtrTraits>
struct PointerIntPairInfo {
static_assert(PtrTraits::NumLowBitsAvailable <
@@ -176,12 +189,6 @@ struct PointerIntPairInfo {
}
};
-template <typename T> struct isPodLike;
-template <typename PointerTy, unsigned IntBits, typename IntType>
-struct isPodLike<PointerIntPair<PointerTy, IntBits, IntType>> {
- static const bool value = true;
-};
-
// Provide specialization of DenseMapInfo for PointerIntPair.
template <typename PointerTy, unsigned IntBits, typename IntType>
struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
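
To show what the new is_trivially_copyable specialization provides in practice, a small sketch (editor's example, assuming only the headers shown above):

#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/type_traits.h"
#include <cassert>

int main() {
  int X = 7;
  // Pack a pointer and a 2-bit integer into one pointer-sized word,
  // using the low bits freed up by the alignment of int.
  llvm::PointerIntPair<int *, 2> P(&X, 3);
  assert(P.getPointer() == &X && P.getInt() == 3);

  // The specialization above makes the trait hold even on gcc 4.9,
  // replacing the old isPodLike opt-in.
  static_assert(llvm::is_trivially_copyable<decltype(P)>::value,
                "PointerIntPair should be trivially copyable");
  return 0;
}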
diff --git a/include/llvm/ADT/PointerSumType.h b/include/llvm/ADT/PointerSumType.h
index a19e45a46218..d467f83f58ac 100644
--- a/include/llvm/ADT/PointerSumType.h
+++ b/include/llvm/ADT/PointerSumType.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PointerSumType.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 315e58336cba..2bcdf546c6e4 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PointerUnion.h - Discriminated Union of 2 Ptrs --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -54,22 +53,98 @@ struct PointerUnionTypeSelectorReturn<
typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return;
};
-/// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
-/// for the two template arguments.
-template <typename PT1, typename PT2> class PointerUnionUIntTraits {
-public:
- static inline void *getAsVoidPointer(void *P) { return P; }
- static inline void *getFromVoidPointer(void *P) { return P; }
+namespace pointer_union_detail {
+ constexpr int constexprMin(int a, int b) { return a < b ? a : b; }
+ /// Determine the number of bits required to store integers with values < n.
+ /// This is ceil(log2(n)).
+ constexpr int bitsRequired(unsigned n) {
+ return n > 1 ? 1 + bitsRequired((n + 1) / 2) : 0;
+ }
+
+ // FIXME: In C++14, replace this with
+ // std::min({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...})
+ template <typename T> constexpr int lowBitsAvailable() {
+ return PointerLikeTypeTraits<T>::NumLowBitsAvailable;
+ }
+ template <typename T1, typename T2, typename... Ts>
+ constexpr int lowBitsAvailable() {
+ return constexprMin(lowBitsAvailable<T1>(), lowBitsAvailable<T2, Ts...>());
+ }
- enum {
- PT1BitsAv = (int)(PointerLikeTypeTraits<PT1>::NumLowBitsAvailable),
- PT2BitsAv = (int)(PointerLikeTypeTraits<PT2>::NumLowBitsAvailable),
- NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv
+ /// Find the index of a type in a list of types. TypeIndex<T, Us...>::Index
+ /// is the index of T in Us, or sizeof...(Us) if T does not appear in the
+ /// list.
+ template <typename T, typename ...Us> struct TypeIndex;
+ template <typename T, typename ...Us> struct TypeIndex<T, T, Us...> {
+ static constexpr int Index = 0;
};
-};
+ template <typename T, typename U, typename... Us>
+ struct TypeIndex<T, U, Us...> {
+ static constexpr int Index = 1 + TypeIndex<T, Us...>::Index;
+ };
+ template <typename T> struct TypeIndex<T> {
+ static constexpr int Index = 0;
+ };
+
+ /// Find the first type in a list of types.
+ template <typename T, typename...> struct GetFirstType {
+ using type = T;
+ };
+
+ /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion
+ /// for the template arguments.
+ template <typename ...PTs> class PointerUnionUIntTraits {
+ public:
+ static inline void *getAsVoidPointer(void *P) { return P; }
+ static inline void *getFromVoidPointer(void *P) { return P; }
+ static constexpr int NumLowBitsAvailable = lowBitsAvailable<PTs...>();
+ };
+
+  /// Implement assignment in terms of construction.
+ template <typename Derived, typename T> struct AssignableFrom {
+ Derived &operator=(T t) {
+ return static_cast<Derived &>(*this) = Derived(t);
+ }
+ };
+
+ template <typename Derived, typename ValTy, int I, typename ...Types>
+ class PointerUnionMembers;
-/// A discriminated union of two pointer types, with the discriminator in the
-/// low bit of the pointer.
+ template <typename Derived, typename ValTy, int I>
+ class PointerUnionMembers<Derived, ValTy, I> {
+ protected:
+ ValTy Val;
+ PointerUnionMembers() = default;
+ PointerUnionMembers(ValTy Val) : Val(Val) {}
+
+ friend struct PointerLikeTypeTraits<Derived>;
+ };
+
+ template <typename Derived, typename ValTy, int I, typename Type,
+ typename ...Types>
+ class PointerUnionMembers<Derived, ValTy, I, Type, Types...>
+ : public PointerUnionMembers<Derived, ValTy, I + 1, Types...> {
+ using Base = PointerUnionMembers<Derived, ValTy, I + 1, Types...>;
+ public:
+ using Base::Base;
+ PointerUnionMembers() = default;
+ PointerUnionMembers(Type V)
+ : Base(ValTy(const_cast<void *>(
+ PointerLikeTypeTraits<Type>::getAsVoidPointer(V)),
+ I)) {}
+
+ using Base::operator=;
+ Derived &operator=(Type V) {
+ this->Val = ValTy(
+ const_cast<void *>(PointerLikeTypeTraits<Type>::getAsVoidPointer(V)),
+ I);
+ return static_cast<Derived &>(*this);
+ };
+ };
+}
+
+/// A discriminated union of two or more pointer types, with the discriminator
+/// in the low bit of the pointer.
///
/// This implementation is extremely efficient in space due to leveraging the
/// low bits of the pointer, while exposing a natural and type-safe API.
@@ -84,49 +159,44 @@ public:
/// P = (float*)0;
/// Y = P.get<float*>(); // ok.
/// X = P.get<int*>(); // runtime assertion failure.
-template <typename PT1, typename PT2> class PointerUnion {
-public:
- using ValTy =
- PointerIntPair<void *, 1, bool, PointerUnionUIntTraits<PT1, PT2>>;
-
-private:
- ValTy Val;
-
- struct IsPT1 {
- static const int Num = 0;
- };
- struct IsPT2 {
- static const int Num = 1;
- };
- template <typename T> struct UNION_DOESNT_CONTAIN_TYPE {};
+template <typename... PTs>
+class PointerUnion
+ : public pointer_union_detail::PointerUnionMembers<
+ PointerUnion<PTs...>,
+ PointerIntPair<
+ void *, pointer_union_detail::bitsRequired(sizeof...(PTs)), int,
+ pointer_union_detail::PointerUnionUIntTraits<PTs...>>,
+ 0, PTs...> {
+ // The first type is special in some ways, but we don't want PointerUnion to
+ // be a 'template <typename First, typename ...Rest>' because it's much more
+ // convenient to have a name for the whole pack. So split off the first type
+ // here.
+ using First = typename pointer_union_detail::GetFirstType<PTs...>::type;
+ using Base = typename PointerUnion::PointerUnionMembers;
public:
PointerUnion() = default;
- PointerUnion(PT1 V)
- : Val(const_cast<void *>(
- PointerLikeTypeTraits<PT1>::getAsVoidPointer(V))) {}
- PointerUnion(PT2 V)
- : Val(const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)),
- 1) {}
+
+ PointerUnion(std::nullptr_t) : PointerUnion() {}
+ using Base::Base;
/// Test if the pointer held in the union is null, regardless of
/// which type it is.
bool isNull() const {
// Convert from the void* to one of the pointer types, to make sure that
// we recursively strip off low bits if we have a nested PointerUnion.
- return !PointerLikeTypeTraits<PT1>::getFromVoidPointer(Val.getPointer());
+ return !PointerLikeTypeTraits<First>::getFromVoidPointer(
+ this->Val.getPointer());
}
explicit operator bool() const { return !isNull(); }
/// Test if the Union currently holds the type matching T.
template <typename T> int is() const {
- using Ty = typename ::llvm::PointerUnionTypeSelector<
- PT1, T, IsPT1,
- ::llvm::PointerUnionTypeSelector<PT2, T, IsPT2,
- UNION_DOESNT_CONTAIN_TYPE<T>>>::Return;
- int TyNo = Ty::Num;
- return static_cast<int>(Val.getInt()) == TyNo;
+ constexpr int Index = pointer_union_detail::TypeIndex<T, PTs...>::Index;
+ static_assert(Index < sizeof...(PTs),
+ "PointerUnion::is<T> given type not in the union");
+ return this->Val.getInt() == Index;
}
/// Returns the value of the specified pointer type.
@@ -134,7 +204,7 @@ public:
/// If the specified pointer type is incorrect, assert.
template <typename T> T get() const {
assert(is<T>() && "Invalid accessor called");
- return PointerLikeTypeTraits<T>::getFromVoidPointer(Val.getPointer());
+ return PointerLikeTypeTraits<T>::getFromVoidPointer(this->Val.getPointer());
}
/// Returns the current pointer if it is of the specified pointer type,
@@ -147,342 +217,100 @@ public:
/// If the union is set to the first pointer type get an address pointing to
/// it.
- PT1 const *getAddrOfPtr1() const {
+ First const *getAddrOfPtr1() const {
return const_cast<PointerUnion *>(this)->getAddrOfPtr1();
}
/// If the union is set to the first pointer type get an address pointing to
/// it.
- PT1 *getAddrOfPtr1() {
- assert(is<PT1>() && "Val is not the first pointer");
+ First *getAddrOfPtr1() {
+ assert(is<First>() && "Val is not the first pointer");
assert(
- get<PT1>() == Val.getPointer() &&
+ get<First>() == this->Val.getPointer() &&
"Can't get the address because PointerLikeTypeTraits changes the ptr");
- return const_cast<PT1 *>(
- reinterpret_cast<const PT1 *>(Val.getAddrOfPointer()));
+ return const_cast<First *>(
+ reinterpret_cast<const First *>(this->Val.getAddrOfPointer()));
}
/// Assignment from nullptr which just clears the union.
const PointerUnion &operator=(std::nullptr_t) {
- Val.initWithPointer(nullptr);
+ this->Val.initWithPointer(nullptr);
return *this;
}
- /// Assignment operators - Allow assigning into this union from either
- /// pointer type, setting the discriminator to remember what it came from.
- const PointerUnion &operator=(const PT1 &RHS) {
- Val.initWithPointer(
- const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(RHS)));
- return *this;
- }
- const PointerUnion &operator=(const PT2 &RHS) {
- Val.setPointerAndInt(
- const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)),
- 1);
- return *this;
- }
+ /// Assignment from elements of the union.
+ using Base::operator=;
- void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+ void *getOpaqueValue() const { return this->Val.getOpaqueValue(); }
static inline PointerUnion getFromOpaqueValue(void *VP) {
PointerUnion V;
- V.Val = ValTy::getFromOpaqueValue(VP);
+ V.Val = decltype(V.Val)::getFromOpaqueValue(VP);
return V;
}
};
-template <typename PT1, typename PT2>
-bool operator==(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+template <typename ...PTs>
+bool operator==(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
return lhs.getOpaqueValue() == rhs.getOpaqueValue();
}
-template <typename PT1, typename PT2>
-bool operator!=(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+template <typename ...PTs>
+bool operator!=(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
return lhs.getOpaqueValue() != rhs.getOpaqueValue();
}
-template <typename PT1, typename PT2>
-bool operator<(PointerUnion<PT1, PT2> lhs, PointerUnion<PT1, PT2> rhs) {
+template <typename ...PTs>
+bool operator<(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) {
return lhs.getOpaqueValue() < rhs.getOpaqueValue();
}
// Teach SmallPtrSet that PointerUnion is "basically a pointer", that has
// # low bits available = min(PT1bits,PT2bits)-1.
-template <typename PT1, typename PT2>
-struct PointerLikeTypeTraits<PointerUnion<PT1, PT2>> {
- static inline void *getAsVoidPointer(const PointerUnion<PT1, PT2> &P) {
+template <typename ...PTs>
+struct PointerLikeTypeTraits<PointerUnion<PTs...>> {
+ static inline void *getAsVoidPointer(const PointerUnion<PTs...> &P) {
return P.getOpaqueValue();
}
- static inline PointerUnion<PT1, PT2> getFromVoidPointer(void *P) {
- return PointerUnion<PT1, PT2>::getFromOpaqueValue(P);
+ static inline PointerUnion<PTs...> getFromVoidPointer(void *P) {
+ return PointerUnion<PTs...>::getFromOpaqueValue(P);
}
- // The number of bits available are the min of the two pointer types.
- enum {
- NumLowBitsAvailable = PointerLikeTypeTraits<
- typename PointerUnion<PT1, PT2>::ValTy>::NumLowBitsAvailable
- };
+  // The number of bits available is the min of the pointer types minus the
+ // bits needed for the discriminator.
+ static constexpr int NumLowBitsAvailable = PointerLikeTypeTraits<decltype(
+ PointerUnion<PTs...>::Val)>::NumLowBitsAvailable;
};
/// A pointer union of three pointer types. See documentation for PointerUnion
/// for usage.
-template <typename PT1, typename PT2, typename PT3> class PointerUnion3 {
-public:
- using InnerUnion = PointerUnion<PT1, PT2>;
- using ValTy = PointerUnion<InnerUnion, PT3>;
-
-private:
- ValTy Val;
-
- struct IsInnerUnion {
- ValTy Val;
-
- IsInnerUnion(ValTy val) : Val(val) {}
-
- template <typename T> int is() const {
- return Val.template is<InnerUnion>() &&
- Val.template get<InnerUnion>().template is<T>();
- }
-
- template <typename T> T get() const {
- return Val.template get<InnerUnion>().template get<T>();
- }
- };
-
- struct IsPT3 {
- ValTy Val;
-
- IsPT3(ValTy val) : Val(val) {}
-
- template <typename T> int is() const { return Val.template is<T>(); }
- template <typename T> T get() const { return Val.template get<T>(); }
- };
-
-public:
- PointerUnion3() = default;
- PointerUnion3(PT1 V) { Val = InnerUnion(V); }
- PointerUnion3(PT2 V) { Val = InnerUnion(V); }
- PointerUnion3(PT3 V) { Val = V; }
-
- /// Test if the pointer held in the union is null, regardless of
- /// which type it is.
- bool isNull() const { return Val.isNull(); }
- explicit operator bool() const { return !isNull(); }
-
- /// Test if the Union currently holds the type matching T.
- template <typename T> int is() const {
- // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
- using Ty = typename ::llvm::PointerUnionTypeSelector<
- PT1, T, IsInnerUnion,
- ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3>>::Return;
- return Ty(Val).template is<T>();
- }
-
- /// Returns the value of the specified pointer type.
- ///
- /// If the specified pointer type is incorrect, assert.
- template <typename T> T get() const {
- assert(is<T>() && "Invalid accessor called");
- // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3.
- using Ty = typename ::llvm::PointerUnionTypeSelector<
- PT1, T, IsInnerUnion,
- ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3>>::Return;
- return Ty(Val).template get<T>();
- }
-
- /// Returns the current pointer if it is of the specified pointer type,
- /// otherwises returns null.
- template <typename T> T dyn_cast() const {
- if (is<T>())
- return get<T>();
- return T();
- }
-
- /// Assignment from nullptr which just clears the union.
- const PointerUnion3 &operator=(std::nullptr_t) {
- Val = nullptr;
- return *this;
- }
-
- /// Assignment operators - Allow assigning into this union from either
- /// pointer type, setting the discriminator to remember what it came from.
- const PointerUnion3 &operator=(const PT1 &RHS) {
- Val = InnerUnion(RHS);
- return *this;
- }
- const PointerUnion3 &operator=(const PT2 &RHS) {
- Val = InnerUnion(RHS);
- return *this;
- }
- const PointerUnion3 &operator=(const PT3 &RHS) {
- Val = RHS;
- return *this;
- }
-
- void *getOpaqueValue() const { return Val.getOpaqueValue(); }
- static inline PointerUnion3 getFromOpaqueValue(void *VP) {
- PointerUnion3 V;
- V.Val = ValTy::getFromOpaqueValue(VP);
- return V;
- }
-};
-
-// Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has
-// # low bits available = min(PT1bits,PT2bits,PT2bits)-2.
template <typename PT1, typename PT2, typename PT3>
-struct PointerLikeTypeTraits<PointerUnion3<PT1, PT2, PT3>> {
- static inline void *getAsVoidPointer(const PointerUnion3<PT1, PT2, PT3> &P) {
- return P.getOpaqueValue();
- }
-
- static inline PointerUnion3<PT1, PT2, PT3> getFromVoidPointer(void *P) {
- return PointerUnion3<PT1, PT2, PT3>::getFromOpaqueValue(P);
- }
-
- // The number of bits available are the min of the two pointer types.
- enum {
- NumLowBitsAvailable = PointerLikeTypeTraits<
- typename PointerUnion3<PT1, PT2, PT3>::ValTy>::NumLowBitsAvailable
- };
-};
-
-template <typename PT1, typename PT2, typename PT3>
-bool operator<(PointerUnion3<PT1, PT2, PT3> lhs,
- PointerUnion3<PT1, PT2, PT3> rhs) {
- return lhs.getOpaqueValue() < rhs.getOpaqueValue();
-}
+using PointerUnion3 = PointerUnion<PT1, PT2, PT3>;
/// A pointer union of four pointer types. See documentation for PointerUnion
/// for usage.
template <typename PT1, typename PT2, typename PT3, typename PT4>
-class PointerUnion4 {
-public:
- using InnerUnion1 = PointerUnion<PT1, PT2>;
- using InnerUnion2 = PointerUnion<PT3, PT4>;
- using ValTy = PointerUnion<InnerUnion1, InnerUnion2>;
-
-private:
- ValTy Val;
-
-public:
- PointerUnion4() = default;
- PointerUnion4(PT1 V) { Val = InnerUnion1(V); }
- PointerUnion4(PT2 V) { Val = InnerUnion1(V); }
- PointerUnion4(PT3 V) { Val = InnerUnion2(V); }
- PointerUnion4(PT4 V) { Val = InnerUnion2(V); }
-
- /// Test if the pointer held in the union is null, regardless of
- /// which type it is.
- bool isNull() const { return Val.isNull(); }
- explicit operator bool() const { return !isNull(); }
-
- /// Test if the Union currently holds the type matching T.
- template <typename T> int is() const {
- // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
- using Ty = typename ::llvm::PointerUnionTypeSelector<
- PT1, T, InnerUnion1,
- ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1,
- InnerUnion2>>::Return;
- return Val.template is<Ty>() && Val.template get<Ty>().template is<T>();
- }
-
- /// Returns the value of the specified pointer type.
- ///
- /// If the specified pointer type is incorrect, assert.
- template <typename T> T get() const {
- assert(is<T>() && "Invalid accessor called");
- // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
- using Ty = typename ::llvm::PointerUnionTypeSelector<
- PT1, T, InnerUnion1,
- ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1,
- InnerUnion2>>::Return;
- return Val.template get<Ty>().template get<T>();
- }
-
- /// Returns the current pointer if it is of the specified pointer type,
- /// otherwises returns null.
- template <typename T> T dyn_cast() const {
- if (is<T>())
- return get<T>();
- return T();
- }
-
- /// Assignment from nullptr which just clears the union.
- const PointerUnion4 &operator=(std::nullptr_t) {
- Val = nullptr;
- return *this;
- }
-
- /// Assignment operators - Allow assigning into this union from either
- /// pointer type, setting the discriminator to remember what it came from.
- const PointerUnion4 &operator=(const PT1 &RHS) {
- Val = InnerUnion1(RHS);
- return *this;
- }
- const PointerUnion4 &operator=(const PT2 &RHS) {
- Val = InnerUnion1(RHS);
- return *this;
- }
- const PointerUnion4 &operator=(const PT3 &RHS) {
- Val = InnerUnion2(RHS);
- return *this;
- }
- const PointerUnion4 &operator=(const PT4 &RHS) {
- Val = InnerUnion2(RHS);
- return *this;
- }
-
- void *getOpaqueValue() const { return Val.getOpaqueValue(); }
- static inline PointerUnion4 getFromOpaqueValue(void *VP) {
- PointerUnion4 V;
- V.Val = ValTy::getFromOpaqueValue(VP);
- return V;
- }
-};
-
-// Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has
-// # low bits available = min(PT1bits,PT2bits,PT2bits)-2.
-template <typename PT1, typename PT2, typename PT3, typename PT4>
-struct PointerLikeTypeTraits<PointerUnion4<PT1, PT2, PT3, PT4>> {
- static inline void *
- getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
- return P.getOpaqueValue();
- }
-
- static inline PointerUnion4<PT1, PT2, PT3, PT4> getFromVoidPointer(void *P) {
- return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
- }
-
- // The number of bits available are the min of the two pointer types.
- enum {
- NumLowBitsAvailable = PointerLikeTypeTraits<
- typename PointerUnion4<PT1, PT2, PT3, PT4>::ValTy>::NumLowBitsAvailable
- };
-};
+using PointerUnion4 = PointerUnion<PT1, PT2, PT3, PT4>;
// Teach DenseMap how to use PointerUnions as keys.
-template <typename T, typename U> struct DenseMapInfo<PointerUnion<T, U>> {
- using Pair = PointerUnion<T, U>;
- using FirstInfo = DenseMapInfo<T>;
- using SecondInfo = DenseMapInfo<U>;
+template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> {
+ using Union = PointerUnion<PTs...>;
+ using FirstInfo =
+ DenseMapInfo<typename pointer_union_detail::GetFirstType<PTs...>::type>;
- static inline Pair getEmptyKey() { return Pair(FirstInfo::getEmptyKey()); }
+ static inline Union getEmptyKey() { return Union(FirstInfo::getEmptyKey()); }
- static inline Pair getTombstoneKey() {
- return Pair(FirstInfo::getTombstoneKey());
+ static inline Union getTombstoneKey() {
+ return Union(FirstInfo::getTombstoneKey());
}
- static unsigned getHashValue(const Pair &PairVal) {
- intptr_t key = (intptr_t)PairVal.getOpaqueValue();
+ static unsigned getHashValue(const Union &UnionVal) {
+ intptr_t key = (intptr_t)UnionVal.getOpaqueValue();
return DenseMapInfo<intptr_t>::getHashValue(key);
}
- static bool isEqual(const Pair &LHS, const Pair &RHS) {
- return LHS.template is<T>() == RHS.template is<T>() &&
- (LHS.template is<T>() ? FirstInfo::isEqual(LHS.template get<T>(),
- RHS.template get<T>())
- : SecondInfo::isEqual(LHS.template get<U>(),
- RHS.template get<U>()));
+ static bool isEqual(const Union &LHS, const Union &RHS) {
+ return LHS == RHS;
}
};
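
A short sketch of the variadic PointerUnion from the caller's side (editor's illustration; PointerUnion3 and PointerUnion4 are now just aliases of this):

#include "llvm/ADT/PointerUnion.h"
#include <cassert>

int main() {
  float F = 2.0f;
  double D = 3.0;

  // Three or more pointer types go directly into one union; the
  // discriminator takes as many low bits as the pack size requires.
  llvm::PointerUnion<int *, float *, double *> U = &F;
  assert(U.is<float *>() && !U.is<int *>());
  assert(U.get<float *>() == &F);

  U = &D;                               // assignment re-tags the discriminator
  assert(U.dyn_cast<double *>() == &D);
  assert(U.dyn_cast<int *>() == nullptr);

  U = nullptr;                          // clears the union
  assert(!U);
  return 0;
}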
diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index d77b12228cb1..2fe7447a8e77 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PostOrderIterator.h - PostOrder iterator --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/PriorityQueue.h b/include/llvm/ADT/PriorityQueue.h
index 8ba871e25304..cf79ee10ba7f 100644
--- a/include/llvm/ADT/PriorityQueue.h
+++ b/include/llvm/ADT/PriorityQueue.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/PriorityQueue.h - Priority queues ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h
index aa531f3337d9..96d22c87557e 100644
--- a/include/llvm/ADT/PriorityWorklist.h
+++ b/include/llvm/ADT/PriorityWorklist.h
@@ -1,9 +1,8 @@
//===- PriorityWorklist.h - Worklist with insertion priority ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index ab1dc4613be0..eb1a5d0938cf 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -1,9 +1,8 @@
//===- ADT/SCCIterator.h - Strongly Connected Comp. Iter. -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index f66ca7c08a73..81dce0168c79 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/STLExtras.h - Useful STL related functions ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -241,6 +240,13 @@ inline mapped_iterator<ItTy, FuncTy> map_iterator(ItTy I, FuncTy F) {
return mapped_iterator<ItTy, FuncTy>(std::move(I), std::move(F));
}
+template <class ContainerTy, class FuncTy>
+auto map_range(ContainerTy &&C, FuncTy F)
+ -> decltype(make_range(map_iterator(C.begin(), F),
+ map_iterator(C.end(), F))) {
+ return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F));
+}
+
/// Helper to determine if type T has a member called rbegin().
template <typename Ty> class has_rbegin_impl {
using yes = char[1];
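
For reference, a usage sketch of the new map_range helper (editor's example, not from the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SmallVector<int, 4> Values{1, 2, 3, 4};

  // map_range applies the callable lazily while iterating; no temporary
  // container is built.
  for (int Doubled : llvm::map_range(Values, [](int V) { return V * 2; }))
    llvm::outs() << Doubled << " ";     // 2 4 6 8
  llvm::outs() << "\n";
  return 0;
}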
@@ -1278,29 +1284,52 @@ auto partition(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range)) {
/// Provide wrappers to std::lower_bound which take ranges instead of having to
/// pass begin/end explicitly.
-template <typename R, typename ForwardIt>
-auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
- return std::lower_bound(adl_begin(Range), adl_end(Range), I);
+template <typename R, typename T>
+auto lower_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range)) {
+ return std::lower_bound(adl_begin(Range), adl_end(Range),
+ std::forward<T>(Value));
}
-template <typename R, typename ForwardIt, typename Compare>
-auto lower_bound(R &&Range, ForwardIt I, Compare C)
+template <typename R, typename T, typename Compare>
+auto lower_bound(R &&Range, T &&Value, Compare C)
-> decltype(adl_begin(Range)) {
- return std::lower_bound(adl_begin(Range), adl_end(Range), I, C);
+ return std::lower_bound(adl_begin(Range), adl_end(Range),
+ std::forward<T>(Value), C);
}
/// Provide wrappers to std::upper_bound which take ranges instead of having to
/// pass begin/end explicitly.
-template <typename R, typename ForwardIt>
-auto upper_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range)) {
- return std::upper_bound(adl_begin(Range), adl_end(Range), I);
+template <typename R, typename T>
+auto upper_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range)) {
+ return std::upper_bound(adl_begin(Range), adl_end(Range),
+ std::forward<T>(Value));
}
-template <typename R, typename ForwardIt, typename Compare>
-auto upper_bound(R &&Range, ForwardIt I, Compare C)
+template <typename R, typename T, typename Compare>
+auto upper_bound(R &&Range, T &&Value, Compare C)
-> decltype(adl_begin(Range)) {
- return std::upper_bound(adl_begin(Range), adl_end(Range), I, C);
+ return std::upper_bound(adl_begin(Range), adl_end(Range),
+ std::forward<T>(Value), C);
+}
+
+template <typename R>
+void stable_sort(R &&Range) {
+ std::stable_sort(adl_begin(Range), adl_end(Range));
+}
+
+template <typename R, typename Compare>
+void stable_sort(R &&Range, Compare C) {
+ std::stable_sort(adl_begin(Range), adl_end(Range), C);
+}
+
+/// Binary search for the first iterator in a range where a predicate is false.
+/// Requires that P is always true below some limit, and always false above it.
+template <typename R, typename Predicate,
+ typename Val = decltype(*adl_begin(std::declval<R>()))>
+auto partition_point(R &&Range, Predicate P) -> decltype(adl_begin(Range)) {
+ return std::partition_point(adl_begin(Range), adl_end(Range), P);
}
+
/// Wrapper function around std::equal to detect if all elements
/// in a container are same.
template <typename R>
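
The changed signatures read naturally at call sites; a minimal sketch (editor's example, assuming only standard headers plus STLExtras.h):

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {1, 3, 3, 5, 7};

  // lower_bound/upper_bound now take the value to search for directly.
  auto LB = llvm::lower_bound(V, 3);    // first element >= 3
  auto UB = llvm::upper_bound(V, 3);    // first element > 3
  assert(LB - V.begin() == 1 && UB - V.begin() == 3);

  // partition_point returns the first element failing the predicate.
  auto It = llvm::partition_point(V, [](int X) { return X < 5; });
  assert(*It == 5);

  // stable_sort over a whole range keeps the relative order of ties.
  llvm::stable_sort(V, [](int A, int B) { return A > B; });
  assert(V.front() == 7 && V.back() == 1);
  return 0;
}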
@@ -1331,6 +1360,33 @@ void erase_if(Container &C, UnaryPredicate P) {
C.erase(remove_if(C, P), C.end());
}
+/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
+/// the range [ValIt, ValEnd) (which is not from the same container).
+template<typename Container, typename RandomAccessIterator>
+void replace(Container &Cont, typename Container::iterator ContIt,
+ typename Container::iterator ContEnd, RandomAccessIterator ValIt,
+ RandomAccessIterator ValEnd) {
+ while (true) {
+ if (ValIt == ValEnd) {
+ Cont.erase(ContIt, ContEnd);
+ return;
+ } else if (ContIt == ContEnd) {
+ Cont.insert(ContIt, ValIt, ValEnd);
+ return;
+ }
+ *ContIt++ = *ValIt++;
+ }
+}
+
+/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
+/// the range R.
+template<typename Container, typename Range = std::initializer_list<
+ typename Container::value_type>>
+void replace(Container &Cont, typename Container::iterator ContIt,
+ typename Container::iterator ContEnd, Range R) {
+ replace(Cont, ContIt, ContEnd, R.begin(), R.end());
+}
+
//===----------------------------------------------------------------------===//
// Extra additions to <memory>
//===----------------------------------------------------------------------===//
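
A sketch of the new replace helpers (editor's example): splicing a new range over part of a container without hand-rolled erase/insert bookkeeping.

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {1, 2, 3, 4, 5};

  // Overwrite elements 2, 3 and 4 with {9, 9}: existing slots are reused
  // and the leftover element is erased (or extra ones inserted).
  llvm::replace(V, V.begin() + 1, V.begin() + 4, {9, 9});

  assert((V == std::vector<int>{1, 9, 9, 5}));
  return 0;
}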
@@ -1418,6 +1474,9 @@ namespace detail {
template <typename R> class enumerator_iter;
template <typename R> struct result_pair {
+ using value_reference =
+ typename std::iterator_traits<IterOfRange<R>>::reference;
+
friend class enumerator_iter<R>;
result_pair() = default;
@@ -1431,8 +1490,8 @@ template <typename R> struct result_pair {
}
std::size_t index() const { return Index; }
- const ValueOfRange<R> &value() const { return *Iter; }
- ValueOfRange<R> &value() { return *Iter; }
+ const value_reference value() const { return *Iter; }
+ value_reference value() { return *Iter; }
private:
std::size_t Index = std::numeric_limits<std::size_t>::max();
@@ -1577,6 +1636,19 @@ bool hasNItemsOrMore(
return true;
}
+/// Returns a raw pointer that represents the same address as the argument.
+///
+/// The late bound return should be removed once we move to C++14 to better
+/// align with the C++20 declaration. Also, this implementation can be removed
+/// once we move to C++20 where it's defined as std::to_address().
+///
+/// The std::pointer_traits<>::to_address(p) variations of these overloads have
+/// not been implemented.
+template <class Ptr> auto to_address(const Ptr &P) -> decltype(P.operator->()) {
+ return P.operator->();
+}
+template <class T> constexpr T *to_address(T *P) { return P; }
+
} // end namespace llvm
#endif // LLVM_ADT_STLEXTRAS_H
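
A sketch of to_address (editor's example), mirroring the eventual C++20 std::to_address:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <memory>

int main() {
  std::unique_ptr<int> Owned(new int(42));
  int Raw = 7;

  // Yields a plain pointer for smart pointers (via operator->) and is the
  // identity for raw pointers.
  assert(llvm::to_address(Owned) == Owned.get());
  assert(llvm::to_address(&Raw) == &Raw);
  return 0;
}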
diff --git a/include/llvm/ADT/ScopeExit.h b/include/llvm/ADT/ScopeExit.h
index bd13755fa999..712d91237739 100644
--- a/include/llvm/ADT/ScopeExit.h
+++ b/include/llvm/ADT/ScopeExit.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/ScopeExit.h - Execute code at scope exit --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h
index 22b0c1bdaf4d..40c49ebc0be1 100644
--- a/include/llvm/ADT/ScopedHashTable.h
+++ b/include/llvm/ADT/ScopedHashTable.h
@@ -1,9 +1,8 @@
//===- ScopedHashTable.h - A simple scoped hash table -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/Sequence.h b/include/llvm/ADT/Sequence.h
index 3d4a897bf9a9..8c505f2010dd 100644
--- a/include/llvm/ADT/Sequence.h
+++ b/include/llvm/ADT/Sequence.h
@@ -1,9 +1,8 @@
//===- Sequence.h - Utility for producing sequences of values ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/ADT/SetOperations.h b/include/llvm/ADT/SetOperations.h
index 7c9f2fbe066e..037256a860b2 100644
--- a/include/llvm/ADT/SetOperations.h
+++ b/include/llvm/ADT/SetOperations.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/SetOperations.h - Generic Set Operations -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index 3d6781041320..d0a0d28d1c81 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SetVector.h - Set with insert order iteration ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index 0a73dbd60671..742450e6a951 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallBitVector.h - 'Normally small' bit vectors -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index db08e40257ba..913518230d2d 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallPtrSet.h - 'Normally small' pointer set ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index 5d84627714bc..6b128c2e2992 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallSet.h - 'Normally small' sets --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index ff46e85ccb09..898be80d0324 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallString.h - 'Normally small' strings --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 0636abbb1fbf..17586904d212 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,8 +41,8 @@ protected:
unsigned Size = 0, Capacity;
SmallVectorBase() = delete;
- SmallVectorBase(void *FirstEl, size_t Capacity)
- : BeginX(FirstEl), Capacity(Capacity) {}
+ SmallVectorBase(void *FirstEl, size_t TotalCapacity)
+ : BeginX(FirstEl), Capacity(TotalCapacity) {}
/// This is an implementation of the grow() method which only works
/// on POD-like data types and is out of line to reduce code duplication.
@@ -64,9 +63,9 @@ public:
/// of the buffer when they know that more elements are available, and only
/// update the size later. This avoids the cost of value initializing elements
/// which will only be overwritten.
- void set_size(size_t Size) {
- assert(Size <= capacity());
- this->Size = Size;
+ void set_size(size_t N) {
+ assert(N <= capacity());
+ Size = N;
}
};
@@ -125,13 +124,9 @@ public:
using const_pointer = const T *;
// forward iterator creation methods.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
iterator begin() { return (iterator)this->BeginX; }
- LLVM_ATTRIBUTE_ALWAYS_INLINE
const_iterator begin() const { return (const_iterator)this->BeginX; }
- LLVM_ATTRIBUTE_ALWAYS_INLINE
iterator end() { return begin() + size(); }
- LLVM_ATTRIBUTE_ALWAYS_INLINE
const_iterator end() const { return begin() + size(); }
// reverse iterator creation methods.
@@ -150,12 +145,10 @@ public:
/// Return a pointer to the vector's buffer, even if empty().
const_pointer data() const { return const_pointer(begin()); }
- LLVM_ATTRIBUTE_ALWAYS_INLINE
reference operator[](size_type idx) {
assert(idx < size());
return begin()[idx];
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
const_reference operator[](size_type idx) const {
assert(idx < size());
return begin()[idx];
@@ -180,9 +173,9 @@ public:
}
};
-/// SmallVectorTemplateBase<isPodLike = false> - This is where we put method
+/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put method
/// implementations that are designed to work with non-POD-like T's.
-template <typename T, bool = isPodLike<T>::value>
+template <typename T, bool = is_trivially_copyable<T>::value>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
protected:
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
@@ -236,8 +229,8 @@ public:
};
// Define this out-of-line to dissuade the C++ compiler from inlining it.
-template <typename T, bool isPodLike>
-void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
+template <typename T, bool TriviallyCopyable>
+void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
if (MinSize > UINT32_MAX)
report_bad_alloc_error("SmallVector capacity overflow during allocation");
@@ -260,9 +253,8 @@ void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
this->Capacity = NewCapacity;
}
-
-/// SmallVectorTemplateBase<isPodLike = true> - This is where we put method
-/// implementations that are designed to work with POD-like T's.
+/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
+/// method implementations that are designed to work with trivially copyable T's.
template <typename T>
class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
protected:
@@ -326,12 +318,13 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> {
public:
using iterator = typename SuperClass::iterator;
using const_iterator = typename SuperClass::const_iterator;
+ using reference = typename SuperClass::reference;
using size_type = typename SuperClass::size_type;
protected:
// Default ctor - Initialize to empty.
explicit SmallVectorImpl(unsigned N)
- : SmallVectorTemplateBase<T, isPodLike<T>::value>(N) {}
+ : SmallVectorTemplateBase<T>(N) {}
public:
SmallVectorImpl(const SmallVectorImpl &) = delete;
@@ -393,22 +386,18 @@ public:
std::input_iterator_tag>::value>::type>
void append(in_iter in_start, in_iter in_end) {
size_type NumInputs = std::distance(in_start, in_end);
- // Grow allocated space if needed.
if (NumInputs > this->capacity() - this->size())
this->grow(this->size()+NumInputs);
- // Copy the new elements over.
this->uninitialized_copy(in_start, in_end, this->end());
this->set_size(this->size() + NumInputs);
}
- /// Add the specified range to the end of the SmallVector.
+ /// Append \p NumInputs copies of \p Elt to the end.
void append(size_type NumInputs, const T &Elt) {
- // Grow allocated space if needed.
if (NumInputs > this->capacity() - this->size())
this->grow(this->size()+NumInputs);
- // Copy the new elements over.
std::uninitialized_fill_n(this->end(), NumInputs, Elt);
this->set_size(this->size() + NumInputs);
}
@@ -649,11 +638,12 @@ public:
insert(I, IL.begin(), IL.end());
}
- template <typename... ArgTypes> void emplace_back(ArgTypes &&... Args) {
+ template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
if (LLVM_UNLIKELY(this->size() >= this->capacity()))
this->grow();
::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
this->set_size(this->size() + 1);
+ return this->back();
}
SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
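With emplace_back now returning a reference to the newly constructed element,
call sites can initialize and then adjust the element without a separate
back() call. A minimal sketch (the vector and element types are illustrative,
not part of the patch):

    #include "llvm/ADT/SmallVector.h"
    #include <utility>

    void demo() {
      llvm::SmallVector<std::pair<int, int>, 4> Pairs;
      // emplace_back constructs in place and hands back a reference.
      auto &P = Pairs.emplace_back(1, 2);
      P.second = 42; // tweak the element through the returned reference
    }
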
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 84e73bcbace8..12850e14f4ed 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h
index 3c8637621510..d9d3ff459267 100644
--- a/include/llvm/ADT/SparseMultiSet.h
+++ b/include/llvm/ADT/SparseMultiSet.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h
index 74cc6dab8c74..a6eb9b942e80 100644
--- a/include/llvm/ADT/SparseSet.h
+++ b/include/llvm/ADT/SparseSet.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index 90c2eefceb6c..2ac59da596ef 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/Statistic.h - Easy way to expose stats ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index 60a03633a8a6..16ac90bd6c89 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index a9f83d3f5091..8a586fc26709 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -1,9 +1,8 @@
//===- StringMap.h - String Hash table map interface ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -360,6 +359,11 @@ public:
return find(Key) == end() ? 0 : 1;
}
+ template <typename InputTy>
+ size_type count(const StringMapEntry<InputTy> &MapEntry) const {
+ return count(MapEntry.getKey());
+ }
+
/// insert - Insert the specified key/value pair into the map. If the key
/// already exists in the map, return false and ignore the request, otherwise
/// insert it and return true.
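The new count overload accepts a StringMapEntry directly and forwards to
count(MapEntry.getKey()). A small sketch of how that reads in practice (the
two maps and the diagnostic are made up for illustration):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/Support/raw_ostream.h"

    void reportUnused(const llvm::StringMap<int> &Defined,
                      const llvm::StringMap<int> &Used) {
      for (const llvm::StringMapEntry<int> &E : Defined)
        if (!Used.count(E)) // same as Used.count(E.getKey())
          llvm::errs() << E.getKey() << " is defined but never used\n";
    }
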
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index a5ba5b59b5a3..4661b1e68b2f 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -1,9 +1,8 @@
//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -63,7 +62,6 @@ namespace llvm {
// Workaround memcmp issue with null pointers (undefined behavior)
// by providing a specialized version
- LLVM_ATTRIBUTE_ALWAYS_INLINE
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
if (Length == 0) { return 0; }
return ::memcmp(Lhs,Rhs,Length);
@@ -81,17 +79,14 @@ namespace llvm {
StringRef(std::nullptr_t) = delete;
/// Construct a string ref from a cstring.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ StringRef(const char *Str)
: Data(Str), Length(Str ? ::strlen(Str) : 0) {}
/// Construct a string ref from a pointer and length.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ constexpr StringRef(const char *data, size_t length)
: Data(data), Length(length) {}
/// Construct a string ref from an std::string.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ StringRef(const std::string &Str)
: Data(Str.data()), Length(Str.length()) {}
@@ -124,17 +119,14 @@ namespace llvm {
/// data - Get a pointer to the start of the string (which may not be null
/// terminated).
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
const char *data() const { return Data; }
/// empty - Check if the string is empty.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool empty() const { return Length == 0; }
/// size - Get the string size.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t size() const { return Length; }
/// front - Get the first character in the string.
@@ -165,7 +157,6 @@ namespace llvm {
/// equals - Check for string equality, this is more efficient than
/// compare() when the relative ordering of inequal strings isn't needed.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool equals(StringRef RHS) const {
return (Length == RHS.Length &&
compareMemory(Data, RHS.Data, RHS.Length) == 0);
@@ -180,7 +171,6 @@ namespace llvm {
/// compare - Compare two strings; the result is -1, 0, or 1 if this string
/// is lexicographically less than, equal to, or greater than the \p RHS.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
int compare(StringRef RHS) const {
// Check the prefix for a mismatch.
if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
@@ -263,7 +253,6 @@ namespace llvm {
/// Check if this string starts with the given \p Prefix.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool startswith(StringRef Prefix) const {
return Length >= Prefix.Length &&
compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
@@ -275,7 +264,6 @@ namespace llvm {
/// Check if this string ends with the given \p Suffix.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool endswith(StringRef Suffix) const {
return Length >= Suffix.Length &&
compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
@@ -294,7 +282,6 @@ namespace llvm {
/// \returns The index of the first occurrence of \p C, or npos if not
/// found.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t find(char C, size_t From = 0) const {
size_t FindBegin = std::min(From, Length);
if (FindBegin < Length) { // Avoid calling memchr with nullptr.
@@ -317,7 +304,6 @@ namespace llvm {
/// \returns The index of the first character satisfying \p F starting from
/// \p From, or npos if not found.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
StringRef S = drop_front(From);
while (!S.empty()) {
@@ -333,7 +319,6 @@ namespace llvm {
/// \returns The index of the first character not satisfying \p F starting
/// from \p From, or npos if not found.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
return find_if([F](char c) { return !F(c); }, From);
}
@@ -444,19 +429,16 @@ namespace llvm {
/// Return true if the given string is a substring of *this, and false
/// otherwise.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool contains(StringRef Other) const { return find(Other) != npos; }
/// Return true if the given character is contained in *this, and false
/// otherwise.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool contains(char C) const { return find_first_of(C) != npos; }
/// Return true if the given string is a substring of *this, and false
/// otherwise.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool contains_lower(StringRef Other) const {
return find_lower(Other) != npos;
}
@@ -464,7 +446,6 @@ namespace llvm {
/// Return true if the given character is contained in *this, and false
/// otherwise.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool contains_lower(char C) const { return find_lower(C) != npos; }
/// @}
@@ -594,7 +575,6 @@ namespace llvm {
/// exceeds the number of characters remaining in the string, the string
/// suffix (starting with \p Start) will be returned.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef substr(size_t Start, size_t N = npos) const {
Start = std::min(Start, Length);
return StringRef(Data + Start, std::min(N, Length - Start));
@@ -604,7 +584,6 @@ namespace llvm {
/// elements remaining. If \p N is greater than the length of the
/// string, the entire string is returned.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef take_front(size_t N = 1) const {
if (N >= size())
return *this;
@@ -615,7 +594,6 @@ namespace llvm {
/// elements remaining. If \p N is greater than the length of the
/// string, the entire string is returned.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef take_back(size_t N = 1) const {
if (N >= size())
return *this;
@@ -625,7 +603,6 @@ namespace llvm {
/// Return the longest prefix of 'this' such that every character
/// in the prefix satisfies the given predicate.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef take_while(function_ref<bool(char)> F) const {
return substr(0, find_if_not(F));
}
@@ -633,7 +610,6 @@ namespace llvm {
/// Return the longest prefix of 'this' such that no character in
/// the prefix satisfies the given predicate.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef take_until(function_ref<bool(char)> F) const {
return substr(0, find_if(F));
}
@@ -641,7 +617,6 @@ namespace llvm {
/// Return a StringRef equal to 'this' but with the first \p N elements
/// dropped.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_front(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(N);
@@ -650,7 +625,6 @@ namespace llvm {
/// Return a StringRef equal to 'this' but with the last \p N elements
/// dropped.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_back(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(0, size()-N);
@@ -659,7 +633,6 @@ namespace llvm {
/// Return a StringRef equal to 'this', but with all characters satisfying
/// the given predicate dropped from the beginning of the string.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_while(function_ref<bool(char)> F) const {
return substr(find_if_not(F));
}
@@ -667,14 +640,12 @@ namespace llvm {
/// Return a StringRef equal to 'this', but with all characters not
/// satisfying the given predicate dropped from the beginning of the string.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_until(function_ref<bool(char)> F) const {
return substr(find_if(F));
}
/// Returns true if this StringRef has the given prefix and removes that
/// prefix.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool consume_front(StringRef Prefix) {
if (!startswith(Prefix))
return false;
@@ -685,7 +656,6 @@ namespace llvm {
/// Returns true if this StringRef has the given suffix and removes that
/// suffix.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
bool consume_back(StringRef Suffix) {
if (!endswith(Suffix))
return false;
@@ -706,7 +676,6 @@ namespace llvm {
/// will be returned. If this is less than \p Start, an empty string will
/// be returned.
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef slice(size_t Start, size_t End) const {
Start = std::min(Start, Length);
End = std::min(std::max(Start, End), Length);
@@ -894,12 +863,10 @@ namespace llvm {
/// @name StringRef Comparison Operators
/// @{
- LLVM_ATTRIBUTE_ALWAYS_INLINE
inline bool operator==(StringRef LHS, StringRef RHS) {
return LHS.equals(RHS);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
inline bool operator<(StringRef LHS, StringRef RHS) {
@@ -928,10 +895,6 @@ namespace llvm {
LLVM_NODISCARD
hash_code hash_value(StringRef S);
- // StringRefs can be treated like a POD type.
- template <typename T> struct isPodLike;
- template <> struct isPodLike<StringRef> { static const bool value = true; };
-
} // end namespace llvm
#endif // LLVM_ADT_STRINGREF_H
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index 9af44c07df79..af3a44a7b32c 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -1,9 +1,8 @@
//===- StringSet.h - The LLVM Compiler Driver -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,7 @@ namespace llvm {
for (StringRef X : S)
insert(X);
}
+ explicit StringSet(AllocatorTy A) : base(A) {}
std::pair<typename base::iterator, bool> insert(StringRef Key) {
assert(!Key.empty());
@@ -45,6 +45,12 @@ namespace llvm {
for (auto It = Begin; It != End; ++It)
base::insert(std::make_pair(*It, '\0'));
}
+
+ template <typename ValueTy>
+ std::pair<typename base::iterator, bool>
+ insert(const StringMapEntry<ValueTy> &MapEntry) {
+ return insert(MapEntry.getKey());
+ }
};
} // end namespace llvm
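Together with the new explicit allocator constructor, the StringSet change
makes it straightforward to collect just the keys of a StringMap. A minimal
sketch (the map contents and helper name are illustrative):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringSet.h"

    llvm::StringSet<> collectKeys(const llvm::StringMap<unsigned> &Counts) {
      llvm::StringSet<> Keys;
      for (const llvm::StringMapEntry<unsigned> &Entry : Counts)
        Keys.insert(Entry); // forwards to insert(Entry.getKey())
      return Keys;
    }
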
diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h
index b7860b98ce5d..fea911f6928b 100644
--- a/include/llvm/ADT/StringSwitch.h
+++ b/include/llvm/ADT/StringSwitch.h
@@ -1,9 +1,8 @@
//===--- StringSwitch.h - Switch-on-literal-string Construct --------------===/
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===/
//
// This file implements the StringSwitch template, which mimics a switch()
@@ -49,7 +48,6 @@ class StringSwitch {
Optional<T> Result;
public:
- LLVM_ATTRIBUTE_ALWAYS_INLINE
explicit StringSwitch(StringRef S)
: Str(S), Result() { }
@@ -66,7 +64,6 @@ public:
~StringSwitch() = default;
// Case-sensitive case matchers
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Case(StringLiteral S, T Value) {
if (!Result && Str == S) {
Result = std::move(Value);
@@ -74,7 +71,6 @@ public:
return *this;
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& EndsWith(StringLiteral S, T Value) {
if (!Result && Str.endswith(S)) {
Result = std::move(Value);
@@ -82,7 +78,6 @@ public:
return *this;
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& StartsWith(StringLiteral S, T Value) {
if (!Result && Str.startswith(S)) {
Result = std::move(Value);
@@ -90,51 +85,43 @@ public:
return *this;
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, T Value) {
return Case(S0, Value).Case(S1, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
T Value) {
return Case(S0, Value).Cases(S1, S2, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, T Value) {
return Case(S0, Value).Cases(S1, S2, S3, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, T Value) {
return Case(S0, Value).Cases(S1, S2, S3, S4, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, StringLiteral S5,
T Value) {
return Case(S0, Value).Cases(S1, S2, S3, S4, S5, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, StringLiteral S5,
StringLiteral S6, T Value) {
return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, StringLiteral S5,
StringLiteral S6, StringLiteral S7, T Value) {
return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, StringLiteral S5,
StringLiteral S6, StringLiteral S7, StringLiteral S8,
@@ -142,7 +129,6 @@ public:
return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, S8, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &Cases(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, StringLiteral S5,
StringLiteral S6, StringLiteral S7, StringLiteral S8,
@@ -151,7 +137,6 @@ public:
}
// Case-insensitive case matchers.
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &CaseLower(StringLiteral S, T Value) {
if (!Result && Str.equals_lower(S))
Result = std::move(Value);
@@ -159,7 +144,6 @@ public:
return *this;
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &EndsWithLower(StringLiteral S, T Value) {
if (!Result && Str.endswith_lower(S))
Result = Value;
@@ -167,7 +151,6 @@ public:
return *this;
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &StartsWithLower(StringLiteral S, T Value) {
if (!Result && Str.startswith_lower(S))
Result = std::move(Value);
@@ -175,31 +158,26 @@ public:
return *this;
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, T Value) {
return CaseLower(S0, Value).CaseLower(S1, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
T Value) {
return CaseLower(S0, Value).CasesLower(S1, S2, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, T Value) {
return CaseLower(S0, Value).CasesLower(S1, S2, S3, Value);
}
- LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch &CasesLower(StringLiteral S0, StringLiteral S1, StringLiteral S2,
StringLiteral S3, StringLiteral S4, T Value) {
return CaseLower(S0, Value).CasesLower(S1, S2, S3, S4, Value);
}
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
R Default(T Value) {
if (Result)
return std::move(*Result);
@@ -207,7 +185,6 @@ public:
}
LLVM_NODISCARD
- LLVM_ATTRIBUTE_ALWAYS_INLINE
operator R() {
assert(Result && "Fell off the end of a string-switch");
return std::move(*Result);
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index 1b8e9aa658c3..ac82451a9b21 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/TinyPtrVector.h - 'Normally tiny' vectors -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index e06a68e27317..edeb31efab80 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/Triple.h - Target triple helper class ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -50,6 +49,7 @@ public:
armeb, // ARM (big endian): armeb
aarch64, // AArch64 (little endian): aarch64
aarch64_be, // AArch64 (big endian): aarch64_be
+ aarch64_32, // AArch64 (little endian) ILP32: aarch64_32
arc, // ARC: Synopsys ARC
avr, // AVR: Atmel AVR microcontroller
bpfel, // eBPF or extended BPF or 64-bit BPF (little endian)
@@ -109,6 +109,7 @@ public:
ARMSubArch_v8r,
ARMSubArch_v8m_baseline,
ARMSubArch_v8m_mainline,
+ ARMSubArch_v8_1m_mainline,
ARMSubArch_v7,
ARMSubArch_v7em,
ARMSubArch_v7m,
@@ -187,7 +188,8 @@ public:
HermitCore, // HermitCore Unikernel/Multikernel
Hurd, // GNU/Hurd
WASI, // Experimental WebAssembly OS
- LastOSType = WASI
+ Emscripten,
+ LastOSType = Emscripten
};
enum EnvironmentType {
UnknownEnvironment,
@@ -201,6 +203,8 @@ public:
CODE16,
EABI,
EABIHF,
+ ELFv1,
+ ELFv2,
Android,
Musl,
MuslEABI,
@@ -210,8 +214,9 @@ public:
Itanium,
Cygnus,
CoreCLR,
- Simulator, // Simulator variants of other systems, e.g., Apple's iOS
- LastEnvironmentType = Simulator
+ Simulator, // Simulator variants of other systems, e.g., Apple's iOS
+ MacABI, // Mac Catalyst variant of Apple's iOS deployment target.
+ LastEnvironmentType = MacABI
};
enum ObjectFormatType {
UnknownObjectFormat,
@@ -220,6 +225,7 @@ public:
ELF,
MachO,
Wasm,
+ XCOFF,
};
private:
@@ -415,7 +421,7 @@ public:
if (LHS[1] != Minor)
return LHS[1] < Minor;
if (LHS[2] != Micro)
- return LHS[1] < Micro;
+ return LHS[2] < Micro;
return false;
}
@@ -480,6 +486,10 @@ public:
return getEnvironment() == Triple::Simulator;
}
+ bool isMacCatalystEnvironment() const {
+ return getEnvironment() == Triple::MacABI;
+ }
+
bool isOSNetBSD() const {
return getOS() == Triple::NetBSD;
}
@@ -524,32 +534,36 @@ public:
return getOS() == Triple::Haiku;
}
- /// Checks if the environment could be MSVC.
- bool isWindowsMSVCEnvironment() const {
- return getOS() == Triple::Win32 &&
- (getEnvironment() == Triple::UnknownEnvironment ||
- getEnvironment() == Triple::MSVC);
+ /// Tests whether the OS is Windows.
+ bool isOSWindows() const {
+ return getOS() == Triple::Win32;
}
/// Checks if the environment is MSVC.
bool isKnownWindowsMSVCEnvironment() const {
- return getOS() == Triple::Win32 && getEnvironment() == Triple::MSVC;
+ return isOSWindows() && getEnvironment() == Triple::MSVC;
+ }
+
+ /// Checks if the environment could be MSVC.
+ bool isWindowsMSVCEnvironment() const {
+ return isKnownWindowsMSVCEnvironment() ||
+ (isOSWindows() && getEnvironment() == Triple::UnknownEnvironment);
}
bool isWindowsCoreCLREnvironment() const {
- return getOS() == Triple::Win32 && getEnvironment() == Triple::CoreCLR;
+ return isOSWindows() && getEnvironment() == Triple::CoreCLR;
}
bool isWindowsItaniumEnvironment() const {
- return getOS() == Triple::Win32 && getEnvironment() == Triple::Itanium;
+ return isOSWindows() && getEnvironment() == Triple::Itanium;
}
bool isWindowsCygwinEnvironment() const {
- return getOS() == Triple::Win32 && getEnvironment() == Triple::Cygnus;
+ return isOSWindows() && getEnvironment() == Triple::Cygnus;
}
bool isWindowsGNUEnvironment() const {
- return getOS() == Triple::Win32 && getEnvironment() == Triple::GNU;
+ return isOSWindows() && getEnvironment() == Triple::GNU;
}
/// Tests for either Cygwin or MinGW OS
@@ -563,11 +577,6 @@ public:
isWindowsItaniumEnvironment();
}
- /// Tests whether the OS is Windows.
- bool isOSWindows() const {
- return getOS() == Triple::Win32;
- }
-
/// Tests whether the OS is NaCl (Native Client)
bool isOSNaCl() const {
return getOS() == Triple::NaCl;
@@ -593,6 +602,11 @@ public:
return getOS() == Triple::WASI;
}
+ /// Tests whether the OS is Emscripten.
+ bool isOSEmscripten() const {
+ return getOS() == Triple::Emscripten;
+ }
+
/// Tests whether the OS uses glibc.
bool isOSGlibc() const {
return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD ||
@@ -600,6 +614,11 @@ public:
!isAndroid();
}
+ /// Tests whether the OS is AIX.
+ bool isOSAIX() const {
+ return getOS() == Triple::AIX;
+ }
+
/// Tests whether the OS uses the ELF binary format.
bool isOSBinFormatELF() const {
return getObjectFormat() == Triple::ELF;
@@ -620,6 +639,11 @@ public:
return getObjectFormat() == Triple::Wasm;
}
+ /// Tests whether the OS uses the XCOFF binary format.
+ bool isOSBinFormatXCOFF() const {
+ return getObjectFormat() == Triple::XCOFF;
+ }
+
/// Tests whether the target is the PS4 CPU
bool isPS4CPU() const {
return getArch() == Triple::x86_64 &&
@@ -656,6 +680,11 @@ public:
getEnvironment() == Triple::MuslEABIHF;
}
+ /// Tests whether the target is SPIR (32- or 64-bit).
+ bool isSPIR() const {
+ return getArch() == Triple::spir || getArch() == Triple::spir64;
+ }
+
/// Tests whether the target is NVPTX (32- or 64-bit).
bool isNVPTX() const {
return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
@@ -691,6 +720,16 @@ public:
return isMIPS32() || isMIPS64();
}
+ /// Tests whether the target is 64-bit PowerPC (little and big endian).
+ bool isPPC64() const {
+ return getArch() == Triple::ppc64 || getArch() == Triple::ppc64le;
+ }
+
+ /// Tests whether the target is RISC-V (32- and 64-bit).
+ bool isRISCV() const {
+ return getArch() == Triple::riscv32 || getArch() == Triple::riscv64;
+ }
+
/// Tests whether the target supports comdat
bool supportsCOMDAT() const {
return !isOSBinFormatMachO();
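The additions above give Triple single-call predicates for several newly
supported targets. A brief sketch of how they might be queried (the helper
function is hypothetical):

    #include "llvm/ADT/Triple.h"

    void describe(const llvm::Triple &T) {
      bool PPC64 = T.isPPC64();             // ppc64 or ppc64le
      bool RISCV = T.isRISCV();             // riscv32 or riscv64
      bool OnAIX = T.isOSAIX();             // operating system is AIX
      bool XCOFF = T.isOSBinFormatXCOFF();  // XCOFF object files
      (void)PPC64; (void)RISCV; (void)OnAIX; (void)XCOFF;
    }
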
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index b60fd0981398..4140c22aad3d 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -1,9 +1,8 @@
//===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -274,6 +273,9 @@ namespace llvm {
assert(isValid() && "Invalid twine!");
}
+ /// Delete the implicit conversion from nullptr as Twine(const char *)
+ /// cannot take nullptr.
+ /*implicit*/ Twine(std::nullptr_t) = delete;
/// Construct from an std::string.
/*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
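Deleting the std::nullptr_t constructor turns undefined behaviour in
Twine(const char *) into a compile-time error. A small sketch, where
takesName is a hypothetical function used only for illustration:

    #include "llvm/ADT/Twine.h"

    void takesName(const llvm::Twine &Name);

    void demo() {
      takesName("anon");     // fine: binds to Twine(const char *)
      // takesName(nullptr); // no longer compiles: Twine(std::nullptr_t) is deleted
    }
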
diff --git a/include/llvm/ADT/UniqueVector.h b/include/llvm/ADT/UniqueVector.h
index c86bedd07687..bfea988f1702 100644
--- a/include/llvm/ADT/UniqueVector.h
+++ b/include/llvm/ADT/UniqueVector.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/UniqueVector.h ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h
index 9028abe4c72c..5aefb05ecdda 100644
--- a/include/llvm/ADT/VariadicFunction.h
+++ b/include/llvm/ADT/VariadicFunction.h
@@ -1,9 +1,8 @@
-//===--- VariadicFunctions.h - Variadic Functions ---------------*- C++ -*-===//
+//===- VariadicFunction.h - Variadic Functions ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/bit.h b/include/llvm/ADT/bit.h
index a4aba7b6a9ee..a790d5ed2d21 100644
--- a/include/llvm/ADT/bit.h
+++ b/include/llvm/ADT/bit.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/bit.h - C++20 <bit> ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,11 +40,11 @@ template <typename To, typename From
, typename = typename std::enable_if<__is_trivially_copyable(To)>::type
, typename = typename std::enable_if<__is_trivially_copyable(From)>::type
#else
- // This case is GCC 4.x. clang with libc++ or libstdc++ never get here. Unlike
- // llvm/Support/type_traits.h's isPodLike we don't want to provide a
- // good-enough answer here: developers in that configuration will hit
- // compilation failures on the bots instead of locally. That's acceptable
- // because it's very few developers, and only until we move past C++11.
+// This case is GCC 4.x. clang with libc++ or libstdc++ never gets here. Unlike
+// llvm/Support/type_traits.h's is_trivially_copyable we don't want to
+// provide a good-enough answer here: developers in that configuration will hit
+// compilation failures on the bots instead of locally. That's acceptable
+// because it's very few developers, and only until we move past C++11.
#endif
>
inline To bit_cast(const From &from) noexcept {
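For reference, llvm::bit_cast reinterprets the object representation of one
trivially copyable type as another of the same size, much like C++20's
std::bit_cast. A minimal sketch:

    #include "llvm/ADT/bit.h"
    #include <cstdint>

    uint64_t doubleBits(double D) {
      // Same size (8 bytes), both trivially copyable, so this is well-defined.
      return llvm::bit_cast<uint64_t>(D);
    }
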
diff --git a/include/llvm/ADT/edit_distance.h b/include/llvm/ADT/edit_distance.h
index b2e8ec5c3f6d..4f5134008692 100644
--- a/include/llvm/ADT/edit_distance.h
+++ b/include/llvm/ADT/edit_distance.h
@@ -1,9 +1,8 @@
//===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/fallible_iterator.h b/include/llvm/ADT/fallible_iterator.h
new file mode 100644
index 000000000000..6501ad2233cd
--- /dev/null
+++ b/include/llvm/ADT/fallible_iterator.h
@@ -0,0 +1,243 @@
+//===--- fallible_iterator.h - Wrapper for fallible iterators ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_FALLIBLE_ITERATOR_H
+#define LLVM_ADT_FALLIBLE_ITERATOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Error.h"
+
+#include <type_traits>
+
+namespace llvm {
+
+/// A wrapper class for fallible iterators.
+///
+/// The fallible_iterator template wraps an underlying iterator-like class
+/// whose increment and decrement operations are replaced with fallible versions
+/// like:
+///
+/// @code{.cpp}
+/// Error inc();
+/// Error dec();
+/// @endcode
+///
+/// It produces an interface that is (mostly) compatible with a traditional
+/// C++ iterator, including ++ and -- operators that do not fail.
+///
+/// Instances of the wrapper are constructed with an instance of the
+/// underlying iterator and (for non-end iterators) a reference to an Error
+/// instance. If the underlying increment/decrement operations fail, the Error
+/// is returned via this reference, and the resulting iterator value set to an
+/// end-of-range sentinel value. This enables the following loop idiom:
+///
+/// @code{.cpp}
+/// class Archive { // E.g. Potentially malformed on-disk archive
+/// public:
+/// fallible_iterator<ArchiveChildItr> children_begin(Error &Err);
+/// fallible_iterator<ArchiveChildItr> children_end();
+/// iterator_range<fallible_iterator<ArchiveChildItr>>
+/// children(Error &Err) {
+///     return make_range(children_begin(Err), children_end());
+///   }
+///   //...
+/// };
+///
+/// void walk(Archive &A) {
+/// Error Err = Error::success();
+/// for (auto &C : A.children(Err)) {
+/// // Loop body only entered when increment succeeds.
+/// }
+/// if (Err) {
+/// // handle error.
+/// }
+/// }
+/// @endcode
+///
+/// The wrapper marks the referenced Error as unchecked after each increment
+/// and/or decrement operation, and clears the unchecked flag when a non-end
+/// value is compared against end (since, by the increment invariant, not being
+/// an end value proves that there was no error, and is equivalent to checking
+/// that the Error is success). This allows early exits from the loop body
+/// without requiring redundant error checks.
+template <typename Underlying> class fallible_iterator {
+private:
+ template <typename T>
+ using enable_if_struct_deref_supported = std::enable_if<
+ !std::is_void<decltype(std::declval<T>().operator->())>::value,
+ decltype(std::declval<T>().operator->())>;
+
+public:
+ /// Construct a fallible iterator that *cannot* be used as an end-of-range
+ /// value.
+ ///
+ /// A value created by this method can be dereferenced, incremented,
+ /// decremented and compared, providing the underlying type supports it.
+ ///
+ /// The error that is passed in will be initially marked as checked, so if the
+ /// iterator is not used at all the Error need not be checked.
+ static fallible_iterator itr(Underlying I, Error &Err) {
+ (void)!!Err;
+ return fallible_iterator(std::move(I), &Err);
+ }
+
+ /// Construct a fallible iterator that can be used as an end-of-range value.
+ ///
+ /// A value created by this method can be dereferenced (if the underlying
+ /// value points at a valid value) and compared, but not incremented or
+ /// decremented.
+ static fallible_iterator end(Underlying I) {
+ return fallible_iterator(std::move(I), nullptr);
+ }
+
+ /// Forward dereference to the underlying iterator.
+ auto operator*() -> decltype(*std::declval<Underlying>()) { return *I; }
+
+ /// Forward const dereference to the underlying iterator.
+ auto operator*() const -> decltype(*std::declval<const Underlying>()) {
+ return *I;
+ }
+
+ /// Forward structure dereference to the underlying iterator (if the
+ /// underlying iterator supports it).
+ template <typename T = Underlying>
+ typename enable_if_struct_deref_supported<T>::type operator->() {
+ return I.operator->();
+ }
+
+ /// Forward const structure dereference to the underlying iterator (if the
+ /// underlying iterator supports it).
+ template <typename T = Underlying>
+ typename enable_if_struct_deref_supported<const T>::type operator->() const {
+ return I.operator->();
+ }
+
+ /// Increment the fallible iterator.
+ ///
+ /// If the underlying 'inc' operation fails, this will set the Error value
+ /// and update this iterator value to point to end-of-range.
+ ///
+ /// The Error value is marked as needing checking, regardless of whether the
+ /// 'inc' operation succeeds or fails.
+ fallible_iterator &operator++() {
+ assert(getErrPtr() && "Cannot increment end iterator");
+ if (auto Err = I.inc())
+ handleError(std::move(Err));
+ else
+ resetCheckedFlag();
+ return *this;
+ }
+
+ /// Decrement the fallible iterator.
+ ///
+ /// If the underlying 'dec' operation fails, this will set the Error value
+ /// and update this iterator value to point to end-of-range.
+ ///
+ /// The Error value is marked as needing checking, regardless of whether the
+ /// 'dec' operation succeeds or fails.
+ fallible_iterator &operator--() {
+ assert(getErrPtr() && "Cannot decrement end iterator");
+ if (auto Err = I.dec())
+ handleError(std::move(Err));
+ else
+ resetCheckedFlag();
+ return *this;
+ }
+
+ /// Compare fallible iterators for equality.
+ ///
+ /// Returns true if both LHS and RHS are end-of-range values, or if both are
+ /// non-end-of-range values whose underlying iterator values compare equal.
+ ///
+ /// If this is a comparison between an end-of-range iterator and a
+ /// non-end-of-range iterator, then the Error (referenced by the
+ /// non-end-of-range value) is marked as checked: since a failed
+ /// increment/decrement operation always yields an end-of-range value,
+ /// comparing unequal to end-of-range is equivalent to checking that the Error
+ /// value is success. This flag management enables early returns from loop bodies
+ /// without redundant Error checks.
+ friend bool operator==(const fallible_iterator &LHS,
+ const fallible_iterator &RHS) {
+ // If both iterators are in the end state they compare
+ // equal, regardless of whether either is valid.
+ if (LHS.isEnd() && RHS.isEnd())
+ return true;
+
+ assert(LHS.isValid() && RHS.isValid() &&
+ "Invalid iterators can only be compared against end");
+
+ bool Equal = LHS.I == RHS.I;
+
+ // If the iterators differ and this is a comparison against end then mark
+ // the Error as checked.
+ if (!Equal) {
+ if (LHS.isEnd())
+ (void)!!*RHS.getErrPtr();
+ else
+ (void)!!*LHS.getErrPtr();
+ }
+
+ return Equal;
+ }
+
+ /// Compare fallible iterators for inequality.
+ ///
+ /// See notes for operator==.
+ friend bool operator!=(const fallible_iterator &LHS,
+ const fallible_iterator &RHS) {
+ return !(LHS == RHS);
+ }
+
+private:
+ fallible_iterator(Underlying I, Error *Err)
+ : I(std::move(I)), ErrState(Err, false) {}
+
+ Error *getErrPtr() const { return ErrState.getPointer(); }
+
+ bool isEnd() const { return getErrPtr() == nullptr; }
+
+ bool isValid() const { return !ErrState.getInt(); }
+
+ void handleError(Error Err) {
+ *getErrPtr() = std::move(Err);
+ ErrState.setPointer(nullptr);
+ ErrState.setInt(true);
+ }
+
+ void resetCheckedFlag() {
+ *getErrPtr() = Error::success();
+ }
+
+ Underlying I;
+ mutable PointerIntPair<Error *, 1> ErrState;
+};
+
+/// Convenience wrapper to make a fallible_iterator value from an instance
+/// of an underlying iterator and an Error reference.
+template <typename Underlying>
+fallible_iterator<Underlying> make_fallible_itr(Underlying I, Error &Err) {
+ return fallible_iterator<Underlying>::itr(std::move(I), Err);
+}
+
+/// Convenience wrapper to make a fallible_iterator end value from an instance
+/// of an underlying iterator.
+template <typename Underlying>
+fallible_iterator<Underlying> make_fallible_end(Underlying E) {
+ return fallible_iterator<Underlying>::end(std::move(E));
+}
+
+template <typename Underlying>
+iterator_range<fallible_iterator<Underlying>>
+make_fallible_range(Underlying I, Underlying E, Error &Err) {
+ return make_range(make_fallible_itr(std::move(I), Err),
+ make_fallible_end(std::move(E)));
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_FALLIBLE_ITERATOR_H
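Beyond the Archive example in the header comment, the contract an underlying
iterator has to meet is small: an Error-returning inc() (and dec() if
decrement is used), dereference, and equality. A minimal sketch using a
hypothetical RecordIter type:

    #include "llvm/ADT/fallible_iterator.h"
    #include "llvm/Support/Error.h"

    struct RecordIter {
      const char *Pos, *End;
      llvm::Error inc() {
        if (Pos == End)
          return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                         "walked past end of buffer");
        ++Pos;
        return llvm::Error::success();
      }
      const char &operator*() const { return *Pos; }
      friend bool operator==(const RecordIter &L, const RecordIter &R) {
        return L.Pos == R.Pos;
      }
    };

    void walk(const char *Begin, const char *End) {
      llvm::Error Err = llvm::Error::success();
      for (char C : llvm::make_fallible_range(RecordIter{Begin, End},
                                              RecordIter{End, End}, Err))
        (void)C; // body only runs while inc() succeeds
      if (Err)
        llvm::consumeError(std::move(Err)); // or report it to the caller
    }
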
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 00bb6d528175..06c7abff965f 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -1,9 +1,8 @@
//==-- llvm/ADT/ilist.h - Intrusive Linked List Template ---------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -66,9 +65,8 @@ template <typename NodeTy> struct ilist_callback_traits {
void addNodeToList(NodeTy *) {}
void removeNodeFromList(NodeTy *) {}
- /// Callback before transferring nodes to this list.
- ///
- /// \pre \c this!=&OldList
+ /// Callback before transferring nodes to this list. The nodes may already be
+ /// in this same list.
template <class Iterator>
void transferNodesFromList(ilist_callback_traits &OldList, Iterator /*first*/,
Iterator /*last*/) {
@@ -287,8 +285,8 @@ private:
if (position == last)
return;
- if (this != &L2) // Notify traits we moved the nodes...
- this->transferNodesFromList(L2, first, last);
+ // Notify traits we moved the nodes...
+ this->transferNodesFromList(L2, first, last);
base_list_type::splice(position, L2, first, last);
}
diff --git a/include/llvm/ADT/ilist_base.h b/include/llvm/ADT/ilist_base.h
index 3d818a48d41d..b8c098b951ad 100644
--- a/include/llvm/ADT/ilist_base.h
+++ b/include/llvm/ADT/ilist_base.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/ilist_base.h - Intrusive List Base --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/ilist_iterator.h b/include/llvm/ADT/ilist_iterator.h
index 671e644e0154..cbe5cefa96d1 100644
--- a/include/llvm/ADT/ilist_iterator.h
+++ b/include/llvm/ADT/ilist_iterator.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h
index dd0e6b4ec2b9..e040d9630a1e 100644
--- a/include/llvm/ADT/ilist_node.h
+++ b/include/llvm/ADT/ilist_node.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/ilist_node.h - Intrusive Linked List Helper -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ADT/ilist_node_base.h b/include/llvm/ADT/ilist_node_base.h
index e5062ac4eaad..f6c518e6eed7 100644
--- a/include/llvm/ADT/ilist_node_base.h
+++ b/include/llvm/ADT/ilist_node_base.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/ilist_node_base.h - Intrusive List Node Base -----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/ilist_node_options.h b/include/llvm/ADT/ilist_node_options.h
index 7ff4005f6757..9b95cdbe08c4 100644
--- a/include/llvm/ADT/ilist_node_options.h
+++ b/include/llvm/ADT/ilist_node_options.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/ilist_node_options.h - ilist_node Options -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h
index 40e490cf7864..467fd4c00ec5 100644
--- a/include/llvm/ADT/iterator.h
+++ b/include/llvm/ADT/iterator.h
@@ -1,9 +1,8 @@
//===- iterator.h - Utilities for using and defining iterators --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h
index 2ba12866ecf3..774c7c4e3366 100644
--- a/include/llvm/ADT/iterator_range.h
+++ b/include/llvm/ADT/iterator_range.h
@@ -1,9 +1,8 @@
//===- iterator_range.h - A range adaptor for iterators ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/ADT/simple_ilist.h b/include/llvm/ADT/simple_ilist.h
index 4c7598a1acb4..9257b47b9cf8 100644
--- a/include/llvm/ADT/simple_ilist.h
+++ b/include/llvm/ADT/simple_ilist.h
@@ -1,9 +1,8 @@
//===- llvm/ADT/simple_ilist.h - Simple Intrusive List ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index e2a2ac0622e8..948341554f23 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,6 +37,7 @@
#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
#define LLVM_ANALYSIS_ALIASANALYSIS_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -286,6 +286,28 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
}
+/// This class stores info we want to provide to or retain within an alias
+/// query. By default, the root query is stateless and starts with a freshly
+/// constructed info object. Specific alias analyses can use this query info to
+/// store per-query state that is important for recursive or nested queries to
+/// avoid recomputing. To enable preserving this state across multiple queries
+/// where safe (due to the IR not changing), use a `BatchAAResults` wrapper.
+/// The information stored in an `AAQueryInfo` is currently limited to the
+/// caches used by BasicAA, but can further be extended to fit other AA needs.
+class AAQueryInfo {
+public:
+ using LocPair = std::pair<MemoryLocation, MemoryLocation>;
+ using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>;
+ AliasCacheT AliasCache;
+
+ using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
+ IsCapturedCacheT IsCapturedCache;
+
+ AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+};
+
+class BatchAAResults;
+
class AAResults {
public:
// Make these results default constructable and movable. We have to spell
@@ -600,32 +622,8 @@ public:
/// helpers above.
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc) {
- if (OptLoc == None) {
- if (const auto *Call = dyn_cast<CallBase>(I)) {
- return createModRefInfo(getModRefBehavior(Call));
- }
- }
-
- const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
-
- switch (I->getOpcode()) {
- case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc);
- case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc);
- case Instruction::Store: return getModRefInfo((const StoreInst*)I, Loc);
- case Instruction::Fence: return getModRefInfo((const FenceInst*)I, Loc);
- case Instruction::AtomicCmpXchg:
- return getModRefInfo((const AtomicCmpXchgInst*)I, Loc);
- case Instruction::AtomicRMW:
- return getModRefInfo((const AtomicRMWInst*)I, Loc);
- case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc);
- case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
- case Instruction::CatchPad:
- return getModRefInfo((const CatchPadInst *)I, Loc);
- case Instruction::CatchRet:
- return getModRefInfo((const CatchReturnInst *)I, Loc);
- default:
- return ModRefInfo::NoModRef;
- }
+ AAQueryInfo AAQIP;
+ return getModRefInfo(I, OptLoc, AAQIP);
}
/// A convenience wrapper for constructing the memory location.
@@ -692,6 +690,69 @@ public:
}
private:
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal = false);
+ ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2,
+ AAQueryInfo &AAQIP);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc, AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const Instruction *I,
+ const Optional<MemoryLocation> &OptLoc,
+ AAQueryInfo &AAQIP) {
+ if (OptLoc == None) {
+ if (const auto *Call = dyn_cast<CallBase>(I)) {
+ return createModRefInfo(getModRefBehavior(Call));
+ }
+ }
+
+ const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+
+ switch (I->getOpcode()) {
+ case Instruction::VAArg:
+ return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
+ case Instruction::Load:
+ return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
+ case Instruction::Store:
+ return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
+ case Instruction::Fence:
+ return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
+ case Instruction::AtomicCmpXchg:
+ return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
+ case Instruction::AtomicRMW:
+ return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
+ case Instruction::Call:
+ return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+ case Instruction::Invoke:
+ return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+ case Instruction::CatchPad:
+ return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
+ case Instruction::CatchRet:
+ return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
+ default:
+ return ModRefInfo::NoModRef;
+ }
+ }
+
class Concept;
template <typename T> class Model;
@@ -703,6 +764,47 @@ private:
std::vector<std::unique_ptr<Concept>> AAs;
std::vector<AnalysisKey *> AADeps;
+
+ friend class BatchAAResults;
+};
+
+/// This class is a wrapper over an AAResults, and it is intended to be used
+/// only when there are no IR changes in between queries. BatchAAResults
+/// reuses the same `AAQueryInfo` to preserve the state across queries,
+/// essentially making AA work in "batch mode". The internal state cannot be
+/// cleared, so to go "out-of-batch-mode", the user must either use AAResults,
+/// or create a new BatchAAResults.
+class BatchAAResults {
+ AAResults &AA;
+ AAQueryInfo AAQI;
+
+public:
+ BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ return AA.alias(LocA, LocB, AAQI);
+ }
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) {
+ return AA.pointsToConstantMemory(Loc, AAQI, OrLocal);
+ }
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+ return AA.getModRefInfo(Call, Loc, AAQI);
+ }
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+ return AA.getModRefInfo(Call1, Call2, AAQI);
+ }
+ ModRefInfo getModRefInfo(const Instruction *I,
+ const Optional<MemoryLocation> &OptLoc) {
+ return AA.getModRefInfo(I, OptLoc, AAQI);
+ }
+ ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2) {
+ return AA.getModRefInfo(I, Call2, AAQI);
+ }
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
+ return AA.getArgModRefInfo(Call, ArgIdx);
+ }
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
+ return AA.getModRefBehavior(Call);
+ }
};
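A minimal usage sketch of the batch wrapper above (editorial, not part of this diff); the helper name and the MemoryLocation/CallBase arguments are placeholders, and only BatchAAResults and its members come from this header.

// Hedged sketch: reuse one AAQueryInfo across several queries while the IR
// is known not to change; drop the wrapper (or fall back to AA) afterwards.
void queryInBatch(AAResults &AA, const MemoryLocation &LocA,
                  const MemoryLocation &LocB, const CallBase *Call) {
  BatchAAResults BatchAA(AA);
  AliasResult AR = BatchAA.alias(LocA, LocB);
  ModRefInfo MRI = BatchAA.getModRefInfo(Call, LocA);
  (void)AR;
  (void)MRI;
}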
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
@@ -735,12 +837,12 @@ public:
/// each other. This is the interface that must be implemented by specific
/// alias analysis implementations.
virtual AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) = 0;
+ const MemoryLocation &LocB, AAQueryInfo &AAQI) = 0;
/// Checks whether the given location points to constant memory, or if
/// \p OrLocal is true whether it points to a local alloca.
virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) = 0;
+ AAQueryInfo &AAQI, bool OrLocal) = 0;
/// @}
//===--------------------------------------------------------------------===//
@@ -764,13 +866,14 @@ public:
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
virtual ModRefInfo getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) = 0;
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) = 0;
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- virtual ModRefInfo getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) = 0;
+ virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) = 0;
/// @}
};
@@ -792,14 +895,14 @@ public:
void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); }
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- return Result.alias(LocA, LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) override {
+ return Result.alias(LocA, LocB, AAQI);
}
- bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool OrLocal) override {
- return Result.pointsToConstantMemory(Loc, OrLocal);
+ return Result.pointsToConstantMemory(Loc, AAQI, OrLocal);
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
@@ -814,14 +917,14 @@ public:
return Result.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) override {
- return Result.getModRefInfo(Call, Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) override {
+ return Result.getModRefInfo(Call, Loc, AAQI);
}
- ModRefInfo getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) override {
- return Result.getModRefInfo(Call1, Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) override {
+ return Result.getModRefInfo(Call1, Call2, AAQI);
}
};
@@ -867,13 +970,16 @@ protected:
AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult)
: AAR(AAR), CurrentResult(CurrentResult) {}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
- return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
+ return AAR ? AAR->alias(LocA, LocB, AAQI)
+ : CurrentResult.alias(LocA, LocB, AAQI);
}
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
- return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal)
- : CurrentResult.pointsToConstantMemory(Loc, OrLocal);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal) {
+ return AAR ? AAR->pointsToConstantMemory(Loc, AAQI, OrLocal)
+ : CurrentResult.pointsToConstantMemory(Loc, AAQI, OrLocal);
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
@@ -890,14 +996,16 @@ protected:
return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
- return AAR ? AAR->getModRefInfo(Call, Loc)
- : CurrentResult.getModRefInfo(Call, Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
+ return AAR ? AAR->getModRefInfo(Call, Loc, AAQI)
+ : CurrentResult.getModRefInfo(Call, Loc, AAQI);
}
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
- return AAR ? AAR->getModRefInfo(Call1, Call2)
- : CurrentResult.getModRefInfo(Call1, Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) {
+ return AAR ? AAR->getModRefInfo(Call1, Call2, AAQI)
+ : CurrentResult.getModRefInfo(Call1, Call2, AAQI);
}
};
@@ -921,11 +1029,13 @@ protected:
AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); }
public:
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
return MayAlias;
}
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal) {
return false;
}
@@ -941,11 +1051,13 @@ public:
return FMRB_UnknownModRefBehavior;
}
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
};
@@ -984,6 +1096,11 @@ bool isIdentifiedFunctionLocal(const Value *V);
/// This manager effectively wraps the AnalysisManager for registering alias
/// analyses. When you register your alias analysis with this manager, it will
/// ensure the analysis itself is registered with its AnalysisManager.
+///
+/// The result of this analysis is only invalidated if one of the particular
+/// aggregated AA results ends up being invalidated. This removes the need to
+/// explicitly preserve the results of `AAManager`. Note that analyses should no
+/// longer be registered once the `AAManager` is run.
class AAManager : public AnalysisInfoMixin<AAManager> {
public:
using Result = AAResults;
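As a rough illustration of the registration note above (editorial sketch, not part of this diff): an AAManager is usually populated and handed to the function analysis manager before any pass runs. BasicAA and the registerPass idiom are assumptions based on the customary new-pass-manager setup, not something this change introduces.

// Hedged sketch: register alias analyses up front, then add no more once
// the AAManager has produced its AAResults.
AAManager AA;
AA.registerFunctionAnalysis<BasicAA>();
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return std::move(AA); });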
diff --git a/include/llvm/Analysis/AliasAnalysisEvaluator.h b/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 0941814a56c3..972eceaa3ba9 100644
--- a/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -1,9 +1,8 @@
//===- AliasAnalysisEvaluator.h - Alias Analysis Accuracy Evaluator -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 7ed5cd5c4734..34a509b7f4bb 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/AliasSetTracker.h - Build Alias Sets -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,8 @@ namespace llvm {
class AliasSetTracker;
class BasicBlock;
class LoadInst;
+class Loop;
+class MemorySSA;
class AnyMemSetInst;
class AnyMemTransferInst;
class raw_ostream;
@@ -294,7 +295,8 @@ private:
void removeFromTracker(AliasSetTracker &AST);
void addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size,
- const AAMDNodes &AAInfo, bool KnownMustAlias = false);
+ const AAMDNodes &AAInfo, bool KnownMustAlias = false,
+ bool SkipSizeUpdate = false);
void addUnknownInst(Instruction *I, AliasAnalysis &AA);
void removeUnknownInst(AliasSetTracker &AST, Instruction *I) {
@@ -310,10 +312,10 @@ private:
}
public:
- /// Return true if the specified pointer "may" (or must) alias one of the
- /// members in the set.
- bool aliasesPointer(const Value *Ptr, LocationSize Size,
- const AAMDNodes &AAInfo, AliasAnalysis &AA) const;
+ /// If the specified pointer "may" (or must) alias one of the members in the
+ /// set return the appropriate AliasResult. Otherwise return NoAlias.
+ AliasResult aliasesPointer(const Value *Ptr, LocationSize Size,
+ const AAMDNodes &AAInfo, AliasAnalysis &AA) const;
bool aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const;
};
@@ -341,6 +343,8 @@ class AliasSetTracker {
struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
AliasAnalysis &AA;
+ MemorySSA *MSSA;
+ Loop *L;
ilist<AliasSet> AliasSets;
using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
@@ -353,6 +357,8 @@ public:
/// Create an empty collection of AliasSets, and use the specified alias
/// analysis object to disambiguate load and store addresses.
explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
+ explicit AliasSetTracker(AliasAnalysis &aa, MemorySSA *mssa, Loop *l)
+ : AA(aa), MSSA(mssa), L(l) {}
~AliasSetTracker() { clear(); }
/// These methods are used to add different types of instructions to the alias
@@ -377,6 +383,7 @@ public:
void add(BasicBlock &BB); // Add all instructions in basic block
void add(const AliasSetTracker &AST); // Add alias relations from another AST
void addUnknown(Instruction *I);
+ void addAllInstructionsInLoopUsingMSSA();
void clear();
@@ -439,7 +446,8 @@ private:
AliasSet &addPointer(MemoryLocation Loc, AliasSet::AccessLattice E);
AliasSet *mergeAliasSetsForPointer(const Value *Ptr, LocationSize Size,
- const AAMDNodes &AAInfo);
+ const AAMDNodes &AAInfo,
+ bool &MustAliasAll);
/// Merge all alias sets into a single set that is considered to alias any
/// pointer.
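A hedged sketch of the loop-scoped construction path added in this hunk; the enclosing pass is assumed to already hold AA (an AliasAnalysis&), MSSA (a MemorySSA*), and L (a Loop*), and only the three-argument constructor and addAllInstructionsInLoopUsingMSSA come from this change.

// Hedged sketch: build alias sets for one loop by walking MemorySSA rather
// than adding instructions individually.
AliasSetTracker AST(AA, MSSA, L);
AST.addAllInstructionsInLoopUsingMSSA();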
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index 46538b1fa86f..b42846472f2e 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -104,6 +103,10 @@ public:
/// not already be in the cache.
void registerAssumption(CallInst *CI);
+ /// Remove an \@llvm.assume intrinsic from this function's cache if it has
+ /// been added to the cache earlier.
+ void unregisterAssumption(CallInst *CI);
+
/// Update the cache of values being affected by this assumption (i.e.
/// the values about which this assumption provides information).
void updateAffectedValues(CallInst *CI);
@@ -209,6 +212,10 @@ public:
/// existing cache will be returned.
AssumptionCache &getAssumptionCache(Function &F);
+ /// Return the cached assumptions for a function if it has already been
+ /// scanned. Otherwise return nullptr.
+ AssumptionCache *lookupAssumptionCache(Function &F);
+
AssumptionCacheTracker();
~AssumptionCacheTracker() override;
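To illustrate the two lookups added above (editorial sketch; ACT, F, and AssumeCall are placeholders): getAssumptionCache scans the function on a cache miss, whereas lookupAssumptionCache only consults the existing cache and may return nullptr, which is the cheaper choice when the caller merely wants to drop a stale assumption.

// Hedged sketch: unregister an @llvm.assume only if a cache already exists.
if (AssumptionCache *AC = ACT.lookupAssumptionCache(F))
  AC->unregisterAssumption(AssumeCall);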
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index 820d7ac0935a..22e8c4b474cb 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -1,9 +1,8 @@
//===- BasicAliasAnalysis.h - Stateless, local Alias Analysis ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -82,14 +81,18 @@ public:
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
/// Chases pointers until we find a (constant global) or not.
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
/// Get the location associated with a pointer argument of a callsite.
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
@@ -141,11 +144,6 @@ private:
SmallVector<VariableGEPIndex, 4> VarIndices;
};
- /// Track alias queries to guard against recursion.
- using LocPair = std::pair<MemoryLocation, MemoryLocation>;
- using AliasCacheTy = SmallDenseMap<LocPair, AliasResult, 8>;
- AliasCacheTy AliasCache;
-
/// Tracks phi nodes we have visited.
///
/// When interpret "Value" pointer equality as value equality we need to make
@@ -200,22 +198,24 @@ private:
AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size,
const AAMDNodes &V1AAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2);
+ const Value *UnderlyingV1, const Value *UnderlyingV2,
+ AAQueryInfo &AAQI);
AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2);
+ const Value *UnderV2, AAQueryInfo &AAQI);
AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize,
const AAMDNodes &SIAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2);
+ const Value *UnderV2, AAQueryInfo &AAQI);
AliasResult aliasCheck(const Value *V1, LocationSize V1Size,
AAMDNodes V1AATag, const Value *V2,
LocationSize V2Size, AAMDNodes V2AATag,
- const Value *O1 = nullptr, const Value *O2 = nullptr);
+ AAQueryInfo &AAQI, const Value *O1 = nullptr,
+ const Value *O2 = nullptr);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 0b2618735697..8bcfd7ff8f58 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -1,9 +1,8 @@
//===- BlockFrequencyInfo.h - Block Frequency Analysis ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,7 +67,8 @@ public:
/// Returns the estimated profile count of \p BB.
/// This computes the relative block frequency of \p BB and multiplies it by
/// the enclosing function's count (if available) and returns the value.
- Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB) const;
+ Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB,
+ bool AllowSynthetic = false) const;
/// Returns the estimated profile count of \p Freq.
/// This uses the frequency \p Freq and multiplies it by
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 25b2efd33c98..bfe4fb14a2b8 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -1,9 +1,8 @@
//==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -160,10 +159,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) {
} // end namespace bfi_detail
-template <> struct isPodLike<bfi_detail::BlockMass> {
- static const bool value = true;
-};
-
/// Base class for BlockFrequencyInfoImpl
///
/// BlockFrequencyInfoImplBase has supporting data structures and some
@@ -187,9 +182,9 @@ public:
struct BlockNode {
using IndexType = uint32_t;
- IndexType Index = std::numeric_limits<uint32_t>::max();
+ IndexType Index;
- BlockNode() = default;
+ BlockNode() : Index(std::numeric_limits<uint32_t>::max()) {}
BlockNode(IndexType Index) : Index(Index) {}
bool operator==(const BlockNode &X) const { return Index == X.Index; }
@@ -525,9 +520,11 @@ public:
BlockFrequency getBlockFreq(const BlockNode &Node) const;
Optional<uint64_t> getBlockProfileCount(const Function &F,
- const BlockNode &Node) const;
+ const BlockNode &Node,
+ bool AllowSynthetic = false) const;
Optional<uint64_t> getProfileCountFromFreq(const Function &F,
- uint64_t Freq) const;
+ uint64_t Freq,
+ bool AllowSynthetic = false) const;
bool isIrrLoopHeader(const BlockNode &Node);
void setBlockFreq(const BlockNode &Node, uint64_t Freq);
@@ -973,13 +970,17 @@ public:
}
Optional<uint64_t> getBlockProfileCount(const Function &F,
- const BlockT *BB) const {
- return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB));
+ const BlockT *BB,
+ bool AllowSynthetic = false) const {
+ return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB),
+ AllowSynthetic);
}
Optional<uint64_t> getProfileCountFromFreq(const Function &F,
- uint64_t Freq) const {
- return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
+ uint64_t Freq,
+ bool AllowSynthetic = false) const {
+ return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq,
+ AllowSynthetic);
}
bool isIrrLoopHeader(const BlockT *BB) {
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 45277db46090..97cb730d16c7 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -1,9 +1,8 @@
//===- BranchProbabilityInfo.h - Branch Probability Analysis ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index caae0b6e2a8f..bb55e76ac86a 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -1,9 +1,8 @@
//===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -48,8 +47,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
-/// Determine whether instruction 'To' is reachable from 'From',
-/// returning true if uncertain.
+/// Determine whether instruction 'To' is reachable from 'From', without passing
+/// through any blocks in ExclusionSet, returning true if uncertain.
///
/// Determine whether there is a path from From to To within a single function.
/// Returns false only if we can prove that once 'From' has been executed then
@@ -63,9 +62,10 @@ bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
/// we find a block that dominates the block containing 'To'. DT is most useful
/// on branchy code but not loops, and LI is most useful on code with loops but
/// does not help on branchy code outside loops.
-bool isPotentiallyReachable(const Instruction *From, const Instruction *To,
- const DominatorTree *DT = nullptr,
- const LoopInfo *LI = nullptr);
+bool isPotentiallyReachable(
+ const Instruction *From, const Instruction *To,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
+ const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
/// Determine whether block 'To' is reachable from 'From', returning
/// true if uncertain.
@@ -89,6 +89,20 @@ bool isPotentiallyReachableFromMany(SmallVectorImpl<BasicBlock *> &Worklist,
const DominatorTree *DT = nullptr,
const LoopInfo *LI = nullptr);
+/// Determine whether there is at least one path from a block in
+/// 'Worklist' to 'StopBB' without passing through any blocks in
+/// 'ExclusionSet', returning true if uncertain.
+///
+/// Determine whether there is a path from at least one block in Worklist to
+/// StopBB within a single function without passing through any of the blocks
+/// in 'ExclusionSet'. Returns false only if we can prove that once any block
+/// in 'Worklist' has been reached then 'StopBB' can not be executed.
+/// Conservatively returns true.
+bool isPotentiallyReachableFromMany(
+ SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
+ const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
+
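A hedged call-site sketch for the exclusion-set overloads above; the instructions, blocks, DT, and LI are placeholders, and true remains the conservative answer.

// Hedged sketch: can `To` execute after `From` if the blocks in Forbidden
// are treated as cut points?
SmallPtrSet<BasicBlock *, 4> Forbidden = {BB1, BB2};
bool MayReach = isPotentiallyReachable(From, To, &Forbidden, &DT, &LI);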
/// Return true if the control flow in \p RPOTraversal is irreducible.
///
/// This is a generic implementation to detect CFG irreducibility based on loop
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 5996dd90bcfd..aaefc11653dd 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -1,9 +1,8 @@
//===-- CFGPrinter.h - CFG printer external interface -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/CFLAliasAnalysisUtils.h b/include/llvm/Analysis/CFLAliasAnalysisUtils.h
index 981a8ddc2289..02f999a5b913 100644
--- a/include/llvm/Analysis/CFLAliasAnalysisUtils.h
+++ b/include/llvm/Analysis/CFLAliasAnalysisUtils.h
@@ -1,9 +1,8 @@
//=- CFLAliasAnalysisUtils.h - Utilities for CFL Alias Analysis ----*- C++-*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// \file
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 8ae72553ab94..7c8b42b1d8d2 100644
--- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -1,9 +1,8 @@
//==- CFLAndersAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -61,7 +60,8 @@ public:
const cflaa::AliasSummary *getAliasSummary(const Function &);
AliasResult query(const MemoryLocation &, const MemoryLocation &);
- AliasResult alias(const MemoryLocation &, const MemoryLocation &);
+ AliasResult alias(const MemoryLocation &, const MemoryLocation &,
+ AAQueryInfo &);
private:
/// Ensures that the given function is available in the cache.
diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index 09e366f11e18..cc7a47cd9a5f 100644
--- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -1,9 +1,8 @@
//==- CFLSteensAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -70,7 +69,8 @@ public:
AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (LocA.Ptr == LocB.Ptr)
return MustAlias;
@@ -80,11 +80,11 @@ public:
// ConstantExpr, but every query needs to have at least one Value tied to a
// Function, and neither GlobalValues nor ConstantExprs are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
AliasResult QueryResult = query(LocA, LocB);
if (QueryResult == MayAlias)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
return QueryResult;
}
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 61b99f6c3e6b..8af5fb86995a 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -1,9 +1,8 @@
//===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -292,6 +291,21 @@ struct CGSCCUpdateResult {
/// post-order walk.
LazyCallGraph::SCC *UpdatedC;
+ /// Preserved analyses across SCCs.
+ ///
+ /// We specifically want to allow CGSCC passes to mutate ancestor IR
+ /// (changing both the CG structure and the function IR itself). However,
+ /// this means we need to take special care to correctly mark what analyses
+ /// are preserved *across* SCCs. We have to track this out-of-band here
+ /// because within the main `PassManager` infrastructure we need to mark
+ /// everything within an SCC as preserved in order to avoid repeatedly
+ /// invalidating the same analyses as we unnest pass managers and adaptors.
+ /// So we track the cross-SCC version of the preserved analyses here from any
+ /// code that does direct invalidation of SCC analyses, and then use it
+ /// whenever we move forward in the post-order walk of SCCs before running
+ /// passes over the new SCC.
+ PreservedAnalyses CrossSCCPA;
+
/// A hacky area where the inliner can retain history about inlining
/// decisions that mutated the call graph's SCC structure in order to avoid
/// infinite inlining. See the comments in the inliner's CG update logic.
@@ -339,175 +353,7 @@ public:
}
/// Runs the CGSCC pass across every SCC in the module.
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
- // Setup the CGSCC analysis manager from its proxy.
- CGSCCAnalysisManager &CGAM =
- AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
-
- // Get the call graph for this module.
- LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
-
- // We keep worklists to allow us to push more work onto the pass manager as
- // the passes are run.
- SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
- SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
-
- // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
- // iterating off the worklists.
- SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
- SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
-
- SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
- InlinedInternalEdges;
-
- CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet,
- InvalidSCCSet, nullptr, nullptr,
- InlinedInternalEdges};
-
- // Request PassInstrumentation from analysis manager, will use it to run
- // instrumenting callbacks for the passes later.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- CG.buildRefSCCs();
- for (auto RCI = CG.postorder_ref_scc_begin(),
- RCE = CG.postorder_ref_scc_end();
- RCI != RCE;) {
- assert(RCWorklist.empty() &&
- "Should always start with an empty RefSCC worklist");
- // The postorder_ref_sccs range we are walking is lazily constructed, so
- // we only push the first one onto the worklist. The worklist allows us
- // to capture *new* RefSCCs created during transformations.
- //
- // We really want to form RefSCCs lazily because that makes them cheaper
- // to update as the program is simplified and allows us to have greater
- // cache locality as forming a RefSCC touches all the parts of all the
- // functions within that RefSCC.
- //
- // We also eagerly increment the iterator to the next position because
- // the CGSCC passes below may delete the current RefSCC.
- RCWorklist.insert(&*RCI++);
-
- do {
- LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
- if (InvalidRefSCCSet.count(RC)) {
- LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
- continue;
- }
-
- assert(CWorklist.empty() &&
- "Should always start with an empty SCC worklist");
-
- LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
- << "\n");
-
- // Push the initial SCCs in reverse post-order as we'll pop off the
- // back and so see this in post-order.
- for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
- CWorklist.insert(&C);
-
- do {
- LazyCallGraph::SCC *C = CWorklist.pop_back_val();
- // Due to call graph mutations, we may have invalid SCCs or SCCs from
- // other RefSCCs in the worklist. The invalid ones are dead and the
- // other RefSCCs should be queued above, so we just need to skip both
- // scenarios here.
- if (InvalidSCCSet.count(C)) {
- LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
- continue;
- }
- if (&C->getOuterRefSCC() != RC) {
- LLVM_DEBUG(dbgs()
- << "Skipping an SCC that is now part of some other "
- "RefSCC...\n");
- continue;
- }
-
- do {
- // Check that we didn't miss any update scenario.
- assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert(&C->getOuterRefSCC() == RC &&
- "Processing an SCC in a different RefSCC!");
-
- UR.UpdatedRC = nullptr;
- UR.UpdatedC = nullptr;
-
- // Check the PassInstrumentation's BeforePass callbacks before
- // running the pass, skip its execution completely if asked to
- // (callback returns false).
- if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
- continue;
-
- PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
-
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
-
- // Update the SCC and RefSCC if necessary.
- C = UR.UpdatedC ? UR.UpdatedC : C;
- RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
-
- // If the CGSCC pass wasn't able to provide a valid updated SCC,
- // the current SCC may simply need to be skipped if invalid.
- if (UR.InvalidatedSCCs.count(C)) {
- LLVM_DEBUG(dbgs()
- << "Skipping invalidated root or island SCC!\n");
- break;
- }
- // Check that we didn't miss any update scenario.
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-
- // We handle invalidating the CGSCC analysis manager's information
- // for the (potentially updated) SCC here. Note that any other SCCs
- // whose structure has changed should have been invalidated by
- // whatever was updating the call graph. This SCC gets invalidated
- // late as it contains the nodes that were actively being
- // processed.
- CGAM.invalidate(*C, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
-
- // The pass may have restructured the call graph and refined the
- // current SCC and/or RefSCC. We need to update our current SCC and
- // RefSCC pointers to follow these. Also, when the current SCC is
- // refined, re-run the SCC pass over the newly refined SCC in order
- // to observe the most precise SCC model available. This inherently
- // cannot cycle excessively as it only happens when we split SCCs
- // apart, at most converging on a DAG of single nodes.
- // FIXME: If we ever start having RefSCC passes, we'll want to
- // iterate there too.
- if (UR.UpdatedC)
- LLVM_DEBUG(dbgs()
- << "Re-running SCC passes after a refinement of the "
- "current SCC: "
- << *UR.UpdatedC << "\n");
-
- // Note that both `C` and `RC` may at this point refer to deleted,
- // invalid SCC and RefSCCs respectively. But we will short circuit
- // the processing when we check them in the loop above.
- } while (UR.UpdatedC);
- } while (!CWorklist.empty());
-
- // We only need to keep internal inlined edge information within
- // a RefSCC, clear it to save on space and let the next time we visit
- // any of these functions have a fresh start.
- InlinedInternalEdges.clear();
- } while (!RCWorklist.empty());
- }
-
- // By definition we preserve the call garph, all SCC analyses, and the
- // analysis proxies by handling them above and in any nested pass managers.
- PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
- PA.preserve<LazyCallGraphAnalysis>();
- PA.preserve<CGSCCAnalysisManagerModuleProxy>();
- PA.preserve<FunctionAnalysisManagerModuleProxy>();
- return PA;
- }
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
CGSCCPassT Pass;
@@ -873,6 +719,210 @@ DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass,
return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations);
}
+// Out-of-line implementation details for templates below this point.
+
+template <typename CGSCCPassT>
+PreservedAnalyses
+ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ // Setup the CGSCC analysis manager from its proxy.
+ CGSCCAnalysisManager &CGAM =
+ AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
+
+ // Get the call graph for this module.
+ LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+
+ // We keep worklists to allow us to push more work onto the pass manager as
+ // the passes are run.
+ SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
+ SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
+
+ // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
+ // iterating off the worklists.
+ SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
+ SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
+
+ SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
+ InlinedInternalEdges;
+
+ CGSCCUpdateResult UR = {
+ RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet,
+ nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges};
+
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ CG.buildRefSCCs();
+ for (auto RCI = CG.postorder_ref_scc_begin(),
+ RCE = CG.postorder_ref_scc_end();
+ RCI != RCE;) {
+ assert(RCWorklist.empty() &&
+ "Should always start with an empty RefSCC worklist");
+ // The postorder_ref_sccs range we are walking is lazily constructed, so
+ // we only push the first one onto the worklist. The worklist allows us
+ // to capture *new* RefSCCs created during transformations.
+ //
+ // We really want to form RefSCCs lazily because that makes them cheaper
+ // to update as the program is simplified and allows us to have greater
+ // cache locality as forming a RefSCC touches all the parts of all the
+ // functions within that RefSCC.
+ //
+ // We also eagerly increment the iterator to the next position because
+ // the CGSCC passes below may delete the current RefSCC.
+ RCWorklist.insert(&*RCI++);
+
+ do {
+ LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
+ if (InvalidRefSCCSet.count(RC)) {
+ LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
+ continue;
+ }
+
+ assert(CWorklist.empty() &&
+ "Should always start with an empty SCC worklist");
+
+ LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
+ << "\n");
+
+ // Push the initial SCCs in reverse post-order as we'll pop off the
+ // back and so see this in post-order.
+ for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
+ CWorklist.insert(&C);
+
+ do {
+ LazyCallGraph::SCC *C = CWorklist.pop_back_val();
+ // Due to call graph mutations, we may have invalid SCCs or SCCs from
+ // other RefSCCs in the worklist. The invalid ones are dead and the
+ // other RefSCCs should be queued above, so we just need to skip both
+ // scenarios here.
+ if (InvalidSCCSet.count(C)) {
+ LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
+ continue;
+ }
+ if (&C->getOuterRefSCC() != RC) {
+ LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
+ "RefSCC...\n");
+ continue;
+ }
+
+ // Ensure we can proxy analysis updates from the CGSCC analysis
+ // manager into the Function analysis manager by getting a proxy here.
+ // FIXME: This seems like a bit of a hack. We should find a cleaner
+ // or more constructive way to ensure this happens.
+ (void)CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG);
+
+ // Each time we visit a new SCC pulled off the worklist,
+ // a transformation of a child SCC may have also modified this parent
+ // and invalidated analyses. So we invalidate using the update record's
+ // cross-SCC preserved set. This preserved set is intersected by any
+ // CGSCC pass that handles invalidation (primarily pass managers) prior
+ // to marking its SCC as preserved. That lets us track everything that
+ // might need invalidation across SCCs without excessive invalidations
+ // on a single SCC.
+ //
+ // This essentially allows SCC passes to freely invalidate analyses
+ // of any ancestor SCC. If this becomes detrimental to successfully
+ // caching analyses, we could force each SCC pass to manually
+ // invalidate the analyses for any SCCs other than themselves which
+ // are mutated. However, that seems to lose the robustness of the
+ // pass-manager driven invalidation scheme.
+ //
+ // FIXME: This is redundant in one case -- the top of the worklist may
+ // *also* be the same SCC we just ran over (and invalidated for). In
+ // that case, we'll end up doing a redundant invalidation here as
+ // a consequence.
+ CGAM.invalidate(*C, UR.CrossSCCPA);
+
+ do {
+ // Check that we didn't miss any update scenario.
+ assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+ assert(&C->getOuterRefSCC() == RC &&
+ "Processing an SCC in a different RefSCC!");
+
+ UR.UpdatedRC = nullptr;
+ UR.UpdatedC = nullptr;
+
+ // Check the PassInstrumentation's BeforePass callbacks before
+ // running the pass, skip its execution completely if asked to
+ // (callback returns false).
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+ continue;
+
+ PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
+ // Update the SCC and RefSCC if necessary.
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+ RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
+
+ // If the CGSCC pass wasn't able to provide a valid updated SCC,
+ // the current SCC may simply need to be skipped if invalid.
+ if (UR.InvalidatedSCCs.count(C)) {
+ LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
+ break;
+ }
+ // Check that we didn't miss any update scenario.
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // We handle invalidating the CGSCC analysis manager's information
+ // for the (potentially updated) SCC here. Note that any other SCCs
+ // whose structure has changed should have been invalidated by
+ // whatever was updating the call graph. This SCC gets invalidated
+ // late as it contains the nodes that were actively being
+ // processed.
+ CGAM.invalidate(*C, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ // Also intersect with the cross-SCC preserved set to capture any
+ // cross-SCC invalidation.
+ UR.CrossSCCPA.intersect(PassPA);
+ PA.intersect(std::move(PassPA));
+
+ // The pass may have restructured the call graph and refined the
+ // current SCC and/or RefSCC. We need to update our current SCC and
+ // RefSCC pointers to follow these. Also, when the current SCC is
+ // refined, re-run the SCC pass over the newly refined SCC in order
+ // to observe the most precise SCC model available. This inherently
+ // cannot cycle excessively as it only happens when we split SCCs
+ // apart, at most converging on a DAG of single nodes.
+ // FIXME: If we ever start having RefSCC passes, we'll want to
+ // iterate there too.
+ if (UR.UpdatedC)
+ LLVM_DEBUG(dbgs()
+ << "Re-running SCC passes after a refinement of the "
+ "current SCC: "
+ << *UR.UpdatedC << "\n");
+
+ // Note that both `C` and `RC` may at this point refer to deleted,
+ // invalid SCC and RefSCCs respectively. But we will short circuit
+ // the processing when we check them in the loop above.
+ } while (UR.UpdatedC);
+ } while (!CWorklist.empty());
+
+ // We only need to keep internal inlined edge information within
+ // a RefSCC, clear it to save on space and let the next time we visit
+ // any of these functions have a fresh start.
+ InlinedInternalEdges.clear();
+ } while (!RCWorklist.empty());
+ }
+
+ // By definition we preserve the call graph, all SCC analyses, and the
+ // analysis proxies by handling them above and in any nested pass managers.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+ PA.preserve<LazyCallGraphAnalysis>();
+ PA.preserve<CGSCCAnalysisManagerModuleProxy>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
+}
+
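For orientation, a hedged sketch of how this adaptor is typically instantiated from a module pipeline; InlinerPass is only an assumed example of a CGSCC pass and is not touched by this diff.

// Hedged sketch: run a CGSCC pipeline over every SCC in post-order when the
// module pass manager reaches the adaptor.
CGSCCPassManager CGPM;
CGPM.addPass(InlinerPass());
ModulePassManager MPM;
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));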
// Clear out the debug logging macro.
#undef DEBUG_TYPE
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index f109cf2fac4d..7a10183c4d91 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -1,9 +1,8 @@
//===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -48,8 +47,8 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
@@ -230,11 +229,11 @@ public:
}
/// Adds a function to the list of functions called by this one.
- void addCalledFunction(CallSite CS, CallGraphNode *M) {
- assert(!CS.getInstruction() || !CS.getCalledFunction() ||
- !CS.getCalledFunction()->isIntrinsic() ||
- !Intrinsic::isLeaf(CS.getCalledFunction()->getIntrinsicID()));
- CalledFunctions.emplace_back(CS.getInstruction(), M);
+ void addCalledFunction(CallBase *Call, CallGraphNode *M) {
+ assert(!Call || !Call->getCalledFunction() ||
+ !Call->getCalledFunction()->isIntrinsic() ||
+ !Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()));
+ CalledFunctions.emplace_back(Call, M);
M->AddRef();
}
@@ -247,7 +246,7 @@ public:
/// Removes the edge in the node for the specified call site.
///
/// Note that this method takes linear time, so it should be used sparingly.
- void removeCallEdgeFor(CallSite CS);
+ void removeCallEdgeFor(CallBase &Call);
/// Removes all call edges from this node to the specified callee
/// function.
@@ -264,7 +263,8 @@ public:
/// new one.
///
/// Note that this method takes linear time, so it should be used sparingly.
- void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
+ void replaceCallEdge(CallBase &Call, CallBase &NewCall,
+ CallGraphNode *NewNode);
private:
friend class CallGraph;
diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h
index ace54607634c..1b5b7e2f039e 100644
--- a/include/llvm/Analysis/CallGraphSCCPass.h
+++ b/include/llvm/Analysis/CallGraphSCCPass.h
@@ -1,9 +1,8 @@
//===- CallGraphSCCPass.h - Pass that operates BU on call graph -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/CallPrinter.h b/include/llvm/Analysis/CallPrinter.h
index 8b697d5aa149..8d4159f3ddc0 100644
--- a/include/llvm/Analysis/CallPrinter.h
+++ b/include/llvm/Analysis/CallPrinter.h
@@ -1,9 +1,8 @@
//===-- CallPrinter.h - Call graph printer external interface ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index aaaaff9ae252..ca7abd34fea2 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -1,9 +1,8 @@
//===----- llvm/Analysis/CaptureTracking.h - Pointer capture ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/CmpInstAnalysis.h b/include/llvm/Analysis/CmpInstAnalysis.h
index 0e9c6a96b0f4..3d34cd12aea4 100644
--- a/include/llvm/Analysis/CmpInstAnalysis.h
+++ b/include/llvm/Analysis/CmpInstAnalysis.h
@@ -1,9 +1,8 @@
//===-- CmpInstAnalysis.h - Utils to help fold compare insts ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 752902238522..1482b66a3080 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -1,9 +1,8 @@
//===- CodeMetrics.h - Code cost measurements -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/CallSite.h"
namespace llvm {
class AssumptionCache;
@@ -29,14 +27,6 @@ class DataLayout;
class TargetTransformInfo;
class Value;
-/// Check whether a call will lower to something small.
-///
-/// This tests checks whether this callsite will lower to something
-/// significantly cheaper than a traditional call, often a single
-/// instruction. Note that if isInstructionFree(CS.getInstruction()) would
-/// return true, so will this function.
-bool callIsSmall(ImmutableCallSite CS);
-
/// Utility to calculate the size and a few similar metrics for a set
/// of basic blocks.
struct CodeMetrics {
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 192c1abddcd2..2385b6f09c40 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -1,9 +1,8 @@
//===-- ConstantFolding.h - Fold instructions into constants ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,7 +22,7 @@
namespace llvm {
class APInt;
template <typename T> class ArrayRef;
-class CallSite;
+class CallBase;
class Constant;
class ConstantExpr;
class ConstantVector;
@@ -31,7 +30,6 @@ class DataLayout;
class Function;
class GlobalValue;
class Instruction;
-class ImmutableCallSite;
class TargetLibraryInfo;
class Type;
@@ -73,6 +71,12 @@ ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS,
Constant *RHS, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr);
+/// Attempt to constant fold a unary operation with the specified
+/// operand. If it fails, it returns a constant expression of the specified
+/// operands.
+Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+ const DataLayout &DL);
+
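A hedged one-liner for the new unary entry point declared above; Instruction::FNeg, C, and DL are placeholders standing in for a caller's operands.

// Hedged sketch: fold `fneg C`, or fall back to a constant expression.
Constant *Folded = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL);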
/// Attempt to constant fold a binary operation with the specified
/// operands. If it fails, it returns a constant expression of the specified
/// operands.
@@ -139,11 +143,11 @@ Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
/// canConstantFoldCallTo - Return true if its even possible to fold a call to
/// the specified function.
-bool canConstantFoldCallTo(ImmutableCallSite CS, const Function *F);
+bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
-Constant *ConstantFoldCall(ImmutableCallSite CS, Function *F,
+Constant *ConstantFoldCall(const CallBase *Call, Function *F,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI = nullptr);
@@ -155,7 +159,7 @@ Constant *ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
/// Check whether the given call has no side-effects.
/// Specifically checks for math routines which sometimes set errno.
-bool isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI);
+bool isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI);
}
#endif
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index b7447a0547d5..0410a3314659 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -1,9 +1,8 @@
//===-- DOTGraphTraitsPass.h - Print/View dotty graphs-----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h
index 4c4e3f6c99e7..04db3eb57c18 100644
--- a/include/llvm/Analysis/DemandedBits.h
+++ b/include/llvm/Analysis/DemandedBits.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/DemandedBits.h - Determine demanded bits ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index 69d0e2c1513e..997013a5fc8e 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -1,9 +1,8 @@
//===-- llvm/Analysis/DependenceAnalysis.h -------------------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -275,6 +274,10 @@ template <typename T> class ArrayRef;
LoopInfo *LI)
: AA(AA), SE(SE), LI(LI), F(F) {}
+ /// Handle transitive invalidation when the cached analysis results go away.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
+
/// depends - Tests for a dependence between the Src and Dst instructions.
/// Returns NULL if no dependence; otherwise, returns a Dependence (or a
/// FullDependence) with as much information as can be gleaned.
diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h
index d834862db095..3cfb9d13df94 100644
--- a/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/include/llvm/Analysis/DivergenceAnalysis.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/DomPrinter.h b/include/llvm/Analysis/DomPrinter.h
index 0ed28994995a..a177f877b295 100644
--- a/include/llvm/Analysis/DomPrinter.h
+++ b/include/llvm/Analysis/DomPrinter.h
@@ -1,9 +1,8 @@
//===-- DomPrinter.h - Dom printer external interface ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/DomTreeUpdater.h b/include/llvm/Analysis/DomTreeUpdater.h
index e5bb092d21ca..5ccce2e064cc 100644
--- a/include/llvm/IR/DomTreeUpdater.h
+++ b/include/llvm/Analysis/DomTreeUpdater.h
@@ -1,9 +1,8 @@
//===- DomTreeUpdater.h - DomTree/Post DomTree Updater ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DOMTREEUPDATER_H
-#define LLVM_DOMTREEUPDATER_H
+#ifndef LLVM_ANALYSIS_DOMTREEUPDATER_H
+#define LLVM_ANALYSIS_DOMTREEUPDATER_H
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
@@ -83,61 +82,114 @@ public:
/// Returns false under Eager UpdateStrategy or if PDT is nullptr.
bool hasPendingPostDomTreeUpdates() const;
- /// Apply updates on all available trees. Under Eager UpdateStrategy with
- /// ForceRemoveDuplicates enabled or under Lazy UpdateStrategy, it will
- /// discard duplicated updates and self-dominance updates. If both DT and PDT
- /// are nullptrs, this function discards all updates. The Eager Strategy
- /// applies the updates immediately while the Lazy Strategy queues the
- /// updates. It is required for the state of the LLVM IR to be updated
- /// *before* applying the Updates because the internal update routine will
- /// analyze the current state of the relationship between a pair of (From, To)
- /// BasicBlocks to determine whether a single update needs to be discarded.
- void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
- bool ForceRemoveDuplicates = false);
-
- /// Notify all available trees on an edge insertion. If both DT and PDT are
- /// nullptrs, this function discards the update. Under either Strategy,
- /// self-dominance update will be removed. The Eager Strategy applies
- /// the update immediately while the Lazy Strategy queues the update.
- /// It is recommended to only use this method when you have exactly one
- /// insertion (and no deletions). It is recommended to use applyUpdates() in
- /// all other cases. This function has to be called *after* making the update
- /// on the actual CFG. An internal functions checks if the edge exists in the
+ ///@{
+ /// \name Mutation APIs
+ ///
+ /// These methods provide APIs for submitting updates to the DominatorTree and
+ /// the PostDominatorTree.
+ ///
+ /// Note: There are two strategies to update the DominatorTree and the
+ /// PostDominatorTree:
+ /// 1. Eager UpdateStrategy: Updates are submitted and then flushed
+ /// immediately.
+ /// 2. Lazy UpdateStrategy: Updates are submitted but only flushed when you
+ /// explicitly call Flush APIs. It is recommended to use this update strategy
+ /// when you submit a bunch of updates multiple times which can then
+ /// add up to a large number of updates between two queries on the
+ /// DominatorTree. The incremental updater can reschedule the updates or
+ /// decide to recalculate the dominator tree in order to speedup the updating
+ /// process depending on the number of updates.
+ ///
+ /// Although GenericDomTree provides several update primitives,
+ /// using those APIs directly is discouraged.
+
+ /// Submit updates to all available trees.
+ /// The Eager Strategy flushes updates immediately while the Lazy Strategy
+ /// queues the updates.
+ ///
+ /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is
+ /// in sync with + all updates before that single update.
+ ///
+ /// CAUTION!
+ /// 1. It is required for the state of the LLVM IR to be updated
+ /// *before* submitting the updates because the internal update routine will
+ /// analyze the current state of the CFG to determine whether an update
+ /// is valid.
+ /// 2. It is illegal to submit any update that has already been submitted,
+ /// i.e., you must not insert an edge that already exists or delete an
+ /// edge that does not exist.
+ void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates);
+
+ /// Submit updates to all available trees. It will also
+ /// 1. discard duplicated updates,
+ /// 2. remove invalid updates. (An invalid update is the deletion of an edge
+ /// that still exists or the insertion of an edge that does not exist.)
+ /// The Eager Strategy flushes updates immediately while the Lazy Strategy
+ /// queues the updates.
+ ///
+ /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is
+ /// in sync with + all updates before that single update.
+ ///
+ /// CAUTION!
+ /// 1. It is required for the state of the LLVM IR to be updated
+ /// *before* submitting the updates because the internal update routine will
+ /// analyze the current state of the CFG to determine whether an update
+ /// is valid.
+ /// 2. It is illegal to submit any update that has already been submitted,
+ /// i.e., you must not insert an edge that already exists or delete an
+ /// edge that does not exist.
+ /// 3. It is only legal to submit updates to an edge in the order CFG changes
+ /// are made. The order you submit updates on different edges is not
+ /// restricted.
+ void applyUpdatesPermissive(ArrayRef<DominatorTree::UpdateType> Updates);
+
+ /// Notify DTU that the entry block was replaced.
+ /// Recalculate all available trees and flush all BasicBlocks
+ /// awaiting deletion immediately.
+ void recalculate(Function &F);
+
+ /// \deprecated { Submit an edge insertion to all available trees. The Eager
+ /// Strategy flushes this update immediately while the Lazy Strategy queues
+ /// the update. An internal function checks if the edge exists in the CFG in
+ /// DEBUG mode. CAUTION! This function has to be called *after* making the
+ /// update on the actual CFG. It is illegal to submit any update that has
+ /// already been applied. }
+ LLVM_ATTRIBUTE_DEPRECATED(void insertEdge(BasicBlock *From, BasicBlock *To),
+ "Use applyUpdates() instead.");
+
+ /// \deprecated {Submit an edge insertion to all available trees.
+ /// Under either Strategy, an invalid update will be discarded silently.
+ /// Invalid update means inserting an edge that does not exist in the CFG.
+ /// The Eager Strategy flushes this update immediately while the Lazy Strategy
+ /// queues the update. It is only recommended to use this method when you
+ /// want to discard an invalid update.
+ /// CAUTION! It is illegal to submit any update that has already been
+ /// submitted. }
+ LLVM_ATTRIBUTE_DEPRECATED(void insertEdgeRelaxed(BasicBlock *From,
+ BasicBlock *To),
+ "Use applyUpdatesPermissive() instead.");
+
+ /// \deprecated { Submit an edge deletion to all available trees. The Eager
+ /// Strategy flushes this update immediately while the Lazy Strategy queues
+ /// the update. An internal function checks if the edge doesn't exist in the
/// CFG in DEBUG mode.
- void insertEdge(BasicBlock *From, BasicBlock *To);
-
- /// Notify all available trees on an edge insertion.
- /// Under either Strategy, the following updates will be discard silently
- /// 1. Invalid - Inserting an edge that does not exist in the CFG.
- /// 2. Self-dominance update.
- /// 3. Both DT and PDT are nullptrs.
- /// The Eager Strategy applies the update immediately while the Lazy Strategy
- /// queues the update. It is recommended to only use this method when you have
- /// exactly one insertion (and no deletions) and want to discard an invalid
- /// update.
- void insertEdgeRelaxed(BasicBlock *From, BasicBlock *To);
-
- /// Notify all available trees on an edge deletion. If both DT and PDT are
- /// nullptrs, this function discards the update. Under either Strategy,
- /// self-dominance update will be removed. The Eager Strategy applies
- /// the update immediately while the Lazy Strategy queues the update.
- /// It is recommended to only use this method when you have exactly one
- /// deletion (and no insertions). It is recommended to use applyUpdates() in
- /// all other cases. This function has to be called *after* making the update
- /// on the actual CFG. An internal functions checks if the edge doesn't exist
- /// in the CFG in DEBUG mode.
- void deleteEdge(BasicBlock *From, BasicBlock *To);
-
- /// Notify all available trees on an edge deletion.
- /// Under either Strategy, the following updates will be discard silently
- /// 1. Invalid - Deleting an edge that still exists in the CFG.
- /// 2. Self-dominance update.
- /// 3. Both DT and PDT are nullptrs.
- /// The Eager Strategy applies the update immediately while the Lazy Strategy
- /// queues the update. It is recommended to only use this method when you have
- /// exactly one deletion (and no insertions) and want to discard an invalid
- /// update.
- void deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To);
+ /// CAUTION! This function has to be called *after* making the update on the
+ /// actual CFG. It is illegal to submit any update that has already been
+ /// submitted. }
+ LLVM_ATTRIBUTE_DEPRECATED(void deleteEdge(BasicBlock *From, BasicBlock *To),
+ "Use applyUpdates() instead.");
+
+ /// \deprecated { Submit an edge deletion to all available trees.
+ /// Under either Strategy, an invalid update will be discarded silently.
+ /// Invalid update means deleting an edge that exists in the CFG.
+ /// The Eager Strategy flushes this update immediately while the Lazy Strategy
+ /// queues the update. It is only recommended to use this method when you
+ /// want to discard an invalid update.
+ /// CAUTION! It is illegal to submit any update that has already been
+ /// submitted. }
+ LLVM_ATTRIBUTE_DEPRECATED(void deleteEdgeRelaxed(BasicBlock *From,
+ BasicBlock *To),
+ "Use applyUpdatesPermissive() instead.");
/// Delete DelBB. DelBB will be removed from its Parent and
/// erased from available trees if it exists and finally get deleted.
@@ -159,27 +211,32 @@ public:
void callbackDeleteBB(BasicBlock *DelBB,
std::function<void(BasicBlock *)> Callback);
- /// Recalculate all available trees and flush all BasicBlocks
- /// awaiting deletion immediately.
- void recalculate(Function &F);
+ ///@}
+
+ ///@{
+ /// \name Flush APIs
+ ///
+ /// CAUTION! By the time these flush APIs are called, the current CFG needs
+ /// to be the same as the CFG which DTU is in sync with + all updates
+ /// submitted.
/// Flush DomTree updates and return DomTree.
- /// It also flush out of date updates applied by all available trees
- /// and flush Deleted BBs if both trees are up-to-date.
+ /// It flushes Deleted BBs if both trees are up-to-date.
/// It must only be called when it has a DomTree.
DominatorTree &getDomTree();
/// Flush PostDomTree updates and return PostDomTree.
- /// It also flush out of date updates applied by all available trees
- /// and flush Deleted BBs if both trees are up-to-date.
+ /// It flushes Deleted BBs if both trees are up-to-date.
/// It must only be called when it has a PostDomTree.
PostDominatorTree &getPostDomTree();
/// Apply all pending updates to available trees and flush all BasicBlocks
/// awaiting deletion.
- /// Does nothing under Eager UpdateStrategy.
+
void flush();
+ ///@}
+
/// Debug method to help view the internal state of this class.
LLVM_DUMP_METHOD void dump() const;
@@ -221,11 +278,6 @@ private:
/// Returns true if at least one BasicBlock is deleted.
bool forceFlushDeletedBB();
- /// Deduplicate and remove unnecessary updates (no-ops) when using Lazy
- /// UpdateStrategy. Returns true if the update is queued for update.
- bool applyLazyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
- BasicBlock *To);
-
/// Helper function to apply all pending DomTree updates.
void applyDomTreeUpdates();
@@ -254,4 +306,4 @@ private:
};
} // namespace llvm
-#endif // LLVM_DOMTREEUPDATER_H
+#endif // LLVM_ANALYSIS_DOMTREEUPDATER_H
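A sketch of the intended calling pattern for the reworked mutation/flush APIs, assuming From/To describe an edge that has already been removed from the IR by the caller:

    #include "llvm/Analysis/DomTreeUpdater.h"

    void notifyEdgeDeleted(llvm::DominatorTree &DT, llvm::PostDominatorTree &PDT,
                           llvm::BasicBlock *From, llvm::BasicBlock *To) {
      llvm::DomTreeUpdater DTU(DT, PDT,
                               llvm::DomTreeUpdater::UpdateStrategy::Lazy);
      // Lazy strategy: the deletion is only queued here...
      DTU.applyUpdates({{llvm::DominatorTree::Delete, From, To}});
      // ...and flushed on demand when a tree is requested.
      (void)DTU.getDomTree();
    }
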
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
index d94c420d7177..c0bf30e162dd 100644
--- a/include/llvm/Analysis/DominanceFrontier.h
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/DominanceFrontierImpl.h b/include/llvm/Analysis/DominanceFrontierImpl.h
index 99224c0bf131..aa764be93b91 100644
--- a/include/llvm/Analysis/DominanceFrontierImpl.h
+++ b/include/llvm/Analysis/DominanceFrontierImpl.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/EHPersonalities.h b/include/llvm/Analysis/EHPersonalities.h
index fe0e65b828ca..d89aa11617b5 100644
--- a/include/llvm/Analysis/EHPersonalities.h
+++ b/include/llvm/Analysis/EHPersonalities.h
@@ -1,9 +1,8 @@
//===- EHPersonalities.h - Compute EH-related information -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h
index 3a664ca6ef50..d3fcfc2d41ab 100644
--- a/include/llvm/Analysis/GlobalsModRef.h
+++ b/include/llvm/Analysis/GlobalsModRef.h
@@ -1,9 +1,8 @@
//===- GlobalsModRef.h - Simple Mod/Ref AA for Globals ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -85,10 +84,12 @@ public:
//------------------------------------------------
// Implement the AliasAnalysis API
//
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
@@ -114,7 +115,7 @@ private:
bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
ModRefInfo getModRefInfoForArgument(const CallBase *Call,
- const GlobalValue *GV);
+ const GlobalValue *GV, AAQueryInfo &AAQI);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/GuardUtils.h b/include/llvm/Analysis/GuardUtils.h
index 3b151eeafc81..41e7b7c06c75 100644
--- a/include/llvm/Analysis/GuardUtils.h
+++ b/include/llvm/Analysis/GuardUtils.h
@@ -1,9 +1,8 @@
//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utils that are used to perform analyzes related to guards and their
@@ -15,12 +14,31 @@
namespace llvm {
+class BasicBlock;
class User;
+class Value;
-/// Returns true iff \p U has semantics of a guard.
+/// Returns true iff \p U has semantics of a guard expressed in a form of call
+/// of llvm.experimental.guard intrinsic.
bool isGuard(const User *U);
+/// Returns true iff \p U has semantics of a guard expressed in a form of a
+/// widenable conditional branch to deopt block.
+bool isGuardAsWidenableBranch(const User *U);
+
+/// If \p U is a widenable branch looking like:
+/// %cond = ...
+/// %wc = call i1 @llvm.experimental.widenable.condition()
+/// %branch_cond = and i1 %cond, %wc
+/// br i1 %branch_cond, label %if_true_bb, label %if_false_bb ; <--- U
+/// The function returns true, and the values %cond and %wc and the blocks
+/// %if_true_bb and %if_false_bb are returned in the parameters (Condition,
+/// WidenableCondition, IfTrueBB and IfFalseBB) respectively. If \p U does
+/// not match this pattern, return false.
+bool parseWidenableBranch(const User *U, Value *&Condition,
+ Value *&WidenableCondition, BasicBlock *&IfTrueBB,
+ BasicBlock *&IfFalseBB);
+
} // llvm
#endif // LLVM_ANALYSIS_GUARDUTILS_H
-
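A sketch (not from the patch) of how the new parser might be used to take a widenable branch apart; the branch user U is assumed to come from the caller:

    #include "llvm/Analysis/GuardUtils.h"

    bool splitGuardCondition(const llvm::User *U) {
      llvm::Value *Cond = nullptr, *WC = nullptr;
      llvm::BasicBlock *IfTrueBB = nullptr, *IfFalseBB = nullptr;
      if (!llvm::parseWidenableBranch(U, Cond, WC, IfTrueBB, IfFalseBB))
        return false; // Not a widenable branch.
      // Cond, WC, IfTrueBB and IfFalseBB now describe the guard's components.
      return true;
    }
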
diff --git a/include/llvm/Analysis/IVDescriptors.h b/include/llvm/Analysis/IVDescriptors.h
index 64b4ae23cc59..7be1fd3f5788 100644
--- a/include/llvm/Analysis/IVDescriptors.h
+++ b/include/llvm/Analysis/IVDescriptors.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/IVDescriptors.h - IndVar Descriptors -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -90,10 +89,12 @@ public:
RecurrenceDescriptor() = default;
RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
- MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
- bool Signed, SmallPtrSetImpl<Instruction *> &CI)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
- UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+ FastMathFlags FMF, MinMaxRecurrenceKind MK,
+ Instruction *UAI, Type *RT, bool Signed,
+ SmallPtrSetImpl<Instruction *> &CI)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
+ MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
+ IsSigned(Signed) {
CastInsts.insert(CI.begin(), CI.end());
}
@@ -199,6 +200,8 @@ public:
MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
+ FastMathFlags getFastMathFlags() { return FMF; }
+
TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
Instruction *getLoopExitInstr() { return LoopExitInstr; }
@@ -238,6 +241,9 @@ private:
Instruction *LoopExitInstr = nullptr;
// The kind of the recurrence.
RecurrenceKind Kind = RK_NoRecurrence;
+ // The fast-math flags on the recurrent instructions. We propagate these
+ // fast-math flags into the vectorized FP instructions we generate.
+ FastMathFlags FMF;
// If this a min/max recurrence the kind of recurrence.
MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
// First occurrence of unsafe algebra in the PHI's use-chain.
@@ -309,12 +315,16 @@ public:
/// not have the "fast-math" property. Such operation requires a relaxed FP
/// mode.
bool hasUnsafeAlgebra() {
- return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
+ return (IK == IK_FpInduction) && InductionBinOp &&
+ !cast<FPMathOperator>(InductionBinOp)->isFast();
}
/// Returns induction operator that does not have "fast-math" property
/// and requires FP unsafe mode.
Instruction *getUnsafeAlgebraInst() {
+ if (IK != IK_FpInduction)
+ return nullptr;
+
if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
return nullptr;
return InductionBinOp;
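A sketch of what the new fast-math-flags member enables; RD is assumed to describe an FP reduction identified elsewhere (e.g. by the vectorizer's legality analysis):

    #include "llvm/Analysis/IVDescriptors.h"
    #include "llvm/IR/IRBuilder.h"

    // Propagate the recurrence's fast-math flags onto the FP instructions a
    // transform is about to create.
    void seedBuilderFromReduction(llvm::RecurrenceDescriptor &RD,
                                  llvm::IRBuilder<> &Builder) {
      Builder.setFastMathFlags(RD.getFastMathFlags());
    }
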
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 035b974c5c1d..f8ea3bcca229 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
index be3a28424cf5..8a05e913a910 100644
--- a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
+++ b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
@@ -1,9 +1,8 @@
//===- IndirectCallPromotionAnalysis.h - Indirect call analysis -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Analysis/IndirectCallVisitor.h b/include/llvm/Analysis/IndirectCallVisitor.h
index d00cf63368f1..1d1f3f4cc5c0 100644
--- a/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/include/llvm/Analysis/IndirectCallVisitor.h
@@ -1,9 +1,8 @@
//===-- IndirectCallVisitor.h - indirect call visitor ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 4c270354b0c4..611c9de24e47 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -1,9 +1,8 @@
//===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,7 +22,7 @@
namespace llvm {
class AssumptionCacheTracker;
class BlockFrequencyInfo;
-class CallSite;
+class CallBase;
class DataLayout;
class Function;
class ProfileSummaryInfo;
@@ -68,10 +67,10 @@ class InlineCost {
};
/// The estimated cost of inlining this callsite.
- const int Cost;
+ int Cost;
/// The adjusted threshold against which this cost was computed.
- const int Threshold;
+ int Threshold;
/// Must be set for Always and Never instances.
const char *Reason = nullptr;
@@ -200,7 +199,7 @@ InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);
/// Return the cost associated with a callsite, including parameter passing
/// and the call/return instruction.
-int getCallsiteCost(CallSite CS, const DataLayout &DL);
+int getCallsiteCost(CallBase &Call, const DataLayout &DL);
/// Get an InlineCost object representing the cost of inlining this
/// callsite.
@@ -214,7 +213,7 @@ int getCallsiteCost(CallSite CS, const DataLayout &DL);
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
InlineCost getInlineCost(
- CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
@@ -225,14 +224,14 @@ InlineCost getInlineCost(
/// parameter in all other respects.
//
InlineCost
-getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
+getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE);
/// Minimal filter to detect invalid constructs for inlining.
-bool isInlineViable(Function &Callee);
+InlineResult isInlineViable(Function &Callee);
}
#endif
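A sketch of the CallBase-based interface; the analyses and the assumption-cache getter are assumed to be owned by the calling pass, and getInlineParams() is used with its defaults:

    #include <functional>
    #include "llvm/ADT/None.h"
    #include "llvm/Analysis/InlineCost.h"

    bool worthInlining(llvm::CallBase &Call,
                       llvm::TargetTransformInfo &CalleeTTI,
                       std::function<llvm::AssumptionCache &(llvm::Function &)> &GetAC,
                       llvm::ProfileSummaryInfo *PSI) {
      llvm::InlineCost IC = llvm::getInlineCost(Call, llvm::getInlineParams(),
                                                CalleeTTI, GetAC,
                                                /*GetBFI=*/llvm::None, PSI);
      if (IC) // Converts to true when the cost is below the threshold.
        return true;
      return false;
    }
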
diff --git a/include/llvm/Analysis/InstructionPrecedenceTracking.h b/include/llvm/Analysis/InstructionPrecedenceTracking.h
index 073e6ec3b7f6..3c3981066a49 100644
--- a/include/llvm/Analysis/InstructionPrecedenceTracking.h
+++ b/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -1,9 +1,8 @@
//===-- InstructionPrecedenceTracking.h -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Implements a class that is able to define some instructions as "special"
@@ -93,7 +92,7 @@ public:
/// example, throwing calls and guards do not always do this. If we need to know
/// for sure that some instruction is guaranteed to execute if the given block
/// is reached, then we need to make sure that there is no implicit control flow
-/// instruction (ICFI) preceeding it. For example, this check is required if we
+/// instruction (ICFI) preceding it. For example, this check is required if we
/// perform PRE and move a non-speculatable instruction to another place.
class ImplicitControlFlowTracking : public InstructionPrecedenceTracking {
public:
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 6662e91037e1..054ffca7215e 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -1,9 +1,8 @@
//===-- InstructionSimplify.h - Fold instrs into simpler forms --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,8 +40,8 @@ class Function;
template <typename T, typename... TArgs> class AnalysisManager;
template <class T> class ArrayRef;
class AssumptionCache;
+class CallBase;
class DominatorTree;
-class ImmutableCallSite;
class DataLayout;
class FastMathFlags;
struct LoopStandardAnalysisResults;
@@ -118,6 +117,10 @@ struct SimplifyQuery {
// deprecated.
// Please use the SimplifyQuery versions in new code.
+/// Given operand for an FNeg, fold the result or return null.
+Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for an Add, fold the result or return null.
Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
const SimplifyQuery &Q);
@@ -228,6 +231,15 @@ Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
+/// Given operand for a UnaryOperator, fold the result or return null.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
+
+/// Given operand for an FP UnaryOperator, fold the result or return null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
+/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
+Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for a BinaryOperator, fold the result or return null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
@@ -239,16 +251,7 @@ Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q);
/// Given a callsite, fold the result or return null.
-Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q);
-
-/// Given a function and iterators over arguments, fold the result or return
-/// null.
-Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin,
- User::op_iterator ArgEnd, const SimplifyQuery &Q);
-
-/// Given a function and set of arguments, fold the result or return null.
-Value *SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef<Value *> Args,
- const SimplifyQuery &Q);
+Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
/// See if we can compute a simplified version of this instruction. If not,
/// return null.
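A sketch routing an fneg through the new unary entry point, forwarding its fast-math flags; the SimplifyQuery Q is assumed to be populated by the caller:

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Operator.h"

    llvm::Value *trySimplifyFNeg(llvm::Instruction &I,
                                 const llvm::SimplifyQuery &Q) {
      if (I.getOpcode() != llvm::Instruction::FNeg)
        return nullptr;
      return llvm::SimplifyFPUnOp(llvm::Instruction::FNeg, I.getOperand(0),
                                  I.getFastMathFlags(), Q);
    }
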
diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h
index f3714dddedd5..5c9a4535bc7f 100644
--- a/include/llvm/Analysis/Interval.h
+++ b/include/llvm/Analysis/Interval.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/Interval.h - Interval Class Declaration ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h
index 6ffcae592e98..efaaf9715b3d 100644
--- a/include/llvm/Analysis/IntervalIterator.h
+++ b/include/llvm/Analysis/IntervalIterator.h
@@ -1,9 +1,8 @@
//===- IntervalIterator.h - Interval Iterator Declaration -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index 50335165711f..5b127c25a2b8 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -1,9 +1,8 @@
//===- IntervalPartition.h - Interval partition Calculation -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h
index 3083db75b81c..7c826780c318 100644
--- a/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -1,101 +1,89 @@
//===- IteratedDominanceFrontier.h - Calculate IDF --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// Compute iterated dominance frontiers using a linear time algorithm.
-///
-/// The algorithm used here is based on:
-///
-/// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
-/// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
-/// Programming Languages
-/// POPL '95. ACM, New York, NY, 62-73.
-///
-/// It has been modified to not explicitly use the DJ graph data structure and
-/// to directly compute pruned SSA using per-variable liveness information.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_IDF_H
#define LLVM_ANALYSIS_IDF_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFGDiff.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
namespace llvm {
-/// Determine the iterated dominance frontier, given a set of defining
-/// blocks, and optionally, a set of live-in blocks.
-///
-/// In turn, the results can be used to place phi nodes.
-///
-/// This algorithm is a linear time computation of Iterated Dominance Frontiers,
-/// pruned using the live-in set.
-/// By default, liveness is not used to prune the IDF computation.
-/// The template parameters should be either BasicBlock* or Inverse<BasicBlock
-/// *>, depending on if you want the forward or reverse IDF.
-template <class NodeTy, bool IsPostDom>
-class IDFCalculator {
- public:
- IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
- : DT(DT), GD(nullptr), useLiveIn(false) {}
-
- IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
- const GraphDiff<BasicBlock *, IsPostDom> *GD)
- : DT(DT), GD(GD), useLiveIn(false) {}
-
- /// Give the IDF calculator the set of blocks in which the value is
- /// defined. This is equivalent to the set of starting blocks it should be
- /// calculating the IDF for (though later gets pruned based on liveness).
- ///
- /// Note: This set *must* live for the entire lifetime of the IDF calculator.
- void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
- DefBlocks = &Blocks;
- }
-
- /// Give the IDF calculator the set of blocks in which the value is
- /// live on entry to the block. This is used to prune the IDF calculation to
- /// not include blocks where any phi insertion would be dead.
- ///
- /// Note: This set *must* live for the entire lifetime of the IDF calculator.
-
- void setLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
- LiveInBlocks = &Blocks;
- useLiveIn = true;
- }
+class BasicBlock;
- /// Reset the live-in block set to be empty, and tell the IDF
- /// calculator to not use liveness anymore.
- void resetLiveInBlocks() {
- LiveInBlocks = nullptr;
- useLiveIn = false;
+namespace IDFCalculatorDetail {
+
+/// Specialization for BasicBlock for the optional use of GraphDiff.
+template <bool IsPostDom> struct ChildrenGetterTy<BasicBlock, IsPostDom> {
+ using NodeRef = BasicBlock *;
+ using ChildrenTy = SmallVector<BasicBlock *, 8>;
+
+ ChildrenGetterTy() = default;
+ ChildrenGetterTy(const GraphDiff<BasicBlock *, IsPostDom> *GD) : GD(GD) {
+ assert(GD);
}
- /// Calculate iterated dominance frontiers
- ///
- /// This uses the linear-time phi algorithm based on DJ-graphs mentioned in
- /// the file-level comment. It performs DF->IDF pruning using the live-in
- /// set, to avoid computing the IDF for blocks where an inserted PHI node
- /// would be dead.
- void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks);
-
-private:
- DominatorTreeBase<BasicBlock, IsPostDom> &DT;
- const GraphDiff<BasicBlock *, IsPostDom> *GD;
- bool useLiveIn;
- const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
- const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
+ ChildrenTy get(const NodeRef &N);
+
+ const GraphDiff<BasicBlock *, IsPostDom> *GD = nullptr;
};
-typedef IDFCalculator<BasicBlock *, false> ForwardIDFCalculator;
-typedef IDFCalculator<Inverse<BasicBlock *>, true> ReverseIDFCalculator;
+
+} // end of namespace IDFCalculatorDetail
+
+template <bool IsPostDom>
+class IDFCalculator final : public IDFCalculatorBase<BasicBlock, IsPostDom> {
+public:
+ using IDFCalculatorBase =
+ typename llvm::IDFCalculatorBase<BasicBlock, IsPostDom>;
+ using ChildrenGetterTy = typename IDFCalculatorBase::ChildrenGetterTy;
+
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
+ : IDFCalculatorBase(DT) {}
+
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
+ const GraphDiff<BasicBlock *, IsPostDom> *GD)
+ : IDFCalculatorBase(DT, ChildrenGetterTy(GD)) {
+ assert(GD);
+ }
+};
+
+using ForwardIDFCalculator = IDFCalculator<false>;
+using ReverseIDFCalculator = IDFCalculator<true>;
+
+//===----------------------------------------------------------------------===//
+// Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace IDFCalculatorDetail {
+
+template <bool IsPostDom>
+typename ChildrenGetterTy<BasicBlock, IsPostDom>::ChildrenTy
+ChildrenGetterTy<BasicBlock, IsPostDom>::get(const NodeRef &N) {
+
+ using OrderedNodeTy =
+ typename IDFCalculatorBase<BasicBlock, IsPostDom>::OrderedNodeTy;
+
+ if (!GD) {
+ auto Children = children<OrderedNodeTy>(N);
+ return {Children.begin(), Children.end()};
+ }
+
+ using SnapShotBBPairTy =
+ std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, OrderedNodeTy>;
+
+ ChildrenTy Ret;
+ for (const auto &SnapShotBBPair : children<SnapShotBBPairTy>({GD, N}))
+ Ret.emplace_back(SnapShotBBPair.second);
+ return Ret;
}
+
+} // end of namespace IDFCalculatorDetail
+
+} // end of namespace llvm
+
#endif
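A sketch of the usual phi-placement pattern with the refactored calculator; DefBlocks is assumed to hold the blocks that define the value of interest:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/IteratedDominanceFrontier.h"
    #include "llvm/IR/Dominators.h"

    void computePhiPlacement(llvm::DominatorTree &DT,
                             const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &DefBlocks,
                             llvm::SmallVectorImpl<llvm::BasicBlock *> &PHIBlocks) {
      llvm::ForwardIDFCalculator IDF(DT);
      IDF.setDefiningBlocks(DefBlocks); // Set must outlive the calculation.
      IDF.calculate(PHIBlocks);         // Blocks that need phi nodes.
    }
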
diff --git a/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
index d1afb63d7e08..0e7dc943bacf 100644
--- a/include/llvm/Analysis/LazyBlockFrequencyInfo.h
+++ b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
@@ -1,9 +1,8 @@
//===- LazyBlockFrequencyInfo.h - Lazy Block Frequency Analysis -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index 9e6bcfedcbb9..cae0778cd16d 100644
--- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -1,9 +1,8 @@
//===- LazyBranchProbabilityInfo.h - Lazy Branch Probability ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index d1ec6a9dcc55..2d83929211e2 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -1,9 +1,8 @@
//===- LazyCallGraph.h - Analysis of a Module's call graph ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -39,6 +38,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -1083,12 +1083,26 @@ public:
continue;
}
+ // The blockaddress constant expression is a weird special case, we can't
+ // generically walk its operands the way we do for all other constants.
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- // The blockaddress constant expression is a weird special case, we
- // can't generically walk its operands the way we do for all other
- // constants.
- if (Visited.insert(BA->getFunction()).second)
- Worklist.push_back(BA->getFunction());
+ // If we've already visited the function referred to by the block
+ // address, we don't need to revisit it.
+ if (Visited.count(BA->getFunction()))
+ continue;
+
+ // If all of the blockaddress' users are instructions within the
+ // referred to function, we don't need to insert a cycle.
+ if (llvm::all_of(BA->users(), [&](User *U) {
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ return I->getFunction() == BA->getFunction();
+ return false;
+ }))
+ continue;
+
+ // Otherwise we should go visit the referred to function.
+ Visited.insert(BA->getFunction());
+ Worklist.push_back(BA->getFunction());
continue;
}
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 1a4fdb591427..570a5044f6f8 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -1,9 +1,8 @@
//===- LazyValueInfo.h - Value constraint analysis --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index fc426ad7fb64..0a338b816640 100644
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h
index db5919fd91c7..0fea81e215c9 100644
--- a/include/llvm/Analysis/Lint.h
+++ b/include/llvm/Analysis/Lint.h
@@ -1,9 +1,8 @@
//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index f110c28bfc6d..5df6bb02308d 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -1,9 +1,8 @@
//===- Loads.h - Local load analysis --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,7 +25,8 @@ class MDNode;
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
/// if the pointer is dereferenceable at the specified instruction.
-bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool isDereferenceablePointer(const Value *V, Type *Ty,
+ const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -34,8 +34,8 @@ bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
/// greater or equal than requested. If the context instruction is specified
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
- const DataLayout &DL,
+bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+ unsigned Align, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -56,7 +56,20 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+ const DataLayout &DL,
+ Instruction *ScanFrom = nullptr,
+ const DominatorTree *DT = nullptr);
+
+/// Return true if we know that executing a load from this value cannot trap.
+///
+/// If DT and ScanFrom are specified this method performs context-sensitive
+/// analysis and returns true if it is safe to load immediately before ScanFrom.
+///
+/// If it is not obviously safe to load from the specified pointer, we do a
+/// quick local scan of the basic block containing ScanFrom, to determine if
+/// the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
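A sketch of the new explicit-type overload, assuming the caller wants to speculate the load LI to the point ScanFrom:

    #include "llvm/Analysis/Loads.h"
    #include "llvm/IR/Instructions.h"

    bool canSpeculateLoad(llvm::LoadInst &LI, llvm::Instruction *ScanFrom,
                          const llvm::DataLayout &DL,
                          const llvm::DominatorTree *DT) {
      // Pass the loaded type explicitly instead of deriving it from the
      // pointer operand's pointee type.
      return llvm::isSafeToLoadUnconditionally(LI.getPointerOperand(),
                                               LI.getType(), LI.getAlignment(),
                                               DL, ScanFrom, DT);
    }
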
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 4ed00e207753..9e9aaa32c64f 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -523,6 +522,11 @@ public:
/// no memory dependence cycles.
bool canVectorizeMemory() const { return CanVecMem; }
+ /// Return true if there is a convergent operation in the loop. There may
+ /// still be reported runtime pointer checks that would be required, but it is
+ /// not legal to insert them.
+ bool hasConvergentOp() const { return HasConvergentOp; }
+
const RuntimePointerChecking *getRuntimePointerChecking() const {
return PtrRtChecking.get();
}
@@ -643,6 +647,7 @@ private:
/// Cache the result of analyzeLoop.
bool CanVecMem;
+ bool HasConvergentOp;
/// Indicator that there are non vectorizable stores to a uniform address.
bool HasDependenceInvolvingLoopInvariantAddress;
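A sketch of how a client might combine the new query with the existing one:

    #include "llvm/Analysis/LoopAccessAnalysis.h"

    bool canVectorizeWithChecks(const llvm::LoopAccessInfo &LAI) {
      // Memory must be vectorizable, and any required runtime checks must be
      // legal to insert, i.e. there is no convergent operation in the loop.
      return LAI.canVectorizeMemory() && !LAI.hasConvergentOp();
    }
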
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 00e562c4f31f..368a810cfa67 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -1,9 +1,8 @@
//===- LoopAnalysisManager.h - Loop analysis management ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -62,9 +61,6 @@ struct LoopStandardAnalysisResults {
MemorySSA *MSSA;
};
-/// Enables memory ssa as a dependency for loop passes.
-extern cl::opt<bool> EnableMSSALoopDependency;
-
/// Extern template declaration for the analysis set for this IR unit.
extern template class AllAnalysesOn<Loop>;
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 72873546a068..584eb3a8c854 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/LoopInfo.h - Natural Loop Calculator -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,8 +54,11 @@ namespace llvm {
class DominatorTree;
class LoopInfo;
class Loop;
+class InductionDescriptor;
class MDNode;
+class MemorySSAUpdater;
class PHINode;
+class ScalarEvolution;
class raw_ostream;
template <class N, bool IsPostDom> class DominatorTreeBase;
template <class N, class M> class LoopInfoBase;
@@ -199,9 +201,10 @@ public:
}
/// True if terminator in the block can branch to another block that is
- /// outside of the current loop.
+ /// outside of the current loop. \p BB must be inside the loop.
bool isLoopExiting(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
+ assert(contains(BB) && "Exiting block must be part of the loop");
for (const auto &Succ : children<const BlockT *>(BB)) {
if (!contains(Succ))
return true;
@@ -267,16 +270,20 @@ public:
/// Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
- /// This assumes that loop exits are in canonical form, i.e. all exits are
- /// dedicated exits.
void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+ /// Return all unique successor blocks of this loop, except that successors
+ /// reached only from the latch block are not considered. If an exit block
+ /// has both the latch and a non-latch block inside the loop as predecessors,
+ /// it is still added to ExitBlocks.
+ /// These are the blocks _outside of the current loop_ which are branched to.
+ void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+
/// If getUniqueExitBlocks would return exactly one block, return that block.
/// Otherwise return null.
BlockT *getUniqueExitBlock() const;
/// Edge type.
- typedef std::pair<const BlockT *, const BlockT *> Edge;
+ typedef std::pair<BlockT *, BlockT *> Edge;
/// Return all pairs of (_inside_block_,_outside_block_).
void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
@@ -309,6 +316,40 @@ public:
LoopLatches.push_back(Pred);
}
+ /// Return all inner loops in the loop nest rooted by the loop in preorder,
+ /// with siblings in forward program order.
+ template <class Type>
+ static void getInnerLoopsInPreorder(const LoopT &L,
+ SmallVectorImpl<Type> &PreOrderLoops) {
+ SmallVector<LoopT *, 4> PreOrderWorklist;
+ PreOrderWorklist.append(L.rbegin(), L.rend());
+
+ while (!PreOrderWorklist.empty()) {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but the worklist is
+ // processed backwards, so append them in reverse order.
+ PreOrderWorklist.append(L->rbegin(), L->rend());
+ PreOrderLoops.push_back(L);
+ }
+ }
+
+ /// Return all loops in the loop nest rooted by the loop in preorder, with
+ /// siblings in forward program order.
+ SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
+ SmallVector<const LoopT *, 4> PreOrderLoops;
+ const LoopT *CurLoop = static_cast<const LoopT *>(this);
+ PreOrderLoops.push_back(CurLoop);
+ getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+ return PreOrderLoops;
+ }
+ SmallVector<LoopT *, 4> getLoopsInPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops;
+ LoopT *CurLoop = static_cast<LoopT *>(this);
+ PreOrderLoops.push_back(CurLoop);
+ getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+ return PreOrderLoops;
+ }
+
//===--------------------------------------------------------------------===//
// APIs for updating loop information after changing the CFG
//
@@ -471,7 +512,7 @@ public:
public:
LocRange() {}
- LocRange(DebugLoc Start) : Start(std::move(Start)), End(std::move(Start)) {}
+ LocRange(DebugLoc Start) : Start(Start), End(Start) {}
LocRange(DebugLoc Start, DebugLoc End)
: Start(std::move(Start)), End(std::move(End)) {}
@@ -499,7 +540,8 @@ public:
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
bool makeLoopInvariant(Value *V, bool &Changed,
- Instruction *InsertPt = nullptr) const;
+ Instruction *InsertPt = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr) const;
/// If the given instruction is inside of the loop and it can be hoisted, do
/// so to make it trivially loop-invariant.
@@ -511,7 +553,8 @@ public:
/// If null, the terminator of the loop preheader is used.
///
bool makeLoopInvariant(Instruction *I, bool &Changed,
- Instruction *InsertPt = nullptr) const;
+ Instruction *InsertPt = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr) const;
/// Check to see if the loop has a canonical induction variable: an integer
/// recurrence that starts at 0 and increments by one each time through the
@@ -522,6 +565,170 @@ public:
///
PHINode *getCanonicalInductionVariable() const;
+ /// Obtain the unique incoming and back edge. Return false if they are
+ /// non-unique or the loop is dead; otherwise, return true.
+ bool getIncomingAndBackEdge(BasicBlock *&Incoming,
+ BasicBlock *&Backedge) const;
+
+ /// Below are some utilities to get loop bounds and induction variable, and
+ /// check if a given phinode is an auxiliary induction variable, as well as
+ /// checking if the loop is canonical.
+ ///
+ /// Here is an example:
+ /// \code
+ /// for (int i = lb; i < ub; i+=step)
+ /// <loop body>
+ /// --- pseudo LLVMIR ---
+ /// beforeloop:
+ /// guardcmp = (lb < ub)
+ /// if (guardcmp) goto preheader; else goto afterloop
+ /// preheader:
+ /// loop:
+ /// i_1 = phi[{lb, preheader}, {i_2, latch}]
+ /// <loop body>
+ /// i_2 = i_1 + step
+ /// latch:
+ /// cmp = (i_2 < ub)
+ /// if (cmp) goto loop
+ /// exit:
+ /// afterloop:
+ /// \endcode
+ ///
+ /// - getBounds
+ /// - getInitialIVValue --> lb
+ /// - getStepInst --> i_2 = i_1 + step
+ /// - getStepValue --> step
+ /// - getFinalIVValue --> ub
+ /// - getCanonicalPredicate --> '<'
+ /// - getDirection --> Increasing
+ ///
+ /// - getInductionVariable --> i_1
+ /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+ /// - isCanonical --> false
+ struct LoopBounds {
+ /// Return the LoopBounds object if
+ /// - the given \p IndVar is an induction variable
+ /// - the initial value of the induction variable can be found
+ /// - the step instruction of the induction variable can be found
+ /// - the final value of the induction variable can be found
+ ///
+ /// Else None.
+ static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
+ ScalarEvolution &SE);
+
+ /// Get the initial value of the loop induction variable.
+ Value &getInitialIVValue() const { return InitialIVValue; }
+
+ /// Get the instruction that updates the loop induction variable.
+ Instruction &getStepInst() const { return StepInst; }
+
+ /// Get the step that the loop induction variable gets updated by in each
+ /// loop iteration. Return nullptr if not found.
+ Value *getStepValue() const { return StepValue; }
+
+ /// Get the final value of the loop induction variable.
+ Value &getFinalIVValue() const { return FinalIVValue; }
+
+ /// Return the canonical predicate for the latch compare instruction, if
+ /// able to be calculated. Else BAD_ICMP_PREDICATE.
+ ///
+ /// A predicate is considered canonical if the requirements below are all
+ /// satisfied:
+ /// 1. The first successor of the latch branch is the loop header
+ /// If not, invert the predicate.
+ /// 2. One of the operands of the latch comparison is StepInst
+ /// If not, and
+ /// - if the current calculated predicate is not ne or eq, flip the
+ /// predicate.
+ /// - else if the loop is increasing, return slt
+ /// (notice that it is safe to change from ne or eq to sign compare)
+ /// - else if the loop is decreasing, return sgt
+ /// (notice that it is safe to change from ne or eq to sign compare)
+ ///
+ /// Here is an example when both (1) and (2) are not satisfied:
+ /// \code
+ /// loop.header:
+ /// %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
+ /// %inc = add %iv, %step
+ /// %cmp = slt %iv, %finaliv
+ /// br %cmp, %loop.exit, %loop.header
+ /// loop.exit:
+ /// \endcode
+ /// - The second successor of the latch branch is the loop header instead
+ /// of the first successor (slt -> sge)
+ /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
+ /// instead of the StepInst (%inc) (sge -> sgt)
+ ///
+ /// The predicate would be sgt if both (1) and (2) are satisfied.
+ /// getCanonicalPredicate() returns sgt for this example.
+ /// Note: The IR is not changed.
+ ICmpInst::Predicate getCanonicalPredicate() const;
+
+ /// An enum for the direction of the loop
+ /// - for (int i = 0; i < ub; ++i) --> Increasing
+ /// - for (int i = ub; i > 0; --i) --> Decreasing
+ /// - for (int i = x; i != y; i+=z) --> Unknown
+ enum class Direction { Increasing, Decreasing, Unknown };
+
+ /// Get the direction of the loop.
+ Direction getDirection() const;
+
+ private:
+ LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
+ ScalarEvolution &SE)
+ : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
+ FinalIVValue(F), SE(SE) {}
+
+ const Loop &L;
+
+ // The initial value of the loop induction variable
+ Value &InitialIVValue;
+
+ // The instruction that updates the loop induction variable
+ Instruction &StepInst;
+
+ // The value that the loop induction variable gets updated by in each loop
+ // iteration
+ Value *StepValue;
+
+ // The final value of the loop induction variable
+ Value &FinalIVValue;
+
+ ScalarEvolution &SE;
+ };
+
+ /// Return the struct LoopBounds collected if all struct members are found,
+ /// else None.
+ Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
+
+ /// Return the loop induction variable if found, else return nullptr.
+ /// An instruction is considered the loop induction variable if
+ /// - it is an induction variable of the loop; and
+ /// - it is used to determine the condition of the branch in the loop latch
+ ///
+ /// Note: the induction variable doesn't need to be canonical, i.e. start at
+ /// zero and increment by one each time through the loop (but it can be).
+ PHINode *getInductionVariable(ScalarEvolution &SE) const;
+
+ /// Get the loop induction descriptor for the loop induction variable. Return
+ /// true if the loop induction variable is found.
+ bool getInductionDescriptor(ScalarEvolution &SE,
+ InductionDescriptor &IndDesc) const;
+
+ /// Return true if the given PHINode \p AuxIndVar is
+ /// - in the loop header
+ /// - not used outside of the loop
+ /// - incremented by a loop invariant step for each loop iteration
+ /// - step instruction opcode should be add or sub
+ /// Note: the auxiliary induction variable is not required to be used in the
+ /// conditional branch in the loop latch (but it can be).
+ bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+ ScalarEvolution &SE) const;
+
+ /// Return true if the loop induction variable starts at zero and increments
+ /// by one each time through the loop.
+ bool isCanonical(ScalarEvolution &SE) const;
+
/// Return true if the Loop is in LCSSA form.
bool isLCSSAForm(DominatorTree &DT) const;
@@ -1015,6 +1222,26 @@ MDNode *findOptionMDForLoop(const Loop *TheLoop, StringRef Name);
/// is representing an access group.
bool isValidAsAccessGroup(MDNode *AccGroup);
+/// Create a new LoopID after the loop has been transformed.
+///
+/// This can be used when no follow-up loop attributes are defined
+/// (llvm::makeFollowupLoopID returning None) to stop transformations from
+/// being applied again.
+///
+/// @param Context The LLVMContext in which to create the new LoopID.
+/// @param OrigLoopID The original LoopID; can be nullptr if the original
+/// loop has no LoopID.
+/// @param RemovePrefixes Remove all loop attributes that have these prefixes.
+/// Use to remove metadata of the transformation that has
+/// been applied.
+/// @param AddAttrs Add these loop attributes to the new LoopID.
+///
+/// @return A new LoopID that can be applied using Loop::setLoopID().
+llvm::MDNode *
+makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
+ llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
+ llvm::ArrayRef<llvm::MDNode *> AddAttrs);
+
} // End llvm namespace
#endif
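
The loop-bounds and preorder utilities declared above are easiest to see together in a small analysis snippet. The following is a minimal sketch, assuming LoopInfo and ScalarEvolution results are already available (for example from a FunctionAnalysisManager); printLoopBounds is a hypothetical helper and not part of this patch.

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Walk every loop in preorder and print its bounds when they can be computed.
static void printLoopBounds(LoopInfo &LI, ScalarEvolution &SE) {
  for (Loop *Root : LI)
    for (Loop *L : Root->getLoopsInPreorder()) {
      Optional<Loop::LoopBounds> Bounds = L->getBounds(SE);
      if (!Bounds)
        continue; // No identifiable induction variable or bounds.
      errs() << "initial IV value: " << Bounds->getInitialIVValue() << "\n"
             << "final IV value:   " << Bounds->getFinalIVValue() << "\n";
      if (Value *Step = Bounds->getStepValue())
        errs() << "step:             " << *Step << "\n";
    }
}
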
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 2b807919fedf..4c33dac9e21e 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/LoopInfoImpl.h - Natural Loop Calculator ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -96,49 +95,36 @@ bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const {
return true;
}
+// Helper function to get unique loop exits. Pred is a predicate selecting the
+// BasicBlocks of the loop that should be considered when looking for exits.
+template <class BlockT, class LoopT, typename PredicateT>
+void getUniqueExitBlocksHelper(const LoopT *L,
+ SmallVectorImpl<BlockT *> &ExitBlocks,
+ PredicateT Pred) {
+ assert(!L->isInvalid() && "Loop not in a valid state!");
+ SmallPtrSet<BlockT *, 32> Visited;
+ auto Filtered = make_filter_range(L->blocks(), Pred);
+ for (BlockT *BB : Filtered)
+ for (BlockT *Successor : children<BlockT *>(BB))
+ if (!L->contains(Successor))
+ if (Visited.insert(Successor).second)
+ ExitBlocks.push_back(Successor);
+}
+
template <class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::getUniqueExitBlocks(
SmallVectorImpl<BlockT *> &ExitBlocks) const {
- typedef GraphTraits<BlockT *> BlockTraits;
- typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits;
-
- assert(hasDedicatedExits() &&
- "getUniqueExitBlocks assumes the loop has canonical form exits!");
-
- SmallVector<BlockT *, 32> SwitchExitBlocks;
- for (BlockT *Block : this->blocks()) {
- SwitchExitBlocks.clear();
- for (BlockT *Successor : children<BlockT *>(Block)) {
- // If block is inside the loop then it is not an exit block.
- if (contains(Successor))
- continue;
-
- BlockT *FirstPred = *InvBlockTraits::child_begin(Successor);
-
- // If current basic block is this exit block's first predecessor then only
- // insert exit block in to the output ExitBlocks vector. This ensures that
- // same exit block is not inserted twice into ExitBlocks vector.
- if (Block != FirstPred)
- continue;
-
- // If a terminator has more then two successors, for example SwitchInst,
- // then it is possible that there are multiple edges from current block to
- // one exit block.
- if (std::distance(BlockTraits::child_begin(Block),
- BlockTraits::child_end(Block)) <= 2) {
- ExitBlocks.push_back(Successor);
- continue;
- }
+ getUniqueExitBlocksHelper(this, ExitBlocks,
+ [](const BlockT *BB) { return true; });
+}
- // In case of multiple edges from current block to exit block, collect
- // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
- // duplicate edges.
- if (!is_contained(SwitchExitBlocks, Successor)) {
- SwitchExitBlocks.push_back(Successor);
- ExitBlocks.push_back(Successor);
- }
- }
- }
+template <class BlockT, class LoopT>
+void LoopBase<BlockT, LoopT>::getUniqueNonLatchExitBlocks(
+ SmallVectorImpl<BlockT *> &ExitBlocks) const {
+ const BlockT *Latch = getLoopLatch();
+ assert(Latch && "Latch block must exist");
+ getUniqueExitBlocksHelper(this, ExitBlocks,
+ [Latch](const BlockT *BB) { return BB != Latch; });
}
template <class BlockT, class LoopT>
@@ -588,16 +574,9 @@ SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
// FIXME: If we change the order of LoopInfo we will want to remove the
// reverse here.
for (LoopT *RootL : reverse(*this)) {
- assert(PreOrderWorklist.empty() &&
- "Must start with an empty preorder walk worklist.");
- PreOrderWorklist.push_back(RootL);
- do {
- LoopT *L = PreOrderWorklist.pop_back_val();
- // Sub-loops are stored in forward program order, but will process the
- // worklist backwards so append them in reverse order.
- PreOrderWorklist.append(L->rbegin(), L->rend());
- PreOrderLoops.push_back(L);
- } while (!PreOrderWorklist.empty());
+ auto PreOrderLoopsInRootL = RootL->getLoopsInPreorder();
+ PreOrderLoops.append(PreOrderLoopsInRootL.begin(),
+ PreOrderLoopsInRootL.end());
}
return PreOrderLoops;
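
For callers, the practical difference between the two exit queries is only whether blocks reachable solely through the latch are reported. A short sketch, assuming L points to a Loop in a valid state:

SmallVector<BasicBlock *, 8> AllExits, NonLatchExits;
L->getUniqueExitBlocks(AllExits);              // every unique exit block
L->getUniqueNonLatchExitBlocks(NonLatchExits); // exits from non-latch blocks
// An exit reached only through the latch appears in AllExits but not in
// NonLatchExits. Note: getUniqueNonLatchExitBlocks asserts that a latch exists.
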
diff --git a/include/llvm/Analysis/LoopIterator.h b/include/llvm/Analysis/LoopIterator.h
index 91c54b23029b..fa4da4283f55 100644
--- a/include/llvm/Analysis/LoopIterator.h
+++ b/include/llvm/Analysis/LoopIterator.h
@@ -1,9 +1,8 @@
//===--------- LoopIterator.h - Iterate over loop blocks --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines iterators to visit the basic blocks within a loop.
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index 86cfecd9df11..9215ab34ec6d 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -1,9 +1,8 @@
//===- LoopPass.h - LoopPass class ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/LoopUnrollAnalyzer.h b/include/llvm/Analysis/LoopUnrollAnalyzer.h
index f45bf0b223b8..5f332e3cac16 100644
--- a/include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ b/include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/LoopUnrollAnalyzer.h - Loop Unroll Analyzer-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 5418128f16ef..49f9e58ffad7 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -1,9 +1,8 @@
//==- llvm/Analysis/MemoryBuiltins.h - Calls to memory builtins --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -84,6 +84,15 @@ bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+/// Tests if a value is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
+/// Tests if a function is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
+
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//
@@ -135,6 +144,9 @@ inline CallInst *extractCallocCall(Value *I, const TargetLibraryInfo *TLI) {
// free Call Utility Functions.
//
+/// isLibFreeFunction - Returns true if the function is a builtin free()
+bool isLibFreeFunction(const Function *F, const LibFunc TLIFn);
+
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI);
@@ -178,14 +190,13 @@ bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {});
/// Try to turn a call to \@llvm.objectsize into an integer value of the given
-/// Type. Returns null on failure.
-/// If MustSucceed is true, this function will not return null, and may return
-/// conservative values governed by the second argument of the call to
-/// objectsize.
-ConstantInt *lowerObjectSizeCall(IntrinsicInst *ObjectSize,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool MustSucceed);
+/// Type. Returns null on failure. If MustSucceed is true, this function will
+/// not return null, and may return conservative values governed by the second
+/// argument of the call to objectsize.
+Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, bool MustSucceed);
+
+
using SizeOffsetType = std::pair<APInt, APInt>;
@@ -252,7 +263,7 @@ using SizeOffsetEvalType = std::pair<Value *, Value *>;
/// May create code to compute the result at run-time.
class ObjectSizeOffsetEvaluator
: public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> {
- using BuilderTy = IRBuilder<TargetFolder>;
+ using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>;
using CacheMapTy = DenseMap<const Value *, WeakEvalType>;
using PtrSetTy = SmallPtrSet<const Value *, 8>;
@@ -265,17 +276,18 @@ class ObjectSizeOffsetEvaluator
Value *Zero;
CacheMapTy CacheMap;
PtrSetTy SeenVals;
- bool RoundToAlign;
-
- SizeOffsetEvalType unknown() {
- return std::make_pair(nullptr, nullptr);
- }
+ ObjectSizeOpts EvalOpts;
+ SmallPtrSet<Instruction *, 8> InsertedInstructions;
SizeOffsetEvalType compute_(Value *V);
public:
+ static SizeOffsetEvalType unknown() {
+ return std::make_pair(nullptr, nullptr);
+ }
+
ObjectSizeOffsetEvaluator(const DataLayout &DL, const TargetLibraryInfo *TLI,
- LLVMContext &Context, bool RoundToAlign = false);
+ LLVMContext &Context, ObjectSizeOpts EvalOpts = {});
SizeOffsetEvalType compute(Value *V);
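
Since lowerObjectSizeCall now returns a Value rather than a ConstantInt, callers folding @llvm.objectsize must not assume a constant result. A hedged sketch of the adjusted caller pattern, assuming ObjSizeCall, DL and TLI are already in scope:

// With MustSucceed=true a replacement is always produced, but it may be a
// non-constant Value (e.g. when a dynamic objectsize request is lowered).
if (Value *Lowered = lowerObjectSizeCall(ObjSizeCall, DL, TLI,
                                         /*MustSucceed=*/true)) {
  ObjSizeCall->replaceAllUsesWith(Lowered);
  ObjSizeCall->eraseFromParent();
}
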
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 958d4fe4b832..e2669c2fa601 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/MemoryDependenceAnalysis.h - Memory Deps ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -382,7 +381,8 @@ public:
///
/// See the class comment for more details. It is illegal to call this on
/// non-memory instructions.
- MemDepResult getDependency(Instruction *QueryInst);
+ MemDepResult getDependency(Instruction *QueryInst,
+ OrderedBasicBlock *OBB = nullptr);
/// Perform a full dependency query for the specified call, returning the set
/// of blocks that the value is potentially live across.
@@ -448,14 +448,14 @@ public:
BasicBlock::iterator ScanIt,
BasicBlock *BB,
Instruction *QueryInst = nullptr,
- unsigned *Limit = nullptr);
-
- MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
- bool isLoad,
- BasicBlock::iterator ScanIt,
- BasicBlock *BB,
- Instruction *QueryInst,
- unsigned *Limit = nullptr);
+ unsigned *Limit = nullptr,
+ OrderedBasicBlock *OBB = nullptr);
+
+ MemDepResult
+ getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB,
+ Instruction *QueryInst, unsigned *Limit,
+ OrderedBasicBlock *OBB);
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h
index fca18c1b5999..7c26353e618b 100644
--- a/include/llvm/Analysis/MemoryLocation.h
+++ b/include/llvm/Analysis/MemoryLocation.h
@@ -1,9 +1,8 @@
//===- MemoryLocation.h - Memory location descriptions ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index 17e2d0c73977..b7730be75354 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -1,9 +1,8 @@
//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -105,6 +104,9 @@
namespace llvm {
+/// Enables memory ssa as a dependency for loop passes.
+extern cl::opt<bool> EnableMSSALoopDependency;
+
class Function;
class Instruction;
class MemoryAccess;
@@ -701,6 +703,11 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess)
class MemorySSA {
public:
MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
+
+ // MemorySSA must remain where it's constructed; Walkers it creates store
+ // pointers to it.
+ MemorySSA(MemorySSA &&) = delete;
+
~MemorySSA();
MemorySSAWalker *getWalker();
@@ -776,9 +783,6 @@ public:
/// all uses, uses appear in the right places). This is used by unit tests.
void verifyMemorySSA() const;
- /// Check clobber sanity for an access.
- void checkClobberSanityAccess(const MemoryAccess *MA) const;
-
/// Used in various insertion functions to specify whether we are talking
/// about the beginning or end of a block.
enum InsertionPlace { Beginning, End };
@@ -793,7 +797,6 @@ protected:
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
void verifyDominationNumbers(const Function &F) const;
- void verifyClobberSanity(const Function &F) const;
// This is used by the use optimizer and updater.
AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
@@ -830,13 +833,13 @@ protected:
const MemoryUseOrDef *Template = nullptr);
private:
- class ClobberWalkerBase;
- class CachingWalker;
- class SkipSelfWalker;
+ template <class AliasAnalysisType> class ClobberWalkerBase;
+ template <class AliasAnalysisType> class CachingWalker;
+ template <class AliasAnalysisType> class SkipSelfWalker;
class OptimizeUses;
- CachingWalker *getWalkerImpl();
- void buildMemorySSA();
+ CachingWalker<AliasAnalysis> *getWalkerImpl();
+ void buildMemorySSA(BatchAAResults &BAA);
void optimizeUses();
void prepareForMoveTo(MemoryAccess *, BasicBlock *);
@@ -850,7 +853,8 @@ private:
void markUnreachableAsLiveOnEntry(BasicBlock *BB);
bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
MemoryPhi *createMemoryPhi(BasicBlock *BB);
- MemoryUseOrDef *createNewAccess(Instruction *,
+ template <typename AliasAnalysisType>
+ MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
const MemoryUseOrDef *Template = nullptr);
MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
@@ -886,9 +890,9 @@ private:
mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
// Memory SSA building info
- std::unique_ptr<ClobberWalkerBase> WalkerBase;
- std::unique_ptr<CachingWalker> Walker;
- std::unique_ptr<SkipSelfWalker> SkipWalker;
+ std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
+ std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
+ std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
unsigned NextID;
};
@@ -932,6 +936,9 @@ public:
MemorySSA &getMSSA() { return *MSSA.get(); }
std::unique_ptr<MemorySSA> MSSA;
+
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
};
Result run(Function &F, FunctionAnalysisManager &AM);
@@ -1044,8 +1051,6 @@ public:
/// the walker it uses or returns.
virtual void invalidateInfo(MemoryAccess *) {}
- virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); }
-
protected:
friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
// constructor.
@@ -1101,15 +1106,15 @@ public:
assert(Access && "Tried to access past the end of our iterator");
// Go to the first argument for phis, and the defining access for everything
// else.
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
+ if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
return MP->getIncomingValue(ArgNo);
return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
}
using BaseT::operator++;
- memoryaccess_def_iterator &operator++() {
+ memoryaccess_def_iterator_base &operator++() {
assert(Access && "Hit end of iterator");
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
+ if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
if (++ArgNo >= MP->getNumIncomingValues()) {
ArgNo = 0;
Access = nullptr;
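
With EnableMSSALoopDependency now exported from MemorySSA.h, a loop pass can gate its MemorySSA usage on the flag. A minimal sketch of the usual pattern inside a new-pass-manager loop pass, assuming AR is the LoopStandardAnalysisResults handed to the pass and L / SomeInst are in scope:

// Build an updater only when MemorySSA is both requested and available, and
// thread it through utilities such as Loop::makeLoopInvariant().
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency && AR.MSSA)
  MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
bool Changed = false;
L.makeLoopInvariant(SomeInst, Changed, /*InsertPt=*/nullptr, MSSAU.get());
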
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index 169d5bd9fa8b..d4d8040c1ff6 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -1,9 +1,8 @@
//===- MemorySSAUpdater.h - Memory SSA Updater-------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H
#define LLVM_ANALYSIS_MEMORYSSAUPDATER_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -106,7 +106,12 @@ public:
/// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
/// following a CFG change that replaced multiple edges (switch) with a direct
/// branch.
- void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+ void removeDuplicatePhiEdgesBetween(const BasicBlock *From,
+ const BasicBlock *To);
+ /// Update MemorySSA when inserting a unique backedge block for a loop.
+ void updatePhisWhenInsertingUniqueBackedgeBlock(BasicBlock *LoopHeader,
+ BasicBlock *LoopPreheader,
+ BasicBlock *BackedgeBlock);
/// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
/// the exit blocks and a 1:1 mapping of all blocks and instructions
/// cloned. This involves duplicating all defs and uses in the cloned blocks
@@ -222,14 +227,14 @@ public:
/// associated with it is erased from the program. For example, if a store or
/// load is simply erased (not replaced), removeMemoryAccess should be called
/// on the MemoryAccess for that store/load.
- void removeMemoryAccess(MemoryAccess *);
+ void removeMemoryAccess(MemoryAccess *, bool OptimizePhis = false);
/// Remove MemoryAccess for a given instruction, if a MemoryAccess exists.
/// This should be called when an instruction (load/store) is deleted from
/// the program.
- void removeMemoryAccess(const Instruction *I) {
+ void removeMemoryAccess(const Instruction *I, bool OptimizePhis = false) {
if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
- removeMemoryAccess(MA);
+ removeMemoryAccess(MA, OptimizePhis);
}
/// Remove all MemoryAcceses in a set of BasicBlocks about to be deleted.
@@ -239,7 +244,17 @@ public:
/// Deleted blocks still have successor info, but their predecessor edges and
/// Phi nodes may already be updated. Instructions in DeadBlocks should be
/// deleted after this call.
- void removeBlocks(const SmallPtrSetImpl<BasicBlock *> &DeadBlocks);
+ void removeBlocks(const SmallSetVector<BasicBlock *, 8> &DeadBlocks);
+
+ /// Instruction I will be changed to an unreachable. Remove all accesses in
+ /// I's block that follow I (inclusive), and update the Phis in the block's
+ /// successors.
+ void changeToUnreachable(const Instruction *I);
+
+ /// Conditional branch BI is changed or replaced with an unconditional branch
+ /// to `To`. Update Phis in BI's successors to remove BI's BB.
+ void changeCondBranchToUnconditionalTo(const BranchInst *BI,
+ const BasicBlock *To);
/// Get handle on MemorySSA.
MemorySSA* getMemorySSA() const { return MSSA; }
@@ -262,6 +277,7 @@ private:
MemoryAccess *recursePhi(MemoryAccess *Phi);
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
+ void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
void fixupDefs(const SmallVectorImpl<WeakVH> &);
// Clone all uses and defs from BB to NewBB given a 1:1 map of all
// instructions and blocks cloned, and a map of MemoryPhi : Definition
@@ -272,8 +288,14 @@ private:
// not necessarily be MemoryPhis themselves, they may be MemoryDefs. As such,
// the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
// may be MemoryPhis or MemoryDefs and not MemoryUses.
+ // If CloneWasSimplified = true, the clone was exact. Otherwise, assume that
+ // the clone involved simplifications that may have: (1) turned a MemoryUse
+ // into an instruction that MemorySSA has no representation for, or (2) turned
+ // a MemoryDef into a MemoryUse or an instruction that MemorySSA has no
+ // representation for. No other cases are supported.
void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
- const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+ const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+ bool CloneWasSimplified = false);
template <typename Iter>
void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
Iter ValuesBegin, Iter ValuesEnd,
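
A small sketch of the updated MemorySSAUpdater calls above, assuming MSSAU is a valid MemorySSAUpdater and DeadI / BB have been determined by the caller:

// Deleting a single memory instruction: also ask the updater to optimize the
// MemoryPhis that referenced it.
MSSAU.removeMemoryAccess(DeadI, /*OptimizePhis=*/true);
DeadI->eraseFromParent();

// Deleting whole blocks now takes a SmallSetVector to keep a stable order.
SmallSetVector<BasicBlock *, 8> DeadBlocks;
DeadBlocks.insert(BB);
MSSAU.removeBlocks(DeadBlocks);
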
diff --git a/include/llvm/Analysis/ModuleSummaryAnalysis.h b/include/llvm/Analysis/ModuleSummaryAnalysis.h
index 9af7859cb4bf..1572a49e3384 100644
--- a/include/llvm/Analysis/ModuleSummaryAnalysis.h
+++ b/include/llvm/Analysis/ModuleSummaryAnalysis.h
@@ -1,9 +1,8 @@
//===- ModuleSummaryAnalysis.h - Module summary index builder ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index ad3222c17e62..3ef539c89d97 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -1,9 +1,8 @@
//===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/include/llvm/Analysis/ObjCARCAliasAnalysis.h
index 58a67042ea2d..b4f4e5f29768 100644
--- a/include/llvm/Analysis/ObjCARCAliasAnalysis.h
+++ b/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -1,9 +1,8 @@
//===- ObjCARCAliasAnalysis.h - ObjC ARC Alias Analysis ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -53,14 +52,17 @@ public:
return false;
}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
using AAResultBase::getModRefBehavior;
FunctionModRefBehavior getModRefBehavior(const Function *F);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 1f497fab35da..522abd756c9f 100644
--- a/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -1,9 +1,8 @@
//===- ObjCARCAnalysisUtils.h - ObjC ARC Analysis Utilities -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Analysis/ObjCARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h
index 018ea1f851be..dc6093a7b86c 100644
--- a/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/include/llvm/Analysis/ObjCARCInstKind.h
@@ -1,9 +1,8 @@
//===- ObjCARCInstKind.h - ARC instruction equivalence classes --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -75,6 +74,10 @@ bool IsForwarding(ARCInstKind Class);
/// passed a null pointer.
bool IsNoopOnNull(ARCInstKind Class);
+/// Test if the given class represents instructions which do nothing if
+/// passed a global variable.
+bool IsNoopOnGlobal(ARCInstKind Class);
+
/// Test if the given class represents instructions which are always safe
/// to mark with the "tail" keyword.
bool IsAlwaysTail(ARCInstKind Class);
diff --git a/include/llvm/Analysis/OptimizationRemarkEmitter.h b/include/llvm/Analysis/OptimizationRemarkEmitter.h
index fa838696e2f8..7b8404404ce7 100644
--- a/include/llvm/Analysis/OptimizationRemarkEmitter.h
+++ b/include/llvm/Analysis/OptimizationRemarkEmitter.h
@@ -1,9 +1,8 @@
//===- OptimizationRemarkEmitter.h - Optimization Diagnostic ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,7 +77,7 @@ public:
// remarks enabled. We can't currently check whether remarks are requested
// for the calling pass since that requires actually building the remark.
- if (F->getContext().getDiagnosticsOutputFile() ||
+ if (F->getContext().getRemarkStreamer() ||
F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) {
auto R = RemarkBuilder();
emit((DiagnosticInfoOptimizationBase &)R);
@@ -93,7 +92,7 @@ public:
/// provide more context so that non-trivial false positives can be quickly
/// detected by the user.
bool allowExtraAnalysis(StringRef PassName) const {
- return (F->getContext().getDiagnosticsOutputFile() ||
+ return (F->getContext().getRemarkStreamer() ||
F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName));
}
diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h
index 0776aa626005..ae64c0189f5e 100644
--- a/include/llvm/Analysis/OrderedBasicBlock.h
+++ b/include/llvm/Analysis/OrderedBasicBlock.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/OrderedBasicBlock.h --------------------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -60,6 +59,14 @@ public:
/// only relevant to compare relative instructions positions inside \p BB.
/// Returns false for A == B.
bool dominates(const Instruction *A, const Instruction *B);
+
+ /// Remove \p I from the ordering, if it is present.
+ void eraseInstruction(const Instruction *I);
+
+ /// Replace \p Old with \p New in the ordering. \p New is assigned the
+ /// numbering of \p Old, so it must be inserted at the same position in the
+ /// IR.
+ void replaceInstruction(const Instruction *Old, const Instruction *New);
};
} // End llvm namespace
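
The two new OrderedBasicBlock hooks keep the cached numbering valid across local rewrites instead of forcing a full recomputation. A minimal sketch; replaceKeepingOrder is a hypothetical helper and NewI must already be inserted at OldI's position:

#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// NewI inherits OldI's position number in the cached ordering.
static void replaceKeepingOrder(OrderedBasicBlock &OBB, Instruction *OldI,
                                Instruction *NewI) {
  OBB.replaceInstruction(OldI, NewI);
  OldI->replaceAllUsesWith(NewI);
  OldI->eraseFromParent();
}
// For a plain deletion, OBB.eraseInstruction(I) drops I from the ordering.
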
diff --git a/include/llvm/Analysis/OrderedInstructions.h b/include/llvm/Analysis/OrderedInstructions.h
index 7e3850b87c57..967b146b52de 100644
--- a/include/llvm/Analysis/OrderedInstructions.h
+++ b/include/llvm/Analysis/OrderedInstructions.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 0a335b6be6c7..54a07f053478 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -1,9 +1,8 @@
//===- PHITransAddr.h - PHI Translation for Addresses -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 081dd5000835..d9c97dff8c6e 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -1,9 +1,8 @@
//===-- llvm/Analysis/Passes.h - Constructors for analyses ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/PhiValues.h b/include/llvm/Analysis/PhiValues.h
index 76204ac1bc6c..124fa2191694 100644
--- a/include/llvm/Analysis/PhiValues.h
+++ b/include/llvm/Analysis/PhiValues.h
@@ -1,9 +1,8 @@
//===- PhiValues.h - Phi Value Analysis -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index f2dc8d135d71..87d2e0318d0a 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -1,9 +1,8 @@
//=- llvm/Analysis/PostDominators.h - Post Dominator Calculation --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index 3aef4be72d71..f309d344b8d1 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/ProfileSummaryInfo.h - profile summary ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,12 @@ public:
Summary->getKind() == ProfileSummary::PSK_Instr;
}
+ /// Returns true if module \c M has a context-sensitive instrumentation profile.
+ bool hasCSInstrumentationProfile() {
+ return hasProfileSummary() &&
+ Summary->getKind() == ProfileSummary::PSK_CSInstr;
+ }
+
/// Handle the invalidation of this information.
///
/// When used as a result of \c ProfileSummaryAnalysis this method will be
@@ -87,7 +92,8 @@ public:
/// Returns the profile count for \p CallInst.
Optional<uint64_t> getProfileCount(const Instruction *CallInst,
- BlockFrequencyInfo *BFI);
+ BlockFrequencyInfo *BFI,
+ bool AllowSynthetic = false);
/// Returns true if the working set size of the code is considered huge.
bool hasHugeWorkingSetSize();
/// Returns true if \p F has hot function entry.
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
index b34b25c75040..fbf04c841d30 100644
--- a/include/llvm/Analysis/PtrUseVisitor.h
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -1,9 +1,8 @@
//===- PtrUseVisitor.h - InstVisitors over a pointers uses ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -257,6 +256,10 @@ protected:
enqueueUsers(BC);
}
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ enqueueUsers(ASC);
+ }
+
void visitPtrToIntInst(PtrToIntInst &I) {
PI.setEscaped(&I);
}
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 27f6cc197927..8bcc3e851200 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -1,9 +1,8 @@
//===- RegionInfo.h - SESE region analysis ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index 5904214aa925..c59c09dd2095 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -1,9 +1,8 @@
//===- RegionInfoImpl.h - SESE region detection analysis --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Detects single entry single exit regions in the control flow graph.
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
index 4fd92fcde20b..72bc5bbcb506 100644
--- a/include/llvm/Analysis/RegionIterator.h
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -1,9 +1,8 @@
//===- RegionIterator.h - Iterators to iteratate over Regions ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines the iterators to iterate over the elements of a Region.
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index b3da91c89cbd..5b1864a37629 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -1,9 +1,8 @@
//===- RegionPass.h - RegionPass class --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/RegionPrinter.h b/include/llvm/Analysis/RegionPrinter.h
index e132eaea5674..154ac35c486a 100644
--- a/include/llvm/Analysis/RegionPrinter.h
+++ b/include/llvm/Analysis/RegionPrinter.h
@@ -1,9 +1,8 @@
//===-- RegionPrinter.h - Region printer external interface -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 8f4200b07e5c..0bd98ef37e7a 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/ScalarEvolution.h - Scalar Evolution -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,6 +84,9 @@ class SCEV : public FoldingSetNode {
const unsigned short SCEVType;
protected:
+ // Estimated complexity of this node's expression tree size.
+ const unsigned short ExpressionSize;
+
/// This field is initialized to zero and may be used in subclasses to store
/// miscellaneous information.
unsigned short SubclassData = 0;
@@ -116,8 +118,9 @@ public:
NoWrapMask = (1 << 3) - 1
};
- explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy)
- : FastID(ID), SCEVType(SCEVTy) {}
+ explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
+ unsigned short ExpressionSize)
+ : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
SCEV(const SCEV &) = delete;
SCEV &operator=(const SCEV &) = delete;
@@ -138,6 +141,19 @@ public:
/// Return true if the specified scev is negated, but not a constant.
bool isNonConstantNegative() const;
+ // Returns the estimated size of the mathematical expression represented by
+ // this SCEV. The rules for its calculation are as follows:
+ //   1) The size of a SCEV without operands (like constants and SCEVUnknown)
+ //      is 1;
+ //   2) The size of a SCEV with operands Op1, Op2, ..., OpN is
+ //      1 + Size(Op1) + ... + Size(OpN).
+ // This value estimates the time needed to traverse the SCEV and all of its
+ // operands recursively, and can be used to avoid heavy transformations on
+ // SCEVs of excessive size in order to save compilation time.
+ unsigned short getExpressionSize() const {
+ return ExpressionSize;
+ }
+
/// Print out the internal representation of this scalar to the specified
/// stream. This should really only be used for debugging purposes.
void print(raw_ostream &OS) const;
@@ -521,7 +537,7 @@ public:
const SCEV *getConstant(ConstantInt *V);
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
- const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
+ const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
@@ -582,6 +598,8 @@ public:
/// \p IndexExprs The expressions for the indices.
const SCEV *getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs);
+ const SCEV *getMinMaxExpr(unsigned Kind,
+ SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
@@ -619,11 +637,13 @@ public:
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended.
- const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty);
+ const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+ unsigned Depth = 0);
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is sign extended.
- const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty);
+ const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+ unsigned Depth = 0);
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended. The
@@ -726,9 +746,12 @@ public:
unsigned getSmallConstantTripMultiple(const Loop *L,
BasicBlock *ExitingBlock);
- /// Get the expression for the number of loop iterations for which this loop
- /// is guaranteed not to exit via ExitingBlock. Otherwise return
- /// SCEVCouldNotCompute.
+ /// Return the number of times the backedge executes before the given exit
+ /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
+ /// For a single-exit loop, this value is equivalent to the result of
+ /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
+ /// before the backedge is executed (ExitCount + 1) times. Note that there
+ /// is no guarantee about *which* exit is taken on the exiting iteration.
const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
/// If the specified loop has a predictable backedge-taken count, return it,
@@ -764,6 +787,13 @@ public:
/// backedge-taken count.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
+ // This method should be called by the client when it has made any change that
+ // would invalidate SCEV's answers, and the client wants to remove all loop
+ // information held internally by ScalarEvolution. This is intended to be used
+ // when the alternative of forgetting individual loops is too expensive (e.g.
+ // for large loop bodies).
+ void forgetAllLoops();
+
/// This method should be called by the client when it has changed a loop in
/// a way that may affect ScalarEvolution's ability to compute a trip count,
/// or if the loop is deleted. This call is potentially expensive for large
@@ -1273,7 +1303,7 @@ private:
using EdgeExitInfo = std::pair<BasicBlock *, ExitLimit>;
/// Initialize BackedgeTakenInfo from a list of exact exit counts.
- BackedgeTakenInfo(SmallVectorImpl<EdgeExitInfo> &&ExitCounts, bool Complete,
+ BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool Complete,
const SCEV *MaxCount, bool MaxOrZero);
/// Test whether this BackedgeTakenInfo contains any computed information,
@@ -1826,15 +1856,15 @@ private:
bool NoWrap);
/// Get add expr already created or create a new one.
- const SCEV *getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const SCEV *getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags);
/// Get mul expr already created or create a new one.
- const SCEV *getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const SCEV *getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags);
// Get addrec expr already created or create a new one.
- const SCEV *getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const SCEV *getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags);
/// Return x if \p Val is f(x) where f is a 1-1 function.
@@ -1853,6 +1883,16 @@ private:
/// Assign A and B to LHS and RHS, respectively.
bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
+ /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
+ /// `UniqueSCEVs`.
+ ///
+ /// The first component of the returned tuple is the SCEV if found and null
+ /// otherwise. The second component is the `FoldingSetNodeID` that was
+ /// constructed to look up the SCEV and the third component is the insertion
+ /// point.
+ std::tuple<const SCEV *, FoldingSetNodeID, void *>
+ findExistingSCEVInCache(int SCEVType, ArrayRef<const SCEV *> Ops);
+
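// The look-up/insert-hint contract described above can be illustrated with a
// self-contained toy cache (hypothetical names; std::map stands in for the
// folding set): probe once, and reuse the returned position when inserting.

#include <map>
#include <string>
#include <tuple>

using ToyCache = std::map<std::string, int>;

// Probe the cache; also return the position where a miss should be inserted.
std::tuple<const int *, ToyCache::iterator> findInToyCache(ToyCache &C,
                                                           const std::string &Key) {
  auto It = C.lower_bound(Key);
  const int *Found = (It != C.end() && It->first == Key) ? &It->second : nullptr;
  return std::make_tuple(Found, It); // hit: value + position; miss: null + hint
}

int getOrCreate(ToyCache &C, const std::string &Key, int Fresh) {
  const int *Existing;
  ToyCache::iterator Hint;
  std::tie(Existing, Hint) = findInToyCache(C, Key);
  if (Existing)
    return *Existing;                              // reuse the cached entry
  return C.emplace_hint(Hint, Key, Fresh)->second; // insert at the saved spot
}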
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
diff --git a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index 329be51e5eac..98d53237d4a0 100644
--- a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -1,9 +1,8 @@
//===- ScalarEvolutionAliasAnalysis.h - SCEV-based AA -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -31,7 +30,8 @@ public:
explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {}
SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
private:
Value *GetBaseValue(const SCEV *S);
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 58d42680d6bc..a519f93216b3 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -1,9 +1,8 @@
//===---- llvm/Analysis/ScalarEvolutionExpander.h - SCEV Exprs --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -316,8 +315,10 @@ namespace llvm {
SmallPtrSetImpl<const SCEV *> &Processed);
/// Insert the specified binary operator, doing a small amount of work to
- /// avoid inserting an obviously redundant operation.
- Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
+ /// avoid inserting an obviously redundant operation, and hoisting to an
+ /// outer loop when the opportunity is there and it is safe.
+ Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist);
/// Arrange for there to be a cast of V to Ty at IP, reusing an existing
/// cast if a suitable one exists, moving an existing cast if a suitable one
@@ -368,6 +369,10 @@ namespace llvm {
Value *visitUMaxExpr(const SCEVUMaxExpr *S);
+ Value *visitSMinExpr(const SCEVSMinExpr *S);
+
+ Value *visitUMinExpr(const SCEVUMinExpr *S);
+
Value *visitUnknown(const SCEVUnknown *S) {
return S->getValue();
}
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 42e76094eb2b..d008af7b7e6f 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/ScalarEvolutionExpressions.h - SCEV Exprs --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,7 +39,7 @@ class Type;
// These should be ordered in terms of increasing complexity to make the
// folders simpler.
scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
- scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
+ scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr,
scUnknown, scCouldNotCompute
};
@@ -51,7 +50,7 @@ class Type;
ConstantInt *V;
SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) :
- SCEV(ID, scConstant), V(v) {}
+ SCEV(ID, scConstant, 1), V(v) {}
public:
ConstantInt *getValue() const { return V; }
@@ -65,6 +64,13 @@ class Type;
}
};
+ static unsigned short computeExpressionSize(ArrayRef<const SCEV *> Args) {
+ APInt Size(16, 1);
+ for (auto *Arg : Args)
+ Size = Size.uadd_sat(APInt(16, Arg->getExpressionSize()));
+ return (unsigned short)Size.getZExtValue();
+ }
+
/// This is the base class for unary cast operator classes.
class SCEVCastExpr : public SCEV {
protected:
@@ -142,9 +148,10 @@ class Type;
const SCEV *const *Operands;
size_t NumOperands;
- SCEVNAryExpr(const FoldingSetNodeIDRef ID,
- enum SCEVTypes T, const SCEV *const *O, size_t N)
- : SCEV(ID, T), Operands(O), NumOperands(N) {}
+ SCEVNAryExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
+ const SCEV *const *O, size_t N)
+ : SCEV(ID, T, computeExpressionSize(makeArrayRef(O, N))), Operands(O),
+ NumOperands(N) {}
public:
size_t getNumOperands() const { return NumOperands; }
@@ -183,10 +190,9 @@ class Type;
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
- return S->getSCEVType() == scAddExpr ||
- S->getSCEVType() == scMulExpr ||
- S->getSCEVType() == scSMaxExpr ||
- S->getSCEVType() == scUMaxExpr ||
+ return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+ S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
+ S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr ||
S->getSCEVType() == scAddRecExpr;
}
};
@@ -201,10 +207,9 @@ class Type;
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
- return S->getSCEVType() == scAddExpr ||
- S->getSCEVType() == scMulExpr ||
- S->getSCEVType() == scSMaxExpr ||
- S->getSCEVType() == scUMaxExpr;
+ return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+ S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
+ S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr;
}
/// Set flags for a non-recurrence without clearing previously set flags.
@@ -258,7 +263,8 @@ class Type;
const SCEV *RHS;
SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs)
- : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {}
+ : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})), LHS(lhs),
+ RHS(rhs) {}
public:
const SCEV *getLHS() const { return LHS; }
@@ -358,18 +364,54 @@ class Type;
}
};
- /// This class represents a signed maximum selection.
- class SCEVSMaxExpr : public SCEVCommutativeExpr {
+ /// This node is the base class for min/max selections.
+ class SCEVMinMaxExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
- const SCEV *const *O, size_t N)
- : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
- // Max never overflows.
+ static bool isMinMaxType(enum SCEVTypes T) {
+ return T == scSMaxExpr || T == scUMaxExpr || T == scSMinExpr ||
+ T == scUMinExpr;
+ }
+
+ protected:
+ /// Note: Constructing subclasses via this constructor is allowed
+ SCEVMinMaxExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
+ const SCEV *const *O, size_t N)
+ : SCEVCommutativeExpr(ID, T, O, N) {
+ assert(isMinMaxType(T));
+ // Min and max never overflow
setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
}
public:
+ static bool classof(const SCEV *S) {
+ return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType()));
+ }
+
+ static enum SCEVTypes negate(enum SCEVTypes T) {
+ switch (T) {
+ case scSMaxExpr:
+ return scSMinExpr;
+ case scSMinExpr:
+ return scSMaxExpr;
+ case scUMaxExpr:
+ return scUMinExpr;
+ case scUMinExpr:
+ return scUMaxExpr;
+ default:
+ llvm_unreachable("Not a min or max SCEV type!");
+ }
+ }
+ };
+
+ /// This class represents a signed maximum selection.
+ class SCEVSMaxExpr : public SCEVMinMaxExpr {
+ friend class ScalarEvolution;
+
+ SCEVSMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scSMaxExpr, O, N) {}
+
+ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
return S->getSCEVType() == scSMaxExpr;
@@ -377,15 +419,11 @@ class Type;
};
/// This class represents an unsigned maximum selection.
- class SCEVUMaxExpr : public SCEVCommutativeExpr {
+ class SCEVUMaxExpr : public SCEVMinMaxExpr {
friend class ScalarEvolution;
- SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
- const SCEV *const *O, size_t N)
- : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
- // Max never overflows.
- setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
- }
+ SCEVUMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scUMaxExpr, O, N) {}
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -394,6 +432,34 @@ class Type;
}
};
+ /// This class represents a signed minimum selection.
+ class SCEVSMinExpr : public SCEVMinMaxExpr {
+ friend class ScalarEvolution;
+
+ SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scSMinExpr, O, N) {}
+
+ public:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) {
+ return S->getSCEVType() == scSMinExpr;
+ }
+ };
+
+ /// This class represents an unsigned minimum selection.
+ class SCEVUMinExpr : public SCEVMinMaxExpr {
+ friend class ScalarEvolution;
+
+ SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scUMinExpr, O, N) {}
+
+ public:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) {
+ return S->getSCEVType() == scUMinExpr;
+ }
+ };
+
/// This means that we are dealing with an entirely unknown SCEV
/// value, and only represent it as its LLVM Value. This is the
/// "bottom" value for the analysis.
@@ -411,7 +477,7 @@ class Type;
SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
ScalarEvolution *se, SCEVUnknown *next) :
- SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {}
+ SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
// Implement CallbackVH.
void deleted() override;
@@ -466,6 +532,10 @@ class Type;
return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
case scUMaxExpr:
return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
+ case scSMinExpr:
+ return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S);
+ case scUMinExpr:
+ return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S);
case scUnknown:
return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
case scCouldNotCompute:
@@ -519,6 +589,8 @@ class Type;
case scMulExpr:
case scSMaxExpr:
case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr:
case scAddRecExpr:
for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
push(Op);
@@ -681,6 +753,26 @@ class Type;
return !Changed ? Expr : SE.getUMaxExpr(Operands);
}
+ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getSMinExpr(Operands);
+ }
+
+ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getUMinExpr(Operands);
+ }
+
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
return Expr;
}
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 51c92121c8f0..1a05594a46ec 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/ScopedNoAliasAA.h b/include/llvm/Analysis/ScopedNoAliasAA.h
index 1356c6e9198a..dae733bd2015 100644
--- a/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -1,9 +1,8 @@
//===- ScopedNoAliasAA.h - Scoped No-Alias Alias Analysis -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,9 +39,12 @@ public:
return false;
}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h
index 02a2e64268b7..fac92e4a25a4 100644
--- a/include/llvm/Analysis/SparsePropagation.h
+++ b/include/llvm/Analysis/SparsePropagation.h
@@ -1,9 +1,8 @@
//===- SparsePropagation.h - Sparse Conditional Property Propagation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -330,12 +329,8 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors(
return;
}
- if (TI.isExceptionalTerminator()) {
- Succs.assign(Succs.size(), true);
- return;
- }
-
- if (isa<IndirectBrInst>(TI)) {
+ if (TI.isExceptionalTerminator() ||
+ TI.isIndirectTerminator()) {
Succs.assign(Succs.size(), true);
return;
}
diff --git a/include/llvm/Analysis/StackSafetyAnalysis.h b/include/llvm/Analysis/StackSafetyAnalysis.h
index 8a151650a34c..f9d8b08ac142 100644
--- a/include/llvm/Analysis/StackSafetyAnalysis.h
+++ b/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -1,9 +1,8 @@
//===- StackSafetyAnalysis.h - Stack memory safety analysis -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/SyncDependenceAnalysis.h b/include/llvm/Analysis/SyncDependenceAnalysis.h
index df693d9d8e8c..099403b47757 100644
--- a/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ b/include/llvm/Analysis/SyncDependenceAnalysis.h
@@ -1,9 +1,8 @@
//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/SyntheticCountsUtils.h b/include/llvm/Analysis/SyntheticCountsUtils.h
index db80bef001e2..b9b4c98bfc35 100644
--- a/include/llvm/Analysis/SyntheticCountsUtils.h
+++ b/include/llvm/Analysis/SyntheticCountsUtils.h
@@ -1,9 +1,8 @@
//===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/TargetFolder.h b/include/llvm/Analysis/TargetFolder.h
index ae75d3773362..7ab6562be440 100644
--- a/include/llvm/Analysis/TargetFolder.h
+++ b/include/llvm/Analysis/TargetFolder.h
@@ -1,9 +1,8 @@
//====- TargetFolder.h - Constant folding helper ---------------*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,6 +124,10 @@ public:
return Fold(ConstantExpr::getNot(C));
}
+ Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+ return Fold(ConstantExpr::get(Opc, C));
+ }
+
//===--------------------------------------------------------------------===//
// Memory Instructions
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def
index 518a85ee1a01..afed404f04c0 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1,9 +1,8 @@
//===-- TargetLibraryInfo.def - Library information -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,15 @@
// Which is defined depends on whether TLI_DEFINE_ENUM is defined or
// TLI_DEFINE_STRING is defined. Only one should be defined at a time.
+// NOTE: The nofree attribute is added to LibFuncs which are not
+// listed as free or realloc functions in MemoryBuiltins.cpp.
+//
+// When adding a function which frees memory, include the LibFunc
+// in lib/Analysis/MemoryBuiltins.cpp "isLibFreeFunction".
+//
+// When adding a LibFunc which reallocates memory, include the LibFunc
+// in lib/Analysis/MemoryBuiltins.cpp "AllocationFnData[]".
+
#if !(defined(TLI_DEFINE_ENUM) || defined(TLI_DEFINE_STRING))
#error "Must define TLI_DEFINE_ENUM or TLI_DEFINE_STRING for TLI .def."
#elif defined(TLI_DEFINE_ENUM) && defined(TLI_DEFINE_STRING)
@@ -330,6 +338,10 @@ TLI_DEFINE_STRING_INTERNAL("__logf_finite")
/// long double __logl_finite(long double x);
TLI_DEFINE_ENUM_INTERNAL(logl_finite)
TLI_DEFINE_STRING_INTERNAL("__logl_finite")
+/// void *__memccpy_chk(void *dst, const void *src, int c, size_t n,
+/// size_t dstsize)
+TLI_DEFINE_ENUM_INTERNAL(memccpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__memccpy_chk")
/// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(memcpy_chk)
TLI_DEFINE_STRING_INTERNAL("__memcpy_chk")
@@ -373,6 +385,23 @@ TLI_DEFINE_STRING_INTERNAL("__sinpi")
/// float __sinpif(float x);
TLI_DEFINE_ENUM_INTERNAL(sinpif)
TLI_DEFINE_STRING_INTERNAL("__sinpif")
+/// int __small_fprintf(FILE *stream, const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_fprintf)
+TLI_DEFINE_STRING_INTERNAL("__small_fprintf")
+/// int __small_printf(const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_printf)
+TLI_DEFINE_STRING_INTERNAL("__small_printf")
+/// int __small_sprintf(char *str, const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_sprintf)
+TLI_DEFINE_STRING_INTERNAL("__small_sprintf")
+/// int __snprintf_chk(char *s, size_t n, int flags, size_t slen,
+/// const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(snprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__snprintf_chk")
+/// int __sprintf_chk(char *str, int flags, size_t str_len,
+/// const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(sprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__sprintf_chk")
/// double __sqrt_finite(double x);
TLI_DEFINE_ENUM_INTERNAL(sqrt_finite)
TLI_DEFINE_STRING_INTERNAL("__sqrt_finite")
@@ -388,12 +417,26 @@ TLI_DEFINE_STRING_INTERNAL("__stpcpy_chk")
/// char *__stpncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(stpncpy_chk)
TLI_DEFINE_STRING_INTERNAL("__stpncpy_chk")
+/// char *__strcat_chk(char *s1, const char *s2, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strcat_chk")
/// char *__strcpy_chk(char *s1, const char *s2, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(strcpy_chk)
TLI_DEFINE_STRING_INTERNAL("__strcpy_chk")
/// char * __strdup(const char *s);
TLI_DEFINE_ENUM_INTERNAL(dunder_strdup)
TLI_DEFINE_STRING_INTERNAL("__strdup")
+/// size_t __strlcat_chk(char *dst, const char *src, size_t size,
+/// size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcat_chk")
+/// size_t __strlcpy_chk(char *dst, const char *src, size_t size,
+/// size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcpy_chk")
+/// char *__strncat_chk(char *s1, const char *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strncat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strncat_chk")
/// char *__strncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(strncpy_chk)
TLI_DEFINE_STRING_INTERNAL("__strncpy_chk")
@@ -403,6 +446,14 @@ TLI_DEFINE_STRING_INTERNAL("__strndup")
/// char * __strtok_r(char *s, const char *delim, char **save_ptr);
TLI_DEFINE_ENUM_INTERNAL(dunder_strtok_r)
TLI_DEFINE_STRING_INTERNAL("__strtok_r")
+/// int __vsnprintf_chk(char *s, size_t n, int flags, size_t slen,
+/// const char *format, va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsnprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsnprintf_chk")
+/// int __vsprintf_chk(char *s, int flags, size_t slen, const char *format,
+/// va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsprintf_chk")
/// int abs(int j);
TLI_DEFINE_ENUM_INTERNAL(abs)
TLI_DEFINE_STRING_INTERNAL("abs")
@@ -1192,6 +1243,12 @@ TLI_DEFINE_STRING_INTERNAL("strcspn")
/// char *strdup(const char *s1);
TLI_DEFINE_ENUM_INTERNAL(strdup)
TLI_DEFINE_STRING_INTERNAL("strdup")
+/// size_t strlcat(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcat)
+TLI_DEFINE_STRING_INTERNAL("strlcat")
+/// size_t strlcpy(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy)
+TLI_DEFINE_STRING_INTERNAL("strlcpy")
/// size_t strlen(const char *s);
TLI_DEFINE_ENUM_INTERNAL(strlen)
TLI_DEFINE_STRING_INTERNAL("strlen")
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index a3fe834022f7..4b5200f5a838 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -1,9 +1,8 @@
//===-- TargetLibraryInfo.h - Library information ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -87,6 +86,7 @@ public:
enum VectorLibrary {
NoLibrary, // Don't use any vector library.
Accelerate, // Use Accelerate framework.
+ MASSV, // IBM MASS vector library.
SVML // Intel short vector math library.
};
@@ -281,9 +281,9 @@ public:
case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
- case LibFunc_memcmp: case LibFunc_strcmp: case LibFunc_strcpy:
- case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen:
- case LibFunc_memchr: case LibFunc_mempcpy:
+ case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
+ case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
+ case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
return true;
}
return false;
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 223175d17c2d..7574b811bc1c 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -1,9 +1,8 @@
//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -28,6 +27,10 @@
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include <functional>
namespace llvm {
@@ -36,6 +39,8 @@ namespace Intrinsic {
enum ID : unsigned;
}
+class AssumptionCache;
+class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
@@ -45,6 +50,7 @@ class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
+class TargetLibraryInfo;
class Type;
class User;
class Value;
@@ -73,6 +79,30 @@ struct MemIntrinsicInfo {
}
};
+/// Attributes of a target dependent hardware loop.
+struct HardwareLoopInfo {
+ HardwareLoopInfo() = delete;
+ HardwareLoopInfo(Loop *L) : L(L) {}
+ Loop *L = nullptr;
+ BasicBlock *ExitBlock = nullptr;
+ BranchInst *ExitBranch = nullptr;
+ const SCEV *ExitCount = nullptr;
+ IntegerType *CountType = nullptr;
+ Value *LoopDecrement = nullptr; // Decrement the loop counter by this
+ // value in every iteration.
+ bool IsNestingLegal = false; // Can a hardware loop be a parent to
+ // another hardware loop?
+ bool CounterInReg = false; // Should loop counter be updated in
+ // the loop via a phi?
+ bool PerformEntryTest = false; // Generate the intrinsic which also performs
+ // icmp ne zero on the loop counter value and
+ // produces an i1 to guard the loop entry.
+ bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
+ DominatorTree &DT, bool ForceNestedLoop = false,
+ bool ForceHardwareLoopPHI = false);
+ bool canAnalyze(LoopInfo &LI);
+};
+
/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
@@ -81,7 +111,7 @@ public:
/// API below.
///
/// This is used by targets to construct a TTI wrapping their target-specific
- /// implementaion that encodes appropriate costs for their target.
+ /// implementation that encodes appropriate costs for their target.
template <typename T> TargetTransformInfo(T Impl);
/// Construct a baseline TTI object using a minimal implementation of
@@ -209,18 +239,21 @@ public:
/// This is the most basic query for estimating call cost: it only knows the
/// function type and (potentially) the number of arguments at the call site.
/// The latter is only interesting for varargs function types.
- int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
+ int getCallCost(FunctionType *FTy, int NumArgs = -1,
+ const User *U = nullptr) const;
/// Estimate the cost of calling a specific function when lowered.
///
/// This overload adds the ability to reason about the particular function
/// being called in the event it is a library call with special lowering.
- int getCallCost(const Function *F, int NumArgs = -1) const;
+ int getCallCost(const Function *F, int NumArgs = -1,
+ const User *U = nullptr) const;
/// Estimate the cost of calling a specific function when lowered.
///
/// This overload allows specifying a set of candidate argument values.
- int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
+ int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+ const User *U = nullptr) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
@@ -230,17 +263,35 @@ public:
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
+ /// \returns Vector bonus in percent.
+ ///
+ /// Vector bonuses: We want to more aggressively inline vector-dense kernels
+ /// and apply this bonus based on the percentage of vector instructions. The
+ /// full bonus is applied if vector instructions exceed 50% of the callee, and
+ /// half that amount is applied if they exceed 10%. Note that these bonuses
+ /// are somewhat arbitrary and evolved over time by accident as much as
+ /// because they are principled bonuses.
+ /// FIXME: It would be nice to base the bonus values on something more
+ /// scientific. A target may have no bonus on vector instructions.
+ int getInlinerVectorBonusPercent() const;
+
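// As a rough illustration (a hypothetical helper, not the inliner's actual
// code), the scheme described above could fold the bonus into a threshold,
// with getInlinerVectorBonusPercent() supplying BonusPercent:

#include <cstdint>

int64_t applyVectorBonus(int64_t Threshold, double VectorInstFraction,
                         int BonusPercent) {
  int64_t Bonus = Threshold * BonusPercent / 100; // e.g. 150% of the threshold
  if (VectorInstFraction > 0.5)
    return Threshold + Bonus;      // vector-dense kernel: full bonus
  if (VectorInstFraction > 0.1)
    return Threshold + Bonus / 2;  // moderately vector-heavy: half the bonus
  return Threshold;                // mostly scalar code: no bonus
}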
/// Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) const;
+ ArrayRef<Type *> ParamTys,
+ const User *U = nullptr) const;
/// Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const;
+ ArrayRef<const Value *> Arguments,
+ const User *U = nullptr) const;
+
+ /// \return the expected cost of a memcpy, which could e.g. depend on the
+ /// source/destination type and alignment and the number of bytes copied.
+ int getMemcpyCost(const Instruction *I) const;
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
@@ -296,7 +347,7 @@ public:
// Returns true for the target specific
// set of operations which produce uniform result
- // even taking non-unform arguments
+ // even taking non-uniform arguments
bool isAlwaysUniform(const Value *V) const;
/// Returns the address space ID for a target's 'flat' address space. Note
@@ -437,6 +488,13 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) const;
+ /// Query the target whether it would be profitable to convert the given loop
+ /// into a hardware loop.
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) const;
+
/// @}
/// \name Scalar Target Information
@@ -483,21 +541,40 @@ public:
/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;
+ /// Return true if the target can save a compare for loop count, for example
+ /// hardware loop saves a compare.
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) const;
+
/// \return True is LSR should make efforts to create/preserve post-inc
/// addressing mode expressions.
bool shouldFavorPostInc() const;
- /// Return true if the target supports masked load/store
- /// AVX2 and AVX-512 targets allow masks for consecutive load and store
+ /// Return true if LSR should make efforts to generate indexed addressing
+ /// modes that operate across loop iterations.
+ bool shouldFavorBackedgeIndex(const Loop *L) const;
+
+ /// Return true if the target supports masked store.
bool isLegalMaskedStore(Type *DataType) const;
+ /// Return true if the target supports masked load.
bool isLegalMaskedLoad(Type *DataType) const;
- /// Return true if the target supports masked gather/scatter
- /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
- /// bits scalar type.
+ /// Return true if the target supports nontemporal store.
+ bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+ /// Return true if the target supports nontemporal load.
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+
+ /// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType) const;
+ /// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType) const;
+ /// Return true if the target supports masked compress store.
+ bool isLegalMaskedCompressStore(Type *DataType) const;
+ /// Return true if the target supports masked expand load.
+ bool isLegalMaskedExpandLoad(Type *DataType) const;
+
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
@@ -576,17 +653,35 @@ public:
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
- /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
- /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+ /// Returns options for expansion of memcmp. IsZeroCmp is
+ /// true if this is the expansion of memcmp(p1, p2, s) == 0.
struct MemCmpExpansionOptions {
+ // Return true if memcmp expansion is enabled.
+ operator bool() const { return MaxNumLoads > 0; }
+
+ // Maximum number of load operations.
+ unsigned MaxNumLoads = 0;
+
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
+
+ // For memcmp expansion when the memcmp result is only compared equal or
+ // not-equal to 0, allow up to this number of load pairs per block. As an
+ // example, this may allow 'memcmp(a, b, 3) == 0' in a single block (see the
+ // C++ sketch after this struct):
+ // a0 = load2bytes &a[0]
+ // b0 = load2bytes &b[0]
+ // a2 = load1byte &a[2]
+ // b2 = load1byte &b[2]
+ // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+ unsigned NumLoadsPerBlock = 1;
+
// Set to true to allow overlapping loads. For example, 7-byte compares can
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
// requires all loads in LoadSizes to be doable in an unaligned way.
bool AllowOverlappingLoads = false;
};
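// The comment inside MemCmpExpansionOptions sketches the zero-comparison
// expansion in pseudo-IR; the same expansion can be written as a hypothetical
// self-contained C++ helper (memcpy performs the unaligned two-byte loads):

#include <cstdint>
#include <cstring>

// memcmp(a, b, 3) == 0, expanded into two loads per operand and one compare.
bool memcmp3IsZero(const unsigned char *A, const unsigned char *B) {
  std::uint16_t A0, B0;
  std::memcpy(&A0, A, 2);              // a0 = load2bytes &a[0]
  std::memcpy(&B0, B, 2);              // b0 = load2bytes &b[0]
  unsigned char A2 = A[2], B2 = B[2];  // a2/b2 = load1byte &a[2]/&b[2]
  return ((A0 ^ B0) | (A2 ^ B2)) == 0; // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
}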
- const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
+ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
@@ -700,7 +795,7 @@ public:
bool shouldMaximizeVectorBandwidth(bool OptSize) const;
/// \return The minimum vectorization factor for types of given element
- /// bit width, or 0 if there is no mimimum VF. The returned value only
+ /// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
unsigned getMinimumVF(unsigned ElemWidth) const;
@@ -1005,6 +1100,11 @@ public:
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+ /// \returns the size cost of rematerializing a GlobalValue address relative
+ /// to a stack reload.
+ unsigned getGISelRematGlobalCost() const;
+
/// @}
private:
@@ -1035,15 +1135,18 @@ public:
virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) = 0;
virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
- virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
- virtual int getCallCost(const Function *F, int NumArgs) = 0;
+ virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
+ virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
virtual int getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) = 0;
+ ArrayRef<const Value *> Arguments, const User *U) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
+ virtual int getInlinerVectorBonusPercent() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) = 0;
+ ArrayRef<Type *> ParamTys, const User *U) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) = 0;
+ ArrayRef<const Value *> Arguments,
+ const User *U) = 0;
+ virtual int getMemcpyCost(const Instruction *I) = 0;
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) = 0;
virtual int
@@ -1055,6 +1158,10 @@ public:
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
+ virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -1065,11 +1172,19 @@ public:
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool canMacroFuseCmp() = 0;
+ virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
+ virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;
virtual bool isLegalMaskedLoad(Type *DataType) = 0;
+ virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
+ virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
+ virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
+ virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
@@ -1092,8 +1207,8 @@ public:
unsigned VF) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
- virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const = 0;
+ virtual MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -1210,6 +1325,7 @@ public:
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+ virtual unsigned getGISelRematGlobalCost() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
};
@@ -1235,26 +1351,33 @@ public:
int getExtCost(const Instruction *I, const Value *Src) override {
return Impl.getExtCost(I, Src);
}
- int getCallCost(FunctionType *FTy, int NumArgs) override {
- return Impl.getCallCost(FTy, NumArgs);
+ int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
+ return Impl.getCallCost(FTy, NumArgs, U);
}
- int getCallCost(const Function *F, int NumArgs) override {
- return Impl.getCallCost(F, NumArgs);
+ int getCallCost(const Function *F, int NumArgs, const User *U) override {
+ return Impl.getCallCost(F, NumArgs, U);
}
int getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) override {
- return Impl.getCallCost(F, Arguments);
+ ArrayRef<const Value *> Arguments, const User *U) override {
+ return Impl.getCallCost(F, Arguments, U);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
+ int getInlinerVectorBonusPercent() override {
+ return Impl.getInlinerVectorBonusPercent();
+ }
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) override {
- return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
+ ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
+ return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) override {
- return Impl.getIntrinsicCost(IID, RetTy, Arguments);
+ ArrayRef<const Value *> Arguments,
+ const User *U = nullptr) override {
+ return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+ }
+ int getMemcpyCost(const Instruction *I) override {
+ return Impl.getMemcpyCost(I);
}
int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
return Impl.getUserCost(U, Operands);
@@ -1279,6 +1402,12 @@ public:
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) override {
+ return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
@@ -1299,21 +1428,42 @@ public:
bool canMacroFuseCmp() override {
return Impl.canMacroFuseCmp();
}
+ bool canSaveCmp(Loop *L, BranchInst **BI,
+ ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) override {
+ return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+ }
bool shouldFavorPostInc() const override {
return Impl.shouldFavorPostInc();
}
+ bool shouldFavorBackedgeIndex(const Loop *L) const override {
+ return Impl.shouldFavorBackedgeIndex(L);
+ }
bool isLegalMaskedStore(Type *DataType) override {
return Impl.isLegalMaskedStore(DataType);
}
bool isLegalMaskedLoad(Type *DataType) override {
return Impl.isLegalMaskedLoad(DataType);
}
+ bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+ return Impl.isLegalNTStore(DataType, Alignment);
+ }
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+ return Impl.isLegalNTLoad(DataType, Alignment);
+ }
bool isLegalMaskedScatter(Type *DataType) override {
return Impl.isLegalMaskedScatter(DataType);
}
bool isLegalMaskedGather(Type *DataType) override {
return Impl.isLegalMaskedGather(DataType);
}
+ bool isLegalMaskedCompressStore(Type *DataType) override {
+ return Impl.isLegalMaskedCompressStore(DataType);
+ }
+ bool isLegalMaskedExpandLoad(Type *DataType) override {
+ return Impl.isLegalMaskedExpandLoad(DataType);
+ }
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
@@ -1368,9 +1518,9 @@ public:
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
- const MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const override {
- return Impl.enableMemCmpExpansion(IsZeroCmp);
+ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const override {
+ return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
@@ -1617,6 +1767,11 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
+
+ unsigned getGISelRematGlobalCost() const override {
+ return Impl.getGISelRematGlobalCost();
+ }
+
int getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index c9a234deeb7d..b99e1eb9adf0 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1,9 +1,8 @@
//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -124,7 +123,7 @@ public:
return TTI::TCC_Basic;
}
- unsigned getCallCost(FunctionType *FTy, int NumArgs) {
+ unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
assert(FTy && "FunctionType must be provided to this routine.");
// The target-independent implementation just measures the size of the
@@ -141,45 +140,10 @@ public:
unsigned getInliningThresholdMultiplier() { return 1; }
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) {
- switch (IID) {
- default:
- // Intrinsics rarely (if ever) have normal argument setup constraints.
- // Model them as having a basic instruction cost.
- // FIXME: This is wrong for libc intrinsics.
- return TTI::TCC_Basic;
+ int getInlinerVectorBonusPercent() { return 150; }
- case Intrinsic::annotation:
- case Intrinsic::assume:
- case Intrinsic::sideeffect:
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::dbg_label:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- case Intrinsic::is_constant:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- case Intrinsic::experimental_gc_result:
- case Intrinsic::experimental_gc_relocate:
- case Intrinsic::coro_alloc:
- case Intrinsic::coro_begin:
- case Intrinsic::coro_free:
- case Intrinsic::coro_end:
- case Intrinsic::coro_frame:
- case Intrinsic::coro_size:
- case Intrinsic::coro_suspend:
- case Intrinsic::coro_param:
- case Intrinsic::coro_subfn_addr:
- // These intrinsics don't actually represent code after lowering.
- return TTI::TCC_Free;
- }
+ unsigned getMemcpyCost(const Instruction *I) {
+ return TTI::TCC_Expensive;
}
bool hasBranchDivergence() { return false; }
@@ -228,6 +192,13 @@ public:
return true;
}
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ return false;
+ }
+
void getUnrollingPreferences(Loop *, ScalarEvolution &,
TTI::UnrollingPreferences &) {}
@@ -252,16 +223,42 @@ public:
bool canMacroFuseCmp() { return false; }
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) {
+ return false;
+ }
+
bool shouldFavorPostInc() const { return false; }
+ bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
+
bool isLegalMaskedStore(Type *DataType) { return false; }
bool isLegalMaskedLoad(Type *DataType) { return false; }
+ bool isLegalNTStore(Type *DataType, unsigned Alignment) {
+ // By default, assume nontemporal memory stores are available for stores
+ // that are aligned and have a size that is a power of 2.
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+ return Alignment >= DataSize && isPowerOf2_32(DataSize);
+ }
+
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ // By default, assume nontemporal memory loads are available for loads that
+ // are aligned and have a size that is a power of 2.
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+ return Alignment >= DataSize && isPowerOf2_32(DataSize);
+ }
+
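// Restated as a hypothetical standalone predicate (not part of the patch),
// the default rule above is simply "at least natural alignment and a
// power-of-two width":

// For example, a 16-byte store with 16-byte alignment passes; the same store
// with only 8-byte alignment does not.
bool defaultNontemporalLegal(unsigned StoreSizeInBytes, unsigned AlignInBytes) {
  bool WidthIsPow2 =
      StoreSizeInBytes != 0 && (StoreSizeInBytes & (StoreSizeInBytes - 1)) == 0;
  return AlignInBytes >= StoreSizeInBytes && WidthIsPow2;
}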
bool isLegalMaskedScatter(Type *DataType) { return false; }
bool isLegalMaskedGather(Type *DataType) { return false; }
+ bool isLegalMaskedCompressStore(Type *DataType) { return false; }
+
+ bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
+
bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
@@ -307,9 +304,9 @@ public:
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const {
- return nullptr;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const {
+ return {};
}
bool enableInterleavedAccessVectorization() { return false; }
@@ -583,6 +580,10 @@ public:
return true;
}
+ unsigned getGISelRematGlobalCost() const {
+ return 1;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
@@ -679,7 +680,7 @@ protected:
public:
using BaseT::getCallCost;
- unsigned getCallCost(const Function *F, int NumArgs) {
+ unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
assert(F && "A concrete function must be provided to this routine.");
if (NumArgs < 0)
@@ -691,35 +692,34 @@ public:
FunctionType *FTy = F->getFunctionType();
SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
return static_cast<T *>(this)
- ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
+ ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
}
if (!static_cast<T *>(this)->isLoweredToCall(F))
return TTI::TCC_Basic; // Give a basic cost if it will be lowered
// directly.
- return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
+ return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
}
- unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
+ unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+ const User *U) {
// Simply delegate to generic handling of the call.
// FIXME: We should use instsimplify or something else to catch calls which
// will constant fold with these arguments.
- return static_cast<T *>(this)->getCallCost(F, Arguments.size());
+ return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
}
using BaseT::getGEPCost;
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) {
- const GlobalValue *BaseGV = nullptr;
- if (Ptr != nullptr) {
- // TODO: will remove this when pointers have an opaque type.
- assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
- PointeeType &&
- "explicit pointee type doesn't match operand's pointee type");
- BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
- }
+ assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
+ // TODO: will remove this when pointers have an opaque type.
+ assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
+ PointeeType &&
+ "explicit pointee type doesn't match operand's pointee type");
+ auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
bool HasBaseReg = (BaseGV == nullptr);
auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
@@ -762,21 +762,60 @@ public:
}
}
- // Assumes the address space is 0 when Ptr is nullptr.
- unsigned AS =
- (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
-
if (static_cast<T *>(this)->isLegalAddressingMode(
TargetType, const_cast<GlobalValue *>(BaseGV),
- BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
+ BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
+ Ptr->getType()->getPointerAddressSpace()))
return TTI::TCC_Free;
return TTI::TCC_Basic;
}
- using BaseT::getIntrinsicCost;
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys, const User *U) {
+ switch (IID) {
+ default:
+ // Intrinsics rarely (if ever) have normal argument setup constraints.
+ // Model them as having a basic instruction cost.
+ return TTI::TCC_Basic;
+
+ // TODO: other libc intrinsics.
+ case Intrinsic::memcpy:
+ return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
+
+ case Intrinsic::annotation:
+ case Intrinsic::assume:
+ case Intrinsic::sideeffect:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::is_constant:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ case Intrinsic::experimental_gc_result:
+ case Intrinsic::experimental_gc_relocate:
+ case Intrinsic::coro_alloc:
+ case Intrinsic::coro_begin:
+ case Intrinsic::coro_free:
+ case Intrinsic::coro_end:
+ case Intrinsic::coro_frame:
+ case Intrinsic::coro_size:
+ case Intrinsic::coro_suspend:
+ case Intrinsic::coro_param:
+ case Intrinsic::coro_subfn_addr:
+ // These intrinsics don't actually represent code after lowering.
+ return TTI::TCC_Free;
+ }
+ }
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) {
+ ArrayRef<const Value *> Arguments, const User *U) {
// Delegate to the generic intrinsic handling code. This mostly provides an
// opportunity for targets to (for example) special case the cost of
// certain intrinsics based on constants used as arguments.
@@ -784,7 +823,7 @@ public:
ParamTys.reserve(Arguments.size());
for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
ParamTys.push_back(Arguments[Idx]->getType());
- return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
+ return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
}
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
@@ -808,22 +847,18 @@ public:
// Just use the called value type.
Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
return static_cast<T *>(this)
- ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+ ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
}
SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
- return static_cast<T *>(this)->getCallCost(F, Arguments);
+ return static_cast<T *>(this)->getCallCost(F, Arguments, U);
}
- if (const CastInst *CI = dyn_cast<CastInst>(U)) {
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- return TTI::TCC_Free;
- if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
- return static_cast<T *>(this)->getExtCost(CI, Operands.back());
- }
+ if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
+ // The old behaviour of generally treating extensions of icmp to be free
+ // has been removed. A target that needs it should override getUserCost().
+ return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
+ Operands.back());
return static_cast<T *>(this)->getOperationCost(
Operator::getOpcode(U), U->getType(),
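
A minimal sketch of how these hooks are usually consumed from the outside, through TargetTransformInfo::getUserCost, which dispatches to the getCallCost / getIntrinsicCost / getExtCost overloads shown above (intrinsics listed as free come back as TCC_Free). The Function and TargetTransformInfo objects are assumed to be supplied by the caller; this is illustrative, not part of the patch.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Sum the TTI user cost of every instruction in F.
static int estimateFunctionCost(const Function &F,
                                const TargetTransformInfo &TTI) {
  int Cost = 0;
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB) {
      SmallVector<const Value *, 4> Operands(I.value_op_begin(),
                                             I.value_op_end());
      // Calls and intrinsics are routed through the hooks in the hunk above.
      Cost += TTI.getUserCost(&I, Operands);
    }
  return Cost;
}
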
diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h
index b05d384ab1a3..a1ffd03c4053 100644
--- a/include/llvm/Analysis/Trace.h
+++ b/include/llvm/Analysis/Trace.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/Trace.h - Represent one trace of LLVM code -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/include/llvm/Analysis/TypeBasedAliasAnalysis.h
index d2e6df22425e..344f26806618 100644
--- a/include/llvm/Analysis/TypeBasedAliasAnalysis.h
+++ b/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -1,9 +1,8 @@
//===- TypeBasedAliasAnalysis.h - Type-Based Alias Analysis -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,12 +40,16 @@ public:
return false;
}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
FunctionModRefBehavior getModRefBehavior(const Function *F);
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
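
The AAQueryInfo threaded through these overloads is created and cached by AAResults; typical clients keep calling the two-argument query, as in this small sketch (names assumed, illustrative only).

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

// AAResults::alias() constructs the AAQueryInfo and forwards it to the
// per-analysis overloads such as the TBAA ones declared above.
static bool mayAlias(AAResults &AA, const MemoryLocation &LocA,
                     const MemoryLocation &LocB) {
  return AA.alias(LocA, LocB) != NoAlias;
}
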
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index 3bf9c5d20741..82cf8efeea54 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -1,9 +1,8 @@
//===- TypeMetadataUtils.h - Utilities related to type metadata --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h
index b4141bbff28d..acbdf5dca32c 100644
--- a/include/llvm/Analysis/Utils/Local.h
+++ b/include/llvm/Analysis/Utils/Local.h
@@ -1,9 +1,8 @@
//===- Local.h - Functions to perform local transformations -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/ValueLattice.h b/include/llvm/Analysis/ValueLattice.h
index 0744ca617e48..56519d7d0857 100644
--- a/include/llvm/Analysis/ValueLattice.h
+++ b/include/llvm/Analysis/ValueLattice.h
@@ -1,9 +1,8 @@
//===- ValueLattice.h - Value constraint analysis ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/ValueLatticeUtils.h b/include/llvm/Analysis/ValueLatticeUtils.h
index 02072672e56e..a3bbb96129bf 100644
--- a/include/llvm/Analysis/ValueLatticeUtils.h
+++ b/include/llvm/Analysis/ValueLatticeUtils.h
@@ -1,9 +1,8 @@
//===-- ValueLatticeUtils.h - Utils for solving lattices --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index f46fdfcb608e..fa7e0e0eef7e 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/ValueTracking.h - Walk computations --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,8 +16,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include <cassert>
@@ -29,10 +30,10 @@ namespace llvm {
class AddOperator;
class APInt;
class AssumptionCache;
-class DataLayout;
class DominatorTree;
class GEPOperator;
class IntrinsicInst;
+class WithOverflowInst;
struct KnownBits;
class Loop;
class LoopInfo;
@@ -223,7 +224,7 @@ class Value;
/// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
/// i16 0x1234), return null. If the value is entirely undef and padding,
/// return undef.
- Value *isBytewiseValue(Value *V);
+ Value *isBytewiseValue(Value *V, const DataLayout &DL);
/// Given an aggregate and a sequence of indices, see if the scalar value
/// indexed is already around as a register, for example if it were inserted
@@ -237,8 +238,18 @@ class Value;
/// Analyze the specified pointer to see if it can be expressed as a base
/// pointer plus a constant offset. Return the base and offset to the caller.
- Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &DL);
+ ///
+ /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
+ /// creates and later unpacks the required APInt.
+ inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const DataLayout &DL) {
+ APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+ Value *Base =
+ Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
+ /* AllowNonInbounds */ true);
+ Offset = OffsetAPInt.getSExtValue();
+ return Base;
+ }
inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
int64_t &Offset,
const DataLayout &DL) {
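
A short usage sketch of the wrapper above; the pointer value and DataLayout are assumed to come from the caller's context (illustrative only).

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Returns true when Ptr is ExpectedBase plus a compile-time-constant number
// of bytes; Offset receives that byte offset (0 if nothing could be folded).
static bool isConstantOffsetFrom(Value *Ptr, const Value *ExpectedBase,
                                 int64_t &Offset, const DataLayout &DL) {
  Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL);
  return Base == ExpectedBase;
}
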
@@ -351,7 +362,8 @@ class Value;
/// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects
/// should not assume that Curr and Prev share the same underlying object thus
/// it shouldn't look through the phi above.
- void GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+ void GetUnderlyingObjects(const Value *V,
+ SmallVectorImpl<const Value *> &Objects,
const DataLayout &DL, LoopInfo *LI = nullptr,
unsigned MaxLookup = 6);
@@ -411,7 +423,16 @@ class Value;
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI,
const DominatorTree *DT = nullptr);
- enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
+ enum class OverflowResult {
+ /// Always overflows in the direction of signed/unsigned min value.
+ AlwaysOverflowsLow,
+ /// Always overflows in the direction of signed/unsigned max value.
+ AlwaysOverflowsHigh,
+ /// May or may not overflow.
+ MayOverflow,
+ /// Never overflows.
+ NeverOverflows,
+ };
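
A sketch of acting on the refined four-state result; OR is assumed to come from one of the computeOverflowFor* helpers declared below (illustrative only).

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Only NeverOverflows licenses adding no-wrap style flags; the two
// AlwaysOverflows states additionally tell the caller which bound is hit.
static bool provesNoWrap(OverflowResult OR) {
  switch (OR) {
  case OverflowResult::NeverOverflows:
    return true;
  case OverflowResult::AlwaysOverflowsLow:
  case OverflowResult::AlwaysOverflowsHigh:
  case OverflowResult::MayOverflow:
    return false;
  }
  llvm_unreachable("covered switch above");
}
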
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
@@ -455,12 +476,17 @@ class Value;
const Instruction *CxtI,
const DominatorTree *DT);
- /// Returns true if the arithmetic part of the \p II 's result is
+ /// Returns true if the arithmetic part of the \p WO 's result is
/// used only along the paths control dependent on the computation
- /// not overflowing, \p II being an <op>.with.overflow intrinsic.
- bool isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+ /// not overflowing, \p WO being an <op>.with.overflow intrinsic.
+ bool isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
const DominatorTree &DT);
+
+ /// Determine the possible constant range of an integer or vector of integer
+ /// value. This is intended as a cheap, non-recursive check.
+ ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true);
+
/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
/// instruction that follows within a basic block). E.g. this is not
@@ -506,6 +532,12 @@ class Value;
/// value (all bits poison).
const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
+ /// Return true if the given instruction must trigger undefined behavior
+ /// when I is executed with any operands which appear in KnownPoison holding
+ /// a full-poison value at the point of execution.
+ bool mustTriggerUB(const Instruction *I,
+ const SmallSet<const Value *, 16>& KnownPoison);
+
/// Return true if this function can prove that if PoisonI is executed
/// and yields a full-poison value (all bits poison), then that will
/// trigger undefined behavior.
@@ -584,6 +616,12 @@ class Value;
return Result;
}
+ /// Determine the pattern that a select with the given compare as its
+ /// predicate and given values as its true/false operands would match.
+ SelectPatternResult matchDecomposedSelectPattern(
+ CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0);
+
/// Return the canonical comparison predicate for the specified
/// minimum/maximum flavor.
CmpInst::Predicate getMinMaxPred(SelectPatternFlavor SPF,
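
A sketch of the new decomposed entry point: unlike matchSelectPattern it takes the compare and the would-be select operands separately, which is useful before the select instruction has actually been formed. Names are assumed; illustrative only.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Returns true if (Cmp ? TVal : FVal) would be a signed-max idiom.
static bool wouldBeSMax(CmpInst *Cmp, Value *TVal, Value *FVal) {
  Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternResult Res =
      matchDecomposedSelectPattern(Cmp, TVal, FVal, LHS, RHS);
  return Res.Flavor == SPF_SMAX;
}
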
diff --git a/include/llvm/Analysis/VecFuncs.def b/include/llvm/Analysis/VecFuncs.def
new file mode 100644
index 000000000000..4c9206266d9a
--- /dev/null
+++ b/include/llvm/Analysis/VecFuncs.def
@@ -0,0 +1,250 @@
+//===-- VecFuncs.def - Library information -------------*- C++ -*-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file will create mappings from scalar math functions to vector
+// functions along with their vectorization factor. The current support includes
+// such mappings for Accelerate framework, MASS vector library, and SVML library.
+
+#if !(defined(TLI_DEFINE_VECFUNC))
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
+#endif
+
+#if defined(TLI_DEFINE_ACCELERATE_VECFUNCS)
+// Accelerate framework's Vector Functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("ceilf", "vceilf", 4)
+TLI_DEFINE_VECFUNC("fabsf", "vfabsf", 4)
+TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", 4)
+TLI_DEFINE_VECFUNC("floorf", "vfloorf", 4)
+TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("expf", "vexpf", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", 4)
+TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", 4)
+TLI_DEFINE_VECFUNC("logf", "vlogf", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", 4)
+TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", 4)
+TLI_DEFINE_VECFUNC("log10f", "vlog10f", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", 4)
+TLI_DEFINE_VECFUNC("logbf", "vlogbf", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sinf", "vsinf", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4)
+TLI_DEFINE_VECFUNC("cosf", "vcosf", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", 4)
+TLI_DEFINE_VECFUNC("tanf", "vtanf", 4)
+TLI_DEFINE_VECFUNC("asinf", "vasinf", 4)
+TLI_DEFINE_VECFUNC("acosf", "vacosf", 4)
+TLI_DEFINE_VECFUNC("atanf", "vatanf", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinhf", "vsinhf", 4)
+TLI_DEFINE_VECFUNC("coshf", "vcoshf", 4)
+TLI_DEFINE_VECFUNC("tanhf", "vtanhf", 4)
+TLI_DEFINE_VECFUNC("asinhf", "vasinhf", 4)
+TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
+TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
+
+
+#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
+// IBM MASS library's vector Functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
+TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
+TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
+TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
+TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
+TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
+TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
+TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
+TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
+TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+
+
+#elif defined(TLI_DEFINE_SVML_VECFUNCS)
+// Intel SVML library's Vector Functions
+
+TLI_DEFINE_VECFUNC("sin", "__svml_sin2", 2)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin4", 4)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin8", 8)
+
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", 8)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", 16)
+
+TLI_DEFINE_VECFUNC("cos", "__svml_cos2", 2)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos4", 4)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos8", 8)
+
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", 4)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", 8)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", 16)
+
+TLI_DEFINE_VECFUNC("pow", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("powf", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("exp", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("expf", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("log", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("log", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("log", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("logf", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf16", 16)
+
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+
+
+#else
+#error "Must choose which vector library functions are to be defined."
+#endif
+
+#undef TLI_DEFINE_VECFUNC
+#undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_MASSV_VECFUNCS
+#undef TLI_DEFINE_SVML_VECFUNCS
+
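
The .def file is consumed with the X-macro idiom: the includer picks one library via a TLI_DEFINE_*_VECFUNCS guard and either accepts the default TLI_DEFINE_VECFUNC expansion from the top of the file or supplies its own. A sketch of building a lookup table this way; the VecDesc struct mirrors how TargetLibraryInfo consumes these triples but is written out here only as an assumption.

#include <cstring>

// One scalar-to-vector mapping, matching the (SCAL, VEC, VF) triples above.
struct VecDesc {
  const char *ScalarFnName;
  const char *VectorFnName;
  unsigned VectorizationFactor;
};

static const VecDesc SVMLVecFuncs[] = {
#define TLI_DEFINE_SVML_VECFUNCS
#include "llvm/Analysis/VecFuncs.def"
};

// Linear lookup: vector counterpart of a scalar callee at a given VF.
static const char *findVectorizedName(const char *Scalar, unsigned VF) {
  for (const VecDesc &D : SVMLVecFuncs)
    if (D.VectorizationFactor == VF &&
        std::strcmp(D.ScalarFnName, Scalar) == 0)
      return D.VectorFnName;
  return nullptr;
}
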
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index be4d4f17b9ad..d93d2bc4570b 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/CheckedArithmetic.h"
namespace llvm {
@@ -36,13 +36,12 @@ enum ID : unsigned;
}
/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// This method returns true if the intrinsic's argument types are all scalars
+/// for the scalar form of the intrinsic and all vectors (or scalars handled by
+/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);
-/// Identifies if the intrinsic has a scalar operand. It checks for
-/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
/// Returns intrinsic ID for call.
@@ -78,6 +77,12 @@ Value *findScalarElement(Value *V, unsigned EltNo);
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);
+/// Return true if the input value is known to be a vector with all identical
+/// elements (potentially including undefined elements).
+/// This may be more powerful than the related getSplatValue() because it is
+/// not limited to finding a scalar source value for the splatted vector.
+bool isSplatValue(const Value *V, unsigned Depth = 0);
+
/// Compute a map of integer instructions to their minimum legal type
/// size.
///
@@ -223,6 +228,20 @@ Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+/// Given a mask vector of the form <Y x i1>, return true if all of the
+/// elements of this predicate mask are false or undef. That is, return true
+/// if all lanes can be assumed inactive.
+bool maskIsAllZeroOrUndef(Value *Mask);
+
+/// Given a mask vector of the form <Y x i1>, return true if all of the
+/// elements of this predicate mask are true or undef. That is, return true
+/// if all lanes can be assumed active.
+bool maskIsAllOneOrUndef(Value *Mask);
+
+/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
+/// for each lane which may be active.
+APInt possiblyDemandedEltsInMask(Value *Mask);
+
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
@@ -251,10 +270,10 @@ Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
- InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
+ InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
: Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
- InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
+ InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
: Align(Align), InsertPos(Instr) {
assert(Align && "The alignment should be non-zero");
@@ -266,19 +285,23 @@ public:
}
bool isReverse() const { return Reverse; }
- unsigned getFactor() const { return Factor; }
- unsigned getAlignment() const { return Align; }
- unsigned getNumMembers() const { return Members.size(); }
+ uint32_t getFactor() const { return Factor; }
+ uint32_t getAlignment() const { return Align; }
+ uint32_t getNumMembers() const { return Members.size(); }
/// Try to insert a new member \p Instr with index \p Index and
/// alignment \p NewAlign. The index is related to the leader and it could be
/// negative if it is the new leader.
///
/// \returns false if the instruction doesn't belong to the group.
- bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
+ bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
assert(NewAlign && "The new member's alignment should be non-zero");
- int Key = Index + SmallestKey;
+ // Make sure the key fits in an int32_t.
+ Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
+ if (!MaybeKey)
+ return false;
+ int32_t Key = *MaybeKey;
// Skip if there is already a member with the same index.
if (Members.find(Key) != Members.end())
@@ -286,13 +309,19 @@ public:
if (Key > LargestKey) {
// The largest index is always less than the interleave factor.
- if (Index >= static_cast<int>(Factor))
+ if (Index >= static_cast<int32_t>(Factor))
return false;
LargestKey = Key;
} else if (Key < SmallestKey) {
+
+ // Make sure the largest index fits in an int32_t.
+ Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
+ if (!MaybeLargestIndex)
+ return false;
+
// The largest index is always less than the interleave factor.
- if (LargestKey - Key >= static_cast<int>(Factor))
+ if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
return false;
SmallestKey = Key;
@@ -307,8 +336,8 @@ public:
/// Get the member with the given index \p Index
///
/// \returns nullptr if contains no such member.
- InstTy *getMember(unsigned Index) const {
- int Key = SmallestKey + Index;
+ InstTy *getMember(uint32_t Index) const {
+ int32_t Key = SmallestKey + Index;
auto Member = Members.find(Key);
if (Member == Members.end())
return nullptr;
@@ -318,7 +347,7 @@ public:
/// Get the index for the given member. Unlike the key in the member
/// map, the index starts from 0.
- unsigned getIndex(const InstTy *Instr) const {
+ uint32_t getIndex(const InstTy *Instr) const {
for (auto I : Members) {
if (I.second == Instr)
return I.first - SmallestKey;
@@ -356,12 +385,12 @@ public:
}
private:
- unsigned Factor; // Interleave Factor.
+ uint32_t Factor; // Interleave Factor.
bool Reverse;
- unsigned Align;
- DenseMap<int, InstTy *> Members;
- int SmallestKey = 0;
- int LargestKey = 0;
+ uint32_t Align;
+ DenseMap<int32_t, InstTy *> Members;
+ int32_t SmallestKey = 0;
+ int32_t LargestKey = 0;
// To avoid breaking dependences, vectorized instructions of an interleave
// group should be inserted at either the first load or the last store in
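
insertMember() above now routes its key arithmetic through llvm::checkedAdd / checkedSub from Support/CheckedArithmetic.h, which return None rather than silently wrapping. The pattern in isolation, with an assumed helper name (illustrative only).

#include "llvm/ADT/Optional.h"
#include "llvm/Support/CheckedArithmetic.h"
#include <cstdint>

using namespace llvm;

// Compute Index + SmallestKey as a member-map key, refusing inputs whose
// sum would overflow int32_t instead of wrapping around.
static Optional<int32_t> computeKey(int32_t Index, int32_t SmallestKey) {
  if (Optional<int32_t> Key = checkedAdd(Index, SmallestKey))
    return *Key;
  return None;
}
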
diff --git a/include/llvm/AsmParser/Parser.h b/include/llvm/AsmParser/Parser.h
index 285a7c022a24..b0c603497805 100644
--- a/include/llvm/AsmParser/Parser.h
+++ b/include/llvm/AsmParser/Parser.h
@@ -1,9 +1,8 @@
//===-- Parser.h - Parser for LLVM IR text assembly files -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/AsmParser/SlotMapping.h b/include/llvm/AsmParser/SlotMapping.h
index bd7e8fcad8bc..0e95eb816b4c 100644
--- a/include/llvm/AsmParser/SlotMapping.h
+++ b/include/llvm/AsmParser/SlotMapping.h
@@ -1,9 +1,8 @@
//===-- SlotMapping.h - Slot number mapping for unnamed values --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h b/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
index de44f41720ed..7332b2a7ea89 100644
--- a/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
+++ b/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
@@ -1,9 +1,8 @@
//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,7 +16,7 @@
#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
-#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
namespace llvm {
namespace AMDGPU {
@@ -34,22 +33,22 @@ namespace V3 {
class MetadataVerifier {
bool Strict;
- bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
- function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
- bool verifyInteger(msgpack::Node &Node);
- bool verifyArray(msgpack::Node &Node,
- function_ref<bool(msgpack::Node &)> verifyNode,
+ bool verifyScalar(msgpack::DocNode &Node, msgpack::Type SKind,
+ function_ref<bool(msgpack::DocNode &)> verifyValue = {});
+ bool verifyInteger(msgpack::DocNode &Node);
+ bool verifyArray(msgpack::DocNode &Node,
+ function_ref<bool(msgpack::DocNode &)> verifyNode,
Optional<size_t> Size = None);
- bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
- function_ref<bool(msgpack::Node &)> verifyNode);
+ bool verifyEntry(msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::DocNode &)> verifyNode);
bool
- verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
- msgpack::ScalarNode::ScalarKind SKind,
- function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
- bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
+ verifyScalarEntry(msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+ msgpack::Type SKind,
+ function_ref<bool(msgpack::DocNode &)> verifyValue = {});
+ bool verifyIntegerEntry(msgpack::MapDocNode &MapNode, StringRef Key,
bool Required);
- bool verifyKernelArgs(msgpack::Node &Node);
- bool verifyKernel(msgpack::Node &Node);
+ bool verifyKernelArgs(msgpack::DocNode &Node);
+ bool verifyKernel(msgpack::DocNode &Node);
public:
/// Construct a MetadataVerifier, specifying whether it will operate in \p
@@ -59,7 +58,7 @@ public:
/// Verify given HSA metadata.
///
/// \returns True when successful, false when metadata is invalid.
- bool verify(msgpack::Node &HSAMetadataRoot);
+ bool verify(msgpack::DocNode &HSAMetadataRoot);
};
} // end namespace V3
diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h
index 7b973c03cc80..0fe38a437725 100644
--- a/include/llvm/BinaryFormat/COFF.h
+++ b/include/llvm/BinaryFormat/COFF.h
@@ -1,9 +1,8 @@
//===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -371,13 +370,15 @@ enum RelocationTypesARM : unsigned {
IMAGE_REL_ARM_TOKEN = 0x0005,
IMAGE_REL_ARM_BLX24 = 0x0008,
IMAGE_REL_ARM_BLX11 = 0x0009,
+ IMAGE_REL_ARM_REL32 = 0x000A,
IMAGE_REL_ARM_SECTION = 0x000E,
IMAGE_REL_ARM_SECREL = 0x000F,
IMAGE_REL_ARM_MOV32A = 0x0010,
IMAGE_REL_ARM_MOV32T = 0x0011,
IMAGE_REL_ARM_BRANCH20T = 0x0012,
IMAGE_REL_ARM_BRANCH24T = 0x0014,
- IMAGE_REL_ARM_BLX23T = 0x0015
+ IMAGE_REL_ARM_BLX23T = 0x0015,
+ IMAGE_REL_ARM_PAIR = 0x0016,
};
enum RelocationTypesARM64 : unsigned {
@@ -398,9 +399,10 @@ enum RelocationTypesARM64 : unsigned {
IMAGE_REL_ARM64_ADDR64 = 0x000E,
IMAGE_REL_ARM64_BRANCH19 = 0x000F,
IMAGE_REL_ARM64_BRANCH14 = 0x0010,
+ IMAGE_REL_ARM64_REL32 = 0x0011,
};
-enum COMDATType : unsigned {
+enum COMDATType : uint8_t {
IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
IMAGE_COMDAT_SELECT_ANY,
IMAGE_COMDAT_SELECT_SAME_SIZE,
diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def
index 6ad3cb57f62f..b0f78d0fd61f 100644
--- a/include/llvm/BinaryFormat/Dwarf.def
+++ b/include/llvm/BinaryFormat/Dwarf.def
@@ -1,9 +1,8 @@
//===- llvm/Support/Dwarf.def - Dwarf definitions ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -355,7 +354,13 @@ HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU)
HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU)
HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU)
HANDLE_DW_AT(0x2111, GNU_call_site_value, 0, GNU)
+HANDLE_DW_AT (0x2112, GNU_call_site_data_value, 0, GNU)
+HANDLE_DW_AT (0x2113, GNU_call_site_target, 0, GNU)
+HANDLE_DW_AT (0x2114, GNU_call_site_target_clobbered, 0, GNU)
+HANDLE_DW_AT (0x2115, GNU_tail_call, 0, GNU)
+HANDLE_DW_AT (0x2116, GNU_all_tail_call_sites, 0, GNU)
HANDLE_DW_AT(0x2117, GNU_all_call_sites, 0, GNU)
+HANDLE_DW_AT (0x2118, GNU_all_source_call_sites, 0, GNU)
HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU)
// Extensions for Fission proposal.
HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU)
@@ -387,6 +392,7 @@ HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND)
HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM)
HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM)
HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM)
+HANDLE_DW_AT(0x3e03, LLVM_tag_offset, 0, LLVM)
// Apple extensions.
HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE)
HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE)
@@ -627,6 +633,8 @@ HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF)
// Vendor extensions:
// Extensions for GNU-style thread-local storage.
HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU)
+// The GNU entry value extension.
+HANDLE_DW_OP(0xf3, GNU_entry_value, 0, GNU)
// Extensions for Fission proposal.
HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
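
Entries added to Dwarf.def, such as the GNU call-site attributes above, become visible to every consumer that expands the file with its own HANDLE_DW_* macro. A sketch of the usual stringification pattern, assuming only that the includer defines HANDLE_DW_AT before including the file (the .def stubs out the other macros and undefines everything at the end).

#include <cstdint>

static const char *attributeName(uint16_t Attr) {
  switch (Attr) {
#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR)                                \
  case ID:                                                                     \
    return "DW_AT_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
  default:
    return nullptr;
  }
}
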
diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h
index 525a04d5e6cf..76d9c365c0a8 100644
--- a/include/llvm/BinaryFormat/Dwarf.h
+++ b/include/llvm/BinaryFormat/Dwarf.h
@@ -1,9 +1,8 @@
//===-- llvm/BinaryFormat/Dwarf.h ---Dwarf Constants-------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -130,7 +129,9 @@ enum LocationAtom {
#include "llvm/BinaryFormat/Dwarf.def"
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
- DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata.
+ DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata.
+ DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
};
enum TypeKind : uint8_t {
diff --git a/include/llvm/BinaryFormat/DynamicTags.def b/include/llvm/BinaryFormat/DynamicTags.def
index 2e15cc30fca7..aec408bd2d72 100644
--- a/include/llvm/BinaryFormat/DynamicTags.def
+++ b/include/llvm/BinaryFormat/DynamicTags.def
@@ -6,6 +6,11 @@
// such as DT_HIOS, etc. to allow using this file in other contexts.
// For example, we can use it to generate a stringification switch statement.
+#ifndef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define AARCH64_DYNAMIC_TAG_DEFINED
+#endif
+
#ifndef HEXAGON_DYNAMIC_TAG
#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG_DEFINED
@@ -16,6 +21,11 @@
#define MIPS_DYNAMIC_TAG_DEFINED
#endif
+#ifndef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG_DEFINED
+#endif
+
#ifndef PPC64_DYNAMIC_TAG
#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG_DEFINED
@@ -107,6 +117,10 @@ DYNAMIC_TAG(VERNEED, 0X6FFFFFFE) // The address of the version dependency
// table.
DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED.
+// AArch64 specific dynamic table entries
+AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001)
+AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003)
+
// Hexagon specific dynamic table entries
HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000)
HEXAGON_DYNAMIC_TAG(HEXAGON_VER, 0x70000001)
@@ -190,17 +204,27 @@ MIPS_DYNAMIC_TAG(MIPS_RWPLT, 0x70000034) // Points to the base
MIPS_DYNAMIC_TAG(MIPS_RLD_MAP_REL, 0x70000035) // Relative offset of run time loader
// map, used for debugging.
+// PPC specific dynamic table entries.
+PPC_DYNAMIC_TAG(PPC_GOT, 0x70000000) // Uses Secure PLT ABI.
+PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
+
// PPC64 specific dynamic table entries.
PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
// first glink lazy resolver stub.
// Sun machine-independent extensions.
DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self
+DYNAMIC_TAG(USED, 0x7FFFFFFE) // Same as DT_NEEDED
DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from
#ifdef DYNAMIC_TAG_MARKER_DEFINED
#undef DYNAMIC_TAG_MARKER
+#undef DYNAMIC_TAG_MARKER_DEFINED
+#endif
+#ifdef AARCH64_DYNAMIC_TAG_DEFINED
+#undef AARCH64_DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG_DEFINED
#endif
#ifdef MIPS_DYNAMIC_TAG_DEFINED
#undef MIPS_DYNAMIC_TAG
@@ -210,6 +234,10 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from
#undef HEXAGON_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG_DEFINED
#endif
+#ifdef PPC_DYNAMIC_TAG_DEFINED
+#undef PPC_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG_DEFINED
+#endif
#ifdef PPC64_DYNAMIC_TAG_DEFINED
#undef PPC64_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG_DEFINED
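
The guard scheme above (each processor-specific macro defaults to DYNAMIC_TAG and is undefined again at the end) lets one .def file feed both generic and per-architecture tables. A sketch of the stringification use case the file's header comment mentions; marker and processor-specific tags are stubbed out so that reused numeric values (for example DT_HIPROC overlaps DT_FILTER, and several architectures reuse 0x70000000) do not produce duplicate case labels. Illustrative only.

#include <cstdint>

static const char *genericDynamicTagName(uint64_t Tag) {
  switch (Tag) {
#define DYNAMIC_TAG(name, value)                                               \
  case value:                                                                  \
    return "DT_" #name;
#define DYNAMIC_TAG_MARKER(name, value)
#define AARCH64_DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG(name, value)
#define MIPS_DYNAMIC_TAG(name, value)
#define PPC_DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG(name, value)
#include "llvm/BinaryFormat/DynamicTags.def"
#undef DYNAMIC_TAG
#undef DYNAMIC_TAG_MARKER
#undef AARCH64_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG
#undef MIPS_DYNAMIC_TAG
#undef PPC_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG
  default:
    return nullptr;
  }
}
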
diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h
index ce35d127d433..2bd711137845 100644
--- a/include/llvm/BinaryFormat/ELF.h
+++ b/include/llvm/BinaryFormat/ELF.h
@@ -1,9 +1,8 @@
//===- llvm/BinaryFormat/ELF.h - ELF constants and structures ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -703,15 +702,20 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
+ EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
+ // AMDGCN GFX10.
+ EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
+ EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034,
+ EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
- EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x032,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1012,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
@@ -839,6 +843,10 @@ enum : unsigned {
SHT_LLVM_CALL_GRAPH_PROFILE = 0x6fff4c02, // LLVM Call Graph Profile.
SHT_LLVM_ADDRSIG = 0x6fff4c03, // List of address-significant symbols
// for safe ICF.
+ SHT_LLVM_DEPENDENT_LIBRARIES = 0x6fff4c04, // LLVM Dependent Library Specifiers.
+ SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification.
+ SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition.
+ SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition.
// Android's experimental support for SHT_RELR sections.
// https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets.
@@ -1340,6 +1348,14 @@ enum {
NT_FREEBSD_PROCSTAT_AUXV = 16,
};
+// Generic note types
+enum : unsigned {
+ NT_VERSION = 1,
+ NT_ARCH = 2,
+ NT_GNU_BUILD_ATTRIBUTE_OPEN = 0x100,
+ NT_GNU_BUILD_ATTRIBUTE_FUNC = 0x101,
+};
+
enum {
NT_GNU_ABI_TAG = 1,
NT_GNU_HWCAP = 2,
@@ -1352,13 +1368,65 @@ enum {
enum : unsigned {
GNU_PROPERTY_STACK_SIZE = 1,
GNU_PROPERTY_NO_COPY_ON_PROTECTED = 2,
- GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002
+ GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000,
+ GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002,
+ GNU_PROPERTY_X86_ISA_1_NEEDED = 0xc0008000,
+ GNU_PROPERTY_X86_FEATURE_2_NEEDED = 0xc0008001,
+ GNU_PROPERTY_X86_ISA_1_USED = 0xc0010000,
+ GNU_PROPERTY_X86_FEATURE_2_USED = 0xc0010001,
};
-// CET properties
-enum {
+// aarch64 processor feature bits.
+enum : unsigned {
+ GNU_PROPERTY_AARCH64_FEATURE_1_BTI = 1 << 0,
+ GNU_PROPERTY_AARCH64_FEATURE_1_PAC = 1 << 1,
+};
+
+// x86 processor feature bits.
+enum : unsigned {
GNU_PROPERTY_X86_FEATURE_1_IBT = 1 << 0,
- GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
+ GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1,
+
+ GNU_PROPERTY_X86_ISA_1_CMOV = 1 << 0,
+ GNU_PROPERTY_X86_ISA_1_SSE = 1 << 1,
+ GNU_PROPERTY_X86_ISA_1_SSE2 = 1 << 2,
+ GNU_PROPERTY_X86_ISA_1_SSE3 = 1 << 3,
+ GNU_PROPERTY_X86_ISA_1_SSSE3 = 1 << 4,
+ GNU_PROPERTY_X86_ISA_1_SSE4_1 = 1 << 5,
+ GNU_PROPERTY_X86_ISA_1_SSE4_2 = 1 << 6,
+ GNU_PROPERTY_X86_ISA_1_AVX = 1 << 7,
+ GNU_PROPERTY_X86_ISA_1_AVX2 = 1 << 8,
+ GNU_PROPERTY_X86_ISA_1_FMA = 1 << 9,
+ GNU_PROPERTY_X86_ISA_1_AVX512F = 1 << 10,
+ GNU_PROPERTY_X86_ISA_1_AVX512CD = 1 << 11,
+ GNU_PROPERTY_X86_ISA_1_AVX512ER = 1 << 12,
+ GNU_PROPERTY_X86_ISA_1_AVX512PF = 1 << 13,
+ GNU_PROPERTY_X86_ISA_1_AVX512VL = 1 << 14,
+ GNU_PROPERTY_X86_ISA_1_AVX512DQ = 1 << 15,
+ GNU_PROPERTY_X86_ISA_1_AVX512BW = 1 << 16,
+ GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS = 1 << 17,
+ GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW = 1 << 18,
+ GNU_PROPERTY_X86_ISA_1_AVX512_BITALG = 1 << 19,
+ GNU_PROPERTY_X86_ISA_1_AVX512_IFMA = 1 << 20,
+ GNU_PROPERTY_X86_ISA_1_AVX512_VBMI = 1 << 21,
+ GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2 = 1 << 22,
+ GNU_PROPERTY_X86_ISA_1_AVX512_VNNI = 1 << 23,
+
+ GNU_PROPERTY_X86_FEATURE_2_X86 = 1 << 0,
+ GNU_PROPERTY_X86_FEATURE_2_X87 = 1 << 1,
+ GNU_PROPERTY_X86_FEATURE_2_MMX = 1 << 2,
+ GNU_PROPERTY_X86_FEATURE_2_XMM = 1 << 3,
+ GNU_PROPERTY_X86_FEATURE_2_YMM = 1 << 4,
+ GNU_PROPERTY_X86_FEATURE_2_ZMM = 1 << 5,
+ GNU_PROPERTY_X86_FEATURE_2_FXSR = 1 << 6,
+ GNU_PROPERTY_X86_FEATURE_2_XSAVE = 1 << 7,
+ GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT = 1 << 8,
+ GNU_PROPERTY_X86_FEATURE_2_XSAVEC = 1 << 9,
+};
+
+// AMDGPU-specific section indices.
+enum {
+ SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
};
// AMD specific notes. (Code Object V2)
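
A small sketch of testing the new AArch64 feature bits; FeatureWord is assumed to be the 32-bit payload of a GNU_PROPERTY_AARCH64_FEATURE_1_AND property read out of a .note.gnu.property section (illustrative only).

#include "llvm/BinaryFormat/ELF.h"
#include <cstdint>

using namespace llvm::ELF;

// True when the object demands both branch-target identification and
// pointer-authentication PLTs.
static bool requiresBTIAndPAC(uint32_t FeatureWord) {
  return (FeatureWord & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) &&
         (FeatureWord & GNU_PROPERTY_AARCH64_FEATURE_1_PAC);
}
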
diff --git a/include/llvm/BinaryFormat/ELFRelocs/ARM.def b/include/llvm/BinaryFormat/ELFRelocs/ARM.def
index 730fc5b8836c..e0709fb81813 100644
--- a/include/llvm/BinaryFormat/ELFRelocs/ARM.def
+++ b/include/llvm/BinaryFormat/ELFRelocs/ARM.def
@@ -135,4 +135,7 @@ ELF_RELOC(R_ARM_PRIVATE_15, 0x7f)
ELF_RELOC(R_ARM_ME_TOO, 0x80)
ELF_RELOC(R_ARM_THM_TLS_DESCSEQ16, 0x81)
ELF_RELOC(R_ARM_THM_TLS_DESCSEQ32, 0x82)
+ELF_RELOC(R_ARM_THM_BF16, 0x88)
+ELF_RELOC(R_ARM_THM_BF12, 0x89)
+ELF_RELOC(R_ARM_THM_BF18, 0x8a)
ELF_RELOC(R_ARM_IRELATIVE, 0xa0)
diff --git a/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
index e4f8ee0ebe2b..28036889cca6 100644
--- a/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
+++ b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def
@@ -27,9 +27,25 @@
#undef R_PPC_GOT16_HI
#undef R_PPC_GOT16_HA
#undef R_PPC_PLTREL24
+#undef R_PPC_COPY
+#undef R_PPC_GLOB_DAT
#undef R_PPC_JMP_SLOT
+#undef R_PPC_RELATIVE
#undef R_PPC_LOCAL24PC
+#undef R_PPC_UADDR32
+#undef R_PPC_UADDR16
#undef R_PPC_REL32
+#undef R_PPC_PLT32
+#undef R_PPC_PLTREL32
+#undef R_PPC_PLT16_LO
+#undef R_PPC_PLT16_HI
+#undef R_PPC_PLT16_HA
+#undef R_PPC_SDAREL16
+#undef R_PPC_SECTOFF
+#undef R_PPC_SECTOFF_LO
+#undef R_PPC_SECTOFF_HI
+#undef R_PPC_SECTOFF_HA
+#undef R_PPC_ADDR30
#undef R_PPC_TLS
#undef R_PPC_DTPMOD32
#undef R_PPC_TPREL16
@@ -84,9 +100,25 @@ ELF_RELOC(R_PPC_GOT16_LO, 15)
ELF_RELOC(R_PPC_GOT16_HI, 16)
ELF_RELOC(R_PPC_GOT16_HA, 17)
ELF_RELOC(R_PPC_PLTREL24, 18)
+ELF_RELOC(R_PPC_COPY, 19)
+ELF_RELOC(R_PPC_GLOB_DAT, 20)
ELF_RELOC(R_PPC_JMP_SLOT, 21)
+ELF_RELOC(R_PPC_RELATIVE, 22)
ELF_RELOC(R_PPC_LOCAL24PC, 23)
+ELF_RELOC(R_PPC_UADDR32, 24)
+ELF_RELOC(R_PPC_UADDR16, 25)
ELF_RELOC(R_PPC_REL32, 26)
+ELF_RELOC(R_PPC_PLT32, 27)
+ELF_RELOC(R_PPC_PLTREL32, 28)
+ELF_RELOC(R_PPC_PLT16_LO, 29)
+ELF_RELOC(R_PPC_PLT16_HI, 30)
+ELF_RELOC(R_PPC_PLT16_HA, 31)
+ELF_RELOC(R_PPC_SDAREL16, 32)
+ELF_RELOC(R_PPC_SECTOFF, 33)
+ELF_RELOC(R_PPC_SECTOFF_LO, 34)
+ELF_RELOC(R_PPC_SECTOFF_HI, 35)
+ELF_RELOC(R_PPC_SECTOFF_HA, 36)
+ELF_RELOC(R_PPC_ADDR30, 37)
ELF_RELOC(R_PPC_TLS, 67)
ELF_RELOC(R_PPC_DTPMOD32, 68)
ELF_RELOC(R_PPC_TPREL16, 69)
@@ -117,6 +149,7 @@ ELF_RELOC(R_PPC_GOT_DTPREL16_HI, 93)
ELF_RELOC(R_PPC_GOT_DTPREL16_HA, 94)
ELF_RELOC(R_PPC_TLSGD, 95)
ELF_RELOC(R_PPC_TLSLD, 96)
+ELF_RELOC(R_PPC_IRELATIVE, 248)
ELF_RELOC(R_PPC_REL16, 249)
ELF_RELOC(R_PPC_REL16_LO, 250)
ELF_RELOC(R_PPC_REL16_HI, 251)
diff --git a/include/llvm/BinaryFormat/MachO.def b/include/llvm/BinaryFormat/MachO.def
index 95de48d2b19e..76dcc58ba048 100644
--- a/include/llvm/BinaryFormat/MachO.def
+++ b/include/llvm/BinaryFormat/MachO.def
@@ -1,9 +1,8 @@
//,,,-- llvm/Support/MachO.def - The MachO file definitions -----*- C++ -*-,,,//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//,,,----------------------------------------------------------------------,,,//
//
diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h
index b3d60984249f..a01393a3b303 100644
--- a/include/llvm/BinaryFormat/MachO.h
+++ b/include/llvm/BinaryFormat/MachO.h
@@ -1,9 +1,8 @@
//===-- llvm/BinaryFormat/MachO.h - The MachO file format -------*- C++/-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -335,6 +334,7 @@ enum {
N_WEAK_DEF = 0x0080u,
N_SYMBOL_RESOLVER = 0x0100u,
N_ALT_ENTRY = 0x0200u,
+ N_COLD_FUNC = 0x0400u,
// For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL()
// as these are in the top 8 bits.
SELF_LIBRARY_ORDINAL = 0x0,
@@ -487,6 +487,7 @@ enum PlatformType {
PLATFORM_TVOS = 3,
PLATFORM_WATCHOS = 4,
PLATFORM_BRIDGEOS = 5,
+ PLATFORM_MACCATALYST = 6,
PLATFORM_IOSSIMULATOR = 7,
PLATFORM_TVOSSIMULATOR = 8,
PLATFORM_WATCHOSSIMULATOR = 9
@@ -942,8 +943,13 @@ struct fat_arch_64 {
// Structs from <mach-o/reloc.h>
struct relocation_info {
int32_t r_address;
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
+ uint32_t r_type : 4, r_extern : 1, r_length : 2, r_pcrel : 1,
+ r_symbolnum : 24;
+#else
uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1,
r_type : 4;
+#endif
};
struct scattered_relocation_info {
@@ -1396,7 +1402,8 @@ inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) {
enum : uint32_t {
// Capability bits used in the definition of cpu_type.
CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits
- CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI
+ CPU_ARCH_ABI64 = 0x01000000, // 64 bit ABI
+ CPU_ARCH_ABI64_32 = 0x02000000, // ILP32 ABI on 64-bit hardware
};
// Constants for the cputype field.
@@ -1409,6 +1416,7 @@ enum CPUType {
CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC
CPU_TYPE_ARM = 12,
CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64,
+ CPU_TYPE_ARM64_32 = CPU_TYPE_ARM | CPU_ARCH_ABI64_32,
CPU_TYPE_SPARC = 14,
CPU_TYPE_POWERPC = 18,
CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
@@ -1477,7 +1485,12 @@ enum CPUSubTypeARM {
CPU_SUBTYPE_ARM_V7EM = 16
};
-enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0 };
+enum CPUSubTypeARM64 {
+ CPU_SUBTYPE_ARM64_ALL = 0,
+ CPU_SUBTYPE_ARM64E = 2,
+};
+
+enum CPUSubTypeARM64_32 { CPU_SUBTYPE_ARM64_32_V8 = 1 };
enum CPUSubTypeSPARC { CPU_SUBTYPE_SPARC_ALL = 0 };
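
The new arm64_32 constants compose the same way as the existing Mach-O cputype/cpusubtype pairs; a sketch of recognising an ILP32 arm64_32 slice, masking off the subtype capability bits first (illustrative only).

#include "llvm/BinaryFormat/MachO.h"
#include <cstdint>

using namespace llvm::MachO;

static bool isArm64_32Slice(uint32_t CPUType, uint32_t CPUSubType) {
  return CPUType == CPU_TYPE_ARM64_32 &&
         (CPUSubType & ~CPU_SUBTYPE_MASK) == CPU_SUBTYPE_ARM64_32_V8;
}
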
diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h
index 04801f810be3..cd9833ec4d22 100644
--- a/include/llvm/BinaryFormat/Magic.h
+++ b/include/llvm/BinaryFormat/Magic.h
@@ -1,9 +1,8 @@
//===- llvm/BinaryFormat/Magic.h - File magic identification ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -40,11 +39,14 @@ struct file_magic {
macho_dsym_companion, ///< Mach-O dSYM companion file
macho_kext_bundle, ///< Mach-O kext bundle file
macho_universal_binary, ///< Mach-O universal binary
+ minidump, ///< Windows minidump file
coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
coff_object, ///< COFF object file
coff_import_library, ///< COFF import library
pecoff_executable, ///< PECOFF executable file
windows_resource, ///< Windows compiled resource file (.res)
+ xcoff_object_32, ///< 32-bit XCOFF object file
+ xcoff_object_64, ///< 64-bit XCOFF object file
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
};
diff --git a/include/llvm/BinaryFormat/Minidump.h b/include/llvm/BinaryFormat/Minidump.h
new file mode 100644
index 000000000000..65c17d1eb00c
--- /dev/null
+++ b/include/llvm/BinaryFormat/Minidump.h
@@ -0,0 +1,203 @@
+//===- Minidump.h - Minidump constants and structures -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines constants and data structures pertaining to the
+// Windows Minidump core file format.
+//
+// Reference:
+// https://msdn.microsoft.com/en-us/library/windows/desktop/ms679293(v=vs.85).aspx
+// https://chromium.googlesource.com/breakpad/breakpad/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MINIDUMP_H
+#define LLVM_BINARYFORMAT_MINIDUMP_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace minidump {
+
+/// The minidump header is the first part of a minidump file. It identifies the
+/// file as a minidump file, and gives the location of the stream directory.
+struct Header {
+ static constexpr uint32_t MagicSignature = 0x504d444d; // PMDM
+ static constexpr uint16_t MagicVersion = 0xa793;
+
+ support::ulittle32_t Signature;
+ // The high 16 bits of version field are implementation specific. The low 16
+ // bits should be MagicVersion.
+ support::ulittle32_t Version;
+ support::ulittle32_t NumberOfStreams;
+ support::ulittle32_t StreamDirectoryRVA;
+ support::ulittle32_t Checksum;
+ support::ulittle32_t TimeDateStamp;
+ support::ulittle64_t Flags;
+};
+static_assert(sizeof(Header) == 32, "");
+
+/// The type of a minidump stream identifies its contents. Stream numbers after
+/// LastReserved are for application-defined data streams.
+enum class StreamType : uint32_t {
+#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ Unused = 0,
+ LastReserved = 0x0000ffff,
+};
+
+/// Specifies the location (and size) of various objects in the minidump file.
+/// The location is relative to the start of the file.
+struct LocationDescriptor {
+ support::ulittle32_t DataSize;
+ support::ulittle32_t RVA;
+};
+static_assert(sizeof(LocationDescriptor) == 8, "");
+
+/// Describes a single memory range (both its VM address and where to find it in
+/// the file) of the process from which this minidump file was generated.
+struct MemoryDescriptor {
+ support::ulittle64_t StartOfMemoryRange;
+ LocationDescriptor Memory;
+};
+static_assert(sizeof(MemoryDescriptor) == 16, "");
+
+/// Specifies the location and type of a single stream in the minidump file. The
+/// minidump stream directory is an array of entries of this type, with its size
+/// given by Header.NumberOfStreams.
+struct Directory {
+ support::little_t<StreamType> Type;
+ LocationDescriptor Location;
+};
+static_assert(sizeof(Directory) == 12, "");
+
+/// The processor architecture of the system that generated this minidump. Used
+/// in the ProcessorArch field of the SystemInfo stream.
+enum class ProcessorArchitecture : uint16_t {
+#define HANDLE_MDMP_ARCH(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+};
+
+/// The OS Platform of the system that generated this minidump. Used in the
+/// PlatformId field of the SystemInfo stream.
+enum class OSPlatform : uint32_t {
+#define HANDLE_MDMP_PLATFORM(CODE, NAME) NAME = CODE,
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+};
+
+/// Detailed information about the processor of the system that generated this
+/// minidump. Its interpretation depends on the ProcessorArchitecture enum.
+union CPUInfo {
+ struct X86Info {
+ char VendorID[12]; // cpuid 0: ebx, edx, ecx
+ support::ulittle32_t VersionInfo; // cpuid 1: eax
+ support::ulittle32_t FeatureInfo; // cpuid 1: edx
+ support::ulittle32_t AMDExtendedFeatures; // cpuid 0x80000001, ebx
+ } X86;
+ struct ArmInfo {
+ support::ulittle32_t CPUID;
+ support::ulittle32_t ElfHWCaps; // linux specific, 0 otherwise
+ } Arm;
+ struct OtherInfo {
+ uint8_t ProcessorFeatures[16];
+ } Other;
+};
+static_assert(sizeof(CPUInfo) == 24, "");
+
+/// The SystemInfo stream, containing various information about the system where
+/// this minidump was generated.
+struct SystemInfo {
+ support::little_t<ProcessorArchitecture> ProcessorArch;
+ support::ulittle16_t ProcessorLevel;
+ support::ulittle16_t ProcessorRevision;
+
+ uint8_t NumberOfProcessors;
+ uint8_t ProductType;
+
+ support::ulittle32_t MajorVersion;
+ support::ulittle32_t MinorVersion;
+ support::ulittle32_t BuildNumber;
+ support::little_t<OSPlatform> PlatformId;
+ support::ulittle32_t CSDVersionRVA;
+
+ support::ulittle16_t SuiteMask;
+ support::ulittle16_t Reserved;
+
+ CPUInfo CPU;
+};
+static_assert(sizeof(SystemInfo) == 56, "");
+
+struct VSFixedFileInfo {
+ support::ulittle32_t Signature;
+ support::ulittle32_t StructVersion;
+ support::ulittle32_t FileVersionHigh;
+ support::ulittle32_t FileVersionLow;
+ support::ulittle32_t ProductVersionHigh;
+ support::ulittle32_t ProductVersionLow;
+ support::ulittle32_t FileFlagsMask;
+ support::ulittle32_t FileFlags;
+ support::ulittle32_t FileOS;
+ support::ulittle32_t FileType;
+ support::ulittle32_t FileSubtype;
+ support::ulittle32_t FileDateHigh;
+ support::ulittle32_t FileDateLow;
+};
+static_assert(sizeof(VSFixedFileInfo) == 52, "");
+
+inline bool operator==(const VSFixedFileInfo &LHS, const VSFixedFileInfo &RHS) {
+ return memcmp(&LHS, &RHS, sizeof(VSFixedFileInfo)) == 0;
+}
+
+struct Module {
+ support::ulittle64_t BaseOfImage;
+ support::ulittle32_t SizeOfImage;
+ support::ulittle32_t Checksum;
+ support::ulittle32_t TimeDateStamp;
+ support::ulittle32_t ModuleNameRVA;
+ VSFixedFileInfo VersionInfo;
+ LocationDescriptor CvRecord;
+ LocationDescriptor MiscRecord;
+ support::ulittle64_t Reserved0;
+ support::ulittle64_t Reserved1;
+};
+static_assert(sizeof(Module) == 108, "");
+
+/// Describes a single thread in the minidump file. Part of the ThreadList
+/// stream.
+struct Thread {
+ support::ulittle32_t ThreadId;
+ support::ulittle32_t SuspendCount;
+ support::ulittle32_t PriorityClass;
+ support::ulittle32_t Priority;
+ support::ulittle64_t EnvironmentBlock;
+ MemoryDescriptor Stack;
+ LocationDescriptor Context;
+};
+static_assert(sizeof(Thread) == 48, "");
+
+} // namespace minidump
+
+template <> struct DenseMapInfo<minidump::StreamType> {
+ static minidump::StreamType getEmptyKey() { return minidump::StreamType(-1); }
+
+ static minidump::StreamType getTombstoneKey() {
+ return minidump::StreamType(-2);
+ }
+
+ static unsigned getHashValue(minidump::StreamType Val) {
+ return DenseMapInfo<uint32_t>::getHashValue(static_cast<uint32_t>(Val));
+ }
+
+ static bool isEqual(minidump::StreamType LHS, minidump::StreamType RHS) {
+ return LHS == RHS;
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MINIDUMP_H
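Taken together, Header, LocationDescriptor and Directory are enough to walk a minidump: the header gives the RVA and count of the stream directory, and each directory entry points at one stream. A minimal sketch of that walk, assuming the whole file is already in memory (illustrative only; no bounds or alignment checking):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/Minidump.h"
#include <cassert>

// Illustrative only: locate the stream directory of an in-memory minidump.
static llvm::ArrayRef<llvm::minidump::Directory>
getStreams(llvm::ArrayRef<uint8_t> Blob) {
  using namespace llvm::minidump;
  const auto *H = reinterpret_cast<const Header *>(Blob.data());
  assert(H->Signature == Header::MagicSignature &&
         (H->Version & 0xffff) == Header::MagicVersion);
  const auto *Dirs =
      reinterpret_cast<const Directory *>(Blob.data() + H->StreamDirectoryRVA);
  return llvm::makeArrayRef(Dirs, H->NumberOfStreams);
}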
diff --git a/include/llvm/BinaryFormat/MinidumpConstants.def b/include/llvm/BinaryFormat/MinidumpConstants.def
new file mode 100644
index 000000000000..d4f13dd99217
--- /dev/null
+++ b/include/llvm/BinaryFormat/MinidumpConstants.def
@@ -0,0 +1,107 @@
+//===- MinidumpConstants.def - Iteration over minidump constants-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined HANDLE_MDMP_STREAM_TYPE || defined HANDLE_MDMP_ARCH || \
+ defined HANDLE_MDMP_PLATFORM)
+#error "Missing HANDLE_MDMP definition"
+#endif
+
+#ifndef HANDLE_MDMP_STREAM_TYPE
+#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME)
+#endif
+
+#ifndef HANDLE_MDMP_ARCH
+#define HANDLE_MDMP_ARCH(CODE, NAME)
+#endif
+
+#ifndef HANDLE_MDMP_PLATFORM
+#define HANDLE_MDMP_PLATFORM(CODE, NAME)
+#endif
+
+HANDLE_MDMP_STREAM_TYPE(0x0003, ThreadList)
+HANDLE_MDMP_STREAM_TYPE(0x0004, ModuleList)
+HANDLE_MDMP_STREAM_TYPE(0x0005, MemoryList)
+HANDLE_MDMP_STREAM_TYPE(0x0006, Exception)
+HANDLE_MDMP_STREAM_TYPE(0x0007, SystemInfo)
+HANDLE_MDMP_STREAM_TYPE(0x0008, ThreadExList)
+HANDLE_MDMP_STREAM_TYPE(0x0009, Memory64List)
+HANDLE_MDMP_STREAM_TYPE(0x000a, CommentA)
+HANDLE_MDMP_STREAM_TYPE(0x000b, CommentW)
+HANDLE_MDMP_STREAM_TYPE(0x000c, HandleData)
+HANDLE_MDMP_STREAM_TYPE(0x000d, FunctionTable)
+HANDLE_MDMP_STREAM_TYPE(0x000e, UnloadedModuleList)
+HANDLE_MDMP_STREAM_TYPE(0x000f, MiscInfo)
+HANDLE_MDMP_STREAM_TYPE(0x0010, MemoryInfoList)
+HANDLE_MDMP_STREAM_TYPE(0x0011, ThreadInfoList)
+HANDLE_MDMP_STREAM_TYPE(0x0012, HandleOperationList)
+HANDLE_MDMP_STREAM_TYPE(0x0013, Token)
+HANDLE_MDMP_STREAM_TYPE(0x0014, JavascriptData)
+HANDLE_MDMP_STREAM_TYPE(0x0015, SystemMemoryInfo)
+HANDLE_MDMP_STREAM_TYPE(0x0016, ProcessVMCounters)
+// Breakpad extension types. 0x4767 = "Gg"
+HANDLE_MDMP_STREAM_TYPE(0x47670001, BreakpadInfo)
+HANDLE_MDMP_STREAM_TYPE(0x47670002, AssertionInfo)
+// These are additional minidump stream values which are specific to the Linux
+// Breakpad implementation.
+HANDLE_MDMP_STREAM_TYPE(0x47670003, LinuxCPUInfo) // /proc/cpuinfo
+HANDLE_MDMP_STREAM_TYPE(0x47670004, LinuxProcStatus) // /proc/$x/status
+HANDLE_MDMP_STREAM_TYPE(0x47670005, LinuxLSBRelease) // /etc/lsb-release
+HANDLE_MDMP_STREAM_TYPE(0x47670006, LinuxCMDLine) // /proc/$x/cmdline
+HANDLE_MDMP_STREAM_TYPE(0x47670007, LinuxEnviron) // /proc/$x/environ
+HANDLE_MDMP_STREAM_TYPE(0x47670008, LinuxAuxv) // /proc/$x/auxv
+HANDLE_MDMP_STREAM_TYPE(0x47670009, LinuxMaps) // /proc/$x/maps
+HANDLE_MDMP_STREAM_TYPE(0x4767000A, LinuxDSODebug)
+HANDLE_MDMP_STREAM_TYPE(0x4767000B, LinuxProcStat) // /proc/$x/stat
+HANDLE_MDMP_STREAM_TYPE(0x4767000C, LinuxProcUptime) // uptime
+HANDLE_MDMP_STREAM_TYPE(0x4767000D, LinuxProcFD) // /proc/$x/fd
+// Facebook-defined stream types
+HANDLE_MDMP_STREAM_TYPE(0xFACE1CA7, FacebookLogcat)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFA, FacebookAppCustomData)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFB, FacebookBuildID)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFC, FacebookAppVersionName)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFD, FacebookJavaStack)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFE, FacebookDalvikInfo)
+HANDLE_MDMP_STREAM_TYPE(0xFACECAFF, FacebookUnwindSymbols)
+HANDLE_MDMP_STREAM_TYPE(0xFACECB00, FacebookDumpErrorLog)
+HANDLE_MDMP_STREAM_TYPE(0xFACECCCC, FacebookAppStateLog)
+HANDLE_MDMP_STREAM_TYPE(0xFACEDEAD, FacebookAbortReason)
+HANDLE_MDMP_STREAM_TYPE(0xFACEE000, FacebookThreadName)
+
+HANDLE_MDMP_ARCH(0x0000, X86) // PROCESSOR_ARCHITECTURE_INTEL
+HANDLE_MDMP_ARCH(0x0001, MIPS) // PROCESSOR_ARCHITECTURE_MIPS
+HANDLE_MDMP_ARCH(0x0002, Alpha) // PROCESSOR_ARCHITECTURE_ALPHA
+HANDLE_MDMP_ARCH(0x0003, PPC) // PROCESSOR_ARCHITECTURE_PPC
+HANDLE_MDMP_ARCH(0x0004, SHX) // PROCESSOR_ARCHITECTURE_SHX (Super-H)
+HANDLE_MDMP_ARCH(0x0005, ARM) // PROCESSOR_ARCHITECTURE_ARM
+HANDLE_MDMP_ARCH(0x0006, IA64) // PROCESSOR_ARCHITECTURE_IA64
+HANDLE_MDMP_ARCH(0x0007, Alpha64) // PROCESSOR_ARCHITECTURE_ALPHA64
+HANDLE_MDMP_ARCH(0x0008, MSIL) // PROCESSOR_ARCHITECTURE_MSIL
+HANDLE_MDMP_ARCH(0x0009, AMD64) // PROCESSOR_ARCHITECTURE_AMD64
+HANDLE_MDMP_ARCH(0x000a, X86Win64) // PROCESSOR_ARCHITECTURE_IA32_ON_WIN64
+HANDLE_MDMP_ARCH(0x8001, SPARC) // Breakpad-defined value for SPARC
+HANDLE_MDMP_ARCH(0x8002, PPC64) // Breakpad-defined value for PPC64
+HANDLE_MDMP_ARCH(0x8003, ARM64) // Breakpad-defined value for ARM64
+HANDLE_MDMP_ARCH(0x8004, MIPS64) // Breakpad-defined value for MIPS64
+
+HANDLE_MDMP_PLATFORM(0x0000, Win32S) // Win32 on Windows 3.1
+HANDLE_MDMP_PLATFORM(0x0001, Win32Windows) // Windows 95-98-Me
+HANDLE_MDMP_PLATFORM(0x0002, Win32NT) // Windows NT, 2000+
+HANDLE_MDMP_PLATFORM(0x0003, Win32CE) // Windows CE, Windows Mobile, "Handheld"
+// Breakpad-defined values.
+HANDLE_MDMP_PLATFORM(0x8000, Unix) // Generic Unix-ish
+HANDLE_MDMP_PLATFORM(0x8101, MacOSX) // Mac OS X/Darwin
+HANDLE_MDMP_PLATFORM(0x8102, IOS) // iOS
+HANDLE_MDMP_PLATFORM(0x8201, Linux) // Linux
+HANDLE_MDMP_PLATFORM(0x8202, Solaris) // Solaris
+HANDLE_MDMP_PLATFORM(0x8203, Android) // Android
+HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3
+HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl)
+
+#undef HANDLE_MDMP_STREAM_TYPE
+#undef HANDLE_MDMP_ARCH
+#undef HANDLE_MDMP_PLATFORM
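The .def file follows LLVM's usual X-macro pattern: a client defines whichever HANDLE_MDMP_* macros it needs, includes the file, and the macros are #undef'd at the end. A sketch of a name table built this way (the function itself is hypothetical, not part of this patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Minidump.h"

// Illustrative only: stringify a StreamType via the X-macro above.
static llvm::StringRef streamTypeName(llvm::minidump::StreamType T) {
  switch (T) {
#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME)                                    \
  case llvm::minidump::StreamType::NAME:                                       \
    return #NAME;
#include "llvm/BinaryFormat/MinidumpConstants.def"
  default:
    return "Unknown";
  }
}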
diff --git a/include/llvm/BinaryFormat/MsgPack.def b/include/llvm/BinaryFormat/MsgPack.def
index 781b49f46aeb..7ad83ff21c42 100644
--- a/include/llvm/BinaryFormat/MsgPack.def
+++ b/include/llvm/BinaryFormat/MsgPack.def
@@ -1,9 +1,8 @@
//===- MsgPack.def - MessagePack definitions --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/BinaryFormat/MsgPack.h b/include/llvm/BinaryFormat/MsgPack.h
index d431912a53e5..9fda14b21c71 100644
--- a/include/llvm/BinaryFormat/MsgPack.h
+++ b/include/llvm/BinaryFormat/MsgPack.h
@@ -1,9 +1,8 @@
//===-- MsgPack.h - MessagePack Constants -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/BinaryFormat/MsgPackDocument.h b/include/llvm/BinaryFormat/MsgPackDocument.h
new file mode 100644
index 000000000000..824ecc353207
--- /dev/null
+++ b/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -0,0 +1,385 @@
+//===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file declares a class that exposes a simple in-memory representation
+/// of a document of MsgPack objects, that can be read from MsgPack, written to
+/// MsgPack, and inspected and modified in memory. This is intended to be a
+/// lighter-weight (in terms of memory allocations) replacement for
+/// MsgPackTypes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
+#define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
+
+#include "llvm/BinaryFormat/MsgPackReader.h"
+#include <map>
+
+namespace llvm {
+namespace msgpack {
+
+class ArrayDocNode;
+class Document;
+class MapDocNode;
+
+/// The kind of a DocNode and its owning Document.
+struct KindAndDocument {
+ Document *Doc;
+ Type Kind;
+};
+
+/// A node in a MsgPack Document. This is a simple copyable and
+/// passable-by-value type that does not own any memory.
+class DocNode {
+ friend Document;
+
+public:
+ typedef std::map<DocNode, DocNode> MapTy;
+ typedef std::vector<DocNode> ArrayTy;
+
+private:
+ // Using KindAndDocument allows us to squeeze Kind and a pointer to the
+ // owning Document into the same word. Having a pointer to the owning
+ // Document makes the API of DocNode more convenient, and allows its use in
+ // YAMLIO.
+ const KindAndDocument *KindAndDoc;
+
+protected:
+ // The union of different values.
+ union {
+ int64_t Int;
+ uint64_t UInt;
+ bool Bool;
+ double Float;
+ StringRef Raw;
+ ArrayTy *Array;
+ MapTy *Map;
+ };
+
+public:
+ DocNode() : KindAndDoc(nullptr) {}
+
+ // Type methods
+ bool isMap() const { return getKind() == Type::Map; }
+ bool isArray() const { return getKind() == Type::Array; }
+ bool isScalar() const { return !isMap() && !isArray(); }
+ bool isString() const { return getKind() == Type::String; }
+
+ // Accessors
+ bool isEmpty() const { return !KindAndDoc; }
+ Type getKind() const { return KindAndDoc->Kind; }
+ Document *getDocument() const { return KindAndDoc->Doc; }
+
+ int64_t &getInt() {
+ assert(getKind() == Type::Int);
+ return Int;
+ }
+
+ uint64_t &getUInt() {
+ assert(getKind() == Type::UInt);
+ return UInt;
+ }
+
+ bool &getBool() {
+ assert(getKind() == Type::Boolean);
+ return Bool;
+ }
+
+ double &getFloat() {
+ assert(getKind() == Type::Float);
+ return Float;
+ }
+
+ int64_t getInt() const {
+ assert(getKind() == Type::Int);
+ return Int;
+ }
+
+ uint64_t getUInt() const {
+ assert(getKind() == Type::UInt);
+ return UInt;
+ }
+
+ bool getBool() const {
+ assert(getKind() == Type::Boolean);
+ return Bool;
+ }
+
+ double getFloat() const {
+ assert(getKind() == Type::Float);
+ return Float;
+ }
+
+ StringRef getString() const {
+ assert(getKind() == Type::String);
+ return Raw;
+ }
+
+ /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
+ /// array node if necessary.
+ ArrayDocNode &getArray(bool Convert = false) {
+ if (getKind() != Type::Array) {
+ assert(Convert);
+ convertToArray();
+ }
+ // This could be a static_cast, except ArrayDocNode is a forward reference.
+ return *reinterpret_cast<ArrayDocNode *>(this);
+ }
+
+ /// Get a MapDocNode for a map node. If Convert, convert the node to a map
+ /// node if necessary.
+ MapDocNode &getMap(bool Convert = false) {
+ if (getKind() != Type::Map) {
+ assert(Convert);
+ convertToMap();
+ }
+ // This could be a static_cast, except MapDocNode is a forward reference.
+ return *reinterpret_cast<MapDocNode *>(this);
+ }
+
+ /// Comparison operator, used for map keys.
+ friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
+ // This has to cope with one or both of the nodes being default-constructed,
+ // such that KindAndDoc is not set.
+ if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
+ if (!Rhs.KindAndDoc)
+ return false;
+ if (!Lhs.KindAndDoc)
+ return true;
+ return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
+ }
+ switch (Lhs.getKind()) {
+ case Type::Int:
+ return Lhs.Int < Rhs.Int;
+ case Type::UInt:
+ return Lhs.UInt < Rhs.UInt;
+ case Type::Nil:
+ return false;
+ case Type::Boolean:
+ return Lhs.Bool < Rhs.Bool;
+ case Type::Float:
+ return Lhs.Float < Rhs.Float;
+ case Type::String:
+ case Type::Binary:
+ return Lhs.Raw < Rhs.Raw;
+ default:
+ llvm_unreachable("bad map key type");
+ }
+ }
+
+ /// Equality operator
+ friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
+ return !(Lhs < Rhs) && !(Rhs < Lhs);
+ }
+
+ /// Convert this node to a string, assuming it is scalar.
+ std::string toString() const;
+
+ /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
+ /// it is a string, copy the string into the Document's strings list so we do
+ /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
+ StringRef fromString(StringRef S, StringRef Tag = "");
+
+private:
+ // Private constructor setting KindAndDoc, used by methods in Document.
+ DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
+
+ void convertToArray();
+ void convertToMap();
+};
+
+/// A DocNode that is a map.
+class MapDocNode : public DocNode {
+public:
+ MapDocNode() {}
+ MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
+
+ // Map access methods.
+ size_t size() const { return Map->size(); }
+ bool empty() const { return !size(); }
+ MapTy::iterator begin() { return Map->begin(); }
+ MapTy::iterator end() { return Map->end(); }
+ MapTy::iterator find(DocNode Key) { return Map->find(Key); }
+ MapTy::iterator find(StringRef Key);
+ /// Member access. The string data must remain valid for the lifetime of the
+ /// Document.
+ DocNode &operator[](StringRef S);
+ /// Member access.
+ DocNode &operator[](DocNode Key);
+};
+
+/// A DocNode that is an array.
+class ArrayDocNode : public DocNode {
+public:
+ ArrayDocNode() {}
+ ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
+
+ // Array access methods.
+ size_t size() const { return Array->size(); }
+ bool empty() const { return !size(); }
+ ArrayTy::iterator begin() { return Array->begin(); }
+ ArrayTy::iterator end() { return Array->end(); }
+ void push_back(DocNode N) {
+ assert(N.getDocument() == getDocument());
+ Array->push_back(N);
+ }
+
+ /// Element access. This extends the array if necessary.
+ DocNode &operator[](size_t Index);
+};
+
+/// Simple in-memory representation of a document of msgpack objects with
+/// ability to find and create array and map elements. Does not currently cope
+/// with any extension types.
+class Document {
+ // Maps, arrays and strings used by nodes in the document. No attempt is made
+ // to free unused ones.
+ std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
+ std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
+ std::vector<std::unique_ptr<char[]>> Strings;
+
+ // The root node of the document.
+ DocNode Root;
+
+ // The KindAndDocument structs pointed to by nodes in the document.
+ KindAndDocument KindAndDocs[size_t(Type::Extension) + 1];
+
+ // Whether YAML output uses hex for UInt.
+ bool HexMode = false;
+
+public:
+ Document() {
+ clear();
+ for (unsigned T = 0; T != size_t(Type::Extension) + 1; ++T)
+ KindAndDocs[T] = {this, Type(T)};
+ }
+
+ /// Get ref to the document's root element.
+ DocNode &getRoot() { return Root; }
+
+ /// Restore the Document to an empty state.
+ void clear() { getRoot() = getNode(); }
+
+ /// Create a nil node associated with this Document.
+ DocNode getNode() {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
+ return N;
+ }
+
+ /// Create an Int node associated with this Document.
+ DocNode getNode(int64_t V) {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
+ N.Int = V;
+ return N;
+ }
+
+ /// Create an Int node associated with this Document.
+ DocNode getNode(int V) {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
+ N.Int = V;
+ return N;
+ }
+
+ /// Create a UInt node associated with this Document.
+ DocNode getNode(uint64_t V) {
+ auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
+ N.UInt = V;
+ return N;
+ }
+
+ /// Create a UInt node associated with this Document.
+ DocNode getNode(unsigned V) {
+ auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
+ N.UInt = V;
+ return N;
+ }
+
+ /// Create a Boolean node associated with this Document.
+ DocNode getNode(bool V) {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
+ N.Bool = V;
+ return N;
+ }
+
+ /// Create a Float node associated with this Document.
+ DocNode getNode(double V) {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
+ N.Float = V;
+ return N;
+ }
+
+ /// Create a String node associated with this Document. If !Copy, the passed
+ /// string must remain valid for the lifetime of the Document.
+ DocNode getNode(StringRef V, bool Copy = false) {
+ if (Copy)
+ V = addString(V);
+ auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
+ N.Raw = V;
+ return N;
+ }
+
+ /// Create a String node associated with this Document. If !Copy, the passed
+ /// string must remain valid for the lifetime of the Document.
+ DocNode getNode(const char *V, bool Copy = false) {
+ return getNode(StringRef(V), Copy);
+ }
+
+ /// Create an empty Map node associated with this Document.
+ MapDocNode getMapNode() {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
+ Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
+ N.Map = Maps.back().get();
+ return N.getMap();
+ }
+
+ /// Create an empty Array node associated with this Document.
+ ArrayDocNode getArrayNode() {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
+ Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
+ N.Array = Arrays.back().get();
+ return N.getArray();
+ }
+
+ /// Read a MsgPack document from a binary MsgPack blob.
+ /// The blob data must remain valid for the lifetime of this Document (because
+ /// a string object in the document contains a StringRef into the original
+ /// blob).
+ /// If Multi, then this sets root to an array and adds top-level objects to
+ /// it. If !Multi, then it only reads a single top-level object, even if there
+ /// are more, and sets root to that.
+ /// Returns false if failed due to illegal format.
+ bool readFromBlob(StringRef Blob, bool Multi);
+
+ /// Write a MsgPack document to a binary MsgPack blob.
+ void writeToBlob(std::string &Blob);
+
+ /// Copy a string into the Document's strings list, and return the copy that
+ /// is owned by the Document.
+ StringRef addString(StringRef S) {
+ Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
+ memcpy(&Strings.back()[0], S.data(), S.size());
+ return StringRef(&Strings.back()[0], S.size());
+ }
+
+ /// Set whether YAML output uses hex for UInt. Default off.
+ void setHexMode(bool Val = true) { HexMode = Val; }
+
+ /// Get Hexmode flag.
+ bool getHexMode() const { return HexMode; }
+
+ /// Convert MsgPack Document to YAML text.
+ void toYAML(raw_ostream &OS);
+
+ /// Read YAML text into the MsgPack document. Returns false on failure.
+ bool fromYAML(StringRef S);
+};
+
+} // namespace msgpack
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
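The Document above owns all map, array and string storage, while DocNode stays a cheap value handle, so building and serializing a document is a matter of asking the Document for nodes. A short round-trip sketch using only the API declared in this header (illustrative, not part of this patch):

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Illustrative only: build a small map, serialize it, and read it back.
static void roundTrip() {
  llvm::msgpack::Document Doc;
  auto &Root = Doc.getRoot().getMap(/*Convert=*/true);
  Root["answer"] = Doc.getNode(uint64_t(42));
  Root["name"] = Doc.getNode("llvm", /*Copy=*/true);

  std::string Blob;
  Doc.writeToBlob(Blob);

  llvm::msgpack::Document Doc2;
  if (Doc2.readFromBlob(Blob, /*Multi=*/false))
    Doc2.toYAML(llvm::outs());
}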
diff --git a/include/llvm/BinaryFormat/MsgPackReader.h b/include/llvm/BinaryFormat/MsgPackReader.h
index 511c31407455..2d332f531b23 100644
--- a/include/llvm/BinaryFormat/MsgPackReader.h
+++ b/include/llvm/BinaryFormat/MsgPackReader.h
@@ -1,9 +1,8 @@
//===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/BinaryFormat/MsgPackTypes.h b/include/llvm/BinaryFormat/MsgPackTypes.h
deleted file mode 100644
index f96cd4c338fd..000000000000
--- a/include/llvm/BinaryFormat/MsgPackTypes.h
+++ /dev/null
@@ -1,372 +0,0 @@
-//===- MsgPackTypes.h - MsgPack Types ---------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This is a data structure for representing MessagePack "documents", with
-/// methods to go to and from MessagePack. The types also specialize YAMLIO
-/// traits in order to go to and from YAML.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/BinaryFormat/MsgPackReader.h"
-#include "llvm/BinaryFormat/MsgPackWriter.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <vector>
-
-#ifndef LLVM_BINARYFORMAT_MSGPACKTYPES_H
-#define LLVM_BINARYFORMAT_MSGPACKTYPES_H
-
-namespace llvm {
-namespace msgpack {
-
-class Node;
-
-/// Short-hand for a Node pointer.
-using NodePtr = std::shared_ptr<Node>;
-
-/// Short-hand for an Optional Node pointer.
-using OptNodePtr = Optional<NodePtr>;
-
-/// Abstract base-class which can be any MessagePack type.
-class Node {
-public:
- enum NodeKind {
- NK_Scalar,
- NK_Array,
- NK_Map,
- };
-
-private:
- virtual void anchor() = 0;
- const NodeKind Kind;
-
- static Expected<OptNodePtr> readArray(Reader &MPReader, size_t Length);
- static Expected<OptNodePtr> readMap(Reader &MPReader, size_t Length);
-
-public:
- NodeKind getKind() const { return Kind; }
-
- /// Construct a Node. Used by derived classes to track kind information.
- Node(NodeKind Kind) : Kind(Kind) {}
-
- virtual ~Node() = default;
-
- /// Read from a MessagePack reader \p MPReader, returning an error if one is
- /// encountered, or None if \p MPReader is at the end of stream, or some Node
- /// pointer if some type is read.
- static Expected<OptNodePtr> read(Reader &MPReader);
-
- /// Write to a MessagePack writer \p MPWriter.
- virtual void write(Writer &MPWriter) = 0;
-};
-
-/// A MessagePack scalar.
-class ScalarNode : public Node {
-public:
- enum ScalarKind {
- SK_Int,
- SK_UInt,
- SK_Nil,
- SK_Boolean,
- SK_Float,
- SK_String,
- SK_Binary,
- };
-
-private:
- void anchor() override;
-
- void destroy();
-
- ScalarKind SKind;
-
- union {
- int64_t IntValue;
- uint64_t UIntValue;
- bool BoolValue;
- double FloatValue;
- std::string StringValue;
- };
-
-public:
- /// Construct an Int ScalarNode.
- ScalarNode(int64_t IntValue);
- /// Construct an Int ScalarNode.
- ScalarNode(int32_t IntValue);
- /// Construct an UInt ScalarNode.
- ScalarNode(uint64_t UIntValue);
- /// Construct an UInt ScalarNode.
- ScalarNode(uint32_t UIntValue);
- /// Construct a Nil ScalarNode.
- ScalarNode();
- /// Construct a Boolean ScalarNode.
- ScalarNode(bool BoolValue);
- /// Construct a Float ScalarNode.
- ScalarNode(double FloatValue);
- /// Construct a String ScalarNode.
- ScalarNode(StringRef StringValue);
- /// Construct a String ScalarNode.
- ScalarNode(const char *StringValue);
- /// Construct a String ScalarNode.
- ScalarNode(std::string &&StringValue);
- /// Construct a Binary ScalarNode.
- ScalarNode(MemoryBufferRef BinaryValue);
-
- ~ScalarNode();
-
- ScalarNode &operator=(const ScalarNode &RHS) = delete;
- /// A ScalarNode can only be move assigned.
- ScalarNode &operator=(ScalarNode &&RHS);
-
- /// Change the kind of this ScalarNode, zero initializing it to the new type.
- void setScalarKind(ScalarKind SKind) {
- switch (SKind) {
- case SK_Int:
- *this = int64_t(0);
- break;
- case SK_UInt:
- *this = uint64_t(0);
- break;
- case SK_Boolean:
- *this = false;
- break;
- case SK_Float:
- *this = 0.0;
- break;
- case SK_String:
- *this = StringRef();
- break;
- case SK_Binary:
- *this = MemoryBufferRef("", "");
- break;
- case SK_Nil:
- *this = ScalarNode();
- break;
- }
- }
-
- /// Get the current kind of ScalarNode.
- ScalarKind getScalarKind() { return SKind; }
-
- /// Get the value of an Int scalar.
- ///
- /// \warning Assumes getScalarKind() == SK_Int
- int64_t getInt() {
- assert(SKind == SK_Int);
- return IntValue;
- }
-
- /// Get the value of a UInt scalar.
- ///
- /// \warning Assumes getScalarKind() == SK_UInt
- uint64_t getUInt() {
- assert(SKind == SK_UInt);
- return UIntValue;
- }
-
- /// Get the value of an Boolean scalar.
- ///
- /// \warning Assumes getScalarKind() == SK_Boolean
- bool getBool() {
- assert(SKind == SK_Boolean);
- return BoolValue;
- }
-
- /// Get the value of an Float scalar.
- ///
- /// \warning Assumes getScalarKind() == SK_Float
- double getFloat() {
- assert(SKind == SK_Float);
- return FloatValue;
- }
-
- /// Get the value of a String scalar.
- ///
- /// \warning Assumes getScalarKind() == SK_String
- StringRef getString() {
- assert(SKind == SK_String);
- return StringValue;
- }
-
- /// Get the value of a Binary scalar.
- ///
- /// \warning Assumes getScalarKind() == SK_Binary
- StringRef getBinary() {
- assert(SKind == SK_Binary);
- return StringValue;
- }
-
- static bool classof(const Node *N) { return N->getKind() == NK_Scalar; }
-
- void write(Writer &MPWriter) override;
-
- /// Parse a YAML scalar of the current ScalarKind from \p ScalarStr.
- ///
- /// \returns An empty string on success, otherwise an error message.
- StringRef inputYAML(StringRef ScalarStr);
-
- /// Output a YAML scalar of the current ScalarKind into \p OS.
- void outputYAML(raw_ostream &OS) const;
-
- /// Determine which YAML quoting type the current value would need when
- /// output.
- yaml::QuotingType mustQuoteYAML(StringRef ScalarStr) const;
-
- /// Get the YAML tag for the current ScalarKind.
- StringRef getYAMLTag() const;
-
- /// Flag which affects how the type handles YAML tags when reading and
- /// writing.
- ///
- /// When false, tags are used when reading and writing. When reading, the tag
- /// is used to decide the ScalarKind before parsing. When writing, the tag is
- /// output along with the value.
- ///
- /// When true, tags are ignored when reading and writing. When reading, the
- /// ScalarKind is always assumed to be String. When writing, the tag is not
- /// output.
- bool IgnoreTag = false;
-
- static const char *IntTag;
- static const char *NilTag;
- static const char *BooleanTag;
- static const char *FloatTag;
- static const char *StringTag;
- static const char *BinaryTag;
-};
-
-class ArrayNode : public Node, public std::vector<NodePtr> {
- void anchor() override;
-
-public:
- ArrayNode() : Node(NK_Array) {}
- static bool classof(const Node *N) { return N->getKind() == NK_Array; }
-
- void write(Writer &MPWriter) override {
- MPWriter.writeArraySize(this->size());
- for (auto &N : *this)
- N->write(MPWriter);
- }
-};
-
-class MapNode : public Node, public StringMap<NodePtr> {
- void anchor() override;
-
-public:
- MapNode() : Node(NK_Map) {}
- static bool classof(const Node *N) { return N->getKind() == NK_Map; }
-
- void write(Writer &MPWriter) override {
- MPWriter.writeMapSize(this->size());
- for (auto &N : *this) {
- MPWriter.write(N.first());
- N.second->write(MPWriter);
- }
- }
-};
-
-} // end namespace msgpack
-
-namespace yaml {
-
-template <> struct PolymorphicTraits<msgpack::NodePtr> {
- static NodeKind getKind(const msgpack::NodePtr &N) {
- if (isa<msgpack::ScalarNode>(*N))
- return NodeKind::Scalar;
- if (isa<msgpack::MapNode>(*N))
- return NodeKind::Map;
- if (isa<msgpack::ArrayNode>(*N))
- return NodeKind::Sequence;
- llvm_unreachable("NodeKind not supported");
- }
- static msgpack::ScalarNode &getAsScalar(msgpack::NodePtr &N) {
- if (!N || !isa<msgpack::ScalarNode>(*N))
- N.reset(new msgpack::ScalarNode());
- return *cast<msgpack::ScalarNode>(N.get());
- }
- static msgpack::MapNode &getAsMap(msgpack::NodePtr &N) {
- if (!N || !isa<msgpack::MapNode>(*N))
- N.reset(new msgpack::MapNode());
- return *cast<msgpack::MapNode>(N.get());
- }
- static msgpack::ArrayNode &getAsSequence(msgpack::NodePtr &N) {
- if (!N || !isa<msgpack::ArrayNode>(*N))
- N.reset(new msgpack::ArrayNode());
- return *cast<msgpack::ArrayNode>(N.get());
- }
-};
-
-template <> struct TaggedScalarTraits<msgpack::ScalarNode> {
- static void output(const msgpack::ScalarNode &S, void *Ctxt,
- raw_ostream &ScalarOS, raw_ostream &TagOS) {
- if (!S.IgnoreTag)
- TagOS << S.getYAMLTag();
- S.outputYAML(ScalarOS);
- }
-
- static StringRef input(StringRef ScalarStr, StringRef Tag, void *Ctxt,
- msgpack::ScalarNode &S) {
- if (Tag == msgpack::ScalarNode::IntTag) {
- S.setScalarKind(msgpack::ScalarNode::SK_UInt);
- if (S.inputYAML(ScalarStr) == StringRef())
- return StringRef();
- S.setScalarKind(msgpack::ScalarNode::SK_Int);
- return S.inputYAML(ScalarStr);
- }
-
- if (S.IgnoreTag || Tag == msgpack::ScalarNode::StringTag ||
- Tag == "tag:yaml.org,2002:str")
- S.setScalarKind(msgpack::ScalarNode::SK_String);
- else if (Tag == msgpack::ScalarNode::NilTag)
- S.setScalarKind(msgpack::ScalarNode::SK_Nil);
- else if (Tag == msgpack::ScalarNode::BooleanTag)
- S.setScalarKind(msgpack::ScalarNode::SK_Boolean);
- else if (Tag == msgpack::ScalarNode::FloatTag)
- S.setScalarKind(msgpack::ScalarNode::SK_Float);
- else if (Tag == msgpack::ScalarNode::StringTag)
- S.setScalarKind(msgpack::ScalarNode::SK_String);
- else if (Tag == msgpack::ScalarNode::BinaryTag)
- S.setScalarKind(msgpack::ScalarNode::SK_Binary);
- else
- return "Unsupported messagepack tag";
-
- return S.inputYAML(ScalarStr);
- }
-
- static QuotingType mustQuote(const msgpack::ScalarNode &S, StringRef Str) {
- return S.mustQuoteYAML(Str);
- }
-};
-
-template <> struct CustomMappingTraits<msgpack::MapNode> {
- static void inputOne(IO &IO, StringRef Key, msgpack::MapNode &M) {
- IO.mapRequired(Key.str().c_str(), M[Key]);
- }
- static void output(IO &IO, msgpack::MapNode &M) {
- for (auto &N : M)
- IO.mapRequired(N.getKey().str().c_str(), N.getValue());
- }
-};
-
-template <> struct SequenceTraits<msgpack::ArrayNode> {
- static size_t size(IO &IO, msgpack::ArrayNode &A) { return A.size(); }
- static msgpack::NodePtr &element(IO &IO, msgpack::ArrayNode &A,
- size_t Index) {
- if (Index >= A.size())
- A.resize(Index + 1);
- return A[Index];
- }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-#endif // LLVM_BINARYFORMAT_MSGPACKTYPES_H
diff --git a/include/llvm/BinaryFormat/MsgPackWriter.h b/include/llvm/BinaryFormat/MsgPackWriter.h
index 98af422c9f19..3b610b774f77 100644
--- a/include/llvm/BinaryFormat/MsgPackWriter.h
+++ b/include/llvm/BinaryFormat/MsgPackWriter.h
@@ -1,9 +1,8 @@
//===- MsgPackWriter.h - Simple MsgPack writer ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h
index d9f0f94b298d..0f22bfe610c6 100644
--- a/include/llvm/BinaryFormat/Wasm.h
+++ b/include/llvm/BinaryFormat/Wasm.h
@@ -1,9 +1,8 @@
//===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,6 +42,17 @@ struct WasmDylinkInfo {
  std::vector<StringRef> Needed; // Shared library dependencies
};
+struct WasmProducerInfo {
+ std::vector<std::pair<std::string, std::string>> Languages;
+ std::vector<std::pair<std::string, std::string>> Tools;
+ std::vector<std::pair<std::string, std::string>> SDKs;
+};
+
+struct WasmFeatureEntry {
+ uint8_t Prefix;
+ std::string Name;
+};
+
struct WasmExport {
StringRef Name;
uint8_t Kind;
@@ -126,12 +136,13 @@ struct WasmFunction {
};
struct WasmDataSegment {
- uint32_t MemoryIndex;
- WasmInitExpr Offset;
+ uint32_t InitFlags;
+ uint32_t MemoryIndex; // present if InitFlags & WASM_SEGMENT_HAS_MEMINDEX
+ WasmInitExpr Offset; // present if InitFlags & WASM_SEGMENT_IS_PASSIVE == 0
ArrayRef<uint8_t> Content;
StringRef Name; // from the "segment info" section
uint32_t Alignment;
- uint32_t Flags;
+ uint32_t LinkerFlags;
uint32_t Comdat; // from the "comdat info" section
};
@@ -165,7 +176,8 @@ struct WasmSymbolInfo {
StringRef Name;
uint8_t Kind;
uint32_t Flags;
- StringRef Module; // For undefined symbols the module name of the import
+ StringRef ImportModule; // For undefined symbols the module of the import
+ StringRef ImportName; // For undefined symbols the name of the import
union {
// For function or global symbols, the index in function or global index
// space.
@@ -212,7 +224,7 @@ enum : unsigned {
WASM_TYPE_F64 = 0x7C,
WASM_TYPE_V128 = 0x7B,
WASM_TYPE_FUNCREF = 0x70,
- WASM_TYPE_EXCEPT_REF = 0x68,
+ WASM_TYPE_EXNREF = 0x68,
WASM_TYPE_FUNC = 0x60,
WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
};
@@ -229,11 +241,19 @@ enum : unsigned {
// Opcodes used in initializer expressions.
enum : unsigned {
WASM_OPCODE_END = 0x0b,
+ WASM_OPCODE_CALL = 0x10,
+ WASM_OPCODE_LOCAL_GET = 0x20,
WASM_OPCODE_GLOBAL_GET = 0x23,
+ WASM_OPCODE_GLOBAL_SET = 0x24,
+ WASM_OPCODE_I32_STORE = 0x36,
WASM_OPCODE_I32_CONST = 0x41,
WASM_OPCODE_I64_CONST = 0x42,
WASM_OPCODE_F32_CONST = 0x43,
WASM_OPCODE_F64_CONST = 0x44,
+ WASM_OPCODE_I32_ADD = 0x6a,
+ WASM_OPCODE_MISC_PREFIX = 0xfc,
+ WASM_OPCODE_MEMORY_INIT = 0x08,
+ WASM_OPCODE_DATA_DROP = 0x09,
};
enum : unsigned {
@@ -241,6 +261,18 @@ enum : unsigned {
WASM_LIMITS_FLAG_IS_SHARED = 0x2,
};
+enum : unsigned {
+ WASM_SEGMENT_IS_PASSIVE = 0x01,
+ WASM_SEGMENT_HAS_MEMINDEX = 0x02,
+};
+
+// Feature policy prefixes used in the custom "target_features" section
+enum : uint8_t {
+ WASM_FEATURE_PREFIX_USED = '+',
+ WASM_FEATURE_PREFIX_REQUIRED = '=',
+ WASM_FEATURE_PREFIX_DISALLOWED = '-',
+};
+
// Kind codes used in the custom "name" section
enum : unsigned {
WASM_NAMES_FUNCTION = 0x1,
@@ -284,6 +316,8 @@ const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2;
const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0;
const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4;
const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
+const unsigned WASM_SYMBOL_EXPORTED = 0x20;
+const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
#define WASM_RELOC(name, value) name = value,
@@ -300,17 +334,17 @@ enum class ValType {
F32 = WASM_TYPE_F32,
F64 = WASM_TYPE_F64,
V128 = WASM_TYPE_V128,
- EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+ EXNREF = WASM_TYPE_EXNREF,
};
struct WasmSignature {
- SmallVector<wasm::ValType, 1> Returns;
- SmallVector<wasm::ValType, 4> Params;
+ SmallVector<ValType, 1> Returns;
+ SmallVector<ValType, 4> Params;
// Support empty and tombstone instances, needed by DenseMap.
enum { Plain, Empty, Tombstone } State = Plain;
- WasmSignature(SmallVector<wasm::ValType, 1> &&InReturns,
- SmallVector<wasm::ValType, 4> &&InParams)
+ WasmSignature(SmallVector<ValType, 1> &&InReturns,
+ SmallVector<ValType, 4> &&InParams)
: Returns(InReturns), Params(InParams) {}
WasmSignature() = default;
};
@@ -333,8 +367,9 @@ inline bool operator!=(const WasmGlobalType &LHS, const WasmGlobalType &RHS) {
return !(LHS == RHS);
}
-std::string toString(wasm::WasmSymbolType type);
+std::string toString(WasmSymbolType type);
std::string relocTypetoString(uint32_t type);
+bool relocTypeHasAddend(uint32_t type);
} // end namespace wasm
} // end namespace llvm
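One consequence of the WasmDataSegment change above: a reader now has to consult InitFlags before trusting MemoryIndex or Offset. A hedged sketch of that check (hypothetical helpers, not part of this patch):

#include "llvm/BinaryFormat/Wasm.h"

// Illustrative only: passive segments carry no init expression.
static bool segmentHasOffset(const llvm::wasm::WasmDataSegment &Seg) {
  return (Seg.InitFlags & llvm::wasm::WASM_SEGMENT_IS_PASSIVE) == 0;
}

// Illustrative only: MemoryIndex is explicit only when this flag is set.
static bool segmentHasExplicitMemIndex(const llvm::wasm::WasmDataSegment &Seg) {
  return (Seg.InitFlags & llvm::wasm::WASM_SEGMENT_HAS_MEMINDEX) != 0;
}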
diff --git a/include/llvm/BinaryFormat/WasmRelocs.def b/include/llvm/BinaryFormat/WasmRelocs.def
index b3a08e70c1d5..00dacf72abb0 100644
--- a/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/include/llvm/BinaryFormat/WasmRelocs.def
@@ -2,14 +2,16 @@
#error "WASM_RELOC must be defined"
#endif
-WASM_RELOC(R_WEBASSEMBLY_FUNCTION_INDEX_LEB, 0)
-WASM_RELOC(R_WEBASSEMBLY_TABLE_INDEX_SLEB, 1)
-WASM_RELOC(R_WEBASSEMBLY_TABLE_INDEX_I32, 2)
-WASM_RELOC(R_WEBASSEMBLY_MEMORY_ADDR_LEB, 3)
-WASM_RELOC(R_WEBASSEMBLY_MEMORY_ADDR_SLEB, 4)
-WASM_RELOC(R_WEBASSEMBLY_MEMORY_ADDR_I32, 5)
-WASM_RELOC(R_WEBASSEMBLY_TYPE_INDEX_LEB, 6)
-WASM_RELOC(R_WEBASSEMBLY_GLOBAL_INDEX_LEB, 7)
-WASM_RELOC(R_WEBASSEMBLY_FUNCTION_OFFSET_I32, 8)
-WASM_RELOC(R_WEBASSEMBLY_SECTION_OFFSET_I32, 9)
-WASM_RELOC(R_WEBASSEMBLY_EVENT_INDEX_LEB, 10)
+WASM_RELOC(R_WASM_FUNCTION_INDEX_LEB, 0)
+WASM_RELOC(R_WASM_TABLE_INDEX_SLEB, 1)
+WASM_RELOC(R_WASM_TABLE_INDEX_I32, 2)
+WASM_RELOC(R_WASM_MEMORY_ADDR_LEB, 3)
+WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB, 4)
+WASM_RELOC(R_WASM_MEMORY_ADDR_I32, 5)
+WASM_RELOC(R_WASM_TYPE_INDEX_LEB, 6)
+WASM_RELOC(R_WASM_GLOBAL_INDEX_LEB, 7)
+WASM_RELOC(R_WASM_FUNCTION_OFFSET_I32, 8)
+WASM_RELOC(R_WASM_SECTION_OFFSET_I32, 9)
+WASM_RELOC(R_WASM_EVENT_INDEX_LEB, 10)
+WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB, 11)
+WASM_RELOC(R_WASM_TABLE_INDEX_REL_SLEB, 12)
diff --git a/include/llvm/BinaryFormat/XCOFF.h b/include/llvm/BinaryFormat/XCOFF.h
new file mode 100644
index 000000000000..7774ab3ed24a
--- /dev/null
+++ b/include/llvm/BinaryFormat/XCOFF.h
@@ -0,0 +1,145 @@
+//===-- llvm/BinaryFormat/XCOFF.h - The XCOFF file format -------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the XCOFF object file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_XCOFF_H
+#define LLVM_BINARYFORMAT_XCOFF_H
+
+#include <cstdint>
+
+namespace llvm {
+namespace XCOFF {
+
+// Constants used in the XCOFF definition.
+enum { SectionNameSize = 8, SymbolNameSize = 8 };
+enum ReservedSectionNum { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
+
+// x_smclas field of x_csect from system header: /usr/include/syms.h
+/// Storage Mapping Class definitions.
+enum StorageMappingClass {
+ // READ ONLY CLASSES
+ XMC_PR = 0, ///< Program Code
+ XMC_RO = 1, ///< Read Only Constant
+ XMC_DB = 2, ///< Debug Dictionary Table
+ XMC_GL = 6, ///< Global Linkage (Interfile Interface Code)
+ XMC_XO = 7, ///< Extended Operation (Pseudo Machine Instruction)
+ XMC_SV = 8, ///< Supervisor Call (32-bit process only)
+ XMC_SV64 = 17, ///< Supervisor Call for 64-bit process
+ XMC_SV3264 = 18, ///< Supervisor Call for both 32- and 64-bit processes
+ XMC_TI = 12, ///< Traceback Index csect
+ XMC_TB = 13, ///< Traceback Table csect
+
+ // READ WRITE CLASSES
+ XMC_RW = 5, ///< Read Write Data
+ XMC_TC0 = 15, ///< TOC Anchor for TOC Addressability
+ XMC_TC = 3, ///< General TOC item
+ XMC_TD = 16, ///< Scalar data item in the TOC
+ XMC_DS = 10, ///< Descriptor csect
+ XMC_UA = 4, ///< Unclassified - Treated as Read Write
+ XMC_BS = 9, ///< BSS class (uninitialized static internal)
+ XMC_UC = 11, ///< Un-named Fortran Common
+
+ XMC_TL = 20, ///< Initialized thread-local variable
+ XMC_UL = 21, ///< Uninitialized thread-local variable
+ XMC_TE = 22 ///< Symbol mapped at the end of TOC
+};
+
+// Flags for defining the section type. Used for the s_flags field of
+// the section header structure. Defined in the system header `scnhdr.h`.
+enum SectionTypeFlags {
+ STYP_PAD = 0x0008,
+ STYP_DWARF = 0x0010,
+ STYP_TEXT = 0x0020,
+ STYP_DATA = 0x0040,
+ STYP_BSS = 0x0080,
+ STYP_EXCEPT = 0x0100,
+ STYP_INFO = 0x0200,
+ STYP_TDATA = 0x0400,
+ STYP_TBSS = 0x0800,
+ STYP_LOADER = 0x1000,
+ STYP_DEBUG = 0x2000,
+ STYP_TYPCHK = 0x4000,
+ STYP_OVRFLO = 0x8000
+};
+
+// STORAGE CLASSES, n_sclass field of syment.
+// The values come from `storclass.h` and `dbxstclass.h`.
+enum StorageClass : uint8_t {
+ // Storage classes used for symbolic debugging symbols.
+ C_FILE = 103, // File name
+ C_BINCL = 108, // Beginning of include file
+ C_EINCL = 109, // Ending of include file
+ C_GSYM = 128, // Global variable
+ C_STSYM = 133, // Statically allocated symbol
+ C_BCOMM = 135, // Beginning of common block
+ C_ECOMM = 137, // End of common block
+ C_ENTRY = 141, // Alternate entry
+ C_BSTAT = 143, // Beginning of static block
+ C_ESTAT = 144, // End of static block
+ C_GTLS = 145, // Global thread-local variable
+ C_STTLS = 146, // Static thread-local variable
+
+ // Storage classes used for DWARF symbols.
+ C_DWARF = 112, // DWARF section symbol
+
+ // Storage classes used for absolute symbols.
+ C_LSYM = 129, // Automatic variable allocated on stack
+ C_PSYM = 130, // Argument to subroutine allocated on stack
+ C_RSYM = 131, // Register variable
+ C_RPSYM = 132, // Argument to function or procedure stored in register
+ C_ECOML = 136, // Local member of common block
+ C_FUN = 142, // Function or procedure
+
+ // Storage classes used for undefined external symbols or
+ // symbols of general sections.
+ C_EXT = 2, // External symbol
+ C_WEAKEXT = 111, // Weak external symbol
+
+ // Storage classes used for symbols of general sections.
+ C_NULL = 0,
+ C_STAT = 3, // Static
+ C_BLOCK = 100, // ".bb" or ".eb"
+ C_FCN = 101, // ".bf" or ".ef"
+ C_HIDEXT = 107, // Un-named external symbol
+ C_INFO = 110, // Comment string in .info section
+ C_DECL = 140, // Declaration of object (type)
+
+ // Storage classes - Obsolete/Undocumented.
+ C_AUTO = 1, // Automatic variable
+ C_REG = 4, // Register variable
+ C_EXTDEF = 5, // External definition
+ C_LABEL = 6, // Label
+ C_ULABEL = 7, // Undefined label
+ C_MOS = 8, // Member of structure
+ C_ARG = 9, // Function argument
+ C_STRTAG = 10, // Structure tag
+ C_MOU = 11, // Member of union
+ C_UNTAG = 12, // Union tag
+ C_TPDEF = 13, // Type definition
+ C_USTATIC = 14, // Undefined static
+ C_ENTAG = 15, // Enumeration tag
+ C_MOE = 16, // Member of enumeration
+ C_REGPARM = 17, // Register parameter
+ C_FIELD = 18, // Bit field
+ C_EOS = 102, // End of structure
+ C_LINE = 104,
+ C_ALIAS = 105, // Duplicate tag
+ C_HIDDEN = 106, // Special storage class for external
+ C_EFCN = 255, // Physical end of function
+
+ // Storage classes - reserved
+ C_TCSYM = 134 // Reserved
+};
+
+} // end namespace XCOFF
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Bitcode/BitcodeAnalyzer.h b/include/llvm/Bitcode/BitcodeAnalyzer.h
new file mode 100644
index 000000000000..cfdebd6fe6cb
--- /dev/null
+++ b/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -0,0 +1,103 @@
+//===- llvm/Bitcode/BitcodeAnalyzer.h - Bitcode analyzer --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to analyze LLVM bitcode files/streams.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODE_ANALYZER_H
+#define LLVM_BITCODE_BITCODE_ANALYZER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+/// CurStreamTypeType - A type for CurStreamType
+enum CurStreamTypeType {
+ UnknownBitstream,
+ LLVMIRBitstream,
+ ClangSerializedASTBitstream,
+ ClangSerializedDiagnosticsBitstream,
+};
+
+struct BCDumpOptions {
+ /// The stream.
+ raw_ostream &OS;
+ /// Print per-code histogram.
+ bool Histogram = false;
+ /// Don't emit numeric info in dump if symbolic info is available.
+ bool Symbolic = false;
+ /// Print binary blobs using hex escapes.
+ bool ShowBinaryBlobs = false;
+
+ BCDumpOptions(raw_ostream &OS) : OS(OS) {}
+};
+
+class BitcodeAnalyzer {
+ BitstreamCursor Stream;
+ BitstreamBlockInfo BlockInfo;
+ CurStreamTypeType CurStreamType;
+ Optional<BitstreamCursor> BlockInfoStream;
+ unsigned NumTopBlocks = 0;
+
+ struct PerRecordStats {
+ unsigned NumInstances;
+ unsigned NumAbbrev;
+ uint64_t TotalBits;
+ PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
+ };
+
+ struct PerBlockIDStats {
+ /// NumInstances - This is the number of times this block ID has been seen.
+ unsigned NumInstances;
+ /// NumBits - The total size in bits of all of these blocks.
+ uint64_t NumBits;
+ /// NumSubBlocks - The total number of blocks these blocks contain.
+ unsigned NumSubBlocks;
+ /// NumAbbrevs - The total number of abbreviations.
+ unsigned NumAbbrevs;
+ /// NumRecords - The total number of records these blocks contain, and the
+ /// number that are abbreviated.
+ unsigned NumRecords, NumAbbreviatedRecords;
+ /// CodeFreq - Keep track of the number of times we see each code.
+ std::vector<PerRecordStats> CodeFreq;
+ PerBlockIDStats()
+ : NumInstances(0), NumBits(0), NumSubBlocks(0), NumAbbrevs(0),
+ NumRecords(0), NumAbbreviatedRecords(0) {}
+ };
+
+ std::map<unsigned, PerBlockIDStats> BlockIDStats;
+
+public:
+ BitcodeAnalyzer(StringRef Buffer, Optional<StringRef> BlockInfoBuffer = None);
+ /// Analyze the bitcode file.
+ Error analyze(Optional<BCDumpOptions> O = None,
+ Optional<StringRef> CheckHash = None);
+ /// Print stats about the bitcode file.
+ void printStats(BCDumpOptions O, Optional<StringRef> Filename = None);
+
+private:
+ /// Read a block, updating statistics, etc.
+ Error parseBlock(unsigned BlockID, unsigned IndentLevel,
+ Optional<BCDumpOptions> O = None,
+ Optional<StringRef> CheckHash = None);
+
+ Error decodeMetadataStringsBlob(StringRef Indent, ArrayRef<uint64_t> Record,
+ StringRef Blob, raw_ostream &OS);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_BITCODE_BITCODE_ANALYZER_H
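Driving the analyzer is straightforward given the declarations above: construct it over the raw buffer, call analyze() with dump options, then printStats() for the histogram. A hedged usage sketch (illustrative, minimal error handling):

#include "llvm/Bitcode/BitcodeAnalyzer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: dump and summarize the bitcode file at Path.
static llvm::Error dumpBitcode(llvm::StringRef Path) {
  auto BufOrErr = llvm::MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return llvm::errorCodeToError(BufOrErr.getError());

  llvm::BitcodeAnalyzer BA((*BufOrErr)->getBuffer());
  llvm::BCDumpOptions Opts(llvm::outs());
  Opts.Histogram = true;
  if (llvm::Error E = BA.analyze(Opts))
    return E;
  BA.printStats(Opts);
  return llvm::Error::success();
}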
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index 0d7cc141f2ce..ba61da733bea 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -1,9 +1,8 @@
//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h
index 0010cf6c0544..39061e09cda5 100644
--- a/include/llvm/Bitcode/BitcodeWriter.h
+++ b/include/llvm/Bitcode/BitcodeWriter.h
@@ -1,9 +1,8 @@
//===- llvm/Bitcode/BitcodeWriter.h - Bitcode writers -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h
index 05044c9ae11c..1773d1b9f11b 100644
--- a/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -1,9 +1,8 @@
//===-- BitcodeWriterPass.h - Bitcode writing pass --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index f0d11e9c1689..decd4dd3a965 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -1,9 +1,8 @@
//===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,7 +17,7 @@
#ifndef LLVM_BITCODE_LLVMBITCODES_H
#define LLVM_BITCODE_LLVMBITCODES_H
-#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
namespace llvm {
namespace bitc {
@@ -264,10 +263,31 @@ enum GlobalValueSummarySymtabCodes {
// Index-wide flags
FS_FLAGS = 20,
// Maps type identifier to summary information for that type identifier.
+ // Produced by the thin link (only lives in combined index).
// TYPE_ID: [typeid, kind, bitwidth, align, size, bitmask, inlinebits,
// n x (typeid, kind, name, numrba,
// numrba x (numarg, numarg x arg, kind, info, byte, bit))]
FS_TYPE_ID = 21,
+ // For background see overview at https://llvm.org/docs/TypeMetadata.html.
+ // The type metadata includes both the type identifier and the offset of
+ // the address point of the type (the address held by objects of that type
+ // which may not be the beginning of the virtual table). Vtable definitions
+ // are decorated with type metadata for the types they are compatible with.
+ //
+ // Maps type identifier to summary information for that type identifier
+ // computed from type metadata: the valueid of each vtable definition
+ // decorated with a type metadata for that identifier, and the offset from
+ // the corresponding type metadata.
+ // Exists in the per-module summary to provide information to thin link
+ // for index-based whole program devirtualization.
+ // TYPE_ID_METADATA: [typeid, n x (valueid, offset)]
+ FS_TYPE_ID_METADATA = 22,
+ // Summarizes vtable definition for use in index-based whole program
+ // devirtualization during the thin link.
+ // PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags,
+ // numrefs, numrefs x valueid,
+ // n x (valueid, offset)]
+ FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS = 23,
};
enum MetadataCodes {
@@ -311,6 +331,7 @@ enum MetadataCodes {
METADATA_INDEX_OFFSET = 38, // [offset]
METADATA_INDEX = 39, // [bitpos]
METADATA_LABEL = 40, // [distinct, scope, name, file, line]
+ METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
@@ -407,7 +428,9 @@ enum RMWOperations {
RMW_MAX = 7,
RMW_MIN = 8,
RMW_UMAX = 9,
- RMW_UMIN = 10
+ RMW_UMIN = 10,
+ RMW_FADD = 11,
+ RMW_FSUB = 12
};
/// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
@@ -534,6 +557,8 @@ enum FunctionCodes {
// 54 is unused.
FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...]
FUNC_CODE_INST_UNOP = 56, // UNOP: [opcode, ty, opval]
+ FUNC_CODE_INST_CALLBR = 57, // CALLBR: [attr, cc, norm, transfs,
+ // fnty, fnid, args...]
};
enum UseListCodes {
@@ -602,6 +627,11 @@ enum AttributeKindCodes {
ATTR_KIND_OPT_FOR_FUZZING = 57,
ATTR_KIND_SHADOWCALLSTACK = 58,
ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59,
+ ATTR_KIND_IMMARG = 60,
+ ATTR_KIND_WILLRETURN = 61,
+ ATTR_KIND_NOFREE = 62,
+ ATTR_KIND_NOSYNC = 63,
+ ATTR_KIND_SANITIZE_MEMTAG = 64,
};
enum ComdatSelectionKindCodes {
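To make the TYPE_ID_METADATA layout documented above concrete, here is a schematic C++ walk over the (valueid, offset) pairs of a decoded record. The record is taken as a plain ArrayRef<uint64_t> laid out exactly as the comment states; walkTypeIdMetadata is a hypothetical helper for illustration, not the actual BitcodeReader logic.

    #include "llvm/ADT/ArrayRef.h"
    #include <cstdint>

    // Schematic walk of a TYPE_ID_METADATA record: [typeid, n x (valueid, offset)].
    // Each pair ties a vtable definition (by value id) to the offset from the
    // corresponding type metadata for this type identifier.
    static void walkTypeIdMetadata(llvm::ArrayRef<uint64_t> Record) {
      uint64_t TypeId = Record[0];            // type identifier field
      for (size_t I = 1; I + 1 < Record.size(); I += 2) {
        uint64_t VTableValueId = Record[I];   // valueid of the vtable global
        uint64_t Offset = Record[I + 1];      // offset recorded for that vtable
        (void)TypeId; (void)VTableValueId; (void)Offset;
      }
    }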
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitstream/BitCodes.h
index bf21e146e771..adf54ba96396 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitstream/BitCodes.h
@@ -1,13 +1,12 @@
-//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
+//===- BitCodes.h - Enum values for the bitstream format --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This header Bitcode enum values.
+// This header defines bitstream enum values.
//
// The enum values defined in this file should be considered permanent. If
// new features are added, they should have values added at the end of the
@@ -15,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_BITCODE_BITCODES_H
-#define LLVM_BITCODE_BITCODES_H
+#ifndef LLVM_BITSTREAM_BITCODES_H
+#define LLVM_BITSTREAM_BITCODES_H
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
@@ -24,13 +23,15 @@
#include <cassert>
namespace llvm {
-/// Offsets of the 32-bit fields of bitcode wrapper header.
-static const unsigned BWH_MagicField = 0 * 4;
-static const unsigned BWH_VersionField = 1 * 4;
-static const unsigned BWH_OffsetField = 2 * 4;
-static const unsigned BWH_SizeField = 3 * 4;
-static const unsigned BWH_CPUTypeField = 4 * 4;
-static const unsigned BWH_HeaderSize = 5 * 4;
+/// Offsets of the 32-bit fields of bitstream wrapper header.
+enum BitstreamWrapperHeader : unsigned {
+ BWH_MagicField = 0 * 4,
+ BWH_VersionField = 1 * 4,
+ BWH_OffsetField = 2 * 4,
+ BWH_SizeField = 3 * 4,
+ BWH_CPUTypeField = 4 * 4,
+ BWH_HeaderSize = 5 * 4
+};
namespace bitc {
enum StandardWidths {
@@ -160,8 +161,6 @@ public:
};
-template <> struct isPodLike<BitCodeAbbrevOp> { static const bool value=true; };
-
/// BitCodeAbbrev - This class represents an abbreviation record. An
/// abbreviation allows a complex record that has redundancy to be stored in a
/// specialized format instead of the fully-general, fully-vbr, format.
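As a usage sketch for the BWH_* byte offsets that now live in the BitstreamWrapperHeader enum above: a hypothetical helper that pulls one little-endian 32-bit field out of a wrapper header buffer. readWrapperField is not part of the header, and a little-endian host is assumed for simplicity.

    #include <cstdint>
    #include <cstring>

    // Hypothetical helper: read one 32-bit wrapper-header field at the byte
    // offset named by a BWH_* enumerator (BWH_MagicField, BWH_VersionField, ...).
    static uint32_t readWrapperField(const uint8_t *Buf, unsigned ByteOffset) {
      uint32_t Value;
      std::memcpy(&Value, Buf + ByteOffset, sizeof(Value)); // each field is 4 bytes
      return Value; // assumes a little-endian host; the real reader handles endianness
    }

    // e.g. readWrapperField(Buf, BWH_OffsetField) yields the embedded bitcode
    // offset, and BWH_HeaderSize is the total size of the wrapper header.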
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitstream/BitstreamReader.h
index 72e7619d9e1c..ee82e7ec1ba2 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitstream/BitstreamReader.h
@@ -1,9 +1,8 @@
//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_BITCODE_BITSTREAMREADER_H
-#define LLVM_BITCODE_BITSTREAMREADER_H
+#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
+#define LLVM_BITSTREAM_BITSTREAMREADER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -98,14 +97,13 @@ private:
unsigned BitsInCurWord = 0;
public:
- static const size_t MaxChunkSize = sizeof(word_t) * 8;
+ static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8;
SimpleBitstreamCursor() = default;
explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
: BitcodeBytes(BitcodeBytes) {}
explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
- : BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
- BitcodeBytes.size()) {}
+ : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
: SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
@@ -129,7 +127,7 @@ public:
ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
/// Reset the stream to the specified bit number.
- void JumpToBit(uint64_t BitNo) {
+ Error JumpToBit(uint64_t BitNo) {
size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
assert(canSkipToPos(ByteNo) && "Invalid location");
@@ -139,8 +137,14 @@ public:
BitsInCurWord = 0;
// Skip over any bits that are already consumed.
- if (WordBitNo)
- Read(WordBitNo);
+ if (WordBitNo) {
+ if (Expected<word_t> Res = Read(WordBitNo))
+ return Error::success();
+ else
+ return Res.takeError();
+ }
+
+ return Error::success();
}
/// Get a pointer into the bitstream at the specified byte offset.
@@ -156,9 +160,11 @@ public:
return getPointerToByte(BitNo / 8, NumBytes);
}
- void fillCurWord() {
+ Error fillCurWord() {
if (NextChar >= BitcodeBytes.size())
- report_fatal_error("Unexpected end of file");
+ return createStringError(std::errc::io_error,
+ "Unexpected end of file reading %u of %u bytes",
+ NextChar, BitcodeBytes.size());
// Read the next word from the stream.
const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
@@ -177,9 +183,10 @@ public:
}
NextChar += BytesRead;
BitsInCurWord = BytesRead * 8;
+ return Error::success();
}
- word_t Read(unsigned NumBits) {
+ Expected<word_t> Read(unsigned NumBits) {
static const unsigned BitsInWord = MaxChunkSize;
assert(NumBits && NumBits <= BitsInWord &&
@@ -201,11 +208,14 @@ public:
word_t R = BitsInCurWord ? CurWord : 0;
unsigned BitsLeft = NumBits - BitsInCurWord;
- fillCurWord();
+ if (Error fillResult = fillCurWord())
+ return std::move(fillResult);
// If we run out of data, abort.
if (BitsLeft > BitsInCurWord)
- report_fatal_error("Unexpected end of file");
+ return createStringError(std::errc::io_error,
+ "Unexpected end of file reading %u of %u bits",
+ BitsInCurWord, BitsLeft);
word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
@@ -219,8 +229,12 @@ public:
return R;
}
- uint32_t ReadVBR(unsigned NumBits) {
- uint32_t Piece = Read(NumBits);
+ Expected<uint32_t> ReadVBR(unsigned NumBits) {
+ Expected<unsigned> MaybeRead = Read(NumBits);
+ if (!MaybeRead)
+ return MaybeRead;
+ uint32_t Piece = MaybeRead.get();
+
if ((Piece & (1U << (NumBits-1))) == 0)
return Piece;
@@ -233,14 +247,21 @@ public:
return Result;
NextBit += NumBits-1;
- Piece = Read(NumBits);
+ MaybeRead = Read(NumBits);
+ if (!MaybeRead)
+ return MaybeRead;
+ Piece = MaybeRead.get();
}
}
// Read a VBR that may have a value up to 64-bits in size. The chunk size of
// the VBR must still be <= 32 bits though.
- uint64_t ReadVBR64(unsigned NumBits) {
- uint32_t Piece = Read(NumBits);
+ Expected<uint64_t> ReadVBR64(unsigned NumBits) {
+ Expected<uint64_t> MaybeRead = Read(NumBits);
+ if (!MaybeRead)
+ return MaybeRead;
+ uint32_t Piece = MaybeRead.get();
+
if ((Piece & (1U << (NumBits-1))) == 0)
return uint64_t(Piece);
@@ -253,7 +274,10 @@ public:
return Result;
NextBit += NumBits-1;
- Piece = Read(NumBits);
+ MaybeRead = Read(NumBits);
+ if (!MaybeRead)
+ return MaybeRead;
+ Piece = MaybeRead.get();
}
}
@@ -270,6 +294,9 @@ public:
BitsInCurWord = 0;
}
+ /// Return the size of the stream in bytes.
+ size_t SizeInBytes() const { return BitcodeBytes.size(); }
+
/// Skip to the end of the file.
void skipToEnd() { NextChar = BitcodeBytes.size(); }
};
@@ -340,17 +367,18 @@ public:
explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
: SimpleBitstreamCursor(BitcodeBytes) {}
- using SimpleBitstreamCursor::canSkipToPos;
using SimpleBitstreamCursor::AtEndOfStream;
+ using SimpleBitstreamCursor::canSkipToPos;
+ using SimpleBitstreamCursor::fillCurWord;
using SimpleBitstreamCursor::getBitcodeBytes;
using SimpleBitstreamCursor::GetCurrentBitNo;
using SimpleBitstreamCursor::getCurrentByteNo;
using SimpleBitstreamCursor::getPointerToByte;
using SimpleBitstreamCursor::JumpToBit;
- using SimpleBitstreamCursor::fillCurWord;
using SimpleBitstreamCursor::Read;
using SimpleBitstreamCursor::ReadVBR;
using SimpleBitstreamCursor::ReadVBR64;
+ using SimpleBitstreamCursor::SizeInBytes;
/// Return the number of bits used to encode an abbrev #.
unsigned getAbbrevIDWidth() const { return CurCodeSize; }
@@ -367,12 +395,16 @@ public:
};
/// Advance the current bitstream, returning the next entry in the stream.
- BitstreamEntry advance(unsigned Flags = 0) {
+ Expected<BitstreamEntry> advance(unsigned Flags = 0) {
while (true) {
if (AtEndOfStream())
return BitstreamEntry::getError();
- unsigned Code = ReadCode();
+ Expected<unsigned> MaybeCode = ReadCode();
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ unsigned Code = MaybeCode.get();
+
if (Code == bitc::END_BLOCK) {
// Pop the end of the block unless Flags tells us not to.
if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
@@ -380,14 +412,19 @@ public:
return BitstreamEntry::getEndBlock();
}
- if (Code == bitc::ENTER_SUBBLOCK)
- return BitstreamEntry::getSubBlock(ReadSubBlockID());
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
+ return BitstreamEntry::getSubBlock(MaybeSubBlock.get());
+ else
+ return MaybeSubBlock.takeError();
+ }
if (Code == bitc::DEFINE_ABBREV &&
!(Flags & AF_DontAutoprocessAbbrevs)) {
// We read and accumulate abbrev's, the client can't do anything with
// them anyway.
- ReadAbbrevRecord();
+ if (Error Err = ReadAbbrevRecord())
+ return std::move(Err);
continue;
}
@@ -397,53 +434,66 @@ public:
/// This is a convenience function for clients that don't expect any
/// subblocks. This just skips over them automatically.
- BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
+ Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
while (true) {
// If we found a normal entry, return it.
- BitstreamEntry Entry = advance(Flags);
+ Expected<BitstreamEntry> MaybeEntry = advance(Flags);
+ if (!MaybeEntry)
+ return MaybeEntry;
+ BitstreamEntry Entry = MaybeEntry.get();
+
if (Entry.Kind != BitstreamEntry::SubBlock)
return Entry;
// If we found a sub-block, just skip over it and check the next entry.
- if (SkipBlock())
- return BitstreamEntry::getError();
+ if (Error Err = SkipBlock())
+ return std::move(Err);
}
}
- unsigned ReadCode() {
- return Read(CurCodeSize);
- }
+ Expected<unsigned> ReadCode() { return Read(CurCodeSize); }
// Block header:
// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
/// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
- unsigned ReadSubBlockID() {
- return ReadVBR(bitc::BlockIDWidth);
- }
+ Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
/// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
- /// of this block. If the block record is malformed, return true.
- bool SkipBlock() {
- // Read and ignore the codelen value. Since we are skipping this block, we
- // don't care what code widths are used inside of it.
- ReadVBR(bitc::CodeLenWidth);
+ /// of this block.
+ Error SkipBlock() {
+ // Read and ignore the codelen value.
+ if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth))
+ ; // Since we are skipping this block, we don't care what code widths are
+ // used inside of it.
+ else
+ return Res.takeError();
+
SkipToFourByteBoundary();
- size_t NumFourBytes = Read(bitc::BlockSizeWidth);
+ Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth);
+ if (!MaybeNum)
+ return MaybeNum.takeError();
+ size_t NumFourBytes = MaybeNum.get();
// Check that the block wasn't partially defined, and that the offset isn't
// bogus.
- size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
- if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
- return true;
-
- JumpToBit(SkipTo);
- return false;
+ size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8;
+ if (AtEndOfStream())
+ return createStringError(std::errc::illegal_byte_sequence,
+ "can't skip block: already at end of stream");
+ if (!canSkipToPos(SkipTo / 8))
+ return createStringError(std::errc::illegal_byte_sequence,
+ "can't skip to bit %zu from %" PRIu64, SkipTo,
+ GetCurrentBitNo());
+
+ if (Error Res = JumpToBit(SkipTo))
+ return Res;
+
+ return Error::success();
}
- /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
- /// if the block has an error.
- bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
+  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
+ Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
bool ReadBlockEnd() {
if (BlockScope.empty()) return true;
@@ -478,22 +528,23 @@ public:
}
/// Read the current record and discard it, returning the code for the record.
- unsigned skipRecord(unsigned AbbrevID);
+ Expected<unsigned> skipRecord(unsigned AbbrevID);
- unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
- StringRef *Blob = nullptr);
+ Expected<unsigned> readRecord(unsigned AbbrevID,
+ SmallVectorImpl<uint64_t> &Vals,
+ StringRef *Blob = nullptr);
//===--------------------------------------------------------------------===//
// Abbrev Processing
//===--------------------------------------------------------------------===//
- void ReadAbbrevRecord();
+ Error ReadAbbrevRecord();
/// Read and return a block info block from the bitstream. If an error was
/// encountered, return None.
///
/// \param ReadBlockInfoNames Whether to read block/record name information in
/// the BlockInfo block. Only llvm-bcanalyzer uses this.
- Optional<BitstreamBlockInfo>
+ Expected<Optional<BitstreamBlockInfo>>
ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
/// Set the block info to be used by this BitstreamCursor to interpret
@@ -503,4 +554,4 @@ public:
} // end llvm namespace
-#endif // LLVM_BITCODE_BITSTREAMREADER_H
+#endif // LLVM_BITSTREAM_BITSTREAMREADER_H
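Because advance(), readRecord(), SkipBlock() and friends above now return Expected<...> / Error rather than plain values, callers have to propagate failures explicitly. A minimal, hypothetical read loop under that assumption (record interpretation elided):

    #include "llvm/Bitstream/BitstreamReader.h"
    using namespace llvm;

    // Sketch of driving the Expected-based cursor; not taken from a real caller.
    static Error walkStream(BitstreamCursor &Stream) {
      SmallVector<uint64_t, 64> Record;
      while (!Stream.AtEndOfStream()) {
        Expected<BitstreamEntry> MaybeEntry = Stream.advance();
        if (!MaybeEntry)
          return MaybeEntry.takeError();        // propagate malformed-stream errors
        BitstreamEntry Entry = MaybeEntry.get();

        if (Entry.Kind == BitstreamEntry::Error ||
            Entry.Kind == BitstreamEntry::EndBlock)
          break;
        if (Entry.Kind == BitstreamEntry::SubBlock) {
          if (Error Err = Stream.SkipBlock())   // SkipBlock now reports an Error too
            return Err;
          continue;
        }

        Record.clear();
        Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record);
        if (!MaybeCode)
          return MaybeCode.takeError();
        // ... interpret MaybeCode.get() and Record here ...
      }
      return Error::success();
    }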
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitstream/BitstreamWriter.h
index c854769e0622..c0ead19dc71d 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitstream/BitstreamWriter.h
@@ -1,9 +1,8 @@
//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,14 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_BITCODE_BITSTREAMWRITER_H
-#define LLVM_BITCODE_BITSTREAMWRITER_H
+#ifndef LLVM_BITSTREAM_BITSTREAMWRITER_H
+#define LLVM_BITSTREAM_BITSTREAMWRITER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Support/Endian.h"
#include <vector>
@@ -487,10 +486,8 @@ private:
}
public:
- /// EmitAbbrev - This emits an abbreviation to the stream. Note that this
- /// method takes ownership of the specified abbrev.
+ /// Emits the abbreviation \p Abbv to the stream.
unsigned EmitAbbrev(std::shared_ptr<BitCodeAbbrev> Abbv) {
- // Emit the abbreviation as a record.
EncodeAbbrev(*Abbv);
CurAbbrevs.push_back(std::move(Abbv));
return static_cast<unsigned>(CurAbbrevs.size())-1 +
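A short usage note for the updated EmitAbbrev above, which now shares ownership of the abbreviation instead of taking a raw pointer; the particular operand encodings below are only an example:

    #include "llvm/Bitstream/BitstreamWriter.h"
    #include <memory>
    using namespace llvm;

    // Typical emission sequence; Stream is any live BitstreamWriter.
    static unsigned emitExampleAbbrev(BitstreamWriter &Stream) {
      auto Abbv = std::make_shared<BitCodeAbbrev>();
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // 8-bit fixed field
      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));   // VBR-encoded field
      return Stream.EmitAbbrev(std::move(Abbv));             // id usable with EmitRecord()
    }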
diff --git a/include/llvm/CodeGen/AccelTable.h b/include/llvm/CodeGen/AccelTable.h
index 13928582f2dd..734531a65d50 100644
--- a/include/llvm/CodeGen/AccelTable.h
+++ b/include/llvm/CodeGen/AccelTable.h
@@ -1,9 +1,8 @@
//==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -327,14 +326,8 @@ public:
void emit(AsmPrinter *Asm) const override;
-#ifndef _MSC_VER
- // The line below is rejected by older versions (TBD) of MSVC.
static constexpr Atom Atoms[] = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-#else
- // FIXME: Erase this path once the minimum MSCV version has been bumped.
- static const SmallVector<Atom, 4> Atoms;
-#endif
#ifndef NDEBUG
void print(raw_ostream &OS) const override;
@@ -352,16 +345,10 @@ public:
void emit(AsmPrinter *Asm) const override;
-#ifndef _MSC_VER
- // The line below is rejected by older versions (TBD) of MSVC.
static constexpr Atom Atoms[] = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
-#else
- // FIXME: Erase this path once the minimum MSCV version has been bumped.
- static const SmallVector<Atom, 4> Atoms;
-#endif
#ifndef NDEBUG
void print(raw_ostream &OS) const override;
@@ -376,14 +363,8 @@ public:
void emit(AsmPrinter *Asm) const override;
-#ifndef _MSC_VER
- // The line below is rejected by older versions (TBD) of MSVC.
static constexpr Atom Atoms[] = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-#else
- // FIXME: Erase this path once the minimum MSCV version has been bumped.
- static const SmallVector<Atom, 4> Atoms;
-#endif
#ifndef NDEBUG
void print(raw_ostream &OS) const override;
@@ -407,16 +388,10 @@ public:
void emit(AsmPrinter *Asm) const override;
-#ifndef _MSC_VER
- // The line below is rejected by older versions (TBD) of MSVC.
static constexpr Atom Atoms[] = {
Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
-#else
- // FIXME: Erase this path once the minimum MSCV version has been bumped.
- static const SmallVector<Atom, 4> Atoms;
-#endif
#ifndef NDEBUG
void print(raw_ostream &OS) const override;
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index d77aee66ed76..0be0ac22a74d 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -1,9 +1,8 @@
//===- CodeGen/Analysis.h - CodeGen LLVM IR Analysis Utilities --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@
namespace llvm {
class GlobalValue;
+class LLT;
class MachineBasicBlock;
class MachineFunction;
class TargetLoweringBase;
@@ -74,6 +74,25 @@ void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
SmallVectorImpl<uint64_t> *Offsets = nullptr,
uint64_t StartingOffset = 0);
+/// Variant of ComputeValueVTs that also produces the memory VTs.
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<uint64_t> *Offsets = nullptr,
+ uint64_t StartingOffset = 0);
+
+/// computeValueLLTs - Given an LLVM IR type, compute a sequence of
+/// LLTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void computeValueLLTs(const DataLayout &DL, Type &Ty,
+ SmallVectorImpl<LLT> &ValueTys,
+ SmallVectorImpl<uint64_t> *Offsets = nullptr,
+ uint64_t StartingOffset = 0);
+
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *ExtractTypeInfo(Value *V);
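A small sketch of how the new computeValueLLTs declaration above might be driven; collectLeafLLTs and its arguments are hypothetical names, not part of the header.

    #include "llvm/CodeGen/Analysis.h"
    #include "llvm/Support/LowLevelTypeImpl.h"
    using namespace llvm;

    // Hypothetical caller: split an IR type into its non-aggregate LLT leaves.
    static void collectLeafLLTs(const DataLayout &DL, Type &Ty) {
      SmallVector<LLT, 4> ValueTys;
      SmallVector<uint64_t, 4> Offsets;
      computeValueLLTs(DL, Ty, ValueTys, &Offsets);
      // ValueTys[i] is the low-level type of the i-th leaf value and Offsets[i]
      // its in-memory offset within the enclosing value.
    }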
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 413901d218f9..d110f8b01cb5 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AsmPrinter.h - AsmPrinter Framework ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/InlineAsm.h"
@@ -33,7 +33,6 @@
namespace llvm {
-class AsmPrinterHandler;
class BasicBlock;
class BlockAddress;
class Constant;
@@ -122,9 +121,6 @@ public:
using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>;
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
- /// Enable print [latency:throughput] in output.
- bool EnablePrintSchedInfo = false;
-
private:
MCSymbol *CurrentFnBegin = nullptr;
MCSymbol *CurrentFnEnd = nullptr;
@@ -142,16 +138,16 @@ protected:
/// Protected struct HandlerInfo and Handlers permit target extended
/// AsmPrinter adds their own handlers.
struct HandlerInfo {
- AsmPrinterHandler *Handler;
+ std::unique_ptr<AsmPrinterHandler> Handler;
const char *TimerName;
const char *TimerDescription;
const char *TimerGroupName;
const char *TimerGroupDescription;
- HandlerInfo(AsmPrinterHandler *Handler, const char *TimerName,
- const char *TimerDescription, const char *TimerGroupName,
- const char *TimerGroupDescription)
- : Handler(Handler), TimerName(TimerName),
+ HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler,
+ const char *TimerName, const char *TimerDescription,
+ const char *TimerGroupName, const char *TimerGroupDescription)
+ : Handler(std::move(Handler)), TimerName(TimerName),
TimerDescription(TimerDescription), TimerGroupName(TimerGroupName),
TimerGroupDescription(TimerGroupDescription) {}
};
@@ -227,6 +223,9 @@ public:
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
+  /// Emits the initial debug location directive.
+ void emitInitialRawDwarfLocDirective(const MachineFunction &MF);
+
/// Return the current section we are emitting to.
const MCSection *getCurrentSection() const;
@@ -316,6 +315,8 @@ public:
void emitStackSizeSection(const MachineFunction &MF);
+ void emitRemarksSection(Module &M);
+
enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug };
CFIMoveType needsCFIMoves() const;
@@ -511,7 +512,7 @@ public:
void EmitSLEB128(int64_t Value, const char *Desc = nullptr) const;
/// Emit the specified unsigned leb128 value.
- void EmitULEB128(uint64_t Value, const char *Desc = nullptr) const;
+ void EmitULEB128(uint64_t Value, const char *Desc = nullptr, unsigned PadTo = 0) const;
/// Emit a .byte 42 directive that corresponds to an encoding. If verbose
/// assembly output is enabled, we output comments describing the encoding.
@@ -542,6 +543,12 @@ public:
emitDwarfStringOffset(S.getEntry());
}
+ /// Emit reference to a call site with a specified encoding
+ void EmitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Encoding) const;
+ /// Emit an integer value corresponding to the call site encoding
+ void EmitCallSiteValue(uint64_t Value, unsigned Encoding) const;
+
/// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
virtual unsigned getISAEncoding() { return 0; }
@@ -589,20 +596,22 @@ public:
virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
const char *Code) const;
+ /// Print the MachineOperand as a symbol. Targets with complex handling of
+ /// symbol references should override the base implementation.
+ virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS);
+
/// Print the specified operand of MI, an INLINEASM instruction, using the
/// specified assembler variant. Targets should override this to format as
/// appropriate. This method can return true if the operand is erroneous.
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS);
+ const char *ExtraCode, raw_ostream &OS);
/// Print the specified operand of MI, an INLINEASM instruction, using the
/// specified assembler variant as an address. Targets should override this to
/// format as appropriate. This method can return true if the operand is
/// erroneous.
virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS);
+ const char *ExtraCode, raw_ostream &OS);
/// Let the target do anything it needs to do before emitting inlineasm.
/// \p StartInfo - the subtarget info before parsing inline asm
@@ -617,6 +626,15 @@ public:
virtual void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const;
+ /// This emits visibility information about symbol, if this is supported by
+ /// the target.
+ void EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition = true) const;
+
+ /// This emits linkage information about \p GVSym based on \p GV, if this is
+ /// supported by the target.
+ void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
+
private:
/// Private state for PrintSpecial()
// Assign a unique ID to this machine instruction.
@@ -647,13 +665,6 @@ private:
// Internal Implementation Details
//===------------------------------------------------------------------===//
- /// This emits visibility information about symbol, if this is supported by
- /// the target.
- void EmitVisibility(MCSymbol *Sym, unsigned Visibility,
- bool IsDefinition = true) const;
-
- void EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const;
-
void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid) const;
void EmitLLVMUsedList(const ConstantArray *InitList);
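For targets, the dropped AsmVariant parameter above means inline-asm printing overrides shrink to the following shape; MyTargetAsmPrinter is a placeholder class and the body is only illustrative.

    #include "llvm/CodeGen/AsmPrinter.h"
    using namespace llvm;

    // Placeholder target printer showing the new override signature.
    class MyTargetAsmPrinter : public AsmPrinter {
      bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                           const char *ExtraCode, raw_ostream &OS) override {
        // Let the target-independent code handle the standard modifiers first.
        if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
          return false;                 // base class printed the operand
        // ... target-specific printing of MI->getOperand(OpNo) into OS ...
        return false;                   // false means the operand was handled
      }
    };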
diff --git a/include/llvm/CodeGen/AsmPrinterHandler.h b/include/llvm/CodeGen/AsmPrinterHandler.h
index a8b13200dd4e..affb558f2fa6 100644
--- a/include/llvm/CodeGen/AsmPrinterHandler.h
+++ b/include/llvm/CodeGen/AsmPrinterHandler.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/AsmPrinterHandler.h -----------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h
index b1adf66e7ff4..8a46c6e00d22 100644
--- a/include/llvm/CodeGen/AtomicExpandUtils.h
+++ b/include/llvm/CodeGen/AtomicExpandUtils.h
@@ -1,9 +1,8 @@
//===- AtomicExpandUtils.h - Utilities for expanding atomic instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index f105d887c397..70bf670fdf0b 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1,9 +1,8 @@
//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -197,11 +196,12 @@ protected:
public:
/// \name Scalar TTI Implementations
/// @{
- bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
- unsigned BitWidth, unsigned AddressSpace,
- unsigned Alignment, bool *Fast) const {
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+ unsigned AddressSpace, unsigned Alignment,
+ bool *Fast) const {
EVT E = EVT::getIntegerVT(Context, BitWidth);
- return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
+ return getTLI()->allowsMisalignedMemoryAccesses(
+ E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
}
bool hasBranchDivergence() { return false; }
@@ -293,12 +293,12 @@ public:
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) {
- return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
+ ArrayRef<const Value *> Arguments, const User *U) {
+ return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) {
+ ArrayRef<Type *> ParamTys, const User *U) {
if (IID == Intrinsic::cttz) {
if (getTLI()->isCheapToSpeculateCttz())
return TargetTransformInfo::TCC_Basic;
@@ -311,7 +311,7 @@ public:
return TargetTransformInfo::TCC_Expensive;
}
- return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
+ return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -414,6 +414,12 @@ public:
if (TLI->isZExtFree(OpTy, Ty))
return TargetTransformInfo::TCC_Free;
return TargetTransformInfo::TCC_Basic;
+
+ case Instruction::AddrSpaceCast:
+ if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
+ Ty->getPointerAddressSpace()))
+ return TargetTransformInfo::TCC_Free;
+ return TargetTransformInfo::TCC_Basic;
}
return BaseT::getOperationCost(Opcode, Ty, OpTy);
@@ -421,6 +427,8 @@ public:
unsigned getInliningThresholdMultiplier() { return 1; }
+ int getInlinerVectorBonusPercent() { return 150; }
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
@@ -486,6 +494,13 @@ public:
UP.BEInsns = 2;
}
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+ }
+
int getInstructionLatency(const Instruction *I) {
if (isa<LoadInst>(I))
return getST()->getSchedModel().DefaultLoadLatency;
@@ -657,7 +672,7 @@ public:
return 0;
if (Opcode == Instruction::AddrSpaceCast &&
- TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
+ TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
Dst->getPointerAddressSpace()))
return 0;
@@ -997,7 +1012,7 @@ public:
// inside the loop.
if (UseMaskForGaps)
Cost += static_cast<T *>(this)->getArithmeticInstrCost(
- BinaryOperator::And, MaskVT);
+ BinaryOperator::And, MaskVT);
return Cost;
}
@@ -1058,8 +1073,8 @@ public:
case Intrinsic::experimental_vector_reduce_and:
case Intrinsic::experimental_vector_reduce_or:
case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_smin:
case Intrinsic::experimental_vector_reduce_fmax:
@@ -1116,6 +1131,9 @@ public:
unsigned getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
+ unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
+ auto *ConcreteTTI = static_cast<T *>(this);
+
SmallVector<unsigned, 2> ISDs;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
@@ -1144,8 +1162,8 @@ public:
if (ScalarCalls == 1)
return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
- unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
- IID, ScalarRetTy, ScalarTys, FMF);
+ unsigned ScalarCost =
+ ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
return ScalarCalls * ScalarCost + ScalarizationCost;
}
@@ -1227,44 +1245,181 @@ public:
case Intrinsic::sideeffect:
return 0;
case Intrinsic::masked_store:
- return static_cast<T *>(this)
- ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
+ return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
+ 0);
case Intrinsic::masked_load:
- return static_cast<T *>(this)
- ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
+ return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
case Intrinsic::experimental_vector_reduce_add:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_mul:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_and:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_or:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_xor:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_fadd:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_fmul:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
+ /*IsPairwiseForm=*/false);
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ return ConcreteTTI->getArithmeticReductionCost(
+ Instruction::FAdd, Tys[0],
+ /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
+ // reductions.
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ return ConcreteTTI->getArithmeticReductionCost(
+ Instruction::FMul, Tys[0],
+ /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
+ // reductions.
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_smin:
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
- return static_cast<T *>(this)->getMinMaxReductionCost(
+ return ConcreteTTI->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
- /*IsSigned=*/true);
+ /*IsUnsigned=*/true);
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
- return static_cast<T *>(this)->getMinMaxReductionCost(
+ return ConcreteTTI->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
- /*IsSigned=*/false);
+ /*IsUnsigned=*/false);
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat: {
+ Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+ if (RetVF > 1)
+ CondTy = VectorType::get(CondTy, RetVF);
+
+ Type *OpTy = StructType::create({RetTy, CondTy});
+ Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
+ ? Intrinsic::sadd_with_overflow
+ : Intrinsic::ssub_with_overflow;
+
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getIntrinsicInstrCost(
+ OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
+ CondTy, nullptr);
+ Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, nullptr);
+ return Cost;
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat: {
+ Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+ if (RetVF > 1)
+ CondTy = VectorType::get(CondTy, RetVF);
+
+ Type *OpTy = StructType::create({RetTy, CondTy});
+ Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
+ ? Intrinsic::uadd_with_overflow
+ : Intrinsic::usub_with_overflow;
+
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getIntrinsicInstrCost(
+ OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, nullptr);
+ return Cost;
+ }
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
+ unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+ Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
+ if (RetVF > 1)
+ ExtTy = VectorType::get(ExtTy, RetVF);
+
+ unsigned ExtOp =
+ IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+
+ unsigned Cost = 0;
+ Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+ Cost +=
+ 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+ return Cost;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow: {
+ Type *SumTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned Opcode = IID == Intrinsic::sadd_with_overflow
+ ? BinaryOperator::Add
+ : BinaryOperator::Sub;
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+ Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+ OverflowTy, nullptr);
+ Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
+ BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
+ Cost +=
+ ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+ return Cost;
+ }
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ Type *SumTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned Opcode = IID == Intrinsic::uadd_with_overflow
+ ? BinaryOperator::Add
+ : BinaryOperator::Sub;
+
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+ OverflowTy, nullptr);
+ return Cost;
+ }
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ Type *MulTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+ Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
+ if (MulTy->isVectorTy())
+      ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
+
+ unsigned ExtOp =
+        IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
+
+ unsigned Cost = 0;
+ Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+ Cost +=
+ 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+
+ if (IID == Intrinsic::smul_with_overflow)
+ Cost += ConcreteTTI->getArithmeticInstrCost(
+ Instruction::AShr, MulTy, TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
+ OverflowTy, nullptr);
+ return Cost;
+ }
case Intrinsic::ctpop:
ISDs.push_back(ISD::CTPOP);
// In case of legalization use TCC_Expensive. This is cheaper than a
@@ -1305,17 +1460,16 @@ public:
if (MinLegalCostI != LegalCost.end())
return *MinLegalCostI;
- auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
+ auto MinCustomCostI =
+ std::min_element(CustomCost.begin(), CustomCost.end());
if (MinCustomCostI != CustomCost.end())
return *MinCustomCostI;
// If we can't lower fmuladd into an FMA estimate the cost as a floating
// point mul followed by an add.
if (IID == Intrinsic::fmuladd)
- return static_cast<T *>(this)
- ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
- static_cast<T *>(this)
- ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+ return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
+ ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
@@ -1333,7 +1487,7 @@ public:
Ty = Ty->getScalarType();
ScalarTys.push_back(Ty);
}
- unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
+ unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
IID, RetTy->getScalarType(), ScalarTys, FMF);
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
if (Tys[i]->isVectorTy()) {
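The sadd_sat / ssub_sat case above prices the overflow-based expansion piece by piece (one overflow-checked add, one compare, two selects). As a rough illustration of what is being costed, a scalar C++ rendition using the Clang/GCC __builtin_add_overflow builtin; this is not LLVM code, just a model of the lowering being priced.

    #include <cstdint>

    // Each line mirrors one of the costed operations for llvm.sadd.sat.i32.
    static int32_t sadd_sat_i32(int32_t LHS, int32_t RHS) {
      int32_t Sum;
      bool Overflow = __builtin_add_overflow(LHS, RHS, &Sum); // sadd.with.overflow
      int32_t Sat = (Sum < 0) ? INT32_MAX : INT32_MIN;        // icmp + first select
      return Overflow ? Sat : Sum;                            // second select
    }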
diff --git a/include/llvm/CodeGen/BuiltinGCs.h b/include/llvm/CodeGen/BuiltinGCs.h
index 1767922fb5ac..d44183dab0f7 100644
--- a/include/llvm/CodeGen/BuiltinGCs.h
+++ b/include/llvm/CodeGen/BuiltinGCs.h
@@ -1,9 +1,8 @@
//===-- BuiltinGCs.h - Garbage collector linkage hacks --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/CSEConfigBase.h b/include/llvm/CodeGen/CSEConfigBase.h
new file mode 100644
index 000000000000..70b5e5c17eb1
--- /dev/null
+++ b/include/llvm/CodeGen/CSEConfigBase.h
@@ -0,0 +1,28 @@
+//===- CSEConfigBase.h - A CSEConfig interface ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CSECONFIG_BASE_H
+#define LLVM_CODEGEN_CSECONFIG_BASE_H
+
+namespace llvm {
+// Class representing some configuration that can be done during GlobalISel's
+// CSEInfo analysis. We define it here because TargetPassConfig can't depend on
+// the GlobalISel library, and so we use this in the interface between them
+// so that the derived classes in GISel can reference generic opcodes.
+class CSEConfigBase {
+public:
+ virtual ~CSEConfigBase() = default;
+ // Hook for defining which Generic instructions should be CSEd.
+ // GISelCSEInfo currently only calls this hook when dealing with generic
+ // opcodes.
+ virtual bool shouldCSEOpc(unsigned Opc) { return false; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CSECONFIG_BASE_H
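To illustrate the hook on the new CSEConfigBase above, a hypothetical derived configuration that CSEs only a couple of generic opcodes; the opcode choice is arbitrary and the class name is made up.

    #include "llvm/CodeGen/CSEConfigBase.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    // Made-up config; the real GISel configs live in the GlobalISel library.
    class ExampleCSEConfig : public llvm::CSEConfigBase {
    public:
      bool shouldCSEOpc(unsigned Opc) override {
        // CSE only generic constants and adds in this example.
        return Opc == llvm::TargetOpcode::G_CONSTANT ||
               Opc == llvm::TargetOpcode::G_ADD;
      }
    };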
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index f85767f1fc11..9b8b7324f30a 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -1,9 +1,8 @@
//===- lib/CodeGen/CalcSpillWeights.h ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index efcf80ba0b4e..aa339e1cc913 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -1,9 +1,8 @@
//===- llvm/CallingConvLower.h - Calling Conventions ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -146,7 +145,7 @@ public:
bool needsCustom() const { return isCustom; }
- unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
+ Register getLocReg() const { assert(isRegLoc()); return Loc; }
unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
unsigned getExtraInfo() const { return Loc; }
MVT getLocVT() const { return LocVT; }
@@ -557,7 +556,7 @@ public:
// Sort the locations of the arguments according to their original position.
SmallVector<CCValAssign, 16> TmpArgLocs;
- std::swap(TmpArgLocs, Locs);
+ TmpArgLocs.swap(Locs);
auto B = TmpArgLocs.begin(), E = TmpArgLocs.end();
std::merge(B, B + NumFirstPassLocs, B + NumFirstPassLocs, E,
std::back_inserter(Locs),
diff --git a/include/llvm/CodeGen/CommandFlags.inc b/include/llvm/CodeGen/CommandFlags.inc
index 568d329a5e8c..cb69e9f61405 100644
--- a/include/llvm/CodeGen/CommandFlags.inc
+++ b/include/llvm/CodeGen/CommandFlags.inc
@@ -1,9 +1,8 @@
//===-- CommandFlags.h - Command Line Flags Interface -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -272,6 +271,11 @@ static cl::opt<bool>
EnableAddrsig("addrsig", cl::desc("Emit an address-significance table"),
cl::init(false));
+static cl::opt<bool>
+ EnableDebugEntryValues("debug-entry-values",
+ cl::desc("Emit debug info about parameter's entry values"),
+ cl::init(false));
+
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
static TargetOptions InitTargetOptionsFromCodeGenFlags() {
@@ -301,6 +305,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.ExceptionModel = ExceptionModel;
Options.EmitStackSizeSection = EnableStackSizeSection;
Options.EmitAddrsig = EnableAddrsig;
+ Options.EnableDebugEntryValues = EnableDebugEntryValues;
Options.MCOptions = InitMCTargetOptionsFromFlags();
diff --git a/include/llvm/CodeGen/CostTable.h b/include/llvm/CodeGen/CostTable.h
index 48ad76971520..52f3bfaea180 100644
--- a/include/llvm/CodeGen/CostTable.h
+++ b/include/llvm/CodeGen/CostTable.h
@@ -1,9 +1,8 @@
//===-- CostTable.h - Instruction Cost Table handling -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/CodeGen/DAGCombine.h b/include/llvm/CodeGen/DAGCombine.h
index 8b5919005451..944187341455 100644
--- a/include/llvm/CodeGen/DAGCombine.h
+++ b/include/llvm/CodeGen/DAGCombine.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DAGCombine.h ------- SelectionDAG Nodes ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index d3aabe22f216..cf58ee0cabea 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This class implements a deterministic finite automaton (DFA) based
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index 7d486b1df56d..684f9e40ca5a 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -1,9 +1,8 @@
//===- lib/CodeGen/DIE.h - DWARF Info Entries -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,6 +38,7 @@ namespace llvm {
class AsmPrinter;
class DIE;
class DIEUnit;
+class DwarfCompileUnit;
class MCExpr;
class MCSection;
class MCSymbol;
@@ -231,6 +231,25 @@ public:
};
//===--------------------------------------------------------------------===//
+/// A BaseTypeRef DIE.
+class DIEBaseTypeRef {
+ const DwarfCompileUnit *CU;
+ const uint64_t Index;
+ static constexpr unsigned ULEB128PadSize = 4;
+
+public:
+ explicit DIEBaseTypeRef(const DwarfCompileUnit *TheCU, uint64_t Idx)
+ : CU(TheCU), Index(Idx) {}
+
+ /// EmitValue - Emit base type reference.
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ /// SizeOf - Determine size of the base type reference in bytes.
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
+
+ void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
/// A simple label difference DIE.
///
class DIEDelta {
@@ -350,7 +369,7 @@ private:
/// should be stored by reference instead of by value.
using ValTy = AlignedCharArrayUnion<DIEInteger, DIEString, DIEExpr, DIELabel,
DIEDelta *, DIEEntry, DIEBlock *,
- DIELoc *, DIELocList>;
+ DIELoc *, DIELocList, DIEBaseTypeRef *>;
static_assert(sizeof(ValTy) <= sizeof(uint64_t) ||
sizeof(ValTy) <= sizeof(void *),
@@ -502,6 +521,18 @@ struct IntrusiveBackListBase {
}
Last = &N;
}
+
+ void push_front(Node &N) {
+ assert(N.Next.getPointer() == &N && "Expected unlinked node");
+ assert(N.Next.getInt() == true && "Expected unlinked node");
+
+ if (Last) {
+ N.Next.setPointerAndInt(Last->Next.getPointer(), false);
+ Last->Next.setPointerAndInt(&N, true);
+ } else {
+ Last = &N;
+ }
+ }
};
template <class T> class IntrusiveBackList : IntrusiveBackListBase {
@@ -509,8 +540,15 @@ public:
using IntrusiveBackListBase::empty;
void push_back(T &N) { IntrusiveBackListBase::push_back(N); }
+ void push_front(T &N) { IntrusiveBackListBase::push_front(N); }
T &back() { return *static_cast<T *>(Last); }
const T &back() const { return *static_cast<T *>(Last); }
+ T &front() {
+ return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
+ }
+ const T &front() const {
+ return *static_cast<T *>(Last ? Last->Next.getPointer() : nullptr);
+ }
class const_iterator;
class iterator
@@ -760,7 +798,7 @@ public:
///
/// \returns the DIEUnit that represents the compile or type unit that owns
/// this DIE, or NULL if this DIE hasn't been added to a unit DIE.
- const DIEUnit *getUnit() const;
+ DIEUnit *getUnit() const;
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
@@ -773,6 +811,13 @@ public:
return Children.back();
}
+ DIE &addChildFront(DIE *Child) {
+ assert(!Child->getParent() && "Child should be orphaned");
+ Child->Owner = this;
+ Children.push_front(*Child);
+ return Children.front();
+ }
+
/// Find a value in the DIE with the attribute given.
///
/// Returns a default-constructed DIEValue (where \a DIEValue::getType()
@@ -800,7 +845,7 @@ class DIEUnit {
const uint16_t Version; /// The Dwarf version number for this unit.
const uint8_t AddrSize; /// The size in bytes of an address for this unit.
protected:
- ~DIEUnit() = default;
+ virtual ~DIEUnit() = default;
public:
DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag);
diff --git a/include/llvm/CodeGen/DIEValue.def b/include/llvm/CodeGen/DIEValue.def
index a3fce9b1d20c..92afeb3868b4 100644
--- a/include/llvm/CodeGen/DIEValue.def
+++ b/include/llvm/CodeGen/DIEValue.def
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DIEValue.def - DIEValue types ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,6 +34,7 @@ HANDLE_DIEVALUE_SMALL(Integer)
HANDLE_DIEVALUE_SMALL(String)
HANDLE_DIEVALUE_SMALL(Expr)
HANDLE_DIEVALUE_SMALL(Label)
+HANDLE_DIEVALUE_LARGE(BaseTypeRef)
HANDLE_DIEVALUE_LARGE(Delta)
HANDLE_DIEVALUE_SMALL(Entry)
HANDLE_DIEVALUE_LARGE(Block)
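
// Illustrative sketch (not part of the patch) of the usual .def-file consumption
// pattern. It assumes DIEValue.def follows the common LLVM convention of
// defaulting HANDLE_DIEVALUE_SMALL/LARGE to HANDLE_DIEVALUE, so the new
// BaseTypeRef kind is picked up automatically.
#include "llvm/Support/raw_ostream.h"

static void dumpDIEValueKinds() {
#define HANDLE_DIEVALUE(T) llvm::errs() << #T << '\n';
#include "llvm/CodeGen/DIEValue.def"
}
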
diff --git a/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index befc28f084e7..7eec75bc81bf 100644
--- a/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DbgEntityHistoryCalculator.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_CODEGEN_DBGVALUEHISTORYCALCULATOR_H
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include <utility>
@@ -22,35 +22,76 @@ class MachineFunction;
class MachineInstr;
class TargetRegisterInfo;
-// For each user variable, keep a list of instruction ranges where this variable
-// is accessible. The variables are listed in order of appearance.
+/// For each user variable, keep a list of instruction ranges where this
+/// variable is accessible. The variables are listed in order of appearance.
class DbgValueHistoryMap {
- // Each instruction range starts with a DBG_VALUE instruction, specifying the
- // location of a variable, which is assumed to be valid until the end of the
- // range. If end is not specified, location is valid until the start
- // instruction of the next instruction range, or until the end of the
- // function.
public:
- using InstrRange = std::pair<const MachineInstr *, const MachineInstr *>;
- using InstrRanges = SmallVector<InstrRange, 4>;
+ /// Index in the entry vector.
+ typedef size_t EntryIndex;
+
+ /// Special value to indicate that an entry is valid until the end of the
+ /// function.
+ static const EntryIndex NoEntry = std::numeric_limits<EntryIndex>::max();
+
+ /// Specifies a change in a variable's debug value history.
+ ///
+ /// There exist two types of entries:
+ ///
+ /// * Debug value entry:
+ ///
+ /// A new debug value becomes live. If the entry's \p EndIndex is \p NoEntry,
+ /// the value is valid until the end of the function. For other values, the
+ /// index points to the entry in the entry vector that ends this debug
+ /// value. The ending entry can either be an overlapping debug value, or
+ /// an instruction that clobbers the value.
+ ///
+ /// * Clobbering entry:
+ ///
+ /// This entry's instruction clobbers one or more preceding
+ /// register-described debug values that have their end index
+ /// set to this entry's position in the entry vector.
+ class Entry {
+ public:
+ enum EntryKind { DbgValue, Clobber };
+
+ Entry(const MachineInstr *Instr, EntryKind Kind)
+ : Instr(Instr, Kind), EndIndex(NoEntry) {}
+
+ const MachineInstr *getInstr() const { return Instr.getPointer(); }
+ EntryIndex getEndIndex() const { return EndIndex; }
+ EntryKind getEntryKind() const { return Instr.getInt(); }
+
+ bool isClobber() const { return getEntryKind() == Clobber; }
+ bool isDbgValue() const { return getEntryKind() == DbgValue; }
+ bool isClosed() const { return EndIndex != NoEntry; }
+
+ void endEntry(EntryIndex EndIndex);
+
+ private:
+ PointerIntPair<const MachineInstr *, 1, EntryKind> Instr;
+ EntryIndex EndIndex;
+ };
+ using Entries = SmallVector<Entry, 4>;
using InlinedEntity = std::pair<const DINode *, const DILocation *>;
- using InstrRangesMap = MapVector<InlinedEntity, InstrRanges>;
+ using EntriesMap = MapVector<InlinedEntity, Entries>;
private:
- InstrRangesMap VarInstrRanges;
+ EntriesMap VarEntries;
public:
- void startInstrRange(InlinedEntity Var, const MachineInstr &MI);
- void endInstrRange(InlinedEntity Var, const MachineInstr &MI);
-
- // Returns register currently describing @Var. If @Var is currently
- // unaccessible or is not described by a register, returns 0.
- unsigned getRegisterForVar(InlinedEntity Var) const;
-
- bool empty() const { return VarInstrRanges.empty(); }
- void clear() { VarInstrRanges.clear(); }
- InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
- InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
+ bool startDbgValue(InlinedEntity Var, const MachineInstr &MI,
+ EntryIndex &NewIndex);
+ EntryIndex startClobber(InlinedEntity Var, const MachineInstr &MI);
+
+ Entry &getEntry(InlinedEntity Var, EntryIndex Index) {
+ auto &Entries = VarEntries[Var];
+ return Entries[Index];
+ }
+
+ bool empty() const { return VarEntries.empty(); }
+ void clear() { VarEntries.clear(); }
+ EntriesMap::const_iterator begin() const { return VarEntries.begin(); }
+ EntriesMap::const_iterator end() const { return VarEntries.end(); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const;
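
// Illustrative sketch (not part of the patch) of the calling pattern implied by
// the Entry documentation above; Var, MI and ClobberMI stand in for a real
// inlined entity, a DBG_VALUE, and a clobbering instruction.
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"

static void recordHistorySketch(llvm::DbgValueHistoryMap &History,
                                llvm::DbgValueHistoryMap::InlinedEntity Var,
                                const llvm::MachineInstr &MI,
                                const llvm::MachineInstr &ClobberMI) {
  using EntryIndex = llvm::DbgValueHistoryMap::EntryIndex;

  // Open a debug-value entry; until it is closed, the value is assumed to be
  // live to the end of the function (EndIndex == NoEntry).
  EntryIndex ValueIdx;
  if (!History.startDbgValue(Var, MI, ValueIdx))
    return;

  // A later instruction clobbers the described register: record a clobber
  // entry and use its index to close the still-open debug value.
  EntryIndex ClobberIdx = History.startClobber(Var, ClobberMI);
  History.getEntry(Var, ValueIdx).endEntry(ClobberIdx);
}
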
diff --git a/include/llvm/CodeGen/DebugHandlerBase.h b/include/llvm/CodeGen/DebugHandlerBase.h
index 4f0d14d317f2..4008d597395e 100644
--- a/include/llvm/CodeGen/DebugHandlerBase.h
+++ b/include/llvm/CodeGen/DebugHandlerBase.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DebugHandlerBase.h -----------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -130,7 +129,7 @@ public:
const MCExpr *getFunctionLocalOffsetAfterInsn(const MachineInstr *MI);
/// If this type is derived from a base type then return base type size.
- static uint64_t getBaseTypeSize(const DITypeRef TyRef);
+ static uint64_t getBaseTypeSize(const DIType *Ty);
};
}
diff --git a/include/llvm/CodeGen/DwarfStringPoolEntry.h b/include/llvm/CodeGen/DwarfStringPoolEntry.h
index 8b1a7af17bbf..e189352a7b2d 100644
--- a/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfStringPoolEntry.h - String pool entry --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/EdgeBundles.h b/include/llvm/CodeGen/EdgeBundles.h
index c31fad246c96..28cdf54e0575 100644
--- a/include/llvm/CodeGen/EdgeBundles.h
+++ b/include/llvm/CodeGen/EdgeBundles.h
@@ -1,9 +1,8 @@
//===-------- EdgeBundles.h - Bundles of CFG edges --------------*- c++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ExecutionDomainFix.h b/include/llvm/CodeGen/ExecutionDomainFix.h
index 338c214dd073..6836678e2101 100644
--- a/include/llvm/CodeGen/ExecutionDomainFix.h
+++ b/include/llvm/CodeGen/ExecutionDomainFix.h
@@ -1,9 +1,8 @@
//==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ExpandReductions.h b/include/llvm/CodeGen/ExpandReductions.h
index c6aaaad967b3..5dbed07873c1 100644
--- a/include/llvm/CodeGen/ExpandReductions.h
+++ b/include/llvm/CodeGen/ExpandReductions.h
@@ -1,9 +1,8 @@
//===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 865d8a88b8cc..f09b59daf4dd 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -1,9 +1,8 @@
//===- FastISel.h - Definition of the FastISel class ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -528,7 +527,7 @@ protected:
/// Select and emit code for a binary operator instruction, which has
/// an opcode which directly corresponds to the given ISD opcode.
bool selectBinaryOp(const User *I, unsigned ISDOpcode);
- bool selectFNeg(const User *I);
+ bool selectFNeg(const User *I, const Value *In);
bool selectGetElementPtr(const User *I);
bool selectStackmap(const CallInst *I);
bool selectPatchpoint(const CallInst *I);
diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h
index 55e25c9823b1..a1e2349c413e 100644
--- a/include/llvm/CodeGen/FaultMaps.h
+++ b/include/llvm/CodeGen/FaultMaps.h
@@ -1,9 +1,8 @@
//===- FaultMaps.h - The "FaultMaps" section --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 7c658515de09..fb60191abd3a 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -1,9 +1,8 @@
//===- FunctionLoweringInfo.h - Lower functions from LLVM IR ---*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,13 +13,14 @@
#ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
#define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
-
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -57,6 +57,7 @@ public:
const TargetLowering *TLI;
MachineRegisterInfo *RegInfo;
BranchProbabilityInfo *BPI;
+ const LegacyDivergenceAnalysis *DA;
/// CanLowerReturn - true iff the function's return value can be lowered to
/// registers.
bool CanLowerReturn;
@@ -71,48 +72,6 @@ public:
/// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
- /// A map from swifterror value in a basic block to the virtual register it is
- /// currently represented by.
- DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
- SwiftErrorVRegDefMap;
-
- /// A list of upward exposed vreg uses that need to be satisfied by either a
- /// copy def or a phi node at the beginning of the basic block representing
- /// the predecessor(s) swifterror value.
- DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
- SwiftErrorVRegUpwardsUse;
-
- /// A map from instructions that define/use a swifterror value to the virtual
- /// register that represents that def/use.
- llvm::DenseMap<PointerIntPair<const Instruction *, 1, bool>, unsigned>
- SwiftErrorVRegDefUses;
-
- /// The swifterror argument of the current function.
- const Value *SwiftErrorArg;
-
- using SwiftErrorValues = SmallVector<const Value*, 1>;
- /// A function can only have a single swifterror argument. And if it does
- /// have a swifterror argument, it must be the first entry in
- /// SwiftErrorVals.
- SwiftErrorValues SwiftErrorVals;
-
- /// Get or create the swifterror value virtual register in
- /// SwiftErrorVRegDefMap for this basic block.
- unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *,
- const Value *);
-
- /// Set the swifterror virtual register in the SwiftErrorVRegDefMap for this
- /// basic block.
- void setCurrentSwiftErrorVReg(const MachineBasicBlock *MBB, const Value *,
- unsigned);
-
- /// Get or create the swifterror value virtual register for a def of a
- /// swifterror by an instruction.
- std::pair<unsigned, bool> getOrCreateSwiftErrorVRegDefAt(const Instruction *);
- std::pair<unsigned, bool>
- getOrCreateSwiftErrorVRegUseAt(const Instruction *, const MachineBasicBlock *,
- const Value *);
-
/// ValueMap - Since we emit code for the function a basic block at a time,
/// we must remember which virtual registers hold the values for
/// cross-basic-block values.
@@ -175,6 +134,10 @@ public:
/// function arguments that are inserted after scheduling is completed.
SmallVector<MachineInstr*, 8> ArgDbgValues;
+  /// Bitvector with a bit set if the corresponding argument is described in
+  /// ArgDbgValues. Bits are indexed by argument number, following Argument
+  /// numbering.
+ BitVector DescribedArgs;
+
/// RegFixups - Registers which need to be replaced after isel is done.
DenseMap<unsigned, unsigned> RegFixups;
@@ -236,9 +199,11 @@ public:
return ValueMap.count(V);
}
- unsigned CreateReg(MVT VT);
+ unsigned CreateReg(MVT VT, bool isDivergent = false);
+
+ unsigned CreateRegs(const Value *V);
- unsigned CreateRegs(Type *Ty);
+ unsigned CreateRegs(Type *Ty, bool isDivergent = false);
unsigned InitializeRegForValue(const Value *V) {
// Tokens never live in vregs.
@@ -247,7 +212,7 @@ public:
unsigned &R = ValueMap[V];
assert(R == 0 && "Already initialized this value register!");
assert(VirtReg2Value.empty());
- return R = CreateRegs(V->getType());
+ return R = CreateRegs(V);
}
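
// Illustrative sketch (not part of the patch) exercising the two overloads
// declared above; FuncInfo is assumed to be a FunctionLoweringInfo that has
// already been set up, including its LegacyDivergenceAnalysis pointer DA.
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/Support/MachineValueType.h"

static unsigned createVRegsSketch(llvm::FunctionLoweringInfo &FuncInfo,
                                  const llvm::Value *V, llvm::MVT VT) {
  // Type-based creation with an explicit divergence hint.
  unsigned Scratch = FuncInfo.CreateReg(VT, /*isDivergent=*/true);
  (void)Scratch;
  // Value-based creation: this overload can consult FuncInfo.DA to decide
  // whether the registers created for V should be divergent.
  return FuncInfo.CreateRegs(V);
}
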
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 7fb27202c122..77cd356c49dd 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -1,9 +1,8 @@
//===- GCMetadata.h - Garbage collector metadata ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h
index 5f1efb2ce02c..f9527c9f8752 100644
--- a/include/llvm/CodeGen/GCMetadataPrinter.h
+++ b/include/llvm/CodeGen/GCMetadataPrinter.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index 5a60cd7cb823..c5731528da4e 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GCStrategy.h - Garbage collection -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/include/llvm/CodeGen/GlobalISel/CSEInfo.h
index ce2d285a99e5..5a44e67992ad 100644
--- a/include/llvm/CodeGen/GlobalISel/CSEInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/CSEInfo.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#define LLVM_CODEGEN_GLOBALISEL_CSEINFO_H
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -37,25 +37,27 @@ public:
void Profile(FoldingSetNodeID &ID);
};
-// Class representing some configuration that can be done during CSE analysis.
-// Currently it only supports shouldCSE method that each pass can set.
-class CSEConfig {
+// A CSE config for fully optimized builds.
+class CSEConfigFull : public CSEConfigBase {
public:
- virtual ~CSEConfig() = default;
- // Hook for defining which Generic instructions should be CSEd.
- // GISelCSEInfo currently only calls this hook when dealing with generic
- // opcodes.
- virtual bool shouldCSEOpc(unsigned Opc);
+ virtual ~CSEConfigFull() = default;
+ virtual bool shouldCSEOpc(unsigned Opc) override;
};
-// TODO: Find a better place for this.
// Commonly used for O0 config.
-class CSEConfigConstantOnly : public CSEConfig {
+class CSEConfigConstantOnly : public CSEConfigBase {
public:
virtual ~CSEConfigConstantOnly() = default;
virtual bool shouldCSEOpc(unsigned Opc) override;
};
+// Returns the standard expected CSEConfig for the given optimization level.
+// We have this logic here so targets can make use of it from their derived
+// TargetPassConfig, but can't put this logic into TargetPassConfig directly
+// because the CodeGen library can't depend on GlobalISel.
+std::unique_ptr<CSEConfigBase>
+getStandardCSEConfigForOpt(CodeGenOpt::Level Level);
+
/// The CSE Analysis object.
/// This installs itself as a delegate to the MachineFunction to track
/// new instructions as well as deletions. It however will not be able to
@@ -74,7 +76,7 @@ class GISelCSEInfo : public GISelChangeObserver {
FoldingSet<UniqueMachineInstr> CSEMap;
MachineRegisterInfo *MRI = nullptr;
MachineFunction *MF = nullptr;
- std::unique_ptr<CSEConfig> CSEOpt;
+ std::unique_ptr<CSEConfigBase> CSEOpt;
/// Keep a cache of UniqueInstrs for each MachineInstr. In GISel,
/// often instructions are mutated (while their ID has completely changed).
/// Whenever mutation happens, invalidate the UniqueMachineInstr for the
@@ -139,7 +141,9 @@ public:
void releaseMemory();
- void setCSEConfig(std::unique_ptr<CSEConfig> Opt) { CSEOpt = std::move(Opt); }
+ void setCSEConfig(std::unique_ptr<CSEConfigBase> Opt) {
+ CSEOpt = std::move(Opt);
+ }
bool shouldCSE(unsigned Opc) const;
@@ -199,11 +203,12 @@ class GISelCSEAnalysisWrapper {
bool AlreadyComputed = false;
public:
- /// Takes a CSEConfig object that defines what opcodes get CSEd.
+ /// Takes a CSEConfigBase object that defines what opcodes get CSEd.
/// If CSEConfig is already set, and the CSE Analysis has been preserved,
/// it will not use the new CSEOpt(use Recompute to force using the new
/// CSEOpt).
- GISelCSEInfo &get(std::unique_ptr<CSEConfig> CSEOpt, bool ReCompute = false);
+ GISelCSEInfo &get(std::unique_ptr<CSEConfigBase> CSEOpt,
+ bool ReCompute = false);
void setMF(MachineFunction &MFunc) { MF = &MFunc; }
void setComputed(bool Computed) { AlreadyComputed = Computed; }
void releaseMemory() { Info.releaseMemory(); }
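
// Illustrative sketch (not part of the patch) of the intent stated above: a
// target-side helper picks up the standard CSE configuration for its
// optimization level instead of constructing a config by hand. The helper name
// is an assumption for illustration.
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/Support/CodeGen.h"
#include <memory>

static std::unique_ptr<llvm::CSEConfigBase>
getMyTargetCSEConfig(llvm::CodeGenOpt::Level OptLevel) {
  // Per the class comments above: full CSE when optimizing, constants-only
  // CSE for the O0 configuration.
  return llvm::getStandardCSEConfigForOpt(OptLevel);
}
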
diff --git a/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
index a8fb736ebbb5..4f95335db74b 100644
--- a/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.h --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/CodeGen/GlobalISel/CallLowering.h b/include/llvm/CodeGen/GlobalISel/CallLowering.h
index ab498e8f070b..d717121ad78e 100644
--- a/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/CallLowering.h - Call lowering ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -16,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallSite.h"
@@ -27,6 +27,7 @@
namespace llvm {
+class CCState;
class DataLayout;
class Function;
class MachineIRBuilder;
@@ -43,14 +44,19 @@ class CallLowering {
virtual void anchor();
public:
struct ArgInfo {
- unsigned Reg;
+ SmallVector<Register, 4> Regs;
Type *Ty;
ISD::ArgFlagsTy Flags;
bool IsFixed;
- ArgInfo(unsigned Reg, Type *Ty, ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{},
- bool IsFixed = true)
- : Reg(Reg), Ty(Ty), Flags(Flags), IsFixed(IsFixed) {}
+ ArgInfo(ArrayRef<Register> Regs, Type *Ty,
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true)
+ : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags),
+ IsFixed(IsFixed) {
+ // FIXME: We should have just one way of saying "no register".
+ assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) &&
+ "only void types should have no register");
+ }
};
/// Argument handling is mostly uniform between the four places that
@@ -66,24 +72,28 @@ public:
virtual ~ValueHandler() = default;
+ /// Returns true if the handler is dealing with formal arguments,
+ /// not with return values etc.
+ virtual bool isArgumentHandler() const { return false; }
+
/// Materialize a VReg containing the address of the specified
/// stack-based object. This is either based on a FrameIndex or
/// direct SP manipulation, depending on the context. \p MPO
/// should be initialized to an appropriate description of the
/// address created.
- virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ virtual Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) = 0;
/// The specified value has been assigned to a physical register,
/// handle the appropriate COPY (either to or from) and mark any
/// relevant uses/defines as needed.
- virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ virtual void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) = 0;
/// The specified value has been assigned to a stack
/// location. Load or store it there, with appropriate extension
/// if necessary.
- virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
+ virtual void assignValueToAddress(Register ValVReg, Register Addr,
uint64_t Size, MachinePointerInfo &MPO,
CCValAssign &VA) = 0;
@@ -98,7 +108,7 @@ public:
llvm_unreachable("Custom values not supported");
}
- unsigned extendRegister(unsigned ValReg, CCValAssign &VA);
+ Register extendRegister(Register ValReg, CCValAssign &VA);
virtual bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, const ArgInfo &Info,
@@ -130,39 +140,83 @@ protected:
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL,
const FuncInfoTy &FuncInfo) const;
+ /// Generate instructions for packing \p SrcRegs into one big register
+ /// corresponding to the aggregate type \p PackedTy.
+ ///
+ /// \param SrcRegs should contain one virtual register for each base type in
+ /// \p PackedTy, as returned by computeValueLLTs.
+ ///
+ /// \return The packed register.
+ Register packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Generate instructions for unpacking \p SrcReg into the \p DstRegs
+ /// corresponding to the aggregate type \p PackedTy.
+ ///
+ /// \param DstRegs should contain one virtual register for each base type in
+ /// \p PackedTy, as returned by computeValueLLTs.
+ void unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, Type *PackedTy,
+ MachineIRBuilder &MIRBuilder) const;
+
/// Invoke Handler::assignArg on each of the given \p Args and then use
/// \p Callback to move them to the assigned locations.
///
/// \return True if everything has succeeded, false otherwise.
bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const;
-
+ bool handleAssignments(CCState &CCState,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
+ ValueHandler &Handler) const;
public:
CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
virtual ~CallLowering() = default;
+ /// \return true if the target is capable of handling swifterror values that
+ /// have been promoted to a specified register. The extended versions of
+ /// lowerReturn and lowerCall should be implemented.
+ virtual bool supportSwiftError() const {
+ return false;
+ }
+
/// This hook must be implemented to lower outgoing return values, described
/// by \p Val, into the specified virtual registers \p VRegs.
/// This hook is used by GlobalISel.
///
+ /// \p SwiftErrorVReg is non-zero if the function has a swifterror parameter
+ /// that needs to be implicitly returned.
+ ///
/// \return True if the lowering succeeds, false otherwise.
virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const {
+ ArrayRef<Register> VRegs,
+ Register SwiftErrorVReg) const {
+ if (!supportSwiftError()) {
+ assert(SwiftErrorVReg == 0 && "attempt to use unsupported swifterror");
+ return lowerReturn(MIRBuilder, Val, VRegs);
+ }
+ return false;
+ }
+
+ /// This hook behaves as the extended lowerReturn function, but for targets
+ /// that do not support swifterror value promotion.
+ virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs) const {
return false;
}
/// This hook must be implemented to lower the incoming (formal)
- /// arguments, described by \p Args, for GlobalISel. Each argument
- /// must end up in the related virtual register described by VRegs.
- /// In other words, the first argument should end up in VRegs[0],
- /// the second in VRegs[1], and so on.
+ /// arguments, described by \p VRegs, for GlobalISel. Each argument
+ /// must end up in the related virtual registers described by \p VRegs.
+ /// In other words, the first argument should end up in \c VRegs[0],
+ /// the second in \c VRegs[1], and so on. For each argument, there will be one
+ /// register for each non-aggregate type, as returned by \c computeValueLLTs.
/// \p MIRBuilder is set to the proper insertion for the argument
/// lowering.
///
/// \return True if the lowering succeeded, false otherwise.
virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
- ArrayRef<unsigned> VRegs) const {
+ ArrayRef<ArrayRef<Register>> VRegs) const {
return false;
}
@@ -174,20 +228,31 @@ public:
/// \p Callee is the destination of the call. It should be either a register,
/// globaladdress, or externalsymbol.
///
- /// \p ResTy is the type returned by the function
- ///
- /// \p ResReg is the generic virtual register that the returned
- /// value should be lowered into.
+ /// \p OrigRet is a descriptor for the return type of the function.
///
- /// \p ArgTys is a list of the types each member of \p ArgRegs has; used by
- /// the target to decide which register/stack slot should be allocated.
+ /// \p OrigArgs is a list of descriptors of the arguments passed to the
+ /// function.
///
- /// \p ArgRegs is a list of virtual registers containing each argument that
- /// needs to be passed.
+ /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
+ /// parameter, and contains the vreg that the swifterror should be copied into
+ /// after the call.
///
/// \return true if the lowering succeeded, false otherwise.
virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
const MachineOperand &Callee, const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs,
+ Register SwiftErrorVReg) const {
+ if (!supportSwiftError()) {
+ assert(SwiftErrorVReg == 0 && "trying to use unsupported swifterror");
+ return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs);
+ }
+ return false;
+ }
+
+ /// This hook behaves as the extended lowerCall function, but for targets that
+ /// do not support swifterror value promotion.
+ virtual bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const {
return false;
}
@@ -197,11 +262,18 @@ public:
///
/// \p CI is the call/invoke instruction.
///
- /// \p ResReg is a register where the call's return value should be stored (or
- /// 0 if there is no return value).
+ /// \p ResRegs are the registers where the call's return value should be
+ /// stored (or 0 if there is no return value). There will be one register for
+ /// each non-aggregate type, as returned by \c computeValueLLTs.
+ ///
+ /// \p ArgRegs is a list of lists of virtual registers containing each
+ /// argument that needs to be passed (argument \c i should be placed in \c
+ /// ArgRegs[i]). For each argument, there will be one register for each
+ /// non-aggregate type, as returned by \c computeValueLLTs.
///
- /// \p ArgRegs is a list of virtual registers containing each argument that
- /// needs to be passed.
+ /// \p SwiftErrorVReg is non-zero if the call has a swifterror inout
+ /// parameter, and contains the vreg that the swifterror should be copied into
+ /// after the call.
///
/// \p GetCalleeReg is a callback to materialize a register for the callee if
/// the target determines it cannot jump to the destination based purely on \p
@@ -210,7 +282,8 @@ public:
///
/// \return true if the lowering succeeded, false otherwise.
bool lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
- unsigned ResReg, ArrayRef<unsigned> ArgRegs,
+ ArrayRef<Register> ResRegs,
+ ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
std::function<unsigned()> GetCalleeReg) const;
};
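
// Illustrative sketch (not part of the patch) of a target-side override using
// the swifterror-aware hooks added above; MyTargetCallLowering is hypothetical,
// and a real implementation would also emit the actual return sequence.
#include "llvm/CodeGen/GlobalISel/CallLowering.h"

class MyTargetCallLowering : public llvm::CallLowering {
public:
  using CallLowering::CallLowering;

  // Opting in makes GlobalISel call the extended lowerReturn/lowerCall
  // overloads with a non-zero SwiftErrorVReg when a swifterror value is live.
  bool supportSwiftError() const override { return true; }

  bool lowerReturn(llvm::MachineIRBuilder &MIRBuilder, const llvm::Value *Val,
                   llvm::ArrayRef<llvm::Register> VRegs,
                   llvm::Register SwiftErrorVReg) const override {
    // Sketch only: a target would copy SwiftErrorVReg into its swifterror
    // register here and then build the return instruction.
    (void)MIRBuilder;
    (void)Val;
    (void)VRegs;
    (void)SwiftErrorVReg;
    return true;
  }
};
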
diff --git a/include/llvm/CodeGen/GlobalISel/Combiner.h b/include/llvm/CodeGen/GlobalISel/Combiner.h
index b097c7817762..efe8bdf93664 100644
--- a/include/llvm/CodeGen/GlobalISel/Combiner.h
+++ b/include/llvm/CodeGen/GlobalISel/Combiner.h
@@ -1,9 +1,8 @@
-//== ----- llvm/CodeGen/GlobalISel/Combiner.h --------------------- == //
+//== ----- llvm/CodeGen/GlobalISel/Combiner.h -------------------*- C++ -*-== //
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 6e9ac01c1ee2..0c50c9c5e0cf 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/CombinerHelper.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------------===//
//
@@ -18,6 +17,9 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
#define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/Register.h"
+
namespace llvm {
class GISelChangeObserver;
@@ -26,6 +28,12 @@ class MachineRegisterInfo;
class MachineInstr;
class MachineOperand;
+struct PreferredTuple {
+ LLT Ty; // The result type of the extend.
+ unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
+ MachineInstr *MI;
+};
+
class CombinerHelper {
MachineIRBuilder &Builder;
MachineRegisterInfo &MRI;
@@ -35,20 +43,27 @@ public:
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B);
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
- void replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg, unsigned ToReg) const;
+ void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
/// Replace a single register operand with a new register and inform the
/// observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
- unsigned ToReg) const;
+ Register ToReg) const;
/// If \p MI is COPY, try to combine it.
/// Returns true if MI changed.
bool tryCombineCopy(MachineInstr &MI);
+ bool matchCombineCopy(MachineInstr &MI);
+ void applyCombineCopy(MachineInstr &MI);
/// If \p MI is extend that consumes the result of a load, try to combine it.
/// Returns true if MI changed.
bool tryCombineExtendingLoads(MachineInstr &MI);
+ bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+ void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+
+ bool matchCombineBr(MachineInstr &MI);
+ bool tryCombineBr(MachineInstr &MI);
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
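
// Illustrative sketch (not part of the patch) of the match/apply split
// introduced above; the real tryCombineExtendingLoads lives in
// CombinerHelper.cpp and may do more than this.
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

static bool tryCombineExtendingLoadsSketch(llvm::CombinerHelper &Helper,
                                           llvm::MachineInstr &MI) {
  // Phase 1: inspect MI only, recording the preferred extend in MatchInfo.
  llvm::PreferredTuple MatchInfo;
  if (!Helper.matchCombineExtendingLoads(MI, MatchInfo))
    return false;
  // Phase 2: mutate the MIR according to the recorded decision.
  Helper.applyCombineExtendingLoads(MI, MatchInfo);
  return true;
}
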
diff --git a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index d21aa3f725d9..3b09a8e2b479 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/CombinerInfo.h ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
index 220a571b21db..e817d9b4550e 100644
--- a/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/ConstantFoldingMIRBuilder.h --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
index c8e8a7a5a7cb..e5691cb35174 100644
--- a/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
+++ b/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h
@@ -1,9 +1,8 @@
-//===----- llvm/CodeGen/GlobalISel/GISelChangeObserver.h ------------------===//
+//===----- llvm/CodeGen/GlobalISel/GISelChangeObserver.h --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,10 +33,17 @@ public:
/// An instruction is about to be erased.
virtual void erasingInstr(MachineInstr &MI) = 0;
- /// An instruction was created and inserted into the function.
+
+ /// An instruction has been created and inserted into the function.
+ /// Note that the instruction might not be a fully fledged instruction at this
+ /// point and won't be if the MachineFunction::Delegate is calling it. This is
+ /// because the delegate only sees the construction of the MachineInstr before
+ /// operands have been added.
virtual void createdInstr(MachineInstr &MI) = 0;
+
/// This instruction is about to be mutated in some way.
virtual void changingInstr(MachineInstr &MI) = 0;
+
/// This instruction was mutated in some way.
virtual void changedInstr(MachineInstr &MI) = 0;
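
// Illustrative sketch (not part of the patch): a trivial observer implementing
// the pure virtual interface documented above; it only counts created
// instructions.
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"

class CountingObserver : public llvm::GISelChangeObserver {
  unsigned NumCreated = 0;

public:
  void erasingInstr(llvm::MachineInstr &MI) override { (void)MI; }
  // Per the note above, MI may not yet have its operands attached when this is
  // reached through the MachineFunction::Delegate path.
  void createdInstr(llvm::MachineInstr &MI) override { (void)MI; ++NumCreated; }
  void changingInstr(llvm::MachineInstr &MI) override { (void)MI; }
  void changedInstr(llvm::MachineInstr &MI) override { (void)MI; }
};
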
diff --git a/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index 1571841a208d..b0bb519283b1 100644
--- a/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -1,9 +1,8 @@
//===- GISelWorkList.h - Worklist for GISel passes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -33,23 +32,61 @@ class GISelWorkList {
SmallVector<MachineInstr *, N> Worklist;
DenseMap<MachineInstr *, unsigned> WorklistMap;
+#ifndef NDEBUG
+ bool Finalized = true;
+#endif
+
public:
- GISelWorkList() {}
+ GISelWorkList() : WorklistMap(N) {}
bool empty() const { return WorklistMap.empty(); }
unsigned size() const { return WorklistMap.size(); }
+  // Since we don't know ahead of time how many instructions we're going to add
+  // to the worklist, and migrating the DenseMap's elements is quite expensive
+  // every time we resize, only insert into the SmallVector (typically during
+  // the initial phase of populating lists). Before the worklist can be used,
+  // finalize() must be called; in asserts builds, using the list without
+  // finalizing triggers an assertion. Note that unlike insert, this does not
+  // check for duplicates, so the ideal place to use it is during the initial
+  // pre-populating phase of most passes.
+ void deferred_insert(MachineInstr *I) {
+ Worklist.push_back(I);
+#ifndef NDEBUG
+ Finalized = false;
+#endif
+ }
+
+ // This should only be called when using deferred_insert.
+ // This asserts that the WorklistMap is empty, and then
+ // inserts all the elements in the Worklist into the map.
+ // It also asserts if there are any duplicate elements found.
+ void finalize() {
+ assert(WorklistMap.empty() && "Expecting empty worklistmap");
+ if (Worklist.size() > N)
+ WorklistMap.reserve(Worklist.size());
+ for (unsigned i = 0; i < Worklist.size(); ++i)
+ if (!WorklistMap.try_emplace(Worklist[i], i).second)
+ llvm_unreachable("Duplicate elements in the list");
+#ifndef NDEBUG
+ Finalized = true;
+#endif
+ }
+
/// Add the specified instruction to the worklist if it isn't already in it.
void insert(MachineInstr *I) {
+ assert(Finalized && "GISelWorkList used without finalizing");
if (WorklistMap.try_emplace(I, Worklist.size()).second)
Worklist.push_back(I);
}
/// Remove I from the worklist if it exists.
void remove(const MachineInstr *I) {
+ assert((Finalized || WorklistMap.empty()) && "Neither finalized nor empty");
auto It = WorklistMap.find(I);
- if (It == WorklistMap.end()) return; // Not in worklist.
+ if (It == WorklistMap.end())
+ return; // Not in worklist.
// Don't bother moving everything down, just null out the slot.
Worklist[It->second] = nullptr;
@@ -63,6 +100,7 @@ public:
}
MachineInstr *pop_back_val() {
+ assert(Finalized && "GISelWorkList used without finalizing");
MachineInstr *I;
do {
I = Worklist.pop_back_val();
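
// Illustrative sketch (not part of the patch) of the deferred_insert/finalize
// protocol described in the comments above; the worklist size and the
// basic-block walk are illustrative.
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/MachineBasicBlock.h"

static void drainWorklistSketch(llvm::GISelWorkList<512> &WL,
                                llvm::MachineBasicBlock &MBB) {
  // Pre-populate cheaply: no DenseMap traffic and no duplicate checking.
  for (llvm::MachineInstr &MI : MBB)
    WL.deferred_insert(&MI);

  // Build the map once; in asserts builds, using the list before this call
  // trips the Finalized assertion.
  WL.finalize();

  // From here on, insert/remove/pop_back_val are valid.
  while (!WL.empty()) {
    llvm::MachineInstr *MI = WL.pop_back_val();
    (void)MI;
  }
}
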
diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index d1770bf6e4ce..8654ba83f08d 100644
--- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/IRTranslator.h - IRTranslator ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -23,7 +22,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Types.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Allocator.h"
#include <memory>
@@ -37,6 +38,7 @@ class CallInst;
class CallLowering;
class Constant;
class DataLayout;
+class FunctionLoweringInfo;
class Instruction;
class MachineBasicBlock;
class MachineFunction;
@@ -69,7 +71,7 @@ private:
public:
ValueToVRegInfo() = default;
- using VRegListT = SmallVector<unsigned, 1>;
+ using VRegListT = SmallVector<Register, 1>;
using OffsetListT = SmallVector<uint64_t, 1>;
using const_vreg_iterator =
@@ -164,6 +166,8 @@ private:
/// this function.
DenseMap<const AllocaInst *, int> FrameIndices;
+ SwiftErrorValueTracking SwiftError;
+
/// \name Methods for translating form LLVM IR to MachineInstr.
/// \see ::translate for general information on the translate methods.
/// @{
@@ -196,7 +200,7 @@ private:
/// the function.
///
/// \return true if the materialization succeeded.
- bool translate(const Constant &C, unsigned Reg);
+ bool translate(const Constant &C, Register Reg);
/// Translate an LLVM bitcast into generic IR. Either a COPY or a G_BITCAST is
/// emitted.
@@ -212,24 +216,27 @@ private:
bool translateMemfunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
unsigned ID);
- void getStackGuard(unsigned DstReg, MachineIRBuilder &MIRBuilder);
+ void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder);
+ /// Helper function for translateSimpleIntrinsic.
+ /// \return The generic opcode for \p IntrinsicID if \p IntrinsicID is a
+ /// simple intrinsic (ceil, fabs, etc.). Otherwise, returns
+ /// Intrinsic::not_intrinsic.
+ unsigned getSimpleIntrinsicOpcode(Intrinsic::ID ID);
+
+ /// Translates the intrinsics defined in getSimpleIntrinsicOpcode.
+ /// \return true if the translation succeeded.
+ bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder);
+
bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder);
bool translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder);
- // FIXME: temporary function to expose previous interface to call lowering
- // until it is refactored.
- /// Combines all component registers of \p V into a single scalar with size
- /// "max(Offsets) + last size".
- unsigned packRegs(const Value &V, MachineIRBuilder &MIRBuilder);
-
- void unpackRegs(const Value &V, unsigned Src, MachineIRBuilder &MIRBuilder);
-
/// Returns true if the value should be split into multiple LLTs.
/// If \p Offsets is given then the split type's offsets will be stored in it.
/// If \p Offsets is not empty it will be cleared first.
@@ -242,6 +249,8 @@ private:
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
+
bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
/// Translate one of LLVM's cast instructions into MachineInstrs, with the
@@ -278,7 +287,42 @@ private:
/// \pre \p U is a branch instruction.
bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
+ // Begin switch lowering functions.
+ bool emitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
+ MachineBasicBlock *HeaderBB);
+ void emitJumpTable(SwitchCG::JumpTable &JT, MachineBasicBlock *MBB);
+
+ void emitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB,
+ MachineIRBuilder &MIB);
+
+ bool lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB,
+ MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable);
+
+ bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+ Value *Cond,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable,
+ BranchProbability UnhandledProbs,
+ MachineBasicBlock *CurMBB,
+ MachineIRBuilder &MIB,
+ MachineBasicBlock *SwitchMBB);
+
+ bool lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB);
+
bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder);
+ // End switch lowering section.
bool translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder);
@@ -404,6 +448,7 @@ private:
bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder);
bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateFence(const User &U, MachineIRBuilder &MIRBuilder);
// Stubs to keep the compiler happy while we implement the rest of the
// translation.
@@ -419,9 +464,6 @@ private:
bool translateCatchSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
return false;
}
- bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
- return false;
- }
bool translateAddrSpaceCast(const User &U, MachineIRBuilder &MIRBuilder) {
return translateCast(TargetOpcode::G_ADDRSPACE_CAST, U, MIRBuilder);
}
@@ -466,19 +508,50 @@ private:
/// Current optimization remark emitter. Used to report failures.
std::unique_ptr<OptimizationRemarkEmitter> ORE;
+ FunctionLoweringInfo FuncInfo;
+
+ // True when either the Target Machine specifies no optimizations or the
+ // function has the optnone attribute.
+ bool EnableOpts = false;
+
+ /// Switch analysis and optimization.
+ class GISelSwitchLowering : public SwitchCG::SwitchLowering {
+ public:
+ GISelSwitchLowering(IRTranslator *irt, FunctionLoweringInfo &funcinfo)
+ : SwitchLowering(funcinfo), IRT(irt) {
+ assert(irt && "irt is null!");
+ }
+
+ virtual void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown()) override {
+ IRT->addSuccessorWithProb(Src, Dst, Prob);
+ }
+
+ virtual ~GISelSwitchLowering() = default;
+
+ private:
+ IRTranslator *IRT;
+ };
+
+ std::unique_ptr<GISelSwitchLowering> SL;
+
// * Insert all the code needed to materialize the constants
// at the proper place. E.g., Entry block or dominator block
// of each constant depending on how fancy we want to be.
// * Clear the different maps.
void finalizeFunction();
+ // Handle emitting jump tables for each basic block.
+ void finalizeBasicBlock();
+
/// Get the VRegs that represent \p Val.
/// Non-aggregate types have just one corresponding VReg and the list can be
/// used as a single "unsigned". Aggregates get flattened. If such VRegs do
/// not exist, they are created.
- ArrayRef<unsigned> getOrCreateVRegs(const Value &Val);
+ ArrayRef<Register> getOrCreateVRegs(const Value &Val);
- unsigned getOrCreateVReg(const Value &Val) {
+ Register getOrCreateVReg(const Value &Val) {
auto Regs = getOrCreateVRegs(Val);
if (Regs.empty())
return 0;
@@ -522,6 +595,14 @@ private:
return SmallVector<MachineBasicBlock *, 4>(1, &getMBB(*Edge.first));
}
+ /// Return branch probability calculated by BranchProbabilityInfo for IR
+ /// blocks.
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+
+ void addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob);
+
public:
// Ctor, nothing fancy.
IRTranslator();
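
// Illustrative sketch (not part of the patch) of the flattening rule that
// getOrCreateVRegs relies on: one virtual register per non-aggregate leaf type,
// as returned by computeValueLLTs. The struct type is illustrative.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/LowLevelTypeImpl.h"

static void flattenAggregateSketch(const llvm::DataLayout &DL,
                                   llvm::LLVMContext &Ctx) {
  // A value of type { i32, i64 } flattens into two leaf LLTs (s32 and s64),
  // so the IRTranslator creates two virtual registers for it.
  llvm::Type *Agg = llvm::StructType::get(llvm::Type::getInt32Ty(Ctx),
                                          llvm::Type::getInt64Ty(Ctx));
  llvm::SmallVector<llvm::LLT, 4> Leaves;
  llvm::computeValueLLTs(DL, *Agg, Leaves);
  // Leaves.size() == 2 at this point.
}
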
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
index 01521c46ab6a..1af46e0a9e76 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
@@ -1,9 +1,8 @@
//== llvm/CodeGen/GlobalISel/InstructionSelect.h -----------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file This file describes the interface of the MachineFunctionPass
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index 471def7f45a3..e9b93be76754 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/InstructionSelector.h ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -139,6 +138,16 @@ enum {
/// - MMOIdx - MMO index
/// - Size - The size in bytes of the memory access
GIM_CheckMemorySizeEqualTo,
+
+ /// Check the address space of the memory access for the given machine memory
+ /// operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - NumAddrSpace - Number of valid address spaces
+ /// - AddrSpaceN - An allowed space of the memory access
+ /// - AddrSpaceN+1 ...
+ GIM_CheckMemoryAddressSpace,
+
/// Check the size of the memory access for the given machine memory operand
/// against the size of an operand.
/// - InsnID - Instruction ID
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index 2003a79f6b20..e8ee4af0cb0b 100644
--- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -371,6 +370,45 @@ bool InstructionSelector::executeMatchTable(
return false;
break;
}
+ case GIM_CheckMemoryAddressSpace: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t MMOIdx = MatchTable[CurrentIdx++];
+ // This accepts a list of possible address spaces.
+ const int NumAddrSpace = MatchTable[CurrentIdx++];
+
+ if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
+
+ // Need to still jump to the end of the list of address spaces if we find
+ // a match earlier.
+ const uint64_t LastIdx = CurrentIdx + NumAddrSpace;
+
+ const MachineMemOperand *MMO
+ = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ const unsigned MMOAddrSpace = MMO->getAddrSpace();
+
+ bool Success = false;
+ for (int I = 0; I != NumAddrSpace; ++I) {
+ unsigned AddrSpace = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(
+ TgtInstructionSelector::getName(),
+ dbgs() << "addrspace(" << MMOAddrSpace << ") vs "
+ << AddrSpace << '\n');
+
+ if (AddrSpace == MMOAddrSpace) {
+ Success = true;
+ break;
+ }
+ }
+
+ CurrentIdx = LastIdx;
+ if (!Success && handleReject() == RejectAndGiveUp)
+ return false;
+ break;
+ }
case GIM_CheckMemorySizeEqualTo: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t MMOIdx = MatchTable[CurrentIdx++];
@@ -438,15 +476,15 @@ bool InstructionSelector::executeMatchTable(
unsigned Size = MRI.getType(MO.getReg()).getSizeInBits();
if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT &&
- MMO->getSize() * 8 != Size) {
+ MMO->getSizeInBits() != Size) {
if (handleReject() == RejectAndGiveUp)
return false;
} else if (MatcherOpcode == GIM_CheckMemorySizeLessThanLLT &&
- MMO->getSize() * 8 >= Size) {
+ MMO->getSizeInBits() >= Size) {
if (handleReject() == RejectAndGiveUp)
return false;
} else if (MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT &&
- MMO->getSize() * 8 <= Size)
+ MMO->getSizeInBits() <= Size)
if (handleReject() == RejectAndGiveUp)
return false;
@@ -479,17 +517,19 @@ bool InstructionSelector::executeMatchTable(
<< InsnID << "]->getOperand(" << OpIdx
<< "), SizeInBits=" << SizeInBits << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
+ MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
+ const LLT Ty = MRI.getType(MO.getReg());
+
// iPTR must be looked up in the target.
if (SizeInBits == 0) {
MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent();
- SizeInBits = MF->getDataLayout().getPointerSizeInBits(0);
+ const unsigned AddrSpace = Ty.getAddressSpace();
+ SizeInBits = MF->getDataLayout().getPointerSizeInBits(AddrSpace);
}
assert(SizeInBits != 0 && "Pointer size must be known");
- MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
if (MO.isReg()) {
- const LLT &Ty = MRI.getType(MO.getReg());
if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits)
if (handleReject() == RejectAndGiveUp)
return false;
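The iPTR handling in this hunk now queries the DataLayout for the pointer width of the
operand's own address space rather than hard-coding address space 0. A rough standalone
illustration of that lookup, with an invented table of widths standing in for the
DataLayout (the real code calls MF->getDataLayout().getPointerSizeInBits(AddrSpace)):

    #include <map>

    // Falls back to the address-space-0 width when a space has no explicit
    // entry, roughly what a DataLayout without that entry implies.
    unsigned pointerSizeInBits(const std::map<unsigned, unsigned> &PtrWidths,
                               unsigned AddrSpace) {
      auto It = PtrWidths.find(AddrSpace);
      return It != PtrWidths.end() ? It->second : PtrWidths.at(0);
    }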
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 20bec7650179..a22778b8848c 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains some helper functions which try to cleanup artifacts
@@ -29,6 +28,18 @@ class LegalizationArtifactCombiner {
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
+ static bool isArtifactCast(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
public:
LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
const LegalizerInfo &LI)
@@ -40,11 +51,11 @@ public:
return false;
Builder.setInstr(MI);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
// aext(trunc x) - > aext/copy/trunc x
- unsigned TruncSrc;
+ Register TruncSrc;
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
@@ -53,7 +64,7 @@ public:
}
// aext([asz]ext x) -> [asz]ext x
- unsigned ExtSrc;
+ Register ExtSrc;
MachineInstr *ExtMI;
if (mi_match(SrcReg, MRI,
m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
@@ -63,6 +74,20 @@ public:
markInstAndDefDead(MI, *ExtMI, DeadInsts);
return true;
}
+
+ // Try to fold aext(g_constant) when the larger constant type is legal.
+ // Can't use MIPattern because we don't have a specific constant in mind.
+ auto *SrcMI = MRI.getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ const LLT &DstTy = MRI.getType(DstReg);
+ if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
+ auto &CstVal = SrcMI->getOperand(1);
+ Builder.buildConstant(
+ DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
+ markInstAndDefDead(MI, *SrcMI, DeadInsts);
+ return true;
+ }
+ }
return tryFoldImplicitDef(MI, DeadInsts);
}
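The new fold above rebuilds an extended G_CONSTANT directly in the wider type (sign
extension here, zero extension in the matching zext fold later in this file). A
standalone sketch of the underlying bit manipulation, assuming the value fits in 64 bits
(the real code uses APInt, which handles arbitrary widths):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low FromBits bits of Val into a 64-bit value
    // (two's-complement reinterpretation of the widened pattern).
    int64_t signExtend64(uint64_t Val, unsigned FromBits) {
      assert(FromBits >= 1 && FromBits <= 64);
      if (FromBits == 64)
        return int64_t(Val);
      const uint64_t Mask = (uint64_t(1) << FromBits) - 1;
      const uint64_t SignBit = uint64_t(1) << (FromBits - 1);
      Val &= Mask;
      if (Val & SignBit)
        Val |= ~Mask; // replicate the sign bit into the upper bits
      return int64_t(Val);
    }

    // Zero-extend the low FromBits bits of Val into a 64-bit value.
    uint64_t zeroExtend64(uint64_t Val, unsigned FromBits) {
      assert(FromBits >= 1 && FromBits <= 64);
      return FromBits == 64 ? Val : Val & ((uint64_t(1) << FromBits) - 1);
    }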
@@ -73,25 +98,39 @@ public:
return false;
Builder.setInstr(MI);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
// zext(trunc x) - > and (aext/copy/trunc x), mask
- unsigned TruncSrc;
+ Register TruncSrc;
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
- isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+ isConstantUnsupported(DstTy))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- APInt Mask = APInt::getAllOnesValue(SrcTy.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
auto MIBMask = Builder.buildConstant(DstTy, Mask.getZExtValue());
Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
MIBMask);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
+
+ // Try to fold zext(g_constant) when the larger constant type is legal.
+ // Can't use MIPattern because we don't have a specific constant in mind.
+ auto *SrcMI = MRI.getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ const LLT &DstTy = MRI.getType(DstReg);
+ if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
+ auto &CstVal = SrcMI->getOperand(1);
+ Builder.buildConstant(
+ DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
+ markInstAndDefDead(MI, *SrcMI, DeadInsts);
+ return true;
+ }
+ }
return tryFoldImplicitDef(MI, DeadInsts);
}
@@ -102,20 +141,22 @@ public:
return false;
Builder.setInstr(MI);
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
// sext(trunc x) - > ashr (shl (aext/copy/trunc x), c), c
- unsigned TruncSrc;
+ Register TruncSrc;
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLT DstTy = MRI.getType(DstReg);
- if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy}}) ||
- isInstUnsupported({TargetOpcode::G_ASHR, {DstTy}}) ||
- isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+ // Guess on the RHS shift amount type, which should be re-legalized if
+ // applicable.
+ if (isInstUnsupported({TargetOpcode::G_SHL, {DstTy, DstTy}}) ||
+ isInstUnsupported({TargetOpcode::G_ASHR, {DstTy, DstTy}}) ||
+ isConstantUnsupported(DstTy))
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- unsigned ShAmt = DstTy.getSizeInBits() - SrcTy.getSizeInBits();
+ unsigned ShAmt = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
auto MIBShAmt = Builder.buildConstant(DstTy, ShAmt);
auto MIBShl = Builder.buildInstr(
TargetOpcode::G_SHL, {DstTy},
@@ -138,7 +179,7 @@ public:
if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
MI.getOperand(1).getReg(), MRI)) {
Builder.setInstr(MI);
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (Opcode == TargetOpcode::G_ANYEXT) {
@@ -150,7 +191,7 @@ public:
} else {
// G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
// bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
- if (isInstUnsupported({TargetOpcode::G_CONSTANT, {DstTy}}))
+ if (isConstantUnsupported(DstTy))
return false;
LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
Builder.buildConstant(DstReg, 0);
@@ -162,6 +203,16 @@ public:
return false;
}
+ static unsigned getMergeOpcode(LLT OpTy, LLT DestTy) {
+ if (OpTy.isVector() && DestTy.isVector())
+ return TargetOpcode::G_CONCAT_VECTORS;
+
+ if (OpTy.isVector() && !DestTy.isVector())
+ return TargetOpcode::G_BUILD_VECTOR;
+
+ return TargetOpcode::G_MERGE_VALUES;
+ }
+
bool tryCombineMerges(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
@@ -169,27 +220,33 @@ public:
return false;
unsigned NumDefs = MI.getNumOperands() - 1;
+ MachineInstr *SrcDef =
+ getDefIgnoringCopies(MI.getOperand(NumDefs).getReg(), MRI);
+ if (!SrcDef)
+ return false;
- unsigned MergingOpcode;
LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
- if (OpTy.isVector() && DestTy.isVector())
- MergingOpcode = TargetOpcode::G_CONCAT_VECTORS;
- else if (OpTy.isVector() && !DestTy.isVector())
- MergingOpcode = TargetOpcode::G_BUILD_VECTOR;
- else
- MergingOpcode = TargetOpcode::G_MERGE_VALUES;
-
- MachineInstr *MergeI =
- getOpcodeDef(MergingOpcode, MI.getOperand(NumDefs).getReg(), MRI);
+ MachineInstr *MergeI = SrcDef;
+ unsigned ConvertOp = 0;
+
+ // Handle intermediate conversions
+ unsigned SrcOp = SrcDef->getOpcode();
+ if (isArtifactCast(SrcOp)) {
+ ConvertOp = SrcOp;
+ MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
+ }
- if (!MergeI)
+ // FIXME: Handle scalarizing concat_vectors (scalar result type with vector
+ // source)
+ unsigned MergingOpcode = getMergeOpcode(OpTy, DestTy);
+ if (!MergeI || MergeI->getOpcode() != MergingOpcode)
return false;
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
if (NumMergeRegs < NumDefs) {
- if (NumDefs % NumMergeRegs != 0)
+ if (ConvertOp != 0 || NumDefs % NumMergeRegs != 0)
return false;
Builder.setInstr(MI);
@@ -202,7 +259,7 @@ public:
const unsigned NewNumDefs = NumDefs / NumMergeRegs;
for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
- SmallVector<unsigned, 2> DstRegs;
+ SmallVector<Register, 2> DstRegs;
for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
++j, ++DefIdx)
DstRegs.push_back(MI.getOperand(DefIdx).getReg());
@@ -211,7 +268,7 @@ public:
}
} else if (NumMergeRegs > NumDefs) {
- if (NumMergeRegs % NumDefs != 0)
+ if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
return false;
Builder.setInstr(MI);
@@ -224,7 +281,7 @@ public:
const unsigned NumRegs = NumMergeRegs / NumDefs;
for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
- SmallVector<unsigned, 2> Regs;
+ SmallVector<Register, 2> Regs;
for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
++j, ++Idx)
Regs.push_back(MergeI->getOperand(Idx).getReg());
@@ -233,10 +290,22 @@ public:
}
} else {
+ LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+ if (ConvertOp) {
+ Builder.setInstr(MI);
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
+ Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
+ Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()},
+ {MergeSrc});
+ }
+
+ markInstAndDefDead(MI, *MergeI, DeadInsts);
+ return true;
+ }
// FIXME: is a COPY appropriate if the types mismatch? We know both
// registers are allocatable by now.
- if (MRI.getType(MI.getOperand(0).getReg()) !=
- MRI.getType(MergeI->getOperand(1).getReg()))
+ if (DestTy != MergeSrcTy)
return false;
for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
@@ -248,12 +317,77 @@ public:
return true;
}
+ static bool isMergeLikeOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool tryCombineExtract(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
+
+ // Try to use the source registers from a G_MERGE_VALUES
+ //
+ // %2 = G_MERGE_VALUES %0, %1
+ // %3 = G_EXTRACT %2, N
+ // =>
+ //
+ // for N < %2.getSizeInBits() / 2
+ // %3 = G_EXTRACT %0, N
+ //
+ // for N >= %2.getSizeInBits() / 2
+  //  %3 = G_EXTRACT %1, (N - %0.getSizeInBits())
+
+ unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+ MachineInstr *MergeI = MRI.getVRegDef(Src);
+ if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
+ return false;
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(Src);
+
+ // TODO: Do we need to check if the resulting extract is supported?
+ unsigned ExtractDstSize = DstTy.getSizeInBits();
+ unsigned Offset = MI.getOperand(2).getImm();
+ unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
+ unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
+ unsigned MergeSrcIdx = Offset / MergeSrcSize;
+
+ // Compute the offset of the last bit the extract needs.
+ unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
+
+ // Can't handle the case where the extract spans multiple inputs.
+ if (MergeSrcIdx != EndMergeSrcIdx)
+ return false;
+
+ // TODO: We could modify MI in place in most cases.
+ Builder.setInstr(MI);
+ Builder.buildExtract(
+ MI.getOperand(0).getReg(),
+ MergeI->getOperand(MergeSrcIdx + 1).getReg(),
+ Offset - MergeSrcIdx * MergeSrcSize);
+ markInstAndDefDead(MI, *MergeI, DeadInsts);
+ return true;
+ }
+
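tryCombineExtract above only fires when the extracted range lies entirely inside a
single merge source; the index arithmetic is easiest to see with concrete numbers. A
runnable toy with illustrative sizes (an s128 merged from two s64 pieces, extracting
an s32):

    #include <cstdio>
    #include <initializer_list>

    int main() {
      const unsigned MergeSrcSize = 64, ExtractDstSize = 32;
      for (unsigned Offset : {0u, 32u, 48u, 64u, 96u}) {
        unsigned MergeSrcIdx = Offset / MergeSrcSize;
        unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
        if (MergeSrcIdx != EndMergeSrcIdx)
          std::printf("offset %u: spans two sources, combine rejected\n", Offset);
        else
          std::printf("offset %u: extract from source %u at offset %u\n",
                      Offset, MergeSrcIdx, Offset - MergeSrcIdx * MergeSrcSize);
      }
      return 0;
    }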
/// Try to combine away MI.
/// Returns true if it combined away the MI.
/// Adds instructions that are dead as a result of the combine
/// into DeadInsts, which can include MI.
bool tryCombineInstruction(MachineInstr &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ GISelObserverWrapper &WrapperObserver) {
+    // This might be a recursive call, and we might have DeadInsts already
+    // populated. To avoid problems later (such as multiple defs of the same
+    // vreg), process any pending dead instructions now.
+ if (!DeadInsts.empty())
+ deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
switch (MI.getOpcode()) {
default:
return false;
@@ -265,16 +399,35 @@ public:
return tryCombineSExt(MI, DeadInsts);
case TargetOpcode::G_UNMERGE_VALUES:
return tryCombineMerges(MI, DeadInsts);
+ case TargetOpcode::G_EXTRACT:
+ return tryCombineExtract(MI, DeadInsts);
case TargetOpcode::G_TRUNC: {
bool Changed = false;
for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
- Changed |= tryCombineInstruction(Use, DeadInsts);
+ Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
return Changed;
}
}
}
private:
+
+ static unsigned getArtifactSrcReg(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return MI.getOperand(MI.getNumOperands() - 1).getReg();
+ case TargetOpcode::G_EXTRACT:
+ return MI.getOperand(1).getReg();
+ default:
+      llvm_unreachable("Not a legalization artifact");
+ }
+ }
+
/// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
/// dead due to MI being killed, then mark DefMI as dead too.
/// Some of the combines (extends(trunc)), try to walk through redundant
@@ -295,13 +448,15 @@ private:
// and as a result, %3, %2, %1 are dead.
MachineInstr *PrevMI = &MI;
while (PrevMI != &DefMI) {
- unsigned PrevRegSrc =
- PrevMI->getOperand(PrevMI->getNumOperands() - 1).getReg();
+ unsigned PrevRegSrc = getArtifactSrcReg(*PrevMI);
+
MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
if (MRI.hasOneUse(PrevRegSrc)) {
if (TmpDef != &DefMI) {
- assert(TmpDef->getOpcode() == TargetOpcode::COPY &&
- "Expecting copy here");
+ assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
+ isArtifactCast(TmpDef->getOpcode())) &&
+ "Expecting copy or artifact cast here");
+
DeadInsts.push_back(TmpDef);
}
} else
@@ -312,6 +467,22 @@ private:
DeadInsts.push_back(&DefMI);
}
+ /// Erase the dead instructions in the list and call the observer hooks.
+ /// Normally the Legalizer will deal with erasing instructions that have been
+ /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
+  /// process instructions that have already been marked dead, which would break
+  /// the MIR by introducing multiple defs of the same vreg. For those cases,
+  /// allow the combines to delete the instructions explicitly before we run
+  /// into trouble.
+ void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
+ GISelObserverWrapper &WrapperObserver) {
+ for (auto *DeadMI : DeadInsts) {
+ LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
+ WrapperObserver.erasingInstr(*DeadMI);
+ DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
+ }
+ DeadInsts.clear();
+ }
+
/// Checks if the target legalizer info has specified anything about the
/// instruction, or if unsupported.
bool isInstUnsupported(const LegalityQuery &Query) const {
@@ -320,10 +491,23 @@ private:
return Step.Action == Unsupported || Step.Action == NotFound;
}
+ bool isInstLegal(const LegalityQuery &Query) const {
+ return LI.getAction(Query).Action == LegalizeActions::Legal;
+ }
+
+ bool isConstantUnsupported(LLT Ty) const {
+ if (!Ty.isVector())
+ return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
+
+ LLT EltTy = Ty.getElementType();
+ return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
+ isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
+ }
+
/// Looks through copy instructions and returns the actual
/// source register.
- unsigned lookThroughCopyInstrs(unsigned Reg) {
- unsigned TmpReg;
+ unsigned lookThroughCopyInstrs(Register Reg) {
+ Register TmpReg;
while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
if (MRI.getType(TmpReg).isValid())
Reg = TmpReg;
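In the tryCombineMerges changes above, the NumMergeRegs < NumDefs branch splits each
merge source into a smaller unmerge, while the opposite branch groups several sources
per def. The index bookkeeping of the first branch as a runnable toy (the counts are
invented):

    #include <cstdio>

    int main() {
      const unsigned NumDefs = 8;      // results of the G_UNMERGE_VALUES
      const unsigned NumMergeRegs = 2; // sources of the G_MERGE_VALUES
      const unsigned NewNumDefs = NumDefs / NumMergeRegs; // defs per source
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx)
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          std::printf("unmerge result %u comes from merge source %u\n",
                      DefIdx, Idx);
      return 0;
    }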
diff --git a/include/llvm/CodeGen/GlobalISel/Legalizer.h b/include/llvm/CodeGen/GlobalISel/Legalizer.h
index 8284ab6dac65..13cf3f7e694d 100644
--- a/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -1,9 +1,8 @@
-//== llvm/CodeGen/GlobalISel/LegalizePass.h ------------- -*- C++ -*-==//
+//== llvm/CodeGen/GlobalISel/Legalizer.h ---------------- -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,11 @@ public:
MachineFunctionProperties::Property::Legalized);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::NoPHIs);
+ }
+
bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
const TargetInstrInfo &TII);
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 9b4ecf9284e3..a0f21e8b19d7 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -1,9 +1,8 @@
//== llvm/CodeGen/GlobalISel/LegalizerHelper.h ---------------- -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -87,7 +86,7 @@ public:
/// Legalize a vector instruction by increasing the number of vector elements
/// involved and ignoring the added elements later.
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy);
+ LLT MoreTy);
/// Expose MIRBuilder so clients can set their own RecordInsertInstruction
/// functions
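moreElementsVector, documented above, legalizes by padding vector operands out to a
wider (legal) element count, performing the operation on the wider vectors, and
ignoring the extra lanes afterwards (the added lanes are undefined, see
moreElementsVectorSrc below). A toy standalone analogue of that idea, with an arbitrary
value standing in for undef:

    #include <cstdio>
    #include <vector>

    // Pad Src to NewSize lanes; the padded lanes carry an arbitrary value
    // because nothing may depend on them afterwards.
    std::vector<int> padLanes(std::vector<int> Src, unsigned NewSize) {
      Src.resize(NewSize, 0);
      return Src;
    }

    int main() {
      std::vector<int> A = {1, 2, 3}, B = {10, 20, 30};          // <3 x s32>
      std::vector<int> WA = padLanes(A, 4), WB = padLanes(B, 4); // <4 x s32>
      std::vector<int> WSum(4);
      for (unsigned I = 0; I < 4; ++I)
        WSum[I] = WA[I] + WB[I];       // operate on the widened vectors
      for (unsigned I = 0; I < 3; ++I) // only the original lanes are used
        std::printf("lane %u = %d\n", I, WSum[I]);
      return 0;
    }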
@@ -105,19 +104,126 @@ private:
unsigned ExtOpcode);
/// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+ /// Use by truncating the operand's type to \p NarrowTy using G_TRUNC, and
+ /// replacing the vreg of the operand in place.
+ void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx);
+
+ /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
/// Def by extending the operand's type to \p WideTy and truncating it back
/// with the \p TruncOpcode, and replacing the vreg of the operand in place.
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0,
unsigned TruncOpcode = TargetOpcode::G_TRUNC);
+  /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+  /// Def by truncating the operand's type to \p NarrowTy, replacing in place and
+  /// extending back with \p ExtOpcode.
+ void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx,
+ unsigned ExtOpcode);
+ /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+ /// Def by performing it with additional vector elements and extracting the
+ /// result elements, and replacing the vreg of the operand in place.
+ void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
+
+ /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
+ /// Use by producing a vector with undefined high elements, extracting the
+ /// original vector type, and replacing the vreg of the operand in place.
+ void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
+
+ LegalizeResult
+ widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+ LegalizeResult
+ widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+ LegalizeResult
+ widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+ LegalizeResult
+ widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+
/// Helper function to split a wide generic register into bitwise blocks with
/// the given Type (which implies the number of blocks needed). The generic
/// registers created are appended to Ops, starting at bit 0 of Reg.
- void extractParts(unsigned Reg, LLT Ty, int NumParts,
- SmallVectorImpl<unsigned> &VRegs);
+ void extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs);
+
+ /// Version which handles irregular splits.
+ bool extractParts(Register Reg, LLT RegTy, LLT MainTy,
+ LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverVRegs);
+
+  /// Helper function to build a wide generic register \p DstReg of type \p
+  /// ResultTy from smaller parts. This will produce a G_MERGE_VALUES,
+ /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
+ /// for the types.
+ ///
+ /// \p PartRegs must be registers of type \p PartTy.
+ ///
+ /// If \p ResultTy does not evenly break into \p PartTy sized pieces, the
+ /// remainder must be specified with \p LeftoverRegs of type \p LeftoverTy.
+ void insertParts(Register DstReg, LLT ResultTy,
+ LLT PartTy, ArrayRef<Register> PartRegs,
+ LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
+
+ /// Perform generic multiplication of values held in multiple registers.
+ /// Generated instructions use only types NarrowTy and i1.
+  /// The destination can be the same size as the source or twice its size.
+ void multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
+ ArrayRef<Register> Src1Regs,
+ ArrayRef<Register> Src2Regs, LLT NarrowTy);
+
+public:
+ LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
+ unsigned TypeIdx, LLT NarrowTy);
+
+ /// Legalize a simple vector instruction where all operands are the same type
+ /// by splitting into multiple components.
+ LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
+  /// Legalize an instruction with a vector type where each operand may have a
+ /// different element type. All type indexes must have the same number of
+ /// elements.
+ LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI,
+ unsigned TypeIdx, LLT NarrowTy);
+
+ LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
+ LegalizeResult
+ fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+ LegalizeResult
+ fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+ LegalizeResult fewerElementsVectorPhi(MachineInstr &MI,
+ unsigned TypeIdx, LLT NarrowTy);
+
+ LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy);
+
+ LegalizeResult
+ reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+ LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+ LLT HalfTy, LLT ShiftAmtTy);
+
+ LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
+ LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
+ LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
+ LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
+
+private:
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
/// To keep track of changes made by the LegalizerHelper.
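Among the new declarations, multiplyRegisters builds a wide multiply out of
NarrowTy-sized pieces, essentially schoolbook long multiplication with carries, with
the destination either the same size as the sources or twice their size. A
self-contained sketch of that arithmetic using 32-bit limbs and a double-width result
(not the LLVM implementation, only the computation it has to emit):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Multiply two little-endian arrays of 32-bit limbs, producing
    // Src1.size() + Src2.size() result limbs (the double-width case).
    std::vector<uint32_t> mulLimbs(const std::vector<uint32_t> &Src1,
                                   const std::vector<uint32_t> &Src2) {
      std::vector<uint32_t> Dst(Src1.size() + Src2.size(), 0);
      for (size_t I = 0; I < Src1.size(); ++I) {
        uint64_t Carry = 0;
        for (size_t J = 0; J < Src2.size(); ++J) {
          uint64_t Acc = uint64_t(Src1[I]) * Src2[J] + Dst[I + J] + Carry;
          Dst[I + J] = uint32_t(Acc); // low half stays in this limb
          Carry = Acc >> 32;          // high half carries into the next limb
        }
        Dst[I + Src2.size()] += uint32_t(Carry);
      }
      return Dst;
    }

    int main() {
      // (2^32 + 0xFFFFFFFF) * 2 == 0x3FFFFFFFE
      auto R = mulLimbs({0xFFFFFFFFu, 0x1u}, {0x2u, 0x0u});
      for (uint32_t Limb : R)
        std::printf("%08x ", Limb);
      std::printf("\n");
      return 0;
    }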
diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 13776dd3e87d..513c98f2d23f 100644
--- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/LegalizerInfo.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,6 +92,7 @@ enum LegalizeAction : std::uint8_t {
UseLegacyRules,
};
} // end namespace LegalizeActions
+raw_ostream &operator<<(raw_ostream &OS, LegalizeActions::LegalizeAction Action);
using LegalizeActions::LegalizeAction;
@@ -123,6 +123,7 @@ struct LegalityQuery {
struct MemDesc {
uint64_t SizeInBits;
+ uint64_t AlignInBits;
AtomicOrdering Ordering;
};
@@ -165,13 +166,23 @@ using LegalizeMutation =
std::function<std::pair<unsigned, LLT>(const LegalityQuery &)>;
namespace LegalityPredicates {
-struct TypePairAndMemSize {
+struct TypePairAndMemDesc {
LLT Type0;
LLT Type1;
uint64_t MemSize;
+ uint64_t Align;
+
+ bool operator==(const TypePairAndMemDesc &Other) const {
+ return Type0 == Other.Type0 && Type1 == Other.Type1 &&
+ Align == Other.Align &&
+ MemSize == Other.MemSize;
+ }
- bool operator==(const TypePairAndMemSize &Other) const {
+  /// \returns true if this memory access is legal for the access described
+  /// by \p Other (the alignment is sufficient for the size and result type).
+ bool isCompatible(const TypePairAndMemDesc &Other) const {
return Type0 == Other.Type0 && Type1 == Other.Type1 &&
+ Align >= Other.Align &&
MemSize == Other.MemSize;
}
};
@@ -200,20 +211,45 @@ typePairInSet(unsigned TypeIdx0, unsigned TypeIdx1,
std::initializer_list<std::pair<LLT, LLT>> TypesInit);
/// True iff the given types for the given pair of type indexes is one of the
/// specified type pairs.
-LegalityPredicate typePairAndMemSizeInSet(
+LegalityPredicate typePairAndMemDescInSet(
unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
- std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit);
+ std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit);
/// True iff the specified type index is a scalar.
LegalityPredicate isScalar(unsigned TypeIdx);
+/// True iff the specified type index is a vector.
+LegalityPredicate isVector(unsigned TypeIdx);
+/// True iff the specified type index is a pointer (with any address space).
+LegalityPredicate isPointer(unsigned TypeIdx);
+/// True iff the specified type index is a pointer with the specified address
+/// space.
+LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
+
/// True iff the specified type index is a scalar that's narrower than the given
/// size.
LegalityPredicate narrowerThan(unsigned TypeIdx, unsigned Size);
+
/// True iff the specified type index is a scalar that's wider than the given
/// size.
LegalityPredicate widerThan(unsigned TypeIdx, unsigned Size);
+
+/// True iff the specified type index is a scalar or vector with an element type
+/// that's narrower than the given size.
+LegalityPredicate scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size);
+
+/// True iff the specified type index is a scalar or a vector with an element
+/// type that's wider than the given size.
+LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size);
+
/// True iff the specified type index is a scalar whose size is not a power of
/// 2.
LegalityPredicate sizeNotPow2(unsigned TypeIdx);
+
+/// True iff the specified type index is a scalar or vector whose element size
+/// is not a power of 2.
+LegalityPredicate scalarOrEltSizeNotPow2(unsigned TypeIdx);
+
+/// True iff the specified type indices are both the same bit size.
+LegalityPredicate sameSize(unsigned TypeIdx0, unsigned TypeIdx1);
/// True iff the specified MMO index has a size that is not a power of 2
LegalityPredicate memSizeInBytesNotPow2(unsigned MMOIdx);
/// True iff the specified type index is a vector whose element count is not a
@@ -228,13 +264,25 @@ LegalityPredicate atomicOrderingAtLeastOrStrongerThan(unsigned MMOIdx,
namespace LegalizeMutations {
/// Select this specific type for the given type index.
LegalizeMutation changeTo(unsigned TypeIdx, LLT Ty);
+
/// Keep the same type as the given type index.
LegalizeMutation changeTo(unsigned TypeIdx, unsigned FromTypeIdx);
-/// Widen the type for the given type index to the next power of 2.
-LegalizeMutation widenScalarToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+
+/// Keep the same scalar or element type as the given type index.
+LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx);
+
+/// Keep the same scalar or element type as the given type.
+LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty);
+
+/// Widen the scalar type or vector element type for the given type index to the
+/// next power of 2.
+LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+
/// Add more elements to the type for the given type index to the next power of
/// 2.
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+/// Break up the vector type for the given type index into the element type.
+LegalizeMutation scalarize(unsigned TypeIdx);
} // end namespace LegalizeMutations
/// A single rule in a legalizer info ruleset.
@@ -419,13 +467,13 @@ public:
return actionFor(LegalizeAction::Legal, Types);
}
/// The instruction is legal when type indexes 0 and 1 along with the memory
- /// size is any type and size tuple in the given list.
- LegalizeRuleSet &legalForTypesWithMemSize(
- std::initializer_list<LegalityPredicates::TypePairAndMemSize>
- TypesAndMemSize) {
+  /// size and minimum alignment match any tuple in the given list.
+ LegalizeRuleSet &legalForTypesWithMemDesc(
+ std::initializer_list<LegalityPredicates::TypePairAndMemDesc>
+ TypesAndMemDesc) {
return actionIf(LegalizeAction::Legal,
- LegalityPredicates::typePairAndMemSizeInSet(
- typeIdx(0), typeIdx(1), /*MMOIdx*/ 0, TypesAndMemSize));
+ LegalityPredicates::typePairAndMemDescInSet(
+ typeIdx(0), typeIdx(1), /*MMOIdx*/ 0, TypesAndMemDesc));
}
/// The instruction is legal when type indexes 0 and 1 are both in the given
/// list. That is, the type pair is in the cartesian product of the list.
@@ -438,6 +486,20 @@ public:
std::initializer_list<LLT> Types1) {
return actionForCartesianProduct(LegalizeAction::Legal, Types0, Types1);
}
+  /// The instruction is legal when type indexes 0, 1, and 2 are all in their
+ /// respective lists.
+ LegalizeRuleSet &legalForCartesianProduct(std::initializer_list<LLT> Types0,
+ std::initializer_list<LLT> Types1,
+ std::initializer_list<LLT> Types2) {
+ return actionForCartesianProduct(LegalizeAction::Legal, Types0, Types1,
+ Types2);
+ }
+
+ LegalizeRuleSet &alwaysLegal() {
+ using namespace LegalizeMutations;
+ markAllTypeIdxsAsCovered();
+ return actionIf(LegalizeAction::Legal, always);
+ }
/// The instruction is lowered.
LegalizeRuleSet &lower() {
@@ -588,6 +650,13 @@ public:
LegalizeRuleSet &customFor(std::initializer_list<LLT> Types) {
return actionFor(LegalizeAction::Custom, Types);
}
+
+  /// The instruction is custom when type indexes 0 and 1 are any type pair in the
+ /// given list.
+ LegalizeRuleSet &customFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
+ return actionFor(LegalizeAction::Custom, Types);
+ }
+
LegalizeRuleSet &customForCartesianProduct(std::initializer_list<LLT> Types) {
return actionForCartesianProduct(LegalizeAction::Custom, Types);
}
@@ -597,13 +666,29 @@ public:
return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1);
}
+ /// Unconditionally custom lower.
+ LegalizeRuleSet &custom() {
+ return customIf(always);
+ }
+
/// Widen the scalar to the next power of two that is at least MinSize.
/// No effect if the type is not a scalar or is a power of two.
LegalizeRuleSet &widenScalarToNextPow2(unsigned TypeIdx,
unsigned MinSize = 0) {
using namespace LegalityPredicates;
- return actionIf(LegalizeAction::WidenScalar, sizeNotPow2(typeIdx(TypeIdx)),
- LegalizeMutations::widenScalarToNextPow2(TypeIdx, MinSize));
+ return actionIf(
+ LegalizeAction::WidenScalar, sizeNotPow2(typeIdx(TypeIdx)),
+ LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
+ }
+
+ /// Widen the scalar or vector element type to the next power of two that is
+ /// at least MinSize. No effect if the scalar size is a power of two.
+ LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx,
+ unsigned MinSize = 0) {
+ using namespace LegalityPredicates;
+ return actionIf(
+ LegalizeAction::WidenScalar, scalarOrEltSizeNotPow2(typeIdx(TypeIdx)),
+ LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
}
LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) {
@@ -612,6 +697,32 @@ public:
Mutation);
}
+ LegalizeRuleSet &scalarize(unsigned TypeIdx) {
+ using namespace LegalityPredicates;
+ return actionIf(LegalizeAction::FewerElements, isVector(typeIdx(TypeIdx)),
+ LegalizeMutations::scalarize(TypeIdx));
+ }
+
+ /// Ensure the scalar or element is at least as wide as Ty.
+ LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT &Ty) {
+ using namespace LegalityPredicates;
+ using namespace LegalizeMutations;
+ return actionIf(LegalizeAction::WidenScalar,
+ scalarOrEltNarrowerThan(TypeIdx, Ty.getScalarSizeInBits()),
+ changeElementTo(typeIdx(TypeIdx), Ty));
+ }
+
+  /// Ensure the scalar or element is at least as wide as Ty when Predicate holds.
+ LegalizeRuleSet &minScalarOrEltIf(LegalityPredicate Predicate,
+ unsigned TypeIdx, const LLT &Ty) {
+ using namespace LegalityPredicates;
+ using namespace LegalizeMutations;
+ return actionIf(LegalizeAction::WidenScalar,
+ all(Predicate, scalarOrEltNarrowerThan(
+ TypeIdx, Ty.getScalarSizeInBits())),
+ changeElementTo(typeIdx(TypeIdx), Ty));
+ }
+
/// Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT &Ty) {
using namespace LegalityPredicates;
@@ -622,6 +733,15 @@ public:
}
/// Ensure the scalar is at most as wide as Ty.
+ LegalizeRuleSet &maxScalarOrElt(unsigned TypeIdx, const LLT &Ty) {
+ using namespace LegalityPredicates;
+ using namespace LegalizeMutations;
+ return actionIf(LegalizeAction::NarrowScalar,
+ scalarOrEltWiderThan(TypeIdx, Ty.getScalarSizeInBits()),
+ changeElementTo(typeIdx(TypeIdx), Ty));
+ }
+
+ /// Ensure the scalar is at most as wide as Ty.
LegalizeRuleSet &maxScalar(unsigned TypeIdx, const LLT &Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
@@ -637,12 +757,12 @@ public:
const LLT &Ty) {
using namespace LegalityPredicates;
using namespace LegalizeMutations;
- return actionIf(LegalizeAction::NarrowScalar,
- [=](const LegalityQuery &Query) {
- return widerThan(TypeIdx, Ty.getSizeInBits()) &&
- Predicate(Query);
- },
- changeTo(typeIdx(TypeIdx), Ty));
+ return actionIf(
+ LegalizeAction::NarrowScalar,
+ [=](const LegalityQuery &Query) {
+ return widerThan(TypeIdx, Ty.getSizeInBits()) && Predicate(Query);
+ },
+ changeElementTo(typeIdx(TypeIdx), Ty));
}
/// Limit the range of scalar sizes to MinTy and MaxTy.
@@ -652,6 +772,12 @@ public:
return minScalar(TypeIdx, MinTy).maxScalar(TypeIdx, MaxTy);
}
+  /// Limit the range of scalar or element sizes to MinTy and MaxTy.
+ LegalizeRuleSet &clampScalarOrElt(unsigned TypeIdx, const LLT &MinTy,
+ const LLT &MaxTy) {
+ return minScalarOrElt(TypeIdx, MinTy).maxScalarOrElt(TypeIdx, MaxTy);
+ }
+
/// Widen the scalar to match the size of another.
LegalizeRuleSet &minScalarSameAs(unsigned TypeIdx, unsigned LargeTypeIdx) {
typeIdx(TypeIdx);
@@ -661,8 +787,25 @@ public:
Query.Types[TypeIdx].getSizeInBits();
},
[=](const LegalityQuery &Query) {
+ LLT T = Query.Types[LargeTypeIdx];
return std::make_pair(TypeIdx,
- Query.Types[LargeTypeIdx].getElementType());
+ T.isVector() ? T.getElementType() : T);
+ });
+ }
+
+ /// Conditionally widen the scalar or elt to match the size of another.
+ LegalizeRuleSet &minScalarEltSameAsIf(LegalityPredicate Predicate,
+ unsigned TypeIdx, unsigned LargeTypeIdx) {
+ typeIdx(TypeIdx);
+ return widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[LargeTypeIdx].getScalarSizeInBits() >
+ Query.Types[TypeIdx].getScalarSizeInBits() &&
+ Predicate(Query);
+ },
+ [=](const LegalityQuery &Query) {
+ LLT T = Query.Types[LargeTypeIdx];
+ return std::make_pair(TypeIdx, T);
});
}
@@ -691,7 +834,7 @@ public:
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
return std::make_pair(
- TypeIdx, LLT::vector(MinElements, VecTy.getScalarSizeInBits()));
+ TypeIdx, LLT::vector(MinElements, VecTy.getElementType()));
});
}
/// Limit the number of elements in EltTy vectors to at most MaxElements.
@@ -708,10 +851,8 @@ public:
},
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
- if (MaxElements == 1)
- return std::make_pair(TypeIdx, VecTy.getElementType());
- return std::make_pair(
- TypeIdx, LLT::vector(MaxElements, VecTy.getScalarSizeInBits()));
+ LLT NewTy = LLT::scalarOrVector(MaxElements, VecTy.getElementType());
+ return std::make_pair(TypeIdx, NewTy);
});
}
/// Limit the number of elements for the given vectors to at least MinTy's
@@ -962,12 +1103,22 @@ public:
LegalizeActionStep getAction(const MachineInstr &MI,
const MachineRegisterInfo &MRI) const;
+ bool isLegal(const LegalityQuery &Query) const {
+ return getAction(Query).Action == LegalizeAction::Legal;
+ }
bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+ bool isLegalOrCustom(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const;
virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
+ /// Return true if MI is either legal or has been legalized and false
+ /// if not legal.
+ virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+
private:
/// Determine what action should be taken to legalize the given generic
/// instruction opcode, type-index and type. Requires computeTables to have
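Several of the new rules here (widenScalarOrEltToNextPow2, minScalarOrElt,
clampScalarOrElt) act on the scalar or element width while leaving the element count
alone. A standalone sketch of the size computation behind the pow2 widening, reading
the documented behaviour as "the next power of two that is at least the current width
and at least Min" (the real mutation lives in LegalizeMutations and works on LLTs):

    #include <algorithm>

    // Smallest power of two that is >= SizeInBits and >= Min (Min may be 0).
    unsigned nextPow2AtLeast(unsigned SizeInBits, unsigned Min = 0) {
      unsigned Target = std::max(SizeInBits, std::max(Min, 1u));
      unsigned Pow2 = 1;
      while (Pow2 < Target)
        Pow2 <<= 1;
      return Pow2;
    }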
diff --git a/include/llvm/CodeGen/GlobalISel/Localizer.h b/include/llvm/CodeGen/GlobalISel/Localizer.h
index 1e2d4763e5e1..06de5800b8b7 100644
--- a/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -1,9 +1,8 @@
//== llvm/CodeGen/GlobalISel/Localizer.h - Localizer -------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,12 +21,14 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
#define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
// Forward declarations.
class MachineRegisterInfo;
+class TargetTransformInfo;
/// This pass implements the localization mechanism described at the
/// top of this file. One specificity of the implementation is that
@@ -44,9 +45,11 @@ private:
/// MRI contains all the register class/bank information that this
/// pass uses and updates.
MachineRegisterInfo *MRI;
+ /// TTI used for getting remat costs for instructions.
+ TargetTransformInfo *TTI;
/// Check whether or not \p MI needs to be moved close to its uses.
- static bool shouldLocalize(const MachineInstr &MI);
+ bool shouldLocalize(const MachineInstr &MI);
/// Check if \p MOUse is used in the same basic block as \p Def.
/// If the use is in the same block, we say it is local.
@@ -58,6 +61,15 @@ private:
/// Initialize the field members using \p MF.
void init(MachineFunction &MF);
+ typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT;
+
+ /// Do inter-block localization from the entry block.
+ bool localizeInterBlock(MachineFunction &MF,
+ LocalizedSetVecT &LocalizedInstrs);
+
+ /// Do intra-block localization of already localized instructions.
+ bool localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs);
+
public:
Localizer();
diff --git a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index f77f9a8df7ee..13eddd9539fa 100644
--- a/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -1,9 +1,8 @@
-//== ----- llvm/CodeGen/GlobalISel/MIPatternMatch.h --------------------- == //
+//==------ llvm/CodeGen/GlobalISel/MIPatternMatch.h -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,8 +30,7 @@ template <typename SubPatternT> struct OneUse_match {
SubPatternT SubPat;
OneUse_match(const SubPatternT &SP) : SubPat(SP) {}
- template <typename OpTy>
- bool match(const MachineRegisterInfo &MRI, unsigned Reg) {
+ bool match(MachineRegisterInfo &MRI, unsigned Reg) {
return MRI.hasOneUse(Reg) && SubPat.match(MRI, Reg);
}
};
@@ -162,7 +160,7 @@ template <typename Class> struct bind_ty {
}
};
-inline bind_ty<unsigned> m_Reg(unsigned &R) { return R; }
+inline bind_ty<Register> m_Reg(Register &R) { return R; }
inline bind_ty<MachineInstr *> m_MInstr(MachineInstr *&MI) { return MI; }
inline bind_ty<LLT> m_Type(LLT &Ty) { return Ty; }
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 37de8f030410..10d712176b1b 100644
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.h - MIBuilder --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -60,13 +59,15 @@ struct MachineIRBuilderState {
class DstOp {
union {
LLT LLTTy;
- unsigned Reg;
+ Register Reg;
const TargetRegisterClass *RC;
};
public:
enum class DstType { Ty_LLT, Ty_Reg, Ty_RC };
DstOp(unsigned R) : Reg(R), Ty(DstType::Ty_Reg) {}
+ DstOp(Register R) : Reg(R), Ty(DstType::Ty_Reg) {}
+ DstOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(DstType::Ty_Reg) {}
DstOp(const LLT &T) : LLTTy(T), Ty(DstType::Ty_LLT) {}
DstOp(const TargetRegisterClass *TRC) : RC(TRC), Ty(DstType::Ty_RC) {}
@@ -96,7 +97,7 @@ public:
llvm_unreachable("Unrecognised DstOp::DstType enum");
}
- unsigned getReg() const {
+ Register getReg() const {
assert(Ty == DstType::Ty_Reg && "Not a register");
return Reg;
}
@@ -119,13 +120,14 @@ private:
class SrcOp {
union {
MachineInstrBuilder SrcMIB;
- unsigned Reg;
+ Register Reg;
CmpInst::Predicate Pred;
};
public:
enum class SrcType { Ty_Reg, Ty_MIB, Ty_Predicate };
- SrcOp(unsigned R) : Reg(R), Ty(SrcType::Ty_Reg) {}
+ SrcOp(Register R) : Reg(R), Ty(SrcType::Ty_Reg) {}
+ SrcOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(SrcType::Ty_Reg) {}
SrcOp(const MachineInstrBuilder &MIB) : SrcMIB(MIB), Ty(SrcType::Ty_MIB) {}
SrcOp(const CmpInst::Predicate P) : Pred(P), Ty(SrcType::Ty_Predicate) {}
@@ -155,7 +157,7 @@ public:
llvm_unreachable("Unrecognised SrcOp::SrcType enum");
}
- unsigned getReg() const {
+ Register getReg() const {
switch (Ty) {
case SrcType::Ty_Predicate:
llvm_unreachable("Not a register operand");
@@ -202,6 +204,7 @@ protected:
void validateTruncExt(const LLT &Dst, const LLT &Src, bool IsExtend);
void validateBinaryOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
+ void validateShiftOp(const LLT &Res, const LLT &Op0, const LLT &Op1);
void validateSelectOp(const LLT &ResTy, const LLT &TstTy, const LLT &Op0Ty,
const LLT &Op1Ty);
@@ -230,6 +233,15 @@ public:
return *State.MF;
}
+ const MachineFunction &getMF() const {
+ assert(State.MF && "MachineFunction is not set");
+ return *State.MF;
+ }
+
+ const DataLayout &getDataLayout() const {
+ return getMF().getFunction().getParent()->getDataLayout();
+ }
+
/// Getter for DebugLoc
const DebugLoc &getDL() { return State.DL; }
@@ -310,13 +322,13 @@ public:
/// Build and insert a DBG_VALUE instruction expressing the fact that the
/// associated \p Variable lives in \p Reg (suitably modified by \p Expr).
- MachineInstrBuilder buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+ MachineInstrBuilder buildDirectDbgValue(Register Reg, const MDNode *Variable,
const MDNode *Expr);
/// Build and insert a DBG_VALUE instruction expressing the fact that the
/// associated \p Variable lives in memory at \p Reg (suitably modified by \p
/// Expr).
- MachineInstrBuilder buildIndirectDbgValue(unsigned Reg,
+ MachineInstrBuilder buildIndirectDbgValue(Register Reg,
const MDNode *Variable,
const MDNode *Expr);
@@ -345,7 +357,7 @@ public:
/// \pre \p Res must be a generic virtual register with pointer type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx);
+ MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx);
/// Build and insert \p Res = G_GLOBAL_VALUE \p GV
///
@@ -357,8 +369,7 @@ public:
/// in the same address space as \p GV.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV);
-
+ MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV);
/// Build and insert \p Res = G_GEP \p Op0, \p Op1
///
@@ -371,8 +382,8 @@ public:
/// \pre \p Op1 must be a generic virtual register with scalar type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1);
+ MachineInstrBuilder buildGEP(const DstOp &Res, const SrcOp &Op0,
+ const SrcOp &Op1);
/// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value)
///
@@ -390,7 +401,7 @@ public:
/// type as \p Op0 or \p Op0 itself.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- Optional<MachineInstrBuilder> materializeGEP(unsigned &Res, unsigned Op0,
+ Optional<MachineInstrBuilder> materializeGEP(Register &Res, Register Op0,
const LLT &ValueTy,
uint64_t Value);
@@ -407,9 +418,24 @@ public:
/// be cleared in \p Op0.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0,
+ MachineInstrBuilder buildPtrMask(const DstOp &Res, const SrcOp &Op0,
uint32_t NumBits);
+ /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1
+ ///
+ /// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and
+ /// sets \p CarryOut to 1 if the result overflowed in unsigned arithmetic.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers with the
+ /// same scalar type.
+  /// \pre \p CarryOut must be a generic virtual register with scalar type
+  /// (typically s1).
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut,
+ const SrcOp &Op0, const SrcOp &Op1);
+
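buildUAddo, declared above, emits G_UADDO: the wrapped unsigned sum plus a carry-out
flag. The same semantics in plain C++, with a 32-bit width chosen purely for
illustration:

    #include <cstdint>
    #include <utility>

    // Returns {Op0 + Op1 (mod 2^32), carry-out}, matching the G_UADDO
    // description above.
    std::pair<uint32_t, bool> uaddo32(uint32_t Op0, uint32_t Op1) {
      uint32_t Res = Op0 + Op1;  // unsigned addition wraps
      bool CarryOut = Res < Op0; // wrapped exactly when the sum is smaller
      return {Res, CarryOut};
    }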
/// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0,
/// \p Op1, \p CarryIn
///
@@ -458,6 +484,25 @@ public:
/// \return The newly created instruction.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op);
+ /// Build and insert a G_PTRTOINT instruction.
+ MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_PTRTOINT, {Dst}, {Src});
+ }
+
+ /// Build and insert \p Dst = G_BITCAST \p Src
+ MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src) {
+ return buildInstr(TargetOpcode::G_BITCAST, {Dst}, {Src});
+ }
+
+ /// \return The opcode of the extension the target wants to use for boolean
+ /// values.
+ unsigned getBoolExtOp(bool IsVec, bool IsFP) const;
+
+  /// Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_SEXT \p Op, or \p Res
+  /// = G_ZEXT \p Op depending on how the target wants to extend boolean values.
+ MachineInstrBuilder buildBoolExt(const DstOp &Res, const SrcOp &Op,
+ bool IsFP);
+
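getBoolExtOp and buildBoolExt let a target choose how an i1 is widened. The two usual
conventions, shown standalone (which one a given backend uses is its own choice and is
not implied by this patch):

    #include <cstdint>

    // Zero-extended booleans become 0 or 1; sign-extended booleans become
    // 0 or -1 (all bits set). G_ANYEXT would leave the upper bits unspecified.
    int32_t extendBool(bool B, bool UseSignExt) {
      if (UseSignExt)
        return B ? -1 : 0; // G_SEXT-style
      return B ? 1 : 0;    // G_ZEXT-style
    }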
/// Build and insert \p Res = G_ZEXT \p Op
///
/// G_ZEXT produces a register of the specified width, with bits 0 to
@@ -538,7 +583,7 @@ public:
/// depend on bit 0 (for now).
///
/// \return The newly created instruction.
- MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &Dest);
+ MachineInstrBuilder buildBrCond(Register Tst, MachineBasicBlock &Dest);
/// Build and insert G_BRINDIRECT \p Tgt
///
@@ -548,7 +593,21 @@ public:
/// \pre \p Tgt must be a generic virtual register with pointer type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildBrIndirect(unsigned Tgt);
+ MachineInstrBuilder buildBrIndirect(Register Tgt);
+
+ /// Build and insert G_BRJT \p TablePtr, \p JTI, \p IndexReg
+ ///
+ /// G_BRJT is a jump table branch using a table base pointer \p TablePtr,
+ /// jump table index \p JTI and index \p IndexReg
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p TablePtr must be a generic virtual register with pointer type.
+  /// \pre \p JTI must be a jump table index.
+ /// \pre \p IndexReg must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildBrJT(Register TablePtr, unsigned JTI,
+ Register IndexReg);
/// Build and insert \p Res = G_CONSTANT \p Val
///
@@ -572,6 +631,7 @@ public:
///
/// \return The newly created instruction.
MachineInstrBuilder buildConstant(const DstOp &Res, int64_t Val);
+ MachineInstrBuilder buildConstant(const DstOp &Res, const APInt &Val);
/// Build and insert \p Res = G_FCONSTANT \p Val
///
@@ -586,6 +646,7 @@ public:
const ConstantFP &Val);
MachineInstrBuilder buildFConstant(const DstOp &Res, double Val);
+ MachineInstrBuilder buildFConstant(const DstOp &Res, const APFloat &Val);
/// Build and insert \p Res = COPY Op
///
@@ -605,7 +666,7 @@ public:
/// \pre \p Addr must be a generic virtual register with pointer type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr,
+ MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr,
MachineMemOperand &MMO);
/// Build and insert `Res = <opcode> Addr, MMO`.
@@ -617,8 +678,8 @@ public:
/// \pre \p Addr must be a generic virtual register with pointer type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildLoadInstr(unsigned Opcode, unsigned Res,
- unsigned Addr, MachineMemOperand &MMO);
+ MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res,
+ const SrcOp &Addr, MachineMemOperand &MMO);
/// Build and insert `G_STORE Val, Addr, MMO`.
///
@@ -629,7 +690,7 @@ public:
/// \pre \p Addr must be a generic virtual register with pointer type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildStore(unsigned Val, unsigned Addr,
+ MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr,
MachineMemOperand &MMO);
/// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`.
@@ -638,7 +699,7 @@ public:
/// \pre \p Res and \p Src must be generic virtual registers.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildExtract(unsigned Res, unsigned Src, uint64_t Index);
+ MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index);
/// Build and insert \p Res = IMPLICIT_DEF.
MachineInstrBuilder buildUndef(const DstOp &Res);
@@ -656,7 +717,7 @@ public:
/// \pre The bits defined by each Op (derived from index and scalar size) must
/// not overlap.
/// \pre \p Indices must be in ascending order of bit position.
- void buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+ void buildSequence(Register Res, ArrayRef<Register> Ops,
ArrayRef<uint64_t> Indices);
/// Build and insert \p Res = G_MERGE_VALUES \p Op0, ...
@@ -670,7 +731,7 @@ public:
/// \pre The type of all \p Ops registers must be identical.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<unsigned> Ops);
+ MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef<Register> Ops);
/// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
///
@@ -683,7 +744,10 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildUnmerge(ArrayRef<LLT> Res, const SrcOp &Op);
- MachineInstrBuilder buildUnmerge(ArrayRef<unsigned> Res, const SrcOp &Op);
+ MachineInstrBuilder buildUnmerge(ArrayRef<Register> Res, const SrcOp &Op);
+
+ /// Build and insert an unmerge of \p Res sized pieces to cover \p Op
+ MachineInstrBuilder buildUnmerge(LLT Res, const SrcOp &Op);
/// Build and insert \p Res = G_BUILD_VECTOR \p Op0, ...
///
@@ -695,7 +759,12 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res,
- ArrayRef<unsigned> Ops);
+ ArrayRef<Register> Ops);
+
+ /// Build and insert \p Res = G_BUILD_VECTOR with \p Src replicated to fill
+ /// the number of elements
+ MachineInstrBuilder buildSplatVector(const DstOp &Res,
+ const SrcOp &Src);
/// Build and insert \p Res = G_BUILD_VECTOR_TRUNC \p Op0, ...
///
@@ -711,7 +780,7 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res,
- ArrayRef<unsigned> Ops);
+ ArrayRef<Register> Ops);
/// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ...
///
@@ -725,10 +794,10 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildConcatVectors(const DstOp &Res,
- ArrayRef<unsigned> Ops);
+ ArrayRef<Register> Ops);
- MachineInstrBuilder buildInsert(unsigned Res, unsigned Src,
- unsigned Op, unsigned Index);
+ MachineInstrBuilder buildInsert(Register Res, Register Src,
+ Register Op, unsigned Index);
/// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
/// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
@@ -740,7 +809,9 @@ public:
/// \pre setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res,
+ MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res,
+ bool HasSideEffects);
+ MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res,
bool HasSideEffects);
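// Illustrative sketch (editorial aside, not part of the patch): the register
// list form lets an intrinsic define several results; non-result operands are
// appended to the returned builder. `B`, `MRI`, `IID` (some Intrinsic::ID) and
// `Src` are hypothetical names.
//
//   Register Lo = MRI.createGenericVirtualRegister(LLT::scalar(32));
//   Register Hi = MRI.createGenericVirtualRegister(LLT::scalar(32));
//   B.buildIntrinsic(IID, {Lo, Hi}, /*HasSideEffects=*/false)
//       .addUse(Src);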
/// Build and insert \p Res = G_FPTRUNC \p Op
@@ -855,8 +926,8 @@ public:
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder
- buildAtomicCmpXchgWithSuccess(unsigned OldValRes, unsigned SuccessRes,
- unsigned Addr, unsigned CmpVal, unsigned NewVal,
+ buildAtomicCmpXchgWithSuccess(Register OldValRes, Register SuccessRes,
+ Register Addr, Register CmpVal, Register NewVal,
MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal,
@@ -873,8 +944,8 @@ public:
/// registers of the same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
+ MachineInstrBuilder buildAtomicCmpXchg(Register OldValRes, Register Addr,
+ Register CmpVal, Register NewVal,
MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_<Opcode> Addr, Val, MMO`.
@@ -890,8 +961,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMW(unsigned Opcode, unsigned OldValRes,
- unsigned Addr, unsigned Val,
+ MachineInstrBuilder buildAtomicRMW(unsigned Opcode, Register OldValRes,
+ Register Addr, Register Val,
MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_XCHG Addr, Val, MMO`.
@@ -906,8 +977,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWXchg(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_ADD Addr, Val, MMO`.
///
@@ -921,8 +992,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWAdd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_SUB Addr, Val, MMO`.
///
@@ -936,8 +1007,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWSub(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_AND Addr, Val, MMO`.
///
@@ -951,8 +1022,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWAnd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_NAND Addr, Val, MMO`.
///
@@ -967,8 +1038,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWNand(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_OR Addr, Val, MMO`.
///
@@ -982,8 +1053,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWOr(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWOr(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_XOR Addr, Val, MMO`.
///
@@ -997,8 +1068,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWXor(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_MAX Addr, Val, MMO`.
///
@@ -1013,8 +1084,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWMax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_MIN Addr, Val, MMO`.
///
@@ -1029,8 +1100,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWMin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_UMAX Addr, Val, MMO`.
///
@@ -1045,8 +1116,8 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWUmax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
/// Build and insert `OldValRes<def> = G_ATOMICRMW_UMIN Addr, Val, MMO`.
///
@@ -1061,8 +1132,11 @@ public:
/// same type.
///
/// \return a MachineInstrBuilder for the newly created instruction.
- MachineInstrBuilder buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO);
+ MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO);
+
+ /// Build and insert `G_FENCE Ordering, Scope`.
+ MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope);
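// Illustrative sketch (editorial aside, not part of the patch): the ordering
// and scope are passed as the raw integer operands of G_FENCE, e.g. for a
// seq_cst system-wide fence (builder `B` hypothetical).
//
//   B.buildFence(static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent),
//                SyncScope::System);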
/// Build and insert \p Res = G_BLOCK_ADDR \p BA
///
@@ -1072,7 +1146,7 @@ public:
/// \pre \p Res must be a generic virtual register of a pointer type.
///
/// \return The newly created instruction.
- MachineInstrBuilder buildBlockAddress(unsigned Res, const BlockAddress *BA);
+ MachineInstrBuilder buildBlockAddress(Register Res, const BlockAddress *BA);
/// Build and insert \p Res = G_ADD \p Op0, \p Op1
///
@@ -1124,6 +1198,36 @@ public:
return buildInstr(TargetOpcode::G_MUL, {Dst}, {Src0, Src1}, Flags);
}
+ MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_UMULH, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildSMulH(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_SMULH, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_SHL, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_LSHR, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_ASHR, {Dst}, {Src0, Src1}, Flags);
+ }
+
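// Illustrative sketch (editorial aside, not part of the patch): how the new
// shift helpers compose with the existing buildConstant/buildOr helpers.
// Assumes a MachineIRBuilder `B` positioned at an insertion point and an s32
// virtual register `X`; both names are hypothetical.
//
//   LLT S32 = LLT::scalar(32);
//   auto AmtL = B.buildConstant(S32, 2);
//   auto AmtR = B.buildConstant(S32, 30);
//   auto Hi = B.buildShl(S32, X, AmtL);   // G_SHL
//   auto Lo = B.buildLShr(S32, X, AmtR);  // G_LSHR
//   auto Rot = B.buildOr(S32, Hi, Lo);    // (X << 2) | (X >> 30): rotate by 2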
/// Build and insert \p Res = G_AND \p Op0, \p Op1
///
/// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p
@@ -1155,6 +1259,137 @@ public:
return buildInstr(TargetOpcode::G_OR, {Dst}, {Src0, Src1});
}
+ /// Build and insert \p Res = G_XOR \p Op0, \p Op1
+ MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert a bitwise not,
+ /// \p NegOne = G_CONSTANT -1
+ /// \p Res = G_XOR \p Op0, NegOne
+ MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0) {
+ auto NegOne = buildConstant(Dst.getLLTTy(*getMRI()), -1);
+ return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
+ }
+
+ /// Build and insert \p Res = G_CTPOP \p Src0
+ MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_CTLZ \p Src0
+ MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_CTLZ, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_CTLZ_ZERO_UNDEF \p Src0
+ MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_CTTZ \p Src0
+ MachineInstrBuilder buildCTTZ(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_CTTZ, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_CTTZ_ZERO_UNDEF \p Src0
+ MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_FADD \p Op0, \p Op1
+ MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_FSUB \p Op0, \p Op1
+ MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2
+ MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1, const SrcOp &Src2) {
+ return buildInstr(TargetOpcode::G_FMA, {Dst}, {Src0, Src1, Src2});
+ }
+
+ /// Build and insert \p Res = G_FNEG \p Op0
+ MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_FABS \p Op0
+ MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_FABS, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Dst = G_FCANONICALIZE \p Src0
+ MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0,
+ Optional<unsigned> Flags = None) {
+ return buildInstr(TargetOpcode::G_FCANONICALIZE, {Dst}, {Src0}, Flags);
+ }
+
+ /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
+ MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_FCOPYSIGN, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_UITOFP \p Src0
+ MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_UITOFP, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_SITOFP \p Src0
+ MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_SITOFP, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_FPTOUI \p Src0
+ MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_FPTOUI, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_FPTOSI \p Src0
+ MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0) {
+ return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
+ }
+
+ /// Build and insert \p Res = G_SMIN \p Op0, \p Op1
+ MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_SMIN, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_SMAX \p Op0, \p Op1
+ MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_SMAX, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_UMIN \p Op0, \p Op1
+ MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_UMIN, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_UMAX \p Op0, \p Op1
+ MachineInstrBuilder buildUMax(const DstOp &Dst, const SrcOp &Src0,
+ const SrcOp &Src1) {
+ return buildInstr(TargetOpcode::G_UMAX, {Dst}, {Src0, Src1});
+ }
+
+ /// Build and insert \p Res = G_JUMP_TABLE \p JTI
+ ///
+ /// G_JUMP_TABLE sets \p Res to the address of the jump table specified by
+ /// the jump table index \p JTI.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI);
+
virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps,
ArrayRef<SrcOp> SrcOps,
Optional<unsigned> Flags = None);
diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
index c53ae416e60b..d9d076ba312c 100644
--- a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -1,9 +1,8 @@
//=- llvm/CodeGen/GlobalISel/RegBankSelect.h - Reg Bank Selector --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -524,7 +523,7 @@ private:
/// \p OnlyAssign == true means that \p Reg just needs to be assigned a
/// register bank. I.e., no repairing is necessary to have the
/// assignment match.
- bool assignmentMatch(unsigned Reg,
+ bool assignmentMatch(Register Reg,
const RegisterBankInfo::ValueMapping &ValMapping,
bool &OnlyAssign) const;
@@ -563,7 +562,7 @@ private:
bool repairReg(MachineOperand &MO,
const RegisterBankInfo::ValueMapping &ValMapping,
RegBankSelect::RepairingPlacement &RepairPt,
- const iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+ const iterator_range<SmallVectorImpl<Register>::const_iterator>
&NewVRegs);
/// Return the cost of the instruction needed to map \p MO to \p ValMapping.
@@ -634,6 +633,11 @@ public:
MachineFunctionProperties::Property::RegBankSelected);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::NoPHIs);
+ }
+
/// Walk through \p MF and assign a register bank to every virtual register
/// that are still mapped to nothing.
/// The target needs to provide a RegisterBankInfo and in particular
diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/include/llvm/CodeGen/GlobalISel/RegisterBank.h
index d5612e17393c..f528d1a46012 100644
--- a/include/llvm/CodeGen/GlobalISel/RegisterBank.h
+++ b/include/llvm/CodeGen/GlobalISel/RegisterBank.h
@@ -1,9 +1,8 @@
//==-- llvm/CodeGen/GlobalISel/RegisterBank.h - Register Bank ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index c33b32b2db40..e84b1c3ea8b1 100644
--- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.h ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <initializer_list>
@@ -161,6 +161,10 @@ public:
const PartialMapping *begin() const { return BreakDown; }
const PartialMapping *end() const { return BreakDown + NumBreakDowns; }
+ /// \return true if all partial mappings are the same size and register
+ /// bank.
+ bool partsAllUniform() const;
+
/// Check if this ValueMapping is valid.
bool isValid() const { return BreakDown && NumBreakDowns; }
@@ -190,7 +194,7 @@ public:
unsigned Cost = 0;
/// Mapping of all the operands.
- const ValueMapping *OperandsMapping;
+ const ValueMapping *OperandsMapping = nullptr;
/// Number of operands.
unsigned NumOperands = 0;
@@ -207,15 +211,11 @@ public:
/// The rationale is that it is more efficient for the optimizers
/// to be able to assume that the mapping of the ith operand is
/// at the index i.
- ///
- /// \pre ID != InvalidMappingID
InstructionMapping(unsigned ID, unsigned Cost,
const ValueMapping *OperandsMapping,
unsigned NumOperands)
: ID(ID), Cost(Cost), OperandsMapping(OperandsMapping),
NumOperands(NumOperands) {
- assert(getID() != InvalidMappingID &&
- "Use the default constructor for invalid mapping");
}
/// Default constructor.
@@ -282,7 +282,7 @@ public:
SmallVector<int, 8> OpToNewVRegIdx;
/// Hold the registers that will be used to map MI with InstrMapping.
- SmallVector<unsigned, 8> NewVRegs;
+ SmallVector<Register, 8> NewVRegs;
/// Current MachineRegisterInfo, used to create new virtual registers.
MachineRegisterInfo &MRI;
@@ -303,15 +303,15 @@ public:
/// \return The iterator range for the space created.
//
/// \pre getMI().getOperand(OpIdx).isReg()
- iterator_range<SmallVectorImpl<unsigned>::iterator>
+ iterator_range<SmallVectorImpl<Register>::iterator>
getVRegsMem(unsigned OpIdx);
/// Get the end iterator for a range starting at \p StartIdx and
  /// spanning \p NumVal in NewVRegs.
/// \pre StartIdx + NumVal <= NewVRegs.size()
- SmallVectorImpl<unsigned>::const_iterator
+ SmallVectorImpl<Register>::const_iterator
getNewVRegsEnd(unsigned StartIdx, unsigned NumVal) const;
- SmallVectorImpl<unsigned>::iterator getNewVRegsEnd(unsigned StartIdx,
+ SmallVectorImpl<Register>::iterator getNewVRegsEnd(unsigned StartIdx,
unsigned NumVal);
public:
@@ -357,7 +357,7 @@ public:
///
/// \post the \p PartialMapIdx-th register of the value mapping of the \p
/// OpIdx-th operand has been set.
- void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, unsigned NewVReg);
+ void setVRegs(unsigned OpIdx, unsigned PartialMapIdx, Register NewVReg);
/// Get all the virtual registers required to map the \p OpIdx-th operand of
/// the instruction.
@@ -371,7 +371,7 @@ public:
///
/// \pre getMI().getOperand(OpIdx).isReg()
/// \pre ForDebug || All partial mappings have been set a register
- iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+ iterator_range<SmallVectorImpl<Register>::const_iterator>
getVRegs(unsigned OpIdx, bool ForDebug = false) const;
/// Print this operands mapper on dbgs() stream.
@@ -435,7 +435,7 @@ protected:
/// Get the MinimalPhysRegClass for Reg.
/// \pre Reg is a physical register.
const TargetRegisterClass &
- getMinimalPhysRegClass(unsigned Reg, const TargetRegisterInfo &TRI) const;
+ getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const;
/// Try to get the mapping of \p MI.
/// See getInstrMapping for more details on what a mapping represents.
@@ -580,7 +580,7 @@ public:
/// or a register bank, then this returns nullptr.
///
/// \pre Reg != 0 (NoRegister)
- const RegisterBank *getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+ const RegisterBank *getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
/// Get the total number of register banks.
@@ -618,6 +618,21 @@ public:
return &A != &B;
}
+ /// \returns true if emitting a copy from \p Src to \p Dst is impossible.
+ bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src,
+ unsigned Size) const {
+ return copyCost(Dst, Src, Size) == std::numeric_limits<unsigned>::max();
+ }
+
+ /// Get the cost of using \p ValMapping to decompose a register. This is
+ /// similar to ::copyCost, except for cases where multiple copy-like
+ /// operations need to be inserted. If the register is used as a source
+ /// operand and already has a bank assigned, \p CurBank is non-null.
+ virtual unsigned getBreakDownCost(const ValueMapping &ValMapping,
+ const RegisterBank *CurBank = nullptr) const {
+ return std::numeric_limits<unsigned>::max();
+ }
+
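// Illustrative sketch (editorial aside, not part of the patch): a target's
// RegisterBankInfo subclass (here the hypothetical MyTargetRegisterBankInfo)
// can override the new hook to make value break-downs look cheap or
// prohibitively expensive.
//
//   unsigned MyTargetRegisterBankInfo::getBreakDownCost(
//       const ValueMapping &ValMapping, const RegisterBank *CurBank) const {
//     // Charge roughly one copy per partial mapping; the default UINT_MAX
//     // return value disables break-downs entirely.
//     return ValMapping.NumBreakDowns;
//   }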
/// Constrain the (possibly generic) virtual register \p Reg to \p RC.
///
/// \pre \p Reg is a virtual register that either has a bank or a class.
@@ -626,7 +641,7 @@ public:
/// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel
/// purpose, including non-select passes of GlobalISel
static const TargetRegisterClass *
- constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC,
+ constrainGenericRegister(Register Reg, const TargetRegisterClass &RC,
MachineRegisterInfo &MRI);
/// Identifier used when the related instruction mapping instance
@@ -711,7 +726,7 @@ public:
/// virtual register.
///
/// \pre \p Reg != 0 (NoRegister).
- unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI,
+ unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
/// Check that information hold by this instance make sense for the
diff --git a/include/llvm/CodeGen/GlobalISel/Types.h b/include/llvm/CodeGen/GlobalISel/Types.h
index 7b22e343a7f8..4fd7043ba02d 100644
--- a/include/llvm/CodeGen/GlobalISel/Types.h
+++ b/include/llvm/CodeGen/GlobalISel/Types.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/Types.h - Types used by GISel ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h
index 82b791d35b2b..4cdaa48fb689 100644
--- a/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -1,9 +1,8 @@
//==-- llvm/CodeGen/GlobalISel/Utils.h ---------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Register.h"
namespace llvm {
@@ -37,21 +37,37 @@ class ConstantFP;
class APFloat;
/// Try to constrain Reg to the specified register class. If this fails,
-/// create a new virtual register in the correct class and insert a COPY before
-/// \p InsertPt. The debug location of \p InsertPt is used for the new copy.
+/// create a new virtual register in the correct class.
///
/// \return The virtual register constrained to the right register class.
unsigned constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
- const RegisterBankInfo &RBI,
- MachineInstr &InsertPt, unsigned Reg,
+ const RegisterBankInfo &RBI, unsigned Reg,
const TargetRegisterClass &RegClass);
+/// Constrain the register operand OpIdx so that it belongs to the
+/// TargetRegisterClass passed as an argument (RegClass).
+/// If this fails, create a new virtual register in the correct class and
+/// insert a COPY before \p InsertPt if it is a use or after if it is a
+/// definition. The debug location of \p InsertPt is used for the new copy.
+///
+/// \return The virtual register constrained to the right register class.
+unsigned constrainOperandRegClass(const MachineFunction &MF,
+ const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI,
+ MachineInstr &InsertPt,
+ const TargetRegisterClass &RegClass,
+ const MachineOperand &RegMO, unsigned OpIdx);
+
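// Illustrative sketch (editorial aside, not part of the patch): constraining
// the defining operand of a just-selected instruction. `MI`, `RC` and the
// analysis objects are hypothetical names from a typical InstructionSelector.
//
//   unsigned NewReg = constrainOperandRegClass(MF, TRI, MRI, TII, RBI, MI,
//                                              *RC, MI.getOperand(0),
//                                              /*OpIdx=*/0);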
/// Try to constrain Reg so that it is usable by argument OpIdx of the
/// provided MCInstrDesc \p II. If this fails, create a new virtual
-/// register in the correct class and insert a COPY before \p InsertPt.
-/// This is equivalent to constrainRegToClass() with RegClass obtained from the
-/// MCInstrDesc. The debug location of \p InsertPt is used for the new copy.
+/// register in the correct class and insert a COPY before \p InsertPt
+/// if it is a use or after if it is a definition.
+/// This is equivalent to constrainOperandRegClass(..., RegClass, ...)
+/// with RegClass obtained from the MCInstrDesc. The debug location of \p
+/// InsertPt is used for the new copy.
///
/// \return The virtual register constrained to the right register class.
unsigned constrainOperandRegClass(const MachineFunction &MF,
@@ -90,17 +106,40 @@ void reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
const char *PassName, StringRef Msg,
const MachineInstr &MI);
+/// If \p VReg is defined by a G_CONSTANT whose value fits in int64_t,
+/// returns that value.
Optional<int64_t> getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI);
+/// Simple struct used to hold a constant integer value and a virtual
+/// register.
+struct ValueAndVReg {
+ int64_t Value;
+ unsigned VReg;
+};
+/// If \p VReg is defined by a statically evaluable chain of
+/// instructions rooted on a G_CONSTANT (\p LookThroughInstrs == true)
+/// and that constant fits in int64_t, returns its value as well as
+/// the virtual register defined by this G_CONSTANT.
+/// When \p LookThroughInstrs == false, this function behaves like
+/// getConstantVRegVal.
+Optional<ValueAndVReg>
+getConstantVRegValWithLookThrough(unsigned VReg, const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true);
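// Illustrative sketch (editorial aside, not part of the patch): looking
// through copies/truncations for an immediate operand (`Reg` and `MRI` are
// hypothetical names).
//
//   if (auto Cst = getConstantVRegValWithLookThrough(Reg, MRI)) {
//     int64_t Imm = Cst->Value;      // the folded constant
//     unsigned DefVReg = Cst->VReg;  // the G_CONSTANT's result register
//   }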
const ConstantFP* getConstantFPVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI);
/// See if Reg is defined by a single def instruction that is
/// Opcode. Also try to do trivial folding if it's a COPY with
/// same types. Returns null otherwise.
-MachineInstr *getOpcodeDef(unsigned Opcode, unsigned Reg,
+MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg,
const MachineRegisterInfo &MRI);
+/// Find the def instruction for \p Reg, folding away any trivial copies. Note
+/// it may still return a COPY, if it changes the type. May return nullptr if \p
+/// Reg is not a generic virtual register.
+MachineInstr *getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI);
+
/// Returns an APFloat from Val converted to the appropriate size.
APFloat getAPFloatFromSize(double Val, unsigned Size);
@@ -111,5 +150,16 @@ void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU);
Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
const unsigned Op2,
const MachineRegisterInfo &MRI);
+
+/// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true,
+/// this returns if \p Val can be assumed to never be a signaling NaN.
+bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ bool SNaN = false);
+
+/// Returns true if \p Val can be assumed to never be a signaling NaN.
+inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) {
+ return isKnownNeverNaN(Val, MRI, true);
+}
+
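// Illustrative sketch (editorial aside, not part of the patch): a combine
// guard that only fires when the FP input cannot be a signaling NaN (`MI`
// and `MRI` hypothetical).
//
//   Register Src = MI.getOperand(1).getReg();
//   bool SafeToFold = isKnownNeverSNaN(Src, MRI);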
} // End namespace llvm.
#endif
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 9c918ae1104f..acf27dcc5fab 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/ISDOpcodes.h - CodeGen opcodes -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -272,12 +271,17 @@ namespace ISD {
/// resulting value is this minimum value.
SSUBSAT, USUBSAT,
- /// RESULT = SMULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
+ /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on
/// 2 integers with the same width and scale. SCALE represents the scale of
/// both operands as fixed point numbers. This SCALE parameter must be a
/// constant integer. A scale of zero is effectively performing
/// multiplication on 2 integers.
- SMULFIX,
+ SMULFIX, UMULFIX,
+
+ /// Same as the corresponding unsaturated fixed point instructions, but the
+ /// result is clamped between the min and max values representable by the
+ /// bits of the first 2 operands.
+ SMULFIXSAT,
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
@@ -298,6 +302,26 @@ namespace ISD {
STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
+ /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
+ /// point type down to the precision of the destination VT. TRUNC is a
+ /// flag, which is always an integer that is zero or one. If TRUNC is 0,
+ /// this is a normal rounding, if it is 1, this FP_ROUND is known to not
+ /// change the value of Y.
+ ///
+ /// The TRUNC = 1 case is used in cases where we know that the value will
+ /// not be modified by the node, because Y is not using any of the extra
+ /// precision of source type. This allows certain transformations like
+ /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,1)) -> X which are not safe for
+ /// STRICT_FP_EXTEND(STRICT_FP_ROUND(X,0)) because the extra bits aren't
+ /// removed.
+ /// It is used to limit optimizations while the DAG is being optimized.
+ STRICT_FP_ROUND,
+
+ /// X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP
+ /// type.
+ /// It is used to limit optimizations while the DAG is being optimized.
+ STRICT_FP_EXTEND,
+
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
@@ -580,10 +604,14 @@ namespace ISD {
/// is often a storage-only type but has native conversions.
FP16_TO_FP, FP_TO_FP16,
- /// Perform various unary floating-point operations inspired by libm.
+ /// Perform various unary floating-point operations inspired by libm. For
+ /// FPOWI, the result is undefined if the integer operand doesn't fit
+ /// into 32 bits.
FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
FLOG, FLOG2, FLOG10, FEXP, FEXP2,
FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
+ LROUND, LLROUND, LRINT, LLRINT,
+
/// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
/// values.
//
@@ -666,6 +694,9 @@ namespace ISD {
/// SDOperands.
INLINEASM,
+ /// INLINEASM_BR - Terminator version of inline asm. Used by asm-goto.
+ INLINEASM_BR,
+
/// EH_LABEL - Represents a label in mid basic block used to track
/// locations needed for debug and exception handling tables. These nodes
/// take a chain as input and return a chain.
@@ -819,6 +850,8 @@ namespace ISD {
ATOMIC_LOAD_MAX,
ATOMIC_LOAD_UMIN,
ATOMIC_LOAD_UMAX,
+ ATOMIC_LOAD_FADD,
+ ATOMIC_LOAD_FSUB,
// Masked load and store - consecutive vector load and store operations
// with additional mask operand that prevents memory accesses to the
@@ -866,11 +899,14 @@ namespace ISD {
VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL,
/// These reductions are non-strict, and have a single vector operand.
VECREDUCE_FADD, VECREDUCE_FMUL,
+ /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
+ VECREDUCE_FMAX, VECREDUCE_FMIN,
+ /// Integer reductions may have a result type larger than the vector element
+ /// type. However, the reduction is performed using the vector element type
+ /// and the value in the top bits is unspecified.
VECREDUCE_ADD, VECREDUCE_MUL,
VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR,
VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN,
- /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
- VECREDUCE_FMAX, VECREDUCE_FMIN,
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index 597d684909c1..daf2d9a47801 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -1,9 +1,8 @@
//===-- IntrinsicLowering.h - Intrinsic Function Lowering -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,10 +30,6 @@ class IntrinsicLowering {
public:
explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {}
- /// Add all of the prototypes that might be needed by an intrinsic lowering
- /// implementation to be inserted into the module specified.
- void AddPrototypes(Module &M);
-
/// Replace a call to the specified intrinsic function.
/// If an intrinsic function must be implemented by the code generator
/// (such as va_start), this function should print a message and abort.
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 9b8d83ce77ca..95f4c6473542 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -1,9 +1,8 @@
//===---- LatencyPriorityQueue.h - A latency-oriented priority queue ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
index 221f16a03f16..ca99c6c89b19 100644
--- a/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/LazyMachineBlockFrequencyInfo.h
@@ -1,9 +1,8 @@
///===- LazyMachineBlockFrequencyInfo.h - Lazy Block Frequency -*- C++ -*--===//
///
-/// The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
///
///===---------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 3ba503487823..253d4734995b 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -1,9 +1,8 @@
//===- LexicalScopes.cpp - Collecting lexical scope info --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 38fcb37b1e69..75a5c359630e 100644
--- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -1,9 +1,8 @@
//===- llvm/Codegen/LinkAllAsmWriterComponents.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 18c13ca8f598..56c93b24147e 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -1,9 +1,8 @@
//===- llvm/Codegen/LinkAllCodegenComponents.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index cdf9ad2588cf..8bb88165d3e1 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/LiveInterval.h - Interval representation ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -606,6 +605,44 @@ namespace llvm {
/// activated in the constructor of the live range.
void flushSegmentSet();
+ /// Stores, through the output iterator O, the indexes from the input index
+ /// sequence R at which this LiveRange is live.
+ /// R is a range of random-access iterators over input indexes sorted in
+ /// ascending order. Indexes stored at O are also in ascending order, so the
+ /// output can be used directly in a subsequent search (for example for
+ /// subranges). Returns true if at least one index was found.
+ template <typename Range, typename OutputIt>
+ bool findIndexesLiveAt(Range &&R, OutputIt O) const {
+ assert(std::is_sorted(R.begin(), R.end()));
+ auto Idx = R.begin(), EndIdx = R.end();
+ auto Seg = segments.begin(), EndSeg = segments.end();
+ bool Found = false;
+ while (Idx != EndIdx && Seg != EndSeg) {
+ // if the Seg is lower find first segment that is above Idx using binary
+ // search
+ if (Seg->end <= *Idx) {
+ Seg = std::upper_bound(++Seg, EndSeg, *Idx,
+ [=](typename std::remove_reference<decltype(*Idx)>::type V,
+ const typename std::remove_reference<decltype(*Seg)>::type &S) {
+ return V < S.end;
+ });
+ if (Seg == EndSeg)
+ break;
+ }
+ auto NotLessStart = std::lower_bound(Idx, EndIdx, Seg->start);
+ if (NotLessStart == EndIdx)
+ break;
+ auto NotLessEnd = std::lower_bound(NotLessStart, EndIdx, Seg->end);
+ if (NotLessEnd != NotLessStart) {
+ Found = true;
+ O = std::copy(NotLessStart, NotLessEnd, O);
+ }
+ Idx = NotLessEnd;
+ ++Seg;
+ }
+ return Found;
+ }
+
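// Illustrative sketch (editorial aside, not part of the patch): collecting
// the subset of an ascending-sorted index list at which a live range `LR` is
// live (`LR` and `SortedIdxs` hypothetical; <iterator> supplies back_inserter).
//
//   SmallVector<SlotIndex, 8> LiveAt;
//   bool AnyLive = LR.findIndexesLiveAt(SortedIdxs, std::back_inserter(LiveAt));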
void print(raw_ostream &OS) const;
void dump() const;
@@ -790,8 +827,15 @@ namespace llvm {
/// L000F, refining for mask L0018. Will split the L00F0 lane into
/// L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod
/// function will be applied to the L0010 and L0008 subranges.
+ ///
+ /// \p Indexes and \p TRI are required to clean up the VNIs that
+ /// don't define the related lane masks after they get shrunk. E.g.,
+ /// when L000F gets split into L0007 and L0008 maybe only a subset
+ /// of the VNIs that defined L000F defines L0007.
void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
- std::function<void(LiveInterval::SubRange&)> Apply);
+ std::function<void(LiveInterval::SubRange &)> Apply,
+ const SlotIndexes &Indexes,
+ const TargetRegisterInfo &TRI);
bool operator<(const LiveInterval& other) const {
const SlotIndex &thisIndex = beginIndex();
diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h
index 9e2799bd4414..05506d2c3bc6 100644
--- a/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -1,9 +1,8 @@
//===- LiveIntervalUnion.h - Live interval union data struct ---*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveIntervals.h b/include/llvm/CodeGen/LiveIntervals.h
index 16ab1dc475c4..588b0f9cf39c 100644
--- a/include/llvm/CodeGen/LiveIntervals.h
+++ b/include/llvm/CodeGen/LiveIntervals.h
@@ -1,9 +1,8 @@
//===- LiveIntervals.h - Live Interval Analysis -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -418,6 +417,15 @@ class VirtRegMap;
RegUnitRanges[Unit] = nullptr;
}
+ /// Remove associated live ranges for the register units associated with \p
+ /// Reg. Subsequent uses should rely on on-demand recomputation. \note This
+ /// method can result in inconsistent liveness tracking if multiple physical
+ /// registers share a regunit, and should be used cautiously.
+ void removeAllRegUnitsForPhysReg(unsigned Reg) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ removeRegUnit(*Units);
+ }
+
/// Remove value numbers and related live segments starting at position
/// \p Pos that are part of any liverange of physical register \p Reg or one
/// of its subregisters.
diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h
index 7312902e21b7..50da0b3d5c48 100644
--- a/include/llvm/CodeGen/LivePhysRegs.h
+++ b/include/llvm/CodeGen/LivePhysRegs.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/LivePhysRegs.h - Live Physical Register Set -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index 53830297c525..6519937ec071 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -1,9 +1,8 @@
//===- LiveRangeEdit.h - Basic tools for split and spill --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h
index f62a55c73085..ab4d44f9a611 100644
--- a/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/include/llvm/CodeGen/LiveRegMatrix.h
@@ -1,9 +1,8 @@
//===- LiveRegMatrix.h - Track register interference ----------*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h
index 5e9dd8b3cdf6..7dbb2feab8bf 100644
--- a/include/llvm/CodeGen/LiveRegUnits.h
+++ b/include/llvm/CodeGen/LiveRegUnits.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/LiveRegUnits.h - Register Unit Set ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveStacks.h b/include/llvm/CodeGen/LiveStacks.h
index 44ed785f7b53..7c4c64d515df 100644
--- a/include/llvm/CodeGen/LiveStacks.h
+++ b/include/llvm/CodeGen/LiveStacks.h
@@ -1,9 +1,8 @@
//===- LiveStacks.h - Live Stack Slot Analysis ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index ed8da8662106..71de306e2942 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/LiveVariables.h - Live Variable Analysis ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LoopTraversal.h b/include/llvm/CodeGen/LoopTraversal.h
index 750da0143c0d..e5810ef1ef26 100644
--- a/include/llvm/CodeGen/LoopTraversal.h
+++ b/include/llvm/CodeGen/LoopTraversal.h
@@ -1,9 +1,8 @@
//==------ llvm/CodeGen/LoopTraversal.h - Loop Traversal -*- C++ -*---------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/LowLevelType.h b/include/llvm/CodeGen/LowLevelType.h
index a3c5c9329f53..687233e4e168 100644
--- a/include/llvm/CodeGen/LowLevelType.h
+++ b/include/llvm/CodeGen/LowLevelType.h
@@ -1,9 +1,8 @@
//== llvm/CodeGen/LowLevelType.h ------------------------------- -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MIRParser/MIParser.h b/include/llvm/CodeGen/MIRParser/MIParser.h
new file mode 100644
index 000000000000..4e32a04551c1
--- /dev/null
+++ b/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -0,0 +1,233 @@
+//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that parses the machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MDNode;
+class RegisterBank;
+struct SlotMapping;
+class SMDiagnostic;
+class SourceMgr;
+class StringRef;
+class TargetRegisterClass;
+class TargetSubtargetInfo;
+
+struct VRegInfo {
+ enum uint8_t {
+ UNKNOWN, NORMAL, GENERIC, REGBANK
+ } Kind = UNKNOWN;
+ bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
+ union {
+ const TargetRegisterClass *RC;
+ const RegisterBank *RegBank;
+ } D;
+ unsigned VReg;
+ unsigned PreferredReg = 0;
+};
+
+using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
+using Name2RegBankMap = StringMap<const RegisterBank *>;
+
+struct PerTargetMIParsingState {
+private:
+ const TargetSubtargetInfo &Subtarget;
+
+ /// Maps from instruction names to op codes.
+ StringMap<unsigned> Names2InstrOpCodes;
+
+ /// Maps from register names to registers.
+ StringMap<unsigned> Names2Regs;
+
+ /// Maps from register mask names to register masks.
+ StringMap<const uint32_t *> Names2RegMasks;
+
+ /// Maps from subregister names to subregister indices.
+ StringMap<unsigned> Names2SubRegIndices;
+
+ /// Maps from target index names to target indices.
+ StringMap<int> Names2TargetIndices;
+
+ /// Maps from direct target flag names to the direct target flag values.
+ StringMap<unsigned> Names2DirectTargetFlags;
+
+ /// Maps from direct target flag names to the bitmask target flag values.
+ StringMap<unsigned> Names2BitmaskTargetFlags;
+
+ /// Maps from MMO target flag names to MMO target flag values.
+ StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
+
+ /// Maps from register class names to register classes.
+ Name2RegClassMap Names2RegClasses;
+
+ /// Maps from register bank names to register banks.
+ Name2RegBankMap Names2RegBanks;
+
+ void initNames2InstrOpCodes();
+ void initNames2Regs();
+ void initNames2RegMasks();
+ void initNames2SubRegIndices();
+ void initNames2TargetIndices();
+ void initNames2DirectTargetFlags();
+ void initNames2BitmaskTargetFlags();
+ void initNames2MMOTargetFlags();
+
+ void initNames2RegClasses();
+ void initNames2RegBanks();
+
+public:
+ /// Try to convert an instruction name to an opcode. Return true if the
+ /// instruction name is invalid.
+ bool parseInstrName(StringRef InstrName, unsigned &OpCode);
+
+ /// Try to convert a register name to a register number. Return true if the
+ /// register name is invalid.
+ bool getRegisterByName(StringRef RegName, unsigned &Reg);
+
+ /// Check if the given identifier is a name of a register mask.
+ ///
+ /// Return null if the identifier isn't a register mask.
+ const uint32_t *getRegMask(StringRef Identifier);
+
+ /// Check if the given identifier is a name of a subregister index.
+ ///
+ /// Return 0 if the name isn't a subregister index class.
+ unsigned getSubRegIndex(StringRef Name);
+
+ /// Try to convert a name of target index to the corresponding target index.
+ ///
+ /// Return true if the name isn't a name of a target index.
+ bool getTargetIndex(StringRef Name, int &Index);
+
+ /// Try to convert a name of a direct target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a direct flag.
+ bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
+
+ /// Try to convert a name of a bitmask target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a bitmask target flag.
+ bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
+
+ /// Try to convert a name of a MachineMemOperand target flag to the
+ /// corresponding target flag.
+ ///
+ /// Return true if the name isn't a name of a target MMO flag.
+ bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
+
+ /// Check if the given identifier is a name of a register class.
+ ///
+ /// Return null if the name isn't a register class.
+ const TargetRegisterClass *getRegClass(StringRef Name);
+
+ /// Check if the given identifier is a name of a register bank.
+ ///
+ /// Return null if the name isn't a register bank.
+ const RegisterBank *getRegBank(StringRef Name);
+
+ PerTargetMIParsingState(const TargetSubtargetInfo &STI)
+ : Subtarget(STI) {
+ initNames2RegClasses();
+ initNames2RegBanks();
+ }
+
+ ~PerTargetMIParsingState() = default;
+
+ void setTarget(const TargetSubtargetInfo &NewSubtarget);
+};
+
+struct PerFunctionMIParsingState {
+ BumpPtrAllocator Allocator;
+ MachineFunction &MF;
+ SourceMgr *SM;
+ const SlotMapping &IRSlots;
+ PerTargetMIParsingState &Target;
+
+ DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
+ DenseMap<unsigned, VRegInfo *> VRegInfos;
+ StringMap<VRegInfo *> VRegInfosNamed;
+ DenseMap<unsigned, int> FixedStackObjectSlots;
+ DenseMap<unsigned, int> StackObjectSlots;
+ DenseMap<unsigned, unsigned> ConstantPoolSlots;
+ DenseMap<unsigned, unsigned> JumpTableSlots;
+
+ PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
+ const SlotMapping &IRSlots,
+ PerTargetMIParsingState &Target);
+
+ VRegInfo &getVRegInfo(unsigned Num);
+ VRegInfo &getVRegInfoNamed(StringRef RegName);
+};
+
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because not
+/// all of the machine basic blocks are defined yet, which makes it impossible
+/// to resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseMBBReference(PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseRegisterReference(PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
+ StringRef Src, SMDiagnostic &Error);
+
+bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+ VRegInfo *&Info, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
+ StringRef Src, SMDiagnostic &Error);
+
+bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
+ SMDiagnostic &Error);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
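
The two-pass parsing interface above is driven through a PerFunctionMIParsingState. A minimal sketch of how a caller might run both passes over a function body follows; MF, SrcMgr, IRSlots, Target, and Body are assumed to be set up already (the names are illustrative, not taken from this patch).

  // Minimal sketch: run the two MIR parsing passes over a function body.
  PerFunctionMIParsingState PFS(MF, SrcMgr, IRSlots, Target);
  SMDiagnostic Error;
  // First pass: create the machine basic blocks so references can resolve.
  if (parseMachineBasicBlockDefinitions(PFS, Body, Error))
    return true; // Error now describes the failure.
  // Second pass: parse the instructions, liveins and successors.
  if (parseMachineInstructions(PFS, Body, Error))
    return true;
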
diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h
index e199a1f69ad7..6a04e48e533c 100644
--- a/include/llvm/CodeGen/MIRParser/MIRParser.h
+++ b/include/llvm/CodeGen/MIRParser/MIRParser.h
@@ -1,9 +1,8 @@
//===- MIRParser.h - MIR serialization format parser ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MIRPrinter.h b/include/llvm/CodeGen/MIRPrinter.h
index 078c4b2f6072..a4b03a7fb765 100644
--- a/include/llvm/CodeGen/MIRPrinter.h
+++ b/include/llvm/CodeGen/MIRPrinter.h
@@ -1,9 +1,8 @@
-//===- MIRPrinter.h - MIR serialization format printer --------------------===//
+//===- MIRPrinter.h - MIR serialization format printer ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h
index 98ac81915dc0..94e76a75e8da 100644
--- a/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/include/llvm/CodeGen/MIRYamlMapping.h
@@ -1,9 +1,8 @@
-//===- MIRYAMLMapping.h - Describes the mapping between MIR and YAML ------===//
+//===- MIRYamlMapping.h - Describe mapping between MIR and YAML--*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,6 +37,7 @@ struct StringValue {
StringValue() = default;
StringValue(std::string Value) : Value(std::move(Value)) {}
+ StringValue(const char Val[]) : Value(Val) {}
bool operator==(const StringValue &Other) const {
return Value == Other.Value;
@@ -212,7 +213,7 @@ struct MachineStackObject {
int64_t Offset = 0;
uint64_t Size = 0;
unsigned Alignment = 0;
- uint8_t StackID = 0;
+ TargetStackID::Value StackID;
StringValue CalleeSavedRegister;
bool CalleeSavedRestored = true;
Optional<int64_t> LocalOffset;
@@ -252,7 +253,7 @@ template <> struct MappingTraits<MachineStackObject> {
if (Object.Type != MachineStackObject::VariableSized)
YamlIO.mapRequired("size", Object.Size);
YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
- YamlIO.mapOptional("stack-id", Object.StackID);
+ YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default);
YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister,
StringValue()); // Don't print it out when it's empty.
YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored,
@@ -278,7 +279,7 @@ struct FixedMachineStackObject {
int64_t Offset = 0;
uint64_t Size = 0;
unsigned Alignment = 0;
- uint8_t StackID = 0;
+ TargetStackID::Value StackID;
bool IsImmutable = false;
bool IsAliased = false;
StringValue CalleeSavedRegister;
@@ -308,6 +309,15 @@ struct ScalarEnumerationTraits<FixedMachineStackObject::ObjectType> {
}
};
+template <>
+struct ScalarEnumerationTraits<TargetStackID::Value> {
+ static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
+ IO.enumCase(ID, "default", TargetStackID::Default);
+ IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
+ IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
+ }
+};
+
template <> struct MappingTraits<FixedMachineStackObject> {
static void mapping(yaml::IO &YamlIO, FixedMachineStackObject &Object) {
YamlIO.mapRequired("id", Object.ID);
@@ -317,7 +327,7 @@ template <> struct MappingTraits<FixedMachineStackObject> {
YamlIO.mapOptional("offset", Object.Offset, (int64_t)0);
YamlIO.mapOptional("size", Object.Size, (uint64_t)0);
YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0);
- YamlIO.mapOptional("stack-id", Object.StackID);
+ YamlIO.mapOptional("stack-id", Object.StackID, TargetStackID::Default);
if (Object.Type != FixedMachineStackObject::SpillSlot) {
YamlIO.mapOptional("isImmutable", Object.IsImmutable, false);
YamlIO.mapOptional("isAliased", Object.IsAliased, false);
@@ -337,6 +347,66 @@ template <> struct MappingTraits<FixedMachineStackObject> {
static const bool flow = true;
};
+
+/// Serializable representation of CallSiteInfo.
+struct CallSiteInfo {
+ // Representation of a call argument and the register used to
+ // transfer it.
+ struct ArgRegPair {
+ StringValue Reg;
+ uint16_t ArgNo;
+
+ bool operator==(const ArgRegPair &Other) const {
+ return Reg == Other.Reg && ArgNo == Other.ArgNo;
+ }
+ };
+
+ /// Identifies the location of a call instruction within a machine function.
+ struct MachineInstrLoc {
+ unsigned BlockNum;
+ unsigned Offset;
+
+ bool operator==(const MachineInstrLoc &Other) const {
+ return BlockNum == Other.BlockNum && Offset == Other.Offset;
+ }
+ };
+
+ MachineInstrLoc CallLocation;
+ std::vector<ArgRegPair> ArgForwardingRegs;
+
+ bool operator==(const CallSiteInfo &Other) const {
+ return CallLocation.BlockNum == Other.CallLocation.BlockNum &&
+ CallLocation.Offset == Other.CallLocation.Offset;
+ }
+};
+
+template <> struct MappingTraits<CallSiteInfo::ArgRegPair> {
+ static void mapping(IO &YamlIO, CallSiteInfo::ArgRegPair &ArgReg) {
+ YamlIO.mapRequired("arg", ArgReg.ArgNo);
+ YamlIO.mapRequired("reg", ArgReg.Reg);
+ }
+
+ static const bool flow = true;
+};
+}
+}
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CallSiteInfo::ArgRegPair)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<CallSiteInfo> {
+ static void mapping(IO &YamlIO, CallSiteInfo &CSInfo) {
+ YamlIO.mapRequired("bb", CSInfo.CallLocation.BlockNum);
+ YamlIO.mapRequired("offset", CSInfo.CallLocation.Offset);
+ YamlIO.mapOptional("fwdArgRegs", CSInfo.ArgForwardingRegs,
+ std::vector<CallSiteInfo::ArgRegPair>());
+ }
+
+ static const bool flow = true;
+};
+
struct MachineConstantPoolValue {
UnsignedValue ID;
StringValue Value;
@@ -391,6 +461,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineFunctionLiveIn)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::VirtualRegisterDefinition)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineStackObject)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineJumpTable::Entry)
@@ -483,6 +554,20 @@ template <> struct MappingTraits<MachineFrameInfo> {
}
};
+/// Targets should override this in a way that mirrors the implementation of
+/// llvm::MachineFunctionInfo.
+struct MachineFunctionInfo {
+ virtual ~MachineFunctionInfo() {}
+ virtual void mappingImpl(IO &YamlIO) {}
+};
+
+template <> struct MappingTraits<std::unique_ptr<MachineFunctionInfo>> {
+ static void mapping(IO &YamlIO, std::unique_ptr<MachineFunctionInfo> &MFI) {
+ if (MFI)
+ MFI->mappingImpl(YamlIO);
+ }
+};
+
struct MachineFunction {
StringRef Name;
unsigned Alignment = 0;
@@ -504,6 +589,8 @@ struct MachineFunction {
std::vector<FixedMachineStackObject> FixedStackObjects;
std::vector<MachineStackObject> StackObjects;
std::vector<MachineConstantPoolValue> Constants; /// Constant pool.
+ std::unique_ptr<MachineFunctionInfo> MachineFuncInfo;
+ std::vector<CallSiteInfo> CallSitesInfo;
MachineJumpTable JumpTableInfo;
BlockStringValue Body;
};
@@ -530,8 +617,11 @@ template <> struct MappingTraits<MachineFunction> {
std::vector<FixedMachineStackObject>());
YamlIO.mapOptional("stack", MF.StackObjects,
std::vector<MachineStackObject>());
+ YamlIO.mapOptional("callSites", MF.CallSitesInfo,
+ std::vector<CallSiteInfo>());
YamlIO.mapOptional("constants", MF.Constants,
std::vector<MachineConstantPoolValue>());
+ YamlIO.mapOptional("machineFunctionInfo", MF.MachineFuncInfo);
if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty())
YamlIO.mapOptional("jumpTable", MF.JumpTableInfo, MachineJumpTable());
YamlIO.mapOptional("body", MF.Body, BlockStringValue());
diff --git a/include/llvm/CodeGen/MachORelocation.h b/include/llvm/CodeGen/MachORelocation.h
index cbb49695af75..0185c7cbe018 100644
--- a/include/llvm/CodeGen/MachORelocation.h
+++ b/include/llvm/CodeGen/MachORelocation.h
@@ -1,9 +1,8 @@
//=== MachORelocation.h - Mach-O Relocation Info ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index ec2f270fcb3f..333d0a78618c 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineBasicBlock.h -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -115,6 +114,10 @@ private:
/// branch.
bool AddressTaken = false;
+ /// Indicate that this basic block needs its symbol to be emitted regardless
+ /// of whether control flow just falls through to it.
+ bool LabelMustBeEmitted = false;
+
/// Indicate that this basic block is the entry block of an EH scope, i.e.,
/// the block that used to have a catchpad or cleanuppad instruction in the
/// LLVM IR.
@@ -159,6 +162,13 @@ public:
/// branch.
void setHasAddressTaken() { AddressTaken = true; }
+ /// Test whether this block must have its label emitted.
+ bool hasLabelMustBeEmitted() const { return LabelMustBeEmitted; }
+
+ /// Set this block to reflect that, regardless of how we flow to it, we need
+ /// its label to be emitted.
+ void setLabelMustBeEmitted() { LabelMustBeEmitted = true; }
+
/// Return the MachineFunction containing this basic block.
const MachineFunction *getParent() const { return xParent; }
MachineFunction *getParent() { return xParent; }
@@ -900,11 +910,11 @@ class MachineInstrSpan {
MachineBasicBlock::iterator I, B, E;
public:
- MachineInstrSpan(MachineBasicBlock::iterator I)
- : MBB(*I->getParent()),
- I(I),
- B(I == MBB.begin() ? MBB.end() : std::prev(I)),
- E(std::next(I)) {}
+ MachineInstrSpan(MachineBasicBlock::iterator I, MachineBasicBlock *BB)
+ : MBB(*BB), I(I), B(I == MBB.begin() ? MBB.end() : std::prev(I)),
+ E(std::next(I)) {
+ assert(I == BB->end() || I->getParent() == BB);
+ }
MachineBasicBlock::iterator begin() {
return B == MBB.end() ? MBB.begin() : std::next(B);
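
With the change above, MachineInstrSpan takes the owning block explicitly, which also works when the iterator is at end(). A minimal sketch of the updated call-site pattern, assuming MI is an iterator into the block MBB (illustrative names):

  // Old form inferred the block from MI->getParent(); the new form passes it.
  MachineInstrSpan MIS(MI, MBB);
  for (MachineInstr &NewMI : MIS) {
    // Visit the instructions inserted around MI (illustrative use).
  }
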
diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 5b4b99ca0a5d..a438ecfcc25e 100644
--- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -1,9 +1,8 @@
//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 81b0524cf0a4..2b9b2030eb97 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -1,9 +1,8 @@
//=- MachineBranchProbabilityInfo.h - Branch Probability Analysis -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h
index 586535f771c2..4f4034baf801 100644
--- a/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -1,10 +1,9 @@
//===-- llvm/CodeGen/MachineCombinerPattern.h - Instruction pattern supported by
// combiner ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index b0b5420a884b..4d07b620a4b4 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -1,9 +1,8 @@
//===- CodeGen/MachineConstantPool.h - Abstract Constant Pool ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h
index 75d75bc3669a..f7bbd07a63ab 100644
--- a/include/llvm/CodeGen/MachineDominanceFrontier.h
+++ b/include/llvm/CodeGen/MachineDominanceFrontier.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineDominanceFrontier.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index e3d3d169db97..d2200080b897 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index c2706a21a177..761735120a64 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -1,9 +1,8 @@
//===-- CodeGen/MachineFrameInfo.h - Abstract Stack Frame Rep. --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -471,7 +470,10 @@ public:
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
Objects[ObjectIdx+NumFixedObjects].Alignment = Align;
- ensureMaxAlignment(Align);
+
+ // Only ensure max alignment for the default stack.
+ if (getStackID(ObjectIdx) == 0)
+ ensureMaxAlignment(Align);
}
/// Return the underlying Alloca of the specified
@@ -698,6 +700,8 @@ public:
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
Objects[ObjectIdx+NumFixedObjects].StackID = ID;
+ // If ID > 0, MaxAlignment may now be overly conservative.
+ // If ID == 0, MaxAlignment will need to be updated separately.
}
/// Returns true if the specified index corresponds to a dead object.
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 25edf5bcce51..201c126ee52e 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineFunction.h ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,11 +30,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -53,6 +47,7 @@ namespace llvm {
class BasicBlock;
class BlockAddress;
class DataLayout;
+class DebugLoc;
class DIExpression;
class DILocalVariable;
class DILocation;
@@ -67,6 +62,7 @@ class MachineModuleInfo;
class MachineRegisterInfo;
class MCContext;
class MCInstrDesc;
+class MCSymbol;
class Pass;
class PseudoSourceValueManager;
class raw_ostream;
@@ -86,7 +82,7 @@ template <> struct ilist_callback_traits<MachineBasicBlock> {
template <class Iterator>
void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) {
- llvm_unreachable("Never transfer between lists");
+ assert(this == &OldList && "never transfer MBBs between functions");
}
};
@@ -325,6 +321,10 @@ class MachineFunction {
/// CodeView label annotations.
std::vector<std::pair<MCSymbol *, MDNode *>> CodeViewAnnotations;
+ /// CodeView heapallocsites.
+ std::vector<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
+ CodeViewHeapAllocSites;
+
bool CallsEHReturn = false;
bool CallsUnwindInit = false;
bool HasEHScopes = false;
@@ -378,9 +378,28 @@ public:
virtual void MF_HandleRemoval(MachineInstr &MI) = 0;
};
+ /// Structure used to represent the pairing of an argument number (after call
+ /// lowering) with the register used to transfer that argument.
+ /// For now we only support cases where an argument is transferred through a
+ /// single register.
+ struct ArgRegPair {
+ unsigned Reg;
+ uint16_t ArgNo;
+ ArgRegPair(unsigned R, unsigned Arg) : Reg(R), ArgNo(Arg) {
+ assert(Arg < (1 << 16) && "Arg out of range");
+ }
+ };
+ /// Vector of call arguments and their forwarding registers.
+ using CallSiteInfo = SmallVector<ArgRegPair, 1>;
+ using CallSiteInfoImpl = SmallVectorImpl<ArgRegPair>;
+
private:
Delegate *TheDelegate = nullptr;
+ using CallSiteInfoMap = DenseMap<const MachineInstr *, CallSiteInfo>;
+ /// Map a call instruction to call site arguments forwarding info.
+ CallSiteInfoMap CallSitesInfo;
+
// Callbacks for insertion and removal.
void handleInsertion(MachineInstr &MI);
void handleRemoval(MachineInstr &MI);
@@ -443,7 +462,6 @@ public:
/// getSubtarget - Return the subtarget for which this machine code is being
/// compiled.
const TargetSubtargetInfo &getSubtarget() const { return *STI; }
- void setSubtarget(const TargetSubtargetInfo *ST) { STI = ST; }
/// getSubtarget - This method returns a pointer to the specified type of
/// TargetSubtargetInfo. In debug builds, it verifies that the object being
@@ -741,6 +759,12 @@ public:
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
const AAMDNodes &AAInfo);
+ /// Allocate a new MachineMemOperand by copying an existing one,
+ /// replacing the flags. MachineMemOperands are owned
+ /// by the MachineFunction and need not be explicitly deallocated.
+ MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+ MachineMemOperand::Flags Flags);
+
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
/// Allocate an array of MachineOperands. This is only intended for use by
@@ -791,10 +815,7 @@ public:
return FrameInstructions;
}
- LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst) {
- FrameInstructions.push_back(Inst);
- return FrameInstructions.size() - 1;
- }
+ LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst);
/// \name Exception Handling
/// \{
@@ -913,6 +934,14 @@ public:
return CodeViewAnnotations;
}
+ /// Record a CodeView heapallocsite for the given instruction.
+ void addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD);
+
+ ArrayRef<std::tuple<MCSymbol*, MCSymbol*, DIType*>>
+ getCodeViewHeapAllocSites() const {
+ return CodeViewHeapAllocSites;
+ }
+
/// Return a reference to the C++ typeinfo for the current function.
const std::vector<const GlobalValue *> &getTypeInfos() const {
return TypeInfos;
@@ -936,6 +965,23 @@ public:
const VariableDbgInfoMapTy &getVariableDbgInfo() const {
return VariableDbgInfos;
}
+
+ void addCallArgsForwardingRegs(const MachineInstr *CallI,
+ CallSiteInfoImpl &&CallInfo) {
+ assert(CallI->isCall());
+ CallSitesInfo[CallI] = std::move(CallInfo);
+ }
+
+ const CallSiteInfoMap &getCallSitesInfo() const {
+ return CallSitesInfo;
+ }
+
+ /// Update the call site info by deleting the entry for the \p Old call
+ /// instruction. If \p New is present, transfer the \p Old call info to it.
+ /// This function should be called before removing a call instruction or
+ /// before replacing one call instruction with another.
+ void updateCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New = nullptr);
};
//===--------------------------------------------------------------------===//
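
A minimal sketch of how a target's call lowering might use the new call site info hooks; MF, CallMI, NewCallMI, and ArgReg are assumed to exist in the caller (illustrative names, not from this patch).

  // Record which register forwards which argument for a lowered call.
  MachineFunction::CallSiteInfo CSInfo;
  CSInfo.emplace_back(ArgReg, /*ArgNo=*/0u);
  MF.addCallArgsForwardingRegs(&CallMI, std::move(CSInfo));

  // Later, if the call instruction is rewritten, keep the map in sync:
  MF.updateCallSiteInfo(&CallMI, &NewCallMI);
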
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index 6d978daa2018..caaf22c2139e 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -1,9 +1,8 @@
//===-- MachineFunctionPass.h - Pass for MachineFunctions --------*-C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index ea1a2a536fc7..c82c5b137507 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineInstr.h - MachineInstr class ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,6 +24,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -103,8 +103,10 @@ public:
// no unsigned wrap.
NoSWrap = 1 << 12, // Instruction supports binary operator
// no signed wrap.
- IsExact = 1 << 13 // Instruction supports division is
+ IsExact = 1 << 13, // Instruction supports division is
// known to be exact.
+ FPExcept = 1 << 14, // Instruction may raise floating-point
+ // exceptions.
};
private:
@@ -831,6 +833,17 @@ public:
return mayLoad(Type) || mayStore(Type);
}
+ /// Return true if this instruction could possibly raise a floating-point
+ /// exception. This is the case if the instruction is a floating-point
+ /// instruction that can in principle raise an exception, as indicated
+ /// by the MCID::MayRaiseFPException property, *and* at the same time,
+ /// the instruction is used in a context where we expect floating-point
+ /// exceptions might be enabled, as indicated by the FPExcept MI flag.
+ bool mayRaiseFPException() const {
+ return hasProperty(MCID::MayRaiseFPException) &&
+ getFlag(MachineInstr::MIFlag::FPExcept);
+ }
+
//===--------------------------------------------------------------------===//
// Flags that indicate whether an instruction can be modified by a method.
//===--------------------------------------------------------------------===//
@@ -1006,16 +1019,33 @@ public:
&& getOperand(1).isImm();
}
+ /// A DBG_VALUE is an entry value iff its debug expression contains the
+ /// DW_OP_entry_value DWARF operation.
+ bool isDebugEntryValue() const {
+ return isDebugValue() && getDebugExpression()->isEntryValue();
+ }
+
+ /// Return true if the instruction is a debug value which describes a part of
+ /// a variable as unavailable.
+ bool isUndefDebugValue() const {
+ return isDebugValue() && getOperand(0).isReg() && !getOperand(0).getReg();
+ }
+
bool isPHI() const {
return getOpcode() == TargetOpcode::PHI ||
getOpcode() == TargetOpcode::G_PHI;
}
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
- bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
+ bool isInlineAsm() const {
+ return getOpcode() == TargetOpcode::INLINEASM ||
+ getOpcode() == TargetOpcode::INLINEASM_BR;
+ }
+ /// FIXME: Seems like a layering violation that the AsmDialect, which is X86
+ /// specific, is attached to a generic MachineInstr.
bool isMSInlineAsm() const {
- return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect();
+ return isInlineAsm() && getInlineAsmDialect() == InlineAsm::AD_Intel;
}
bool isStackAligningInlineAsm() const;
@@ -1197,12 +1227,22 @@ public:
/// Wrapper for findRegisterDefOperandIdx, it returns
/// a pointer to the MachineOperand rather than an index.
- MachineOperand *findRegisterDefOperand(unsigned Reg, bool isDead = false,
- const TargetRegisterInfo *TRI = nullptr) {
- int Idx = findRegisterDefOperandIdx(Reg, isDead, false, TRI);
+ MachineOperand *
+ findRegisterDefOperand(unsigned Reg, bool isDead = false,
+ bool Overlap = false,
+ const TargetRegisterInfo *TRI = nullptr) {
+ int Idx = findRegisterDefOperandIdx(Reg, isDead, Overlap, TRI);
return (Idx == -1) ? nullptr : &getOperand(Idx);
}
+ const MachineOperand *
+ findRegisterDefOperand(unsigned Reg, bool isDead = false,
+ bool Overlap = false,
+ const TargetRegisterInfo *TRI = nullptr) const {
+ return const_cast<MachineInstr *>(this)->findRegisterDefOperand(
+ Reg, isDead, Overlap, TRI);
+ }
+
/// Find the index of the first operand in the
/// operand list that is used to represent the predicate. It returns -1 if
/// none is found.
@@ -1364,7 +1404,7 @@ public:
/// @param AA Optional alias analysis, used to compare memory operands.
/// @param Other MachineInstr to check aliasing against.
/// @param UseTBAA Whether to pass TBAA information to alias analysis.
- bool mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA);
+ bool mayAlias(AliasAnalysis *AA, const MachineInstr &Other, bool UseTBAA) const;
/// Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
@@ -1400,6 +1440,19 @@ public:
/// Return true if all the defs of this instruction are dead.
bool allDefsAreDead() const;
+ /// Return a valid size if the instruction is a spill instruction.
+ Optional<unsigned> getSpillSize(const TargetInstrInfo *TII) const;
+
+ /// Return a valid size if the instruction is a folded spill instruction.
+ Optional<unsigned> getFoldedSpillSize(const TargetInstrInfo *TII) const;
+
+ /// Return a valid size if the instruction is a restore instruction.
+ Optional<unsigned> getRestoreSize(const TargetInstrInfo *TII) const;
+
+ /// Return a valid size if the instruction is a folded restore instruction.
+ Optional<unsigned>
+ getFoldedRestoreSize(const TargetInstrInfo *TII) const;
+
/// Copy implicit register operands from specified
/// instruction to this instruction.
void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI);
@@ -1521,11 +1574,17 @@ public:
/// FIXME: This is not fully implemented yet.
void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol);
+ /// Clone another MachineInstr's pre- and post- instruction symbols and
+ /// replace ours with it.
+ void cloneInstrSymbols(MachineFunction &MF, const MachineInstr &MI);
+
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
uint16_t mergeFlagsWith(const MachineInstr& Other) const;
+ static uint16_t copyFlagsFromInstruction(const Instruction &I);
+
/// Copy all flags to MachineInst MIFlags
void copyIRFlags(const Instruction &I);
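
A minimal sketch of how the new FPExcept flag and the mayRaiseFPException() query compose; MI is assumed to be a floating-point MachineInstr built in an exception-sensitive context (illustrative).

  // Mark the instruction as appearing where FP exceptions may be observed...
  MI.setFlag(MachineInstr::FPExcept);

  // ...so passes can ask whether moving or deleting it could drop an exception.
  if (MI.mayRaiseFPException()) {
    // Both conditions hold: the opcode has MCID::MayRaiseFPException and the
    // FPExcept MI flag is set.
  }
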
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index b5e523f655e7..6d7fb72b6bd1 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -1,9 +1,8 @@
//===- CodeGen/MachineInstrBuilder.h - Simplify creation of MIs --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -80,6 +79,11 @@ public:
/// explicitly.
MachineInstr *getInstr() const { return MI; }
+ /// Get the register for the operand index.
+ /// The operand at the index should be a register (asserted by
+ /// MachineOperand).
+ Register getReg(unsigned Idx) const { return MI->getOperand(Idx).getReg(); }
+
/// Add a new virtual register operand.
const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
unsigned SubReg = 0) const {
@@ -283,6 +287,9 @@ public:
case MachineOperand::MO_GlobalAddress:
return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off,
TargetFlags);
+ case MachineOperand::MO_BlockAddress:
+ return addBlockAddress(Disp.getBlockAddress(), Disp.getOffset() + off,
+ TargetFlags);
}
}
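
The new getReg(Idx) accessor reads a register operand straight off the builder. A minimal sketch, assuming MBB, InsertPt, DL, TII, DstReg, and SrcReg already exist in the caller (illustrative names):

  // Build a copy and read its destination back without touching the
  // MachineInstr by hand (operand 0 must be a register operand).
  MachineInstrBuilder MIB =
      BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), DstReg)
          .addReg(SrcReg);
  Register Dst = MIB.getReg(0); // Same as MIB->getOperand(0).getReg().
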
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
index b5341fd1ae49..1810d23072d0 100644
--- a/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -1,9 +1,8 @@
-//===-- CodeGen/MachineInstBundle.h - MI bundle utilities -------*- C++ -*-===//
+//===- llvm/CodeGen/MachineInstrBundle.h - MI bundle utilities --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,7 +61,8 @@ inline MachineBasicBlock::instr_iterator getBundleEnd(
MachineBasicBlock::instr_iterator I) {
while (I->isBundledWithSucc())
++I;
- return ++I;
+ ++I;
+ return I;
}
/// Returns an iterator pointing beyond the bundle containing \p I.
@@ -70,7 +70,8 @@ inline MachineBasicBlock::const_instr_iterator getBundleEnd(
MachineBasicBlock::const_instr_iterator I) {
while (I->isBundledWithSucc())
++I;
- return ++I;
+ ++I;
+ return I;
}
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineInstrBundleIterator.h b/include/llvm/CodeGen/MachineInstrBundleIterator.h
index 5fe4964ff116..0f59563e7e1b 100644
--- a/include/llvm/CodeGen/MachineInstrBundleIterator.h
+++ b/include/llvm/CodeGen/MachineInstrBundleIterator.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineInstrBundleIterator.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 25a3e6b556a3..11781145b378 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -1,9 +1,8 @@
//===-- CodeGen/MachineJumpTableInfo.h - Abstract Jump Tables --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index 917fb90380f5..da6df59c739c 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineLoopInfo.h - Natural Loop Calculator -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 078ef7ca510c..65f706302bc2 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/MachineMemOperand.h - MachineMemOperand class -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,8 +18,6 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Value.h" // PointerLikeTypeTraits<Value*>
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
@@ -223,6 +220,9 @@ public:
/// Return the size in bytes of the memory reference.
uint64_t getSize() const { return Size; }
+ /// Return the size in bits of the memory reference.
+ uint64_t getSizeInBits() const { return Size * 8; }
+
/// Return the minimum known alignment in bytes of the actual memory
/// reference.
uint64_t getAlignment() const;
@@ -267,13 +267,13 @@ public:
bool isAtomic() const { return getOrdering() != AtomicOrdering::NotAtomic; }
/// Returns true if this memory operation doesn't have any ordering
- /// constraints other than normal aliasing. Volatile and atomic memory
- /// operations can't be reordered.
- ///
- /// Currently, we don't model the difference between volatile and atomic
- /// operations. They should retain their ordering relative to all memory
- /// operations.
- bool isUnordered() const { return !isVolatile(); }
+ /// constraints other than normal aliasing. Volatile and (ordered) atomic
+ /// memory operations can't be reordered.
+ bool isUnordered() const {
+ return (getOrdering() == AtomicOrdering::NotAtomic ||
+ getOrdering() == AtomicOrdering::Unordered) &&
+ !isVolatile();
+ }
/// Update this MachineMemOperand to reflect the alignment of MMO, if it has a
/// greater alignment. This must only be used when the new alignment applies
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 4371420bc7a2..4ff5c7fd013a 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MachineModuleInfo.h ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,10 +113,9 @@ class MachineModuleInfo : public ImmutablePass {
/// True if debugging information is available in this module.
bool DbgInfoAvailable;
- /// True if this module calls VarArg function with floating-point arguments.
- /// This is used to emit an undefined reference to _fltused on Windows
- /// targets.
- bool UsesVAFloatArgument;
+ /// True if this module is being built for Windows/MSVC and uses floating
+ /// point. This is used to emit an undefined reference to _fltused.
+ bool UsesMSVCFloatingPoint;
/// True if the module calls the __morestack function indirectly, as is
/// required under the large code model on x86. This is used to emit
@@ -152,6 +150,8 @@ public:
bool doInitialization(Module &) override;
bool doFinalization(Module &) override;
+ const LLVMTargetMachine &getTarget() const { return TM; }
+
const MCContext &getContext() const { return Context; }
MCContext &getContext() { return Context; }
@@ -187,13 +187,9 @@ public:
bool hasDebugInfo() const { return DbgInfoAvailable; }
void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
- bool usesVAFloatArgument() const {
- return UsesVAFloatArgument;
- }
+ bool usesMSVCFloatingPoint() const { return UsesMSVCFloatingPoint; }
- void setUsesVAFloatArgument(bool b) {
- UsesVAFloatArgument = b;
- }
+ void setUsesMSVCFloatingPoint(bool b) { UsesMSVCFloatingPoint = b; }
bool usesMorestackAddr() const {
return UsesMorestackAddr;
@@ -258,14 +254,6 @@ public:
/// \}
}; // End class MachineModuleInfo
-//===- MMI building helpers -----------------------------------------------===//
-
-/// Determine if any floating-point values are being passed to this variadic
-/// function, and set the MachineModuleInfo's usesVAFloatArgument flag if so.
-/// This flag is used to emit an undefined reference to _fltused on Windows,
-/// which will link in MSVCRT's floating-point support.
-void computeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo &MMI);
-
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEMODULEINFO_H
diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 17df1fa792b7..746e92239613 100644
--- a/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineModuleInfoImpls.h --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 53e8889d118a..2152c7582e5a 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MachineOperand.h - MachineOperand class ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#define LLVM_CODEGEN_MACHINEOPERAND_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/LowLevelTypeImpl.h"
@@ -346,9 +346,9 @@ public:
//===--------------------------------------------------------------------===//
/// getReg - Returns the register number.
- unsigned getReg() const {
+ Register getReg() const {
assert(isReg() && "This is not a register operand!");
- return SmallContents.RegNo;
+ return Register(SmallContents.RegNo);
}
unsigned getSubReg() const {
@@ -684,6 +684,11 @@ public:
Contents.RegMask = RegMaskPtr;
}
+ void setPredicate(unsigned Predicate) {
+ assert(isPredicate() && "Wrong MachineOperand mutator");
+ Contents.Pred = Predicate;
+ }
+
//===--------------------------------------------------------------------===//
// Other methods.
//===--------------------------------------------------------------------===//
@@ -714,6 +719,10 @@ public:
/// ChangeToES - Replace this operand with a new external symbol operand.
void ChangeToES(const char *SymName, unsigned char TargetFlags = 0);
+ /// ChangeToGA - Replace this operand with a new global address operand.
+ void ChangeToGA(const GlobalValue *GV, int64_t Offset,
+ unsigned char TargetFlags = 0);
+
/// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
void ChangeToMCSymbol(MCSymbol *Sym);
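
With getReg() now returning the Register wrapper, existing callers that expect an unsigned keep working via the implicit conversion; ChangeToGA rewrites an operand in place. A minimal sketch, assuming MO is a register operand and GV a GlobalValue (illustrative names):

  // Register converts back to unsigned, so old-style code still compiles.
  unsigned RawReg = MO.getReg();

  // Rewrite another operand in place to refer to a global with a byte offset.
  MO.ChangeToGA(GV, /*Offset=*/0);
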
diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index a7ce870400c2..a461a299917c 100644
--- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -1,9 +1,8 @@
///===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*----===//
///
-/// The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
///
///===---------------------------------------------------------------------===//
/// \file
@@ -159,9 +158,10 @@ public:
/// (1) to filter trivial false positives or (2) to provide more context so
/// that non-trivial false positives can be quickly detected by the user.
bool allowExtraAnalysis(StringRef PassName) const {
- return (MF.getFunction().getContext().getDiagnosticsOutputFile() ||
- MF.getFunction().getContext()
- .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName));
+ return (
+ MF.getFunction().getContext().getRemarkStreamer() ||
+ MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(
+ PassName));
}
/// Take a lambda that returns a remark which will be emitted. Second
@@ -172,8 +172,11 @@ public:
// remarks enabled. We can't currently check whether remarks are requested
// for the calling pass since that requires actually building the remark.
- if (MF.getFunction().getContext().getDiagnosticsOutputFile() ||
- MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) {
+ if (MF.getFunction().getContext().getRemarkStreamer() ||
+ MF.getFunction()
+ .getContext()
+ .getDiagHandlerPtr()
+ ->isAnyRemarkEnabled()) {
auto R = RemarkBuilder();
emit((DiagnosticInfoOptimizationBase &)R);
}
diff --git a/include/llvm/CodeGen/MachineOutliner.h b/include/llvm/CodeGen/MachineOutliner.h
index bfd1e994053a..3868fa415579 100644
--- a/include/llvm/CodeGen/MachineOutliner.h
+++ b/include/llvm/CodeGen/MachineOutliner.h
@@ -1,9 +1,8 @@
//===---- MachineOutliner.h - Outliner data structures ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -172,13 +171,13 @@ public:
/// Represents the size of a sequence in bytes. (Some instructions vary
/// widely in size, so just counting the instructions isn't very useful.)
- unsigned SequenceSize;
+ unsigned SequenceSize = 0;
/// Target-defined overhead of constructing a frame for this function.
- unsigned FrameOverhead;
+ unsigned FrameOverhead = 0;
/// Target-defined identifier for constructing a frame for this function.
- unsigned FrameConstructionID;
+ unsigned FrameConstructionID = 0;
/// Return the number of candidates for this \p OutlinedFunction.
unsigned getOccurrenceCount() const { return Candidates.size(); }
diff --git a/include/llvm/CodeGen/MachinePassRegistry.h b/include/llvm/CodeGen/MachinePassRegistry.h
index a031c92d914f..f5b3723db0aa 100644
--- a/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/include/llvm/CodeGen/MachinePassRegistry.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachinePassRegistry.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachinePipeliner.h b/include/llvm/CodeGen/MachinePipeliner.h
index 38cb33e90e63..03ca53072685 100644
--- a/include/llvm/CodeGen/MachinePipeliner.h
+++ b/include/llvm/CodeGen/MachinePipeliner.h
@@ -1,9 +1,8 @@
//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,6 +62,8 @@ public:
const InstrItineraryData *InstrItins;
const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
+ bool disabledByPragma = false;
+ unsigned II_setByPragma = 0;
#ifndef NDEBUG
static int NumTries;
@@ -100,6 +101,7 @@ private:
bool canPipelineLoop(MachineLoop &L);
bool scheduleLoop(MachineLoop &L);
bool swingModuloScheduler(MachineLoop &L);
+ void setPragmaPipelineOptions(MachineLoop &L);
};
/// This class builds the dependence graph for the instructions in a loop,
@@ -108,11 +110,14 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
MachinePipeliner &Pass;
/// The minimum initiation interval between iterations for this schedule.
unsigned MII = 0;
+ /// The maximum initiation interval between iterations for this schedule.
+ unsigned MAX_II = 0;
/// Set to true if a valid pipelined schedule is found for the loop.
bool Scheduled = false;
MachineLoop &Loop;
LiveIntervals &LIS;
const RegisterClassInfo &RegClassInfo;
+ unsigned II_setByPragma = 0;
/// A topological ordering of the SUnits, which is needed for changing
/// dependences and iterating over the SUnits.
@@ -190,9 +195,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
public:
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
- const RegisterClassInfo &rci)
+ const RegisterClassInfo &rci, unsigned II)
: ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
- RegClassInfo(rci), Topo(SUnits, &ExitSU) {
+ RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
P.MF->getSubtarget().getSMSMutations(Mutations);
if (SwpEnableCopyToPhi)
Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
@@ -253,9 +258,6 @@ public:
return 0;
}
- /// Set the Minimum Initiation Interval for this schedule attempt.
- void setMII(unsigned mii) { MII = mii; }
-
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
@@ -316,9 +318,9 @@ private:
MBBVectorTy &EpilogBBs);
void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
SMSchedule &Schedule);
- void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
- ValueMapTy *VRMap);
+ void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule, ValueMapTy *VRMap);
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
@@ -346,6 +348,10 @@ private:
unsigned &OffsetPos, unsigned &NewBase,
int64_t &NewOffset);
void postprocessDAG();
+ /// Set the Minimum Initiation Interval for this schedule attempt.
+ void setMII(unsigned ResMII, unsigned RecMII);
+ /// Set the Maximum Initiation Interval for this schedule attempt.
+ void setMAX_II();
};
/// A NodeSet contains a set of SUnit DAG nodes with additional information
@@ -457,6 +463,56 @@ public:
#endif
};
+// 16 was selected based on the number of ProcResource kinds for all
+// existing Subtargets, so that the SmallVectors don't need to resize too often.
+static const int DefaultProcResSize = 16;
+
+class ResourceManager {
+private:
+ const MCSubtargetInfo *STI;
+ const MCSchedModel &SM;
+ const bool UseDFA;
+ std::unique_ptr<DFAPacketizer> DFAResources;
+ /// Each processor resource is associated with a so-called processor resource
+ /// mask. This vector makes it possible to correlate processor resource IDs
+ /// with processor resource masks. There is exactly one element for each
+ /// processor resource declared by the scheduling model.
+ llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
+
+ llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount;
+
+public:
+ ResourceManager(const TargetSubtargetInfo *ST)
+ : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()),
+ ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
+ ProcResourceCount(SM.getNumProcResourceKinds(), 0) {
+ if (UseDFA)
+ DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
+ initProcResourceVectors(SM, ProcResourceMasks);
+ }
+
+ void initProcResourceVectors(const MCSchedModel &SM,
+ SmallVectorImpl<uint64_t> &Masks);
+ /// Check if the resources occupied by a MCInstrDesc are available in
+ /// the current state.
+ bool canReserveResources(const MCInstrDesc *MID) const;
+
+ /// Reserve the resources occupied by a MCInstrDesc and change the current
+ /// state to reflect that change.
+ void reserveResources(const MCInstrDesc *MID);
+
+ /// Check if the resources occupied by a machine instruction are available
+ /// in the current state.
+ bool canReserveResources(const MachineInstr &MI) const;
+
+ /// Reserve the resources occupied by a machine instruction and change the
+ /// current state to reflect that change.
+ void reserveResources(const MachineInstr &MI);
+
+ /// Reset the state
+ void clearResources();
+};
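
A minimal sketch (not part of the patch) of how a scheduler might drive the ResourceManager declared above: place instructions one at a time and open a new cycle whenever the current resource state is exhausted, roughly how a resource-based MII estimate is made. The helper and its instruction list are hypothetical.

unsigned countResourceCycles(const TargetSubtargetInfo *ST,
                             ArrayRef<MachineInstr *> Instrs) {
  ResourceManager RM(ST);
  unsigned Cycles = 1;
  for (MachineInstr *MI : Instrs) {
    if (!RM.canReserveResources(*MI)) {
      // The current cycle is full; start a fresh one.
      RM.clearResources();
      ++Cycles;
    }
    RM.reserveResources(*MI);
  }
  return Cycles;
}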
+
/// This class represents the scheduled code. The main data structure is a
/// map from scheduled cycle to instructions. During scheduling, the
/// data structure explicitly represents all stages/iterations. When
@@ -495,12 +551,11 @@ private:
/// Virtual register information.
MachineRegisterInfo &MRI;
- std::unique_ptr<DFAPacketizer> Resources;
+ ResourceManager ProcItinResources;
public:
SMSchedule(MachineFunction *mf)
- : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
- Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
+ : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {}
void reset() {
ScheduledInstrs.clear();
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index c6a41598ce32..b67e6b52ac8f 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -1,9 +1,8 @@
-//=- llvm/CodeGen/MachineDominators.h ----------------------------*- C++ -*-==//
+//===- llvm/CodeGen/MachinePostDominators.h ----------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 8394b58d0a16..6d9fb9b9100a 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineRegionInfo.h -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index fef010a23ef9..b5deed1f5010 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineRegisterInfo.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -562,9 +561,14 @@ public:
}
/// hasOneNonDBGUse - Return true if there is exactly one non-Debug
- /// instruction using the specified register.
+ /// use of the specified register.
bool hasOneNonDBGUse(unsigned RegNo) const;
+ /// hasOneNonDBGUser - Return true if there is exactly one non-Debug
+ /// instruction using the specified register. Said instruction may have
+ /// multiple uses.
+ bool hasOneNonDBGUser(unsigned RegNo) const;
+
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
@@ -713,12 +717,12 @@ public:
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
- unsigned createVirtualRegister(const TargetRegisterClass *RegClass,
+ Register createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name = "");
/// Create and return a new virtual register in the function with the same
/// attributes as the given register.
- unsigned cloneVirtualRegister(unsigned VReg, StringRef Name = "");
+ Register cloneVirtualRegister(Register VReg, StringRef Name = "");
/// Get the low-level type of \p Reg or LLT{} if Reg is not a generic
/// (target independent) virtual register.
@@ -733,7 +737,7 @@ public:
/// Create and return a new generic virtual register with low-level
/// type \p Ty.
- unsigned createGenericVirtualRegister(LLT Ty, StringRef Name = "");
+ Register createGenericVirtualRegister(LLT Ty, StringRef Name = "");
/// Remove all types associated to virtual registers (after instruction
/// selection and constraining of all generic virtual registers).
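
Not part of the diff: a short sketch of how the Register-returning factory functions are meant to be consumed. BuildMI and TargetOpcode::COPY are standard LLVM facilities; the enclosing helper is hypothetical.

void copyIntoNewVReg(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
                     MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     const DebugLoc &DL, const TargetRegisterClass *RC,
                     unsigned SrcReg) {
  // Register converts implicitly to unsigned, so existing callers keep working.
  Register NewVReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), NewVReg).addReg(SrcReg);
}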
diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h
index 5e91246b402c..0319ec774671 100644
--- a/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -1,9 +1,8 @@
//===- MachineSSAUpdater.h - Unstructured SSA Update Tool -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 4bc31ae7c61a..75a334f61ad0 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -1,9 +1,8 @@
//===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -265,10 +264,6 @@ protected:
LiveIntervals *LIS;
std::unique_ptr<MachineSchedStrategy> SchedImpl;
- /// Topo - A topological ordering for SUnits which permits fast IsReachable
- /// and similar queries.
- ScheduleDAGTopologicalSort Topo;
-
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
@@ -292,7 +287,7 @@ public:
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
bool RemoveKillFlags)
: ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
- LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU) {}
+ LIS(C->LIS), SchedImpl(std::move(S)) {}
// Provide a vtable anchor
~ScheduleDAGMI() override;
@@ -320,17 +315,6 @@ public:
Mutations.push_back(std::move(Mutation));
}
- /// True if an edge can be added from PredSU to SuccSU without creating
- /// a cycle.
- bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
-
- /// Add a DAG edge to the given SU with the given predecessor
- /// dependence data.
- ///
- /// \returns true if the edge may be added without creating a cycle OR if an
- /// equivalent edge already existed (false indicates failure).
- bool addEdge(SUnit *SuccSU, const SDep &PredDep);
-
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
@@ -682,6 +666,10 @@ private:
// scheduled instruction.
SmallVector<unsigned, 16> ReservedCycles;
+ // For each PIdx, stores first index into ReservedCycles that corresponds to
+ // it.
+ SmallVector<unsigned, 16> ReservedCyclesIndex;
+
#ifndef NDEBUG
// Remember the greatest possible stall as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
@@ -756,7 +744,11 @@ public:
/// cycle.
unsigned getLatencyStallCycles(SUnit *SU);
- unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+ unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
+ unsigned Cycles);
+
+ std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
+ unsigned Cycles);
bool checkHazard(SUnit *SU);
@@ -1015,6 +1007,7 @@ protected:
/// Callbacks from ScheduleDAGMI:
/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
+protected:
ScheduleDAGMI *DAG;
SchedBoundary Top;
SmallVector<SUnit*, 8> BotRoots;
diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h
index 9d8db393ca92..025989504177 100644
--- a/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/MacroFusion.h b/include/llvm/CodeGen/MacroFusion.h
index a77226ddaf33..3a140fe63fde 100644
--- a/include/llvm/CodeGen/MacroFusion.h
+++ b/include/llvm/CodeGen/MacroFusion.h
@@ -1,9 +1,8 @@
//===- MacroFusion.h - Macro Fusion -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/PBQP/CostAllocator.h b/include/llvm/CodeGen/PBQP/CostAllocator.h
index bde451ae1fcc..0d6d8a31317b 100644
--- a/include/llvm/CodeGen/PBQP/CostAllocator.h
+++ b/include/llvm/CodeGen/PBQP/CostAllocator.h
@@ -1,9 +1,8 @@
//===- CostAllocator.h - PBQP Cost Allocator --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index a6d88b057dcb..c2cd6dadae5f 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -1,9 +1,8 @@
//===- Graph.h - PBQP Graph -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index d1432a3053c4..8b014ccbb07b 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -1,9 +1,8 @@
//===- Math.h - PBQP Vector and Matrix classes ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h
index 21b99027970d..51822d082bad 100644
--- a/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -1,9 +1,8 @@
//===- ReductionRules.h - Reduction Rules -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index 4d4379fbc2c2..d5b1474f0f4c 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -1,9 +1,8 @@
//===- Solution.h - PBQP Solution -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h
index 995467dc56d8..876ab97a669f 100644
--- a/include/llvm/CodeGen/PBQPRAConstraint.h
+++ b/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -1,9 +1,8 @@
-//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===//
+//===- llvm/CodeGen/PBQPRAConstraint.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h
index dbf09ea31e20..a44715d4fc4f 100644
--- a/include/llvm/CodeGen/ParallelCG.h
+++ b/include/llvm/CodeGen/ParallelCG.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index acf1ebb5bc83..d92ee93268e7 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -1,9 +1,8 @@
//===-- Passes.h - Target independent code generation passes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -346,8 +345,9 @@ namespace llvm {
/// pointer or stack pointer index addressing.
extern char &LocalStackSlotAllocationID;
- /// ExpandISelPseudos - This pass expands pseudo-instructions.
- extern char &ExpandISelPseudosID;
+ /// This pass expands pseudo-instructions, reserves registers and adjusts
+ /// machine frame information.
+ extern char &FinalizeISelID;
/// UnpackMachineBundles - This pass unpack machine instruction bundles.
extern char &UnpackMachineBundlesID;
@@ -447,6 +447,9 @@ namespace llvm {
/// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
FunctionPass *createCFIInstrInserter();
+ /// Create Hardware Loop pass. \see HardwareLoops.cpp
+ FunctionPass *createHardwareLoopsPass();
+
} // End llvm namespace
#endif
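
A hedged sketch of where createHardwareLoopsPass() might be scheduled by a target backend; MyTargetPassConfig is hypothetical and not part of this import.

void MyTargetPassConfig::addIRPasses() {
  // Form hardware-loop intrinsics for eligible loops before instruction selection.
  addPass(createHardwareLoopsPass());
  TargetPassConfig::addIRPasses();
}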
diff --git a/include/llvm/CodeGen/PreISelIntrinsicLowering.h b/include/llvm/CodeGen/PreISelIntrinsicLowering.h
index b7f83e515b7e..73d7d779e55b 100644
--- a/include/llvm/CodeGen/PreISelIntrinsicLowering.h
+++ b/include/llvm/CodeGen/PreISelIntrinsicLowering.h
@@ -1,9 +1,8 @@
//===- PreISelIntrinsicLowering.h - Pre-ISel intrinsic lowering pass ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h
index f66191bc9fb4..4b3cc9145a13 100644
--- a/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/include/llvm/CodeGen/PseudoSourceValue.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/PseudoSourceValue.h ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Value.h"
#include "llvm/IR/ValueMap.h"
#include <map>
@@ -124,7 +122,7 @@ public:
bool mayAlias(const MachineFrameInfo *) const override;
};
-/// A specialized pseudo soruce value for holding GlobalValue values.
+/// A specialized pseudo source value for holding GlobalValue values.
class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
const GlobalValue *GV;
diff --git a/include/llvm/CodeGen/ReachingDefAnalysis.h b/include/llvm/CodeGen/ReachingDefAnalysis.h
index b21b745c8fd1..a599fb62f5e2 100644
--- a/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -1,15 +1,14 @@
//==--- llvm/CodeGen/ReachingDefAnalysis.h - Reaching Def Analysis -*- C++ -*---==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Reaching Defs Analysis pass.
///
-/// This pass tracks for each instruction what is the “closest” reaching def of
+/// This pass tracks for each instruction what is the "closest" reaching def of
/// a given register. It is used by BreakFalseDeps (for clearance calculation)
/// and ExecutionDomainFix (for arbitrating conflicting domains).
///
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index ba9763077d09..f7f92248f4ce 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -1,9 +1,8 @@
//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h
index b518fbb9c9da..9a63674689b3 100644
--- a/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/include/llvm/CodeGen/RegAllocRegistry.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/RegAllocRegistry.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,29 +22,30 @@ class FunctionPass;
//===----------------------------------------------------------------------===//
///
-/// RegisterRegAlloc class - Track the registration of register allocators.
+/// RegisterRegAllocBase class - Track the registration of register allocators.
///
//===----------------------------------------------------------------------===//
-class RegisterRegAlloc : public MachinePassRegistryNode<FunctionPass *(*)()> {
+template <class SubClass>
+class RegisterRegAllocBase : public MachinePassRegistryNode<FunctionPass *(*)()> {
public:
using FunctionPassCtor = FunctionPass *(*)();
static MachinePassRegistry<FunctionPassCtor> Registry;
- RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ RegisterRegAllocBase(const char *N, const char *D, FunctionPassCtor C)
: MachinePassRegistryNode(N, D, C) {
Registry.Add(this);
}
- ~RegisterRegAlloc() { Registry.Remove(this); }
+ ~RegisterRegAllocBase() { Registry.Remove(this); }
// Accessors.
- RegisterRegAlloc *getNext() const {
- return (RegisterRegAlloc *)MachinePassRegistryNode::getNext();
+ SubClass *getNext() const {
+ return static_cast<SubClass *>(MachinePassRegistryNode::getNext());
}
- static RegisterRegAlloc *getList() {
- return (RegisterRegAlloc *)Registry.getList();
+ static SubClass *getList() {
+ return static_cast<SubClass *>(Registry.getList());
}
static FunctionPassCtor getDefault() { return Registry.getDefault(); }
@@ -57,6 +57,17 @@ public:
}
};
+class RegisterRegAlloc : public RegisterRegAllocBase<RegisterRegAlloc> {
+public:
+ RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ : RegisterRegAllocBase(N, D, C) {}
+};
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
+template <class T>
+MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
+RegisterRegAllocBase<T>::Registry;
+
} // end namespace llvm
#endif // LLVM_CODEGEN_REGALLOCREGISTRY_H
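
The CRTP split above keeps the existing registration idiom working unchanged; a sketch, with a hypothetical factory function:

static FunctionPass *createMyRegisterAllocator(); // hypothetical factory

static RegisterRegAlloc
    myRegAlloc("myalloc", "experimental register allocator",
               createMyRegisterAllocator);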
diff --git a/include/llvm/CodeGen/Register.h b/include/llvm/CodeGen/Register.h
new file mode 100644
index 000000000000..907c1a99e56f
--- /dev/null
+++ b/include/llvm/CodeGen/Register.h
@@ -0,0 +1,60 @@
+//===-- llvm/CodeGen/Register.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTER_H
+#define LLVM_CODEGEN_REGISTER_H
+
+#include <cassert>
+
+namespace llvm {
+
+/// Wrapper class representing virtual and physical registers. Should be passed
+/// by value.
+class Register {
+ unsigned Reg;
+
+public:
+ Register(unsigned Val = 0): Reg(Val) {}
+
+ /// Return true if the specified register number is in the virtual register
+ /// namespace.
+ bool isVirtual() const {
+ return int(Reg) < 0;
+ }
+
+ /// Return true if the specified register number is in the physical register
+ /// namespace.
+ bool isPhysical() const {
+ return int(Reg) > 0;
+ }
+
+ /// Convert a virtual register number to a 0-based index. The first virtual
+ /// register in a function will get the index 0.
+ unsigned virtRegIndex() const {
+ assert(isVirtual() && "Not a virtual register");
+ return Reg & ~(1u << 31);
+ }
+
+ /// Convert a 0-based index to a virtual register number.
+ /// This is the inverse operation of VirtReg2IndexFunctor below.
+ static Register index2VirtReg(unsigned Index) {
+ return Register(Index | (1u << 31));
+ }
+
+ operator unsigned() const {
+ return Reg;
+ }
+
+ bool isValid() const {
+ return Reg != 0;
+ }
+};
+
+}
+
+#endif
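
A small sanity sketch (not in the patch) showing the bit layout implied by the new Register class: bit 31 marks virtual registers, small positive values are physical, and zero is invalid.

void registerRoundTrip() {
  Register V = Register::index2VirtReg(5); // sixth virtual register
  assert(V.isVirtual() && !V.isPhysical() && V.virtRegIndex() == 5);

  Register P(42); // positive values below bit 31 are physical registers
  assert(P.isPhysical() && P.isValid());

  Register Invalid; // 0 is neither virtual nor physical
  assert(!Invalid.isValid());
}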
diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h
index 97113c575815..14af5c4d090d 100644
--- a/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/include/llvm/CodeGen/RegisterClassInfo.h
@@ -1,9 +1,8 @@
//===- RegisterClassInfo.h - Dynamic Register Class Info --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index 79054b9e33b7..5bbaa03fd751 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -1,9 +1,8 @@
//===- RegisterPressure.h - Dynamic Register Pressure -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -132,10 +131,6 @@ public:
}
};
-template <> struct isPodLike<PressureChange> {
- static const bool value = true;
-};
-
/// List of PressureChanges in order of increasing, unique PSetID.
///
/// Use a small fixed number, because we can fit more PressureChanges in an
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index b6bd028a8cac..9c48df82f07d 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -1,9 +1,8 @@
//===- RegisterScavenging.h - Machine register scavenging -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -158,10 +157,15 @@ public:
/// Returns the scavenged register.
/// This is deprecated as it depends on the quality of the kill flags being
/// present; Use scavengeRegisterBackwards() instead!
+ ///
+ /// If \p AllowSpill is false, fail if a spill is required to make the
+ /// register available, and return NoRegister.
unsigned scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I, int SPAdj);
- unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) {
- return scavengeRegister(RegClass, MBBI, SPAdj);
+ MachineBasicBlock::iterator I, int SPAdj,
+ bool AllowSpill = true);
+ unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
+ bool AllowSpill = true) {
+ return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill);
}
/// Make a register of the specific register class available from the current
@@ -170,9 +174,13 @@ public:
/// SPAdj is the stack adjustment due to call frame, it's passed along to
/// eliminateFrameIndex().
/// Returns the scavenged register.
+ ///
+ /// If \p AllowSpill is false, fail if a spill is required to make the
+ /// register available, and return NoRegister.
unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
- bool RestoreAfter, int SPAdj);
+ bool RestoreAfter, int SPAdj,
+ bool AllowSpill = true);
/// Tell the scavenger a register is used.
void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
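
Sketch of the pattern the new AllowSpill parameter enables (not part of the patch; the helper is hypothetical): try a spill-free scavenge first, then fall back to the spilling path only if nothing is free.

unsigned scavengeTempReg(RegScavenger &RS, const TargetRegisterClass &RC,
                         MachineBasicBlock::iterator To, int SPAdj) {
  unsigned Reg = RS.scavengeRegisterBackwards(RC, To, /*RestoreAfter=*/false,
                                              SPAdj, /*AllowSpill=*/false);
  if (Reg == 0) // NoRegister: nothing was free, so permit a spill this time.
    Reg = RS.scavengeRegisterBackwards(RC, To, /*RestoreAfter=*/false, SPAdj);
  return Reg;
}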
diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h
index efecc61d9c30..33554550b9dc 100644
--- a/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -1,9 +1,8 @@
//==- RegisterUsageInfo.h - Register Usage Information Storage ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
index 8d582ee298b6..81587a3170ce 100644
--- a/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -1,9 +1,8 @@
//===----- ResourcePriorityQueue.h - A DFA-oriented priority queue -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 28567a1ce437..f71f39e5bf03 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -1,9 +1,8 @@
//===-- CodeGen/RuntimeLibcalls.h - Runtime Library Calls -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/SDNodeProperties.td b/include/llvm/CodeGen/SDNodeProperties.td
index 83bbab2fdc8d..d25e0bda26a9 100644
--- a/include/llvm/CodeGen/SDNodeProperties.td
+++ b/include/llvm/CodeGen/SDNodeProperties.td
@@ -1,9 +1,8 @@
//===- SDNodeProperties.td - Common code for DAG isels ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 0870d67db390..e004f3bf2cc1 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/ScheduleDAG.h - Common Base Class -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -239,9 +238,6 @@ class TargetRegisterInfo;
void dump(const TargetRegisterInfo *TRI = nullptr) const;
};
- template <>
- struct isPodLike<SDep> { static const bool value = true; };
-
/// Scheduling unit. This is a node in the scheduling DAG.
class SUnit {
private:
@@ -418,7 +414,7 @@ class TargetRegisterInfo;
/// dirty.
void setDepthToAtLeast(unsigned NewDepth);
- /// If NewDepth is greater than this node's depth value, set it to be
+ /// If NewHeight is greater than this node's height value, set it to be
/// the new height value. This also recursively marks predecessor nodes
/// dirty.
void setHeightToAtLeast(unsigned NewHeight);
@@ -695,6 +691,12 @@ class TargetRegisterInfo;
std::vector<SUnit> &SUnits;
SUnit *ExitSU;
+ // Have any new nodes been added?
+ bool Dirty = false;
+
+ // Outstanding added edges, that have not been applied to the ordering.
+ SmallVector<std::pair<SUnit *, SUnit *>, 16> Updates;
+
/// Maps topological index to the node number.
std::vector<int> Index2Node;
/// Maps the node number to its topological index.
@@ -714,6 +716,11 @@ class TargetRegisterInfo;
/// Assigns the topological index to the node n.
void Allocate(int n, int index);
+ /// Fix the ordering by either recomputing it from scratch or by applying
+ /// any outstanding updates. Uses a heuristic to estimate what will be
+ /// cheaper.
+ void FixOrder();
+
public:
ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits, SUnit *ExitSU);
@@ -738,11 +745,19 @@ class TargetRegisterInfo;
/// added from SUnit \p X to SUnit \p Y.
void AddPred(SUnit *Y, SUnit *X);
+ /// Queues an update to the topological ordering to accommodate an edge to
+ /// be added from SUnit \p X to SUnit \p Y.
+ void AddPredQueued(SUnit *Y, SUnit *X);
+
/// Updates the topological ordering to accommodate an edge to be
/// removed from the specified node \p N from the predecessors of the
/// current node \p M.
void RemovePred(SUnit *M, SUnit *N);
+ /// Mark the ordering as temporarily broken, after a new node has been
+ /// added.
+ void MarkDirty() { Dirty = true; }
+
typedef std::vector<int>::iterator iterator;
typedef std::vector<int>::const_iterator const_iterator;
iterator begin() { return Index2Node.begin(); }
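
Sketch (not in the diff) of how the queued interface is meant to be used when many edges are added at once; the edge list is hypothetical, and the expectation is that the next reachability query pays the batched update cost once.

void queueArtificialOrder(ScheduleDAGTopologicalSort &Topo,
                          ArrayRef<std::pair<SUnit *, SUnit *>> Edges) {
  for (const auto &E : Edges)
    Topo.AddPredQueued(/*Y=*/E.first, /*X=*/E.second); // queue, do not reorder yet
  // Subsequent queries (e.g. IsReachable) are expected to apply the queued
  // updates or recompute the ordering, whichever the heuristic deems cheaper.
}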
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index daad18125db9..3e3b604acbac 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -1,9 +1,8 @@
//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -235,6 +234,11 @@ namespace llvm {
/// For an unanalyzable memory access, this Value is used in maps.
UndefValue *UnknownValue;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
using DbgValueVector =
std::vector<std::pair<MachineInstr *, MachineInstr *>>;
/// Remember instruction that precedes DBG_VALUE.
@@ -339,6 +343,17 @@ namespace llvm {
/// Fixes register kill flags that scheduling has made invalid.
void fixupKills(MachineBasicBlock &MBB);
+ /// True if an edge can be added from PredSU to SuccSU without creating
+ /// a cycle.
+ bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
+
+ /// Add a DAG edge to the given SU with the given predecessor
+ /// dependence data.
+ ///
+ /// \returns true if the edge may be added without creating a cycle OR if an
+ /// equivalent edge already existed (false indicates failure).
+ bool addEdge(SUnit *SuccSU, const SDep &PredDep);
+
protected:
void initSUnits();
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
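
With canAddEdge()/addEdge() now on ScheduleDAGInstrs, a DAG mutation no longer needs a ScheduleDAGMI to add artificial edges. A minimal, hypothetical mutation as a sketch:

struct OrderFirstBeforeLast : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    if (DAG->SUnits.size() < 2)
      return;
    SUnit *First = &DAG->SUnits.front();
    SUnit *Last = &DAG->SUnits.back();
    // Only add the ordering edge if it cannot introduce a cycle.
    if (DAG->canAddEdge(/*SuccSU=*/Last, /*PredSU=*/First))
      DAG->addEdge(Last, SDep(First, SDep::Artificial));
  }
};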
diff --git a/include/llvm/CodeGen/ScheduleDAGMutation.h b/include/llvm/CodeGen/ScheduleDAGMutation.h
index 5c236427e0b8..d1dd72859a38 100644
--- a/include/llvm/CodeGen/ScheduleDAGMutation.h
+++ b/include/llvm/CodeGen/ScheduleDAGMutation.h
@@ -1,9 +1,8 @@
//===- ScheduleDAGMutation.h - MachineInstr Scheduling ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h
index 3ecc033ac35a..d60deab95f5d 100644
--- a/include/llvm/CodeGen/ScheduleDFS.h
+++ b/include/llvm/CodeGen/ScheduleDFS.h
@@ -1,9 +1,8 @@
-//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
+//===- ScheduleDFS.h - ILP metric for ScheduleDAGInstrs ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index ace4a2d836ca..37590f496ca2 100644
--- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -1,9 +1,8 @@
//=- llvm/CodeGen/ScheduleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index fbe559f25556..0ccfaafd9e50 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/SchedulerRegistry.h -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 3f75d108f282..ac67f3008fa7 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -1,9 +1,8 @@
//=- llvm/CodeGen/ScoreboardHazardRecognizer.h - Schedule Support -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 67fe87fc96af..12a970847021 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -268,6 +267,10 @@ class SelectionDAG {
/// Tracks dbg_value and dbg_label information through SDISel.
SDDbgInfo *DbgInfo;
+ using CallSiteInfo = MachineFunction::CallSiteInfo;
+ using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
+ DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
+
uint16_t NextPersistentId = 0;
public:
@@ -298,6 +301,9 @@ public:
/// The node N that was updated.
virtual void NodeUpdated(SDNode *N);
+
+ /// The node N that was inserted.
+ virtual void NodeInserted(SDNode *N);
};
struct DAGNodeDeletedListener : public DAGUpdateListener {
@@ -404,6 +410,7 @@ public:
const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
+ const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
LLVMContext *getContext() const {return Context; }
OptimizationRemarkEmitter &getORE() const { return *ORE; }
@@ -573,6 +580,9 @@ public:
bool isTarget = false, bool isOpaque = false);
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL,
bool isTarget = false);
+ SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL,
+ bool LegalTypes = true);
+
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isOpaque = false) {
return getConstant(Val, DL, VT, true, isOpaque);
@@ -789,6 +799,16 @@ public:
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
+ /// Convert Op, which must be of integer type, to the integer type VT, by
+ /// either truncating it or performing zero or sign extension, as
+ /// appropriate for the pointer's semantics.
+ SDValue getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
+
+ /// Return the expression required to extend the Op as a pointer value
+ /// assuming it was the smaller SrcTy value. This may be either a zero extend
+ /// or a sign extend.
+ SDValue getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
+
/// Convert Op, which must be of integer type, to the integer type VT,
/// by using an extension appropriate for the target's
/// BooleanContent for type OpVT or truncating it.
@@ -971,6 +991,10 @@ public:
/// Try to simplify a shift into 1 of its operands or a constant.
SDValue simplifyShift(SDValue X, SDValue Y);
+ /// Try to simplify a floating-point binary operation into 1 of its operands
+ /// or a constant.
+ SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y);
+
/// VAArg produces a result and token chain, and takes a pointer
/// and a source value as input.
SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
@@ -982,21 +1006,11 @@ public:
/// a success flag (initially i1), and a chain.
SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTs, SDValue Chain, SDValue Ptr,
- SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
- unsigned Alignment, AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SyncScope::ID SSID);
- SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT,
- SDVTList VTs, SDValue Chain, SDValue Ptr,
SDValue Cmp, SDValue Swp, MachineMemOperand *MMO);
/// Gets a node for an atomic op, produces result (if relevant)
/// and chain and takes 2 operands.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain,
- SDValue Ptr, SDValue Val, const Value *PtrVal,
- unsigned Alignment, AtomicOrdering Ordering,
- SyncScope::ID SSID);
- SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain,
SDValue Ptr, SDValue Val, MachineMemOperand *MMO);
/// Gets a node for an atomic op, produces result and chain and
@@ -1021,12 +1035,19 @@ public:
unsigned Align = 0,
MachineMemOperand::Flags Flags
= MachineMemOperand::MOLoad | MachineMemOperand::MOStore,
- unsigned Size = 0);
+ unsigned Size = 0,
+ const AAMDNodes &AAInfo = AAMDNodes());
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList,
ArrayRef<SDValue> Ops, EVT MemVT,
MachineMemOperand *MMO);
+ /// Creates a LifetimeSDNode that starts (`IsStart==true`) or ends
+ /// (`IsStart==false`) the lifetime of the portion of `FrameIndex` between
+ /// offsets `Offset` and `Offset + Size`.
+ SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
+ int FrameIndex, int64_t Size, int64_t Offset = -1);
+
/// Create a MERGE_VALUES node from the given operands.
SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl);
@@ -1154,6 +1175,11 @@ public:
SDValue Op3, SDValue Op4, SDValue Op5);
SDNode *UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops);
+ /// Creates a new TokenFactor containing \p Vals. If \p Vals contains 64k
+ /// values or more, move values into new TokenFactors in 64k-1 blocks, until
+ /// the final TokenFactor has fewer than 64k operands.
+ SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl<SDValue> &Vals);
+
/// *Mutate* the specified machine node's memory references to the provided
/// list.
void setNodeMemRefs(MachineSDNode *N,
@@ -1358,21 +1384,20 @@ public:
/// with this SelectionDAG.
bool hasDebugValues() const { return !DbgInfo->empty(); }
- SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); }
- SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); }
+ SDDbgInfo::DbgIterator DbgBegin() const { return DbgInfo->DbgBegin(); }
+ SDDbgInfo::DbgIterator DbgEnd() const { return DbgInfo->DbgEnd(); }
- SDDbgInfo::DbgIterator ByvalParmDbgBegin() {
+ SDDbgInfo::DbgIterator ByvalParmDbgBegin() const {
return DbgInfo->ByvalParmDbgBegin();
}
-
- SDDbgInfo::DbgIterator ByvalParmDbgEnd() {
+ SDDbgInfo::DbgIterator ByvalParmDbgEnd() const {
return DbgInfo->ByvalParmDbgEnd();
}
- SDDbgInfo::DbgLabelIterator DbgLabelBegin() {
+ SDDbgInfo::DbgLabelIterator DbgLabelBegin() const {
return DbgInfo->DbgLabelBegin();
}
- SDDbgInfo::DbgLabelIterator DbgLabelEnd() {
+ SDDbgInfo::DbgLabelIterator DbgLabelEnd() const {
return DbgInfo->DbgLabelEnd();
}
@@ -1395,27 +1420,42 @@ public:
const SDNode *N2);
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDNode *Cst1, SDNode *Cst2);
+ SDNode *N1, SDNode *N2);
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- const ConstantSDNode *Cst1,
- const ConstantSDNode *Cst2);
+ const ConstantSDNode *C1,
+ const ConstantSDNode *C2);
SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags = SDNodeFlags());
+ /// Fold floating-point operations with 2 operands when both operands are
+ /// constants and/or undefined.
+ SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2);
+
/// Constant fold a setcc to true or false.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond,
const SDLoc &dl);
- /// See if the specified operand can be simplified with the knowledge that only
- /// the bits specified by Mask are used. If so, return the simpler operand,
- /// otherwise return a null SDValue.
+ /// See if the specified operand can be simplified with the knowledge that
+ /// only the bits specified by DemandedBits are used. If so, return the
+ /// simpler operand, otherwise return a null SDValue.
+ ///
+ /// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+ /// simplify nodes with multiple uses more aggressively.)
+ SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits);
+
+ /// See if the specified operand can be simplified with the knowledge that
+ /// only the bits specified by DemandedBits are used in the elements specified
+ /// by DemandedElts. If so, return the simpler operand, otherwise return a
+ /// null SDValue.
///
/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
/// simplify nodes with multiple uses more aggressively.)
- SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+ SDValue GetDemandedBits(SDValue V, const APInt &DemandedBits,
+ const APInt &DemandedElts);
/// Return true if the sign bit of Op is known to be zero.
/// We use this predicate to simplify operations downstream.
@@ -1424,8 +1464,19 @@ public:
/// Return true if 'Op & Mask' is known to be zero. We
/// use this predicate to simplify operations downstream. Op and Mask are
/// known to be the same type.
- bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth = 0)
- const;
+ bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth = 0) const;
+
+ /// Return true if 'Op & Mask' is known to be zero in DemandedElts. We
+ /// use this predicate to simplify operations downstream. Op and Mask are
+ /// known to be the same type.
+ bool MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ const APInt &DemandedElts, unsigned Depth = 0) const;
+
+ /// Return true if '(Op & Mask) == Mask'.
+ /// Op and Mask are known to be the same type.
+ bool MaskedValueIsAllOnes(SDValue Op, const APInt &Mask,
+ unsigned Depth = 0) const;
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
@@ -1525,6 +1576,13 @@ public:
/// Test whether \p V has a splatted value.
bool isSplatValue(SDValue V, bool AllowUndefs = false);
+ /// If V is a splatted value, return the source vector and its splat index.
+ SDValue getSplatSourceVector(SDValue V, int &SplatIndex);
+
+ /// If V is a splat vector, return its scalar source operand by extracting
+ /// that element from the source vector.
+ SDValue getSplatValue(SDValue V);
+
/// Match a binop + shuffle pyramid that represents a horizontal reduction
/// over the elements of a vector starting from the EXTRACT_VECTOR_ELT node \p
/// Extract. The reduction must use one of the opcodes listed in \p
@@ -1542,6 +1600,11 @@ public:
/// vector op and fill the end of the resulting vector with UNDEFS.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
+ /// Like UnrollVectorOp(), but for the [US](ADD|SUB|MUL)O family of opcodes.
+ /// This is a separate function because those opcodes have two results.
+ std::pair<SDValue, SDValue> UnrollVectorOverflowOp(SDNode *N,
+ unsigned ResNE = 0);
+
/// Return true if loads are next to each other and can be
/// merged. Check that both are nonvolatile and if LD is loading
/// 'Bytes' bytes from a location that is 'Dist' units away from the
@@ -1576,6 +1639,9 @@ public:
return SplitVector(N->getOperand(OpNo), SDLoc(N));
}
+ /// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+ SDValue WidenVector(const SDValue &N, const SDLoc &DL);
+
/// Append the extracted elements from Start to Count out of the vector Op
/// in Args. If Count is 0, all of the elements will be extracted.
void ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args,
@@ -1597,6 +1663,17 @@ public:
isConstantFPBuildVectorOrConstantFP(N);
}
+ void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
+ SDCallSiteInfo[CallNode] = std::move(CallInfo);
+ }
+
+ CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
+ auto I = SDCallSiteInfo.find(CallNode);
+ if (I != SDCallSiteInfo.end())
+ return std::move(I->second);
+ return CallSiteInfo();
+ }
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
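
A short sketch (not part of the patch) of how the new getTokenFactor() helper is meant to replace hand-built TokenFactor nodes; the wrapper function is hypothetical.

SDValue joinChains(SelectionDAG &DAG, const SDLoc &DL,
                   SmallVectorImpl<SDValue> &Chains) {
  if (Chains.empty())
    return DAG.getEntryNode();
  if (Chains.size() == 1)
    return Chains[0];
  // Splits into nested TokenFactors automatically once ~64k operands are reached.
  return DAG.getTokenFactor(DL, Chains);
}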
diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 2b2c48d57bc0..4ee58333495b 100644
--- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -1,9 +1,8 @@
//===- SelectionDAGAddressAnalysis.h - DAG Address Analysis -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -34,11 +33,13 @@ class BaseIndexOffset {
private:
SDValue Base;
SDValue Index;
- int64_t Offset = 0;
+ Optional<int64_t> Offset;
bool IsIndexSignExt = false;
public:
BaseIndexOffset() = default;
+ BaseIndexOffset(SDValue Base, SDValue Index, bool IsIndexSignExt)
+ : Base(Base), Index(Index), Offset(), IsIndexSignExt(IsIndexSignExt) {}
BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
bool IsIndexSignExt)
: Base(Base), Index(Index), Offset(Offset),
@@ -48,6 +49,13 @@ public:
SDValue getBase() const { return Base; }
SDValue getIndex() { return Index; }
SDValue getIndex() const { return Index; }
+ bool hasValidOffset() const { return Offset.hasValue(); }
+
+ // Returns true if `Other` and `*this` are both some offset from the same base
+ // pointer. In that case, `Off` is set to the offset between `*this` and
+ // `Other` (negative if `Other` is before `*this`).
+ bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG,
+ int64_t &Off) const;
bool equalBaseIndex(const BaseIndexOffset &Other,
const SelectionDAG &DAG) const {
@@ -55,11 +63,31 @@ public:
return equalBaseIndex(Other, DAG, Off);
}
- bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG,
- int64_t &Off) const;
+ // Returns true if `Other` (with size `OtherBitSize`) can be proven to be
+ // fully contained in `*this` (with size `BitSize`).
+ bool contains(const SelectionDAG &DAG, int64_t BitSize,
+ const BaseIndexOffset &Other, int64_t OtherBitSize,
+ int64_t &BitOffset) const;
+
+ bool contains(const SelectionDAG &DAG, int64_t BitSize,
+ const BaseIndexOffset &Other, int64_t OtherBitSize) const {
+ int64_t BitOffset;
+ return contains(DAG, BitSize, Other, OtherBitSize, BitOffset);
+ }
+
+ // Returns true if `Op0` and `Op1` can be proven to alias/not alias, in
+ // which case `IsAlias` is set to true/false.
+ static bool computeAliasing(const SDNode *Op0,
+ const Optional<int64_t> NumBytes0,
+ const SDNode *Op1,
+ const Optional<int64_t> NumBytes1,
+ const SelectionDAG &DAG, bool &IsAlias);
+
+ /// Parses tree in N for base, index, offset addresses.
+ static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG);
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(const LSBaseSDNode *N, const SelectionDAG &DAG);
+ void print(raw_ostream& OS) const;
+ void dump() const;
};
} // end namespace llvm
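The new contains() and computeAliasing() helpers let DAG combining reason about overlapping memory accesses without re-deriving base/index/offset decompositions at every call site. A hedged sketch of a caller (the node and size arguments are assumed to come from whatever combine is running; only the query pattern is shown):

// Sketch: decide whether two memory nodes can be proven disjoint.
// Op0/Op1 and their (possibly unknown) access sizes come from the caller.
static bool provablyDisjoint(const SDNode *Op0, Optional<int64_t> Bytes0,
                             const SDNode *Op1, Optional<int64_t> Bytes1,
                             const SelectionDAG &DAG) {
  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, Bytes0, Op1, Bytes1, DAG, IsAlias))
    return !IsAlias;   // proven one way or the other; disjoint iff not aliasing
  return false;        // undecidable: conservatively assume they may overlap
}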
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 6758c55c696a..147c325342fc 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/SelectionDAGISel.h - Common Base Class------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,6 +34,7 @@ namespace llvm {
class TargetLibraryInfo;
class FunctionLoweringInfo;
class ScheduleHazardRecognizer;
+ class SwiftErrorValueTracking;
class GCFunctionInfo;
class ScheduleDAGSDNodes;
class LoadInst;
@@ -46,6 +46,7 @@ public:
TargetMachine &TM;
const TargetLibraryInfo *LibInfo;
FunctionLoweringInfo *FuncInfo;
+ SwiftErrorValueTracking *SwiftError;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
SelectionDAG *CurDAG;
@@ -144,10 +145,12 @@ public:
OPC_CheckInteger,
OPC_CheckChild0Integer, OPC_CheckChild1Integer, OPC_CheckChild2Integer,
OPC_CheckChild3Integer, OPC_CheckChild4Integer,
- OPC_CheckCondCode,
+ OPC_CheckCondCode, OPC_CheckChild2CondCode,
OPC_CheckValueType,
OPC_CheckComplexPat,
OPC_CheckAndImm, OPC_CheckOrImm,
+ OPC_CheckImmAllOnesV,
+ OPC_CheckImmAllZerosV,
OPC_CheckFoldableChainNode,
OPC_EmitInteger,
@@ -303,7 +306,7 @@ public:
private:
// Calls to these functions are generated by tblgen.
- void Select_INLINEASM(SDNode *N);
+ void Select_INLINEASM(SDNode *N, bool Branch);
void Select_READ_REGISTER(SDNode *Op);
void Select_WRITE_REGISTER(SDNode *Op);
void Select_UNDEF(SDNode *N);
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 10f284179084..5aab9643e09d 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -184,6 +183,7 @@ public:
inline unsigned getNumOperands() const;
inline const SDValue &getOperand(unsigned i) const;
inline uint64_t getConstantOperandVal(unsigned i) const;
+ inline const APInt &getConstantOperandAPInt(unsigned i) const;
inline bool isTargetMemoryOpcode() const;
inline bool isTargetOpcode() const;
inline bool isMachineOpcode() const;
@@ -232,7 +232,6 @@ template<> struct DenseMapInfo<SDValue> {
return LHS == RHS;
}
};
-template <> struct isPodLike<SDValue> { static const bool value = true; };
/// Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
@@ -369,6 +368,13 @@ private:
bool ApproximateFuncs : 1;
bool AllowReassociation : 1;
+ // We assume instructions do not raise floating-point exceptions by default,
+ // and only those marked explicitly may do so. We could choose to represent
+ // this via a positive "FPExcept" flags like on the MI level, but having a
+ // negative "NoFPExcept" flag here (that defaults to true) makes the flag
+ // intersection logic more straightforward.
+ bool NoFPExcept : 1;
+
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
@@ -376,7 +382,7 @@ public:
Exact(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false), ApproximateFuncs(false),
- AllowReassociation(false) {}
+ AllowReassociation(false), NoFPExcept(true) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
@@ -439,6 +445,10 @@ public:
setDefined();
AllowReassociation = b;
}
+ void setFPExcept(bool b) {
+ setDefined();
+ NoFPExcept = !b;
+ }
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -452,9 +462,10 @@ public:
bool hasAllowContract() const { return AllowContract; }
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
+ bool hasFPExcept() const { return !NoFPExcept; }
bool isFast() const {
- return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
+ return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
AllowContract && ApproximateFuncs && AllowReassociation;
}
@@ -474,6 +485,7 @@ public:
AllowContract &= Flags.AllowContract;
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
+ NoFPExcept &= Flags.NoFPExcept;
}
};
@@ -489,6 +501,17 @@ protected:
// SubclassData. These are designed to fit within a uint16_t so they pack
// with NodeType.
+#if defined(_AIX) && (!defined(__GNUC__) || defined(__ibmxl__))
+// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
+// and give the `pack` pragma push semantics.
+#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
+#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
+#else
+#define BEGIN_TWO_BYTE_PACK()
+#define END_TWO_BYTE_PACK()
+#endif
+
+BEGIN_TWO_BYTE_PACK()
class SDNodeBitfields {
friend class SDNode;
friend class MemIntrinsicSDNode;
@@ -561,6 +584,9 @@ protected:
LoadSDNodeBitfields LoadSDNodeBits;
StoreSDNodeBitfields StoreSDNodeBits;
};
+END_TWO_BYTE_PACK()
+#undef BEGIN_TWO_BYTE_PACK
+#undef END_TWO_BYTE_PACK
// RawSDNodeBits must cover the entirety of the union. This means that all of
// the union's members must have size <= RawSDNodeBits. We write the RHS as
@@ -678,6 +704,8 @@ public:
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
return true;
}
}
@@ -898,9 +926,17 @@ public:
/// Return the number of values used by this operation.
unsigned getNumOperands() const { return NumOperands; }
+ /// Return the maximum number of operands that a SDNode can hold.
+ static constexpr size_t getMaxNumOperands() {
+ return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
+ }
+
/// Helper method returns the integer value of a ConstantSDNode operand.
inline uint64_t getConstantOperandVal(unsigned Num) const;
+ /// Helper method returns the APInt of a ConstantSDNode operand.
+ inline const APInt &getConstantOperandAPInt(unsigned Num) const;
+
const SDValue &getOperand(unsigned Num) const {
assert(Num < NumOperands && "Invalid child # of SDNode!");
return OperandList[Num];
@@ -1128,6 +1164,10 @@ inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
return Node->getConstantOperandVal(i);
}
+inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
+ return Node->getConstantOperandAPInt(i);
+}
+
inline bool SDValue::isTargetOpcode() const {
return Node->isTargetOpcode();
}
@@ -1356,6 +1396,8 @@ public:
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE ||
N->getOpcode() == ISD::MLOAD ||
@@ -1372,7 +1414,10 @@ class AtomicSDNode : public MemSDNode {
public:
AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
EVT MemVT, MachineMemOperand *MMO)
- : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {}
+ : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+ assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||
+ MMO->isAtomic()) && "then why are we using an AtomicSDNode?");
+ }
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getVal() const { return getOperand(2); }
@@ -1408,6 +1453,8 @@ public:
N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FADD ||
+ N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
@@ -1467,14 +1514,16 @@ public:
bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
- int getSplatIndex() const {
+ int getSplatIndex() const {
assert(isSplat() && "Cannot get splat index for non-splat!");
EVT VT = getValueType(0);
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
if (Mask[i] >= 0)
return Mask[i];
- }
- llvm_unreachable("Splat with all undef indices?");
+
+ // We can choose any index value here and be correct because all elements
+ // are undefined. Return 0 for better potential for callers to simplify.
+ return 0;
}
static bool isSplatMask(const int *Mask, EVT VT);
@@ -1536,6 +1585,10 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
}
+const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
+ return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
+}
+
class ConstantFPSDNode : public SDNode {
friend class SelectionDAG;
@@ -1603,20 +1656,36 @@ SDValue peekThroughBitcasts(SDValue V);
/// If \p V is not a bitcasted one-use value, it is returned as-is.
SDValue peekThroughOneUseBitcasts(SDValue V);
+/// Return the non-extracted vector source operand of \p V if it exists.
+/// If \p V is not an extracted subvector, it is returned as-is.
+SDValue peekThroughExtractSubvectors(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
-bool isBitwiseNot(SDValue V);
+bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
+ bool AllowTruncation = false);
+
+/// Returns the SDNode if it is a demanded constant splat BuildVector or
+/// constant int.
+ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs = false,
+ bool AllowTruncation = false);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
+/// Returns the SDNode if it is a demanded constant splat BuildVector or
+/// constant float.
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs = false);
+
/// Return true if the value is a constant 0 integer or a splatted vector of
-/// a constant 0 integer (with no undefs).
+/// a constant 0 integer (with no undefs by default).
/// Build vector implicit truncation is not an issue for null values.
-bool isNullOrNullSplat(SDValue V);
+bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
/// Return true if the value is a constant 1 integer or a splatted vector of a
/// constant 1 integer (with no undefs).
@@ -1673,6 +1742,38 @@ public:
}
};
+/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
+/// the offset and size that are started/ended in the underlying FrameIndex.
+class LifetimeSDNode : public SDNode {
+ friend class SelectionDAG;
+ int64_t Size;
+ int64_t Offset; // -1 if offset is unknown.
+
+ LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, int64_t Size, int64_t Offset)
+ : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
+public:
+ int64_t getFrameIndex() const {
+ return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
+ }
+
+ bool hasOffset() const { return Offset >= 0; }
+ int64_t getOffset() const {
+ assert(hasOffset() && "offset is unknown");
+ return Offset;
+ }
+ int64_t getSize() const {
+ assert(hasOffset() && "offset is unknown");
+ return Size;
+ }
+
+ // Methods to support isa and dyn_cast
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::LIFETIME_START ||
+ N->getOpcode() == ISD::LIFETIME_END;
+ }
+};
+
class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
@@ -1818,12 +1919,31 @@ public:
unsigned MinSplatBits = 0,
bool isBigEndian = false) const;
+ /// Returns the demanded splatted value or a null value if this is not a
+ /// splat.
+ ///
+ /// The DemandedElts mask indicates the elements that must be in the splat.
+ /// If passed a non-null UndefElements bitvector, it will resize it to match
+ /// the vector width and set the bits where elements are undef.
+ SDValue getSplatValue(const APInt &DemandedElts,
+ BitVector *UndefElements = nullptr) const;
+
/// Returns the splatted value or a null value if this is not a splat.
///
/// If passed a non-null UndefElements bitvector, it will resize it to match
/// the vector width and set the bits where elements are undef.
SDValue getSplatValue(BitVector *UndefElements = nullptr) const;
+ /// Returns the demanded splatted constant or null if this is not a constant
+ /// splat.
+ ///
+ /// The DemandedElts mask indicates the elements that must be in the splat.
+ /// If passed a non-null UndefElements bitvector, it will resize it to match
+ /// the vector width and set the bits where elements are undef.
+ ConstantSDNode *
+ getConstantSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements = nullptr) const;
+
/// Returns the splatted constant or null if this is not a constant
/// splat.
///
@@ -1832,6 +1952,16 @@ public:
ConstantSDNode *
getConstantSplatNode(BitVector *UndefElements = nullptr) const;
+ /// Returns the demanded splatted constant FP or null if this is not a
+ /// constant FP splat.
+ ///
+ /// The DemandedElts mask indicates the elements that must be in the splat.
+ /// If passed a non-null UndefElements bitvector, it will resize it to match
+ /// the vector width and set the bits where elements are undef.
+ ConstantFPSDNode *
+ getConstantFPSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements = nullptr) const;
+
/// Returns the splatted constant FP or null if this is not a constant
/// FP splat.
///
@@ -1956,8 +2086,10 @@ class LabelSDNode : public SDNode {
MCSymbol *Label;
- LabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L)
- : SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {}
+ LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
+ : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
+ assert(LabelSDNode::classof(this) && "not a label opcode");
+ }
public:
MCSymbol *getLabel() const { return Label; }
@@ -2049,6 +2181,8 @@ public:
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
LSBaseSDNodeBits.AddressingMode = AM;
assert(getAddressingMode() == AM && "Value truncated");
+ assert((!MMO->isAtomic() || MMO->isVolatile()) &&
+ "use an AtomicSDNode instead for non-volatile atomics");
}
const SDValue &getOffset() const {
@@ -2473,18 +2607,6 @@ namespace ISD {
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
- /// Return true if the node is a math/logic binary operator. This corresponds
- /// to the IR function of the same name.
- inline bool isBinaryOp(const SDNode *N) {
- auto Op = N->getOpcode();
- return (Op == ISD::ADD || Op == ISD::SUB || Op == ISD::MUL ||
- Op == ISD::AND || Op == ISD::OR || Op == ISD::XOR ||
- Op == ISD::SHL || Op == ISD::SRL || Op == ISD::SRA ||
- Op == ISD::SDIV || Op == ISD::UDIV || Op == ISD::SREM ||
- Op == ISD::UREM || Op == ISD::FADD || Op == ISD::FSUB ||
- Op == ISD::FMUL || Op == ISD::FDIV || Op == ISD::FREM);
- }
-
/// Attempt to match a unary predicate against a scalar/splat constant or
/// every element of a constant BUILD_VECTOR.
/// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
@@ -2495,10 +2617,11 @@ namespace ISD {
/// Attempt to match a binary predicate against a pair of scalar/splat
/// constants or every element of a pair of constant BUILD_VECTORs.
/// If AllowUndef is true, then UNDEF elements will pass nullptr to Match.
+ /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
bool matchBinaryPredicate(
SDValue LHS, SDValue RHS,
std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
- bool AllowUndefs = false);
+ bool AllowUndefs = false, bool AllowTypeMismatch = false);
} // end namespace ISD
} // end namespace llvm
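The negative NoFPExcept encoding introduced above exists so that flag intersection stays a plain AND over all members: combining an exception-free node with one that may raise must yield "may raise". A compile-anywhere sketch of that invariant (a toy struct, not the real SDNodeFlags):

#include <cassert>

struct ToyFlags {
  bool NoFPExcept = true;                       // default: assumed not to raise FP exceptions
  void setFPExcept(bool MayRaise) { NoFPExcept = !MayRaise; }
  bool hasFPExcept() const { return !NoFPExcept; }
  void intersectWith(const ToyFlags &O) { NoFPExcept &= O.NoFPExcept; }
};

int main() {
  ToyFlags Quiet, Raising;
  Raising.setFPExcept(true);
  Quiet.intersectWith(Raising);                 // the combined node may raise
  assert(Quiet.hasFPExcept());
  return 0;
}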
diff --git a/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 45c1df48a5e6..6f6a9a5ae269 100644
--- a/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/SelectionDAGTargetInfo.h - SelectionDAG Info --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,6 +147,14 @@ public:
return std::make_pair(SDValue(), SDValue());
}
+ virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Addr,
+ SDValue Size,
+ MachinePointerInfo DstPtrInfo,
+ bool ZeroData) const {
+ return SDValue();
+ }
+
// Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
// than FMUL and ADD is delegated to the machine combiner.
virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 8c8a7be459fd..2b32a4d30dff 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/SlotIndexes.h - Slot indexes representation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -302,8 +301,6 @@ class raw_ostream;
}
};
- template <> struct isPodLike<SlotIndex> { static const bool value = true; };
-
inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
li.print(os);
return os;
@@ -311,20 +308,6 @@ class raw_ostream;
using IdxMBBPair = std::pair<SlotIndex, MachineBasicBlock *>;
- inline bool operator<(SlotIndex V, const IdxMBBPair &IM) {
- return V < IM.first;
- }
-
- inline bool operator<(const IdxMBBPair &IM, SlotIndex V) {
- return IM.first < V;
- }
-
- struct Idx2MBBCompare {
- bool operator()(const IdxMBBPair &LHS, const IdxMBBPair &RHS) const {
- return LHS.first < RHS.first;
- }
- };
-
/// SlotIndexes pass.
///
/// This pass assigns indexes to each instruction.
@@ -336,10 +319,6 @@ class raw_ostream;
using IndexList = ilist<IndexListEntry>;
IndexList indexList;
-#ifdef EXPENSIVE_CHECKS
- IndexList graveyardList;
-#endif // EXPENSIVE_CHECKS
-
MachineFunction *mf;
using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>;
@@ -368,7 +347,7 @@ class raw_ostream;
public:
static char ID;
- SlotIndexes() : MachineFunctionPass(ID) {
+ SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
}
@@ -385,9 +364,6 @@ class raw_ostream;
/// Dump the indexes.
void dump() const;
- /// Renumber the index list, providing space for new instructions.
- void renumberIndexes();
-
/// Repair indexes after adding and removing instructions.
void repairIndexesInRange(MachineBasicBlock *MBB,
MachineBasicBlock::iterator Begin,
@@ -516,7 +492,9 @@ class raw_ostream;
/// Move iterator to the next IdxMBBPair where the SlotIndex is greater or
/// equal to \p To.
MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const {
- return std::lower_bound(I, idx2MBBMap.end(), To);
+ return std::partition_point(
+ I, idx2MBBMap.end(),
+ [=](const IdxMBBPair &IM) { return IM.first < To; });
}
/// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex
@@ -552,29 +530,6 @@ class raw_ostream;
return J->second;
}
- /// Returns the MBB covering the given range, or null if the range covers
- /// more than one basic block.
- MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
-
- assert(start < end && "Backwards ranges not allowed.");
- MBBIndexIterator itr = findMBBIndex(start);
- if (itr == MBBIndexEnd()) {
- itr = std::prev(itr);
- return itr->second;
- }
-
- // Check that we don't cross the boundary into this block.
- if (itr->first < end)
- return nullptr;
-
- itr = std::prev(itr);
-
- if (itr->first <= start)
- return itr->second;
-
- return nullptr;
- }
-
/// Insert the given machine instruction into the mapping. Returns the
/// assigned index.
/// If Late is set and there are null indexes between mi's neighboring
@@ -680,33 +635,7 @@ class raw_ostream;
idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
renumberIndexes(newItr);
- llvm::sort(idx2MBBMap, Idx2MBBCompare());
- }
-
- /// Free the resources that were required to maintain a SlotIndex.
- ///
- /// Once an index is no longer needed (for instance because the instruction
- /// at that index has been moved), the resources required to maintain the
- /// index can be relinquished to reduce memory use and improve renumbering
- /// performance. Any remaining SlotIndex objects that point to the same
- /// index are left 'dangling' (much the same as a dangling pointer to a
- /// freed object) and should not be accessed, except to destruct them.
- ///
- /// Like dangling pointers, access to dangling SlotIndexes can cause
- /// painful-to-track-down bugs, especially if the memory for the index
- /// previously pointed to has been re-used. To detect dangling SlotIndex
- /// bugs, build with EXPENSIVE_CHECKS=1. This will cause "erased" indexes to
- /// be retained in a graveyard instead of being freed. Operations on indexes
- /// in the graveyard will trigger an assertion.
- void eraseIndex(SlotIndex index) {
- IndexListEntry *entry = index.listEntry();
-#ifdef EXPENSIVE_CHECKS
- indexList.remove(entry);
- graveyardList.push_back(entry);
- entry->setPoison();
-#else
- indexList.erase(entry);
-#endif
+ llvm::sort(idx2MBBMap, less_first());
}
};
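The advanceMBBIndex() change is what allows the free operator< overloads and Idx2MBBCompare to be deleted: std::partition_point with a lambda expresses the same "first pair whose index is >= To" search that std::lower_bound previously needed heterogeneous comparisons for. A standalone illustration of the equivalence:

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, char>> Idx2MBB = {{0, 'a'}, {4, 'b'}, {8, 'c'}};
  int To = 5;

  // Old style: lower_bound comparing a pair against a bare index value.
  auto A = std::lower_bound(Idx2MBB.begin(), Idx2MBB.end(), To,
                            [](const std::pair<int, char> &IM, int V) { return IM.first < V; });

  // New style: partition_point over the predicate "index is still below To".
  auto B = std::partition_point(Idx2MBB.begin(), Idx2MBB.end(),
                                [=](const std::pair<int, char> &IM) { return IM.first < To; });

  assert(A == B && A->second == 'c');
  return 0;
}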
diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h
index 8be9ae378557..d7d88de6f682 100644
--- a/include/llvm/CodeGen/StackMaps.h
+++ b/include/llvm/CodeGen/StackMaps.h
@@ -1,9 +1,8 @@
//===- StackMaps.h - StackMaps ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h
index a506ac636a17..2bdf4425e24a 100644
--- a/include/llvm/CodeGen/StackProtector.h
+++ b/include/llvm/CodeGen/StackProtector.h
@@ -1,9 +1,8 @@
//===- StackProtector.h - Stack Protector Insertion -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,12 +61,6 @@ private:
/// protection when -fstack-protection is used.
unsigned SSPBufferSize = 0;
- /// VisitedPHIs - The set of PHI nodes visited when determining
- /// if a variable's reference has been taken. This set
- /// is maintained to ensure we don't visit the same PHI node multiple
- /// times.
- SmallPtrSet<const PHINode *, 16> VisitedPHIs;
-
// A prologue is generated.
bool HasPrologue = false;
diff --git a/include/llvm/CodeGen/SwiftErrorValueTracking.h b/include/llvm/CodeGen/SwiftErrorValueTracking.h
new file mode 100644
index 000000000000..fb7a12853c09
--- /dev/null
+++ b/include/llvm/CodeGen/SwiftErrorValueTracking.h
@@ -0,0 +1,110 @@
+//===- SwiftErrorValueTracking.h - Track swifterror VReg vals --*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SWIFTERRORVALUETRACKING_H
+#define SWIFTERRORVALUETRACKING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
+#include <functional>
+#include <type_traits>
+#include <utility>
+
+
+namespace llvm {
+ class Function;
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineInstr;
+ class TargetInstrInfo;
+ class TargetLowering;
+
+class SwiftErrorValueTracking {
+ // Some useful objects to reduce the number of function arguments needed.
+ MachineFunction *MF;
+ const Function *Fn;
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+
+ /// A map from swifterror value in a basic block to the virtual register it is
+ /// currently represented by.
+ DenseMap<std::pair<const MachineBasicBlock *, const Value *>, Register>
+ VRegDefMap;
+
+ /// A list of upward exposed vreg uses that need to be satisfied by either a
+ /// copy def or a phi node at the beginning of the basic block representing
+ /// the predecessor(s) swifterror value.
+ DenseMap<std::pair<const MachineBasicBlock *, const Value *>, Register>
+ VRegUpwardsUse;
+
+ /// A map from instructions that define/use a swifterror value to the virtual
+ /// register that represents that def/use.
+ llvm::DenseMap<PointerIntPair<const Instruction *, 1, bool>, Register>
+ VRegDefUses;
+
+ /// The swifterror argument of the current function.
+ const Value *SwiftErrorArg;
+
+ using SwiftErrorValues = SmallVector<const Value*, 1>;
+ /// A function can only have a single swifterror argument. And if it does
+ /// have a swifterror argument, it must be the first entry in
+ /// SwiftErrorVals.
+ SwiftErrorValues SwiftErrorVals;
+
+public:
+ /// Initialize data structures for specified new function.
+ void setFunction(MachineFunction &MF);
+
+ /// Get the (unique) function argument that was marked swifterror, or nullptr
+ /// if this function has no swifterror args.
+ const Value *getFunctionArg() const {
+ return SwiftErrorArg;
+ }
+
+ /// Get or create the swifterror value virtual register in
+ /// VRegDefMap for this basic block.
+ Register getOrCreateVReg(const MachineBasicBlock *, const Value *);
+
+ /// Set the swifterror virtual register in the VRegDefMap for this
+ /// basic block.
+ void setCurrentVReg(const MachineBasicBlock *MBB, const Value *, Register);
+
+ /// Get or create the swifterror value virtual register for a def of a
+ /// swifterror by an instruction.
+ Register getOrCreateVRegDefAt(const Instruction *, const MachineBasicBlock *,
+ const Value *);
+
+ /// Get or create the swifterror value virtual register for a use of a
+ /// swifterror by an instruction.
+ Register getOrCreateVRegUseAt(const Instruction *, const MachineBasicBlock *,
+ const Value *);
+
+ /// Create initial definitions of swifterror values in the entry block of the
+ /// current function.
+ bool createEntriesInEntryBlock(DebugLoc DbgLoc);
+
+ /// Propagate assigned swifterror vregs through a function, synthesizing PHI
+ /// nodes when needed to maintain consistency.
+ void propagateVRegs();
+
+ void preassignVRegs(MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End);
+};
+
+}
+
+#endif
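A hedged sketch of how an instruction selector is expected to drive this class per function; the surrounding selector plumbing (per-block selection loop, FuncInfo, etc.) is assumed and the function name is invented:

// Illustrative only: MF and the per-block selection loop come from the selector.
SwiftErrorValueTracking SwiftError;

void selectFunction(MachineFunction &MF) {
  SwiftError.setFunction(MF);                        // reset per-function state
  SwiftError.createEntriesInEntryBlock(DebugLoc());  // seed initial vreg definitions
  // ... per-block selection runs here, calling getOrCreateVRegUseAt() /
  // getOrCreateVRegDefAt() for every swifterror use/def it encounters ...
  SwiftError.propagateVRegs();                       // insert PHIs so vregs agree across blocks
}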
diff --git a/include/llvm/CodeGen/SwitchLoweringUtils.h b/include/llvm/CodeGen/SwitchLoweringUtils.h
new file mode 100644
index 000000000000..62134dc792f7
--- /dev/null
+++ b/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -0,0 +1,297 @@
+//===- SwitchLoweringUtils.h - Switch Lowering ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
+#define LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/BranchProbability.h"
+
+namespace llvm {
+
+class FunctionLoweringInfo;
+class MachineBasicBlock;
+
+namespace SwitchCG {
+
+enum CaseClusterKind {
+ /// A cluster of adjacent case labels with the same destination, or just one
+ /// case.
+ CC_Range,
+ /// A cluster of cases suitable for jump table lowering.
+ CC_JumpTable,
+ /// A cluster of cases suitable for bit test lowering.
+ CC_BitTests
+};
+
+/// A cluster of case labels.
+struct CaseCluster {
+ CaseClusterKind Kind;
+ const ConstantInt *Low, *High;
+ union {
+ MachineBasicBlock *MBB;
+ unsigned JTCasesIndex;
+ unsigned BTCasesIndex;
+ };
+ BranchProbability Prob;
+
+ static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
+ MachineBasicBlock *MBB, BranchProbability Prob) {
+ CaseCluster C;
+ C.Kind = CC_Range;
+ C.Low = Low;
+ C.High = High;
+ C.MBB = MBB;
+ C.Prob = Prob;
+ return C;
+ }
+
+ static CaseCluster jumpTable(const ConstantInt *Low, const ConstantInt *High,
+ unsigned JTCasesIndex, BranchProbability Prob) {
+ CaseCluster C;
+ C.Kind = CC_JumpTable;
+ C.Low = Low;
+ C.High = High;
+ C.JTCasesIndex = JTCasesIndex;
+ C.Prob = Prob;
+ return C;
+ }
+
+ static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
+ unsigned BTCasesIndex, BranchProbability Prob) {
+ CaseCluster C;
+ C.Kind = CC_BitTests;
+ C.Low = Low;
+ C.High = High;
+ C.BTCasesIndex = BTCasesIndex;
+ C.Prob = Prob;
+ return C;
+ }
+};
+
+using CaseClusterVector = std::vector<CaseCluster>;
+using CaseClusterIt = CaseClusterVector::iterator;
+
+/// Sort Clusters and merge adjacent cases.
+void sortAndRangeify(CaseClusterVector &Clusters);
+
+struct CaseBits {
+ uint64_t Mask = 0;
+ MachineBasicBlock *BB = nullptr;
+ unsigned Bits = 0;
+ BranchProbability ExtraProb;
+
+ CaseBits() = default;
+ CaseBits(uint64_t mask, MachineBasicBlock *bb, unsigned bits,
+ BranchProbability Prob)
+ : Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
+};
+
+using CaseBitsVector = std::vector<CaseBits>;
+
+/// This structure is used to communicate between SelectionDAGBuilder and
+/// SDISel for the code generation of additional basic blocks needed by
+/// multi-case switch statements.
+struct CaseBlock {
+ // For the GISel interface.
+ struct PredInfoPair {
+ CmpInst::Predicate Pred;
+ // Set when no comparison should be emitted.
+ bool NoCmp;
+ };
+ union {
+ // The condition code to use for the case block's setcc node.
+ // Besides the integer condition codes, this can also be SETTRUE, in which
+ // case no comparison gets emitted.
+ ISD::CondCode CC;
+ struct PredInfoPair PredInfo;
+ };
+
+ // The LHS/MHS/RHS of the comparison to emit.
+ // Emit by default LHS op RHS. MHS is used for range comparisons:
+ // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
+ // The block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+
+ // The block into which to emit the code for the setcc and branches.
+ MachineBasicBlock *ThisBB;
+
+ /// The debug location of the instruction this CaseBlock was
+ /// produced from.
+ SDLoc DL;
+ DebugLoc DbgLoc;
+
+ // Branch weights.
+ BranchProbability TrueProb, FalseProb;
+
+ // Constructor for SelectionDAG.
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me, SDLoc dl,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
+ TrueProb(trueprob), FalseProb(falseprob) {}
+
+ // Constructor for GISel.
+ CaseBlock(CmpInst::Predicate pred, bool nocmp, const Value *cmplhs,
+ const Value *cmprhs, const Value *cmpmiddle,
+ MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+ MachineBasicBlock *me, DebugLoc dl,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : PredInfo({pred, nocmp}), CmpLHS(cmplhs), CmpMHS(cmpmiddle),
+ CmpRHS(cmprhs), TrueBB(truebb), FalseBB(falsebb), ThisBB(me),
+ DbgLoc(dl), TrueProb(trueprob), FalseProb(falseprob) {}
+};
+
+struct JumpTable {
+ /// The virtual register containing the index of the jump table entry
+ /// to jump to.
+ unsigned Reg;
+ /// The JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// The MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// The MBB of the default bb, which is a successor of the range
+  /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D)
+ : Reg(R), JTI(J), MBB(M), Default(D) {}
+};
+struct JumpTableHeader {
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ bool OmitRangeCheck;
+
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false)
+ : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+ Emitted(E), OmitRangeCheck(false) {}
+};
+using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
+
+struct BitTestCase {
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ BranchProbability ExtraProb;
+
+ BitTestCase(uint64_t M, MachineBasicBlock *T, MachineBasicBlock *Tr,
+ BranchProbability Prob)
+ : Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
+};
+
+using BitTestInfo = SmallVector<BitTestCase, 3>;
+
+struct BitTestBlock {
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ MVT RegVT;
+ bool Emitted;
+ bool ContiguousRange;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ BranchProbability Prob;
+ BranchProbability DefaultProb;
+
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
+ bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+ RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+ Cases(std::move(C)), Prob(Pr) {}
+};
+
+/// Return the range of case values covered by Clusters[First..Last].
+uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
+ unsigned Last);
+
+/// Return the number of cases within a range.
+uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last);
+
+struct SwitchWorkListItem {
+ MachineBasicBlock *MBB;
+ CaseClusterIt FirstCluster;
+ CaseClusterIt LastCluster;
+ const ConstantInt *GE;
+ const ConstantInt *LT;
+ BranchProbability DefaultProb;
+};
+using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
+
+class SwitchLowering {
+public:
+ SwitchLowering(FunctionLoweringInfo &funcinfo) : FuncInfo(funcinfo) {}
+
+ void init(const TargetLowering &tli, const TargetMachine &tm,
+ const DataLayout &dl) {
+ TLI = &tli;
+ TM = &tm;
+ DL = &dl;
+ }
+
+ /// Vector of CaseBlock structures used to communicate SwitchInst code
+ /// generation information.
+ std::vector<CaseBlock> SwitchCases;
+
+ /// Vector of JumpTable structures used to communicate SwitchInst code
+ /// generation information.
+ std::vector<JumpTableBlock> JTCases;
+
+ /// Vector of BitTestBlock structures used to communicate SwitchInst code
+ /// generation information.
+ std::vector<BitTestBlock> BitTestCases;
+
+ void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB);
+
+ bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
+ unsigned Last, const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
+
+
+ void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
+
+ /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
+ /// decides it's not a good idea.
+ bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
+ const SwitchInst *SI, CaseCluster &BTCluster);
+
+ virtual void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown()) = 0;
+
+ virtual ~SwitchLowering() = default;
+
+private:
+ const TargetLowering *TLI;
+ const TargetMachine *TM;
+ const DataLayout *DL;
+ FunctionLoweringInfo &FuncInfo;
+};
+
+} // namespace SwitchCG
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_SWITCHLOWERINGUTILS_H
+
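A hedged sketch of how a lowering client is expected to seed the cluster list before calling findJumpTables()/findBitTestClusters(); SI is the SwitchInst being lowered and the MachineBasicBlock lookup is a placeholder:

using namespace SwitchCG;

CaseClusterVector Clusters;
for (auto Case : SI->cases()) {
  const ConstantInt *Val = Case.getCaseValue();
  // Placeholder: in real code this comes from the IR-to-MBB mapping.
  MachineBasicBlock *MBB = nullptr;
  Clusters.push_back(
      CaseCluster::range(Val, Val, MBB, BranchProbability::getUnknown()));
}
sortAndRangeify(Clusters);   // sort by value and merge adjacent cases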
diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h
index be6562c85f2e..358798d5ed60 100644
--- a/include/llvm/CodeGen/TailDuplicator.h
+++ b/include/llvm/CodeGen/TailDuplicator.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TailDuplicator.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/TargetCallingConv.h b/include/llvm/CodeGen/TargetCallingConv.h
index 7d138f585171..aebeeecbe506 100644
--- a/include/llvm/CodeGen/TargetCallingConv.h
+++ b/include/llvm/CodeGen/TargetCallingConv.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/TargetCallingConv.h - Calling Convention ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,9 +45,12 @@ namespace ISD {
unsigned IsInConsecutiveRegsLast : 1;
unsigned IsInConsecutiveRegs : 1;
unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
+ unsigned IsPointer : 1;
unsigned ByValSize; ///< Byval struct size
+ unsigned PointerAddrSpace; ///< Address space of pointer argument
+
public:
ArgFlagsTy()
: IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
@@ -56,8 +58,9 @@ namespace ISD {
IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0),
IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
- IsCopyElisionCandidate(0), ByValSize(0) {
- static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big");
+ IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
+ PointerAddrSpace(0) {
+ static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
}
bool isZExt() const { return IsZExt; }
@@ -114,6 +117,9 @@ namespace ISD {
bool isCopyElisionCandidate() const { return IsCopyElisionCandidate; }
void setCopyElisionCandidate() { IsCopyElisionCandidate = 1; }
+ bool isPointer() const { return IsPointer; }
+ void setPointer() { IsPointer = 1; }
+
unsigned getByValAlign() const { return (1U << ByValAlign) / 2; }
void setByValAlign(unsigned A) {
ByValAlign = Log2_32(A) + 1;
@@ -128,7 +134,10 @@ namespace ISD {
unsigned getByValSize() const { return ByValSize; }
void setByValSize(unsigned S) { ByValSize = S; }
- };
+
+ unsigned getPointerAddrSpace() const { return PointerAddrSpace; }
+ void setPointerAddrSpace(unsigned AS) { PointerAddrSpace = AS; }
+};
/// InputArg - This struct carries flags and type information about a
/// single incoming (formal) argument or incoming (from the perspective
diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h
index b4d1da941433..878c9ffd2b51 100644
--- a/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/include/llvm/CodeGen/TargetFrameLowering.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/TargetFrameLowering.h ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#define LLVM_CODEGEN_TARGETFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/StringSwitch.h"
#include <utility>
#include <vector>
@@ -24,6 +24,14 @@ namespace llvm {
class MachineFunction;
class RegScavenger;
+namespace TargetStackID {
+ enum Value {
+ Default = 0,
+ SGPRSpill = 1,
+ NoAlloc = 255
+ };
+}
+
/// Information about stack frame layout on the target. It holds the direction
/// of stack growth, the known stack alignment on entry to each function, and
/// the offset to the locals area.
@@ -262,6 +270,17 @@ public:
return getFrameIndexReference(MF, FI, FrameReg);
}
+ /// getNonLocalFrameIndexReference - This method returns the offset used to
+ /// reference a frame index location. The offset can be from either FP/BP/SP
+ /// based on which base register is returned by llvm.localaddress.
+ virtual int getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const {
+ // By default, dispatch to getFrameIndexReference. Interested targets can
+ // override this.
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+ }
+
/// This method determines which of the registers reported by
/// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
  /// The default implementation populates the \p SavedRegs bitset with
@@ -335,6 +354,16 @@ public:
return true;
}
+ virtual bool isSupportedStackID(TargetStackID::Value ID) const {
+ switch (ID) {
+ default:
+ return false;
+ case TargetStackID::Default:
+ case TargetStackID::NoAlloc:
+ return true;
+ }
+ }
+
/// Check if given function is safe for not having callee saved registers.
/// This is used when interprocedural register allocation is enabled.
static bool isSafeForNoCSROpt(const Function &F) {
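The new TargetStackID enumeration and isSupportedStackID() hook let a backend opt in to extra stack slot kinds beyond the default. A hedged sketch of a hypothetical target override (MyTargetFrameLowering is invented; SGPRSpill mirrors the ID named above):

// Hypothetical target: additionally accept SGPRSpill stack IDs.
class MyTargetFrameLowering : public TargetFrameLowering {
public:
  using TargetFrameLowering::TargetFrameLowering;

  bool isSupportedStackID(TargetStackID::Value ID) const override {
    switch (ID) {
    case TargetStackID::Default:
    case TargetStackID::NoAlloc:
    case TargetStackID::SGPRSpill:
      return true;
    default:
      return false;
    }
  }
};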
diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h
index 961b90e9bc12..25b04f8c019a 100644
--- a/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOutliner.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/ErrorHandling.h"
@@ -81,6 +81,7 @@ public:
/// Given a machine instruction descriptor, returns the register
/// class constraint for OpNum, or NULL.
+ virtual
const TargetRegisterClass *getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) const;
@@ -429,6 +430,13 @@ public:
RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0)
: Reg(Reg), SubReg(SubReg) {}
+
+ bool operator==(const RegSubRegPair& P) const {
+ return Reg == P.Reg && SubReg == P.SubReg;
+ }
+ bool operator!=(const RegSubRegPair& P) const {
+ return !(*this == P);
+ }
};
/// A pair composed of a pair of a register and a sub-register index,
@@ -663,8 +671,9 @@ public:
/// is finished. Return the value/register of the new loop count. We need
/// this function when peeling off one or more iterations of a loop. This
/// function assumes the nth iteration is peeled first.
- virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
- MachineInstr &Cmp,
+ virtual unsigned reduceLoopCount(MachineBasicBlock &MBB,
+ MachineBasicBlock &PreHeader,
+ MachineInstr *IndVar, MachineInstr &Cmp,
SmallVectorImpl<MachineOperand> &Cond,
SmallVectorImpl<MachineInstr *> &PrevInsts,
unsigned Iter, unsigned MaxIter) const {
@@ -926,9 +935,12 @@ public:
/// operand folded, otherwise NULL is returned.
/// The new instruction is inserted before MI, and the client is responsible
/// for removing the old instruction.
+ /// If VRM is passed, the assigned physregs can be inspected by target to
+ /// decide on using an opcode (note that those assignments can still change).
MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops,
int FI,
- LiveIntervals *LIS = nullptr) const;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const;
/// Same as the previous version except it allows folding of any load and
/// store from / to any address, not just from a specific stack slot.
@@ -1018,7 +1030,8 @@ protected:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const {
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const {
return nullptr;
}
@@ -1138,8 +1151,9 @@ public:
/// Get the base operand and byte offset of an instruction that reads/writes
/// memory.
- virtual bool getMemOperandWithOffset(MachineInstr &MI,
- MachineOperand *&BaseOp, int64_t &Offset,
+ virtual bool getMemOperandWithOffset(const MachineInstr &MI,
+ const MachineOperand *&BaseOp,
+ int64_t &Offset,
const TargetRegisterInfo *TRI) const {
return false;
}
@@ -1164,8 +1178,8 @@ public:
/// or
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
/// to TargetPassConfig::createMachineScheduler() to have an effect.
- virtual bool shouldClusterMemOps(MachineOperand &BaseOp1,
- MachineOperand &BaseOp2,
+ virtual bool shouldClusterMemOps(const MachineOperand &BaseOp1,
+ const MachineOperand &BaseOp2,
unsigned NumLoads) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
}
@@ -1253,8 +1267,9 @@ public:
/// Measure the specified inline asm to determine an approximation of its
/// length.
- virtual unsigned getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const;
+ virtual unsigned getInlineAsmLength(
+ const char *Str, const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI = nullptr) const;
/// Allocate and return a hazard recognizer to use for this target when
/// scheduling the machine instructions before register allocation.
@@ -1542,7 +1557,8 @@ public:
/// See also MachineInstr::mayAlias, which is implemented on top of this
/// function.
virtual bool
- areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const {
assert((MIa.mayLoad() || MIa.mayStore()) &&
"MIa must load from or modify a memory location");
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index 23dbaac03ebe..d5cca60bb1b2 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -189,13 +188,18 @@ public:
bool IsSwiftSelf : 1;
bool IsSwiftError : 1;
uint16_t Alignment = 0;
+ Type *ByValType = nullptr;
ArgListEntry()
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
IsSwiftSelf(false), IsSwiftError(false) {}
- void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx);
+ void setAttributes(const CallBase *Call, unsigned ArgIdx);
+
+ void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
+ return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
+ }
};
using ArgListTy = std::vector<ArgListEntry>;
@@ -235,7 +239,14 @@ public:
/// Return the pointer type for the given address space, defaults to
/// the pointer type from the data layout.
/// FIXME: The default needs to be removed once all the code is updated.
- MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
+ virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
+ return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
+ }
+
+ /// Return the in-memory pointer type for the given address space, defaults to
+ /// the pointer type from the data layout. FIXME: The default needs to be
+ /// removed once all the code is updated.
+ MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
}
@@ -291,6 +302,9 @@ public:
// The default action for one element vectors is to scalarize
if (VT.getVectorNumElements() == 1)
return TypeScalarizeVector;
+ // The default action for an odd-width vector is to widen.
+ if (!VT.isPow2VectorType())
+ return TypeWidenVector;
// The default action for other vectors is to promote
return TypePromoteInteger;
}
@@ -387,8 +401,9 @@ public:
/// efficiently, casting the load to a smaller vector of larger types and
/// loading is more efficient, however, this can be undone by optimizations in
/// dag combiner.
- virtual bool isLoadBitCastBeneficial(EVT LoadVT,
- EVT BitcastVT) const {
+ virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
// Don't do if we could do an indexed load on the original type, but not on
// the new one.
if (!LoadVT.isSimple() || !BitcastVT.isSimple())
@@ -402,14 +417,18 @@ public:
getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
return false;
- return true;
+ bool Fast = false;
+ return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
+ MMO, &Fast) && Fast;
}
/// Return true if the following transform is beneficial:
/// (store (y (conv x)), y*)) -> (store x, (x*))
- virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const {
+ virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
// Default to the same logic as loads.
- return isLoadBitCastBeneficial(StoreVT, BitcastVT);
+ return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
}
/// Return true if it is expected to be cheaper to do a store of a non-zero
@@ -421,10 +440,12 @@ public:
return false;
}
- /// Allow store merging after legalization in addition to before legalization.
- /// This may catch stores that do not exist earlier (eg, stores created from
- /// intrinsics).
- virtual bool mergeStoresAfterLegalization() const { return true; }
+ /// Allow store merging for the specified type after legalization in addition
+ /// to before legalization. This may transform stores that do not exist
+ /// earlier (for example, stores created from intrinsics).
+ virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
+ return true;
+ }
/// Returns if it's reasonable to merge stores to MemVT size.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
@@ -521,13 +542,22 @@ public:
/// There are two ways to clear extreme bits (either low or high):
/// Mask: x & (-1 << y) (the instcombine canonical form)
/// Shifts: x >> y << y
- /// Return true if the variant with 2 shifts is preferred.
+ /// Return true if the variant with 2 variable shifts is preferred.
/// Return false if there is no preference.
- virtual bool preferShiftsToClearExtremeBits(SDValue X) const {
+ virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
// By default, let's assume that no one prefers shifts.
return false;
}
+ /// Return true if it is profitable to fold a pair of shifts into a mask.
+ /// This is usually true on most targets. But some targets, like Thumb1,
+ /// have immediate shift instructions, but no immediate "and" instruction;
+ /// this makes the fold unprofitable.
+ virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const {
+ return true;
+ }
+
  /// Should we transform the IR-optimal check for whether given truncation
/// down into KeptBits would be truncating or not:
/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
@@ -541,6 +571,16 @@ public:
return false;
}
+ /// These two forms are equivalent:
+ /// sub %y, (xor %x, -1)
+ /// add (add %x, 1), %y
+ /// The variant with two add's is IR-canonical.
+ /// Some targets may prefer one to the other.
+ virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
+ // By default, let's assume that everyone prefers the form with two add's.
+ return true;
+ }
+
/// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
@@ -560,11 +600,6 @@ public:
return false;
}
- /// Return true if target supports floating point exceptions.
- bool hasFloatingPointExceptions() const {
- return HasFloatingPointExceptions;
- }
-
  /// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
/// takes more cycles to execute than FADD.
@@ -619,12 +654,21 @@ public:
/// Return the register class that should be used for the specified value
/// type.
- virtual const TargetRegisterClass *getRegClassFor(MVT VT) const {
+ virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
+ (void)isDivergent;
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
assert(RC && "This value type is not natively supported!");
return RC;
}
+ /// Allows target to decide about the register class of the
+ /// specific value that is live outside the defining block.
+ /// Returns true if the value needs uniform register class.
+ virtual bool requiresUniformRegister(MachineFunction &MF,
+ const Value *) const {
+ return false;
+ }
+
/// Return the 'representative' register class for the specified value
/// type.
///
@@ -643,6 +687,13 @@ public:
return RepRegClassCostForVT[VT.SimpleTy];
}
+ /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
+ /// instructions, and false if a library call is preferred (e.g. for code-size
+ /// reasons).
+ virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+ return true;
+ }
+
/// Return true if the target has native support for the specified value type.
/// This means that it has a register that directly holds it without
/// promotions or expansions.
@@ -768,7 +819,8 @@ public:
/// Returns true if the target can instruction select the specified FP
/// immediate natively. If false, the legalizer will materialize the FP
/// immediate as a load from a constant pool.
- virtual bool isFPImmLegal(const APFloat &/*Imm*/, EVT /*VT*/) const {
+ virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
+ bool ForCodeSize = false) const {
return false;
}
@@ -830,6 +882,8 @@ public:
default:
llvm_unreachable("Unexpected fixed point operation.");
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
}
@@ -865,6 +919,8 @@ public:
case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
+ case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
+ case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
}
auto Action = getOperationAction(EqOpc, VT);
@@ -931,21 +987,20 @@ public:
/// Return true if lowering to a jump table is suitable for a set of case
/// clusters which may contain \p NumCases cases, \p Range range of values.
- /// FIXME: This function check the maximum table size and density, but the
- /// minimum size is not checked. It would be nice if the minimum size is
- /// also combined within this function. Currently, the minimum size check is
- /// performed in findJumpTable() in SelectionDAGBuiler and
- /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
uint64_t Range) const {
- const bool OptForSize = SI->getParent()->getParent()->optForSize();
+ // FIXME: This function checks the maximum table size and density, but the
+ // minimum size is not checked. It would be nice if the minimum size check
+ // were also combined into this function. Currently, the minimum size check is
+ // performed in findJumpTable() in SelectionDAGBuilder and
+ // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+ const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
- const unsigned MaxJumpTableSize =
- OptForSize || getMaximumJumpTableSize() == 0
- ? UINT_MAX
- : getMaximumJumpTableSize();
- // Check whether a range of clusters is dense enough for a jump table.
- if (Range <= MaxJumpTableSize &&
+ const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
+
+ // Check whether the number of cases is small enough and
+ // the range is dense enough for a jump table.
+ if ((OptForSize || Range <= MaxJumpTableSize) &&
(NumCases * 100 >= Range * MinDensity)) {
return true;
}
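A minimal sketch of the density test used above, with made-up numbers (the helper name is hypothetical, not from this patch):

#include <cassert>
#include <cstdint>

static bool denseEnough(uint64_t NumCases, uint64_t Range, unsigned MinDensity) {
  return NumCases * 100 >= Range * MinDensity;
}

int main() {
  // 40 cases over a range of 100 values at a 40% minimum density: dense enough.
  assert(denseEnough(40, 100, 40));
  // 4 cases spread over 1000 values is only 0.4% dense: no jump table.
  assert(!denseEnough(4, 1000, 40));
}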
@@ -1140,24 +1195,42 @@ public:
EVT getValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
// Lower scalar pointers to native pointer types.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (auto *PTy = dyn_cast<PointerType>(Ty))
return getPointerTy(DL, PTy->getAddressSpace());
- if (Ty->isVectorTy()) {
- VectorType *VTy = cast<VectorType>(Ty);
- Type *Elm = VTy->getElementType();
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ Type *EltTy = VTy->getElementType();
// Lower vectors of pointers to native pointer types.
+ if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
+ EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
+ EltTy = PointerTy.getTypeForEVT(Ty->getContext());
+ }
+ return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
+ VTy->getNumElements());
+ }
+
+ return EVT::getEVT(Ty, AllowUnknown);
+ }
+
+ EVT getMemValueType(const DataLayout &DL, Type *Ty,
+ bool AllowUnknown = false) const {
+ // Lower scalar pointers to native pointer types.
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ return getPointerMemTy(DL, PTy->getAddressSpace());
+ else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ Type *Elm = VTy->getElementType();
if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
- EVT PointerTy(getPointerTy(DL, PT->getAddressSpace()));
+ EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
Elm = PointerTy.getTypeForEVT(Ty->getContext());
}
-
return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
VTy->getNumElements());
}
- return EVT::getEVT(Ty, AllowUnknown);
+
+ return getValueType(DL, Ty, AllowUnknown);
}
+
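A hedged usage sketch (the helper function and the 64-bit-pointer assumption are mine, not part of this patch): getValueType lowers a vector of pointers to a vector of pointer-sized integers.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"

// On a target with 64-bit pointers this typically returns MVT::v4i64.
llvm::EVT lowerPtrVecType(const llvm::TargetLowering &TLI,
                          const llvm::DataLayout &DL, llvm::LLVMContext &Ctx) {
  llvm::Type *PtrVecTy = llvm::VectorType::get(llvm::Type::getInt8PtrTy(Ctx), 4);
  return TLI.getValueType(DL, PtrVecTy);
}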
/// Return the MVT corresponding to this LLVM type. See getValueType.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
@@ -1327,18 +1400,6 @@ public:
return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
}
- /// For memcmp expansion when the memcmp result is only compared equal or
- /// not-equal to 0, allow up to this number of load pairs per block. As an
- /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
- /// a0 = load2bytes &a[0]
- /// b0 = load2bytes &b[0]
- /// a2 = load1byte &a[2]
- /// b2 = load1byte &b[2]
- /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
- virtual unsigned getMemcmpEqZeroLoadsPerBlock() const {
- return 1;
- }
-
/// Get maximum # of store operations permitted for llvm.memmove
///
/// This function returns the maximum number of store operations permitted
@@ -1358,10 +1419,10 @@ public:
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
- virtual bool allowsMisalignedMemoryAccesses(EVT,
- unsigned AddrSpace = 0,
- unsigned Align = 1,
- bool * /*Fast*/ = nullptr) const {
+ virtual bool allowsMisalignedMemoryAccesses(
+ EVT, unsigned AddrSpace = 0, unsigned Align = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool * /*Fast*/ = nullptr) const {
return false;
}
@@ -1369,8 +1430,18 @@ public:
/// given address space and alignment. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the
/// target).
+ bool
+ allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
+ unsigned AddrSpace = 0, unsigned Alignment = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const;
+
+ /// Return true if the target supports a memory access of this type for the
+ /// given MachineMemOperand. If the access is allowed, the optional
+ /// final parameter returns if the access is also fast (as defined by the
+ /// target).
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
- unsigned AddrSpace = 0, unsigned Alignment = 1,
+ const MachineMemOperand &MMO,
bool *Fast = nullptr) const;
/// Returns the target specific optimal type for load and store operations as
@@ -1384,12 +1455,11 @@ public:
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
/// does not need to be loaded. It returns EVT::Other if the type should be
/// determined using generic target-independent logic.
- virtual EVT getOptimalMemOpType(uint64_t /*Size*/,
- unsigned /*DstAlign*/, unsigned /*SrcAlign*/,
- bool /*IsMemset*/,
- bool /*ZeroMemset*/,
- bool /*MemcpyStrSrc*/,
- MachineFunction &/*MF*/) const {
+ virtual EVT
+ getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
+ unsigned /*SrcAlign*/, bool /*IsMemset*/,
+ bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
+ const AttributeList & /*FuncAttributes*/) const {
return MVT::Other;
}
@@ -1515,7 +1585,7 @@ public:
/// performs validation and error handling, returns the function. Otherwise,
/// returns nullptr. Must be previously inserted by insertSSPDeclarations.
/// Should be used only when getIRStackGuard returns nullptr.
- virtual Value *getSSPStackGuardCheck(const Module &M) const;
+ virtual Function *getSSPStackGuardCheck(const Module &M) const;
protected:
Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
@@ -1537,8 +1607,9 @@ public:
}
/// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
- /// are happy to sink it into basic blocks.
- virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ /// are happy to sink it into basic blocks. A cast may be free, but not
+ /// necessarily a no-op, e.g. a free truncate from a 64-bit to a 32-bit pointer.
+ virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
return isNoopAddrSpaceCast(SrcAS, DestAS);
}
@@ -1716,8 +1787,9 @@ public:
/// Returns how the IR-level AtomicExpand pass should expand the given
/// AtomicRMW, if at all. Default is to never expand.
- virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const {
- return AtomicExpansionKind::None;
+ virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ return RMW->isFloatingPointOperation() ?
+ AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
}
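A minimal standalone analogue (plain C++, not LLVM code) of what the CmpXChg expansion of a floating-point atomicrmw such as fadd boils down to:

#include <atomic>

float atomicFAdd(std::atomic<float> &A, float V) {
  float Old = A.load();
  // Retry the exchange until no other thread changed A in between.
  while (!A.compare_exchange_weak(Old, Old + V)) {
  }
  return Old;
}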
/// On some platforms, an AtomicRMW that never actually modifies the value
@@ -1762,6 +1834,8 @@ public:
Action != TypeSplitVector;
}
+ virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
+
/// Return true if a select of constants (select Cond, C1, C2) should be
/// transformed into simple math ops with the condition value. For example:
/// select Cond, C1, C1-1 --> add (zext Cond), C1-1
@@ -1865,12 +1939,6 @@ protected:
/// control.
void setJumpIsExpensive(bool isExpensive = true);
- /// Tells the code generator that this target supports floating point
- /// exceptions and cares about preserving floating point exception behavior.
- void setHasFloatingPointExceptions(bool FPExceptions = true) {
- HasFloatingPointExceptions = FPExceptions;
- }
-
/// Tells the code generator which bitwidths to bypass.
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
@@ -2159,6 +2227,8 @@ public:
case ISD::UADDSAT:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
return true;
@@ -2166,6 +2236,30 @@ public:
}
}
+ /// Return true if the node is a math/logic binary operator.
+ virtual bool isBinOp(unsigned Opcode) const {
+ // A commutative binop must be a binop.
+ if (isCommutativeBinOp(Opcode))
+ return true;
+ // These are non-commutative binops.
+ switch (Opcode) {
+ case ISD::SUB:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ return true;
+ default:
+ return false;
+ }
+ }
+
/// Return true if it's free to truncate a value of type FromTy to type
/// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
/// by referencing its sub-register AX.
@@ -2270,6 +2364,16 @@ public:
return false;
}
+ /// Return true if sinking I's operands to the same basic block as I is
+ /// profitable, e.g. because the operands can be folded into a target
+ /// instruction during instruction selection. After calling the function,
+ /// \p Ops contains the Uses to sink, ordered by dominance (dominating users
+ /// come first).
+ virtual bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const {
+ return false;
+ }
+
/// Return true if the target supplies and combines to a paired load
/// two loaded values of type LoadedType next to each other in memory.
/// RequiredAlignment gives the minimal alignment constraints that must be met
@@ -2415,6 +2519,31 @@ public:
return false;
}
+ /// Return true if extraction of a scalar element from the given vector type
+ /// at the given index is cheap. For example, if scalar operations occur on
+ /// the same register file as vector operations, then an extract element may
+ /// be a sub-register rename rather than an actual instruction.
+ virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
+ return false;
+ }
+
+ /// Try to convert math with an overflow comparison into the corresponding DAG
+ /// node operation. Targets may want to override this independently of whether
+ /// the operation is legal/custom for the given type because it may obscure
+ /// matching of other patterns.
+ virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+ // TODO: The default logic is inherited from code in CodeGenPrepare.
+ // The opcode should not make a difference by default?
+ if (Opcode != ISD::UADDO)
+ return false;
+
+ // Allow the transform as long as we have an integer type that is not
+ // obviously illegal and unsupported.
+ if (VT.isVector())
+ return false;
+ return VT.isSimple() || !isOperationExpand(Opcode, VT);
+ }
+
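A made-up C++ illustration (not from this patch) of the source-level pattern that shouldFormOverflowOp allows to become a single ISD::UADDO node: an unsigned add whose result is compared against an operand to detect wrap-around.

#include <cstdint>

bool addOverflows(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;
  return Sum < A; // true iff the unsigned addition wrapped
}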
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
// even if the vector itself has multiple uses.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
@@ -2495,10 +2624,6 @@ private:
/// predication.
bool JumpIsExpensive;
- /// Whether the target supports or cares about preserving floating point
- /// exception behavior.
- bool HasFloatingPointExceptions;
-
/// This target prefers to use _setjmp to implement llvm.setjmp.
///
/// Defaults to false.
@@ -2834,11 +2959,10 @@ public:
/// Returns a pair of (return value, chain).
/// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
- std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
- EVT RetVT, ArrayRef<SDValue> Ops,
- bool isSigned, const SDLoc &dl,
- bool doesNotReturn = false,
- bool isReturnValueUsed = true) const;
+ std::pair<SDValue, SDValue> makeLibCall(
+ SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops,
+ bool isSigned, const SDLoc &dl, bool doesNotReturn = false,
+ bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const;
/// Check whether parameters to a call that are passed in callee saved
/// registers are the same as from the calling function. This needs to be
@@ -2876,6 +3000,20 @@ public:
}
};
+ /// Determines the optimal series of memory ops to replace the memset / memcpy.
+ /// Return true if the number of memory ops is below the threshold (Limit).
+ /// It returns the types of the sequence of memory ops to perform
+ /// memset / memcpy by reference.
+ bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const;
+
/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return
@@ -3001,6 +3139,10 @@ public:
TargetLoweringOpt &TLO,
unsigned Depth = 0) const;
+ /// This method returns the constant pool value that will be loaded by LD.
+ /// NOTE: You must check for implicit extensions of the constant by LD.
+ virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
+
/// If \p SNaN is false, \returns true if \p Op is known to never be any
/// NaN. If \p sNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
@@ -3088,15 +3230,6 @@ public:
return true;
}
- /// Return true if it is profitable to fold a pair of shifts into a mask.
- /// This is usually true on most targets. But some targets, like Thumb1,
- /// have immediate shift instructions, but no immediate "and" instruction;
- /// this makes the fold unprofitable.
- virtual bool shouldFoldShiftPairToMask(const SDNode *N,
- CombineLevel Level) const {
- return true;
- }
-
// Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
// to a shuffle and a truncate.
// Example of such a combine:
@@ -3430,6 +3563,15 @@ public:
return false;
}
+ /// For most targets, an LLVM type must be broken down into multiple
+ /// smaller types. Usually the halves are ordered according to the endianness
+ /// but for some platforms that would break. So this method defaults to
+ /// matching the endianness but can be overridden.
+ virtual bool
+ shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
+ return DL.isLittleEndian();
+ }
+
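A small hypothetical sketch (plain C++, names are mine) of the little-endian ordering this hook selects when an i64 argument is split into two i32 pieces: the low half is passed first, then the high half.

#include <cstdint>
#include <utility>

std::pair<uint32_t, uint32_t> splitLittleEndian(uint64_t V) {
  uint32_t Lo = static_cast<uint32_t>(V);       // first piece
  uint32_t Hi = static_cast<uint32_t>(V >> 32); // second piece
  return {Lo, Hi};
}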
/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -3638,6 +3780,12 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
+ // Lower custom output constraints. If invalid, return SDValue().
+ virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+ SDLoc DL,
+ const AsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) const;
+
//===--------------------------------------------------------------------===//
// Div utility functions
//
@@ -3840,8 +3988,26 @@ public:
/// Method for building the DAG expansion of ISD::SMULFIX. This method accepts
/// integers as its arguments.
- SDValue getExpandedFixedPointMultiplication(SDNode *Node,
- SelectionDAG &DAG) const;
+ SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
+
+ /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
+ /// always succeeds and populates the Result and Overflow arguments.
+ void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
+ SelectionDAG &DAG) const;
+
+ /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
+ /// always succeeds and populates the Result and Overflow arguments.
+ void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
+ SelectionDAG &DAG) const;
+
+ /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
+ /// expansion was successful and populates the Result and Overflow arguments.
+ bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
+ SelectionDAG &DAG) const;
+
+ /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
+ /// only the first Count elements of the vector are used.
+ SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
@@ -3894,14 +4060,23 @@ public:
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
private:
- SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
- ISD::CondCode Cond, DAGCombinerInfo &DCI,
- const SDLoc &DL) const;
+ SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ const SDLoc &DL, DAGCombinerInfo &DCI) const;
+ SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ const SDLoc &DL, DAGCombinerInfo &DCI) const;
SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
SDValue N1, ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const;
+
+ SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const;
+ SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
+ ISD::CondCode Cond, DAGCombinerInfo &DCI,
+ const SDLoc &DL) const;
};
/// Given an LLVM IR type and return type attributes, compute the return value
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 052d1f8bc686..a1fb81cb009d 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/TargetLoweringObjectFileImpl.h - Object Info --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/TargetOpcodes.h b/include/llvm/CodeGen/TargetOpcodes.h
index d0d959c4ae11..080a244f6f69 100644
--- a/include/llvm/CodeGen/TargetOpcodes.h
+++ b/include/llvm/CodeGen/TargetOpcodes.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/TargetOpcodes.h - Target Indep Opcodes -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h
index 3288711a335d..0bd82aafac37 100644
--- a/include/llvm/CodeGen/TargetPassConfig.h
+++ b/include/llvm/CodeGen/TargetPassConfig.h
@@ -1,9 +1,8 @@
//===- TargetPassConfig.h - Code Generation pass options --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,6 +24,7 @@ class LLVMTargetMachine;
struct MachineSchedContext;
class PassConfigImpl;
class ScheduleDAGInstrs;
+class CSEConfigBase;
// The old pass manager infrastructure is hidden in a legacy namespace now.
namespace legacy {
@@ -75,9 +75,6 @@ public:
}
};
-template <> struct isPodLike<IdentifyingPassPtr> {
- static const bool value = true;
-};
/// Target-Independent Code Generator Pass Configuration Options.
///
@@ -319,6 +316,13 @@ public:
/// when GlobalISel failed and isGlobalISelAbortEnabled is false.
virtual bool reportDiagnosticWhenGlobalISelFallback() const;
+ /// Check whether continuous CSE should be enabled in GISel passes.
+ /// By default, it's enabled for non-O0 optimization levels.
+ virtual bool isGISelCSEEnabled() const;
+
+ /// Returns the CSEConfig object to use for the current optimization level.
+ virtual std::unique_ptr<CSEConfigBase> getCSEConfig() const;
+
protected:
// Helper to verify the analysis is really immutable.
void setOpt(bool &Opt, bool Val);
@@ -360,11 +364,11 @@ protected:
/// addFastRegAlloc - Add the minimum set of target-independent passes that
/// are required for fast register allocation.
- virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
+ virtual void addFastRegAlloc();
/// addOptimizedRegAlloc - Add passes related to register allocation.
/// LLVMTargetMachine provides standard regalloc passes for most targets.
- virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+ virtual void addOptimizedRegAlloc();
/// addPreRewrite - Add passes to the optimized register allocation pipeline
/// after register allocation is complete, but before virtual registers are
@@ -374,10 +378,18 @@ protected:
/// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix.
/// When these passes run, VirtRegMap contains legal physreg assignments for
/// all virtual registers.
+ ///
+ /// Note that if the target overloads addRegAssignAndRewriteOptimized, this
+ /// may not be honored. This is also not generally used for the fast variant,
+ /// where the allocation and rewriting are done in one pass.
virtual bool addPreRewrite() {
return false;
}
+ /// Add passes to be run immediately after virtual registers are rewritten
+ /// to physical registers.
+ virtual void addPostRewrite() { }
+
/// This method may be implemented by targets that want to run passes after
/// register allocation pass pipeline but before prolog-epilog insertion.
virtual void addPostRegAlloc() { }
@@ -431,7 +443,12 @@ protected:
/// addMachinePasses helper to create the target-selected or overridden
/// regalloc pass.
- FunctionPass *createRegAllocPass(bool Optimized);
+ virtual FunctionPass *createRegAllocPass(bool Optimized);
+
+ /// Add core register allocator passes which do the actual register assignment
+ /// and rewriting. \returns true if any passes were added.
+ virtual bool addRegAssignmentFast();
+ virtual bool addRegAssignmentOptimized();
};
} // end namespace llvm
diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h
index 0fbff3137653..ddbd677b3eaa 100644
--- a/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1,9 +1,8 @@
//==- CodeGen/TargetRegisterInfo.h - Target Register Information -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -521,6 +520,11 @@ public:
/// function. Used by MachineRegisterInfo::isConstantPhysReg().
virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
+ /// Returns true if the register class is considered divergent.
+ virtual bool isDivergentRegClass(const TargetRegisterClass *RC) const {
+ return false;
+ }
+
/// Physical registers that may be modified within a function but are
/// guaranteed to be restored before any uses. This is useful for targets that
/// have call sequences where a GOT register may be updated by the caller
@@ -986,7 +990,7 @@ public:
/// getFrameRegister - This method should return the register used as a base
/// for values allocated in the current stack frame.
- virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
+ virtual Register getFrameRegister(const MachineFunction &MF) const = 0;
/// Mark a register and all its aliases as reserved in the given set.
void markSuperRegs(BitVector &RegisterSet, unsigned Reg) const;
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 6173925e23a1..cce85c8d7b0d 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TargetSchedule.h - Sched Machine Model ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h
index 968e4c4b8102..037fc3ed3243 100644
--- a/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TargetSubtargetInfo.h - Target Information --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,6 +42,7 @@ class RegisterBankInfo;
class SDep;
class SelectionDAGTargetInfo;
struct SubtargetFeatureKV;
+struct SubtargetSubTypeKV;
struct SubtargetInfoKV;
class SUnit;
class TargetFrameLowering;
@@ -63,8 +63,7 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
protected: // Can only create subclasses...
TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF,
- ArrayRef<SubtargetFeatureKV> PD,
- const SubtargetInfoKV *ProcSched,
+ ArrayRef<SubtargetSubTypeKV> PD,
const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL,
const MCReadAdvanceEntry *RA, const InstrStage *IS,
@@ -190,13 +189,13 @@ public:
/// TargetLowering preference). It does not yet disable the postRA scheduler.
virtual bool enableMachineScheduler() const;
- /// Support printing of [latency:throughput] comment in output .S file.
- virtual bool supportPrintSchedInfo() const { return false; }
-
/// True if the machine scheduler should disable the TLI preference
/// for preRA scheduling with the source level scheduler.
virtual bool enableMachineSchedDefaultSched() const { return true; }
+ /// True if the subtarget should run MachinePipeliner.
+ virtual bool enableMachinePipeliner() const { return true; }
+
/// True if the subtarget should enable joining global copies.
///
/// By default this is enabled if the machine scheduler is enabled, but
@@ -250,6 +249,10 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
}
+ /// Default to DFA for resource management; return false when the target will
+ /// use ProcResource in InstrSchedModel instead.
+ virtual bool useDFAforSMS() const { return true; }
+
// For use with PostRAScheduling: get the minimum optimization level needed
// to enable post-RA scheduling.
virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
@@ -286,12 +289,16 @@ public:
/// possible.
virtual bool enableSubRegLiveness() const { return false; }
- /// Returns string representation of scheduler comment
- std::string getSchedInfoStr(const MachineInstr &MI) const;
- std::string getSchedInfoStr(MCInst const &MCI) const override;
-
/// This is called after a .mir file was loaded.
virtual void mirFileLoaded(MachineFunction &MF) const;
+
+ /// True if the register allocator should use the allocation orders exactly as
+ /// written in the tablegen descriptions, false if it should allocate
+ /// the specified physical register later if it is callee-saved.
+ virtual bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return false;
+ }
};
} // end namespace llvm
diff --git a/include/llvm/CodeGen/UnreachableBlockElim.h b/include/llvm/CodeGen/UnreachableBlockElim.h
index 3e7afd4cd433..d52d7c3c5b49 100644
--- a/include/llvm/CodeGen/UnreachableBlockElim.h
+++ b/include/llvm/CodeGen/UnreachableBlockElim.h
@@ -1,9 +1,8 @@
//===-- UnreachableBlockElim.h - Remove unreachable blocks for codegen --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index d2ef4a94f8e2..c540c94f79d9 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -1,9 +1,8 @@
//===- CodeGen/ValueTypes.h - Low-Level Target independ. types --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 0abb4ece1d14..5818ac183fcc 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -1,9 +1,8 @@
//===- ValueTypes.td - ValueType definitions ---------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,89 +62,105 @@ def v32i16 : ValueType<512, 38>; // 32 x i16 vector value
def v64i16 : ValueType<1024,39>; // 64 x i16 vector value
def v128i16: ValueType<2048,40>; //128 x i16 vector value
-def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 43>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 44>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 45>; // 16 x i32 vector value
-def v32i32 : ValueType<1024,46>; // 32 x i32 vector value
-def v64i32 : ValueType<2048,47>; // 64 x i32 vector value
-
-def v1i64 : ValueType<64 , 48>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 49>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 50>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 51>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,52>; // 16 x i64 vector value
-def v32i64 : ValueType<2048,53>; // 32 x i64 vector value
-
-def v1i128 : ValueType<128, 54>; // 1 x i128 vector value
-
-def nxv1i1 : ValueType<1, 55>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 56>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 57>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 58>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 59>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 60>; // n x 32 x i1 vector value
-
-def nxv1i8 : ValueType<8, 61>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 62>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 63>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 64>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 65>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 66>; // n x 32 x i8 vector value
-
-def nxv1i16 : ValueType<16, 67>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 68>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 69>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 70>; // n x 8 x i16 vector value
-def nxv16i16: ValueType<256, 71>; // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 72>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 73>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 74>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 75>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 76>; // n x 8 x i32 vector value
-def nxv16i32: ValueType<512, 77>; // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,78>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 79>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 80>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 81>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 82>; // n x 8 x i64 vector value
-def nxv16i64: ValueType<1024,83>; // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,84>; // n x 32 x i64 vector value
-
-def v2f16 : ValueType<32 , 85>; // 2 x f16 vector value
-def v4f16 : ValueType<64 , 86>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 87>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 88>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 89>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 90>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 91>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 92>; // 16 x f32 vector value
-def v1f64 : ValueType<64, 93>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 94>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 95>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 96>; // 8 x f64 vector value
-
-def nxv2f16 : ValueType<32 , 97>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64 , 98>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 99>; // n x 8 x f16 vector value
-def nxv1f32 : ValueType<32 , 100>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64 , 101>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 102>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 103>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 104>; // n x 16 x f32 vector value
-def nxv1f64 : ValueType<64, 105>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 106>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 107>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 108>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64 , 109>; // X86 MMX value
-def FlagVT : ValueType<0 , 110>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 111>; // Produces no value
-def untyped: ValueType<8 , 112>; // Produces an untyped value
-def ExceptRef: ValueType<0, 113>; // WebAssembly's except_ref type
+def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value
+def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value
+def v4i32 : ValueType<128, 44>; // 4 x i32 vector value
+def v5i32 : ValueType<160, 45>; // 5 x i32 vector value
+def v8i32 : ValueType<256, 46>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 47>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,48>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,49>; // 64 x i32 vector value
+def v128i32 : ValueType<4096,50>; // 128 x i32 vector value
+def v256i32 : ValueType<8192,51>; // 256 x i32 vector value
+def v512i32 : ValueType<16384,52>; // 512 x i32 vector value
+def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value
+def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value
+
+def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 56>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 57>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 58>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,59>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,60>; // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 61>; // 1 x i128 vector value
+
+def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 65>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value
+
+def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value
+
+def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value
+
+def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value
+def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 94>; // 8 x f16 vector value
+def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 98>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 99>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 100>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 101>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 105>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 109>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 110>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 111>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 112>; // 8 x f64 vector value
+
+def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value
+def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value
+def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64 , 125>; // X86 MMX value
+def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 127>; // Produces no value
+def untyped: ValueType<8 , 128>; // Produces an untyped value
+def exnref: ValueType<0, 129>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
@@ -167,3 +182,14 @@ def iPTR : ValueType<0 , 254>;
// Pseudo valuetype to represent "any type of any size".
def Any : ValueType<0 , 255>;
+
+/// This class is for targets that want to use pointer types in patterns
+/// with the GlobalISelEmitter. Targets must define their own pointer types
+/// derived from this class. The scalar argument should be an
+/// integer type with the same bit size as the pointer.
+/// e.g. def p0 : PtrValueType<i64, 0>;
+
+class PtrValueType <ValueType scalar, int addrspace> :
+ ValueType<scalar.Size, scalar.Value> {
+ int AddrSpace = addrspace;
+}
diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 6a8e50a7e5f5..70eb048f05eb 100644
--- a/include/llvm/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/VirtRegMap.h - Virtual Register Map ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,8 +67,10 @@ class TargetInstrInfo;
public:
static char ID;
- VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
+ VirtRegMap()
+ : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr),
+ MF(nullptr), Virt2PhysMap(NO_PHYS_REG),
+ Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
VirtRegMap(const VirtRegMap &) = delete;
VirtRegMap &operator=(const VirtRegMap &) = delete;
@@ -98,8 +99,8 @@ class TargetInstrInfo;
/// returns the physical register mapped to the specified
/// virtual register
- unsigned getPhys(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ Register getPhys(Register virtReg) const {
+ assert(virtReg.isVirtual());
return Virt2PhysMap[virtReg];
}
diff --git a/include/llvm/CodeGen/WasmEHFuncInfo.h b/include/llvm/CodeGen/WasmEHFuncInfo.h
index 219fff988f6e..887a1467b3e4 100644
--- a/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -1,9 +1,8 @@
//===--- llvm/CodeGen/WasmEHFuncInfo.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,10 +28,6 @@ struct WasmEHFuncInfo {
// When there is an entry <A, B>, if an exception is not caught by A, it
// should next unwind to the EH pad B.
DenseMap<BBOrMBB, BBOrMBB> EHPadUnwindMap;
- // For entry <A, B>, A is a BB with an instruction that may throw
- // (invoke/cleanupret in LLVM IR, call/rethrow in the backend) and B is an EH
- // pad that A unwinds to.
- DenseMap<BBOrMBB, BBOrMBB> ThrowUnwindMap;
// Helper functions
const BasicBlock *getEHPadUnwindDest(const BasicBlock *BB) const {
@@ -41,18 +36,9 @@ struct WasmEHFuncInfo {
void setEHPadUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
EHPadUnwindMap[BB] = Dest;
}
- const BasicBlock *getThrowUnwindDest(BasicBlock *BB) const {
- return ThrowUnwindMap.lookup(BB).get<const BasicBlock *>();
- }
- void setThrowUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
- ThrowUnwindMap[BB] = Dest;
- }
bool hasEHPadUnwindDest(const BasicBlock *BB) const {
return EHPadUnwindMap.count(BB);
}
- bool hasThrowUnwindDest(const BasicBlock *BB) const {
- return ThrowUnwindMap.count(BB);
- }
MachineBasicBlock *getEHPadUnwindDest(MachineBasicBlock *MBB) const {
return EHPadUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
@@ -60,18 +46,9 @@ struct WasmEHFuncInfo {
void setEHPadUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
EHPadUnwindMap[MBB] = Dest;
}
- MachineBasicBlock *getThrowUnwindDest(MachineBasicBlock *MBB) const {
- return ThrowUnwindMap.lookup(MBB).get<MachineBasicBlock *>();
- }
- void setThrowUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
- ThrowUnwindMap[MBB] = Dest;
- }
bool hasEHPadUnwindDest(MachineBasicBlock *MBB) const {
return EHPadUnwindMap.count(MBB);
}
- bool hasThrowUnwindDest(MachineBasicBlock *MBB) const {
- return ThrowUnwindMap.count(MBB);
- }
};
// Analyze the IR in the given function to build WasmEHFuncInfo.
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index 8043024626a0..f098316de793 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/WinEHFuncInfo.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
index bd1743511ed4..0ac8b651939d 100644
--- a/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h
@@ -1,9 +1,8 @@
//===- AppendingTypeTableBuilder.h -------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h
index 11ca9ff108de..784c47e3bf5d 100644
--- a/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -1,9 +1,8 @@
-//===- RecordIterator.h -----------------------------------------*- C++ -*-===//
+//===- CVRecord.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -25,17 +24,31 @@ namespace llvm {
namespace codeview {
+/// CVRecord is a fat pointer (base + size pair) to a symbol or type record.
+/// Carrying the size separately instead of trusting the size stored in the
+/// record prefix provides some extra safety and flexibility.
template <typename Kind> class CVRecord {
public:
- CVRecord() : Type(static_cast<Kind>(0)) {}
+ CVRecord() = default;
+
+ CVRecord(ArrayRef<uint8_t> Data) : RecordData(Data) {}
- CVRecord(Kind K, ArrayRef<uint8_t> Data) : Type(K), RecordData(Data) {}
+ CVRecord(const RecordPrefix *P, size_t Size)
+ : RecordData(reinterpret_cast<const uint8_t *>(P), Size) {}
- bool valid() const { return Type != static_cast<Kind>(0); }
+ bool valid() const { return kind() != Kind(0); }
uint32_t length() const { return RecordData.size(); }
- Kind kind() const { return Type; }
+
+ Kind kind() const {
+ if (RecordData.size() < sizeof(RecordPrefix))
+ return Kind(0);
+ return static_cast<Kind>(static_cast<uint16_t>(
+ reinterpret_cast<const RecordPrefix *>(RecordData.data())->RecordKind));
+ }
+
ArrayRef<uint8_t> data() const { return RecordData; }
+
StringRef str_data() const {
return StringRef(reinterpret_cast<const char *>(RecordData.data()),
RecordData.size());
@@ -45,7 +58,6 @@ public:
return RecordData.drop_front(sizeof(RecordPrefix));
}
- Kind Type;
ArrayRef<uint8_t> RecordData;
};
@@ -72,8 +84,7 @@ Error forEachCodeViewRecord(ArrayRef<uint8_t> StreamBuffer, Func F) {
ArrayRef<uint8_t> Data = StreamBuffer.take_front(RealLen);
StreamBuffer = StreamBuffer.drop_front(RealLen);
- Record R(static_cast<decltype(Record::Type)>((uint16_t)Prefix->RecordKind),
- Data);
+ Record R(Data);
if (auto EC = F(R))
return EC;
}
@@ -92,13 +103,12 @@ inline Expected<CVRecord<Kind>> readCVRecordFromStream(BinaryStreamRef Stream,
return std::move(EC);
if (Prefix->RecordLen < 2)
return make_error<CodeViewError>(cv_error_code::corrupt_record);
- Kind K = static_cast<Kind>(uint16_t(Prefix->RecordKind));
Reader.setOffset(Offset);
ArrayRef<uint8_t> RawData;
if (auto EC = Reader.readBytes(RawData, Prefix->RecordLen + sizeof(uint16_t)))
return std::move(EC);
- return codeview::CVRecord<Kind>(K, RawData);
+ return codeview::CVRecord<Kind>(RawData);
}
} // end namespace codeview
diff --git a/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index 7c8cd121751a..1615ff41df12 100644
--- a/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -1,9 +1,8 @@
//===- CVSymbolVisitor.h ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
index b765ba1abb4d..7d20bb0a7bde 100644
--- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
@@ -1,9 +1,8 @@
//===- CVTypeVisitor.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -31,6 +31,9 @@ enum VisitorDataSource {
Error visitTypeRecord(CVType &Record, TypeIndex Index,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent);
+Error visitTypeRecord(CVType &Record, TypeIndex Index,
+ TypeVisitorCallbackPipeline &Callbacks,
+ VisitorDataSource Source = VDS_BytesPresent);
Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source = VDS_BytesPresent);
diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h
index 8e0d9f608e93..c3acb05ea8b1 100644
--- a/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -1,9 +1,8 @@
//===- CodeView.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -160,9 +159,10 @@ enum SourceLanguage : uint8_t {
MSIL = 0x0f,
HLSL = 0x10,
- /// The DMD compiler emits 'D' for the CV source language. Microsoft doesn't
- /// have an enumerator for it yet.
+ /// The DMD & Swift compilers emit 'D' and 'S', respectively, for the CV
+ /// source language. Microsoft does not have enumerators for them yet.
D = 'D',
+ Swift = 'S',
};
/// These values correspond to the CV_call_e enumeration, and are documented
@@ -304,6 +304,9 @@ enum class ModifierOptions : uint16_t {
};
CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ModifierOptions)
+// If the subsection kind has this bit set, then the linker should ignore it.
+enum : uint32_t { SubsectionIgnoreFlag = 0x80000000 };
+
enum class DebugSubsectionKind : uint32_t {
None = 0,
Symbols = 0xf1,
@@ -509,9 +512,23 @@ enum class FrameCookieKind : uint8_t {
// Corresponds to CV_HREG_e enum.
enum class RegisterId : uint16_t {
+#define CV_REGISTERS_ALL
#define CV_REGISTER(name, value) name = value,
#include "CodeViewRegisters.def"
#undef CV_REGISTER
+#undef CV_REGISTERS_ALL
+};
+
+// Register Ids are shared between architectures in CodeView. CPUType is needed
+// to map register Id to name.
+struct CPURegister {
+ CPURegister() = delete;
+ CPURegister(CPUType Cpu, codeview::RegisterId Reg) {
+ this->Cpu = Cpu;
+ this->Reg = Reg;
+ }
+ CPUType Cpu;
+ RegisterId Reg;
};
/// Two-bit value indicating which register is the designated frame pointer
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewError.h b/include/llvm/DebugInfo/CodeView/CodeViewError.h
index d4615d02220d..9990c8d05d1c 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewError.h
+++ b/include/llvm/DebugInfo/CodeView/CodeViewError.h
@@ -1,9 +1,8 @@
//===- CodeViewError.h - Error extensions for CodeView ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index 94f104ff772c..00fb0cf4cc90 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -1,9 +1,8 @@
//===- CodeViewRecordIO.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -25,28 +24,65 @@
#include <type_traits>
namespace llvm {
+
namespace codeview {
+class CodeViewRecordStreamer {
+public:
+ virtual void EmitBytes(StringRef Data) = 0;
+ virtual void EmitIntValue(uint64_t Value, unsigned Size) = 0;
+ virtual void EmitBinaryData(StringRef Data) = 0;
+ virtual void AddComment(const Twine &T) = 0;
+ virtual ~CodeViewRecordStreamer() = default;
+};
+
class CodeViewRecordIO {
uint32_t getCurrentOffset() const {
- return (isWriting()) ? Writer->getOffset() : Reader->getOffset();
+ if (isWriting())
+ return Writer->getOffset();
+ else if (isReading())
+ return Reader->getOffset();
+ else
+ return 0;
}
public:
+ // deserializes records to structures
explicit CodeViewRecordIO(BinaryStreamReader &Reader) : Reader(&Reader) {}
+
+ // serializes records to buffer
explicit CodeViewRecordIO(BinaryStreamWriter &Writer) : Writer(&Writer) {}
+ // writes records to assembly file using MC library interface
+ explicit CodeViewRecordIO(CodeViewRecordStreamer &Streamer)
+ : Streamer(&Streamer) {}
+
Error beginRecord(Optional<uint32_t> MaxLength);
Error endRecord();
- Error mapInteger(TypeIndex &TypeInd);
+ Error mapInteger(TypeIndex &TypeInd, const Twine &Comment = "");
- bool isReading() const { return Reader != nullptr; }
- bool isWriting() const { return !isReading(); }
+ bool isStreaming() const {
+ return (Streamer != nullptr) && (Reader == nullptr) && (Writer == nullptr);
+ }
+ bool isReading() const {
+ return (Reader != nullptr) && (Streamer == nullptr) && (Writer == nullptr);
+ }
+ bool isWriting() const {
+ return (Writer != nullptr) && (Streamer == nullptr) && (Reader == nullptr);
+ }
uint32_t maxFieldLength() const;
template <typename T> Error mapObject(T &Value) {
+ if (isStreaming()) {
+ StringRef BytesSR =
+ StringRef((reinterpret_cast<const char *>(&Value)), sizeof(Value));
+ Streamer->EmitBytes(BytesSR);
+ incrStreamedLen(sizeof(T));
+ return Error::success();
+ }
+
if (isWriting())
return Writer->writeObject(Value);
@@ -57,41 +93,63 @@ public:
return Error::success();
}
- template <typename T> Error mapInteger(T &Value) {
+ template <typename T> Error mapInteger(T &Value, const Twine &Comment = "") {
+ if (isStreaming()) {
+ emitComment(Comment);
+ Streamer->EmitIntValue((int)Value, sizeof(T));
+ incrStreamedLen(sizeof(T));
+ return Error::success();
+ }
+
if (isWriting())
return Writer->writeInteger(Value);
return Reader->readInteger(Value);
}
- template <typename T> Error mapEnum(T &Value) {
- if (sizeof(Value) > maxFieldLength())
+ template <typename T> Error mapEnum(T &Value, const Twine &Comment = "") {
+ if (!isStreaming() && sizeof(Value) > maxFieldLength())
return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
using U = typename std::underlying_type<T>::type;
U X;
- if (isWriting())
+
+ if (isWriting() || isStreaming())
X = static_cast<U>(Value);
- if (auto EC = mapInteger(X))
+ if (auto EC = mapInteger(X, Comment))
return EC;
+
if (isReading())
Value = static_cast<T>(X);
+
return Error::success();
}
- Error mapEncodedInteger(int64_t &Value);
- Error mapEncodedInteger(uint64_t &Value);
- Error mapEncodedInteger(APSInt &Value);
- Error mapStringZ(StringRef &Value);
- Error mapGuid(GUID &Guid);
+ Error mapEncodedInteger(int64_t &Value, const Twine &Comment = "");
+ Error mapEncodedInteger(uint64_t &Value, const Twine &Comment = "");
+ Error mapEncodedInteger(APSInt &Value, const Twine &Comment = "");
+ Error mapStringZ(StringRef &Value, const Twine &Comment = "");
+ Error mapGuid(GUID &Guid, const Twine &Comment = "");
- Error mapStringZVectorZ(std::vector<StringRef> &Value);
+ Error mapStringZVectorZ(std::vector<StringRef> &Value,
+ const Twine &Comment = "");
template <typename SizeType, typename T, typename ElementMapper>
- Error mapVectorN(T &Items, const ElementMapper &Mapper) {
+ Error mapVectorN(T &Items, const ElementMapper &Mapper,
+ const Twine &Comment = "") {
SizeType Size;
- if (isWriting()) {
+ if (isStreaming()) {
+ Size = static_cast<SizeType>(Items.size());
+ emitComment(Comment);
+ Streamer->EmitIntValue(Size, sizeof(Size));
+ incrStreamedLen(sizeof(Size)); // add 1 for the delimiter
+
+ for (auto &X : Items) {
+ if (auto EC = Mapper(*this, X))
+ return EC;
+ }
+ } else if (isWriting()) {
Size = static_cast<SizeType>(Items.size());
if (auto EC = Writer->writeInteger(Size))
return EC;
@@ -115,8 +173,10 @@ public:
}
template <typename T, typename ElementMapper>
- Error mapVectorTail(T &Items, const ElementMapper &Mapper) {
- if (isWriting()) {
+ Error mapVectorTail(T &Items, const ElementMapper &Mapper,
+ const Twine &Comment = "") {
+ emitComment(Comment);
+ if (isStreaming() || isWriting()) {
for (auto &Item : Items) {
if (auto EC = Mapper(*this, Item))
return EC;
@@ -133,16 +193,44 @@ public:
return Error::success();
}
- Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes);
- Error mapByteVectorTail(std::vector<uint8_t> &Bytes);
+ Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes, const Twine &Comment = "");
+ Error mapByteVectorTail(std::vector<uint8_t> &Bytes,
+ const Twine &Comment = "");
Error padToAlignment(uint32_t Align);
Error skipPadding();
+ uint64_t getStreamedLen() {
+ if (isStreaming())
+ return StreamedLen;
+ return 0;
+ }
+
private:
+ void emitEncodedSignedInteger(const int64_t &Value,
+ const Twine &Comment = "");
+ void emitEncodedUnsignedInteger(const uint64_t &Value,
+ const Twine &Comment = "");
Error writeEncodedSignedInteger(const int64_t &Value);
Error writeEncodedUnsignedInteger(const uint64_t &Value);
+ void incrStreamedLen(const uint64_t &Len) {
+ if (isStreaming())
+ StreamedLen += Len;
+ }
+
+ void resetStreamedLen() {
+ if (isStreaming())
+ StreamedLen = 4; // The record prefix is 4 bytes long
+ }
+
+ void emitComment(const Twine &Comment) {
+ if (isStreaming()) {
+ Twine TComment(Comment);
+ Streamer->AddComment(TComment);
+ }
+ }
+
struct RecordLimit {
uint32_t BeginOffset;
Optional<uint32_t> MaxLength;
@@ -163,6 +251,8 @@ private:
BinaryStreamReader *Reader = nullptr;
BinaryStreamWriter *Writer = nullptr;
+ CodeViewRecordStreamer *Streamer = nullptr;
+ uint64_t StreamedLen = 0;
};
} // end namespace codeview
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index fdfcf4d53a23..9767e49c44f5 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -1,9 +1,8 @@
//===-- CodeViewRegisters.def - CodeView registers --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,8 +14,15 @@
#define CV_REGISTER(name, value)
#endif
+#if !defined(CV_REGISTERS_ALL) && !defined(CV_REGISTERS_X86) && \
+ !defined(CV_REGISTERS_ARM64)
+#error Need to include at least one register set.
+#endif
+
// This currently only contains the "register subset shared by all processor
-// types" (ERR etc.) and the x86 registers.
+// types" (ERR etc.) and the x86/arm64 registers.
+
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
// Some system headers define macros that conflict with our enums. Every
// compiler supported by LLVM has the push_macro and pop_macro pragmas, so use
@@ -357,3 +363,197 @@ CV_REGISTER(AMD64_K7, 765)
#pragma pop_macro("CR2")
#pragma pop_macro("CR3")
#pragma pop_macro("CR4")
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_X86)
+
+#if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
+
+// ARM64 registers
+
+CV_REGISTER(ARM64_NOREG, 0)
+
+// General purpose 32-bit integer registers
+
+CV_REGISTER(ARM64_W0, 10)
+CV_REGISTER(ARM64_W1, 11)
+CV_REGISTER(ARM64_W2, 12)
+CV_REGISTER(ARM64_W3, 13)
+CV_REGISTER(ARM64_W4, 14)
+CV_REGISTER(ARM64_W5, 15)
+CV_REGISTER(ARM64_W6, 16)
+CV_REGISTER(ARM64_W7, 17)
+CV_REGISTER(ARM64_W8, 18)
+CV_REGISTER(ARM64_W9, 19)
+CV_REGISTER(ARM64_W10, 20)
+CV_REGISTER(ARM64_W11, 21)
+CV_REGISTER(ARM64_W12, 22)
+CV_REGISTER(ARM64_W13, 23)
+CV_REGISTER(ARM64_W14, 24)
+CV_REGISTER(ARM64_W15, 25)
+CV_REGISTER(ARM64_W16, 26)
+CV_REGISTER(ARM64_W17, 27)
+CV_REGISTER(ARM64_W18, 28)
+CV_REGISTER(ARM64_W19, 29)
+CV_REGISTER(ARM64_W20, 30)
+CV_REGISTER(ARM64_W21, 31)
+CV_REGISTER(ARM64_W22, 32)
+CV_REGISTER(ARM64_W23, 33)
+CV_REGISTER(ARM64_W24, 34)
+CV_REGISTER(ARM64_W25, 35)
+CV_REGISTER(ARM64_W26, 36)
+CV_REGISTER(ARM64_W27, 37)
+CV_REGISTER(ARM64_W28, 38)
+CV_REGISTER(ARM64_W29, 39)
+CV_REGISTER(ARM64_W30, 40)
+CV_REGISTER(ARM64_WZR, 41)
+
+// General purpose 64-bit integer registers
+
+CV_REGISTER(ARM64_X0, 50)
+CV_REGISTER(ARM64_X1, 51)
+CV_REGISTER(ARM64_X2, 52)
+CV_REGISTER(ARM64_X3, 53)
+CV_REGISTER(ARM64_X4, 54)
+CV_REGISTER(ARM64_X5, 55)
+CV_REGISTER(ARM64_X6, 56)
+CV_REGISTER(ARM64_X7, 57)
+CV_REGISTER(ARM64_X8, 58)
+CV_REGISTER(ARM64_X9, 59)
+CV_REGISTER(ARM64_X10, 60)
+CV_REGISTER(ARM64_X11, 61)
+CV_REGISTER(ARM64_X12, 62)
+CV_REGISTER(ARM64_X13, 63)
+CV_REGISTER(ARM64_X14, 64)
+CV_REGISTER(ARM64_X15, 65)
+CV_REGISTER(ARM64_X16, 66)
+CV_REGISTER(ARM64_X17, 67)
+CV_REGISTER(ARM64_X18, 68)
+CV_REGISTER(ARM64_X19, 69)
+CV_REGISTER(ARM64_X20, 70)
+CV_REGISTER(ARM64_X21, 71)
+CV_REGISTER(ARM64_X22, 72)
+CV_REGISTER(ARM64_X23, 73)
+CV_REGISTER(ARM64_X24, 74)
+CV_REGISTER(ARM64_X25, 75)
+CV_REGISTER(ARM64_X26, 76)
+CV_REGISTER(ARM64_X27, 77)
+CV_REGISTER(ARM64_X28, 78)
+CV_REGISTER(ARM64_FP, 79)
+CV_REGISTER(ARM64_LR, 80)
+CV_REGISTER(ARM64_SP, 81)
+CV_REGISTER(ARM64_ZR, 82)
+
+// status register
+
+CV_REGISTER(ARM64_NZCV, 90)
+
+// 32-bit floating point registers
+
+CV_REGISTER(ARM64_S0, 100)
+CV_REGISTER(ARM64_S1, 101)
+CV_REGISTER(ARM64_S2, 102)
+CV_REGISTER(ARM64_S3, 103)
+CV_REGISTER(ARM64_S4, 104)
+CV_REGISTER(ARM64_S5, 105)
+CV_REGISTER(ARM64_S6, 106)
+CV_REGISTER(ARM64_S7, 107)
+CV_REGISTER(ARM64_S8, 108)
+CV_REGISTER(ARM64_S9, 109)
+CV_REGISTER(ARM64_S10, 110)
+CV_REGISTER(ARM64_S11, 111)
+CV_REGISTER(ARM64_S12, 112)
+CV_REGISTER(ARM64_S13, 113)
+CV_REGISTER(ARM64_S14, 114)
+CV_REGISTER(ARM64_S15, 115)
+CV_REGISTER(ARM64_S16, 116)
+CV_REGISTER(ARM64_S17, 117)
+CV_REGISTER(ARM64_S18, 118)
+CV_REGISTER(ARM64_S19, 119)
+CV_REGISTER(ARM64_S20, 120)
+CV_REGISTER(ARM64_S21, 121)
+CV_REGISTER(ARM64_S22, 122)
+CV_REGISTER(ARM64_S23, 123)
+CV_REGISTER(ARM64_S24, 124)
+CV_REGISTER(ARM64_S25, 125)
+CV_REGISTER(ARM64_S26, 126)
+CV_REGISTER(ARM64_S27, 127)
+CV_REGISTER(ARM64_S28, 128)
+CV_REGISTER(ARM64_S29, 129)
+CV_REGISTER(ARM64_S30, 130)
+CV_REGISTER(ARM64_S31, 131)
+
+// 64-bit floating point registers
+
+CV_REGISTER(ARM64_D0, 140)
+CV_REGISTER(ARM64_D1, 141)
+CV_REGISTER(ARM64_D2, 142)
+CV_REGISTER(ARM64_D3, 143)
+CV_REGISTER(ARM64_D4, 144)
+CV_REGISTER(ARM64_D5, 145)
+CV_REGISTER(ARM64_D6, 146)
+CV_REGISTER(ARM64_D7, 147)
+CV_REGISTER(ARM64_D8, 148)
+CV_REGISTER(ARM64_D9, 149)
+CV_REGISTER(ARM64_D10, 150)
+CV_REGISTER(ARM64_D11, 151)
+CV_REGISTER(ARM64_D12, 152)
+CV_REGISTER(ARM64_D13, 153)
+CV_REGISTER(ARM64_D14, 154)
+CV_REGISTER(ARM64_D15, 155)
+CV_REGISTER(ARM64_D16, 156)
+CV_REGISTER(ARM64_D17, 157)
+CV_REGISTER(ARM64_D18, 158)
+CV_REGISTER(ARM64_D19, 159)
+CV_REGISTER(ARM64_D20, 160)
+CV_REGISTER(ARM64_D21, 161)
+CV_REGISTER(ARM64_D22, 162)
+CV_REGISTER(ARM64_D23, 163)
+CV_REGISTER(ARM64_D24, 164)
+CV_REGISTER(ARM64_D25, 165)
+CV_REGISTER(ARM64_D26, 166)
+CV_REGISTER(ARM64_D27, 167)
+CV_REGISTER(ARM64_D28, 168)
+CV_REGISTER(ARM64_D29, 169)
+CV_REGISTER(ARM64_D30, 170)
+CV_REGISTER(ARM64_D31, 171)
+
+// 128-bit SIMD registers
+
+CV_REGISTER(ARM64_Q0, 180)
+CV_REGISTER(ARM64_Q1, 181)
+CV_REGISTER(ARM64_Q2, 182)
+CV_REGISTER(ARM64_Q3, 183)
+CV_REGISTER(ARM64_Q4, 184)
+CV_REGISTER(ARM64_Q5, 185)
+CV_REGISTER(ARM64_Q6, 186)
+CV_REGISTER(ARM64_Q7, 187)
+CV_REGISTER(ARM64_Q8, 188)
+CV_REGISTER(ARM64_Q9, 189)
+CV_REGISTER(ARM64_Q10, 190)
+CV_REGISTER(ARM64_Q11, 191)
+CV_REGISTER(ARM64_Q12, 192)
+CV_REGISTER(ARM64_Q13, 193)
+CV_REGISTER(ARM64_Q14, 194)
+CV_REGISTER(ARM64_Q15, 195)
+CV_REGISTER(ARM64_Q16, 196)
+CV_REGISTER(ARM64_Q17, 197)
+CV_REGISTER(ARM64_Q18, 198)
+CV_REGISTER(ARM64_Q19, 199)
+CV_REGISTER(ARM64_Q20, 200)
+CV_REGISTER(ARM64_Q21, 201)
+CV_REGISTER(ARM64_Q22, 202)
+CV_REGISTER(ARM64_Q23, 203)
+CV_REGISTER(ARM64_Q24, 204)
+CV_REGISTER(ARM64_Q25, 205)
+CV_REGISTER(ARM64_Q26, 206)
+CV_REGISTER(ARM64_Q27, 207)
+CV_REGISTER(ARM64_Q28, 208)
+CV_REGISTER(ARM64_Q29, 209)
+CV_REGISTER(ARM64_Q30, 210)
+CV_REGISTER(ARM64_Q31, 211)
+
+// Floating point status register
+
+CV_REGISTER(ARM64_FPSR, 220)
+
+#endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64)
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
index b5f1cc0198dc..4f8ccfdd16af 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def
@@ -1,9 +1,8 @@
//===-- CodeViewSymbols.def - All CodeView leaf types -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -103,7 +102,6 @@ CV_SYMBOL(S_LPROCIA64_ST , 0x1015)
CV_SYMBOL(S_GPROCIA64_ST , 0x1016)
CV_SYMBOL(S_LOCALSLOT_ST , 0x1017)
CV_SYMBOL(S_PARAMSLOT_ST , 0x1018)
-CV_SYMBOL(S_ANNOTATION , 0x1019)
CV_SYMBOL(S_GMANPROC_ST , 0x101a)
CV_SYMBOL(S_LMANPROC_ST , 0x101b)
CV_SYMBOL(S_RESERVED1 , 0x101c)
@@ -255,6 +253,7 @@ SYMBOL_RECORD(S_LTHREAD32 , 0x1112, ThreadLocalDataSym)
SYMBOL_RECORD_ALIAS(S_GTHREAD32 , 0x1113, GlobalTLS, ThreadLocalDataSym)
SYMBOL_RECORD(S_UNAMESPACE , 0x1124, UsingNamespaceSym)
+SYMBOL_RECORD(S_ANNOTATION , 0x1019, AnnotationSym)
#undef CV_SYMBOL
#undef SYMBOL_RECORD
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewTypes.def b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def
index e9a479dba496..a31111eb80a4 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewTypes.def
+++ b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def
@@ -1,9 +1,8 @@
//===-- CodeViewTypes.def - All CodeView leaf types -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h b/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
index 7f851a2595dc..53ab2dd04aa7 100644
--- a/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h
@@ -1,9 +1,8 @@
//===- ContinuationRecordBuilder.h ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -62,4 +61,4 @@ public:
} // namespace codeview
} // namespace llvm
-#endif \ No newline at end of file
+#endif
diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index 78b284563afd..01f83676afdf 100644
--- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -1,9 +1,8 @@
//===- DebugChecksumsSubsection.h -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
index 2f9e9814d998..64a78a7cef21 100644
--- a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h
@@ -1,9 +1,8 @@
//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
index 8be7ef265c82..e7683cb2a9c4 100644
--- a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h
@@ -1,9 +1,8 @@
-//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===//
+//===- DebugCrossImpSubsection.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
index 847d93f0e985..d5cd640231f9 100644
--- a/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h
@@ -1,9 +1,8 @@
//===- DebugFrameDataSubsection.h ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index b88c0eae1de2..9fd88a64873a 100644
--- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -1,9 +1,8 @@
//===- DebugInlineeLinesSubsection.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -71,6 +70,11 @@ public:
}
Error initialize(BinaryStreamReader Reader);
+ Error initialize(BinaryStreamRef Section) {
+ return initialize(BinaryStreamReader(Section));
+ }
+
+ bool valid() const { return Lines.valid(); }
bool hasExtraFiles() const;
Iterator begin() const { return Lines.begin(); }
@@ -78,7 +82,7 @@ public:
private:
InlineeLinesSignature Signature;
- VarStreamArray<InlineeSourceLine> Lines;
+ LinesArray Lines;
};
class DebugInlineeLinesSubsection final : public DebugSubsection {
diff --git a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
index 53044b6c3dc8..1f8e56c5311f 100644
--- a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h
@@ -1,9 +1,8 @@
//===- DebugLinesSubsection.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
index bebc960223cc..6e5b8adddd4a 100644
--- a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h
@@ -1,9 +1,8 @@
//===- DebugStringTableSubsection.h - CodeView String Table -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSubsection.h
index e427e0006a55..66272870efda 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsection.h
@@ -1,9 +1,8 @@
//===- DebugSubsection.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
index fc0cf0d1d90e..bcb379f00d68 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
@@ -1,9 +1,8 @@
//===- DebugSubsectionRecord.h ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
index 75f749dfa933..720b1b49581f 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h
@@ -1,9 +1,8 @@
//===- DebugSubsectionVisitor.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
index a4c04b55eb4c..91b740ce6b9a 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h
@@ -1,9 +1,8 @@
//===- DebugSymbolRVASubsection.h -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
index dfda7deb6cb4..784fc59484b9 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h
@@ -1,9 +1,8 @@
//===- DebugSymbolsSubsection.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
index ea9a96ca8d68..fa7df325499f 100644
--- a/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h
@@ -1,9 +1,8 @@
//===- DebugUnknownSubsection.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h
index ee0f0f7c6023..ed126ed9e2ff 100644
--- a/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -1,9 +1,8 @@
//===- EnumTables.h - Enum to string conversion tables ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,7 +20,7 @@ namespace codeview {
ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames();
ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames();
-ArrayRef<EnumEntry<uint16_t>> getRegisterNames();
+ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu);
ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames();
ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames();
ArrayRef<EnumEntry<uint16_t>> getLocalFlagNames();
diff --git a/include/llvm/DebugInfo/CodeView/Formatters.h b/include/llvm/DebugInfo/CodeView/Formatters.h
index 278ad02a39cd..7d04a6a89bef 100644
--- a/include/llvm/DebugInfo/CodeView/Formatters.h
+++ b/include/llvm/DebugInfo/CodeView/Formatters.h
@@ -1,9 +1,8 @@
//===- Formatters.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/FunctionId.h b/include/llvm/DebugInfo/CodeView/FunctionId.h
index 1af3da810b5a..bc102278819c 100644
--- a/include/llvm/DebugInfo/CodeView/FunctionId.h
+++ b/include/llvm/DebugInfo/CodeView/FunctionId.h
@@ -1,9 +1,8 @@
//===- FunctionId.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/GUID.h b/include/llvm/DebugInfo/CodeView/GUID.h
index a055ce9e2e45..5f807e6f7eeb 100644
--- a/include/llvm/DebugInfo/CodeView/GUID.h
+++ b/include/llvm/DebugInfo/CodeView/GUID.h
@@ -1,9 +1,8 @@
//===- GUID.h ---------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
index c4704168ed34..a43ce20edde6 100644
--- a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h
@@ -1,9 +1,8 @@
//===- GlobalTypeTableBuilder.h ----------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -74,14 +73,30 @@ public:
CreateFunc Create) {
auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex());
- if (LLVM_UNLIKELY(Result.second)) {
+ if (LLVM_UNLIKELY(Result.second /*inserted*/ ||
+ Result.first->second.isSimple())) {
uint8_t *Stable = RecordStorage.Allocate<uint8_t>(RecordSize);
MutableArrayRef<uint8_t> Data(Stable, RecordSize);
- SeenRecords.push_back(Create(Data));
+ ArrayRef<uint8_t> StableRecord = Create(Data);
+ if (StableRecord.empty()) {
+ // Records with forward references into the Type stream will be deferred
+ // for insertion at a later time, on the second pass.
+ Result.first->getSecond() = TypeIndex(SimpleTypeKind::NotTranslated);
+ return TypeIndex(SimpleTypeKind::NotTranslated);
+ }
+ if (Result.first->second.isSimple()) {
+ assert(Result.first->second.getIndex() ==
+ (uint32_t)SimpleTypeKind::NotTranslated);
+ // On the second pass, update with index to remapped record. The
+ // (initially misbehaved) record will now come *after* other records
+ // resolved in the first pass, with proper *back* references in the
+ // stream.
+ Result.first->second = nextTypeIndex();
+ }
+ SeenRecords.push_back(StableRecord);
SeenHashes.push_back(Hash);
}
- // Update the caller's copy of Record to point a stable copy.
return Result.first->second;
}
diff --git a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
index 383f7dd9fb6a..4e03627e9580 100644
--- a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
+++ b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
@@ -1,9 +1,8 @@
//===- LazyRandomTypeCollection.h -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/Line.h b/include/llvm/DebugInfo/CodeView/Line.h
index ac229c337513..eb2aa154df1b 100644
--- a/include/llvm/DebugInfo/CodeView/Line.h
+++ b/include/llvm/DebugInfo/CodeView/Line.h
@@ -1,9 +1,8 @@
//===- Line.h ---------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
index 9030918ebbb3..1b2f6d29a9b6 100644
--- a/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
+++ b/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
@@ -1,9 +1,8 @@
//===- MergingTypeTableBuilder.h ---------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/RecordName.h b/include/llvm/DebugInfo/CodeView/RecordName.h
index b022108df3d6..cc09db8933bd 100644
--- a/include/llvm/DebugInfo/CodeView/RecordName.h
+++ b/include/llvm/DebugInfo/CodeView/RecordName.h
@@ -1,9 +1,8 @@
//===- RecordName.h ------------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/include/llvm/DebugInfo/CodeView/RecordSerialization.h
index 36237e1a4d9e..36c0f2fbd8fa 100644
--- a/include/llvm/DebugInfo/CodeView/RecordSerialization.h
+++ b/include/llvm/DebugInfo/CodeView/RecordSerialization.h
@@ -1,9 +1,8 @@
//===- RecordSerialization.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -32,6 +31,9 @@ using llvm::support::ulittle32_t;
enum : unsigned { MaxRecordLength = 0xFF00 };
struct RecordPrefix {
+ RecordPrefix() = default;
+ explicit RecordPrefix(uint16_t Kind) : RecordLen(2), RecordKind(Kind) {}
+
ulittle16_t RecordLen; // Record length, starting from &RecordKind.
ulittle16_t RecordKind; // Record kind enum (SymRecordKind or TypeRecordKind)
};
diff --git a/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h b/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h
index a85d9270186b..3ca09b445a30 100644
--- a/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h
@@ -1,9 +1,8 @@
//===- SimpleTypeSerializer.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
index 22a333e631a0..22a283e785e1 100644
--- a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
+++ b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h
@@ -1,9 +1,8 @@
//===- StringsAndChecksums.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index 6b5dd2d20d17..62761cb87c81 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -1,9 +1,8 @@
//===- SymbolDeserializer.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
index 823636c398de..12f45dcb21ff 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
@@ -1,9 +1,8 @@
//===-- SymbolDumpDelegate.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index 215da2e2b522..d832a48b1265 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -1,9 +1,8 @@
//===-- SymbolDumper.h - CodeView symbol info dumper ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index b58825c4a788..5e9a7432b9b6 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -1,9 +1,8 @@
//===- SymbolRecord.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
@@ -156,15 +156,19 @@ public:
uint32_t RecordOffset;
};
-struct BinaryAnnotationIterator {
- struct AnnotationData {
- BinaryAnnotationsOpCode OpCode;
- StringRef Name;
- uint32_t U1;
- uint32_t U2;
- int32_t S1;
- };
+struct DecodedAnnotation {
+ StringRef Name;
+ ArrayRef<uint8_t> Bytes;
+ BinaryAnnotationsOpCode OpCode;
+ uint32_t U1 = 0;
+ uint32_t U2 = 0;
+ int32_t S1 = 0;
+};
+struct BinaryAnnotationIterator
+ : public iterator_facade_base<BinaryAnnotationIterator,
+ std::forward_iterator_tag,
+ DecodedAnnotation> {
BinaryAnnotationIterator() = default;
BinaryAnnotationIterator(ArrayRef<uint8_t> Annotations) : Data(Annotations) {}
BinaryAnnotationIterator(const BinaryAnnotationIterator &Other)
@@ -174,10 +178,6 @@ struct BinaryAnnotationIterator {
return Data == Other.Data;
}
- bool operator!=(const BinaryAnnotationIterator &Other) const {
- return !(*this == Other);
- }
-
BinaryAnnotationIterator &operator=(const BinaryAnnotationIterator Other) {
Data = Other.Data;
return *this;
@@ -194,13 +194,7 @@ struct BinaryAnnotationIterator {
return *this;
}
- BinaryAnnotationIterator operator++(int) {
- BinaryAnnotationIterator Orig(*this);
- ++(*this);
- return Orig;
- }
-
- const AnnotationData &operator*() {
+ const DecodedAnnotation &operator*() {
ParseCurrentAnnotation();
return Current.getValue();
}
@@ -242,17 +236,17 @@ private:
(ThirdByte << 8) | FourthByte;
return -1;
- };
+ }
static int32_t DecodeSignedOperand(uint32_t Operand) {
if (Operand & 1)
return -(Operand >> 1);
return Operand >> 1;
- };
+ }
static int32_t DecodeSignedOperand(ArrayRef<uint8_t> &Annotations) {
return DecodeSignedOperand(GetCompressedAnnotation(Annotations));
- };
+ }
bool ParseCurrentAnnotation() {
if (Current.hasValue())
@@ -260,7 +254,7 @@ private:
Next = Data;
uint32_t Op = GetCompressedAnnotation(Next);
- AnnotationData Result;
+ DecodedAnnotation Result;
Result.OpCode = static_cast<BinaryAnnotationsOpCode>(Op);
switch (Result.OpCode) {
case BinaryAnnotationsOpCode::Invalid:
@@ -325,11 +319,12 @@ private:
break;
}
}
+ Result.Bytes = Data.take_front(Data.size() - Next.size());
Current = Result;
return true;
}
- Optional<AnnotationData> Current;
+ Optional<DecodedAnnotation> Current;
ArrayRef<uint8_t> Data;
ArrayRef<uint8_t> Next;
};
@@ -974,7 +969,7 @@ class UsingNamespaceSym : public SymbolRecord {
public:
explicit UsingNamespaceSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
explicit UsingNamespaceSym(uint32_t RecordOffset)
- : SymbolRecord(SymbolRecordKind::RegRelativeSym),
+ : SymbolRecord(SymbolRecordKind::UsingNamespaceSym),
RecordOffset(RecordOffset) {}
StringRef Name;
@@ -983,6 +978,19 @@ public:
};
// S_ANNOTATION
+class AnnotationSym : public SymbolRecord {
+public:
+ explicit AnnotationSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit AnnotationSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::AnnotationSym),
+ RecordOffset(RecordOffset) {}
+
+ uint32_t CodeOffset = 0;
+ uint16_t Segment = 0;
+ std::vector<StringRef> Strings;
+
+ uint32_t RecordOffset;
+};
using CVSymbol = CVRecord<SymbolKind>;
using CVSymbolArray = VarStreamArray<CVSymbol>;
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
index 3713fe118eaa..57dbc56c0769 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h
@@ -1,9 +1,8 @@
//===- SymbolRecordHelpers.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
index 391e8f127665..34368b6185d6 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
@@ -1,9 +1,8 @@
//===- SymbolRecordMapping.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
index f4d8ab0c3c2e..b805b6595e80 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
@@ -1,9 +1,8 @@
//===- SymbolSerializer.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -52,8 +51,8 @@ public:
template <typename SymType>
static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage,
CodeViewContainer Container) {
- CVSymbol Result;
- Result.Type = static_cast<SymbolKind>(Sym.Kind);
+ RecordPrefix Prefix{uint16_t(Sym.Kind)};
+ CVSymbol Result(&Prefix, sizeof(Prefix));
SymbolSerializer Serializer(Storage, Container);
consumeError(Serializer.visitSymbolBegin(Result));
consumeError(Serializer.visitKnownRecord(Result, Sym));
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
index e29511a67b7f..145d63a6fe61 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
@@ -1,9 +1,8 @@
//===- SymbolVisitorCallbackPipeline.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
index 0816f7c62656..1a4d5b9d31df 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
@@ -1,9 +1,8 @@
//===- SymbolVisitorCallbacks.h ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
index a2a3c6f18fba..368d8b288315 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
@@ -1,9 +1,8 @@
//===-- SymbolVisitorDelegate.h ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeCollection.h b/include/llvm/DebugInfo/CodeView/TypeCollection.h
index e9fc9b0de8ef..58b1dd058c1a 100644
--- a/include/llvm/DebugInfo/CodeView/TypeCollection.h
+++ b/include/llvm/DebugInfo/CodeView/TypeCollection.h
@@ -1,9 +1,8 @@
//===- TypeCollection.h - A collection of CodeView type records -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
index 9887d901773a..081de32dd02c 100644
--- a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
@@ -1,9 +1,8 @@
//===- TypeDeserializer.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -59,7 +58,7 @@ public:
TypeRecordKind K =
static_cast<TypeRecordKind>(uint16_t(Prefix->RecordKind));
T Record(K);
- CVType CVT(static_cast<TypeLeafKind>(K), Data);
+ CVType CVT(Data);
if (auto EC = deserializeAs<T>(CVT, Record))
return std::move(EC);
return Record;
@@ -112,14 +111,14 @@ class FieldListDeserializer : public TypeVisitorCallbacks {
public:
explicit FieldListDeserializer(BinaryStreamReader &Reader) : Mapping(Reader) {
- CVType FieldList;
- FieldList.Type = TypeLeafKind::LF_FIELDLIST;
+ RecordPrefix Pre(static_cast<uint16_t>(TypeLeafKind::LF_FIELDLIST));
+ CVType FieldList(&Pre, sizeof(Pre));
consumeError(Mapping.Mapping.visitTypeBegin(FieldList));
}
~FieldListDeserializer() override {
- CVType FieldList;
- FieldList.Type = TypeLeafKind::LF_FIELDLIST;
+ RecordPrefix Pre(static_cast<uint16_t>(TypeLeafKind::LF_FIELDLIST));
+ CVType FieldList(&Pre, sizeof(Pre));
consumeError(Mapping.Mapping.visitTypeEnd(FieldList));
}
diff --git a/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h b/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
index afb8b3636361..41a219ae5a7b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
+++ b/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h
@@ -1,9 +1,8 @@
//===-- TypeDumpVisitor.h - CodeView type info dumper -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeHashing.h b/include/llvm/DebugInfo/CodeView/TypeHashing.h
index 1f732d29a538..b0a16cccbff3 100644
--- a/include/llvm/DebugInfo/CodeView/TypeHashing.h
+++ b/include/llvm/DebugInfo/CodeView/TypeHashing.h
@@ -1,9 +1,8 @@
//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -85,6 +84,8 @@ struct GloballyHashedType {
}
std::array<uint8_t, 8> Hash;
+ bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
+
/// Given a sequence of bytes representing a record, compute a global hash for
/// this record. Due to the nature of global hashes incorporating the hashes
/// of referenced records, this function requires a list of types and ids
@@ -108,8 +109,33 @@ struct GloballyHashedType {
template <typename Range>
static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
std::vector<GloballyHashedType> Hashes;
- for (const auto &R : Records)
- Hashes.push_back(hashType(R, Hashes, Hashes));
+ bool UnresolvedRecords = false;
+ for (const auto &R : Records) {
+ GloballyHashedType H = hashType(R, Hashes, Hashes);
+ if (H.empty())
+ UnresolvedRecords = true;
+ Hashes.push_back(H);
+ }
+
+    // In some rare cases, there might be records with forward references in
+    // the stream. Several passes might be needed to fully hash each record in
+    // the type stream. However, this only occurs on very small OBJs generated
+    // by MASM, with a dozen records at most, so this code path isn't
+    // time-critical: it isn't taken in 99% of cases.
+ while (UnresolvedRecords) {
+ UnresolvedRecords = false;
+ auto HashIt = Hashes.begin();
+ for (const auto &R : Records) {
+ if (HashIt->empty()) {
+ GloballyHashedType H = hashType(R, Hashes, Hashes);
+ if (H.empty())
+ UnresolvedRecords = true;
+ else
+ *HashIt = H;
+ }
+ ++HashIt;
+ }
+ }
return Hashes;
}
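The new hashTypes() loop above handles forward references by re-hashing until a fixed point is reached. Below is a minimal standalone sketch of that same pattern; Record, Hash, hashOne and hashAll are placeholder stand-ins for illustration, not the real CVType/GloballyHashedType API, and the sketch assumes (as the comment above does) that reference chains are acyclic.

#include <cstddef>
#include <cstdint>
#include <vector>

// Placeholder types; the real code hashes CVType records into GloballyHashedType.
struct Record { int RefIndex = -1; };   // index of a referenced record, -1 if none
using Hash = std::uint64_t;             // 0 plays the role of GloballyHashedType::empty()

// Hypothetical per-record hash: returns 0 (unresolved) if the referenced
// record has not been hashed yet.
static Hash hashOne(const Record &R, const std::vector<Hash> &Hashes) {
  if (R.RefIndex < 0)
    return 0x9E3779B97F4A7C15ull;       // leaf record: arbitrary non-zero value
  if (R.RefIndex >= static_cast<int>(Hashes.size()) || Hashes[R.RefIndex] == 0)
    return 0;                           // forward reference still unresolved
  return Hashes[R.RefIndex] * 0xFF51AFD7ED558CCDull + 1;
}

std::vector<Hash> hashAll(const std::vector<Record> &Records) {
  std::vector<Hash> Hashes;
  bool Unresolved = false;
  for (const Record &R : Records) {     // first pass, as before the change
    Hash H = hashOne(R, Hashes);
    Unresolved |= (H == 0);
    Hashes.push_back(H);
  }
  while (Unresolved) {                  // extra passes only if something stayed empty
    Unresolved = false;
    for (std::size_t I = 0; I != Records.size(); ++I) {
      if (Hashes[I] != 0)
        continue;
      Hash H = hashOne(Records[I], Hashes);
      if (H == 0)
        Unresolved = true;
      else
        Hashes[I] = H;
    }
  }
  return Hashes;
}

As in the patched hashTypes(), the extra passes run only when the first pass left an unresolved hash behind, so the common case remains a single pass.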
diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h
index 58463a6b13df..b9e2562bfc2b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -1,9 +1,8 @@
//===- TypeIndex.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
index c424a09ece89..469768787274 100644
--- a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
+++ b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h
@@ -1,9 +1,8 @@
//===- TypeIndexDiscovery.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 7b4a30ee622d..b147dd6c3d05 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -1,9 +1,8 @@
//===- TypeRecord.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
index 389472ed1aea..e84704d99ddc 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h
@@ -1,9 +1,8 @@
//===- TypeRecordHelpers.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
index cbe8d6066bb9..4c309c10ff0c 100644
--- a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
+++ b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
@@ -1,9 +1,8 @@
//===- TypeRecordMapping.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -24,9 +23,11 @@ class TypeRecordMapping : public TypeVisitorCallbacks {
public:
explicit TypeRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {}
explicit TypeRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {}
+ explicit TypeRecordMapping(CodeViewRecordStreamer &Streamer) : IO(Streamer) {}
using TypeVisitorCallbacks::visitTypeBegin;
Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeBegin(CVType &Record, TypeIndex Index) override;
Error visitTypeEnd(CVType &Record) override;
Error visitMemberBegin(CVMemberRecord &Record) override;
diff --git a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
index 0b9f54ec60bf..d0506cce8176 100644
--- a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
+++ b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h
@@ -1,9 +1,8 @@
//===- TypeStreamMerger.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
index dfba83d62fce..4f2e5deb10b4 100644
--- a/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
+++ b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h
@@ -1,9 +1,8 @@
//===- TypeSymbolEmitter.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeTableCollection.h b/include/llvm/DebugInfo/CodeView/TypeTableCollection.h
index 80326a0ffd39..5cbe3400e029 100644
--- a/include/llvm/DebugInfo/CodeView/TypeTableCollection.h
+++ b/include/llvm/DebugInfo/CodeView/TypeTableCollection.h
@@ -1,9 +1,8 @@
//===- TypeTableCollection.h ---------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
index 126fb8abb0da..169715be2d52 100644
--- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
+++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
@@ -1,9 +1,8 @@
//===- TypeVisitorCallbackPipeline.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -83,6 +82,11 @@ public:
Pipeline.push_back(&Callbacks);
}
+ void addCallbackToPipelineFront(TypeVisitorCallbacks &Callbacks) {
+ auto CallBackItr = Pipeline.begin();
+ Pipeline.insert(CallBackItr, &Callbacks);
+ }
+
#define TYPE_RECORD(EnumName, EnumVal, Name) \
Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \
return visitKnownRecordImpl(CVR, Record); \
diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
index d7a473306bc2..33f8b1f24b1b 100644
--- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
+++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
@@ -1,9 +1,8 @@
//===- TypeVisitorCallbacks.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 85e96402a246..d2a5318179eb 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -1,9 +1,8 @@
//===- DIContext.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -98,11 +97,10 @@ public:
void addFrame(const DILineInfo &Frame) {
Frames.push_back(Frame);
}
-
+
void resize(unsigned i) {
Frames.resize(i);
}
-
};
/// Container for description of a global variable.
@@ -114,6 +112,16 @@ struct DIGlobal {
DIGlobal() : Name("<invalid>") {}
};
+struct DILocal {
+ std::string FunctionName;
+ std::string Name;
+ std::string DeclFile;
+ uint64_t DeclLine = 0;
+ Optional<int64_t> FrameOffset;
+ Optional<uint64_t> Size;
+ Optional<uint64_t> TagOffset;
+};
+
/// A DINameKind is passed to name search methods to specify a
/// preference regarding the type of name resolution the caller wants.
enum class DINameKind { None, ShortName, LinkageName };
@@ -158,7 +166,8 @@ enum DIDumpType : unsigned {
/// dumped.
struct DIDumpOptions {
unsigned DumpType = DIDT_All;
- unsigned RecurseDepth = -1U;
+ unsigned ChildRecurseDepth = -1U;
+ unsigned ParentRecurseDepth = -1U;
uint16_t Version = 0; // DWARF version to assume when extracting.
uint8_t AddrSize = 4; // Address byte size to assume when extracting.
bool ShowAddresses = true;
@@ -172,15 +181,18 @@ struct DIDumpOptions {
/// Return default option set for printing a single DIE without children.
static DIDumpOptions getForSingleDIE() {
DIDumpOptions Opts;
- Opts.RecurseDepth = 0;
+ Opts.ChildRecurseDepth = 0;
+ Opts.ParentRecurseDepth = 0;
return Opts;
}
/// Return the options with RecurseDepth set to 0 unless explicitly required.
DIDumpOptions noImplicitRecursion() const {
DIDumpOptions Opts = *this;
- if (RecurseDepth == -1U && !ShowChildren)
- Opts.RecurseDepth = 0;
+ if (ChildRecurseDepth == -1U && !ShowChildren)
+ Opts.ChildRecurseDepth = 0;
+ if (ParentRecurseDepth == -1U && !ShowParents)
+ Opts.ParentRecurseDepth = 0;
return Opts;
}
};
@@ -204,12 +216,18 @@ public:
return true;
}
- virtual DILineInfo getLineInfoForAddress(uint64_t Address,
+ virtual DILineInfo getLineInfoForAddress(
+ object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
- virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address,
- uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
- virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
+ virtual DILineInfoTable getLineInfoForAddressRange(
+ object::SectionedAddress Address, uint64_t Size,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+ virtual DIInliningInfo getInliningInfoForAddress(
+ object::SectionedAddress Address,
+ DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+
+ virtual std::vector<DILocal>
+ getLocalsForAddress(object::SectionedAddress Address) = 0;
private:
const DIContextKind Kind;
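The DIDumpOptions change above splits RecurseDepth into separate child and parent depths. The sketch below is a standalone mirror of just that logic (a simplified stand-in, not the real llvm::DIDumpOptions), showing how noImplicitRecursion() clamps each direction to 0 unless it was explicitly requested.

#include <cassert>

// Simplified stand-in for the recursion-depth fields added above.
struct DumpOpts {
  unsigned ChildRecurseDepth = -1U;   // -1U means unlimited
  unsigned ParentRecurseDepth = -1U;
  bool ShowChildren = false;
  bool ShowParents = false;

  // Single-DIE dump: no recursion in either direction.
  static DumpOpts getForSingleDIE() {
    DumpOpts O;
    O.ChildRecurseDepth = 0;
    O.ParentRecurseDepth = 0;
    return O;
  }

  // Unless the caller explicitly asked for children/parents, clamp the
  // corresponding depth so recursion stays opt-in.
  DumpOpts noImplicitRecursion() const {
    DumpOpts O = *this;
    if (ChildRecurseDepth == -1U && !ShowChildren)
      O.ChildRecurseDepth = 0;
    if (ParentRecurseDepth == -1U && !ShowParents)
      O.ParentRecurseDepth = 0;
    return O;
  }
};

int main() {
  DumpOpts Opts;
  assert(Opts.noImplicitRecursion().ChildRecurseDepth == 0);
  Opts.ShowParents = true;            // parents explicitly requested
  assert(Opts.noImplicitRecursion().ParentRecurseDepth == -1U);
  assert(DumpOpts::getForSingleDIE().ChildRecurseDepth == 0);
  return 0;
}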
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 84b23398b8cc..ccf2891c2e21 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -1,9 +1,8 @@
//===- DWARFAbbreviationDeclaration.h ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 1d448728338f..303375703d2e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -1,9 +1,8 @@
//===- DWARFAcceleratorTable.h ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -72,7 +71,7 @@ public:
: AccelSection(AccelSection), StringSection(StringSection) {}
virtual ~DWARFAcceleratorTable();
- virtual llvm::Error extract() = 0;
+ virtual Error extract() = 0;
virtual void dump(raw_ostream &OS) const = 0;
DWARFAcceleratorTable(const DWARFAcceleratorTable &) = delete;
@@ -175,7 +174,7 @@ public:
DataExtractor StringSection)
: DWARFAcceleratorTable(AccelSection, StringSection) {}
- llvm::Error extract() override;
+ Error extract() override;
uint32_t getNumBuckets();
uint32_t getNumHashes();
uint32_t getSizeHdr();
@@ -223,7 +222,7 @@ public:
/// referenced by the name table and interpreted with the help of the
/// abbreviation table.
class DWARFDebugNames : public DWARFAcceleratorTable {
- /// The fixed-size part of a Dwarf 5 Name Index header
+ /// The fixed-size part of a DWARF v5 Name Index header
struct HeaderPOD {
uint32_t UnitLength;
uint16_t Version;
@@ -242,7 +241,7 @@ public:
class NameIterator;
class ValueIterator;
- /// Dwarf 5 Name Index header.
+ /// DWARF v5 Name Index header.
struct Header : public HeaderPOD {
SmallString<8> AugmentationString;
@@ -349,7 +348,7 @@ private:
};
public:
- /// A single entry in the Name Table (Dwarf 5 sect. 6.1.1.4.6) of the Name
+ /// A single entry in the Name Table (DWARF v5 sect. 6.1.1.4.6) of the Name
/// Index.
class NameTableEntry {
DataExtractor StrData;
@@ -381,7 +380,7 @@ public:
uint32_t getEntryOffset() const { return EntryOffset; }
};
- /// Represents a single accelerator table within the Dwarf 5 .debug_names
+ /// Represents a single accelerator table within the DWARF v5 .debug_names
/// section.
class NameIndex {
DenseSet<Abbrev, AbbrevMapInfo> Abbrevs;
@@ -460,7 +459,7 @@ public:
NameIterator begin() const { return NameIterator(this, 1); }
NameIterator end() const { return NameIterator(this, getNameCount() + 1); }
- llvm::Error extract();
+ Error extract();
uint32_t getUnitOffset() const { return Base; }
uint32_t getNextUnitOffset() const { return Base + 4 + Hdr.UnitLength; }
void dump(ScopedPrinter &W) const;
@@ -580,7 +579,7 @@ public:
DataExtractor StringSection)
: DWARFAcceleratorTable(AccelSection, StringSection) {}
- llvm::Error extract() override;
+ Error extract() override;
void dump(raw_ostream &OS) const override;
/// Look up all entries in the accelerator table matching \c Key.
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
index 5a7df5c353e8..2d5f9f3c7658 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
@@ -1,9 +1,8 @@
//===- DWARFAddressRange.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -43,12 +42,6 @@ struct DWARFAddressRange {
return LowPC < RHS.HighPC && RHS.LowPC < HighPC;
}
- /// Returns true if [LowPC, HighPC) fully contains [RHS.LowPC, RHS.HighPC).
- bool contains(const DWARFAddressRange &RHS) const {
- assert(valid() && RHS.valid());
- return LowPC <= RHS.LowPC && RHS.HighPC <= HighPC;
- }
-
void dump(raw_ostream &OS, uint32_t AddressSize,
DIDumpOptions DumpOpts = {}) const;
};
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
index f0672bb0ca75..c8ad19ad6bf6 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
@@ -1,9 +1,8 @@
//===- DWARFAttribute.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -28,13 +27,10 @@ struct DWARFAttribute {
/// The debug info/types section byte size of the data for this attribute.
uint32_t ByteSize = 0;
/// The attribute enumeration of this attribute.
- dwarf::Attribute Attr;
+ dwarf::Attribute Attr = dwarf::Attribute(0);
/// The form and value for this attribute.
DWARFFormValue Value;
- DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0),
- dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {}
-
bool isValid() const {
return Offset != 0 && Attr != dwarf::Attribute(0);
}
@@ -43,12 +39,9 @@ struct DWARFAttribute {
return isValid();
}
- void clear() {
- Offset = 0;
- ByteSize = 0;
- Attr = dwarf::Attribute(0);
- Value = DWARFFormValue();
- }
+ /// Identifies DWARF attributes that may contain a reference to a
+ /// DWARF expression.
+ static bool mayHaveLocationDescription(dwarf::Attribute Attr);
};
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
index 33797419a7b8..16b9bfb5de56 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h
@@ -1,9 +1,8 @@
//===- DWARFCompileUnit.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index dbb6be04544b..23cf21c3523f 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -1,9 +1,8 @@
//===- DWARFContext.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===/
@@ -318,15 +317,23 @@ public:
/// Get the compilation unit, the function DIE and lexical block DIE for the
/// given address where applicable.
+ /// TODO: change input parameter from "uint64_t Address"
+ /// into "SectionedAddress Address"
DIEsForAddress getDIEsForAddress(uint64_t Address);
- DILineInfo getLineInfoForAddress(uint64_t Address,
+ DILineInfo getLineInfoForAddress(
+ object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
- DILineInfoTable getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
+ DILineInfoTable getLineInfoForAddressRange(
+ object::SectionedAddress Address, uint64_t Size,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
- DIInliningInfo getInliningInfoForAddress(uint64_t Address,
+ DIInliningInfo getInliningInfoForAddress(
+ object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+ std::vector<DILocal>
+ getLocalsForAddress(object::SectionedAddress Address) override;
+
bool isLittleEndian() const { return DObj->isLittleEndian(); }
static bool isSupportedVersion(unsigned version) {
return version == 2 || version == 3 || version == 4 || version == 5;
@@ -367,7 +374,11 @@ public:
private:
/// Return the compile unit which contains instruction with provided
/// address.
+ /// TODO: change input parameter from "uint64_t Address"
+ /// into "SectionedAddress Address"
DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
+ void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die,
+ std::vector<DILocal> &Result);
};
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
index 1ed087520b30..7c2a159b71fa 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDataExtractor.h
@@ -1,9 +1,8 @@
//===- DWARFDataExtractor.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index d277ec382ba5..28fd8484b4a9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -1,9 +1,8 @@
//===- DWARFDebugAbbrev.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
index ffbd1b06d1e2..a98bf282fe7c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h
@@ -1,9 +1,8 @@
//===- DWARFDebugAddr.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index ab46fac39f7c..5b6c578bc3bf 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -1,9 +1,8 @@
//===- DWARFDebugArangeSet.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
index ea71a50f3270..03223fbc80a9 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h
@@ -1,9 +1,8 @@
//===- DWARFDebugAranges.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -50,10 +49,6 @@ private:
return -1ULL;
}
- bool containsAddress(uint64_t Address) const {
- return LowPC <= Address && Address < HighPC();
- }
-
bool operator<(const Range &other) const {
return LowPC < other.LowPC;
}
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 7dc07d774aba..d960f4bc9b1c 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -1,9 +1,8 @@
//===- DWARFDebugFrame.h - Parsing of .debug_frame --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 88c8f57bc33c..f50063b24370 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -1,9 +1,8 @@
//===- DWARFDebugInfoEntry.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index d50af5a057f1..e7425c192373 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -1,9 +1,8 @@
//===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -122,6 +121,17 @@ public:
return LineBase + (int8_t)LineRange - 1;
}
+ /// Get DWARF-version aware access to the file name entry at the provided
+ /// index.
+ const llvm::DWARFDebugLine::FileNameEntry &
+ getFileNameEntry(uint64_t Index) const;
+
+ bool hasFileAtIndex(uint64_t FileIndex) const;
+
+ bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
+ DILineInfoSpecifier::FileLineInfoKind Kind,
+ std::string &Result) const;
+
void clear();
void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
@@ -140,12 +150,16 @@ public:
static void dumpTableHeader(raw_ostream &OS);
static bool orderByAddress(const Row &LHS, const Row &RHS) {
- return LHS.Address < RHS.Address;
+ return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
+ std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
}
/// The program-counter value corresponding to a machine instruction
- /// generated by the compiler.
- uint64_t Address;
+ /// generated by the compiler and section index pointing to the section
+    /// containing this PC. If relocation information is present, the section
+    /// index is the index of the section which contains the above address.
+ /// Otherwise this is object::SectionedAddress::Undef value.
+ object::SectionedAddress Address;
/// An unsigned integer indicating a source line number. Lines are numbered
/// beginning at 1. The compiler may emit the value 0 in cases where an
/// instruction cannot be attributed to any source line.
@@ -193,21 +207,29 @@ public:
/// and is described by line table rows [FirstRowIndex, LastRowIndex).
uint64_t LowPC;
uint64_t HighPC;
+ /// If relocation information is present then this is the index of the
+    /// section which contains the above addresses. Otherwise this is the
+    /// object::SectionedAddress::Undef value.
+ uint64_t SectionIndex;
unsigned FirstRowIndex;
unsigned LastRowIndex;
bool Empty;
void reset();
- static bool orderByLowPC(const Sequence &LHS, const Sequence &RHS) {
- return LHS.LowPC < RHS.LowPC;
+ static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) {
+ return std::tie(LHS.SectionIndex, LHS.HighPC) <
+ std::tie(RHS.SectionIndex, RHS.HighPC);
}
bool isValid() const {
return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
}
- bool containsPC(uint64_t PC) const { return (LowPC <= PC && PC < HighPC); }
+ bool containsPC(object::SectionedAddress PC) const {
+ return SectionIndex == PC.SectionIndex &&
+ (LowPC <= PC.Address && PC.Address < HighPC);
+ }
};
struct LineTable {
@@ -224,22 +246,30 @@ public:
/// Returns the index of the row with file/line info for a given address,
/// or UnknownRowIndex if there is no such row.
- uint32_t lookupAddress(uint64_t Address) const;
+ uint32_t lookupAddress(object::SectionedAddress Address) const;
- bool lookupAddressRange(uint64_t Address, uint64_t Size,
+ bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
std::vector<uint32_t> &Result) const;
- bool hasFileAtIndex(uint64_t FileIndex) const;
+ bool hasFileAtIndex(uint64_t FileIndex) const {
+ return Prologue.hasFileAtIndex(FileIndex);
+ }
/// Extracts filename by its index in filename table in prologue.
+    /// In DWARF v4, the files are 1-indexed and the current compilation file
+ /// name is not represented in the list. In DWARF v5, the files are
+ /// 0-indexed and the primary source file has the index 0.
/// Returns true on success.
- bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
+ bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
DILineInfoSpecifier::FileLineInfoKind Kind,
- std::string &Result) const;
+ std::string &Result) const {
+ return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
+ }
/// Fills the Result argument with the file and line information
/// corresponding to Address. Returns true on success.
- bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir,
+ bool getFileLineInfoForAddress(object::SectionedAddress Address,
+ const char *CompDir,
DILineInfoSpecifier::FileLineInfoKind Kind,
DILineInfo &Result) const;
@@ -264,10 +294,15 @@ public:
private:
uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
- uint64_t Address) const;
+ object::SectionedAddress Address) const;
Optional<StringRef>
getSourceByIndex(uint64_t FileIndex,
DILineInfoSpecifier::FileLineInfoKind Kind) const;
+
+ uint32_t lookupAddressImpl(object::SectionedAddress Address) const;
+
+ bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size,
+ std::vector<uint32_t> &Result) const;
};
const LineTable *getLineTable(uint32_t Offset) const;
@@ -334,13 +369,10 @@ private:
ParsingState(struct LineTable *LT);
void resetRowAndSequence();
- void appendRowToMatrix(uint32_t Offset);
+ void appendRowToMatrix();
/// Line table we're currently parsing.
struct LineTable *LineTable;
- /// The row number that starts at zero for the prologue, and increases for
- /// each row added to the matrix.
- unsigned RowNumber = 0;
struct Row Row;
struct Sequence Sequence;
};
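With Row::Address now carrying a section index, row ordering and PC containment compare (SectionIndex, Address) pairs, as orderByAddress and containsPC above do. Below is a small self-contained sketch of that comparison scheme; SectionedAddress, Row and Sequence here are simplified stand-ins, not the real llvm/object types.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <tuple>
#include <vector>

struct SectionedAddress {
  uint64_t Address = 0;
  uint64_t SectionIndex = 0;
};

struct Row {
  SectionedAddress Address;
  unsigned Line = 0;
};

// Rows are ordered by (SectionIndex, Address), mirroring orderByAddress above.
static bool orderByAddress(const Row &LHS, const Row &RHS) {
  return std::tie(LHS.Address.SectionIndex, LHS.Address.Address) <
         std::tie(RHS.Address.SectionIndex, RHS.Address.Address);
}

// A sequence only contains a PC if the section index matches too.
struct Sequence {
  uint64_t LowPC = 0, HighPC = 0, SectionIndex = 0;
  bool containsPC(SectionedAddress PC) const {
    return SectionIndex == PC.SectionIndex &&
           LowPC <= PC.Address && PC.Address < HighPC;
  }
};

int main() {
  std::vector<Row> Rows = {{{0x20, 1}, 7}, {{0x10, 2}, 3}, {{0x30, 1}, 9}};
  std::sort(Rows.begin(), Rows.end(), orderByAddress);
  assert(Rows.front().Address.SectionIndex == 1 && Rows.front().Address.Address == 0x20);

  Sequence Seq{0x10, 0x40, 2};
  assert(Seq.containsPC({0x20, 2}));
  assert(!Seq.containsPC({0x20, 1}));   // same address, different section
  return 0;
}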
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index da2098e15402..cced6048e811 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -1,9 +1,8 @@
//===- DWARFDebugLoc.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -42,7 +41,7 @@ public:
SmallVector<Entry, 2> Entries;
/// Dump this list on OS.
void dump(raw_ostream &OS, bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *MRI, uint64_t BaseAddress,
+ const MCRegisterInfo *MRI, DWARFUnit *U, uint64_t BaseAddress,
unsigned Indent) const;
};
@@ -87,7 +86,7 @@ public:
SmallVector<Entry, 2> Entries;
void dump(raw_ostream &OS, uint64_t BaseAddr, bool IsLittleEndian,
unsigned AddressSize, const MCRegisterInfo *RegInfo,
- unsigned Indent) const;
+ DWARFUnit *U, unsigned Indent) const;
};
private:
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
index bfe2fc3ac02d..a6c125990ca7 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -1,9 +1,8 @@
//===- DWARFDebugMacro.h ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
index 9e1656eb1615..99e91ca90319 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -1,9 +1,8 @@
//===- DWARFDebugPubTable.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index bc26edf00647..a66f60292343 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -1,9 +1,8 @@
//===- DWARFDebugRangeList.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -77,7 +76,7 @@ public:
/// list. Has to be passed base address of the compile unit referencing this
/// range list.
DWARFAddressRangesVector
- getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr) const;
+ getAbsoluteRanges(llvm::Optional<object::SectionedAddress> BaseAddr) const;
};
} // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
index 5cc8d789e598..167ddde3ec3d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h
@@ -1,9 +1,8 @@
//===- DWARFDebugRnglists.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,7 +37,7 @@ struct RangeListEntry : public DWARFListEntryBase {
Error extract(DWARFDataExtractor Data, uint32_t End, uint32_t *OffsetPtr);
void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
uint64_t &CurrentBase, DIDumpOptions DumpOpts,
- llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress) const;
bool isSentinel() const { return EntryKind == dwarf::DW_RLE_end_of_list; }
};
@@ -48,7 +47,7 @@ class DWARFDebugRnglist : public DWARFListType<RangeListEntry> {
public:
/// Build a DWARFAddressRangesVector from a rangelist.
DWARFAddressRangesVector
- getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
+ getAbsoluteRanges(llvm::Optional<object::SectionedAddress> BaseAddr,
DWARFUnit &U) const;
};
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 56d46cd739a2..21e68f983bb3 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -1,9 +1,8 @@
//===- DWARFDie.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 3fad68a9b48b..f066dd58d606 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -1,9 +1,8 @@
//===--- DWARFExpression.h - DWARF Expression handling ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -42,7 +41,8 @@ public:
SizeAddr = 5,
SizeRefAddr = 6,
SizeBlock = 7, ///< Preceding operand contains block size
- SignBit = 0x8,
+ BaseTypeRef = 8,
+ SignBit = 0x80,
SignedSize1 = SignBit | Size1,
SignedSize2 = SignBit | Size2,
SignedSize4 = SignBit | Size4,
@@ -55,7 +55,8 @@ public:
DwarfNA, ///< Serves as a marker for unused entries
Dwarf2 = 2,
Dwarf3,
- Dwarf4
+ Dwarf4,
+ Dwarf5
};
/// Description of the encoding of one expression Op.
@@ -78,17 +79,20 @@ public:
bool Error;
uint32_t EndOffset;
uint64_t Operands[2];
+ uint32_t OperandEndOffsets[2];
public:
Description &getDescription() { return Desc; }
uint8_t getCode() { return Opcode; }
uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; }
+ uint32_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; }
uint32_t getEndOffset() { return EndOffset; }
bool extract(DataExtractor Data, uint16_t Version, uint8_t AddressSize,
uint32_t Offset);
bool isError() { return Error; }
- bool print(raw_ostream &OS, const DWARFExpression *U,
- const MCRegisterInfo *RegInfo, bool isEH);
+ bool print(raw_ostream &OS, const DWARFExpression *Expr,
+ const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH);
+ bool verify(DWARFUnit *U);
};
/// An iterator to go through the expression operations.
@@ -125,15 +129,17 @@ public:
DWARFExpression(DataExtractor Data, uint16_t Version, uint8_t AddressSize)
: Data(Data), Version(Version), AddressSize(AddressSize) {
- assert(AddressSize == 8 || AddressSize == 4);
+ assert(AddressSize == 8 || AddressSize == 4 || AddressSize == 2);
}
iterator begin() const { return iterator(this, 0); }
iterator end() const { return iterator(this, Data.getData().size()); }
- void print(raw_ostream &OS, const MCRegisterInfo *RegInfo,
+ void print(raw_ostream &OS, const MCRegisterInfo *RegInfo, DWARFUnit *U,
bool IsEH = false) const;
+ bool verify(DWARFUnit *U);
+
private:
DataExtractor Data;
uint16_t Version;
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 727e853c09fb..731e71ed9eae 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -1,9 +1,8 @@
//===- DWARFFormValue.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -42,6 +41,9 @@ public:
private:
struct ValueType {
ValueType() { uval = 0; }
+ ValueType(int64_t V) : sval(V) {}
+ ValueType(uint64_t V) : uval(V) {}
+ ValueType(const char *V) : cstr(V) {}
union {
uint64_t uval;
@@ -56,26 +58,28 @@ private:
ValueType Value; /// Contains all data for the form.
const DWARFUnit *U = nullptr; /// Remember the DWARFUnit at extract time.
const DWARFContext *C = nullptr; /// Context for extract time.
+
+ DWARFFormValue(dwarf::Form F, ValueType V) : Form(F), Value(V) {}
+
public:
DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {}
+ static DWARFFormValue createFromSValue(dwarf::Form F, int64_t V);
+ static DWARFFormValue createFromUValue(dwarf::Form F, uint64_t V);
+ static DWARFFormValue createFromPValue(dwarf::Form F, const char *V);
+ static DWARFFormValue createFromBlockValue(dwarf::Form F,
+ ArrayRef<uint8_t> D);
+ static DWARFFormValue createFromUnit(dwarf::Form F, const DWARFUnit *Unit,
+ uint32_t *OffsetPtr);
+
dwarf::Form getForm() const { return Form; }
uint64_t getRawUValue() const { return Value.uval; }
- void setForm(dwarf::Form F) { Form = F; }
- void setUValue(uint64_t V) { Value.uval = V; }
- void setSValue(int64_t V) { Value.sval = V; }
- void setPValue(const char *V) { Value.cstr = V; }
-
- void setBlockValue(const ArrayRef<uint8_t> &Data) {
- Value.data = Data.data();
- setUValue(Data.size());
- }
bool isFormClass(FormClass FC) const;
const DWARFUnit *getUnit() const { return U; }
void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const;
void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts,
- SectionedAddress SA) const;
+ object::SectionedAddress SA) const;
static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS,
DIDumpOptions DumpOpts, uint64_t SectionIndex);
@@ -100,11 +104,16 @@ public:
/// getAsFoo functions below return the extracted value as Foo if only
/// DWARFFormValue has form class is suitable for representing Foo.
Optional<uint64_t> getAsReference() const;
+ struct UnitOffset {
+ DWARFUnit *Unit;
+ uint64_t Offset;
+ };
+ Optional<UnitOffset> getAsRelativeReference() const;
Optional<uint64_t> getAsUnsignedConstant() const;
Optional<int64_t> getAsSignedConstant() const;
Optional<const char *> getAsCString() const;
Optional<uint64_t> getAsAddress() const;
- Optional<SectionedAddress> getAsSectionedAddress() const;
+ Optional<object::SectionedAddress> getAsSectionedAddress() const;
Optional<uint64_t> getAsSectionOffset() const;
Optional<ArrayRef<uint8_t>> getAsBlock() const;
Optional<uint64_t> getAsCStringOffset() const;
@@ -155,6 +164,19 @@ inline Optional<const char *> toString(const Optional<DWARFFormValue> &V) {
return None;
}
+/// Take an optional DWARFFormValue and try to extract a string value from it.
+///
+/// \param V an optional DWARFFormValue to attempt to extract the value from.
+/// \returns an optional value that contains a value if the form value
+/// was valid and was a string.
+inline StringRef toStringRef(const Optional<DWARFFormValue> &V,
+ StringRef Default = {}) {
+ if (V)
+ if (auto S = V->getAsCString())
+ return *S;
+ return Default;
+}
+
/// Take an optional DWARFFormValue and extract a string value from it.
///
 /// \param V an optional DWARFFormValue to attempt to extract the value from.
@@ -242,7 +264,7 @@ inline Optional<uint64_t> toAddress(const Optional<DWARFFormValue> &V) {
return None;
}
-inline Optional<SectionedAddress>
+inline Optional<object::SectionedAddress>
toSectionedAddress(const Optional<DWARFFormValue> &V) {
if (V)
return V->getAsSectionedAddress();
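The new toStringRef() helper above returns the caller-supplied default when the value is absent or not a string. The sketch below reproduces that fallback pattern in isolation, using std::optional and std::string_view in place of llvm::Optional/StringRef, with a placeholder FormValue type rather than the real DWARFFormValue.

#include <cassert>
#include <optional>
#include <string_view>

// Placeholder for DWARFFormValue: only models the "maybe a C string" part.
struct FormValue {
  std::optional<const char *> CString;
  std::optional<const char *> getAsCString() const { return CString; }
};

inline std::string_view toStringRef(const std::optional<FormValue> &V,
                                    std::string_view Default = {}) {
  if (V)
    if (auto S = V->getAsCString())
      return *S;
  return Default;   // missing value or wrong form class falls back to Default
}

int main() {
  std::optional<FormValue> Name = FormValue{"main"};
  std::optional<FormValue> Missing;
  assert(toStringRef(Name) == "main");
  assert(toStringRef(Missing, "<unknown>") == "<unknown>");
  return 0;
}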
diff --git a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
index 073e02903c39..38cd42ddb883 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -1,9 +1,8 @@
//===- DWARFGdbIndex.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index 9b987314f209..a1ea69b040f0 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -1,9 +1,8 @@
//===- DWARFListTable.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -158,7 +157,7 @@ public:
uint8_t getAddrSize() const { return Header.getAddrSize(); }
void dump(raw_ostream &OS,
- llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress,
DIDumpOptions DumpOpts = {}) const;
@@ -235,7 +234,7 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
template <typename DWARFListType>
void DWARFListTableBase<DWARFListType>::dump(
raw_ostream &OS,
- llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress,
DIDumpOptions DumpOpts) const {
Header.dump(OS, DumpOpts);
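dump() now takes a callback that maps a .debug_addr pool index to an object::SectionedAddress, or to nothing when the index cannot be resolved. A minimal sketch of that callback shape follows; it uses std::function and stand-in types for illustration rather than llvm::function_ref and the real section types.

#include <cstdint>
#include <functional>
#include <iostream>
#include <optional>
#include <vector>

struct SectionedAddress {
  uint64_t Address = 0;
  uint64_t SectionIndex = 0;
};

// Dump one list entry, resolving its address index through the callback.
void dumpEntry(uint32_t AddrIndex,
               const std::function<std::optional<SectionedAddress>(uint32_t)>
                   &LookupPooledAddress) {
  if (auto SA = LookupPooledAddress(AddrIndex))
    std::cout << "addr[" << AddrIndex << "] = 0x" << std::hex << SA->Address
              << " (section " << std::dec << SA->SectionIndex << ")\n";
  else
    std::cout << "addr[" << AddrIndex << "] = <unresolved>\n";
}

int main() {
  std::vector<SectionedAddress> Pool = {{0x1000, 1}, {0x2000, 1}};
  auto Lookup = [&](uint32_t I) -> std::optional<SectionedAddress> {
    if (I < Pool.size())
      return Pool[I];
    return std::nullopt;
  };
  dumpEntry(0, Lookup);
  dumpEntry(5, Lookup);
  return 0;
}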
diff --git a/include/llvm/DebugInfo/DWARF/DWARFObject.h b/include/llvm/DebugInfo/DWARF/DWARFObject.h
index d611b5d075c8..1bba74a25d0e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFObject.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFObject.h
@@ -1,9 +1,8 @@
//===- DWARFObject.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------------===/
diff --git a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
index f51838424614..3add711943d0 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
@@ -1,9 +1,8 @@
//===- DWARFRelocMap.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_DEBUGINFO_DWARF_DWARFRELOCMAP_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Object/RelocationResolver.h"
#include <cstdint>
namespace llvm {
@@ -19,7 +19,11 @@ namespace llvm {
/// Section index is -1LL if relocation points to absolute symbol.
struct RelocAddrEntry {
uint64_t SectionIndex;
- uint64_t Value;
+ object::RelocationRef Reloc;
+ uint64_t SymbolValue;
+ Optional<object::RelocationRef> Reloc2;
+ uint64_t SymbolValue2;
+ object::RelocationResolver Resolver;
};
/// In place of applying the relocations to the data we've read from disk we use
diff --git a/include/llvm/DebugInfo/DWARF/DWARFSection.h b/include/llvm/DebugInfo/DWARF/DWARFSection.h
index 7f8235965297..054524d368ed 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFSection.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFSection.h
@@ -1,9 +1,8 @@
//===- DWARFSection.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,11 +22,6 @@ struct SectionName {
bool IsNameUnique;
};
-struct SectionedAddress {
- uint64_t Address;
- uint64_t SectionIndex;
-};
-
} // end namespace llvm
#endif // LLVM_DEBUGINFO_DWARF_DWARFSECTION_H
diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index 8ca5ba13fc23..90d89375fd35 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -1,9 +1,8 @@
//===- DWARFTypeUnit.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 79c3ce1106d5..f9f90db31890 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -1,9 +1,8 @@
//===- DWARFUnit.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -49,7 +48,7 @@ class DWARFUnitHeader {
uint32_t Offset = 0;
// Version, address size, and DWARF format.
dwarf::FormParams FormParams;
- uint32_t Length = 0;
+ uint64_t Length = 0;
uint64_t AbbrOffset = 0;
// For DWO units only.
@@ -83,7 +82,7 @@ public:
uint8_t getDwarfOffsetByteSize() const {
return FormParams.getDwarfOffsetByteSize();
}
- uint32_t getLength() const { return Length; }
+ uint64_t getLength() const { return Length; }
uint64_t getAbbrOffset() const { return AbbrOffset; }
Optional<uint64_t> getDWOId() const { return DWOId; }
void setDWOId(uint64_t Id) {
@@ -98,8 +97,11 @@ public:
return UnitType == dwarf::DW_UT_type || UnitType == dwarf::DW_UT_split_type;
}
uint8_t getSize() const { return Size; }
- // FIXME: Support DWARF64.
- uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
+ uint32_t getNextUnitOffset() const {
+ return Offset + Length +
+ (FormParams.Format == llvm::dwarf::DwarfFormat::DWARF64 ? 4 : 0) +
+ FormParams.getDwarfOffsetByteSize();
+ }
};
const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
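For context, the updated getNextUnitOffset() folds the size of the unit's initial-length field into the computation instead of hard-coding 4 bytes. A minimal sketch of the same arithmetic, separate from the import (the helper name and parameters are illustrative only):

#include <cstdint>

// The initial-length field is 4 bytes in DWARF32 and 12 bytes in DWARF64
// (a 4-byte 0xffffffff escape marker followed by an 8-byte length), and
// Length counts everything that follows that field.
static uint64_t nextUnitOffset(uint64_t Offset, uint64_t Length, bool IsDWARF64) {
  uint64_t InitialLengthSize = IsDWARF64 ? 4 + 8 : 4;
  return Offset + Length + InitialLengthSize;
}
// DWARF32: Offset + Length + 4   (the previous hard-coded formula)
// DWARF64: Offset + Length + 12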
@@ -173,6 +175,7 @@ struct StrOffsetsContributionDescriptor {
StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size,
uint8_t Version, dwarf::DwarfFormat Format)
: Base(Base), Size(Size), FormParams({Version, 0, Format}) {}
+ StrOffsetsContributionDescriptor() = default;
uint8_t getVersion() const { return FormParams.Version; }
dwarf::DwarfFormat getFormat() const { return FormParams.Format; }
@@ -182,7 +185,7 @@ struct StrOffsetsContributionDescriptor {
/// Determine whether a contribution to the string offsets table is
/// consistent with the relevant section size and that its length is
/// a multiple of the size of one of its entries.
- Optional<StrOffsetsContributionDescriptor>
+ Expected<StrOffsetsContributionDescriptor>
validateContributionSize(DWARFDataExtractor &DA);
};
@@ -218,7 +221,7 @@ class DWARFUnit {
Optional<DWARFDebugRnglistTable> RngListTable;
mutable const DWARFAbbreviationDeclarationSet *Abbrevs;
- llvm::Optional<SectionedAddress> BaseAddr;
+ llvm::Optional<object::SectionedAddress> BaseAddr;
/// The compile unit debug information entry items.
std::vector<DWARFDebugInfoEntry> DieArray;
@@ -247,14 +250,14 @@ protected:
/// Find the unit's contribution to the string offsets table and determine its
/// length and form. The given offset is expected to be derived from the unit
/// DIE's DW_AT_str_offsets_base attribute.
- Optional<StrOffsetsContributionDescriptor>
+ Expected<Optional<StrOffsetsContributionDescriptor>>
determineStringOffsetsTableContribution(DWARFDataExtractor &DA);
/// Find the unit's contribution to the string offsets table and determine its
/// length and form. The given offset is expected to be 0 in a dwo file or,
/// in a dwp file, the start of the unit's contribution to the string offsets
/// table section (as determined by the index table).
- Optional<StrOffsetsContributionDescriptor>
+ Expected<Optional<StrOffsetsContributionDescriptor>>
determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA);
public:
@@ -305,7 +308,8 @@ public:
RangeSectionBase = Base;
}
- Optional<SectionedAddress> getAddrOffsetSectionItem(uint32_t Index) const;
+ Optional<object::SectionedAddress>
+ getAddrOffsetSectionItem(uint32_t Index) const;
Optional<uint64_t> getStringOffsetSectionItem(uint32_t Index) const;
DWARFDataExtractor getDebugInfoExtractor() const;
@@ -376,7 +380,7 @@ public:
llvm_unreachable("Invalid UnitType.");
}
- llvm::Optional<SectionedAddress> getBaseAddress();
+ llvm::Optional<object::SectionedAddress> getBaseAddress();
DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
extractDIEsIfNeeded(ExtractUnitDIEOnly);
@@ -385,6 +389,13 @@ public:
return DWARFDie(this, &DieArray[0]);
}
+ DWARFDie getNonSkeletonUnitDIE(bool ExtractUnitDIEOnly = true) {
+ parseDWO();
+ if (DWO)
+ return DWO->getUnitDIE(ExtractUnitDIEOnly);
+ return getUnitDIE(ExtractUnitDIEOnly);
+ }
+
const char *getCompilationDir();
Optional<uint64_t> getDWOId() {
extractDIEsIfNeeded(/*CUDieOnly*/ true);
@@ -462,13 +473,12 @@ public:
DWARFDie getDIEForOffset(uint32_t Offset) {
extractDIEsIfNeeded(false);
assert(!DieArray.empty());
- auto it = std::lower_bound(
- DieArray.begin(), DieArray.end(), Offset,
- [](const DWARFDebugInfoEntry &LHS, uint32_t Offset) {
- return LHS.getOffset() < Offset;
+ auto It =
+ llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) {
+ return DIE.getOffset() < Offset;
});
- if (it != DieArray.end() && it->getOffset() == Offset)
- return DWARFDie(this, &*it);
+ if (It != DieArray.end() && It->getOffset() == Offset)
+ return DWARFDie(this, &*It);
return DWARFDie();
}
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
index 16be5f9401c0..fc8c707c512e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h
@@ -1,9 +1,8 @@
//===- DWARFUnitIndex.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index e47fbea5646e..f1268f220272 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -1,9 +1,8 @@
//===- DWARFVerifier.h ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/GSYM/FileEntry.h b/include/llvm/DebugInfo/GSYM/FileEntry.h
new file mode 100644
index 000000000000..228b4efa0656
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FileEntry.h
@@ -0,0 +1,68 @@
+//===- FileEntry.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_FILEENTRY_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include <functional>
+#include <stdint.h>
+#include <utility>
+
+namespace llvm {
+namespace gsym {
+
+/// Files in GSYM are contained in FileEntry structs where we split the
+/// directory and basename into two different strings in the string
+/// table. This allows paths to share common directory and filename
+/// strings and saves space.
+struct FileEntry {
+
+ /// Offsets in the string table.
+ /// @{
+ uint32_t Dir = 0;
+ uint32_t Base = 0;
+ /// @}
+
+ FileEntry() = default;
+ FileEntry(uint32_t D, uint32_t B) : Dir(D), Base(B) {}
+
+ // Implement operator== so that FileEntry can be used as key in
+ // unordered containers.
+ bool operator==(const FileEntry &RHS) const {
+ return Base == RHS.Base && Dir == RHS.Dir;
+ };
+ bool operator!=(const FileEntry &RHS) const {
+ return Base != RHS.Base || Dir != RHS.Dir;
+ };
+};
+
+} // namespace gsym
+
+template <> struct DenseMapInfo<gsym::FileEntry> {
+ static inline gsym::FileEntry getEmptyKey() {
+ uint32_t key = DenseMapInfo<uint32_t>::getEmptyKey();
+ return gsym::FileEntry(key, key);
+ }
+ static inline gsym::FileEntry getTombstoneKey() {
+ uint32_t key = DenseMapInfo<uint32_t>::getTombstoneKey();
+ return gsym::FileEntry(key, key);
+ }
+ static unsigned getHashValue(const gsym::FileEntry &Val) {
+ return llvm::hash_combine(DenseMapInfo<uint32_t>::getHashValue(Val.Dir),
+ DenseMapInfo<uint32_t>::getHashValue(Val.Base));
+ }
+ static bool isEqual(const gsym::FileEntry &LHS, const gsym::FileEntry &RHS) {
+ return LHS == RHS;
+ }
+};
+
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
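The DenseMapInfo specialization above exists so FileEntry can be used directly as a DenseMap key while deduplicating directory/basename pairs. A minimal usage sketch, separate from the import (the helper and the 1-based index convention are illustrative assumptions):

#include "llvm/ADT/DenseMap.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"

using namespace llvm;

// Map each unique (Dir, Base) string-table offset pair to a file index, as a
// GSYM producer might do while building its file table.
static uint32_t getOrCreateFileIndex(DenseMap<gsym::FileEntry, uint32_t> &Files,
                                     uint32_t DirStrOffset,
                                     uint32_t BaseStrOffset) {
  gsym::FileEntry FE(DirStrOffset, BaseStrOffset);
  auto Result = Files.insert({FE, Files.size() + 1});
  return Result.first->second; // Existing index, or the newly assigned one.
}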
diff --git a/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
new file mode 100644
index 000000000000..eedb1e638fd1
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -0,0 +1,107 @@
+//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include <tuple>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+
+/// Function information in GSYM files encodes information for one
+/// contiguous address range. The name of the function is encoded as
+/// a string table offset and allows multiple functions with the same
+/// name to share the name string in the string table. Line tables are
+/// stored in a sorted vector of gsym::LineEntry objects and are split
+/// into line tables for each function. If a function has a discontiguous
+/// range, it will be split into two gsym::FunctionInfo objects. If the
+/// function has inline functions, the information will be encoded in
+/// the "Inline" member, see gsym::InlineInfo for more information.
+struct FunctionInfo {
+ AddressRange Range;
+ uint32_t Name; ///< String table offset in the string table.
+ std::vector<gsym::LineEntry> Lines;
+ InlineInfo Inline;
+
+ FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
+ : Range(Addr, Addr + Size), Name(N) {}
+
+ bool hasRichInfo() const {
+    /// Returns whether we have anything other than the range and name. When
+ /// converting information from a symbol table and from debug info, we
+ /// might end up with multiple FunctionInfo objects for the same range
+ /// and we need to be able to tell which one is the better object to use.
+ return !Lines.empty() || Inline.isValid();
+ }
+
+ bool isValid() const {
+ /// Address and size can be zero and there can be no line entries for a
+ /// symbol so the only indication this entry is valid is if the name is
+ /// not zero. This can happen when extracting information from symbol
+ /// tables that do not encode symbol sizes. In that case only the
+ /// address and name will be filled in.
+ return Name != 0;
+ }
+
+ uint64_t startAddress() const { return Range.Start; }
+ uint64_t endAddress() const { return Range.End; }
+ uint64_t size() const { return Range.size(); }
+ void setStartAddress(uint64_t Addr) { Range.Start = Addr; }
+ void setEndAddress(uint64_t Addr) { Range.End = Addr; }
+ void setSize(uint64_t Size) { Range.End = Range.Start + Size; }
+
+ void clear() {
+ Range = {0, 0};
+ Name = 0;
+ Lines.clear();
+ Inline.clear();
+ }
+};
+
+inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+ return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
+ LHS.Lines == RHS.Lines && LHS.Inline == RHS.Inline;
+}
+inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+ return !(LHS == RHS);
+}
+/// This sorting orders FunctionInfo objects consistently by address range
+/// first, then by whether their inline info is valid, and then by their line
+/// tables. We might end up with a FunctionInfo from debug info that has the
+/// same range as one from the symbol table, but we want to be able to sort
+/// quickly and use the best version when creating the final GSYM file.
+inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
+ // First sort by address range
+ if (LHS.Range != RHS.Range)
+ return LHS.Range < RHS.Range;
+
+ // Then sort by inline
+ if (LHS.Inline.isValid() != RHS.Inline.isValid())
+ return RHS.Inline.isValid();
+
+ // If the number of lines is the same, then compare line table entries
+ if (LHS.Lines.size() == RHS.Lines.size())
+ return LHS.Lines < RHS.Lines;
+ // Then sort by number of line table entries (more is better)
+ return LHS.Lines.size() < RHS.Lines.size();
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
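Because the operator< above sorts poorer entries before richer ones for the same range, a producer that collects candidates from both the symbol table and debug info can sort and keep the last entry of each run. A small sketch, separate from the import (selectBest is a hypothetical helper):

#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include <algorithm>
#include <vector>

using llvm::gsym::FunctionInfo;

// After sorting, entries with the same address range are adjacent and the
// richest one (valid inline info, more line entries) sorts last, so keep the
// last entry of each run of equal ranges.
static std::vector<FunctionInfo> selectBest(std::vector<FunctionInfo> Funcs) {
  std::sort(Funcs.begin(), Funcs.end());
  std::vector<FunctionInfo> Best;
  for (size_t I = 0, E = Funcs.size(); I != E; ++I)
    if (I + 1 == E || Funcs[I].Range != Funcs[I + 1].Range)
      Best.push_back(Funcs[I]);
  return Best;
}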
diff --git a/include/llvm/DebugInfo/GSYM/InlineInfo.h b/include/llvm/DebugInfo/GSYM/InlineInfo.h
new file mode 100644
index 000000000000..222430622932
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -0,0 +1,78 @@
+//===- InlineInfo.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <stdint.h>
+#include <vector>
+
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+/// Inline information stores the name of the inline function along with
+/// an array of address ranges. It also stores the call file and call line
+/// that called this inline function. This allows us to unwind inline call
+/// stacks back to the inline or concrete function that called this
+/// function. Inlined functions contained in this function are stored in the
+/// "Children" variable. All address ranges must be sorted and all address
+/// ranges of all children must be contained in the ranges of this function.
+/// Any clients that encode information will need to ensure the ranges are
+/// all contained correctly or lookups could fail. All ranges in these objects
+/// must be contained in the top level FunctionInfo address ranges as well.
+struct InlineInfo {
+
+ uint32_t Name; ///< String table offset in the string table.
+ uint32_t CallFile; ///< 1 based file index in the file table.
+ uint32_t CallLine; ///< Source line number.
+ AddressRanges Ranges;
+ std::vector<InlineInfo> Children;
+ InlineInfo() : Name(0), CallFile(0), CallLine(0) {}
+ void clear() {
+ Name = 0;
+ CallFile = 0;
+ CallLine = 0;
+ Ranges.clear();
+ Children.clear();
+ }
+ bool isValid() const { return !Ranges.empty(); }
+
+ using InlineArray = std::vector<const InlineInfo *>;
+
+ /// Lookup an address in the InlineInfo object
+ ///
+ /// This function is used to symbolicate an inline call stack and can
+ /// turn one address in the program into one or more inline call stacks
+ /// and have the stack trace show the original call site from
+ /// non-inlined code.
+ ///
+ /// \param Addr the address to lookup
+ ///
+  /// \returns optional vector of InlineInfo objects that describe the
+  /// inline call stack for a given address, or llvm::None otherwise.
+ llvm::Optional<InlineArray> getInlineStack(uint64_t Addr) const;
+};
+
+inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {
+ return LHS.Name == RHS.Name && LHS.CallFile == RHS.CallFile &&
+ LHS.CallLine == RHS.CallLine && LHS.Ranges == RHS.Ranges &&
+ LHS.Children == RHS.Children;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const InlineInfo &FI);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
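A short sketch of how a symbolizer might consume getInlineStack(), separate from the import; dumpInlineStack is a hypothetical helper and no particular frame ordering is assumed:

#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print the string-table and file-table references for every inline frame
// that covers the given address, if any.
static void dumpInlineStack(const gsym::InlineInfo &II, uint64_t Addr) {
  if (Optional<gsym::InlineInfo::InlineArray> Stack = II.getInlineStack(Addr))
    for (const gsym::InlineInfo *Frame : *Stack)
      outs() << "name offset " << Frame->Name << ", call file index "
             << Frame->CallFile << ", call line " << Frame->CallLine << "\n";
}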
diff --git a/include/llvm/DebugInfo/GSYM/LineEntry.h b/include/llvm/DebugInfo/GSYM/LineEntry.h
new file mode 100644
index 000000000000..6b9380940bd3
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/LineEntry.h
@@ -0,0 +1,48 @@
+//===- LineEntry.h ----------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
+#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+
+namespace llvm {
+namespace gsym {
+
+/// Line entries are used to encode the line tables in FunctionInfo objects.
+/// They are stored as a sorted vector of these objects and store the
+/// address, file and line of the line table row for a given address. The
+/// size of a line table entry is calculated by looking at the next entry
+/// in the FunctionInfo's vector of entries.
+struct LineEntry {
+ uint64_t Addr; ///< Start address of this line entry.
+ uint32_t File; ///< 1 based index of file in FileTable
+ uint32_t Line; ///< Source line number.
+ LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0)
+ : Addr(A), File(F), Line(L) {}
+ bool isValid() { return File != 0; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const LineEntry &LE) {
+ return OS << "addr=" << HEX64(LE.Addr) << ", file=" << format("%3u", LE.File)
+ << ", line=" << format("%3u", LE.Line);
+}
+
+inline bool operator==(const LineEntry &LHS, const LineEntry &RHS) {
+ return LHS.Addr == RHS.Addr && LHS.File == RHS.File && LHS.Line == RHS.Line;
+}
+inline bool operator!=(const LineEntry &LHS, const LineEntry &RHS) {
+ return !(LHS == RHS);
+}
+inline bool operator<(const LineEntry &LHS, const LineEntry &RHS) {
+ return LHS.Addr < RHS.Addr;
+}
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
diff --git a/include/llvm/DebugInfo/GSYM/Range.h b/include/llvm/DebugInfo/GSYM/Range.h
new file mode 100644
index 000000000000..772ff244c5b7
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/Range.h
@@ -0,0 +1,87 @@
+//===- AddressRange.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
+#define LLVM_DEBUGINFO_GSYM_RANGE_H
+
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <stdint.h>
+#include <vector>
+
+#define HEX8(v) llvm::format_hex(v, 4)
+#define HEX16(v) llvm::format_hex(v, 6)
+#define HEX32(v) llvm::format_hex(v, 10)
+#define HEX64(v) llvm::format_hex(v, 18)
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+/// A class that represents an address range. The range is specified using
+/// a start and an end address.
+struct AddressRange {
+ uint64_t Start;
+ uint64_t End;
+ AddressRange() : Start(0), End(0) {}
+ AddressRange(uint64_t S, uint64_t E) : Start(S), End(E) {}
+ uint64_t size() const { return End - Start; }
+ bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
+ bool intersects(const AddressRange &R) const {
+ return Start < R.End && R.Start < End;
+ }
+
+ bool operator==(const AddressRange &R) const {
+ return Start == R.Start && End == R.End;
+ }
+ bool operator!=(const AddressRange &R) const {
+ return !(*this == R);
+ }
+ bool operator<(const AddressRange &R) const {
+ return std::make_pair(Start, End) < std::make_pair(R.Start, R.End);
+ }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
+
+/// The AddressRanges class helps normalize address range collections.
+/// This class keeps a sorted vector of AddressRange objects and can perform
+/// insertions and searches efficiently. The address ranges are always sorted
+/// and never contain any invalid or empty address ranges. This allows us to
+/// emit address ranges into the GSYM file efficiently. Intersecting address
+/// ranges are combined during insertion so that we can emit the most compact
+/// representation for address ranges when writing to disk.
+class AddressRanges {
+protected:
+ using Collection = std::vector<AddressRange>;
+ Collection Ranges;
+public:
+ void clear() { Ranges.clear(); }
+ bool empty() const { return Ranges.empty(); }
+ bool contains(uint64_t Addr) const;
+ void insert(AddressRange Range);
+ size_t size() const { return Ranges.size(); }
+ bool operator==(const AddressRanges &RHS) const {
+ return Ranges == RHS.Ranges;
+ }
+ const AddressRange &operator[](size_t i) const {
+ assert(i < Ranges.size());
+ return Ranges[i];
+ }
+ Collection::const_iterator begin() const { return Ranges.begin(); }
+ Collection::const_iterator end() const { return Ranges.end(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_RANGE_H
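A small sketch of the coalescing behavior described above, separate from the import; the addresses are made up and insert() is assumed to merge intersecting ranges as its comment states:

#include "llvm/DebugInfo/GSYM/Range.h"
#include <cassert>

using llvm::gsym::AddressRange;
using llvm::gsym::AddressRanges;

static void addressRangesExample() {
  AddressRanges Ranges;
  Ranges.insert(AddressRange(0x1000, 0x1400));
  Ranges.insert(AddressRange(0x1300, 0x1800)); // Intersects the first range.
  Ranges.insert(AddressRange(0x2000, 0x2100)); // Disjoint, stays separate.
  assert(Ranges.size() == 2);       // The first two inserts were merged.
  assert(Ranges.contains(0x17ff));  // Covered by the merged range.
  assert(!Ranges.contains(0x1800)); // End addresses are exclusive.
}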
diff --git a/include/llvm/DebugInfo/GSYM/StringTable.h b/include/llvm/DebugInfo/GSYM/StringTable.h
new file mode 100644
index 000000000000..0001b8b82743
--- /dev/null
+++ b/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -0,0 +1,54 @@
+//===- StringTable.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
+#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <stdint.h>
+#include <string>
+
+
+namespace llvm {
+namespace gsym {
+
+/// String tables in GSYM files are required to start with an empty
+/// string at offset zero. Strings must be UTF-8 encoded and NULL terminated.
+struct StringTable {
+ StringRef Data;
+ StringTable() : Data() {}
+ StringTable(StringRef D) : Data(D) {}
+ StringRef operator[](size_t Offset) const { return getString(Offset); }
+ StringRef getString(uint32_t Offset) const {
+ if (Offset < Data.size()) {
+ auto End = Data.find('\0', Offset);
+ return Data.substr(Offset, End - Offset);
+ }
+ return StringRef();
+ }
+ void clear() { Data = StringRef(); }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const StringTable &S) {
+ OS << "String table:\n";
+ uint32_t Offset = 0;
+ const size_t Size = S.Data.size();
+ while (Offset < Size) {
+ StringRef Str = S.getString(Offset);
+ OS << HEX32(Offset) << ": \"" << Str << "\"\n";
+ Offset += Str.size() + 1;
+ }
+ return OS;
+}
+
+} // namespace gsym
+} // namespace llvm
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
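A quick sketch of the offset-based lookup, separate from the import; note the backing data must be constructed with an explicit length because it contains embedded NUL bytes:

#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include <cassert>

using namespace llvm;

static void stringTableExample() {
  // Table layout: "" at offset 0, "main" at offset 1, "foo.c" at offset 6.
  static const char Data[] = "\0main\0foo.c"; // 12 bytes with the trailing NUL.
  gsym::StringTable ST(StringRef(Data, sizeof(Data)));
  assert(ST.getString(0).empty());
  assert(ST.getString(1) == "main");
  assert(ST[6] == "foo.c"); // operator[] forwards to getString().
}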
diff --git a/include/llvm/DebugInfo/MSF/IMSFFile.h b/include/llvm/DebugInfo/MSF/IMSFFile.h
index f98e715e6b15..7e80f96b89ae 100644
--- a/include/llvm/DebugInfo/MSF/IMSFFile.h
+++ b/include/llvm/DebugInfo/MSF/IMSFFile.h
@@ -1,9 +1,8 @@
//===- IMSFFile.h - Abstract base class for an MSF file ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/MSF/MSFBuilder.h b/include/llvm/DebugInfo/MSF/MSFBuilder.h
index 3de98c4ecba8..282870f5b3f1 100644
--- a/include/llvm/DebugInfo/MSF/MSFBuilder.h
+++ b/include/llvm/DebugInfo/MSF/MSFBuilder.h
@@ -1,9 +1,8 @@
//===- MSFBuilder.h - MSF Directory & Metadata Builder ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/MSF/MSFCommon.h b/include/llvm/DebugInfo/MSF/MSFCommon.h
index 2db2b71df4a7..83331b14b8af 100644
--- a/include/llvm/DebugInfo/MSF/MSFCommon.h
+++ b/include/llvm/DebugInfo/MSF/MSFCommon.h
@@ -1,9 +1,8 @@
//===- MSFCommon.h - Common types and functions for MSF files ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/MSF/MSFError.h b/include/llvm/DebugInfo/MSF/MSFError.h
index 5c043a7837b3..fbc4e6928536 100644
--- a/include/llvm/DebugInfo/MSF/MSFError.h
+++ b/include/llvm/DebugInfo/MSF/MSFError.h
@@ -1,9 +1,8 @@
//===- MSFError.h - Error extensions for MSF Files --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index f65e52922da7..593d781b990e 100644
--- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -1,9 +1,8 @@
//==- MappedBlockStream.h - Discontiguous stream data in an MSF --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index ac7f19637ab1..49ba20af7263 100644
--- a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -1,9 +1,8 @@
//===- ConcreteSymbolEnumerator.h -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h b/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
index 881d7329ab66..f05b58c55507 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIADataStream.h
@@ -1,9 +1,8 @@
//===- DIADataStream.h - DIA implementation of IPDBDataStream ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
index 1f129052d034..8a00ad45291a 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h
@@ -1,9 +1,8 @@
//==- DIAEnumDebugStreams.h - DIA Debug Stream Enumerator impl ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
index f3b02f07e648..bd417c0746b1 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h
@@ -1,9 +1,8 @@
//==- DIAEnumFrameData.h --------------------------------------- -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
index 4669a8d31196..1f75ca27c4f8 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h
@@ -1,9 +1,8 @@
//==- DIAEnumInjectedSources.h - DIA Injected Sources Enumerator -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
index f1cb6268a26d..8800baac105d 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h
@@ -1,9 +1,8 @@
//==- DIAEnumLineNumbers.h - DIA Line Number Enumerator impl -----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
index ac2ae317d263..be8613bfba9d 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h
@@ -1,9 +1,8 @@
//==- DIAEnumSectionContribs.h --------------------------------- -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
index dac3df06a178..61278994ed36 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h
@@ -1,9 +1,8 @@
//==- DIAEnumSourceFiles.h - DIA Source File Enumerator impl -----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
index 9689859ae0f8..f55342cea2e5 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h
@@ -1,9 +1,8 @@
//==- DIAEnumSymbols.h - DIA Symbol Enumerator impl --------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
index f4f856ebb6fd..057cb06fc8ca 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumTables.h
@@ -1,9 +1,8 @@
//===- DIAEnumTables.h - DIA Tables Enumerator Impl -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAError.h b/include/llvm/DebugInfo/PDB/DIA/DIAError.h
index 2b33a65a0a14..96d960599f7e 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAError.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAError.h
@@ -1,9 +1,8 @@
//===- DIAError.h - Error extensions for PDB DIA implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h b/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
index 0ce6cfc93030..c04f7cd00836 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h
@@ -1,9 +1,8 @@
//===- DIAFrameData.h - DIA Impl. of IPDBFrameData ---------------- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h b/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
index 635508da84ea..67963a06d939 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
@@ -1,9 +1,8 @@
//===- DIAInjectedSource.h - DIA impl for IPDBInjectedSource ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -26,7 +25,7 @@ public:
std::string getFileName() const override;
std::string getObjectFileName() const override;
std::string getVirtualFileName() const override;
- PDB_SourceCompression getCompression() const override;
+ uint32_t getCompression() const override;
std::string getCode() const override;
private:
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h b/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h
index a59e3a19c8c2..d8bb27220763 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIALineNumber.h
@@ -1,9 +1,8 @@
//===- DIALineNumber.h - DIA implementation of IPDBLineNumber ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
index 5d4f855c63ca..7f201d3a4e36 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h
@@ -1,9 +1,8 @@
//===- DIARawSymbol.h - DIA implementation of IPDBRawSymbol ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h b/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h
index 4688f1f91a89..0972831e8b16 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h
@@ -1,9 +1,8 @@
//===- DIASectionContrib.h - DIA Impl. of IPDBSectionContrib ------ C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/include/llvm/DebugInfo/PDB/DIA/DIASession.h
index 592e061a8d83..6f62e6061f56 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASession.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASession.h
@@ -1,9 +1,8 @@
//===- DIASession.h - DIA implementation of IPDBSession ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h b/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h
index 1088ea54981c..96edfc9f9e29 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASourceFile.h
@@ -1,9 +1,8 @@
//===- DIASourceFile.h - DIA implementation of IPDBSourceFile ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASupport.h b/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
index 92ebc04ae5a4..1a7c2f3aeeab 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
@@ -1,9 +1,8 @@
//===- DIASupport.h - Common header includes for DIA ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Common defines and header includes for all LLVMDebugInfoPDBDIA. The
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIATable.h b/include/llvm/DebugInfo/PDB/DIA/DIATable.h
index ce93fa0b86c3..65396a042f06 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIATable.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIATable.h
@@ -1,9 +1,8 @@
//===- DIATable.h - DIA implementation of IPDBTable -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h b/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h
index aa843e05de70..5e01d8f10a6e 100644
--- a/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h
+++ b/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h
@@ -1,9 +1,8 @@
//===- DIAUtils.h - Utility functions for working with DIA ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/GenericError.h b/include/llvm/DebugInfo/PDB/GenericError.h
index 997f13f5f30e..ec85d92d2a92 100644
--- a/include/llvm/DebugInfo/PDB/GenericError.h
+++ b/include/llvm/DebugInfo/PDB/GenericError.h
@@ -1,9 +1,8 @@
-//===- Error.h - system_error extensions for PDB ----------------*- C++ -*-===//
+//===- GenericError.h - system_error extensions for PDB ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBDataStream.h b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
index 0d7a286a11a6..4d0589a87915 100644
--- a/include/llvm/DebugInfo/PDB/IPDBDataStream.h
+++ b/include/llvm/DebugInfo/PDB/IPDBDataStream.h
@@ -1,9 +1,8 @@
//===- IPDBDataStream.h - base interface for child enumerator ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index 7017f2600e9b..bfa67d39bc76 100644
--- a/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -1,9 +1,8 @@
//===- IPDBEnumChildren.h - base interface for child enumerator -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBFrameData.h b/include/llvm/DebugInfo/PDB/IPDBFrameData.h
index 74679215b880..24138b380db4 100644
--- a/include/llvm/DebugInfo/PDB/IPDBFrameData.h
+++ b/include/llvm/DebugInfo/PDB/IPDBFrameData.h
@@ -1,9 +1,8 @@
//===- IPDBFrameData.h - base interface for frame data ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h b/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
index e75d64af92bb..d5b36f9846b5 100644
--- a/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
+++ b/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
@@ -1,16 +1,14 @@
//===- IPDBInjectedSource.h - base class for PDB injected file --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
#define LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
-#include "PDBTypes.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <string>
@@ -33,7 +31,10 @@ public:
virtual std::string getFileName() const = 0;
virtual std::string getObjectFileName() const = 0;
virtual std::string getVirtualFileName() const = 0;
- virtual PDB_SourceCompression getCompression() const = 0;
+ // The returned value depends on the PDB producer,
+ // but 0 is guaranteed to mean "no compression".
+ // The enum PDB_SourceCompression lists known return values.
+ virtual uint32_t getCompression() const = 0;
virtual std::string getCode() const = 0;
};
} // namespace pdb
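A consumer-side sketch of the new contract, separate from the import (printIfUncompressed is a hypothetical helper): only treat the payload as plain text when the producer reports no compression.

#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
#include "llvm/Support/raw_ostream.h"

static void printIfUncompressed(const llvm::pdb::IPDBInjectedSource &Src,
                                llvm::raw_ostream &OS) {
  if (Src.getCompression() == 0) // 0 is guaranteed to mean "no compression".
    OS << Src.getCode();
  else
    OS << "<" << Src.getVirtualFileName() << ": unrecognized compression "
       << Src.getCompression() << ">\n";
}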
diff --git a/include/llvm/DebugInfo/PDB/IPDBLineNumber.h b/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
index e20080f2fbfc..77e88999497e 100644
--- a/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
+++ b/include/llvm/DebugInfo/PDB/IPDBLineNumber.h
@@ -1,9 +1,8 @@
//===- IPDBLineNumber.h - base interface for PDB line no. info ---*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
index 7c818d7cadeb..b24e712e3b78 100644
--- a/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h
@@ -1,9 +1,8 @@
//===- IPDBRawSymbol.h - base interface for PDB symbol types ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h b/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h
index 4fda62404672..c5cf4bbe5560 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSectionContrib.h
@@ -1,9 +1,8 @@
//==- IPDBSectionContrib.h - Interfaces for PDB SectionContribs --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBSession.h b/include/llvm/DebugInfo/PDB/IPDBSession.h
index 88fd02c0a345..aa8d9c76d63e 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -1,9 +1,8 @@
//===- IPDBSession.h - base interface for a PDB symbol context --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBSourceFile.h b/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
index 3676c4030b13..d7e49fb70580 100644
--- a/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
+++ b/include/llvm/DebugInfo/PDB/IPDBSourceFile.h
@@ -1,9 +1,8 @@
//===- IPDBSourceFile.h - base interface for a PDB source file --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/IPDBTable.h b/include/llvm/DebugInfo/PDB/IPDBTable.h
index 4561c4e847b2..55ca230d58c4 100644
--- a/include/llvm/DebugInfo/PDB/IPDBTable.h
+++ b/include/llvm/DebugInfo/PDB/IPDBTable.h
@@ -1,9 +1,8 @@
//===- IPDBTable.h - Base Interface for a PDB Symbol Context ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
index 9eef4041d0a1..568f0c98c559 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h
@@ -1,9 +1,8 @@
//===- DbiModuleDescriptor.h - PDB module information -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index ac7f741afefa..4f5d28bbd05a 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -1,9 +1,8 @@
//===- DbiModuleDescriptorBuilder.h - PDB module information ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
index 5f6e7ab92a96..14223273c898 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
@@ -1,9 +1,8 @@
//===- DbiModuleList.h - PDB module information list ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
index a3ca607efbef..7d75c159b7ae 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h
@@ -1,9 +1,8 @@
//===- DbiStream.h - PDB Dbi Stream (Stream 3) Access -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
@@ -80,7 +80,10 @@ public:
FixedStreamArray<object::coff_section> getSectionHeaders() const;
- FixedStreamArray<object::FpoData> getFpoRecords();
+ bool hasOldFpoRecords() const;
+ FixedStreamArray<object::FpoData> getOldFpoRecords() const;
+ bool hasNewFpoRecords() const;
+ const codeview::DebugFrameDataSubsectionRef &getNewFpoRecords() const;
FixedStreamArray<SecMapEntry> getSectionMap() const;
void visitSectionContributions(ISectionContribVisitor &Visitor) const;
@@ -91,7 +94,11 @@ private:
Error initializeSectionContributionData();
Error initializeSectionHeadersData(PDBFile *Pdb);
Error initializeSectionMapData();
- Error initializeFpoRecords(PDBFile *Pdb);
+ Error initializeOldFpoRecords(PDBFile *Pdb);
+ Error initializeNewFpoRecords(PDBFile *Pdb);
+
+ Expected<std::unique_ptr<msf::MappedBlockStream>>
+ createIndexedStreamForHeaderType(PDBFile *Pdb, DbgHeaderType Type) const;
std::unique_ptr<BinaryStream> Stream;
@@ -117,8 +124,11 @@ private:
std::unique_ptr<msf::MappedBlockStream> SectionHeaderStream;
FixedStreamArray<object::coff_section> SectionHeaders;
- std::unique_ptr<msf::MappedBlockStream> FpoStream;
- FixedStreamArray<object::FpoData> FpoRecords;
+ std::unique_ptr<msf::MappedBlockStream> OldFpoStream;
+ FixedStreamArray<object::FpoData> OldFpoRecords;
+
+ std::unique_ptr<msf::MappedBlockStream> NewFpoStream;
+ codeview::DebugFrameDataSubsectionRef NewFpoRecords;
const DbiStreamHeader *Header;
};
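
The DbiStream hunk above splits the single FPO accessor into separate queries for the old-format records (object::FpoData) and the new CodeView frame-data form. A minimal sketch of how a caller might consume both, assuming "Dbi" is a DbiStream that has already been reloaded from a PDBFile (setup omitted); the iteration of the new subsection is only hinted at in a comment:

    #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
    #include "llvm/Support/raw_ostream.h"

    // Illustrative only; "Dbi" is assumed to be an already-loaded DbiStream.
    void dumpFpoSummary(llvm::pdb::DbiStream &Dbi, llvm::raw_ostream &OS) {
      if (Dbi.hasOldFpoRecords())
        OS << "old-style FPO records: " << Dbi.getOldFpoRecords().size() << "\n";
      if (Dbi.hasNewFpoRecords()) {
        // The new format is surfaced as a CodeView frame-data subsection;
        // walk it with its own begin()/end() as needed.
        const llvm::codeview::DebugFrameDataSubsectionRef &NewFpo =
            Dbi.getNewFpoRecords();
        (void)NewFpo;
      }
    }
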
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
index b538de576677..d9be238af07b 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h
@@ -1,9 +1,8 @@
//===- DbiStreamBuilder.h - PDB Dbi Stream Creation -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/EnumTables.h b/include/llvm/DebugInfo/PDB/Native/EnumTables.h
index c018445630fe..70161fadf7d2 100644
--- a/include/llvm/DebugInfo/PDB/Native/EnumTables.h
+++ b/include/llvm/DebugInfo/PDB/Native/EnumTables.h
@@ -1,9 +1,8 @@
//===- EnumTables.h - Enum to string conversion tables ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/Formatters.h b/include/llvm/DebugInfo/PDB/Native/Formatters.h
index 7d5eab2e2a09..29c957eeb5e0 100644
--- a/include/llvm/DebugInfo/PDB/Native/Formatters.h
+++ b/include/llvm/DebugInfo/PDB/Native/Formatters.h
@@ -1,9 +1,8 @@
//===- Formatters.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
index 4c39ca762b5b..a49795600028 100644
--- a/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h
@@ -1,9 +1,8 @@
//===- GSIStreamBuilder.h - PDB Publics/Globals Stream Creation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h b/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
index 7f84564ee988..404baaa25077 100644
--- a/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/GlobalsStream.h
@@ -1,9 +1,8 @@
//===- GlobalsStream.h - PDB Index of Symbols by Name -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/Hash.h b/include/llvm/DebugInfo/PDB/Native/Hash.h
index 1f11d43ecdd4..b048d878a12c 100644
--- a/include/llvm/DebugInfo/PDB/Native/Hash.h
+++ b/include/llvm/DebugInfo/PDB/Native/Hash.h
@@ -1,9 +1,8 @@
//===- Hash.h - PDB hash functions ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/HashTable.h b/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 34cc6179688b..aa38417bcf4c 100644
--- a/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -1,9 +1,8 @@
//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -32,21 +31,21 @@ namespace pdb {
Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
-template <typename ValueT, typename TraitsT> class HashTable;
+template <typename ValueT> class HashTable;
-template <typename ValueT, typename TraitsT>
+template <typename ValueT>
class HashTableIterator
- : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
+ : public iterator_facade_base<HashTableIterator<ValueT>,
std::forward_iterator_tag,
- std::pair<uint32_t, ValueT>> {
- friend HashTable<ValueT, TraitsT>;
+ const std::pair<uint32_t, ValueT>> {
+ friend HashTable<ValueT>;
- HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index,
+ HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
bool IsEnd)
: Map(&Map), Index(Index), IsEnd(IsEnd) {}
public:
- HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) {
+ HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
int I = Map.Present.find_first();
if (I == -1) {
Index = 0;
@@ -73,6 +72,12 @@ public:
assert(Map->Present.test(Index));
return Map->Buckets[Index];
}
+
+ // Implement postfix op++ in terms of prefix op++ by using the superclass
+ // implementation.
+ using iterator_facade_base<HashTableIterator<ValueT>,
+ std::forward_iterator_tag,
+ const std::pair<uint32_t, ValueT>>::operator++;
HashTableIterator &operator++() {
while (Index < Map->Buckets.size()) {
++Index;
@@ -88,24 +93,13 @@ private:
bool isEnd() const { return IsEnd; }
uint32_t index() const { return Index; }
- const HashTable<ValueT, TraitsT> *Map;
+ const HashTable<ValueT> *Map;
uint32_t Index;
bool IsEnd;
};
-template <typename T> struct PdbHashTraits {};
-
-template <> struct PdbHashTraits<uint32_t> {
- uint32_t hashLookupKey(uint32_t N) const { return N; }
- uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
- uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
-};
-
-template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
+template <typename ValueT>
class HashTable {
- using iterator = HashTableIterator<ValueT, TraitsT>;
- friend iterator;
-
struct Header {
support::ulittle32_t Size;
support::ulittle32_t Capacity;
@@ -114,10 +108,11 @@ class HashTable {
using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
public:
- HashTable() { Buckets.resize(8); }
+ using const_iterator = HashTableIterator<ValueT>;
+ friend const_iterator;
- explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
- HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
+ HashTable() { Buckets.resize(8); }
+ explicit HashTable(uint32_t Capacity) {
Buckets.resize(Capacity);
}
@@ -144,7 +139,7 @@ public:
return EC;
if (Present.intersects(Deleted))
return make_error<RawError>(raw_error_code::corrupt_file,
- "Present bit vector interesects deleted!");
+ "Present bit vector intersects deleted!");
for (uint32_t P : Present) {
if (auto EC = Stream.readInteger(Buckets[P].first))
@@ -217,19 +212,20 @@ public:
uint32_t capacity() const { return Buckets.size(); }
uint32_t size() const { return Present.count(); }
- iterator begin() const { return iterator(*this); }
- iterator end() const { return iterator(*this, 0, true); }
+ const_iterator begin() const { return const_iterator(*this); }
+ const_iterator end() const { return const_iterator(*this, 0, true); }
/// Find the entry whose key has the specified hash value, using the specified
/// traits defining hash function and equality.
- template <typename Key> iterator find_as(const Key &K) const {
+ template <typename Key, typename TraitsT>
+ const_iterator find_as(const Key &K, TraitsT &Traits) const {
uint32_t H = Traits.hashLookupKey(K) % capacity();
uint32_t I = H;
Optional<uint32_t> FirstUnused;
do {
if (isPresent(I)) {
if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
- return iterator(*this, I, false);
+ return const_iterator(*this, I, false);
} else {
if (!FirstUnused)
FirstUnused = I;
@@ -248,17 +244,19 @@ public:
// table were Present. But this would violate the load factor constraints
// that we impose, so it should never happen.
assert(FirstUnused);
- return iterator(*this, *FirstUnused, true);
+ return const_iterator(*this, *FirstUnused, true);
}
/// Set the entry using a key type that the specified Traits can convert
/// from a real key to an internal key.
- template <typename Key> bool set_as(const Key &K, ValueT V) {
- return set_as_internal(K, std::move(V), None);
+ template <typename Key, typename TraitsT>
+ bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
+ return set_as_internal(K, std::move(V), Traits, None);
}
- template <typename Key> ValueT get(const Key &K) const {
- auto Iter = find_as(K);
+ template <typename Key, typename TraitsT>
+ ValueT get(const Key &K, TraitsT &Traits) const {
+ auto Iter = find_as(K, Traits);
assert(Iter != end());
return (*Iter).second;
}
@@ -267,7 +265,6 @@ protected:
bool isPresent(uint32_t K) const { return Present.test(K); }
bool isDeleted(uint32_t K) const { return Deleted.test(K); }
- TraitsT Traits;
BucketList Buckets;
mutable SparseBitVector<> Present;
mutable SparseBitVector<> Deleted;
@@ -275,9 +272,10 @@ protected:
private:
/// Set the entry using a key type that the specified Traits can convert
/// from a real key to an internal key.
- template <typename Key>
- bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
- auto Entry = find_as(K);
+ template <typename Key, typename TraitsT>
+ bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
+ Optional<uint32_t> InternalKey) {
+ auto Entry = find_as(K, Traits);
if (Entry != end()) {
assert(isPresent(Entry.index()));
assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
@@ -294,15 +292,16 @@ private:
Present.set(Entry.index());
Deleted.reset(Entry.index());
- grow();
+ grow(Traits);
- assert((find_as(K)) != end());
+ assert((find_as(K, Traits)) != end());
return true;
}
static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
- void grow() {
+ template <typename TraitsT>
+ void grow(TraitsT &Traits) {
uint32_t S = size();
uint32_t MaxLoad = maxLoad(capacity());
if (S < maxLoad(capacity()))
@@ -314,10 +313,11 @@ private:
// Growing requires rebuilding the table and re-hashing every item. Make a
// copy with a larger capacity, insert everything into the copy, then swap
// it in.
- HashTable NewMap(NewCapacity, Traits);
+ HashTable NewMap(NewCapacity);
for (auto I : Present) {
auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
- NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
+ NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
+ Buckets[I].first);
}
Buckets.swap(NewMap.Buckets);
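
The HashTable hunks above drop the class-level TraitsT parameter (and the PdbHashTraits default), pass the traits object to each call instead, and expose only a const_iterator. A hedged sketch of the new call pattern; IdentityTraits is a hypothetical stand-in for a real traits type such as NamedStreamMapTraits, and the key/value literals are placeholders:

    #include "llvm/DebugInfo/PDB/Native/HashTable.h"

    // Hypothetical identity traits; PdbHashTraits was removed by this change,
    // so callers now supply their own traits object on every call.
    struct IdentityTraits {
      uint32_t hashLookupKey(uint32_t N) const { return N; }
      uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
      uint32_t lookupKeyToStorageKey(uint32_t N) const { return N; }
    };

    void hashTableExample() {
      llvm::pdb::HashTable<uint32_t> Table;   // was HashTable<ValueT, TraitsT>
      IdentityTraits Traits;
      Table.set_as(7u, 42u, Traits);          // traits are now a call argument
      auto It = Table.find_as(7u, Traits);    // returns a const_iterator
      if (It != Table.end())
        (void)(*It).second;                   // 42
    }
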
diff --git a/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h b/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h
index fb00d6ad4bc7..717dce2f2737 100644
--- a/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h
+++ b/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h
@@ -1,9 +1,8 @@
//===- ISectionContribVisitor.h ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/include/llvm/DebugInfo/PDB/Native/InfoStream.h
index 8c52b042f289..315b09356ae3 100644
--- a/include/llvm/DebugInfo/PDB/Native/InfoStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/InfoStream.h
@@ -1,9 +1,8 @@
//===- InfoStream.h - PDB Info Stream (Stream 1) Access ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
index 101127a355f5..208a37c45d49 100644
--- a/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h
@@ -1,9 +1,8 @@
//===- InfoStreamBuilder.h - PDB Info Stream Creation -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h b/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
new file mode 100644
index 000000000000..d0cac3749bca
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
@@ -0,0 +1,44 @@
+//===- InjectedSourceStream.h - PDB Headerblock Stream Access ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H
+
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
+namespace pdb {
+class PDBFile;
+class PDBStringTable;
+
+class InjectedSourceStream {
+public:
+ InjectedSourceStream(std::unique_ptr<msf::MappedBlockStream> Stream);
+ Error reload(const PDBStringTable &Strings);
+
+ using const_iterator = HashTable<SrcHeaderBlockEntry>::const_iterator;
+ const_iterator begin() const { return InjectedSourceTable.begin(); }
+ const_iterator end() const { return InjectedSourceTable.end(); }
+
+ uint32_t size() const { return InjectedSourceTable.size(); }
+
+private:
+ std::unique_ptr<msf::MappedBlockStream> Stream;
+
+ const SrcHeaderBlockHeader* Header;
+ HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
+};
+}
+}
+
+#endif
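
The new InjectedSourceStream wraps the injected-source header block as a HashTable of SrcHeaderBlockEntry keyed by string-table offset. A sketch of walking it, assuming "IJS" has already been reload()-ed against the PDB string table:

    #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
    #include "llvm/Support/raw_ostream.h"

    // Illustrative walk over an already-loaded InjectedSourceStream.
    void listInjectedSources(const llvm::pdb::InjectedSourceStream &IJS,
                             llvm::raw_ostream &OS) {
      OS << IJS.size() << " injected source(s)\n";
      for (const auto &Entry : IJS) {
        // Entry.first is the name's offset into the string table;
        // Entry.second is the SrcHeaderBlockEntry record itself.
        OS << "  name offset " << Entry.first << "\n";
      }
    }
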
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index 8d590df288f3..cb1ffc729512 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -1,9 +1,8 @@
//===- ModuleDebugStream.h - PDB Module Info Stream Access ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -69,6 +68,8 @@ public:
findChecksumsSubsection() const;
private:
+ Error reloadSerialize(BinaryStreamReader &Reader);
+
DbiModuleDescriptor Mod;
uint32_t Signature;
diff --git a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index 01b8f1b5da56..1df059ffa9fd 100644
--- a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -1,9 +1,8 @@
//===- NamedStreamMap.h - PDB Named Stream Map ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -60,7 +59,7 @@ private:
NamedStreamMapTraits HashTraits;
/// Closed hash table from Offset -> StreamNumber, where Offset is the offset
/// of the stream name in NamesBuffer.
- HashTable<support::ulittle32_t, NamedStreamMapTraits> OffsetIndexMap;
+ HashTable<support::ulittle32_t> OffsetIndexMap;
/// Buffer of string data.
std::vector<char> NamesBuffer;
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
index 3cd465503044..50d437642d0f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h
@@ -1,9 +1,8 @@
//===- NativeCompilandSymbol.h - native impl for compiland syms -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
index 4442a1ec41fb..073878afd129 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h
@@ -1,9 +1,8 @@
//==- NativeEnumGlobals.h - Native Global Enumerator impl --------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
new file mode 100644
index 000000000000..ca1e22bd82a2
--- /dev/null
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
@@ -0,0 +1,43 @@
+//==- NativeEnumInjectedSources.cpp - Native Injected Source Enumerator --*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H
+
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
+
+namespace llvm {
+namespace pdb {
+
+class InjectedSourceStream;
+class PDBStringTable;
+
+class NativeEnumInjectedSources : public IPDBEnumChildren<IPDBInjectedSource> {
+public:
+ NativeEnumInjectedSources(PDBFile &File, const InjectedSourceStream &IJS,
+ const PDBStringTable &Strings);
+
+ uint32_t getChildCount() const override;
+ std::unique_ptr<IPDBInjectedSource>
+ getChildAtIndex(uint32_t Index) const override;
+ std::unique_ptr<IPDBInjectedSource> getNext() override;
+ void reset() override;
+
+private:
+ PDBFile &File;
+ const InjectedSourceStream &Stream;
+ const PDBStringTable &Strings;
+ InjectedSourceStream::const_iterator Cur;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
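
NativeEnumInjectedSources adapts the stream above to the generic IPDBEnumChildren interface, which is how session users normally reach injected sources. A hedged sketch of consuming it through the abstract enumerator type; getFileName() is assumed from the IPDBInjectedSource interface, and the enumerator itself is constructed inside the native session rather than by callers:

    #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
    #include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>

    // Illustrative; "Enumerator" stands for any
    // IPDBEnumChildren<IPDBInjectedSource>, e.g. the one the session returns.
    void printInjectedFileNames(
        llvm::pdb::IPDBEnumChildren<llvm::pdb::IPDBInjectedSource> &Enumerator,
        llvm::raw_ostream &OS) {
      while (std::unique_ptr<llvm::pdb::IPDBInjectedSource> Source =
                 Enumerator.getNext())
        OS << Source->getFileName() << "\n";
      Enumerator.reset();
    }
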
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
index c268641a1008..94f1ee18ed9f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h
@@ -1,9 +1,8 @@
//==- NativeEnumModules.h - Native Module Enumerator impl --------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
index f8ac1655dc61..25c56567384f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h
@@ -1,9 +1,8 @@
//==- NativeEnumTypes.h - Native Type Enumerator impl ------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
index f4030da1d026..280358d02305 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h
@@ -1,9 +1,8 @@
//===- NativeExeSymbol.h - native impl for PDBSymbolExe ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
index 6505a7d39573..4133be220713 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h
@@ -1,9 +1,8 @@
//==- NativeRawSymbol.h - Native implementation of IPDBRawSymbol -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index 4878e47d3121..ee7d8cdec93b 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -1,9 +1,8 @@
//===- NativeSession.h - Native implementation of IPDBSession ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
index acc5eb8ff2c2..063585097899 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h
@@ -1,9 +1,8 @@
//===- NativeSymbolEnumerator.h - info about enumerator values --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
index 10e68e6df450..262864fd709f 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h
@@ -1,9 +1,8 @@
//===- NativeTypeArray.h ------------------------------------------ C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
index 725dfb89222f..8bb09f05d0bc 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h
@@ -1,9 +1,8 @@
//===- NativeTypeBuiltin.h ---------------------------------------- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
index a5cbefc18111..2068c88fc74a 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h
@@ -1,9 +1,8 @@
//===- NativeTypeEnum.h - info about enum type ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
index 1b1b87f6581f..a7ea287dffc8 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -1,9 +1,8 @@
//===- NativeTypeFunctionSig.h - info about function signature ---*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h b/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
index bcb7431fecf1..446f77db0f6c 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h
@@ -1,9 +1,8 @@
//===- NativeTypePointer.h - info about pointer type -------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
index 06eb6fcf3764..fe8a6f7f2bda 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h
@@ -1,9 +1,8 @@
//===- NativeTypeTypedef.h - info about typedef ------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
index 84821d8731be..8f4dee3e658c 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h
@@ -1,9 +1,8 @@
//===- NativeTypeUDT.h - info about class/struct type ------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
index a996f34ef859..4ec0f9bf6b3d 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -1,9 +1,8 @@
//===- NativeTypeVTShape.h - info about virtual table shape ------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/include/llvm/DebugInfo/PDB/Native/PDBFile.h
index 5e39ac3e37b7..56de4030167d 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBFile.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBFile.h
@@ -1,9 +1,8 @@
//===- PDBFile.h - Low level interface to a PDB file ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -33,6 +32,7 @@ namespace pdb {
class DbiStream;
class GlobalsStream;
class InfoStream;
+class InjectedSourceStream;
class PDBStringTable;
class PDBFileBuilder;
class PublicsStream;
@@ -84,7 +84,12 @@ public:
ArrayRef<support::ulittle32_t> getDirectoryBlockArray() const;
- std::unique_ptr<msf::MappedBlockStream> createIndexedStream(uint16_t SN);
+ std::unique_ptr<msf::MappedBlockStream>
+ createIndexedStream(uint16_t SN) const;
+ Expected<std::unique_ptr<msf::MappedBlockStream>>
+ safelyCreateIndexedStream(uint32_t StreamIndex) const;
+ Expected<std::unique_ptr<msf::MappedBlockStream>>
+ safelyCreateNamedStream(StringRef Name);
msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const;
msf::MSFStreamLayout getFpmStreamLayout() const;
@@ -100,6 +105,7 @@ public:
Expected<PublicsStream &> getPDBPublicsStream();
Expected<SymbolStream &> getPDBSymbolStream();
Expected<PDBStringTable &> getStringTable();
+ Expected<InjectedSourceStream &> getInjectedSourceStream();
BumpPtrAllocator &getAllocator() { return Allocator; }
@@ -111,15 +117,11 @@ public:
bool hasPDBSymbolStream();
bool hasPDBTpiStream() const;
bool hasPDBStringTable();
+ bool hasPDBInjectedSourceStream();
uint32_t getPointerSize();
private:
- Expected<std::unique_ptr<msf::MappedBlockStream>>
- safelyCreateIndexedStream(const msf::MSFLayout &Layout,
- BinaryStreamRef MsfData,
- uint32_t StreamIndex) const;
-
std::string FilePath;
BumpPtrAllocator &Allocator;
@@ -136,6 +138,7 @@ private:
std::unique_ptr<SymbolStream> Symbols;
std::unique_ptr<msf::MappedBlockStream> DirectoryStream;
std::unique_ptr<msf::MappedBlockStream> StringTableStream;
+ std::unique_ptr<InjectedSourceStream> InjectedSources;
std::unique_ptr<PDBStringTable> Strings;
};
}
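
The PDBFile hunks replace the private three-argument safelyCreateIndexedStream with public bounds-checked helpers and add the injected-source accessors. A sketch of the intended call pattern, with the file loading assumed elsewhere; the "/names" name and the stream index 1 are just example inputs:

    #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
    #include "llvm/Support/Error.h"

    // Illustrative; "File" is an already-loaded PDBFile. Both new helpers
    // return Expected<>, so a bad index or a missing named stream surfaces
    // as a recoverable llvm::Error.
    llvm::Error probeStreams(llvm::pdb::PDBFile &File) {
      auto Indexed = File.safelyCreateIndexedStream(/*StreamIndex=*/1);
      if (!Indexed)
        return Indexed.takeError();
      auto Named = File.safelyCreateNamedStream("/names");
      if (!Named)
        return Named.takeError();
      return llvm::Error::success();
    }
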
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 37458749a8d8..2abaa5f4cdc4 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -1,9 +1,8 @@
//===- PDBFileBuilder.h - PDB File Creation ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -98,7 +97,7 @@ private:
PDBStringTableBuilder Strings;
StringTableHashTraits InjectedSourceHashTraits;
- HashTable<SrcHeaderBlockEntry, StringTableHashTraits> InjectedSourceTable;
+ HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
SmallVector<InjectedSourceDescriptor, 2> InjectedSources;
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
index 29167c966d42..57f0b64a32a6 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -1,9 +1,8 @@
//===- PDBStringTable.h - PDB String Table -----------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
index 0f81c18eafe6..57267ef5c6c5 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h
@@ -1,9 +1,8 @@
//===- PDBStringTableBuilder.h - PDB String Table Builder -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
index 2d0222a9071a..ee28d108df8b 100644
--- a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h
@@ -1,9 +1,8 @@
//===- PublicsStream.h - PDB Public Symbol Stream -------- ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/RawConstants.h b/include/llvm/DebugInfo/PDB/Native/RawConstants.h
index fbbd3318d958..0dde5ef66932 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawConstants.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawConstants.h
@@ -1,9 +1,8 @@
//===- RawConstants.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/RawError.h b/include/llvm/DebugInfo/PDB/Native/RawError.h
index 97d11b4f20d1..aadb64c2e3f1 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawError.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawError.h
@@ -1,9 +1,8 @@
//===- RawError.h - Error extensions for raw PDB implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
index 8f6d6611c032..6119e6e5db26 100644
--- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h
+++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h
@@ -1,9 +1,8 @@
//===- RawTypes.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -177,7 +176,7 @@ struct DbiStreamHeader {
};
static_assert(sizeof(DbiStreamHeader) == 64, "Invalid DbiStreamHeader size!");
-/// The header preceeding the File Info Substream of the DBI stream.
+/// The header preceding the File Info Substream of the DBI stream.
struct FileInfoSubstreamHeader {
/// Total # of modules, should match number of records in the ModuleInfo
/// substream.
@@ -208,7 +207,7 @@ struct ModInfoFlags {
static const uint16_t TypeServerIndexShift = 8;
};
-/// The header preceeding each entry in the Module Info substream of the DBI
+/// The header preceding each entry in the Module Info substream of the DBI
/// stream. Corresponds to the type MODI in the reference implementation.
struct ModuleInfoHeader {
/// Currently opened module. This field is a pointer in the reference
@@ -273,7 +272,7 @@ struct PublicsStreamHeader {
support::ulittle32_t NumSections;
};
-// The header preceeding the global TPI stream.
+// The header preceding the global TPI stream.
// This corresponds to `HDR` in PDB/dbi/tpi.h.
struct TpiStreamHeader {
struct EmbeddedBuf {
@@ -301,7 +300,7 @@ struct TpiStreamHeader {
const uint32_t MinTpiHashBuckets = 0x1000;
const uint32_t MaxTpiHashBuckets = 0x40000;
-/// The header preceeding the global PDB Stream (Stream 1)
+/// The header preceding the global PDB Stream (Stream 1)
struct InfoStreamHeader {
support::ulittle32_t Version;
support::ulittle32_t Signature;
@@ -309,7 +308,7 @@ struct InfoStreamHeader {
codeview::GUID Guid;
};
-/// The header preceeding the /names stream.
+/// The header preceding the /names stream.
struct PDBStringTableHeader {
support::ulittle32_t Signature; // PDBStringTableSignature
support::ulittle32_t HashVersion; // 1 or 2
@@ -342,7 +341,6 @@ struct SrcHeaderBlockEntry {
short Padding; // Pad to 4 bytes.
char Reserved[8];
};
-
static_assert(sizeof(SrcHeaderBlockEntry) == 40, "Incorrect struct size!");
} // namespace pdb
diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
index 08e1d41e6ee9..0b15ab474f71 100644
--- a/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
+++ b/include/llvm/DebugInfo/PDB/Native/SymbolCache.h
@@ -1,9 +1,8 @@
//==- SymbolCache.h - Cache of native symbols and ids ------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
index ae9f7d657b70..4fe1bd9734e4 100644
--- a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h
@@ -1,9 +1,8 @@
//===- SymbolStream.cpp - PDB Symbol Stream Access --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
index c2996ccf1825..4ac60a80e701 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiHashing.h
@@ -1,9 +1,8 @@
//===- TpiHashing.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index b76576a7a263..1b7fd2d54cb2 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -1,9 +1,8 @@
//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
index 411720d6f56b..72d98e9c2c4d 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@@ -1,9 +1,8 @@
//===- TpiStreamBuilder.h - PDB Tpi Stream Creation -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDB.h b/include/llvm/DebugInfo/PDB/PDB.h
index 9f9da39ca6cc..6d734dc2f243 100644
--- a/include/llvm/DebugInfo/PDB/PDB.h
+++ b/include/llvm/DebugInfo/PDB/PDB.h
@@ -1,9 +1,8 @@
//===- PDB.h - base header file for creating a PDB reader -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBContext.h b/include/llvm/DebugInfo/PDB/PDBContext.h
index 0ce49f5ef922..7b6793f0a639 100644
--- a/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -1,9 +1,8 @@
//===-- PDBContext.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===/
@@ -44,15 +43,18 @@ namespace pdb {
void dump(raw_ostream &OS, DIDumpOptions DIDumpOpts) override;
DILineInfo getLineInfoForAddress(
- uint64_t Address,
+ object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
DILineInfoTable getLineInfoForAddressRange(
- uint64_t Address, uint64_t Size,
+ object::SectionedAddress Address, uint64_t Size,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
DIInliningInfo getInliningInfoForAddress(
- uint64_t Address,
+ object::SectionedAddress Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+ std::vector<DILocal>
+ getLocalsForAddress(object::SectionedAddress Address) override;
+
private:
std::string getFunctionName(uint64_t Address, DINameKind NameKind) const;
std::unique_ptr<IPDBSession> Session;
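
PDBContext's DIContext overrides now take object::SectionedAddress rather than a flat uint64_t, matching the DWARF context. A hedged shim for old call sites that only have a raw address, assuming "Ctx" is an already-constructed PDBContext:

    #include "llvm/DebugInfo/PDB/PDBContext.h"
    #include "llvm/Object/ObjectFile.h"

    // Illustrative adapter from a flat address to the new sectioned form.
    llvm::DILineInfo lookupLine(llvm::pdb::PDBContext &Ctx, uint64_t Addr) {
      llvm::object::SectionedAddress SA;
      SA.Address = Addr;
      SA.SectionIndex = llvm::object::SectionedAddress::UndefSection;
      return Ctx.getLineInfoForAddress(SA);
    }
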
diff --git a/include/llvm/DebugInfo/PDB/PDBExtras.h b/include/llvm/DebugInfo/PDB/PDBExtras.h
index aaec71aa8c90..45aba013e7c8 100644
--- a/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -1,9 +1,8 @@
//===- PDBExtras.h - helper functions and classes for PDBs ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -28,7 +27,8 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_VariantType &Value);
raw_ostream &operator<<(raw_ostream &OS, const PDB_CallingConv &Conv);
raw_ostream &operator<<(raw_ostream &OS, const PDB_BuiltinType &Type);
raw_ostream &operator<<(raw_ostream &OS, const PDB_DataKind &Data);
-raw_ostream &operator<<(raw_ostream &OS, const codeview::RegisterId &Reg);
+raw_ostream &operator<<(raw_ostream &OS,
+ const llvm::codeview::CPURegister &CpuReg);
raw_ostream &operator<<(raw_ostream &OS, const PDB_LocType &Loc);
raw_ostream &operator<<(raw_ostream &OS, const codeview::ThunkOrdinal &Thunk);
raw_ostream &operator<<(raw_ostream &OS, const PDB_Checksum &Checksum);
@@ -37,13 +37,12 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag);
raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access);
raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type);
raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine);
-raw_ostream &operator<<(raw_ostream &OS,
- const PDB_SourceCompression &Compression);
raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
+raw_ostream& dumpPDBSourceCompression(raw_ostream& OS, uint32_t Compression);
template <typename T>
void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {
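
The PDBExtras hunk swaps the codeview::RegisterId stream operator for a CPURegister one and replaces the PDB_SourceCompression operator<< with a free dumpPDBSourceCompression helper taking the raw value. A minimal sketch of the replacement call; the compression value is assumed to come from the injected-source record:

    #include "llvm/DebugInfo/PDB/PDBExtras.h"
    #include "llvm/Support/raw_ostream.h"

    // Illustrative replacement for the removed operator<< overload.
    void printCompression(llvm::raw_ostream &OS, uint32_t Compression) {
      llvm::pdb::dumpPDBSourceCompression(OS, Compression);
    }
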
diff --git a/include/llvm/DebugInfo/PDB/PDBSymDumper.h b/include/llvm/DebugInfo/PDB/PDBSymDumper.h
index c976935c48e0..f81b15f2353d 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymDumper.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymDumper.h
@@ -1,9 +1,8 @@
//===- PDBSymDumper.h - base interface for PDB symbol dumper *- C++ -----*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h
index 3a74f7c3aace..d9004a8894d9 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbol.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h
@@ -1,9 +1,8 @@
//===- PDBSymbol.h - base class for user-facing symbol types -----*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h b/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
index ef00df15cb0a..c76466a97b66 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolAnnotation.h
@@ -1,9 +1,8 @@
//===- PDBSymbolAnnotation.h - Accessors for querying PDB annotations ---*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLANNOTATION_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h b/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
index 2cf9c72a8886..cf471450d989 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolBlock.h
@@ -1,9 +1,8 @@
//===- PDBSymbolBlock.h - Accessors for querying PDB blocks -------------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLBLOCK_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
index 04dbd962ebd4..ca8b39d03f86 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompiland.h
@@ -1,9 +1,8 @@
//===- PDBSymbolCompiland.h - Accessors for querying PDB compilands -----*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLCOMPILAND_H
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
index 3d651a464d94..b82bb6c0a352 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h
@@ -1,9 +1,8 @@
//===- PDBSymbolCompilandDetails.h - PDB compiland details ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
index ffc408314d9a..61607a03593d 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h
@@ -1,9 +1,8 @@
//===- PDBSymbolCompilandEnv.h - compiland environment variables *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h b/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
index c29e4c31d3f3..75a86411643a 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolCustom.h
@@ -1,9 +1,8 @@
//===- PDBSymbolCustom.h - compiler-specific types --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolData.h b/include/llvm/DebugInfo/PDB/PDBSymbolData.h
index 217e1e976e6b..7e9b69d7cf4b 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolData.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolData.h
@@ -1,9 +1,8 @@
//===- PDBSymbolData.h - PDB data (e.g. variable) accessors -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolExe.h b/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
index 366d0cf4777f..1a9fb240a248 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolExe.h
@@ -1,9 +1,8 @@
//===- PDBSymbolExe.h - Accessors for querying executables in a PDB ----*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
index 129e557c7f25..6be27c8d3bc7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h
@@ -1,9 +1,8 @@
//===- PDBSymbolFunc.h - class representing a function instance -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
index 18db8a50fd1b..7152249cbd03 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h
@@ -1,9 +1,8 @@
//===- PDBSymbolFuncDebugEnd.h - function end bounds info -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
index 83d82f0cbcc5..3125c271d2e8 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h
@@ -1,9 +1,8 @@
//===- PDBSymbolFuncDebugStart.h - function start bounds info ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h b/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
index 8b2617fcd757..3625e23f014f 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h
@@ -1,9 +1,8 @@
//===- PDBSymbolLabel.h - label info ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
index 9def3edb469a..e2b2545d78ec 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h
@@ -1,9 +1,8 @@
//===- PDBSymbolPublicSymbol.h - public symbol info -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h b/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
index 7bb0555362db..274de8b0b16f 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h
@@ -1,9 +1,8 @@
//===- PDBSymbolThunk.h - Support for querying PDB thunks ---------------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
index 488f668bdc10..c0215c9ee4b1 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeArray.h - array type information ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
index 550deedd7504..bab292ee0d46 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeBaseClass.h - base class type information ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
index e07e88802b8f..7d94c3c97a2b 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeBuiltin.h - builtin type information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
index 0d8979c9c5c5..dc647aff48d3 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeCustom.h - custom compiler type information -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
index 58292a63501f..7a9e43785d67 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeDimension.h - array dimension type info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
index f463047bb5b5..3ac72801b202 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeEnum.h - enum type info ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
index 5b940b0737af..c4d9dd6308a3 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeFriend.h - friend type info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
index 074cb418fc82..22d3623496f2 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeFunctionArg.h - function arg type info ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
index dfdf436197c3..a1491ca2e415 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeFunctionSig.h - function signature type info *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
index d716abd640c6..6bc70bca82e7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeManaged.h - managed type info ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
index 300d6722fc4d..b36f459e880c 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypePointer.h - pointer type info ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
index d6e2a36486d5..2712d0617e0e 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeTypedef.h - typedef type info ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
index 937dd6c87221..3e73ad7ac85a 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeUDT.h - UDT type info -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
index 6efce4bbd686..e8161d311ea7 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeVTable.h - VTable type info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
index 8949052b0c0f..614060867042 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeVTableShape.h - VTable shape info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h b/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
index e935ac6ce0dc..cc29d38c2578 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h
@@ -1,9 +1,8 @@
//===- PDBSymbolUnknown.h - unknown symbol type -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h b/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
index 4e8c99fc8d89..fd812cb2f793 100644
--- a/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
+++ b/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h
@@ -1,9 +1,8 @@
//===- PDBSymbolUsingNamespace.h - using namespace info ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h
index 917f3ed73910..c26d8d1ed10c 100644
--- a/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -1,9 +1,8 @@
//===- PDBTypes.h - Defines enums for various fields contained in PDB ----====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -127,6 +126,7 @@ enum class PDB_Machine {
Am33 = 0x13,
Amd64 = 0x8664,
Arm = 0x1C0,
+ Arm64 = 0xaa64,
ArmNT = 0x1C4,
Ebc = 0xEBC,
x86 = 0x14C,
@@ -146,11 +146,69 @@ enum class PDB_Machine {
WceMipsV2 = 0x169
};
-enum class PDB_SourceCompression {
- None,
- RunLengthEncoded,
- Huffman,
- LZ,
+// A struct with an inner unnamed enum with explicit underlying type results
+// in an enum class that can implicitly convert to the underlying type, which
+// is convenient for this enum.
+struct PDB_SourceCompression {
+ enum : uint32_t {
+ // No compression. Produced e.g. by `link.exe /natvis:foo.natvis`.
+ None,
+ // Not known what produces this.
+ RunLengthEncoded,
+ // Not known what produces this.
+ Huffman,
+ // Not known what produces this.
+ LZ,
+ // Produced e.g. by `csc /debug`. The encoded data is its own mini-stream
+ // with the following layout (in little endian):
+ // GUID LanguageTypeGuid;
+ // GUID LanguageVendorGuid;
+ // GUID DocumentTypeGuid;
+ // GUID HashFunctionGuid;
+ // uint32_t HashDataSize;
+ // uint32_t CompressedDataSize;
+ // Followed by HashDataSize bytes containing a hash checksum,
+ // followed by CompressedDataSize bytes containing source contents.
+ //
+ // CompressedDataSize can be 0, in this case only the hash data is present.
+ // (CompressedDataSize is != 0 e.g. if `/embed` is passed to csc.exe.)
+ // The compressed data format is:
+ // uint32_t UncompressedDataSize;
+ // If UncompressedDataSize is 0, the data is stored uncompressed and
+ // CompressedDataSize stores the uncompressed size.
+ // If UncompressedDataSize is != 0, then the data is in raw deflate
+ // encoding as described in rfc1951.
+ //
+ // A GUID is 16 bytes, stored in the usual
+ // uint32_t
+ // uint16_t
+ // uint16_t
+  //     uint8_t[8]
+ // layout.
+ //
+ // Well-known GUIDs for LanguageTypeGuid are:
+ // 63a08714-fc37-11d2-904c-00c04fa302a1 C
+ // 3a12d0b7-c26c-11d0-b442-00a0244a1dd2 C++
+ // 3f5162f8-07c6-11d3-9053-00c04fa302a1 C#
+ // af046cd1-d0e1-11d2-977c-00a0c9b4d50c Cobol
+ // ab4f38c9-b6e6-43ba-be3b-58080b2ccce3 F#
+ // 3a12d0b4-c26c-11d0-b442-00a0244a1dd2 Java
+ // 3a12d0b6-c26c-11d0-b442-00a0244a1dd2 JScript
+ // af046cd2-d0e1-11d2-977c-00a0c9b4d50c Pascal
+ // 3a12d0b8-c26c-11d0-b442-00a0244a1dd2 Visual Basic
+ //
+ // Well-known GUIDs for LanguageVendorGuid are:
+ // 994b45c4-e6e9-11d2-903f-00c04fa302a1 Microsoft
+ //
+ // Well-known GUIDs for DocumentTypeGuid are:
+ // 5a869d0b-6611-11d3-bd2a-0000f80849bd Text
+ //
+ // Well-known GUIDs for HashFunctionGuid are:
+ // 406ea660-64cf-4c82-b6f0-42d48172a799 MD5 (HashDataSize is 16)
+ // ff1816ec-aa5e-4d10-87f7-6f4963833460 SHA1 (HashDataSize is 20)
+ // 8829d00f-11b8-4213-878b-770e8597ac16 SHA256 (HashDataSize is 32)
+ DotNet = 101,
+ };
};
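A minimal sketch of the DotNet mini-stream prefix documented in the comment above; the struct and the readDotNetHeader helper are illustrative names, not LLVM API, and assume the little-endian layout described there.

    // Illustrative only: fixed 72-byte prefix (4 GUIDs + 2 uint32_t fields),
    // followed by HashDataSize hash bytes and CompressedDataSize source bytes.
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    struct DotNetSourceHeader {
      std::array<uint8_t, 16> LanguageTypeGuid;
      std::array<uint8_t, 16> LanguageVendorGuid;
      std::array<uint8_t, 16> DocumentTypeGuid;
      std::array<uint8_t, 16> HashFunctionGuid;
      uint32_t HashDataSize;        // bytes of hash checksum that follow
      uint32_t CompressedDataSize;  // bytes of (possibly deflated) source
    };

    inline bool readDotNetHeader(const uint8_t *Data, size_t Size,
                                 DotNetSourceHeader &Out) {
      if (Size < 4 * 16 + 2 * 4)
        return false;
      std::memcpy(Out.LanguageTypeGuid.data(),   Data +  0, 16);
      std::memcpy(Out.LanguageVendorGuid.data(), Data + 16, 16);
      std::memcpy(Out.DocumentTypeGuid.data(),   Data + 32, 16);
      std::memcpy(Out.HashFunctionGuid.data(),   Data + 48, 16);
      // Integer fields are little endian per the layout comment above.
      Out.HashDataSize = uint32_t(Data[64]) | uint32_t(Data[65]) << 8 |
                         uint32_t(Data[66]) << 16 | uint32_t(Data[67]) << 24;
      Out.CompressedDataSize = uint32_t(Data[68]) | uint32_t(Data[69]) << 8 |
                               uint32_t(Data[70]) << 16 | uint32_t(Data[71]) << 24;
      return true;
    }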
/// These values correspond to the CV_call_e enumeration, and are documented
diff --git a/include/llvm/DebugInfo/PDB/UDTLayout.h b/include/llvm/DebugInfo/PDB/UDTLayout.h
index c4234c191e21..c67b093b63c0 100644
--- a/include/llvm/DebugInfo/PDB/UDTLayout.h
+++ b/include/llvm/DebugInfo/PDB/UDTLayout.h
@@ -1,9 +1,8 @@
//===- UDTLayout.h - UDT layout info ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index ab82be3706d8..db7a61a8f160 100644
--- a/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -1,9 +1,8 @@
//===- llvm/DebugInfo/Symbolize/DIPrinter.h ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,15 +20,22 @@ namespace llvm {
struct DILineInfo;
class DIInliningInfo;
struct DIGlobal;
+struct DILocal;
namespace symbolize {
class DIPrinter {
+public:
+ enum class OutputStyle { LLVM, GNU };
+
+private:
raw_ostream &OS;
bool PrintFunctionNames;
bool PrintPretty;
int PrintSourceContext;
bool Verbose;
+ bool Basenames;
+ OutputStyle Style;
void print(const DILineInfo &Info, bool Inlined);
void printContext(const std::string &FileName, int64_t Line);
@@ -37,14 +43,16 @@ class DIPrinter {
public:
DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true,
bool PrintPretty = false, int PrintSourceContext = 0,
- bool Verbose = false)
+ bool Verbose = false, bool Basenames = false,
+ OutputStyle Style = OutputStyle::LLVM)
: OS(OS), PrintFunctionNames(PrintFunctionNames),
PrintPretty(PrintPretty), PrintSourceContext(PrintSourceContext),
- Verbose(Verbose) {}
+ Verbose(Verbose), Basenames(Basenames), Style(Style) {}
DIPrinter &operator<<(const DILineInfo &Info);
DIPrinter &operator<<(const DIInliningInfo &Info);
DIPrinter &operator<<(const DIGlobal &Global);
+ DIPrinter &operator<<(const DILocal &Local);
};
}
}
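A minimal usage sketch of the extended DIPrinter constructor, assuming a DILineInfo obtained elsewhere; DIPrinter and DILineInfo are the real LLVM types, the printLineInfo wrapper is illustrative.

    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
    #include "llvm/Support/raw_ostream.h"

    void printLineInfo(const llvm::DILineInfo &Info) {
      using llvm::symbolize::DIPrinter;
      // The last two arguments (Basenames, Style) are the parameters added in
      // this revision; GNU style mirrors addr2line-like output.
      DIPrinter Printer(llvm::outs(), /*PrintFunctionNames=*/true,
                        /*PrintPretty=*/false, /*PrintSourceContext=*/0,
                        /*Verbose=*/false, /*Basenames=*/true,
                        DIPrinter::OutputStyle::GNU);
      Printer << Info;
    }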
diff --git a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
index e576a91e887c..506ecc424b4c 100644
--- a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
+++ b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h
@@ -1,9 +1,8 @@
//===- SymbolizableModule.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,13 +24,16 @@ class SymbolizableModule {
public:
virtual ~SymbolizableModule() = default;
- virtual DILineInfo symbolizeCode(uint64_t ModuleOffset,
+ virtual DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset,
FunctionNameKind FNKind,
bool UseSymbolTable) const = 0;
- virtual DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
- FunctionNameKind FNKind,
- bool UseSymbolTable) const = 0;
- virtual DIGlobal symbolizeData(uint64_t ModuleOffset) const = 0;
+ virtual DIInliningInfo
+ symbolizeInlinedCode(object::SectionedAddress ModuleOffset,
+ FunctionNameKind FNKind, bool UseSymbolTable) const = 0;
+ virtual DIGlobal
+ symbolizeData(object::SectionedAddress ModuleOffset) const = 0;
+ virtual std::vector<DILocal>
+ symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;
// Return true if this is a 32-bit x86 PE COFF module.
virtual bool isWin32Module() const = 0;
diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 289148f569db..d3da28ca0b7b 100644
--- a/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -1,9 +1,8 @@
//===- Symbolize.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,35 +35,35 @@ using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
class LLVMSymbolizer {
public:
struct Options {
- FunctionNameKind PrintFunctions;
- bool UseSymbolTable : 1;
- bool Demangle : 1;
- bool RelativeAddresses : 1;
+ FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName;
+ bool UseSymbolTable = true;
+ bool Demangle = true;
+ bool RelativeAddresses = false;
std::string DefaultArch;
std::vector<std::string> DsymHints;
-
- Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName,
- bool UseSymbolTable = true, bool Demangle = true,
- bool RelativeAddresses = false, std::string DefaultArch = "")
- : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable),
- Demangle(Demangle), RelativeAddresses(RelativeAddresses),
- DefaultArch(std::move(DefaultArch)) {}
+ std::string FallbackDebugPath;
+ std::string DWPName;
};
- LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {}
+ LLVMSymbolizer() = default;
+ LLVMSymbolizer(const Options &Opts) : Opts(Opts) {}
~LLVMSymbolizer() {
flush();
}
+ Expected<DILineInfo> symbolizeCode(const ObjectFile &Obj,
+ object::SectionedAddress ModuleOffset);
Expected<DILineInfo> symbolizeCode(const std::string &ModuleName,
- uint64_t ModuleOffset,
- StringRef DWPName = "");
- Expected<DIInliningInfo> symbolizeInlinedCode(const std::string &ModuleName,
- uint64_t ModuleOffset,
- StringRef DWPName = "");
+ object::SectionedAddress ModuleOffset);
+ Expected<DIInliningInfo>
+ symbolizeInlinedCode(const std::string &ModuleName,
+ object::SectionedAddress ModuleOffset);
Expected<DIGlobal> symbolizeData(const std::string &ModuleName,
- uint64_t ModuleOffset);
+ object::SectionedAddress ModuleOffset);
+ Expected<std::vector<DILocal>>
+ symbolizeFrame(const std::string &ModuleName,
+ object::SectionedAddress ModuleOffset);
void flush();
static std::string
@@ -74,14 +73,23 @@ public:
private:
// Bundles together object file with code/data and object file with
// corresponding debug info. These objects can be the same.
- using ObjectPair = std::pair<ObjectFile *, ObjectFile *>;
+ using ObjectPair = std::pair<const ObjectFile *, const ObjectFile *>;
+
+ Expected<DILineInfo>
+ symbolizeCodeCommon(SymbolizableModule *Info,
+ object::SectionedAddress ModuleOffset);
/// Returns a SymbolizableModule or an error if loading debug info failed.
/// Only one attempt is made to load a module, and errors during loading are
/// only reported once. Subsequent calls to get module info for a module that
/// failed to load will return nullptr.
Expected<SymbolizableModule *>
- getOrCreateModuleInfo(const std::string &ModuleName, StringRef DWPName = "");
+ getOrCreateModuleInfo(const std::string &ModuleName);
+
+ Expected<SymbolizableModule *>
+ createModuleInfo(const ObjectFile *Obj,
+ std::unique_ptr<DIContext> Context,
+ StringRef ModuleName);
ObjectFile *lookUpDsymFile(const std::string &Path,
const MachOObjectFile *ExeObj,
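A hedged sketch of the reworked interface: Options now relies on default member initializers and the symbolize* entry points take an object::SectionedAddress instead of a raw uint64_t offset. LLVMSymbolizer, Options, and SectionedAddress are the real LLVM entities; the helper name and the fallback path value are illustrative.

    #include "llvm/DebugInfo/Symbolize/Symbolize.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::symbolize;

    void symbolizeOneAddress(const std::string &Binary, uint64_t Addr) {
      LLVMSymbolizer::Options Opts;               // defaults, no constructor args
      Opts.FallbackDebugPath = "/usr/lib/debug";  // illustrative value
      LLVMSymbolizer Symbolizer(Opts);

      object::SectionedAddress ModuleOffset;
      ModuleOffset.Address = Addr;                // SectionIndex stays UndefSection

      Expected<DILineInfo> InfoOrErr = Symbolizer.symbolizeCode(Binary, ModuleOffset);
      if (!InfoOrErr) {
        logAllUnhandledErrors(InfoOrErr.takeError(), errs(), "symbolize: ");
        return;
      }
      outs() << InfoOrErr->FileName << ":" << InfoOrErr->Line << "\n";
    }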
diff --git a/include/llvm/Demangle/Compiler.h b/include/llvm/Demangle/Compiler.h
deleted file mode 100644
index 248d6e3a7faa..000000000000
--- a/include/llvm/Demangle/Compiler.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===--- Compiler.h ---------------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-// This file contains a variety of feature test macros copied from
-// include/llvm/Support/Compiler.h so that LLVMDemangle does not need to take
-// a dependency on LLVMSupport.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEMANGLE_COMPILER_H
-#define LLVM_DEMANGLE_COMPILER_H
-
-#ifdef _MSC_VER
-// snprintf is implemented in VS 2015
-#if _MSC_VER < 1900
-#define snprintf _snprintf_s
-#endif
-#endif
-
-#ifndef __has_feature
-#define __has_feature(x) 0
-#endif
-
-#ifndef __has_cpp_attribute
-#define __has_cpp_attribute(x) 0
-#endif
-
-#ifndef __has_attribute
-#define __has_attribute(x) 0
-#endif
-
-#ifndef __has_builtin
-#define __has_builtin(x) 0
-#endif
-
-#ifndef LLVM_GNUC_PREREQ
-#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
-#define LLVM_GNUC_PREREQ(maj, min, patch) \
- ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \
- ((maj) << 20) + ((min) << 10) + (patch))
-#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
-#define LLVM_GNUC_PREREQ(maj, min, patch) \
- ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10))
-#else
-#define LLVM_GNUC_PREREQ(maj, min, patch) 0
-#endif
-#endif
-
-#if __has_attribute(used) || LLVM_GNUC_PREREQ(3, 1, 0)
-#define LLVM_ATTRIBUTE_USED __attribute__((__used__))
-#else
-#define LLVM_ATTRIBUTE_USED
-#endif
-
-#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0)
-#define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
-#elif defined(_MSC_VER)
-#define LLVM_BUILTIN_UNREACHABLE __assume(false)
-#endif
-
-#if __has_attribute(noinline) || LLVM_GNUC_PREREQ(3, 4, 0)
-#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
-#elif defined(_MSC_VER)
-#define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline)
-#else
-#define LLVM_ATTRIBUTE_NOINLINE
-#endif
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-#define LLVM_DUMP_METHOD LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED
-#else
-#define LLVM_DUMP_METHOD LLVM_ATTRIBUTE_NOINLINE
-#endif
-
-#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
-#define LLVM_FALLTHROUGH [[fallthrough]]
-#elif __has_cpp_attribute(gnu::fallthrough)
-#define LLVM_FALLTHROUGH [[gnu::fallthrough]]
-#elif !__cplusplus
-// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
-// error when __has_cpp_attribute is given a scoped attribute in C mode.
-#define LLVM_FALLTHROUGH
-#elif __has_cpp_attribute(clang::fallthrough)
-#define LLVM_FALLTHROUGH [[clang::fallthrough]]
-#else
-#define LLVM_FALLTHROUGH
-#endif
-
-#endif
diff --git a/include/llvm/Demangle/Demangle.h b/include/llvm/Demangle/Demangle.h
index 4c9dc9569e18..6fea7ef13f11 100644
--- a/include/llvm/Demangle/Demangle.h
+++ b/include/llvm/Demangle/Demangle.h
@@ -1,9 +1,8 @@
//===--- Demangle.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_DEMANGLE_DEMANGLE_H
#include <cstddef>
+#include <string>
namespace llvm {
/// This is a llvm local version of __cxa_demangle. Other than the name and
@@ -36,6 +36,13 @@ enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0 };
char *microsoftDemangle(const char *mangled_name, char *buf, size_t *n,
int *status, MSDemangleFlags Flags = MSDF_None);
+/// Attempt to demangle a string using different demangling schemes.
+/// The function uses heuristics to determine which demangling scheme to use.
+/// \param MangledName - reference to string to demangle.
+/// \returns - the demangled string, or a copy of the input string if no
+/// demangling occurred.
+std::string demangle(const std::string &MangledName);
+
/// "Partial" demangler. This supports demangling a string into an AST
/// (typically an intermediate stage in itaniumDemangle) and querying certain
/// properties or partially printing the demangled name.
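A small example of the llvm::demangle convenience wrapper declared above; it picks a scheme heuristically and returns the input unchanged when nothing matches. The expected outputs shown in the comments are typical demangler results and may differ slightly in formatting.

    #include "llvm/Demangle/Demangle.h"
    #include <iostream>

    int main() {
      std::cout << llvm::demangle("_Z1fv") << "\n";        // "f()"
      std::cout << llvm::demangle("?f@@YAHD@Z") << "\n";   // "int __cdecl f(char)"
      std::cout << llvm::demangle("not_mangled") << "\n";  // input returned as-is
      return 0;
    }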
diff --git a/include/llvm/Demangle/DemangleConfig.h b/include/llvm/Demangle/DemangleConfig.h
new file mode 100644
index 000000000000..73f89d357c85
--- /dev/null
+++ b/include/llvm/Demangle/DemangleConfig.h
@@ -0,0 +1,99 @@
+//===--- DemangleConfig.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a variety of feature test macros copied from
+// include/llvm/Support/Compiler.h so that LLVMDemangle does not need to take
+// a dependency on LLVMSupport.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_COMPILER_H
+#define LLVM_DEMANGLE_COMPILER_H
+
+#ifdef _MSC_VER
+// snprintf is implemented in VS 2015
+#if _MSC_VER < 1900
+#define snprintf _snprintf_s
+#endif
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#ifndef __has_cpp_attribute
+#define __has_cpp_attribute(x) 0
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+#ifndef DEMANGLE_GNUC_PREREQ
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
+#define DEMANGLE_GNUC_PREREQ(maj, min, patch) \
+ ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) + __GNUC_PATCHLEVEL__ >= \
+ ((maj) << 20) + ((min) << 10) + (patch))
+#elif defined(__GNUC__) && defined(__GNUC_MINOR__)
+#define DEMANGLE_GNUC_PREREQ(maj, min, patch) \
+ ((__GNUC__ << 20) + (__GNUC_MINOR__ << 10) >= ((maj) << 20) + ((min) << 10))
+#else
+#define DEMANGLE_GNUC_PREREQ(maj, min, patch) 0
+#endif
+#endif
+
+#if __has_attribute(used) || DEMANGLE_GNUC_PREREQ(3, 1, 0)
+#define DEMANGLE_ATTRIBUTE_USED __attribute__((__used__))
+#else
+#define DEMANGLE_ATTRIBUTE_USED
+#endif
+
+#if __has_builtin(__builtin_unreachable) || DEMANGLE_GNUC_PREREQ(4, 5, 0)
+#define DEMANGLE_UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define DEMANGLE_UNREACHABLE __assume(false)
+#else
+#define DEMANGLE_UNREACHABLE
+#endif
+
+#if __has_attribute(noinline) || DEMANGLE_GNUC_PREREQ(3, 4, 0)
+#define DEMANGLE_ATTRIBUTE_NOINLINE __attribute__((noinline))
+#elif defined(_MSC_VER)
+#define DEMANGLE_ATTRIBUTE_NOINLINE __declspec(noinline)
+#else
+#define DEMANGLE_ATTRIBUTE_NOINLINE
+#endif
+
+#if !defined(NDEBUG)
+#define DEMANGLE_DUMP_METHOD DEMANGLE_ATTRIBUTE_NOINLINE DEMANGLE_ATTRIBUTE_USED
+#else
+#define DEMANGLE_DUMP_METHOD DEMANGLE_ATTRIBUTE_NOINLINE
+#endif
+
+#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
+#define DEMANGLE_FALLTHROUGH [[fallthrough]]
+#elif __has_cpp_attribute(gnu::fallthrough)
+#define DEMANGLE_FALLTHROUGH [[gnu::fallthrough]]
+#elif !__cplusplus
+// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
+// error when __has_cpp_attribute is given a scoped attribute in C mode.
+#define DEMANGLE_FALLTHROUGH
+#elif __has_cpp_attribute(clang::fallthrough)
+#define DEMANGLE_FALLTHROUGH [[clang::fallthrough]]
+#else
+#define DEMANGLE_FALLTHROUGH
+#endif
+
+#define DEMANGLE_NAMESPACE_BEGIN namespace llvm { namespace itanium_demangle {
+#define DEMANGLE_NAMESPACE_END } }
+
+#endif
diff --git a/include/llvm/Demangle/ItaniumDemangle.h b/include/llvm/Demangle/ItaniumDemangle.h
index 0b9187f30a5a..aaccb27e17a3 100644
--- a/include/llvm/Demangle/ItaniumDemangle.h
+++ b/include/llvm/Demangle/ItaniumDemangle.h
@@ -1,23 +1,26 @@
//===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// Generic itanium demangler library. This file has two byte-per-byte identical
+// copies in the source tree, one in libcxxabi, and the other in llvm.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
-#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef DEMANGLE_ITANIUMDEMANGLE_H
+#define DEMANGLE_ITANIUMDEMANGLE_H
// FIXME: (possibly) incomplete list of features that clang mangles that this
// file does not yet support:
// - C++ modules TS
-#include "llvm/Demangle/Compiler.h"
-#include "llvm/Demangle/StringView.h"
-#include "llvm/Demangle/Utility.h"
-
+#include "DemangleConfig.h"
+#include "StringView.h"
+#include "Utility.h"
#include <cassert>
#include <cctype>
#include <cstdio>
@@ -86,6 +89,7 @@
X(InitListExpr) \
X(FoldExpr) \
X(ThrowExpr) \
+ X(UUIDOfExpr) \
X(BoolExpr) \
X(IntegerCastExpr) \
X(IntegerLiteral) \
@@ -95,8 +99,8 @@
X(BracedExpr) \
X(BracedRangeExpr)
-namespace llvm {
-namespace itanium_demangle {
+DEMANGLE_NAMESPACE_BEGIN
+
// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
@@ -194,7 +198,7 @@ public:
virtual ~Node() = default;
#ifndef NDEBUG
- LLVM_DUMP_METHOD void dump() const;
+ DEMANGLE_DUMP_METHOD void dump() const;
#endif
};
@@ -1278,7 +1282,7 @@ public:
case SpecialSubKind::iostream:
return StringView("basic_iostream");
}
- LLVM_BUILTIN_UNREACHABLE;
+ DEMANGLE_UNREACHABLE;
}
void printLeft(OutputStream &S) const override {
@@ -1330,7 +1334,7 @@ public:
case SpecialSubKind::iostream:
return StringView("iostream");
}
- LLVM_BUILTIN_UNREACHABLE;
+ DEMANGLE_UNREACHABLE;
}
void printLeft(OutputStream &S) const override {
@@ -1870,6 +1874,21 @@ public:
}
};
+// MSVC __uuidof extension, generated by clang in -fms-extensions mode.
+class UUIDOfExpr : public Node {
+ Node *Operand;
+public:
+ UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Operand); }
+
+ void printLeft(OutputStream &S) const override {
+ S << "__uuidof(";
+ Operand->print(S);
+ S << ")";
+ }
+};
+
class BoolExpr : public Node {
bool Value;
@@ -2476,6 +2495,12 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *) {
return nullptr;
return make<ClosureTypeName>(Params, Count);
}
+ if (consumeIf("Ub")) {
+ (void)parseNumber();
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<NameType>("'block-literal'");
+ }
return nullptr;
}
@@ -2785,11 +2810,13 @@ AbstractManglingParser<Derived, Alloc>::parseOperatorName(NameState *State) {
// <ctor-dtor-name> ::= C1 # complete object constructor
// ::= C2 # base object constructor
// ::= C3 # complete object allocating constructor
-// extension ::= C5 # ?
+// extension ::= C4 # gcc old-style "[unified]" constructor
+// extension ::= C5 # the COMDAT used for ctors
// ::= D0 # deleting destructor
// ::= D1 # complete object destructor
// ::= D2 # base object destructor
-// extension ::= D5 # ?
+// extension ::= D4 # gcc old-style "[unified]" destructor
+// extension ::= D5 # the COMDAT used for dtors
template <typename Derived, typename Alloc>
Node *
AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
@@ -2812,7 +2839,8 @@ AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
if (consumeIf('C')) {
bool IsInherited = consumeIf('I');
- if (look() != '1' && look() != '2' && look() != '3' && look() != '5')
+ if (look() != '1' && look() != '2' && look() != '3' && look() != '4' &&
+ look() != '5')
return nullptr;
int Variant = look() - '0';
++First;
@@ -2821,15 +2849,15 @@ AbstractManglingParser<Derived, Alloc>::parseCtorDtorName(Node *&SoFar,
if (getDerived().parseName(State) == nullptr)
return nullptr;
}
- return make<CtorDtorName>(SoFar, false, Variant);
+ return make<CtorDtorName>(SoFar, /*IsDtor=*/false, Variant);
}
- if (look() == 'D' &&
- (look(1) == '0' || look(1) == '1' || look(1) == '2' || look(1) == '5')) {
+ if (look() == 'D' && (look(1) == '0' || look(1) == '1' || look(1) == '2' ||
+ look(1) == '4' || look(1) == '5')) {
int Variant = look(1) - '0';
First += 2;
if (State) State->CtorDtorConversion = true;
- return make<CtorDtorName>(SoFar, true, Variant);
+ return make<CtorDtorName>(SoFar, /*IsDtor=*/true, Variant);
}
return nullptr;
@@ -3467,7 +3495,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
Result = getDerived().parseFunctionType();
break;
}
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
}
case 'U': {
Result = getDerived().parseQualifiedType();
@@ -3564,7 +3592,11 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
StringView Res = parseBareSourceName();
if (Res.empty())
return nullptr;
- return make<NameType>(Res);
+ // Typically, <builtin-type>s are not considered substitution candidates,
+ // but the exception to that exception is vendor extended types (Itanium C++
+ // ABI 5.9.1).
+ Result = make<NameType>(Res);
+ break;
}
case 'D':
switch (look(1)) {
@@ -3592,6 +3624,10 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
case 's':
First += 2;
return make<NameType>("char16_t");
+ // ::= Du # char8_t (C++2a, not yet in the Itanium spec)
+ case 'u':
+ First += 2;
+ return make<NameType>("char8_t");
// ::= Da # auto (in dependent new-expressions)
case 'a':
First += 2;
@@ -3754,7 +3790,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
// substitution table.
return Sub;
}
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
}
// ::= <class-enum-type>
default: {
@@ -4633,6 +4669,21 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
case '9':
return getDerived().parseUnresolvedName();
}
+
+ if (consumeIf("u8__uuidoft")) {
+ Node *Ty = getDerived().parseType();
+ if (!Ty)
+ return nullptr;
+ return make<UUIDOfExpr>(Ty);
+ }
+
+ if (consumeIf("u8__uuidofz")) {
+ Node *Ex = getDerived().parseExpr();
+ if (!Ex)
+ return nullptr;
+ return make<UUIDOfExpr>(Ex);
+ }
+
return nullptr;
}
@@ -5139,7 +5190,7 @@ AbstractManglingParser<Derived, Alloc>::parseTemplateArgs(bool TagTemplates) {
// extension ::= ___Z <encoding> _block_invoke_<decimal-digit>+
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parse() {
- if (consumeIf("_Z")) {
+ if (consumeIf("_Z") || consumeIf("__Z")) {
Node *Encoding = getDerived().parseEncoding();
if (Encoding == nullptr)
return nullptr;
@@ -5152,7 +5203,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parse() {
return Encoding;
}
- if (consumeIf("___Z")) {
+ if (consumeIf("___Z") || consumeIf("____Z")) {
Node *Encoding = getDerived().parseEncoding();
if (Encoding == nullptr || !consumeIf("_block_invoke"))
return nullptr;
@@ -5178,7 +5229,6 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
Alloc>::AbstractManglingParser;
};
-} // namespace itanium_demangle
-} // namespace llvm
+DEMANGLE_NAMESPACE_END
-#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#endif // DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/include/llvm/Demangle/MicrosoftDemangle.h b/include/llvm/Demangle/MicrosoftDemangle.h
index 97b918fc9459..382e79401c43 100644
--- a/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/include/llvm/Demangle/MicrosoftDemangle.h
@@ -1,16 +1,15 @@
//===------------------------- MicrosoftDemangle.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
#define LLVM_DEMANGLE_MICROSOFT_DEMANGLE_H
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"
@@ -56,24 +55,21 @@ public:
}
}
- char *allocUnalignedBuffer(size_t Length) {
- uint8_t *Buf = Head->Buf + Head->Used;
-
- Head->Used += Length;
- if (Head->Used > Head->Capacity) {
- // It's possible we need a buffer which is larger than our default unit
- // size, so we need to be careful to add a node with capacity that is at
- // least as large as what we need.
- addNode(std::max(AllocUnit, Length));
- Head->Used = Length;
- Buf = Head->Buf;
- }
+ char *allocUnalignedBuffer(size_t Size) {
+ assert(Head && Head->Buf);
+
+ uint8_t *P = Head->Buf + Head->Used;
+
+ Head->Used += Size;
+ if (Head->Used <= Head->Capacity)
+ return reinterpret_cast<char *>(P);
- return reinterpret_cast<char *>(Buf);
+ addNode(std::max(AllocUnit, Size));
+ Head->Used = Size;
+ return reinterpret_cast<char *>(Head->Buf);
}
template <typename T, typename... Args> T *allocArray(size_t Count) {
-
size_t Size = Count * sizeof(T);
assert(Head && Head->Buf);
@@ -84,17 +80,16 @@ public:
size_t Adjustment = AlignedP - P;
Head->Used += Size + Adjustment;
- if (Head->Used < Head->Capacity)
+ if (Head->Used <= Head->Capacity)
return new (PP) T[Count]();
- addNode(AllocUnit);
+ addNode(std::max(AllocUnit, Size));
Head->Used = Size;
return new (Head->Buf) T[Count]();
}
template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
-
- size_t Size = sizeof(T);
+ constexpr size_t Size = sizeof(T);
assert(Head && Head->Buf);
size_t P = (size_t)Head->Buf + Head->Used;
@@ -104,9 +99,10 @@ public:
size_t Adjustment = AlignedP - P;
Head->Used += Size + Adjustment;
- if (Head->Used < Head->Capacity)
+ if (Head->Used <= Head->Capacity)
return new (PP) T(std::forward<Args>(ConstructorArgs)...);
+ static_assert(Size < AllocUnit, "");
addNode(AllocUnit);
Head->Used = Size;
return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
@@ -160,6 +156,8 @@ public:
private:
SymbolNode *demangleEncodedSymbol(StringView &MangledName,
QualifiedNameNode *QN);
+ SymbolNode *demangleDeclarator(StringView &MangledName);
+ SymbolNode *demangleMD5Name(StringView &MangledName);
VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
StorageClass SC);
@@ -179,8 +177,9 @@ private:
ArrayTypeNode *demangleArrayType(StringView &MangledName);
+ NodeArrayNode *demangleFunctionParameterList(StringView &MangledName,
+ bool &IsVariadic);
NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
- NodeArrayNode *demangleFunctionParameterList(StringView &MangledName);
std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
uint64_t demangleUnsigned(StringView &MangledName);
@@ -207,6 +206,8 @@ private:
NamedIdentifierNode *demangleBackRefName(StringView &MangledName);
IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName,
NameBackrefBehavior NBB);
+ IntrinsicFunctionKind
+ translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group);
IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName);
IdentifierNode *
demangleFunctionIdentifierCode(StringView &MangledName,
@@ -223,7 +224,7 @@ private:
demangleSpecialTableSymbolNode(StringView &MangledName,
SpecialIntrinsicKind SIK);
LocalStaticGuardVariableNode *
- demangleLocalStaticGuard(StringView &MangledName);
+ demangleLocalStaticGuard(StringView &MangledName, bool IsThread);
VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
StringView &MangledName,
StringView VariableName);
diff --git a/include/llvm/Demangle/MicrosoftDemangleNodes.h b/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 9e3478e9fd29..da9d9d5bfdc0 100644
--- a/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -1,11 +1,30 @@
+//===- MicrosoftDemangleNodes.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AST nodes used in the MSVC demangler.
+//
+//===----------------------------------------------------------------------===//
+
#ifndef LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
#define LLVM_SUPPORT_MICROSOFTDEMANGLENODES_H
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
#include "llvm/Demangle/StringView.h"
#include <array>
+namespace llvm {
+namespace itanium_demangle {
class OutputStream;
+}
+}
+
+using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::StringView;
namespace llvm {
namespace ms_demangle {
@@ -63,6 +82,7 @@ enum class PrimitiveKind {
Char,
Schar,
Uchar,
+ Char8,
Char16,
Char32,
Short,
@@ -151,8 +171,8 @@ enum class IntrinsicFunctionKind : uint8_t {
VectorCopyCtorIter, // ?__G vector copy constructor iterator
VectorVbaseCopyCtorIter, // ?__H vector vbase copy constructor iterator
ManVectorVbaseCopyCtorIter, // ?__I managed vector vbase copy constructor
- CoAwait, // ?__L co_await
- Spaceship, // operator<=>
+ CoAwait, // ?__L operator co_await
+ Spaceship, // ?__M operator<=>
MaxIntrinsic
};
@@ -324,7 +344,7 @@ struct FunctionSignatureNode : public TypeNode {
// Function parameters
NodeArrayNode *Params = nullptr;
- // True if the function type is noexcept
+ // True if the function type is noexcept.
bool IsNoexcept = false;
};
@@ -389,6 +409,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode {
void output(OutputStream &OS, OutputFlags Flags) const override;
+ bool IsThread = false;
uint32_t ScopeIndex = 0;
};
@@ -494,7 +515,7 @@ struct NodeArrayNode : public Node {
void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
- Node **Nodes = 0;
+ Node **Nodes = nullptr;
size_t Count = 0;
};
@@ -602,4 +623,4 @@ struct FunctionSymbolNode : public SymbolNode {
} // namespace ms_demangle
} // namespace llvm
-#endif
\ No newline at end of file
+#endif
diff --git a/include/llvm/Demangle/README.txt b/include/llvm/Demangle/README.txt
new file mode 100644
index 000000000000..514ff6dd16f2
--- /dev/null
+++ b/include/llvm/Demangle/README.txt
@@ -0,0 +1,52 @@
+Itanium Name Demangler Library
+==============================
+
+Introduction
+------------
+
+This directory contains the generic itanium name demangler library. The main
+purpose of the library is to demangle C++ symbols, i.e. convert the string
+"_Z1fv" into "f()". You can also use the CRTP base ManglingParser to perform
+some simple analysis on the mangled name, or (in LLVM) use the opaque
+ItaniumPartialDemangler to query the demangled AST.
+
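As an illustration only (not part of this patch), a minimal consumer of the
LLVM copy could look roughly like the sketch below. It assumes the
ItaniumPartialDemangler interface declared in llvm/Demangle/Demangle.h.

// Sketch: demangle "_Z1fv" with ItaniumPartialDemangler (assumed API).
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  llvm::ItaniumPartialDemangler D;
  if (D.partialDemangle("_Z1fv"))   // returns true on failure to parse
    return 1;
  size_t Size = 0;
  if (char *Str = D.finishDemangle(nullptr, &Size)) { // malloc'd buffer
    std::printf("%s\n", Str);                         // prints "f()"
    std::free(Str);
  }
  return 0;
}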
+Why are there multiple copies of this library in the source tree?
+---------------------------------------------------------------------
+
+This directory is mirrored between libcxxabi/demangle and
+llvm/include/llvm/Demangle. The simple reason for this is that both projects
+need to demangle symbols, but neither can depend on each other. libcxxabi needs
+the demangler to implement __cxa_demangle, which is part of the itanium ABI
+spec. LLVM needs a copy for a bunch of places, but doesn't want to use the
+system's __cxa_demangle because it a) might not be available (e.g., on Windows),
+and b) probably isn't that up-to-date on the latest language features.
+
+The copy of the demangler in LLVM has some extra stuff that isn't needed in
+libcxxabi (i.e., the MSVC demangler, ItaniumPartialDemangler), which depends on the
+shared generic components. Despite these differences, we want to keep the "core"
+generic demangling library identical between both copies to simplify development
+and testing.
+
+If you're working on the generic library, then do the work first in libcxxabi,
+then run the cp-to-llvm.sh script in src/demangle. This script takes as an
+argument the path to llvm, and re-copies the changes you made to libcxxabi over.
+Note that this script just blindly overwrites all changes to the generic library
+in llvm, so be careful.
+
+Because the core demangler needs to work in libcxxabi, everything needs to be
+declared in an anonymous namespace (see DEMANGLE_NAMESPACE_BEGIN), and you can't
+introduce any code that depends on the libcxx dylib.
+
+Hopefully, when LLVM becomes a monorepo, we can de-duplicate this code, and have
+both LLVM and libcxxabi depend on a shared demangler library.
+
+Testing
+-------
+
+The tests are split up between libcxxabi/test/{unit,}test_demangle.cpp, and
+llvm/unittest/Demangle. The llvm directory should only get tests for stuff not
+included in the core library. In the future though, we should probably move all
+the tests to LLVM.
+
+It is also a really good idea to run libFuzzer after non-trivial changes, see
+libcxxabi/fuzz/cxa_demangle_fuzzer.cpp and https://llvm.org/docs/LibFuzzer.html.
diff --git a/include/llvm/Demangle/StringView.h b/include/llvm/Demangle/StringView.h
index a89deda694c2..ceb6c7958066 100644
--- a/include/llvm/Demangle/StringView.h
+++ b/include/llvm/Demangle/StringView.h
@@ -1,22 +1,25 @@
//===--- StringView.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
//
+// FIXME: Use std::string_view instead when we support C++17.
//
-// This file contains a limited version of LLVM's StringView class. It is
-// copied here so that LLVMDemangle need not take a dependency on LLVMSupport.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEMANGLE_STRINGVIEW_H
-#define LLVM_DEMANGLE_STRINGVIEW_H
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
+#include "DemangleConfig.h"
#include <algorithm>
#include <cassert>
#include <cstring>
+DEMANGLE_NAMESPACE_BEGIN
+
class StringView {
const char *First;
const char *Last;
@@ -43,7 +46,7 @@ public:
if (FindBegin < size()) {
// Just forward to memchr, which is faster than a hand-rolled loop.
if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
- return static_cast<const char *>(P) - First;
+ return size_t(static_cast<const char *>(P) - First);
}
return npos;
}
@@ -118,4 +121,6 @@ inline bool operator==(const StringView &LHS, const StringView &RHS) {
std::equal(LHS.begin(), LHS.end(), RHS.begin());
}
+DEMANGLE_NAMESPACE_END
+
#endif
diff --git a/include/llvm/Demangle/Utility.h b/include/llvm/Demangle/Utility.h
index 1d1601c81635..ec23859af46a 100644
--- a/include/llvm/Demangle/Utility.h
+++ b/include/llvm/Demangle/Utility.h
@@ -1,25 +1,27 @@
//===--- Utility.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
//
+// Provide some utility classes for use in the demangler(s).
//
-// This file contains several utility classes used by the demangle library.
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEMANGLE_UTILITY_H
-#define LLVM_DEMANGLE_UTILITY_H
+#ifndef DEMANGLE_UTILITY_H
+#define DEMANGLE_UTILITY_H
#include "StringView.h"
-
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
+DEMANGLE_NAMESPACE_BEGIN
+
// Stream that AST nodes write their string representation into after the AST
// has been parsed.
class OutputStream {
@@ -184,4 +186,6 @@ inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
return true;
}
+DEMANGLE_NAMESPACE_END
+
#endif
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index b61cb24fa5fb..4fb6dad96387 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -1,9 +1,8 @@
//===- ExecutionEngine.h - Abstract Execution Engine Interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/Binary.h"
@@ -635,7 +635,13 @@ public:
}
// Use OrcMCJITReplacement instead of MCJIT. Off by default.
- void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
+ LLVM_ATTRIBUTE_DEPRECATED(
+ inline void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement),
+ "ORCv1 utilities (including OrcMCJITReplacement) are deprecated. Please "
+ "use ORCv2/LLJIT instead (see docs/ORCv2.rst)");
+
+ void setUseOrcMCJITReplacement(ORCv1DeprecationAcknowledgement,
+ bool UseOrcMCJITReplacement) {
this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
}
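Downstream code that still needs OrcMCJITReplacement must now acknowledge the
ORCv1 deprecation explicitly. A hedged sketch, assuming the acknowledgement
token is the AcknowledgeORCv1Deprecation value from
llvm/ExecutionEngine/OrcV1Deprecation.h:

// Sketch: explicitly opt into the deprecated OrcMCJITReplacement engine.
// AcknowledgeORCv1Deprecation is an assumed token name from
// llvm/ExecutionEngine/OrcV1Deprecation.h (pulled in above).
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/Module.h"
#include <memory>

std::unique_ptr<llvm::ExecutionEngine>
createEngine(std::unique_ptr<llvm::Module> M) {
  llvm::EngineBuilder EB(std::move(M));
  EB.setUseOrcMCJITReplacement(llvm::AcknowledgeORCv1Deprecation, true);
  return std::unique_ptr<llvm::ExecutionEngine>(EB.create());
}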
@@ -659,6 +665,10 @@ public:
ExecutionEngine *create(TargetMachine *TM);
};
+void EngineBuilder::setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
+ this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
+}
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h
index 504e30a018b6..1ca989da1b7e 100644
--- a/include/llvm/ExecutionEngine/GenericValue.h
+++ b/include/llvm/ExecutionEngine/GenericValue.h
@@ -1,9 +1,8 @@
//===- GenericValue.h - Represent any type of LLVM value --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h
index a14707840ad8..0749409766e3 100644
--- a/include/llvm/ExecutionEngine/Interpreter.h
+++ b/include/llvm/ExecutionEngine/Interpreter.h
@@ -1,9 +1,8 @@
//===-- Interpreter.h - Abstract Execution Engine Interface -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index 1b08379b8c3b..606b6f7cc128 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -1,9 +1,8 @@
//===- JITEventListener.h - Exposes events from JIT compilation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
new file mode 100644
index 000000000000..8d2f641254b3
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
@@ -0,0 +1,80 @@
+//===--------- EHFrameSupport.h - JITLink eh-frame utils --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EHFrame registration support for JITLink.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
+#define LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Registers all FDEs in the given eh-frame section with the current process.
+Error registerEHFrameSection(const void *EHFrameSectionAddr);
+
+/// Deregisters all FDEs in the given eh-frame section from the current process.
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr);
+
+/// Supports registration/deregistration of EH-frames in a target process.
+class EHFrameRegistrar {
+public:
+ virtual ~EHFrameRegistrar();
+ virtual Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) = 0;
+ virtual Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) = 0;
+};
+
+/// Registers / Deregisters EH-frames in the current process.
+class InProcessEHFrameRegistrar final : public EHFrameRegistrar {
+public:
+ /// Get a reference to the InProcessEHFrameRegistrar singleton.
+ static InProcessEHFrameRegistrar &getInstance();
+
+ InProcessEHFrameRegistrar(const InProcessEHFrameRegistrar &) = delete;
+ InProcessEHFrameRegistrar &
+ operator=(const InProcessEHFrameRegistrar &) = delete;
+
+ InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete;
+ InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete;
+
+ Error registerEHFrames(JITTargetAddress EHFrameSectionAddr) override {
+ return registerEHFrameSection(
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr));
+ }
+
+ Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr) override {
+ return deregisterEHFrameSection(
+ jitTargetAddressToPointer<void *>(EHFrameSectionAddr));
+ }
+
+private:
+ InProcessEHFrameRegistrar();
+};
+
+using StoreFrameAddressFunction = std::function<void(JITTargetAddress)>;
+
+/// Creates a pass that records the address of the EH frame section. If no
+/// eh-frame section is found, zero will be passed to StoreFrameAddress.
+///
+/// Authors of JITLinkContexts can use this function to register a post-fixup
+/// pass that records the address of the eh-frame section. This address can
+/// be used after finalization to register and deregister the frame.
+AtomGraphPassFunction
+createEHFrameRecorderPass(const Triple &TT,
+ StoreFrameAddressFunction StoreFrameAddress);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
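Tying the declarations above together, a JITLinkContext might wire up eh-frame
support roughly as in the following sketch. RecordedAddr is a hypothetical
variable owned by the caller and must outlive the link.

// Sketch: record the eh-frame address via a post-fixup pass, then register it
// with the in-process registrar once the link has been finalized.
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"

void addEHFrameRecorder(const llvm::Triple &TT,
                        llvm::jitlink::PassConfiguration &Config,
                        llvm::JITTargetAddress &RecordedAddr) {
  Config.PostFixupPasses.push_back(llvm::jitlink::createEHFrameRecorderPass(
      TT, [&RecordedAddr](llvm::JITTargetAddress Addr) { RecordedAddr = Addr; }));
}

llvm::Error registerRecordedEHFrames(llvm::JITTargetAddress RecordedAddr) {
  if (!RecordedAddr) // No eh-frame section was found; nothing to register.
    return llvm::Error::success();
  return llvm::jitlink::InProcessEHFrameRegistrar::getInstance()
      .registerEHFrames(RecordedAddr);
}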
diff --git a/include/llvm/ExecutionEngine/JITLink/JITLink.h b/include/llvm/ExecutionEngine/JITLink/JITLink.h
new file mode 100644
index 000000000000..be80d44ccf51
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -0,0 +1,930 @@
+//===------------ JITLink.h - JIT linker functionality ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains generic JIT-linker types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
+#define LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
+
+#include "JITLinkMemoryManager.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include <map>
+#include <string>
+#include <system_error>
+
+namespace llvm {
+namespace jitlink {
+
+/// Base class for errors originating in JIT linker, e.g. missing relocation
+/// support.
+class JITLinkError : public ErrorInfo<JITLinkError> {
+public:
+ static char ID;
+
+ JITLinkError(Twine ErrMsg) : ErrMsg(ErrMsg.str()) {}
+
+ void log(raw_ostream &OS) const override;
+ const std::string &getErrorMessage() const { return ErrMsg; }
+ std::error_code convertToErrorCode() const override;
+
+private:
+ std::string ErrMsg;
+};
+
+// Forward declare the Atom class.
+class Atom;
+
+/// Edge class. Represents both object file relocations, as well as layout and
+/// keep-alive constraints.
+class Edge {
+public:
+ using Kind = uint8_t;
+
+ using GenericEdgeKind = enum : Kind {
+ Invalid, // Invalid edge value.
+ FirstKeepAlive, // Keeps target alive. Offset/addend zero.
+ KeepAlive = FirstKeepAlive, // Tag first edge kind that preserves liveness.
+ LayoutNext, // Layout constraint. Offset/Addend zero.
+ FirstRelocation // First architecture specific relocation.
+ };
+
+ using OffsetT = uint32_t;
+ using AddendT = int64_t;
+
+ Edge(Kind K, OffsetT Offset, Atom &Target, AddendT Addend)
+ : Target(&Target), Offset(Offset), Addend(Addend), K(K) {}
+
+ OffsetT getOffset() const { return Offset; }
+ Kind getKind() const { return K; }
+ void setKind(Kind K) { this->K = K; }
+ bool isRelocation() const { return K >= FirstRelocation; }
+ Kind getRelocation() const {
+ assert(isRelocation() && "Not a relocation edge");
+ return K - FirstRelocation;
+ }
+ bool isKeepAlive() const { return K >= FirstKeepAlive; }
+ Atom &getTarget() const { return *Target; }
+ void setTarget(Atom &Target) { this->Target = &Target; }
+ AddendT getAddend() const { return Addend; }
+ void setAddend(AddendT Addend) { this->Addend = Addend; }
+
+private:
+ Atom *Target;
+ OffsetT Offset;
+ AddendT Addend;
+ Kind K = 0;
+};
+
+using EdgeVector = std::vector<Edge>;
+
+const StringRef getGenericEdgeKindName(Edge::Kind K);
+
+/// Base Atom class. Used by absolute and undefined atoms.
+class Atom {
+ friend class AtomGraph;
+
+protected:
+ /// Create a named (as yet unresolved) atom.
+ Atom(StringRef Name)
+ : Name(Name), IsDefined(false), IsLive(false), ShouldDiscard(false),
+ IsGlobal(false), IsAbsolute(false), IsCallable(false),
+ IsExported(false), IsWeak(false), HasLayoutNext(false),
+ IsCommon(false) {}
+
+ /// Create an absolute symbol atom.
+ Atom(StringRef Name, JITTargetAddress Address)
+ : Name(Name), Address(Address), IsDefined(true), IsLive(false),
+ ShouldDiscard(false), IsGlobal(false), IsAbsolute(false),
+ IsCallable(false), IsExported(false), IsWeak(false),
+ HasLayoutNext(false), IsCommon(false) {}
+
+public:
+ /// Returns true if this atom has a name.
+ bool hasName() const { return Name != StringRef(); }
+
+ /// Returns the name of this atom.
+ StringRef getName() const { return Name; }
+
+ /// Returns the current target address of this atom.
+ /// The initial target address (for atoms that have one) will be taken from
+ /// the input object file's virtual address space. During the layout phase
+ /// of JIT linking the atom's address will be updated to point to its final
+ /// address in the JIT'd process.
+ JITTargetAddress getAddress() const { return Address; }
+
+ /// Set the current target address of this atom.
+ void setAddress(JITTargetAddress Address) { this->Address = Address; }
+
+ /// Returns true if this is a defined atom.
+ bool isDefined() const { return IsDefined; }
+
+ /// Returns true if this atom is marked as live.
+ bool isLive() const { return IsLive; }
+
+ /// Mark this atom as live.
+ ///
+ /// Note: Only defined and absolute atoms can be marked live.
+ void setLive(bool IsLive) {
+ assert((IsDefined || IsAbsolute || !IsLive) &&
+ "Only defined and absolute atoms can be marked live");
+ this->IsLive = IsLive;
+ }
+
+ /// Returns true if this atom should be discarded during pruning.
+ bool shouldDiscard() const { return ShouldDiscard; }
+
+ /// Mark this atom to be discarded.
+ ///
+ /// Note: Only defined and absolute atoms can be marked live.
+ void setShouldDiscard(bool ShouldDiscard) {
+ assert((IsDefined || IsAbsolute || !ShouldDiscard) &&
+ "Only defined and absolute atoms can be marked live");
+ this->ShouldDiscard = ShouldDiscard;
+ }
+
+ /// Returns true if this definition is global (i.e. visible outside this
+ /// linkage unit).
+ ///
+ /// Note: This is distinct from Exported, which means visible outside the
+ /// JITDylib that this graph is being linked into.
+ bool isGlobal() const { return IsGlobal; }
+
+ /// Mark this atom as global.
+ void setGlobal(bool IsGlobal) { this->IsGlobal = IsGlobal; }
+
+ /// Returns true if this atom represents an absolute symbol.
+ bool isAbsolute() const { return IsAbsolute; }
+
+ /// Returns true if this atom is known to be callable.
+ ///
+ /// Primarily provided for easy interoperability with ORC, which uses the
+ /// JITSymbolFlags::Common flag to identify symbols that can be interposed
+ /// with stubs.
+ bool isCallable() const { return IsCallable; }
+
+ /// Mark this atom as callable.
+ void setCallable(bool IsCallable) {
+ assert((IsDefined || IsAbsolute || !IsCallable) &&
+ "Callable atoms must be defined or absolute");
+ this->IsCallable = IsCallable;
+ }
+
+ /// Returns true if this atom should appear in the symbol table of a final
+ /// linked image.
+ bool isExported() const { return IsExported; }
+
+ /// Mark this atom as exported.
+ void setExported(bool IsExported) {
+ assert((!IsExported || ((IsDefined || IsAbsolute) && hasName())) &&
+ "Exported atoms must have names");
+ this->IsExported = IsExported;
+ }
+
+ /// Returns true if this is a weak symbol.
+ bool isWeak() const { return IsWeak; }
+
+ /// Mark this atom as weak.
+ void setWeak(bool IsWeak) { this->IsWeak = IsWeak; }
+
+private:
+ StringRef Name;
+ JITTargetAddress Address = 0;
+
+ bool IsDefined : 1;
+ bool IsLive : 1;
+ bool ShouldDiscard : 1;
+
+ bool IsGlobal : 1;
+ bool IsAbsolute : 1;
+ bool IsCallable : 1;
+ bool IsExported : 1;
+ bool IsWeak : 1;
+
+protected:
+ // These flags only make sense for DefinedAtom, but we can minimize the size
+ // of DefinedAtom by defining them here.
+ bool HasLayoutNext : 1;
+ bool IsCommon : 1;
+};
+
+// Forward declare DefinedAtom.
+class DefinedAtom;
+
+raw_ostream &operator<<(raw_ostream &OS, const Atom &A);
+void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E,
+ StringRef EdgeKindName);
+
+/// Represents a section address range via a pair of DefinedAtom pointers to
+/// the first and last atoms in the section.
+class SectionRange {
+public:
+ SectionRange() = default;
+ SectionRange(DefinedAtom *First, DefinedAtom *Last)
+ : First(First), Last(Last) {}
+ DefinedAtom *getFirstAtom() const {
+ assert((!Last || First) && "First can not be null if end is non-null");
+ return First;
+ }
+ DefinedAtom *getLastAtom() const {
+ assert((First || !Last) && "Last can not be null if start is non-null");
+ return Last;
+ }
+ bool isEmpty() const {
+ assert((First || !Last) && "Last can not be null if start is non-null");
+ return !First;
+ }
+ JITTargetAddress getStart() const;
+ JITTargetAddress getEnd() const;
+ uint64_t getSize() const;
+
+private:
+ DefinedAtom *First = nullptr;
+ DefinedAtom *Last = nullptr;
+};
+
+/// Represents an object file section.
+class Section {
+ friend class AtomGraph;
+
+private:
+ Section(StringRef Name, uint32_t Alignment, sys::Memory::ProtectionFlags Prot,
+ unsigned Ordinal, bool IsZeroFill)
+ : Name(Name), Alignment(Alignment), Prot(Prot), Ordinal(Ordinal),
+ IsZeroFill(IsZeroFill) {
+ assert(isPowerOf2_32(Alignment) && "Alignments must be a power of 2");
+ }
+
+ using DefinedAtomSet = DenseSet<DefinedAtom *>;
+
+public:
+ using atom_iterator = DefinedAtomSet::iterator;
+ using const_atom_iterator = DefinedAtomSet::const_iterator;
+
+ ~Section();
+ StringRef getName() const { return Name; }
+ uint32_t getAlignment() const { return Alignment; }
+ sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; }
+ unsigned getSectionOrdinal() const { return Ordinal; }
+ size_t getNextAtomOrdinal() { return ++NextAtomOrdinal; }
+
+ bool isZeroFill() const { return IsZeroFill; }
+
+ /// Returns an iterator over the atoms in the section (in no particular
+ /// order).
+ iterator_range<atom_iterator> atoms() {
+ return make_range(DefinedAtoms.begin(), DefinedAtoms.end());
+ }
+
+ /// Returns an iterator over the atoms in the section (in no particular
+ /// order).
+ iterator_range<const_atom_iterator> atoms() const {
+ return make_range(DefinedAtoms.begin(), DefinedAtoms.end());
+ }
+
+ /// Return the number of atoms in this section.
+ DefinedAtomSet::size_type atoms_size() { return DefinedAtoms.size(); }
+
+ /// Return true if this section contains no atoms.
+ bool atoms_empty() const { return DefinedAtoms.empty(); }
+
+ /// Returns the range of this section as the pair of atoms with the lowest
+ /// and highest target address. This operation is expensive, as it
+ /// must traverse all atoms in the section.
+ ///
+ /// Note: If the section is empty, both values will be null. The section
+ /// address will evaluate to null, and the size to zero. If the section
+ /// contains a single atom both values will point to it, the address will
+ /// evaluate to the address of that atom, and the size will be the size of
+ /// that atom.
+ SectionRange getRange() const;
+
+private:
+ void addAtom(DefinedAtom &DA) {
+ assert(!DefinedAtoms.count(&DA) && "Atom is already in this section");
+ DefinedAtoms.insert(&DA);
+ }
+
+ void removeAtom(DefinedAtom &DA) {
+ assert(DefinedAtoms.count(&DA) && "Atom is not in this section");
+ DefinedAtoms.erase(&DA);
+ }
+
+ StringRef Name;
+ uint32_t Alignment = 0;
+ sys::Memory::ProtectionFlags Prot;
+ unsigned Ordinal = 0;
+ unsigned NextAtomOrdinal = 0;
+ bool IsZeroFill = false;
+ DefinedAtomSet DefinedAtoms;
+};
+
+/// Defined atom class. Suitable for use by defined named and anonymous
+/// atoms.
+class DefinedAtom : public Atom {
+ friend class AtomGraph;
+
+private:
+ DefinedAtom(Section &Parent, JITTargetAddress Address, uint32_t Alignment)
+ : Atom("", Address), Parent(Parent), Ordinal(Parent.getNextAtomOrdinal()),
+ Alignment(Alignment) {
+ assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two");
+ }
+
+ DefinedAtom(Section &Parent, StringRef Name, JITTargetAddress Address,
+ uint32_t Alignment)
+ : Atom(Name, Address), Parent(Parent),
+ Ordinal(Parent.getNextAtomOrdinal()), Alignment(Alignment) {
+ assert(isPowerOf2_32(Alignment) && "Alignments must be a power of two");
+ }
+
+public:
+ using edge_iterator = EdgeVector::iterator;
+
+ Section &getSection() const { return Parent; }
+
+ uint64_t getSize() const { return Size; }
+
+ StringRef getContent() const {
+ assert(!Parent.isZeroFill() && "Trying to get content for zero-fill atom");
+ assert(Size <= std::numeric_limits<size_t>::max() &&
+ "Content size too large");
+ return {ContentPtr, static_cast<size_t>(Size)};
+ }
+ void setContent(StringRef Content) {
+ assert(!Parent.isZeroFill() && "Calling setContent on zero-fill atom?");
+ ContentPtr = Content.data();
+ Size = Content.size();
+ }
+
+ bool isZeroFill() const { return Parent.isZeroFill(); }
+
+ void setZeroFill(uint64_t Size) {
+ assert(Parent.isZeroFill() && !ContentPtr &&
+ "Can't set zero-fill length of a non zero-fill atom");
+ this->Size = Size;
+ }
+
+ uint64_t getZeroFillSize() const {
+ assert(Parent.isZeroFill() &&
+ "Can't get zero-fill length of a non zero-fill atom");
+ return Size;
+ }
+
+ uint32_t getAlignment() const { return Alignment; }
+
+ bool hasLayoutNext() const { return HasLayoutNext; }
+ void setLayoutNext(DefinedAtom &Next) {
+ assert(!HasLayoutNext && "Atom already has layout-next constraint");
+ HasLayoutNext = true;
+ Edges.push_back(Edge(Edge::LayoutNext, 0, Next, 0));
+ }
+ DefinedAtom &getLayoutNext() {
+ assert(HasLayoutNext && "Atom does not have a layout-next constraint");
+ DefinedAtom *Next = nullptr;
+ for (auto &E : edges())
+ if (E.getKind() == Edge::LayoutNext) {
+ assert(E.getTarget().isDefined() &&
+ "layout-next target atom must be a defined atom");
+ Next = static_cast<DefinedAtom *>(&E.getTarget());
+ break;
+ }
+ assert(Next && "Missing LayoutNext edge");
+ return *Next;
+ }
+
+ bool isCommon() const { return IsCommon; }
+
+ void addEdge(Edge::Kind K, Edge::OffsetT Offset, Atom &Target,
+ Edge::AddendT Addend) {
+ assert(K != Edge::LayoutNext &&
+ "Layout edges should be added via setLayoutNext");
+ Edges.push_back(Edge(K, Offset, Target, Addend));
+ }
+
+ iterator_range<edge_iterator> edges() {
+ return make_range(Edges.begin(), Edges.end());
+ }
+ size_t edges_size() const { return Edges.size(); }
+ bool edges_empty() const { return Edges.empty(); }
+
+ unsigned getOrdinal() const { return Ordinal; }
+
+private:
+ void setCommon(uint64_t Size) {
+ assert(ContentPtr == 0 && "Atom already has content?");
+ IsCommon = true;
+ setZeroFill(Size);
+ }
+
+ EdgeVector Edges;
+ uint64_t Size = 0;
+ Section &Parent;
+ const char *ContentPtr = nullptr;
+ unsigned Ordinal = 0;
+ uint32_t Alignment = 0;
+};
+
+inline JITTargetAddress SectionRange::getStart() const {
+ return First ? First->getAddress() : 0;
+}
+
+inline JITTargetAddress SectionRange::getEnd() const {
+ return Last ? Last->getAddress() + Last->getSize() : 0;
+}
+
+inline uint64_t SectionRange::getSize() const { return getEnd() - getStart(); }
+
+inline SectionRange Section::getRange() const {
+ if (atoms_empty())
+ return SectionRange();
+ DefinedAtom *First = *DefinedAtoms.begin(), *Last = *DefinedAtoms.begin();
+ for (auto *DA : atoms()) {
+ if (DA->getAddress() < First->getAddress())
+ First = DA;
+ if (DA->getAddress() > Last->getAddress())
+ Last = DA;
+ }
+ return SectionRange(First, Last);
+}
+
+class AtomGraph {
+private:
+ using SectionList = std::vector<std::unique_ptr<Section>>;
+ using AddressToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
+ using NamedAtomMap = DenseMap<StringRef, Atom *>;
+ using ExternalAtomSet = DenseSet<Atom *>;
+
+public:
+ using external_atom_iterator = ExternalAtomSet::iterator;
+
+ using section_iterator = pointee_iterator<SectionList::iterator>;
+ using const_section_iterator = pointee_iterator<SectionList::const_iterator>;
+
+ template <typename SecItrT, typename AtomItrT, typename T>
+ class defined_atom_iterator_impl
+ : public iterator_facade_base<
+ defined_atom_iterator_impl<SecItrT, AtomItrT, T>,
+ std::forward_iterator_tag, T> {
+ public:
+ defined_atom_iterator_impl() = default;
+
+ defined_atom_iterator_impl(SecItrT SI, SecItrT SE)
+ : SI(SI), SE(SE),
+ AI(SI != SE ? SI->atoms().begin() : Section::atom_iterator()) {
+ moveToNextAtomOrEnd();
+ }
+
+ bool operator==(const defined_atom_iterator_impl &RHS) const {
+ return (SI == RHS.SI) && (AI == RHS.AI);
+ }
+
+ T operator*() const {
+ assert(AI != SI->atoms().end() && "Dereferencing end?");
+ return *AI;
+ }
+
+ defined_atom_iterator_impl operator++() {
+ ++AI;
+ moveToNextAtomOrEnd();
+ return *this;
+ }
+
+ private:
+ void moveToNextAtomOrEnd() {
+ while (SI != SE && AI == SI->atoms().end()) {
+ ++SI;
+ if (SI == SE)
+ AI = Section::atom_iterator();
+ else
+ AI = SI->atoms().begin();
+ }
+ }
+
+ SecItrT SI, SE;
+ AtomItrT AI;
+ };
+
+ using defined_atom_iterator =
+ defined_atom_iterator_impl<section_iterator, Section::atom_iterator,
+ DefinedAtom *>;
+
+ using const_defined_atom_iterator =
+ defined_atom_iterator_impl<const_section_iterator,
+ Section::const_atom_iterator,
+ const DefinedAtom *>;
+
+ AtomGraph(std::string Name, unsigned PointerSize,
+ support::endianness Endianness)
+ : Name(std::move(Name)), PointerSize(PointerSize),
+ Endianness(Endianness) {}
+
+ /// Returns the name of this graph (usually the name of the original
+ /// underlying MemoryBuffer).
+ const std::string &getName() { return Name; }
+
+ /// Returns the pointer size for use in this graph.
+ unsigned getPointerSize() const { return PointerSize; }
+
+ /// Returns the endianness of atom-content in this graph.
+ support::endianness getEndianness() const { return Endianness; }
+
+ /// Create a section with the given name, protection flags, and alignment.
+ Section &createSection(StringRef Name, uint32_t Alignment,
+ sys::Memory::ProtectionFlags Prot, bool IsZeroFill) {
+ std::unique_ptr<Section> Sec(
+ new Section(Name, Alignment, Prot, Sections.size(), IsZeroFill));
+ Sections.push_back(std::move(Sec));
+ return *Sections.back();
+ }
+
+ /// Add an external atom representing an undefined symbol in this graph.
+ Atom &addExternalAtom(StringRef Name) {
+ assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+ Atom *A = reinterpret_cast<Atom *>(
+ AtomAllocator.Allocate(sizeof(Atom), alignof(Atom)));
+ new (A) Atom(Name);
+ ExternalAtoms.insert(A);
+ NamedAtoms[Name] = A;
+ return *A;
+ }
+
+ /// Add an external atom representing an absolute symbol.
+ Atom &addAbsoluteAtom(StringRef Name, JITTargetAddress Addr) {
+ assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+ Atom *A = reinterpret_cast<Atom *>(
+ AtomAllocator.Allocate(sizeof(Atom), alignof(Atom)));
+ new (A) Atom(Name, Addr);
+ AbsoluteAtoms.insert(A);
+ NamedAtoms[Name] = A;
+ return *A;
+ }
+
+ /// Add an anonymous defined atom to the graph.
+ ///
+ /// Anonymous atoms have content but no name. They must have an address.
+ DefinedAtom &addAnonymousAtom(Section &Parent, JITTargetAddress Address,
+ uint32_t Alignment) {
+ DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
+ AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
+ new (A) DefinedAtom(Parent, Address, Alignment);
+ Parent.addAtom(*A);
+ getAddrToAtomMap()[A->getAddress()] = A;
+ return *A;
+ }
+
+ /// Add a defined atom to the graph.
+ ///
+ /// Allocates and constructs a DefinedAtom instance with the given parent,
+ /// name, address, and alignment.
+ DefinedAtom &addDefinedAtom(Section &Parent, StringRef Name,
+ JITTargetAddress Address, uint32_t Alignment) {
+ assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+ DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
+ AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
+ new (A) DefinedAtom(Parent, Name, Address, Alignment);
+ Parent.addAtom(*A);
+ getAddrToAtomMap()[A->getAddress()] = A;
+ NamedAtoms[Name] = A;
+ return *A;
+ }
+
+ /// Add a common symbol atom to the graph.
+ ///
+ /// Adds a common-symbol atom to the graph with the given parent, name,
+ /// address, alignment and size.
+ DefinedAtom &addCommonAtom(Section &Parent, StringRef Name,
+ JITTargetAddress Address, uint32_t Alignment,
+ uint64_t Size) {
+ assert(!NamedAtoms.count(Name) && "Duplicate named atom inserted");
+ DefinedAtom *A = reinterpret_cast<DefinedAtom *>(
+ AtomAllocator.Allocate(sizeof(DefinedAtom), alignof(DefinedAtom)));
+ new (A) DefinedAtom(Parent, Name, Address, Alignment);
+ A->setCommon(Size);
+ Parent.addAtom(*A);
+ NamedAtoms[Name] = A;
+ return *A;
+ }
+
+ iterator_range<section_iterator> sections() {
+ return make_range(section_iterator(Sections.begin()),
+ section_iterator(Sections.end()));
+ }
+
+ /// Returns the section with the given name if it exists, otherwise returns
+ /// null.
+ Section *findSectionByName(StringRef Name) {
+ for (auto &S : sections())
+ if (S.getName() == Name)
+ return &S;
+ return nullptr;
+ }
+
+ iterator_range<external_atom_iterator> external_atoms() {
+ return make_range(ExternalAtoms.begin(), ExternalAtoms.end());
+ }
+
+ iterator_range<external_atom_iterator> absolute_atoms() {
+ return make_range(AbsoluteAtoms.begin(), AbsoluteAtoms.end());
+ }
+
+ iterator_range<defined_atom_iterator> defined_atoms() {
+ return make_range(defined_atom_iterator(Sections.begin(), Sections.end()),
+ defined_atom_iterator(Sections.end(), Sections.end()));
+ }
+
+ iterator_range<const_defined_atom_iterator> defined_atoms() const {
+ return make_range(
+ const_defined_atom_iterator(Sections.begin(), Sections.end()),
+ const_defined_atom_iterator(Sections.end(), Sections.end()));
+ }
+
+ /// Returns the atom with the given name, which must exist in this graph.
+ Atom &getAtomByName(StringRef Name) {
+ auto I = NamedAtoms.find(Name);
+ assert(I != NamedAtoms.end() && "Name not in NamedAtoms map");
+ return *I->second;
+ }
+
+ /// Returns the atom with the given name, which must exist in this graph and
+ /// be a DefinedAtom.
+ DefinedAtom &getDefinedAtomByName(StringRef Name) {
+ auto &A = getAtomByName(Name);
+ assert(A.isDefined() && "Atom is not a defined atom");
+ return static_cast<DefinedAtom &>(A);
+ }
+
+ /// Search for the given atom by name.
+ /// Returns the atom (if found) or an error (if no atom with this name
+ /// exists).
+ Expected<Atom &> findAtomByName(StringRef Name) {
+ auto I = NamedAtoms.find(Name);
+ if (I == NamedAtoms.end())
+ return make_error<JITLinkError>("No atom named " + Name);
+ return *I->second;
+ }
+
+ /// Search for the given defined atom by name.
+ /// Returns the defined atom (if found) or an error (if no atom with this
+ /// name exists, or if one exists but is not a defined atom).
+ Expected<DefinedAtom &> findDefinedAtomByName(StringRef Name) {
+ auto I = NamedAtoms.find(Name);
+ if (I == NamedAtoms.end())
+ return make_error<JITLinkError>("No atom named " + Name);
+ if (!I->second->isDefined())
+ return make_error<JITLinkError>("Atom " + Name +
+ " exists but is not a "
+ "defined atom");
+ return static_cast<DefinedAtom &>(*I->second);
+ }
+
+ /// Returns the atom covering the given address, or null if no atom exists
+ /// at the given address.
+ DefinedAtom *getAtomByAddress(JITTargetAddress Address) {
+ refreshAddrToAtomCache();
+
+ // If there are no defined atoms, bail out early.
+ if (AddrToAtomCache->empty())
+ return nullptr;
+
+ // Find the atom *after* the given address.
+ auto I = AddrToAtomCache->upper_bound(Address);
+
+ // If this address falls before any known atom, bail out.
+ if (I == AddrToAtomCache->begin())
+ return nullptr;
+
+ // The atom we're looking for is the one before the atom we found.
+ --I;
+
+ // Otherwise range check the atom that was found.
+ assert(!I->second->getContent().empty() && "Atom content not set");
+ if (Address >= I->second->getAddress() + I->second->getContent().size())
+ return nullptr;
+
+ return I->second;
+ }
+
+ /// Like getAtomByAddress, but returns an Error if the given address is not
+ /// covered by an atom, rather than a null pointer.
+ Expected<DefinedAtom &> findAtomByAddress(JITTargetAddress Address) {
+ if (auto *DA = getAtomByAddress(Address))
+ return *DA;
+ return make_error<JITLinkError>("No atom at address " +
+ formatv("{0:x16}", Address));
+ }
+
+ // Remove the given external atom from the graph.
+ void removeExternalAtom(Atom &A) {
+ assert(!A.isDefined() && !A.isAbsolute() && "A is not an external atom");
+ assert(ExternalAtoms.count(&A) && "A is not in the external atoms set");
+ ExternalAtoms.erase(&A);
+ A.~Atom();
+ }
+
+ /// Remove the given absolute atom from the graph.
+ void removeAbsoluteAtom(Atom &A) {
+ assert(A.isAbsolute() && "A is not an absolute atom");
+ assert(AbsoluteAtoms.count(&A) && "A is not in the absolute atoms set");
+ AbsoluteAtoms.erase(&A);
+ A.~Atom();
+ }
+
+ /// Remove the given defined atom from the graph.
+ void removeDefinedAtom(DefinedAtom &DA) {
+ if (AddrToAtomCache) {
+ assert(AddrToAtomCache->count(DA.getAddress()) &&
+ "Cache exists, but does not contain atom");
+ AddrToAtomCache->erase(DA.getAddress());
+ }
+ if (DA.hasName()) {
+ assert(NamedAtoms.count(DA.getName()) && "Named atom not in map");
+ NamedAtoms.erase(DA.getName());
+ }
+ DA.getSection().removeAtom(DA);
+ DA.~DefinedAtom();
+ }
+
+ /// Invalidate the atom-to-address map.
+ void invalidateAddrToAtomMap() { AddrToAtomCache = None; }
+
+ /// Dump the graph.
+ ///
+ /// If supplied, the EdgeKindToName function will be used to name edge
+ /// kinds in the debug output. Otherwise raw edge kind numbers will be
+ /// displayed.
+ void dump(raw_ostream &OS,
+ std::function<StringRef(Edge::Kind)> EdgeKindToName =
+ std::function<StringRef(Edge::Kind)>());
+
+private:
+ AddressToAtomMap &getAddrToAtomMap() {
+ refreshAddrToAtomCache();
+ return *AddrToAtomCache;
+ }
+
+ const AddressToAtomMap &getAddrToAtomMap() const {
+ refreshAddrToAtomCache();
+ return *AddrToAtomCache;
+ }
+
+ void refreshAddrToAtomCache() const {
+ if (!AddrToAtomCache) {
+ AddrToAtomCache = AddressToAtomMap();
+ for (auto *DA : defined_atoms())
+ (*AddrToAtomCache)[DA->getAddress()] = const_cast<DefinedAtom *>(DA);
+ }
+ }
+
+ // Put the BumpPtrAllocator first so that we don't free any of the atoms in
+ // it until all of their destructors have been run.
+ BumpPtrAllocator AtomAllocator;
+
+ std::string Name;
+ unsigned PointerSize;
+ support::endianness Endianness;
+ SectionList Sections;
+ NamedAtomMap NamedAtoms;
+ ExternalAtomSet ExternalAtoms;
+ ExternalAtomSet AbsoluteAtoms;
+ mutable Optional<AddressToAtomMap> AddrToAtomCache;
+};
+
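As a quick illustration of the builder methods above, a hypothetical
object-format front end could populate a graph as in the sketch below; the
section name, addresses, alignments, and edge kind are made up.

// Sketch: populate an AtomGraph by hand using only the members declared above.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

void buildExampleGraph() {
  using namespace llvm;
  using namespace llvm::jitlink;

  AtomGraph G("example.o", /*PointerSize=*/8, support::little);

  auto Prot = static_cast<sys::Memory::ProtectionFlags>(
      sys::Memory::MF_READ | sys::Memory::MF_EXEC);
  Section &Text = G.createSection("__text", /*Alignment=*/16, Prot,
                                  /*IsZeroFill=*/false);

  DefinedAtom &Main = G.addDefinedAtom(Text, "main", /*Address=*/0x1000,
                                       /*Alignment=*/16);
  Atom &Callee = G.addExternalAtom("puts");

  // A relocation edge from main to puts; the kind value is target-specific,
  // so FirstRelocation is only a placeholder here.
  Main.addEdge(Edge::FirstRelocation, /*Offset=*/4, Callee, /*Addend=*/0);
}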
+/// A function for mutating AtomGraphs.
+using AtomGraphPassFunction = std::function<Error(AtomGraph &)>;
+
+/// A list of atom graph passes.
+using AtomGraphPassList = std::vector<AtomGraphPassFunction>;
+
+/// An atom graph pass configuration, consisting of a list of pre-prune,
+/// post-prune, and post-fixup passes.
+struct PassConfiguration {
+
+ /// Pre-prune passes.
+ ///
+ /// These passes are called on the graph after it is built, and before any
+ /// atoms have been pruned.
+ ///
+ /// Notable use cases: Marking atoms live or should-discard.
+ AtomGraphPassList PrePrunePasses;
+
+ /// Post-prune passes.
+ ///
+ /// These passes are called on the graph after dead and should-discard atoms
+ /// have been removed, but before fixups are applied.
+ ///
+ /// Notable use cases: Building GOT, stub, and TLV atoms.
+ AtomGraphPassList PostPrunePasses;
+
+ /// Post-fixup passes.
+ ///
+ /// These passes are called on the graph after atom contents has been copied
+ /// to working memory, and fixups applied.
+ ///
+ /// Notable use cases: Testing and validation.
+ AtomGraphPassList PostFixupPasses;
+};
+
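For example, a context that wants pruning to keep everything can install the
conservative markAllAtomsLive pass (declared further down in this header) as a
pre-prune pass; a minimal sketch:

// Sketch: keep every atom alive by installing markAllAtomsLive as a
// pre-prune pass.
void keepEverythingAlive(llvm::jitlink::PassConfiguration &Config) {
  Config.PrePrunePasses.push_back(llvm::jitlink::markAllAtomsLive);
}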
+/// A map of symbol names to resolved addresses.
+using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
+
+/// A function to call with a resolved symbol map (See AsyncLookupResult) or an
+/// error if resolution failed.
+using JITLinkAsyncLookupContinuation =
+ std::function<void(Expected<AsyncLookupResult> LR)>;
+
+/// An asynchronous symbol lookup. Performs a search (possibly asynchronously)
+/// for the given symbols, calling the given continuation with either the result
+/// (if the lookup succeeds), or an error (if the lookup fails).
+using JITLinkAsyncLookupFunction =
+ std::function<void(const DenseSet<StringRef> &Symbols,
+ JITLinkAsyncLookupContinuation LookupContinuation)>;
+
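Although the lookup interface is asynchronous, a context may answer inline. A
minimal synchronous sketch, where the resolved address and flags are
placeholders:

// Sketch: a synchronous implementation of the asynchronous lookup interface.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

inline llvm::jitlink::JITLinkAsyncLookupFunction makeTrivialLookup() {
  return [](const llvm::DenseSet<llvm::StringRef> &Symbols,
            llvm::jitlink::JITLinkAsyncLookupContinuation LookupContinuation) {
    llvm::jitlink::AsyncLookupResult Result;
    for (llvm::StringRef Name : Symbols)
      Result[Name] = llvm::JITEvaluatedSymbol(/*Address=*/0x1000,
                                              llvm::JITSymbolFlags::Exported);
    LookupContinuation(std::move(Result));
  };
}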
+/// Holds context for a single jitLink invocation.
+class JITLinkContext {
+public:
+ /// Destroy a JITLinkContext.
+ virtual ~JITLinkContext();
+
+ /// Return the MemoryManager to be used for this link.
+ virtual JITLinkMemoryManager &getMemoryManager() = 0;
+
+ /// Returns a MemoryBufferRef for the object buffer.
+ /// This method can not be called once takeObjectBuffer has been called.
+ virtual MemoryBufferRef getObjectBuffer() const = 0;
+
+ /// Notify this context that linking failed.
+ /// Called by JITLink if linking cannot be completed.
+ virtual void notifyFailed(Error Err) = 0;
+
+ /// Called by JITLink to resolve external symbols. This method is passed a
+ /// lookup continuation which it must call with a result to continue the
+ /// linking process.
+ virtual void lookup(const DenseSet<StringRef> &Symbols,
+ JITLinkAsyncLookupContinuation LookupContinuation) = 0;
+
+ /// Called by JITLink once all defined atoms in the graph have been assigned
+ /// their final memory locations in the target process. At this point the
+ /// atom graph can be inspected to build a symbol table; however, the atom
+ /// content will not generally have been copied to the target location yet.
+ virtual void notifyResolved(AtomGraph &G) = 0;
+
+ /// Called by JITLink to notify the context that the object has been
+ /// finalized (i.e. emitted to memory and memory permissions set). If all of
+ /// this object's dependencies have also been finalized, then the code is ready
+ /// to run.
+ virtual void
+ notifyFinalized(std::unique_ptr<JITLinkMemoryManager::Allocation> A) = 0;
+
+ /// Called by JITLink prior to linking to determine whether default passes for
+ /// the target should be added. The default implementation returns true.
+ /// If subclasses override this method to return false for any target then
+ /// they are required to fully configure the pass pipeline for that target.
+ virtual bool shouldAddDefaultTargetPasses(const Triple &TT) const;
+
+ /// Returns the mark-live pass to be used for this link. If no pass is
+ /// returned (the default) then the target-specific linker implementation will
+ /// choose a conservative default (usually marking all atoms live).
+ /// This function is only called if shouldAddDefaultTargetPasses returns true,
+ /// otherwise the JITLinkContext is responsible for adding a mark-live pass in
+ /// modifyPassConfig.
+ virtual AtomGraphPassFunction getMarkLivePass(const Triple &TT) const;
+
+ /// Called by JITLink to modify the pass pipeline prior to linking.
+ /// The default version performs no modification.
+ virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
+};
+
+/// Marks all atoms in a graph live. This can be used as a default, conservative
+/// mark-live implementation.
+Error markAllAtomsLive(AtomGraph &G);
+
+/// Basic JITLink implementation.
+///
+/// This function will use sensible defaults for GOT and Stub handling.
+void jitLink(std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
diff --git a/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
new file mode 100644
index 000000000000..9d0b37fe4a4d
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -0,0 +1,99 @@
+//===-- JITLinkMemoryManager.h - JITLink mem manager interface --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains the JITLinkMemoryManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Memory.h"
+#include <cstdint>
+
+namespace llvm {
+namespace jitlink {
+
+/// Manages allocations of JIT memory.
+///
+/// Instances of this class may be accessed concurrently from multiple threads
+/// and their implementations should include any necessary synchronization.
+class JITLinkMemoryManager {
+public:
+ using ProtectionFlags = sys::Memory::ProtectionFlags;
+
+ class SegmentRequest {
+ public:
+ SegmentRequest() = default;
+ SegmentRequest(size_t ContentSize, unsigned ContentAlign,
+ uint64_t ZeroFillSize, unsigned ZeroFillAlign)
+ : ContentSize(ContentSize), ZeroFillSize(ZeroFillSize),
+ ContentAlign(ContentAlign), ZeroFillAlign(ZeroFillAlign) {}
+ size_t getContentSize() const { return ContentSize; }
+ unsigned getContentAlignment() const { return ContentAlign; }
+ uint64_t getZeroFillSize() const { return ZeroFillSize; }
+ unsigned getZeroFillAlignment() const { return ZeroFillAlign; }
+
+ private:
+ size_t ContentSize = 0;
+ uint64_t ZeroFillSize = 0;
+ unsigned ContentAlign = 0;
+ unsigned ZeroFillAlign = 0;
+ };
+
+ using SegmentsRequestMap = DenseMap<unsigned, SegmentRequest>;
+
+ /// Represents an allocation created by the memory manager.
+ ///
+ /// An allocation object is responsible for allocating and owning jit-linker
+/// working and target memory, and for transferring from working to target
+ /// memory.
+ ///
+ class Allocation {
+ public:
+ using FinalizeContinuation = std::function<void(Error)>;
+
+ virtual ~Allocation();
+
+ /// Should return the address of linker working memory for the segment with
+ /// the given protection flags.
+ virtual MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) = 0;
+
+ /// Should return the final address in the target process where the segment
+ /// will reside.
+ virtual JITTargetAddress getTargetMemory(ProtectionFlags Seg) = 0;
+
+ /// Should transfer from working memory to target memory, and release
+ /// working memory.
+ virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0;
+
+ /// Should deallocate target memory.
+ virtual Error deallocate() = 0;
+ };
+
+ virtual ~JITLinkMemoryManager();
+
+ /// Create an Allocation object.
+ virtual Expected<std::unique_ptr<Allocation>>
+ allocate(const SegmentsRequestMap &Request) = 0;
+};
+
+/// A JITLinkMemoryManager that allocates in-process memory.
+class InProcessMemoryManager : public JITLinkMemoryManager {
+public:
+ Expected<std::unique_ptr<Allocation>>
+ allocate(const SegmentsRequestMap &Request) override;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
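To show how SegmentRequest, SegmentsRequestMap, and Allocation fit together, a
caller might request a single read/write segment from the in-process manager
as in the sketch below; the sizes and error handling are illustrative only.

// Sketch: request one RW segment, zero its working memory, and finalize.
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include <cstring>

llvm::Error allocateExample() {
  using namespace llvm;
  using namespace llvm::jitlink;

  auto Prot = static_cast<sys::Memory::ProtectionFlags>(
      sys::Memory::MF_READ | sys::Memory::MF_WRITE);

  JITLinkMemoryManager::SegmentsRequestMap Request;
  Request[Prot] = JITLinkMemoryManager::SegmentRequest(
      /*ContentSize=*/128, /*ContentAlign=*/8,
      /*ZeroFillSize=*/0, /*ZeroFillAlign=*/1);

  InProcessMemoryManager MemMgr;
  auto Alloc = MemMgr.allocate(Request);
  if (!Alloc)
    return Alloc.takeError();

  MutableArrayRef<char> Working = (*Alloc)->getWorkingMemory(Prot);
  std::memset(Working.data(), 0, Working.size());

  // finalizeAsync copies working memory to target memory and applies the
  // requested protections.
  (*Alloc)->finalizeAsync([](Error Err) {
    if (Err)
      consumeError(std::move(Err)); // Real code should report this error.
  });
  return Error::success();
}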
diff --git a/include/llvm/ExecutionEngine/JITLink/MachO.h b/include/llvm/ExecutionEngine/JITLink/MachO.h
new file mode 100644
index 000000000000..7facb657a51c
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/MachO.h
@@ -0,0 +1,30 @@
+//===------- MachO.h - Generic JIT link function for MachO ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic jit-link functions for MachO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// jit-link the given ObjBuffer, which must be a MachO object file.
+///
+/// Uses conservative defaults for GOT and stub handling based on the target
+/// platform.
+void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
diff --git a/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
new file mode 100644
index 000000000000..1d5b586afc32
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
@@ -0,0 +1,63 @@
+//===--- MachO_x86_64.h - JIT link functions for MachO/x86-64 ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for MachO/x86-64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MACHO_X86_64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MACHO_X86_64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+namespace MachO_x86_64_Edges {
+
+enum MachOX86RelocationKind : Edge::Kind {
+ Branch32 = Edge::FirstRelocation,
+ Pointer64,
+ Pointer64Anon,
+ PCRel32,
+ PCRel32Minus1,
+ PCRel32Minus2,
+ PCRel32Minus4,
+ PCRel32Anon,
+ PCRel32Minus1Anon,
+ PCRel32Minus2Anon,
+ PCRel32Minus4Anon,
+ PCRel32GOTLoad,
+ PCRel32GOT,
+ PCRel32TLV,
+ Delta32,
+ Delta64,
+ NegDelta32,
+ NegDelta64,
+};
+
+} // namespace MachO_x86_64_Edges
+
+/// jit-link the given object buffer, which must be a MachO x86-64 object file.
+///
+/// If PrePrunePasses is empty then a default mark-live pass will be inserted
+/// that will mark all exported atoms live. If PrePrunePasses is not empty, the
+/// caller is responsible for including a pass to mark atoms as live.
+///
+/// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will
+/// be inserted. If PostPrunePasses is not empty then the caller is responsible
+/// for including a pass to insert GOT and stub edges.
+void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx);
+
+/// Return the string name of the given MachO x86-64 edge kind.
+StringRef getMachOX86RelocationKindName(Edge::Kind R);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MACHO_X86_64_H
diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h
index 05c9590726df..b14154c5b5e8 100644
--- a/include/llvm/ExecutionEngine/JITSymbol.h
+++ b/include/llvm/ExecutionEngine/JITSymbol.h
@@ -1,9 +1,8 @@
//===- JITSymbol.h - JIT symbol abstraction ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,7 +55,7 @@ template <typename T> JITTargetAddress pointerToJITTargetAddress(T *Ptr) {
class JITSymbolFlags {
public:
using UnderlyingType = uint8_t;
- using TargetFlagsType = uint64_t;
+ using TargetFlagsType = uint8_t;
enum FlagNames : UnderlyingType {
None = 0,
@@ -66,15 +65,9 @@ public:
Absolute = 1U << 3,
Exported = 1U << 4,
Callable = 1U << 5,
- Lazy = 1U << 6,
- Materializing = 1U << 7,
- LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Materializing)
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Callable)
};
- static JITSymbolFlags stripTransientFlags(JITSymbolFlags Orig) {
- return static_cast<FlagNames>(Orig.Flags & ~Lazy & ~Materializing);
- }
-
/// Default-construct a JITSymbolFlags instance.
JITSymbolFlags() = default;
@@ -84,7 +77,7 @@ public:
/// Construct a JITSymbolFlags instance from the given flags and target
/// flags.
JITSymbolFlags(FlagNames Flags, TargetFlagsType TargetFlags)
- : Flags(Flags), TargetFlags(TargetFlags) {}
+ : TargetFlags(TargetFlags), Flags(Flags) {}
 /// Implicitly convert to bool. Returns true if any flag is set.
explicit operator bool() const { return Flags != None || TargetFlags != 0; }
@@ -111,19 +104,6 @@ public:
return (Flags & HasError) == HasError;
}
- /// Returns true if this is a lazy symbol.
- /// This flag is used internally by the JIT APIs to track
- /// materialization states.
- bool isLazy() const { return Flags & Lazy; }
-
- /// Returns true if this symbol is in the process of being
- /// materialized.
- bool isMaterializing() const { return Flags & Materializing; }
-
- /// Returns true if this symbol is fully materialized.
- /// (i.e. neither lazy, nor materializing).
- bool isMaterialized() const { return !(Flags & (Lazy | Materializing)); }
-
/// Returns true if the Weak flag is set.
bool isWeak() const {
return (Flags & Weak) == Weak;
@@ -168,8 +148,8 @@ public:
fromObjectSymbol(const object::SymbolRef &Symbol);
private:
- FlagNames Flags = None;
TargetFlagsType TargetFlags = 0;
+ FlagNames Flags = None;
};
inline JITSymbolFlags operator&(const JITSymbolFlags &LHS,
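A hedged sketch of client code after the Lazy and Materializing flags are removed: symbol flags now describe only the symbol itself, and materialization progress is tracked by the new SymbolState in Orc/Core.h (later in this patch). The helper function below is illustrative, not part of the patch.

    // Illustrative sketch only: composing the remaining flags.
    #include "llvm/ExecutionEngine/JITSymbol.h"
    using llvm::JITSymbolFlags;

    void classify() {
      JITSymbolFlags Flags = JITSymbolFlags::Exported | JITSymbolFlags::Callable;
      if (Flags.isCallable() && Flags.isExported()) {
        // Treat the symbol as a public entry point.
      }
    }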
diff --git a/include/llvm/ExecutionEngine/MCJIT.h b/include/llvm/ExecutionEngine/MCJIT.h
index 66ddb7cdb875..8253bf98963b 100644
--- a/include/llvm/ExecutionEngine/MCJIT.h
+++ b/include/llvm/ExecutionEngine/MCJIT.h
@@ -1,9 +1,8 @@
//===-- MCJIT.h - MC-Based Just-In-Time Execution Engine --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h
index 05da594a94a8..b13d7f6e245b 100644
--- a/include/llvm/ExecutionEngine/OProfileWrapper.h
+++ b/include/llvm/ExecutionEngine/OProfileWrapper.h
@@ -1,9 +1,8 @@
//===-- OProfileWrapper.h - OProfile JIT API Wrapper ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines a OProfileWrapper object that detects if the oprofile
diff --git a/include/llvm/ExecutionEngine/ObjectCache.h b/include/llvm/ExecutionEngine/ObjectCache.h
index 077044408e09..47e94f18a1c7 100644
--- a/include/llvm/ExecutionEngine/ObjectCache.h
+++ b/include/llvm/ExecutionEngine/ObjectCache.h
@@ -1,9 +1,8 @@
//===-- ObjectCache.h - Class definition for the ObjectCache ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 884878925cde..5f593a27cad6 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -1,9 +1,8 @@
//===- CompileOnDemandLayer.h - Compile each function on demand -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -265,13 +264,26 @@ public:
std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>;
/// Construct a compile-on-demand layer instance.
- LegacyCompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
- SymbolResolverGetter GetSymbolResolver,
- SymbolResolverSetter SetSymbolResolver,
- PartitioningFtor Partition,
- CompileCallbackMgrT &CallbackMgr,
- IndirectStubsManagerBuilderT CreateIndirectStubsManager,
- bool CloneStubsIntoPartitions = true)
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyCompileOnDemandLayer(
+ ExecutionSession &ES, BaseLayerT &BaseLayer,
+ SymbolResolverGetter GetSymbolResolver,
+ SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+ CompileCallbackMgrT &CallbackMgr,
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+ bool CloneStubsIntoPartitions = true),
+ "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+ "use "
+ "the ORCv2 LegacyCompileOnDemandLayer instead");
+
+ /// Legacy layer constructor with deprecation acknowledgement.
+ LegacyCompileOnDemandLayer(
+ ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
+ BaseLayerT &BaseLayer, SymbolResolverGetter GetSymbolResolver,
+ SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+ CompileCallbackMgrT &CallbackMgr,
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+ bool CloneStubsIntoPartitions = true)
: ES(ES), BaseLayer(BaseLayer),
GetSymbolResolver(std::move(GetSymbolResolver)),
SetSymbolResolver(std::move(SetSymbolResolver)),
@@ -730,8 +742,24 @@ private:
bool CloneStubsIntoPartitions;
};
-} // end namespace orc
+template <typename BaseLayerT, typename CompileCallbackMgrT,
+ typename IndirectStubsMgrT>
+LegacyCompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT, IndirectStubsMgrT>::
+ LegacyCompileOnDemandLayer(
+ ExecutionSession &ES, BaseLayerT &BaseLayer,
+ SymbolResolverGetter GetSymbolResolver,
+ SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+ CompileCallbackMgrT &CallbackMgr,
+ IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+ bool CloneStubsIntoPartitions)
+ : ES(ES), BaseLayer(BaseLayer),
+ GetSymbolResolver(std::move(GetSymbolResolver)),
+ SetSymbolResolver(std::move(SetSymbolResolver)),
+ Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr),
+ CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
+ CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
+} // end namespace orc
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
diff --git a/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/include/llvm/ExecutionEngine/Orc/CompileUtils.h
index f34f88311ba5..eb6d84e8cbb4 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileUtils.h
@@ -1,9 +1,8 @@
//===- CompileUtils.h - Utilities for compiling IR in the JIT ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,28 +13,21 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
#define LLVM_EXECUTIONENGINE_ORC_COMPILEUTILS_H
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
#include <memory>
namespace llvm {
class MCContext;
+class MemoryBuffer;
class Module;
+class ObjectCache;
+class TargetMachine;
namespace orc {
+class JITTargetMachineBuilder;
+
/// Simple compile functor: Takes a single IR module and returns an ObjectFile.
/// This compiler supports a single compilation thread and LLVMContext only.
/// For multithreaded compilation, use ConcurrentIRCompiler below.
@@ -51,56 +43,32 @@ public:
void setObjectCache(ObjectCache *NewCache) { ObjCache = NewCache; }
/// Compile a Module to an ObjectFile.
- CompileResult operator()(Module &M) {
- CompileResult CachedObject = tryToLoadFromObjectCache(M);
- if (CachedObject)
- return CachedObject;
-
- SmallVector<char, 0> ObjBufferSV;
-
- {
- raw_svector_ostream ObjStream(ObjBufferSV);
-
- legacy::PassManager PM;
- MCContext *Ctx;
- if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
- llvm_unreachable("Target does not support MC emission.");
- PM.run(M);
- }
-
- auto ObjBuffer =
- llvm::make_unique<SmallVectorMemoryBuffer>(std::move(ObjBufferSV));
- auto Obj =
- object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
-
- if (Obj) {
- notifyObjectCompiled(M, *ObjBuffer);
- return std::move(ObjBuffer);
- }
-
- // TODO: Actually report errors helpfully.
- consumeError(Obj.takeError());
- return nullptr;
- }
+ CompileResult operator()(Module &M);
private:
-
- CompileResult tryToLoadFromObjectCache(const Module &M) {
- if (!ObjCache)
- return CompileResult();
-
- return ObjCache->getObject(&M);
- }
-
- void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer) {
- if (ObjCache)
- ObjCache->notifyObjectCompiled(&M, ObjBuffer.getMemBufferRef());
- }
+ CompileResult tryToLoadFromObjectCache(const Module &M);
+ void notifyObjectCompiled(const Module &M, const MemoryBuffer &ObjBuffer);
TargetMachine &TM;
ObjectCache *ObjCache = nullptr;
};
+/// A SimpleCompiler that owns its TargetMachine.
+///
+/// This is convenient for clients who don't want to own their TargetMachines,
+/// e.g. LLJIT.
+class TMOwningSimpleCompiler : public SimpleCompiler {
+public:
+ TMOwningSimpleCompiler(std::unique_ptr<TargetMachine> TM,
+ ObjectCache *ObjCache = nullptr)
+ : SimpleCompiler(*TM, ObjCache), TM(std::move(TM)) {}
+
+private:
+ // FIXME: shared because std::functions (and consequently
+ // IRCompileLayer::CompileFunction) are not moveable.
+ std::shared_ptr<llvm::TargetMachine> TM;
+};
+
/// A thread-safe version of SimpleCompiler.
///
/// This class creates a new TargetMachine and SimpleCompiler instance for each
@@ -108,16 +76,11 @@ private:
class ConcurrentIRCompiler {
public:
ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
- ObjectCache *ObjCache = nullptr)
- : JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
+ ObjectCache *ObjCache = nullptr);
void setObjectCache(ObjectCache *ObjCache) { this->ObjCache = ObjCache; }
- std::unique_ptr<MemoryBuffer> operator()(Module &M) {
- auto TM = cantFail(JTMB.createTargetMachine());
- SimpleCompiler C(*TM, ObjCache);
- return C(M);
- }
+ std::unique_ptr<MemoryBuffer> operator()(Module &M);
private:
JITTargetMachineBuilder JTMB;
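A hedged usage sketch for the new TMOwningSimpleCompiler (and the now out-of-line SimpleCompiler::operator()); the Module M is assumed to be available.

    // Illustrative sketch only.
    auto JTMB = cantFail(JITTargetMachineBuilder::detectHost());
    auto TM = cantFail(JTMB.createTargetMachine());
    TMOwningSimpleCompiler Compile(std::move(TM));
    std::unique_ptr<MemoryBuffer> Obj = Compile(M); // compile Module M to an object buffer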
diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h
index 39d306e0bd4c..94a5618233e4 100644
--- a/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/include/llvm/ExecutionEngine/Orc/Core.h
@@ -1,9 +1,8 @@
//===------ Core.h -- Core ORC APIs (Layer, JITDylib, etc.) -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
@@ -34,6 +34,7 @@ class ExecutionSession;
class MaterializationUnit;
class MaterializationResponsibility;
class JITDylib;
+enum class SymbolState : uint8_t;
/// VModuleKey provides a unique identifier (allocated and managed by
/// ExecutionSessions) for a module added to the JIT.
@@ -57,6 +58,18 @@ using SymbolDependenceMap = DenseMap<JITDylib *, SymbolNameSet>;
/// A list of (JITDylib*, bool) pairs.
using JITDylibSearchList = std::vector<std::pair<JITDylib *, bool>>;
+struct SymbolAliasMapEntry {
+ SymbolAliasMapEntry() = default;
+ SymbolAliasMapEntry(SymbolStringPtr Aliasee, JITSymbolFlags AliasFlags)
+ : Aliasee(std::move(Aliasee)), AliasFlags(AliasFlags) {}
+
+ SymbolStringPtr Aliasee;
+ JITSymbolFlags AliasFlags;
+};
+
+/// A map of Symbols to (Symbol, Flags) pairs.
+using SymbolAliasMap = DenseMap<SymbolStringPtr, SymbolAliasMapEntry>;
+
/// Render a SymbolStringPtr.
raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym);
@@ -88,12 +101,15 @@ raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU);
/// Render a JITDylibSearchList.
raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs);
+/// Render a SymbolAliasMap.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases);
+
+/// Render a SymbolState.
+raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S);
+
/// Callback to notify client that symbols have been resolved.
using SymbolsResolvedCallback = std::function<void(Expected<SymbolMap>)>;
-/// Callback to notify client that symbols are ready for execution.
-using SymbolsReadyCallback = std::function<void(Error)>;
-
/// Callback to register the dependencies for a given query.
using RegisterDependenciesFunction =
std::function<void(const SymbolDependenceMap &)>;
@@ -175,7 +191,7 @@ public:
/// Note: The returned flags may have transient flags (Lazy, Materializing)
/// set. These should be stripped with JITSymbolFlags::stripTransientFlags
/// before using.
- const SymbolFlagsMap &getSymbols() { return SymbolFlags; }
+ const SymbolFlagsMap &getSymbols() const { return SymbolFlags; }
/// Returns the names of any symbols covered by this
/// MaterializationResponsibility object that have queries pending. This
@@ -189,12 +205,12 @@ public:
/// symbols must be ones covered by this MaterializationResponsibility
/// instance. Individual calls to this method may resolve a subset of the
/// symbols, but all symbols must have been resolved prior to calling emit.
- void resolve(const SymbolMap &Symbols);
+ void notifyResolved(const SymbolMap &Symbols);
/// Notifies the target JITDylib (and any pending queries on that JITDylib)
/// that all symbols covered by this MaterializationResponsibility instance
/// have been emitted.
- void emit();
+ void notifyEmitted();
/// Adds new symbols to the JITDylib and this responsibility instance.
/// JITDylib entries start out in the materializing state.
@@ -334,18 +350,6 @@ absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) {
std::move(Symbols), std::move(K));
}
-struct SymbolAliasMapEntry {
- SymbolAliasMapEntry() = default;
- SymbolAliasMapEntry(SymbolStringPtr Aliasee, JITSymbolFlags AliasFlags)
- : Aliasee(std::move(Aliasee)), AliasFlags(AliasFlags) {}
-
- SymbolStringPtr Aliasee;
- JITSymbolFlags AliasFlags;
-};
-
-/// A map of Symbols to (Symbol, Flags) pairs.
-using SymbolAliasMap = DenseMap<SymbolStringPtr, SymbolAliasMapEntry>;
-
/// A materialization unit for symbol aliases. Allows existing symbols to be
/// aliased with alternate flags.
class ReExportsMaterializationUnit : public MaterializationUnit {
@@ -419,7 +423,7 @@ public:
ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false,
SymbolPredicate Allow = SymbolPredicate());
- SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
+ Expected<SymbolNameSet> operator()(JITDylib &JD, const SymbolNameSet &Names);
private:
JITDylib &SourceJD;
@@ -427,6 +431,15 @@ private:
SymbolPredicate Allow;
};
+/// Represents the state that a symbol has reached during materialization.
+enum class SymbolState : uint8_t {
+ Invalid, /// No symbol should be in this state.
+ NeverSearched, /// Added to the symbol table, never queried.
+ Materializing, /// Queried, materialization begun.
+ Resolved, /// Assigned address, still materializing.
+ Ready = 0x3f /// Ready and safe for clients to access.
+};
+
/// A symbol query that returns results via a callback when results are
/// ready.
///
@@ -437,38 +450,30 @@ class AsynchronousSymbolQuery {
friend class JITSymbolResolverAdapter;
public:
-
- /// Create a query for the given symbols, notify-resolved and
- /// notify-ready callbacks.
+ /// Create a query for the given symbols. The NotifyComplete
+ /// callback will be called once all queried symbols reach the given
+ /// minimum state.
AsynchronousSymbolQuery(const SymbolNameSet &Symbols,
- SymbolsResolvedCallback NotifySymbolsResolved,
- SymbolsReadyCallback NotifySymbolsReady);
+ SymbolState RequiredState,
+ SymbolsResolvedCallback NotifyComplete);
- /// Set the resolved symbol information for the given symbol name.
- void resolve(const SymbolStringPtr &Name, JITEvaluatedSymbol Sym);
+ /// Notify the query that a requested symbol has reached the required state.
+ void notifySymbolMetRequiredState(const SymbolStringPtr &Name,
+ JITEvaluatedSymbol Sym);
/// Returns true if all symbols covered by this query have been
/// resolved.
- bool isFullyResolved() const { return NotYetResolvedCount == 0; }
+ bool isComplete() const { return OutstandingSymbolsCount == 0; }
- /// Call the NotifySymbolsResolved callback.
+ /// Call the NotifyComplete callback.
///
- /// This should only be called if all symbols covered by the query have been
- /// resolved.
- void handleFullyResolved();
-
- /// Notify the query that a requested symbol is ready for execution.
- void notifySymbolReady();
-
- /// Returns true if all symbols covered by this query are ready.
- bool isFullyReady() const { return NotYetReadyCount == 0; }
-
- /// Calls the NotifySymbolsReady callback.
- ///
- /// This should only be called if all symbols covered by this query are ready.
- void handleFullyReady();
+ /// This should only be called if all symbols covered by the query have
+ /// reached the specified state.
+ void handleComplete();
private:
+ SymbolState getRequiredState() { return RequiredState; }
+
void addQueryDependence(JITDylib &JD, SymbolStringPtr Name);
void removeQueryDependence(JITDylib &JD, const SymbolStringPtr &Name);
@@ -479,12 +484,11 @@ private:
void detach();
- SymbolsResolvedCallback NotifySymbolsResolved;
- SymbolsReadyCallback NotifySymbolsReady;
+ SymbolsResolvedCallback NotifyComplete;
SymbolDependenceMap QueryRegistrations;
SymbolMap ResolvedSymbols;
- size_t NotYetResolvedCount;
- size_t NotYetReadyCount;
+ size_t OutstandingSymbolsCount;
+ SymbolState RequiredState;
};
 /// A symbol table that supports asynchronous symbol queries.
@@ -498,7 +502,7 @@ class JITDylib {
friend class ExecutionSession;
friend class MaterializationResponsibility;
public:
- using GeneratorFunction = std::function<SymbolNameSet(
+ using GeneratorFunction = std::function<Expected<SymbolNameSet>(
JITDylib &Parent, const SymbolNameSet &Names)>;
using AsynchronousSymbolQuerySet =
@@ -596,7 +600,7 @@ public:
/// Search the given JITDylib for the symbols in Symbols. If found, store
/// the flags for each symbol in Flags. Returns any unresolved symbols.
- SymbolFlagsMap lookupFlags(const SymbolNameSet &Names);
+ Expected<SymbolFlagsMap> lookupFlags(const SymbolNameSet &Names);
/// Dump current JITDylib state to OS.
void dump(raw_ostream &OS);
@@ -609,8 +613,8 @@ public:
/// and the query will not be applied. The Query is not failed and can be
/// re-used in a subsequent lookup once the symbols have been added, or
/// manually failed.
- SymbolNameSet legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Names);
+ Expected<SymbolNameSet>
+ legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names);
private:
using AsynchronousSymbolQueryList =
@@ -627,40 +631,92 @@ private:
DenseMap<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>;
struct MaterializingInfo {
- AsynchronousSymbolQueryList PendingQueries;
SymbolDependenceMap Dependants;
SymbolDependenceMap UnemittedDependencies;
bool IsEmitted = false;
+
+ void addQuery(std::shared_ptr<AsynchronousSymbolQuery> Q);
+ void removeQuery(const AsynchronousSymbolQuery &Q);
+ AsynchronousSymbolQueryList takeQueriesMeeting(SymbolState RequiredState);
+ AsynchronousSymbolQueryList takeAllQueries();
+ bool hasQueriesPending() const { return !PendingQueries.empty(); }
+ const AsynchronousSymbolQueryList &pendingQueries() const {
+ return PendingQueries;
+ }
+
+ private:
+ AsynchronousSymbolQueryList PendingQueries;
};
using MaterializingInfosMap = DenseMap<SymbolStringPtr, MaterializingInfo>;
- using LookupImplActionFlags = enum {
- None = 0,
- NotifyFullyResolved = 1 << 0U,
- NotifyFullyReady = 1 << 1U,
- LLVM_MARK_AS_BITMASK_ENUM(NotifyFullyReady)
+ class SymbolTableEntry {
+ public:
+ SymbolTableEntry() = default;
+ SymbolTableEntry(JITSymbolFlags Flags)
+ : Flags(Flags), State(static_cast<uint8_t>(SymbolState::NeverSearched)),
+ MaterializerAttached(false), PendingRemoval(false) {}
+
+ JITTargetAddress getAddress() const { return Addr; }
+ JITSymbolFlags getFlags() const { return Flags; }
+ SymbolState getState() const { return static_cast<SymbolState>(State); }
+
+ bool isInMaterializationPhase() const {
+ return getState() == SymbolState::Materializing ||
+ getState() == SymbolState::Resolved;
+ }
+
+ bool hasMaterializerAttached() const { return MaterializerAttached; }
+ bool isPendingRemoval() const { return PendingRemoval; }
+
+ void setAddress(JITTargetAddress Addr) { this->Addr = Addr; }
+ void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; }
+ void setState(SymbolState State) {
+ assert(static_cast<uint8_t>(State) < (1 << 6) &&
+ "State does not fit in bitfield");
+ this->State = static_cast<uint8_t>(State);
+ }
+
+ void setMaterializerAttached(bool MaterializerAttached) {
+ this->MaterializerAttached = MaterializerAttached;
+ }
+
+ void setPendingRemoval(bool PendingRemoval) {
+ this->PendingRemoval = PendingRemoval;
+ }
+
+ JITEvaluatedSymbol getSymbol() const {
+ return JITEvaluatedSymbol(Addr, Flags);
+ }
+
+ private:
+ JITTargetAddress Addr = 0;
+ JITSymbolFlags Flags;
+ uint8_t State : 6;
+ uint8_t MaterializerAttached : 1;
+ uint8_t PendingRemoval : 1;
};
+ using SymbolTable = DenseMap<SymbolStringPtr, SymbolTableEntry>;
+
JITDylib(ExecutionSession &ES, std::string Name);
Error defineImpl(MaterializationUnit &MU);
- SymbolNameSet lookupFlagsImpl(SymbolFlagsMap &Flags,
- const SymbolNameSet &Names);
+ Expected<SymbolNameSet> lookupFlagsImpl(SymbolFlagsMap &Flags,
+ const SymbolNameSet &Names);
- void lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, bool MatchNonExported,
- MaterializationUnitList &MUs);
+ Error lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs);
void lodgeQueryImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
SymbolNameSet &Unresolved, bool MatchNonExported,
MaterializationUnitList &MUs);
- LookupImplActionFlags
- lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolNameSet &Unresolved);
+ bool lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
+ SymbolNameSet &Unresolved);
void detachQueryHelper(AsynchronousSymbolQuery &Q,
const SymbolNameSet &QuerySymbols);
@@ -686,7 +742,7 @@ private:
ExecutionSession &ES;
std::string JITDylibName;
- SymbolMap Symbols;
+ SymbolTable Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
GeneratorFunction DefGenerator;
@@ -727,7 +783,15 @@ public:
/// the ExecutionSession.
JITDylib &getMainJITDylib();
+ /// Return a pointer to the "name" JITDylib.
+  /// Ownership of the JITDylib remains within the ExecutionSession.
+ JITDylib *getJITDylibByName(StringRef Name);
+
/// Add a new JITDylib to this ExecutionSession.
+ ///
+ /// The JITDylib Name is required to be unique. Clients should verify that
+ /// names are not being re-used (e.g. by calling getJITDylibByName) if names
+ /// are based on user input.
JITDylib &createJITDylib(std::string Name,
bool AddToMainDylibSearchOrder = true);
@@ -769,7 +833,7 @@ public:
/// Do not use -- this will be removed soon.
Expected<SymbolMap>
legacyLookup(LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
- bool WaiUntilReady,
+ SymbolState RequiredState,
RegisterDependenciesFunction RegisterDependencies);
/// Search the given JITDylib list for the given symbols.
@@ -779,11 +843,8 @@ public:
/// (hidden visibility) symbols in that dylib (true means match against
/// non-exported symbols, false means do not match).
///
- /// The OnResolve callback will be called once all requested symbols are
- /// resolved, or if an error occurs prior to resolution.
- ///
- /// The OnReady callback will be called once all requested symbols are ready,
- /// or if an error occurs after resolution but before all symbols are ready.
+ /// The NotifyComplete callback will be called once all requested symbols
+ /// reach the required state.
///
/// If all symbols are found, the RegisterDependencies function will be called
/// while the session lock is held. This gives clients a chance to register
@@ -795,7 +856,7 @@ public:
/// client to get an address to call) then the value NoDependenciesToRegister
/// can be used.
void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
- SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+ SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete,
RegisterDependenciesFunction RegisterDependencies);
/// Blocking version of lookup above. Returns the resolved symbol map.
@@ -807,9 +868,9 @@ public:
/// error will be reported via reportErrors.
Expected<SymbolMap> lookup(const JITDylibSearchList &SearchOrder,
const SymbolNameSet &Symbols,
+ SymbolState RequiredState = SymbolState::Ready,
RegisterDependenciesFunction RegisterDependencies =
- NoDependenciesToRegister,
- bool WaitUntilReady = true);
+ NoDependenciesToRegister);
/// Convenience version of blocking lookup.
/// Searches each of the JITDylibs in the search order in turn for the given
@@ -832,10 +893,11 @@ public:
/// Materialize the given unit.
void dispatchMaterialization(JITDylib &JD,
std::unique_ptr<MaterializationUnit> MU) {
- LLVM_DEBUG(runSessionLocked([&]() {
- dbgs() << "Compiling, for " << JD.getName() << ", " << *MU
- << "\n";
- }););
+ LLVM_DEBUG({
+ runSessionLocked([&]() {
+ dbgs() << "Dispatching " << *MU << " for " << JD.getName() << "\n";
+ });
+ });
DispatchMaterialization(JD, std::move(MU));
}
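A hedged sketch of the reworked lookup interface: the separate OnResolve/OnReady callbacks are gone, and callers instead name the SymbolState they want to wait for. ES and MainJD are assumed to exist.

    // Illustrative sketch only: block until "foo" is Resolved (has an address),
    // without waiting for it to become fully Ready.
    auto Syms = ES.lookup({{&MainJD, false}}, SymbolNameSet({ES.intern("foo")}),
                          SymbolState::Resolved);
    if (!Syms)
      logAllUnhandledErrors(Syms.takeError(), errs(), "lookup failed: ");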
diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index 88559f822e5d..75865920c741 100644
--- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -1,9 +1,8 @@
//===- ExecutionUtils.h - Utilities for executing code in Orc ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -98,7 +97,14 @@ class LegacyCtorDtorRunner {
public:
/// Construct a CtorDtorRunner for the given range using the given
/// name mangling function.
- LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames,
+ VModuleKey K),
+ "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
+ "Please use the ORCv2 CtorDtorRunner utility instead");
+
+ LegacyCtorDtorRunner(ORCv1DeprecationAcknowledgement,
+ std::vector<std::string> CtorDtorNames, VModuleKey K)
: CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
/// Run the recorded constructors/destructors through the given JIT
@@ -129,6 +135,11 @@ private:
orc::VModuleKey K;
};
+template <typename JITLayerT>
+LegacyCtorDtorRunner<JITLayerT>::LegacyCtorDtorRunner(
+ std::vector<std::string> CtorDtorNames, VModuleKey K)
+ : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
+
class CtorDtorRunner {
public:
CtorDtorRunner(JITDylib &JD) : JD(JD) {}
@@ -181,7 +192,14 @@ class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
/// Create a runtime-overrides class.
template <typename MangleFtorT>
- LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle),
+ "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
+ "Please use the ORCv2 LocalCXXRuntimeOverrides utility instead");
+
+ template <typename MangleFtorT>
+ LegacyLocalCXXRuntimeOverrides(ORCv1DeprecationAcknowledgement,
+ const MangleFtorT &Mangle) {
addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
}
@@ -202,6 +220,13 @@ private:
StringMap<JITTargetAddress> CXXRuntimeOverrides;
};
+template <typename MangleFtorT>
+LegacyLocalCXXRuntimeOverrides::LegacyLocalCXXRuntimeOverrides(
+ const MangleFtorT &Mangle) {
+ addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
+ addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
+}
+
class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
public:
Error enable(JITDylib &JD, MangleAndInterner &Mangler);
@@ -218,28 +243,29 @@ public:
/// Create a DynamicLibrarySearchGenerator that searches for symbols in the
/// given sys::DynamicLibrary.
+ ///
/// If the Allow predicate is given then only symbols matching the predicate
- /// will be searched for in the DynamicLibrary. If the predicate is not given
- /// then all symbols will be searched for.
- DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, const DataLayout &DL,
+ /// will be searched for. If the predicate is not given then all symbols will
+ /// be searched for.
+ DynamicLibrarySearchGenerator(sys::DynamicLibrary Dylib, char GlobalPrefix,
SymbolPredicate Allow = SymbolPredicate());
/// Permanently loads the library at the given path and, on success, returns
/// a DynamicLibrarySearchGenerator that will search it for symbol definitions
/// in the library. On failure returns the reason the library failed to load.
static Expected<DynamicLibrarySearchGenerator>
- Load(const char *FileName, const DataLayout &DL,
+ Load(const char *FileName, char GlobalPrefix,
SymbolPredicate Allow = SymbolPredicate());
/// Creates a DynamicLibrarySearchGenerator that searches for symbols in
/// the current process.
static Expected<DynamicLibrarySearchGenerator>
- GetForCurrentProcess(const DataLayout &DL,
+ GetForCurrentProcess(char GlobalPrefix,
SymbolPredicate Allow = SymbolPredicate()) {
- return Load(nullptr, DL, std::move(Allow));
+ return Load(nullptr, GlobalPrefix, std::move(Allow));
}
- SymbolNameSet operator()(JITDylib &JD, const SymbolNameSet &Names);
+ Expected<SymbolNameSet> operator()(JITDylib &JD, const SymbolNameSet &Names);
private:
sys::DynamicLibrary Dylib;
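A hedged sketch of the updated DynamicLibrarySearchGenerator interface, which now takes the global symbol prefix character instead of a DataLayout; JD (a JITDylib) and DL (a DataLayout) are assumed.

    // Illustrative sketch only: make process symbols visible to a JITDylib.
    JD.setGenerator(cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
        DL.getGlobalPrefix())));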
diff --git a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
index a8a88d7cb2d2..a4e43d4e1c9c 100644
--- a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
@@ -1,9 +1,8 @@
//===- GlobalMappingLayer.h - Run all IR through a functor ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index 30d71e69cd70..52223a83ad42 100644
--- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -1,9 +1,8 @@
//===- IRCompileLayer.h -- Eagerly compile IR for JIT -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,8 +63,18 @@ public:
/// Construct an LegacyIRCompileLayer with the given BaseLayer, which must
/// implement the ObjectLayer concept.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyIRCompileLayer(
+ BaseLayerT &BaseLayer, CompileFtor Compile,
+ NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback()),
+ "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+ "use "
+ "the ORCv2 IRCompileLayer instead");
+
+ /// Legacy layer constructor with deprecation acknowledgement.
LegacyIRCompileLayer(
- BaseLayerT &BaseLayer, CompileFtor Compile,
+ ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
+ CompileFtor Compile,
NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback())
: BaseLayer(BaseLayer), Compile(std::move(Compile)),
NotifyCompiled(std::move(NotifyCompiled)) {}
@@ -123,8 +132,14 @@ private:
NotifyCompiledCallback NotifyCompiled;
};
-} // end namespace orc
+template <typename BaseLayerT, typename CompileFtor>
+LegacyIRCompileLayer<BaseLayerT, CompileFtor>::LegacyIRCompileLayer(
+ BaseLayerT &BaseLayer, CompileFtor Compile,
+ NotifyCompiledCallback NotifyCompiled)
+ : BaseLayer(BaseLayer), Compile(std::move(Compile)),
+ NotifyCompiled(std::move(NotifyCompiled)) {}
+} // end namespace orc
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_IRCOMPILINGLAYER_H
diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 49e65b9f2a80..1b4c8b6cd95f 100644
--- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -1,9 +1,8 @@
//===- IRTransformLayer.h - Run all IR through a functor --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,9 +56,17 @@ class LegacyIRTransformLayer {
public:
/// Construct an LegacyIRTransformLayer with the given BaseLayer
- LegacyIRTransformLayer(BaseLayerT &BaseLayer,
- TransformFtor Transform = TransformFtor())
- : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyIRTransformLayer(BaseLayerT &BaseLayer,
+ TransformFtor Transform = TransformFtor()),
+ "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+ "use "
+ "the ORCv2 IRTransformLayer instead");
+
+ /// Legacy layer constructor with deprecation acknowledgement.
+ LegacyIRTransformLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
+ TransformFtor Transform = TransformFtor())
+ : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
/// Apply the transform functor to the module, then add the module to
/// the layer below, along with the memory manager and symbol resolver.
@@ -109,6 +116,11 @@ private:
TransformFtor Transform;
};
+template <typename BaseLayerT, typename TransformFtor>
+LegacyIRTransformLayer<BaseLayerT, TransformFtor>::LegacyIRTransformLayer(
+ BaseLayerT &BaseLayer, TransformFtor Transform)
+ : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
} // end namespace orc
} // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index c2527802f6a7..a7ed5372d1e4 100644
--- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -1,9 +1,8 @@
//===- IndirectionUtils.h - Utilities for adding indirections ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -147,13 +146,13 @@ private:
std::error_code EC;
auto TrampolineBlock =
sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- sys::Process::getPageSize(), nullptr,
+ sys::Process::getPageSizeEstimate(), nullptr,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
if (EC)
return errorCodeToError(EC);
unsigned NumTrampolines =
- (sys::Process::getPageSize() - ORCABI::PointerSize) /
+ (sys::Process::getPageSizeEstimate() - ORCABI::PointerSize) /
ORCABI::TrampolineSize;
uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
diff --git a/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
index eb9b6bf2dea6..bcbd72e68f15 100644
--- a/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
+++ b/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -1,9 +1,8 @@
//===- JITTargetMachineBuilder.h - Build TargetMachines for JIT -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/LLJIT.h b/include/llvm/ExecutionEngine/Orc/LLJIT.h
index ce3e5d519c73..0aac1916423f 100644
--- a/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -1,9 +1,8 @@
//===----- LLJIT.h -- An ORC-based JIT for compiling LLVM IR ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,35 +20,49 @@
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/Support/ThreadPool.h"
namespace llvm {
namespace orc {
+class LLJITBuilderState;
+class LLLazyJITBuilderState;
+
/// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT.
+///
+/// Create instances using LLJITBuilder.
class LLJIT {
+ template <typename, typename, typename> friend class LLJITBuilderSetters;
+
public:
+ static Expected<std::unique_ptr<LLJIT>> Create(LLJITBuilderState &S);
/// Destruct this instance. If a multi-threaded instance, waits for all
/// compile threads to complete.
~LLJIT();
- /// Create an LLJIT instance.
- /// If NumCompileThreads is not equal to zero, creates a multi-threaded
- /// LLJIT with the given number of compile threads.
- static Expected<std::unique_ptr<LLJIT>>
- Create(JITTargetMachineBuilder JTMB, DataLayout DL,
- unsigned NumCompileThreads = 0);
-
/// Returns the ExecutionSession for this instance.
ExecutionSession &getExecutionSession() { return *ES; }
+ /// Returns a reference to the DataLayout for this instance.
+ const DataLayout &getDataLayout() const { return DL; }
+
/// Returns a reference to the JITDylib representing the JIT'd main program.
JITDylib &getMainJITDylib() { return Main; }
+ /// Returns the JITDylib with the given name, or nullptr if no JITDylib with
+ /// that name exists.
+ JITDylib *getJITDylibByName(StringRef Name) {
+ return ES->getJITDylibByName(Name);
+ }
+
/// Create a new JITDylib with the given name and return a reference to it.
+ ///
+ /// JITDylib names must be unique. If the given name is derived from user
+ /// input or elsewhere in the environment then the client should check
+ /// (e.g. by calling getJITDylibByName) that the given name is not already in
+ /// use.
JITDylib &createJITDylib(std::string Name) {
return ES->createJITDylib(std::move(Name));
}
@@ -57,8 +70,6 @@ public:
/// Convenience method for defining an absolute symbol.
Error defineAbsolute(StringRef Name, JITEvaluatedSymbol Address);
- /// Convenience method for defining an
-
/// Adds an IR module to the given JITDylib.
Error addIRModule(JITDylib &JD, ThreadSafeModule TSM);
@@ -104,17 +115,17 @@ public:
Error runDestructors() { return DtorRunner.run(); }
/// Returns a reference to the ObjLinkingLayer
- RTDyldObjectLinkingLayer &getObjLinkingLayer() { return ObjLinkingLayer; }
+ ObjectLayer &getObjLinkingLayer() { return *ObjLinkingLayer; }
protected:
+ static std::unique_ptr<ObjectLayer>
+ createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES);
- /// Create an LLJIT instance with a single compile thread.
- LLJIT(std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
- DataLayout DL);
+ static Expected<IRCompileLayer::CompileFunction>
+ createCompileFunction(LLJITBuilderState &S, JITTargetMachineBuilder JTMB);
- /// Create an LLJIT instance with multiple compile threads.
- LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
- DataLayout DL, unsigned NumCompileThreads);
+ /// Create an LLJIT instance with a single compile thread.
+ LLJIT(LLJITBuilderState &S, Error &Err);
std::string mangle(StringRef UnmangledName);
@@ -128,8 +139,8 @@ protected:
DataLayout DL;
std::unique_ptr<ThreadPool> CompileThreads;
- RTDyldObjectLinkingLayer ObjLinkingLayer;
- IRCompileLayer CompileLayer;
+ std::unique_ptr<ObjectLayer> ObjLinkingLayer;
+ std::unique_ptr<IRCompileLayer> CompileLayer;
CtorDtorRunner CtorRunner, DtorRunner;
};
@@ -137,25 +148,20 @@ protected:
/// An extended version of LLJIT that supports lazy function-at-a-time
/// compilation of LLVM IR.
class LLLazyJIT : public LLJIT {
-public:
+ template <typename, typename, typename> friend class LLJITBuilderSetters;
- /// Create an LLLazyJIT instance.
- /// If NumCompileThreads is not equal to zero, creates a multi-threaded
- /// LLLazyJIT with the given number of compile threads.
- static Expected<std::unique_ptr<LLLazyJIT>>
- Create(JITTargetMachineBuilder JTMB, DataLayout DL,
- JITTargetAddress ErrorAddr, unsigned NumCompileThreads = 0);
+public:
/// Set an IR transform (e.g. pass manager pipeline) to run on each function
/// when it is compiled.
void setLazyCompileTransform(IRTransformLayer::TransformFunction Transform) {
- TransformLayer.setTransform(std::move(Transform));
+ TransformLayer->setTransform(std::move(Transform));
}
/// Sets the partition function.
void
setPartitionFunction(CompileOnDemandLayer::PartitionFunction Partition) {
- CODLayer.setPartitionFunction(std::move(Partition));
+ CODLayer->setPartitionFunction(std::move(Partition));
}
/// Add a module to be lazily compiled to JITDylib JD.
@@ -169,24 +175,160 @@ public:
private:
// Create a single-threaded LLLazyJIT instance.
- LLLazyJIT(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL,
- std::unique_ptr<LazyCallThroughManager> LCTMgr,
- std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+ LLLazyJIT(LLLazyJITBuilderState &S, Error &Err);
+
+ std::unique_ptr<LazyCallThroughManager> LCTMgr;
+ std::unique_ptr<IRTransformLayer> TransformLayer;
+ std::unique_ptr<CompileOnDemandLayer> CODLayer;
+};
+
+class LLJITBuilderState {
+public:
+ using ObjectLinkingLayerCreator =
+ std::function<std::unique_ptr<ObjectLayer>(ExecutionSession &)>;
+
+ using CompileFunctionCreator =
+ std::function<Expected<IRCompileLayer::CompileFunction>(
+ JITTargetMachineBuilder JTMB)>;
+
+ std::unique_ptr<ExecutionSession> ES;
+ Optional<JITTargetMachineBuilder> JTMB;
+ ObjectLinkingLayerCreator CreateObjectLinkingLayer;
+ CompileFunctionCreator CreateCompileFunction;
+ unsigned NumCompileThreads = 0;
- // Create a multi-threaded LLLazyJIT instance.
- LLLazyJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
- DataLayout DL, unsigned NumCompileThreads,
- std::unique_ptr<LazyCallThroughManager> LCTMgr,
- std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder);
+  /// Called prior to JIT class construction to fix up defaults.
+ Error prepareForConstruction();
+};
+
+template <typename JITType, typename SetterImpl, typename State>
+class LLJITBuilderSetters {
+public:
+ /// Set the JITTargetMachineBuilder for this instance.
+ ///
+ /// If this method is not called, JITTargetMachineBuilder::detectHost will be
+ /// used to construct a default target machine builder for the host platform.
+ SetterImpl &setJITTargetMachineBuilder(JITTargetMachineBuilder JTMB) {
+ impl().JTMB = std::move(JTMB);
+ return impl();
+ }
+
+ /// Return a reference to the JITTargetMachineBuilder.
+ ///
+ Optional<JITTargetMachineBuilder> &getJITTargetMachineBuilder() {
+ return impl().JTMB;
+ }
+ /// Set an ObjectLinkingLayer creation function.
+ ///
+ /// If this method is not called, a default creation function will be used
+ /// that will construct an RTDyldObjectLinkingLayer.
+ SetterImpl &setObjectLinkingLayerCreator(
+ LLJITBuilderState::ObjectLinkingLayerCreator CreateObjectLinkingLayer) {
+ impl().CreateObjectLinkingLayer = std::move(CreateObjectLinkingLayer);
+ return impl();
+ }
+
+ /// Set a CompileFunctionCreator.
+ ///
+  /// If this method is not called, a default creation function will be used
+ /// that will construct a basic IR compile function that is compatible with
+ /// the selected number of threads (SimpleCompiler for '0' compile threads,
+ /// ConcurrentIRCompiler otherwise).
+ SetterImpl &setCompileFunctionCreator(
+ LLJITBuilderState::CompileFunctionCreator CreateCompileFunction) {
+ impl().CreateCompileFunction = std::move(CreateCompileFunction);
+ return impl();
+ }
+
+ /// Set the number of compile threads to use.
+ ///
+ /// If set to zero, compilation will be performed on the execution thread when
+ /// JITing in-process. If set to any other number N, a thread pool of N
+ /// threads will be created for compilation.
+ ///
+ /// If this method is not called, behavior will be as if it were called with
+ /// a zero argument.
+ SetterImpl &setNumCompileThreads(unsigned NumCompileThreads) {
+ impl().NumCompileThreads = NumCompileThreads;
+ return impl();
+ }
+
+ /// Create an instance of the JIT.
+ Expected<std::unique_ptr<JITType>> create() {
+ if (auto Err = impl().prepareForConstruction())
+ return std::move(Err);
+
+ Error Err = Error::success();
+ std::unique_ptr<JITType> J(new JITType(impl(), Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(J);
+ }
+
+protected:
+ SetterImpl &impl() { return static_cast<SetterImpl &>(*this); }
+};
+
+/// Constructs LLJIT instances.
+class LLJITBuilder
+ : public LLJITBuilderState,
+ public LLJITBuilderSetters<LLJIT, LLJITBuilder, LLJITBuilderState> {};
+
+class LLLazyJITBuilderState : public LLJITBuilderState {
+ friend class LLLazyJIT;
+
+public:
+ using IndirectStubsManagerBuilderFunction =
+ std::function<std::unique_ptr<IndirectStubsManager>()>;
+
+ Triple TT;
+ JITTargetAddress LazyCompileFailureAddr = 0;
std::unique_ptr<LazyCallThroughManager> LCTMgr;
- std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder;
+ IndirectStubsManagerBuilderFunction ISMBuilder;
+
+ Error prepareForConstruction();
+};
+
+template <typename JITType, typename SetterImpl, typename State>
+class LLLazyJITBuilderSetters
+ : public LLJITBuilderSetters<JITType, SetterImpl, State> {
+public:
+  /// Set the address in the target address space to call if a lazy compile fails.
+ ///
+ /// If this method is not called then the value will default to 0.
+ SetterImpl &setLazyCompileFailureAddr(JITTargetAddress Addr) {
+ this->impl().LazyCompileFailureAddr = Addr;
+ return this->impl();
+ }
+
+ /// Set the lazy-callthrough manager.
+ ///
+ /// If this method is not called then a default, in-process lazy callthrough
+ /// manager for the host platform will be used.
+ SetterImpl &
+ setLazyCallthroughManager(std::unique_ptr<LazyCallThroughManager> LCTMgr) {
+ this->impl().LCTMgr = std::move(LCTMgr);
+ return this->impl();
+ }
- IRTransformLayer TransformLayer;
- CompileOnDemandLayer CODLayer;
+ /// Set the IndirectStubsManager builder function.
+ ///
+ /// If this method is not called then a default, in-process
+ /// IndirectStubsManager builder for the host platform will be used.
+ SetterImpl &setIndirectStubsManagerBuilder(
+ LLLazyJITBuilderState::IndirectStubsManagerBuilderFunction ISMBuilder) {
+ this->impl().ISMBuilder = std::move(ISMBuilder);
+ return this->impl();
+ }
};
+/// Constructs LLLazyJIT instances.
+class LLLazyJITBuilder
+ : public LLLazyJITBuilderState,
+ public LLLazyJITBuilderSetters<LLLazyJIT, LLLazyJITBuilder,
+ LLLazyJITBuilderState> {};
+
} // End namespace orc
} // End namespace llvm
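A hedged sketch of the builder-based construction that replaces the old static LLJIT::Create(JTMB, DL, NumCompileThreads) entry points; M (a unique_ptr<Module>) and Ctx (a unique_ptr<LLVMContext>) are assumed.

    // Illustrative sketch only.
    auto J = cantFail(LLJITBuilder().setNumCompileThreads(2).create());
    cantFail(J->addIRModule(ThreadSafeModule(std::move(M), std::move(Ctx))));
    auto MainSym = cantFail(J->lookup("main"));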
diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
index 7b6f3d2f92ab..855e31b33549 100644
--- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -1,9 +1,8 @@
//===- LambdaResolverMM - Redirect symbol lookup via a functor --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,7 +24,15 @@ namespace orc {
template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
class LambdaResolver : public LegacyJITSymbolResolver {
public:
- LambdaResolver(DylibLookupFtorT DylibLookupFtor,
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LambdaResolver(DylibLookupFtorT DylibLookupFtor,
+ ExternalLookupFtorT ExternalLookupFtor),
+ "ORCv1 utilities (including resolvers) are deprecated and will be "
+ "removed "
+ "in the next release. Please use ORCv2 (see docs/ORCv2.rst)");
+
+ LambdaResolver(ORCv1DeprecationAcknowledgement,
+ DylibLookupFtorT DylibLookupFtor,
ExternalLookupFtorT ExternalLookupFtor)
: DylibLookupFtor(DylibLookupFtor),
ExternalLookupFtor(ExternalLookupFtor) {}
@@ -43,6 +50,12 @@ private:
ExternalLookupFtorT ExternalLookupFtor;
};
+template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
+LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>::LambdaResolver(
+ DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor)
+ : DylibLookupFtor(DylibLookupFtor), ExternalLookupFtor(ExternalLookupFtor) {
+}
+
template <typename DylibLookupFtorT,
typename ExternalLookupFtorT>
std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
@@ -53,6 +66,17 @@ createLambdaResolver(DylibLookupFtorT DylibLookupFtor,
std::move(ExternalLookupFtor));
}
+template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
+std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
+createLambdaResolver(ORCv1DeprecationAcknowledgement,
+ DylibLookupFtorT DylibLookupFtor,
+ ExternalLookupFtorT ExternalLookupFtor) {
+ using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
+ return make_unique<LR>(AcknowledgeORCv1Deprecation,
+ std::move(DylibLookupFtor),
+ std::move(ExternalLookupFtor));
+}
+
} // end namespace orc
} // end namespace llvm
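A hedged sketch of the new deprecation-acknowledging createLambdaResolver overload; the lambda bodies are placeholders and the std::string parameter type follows typical ORCv1 resolver usage rather than anything mandated by this header.

    // Illustrative sketch only.
    auto Resolver = createLambdaResolver(
        AcknowledgeORCv1Deprecation,
        [](const std::string &Name) -> JITSymbol { return nullptr; },  // in-dylib lookup
        [](const std::string &Name) -> JITSymbol { return nullptr; }); // external lookup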
diff --git a/include/llvm/ExecutionEngine/Orc/Layer.h b/include/llvm/ExecutionEngine/Orc/Layer.h
index cd797445a2e6..8f9bd704395e 100644
--- a/include/llvm/ExecutionEngine/Orc/Layer.h
+++ b/include/llvm/ExecutionEngine/Orc/Layer.h
@@ -1,9 +1,8 @@
//===---------------- Layer.h -- Layer interfaces --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index 46761b0ca7e1..16202d89f861 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -1,9 +1,8 @@
//===- LazyEmittingLayer.h - Lazily emit IR to lower JIT layers -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,8 +34,8 @@ namespace orc {
/// Lazy-emitting IR layer.
///
-/// This layer accepts LLVM IR Modules (via addModule), but does not
-/// immediately emit them the layer below. Instead, emissing to the base layer
+/// This layer accepts LLVM IR Modules (via addModule) but does not
+/// immediately emit them to the layer below. Instead, emission to the base layer
/// is deferred until the first time the client requests the address (via
/// JITSymbol::getAddress) for a symbol contained in this layer.
template <typename BaseLayerT> class LazyEmittingLayer {
@@ -197,7 +196,14 @@ private:
public:
/// Construct a lazy emitting layer.
- LazyEmittingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LazyEmittingLayer(BaseLayerT &BaseLayer),
+ "ORCv1 layers (including LazyEmittingLayer) are deprecated. Please use "
+ "ORCv2, where lazy emission is the default");
+
+ /// Construct a lazy emitting layer.
+ LazyEmittingLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer)
+ : BaseLayer(BaseLayer) {}
/// Add the given module to the lazy emitting layer.
Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
@@ -255,6 +261,10 @@ public:
}
};
+template <typename BaseLayerT>
+LazyEmittingLayer<BaseLayerT>::LazyEmittingLayer(BaseLayerT &BaseLayer)
+ : BaseLayer(BaseLayer) {}
+
} // end namespace orc
} // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/include/llvm/ExecutionEngine/Orc/LazyReexports.h
index b5041325bce2..9fdd1d15f782 100644
--- a/include/llvm/ExecutionEngine/Orc/LazyReexports.h
+++ b/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -1,9 +1,8 @@
//===------ LazyReexports.h -- Utilities for lazy reexports -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/Legacy.h b/include/llvm/ExecutionEngine/Orc/Legacy.h
index 4c6162ac4b8b..f9cbbf6ff180 100644
--- a/include/llvm/ExecutionEngine/Orc/Legacy.h
+++ b/include/llvm/ExecutionEngine/Orc/Legacy.h
@@ -1,9 +1,8 @@
//===--- Legacy.h -- Adapters for ExecutionEngine API interop ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,8 +148,8 @@ lookupWithLegacyFn(ExecutionSession &ES, AsynchronousSymbolQuery &Query,
for (auto &S : Symbols) {
if (JITSymbol Sym = FindSymbol(*S)) {
if (auto Addr = Sym.getAddress()) {
- Query.resolve(S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
- Query.notifySymbolReady();
+ Query.notifySymbolMetRequiredState(
+ S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
NewSymbolsResolved = true;
} else {
ES.legacyFailQuery(Query, Addr.takeError());
@@ -163,11 +162,8 @@ lookupWithLegacyFn(ExecutionSession &ES, AsynchronousSymbolQuery &Query,
SymbolsNotFound.insert(S);
}
- if (NewSymbolsResolved && Query.isFullyResolved())
- Query.handleFullyResolved();
-
- if (NewSymbolsResolved && Query.isFullyReady())
- Query.handleFullyReady();
+ if (NewSymbolsResolved && Query.isComplete())
+ Query.handleComplete();
return SymbolsNotFound;
}
diff --git a/include/llvm/ExecutionEngine/Orc/NullResolver.h b/include/llvm/ExecutionEngine/Orc/NullResolver.h
index 03fefb69a928..ffa37a13d064 100644
--- a/include/llvm/ExecutionEngine/Orc/NullResolver.h
+++ b/include/llvm/ExecutionEngine/Orc/NullResolver.h
@@ -1,9 +1,8 @@
//===------ NullResolver.h - Reject symbol lookup requests ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
new file mode 100644
index 000000000000..c1e7d27f446e
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -0,0 +1,165 @@
+//===-- ObjectLinkingLayer.h - JITLink-based jit linking layer --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains the definition for a JITLink-based, in-process object linking
+// layer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
+#define LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <list>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+namespace jitlink {
+class EHFrameRegistrar;
+} // namespace jitlink
+
+namespace object {
+class ObjectFile;
+} // namespace object
+
+namespace orc {
+
+class ObjectLinkingLayerJITLinkContext;
+
+/// An ObjectLayer implementation built on JITLink.
+///
+/// Clients can use this class to add relocatable object files to an
+/// ExecutionSession, and it typically serves as the base layer (underneath
+/// a compiling layer like IRCompileLayer) for the rest of the JIT.
+class ObjectLinkingLayer : public ObjectLayer {
+ friend class ObjectLinkingLayerJITLinkContext;
+
+public:
+ /// Plugin instances can be added to the ObjectLinkingLayer to receive
+ /// callbacks when code is loaded or emitted, and when JITLink is being
+ /// configured.
+ class Plugin {
+ public:
+ virtual ~Plugin();
+ virtual void modifyPassConfig(MaterializationResponsibility &MR,
+ const Triple &TT,
+ jitlink::PassConfiguration &Config) {}
+ virtual void notifyLoaded(MaterializationResponsibility &MR) {}
+ virtual Error notifyEmitted(MaterializationResponsibility &MR) {
+ return Error::success();
+ }
+ virtual Error notifyRemovingModule(VModuleKey K) {
+ return Error::success();
+ }
+ virtual Error notifyRemovingAllModules() { return Error::success(); }
+ };
+
+ /// Construct an ObjectLinkingLayer using the given ExecutionSession and
+ /// JITLink memory manager.
+ ObjectLinkingLayer(ExecutionSession &ES,
+ jitlink::JITLinkMemoryManager &MemMgr);
+
+ /// Destruct an ObjectLinkingLayer.
+ ~ObjectLinkingLayer();
+
+ /// Add a pass-config modifier.
+ ObjectLinkingLayer &addPlugin(std::unique_ptr<Plugin> P) {
+ std::lock_guard<std::mutex> Lock(LayerMutex);
+ Plugins.push_back(std::move(P));
+ return *this;
+ }
+
+ /// Emit the object.
+ void emit(MaterializationResponsibility R,
+ std::unique_ptr<MemoryBuffer> O) override;
+
+ /// Instructs this ObjectLinkingLayer instance to override the symbol flags
+ /// found in the AtomGraph with the flags supplied by the
+ /// MaterializationResponsibility instance. This is a workaround to support
+ /// symbol visibility in COFF, which does not use the libObject's
+ /// SF_Exported flag. Use only when generating / adding COFF object files.
+ ///
+ /// FIXME: We should be able to remove this if/when COFF properly tracks
+ /// exported symbols.
+ ObjectLinkingLayer &
+ setOverrideObjectFlagsWithResponsibilityFlags(bool OverrideObjectFlags) {
+ this->OverrideObjectFlags = OverrideObjectFlags;
+ return *this;
+ }
+
+ /// If set, this ObjectLinkingLayer instance will claim responsibility
+ /// for any symbols provided by a given object file that were not already in
+ /// the MaterializationResponsibility instance. Setting this flag allows
+ /// higher-level program representations (e.g. LLVM IR) to be added based on
+ /// only a subset of the symbols they provide, without having to write
+ /// intervening layers to scan and add the additional symbols. This trades
+ /// diagnostic quality for convenience, however: if all symbols are enumerated
+ /// up-front then clashes can be detected and reported early (and usually
+ /// deterministically). If this option is set, clashes for the additional
+ /// symbols may not be detected until late, and detection may depend on
+ /// the flow of control through JIT'd code. Use with care.
+ ObjectLinkingLayer &
+ setAutoClaimResponsibilityForObjectSymbols(bool AutoClaimObjectSymbols) {
+ this->AutoClaimObjectSymbols = AutoClaimObjectSymbols;
+ return *this;
+ }
+
+private:
+ using AllocPtr = std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation>;
+
+ void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT,
+ jitlink::PassConfiguration &PassConfig);
+ void notifyLoaded(MaterializationResponsibility &MR);
+ Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc);
+
+ Error removeModule(VModuleKey K);
+ Error removeAllModules();
+
+ mutable std::mutex LayerMutex;
+ jitlink::JITLinkMemoryManager &MemMgr;
+ bool OverrideObjectFlags = false;
+ bool AutoClaimObjectSymbols = false;
+ DenseMap<VModuleKey, AllocPtr> TrackedAllocs;
+ std::vector<AllocPtr> UntrackedAllocs;
+ std::vector<std::unique_ptr<Plugin>> Plugins;
+};
+
+class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin {
+public:
+ EHFrameRegistrationPlugin(jitlink::EHFrameRegistrar &Registrar);
+ Error notifyEmitted(MaterializationResponsibility &MR) override;
+ void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT,
+ jitlink::PassConfiguration &PassConfig) override;
+ Error notifyRemovingModule(VModuleKey K) override;
+ Error notifyRemovingAllModules() override;
+
+private:
+ jitlink::EHFrameRegistrar &Registrar;
+ DenseMap<MaterializationResponsibility *, JITTargetAddress> InProcessLinks;
+ DenseMap<VModuleKey, JITTargetAddress> TrackedEHFrameAddrs;
+ std::vector<JITTargetAddress> UntrackedEHFrameAddrs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
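
The new header above defines the JITLink-based object layer and its plugin interface. A minimal sketch of wiring it up, assuming the caller already owns the ExecutionSession, JITLinkMemoryManager, and EHFrameRegistrar (addObjectLayer is a hypothetical helper):

#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include <memory>

using namespace llvm;
using namespace llvm::orc;

void addObjectLayer(ExecutionSession &ES,
                    jitlink::JITLinkMemoryManager &MemMgr,
                    jitlink::EHFrameRegistrar &Registrar) {
  // The layer itself only needs the session and a JITLink memory manager.
  ObjectLinkingLayer ObjLayer(ES, MemMgr);

  // Plugins receive notifyLoaded/notifyEmitted callbacks and can add JITLink
  // passes via modifyPassConfig; this one registers eh-frame sections.
  ObjLayer.addPlugin(std::make_unique<EHFrameRegistrationPlugin>(Registrar));

  // In a real JIT the layer would be owned for the lifetime of the session,
  // typically serving as the base of an IRCompileLayer.
}
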
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index 44d6b490e19d..eac1cc3e097a 100644
--- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -1,9 +1,8 @@
//===- ObjectTransformLayer.h - Run all objects through functor -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,7 +48,16 @@ template <typename BaseLayerT, typename TransformFtor>
class LegacyObjectTransformLayer {
public:
/// Construct an ObjectTransformLayer with the given BaseLayer
- LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+ TransformFtor Transform = TransformFtor()),
+ "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+ "use "
+ "the ORCv2 ObjectTransformLayer instead");
+
+ /// Legacy layer constructor with deprecation acknowledgement.
+ LegacyObjectTransformLayer(ORCv1DeprecationAcknowledgement,
+ BaseLayerT &BaseLayer,
TransformFtor Transform = TransformFtor())
: BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
@@ -108,6 +116,11 @@ private:
TransformFtor Transform;
};
+template <typename BaseLayerT, typename TransformFtor>
+LegacyObjectTransformLayer<BaseLayerT, TransformFtor>::
+ LegacyObjectTransformLayer(BaseLayerT &BaseLayer, TransformFtor Transform)
+ : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
} // end namespace orc
} // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index a70fc373713d..38246bc480b6 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -1,9 +1,8 @@
//===- OrcABISupport.h - ABI support code -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/OrcError.h b/include/llvm/ExecutionEngine/Orc/OrcError.h
index dc60e8d74e97..e5d6a3eca85f 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcError.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcError.h
@@ -1,9 +1,8 @@
//===------ OrcError.h - Reject symbol lookup requests ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 3e07f5cf3742..8b875b7906e1 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -1,9 +1,8 @@
//===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index 8db9e317a18a..e7b598d8f812 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -1,9 +1,8 @@
//===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
index acbc1682fa5d..4c8e2ea1a7be 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
@@ -1,9 +1,8 @@
//===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -300,13 +299,13 @@ private:
std::error_code EC;
auto TrampolineBlock =
sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- sys::Process::getPageSize(), nullptr,
+ sys::Process::getPageSizeEstimate(), nullptr,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
if (EC)
return errorCodeToError(EC);
uint32_t NumTrampolines =
- (sys::Process::getPageSize() - TargetT::PointerSize) /
+ (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) /
TargetT::TrampolineSize;
uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
@@ -336,7 +335,7 @@ private:
handleGetRemoteInfo() {
std::string ProcessTriple = sys::getProcessTriple();
uint32_t PointerSize = TargetT::PointerSize;
- uint32_t PageSize = sys::Process::getPageSize();
+ uint32_t PageSize = sys::Process::getPageSizeEstimate();
uint32_t TrampolineSize = TargetT::TrampolineSize;
uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize;
LLVM_DEBUG(dbgs() << " Remote info:\n"
diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
index 1e5f6ced597a..07c7471afc6a 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
@@ -1,9 +1,8 @@
//===- llvm/ExecutionEngine/Orc/RPCSerialization.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -128,123 +127,85 @@ template <typename T>
class RPCTypeName<Expected<T>> {
public:
static const char* getName() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name) << "Expected<"
<< RPCTypeNameSequence<T>()
<< ">";
+ return Name;
+ }();
return Name.data();
}
-
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename T>
-std::mutex RPCTypeName<Expected<T>>::NameMutex;
-
-template <typename T>
-std::string RPCTypeName<Expected<T>>::Name;
-
template <typename T1, typename T2>
class RPCTypeName<std::pair<T1, T2>> {
public:
static const char* getName() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence<T1, T2>()
<< ">";
+ return Name;
+ }();
return Name.data();
}
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename T1, typename T2>
-std::mutex RPCTypeName<std::pair<T1, T2>>::NameMutex;
-template <typename T1, typename T2>
-std::string RPCTypeName<std::pair<T1, T2>>::Name;
-
template <typename... ArgTs>
class RPCTypeName<std::tuple<ArgTs...>> {
public:
static const char* getName() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name) << "std::tuple<"
<< RPCTypeNameSequence<ArgTs...>() << ">";
+ return Name;
+ }();
return Name.data();
}
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename... ArgTs>
-std::mutex RPCTypeName<std::tuple<ArgTs...>>::NameMutex;
-template <typename... ArgTs>
-std::string RPCTypeName<std::tuple<ArgTs...>>::Name;
-
template <typename T>
class RPCTypeName<std::vector<T>> {
public:
static const char*getName() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name) << "std::vector<" << RPCTypeName<T>::getName()
<< ">";
+ return Name;
+ }();
return Name.data();
}
-
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename T>
-std::mutex RPCTypeName<std::vector<T>>::NameMutex;
-template <typename T>
-std::string RPCTypeName<std::vector<T>>::Name;
-
template <typename T> class RPCTypeName<std::set<T>> {
public:
static const char *getName() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name)
<< "std::set<" << RPCTypeName<T>::getName() << ">";
+ return Name;
+ }();
return Name.data();
}
-
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename T> std::mutex RPCTypeName<std::set<T>>::NameMutex;
-template <typename T> std::string RPCTypeName<std::set<T>>::Name;
-
template <typename K, typename V> class RPCTypeName<std::map<K, V>> {
public:
static const char *getName() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name)
<< "std::map<" << RPCTypeNameSequence<K, V>() << ">";
+ return Name;
+ }();
return Name.data();
}
-
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename K, typename V>
-std::mutex RPCTypeName<std::map<K, V>>::NameMutex;
-template <typename K, typename V> std::string RPCTypeName<std::map<K, V>>::Name;
-
/// The SerializationTraits<ChannelT, T> class describes how to serialize and
/// deserialize an instance of type T to/from an abstract channel of type
/// ChannelT. It also provides a representation of the type's name via the
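
The RPCTypeName changes above drop the mutex-guarded static data members in favor of a function-local static initialized by an immediately invoked lambda; C++11 already guarantees thread-safe initialization of such statics, so the lock and the out-of-class definitions become unnecessary. A standalone sketch of the same pattern (getExampleTypeName is a hypothetical function):

#include <string>
#include "llvm/Support/raw_ostream.h"

static const char *getExampleTypeName() {
  // Built once, on first call; concurrent first calls are safe because
  // initialization of a function-local static is serialized by the runtime.
  static std::string Name = [] {
    std::string Result;
    llvm::raw_string_ostream(Result) << "Expected<int>";
    return Result;
  }();
  return Name.data();
}
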
diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 953b73e10e43..3b11e1b283de 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -1,9 +1,8 @@
-//===------- RPCUTils.h - Utilities for building RPC APIs -------*- C++ -*-===//
+//===- RPCUtils.h - Utilities for building RPC APIs -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -152,25 +151,17 @@ public:
/// Returns the full function prototype as a string.
static const char *getPrototype() {
- std::lock_guard<std::mutex> Lock(NameMutex);
- if (Name.empty())
+ static std::string Name = [] {
+ std::string Name;
raw_string_ostream(Name)
<< RPCTypeName<RetT>::getName() << " " << DerivedFunc::getName()
<< "(" << llvm::orc::rpc::RPCTypeNameSequence<ArgTs...>() << ")";
+ return Name;
+ }();
return Name.data();
}
-
-private:
- static std::mutex NameMutex;
- static std::string Name;
};
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-std::mutex Function<DerivedFunc, RetT(ArgTs...)>::NameMutex;
-
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-std::string Function<DerivedFunc, RetT(ArgTs...)>::Name;
-
/// Allocates RPC function ids during autonegotiation.
/// Specializations of this class must provide four members:
///
diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 6f90f0380d95..d9535ce5f21f 100644
--- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -1,9 +1,8 @@
//===- RTDyldObjectLinkingLayer.h - RTDyld-based jit linking ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,22 +43,34 @@ public:
const RuntimeDyld::LoadedObjectInfo &)>;
/// Functor for receiving finalization notifications.
- using NotifyEmittedFunction = std::function<void(VModuleKey)>;
+ using NotifyEmittedFunction =
+ std::function<void(VModuleKey, std::unique_ptr<MemoryBuffer>)>;
using GetMemoryManagerFunction =
std::function<std::unique_ptr<RuntimeDyld::MemoryManager>()>;
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
/// and NotifyEmitted functors.
- RTDyldObjectLinkingLayer(
- ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
- NotifyLoadedFunction NotifyLoaded = NotifyLoadedFunction(),
- NotifyEmittedFunction NotifyEmitted = NotifyEmittedFunction());
+ RTDyldObjectLinkingLayer(ExecutionSession &ES,
+ GetMemoryManagerFunction GetMemoryManager);
/// Emit the object.
void emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) override;
+ /// Set the NotifyLoaded callback.
+ RTDyldObjectLinkingLayer &setNotifyLoaded(NotifyLoadedFunction NotifyLoaded) {
+ this->NotifyLoaded = std::move(NotifyLoaded);
+ return *this;
+ }
+
+ /// Set the NotifyEmitted callback.
+ RTDyldObjectLinkingLayer &
+ setNotifyEmitted(NotifyEmittedFunction NotifyEmitted) {
+ this->NotifyEmitted = std::move(NotifyEmitted);
+ return *this;
+ }
+
/// Set the 'ProcessAllSections' flag.
///
/// If set to true, all sections in each object file will be allocated using
@@ -109,7 +120,8 @@ private:
std::map<StringRef, JITEvaluatedSymbol> Resolved,
std::set<StringRef> &InternalSymbols);
- void onObjEmit(VModuleKey K, MaterializationResponsibility &R, Error Err);
+ void onObjEmit(VModuleKey K, std::unique_ptr<MemoryBuffer> ObjBuffer,
+ MaterializationResponsibility &R, Error Err);
mutable std::mutex RTDyldLayerMutex;
GetMemoryManagerFunction GetMemoryManager;
@@ -341,17 +353,27 @@ public:
/// Construct an ObjectLinkingLayer with the given NotifyLoaded,
/// and NotifyFinalized functors.
+ LLVM_ATTRIBUTE_DEPRECATED(
+ LegacyRTDyldObjectLinkingLayer(
+ ExecutionSession &ES, ResourcesGetter GetResources,
+ NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
+ NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
+ NotifyFreedFtor NotifyFreed = NotifyFreedFtor()),
+ "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+ "use "
+ "ORCv2 (see docs/ORCv2.rst)");
+
+ // Legacy layer constructor with deprecation acknowledgement.
LegacyRTDyldObjectLinkingLayer(
- ExecutionSession &ES, ResourcesGetter GetResources,
+ ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
+ ResourcesGetter GetResources,
NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
NotifyFreedFtor NotifyFreed = NotifyFreedFtor())
: ES(ES), GetResources(std::move(GetResources)),
NotifyLoaded(std::move(NotifyLoaded)),
NotifyFinalized(std::move(NotifyFinalized)),
- NotifyFreed(std::move(NotifyFreed)),
- ProcessAllSections(false) {
- }
+ NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
/// Set the 'ProcessAllSections' flag.
///
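
With the hunk above, RTDyldObjectLinkingLayer is constructed from just the session and a memory-manager factory, and the notification callbacks become chainable setters. A minimal sketch, assuming SectionMemoryManager as the per-object memory manager (buildRTDyldLayer is a hypothetical helper):

#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <memory>

using namespace llvm;
using namespace llvm::orc;

void buildRTDyldLayer(ExecutionSession &ES) {
  // One memory manager is created per emitted object.
  RTDyldObjectLinkingLayer ObjLayer(
      ES, []() { return std::make_unique<SectionMemoryManager>(); });

  // NotifyEmitted now also receives the object buffer that was just linked.
  ObjLayer.setNotifyEmitted(
      [](VModuleKey K, std::unique_ptr<MemoryBuffer> ObjBuffer) {
        // e.g. hand the buffer to a debugger-registration or caching hook.
      });
}
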
diff --git a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
index db810f4ef2e5..46b7c59450e6 100644
--- a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
+++ b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
@@ -1,9 +1,8 @@
//===- llvm/ExecutionEngine/Orc/RawByteChannel.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
index 955e77607a18..b87cf697a81e 100644
--- a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
@@ -1,9 +1,8 @@
//===------ RemoteObjectLayer.h - Forwards objs to a remote -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,9 +13,10 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include <map>
namespace llvm {
@@ -313,7 +313,14 @@ public:
///
/// The ReportError functor can be used to locally log errors that are
/// intended to be sent.
- RemoteObjectClientLayer(RPCEndpoint &Remote,
+ LLVM_ATTRIBUTE_DEPRECATED(
+ RemoteObjectClientLayer(RPCEndpoint &Remote,
+ std::function<void(Error)> ReportError),
+ "ORCv1 layers (including RemoteObjectClientLayer) are deprecated. Please "
+ "use "
+ "ORCv2 (see docs/ORCv2.rst)");
+
+ RemoteObjectClientLayer(ORCv1DeprecationAcknowledgement, RPCEndpoint &Remote,
std::function<void(Error)> ReportError)
: RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
@@ -418,11 +425,18 @@ public:
/// Create a RemoteObjectServerLayer with the given base layer (which must be
/// an object layer), RPC endpoint, and error reporter function.
- RemoteObjectServerLayer(BaseLayerT &BaseLayer,
- RPCEndpoint &Remote,
+ LLVM_ATTRIBUTE_DEPRECATED(
+ RemoteObjectServerLayer(BaseLayerT &BaseLayer, RPCEndpoint &Remote,
+ std::function<void(Error)> ReportError),
+ "ORCv1 layers (including RemoteObjectServerLayer) are deprecated. Please "
+ "use "
+ "ORCv2 (see docs/ORCv2.rst)");
+
+ RemoteObjectServerLayer(ORCv1DeprecationAcknowledgement,
+ BaseLayerT &BaseLayer, RPCEndpoint &Remote,
std::function<void(Error)> ReportError)
- : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
- BaseLayer(BaseLayer), HandleIdMgr(1) {
+ : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
+ BaseLayer(BaseLayer), HandleIdMgr(1) {
using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
@@ -463,6 +477,7 @@ private:
assert(!BaseLayerHandles.count(Id) && "Id already in use?");
auto Resolver = createLambdaResolver(
+ AcknowledgeORCv1Deprecation,
[this, Id](const std::string &Name) { return lookup(Id, Name); },
[this, Id](const std::string &Name) {
return lookupInLogicalDylib(Id, Name);
@@ -523,6 +538,31 @@ private:
std::map<ObjHandleT, typename BaseLayerT::ObjHandleT> BaseLayerHandles;
};
+template <typename RPCEndpoint>
+RemoteObjectClientLayer<RPCEndpoint>::RemoteObjectClientLayer(
+ RPCEndpoint &Remote, std::function<void(Error)> ReportError)
+ : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
+ using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
+ Remote.template addHandler<Lookup>(*this, &ThisT::lookup);
+ Remote.template addHandler<LookupInLogicalDylib>(
+ *this, &ThisT::lookupInLogicalDylib);
+}
+
+template <typename BaseLayerT, typename RPCEndpoint>
+RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>::RemoteObjectServerLayer(
+ BaseLayerT &BaseLayer, RPCEndpoint &Remote,
+ std::function<void(Error)> ReportError)
+ : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
+ BaseLayer(BaseLayer), HandleIdMgr(1) {
+ using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
+
+ Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
+ Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject);
+ Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol);
+ Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn);
+ Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize);
+}
+
} // end namespace orc
} // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
index 717076e25609..c354f6c3559c 100644
--- a/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
+++ b/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
@@ -1,9 +1,8 @@
//===- SymbolStringPool.h - Multi-threaded pool for JIT symbols -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,25 +50,20 @@ private:
class SymbolStringPtr {
friend class SymbolStringPool;
friend struct DenseMapInfo<SymbolStringPtr>;
- friend bool operator==(const SymbolStringPtr &LHS,
- const SymbolStringPtr &RHS);
- friend bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS);
-
- static SymbolStringPool::PoolMapEntry Tombstone;
public:
SymbolStringPtr() = default;
SymbolStringPtr(const SymbolStringPtr &Other)
: S(Other.S) {
- if (S)
+ if (isRealPoolEntry(S))
++S->getValue();
}
SymbolStringPtr& operator=(const SymbolStringPtr &Other) {
- if (S)
+ if (isRealPoolEntry(S))
--S->getValue();
S = Other.S;
- if (S)
+ if (isRealPoolEntry(S))
++S->getValue();
return *this;
}
@@ -79,7 +73,7 @@ public:
}
SymbolStringPtr& operator=(SymbolStringPtr &&Other) {
- if (S)
+ if (isRealPoolEntry(S))
--S->getValue();
S = nullptr;
std::swap(S, Other.S);
@@ -87,34 +81,64 @@ public:
}
~SymbolStringPtr() {
- if (S)
+ if (isRealPoolEntry(S))
--S->getValue();
}
StringRef operator*() const { return S->first(); }
+ friend bool operator==(const SymbolStringPtr &LHS,
+ const SymbolStringPtr &RHS) {
+ return LHS.S == RHS.S;
+ }
+
+ friend bool operator!=(const SymbolStringPtr &LHS,
+ const SymbolStringPtr &RHS) {
+ return !(LHS == RHS);
+ }
+
+ friend bool operator<(const SymbolStringPtr &LHS,
+ const SymbolStringPtr &RHS) {
+ return LHS.S < RHS.S;
+ }
+
private:
+ using PoolEntryPtr = SymbolStringPool::PoolMapEntry *;
SymbolStringPtr(SymbolStringPool::PoolMapEntry *S)
: S(S) {
- if (S)
+ if (isRealPoolEntry(S))
++S->getValue();
}
- SymbolStringPool::PoolMapEntry *S = nullptr;
-};
+ // Returns false for null, empty, and tombstone values, true otherwise.
+ bool isRealPoolEntry(PoolEntryPtr P) {
+ return ((reinterpret_cast<uintptr_t>(P) - 1) & InvalidPtrMask) !=
+ InvalidPtrMask;
+ }
-inline bool operator==(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) {
- return LHS.S == RHS.S;
-}
+ static SymbolStringPtr getEmptyVal() {
+ return SymbolStringPtr(reinterpret_cast<PoolEntryPtr>(EmptyBitPattern));
+ }
-inline bool operator!=(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) {
- return !(LHS == RHS);
-}
+ static SymbolStringPtr getTombstoneVal() {
+ return SymbolStringPtr(reinterpret_cast<PoolEntryPtr>(TombstoneBitPattern));
+ }
-inline bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) {
- return LHS.S < RHS.S;
-}
+ constexpr static uintptr_t EmptyBitPattern =
+ std::numeric_limits<uintptr_t>::max()
+ << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+ constexpr static uintptr_t TombstoneBitPattern =
+ (std::numeric_limits<uintptr_t>::max() - 1)
+ << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+ constexpr static uintptr_t InvalidPtrMask =
+ (std::numeric_limits<uintptr_t>::max() - 3)
+ << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+ PoolEntryPtr S = nullptr;
+};
inline SymbolStringPool::~SymbolStringPool() {
#ifndef NDEBUG
@@ -151,16 +175,15 @@ template <>
struct DenseMapInfo<orc::SymbolStringPtr> {
static orc::SymbolStringPtr getEmptyKey() {
- return orc::SymbolStringPtr();
+ return orc::SymbolStringPtr::getEmptyVal();
}
static orc::SymbolStringPtr getTombstoneKey() {
- return orc::SymbolStringPtr(&orc::SymbolStringPtr::Tombstone);
+ return orc::SymbolStringPtr::getTombstoneVal();
}
- static unsigned getHashValue(orc::SymbolStringPtr V) {
- uintptr_t IV = reinterpret_cast<uintptr_t>(V.S);
- return unsigned(IV) ^ unsigned(IV >> 9);
+ static unsigned getHashValue(const orc::SymbolStringPtr &V) {
+ return DenseMapInfo<orc::SymbolStringPtr::PoolEntryPtr>::getHashValue(V.S);
}
static bool isEqual(const orc::SymbolStringPtr &LHS,
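
The SymbolStringPtr changes above replace the static Tombstone entry with reserved tagged bit patterns, so the type satisfies DenseMapInfo without touching the pool and interned symbol names can key a DenseMap directly. A small sketch (countSymbol is a hypothetical helper):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"

using namespace llvm;
using namespace llvm::orc;

// Reference counting still happens through real pool entries; only the empty
// and tombstone sentinels skip it, via the isRealPoolEntry check above.
static void countSymbol(DenseMap<SymbolStringPtr, unsigned> &Counts,
                        const SymbolStringPtr &Sym) {
  ++Counts[Sym];
}
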
diff --git a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
index bf946de532d3..5787500387c4 100644
--- a/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
+++ b/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h
@@ -1,9 +1,8 @@
//===----------- ThreadSafeModule.h -- Layer interfaces ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/OrcMCJITReplacement.h b/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
index 4cd5648b2fc2..6cca1933f39f 100644
--- a/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
+++ b/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
@@ -1,9 +1,8 @@
//===---- OrcMCJITReplacement.h - Orc-based MCJIT replacement ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/OrcV1Deprecation.h b/include/llvm/ExecutionEngine/OrcV1Deprecation.h
new file mode 100644
index 000000000000..7ed254b3ee04
--- /dev/null
+++ b/include/llvm/ExecutionEngine/OrcV1Deprecation.h
@@ -0,0 +1,22 @@
+//===------ OrcV1Deprecation.h - ORCv1 deprecation tag ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Tag for suppressing ORCv1 deprecation warnings.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#define LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+
+namespace llvm {
+
+enum ORCv1DeprecationAcknowledgement { AcknowledgeORCv1Deprecation };
+
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 23d651f6d1b6..c7c87ecdfa09 100644
--- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -1,9 +1,8 @@
//===-- RTDyldMemoryManager.h - Memory manager for MC-JIT -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index e419ee05e566..b2b4eba47074 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -1,9 +1,8 @@
//===- RuntimeDyld.h - Run-time dynamic linker for MC-JIT -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,18 +52,19 @@ private:
std::string ErrMsg;
};
-class RuntimeDyldCheckerImpl;
class RuntimeDyldImpl;
class RuntimeDyld {
- friend class RuntimeDyldCheckerImpl;
-
protected:
// Change the address associated with a section when resolving relocations.
// Any relocations already associated with the symbol will be re-resolved.
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
public:
+ using NotifyStubEmittedFunction = std::function<void(
+ StringRef FileName, StringRef SectionName, StringRef SymbolName,
+ unsigned SectionID, uint32_t StubOffset)>;
+
/// Information about the loaded object.
class LoadedObjectInfo : public llvm::LoadedObjectInfo {
friend class RuntimeDyldImpl;
@@ -185,6 +185,9 @@ public:
/// and resolve relocations based on where they put it).
void *getSymbolLocalAddress(StringRef Name) const;
+ /// Get the section ID for the section containing the given symbol.
+ unsigned getSymbolSectionID(StringRef Name) const;
+
/// Get the target address and flags for the named symbol.
/// This address is the one used for relocation.
JITEvaluatedSymbol getSymbol(StringRef Name) const;
@@ -205,6 +208,19 @@ public:
/// This is the address which will be used for relocation resolution.
void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress);
+ /// Returns the section's working memory.
+ StringRef getSectionContent(unsigned SectionID) const;
+
+ /// If the section was loaded, return the section's load address,
+ /// otherwise return None.
+ uint64_t getSectionLoadAddress(unsigned SectionID) const;
+
+ /// Set the NotifyStubEmitted callback. This is used for debugging
+ /// purposes. A callback is made for each stub that is generated.
+ void setNotifyStubEmitted(NotifyStubEmittedFunction NotifyStubEmitted) {
+ this->NotifyStubEmitted = std::move(NotifyStubEmitted);
+ }
+
/// Register any EH frame sections that have been loaded but not previously
/// registered with the memory manager. Note, RuntimeDyld is responsible
/// for identifying the EH frame and calling the memory manager with the
@@ -266,7 +282,7 @@ private:
MemoryManager &MemMgr;
JITSymbolResolver &Resolver;
bool ProcessAllSections;
- RuntimeDyldCheckerImpl *Checker;
+ NotifyStubEmittedFunction NotifyStubEmitted;
};
// Asynchronous JIT link for ORC.
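
The private RuntimeDyldCheckerImpl hook is gone; stub creation is now surfaced through the public NotifyStubEmittedFunction callback declared above. A sketch that simply logs each emitted stub (attachStubLogger is a hypothetical helper):

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void attachStubLogger(RuntimeDyld &Dyld) {
  // Intended for debugging/testing tools that need to locate stubs.
  Dyld.setNotifyStubEmitted([](StringRef FileName, StringRef SectionName,
                               StringRef SymbolName, unsigned SectionID,
                               uint32_t StubOffset) {
    errs() << "stub for " << SymbolName << " in " << FileName << " ("
           << SectionName << "), section " << SectionID << " + " << StubOffset
           << "\n";
  });
}
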
diff --git a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
index 13fc5fd5a3e7..93ea09107bd1 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
@@ -1,16 +1,18 @@
//===---- RuntimeDyldChecker.h - RuntimeDyld tester framework -----*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H
#define LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Endian.h"
#include <cstdint>
#include <memory>
@@ -58,7 +60,8 @@ class raw_ostream;
///
/// ident_expr = 'decode_operand' '(' symbol ',' operand-index ')'
/// | 'next_pc' '(' symbol ')'
-/// | 'stub_addr' '(' file-name ',' section-name ',' symbol ')'
+/// | 'stub_addr' '(' stub-container-name ',' symbol ')'
+/// | 'got_addr' '(' stub-container-name ',' symbol ')'
/// | symbol
///
/// binary_expr = expr '+' expr
@@ -70,15 +73,84 @@ class raw_ostream;
///
class RuntimeDyldChecker {
public:
- RuntimeDyldChecker(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
- MCInstPrinter *InstPrinter, raw_ostream &ErrStream);
- ~RuntimeDyldChecker();
+ class MemoryRegionInfo {
+ public:
+ MemoryRegionInfo() = default;
+
+ /// Constructor for symbols/sections with content.
+ MemoryRegionInfo(StringRef Content, JITTargetAddress TargetAddress)
+ : ContentPtr(Content.data()), Size(Content.size()),
+ TargetAddress(TargetAddress) {}
+
+ /// Constructor for zero-fill symbols/sections.
+ MemoryRegionInfo(uint64_t Size, JITTargetAddress TargetAddress)
+ : Size(Size), TargetAddress(TargetAddress) {}
+
+ /// Returns true if this is a zero-fill symbol/section.
+ bool isZeroFill() const {
+ assert(Size && "setContent/setZeroFill must be called first");
+ return !ContentPtr;
+ }
+
+ /// Set the content for this memory region.
+ void setContent(StringRef Content) {
+ assert(!ContentPtr && !Size && "Content/zero-fill already set");
+ ContentPtr = Content.data();
+ Size = Content.size();
+ }
+
+ /// Set a zero-fill length for this memory region.
+ void setZeroFill(uint64_t Size) {
+ assert(!ContentPtr && !this->Size && "Content/zero-fill already set");
+ this->Size = Size;
+ }
- // Get the associated RTDyld instance.
- RuntimeDyld& getRTDyld();
+ /// Returns the content for this section if there is any.
+ StringRef getContent() const {
+ assert(!isZeroFill() && "Can't get content for a zero-fill section");
+ return StringRef(ContentPtr, static_cast<size_t>(Size));
+ }
- // Get the associated RTDyld instance.
- const RuntimeDyld& getRTDyld() const;
+ /// Returns the zero-fill length for this section.
+ uint64_t getZeroFillLength() const {
+ assert(isZeroFill() && "Can't get zero-fill length for content section");
+ return Size;
+ }
+
+ /// Set the target address for this region.
+ void setTargetAddress(JITTargetAddress TargetAddress) {
+ assert(!this->TargetAddress && "TargetAddress already set");
+ this->TargetAddress = TargetAddress;
+ }
+
+ /// Return the target address for this region.
+ JITTargetAddress getTargetAddress() const { return TargetAddress; }
+
+ private:
+ const char *ContentPtr = 0;
+ uint64_t Size = 0;
+ JITTargetAddress TargetAddress = 0;
+ };
+
+ using IsSymbolValidFunction = std::function<bool(StringRef Symbol)>;
+ using GetSymbolInfoFunction =
+ std::function<Expected<MemoryRegionInfo>(StringRef SymbolName)>;
+ using GetSectionInfoFunction = std::function<Expected<MemoryRegionInfo>(
+ StringRef FileName, StringRef SectionName)>;
+ using GetStubInfoFunction = std::function<Expected<MemoryRegionInfo>(
+ StringRef StubContainer, StringRef TargetName)>;
+ using GetGOTInfoFunction = std::function<Expected<MemoryRegionInfo>(
+ StringRef GOTContainer, StringRef TargetName)>;
+
+ RuntimeDyldChecker(IsSymbolValidFunction IsSymbolValid,
+ GetSymbolInfoFunction GetSymbolInfo,
+ GetSectionInfoFunction GetSectionInfo,
+ GetStubInfoFunction GetStubInfo,
+ GetGOTInfoFunction GetGOTInfo,
+ support::endianness Endianness,
+ MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+ raw_ostream &ErrStream);
+ ~RuntimeDyldChecker();
/// Check a single expression against the attached RuntimeDyld
/// instance.
@@ -100,7 +172,7 @@ public:
bool LocalAddress);
/// If there is a section at the given local address, return its load
- /// address, otherwise return none.
+ /// address, otherwise return none.
Optional<uint64_t> getSectionLoadAddress(void *LocalAddress) const;
private:
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
index 3cf131c27778..d7316425da2f 100644
--- a/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -1,9 +1,8 @@
//===- SectionMemoryManager.h - Memory manager for MCJIT/RtDyld -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/FuzzMutate/FuzzerCLI.h b/include/llvm/FuzzMutate/FuzzerCLI.h
index 3333e96db166..2a16e43a6ab3 100644
--- a/include/llvm/FuzzMutate/FuzzerCLI.h
+++ b/include/llvm/FuzzMutate/FuzzerCLI.h
@@ -1,9 +1,8 @@
//===-- FuzzerCLI.h - Common logic for CLIs of fuzzers ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/FuzzMutate/IRMutator.h b/include/llvm/FuzzMutate/IRMutator.h
index 9aa9d6d6a4bc..40a1ce8aeec9 100644
--- a/include/llvm/FuzzMutate/IRMutator.h
+++ b/include/llvm/FuzzMutate/IRMutator.h
@@ -1,9 +1,8 @@
//===-- IRMutator.h - Mutation engine for fuzzing IR ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/FuzzMutate/OpDescriptor.h b/include/llvm/FuzzMutate/OpDescriptor.h
index dd30fda99bea..d6c98cd949a2 100644
--- a/include/llvm/FuzzMutate/OpDescriptor.h
+++ b/include/llvm/FuzzMutate/OpDescriptor.h
@@ -1,9 +1,8 @@
//===-- OpDescriptor.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/FuzzMutate/Operations.h b/include/llvm/FuzzMutate/Operations.h
index 668bd952ebb2..2eb4c38c2aeb 100644
--- a/include/llvm/FuzzMutate/Operations.h
+++ b/include/llvm/FuzzMutate/Operations.h
@@ -1,9 +1,8 @@
//===-- Operations.h - ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/FuzzMutate/Random.h b/include/llvm/FuzzMutate/Random.h
index 3a5f46a07554..615b15f04ceb 100644
--- a/include/llvm/FuzzMutate/Random.h
+++ b/include/llvm/FuzzMutate/Random.h
@@ -1,9 +1,8 @@
//===--- Random.h - Utilities for random sampling -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/FuzzMutate/RandomIRBuilder.h b/include/llvm/FuzzMutate/RandomIRBuilder.h
index 5cf3f0b22709..f3b609702e9d 100644
--- a/include/llvm/FuzzMutate/RandomIRBuilder.h
+++ b/include/llvm/FuzzMutate/RandomIRBuilder.h
@@ -1,9 +1,8 @@
-//===-- Mutator.h - Utils for randomly mutation IR --------------*- C++ -*-===//
+//===- RandomIRBuilder.h - Utils for random mutation of IR ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
index 497dca44547c..5f514b9c47d2 100644
--- a/include/llvm/IR/Argument.h
+++ b/include/llvm/IR/Argument.h
@@ -1,9 +1,8 @@
//===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -79,6 +78,9 @@ public:
/// If this is a byval or inalloca argument, return its alignment.
unsigned getParamAlignment() const;
+ /// If this is a byval argument, return its type.
+ Type *getParamByValType() const;
+
/// Return true if this argument has the nest attribute.
bool hasNestAttr() const;
@@ -91,6 +93,9 @@ public:
/// Return true if this argument has the sret attribute.
bool hasStructRetAttr() const;
+ /// Return true if this argument has the inreg attribute.
+ bool hasInRegAttr() const;
+
/// Return true if this argument has the returned attribute.
bool hasReturnedAttr() const;
@@ -119,6 +124,8 @@ public:
/// Check if an argument has a given attribute.
bool hasAttribute(Attribute::AttrKind Kind) const;
+ Attribute getAttribute(Attribute::AttrKind Kind) const;
+
/// Method for support type inquiry through isa, cast, and dyn_cast.
static bool classof(const Value *V) {
return V->getValueID() == ArgumentVal;
diff --git a/include/llvm/IR/AssemblyAnnotationWriter.h b/include/llvm/IR/AssemblyAnnotationWriter.h
index 6e1f5c43e12e..3fd3c57a6796 100644
--- a/include/llvm/IR/AssemblyAnnotationWriter.h
+++ b/include/llvm/IR/AssemblyAnnotationWriter.h
@@ -1,9 +1,8 @@
//===-- AssemblyAnnotationWriter.h - Annotation .ll files -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 9fc4614af010..06cc09e1cfc7 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -1,9 +1,8 @@
//===- llvm/Attributes.h - Container for Attributes -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,6 +90,7 @@ public:
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
static Attribute get(LLVMContext &Context, StringRef Kind,
StringRef Val = StringRef());
+ static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
/// Return a uniquified Attribute object that has the specific
/// alignment set.
@@ -103,6 +103,7 @@ public:
static Attribute getWithAllocSizeArgs(LLVMContext &Context,
unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg);
+ static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
//===--------------------------------------------------------------------===//
// Attribute Accessors
@@ -118,6 +119,9 @@ public:
/// attribute.
bool isStringAttribute() const;
+ /// Return true if the attribute is a type attribute.
+ bool isTypeAttribute() const;
+
/// Return true if the attribute is present.
bool hasAttribute(AttrKind Val) const;
@@ -140,6 +144,10 @@ public:
/// attribute to be a string attribute.
StringRef getValueAsString() const;
+ /// Return the attribute's value as a Type. This requires the attribute to be
+ /// a type attribute.
+ Type *getValueAsType() const;
+
/// Returns the alignment field of an attribute as a byte alignment
/// value.
unsigned getAlignment() const;
@@ -280,6 +288,7 @@ public:
unsigned getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
+ Type *getByValType() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp = false) const;
@@ -599,6 +608,9 @@ public:
/// Return the alignment for the specified function parameter.
unsigned getParamAlignment(unsigned ArgNo) const;
+ /// Return the byval type for the specified function parameter.
+ Type *getParamByValType(unsigned ArgNo) const;
+
/// Get the stack alignment.
unsigned getStackAlignment(unsigned Index) const;
@@ -698,6 +710,7 @@ class AttrBuilder {
uint64_t DerefBytes = 0;
uint64_t DerefOrNullBytes = 0;
uint64_t AllocSizeArgs = 0;
+ Type *ByValType = nullptr;
public:
AttrBuilder() = default;
@@ -773,6 +786,9 @@ public:
/// dereferenceable_or_null attribute exists (zero is returned otherwise).
uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
+ /// Retrieve the byval type.
+ Type *getByValType() const { return ByValType; }
+
/// Retrieve the allocsize args, if the allocsize attribute exists. If it
/// doesn't exist, pair(0, 0) is returned.
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
@@ -797,6 +813,9 @@ public:
AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg);
+ /// This turns a byval type into the form used internally in Attribute.
+ AttrBuilder &addByValAttr(Type *Ty);
+
/// Add an allocsize attribute, using the representation returned by
/// Attribute.getIntValue().
AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
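
Illustrative, not part of the patch: a sketch of building a type-carrying byval attribute with the new factory and reading it back through the new AttributeList accessor. markFirstParamByVal is a hypothetical helper; it assumes Function::addParamAttr(unsigned, Attribute) is available for attaching the attribute.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"

    void markFirstParamByVal(llvm::LLVMContext &Ctx, llvm::Function &F,
                             llvm::StructType *STy) {
      // Build a byval attribute that carries the pointee type explicitly.
      llvm::Attribute ByVal = llvm::Attribute::getWithByValType(Ctx, STy);
      F.addParamAttr(0, ByVal);

      // Read the type back via the accessor added to AttributeList above.
      llvm::Type *Ty = F.getAttributes().getParamByValType(0);
      (void)Ty;
    }
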
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index e786d85d05a8..153046d2311c 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -85,6 +85,9 @@ def NoCapture : EnumAttr<"nocapture">;
/// Call cannot be duplicated.
def NoDuplicate : EnumAttr<"noduplicate">;
+/// Function does not deallocate memory.
+def NoFree : EnumAttr<"nofree">;
+
/// Disable implicit floating point insts.
def NoImplicitFloat : EnumAttr<"noimplicitfloat">;
@@ -106,6 +109,9 @@ def NoRedZone : EnumAttr<"noredzone">;
/// Mark the function as not returning.
def NoReturn : EnumAttr<"noreturn">;
+/// Function does not synchronize.
+def NoSync : EnumAttr<"nosync">;
+
/// Disable Indirect Branch Tracking.
def NoCfCheck : EnumAttr<"nocf_check">;
@@ -130,6 +136,9 @@ def ReadOnly : EnumAttr<"readonly">;
/// Return value is always equal to this argument.
def Returned : EnumAttr<"returned">;
+/// Parameter is required to be a trivial constant.
+def ImmArg : EnumAttr<"immarg">;
+
/// Function can return twice.
def ReturnsTwice : EnumAttr<"returns_twice">;
@@ -176,6 +185,9 @@ def SanitizeMemory : EnumAttr<"sanitize_memory">;
/// HWAddressSanitizer is on.
def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">;
+/// MemTagSanitizer is on.
+def SanitizeMemTag : EnumAttr<"sanitize_memtag">;
+
/// Speculative Load Hardening is enabled.
///
/// Note that this uses the default compatibility (always compatible during
@@ -193,6 +205,9 @@ def SwiftSelf : EnumAttr<"swiftself">;
/// Function must be in a unwind table.
def UWTable : EnumAttr<"uwtable">;
+/// Function always comes back to callsite.
+def WillReturn : EnumAttr<"willreturn">;
+
/// Function only writes to memory.
def WriteOnly : EnumAttr<"writeonly">;
@@ -221,6 +236,7 @@ def : CompatRule<"isEqual<SanitizeAddressAttr>">;
def : CompatRule<"isEqual<SanitizeThreadAttr>">;
def : CompatRule<"isEqual<SanitizeMemoryAttr>">;
def : CompatRule<"isEqual<SanitizeHWAddressAttr>">;
+def : CompatRule<"isEqual<SanitizeMemTagAttr>">;
def : CompatRule<"isEqual<SafeStackAttr>">;
def : CompatRule<"isEqual<ShadowCallStackAttr>">;
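
Illustrative, not part of the patch: the new EnumAttr definitions surface on the C++ side as Attribute::AttrKind values, so a (hypothetical) predicate can test them like any other function attribute.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // Hypothetical predicate showing the new kinds in use.
    bool hasNoFreeNoSyncWillReturn(const llvm::Function &F) {
      return F.hasFnAttribute(llvm::Attribute::NoFree) &&
             F.hasFnAttribute(llvm::Attribute::NoSync) &&
             F.hasFnAttribute(llvm::Attribute::WillReturn);
    }
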
diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h
index 8cf574c6a138..017ad93d8a2a 100644
--- a/include/llvm/IR/AutoUpgrade.h
+++ b/include/llvm/IR/AutoUpgrade.h
@@ -1,9 +1,8 @@
//===- AutoUpgrade.h - AutoUpgrade Helpers ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,9 +46,9 @@ namespace llvm {
/// so that it can update all calls to the old function.
void UpgradeCallsToIntrinsic(Function* F);
- /// This checks for global variables which should be upgraded. It returns true
- /// if it requires upgrading.
- bool UpgradeGlobalVariable(GlobalVariable *GV);
+ /// This checks for global variables which should be upgraded. If it requires
+ /// upgrading, returns a pointer to the upgraded variable.
+ GlobalVariable *UpgradeGlobalVariable(GlobalVariable *GV);
/// This checks for module flags which should be upgraded. It returns true if
/// module is modified.
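
Illustrative, not part of the patch: a sketch of how a caller adapts to the new pointer-returning signature; upgradeIfNeeded is a hypothetical name.

    #include "llvm/IR/AutoUpgrade.h"
    #include "llvm/IR/GlobalVariable.h"

    void upgradeIfNeeded(llvm::GlobalVariable *GV) {
      // Callers now receive the replacement variable instead of a bool.
      if (llvm::GlobalVariable *Upgraded = llvm::UpgradeGlobalVariable(GV)) {
        (void)Upgraded; // use the upgraded variable from here on
      }
    }
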
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index 99eac33f742e..69555af50e1f 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -1,9 +1,8 @@
//===- llvm/BasicBlock.h - Represent a basic block in the VM ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -363,7 +362,7 @@ public:
/// This is actually not used to update the Predecessor list, but is actually
/// used to update the PHI nodes that reside in the block. Note that this
/// should be called while the predecessor still refers to this block.
- void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false);
+ void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs = false);
bool canSplitPredecessors() const;
@@ -391,6 +390,14 @@ public:
/// direct branches, switches, etc. to it.
bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
+ /// Update all phi nodes in this basic block to refer to basic block \p New
+ /// instead of basic block \p Old.
+ void replacePhiUsesWith(BasicBlock *Old, BasicBlock *New);
+
+ /// Update all phi nodes in this basic block's successors to refer to basic
+ /// block \p New instead of basic block \p Old.
+ void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New);
+
/// Update all phi nodes in this basic block's successors to refer to basic
/// block \p New instead of to it.
void replaceSuccessorsPhiUsesWith(BasicBlock *New);
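
Illustrative, not part of the patch: a sketch of the new PHI-updating helper; reroutePhis is a hypothetical name for code that has just redirected the edge OldPred -> BB through NewPred.

    #include "llvm/IR/BasicBlock.h"

    void reroutePhis(llvm::BasicBlock *BB, llvm::BasicBlock *OldPred,
                     llvm::BasicBlock *NewPred) {
      // Any PHI incoming value in BB that referred to OldPred now refers to
      // NewPred; replaceSuccessorsPhiUsesWith(Old, New) does the same for all
      // of a block's successors.
      BB->replacePhiUsesWith(OldPred, NewPred);
    }
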
diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h
index 8385c4647e12..55aff7137e86 100644
--- a/include/llvm/IR/CFG.h
+++ b/include/llvm/IR/CFG.h
@@ -1,9 +1,8 @@
//===- CFG.h ----------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -238,10 +237,6 @@ public:
}
};
-template <typename T, typename U> struct isPodLike<SuccIterator<T, U>> {
- static const bool value = isPodLike<T>::value;
-};
-
using succ_iterator = SuccIterator<Instruction, BasicBlock>;
using succ_const_iterator = SuccIterator<const Instruction, const BasicBlock>;
using succ_range = iterator_range<succ_iterator>;
diff --git a/include/llvm/IR/CFGDiff.h b/include/llvm/IR/CFGDiff.h
index da4373f7bce2..57b62dd66a47 100644
--- a/include/llvm/IR/CFGDiff.h
+++ b/include/llvm/IR/CFGDiff.h
@@ -1,9 +1,8 @@
//===- CFGDiff.h - Define a CFG snapshot. -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h
index a3e78049f4be..b47a96c5d5fa 100644
--- a/include/llvm/IR/CallSite.h
+++ b/include/llvm/IR/CallSite.h
@@ -1,15 +1,14 @@
//===- CallSite.h - Abstract Call & Invoke instrs ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the CallSite class, which is a handy wrapper for code that
-// wants to treat Call and Invoke instructions in a generic way. When in non-
-// mutation context (e.g. an analysis) ImmutableCallSite should be used.
+// wants to treat Call, Invoke and CallBr instructions in a generic way. When
+// in non-mutation context (e.g. an analysis) ImmutableCallSite should be used.
// Finally, when some degree of customization is necessary between these two
// extremes, CallSiteBase<> can be supplied with fine-tuned parameters.
//
@@ -18,7 +17,7 @@
// They are efficiently copyable, assignable and constructable, with cost
// equivalent to copying a pointer (notice that they have only a single data
// member). The internal representation carries a flag which indicates which of
-// the two variants is enclosed. This allows for cheaper checks when various
+// the three variants is enclosed. This allows for cheaper checks when various
// accessors of CallSite are employed.
//
//===----------------------------------------------------------------------===//
@@ -49,45 +48,50 @@ namespace Intrinsic {
enum ID : unsigned;
}
-template <typename FunTy = const Function,
- typename BBTy = const BasicBlock,
- typename ValTy = const Value,
- typename UserTy = const User,
- typename UseTy = const Use,
- typename InstrTy = const Instruction,
+template <typename FunTy = const Function, typename BBTy = const BasicBlock,
+ typename ValTy = const Value, typename UserTy = const User,
+ typename UseTy = const Use, typename InstrTy = const Instruction,
typename CallTy = const CallInst,
typename InvokeTy = const InvokeInst,
+ typename CallBrTy = const CallBrInst,
typename IterTy = User::const_op_iterator>
class CallSiteBase {
protected:
- PointerIntPair<InstrTy*, 1, bool> I;
+ PointerIntPair<InstrTy *, 2, int> I;
CallSiteBase() = default;
- CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); }
- CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); }
+ CallSiteBase(CallTy *CI) : I(CI, 1) { assert(CI); }
+ CallSiteBase(InvokeTy *II) : I(II, 0) { assert(II); }
+ CallSiteBase(CallBrTy *CBI) : I(CBI, 2) { assert(CBI); }
explicit CallSiteBase(ValTy *II) { *this = get(II); }
private:
/// This static method is like a constructor. It will create an appropriate
- /// call site for a Call or Invoke instruction, but it can also create a null
- /// initialized CallSiteBase object for something which is NOT a call site.
+ /// call site for a Call, Invoke or CallBr instruction, but it can also create
+ /// a null initialized CallSiteBase object for something which is NOT a call
+ /// site.
static CallSiteBase get(ValTy *V) {
if (InstrTy *II = dyn_cast<InstrTy>(V)) {
if (II->getOpcode() == Instruction::Call)
return CallSiteBase(static_cast<CallTy*>(II));
- else if (II->getOpcode() == Instruction::Invoke)
+ if (II->getOpcode() == Instruction::Invoke)
return CallSiteBase(static_cast<InvokeTy*>(II));
+ if (II->getOpcode() == Instruction::CallBr)
+ return CallSiteBase(static_cast<CallBrTy *>(II));
}
return CallSiteBase();
}
public:
- /// Return true if a CallInst is enclosed. Note that !isCall() does not mean
- /// an InvokeInst is enclosed. It may also signify a NULL instruction pointer.
- bool isCall() const { return I.getInt(); }
+ /// Return true if a CallInst is enclosed.
+ bool isCall() const { return I.getInt() == 1; }
+
+ /// Return true if a InvokeInst is enclosed. !I.getInt() may also signify a
+ /// NULL instruction pointer, so check that.
+ bool isInvoke() const { return getInstruction() && I.getInt() == 0; }
- /// Return true if a InvokeInst is enclosed.
- bool isInvoke() const { return getInstruction() && !I.getInt(); }
+ /// Return true if a CallBrInst is enclosed.
+ bool isCallBr() const { return I.getInt() == 2; }
InstrTy *getInstruction() const { return I.getPointer(); }
InstrTy *operator->() const { return I.getPointer(); }
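
Illustrative, not part of the patch: with three enclosed instruction kinds, client code can now dispatch on the extra predicate; describeCallSite is a hypothetical helper.

    #include "llvm/IR/CallSite.h"

    const char *describeCallSite(llvm::CallSite CS) {
      if (!CS.getInstruction())
        return "not a call site";
      if (CS.isCall())
        return "call";
      if (CS.isInvoke())
        return "invoke";
      return CS.isCallBr() ? "callbr" : "unknown";
    }
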
@@ -98,7 +102,7 @@ public:
/// Return the pointer to function that is being called.
ValTy *getCalledValue() const {
- assert(getInstruction() && "Not a call or invoke instruction!");
+ assert(getInstruction() && "Not a call, invoke or callbr instruction!");
return *getCallee();
}
@@ -115,16 +119,19 @@ public:
return false;
if (isa<FunTy>(V) || isa<Constant>(V))
return false;
- if (const CallInst *CI = dyn_cast<CallInst>(getInstruction())) {
- if (CI->isInlineAsm())
+ if (const CallBase *CB = dyn_cast<CallBase>(getInstruction()))
+ if (CB->isInlineAsm())
return false;
- }
return true;
}
- /// Set the callee to the specified value.
+ /// Set the callee to the specified value. Unlike the function of the same
+ /// name on CallBase, does not modify the type!
void setCalledFunction(Value *V) {
- assert(getInstruction() && "Not a call or invoke instruction!");
+ assert(getInstruction() && "Not a call, callbr, or invoke instruction!");
+ assert(cast<PointerType>(V->getType())->getElementType() ==
+ cast<CallBase>(getInstruction())->getFunctionType() &&
+ "New callee type does not match FunctionType on call");
*getCallee() = V;
}
@@ -189,7 +196,7 @@ public:
}
void setArgument(unsigned ArgNo, Value* newVal) {
- assert(getInstruction() && "Not a call or invoke instruction!");
+ assert(getInstruction() && "Not a call, invoke or callbr instruction!");
assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
getInstruction()->setOperand(ArgNo, newVal);
}
@@ -203,7 +210,7 @@ public:
/// Given a use for an argument, get the argument number that corresponds to
/// it.
unsigned getArgumentNo(const Use *U) const {
- assert(getInstruction() && "Not a call or invoke instruction!");
+ assert(getInstruction() && "Not a call, invoke or callbr instruction!");
assert(isArgOperand(U) && "Argument # out of range!");
return U - arg_begin();
}
@@ -227,7 +234,7 @@ public:
/// Given a use for a data operand, get the data operand number that
/// corresponds to it.
unsigned getDataOperandNo(const Use *U) const {
- assert(getInstruction() && "Not a call or invoke instruction!");
+ assert(getInstruction() && "Not a call, invoke or callbr instruction!");
assert(isDataOperand(U) && "Data operand # out of range!");
return U - data_operands_begin();
}
@@ -237,18 +244,19 @@ public:
using data_operand_iterator = IterTy;
/// data_operands_begin/data_operands_end - Return iterators iterating over
- /// the call / invoke argument list and bundle operands. For invokes, this is
- /// the set of instruction operands except the invoke target and the two
- /// successor blocks; and for calls this is the set of instruction operands
- /// except the call target.
+ /// the call / invoke / callbr argument list and bundle operands. For invokes,
+ /// this is the set of instruction operands except the invoke target and the
+ /// two successor blocks; for calls this is the set of instruction operands
+ /// except the call target; for callbrs the number of labels to skip must be
+ /// determined first.
IterTy data_operands_begin() const {
assert(getInstruction() && "Not a call or invoke instruction!");
- return (*this)->op_begin();
+ return cast<CallBase>(getInstruction())->data_operands_begin();
}
IterTy data_operands_end() const {
assert(getInstruction() && "Not a call or invoke instruction!");
- return (*this)->op_end() - (isCall() ? 1 : 3);
+ return cast<CallBase>(getInstruction())->data_operands_end();
}
iterator_range<IterTy> data_ops() const {
return make_range(data_operands_begin(), data_operands_end());
@@ -277,17 +285,19 @@ public:
return isCall() && cast<CallInst>(getInstruction())->isTailCall();
}
-#define CALLSITE_DELEGATE_GETTER(METHOD) \
- InstrTy *II = getInstruction(); \
- return isCall() \
- ? cast<CallInst>(II)->METHOD \
- : cast<InvokeInst>(II)->METHOD
+#define CALLSITE_DELEGATE_GETTER(METHOD) \
+ InstrTy *II = getInstruction(); \
+ return isCall() ? cast<CallInst>(II)->METHOD \
+ : isCallBr() ? cast<CallBrInst>(II)->METHOD \
+ : cast<InvokeInst>(II)->METHOD
-#define CALLSITE_DELEGATE_SETTER(METHOD) \
- InstrTy *II = getInstruction(); \
- if (isCall()) \
- cast<CallInst>(II)->METHOD; \
- else \
+#define CALLSITE_DELEGATE_SETTER(METHOD) \
+ InstrTy *II = getInstruction(); \
+ if (isCall()) \
+ cast<CallInst>(II)->METHOD; \
+ else if (isCallBr()) \
+ cast<CallBrInst>(II)->METHOD; \
+ else \
cast<InvokeInst>(II)->METHOD
unsigned getNumArgOperands() const {
@@ -303,9 +313,7 @@ public:
}
bool isInlineAsm() const {
- if (isCall())
- return cast<CallInst>(getInstruction())->isInlineAsm();
- return false;
+ return cast<CallBase>(getInstruction())->isInlineAsm();
}
/// Get the calling convention of the call.
@@ -389,10 +397,10 @@ public:
/// Return true if the data operand at index \p i directly or indirectly has
/// the attribute \p A.
///
- /// Normal call or invoke arguments have per operand attributes, as specified
- /// in the attribute set attached to this instruction, while operand bundle
- /// operands may have some attributes implied by the type of its containing
- /// operand bundle.
+ /// Normal call, invoke or callbr arguments have per operand attributes, as
+ /// specified in the attribute set attached to this instruction, while operand
+ /// bundle operands may have some attributes implied by the type of its
+ /// containing operand bundle.
bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind));
}
@@ -407,6 +415,11 @@ public:
CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo));
}
+ /// Extract the byval type for a call or parameter (nullptr=unknown).
+ Type *getParamByValType(unsigned ArgNo) const {
+ CALLSITE_DELEGATE_GETTER(getParamByValType(ArgNo));
+ }
+
/// Extract the number of dereferenceable bytes for a call or parameter
/// (0=unknown).
uint64_t getDereferenceableBytes(unsigned i) const {
@@ -580,13 +593,9 @@ public:
#undef CALLSITE_DELEGATE_SETTER
void getOperandBundlesAsDefs(SmallVectorImpl<OperandBundleDef> &Defs) const {
- const Instruction *II = getInstruction();
// Since this is actually a getter that "looks like" a setter, don't use the
// above macros to avoid confusion.
- if (isCall())
- cast<CallInst>(II)->getOperandBundlesAsDefs(Defs);
- else
- cast<InvokeInst>(II)->getOperandBundlesAsDefs(Defs);
+ cast<CallBase>(getInstruction())->getOperandBundlesAsDefs(Defs);
}
/// Determine whether this data operand is not captured.
@@ -662,12 +671,13 @@ private:
class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
Instruction, CallInst, InvokeInst,
- User::op_iterator> {
+ CallBrInst, User::op_iterator> {
public:
CallSite() = default;
CallSite(CallSiteBase B) : CallSiteBase(B) {}
CallSite(CallInst *CI) : CallSiteBase(CI) {}
CallSite(InvokeInst *II) : CallSiteBase(II) {}
+ CallSite(CallBrInst *CBI) : CallSiteBase(CBI) {}
explicit CallSite(Instruction *II) : CallSiteBase(II) {}
explicit CallSite(Value *V) : CallSiteBase(V) {}
@@ -683,6 +693,182 @@ private:
User::op_iterator getCallee() const;
};
+/// AbstractCallSite
+///
+/// An abstract call site is a wrapper that allows to treat direct,
+/// indirect, and callback calls the same. If an abstract call site
+/// represents a direct or indirect call site it behaves like a stripped
+/// down version of a normal call site object. The abstract call site can
+/// also represent a callback call, thus the fact that the initially
+/// called function (=broker) may invoke a third one (=callback callee).
+/// In this case, the abstract call site hides the middle man, hence the
+/// broker function. The result is a representation of the callback call,
+/// inside the broker, but in the context of the original call to the broker.
+///
+/// There are up to three functions involved when we talk about callback call
+/// sites. The caller (1), which invokes the broker function. The broker
+/// function (2), that will invoke the callee zero or more times. And finally
+/// the callee (3), which is the target of the callback call.
+///
+/// The abstract call site will handle the mapping from parameters to arguments
+/// depending on the semantic of the broker function. However, it is important
+/// to note that the mapping is often partial. Thus, some arguments of the
+/// call/invoke instruction are mapped to parameters of the callee while others
+/// are not.
+class AbstractCallSite {
+public:
+
+ /// The encoding of a callback with regards to the underlying instruction.
+ struct CallbackInfo {
+
+ /// For direct/indirect calls the parameter encoding is empty. If it is not,
+ /// the abstract call site represents a callback. In that case, the first
+ /// element of the encoding vector represents which argument of the call
+ /// site CS is the callback callee. The remaining elements map parameters
+ /// (identified by their position) to the arguments that will be passed
+ /// through (also identified by position but in the call site instruction).
+ ///
+ /// NOTE that we use LLVM argument numbers (starting at 0) and not
+ /// clang/source argument numbers (starting at 1). The -1 entries represent
+ /// unknown values that are passed to the callee.
+ using ParameterEncodingTy = SmallVector<int, 0>;
+ ParameterEncodingTy ParameterEncoding;
+
+ };
+
+private:
+
+ /// The underlying call site:
+ /// caller -> callee, if this is a direct or indirect call site
+ /// caller -> broker function, if this is a callback call site
+ CallSite CS;
+
+ /// The encoding of a callback with regards to the underlying instruction.
+ CallbackInfo CI;
+
+public:
+ /// Sole constructor for abstract call sites (ACS).
+ ///
+ /// An abstract call site can only be constructed through a llvm::Use because
+ /// each operand (=use) of an instruction could potentially be a different
+ /// abstract call site. Furthermore, even if the value of the llvm::Use is the
+ /// same, and the user is as well, the abstract call sites might not be.
+ ///
+ /// If a use is not associated with an abstract call site the constructed ACS
+ /// will evaluate to false if converted to a boolean.
+ ///
+ /// If the use is the callee use of a call or invoke instruction, the
+ /// constructed abstract call site will behave as a llvm::CallSite would.
+ ///
+ /// If the use is not a callee use of a call or invoke instruction, the
+ /// callback metadata is used to determine the argument <-> parameter mapping
+ /// as well as the callee of the abstract call site.
+ AbstractCallSite(const Use *U);
+
+ /// Conversion operator to conveniently check for a valid/initialized ACS.
+ explicit operator bool() const { return (bool)CS; }
+
+ /// Return the underlying instruction.
+ Instruction *getInstruction() const { return CS.getInstruction(); }
+
+ /// Return the call site abstraction for the underlying instruction.
+ CallSite getCallSite() const { return CS; }
+
+ /// Return true if this ACS represents a direct call.
+ bool isDirectCall() const {
+ return !isCallbackCall() && !CS.isIndirectCall();
+ }
+
+ /// Return true if this ACS represents an indirect call.
+ bool isIndirectCall() const {
+ return !isCallbackCall() && CS.isIndirectCall();
+ }
+
+ /// Return true if this ACS represents a callback call.
+ bool isCallbackCall() const {
+ // For a callback call site the callee is ALWAYS stored first in the
+ // transitive values vector. Thus, a non-empty vector indicates a callback.
+ return !CI.ParameterEncoding.empty();
+ }
+
+ /// Return true if @p UI is the use that defines the callee of this ACS.
+ bool isCallee(Value::const_user_iterator UI) const {
+ return isCallee(&UI.getUse());
+ }
+
+ /// Return true if @p U is the use that defines the callee of this ACS.
+ bool isCallee(const Use *U) const {
+ if (isDirectCall())
+ return CS.isCallee(U);
+
+ assert(!CI.ParameterEncoding.empty() &&
+ "Callback without parameter encoding!");
+
+ return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0];
+ }
+
+ /// Return the number of parameters of the callee.
+ unsigned getNumArgOperands() const {
+ if (isDirectCall())
+ return CS.getNumArgOperands();
+ // Subtract 1 for the callee encoding.
+ return CI.ParameterEncoding.size() - 1;
+ }
+
+ /// Return the operand index of the underlying instruction associated with @p
+ /// Arg.
+ int getCallArgOperandNo(Argument &Arg) const {
+ return getCallArgOperandNo(Arg.getArgNo());
+ }
+
+ /// Return the operand index of the underlying instruction associated with
+ /// the function parameter number @p ArgNo or -1 if there is none.
+ int getCallArgOperandNo(unsigned ArgNo) const {
+ if (isDirectCall())
+ return ArgNo;
+ // Add 1 for the callee encoding.
+ return CI.ParameterEncoding[ArgNo + 1];
+ }
+
+ /// Return the operand of the underlying instruction associated with @p Arg.
+ Value *getCallArgOperand(Argument &Arg) const {
+ return getCallArgOperand(Arg.getArgNo());
+ }
+
+ /// Return the operand of the underlying instruction associated with the
+ /// function parameter number @p ArgNo or nullptr if there is none.
+ Value *getCallArgOperand(unsigned ArgNo) const {
+ if (isDirectCall())
+ return CS.getArgOperand(ArgNo);
+ // Add 1 for the callee encoding.
+ return CI.ParameterEncoding[ArgNo + 1] >= 0
+ ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1])
+ : nullptr;
+ }
+
+ /// Return the operand index of the underlying instruction associated with the
+ /// callee of this ACS. Only valid for callback calls!
+ int getCallArgOperandNoForCallee() const {
+ assert(isCallbackCall());
+ assert(CI.ParameterEncoding.size() && CI.ParameterEncoding[0] > 0);
+ return CI.ParameterEncoding[0];
+ }
+
+ /// Return the pointer to function that is being called.
+ Value *getCalledValue() const {
+ if (isDirectCall())
+ return CS.getCalledValue();
+ return CS.getArgOperand(getCallArgOperandNoForCallee());
+ }
+
+ /// Return the function being called if this is a direct call, otherwise
+ /// return null (if it's an indirect call).
+ Function *getCalledFunction() const {
+ Value *V = getCalledValue();
+ return V ? dyn_cast<Function>(V->stripPointerCasts()) : nullptr;
+ }
+};
+
template <> struct DenseMapInfo<CallSite> {
using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;
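
Illustrative, not part of the patch: a sketch of walking a callee's uses through AbstractCallSite so that direct, indirect and callback call sites are handled uniformly; visitCallersOf is hypothetical and only queries operand 0 when the callback callee has at least one parameter.

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"

    void visitCallersOf(llvm::Function &Callee) {
      for (llvm::Use &U : Callee.uses()) {
        llvm::AbstractCallSite ACS(&U);
        if (!ACS)
          continue; // this use does not form an abstract call site
        if (ACS.isCallbackCall() && ACS.getNumArgOperands() > 0) {
          // For callback calls the broker is hidden; ask which call-site
          // operand (if any) feeds the callback's first parameter.
          llvm::Value *Arg0 = ACS.getCallArgOperand(0u);
          (void)Arg0; // nullptr if parameter 0 has no known incoming argument
        }
      }
    }
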
@@ -713,6 +899,7 @@ public:
ImmutableCallSite() = default;
ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
+ ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {}
explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {}
ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 49c3be960373..399c6ad521fa 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -1,9 +1,8 @@
//===- llvm/CallingConv.h - LLVM Calling Conventions ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Comdat.h b/include/llvm/IR/Comdat.h
index 555121e928f7..f712a16dd318 100644
--- a/include/llvm/IR/Comdat.h
+++ b/include/llvm/IR/Comdat.h
@@ -1,9 +1,8 @@
//===- llvm/IR/Comdat.h - Comdat definitions --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h
index 98437f8eff1f..931576651224 100644
--- a/include/llvm/IR/Constant.h
+++ b/include/llvm/IR/Constant.h
@@ -1,9 +1,8 @@
//===-- llvm/Constant.h - Constant class definition -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,6 +90,10 @@ public:
/// elements.
bool containsUndefElement() const;
+ /// Return true if this is a vector constant that includes any constant
+ /// expressions.
+ bool containsConstantExpression() const;
+
/// Return true if evaluation of this constant could trap. This is true for
/// things like constant expressions that could divide by zero.
bool canTrap() const;
diff --git a/include/llvm/IR/ConstantFolder.h b/include/llvm/IR/ConstantFolder.h
index da5bba7ba141..5a5cabfd0206 100644
--- a/include/llvm/IR/ConstantFolder.h
+++ b/include/llvm/IR/ConstantFolder.h
@@ -1,9 +1,8 @@
//===- ConstantFolder.h - Constant folding helper ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -135,6 +134,10 @@ public:
return ConstantExpr::getNot(C);
}
+ Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+ return ConstantExpr::get(Opc, C);
+ }
+
//===--------------------------------------------------------------------===//
// Memory Instructions
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h
index 1adda3269abc..91f3f31abe17 100644
--- a/include/llvm/IR/ConstantRange.h
+++ b/include/llvm/IR/ConstantRange.h
@@ -1,9 +1,8 @@
//===- ConstantRange.h - Represent a range ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,14 +41,25 @@ namespace llvm {
class MDNode;
class raw_ostream;
+struct KnownBits;
/// This class represents a range of values.
class LLVM_NODISCARD ConstantRange {
APInt Lower, Upper;
+ /// Create empty constant range with same bitwidth.
+ ConstantRange getEmpty() const {
+ return ConstantRange(getBitWidth(), false);
+ }
+
+ /// Create full constant range with same bitwidth.
+ ConstantRange getFull() const {
+ return ConstantRange(getBitWidth(), true);
+ }
+
public:
- /// Initialize a full (the default) or empty set for the specified bit width.
- explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true);
+ /// Initialize a full or empty set for the specified bit width.
+ explicit ConstantRange(uint32_t BitWidth, bool isFullSet);
/// Initialize a range to hold the single specified value.
ConstantRange(APInt Value);
@@ -59,6 +69,29 @@ public:
/// assert out if the two APInt's are not the same bit width.
ConstantRange(APInt Lower, APInt Upper);
+ /// Create empty constant range with the given bit width.
+ static ConstantRange getEmpty(uint32_t BitWidth) {
+ return ConstantRange(BitWidth, false);
+ }
+
+ /// Create full constant range with the given bit width.
+ static ConstantRange getFull(uint32_t BitWidth) {
+ return ConstantRange(BitWidth, true);
+ }
+
+ /// Create non-empty constant range with the given bounds. If Lower and
+ /// Upper are the same, a full range is returned.
+ static ConstantRange getNonEmpty(APInt Lower, APInt Upper) {
+ if (Lower == Upper)
+ return getFull(Lower.getBitWidth());
+ return ConstantRange(std::move(Lower), std::move(Upper));
+ }
+
+ /// Initialize a range based on a known bits constraint. The IsSigned flag
+ /// indicates whether the constant range should not wrap in the signed or
+ /// unsigned domain.
+ static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned);
+
/// Produce the smallest range such that all values that may satisfy the given
/// predicate with any value contained within Other is contained in the
/// returned range. Formally, this returns a superset of
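
Illustrative, not part of the patch: a short sketch of the new factory helpers; the i8 bit width is arbitrary.

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include <cassert>

    void rangeFactories() {
      llvm::ConstantRange Empty = llvm::ConstantRange::getEmpty(8);
      llvm::ConstantRange Full = llvm::ConstantRange::getFull(8);
      assert(Empty.isEmptySet() && Full.isFullSet());

      // With getNonEmpty, equal bounds always mean "full", never "empty".
      llvm::ConstantRange R = llvm::ConstantRange::getNonEmpty(
          llvm::APInt(8, 5), llvm::APInt(8, 5));
      assert(R.isFullSet());
      (void)Empty; (void)Full; (void)R;
    }
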
@@ -91,14 +124,12 @@ public:
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred,
const APInt &Other);
- /// Return the largest range containing all X such that "X BinOpC Y" is
- /// guaranteed not to wrap (overflow) for all Y in Other.
+ /// Produce the largest range containing all X such that "X BinOp Y" is
+ /// guaranteed not to wrap (overflow) for *all* Y in Other. However, there may
+ /// be *some* Y in Other for which additional X not contained in the result
+ /// also do not overflow.
///
- /// NB! The returned set does *not* contain **all** possible values of X for
- /// which "X BinOpC Y" does not wrap -- some viable values of X may be
- /// missing, so you cannot use this to constrain X's range. E.g. in the
- /// fourth example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2),
- /// but (-2) is not in the set returned.
+ /// NoWrapKind must be one of OBO::NoUnsignedWrap or OBO::NoSignedWrap.
///
/// Examples:
/// typedef OverflowingBinaryOperator OBO;
@@ -106,17 +137,19 @@ public:
/// MGNR(Add, [i8 1, 2), OBO::NoSignedWrap) == [-128, 127)
/// MGNR(Add, [i8 1, 2), OBO::NoUnsignedWrap) == [0, -1)
/// MGNR(Add, [i8 0, 1), OBO::NoUnsignedWrap) == Full Set
- /// MGNR(Add, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap)
- /// == [0,INT_MAX)
/// MGNR(Add, [i8 -1, 6), OBO::NoSignedWrap) == [INT_MIN+1, INT_MAX-4)
/// MGNR(Sub, [i8 1, 2), OBO::NoSignedWrap) == [-127, 128)
/// MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap) == [1, 0)
- /// MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap)
- /// == [1,INT_MAX)
static ConstantRange makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
const ConstantRange &Other,
unsigned NoWrapKind);
+ /// Produce the range that contains X if and only if "X BinOp Other" does
+ /// not wrap.
+ static ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp,
+ const APInt &Other,
+ unsigned NoWrapKind);
+
/// Set up \p Pred and \p RHS such that
/// ConstantRange::makeExactICmpRegion(Pred, RHS) == *this. Return true if
/// successful.
@@ -138,14 +171,32 @@ public:
/// Return true if this set contains no members.
bool isEmptySet() const;
- /// Return true if this set wraps around the top of the range.
- /// For example: [100, 8).
+ /// Return true if this set wraps around the unsigned domain. Special cases:
+ /// * Empty set: Not wrapped.
+ /// * Full set: Not wrapped.
+ /// * [X, 0) == [X, Max]: Not wrapped.
bool isWrappedSet() const;
- /// Return true if this set wraps around the INT_MIN of
- /// its bitwidth. For example: i8 [120, 140).
+ /// Return true if the exclusive upper bound wraps around the unsigned
+ /// domain. Special cases:
+ /// * Empty set: Not wrapped.
+ /// * Full set: Not wrapped.
+ /// * [X, 0): Wrapped.
+ bool isUpperWrapped() const;
+
+ /// Return true if this set wraps around the signed domain. Special cases:
+ /// * Empty set: Not wrapped.
+ /// * Full set: Not wrapped.
+ /// * [X, SignedMin) == [X, SignedMax]: Not wrapped.
bool isSignWrappedSet() const;
+ /// Return true if the (exclusive) upper bound wraps around the signed
+ /// domain. Special cases:
+ /// * Empty set: Not wrapped.
+ /// * Full set: Not wrapped.
+ /// * [X, SignedMin): Wrapped.
+ bool isUpperSignWrapped() const;
+
/// Return true if the specified value is in the set.
bool contains(const APInt &Val) const;
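
Illustrative, not part of the patch: concrete i8 values for the wrapping predicates documented above.

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include <cassert>

    void wrappingExamples() {
      // [100, 8) wraps around the unsigned domain ...
      llvm::ConstantRange CR(llvm::APInt(8, 100), llvm::APInt(8, 8));
      assert(CR.isWrappedSet());
      // ... while [100, 0) == [100, 255] does not, although its exclusive
      // upper bound does.
      llvm::ConstantRange CR2(llvm::APInt(8, 100), llvm::APInt(8, 0));
      assert(!CR2.isWrappedSet() && CR2.isUpperWrapped());
      (void)CR; (void)CR2;
    }
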
@@ -170,15 +221,18 @@ public:
/// Return true if this set contains exactly one member.
bool isSingleElement() const { return getSingleElement() != nullptr; }
- /// Return the number of elements in this set.
- APInt getSetSize() const;
-
/// Compare set size of this range with the range CR.
bool isSizeStrictlySmallerThan(const ConstantRange &CR) const;
- // Compare set size of this range with Value.
+ /// Compare set size of this range with Value.
bool isSizeLargerThan(uint64_t MaxSize) const;
+ /// Return true if all values in this range are negative.
+ bool isAllNegative() const;
+
+ /// Return true if all values in this range are non-negative.
+ bool isAllNonNegative() const;
+
/// Return the largest unsigned value contained in the ConstantRange.
APInt getUnsignedMax() const;
@@ -206,20 +260,30 @@ public:
/// the sets).
ConstantRange difference(const ConstantRange &CR) const;
- /// Return the range that results from the intersection of
- /// this range with another range. The resultant range is guaranteed to
- /// include all elements contained in both input ranges, and to have the
- /// smallest possible set size that does so. Because there may be two
- /// intersections with the same set size, A.intersectWith(B) might not
- /// be equal to B.intersectWith(A).
- ConstantRange intersectWith(const ConstantRange &CR) const;
+ /// If represented precisely, the result of some range operations may consist
+ /// of multiple disjoint ranges. As only a single range may be returned, any
+ /// range covering these disjoint ranges constitutes a valid result, but some
+ /// may be more useful than others depending on context. The preferred range
+ /// type specifies whether a range that is non-wrapping in the unsigned or
+ /// signed domain, or has the smallest size, is preferred. If a signedness is
+ /// preferred but all ranges are non-wrapping or all wrapping, then the
+ /// smallest set size is preferred. If there are multiple smallest sets, any
+ /// one of them may be returned.
+ enum PreferredRangeType { Smallest, Unsigned, Signed };
+
+ /// Return the range that results from the intersection of this range with
+ /// another range. If the intersection is disjoint, such that two results
+ /// are possible, the preferred range is determined by the PreferredRangeType.
+ ConstantRange intersectWith(const ConstantRange &CR,
+ PreferredRangeType Type = Smallest) const;
/// Return the range that results from the union of this range
/// with another range. The resultant range is guaranteed to include the
/// elements of both sets, but may contain more. For example, [3, 9) union
/// [12,15) is [3, 15), which includes 9, 10, and 11, which were not included
/// in either set before.
- ConstantRange unionWith(const ConstantRange &CR) const;
+ ConstantRange unionWith(const ConstantRange &CR,
+ PreferredRangeType Type = Smallest) const;
/// Return a new range representing the possible values resulting
/// from an application of the specified cast operator to this range. \p
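
Illustrative, not part of the patch: a sketch of passing the new preferred-range parameter; intersectUnsigned is a hypothetical wrapper.

    #include "llvm/IR/ConstantRange.h"

    llvm::ConstantRange intersectUnsigned(const llvm::ConstantRange &A,
                                          const llvm::ConstantRange &B) {
      // Prefer a result that does not wrap in the unsigned domain over the
      // smallest superset of the (possibly disjoint) true intersection.
      return A.intersectWith(B, llvm::ConstantRange::Unsigned);
    }
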
@@ -301,6 +365,23 @@ public:
ConstantRange udiv(const ConstantRange &Other) const;
/// Return a new range representing the possible values resulting
+ /// from a signed division of a value in this range and a value in
+ /// \p Other. Division by zero and division of SignedMin by -1 are considered
+ /// undefined behavior, in line with IR, and do not contribute towards the
+ /// result.
+ ConstantRange sdiv(const ConstantRange &Other) const;
+
+ /// Return a new range representing the possible values resulting
+ /// from an unsigned remainder operation of a value in this range and a
+ /// value in \p Other.
+ ConstantRange urem(const ConstantRange &Other) const;
+
+ /// Return a new range representing the possible values resulting
+ /// from a signed remainder operation of a value in this range and a
+ /// value in \p Other.
+ ConstantRange srem(const ConstantRange &Other) const;
+
+ /// Return a new range representing the possible values resulting
/// from a binary-and of a value in this range by a value in \p Other.
ConstantRange binaryAnd(const ConstantRange &Other) const;
@@ -321,9 +402,53 @@ public:
/// arithmetic right shift of a value in this range and a value in \p Other.
ConstantRange ashr(const ConstantRange &Other) const;
+ /// Perform an unsigned saturating addition of two constant ranges.
+ ConstantRange uadd_sat(const ConstantRange &Other) const;
+
+ /// Perform a signed saturating addition of two constant ranges.
+ ConstantRange sadd_sat(const ConstantRange &Other) const;
+
+ /// Perform an unsigned saturating subtraction of two constant ranges.
+ ConstantRange usub_sat(const ConstantRange &Other) const;
+
+ /// Perform a signed saturating subtraction of two constant ranges.
+ ConstantRange ssub_sat(const ConstantRange &Other) const;
+
/// Return a new range that is the logical not of the current set.
ConstantRange inverse() const;
+ /// Calculate absolute value range. If the original range contains signed
+ /// min, then the resulting range will also contain signed min.
+ ConstantRange abs() const;
+
+ /// Represents whether an operation on the given constant range is known to
+ /// always or never overflow.
+ enum class OverflowResult {
+ /// Always overflows in the direction of signed/unsigned min value.
+ AlwaysOverflowsLow,
+ /// Always overflows in the direction of signed/unsigned max value.
+ AlwaysOverflowsHigh,
+ /// May or may not overflow.
+ MayOverflow,
+ /// Never overflows.
+ NeverOverflows,
+ };
+
+ /// Return whether unsigned add of the two ranges always/never overflows.
+ OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const;
+
+ /// Return whether signed add of the two ranges always/never overflows.
+ OverflowResult signedAddMayOverflow(const ConstantRange &Other) const;
+
+ /// Return whether unsigned sub of the two ranges always/never overflows.
+ OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const;
+
+ /// Return whether signed sub of the two ranges always/never overflows.
+ OverflowResult signedSubMayOverflow(const ConstantRange &Other) const;
+
+ /// Return whether unsigned mul of the two ranges always/never overflows.
+ OverflowResult unsignedMulMayOverflow(const ConstantRange &Other) const;
+
/// Print out the bounds to a stream.
void print(raw_ostream &OS) const;
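
Illustrative, not part of the patch: a sketch of the new overflow queries; both ranges are assumed to share a bit width, as the API requires.

    #include "llvm/IR/ConstantRange.h"

    bool addNeverOverflowsUnsigned(const llvm::ConstantRange &A,
                                   const llvm::ConstantRange &B) {
      return A.unsignedAddMayOverflow(B) ==
             llvm::ConstantRange::OverflowResult::NeverOverflows;
    }
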
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index afc93cd61d47..ca56e8b9328c 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -1,9 +1,8 @@
//===-- llvm/Constants.h - Constant class subclass definitions --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index 443332b1b23c..ad9a35b55414 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -1,9 +1,8 @@
//===- DIBuilder.h - Debug Information Builder ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -707,6 +706,16 @@ namespace llvm {
DITemplateParameterArray TParams = nullptr,
DITypeArray ThrownTypes = nullptr);
+ /// Create common block entry for a Fortran common block.
+ /// \param Scope Scope of this common block.
+ /// \param decl Global variable declaration.
+ /// \param Name The name of this common block.
+ /// \param File The file this common block is defined.
+ /// \param LineNo Line number.
+ DICommonBlock *createCommonBlock(DIScope *Scope, DIGlobalVariable *decl,
+ StringRef Name, DIFile *File,
+ unsigned LineNo);
+
/// This creates new descriptor for a namespace with the specified
/// parent scope.
/// \param Scope Namespace scope
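
Illustrative, not part of the patch: a sketch of calling the new createCommonBlock entry point; emitCommonBlock and the block name are hypothetical, with Scope, Decl, File and LineNo assumed to come from the front end.

    #include "llvm/IR/DIBuilder.h"

    llvm::DICommonBlock *emitCommonBlock(llvm::DIBuilder &DIB,
                                         llvm::DIScope *Scope,
                                         llvm::DIGlobalVariable *Decl,
                                         llvm::DIFile *File, unsigned LineNo) {
      // "common_" is an arbitrary placeholder name for the Fortran common block.
      return DIB.createCommonBlock(Scope, Decl, "common_", File, LineNo);
    }
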
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index c144d1c13c34..ac9770a15120 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -1,9 +1,8 @@
//===- llvm/DataLayout.h - Data size & alignment info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -109,6 +108,13 @@ struct PointerAlignElem {
/// generating LLVM IR is required to generate the right target data for the
/// target being codegen'd to.
class DataLayout {
+public:
+ enum class FunctionPtrAlignType {
+ /// The function pointer alignment is independent of the function alignment.
+ Independent,
+ /// The function pointer alignment is a multiple of the function alignment.
+ MultipleOfFunctionAlign,
+ };
private:
/// Defaults to false.
bool BigEndian;
@@ -117,6 +123,9 @@ private:
unsigned StackNaturalAlign;
unsigned ProgramAddrSpace;
+ unsigned FunctionPtrAlign;
+ FunctionPtrAlignType TheFunctionPtrAlignType;
+
enum ManglingModeT {
MM_None,
MM_ELF,
@@ -200,6 +209,8 @@ public:
BigEndian = DL.isBigEndian();
AllocaAddrSpace = DL.AllocaAddrSpace;
StackNaturalAlign = DL.StackNaturalAlign;
+ FunctionPtrAlign = DL.FunctionPtrAlign;
+ TheFunctionPtrAlignType = DL.TheFunctionPtrAlignType;
ProgramAddrSpace = DL.ProgramAddrSpace;
ManglingMode = DL.ManglingMode;
LegalIntWidths = DL.LegalIntWidths;
@@ -257,6 +268,17 @@ public:
unsigned getStackAlignment() const { return StackNaturalAlign; }
unsigned getAllocaAddrSpace() const { return AllocaAddrSpace; }
+ /// Returns the alignment of function pointers, which may or may not be
+ /// related to the alignment of functions.
+ /// \see getFunctionPtrAlignType
+ unsigned getFunctionPtrAlign() const { return FunctionPtrAlign; }
+
+ /// Return the type of function pointer alignment.
+ /// \see getFunctionPtrAlign
+ FunctionPtrAlignType getFunctionPtrAlignType() const {
+ return TheFunctionPtrAlignType;
+ }
+
unsigned getProgramAddressSpace() const { return ProgramAddrSpace; }
bool hasMicrosoftFastStdCallMangling() const {
@@ -346,10 +368,13 @@ public:
return NonIntegralAddressSpaces;
}
- bool isNonIntegralPointerType(PointerType *PT) const {
+ bool isNonIntegralAddressSpace(unsigned AddrSpace) const {
ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
- return find(NonIntegralSpaces, PT->getAddressSpace()) !=
- NonIntegralSpaces.end();
+ return find(NonIntegralSpaces, AddrSpace) != NonIntegralSpaces.end();
+ }
+
+ bool isNonIntegralPointerType(PointerType *PT) const {
+ return isNonIntegralAddressSpace(PT->getAddressSpace());
}
bool isNonIntegralPointerType(Type *Ty) const {
@@ -428,6 +453,14 @@ public:
return 8 * getTypeStoreSize(Ty);
}
+ /// Returns true if no extra padding bits are needed when storing the
+ /// specified type.
+ ///
+ /// For example, returns false for i19 that has a 24-bit store size.
+ bool typeSizeEqualsStoreSize(Type *Ty) const {
+ return getTypeSizeInBits(Ty) == getTypeStoreSizeInBits(Ty);
+ }
+
/// Returns the offset in bytes between successive objects of the
/// specified type, including alignment padding.
///
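
Illustrative, not part of the patch: a sketch of the new DataLayout queries; queryLayout is a hypothetical helper.

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Type.h"

    void queryLayout(const llvm::Module &M, llvm::Type *Ty) {
      const llvm::DataLayout &DL = M.getDataLayout();

      unsigned FnPtrAlign = DL.getFunctionPtrAlign();
      bool Multiple =
          DL.getFunctionPtrAlignType() ==
          llvm::DataLayout::FunctionPtrAlignType::MultipleOfFunctionAlign;

      // True for i32, false for e.g. i19 (24-bit store size).
      bool NoPadding = DL.typeSizeEqualsStoreSize(Ty);
      (void)FnPtrAlign; (void)Multiple; (void)NoPadding;
    }
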
diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h
index 01178af3c9ff..171e1621889f 100644
--- a/include/llvm/IR/DebugInfo.h
+++ b/include/llvm/IR/DebugInfo.h
@@ -1,9 +1,8 @@
//===- DebugInfo.h - Debug Information Helpers ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def
index ce117aa452aa..07e3d6bdc9e5 100644
--- a/include/llvm/IR/DebugInfoFlags.def
+++ b/include/llvm/IR/DebugInfoFlags.def
@@ -1,9 +1,8 @@
//===- llvm/IR/DebugInfoFlags.def - Debug info flag definitions -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,12 +50,12 @@ HANDLE_DI_FLAG((3 << 16), VirtualInheritance)
HANDLE_DI_FLAG((1 << 18), IntroducedVirtual)
HANDLE_DI_FLAG((1 << 19), BitField)
HANDLE_DI_FLAG((1 << 20), NoReturn)
-HANDLE_DI_FLAG((1 << 21), MainSubprogram)
+HANDLE_DI_FLAG((1 << 21), ArgumentNotModified)
HANDLE_DI_FLAG((1 << 22), TypePassByValue)
HANDLE_DI_FLAG((1 << 23), TypePassByReference)
HANDLE_DI_FLAG((1 << 24), EnumClass)
HANDLE_DI_FLAG((1 << 25), Thunk)
-HANDLE_DI_FLAG((1 << 26), Trivial)
+HANDLE_DI_FLAG((1 << 26), NonTrivial)
HANDLE_DI_FLAG((1 << 27), BigEndian)
HANDLE_DI_FLAG((1 << 28), LittleEndian)
HANDLE_DI_FLAG((1 << 29), AllCallsDescribed)
@@ -85,11 +84,15 @@ HANDLE_DISP_FLAG(2u, PureVirtual)
HANDLE_DISP_FLAG((1u << 2), LocalToUnit)
HANDLE_DISP_FLAG((1u << 3), Definition)
HANDLE_DISP_FLAG((1u << 4), Optimized)
+HANDLE_DISP_FLAG((1u << 5), Pure)
+HANDLE_DISP_FLAG((1u << 6), Elemental)
+HANDLE_DISP_FLAG((1u << 7), Recursive)
+HANDLE_DISP_FLAG((1u << 8), MainSubprogram)
#ifdef DISP_FLAG_LARGEST_NEEDED
// Intended to be used with ADT/BitmaskEnum.h.
// NOTE: Always must be equal to largest flag, check this when adding new flags.
-HANDLE_DISP_FLAG((1 << 4), Largest)
+HANDLE_DISP_FLAG((1 << 8), Largest)
#undef DISP_FLAG_LARGEST_NEEDED
#endif
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index a461d1bd4fe8..9dc6dfbb0f68 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -1,9 +1,8 @@
//===- llvm/IR/DebugInfoMetadata.h - Debug info metadata --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,44 +60,6 @@
namespace llvm {
-/// Holds a subclass of DINode.
-///
-/// FIXME: This class doesn't currently make much sense. Previously it was a
-/// union beteen MDString (for ODR-uniqued types) and things like DIType. To
-/// support CodeView work, it wasn't deleted outright when MDString-based type
-/// references were deleted; we'll soon need a similar concept for CodeView
-/// DITypeIndex.
-template <class T> class TypedDINodeRef {
- const Metadata *MD = nullptr;
-
-public:
- TypedDINodeRef() = default;
- TypedDINodeRef(std::nullptr_t) {}
- TypedDINodeRef(const T *MD) : MD(MD) {}
-
- explicit TypedDINodeRef(const Metadata *MD) : MD(MD) {
- assert((!MD || isa<T>(MD)) && "Expected valid type ref");
- }
-
- template <class U>
- TypedDINodeRef(
- const TypedDINodeRef<U> &X,
- typename std::enable_if<std::is_convertible<U *, T *>::value>::type * =
- nullptr)
- : MD(X) {}
-
- operator Metadata *() const { return const_cast<Metadata *>(MD); }
-
- T *resolve() const { return const_cast<T *>(cast_or_null<T>(MD)); }
-
- bool operator==(const TypedDINodeRef<T> &X) const { return MD == X.MD; }
- bool operator!=(const TypedDINodeRef<T> &X) const { return MD != X.MD; }
-};
-
-using DINodeRef = TypedDINodeRef<DINode>;
-using DIScopeRef = TypedDINodeRef<DIScope>;
-using DITypeRef = TypedDINodeRef<DIType>;
-
class DITypeRefArray {
const MDTuple *N = nullptr;
@@ -115,17 +76,19 @@ public:
// FIXME: Fix callers and remove condition on N.
unsigned size() const { return N ? N->getNumOperands() : 0u; }
- DITypeRef operator[](unsigned I) const { return DITypeRef(N->getOperand(I)); }
+ DIType *operator[](unsigned I) const {
+ return cast_or_null<DIType>(N->getOperand(I));
+ }
- class iterator : std::iterator<std::input_iterator_tag, DITypeRef,
- std::ptrdiff_t, void, DITypeRef> {
+ class iterator : std::iterator<std::input_iterator_tag, DIType *,
+ std::ptrdiff_t, void, DIType *> {
MDNode::op_iterator I = nullptr;
public:
iterator() = default;
explicit iterator(MDNode::op_iterator I) : I(I) {}
- DITypeRef operator*() const { return DITypeRef(*I); }
+ DIType *operator*() const { return cast_or_null<DIType>(*I); }
iterator &operator++() {
++I;
@@ -228,6 +191,7 @@ public:
case DILexicalBlockKind:
case DILexicalBlockFileKind:
case DINamespaceKind:
+ case DICommonBlockKind:
case DITemplateTypeParameterKind:
case DITemplateValueParameterKind:
case DIGlobalVariableKind:
@@ -241,18 +205,6 @@ public:
}
};
-template <class T> struct simplify_type<const TypedDINodeRef<T>> {
- using SimpleType = Metadata *;
-
- static SimpleType getSimplifiedValue(const TypedDINodeRef<T> &MD) {
- return MD;
- }
-};
-
-template <class T>
-struct simplify_type<TypedDINodeRef<T>>
- : simplify_type<const TypedDINodeRef<T>> {};
-
/// Generic tagged DWARF-like metadata node.
///
/// An un-specialized DWARF-like metadata node. The first operand is a
@@ -459,7 +411,7 @@ public:
inline Optional<StringRef> getSource() const;
StringRef getName() const;
- DIScopeRef getScope() const;
+ DIScope *getScope() const;
/// Return the raw underlying file.
///
@@ -486,6 +438,7 @@ public:
case DILexicalBlockKind:
case DILexicalBlockFileKind:
case DINamespaceKind:
+ case DICommonBlockKind:
case DIModuleKind:
return true;
}
@@ -672,7 +625,7 @@ public:
uint64_t getOffsetInBits() const { return OffsetInBits; }
DIFlags getFlags() const { return Flags; }
- DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
+ DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
@@ -817,14 +770,12 @@ class DIDerivedType : public DIType {
DWARFAddressSpace(DWARFAddressSpace) {}
~DIDerivedType() = default;
- static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
- StringRef Name, DIFile *File, unsigned Line,
- DIScopeRef Scope, DITypeRef BaseType,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Optional<unsigned> DWARFAddressSpace,
- DIFlags Flags, Metadata *ExtraData,
- StorageType Storage, bool ShouldCreate = true) {
+ static DIDerivedType *
+ getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, DIFile *File,
+ unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
+ Metadata *ExtraData, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits,
DWARFAddressSpace, Flags, ExtraData, Storage, ShouldCreate);
@@ -858,7 +809,7 @@ public:
ExtraData))
DEFINE_MDNODE_GET(DIDerivedType,
(unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
- DIScopeRef Scope, DITypeRef BaseType, uint64_t SizeInBits,
+ DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
Metadata *ExtraData = nullptr),
@@ -869,7 +820,7 @@ public:
TempDIDerivedType clone() const { return cloneImpl(); }
/// Get the base type this is derived from.
- DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
+ DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
Metadata *getRawBaseType() const { return getOperand(3); }
/// \returns The DWARF address space of the memory pointed to or referenced by
@@ -889,9 +840,9 @@ public:
/// Get casted version of extra data.
/// @{
- DITypeRef getClassType() const {
+ DIType *getClassType() const {
assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
- return DITypeRef(getExtraData());
+ return cast_or_null<DIType>(getExtraData());
}
DIObjCProperty *getObjCProperty() const {
@@ -963,12 +914,12 @@ class DICompositeType : public DIType {
static DICompositeType *
getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, Metadata *File,
- unsigned Line, DIScopeRef Scope, DITypeRef BaseType,
- uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
- DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
- DITypeRef VTableHolder, DITemplateParameterArray TemplateParams,
- StringRef Identifier, DIDerivedType *Discriminator,
- StorageType Storage, bool ShouldCreate = true) {
+ unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
+ uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
+ DINodeArray Elements, unsigned RuntimeLang, DIType *VTableHolder,
+ DITemplateParameterArray TemplateParams, StringRef Identifier,
+ DIDerivedType *Discriminator, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(
Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
@@ -995,12 +946,13 @@ class DICompositeType : public DIType {
public:
DEFINE_MDNODE_GET(DICompositeType,
(unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
- DIScopeRef Scope, DITypeRef BaseType, uint64_t SizeInBits,
- uint32_t AlignInBits, uint64_t OffsetInBits,
- DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
- DITypeRef VTableHolder,
+ DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
+ uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
+ DINodeArray Elements, unsigned RuntimeLang,
+ DIType *VTableHolder,
DITemplateParameterArray TemplateParams = nullptr,
- StringRef Identifier = "", DIDerivedType *Discriminator = nullptr),
+ StringRef Identifier = "",
+ DIDerivedType *Discriminator = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, Identifier, Discriminator))
@@ -1053,11 +1005,13 @@ public:
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator);
- DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
+ DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
DINodeArray getElements() const {
return cast_or_null<MDTuple>(getRawElements());
}
- DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); }
+ DIType *getVTableHolder() const {
+ return cast_or_null<DIType>(getRawVTableHolder());
+ }
DITemplateParameterArray getTemplateParams() const {
return cast_or_null<MDTuple>(getRawTemplateParams());
}
@@ -1087,7 +1041,7 @@ public:
replaceOperandWith(4, Elements.get());
}
- void replaceVTableHolder(DITypeRef VTableHolder) {
+ void replaceVTableHolder(DIType *VTableHolder) {
replaceOperandWith(5, VTableHolder);
}
@@ -1541,9 +1495,6 @@ public:
///
/// For precise control over the data being encoded in the discriminator,
/// use encodeDiscriminator/decodeDiscriminator.
- ///
- /// Use {get|set}BaseDiscriminator and cloneWithDuplicationFactor after reading
- /// their documentation, as their behavior has side-effects.
inline unsigned getDiscriminator() const;
@@ -1554,7 +1505,7 @@ public:
/// base discriminator is set in the new DILocation, the other encoded values
/// are elided.
/// If the discriminator cannot be encoded, the function returns None.
- inline Optional<const DILocation *> setBaseDiscriminator(unsigned BD) const;
+ inline Optional<const DILocation *> cloneWithBaseDiscriminator(unsigned BD) const;
/// Returns the duplication factor stored in the discriminator, or 1 if no
/// duplication factor (or 0) is encoded.
@@ -1570,7 +1521,7 @@ public:
/// duplication factor encoded in the discriminator. The current duplication
/// factor is as defined by getDuplicationFactor().
/// Returns None if encoding failed.
- inline Optional<const DILocation *> cloneWithDuplicationFactor(unsigned DF) const;
+ inline Optional<const DILocation *> cloneByMultiplyingDuplicationFactor(unsigned DF) const;
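Because DILocation is uniqued and immutable, the renamed clone* APIs return a fresh location wrapped in an Optional. A caller-side sketch, illustrative only, assuming an existing const DILocation *Loc:

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    static const DILocation *withBaseDiscriminator(const DILocation *Loc,
                                                   unsigned BD) {
      // Formerly spelled setBaseDiscriminator; the new name makes the
      // clone-and-return behavior explicit.
      if (Optional<const DILocation *> New = Loc->cloneWithBaseDiscriminator(BD))
        return *New;
      return Loc; // encoding failed, e.g. BD does not fit in 12 bits
    }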
/// When two instructions are combined into a single instruction we also
/// need to combine the original locations into a single location.
@@ -1594,10 +1545,11 @@ public:
return getUnsignedFromPrefixEncoding(D);
}
- /// Raw encoding of the discriminator. APIs such as setBaseDiscriminator or
- /// cloneWithDuplicationFactor have certain side-effects. This API, in
- /// conjunction with cloneWithDiscriminator, may be used to encode precisely
- /// the values provided. \p BD: base discriminator \p DF: duplication factor
+ /// Raw encoding of the discriminator. APIs such as cloneByMultiplyingDuplicationFactor
+ /// have certain special case behavior (e.g. treating empty duplication factor
+ /// as the value '1').
+ /// This API, in conjunction with cloneWithDiscriminator, may be used to encode
+ /// the raw values provided. \p BD: base discriminator \p DF: duplication factor
/// \p CI: copy index
/// The return is None if the values cannot be encoded in 32 bits - for
/// example, values for BD or DF larger than 12 bits. Otherwise, the return
@@ -1638,9 +1590,6 @@ public:
};
/// Subprogram description.
-///
-/// TODO: Remove DisplayName. It's always equal to Name.
-/// TODO: Split up flags.
class DISubprogram : public DILocalScope {
friend class LLVMContextImpl;
friend class MDNode;
@@ -1678,7 +1627,8 @@ public:
// Helper for converting old bitfields to new flags word.
static DISPFlags toSPFlags(bool IsLocalToUnit, bool IsDefinition,
bool IsOptimized,
- unsigned Virtuality = SPFlagNonvirtual) {
+ unsigned Virtuality = SPFlagNonvirtual,
+ bool IsMainSubprogram = false) {
// We're assuming virtuality is the low-order field.
static_assert(
int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
@@ -1688,7 +1638,8 @@ public:
(Virtuality & SPFlagVirtuality) |
(IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
(IsDefinition ? SPFlagDefinition : SPFlagZero) |
- (IsOptimized ? SPFlagOptimized : SPFlagZero));
+ (IsOptimized ? SPFlagOptimized : SPFlagZero) |
+ (IsMainSubprogram ? SPFlagMainSubprogram : SPFlagZero));
}
private:
@@ -1707,9 +1658,9 @@ private:
~DISubprogram() = default;
static DISubprogram *
- getImpl(LLVMContext &Context, DIScopeRef Scope, StringRef Name,
+ getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
StringRef LinkageName, DIFile *File, unsigned Line,
- DISubroutineType *Type, unsigned ScopeLine, DITypeRef ContainingType,
+ DISubroutineType *Type, unsigned ScopeLine, DIType *ContainingType,
unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
@@ -1744,9 +1695,9 @@ private:
public:
DEFINE_MDNODE_GET(
DISubprogram,
- (DIScopeRef Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+ (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned Line, DISubroutineType *Type, unsigned ScopeLine,
- DITypeRef ContainingType, unsigned VirtualIndex, int ThisAdjustment,
+ DIType *ContainingType, unsigned VirtualIndex, int ThisAdjustment,
DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams = nullptr,
DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
@@ -1787,6 +1738,7 @@ public:
bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; }
bool isDefinition() const { return getSPFlags() & SPFlagDefinition; }
bool isOptimized() const { return getSPFlags() & SPFlagOptimized; }
+ bool isMainSubprogram() const { return getSPFlags() & SPFlagMainSubprogram; }
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isPrivate() const {
@@ -1803,7 +1755,9 @@ public:
bool areAllCallsDescribed() const {
return getFlags() & FlagAllCallsDescribed;
}
- bool isMainSubprogram() const { return getFlags() & FlagMainSubprogram; }
+ bool isPure() const { return getSPFlags() & SPFlagPure; }
+ bool isElemental() const { return getSPFlags() & SPFlagElemental; }
+ bool isRecursive() const { return getSPFlags() & SPFlagRecursive; }
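With isMainSubprogram() now reading SPFlagMainSubprogram from the SPFlags word alongside the new Fortran-oriented queries, construction and querying of these bits stay on one path. A small sketch, illustrative only, assuming an existing DISubprogram *SP:

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    static bool isOptimizedMainProgram(const DISubprogram *SP) {
      // Both bits now live in the DISPFlags bitfield rather than DIFlags.
      return SP->isMainSubprogram() && SP->isOptimized();
    }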
/// Check if this is reference-qualified.
///
@@ -1827,7 +1781,7 @@ public:
// Returns true if this subprogram is a thunk generated by the compiler.
bool isThunk() const { return getFlags() & FlagThunk; }
- DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
+ DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
StringRef getLinkageName() const { return getStringOperand(3); }
@@ -1835,8 +1789,8 @@ public:
DISubroutineType *getType() const {
return cast_or_null<DISubroutineType>(getRawType());
}
- DITypeRef getContainingType() const {
- return DITypeRef(getRawContainingType());
+ DIType *getContainingType() const {
+ return cast_or_null<DIType>(getRawContainingType());
}
DICompileUnit *getUnit() const {
@@ -2039,15 +1993,17 @@ unsigned DILocation::getCopyIdentifier() const {
return getCopyIdentifierFromDiscriminator(getDiscriminator());
}
-Optional<const DILocation *> DILocation::setBaseDiscriminator(unsigned D) const {
- if (D == 0)
+Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
+ unsigned BD, DF, CI;
+ decodeDiscriminator(getDiscriminator(), BD, DF, CI);
+ if (D == BD)
return this;
- if (D > 0xfff)
- return None;
- return cloneWithDiscriminator(encodeComponent(D));
+ if (Optional<unsigned> Encoded = encodeDiscriminator(D, DF, CI))
+ return cloneWithDiscriminator(*Encoded);
+ return None;
}
-Optional<const DILocation *> DILocation::cloneWithDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
DF *= getDuplicationFactor();
if (DF <= 1)
return this;
@@ -2179,7 +2135,7 @@ protected:
public:
StringRef getName() const { return getStringOperand(0); }
- DITypeRef getType() const { return DITypeRef(getRawType()); }
+ DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
MDString *getRawName() const { return getOperandAs<MDString>(0); }
Metadata *getRawType() const { return getOperand(1); }
@@ -2201,7 +2157,7 @@ class DITemplateTypeParameter : public DITemplateParameter {
~DITemplateTypeParameter() = default;
static DITemplateTypeParameter *getImpl(LLVMContext &Context, StringRef Name,
- DITypeRef Type, StorageType Storage,
+ DIType *Type, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, getCanonicalMDString(Context, Name), Type, Storage,
ShouldCreate);
@@ -2215,7 +2171,7 @@ class DITemplateTypeParameter : public DITemplateParameter {
}
public:
- DEFINE_MDNODE_GET(DITemplateTypeParameter, (StringRef Name, DITypeRef Type),
+ DEFINE_MDNODE_GET(DITemplateTypeParameter, (StringRef Name, DIType *Type),
(Name, Type))
DEFINE_MDNODE_GET(DITemplateTypeParameter, (MDString * Name, Metadata *Type),
(Name, Type))
@@ -2238,7 +2194,7 @@ class DITemplateValueParameter : public DITemplateParameter {
~DITemplateValueParameter() = default;
static DITemplateValueParameter *getImpl(LLVMContext &Context, unsigned Tag,
- StringRef Name, DITypeRef Type,
+ StringRef Name, DIType *Type,
Metadata *Value, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name), Type,
@@ -2255,8 +2211,9 @@ class DITemplateValueParameter : public DITemplateParameter {
}
public:
- DEFINE_MDNODE_GET(DITemplateValueParameter, (unsigned Tag, StringRef Name,
- DITypeRef Type, Metadata *Value),
+ DEFINE_MDNODE_GET(DITemplateValueParameter,
+ (unsigned Tag, StringRef Name, DIType *Type,
+ Metadata *Value),
(Tag, Name, Type, Value))
DEFINE_MDNODE_GET(DITemplateValueParameter, (unsigned Tag, MDString *Name,
Metadata *Type, Metadata *Value),
@@ -2288,7 +2245,7 @@ public:
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(1); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
- DITypeRef getType() const { return DITypeRef(getRawType()); }
+ DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
uint32_t getAlignInBits() const { return AlignInBits; }
uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; }
/// Determines the size of the variable's type.
@@ -2297,7 +2254,7 @@ public:
/// Return the signedness of this variable's type, or None if this type is
/// neither signed nor unsigned.
Optional<DIBasicType::Signedness> getSignedness() const {
- if (auto *BT = dyn_cast<DIBasicType>(getType().resolve()))
+ if (auto *BT = dyn_cast<DIBasicType>(getType()))
return BT->getSignedness();
return None;
}
@@ -2504,6 +2461,13 @@ public:
/// Return whether this is a piece of an aggregate variable.
bool isFragment() const { return getFragmentInfo().hasValue(); }
+ /// Return whether this is an implicit location description.
+ bool isImplicit() const;
+
+ /// Return whether the location is computed on the expression stack, meaning
+ /// it cannot be a simple register location.
+ bool isComplex() const;
+
/// Append \p Ops with operations to apply the \p Offset.
static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset);
@@ -2511,20 +2475,32 @@ public:
/// return true with an offset of zero.
bool extractIfOffset(int64_t &Offset) const;
- /// Constants for DIExpression::prepend.
- enum { NoDeref = false, WithDeref = true, WithStackValue = true };
+ /// Checks if the last 4 elements of the expression are DW_OP_constu <DWARF
+ /// Address Space> DW_OP_swap DW_OP_xderef and extracts the <DWARF Address
+ /// Space>.
+ static const DIExpression *extractAddressClass(const DIExpression *Expr,
+ unsigned &AddrClass);
+
+ /// Used for DIExpression::prepend.
+ enum PrependOps : uint8_t {
+ ApplyOffset = 0,
+ DerefBefore = 1 << 0,
+ DerefAfter = 1 << 1,
+ StackValue = 1 << 2,
+ EntryValue = 1 << 3
+ };
/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
- /// into a stack value.
- static DIExpression *prepend(const DIExpression *Expr, bool DerefBefore,
- int64_t Offset = 0, bool DerefAfter = false,
- bool StackValue = false);
+ /// into a stack value or/and an entry value.
+ static DIExpression *prepend(const DIExpression *Expr, uint8_t Flags,
+ int64_t Offset = 0);
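Under the new signature the prepend behaviors are OR-able PrependOps flags rather than positional bools. A usage sketch, not from the patch, assuming an existing DIExpression *Expr:

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    static DIExpression *derefThenAddEight(const DIExpression *Expr) {
      // Old form: DIExpression::prepend(Expr, /*DerefBefore=*/true, /*Offset=*/8);
      return DIExpression::prepend(Expr, DIExpression::DerefBefore, /*Offset=*/8);
    }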
/// Prepend \p DIExpr with the given opcodes and optionally turn it into a
/// stack value.
static DIExpression *prependOpcodes(const DIExpression *Expr,
SmallVectorImpl<uint64_t> &Ops,
- bool StackValue = false);
+ bool StackValue = false,
+ bool EntryValue = false);
/// Append the opcodes \p Ops to \p DIExpr. Unlike \ref appendToStack, the
/// returned expression is a stack value only if \p DIExpr is a stack value.
@@ -2553,17 +2529,14 @@ public:
createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits,
unsigned SizeInBits);
- /// Determine the relative position of the fragments described by this
- /// DIExpression and \p Other.
+ /// Determine the relative position of the fragments passed in.
/// Returns -1 if this is entirely before Other, 0 if this and Other overlap,
/// 1 if this is entirely after Other.
- int fragmentCmp(const DIExpression *Other) const {
- auto Fragment1 = *getFragmentInfo();
- auto Fragment2 = *Other->getFragmentInfo();
- unsigned l1 = Fragment1.OffsetInBits;
- unsigned l2 = Fragment2.OffsetInBits;
- unsigned r1 = l1 + Fragment1.SizeInBits;
- unsigned r2 = l2 + Fragment2.SizeInBits;
+ static int fragmentCmp(const FragmentInfo &A, const FragmentInfo &B) {
+ uint64_t l1 = A.OffsetInBits;
+ uint64_t l2 = B.OffsetInBits;
+ uint64_t r1 = l1 + A.SizeInBits;
+ uint64_t r2 = l2 + B.SizeInBits;
if (r1 <= l2)
return -1;
else if (r2 <= l1)
@@ -2572,12 +2545,59 @@ public:
return 0;
}
+ /// Check if fragments overlap between a pair of FragmentInfos.
+ static bool fragmentsOverlap(const FragmentInfo &A, const FragmentInfo &B) {
+ return fragmentCmp(A, B) == 0;
+ }
+
+ /// Determine the relative position of the fragments described by this
+ /// DIExpression and \p Other. Calls static fragmentCmp implementation.
+ int fragmentCmp(const DIExpression *Other) const {
+ auto Fragment1 = *getFragmentInfo();
+ auto Fragment2 = *Other->getFragmentInfo();
+ return fragmentCmp(Fragment1, Fragment2);
+ }
+
/// Check if fragments overlap between this DIExpression and \p Other.
bool fragmentsOverlap(const DIExpression *Other) const {
if (!isFragment() || !Other->isFragment())
return true;
return fragmentCmp(Other) == 0;
}
+
+ /// Check if the expression consists of exactly one entry value operand.
+ /// (This is the only configuration of entry values that is supported.)
+ bool isEntryValue() const {
+ return getNumElements() > 0 &&
+ getElement(0) == dwarf::DW_OP_entry_value;
+ }
+};
+
+inline bool operator==(const DIExpression::FragmentInfo &A,
+ const DIExpression::FragmentInfo &B) {
+ return std::tie(A.SizeInBits, A.OffsetInBits) ==
+ std::tie(B.SizeInBits, B.OffsetInBits);
+}
+
+inline bool operator<(const DIExpression::FragmentInfo &A,
+ const DIExpression::FragmentInfo &B) {
+ return std::tie(A.SizeInBits, A.OffsetInBits) <
+ std::tie(B.SizeInBits, B.OffsetInBits);
+}
+
+template <> struct DenseMapInfo<DIExpression::FragmentInfo> {
+ using FragInfo = DIExpression::FragmentInfo;
+ static const uint64_t MaxVal = std::numeric_limits<uint64_t>::max();
+
+ static inline FragInfo getEmptyKey() { return {MaxVal, MaxVal}; }
+
+ static inline FragInfo getTombstoneKey() { return {MaxVal - 1, MaxVal - 1}; }
+
+ static unsigned getHashValue(const FragInfo &Frag) {
+ return (Frag.SizeInBits & 0xffff) << 16 | (Frag.OffsetInBits & 0xffff);
+ }
+
+ static bool isEqual(const FragInfo &A, const FragInfo &B) { return A == B; }
};
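The operator==/operator< overloads and the DenseMapInfo specialization make FragmentInfo usable as a map key. A sketch of what that enables, illustrative only, assuming an existing DIExpression *Expr:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    static void noteFragment(DenseMap<DIExpression::FragmentInfo, unsigned> &Seen,
                             const DIExpression *Expr) {
      // getFragmentInfo() yields Optional<FragmentInfo>; hashing and equality
      // for the key come from the specialization defined above.
      if (Optional<DIExpression::FragmentInfo> Frag = Expr->getFragmentInfo())
        ++Seen[*Frag];
    }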
/// Global variables.
@@ -2599,7 +2619,7 @@ class DIGlobalVariable : public DIVariable {
static DIGlobalVariable *
getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
- StringRef LinkageName, DIFile *File, unsigned Line, DITypeRef Type,
+ StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type,
bool IsLocalToUnit, bool IsDefinition,
DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) {
@@ -2626,7 +2646,7 @@ class DIGlobalVariable : public DIVariable {
public:
DEFINE_MDNODE_GET(DIGlobalVariable,
(DIScope * Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DITypeRef Type,
+ DIFile *File, unsigned Line, DIType *Type,
bool IsLocalToUnit, bool IsDefinition,
DIDerivedType *StaticDataMemberDeclaration,
MDTuple *TemplateParams, uint32_t AlignInBits),
@@ -2663,6 +2683,65 @@ public:
}
};
+class DICommonBlock : public DIScope {
+ unsigned LineNo;
+
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo,
+ ArrayRef<Metadata *> Ops)
+ : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
+ Ops), LineNo(LineNo) {}
+
+ static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope,
+ DIGlobalVariable *Decl, StringRef Name,
+ DIFile *File, unsigned LineNo,
+ StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, Scope, Decl, getCanonicalMDString(Context, Name),
+ File, LineNo, Storage, ShouldCreate);
+ }
+ static DICommonBlock *getImpl(LLVMContext &Context, Metadata *Scope,
+ Metadata *Decl, MDString *Name, Metadata *File,
+ unsigned LineNo,
+ StorageType Storage, bool ShouldCreate = true);
+
+ TempDICommonBlock cloneImpl() const {
+ return getTemporary(getContext(), getScope(), getDecl(), getName(),
+ getFile(), getLineNo());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DICommonBlock,
+ (DIScope *Scope, DIGlobalVariable *Decl, StringRef Name,
+ DIFile *File, unsigned LineNo),
+ (Scope, Decl, Name, File, LineNo))
+ DEFINE_MDNODE_GET(DICommonBlock,
+ (Metadata *Scope, Metadata *Decl, MDString *Name,
+ Metadata *File, unsigned LineNo),
+ (Scope, Decl, Name, File, LineNo))
+
+ TempDICommonBlock clone() const { return cloneImpl(); }
+
+ DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
+ DIGlobalVariable *getDecl() const {
+ return cast_or_null<DIGlobalVariable>(getRawDecl());
+ }
+ StringRef getName() const { return getStringOperand(2); }
+ DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
+ unsigned getLineNo() const { return LineNo; }
+
+ Metadata *getRawScope() const { return getOperand(0); }
+ Metadata *getRawDecl() const { return getOperand(1); }
+ MDString *getRawName() const { return getOperandAs<MDString>(2); }
+ Metadata *getRawFile() const { return getOperand(3); }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DICommonBlockKind;
+ }
+};
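DICommonBlock gives Fortran COMMON blocks a scope node of their own. A construction sketch using the uniquing getter declared above; the surrounding values are assumed to exist and the name and line are invented for illustration:

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    static DICommonBlock *makeCommonBlock(LLVMContext &Ctx, DIScope *Scope,
                                          DIGlobalVariable *Decl, DIFile *File) {
      // Models "COMMON /shared/ ..." declared at line 10 of File.
      return DICommonBlock::get(Ctx, Scope, Decl, "shared", File, /*LineNo=*/10);
    }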
+
/// Local variable.
///
/// TODO: Split up flags.
@@ -2684,7 +2763,7 @@ class DILocalVariable : public DIVariable {
static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
StringRef Name, DIFile *File, unsigned Line,
- DITypeRef Type, unsigned Arg, DIFlags Flags,
+ DIType *Type, unsigned Arg, DIFlags Flags,
uint32_t AlignInBits, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
@@ -2705,8 +2784,8 @@ class DILocalVariable : public DIVariable {
public:
DEFINE_MDNODE_GET(DILocalVariable,
(DILocalScope * Scope, StringRef Name, DIFile *File,
- unsigned Line, DITypeRef Type, unsigned Arg,
- DIFlags Flags, uint32_t AlignInBits),
+ unsigned Line, DIType *Type, unsigned Arg, DIFlags Flags,
+ uint32_t AlignInBits),
(Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
DEFINE_MDNODE_GET(DILocalVariable,
(Metadata * Scope, MDString *Name, Metadata *File,
@@ -2730,6 +2809,11 @@ public:
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
+ /// Check that an argument is unmodified.
+ bool isNotModified() const { return getFlags() & FlagArgumentNotModified; }
+ /// Set the flag if an argument is unmodified.
+ void setIsNotModified() { Flags |= FlagArgumentNotModified; }
+
/// Check that a location is valid for this variable.
///
/// Check that \c DL exists, is in the same subprogram, and has the same
@@ -2831,7 +2915,7 @@ class DIObjCProperty : public DINode {
static DIObjCProperty *
getImpl(LLVMContext &Context, StringRef Name, DIFile *File, unsigned Line,
StringRef GetterName, StringRef SetterName, unsigned Attributes,
- DITypeRef Type, StorageType Storage, bool ShouldCreate = true) {
+ DIType *Type, StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, getCanonicalMDString(Context, Name), File, Line,
getCanonicalMDString(Context, GetterName),
getCanonicalMDString(Context, SetterName), Attributes, Type,
@@ -2853,7 +2937,7 @@ public:
DEFINE_MDNODE_GET(DIObjCProperty,
(StringRef Name, DIFile *File, unsigned Line,
StringRef GetterName, StringRef SetterName,
- unsigned Attributes, DITypeRef Type),
+ unsigned Attributes, DIType *Type),
(Name, File, Line, GetterName, SetterName, Attributes,
Type))
DEFINE_MDNODE_GET(DIObjCProperty,
@@ -2871,7 +2955,7 @@ public:
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
StringRef getGetterName() const { return getStringOperand(2); }
StringRef getSetterName() const { return getStringOperand(3); }
- DITypeRef getType() const { return DITypeRef(getRawType()); }
+ DIType *getType() const { return cast_or_null<DIType>(getRawType()); }
StringRef getFilename() const {
if (auto *F = getFile())
@@ -2915,8 +2999,8 @@ class DIImportedEntity : public DINode {
~DIImportedEntity() = default;
static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
- DIScope *Scope, DINodeRef Entity,
- DIFile *File, unsigned Line, StringRef Name,
+ DIScope *Scope, DINode *Entity, DIFile *File,
+ unsigned Line, StringRef Name,
StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, Scope, Entity, File, Line,
@@ -2935,8 +3019,8 @@ class DIImportedEntity : public DINode {
public:
DEFINE_MDNODE_GET(DIImportedEntity,
- (unsigned Tag, DIScope *Scope, DINodeRef Entity,
- DIFile *File, unsigned Line, StringRef Name = ""),
+ (unsigned Tag, DIScope *Scope, DINode *Entity, DIFile *File,
+ unsigned Line, StringRef Name = ""),
(Tag, Scope, Entity, File, Line, Name))
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, Metadata *Scope, Metadata *Entity,
@@ -2947,7 +3031,7 @@ public:
unsigned getLine() const { return Line; }
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
- DINodeRef getEntity() const { return DINodeRef(getRawEntity()); }
+ DINode *getEntity() const { return cast_or_null<DINode>(getRawEntity()); }
StringRef getName() const { return getStringOperand(2); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h
index 4f0d7f51b5f9..780d17a33661 100644
--- a/include/llvm/IR/DebugLoc.h
+++ b/include/llvm/IR/DebugLoc.h
@@ -1,9 +1,8 @@
//===- DebugLoc.h - Debug Location Information ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index 9526d6287d2f..3c1d4278905f 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -1,9 +1,8 @@
//===- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ScalableSize.h"
#include <cassert>
#include <cstdint>
@@ -158,6 +158,38 @@ unsigned Type::getFunctionNumParams() const {
return cast<FunctionType>(this)->getNumParams();
}
+/// A handy container for a FunctionType+Callee-pointer pair, which can be
+/// passed around as a single entity. This assists in replacing the use of
+/// PointerType::getElementType() to access the function's type, since that's
+/// slated for removal as part of the [opaque pointer types] project.
+class FunctionCallee {
+public:
+ // Allow implicit conversion from types which have a getFunctionType member
+ // (e.g. Function and InlineAsm).
+ template <typename T, typename U = decltype(&T::getFunctionType)>
+ FunctionCallee(T *Fn)
+ : FnTy(Fn ? Fn->getFunctionType() : nullptr), Callee(Fn) {}
+
+ FunctionCallee(FunctionType *FnTy, Value *Callee)
+ : FnTy(FnTy), Callee(Callee) {
+ assert((FnTy == nullptr) == (Callee == nullptr));
+ }
+
+ FunctionCallee(std::nullptr_t) {}
+
+ FunctionCallee() = default;
+
+ FunctionType *getFunctionType() { return FnTy; }
+
+ Value *getCallee() { return Callee; }
+
+ explicit operator bool() { return Callee; }
+
+private:
+ FunctionType *FnTy = nullptr;
+ Value *Callee = nullptr;
+};
+
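FunctionCallee simply carries the FunctionType alongside the callee Value so that call-creation APIs no longer have to look through the pointer's element type. Elsewhere in this import, Module::getOrInsertFunction returns this type; the sketch below sticks to what the class itself shows and assumes FnTy/FnPtr already exist:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    static FunctionType *calleeType(FunctionType *FnTy, Value *FnPtr) {
      FunctionCallee FC(FnTy, FnPtr); // pairs the type with the callee value
      if (!FC)                        // explicit operator bool tests the callee
        return nullptr;
      return FC.getFunctionType();    // no PointerType::getElementType() needed
    }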
/// Common super class of ArrayType, StructType and VectorType.
class CompositeType : public Type {
protected:
@@ -356,6 +388,8 @@ public:
SequentialType(const SequentialType &) = delete;
SequentialType &operator=(const SequentialType &) = delete;
+ /// For scalable vectors, this will return the minimum number of elements
+ /// in the vector.
uint64_t getNumElements() const { return NumElements; }
Type *getElementType() const { return ContainedType; }
@@ -391,14 +425,37 @@ uint64_t Type::getArrayNumElements() const {
/// Class to represent vector types.
class VectorType : public SequentialType {
- VectorType(Type *ElType, unsigned NumEl);
+ /// A fully specified VectorType is of the form <vscale x n x Ty>. 'n' is the
+ /// minimum number of elements of type Ty contained within the vector, and
+ /// 'vscale x' indicates that the total element count is an integer multiple
+ /// of 'n', where the multiple is either guaranteed to be one, or is
+ /// statically unknown at compile time.
+ ///
+ /// If the multiple is known to be 1, then the extra term is discarded in
+ /// textual IR:
+ ///
+ /// <4 x i32> - a vector containing 4 i32s
+ /// <vscale x 4 x i32> - a vector containing an unknown integer multiple
+ /// of 4 i32s
+
+ VectorType(Type *ElType, unsigned NumEl, bool Scalable = false);
+ VectorType(Type *ElType, ElementCount EC);
+
+ // If true, the total number of elements is an unknown multiple of the
+ // minimum 'NumElements' from SequentialType. Otherwise the total number
+ // of elements is exactly equal to 'NumElements'.
+ bool Scalable;
public:
VectorType(const VectorType &) = delete;
VectorType &operator=(const VectorType &) = delete;
/// This static method is the primary way to construct a VectorType.
- static VectorType *get(Type *ElementType, unsigned NumElements);
+ static VectorType *get(Type *ElementType, ElementCount EC);
+ static VectorType *get(Type *ElementType, unsigned NumElements,
+ bool Scalable = false) {
+ return VectorType::get(ElementType, {NumElements, Scalable});
+ }
/// This static method gets a VectorType with the same number of elements as
/// the input type, and the element type is an integer type of the same width
@@ -407,7 +464,7 @@ public:
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
assert(EltBits && "Element size must be of a non-zero size");
Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
- return VectorType::get(EltTy, VTy->getNumElements());
+ return VectorType::get(EltTy, VTy->getElementCount());
}
/// This static method is like getInteger except that the element types are
@@ -415,7 +472,7 @@ public:
static VectorType *getExtendedElementVectorType(VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
- return VectorType::get(EltTy, VTy->getNumElements());
+ return VectorType::get(EltTy, VTy->getElementCount());
}
/// This static method is like getInteger except that the element types are
@@ -425,29 +482,45 @@ public:
assert((EltBits & 1) == 0 &&
"Cannot truncate vector element with odd bit-width");
Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
- return VectorType::get(EltTy, VTy->getNumElements());
+ return VectorType::get(EltTy, VTy->getElementCount());
}
/// This static method returns a VectorType with half as many elements as the
/// input type and the same element type.
static VectorType *getHalfElementsVectorType(VectorType *VTy) {
- unsigned NumElts = VTy->getNumElements();
- assert ((NumElts & 1) == 0 &&
+ auto EltCnt = VTy->getElementCount();
+ assert ((EltCnt.Min & 1) == 0 &&
"Cannot halve vector with odd number of elements.");
- return VectorType::get(VTy->getElementType(), NumElts/2);
+ return VectorType::get(VTy->getElementType(), EltCnt/2);
}
/// This static method returns a VectorType with twice as many elements as the
/// input type and the same element type.
static VectorType *getDoubleElementsVectorType(VectorType *VTy) {
- unsigned NumElts = VTy->getNumElements();
- return VectorType::get(VTy->getElementType(), NumElts*2);
+ auto EltCnt = VTy->getElementCount();
+ assert((VTy->getNumElements() * 2ull) <= UINT_MAX &&
+ "Too many elements in vector");
+ return VectorType::get(VTy->getElementType(), EltCnt*2);
}
/// Return true if the specified type is valid as an element type.
static bool isValidElementType(Type *ElemTy);
- /// Return the number of bits in the Vector type.
+ /// Return an ElementCount instance to represent the (possibly scalable)
+ /// number of elements in the vector.
+ ElementCount getElementCount() const {
+ uint64_t MinimumEltCnt = getNumElements();
+ assert(MinimumEltCnt <= UINT_MAX && "Too many elements in vector");
+ return { (unsigned)MinimumEltCnt, Scalable };
+ }
+
+ /// Returns whether or not this is a scalable vector (meaning the total
+ /// element count is a multiple of the minimum).
+ bool isScalable() const {
+ return Scalable;
+ }
+
+ /// Return the minimum number of bits in the Vector type.
/// Returns zero when the vector is a vector of pointers.
unsigned getBitWidth() const {
return getNumElements() * getElementType()->getPrimitiveSizeInBits();
@@ -463,6 +536,10 @@ unsigned Type::getVectorNumElements() const {
return cast<VectorType>(this)->getNumElements();
}
+bool Type::getVectorIsScalable() const {
+ return cast<VectorType>(this)->isScalable();
+}
+
/// Class to represent pointers.
class PointerType : public Type {
explicit PointerType(Type *ElType, unsigned AddrSpace);
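To round out the VectorType changes above: the ElementCount-based getters distinguish fixed-width from scalable vectors. A sketch, illustrative only, assuming ElementCount exposes public Min/Scalable members as used by the hunks above:

    #include <cassert>
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    static void buildVectors(LLVMContext &Ctx) {
      Type *I32 = Type::getInt32Ty(Ctx);
      VectorType *Fixed = VectorType::get(I32, 4);                     // <4 x i32>
      VectorType *Scaled = VectorType::get(I32, 4, /*Scalable=*/true); // <vscale x 4 x i32>
      ElementCount EC = Scaled->getElementCount();
      assert(EC.Min == 4 && EC.Scalable && "minimum of 4 lanes, total unknown");
      assert(!Fixed->isScalable() && Fixed->getNumElements() == 4);
      (void)EC;
      (void)Fixed;
    }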
diff --git a/include/llvm/IR/DerivedUser.h b/include/llvm/IR/DerivedUser.h
index 67c483d3c497..a25d316c2d60 100644
--- a/include/llvm/IR/DerivedUser.h
+++ b/include/llvm/IR/DerivedUser.h
@@ -1,9 +1,8 @@
//===- DerivedUser.h - Base for non-IR Users --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/DiagnosticHandler.h b/include/llvm/IR/DiagnosticHandler.h
index 51873bea3d41..55e5e5975808 100644
--- a/include/llvm/IR/DiagnosticHandler.h
+++ b/include/llvm/IR/DiagnosticHandler.h
@@ -1,9 +1,8 @@
-//===- DiagnosticHandler.h - DiagnosticHandler class for LLVM -*- C++ ---*-===//
+//===- DiagnosticHandler.h - DiagnosticHandler class for LLVM ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Base DiagnosticHandler class declaration. Derive from this class to provide
diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h
index 3a55a7dca7f4..373663289dbd 100644
--- a/include/llvm/IR/DiagnosticInfo.h
+++ b/include/llvm/IR/DiagnosticInfo.h
@@ -1,9 +1,8 @@
//===- llvm/IR/DiagnosticInfo.h - Diagnostic Declaration --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -466,12 +465,15 @@ public:
virtual bool isEnabled() const = 0;
StringRef getPassName() const { return PassName; }
+ StringRef getRemarkName() const { return RemarkName; }
std::string getMsg() const;
Optional<uint64_t> getHotness() const { return Hotness; }
void setHotness(Optional<uint64_t> H) { Hotness = H; }
bool isVerbose() const { return IsVerbose; }
+ ArrayRef<Argument> getArgs() const { return Args; }
+
static bool classof(const DiagnosticInfo *DI) {
return (DI->getKind() >= DK_FirstRemark &&
DI->getKind() <= DK_LastRemark) ||
@@ -501,7 +503,7 @@ protected:
const char *PassName;
/// Textual identifier for the remark (single-word, camel-case). Can be used
- /// by external tools reading the YAML output file for optimization remarks to
+ /// by external tools reading the output file for optimization remarks to
/// identify the remark.
StringRef RemarkName;
@@ -519,8 +521,6 @@ protected:
/// the optimization records and not in the remark printed in the compiler
/// output.
int FirstExtraArgIndex = -1;
-
- friend struct yaml::MappingTraits<DiagnosticInfoOptimizationBase *>;
};
/// Allow the insertion operator to return the actual remark type rather than a
@@ -1002,12 +1002,6 @@ public:
void print(DiagnosticPrinter &DP) const override;
};
-namespace yaml {
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
- static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag);
-};
-} // namespace yaml
-
} // end namespace llvm
#endif // LLVM_IR_DIAGNOSTICINFO_H
diff --git a/include/llvm/IR/DiagnosticPrinter.h b/include/llvm/IR/DiagnosticPrinter.h
index 25c47cdd1a12..102932ceefa5 100644
--- a/include/llvm/IR/DiagnosticPrinter.h
+++ b/include/llvm/IR/DiagnosticPrinter.h
@@ -1,9 +1,8 @@
//===- llvm/Support/DiagnosticPrinter.h - Diagnostic Printer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h
index f7da47d07663..fef1c6abf8c2 100644
--- a/include/llvm/IR/Dominators.h
+++ b/include/llvm/IR/Dominators.h
@@ -1,9 +1,8 @@
//===- Dominators.h - Dominator Info Calculation ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 630f47e8bb57..7fa61e12f431 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -1,9 +1,8 @@
//===- llvm/Function.h - Class to represent a single function ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -297,15 +296,18 @@ public:
/// Get the entry count for this function.
///
- /// Entry count is the number of times the function was executed based on
- /// pgo data.
- ProfileCount getEntryCount() const;
+ /// Entry count is the number of times the function was executed.
+ /// When AllowSynthetic is false, only real PGO-based counts are returned.
+ ProfileCount getEntryCount(bool AllowSynthetic = false) const;
/// Return true if the function is annotated with profile data.
///
/// Presence of entry counts from a profile run implies the function has
- /// profile annotations.
- bool hasProfileData() const { return getEntryCount().hasValue(); }
+ /// profile annotations. If IncludeSynthetic is false, only return true
+ /// when the profile data is real.
+ bool hasProfileData(bool IncludeSynthetic = false) const {
+ return getEntryCount(IncludeSynthetic).hasValue();
+ }
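The AllowSynthetic/IncludeSynthetic parameters keep the default behavior conservative while letting callers opt in to synthetic (inferred) counts. A sketch, illustrative only, assuming ProfileCount keeps its existing hasValue()/getCount() interface:

    #include "llvm/IR/Function.h"
    using namespace llvm;

    static uint64_t entryCountOrZero(const Function &F) {
      // Passing true also accepts synthetic counts produced without a profile run.
      Function::ProfileCount Count = F.getEntryCount(/*AllowSynthetic=*/true);
      return Count.hasValue() ? Count.getCount() : 0;
    }

    static bool hasRealProfile(const Function &F) {
      // The default argument (false) means synthetic counts do not qualify.
      return F.hasProfileData();
    }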
/// Returns the set of GUIDs that needs to be imported to the function for
/// sample PGO, to enable the same inlines as the profiled optimized binary.
@@ -399,6 +401,11 @@ public:
return getAttributes().hasParamAttribute(ArgNo, Kind);
}
+ /// gets the specified attribute from the list of attributes.
+ Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
+ return getAttributes().getParamAttr(ArgNo, Kind);
+ }
+
/// gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
return AttributeSets.getAttribute(i, Kind);
@@ -429,6 +436,12 @@ public:
return AttributeSets.getParamAlignment(ArgNo);
}
+ /// Extract the byval type for a parameter.
+ Type *getParamByValType(unsigned ArgNo) const {
+ Type *Ty = AttributeSets.getParamByValType(ArgNo);
+ return Ty ? Ty : (arg_begin() + ArgNo)->getType()->getPointerElementType();
+ }
+
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
/// @param i AttributeList index, referring to a return value or argument.
@@ -551,6 +564,14 @@ public:
addFnAttr(Attribute::Speculatable);
}
+ /// Determine if the function is known not to free (deallocate) memory.
+ bool doesNotFreeMemory() const {
+ return onlyReadsMemory() || hasFnAttribute(Attribute::NoFree);
+ }
+ void setDoesNotFreeMemory() {
+ addFnAttr(Attribute::NoFree);
+ }
+
/// Determine if the function is known not to recurse, directly or
/// indirectly.
bool doesNotRecurse() const {
@@ -591,12 +612,15 @@ public:
addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
}
+ /// Do not optimize this function (-O0).
+ bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); }
+
/// Optimize this function for minimum size (-Oz).
- bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); }
+ bool hasMinSize() const { return hasFnAttribute(Attribute::MinSize); }
/// Optimize this function for size (-Os) or minimum size (-Oz).
- bool optForSize() const {
- return hasFnAttribute(Attribute::OptimizeForSize) || optForMinSize();
+ bool hasOptSize() const {
+ return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize();
}
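The size-related predicates are renamed to read as properties of the function rather than directives. A caller-side sketch, illustrative only:

    #include "llvm/IR/Function.h"
    using namespace llvm;

    static bool mayExpandAggressively(const Function &F) {
      // Formerly F.optForSize() / F.optForMinSize(); hasOptNone() is new.
      return !F.hasOptSize() && !F.hasMinSize() && !F.hasOptNone();
    }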
/// copyAttributesFrom - copy all additional attributes (those not needed to
diff --git a/include/llvm/IR/GVMaterializer.h b/include/llvm/IR/GVMaterializer.h
index 675abeb6ec3a..d62da41ebc29 100644
--- a/include/llvm/IR/GVMaterializer.h
+++ b/include/llvm/IR/GVMaterializer.h
@@ -1,9 +1,8 @@
//===- GVMaterializer.h - Interface for GV materializers --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h
index 3c143ea5f703..9b257abc7c1f 100644
--- a/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -1,9 +1,8 @@
//===- GetElementPtrTypeIterator.h ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h
index 450583baaa3c..3cd405701300 100644
--- a/include/llvm/IR/GlobalAlias.h
+++ b/include/llvm/IR/GlobalAlias.h
@@ -1,9 +1,8 @@
//===-------- llvm/GlobalAlias.h - GlobalAlias class ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h
index ef51315a6f5d..bc0d3c053cce 100644
--- a/include/llvm/IR/GlobalIFunc.h
+++ b/include/llvm/IR/GlobalIFunc.h
@@ -1,9 +1,8 @@
//===-------- llvm/GlobalIFunc.h - GlobalIFunc class ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/IR/GlobalIndirectSymbol.h b/include/llvm/IR/GlobalIndirectSymbol.h
index 22c00686c549..8bc3f90b94aa 100644
--- a/include/llvm/IR/GlobalIndirectSymbol.h
+++ b/include/llvm/IR/GlobalIndirectSymbol.h
@@ -1,9 +1,8 @@
//===- llvm/GlobalIndirectSymbol.h - GlobalIndirectSymbol class -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h
index 1fd3568100c2..b8ab6140ebe7 100644
--- a/include/llvm/IR/GlobalObject.h
+++ b/include/llvm/IR/GlobalObject.h
@@ -1,9 +1,8 @@
//===-- llvm/GlobalObject.h - Class to represent global objects -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index c07d4051c803..2209881dbda6 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -1,9 +1,8 @@
//===-- llvm/GlobalValue.h - Class to represent a global value --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -80,15 +79,15 @@ protected:
ValueType(Ty), Visibility(DefaultVisibility),
UnnamedAddrVal(unsigned(UnnamedAddr::None)),
DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
- HasLLVMReservedName(false), IsDSOLocal(false), IntID((Intrinsic::ID)0U),
- Parent(nullptr) {
+ HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false),
+ IntID((Intrinsic::ID)0U), Parent(nullptr) {
setLinkage(Linkage);
setName(Name);
}
Type *ValueType;
- static const unsigned GlobalValueSubClassDataBits = 17;
+ static const unsigned GlobalValueSubClassDataBits = 16;
// All bitfields use unsigned as the underlying type so that MSVC will pack
// them.
@@ -109,9 +108,13 @@ protected:
/// definition cannot be runtime preempted.
unsigned IsDSOLocal : 1;
+ /// True if this symbol has a partition name assigned (see
+ /// https://lld.llvm.org/Partitions.html).
+ unsigned HasPartition : 1;
+
private:
// Give subclasses access to what otherwise would be wasted padding.
- // (17 + 4 + 2 + 2 + 2 + 3 + 1 + 1) == 32.
+ // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32.
unsigned SubClassData : GlobalValueSubClassDataBits;
friend class Constant;
@@ -281,6 +284,12 @@ public:
return IsDSOLocal;
}
+ bool hasPartition() const {
+ return HasPartition;
+ }
+ StringRef getPartition() const;
+ void setPartition(StringRef Part);
+
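Partition names let a GlobalValue be assigned to an ELF partition for lld's partitioning feature. A sketch, illustrative only, assuming setPartition also maintains the HasPartition bit shown above:

    #include <cassert>
    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;

    static void tagForPartition(GlobalValue &GV) {
      // See https://lld.llvm.org/Partitions.html for how partitions are used.
      GV.setPartition("feature_foo");
      assert(GV.hasPartition() && GV.getPartition() == "feature_foo");
    }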
static LinkageTypes getLinkOnceLinkage(bool ODR) {
return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
}
diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h
index 03b9ec46ebb4..2e2c8c477913 100644
--- a/include/llvm/IR/GlobalVariable.h
+++ b/include/llvm/IR/GlobalVariable.h
@@ -1,9 +1,8 @@
//===-- llvm/GlobalVariable.h - GlobalVariable class ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index fac2ff46c453..a74364dffb2e 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -1,9 +1,8 @@
//===- llvm/IRBuilder.h - Builder for LLVM Instructions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -97,12 +96,18 @@ protected:
MDNode *DefaultFPMathTag;
FastMathFlags FMF;
+ bool IsFPConstrained;
+ ConstrainedFPIntrinsic::ExceptionBehavior DefaultConstrainedExcept;
+ ConstrainedFPIntrinsic::RoundingMode DefaultConstrainedRounding;
+
ArrayRef<OperandBundleDef> DefaultOperandBundles;
public:
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
- : Context(context), DefaultFPMathTag(FPMathTag),
+ : Context(context), DefaultFPMathTag(FPMathTag), IsFPConstrained(false),
+ DefaultConstrainedExcept(ConstrainedFPIntrinsic::ebStrict),
+ DefaultConstrainedRounding(ConstrainedFPIntrinsic::rmDynamic),
DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@@ -219,6 +224,37 @@ public:
/// Set the fast-math flags to be used with generated fp-math operators
void setFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
+ /// Enable/Disable use of constrained floating point math. When
+ /// enabled the CreateF<op>() calls instead create constrained
+ /// floating point intrinsic calls. Fast math flags are unaffected
+ /// by this setting.
+ void setIsFPConstrained(bool IsCon) { IsFPConstrained = IsCon; }
+
+ /// Query for the use of constrained floating point math
+ bool getIsFPConstrained() { return IsFPConstrained; }
+
+ /// Set the exception handling to be used with constrained floating point
+ void setDefaultConstrainedExcept(
+ ConstrainedFPIntrinsic::ExceptionBehavior NewExcept) {
+ DefaultConstrainedExcept = NewExcept;
+ }
+
+ /// Set the rounding mode handling to be used with constrained floating point
+ void setDefaultConstrainedRounding(
+ ConstrainedFPIntrinsic::RoundingMode NewRounding) {
+ DefaultConstrainedRounding = NewRounding;
+ }
+
+ /// Get the exception handling used with constrained floating point
+ ConstrainedFPIntrinsic::ExceptionBehavior getDefaultConstrainedExcept() {
+ return DefaultConstrainedExcept;
+ }
+
+ /// Get the rounding mode handling used with constrained floating point
+ ConstrainedFPIntrinsic::RoundingMode getDefaultConstrainedRounding() {
+ return DefaultConstrainedRounding;
+ }
+
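When IsFPConstrained is set, the FP builder methods shown further down (e.g. CreateFAdd) emit experimental constrained intrinsics instead of plain instructions, using these defaults for rounding and exception behavior. A sketch, illustrative only, assuming L and R are floating-point Values:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    static Value *strictFAdd(IRBuilder<> &B, Value *L, Value *R) {
      B.setIsFPConstrained(true);
      B.setDefaultConstrainedExcept(ConstrainedFPIntrinsic::ebStrict);
      B.setDefaultConstrainedRounding(ConstrainedFPIntrinsic::rmDynamic);
      // Produces a call to llvm.experimental.constrained.fadd rather than fadd.
      return B.CreateFAdd(L, R);
    }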
//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//
@@ -906,20 +942,20 @@ public:
Name);
}
- InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+ InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "") {
- return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
- UnwindDest, Args, OpBundles, Name);
+ return CreateInvoke(Callee.getFunctionType(), Callee.getCallee(),
+ NormalDest, UnwindDest, Args, OpBundles, Name);
}
- InvokeInst *CreateInvoke(Function *Callee, BasicBlock *NormalDest,
+ InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest,
ArrayRef<Value *> Args = None,
const Twine &Name = "") {
- return CreateInvoke(Callee->getFunctionType(), Callee, NormalDest,
- UnwindDest, Args, Name);
+ return CreateInvoke(Callee.getFunctionType(), Callee.getCallee(),
+ NormalDest, UnwindDest, Args, Name);
}
// Deprecated [opaque pointer types]
@@ -944,6 +980,42 @@ public:
Callee, NormalDest, UnwindDest, Args, Name);
}
+ /// \brief Create a callbr instruction.
+ CallBrInst *CreateCallBr(FunctionType *Ty, Value *Callee,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return Insert(CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests,
+ Args), Name);
+ }
+ CallBrInst *CreateCallBr(FunctionType *Ty, Value *Callee,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return Insert(
+ CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests, Args,
+ OpBundles), Name);
+ }
+
+ CallBrInst *CreateCallBr(FunctionCallee Callee, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args = None,
+ const Twine &Name = "") {
+ return CreateCallBr(Callee.getFunctionType(), Callee.getCallee(),
+ DefaultDest, IndirectDests, Args, Name);
+ }
+ CallBrInst *CreateCallBr(FunctionCallee Callee, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> OpBundles,
+ const Twine &Name = "") {
+ return CreateCallBr(Callee.getFunctionType(), Callee.getCallee(),
+ DefaultDest, IndirectDests, Args, Name);
+ }
+
ResumeInst *CreateResume(Value *Exn) {
return Insert(ResumeInst::Create(Exn));
}
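The CreateCallBr overloads above mirror CreateCall/CreateInvoke. A hypothetical use (callbr is primarily emitted for GCC-style asm goto; Builder, AsmCallee, Cont, ErrBB and Arg0 are assumed names):

    // Fallthrough continues in Cont; the callee may instead branch to ErrBB.
    CallBrInst *CBI = Builder.CreateCallBr(AsmCallee, /*DefaultDest=*/Cont,
                                           /*IndirectDests=*/{ErrBB},
                                           /*Args=*/{Arg0});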
@@ -1004,12 +1076,44 @@ private:
}
Value *foldConstant(Instruction::BinaryOps Opc, Value *L,
- Value *R, const Twine &Name = nullptr) const {
+ Value *R, const Twine &Name) const {
auto *LC = dyn_cast<Constant>(L);
auto *RC = dyn_cast<Constant>(R);
return (LC && RC) ? Insert(Folder.CreateBinOp(Opc, LC, RC), Name) : nullptr;
}
+ Value *getConstrainedFPRounding(
+ Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding) {
+ ConstrainedFPIntrinsic::RoundingMode UseRounding =
+ DefaultConstrainedRounding;
+
+ if (Rounding.hasValue())
+ UseRounding = Rounding.getValue();
+
+ Optional<StringRef> RoundingStr =
+ ConstrainedFPIntrinsic::RoundingModeToStr(UseRounding);
+ assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
+ auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue());
+
+ return MetadataAsValue::get(Context, RoundingMDS);
+ }
+
+ Value *getConstrainedFPExcept(
+ Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except) {
+ ConstrainedFPIntrinsic::ExceptionBehavior UseExcept =
+ DefaultConstrainedExcept;
+
+ if (Except.hasValue())
+ UseExcept = Except.getValue();
+
+ Optional<StringRef> ExceptStr =
+ ConstrainedFPIntrinsic::ExceptionBehaviorToStr(UseExcept);
+ assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
+ auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue());
+
+ return MetadataAsValue::get(Context, ExceptMDS);
+ }
+
public:
Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
@@ -1179,6 +1283,14 @@ public:
return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
}
+ Value *CreateAnd(ArrayRef<Value*> Ops) {
+ assert(!Ops.empty());
+ Value *Accum = Ops[0];
+ for (unsigned i = 1; i < Ops.size(); i++)
+ Accum = CreateAnd(Accum, Ops[i]);
+ return Accum;
+ }
+
Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
if (auto *RC = dyn_cast<Constant>(RHS)) {
if (RC->isNullValue())
@@ -1197,6 +1309,14 @@ public:
return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
}
+ Value *CreateOr(ArrayRef<Value*> Ops) {
+ assert(!Ops.empty());
+ Value *Accum = Ops[0];
+ for (unsigned i = 1; i < Ops.size(); i++)
+ Accum = CreateOr(Accum, Ops[i]);
+ return Accum;
+ }
+
Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
if (Value *V = foldConstant(Instruction::Xor, LHS, RHS, Name)) return V;
return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
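The ArrayRef forms above simply fold the operand list left to right; an illustration with assumed integer values C0, C1 and C2 of the same type:

    Value *All = Builder.CreateAnd({C0, C1, C2});  // ((C0 & C1) & C2)
    Value *Any = Builder.CreateOr({C0, C1, C2});   // ((C0 | C1) | C2)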
@@ -1212,6 +1332,10 @@ public:
Value *CreateFAdd(Value *L, Value *R, const Twine &Name = "",
MDNode *FPMD = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
+ L, R, nullptr, Name, FPMD);
+
if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), FPMD, FMF);
return Insert(I, Name);
@@ -1221,6 +1345,10 @@ public:
/// default FMF.
Value *CreateFAddFMF(Value *L, Value *R, Instruction *FMFSource,
const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
+ L, R, FMFSource, Name);
+
if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), nullptr,
FMFSource->getFastMathFlags());
@@ -1229,6 +1357,10 @@ public:
Value *CreateFSub(Value *L, Value *R, const Twine &Name = "",
MDNode *FPMD = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub,
+ L, R, nullptr, Name, FPMD);
+
if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), FPMD, FMF);
return Insert(I, Name);
@@ -1238,6 +1370,10 @@ public:
/// default FMF.
Value *CreateFSubFMF(Value *L, Value *R, Instruction *FMFSource,
const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fsub,
+ L, R, FMFSource, Name);
+
if (Value *V = foldConstant(Instruction::FSub, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFSub(L, R), nullptr,
FMFSource->getFastMathFlags());
@@ -1246,6 +1382,10 @@ public:
Value *CreateFMul(Value *L, Value *R, const Twine &Name = "",
MDNode *FPMD = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul,
+ L, R, nullptr, Name, FPMD);
+
if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), FPMD, FMF);
return Insert(I, Name);
@@ -1255,6 +1395,10 @@ public:
/// default FMF.
Value *CreateFMulFMF(Value *L, Value *R, Instruction *FMFSource,
const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fmul,
+ L, R, FMFSource, Name);
+
if (Value *V = foldConstant(Instruction::FMul, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFMul(L, R), nullptr,
FMFSource->getFastMathFlags());
@@ -1263,6 +1407,10 @@ public:
Value *CreateFDiv(Value *L, Value *R, const Twine &Name = "",
MDNode *FPMD = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv,
+ L, R, nullptr, Name, FPMD);
+
if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), FPMD, FMF);
return Insert(I, Name);
@@ -1272,6 +1420,10 @@ public:
/// default FMF.
Value *CreateFDivFMF(Value *L, Value *R, Instruction *FMFSource,
const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv,
+ L, R, FMFSource, Name);
+
if (Value *V = foldConstant(Instruction::FDiv, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFDiv(L, R), nullptr,
FMFSource->getFastMathFlags());
@@ -1280,6 +1432,10 @@ public:
Value *CreateFRem(Value *L, Value *R, const Twine &Name = "",
MDNode *FPMD = nullptr) {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem,
+ L, R, nullptr, Name, FPMD);
+
if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), FPMD, FMF);
return Insert(I, Name);
@@ -1289,6 +1445,10 @@ public:
/// default FMF.
Value *CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource,
const Twine &Name = "") {
+ if (IsFPConstrained)
+ return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_frem,
+ L, R, FMFSource, Name);
+
if (Value *V = foldConstant(Instruction::FRem, L, R, Name)) return V;
Instruction *I = setFPAttrs(BinaryOperator::CreateFRem(L, R), nullptr,
FMFSource->getFastMathFlags());
@@ -1305,6 +1465,23 @@ public:
return Insert(BinOp, Name);
}
+ CallInst *CreateConstrainedFPBinOp(
+ Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource = nullptr,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr,
+ Optional<ConstrainedFPIntrinsic::RoundingMode> Rounding = None,
+ Optional<ConstrainedFPIntrinsic::ExceptionBehavior> Except = None) {
+ Value *RoundingV = getConstrainedFPRounding(Rounding);
+ Value *ExceptV = getConstrainedFPExcept(Except);
+
+ FastMathFlags UseFMF = FMF;
+ if (FMFSource)
+ UseFMF = FMFSource->getFastMathFlags();
+
+ CallInst *C = CreateIntrinsic(ID, {L->getType()},
+ {L, R, RoundingV, ExceptV}, nullptr, Name);
+ return cast<CallInst>(setFPAttrs(C, FPMathTag, UseFMF));
+ }
+
Value *CreateNeg(Value *V, const Twine &Name = "",
bool HasNUW = false, bool HasNSW = false) {
if (auto *VC = dyn_cast<Constant>(V))
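CreateConstrainedFPBinOp above also takes optional per-call overrides of the builder defaults; a sketch assuming the ConstrainedFPIntrinsic enumerators rmToNearest and ebIgnore, plus existing L and R:

    Builder.CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fdiv, L, R,
                                     /*FMFSource=*/nullptr, "div", /*FPMathTag=*/nullptr,
                                     ConstrainedFPIntrinsic::rmToNearest,
                                     ConstrainedFPIntrinsic::ebIgnore);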
@@ -1331,12 +1508,54 @@ public:
Name);
}
+ /// Copy fast-math-flags from an instruction rather than using the builder's
+ /// default FMF.
+ Value *CreateFNegFMF(Value *V, Instruction *FMFSource,
+ const Twine &Name = "") {
+ if (auto *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateFNeg(VC), Name);
+ // TODO: This should return UnaryOperator::CreateFNeg(...) once we are
+ // confident that they are optimized sufficiently.
+ return Insert(setFPAttrs(BinaryOperator::CreateFNeg(V), nullptr,
+ FMFSource->getFastMathFlags()),
+ Name);
+ }
+
Value *CreateNot(Value *V, const Twine &Name = "") {
if (auto *VC = dyn_cast<Constant>(V))
return Insert(Folder.CreateNot(VC), Name);
return Insert(BinaryOperator::CreateNot(V), Name);
}
+ Value *CreateUnOp(Instruction::UnaryOps Opc,
+ Value *V, const Twine &Name = "",
+ MDNode *FPMathTag = nullptr) {
+ if (auto *VC = dyn_cast<Constant>(V))
+ return Insert(Folder.CreateUnOp(Opc, VC), Name);
+ Instruction *UnOp = UnaryOperator::Create(Opc, V);
+ if (isa<FPMathOperator>(UnOp))
+ UnOp = setFPAttrs(UnOp, FPMathTag, FMF);
+ return Insert(UnOp, Name);
+ }
+
+ /// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
+ /// The correct number of operands must be passed for the chosen opcode.
+ Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
+ const Twine &Name = "",
+ MDNode *FPMathTag = nullptr) {
+ if (Instruction::isBinaryOp(Opc)) {
+ assert(Ops.size() == 2 && "Invalid number of operands!");
+ return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
+ Ops[0], Ops[1], Name, FPMathTag);
+ }
+ if (Instruction::isUnaryOp(Opc)) {
+ assert(Ops.size() == 1 && "Invalid number of operands!");
+ return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
+ Ops[0], Name, FPMathTag);
+ }
+ llvm_unreachable("Unexpected opcode!");
+ }
+
//===--------------------------------------------------------------------===//
// Instruction creation methods: Memory Instructions
//===--------------------------------------------------------------------===//
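A small sketch of the new opcode-driven entry points above (X and Y assumed); with FNeg added as a unary opcode further down in Instruction.def, both spellings produce the same instruction:

    Value *N1 = Builder.CreateUnOp(Instruction::FNeg, X);
    Value *N2 = Builder.CreateNAryOp(Instruction::FNeg, {X});    // dispatches to CreateUnOp
    Value *S  = Builder.CreateNAryOp(Instruction::FAdd, {X, Y}); // dispatches to CreateBinOp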
@@ -1989,16 +2208,17 @@ public:
return Insert(CI, Name);
}
- CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args = None,
+ CallInst *CreateCall(FunctionCallee Callee, ArrayRef<Value *> Args = None,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
- return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag);
+ return CreateCall(Callee.getFunctionType(), Callee.getCallee(), Args, Name,
+ FPMathTag);
}
- CallInst *CreateCall(Function *Callee, ArrayRef<Value *> Args,
+ CallInst *CreateCall(FunctionCallee Callee, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> OpBundles,
const Twine &Name = "", MDNode *FPMathTag = nullptr) {
- return CreateCall(Callee->getFunctionType(), Callee, Args, OpBundles, Name,
- FPMathTag);
+ return CreateCall(Callee.getFunctionType(), Callee.getCallee(), Args,
+ OpBundles, Name, FPMathTag);
}
// Deprecated [opaque pointer types]
@@ -2031,6 +2251,8 @@ public:
MDNode *Unpred = MDFrom->getMetadata(LLVMContext::MD_unpredictable);
Sel = addBranchMetadata(Sel, Prof, Unpred);
}
+ if (isa<FPMathOperator>(Sel))
+ Sel = cast<SelectInst>(setFPAttrs(Sel, nullptr /* MDNode* */, FMF));
return Insert(Sel, Name);
}
@@ -2231,6 +2453,74 @@ public:
return V;
}
+ Value *CreatePreserveArrayAccessIndex(Value *Base, unsigned Dimension,
+ unsigned LastIndex) {
+ assert(isa<PointerType>(Base->getType()) &&
+ "Invalid Base ptr type for preserve.array.access.index.");
+ auto *BaseType = Base->getType();
+
+ Value *LastIndexV = getInt32(LastIndex);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ SmallVector<Value *, 4> IdxList;
+ for (unsigned I = 0; I < Dimension; ++I)
+ IdxList.push_back(Zero);
+ IdxList.push_back(LastIndexV);
+
+ Type *ResultType =
+ GetElementPtrInst::getGEPReturnType(Base, IdxList);
+
+ Module *M = BB->getParent()->getParent();
+ Function *FnPreserveArrayAccessIndex = Intrinsic::getDeclaration(
+ M, Intrinsic::preserve_array_access_index, {ResultType, BaseType});
+
+ Value *DimV = getInt32(Dimension);
+ CallInst *Fn =
+ CreateCall(FnPreserveArrayAccessIndex, {Base, DimV, LastIndexV});
+
+ return Fn;
+ }
+
+ Value *CreatePreserveUnionAccessIndex(Value *Base, unsigned FieldIndex,
+ MDNode *DbgInfo) {
+ assert(isa<PointerType>(Base->getType()) &&
+ "Invalid Base ptr type for preserve.union.access.index.");
+ auto *BaseType = Base->getType();
+
+ Module *M = BB->getParent()->getParent();
+ Function *FnPreserveUnionAccessIndex = Intrinsic::getDeclaration(
+ M, Intrinsic::preserve_union_access_index, {BaseType, BaseType});
+
+ Value *DIIndex = getInt32(FieldIndex);
+ CallInst *Fn =
+ CreateCall(FnPreserveUnionAccessIndex, {Base, DIIndex});
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+
+ return Fn;
+ }
+
+ Value *CreatePreserveStructAccessIndex(Value *Base, unsigned Index,
+ unsigned FieldIndex, MDNode *DbgInfo) {
+ assert(isa<PointerType>(Base->getType()) &&
+ "Invalid Base ptr type for preserve.struct.access.index.");
+ auto *BaseType = Base->getType();
+
+ Value *GEPIndex = getInt32(Index);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Type *ResultType =
+ GetElementPtrInst::getGEPReturnType(Base, {Zero, GEPIndex});
+
+ Module *M = BB->getParent()->getParent();
+ Function *FnPreserveStructAccessIndex = Intrinsic::getDeclaration(
+ M, Intrinsic::preserve_struct_access_index, {ResultType, BaseType});
+
+ Value *DIIndex = getInt32(FieldIndex);
+ CallInst *Fn = CreateCall(FnPreserveStructAccessIndex,
+ {Base, GEPIndex, DIIndex});
+ Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
+
+ return Fn;
+ }
+
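These helpers wrap the preserve.*.access.index intrinsics, which keep field offsets relocatable (used primarily by the BPF target). A hypothetical struct-field access, with StructPtr and the debug-info node DI assumed:

    // Roughly a GEP {0, 2} on StructPtr, but kept relocatable via the intrinsic.
    Value *FieldAddr = Builder.CreatePreserveStructAccessIndex(StructPtr, /*Index=*/2,
                                                               /*FieldIndex=*/2, DI);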
private:
/// Helper function that creates an assume intrinsic call that
/// represents an alignment assumption on the provided Ptr, Mask, Type
@@ -2280,10 +2570,11 @@ public:
Value **TheCheck = nullptr) {
assert(isa<PointerType>(PtrValue->getType()) &&
"trying to create an alignment assumption on a non-pointer?");
+ assert(Alignment != 0 && "Invalid Alignment");
auto *PtrTy = cast<PointerType>(PtrValue->getType());
Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
- Value *Mask = ConstantInt::get(IntPtrTy, Alignment > 0 ? Alignment - 1 : 0);
+ Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1);
return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
OffsetValue, TheCheck);
}
@@ -2310,15 +2601,10 @@ public:
Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace());
if (Alignment->getType() != IntPtrTy)
- Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ true,
+ Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ false,
"alignmentcast");
- Value *IsPositive =
- CreateICmp(CmpInst::ICMP_SGT, Alignment,
- ConstantInt::get(Alignment->getType(), 0), "ispositive");
- Value *PositiveMask =
- CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "positivemask");
- Value *Mask = CreateSelect(IsPositive, PositiveMask,
- ConstantInt::get(IntPtrTy, 0), "mask");
+
+ Value *Mask = CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "mask");
return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy,
OffsetValue, TheCheck);
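With the zero/negative handling dropped, the mask is simply Alignment - 1, which is only meaningful for non-zero power-of-two alignments; for example:

    // Alignment = 16 -> Mask = 15; (ptr & 15) == 0 asserts 16-byte alignment.
    Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1);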
diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h
index 75f80567dbd5..3be9449c1a93 100644
--- a/include/llvm/IR/IRPrintingPasses.h
+++ b/include/llvm/IR/IRPrintingPasses.h
@@ -1,9 +1,8 @@
//===- IRPrintingPasses.h - Passes to print out IR constructs ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 1519a45d59e9..2aac807623a9 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -1,9 +1,8 @@
//===- llvm/InlineAsm.h - Class to represent inline asm strings -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/InstIterator.h b/include/llvm/IR/InstIterator.h
index 2988fc935dd5..054fe4e9cbe9 100644
--- a/include/llvm/IR/InstIterator.h
+++ b/include/llvm/IR/InstIterator.h
@@ -1,9 +1,8 @@
//===- InstIterator.h - Classes for inst iteration --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/InstVisitor.h b/include/llvm/IR/InstVisitor.h
index c5b4c6f71d7d..fbeb2caf14e6 100644
--- a/include/llvm/IR/InstVisitor.h
+++ b/include/llvm/IR/InstVisitor.h
@@ -1,9 +1,8 @@
//===- InstVisitor.h - Instruction visitor templates ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -218,14 +217,17 @@ public:
RetTy visitVACopyInst(VACopyInst &I) { DELEGATE(IntrinsicInst); }
RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); }
- // Call and Invoke are slightly different as they delegate first through
- // a generic CallSite visitor.
+ // Call, Invoke and CallBr are slightly different as they delegate first
+ // through a generic CallSite visitor.
RetTy visitCallInst(CallInst &I) {
return static_cast<SubClass*>(this)->visitCallSite(&I);
}
RetTy visitInvokeInst(InvokeInst &I) {
return static_cast<SubClass*>(this)->visitCallSite(&I);
}
+ RetTy visitCallBrInst(CallBrInst &I) {
+ return static_cast<SubClass *>(this)->visitCallSite(&I);
+ }
// While terminators don't have a distinct type modeling them, we support
// intercepting them with a dedicated visitor callback.
@@ -271,14 +273,14 @@ public:
// The next level delegation for `CallBase` is slightly more complex in order
// to support visiting cases where the call is also a terminator.
RetTy visitCallBase(CallBase &I) {
- if (isa<InvokeInst>(I))
+ if (isa<InvokeInst>(I) || isa<CallBrInst>(I))
return static_cast<SubClass *>(this)->visitTerminator(I);
DELEGATE(Instruction);
}
- // Provide a legacy visitor for a 'callsite' that visits both calls and
- // invokes.
+ // Provide a legacy visitor for a 'callsite' that visits calls, invokes,
+ // and callbrs.
//
// Prefer overriding the type system based `CallBase` instead.
RetTy visitCallSite(CallSite CS) {
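With CallBr routed through the legacy CallSite hook, a visitor that overrides visitCallSite now sees calls, invokes and callbrs alike; a minimal sketch (assumes the usual InstVisitor.h and CallSite.h includes):

    struct CallSiteCounter : InstVisitor<CallSiteCounter> {
      unsigned Count = 0;
      void visitCallSite(CallSite CS) { ++Count; }  // call, invoke, callbr
    };
    // CallSiteCounter CSC; CSC.visit(F);   // F: some llvm::Function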
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index 3f384a6ee40c..ca419b50da6b 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -1,9 +1,8 @@
//===- llvm/InstrTypes.h - Important Instruction subclasses -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OperandTraits.h"
@@ -77,7 +77,8 @@ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Alloca ||
+ return I->isUnaryOp() ||
+ I->getOpcode() == Instruction::Alloca ||
I->getOpcode() == Instruction::Load ||
I->getOpcode() == Instruction::VAArg ||
I->getOpcode() == Instruction::ExtractValue ||
@@ -96,6 +97,91 @@ struct OperandTraits<UnaryInstruction> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
//===----------------------------------------------------------------------===//
+// UnaryOperator Class
+//===----------------------------------------------------------------------===//
+
+class UnaryOperator : public UnaryInstruction {
+ void AssertOK();
+
+protected:
+ UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+ const Twine &Name, Instruction *InsertBefore);
+ UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
+ const Twine &Name, BasicBlock *InsertAtEnd);
+
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ UnaryOperator *cloneImpl() const;
+
+public:
+
+ /// Construct a unary instruction, given the opcode and an operand.
+ /// Optionally (if InsertBefore is specified) insert the instruction
+ /// into a BasicBlock right before the specified instruction. The specified
+ /// Instruction is allowed to be a dereferenced end iterator.
+ ///
+ static UnaryOperator *Create(UnaryOps Op, Value *S,
+ const Twine &Name = Twine(),
+ Instruction *InsertBefore = nullptr);
+
+ /// Construct a unary instruction, given the opcode and an operand.
+ /// Also automatically insert this instruction to the end of the
+ /// BasicBlock specified.
+ ///
+ static UnaryOperator *Create(UnaryOps Op, Value *S,
+ const Twine &Name,
+ BasicBlock *InsertAtEnd);
+
+ /// These methods just forward to Create, and are useful when you
+ /// statically know what type of instruction you're going to create. These
+ /// helpers just save some typing.
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryOperator *Create##OPC(Value *V, const Twine &Name = "") {\
+ return Create(Instruction::OPC, V, Name);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryOperator *Create##OPC(Value *V, const Twine &Name, \
+ BasicBlock *BB) {\
+ return Create(Instruction::OPC, V, Name, BB);\
+ }
+#include "llvm/IR/Instruction.def"
+#define HANDLE_UNARY_INST(N, OPC, CLASS) \
+ static UnaryOperator *Create##OPC(Value *V, const Twine &Name, \
+ Instruction *I) {\
+ return Create(Instruction::OPC, V, Name, I);\
+ }
+#include "llvm/IR/Instruction.def"
+
+ static UnaryOperator *CreateWithCopiedFlags(UnaryOps Opc,
+ Value *V,
+ Instruction *CopyO,
+ const Twine &Name = "") {
+ UnaryOperator *UO = Create(Opc, V, Name);
+ UO->copyIRFlags(CopyO);
+ return UO;
+ }
+
+ static UnaryOperator *CreateFNegFMF(Value *Op, Instruction *FMFSource,
+ const Twine &Name = "") {
+ return CreateWithCopiedFlags(Instruction::FNeg, Op, FMFSource, Name);
+ }
+
+ UnaryOps getOpcode() const {
+ return static_cast<UnaryOps>(Instruction::getOpcode());
+ }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Instruction *I) {
+ return I->isUnaryOp();
+ }
+ static bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+};
+
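A usage sketch of the relocated class (X, InsertPt and SrcInst assumed); CreateFNeg comes from the HANDLE_UNARY_INST expansion above:

    UnaryOperator *Neg  = UnaryOperator::CreateFNeg(X, "neg", InsertPt); // 'fneg' before InsertPt
    UnaryOperator *NegF = UnaryOperator::CreateFNegFMF(X, SrcInst);      // copies SrcInst's FMF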
+//===----------------------------------------------------------------------===//
// BinaryOperator Class
//===----------------------------------------------------------------------===//
@@ -162,42 +248,42 @@ public:
static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc,
Value *V1, Value *V2,
- BinaryOperator *CopyBO,
+ Instruction *CopyO,
const Twine &Name = "") {
BinaryOperator *BO = Create(Opc, V1, V2, Name);
- BO->copyIRFlags(CopyBO);
+ BO->copyIRFlags(CopyO);
return BO;
}
static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2,
- BinaryOperator *FMFSource,
+ Instruction *FMFSource,
const Twine &Name = "") {
return CreateWithCopiedFlags(Instruction::FAdd, V1, V2, FMFSource, Name);
}
static BinaryOperator *CreateFSubFMF(Value *V1, Value *V2,
- BinaryOperator *FMFSource,
+ Instruction *FMFSource,
const Twine &Name = "") {
return CreateWithCopiedFlags(Instruction::FSub, V1, V2, FMFSource, Name);
}
static BinaryOperator *CreateFMulFMF(Value *V1, Value *V2,
- BinaryOperator *FMFSource,
+ Instruction *FMFSource,
const Twine &Name = "") {
return CreateWithCopiedFlags(Instruction::FMul, V1, V2, FMFSource, Name);
}
static BinaryOperator *CreateFDivFMF(Value *V1, Value *V2,
- BinaryOperator *FMFSource,
+ Instruction *FMFSource,
const Twine &Name = "") {
return CreateWithCopiedFlags(Instruction::FDiv, V1, V2, FMFSource, Name);
}
static BinaryOperator *CreateFRemFMF(Value *V1, Value *V2,
- BinaryOperator *FMFSource,
+ Instruction *FMFSource,
const Twine &Name = "") {
return CreateWithCopiedFlags(Instruction::FRem, V1, V2, FMFSource, Name);
}
- static BinaryOperator *CreateFNegFMF(Value *Op, BinaryOperator *FMFSource,
+ static BinaryOperator *CreateFNegFMF(Value *Op, Instruction *FMFSource,
const Twine &Name = "") {
Value *Zero = ConstantFP::getNegativeZero(Op->getType());
- return CreateWithCopiedFlags(Instruction::FSub, Zero, Op, FMFSource);
+ return CreateWithCopiedFlags(Instruction::FSub, Zero, Op, FMFSource, Name);
}
static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2,
@@ -1033,16 +1119,23 @@ protected:
return 0;
case Instruction::Invoke:
return 2;
+ case Instruction::CallBr:
+ return getNumSubclassExtraOperandsDynamic();
}
llvm_unreachable("Invalid opcode!");
}
+ /// Get the number of extra operands for instructions that don't have a fixed
+ /// number of extra operands.
+ unsigned getNumSubclassExtraOperandsDynamic() const;
+
public:
using Instruction::getContext;
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call ||
- I->getOpcode() == Instruction::Invoke;
+ I->getOpcode() == Instruction::Invoke ||
+ I->getOpcode() == Instruction::CallBr;
}
static bool classof(const Value *V) {
return isa<Instruction>(V) && classof(cast<Instruction>(V));
@@ -1096,6 +1189,19 @@ public:
return isDataOperand(&UI.getUse());
}
+ /// Given a value use iterator, return the data operand corresponding to it.
+ /// Iterator must actually correspond to a data operand.
+ unsigned getDataOperandNo(Value::const_user_iterator UI) const {
+ return getDataOperandNo(&UI.getUse());
+ }
+
+ /// Given a use for a data operand, get the data operand number that
+ /// corresponds to it.
+ unsigned getDataOperandNo(const Use *U) const {
+ assert(isDataOperand(U) && "Data operand # out of range!");
+ return U - data_operands_begin();
+ }
+
/// Return the iterator pointing to the beginning of the argument list.
User::op_iterator arg_begin() { return op_begin(); }
User::const_op_iterator arg_begin() const {
@@ -1199,6 +1305,13 @@ public:
return const_cast<CallBase *>(this)->getCaller();
}
+ /// Tests if this call site must be tail call optimized. Only a CallInst can
+ /// be tail call optimized.
+ bool isMustTailCall() const;
+
+ /// Tests if this call site is marked as a tail call.
+ bool isTailCall() const;
+
/// Returns the intrinsic ID of the intrinsic called or
/// Intrinsic::not_intrinsic if the called function is not an intrinsic, or if
/// this is an indirect call.
@@ -1207,10 +1320,13 @@ public:
void setCalledOperand(Value *V) { Op<CalledOperandOpEndIdx>() = V; }
/// Sets the function called, including updating the function type.
- void setCalledFunction(Value *Fn) {
- setCalledFunction(
- cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
- Fn);
+ void setCalledFunction(Function *Fn) {
+ setCalledFunction(Fn->getFunctionType(), Fn);
+ }
+
+ /// Sets the function called, including updating the function type.
+ void setCalledFunction(FunctionCallee Fn) {
+ setCalledFunction(Fn.getFunctionType(), Fn.getCallee());
}
/// Sets the function called, including updating to the specified function
@@ -1219,6 +1335,9 @@ public:
this->FTy = FTy;
assert(FTy == cast<FunctionType>(
cast<PointerType>(Fn->getType())->getElementType()));
+ // This function doesn't mutate the return type, only the function
+ // type. That looks questionable, so assert that the two agree for now.
+ assert(getType() == FTy->getReturnType());
setCalledOperand(Fn);
}
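Both setters above keep the callee operand and FTy consistent; an illustration with an assumed CallBase *CB and Function *NewFn:

    CB->setCalledFunction(NewFn);  // derives the type from NewFn->getFunctionType()
    CB->setCalledFunction(FunctionCallee(NewFn->getFunctionType(), NewFn)); // equivalent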
@@ -1233,6 +1352,9 @@ public:
(ID << 2));
}
+ /// Check if this call is an inline asm statement.
+ bool isInlineAsm() const { return isa<InlineAsm>(getCalledOperand()); }
+
/// \name Attribute API
///
/// These methods access and modify attributes on this call (including
@@ -1452,6 +1574,12 @@ public:
return Attrs.getParamAlignment(ArgNo);
}
+ /// Extract the byval type for a call or parameter.
+ Type *getParamByValType(unsigned ArgNo) const {
+ Type *Ty = Attrs.getParamByValType(ArgNo);
+ return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+ }
+
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableBytes(unsigned i) const {
diff --git a/include/llvm/IR/Instruction.def b/include/llvm/IR/Instruction.def
index 58e4e2e1d6cc..41cdf613ad64 100644
--- a/include/llvm/IR/Instruction.def
+++ b/include/llvm/IR/Instruction.def
@@ -1,9 +1,8 @@
//===-- llvm/Instruction.def - File that describes Instructions -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -135,89 +134,90 @@ HANDLE_TERM_INST ( 7, Unreachable , UnreachableInst)
HANDLE_TERM_INST ( 8, CleanupRet , CleanupReturnInst)
HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst)
HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst)
- LAST_TERM_INST (10)
+HANDLE_TERM_INST (11, CallBr , CallBrInst) // A call-site terminator
+ LAST_TERM_INST (11)
// Standard unary operators...
- FIRST_UNARY_INST(11)
-HANDLE_UNARY_INST(11, FNeg , UnaryOperator)
- LAST_UNARY_INST(11)
+ FIRST_UNARY_INST(12)
+HANDLE_UNARY_INST(12, FNeg , UnaryOperator)
+ LAST_UNARY_INST(12)
// Standard binary operators...
- FIRST_BINARY_INST(12)
-HANDLE_BINARY_INST(12, Add , BinaryOperator)
-HANDLE_BINARY_INST(13, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(14, Sub , BinaryOperator)
-HANDLE_BINARY_INST(15, FSub , BinaryOperator)
-HANDLE_BINARY_INST(16, Mul , BinaryOperator)
-HANDLE_BINARY_INST(17, FMul , BinaryOperator)
-HANDLE_BINARY_INST(18, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(19, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(20, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(21, URem , BinaryOperator)
-HANDLE_BINARY_INST(22, SRem , BinaryOperator)
-HANDLE_BINARY_INST(23, FRem , BinaryOperator)
+ FIRST_BINARY_INST(13)
+HANDLE_BINARY_INST(13, Add , BinaryOperator)
+HANDLE_BINARY_INST(14, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(15, Sub , BinaryOperator)
+HANDLE_BINARY_INST(16, FSub , BinaryOperator)
+HANDLE_BINARY_INST(17, Mul , BinaryOperator)
+HANDLE_BINARY_INST(18, FMul , BinaryOperator)
+HANDLE_BINARY_INST(19, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(20, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(21, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(22, URem , BinaryOperator)
+HANDLE_BINARY_INST(23, SRem , BinaryOperator)
+HANDLE_BINARY_INST(24, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(24, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(25, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(26, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(27, And , BinaryOperator)
-HANDLE_BINARY_INST(28, Or , BinaryOperator)
-HANDLE_BINARY_INST(29, Xor , BinaryOperator)
- LAST_BINARY_INST(29)
+HANDLE_BINARY_INST(25, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(26, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(27, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(28, And , BinaryOperator)
+HANDLE_BINARY_INST(29, Or , BinaryOperator)
+HANDLE_BINARY_INST(30, Xor , BinaryOperator)
+ LAST_BINARY_INST(30)
// Memory operators...
- FIRST_MEMORY_INST(30)
-HANDLE_MEMORY_INST(30, Alloca, AllocaInst) // Stack management
-HANDLE_MEMORY_INST(31, Load , LoadInst ) // Memory manipulation instrs
-HANDLE_MEMORY_INST(32, Store , StoreInst )
-HANDLE_MEMORY_INST(33, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(34, Fence , FenceInst )
-HANDLE_MEMORY_INST(35, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(36, AtomicRMW , AtomicRMWInst )
- LAST_MEMORY_INST(36)
+ FIRST_MEMORY_INST(31)
+HANDLE_MEMORY_INST(31, Alloca, AllocaInst) // Stack management
+HANDLE_MEMORY_INST(32, Load , LoadInst ) // Memory manipulation instrs
+HANDLE_MEMORY_INST(33, Store , StoreInst )
+HANDLE_MEMORY_INST(34, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(35, Fence , FenceInst )
+HANDLE_MEMORY_INST(36, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(37, AtomicRMW , AtomicRMWInst )
+ LAST_MEMORY_INST(37)
// Cast operators ...
// NOTE: The order matters here because CastInst::isEliminableCastPair
// NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(37)
-HANDLE_CAST_INST(37, Trunc , TruncInst ) // Truncate integers
-HANDLE_CAST_INST(38, ZExt , ZExtInst ) // Zero extend integers
-HANDLE_CAST_INST(39, SExt , SExtInst ) // Sign extend integers
-HANDLE_CAST_INST(40, FPToUI , FPToUIInst ) // floating point -> UInt
-HANDLE_CAST_INST(41, FPToSI , FPToSIInst ) // floating point -> SInt
-HANDLE_CAST_INST(42, UIToFP , UIToFPInst ) // UInt -> floating point
-HANDLE_CAST_INST(43, SIToFP , SIToFPInst ) // SInt -> floating point
-HANDLE_CAST_INST(44, FPTrunc , FPTruncInst ) // Truncate floating point
-HANDLE_CAST_INST(45, FPExt , FPExtInst ) // Extend floating point
-HANDLE_CAST_INST(46, PtrToInt, PtrToIntInst) // Pointer -> Integer
-HANDLE_CAST_INST(47, IntToPtr, IntToPtrInst) // Integer -> Pointer
-HANDLE_CAST_INST(48, BitCast , BitCastInst ) // Type cast
-HANDLE_CAST_INST(49, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
- LAST_CAST_INST(49)
-
- FIRST_FUNCLETPAD_INST(50)
-HANDLE_FUNCLETPAD_INST(50, CleanupPad, CleanupPadInst)
-HANDLE_FUNCLETPAD_INST(51, CatchPad , CatchPadInst)
- LAST_FUNCLETPAD_INST(51)
+ FIRST_CAST_INST(38)
+HANDLE_CAST_INST(38, Trunc , TruncInst ) // Truncate integers
+HANDLE_CAST_INST(39, ZExt , ZExtInst ) // Zero extend integers
+HANDLE_CAST_INST(40, SExt , SExtInst ) // Sign extend integers
+HANDLE_CAST_INST(41, FPToUI , FPToUIInst ) // floating point -> UInt
+HANDLE_CAST_INST(42, FPToSI , FPToSIInst ) // floating point -> SInt
+HANDLE_CAST_INST(43, UIToFP , UIToFPInst ) // UInt -> floating point
+HANDLE_CAST_INST(44, SIToFP , SIToFPInst ) // SInt -> floating point
+HANDLE_CAST_INST(45, FPTrunc , FPTruncInst ) // Truncate floating point
+HANDLE_CAST_INST(46, FPExt , FPExtInst ) // Extend floating point
+HANDLE_CAST_INST(47, PtrToInt, PtrToIntInst) // Pointer -> Integer
+HANDLE_CAST_INST(48, IntToPtr, IntToPtrInst) // Integer -> Pointer
+HANDLE_CAST_INST(49, BitCast , BitCastInst ) // Type cast
+HANDLE_CAST_INST(50, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast
+ LAST_CAST_INST(50)
+
+ FIRST_FUNCLETPAD_INST(51)
+HANDLE_FUNCLETPAD_INST(51, CleanupPad, CleanupPadInst)
+HANDLE_FUNCLETPAD_INST(52, CatchPad , CatchPadInst)
+ LAST_FUNCLETPAD_INST(52)
// Other operators...
- FIRST_OTHER_INST(52)
-HANDLE_OTHER_INST(52, ICmp , ICmpInst ) // Integer comparison instruction
-HANDLE_OTHER_INST(53, FCmp , FCmpInst ) // Floating point comparison instr.
-HANDLE_OTHER_INST(54, PHI , PHINode ) // PHI node instruction
-HANDLE_OTHER_INST(55, Call , CallInst ) // Call a function
-HANDLE_OTHER_INST(56, Select , SelectInst ) // select instruction
-HANDLE_USER_INST (57, UserOp1, Instruction) // May be used internally in a pass
-HANDLE_USER_INST (58, UserOp2, Instruction) // Internal to passes only
-HANDLE_OTHER_INST(59, VAArg , VAArgInst ) // vaarg instruction
-HANDLE_OTHER_INST(60, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(61, InsertElement, InsertElementInst) // insert into vector
-HANDLE_OTHER_INST(62, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
-HANDLE_OTHER_INST(63, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(64, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(65, LandingPad, LandingPadInst) // Landing pad instruction.
- LAST_OTHER_INST(65)
+ FIRST_OTHER_INST(53)
+HANDLE_OTHER_INST(53, ICmp , ICmpInst ) // Integer comparison instruction
+HANDLE_OTHER_INST(54, FCmp , FCmpInst ) // Floating point comparison instr.
+HANDLE_OTHER_INST(55, PHI , PHINode ) // PHI node instruction
+HANDLE_OTHER_INST(56, Call , CallInst ) // Call a function
+HANDLE_OTHER_INST(57, Select , SelectInst ) // select instruction
+HANDLE_USER_INST (58, UserOp1, Instruction) // May be used internally in a pass
+HANDLE_USER_INST (59, UserOp2, Instruction) // Internal to passes only
+HANDLE_OTHER_INST(60, VAArg , VAArgInst ) // vaarg instruction
+HANDLE_OTHER_INST(61, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(62, InsertElement, InsertElementInst) // insert into vector
+HANDLE_OTHER_INST(63, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
+HANDLE_OTHER_INST(64, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(65, InsertValue, InsertValueInst) // insert into aggregate
+HANDLE_OTHER_INST(66, LandingPad, LandingPadInst) // Landing pad instruction.
+ LAST_OTHER_INST(66)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 5e78cb1edf02..6a9a74bd16f0 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -1,9 +1,8 @@
//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,6 +135,9 @@ public:
bool isExceptionalTerminator() const {
return isExceptionalTerminator(getOpcode());
}
+ bool isIndirectTerminator() const {
+ return isIndirectTerminator(getOpcode());
+ }
static const char* getOpcodeName(unsigned OpCode);
@@ -203,6 +205,17 @@ public:
}
}
+ /// Returns true if the OpCode is a terminator with indirect targets.
+ static inline bool isIndirectTerminator(unsigned OpCode) {
+ switch (OpCode) {
+ case Instruction::IndirectBr:
+ case Instruction::CallBr:
+ return true;
+ default:
+ return false;
+ }
+ }
+
//===--------------------------------------------------------------------===//
// Metadata manipulation.
//===--------------------------------------------------------------------===//
@@ -298,9 +311,6 @@ public:
/// Returns false if no metadata was found.
bool extractProfTotalWeight(uint64_t &TotalVal) const;
- /// Updates branch_weights metadata by scaling it by \p S / \p T.
- void updateProfWeight(uint64_t S, uint64_t T);
-
/// Sets the branch_weights metadata to \p W for CallInst.
void setProfWeight(uint64_t W);
@@ -655,6 +665,10 @@ public:
/// instruction must be a terminator.
void setSuccessor(unsigned Idx, BasicBlock *BB);
+ /// Replace the specified successor OldBB with the provided block.
+ /// This instruction must be a terminator.
+ void replaceSuccessorWith(BasicBlock *OldBB, BasicBlock *NewBB);
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() >= Value::InstructionVal;
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index 0ff8f56f213a..215ce45c7b75 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -1,9 +1,8 @@
//===- llvm/Instructions.h - Instruction subclass definitions ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -522,9 +521,11 @@ private:
// AtomicCmpXchgInst Class
//===----------------------------------------------------------------------===//
-/// an instruction that atomically checks whether a
+/// An instruction that atomically checks whether a
/// specified value is in a memory location, and, if it is, stores a new value
-/// there. Returns the value that was loaded.
+/// there. The value returned by this instruction is a pair containing the
+/// original value as first element, and an i1 indicating success (true) or
+/// failure (false) as second element.
///
class AtomicCmpXchgInst : public Instruction {
void Init(Value *Ptr, Value *Cmp, Value *NewVal,
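An illustration of consuming the pair described above (Builder, Ptr, Expected and Desired assumed):

    AtomicCmpXchgInst *CX = Builder.CreateAtomicCmpXchg(
        Ptr, Expected, Desired, AtomicOrdering::SequentiallyConsistent,
        AtomicOrdering::SequentiallyConsistent);
    Value *Old     = Builder.CreateExtractValue(CX, 0); // previously stored value
    Value *Success = Builder.CreateExtractValue(CX, 1); // i1: true iff the store happened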
@@ -725,8 +726,14 @@ public:
/// *p = old <unsigned v ? old : v
UMin,
+ /// *p = old + v
+ FAdd,
+
+ /// *p = old - v
+ FSub,
+
FIRST_BINOP = Xchg,
- LAST_BINOP = UMin,
+ LAST_BINOP = FSub,
BAD_BINOP
};
@@ -748,6 +755,16 @@ public:
static StringRef getOperationName(BinOp Op);
+ static bool isFPOperation(BinOp Op) {
+ switch (Op) {
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ return true;
+ default:
+ return false;
+ }
+ }
+
void setOperation(BinOp Operation) {
unsigned short SubclassData = getSubclassDataFromInstruction();
setInstructionSubclassData((SubclassData & 31) |
@@ -805,6 +822,10 @@ public:
return getPointerOperand()->getType()->getPointerAddressSpace();
}
+ bool isFloatingPointOperation() const {
+ return isFPOperation(getOperation());
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::AtomicRMW;
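A sketch of emitting the new floating-point RMW operations (Builder, Ptr and FVal assumed; FVal must be a floating-point value):

    // Atomically performs *Ptr = *Ptr + FVal and yields the old value.
    Value *Old = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, FVal,
                                         AtomicOrdering::Monotonic);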
@@ -1115,71 +1136,6 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
//===----------------------------------------------------------------------===//
-// UnaryOperator Class
-//===----------------------------------------------------------------------===//
-
-/// a unary instruction
-class UnaryOperator : public UnaryInstruction {
- void AssertOK();
-
-protected:
- UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
- const Twine &Name, Instruction *InsertBefore);
- UnaryOperator(UnaryOps iType, Value *S, Type *Ty,
- const Twine &Name, BasicBlock *InsertAtEnd);
-
- // Note: Instruction needs to be a friend here to call cloneImpl.
- friend class Instruction;
-
- UnaryOperator *cloneImpl() const;
-
-public:
-
- /// Construct a unary instruction, given the opcode and an operand.
- /// Optionally (if InstBefore is specified) insert the instruction
- /// into a BasicBlock right before the specified instruction. The specified
- /// Instruction is allowed to be a dereferenced end iterator.
- ///
- static UnaryOperator *Create(UnaryOps Op, Value *S,
- const Twine &Name = Twine(),
- Instruction *InsertBefore = nullptr);
-
- /// Construct a unary instruction, given the opcode and an operand.
- /// Also automatically insert this instruction to the end of the
- /// BasicBlock specified.
- ///
- static UnaryOperator *Create(UnaryOps Op, Value *S,
- const Twine &Name,
- BasicBlock *InsertAtEnd);
-
- /// These methods just forward to Create, and are useful when you
- /// statically know what type of instruction you're going to create. These
- /// helpers just save some typing.
-#define HANDLE_UNARY_INST(N, OPC, CLASS) \
- static UnaryInstruction *Create##OPC(Value *V, \
- const Twine &Name = "") {\
- return Create(Instruction::OPC, V, Name);\
- }
-#include "llvm/IR/Instruction.def"
-#define HANDLE_UNARY_INST(N, OPC, CLASS) \
- static UnaryInstruction *Create##OPC(Value *V, \
- const Twine &Name, BasicBlock *BB) {\
- return Create(Instruction::OPC, V, Name, BB);\
- }
-#include "llvm/IR/Instruction.def"
-#define HANDLE_UNARY_INST(N, OPC, CLASS) \
- static UnaryInstruction *Create##OPC(Value *V, \
- const Twine &Name, Instruction *I) {\
- return Create(Instruction::OPC, V, Name, I);\
- }
-#include "llvm/IR/Instruction.def"
-
- UnaryOps getOpcode() const {
- return static_cast<UnaryOps>(Instruction::getOpcode());
- }
-};
-
-//===----------------------------------------------------------------------===//
// ICmpInst Class
//===----------------------------------------------------------------------===//
@@ -1524,25 +1480,44 @@ public:
CallInst(Ty, Func, Args, Bundles, NameStr, InsertAtEnd);
}
- static CallInst *Create(Function *Func, const Twine &NameStr = "",
+ static CallInst *Create(FunctionCallee Func, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(Func->getFunctionType(), Func, NameStr, InsertBefore);
+ return Create(Func.getFunctionType(), Func.getCallee(), NameStr,
+ InsertBefore);
}
- static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
+ static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(Func->getFunctionType(), Func, Args, NameStr, InsertBefore);
+ return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles,
+ NameStr, InsertBefore);
}
- static CallInst *Create(Function *Func, const Twine &NameStr,
+ static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
+ const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+ InsertBefore);
+ }
+
+ static CallInst *Create(FunctionCallee Func, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- return Create(Func->getFunctionType(), Func, NameStr, InsertAtEnd);
+ return Create(Func.getFunctionType(), Func.getCallee(), NameStr,
+ InsertAtEnd);
}
- static CallInst *Create(Function *Func, ArrayRef<Value *> Args,
+ static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return Create(Func->getFunctionType(), Func, Args, NameStr, InsertAtEnd);
+ return Create(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+ InsertAtEnd);
+ }
+
+ static CallInst *Create(FunctionCallee Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(Func.getFunctionType(), Func.getCallee(), Args, Bundles,
+ NameStr, InsertAtEnd);
}
// Deprecated [opaque pointer types]
@@ -1684,9 +1659,6 @@ public:
addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice);
}
- /// Check if this call is an inline asm statement.
- bool isInlineAsm() const { return isa<InlineAsm>(getCalledOperand()); }
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Call;
@@ -1695,6 +1667,9 @@ public:
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
+ /// Updates profile metadata by scaling it by \p S / \p T.
+ void updateProfWeight(uint64_t S, uint64_t T);
+
private:
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
@@ -2008,6 +1983,10 @@ public:
return User::operator new(s, 3);
}
+ /// Swap the first 2 operands and adjust the mask to preserve the semantics
+ /// of the instruction.
+ void commute();
+
/// Return true if a shufflevector instruction can be
/// formed with the specified operands.
static bool isValidOperands(const Value *V1, const Value *V2,
@@ -2696,6 +2675,14 @@ public:
block_begin()[i] = BB;
}
+ /// Replace every incoming basic block \p Old with basic block \p New.
+ void replaceIncomingBlockWith(const BasicBlock *Old, BasicBlock *New) {
+ assert(New && Old && "PHI node got a null basic block!");
+ for (unsigned Op = 0, NumOps = getNumOperands(); Op != NumOps; ++Op)
+ if (getIncomingBlock(Op) == Old)
+ setIncomingBlock(Op, New);
+ }
+
/// Add an incoming value to the end of the PHI list
///
void addIncoming(Value *V, BasicBlock *BB) {
@@ -2739,6 +2726,19 @@ public:
return getIncomingValue(Idx);
}
+ /// Set every incoming value for block \p BB to \p V.
+ void setIncomingValueForBlock(const BasicBlock *BB, Value *V) {
+ assert(BB && "PHI node got a null basic block!");
+ bool Found = false;
+ for (unsigned Op = 0, NumOps = getNumOperands(); Op != NumOps; ++Op)
+ if (getIncomingBlock(Op) == BB) {
+ Found = true;
+ setIncomingValue(Op, V);
+ }
+ (void)Found;
+ assert(Found && "Invalid basic block argument to set!");
+ }
+
/// If the specified PHI node always merges together the
/// same value, return the value, otherwise return null.
Value *hasConstantValue() const;
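Usage sketch for the two new PHI helpers (PN, OldBB, NewBB and V assumed):

    PN->replaceIncomingBlockWith(OldBB, NewBB); // retargets every edge coming from OldBB
    PN->setIncomingValueForBlock(NewBB, V);     // asserts if NewBB is not an incoming block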
@@ -3450,6 +3450,60 @@ public:
}
};
+/// A wrapper class to simplify modification of SwitchInst cases along with
+/// their prof branch_weights metadata.
+class SwitchInstProfUpdateWrapper {
+ SwitchInst &SI;
+ Optional<SmallVector<uint32_t, 8> > Weights = None;
+
+ // Sticky invalid state is needed to safely ignore operations with prof data
+ // in cases where SwitchInstProfUpdateWrapper is created from SwitchInst
+ // with inconsistent prof data. TODO: once we fix all prof data
+ // inconsistencies we can turn invalid state to assertions.
+ enum {
+ Invalid,
+ Initialized,
+ Changed
+ } State = Invalid;
+
+protected:
+ static MDNode *getProfBranchWeightsMD(const SwitchInst &SI);
+
+ MDNode *buildProfBranchWeightsMD();
+
+ void init();
+
+public:
+ using CaseWeightOpt = Optional<uint32_t>;
+ SwitchInst *operator->() { return &SI; }
+ SwitchInst &operator*() { return SI; }
+ operator SwitchInst *() { return &SI; }
+
+ SwitchInstProfUpdateWrapper(SwitchInst &SI) : SI(SI) { init(); }
+
+ ~SwitchInstProfUpdateWrapper() {
+ if (State == Changed)
+ SI.setMetadata(LLVMContext::MD_prof, buildProfBranchWeightsMD());
+ }
+
+ /// Delegate the call to the underlying SwitchInst::removeCase() and remove
+ /// the corresponding branch weight.
+ SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I);
+
+ /// Delegate the call to the underlying SwitchInst::addCase() and set the
+ /// specified branch weight for the added case.
+ void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W);
+
+ /// Delegate the call to the underlying SwitchInst::eraseFromParent() and mark
+ /// this object so it will not touch the underlying SwitchInst in its destructor.
+ SymbolTableList<Instruction>::iterator eraseFromParent();
+
+ void setSuccessorWeight(unsigned idx, CaseWeightOpt W);
+ CaseWeightOpt getSuccessorWeight(unsigned idx);
+
+ static CaseWeightOpt getSuccessorWeight(const SwitchInst &SI, unsigned idx);
+};
+
template <>
struct OperandTraits<SwitchInst> : public HungoffOperandTraits<2> {
};
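Intended use of the wrapper, sketched with an assumed SwitchInst &SI carrying branch_weights metadata and assumed OnVal/DestBB:

    {
      SwitchInstProfUpdateWrapper SIW(SI);
      SIW.addCase(OnVal, DestBB, /*W=*/10);     // weight recorded alongside the new case
      SIW.removeCase(SIW->case_begin());        // the matching weight is dropped as well
    } // destructor writes the updated branch_weights metadata back to SI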
@@ -3688,36 +3742,36 @@ public:
NameStr, InsertAtEnd);
}
- static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
- return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
- None, NameStr, InsertBefore);
+ return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+ IfException, Args, None, NameStr, InsertBefore);
}
- static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
- return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
- Bundles, NameStr, InsertBefore);
+ return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+ IfException, Args, Bundles, NameStr, InsertBefore);
}
- static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
- NameStr, InsertAtEnd);
+ return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+ IfException, Args, NameStr, InsertAtEnd);
}
- static InvokeInst *Create(Function *Func, BasicBlock *IfNormal,
+ static InvokeInst *Create(FunctionCallee Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return Create(Func->getFunctionType(), Func, IfNormal, IfException, Args,
- Bundles, NameStr, InsertAtEnd);
+ return Create(Func.getFunctionType(), Func.getCallee(), IfNormal,
+ IfException, Args, Bundles, NameStr, InsertAtEnd);
}
// Deprecated [opaque pointer types]
@@ -3852,6 +3906,249 @@ InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
}
//===----------------------------------------------------------------------===//
+// CallBrInst Class
+//===----------------------------------------------------------------------===//
+
+/// CallBr instruction, tracking function calls that may not return control but
+/// instead transfer it to a third location. The SubclassData field is used to
+/// hold the calling convention of the call.
+///
+class CallBrInst : public CallBase {
+
+ unsigned NumIndirectDests;
+
+ CallBrInst(const CallBrInst &BI);
+
+ /// Construct a CallBrInst given a range of arguments.
+ ///
+ /// Construct a CallBrInst from a range of arguments
+ inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+ const Twine &NameStr, Instruction *InsertBefore);
+
+ inline CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+ void init(FunctionType *FTy, Value *Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+
+ /// Compute the number of operands to allocate.
+ static int ComputeNumOperands(int NumArgs, int NumIndirectDests,
+ int NumBundleInputs = 0) {
+ // We need one operand for the called function, plus our extra operands and
+ // the input operand counts provided.
+ return 2 + NumIndirectDests + NumArgs + NumBundleInputs;
+ }
+
+protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ CallBrInst *cloneImpl() const;
+
+public:
+ static CallBrInst *Create(FunctionType *Ty, Value *Func,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args, const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
+ int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size());
+ return new (NumOperands)
+ CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None,
+ NumOperands, NameStr, InsertBefore);
+ }
+
+ static CallBrInst *Create(FunctionType *Ty, Value *Func,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(),
+ CountBundleInputs(Bundles));
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (NumOperands, DescriptorBytes)
+ CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles,
+ NumOperands, NameStr, InsertBefore);
+ }
+
+ static CallBrInst *Create(FunctionType *Ty, Value *Func,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size());
+ return new (NumOperands)
+ CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, None,
+ NumOperands, NameStr, InsertAtEnd);
+ }
+
+ static CallBrInst *Create(FunctionType *Ty, Value *Func,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ int NumOperands = ComputeNumOperands(Args.size(), IndirectDests.size(),
+ CountBundleInputs(Bundles));
+ unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+ return new (NumOperands, DescriptorBytes)
+ CallBrInst(Ty, Func, DefaultDest, IndirectDests, Args, Bundles,
+ NumOperands, NameStr, InsertAtEnd);
+ }
+
+ static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args, const Twine &NameStr,
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+ IndirectDests, Args, NameStr, InsertBefore);
+ }
+
+ static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles = None,
+ const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr) {
+ return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+ IndirectDests, Args, Bundles, NameStr, InsertBefore);
+ }
+
+ static CallBrInst *Create(FunctionCallee Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args, const Twine &NameStr,
+ BasicBlock *InsertAtEnd) {
+ return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+ IndirectDests, Args, NameStr, InsertAtEnd);
+ }
+
+ static CallBrInst *Create(FunctionCallee Func,
+ BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr, BasicBlock *InsertAtEnd) {
+ return Create(Func.getFunctionType(), Func.getCallee(), DefaultDest,
+ IndirectDests, Args, Bundles, NameStr, InsertAtEnd);
+ }
+
+ /// Create a clone of \p CBI with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned callbr instruction is identical to \p CBI in every way
+ /// except that the operand bundles for the new instruction are set to the
+ /// operand bundles in \p Bundles.
+ static CallBrInst *Create(CallBrInst *CBI,
+ ArrayRef<OperandBundleDef> Bundles,
+ Instruction *InsertPt = nullptr);
+
+ /// Return the number of callbr indirect dest labels.
+ ///
+ unsigned getNumIndirectDests() const { return NumIndirectDests; }
+
+ /// getIndirectDestLabel - Return the i-th indirect dest label.
+ ///
+ Value *getIndirectDestLabel(unsigned i) const {
+ assert(i < getNumIndirectDests() && "Out of bounds!");
+ return getOperand(i + getNumArgOperands() + getNumTotalBundleOperands() +
+ 1);
+ }
+
+ Value *getIndirectDestLabelUse(unsigned i) const {
+ assert(i < getNumIndirectDests() && "Out of bounds!");
+ return getOperandUse(i + getNumArgOperands() + getNumTotalBundleOperands() +
+ 1);
+ }
+
+ // Return the destination basic blocks...
+ BasicBlock *getDefaultDest() const {
+ return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() - 1));
+ }
+ BasicBlock *getIndirectDest(unsigned i) const {
+ return cast<BasicBlock>(*(&Op<-1>() - getNumIndirectDests() + i));
+ }
+ SmallVector<BasicBlock *, 16> getIndirectDests() const {
+ SmallVector<BasicBlock *, 16> IndirectDests;
+ for (unsigned i = 0, e = getNumIndirectDests(); i < e; ++i)
+ IndirectDests.push_back(getIndirectDest(i));
+ return IndirectDests;
+ }
+ void setDefaultDest(BasicBlock *B) {
+ *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast<Value *>(B);
+ }
+ void setIndirectDest(unsigned i, BasicBlock *B) {
+ *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast<Value *>(B);
+ }
+
+ BasicBlock *getSuccessor(unsigned i) const {
+    assert(i < getNumSuccessors() &&
+ "Successor # out of range for callbr!");
+ return i == 0 ? getDefaultDest() : getIndirectDest(i - 1);
+ }
+
+ void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+ assert(idx < getNumIndirectDests() + 1 &&
+ "Successor # out of range for callbr!");
+    *(&Op<-1>() - getNumIndirectDests() - 1 + idx) =
+ reinterpret_cast<Value *>(NewSucc);
+ }
+
+ unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; }
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Instruction *I) {
+ return (I->getOpcode() == Instruction::CallBr);
+ }
+ static bool classof(const Value *V) {
+ return isa<Instruction>(V) && classof(cast<Instruction>(V));
+ }
+
+private:
+
+ // Shadow Instruction::setInstructionSubclassData with a private forwarding
+ // method so that subclasses cannot accidentally use it.
+ void setInstructionSubclassData(unsigned short D) {
+ Instruction::setInstructionSubclassData(D);
+ }
+};
+
+CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+ const Twine &NameStr, Instruction *InsertBefore)
+ : CallBase(Ty->getReturnType(), Instruction::CallBr,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertBefore) {
+ init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr);
+}
+
+CallBrInst::CallBrInst(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, int NumOperands,
+ const Twine &NameStr, BasicBlock *InsertAtEnd)
+ : CallBase(
+ cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType())
+ ->getReturnType(),
+ Instruction::CallBr,
+ OperandTraits<CallBase>::op_end(this) - NumOperands, NumOperands,
+ InsertAtEnd) {
+ init(Ty, Func, DefaultDest, IndirectDests, Args, Bundles, NameStr);
+}
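// Editorial note, not part of the patch: a minimal usage sketch for the new
// CallBrInst, assuming an inline-asm callee with one "goto" label. All names
// (Ctx, Fallthrough, Handler, EntryBB) and the constraint string are
// hypothetical.
//
//   FunctionType *VoidTy = FunctionType::get(Type::getVoidTy(Ctx), false);
//   InlineAsm *IA = InlineAsm::get(VoidTy, "jmp ${0:l}", "X",
//                                  /*hasSideEffects=*/true);
//   CallBrInst *CBI = CallBrInst::Create(
//       VoidTy, IA, /*DefaultDest=*/Fallthrough, /*IndirectDests=*/{Handler},
//       /*Args=*/{BlockAddress::get(Handler)}, "", EntryBB);
//
// The operand layout is [args..., bundle operands..., default dest,
// indirect dests..., callee], which is why getIndirectDestLabel(i) skips
// getNumArgOperands() + getNumTotalBundleOperands() + 1 operands.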
+
+//===----------------------------------------------------------------------===//
// ResumeInst Class
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 80a7a7052574..438bdb29b706 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -1,9 +1,8 @@
//===-- llvm/IntrinsicInst.h - Intrinsic Instruction Wrappers ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -209,26 +208,47 @@ namespace llvm {
/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
- enum RoundingMode {
- rmInvalid,
- rmDynamic,
- rmToNearest,
- rmDownward,
- rmUpward,
- rmTowardZero
+ /// Specifies the rounding mode to be assumed. This is only used when
+  /// constrained floating point is enabled. See the LLVM Language
+  /// Reference Manual for details.
+  enum RoundingMode : uint8_t {
+    rmDynamic,    ///< This corresponds to "round.dynamic".
+    rmToNearest,  ///< This corresponds to "round.tonearest".
+    rmDownward,   ///< This corresponds to "round.downward".
+    rmUpward,     ///< This corresponds to "round.upward".
+    rmTowardZero  ///< This corresponds to "round.towardzero".
};
- enum ExceptionBehavior {
- ebInvalid,
- ebIgnore,
- ebMayTrap,
- ebStrict
+ /// Specifies the required exception behavior. This is only used when
+  /// constrained floating point is used. See the LLVM Language
+ /// Reference Manual for details.
+ enum ExceptionBehavior : uint8_t {
+ ebIgnore, ///< This corresponds to "fpexcept.ignore".
+ ebMayTrap, ///< This corresponds to "fpexcept.maytrap".
+ ebStrict ///< This corresponds to "fpexcept.strict".
};
bool isUnaryOp() const;
bool isTernaryOp() const;
- RoundingMode getRoundingMode() const;
- ExceptionBehavior getExceptionBehavior() const;
+ Optional<RoundingMode> getRoundingMode() const;
+ Optional<ExceptionBehavior> getExceptionBehavior() const;
+
+ /// Returns a valid RoundingMode enumerator when given a string
+ /// that is valid as input in constrained intrinsic rounding mode
+ /// metadata.
+ static Optional<RoundingMode> StrToRoundingMode(StringRef);
+
+ /// For any RoundingMode enumerator, returns a string valid as input in
+ /// constrained intrinsic rounding mode metadata.
+ static Optional<StringRef> RoundingModeToStr(RoundingMode);
+
+ /// Returns a valid ExceptionBehavior enumerator when given a string
+ /// valid as input in constrained intrinsic exception behavior metadata.
+ static Optional<ExceptionBehavior> StrToExceptionBehavior(StringRef);
+
+ /// For any ExceptionBehavior enumerator, returns a string valid as
+ /// input in constrained intrinsic exception behavior metadata.
+ static Optional<StringRef> ExceptionBehaviorToStr(ExceptionBehavior);
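// Editorial note, not part of the patch: a sketch of the new Optional-based
// API, assuming 'CI' is a const ConstrainedFPIntrinsic* obtained elsewhere
// (e.g. via dyn_cast<>). Both getters can now return None when the metadata
// string on the call is malformed.
//
//   if (Optional<ConstrainedFPIntrinsic::RoundingMode> RM =
//           CI->getRoundingMode()) {
//     bool IsDynamic = (*RM == ConstrainedFPIntrinsic::rmDynamic);
//     (void)IsDynamic;
//   }
//   Optional<StringRef> S = ConstrainedFPIntrinsic::RoundingModeToStr(
//       ConstrainedFPIntrinsic::rmToNearest);   // yields "round.tonearest"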
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
@@ -239,6 +259,8 @@ namespace llvm {
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -266,6 +288,84 @@ namespace llvm {
}
};
+ /// This class represents an intrinsic that is based on a binary operation.
+ /// This includes op.with.overflow and saturating add/sub intrinsics.
+ class BinaryOpIntrinsic : public IntrinsicInst {
+ public:
+ static bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ return true;
+ default:
+ return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+
+ Value *getLHS() const { return const_cast<Value*>(getArgOperand(0)); }
+ Value *getRHS() const { return const_cast<Value*>(getArgOperand(1)); }
+
+ /// Returns the binary operation underlying the intrinsic.
+ Instruction::BinaryOps getBinaryOp() const;
+
+ /// Whether the intrinsic is signed or unsigned.
+ bool isSigned() const;
+
+ /// Returns one of OBO::NoSignedWrap or OBO::NoUnsignedWrap.
+ unsigned getNoWrapKind() const;
+ };
+
+ /// Represents an op.with.overflow intrinsic.
+ class WithOverflowInst : public BinaryOpIntrinsic {
+ public:
+ static bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ return true;
+ default:
+ return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
+ /// Represents a saturating add/sub intrinsic.
+ class SaturatingInst : public BinaryOpIntrinsic {
+ public:
+ static bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ return true;
+ default:
+ return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
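// Editorial note, not part of the patch: a sketch of matching the new wrapper
// classes in a pass, assuming 'I' is an Instruction*.
//
//   if (auto *WO = dyn_cast<WithOverflowInst>(I)) {
//     Instruction::BinaryOps Opc = WO->getBinaryOp(); // e.g. Instruction::Add
//     bool Signed = WO->isSigned();
//     Value *L = WO->getLHS(), *R = WO->getRHS();
//     (void)Opc; (void)Signed; (void)L; (void)R;
//   } else if (auto *Sat = dyn_cast<SaturatingInst>(I)) {
//     // Same operand accessors; getNoWrapKind() is OBO::NoSignedWrap or
//     // OBO::NoUnsignedWrap depending on isSigned().
//     unsigned NoWrap = Sat->getNoWrapKind();
//     (void)NoWrap;
//   }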
/// Common base class for all memory intrinsics. Simply provides
/// common methods.
/// Written as CRTP to avoid a common base class amongst the
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index e1e17f983ff8..f38f92022d21 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -1,9 +1,8 @@
-//===-- llvm/Instrinsics.h - LLVM Intrinsic Function Handling ---*- C++ -*-===//
+//===- Intrinsics.h - LLVM Intrinsic Function Handling ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -100,7 +99,8 @@ namespace Intrinsic {
Void, VarArg, MMX, Token, Metadata, Half, Float, Double, Quad,
Integer, Vector, Pointer, Struct,
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
- SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
+ SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt,
+ VecElementArgument
} Kind;
union {
@@ -117,20 +117,22 @@ namespace Intrinsic {
AK_AnyInteger,
AK_AnyFloat,
AK_AnyVector,
- AK_AnyPointer
+ AK_AnyPointer,
+ AK_MatchType = 7
};
unsigned getArgumentNumber() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == SameVecWidthArgument || Kind == PtrToArgument ||
- Kind == PtrToElt);
+ Kind == PtrToElt || Kind == VecElementArgument);
return Argument_Info >> 3;
}
ArgKind getArgumentKind() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
- Kind == SameVecWidthArgument || Kind == PtrToArgument);
+ Kind == SameVecWidthArgument || Kind == PtrToArgument ||
+ Kind == VecElementArgument);
return (ArgKind)(Argument_Info & 7);
}
@@ -162,14 +164,21 @@ namespace Intrinsic {
/// of IITDescriptors.
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl<IITDescriptor> &T);
- /// Match the specified type (which comes from an intrinsic argument or return
- /// value) with the type constraints specified by the .td file. If the given
- /// type is an overloaded type it is pushed to the ArgTys vector.
+ enum MatchIntrinsicTypesResult {
+ MatchIntrinsicTypes_Match = 0,
+ MatchIntrinsicTypes_NoMatchRet = 1,
+ MatchIntrinsicTypes_NoMatchArg = 2,
+ };
+
+ /// Match the specified function type with the type constraints specified by
+ /// the .td file. If the given type is an overloaded type it is pushed to the
+ /// ArgTys vector.
///
  /// Returns MatchIntrinsicTypes_Match if the given function type satisfies the
  /// constraints, otherwise an enumerator identifying whether the return type
  /// or one of the argument types failed to match.
- bool matchIntrinsicType(Type *Ty, ArrayRef<IITDescriptor> &Infos,
- SmallVectorImpl<Type*> &ArgTys);
+ MatchIntrinsicTypesResult
+ matchIntrinsicSignature(FunctionType *FTy, ArrayRef<IITDescriptor> &Infos,
+ SmallVectorImpl<Type *> &ArgTys);
/// Verify if the intrinsic has variable arguments. This method is intended to
/// be called after all the fixed arguments have been matched first.
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 64603d8ea030..d660f8278437 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -1,9 +1,8 @@
//===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -70,6 +69,11 @@ class Returned<int argNo> : IntrinsicProperty {
int ArgNo = argNo;
}
+// ImmArg - The specified argument must be an immediate.
+class ImmArg<int argNo> : IntrinsicProperty {
+ int ArgNo = argNo;
+}
+
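// Editorial note, not part of the patch: ImmArg<N> tells the verifier that
// operand N of any call to the intrinsic must be an immediate (compile-time
// constant). A sketch of building such a call for llvm.ctlz, whose flag
// operand gains ImmArg<1> later in this patch; 'B' (an IRBuilder), 'M' (a
// Module) and 'X' (an integer Value*) are assumed to exist.
//
//   Function *Ctlz =
//       Intrinsic::getDeclaration(&M, Intrinsic::ctlz, {X->getType()});
//   Value *Cnt = B.CreateCall(
//       Ctlz, {X, ConstantInt::getFalse(M.getContext())});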
// ReadOnly - The specified argument pointer is not written to through the
// pointer by the intrinsic.
class ReadOnly<int argNo> : IntrinsicProperty {
@@ -90,6 +94,8 @@ class ReadNone<int argNo> : IntrinsicProperty {
def IntrNoReturn : IntrinsicProperty;
+def IntrWillReturn : IntrinsicProperty;
+
// IntrCold - Calls to this intrinsic are cold.
// Parallels the cold attribute on LLVM IR functions.
def IntrCold : IntrinsicProperty;
@@ -157,13 +163,19 @@ class LLVMMatchType<int num>
// the intrinsic is overloaded, so the matched type should be declared as iAny.
class LLVMExtendedType<int num> : LLVMMatchType<num>;
class LLVMTruncatedType<int num> : LLVMMatchType<num>;
-class LLVMVectorSameWidth<int num, LLVMType elty>
- : LLVMMatchType<num> {
+
+// Match the scalar/vector of another intrinsic parameter but with a different
+// element type. Either both are scalars or both are vectors with the same
+// number of elements.
+class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
+ : LLVMMatchType<idx> {
ValueType ElTy = elty.VT;
}
+
class LLVMPointerTo<int num> : LLVMMatchType<num>;
class LLVMPointerToElt<int num> : LLVMMatchType<num>;
class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
+class LLVMVectorElementType<int num> : LLVMMatchType<num>;
// Match the type of another intrinsic parameter that is expected to be a
// vector type, but change the element count to be half as many
@@ -251,6 +263,7 @@ def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
+def llvm_v32f32_ty : LLVMType<v32f32>; // 32 x float
def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double
def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double
@@ -393,9 +406,9 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[],
//===--------------------- Code Generator Intrinsics ----------------------===//
//
-def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
def int_sponentry : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
[IntrReadMem], "llvm.read_register">;
@@ -413,7 +426,7 @@ def int_localescape : Intrinsic<[], [llvm_vararg_ty]>;
// to an escaped allocation indicated by the index.
def int_localrecover : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
// Given the frame pointer passed into an SEH filter function, returns a
// pointer to the local variable area suitable for use with llvm.localrecover.
@@ -439,7 +452,8 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>,
// memory while not impeding optimization.
def int_prefetch
: Intrinsic<[], [ llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ],
- [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0> ]>;
+ [ IntrInaccessibleMemOrArgMemOnly, ReadOnly<0>, NoCapture<0>,
+ ImmArg<1>, ImmArg<2>]>;
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
@@ -480,16 +494,17 @@ def int_memcpy : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
[IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
- WriteOnly<0>, ReadOnly<1>]>;
+ WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
def int_memmove : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i1_ty],
[IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
- ReadOnly<1>]>;
+ ReadOnly<1>, ImmArg<3>]>;
def int_memset : Intrinsic<[],
[llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
llvm_i1_ty],
- [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+ [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>,
+ ImmArg<3>]>;
// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.
@@ -527,6 +542,11 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_canonicalize : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>],
[IntrNoMem]>;
+
+ def int_lround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_llround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_lrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+ def int_llrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
}
def int_minnum : Intrinsic<[llvm_anyfloat_ty],
@@ -554,8 +574,9 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
// Internal interface for object size checking
def int_objectsize : Intrinsic<[llvm_anyint_ty],
- [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable]>,
+ [llvm_anyptr_ty, llvm_i1_ty,
+ llvm_i1_ty, llvm_i1_ty],
+ [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>,
GCCBuiltin<"__builtin_object_size">;
//===--------------- Constrained Floating Point Intrinsics ----------------===//
@@ -595,6 +616,15 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
+ def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ],
+ [ llvm_anyfloat_ty,
+ llvm_metadata_ty ]>;
+
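// Editorial note, not part of the patch: a sketch of emitting the new
// constrained fptrunc with its two metadata operands. 'M' (a Module), 'Ctx',
// 'Builder' and a double-typed Value* 'V' are assumed to exist.
//
//   Function *FPTrunc = Intrinsic::getDeclaration(
//       &M, Intrinsic::experimental_constrained_fptrunc,
//       {Builder.getFloatTy(), Builder.getDoubleTy()});
//   Value *Rounding =
//       MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.dynamic"));
//   Value *Except =
//       MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.strict"));
//   Value *Res = Builder.CreateCall(FPTrunc, {V, Rounding, Except});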
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be
@@ -676,14 +706,12 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
llvm_metadata_ty,
llvm_metadata_ty ]>;
}
-// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
-// FIXME: Add intrinsics for fabs and copysign?
-
+// FIXME: Add intrinsics for fcmp, fptoui and fptosi.
//===------------------------- Expect Intrinsics --------------------------===//
//
-def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>], [IntrNoMem]>;
+def int_expect : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
//===-------------------- Bit Manipulation Intrinsics ---------------------===//
//
@@ -692,8 +720,6 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
- def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_fshl : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
@@ -701,6 +727,11 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
}
+let IntrProperties = [IntrNoMem, IntrSpeculatable, ImmArg<1>] in {
+ def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+}
+
//===------------------------ Debugger Intrinsics -------------------------===//
//
@@ -797,24 +828,30 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
//
// Expose the carry flag from add operations on two integrals.
-def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
-def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
@@ -837,23 +874,33 @@ def int_usub_sat : Intrinsic<[llvm_anyint_ty],
//
def int_smul_fix : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable, Commutative]>;
+ [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+
+def int_umul_fix : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+
+//===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===//
+//
+def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
[llvm_i64_ty, llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<1>]>;
+ [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
def int_lifetime_end : Intrinsic<[],
[llvm_i64_ty, llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<1>]>;
+ [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
[llvm_i64_ty, llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<1>]>;
+ [IntrArgMemOnly, NoCapture<1>, ImmArg<0>]>;
def int_invariant_end : Intrinsic<[],
[llvm_descriptor_ty, llvm_i64_ty,
llvm_anyptr_ty],
- [IntrArgMemOnly, NoCapture<2>]>;
+ [IntrArgMemOnly, NoCapture<2>, ImmArg<1>]>;
// launder.invariant.group can't be marked with 'readnone' (IntrNoMem),
// because it would cause CSE of two barriers with the same argument.
@@ -900,13 +947,13 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
[llvm_i64_ty, llvm_i32_ty,
llvm_anyptr_ty, llvm_i32_ty,
llvm_i32_ty, llvm_vararg_ty],
- [Throws]>;
+ [Throws, ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>]>;
def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty],
[IntrReadMem]>;
def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
[llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrReadMem]>;
+ [IntrReadMem, ImmArg<1>, ImmArg<2>]>;
//===------------------------ Coroutine Intrinsics ---------------===//
// These are documented in docs/Coroutines.rst
@@ -996,41 +1043,41 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
// Intrinsic to detect whether its argument is a constant.
def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">;
-
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
LLVMAnyPointerType<LLVMMatchType<0>>,
llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>],
- [IntrArgMemOnly]>;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrArgMemOnly, ImmArg<2>]>;
def int_masked_load : Intrinsic<[llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
- [IntrReadMem, IntrArgMemOnly]>;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
+ [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
[LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
- [IntrReadMem]>;
+ [IntrReadMem, ImmArg<1>]>;
def int_masked_scatter: Intrinsic<[],
[llvm_anyvector_ty,
LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
- LLVMVectorSameWidth<0, llvm_i1_ty>]>;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [ImmArg<2>]>;
def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
[LLVMPointerToElt<0>,
- LLVMVectorSameWidth<0, llvm_i1_ty>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
[IntrReadMem]>;
def int_masked_compressstore: Intrinsic<[],
[llvm_anyvector_ty,
LLVMPointerToElt<0>,
- LLVMVectorSameWidth<0, llvm_i1_ty>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[IntrArgMemOnly]>;
// Test whether a pointer is associated with a type metadata identifier.
@@ -1049,6 +1096,9 @@ def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>;
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
+def int_hwasan_check_memaccess :
+ Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<2>]>;
+
// Xray intrinsics
//===----------------------------------------------------------------------===//
// Custom event logging for x-ray.
@@ -1072,7 +1122,7 @@ def int_memcpy_element_unordered_atomic
],
[
IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
- ReadOnly<1>
+ ReadOnly<1>, ImmArg<3>
]>;
// @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize)
@@ -1083,62 +1133,105 @@ def int_memmove_element_unordered_atomic
],
[
IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>,
- ReadOnly<1>
+ ReadOnly<1>, ImmArg<3>
]>;
// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
def int_memset_element_unordered_atomic
: Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
- [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
+ [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0>, ImmArg<3> ]>;
//===------------------------ Reduction Intrinsics ------------------------===//
//
-def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyfloat_ty,
- llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_fmul : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyfloat_ty,
- llvm_anyvector_ty],
- [IntrNoMem]>;
-def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_mul : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_and : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_or : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_xor : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_smax : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_smin : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_umax : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_umin : Intrinsic<[llvm_anyint_ty],
+def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_fmax : Intrinsic<[llvm_anyfloat_ty],
+def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
-def int_experimental_vector_reduce_fmin : Intrinsic<[llvm_anyfloat_ty],
+def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty],
[IntrNoMem]>;
+//===---------- Intrinsics to control hardware supported loops ----------===//
+
+// Specify that the value given is the number of iterations that the next loop
+// will execute.
+def int_set_loop_iterations :
+ Intrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Specify that the value given is the number of iterations that the next loop
+// will execute. Also test that the given count is not zero, allowing it to
+// control entry to a 'while' loop.
+def int_test_set_loop_iterations :
+ Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Decrement loop counter by the given argument. Return false if the loop
+// should exit.
+def int_loop_decrement :
+ Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>;
+
+// Decrement the first operand (the loop counter) by the second operand (the
+// maximum number of elements processed in an iteration). Return the remaining
+// number of iterations still to be executed. This is effectively a sub which
+// can be used with a phi, icmp and br to control the number of iterations
+// executed, as usual.
+def int_loop_decrement_reg :
+ Intrinsic<[llvm_anyint_ty],
+ [llvm_anyint_ty, llvm_anyint_ty], [IntrNoDuplicate]>;
+
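// Editorial note, not part of the patch: a rough sketch of how a hardware-loop
// lowering pass might emit these intrinsics. 'M' (a Module*), 'Builder',
// 'TripCount' (an i32 Value*), 'LoopBody' and 'Exit' are assumed to exist.
//
//   Function *SetIters = Intrinsic::getDeclaration(
//       M, Intrinsic::set_loop_iterations, TripCount->getType());
//   Builder.CreateCall(SetIters, TripCount);            // in the preheader
//
//   Function *Dec = Intrinsic::getDeclaration(
//       M, Intrinsic::loop_decrement, TripCount->getType());
//   Value *KeepGoing = Builder.CreateCall(Dec, Builder.getInt32(1));
//   Builder.CreateCondBr(KeepGoing, LoopBody, Exit);    // in the latch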
//===----- Intrinsics that are used to provide predicate information -----===//
def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
[IntrNoMem, Returned<0>]>;
+
+//===------- Intrinsics that are used to preserve debug information -------===//
+
+def int_preserve_array_access_index : Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
+def int_preserve_union_access_index : Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
+def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>,
+ ImmArg<2>]>;
+
//===----------------------------------------------------------------------===//
// Target-specific intrinsics
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index ff25750fe399..832aca4fd30f 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -1,9 +1,8 @@
//===- IntrinsicsAARCH64.td - Defines AARCH64 intrinsics ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,8 @@ def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
LLVMMatchType<0>], [IntrNoMem]>;
+def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// HINT
@@ -290,6 +291,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
// Pairwise Add
def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
+ def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;
// Long Pairwise Add
// FIXME: In theory, we shouldn't need intrinsics for saddlp or
@@ -462,12 +464,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrArgMemOnly, NoCapture<2>]>;
class AdvSIMD_2Vec_Load_Intrinsic
- : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ : Intrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_Load_Lane_Intrinsic
- : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
- [LLVMMatchType<0>, LLVMMatchType<0>,
+ : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_Store_Intrinsic
@@ -480,12 +482,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrArgMemOnly, NoCapture<3>]>;
class AdvSIMD_3Vec_Load_Intrinsic
- : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+ : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_Load_Lane_Intrinsic
- : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
+ : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_Store_Intrinsic
@@ -499,15 +501,15 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrArgMemOnly, NoCapture<4>]>;
class AdvSIMD_4Vec_Load_Intrinsic
- : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>],
+ : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, llvm_anyvector_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_4Vec_Load_Lane_Intrinsic
- : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, llvm_anyvector_ty,
llvm_i64_ty, llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_4Vec_Store_Intrinsic
@@ -684,3 +686,50 @@ def int_aarch64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
[IntrNoMem]>;
}
+
+//===----------------------------------------------------------------------===//
+// Memory Tagging Extensions (MTE) Intrinsics
+let TargetPrefix = "aarch64" in {
+def int_aarch64_irg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
+ [IntrInaccessibleMemOnly]>;
+def int_aarch64_addg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+def int_aarch64_gmi : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+def int_aarch64_ldg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrWriteMem]>;
+def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrNoMem]>;
+
+// The following are codegen-only intrinsics for stack instrumentation.
+
+// Generate a randomly tagged stack base pointer.
+def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
+ [IntrInaccessibleMemOnly]>;
+
+// Transfer pointer tag with offset.
+// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
+// * address is the address in ptr0
+// * tag is a function of (tag in baseptr, tag_offset).
+// Address bits in baseptr and tag bits in ptr0 are ignored.
+// When the offset between ptr0 and baseptr is a compile-time constant, this can be emitted as
+// ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
+// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
+def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument.
+def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument,
+// and set memory contents to zero.
+def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for a 16-byte-aligned, 16-byte-sized memory region, and store a pair of 8-byte values.
+def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+ [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+}
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 7913ce828fbc..3982444b5401 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1,9 +1,8 @@
//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -178,7 +177,7 @@ def int_amdgcn_implicit_buffer_ptr :
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec : Intrinsic<[],
[llvm_i64_ty], // 64-bit literal constant
- [IntrConvergent]>;
+ [IntrConvergent, ImmArg<0>]>;
// Set EXEC according to a thread count packed in an SGPR input:
// thread_count = (input >> bitoffset) & 0x7f;
@@ -188,6 +187,10 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
llvm_i32_ty], // bit offset of the thread count
[IntrConvergent]>;
+def int_amdgcn_wavefrontsize :
+ GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Instruction Intrinsics
@@ -196,9 +199,9 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
- Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
- Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
@@ -207,7 +210,7 @@ def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
- Intrinsic<[], [llvm_i32_ty], []>;
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
def int_amdgcn_div_scale : Intrinsic<
// 1st parameter: Numerator
@@ -216,7 +219,7 @@ def int_amdgcn_div_scale : Intrinsic<
// second. (0 = first, 1 = second).
[llvm_anyfloat_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<2>]
>;
def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
@@ -293,29 +296,33 @@ def int_amdgcn_fract : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_cvt_pkrtz : Intrinsic<
- [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
+ Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
- [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pknorm_i16 :
+ GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
+ Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
- [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pknorm_u16 :
+ GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
+ Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_cvt_pk_i16 : Intrinsic<
+def int_amdgcn_cvt_pk_i16 :
+ GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
+ Intrinsic<
[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_cvt_pk_u16 : Intrinsic<
- [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
+ Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_class : Intrinsic<
@@ -374,7 +381,7 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
llvm_i1_ty], // isVolatile
- [IntrArgMemOnly, NoCapture<0>], "",
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>], "",
[SDNPMemOperand]
>;
@@ -389,9 +396,45 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
llvm_i1_ty], // isVolatile
- [IntrArgMemOnly, NoCapture<0>]
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]
+>;
+
+// FIXME: The m0 argument should be moved after the normal arguments
+class AMDGPUDSOrderedIntrinsic : Intrinsic<
+ [llvm_i32_ty],
+ // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
+ // the bit packing can be optimized at the IR level.
+ [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
+ llvm_i32_ty, // value to add or swap
+ llvm_i32_ty, // ordering
+ llvm_i32_ty, // scope
+ llvm_i1_ty, // isVolatile
+ llvm_i32_ty, // ordered count index (OA index), also added to the address
+ // gfx10: bits 24-27 indicate the number of active threads/dwords
+ llvm_i1_ty, // wave release, usually set to 1
+ llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
+ [NoCapture<0>,
+ ImmArg<2>, ImmArg<3>, ImmArg<4>,
+ ImmArg<5>, ImmArg<6>, ImmArg<7>
+ ]
+>;
+
+class AMDGPUDSAppendConsumedIntrinsic : Intrinsic<
+ [llvm_i32_ty],
+ [llvm_anyptr_ty, // LDS or GDS ptr
+ llvm_i1_ty], // isVolatile
+ [IntrConvergent, IntrArgMemOnly, NoCapture<0>, ImmArg<1>],
+ "",
+ [SDNPMemOperand]
>;
+def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
+def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
+
+// The pointer argument is assumed to be dynamically uniform if a VGPR.
+def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
+def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
+
def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
@@ -442,9 +485,12 @@ class arglistconcat<list<list<AMDGPUArg>> arglists, int shift = 0> {
}
// Represent texture/image types / dimensionality.
-class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> {
+class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
+ list<string> coord_names, list<string> slice_names> {
AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
string Name = name; // e.g. "2darraymsaa"
+ string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
+ bits<3> Encoding = enc;
bit DA = 0; // DA bit in MIMG encoding
list<AMDGPUArg> CoordSliceArgs =
@@ -460,17 +506,17 @@ class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_n
bits<8> NumGradients = !size(GradientArgs);
}
-def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>;
-def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>;
-def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>;
+def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
+def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
+def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
let DA = 1 in {
- def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>;
- def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>;
- def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>;
+ def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
+ def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
+ def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
}
-def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>;
+def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
let DA = 1 in {
- def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>;
+ def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
}
def AMDGPUDims {
@@ -621,6 +667,19 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RES
let LodClampMip = "mip";
}
+// Helper class for figuring out image intrinsic argument indexes.
+class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
+ int NumDataArgs = !size(P_.DataArgs);
+ int NumDmaskArgs = !if(P_.IsAtomic, 0, 1);
+ int NumVAddrArgs = !size(P_.AddrArgs);
+ int NumRSrcArgs = 1;
+ int NumSampArgs = !if(P_.IsSample, 2, 0);
+ int DmaskArgIndex = NumDataArgs;
+ int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
+ int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
+ int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
+}
+
// All dimension-aware intrinsics are derived from this class.
class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
list<IntrinsicProperty> props,
@@ -634,8 +693,13 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
!if(P_.IsSample, [llvm_v4i32_ty, // samp(SGPR)
llvm_i1_ty], []), // unorm(imm)
[llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
- llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- props, "", sdnodeprops>,
+ llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
+ !listconcat(props,
+ !if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]),
+ !if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []),
+ [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.TexFailCtrlArgIndex>,
+ ImmArg<AMDGPUImageDimIntrinsicEval<P_>.CachePolicyArgIndex>]),
+ "", sdnodeprops>,
AMDGPURsrcIntrinsic<!add(!size(P_.DataArgs), !size(P_.AddrTypes),
!if(P_.IsAtomic, 0, 1)), 1> {
AMDGPUDimProfile P = P_;
@@ -791,13 +855,13 @@ let TargetPrefix = "amdgcn" in {
defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
class AMDGPUBufferLoad : Intrinsic <
- [llvm_anyfloat_ty],
+ [llvm_any_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
- [IntrReadMem], "", [SDNPMemOperand]>,
+ [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@@ -805,20 +869,20 @@ def int_amdgcn_buffer_load : AMDGPUBufferLoad;
def int_amdgcn_s_buffer_load : Intrinsic <
[llvm_any_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // byte offset(SGPR/VGPR/imm)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc)
- [IntrNoMem]>,
+ llvm_i32_ty, // byte offset(SGPR/imm)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
+ [IntrNoMem, ImmArg<2>]>,
AMDGPURsrcIntrinsic<0>;
class AMDGPUBufferStore : Intrinsic <
[],
- [llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
+ [llvm_any_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
- [IntrWriteMem], "", [SDNPMemOperand]>,
+ [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
@@ -835,8 +899,8 @@ class AMDGPURawBufferLoad : Intrinsic <
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrReadMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
@@ -847,8 +911,8 @@ class AMDGPUStructBufferLoad : Intrinsic <
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrReadMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
@@ -859,8 +923,8 @@ class AMDGPURawBufferStore : Intrinsic <
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrWriteMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
@@ -872,8 +936,8 @@ class AMDGPUStructBufferStore : Intrinsic <
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrWriteMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
@@ -885,7 +949,7 @@ class AMDGPURawBufferAtomic : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
- [], "", [SDNPMemOperand]>,
+ [ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_raw_buffer_atomic_swap : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
@@ -905,7 +969,7 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
- [], "", [SDNPMemOperand]>,
+ [ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
class AMDGPUStructBufferAtomic : Intrinsic <
@@ -916,7 +980,7 @@ class AMDGPUStructBufferAtomic : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
- [], "", [SDNPMemOperand]>,
+ [ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_struct_buffer_atomic_swap : AMDGPUStructBufferAtomic;
def int_amdgcn_struct_buffer_atomic_add : AMDGPUStructBufferAtomic;
@@ -937,7 +1001,7 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
- [], "", [SDNPMemOperand]>,
+ [ImmArg<6>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
// Obsolescent tbuffer intrinsics.
@@ -952,7 +1016,8 @@ def int_amdgcn_tbuffer_load : Intrinsic <
llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
- [IntrReadMem], "", [SDNPMemOperand]>,
+ [IntrReadMem, ImmArg<4>, ImmArg<5>, ImmArg<6>,
+ ImmArg<7>, ImmArg<8>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_tbuffer_store : Intrinsic <
@@ -967,7 +1032,8 @@ def int_amdgcn_tbuffer_store : Intrinsic <
llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
- [IntrWriteMem], "", [SDNPMemOperand]>,
+ [IntrWriteMem, ImmArg<5>, ImmArg<6>, ImmArg<7>,
+ ImmArg<8>, ImmArg<9>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
// New tbuffer intrinsics, with:
@@ -980,8 +1046,8 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrReadMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_tbuffer_store : Intrinsic <
@@ -991,8 +1057,8 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrWriteMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_tbuffer_load : Intrinsic <
@@ -1002,8 +1068,8 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrReadMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_tbuffer_store : Intrinsic <
@@ -1014,18 +1080,18 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic <
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
- [IntrWriteMem], "", [SDNPMemOperand]>,
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
+ [IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
class AMDGPUBufferAtomic : Intrinsic <
- [llvm_i32_ty],
- [llvm_i32_ty, // vdata(VGPR)
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
- [], "", [SDNPMemOperand]>,
+ [ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1, 0>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
@@ -1045,7 +1111,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
- [], "", [SDNPMemOperand]>,
+ [ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
} // defset AMDGPUBufferIntrinsics
@@ -1062,7 +1128,7 @@ def int_amdgcn_exp : Intrinsic <[], [
llvm_i1_ty, // done
llvm_i1_ty // vm
],
- []
+ [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
>;
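// Rough usage sketch (assumed f32 overload, values illustrative): export all
// four channels to MRT0 with done and vm set; tgt, en, done and vm must now
// be immediates.
//   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r, float %g,
//                                  float %b, float %a, i1 true, i1 true)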
// exp with compr bit set.
@@ -1073,7 +1139,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
LLVMMatchType<0>, // src1
llvm_i1_ty, // done
llvm_i1_ty], // vm
- []
+ [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
>;
def int_amdgcn_buffer_wbinvl1_sc :
@@ -1090,27 +1156,27 @@ def int_amdgcn_s_dcache_inv :
def int_amdgcn_s_memtime :
GCCBuiltin<"__builtin_amdgcn_s_memtime">,
- Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
+ Intrinsic<[llvm_i64_ty], []>;
def int_amdgcn_s_sleep :
GCCBuiltin<"__builtin_amdgcn_s_sleep">,
- Intrinsic<[], [llvm_i32_ty], []> {
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
}
def int_amdgcn_s_incperflevel :
GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
- Intrinsic<[], [llvm_i32_ty], []> {
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
}
def int_amdgcn_s_decperflevel :
GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
- Intrinsic<[], [llvm_i32_ty], []> {
+ Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]> {
}
def int_amdgcn_s_getreg :
GCCBuiltin<"__builtin_amdgcn_s_getreg">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
- [IntrReadMem, IntrSpeculatable]
+ [IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<0>]
>;
// int_amdgcn_s_getpc is provided to allow a specific style of position
@@ -1129,7 +1195,7 @@ def int_amdgcn_interp_mov :
GCCBuiltin<"__builtin_amdgcn_interp_mov">,
Intrinsic<[llvm_float_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
// This intrinsic reads from lds, but the memory values are constant,
@@ -1138,16 +1204,30 @@ def int_amdgcn_interp_p1 :
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;
// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
GCCBuiltin<"__builtin_amdgcn_interp_p2">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>]>;
// See int_amdgcn_interp_p1 for why this is IntrNoMem.
+// __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
+def int_amdgcn_interp_p1_f16 :
+ GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
+ Intrinsic<[llvm_float_ty],
+ [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>;
+
+// __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
+def int_amdgcn_interp_p2_f16 :
+ GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
+ Intrinsic<[llvm_half_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
+
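+// Rough usage sketch (operand values illustrative): attribute 2, channel 0,
+// low half; attr_chan, attr and high must be immediates per the ImmArg
+// annotations above.
+//   %p1 = call float @llvm.amdgcn.interp.p1.f16(float %i, i32 0, i32 2,
+//                                                i1 false, i32 %m0)
+//   %v = call half @llvm.amdgcn.interp.p2.f16(float %p1, float %j, i32 0,
+//                                              i32 2, i1 false, i32 %m0)
+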
// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
@@ -1166,16 +1246,17 @@ def int_amdgcn_mbcnt_hi :
// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, ImmArg<1>]>;
def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_lerp :
@@ -1233,12 +1314,12 @@ def int_amdgcn_cvt_pk_u8_f32 :
>;
def int_amdgcn_icmp :
- Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrConvergent]>;
+ Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, ImmArg<2>]>;
def int_amdgcn_fcmp :
- Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
- [IntrNoMem, IntrConvergent]>;
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, ImmArg<2>]>;
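+// Rough usage sketch: the result width is now overloaded alongside the source
+// type, and the predicate operand (32 is ICMP_EQ in the CmpInst numbering)
+// must be an immediate.
+//   %ballot = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)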
def int_amdgcn_readfirstlane :
GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
@@ -1263,16 +1344,86 @@ def int_amdgcn_writelane :
[IntrNoMem, IntrConvergent]
>;
-def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
+def int_amdgcn_alignbit :
+ GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;
+def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
+// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
+//
+// bar_val is the total number of waves that will wait on this
+// barrier, minus 1.
+def int_amdgcn_ds_gws_init :
+ GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
+ Intrinsic<[],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrWriteMem, IntrInaccessibleMemOnly], "",
+ [SDNPMemOperand]
+>;
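+// Rough usage sketch: the first operand is the participating wave count minus
+// one, as described above; the resource id may stay in a register.
+//   call void @llvm.amdgcn.ds.gws.init(i32 %num_waves_minus_1, i32 %rsrc_id)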
+
+// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
+// bar_val is the total number of waves that will wait on this
+// barrier, minus 1.
+def int_amdgcn_ds_gws_barrier :
+ GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
+ Intrinsic<[],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrInaccessibleMemOnly], "",
+ [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.v(i32 resource_id)
+def int_amdgcn_ds_gws_sema_v :
+ GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_v">,
+ Intrinsic<[],
+ [llvm_i32_ty],
+ [IntrConvergent, IntrInaccessibleMemOnly], "",
+ [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.br(i32 vsrc, i32 resource_id)
+def int_amdgcn_ds_gws_sema_br :
+ GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_br">,
+ Intrinsic<[],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrInaccessibleMemOnly], "",
+ [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.p(i32 resource_id)
+def int_amdgcn_ds_gws_sema_p :
+ GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_p">,
+ Intrinsic<[],
+ [llvm_i32_ty],
+ [IntrConvergent, IntrInaccessibleMemOnly], "",
+ [SDNPMemOperand]
+>;
+
+// llvm.amdgcn.ds.gws.sema.release.all(i32 resource_id)
+def int_amdgcn_ds_gws_sema_release_all :
+ GCCBuiltin<"__builtin_amdgcn_ds_gws_sema_release_all">,
+ Intrinsic<[],
+ [llvm_i32_ty],
+ [IntrConvergent, IntrInaccessibleMemOnly], "",
+ [SDNPMemOperand]
+>;
+
// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
@@ -1295,7 +1446,7 @@ def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
// enabled, with a few exceptions: - Phi nodes which require WWM return an
// undefined value.
def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
- [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+ [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrConvergent]
>;
// Given a value, copies it while setting all the inactive lanes to a given
@@ -1328,7 +1479,8 @@ def int_amdgcn_buffer_wbinvl1_vol :
def int_amdgcn_mov_dpp :
Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+ llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<1>,
+ ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
// llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
// Should be equivalent to:
@@ -1336,8 +1488,10 @@ def int_amdgcn_mov_dpp :
// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_update_dpp :
Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
+ [IntrNoMem, IntrConvergent,
+ ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
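+// Rough usage sketch (i32 overload; the control operands are illustrative
+// immediates, as ImmArg<2>..ImmArg<5> now require):
+//   %v = call i32 @llvm.amdgcn.update.dpp.i32(i32 %old, i32 %src,
+//                                             i32 0x101, i32 15, i32 15,
+//                                             i1 false)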
def int_amdgcn_s_dcache_wb :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
@@ -1349,7 +1503,7 @@ def int_amdgcn_s_dcache_wb_vol :
def int_amdgcn_s_memrealtime :
GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
- Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>;
+ Intrinsic<[llvm_i64_ty]>;
// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
@@ -1362,6 +1516,34 @@ def int_amdgcn_ds_bpermute :
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
//===----------------------------------------------------------------------===//
+// GFX10 Intrinsics
+//===----------------------------------------------------------------------===//
+
+// llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
+def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
+ [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
+
+// llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
+def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
+ [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
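+// Rough usage sketch: src1/src2 carry the packed lane-select pattern (scalar
+// operands on the hardware); fi and bound_control must be immediates.
+//   %v = call i32 @llvm.amdgcn.permlane16(i32 %old, i32 %src0, i32 %sel_lo,
+//                                         i32 %sel_hi, i1 false, i1 false)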
+
+// llvm.amdgcn.mov.dpp8.i32 <src> <sel>
+// <sel> is a 32-bit constant whose high 8 bits must be zero; it selects
+// the lanes to read from.
+def int_amdgcn_mov_dpp8 :
+ Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent, ImmArg<1>]>;
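+// Rough usage sketch (i32 overload): assuming lane 0's selector occupies the
+// low 3 bits of <sel>, the constant below encodes the identity shuffle
+// (lane i reads lane i).
+//   %v = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %src, i32 16434824)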
+
+def int_amdgcn_s_get_waveid_in_workgroup :
+ GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
+ Intrinsic<[llvm_i32_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;
+
+//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
@@ -1377,7 +1559,7 @@ def int_amdgcn_fdot2 :
llvm_float_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c, i1 %clamp)
@@ -1392,7 +1574,7 @@ def int_amdgcn_sdot2 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c, i1 %clamp)
@@ -1407,7 +1589,7 @@ def int_amdgcn_udot2 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c, i1 %clamp)
@@ -1422,7 +1604,7 @@ def int_amdgcn_sdot4 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c, i1 %clamp)
@@ -1437,7 +1619,7 @@ def int_amdgcn_udot4 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c, i1 %clamp)
@@ -1453,7 +1635,7 @@ def int_amdgcn_sdot8 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c, i1 %clamp)
@@ -1469,30 +1651,154 @@ def int_amdgcn_udot8 :
llvm_i32_ty, // %c
llvm_i1_ty // %clamp
],
- [IntrNoMem, IntrSpeculatable]
+ [IntrNoMem, IntrSpeculatable, ImmArg<3>]
>;
//===----------------------------------------------------------------------===//
+// gfx908 intrinsics
+// ===----------------------------------------------------------------------===//
+
+class AMDGPUBufferAtomicNoRtn : Intrinsic <
+ [],
+ [llvm_anyfloat_ty, // vdata(VGPR)
+ llvm_v4i32_ty, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(SGPR/VGPR/imm)
+ llvm_i1_ty], // slc(imm)
+ [], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+
+class AMDGPUGlobalAtomicNoRtn : Intrinsic <
+ [],
+ [llvm_anyptr_ty, // vaddr
+ llvm_anyfloat_ty], // vdata(VGPR)
+ [IntrArgMemOnly, NoCapture<0>], "", [SDNPMemOperand]>;
+
+def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn;
+def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn;
+
+// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
+def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
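+// Rough usage sketch for the variant above: one 32x32x1 f32 MFMA step that
+// accumulates into %acc, with cbsz/abid/blgp all zero immediates.
+//   %d = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(
+//            float %a, float %b, <32 x float> %acc, i32 0, i32 0, i32 0)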
+
+def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
+ [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
+
+//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
// ===----------------------------------------------------------------------===//
-def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
[llvm_i1_ty], [IntrConvergent]
>;
-def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
- [llvm_i64_ty], [IntrConvergent]
+def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
+ [llvm_anyint_ty], [IntrConvergent]
>;
-def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
- [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
+ [llvm_i1_ty, llvm_anyint_ty], [IntrNoMem, IntrConvergent]
>;
def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
- [llvm_i64_ty], [IntrConvergent]
+ [llvm_anyint_ty], [IntrConvergent]
>;
-def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
+def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], [IntrConvergent]>;
// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index 4e11f9c29dd0..4792af097d95 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -1,9 +1,8 @@
//===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,7 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
// A space-consuming intrinsic primarily for testing ARMConstantIslands. The
// first argument is the number of bytes this "instruction" takes up, the second
// argument and the return value are essentially chains, used to force ordering
// during ISel.
-def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
// 16-bit multiplications
def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
@@ -263,59 +262,59 @@ def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
// Coprocessor
def int_arm_ldc : GCCBuiltin<"__builtin_arm_ldc">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_ldcl : GCCBuiltin<"__builtin_arm_ldcl">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_ldc2 : GCCBuiltin<"__builtin_arm_ldc2">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_ldc2l : GCCBuiltin<"__builtin_arm_ldc2l">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_stc : GCCBuiltin<"__builtin_arm_stc">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_stcl : GCCBuiltin<"__builtin_arm_stcl">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_stc2 : GCCBuiltin<"__builtin_arm_stc2">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
def int_arm_stc2l : GCCBuiltin<"__builtin_arm_stc2l">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], [ImmArg<0>, ImmArg<1>]>;
// Move to coprocessor
def int_arm_mcr : GCCBuiltin<"__builtin_arm_mcr">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
def int_arm_mcr2 : GCCBuiltin<"__builtin_arm_mcr2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
// Move from coprocessor
def int_arm_mrc : GCCBuiltin<"__builtin_arm_mrc">,
MSBuiltin<"_MoveFromCoprocessor">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
def int_arm_mrc2 : GCCBuiltin<"__builtin_arm_mrc2">,
MSBuiltin<"_MoveFromCoprocessor2">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
// Coprocessor data processing
def int_arm_cdp : GCCBuiltin<"__builtin_arm_cdp">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
def int_arm_cdp2 : GCCBuiltin<"__builtin_arm_cdp2">,
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
// Move from two registers to coprocessor
def int_arm_mcrr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>;
def int_arm_mcrr2 : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<4>]>;
def int_arm_mrrc : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>;
def int_arm_mrrc2 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty,
- llvm_i32_ty, llvm_i32_ty], []>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, ImmArg<1>, ImmArg<2>]>;
//===----------------------------------------------------------------------===//
// CRC32
@@ -334,6 +333,18 @@ def int_arm_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
//===----------------------------------------------------------------------===//
+// CMSE
+
+def int_arm_cmse_tt : GCCBuiltin<"__builtin_arm_cmse_TT">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+def int_arm_cmse_ttt : GCCBuiltin<"__builtin_arm_cmse_TTT">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+def int_arm_cmse_tta : GCCBuiltin<"__builtin_arm_cmse_TTA">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+def int_arm_cmse_ttat : GCCBuiltin<"__builtin_arm_cmse_TTAT">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+//===----------------------------------------------------------------------===//
// HINT
def int_arm_hint : Intrinsic<[], [llvm_i32_ty]>;
diff --git a/include/llvm/IR/IntrinsicsBPF.td b/include/llvm/IR/IntrinsicsBPF.td
index 94eca8e40332..d7595a2a7700 100644
--- a/include/llvm/IR/IntrinsicsBPF.td
+++ b/include/llvm/IR/IntrinsicsBPF.td
@@ -1,9 +1,8 @@
//===- IntrinsicsBPF.td - Defines BPF intrinsics -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td
index ecc69a679553..2abc1dc07ebd 100644
--- a/include/llvm/IR/IntrinsicsHexagon.td
+++ b/include/llvm/IR/IntrinsicsHexagon.td
@@ -1,8 +1,7 @@
//===- IntrinsicsHexagon.td - Defines Hexagon intrinsics ---*- tablegen -*-===//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -52,19 +51,19 @@ class Hexagon_mem_memmemsisi_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty,
llvm_i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [IntrArgMemOnly, ImmArg<3>]>;
class Hexagon_mem_memsisisi_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ [IntrWriteMem, ImmArg<3>]>;
class Hexagon_mem_memdisisi_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty,
llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ [IntrWriteMem, ImmArg<3>]>;
//
// BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4)
@@ -554,16 +553,18 @@ class Hexagon_v32i32_v32i32v32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : V6_vaslw_acc
-class Hexagon_v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v16i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vaslw_acc
-class Hexagon_v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
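+// With the new template parameter a def can append immediate-operand
+// constraints, e.g. (hypothetical instance):
+//   def int_hexagon_V6_example :
+//   Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_example", [ImmArg<2>]>;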
// tag : V6_vmux
class Hexagon_v16i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -581,7 +582,7 @@ class Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
class Hexagon_i32_i32i32i32i32_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
// tag : V6_vandnqrt_acc
class Hexagon_v16i32_v16i32v512i1i32_Intrinsic<string GCCIntSuffix>
@@ -596,58 +597,62 @@ class Hexagon_v32i32_v32i32v1024i1i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : V6_vrmpybusi
-class Hexagon_v32i32_v32i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vrmpybusi
-class Hexagon_v64i32_v64i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v64i32_ty], [llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vsubb_dv
-class Hexagon_v64i32_v64i32v64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32v64i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : M2_mpysu_up
-class Hexagon_i32_i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : M2_mpyud_acc_ll_s0
-class Hexagon_i64_i64i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : S2_lsr_i_r_nac
-class Hexagon_i32_i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : M2_cmpysc_s0
-class Hexagon_i64_i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i64_ty], [llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_lo
-class Hexagon_v16i32_v32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v16i32_v32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v16i32_ty], [llvm_v32i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_lo
-class Hexagon_v32i32_v64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v64i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v64i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : S2_shuffoh
class Hexagon_i64_i64i64_Intrinsic<string GCCIntSuffix>
@@ -698,10 +703,10 @@ class Hexagon_v32i32_v32i32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : A4_vcmphgti
-class Hexagon_i32_i64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i64i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag :
class Hexagon_v32i32_v16i32i32_Intrinsic<string GCCIntSuffix>
@@ -710,10 +715,11 @@ class Hexagon_v32i32_v16i32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : S6_rol_i_p_or
-class Hexagon_i64_i64i64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i64i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vgtuh_and
class Hexagon_v512i1_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -728,16 +734,18 @@ class Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : A2_abssat
-class Hexagon_i32_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : A2_vcmpwgtu
-class Hexagon_i32_i64i64_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_i64i64_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_i64_ty,llvm_i64_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vtmpybus_acc
class Hexagon_v64i32_v64i32v64i32i32_Intrinsic<string GCCIntSuffix>
@@ -764,16 +772,18 @@ class Hexagon_v1024i1_v1024i1v1024i1_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : S2_asr_i_p_rnd_goodsyntax
-class Hexagon_i64_i64i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : F2_conv_w2df
-class Hexagon_double_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_double_i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_double_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vunpackuh
class Hexagon_v32i32_v16i32_Intrinsic<string GCCIntSuffix>
@@ -866,16 +876,18 @@ class Hexagon_i32_v32i32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : V6_vlutvwhi
-class Hexagon_v32i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vlutvwhi
-class Hexagon_v64i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vgtuh
class Hexagon_v512i1_v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -902,10 +914,11 @@ class Hexagon_double_i64_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : S2_vzxthw
-class Hexagon_i64_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i64_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vtmpyhb
class Hexagon_v64i32_v64i32i32_Intrinsic<string GCCIntSuffix>
@@ -944,10 +957,11 @@ class Hexagon_v16i32_v16i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : F2_conv_uw2sf
-class Hexagon_float_i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_float_i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_float_ty], [llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vswap
class Hexagon_v32i32_v512i1v16i32v16i32_Intrinsic<string GCCIntSuffix>
@@ -1022,16 +1036,17 @@ class Hexagon_v32i32_v32i32v32i32v1024i1_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : V6_vlutvvb_oracc
-class Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v16i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vlutvvb_oracc
-class Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix, list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vrmpybub_rtt
class Hexagon_v32i32_v16i32i64_Intrinsic<string GCCIntSuffix>
@@ -1052,16 +1067,18 @@ class Hexagon_i64i32_i64i64i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : V6_vrsadubi_acc
-class Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vrsadubi_acc
-class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v64i32_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : F2_conv_df2sf
class Hexagon_float_double_Intrinsic<string GCCIntSuffix>
@@ -1166,10 +1183,11 @@ class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : S2_insertp
-class Hexagon_i64_i64i64i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i64_i64i64i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty,llvm_i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : F2_sfinvsqrta
class Hexagon_floati32_float_Intrinsic<string GCCIntSuffix>
@@ -1190,16 +1208,18 @@ class Hexagon_v32i32v32i32_v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
[IntrNoMem]>;
// tag : V6_vlutvwh_oracc
-class Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v32i32_ty], [llvm_v32i32_ty,llvm_v16i32_ty,llvm_v16i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : V6_vlutvwh_oracc
-class Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix>
+class Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ !listconcat([IntrNoMem], intr_properties)>;
// tag : F2_dfcmpge
class Hexagon_i32_doubledouble_Intrinsic<string GCCIntSuffix>
@@ -1223,7 +1243,7 @@ class Hexagon_i32_float_Intrinsic<string GCCIntSuffix>
class Hexagon_i32_floati32_Intrinsic<string GCCIntSuffix>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_float_ty,llvm_i32_ty],
- [IntrNoMem, Throws]>;
+ [IntrNoMem, Throws, ImmArg<1>]>;
// tag : F2_conv_sf2ud_chop
class Hexagon_i64_float_Intrinsic<string GCCIntSuffix>
@@ -1292,10 +1312,11 @@ class Hexagon_float_floatfloatfloati32_Intrinsic<string GCCIntSuffix>
[IntrNoMem, Throws]>;
// tag : F2_dfclass
-class Hexagon_i32_doublei32_Intrinsic<string GCCIntSuffix>
+class Hexagon_i32_doublei32_Intrinsic<string GCCIntSuffix,
+ list<IntrinsicProperty> intr_properties = []>
: Hexagon_Intrinsic<GCCIntSuffix,
[llvm_i32_ty], [llvm_double_ty,llvm_i32_ty],
- [IntrNoMem, Throws]>;
+ !listconcat([IntrNoMem, Throws], intr_properties)>;
// tag : V6_vd0
class Hexagon_v16i32__Intrinsic<string GCCIntSuffix>
@@ -1393,13 +1414,13 @@ def int_hexagon_A2_vabswsat :
Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vabswsat">;
def int_hexagon_S2_asr_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r", [ImmArg<1>]>;
def int_hexagon_S2_asr_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p", [ImmArg<1>]>;
def int_hexagon_A4_combineri :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineri", [ImmArg<1>]>;
def int_hexagon_M2_mpy_nac_sat_hl_s1 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">;
@@ -1450,7 +1471,7 @@ def int_hexagon_A2_maxup :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxup">;
def int_hexagon_A4_vcmphgti :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgti", [ImmArg<1>]>;
def int_hexagon_S2_interleave :
Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_interleave">;
@@ -1471,10 +1492,10 @@ def int_hexagon_C2_cmpgtp :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtp">;
def int_hexagon_A4_cmphgtui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgtui", [ImmArg<1>]>;
def int_hexagon_C2_cmpgti :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgti", [ImmArg<1>]>;
def int_hexagon_M2_mpyi :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyi">;
@@ -1492,16 +1513,16 @@ def int_hexagon_M2_mpy_lh_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_lh_s0">;
def int_hexagon_S2_lsr_i_r_xacc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc", [ImmArg<2>]>;
def int_hexagon_S2_vrcnegh :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vrcnegh">;
def int_hexagon_S2_extractup :
-Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup">;
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S2_extractup", [ImmArg<1>, ImmArg<2>]>;
def int_hexagon_S2_asr_i_p_rnd_goodsyntax :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax", [ImmArg<1>]>;
def int_hexagon_S4_ntstbit_r :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_r">;
@@ -1528,10 +1549,10 @@ def int_hexagon_S2_asr_r_r_and :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_and">;
def int_hexagon_A4_rcmpneqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpneqi", [ImmArg<1>]>;
def int_hexagon_S2_asl_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_nac", [ImmArg<2>]>;
def int_hexagon_M2_subacc :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_subacc">;
@@ -1546,10 +1567,10 @@ def int_hexagon_M2_mpy_acc_sat_lh_s1 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">;
def int_hexagon_S2_asr_i_vh :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vh", [ImmArg<1>]>;
def int_hexagon_S2_asr_i_vw :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_vw", [ImmArg<1>]>;
def int_hexagon_A4_cmpbgtu :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtu">;
@@ -1558,7 +1579,7 @@ def int_hexagon_A4_vcmpbeq_any :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A4_vcmpbeq_any">;
def int_hexagon_A4_cmpbgti :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgti", [ImmArg<1>]>;
def int_hexagon_M2_mpyd_lh_s1 :
Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">;
@@ -1567,7 +1588,7 @@ def int_hexagon_S2_asl_r_p_nac :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_nac">;
def int_hexagon_S2_lsr_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_nac", [ImmArg<2>]>;
def int_hexagon_A2_addsp :
Hexagon_i64_i32i64_Intrinsic<"HEXAGON_A2_addsp">;
@@ -1576,7 +1597,7 @@ def int_hexagon_S4_vxsubaddw :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddw">;
def int_hexagon_A4_vcmpheqi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpheqi", [ImmArg<1>]>;
def int_hexagon_S4_vxsubaddh :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S4_vxsubaddh">;
@@ -1603,16 +1624,16 @@ def int_hexagon_A2_pxorf :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_A2_pxorf">;
def int_hexagon_C2_cmpgei :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgei", [ImmArg<1>]>;
def int_hexagon_A2_vsubub :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubub">;
def int_hexagon_S2_asl_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p", [ImmArg<1>]>;
def int_hexagon_S2_asl_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r", [ImmArg<1>]>;
def int_hexagon_A4_vrminuw :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminuw">;
@@ -1642,10 +1663,10 @@ def int_hexagon_C2_bitsset :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsset">;
def int_hexagon_M2_mpysip :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysip">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysip", [ImmArg<1>]>;
def int_hexagon_M2_mpysin :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysin">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysin", [ImmArg<1>]>;
def int_hexagon_A4_boundscheck :
Hexagon_i32_i32i64_Intrinsic<"HEXAGON_A4_boundscheck">;
@@ -1684,10 +1705,10 @@ def int_hexagon_A2_vnavgw :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavgw">;
def int_hexagon_S2_asl_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc", [ImmArg<2>]>;
def int_hexagon_S4_subi_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_S2_vzxthw :
Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxthw">;
@@ -1714,7 +1735,7 @@ def int_hexagon_S2_packhl :
Hexagon_i64_i32i32_Intrinsic<"HEXAGON_S2_packhl">;
def int_hexagon_A4_vcmpwgti :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgti", [ImmArg<1>]>;
def int_hexagon_A2_vavguwr :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguwr">;
@@ -1735,7 +1756,7 @@ def int_hexagon_F2_conv_d2df :
Hexagon_double_i64_Intrinsic<"HEXAGON_F2_conv_d2df">;
def int_hexagon_C2_cmpgtui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgtui", [ImmArg<1>]>;
def int_hexagon_A2_vconj :
Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_vconj">;
@@ -1765,7 +1786,7 @@ def int_hexagon_S2_togglebit_r :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_r">;
def int_hexagon_S2_togglebit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_togglebit_i", [ImmArg<1>]>;
def int_hexagon_F2_conv_uw2sf :
Hexagon_float_i32_Intrinsic<"HEXAGON_F2_conv_uw2sf">;
@@ -1801,10 +1822,10 @@ def int_hexagon_S2_asl_r_r_nac :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_nac">;
def int_hexagon_S2_asl_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc", [ImmArg<2>]>;
def int_hexagon_A4_vcmpwgtui :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpwgtui", [ImmArg<1>]>;
def int_hexagon_M4_vrmpyoh_acc_s0 :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">;
@@ -1831,7 +1852,7 @@ def int_hexagon_A2_vavgwcr :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgwcr">;
def int_hexagon_S2_asl_i_p_xacc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc", [ImmArg<2>]>;
def int_hexagon_A4_vrmaxw :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxw">;
@@ -1843,22 +1864,22 @@ def int_hexagon_M4_cmpyi_wh :
Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyi_wh">;
def int_hexagon_A2_tfrsi :
-Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi">;
+Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrsi", [ImmArg<0>]>;
def int_hexagon_S2_asr_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc", [ImmArg<2>]>;
def int_hexagon_A2_svnavgh :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svnavgh">;
def int_hexagon_S2_lsr_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r", [ImmArg<1>]>;
def int_hexagon_M2_vmac2 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_vmac2">;
def int_hexagon_A4_vcmphgtui :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmphgtui", [ImmArg<1>]>;
def int_hexagon_A2_svavgh :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svavgh">;
@@ -1870,7 +1891,7 @@ def int_hexagon_M4_vrmpyeh_acc_s1 :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">;
def int_hexagon_S2_lsr_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p", [ImmArg<1>]>;
def int_hexagon_A2_combine_hl :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_combine_hl">;
@@ -1909,7 +1930,7 @@ def int_hexagon_M2_mmpyul_rs0 :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs0">;
def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax", [ImmArg<1>]>;
def int_hexagon_S2_lsr_r_p_nac :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">;
@@ -1924,10 +1945,10 @@ def int_hexagon_M4_or_and :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_or_and">;
def int_hexagon_M4_mpyrr_addi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyrr_addi", [ImmArg<0>]>;
def int_hexagon_S4_or_andi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andi", [ImmArg<2>]>;
def int_hexagon_M2_mpy_sat_hl_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">;
@@ -2032,7 +2053,7 @@ def int_hexagon_F2_sffms_lib :
Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms_lib">;
def int_hexagon_C4_cmpneqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpneqi", [ImmArg<1>]>;
def int_hexagon_M4_and_xor :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_and_xor">;
@@ -2056,7 +2077,7 @@ def int_hexagon_A2_vrsadub_acc :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_A2_vrsadub_acc">;
def int_hexagon_C2_bitsclri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_bitsclri", [ImmArg<1>]>;
def int_hexagon_A2_subh_h16_sat_hh :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">;
@@ -2158,10 +2179,10 @@ def int_hexagon_S2_parityp :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_S2_parityp">;
def int_hexagon_S2_lsr_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_and", [ImmArg<2>]>;
def int_hexagon_S2_asr_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_or", [ImmArg<2>]>;
def int_hexagon_M2_mpyu_nac_ll_s0 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">;
@@ -2191,7 +2212,7 @@ def int_hexagon_M2_cnacsc_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cnacsc_s0">;
def int_hexagon_S4_subaddi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subaddi", [ImmArg<1>]>;
def int_hexagon_M2_mpyud_nac_hl_s1 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">;
@@ -2200,13 +2221,13 @@ def int_hexagon_M2_mpyud_nac_hl_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">;
def int_hexagon_S5_vasrhrnd_goodsyntax :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax", [ImmArg<1>]>;
def int_hexagon_S2_tstbit_r :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_r">;
def int_hexagon_S4_vrcrotate :
-Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate">;
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate", [ImmArg<2>]>;
def int_hexagon_M2_mmachs_s1 :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s1">;
@@ -2215,7 +2236,7 @@ def int_hexagon_M2_mmachs_s0 :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmachs_s0">;
def int_hexagon_S2_tstbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_tstbit_i", [ImmArg<1>]>;
def int_hexagon_M2_mpy_up_s1 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_up_s1">;
@@ -2227,7 +2248,7 @@ def int_hexagon_M2_mmpyuh_rs0 :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">;
def int_hexagon_S2_lsr_i_vw :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vw", [ImmArg<1>]>;
def int_hexagon_M2_mpy_rnd_ll_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">;
@@ -2266,16 +2287,16 @@ def int_hexagon_A2_subh_l16_sat_hl :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">;
def int_hexagon_C2_cmpeqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpeqi", [ImmArg<1>]>;
def int_hexagon_S2_asl_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_and", [ImmArg<2>]>;
def int_hexagon_S2_vcnegh :
Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_vcnegh">;
def int_hexagon_A4_vcmpweqi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpweqi", [ImmArg<1>]>;
def int_hexagon_M2_vdmpyrs_s0 :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">;
@@ -2308,7 +2329,7 @@ def int_hexagon_S2_cl0p :
Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_cl0p">;
def int_hexagon_S2_valignib :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_valignib", [ImmArg<2>]>;
def int_hexagon_F2_sffixupd :
Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sffixupd">;
@@ -2338,7 +2359,7 @@ def int_hexagon_M2_mmpyul_rs1 :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyul_rs1">;
def int_hexagon_S4_ntstbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_ntstbit_i", [ImmArg<1>]> ;
def int_hexagon_F2_sffixupr :
Hexagon_float_float_Intrinsic<"HEXAGON_F2_sffixupr">;
@@ -2362,7 +2383,7 @@ def int_hexagon_M2_vmpy2s_s0pack :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">;
def int_hexagon_S4_addaddi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addaddi", [ImmArg<2>]>;
def int_hexagon_M2_mpyd_acc_ll_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">;
@@ -2371,13 +2392,13 @@ def int_hexagon_M2_mpy_acc_sat_hl_s1 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">;
def int_hexagon_A4_rcmpeqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_rcmpeqi", [ImmArg<1>]>;
def int_hexagon_M4_xor_and :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_and">;
def int_hexagon_S2_asl_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_and", [ImmArg<2>]>;
def int_hexagon_M2_mmpyuh_rs1 :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">;
@@ -2386,7 +2407,7 @@ def int_hexagon_S2_asr_r_r_or :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_or">;
def int_hexagon_A4_round_ri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri", [ImmArg<1>]>;
def int_hexagon_A2_max :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_max">;
@@ -2395,10 +2416,10 @@ def int_hexagon_A4_round_rr :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_rr">;
def int_hexagon_A4_combineii :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineii">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineii", [ImmArg<0>, ImmArg<1>]>;
def int_hexagon_A4_combineir :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_combineir", [ImmArg<0>]>;
def int_hexagon_C4_and_orn :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_orn">;
@@ -2413,7 +2434,7 @@ def int_hexagon_M4_cmpyr_whc :
Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_whc">;
def int_hexagon_S2_lsr_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc", [ImmArg<2>]>;
def int_hexagon_S2_vzxtbh :
Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vzxtbh">;
@@ -2440,7 +2461,7 @@ def int_hexagon_S2_asl_r_p_or :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_r_p_or">;
def int_hexagon_S4_ori_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_asl_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_C4_nbitsset :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsset">;
@@ -2476,10 +2497,10 @@ def int_hexagon_M2_mpyd_acc_hh_s1 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">;
def int_hexagon_F2_sfimm_p :
-Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p">;
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_p", [ImmArg<0>]>;
def int_hexagon_F2_sfimm_n :
-Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n">;
+Hexagon_float_i32_Intrinsic<"HEXAGON_F2_sfimm_n", [ImmArg<0>]>;
def int_hexagon_M4_cmpyr_wh :
Hexagon_i32_i64i32_Intrinsic<"HEXAGON_M4_cmpyr_wh">;
@@ -2497,7 +2518,7 @@ def int_hexagon_A2_vavguh :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguh">;
def int_hexagon_A4_cmpbeqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbeqi", [ImmArg<1>]>;
def int_hexagon_F2_sfcmpuo :
Hexagon_i32_floatfloat_Intrinsic<"HEXAGON_F2_sfcmpuo">;
@@ -2506,7 +2527,7 @@ def int_hexagon_A2_vavguw :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavguw">;
def int_hexagon_S2_asr_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_nac", [ImmArg<2>]>;
def int_hexagon_S2_vsatwh_nopack :
Hexagon_i64_i64_Intrinsic<"HEXAGON_S2_vsatwh_nopack">;
@@ -2533,7 +2554,7 @@ def int_hexagon_A2_minp :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minp">;
def int_hexagon_S4_or_andix :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_andix", [ImmArg<2>]>;
def int_hexagon_M2_mpy_rnd_lh_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">;
@@ -2584,19 +2605,19 @@ def int_hexagon_S2_lsl_r_r_or :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_or">;
def int_hexagon_C4_cmplteui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteui", [ImmArg<1>]>;
def int_hexagon_S4_addi_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_A4_tfrcpp :
Hexagon_i64_i64_Intrinsic<"HEXAGON_A4_tfrcpp">;
def int_hexagon_S2_asr_i_svw_trun :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S2_asr_i_svw_trun", [ImmArg<1>]>;
def int_hexagon_A4_cmphgti :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgti", [ImmArg<1>]>;
def int_hexagon_A4_vrminh :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrminh">;
@@ -2614,7 +2635,7 @@ def int_hexagon_A2_vnavghcr :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vnavghcr">;
def int_hexagon_S4_subi_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_subi_asl_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_S2_lsl_r_vh :
Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vh">;
@@ -2638,7 +2659,7 @@ def int_hexagon_C2_cmpltu :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpltu">;
def int_hexagon_S2_insertp :
-Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp">;
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S2_insertp", [ImmArg<2>, ImmArg<3>]>;
def int_hexagon_M2_mpyd_rnd_ll_s1 :
Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">;
@@ -2647,7 +2668,7 @@ def int_hexagon_M2_mpyd_rnd_ll_s0 :
Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">;
def int_hexagon_S2_lsr_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_nac", [ImmArg<2>]>;
def int_hexagon_S2_extractup_rp :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_extractup_rp">;
@@ -2749,7 +2770,7 @@ def int_hexagon_M2_dpmpyss_rnd_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">;
def int_hexagon_C2_muxri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxri", [ImmArg<1>]>;
def int_hexagon_M2_vmac2es_s0 :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_vmac2es_s0">;
@@ -2767,7 +2788,7 @@ def int_hexagon_M2_mpyu_lh_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">;
def int_hexagon_S2_asl_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_or", [ImmArg<2>]>;
def int_hexagon_M2_mpyd_acc_hl_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">;
@@ -2782,7 +2803,7 @@ def int_hexagon_A2_vaddw :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddw">;
def int_hexagon_S2_asr_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_and", [ImmArg<2>]>;
def int_hexagon_A2_vaddh :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vaddh">;
@@ -2797,22 +2818,22 @@ def int_hexagon_C2_cmpeqp :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpeqp">;
def int_hexagon_M4_mpyri_addi :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addi", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_A2_not :
Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_not">;
def int_hexagon_S4_andi_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_M2_macsip :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsip", [ImmArg<2>]>;
def int_hexagon_A2_tfrcrr :
Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfrcrr">;
def int_hexagon_M2_macsin :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_macsin", [ImmArg<2>]>;
def int_hexagon_C2_orn :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_orn">;
@@ -2875,7 +2896,7 @@ def int_hexagon_F2_dfcmpge :
Hexagon_i32_doubledouble_Intrinsic<"HEXAGON_F2_dfcmpge">;
def int_hexagon_M2_accii :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_accii", [ImmArg<2>]>;
def int_hexagon_A5_vaddhubs :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A5_vaddhubs">;
@@ -2893,10 +2914,10 @@ def int_hexagon_S2_vsxthw :
Hexagon_i64_i32_Intrinsic<"HEXAGON_S2_vsxthw">;
def int_hexagon_S4_andi_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_andi_asl_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_S2_asl_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_nac", [ImmArg<2>]>;
def int_hexagon_S2_lsl_r_p_xor :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">;
@@ -2929,7 +2950,7 @@ def int_hexagon_M4_xor_andn :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_xor_andn">;
def int_hexagon_S2_addasl_rrri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri", [ImmArg<2>]>;
def int_hexagon_M5_vdmpybsu :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M5_vdmpybsu">;
@@ -2941,7 +2962,7 @@ def int_hexagon_M2_mpyu_nac_hh_s1 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">;
def int_hexagon_A2_addi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi", [ImmArg<1>]>;
def int_hexagon_A2_addp :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addp">;
@@ -2962,7 +2983,7 @@ def int_hexagon_S2_shuffeh :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_S2_shuffeh">;
def int_hexagon_S2_lsr_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_and", [ImmArg<2>]>;
def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">;
@@ -3064,13 +3085,13 @@ def int_hexagon_S5_popcountp :
Hexagon_i32_i64_Intrinsic<"HEXAGON_S5_popcountp">;
def int_hexagon_S4_extractp :
-Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp">;
+Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_S4_extractp", [ImmArg<1>, ImmArg<2>]>;
def int_hexagon_S2_cl0 :
Hexagon_i32_i32_Intrinsic<"HEXAGON_S2_cl0">;
def int_hexagon_A4_vcmpbgti :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgti", [ImmArg<1>]>;
def int_hexagon_M2_mmacls_s1 :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M2_mmacls_s1">;
@@ -3118,7 +3139,7 @@ def int_hexagon_A2_vmaxuh :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxuh">;
def int_hexagon_A4_bitspliti :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A4_bitspliti", [ImmArg<1>]>;
def int_hexagon_A2_vmaxub :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vmaxub">;
@@ -3145,13 +3166,13 @@ def int_hexagon_S2_asr_r_r_nac :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_nac">;
def int_hexagon_F2_dfimm_n :
-Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n">;
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_n", [ImmArg<0>]>;
def int_hexagon_A4_cmphgt :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmphgt">;
def int_hexagon_F2_dfimm_p :
-Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p">;
+Hexagon_double_i32_Intrinsic<"HEXAGON_F2_dfimm_p", [ImmArg<0>]>;
def int_hexagon_M2_mpyud_acc_lh_s1 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">;
@@ -3160,7 +3181,7 @@ def int_hexagon_M2_vcmpy_s1_sat_r :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">;
def int_hexagon_M4_mpyri_addr_u2 :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr_u2", [ImmArg<1>]>;
def int_hexagon_M2_vcmpy_s1_sat_i :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">;
@@ -3172,10 +3193,10 @@ def int_hexagon_M5_vrmacbuu :
Hexagon_i64_i64i64i64_Intrinsic<"HEXAGON_M5_vrmacbuu">;
def int_hexagon_S5_asrhub_rnd_sat_goodsyntax :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax", [ImmArg<1>]>;
def int_hexagon_S2_vspliceib :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_vspliceib", [ImmArg<2>]>;
def int_hexagon_M2_dpmpyss_acc_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">;
@@ -3193,25 +3214,25 @@ def int_hexagon_A2_maxp :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_maxp">;
def int_hexagon_A2_andir :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir", [ImmArg<1>]>;
def int_hexagon_F2_sfrecipa :
Hexagon_floati32_floatfloat_Intrinsic<"HEXAGON_F2_sfrecipa">;
def int_hexagon_A2_combineii :
-Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii">;
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_A2_combineii", [ImmArg<0>, ImmArg<1>]>;
def int_hexagon_A4_orn :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_orn">;
def int_hexagon_A4_cmpbgtui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpbgtui", [ImmArg<1>]>;
def int_hexagon_S2_lsr_r_r_or :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_r_r_or">;
def int_hexagon_A4_vcmpbeqi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbeqi", [ImmArg<1>]>;
def int_hexagon_S2_lsl_r_r :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r">;
@@ -3247,19 +3268,19 @@ def int_hexagon_M2_vrcmpys_s1 :
Hexagon_i64_i64i32_Intrinsic<"HEXAGON_M2_vrcmpys_s1">;
def int_hexagon_S4_or_ori :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_or_ori", [ImmArg<2>]>;
def int_hexagon_C4_fastcorner9_not :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_fastcorner9_not">;
def int_hexagon_A2_tfrih :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfrih", [ImmArg<1>]>;
def int_hexagon_A2_tfril :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_tfril", [ImmArg<1>]>;
def int_hexagon_M4_mpyri_addr :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mpyri_addr", [ImmArg<2>]>;
def int_hexagon_S2_vtrunehb :
Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vtrunehb">;
@@ -3274,16 +3295,16 @@ def int_hexagon_F2_sfsub :
Hexagon_float_floatfloat_Intrinsic<"HEXAGON_F2_sfsub">;
def int_hexagon_C2_muxii :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxii", [ImmArg<1>, ImmArg<2>]>;
def int_hexagon_C2_muxir :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C2_muxir", [ImmArg<2>]>;
def int_hexagon_A2_swiz :
Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_swiz">;
def int_hexagon_S2_asr_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_and", [ImmArg<2>]>;
def int_hexagon_M2_cmpyrsc_s0 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">;
@@ -3313,7 +3334,7 @@ def int_hexagon_M2_mpy_nac_sat_ll_s0 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">;
def int_hexagon_S4_extract :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_extract", [ImmArg<1>, ImmArg<2>]>;
def int_hexagon_A2_vcmpweq :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_A2_vcmpweq">;
@@ -3322,10 +3343,10 @@ def int_hexagon_M2_acci :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_acci">;
def int_hexagon_S2_lsr_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc", [ImmArg<2>]>;
def int_hexagon_S2_lsr_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_or", [ImmArg<2>]>;
def int_hexagon_F2_conv_ud2sf :
Hexagon_float_i64_Intrinsic<"HEXAGON_F2_conv_ud2sf">;
@@ -3334,10 +3355,10 @@ def int_hexagon_A2_tfr :
Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_tfr">;
def int_hexagon_S2_asr_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_or", [ImmArg<2>]>;
def int_hexagon_A2_subri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri", [ImmArg<0>]>;
def int_hexagon_A4_vrmaxuw :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuw">;
@@ -3349,7 +3370,7 @@ def int_hexagon_A4_vrmaxuh :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_A4_vrmaxuh">;
def int_hexagon_S2_asl_i_vw :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vw", [ImmArg<1>]>;
def int_hexagon_A2_vavgw :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgw">;
@@ -3361,13 +3382,13 @@ def int_hexagon_A2_vavgh :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vavgh">;
def int_hexagon_S2_clrbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_clrbit_i", [ImmArg<1>]>;
def int_hexagon_S2_asl_i_vh :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_vh", [ImmArg<1>]>;
def int_hexagon_S2_lsr_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_or", [ImmArg<2>]>;
def int_hexagon_S2_lsl_r_r_nac :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">;
@@ -3385,7 +3406,7 @@ def int_hexagon_M2_mmpyl_s1 :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_mmpyl_s1">;
def int_hexagon_M2_naccii :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_naccii", [ImmArg<2>]>;
def int_hexagon_S2_vrndpackwhs :
Hexagon_i32_i64_Intrinsic<"HEXAGON_S2_vrndpackwhs">;
@@ -3406,7 +3427,7 @@ def int_hexagon_M4_mac_up_s1_sat :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">;
def int_hexagon_S4_vrcrotate_acc :
-Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc">;
+Hexagon_i64_i64i64i32i32_Intrinsic<"HEXAGON_S4_vrcrotate_acc", [ImmArg<3>]>;
def int_hexagon_F2_conv_uw2df :
Hexagon_double_i32_Intrinsic<"HEXAGON_F2_conv_uw2df">;
@@ -3418,7 +3439,7 @@ def int_hexagon_S2_asr_r_r_acc :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_r_r_acc">;
def int_hexagon_A2_orir :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir", [ImmArg<1>]>;
def int_hexagon_A2_andp :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">;
@@ -3430,7 +3451,7 @@ def int_hexagon_A2_min :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_min">;
def int_hexagon_M2_mpysmi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi", [ImmArg<1>]>;
def int_hexagon_M2_vcmpy_s0_sat_r :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">;
@@ -3466,10 +3487,10 @@ def int_hexagon_F2_conv_df2w :
Hexagon_i32_double_Intrinsic<"HEXAGON_F2_conv_df2w">;
def int_hexagon_S5_asrhub_sat :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S5_asrhub_sat", [ImmArg<1>]>;
def int_hexagon_S2_asl_i_r_xacc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_xacc", [ImmArg<2>]>;
def int_hexagon_F2_conv_df2d :
Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2d">;
@@ -3505,7 +3526,7 @@ def int_hexagon_F2_sffma_sc :
Hexagon_float_floatfloatfloati32_Intrinsic<"HEXAGON_F2_sffma_sc">;
def int_hexagon_F2_dfclass :
-Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass">;
+Hexagon_i32_doublei32_Intrinsic<"HEXAGON_F2_dfclass", [ImmArg<1>]>;
def int_hexagon_F2_conv_df2ud :
Hexagon_i64_double_Intrinsic<"HEXAGON_F2_conv_df2ud">;
@@ -3520,7 +3541,7 @@ def int_hexagon_M2_cmpyrs_s1 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_cmpyrs_s1">;
def int_hexagon_C4_cmpltei :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmpltei", [ImmArg<1>]>;
def int_hexagon_C4_cmplteu :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_cmplteu">;
@@ -3532,7 +3553,7 @@ def int_hexagon_A2_subh_l16_ll :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_ll">;
def int_hexagon_S2_asr_i_r_rnd :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_rnd", [ImmArg<1>]>;
def int_hexagon_M2_vrmpy_s0 :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrmpy_s0">;
@@ -3577,7 +3598,7 @@ def int_hexagon_M2_vrcmpyi_s0c :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">;
def int_hexagon_S2_asr_i_p_rnd :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd", [ImmArg<1>]>;
def int_hexagon_A2_addpsat :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addpsat">;
@@ -3586,7 +3607,7 @@ def int_hexagon_A2_svaddhs :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svaddhs">;
def int_hexagon_S4_ori_lsr_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_ori_lsr_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">;
@@ -3619,7 +3640,7 @@ def int_hexagon_S2_asl_r_r_or :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_r_r_or">;
def int_hexagon_S4_lsli :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_lsli", [ImmArg<0>]>;
def int_hexagon_S2_lsl_r_vw :
Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vw">;
@@ -3664,7 +3685,7 @@ def int_hexagon_A2_negp :
Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_negp">;
def int_hexagon_S2_asl_i_r_sat :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_sat", [ImmArg<1>]>;
def int_hexagon_A2_addh_l16_sat_hl :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">;
@@ -3682,10 +3703,10 @@ def int_hexagon_C2_cmpgtup :
Hexagon_i32_i64i64_Intrinsic<"HEXAGON_C2_cmpgtup">;
def int_hexagon_A4_cround_ri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_ri", [ImmArg<1>]>;
def int_hexagon_S4_clbpaddi :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_S4_clbpaddi", [ImmArg<1>]>;
def int_hexagon_A4_cround_rr :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cround_rr">;
@@ -3715,13 +3736,13 @@ def int_hexagon_A2_vminub :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vminub">;
def int_hexagon_S2_extractu :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_extractu", [ImmArg<1>, ImmArg<2>]>;
def int_hexagon_A2_svsubh :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_svsubh">;
def int_hexagon_S4_clbaddi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S4_clbaddi", [ImmArg<1>]>;
def int_hexagon_F2_sffms :
Hexagon_float_floatfloatfloat_Intrinsic<"HEXAGON_F2_sffms">;
@@ -3754,7 +3775,7 @@ def int_hexagon_M2_mpy_acc_hh_s0 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">;
def int_hexagon_S4_addi_asl_ri :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S4_addi_asl_ri", [ImmArg<0>, ImmArg<2>]>;
def int_hexagon_M2_mpyd_nac_hh_s1 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">;
@@ -3763,10 +3784,10 @@ def int_hexagon_M2_mpyd_nac_hh_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">;
def int_hexagon_S2_asr_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_nac", [ImmArg<2>]>;
def int_hexagon_A4_cmpheqi :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_cmpheqi", [ImmArg<1>]>;
def int_hexagon_S2_lsr_r_p_xor :
Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">;
@@ -3781,7 +3802,7 @@ def int_hexagon_F2_conv_sf2ud_chop :
Hexagon_i64_float_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">;
def int_hexagon_C2_cmpgeui :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C2_cmpgeui", [ImmArg<1>]>;
def int_hexagon_M2_mpy_acc_sat_hh_s0 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">;
@@ -3808,7 +3829,7 @@ def int_hexagon_M2_mpyud_nac_lh_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">;
def int_hexagon_A4_round_ri_sat :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_round_ri_sat", [ImmArg<1>]>;
def int_hexagon_M2_mpy_nac_hl_s0 :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">;
@@ -3829,10 +3850,10 @@ def int_hexagon_M2_cmaci_s0 :
Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_cmaci_s0">;
def int_hexagon_S2_setbit_i :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_setbit_i", [ImmArg<1>]>;
def int_hexagon_S2_asl_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_or", [ImmArg<2>]>;
def int_hexagon_A4_andn :
Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_andn">;
@@ -3856,13 +3877,13 @@ def int_hexagon_M2_xor_xacc :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_xor_xacc">;
def int_hexagon_A4_vcmpbgtui :
-Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui">;
+Hexagon_i32_i64i32_Intrinsic<"HEXAGON_A4_vcmpbgtui", [ImmArg<1>]>;
def int_hexagon_A4_ornp :
Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A4_ornp">;
def int_hexagon_A2_tfrpi :
-Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi">;
+Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_tfrpi", [ImmArg<0>]>;
def int_hexagon_C4_and_or :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_C4_and_or">;
@@ -3886,16 +3907,16 @@ def int_hexagon_M2_vmpy2su_s0 :
Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_vmpy2su_s0">;
def int_hexagon_S2_asr_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc", [ImmArg<2>]>;
def int_hexagon_C4_nbitsclri :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_C4_nbitsclri", [ImmArg<1>]>;
def int_hexagon_S2_lsr_i_vh :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_vh", [ImmArg<1>]>;
def int_hexagon_S2_lsr_i_p_xacc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc", [ImmArg<2>]>;
// V55 Scalar Instructions.
@@ -3905,40 +3926,40 @@ Hexagon_i64i32_i64i64i64_Intrinsic<"HEXAGON_A5_ACS">;
// V60 Scalar Instructions.
def int_hexagon_S6_rol_i_p_and :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_and", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_r_xacc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_xacc", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_r_and :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_and", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_r_acc :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_acc", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_p_xacc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_xacc", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_p :
-Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p">;
+Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S6_rol_i_p", [ImmArg<1>]>;
def int_hexagon_S6_rol_i_p_nac :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_nac", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_p_acc :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_acc", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_r_or :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_or", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_r :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S6_rol_i_r", [ImmArg<1>]>;
def int_hexagon_S6_rol_i_r_nac :
-Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac">;
+Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S6_rol_i_r_nac", [ImmArg<2>]>;
def int_hexagon_S6_rol_i_p_or :
-Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or">;
+Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S6_rol_i_p_or", [ImmArg<2>]>;
// V62 Scalar Instructions.
@@ -3980,7 +4001,7 @@ def int_hexagon_M2_mnaci :
Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_mnaci">;
def int_hexagon_S2_mask :
-Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask">;
+Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_mask", [ImmArg<0>, ImmArg<1>]>;
// V60 HVX Instructions.
@@ -4021,10 +4042,10 @@ def int_hexagon_V6_vaddh_dv_128B :
Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
def int_hexagon_V6_vrmpybusi :
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi", [ImmArg<2>]>;
def int_hexagon_V6_vrmpybusi_128B :
-Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vshufoh :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vshufoh">;
@@ -4045,10 +4066,10 @@ def int_hexagon_V6_vdmpyhsuisat_128B :
Hexagon_v32i32_v64i32i32_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">;
def int_hexagon_V6_vrsadubi_acc :
-Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc", [ImmArg<3>]>;
def int_hexagon_V6_vrsadubi_acc_128B :
-Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B", [ImmArg<3>]>;
def int_hexagon_V6_vnavgw :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgw">;
@@ -4915,10 +4936,10 @@ def int_hexagon_V6_vsubhsat_128B :
Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
def int_hexagon_V6_vrmpyubi_acc :
-Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc", [ImmArg<3>]>;
def int_hexagon_V6_vrmpyubi_acc_128B :
-Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B", [ImmArg<3>]>;
def int_hexagon_V6_vabsw :
Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabsw">;
@@ -5095,10 +5116,10 @@ def int_hexagon_V6_vmpybv_acc_128B :
Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
def int_hexagon_V6_vrsadubi :
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi">;
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi", [ImmArg<2>]>;
def int_hexagon_V6_vrsadubi_128B :
-Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrsadubi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vdmpyhb_dv_acc :
Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">;
@@ -5377,10 +5398,10 @@ def int_hexagon_V6_vaddbnq_128B :
Hexagon_v32i32_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
def int_hexagon_V6_vlalignbi :
-Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlalignbi", [ImmArg<2>]>;
def int_hexagon_V6_vlalignbi_128B :
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlalignbi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vsatwh :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsatwh">;
@@ -5443,10 +5464,10 @@ def int_hexagon_V6_veqh_and_128B :
Hexagon_v1024i1_v1024i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">;
def int_hexagon_V6_valignbi :
-Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_valignbi", [ImmArg<2>]>;
def int_hexagon_V6_valignbi_128B :
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_valignbi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vaddwsat :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwsat">;
@@ -5689,10 +5710,10 @@ def int_hexagon_V6_vsubh_128B :
Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubh_128B">;
def int_hexagon_V6_vrmpyubi :
-Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+Hexagon_v32i32_v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi", [ImmArg<2>]>;
def int_hexagon_V6_vrmpyubi_128B :
-Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+Hexagon_v64i32_v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpyubi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vminw :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vminw">;
@@ -5755,10 +5776,10 @@ def int_hexagon_V6_vsubuhw_128B :
Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
def int_hexagon_V6_vrmpybusi_acc :
-Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+Hexagon_v32i32_v32i32v32i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc", [ImmArg<3>]>;
def int_hexagon_V6_vrmpybusi_acc_128B :
-Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B", [ImmArg<3>]>;
def int_hexagon_V6_vasrw :
Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vasrw">;
@@ -5883,10 +5904,10 @@ def int_hexagon_V6_vlsrb_128B :
Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vlsrb_128B">;
def int_hexagon_V6_vlutvwhi :
-Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi">;
+Hexagon_v32i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi", [ImmArg<2>]>;
def int_hexagon_V6_vlutvwhi_128B :
-Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B">;
+Hexagon_v64i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwhi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vaddububb_sat :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddububb_sat">;
@@ -5907,10 +5928,10 @@ def int_hexagon_V6_ldtp0_128B :
Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldtp0_128B">;
def int_hexagon_V6_vlutvvb_oracci :
-Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci">;
+Hexagon_v16i32_v16i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci", [ImmArg<3>]>;
def int_hexagon_V6_vlutvvb_oracci_128B :
-Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B">;
+Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvb_oracci_128B", [ImmArg<3>]>;
def int_hexagon_V6_vsubuwsat_dv :
Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubuwsat_dv">;
@@ -6045,10 +6066,10 @@ def int_hexagon_V6_vasrwuhrndsat_128B :
Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vasrwuhrndsat_128B">;
def int_hexagon_V6_vlutvvbi :
-Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi">;
+Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi", [ImmArg<2>]>;
def int_hexagon_V6_vlutvvbi_128B :
-Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B">;
+Hexagon_v32i32_v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvvbi_128B", [ImmArg<2>]>;
def int_hexagon_V6_vsubuwsat :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubuwsat">;
@@ -6141,10 +6162,10 @@ def int_hexagon_V6_ldcnp0_128B :
Hexagon_v32i32_i32i32_Intrinsic<"HEXAGON_V6_ldcnp0_128B">;
def int_hexagon_V6_vlutvwh_oracci :
-Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci">;
+Hexagon_v32i32_v32i32v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci", [ImmArg<3>]>;
def int_hexagon_V6_vlutvwh_oracci_128B :
-Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B">;
+Hexagon_v64i32_v64i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_vlutvwh_oracci_128B", [ImmArg<3>]>;
def int_hexagon_V6_vsubbsat :
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbsat">;
diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td
index 421a79be4ebc..6393a9ca35d5 100644
--- a/include/llvm/IR/IntrinsicsMips.td
+++ b/include/llvm/IR/IntrinsicsMips.td
@@ -1,9 +1,8 @@
//===- IntrinsicsMips.td - Defines Mips intrinsics ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -235,9 +234,9 @@ def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">,
// Misc
def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<1>]>;
def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem, ImmArg<0>]>;
def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
@@ -303,10 +302,10 @@ def int_mips_adduh_r_qb: GCCBuiltin<"__builtin_mips_adduh_r_qb">,
def int_mips_append: GCCBuiltin<"__builtin_mips_append">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">,
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
@@ -356,14 +355,14 @@ def int_mips_precr_qb_ph: GCCBuiltin<"__builtin_mips_precr_qb_ph">,
Intrinsic<[llvm_v4i8_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>;
def int_mips_precr_sra_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_ph_w">,
Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_precr_sra_r_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_r_ph_w">,
Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_prepend: GCCBuiltin<"__builtin_mips_prepend">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_shra_qb: GCCBuiltin<"__builtin_mips_shra_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -464,22 +463,22 @@ def int_mips_addv_d : GCCBuiltin<"__builtin_msa_addv_d">,
def int_mips_addvi_b : GCCBuiltin<"__builtin_msa_addvi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty],
- [Commutative, IntrNoMem]>;
+ [Commutative, IntrNoMem, ImmArg<1>]>;
def int_mips_addvi_h : GCCBuiltin<"__builtin_msa_addvi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty],
- [Commutative, IntrNoMem]>;
+ [Commutative, IntrNoMem, ImmArg<1>]>;
def int_mips_addvi_w : GCCBuiltin<"__builtin_msa_addvi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
- [Commutative, IntrNoMem]>;
+ [Commutative, IntrNoMem, ImmArg<1>]>;
def int_mips_addvi_d : GCCBuiltin<"__builtin_msa_addvi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty],
- [Commutative, IntrNoMem]>;
+ [Commutative, IntrNoMem, ImmArg<1>]>;
def int_mips_and_v : GCCBuiltin<"__builtin_msa_and_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_mips_andi_b : GCCBuiltin<"__builtin_msa_andi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_asub_s_b : GCCBuiltin<"__builtin_msa_asub_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -561,13 +560,13 @@ def int_mips_bclr_d : GCCBuiltin<"__builtin_msa_bclr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_bclri_b : GCCBuiltin<"__builtin_msa_bclri_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bclri_h : GCCBuiltin<"__builtin_msa_bclri_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bclri_w : GCCBuiltin<"__builtin_msa_bclri_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bclri_d : GCCBuiltin<"__builtin_msa_bclri_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_binsl_b : GCCBuiltin<"__builtin_msa_binsl_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -584,16 +583,16 @@ def int_mips_binsl_d : GCCBuiltin<"__builtin_msa_binsl_d">,
def int_mips_binsli_b : GCCBuiltin<"__builtin_msa_binsli_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsli_h : GCCBuiltin<"__builtin_msa_binsli_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsli_w : GCCBuiltin<"__builtin_msa_binsli_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsli_d : GCCBuiltin<"__builtin_msa_binsli_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsr_b : GCCBuiltin<"__builtin_msa_binsr_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -610,16 +609,16 @@ def int_mips_binsr_d : GCCBuiltin<"__builtin_msa_binsr_d">,
def int_mips_binsri_b : GCCBuiltin<"__builtin_msa_binsri_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsri_h : GCCBuiltin<"__builtin_msa_binsri_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsri_w : GCCBuiltin<"__builtin_msa_binsri_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -627,7 +626,7 @@ def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
def int_mips_bmnzi_b : GCCBuiltin<"__builtin_msa_bmnzi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -635,7 +634,7 @@ def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
def int_mips_bmzi_b : GCCBuiltin<"__builtin_msa_bmzi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_bneg_b : GCCBuiltin<"__builtin_msa_bneg_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -647,13 +646,13 @@ def int_mips_bneg_d : GCCBuiltin<"__builtin_msa_bneg_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_bnegi_b : GCCBuiltin<"__builtin_msa_bnegi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bnegi_h : GCCBuiltin<"__builtin_msa_bnegi_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bnegi_w : GCCBuiltin<"__builtin_msa_bnegi_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bnegi_d : GCCBuiltin<"__builtin_msa_bnegi_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bnz_b : GCCBuiltin<"__builtin_msa_bnz_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -673,7 +672,7 @@ def int_mips_bsel_v : GCCBuiltin<"__builtin_msa_bsel_v">,
def int_mips_bseli_b : GCCBuiltin<"__builtin_msa_bseli_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_bset_b : GCCBuiltin<"__builtin_msa_bset_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -685,13 +684,13 @@ def int_mips_bset_d : GCCBuiltin<"__builtin_msa_bset_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_bseti_b : GCCBuiltin<"__builtin_msa_bseti_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bseti_h : GCCBuiltin<"__builtin_msa_bseti_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bseti_w : GCCBuiltin<"__builtin_msa_bseti_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bseti_d : GCCBuiltin<"__builtin_msa_bseti_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_bz_b : GCCBuiltin<"__builtin_msa_bz_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -715,16 +714,16 @@ def int_mips_ceq_d : GCCBuiltin<"__builtin_msa_ceq_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_ceqi_b : GCCBuiltin<"__builtin_msa_ceqi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_ceqi_h : GCCBuiltin<"__builtin_msa_ceqi_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_ceqi_w : GCCBuiltin<"__builtin_msa_ceqi_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_ceqi_d : GCCBuiltin<"__builtin_msa_ceqi_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_cfcmsa : GCCBuiltin<"__builtin_msa_cfcmsa">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>;
def int_mips_cle_s_b : GCCBuiltin<"__builtin_msa_cle_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -745,22 +744,22 @@ def int_mips_cle_u_d : GCCBuiltin<"__builtin_msa_cle_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_clei_s_b : GCCBuiltin<"__builtin_msa_clei_s_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_s_h : GCCBuiltin<"__builtin_msa_clei_s_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_s_w : GCCBuiltin<"__builtin_msa_clei_s_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_s_d : GCCBuiltin<"__builtin_msa_clei_s_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_u_b : GCCBuiltin<"__builtin_msa_clei_u_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_u_h : GCCBuiltin<"__builtin_msa_clei_u_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_u_w : GCCBuiltin<"__builtin_msa_clei_u_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clei_u_d : GCCBuiltin<"__builtin_msa_clei_u_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clt_s_b : GCCBuiltin<"__builtin_msa_clt_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -781,22 +780,22 @@ def int_mips_clt_u_d : GCCBuiltin<"__builtin_msa_clt_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_clti_s_b : GCCBuiltin<"__builtin_msa_clti_s_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_s_h : GCCBuiltin<"__builtin_msa_clti_s_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_s_w : GCCBuiltin<"__builtin_msa_clti_s_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_s_d : GCCBuiltin<"__builtin_msa_clti_s_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_u_b : GCCBuiltin<"__builtin_msa_clti_u_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_u_h : GCCBuiltin<"__builtin_msa_clti_u_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_u_w : GCCBuiltin<"__builtin_msa_clti_u_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -817,7 +816,7 @@ def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">,
Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
def int_mips_div_s_b : GCCBuiltin<"__builtin_msa_div_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1245,41 +1244,41 @@ def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">,
def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_mips_insve_h : GCCBuiltin<"__builtin_msa_insve_h">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_mips_insve_w : GCCBuiltin<"__builtin_msa_insve_w">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_mips_insve_d : GCCBuiltin<"__builtin_msa_insve_d">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_mips_ld_b : GCCBuiltin<"__builtin_msa_ld_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
def int_mips_ld_h : GCCBuiltin<"__builtin_msa_ld_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
def int_mips_ld_w : GCCBuiltin<"__builtin_msa_ld_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
def int_mips_ld_d : GCCBuiltin<"__builtin_msa_ld_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
def int_mips_ldi_w : GCCBuiltin<"__builtin_msa_ldi_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
def int_mips_ldi_d : GCCBuiltin<"__builtin_msa_ldi_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
// This instruction is part of the MSA spec but it does not share the
// __builtin_msa prefix because it operates on the GPR registers.
@@ -1342,22 +1341,22 @@ def int_mips_max_u_d : GCCBuiltin<"__builtin_msa_max_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_maxi_s_b : GCCBuiltin<"__builtin_msa_maxi_s_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_s_h : GCCBuiltin<"__builtin_msa_maxi_s_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_s_w : GCCBuiltin<"__builtin_msa_maxi_s_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_s_d : GCCBuiltin<"__builtin_msa_maxi_s_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_u_b : GCCBuiltin<"__builtin_msa_maxi_u_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_u_h : GCCBuiltin<"__builtin_msa_maxi_u_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_u_w : GCCBuiltin<"__builtin_msa_maxi_u_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_maxi_u_d : GCCBuiltin<"__builtin_msa_maxi_u_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_min_a_b : GCCBuiltin<"__builtin_msa_min_a_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1387,22 +1386,22 @@ def int_mips_min_u_d : GCCBuiltin<"__builtin_msa_min_u_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_mini_s_b : GCCBuiltin<"__builtin_msa_mini_s_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_s_h : GCCBuiltin<"__builtin_msa_mini_s_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_s_w : GCCBuiltin<"__builtin_msa_mini_s_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_s_d : GCCBuiltin<"__builtin_msa_mini_s_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_u_b : GCCBuiltin<"__builtin_msa_mini_u_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_u_h : GCCBuiltin<"__builtin_msa_mini_u_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_u_w : GCCBuiltin<"__builtin_msa_mini_u_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mini_u_d : GCCBuiltin<"__builtin_msa_mini_u_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_mod_s_b : GCCBuiltin<"__builtin_msa_mod_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1493,13 +1492,13 @@ def int_mips_nor_v : GCCBuiltin<"__builtin_msa_nor_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_mips_nori_b : GCCBuiltin<"__builtin_msa_nori_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_or_v : GCCBuiltin<"__builtin_msa_or_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_mips_ori_b : GCCBuiltin<"__builtin_msa_ori_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_pckev_b : GCCBuiltin<"__builtin_msa_pckev_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1529,29 +1528,29 @@ def int_mips_pcnt_d : GCCBuiltin<"__builtin_msa_pcnt_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_sat_s_b : GCCBuiltin<"__builtin_msa_sat_s_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_s_h : GCCBuiltin<"__builtin_msa_sat_s_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_s_w : GCCBuiltin<"__builtin_msa_sat_s_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_s_d : GCCBuiltin<"__builtin_msa_sat_s_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_u_b : GCCBuiltin<"__builtin_msa_sat_u_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_u_h : GCCBuiltin<"__builtin_msa_sat_u_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_u_w : GCCBuiltin<"__builtin_msa_sat_u_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sat_u_d : GCCBuiltin<"__builtin_msa_sat_u_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_shf_b : GCCBuiltin<"__builtin_msa_shf_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_shf_h : GCCBuiltin<"__builtin_msa_shf_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_shf_w : GCCBuiltin<"__builtin_msa_shf_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sld_b : GCCBuiltin<"__builtin_msa_sld_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -1564,16 +1563,16 @@ def int_mips_sld_d : GCCBuiltin<"__builtin_msa_sld_d">,
def int_mips_sldi_b : GCCBuiltin<"__builtin_msa_sldi_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_sldi_h : GCCBuiltin<"__builtin_msa_sldi_h">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_sldi_w : GCCBuiltin<"__builtin_msa_sldi_w">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_sldi_d : GCCBuiltin<"__builtin_msa_sldi_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_mips_sll_b : GCCBuiltin<"__builtin_msa_sll_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1585,13 +1584,13 @@ def int_mips_sll_d : GCCBuiltin<"__builtin_msa_sll_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_slli_b : GCCBuiltin<"__builtin_msa_slli_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_slli_h : GCCBuiltin<"__builtin_msa_slli_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_slli_w : GCCBuiltin<"__builtin_msa_slli_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_slli_d : GCCBuiltin<"__builtin_msa_slli_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_splat_b : GCCBuiltin<"__builtin_msa_splat_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -1603,13 +1602,13 @@ def int_mips_splat_d : GCCBuiltin<"__builtin_msa_splat_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
def int_mips_splati_b : GCCBuiltin<"__builtin_msa_splati_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_splati_h : GCCBuiltin<"__builtin_msa_splati_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_splati_w : GCCBuiltin<"__builtin_msa_splati_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_splati_d : GCCBuiltin<"__builtin_msa_splati_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_sra_b : GCCBuiltin<"__builtin_msa_sra_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1621,13 +1620,13 @@ def int_mips_sra_d : GCCBuiltin<"__builtin_msa_sra_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_srai_b : GCCBuiltin<"__builtin_msa_srai_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srai_h : GCCBuiltin<"__builtin_msa_srai_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srai_w : GCCBuiltin<"__builtin_msa_srai_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srai_d : GCCBuiltin<"__builtin_msa_srai_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srar_b : GCCBuiltin<"__builtin_msa_srar_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1639,13 +1638,13 @@ def int_mips_srar_d : GCCBuiltin<"__builtin_msa_srar_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_srari_b : GCCBuiltin<"__builtin_msa_srari_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srari_h : GCCBuiltin<"__builtin_msa_srari_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srari_w : GCCBuiltin<"__builtin_msa_srari_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srari_d : GCCBuiltin<"__builtin_msa_srari_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srl_b : GCCBuiltin<"__builtin_msa_srl_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1657,13 +1656,13 @@ def int_mips_srl_d : GCCBuiltin<"__builtin_msa_srl_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_srli_b : GCCBuiltin<"__builtin_msa_srli_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srli_h : GCCBuiltin<"__builtin_msa_srli_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srli_w : GCCBuiltin<"__builtin_msa_srli_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srli_d : GCCBuiltin<"__builtin_msa_srli_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srlr_b : GCCBuiltin<"__builtin_msa_srlr_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1675,26 +1674,26 @@ def int_mips_srlr_d : GCCBuiltin<"__builtin_msa_srlr_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_srlri_b : GCCBuiltin<"__builtin_msa_srlri_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srlri_h : GCCBuiltin<"__builtin_msa_srlri_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srlri_w : GCCBuiltin<"__builtin_msa_srlri_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_srlri_d : GCCBuiltin<"__builtin_msa_srlri_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_st_b : GCCBuiltin<"__builtin_msa_st_b">,
Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [IntrArgMemOnly, ImmArg<2>]>;
def int_mips_st_h : GCCBuiltin<"__builtin_msa_st_h">,
Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [IntrArgMemOnly, ImmArg<2>]>;
def int_mips_st_w : GCCBuiltin<"__builtin_msa_st_w">,
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [IntrArgMemOnly, ImmArg<2>]>;
def int_mips_st_d : GCCBuiltin<"__builtin_msa_st_d">,
Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [IntrArgMemOnly, ImmArg<2>]>;
def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
@@ -1742,13 +1741,13 @@ def int_mips_subv_d : GCCBuiltin<"__builtin_msa_subv_d">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
def int_mips_subvi_b : GCCBuiltin<"__builtin_msa_subvi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_subvi_h : GCCBuiltin<"__builtin_msa_subvi_h">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_subvi_w : GCCBuiltin<"__builtin_msa_subvi_w">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_subvi_d : GCCBuiltin<"__builtin_msa_subvi_d">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_mips_vshf_b : GCCBuiltin<"__builtin_msa_vshf_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
@@ -1767,5 +1766,5 @@ def int_mips_xor_v : GCCBuiltin<"__builtin_msa_xor_v">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_mips_xori_b : GCCBuiltin<"__builtin_msa_xori_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
}
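The ImmArg<N> attribute added throughout the hunks above marks operand N of an intrinsic (counting from zero) as one that must remain a compile-time immediate, so the verifier and instruction selection can rely on the operand folding into the instruction's immediate field. A minimal sketch of the pattern follows; int_example_shifti and its builtin name are invented for illustration and are not part of this patch:

// Sketch only: a hypothetical intrinsic whose shift amount (operand index 1)
// must stay an immediate, expressed the same way as the MSA updates above.
def int_example_shifti : GCCBuiltin<"__builtin_example_shifti">,
  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty],
            [IntrNoMem, ImmArg<1>]>;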
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index 7f694f68969e..dba7dd76c4ff 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -1,9 +1,8 @@
//===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,6 +37,245 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
// MISC
//
+// Helper class for constructing an n-element list<LLVMType> [t,t,...,t]
+class RepLLVMType<int N, LLVMType T> {
+ list<LLVMType> ret = !if(N, !listconcat(RepLLVMType<!add(N,-1), T>.ret, [T]), []);
+}
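A concrete expansion of this recursion, as a sketch that assumes the usual Intrinsics.td context (LLVMType, llvm_v2f16_ty); the rep_example record name is invented:

// RepLLVMType<N, T>.ret bottoms out at N = 0, so for instance
//   RepLLVMType<4, llvm_v2f16_ty>.ret
//     == [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty]
def rep_example {
  list<LLVMType> val = RepLLVMType<4, llvm_v2f16_ty>.ret;
}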
+
+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
+// Geom: m<M>n<N>k<K>. E.g. m8n32k16
+// Frag: [abcd]
+// PtxEltType: PTX type for the element.
+class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
+ string geom = Geom;
+ string frag = Frag;
+ string ptx_elt_type = PtxEltType;
+ string gft = Geom#":"#Frag#":"#ptx_elt_type;
+ string ft = frag#":"#ptx_elt_type;
+ list<LLVMType> regs = !cond(
+ // fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
+ // All currently supported geometries use the same fragment format,
+ // so we only need to consider {fragment, type}.
+ !eq(ft,"a:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
+ !eq(ft,"b:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
+ !eq(ft,"c:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
+ !eq(ft,"d:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
+ !eq(ft,"c:f32") : RepLLVMType<8, llvm_float_ty>.ret,
+ !eq(ft,"d:f32") : RepLLVMType<8, llvm_float_ty>.ret,
+
+ // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
+ !eq(gft,"m16n16k16:a:u8") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m16n16k16:a:s8") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m16n16k16:b:u8") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m16n16k16:b:s8") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m16n16k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+ !eq(gft,"m16n16k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+
+ !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
+ !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
+ !eq(gft,"m8n32k16:b:u8") : RepLLVMType<4, llvm_i32_ty>.ret,
+ !eq(gft,"m8n32k16:b:s8") : RepLLVMType<4, llvm_i32_ty>.ret,
+ !eq(gft,"m8n32k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+ !eq(gft,"m8n32k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+
+ !eq(gft,"m32n8k16:a:u8") : RepLLVMType<4, llvm_i32_ty>.ret,
+ !eq(gft,"m32n8k16:a:s8") : RepLLVMType<4, llvm_i32_ty>.ret,
+ !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
+ !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
+ !eq(gft,"m32n8k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+ !eq(gft,"m32n8k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret,
+
+ // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
+ !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
+ !eq(gft,"m8n8k32:a:u4") : [llvm_i32_ty],
+ !eq(gft,"m8n8k32:a:s4") : [llvm_i32_ty],
+ !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
+ !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
+ !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
+ !eq(gft,"m8n8k128:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m8n8k128:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m8n8k32:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+ !eq(gft,"m8n8k32:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret,
+ );
+}
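A few sample instantiations read off the !cond table above, shown as a sketch (the wmma_regs_example record name is invented; everything else follows from the class definition):

// WMMA_REGS<"m16n16k16", "a", "f16">.gft  == "m16n16k16:a:f16"
// WMMA_REGS<"m16n16k16", "a", "f16">.regs == 8 x llvm_v2f16_ty  (matches ft "a:f16")
// WMMA_REGS<"m8n32k16",  "b", "s8">.regs  == 4 x llvm_i32_ty    (matches gft "m8n32k16:b:s8")
// WMMA_REGS<"m8n8k128",  "c", "s32">.regs == 2 x llvm_i32_ty
def wmma_regs_example {
  list<LLVMType> a_f16 = WMMA_REGS<"m16n16k16", "a", "f16">.regs;
}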
+
+class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
+ string intr = "llvm.nvvm.wmma."
+ # Frag.geom
+ # "." # Op
+ # "." # Frag.frag
+ # "." # Layout
+ # !if(WithStride, ".stride", "")
+ # "." # Frag.ptx_elt_type
+ ;
+ // TODO(tra): record name should ideally use the same field order as the intrinsic.
+ // E.g. string record = !subst("llvm", "int",
+ // !subst(".", "_", llvm));
+ string record = "int_nvvm_wmma_"
+ # Frag.geom
+ # "_" # Op
+ # "_" # Frag.frag
+ # "_" # Frag.ptx_elt_type
+ # "_" # Layout
+ # !if(WithStride, "_stride", "");
+}
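A worked example of the name construction, derived from the fields above (not part of the patch):

// WMMA_NAME_LDST<"load", WMMA_REGS<"m16n16k16", "a", "f16">, "row", 1> gives
//   intr   == "llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16"
//   record == "int_nvvm_wmma_m16n16k16_load_a_f16_row_stride"
// This is the mismatch the TODO refers to: the record name puts the element
// type before the layout, while the intrinsic name puts the layout first.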
+
+class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
+ list<WMMA_REGS> id_frags = !cond(
+ // int and sub-int ops are identified by input type.
+ !eq(A.ptx_elt_type, "s8") : [A],
+ !eq(A.ptx_elt_type, "u8") : [A],
+ !eq(A.ptx_elt_type, "s4") : [A],
+ !eq(A.ptx_elt_type, "u4") : [A],
+ !eq(A.ptx_elt_type, "b1") : [A],
+ // the rest are FP ops identified by accumulator & result type.
+ 1: [D, C]
+ );
+ string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type));
+}
+
+class WMMA_NAME_MMA<string ALayout, string BLayout, int Satfinite,
+ WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
+ string signature = MMA_SIGNATURE<A, B, C, D>.ret;
+ string llvm = "llvm.nvvm.wmma."
+ # A.geom
+ # ".mma"
+ # "." # ALayout
+ # "." # BLayout
+ # signature
+ # !if(Satfinite, ".satfinite", "");
+
+ string record = !subst(".", "_",
+ !subst("llvm.", "int_", llvm));
+}
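A worked example of how the two classes combine, derived from their definitions (not part of the patch):

// For f16 A/B fragments with C = f16 and D = f32 at m16n16k16, MMA_SIGNATURE
// identifies the op by [D, C], so its ret is ".f32.f16" and
//   WMMA_NAME_MMA<"row", "col", 0, A, B, C, D>.llvm
//     == "llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16"
//   WMMA_NAME_MMA<"row", "col", 0, A, B, C, D>.record
//     == "int_nvvm_wmma_m16n16k16_mma_row_col_f32_f16"
// For integer and sub-integer ops the signature is just the A type, e.g. ".s8".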
+
+// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
+// Geom: list of supported geometries.
+// TypeN: PTX type of the corresponding fragment's element.
+// TypeB and TypeD may be left empty, in which case they match TypeA and TypeC respectively.
+class MMA_OPS<list<string> Geom, list<string> TypeA, list<string> TypeB,
+ list<string> TypeC, list<string> TypeD> {
+ list<list<WMMA_REGS>> ret =
+ !foldl([]<list<WMMA_REGS>>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<list<WMMA_REGS>>, TypeA, t2, type_a, !listconcat(t2,
+ !foldl([]<list<WMMA_REGS>>, !if(!size(TypeB), TypeB, [type_a]), t3, type_b, !listconcat(t3,
+ !foldl([]<list<WMMA_REGS>>, TypeC, t4, type_c, !listconcat(t4,
+ !foldl([]<list<WMMA_REGS>>, !if(!size(TypeC), TypeC, [type_c]), t5, type_d, !listconcat(t5,
+ [[WMMA_REGS<geom, "a", type_a>,
+ WMMA_REGS<geom, "b", type_b>,
+ WMMA_REGS<geom, "c", type_c>,
+ WMMA_REGS<geom, "d", type_d>]]))))))))));
+ // Debugging aid for readable representation of the list above.
+ list<list<string>> ops = !foreach(x, ret, [x[0].gft, x[1].gft, x[2].gft, x[3].gft]);
+}
+
+class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+ list<WMMA_REGS> ret =
+ !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+ !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+ [WMMA_REGS<geom, frag, type>]))))));
+ // Debugging aid for readable representation of the list above.
+ list<string> ops = !foreach(x, ret, x.gft);
+}
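An expansion example for the list builders above (illustration only, derived from the folds):

// MMA_LDST_OPS<["m8n8k32"], ["a", "b"], ["s4", "u4"]>.ops
//   == ["m8n8k32:a:s4", "m8n8k32:a:u4", "m8n8k32:b:s4", "m8n8k32:b:u4"]
// MMA_OPS nests the same fold one level deeper and yields 4-tuples of
// WMMA_REGS, one each for the a, b, c and d fragments of a single MMA op.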
+
+
+
+// Creates list of valid combinations of fragments. This is the master list that
+// drives generation of corresponding intrinsics and instructions.
+class NVVM_MMA_OPS<int _ = 0> {
+ list<list<WMMA_REGS>> fp_mma_ops = MMA_OPS<
+ ["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
+ list<list<WMMA_REGS>> int_mma_ops = MMA_OPS<
+ ["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["s8", "u8"], [], ["s32"], []>.ret;
+ list<list<WMMA_REGS>> subint_mma_ops = MMA_OPS<
+ ["m8n8k32"],
+ ["s4", "u4"], [], ["s32"], []>.ret;
+ list<list<WMMA_REGS>> bit_mma_ops = MMA_OPS<
+ ["m8n8k128"],
+ ["b1"], [], ["s32"], []>.ret;
+ list<list<WMMA_REGS>> all_mma_ops = !listconcat(fp_mma_ops, int_mma_ops,
+ subint_mma_ops, bit_mma_ops);
+
+ list<WMMA_REGS> ldst_ab_ops = MMA_LDST_OPS<
+ ["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["a", "b"], ["f16", "u8", "s8"]>.ret;
+ list<WMMA_REGS> ldst_cd_ops = MMA_LDST_OPS<
+ ["m16n16k16", "m32n8k16", "m8n32k16"],
+ ["c", "d"], ["f16", "f32", "s32"]>.ret;
+ list<WMMA_REGS> ldst_subint_ab_ops = MMA_LDST_OPS<
+ ["m8n8k32"], ["a", "b"], ["s4","u4"]>.ret;
+ list<WMMA_REGS> ldst_bit_ab_ops = MMA_LDST_OPS<
+ ["m8n8k128"], ["a", "b"], ["b1"]>.ret;
+ list<WMMA_REGS> ldst_subint_cd_ops = MMA_LDST_OPS<
+ ["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]>.ret;
+ list<WMMA_REGS> all_ldst_ops = !listconcat(ldst_ab_ops, ldst_cd_ops,
+ ldst_subint_ab_ops,
+ ldst_bit_ab_ops,
+ ldst_subint_cd_ops);
+ // Separate A/B/C fragments (loads) from D (stores).
+ list<WMMA_REGS> all_ld_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b,
+ !listconcat(a, !if(!eq(b.frag,"d"), [],[b])));
+ list<WMMA_REGS> all_st_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b,
+ !listconcat(a, !if(!eq(b.frag,"d"), [b],[])));
+}
+
+def NVVM_MMA_OPS : NVVM_MMA_OPS;
+
+// Returns [1] if this combination of layout/satf is supported, [] otherwise.
+// MMA ops must provide all parameters. Loads and stores -- only frags and layout_a.
+// The class is used to prevent generation of records for the unsupported variants.
+// E.g.
+// foreach _ = NVVM_MMA_SUPPORTED<...>.ret in
+// def : FOO<>; // The record will only be defined for supported ops.
+//
+class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b="-", int satf=-1> {
+ // MMA ops check both layouts.
+ string mma = frags[0].ptx_elt_type
+ # ":" # layout_a
+ # ":" # layout_b;
+ // Load ops only need type/fragment/layout.
+ string ld = frags[0].ptx_elt_type
+ # ":" # frags[0].frag
+ # ":" # layout_a
+ ;
+ string ldf = frags[0].ptx_elt_type
+ # ":" # frags[0].frag
+ ;
+ string t = frags[0].ptx_elt_type;
+ list<int> ret = !cond(
+ // Sub-int MMA only supports fixed A/B layout.
+ // b1 does not support .satf.
+ !eq(mma#":"#satf, "b1:row:col:0") : [1],
+ !eq(mma, "s4:row:col") : [1],
+ !eq(mma, "u4:row:col") : [1],
+ !eq(mma, "s4:row:col") : [1],
+ !eq(mma, "u4:row:col") : [1],
+ // Sub-int load/stores have fixed layout for A and B.
+ !and(!eq(layout_b, "-"), // It's a Load or Store op
+ !or(!eq(ld, "b1:a:row"),
+ !eq(ld, "b1:b:col"),
+ !eq(ldf, "b1:c"),
+ !eq(ldf, "b1:d"),
+ !eq(ld, "s4:a:row"),
+ !eq(ld, "s4:b:col"),
+ !eq(ldf, "s4:c"),
+ !eq(ldf, "s4:d"),
+ !eq(ld, "u4:a:row"),
+ !eq(ld, "u4:b:col"),
+ !eq(ldf, "u4:c"),
+ !eq(ldf, "u4:d"))) : [1],
+ // All other sub-int ops are not supported.
+ !eq(t, "b1") : [],
+ !eq(t, "s4") : [],
+ !eq(t, "u4") : [],
+ // All other (non sub-int) are OK.
+ 1: [1]
+ );
+}
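The filtering idiom described in the comment above, written out as a sketch; the example_* record name is invented, and the point is only that the foreach body runs once when .ret is [1] and not at all when it is []:

// b1 "a" fragments are only loadable with row layout, so this defines a record;
// with "col" instead of "row", .ret would be [] and nothing would be defined.
foreach _ = NVVM_MMA_SUPPORTED<[WMMA_REGS<"m8n8k128", "a", "b1">], "row">.ret in
  def example_b1_a_row_load_supported;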
+
let TargetPrefix = "nvvm" in {
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
@@ -689,14 +927,6 @@ let TargetPrefix = "nvvm" in {
[IntrNoMem]>;
// Atomics not available as llvm intrinsics.
- def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
- [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
- [IntrArgMemOnly, NoCapture<0>]>;
- // Atomic add of f64 requires sm_60.
- def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty],
- [LLVMAnyPointerType<llvm_double_ty>, llvm_double_ty],
- [IntrArgMemOnly, NoCapture<0>]>;
-
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
[IntrArgMemOnly, NoCapture<0>]>;
@@ -3674,11 +3904,19 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
class PTXReadSRegIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
-
class PTXReadSRegIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+// Intrinsics to read registers whose values are not constant, i.e. values that
+// may change over the kernel's lifetime. Such reads should not be CSE'd.
+class PTXReadNCSRegIntrinsic_r32<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+class PTXReadNCSRegIntrinsic_r64<string name>
+ : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
@@ -3704,13 +3942,13 @@ def int_nvvm_read_ptx_sreg_lanemask_ge :
def int_nvvm_read_ptx_sreg_lanemask_gt :
PTXReadSRegIntrinsic_r32<"lanemask_gt">;
-def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
-def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
-def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
-def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
-def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
-def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
@@ -3882,166 +4120,59 @@ def int_nvvm_match_all_sync_i64p :
//
// WMMA instructions
//
-
// WMMA.LOAD
-class NVVM_WMMA_LD_GALSTS<string Geometry, string Abc, string Layout,
- string Type, LLVMType regty, int WithStride>
- : Intrinsic<!if(!eq(Abc#Type,"cf16"),
- [regty, regty, regty, regty],
- [regty, regty, regty, regty,
- regty, regty, regty, regty]),
+class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
+ : Intrinsic<Frag.regs,
!if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
[IntrReadMem, IntrArgMemOnly, ReadOnly<0>, NoCapture<0>],
- "llvm.nvvm.wmma."
- # Geometry
- # ".load"
- # "." # Abc
- # "." # Layout
- # !if(WithStride, ".stride", "")
- # "." # Type>;
-
-multiclass NVVM_WMMA_LD_GALT<string Geometry, string Abc, string Layout,
- string Type, LLVMType regty> {
- def _stride: NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 1>;
- def NAME : NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 0>;
-}
-
-multiclass NVVM_WMMA_LD_GAT<string Geometry, string Abc,
- string Type, LLVMType regty> {
- defm _row: NVVM_WMMA_LD_GALT<Geometry, Abc, "row", Type, regty>;
- defm _col: NVVM_WMMA_LD_GALT<Geometry, Abc, "col", Type, regty>;
-}
-
-multiclass NVVM_WMMA_LD_G<string Geometry> {
- defm _a_f16: NVVM_WMMA_LD_GAT<Geometry, "a", "f16", llvm_v2f16_ty>;
- defm _b_f16: NVVM_WMMA_LD_GAT<Geometry, "b", "f16", llvm_v2f16_ty>;
- defm _c_f16: NVVM_WMMA_LD_GAT<Geometry, "c", "f16", llvm_v2f16_ty>;
- defm _c_f32: NVVM_WMMA_LD_GAT<Geometry, "c", "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_LD {
- defm _m32n8k16_load: NVVM_WMMA_LD_G<"m32n8k16">;
- defm _m16n16k16_load: NVVM_WMMA_LD_G<"m16n16k16">;
- defm _m8n32k16_load: NVVM_WMMA_LD_G<"m8n32k16">;
-}
-
-defm int_nvvm_wmma: NVVM_WMMA_LD;
+ WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
// WMMA.STORE.D
-class NVVM_WMMA_STD_GLSTS<string Geometry, string Layout,
- string Type, LLVMType regty, int WithStride,
- // This is only used to create a typed empty array we
- // need to pass to !if below.
- list<LLVMType>Empty=[]>
+class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
: Intrinsic<[],
!listconcat(
[llvm_anyptr_ty],
- !if(!eq(Type,"f16"),
- [regty, regty, regty, regty],
- [regty, regty, regty, regty,
- regty, regty, regty, regty]),
- !if(WithStride, [llvm_i32_ty], Empty)),
+ Frag.regs,
+ !if(WithStride, [llvm_i32_ty], [])),
[IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>],
- "llvm.nvvm.wmma."
- # Geometry
- # ".store.d"
- # "." # Layout
- # !if(WithStride, ".stride", "")
- # "." # Type>;
-
-multiclass NVVM_WMMA_STD_GLT<string Geometry, string Layout,
- string Type, LLVMType regty> {
- def _stride: NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 1>;
- def NAME: NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 0>;
-}
-
-multiclass NVVM_WMMA_STD_GT<string Geometry, string Type, LLVMType regty> {
- defm _row: NVVM_WMMA_STD_GLT<Geometry, "row", Type, regty>;
- defm _col: NVVM_WMMA_STD_GLT<Geometry, "col", Type, regty>;
-}
-multiclass NVVM_WMMA_STD_G<string Geometry> {
- defm _d_f16: NVVM_WMMA_STD_GT<Geometry, "f16", llvm_v2f16_ty>;
- defm _d_f32: NVVM_WMMA_STD_GT<Geometry, "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_STD {
- defm _m32n8k16_store: NVVM_WMMA_STD_G<"m32n8k16">;
- defm _m16n16k16_store: NVVM_WMMA_STD_G<"m16n16k16">;
- defm _m8n32k16_store: NVVM_WMMA_STD_G<"m8n32k16">;
+ WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
+
+// Create all load/store variants
+foreach layout = ["row", "col"] in {
+ foreach stride = [0, 1] in {
+ foreach frag = NVVM_MMA_OPS.all_ld_ops in
+ foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ def WMMA_NAME_LDST<"load", frag, layout, stride>.record
+ : NVVM_WMMA_LD<frag, layout, stride>;
+ foreach frag = NVVM_MMA_OPS.all_st_ops in
+ foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ def WMMA_NAME_LDST<"store", frag, layout, stride>.record
+ : NVVM_WMMA_ST<frag, layout, stride>;
+ }
}
-defm int_nvvm_wmma: NVVM_WMMA_STD;
-
// WMMA.MMA
-class NVVM_WMMA_MMA_GABDCS<string Geometry,
- string ALayout, string BLayout,
- string DType, LLVMType d_regty,
- string CType, LLVMType c_regty,
- string Satfinite = "">
- : Intrinsic<!if(!eq(DType,"f16"),
- [d_regty, d_regty, d_regty, d_regty],
- [d_regty, d_regty, d_regty, d_regty,
- d_regty, d_regty, d_regty, d_regty]),
- !listconcat(
- [// A
- llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
- llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
- // B
- llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
- llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
- !if(!eq(CType,"f16"),
- [c_regty, c_regty, c_regty, c_regty],
- [c_regty, c_regty, c_regty, c_regty,
- c_regty, c_regty, c_regty, c_regty])),
+class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite,
+ WMMA_REGS A, WMMA_REGS B,
+ WMMA_REGS C, WMMA_REGS D>
+ : Intrinsic<D.regs,
+ !listconcat(A.regs, B.regs, C.regs),
[IntrNoMem],
- "llvm.nvvm.wmma."
- # Geometry
- # ".mma"
- # "." # ALayout
- # "." # BLayout
- # "." # DType
- # "." # CType
- # Satfinite> {
-}
-
-multiclass NVVM_WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
- string DType, LLVMType d_regty,
- string CType, LLVMType c_regty> {
- def NAME : NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
- DType, d_regty, CType, c_regty>;
- def _satfinite: NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
- DType, d_regty, CType, c_regty,".satfinite">;
-}
-
-multiclass NVVM_WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
- string DType, LLVMType d_regty> {
- defm _f16: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
- "f16", llvm_v2f16_ty>;
- defm _f32: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
- "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
- defm _f16: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", llvm_v2f16_ty>;
- defm _f32: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", llvm_float_ty>;
-}
-
-multiclass NVVM_WMMA_MMA_GA<string Geometry, string ALayout> {
- defm _col: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "col">;
- defm _row: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "row">;
-}
-
-multiclass NVVM_WMMA_MMA_G<string Geometry> {
- defm _col: NVVM_WMMA_MMA_GA<Geometry, "col">;
- defm _row: NVVM_WMMA_MMA_GA<Geometry, "row">;
-}
-
-multiclass NVVM_WMMA_MMA {
- defm _m32n8k16_mma : NVVM_WMMA_MMA_G<"m32n8k16">;
- defm _m16n16k16_mma : NVVM_WMMA_MMA_G<"m16n16k16">;
- defm _m8n32k16_mma : NVVM_WMMA_MMA_G<"m8n32k16">;
-}
-
-defm int_nvvm_wmma : NVVM_WMMA_MMA;
+ WMMA_NAME_MMA<ALayout, BLayout, Satfinite, A, B, C, D>.llvm>;
+
+foreach layout_a = ["row", "col"] in {
+ foreach layout_b = ["row", "col"] in {
+ foreach satf = [0, 1] in {
+ foreach op = NVVM_MMA_OPS.all_mma_ops in {
+ foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
+ def WMMA_NAME_MMA<layout_a, layout_b, satf,
+ op[0], op[1], op[2], op[3]>.record
+ : NVVM_WMMA_MMA<layout_a, layout_b, satf,
+ op[0], op[1], op[2], op[3]>;
+ }
+ }
+ } // satf
+ } // layout_b
+} // layout_a
} // let TargetPrefix = "nvvm"
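Taken together, the foreach nests above replace the old multiclass pyramid: every {layout_a, layout_b, satf, fragment-tuple} combination accepted by NVVM_MMA_SUPPORTED becomes one intrinsic record, and rejected combinations (such as b1 with .satf) simply produce nothing. One record the loop generates, spelled out for reference (derived from the definitions above, not an addition to the patch):

// def int_nvvm_wmma_m16n16k16_mma_row_row_f32_f32_satfinite
//   : NVVM_WMMA_MMA<"row", "row", 1,
//                   WMMA_REGS<"m16n16k16", "a", "f16">,
//                   WMMA_REGS<"m16n16k16", "b", "f16">,
//                   WMMA_REGS<"m16n16k16", "c", "f32">,
//                   WMMA_REGS<"m16n16k16", "d", "f32">>;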
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index 62b2e8f77e7d..f87317445753 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1,9 +1,8 @@
//===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,8 @@
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
- def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">,
+ Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty],
@@ -610,16 +610,16 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
// FP <-> integer conversion.
def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
@@ -716,11 +716,11 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_crypto_vshasigmad :
GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
def int_ppc_altivec_crypto_vshasigmaw :
GCCBuiltin<"__builtin_altivec_crypto_vshasigmaw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
}
def int_ppc_altivec_crypto_vcipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">;
@@ -915,10 +915,10 @@ def int_ppc_vsx_xvxsigsp :
[llvm_v4f32_ty], [IntrNoMem]>;
def int_ppc_vsx_xvtstdcdp :
PowerPC_VSX_Intrinsic<"xvtstdcdp", [llvm_v2i64_ty],
- [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_ppc_vsx_xvtstdcsp :
PowerPC_VSX_Intrinsic<"xvtstdcsp", [llvm_v4i32_ty],
- [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem]>;
+ [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_ppc_vsx_xvcvhpsp :
PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
[llvm_v8i16_ty],[IntrNoMem]>;
@@ -1113,9 +1113,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_tbegin : GCCBuiltin<"__builtin_tbegin">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>;
def int_ppc_tend : GCCBuiltin<"__builtin_tend">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [ImmArg<0>]>;
def int_ppc_tabort : GCCBuiltin<"__builtin_tabort">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
@@ -1167,4 +1167,9 @@ def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
+
+// PowerPC set FPSCR Intrinsic Definitions.
+def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
+ Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
+
}
diff --git a/include/llvm/IR/IntrinsicsRISCV.td b/include/llvm/IR/IntrinsicsRISCV.td
index 0ac7348b56db..60393189b830 100644
--- a/include/llvm/IR/IntrinsicsRISCV.td
+++ b/include/llvm/IR/IntrinsicsRISCV.td
@@ -1,9 +1,8 @@
//===- IntrinsicsRISCV.td - Defines RISCV intrinsics -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,13 +18,13 @@ let TargetPrefix = "riscv" in {
class MaskedAtomicRMW32Intrinsic
: Intrinsic<[llvm_i32_ty],
[llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<0>]>;
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
class MaskedAtomicRMW32WithSextIntrinsic
: Intrinsic<[llvm_i32_ty],
[llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<0>]>;
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
def int_riscv_masked_atomicrmw_xchg_i32 : MaskedAtomicRMW32Intrinsic;
def int_riscv_masked_atomicrmw_add_i32 : MaskedAtomicRMW32Intrinsic;
@@ -39,6 +38,31 @@ def int_riscv_masked_atomicrmw_umin_i32 : MaskedAtomicRMW32Intrinsic;
def int_riscv_masked_cmpxchg_i32
: Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty],
- [IntrArgMemOnly, NoCapture<0>]>;
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+
+class MaskedAtomicRMW64Intrinsic
+ : Intrinsic<[llvm_i64_ty],
+ [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<3>]>;
+
+class MaskedAtomicRMW64WithSextIntrinsic
+ : Intrinsic<[llvm_i64_ty],
+ [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
+
+def int_riscv_masked_atomicrmw_xchg_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_add_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_sub_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_nand_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_max_i64 : MaskedAtomicRMW64WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_min_i64 : MaskedAtomicRMW64WithSextIntrinsic;
+def int_riscv_masked_atomicrmw_umax_i64 : MaskedAtomicRMW64Intrinsic;
+def int_riscv_masked_atomicrmw_umin_i64 : MaskedAtomicRMW64Intrinsic;
+
+def int_riscv_masked_cmpxchg_i64
+ : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty, llvm_i64_ty, llvm_i64_ty,
+ llvm_i64_ty, llvm_i64_ty],
+ [IntrArgMemOnly, NoCapture<0>, ImmArg<4>]>;
} // TargetPrefix = "riscv"
diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td
index caa2ec209a31..40d6ba17eaf1 100644
--- a/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/include/llvm/IR/IntrinsicsSystemZ.td
@@ -1,9 +1,8 @@
//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,7 +38,8 @@ class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
: Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>;
class SystemZBinaryConvIntCC<LLVMType result, LLVMType arg>
- : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>;
+ : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
class SystemZBinaryCC<LLVMType type>
: SystemZBinaryConvCC<type, type>;
@@ -48,23 +48,28 @@ class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
: GCCBuiltin<"__builtin_s390_" ## name>,
Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
+class SystemZTernaryConvCC<LLVMType result, LLVMType arg>
+ : Intrinsic<[result, llvm_i32_ty], [arg, arg, result], [IntrNoMem]>;
+
class SystemZTernary<string name, LLVMType type>
: SystemZTernaryConv<name, type, type>;
class SystemZTernaryInt<string name, LLVMType type>
: GCCBuiltin<"__builtin_s390_" ## name>,
- Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
class SystemZTernaryIntCC<LLVMType type>
- : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>;
+ : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
class SystemZQuaternaryInt<string name, LLVMType type>
: GCCBuiltin<"__builtin_s390_" ## name>,
- Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[type], [type, type, type, llvm_i32_ty],
+ [IntrNoMem, ImmArg<3>]>;
class SystemZQuaternaryIntCC<LLVMType type>
: Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
multiclass SystemZUnaryExtBHF<string name> {
def b : SystemZUnaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
@@ -180,7 +185,8 @@ multiclass SystemZQuaternaryIntBHF<string name> {
def f : SystemZQuaternaryInt<name##"f", llvm_v4i32_ty>;
}
-multiclass SystemZQuaternaryIntBHFG<string name> : SystemZQuaternaryIntBHF<name> {
+multiclass SystemZQuaternaryIntBHFG<string name> :
+ SystemZQuaternaryIntBHF<name> {
def g : SystemZQuaternaryInt<name##"g", llvm_v2i64_ty>;
}
@@ -232,11 +238,11 @@ let TargetPrefix = "s390" in {
let TargetPrefix = "s390" in {
def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">,
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, IntrArgMemOnly, ImmArg<1>]>;
def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">,
Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
@@ -245,7 +251,7 @@ let TargetPrefix = "s390" in {
def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">,
Intrinsic<[llvm_v16i8_ty],
@@ -311,7 +317,7 @@ let TargetPrefix = "s390" in {
def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">;
@@ -370,7 +376,7 @@ let TargetPrefix = "s390" in {
def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
// Instructions from the Vector Enhancements Facility 1
def int_s390_vbperm : SystemZBinaryConv<"vbperm", llvm_v2i64_ty,
@@ -379,20 +385,20 @@ let TargetPrefix = "s390" in {
def int_s390_vmslg : GCCBuiltin<"__builtin_s390_vmslg">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v16i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
def int_s390_vfmaxdb : Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_s390_vfmindb : Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_s390_vfmaxsb : Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_s390_vfminsb : Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_s390_vfcesbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
def int_s390_vfchsbs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v4f32_ty>;
@@ -402,7 +408,7 @@ let TargetPrefix = "s390" in {
def int_s390_vfisb : Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
// Instructions from the Vector Packed Decimal Facility
def int_s390_vlrl : GCCBuiltin<"__builtin_s390_vlrl">,
@@ -412,6 +418,24 @@ let TargetPrefix = "s390" in {
def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;
+
+ // Instructions from the Vector Enhancements Facility 2
+ def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+ def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+ def int_s390_vstrsb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
+ def int_s390_vstrsh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+ def int_s390_vstrsf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
+ def int_s390_vstrszb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
+ def int_s390_vstrszh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+ def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
}
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td
index b015650906e0..1b892727547d 100644
--- a/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -1,9 +1,8 @@
//===- IntrinsicsWebAssembly.td - Defines wasm intrinsics --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -41,8 +40,8 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
// throw / rethrow
def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
- [Throws, IntrNoReturn]>;
-def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>;
+ [Throws, IntrNoReturn, ImmArg<0>]>;
+def int_wasm_rethrow_in_catch : Intrinsic<[], [], [Throws, IntrNoReturn]>;
// Since wasm does not use landingpad instructions, these instructions return
// exception pointer and selector values until we lower them in WasmEHPrepare.
@@ -50,17 +49,16 @@ def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty],
[IntrHasSideEffects]>;
def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty],
[IntrHasSideEffects]>;
-
-// wasm.catch returns the pointer to the exception object caught by wasm 'catch'
-// instruction.
-def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
- [IntrHasSideEffects]>;
+// This is the same as llvm.wasm.get.exception except that it does not take a
+// token operand. This is only for instruction selection purposes.
+def int_wasm_extract_exception : Intrinsic<[llvm_ptr_ty], [],
+ [IntrHasSideEffects]>;
// WebAssembly EH must maintain the landingpads in the order assigned to them
// by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is
// used in order to give them the indices in WasmEHPrepare.
def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
// Returns LSDA address of the current function.
def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
@@ -112,4 +110,27 @@ def int_wasm_alltrue :
[llvm_anyvector_ty],
[IntrNoMem, IntrSpeculatable]>;
+//===----------------------------------------------------------------------===//
+// Bulk memory intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_memory_init :
+ Intrinsic<[],
+ [llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrWriteMem, IntrInaccessibleMemOrArgMemOnly, WriteOnly<2>,
+ IntrHasSideEffects, ImmArg<0>, ImmArg<1>]>;
+def int_wasm_data_drop :
+ Intrinsic<[],
+ [llvm_i32_ty],
+ [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>;
+
+//===----------------------------------------------------------------------===//
+// Thread-local storage intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_tls_size :
+ Intrinsic<[llvm_anyint_ty],
+ [],
+ [IntrNoMem, IntrSpeculatable]>;
+
} // TargetPrefix = "wasm"
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 8d8cc8e97678..236d312d7d78 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1,9 +1,8 @@
//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,7 @@
//===----------------------------------------------------------------------===//
// Interrupt traps
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
+ def int_x86_int : Intrinsic<[], [llvm_i8_ty], [ImmArg<0>]>;
}
//===----------------------------------------------------------------------===//
@@ -204,12 +203,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
// NOTE: This comparison intrinsic is not used by clang as long as the
// distinction in signaling behaviour is not implemented.
def int_x86_sse_cmp_ps :
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty], [IntrNoMem]>;
@@ -278,9 +277,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Control register.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_stmxcsr :
- Intrinsic<[], [llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly,
+ // This prevents reordering with ldmxcsr
+ IntrHasSideEffects]>;
def int_x86_sse_ldmxcsr :
- Intrinsic<[], [llvm_ptr_ty], []>;
+ Intrinsic<[], [llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, IntrHasSideEffects,
+ // FIXME: LDMXCSR does not actually write to memory,
+ // but Fast and DAG Isel both use writing to memory
+ // as a proxy for having side effects.
+ IntrWriteMem]>;
}
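The comments above explain the attribute choice: IntrHasSideEffects keeps the MXCSR accesses from being reordered with respect to each other or deleted as dead, and LDMXCSR is additionally modeled as writing memory only because FastISel and DAG ISel use a memory write as a proxy for side effects. A minimal usage sketch, not part of the imported sources, using the standard <xmmintrin.h> wrappers:

#include <xmmintrin.h>

/* Save, change, and restore the rounding mode. The MXCSR accesses below
   lower to llvm.x86.sse.stmxcsr / llvm.x86.sse.ldmxcsr; the side-effect
   flag above is what keeps them ordered and alive. */
void with_truncation(void (*body)(void)) {
  unsigned int saved = _mm_getcsr();             /* stmxcsr */
  _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);  /* stmxcsr + ldmxcsr */
  body();
  _mm_setcsr(saved);                             /* ldmxcsr: restore */
}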
// Misc.
@@ -312,12 +319,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
// NOTE: This comparison intrinsic is not used by clang as long as the
// distinction in signaling behaviour is not implemented.
def int_x86_sse2_cmp_pd :
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
llvm_v2f64_ty], [IntrNoMem]>;
@@ -367,6 +374,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem, Commutative]>;
def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
@@ -399,6 +412,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
+ // Oddly these don't require an immediate due to a gcc compatibility issue.
def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
llvm_i32_ty], [IntrNoMem]>;
@@ -604,7 +618,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
}
// Sign ops
@@ -650,16 +664,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
}
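The ImmArg<1>/ImmArg<2> tags added here record that ROUNDPS/ROUNDSS take their rounding control as an instruction immediate (index 2 for the scalar forms, which carry an extra vector operand, index 1 for the packed forms). A short sketch, not part of this import, of how that constant arrives through the usual <smmintrin.h> wrapper:

#include <smmintrin.h>

__m128 ceil_ps(__m128 v) {
  /* The second operand lowers to the ImmArg operand of
     llvm.x86.sse41.round.ps and must be a compile-time constant. */
  return _mm_round_ps(v, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
}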
// Vector min element
@@ -722,20 +736,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_aesni_aeskeygenassist :
GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
}
// PCLMUL instructions
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_pclmulqdq : GCCBuiltin<"__builtin_ia32_pclmulqdq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_pclmulqdq_256 : GCCBuiltin<"__builtin_ia32_pclmulqdq256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_pclmulqdq_512 : GCCBuiltin<"__builtin_ia32_pclmulqdq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
}
// Vector pack
@@ -749,7 +763,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
}
// Vector blend
@@ -769,17 +783,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, Commutative, ImmArg<2>]>;
def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, Commutative, ImmArg<2>]>;
}
// Vector sum of absolute differences
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, Commutative, ImmArg<2>]>;
}
// Test instruction with bitwise comparison.
@@ -820,66 +834,66 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">,
Intrinsic<[llvm_i32_ty],
[llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty,
llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
}
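The PCMPISTR/PCMPESTR family takes its comparison mode as an 8-bit immediate, which the ImmArg<2> (implicit-length) and ImmArg<4> (explicit-length) tags above now make explicit at the IR level. A usage sketch with the <nmmintrin.h> wrapper, not part of the imported sources; the mode flags must fold to a constant:

#include <nmmintrin.h>

int find_any_of(__m128i needles, __m128i haystack) {
  /* The mode operand maps to the ImmArg operand of
     llvm.x86.sse42.pcmpistri128. */
  return _mm_cmpistri(needles, haystack,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY |
                      _SIDD_LEAST_SIGNIFICANT);
}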
//===----------------------------------------------------------------------===//
@@ -888,13 +902,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse4a_extrqi : GCCBuiltin<"__builtin_ia32_extrqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
def int_x86_sse4a_extrq : GCCBuiltin<"__builtin_ia32_extrq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse4a_insertqi : GCCBuiltin<"__builtin_ia32_insertqi">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
def int_x86_sse4a_insertq : GCCBuiltin<"__builtin_ia32_insertq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
}
@@ -931,10 +946,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
}
// Horizontal ops
@@ -1086,33 +1101,33 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v16qi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_vgf2p8affineinvqb_256 :
GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v32qi">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_vgf2p8affineinvqb_512 :
GCCBuiltin<"__builtin_ia32_vgf2p8affineinvqb_v64qi">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_vgf2p8affineqb_128 :
GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v16qi">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_vgf2p8affineqb_256 :
GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v32qi">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_vgf2p8affineqb_512 :
GCCBuiltin<"__builtin_ia32_vgf2p8affineqb_v64qi">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_vgf2p8mulb_128 :
GCCBuiltin<"__builtin_ia32_vgf2p8mulb_v16qi">,
@@ -1145,17 +1160,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem, Commutative, ImmArg<2>]>;
}
// Vector compare
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_cmp_pd_256 :
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx_cmp_ps_256 :
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
}
// Vector convert
@@ -1222,30 +1238,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_fpclass_pd_128 :
Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_fpclass_pd_256 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_fpclass_pd_512 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_fpclass_ps_128 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_fpclass_ps_256 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_fpclass_ps_512 :
Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_fpclass_sd :
GCCBuiltin<"__builtin_ia32_fpclasssd_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_fpclass_ss :
GCCBuiltin<"__builtin_ia32_fpclassss_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
}
// Vector extract sign mask
@@ -1328,6 +1344,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
@@ -1360,6 +1382,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v4i32_ty], [IntrNoMem]>;
+ // Oddly these don't require an immediate due to a gcc compatibility issue.
def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_i32_ty], [IntrNoMem]>;
@@ -1392,6 +1415,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
+ // Oddly these don't require an immediate due to a gcc compatibility issue.
def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty], [IntrNoMem]>;
@@ -1427,6 +1451,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty], [IntrNoMem]>;
+ // Oddly these don't require an immediate due to a gcc compatibility issue.
def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_i32_ty], [IntrNoMem]>;
@@ -1677,71 +1702,73 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Gather ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
}
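Two changes here, both explained by the NOTE: the gathers lose IntrArgMemOnly because each lane's address is base + index[i] * scale, so with a small or null base the accessed memory is determined almost entirely by the index vector rather than the pointer argument; and the scale becomes ImmArg<4>, since the hardware encodes it in the instruction. A sketch, not from this import, of how the constant scale reaches the intrinsic through the AVX2 wrapper in <immintrin.h>:

#include <immintrin.h>

__m256d gather4(const double *base, __m128i idx) {
  /* The trailing 8 is the scale; it lowers to the ImmArg<4> operand of
     llvm.x86.avx2.gather.d.pd.256 and must be a literal 1, 2, 4, or 8. */
  return _mm256_i32gather_pd(base, idx, 8);
}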
// Misc.
@@ -1753,7 +1780,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i8_ty], [IntrNoMem, Commutative]>;
+ llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<2>]>;
}
//===----------------------------------------------------------------------===//
@@ -1763,32 +1790,32 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_vfmadd_pd_512 :
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_vfmadd_ps_512 :
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
// TODO: Can we use 2 vfmadds+shufflevector?
def int_x86_avx512_vfmaddsub_pd_512 :
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_vfmaddsub_ps_512 :
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_vfmadd_f64 :
Intrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_vfmadd_f32 :
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_vpmadd52h_uq_128 :
GCCBuiltin<"__builtin_ia32_vpmadd52huq128">,
@@ -1878,23 +1905,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_xop_vpermil2pd_256 :
GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
llvm_v4i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_xop_vpermil2ps_256 :
GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
llvm_v8i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
@@ -1909,31 +1936,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
def int_x86_xop_vphaddbd :
GCCBuiltin<"__builtin_ia32_vphaddbd">,
Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -2261,6 +2263,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
+ // Oddly these don't require an immediate due to a gcc compatibility issue.
def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty], [IntrNoMem]>;
@@ -2398,15 +2401,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">,
Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
}
//===----------------------------------------------------------------------===//
@@ -2527,13 +2530,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
@@ -2542,13 +2546,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
}
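The half-precision converts likewise carry their rounding control as an instruction immediate, hence ImmArg<1> (and ImmArg<3> on the 512-bit masked ph2ps form, whose last operand is the SAE/rounding control). A brief sketch, not part of the imported sources, using the F16C wrapper from <immintrin.h>:

#include <immintrin.h>

__m128i to_half(__m256 v) {
  /* Rounding-control operand of llvm.x86.vcvtps2ph.256; must be constant. */
  return _mm256_cvtps_ph(v, _MM_FROUND_CUR_DIRECTION);
}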
//===----------------------------------------------------------------------===//
@@ -2556,9 +2563,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_tbm_bextri_u32 : GCCBuiltin<"__builtin_ia32_bextri_u32">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_tbm_bextri_u64 : GCCBuiltin<"__builtin_ia32_bextri_u64">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<1>]>;
}
//===----------------------------------------------------------------------===//
@@ -2604,7 +2613,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
Intrinsic<[], [], []>;
def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
- Intrinsic<[], [llvm_i8_ty], []>;
+ Intrinsic<[], [llvm_i8_ty], [ImmArg<0>]>;
def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">,
Intrinsic<[llvm_i32_ty], [], []>;
}
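ImmArg<0> on llvm.x86.xabort matches the hardware: XABORT encodes its status code as an instruction immediate. A small RTM sketch, not from this import, using the <immintrin.h> wrappers (built with -mrtm); the abort code must be a literal:

#include <immintrin.h>

int try_increment(int *counter) {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    if (*counter < 0)
      _xabort(0x42);   /* lowers to llvm.x86.xabort; ImmArg<0> forces a literal */
    ++*counter;
    _xend();
    return 1;          /* transaction committed */
  }
  return 0;            /* aborted; status holds the cause */
}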
@@ -2645,55 +2654,71 @@ let TargetPrefix = "x86" in {
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">,
- Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">,
- Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
}
// Pack ops.
@@ -2714,11 +2739,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_cvtdq2ps_512 :
- GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ def int_x86_avx512_sitofp_round :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
+
+ def int_x86_avx512_uitofp_round :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_cvtpd2dq_128 :
GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">,
@@ -2730,25 +2757,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtpd2ps_512 :
GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtsd2ss_round :
GCCBuiltin<"__builtin_ia32_cvtsd2ss_round_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_cvtss2sd_round :
GCCBuiltin<"__builtin_ia32_cvtss2sd_round_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_cvtpd2ps :
GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">,
@@ -2772,7 +2799,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtpd2udq_128 :
GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">,
@@ -2790,7 +2817,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtpd2uqq_128 :
GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">,
@@ -2808,7 +2835,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtps2dq_128 :
GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">,
@@ -2826,13 +2853,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtps2pd_512 :
GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtps2qq_128 :
GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">,
@@ -2850,7 +2877,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtps2udq_128 :
GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">,
@@ -2868,7 +2895,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtps2uqq_128 :
GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">,
@@ -2886,13 +2913,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtqq2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtqq2ps_128 :
GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">,
@@ -2900,18 +2921,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_cvtqq2ps_256 :
- GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtqq2ps_512 :
- GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_cvttpd2dq_128 :
GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">,
Intrinsic<[llvm_v4i32_ty],
@@ -2922,7 +2931,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttpd2qq_128 :
GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">,
@@ -2940,7 +2949,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttpd2udq_128 :
GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">,
@@ -2958,7 +2967,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttpd2uqq_128 :
GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">,
@@ -2976,13 +2985,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttps2dq_512 :
GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttps2qq_128 :
GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">,
@@ -3000,7 +3009,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttps2udq_128 :
GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">,
@@ -3018,7 +3027,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvttps2uqq_128 :
GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">,
@@ -3036,19 +3045,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtudq2ps_512 :
- GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtuqq2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_cvtuqq2ps_128 :
GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">,
@@ -3056,72 +3053,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_cvtuqq2ps_256 :
- GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtuqq2ps_512 :
- GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">,
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
- llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
- llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
}
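The rndscale, reduce, and range families each take a transform immediate and, in their 512-bit forms, a separate rounding/SAE immediate, hence the paired ImmArg<1>/ImmArg<2> and ImmArg<4>/ImmArg<5> tags above. A hedged sketch, not part of the imported sources, of the first immediate through the AVX-512F wrapper in <immintrin.h>:

#include <immintrin.h>

__m512 to_quarters(__m512 v) {
  /* imm 0x20: upper nibble M = 2, so each lane is rounded to the nearest
     multiple of 2^-2; lowers to the ImmArg<1> operand of
     llvm.x86.avx512.mask.rndscale.ps.512. */
  return _mm512_roundscale_ps(v, 0x20);
}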
// Vector load with broadcast
@@ -3151,109 +3154,111 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_max_ps_512 : GCCBuiltin<"__builtin_ia32_maxps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_max_pd_512 : GCCBuiltin<"__builtin_ia32_maxpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_min_ps_512 : GCCBuiltin<"__builtin_ia32_minps512">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_min_pd_512 : GCCBuiltin<"__builtin_ia32_minpd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>, ImmArg<5>]>;
def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -3262,7 +3267,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v4f64_ty, llvm_i8_ty],[IntrNoMem]>;
def int_x86_avx512_mask_scalef_pd_512 : GCCBuiltin<"__builtin_ia32_scalefpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_scalef_ps_128 : GCCBuiltin<"__builtin_ia32_scalefps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -3271,99 +3277,104 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_scalef_ps_512 : GCCBuiltin<"__builtin_ia32_scalefps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_sqrt_ss :
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_sqrt_sd :
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_sqrt_pd_512 :
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_sqrt_ps_512 :
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_fixupimm_pd_128 :
GCCBuiltin<"__builtin_ia32_fixupimmpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_maskz_fixupimm_pd_128 :
GCCBuiltin<"__builtin_ia32_fixupimmpd128_maskz">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_fixupimm_pd_256 :
GCCBuiltin<"__builtin_ia32_fixupimmpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_maskz_fixupimm_pd_256 :
GCCBuiltin<"__builtin_ia32_fixupimmpd256_maskz">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_fixupimm_pd_512 :
GCCBuiltin<"__builtin_ia32_fixupimmpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_maskz_fixupimm_pd_512 :
GCCBuiltin<"__builtin_ia32_fixupimmpd512_maskz">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_mask_fixupimm_ps_128 :
GCCBuiltin<"__builtin_ia32_fixupimmps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_maskz_fixupimm_ps_128 :
GCCBuiltin<"__builtin_ia32_fixupimmps128_maskz">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_fixupimm_ps_256 :
GCCBuiltin<"__builtin_ia32_fixupimmps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_maskz_fixupimm_ps_256 :
GCCBuiltin<"__builtin_ia32_fixupimmps256_maskz">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_fixupimm_ps_512 :
GCCBuiltin<"__builtin_ia32_fixupimmps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_maskz_fixupimm_ps_512 :
GCCBuiltin<"__builtin_ia32_fixupimmps512_maskz">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16i32_ty, llvm_i32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_mask_fixupimm_sd :
GCCBuiltin<"__builtin_ia32_fixupimmsd_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_maskz_fixupimm_sd :
GCCBuiltin<"__builtin_ia32_fixupimmsd_maskz">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_mask_fixupimm_ss :
GCCBuiltin<"__builtin_ia32_fixupimmss_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_maskz_fixupimm_ss :
GCCBuiltin<"__builtin_ia32_fixupimmss_maskz">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>, ImmArg<5>]>;
def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
@@ -3372,7 +3383,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
@@ -3381,62 +3393,65 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss128_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd128_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_mask_getmant_pd_128 :
GCCBuiltin<"__builtin_ia32_getmantpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_getmant_pd_256 :
GCCBuiltin<"__builtin_ia32_getmantpd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_getmant_pd_512 :
GCCBuiltin<"__builtin_ia32_getmantpd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
def int_x86_avx512_mask_getmant_ps_128 :
GCCBuiltin<"__builtin_ia32_getmantps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_getmant_ps_256 :
GCCBuiltin<"__builtin_ia32_getmantps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>]>;
def int_x86_avx512_mask_getmant_ps_512 :
GCCBuiltin<"__builtin_ia32_getmantps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<1>, ImmArg<4>]>;
def int_x86_avx512_mask_getmant_ss :
GCCBuiltin<"__builtin_ia32_getmantss_round_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
def int_x86_avx512_mask_getmant_sd :
GCCBuiltin<"__builtin_ia32_getmantsd_round_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<2>, ImmArg<5>]>;
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
@@ -3491,41 +3506,41 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_rcp28_ps : GCCBuiltin<"__builtin_ia32_rcp28ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_rcp28_pd : GCCBuiltin<"__builtin_ia32_rcp28pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_exp2_ps : GCCBuiltin<"__builtin_ia32_exp2ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_exp2_pd : GCCBuiltin<"__builtin_ia32_exp2pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_rsqrt28_ps : GCCBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_i16_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_rsqrt28_pd : GCCBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<4>]>;
def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem, Commutative]>;
@@ -3538,6 +3553,12 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_pmulh_w_512 : GCCBuiltin<"__builtin_ia32_pmulhw512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx512_pavg_b_512 : GCCBuiltin<"__builtin_ia32_pavgb512">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_pavg_w_512 : GCCBuiltin<"__builtin_ia32_pavgw512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
+ [IntrNoMem]>;
def int_x86_avx512_pmaddw_d_512 : GCCBuiltin<"__builtin_ia32_pmaddwd512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v32i16_ty,
llvm_v32i16_ty], [IntrNoMem, Commutative]>;
@@ -3548,582 +3569,553 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_dbpsadbw_128 :
GCCBuiltin<"__builtin_ia32_dbpsadbw128">,
Intrinsic<[llvm_v8i16_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_dbpsadbw_256 :
GCCBuiltin<"__builtin_ia32_dbpsadbw256">,
Intrinsic<[llvm_v16i16_ty],
- [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_dbpsadbw_512 :
GCCBuiltin<"__builtin_ia32_dbpsadbw512">,
Intrinsic<[llvm_v32i16_ty],
- [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
}
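The ImmArg<N> attribute threaded through the hunks above marks the operand at zero-based index N as one that must be a compile-time immediate rather than a runtime value, which is exactly what the rounding-control, comparison-predicate and SAD-control operands of these intrinsics require. A hedged, user-level sketch of what that means in practice, assuming the usual mapping of _mm512_sqrt_round_pd onto int_x86_avx512_sqrt_pd_512 defined above:

// Build with: -mavx512f
#include <immintrin.h>

__m512d sqrt_round_to_nearest(__m512d v) {
  // _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC folds to an integer constant,
  // satisfying the immediate requirement on the ImmArg'd rounding operand.
  return _mm512_sqrt_round_pd(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}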
// Gather and Scatter ops
let TargetPrefix = "x86" in {
// NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
- def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gathersiv8df">,
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
+ def int_x86_avx512_gather_dpd_512 :
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gathersiv16sf">,
+ [IntrReadMem, ImmArg<4>]>;
+ def int_x86_avx512_gather_dps_512 :
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8df">,
+ [IntrReadMem, ImmArg<4>]>;
+ def int_x86_avx512_gather_qpd_512 :
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16sf">,
+ [IntrReadMem, ImmArg<4>]>;
+ def int_x86_avx512_gather_qps_512 :
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
- def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gathersiv8di">,
+ def int_x86_avx512_gather_dpq_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx512_gather_dpi_512 : GCCBuiltin<"__builtin_ia32_gathersiv16si">,
+ [IntrReadMem, ImmArg<4>]>;
+ def int_x86_avx512_gather_dpi_512 :
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx512_gather_qpq_512 : GCCBuiltin<"__builtin_ia32_gatherdiv8di">,
+ [IntrReadMem, ImmArg<4>]>;
+ def int_x86_avx512_gather_qpq_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx512_gather_qpi_512 : GCCBuiltin<"__builtin_ia32_gatherdiv16si">,
+ [IntrReadMem, ImmArg<4>]>;
+ def int_x86_avx512_gather_qpi_512 :
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div2_df :
- GCCBuiltin<"__builtin_ia32_gather3div2df">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div2_di :
- GCCBuiltin<"__builtin_ia32_gather3div2di">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div4_df :
- GCCBuiltin<"__builtin_ia32_gather3div4df">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div4_di :
- GCCBuiltin<"__builtin_ia32_gather3div4di">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div4_sf :
- GCCBuiltin<"__builtin_ia32_gather3div4sf">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div4_si :
- GCCBuiltin<"__builtin_ia32_gather3div4si">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div8_sf :
- GCCBuiltin<"__builtin_ia32_gather3div8sf">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3div8_si :
- GCCBuiltin<"__builtin_ia32_gather3div8si">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv2_df :
- GCCBuiltin<"__builtin_ia32_gather3siv2df">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv2_di :
- GCCBuiltin<"__builtin_ia32_gather3siv2di">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv4_df :
- GCCBuiltin<"__builtin_ia32_gather3siv4df">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv4_di :
- GCCBuiltin<"__builtin_ia32_gather3siv4di">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv4_sf :
- GCCBuiltin<"__builtin_ia32_gather3siv4sf">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv4_si :
- GCCBuiltin<"__builtin_ia32_gather3siv4si">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv8_sf :
- GCCBuiltin<"__builtin_ia32_gather3siv8sf">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_gather3siv8_si :
- GCCBuiltin<"__builtin_ia32_gather3siv8si">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
// scatter
// NOTE: These are deprecated in favor of the versions that take a vXi1 mask.
- def int_x86_avx512_scatter_dpd_512 : GCCBuiltin<"__builtin_ia32_scattersiv8df">,
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
+ def int_x86_avx512_scatter_dpd_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
- def int_x86_avx512_scatter_dps_512 : GCCBuiltin<"__builtin_ia32_scattersiv16sf">,
+ [ImmArg<4>]>;
+ def int_x86_avx512_scatter_dps_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
- def int_x86_avx512_scatter_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8df">,
+ [ImmArg<4>]>;
+ def int_x86_avx512_scatter_qpd_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
- def int_x86_avx512_scatter_qps_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16sf">,
+ [ImmArg<4>]>;
+ def int_x86_avx512_scatter_qps_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
- def int_x86_avx512_scatter_dpq_512 : GCCBuiltin<"__builtin_ia32_scattersiv8di">,
+ def int_x86_avx512_scatter_dpq_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,
llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
- def int_x86_avx512_scatter_dpi_512 : GCCBuiltin<"__builtin_ia32_scattersiv16si">,
+ [ImmArg<4>]>;
+ def int_x86_avx512_scatter_dpi_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
- def int_x86_avx512_scatter_qpq_512 : GCCBuiltin<"__builtin_ia32_scatterdiv8di">,
+ [ImmArg<4>]>;
+ def int_x86_avx512_scatter_qpq_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty,llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i32_ty],
- [IntrArgMemOnly]>;
- def int_x86_avx512_scatter_qpi_512 : GCCBuiltin<"__builtin_ia32_scatterdiv16si">,
+ [ImmArg<4>]>;
+ def int_x86_avx512_scatter_qpi_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_v8i32_ty,
llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv2_df :
- GCCBuiltin<"__builtin_ia32_scatterdiv2df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv2_di :
- GCCBuiltin<"__builtin_ia32_scatterdiv2di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv4_df :
- GCCBuiltin<"__builtin_ia32_scatterdiv4df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv4_di :
- GCCBuiltin<"__builtin_ia32_scatterdiv4di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv4_sf :
- GCCBuiltin<"__builtin_ia32_scatterdiv4sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv4_si :
- GCCBuiltin<"__builtin_ia32_scatterdiv4si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv8_sf :
- GCCBuiltin<"__builtin_ia32_scatterdiv8sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scatterdiv8_si :
- GCCBuiltin<"__builtin_ia32_scatterdiv8si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv2_df :
- GCCBuiltin<"__builtin_ia32_scattersiv2df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv2_di :
- GCCBuiltin<"__builtin_ia32_scattersiv2di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv4_df :
- GCCBuiltin<"__builtin_ia32_scattersiv4df">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv4_di :
- GCCBuiltin<"__builtin_ia32_scattersiv4di">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv4_sf :
- GCCBuiltin<"__builtin_ia32_scattersiv4sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv4_si :
- GCCBuiltin<"__builtin_ia32_scattersiv4si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv8_sf :
- GCCBuiltin<"__builtin_ia32_scattersiv8sf">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_scattersiv8_si :
- GCCBuiltin<"__builtin_ia32_scattersiv8si">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
// gather prefetch
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
def int_x86_avx512_gatherpf_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
def int_x86_avx512_gatherpf_dps_512 : GCCBuiltin<"__builtin_ia32_gatherpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
def int_x86_avx512_gatherpf_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
def int_x86_avx512_gatherpf_qps_512 : GCCBuiltin<"__builtin_ia32_gatherpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
// scatter prefetch
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
def int_x86_avx512_scatterpf_dpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfdpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
def int_x86_avx512_scatterpf_dps_512 : GCCBuiltin<"__builtin_ia32_scatterpfdps">,
Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
def int_x86_avx512_scatterpf_qpd_512 : GCCBuiltin<"__builtin_ia32_scatterpfqpd">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
def int_x86_avx512_scatterpf_qps_512 : GCCBuiltin<"__builtin_ia32_scatterpfqps">,
Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+ llvm_i32_ty, llvm_i32_ty], [ImmArg<3>, ImmArg<4>]>;
}
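The NOTE comments are the reason IntrArgMemOnly disappears from these gather/scatter definitions: the pointer operand is only a base, and the addresses actually touched are base + index*scale, so they need not lie inside any object reachable from that pointer; the scale operand, meanwhile, becomes an ImmArg. A hedged user-level sketch, assuming the usual mapping of _mm512_i32gather_pd onto one of the gather intrinsics above:

// Build with: -mavx512f
#include <immintrin.h>

__m512d gather_doubles(const double *base, __m256i indices) {
  // The scale must be a literal 1, 2, 4 or 8; 8 == sizeof(double) here.
  // Elements are loaded from base + indices[i] * 8, which may be far from
  // anything the base pointer itself points into.
  return _mm512_i32gather_pd(indices, base, 8);
}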
// AVX512 gather/scatter intrinsics that use vXi1 masks.
let TargetPrefix = "x86" in {
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
def int_x86_avx512_mask_gather_dpd_512 :
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_dps_512 :
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_ptr_ty,
llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_qpd_512 :
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_qps_512 :
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_dpq_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_dpi_512 :
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
llvm_v16i32_ty, llvm_v16i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_qpq_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather_qpi_512 :
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_ptr_ty,
llvm_v8i64_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div2_df :
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div2_di :
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div4_df :
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div4_di :
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div4_sf :
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div4_si :
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div8_sf :
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3div8_si :
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv2_df :
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv2_di :
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv4_df :
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv4_di :
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv4_sf :
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv4_si :
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv8_sf :
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_gather3siv8_si :
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i1_ty, llvm_i32_ty],
- [IntrReadMem, IntrArgMemOnly]>;
+ [IntrReadMem, ImmArg<4>]>;
def int_x86_avx512_mask_scatter_dpd_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
llvm_v8i32_ty, llvm_v8f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatter_dps_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
llvm_v16i32_ty, llvm_v16f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatter_qpd_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
llvm_v8i64_ty, llvm_v8f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatter_qps_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
llvm_v8i64_ty, llvm_v8f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
+ // NOTE: These can't be ArgMemOnly because you can put the address completely
+ // in the index register.
def int_x86_avx512_mask_scatter_dpq_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,
llvm_v8i32_ty, llvm_v8i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatter_dpi_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v16i1_ty,
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatter_qpq_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty,llvm_v8i64_ty, llvm_v8i64_ty,
llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatter_qpi_512 :
Intrinsic<[], [llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i32_ty,
llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv2_df :
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv2_di :
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv4_df :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv4_di :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv4_sf :
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv4_si :
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i1_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv8_sf :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scatterdiv8_si :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv2_df :
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv2_di :
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i1_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv4_df :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv4_di :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv4_sf :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv4_si :
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i1_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv8_sf :
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
def int_x86_avx512_mask_scattersiv8_si :
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i1_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
- [IntrArgMemOnly]>;
+ [ImmArg<4>]>;
}
// AVX-512 conflict detection instruction
// Instructions that count the number of leading zero bits
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_conflict_d_128 :
- GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_conflict_d_256 :
- GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_conflict_d_512 :
- GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_conflict_q_128 :
- GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_conflict_q_256 :
- GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_conflict_q_512 :
- GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ def int_x86_avx512_conflict_d_128 :
+ GCCBuiltin<"__builtin_ia32_vpconflictsi_128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_conflict_d_256 :
+ GCCBuiltin<"__builtin_ia32_vpconflictsi_256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_conflict_d_512 :
+ GCCBuiltin<"__builtin_ia32_vpconflictsi_512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_conflict_q_128 :
+ GCCBuiltin<"__builtin_ia32_vpconflictdi_128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_conflict_q_256 :
+ GCCBuiltin<"__builtin_ia32_vpconflictdi_256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_conflict_q_512 :
+ GCCBuiltin<"__builtin_ia32_vpconflictdi_512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>;
}
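The masked vpconflict intrinsics collapse into plain single-operand forms; masking is now expected to be expressed as a separate select on the result. A hedged sketch of the corresponding user-level call, assuming the standard _mm512_conflict_epi32 wrapper:

// Build with: -mavx512cd
#include <immintrin.h>

__m512i conflicts(__m512i a) {
  // Each 32-bit lane receives a bitmap of the earlier lanes holding the same value.
  return _mm512_conflict_epi32(a);
}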
// Compares
@@ -4131,164 +4123,26 @@ let TargetPrefix = "x86" in {
// 512-bit
def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">,
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
}
// Compress, Expand
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_compress_ps_512 :
- GCCBuiltin<"__builtin_ia32_compresssf512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_pd_512 :
- GCCBuiltin<"__builtin_ia32_compressdf512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_ps_256 :
- GCCBuiltin<"__builtin_ia32_compresssf256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_pd_256 :
- GCCBuiltin<"__builtin_ia32_compressdf256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_ps_128 :
- GCCBuiltin<"__builtin_ia32_compresssf128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_pd_128 :
- GCCBuiltin<"__builtin_ia32_compressdf128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_compress_d_512 :
- GCCBuiltin<"__builtin_ia32_compresssi512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_q_512 :
- GCCBuiltin<"__builtin_ia32_compressdi512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_d_256 :
- GCCBuiltin<"__builtin_ia32_compresssi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_q_256 :
- GCCBuiltin<"__builtin_ia32_compressdi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_d_128 :
- GCCBuiltin<"__builtin_ia32_compresssi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_q_128 :
- GCCBuiltin<"__builtin_ia32_compressdi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_compress_b_512 :
- GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_w_512 :
- GCCBuiltin<"__builtin_ia32_compresshi512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_b_256 :
- GCCBuiltin<"__builtin_ia32_compressqi256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_w_256 :
- GCCBuiltin<"__builtin_ia32_compresshi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_b_128 :
- GCCBuiltin<"__builtin_ia32_compressqi128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_compress_w_128 :
- GCCBuiltin<"__builtin_ia32_compresshi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
-// expand
- def int_x86_avx512_mask_expand_ps_512 :
- GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_pd_512 :
- GCCBuiltin<"__builtin_ia32_expanddf512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_ps_256 :
- GCCBuiltin<"__builtin_ia32_expandsf256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_pd_256 :
- GCCBuiltin<"__builtin_ia32_expanddf256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_ps_128 :
- GCCBuiltin<"__builtin_ia32_expandsf128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_pd_128 :
- GCCBuiltin<"__builtin_ia32_expanddf128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_expand_d_512 :
- GCCBuiltin<"__builtin_ia32_expandsi512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_q_512 :
- GCCBuiltin<"__builtin_ia32_expanddi512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_d_256 :
- GCCBuiltin<"__builtin_ia32_expandsi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_q_256 :
- GCCBuiltin<"__builtin_ia32_expanddi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_d_128 :
- GCCBuiltin<"__builtin_ia32_expandsi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_q_128 :
- GCCBuiltin<"__builtin_ia32_expanddi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_expand_b_512 :
- GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_w_512 :
- GCCBuiltin<"__builtin_ia32_expandhi512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_b_256 :
- GCCBuiltin<"__builtin_ia32_expandqi256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_w_256 :
- GCCBuiltin<"__builtin_ia32_expandhi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_b_128 :
- GCCBuiltin<"__builtin_ia32_expandqi128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_expand_w_128 :
- GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_compress :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_expand :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [IntrNoMem]>;
}
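The long list of per-type, per-width compress/expand intrinsics collapses into two overloaded definitions whose mask is a vector of i1 sized to match the data type (LLVMScalarOrSameVectorWidth). At the C level the existing wrappers keep their shape; a hedged sketch, assuming _mm512_mask_compress_ps still lowers to the now-overloaded compress intrinsic:

// Build with: -mavx512f
#include <immintrin.h>

__m512 compress_selected(__m512 src, __mmask16 k, __m512 a) {
  // Lanes of a selected by k are packed towards element 0; the remaining
  // lanes are taken from src. The 16-bit mask becomes a <16 x i1> operand.
  return _mm512_mask_compress_ps(src, k, a);
}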
// truncate
@@ -4502,10 +4356,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[],
[llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
- def int_x86_avx512_mask_pmov_qd_256 : // FIXME: Replace with trunc+select.
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_avx512_mask_pmov_qd_mem_256 :
GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">,
Intrinsic<[],
@@ -4531,10 +4381,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[],
[llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
- def int_x86_avx512_mask_pmov_qd_512 : // FIXME: Replace with trunc+select.
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
def int_x86_avx512_mask_pmov_qd_mem_512 :
GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">,
Intrinsic<[],
@@ -4768,10 +4614,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[],
[llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrArgMemOnly]>;
- def int_x86_avx512_mask_pmov_wb_256 : // FIXME: Replace with trunc+select.
- Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
def int_x86_avx512_mask_pmov_wb_mem_256 :
GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">,
Intrinsic<[],
@@ -4797,10 +4639,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[],
[llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrArgMemOnly]>;
- def int_x86_avx512_mask_pmov_wb_512 : // FIXME: Replace with trunc+select.
- Intrinsic<[llvm_v32i8_ty],
- [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
def int_x86_avx512_mask_pmov_wb_mem_512 :
GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">,
Intrinsic<[],
@@ -4834,36 +4672,64 @@ let TargetPrefix = "x86" in {
GCCBuiltin<"__builtin_ia32_pternlogd128">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_pternlog_d_256 :
GCCBuiltin<"__builtin_ia32_pternlogd256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_pternlog_d_512 :
GCCBuiltin<"__builtin_ia32_pternlogd512">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_pternlog_q_128 :
GCCBuiltin<"__builtin_ia32_pternlogq128">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_pternlog_q_256 :
GCCBuiltin<"__builtin_ia32_pternlogq256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<3>]>;
def int_x86_avx512_pternlog_q_512 :
GCCBuiltin<"__builtin_ia32_pternlogq512">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<3>]>;
+}
+
+// vp2intersect
+let TargetPrefix = "x86" in {
+ def int_x86_avx512_vp2intersect_q_512 :
+ Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
+ [llvm_v8i64_ty, llvm_v8i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_vp2intersect_q_256 :
+ Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_vp2intersect_q_128 :
+ Intrinsic<[llvm_v2i1_ty, llvm_v2i1_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_vp2intersect_d_512 :
+ Intrinsic<[llvm_v16i1_ty, llvm_v16i1_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_vp2intersect_d_256 :
+ Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_vp2intersect_d_128 :
+ Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
}
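vp2intersect is unusual in returning two mask vectors at once: for each lane of either input, the corresponding bit records whether that value also occurs somewhere in the other input. A hedged sketch of the expected user-level shape, assuming the standard _mm512_2intersect_epi32 wrapper:

// Build with: -mavx512vp2intersect (and -mavx512f)
#include <immintrin.h>

void intersect_epi32(__m512i a, __m512i b,
                     __mmask16 *match_in_a, __mmask16 *match_in_b) {
  // Bit i of *match_in_a is set when a[i] occurs in b, and bit i of
  // *match_in_b is set when b[i] occurs in a.
  _mm512_2intersect_epi32(a, b, match_in_a, match_in_b);
}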
@@ -4873,31 +4739,35 @@ let TargetPrefix = "x86" in {
// distinction in signaling behaviour is not implemented.
def int_x86_avx512_cmp_ps_512 :
Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
def int_x86_avx512_cmp_pd_512 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<3>]>;
def int_x86_avx512_cmp_ps_256 :
Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cmp_pd_256 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cmp_ps_128 :
Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_cmp_pd_128 :
Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem, ImmArg<2>]>;
def int_x86_avx512_mask_cmp_ss :
GCCBuiltin<"__builtin_ia32_cmpss_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<4>]>;
def int_x86_avx512_mask_cmp_sd :
GCCBuiltin<"__builtin_ia32_cmpsd_mask">,
Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ llvm_i32_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>, ImmArg<4>]>;
}
//===----------------------------------------------------------------------===//
@@ -4905,7 +4775,7 @@ let TargetPrefix = "x86" in {
let TargetPrefix = "x86" in {
def int_x86_sha1rnds4 : GCCBuiltin<"__builtin_ia32_sha1rnds4">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ [IntrNoMem, ImmArg<2>]>;
def int_x86_sha1nexte : GCCBuiltin<"__builtin_ia32_sha1nexte">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
def int_x86_sha1msg1 : GCCBuiltin<"__builtin_ia32_sha1msg1">,
@@ -5000,3 +4870,51 @@ let TargetPrefix = "x86" in {
def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
}
+
+let TargetPrefix = "x86" in {
+ def int_x86_avx512bf16_cvtne2ps2bf16_128:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtne2ps2bf16_256:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtne2ps2bf16_512:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+ [IntrNoMem]>;
+ // Intrinsic must be masked due to it producing less than 128 bits of results.
+ def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtneps2bf16_256:
+ GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtneps2bf16_512:
+ GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_128:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_256:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_512:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// ENQCMD - Enqueue Stores Instructions
+
+let TargetPrefix = "x86" in {
+ def int_x86_enqcmd : GCCBuiltin<"__builtin_ia32_enqcmd">,
+ Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
+ def int_x86_enqcmds : GCCBuiltin<"__builtin_ia32_enqcmds">,
+ Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_ptr_ty], []>;
+}
diff --git a/include/llvm/IR/IntrinsicsXCore.td b/include/llvm/IR/IntrinsicsXCore.td
index b614e1ed6ec0..7fe8bdfd3bd0 100644
--- a/include/llvm/IR/IntrinsicsXCore.td
+++ b/include/llvm/IR/IntrinsicsXCore.td
@@ -1,9 +1,8 @@
//==- IntrinsicsXCore.td - XCore intrinsics -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index bd7097b39a3e..c80504500418 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -1,9 +1,8 @@
//===- llvm/LLVMContext.h - Class for managing "global" state ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,12 +35,8 @@ template <typename T> class SmallVectorImpl;
class SMDiagnostic;
class StringRef;
class Twine;
-
-namespace yaml {
-
-class Output;
-
-} // end namespace yaml
+class RemarkStreamer;
+class raw_ostream;
namespace SyncScope {
@@ -103,6 +98,8 @@ public:
MD_callees = 23, // "callees"
MD_irr_loop = 24, // "irr_loop"
MD_access_group = 25, // "llvm.access.group"
+ MD_callback = 26, // "callback"
+ MD_preserve_access_index = 27, // "llvm.preserve.*.access.index"
};
/// Known operand bundle tag IDs, which always have the same value. All
@@ -246,16 +243,23 @@ public:
/// included in optimization diagnostics.
void setDiagnosticsHotnessThreshold(uint64_t Threshold);
- /// Return the YAML file used by the backend to save optimization
- /// diagnostics. If null, diagnostics are not saved in a file but only
- /// emitted via the diagnostic handler.
- yaml::Output *getDiagnosticsOutputFile();
- /// Set the diagnostics output file used for optimization diagnostics.
+ /// Return the streamer used by the backend to save remark diagnostics. If it
+ /// does not exist, diagnostics are not saved in a file but only emitted via
+ /// the diagnostic handler.
+ RemarkStreamer *getRemarkStreamer();
+ const RemarkStreamer *getRemarkStreamer() const;
+
+ /// Set the diagnostics output used for optimization diagnostics.
+ /// This filename may be embedded in a section for tools to find the
+ /// diagnostics whenever they're needed.
+ ///
+ /// If a remark streamer is already set, it will be replaced with
+ /// \p RemarkStreamer.
///
- /// By default or if invoked with null, diagnostics are not saved in a file
- /// but only emitted via the diagnostic handler. Even if an output file is
- /// set, the handler is invoked for each diagnostic message.
- void setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F);
+ /// By default, diagnostics are not saved in a file but only emitted via the
+ /// diagnostic handler. Even if an output file is set, the handler is invoked
+ /// for each diagnostic message.
+ void setRemarkStreamer(std::unique_ptr<RemarkStreamer> RemarkStreamer);
/// Get the prefix that should be printed in front of a diagnostic of
/// the given \p Severity
diff --git a/include/llvm/IR/LegacyPassManager.h b/include/llvm/IR/LegacyPassManager.h
index 5257a0eed488..d6bb79ab6019 100644
--- a/include/llvm/IR/LegacyPassManager.h
+++ b/include/llvm/IR/LegacyPassManager.h
@@ -1,9 +1,8 @@
//===- LegacyPassManager.h - Legacy Container for Passes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h
index 51a2eb2a146d..72bc80fb5381 100644
--- a/include/llvm/IR/LegacyPassManagers.h
+++ b/include/llvm/IR/LegacyPassManagers.h
@@ -1,9 +1,8 @@
//===- LegacyPassManagers.h - Legacy Pass Infrastructure --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/LegacyPassNameParser.h b/include/llvm/IR/LegacyPassNameParser.h
index 4cec08196408..30820e750350 100644
--- a/include/llvm/IR/LegacyPassNameParser.h
+++ b/include/llvm/IR/LegacyPassNameParser.h
@@ -1,9 +1,8 @@
//===- LegacyPassNameParser.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index 174616c7ab1d..3a2b1bddf45d 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -1,9 +1,8 @@
//===---- llvm/MDBuilder.h - Builder for LLVM metadata ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -95,6 +94,17 @@ public:
MDNode *createCallees(ArrayRef<Function *> Callees);
//===------------------------------------------------------------------===//
+ // Callback metadata.
+ //===------------------------------------------------------------------===//
+
+ /// Return metadata describing a callback (see llvm::AbstractCallSite).
+ MDNode *createCallbackEncoding(unsigned CalleeArgNo, ArrayRef<int> Arguments,
+ bool VarArgsArePassed);
+
+ /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks.
+ MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB);
+
+ //===------------------------------------------------------------------===//
// AA metadata.
//===------------------------------------------------------------------===//
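For context, the callback encoding above records which argument of a broker call is the callback and which arguments are forwarded to it. A minimal sketch of attaching such metadata to a hypothetical broker declaration (the function and the argument numbers are illustrative assumptions, not taken from this diff):

  // Assumes llvm/IR/MDBuilder.h and llvm/IR/Module.h.
  void addCallbackMD(Module &M, Function &Broker) {
    MDBuilder MDB(M.getContext());
    // Argument 2 of the broker is the callee; argument 3 is forwarded to it.
    MDNode *Encoding = MDB.createCallbackEncoding(/*CalleeArgNo=*/2,
                                                  /*Arguments=*/{3},
                                                  /*VarArgsArePassed=*/false);
    Broker.addMetadata(LLVMContext::MD_callback, *Encoding);
  }
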
diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h
index 0261c00f524c..e4a05ab46a65 100644
--- a/include/llvm/IR/Mangler.h
+++ b/include/llvm/IR/Mangler.h
@@ -1,9 +1,8 @@
//===-- llvm/IR/Mangler.h - Self-contained name mangler ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def
index 70a03f28b488..1df60cadac08 100644
--- a/include/llvm/IR/Metadata.def
+++ b/include/llvm/IR/Metadata.def
@@ -1,9 +1,8 @@
//===- llvm/IR/Metadata.def - Metadata definitions --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,6 +113,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock)
#undef HANDLE_METADATA
#undef HANDLE_METADATA_LEAF
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index be82c4efc115..7ca2540181ba 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -1,9 +1,8 @@
//===- llvm/IR/Metadata.h - Metadata definitions ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 9ef35f1f73cd..f458680cfe15 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -1,9 +1,8 @@
//===- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,7 @@
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/ProfileSummary.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/CodeGen.h"
@@ -333,16 +333,18 @@ public:
/// Look up the specified function in the module symbol table. Three
/// possibilities:
/// 1. If it does not exist, add a prototype for the function and return it.
- /// 2. If it exists, and has a local linkage, the existing function is
- /// renamed and a new one is inserted.
- /// 3. Otherwise, if the existing function has the correct prototype, return
+ /// 2. Otherwise, if the existing function has the correct prototype, return
/// the existing function.
- /// 4. Finally, the function exists but has the wrong prototype: return the
+ /// 3. Finally, the function exists but has the wrong prototype: return the
/// function with a constantexpr cast to the right prototype.
- Constant *getOrInsertFunction(StringRef Name, FunctionType *T,
- AttributeList AttributeList);
+ ///
+ /// In all cases, the returned value is a FunctionCallee wrapper around the
+ /// 'FunctionType *T' passed in, as well as a 'Value*' either of the Function or
+ /// the bitcast to the function.
+ FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T,
+ AttributeList AttributeList);
- Constant *getOrInsertFunction(StringRef Name, FunctionType *T);
+ FunctionCallee getOrInsertFunction(StringRef Name, FunctionType *T);
/// Look up the specified function in the module symbol table. If it does not
/// exist, add a prototype for the function and return it. This function
@@ -350,11 +352,10 @@ public:
/// or a ConstantExpr BitCast of that type if the named function has a
/// different type. This version of the method takes a list of
/// function arguments, which makes it easier for clients to use.
- template<typename... ArgsTy>
- Constant *getOrInsertFunction(StringRef Name,
- AttributeList AttributeList,
- Type *RetTy, ArgsTy... Args)
- {
+ template <typename... ArgsTy>
+ FunctionCallee getOrInsertFunction(StringRef Name,
+ AttributeList AttributeList, Type *RetTy,
+ ArgsTy... Args) {
SmallVector<Type*, sizeof...(ArgsTy)> ArgTys{Args...};
return getOrInsertFunction(Name,
FunctionType::get(RetTy, ArgTys, false),
@@ -362,15 +363,17 @@ public:
}
/// Same as above, but without the attributes.
- template<typename... ArgsTy>
- Constant *getOrInsertFunction(StringRef Name, Type *RetTy, ArgsTy... Args) {
+ template <typename... ArgsTy>
+ FunctionCallee getOrInsertFunction(StringRef Name, Type *RetTy,
+ ArgsTy... Args) {
return getOrInsertFunction(Name, AttributeList{}, RetTy, Args...);
}
// Avoid an incorrect ordering that'd otherwise compile incorrectly.
template <typename... ArgsTy>
- Constant *getOrInsertFunction(StringRef Name, AttributeList AttributeList,
- FunctionType *Invalid, ArgsTy... Args) = delete;
+ FunctionCallee
+ getOrInsertFunction(StringRef Name, AttributeList AttributeList,
+ FunctionType *Invalid, ArgsTy... Args) = delete;
/// Look up the specified function in the module symbol table. If it does not
/// exist, return null.
@@ -866,10 +869,11 @@ public:
/// @{
/// Attach profile summary metadata to this module.
- void setProfileSummary(Metadata *M);
+ void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind);
- /// Returns profile summary metadata
- Metadata *getProfileSummary();
+ /// Returns profile summary metadata. When IsCS is true, use the context
+ /// sensitive profile summary.
+ Metadata *getProfileSummary(bool IsCS);
/// @}
/// Returns true if PLT should be avoided for RTLib calls.
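Since getOrInsertFunction now returns a FunctionCallee, callers get the FunctionType back alongside the callee value. A hedged sketch of the variadic form shown above (the module, builder position, and the "my_hook" name are illustrative; it also assumes the IRBuilder overloads that accept a FunctionCallee):

  // Assumes llvm/IR/IRBuilder.h and llvm/IR/Module.h.
  void emitHookCall(Module &M, IRBuilder<> &B, Value *Ptr) {
    LLVMContext &Ctx = M.getContext();
    // Declares (or re-uses) i32 @my_hook(i8*) and returns it with its type.
    FunctionCallee Hook = M.getOrInsertFunction(
        "my_hook", Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx));
    B.CreateCall(Hook, {Ptr});
  }
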
diff --git a/include/llvm/IR/ModuleSlotTracker.h b/include/llvm/IR/ModuleSlotTracker.h
index eb26fba906ea..85f8ff938366 100644
--- a/include/llvm/IR/ModuleSlotTracker.h
+++ b/include/llvm/IR/ModuleSlotTracker.h
@@ -1,9 +1,8 @@
//===-- llvm/IR/ModuleSlotTracker.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index a1acee494475..aacf8cfc089f 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -1,9 +1,8 @@
//===- llvm/ModuleSummaryIndex.h - Module Summary Index ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -120,7 +119,7 @@ class GlobalValueSummary;
using GlobalValueSummaryList = std::vector<std::unique_ptr<GlobalValueSummary>>;
-struct GlobalValueSummaryInfo {
+struct LLVM_ALIGNAS(8) GlobalValueSummaryInfo {
union NameOrGV {
NameOrGV(bool HaveGVs) {
if (HaveGVs)
@@ -163,7 +162,8 @@ using GlobalValueSummaryMapTy =
/// Struct that holds a reference to a particular GUID in a global value
/// summary.
struct ValueInfo {
- PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 2, int>
+ enum Flags { HaveGV = 1, ReadOnly = 2, WriteOnly = 4 };
+ PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 3, int>
RefAndFlags;
ValueInfo() = default;
@@ -189,15 +189,42 @@ struct ValueInfo {
: getRef()->second.U.Name;
}
- bool haveGVs() const { return RefAndFlags.getInt() & 0x1; }
- bool isReadOnly() const { return RefAndFlags.getInt() & 0x2; }
- void setReadOnly() { RefAndFlags.setInt(RefAndFlags.getInt() | 0x2); }
+ bool haveGVs() const { return RefAndFlags.getInt() & HaveGV; }
+ bool isReadOnly() const {
+ assert(isValidAccessSpecifier());
+ return RefAndFlags.getInt() & ReadOnly;
+ }
+ bool isWriteOnly() const {
+ assert(isValidAccessSpecifier());
+ return RefAndFlags.getInt() & WriteOnly;
+ }
+ unsigned getAccessSpecifier() const {
+ assert(isValidAccessSpecifier());
+ return RefAndFlags.getInt() & (ReadOnly | WriteOnly);
+ }
+ bool isValidAccessSpecifier() const {
+ unsigned BadAccessMask = ReadOnly | WriteOnly;
+ return (RefAndFlags.getInt() & BadAccessMask) != BadAccessMask;
+ }
+ void setReadOnly() {
+ // We expect the ro/wo attribute to be set only once during
+ // the ValueInfo lifetime.
+ assert(getAccessSpecifier() == 0);
+ RefAndFlags.setInt(RefAndFlags.getInt() | ReadOnly);
+ }
+ void setWriteOnly() {
+ assert(getAccessSpecifier() == 0);
+ RefAndFlags.setInt(RefAndFlags.getInt() | WriteOnly);
+ }
const GlobalValueSummaryMapTy::value_type *getRef() const {
return RefAndFlags.getPointer();
}
bool isDSOLocal() const;
+
+ /// Checks if all copies are eligible for auto-hiding (have flag set).
+ bool canAutoHide() const;
};
inline raw_ostream &operator<<(raw_ostream &OS, const ValueInfo &VI) {
@@ -280,11 +307,23 @@ public:
/// within the same linkage unit.
unsigned DSOLocal : 1;
+ /// In the per-module summary, indicates that the global value is
+ /// linkonce_odr and global unnamed addr (so eligible for auto-hiding
+ /// via hidden visibility). In the combined summary, indicates that the
+ /// prevailing linkonce_odr copy can be auto-hidden via hidden visibility
+ /// when it is upgraded to weak_odr in the backend. This is legal when
+ /// all copies are eligible for auto-hiding (i.e. all copies were
+ /// linkonce_odr global unnamed addr). If any copy is not (e.g. it was
+ /// originally weak_odr), we cannot auto-hide the prevailing copy as it
+ /// means the symbol was externally visible.
+ unsigned CanAutoHide : 1;
+
/// Convenience Constructors
explicit GVFlags(GlobalValue::LinkageTypes Linkage,
- bool NotEligibleToImport, bool Live, bool IsLocal)
+ bool NotEligibleToImport, bool Live, bool IsLocal,
+ bool CanAutoHide)
: Linkage(Linkage), NotEligibleToImport(NotEligibleToImport),
- Live(Live), DSOLocal(IsLocal) {}
+ Live(Live), DSOLocal(IsLocal), CanAutoHide(CanAutoHide) {}
};
private:
@@ -365,6 +404,10 @@ public:
bool isDSOLocal() const { return Flags.DSOLocal; }
+ void setCanAutoHide(bool CanAutoHide) { Flags.CanAutoHide = CanAutoHide; }
+
+ bool canAutoHide() const { return Flags.CanAutoHide; }
+
/// Flag that this global value cannot be imported.
void setNotEligibleToImport() { Flags.NotEligibleToImport = true; }
@@ -381,25 +424,35 @@ public:
/// Alias summary information.
class AliasSummary : public GlobalValueSummary {
+ ValueInfo AliaseeValueInfo;
+
+ /// This is the Aliasee in the same module as alias (could get from VI, trades
+ /// memory for time). Note that this pointer may be null (and the value info
+ /// empty) when we have a distributed index where the alias is being imported
+ /// (as a copy of the aliasee), but the aliasee is not.
GlobalValueSummary *AliaseeSummary;
- // AliaseeGUID is only set and accessed when we are building a combined index
- // via the BitcodeReader.
- GlobalValue::GUID AliaseeGUID;
public:
AliasSummary(GVFlags Flags)
: GlobalValueSummary(AliasKind, Flags, ArrayRef<ValueInfo>{}),
- AliaseeSummary(nullptr), AliaseeGUID(0) {}
+ AliaseeSummary(nullptr) {}
/// Check if this is an alias summary.
static bool classof(const GlobalValueSummary *GVS) {
return GVS->getSummaryKind() == AliasKind;
}
- void setAliasee(GlobalValueSummary *Aliasee) { AliaseeSummary = Aliasee; }
- void setAliaseeGUID(GlobalValue::GUID GUID) { AliaseeGUID = GUID; }
+ void setAliasee(ValueInfo &AliaseeVI, GlobalValueSummary *Aliasee) {
+ AliaseeValueInfo = AliaseeVI;
+ AliaseeSummary = Aliasee;
+ }
- bool hasAliasee() const { return !!AliaseeSummary; }
+ bool hasAliasee() const {
+ assert(!!AliaseeSummary == (AliaseeValueInfo &&
+ !AliaseeValueInfo.getSummaryList().empty()) &&
+ "Expect to have both aliasee summary and summary list or neither");
+ return !!AliaseeSummary;
+ }
const GlobalValueSummary &getAliasee() const {
assert(AliaseeSummary && "Unexpected missing aliasee summary");
@@ -410,10 +463,13 @@ public:
return const_cast<GlobalValueSummary &>(
static_cast<const AliasSummary *>(this)->getAliasee());
}
- bool hasAliaseeGUID() const { return AliaseeGUID != 0; }
- const GlobalValue::GUID &getAliaseeGUID() const {
- assert(AliaseeGUID && "Unexpected missing aliasee GUID");
- return AliaseeGUID;
+ ValueInfo getAliaseeVI() const {
+ assert(AliaseeValueInfo && "Unexpected missing aliasee");
+ return AliaseeValueInfo;
+ }
+ GlobalValue::GUID getAliaseeGUID() const {
+ assert(AliaseeValueInfo && "Unexpected missing aliasee");
+ return AliaseeValueInfo.getGUID();
}
};
@@ -500,7 +556,8 @@ public:
return FunctionSummary(
FunctionSummary::GVFlags(
GlobalValue::LinkageTypes::AvailableExternallyLinkage,
- /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false),
+ /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false,
+ /*CanAutoHide=*/false),
/*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
std::vector<ValueInfo>(), std::move(Edges),
std::vector<GlobalValue::GUID>(),
@@ -552,8 +609,8 @@ public:
std::move(TypeTestAssumeConstVCalls),
std::move(TypeCheckedLoadConstVCalls)});
}
- // Gets the number of immutable refs in RefEdgeList
- unsigned immutableRefCount() const;
+ // Gets the number of readonly and writeonly refs in RefEdgeList
+ std::pair<unsigned, unsigned> specialRefCounts() const;
/// Check if this is a function summary.
static bool classof(const GlobalValueSummary *GVS) {
@@ -666,18 +723,43 @@ template <> struct DenseMapInfo<FunctionSummary::ConstVCall> {
}
};
+/// The ValueInfo and offset for a function within a vtable definition
+/// initializer array.
+struct VirtFuncOffset {
+ VirtFuncOffset(ValueInfo VI, uint64_t Offset)
+ : FuncVI(VI), VTableOffset(Offset) {}
+
+ ValueInfo FuncVI;
+ uint64_t VTableOffset;
+};
+/// List of functions referenced by a particular vtable definition.
+using VTableFuncList = std::vector<VirtFuncOffset>;
+
/// Global variable summary information to aid decisions and
/// implementation of importing.
///
-/// Global variable summary has extra flag, telling if it is
-/// modified during the program run or not. This affects ThinLTO
-/// internalization
+/// Global variable summary has two extra flags, telling if it is
+/// readonly or writeonly. Both readonly and writeonly variables
+/// can be optimized in the backend: readonly variables can be
+/// const-folded, while writeonly vars can be completely eliminated
+/// together with corresponding stores. We let both things happen
+/// by means of internalizing such variables after ThinLTO import.
class GlobalVarSummary : public GlobalValueSummary {
+private:
+ /// For vtable definitions this holds the list of functions and
+ /// their corresponding offsets within the initializer array.
+ std::unique_ptr<VTableFuncList> VTableFuncs;
+
public:
struct GVarFlags {
- GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {}
-
- unsigned ReadOnly : 1;
+ GVarFlags(bool ReadOnly, bool WriteOnly)
+ : MaybeReadOnly(ReadOnly), MaybeWriteOnly(WriteOnly) {}
+
+ // In per-module summaries both MaybeReadOnly and MaybeWriteOnly
+ // bits are set, because attribute propagation occurs later, during
+ // the thin link phase.
+ unsigned MaybeReadOnly : 1;
+ unsigned MaybeWriteOnly : 1;
} VarFlags;
GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags,
@@ -691,8 +773,21 @@ public:
}
GVarFlags varflags() const { return VarFlags; }
- void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; }
- bool isReadOnly() const { return VarFlags.ReadOnly; }
+ void setReadOnly(bool RO) { VarFlags.MaybeReadOnly = RO; }
+ void setWriteOnly(bool WO) { VarFlags.MaybeWriteOnly = WO; }
+ bool maybeReadOnly() const { return VarFlags.MaybeReadOnly; }
+ bool maybeWriteOnly() const { return VarFlags.MaybeWriteOnly; }
+
+ void setVTableFuncs(VTableFuncList Funcs) {
+ assert(!VTableFuncs);
+ VTableFuncs = llvm::make_unique<VTableFuncList>(std::move(Funcs));
+ }
+
+ ArrayRef<VirtFuncOffset> vTableFuncs() const {
+ if (VTableFuncs)
+ return *VTableFuncs;
+ return {};
+ }
};
struct TypeTestResolution {
@@ -791,6 +886,29 @@ using GVSummaryMapTy = DenseMap<GlobalValue::GUID, GlobalValueSummary *>;
using TypeIdSummaryMapTy =
std::multimap<GlobalValue::GUID, std::pair<std::string, TypeIdSummary>>;
+/// The following data structures summarize type metadata information.
+/// For type metadata overview see https://llvm.org/docs/TypeMetadata.html.
+/// Each type metadata includes both the type identifier and the offset of
+/// the address point of the type (the address held by objects of that type
+/// which may not be the beginning of the virtual table). Vtable definitions
+/// are decorated with type metadata for the types they are compatible with.
+///
+/// Holds information about vtable definitions decorated with type metadata:
+/// the vtable definition value and its address point offset in a type
+/// identifier metadata it is decorated (compatible) with.
+struct TypeIdOffsetVtableInfo {
+ TypeIdOffsetVtableInfo(uint64_t Offset, ValueInfo VI)
+ : AddressPointOffset(Offset), VTableVI(VI) {}
+
+ uint64_t AddressPointOffset;
+ ValueInfo VTableVI;
+};
+/// List of vtable definitions decorated by a particular type identifier,
+/// and their corresponding offsets in that type identifier's metadata.
+/// Note that each type identifier may be compatible with multiple vtables, due
+/// to inheritance, which is why this is a vector.
+using TypeIdCompatibleVtableInfo = std::vector<TypeIdOffsetVtableInfo>;
+
/// Class to hold module path string table and global value map,
/// and encapsulate methods for operating on them.
class ModuleSummaryIndex {
@@ -803,9 +921,15 @@ private:
ModulePathStringTableTy ModulePathStringTable;
/// Mapping from type identifier GUIDs to type identifier and its summary
- /// information.
+ /// information. Produced by thin link.
TypeIdSummaryMapTy TypeIdMap;
+ /// Mapping from type identifier to information about vtables decorated
+ /// with that type identifier's metadata. Produced by per module summary
+ /// analysis and consumed by thin link. For more information, see description
+ /// above where TypeIdCompatibleVtableInfo is defined.
+ std::map<std::string, TypeIdCompatibleVtableInfo> TypeIdCompatibleVtableMap;
+
/// Mapping from original ID to GUID. If original ID can map to multiple
/// GUIDs, it will be mapped to 0.
std::map<GlobalValue::GUID, GlobalValue::GUID> OidGuidMap;
@@ -1044,24 +1168,30 @@ public:
OidGuidMap[OrigGUID] = ValueGUID;
}
- /// Find the summary for global \p GUID in module \p ModuleId, or nullptr if
+ /// Find the summary for ValueInfo \p VI in module \p ModuleId, or nullptr if
/// not found.
- GlobalValueSummary *findSummaryInModule(GlobalValue::GUID ValueGUID,
- StringRef ModuleId) const {
- auto CalleeInfo = getValueInfo(ValueGUID);
- if (!CalleeInfo) {
- return nullptr; // This function does not have a summary
- }
+ GlobalValueSummary *findSummaryInModule(ValueInfo VI, StringRef ModuleId) const {
+ auto SummaryList = VI.getSummaryList();
auto Summary =
- llvm::find_if(CalleeInfo.getSummaryList(),
+ llvm::find_if(SummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &Summary) {
return Summary->modulePath() == ModuleId;
});
- if (Summary == CalleeInfo.getSummaryList().end())
+ if (Summary == SummaryList.end())
return nullptr;
return Summary->get();
}
+ /// Find the summary for global \p GUID in module \p ModuleId, or nullptr if
+ /// not found.
+ GlobalValueSummary *findSummaryInModule(GlobalValue::GUID ValueGUID,
+ StringRef ModuleId) const {
+ auto CalleeInfo = getValueInfo(ValueGUID);
+ if (!CalleeInfo)
+ return nullptr; // This function does not have a summary
+ return findSummaryInModule(CalleeInfo, ModuleId);
+ }
+
/// Returns the first GlobalValueSummary for \p GV, asserting that there
/// is only one if \p PerModuleIndex.
GlobalValueSummary *getGlobalValueSummary(const GlobalValue &GV,
@@ -1163,6 +1293,29 @@ public:
return nullptr;
}
+ const std::map<std::string, TypeIdCompatibleVtableInfo> &
+ typeIdCompatibleVtableMap() const {
+ return TypeIdCompatibleVtableMap;
+ }
+
+ /// Return an existing or new TypeIdCompatibleVtableMap entry for \p TypeId.
+ /// This accessor can mutate the map and therefore should not be used in
+ /// the ThinLTO backends.
+ TypeIdCompatibleVtableInfo &
+ getOrInsertTypeIdCompatibleVtableSummary(StringRef TypeId) {
+ return TypeIdCompatibleVtableMap[TypeId];
+ }
+
+ /// For the given \p TypeId, this returns the TypeIdCompatibleVtableMap
+ /// entry if present in the summary map. This may be used when importing.
+ Optional<TypeIdCompatibleVtableInfo>
+ getTypeIdCompatibleVtableSummary(StringRef TypeId) const {
+ auto I = TypeIdCompatibleVtableMap.find(TypeId);
+ if (I == TypeIdCompatibleVtableMap.end())
+ return None;
+ return I->second;
+ }
+
/// Collect for the given module the list of functions it defines
/// (GUID -> Summary).
void collectDefinedFunctionsForModule(StringRef ModulePath,
@@ -1170,8 +1323,16 @@ public:
/// Collect for each module the list of Summaries it defines (GUID ->
/// Summary).
- void collectDefinedGVSummariesPerModule(
- StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) const;
+ template <class Map>
+ void
+ collectDefinedGVSummariesPerModule(Map &ModuleToDefinedGVSummaries) const {
+ for (auto &GlobalList : *this) {
+ auto GUID = GlobalList.first;
+ for (auto &Summary : GlobalList.second.SummaryList) {
+ ModuleToDefinedGVSummaries[Summary->modulePath()][GUID] = Summary.get();
+ }
+ }
+ }
/// Print to an output stream.
void print(raw_ostream &OS, bool IsForDebug = false) const;
@@ -1186,7 +1347,7 @@ public:
void dumpSCCs(raw_ostream &OS);
/// Analyze index and detect unmodified globals
- void propagateConstants(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
+ void propagateAttributes(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
};
/// GraphTraits definition to build SCC for the index
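The extra CanAutoHide bit changes the GVFlags constructor arity, as the AvailableExternallyLinkage case above shows. A minimal sketch of building per-module flags with the new parameter (the concrete values are illustrative only):

  // Assumes llvm/IR/ModuleSummaryIndex.h.
  GlobalValueSummary::GVFlags Flags(
      GlobalValue::LinkageTypes::ExternalLinkage,
      /*NotEligibleToImport=*/false, /*Live=*/true, /*IsLocal=*/false,
      /*CanAutoHide=*/false);
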
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
index a88ee26b51c3..26d9c43fabf1 100644
--- a/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -1,9 +1,8 @@
//===-- llvm/ModuleSummaryIndexYAML.h - YAML I/O for summary ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -137,7 +136,7 @@ template <> struct MappingTraits<TypeIdSummary> {
struct FunctionSummaryYaml {
unsigned Linkage;
- bool NotEligibleToImport, Live, IsLocal;
+ bool NotEligibleToImport, Live, IsLocal, CanAutoHide;
std::vector<uint64_t> Refs;
std::vector<uint64_t> TypeTests;
std::vector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
@@ -181,6 +180,7 @@ template <> struct MappingTraits<FunctionSummaryYaml> {
io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport);
io.mapOptional("Live", summary.Live);
io.mapOptional("Local", summary.IsLocal);
+ io.mapOptional("CanAutoHide", summary.CanAutoHide);
io.mapOptional("Refs", summary.Refs);
io.mapOptional("TypeTests", summary.TypeTests);
io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls);
@@ -223,7 +223,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>(
GlobalValueSummary::GVFlags(
static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
- FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal),
+ FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, FSum.CanAutoHide),
/*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, Refs,
ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
std::move(FSum.TypeTestAssumeVCalls),
@@ -244,7 +244,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
FSum->flags().Linkage,
static_cast<bool>(FSum->flags().NotEligibleToImport),
static_cast<bool>(FSum->flags().Live),
- static_cast<bool>(FSum->flags().DSOLocal), Refs,
+ static_cast<bool>(FSum->flags().DSOLocal),
+ static_cast<bool>(FSum->flags().CanAutoHide), Refs,
FSum->type_tests(), FSum->type_test_assume_vcalls(),
FSum->type_checked_load_vcalls(),
FSum->type_test_assume_const_vcalls(),
diff --git a/include/llvm/IR/NoFolder.h b/include/llvm/IR/NoFolder.h
index def07ffe2ff6..0e3c19f4947f 100644
--- a/include/llvm/IR/NoFolder.h
+++ b/include/llvm/IR/NoFolder.h
@@ -1,9 +1,8 @@
//===- NoFolder.h - Constant folding helper ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,6 +203,10 @@ public:
return BinaryOperator::CreateNot(C);
}
+ Instruction *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+ return UnaryOperator::Create(Opc, C);
+ }
+
//===--------------------------------------------------------------------===//
// Memory Instructions
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h
index c618aff3df9a..979ad35019f8 100644
--- a/include/llvm/IR/OperandTraits.h
+++ b/include/llvm/IR/OperandTraits.h
@@ -1,9 +1,8 @@
//===-- llvm/OperandTraits.h - OperandTraits class definition ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h
index 6b387bbcccb1..8199c65ca8a0 100644
--- a/include/llvm/IR/Operator.h
+++ b/include/llvm/IR/Operator.h
@@ -1,9 +1,8 @@
//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -188,6 +187,12 @@ public:
FastMathFlags() = default;
+ static FastMathFlags getFast() {
+ FastMathFlags FMF;
+ FMF.setFast();
+ return FMF;
+ }
+
bool any() const { return Flags != 0; }
bool none() const { return Flags == 0; }
bool all() const { return Flags == ~0U; }
@@ -380,6 +385,7 @@ public:
case Instruction::ExtractElement:
case Instruction::ShuffleVector:
case Instruction::InsertElement:
+ case Instruction::PHI:
return false;
default:
return V->getType()->isFPOrFPVectorTy();
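FastMathFlags::getFast() is a small convenience for building an all-bits-set flag mask. A hedged sketch applying it to a hypothetical floating-point instruction I:

  // Assumes llvm/IR/Operator.h and llvm/IR/Instruction.h.
  if (isa<FPMathOperator>(I))
    I->setFastMathFlags(FastMathFlags::getFast()); // enable all fast-math flags
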
diff --git a/include/llvm/IR/OptBisect.h b/include/llvm/IR/OptBisect.h
index aa24c94c0130..1b2b0bd7acaa 100644
--- a/include/llvm/IR/OptBisect.h
+++ b/include/llvm/IR/OptBisect.h
@@ -1,9 +1,8 @@
//===- llvm/IR/OptBisect.h - LLVM Bisect support ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -20,12 +19,6 @@
namespace llvm {
class Pass;
-class Module;
-class Function;
-class BasicBlock;
-class Region;
-class Loop;
-class CallGraphSCC;
/// Extensions to this class implement mechanisms to disable passes and
/// individual optimizations at compile time.
@@ -33,12 +26,14 @@ class OptPassGate {
public:
virtual ~OptPassGate() = default;
- virtual bool shouldRunPass(const Pass *P, const Module &U) { return true; }
- virtual bool shouldRunPass(const Pass *P, const Function &U) {return true; }
- virtual bool shouldRunPass(const Pass *P, const BasicBlock &U) { return true; }
- virtual bool shouldRunPass(const Pass *P, const Region &U) { return true; }
- virtual bool shouldRunPass(const Pass *P, const Loop &U) { return true; }
- virtual bool shouldRunPass(const Pass *P, const CallGraphSCC &U) { return true; }
+ /// IRDescription is a textual description of the IR unit the pass is running
+ /// over.
+ virtual bool shouldRunPass(const Pass *P, StringRef IRDescription) {
+ return true;
+ }
+
+ /// isEnabled should return true before calling shouldRunPass
+ virtual bool isEnabled() const { return false; }
};
/// This class implements a mechanism to disable passes and individual
@@ -60,23 +55,19 @@ public:
/// Checks the bisect limit to determine if the specified pass should run.
///
- /// These functions immediately return true if bisection is disabled. If the
- /// bisect limit is set to -1, the functions print a message describing
+ /// If the bisect limit is set to -1, the function prints a message describing
/// the pass and the bisect number assigned to it and return true. Otherwise,
- /// the functions print a message with the bisect number assigned to the
+ /// the function prints a message with the bisect number assigned to the
/// pass and indicating whether or not the pass will be run and return true if
/// the bisect limit has not yet been exceeded or false if it has.
///
- /// Most passes should not call these routines directly. Instead, they are
+ /// Most passes should not call this routine directly. Instead, they are
/// called through helper routines provided by the pass base classes. For
/// instance, function passes should call FunctionPass::skipFunction().
- bool shouldRunPass(const Pass *P, const Module &U) override;
- bool shouldRunPass(const Pass *P, const Function &U) override;
- bool shouldRunPass(const Pass *P, const BasicBlock &U) override;
- bool shouldRunPass(const Pass *P, const Region &U) override;
- bool shouldRunPass(const Pass *P, const Loop &U) override;
- bool shouldRunPass(const Pass *P, const CallGraphSCC &U) override;
+ bool shouldRunPass(const Pass *P, StringRef IRDescription) override;
+ /// isEnabled should return true before calling shouldRunPass
+ bool isEnabled() const override { return BisectEnabled; }
private:
bool checkPass(const StringRef PassName, const StringRef TargetDesc);
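With the single StringRef-based hook, a custom gate only needs shouldRunPass plus isEnabled. A hedged sketch of a gate that skips passes over IR units whose description mentions a given name (the "foo" filter and the installation via LLVMContext::setOptPassGate are illustrative assumptions):

  // Assumes llvm/IR/OptBisect.h and llvm/ADT/StringRef.h.
  struct SkipFooGate : public OptPassGate {
    bool shouldRunPass(const Pass *P, StringRef IRDescription) override {
      return !IRDescription.contains("foo"); // skip anything targeting "foo"
    }
    bool isEnabled() const override { return true; }
  };
  // Ctx.setOptPassGate(Gate); would then route the skip queries here.
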
diff --git a/include/llvm/IR/PassInstrumentation.h b/include/llvm/IR/PassInstrumentation.h
index 08dac1c4a274..f8a1196871cf 100644
--- a/include/llvm/IR/PassInstrumentation.h
+++ b/include/llvm/IR/PassInstrumentation.h
@@ -1,9 +1,8 @@
//===- llvm/IR/PassInstrumentation.h ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h
index 738a2242eea0..37fe2a5b01ad 100644
--- a/include/llvm/IR/PassManager.h
+++ b/include/llvm/IR/PassManager.h
@@ -1,9 +1,8 @@
//===- PassManager.h - Pass management infrastructure -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -287,6 +286,13 @@ public:
PA.PreservedIDs.count(ID));
}
+ /// Return true if the checker's analysis was not abandoned, i.e. it was not
+ /// explicitly invalidated. Even if the analysis is not explicitly
+ /// preserved, if the analysis is known stateless, then it is preserved.
+ bool preservedWhenStateless() {
+ return !IsAbandoned;
+ }
+
/// Returns true if the checker's analysis was not abandoned and either
/// - \p AnalysisSetT is explicitly preserved or
/// - all analyses are preserved.
diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h
index 5ad68be62742..58198bf67b11 100644
--- a/include/llvm/IR/PassManagerInternal.h
+++ b/include/llvm/IR/PassManagerInternal.h
@@ -1,9 +1,8 @@
//===- PassManager internal APIs and implementation details -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/IR/PassTimingInfo.h b/include/llvm/IR/PassTimingInfo.h
index e9945f997f43..b8d8f117f73d 100644
--- a/include/llvm/IR/PassTimingInfo.h
+++ b/include/llvm/IR/PassTimingInfo.h
@@ -1,9 +1,8 @@
//===- PassTimingInfo.h - pass execution timing -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -27,10 +26,12 @@ namespace llvm {
class Pass;
class PassInstrumentationCallbacks;
+class raw_ostream;
/// If -time-passes has been specified, report the timings immediately and then
-/// reset the timers to zero.
-void reportAndResetTimings();
+/// reset the timers to zero. By default it uses the stream created by
+/// CreateInfoOutputFile().
+void reportAndResetTimings(raw_ostream *OutStream = nullptr);
/// Request the timer for this legacy-pass-manager's pass instance.
Timer *getPassTimer(Pass *);
@@ -63,18 +64,18 @@ class TimePassesHandler {
/// Stack of currently active timers.
SmallVector<Timer *, 8> TimerStack;
+ /// Custom output stream to print timing information into.
+ /// By default (== nullptr) we emit time report into the stream created by
+ /// CreateInfoOutputFile().
+ raw_ostream *OutStream = nullptr;
+
bool Enabled;
public:
TimePassesHandler(bool Enabled = TimePassesIsEnabled);
/// Destructor handles the print action if it has not been handled before.
- ~TimePassesHandler() {
- // First destroying the timers from TimingData, which deploys all their
- // collected data into the TG time group member, which later prints itself
- // when being destroyed.
- TimingData.clear();
- }
+ ~TimePassesHandler() { print(); }
/// Prints out timing information and then resets the timers.
void print();
@@ -85,6 +86,9 @@ public:
void registerCallbacks(PassInstrumentationCallbacks &PIC);
+ /// Set a custom output stream for subsequent reporting.
+ void setOutStream(raw_ostream &OutStream);
+
private:
/// Dumps information for running/triggered timers, useful for debugging
LLVM_DUMP_METHOD void dump() const;
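Both timing entry points can now target a caller-provided stream. A minimal sketch directing the -time-passes report to stderr (illustrative only; with no stream the report still goes through CreateInfoOutputFile()):

  // Assumes llvm/IR/PassTimingInfo.h and llvm/Support/raw_ostream.h.
  TimePassesHandler Timing(/*Enabled=*/true);
  Timing.setOutStream(errs());        // new-PM timing report to stderr
  reportAndResetTimings(&errs());     // legacy-PM timing report to stderr
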
diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h
index 120fc253b908..0f03d7cc56b8 100644
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@@ -1,9 +1,8 @@
//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -419,6 +418,46 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
return cst_pred_ty<is_lowbit_mask>();
}
+struct icmp_pred_with_threshold {
+ ICmpInst::Predicate Pred;
+ const APInt *Thr;
+ bool isValue(const APInt &C) {
+ switch (Pred) {
+ case ICmpInst::Predicate::ICMP_EQ:
+ return C.eq(*Thr);
+ case ICmpInst::Predicate::ICMP_NE:
+ return C.ne(*Thr);
+ case ICmpInst::Predicate::ICMP_UGT:
+ return C.ugt(*Thr);
+ case ICmpInst::Predicate::ICMP_UGE:
+ return C.uge(*Thr);
+ case ICmpInst::Predicate::ICMP_ULT:
+ return C.ult(*Thr);
+ case ICmpInst::Predicate::ICMP_ULE:
+ return C.ule(*Thr);
+ case ICmpInst::Predicate::ICMP_SGT:
+ return C.sgt(*Thr);
+ case ICmpInst::Predicate::ICMP_SGE:
+ return C.sge(*Thr);
+ case ICmpInst::Predicate::ICMP_SLT:
+ return C.slt(*Thr);
+ case ICmpInst::Predicate::ICMP_SLE:
+ return C.sle(*Thr);
+ default:
+ llvm_unreachable("Unhandled ICmp predicate");
+ }
+ }
+};
+/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
+/// to Threshold. For vectors, this includes constants with undefined elements.
+inline cst_pred_ty<icmp_pred_with_threshold>
+m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
+ cst_pred_ty<icmp_pred_with_threshold> P;
+ P.Pred = Predicate;
+ P.Thr = &Threshold;
+ return P;
+}
+
struct is_nan {
bool isValue(const APFloat &C) { return C.isNaN(); }
};
@@ -668,18 +707,26 @@ template <typename Op_t> struct FNeg_match {
FNeg_match(const Op_t &Op) : X(Op) {}
template <typename OpTy> bool match(OpTy *V) {
auto *FPMO = dyn_cast<FPMathOperator>(V);
- if (!FPMO || FPMO->getOpcode() != Instruction::FSub)
- return false;
- if (FPMO->hasNoSignedZeros()) {
- // With 'nsz', any zero goes.
- if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
- return false;
- } else {
- // Without 'nsz', we need fsub -0.0, X exactly.
- if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
- return false;
+ if (!FPMO) return false;
+
+ if (FPMO->getOpcode() == Instruction::FNeg)
+ return X.match(FPMO->getOperand(0));
+
+ if (FPMO->getOpcode() == Instruction::FSub) {
+ if (FPMO->hasNoSignedZeros()) {
+ // With 'nsz', any zero goes.
+ if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
+ return false;
+ } else {
+ // Without 'nsz', we need fsub -0.0, X exactly.
+ if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
+ return false;
+ }
+
+ return X.match(FPMO->getOperand(1));
}
- return X.match(FPMO->getOperand(1));
+
+ return false;
}
};
@@ -1464,6 +1511,20 @@ struct UAddWithOverflow_match {
if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+ // Match special-case for increment-by-1.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // (a + 1) == 0
+ // (1 + a) == 0
+ if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
+ (m_One().match(AddLHS) || m_One().match(AddRHS)))
+ return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+ // 0 == (a + 1)
+ // 0 == (1 + a)
+ if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
+ (m_One().match(AddLHS) || m_One().match(AddRHS)))
+ return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+ }
+
return false;
}
};
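m_SpecificInt_ICMP pairs an ICmp predicate with a threshold, matching scalar constants and vector constants (including ones with undefined elements) that satisfy the comparison. A hedged usage sketch (V is a hypothetical Value*):

  // Assumes llvm/IR/PatternMatch.h.
  using namespace llvm::PatternMatch;
  APInt Threshold(32, 8);
  // True for any i32 (or <N x i32>) constant C with C u< 8.
  bool IsSmall = match(V, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
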
diff --git a/include/llvm/IR/PredIteratorCache.h b/include/llvm/IR/PredIteratorCache.h
index 81f535311431..cc835277910b 100644
--- a/include/llvm/IR/PredIteratorCache.h
+++ b/include/llvm/IR/PredIteratorCache.h
@@ -1,9 +1,8 @@
//===- PredIteratorCache.h - pred_iterator Cache ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/ProfileSummary.h b/include/llvm/IR/ProfileSummary.h
index e38663770a13..78635ec4386c 100644
--- a/include/llvm/IR/ProfileSummary.h
+++ b/include/llvm/IR/ProfileSummary.h
@@ -1,9 +1,8 @@
//===- ProfileSummary.h - Profile summary data structure. -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,11 +42,10 @@ using SummaryEntryVector = std::vector<ProfileSummaryEntry>;
class ProfileSummary {
public:
- enum Kind { PSK_Instr, PSK_Sample };
+ enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample };
private:
const Kind PSK;
- static const char *KindStr[2];
SummaryEntryVector DetailedSummary;
uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
uint32_t NumCounts, NumFunctions;
diff --git a/include/llvm/IR/RemarkStreamer.h b/include/llvm/IR/RemarkStreamer.h
new file mode 100644
index 000000000000..f34cc660b2fb
--- /dev/null
+++ b/include/llvm/IR/RemarkStreamer.h
@@ -0,0 +1,96 @@
+//===- llvm/IR/RemarkStreamer.h - Remark Streamer ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the main interface for outputting remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_REMARKSTREAMER_H
+#define LLVM_IR_REMARKSTREAMER_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+/// Streamer for remarks.
+class RemarkStreamer {
+ /// The filename that the remark diagnostics are emitted to.
+ const std::string Filename;
+ /// The regex used to filter remarks based on the passes that emit them.
+ Optional<Regex> PassFilter;
+ /// The object used to serialize the remarks to a specific format.
+ std::unique_ptr<remarks::Serializer> Serializer;
+
+ /// Convert diagnostics into remark objects.
+ /// The lifetime of the members of the result is bound to the lifetime of
+ /// the LLVM diagnostics.
+ remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag);
+
+public:
+ RemarkStreamer(StringRef Filename,
+ std::unique_ptr<remarks::Serializer> Serializer);
+ /// Return the filename that the remark diagnostics are emitted to.
+ StringRef getFilename() const { return Filename; }
+ /// Return stream that the remark diagnostics are emitted to.
+ raw_ostream &getStream() { return Serializer->OS; }
+ /// Return the serializer used for this stream.
+ remarks::Serializer &getSerializer() { return *Serializer; }
+ /// Set a pass filter based on a regex \p Filter.
+ /// Returns an error if the regex is invalid.
+ Error setFilter(StringRef Filter);
+ /// Emit a diagnostic through the streamer.
+ void emit(const DiagnosticInfoOptimizationBase &Diag);
+};
+
+template <typename ThisError>
+struct RemarkSetupErrorInfo : public ErrorInfo<ThisError> {
+ std::string Msg;
+ std::error_code EC;
+
+ RemarkSetupErrorInfo(Error E) {
+ handleAllErrors(std::move(E), [&](const ErrorInfoBase &EIB) {
+ Msg = EIB.message();
+ EC = EIB.convertToErrorCode();
+ });
+ }
+
+ void log(raw_ostream &OS) const override { OS << Msg; }
+ std::error_code convertToErrorCode() const override { return EC; }
+};
+
+struct RemarkSetupFileError : RemarkSetupErrorInfo<RemarkSetupFileError> {
+ static char ID;
+ using RemarkSetupErrorInfo<RemarkSetupFileError>::RemarkSetupErrorInfo;
+};
+
+struct RemarkSetupPatternError : RemarkSetupErrorInfo<RemarkSetupPatternError> {
+ static char ID;
+ using RemarkSetupErrorInfo<RemarkSetupPatternError>::RemarkSetupErrorInfo;
+};
+
+struct RemarkSetupFormatError : RemarkSetupErrorInfo<RemarkSetupFormatError> {
+ static char ID;
+ using RemarkSetupErrorInfo<RemarkSetupFormatError>::RemarkSetupErrorInfo;
+};
+
+/// Setup optimization remarks.
+Expected<std::unique_ptr<ToolOutputFile>>
+setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+ StringRef RemarksPasses, StringRef RemarksFormat,
+ bool RemarksWithHotness,
+ unsigned RemarksHotnessThreshold = 0);
+
+} // end namespace llvm
+
+#endif // LLVM_IR_REMARKSTREAMER_H
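setupOptimizationRemarks is the intended front door for this streamer: it is expected to open the output file, install a RemarkStreamer on the context, and hand back the ToolOutputFile to keep alive. A hedged sketch of driver-style use (the file name, pass filter, and "yaml" format string are illustrative values):

  // Assumes llvm/IR/RemarkStreamer.h and llvm/Support/Error.h.
  Expected<std::unique_ptr<ToolOutputFile>> FileOrErr =
      setupOptimizationRemarks(Ctx, /*RemarksFilename=*/"out.opt.yaml",
                               /*RemarksPasses=*/".*", /*RemarksFormat=*/"yaml",
                               /*RemarksWithHotness=*/true);
  if (!FileOrErr)
    report_fatal_error(toString(FileOrErr.takeError()));
  std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*FileOrErr);
  // ... run the pass pipeline ...
  if (RemarksFile)
    RemarksFile->keep();
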
diff --git a/include/llvm/IR/RuntimeLibcalls.def b/include/llvm/IR/RuntimeLibcalls.def
index 89005120cdc1..f6c74d497b18 100644
--- a/include/llvm/IR/RuntimeLibcalls.def
+++ b/include/llvm/IR/RuntimeLibcalls.def
@@ -1,9 +1,8 @@
//===-- llvm/RuntimeLibcalls.def - File that describes libcalls -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -255,6 +254,26 @@ HANDLE_LIBCALL(FMAX_F64, "fmax")
HANDLE_LIBCALL(FMAX_F80, "fmaxl")
HANDLE_LIBCALL(FMAX_F128, "fmaxl")
HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl")
+HANDLE_LIBCALL(LROUND_F32, "lroundf")
+HANDLE_LIBCALL(LROUND_F64, "lround")
+HANDLE_LIBCALL(LROUND_F80, "lroundl")
+HANDLE_LIBCALL(LROUND_F128, "lroundl")
+HANDLE_LIBCALL(LROUND_PPCF128, "lroundl")
+HANDLE_LIBCALL(LLROUND_F32, "llroundf")
+HANDLE_LIBCALL(LLROUND_F64, "llround")
+HANDLE_LIBCALL(LLROUND_F80, "llroundl")
+HANDLE_LIBCALL(LLROUND_F128, "llroundl")
+HANDLE_LIBCALL(LLROUND_PPCF128, "llroundl")
+HANDLE_LIBCALL(LRINT_F32, "lrintf")
+HANDLE_LIBCALL(LRINT_F64, "lrint")
+HANDLE_LIBCALL(LRINT_F80, "lrintl")
+HANDLE_LIBCALL(LRINT_F128, "lrintl")
+HANDLE_LIBCALL(LRINT_PPCF128, "lrintl")
+HANDLE_LIBCALL(LLRINT_F32, "llrintf")
+HANDLE_LIBCALL(LLRINT_F64, "llrint")
+HANDLE_LIBCALL(LLRINT_F80, "llrintl")
+HANDLE_LIBCALL(LLRINT_F128, "llrintl")
+HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl")
// Conversion
HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
@@ -530,6 +549,9 @@ HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail")
// Deoptimization
HANDLE_LIBCALL(DEOPTIMIZE, "__llvm_deoptimize")
+// Return address
+HANDLE_LIBCALL(RETURN_ADDRESS, nullptr)
+
HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr)
#undef HANDLE_LIBCALL
diff --git a/include/llvm/IR/SafepointIRVerifier.h b/include/llvm/IR/SafepointIRVerifier.h
index 092050d1d207..ec5527954adc 100644
--- a/include/llvm/IR/SafepointIRVerifier.h
+++ b/include/llvm/IR/SafepointIRVerifier.h
@@ -1,9 +1,8 @@
//===- SafepointIRVerifier.h - Checks for GC relocation problems *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,8 @@
#ifndef LLVM_IR_SAFEPOINT_IR_VERIFIER
#define LLVM_IR_SAFEPOINT_IR_VERIFIER
+#include "llvm/IR/PassManager.h"
+
namespace llvm {
class Function;
@@ -30,6 +31,16 @@ void verifySafepointIR(Function &F);
/// Create an instance of the safepoint verifier pass which can be added to
/// a pass pipeline to check for relocation bugs.
FunctionPass *createSafepointIRVerifierPass();
+
+/// Create an instance of the safepoint verifier pass which can be added to
+/// a pass pipeline to check for relocation bugs.
+class SafepointIRVerifierPass : public PassInfoMixin<SafepointIRVerifierPass> {
+
+public:
+ explicit SafepointIRVerifierPass() {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
}
#endif // LLVM_IR_SAFEPOINT_IR_VERIFIER
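The verifier is now usable from both pass managers. A hedged sketch of wiring it up (the managers and module are hypothetical; the legacy path keeps using createSafepointIRVerifierPass()):

  // Assumes llvm/IR/SafepointIRVerifier.h, llvm/IR/PassManager.h,
  // and llvm/IR/LegacyPassManager.h.
  FunctionPassManager FPM;
  FPM.addPass(SafepointIRVerifierPass());         // new pass manager
  legacy::FunctionPassManager LegacyFPM(&M);
  LegacyFPM.add(createSafepointIRVerifierPass()); // legacy pass manager
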
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index 8908e1b0d090..89f130bc3351 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -1,14 +1,13 @@
//===- llvm/IR/Statepoint.h - gc.statepoint utilities -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions and a wrapper class analogous to
-// CallSite for accessing the fields of gc.statepoint, gc.relocate,
+// CallBase for accessing the fields of gc.statepoint, gc.relocate,
// gc.result intrinsics; and some general utilities helpful when dealing with
// gc.statepoint.
//
@@ -21,7 +20,6 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -57,42 +55,36 @@ enum class StatepointFlags {
class GCRelocateInst;
class GCResultInst;
-bool isStatepoint(ImmutableCallSite CS);
+bool isStatepoint(const CallBase *Call);
bool isStatepoint(const Value *V);
bool isStatepoint(const Value &V);
-bool isGCRelocate(ImmutableCallSite CS);
+bool isGCRelocate(const CallBase *Call);
bool isGCRelocate(const Value *V);
-bool isGCResult(ImmutableCallSite CS);
+bool isGCResult(const CallBase *Call);
bool isGCResult(const Value *V);
-/// Analogous to CallSiteBase, this provides most of the actual
+/// A wrapper around a GC intrinsic call, this provides most of the actual
/// functionality for Statepoint and ImmutableStatepoint. It is
/// templatized to allow easily specializing of const and non-const
-/// concrete subtypes. This is structured analogous to CallSite
-/// rather than the IntrinsicInst.h helpers since we need to support
-/// invokable statepoints.
+/// concrete subtypes.
template <typename FunTy, typename InstructionTy, typename ValueTy,
- typename CallSiteTy>
+ typename CallBaseTy>
class StatepointBase {
- CallSiteTy StatepointCS;
+ CallBaseTy *StatepointCall;
protected:
explicit StatepointBase(InstructionTy *I) {
- if (isStatepoint(I)) {
- StatepointCS = CallSiteTy(I);
- assert(StatepointCS && "isStatepoint implies CallSite");
- }
+ StatepointCall = isStatepoint(I) ? cast<CallBaseTy>(I) : nullptr;
}
- explicit StatepointBase(CallSiteTy CS) {
- if (isStatepoint(CS))
- StatepointCS = CS;
+ explicit StatepointBase(CallBaseTy *Call) {
+ StatepointCall = isStatepoint(Call) ? Call : nullptr;
}
public:
- using arg_iterator = typename CallSiteTy::arg_iterator;
+ using arg_iterator = typename CallBaseTy::const_op_iterator;
enum {
IDPos = 0,
@@ -107,30 +99,30 @@ public:
void *operator new(size_t s) = delete;
explicit operator bool() const {
- // We do not assign non-statepoint CallSites to StatepointCS.
- return (bool)StatepointCS;
+ // We do not assign non-statepoint call instructions to StatepointCall.
+ return (bool)StatepointCall;
}
- /// Return the underlying CallSite.
- CallSiteTy getCallSite() const {
+ /// Return the underlying call instruction.
+ CallBaseTy *getCall() const {
assert(*this && "check validity first!");
- return StatepointCS;
+ return StatepointCall;
}
uint64_t getFlags() const {
- return cast<ConstantInt>(getCallSite().getArgument(FlagsPos))
+ return cast<ConstantInt>(getCall()->getArgOperand(FlagsPos))
->getZExtValue();
}
/// Return the ID associated with this statepoint.
uint64_t getID() const {
- const Value *IDVal = getCallSite().getArgument(IDPos);
+ const Value *IDVal = getCall()->getArgOperand(IDPos);
return cast<ConstantInt>(IDVal)->getZExtValue();
}
/// Return the number of patchable bytes associated with this statepoint.
uint32_t getNumPatchBytes() const {
- const Value *NumPatchBytesVal = getCallSite().getArgument(NumPatchBytesPos);
+ const Value *NumPatchBytesVal = getCall()->getArgOperand(NumPatchBytesPos);
uint64_t NumPatchBytes =
cast<ConstantInt>(NumPatchBytesVal)->getZExtValue();
assert(isInt<32>(NumPatchBytes) && "should fit in 32 bits!");
@@ -139,12 +131,11 @@ public:
/// Return the value actually being called or invoked.
ValueTy *getCalledValue() const {
- return getCallSite().getArgument(CalledFunctionPos);
+ return getCall()->getArgOperand(CalledFunctionPos);
}
- InstructionTy *getInstruction() const {
- return getCallSite().getInstruction();
- }
+ // FIXME: Migrate users of this to `getCall` and remove it.
+ InstructionTy *getInstruction() const { return getCall(); }
/// Return the function being called if this is a direct call, otherwise
/// return null (if it's an indirect call).
@@ -153,12 +144,12 @@ public:
}
/// Return the caller function for this statepoint.
- FunTy *getCaller() const { return getCallSite().getCaller(); }
+ FunTy *getCaller() const { return getCall()->getCaller(); }
/// Determine if the statepoint cannot unwind.
bool doesNotThrow() const {
Function *F = getCalledFunction();
- return getCallSite().doesNotThrow() || (F ? F->doesNotThrow() : false);
+ return getCall()->doesNotThrow() || (F ? F->doesNotThrow() : false);
}
/// Return the type of the value returned by the call underlying the
@@ -171,18 +162,18 @@ public:
/// Number of arguments to be passed to the actual callee.
int getNumCallArgs() const {
- const Value *NumCallArgsVal = getCallSite().getArgument(NumCallArgsPos);
+ const Value *NumCallArgsVal = getCall()->getArgOperand(NumCallArgsPos);
return cast<ConstantInt>(NumCallArgsVal)->getZExtValue();
}
size_t arg_size() const { return getNumCallArgs(); }
- typename CallSiteTy::arg_iterator arg_begin() const {
- assert(CallArgsBeginPos <= (int)getCallSite().arg_size());
- return getCallSite().arg_begin() + CallArgsBeginPos;
+ arg_iterator arg_begin() const {
+ assert(CallArgsBeginPos <= (int)getCall()->arg_size());
+ return getCall()->arg_begin() + CallArgsBeginPos;
}
- typename CallSiteTy::arg_iterator arg_end() const {
+ arg_iterator arg_end() const {
auto I = arg_begin() + arg_size();
- assert((getCallSite().arg_end() - I) >= 0);
+ assert((getCall()->arg_end() - I) >= 0);
return I;
}
@@ -199,8 +190,8 @@ public:
/// Return true if the call or the callee has the given attribute.
bool paramHasAttr(unsigned i, Attribute::AttrKind A) const {
Function *F = getCalledFunction();
- return getCallSite().paramHasAttr(i + CallArgsBeginPos, A) ||
- (F ? F->getAttributes().hasAttribute(i, A) : false);
+ return getCall()->paramHasAttr(i + CallArgsBeginPos, A) ||
+ (F ? F->getAttributes().hasAttribute(i, A) : false);
}
/// Number of GC transition args.
@@ -208,14 +199,14 @@ public:
const Value *NumGCTransitionArgs = *arg_end();
return cast<ConstantInt>(NumGCTransitionArgs)->getZExtValue();
}
- typename CallSiteTy::arg_iterator gc_transition_args_begin() const {
+ arg_iterator gc_transition_args_begin() const {
auto I = arg_end() + 1;
- assert((getCallSite().arg_end() - I) >= 0);
+ assert((getCall()->arg_end() - I) >= 0);
return I;
}
- typename CallSiteTy::arg_iterator gc_transition_args_end() const {
+ arg_iterator gc_transition_args_end() const {
auto I = gc_transition_args_begin() + getNumTotalGCTransitionArgs();
- assert((getCallSite().arg_end() - I) >= 0);
+ assert((getCall()->arg_end() - I) >= 0);
return I;
}
@@ -231,14 +222,14 @@ public:
return cast<ConstantInt>(NumVMSArgs)->getZExtValue();
}
- typename CallSiteTy::arg_iterator deopt_begin() const {
+ arg_iterator deopt_begin() const {
auto I = gc_transition_args_end() + 1;
- assert((getCallSite().arg_end() - I) >= 0);
+ assert((getCall()->arg_end() - I) >= 0);
return I;
}
- typename CallSiteTy::arg_iterator deopt_end() const {
+ arg_iterator deopt_end() const {
auto I = deopt_begin() + getNumTotalVMSArgs();
- assert((getCallSite().arg_end() - I) >= 0);
+ assert((getCall()->arg_end() - I) >= 0);
return I;
}
@@ -247,15 +238,11 @@ public:
return make_range(deopt_begin(), deopt_end());
}
- typename CallSiteTy::arg_iterator gc_args_begin() const {
- return deopt_end();
- }
- typename CallSiteTy::arg_iterator gc_args_end() const {
- return getCallSite().arg_end();
- }
+ arg_iterator gc_args_begin() const { return deopt_end(); }
+ arg_iterator gc_args_end() const { return getCall()->arg_end(); }
unsigned gcArgsStartIdx() const {
- return gc_args_begin() - getInstruction()->op_begin();
+ return gc_args_begin() - getCall()->op_begin();
}
/// range adapter for gc arguments
@@ -304,25 +291,24 @@ public:
/// to a gc.statepoint.
class ImmutableStatepoint
: public StatepointBase<const Function, const Instruction, const Value,
- ImmutableCallSite> {
- using Base =
- StatepointBase<const Function, const Instruction, const Value,
- ImmutableCallSite>;
+ const CallBase> {
+ using Base = StatepointBase<const Function, const Instruction, const Value,
+ const CallBase>;
public:
explicit ImmutableStatepoint(const Instruction *I) : Base(I) {}
- explicit ImmutableStatepoint(ImmutableCallSite CS) : Base(CS) {}
+ explicit ImmutableStatepoint(const CallBase *Call) : Base(Call) {}
};
/// A specialization of it's base class for read-write access
/// to a gc.statepoint.
class Statepoint
- : public StatepointBase<Function, Instruction, Value, CallSite> {
- using Base = StatepointBase<Function, Instruction, Value, CallSite>;
+ : public StatepointBase<Function, Instruction, Value, CallBase> {
+ using Base = StatepointBase<Function, Instruction, Value, CallBase>;
public:
explicit Statepoint(Instruction *I) : Base(I) {}
- explicit Statepoint(CallSite CS) : Base(CS) {}
+ explicit Statepoint(CallBase *Call) : Base(Call) {}
};
/// Common base class for representing values projected from a statepoint.
@@ -347,14 +333,14 @@ public:
}
/// The statepoint with which this gc.relocate is associated.
- const Instruction *getStatepoint() const {
+ const CallBase *getStatepoint() const {
const Value *Token = getArgOperand(0);
// This takes care both of relocates for call statepoints and relocates
// on normal path of invoke statepoint.
if (!isa<LandingPadInst>(Token)) {
assert(isStatepoint(Token));
- return cast<Instruction>(Token);
+ return cast<CallBase>(Token);
}
// This relocate is on exceptional path of an invoke statepoint
@@ -366,7 +352,7 @@ public:
"safepoint block should be well formed");
assert(isStatepoint(InvokeBB->getTerminator()));
- return InvokeBB->getTerminator();
+ return cast<CallBase>(InvokeBB->getTerminator());
}
};
@@ -395,13 +381,11 @@ public:
}
Value *getBasePtr() const {
- ImmutableCallSite CS(getStatepoint());
- return *(CS.arg_begin() + getBasePtrIndex());
+ return *(getStatepoint()->arg_begin() + getBasePtrIndex());
}
Value *getDerivedPtr() const {
- ImmutableCallSite CS(getStatepoint());
- return *(CS.arg_begin() + getDerivedPtrIndex());
+ return *(getStatepoint()->arg_begin() + getDerivedPtrIndex());
}
};
@@ -418,28 +402,25 @@ public:
};
template <typename FunTy, typename InstructionTy, typename ValueTy,
- typename CallSiteTy>
+ typename CallBaseTy>
std::vector<const GCRelocateInst *>
-StatepointBase<FunTy, InstructionTy, ValueTy, CallSiteTy>::getRelocates()
+StatepointBase<FunTy, InstructionTy, ValueTy, CallBaseTy>::getRelocates()
const {
-
std::vector<const GCRelocateInst *> Result;
- CallSiteTy StatepointCS = getCallSite();
-
// Search for relocated pointers. Note that working backwards from the
// gc_relocates ensures that we only get pairs which are actually relocated
// and used after the statepoint.
- for (const User *U : getInstruction()->users())
+ for (const User *U : StatepointCall->users())
if (auto *Relocate = dyn_cast<GCRelocateInst>(U))
Result.push_back(Relocate);
- if (!StatepointCS.isInvoke())
+ auto *StatepointInvoke = dyn_cast<InvokeInst>(StatepointCall);
+ if (!StatepointInvoke)
return Result;
// We need to scan thorough exceptional relocations if it is invoke statepoint
- LandingPadInst *LandingPad =
- cast<InvokeInst>(getInstruction())->getLandingPadInst();
+ LandingPadInst *LandingPad = StatepointInvoke->getLandingPadInst();
// Search for gc relocates that are attached to this landingpad.
for (const User *LandingPadUser : LandingPad->users()) {
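
With the CallSite dependency dropped above, the statepoint wrappers are constructed directly over CallBase and argument access goes through getCall()->getArgOperand(). A hedged usage sketch (the function name and sentinel value are illustrative, not from the patch):

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Statepoint.h"

    // Return the statepoint ID for a gc.statepoint call/invoke, or ~0ULL otherwise.
    uint64_t statepointIDOrSentinel(const llvm::CallBase *Call) {
      llvm::ImmutableStatepoint SP(Call); // new const CallBase * constructor
      if (!SP)                            // non-statepoints leave the wrapper invalid
        return ~0ULL;
      return SP.getID();                  // reads the IDPos operand via getCall()
    }
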
diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h
index 87ce902c2811..5b793e5dbf28 100644
--- a/include/llvm/IR/SymbolTableListTraits.h
+++ b/include/llvm/IR/SymbolTableListTraits.h
@@ -1,9 +1,8 @@
//===- llvm/SymbolTableListTraits.h - Traits for iplist ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/TrackingMDRef.h b/include/llvm/IR/TrackingMDRef.h
index 084efada221f..d7377398b91b 100644
--- a/include/llvm/IR/TrackingMDRef.h
+++ b/include/llvm/IR/TrackingMDRef.h
@@ -1,9 +1,8 @@
//===- llvm/IR/TrackingMDRef.h - Tracking Metadata references ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index 9c1f99d1b3a2..f2aa49030aaa 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -1,9 +1,8 @@
//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -367,6 +366,7 @@ public:
return ContainedTys[0];
}
+ inline bool getVectorIsScalable() const;
inline unsigned getVectorNumElements() const;
Type *getVectorElementType() const {
assert(getTypeID() == VectorTyID);
@@ -467,28 +467,6 @@ template <> struct isa_impl<PointerType, Type> {
}
};
-//===----------------------------------------------------------------------===//
-// Provide specializations of GraphTraits to be able to treat a type as a
-// graph of sub types.
-
-template <> struct GraphTraits<Type *> {
- using NodeRef = Type *;
- using ChildIteratorType = Type::subtype_iterator;
-
- static NodeRef getEntryNode(Type *T) { return T; }
- static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->subtype_end(); }
-};
-
-template <> struct GraphTraits<const Type*> {
- using NodeRef = const Type *;
- using ChildIteratorType = Type::subtype_iterator;
-
- static NodeRef getEntryNode(NodeRef T) { return T; }
- static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->subtype_end(); }
-};
-
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
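
The Type.h change above also adds getVectorIsScalable() alongside getVectorNumElements(). A one-line illustrative check (the helper name is hypothetical):

    #include "llvm/IR/Type.h"

    // True for scalable vectors such as <vscale x 4 x i32>.
    bool isScalableVectorType(const llvm::Type *Ty) {
      return Ty->isVectorTy() && Ty->getVectorIsScalable();
    }
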
diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h
index c050c388d398..a83f85ea84c3 100644
--- a/include/llvm/IR/TypeFinder.h
+++ b/include/llvm/IR/TypeFinder.h
@@ -1,9 +1,8 @@
//===- llvm/IR/TypeFinder.h - Class to find used struct types ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index 25c44e0871a9..034ca2c8ac23 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -1,9 +1,8 @@
//===- llvm/Use.h - Definition of the Use class -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -140,7 +139,7 @@ private:
const Use *getImpliedUser() const LLVM_READONLY;
Value *Val = nullptr;
- Use *Next;
+ Use *Next = nullptr;
PointerIntPair<Use **, 2, PrevPtrTag, PrevPointerTraits> Prev;
void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); }
diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h
index b6bb0f19a0aa..a1f313e269b2 100644
--- a/include/llvm/IR/UseListOrder.h
+++ b/include/llvm/IR/UseListOrder.h
@@ -1,9 +1,8 @@
//===- llvm/IR/UseListOrder.h - LLVM Use List Order -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h
index aea31467f2fa..19d87c5c621d 100644
--- a/include/llvm/IR/User.h
+++ b/include/llvm/IR/User.h
@@ -1,9 +1,8 @@
//===- llvm/User.h - User class definition ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def
index e2ddba0aa159..aaf1651979a9 100644
--- a/include/llvm/IR/Value.def
+++ b/include/llvm/IR/Value.def
@@ -1,9 +1,8 @@
//===-------- llvm/IR/Value.def - File that describes Values ---v-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index 4f3a45c684fc..b2d8e7ac4741 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -1,9 +1,8 @@
//===- llvm/Value.h - Definition of the Value class -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -494,7 +493,7 @@ public:
/// swifterror attribute.
bool isSwiftError() const;
- /// Strip off pointer casts, all-zero GEPs, and aliases.
+ /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
///
/// Returns the original uncasted value. If this is called on a non-pointer
/// value, it returns 'this'.
@@ -504,6 +503,17 @@ public:
static_cast<const Value *>(this)->stripPointerCasts());
}
+ /// Strip off pointer casts, all-zero GEPs, address space casts, and aliases
+ /// but ensures the representation of the result stays the same.
+ ///
+ /// Returns the original uncasted value with the same representation. If this
+ /// is called on a non-pointer value, it returns 'this'.
+ const Value *stripPointerCastsSameRepresentation() const;
+ Value *stripPointerCastsSameRepresentation() {
+ return const_cast<Value *>(static_cast<const Value *>(this)
+ ->stripPointerCastsSameRepresentation());
+ }
+
/// Strip off pointer casts, all-zero GEPs, aliases and invariant group
/// info.
///
@@ -536,19 +546,48 @@ public:
static_cast<const Value *>(this)->stripInBoundsConstantOffsets());
}
- /// Accumulate offsets from \a stripInBoundsConstantOffsets().
- ///
- /// Stores the resulting constant offset stripped into the APInt provided.
- /// The provided APInt will be extended or truncated as needed to be the
- /// correct bitwidth for an offset of this pointer type.
- ///
- /// If this is called on a non-pointer value, it returns 'this'.
+ /// Accumulate the constant offset this value has compared to a base pointer.
+ /// Only 'getelementptr' instructions (GEPs) with constant indices are
+ /// accumulated but other instructions, e.g., casts, are stripped away as
+ /// well. The accumulated constant offset is added to \p Offset and the base
+ /// pointer is returned.
+ ///
+ /// The APInt \p Offset has to have a bit-width equal to the IntPtr type for
+ /// the address space of 'this' pointer value, e.g., use
+ /// DataLayout::getIndexTypeSizeInBits(Ty).
+ ///
+ /// If \p AllowNonInbounds is true, constant offsets in GEPs are stripped and
+ /// accumulated even if the GEP is not "inbounds".
+ ///
+ /// If this is called on a non-pointer value, it returns 'this' and the
+ /// \p Offset is not modified.
+ ///
+ /// Note that this function will never return a nullptr. It will also never
+ /// manipulate the \p Offset in a way that would not match the difference
+ /// between the underlying value and the returned one. Thus, if no constant
+ /// offset was found, the returned value is the underlying one and \p Offset
+ /// is unchanged.
+ const Value *stripAndAccumulateConstantOffsets(const DataLayout &DL,
+ APInt &Offset,
+ bool AllowNonInbounds) const;
+ Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
+ bool AllowNonInbounds) {
+ return const_cast<Value *>(
+ static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets(
+ DL, Offset, AllowNonInbounds));
+ }
+
+ /// This is a wrapper around stripAndAccumulateConstantOffsets with the
+ /// in-bounds requirement set to false.
const Value *stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
- APInt &Offset) const;
+ APInt &Offset) const {
+ return stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ false);
+ }
Value *stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
APInt &Offset) {
- return const_cast<Value *>(static_cast<const Value *>(this)
- ->stripAndAccumulateInBoundsConstantOffsets(DL, Offset));
+ return stripAndAccumulateConstantOffsets(DL, Offset,
+ /* AllowNonInbounds */ false);
}
/// Strip off pointer casts and inbounds GEPs.
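
A minimal sketch of calling the new stripAndAccumulateConstantOffsets() documented above; per that comment, the APInt must be sized with DataLayout::getIndexTypeSizeInBits() for the pointer's address space (the wrapper function itself is illustrative):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Value.h"

    // Walk Ptr back to its base object, accumulating constant GEP offsets.
    const llvm::Value *baseAndConstantOffset(const llvm::Value *Ptr,
                                             const llvm::DataLayout &DL,
                                             llvm::APInt &Offset) {
      Offset = llvm::APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      return Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                    /*AllowNonInbounds=*/true);
    }
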
diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h
index d94472ce1be1..1135d796f7ed 100644
--- a/include/llvm/IR/ValueHandle.h
+++ b/include/llvm/IR/ValueHandle.h
@@ -1,9 +1,8 @@
//===- ValueHandle.h - Value Smart Pointer classes --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -309,15 +308,6 @@ struct DenseMapInfo<AssertingVH<T>> {
}
};
-template <typename T>
-struct isPodLike<AssertingVH<T>> {
-#ifdef NDEBUG
- static const bool value = true;
-#else
- static const bool value = false;
-#endif
-};
-
/// Value handle that tracks a Value across RAUW.
///
/// TrackingVH is designed for situations where a client needs to hold a handle
@@ -549,14 +539,6 @@ template <typename T> struct DenseMapInfo<PoisoningVH<T>> {
}
};
-template <typename T> struct isPodLike<PoisoningVH<T>> {
-#ifdef NDEBUG
- static const bool value = true;
-#else
- static const bool value = false;
-#endif
-};
-
} // end namespace llvm
#endif // LLVM_IR_VALUEHANDLE_H
diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h
index e7e33918a613..6a79b1d387f3 100644
--- a/include/llvm/IR/ValueMap.h
+++ b/include/llvm/IR/ValueMap.h
@@ -1,9 +1,8 @@
//===- ValueMap.h - Safe map from Values to data ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h
index 012e717c7470..105ea73857af 100644
--- a/include/llvm/IR/ValueSymbolTable.h
+++ b/include/llvm/IR/ValueSymbolTable.h
@@ -1,9 +1,8 @@
//===- llvm/ValueSymbolTable.h - Implement a Value Symtab -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h
index 7255132e1e65..62c33c8325eb 100644
--- a/include/llvm/IR/Verifier.h
+++ b/include/llvm/IR/Verifier.h
@@ -1,9 +1,8 @@
//===- Verifier.h - LLVM IR Verifier ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h
index bedde8954fbb..05171300b602 100644
--- a/include/llvm/IRReader/IRReader.h
+++ b/include/llvm/IRReader/IRReader.h
@@ -1,9 +1,8 @@
//===---- llvm/IRReader/IRReader.h - Reader for LLVM IR files ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,11 +20,22 @@
namespace llvm {
class StringRef;
+class MemoryBuffer;
class MemoryBufferRef;
class Module;
class SMDiagnostic;
class LLVMContext;
+/// If the given MemoryBuffer holds a bitcode image, return a Module
+/// for it which does lazy deserialization of function bodies. Otherwise,
+/// attempt to parse it as LLVM Assembly and return a fully populated
+/// Module. The ShouldLazyLoadMetadata flag is passed down to the bitcode
+/// reader to optionally enable lazy metadata loading. This takes ownership
+/// of \p Buffer.
+std::unique_ptr<Module> getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer,
+ SMDiagnostic &Err, LLVMContext &Context,
+ bool ShouldLazyLoadMetadata = false);
+
/// If the given file holds a bitcode image, return a Module
/// for it which does lazy deserialization of function bodies. Otherwise,
/// attempt to parse it as LLVM Assembly and return a fully populated
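
A minimal sketch of the new buffer-owning getLazyIRModule() overload added above; the buffer contents and diagnostic handling are assumed to be supplied by the caller:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SourceMgr.h"

    // Lazily load IR (bitcode or assembly) from an owned buffer.
    std::unique_ptr<llvm::Module>
    loadLazily(std::unique_ptr<llvm::MemoryBuffer> Buf, llvm::LLVMContext &Ctx,
               llvm::SMDiagnostic &Err) {
      return llvm::getLazyIRModule(std::move(Buf), Err, Ctx,
                                   /*ShouldLazyLoadMetadata=*/true);
    }
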
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 037c0dbb56ec..164d0be2855a 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -1,9 +1,8 @@
//===- llvm/InitializePasses.h - Initialize All Passes ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -65,8 +64,9 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
void initializeAAResultsWrapperPassPass(PassRegistry&);
void initializeADCELegacyPassPass(PassRegistry&);
void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
-void initializeAddressSanitizerModulePass(PassRegistry&);
-void initializeAddressSanitizerPass(PassRegistry&);
+void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
+void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
+void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlignmentFromAssumptionsPass(PassRegistry&);
@@ -74,6 +74,7 @@ void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
+void initializeAttributorLegacyPassPass(PassRegistry&);
void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAAWrapperPassPass(PassRegistry&);
@@ -134,16 +135,15 @@ void initializeEarlyIfConverterPass(PassRegistry&);
void initializeEarlyMachineLICMPass(PassRegistry&);
void initializeEarlyTailDuplicatePass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
-void initializeEfficiencySanitizerPass(PassRegistry&);
void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
void initializeEntryExitInstrumenterPass(PassRegistry&);
-void initializeExpandISelPseudosPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
+void initializeFinalizeISelPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
void initializeFlattenCFGPassPass(PassRegistry&);
void initializeFloat2IntLegacyPassPass(PassRegistry&);
@@ -163,8 +163,9 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
+void initializeHardwareLoopsPass(PassRegistry&);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
-void initializeHWAddressSanitizerPass(PassRegistry&);
+void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeIPCPPass(PassRegistry&);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);
@@ -181,6 +182,7 @@ void initializeInstCountPass(PassRegistry&);
void initializeInstNamerPass(PassRegistry&);
void initializeInstSimplifyLegacyPassPass(PassRegistry &);
void initializeInstrProfilingLegacyPassPass(PassRegistry&);
+void initializeInstrOrderFileLegacyPassPass(PassRegistry&);
void initializeInstructionCombiningPassPass(PassRegistry&);
void initializeInstructionSelectPass(PassRegistry&);
void initializeInterleavedAccessPass(PassRegistry&);
@@ -219,6 +221,7 @@ void initializeLoopDeletionLegacyPassPass(PassRegistry&);
void initializeLoopDistributeLegacyPass(PassRegistry&);
void initializeLoopExtractorPass(PassRegistry&);
void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
+void initializeLoopFuseLegacyPass(PassRegistry&);
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
void initializeLoopInfoWrapperPassPass(PassRegistry&);
void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
@@ -241,6 +244,7 @@ void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
void initializeLowerExpectIntrinsicPass(PassRegistry&);
void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
+void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&);
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
void initializeLowerSwitchPass(PassRegistry&);
@@ -277,7 +281,7 @@ void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
void initializeMemorySSAWrapperPassPass(PassRegistry&);
void initializeMemorySanitizerLegacyPassPass(PassRegistry&);
void initializeMergeFunctionsPass(PassRegistry&);
-void initializeMergeICmpsPass(PassRegistry&);
+void initializeMergeICmpsLegacyPassPass(PassRegistry &);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
void initializeMetaRenamerPass(PassRegistry&);
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
@@ -298,6 +302,7 @@ void initializePEIPass(PassRegistry&);
void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&);
void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&);
void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&);
+void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&);
void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&);
void initializePHIEliminationPass(PassRegistry&);
void initializePartialInlinerLegacyPassPass(PassRegistry&);
diff --git a/include/llvm/LTO/Caching.h b/include/llvm/LTO/Caching.h
index 7201ab31f5b0..4c4a708d24e9 100644
--- a/include/llvm/LTO/Caching.h
+++ b/include/llvm/LTO/Caching.h
@@ -1,9 +1,8 @@
//===- Caching.h - LLVM Link Time Optimizer Configuration -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,8 +24,8 @@ namespace lto {
/// (e.g. in a cache).
///
/// Buffer callbacks must be thread safe.
-typedef std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>
- AddBufferFn;
+using AddBufferFn =
+ std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
/// Create a local file system cache which uses the given cache directory and
/// file callback. This function also creates the cache directory if it does not
diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h
index 7058602c3ee2..fb107e3fbe02 100644
--- a/include/llvm/LTO/Config.h
+++ b/include/llvm/LTO/Config.h
@@ -1,9 +1,8 @@
//===-Config.h - LLVM Link Time Optimizer Configuration -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,6 +55,9 @@ struct Config {
/// Disable entirely the optimizer, including importing for ThinLTO
bool CodeGenOnly = false;
+ /// Run PGO context sensitive IR instrumentation.
+ bool RunCSIRInstr = false;
+
/// If this field is set, the set of passes run in the middle-end optimizer
/// will be the one specified by the string. Only works with the new pass
/// manager as the old one doesn't have this ability.
@@ -74,6 +76,9 @@ struct Config {
/// with this triple.
std::string DefaultTriple;
+ /// Context Sensitive PGO profile path.
+ std::string CSIRProfile;
+
/// Sample PGO profile path.
std::string SampleProfile;
@@ -83,17 +88,29 @@ struct Config {
/// The directory to store .dwo files.
std::string DwoDir;
+ /// The name for the split debug info file used for the DW_AT_[GNU_]dwo_name
+ /// attribute in the skeleton CU. This should generally only be used when
+ /// running an individual backend directly via thinBackend(), as otherwise
+ /// all objects would use the same .dwo file. Not used as output path.
+ std::string SplitDwarfFile;
+
/// The path to write a .dwo file to. This should generally only be used when
/// running an individual backend directly via thinBackend(), as otherwise
- /// all .dwo files will be written to the same path.
- std::string DwoPath;
+ /// all .dwo files will be written to the same path. Not used in skeleton CU.
+ std::string SplitDwarfOutput;
/// Optimization remarks file path.
std::string RemarksFilename = "";
+ /// Optimization remarks pass filter.
+ std::string RemarksPasses = "";
+
/// Whether to emit optimization remarks with hotness informations.
bool RemarksWithHotness = false;
+ /// The format used for serializing remarks (default: YAML).
+ std::string RemarksFormat = "";
+
/// Whether to emit the pass manager debuggging informations.
bool DebugPassManager = false;
@@ -133,7 +150,7 @@ struct Config {
///
/// Note that in out-of-process backend scenarios, none of the hooks will be
/// called for ThinLTO tasks.
- typedef std::function<bool(unsigned Task, const Module &)> ModuleHookFn;
+ using ModuleHookFn = std::function<bool(unsigned Task, const Module &)>;
/// This module hook is called after linking (regular LTO) or loading
/// (ThinLTO) the module, before modifying it.
@@ -166,8 +183,8 @@ struct Config {
///
/// It is called regardless of whether the backend is in-process, although it
/// is not called from individual backend processes.
- typedef std::function<bool(const ModuleSummaryIndex &Index)>
- CombinedIndexHookFn;
+ using CombinedIndexHookFn =
+ std::function<bool(const ModuleSummaryIndex &Index)>;
CombinedIndexHookFn CombinedIndexHook;
/// This is a convenience function that configures this Config object to write
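
Taken together, the fields added to Config above might be populated by an LTO client roughly as follows (all values are illustrative placeholders, not defaults from this patch):

    #include "llvm/LTO/Config.h"

    llvm::lto::Config makeLTOConfig() {
      llvm::lto::Config Conf;
      Conf.RemarksFilename  = "remarks.out";
      Conf.RemarksPasses    = "inline";      // pass-name filter for remarks
      Conf.RemarksFormat    = "yaml";        // serialization format (YAML by default)
      Conf.RunCSIRInstr     = true;          // context-sensitive IR PGO instrumentation
      Conf.CSIRProfile      = "cs.profdata"; // context-sensitive profile path
      Conf.SplitDwarfFile   = "a.dwo";       // name recorded in the skeleton CU
      Conf.SplitDwarfOutput = "out/a.dwo";   // where the .dwo is actually written
      return Conf;
    }
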
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index 534d9b6f3f2a..ca0a8b64523a 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -1,9 +1,8 @@
//===-LTO.h - LLVM Link Time Optimizer ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/LTO/Config.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/IRSymtab.h"
@@ -51,7 +51,8 @@ void thinLTOResolvePrevailingInIndex(
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
- recordNewLinkage);
+ recordNewLinkage,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
/// Update the linkages in the given \p Index to mark exported values
/// as external and non-exported values as internal. The ThinLTO backends
@@ -84,8 +85,13 @@ std::string getThinLTOOutputFile(const std::string &Path,
/// Setup optimization remarks.
Expected<std::unique_ptr<ToolOutputFile>>
-setupOptimizationRemarks(LLVMContext &Context, StringRef LTORemarksFilename,
- bool LTOPassRemarksWithHotness, int Count = -1);
+setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+ StringRef RemarksPasses, StringRef RemarksFormat,
+ bool RemarksWithHotness, int Count = -1);
+
+/// Sets up the output file for saving statistics.
+Expected<std::unique_ptr<ToolOutputFile>>
+setupStatsFile(StringRef StatsFilename);
class LTO;
struct SymbolResolution;
@@ -110,6 +116,7 @@ private:
std::vector<std::pair<size_t, size_t>> ModuleSymIndices;
StringRef TargetTriple, SourceFileName, COFFLinkerOpts;
+ std::vector<StringRef> DependentLibraries;
std::vector<StringRef> ComdatTable;
public:
@@ -131,6 +138,7 @@ public:
using irsymtab::Symbol::isWeak;
using irsymtab::Symbol::isIndirect;
using irsymtab::Symbol::getName;
+ using irsymtab::Symbol::getIRName;
using irsymtab::Symbol::getVisibility;
using irsymtab::Symbol::canBeOmittedFromSymbolTable;
using irsymtab::Symbol::isTLS;
@@ -140,6 +148,7 @@ public:
using irsymtab::Symbol::getCOFFWeakExternalFallback;
using irsymtab::Symbol::getSectionName;
using irsymtab::Symbol::isExecutable;
+ using irsymtab::Symbol::isUsed;
};
/// A range over the symbols in this InputFile.
@@ -148,6 +157,9 @@ public:
/// Returns linker options specified in the input file.
StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; }
+ /// Returns dependent library specifiers from the input file.
+ ArrayRef<StringRef> getDependentLibraries() const { return DependentLibraries; }
+
/// Returns the path to the InputFile.
StringRef getName() const;
@@ -160,6 +172,9 @@ public:
// Returns a table with all the comdats used by this file.
ArrayRef<StringRef> getComdatTable() const { return ComdatTable; }
+ // Returns the only BitcodeModule from InputFile.
+ BitcodeModule &getSingleBitcodeModule();
+
private:
ArrayRef<Symbol> module_symbols(unsigned I) const {
const auto &Indices = ModuleSymIndices[I];
@@ -183,8 +198,8 @@ public:
/// the fly.
///
/// Stream callbacks must be thread safe.
-typedef std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>
- AddStreamFn;
+using AddStreamFn =
+ std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>;
/// This is the type of a native object cache. To request an item from the
/// cache, pass a unique string as the Key. For hits, the cached file will be
@@ -198,17 +213,16 @@ typedef std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>
///
/// if (AddStreamFn AddStream = Cache(Task, Key))
/// ProduceContent(AddStream);
-typedef std::function<AddStreamFn(unsigned Task, StringRef Key)>
- NativeObjectCache;
+using NativeObjectCache =
+ std::function<AddStreamFn(unsigned Task, StringRef Key)>;
/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
/// The details of this type definition aren't important; clients can only
/// create a ThinBackend using one of the create*ThinBackend() functions below.
-typedef std::function<std::unique_ptr<ThinBackendProc>(
+using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
Config &C, ModuleSummaryIndex &CombinedIndex,
StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache)>
- ThinBackend;
+ AddStreamFn AddStream, NativeObjectCache Cache)>;
/// This ThinBackend runs the individual backend jobs in-process.
ThinBackend createInProcessThinBackend(unsigned ParallelismLevel);
@@ -397,7 +411,10 @@ private:
const SymbolResolution *&ResI, const SymbolResolution *ResE);
Error runRegularLTO(AddStreamFn AddStream);
- Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache);
+ Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
+
+ Error checkPartiallySplit();
mutable bool CalledGetMaxTasks = false;
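
The NativeObjectCache comment above already gives the protocol in two lines; spelled out as a helper, where ProduceContent is a hypothetical client callback that writes the object to the provided stream:

    #include "llvm/LTO/LTO.h"

    // Ask the cache for a stream; only on a miss (non-null AddStreamFn) is the
    // object generated. On a hit the cache adds the cached file to the link.
    void produceOrReuse(const llvm::lto::NativeObjectCache &Cache, unsigned Task,
                        llvm::StringRef Key,
                        llvm::function_ref<void(llvm::lto::AddStreamFn)> ProduceContent) {
      if (llvm::lto::AddStreamFn AddStream = Cache(Task, Key))
        ProduceContent(AddStream);
    }
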
diff --git a/include/llvm/LTO/LTOBackend.h b/include/llvm/LTO/LTOBackend.h
index d4743f6940ff..4ff8a1993d49 100644
--- a/include/llvm/LTO/LTOBackend.h
+++ b/include/llvm/LTO/LTOBackend.h
@@ -1,9 +1,8 @@
//===-LTOBackend.h - LLVM Link Time Optimizer Backend ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/LTO/SummaryBasedOptimizations.h b/include/llvm/LTO/SummaryBasedOptimizations.h
index ad3a8e7dc77b..6697c821a5ea 100644
--- a/include/llvm/LTO/SummaryBasedOptimizations.h
+++ b/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -1,9 +1,8 @@
//=- llvm/LTO/SummaryBasedOptimizations.h -Link time optimizations-*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/LTO/legacy/LTOCodeGenerator.h b/include/llvm/LTO/legacy/LTOCodeGenerator.h
index 8f23b7cb4574..d3cb4c8b79a0 100644
--- a/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -1,9 +1,8 @@
//===-LTOCodeGenerator.h - LLVM Link Time Optimizer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -242,6 +241,7 @@ private:
TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
bool Freestanding = false;
+ std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
};
}
#endif
diff --git a/include/llvm/LTO/legacy/LTOModule.h b/include/llvm/LTO/legacy/LTOModule.h
index 017e223ed8a6..84b9b8c02942 100644
--- a/include/llvm/LTO/legacy/LTOModule.h
+++ b/include/llvm/LTO/legacy/LTOModule.h
@@ -1,9 +1,8 @@
//===-LTOModule.h - LLVM Link Time Optimizer ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Module.h"
+#include "llvm/LTO/LTO.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Target/TargetMachine.h"
@@ -48,6 +48,8 @@ private:
std::string LinkerOpts;
+ std::string DependentLibraries;
+
std::unique_ptr<Module> Mod;
MemoryBufferRef MBRef;
ModuleSymbolTable SymTab;
@@ -156,9 +158,17 @@ public:
const std::vector<StringRef> &getAsmUndefinedRefs() { return _asm_undefines; }
+ static lto::InputFile *createInputFile(const void *buffer, size_t buffer_size,
+ const char *path, std::string &out_error);
+
+ static size_t getDependentLibraryCount(lto::InputFile *input);
+
+ static const char *getDependentLibrary(lto::InputFile *input, size_t index, size_t *size);
+
private:
/// Parse metadata from the module
// FIXME: it only parses "llvm.linker.options" metadata at the moment
+ // FIXME: can't access metadata in lazily loaded modules
void parseMetadata();
/// Parse the symbols from the module and model-level ASM and add them to
diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index d4c69a1ce260..210a2ce00bdf 100644
--- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -1,9 +1,8 @@
//===-ThinLTOCodeGenerator.h - LLVM Link Time Optimizer -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/LTO/LTO.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -32,23 +32,6 @@ class StringRef;
class LLVMContext;
class TargetMachine;
-/// Wrapper around MemoryBufferRef, owning the identifier
-class ThinLTOBuffer {
- std::string OwnedIdentifier;
- StringRef Buffer;
-
-public:
- ThinLTOBuffer(StringRef Buffer, StringRef Identifier)
- : OwnedIdentifier(Identifier), Buffer(Buffer) {}
-
- MemoryBufferRef getMemBuffer() const {
- return MemoryBufferRef(Buffer,
- {OwnedIdentifier.c_str(), OwnedIdentifier.size()});
- }
- StringRef getBuffer() const { return Buffer; }
- StringRef getBufferIdentifier() const { return OwnedIdentifier; }
-};
-
/// Helper to gather options relevant to the target machine creation
struct TargetMachineBuilder {
Triple TheTriple;
@@ -268,37 +251,49 @@ public:
* and additionally resolve weak and linkonce symbols.
* Index is updated to reflect linkage changes from weak resolution.
*/
- void promote(Module &Module, ModuleSummaryIndex &Index);
+ void promote(Module &Module, ModuleSummaryIndex &Index,
+ const lto::InputFile &File);
/**
* Compute and emit the imported files for module at \p ModulePath.
*/
void emitImports(Module &Module, StringRef OutputName,
- ModuleSummaryIndex &Index);
+ ModuleSummaryIndex &Index,
+ const lto::InputFile &File);
/**
* Perform cross-module importing for the module identified by
* ModuleIdentifier.
*/
- void crossModuleImport(Module &Module, ModuleSummaryIndex &Index);
+ void crossModuleImport(Module &Module, ModuleSummaryIndex &Index,
+ const lto::InputFile &File);
/**
* Compute the list of summaries needed for importing into module.
*/
void gatherImportedSummariesForModule(
Module &Module, ModuleSummaryIndex &Index,
- std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
+ std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+ const lto::InputFile &File);
/**
* Perform internalization. Index is updated to reflect linkage changes.
*/
- void internalize(Module &Module, ModuleSummaryIndex &Index);
+ void internalize(Module &Module, ModuleSummaryIndex &Index,
+ const lto::InputFile &File);
/**
* Perform post-importing ThinLTO optimizations.
*/
void optimize(Module &Module);
+ /**
+ * Write temporary object file to SavedObjectsDirectoryPath, write symlink
+ * to Cache directory if needed. Returns the path to the generated file in
+ * SavedObjectsDirectoryPath.
+ */
+ std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
+ const MemoryBuffer &OutputBuffer);
/**@}*/
private:
@@ -314,7 +309,7 @@ private:
/// Vector holding the input buffers containing the bitcode modules to
/// process.
- std::vector<ThinLTOBuffer> Modules;
+ std::vector<std::unique_ptr<lto::InputFile>> Modules;
/// Set of symbols that need to be preserved outside of the set of bitcode
/// files.
diff --git a/include/llvm/LTO/legacy/UpdateCompilerUsed.h b/include/llvm/LTO/legacy/UpdateCompilerUsed.h
index 4be0027e97d7..af9d62b4af30 100644
--- a/include/llvm/LTO/legacy/UpdateCompilerUsed.h
+++ b/include/llvm/LTO/legacy/UpdateCompilerUsed.h
@@ -1,9 +1,8 @@
//==------ UpdateCompilerUsed.h - LLVM Link Time Optimizer Utility --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/LineEditor/LineEditor.h b/include/llvm/LineEditor/LineEditor.h
index 68995d0633ad..0beaf1bb23a9 100644
--- a/include/llvm/LineEditor/LineEditor.h
+++ b/include/llvm/LineEditor/LineEditor.h
@@ -1,9 +1,8 @@
//===-- llvm/LineEditor/LineEditor.h - line editor --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h
index 4f4af7187be4..4b0aabeee701 100644
--- a/include/llvm/LinkAllIR.h
+++ b/include/llvm/LinkAllIR.h
@@ -1,9 +1,8 @@
//===----- LinkAllIR.h - Reference All VMCore Code --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 0851c2f8d265..675d179eb22a 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -1,9 +1,8 @@
//===- llvm/LinkAllPasses.h ------------ Reference All Passes ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
@@ -103,6 +103,7 @@ namespace {
(void) llvm::createGCOVProfilerPass();
(void) llvm::createPGOInstrumentationGenLegacyPass();
(void) llvm::createPGOInstrumentationUseLegacyPass();
+ (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass();
(void) llvm::createPGOIndirectCallPromotionLegacyPass();
(void) llvm::createPGOMemOPSizeOptLegacyPass();
(void) llvm::createInstrProfilingLegacyPass();
@@ -188,10 +189,11 @@ namespace {
(void) llvm::createPostDomTree();
(void) llvm::createInstructionNamerPass();
(void) llvm::createMetaRenamerPass();
+ (void) llvm::createAttributorLegacyPass();
(void) llvm::createPostOrderFunctionAttrsLegacyPass();
(void) llvm::createReversePostOrderFunctionAttrsPass();
(void) llvm::createMergeFunctionsPass();
- (void) llvm::createMergeICmpsPass();
+ (void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandMemCmpPass();
std::string buf;
llvm::raw_string_ostream os(buf);
@@ -221,6 +223,7 @@ namespace {
(void) llvm::createEliminateAvailableExternallyPass();
(void) llvm::createScalarizeMaskedMemIntrinPass();
(void) llvm::createWarnMissedTransformationsPass();
+ (void) llvm::createHardwareLoopsPass();
(void)new llvm::IntervalPartition();
(void)new llvm::ScalarEvolutionWrapperPass();
diff --git a/include/llvm/Linker/IRMover.h b/include/llvm/Linker/IRMover.h
index 235ada47cef4..e5df83f01fe3 100644
--- a/include/llvm/Linker/IRMover.h
+++ b/include/llvm/Linker/IRMover.h
@@ -1,9 +1,8 @@
//===- IRMover.h ------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index 7776c720ec53..c9b1d42b3903 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -1,9 +1,8 @@
//===- Linker.h - Module Linker Interface -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index ef33250204ec..2fe5ce252c94 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -1,9 +1,8 @@
-//===- ConstantPool.h - Keep track of assembler-generated ------*- C++ -*-===//
+//===- ConstantPools.h - Keep track of assembler-generated ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/LaneBitmask.h b/include/llvm/MC/LaneBitmask.h
index 8c0b4ecb8fd4..d5f69287a265 100644
--- a/include/llvm/MC/LaneBitmask.h
+++ b/include/llvm/MC/LaneBitmask.h
@@ -1,9 +1,8 @@
//===- llvm/MC/LaneBitmask.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 07835c21fced..1f3ad6c1e547 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCAsmBackend.h - MC Asm Backend ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -88,6 +87,22 @@ public:
return false;
}
+ /// Hook to check if extra nop bytes must be inserted for an alignment
+ /// directive. For some targets this may be necessary in order to support
+ /// linker relaxation. The number of bytes to insert is returned in Size.
+ virtual bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
+ unsigned &Size) {
+ return false;
+ }
+
+ /// Hook which indicates whether the target requires a fixup to be generated
+ /// when handling an align directive in an executable section.
+ virtual bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ MCAlignFragment &AF) {
+ return false;
+ }
+
/// Apply the \p Value for given \p Fixup into the provided data fragment, at
/// the offset specified by the fixup and following the fixup kind as
/// appropriate. Errors (such as an out of range fixup value) should be
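As an illustration only (not part of this patch), a backend that needs linker-relaxation-friendly alignment handling might override the two new hooks roughly as below; the MyTargetAsmBackend class name and the constants are hypothetical, and the remaining pure-virtual members are omitted.

#include "llvm/MC/MCAsmBackend.h"

// Hypothetical backend for a target whose linker may relax instructions and
// therefore has to keep alignment directives fixable.
class MyTargetAsmBackend : public llvm::MCAsmBackend {
  // (The usual pure-virtual members -- applyFixup, writeNopData, ... -- are
  // omitted from this sketch.)

  bool shouldInsertExtraNopBytesForCodeAlign(const llvm::MCAlignFragment &AF,
                                             unsigned &Size) override {
    Size = 4; // placeholder: one max-size nop the linker may later rewrite
    return true;
  }

  bool shouldInsertFixupForCodeAlign(llvm::MCAssembler &Asm,
                                     const llvm::MCAsmLayout &Layout,
                                     llvm::MCAlignFragment &AF) override {
    return true; // request an alignment fixup in executable sections
  }
};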
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 120fb8fa7492..971e9354da8c 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCAsmInfo.h - Asm info --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,16 +17,17 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCTargetOptions.h"
#include <vector>
namespace llvm {
class MCContext;
+class MCCFIInstruction;
class MCExpr;
class MCSection;
class MCStreamer;
+class MCSubtargetInfo;
class MCSymbol;
namespace WinEH {
@@ -474,7 +474,13 @@ public:
bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
bool hasCOFFAssociativeComdats() const { return HasCOFFAssociativeComdats; }
bool hasCOFFComdatConstants() const { return HasCOFFComdatConstants; }
- unsigned getMaxInstLength() const { return MaxInstLength; }
+
+ /// Returns the maximum possible encoded instruction size in bytes. If \p STI
+ /// is null, this should be the maximum size for any subtarget.
+ virtual unsigned getMaxInstLength(const MCSubtargetInfo *STI = nullptr) const {
+ return MaxInstLength;
+ }
+
unsigned getMinInstAlignment() const { return MinInstAlignment; }
bool getDollarIsPC() const { return DollarIsPC; }
const char *getSeparatorString() const { return SeparatorString; }
@@ -492,7 +498,7 @@ public:
StringRef getPrivateLabelPrefix() const { return PrivateLabelPrefix; }
bool hasLinkerPrivateGlobalPrefix() const {
- return LinkerPrivateGlobalPrefix[0] != '\0';
+ return !LinkerPrivateGlobalPrefix.empty();
}
StringRef getLinkerPrivateGlobalPrefix() const {
@@ -598,9 +604,7 @@ public:
return SupportsExtendedDwarfLocDirective;
}
- void addInitialFrameState(const MCCFIInstruction &Inst) {
- InitialFrameState.push_back(Inst);
- }
+ void addInitialFrameState(const MCCFIInstruction &Inst);
const std::vector<MCCFIInstruction> &getInitialFrameState() const {
return InitialFrameState;
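Since getMaxInstLength() is now virtual and takes an optional MCSubtargetInfo, a variable-length target can report a subtarget-specific bound. A minimal sketch, assuming a hypothetical MyTargetMCAsmInfo class and a placeholder 6-byte bound:

#include "llvm/MC/MCAsmInfo.h"

class MyTargetMCAsmInfo : public llvm::MCAsmInfo {
public:
  unsigned getMaxInstLength(const llvm::MCSubtargetInfo *STI) const override {
    // Without subtarget information, fall back to the conservative default
    // configured in the constructor (the protected MaxInstLength field).
    if (!STI)
      return MaxInstLength;
    // A real backend would consult STI->getFeatureBits() here; 6 is a
    // placeholder value for this sketch.
    return 6;
  }
};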
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
index 01c8ae49a6fc..1dfb4750af66 100644
--- a/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -1,9 +1,8 @@
//===- MCAsmInfoCOFF.h - COFF asm properties --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h
index a533d604a89e..c889ce99cebe 100644
--- a/include/llvm/MC/MCAsmInfoDarwin.h
+++ b/include/llvm/MC/MCAsmInfoDarwin.h
@@ -1,9 +1,8 @@
//===- MCAsmInfoDarwin.h - Darwin asm properties ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCAsmInfoELF.h b/include/llvm/MC/MCAsmInfoELF.h
index f113afc9885e..aa2e5873e2c6 100644
--- a/include/llvm/MC/MCAsmInfoELF.h
+++ b/include/llvm/MC/MCAsmInfoELF.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCAsmInfoELF.h - ELF Asm info --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCAsmInfoWasm.h b/include/llvm/MC/MCAsmInfoWasm.h
index 71c6ee28df70..3afc610b8b07 100644
--- a/include/llvm/MC/MCAsmInfoWasm.h
+++ b/include/llvm/MC/MCAsmInfoWasm.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCAsmInfoWasm.h - Wasm Asm info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCAsmInfoXCOFF.h b/include/llvm/MC/MCAsmInfoXCOFF.h
new file mode 100644
index 000000000000..2a72ba7398a7
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfoXCOFF.h
@@ -0,0 +1,25 @@
+//===- MCAsmInfoXCOFF.h - XCOFF asm properties ----------------- *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMINFOXCOFF_H
+#define LLVM_MC_MCASMINFOXCOFF_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+
+class MCAsmInfoXCOFF : public MCAsmInfo {
+ virtual void anchor();
+
+protected:
+ MCAsmInfoXCOFF();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCASMINFOXCOFF_H
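A target adopts the new base class the same way the COFF/ELF/Darwin variants above are used; a minimal hypothetical sketch (class name and the field adjusted are assumptions, not part of this patch):

#include "llvm/MC/MCAsmInfoXCOFF.h"

// Hypothetical target flavour of the new XCOFF asm-info base class.
class MyTargetXCOFFMCAsmInfo : public llvm::MCAsmInfoXCOFF {
public:
  MyTargetXCOFFMCAsmInfo() {
    // Protected MCAsmInfo knobs can be adjusted on top of the XCOFF defaults.
    SupportsDebugInformation = false;
  }
};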
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index b711db319302..45ac96f0b81e 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -1,9 +1,8 @@
//===- MCAsmLayout.h - Assembly Layout Object -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCAsmMacro.h b/include/llvm/MC/MCAsmMacro.h
index 135fa4f2e33d..364d3b5f3666 100644
--- a/include/llvm/MC/MCAsmMacro.h
+++ b/include/llvm/MC/MCAsmMacro.h
@@ -1,9 +1,8 @@
//===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 986c6e17548f..4543018901a4 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -1,9 +1,8 @@
//===- MCAssembler.h - Object File Generation -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index f1b0b784a2df..04b4367ada7b 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCCodeEmitter.h - Instruction Encoding -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCCodePadder.h b/include/llvm/MC/MCCodePadder.h
index 4dde6bf59272..f7b1a2113a9a 100644
--- a/include/llvm/MC/MCCodePadder.h
+++ b/include/llvm/MC/MCCodePadder.h
@@ -1,9 +1,8 @@
-//===- llvm/MC/CodePadder.h - MC Code Padder --------------------*- C++ -*-===//
+//===- llvm/MC/MCCodePadder.h - MC Code Padder ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h
index cef03a409f95..2126354cded6 100644
--- a/include/llvm/MC/MCCodeView.h
+++ b/include/llvm/MC/MCCodeView.h
@@ -1,9 +1,8 @@
//===- MCCodeView.h - Machine Code CodeView support -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 3b8ac8b79e21..5c2124cc0d15 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -1,9 +1,8 @@
//===- MCContext.h - Machine Code Context -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -50,6 +50,7 @@ namespace llvm {
class MCSectionELF;
class MCSectionMachO;
class MCSectionWasm;
+ class MCSectionXCOFF;
class MCStreamer;
class MCSymbol;
class MCSymbolELF;
@@ -92,6 +93,7 @@ namespace llvm {
SpecificBumpPtrAllocator<MCSectionELF> ELFAllocator;
SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator;
+ SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator;
/// Bindings of names to symbols.
SymbolTable Symbols;
@@ -247,10 +249,25 @@ namespace llvm {
}
};
+ struct XCOFFSectionKey {
+ std::string SectionName;
+ XCOFF::StorageMappingClass MappingClass;
+
+ XCOFFSectionKey(StringRef SectionName,
+ XCOFF::StorageMappingClass MappingClass)
+ : SectionName(SectionName), MappingClass(MappingClass) {}
+
+ bool operator<(const XCOFFSectionKey &Other) const {
+ return std::tie(SectionName, MappingClass) <
+ std::tie(Other.SectionName, Other.MappingClass);
+ }
+ };
+
StringMap<MCSectionMachO *> MachOUniquingMap;
std::map<ELFSectionKey, MCSectionELF *> ELFUniquingMap;
std::map<COFFSectionKey, MCSectionCOFF *> COFFUniquingMap;
std::map<WasmSectionKey, MCSectionWasm *> WasmUniquingMap;
+ std::map<XCOFFSectionKey, MCSectionXCOFF *> XCOFFUniquingMap;
StringMap<bool> RelSecNames;
SpecificBumpPtrAllocator<MCSubtargetInfo> MCSubtargetAllocator;
@@ -441,8 +458,6 @@ namespace llvm {
SectionKind Kind,
const char *BeginSymName = nullptr);
- MCSectionCOFF *getCOFFSection(StringRef Section);
-
/// Gets or creates a section equivalent to Sec that is associated with the
/// section containing KeySym. For example, to create a debug info section
/// associated with an inline function, pass the normal debug info section
@@ -473,6 +488,11 @@ namespace llvm {
const MCSymbolWasm *Group, unsigned UniqueID,
const char *BeginSymName);
+ MCSectionXCOFF *getXCOFFSection(StringRef Section,
+ XCOFF::StorageMappingClass MappingClass,
+ SectionKind K,
+ const char *BeginSymName = nullptr);
+
// Create and save a copy of STI and return a reference to the copy.
MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI);
@@ -489,12 +509,6 @@ namespace llvm {
/// Set the compilation directory for DW_AT_comp_dir
void setCompilationDir(StringRef S) { CompilationDir = S.str(); }
- /// Get the debug prefix map.
- const std::map<const std::string, const std::string> &
- getDebugPrefixMap() const {
- return DebugPrefixMap;
- }
-
/// Add an entry to the debug prefix map.
void addDebugPrefixMapEntry(const std::string &From, const std::string &To);
@@ -512,7 +526,7 @@ namespace llvm {
/// Creates an entry in the dwarf file and directory tables.
Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
unsigned FileNumber,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source, unsigned CUID);
bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0);
@@ -539,13 +553,6 @@ namespace llvm {
return getMCDwarfLineTable(CUID).getMCDwarfDirs();
}
- bool hasMCLineSections() const {
- for (const auto &Table : MCDwarfLineTablesCUMap)
- if (!Table.second.getMCDwarfFiles().empty() || Table.second.getLabel())
- return true;
- return false;
- }
-
unsigned getDwarfCompileUnitID() { return DwarfCompileUnitID; }
void setDwarfCompileUnitID(unsigned CUIndex) {
@@ -555,7 +562,8 @@ namespace llvm {
/// Specifies the "root" file and directory of the compilation unit.
/// These are "file 0" and "directory 0" in DWARF v5.
void setMCLineTableRootFile(unsigned CUID, StringRef CompilationDir,
- StringRef Filename, MD5::MD5Result *Checksum,
+ StringRef Filename,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source) {
getMCDwarfLineTable(CUID).setRootFile(CompilationDir, Filename, Checksum,
Source);
@@ -595,6 +603,10 @@ namespace llvm {
GenDwarfFileNumber = FileNumber;
}
+ /// Specifies information about the "root file" for assembler clients
+ /// (e.g., llvm-mc). Assumes compilation dir etc. have been set up.
+ void setGenDwarfRootFile(StringRef FileName, StringRef Buffer);
+
const SetVector<MCSection *> &getGenDwarfSectionSyms() {
return SectionsForRanges;
}
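With the XCOFF uniquing map in place, clients obtain sections through the new getXCOFFSection() hook, analogous to the existing ELF/COFF/Wasm getters. A minimal usage sketch, assuming an MCContext already configured for an XCOFF target; XMC_PR (program code) is one of the storage mapping classes defined in llvm/BinaryFormat/XCOFF.h:

#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/SectionKind.h"

void emitIntoTextCsect(llvm::MCContext &Ctx, llvm::MCStreamer &OS) {
  // Repeated calls with the same (name, mapping class) pair return the same
  // uniqued MCSectionXCOFF object.
  llvm::MCSectionXCOFF *Text = Ctx.getXCOFFSection(
      ".text", llvm::XCOFF::XMC_PR, llvm::SectionKind::getText());
  OS.SwitchSection(Text);
}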
diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h
index 8c74b169135b..4029264c2026 100644
--- a/include/llvm/MC/MCDirectives.h
+++ b/include/llvm/MC/MCDirectives.h
@@ -1,9 +1,8 @@
//===- MCDirectives.h - Enums for directives on various targets -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@ enum MCSymbolAttr {
MCSA_Invalid = 0, ///< Not a valid directive.
// Various directives in alphabetical order.
+ MCSA_Cold, ///< .cold (MachO)
MCSA_ELF_TypeFunction, ///< .type _foo, STT_FUNC # aka @function
MCSA_ELF_TypeIndFunction, ///< .type _foo, STT_GNU_IFUNC
MCSA_ELF_TypeObject, ///< .type _foo, STT_OBJECT # aka @object
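The new MCSA_Cold attribute maps to the MachO .cold directive and is applied like any other symbol attribute; a short sketch (symbol creation elided):

#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"

void markSymbolCold(llvm::MCStreamer &OS, llvm::MCSymbol *Sym) {
  // Emits ".cold _sym" on MachO targets; returns false if the attribute is
  // not supported by the current streamer/target.
  OS.EmitSymbolAttribute(Sym, llvm::MCSA_Cold);
}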
diff --git a/include/llvm/MC/MCDisassembler/MCDisassembler.h b/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 7f09c05ccf2a..268f3ccad889 100644
--- a/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,6 +16,7 @@
namespace llvm {
template <typename T> class ArrayRef;
+class StringRef;
class MCContext;
class MCInst;
class MCSubtargetInfo;
@@ -80,6 +80,23 @@ public:
raw_ostream &VStream,
raw_ostream &CStream) const = 0;
+ /// May parse any prelude that precedes instructions after the start of a
+ /// symbol. Needed for some targets, e.g. WebAssembly.
+ ///
+ /// \param Name - The name of the symbol.
+ /// \param Size - The number of bytes consumed.
+ /// \param Address - The address, in the memory space of the region, of the
+ /// first byte of the symbol.
+ /// \param Bytes - A reference to the actual bytes at the symbol location.
+ /// \param VStream - The stream to print warnings and diagnostic messages on.
+ /// \param CStream - The stream to print comments and annotations on.
+ /// \return - MCDisassembler::Success if the bytes are valid,
+ /// MCDisassembler::Fail if the bytes were invalid.
+ virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
+
private:
MCContext &Ctx;
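A disassembler that has to consume a per-symbol prelude (WebAssembly is the motivating target) overrides the new onSymbolStart() hook. A hypothetical sketch; the class, the 0x0B marker byte, and the two-byte prelude are illustrative assumptions, and the constructor and getInstruction() override are omitted:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"

class MyDisassembler : public llvm::MCDisassembler {
  DecodeStatus onSymbolStart(llvm::StringRef Name, uint64_t &Size,
                             llvm::ArrayRef<uint8_t> Bytes, uint64_t Address,
                             llvm::raw_ostream &VStream,
                             llvm::raw_ostream &CStream) const override {
    // Consume a hypothetical two-byte prelude if present; otherwise report
    // success with Size == 0 so decoding starts at the symbol itself.
    Size = (Bytes.size() >= 2 && Bytes[0] == 0x0B) ? 2 : 0;
    return Success;
  }
};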
diff --git a/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
index df909a0dccd3..ffac5ee5cb1f 100644
--- a/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
+++ b/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCExternalSymbolizer.h - ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCDisassembler/MCRelocationInfo.h b/include/llvm/MC/MCDisassembler/MCRelocationInfo.h
index 6030ae660d38..efc59da19335 100644
--- a/include/llvm/MC/MCDisassembler/MCRelocationInfo.h
+++ b/include/llvm/MC/MCDisassembler/MCRelocationInfo.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCRelocationInfo.h -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/include/llvm/MC/MCDisassembler/MCSymbolizer.h
index 0bfa569474ec..b7ca83a5f16c 100644
--- a/include/llvm/MC/MCDisassembler/MCSymbolizer.h
+++ b/include/llvm/MC/MCDisassembler/MCSymbolizer.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCSymbolizer.h - MCSymbolizer class --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 7b96e9aaca89..1a37aafd0654 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -1,9 +1,8 @@
//===- MCDwarf.h - Machine Code Dwarf support -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,11 +41,14 @@ class raw_ostream;
class SMLoc;
class SourceMgr;
-/// Instances of this class represent the name of the dwarf
-/// .file directive and its associated dwarf file number in the MC file,
-/// and MCDwarfFile's are created and uniqued by the MCContext class where
-/// the file number for each is its index into the vector of DwarfFiles (note
-/// index 0 is not used and not a valid dwarf file number).
+/// Instances of this class represent the name of the dwarf .file directive and
+/// its associated dwarf file number in the MC file. MCDwarfFile's are created
+/// and uniqued by the MCContext class. In Dwarf 4 file numbers start from 1;
+/// i.e. the entry with file number 1 is the first element in the vector of
+/// DwarfFiles and there is no MCDwarfFile with file number 0. In Dwarf 5 file
+/// numbers start from 0, with the MCDwarfFile with file number 0 being the
+/// primary source file, and file numbers correspond to their index in the
+/// vector.
struct MCDwarfFile {
// The base name of the file without its directory path.
std::string Name;
@@ -56,7 +58,7 @@ struct MCDwarfFile {
/// The MD5 checksum, if there is one. Non-owning pointer to data allocated
/// in MCContext.
- MD5::MD5Result *Checksum = nullptr;
+ Optional<MD5::MD5Result> Checksum;
/// The source code of the file. Non-owning reference to data allocated in
/// MCContext.
@@ -224,8 +226,9 @@ public:
MCDwarfLineTableHeader() = default;
Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
- MD5::MD5Result *Checksum,
- Optional<StringRef> &Source,
+ Optional<MD5::MD5Result> Checksum,
+ Optional<StringRef> Source,
+ uint16_t DwarfVersion,
unsigned FileNumber = 0);
std::pair<MCSymbol *, MCSymbol *>
Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
@@ -246,32 +249,50 @@ public:
return MCDwarfFiles.empty() || (HasAllMD5 == HasAnyMD5);
}
+ void setRootFile(StringRef Directory, StringRef FileName,
+ Optional<MD5::MD5Result> Checksum,
+ Optional<StringRef> Source) {
+ CompilationDir = Directory;
+ RootFile.Name = FileName;
+ RootFile.DirIndex = 0;
+ RootFile.Checksum = Checksum;
+ RootFile.Source = Source;
+ trackMD5Usage(Checksum.hasValue());
+ HasSource = Source.hasValue();
+ }
+
+ void resetFileTable() {
+ MCDwarfDirs.clear();
+ MCDwarfFiles.clear();
+ RootFile.Name.clear();
+ resetMD5Usage();
+ HasSource = false;
+ }
+
private:
void emitV2FileDirTables(MCStreamer *MCOS) const;
- void emitV5FileDirTables(MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr,
- StringRef CtxCompilationDir) const;
+ void emitV5FileDirTables(MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr) const;
};
class MCDwarfDwoLineTable {
MCDwarfLineTableHeader Header;
+ bool HasSplitLineTable = false;
public:
void maybeSetRootFile(StringRef Directory, StringRef FileName,
- MD5::MD5Result *Checksum, Optional<StringRef> Source) {
+ Optional<MD5::MD5Result> Checksum,
+ Optional<StringRef> Source) {
if (!Header.RootFile.Name.empty())
return;
- Header.CompilationDir = Directory;
- Header.RootFile.Name = FileName;
- Header.RootFile.DirIndex = 0;
- Header.RootFile.Checksum = Checksum;
- Header.RootFile.Source = Source;
- Header.trackMD5Usage(Checksum);
- Header.HasSource = Source.hasValue();
+ Header.setRootFile(Directory, FileName, Checksum, Source);
}
unsigned getFile(StringRef Directory, StringRef FileName,
- MD5::MD5Result *Checksum, Optional<StringRef> Source) {
- return cantFail(Header.tryGetFile(Directory, FileName, Checksum, Source));
+ Optional<MD5::MD5Result> Checksum, uint16_t DwarfVersion,
+ Optional<StringRef> Source) {
+ HasSplitLineTable = true;
+ return cantFail(Header.tryGetFile(Directory, FileName, Checksum, Source,
+ DwarfVersion));
}
void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
@@ -291,36 +312,34 @@ public:
Optional<MCDwarfLineStr> &LineStr) const;
Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
+ uint16_t DwarfVersion,
unsigned FileNumber = 0);
unsigned getFile(StringRef &Directory, StringRef &FileName,
- MD5::MD5Result *Checksum, Optional<StringRef> &Source,
- unsigned FileNumber = 0) {
+ Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
+ uint16_t DwarfVersion, unsigned FileNumber = 0) {
return cantFail(tryGetFile(Directory, FileName, Checksum, Source,
- FileNumber));
+ DwarfVersion, FileNumber));
}
void setRootFile(StringRef Directory, StringRef FileName,
- MD5::MD5Result *Checksum, Optional<StringRef> Source) {
+ Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source) {
Header.CompilationDir = Directory;
Header.RootFile.Name = FileName;
Header.RootFile.DirIndex = 0;
Header.RootFile.Checksum = Checksum;
Header.RootFile.Source = Source;
- Header.trackMD5Usage(Checksum);
+ Header.trackMD5Usage(Checksum.hasValue());
Header.HasSource = Source.hasValue();
}
- void resetRootFile() {
- assert(Header.MCDwarfFiles.empty());
- Header.RootFile.Name.clear();
- Header.resetMD5Usage();
- Header.HasSource = false;
- }
+ void resetFileTable() { Header.resetFileTable(); }
bool hasRootFile() const { return !Header.RootFile.Name.empty(); }
+ const MCDwarfFile &getRootFile() const { return Header.RootFile; }
+
// Report whether MD5 usage has been consistent (all-or-none).
bool isMD5UsageConsistent() const { return Header.isMD5UsageConsistent(); }
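Because the checksum parameter is now an Optional<MD5::MD5Result> passed by value rather than a pointer into MCContext, callers hash the file contents themselves and pass the result (or None). A sketch against the MCContext::getDwarfFile() overload changed earlier in this patch; the directory, file name, and CUID arguments are arbitrary placeholders:

#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"

llvm::Expected<unsigned> addDwarfFile(llvm::MCContext &Ctx, llvm::StringRef Dir,
                                      llvm::StringRef Name,
                                      llvm::StringRef Contents) {
  // Hash the source buffer; DWARF 5 file entries can carry this checksum.
  llvm::MD5 Hash;
  llvm::MD5::MD5Result Sum;
  Hash.update(Contents);
  Hash.final(Sum);

  return Ctx.getDwarfFile(Dir, Name, /*FileNumber=*/0, Sum,
                          /*Source=*/llvm::None, /*CUID=*/0);
}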
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index f226d6a45a5a..2d441fdeee28 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCELFObjectWriter.h - ELF Object Writer ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,13 +53,14 @@ struct ELFRelocationEntry {
class MCELFObjectTargetWriter : public MCObjectTargetWriter {
const uint8_t OSABI;
+ const uint8_t ABIVersion;
const uint16_t EMachine;
const unsigned HasRelocationAddend : 1;
const unsigned Is64Bit : 1;
protected:
MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_, uint16_t EMachine_,
- bool HasRelocationAddend);
+ bool HasRelocationAddend_, uint8_t ABIVersion_ = 0);
public:
virtual ~MCELFObjectTargetWriter() = default;
@@ -98,6 +98,7 @@ public:
/// \name Accessors
/// @{
uint8_t getOSABI() const { return OSABI; }
+ uint8_t getABIVersion() const { return ABIVersion; }
uint16_t getEMachine() const { return EMachine; }
bool hasRelocationAddend() const { return HasRelocationAddend; }
bool is64Bit() const { return Is64Bit; }
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
index 3797079661e4..8838d53d75b5 100644
--- a/include/llvm/MC/MCELFStreamer.h
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -1,9 +1,8 @@
//===- MCELFStreamer.h - MCStreamer ELF Object File Interface ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 8cb6b86fd672..fb23c0114c76 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -1,9 +1,8 @@
//===- MCExpr.h - Assembly Level Expressions --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -135,15 +134,21 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MCExpr &E) {
//// Represent a constant integer expression.
class MCConstantExpr : public MCExpr {
int64_t Value;
+ bool PrintInHex = false;
- explicit MCConstantExpr(int64_t Value)
+ MCConstantExpr(int64_t Value)
: MCExpr(MCExpr::Constant, SMLoc()), Value(Value) {}
+ MCConstantExpr(int64_t Value, bool PrintInHex)
+ : MCExpr(MCExpr::Constant, SMLoc()), Value(Value),
+ PrintInHex(PrintInHex) {}
+
public:
/// \name Construction
/// @{
- static const MCConstantExpr *create(int64_t Value, MCContext &Ctx);
+ static const MCConstantExpr *create(int64_t Value, MCContext &Ctx,
+ bool PrintInHex = false);
/// @}
/// \name Accessors
@@ -151,6 +156,8 @@ public:
int64_t getValue() const { return Value; }
+ bool useHexFormat() const { return PrintInHex; }
+
/// @}
static bool classof(const MCExpr *E) {
@@ -285,16 +292,17 @@ public:
VK_Hexagon_IE,
VK_Hexagon_IE_GOT,
- VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr
- VK_WebAssembly_GLOBAL, // Global object index
- VK_WebAssembly_TYPEINDEX,// Type table index
- VK_WebAssembly_EVENT, // Event index
+ VK_WASM_TYPEINDEX, // Reference to a symbol's type (signature)
+ VK_WASM_MBREL, // Memory address relative to memory base
+ VK_WASM_TBREL, // Table index relative to table base
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
VK_AMDGPU_REL32_LO, // symbol@rel32@lo
VK_AMDGPU_REL32_HI, // symbol@rel32@hi
VK_AMDGPU_REL64, // symbol@rel64
+ VK_AMDGPU_ABS32_LO, // symbol@abs32@lo
+ VK_AMDGPU_ABS32_HI, // symbol@abs32@hi
VK_TPREL,
VK_DTPREL
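The extra create() parameter only affects printing, letting targets render selected immediates in hexadecimal in textual assembly. A minimal sketch, assuming an existing MCContext:

#include "llvm/MC/MCExpr.h"

const llvm::MCExpr *makeHexImm(llvm::MCContext &Ctx) {
  // Prints as 0xdeadbeef rather than 3735928559 when emitted to an assembly
  // stream; the value itself is unchanged.
  return llvm::MCConstantExpr::create(0xdeadbeef, Ctx, /*PrintInHex=*/true);
}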
diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
index ad34d9494bb9..218ae0d13189 100644
--- a/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Fixed length disassembler decoder state machine driver.
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index 5f301eafc556..accffb7f2247 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCFixup.h - Instruction Relocation and Patching -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,7 +20,8 @@ class MCExpr;
/// Extensible enumeration to represent the type of a fixup.
enum MCFixupKind {
- FK_Data_1 = 0, ///< A one-byte fixup.
+ FK_NONE = 0, ///< A no-op fixup.
+ FK_Data_1, ///< A one-byte fixup.
FK_Data_2, ///< A two-byte fixup.
FK_Data_4, ///< A four-byte fixup.
FK_Data_8, ///< An eight-byte fixup.
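FK_NONE gives a backend a kind for fixups that patch no bytes, as the "no-op fixup" comment above describes. A sketch of creating one; the offset and expression are placeholders:

#include "llvm/MC/MCFixup.h"

llvm::MCFixup makeNoOpFixup(const llvm::MCExpr *Value) {
  // Records the expression at offset 0; applying this fixup writes nothing.
  return llvm::MCFixup::create(/*Offset=*/0, Value, llvm::FK_NONE);
}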
diff --git a/include/llvm/MC/MCFixupKindInfo.h b/include/llvm/MC/MCFixupKindInfo.h
index 483abb39403f..0ea34866db6a 100644
--- a/include/llvm/MC/MCFixupKindInfo.h
+++ b/include/llvm/MC/MCFixupKindInfo.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCFixupKindInfo.h - Fixup Descriptors -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
index c999c9fc4f17..aadf2ce725ea 100644
--- a/include/llvm/MC/MCFragment.h
+++ b/include/llvm/MC/MCFragment.h
@@ -1,9 +1,8 @@
//===- MCFragment.h - Fragment type hierarchy -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index d501b686bb2e..8df8096bba94 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCInst.h - MCInst class --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -154,8 +153,6 @@ public:
bool evaluateAsConstantImm(int64_t &Imm) const;
};
-template <> struct isPodLike<MCOperand> { static const bool value = true; };
-
/// Instances of this class represent a single low-level machine
/// instruction.
class MCInst {
@@ -190,6 +187,7 @@ public:
void clear() { Operands.clear(); }
void erase(iterator I) { Operands.erase(I); }
+ void erase(iterator First, iterator Last) { Operands.erase(First, Last); }
size_t size() const { return Operands.size(); }
iterator begin() { return Operands.begin(); }
const_iterator begin() const { return Operands.begin(); }
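The new range-erase overload mirrors the single-iterator one and removes the half-open range [First, Last); for example, dropping every operand after the first:

#include "llvm/MC/MCInst.h"

void dropTrailingOperands(llvm::MCInst &Inst) {
  if (Inst.size() > 1)
    Inst.erase(Inst.begin() + 1, Inst.end());
}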
diff --git a/include/llvm/MC/MCInstBuilder.h b/include/llvm/MC/MCInstBuilder.h
index c5c4f481e7df..0c8e01fdc412 100644
--- a/include/llvm/MC/MCInstBuilder.h
+++ b/include/llvm/MC/MCInstBuilder.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCInstBuilder.h - Simplify creation of MCInsts --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index df221e1db0e7..6bbc4bc2903b 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -1,9 +1,8 @@
//===- MCInstPrinter.h - MCInst to target assembly syntax -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -65,6 +64,10 @@ public:
virtual ~MCInstPrinter();
+ /// Customize the printer according to a command line option.
+ /// @return true if the option is recognized and applied.
+ virtual bool applyTargetSpecificCLOption(StringRef Opt) { return false; }
+
/// Specify a stream to emit comments to.
void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
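Tools that expose a -M style flag can forward each value to the printer; the printer returns false for options it does not recognize. A driver-side sketch, where Opts is a hypothetical list of user-supplied option strings:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

void applyPrinterOptions(llvm::MCInstPrinter &IP,
                         llvm::ArrayRef<std::string> Opts) {
  for (const std::string &Opt : Opts)
    if (!IP.applyTargetSpecificCLOption(Opt))
      llvm::errs() << "unrecognized disassembler option: " << Opt << "\n";
}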
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index 200f10f7d64b..dfefd7e72777 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCInstrAnalysis.h - InstrDesc target hooks -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 61e7d09afbcb..0aa586dfc901 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCInstrDesc.h - Instruction Descriptors -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -135,6 +134,7 @@ enum Flag {
FoldableAsLoad,
MayLoad,
MayStore,
+ MayRaiseFPException,
Predicable,
NotDuplicable,
UnmodeledSideEffects,
@@ -404,6 +404,11 @@ public:
/// may not actually modify anything, for example.
bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
+ /// Return true if this instruction may raise a floating-point exception.
+ bool mayRaiseFPException() const {
+ return Flags & (1ULL << MCID::MayRaiseFPException);
+ }
+
/// Return true if this instruction has side
/// effects that are not modeled by other flags. This does not return true
/// for instructions whose effects are captured by:
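The new flag is queried like the other MCID flags; for instance, a client that must stay conservative around floating-point exceptions can test the opcode's descriptor. Sketch, assuming an MCInstrInfo for the current target:

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"

bool mayTrapOnFPException(const llvm::MCInstrInfo &MCII,
                          const llvm::MCInst &Inst) {
  // MayRaiseFPException is a per-opcode property recorded in the MCInstrDesc.
  return MCII.get(Inst.getOpcode()).mayRaiseFPException();
}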
diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h
index 18da87cf8929..874b1e46795b 100644
--- a/include/llvm/MC/MCInstrInfo.h
+++ b/include/llvm/MC/MCInstrInfo.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCInstrInfo.h - Target Instruction Info ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index fe81376e0db7..485aa663272e 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCInstrItineraries.h - Scheduling ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCLabel.h b/include/llvm/MC/MCLabel.h
index aaf70691fc01..0b8afac8f754 100644
--- a/include/llvm/MC/MCLabel.h
+++ b/include/llvm/MC/MCLabel.h
@@ -1,9 +1,8 @@
//===- MCLabel.h - Machine Code Directional Local Labels --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index f0fd07f43cf3..f2a1364ad884 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -1,10 +1,9 @@
//===- MCLinkerOptimizationHint.h - LOH interface ---------------*- C++ -*-===//
//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index 22fbeb72a4ec..278aebee99ac 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCMachObjectWriter.h - Mach Object Writer --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index f8142ccd8ac5..abc87bf27748 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCObjectFileInfo.h - Object File Info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -160,6 +159,9 @@ protected:
/// FaultMap section.
MCSection *FaultMapSection;
+ /// Remarks section.
+ MCSection *RemarksSection;
+
/// EH frame section.
///
/// It is initialized on demand so it can be overwritten (with uniquing).
@@ -315,6 +317,7 @@ public:
MCSection *getStackMapSection() const { return StackMapSection; }
MCSection *getFaultMapSection() const { return FaultMapSection; }
+ MCSection *getRemarksSection() const { return RemarksSection; }
MCSection *getStackSizesSection(const MCSection &TextSec) const;
@@ -381,7 +384,7 @@ public:
return EHFrameSection;
}
- enum Environment { IsMachO, IsELF, IsCOFF, IsWasm };
+ enum Environment { IsMachO, IsELF, IsCOFF, IsWasm, IsXCOFF };
Environment getObjectFileType() const { return Env; }
bool isPositionIndependent() const { return PositionIndependent; }
@@ -397,6 +400,7 @@ private:
void initELFMCObjectFileInfo(const Triple &T, bool Large);
void initCOFFMCObjectFileInfo(const Triple &T);
void initWasmMCObjectFileInfo(const Triple &T);
+ void initXCOFFMCObjectFileInfo(const Triple &T);
MCSection *getDwarfComdatSection(const char *Name, uint64_t Hash) const;
public:
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 892909656c15..8affca49490f 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -1,9 +1,8 @@
//===- MCObjectStreamer.h - MCStreamer Object File Interface ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -116,8 +115,7 @@ public:
void EmitSLEB128Value(const MCExpr *Value) override;
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool = false) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
/// Emit an instruction to a special fragment, because this instruction
/// can change its size during relaxation.
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 8bae2bf20083..2547b2b7c9c1 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCObjectWriter.h - Object File Writer Interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCParser/AsmCond.h b/include/llvm/MC/MCParser/AsmCond.h
index a6e0fbd7f337..ea2155010081 100644
--- a/include/llvm/MC/MCParser/AsmCond.h
+++ b/include/llvm/MC/MCParser/AsmCond.h
@@ -1,9 +1,8 @@
//===- AsmCond.h - Assembly file conditional assembly ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 2e9b8dfa3b26..b7294493b2f8 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -1,9 +1,8 @@
//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index ea13d1cdc09f..e89abeaac94c 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index b80289878e6e..da5653ee71d3 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCAsmParser.h - Abstract Asm Parser Interface ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -130,9 +129,6 @@ protected: // Can only create subclasses.
/// Flag tracking whether any errors have been encountered.
bool HadError = false;
- /// Enable print [latency:throughput] in output file.
- bool EnablePrintSchedInfo = false;
-
bool ShowParsedOperands = false;
public:
@@ -166,9 +162,6 @@ public:
bool getShowParsedOperands() const { return ShowParsedOperands; }
void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
- void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; }
- bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; }
-
/// Run the parser on the input source buffer.
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 1a132bceddc5..5d2afe81a54b 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCAsmParserExtension.h - Asm Parser Hooks --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCParser/MCAsmParserUtils.h b/include/llvm/MC/MCParser/MCAsmParserUtils.h
index 84173bb9cb8e..d692da7402fe 100644
--- a/include/llvm/MC/MCParser/MCAsmParserUtils.h
+++ b/include/llvm/MC/MCParser/MCAsmParserUtils.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCAsmParserUtils.h - Asm Parser Utilities --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 4af76ac2a858..2b6e2aa48b8f 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCParsedAsmOperand.h - Asm Parser Operand --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCParser/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h
index ccf13a6a4fb4..849dbd57f1aa 100644
--- a/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,6 +16,7 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/SMLoc.h"
#include <cstdint>
#include <memory>
@@ -203,7 +203,7 @@ public:
// The instruction encoding is not valid because it requires some target
// features that are not currently enabled. MissingFeatures has a bit set for
// each feature that the encoding needs but which is not enabled.
- static NearMissInfo getMissedFeature(uint64_t MissingFeatures) {
+ static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
NearMissInfo Result;
Result.Kind = NearMissFeature;
Result.Features = MissingFeatures;
@@ -255,7 +255,7 @@ public:
// Feature flags required by the instruction, that the current target does
// not have.
- uint64_t getFeatures() const {
+ const FeatureBitset& getFeatures() const {
assert(Kind == NearMissFeature);
return Features;
}
@@ -305,7 +305,7 @@ private:
};
union {
- uint64_t Features;
+ FeatureBitset Features;
unsigned PredicateError;
MissedOpInfo MissedOperand;
TooFewOperandsInfo TooFewOperands;
@@ -335,7 +335,7 @@ protected: // Can only create subclasses.
MCSubtargetInfo &copySTI();
/// AvailableFeatures - The current set of available features.
- uint64_t AvailableFeatures = 0;
+ FeatureBitset AvailableFeatures;
/// ParsingInlineAsm - Are we parsing ms-style inline assembly?
bool ParsingInlineAsm = false;
@@ -360,8 +360,12 @@ public:
const MCSubtargetInfo &getSTI() const;
- uint64_t getAvailableFeatures() const { return AvailableFeatures; }
- void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; }
+ const FeatureBitset& getAvailableFeatures() const {
+ return AvailableFeatures;
+ }
+ void setAvailableFeatures(const FeatureBitset& Value) {
+ AvailableFeatures = Value;
+ }
bool isParsingInlineAsm () { return ParsingInlineAsm; }
void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
@@ -380,9 +384,6 @@ public:
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) = 0;
- /// Sets frame register corresponding to the current MachineFunction.
- virtual void SetFrameRegister(unsigned RegNo) {}
-
/// ParseInstruction - Parse one assembly instruction.
///
/// The parser is positioned following the instruction name. The target
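The hunks above move the assembler's feature tracking from a single uint64_t word to FeatureBitset, so targets are no longer capped at 64 subtarget features. A minimal sketch, outside the imported headers, of how a target parser might build a missed-feature near miss under the new signatures (reportMissing is a hypothetical helper, not an LLVM API):

    // Sketch only: assumes the MCTargetAsmParser.h shown above.
    #include "llvm/MC/MCParser/MCTargetAsmParser.h"
    #include "llvm/MC/SubtargetFeature.h"

    using namespace llvm;

    static NearMissInfo reportMissing(const FeatureBitset &Required,
                                      const FeatureBitset &Available) {
      FeatureBitset Missing = Required;
      Missing &= ~Available; // bits the encoding needs but which are not enabled
      return NearMissInfo::getMissedFeature(Missing);
    }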
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index 8d8c677c77ea..92d39c3fcfb7 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -1,9 +1,8 @@
//===- MC/MCRegisterInfo.h - Target Register Description --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 689ac73cbdd1..df3248ee6e86 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCSchedule.h - Scheduling -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#ifndef LLVM_MC_MCSCHEDULE_H
#define LLVM_MC_MCSCHEDULE_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
@@ -370,6 +370,11 @@ struct MCSchedModel {
getReciprocalThroughput(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
const MCInst &Inst) const;
+ /// Returns the maximum forwarding delay for register reads dependent on
+ /// writes of scheduling class WriteResourceIdx.
+ static unsigned getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
+ unsigned WriteResourceIdx = 0);
+
/// Returns the default initialized model.
static const MCSchedModel &GetDefaultSchedModel() { return Default; }
static const MCSchedModel Default;
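getForwardingDelayCycles() is new here and pairs with the getReadAdvanceEntries() accessor added to MCSubtargetInfo later in this diff. A small sketch, assuming an LLVM build containing both changes, of querying the forwarding delay for one scheduling class:

    #include "llvm/MC/MCSchedule.h"
    #include "llvm/MC/MCSubtargetInfo.h"

    using namespace llvm;

    static unsigned forwardingDelayFor(const MCSubtargetInfo &STI,
                                       const MCSchedClassDesc &SC) {
      ArrayRef<MCReadAdvanceEntry> Entries = STI.getReadAdvanceEntries(SC);
      // WriteResourceIdx defaults to 0, i.e. entries not tied to a specific
      // write resource.
      return MCSchedModel::getForwardingDelayCycles(Entries);
    }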
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index eb210b4e9dfa..6fad1ec2069c 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -1,9 +1,8 @@
//===- MCSection.h - Machine Code Sections ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,7 +37,7 @@ template <> struct ilist_alloc_traits<MCFragment> {
/// current translation unit. The MCContext class uniques and creates these.
class MCSection {
public:
- enum SectionVariant { SV_COFF = 0, SV_ELF, SV_MachO, SV_Wasm };
+ enum SectionVariant { SV_COFF = 0, SV_ELF, SV_MachO, SV_Wasm, SV_XCOFF };
/// Express the state of bundle locked groups while emitting code.
enum BundleLockStateType {
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index 24b9f8898ebb..8be95e0f1de5 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -1,9 +1,8 @@
//===- MCSectionCOFF.h - COFF Machine Code Sections -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,8 +62,6 @@ private:
}
public:
- ~MCSectionCOFF();
-
/// Decides whether a '.section' directive should be printed before the
/// section name
bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 00c289c6bd6e..fe6b2d7afc79 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -1,9 +1,8 @@
//===- MCSectionELF.h - ELF Machine Code Sections ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,8 +63,6 @@ private:
void setSectionName(StringRef Name) { SectionName = Name; }
public:
- ~MCSectionELF();
-
/// Decides whether a '.section' directive should be printed before the
/// section name
bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 89db09cbdbdc..2c73661fb1fd 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -1,9 +1,8 @@
//===- MCSectionMachO.h - MachO Machine Code Sections -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCSectionWasm.h b/include/llvm/MC/MCSectionWasm.h
index ab4cd7b007ec..2941a40f3b8c 100644
--- a/include/llvm/MC/MCSectionWasm.h
+++ b/include/llvm/MC/MCSectionWasm.h
@@ -1,9 +1,8 @@
//===- MCSectionWasm.h - Wasm Machine Code Sections -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,20 +42,19 @@ class MCSectionWasm final : public MCSection {
// segment
uint32_t SegmentIndex = 0;
+ // Whether this data segment is passive
+ bool IsPassive = false;
+
friend class MCContext;
MCSectionWasm(StringRef Section, SectionKind K, const MCSymbolWasm *group,
unsigned UniqueID, MCSymbol *Begin)
: MCSection(SV_Wasm, K, Begin), SectionName(Section), UniqueID(UniqueID),
Group(group) {}
- void setSectionName(StringRef Name) { SectionName = Name; }
-
public:
- ~MCSectionWasm();
-
/// Decides whether a '.section' directive should be printed before the
/// section name
- bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+ bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
StringRef getSectionName() const { return SectionName; }
const MCSymbolWasm *getGroup() const { return Group; }
@@ -68,7 +66,8 @@ public:
bool isVirtualSection() const override;
bool isWasmData() const {
- return Kind.isGlobalWriteableData() || Kind.isReadOnly();
+ return Kind.isGlobalWriteableData() || Kind.isReadOnly() ||
+ Kind.isThreadLocal();
}
bool isUnique() const { return UniqueID != ~0U; }
@@ -80,6 +79,14 @@ public:
uint32_t getSegmentIndex() const { return SegmentIndex; }
void setSegmentIndex(uint32_t Index) { SegmentIndex = Index; }
+ bool getPassive() const {
+ assert(isWasmData());
+ return IsPassive;
+ }
+ void setPassive(bool V = true) {
+ assert(isWasmData());
+ IsPassive = V;
+ }
static bool classof(const MCSection *S) { return S->getVariant() == SV_Wasm; }
};
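Wasm data sections gain a passive flag, and the directive-omission helper is renamed to lowerCamelCase. A short sketch, assuming the header above, of toggling the new flag only where the asserts allow it:

    #include "llvm/MC/MCSectionWasm.h"

    using namespace llvm;

    static void markPassiveIfData(MCSectionWasm &Sec) {
      // Both accessors assert isWasmData(), so guard the call.
      if (Sec.isWasmData())
        Sec.setPassive(); // the default argument sets the flag to true
    }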
diff --git a/include/llvm/MC/MCSectionXCOFF.h b/include/llvm/MC/MCSectionXCOFF.h
new file mode 100644
index 000000000000..2a3f391fd3e2
--- /dev/null
+++ b/include/llvm/MC/MCSectionXCOFF.h
@@ -0,0 +1,56 @@
+//===- MCSectionXCOFF.h - XCOFF Machine Code Sections -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionXCOFF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONXCOFF_H
+#define LLVM_MC_MCSECTIONXCOFF_H
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+// This class represents an XCOFF `Control Section`, more commonly referred to
+// as a csect. A csect represents the smallest possible unit of data/code which
+// will be relocated as a single block.
+class MCSectionXCOFF final : public MCSection {
+ friend class MCContext;
+
+ StringRef Name;
+ XCOFF::StorageMappingClass MappingClass;
+
+ MCSectionXCOFF(StringRef Section, XCOFF::StorageMappingClass SMC,
+ SectionKind K, MCSymbol *Begin)
+ : MCSection(SV_XCOFF, K, Begin), Name(Section), MappingClass(SMC) {}
+
+public:
+ ~MCSectionXCOFF();
+
+ static bool classof(const MCSection *S) {
+ return S->getVariant() == SV_XCOFF;
+ }
+
+ StringRef getSectionName() const { return Name; }
+ XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; }
+
+ void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ raw_ostream &OS,
+ const MCExpr *Subsection) const override;
+ bool UseCodeAlign() const override;
+ bool isVirtualSection() const override;
+};
+
+} // end namespace llvm
+
+#endif
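MCSectionXCOFF is a new section flavour keyed off the SV_XCOFF variant added to MCSection earlier in this diff, with classof() wired up for LLVM-style RTTI. A minimal sketch, not part of the imported sources, of recognising a csect through dyn_cast:

    #include "llvm/MC/MCSectionXCOFF.h"
    #include "llvm/Support/Casting.h"

    using namespace llvm;

    static bool isCsect(const MCSection &Sec) {
      // classof() checks getVariant() == SV_XCOFF.
      if (const auto *Csect = dyn_cast<MCSectionXCOFF>(&Sec))
        return !Csect->getSectionName().empty();
      return false;
    }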
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index f613d3a1943f..731e7515448c 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -1,9 +1,8 @@
//===- MCStreamer.h - High-level Streaming Machine Code Output --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWinEH.h"
@@ -44,6 +42,7 @@ class MCAsmBackend;
class MCCodeEmitter;
struct MCCodePaddingContext;
class MCContext;
+struct MCDwarfFrameInfo;
class MCExpr;
class MCInst;
class MCInstPrinter;
@@ -267,10 +266,8 @@ public:
/// closed. Otherwise, issue an error and return null.
WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc);
- unsigned getNumFrameInfos() { return DwarfFrameInfos.size(); }
- ArrayRef<MCDwarfFrameInfo> getDwarfFrameInfos() const {
- return DwarfFrameInfos;
- }
+ unsigned getNumFrameInfos();
+ ArrayRef<MCDwarfFrameInfo> getDwarfFrameInfos() const;
bool hasUnfinishedDwarfFrameInfo();
@@ -629,13 +626,20 @@ public:
/// to pass in a MCExpr for constant integers.
virtual void EmitIntValue(uint64_t Value, unsigned Size);
+ /// Special case of EmitValue that avoids the client having to pass
+ /// in a MCExpr for constant integers & prints in Hex format for certain
+ /// modes.
+ virtual void EmitIntValueInHex(uint64_t Value, unsigned Size) {
+ EmitIntValue(Value, Size);
+ }
+
virtual void EmitULEB128Value(const MCExpr *Value);
virtual void EmitSLEB128Value(const MCExpr *Value);
/// Special case of EmitULEB128Value that avoids the client having to
/// pass in a MCExpr for constant integers.
- void EmitULEB128IntValue(uint64_t Value);
+ void EmitULEB128IntValue(uint64_t Value, unsigned PadTo = 0);
/// Special case of EmitSLEB128Value that avoids the client having to
/// pass in a MCExpr for constant integers.
@@ -782,7 +786,7 @@ public:
/// implements the DWARF2 '.file 4 "foo.c"' assembler directive.
unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
StringRef Filename,
- MD5::MD5Result *Checksum = nullptr,
+ Optional<MD5::MD5Result> Checksum = None,
Optional<StringRef> Source = None,
unsigned CUID = 0) {
return cantFail(
@@ -797,12 +801,12 @@ public:
/// '.file 4 "dir/foo.c" md5 "..." source "..."' assembler directive.
virtual Expected<unsigned> tryEmitDwarfFileDirective(
unsigned FileNo, StringRef Directory, StringRef Filename,
- MD5::MD5Result *Checksum = nullptr, Optional<StringRef> Source = None,
+ Optional<MD5::MD5Result> Checksum = None, Optional<StringRef> Source = None,
unsigned CUID = 0);
/// Specify the "root" file of the compilation, using the ".file 0" extension.
virtual void emitDwarfFile0Directive(StringRef Directory, StringRef Filename,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
unsigned CUID = 0);
@@ -953,9 +957,7 @@ public:
virtual void EmitAddrsigSym(const MCSymbol *Sym) {}
/// Emit the given \p Instruction into the current section.
- /// PrintSchedInfo == true then schedul comment should be added to output
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool PrintSchedInfo = false);
+ virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
/// Set the bundle alignment mode from now on in the section.
/// The argument is the power of 2 to which the alignment is set. The
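In MCStreamer the DWARF file-directive API switches from a raw MD5::MD5Result pointer to Optional<MD5::MD5Result>, EmitULEB128IntValue gains a PadTo width, and the scheduling-comment plumbing is dropped from EmitInstruction. A small sketch, assuming the signatures shown above, of the Optional-based call (emitFileEntry is a hypothetical helper):

    #include "llvm/ADT/Optional.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/MD5.h"

    using namespace llvm;

    static unsigned emitFileEntry(MCStreamer &S, unsigned FileNo, StringRef Dir,
                                  StringRef File,
                                  Optional<MD5::MD5Result> Checksum = None) {
      // "No checksum" is now spelled None rather than nullptr.
      return S.EmitDwarfFileDirective(FileNo, Dir, File, Checksum);
    }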
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index b3ce523d9c0c..9490a6ecedad 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCSubtargetInfo.h - Subtarget Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,45 @@ namespace llvm {
class MCInst;
//===----------------------------------------------------------------------===//
+
+/// Used to provide key value pairs for feature and CPU bit flags.
+struct SubtargetFeatureKV {
+ const char *Key; ///< K-V key string
+ const char *Desc; ///< Help descriptor
+ unsigned Value; ///< K-V integer value
+ FeatureBitArray Implies; ///< K-V bit mask
+
+ /// Compare routine for std::lower_bound
+ bool operator<(StringRef S) const {
+ return StringRef(Key) < S;
+ }
+
+ /// Compare routine for std::is_sorted.
+ bool operator<(const SubtargetFeatureKV &Other) const {
+ return StringRef(Key) < StringRef(Other.Key);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+
+/// Used to provide key value pairs for feature and CPU bit flags.
+struct SubtargetSubTypeKV {
+ const char *Key; ///< K-V key string
+ FeatureBitArray Implies; ///< K-V bit mask
+ const MCSchedModel *SchedModel;
+
+ /// Compare routine for std::lower_bound
+ bool operator<(StringRef S) const {
+ return StringRef(Key) < S;
+ }
+
+ /// Compare routine for std::is_sorted.
+ bool operator<(const SubtargetSubTypeKV &Other) const {
+ return StringRef(Key) < StringRef(Other.Key);
+ }
+};
+
+//===----------------------------------------------------------------------===//
///
/// Generic base class for all target subtargets.
///
@@ -37,10 +75,9 @@ class MCSubtargetInfo {
Triple TargetTriple;
std::string CPU; // CPU being targeted.
ArrayRef<SubtargetFeatureKV> ProcFeatures; // Processor feature list
- ArrayRef<SubtargetFeatureKV> ProcDesc; // Processor descriptions
+ ArrayRef<SubtargetSubTypeKV> ProcDesc; // Processor descriptions
// Scheduler machine model
- const SubtargetInfoKV *ProcSchedModels;
const MCWriteProcResEntry *WriteProcResTable;
const MCWriteLatencyEntry *WriteLatencyTable;
const MCReadAdvanceEntry *ReadAdvanceTable;
@@ -55,8 +92,7 @@ public:
MCSubtargetInfo(const MCSubtargetInfo &) = default;
MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF,
- ArrayRef<SubtargetFeatureKV> PD,
- const SubtargetInfoKV *ProcSched,
+ ArrayRef<SubtargetSubTypeKV> PD,
const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
const MCReadAdvanceEntry *RA, const InstrStage *IS,
const unsigned *OC, const unsigned *FP);
@@ -105,6 +141,10 @@ public:
/// all feature bits implied by the flag.
FeatureBitset ApplyFeatureFlag(StringRef FS);
+ /// Set/clear additional feature bits, including all other bits they imply.
+ FeatureBitset SetFeatureBitsTransitively(const FeatureBitset& FB);
+ FeatureBitset ClearFeatureBitsTransitively(const FeatureBitset &FB);
+
/// Check whether the subtarget features are enabled/disabled as per
/// the provided string, ignoring all other features.
bool checkFeatures(StringRef FS) const;
@@ -153,6 +193,16 @@ public:
return 0;
}
+ /// Return the set of ReadAdvance entries declared by the scheduling class
+ /// descriptor in input.
+ ArrayRef<MCReadAdvanceEntry>
+ getReadAdvanceEntries(const MCSchedClassDesc &SC) const {
+ if (!SC.NumReadAdvanceEntries)
+ return ArrayRef<MCReadAdvanceEntry>();
+ return ArrayRef<MCReadAdvanceEntry>(&ReadAdvanceTable[SC.ReadAdvanceIdx],
+ SC.NumReadAdvanceEntries);
+ }
+
/// Get scheduling itinerary of a CPU.
InstrItineraryData getInstrItineraryForCPU(StringRef CPU) const;
@@ -171,11 +221,6 @@ public:
auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
-
- /// Returns string representation of scheduler comment
- virtual std::string getSchedInfoStr(MCInst const &MCI) const {
- return {};
- }
};
} // end namespace llvm
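MCSubtargetInfo now owns the SubtargetFeatureKV/SubtargetSubTypeKV definitions (the CPU table carries its scheduling model directly, replacing SubtargetInfoKV), and both structs keep the operator< overloads that make the tablegen-sorted tables searchable with std::lower_bound, as the ProcDesc lookup in the context above already does. A short sketch, outside the imported sources, of that lookup pattern (findCPU is a hypothetical helper):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include <algorithm>

    using namespace llvm;

    static const SubtargetSubTypeKV *findCPU(ArrayRef<SubtargetSubTypeKV> Table,
                                             StringRef CPU) {
      // operator<(StringRef) on the KV struct drives the binary search.
      const SubtargetSubTypeKV *I =
          std::lower_bound(Table.begin(), Table.end(), CPU);
      return (I != Table.end() && StringRef(I->Key) == CPU) ? I : nullptr;
    }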
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 4681a1be60c4..189484deac7e 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -1,9 +1,8 @@
//===- MCSymbol.h - Machine Code Symbols ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,6 +48,7 @@ protected:
SymbolKindELF,
SymbolKindMachO,
SymbolKindWasm,
+ SymbolKindXCOFF,
};
/// A symbol can contain an Offset, or Value, or be Common, but never more
@@ -58,6 +58,7 @@ protected:
SymContentsOffset,
SymContentsVariable,
SymContentsCommon,
+ SymContentsTargetCommon, // Index stores the section index
};
// Special sentinal value for the absolute pseudo fragment.
@@ -108,7 +109,7 @@ protected:
/// This is actually a Contents enumerator, but is unsigned to avoid sign
/// extension and achieve better bitpacking with MSVC.
- unsigned SymbolContents : 2;
+ unsigned SymbolContents : 3;
/// The alignment of the symbol, if it is 'common', or -1.
///
@@ -286,6 +287,8 @@ public:
bool isWasm() const { return Kind == SymbolKindWasm; }
+ bool isXCOFF() const { return Kind == SymbolKindXCOFF; }
+
/// @}
/// \name Variable Symbols
/// @{
@@ -342,10 +345,11 @@ public:
///
/// \param Size - The size of the symbol.
/// \param Align - The alignment of the symbol.
- void setCommon(uint64_t Size, unsigned Align) {
+ /// \param Target - Is the symbol a target-specific common-like symbol.
+ void setCommon(uint64_t Size, unsigned Align, bool Target = false) {
assert(getOffset() == 0);
CommonSize = Size;
- SymbolContents = SymContentsCommon;
+ SymbolContents = Target ? SymContentsTargetCommon : SymContentsCommon;
assert((!Align || isPowerOf2_32(Align)) &&
"Alignment must be a power of 2");
@@ -365,20 +369,28 @@ public:
///
/// \param Size - The size of the symbol.
/// \param Align - The alignment of the symbol.
+ /// \param Target - Is the symbol a target-specific common-like symbol.
/// \return True if symbol was already declared as a different type
- bool declareCommon(uint64_t Size, unsigned Align) {
+ bool declareCommon(uint64_t Size, unsigned Align, bool Target = false) {
assert(isCommon() || getOffset() == 0);
if(isCommon()) {
- if(CommonSize != Size || getCommonAlignment() != Align)
- return true;
+ if (CommonSize != Size || getCommonAlignment() != Align ||
+ isTargetCommon() != Target)
+ return true;
} else
- setCommon(Size, Align);
+ setCommon(Size, Align, Target);
return false;
}
/// Is this a 'common' symbol.
bool isCommon() const {
- return SymbolContents == SymContentsCommon;
+ return SymbolContents == SymContentsCommon ||
+ SymbolContents == SymContentsTargetCommon;
+ }
+
+ /// Is this a target-specific common-like symbol.
+ bool isTargetCommon() const {
+ return SymbolContents == SymContentsTargetCommon;
}
MCFragment *getFragment(bool SetUsed = true) const {
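The symbol-contents enum grows a target-specific common kind (and the bitfield widens from 2 to 3 bits to hold it); declareCommon() now also reports a mismatch between plain and target common as a redeclaration. A one-line sketch, assuming the header above, of declaring such a symbol:

    #include "llvm/MC/MCSymbol.h"

    using namespace llvm;

    static bool declareTargetCommon(MCSymbol &Sym, uint64_t Size, unsigned Align) {
      // Returns true if Sym was already declared with a different size,
      // alignment, or common kind.
      return Sym.declareCommon(Size, Align, /*Target=*/true);
    }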
diff --git a/include/llvm/MC/MCSymbolCOFF.h b/include/llvm/MC/MCSymbolCOFF.h
index 7918c353dc15..94087ce871ae 100644
--- a/include/llvm/MC/MCSymbolCOFF.h
+++ b/include/llvm/MC/MCSymbolCOFF.h
@@ -1,9 +1,8 @@
//===- MCSymbolCOFF.h - ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCSymbolELF.h b/include/llvm/MC/MCSymbolELF.h
index bbcd22e8e7db..34e5c4344aff 100644
--- a/include/llvm/MC/MCSymbolELF.h
+++ b/include/llvm/MC/MCSymbolELF.h
@@ -1,9 +1,8 @@
//===- MCSymbolELF.h - -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCSYMBOLELF_H
diff --git a/include/llvm/MC/MCSymbolMachO.h b/include/llvm/MC/MCSymbolMachO.h
index 6125c2050976..8f9ff56470a7 100644
--- a/include/llvm/MC/MCSymbolMachO.h
+++ b/include/llvm/MC/MCSymbolMachO.h
@@ -1,9 +1,8 @@
//===- MCSymbolMachO.h - ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCSYMBOLMACHO_H
@@ -35,6 +34,7 @@ class MCSymbolMachO : public MCSymbol {
SF_WeakDefinition = 0x0080,
SF_SymbolResolver = 0x0100,
SF_AltEntry = 0x0200,
+ SF_Cold = 0x0400,
// Common alignment
SF_CommonAlignmentMask = 0xF0FF,
@@ -98,6 +98,10 @@ public:
return getFlags() & SF_AltEntry;
}
+ void setCold() const { modifyFlags(SF_Cold, SF_Cold); }
+
+ bool isCold() const { return getFlags() & SF_Cold; }
+
void setDesc(unsigned Value) const {
assert(Value == (Value & SF_DescFlagsMask) &&
"Invalid .desc value!");
diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h
index 8e66dc881d0f..c50cd0ee4709 100644
--- a/include/llvm/MC/MCSymbolWasm.h
+++ b/include/llvm/MC/MCSymbolWasm.h
@@ -1,9 +1,8 @@
//===- MCSymbolWasm.h - ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCSYMBOLWASM_H
@@ -19,7 +18,9 @@ class MCSymbolWasm : public MCSymbol {
bool IsWeak = false;
bool IsHidden = false;
bool IsComdat = false;
- std::string ModuleName;
+ mutable bool IsUsedInGOT = false;
+ Optional<std::string> ImportModule;
+ Optional<std::string> ImportName;
wasm::WasmSignature *Signature = nullptr;
Optional<wasm::WasmGlobalType> GlobalType;
Optional<wasm::WasmEventType> EventType;
@@ -32,7 +33,7 @@ public:
// Use a module name of "env" for now, for compatibility with existing tools.
// This is temporary, and may change, as the ABI is not yet stable.
MCSymbolWasm(const StringMapEntry<bool> *Name, bool isTemporary)
- : MCSymbol(SymbolKindWasm, Name, isTemporary), ModuleName("env") {}
+ : MCSymbol(SymbolKindWasm, Name, isTemporary) {}
static bool classof(const MCSymbol *S) { return S->isWasm(); }
const MCExpr *getSize() const { return SymbolSize; }
@@ -46,6 +47,13 @@ public:
wasm::WasmSymbolType getType() const { return Type; }
void setType(wasm::WasmSymbolType type) { Type = type; }
+ bool isExported() const {
+ return getFlags() & wasm::WASM_SYMBOL_EXPORTED;
+ }
+ void setExported() const {
+ modifyFlags(wasm::WASM_SYMBOL_EXPORTED, wasm::WASM_SYMBOL_EXPORTED);
+ }
+
bool isWeak() const { return IsWeak; }
void setWeak(bool isWeak) { IsWeak = isWeak; }
@@ -55,8 +63,24 @@ public:
bool isComdat() const { return IsComdat; }
void setComdat(bool isComdat) { IsComdat = isComdat; }
- const StringRef getModuleName() const { return ModuleName; }
- void setModuleName(StringRef Name) { ModuleName = Name; }
+ const StringRef getImportModule() const {
+ if (ImportModule.hasValue()) {
+ return ImportModule.getValue();
+ }
+ return "env";
+ }
+ void setImportModule(StringRef Name) { ImportModule = Name; }
+
+ const StringRef getImportName() const {
+ if (ImportName.hasValue()) {
+ return ImportName.getValue();
+ }
+ return getName();
+ }
+ void setImportName(StringRef Name) { ImportName = Name; }
+
+ void setUsedInGOT() const { IsUsedInGOT = true; }
+ bool isUsedInGOT() const { return IsUsedInGOT; }
const wasm::WasmSignature *getSignature() const { return Signature; }
void setSignature(wasm::WasmSignature *Sig) { Signature = Sig; }
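Wasm symbols replace the eagerly stored module name with lazily defaulted import module/name: until the setters are called, getImportModule() returns "env" and getImportName() returns the symbol's own name. A short sketch, with made-up module and import names, assuming the header above:

    #include "llvm/MC/MCSymbolWasm.h"

    using namespace llvm;

    static void overrideImport(MCSymbolWasm &Sym) {
      // Before these calls: getImportModule() == "env",
      // getImportName() == Sym.getName().
      Sym.setImportModule("wasi_unstable"); // hypothetical module name
      Sym.setImportName("fd_write");        // hypothetical import name
    }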
diff --git a/include/llvm/MC/MCSymbolXCOFF.h b/include/llvm/MC/MCSymbolXCOFF.h
new file mode 100644
index 000000000000..0a1fe1475138
--- /dev/null
+++ b/include/llvm/MC/MCSymbolXCOFF.h
@@ -0,0 +1,26 @@
+//===- MCSymbolXCOFF.h - ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_MC_MCSYMBOLXCOFF_H
+#define LLVM_MC_MCSYMBOLXCOFF_H
+
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+
+namespace llvm {
+
+class MCSymbolXCOFF : public MCSymbol {
+public:
+ MCSymbolXCOFF(const StringMapEntry<bool> *Name, bool isTemporary)
+ : MCSymbol(SymbolKindXCOFF, Name, isTemporary) {}
+
+ static bool classof(const MCSymbol *S) { return S->isXCOFF(); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCSYMBOLXCOFF_H
diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h
index f5d330fbeb22..f184620ff047 100644
--- a/include/llvm/MC/MCTargetOptions.h
+++ b/include/llvm/MC/MCTargetOptions.h
@@ -1,9 +1,8 @@
//===- MCTargetOptions.h - MC Target Options --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -16,18 +15,18 @@
namespace llvm {
enum class ExceptionHandling {
- None, /// No exception support
- DwarfCFI, /// DWARF-like instruction based exceptions
- SjLj, /// setjmp/longjmp based exceptions
- ARM, /// ARM EHABI
- WinEH, /// Windows Exception Handling
- Wasm, /// WebAssembly Exception Handling
+ None, ///< No exception support
+ DwarfCFI, ///< DWARF-like instruction based exceptions
+ SjLj, ///< setjmp/longjmp based exceptions
+ ARM, ///< ARM EHABI
+ WinEH, ///< Windows Exception Handling
+ Wasm, ///< WebAssembly Exception Handling
};
enum class DebugCompressionType {
- None, /// No compression
- GNU, /// zlib-gnu style compression
- Z, /// zlib style complession
+ None, ///< No compression
+ GNU, ///< zlib-gnu style compression
+ Z, ///< zlib style complession
};
class StringRef;
@@ -39,9 +38,6 @@ public:
AsmInstrumentationAddress
};
- /// Enables AddressSanitizer instrumentation at machine level.
- bool SanitizeAddress : 1;
-
bool MCRelaxAll : 1;
bool MCNoExecStack : 1;
bool MCFatalWarnings : 1;
diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.inc b/include/llvm/MC/MCTargetOptionsCommandFlags.inc
index 5172fa44511f..9f1177f470b9 100644
--- a/include/llvm/MC/MCTargetOptionsCommandFlags.inc
+++ b/include/llvm/MC/MCTargetOptionsCommandFlags.inc
@@ -1,9 +1,8 @@
//===-- MCTargetOptionsCommandFlags.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,15 +18,6 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
-static cl::opt<MCTargetOptions::AsmInstrumentation> AsmInstrumentation(
- "asm-instrumentation", cl::desc("Instrumentation of inline assembly and "
- "assembly source files"),
- cl::init(MCTargetOptions::AsmInstrumentationNone),
- cl::values(clEnumValN(MCTargetOptions::AsmInstrumentationNone, "none",
- "no instrumentation at all"),
- clEnumValN(MCTargetOptions::AsmInstrumentationAddress, "address",
- "instrument instructions with memory arguments")));
-
static cl::opt<bool> RelaxAll("mc-relax-all",
cl::desc("When used with filetype=obj, "
"relax all fixups in the emitted object file"));
@@ -63,8 +53,6 @@ ABIName("target-abi", cl::Hidden,
static MCTargetOptions InitMCTargetOptionsFromFlags() {
MCTargetOptions Options;
- Options.SanitizeAddress =
- (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress);
Options.MCRelaxAll = RelaxAll;
Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible;
Options.MCPIECopyRelocations = PIECopyRelocations;
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index 11f5082ed3f4..0be7ce7055c5 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCValue.h - MCValue class -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h
index 6b788cfe96b9..4adbca28f116 100644
--- a/include/llvm/MC/MCWasmObjectWriter.h
+++ b/include/llvm/MC/MCWasmObjectWriter.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCWasmObjectWriter.h - Wasm Object Writer -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCWasmStreamer.h b/include/llvm/MC/MCWasmStreamer.h
index 01e6a4379287..2d7f2b9975c9 100644
--- a/include/llvm/MC/MCWasmStreamer.h
+++ b/include/llvm/MC/MCWasmStreamer.h
@@ -1,9 +1,8 @@
//===- MCWasmStreamer.h - MCStreamer Wasm Object File Interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCWin64EH.h b/include/llvm/MC/MCWin64EH.h
index 1a9f6f403d7c..60ec06e61b7c 100644
--- a/include/llvm/MC/MCWin64EH.h
+++ b/include/llvm/MC/MCWin64EH.h
@@ -1,9 +1,8 @@
//===- MCWin64EH.h - Machine Code Win64 EH support --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index c1d35ea1f6ba..3fe124fd7f1c 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MCWinCOFFObjectWriter.h - Win COFF Object Writer -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h
index 0049d04b4b3f..c1c1ec56cb48 100644
--- a/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/include/llvm/MC/MCWinCOFFStreamer.h
@@ -1,9 +1,8 @@
//===- MCWinCOFFStreamer.h - COFF Object File Interface ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCWinEH.h b/include/llvm/MC/MCWinEH.h
index 98ef0367a11d..b1c28c0ecae7 100644
--- a/include/llvm/MC/MCWinEH.h
+++ b/include/llvm/MC/MCWinEH.h
@@ -1,9 +1,8 @@
//===- MCWinEH.h - Windows Unwinding Support --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCXCOFFObjectWriter.h b/include/llvm/MC/MCXCOFFObjectWriter.h
new file mode 100644
index 000000000000..fe4087f70614
--- /dev/null
+++ b/include/llvm/MC/MCXCOFFObjectWriter.h
@@ -0,0 +1,41 @@
+//===-- llvm/MC/MCXCOFFObjectWriter.h - XCOFF Object Writer ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCXCOFFOBJECTWRITER_H
+#define LLVM_MC_MCXCOFFOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+
+namespace llvm {
+
+class raw_pwrite_stream;
+
+class MCXCOFFObjectTargetWriter : public MCObjectTargetWriter {
+protected:
+ MCXCOFFObjectTargetWriter(bool Is64Bit);
+
+public:
+ ~MCXCOFFObjectTargetWriter() override;
+
+ Triple::ObjectFormatType getFormat() const override { return Triple::XCOFF; }
+ static bool classof(const MCObjectTargetWriter *W) {
+ return W->getFormat() == Triple::XCOFF;
+ }
+ bool is64Bit() const { return Is64Bit; }
+
+private:
+ bool Is64Bit;
+};
+
+std::unique_ptr<MCObjectWriter>
+createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS);
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCXCOFFOBJECTWRITER_H
diff --git a/include/llvm/MC/MCXCOFFStreamer.h b/include/llvm/MC/MCXCOFFStreamer.h
new file mode 100644
index 000000000000..159ae4818749
--- /dev/null
+++ b/include/llvm/MC/MCXCOFFStreamer.h
@@ -0,0 +1,33 @@
+//===- MCXCOFFObjectStreamer.h - MCStreamer XCOFF Object File Interface ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCXCOFFSTREAMER_H
+#define LLVM_MC_MCXCOFFSTREAMER_H
+
+#include "llvm/MC/MCObjectStreamer.h"
+
+namespace llvm {
+
+class MCXCOFFStreamer : public MCObjectStreamer {
+public:
+ MCXCOFFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter);
+
+ bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
+ void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) override;
+ void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+ uint64_t Size = 0, unsigned ByteAlignment = 0,
+ SMLoc Loc = SMLoc()) override;
+ void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_MC_MCXCOFFSTREAMER_H
diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h
index 91ed661ebeab..5872540e6104 100644
--- a/include/llvm/MC/MachineLocation.h
+++ b/include/llvm/MC/MachineLocation.h
@@ -1,9 +1,8 @@
//===- llvm/MC/MachineLocation.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The MachineLocation class is used to represent a simple location in a machine
diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h
index 66eb9ec56d14..0342c4cfbbde 100644
--- a/include/llvm/MC/SectionKind.h
+++ b/include/llvm/MC/SectionKind.h
@@ -1,9 +1,8 @@
//===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h
index 265260fcee4d..c83eca4e512d 100644
--- a/include/llvm/MC/StringTableBuilder.h
+++ b/include/llvm/MC/StringTableBuilder.h
@@ -1,9 +1,8 @@
//===- StringTableBuilder.h - String table building utility -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 76c7dd560800..fc9565ceafad 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -1,9 +1,8 @@
//===- llvm/MC/SubtargetFeature.h - CPU characteristics ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#define LLVM_MC_SUBTARGETFEATURE_H
#include "llvm/ADT/StringRef.h"
+#include <array>
#include <bitset>
#include <initializer_list>
#include <string>
@@ -26,11 +26,12 @@
namespace llvm {
-template <typename T> class ArrayRef;
class raw_ostream;
class Triple;
-const unsigned MAX_SUBTARGET_FEATURES = 192;
+const unsigned MAX_SUBTARGET_WORDS = 3;
+const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64;
+
/// Container class for subtarget features.
/// This is convenient because std::bitset does not have a constructor
/// with an initializer list of set bits.
@@ -45,38 +46,34 @@ public:
for (auto I : Init)
set(I);
}
-};
-
-//===----------------------------------------------------------------------===//
-
-/// Used to provide key value pairs for feature and CPU bit flags.
-struct SubtargetFeatureKV {
- const char *Key; ///< K-V key string
- const char *Desc; ///< Help descriptor
- FeatureBitset Value; ///< K-V integer value
- FeatureBitset Implies; ///< K-V bit mask
- /// Compare routine for std::lower_bound
- bool operator<(StringRef S) const {
- return StringRef(Key) < S;
- }
-
- /// Compare routine for std::is_sorted.
- bool operator<(const SubtargetFeatureKV &Other) const {
- return StringRef(Key) < StringRef(Other.Key);
+ bool operator < (const FeatureBitset &Other) const {
+ for (unsigned I = 0, E = size(); I != E; ++I) {
+ bool LHS = test(I), RHS = Other.test(I);
+ if (LHS != RHS)
+ return LHS < RHS;
+ }
+ return false;
}
};
-//===----------------------------------------------------------------------===//
+/// Class used to store the subtarget bits in the tables created by tablegen.
+/// The std::initializer_list constructor of FeatureBitset can't be done at
+/// compile time and requires a static constructor to run at startup.
+class FeatureBitArray {
+ std::array<uint64_t, MAX_SUBTARGET_WORDS> Bits;
+
+public:
+ constexpr FeatureBitArray(const std::array<uint64_t, MAX_SUBTARGET_WORDS> &B)
+ : Bits(B) {}
-/// Used to provide key value pairs for CPU and arbitrary pointers.
-struct SubtargetInfoKV {
- const char *Key; ///< K-V key string
- const void *Value; ///< K-V pointer value
+ FeatureBitset getAsBitset() const {
+ FeatureBitset Result;
- /// Compare routine for std::lower_bound
- bool operator<(StringRef S) const {
- return StringRef(Key) < S;
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i)
+ Result |= FeatureBitset(Bits[i]) << (64 * i);
+
+ return Result;
}
};
@@ -102,19 +99,6 @@ public:
/// Adds Features.
void AddFeature(StringRef String, bool Enable = true);
- /// Toggles a feature and update the feature bits.
- static void ToggleFeature(FeatureBitset &Bits, StringRef String,
- ArrayRef<SubtargetFeatureKV> FeatureTable);
-
- /// Applies the feature flag and update the feature bits.
- static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
- ArrayRef<SubtargetFeatureKV> FeatureTable);
-
- /// Returns feature bits of a CPU.
- FeatureBitset getFeatureBits(StringRef CPU,
- ArrayRef<SubtargetFeatureKV> CPUTable,
- ArrayRef<SubtargetFeatureKV> FeatureTable);
-
/// Returns the vector of individual subtarget features.
const std::vector<std::string> &getFeatures() const { return Features; }
@@ -126,6 +110,32 @@ public:
/// Adds the default features for the specified target triple.
void getDefaultSubtargetFeatures(const Triple& Triple);
+
+ /// Determine if a feature has a flag; '+' or '-'
+ static bool hasFlag(StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' or '-' flag
+ return Ch == '+' || Ch =='-';
+ }
+
+ /// Return string stripped of flag.
+ static std::string StripFlag(StringRef Feature) {
+ return hasFlag(Feature) ? Feature.substr(1) : Feature;
+ }
+
+ /// Return true if enable flag; '+'.
+ static inline bool isEnabled(StringRef Feature) {
+ assert(!Feature.empty() && "Empty string");
+ // Get first character
+ char Ch = Feature[0];
+ // Check if first character is '+' for enabled
+ return Ch == '+';
+ }
+
+ /// Splits a string of comma separated items in to a vector of strings.
+ static void Split(std::vector<std::string> &V, StringRef S);
};
} // end namespace llvm
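The key change in SubtargetFeature.h is FeatureBitArray: the tablegen tables now store their implied-feature masks as a constexpr array of 64-bit words (three words for up to 192 features), avoiding static constructors, and expand them to a FeatureBitset only when queried. A self-contained sketch, with a made-up bit pattern, of how that expansion behaves:

    #include "llvm/MC/SubtargetFeature.h"
    #include <array>
    #include <cstdint>

    using namespace llvm;

    // Hypothetical mask: bit 0 (word 0) and bit 65 (bit 1 of word 1) are set.
    constexpr FeatureBitArray ExampleImplies(
        std::array<uint64_t, MAX_SUBTARGET_WORDS>{{0x1ULL, 0x2ULL, 0x0ULL}});

    static bool impliesFeature(unsigned Bit) {
      FeatureBitset Bits = ExampleImplies.getAsBitset();
      return Bits.test(Bit); // impliesFeature(65) is true for this example
    }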
diff --git a/include/llvm/MCA/Context.h b/include/llvm/MCA/Context.h
index 6b2bee0fdc42..503d780d4947 100644
--- a/include/llvm/MCA/Context.h
+++ b/include/llvm/MCA/Context.h
@@ -1,9 +1,8 @@
//===---------------------------- Context.h ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -32,15 +31,21 @@ namespace mca {
/// This is a convenience struct to hold the parameters necessary for creating
/// the pre-built "default" out-of-order pipeline.
struct PipelineOptions {
- PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS,
- bool NoAlias)
- : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
- StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {}
+ PipelineOptions(unsigned UOPQSize, unsigned DecThr, unsigned DW, unsigned RFS,
+ unsigned LQS, unsigned SQS, bool NoAlias,
+ bool ShouldEnableBottleneckAnalysis = false)
+ : MicroOpQueueSize(UOPQSize), DecodersThroughput(DecThr),
+ DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS),
+ StoreQueueSize(SQS), AssumeNoAlias(NoAlias),
+ EnableBottleneckAnalysis(ShouldEnableBottleneckAnalysis) {}
+ unsigned MicroOpQueueSize;
+ unsigned DecodersThroughput; // Instructions per cycle.
unsigned DispatchWidth;
unsigned RegisterFileSize;
unsigned LoadQueueSize;
unsigned StoreQueueSize;
bool AssumeNoAlias;
+ bool EnableBottleneckAnalysis;
};
class Context {
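PipelineOptions grows three knobs: a micro-op queue size and a decoder throughput at the front, and an opt-in bottleneck-analysis flag at the end. A construction sketch with illustrative values (none of the numbers below are defaults taken from the patch):

    #include "llvm/MCA/Context.h"

    using namespace llvm;
    using namespace llvm::mca;

    static PipelineOptions makeOptions() {
      return PipelineOptions(/*UOPQSize=*/32, /*DecThr=*/4,
                             /*DW=*/4, /*RFS=*/0,
                             /*LQS=*/16, /*SQS=*/16,
                             /*NoAlias=*/false,
                             /*ShouldEnableBottleneckAnalysis=*/false);
    }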
diff --git a/include/llvm/MCA/HWEventListener.h b/include/llvm/MCA/HWEventListener.h
index 3b32b2cd6577..e11d06de2b2e 100644
--- a/include/llvm/MCA/HWEventListener.h
+++ b/include/llvm/MCA/HWEventListener.h
@@ -1,9 +1,8 @@
//===----------------------- HWEventListener.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -40,6 +39,7 @@ public:
// Events generated by the Retire Control Unit.
Retired,
// Events generated by the Scheduler.
+ Pending,
Ready,
Issued,
Executed,
@@ -126,6 +126,35 @@ public:
const InstRef &IR;
};
+// A HWPressureEvent describes an increase in backend pressure caused by
+// the presence of data dependencies or unavailability of pipeline resources.
+class HWPressureEvent {
+public:
+ enum GenericReason {
+ INVALID = 0,
+ // Scheduler was unable to issue all the ready instructions because some
+ // pipeline resources were unavailable.
+ RESOURCES,
+ // Instructions could not be issued because of register data dependencies.
+ REGISTER_DEPS,
+ // Instructions could not be issued because of memory dependencies.
+ MEMORY_DEPS
+ };
+
+ HWPressureEvent(GenericReason reason, ArrayRef<InstRef> Insts,
+ uint64_t Mask = 0)
+ : Reason(reason), AffectedInstructions(Insts), ResourceMask(Mask) {}
+
+ // Reason for this increase in backend pressure.
+ GenericReason Reason;
+
+ // Instructions affected (i.e. delayed) by this increase in backend pressure.
+ ArrayRef<InstRef> AffectedInstructions;
+
+ // A mask of unavailable processor resources.
+ const uint64_t ResourceMask;
+};
+
class HWEventListener {
public:
// Generic events generated by the pipeline.
@@ -134,6 +163,7 @@ public:
virtual void onEvent(const HWInstructionEvent &Event) {}
virtual void onEvent(const HWStallEvent &Event) {}
+ virtual void onEvent(const HWPressureEvent &Event) {}
using ResourceRef = std::pair<uint64_t, uint64_t>;
virtual void onResourceAvailable(const ResourceRef &RRef) {}
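HWPressureEvent is a new notification for cycles where dispatch or issue is limited by resource pressure or data dependencies, and HWEventListener gets a matching onEvent overload with an empty default. A sketch, outside the imported sources, of a listener that counts resource-pressure events:

    #include "llvm/MCA/HWEventListener.h"

    using namespace llvm;
    using namespace llvm::mca;

    class PressureCounter : public HWEventListener {
      unsigned ResourcePressureEvents = 0;

    public:
      void onEvent(const HWPressureEvent &Event) override {
        // Count only cycles where unavailable pipeline resources were the limiter.
        if (Event.Reason == HWPressureEvent::RESOURCES)
          ++ResourcePressureEvents;
      }
      unsigned count() const { return ResourcePressureEvents; }
    };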
diff --git a/include/llvm/MCA/HardwareUnits/HardwareUnit.h b/include/llvm/MCA/HardwareUnits/HardwareUnit.h
index 104a2009f219..f6e178bcff10 100644
--- a/include/llvm/MCA/HardwareUnits/HardwareUnit.h
+++ b/include/llvm/MCA/HardwareUnits/HardwareUnit.h
@@ -1,9 +1,8 @@
//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/HardwareUnits/LSUnit.h b/include/llvm/MCA/HardwareUnits/LSUnit.h
index e217fc50f780..ae9a49c64855 100644
--- a/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -1,9 +1,8 @@
//===------------------------- LSUnit.h --------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -16,21 +15,298 @@
#ifndef LLVM_MCA_LSUNIT_H
#define LLVM_MCA_LSUNIT_H
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
+#include "llvm/MCA/Instruction.h"
namespace llvm {
namespace mca {
-class InstRef;
class Scheduler;
-/// A Load/Store Unit implementing a load and store queues.
+/// A node of a memory dependency graph. A MemoryGroup describes a set of
+/// instructions with the same memory dependencies.
///
-/// This class implements a load queue and a store queue to emulate the
-/// out-of-order execution of memory operations.
-/// Each load (or store) consumes an entry in the load (or store) queue.
+/// By construction, instructions of a MemoryGroup don't depend on each other.
+/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
+/// A Memory group identifier is then stored as a "token" in field
+/// Instruction::LSUTokenID of each dispatched instruction. That token is used
+/// internally by the LSUnit to track memory dependencies.
+class MemoryGroup {
+ unsigned NumPredecessors;
+ unsigned NumExecutingPredecessors;
+ unsigned NumExecutedPredecessors;
+
+ unsigned NumInstructions;
+ unsigned NumExecuting;
+ unsigned NumExecuted;
+ SmallVector<MemoryGroup *, 4> Succ;
+
+ CriticalDependency CriticalPredecessor;
+ InstRef CriticalMemoryInstruction;
+
+ MemoryGroup(const MemoryGroup &) = delete;
+ MemoryGroup &operator=(const MemoryGroup &) = delete;
+
+public:
+ MemoryGroup()
+ : NumPredecessors(0), NumExecutingPredecessors(0),
+ NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
+ NumExecuted(0), CriticalPredecessor(), CriticalMemoryInstruction() {}
+ MemoryGroup(MemoryGroup &&) = default;
+
+ ArrayRef<MemoryGroup *> getSuccessors() const { return Succ; }
+ unsigned getNumSuccessors() const { return Succ.size(); }
+ unsigned getNumPredecessors() const { return NumPredecessors; }
+ unsigned getNumExecutingPredecessors() const {
+ return NumExecutingPredecessors;
+ }
+ unsigned getNumExecutedPredecessors() const {
+ return NumExecutedPredecessors;
+ }
+ unsigned getNumInstructions() const { return NumInstructions; }
+ unsigned getNumExecuting() const { return NumExecuting; }
+ unsigned getNumExecuted() const { return NumExecuted; }
+
+ const InstRef &getCriticalMemoryInstruction() const {
+ return CriticalMemoryInstruction;
+ }
+ const CriticalDependency &getCriticalPredecessor() const {
+ return CriticalPredecessor;
+ }
+
+ void addSuccessor(MemoryGroup *Group) {
+ Group->NumPredecessors++;
+ assert(!isExecuted() && "Should have been removed!");
+ if (isExecuting())
+ Group->onGroupIssued(CriticalMemoryInstruction);
+ Succ.emplace_back(Group);
+ }
+
+ bool isWaiting() const {
+ return NumPredecessors >
+ (NumExecutingPredecessors + NumExecutedPredecessors);
+ }
+ bool isPending() const {
+ return NumExecutingPredecessors &&
+ ((NumExecutedPredecessors + NumExecutingPredecessors) ==
+ NumPredecessors);
+ }
+ bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
+ bool isExecuting() const {
+ return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
+ }
+ bool isExecuted() const { return NumInstructions == NumExecuted; }
+
+ void onGroupIssued(const InstRef &IR) {
+ assert(!isReady() && "Unexpected group-start event!");
+ NumExecutingPredecessors++;
+
+ unsigned Cycles = IR.getInstruction()->getCyclesLeft();
+ if (CriticalPredecessor.Cycles < Cycles) {
+ CriticalPredecessor.IID = IR.getSourceIndex();
+ CriticalPredecessor.Cycles = Cycles;
+ }
+ }
+
+ void onGroupExecuted() {
+ assert(!isReady() && "Inconsistent state found!");
+ NumExecutingPredecessors--;
+ NumExecutedPredecessors++;
+ }
+
+ void onInstructionIssued(const InstRef &IR) {
+ assert(!isExecuting() && "Invalid internal state!");
+ ++NumExecuting;
+
+ // update the CriticalMemDep.
+ const Instruction &IS = *IR.getInstruction();
+ if ((bool)CriticalMemoryInstruction) {
+ const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
+ if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
+ CriticalMemoryInstruction = IR;
+ } else {
+ CriticalMemoryInstruction = IR;
+ }
+
+ if (!isExecuting())
+ return;
+
+ // Notify successors that this group started execution.
+ for (MemoryGroup *MG : Succ)
+ MG->onGroupIssued(CriticalMemoryInstruction);
+ }
+
+ void onInstructionExecuted() {
+ assert(isReady() && !isExecuted() && "Invalid internal state!");
+ --NumExecuting;
+ ++NumExecuted;
+
+ if (!isExecuted())
+ return;
+
+ // Notify successors that this group has finished execution.
+ for (MemoryGroup *MG : Succ)
+ MG->onGroupExecuted();
+ }
+
+ void addInstruction() {
+ assert(!getNumSuccessors() && "Cannot add instructions to this group!");
+ ++NumInstructions;
+ }
+
+ void cycleEvent() {
+ if (isWaiting() && CriticalPredecessor.Cycles)
+ CriticalPredecessor.Cycles--;
+ }
+};
+
+/// Abstract base interface for LS (load/store) units in llvm-mca.
+class LSUnitBase : public HardwareUnit {
+ /// Load queue size.
+ ///
+ /// A value of zero for this field means that the load queue is unbounded.
+ /// Processor models can declare the size of a load queue via tablegen (see
+ /// the definition of tablegen class LoadQueue in
+ /// llvm/Target/TargetSchedule.td).
+ unsigned LQSize;
+
+ /// Store queue size.
+ ///
+ /// A value of zero for this field means that the store queue is unbounded.
+ /// Processor models can declare the size of a store queue via tablegen (see
+ /// the definition of tablegen class StoreQueue in
+ /// llvm/Target/TargetSchedule.td).
+ unsigned SQSize;
+
+ unsigned UsedLQEntries;
+ unsigned UsedSQEntries;
+
+ /// True if loads don't alias with stores.
+ ///
+ /// By default, the LS unit assumes that loads and stores don't alias with
+ /// each other. If this field is set to false, then loads are always assumed to
+ /// alias with stores.
+ const bool NoAlias;
+
+ /// Used to map group identifiers to MemoryGroups.
+ DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
+ unsigned NextGroupID;
+
+public:
+ LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
+ unsigned StoreQueueSize, bool AssumeNoAlias);
+
+ virtual ~LSUnitBase();
+
+ /// Returns the total number of entries in the load queue.
+ unsigned getLoadQueueSize() const { return LQSize; }
+
+ /// Returns the total number of entries in the store queue.
+ unsigned getStoreQueueSize() const { return SQSize; }
+
+ unsigned getUsedLQEntries() const { return UsedLQEntries; }
+ unsigned getUsedSQEntries() const { return UsedSQEntries; }
+ unsigned assignLQSlot() { return UsedLQEntries++; }
+ unsigned assignSQSlot() { return UsedSQEntries++; }
+
+ bool assumeNoAlias() const { return NoAlias; }
+
+ enum Status {
+ LSU_AVAILABLE = 0,
+ LSU_LQUEUE_FULL, // Load Queue unavailable
+ LSU_SQUEUE_FULL // Store Queue unavailable
+ };
+
+ /// This method checks the availability of the load/store buffers.
+ ///
+ /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
+ /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR is
+ /// not a memory operation.
+ virtual Status isAvailable(const InstRef &IR) const = 0;
+
+ /// Allocates LS resources for instruction IR.
+ ///
+ /// This method assumes that a previous call to `isAvailable(IR)` succeeded
+ /// with a LSUnitBase::Status value of LSU_AVAILABLE.
+ /// Returns the GroupID associated with this instruction. That value will be
+ /// used to set the LSUTokenID field in class Instruction.
+ virtual unsigned dispatch(const InstRef &IR) = 0;
+
+ bool isSQEmpty() const { return !UsedSQEntries; }
+ bool isLQEmpty() const { return !UsedLQEntries; }
+ bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
+ bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
+
+ bool isValidGroupID(unsigned Index) const {
+ return Index && (Groups.find(Index) != Groups.end());
+ }
+
+ /// Check if a previously dispatched instruction IR is now ready for execution.
+ bool isReady(const InstRef &IR) const {
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ const MemoryGroup &Group = getGroup(GroupID);
+ return Group.isReady();
+ }
+
+ /// Check if instruction IR only depends on memory instructions that are
+ /// currently executing.
+ bool isPending(const InstRef &IR) const {
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ const MemoryGroup &Group = getGroup(GroupID);
+ return Group.isPending();
+ }
+
+ /// Check if instruction IR is still waiting on memory operations, and the
+ /// wait time is still unknown.
+ bool isWaiting(const InstRef &IR) const {
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ const MemoryGroup &Group = getGroup(GroupID);
+ return Group.isWaiting();
+ }
+
+ bool hasDependentUsers(const InstRef &IR) const {
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ const MemoryGroup &Group = getGroup(GroupID);
+ return !Group.isExecuted() && Group.getNumSuccessors();
+ }
+
+ const MemoryGroup &getGroup(unsigned Index) const {
+ assert(isValidGroupID(Index) && "Group doesn't exist!");
+ return *Groups.find(Index)->second;
+ }
+
+ MemoryGroup &getGroup(unsigned Index) {
+ assert(isValidGroupID(Index) && "Group doesn't exist!");
+ return *Groups.find(Index)->second;
+ }
+
+ unsigned createMemoryGroup() {
+ Groups.insert(
+ std::make_pair(NextGroupID, llvm::make_unique<MemoryGroup>()));
+ return NextGroupID++;
+ }
+
+ // Instruction executed event handlers.
+ virtual void onInstructionExecuted(const InstRef &IR);
+
+ virtual void onInstructionIssued(const InstRef &IR) {
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ Groups[GroupID]->onInstructionIssued(IR);
+ }
+
+ virtual void cycleEvent();
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+/// Default Load/Store Unit (LS Unit) for simulated processors.
+///
+/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
@@ -89,26 +365,7 @@ class Scheduler;
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means, all the older loads/stores have
/// already been executed.
-class LSUnit : public HardwareUnit {
- // Load queue size.
- // LQ_Size == 0 means that there are infinite slots in the load queue.
- unsigned LQ_Size;
-
- // Store queue size.
- // SQ_Size == 0 means that there are infinite slots in the store queue.
- unsigned SQ_Size;
-
- // If true, loads will never alias with stores. This is the default.
- bool NoAlias;
-
- // When a `MayLoad` instruction is dispatched to the schedulers for execution,
- // the LSUnit reserves an entry in the `LoadQueue` for it.
- //
- // LoadQueue keeps track of all the loads that are in-flight. A load
- // instruction is eventually removed from the LoadQueue when it reaches
- // completion stage. That means, a load leaves the queue whe it is 'executed',
- // and its value can be forwarded on the data path to outside units.
- //
+class LSUnit : public LSUnitBase {
// This class doesn't know about the latency of a load instruction. So, it
// conservatively/pessimistically assumes that the latency of a load opcode
// matches the instruction latency.
@@ -139,66 +396,50 @@ class LSUnit : public HardwareUnit {
// alternative approaches that let instructions specify the number of
// load/store queue entries which they consume at dispatch stage (See
// PR39830).
- SmallSet<unsigned, 16> LoadQueue;
- SmallSet<unsigned, 16> StoreQueue;
-
- void assignLQSlot(unsigned Index);
- void assignSQSlot(unsigned Index);
- bool isReadyNoAlias(unsigned Index) const;
-
+ //
// An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
// conservatively treated as a store barrier. It forces older store to be
// executed before newer stores are issued.
- SmallSet<unsigned, 8> StoreBarriers;
-
+ //
// An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is
// conservatively treated as a load barrier. It forces older loads to execute
// before newer loads are issued.
- SmallSet<unsigned, 8> LoadBarriers;
-
- bool isSQEmpty() const { return StoreQueue.empty(); }
- bool isLQEmpty() const { return LoadQueue.empty(); }
- bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; }
- bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; }
+ unsigned CurrentLoadGroupID;
+ unsigned CurrentLoadBarrierGroupID;
+ unsigned CurrentStoreGroupID;
public:
- LSUnit(const MCSchedModel &SM, unsigned LQ = 0, unsigned SQ = 0,
- bool AssumeNoAlias = false);
+ LSUnit(const MCSchedModel &SM)
+ : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
+ LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
+ : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
+ LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
+ : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
+ CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0) {}
-#ifndef NDEBUG
- void dump() const;
-#endif
+ /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
+ /// accommodate instruction IR.
+ Status isAvailable(const InstRef &IR) const override;
- enum Status { LSU_AVAILABLE = 0, LSU_LQUEUE_FULL, LSU_SQUEUE_FULL };
+ /// Allocates LS resources for instruction IR.
+ ///
+ /// This method assumes that a previous call to `isAvailable(IR)` succeeded
+ /// returning LSU_AVAILABLE.
+ ///
+ /// By default, rules are:
+ /// 1. A store may not pass a previous store.
+ /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
+ /// 3. A load may pass a previous load.
+ /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
+ /// 5. A load has to wait until an older load barrier is fully executed.
+ /// 6. A store has to wait until an older store barrier is fully executed.
+ unsigned dispatch(const InstRef &IR) override;
- // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve
- // IR. It also returns LSU_AVAILABLE if IR is not a memory operation.
- Status isAvailable(const InstRef &IR) const;
-
- // Allocates load/store queue resources for IR.
- //
- // This method assumes that a previous call to `isAvailable(IR)` returned
- // LSU_AVAILABLE, and that IR is a memory operation.
- void dispatch(const InstRef &IR);
-
- // By default, rules are:
- // 1. A store may not pass a previous store.
- // 2. A load may not pass a previous store unless flag 'NoAlias' is set.
- // 3. A load may pass a previous load.
- // 4. A store may not pass a previous load (regardless of flag 'NoAlias').
- // 5. A load has to wait until an older load barrier is fully executed.
- // 6. A store has to wait until an older store barrier is fully executed.
- virtual bool isReady(const InstRef &IR) const;
-
- // Load and store instructions are tracked by their corresponding queues from
- // dispatch until the "instruction executed" event.
- // Only when a load instruction reaches the 'Executed' stage, its value
- // becomes available to the users. At that point, the load no longer needs to
- // be tracked by the load queue.
// FIXME: For simplicity, we optimistically assume a similar behavior for
// store instructions. In practice, store operations don't tend to leave the
// store queue until they reach the 'Retired' stage (See PR39830).
- void onInstructionExecuted(const InstRef &IR);
+ void onInstructionExecuted(const InstRef &IR) override;
};
} // namespace mca
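
The comment block above describes MemoryGroup purely in terms of predecessor counts, so the waiting/pending/ready predicates can be exercised in isolation. A small sketch under the assumption that two stack-allocated groups are enough to show the bookkeeping; real groups are created and owned by the LSUnit through createMemoryGroup(), and issue/execute notifications carry real InstRefs:

#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include <cassert>

using namespace llvm::mca;

void memoryGroupSketch() {
  MemoryGroup StoreGroup, LoadGroup;
  StoreGroup.addInstruction();          // One store tracked by StoreGroup.
  StoreGroup.addSuccessor(&LoadGroup);  // LoadGroup must wait for StoreGroup.
  LoadGroup.addInstruction();           // One load tracked by LoadGroup.

  assert(StoreGroup.isReady());   // No predecessors: ready to issue.
  assert(LoadGroup.isWaiting());  // Its predecessor has not issued yet.
  // Once the store issues and executes, the LSUnit calls onInstructionIssued()
  // and onInstructionExecuted() on StoreGroup, which notifies LoadGroup via
  // onGroupIssued()/onGroupExecuted(), moving it to pending and then ready.
}
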
diff --git a/include/llvm/MCA/HardwareUnits/RegisterFile.h b/include/llvm/MCA/HardwareUnits/RegisterFile.h
index c23ab0389234..36506327bd29 100644
--- a/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -1,9 +1,8 @@
//===--------------------- RegisterFile.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -21,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/Support/Error.h"
@@ -196,7 +196,7 @@ public:
// Collect writes that are in a data dependency with RS, and update RS
// internal state.
- void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
+ void addRegisterRead(ReadState &RS, const MCSubtargetInfo &STI) const;
// Removes write \param WS from the register mappings.
// Physical registers may be released to reflect this update.
diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 549a46c247fe..2f91185516fb 100644
--- a/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -1,9 +1,8 @@
//===--------------------- ResourceManager.h --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -335,13 +334,26 @@ class ResourceManager {
// Used to quickly identify groups that own a particular resource unit.
std::vector<uint64_t> Resource2Groups;
- // A table to map processor resource IDs to processor resource masks.
+ // A table that maps processor resource IDs to processor resource masks.
SmallVector<uint64_t, 8> ProcResID2Mask;
+ // A table that maps resource indices to actual processor resource IDs in the
+ // scheduling model.
+ SmallVector<unsigned, 8> ResIndex2ProcResID;
+
// Keeps track of which resources are busy, and how many cycles are left
// before those become usable again.
SmallDenseMap<ResourceRef, unsigned> BusyResources;
+ // Set of processor resource units available on the target.
+ uint64_t ProcResUnitMask;
+
+ // Set of processor resource units that are available during this cycle.
+ uint64_t AvailableProcResUnits;
+
+ // Set of processor resource groups that are currently reserved.
+ uint64_t ReservedResourceGroups;
+
// Returns the actual resource unit that will be used.
ResourceRef selectPipe(uint64_t ResourceID);
@@ -389,7 +401,14 @@ public:
// Release a previously reserved processor resource.
void releaseResource(uint64_t ResourceID);
- bool canBeIssued(const InstrDesc &Desc) const;
+ // Returns a zero mask if resources requested by Desc are all available during
+ // this cycle. It returns a non-zero mask value only if there are unavailable
+ // processor resources; each bit set in the mask represents a busy processor
+ // resource unit or a reserved processor resource group.
+ uint64_t checkAvailability(const InstrDesc &Desc) const;
+
+ uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
+ uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
void issueInstruction(
const InstrDesc &Desc,
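
checkAvailability() now reports which resources are busy rather than a simple yes/no. The returned mask can be decomposed bit by bit; a small, self-contained helper sketch (not part of the patch) showing how such a mask would typically be walked:

#include <cstdint>

// Walk a busy-resource mask such as the one returned by
// ResourceManager::checkAvailability(): each set bit identifies one busy
// processor resource unit or one reserved resource group.
template <typename Callback>
void forEachBusyResource(uint64_t BusyMask, Callback &&CB) {
  while (BusyMask) {
    uint64_t Unit = BusyMask & (~BusyMask + 1); // Isolate the lowest set bit.
    CB(Unit);
    BusyMask ^= Unit;
  }
}
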
diff --git a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
index 71360e984ade..06290141739e 100644
--- a/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
+++ b/include/llvm/MCA/HardwareUnits/RetireControlUnit.h
@@ -1,9 +1,8 @@
//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/HardwareUnits/Scheduler.h b/include/llvm/MCA/HardwareUnits/Scheduler.h
index 351ea4827df9..27beb842dfd2 100644
--- a/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -1,9 +1,8 @@
//===--------------------- Scheduler.h ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -68,22 +67,6 @@ public:
/// resources. This class is also responsible for tracking the progress of
/// instructions from the dispatch stage, until the write-back stage.
///
-/// An instruction dispatched to the Scheduler is initially placed into either
-/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input
-/// operands.
-///
-/// An instruction is moved from the WaitSet to the ReadySet when register
-/// operands become available, and all memory dependencies are met.
-/// Instructions that are moved from the WaitSet to the ReadySet transition
-/// in state from 'IS_AVAILABLE' to 'IS_READY'.
-///
-/// On every cycle, the Scheduler checks if it can promote instructions from the
-/// WaitSet to the ReadySet.
-///
-/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued
-/// to a (one or more) pipeline(s). This event also causes an instruction state
-/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction
-/// leaves the IssuedSet when it reaches the write-back stage.
class Scheduler : public HardwareUnit {
LSUnit &LSU;
@@ -93,10 +76,58 @@ class Scheduler : public HardwareUnit {
// Hardware resources that are managed by this scheduler.
std::unique_ptr<ResourceManager> Resources;
+ // Instructions dispatched to the Scheduler are internally classified based on
+ // the instruction stage (see Instruction::InstrStage).
+ //
+ // An Instruction dispatched to the Scheduler is added to the WaitSet if not
+ // all its register operands are available, and at least one latency is
+ // unknown. By construction, the WaitSet only contains instructions that are
+ // in the IS_DISPATCHED stage.
+ //
+ // An Instruction transitions from the WaitSet to the PendingSet if the
+ // instruction is not ready yet, but the latency of every register read is
+ // known. Instructions in the PendingSet can only be in the IS_PENDING or
+ // IS_READY stage. Only IS_READY instructions that are waiting on memory
+ // dependencies can be added to the PendingSet.
+ //
+ // Instructions in the PendingSet are immediately dominated only by
+ // instructions that have already been issued to the underlying pipelines. In
+ // the presence of bottlenecks caused by data dependencies, the PendingSet can
+ // be inspected to identify problematic data dependencies between
+ // instructions.
+ //
+ // An instruction is moved to the ReadySet when all register operands become
+ // available, and all memory dependencies are met. Instructions that are
+ // moved from the PendingSet to the ReadySet must transition to the 'IS_READY'
+ // stage.
+ //
+ // On every cycle, the Scheduler checks if it can promote instructions from the
+ // PendingSet to the ReadySet.
+ //
+ // An Instruction is moved from the ReadySet to the `IssuedSet` when it starts
+ // execution. This event also causes an instruction state transition (i.e. from
+ // state IS_READY, to state IS_EXECUTING). An Instruction leaves the IssuedSet
+ // only when it reaches the write-back stage.
std::vector<InstRef> WaitSet;
+ std::vector<InstRef> PendingSet;
std::vector<InstRef> ReadySet;
std::vector<InstRef> IssuedSet;
+ // A mask of busy resource units. It defaults to the empty set (i.e. a zero
+ // mask), and it is cleared at the beginning of every cycle.
+ // It is updated every time the scheduler fails to issue an instruction from
+ // the ready set due to unavailable pipeline resources.
+ // Each bit of the mask represents an unavailable resource.
+ uint64_t BusyResourceUnits;
+
+ // Counts the number of instructions in the pending set that were dispatched
+ // during this cycle.
+ unsigned NumDispatchedToThePendingSet;
+
+ // True if the previous pipeline Stage was unable to dispatch a full group of
+ // opcodes because scheduler buffers (or LS queues) were unavailable.
+ bool HadTokenStall;
+
/// Verify the given selection strategy and set the Strategy member
/// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is
/// used.
@@ -112,9 +143,15 @@ class Scheduler : public HardwareUnit {
// vector 'Executed'.
void updateIssuedSet(SmallVectorImpl<InstRef> &Executed);
- // Try to promote instructions from WaitSet to ReadySet.
+ // Try to promote instructions from the PendingSet to the ReadySet.
// Add promoted instructions to the 'Ready' vector in input.
- void promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+ // Returns true if at least one instruction was promoted.
+ bool promoteToReadySet(SmallVectorImpl<InstRef> &Ready);
+
+ // Try to promote instructions from the WaitSet to the PendingSet.
+ // Add promoted instructions to the 'Pending' vector in input.
+ // Returns true if at least one instruction was promoted.
+ bool promoteToPendingSet(SmallVectorImpl<InstRef> &Pending);
public:
Scheduler(const MCSchedModel &Model, LSUnit &Lsu)
@@ -127,7 +164,8 @@ public:
Scheduler(std::unique_ptr<ResourceManager> RM, LSUnit &Lsu,
std::unique_ptr<SchedulerStrategy> SelectStrategy)
- : LSU(Lsu), Resources(std::move(RM)) {
+ : LSU(Lsu), Resources(std::move(RM)), BusyResourceUnits(0),
+ NumDispatchedToThePendingSet(0), HadTokenStall(false) {
initializeStrategy(std::move(SelectStrategy));
}
@@ -140,15 +178,12 @@ public:
SC_DISPATCH_GROUP_STALL,
};
- /// Check if the instruction in 'IR' can be dispatched and returns an answer
- /// in the form of a Status value.
+ /// Check if the instruction in 'IR' can be dispatched during this cycle.
+ /// Return SC_AVAILABLE if both scheduler and LS resources are available.
///
- /// The DispatchStage is responsible for querying the Scheduler before
- /// dispatching new instructions. This routine is used for performing such
- /// a query. If the instruction 'IR' can be dispatched, then true is
- /// returned, otherwise false is returned with Event set to the stall type.
- /// Internally, it also checks if the load/store unit is available.
- Status isAvailable(const InstRef &IR) const;
+ /// This method is also responsible for setting field HadTokenStall if
+ /// IR cannot be dispatched to the Scheduler due to unavailable resources.
+ Status isAvailable(const InstRef &IR);
/// Reserves buffer and LSUnit queue resources that are necessary to issue
/// this instruction.
@@ -156,11 +191,11 @@ public:
/// Returns true if instruction IR is ready to be issued to the underlying
/// pipelines. Note that this operation cannot fail; it assumes that a
/// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`.
- void dispatch(const InstRef &IR);
-
- /// Returns true if IR is ready to be executed by the underlying pipelines.
- /// This method assumes that IR has been previously dispatched.
- bool isReady(const InstRef &IR) const;
+ ///
+ /// If IR is a memory operation, then the Scheduler queries the LS unit to
+ /// obtain a LS token. An LS token is used internally to track memory
+ /// dependencies.
+ bool dispatch(InstRef &IR);
/// Issue an instruction and populates a vector of used pipeline resources,
/// and a vector of instructions that transitioned to the ready state as a
@@ -168,6 +203,7 @@ public:
void issueInstruction(
InstRef &IR,
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Used,
+ SmallVectorImpl<InstRef> &Pending,
SmallVectorImpl<InstRef> &Ready);
/// Returns true if IR has to be issued immediately, or if IR is a zero
@@ -181,9 +217,15 @@ public:
/// have changed in state, and that are now available to new instructions.
/// Instructions executed are added to vector Executed, while vector Ready is
/// populated with instructions that have become ready in this new cycle.
+ /// Vector Pending is populated by instructions that have transitioned through
+ /// the pending state during this cycle. The Pending and Ready sets may not be
+ /// disjoint. An instruction is allowed to transition from the WAIT state to
+ /// the READY state (going through the PENDING state) within a single cycle.
+ /// That means, instructions may appear in both the Pending and Ready set.
void cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
- SmallVectorImpl<InstRef> &Ready,
- SmallVectorImpl<InstRef> &Executed);
+ SmallVectorImpl<InstRef> &Executed,
+ SmallVectorImpl<InstRef> &Pending,
+ SmallVectorImpl<InstRef> &Ready);
/// Convert a resource mask into a valid llvm processor resource identifier.
unsigned getResourceID(uint64_t Mask) const {
@@ -195,6 +237,26 @@ public:
/// resources are not available.
InstRef select();
+ bool isReadySetEmpty() const { return ReadySet.empty(); }
+ bool isWaitSetEmpty() const { return WaitSet.empty(); }
+
+ /// This method is called by the ExecuteStage at the end of each cycle to
+ /// identify bottlenecks caused by data dependencies. Vector RegDeps is
+ /// populated by instructions that were not issued because of unsolved
+ /// register dependencies. Vector MemDeps is populated by instructions that
+ /// were not issued because of unsolved memory dependencies.
+ void analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
+ SmallVectorImpl<InstRef> &MemDeps);
+
+ /// Returns a mask of busy resources, and populates vector Insts with
+ /// instructions that could not be issued to the underlying pipelines because
+ /// not all pipeline resources were available.
+ uint64_t analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts);
+
+ // Returns true if the dispatch logic couldn't dispatch a full group due to
+ // unavailable scheduler and/or LS resources.
+ bool hadTokenStall() const { return HadTokenStall; }
+
#ifndef NDEBUG
// Update the ready queues.
void dump() const;
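
With the new cycleEvent() signature, callers collect four output vectors per cycle, and the two analyze* helpers expose the bottleneck view. A hedged sketch of the per-cycle call pattern, assuming an already constructed Scheduler; error handling and event notification are omitted:

#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/HardwareUnits/Scheduler.h"

using namespace llvm;
using namespace llvm::mca;

static void runOneCycle(Scheduler &S) {
  SmallVector<ResourceRef, 8> Freed;
  SmallVector<InstRef, 8> Executed, Pending, Ready;
  S.cycleEvent(Freed, Executed, Pending, Ready);
  // Pending and Ready may overlap: an instruction can move from WAIT through
  // PENDING to READY within the same cycle.

  SmallVector<InstRef, 8> RegDeps, MemDeps;
  S.analyzeDataDependencies(RegDeps, MemDeps);
  // RegDeps: stalled on register dependencies; MemDeps: stalled on memory.

  SmallVector<InstRef, 8> Stalled;
  uint64_t BusyResources = S.analyzeResourcePressure(Stalled);
  (void)BusyResources; // Each set bit names an unavailable pipeline resource.
}
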
diff --git a/include/llvm/MCA/InstrBuilder.h b/include/llvm/MCA/InstrBuilder.h
index 5f998db5e4ce..690016354f7a 100644
--- a/include/llvm/MCA/InstrBuilder.h
+++ b/include/llvm/MCA/InstrBuilder.h
@@ -1,9 +1,8 @@
//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h
index b91610c64d85..d4d3f22797f7 100644
--- a/include/llvm/MCA/Instruction.h
+++ b/include/llvm/MCA/Instruction.h
@@ -1,9 +1,8 @@
//===--------------------- Instruction.h ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -81,6 +80,15 @@ struct ReadDescriptor {
class ReadState;
+/// A critical data dependency descriptor.
+///
+/// Field RegID is set to the invalid register for memory dependencies.
+struct CriticalDependency {
+ unsigned IID;
+ unsigned RegID;
+ unsigned Cycles;
+};
+
/// Tracks uses of a register definition (e.g. register write).
///
/// Each implicit/explicit register write is associated with an instance of
@@ -124,9 +132,11 @@ class WriteState {
// A partial write that is in a false dependency with this write.
WriteState *PartialWrite;
-
unsigned DependentWriteCyclesLeft;
+ // Critical register dependency for this write.
+ CriticalDependency CRD;
+
// A list of dependent reads. Users is a set of dependent
// reads. A dependent read is added to the set only if CyclesLeft
// is "unknown". As soon as CyclesLeft is 'known', each user in the set
@@ -141,7 +151,7 @@ public:
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
- DependentWriteCyclesLeft(0) {}
+ DependentWriteCyclesLeft(0), CRD() {}
WriteState(const WriteState &Other) = default;
WriteState &operator=(const WriteState &Other) = default;
@@ -151,13 +161,21 @@ public:
unsigned getRegisterID() const { return RegisterID; }
unsigned getRegisterFileID() const { return PRFID; }
unsigned getLatency() const { return WD->Latency; }
-
- void addUser(ReadState *Use, int ReadAdvance);
- void addUser(WriteState *Use);
-
unsigned getDependentWriteCyclesLeft() const {
return DependentWriteCyclesLeft;
}
+ const WriteState *getDependentWrite() const { return DependentWrite; }
+ const CriticalDependency &getCriticalRegDep() const { return CRD; }
+
+ // This method adds Use to the set of data dependent reads. IID is the
+ // instruction identifier associated with this write. ReadAdvance is the
+ // number of cycles to subtract from the latency of this data dependency.
+ // Use is in a RAW dependency with this write.
+ void addUser(unsigned IID, ReadState *Use, int ReadAdvance);
+
+ // Use is a younger register write that is in a false dependency with this
+ // write. IID is the instruction identifier associated with this write.
+ void addUser(unsigned IID, WriteState *Use);
unsigned getNumUsers() const {
unsigned NumUsers = Users.size();
@@ -169,17 +187,20 @@ public:
bool clearsSuperRegisters() const { return ClearsSuperRegs; }
bool isWriteZero() const { return WritesZero; }
bool isEliminated() const { return IsEliminated; }
- bool isExecuted() const {
- return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
+
+ bool isReady() const {
+ if (DependentWrite)
+ return false;
+ unsigned CyclesLeft = getDependentWriteCyclesLeft();
+ return !CyclesLeft || CyclesLeft < getLatency();
}
- const WriteState *getDependentWrite() const { return DependentWrite; }
- void setDependentWrite(WriteState *Other) { DependentWrite = Other; }
- void writeStartEvent(unsigned Cycles) {
- DependentWriteCyclesLeft = Cycles;
- DependentWrite = nullptr;
+ bool isExecuted() const {
+ return CyclesLeft != UNKNOWN_CYCLES && CyclesLeft <= 0;
}
+ void setDependentWrite(const WriteState *Other) { DependentWrite = Other; }
+ void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
void setWriteZero() { WritesZero = true; }
void setEliminated() {
assert(Users.empty() && "Write is in an inconsistent state.");
@@ -191,7 +212,7 @@ public:
// On every cycle, update CyclesLeft and notify dependent users.
void cycleEvent();
- void onInstructionIssued();
+ void onInstructionIssued(unsigned IID);
#ifndef NDEBUG
void dump() const;
@@ -221,6 +242,8 @@ class ReadState {
// dependent writes (i.e. field DependentWrite) is zero, this value is
// propagated to field CyclesLeft.
unsigned TotalCycles;
+ // Longest register dependency.
+ CriticalDependency CRD;
// This field is set to true only if there are no dependent writes, and
// there are no `CyclesLeft' to wait.
bool IsReady;
@@ -232,14 +255,16 @@ class ReadState {
public:
ReadState(const ReadDescriptor &Desc, unsigned RegID)
: RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
- CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
+ CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), CRD(), IsReady(true),
IsZero(false), IndependentFromDef(false) {}
const ReadDescriptor &getDescriptor() const { return *RD; }
unsigned getSchedClass() const { return RD->SchedClassID; }
unsigned getRegisterID() const { return RegisterID; }
unsigned getRegisterFileID() const { return PRFID; }
+ const CriticalDependency &getCriticalRegDep() const { return CRD; }
+ bool isPending() const { return !IndependentFromDef && CyclesLeft > 0; }
bool isReady() const { return IsReady; }
bool isImplicitRead() const { return RD->isImplicitRead(); }
@@ -247,7 +272,7 @@ public:
void setIndependentFromDef() { IndependentFromDef = true; }
void cycleEvent();
- void writeStartEvent(unsigned Cycles);
+ void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles);
void setDependentWrites(unsigned Writes) {
DependentWrites = Writes;
IsReady = !Writes;
@@ -330,9 +355,16 @@ struct InstrDesc {
// A list of buffered resources consumed by this instruction.
SmallVector<uint64_t, 4> Buffers;
+ unsigned UsedProcResUnits;
+ unsigned UsedProcResGroups;
+
unsigned MaxLatency;
// Number of MicroOps for this instruction.
unsigned NumMicroOps;
+ // SchedClassID used to construct this InstrDesc.
+ // This information is currently used by views to do fast queries on the
+ // subtarget when computing the reciprocal throughput.
+ unsigned SchedClassID;
bool MayLoad;
bool MayStore;
@@ -398,6 +430,7 @@ public:
// Returns true if this instruction is a candidate for move elimination.
bool isOptimizableMove() const { return IsOptimizableMove; }
void setOptimizableMove() { IsOptimizableMove = true; }
+ bool isMemOp() const { return Desc.MayLoad || Desc.MayStore; }
};
/// An instruction propagated through the simulated instruction pipeline.
@@ -406,12 +439,13 @@ public:
/// that are sent to the various components of the simulated hardware pipeline.
class Instruction : public InstructionBase {
enum InstrStage {
- IS_INVALID, // Instruction in an invalid state.
- IS_AVAILABLE, // Instruction dispatched but operands are not ready.
- IS_READY, // Instruction dispatched and operands ready.
- IS_EXECUTING, // Instruction issued.
- IS_EXECUTED, // Instruction executed. Values are written back.
- IS_RETIRED // Instruction retired.
+ IS_INVALID, // Instruction in an invalid state.
+ IS_DISPATCHED, // Instruction dispatched but operands are not ready.
+ IS_PENDING, // Instruction is not ready, but operand latency is known.
+ IS_READY, // Instruction dispatched and operands ready.
+ IS_EXECUTING, // Instruction issued.
+ IS_EXECUTED, // Instruction executed. Values are written back.
+ IS_RETIRED // Instruction retired.
};
// The current instruction stage.
@@ -424,12 +458,34 @@ class Instruction : public InstructionBase {
// Retire Unit token ID for this instruction.
unsigned RCUTokenID;
+ // LS token ID for this instruction.
+ // This field is set to the invalid null token if this is not a memory
+ // operation.
+ unsigned LSUTokenID;
+
+ // Critical register dependency.
+ CriticalDependency CriticalRegDep;
+
+ // Critical memory dependency.
+ CriticalDependency CriticalMemDep;
+
+ // A bitmask of busy processor resource units.
+ // This field is set to zero only if execution is not delayed during this
+ // cycle because of unavailable pipeline resources.
+ uint64_t CriticalResourceMask;
+
+ // True if this instruction has been optimized at register renaming stage.
+ bool IsEliminated;
+
public:
Instruction(const InstrDesc &D)
: InstructionBase(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES),
- RCUTokenID(0) {}
+ RCUTokenID(0), LSUTokenID(0), CriticalRegDep(), CriticalMemDep(),
+ CriticalResourceMask(0), IsEliminated(false) {}
unsigned getRCUTokenID() const { return RCUTokenID; }
+ unsigned getLSUTokenID() const { return LSUTokenID; }
+ void setLSUTokenID(unsigned LSUTok) { LSUTokenID = LSUTok; }
int getCyclesLeft() const { return CyclesLeft; }
// Transition to the dispatch stage, and assign a RCUToken to this
@@ -438,37 +494,48 @@ public:
void dispatch(unsigned RCUTokenID);
// Instruction issued. Transition to the IS_EXECUTING state, and update
- // all the definitions.
- void execute();
-
- // Force a transition from the IS_AVAILABLE state to the IS_READY state if
- // input operands are all ready. State transitions normally occur at the
- // beginning of a new cycle (see method cycleEvent()). However, the scheduler
- // may decide to promote instructions from the wait queue to the ready queue
- // as the result of another issue event. This method is called every time the
- // instruction might have changed in state.
+ // all the register definitions.
+ void execute(unsigned IID);
+
+ // Force a transition from the IS_DISPATCHED state to the IS_READY or
+ // IS_PENDING state. State transitions normally occur either at the beginning
+ // of a new cycle (see method cycleEvent()), or as a result of another issue
+ // event. This method is called every time the instruction might have changed
+ // in state. It internally delegates to method updateDispatched() and
+ // updatePending().
void update();
+ bool updateDispatched();
+ bool updatePending();
- bool isDispatched() const { return Stage == IS_AVAILABLE; }
+ bool isDispatched() const { return Stage == IS_DISPATCHED; }
+ bool isPending() const { return Stage == IS_PENDING; }
bool isReady() const { return Stage == IS_READY; }
bool isExecuting() const { return Stage == IS_EXECUTING; }
bool isExecuted() const { return Stage == IS_EXECUTED; }
bool isRetired() const { return Stage == IS_RETIRED; }
+ bool isEliminated() const { return IsEliminated; }
- bool isEliminated() const {
- return isReady() && getDefs().size() &&
- all_of(getDefs(),
- [](const WriteState &W) { return W.isEliminated(); });
- }
-
- // Forces a transition from state IS_AVAILABLE to state IS_EXECUTED.
+ // Forces a transition from state IS_DISPATCHED to state IS_EXECUTED.
void forceExecuted();
+ void setEliminated() { IsEliminated = true; }
void retire() {
assert(isExecuted() && "Instruction is in an invalid state!");
Stage = IS_RETIRED;
}
+ const CriticalDependency &getCriticalRegDep() const { return CriticalRegDep; }
+ const CriticalDependency &getCriticalMemDep() const { return CriticalMemDep; }
+ const CriticalDependency &computeCriticalRegDep();
+ void setCriticalMemDep(const CriticalDependency &MemDep) {
+ CriticalMemDep = MemDep;
+ }
+
+ uint64_t getCriticalResourceMask() const { return CriticalResourceMask; }
+ void setCriticalResourceMask(uint64_t ResourceMask) {
+ CriticalResourceMask = ResourceMask;
+ }
+
void cycleEvent();
};
@@ -483,13 +550,17 @@ public:
InstRef(unsigned Index, Instruction *I) : Data(std::make_pair(Index, I)) {}
bool operator==(const InstRef &Other) const { return Data == Other.Data; }
+ bool operator!=(const InstRef &Other) const { return Data != Other.Data; }
+ bool operator<(const InstRef &Other) const {
+ return Data.first < Other.Data.first;
+ }
unsigned getSourceIndex() const { return Data.first; }
Instruction *getInstruction() { return Data.second; }
const Instruction *getInstruction() const { return Data.second; }
/// Returns true if this references a valid instruction.
- operator bool() const { return Data.second != nullptr; }
+ explicit operator bool() const { return Data.second != nullptr; }
/// Invalidate this reference.
void invalidate() { Data.second = nullptr; }
@@ -537,7 +608,7 @@ public:
return !WS || WS->isExecuted();
}
- bool isValid() const { return Data.first != INVALID_IID && Data.second; }
+ bool isValid() const { return Data.second && Data.first != INVALID_IID; }
bool operator==(const WriteRef &Other) const { return Data == Other.Data; }
#ifndef NDEBUG
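
Both critical-dependency records are plain aggregates, so consumers can read them straight off an instruction once it has executed. A sketch (field interpretation follows the comments above; the printing helper itself is hypothetical):

#include "llvm/MCA/Instruction.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::mca;

static void printCriticalInfo(const Instruction &Inst) {
  const CriticalDependency &RegDep = Inst.getCriticalRegDep();
  if (RegDep.Cycles)
    errs() << "critical register dep: IID=" << RegDep.IID
           << " RegID=" << RegDep.RegID << " Cycles=" << RegDep.Cycles << '\n';

  const CriticalDependency &MemDep = Inst.getCriticalMemDep();
  if (MemDep.Cycles) // RegID is the invalid register for memory dependencies.
    errs() << "critical memory dep: IID=" << MemDep.IID
           << " Cycles=" << MemDep.Cycles << '\n';
}
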
diff --git a/include/llvm/MCA/Pipeline.h b/include/llvm/MCA/Pipeline.h
index acd256060bdd..935033f67f8b 100644
--- a/include/llvm/MCA/Pipeline.h
+++ b/include/llvm/MCA/Pipeline.h
@@ -1,9 +1,8 @@
//===--------------------- Pipeline.h ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/SourceMgr.h b/include/llvm/MCA/SourceMgr.h
index 5e0ca6419f5d..dbe31db1b1dd 100644
--- a/include/llvm/MCA/SourceMgr.h
+++ b/include/llvm/MCA/SourceMgr.h
@@ -1,9 +1,8 @@
//===--------------------- SourceMgr.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/Stages/DispatchStage.h b/include/llvm/MCA/Stages/DispatchStage.h
index f015cd7522eb..d80ededeaca1 100644
--- a/include/llvm/MCA/Stages/DispatchStage.h
+++ b/include/llvm/MCA/Stages/DispatchStage.h
@@ -1,9 +1,8 @@
//===----------------------- DispatchStage.h --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -62,8 +61,6 @@ class DispatchStage final : public Stage {
bool canDispatch(const InstRef &IR) const;
Error dispatch(InstRef IR);
- void updateRAWDependencies(ReadState &RS, const MCSubtargetInfo &STI);
-
void notifyInstructionDispatched(const InstRef &IR,
ArrayRef<unsigned> UsedPhysRegs,
unsigned uOps) const;
@@ -71,9 +68,7 @@ class DispatchStage final : public Stage {
public:
DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
unsigned MaxDispatchWidth, RetireControlUnit &R,
- RegisterFile &F)
- : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
- CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {}
+ RegisterFile &F);
bool isAvailable(const InstRef &IR) const override;
diff --git a/include/llvm/MCA/Stages/EntryStage.h b/include/llvm/MCA/Stages/EntryStage.h
index cd9a65b8cc2b..59a2daff886e 100644
--- a/include/llvm/MCA/Stages/EntryStage.h
+++ b/include/llvm/MCA/Stages/EntryStage.h
@@ -1,9 +1,8 @@
//===---------------------- EntryStage.h ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/Stages/ExecuteStage.h b/include/llvm/MCA/Stages/ExecuteStage.h
index 8cb287e06d9f..03737e0220eb 100644
--- a/include/llvm/MCA/Stages/ExecuteStage.h
+++ b/include/llvm/MCA/Stages/ExecuteStage.h
@@ -1,9 +1,8 @@
//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -29,6 +28,12 @@ namespace mca {
class ExecuteStage final : public Stage {
Scheduler &HWS;
+ unsigned NumDispatchedOpcodes;
+ unsigned NumIssuedOpcodes;
+
+ // True if this stage should notify listeners of HWPressureEvents.
+ bool EnablePressureEvents;
+
Error issueInstruction(InstRef &IR);
// Called at the beginning of each cycle to issue already dispatched
@@ -42,7 +47,10 @@ class ExecuteStage final : public Stage {
ExecuteStage &operator=(const ExecuteStage &Other) = delete;
public:
- ExecuteStage(Scheduler &S) : Stage(), HWS(S) {}
+ ExecuteStage(Scheduler &S) : ExecuteStage(S, false) {}
+ ExecuteStage(Scheduler &S, bool ShouldPerformBottleneckAnalysis)
+ : Stage(), HWS(S), NumDispatchedOpcodes(0), NumIssuedOpcodes(0),
+ EnablePressureEvents(ShouldPerformBottleneckAnalysis) {}
// This stage works under the assumption that the Pipeline will eventually
// execute a retire stage. We don't need to check if pipelines and/or
@@ -61,12 +69,14 @@ public:
// Instructions that transitioned to the 'Executed' state are automatically
// moved to the next stage (i.e. RetireStage).
Error cycleStart() override;
+ Error cycleEnd() override;
Error execute(InstRef &IR) override;
void notifyInstructionIssued(
const InstRef &IR,
MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
void notifyInstructionExecuted(const InstRef &IR) const;
+ void notifyInstructionPending(const InstRef &IR) const;
void notifyInstructionReady(const InstRef &IR) const;
void notifyResourceAvailable(const ResourceRef &RR) const;
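
Pressure events are enabled by a single constructor flag on the stage; nothing else in the pipeline changes. A sketch, assuming an existing Scheduler and that the boolean mirrors the EnableBottleneckAnalysis pipeline option:

#include "llvm/ADT/STLExtras.h" // llvm::make_unique (pre-C++14 LLVM idiom).
#include "llvm/MCA/Stages/ExecuteStage.h"
#include <memory>

static std::unique_ptr<llvm::mca::ExecuteStage>
makeExecuteStage(llvm::mca::Scheduler &S, bool EnableBottleneckAnalysis) {
  return llvm::make_unique<llvm::mca::ExecuteStage>(S, EnableBottleneckAnalysis);
}
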
diff --git a/include/llvm/MCA/Stages/InstructionTables.h b/include/llvm/MCA/Stages/InstructionTables.h
index 34e338f0ce6b..4b463c9b51c1 100644
--- a/include/llvm/MCA/Stages/InstructionTables.h
+++ b/include/llvm/MCA/Stages/InstructionTables.h
@@ -1,9 +1,8 @@
//===--------------------- InstructionTables.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/Stages/MicroOpQueueStage.h b/include/llvm/MCA/Stages/MicroOpQueueStage.h
new file mode 100644
index 000000000000..50a5ef87b2d2
--- /dev/null
+++ b/include/llvm/MCA/Stages/MicroOpQueueStage.h
@@ -0,0 +1,88 @@
+//===---------------------- MicroOpQueueStage.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage that implements a queue of micro opcodes.
+/// It can be used to simulate a hardware micro-op queue that serves opcodes to
+/// the out of order backend.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MCA_MICRO_OP_QUEUE_STAGE_H
+#define LLVM_MCA_MICRO_OP_QUEUE_STAGE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/Stages/Stage.h"
+
+namespace llvm {
+namespace mca {
+
+/// A stage that simulates a queue of instruction opcodes.
+class MicroOpQueueStage : public Stage {
+ SmallVector<InstRef, 8> Buffer;
+ unsigned NextAvailableSlotIdx;
+ unsigned CurrentInstructionSlotIdx;
+
+ // Limits the number of instructions that can be written to this buffer every
+ // cycle. A value of zero means that the input instruction throughput is
+ // unlimited.
+ const unsigned MaxIPC;
+ unsigned CurrentIPC;
+
+ // Number of entries that are available during this cycle.
+ unsigned AvailableEntries;
+
+ // True if instructions dispatched to this stage don't need to wait for the
+ // next cycle before moving to the next stage.
+ // False if this buffer acts as a one cycle delay in the execution pipeline.
+ bool IsZeroLatencyStage;
+
+ MicroOpQueueStage(const MicroOpQueueStage &Other) = delete;
+ MicroOpQueueStage &operator=(const MicroOpQueueStage &Other) = delete;
+
+ // By default, an instruction consumes a number of buffer entries equal to its
+ // number of micro opcodes (see field `InstrDesc::NumMicroOpcodes`). The
+ // number of entries consumed by an instruction is normalized to the
+ // minimum value between NumMicroOpcodes and the buffer size. This is to avoid
+ // problems with (microcoded) instructions that generate a number of micro
+ // opcodes that doesn't fit in the buffer.
+ unsigned getNormalizedOpcodes(const InstRef &IR) const {
+ unsigned NormalizedOpcodes =
+ std::min(static_cast<unsigned>(Buffer.size()),
+ IR.getInstruction()->getDesc().NumMicroOps);
+ return NormalizedOpcodes ? NormalizedOpcodes : 1U;
+ }
+
+ Error moveInstructions();
+
+public:
+ MicroOpQueueStage(unsigned Size, unsigned IPC = 0,
+ bool ZeroLatencyStage = true);
+
+ bool isAvailable(const InstRef &IR) const override {
+ if (MaxIPC && CurrentIPC == MaxIPC)
+ return false;
+ unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+ if (NormalizedOpcodes > AvailableEntries)
+ return false;
+ return true;
+ }
+
+ bool hasWorkToComplete() const override {
+ return AvailableEntries != Buffer.size();
+ }
+
+ Error execute(InstRef &IR) override;
+ Error cycleStart() override;
+ Error cycleEnd() override;
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif // LLVM_MCA_MICRO_OP_QUEUE_STAGE_H
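
Constructing the new stage only requires the queue size plus an optional per-cycle input limit. A sketch with illustrative numbers that are not tied to any particular scheduling model:

#include "llvm/ADT/STLExtras.h"
#include "llvm/MCA/Stages/MicroOpQueueStage.h"

// A 28-entry micro-op queue fed at most 5 instructions per cycle.
auto MOQ = llvm::make_unique<llvm::mca::MicroOpQueueStage>(/* Size */ 28,
                                                           /* IPC */ 5);
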
diff --git a/include/llvm/MCA/Stages/RetireStage.h b/include/llvm/MCA/Stages/RetireStage.h
index 2051ce5c86ad..08c216ac7bf4 100644
--- a/include/llvm/MCA/Stages/RetireStage.h
+++ b/include/llvm/MCA/Stages/RetireStage.h
@@ -1,9 +1,8 @@
//===---------------------- RetireStage.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/Stages/Stage.h b/include/llvm/MCA/Stages/Stage.h
index fc7ab569bb0f..46b242caa6cf 100644
--- a/include/llvm/MCA/Stages/Stage.h
+++ b/include/llvm/MCA/Stages/Stage.h
@@ -1,9 +1,8 @@
//===---------------------- Stage.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/MCA/Support.h b/include/llvm/MCA/Support.h
index 7b0c5bf3a486..1da097c90922 100644
--- a/include/llvm/MCA/Support.h
+++ b/include/llvm/MCA/Support.h
@@ -1,9 +1,8 @@
//===--------------------- Support.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -61,24 +60,13 @@ public:
return (Denominator == 1) ? Numerator : (double)Numerator / Denominator;
}
+ unsigned getNumerator() const { return Numerator; }
+ unsigned getDenominator() const { return Denominator; }
+
// Add the components of RHS to this instance. Instead of calculating
// the final value here, we keep track of the numerator and denominator
// separately, to reduce floating point error.
- ResourceCycles &operator+=(const ResourceCycles &RHS) {
- if (Denominator == RHS.Denominator)
- Numerator += RHS.Numerator;
- else {
- // Create a common denominator for LHS and RHS by calculating the least
- // common multiple from the GCD.
- unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
- unsigned LCM = (Denominator * RHS.Denominator) / GCD;
- unsigned LHSNumerator = Numerator * (LCM / Denominator);
- unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
- Numerator = LHSNumerator + RHSNumerator;
- Denominator = LCM;
- }
- return *this;
- }
+ ResourceCycles &operator+=(const ResourceCycles &RHS);
};
/// Populates vector Masks with processor resource masks.
@@ -106,6 +94,13 @@ public:
void computeProcResourceMasks(const MCSchedModel &SM,
MutableArrayRef<uint64_t> Masks);
+// Returns the index of the highest bit set. For resource masks, the position of
+// the highest bit set can be used to construct a resource mask identifier.
+inline unsigned getResourceStateIndex(uint64_t Mask) {
+ assert(Mask && "Processor Resource Mask cannot be zero!");
+ return (std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask)) - 1;
+}
+
/// Compute the reciprocal block throughput from a set of processor resource
/// cycles. The reciprocal block throughput is computed as the MAX between:
/// - NumMicroOps / DispatchWidth
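
A self-contained sketch of the two ideas in this hunk, assuming plain std::gcd can stand in for LLVM's GreatestCommonDivisor64: the fraction-based accumulation that the now out-of-line operator+= performs, and the highest-set-bit lookup done by getResourceStateIndex():

  #include <cassert>
  #include <cstdint>
  #include <numeric>

  // Accumulate resource cycles as a numerator/denominator pair to avoid
  // floating point error, as described in the ResourceCycles comment above.
  struct Cycles {
    unsigned Num = 0, Den = 1;
    Cycles &operator+=(const Cycles &RHS) {
      if (Den == RHS.Den) {
        Num += RHS.Num;
      } else {
        unsigned GCD = std::gcd(Den, RHS.Den);
        unsigned LCM = (Den * RHS.Den) / GCD;
        Num = Num * (LCM / Den) + RHS.Num * (LCM / RHS.Den);
        Den = LCM;
      }
      return *this;
    }
  };

  // Index of the highest set bit, equivalent to getResourceStateIndex().
  inline unsigned resourceStateIndex(uint64_t Mask) {
    assert(Mask && "Processor Resource Mask cannot be zero!");
    unsigned Idx = 63;
    while (!(Mask & (1ULL << Idx)))
      --Idx;
    return Idx;
  }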
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 9ef1e4875191..c40278a4f923 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -1,9 +1,8 @@
//===- Archive.h - ar archive file format -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/fallible_iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Chrono.h"
@@ -143,44 +143,38 @@ public:
getAsBinary(LLVMContext *Context = nullptr) const;
};
- class child_iterator {
+ class ChildFallibleIterator {
Child C;
- Error *E = nullptr;
public:
- child_iterator() : C(Child(nullptr, nullptr, nullptr)) {}
- child_iterator(const Child &C, Error *E) : C(C), E(E) {}
+ ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {}
+ ChildFallibleIterator(const Child &C) : C(C) {}
const Child *operator->() const { return &C; }
const Child &operator*() const { return C; }
- bool operator==(const child_iterator &other) const {
+ bool operator==(const ChildFallibleIterator &other) const {
// Ignore errors here: If an error occurred during increment then getNext
// will have been set to child_end(), and the following comparison should
// do the right thing.
return C == other.C;
}
- bool operator!=(const child_iterator &other) const {
+ bool operator!=(const ChildFallibleIterator &other) const {
return !(*this == other);
}
- // Code in loops with child_iterators must check for errors on each loop
- // iteration. And if there is an error break out of the loop.
- child_iterator &operator++() { // Preincrement
- assert(E && "Can't increment iterator with no Error attached");
- ErrorAsOutParameter ErrAsOutParam(E);
- if (auto ChildOrErr = C.getNext())
- C = *ChildOrErr;
- else {
- C = C.getParent()->child_end().C;
- *E = ChildOrErr.takeError();
- E = nullptr;
- }
- return *this;
+ Error inc() {
+ auto NextChild = C.getNext();
+ if (!NextChild)
+ return NextChild.takeError();
+ C = std::move(*NextChild);
+ return Error::success();
}
};
+ using child_iterator = fallible_iterator<ChildFallibleIterator>;
+
class Symbol {
const Archive *Parent;
uint32_t SymbolIndex;
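
With child_iterator now defined as a fallible_iterator, a loop over archive members must check both the per-member Expected values and the Error threaded through the range. A usage sketch, assuming an already-parsed object::Archive and the existing children(Err) entry point:

  #include "llvm/Object/Archive.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::object;

  // List member names; the Error passed to children() records any failure
  // that occurred while advancing the fallible iterator.
  static Error listMembers(const Archive &A) {
    Error Err = Error::success();
    for (const Archive::Child &C : A.children(Err)) {
      Expected<StringRef> NameOrErr = C.getName();
      if (!NameOrErr)
        return joinErrors(std::move(Err), NameOrErr.takeError());
      outs() << *NameOrErr << "\n";
    }
    return Err; // must be checked even if every member was read successfully
  }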
diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h
index 495b943d04c0..9e6daf2da36e 100644
--- a/include/llvm/Object/ArchiveWriter.h
+++ b/include/llvm/Object/ArchiveWriter.h
@@ -1,9 +1,8 @@
//===- ArchiveWriter.h - ar archive file format writer ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,6 @@ struct NewArchiveMember {
sys::TimePoint<std::chrono::seconds> ModTime;
unsigned UID = 0, GID = 0, Perms = 0644;
- bool IsNew = false;
NewArchiveMember() = default;
NewArchiveMember(MemoryBufferRef BufRef);
@@ -38,6 +36,8 @@ struct NewArchiveMember {
bool Deterministic);
};
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
+
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 99745e24b8c8..3c3e977baff4 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -1,9 +1,8 @@
//===- Binary.h - A generic binary file -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#ifndef LLVM_OBJECT_BINARY_H
#define LLVM_OBJECT_BINARY_H
+#include "llvm-c/Types.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/Error.h"
@@ -42,7 +42,9 @@ protected:
ID_Archive,
ID_MachOUniversalBinary,
ID_COFFImportFile,
- ID_IR, // LLVM IR
+ ID_IR, // LLVM IR
+
+ ID_Minidump,
ID_WinRes, // Windows resource (.res) file.
@@ -50,6 +52,9 @@ protected:
ID_StartObjects,
ID_COFF,
+ ID_XCOFF32, // AIX XCOFF 32-bit
+ ID_XCOFF64, // AIX XCOFF 64-bit
+
ID_ELF32L, // ELF 32-bit, little endian
ID_ELF32B, // ELF 32-bit, big endian
ID_ELF64L, // ELF 64-bit, little endian
@@ -118,6 +123,8 @@ public:
return TypeID == ID_COFF;
}
+ bool isXCOFF() const { return TypeID == ID_XCOFF32 || TypeID == ID_XCOFF64; }
+
bool isWasm() const { return TypeID == ID_Wasm; }
bool isCOFFImportFile() const {
@@ -128,6 +135,8 @@ public:
return TypeID == ID_IR;
}
+ bool isMinidump() const { return TypeID == ID_Minidump; }
+
bool isLittleEndian() const {
return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
TypeID == ID_MachO32B || TypeID == ID_MachO64B);
@@ -156,6 +165,9 @@ public:
}
};
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_ISA_CONVERSION_FUNCTIONS(Binary, LLVMBinaryRef)
+
/// Create a Binary from Source, autodetecting the file type.
///
/// @param Source The data to create the Binary from.
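
The new ID_Minidump and ID_XCOFF* tags surface as the isMinidump() and isXCOFF() predicates; a hedged sketch of dispatching on an autodetected Binary (createBinary and the predicates come from this header, the dispatch itself is purely illustrative):

  #include "llvm/Object/Binary.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::object;

  // Autodetect a file and report the newly distinguishable formats.
  static Error describe(StringRef Path) {
    Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
    if (!BinOrErr)
      return BinOrErr.takeError();
    Binary &Bin = *BinOrErr->getBinary();
    if (Bin.isMinidump())
      outs() << "minidump\n";
    else if (Bin.isXCOFF())
      outs() << "xcoff\n";
    else
      outs() << "other\n";
    return Error::success();
  }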
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index b753d261a0fc..c53cbc46c747 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -1,9 +1,8 @@
//===- COFF.h - COFF object file implementation -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -898,13 +897,12 @@ protected:
Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
void moveSectionNext(DataRefImpl &Sec) const override;
- std::error_code getSectionName(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
uint64_t getSectionAddress(DataRefImpl Sec) const override;
uint64_t getSectionIndex(DataRefImpl Sec) const override;
uint64_t getSectionSize(DataRefImpl Sec) const override;
- std::error_code getSectionContents(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const override;
uint64_t getSectionAlignment(DataRefImpl Sec) const override;
bool isSectionCompressed(DataRefImpl Sec) const override;
bool isSectionText(DataRefImpl Sec) const override;
@@ -1034,10 +1032,10 @@ public:
ArrayRef<coff_relocation> getRelocations(const coff_section *Sec) const;
- std::error_code getSectionName(const coff_section *Sec, StringRef &Res) const;
+ Expected<StringRef> getSectionName(const coff_section *Sec) const;
uint64_t getSectionSize(const coff_section *Sec) const;
- std::error_code getSectionContents(const coff_section *Sec,
- ArrayRef<uint8_t> &Res) const;
+ Error getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const;
uint64_t getImageBase() const;
std::error_code getVaPtr(uint64_t VA, uintptr_t &Res) const;
diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h
index 0a4556ad8884..5aa836411118 100644
--- a/include/llvm/Object/COFFImportFile.h
+++ b/include/llvm/Object/COFFImportFile.h
@@ -1,9 +1,8 @@
//===- COFFImportFile.h - COFF short import file implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,12 +36,11 @@ public:
void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; }
- std::error_code printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const override {
+ Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override {
if (Symb.p == 0)
OS << "__imp_";
OS << StringRef(Data.getBufferStart() + sizeof(coff_import_header));
- return std::error_code();
+ return Error::success();
}
uint32_t getSymbolFlags(DataRefImpl Symb) const override {
@@ -71,9 +69,21 @@ private:
};
struct COFFShortExport {
+ /// The name of the export as specified in the .def file or on the command
+ /// line, i.e. "foo" in "/EXPORT:foo", and "bar" in "/EXPORT:foo=bar". This
+ /// may lack mangling, such as underscore prefixing and stdcall suffixing.
std::string Name;
+
+ /// The external, exported name. Only non-empty when export renaming is in
+ /// effect, i.e. "foo" in "/EXPORT:foo=bar".
std::string ExtName;
+
+ /// The real, mangled symbol name from the object file. Given
+ /// "/export:foo=bar", this could be "_bar@8" if bar is stdcall.
std::string SymbolName;
+
+ /// Creates a weak alias. This is the name of the weak aliasee. In a .def
+ /// file, this is "baz" in "EXPORTS\nfoo = bar == baz".
std::string AliasTarget;
uint16_t Ordinal = 0;
diff --git a/include/llvm/Object/COFFModuleDefinition.h b/include/llvm/Object/COFFModuleDefinition.h
index be139a2833b0..ab52259fea1a 100644
--- a/include/llvm/Object/COFFModuleDefinition.h
+++ b/include/llvm/Object/COFFModuleDefinition.h
@@ -1,9 +1,8 @@
//===--- COFFModuleDefinition.h ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Object/CVDebugRecord.h b/include/llvm/Object/CVDebugRecord.h
index faad72c0df29..d41c7391f701 100644
--- a/include/llvm/Object/CVDebugRecord.h
+++ b/include/llvm/Object/CVDebugRecord.h
@@ -1,9 +1,8 @@
//===- CVDebugRecord.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Object/Decompressor.h b/include/llvm/Object/Decompressor.h
index 2a77d2ffbf68..cc918481b308 100644
--- a/include/llvm/Object/Decompressor.h
+++ b/include/llvm/Object/Decompressor.h
@@ -1,9 +1,8 @@
//===-- Decompressor.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===/
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index bcdc190cc7dc..cf8e4529bad9 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -1,9 +1,8 @@
//===- ELF.h - ELF object file implementation -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,10 +44,26 @@ getElfArchType(StringRef Object) {
(uint8_t)Object[ELF::EI_DATA]);
}
-static inline Error createError(StringRef Err) {
+static inline Error createError(const Twine &Err) {
return make_error<StringError>(Err, object_error::parse_failed);
}
+template <class ELFT> class ELFFile;
+
+template <class ELFT>
+std::string getSecIndexForError(const ELFFile<ELFT> *Obj,
+ const typename ELFT::Shdr *Sec) {
+ auto TableOrErr = Obj->sections();
+ if (TableOrErr)
+ return "[index " + std::to_string(Sec - &TableOrErr->front()) + "]";
+  // To make this helper more convenient for error reporting purposes, we drop
+  // the error here. In practice it should never be triggered: before this
+  // point, our code should have called 'sections()' and reported a proper
+  // error on failure.
+ llvm::consumeError(TableOrErr.takeError());
+ return "[unknown index]";
+}
+
template <class ELFT>
class ELFFile {
public:
@@ -80,9 +95,7 @@ public:
using Elf_Relr_Range = typename ELFT::RelrRange;
using Elf_Phdr_Range = typename ELFT::PhdrRange;
- const uint8_t *base() const {
- return reinterpret_cast<const uint8_t *>(Buf.data());
- }
+ const uint8_t *base() const { return Buf.bytes_begin(); }
size_t getBufSize() const { return Buf.size(); }
@@ -115,8 +128,8 @@ public:
SmallVectorImpl<char> &Result) const;
uint32_t getRelativeRelocationType() const;
- const char *getDynamicTagAsString(unsigned Arch, uint64_t Type) const;
- const char *getDynamicTagAsString(uint64_t Type) const;
+ std::string getDynamicTagAsString(unsigned Arch, uint64_t Type) const;
+ std::string getDynamicTagAsString(uint64_t Type) const;
/// Get the symbol for a given relocation.
Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel *Rel,
@@ -165,11 +178,16 @@ public:
/// Iterate over program header table.
Expected<Elf_Phdr_Range> program_headers() const {
if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr))
- return createError("invalid e_phentsize");
+ return createError("invalid e_phentsize: " +
+ Twine(getHeader()->e_phentsize));
if (getHeader()->e_phoff +
(getHeader()->e_phnum * getHeader()->e_phentsize) >
getBufSize())
- return createError("program headers longer than binary");
+ return createError("program headers are longer than binary of size " +
+ Twine(getBufSize()) + ": e_phoff = 0x" +
+ Twine::utohexstr(getHeader()->e_phoff) +
+ ", e_phnum = " + Twine(getHeader()->e_phnum) +
+ ", e_phentsize = " + Twine(getHeader()->e_phentsize));
auto *Begin =
reinterpret_cast<const Elf_Phdr *>(base() + getHeader()->e_phoff);
return makeArrayRef(Begin, Begin + getHeader()->e_phnum);
@@ -183,12 +201,12 @@ public:
/// \param Err [out] an error to support fallible iteration, which should
/// be checked after iteration ends.
Elf_Note_Iterator notes_begin(const Elf_Phdr &Phdr, Error &Err) const {
- if (Phdr.p_type != ELF::PT_NOTE) {
- Err = createError("attempt to iterate notes of non-note program header");
- return Elf_Note_Iterator(Err);
- }
+ assert(Phdr.p_type == ELF::PT_NOTE && "Phdr is not of type PT_NOTE");
+ ErrorAsOutParameter ErrAsOutParam(&Err);
if (Phdr.p_offset + Phdr.p_filesz > getBufSize()) {
- Err = createError("invalid program header offset/size");
+ Err = createError("PT_NOTE header has invalid offset (0x" +
+ Twine::utohexstr(Phdr.p_offset) + ") or size (0x" +
+ Twine::utohexstr(Phdr.p_filesz) + ")");
return Elf_Note_Iterator(Err);
}
return Elf_Note_Iterator(base() + Phdr.p_offset, Phdr.p_filesz, Err);
@@ -202,12 +220,13 @@ public:
/// \param Err [out] an error to support fallible iteration, which should
/// be checked after iteration ends.
Elf_Note_Iterator notes_begin(const Elf_Shdr &Shdr, Error &Err) const {
- if (Shdr.sh_type != ELF::SHT_NOTE) {
- Err = createError("attempt to iterate notes of non-note section");
- return Elf_Note_Iterator(Err);
- }
+ assert(Shdr.sh_type == ELF::SHT_NOTE && "Shdr is not of type SHT_NOTE");
+ ErrorAsOutParameter ErrAsOutParam(&Err);
if (Shdr.sh_offset + Shdr.sh_size > getBufSize()) {
- Err = createError("invalid section offset/size");
+ Err = createError("SHT_NOTE section " + getSecIndexForError(this, &Shdr) +
+ " has invalid offset (0x" +
+ Twine::utohexstr(Shdr.sh_offset) + ") or size (0x" +
+ Twine::utohexstr(Shdr.sh_size) + ")");
return Elf_Note_Iterator(Err);
}
return Elf_Note_Iterator(base() + Shdr.sh_offset, Shdr.sh_size, Err);
@@ -274,7 +293,7 @@ template <class ELFT>
inline Expected<const typename ELFT::Shdr *>
getSection(typename ELFT::ShdrRange Sections, uint32_t Index) {
if (Index >= Sections.size())
- return createError("invalid section index");
+ return createError("invalid section index: " + Twine(Index));
return &Sections[Index];
}
@@ -286,7 +305,10 @@ getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym,
assert(Sym->st_shndx == ELF::SHN_XINDEX);
unsigned Index = Sym - FirstSym;
if (Index >= ShndxTable.size())
- return createError("index past the end of the symbol table");
+ return createError(
+ "extended symbol index (" + Twine(Index) +
+ ") is past the end of the SHT_SYMTAB_SHNDX section of size " +
+ Twine(ShndxTable.size()));
// The size of the table was checked in getSHNDXTable.
return ShndxTable[Index];
@@ -333,20 +355,18 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
}
template <class ELFT>
-inline Expected<const typename ELFT::Sym *>
-getSymbol(typename ELFT::SymRange Symbols, uint32_t Index) {
- if (Index >= Symbols.size())
- return createError("invalid symbol index");
- return &Symbols[Index];
-}
-
-template <class ELFT>
Expected<const typename ELFT::Sym *>
ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
- auto SymtabOrErr = symbols(Sec);
- if (!SymtabOrErr)
- return SymtabOrErr.takeError();
- return object::getSymbol<ELFT>(*SymtabOrErr, Index);
+ auto SymsOrErr = symbols(Sec);
+ if (!SymsOrErr)
+ return SymsOrErr.takeError();
+
+ Elf_Sym_Range Symbols = *SymsOrErr;
+ if (Index >= Symbols.size())
+ return createError("unable to get symbol from section " +
+ getSecIndexForError(this, Sec) +
+ ": invalid symbol index (" + Twine(Index) + ")");
+ return &Symbols[Index];
}
template <class ELFT>
@@ -354,18 +374,26 @@ template <typename T>
Expected<ArrayRef<T>>
ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
if (Sec->sh_entsize != sizeof(T) && sizeof(T) != 1)
- return createError("invalid sh_entsize");
+ return createError("section " + getSecIndexForError(this, Sec) +
+ " has an invalid sh_entsize: " + Twine(Sec->sh_entsize));
uintX_t Offset = Sec->sh_offset;
uintX_t Size = Sec->sh_size;
if (Size % sizeof(T))
- return createError("size is not a multiple of sh_entsize");
+ return createError("section " + getSecIndexForError(this, Sec) +
+ " has an invalid sh_size (" + Twine(Size) +
+ ") which is not a multiple of its sh_entsize (" +
+ Twine(Sec->sh_entsize) + ")");
if ((std::numeric_limits<uintX_t>::max() - Offset < Size) ||
Offset + Size > Buf.size())
- return createError("invalid section offset");
+ return createError("section " + getSecIndexForError(this, Sec) +
+ " has a sh_offset (0x" + Twine::utohexstr(Offset) +
+ ") + sh_size (0x" + Twine(Size) +
+ ") that cannot be represented");
if (Offset % alignof(T))
+ // TODO: this error is untested.
return createError("unaligned data");
const T *Start = reinterpret_cast<const T *>(base() + Offset);
@@ -438,8 +466,10 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const {
if (!Index) // no section string table.
return "";
+ // TODO: Test a case when the sh_link of the section with index 0 is broken.
if (Index >= Sections.size())
- return createError("invalid section index");
+ return createError("section header string table index " + Twine(Index) +
+ " does not exist");
return getStringTable(&Sections[Index]);
}
@@ -448,7 +478,9 @@ template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
template <class ELFT>
Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
if (sizeof(Elf_Ehdr) > Object.size())
- return createError("Invalid buffer");
+ return createError("invalid buffer: the size (" + Twine(Object.size()) +
+ ") is smaller than an ELF header (" +
+ Twine(sizeof(Elf_Ehdr)) + ")");
return ELFFile(Object);
}
@@ -459,16 +491,18 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
return ArrayRef<Elf_Shdr>();
if (getHeader()->e_shentsize != sizeof(Elf_Shdr))
- return createError(
- "invalid section header entry size (e_shentsize) in ELF header");
+ return createError("invalid e_shentsize in ELF header: " +
+ Twine(getHeader()->e_shentsize));
const uint64_t FileSize = Buf.size();
-
if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize)
- return createError("section header table goes past the end of the file");
+ return createError(
+ "section header table goes past the end of the file: e_shoff = 0x" +
+ Twine::utohexstr(SectionTableOffset));
// Invalid address alignment of section headers
if (SectionTableOffset & (alignof(Elf_Shdr) - 1))
+ // TODO: this error is untested.
return createError("invalid alignment of section headers");
const Elf_Shdr *First =
@@ -479,6 +513,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
NumSections = First->sh_size;
if (NumSections > UINT64_MAX / sizeof(Elf_Shdr))
+ // TODO: this error is untested.
return createError("section table goes past the end of file");
const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr);
@@ -505,10 +540,14 @@ template <typename T>
Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
uint32_t Entry) const {
if (sizeof(T) != Section->sh_entsize)
+ // TODO: this error is untested.
return createError("invalid sh_entsize");
size_t Pos = Section->sh_offset + Entry * sizeof(T);
if (Pos + sizeof(T) > Buf.size())
- return createError("invalid section offset");
+ return createError("unable to access section " +
+ getSecIndexForError(this, Section) + " data at 0x" +
+ Twine::utohexstr(Pos) +
+ ": offset goes past the end of file");
return reinterpret_cast<const T *>(base() + Pos);
}
@@ -534,6 +573,7 @@ ELFFile<ELFT>::getSection(const StringRef SectionName) const {
if (*SecNameOrErr == SectionName)
return &Sec;
}
+ // TODO: this error is untested.
return createError("invalid section name");
}
@@ -541,15 +581,24 @@ template <class ELFT>
Expected<StringRef>
ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const {
if (Section->sh_type != ELF::SHT_STRTAB)
- return createError("invalid sh_type for string table, expected SHT_STRTAB");
+ return createError("invalid sh_type for string table section " +
+ getSecIndexForError(this, Section) +
+ ": expected SHT_STRTAB, but got " +
+ object::getELFSectionTypeName(getHeader()->e_machine,
+ Section->sh_type));
auto V = getSectionContentsAsArray<char>(Section);
if (!V)
return V.takeError();
ArrayRef<char> Data = *V;
if (Data.empty())
+ // TODO: this error is untested.
return createError("empty string table");
if (Data.back() != '\0')
- return createError("string table non-null terminated");
+ return createError(object::getELFSectionTypeName(getHeader()->e_machine,
+ Section->sh_type) +
+ " string table section " +
+ getSecIndexForError(this, Section) +
+ " is non-null terminated");
return StringRef(Data.begin(), Data.size());
}
@@ -577,9 +626,13 @@ ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
const Elf_Shdr &SymTable = **SymTableOrErr;
if (SymTable.sh_type != ELF::SHT_SYMTAB &&
SymTable.sh_type != ELF::SHT_DYNSYM)
+ // TODO: this error is untested.
return createError("invalid sh_type");
if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym)))
- return createError("invalid section contents size");
+ return createError("SHT_SYMTAB_SHNDX section has sh_size (" +
+ Twine(SymTable.sh_size) +
+ ") which is not equal to the number of symbols (" +
+ Twine(V.size()) + ")");
return V;
}
@@ -598,6 +651,7 @@ ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec,
Elf_Shdr_Range Sections) const {
if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM)
+ // TODO: this error is untested.
return createError(
"invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM");
auto SectionOrErr = object::getSection<ELFT>(Sections, Sec.sh_link);
@@ -625,7 +679,11 @@ Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
if (Offset == 0)
return StringRef();
if (Offset >= DotShstrtab.size())
- return createError("invalid string offset");
+ return createError("a section " + getSecIndexForError(this, Section) +
+ " has an invalid sh_name (0x" +
+ Twine::utohexstr(Offset) +
+ ") offset which goes past the end of the "
+ "section name string table");
return StringRef(DotShstrtab.data() + Offset);
}
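
The hunks above replace terse messages with Twine-composed diagnostics. A minimal sketch of that composition style, using make_error<StringError> directly; the field values and the error code are placeholders rather than the exact ones used by libObject:

  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/Error.h"
  using namespace llvm;

  // Splice numeric context into the message rather than emitting a fixed
  // string, mirroring the createError()/getSecIndexForError() pattern above.
  static Error makeSectionError(unsigned SecIndex, uint64_t EntSize) {
    return make_error<StringError>("section [index " + Twine(SecIndex) +
                                       "] has an invalid sh_entsize: " +
                                       Twine(EntSize),
                                   inconvertibleErrorCode());
  }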
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 0f620681cd99..86c015efd704 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -1,9 +1,8 @@
//===- ELFObjectFile.h - ELF object file implementation ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,9 @@
namespace llvm {
namespace object {
+constexpr int NumElfSymbolTypes = 8;
+extern const llvm::EnumEntry<unsigned> ElfSymbolTypes[NumElfSymbolTypes];
+
class elf_symbol_iterator;
class ELFObjectFileBase : public ObjectFile {
@@ -52,8 +54,8 @@ class ELFObjectFileBase : public ObjectFile {
protected:
ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source);
- virtual uint16_t getEMachine() const = 0;
virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
+ virtual uint8_t getSymbolBinding(DataRefImpl Symb) const = 0;
virtual uint8_t getSymbolOther(DataRefImpl Symb) const = 0;
virtual uint8_t getSymbolELFType(DataRefImpl Symb) const = 0;
@@ -62,6 +64,7 @@ protected:
virtual uint64_t getSectionOffset(DataRefImpl Sec) const = 0;
virtual Expected<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0;
+ virtual Error getBuildAttributes(ARMAttributeParser &Attributes) const = 0;
public:
using elf_symbol_iterator_range = iterator_range<elf_symbol_iterator>;
@@ -87,6 +90,8 @@ public:
virtual uint16_t getEType() const = 0;
+ virtual uint16_t getEMachine() const = 0;
+
std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const;
};
@@ -142,6 +147,10 @@ public:
return getObject()->getSymbolSize(getRawDataRefImpl());
}
+ uint8_t getBinding() const {
+ return getObject()->getSymbolBinding(getRawDataRefImpl());
+ }
+
uint8_t getOther() const {
return getObject()->getSymbolOther(getRawDataRefImpl());
}
@@ -149,6 +158,16 @@ public:
uint8_t getELFType() const {
return getObject()->getSymbolELFType(getRawDataRefImpl());
}
+
+ StringRef getELFTypeName() const {
+ uint8_t Type = getELFType();
+ for (auto &EE : ElfSymbolTypes) {
+ if (EE.Value == Type) {
+ return EE.AltName;
+ }
+ }
+ return "";
+ }
};
class elf_symbol_iterator : public symbol_iterator {
@@ -239,6 +258,7 @@ protected:
uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
+ uint8_t getSymbolBinding(DataRefImpl Symb) const override;
uint8_t getSymbolOther(DataRefImpl Symb) const override;
uint8_t getSymbolELFType(DataRefImpl Symb) const override;
Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
@@ -247,13 +267,12 @@ protected:
Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
void moveSectionNext(DataRefImpl &Sec) const override;
- std::error_code getSectionName(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
uint64_t getSectionAddress(DataRefImpl Sec) const override;
uint64_t getSectionIndex(DataRefImpl Sec) const override;
uint64_t getSectionSize(DataRefImpl Sec) const override;
- std::error_code getSectionContents(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const override;
uint64_t getSectionAlignment(DataRefImpl Sec) const override;
bool isSectionCompressed(DataRefImpl Sec) const override;
bool isSectionText(DataRefImpl Sec) const override;
@@ -341,6 +360,28 @@ protected:
(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_PROTECTED));
}
+ Error getBuildAttributes(ARMAttributeParser &Attributes) const override {
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+
+ for (const Elf_Shdr &Sec : *SectionsOrErr) {
+ if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES) {
+ auto ErrorOrContents = EF.getSectionContents(&Sec);
+ if (!ErrorOrContents)
+ return ErrorOrContents.takeError();
+
+ auto Contents = ErrorOrContents.get();
+ if (Contents[0] != ARMBuildAttrs::Format_Version || Contents.size() == 1)
+ return Error::success();
+
+ Attributes.Parse(Contents, ELFT::TargetEndianness == support::little);
+ break;
+ }
+ }
+ return Error::success();
+ }
+
// This flag is used for classof, to distinguish ELFObjectFile from
// its subclass. If more subclasses will be created, this flag will
// have to become an enum.
@@ -382,28 +423,6 @@ public:
unsigned getPlatformFlags() const override { return EF.getHeader()->e_flags; }
- std::error_code getBuildAttributes(ARMAttributeParser &Attributes) const override {
- auto SectionsOrErr = EF.sections();
- if (!SectionsOrErr)
- return errorToErrorCode(SectionsOrErr.takeError());
-
- for (const Elf_Shdr &Sec : *SectionsOrErr) {
- if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES) {
- auto ErrorOrContents = EF.getSectionContents(&Sec);
- if (!ErrorOrContents)
- return errorToErrorCode(ErrorOrContents.takeError());
-
- auto Contents = ErrorOrContents.get();
- if (Contents[0] != ARMBuildAttrs::Format_Version || Contents.size() == 1)
- return std::error_code();
-
- Attributes.Parse(Contents, ELFT::TargetEndianness == support::little);
- break;
- }
- }
- return std::error_code();
- }
-
const ELFFile<ELFT> *getELFFile() const { return &EF; }
bool isDyldType() const { return isDyldELFObject; }
@@ -441,7 +460,16 @@ Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
auto SymStrTabOrErr = EF.getStringTable(StringTableSec);
if (!SymStrTabOrErr)
return SymStrTabOrErr.takeError();
- return ESym->getName(*SymStrTabOrErr);
+ Expected<StringRef> Name = ESym->getName(*SymStrTabOrErr);
+
+ // If the symbol name is empty use the section name.
+ if ((!Name || Name->empty()) && ESym->getType() == ELF::STT_SECTION) {
+ StringRef SecName;
+ Expected<section_iterator> Sec = getSymbolSection(Sym);
+ if (Sec && !(*Sec)->getName(SecName))
+ return SecName;
+ }
+ return Name;
}
template <class ELFT>
@@ -533,6 +561,11 @@ uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
}
template <class ELFT>
+uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const {
+ return getSymbol(Symb)->getBinding();
+}
+
+template <class ELFT>
uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const {
return getSymbol(Symb)->st_other;
}
@@ -654,13 +687,8 @@ void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const {
}
template <class ELFT>
-std::error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
- StringRef &Result) const {
- auto Name = EF.getSectionName(&*getSection(Sec));
- if (!Name)
- return errorToErrorCode(Name.takeError());
- Result = *Name;
- return std::error_code();
+Expected<StringRef> ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec) const {
+ return EF.getSectionName(&*getSection(Sec));
}
template <class ELFT>
@@ -685,16 +713,15 @@ uint64_t ELFObjectFile<ELFT>::getSectionSize(DataRefImpl Sec) const {
}
template <class ELFT>
-std::error_code
-ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec,
- StringRef &Result) const {
+Expected<ArrayRef<uint8_t>>
+ELFObjectFile<ELFT>::getSectionContents(DataRefImpl Sec) const {
const Elf_Shdr *EShdr = getSection(Sec);
if (std::error_code EC =
checkOffset(getMemoryBufferRef(),
(uintptr_t)base() + EShdr->sh_offset, EShdr->sh_size))
- return EC;
- Result = StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size);
- return std::error_code();
+ return errorCodeToError(EC);
+ return makeArrayRef((const uint8_t *)base() + EShdr->sh_offset,
+ EShdr->sh_size);
}
template <class ELFT>
@@ -750,7 +777,7 @@ ELFObjectFile<ELFT>::dynamic_relocation_sections() const {
}
}
for (const Elf_Shdr &Sec : *SectionsOrErr) {
- if (is_contained(Offsets, Sec.sh_offset))
+ if (is_contained(Offsets, Sec.sh_addr))
Res.emplace_back(toDRI(&Sec), this);
}
return Res;
@@ -925,15 +952,13 @@ ELFObjectFile<ELFT>::create(MemoryBufferRef Object) {
for (const Elf_Shdr &Sec : *SectionsOrErr) {
switch (Sec.sh_type) {
case ELF::SHT_DYNSYM: {
- if (DotDynSymSec)
- return createError("More than one dynamic symbol table!");
- DotDynSymSec = &Sec;
+ if (!DotDynSymSec)
+ DotDynSymSec = &Sec;
break;
}
case ELF::SHT_SYMTAB: {
- if (DotSymtabSec)
- return createError("More than one static symbol table!");
- DotSymtabSec = &Sec;
+ if (!DotSymtabSec)
+ DotSymtabSec = &Sec;
break;
}
case ELF::SHT_SYMTAB_SHNDX: {
@@ -967,7 +992,9 @@ ELFObjectFile<ELFT>::ELFObjectFile(ELFObjectFile<ELFT> &&Other)
template <class ELFT>
basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const {
- DataRefImpl Sym = toDRI(DotSymtabSec, 0);
+ DataRefImpl Sym =
+ toDRI(DotSymtabSec,
+ DotSymtabSec && DotSymtabSec->sh_size >= sizeof(Elf_Sym) ? 1 : 0);
return basic_symbol_iterator(SymbolRef(Sym, this));
}
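
A hedged caller-side sketch of the accessors this file adds (getBinding) and extends (getELFTypeName); consuming symbol-name lookup errors so the loop can continue is one plausible policy, not the only one:

  #include "llvm/Object/ELFObjectFile.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::object;

  // Dump name, binding, and ELF type for every symbol of an already-parsed
  // ELF object file.
  static void dumpSymbols(const ELFObjectFileBase &Obj) {
    for (const ELFSymbolRef &Sym : Obj.symbols()) {
      StringRef Name = "<unknown>";
      if (Expected<StringRef> NameOrErr = Sym.getName())
        Name = *NameOrErr;
      else
        consumeError(NameOrErr.takeError());
      outs() << Name << " binding=" << unsigned(Sym.getBinding())
             << " type=" << Sym.getELFTypeName() << "\n";
    }
  }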
diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h
index ec3c8e7bae46..5552208b1f8a 100644
--- a/include/llvm/Object/ELFTypes.h
+++ b/include/llvm/Object/ELFTypes.h
@@ -1,9 +1,8 @@
//===- ELFTypes.h - Endian specific types for ELF ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -593,9 +592,9 @@ class Elf_Note_Impl {
template <class NoteIteratorELFT> friend class Elf_Note_Iterator_Impl;
+public:
Elf_Note_Impl(const Elf_Nhdr_Impl<ELFT> &Nhdr) : Nhdr(Nhdr) {}
-public:
/// Get the note's name, excluding the terminating null byte.
StringRef getName() const {
if (!Nhdr.n_namesz)
diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h
index a15f8b9236eb..b7bbf06fc86d 100644
--- a/include/llvm/Object/Error.h
+++ b/include/llvm/Object/Error.h
@@ -1,9 +1,8 @@
//===- Error.h - system_error extensions for Object -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h
index 993359b766a1..08b92f1bae50 100644
--- a/include/llvm/Object/IRObjectFile.h
+++ b/include/llvm/Object/IRObjectFile.h
@@ -1,9 +1,8 @@
//===- IRObjectFile.h - LLVM IR object file implementation ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,8 +37,7 @@ class IRObjectFile : public SymbolicFile {
public:
~IRObjectFile() override;
void moveSymbolNext(DataRefImpl &Symb) const override;
- std::error_code printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const override;
+ Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
basic_symbol_iterator symbol_begin() const override;
basic_symbol_iterator symbol_end() const override;
diff --git a/include/llvm/Object/IRSymtab.h b/include/llvm/Object/IRSymtab.h
index 5f6a024cd132..0bbfc932493c 100644
--- a/include/llvm/Object/IRSymtab.h
+++ b/include/llvm/Object/IRSymtab.h
@@ -1,9 +1,8 @@
//===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -126,12 +125,13 @@ struct Uncommon {
Str SectionName;
};
+
struct Header {
/// Version number of the symtab format. This number should be incremented
/// when the format changes, but it does not need to be incremented if a
/// change to LLVM would cause it to create a different symbol table.
Word Version;
- enum { kCurrentVersion = 1 };
+ enum { kCurrentVersion = 2 };
/// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
/// Consumers should rebuild the symbol table from IR if the producer's
@@ -148,6 +148,9 @@ struct Header {
/// COFF-specific: linker directives.
Str COFFLinkerOpts;
+
+ /// Dependent Library Specifiers
+ Range<Str> DependentLibraries;
};
} // end namespace storage
@@ -232,6 +235,7 @@ class Reader {
ArrayRef<storage::Comdat> Comdats;
ArrayRef<storage::Symbol> Symbols;
ArrayRef<storage::Uncommon> Uncommons;
+ ArrayRef<storage::Str> DependentLibraries;
StringRef str(storage::Str S) const { return S.get(Strtab); }
@@ -252,6 +256,7 @@ public:
Comdats = range(header().Comdats);
Symbols = range(header().Symbols);
Uncommons = range(header().Uncommons);
+ DependentLibraries = range(header().DependentLibraries);
}
using symbol_range = iterator_range<object::content_iterator<SymbolRef>>;
@@ -284,6 +289,16 @@ public:
/// COFF-specific: returns linker options specified in the input file.
StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
+
+ /// Returns dependent library specifiers
+ std::vector<StringRef> getDependentLibraries() const {
+ std::vector<StringRef> Specifiers;
+ Specifiers.reserve(DependentLibraries.size());
+ for (auto S : DependentLibraries) {
+ Specifiers.push_back(str(S));
+ }
+ return Specifiers;
+ }
};
/// Ephemeral symbols produced by Reader::symbols() and
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index c2f4f4062934..ca9512f21706 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -1,9 +1,8 @@
//===- MachO.h - MachO object file implementation ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -134,11 +133,9 @@ public:
BindRebaseSegInfo(const MachOObjectFile *Obj);
// Used to check a Mach-O Bind or Rebase entry for errors when iterating.
- const char *checkSegAndOffset(int32_t SegIndex, uint64_t SegOffset,
- bool endInvalid);
- const char *checkCountAndSkip(uint32_t Count, uint32_t Skip,
- uint8_t PointerSize, int32_t SegIndex,
- uint64_t SegOffset);
+ const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
+ uint8_t PointerSize, uint32_t Count=1,
+ uint32_t Skip=0);
// Used with valid SegIndex/SegOffset values from checked entries.
StringRef segmentName(int32_t SegIndex);
StringRef sectionName(int32_t SegIndex, uint64_t SegOffset);
@@ -296,13 +293,12 @@ public:
unsigned getSectionID(SectionRef Sec) const;
void moveSectionNext(DataRefImpl &Sec) const override;
- std::error_code getSectionName(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
uint64_t getSectionAddress(DataRefImpl Sec) const override;
uint64_t getSectionIndex(DataRefImpl Sec) const override;
uint64_t getSectionSize(DataRefImpl Sec) const override;
- std::error_code getSectionContents(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const override;
uint64_t getSectionAlignment(DataRefImpl Sec) const override;
Expected<SectionRef> getSection(unsigned SectionIndex) const;
Expected<SectionRef> getSection(StringRef SectionName) const;
@@ -413,36 +409,32 @@ public:
bool is64,
MachOBindEntry::Kind);
- /// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to
- /// validate a MachOBindEntry.
- const char *BindEntryCheckSegAndOffset(int32_t SegIndex, uint64_t SegOffset,
- bool endInvalid) const {
- return BindRebaseSectionTable->checkSegAndOffset(SegIndex, SegOffset,
- endInvalid);
- }
- /// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for
- /// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode.
- const char *BindEntryCheckCountAndSkip(uint32_t Count, uint32_t Skip,
- uint8_t PointerSize, int32_t SegIndex,
- uint64_t SegOffset) const {
- return BindRebaseSectionTable->checkCountAndSkip(Count, Skip, PointerSize,
- SegIndex, SegOffset);
+ // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
+ // that fully contains a pointer at that location. Multiple fixups in a bind
+ // (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can
+ // be tested via the Count and Skip parameters.
+ //
+ // This is used by MachOBindEntry::moveNext() to validate a MachOBindEntry.
+ const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset,
+ uint8_t PointerSize, uint32_t Count=1,
+ uint32_t Skip=0) const {
+ return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
+ PointerSize, Count, Skip);
}
- /// For use with a SegIndex,SegOffset pair in MachORebaseEntry::moveNext() to
- /// validate a MachORebaseEntry.
- const char *RebaseEntryCheckSegAndOffset(int32_t SegIndex, uint64_t SegOffset,
- bool endInvalid) const {
- return BindRebaseSectionTable->checkSegAndOffset(SegIndex, SegOffset,
- endInvalid);
- }
- /// For use in MachORebaseEntry::moveNext() to validate a MachORebaseEntry for
- /// the REBASE_OPCODE_DO_*_TIMES* opcodes.
- const char *RebaseEntryCheckCountAndSkip(uint32_t Count, uint32_t Skip,
- uint8_t PointerSize, int32_t SegIndex,
- uint64_t SegOffset) const {
- return BindRebaseSectionTable->checkCountAndSkip(Count, Skip, PointerSize,
- SegIndex, SegOffset);
+ // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
+ // that fully contains a pointer at that location. Multiple fixups in a rebase
+ // (such as with the REBASE_OPCODE_DO_*_TIMES* opcodes) can be tested via the
+ // Count and Skip parameters.
+ //
+ // This is used by MachORebaseEntry::moveNext() to validate a MachORebaseEntry
+ const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex,
+ uint64_t SegOffset,
+ uint8_t PointerSize,
+ uint32_t Count=1,
+ uint32_t Skip=0) const {
+ return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset,
+ PointerSize, Count, Skip);
}
/// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to
@@ -579,6 +571,7 @@ public:
const char **McpuDefault = nullptr,
const char **ArchFlag = nullptr);
static bool isValidArch(StringRef ArchFlag);
+ static ArrayRef<StringRef> getValidArchs();
static Triple getHostArch();
bool isRelocatableObject() const override;
@@ -616,6 +609,7 @@ public:
case MachO::PLATFORM_TVOS: return "tvos";
case MachO::PLATFORM_WATCHOS: return "watchos";
case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_MACCATALYST: return "macCatalyst";
case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
index 9e70b0bc30c0..5bf724f2c8b2 100644
--- a/include/llvm/Object/MachOUniversal.h
+++ b/include/llvm/Object/MachOUniversal.h
@@ -1,9 +1,8 @@
//===- MachOUniversal.h - Mach-O universal binaries -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Object/Minidump.h b/include/llvm/Object/Minidump.h
new file mode 100644
index 000000000000..470008d552e7
--- /dev/null
+++ b/include/llvm/Object/Minidump.h
@@ -0,0 +1,165 @@
+//===- Minidump.h - Minidump object file implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MINIDUMP_H
+#define LLVM_OBJECT_MINIDUMP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Minidump.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace object {
+
+/// A class providing access to the contents of a minidump file.
+class MinidumpFile : public Binary {
+public:
+ /// Construct a new MinidumpFile object from the given memory buffer. Returns
+ /// an error if this file cannot be identified as a minidump file, or if its
+ /// contents are badly corrupted (i.e. we cannot read the stream directory).
+ static Expected<std::unique_ptr<MinidumpFile>> create(MemoryBufferRef Source);
+
+ static bool classof(const Binary *B) { return B->isMinidump(); }
+
+ /// Returns the contents of the minidump header.
+ const minidump::Header &header() const { return Header; }
+
+ /// Returns the list of streams (stream directory entries) in this file.
+ ArrayRef<minidump::Directory> streams() const { return Streams; }
+
+ /// Returns the raw contents of the stream given by the directory entry.
+ ArrayRef<uint8_t> getRawStream(const minidump::Directory &Stream) const {
+ return getData().slice(Stream.Location.RVA, Stream.Location.DataSize);
+ }
+
+ /// Returns the raw contents of the stream of the given type, or None if the
+ /// file does not contain a stream of this type.
+ Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
+
+ /// Returns the raw contents of an object given by the LocationDescriptor. An
+ /// error is returned if the descriptor points outside of the minidump file.
+ Expected<ArrayRef<uint8_t>>
+ getRawData(minidump::LocationDescriptor Desc) const {
+ return getDataSlice(getData(), Desc.RVA, Desc.DataSize);
+ }
+
+ /// Returns the minidump string at the given offset. An error is returned if
+ /// we fail to parse the string, or the string is invalid UTF16.
+ Expected<std::string> getString(size_t Offset) const;
+
+ /// Returns the contents of the SystemInfo stream, cast to the appropriate
+ /// type. An error is returned if the file does not contain this stream, or
+ /// the stream is smaller than the size of the SystemInfo structure. The
+ /// internal consistency of the stream is not checked in any way.
+ Expected<const minidump::SystemInfo &> getSystemInfo() const {
+ return getStream<minidump::SystemInfo>(minidump::StreamType::SystemInfo);
+ }
+
+ /// Returns the module list embedded in the ModuleList stream. An error is
+ /// returned if the file does not contain this stream, or if the stream is
+ /// not large enough to contain the number of modules declared in the stream
+ /// header. The consistency of the Module entries themselves is not checked in
+ /// any way.
+ Expected<ArrayRef<minidump::Module>> getModuleList() const {
+ return getListStream<minidump::Module>(minidump::StreamType::ModuleList);
+ }
+
+ /// Returns the thread list embedded in the ThreadList stream. An error is
+ /// returned if the file does not contain this stream, or if the stream is
+ /// not large enough to contain the number of threads declared in the stream
+ /// header. The consistency of the Thread entries themselves is not checked in
+ /// any way.
+ Expected<ArrayRef<minidump::Thread>> getThreadList() const {
+ return getListStream<minidump::Thread>(minidump::StreamType::ThreadList);
+ }
+
+ /// Returns the list of memory ranges embedded in the MemoryList stream. An
+ /// error is returned if the file does not contain this stream, or if the
+ /// stream is not large enough to contain the number of memory descriptors
+ /// declared in the stream header. The consistency of the MemoryDescriptor
+ /// entries themselves is not checked in any way.
+ Expected<ArrayRef<minidump::MemoryDescriptor>> getMemoryList() const {
+ return getListStream<minidump::MemoryDescriptor>(
+ minidump::StreamType::MemoryList);
+ }
+
+private:
+ static Error createError(StringRef Str) {
+ return make_error<GenericBinaryError>(Str, object_error::parse_failed);
+ }
+
+ static Error createEOFError() {
+ return make_error<GenericBinaryError>("Unexpected EOF",
+ object_error::unexpected_eof);
+ }
+
+ /// Return a slice of the given data array, with bounds checking.
+ static Expected<ArrayRef<uint8_t>> getDataSlice(ArrayRef<uint8_t> Data,
+ size_t Offset, size_t Size);
+
+ /// Return the slice of the given data array as an array of objects of the
+ /// given type. The function checks that the input array is large enough to
+ /// contain the correct number of objects of the given type.
+ template <typename T>
+ static Expected<ArrayRef<T>> getDataSliceAs(ArrayRef<uint8_t> Data,
+ size_t Offset, size_t Count);
+
+ MinidumpFile(MemoryBufferRef Source, const minidump::Header &Header,
+ ArrayRef<minidump::Directory> Streams,
+ DenseMap<minidump::StreamType, std::size_t> StreamMap)
+ : Binary(ID_Minidump, Source), Header(Header), Streams(Streams),
+ StreamMap(std::move(StreamMap)) {}
+
+ ArrayRef<uint8_t> getData() const {
+ return arrayRefFromStringRef(Data.getBuffer());
+ }
+
+ /// Return the stream of the given type, cast to the appropriate type. Checks
+ /// that the stream is large enough to hold an object of this type.
+ template <typename T>
+ Expected<const T &> getStream(minidump::StreamType Stream) const;
+
+ /// Return the contents of a stream which contains a list of fixed-size items,
+ /// prefixed by the list size.
+ template <typename T>
+ Expected<ArrayRef<T>> getListStream(minidump::StreamType Stream) const;
+
+ const minidump::Header &Header;
+ ArrayRef<minidump::Directory> Streams;
+ DenseMap<minidump::StreamType, std::size_t> StreamMap;
+};
+
+template <typename T>
+Expected<const T &> MinidumpFile::getStream(minidump::StreamType Stream) const {
+ if (auto OptionalStream = getRawStream(Stream)) {
+ if (OptionalStream->size() >= sizeof(T))
+ return *reinterpret_cast<const T *>(OptionalStream->data());
+ return createEOFError();
+ }
+ return createError("No such stream");
+}
+
+template <typename T>
+Expected<ArrayRef<T>> MinidumpFile::getDataSliceAs(ArrayRef<uint8_t> Data,
+ size_t Offset,
+ size_t Count) {
+ // Check for overflow.
+ if (Count > std::numeric_limits<size_t>::max() / sizeof(T))
+ return createEOFError();
+ auto ExpectedArray = getDataSlice(Data, Offset, sizeof(T) * Count);
+ if (!ExpectedArray)
+ return ExpectedArray.takeError();
+ return ArrayRef<T>(reinterpret_cast<const T *>(ExpectedArray->data()), Count);
+}
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_MINIDUMP_H
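
A usage sketch for the new MinidumpFile class, assuming the caller already holds a MemoryBufferRef with the file contents; only names declared in the header above are used:

  #include "llvm/Object/Minidump.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/raw_ostream.h"
  #include <memory>
  using namespace llvm;
  using namespace llvm::object;

  // Parse a minidump from memory and report its stream and module counts.
  static Error summarize(MemoryBufferRef Buffer) {
    Expected<std::unique_ptr<MinidumpFile>> FileOrErr =
        MinidumpFile::create(Buffer);
    if (!FileOrErr)
      return FileOrErr.takeError();
    MinidumpFile &File = **FileOrErr;
    outs() << "streams: " << File.streams().size() << "\n";
    Expected<ArrayRef<minidump::Module>> ModulesOrErr = File.getModuleList();
    if (!ModulesOrErr)
      return ModulesOrErr.takeError();
    outs() << "modules: " << ModulesOrErr->size() << "\n";
    return Error::success();
  }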
diff --git a/include/llvm/Object/ModuleSymbolTable.h b/include/llvm/Object/ModuleSymbolTable.h
index c3cbc27998e5..4c582fbcda81 100644
--- a/include/llvm/Object/ModuleSymbolTable.h
+++ b/include/llvm/Object/ModuleSymbolTable.h
@@ -1,9 +1,8 @@
//===- ModuleSymbolTable.h - symbol table for in-memory IR ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 036c99cb6baf..483a3486bd72 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -1,9 +1,8 @@
//===- ObjectFile.h - File format independent object file -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#ifndef LLVM_OBJECT_OBJECTFILE_H
#define LLVM_OBJECT_OBJECTFILE_H
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
@@ -98,7 +98,7 @@ public:
uint64_t getAddress() const;
uint64_t getIndex() const;
uint64_t getSize() const;
- std::error_code getContents(StringRef &Result) const;
+ Expected<StringRef> getContents() const;
/// Get the alignment of this section as the actual value (not log 2).
uint64_t getAlignment() const;
@@ -136,6 +136,30 @@ public:
const ObjectFile *getObject() const;
};
+struct SectionedAddress {
+ // TODO: constructors could be removed when C++14 would be adopted.
+ SectionedAddress() {}
+ SectionedAddress(uint64_t Addr, uint64_t SectIdx)
+ : Address(Addr), SectionIndex(SectIdx) {}
+
+ const static uint64_t UndefSection = UINT64_MAX;
+
+ uint64_t Address = 0;
+ uint64_t SectionIndex = UndefSection;
+};
+
+inline bool operator<(const SectionedAddress &LHS,
+ const SectionedAddress &RHS) {
+ return std::tie(LHS.SectionIndex, LHS.Address) <
+ std::tie(RHS.SectionIndex, RHS.Address);
+}
+
+inline bool operator==(const SectionedAddress &LHS,
+ const SectionedAddress &RHS) {
+ return std::tie(LHS.SectionIndex, LHS.Address) ==
+ std::tie(RHS.SectionIndex, RHS.Address);
+}
+
/// This is a value type class that represents a single symbol in the list of
/// symbols in the object file.
class SymbolRef : public BasicSymbolRef {
@@ -220,7 +244,7 @@ protected:
friend class SymbolRef;
virtual Expected<StringRef> getSymbolName(DataRefImpl Symb) const = 0;
- std::error_code printSymbolName(raw_ostream &OS,
+ Error printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const override;
virtual Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const = 0;
virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0;
@@ -234,13 +258,12 @@ protected:
friend class SectionRef;
virtual void moveSectionNext(DataRefImpl &Sec) const = 0;
- virtual std::error_code getSectionName(DataRefImpl Sec,
- StringRef &Res) const = 0;
+ virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const = 0;
virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0;
virtual uint64_t getSectionIndex(DataRefImpl Sec) const = 0;
virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0;
- virtual std::error_code getSectionContents(DataRefImpl Sec,
- StringRef &Res) const = 0;
+ virtual Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const = 0;
virtual uint64_t getSectionAlignment(DataRefImpl Sec) const = 0;
virtual bool isSectionCompressed(DataRefImpl Sec) const = 0;
virtual bool isSectionText(DataRefImpl Sec) const = 0;
@@ -308,11 +331,6 @@ public:
/// Create a triple from the data in this object file.
Triple makeTriple() const;
- virtual std::error_code
- getBuildAttributes(ARMAttributeParser &Attributes) const {
- return std::error_code();
- }
-
/// Maps a debug section name to a standard DWARF section name.
virtual StringRef mapDebugSectionName(StringRef Name) const { return Name; }
@@ -341,6 +359,9 @@ public:
createCOFFObjectFile(MemoryBufferRef Object);
static Expected<std::unique_ptr<ObjectFile>>
+ createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType);
+
+ static Expected<std::unique_ptr<ObjectFile>>
createELFObjectFile(MemoryBufferRef Object);
static Expected<std::unique_ptr<MachOObjectFile>>
@@ -396,14 +417,16 @@ inline SectionRef::SectionRef(DataRefImpl SectionP,
, OwningObject(Owner) {}
inline bool SectionRef::operator==(const SectionRef &Other) const {
- return SectionPimpl == Other.SectionPimpl;
+ return OwningObject == Other.OwningObject &&
+ SectionPimpl == Other.SectionPimpl;
}
inline bool SectionRef::operator!=(const SectionRef &Other) const {
- return SectionPimpl != Other.SectionPimpl;
+ return !(*this == Other);
}
inline bool SectionRef::operator<(const SectionRef &Other) const {
+ assert(OwningObject == Other.OwningObject);
return SectionPimpl < Other.SectionPimpl;
}
@@ -412,7 +435,11 @@ inline void SectionRef::moveNext() {
}
inline std::error_code SectionRef::getName(StringRef &Result) const {
- return OwningObject->getSectionName(SectionPimpl, Result);
+ Expected<StringRef> NameOrErr = OwningObject->getSectionName(SectionPimpl);
+ if (!NameOrErr)
+ return errorToErrorCode(NameOrErr.takeError());
+ Result = *NameOrErr;
+ return std::error_code();
}
inline uint64_t SectionRef::getAddress() const {
@@ -427,8 +454,12 @@ inline uint64_t SectionRef::getSize() const {
return OwningObject->getSectionSize(SectionPimpl);
}
-inline std::error_code SectionRef::getContents(StringRef &Result) const {
- return OwningObject->getSectionContents(SectionPimpl, Result);
+inline Expected<StringRef> SectionRef::getContents() const {
+ Expected<ArrayRef<uint8_t>> Res =
+ OwningObject->getSectionContents(SectionPimpl);
+ if (!Res)
+ return Res.takeError();
+ return StringRef(reinterpret_cast<const char *>(Res->data()), Res->size());
}
inline uint64_t SectionRef::getAlignment() const {
@@ -531,6 +562,25 @@ inline const ObjectFile *RelocationRef::getObject() const {
} // end namespace object
+template <> struct DenseMapInfo<object::SectionRef> {
+ static bool isEqual(const object::SectionRef &A,
+ const object::SectionRef &B) {
+ return A == B;
+ }
+ static object::SectionRef getEmptyKey() {
+ return object::SectionRef({}, nullptr);
+ }
+ static object::SectionRef getTombstoneKey() {
+ object::DataRefImpl TS;
+ TS.p = (uintptr_t)-1;
+ return object::SectionRef(TS, nullptr);
+ }
+ static unsigned getHashValue(const object::SectionRef &Sec) {
+ object::DataRefImpl Raw = Sec.getRawDataRefImpl();
+ return hash_combine(Raw.p, Raw.d.a, Raw.d.b);
+ }
+};
+
} // end namespace llvm
#endif // LLVM_OBJECT_OBJECTFILE_H
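Two of the changes above matter most to callers: SectionRef::getContents() now returns Expected<StringRef> instead of filling an out-parameter, and the new DenseMapInfo<object::SectionRef> specialization makes SectionRef usable directly as a DenseMap key. A minimal sketch of both, assuming an ObjectFile reference Obj obtained elsewhere:

  DenseMap<object::SectionRef, uint64_t> Sizes;
  for (const object::SectionRef &Sec : Obj.sections()) {
    // Old API (removed): std::error_code EC = Sec.getContents(Data);
    Expected<StringRef> ContentsOrErr = Sec.getContents();
    if (!ContentsOrErr) {
      consumeError(ContentsOrErr.takeError());
      continue;
    }
    Sizes[Sec] = ContentsOrErr->size();
  }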
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
deleted file mode 100644
index 9a978de2e599..000000000000
--- a/include/llvm/Object/RelocVisitor.h
+++ /dev/null
@@ -1,351 +0,0 @@
-//===- RelocVisitor.h - Visitor for object file relocations -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides a wrapper around all the different types of relocations
-// in different file formats, such that a client can handle them in a unified
-// manner by only implementing a minimal number of functions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECT_RELOCVISITOR_H
-#define LLVM_OBJECT_RELOCVISITOR_H
-
-#include "llvm/ADT/Triple.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/Wasm.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstdint>
-#include <system_error>
-
-namespace llvm {
-namespace object {
-
-/// Base class for object file relocation visitors.
-class RelocVisitor {
-public:
- explicit RelocVisitor(const ObjectFile &Obj) : ObjToVisit(Obj) {}
-
- // TODO: Should handle multiple applied relocations via either passing in the
- // previously computed value or just count paired relocations as a single
- // visit.
- uint64_t visit(uint32_t Rel, RelocationRef R, uint64_t Value = 0) {
- if (isa<ELFObjectFileBase>(ObjToVisit))
- return visitELF(Rel, R, Value);
- if (isa<COFFObjectFile>(ObjToVisit))
- return visitCOFF(Rel, R, Value);
- if (isa<MachOObjectFile>(ObjToVisit))
- return visitMachO(Rel, R, Value);
- if (isa<WasmObjectFile>(ObjToVisit))
- return visitWasm(Rel, R, Value);
-
- HasError = true;
- return 0;
- }
-
- bool error() { return HasError; }
-
-private:
- const ObjectFile &ObjToVisit;
- bool HasError = false;
-
- uint64_t visitELF(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (ObjToVisit.getBytesInAddress() == 8) { // 64-bit object file
- switch (ObjToVisit.getArch()) {
- case Triple::x86_64:
- return visitX86_64(Rel, R, Value);
- case Triple::aarch64:
- case Triple::aarch64_be:
- return visitAarch64(Rel, R, Value);
- case Triple::bpfel:
- case Triple::bpfeb:
- return visitBpf(Rel, R, Value);
- case Triple::mips64el:
- case Triple::mips64:
- return visitMips64(Rel, R, Value);
- case Triple::ppc64le:
- case Triple::ppc64:
- return visitPPC64(Rel, R, Value);
- case Triple::systemz:
- return visitSystemz(Rel, R, Value);
- case Triple::sparcv9:
- return visitSparc64(Rel, R, Value);
- case Triple::amdgcn:
- return visitAmdgpu(Rel, R, Value);
- default:
- HasError = true;
- return 0;
- }
- }
-
- // 32-bit object file
- assert(ObjToVisit.getBytesInAddress() == 4 &&
- "Invalid word size in object file");
-
- switch (ObjToVisit.getArch()) {
- case Triple::x86:
- return visitX86(Rel, R, Value);
- case Triple::ppc:
- return visitPPC32(Rel, R, Value);
- case Triple::arm:
- case Triple::armeb:
- return visitARM(Rel, R, Value);
- case Triple::lanai:
- return visitLanai(Rel, R, Value);
- case Triple::mipsel:
- case Triple::mips:
- return visitMips32(Rel, R, Value);
- case Triple::sparc:
- return visitSparc32(Rel, R, Value);
- case Triple::hexagon:
- return visitHexagon(Rel, R, Value);
- default:
- HasError = true;
- return 0;
- }
- }
-
- int64_t getELFAddend(RelocationRef R) {
- Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend();
- handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) {
- report_fatal_error(EI.message());
- });
- return *AddendOrErr;
- }
-
- uint64_t visitX86_64(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_X86_64_NONE:
- return 0;
- case ELF::R_X86_64_64:
- case ELF::R_X86_64_DTPOFF32:
- case ELF::R_X86_64_DTPOFF64:
- return Value + getELFAddend(R);
- case ELF::R_X86_64_PC32:
- return Value + getELFAddend(R) - R.getOffset();
- case ELF::R_X86_64_32:
- case ELF::R_X86_64_32S:
- return (Value + getELFAddend(R)) & 0xFFFFFFFF;
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitAarch64(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_AARCH64_ABS32: {
- int64_t Res = Value + getELFAddend(R);
- if (Res < INT32_MIN || Res > UINT32_MAX)
- HasError = true;
- return static_cast<uint32_t>(Res);
- }
- case ELF::R_AARCH64_ABS64:
- return Value + getELFAddend(R);
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitBpf(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_BPF_64_32:
- return Value & 0xFFFFFFFF;
- case ELF::R_BPF_64_64:
- return Value;
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitMips64(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_MIPS_32:
- return (Value + getELFAddend(R)) & 0xFFFFFFFF;
- case ELF::R_MIPS_64:
- return Value + getELFAddend(R);
- case ELF::R_MIPS_TLS_DTPREL64:
- return Value + getELFAddend(R) - 0x8000;
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitPPC64(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_PPC64_ADDR32:
- return (Value + getELFAddend(R)) & 0xFFFFFFFF;
- case ELF::R_PPC64_ADDR64:
- return Value + getELFAddend(R);
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitSystemz(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_390_32: {
- int64_t Res = Value + getELFAddend(R);
- if (Res < INT32_MIN || Res > UINT32_MAX)
- HasError = true;
- return static_cast<uint32_t>(Res);
- }
- case ELF::R_390_64:
- return Value + getELFAddend(R);
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitSparc64(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_SPARC_32:
- case ELF::R_SPARC_64:
- case ELF::R_SPARC_UA32:
- case ELF::R_SPARC_UA64:
- return Value + getELFAddend(R);
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitAmdgpu(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_AMDGPU_ABS32:
- case ELF::R_AMDGPU_ABS64:
- return Value + getELFAddend(R);
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitX86(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (Rel) {
- case ELF::R_386_NONE:
- return 0;
- case ELF::R_386_32:
- return Value;
- case ELF::R_386_PC32:
- return Value - R.getOffset();
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitPPC32(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (Rel == ELF::R_PPC_ADDR32)
- return (Value + getELFAddend(R)) & 0xFFFFFFFF;
- HasError = true;
- return 0;
- }
-
- uint64_t visitARM(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (Rel == ELF::R_ARM_ABS32) {
- if ((int64_t)Value < INT32_MIN || (int64_t)Value > UINT32_MAX)
- HasError = true;
- return static_cast<uint32_t>(Value);
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitLanai(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (Rel == ELF::R_LANAI_32)
- return (Value + getELFAddend(R)) & 0xFFFFFFFF;
- HasError = true;
- return 0;
- }
-
- uint64_t visitMips32(uint32_t Rel, RelocationRef R, uint64_t Value) {
- // FIXME: Take in account implicit addends to get correct results.
- if (Rel == ELF::R_MIPS_32)
- return Value & 0xFFFFFFFF;
- if (Rel == ELF::R_MIPS_TLS_DTPREL32)
- return Value & 0xFFFFFFFF;
- HasError = true;
- return 0;
- }
-
- uint64_t visitSparc32(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32)
- return Value + getELFAddend(R);
- HasError = true;
- return 0;
- }
-
- uint64_t visitHexagon(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (Rel == ELF::R_HEX_32)
- return Value + getELFAddend(R);
- HasError = true;
- return 0;
- }
-
- uint64_t visitCOFF(uint32_t Rel, RelocationRef R, uint64_t Value) {
- switch (ObjToVisit.getArch()) {
- case Triple::x86:
- switch (Rel) {
- case COFF::IMAGE_REL_I386_SECREL:
- case COFF::IMAGE_REL_I386_DIR32:
- return static_cast<uint32_t>(Value);
- }
- break;
- case Triple::x86_64:
- switch (Rel) {
- case COFF::IMAGE_REL_AMD64_SECREL:
- return static_cast<uint32_t>(Value);
- case COFF::IMAGE_REL_AMD64_ADDR64:
- return Value;
- }
- break;
- default:
- break;
- }
- HasError = true;
- return 0;
- }
-
- uint64_t visitMachO(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (ObjToVisit.getArch() == Triple::x86_64 &&
- Rel == MachO::X86_64_RELOC_UNSIGNED)
- return Value;
- HasError = true;
- return 0;
- }
-
- uint64_t visitWasm(uint32_t Rel, RelocationRef R, uint64_t Value) {
- if (ObjToVisit.getArch() == Triple::wasm32) {
- switch (Rel) {
- case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
- case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
- case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
- case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
- // For wasm section, its offset at 0 -- ignoring Value
- return 0;
- }
- }
- HasError = true;
- return 0;
- }
-};
-
-} // end namespace object
-} // end namespace llvm
-
-#endif // LLVM_OBJECT_RELOCVISITOR_H
diff --git a/include/llvm/Object/RelocationResolver.h b/include/llvm/Object/RelocationResolver.h
new file mode 100644
index 000000000000..1246dcc5ec73
--- /dev/null
+++ b/include/llvm/Object/RelocationResolver.h
@@ -0,0 +1,42 @@
+//===- RelocVisitor.h - Visitor for object file relocations -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a wrapper around all the different types of relocations
+// in different file formats, such that a client can handle them in a unified
+// manner by only implementing a minimal number of functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_RELOCVISITOR_H
+#define LLVM_OBJECT_RELOCVISITOR_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Wasm.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
+#include <system_error>
+
+namespace llvm {
+namespace object {
+
+using RelocationResolver = uint64_t (*)(RelocationRef R, uint64_t S, uint64_t A);
+
+std::pair<bool (*)(uint64_t), RelocationResolver>
+getRelocationResolver(const ObjectFile &Obj);
+
+} // end namespace object
+} // end namespace llvm
+
+#endif // LLVM_OBJECT_RELOCVISITOR_H
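The deleted RelocVisitor class is superseded by this pair of free functions: a predicate telling whether a relocation type is supported, and a resolver that applies it. A hedged sketch of the intended call pattern; Obj, Reloc, and SymbolValue are placeholders, and the addend is passed as zero:

  std::pair<bool (*)(uint64_t), object::RelocationResolver> RR =
      object::getRelocationResolver(Obj);
  uint64_t Resolved = SymbolValue;
  if (RR.first(Reloc.getType()))
    Resolved = RR.second(Reloc, SymbolValue, /*A=*/0);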
diff --git a/include/llvm/Object/StackMapParser.h b/include/llvm/Object/StackMapParser.h
index 557db5afa825..ed44efbf80b9 100644
--- a/include/llvm/Object/StackMapParser.h
+++ b/include/llvm/Object/StackMapParser.h
@@ -1,9 +1,8 @@
//===- StackMapParser.h - StackMap Parsing Support --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -20,8 +19,9 @@
namespace llvm {
+/// A parser for the latest stackmap format. At the moment, latest=V2.
template <support::endianness Endianness>
-class StackMapV2Parser {
+class StackMapParser {
public:
template <typename AccessorT>
class AccessorIterator {
@@ -50,7 +50,7 @@ public:
/// Accessor for function records.
class FunctionAccessor {
- friend class StackMapV2Parser;
+ friend class StackMapParser;
public:
/// Get the function address.
@@ -82,7 +82,7 @@ public:
/// Accessor for constants.
class ConstantAccessor {
- friend class StackMapV2Parser;
+ friend class StackMapParser;
public:
/// Return the value of this constant.
@@ -106,7 +106,7 @@ public:
/// Accessor for location records.
class LocationAccessor {
- friend class StackMapV2Parser;
+ friend class StackMapParser;
friend class RecordAccessor;
public:
@@ -115,6 +115,12 @@ public:
return LocationKind(P[KindOffset]);
}
+ /// Get the Size for this location.
+ unsigned getSizeInBytes() const {
+ return read<uint16_t>(P + SizeOffset);
+
+ }
+
/// Get the Dwarf register number for this location.
uint16_t getDwarfRegNum() const {
return read<uint16_t>(P + DwarfRegNumOffset);
@@ -149,16 +155,17 @@ public:
}
static const int KindOffset = 0;
- static const int DwarfRegNumOffset = KindOffset + sizeof(uint16_t);
- static const int SmallConstantOffset = DwarfRegNumOffset + sizeof(uint16_t);
- static const int LocationAccessorSize = sizeof(uint64_t);
+ static const int SizeOffset = KindOffset + sizeof(uint16_t);
+ static const int DwarfRegNumOffset = SizeOffset + sizeof(uint16_t);
+ static const int SmallConstantOffset = DwarfRegNumOffset + sizeof(uint32_t);
+ static const int LocationAccessorSize = sizeof(uint64_t) + sizeof(uint32_t);
const uint8_t *P;
};
/// Accessor for stackmap live-out fields.
class LiveOutAccessor {
- friend class StackMapV2Parser;
+ friend class StackMapParser;
friend class RecordAccessor;
public:
@@ -189,7 +196,7 @@ public:
/// Accessor for stackmap records.
class RecordAccessor {
- friend class StackMapV2Parser;
+ friend class StackMapParser;
public:
using location_iterator = AccessorIterator<LocationAccessor>;
@@ -264,8 +271,9 @@ public:
RecordAccessor(const uint8_t *P) : P(P) {}
unsigned getNumLiveOutsOffset() const {
- return LocationListOffset + LocationSize * getNumLocations() +
- sizeof(uint16_t);
+ unsigned LocOffset =
+ ((LocationListOffset + LocationSize * getNumLocations()) + 7) & ~0x7;
+ return LocOffset + sizeof(uint16_t);
}
unsigned getSizeInBytes() const {
@@ -285,7 +293,7 @@ public:
InstructionOffsetOffset + sizeof(uint32_t) + sizeof(uint16_t);
static const unsigned LocationListOffset =
NumLocationsOffset + sizeof(uint16_t);
- static const unsigned LocationSize = sizeof(uint64_t);
+ static const unsigned LocationSize = sizeof(uint64_t) + sizeof(uint32_t);
static const unsigned LiveOutSize = sizeof(uint32_t);
const uint8_t *P;
@@ -293,12 +301,12 @@ public:
/// Construct a parser for a version-2 stackmap. StackMap data will be read
/// from the given array.
- StackMapV2Parser(ArrayRef<uint8_t> StackMapSection)
+ StackMapParser(ArrayRef<uint8_t> StackMapSection)
: StackMapSection(StackMapSection) {
ConstantsListOffset = FunctionListOffset + getNumFunctions() * FunctionSize;
- assert(StackMapSection[0] == 2 &&
- "StackMapV2Parser can only parse version 2 stackmaps");
+ assert(StackMapSection[0] == 3 &&
+ "StackMapParser can only parse version 3 stackmaps");
unsigned CurrentRecordOffset =
ConstantsListOffset + getNumConstants() * ConstantSize;
@@ -314,8 +322,8 @@ public:
using constant_iterator = AccessorIterator<ConstantAccessor>;
using record_iterator = AccessorIterator<RecordAccessor>;
- /// Get the version number of this stackmap. (Always returns 2).
- unsigned getVersion() const { return 2; }
+ /// Get the version number of this stackmap. (Always returns 3).
+ unsigned getVersion() const { return 3; }
/// Get the number of functions in the stack map.
uint32_t getNumFunctions() const {
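With the rename to StackMapParser and the move to stackmap version 3, each location record gains an explicit size field (SizeOffset) and grows from 8 to 12 bytes, which is what the revised LocationSize arithmetic above encodes. A hedged construction sketch, assuming StackMapBytes holds the raw __llvm_stackmaps section contents:

  // The constructor asserts that byte 0 of the section is version 3.
  StackMapParser<support::little> Parser(StackMapBytes);
  outs() << "stackmap v" << Parser.getVersion()
         << ", functions: " << Parser.getNumFunctions() << "\n";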
diff --git a/include/llvm/Object/SymbolSize.h b/include/llvm/Object/SymbolSize.h
index 1a1dc8752943..085623e35907 100644
--- a/include/llvm/Object/SymbolSize.h
+++ b/include/llvm/Object/SymbolSize.h
@@ -1,9 +1,8 @@
//===- SymbolSize.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h
index 5b9549bc3449..1398fa134c81 100644
--- a/include/llvm/Object/SymbolicFile.h
+++ b/include/llvm/Object/SymbolicFile.h
@@ -1,9 +1,8 @@
//===- SymbolicFile.h - Interface that only provides symbols ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -127,7 +126,7 @@ public:
void moveNext();
- std::error_code printName(raw_ostream &OS) const;
+ Error printName(raw_ostream &OS) const;
/// Get symbol flags (bitwise OR of SymbolRef::Flags)
uint32_t getFlags() const;
@@ -146,8 +145,7 @@ public:
// virtual interface.
virtual void moveSymbolNext(DataRefImpl &Symb) const = 0;
- virtual std::error_code printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const = 0;
+ virtual Error printSymbolName(raw_ostream &OS, DataRefImpl Symb) const = 0;
virtual uint32_t getSymbolFlags(DataRefImpl Symb) const = 0;
@@ -194,7 +192,7 @@ inline void BasicSymbolRef::moveNext() {
return OwningObject->moveSymbolNext(SymbolPimpl);
}
-inline std::error_code BasicSymbolRef::printName(raw_ostream &OS) const {
+inline Error BasicSymbolRef::printName(raw_ostream &OS) const {
return OwningObject->printSymbolName(OS, SymbolPimpl);
}
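BasicSymbolRef::printName() now reports failure through llvm::Error rather than std::error_code, so callers must consume or propagate it explicitly. A small hedged sketch, with Sym and OS as placeholders:

  if (Error E = Sym.printName(OS))
    return std::move(E);            // or hand it to consumeError/handleErrors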
diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h
index ed857652a048..e130ea32ed21 100644
--- a/include/llvm/Object/Wasm.h
+++ b/include/llvm/Object/Wasm.h
@@ -1,9 +1,8 @@
-//===- WasmObjectFile.h - Wasm object file implementation -------*- C++ -*-===//
+//===- Wasm.h - Wasm object file implementation -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -130,6 +129,10 @@ public:
static bool classof(const Binary *v) { return v->isWasm(); }
const wasm::WasmDylinkInfo &dylinkInfo() const { return DylinkInfo; }
+ const wasm::WasmProducerInfo &getProducerInfo() const { return ProducerInfo; }
+ ArrayRef<wasm::WasmFeatureEntry> getTargetFeatures() const {
+ return TargetFeatures;
+ }
ArrayRef<wasm::WasmSignature> types() const { return Signatures; }
ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; }
ArrayRef<wasm::WasmImport> imports() const { return Imports; }
@@ -149,7 +152,6 @@ public:
uint32_t getNumImportedGlobals() const { return NumImportedGlobals; }
uint32_t getNumImportedFunctions() const { return NumImportedFunctions; }
uint32_t getNumImportedEvents() const { return NumImportedEvents; }
-
void moveSymbolNext(DataRefImpl &Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
@@ -169,13 +171,12 @@ public:
// Overrides from SectionRef.
void moveSectionNext(DataRefImpl &Sec) const override;
- std::error_code getSectionName(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
uint64_t getSectionAddress(DataRefImpl Sec) const override;
uint64_t getSectionIndex(DataRefImpl Sec) const override;
uint64_t getSectionSize(DataRefImpl Sec) const override;
- std::error_code getSectionContents(DataRefImpl Sec,
- StringRef &Res) const override;
+ Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const override;
uint64_t getSectionAlignment(DataRefImpl Sec) const override;
bool isSectionCompressed(DataRefImpl Sec) const override;
bool isSectionText(DataRefImpl Sec) const override;
@@ -222,13 +223,13 @@ private:
bool isValidDataSymbol(uint32_t Index) const;
bool isValidSectionSymbol(uint32_t Index) const;
wasm::WasmFunction &getDefinedFunction(uint32_t Index);
+ const wasm::WasmFunction &getDefinedFunction(uint32_t Index) const;
wasm::WasmGlobal &getDefinedGlobal(uint32_t Index);
wasm::WasmEvent &getDefinedEvent(uint32_t Index);
const WasmSection &getWasmSection(DataRefImpl Ref) const;
const wasm::WasmRelocation &getWasmRelocation(DataRefImpl Ref) const;
- const uint8_t *getPtr(size_t Offset) const;
Error parseSection(WasmSection &Sec);
Error parseCustomSection(WasmSection &Sec, ReadContext &Ctx);
@@ -245,6 +246,7 @@ private:
Error parseElemSection(ReadContext &Ctx);
Error parseCodeSection(ReadContext &Ctx);
Error parseDataSection(ReadContext &Ctx);
+ Error parseDataCountSection(ReadContext &Ctx);
// Custom section types
Error parseDylinkSection(ReadContext &Ctx);
@@ -252,11 +254,15 @@ private:
Error parseLinkingSection(ReadContext &Ctx);
Error parseLinkingSectionSymtab(ReadContext &Ctx);
Error parseLinkingSectionComdat(ReadContext &Ctx);
+ Error parseProducersSection(ReadContext &Ctx);
+ Error parseTargetFeaturesSection(ReadContext &Ctx);
Error parseRelocSection(StringRef Name, ReadContext &Ctx);
wasm::WasmObjectHeader Header;
std::vector<WasmSection> Sections;
wasm::WasmDylinkInfo DylinkInfo;
+ wasm::WasmProducerInfo ProducerInfo;
+ std::vector<wasm::WasmFeatureEntry> TargetFeatures;
std::vector<wasm::WasmSignature> Signatures;
std::vector<uint32_t> FunctionTypes;
std::vector<wasm::WasmTable> Tables;
@@ -267,6 +273,7 @@ private:
std::vector<wasm::WasmExport> Exports;
std::vector<wasm::WasmElemSegment> ElemSegments;
std::vector<WasmSegment> DataSegments;
+ llvm::Optional<size_t> DataCount;
std::vector<wasm::WasmFunction> Functions;
std::vector<WasmSymbol> Symbols;
std::vector<wasm::WasmFunctionName> DebugNames;
@@ -287,40 +294,51 @@ class WasmSectionOrderChecker {
public:
// We define orders for all core wasm sections and known custom sections.
enum : int {
+ // Sentinel, must be zero
+ WASM_SEC_ORDER_NONE = 0,
+
// Core sections
- // The order of standard sections is precisely given by the spec.
- WASM_SEC_ORDER_TYPE = 1,
- WASM_SEC_ORDER_IMPORT = 2,
- WASM_SEC_ORDER_FUNCTION = 3,
- WASM_SEC_ORDER_TABLE = 4,
- WASM_SEC_ORDER_MEMORY = 5,
- WASM_SEC_ORDER_GLOBAL = 6,
- WASM_SEC_ORDER_EVENT = 7,
- WASM_SEC_ORDER_EXPORT = 8,
- WASM_SEC_ORDER_START = 9,
- WASM_SEC_ORDER_ELEM = 10,
- WASM_SEC_ORDER_DATACOUNT = 11,
- WASM_SEC_ORDER_CODE = 12,
- WASM_SEC_ORDER_DATA = 13,
+ WASM_SEC_ORDER_TYPE,
+ WASM_SEC_ORDER_IMPORT,
+ WASM_SEC_ORDER_FUNCTION,
+ WASM_SEC_ORDER_TABLE,
+ WASM_SEC_ORDER_MEMORY,
+ WASM_SEC_ORDER_GLOBAL,
+ WASM_SEC_ORDER_EVENT,
+ WASM_SEC_ORDER_EXPORT,
+ WASM_SEC_ORDER_START,
+ WASM_SEC_ORDER_ELEM,
+ WASM_SEC_ORDER_DATACOUNT,
+ WASM_SEC_ORDER_CODE,
+ WASM_SEC_ORDER_DATA,
// Custom sections
// "dylink" should be the very first section in the module
- WASM_SEC_ORDER_DYLINK = 0,
+ WASM_SEC_ORDER_DYLINK,
// "linking" section requires DATA section in order to validate data symbols
- WASM_SEC_ORDER_LINKING = 100,
+ WASM_SEC_ORDER_LINKING,
// Must come after "linking" section in order to validate reloc indexes.
- WASM_SEC_ORDER_RELOC = 101,
+ WASM_SEC_ORDER_RELOC,
// "name" section must appear after DATA. Comes after "linking" to allow
// symbol table to set default function name.
- WASM_SEC_ORDER_NAME = 102,
+ WASM_SEC_ORDER_NAME,
// "producers" section must appear after "name" section.
- WASM_SEC_ORDER_PRODUCERS = 103
+ WASM_SEC_ORDER_PRODUCERS,
+ // "target_features" section must appear after producers section
+ WASM_SEC_ORDER_TARGET_FEATURES,
+
+ // Must be last
+ WASM_NUM_SEC_ORDERS
+
};
+ // Sections that may or may not be present, but cannot be predecessors
+ static int DisallowedPredecessors[WASM_NUM_SEC_ORDERS][WASM_NUM_SEC_ORDERS];
+
bool isValidSectionOrder(unsigned ID, StringRef CustomSectionName = "");
private:
- int LastOrder = -1; // Lastly seen known section's order
+ bool Seen[WASM_NUM_SEC_ORDERS] = {}; // Sections that have been seen already
// Returns -1 for unknown sections.
int getSectionOrder(unsigned ID, StringRef CustomSectionName = "");
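The order checker above replaces the single LastOrder counter with a Seen[] bitmap and a DisallowedPredecessors table, so optional sections can be skipped while out-of-order layouts are still rejected. The real table-driven logic lives in the corresponding .cpp file; the fragment below is only a hypothetical illustration of the Seen/disallowed-predecessor idea, not the LLVM implementation:

  // Hypothetical shape of such a check (names and table are made up).
  bool orderAllows(int Order, const bool Seen[], const bool Disallowed[][32]) {
    for (int Prev = 0; Prev < 32; ++Prev)
      if (Seen[Prev] && Disallowed[Order][Prev])
        return false;               // a forbidden predecessor already appeared
    return true;                    // caller then records Seen[Order] = true
  }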
diff --git a/include/llvm/Object/WasmTraits.h b/include/llvm/Object/WasmTraits.h
index 049d72f79e41..3eee8e71b187 100644
--- a/include/llvm/Object/WasmTraits.h
+++ b/include/llvm/Object/WasmTraits.h
@@ -1,9 +1,8 @@
//===- WasmTraits.h - DenseMap traits for the Wasm structures ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Object/WindowsMachineFlag.h b/include/llvm/Object/WindowsMachineFlag.h
new file mode 100644
index 000000000000..acc6afc0329c
--- /dev/null
+++ b/include/llvm/Object/WindowsMachineFlag.h
@@ -0,0 +1,33 @@
+//===- WindowsMachineFlag.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Functions for implementing the /machine: flag.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLDRIVERS_MACHINEFLAG_MACHINEFLAG_H
+#define LLVM_TOOLDRIVERS_MACHINEFLAG_MACHINEFLAG_H
+
+namespace llvm {
+
+class StringRef;
+namespace COFF {
+enum MachineTypes : unsigned;
+}
+
+// Returns a user-readable string for ARMNT, ARM64, AMD64, I386.
+// Other MachineTypes values must not be passed in.
+StringRef machineToStr(COFF::MachineTypes MT);
+
+// Maps /machine: arguments to a MachineTypes value.
+// Only returns ARMNT, ARM64, AMD64, I386, or IMAGE_FILE_MACHINE_UNKNOWN.
+COFF::MachineTypes getMachineType(StringRef S);
+
+}
+
+#endif
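A hedged usage sketch for the two helpers declared above, for example while handling a /machine: argument in a linker-style driver; Arg is a placeholder StringRef:

  COFF::MachineTypes MT = getMachineType(Arg);
  if (MT == COFF::IMAGE_FILE_MACHINE_UNKNOWN)
    errs() << "unknown /machine: value " << Arg << "\n";
  else
    outs() << "targeting " << machineToStr(MT) << "\n";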
diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h
index a077c82871bf..356dcb03abba 100644
--- a/include/llvm/Object/WindowsResource.h
+++ b/include/llvm/Object/WindowsResource.h
@@ -1,9 +1,8 @@
//===-- WindowsResource.h ---------------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -38,11 +37,14 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/ScopedPrinter.h"
#include <map>
namespace llvm {
+
+class raw_ostream;
+class ScopedPrinter;
+
namespace object {
class WindowsResource;
@@ -118,6 +120,7 @@ private:
const WindowsResource *Owner);
BinaryStreamReader Reader;
+ const WindowsResource *Owner;
bool IsStringType;
ArrayRef<UTF16> Type;
uint16_t TypeID;
@@ -149,7 +152,7 @@ class WindowsResourceParser {
public:
class TreeNode;
WindowsResourceParser();
- Error parse(WindowsResource *WR);
+ Error parse(WindowsResource *WR, std::vector<std::string> &Duplicates);
void printTree(raw_ostream &OS) const;
const TreeNode &getTree() const { return Root; }
const ArrayRef<std::vector<uint8_t>> getData() const { return Data; }
@@ -185,21 +188,25 @@ public:
static std::unique_ptr<TreeNode> createIDNode();
static std::unique_ptr<TreeNode> createDataNode(uint16_t MajorVersion,
uint16_t MinorVersion,
- uint32_t Characteristics);
+ uint32_t Characteristics,
+ uint32_t Origin);
explicit TreeNode(bool IsStringNode);
TreeNode(uint16_t MajorVersion, uint16_t MinorVersion,
- uint32_t Characteristics);
+ uint32_t Characteristics, uint32_t Origin);
- void addEntry(const ResourceEntryRef &Entry, bool &IsNewTypeString,
- bool &IsNewNameString);
+ bool addEntry(const ResourceEntryRef &Entry, uint32_t Origin,
+ bool &IsNewTypeString, bool &IsNewNameString,
+ TreeNode *&Result);
TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString);
TreeNode &addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString);
- TreeNode &addLanguageNode(const ResourceEntryRef &Entry);
- TreeNode &addChild(uint32_t ID, bool IsDataNode = false,
- uint16_t MajorVersion = 0, uint16_t MinorVersion = 0,
- uint32_t Characteristics = 0);
- TreeNode &addChild(ArrayRef<UTF16> NameRef, bool &IsNewString);
+ bool addLanguageNode(const ResourceEntryRef &Entry, uint32_t Origin,
+ TreeNode *&Result);
+ bool addDataChild(uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion,
+ uint32_t Characteristics, uint32_t Origin,
+ TreeNode *&Result);
+ TreeNode &addIDChild(uint32_t ID);
+ TreeNode &addNameChild(ArrayRef<UTF16> NameRef, bool &IsNewString);
bool IsDataNode = false;
uint32_t StringIndex;
@@ -209,18 +216,26 @@ public:
uint16_t MajorVersion = 0;
uint16_t MinorVersion = 0;
uint32_t Characteristics = 0;
+
+ // The .res file that defined this TreeNode, for diagnostics.
+ // Index into InputFilenames.
+ uint32_t Origin;
};
private:
TreeNode Root;
std::vector<std::vector<uint8_t>> Data;
std::vector<std::vector<UTF16>> StringTable;
+
+ std::vector<std::string> InputFilenames;
};
Expected<std::unique_ptr<MemoryBuffer>>
writeWindowsResourceCOFF(llvm::COFF::MachineTypes MachineType,
- const WindowsResourceParser &Parser);
+ const WindowsResourceParser &Parser,
+ uint32_t TimeDateStamp);
+void printResourceTypeName(uint16_t TypeID, raw_ostream &OS);
} // namespace object
} // namespace llvm
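The parser above now reports duplicate resources through an out-parameter, and the COFF writer takes an explicit timestamp. A hedged sketch of the call sequence; WR and MachineType are placeholders:

  object::WindowsResourceParser Parser;
  std::vector<std::string> Duplicates;
  if (Error E = Parser.parse(WR, Duplicates))
    return std::move(E);
  for (const std::string &Dup : Duplicates)
    errs() << Dup << "\n";
  auto COFFOrErr = object::writeWindowsResourceCOFF(MachineType, Parser,
                                                    /*TimeDateStamp=*/0);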
diff --git a/include/llvm/Object/XCOFFObjectFile.h b/include/llvm/Object/XCOFFObjectFile.h
new file mode 100644
index 000000000000..cdee7129a2ab
--- /dev/null
+++ b/include/llvm/Object/XCOFFObjectFile.h
@@ -0,0 +1,268 @@
+//===- XCOFFObjectFile.h - XCOFF object file implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the XCOFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H
+#define LLVM_OBJECT_XCOFFOBJECTFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <system_error>
+
+namespace llvm {
+namespace object {
+
+struct XCOFFFileHeader32 {
+ support::ubig16_t Magic;
+ support::ubig16_t NumberOfSections;
+
+ // Unix time value, value of 0 indicates no timestamp.
+ // Negative values are reserved.
+ support::big32_t TimeStamp;
+
+ support::ubig32_t SymbolTableOffset; // File offset to symbol table.
+ support::big32_t NumberOfSymTableEntries;
+ support::ubig16_t AuxHeaderSize;
+ support::ubig16_t Flags;
+};
+
+struct XCOFFFileHeader64 {
+ support::ubig16_t Magic;
+ support::ubig16_t NumberOfSections;
+
+ // Unix time value, value of 0 indicates no timestamp.
+ // Negative values are reserved.
+ support::big32_t TimeStamp;
+
+ support::ubig64_t SymbolTableOffset; // File offset to symbol table.
+ support::ubig16_t AuxHeaderSize;
+ support::ubig16_t Flags;
+ support::ubig32_t NumberOfSymTableEntries;
+};
+
+struct XCOFFSectionHeader32 {
+ char Name[XCOFF::SectionNameSize];
+ support::ubig32_t PhysicalAddress;
+ support::ubig32_t VirtualAddress;
+ support::ubig32_t SectionSize;
+ support::ubig32_t FileOffsetToRawData;
+ support::ubig32_t FileOffsetToRelocationInfo;
+ support::ubig32_t FileOffsetToLineNumberInfo;
+ support::ubig16_t NumberOfRelocations;
+ support::ubig16_t NumberOfLineNumbers;
+ support::big32_t Flags;
+
+ StringRef getName() const;
+};
+
+struct XCOFFSectionHeader64 {
+ char Name[XCOFF::SectionNameSize];
+ support::ubig64_t PhysicalAddress;
+ support::ubig64_t VirtualAddress;
+ support::ubig64_t SectionSize;
+ support::big64_t FileOffsetToRawData;
+ support::big64_t FileOffsetToRelocationInfo;
+ support::big64_t FileOffsetToLineNumberInfo;
+ support::ubig32_t NumberOfRelocations;
+ support::ubig32_t NumberOfLineNumbers;
+ support::big32_t Flags;
+ char Padding[4];
+
+ StringRef getName() const;
+};
+
+struct XCOFFSymbolEntry {
+ enum { NAME_IN_STR_TBL_MAGIC = 0x0 };
+ typedef struct {
+ support::big32_t Magic; // Zero indicates name in string table.
+ support::ubig32_t Offset;
+ } NameInStrTblType;
+
+ typedef struct {
+ uint8_t LanguageId;
+ uint8_t CpuTypeId;
+ } CFileLanguageIdAndTypeIdType;
+
+ union {
+ char SymbolName[XCOFF::SymbolNameSize];
+ NameInStrTblType NameInStrTbl;
+ };
+
+ support::ubig32_t Value; // Symbol value; storage class-dependent.
+ support::big16_t SectionNumber;
+
+ union {
+ support::ubig16_t SymbolType;
+ CFileLanguageIdAndTypeIdType CFileLanguageIdAndTypeId;
+ };
+
+ XCOFF::StorageClass StorageClass;
+ uint8_t NumberOfAuxEntries;
+};
+
+struct XCOFFStringTable {
+ uint32_t Size;
+ const char *Data;
+};
+
+class XCOFFObjectFile : public ObjectFile {
+private:
+ const void *FileHeader = nullptr;
+ const void *SectionHeaderTable = nullptr;
+
+ const XCOFFSymbolEntry *SymbolTblPtr = nullptr;
+ XCOFFStringTable StringTable = {0, nullptr};
+
+ const XCOFFFileHeader32 *fileHeader32() const;
+ const XCOFFFileHeader64 *fileHeader64() const;
+
+ const XCOFFSectionHeader32 *sectionHeaderTable32() const;
+ const XCOFFSectionHeader64 *sectionHeaderTable64() const;
+
+ size_t getFileHeaderSize() const;
+ size_t getSectionHeaderSize() const;
+
+ const XCOFFSectionHeader32 *toSection32(DataRefImpl Ref) const;
+ const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const;
+ void checkSectionAddress(uintptr_t Addr, uintptr_t TableAddr) const;
+ uintptr_t getSectionHeaderTableAddress() const;
+
+ // This returns a pointer to the start of the storage for the name field of
+ // the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily
+ // null-terminated.
+ const char *getSectionNameInternal(DataRefImpl Sec) const;
+
+ int32_t getSectionFlags(DataRefImpl Sec) const;
+
+ static bool isReservedSectionNumber(int16_t SectionNumber);
+ Expected<DataRefImpl> getSectionByNum(int16_t Num) const;
+
+ // Constructor and "create" factory function. The constructor is only a thin
+ // wrapper around the base constructor. The "create" function fills out the
+ // XCOFF-specific information and performs the error checking along the way.
+ XCOFFObjectFile(unsigned Type, MemoryBufferRef Object);
+ static Expected<std::unique_ptr<XCOFFObjectFile>> create(unsigned Type,
+ MemoryBufferRef MBR);
+
+ // Helper for parsing the StringTable. Returns an 'Error' if parsing failed
+ // and an XCOFFStringTable if parsing succeeded.
+ static Expected<XCOFFStringTable> parseStringTable(const XCOFFObjectFile *Obj,
+ uint64_t Offset);
+
+ // Make a friend so it can call the private 'create' function.
+ friend Expected<std::unique_ptr<ObjectFile>>
+ ObjectFile::createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType);
+
+public:
+ // Interface inherited from base classes.
+ void moveSymbolNext(DataRefImpl &Symb) const override;
+ uint32_t getSymbolFlags(DataRefImpl Symb) const override;
+ basic_symbol_iterator symbol_begin() const override;
+ basic_symbol_iterator symbol_end() const override;
+
+ Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
+ Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
+ uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
+ uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+ Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
+ Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
+
+ void moveSectionNext(DataRefImpl &Sec) const override;
+ Expected<StringRef> getSectionName(DataRefImpl Sec) const override;
+ uint64_t getSectionAddress(DataRefImpl Sec) const override;
+ uint64_t getSectionIndex(DataRefImpl Sec) const override;
+ uint64_t getSectionSize(DataRefImpl Sec) const override;
+ Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const override;
+ uint64_t getSectionAlignment(DataRefImpl Sec) const override;
+ bool isSectionCompressed(DataRefImpl Sec) const override;
+ bool isSectionText(DataRefImpl Sec) const override;
+ bool isSectionData(DataRefImpl Sec) const override;
+ bool isSectionBSS(DataRefImpl Sec) const override;
+
+ bool isSectionVirtual(DataRefImpl Sec) const override;
+ relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
+ relocation_iterator section_rel_end(DataRefImpl Sec) const override;
+
+ void moveRelocationNext(DataRefImpl &Rel) const override;
+ uint64_t getRelocationOffset(DataRefImpl Rel) const override;
+ symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
+ uint64_t getRelocationType(DataRefImpl Rel) const override;
+ void getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const override;
+
+ section_iterator section_begin() const override;
+ section_iterator section_end() const override;
+ uint8_t getBytesInAddress() const override;
+ StringRef getFileFormatName() const override;
+ Triple::ArchType getArch() const override;
+ SubtargetFeatures getFeatures() const override;
+ Expected<uint64_t> getStartAddress() const override;
+ bool isRelocatableObject() const override;
+
+ // Below here is the non-inherited interface.
+ bool is64Bit() const;
+
+ const XCOFFSymbolEntry *getPointerToSymbolTable() const {
+ assert(!is64Bit() && "Symbol table handling not supported yet.");
+ return SymbolTblPtr;
+ }
+
+ Expected<StringRef>
+ getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const;
+
+ const XCOFFSymbolEntry *toSymbolEntry(DataRefImpl Ref) const;
+
+ // File header related interfaces.
+ uint16_t getMagic() const;
+ uint16_t getNumberOfSections() const;
+ int32_t getTimeStamp() const;
+
+ // Symbol table offset and entry count are handled differently between
+ // XCOFF32 and XCOFF64.
+ uint32_t getSymbolTableOffset32() const;
+ uint64_t getSymbolTableOffset64() const;
+
+ // Note that this value is signed and might return a negative value. Negative
+ // values are reserved for future use.
+ int32_t getRawNumberOfSymbolTableEntries32() const;
+
+ // The sanitized value appropriate to use as an index into the symbol table.
+ uint32_t getLogicalNumberOfSymbolTableEntries32() const;
+
+ uint32_t getNumberOfSymbolTableEntries64() const;
+
+ uint16_t getOptionalHeaderSize() const;
+ uint16_t getFlags() const;
+
+ // Section header table related interfaces.
+ ArrayRef<XCOFFSectionHeader32> sections32() const;
+ ArrayRef<XCOFFSectionHeader64> sections64() const;
+}; // XCOFFObjectFile
+
+} // namespace object
+} // namespace llvm
+
+#endif // LLVM_OBJECT_XCOFFOBJECTFILE_H
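The XCOFF reader keeps separate 32-bit and 64-bit accessors because the two header layouts order their fields differently (SymbolTableOffset and NumberOfSymTableEntries swap places above). A hedged query sketch, assuming XObj is an object::XCOFFObjectFile obtained through the usual ObjectFile factory path:

  outs() << "sections: " << XObj.getNumberOfSections() << "\n";
  if (!XObj.is64Bit()) {
    outs() << "symbol table entries: "
           << XObj.getLogicalNumberOfSymbolTableEntries32() << "\n";
    for (const object::XCOFFSectionHeader32 &Sec : XObj.sections32())
      outs() << "  " << Sec.getName() << "\n";
  }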
diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h
index 253c627dd683..eec5af928f6d 100644
--- a/include/llvm/ObjectYAML/COFFYAML.h
+++ b/include/llvm/ObjectYAML/COFFYAML.h
@@ -1,9 +1,8 @@
//===- COFFYAML.h - COFF YAMLIO implementation ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
index d620008e22d2..9cbacb88b518 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
@@ -1,9 +1,8 @@
//=- CodeViewYAMLDebugSections.h - CodeView YAMLIO debug sections -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
index 791193c78f19..7c05c9eea05e 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
@@ -1,9 +1,8 @@
//===- CodeViewYAMLSymbols.h - CodeView YAMLIO Symbol implementation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h b/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h
index 344966fe6891..d6cec8d310eb 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h
@@ -1,9 +1,8 @@
//==- CodeViewYAMLTypeHashing.h - CodeView YAMLIO Type hashing ----*- C++-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
index 1b1306df4f53..04b5e0ba3aa1 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h
@@ -1,9 +1,8 @@
//==- CodeViewYAMLTypes.h - CodeView YAMLIO Type implementation --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ObjectYAML/DWARFEmitter.h b/include/llvm/ObjectYAML/DWARFEmitter.h
index ce3227421930..2ccc876d5023 100644
--- a/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -1,9 +1,8 @@
//===--- DWARFEmitter.h - ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h
index 705c88778945..78d736c3ef05 100644
--- a/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/include/llvm/ObjectYAML/DWARFYAML.h
@@ -1,9 +1,8 @@
//===- DWARFYAML.h - DWARF YAMLIO implementation ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h
index f2b0c35521f0..f4212516f486 100644
--- a/include/llvm/ObjectYAML/ELFYAML.h
+++ b/include/llvm/ObjectYAML/ELFYAML.h
@@ -1,9 +1,8 @@
//===- ELFYAML.h - ELF YAMLIO implementation --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -44,6 +43,8 @@ LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_ELFDATA)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_ELFOSABI)
// Just use 64, since it can hold 32-bit values too.
LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_EF)
+// Just use 64, since it can hold 32-bit values too.
+LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_DYNTAG)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, ELF_PF)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, ELF_SHT)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, ELF_REL)
@@ -51,6 +52,7 @@ LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_RSS)
// Just use 64, since it can hold 32-bit values too.
LLVM_YAML_STRONG_TYPEDEF(uint64_t, ELF_SHF)
LLVM_YAML_STRONG_TYPEDEF(uint16_t, ELF_SHN)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STB)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STT)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STV)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, ELF_STO)
@@ -73,6 +75,11 @@ struct FileHeader {
ELF_EM Machine;
ELF_EF Flags;
llvm::yaml::Hex64 Entry;
+
+ Optional<llvm::yaml::Hex16> SHEntSize;
+ Optional<llvm::yaml::Hex16> SHOffset;
+ Optional<llvm::yaml::Hex16> SHNum;
+ Optional<llvm::yaml::Hex16> SHStrNdx;
};
struct SectionName {
@@ -85,53 +92,81 @@ struct ProgramHeader {
llvm::yaml::Hex64 VAddr;
llvm::yaml::Hex64 PAddr;
Optional<llvm::yaml::Hex64> Align;
+ Optional<llvm::yaml::Hex64> FileSize;
+ Optional<llvm::yaml::Hex64> MemSize;
+ Optional<llvm::yaml::Hex64> Offset;
std::vector<SectionName> Sections;
};
struct Symbol {
StringRef Name;
+ Optional<uint32_t> NameIndex;
ELF_STT Type;
StringRef Section;
Optional<ELF_SHN> Index;
+ ELF_STB Binding;
llvm::yaml::Hex64 Value;
llvm::yaml::Hex64 Size;
uint8_t Other;
};
-struct LocalGlobalWeakSymbols {
- std::vector<Symbol> Local;
- std::vector<Symbol> Global;
- std::vector<Symbol> Weak;
-};
-
struct SectionOrType {
StringRef sectionNameOrType;
};
+struct DynamicEntry {
+ ELF_DYNTAG Tag;
+ llvm::yaml::Hex64 Val;
+};
+
struct Section {
enum class SectionKind {
+ Dynamic,
Group,
RawContent,
Relocation,
NoBits,
+ Verdef,
+ Verneed,
+ Symver,
MipsABIFlags
};
SectionKind Kind;
StringRef Name;
ELF_SHT Type;
- ELF_SHF Flags;
+ Optional<ELF_SHF> Flags;
llvm::yaml::Hex64 Address;
StringRef Link;
- StringRef Info;
llvm::yaml::Hex64 AddressAlign;
Optional<llvm::yaml::Hex64> EntSize;
+ // This can be used to override the sh_offset field. It does not place the
+ // section data at the offset specified. Useful for creating invalid objects.
+ Optional<llvm::yaml::Hex64> ShOffset;
+
+ // This can be used to override the sh_size field. It does not affect the
+ // content written.
+ Optional<llvm::yaml::Hex64> ShSize;
+
Section(SectionKind Kind) : Kind(Kind) {}
virtual ~Section();
};
+
+struct DynamicSection : Section {
+ std::vector<DynamicEntry> Entries;
+ Optional<yaml::BinaryRef> Content;
+
+ DynamicSection() : Section(SectionKind::Dynamic) {}
+
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::Dynamic;
+ }
+};
+
struct RawContentSection : Section {
- yaml::BinaryRef Content;
- llvm::yaml::Hex64 Size;
+ Optional<yaml::BinaryRef> Content;
+ Optional<llvm::yaml::Hex64> Size;
+ Optional<llvm::yaml::Hex64> Info;
RawContentSection() : Section(SectionKind::RawContent) {}
@@ -150,10 +185,64 @@ struct NoBitsSection : Section {
}
};
+struct VernauxEntry {
+ uint32_t Hash;
+ uint16_t Flags;
+ uint16_t Other;
+ StringRef Name;
+};
+
+struct VerneedEntry {
+ uint16_t Version;
+ StringRef File;
+ std::vector<VernauxEntry> AuxV;
+};
+
+struct VerneedSection : Section {
+ std::vector<VerneedEntry> VerneedV;
+ llvm::yaml::Hex64 Info;
+
+ VerneedSection() : Section(SectionKind::Verneed) {}
+
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::Verneed;
+ }
+};
+
+struct SymverSection : Section {
+ std::vector<uint16_t> Entries;
+
+ SymverSection() : Section(SectionKind::Symver) {}
+
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::Symver;
+ }
+};
+
+struct VerdefEntry {
+ uint16_t Version;
+ uint16_t Flags;
+ uint16_t VersionNdx;
+ uint32_t Hash;
+ std::vector<StringRef> VerNames;
+};
+
+struct VerdefSection : Section {
+ std::vector<VerdefEntry> Entries;
+ llvm::yaml::Hex64 Info;
+
+ VerdefSection() : Section(SectionKind::Verdef) {}
+
+ static bool classof(const Section *S) {
+ return S->Kind == SectionKind::Verdef;
+ }
+};
+
struct Group : Section {
// Members of a group contain a flag and a list of section indices
// that are part of the group.
std::vector<SectionOrType> Members;
+ StringRef Signature; /* Info */
Group() : Section(SectionKind::Group) {}
@@ -171,6 +260,7 @@ struct Relocation {
struct RelocationSection : Section {
std::vector<Relocation> Relocations;
+ StringRef RelocatableSec; /* Info */
RelocationSection() : Section(SectionKind::Relocation) {}
@@ -208,16 +298,20 @@ struct Object {
// cleaner and nicer if we read them from the YAML as a separate
// top-level key, which automatically ensures that invariants like there
// being a single SHT_SYMTAB section are upheld.
- LocalGlobalWeakSymbols Symbols;
- LocalGlobalWeakSymbols DynamicSymbols;
+ std::vector<Symbol> Symbols;
+ std::vector<Symbol> DynamicSymbols;
};
} // end namespace ELFYAML
} // end namespace llvm
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ProgramHeader)
LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::ELFYAML::Section>)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Symbol)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerdefEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VernauxEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerneedEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Relocation)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionOrType)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionName)
@@ -277,6 +371,10 @@ template <> struct ScalarEnumerationTraits<ELFYAML::ELF_SHN> {
static void enumeration(IO &IO, ELFYAML::ELF_SHN &Value);
};
+template <> struct ScalarEnumerationTraits<ELFYAML::ELF_STB> {
+ static void enumeration(IO &IO, ELFYAML::ELF_STB &Value);
+};
+
template <>
struct ScalarEnumerationTraits<ELFYAML::ELF_STT> {
static void enumeration(IO &IO, ELFYAML::ELF_STT &Value);
@@ -298,6 +396,11 @@ struct ScalarEnumerationTraits<ELFYAML::ELF_REL> {
};
template <>
+struct ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG> {
+ static void enumeration(IO &IO, ELFYAML::ELF_DYNTAG &Value);
+};
+
+template <>
struct ScalarEnumerationTraits<ELFYAML::ELF_RSS> {
static void enumeration(IO &IO, ELFYAML::ELF_RSS &Value);
};
@@ -347,9 +450,20 @@ struct MappingTraits<ELFYAML::Symbol> {
static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol);
};
-template <>
-struct MappingTraits<ELFYAML::LocalGlobalWeakSymbols> {
- static void mapping(IO &IO, ELFYAML::LocalGlobalWeakSymbols &Symbols);
+template <> struct MappingTraits<ELFYAML::DynamicEntry> {
+ static void mapping(IO &IO, ELFYAML::DynamicEntry &Rel);
+};
+
+template <> struct MappingTraits<ELFYAML::VerdefEntry> {
+ static void mapping(IO &IO, ELFYAML::VerdefEntry &E);
+};
+
+template <> struct MappingTraits<ELFYAML::VerneedEntry> {
+ static void mapping(IO &IO, ELFYAML::VerneedEntry &E);
+};
+
+template <> struct MappingTraits<ELFYAML::VernauxEntry> {
+ static void mapping(IO &IO, ELFYAML::VernauxEntry &E);
};
template <> struct MappingTraits<ELFYAML::Relocation> {
diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h
index cec4f86185f0..d7e1c033f43b 100644
--- a/include/llvm/ObjectYAML/MachOYAML.h
+++ b/include/llvm/ObjectYAML/MachOYAML.h
@@ -1,9 +1,8 @@
//===- MachOYAML.h - Mach-O YAMLIO implementation ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/ObjectYAML/MinidumpYAML.h b/include/llvm/ObjectYAML/MinidumpYAML.h
new file mode 100644
index 000000000000..39fdd62e017b
--- /dev/null
+++ b/include/llvm/ObjectYAML/MinidumpYAML.h
@@ -0,0 +1,239 @@
+//===- MinidumpYAML.h - Minidump YAMLIO implementation ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECTYAML_MINIDUMPYAML_H
+#define LLVM_OBJECTYAML_MINIDUMPYAML_H
+
+#include "llvm/BinaryFormat/Minidump.h"
+#include "llvm/Object/Minidump.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace MinidumpYAML {
+
+/// The base class for all minidump streams. The "Type" of the stream
+/// corresponds to the Stream Type field in the minidump file. The "Kind" field
+/// specifies how we are going to treat it. For highly specialized streams (e.g.
+/// SystemInfo), there is a 1:1 mapping between Types and Kinds, but in general
+/// one stream Kind can be used to represent multiple stream Types (e.g. any
+/// unrecognised stream Type will be handled via RawContentStream). The mapping
+/// from Types to Kinds is fixed and given by the static getKind function.
+struct Stream {
+ enum class StreamKind {
+ MemoryList,
+ ModuleList,
+ RawContent,
+ SystemInfo,
+ TextContent,
+ ThreadList,
+ };
+
+ Stream(StreamKind Kind, minidump::StreamType Type) : Kind(Kind), Type(Type) {}
+ virtual ~Stream(); // anchor
+
+ const StreamKind Kind;
+ const minidump::StreamType Type;
+
+ /// Get the stream Kind used for representing streams of a given Type.
+ static StreamKind getKind(minidump::StreamType Type);
+
+ /// Create an empty stream of the given Type.
+ static std::unique_ptr<Stream> create(minidump::StreamType Type);
+
+ /// Create a stream from the given stream directory entry.
+ static Expected<std::unique_ptr<Stream>>
+ create(const minidump::Directory &StreamDesc,
+ const object::MinidumpFile &File);
+};
+
+namespace detail {
+/// A stream representing a list of abstract entries in a minidump stream. Its
+/// instantiations can be used to represent the ModuleList stream and other
+/// streams with a similar structure.
+template <typename EntryT> struct ListStream : public Stream {
+ using entry_type = EntryT;
+
+ std::vector<entry_type> Entries;
+
+ explicit ListStream(std::vector<entry_type> Entries = {})
+ : Stream(EntryT::Kind, EntryT::Type), Entries(std::move(Entries)) {}
+
+ static bool classof(const Stream *S) { return S->Kind == EntryT::Kind; }
+};
+
+/// A structure containing all data belonging to a single minidump module.
+struct ParsedModule {
+ static constexpr Stream::StreamKind Kind = Stream::StreamKind::ModuleList;
+ static constexpr minidump::StreamType Type = minidump::StreamType::ModuleList;
+
+ minidump::Module Entry;
+ std::string Name;
+ yaml::BinaryRef CvRecord;
+ yaml::BinaryRef MiscRecord;
+};
+
+/// A structure containing all data belonging to a single minidump thread.
+struct ParsedThread {
+ static constexpr Stream::StreamKind Kind = Stream::StreamKind::ThreadList;
+ static constexpr minidump::StreamType Type = minidump::StreamType::ThreadList;
+
+ minidump::Thread Entry;
+ yaml::BinaryRef Stack;
+ yaml::BinaryRef Context;
+};
+
+/// A structure containing all data describing a single memory region.
+struct ParsedMemoryDescriptor {
+ static constexpr Stream::StreamKind Kind = Stream::StreamKind::MemoryList;
+ static constexpr minidump::StreamType Type = minidump::StreamType::MemoryList;
+
+ minidump::MemoryDescriptor Entry;
+ yaml::BinaryRef Content;
+};
+} // namespace detail
+
+using ModuleListStream = detail::ListStream<detail::ParsedModule>;
+using ThreadListStream = detail::ListStream<detail::ParsedThread>;
+using MemoryListStream = detail::ListStream<detail::ParsedMemoryDescriptor>;
+
+/// A minidump stream represented as a sequence of hex bytes. This is used as a
+/// fallback when no other stream kind is suitable.
+struct RawContentStream : public Stream {
+ yaml::BinaryRef Content;
+ yaml::Hex32 Size;
+
+ RawContentStream(minidump::StreamType Type, ArrayRef<uint8_t> Content = {})
+ : Stream(StreamKind::RawContent, Type), Content(Content),
+ Size(Content.size()) {}
+
+ static bool classof(const Stream *S) {
+ return S->Kind == StreamKind::RawContent;
+ }
+};
+
+/// SystemInfo minidump stream.
+struct SystemInfoStream : public Stream {
+ minidump::SystemInfo Info;
+ std::string CSDVersion;
+
+ explicit SystemInfoStream(const minidump::SystemInfo &Info,
+ std::string CSDVersion)
+ : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
+ Info(Info), CSDVersion(std::move(CSDVersion)) {}
+
+ SystemInfoStream()
+ : Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {
+ memset(&Info, 0, sizeof(Info));
+ }
+
+ static bool classof(const Stream *S) {
+ return S->Kind == StreamKind::SystemInfo;
+ }
+};
+
+/// A StringRef, which is printed using YAML block notation.
+LLVM_YAML_STRONG_TYPEDEF(StringRef, BlockStringRef)
+
+/// A minidump stream containing textual data (typically, the contents of a
+/// /proc/<pid> file on Linux).
+struct TextContentStream : public Stream {
+ BlockStringRef Text;
+
+ TextContentStream(minidump::StreamType Type, StringRef Text = {})
+ : Stream(StreamKind::TextContent, Type), Text(Text) {}
+
+ static bool classof(const Stream *S) {
+ return S->Kind == StreamKind::TextContent;
+ }
+};
+
+/// The top level structure representing a minidump object, consisting of a
+/// minidump header, and zero or more streams. To construct an Object from a
+/// minidump file, use the static create function. To serialize to/from yaml,
+/// use the appropriate streaming operator on a yaml stream.
+struct Object {
+ Object() = default;
+ Object(const Object &) = delete;
+ Object &operator=(const Object &) = delete;
+ Object(Object &&) = default;
+ Object &operator=(Object &&) = default;
+
+ Object(const minidump::Header &Header,
+ std::vector<std::unique_ptr<Stream>> Streams)
+ : Header(Header), Streams(std::move(Streams)) {}
+
+ /// The minidump header.
+ minidump::Header Header;
+
+ /// The list of streams in this minidump object.
+ std::vector<std::unique_ptr<Stream>> Streams;
+
+ static Expected<Object> create(const object::MinidumpFile &File);
+};
+
+/// Serialize the minidump file represented by Obj to OS in binary form.
+void writeAsBinary(Object &Obj, raw_ostream &OS);
+
+/// Serialize the yaml string as a minidump file to OS in binary form.
+Error writeAsBinary(StringRef Yaml, raw_ostream &OS);
+
+} // namespace MinidumpYAML
+
+namespace yaml {
+template <> struct BlockScalarTraits<MinidumpYAML::BlockStringRef> {
+ static void output(const MinidumpYAML::BlockStringRef &Text, void *,
+ raw_ostream &OS) {
+ OS << Text;
+ }
+
+ static StringRef input(StringRef Scalar, void *,
+ MinidumpYAML::BlockStringRef &Text) {
+ Text = Scalar;
+ return "";
+ }
+};
+
+template <> struct MappingTraits<std::unique_ptr<MinidumpYAML::Stream>> {
+ static void mapping(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
+ static StringRef validate(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S);
+};
+
+template <> struct MappingContextTraits<minidump::MemoryDescriptor, BinaryRef> {
+ static void mapping(IO &IO, minidump::MemoryDescriptor &Memory,
+ BinaryRef &Content);
+};
+
+} // namespace yaml
+
+} // namespace llvm
+
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::ProcessorArchitecture)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::OSPlatform)
+LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+ llvm::MinidumpYAML::MemoryListStream::entry_type)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+ llvm::MinidumpYAML::ModuleListStream::entry_type)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(
+ llvm::MinidumpYAML::ThreadListStream::entry_type)
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::MinidumpYAML::Stream>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::MemoryListStream::entry_type)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::entry_type)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ThreadListStream::entry_type)
+
+LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object)
+
+#endif // LLVM_OBJECTYAML_MINIDUMPYAML_H
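A hedged usage sketch of the writeAsBinary overload declared above; the helper name is illustrative and it only forwards to the library call.

  #include "llvm/ObjectYAML/MinidumpYAML.h"
  #include "llvm/Support/raw_ostream.h"

  // Parse a YAML description of a minidump and serialize it in binary form.
  static llvm::Error emitMinidump(llvm::StringRef YamlText, llvm::raw_ostream &OS) {
    return llvm::MinidumpYAML::writeAsBinary(YamlText, OS);
  }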
diff --git a/include/llvm/ObjectYAML/ObjectYAML.h b/include/llvm/ObjectYAML/ObjectYAML.h
index 00ce86430fca..0015fd3dc501 100644
--- a/include/llvm/ObjectYAML/ObjectYAML.h
+++ b/include/llvm/ObjectYAML/ObjectYAML.h
@@ -1,9 +1,8 @@
//===- ObjectYAML.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,6 +12,7 @@
#include "llvm/ObjectYAML/COFFYAML.h"
#include "llvm/ObjectYAML/ELFYAML.h"
#include "llvm/ObjectYAML/MachOYAML.h"
+#include "llvm/ObjectYAML/MinidumpYAML.h"
#include "llvm/ObjectYAML/WasmYAML.h"
#include "llvm/Support/YAMLTraits.h"
#include <memory>
@@ -27,6 +27,7 @@ struct YamlObjectFile {
std::unique_ptr<COFFYAML::Object> Coff;
std::unique_ptr<MachOYAML::Object> MachO;
std::unique_ptr<MachOYAML::UniversalBinary> FatMachO;
+ std::unique_ptr<MinidumpYAML::Object> Minidump;
std::unique_ptr<WasmYAML::Object> Wasm;
};
diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h
index 406dd7cb515f..2411dc7ac17d 100644
--- a/include/llvm/ObjectYAML/WasmYAML.h
+++ b/include/llvm/ObjectYAML/WasmYAML.h
@@ -1,9 +1,8 @@
//===- WasmYAML.h - Wasm YAMLIO implementation ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -39,6 +38,7 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, SymbolKind)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, SegmentFlags)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, LimitFlags)
LLVM_YAML_STRONG_TYPEDEF(uint32_t, ComdatKind)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, FeaturePolicyPrefix)
struct FileHeader {
yaml::Hex32 Version;
@@ -112,8 +112,9 @@ struct Relocation {
};
struct DataSegment {
- uint32_t MemoryIndex;
uint32_t SectionOffset;
+ uint32_t InitFlags;
+ uint32_t MemoryIndex;
wasm::WasmInitExpr Offset;
yaml::BinaryRef Content;
};
@@ -123,6 +124,16 @@ struct NameEntry {
StringRef Name;
};
+struct ProducerEntry {
+ std::string Name;
+ std::string Version;
+};
+
+struct FeatureEntry {
+ FeaturePolicyPrefix Prefix;
+ std::string Name;
+};
+
struct SegmentInfo {
uint32_t Index;
StringRef Name;
@@ -224,6 +235,30 @@ struct LinkingSection : CustomSection {
std::vector<Comdat> Comdats;
};
+struct ProducersSection : CustomSection {
+ ProducersSection() : CustomSection("producers") {}
+
+ static bool classof(const Section *S) {
+ auto C = dyn_cast<CustomSection>(S);
+ return C && C->Name == "producers";
+ }
+
+ std::vector<ProducerEntry> Languages;
+ std::vector<ProducerEntry> Tools;
+ std::vector<ProducerEntry> SDKs;
+};
+
+struct TargetFeaturesSection : CustomSection {
+ TargetFeaturesSection() : CustomSection("target_features") {}
+
+ static bool classof(const Section *S) {
+ auto C = dyn_cast<CustomSection>(S);
+ return C && C->Name == "target_features";
+ }
+
+ std::vector<FeatureEntry> Features;
+};
+
struct TypeSection : Section {
TypeSection() : Section(wasm::WASM_SEC_TYPE) {}
@@ -344,6 +379,16 @@ struct DataSection : Section {
std::vector<DataSegment> Segments;
};
+struct DataCountSection : Section {
+ DataCountSection() : Section(wasm::WASM_SEC_DATACOUNT) {}
+
+ static bool classof(const Section *S) {
+ return S->Type == wasm::WASM_SEC_DATACOUNT;
+ }
+
+ uint32_t Count;
+};
+
struct Object {
FileHeader Header;
std::vector<std::unique_ptr<Section>> Sections;
@@ -366,6 +411,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Function)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::LocalDecl)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ProducerEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::FeatureEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SegmentInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction)
@@ -444,6 +491,18 @@ template <> struct MappingTraits<WasmYAML::NameEntry> {
static void mapping(IO &IO, WasmYAML::NameEntry &NameEntry);
};
+template <> struct MappingTraits<WasmYAML::ProducerEntry> {
+ static void mapping(IO &IO, WasmYAML::ProducerEntry &ProducerEntry);
+};
+
+template <> struct ScalarEnumerationTraits<WasmYAML::FeaturePolicyPrefix> {
+ static void enumeration(IO &IO, WasmYAML::FeaturePolicyPrefix &Prefix);
+};
+
+template <> struct MappingTraits<WasmYAML::FeatureEntry> {
+ static void mapping(IO &IO, WasmYAML::FeatureEntry &FeatureEntry);
+};
+
template <> struct MappingTraits<WasmYAML::SegmentInfo> {
static void mapping(IO &IO, WasmYAML::SegmentInfo &SegmentInfo);
};
diff --git a/include/llvm/ObjectYAML/XCOFFYAML.h b/include/llvm/ObjectYAML/XCOFFYAML.h
new file mode 100644
index 000000000000..f99004e69762
--- /dev/null
+++ b/include/llvm/ObjectYAML/XCOFFYAML.h
@@ -0,0 +1,71 @@
+//===----- XCOFFYAML.h - XCOFF YAMLIO implementation ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares classes for handling the YAML representation of XCOFF.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_OBJECTYAML_XCOFFYAML_H
+#define LLVM_OBJECTYAML_XCOFFYAML_H
+
+#include "llvm/BinaryFormat/XCOFF.h"
+#include "llvm/ObjectYAML/YAML.h"
+#include <vector>
+
+namespace llvm {
+namespace XCOFFYAML {
+
+struct FileHeader {
+ llvm::yaml::Hex16 Magic;
+ uint16_t NumberOfSections;
+ int32_t TimeStamp;
+ llvm::yaml::Hex32 SymbolTableOffset; // File offset to symbol table.
+ int32_t NumberOfSymTableEntries;
+ uint16_t AuxHeaderSize;
+ llvm::yaml::Hex16 Flags;
+};
+
+struct Symbol {
+ StringRef SymbolName;
+ llvm::yaml::Hex32 Value; // Symbol value; storage class-dependent.
+ StringRef SectionName;
+ llvm::yaml::Hex16 Type;
+ XCOFF::StorageClass StorageClass;
+ uint8_t NumberOfAuxEntries; // Number of auxiliary entries
+};
+
+struct Object {
+ FileHeader Header;
+ std::vector<Symbol> Symbols;
+ Object();
+};
+} // namespace XCOFFYAML
+} // namespace llvm
+LLVM_YAML_IS_SEQUENCE_VECTOR(XCOFFYAML::Symbol)
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<XCOFF::StorageClass> {
+ static void enumeration(IO &IO, XCOFF::StorageClass &Value);
+};
+
+template <> struct MappingTraits<XCOFFYAML::FileHeader> {
+ static void mapping(IO &IO, XCOFFYAML::FileHeader &H);
+};
+
+template <> struct MappingTraits<XCOFFYAML::Object> {
+ static void mapping(IO &IO, XCOFFYAML::Object &Obj);
+};
+
+template <> struct MappingTraits<XCOFFYAML::Symbol> {
+ static void mapping(IO &IO, XCOFFYAML::Symbol &S);
+};
+
+} // namespace yaml
+} // namespace llvm
+
+#endif // LLVM_OBJECTYAML_XCOFFYAML_H
diff --git a/include/llvm/ObjectYAML/YAML.h b/include/llvm/ObjectYAML/YAML.h
index 163cd8dfcf08..37014109a615 100644
--- a/include/llvm/ObjectYAML/YAML.h
+++ b/include/llvm/ObjectYAML/YAML.h
@@ -1,9 +1,8 @@
//===- YAML.h ---------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -74,8 +73,7 @@ class BinaryRef {
public:
BinaryRef() = default;
BinaryRef(ArrayRef<uint8_t> Data) : Data(Data), DataIsHexString(false) {}
- BinaryRef(StringRef Data)
- : Data(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()) {}
+ BinaryRef(StringRef Data) : Data(arrayRefFromStringRef(Data)) {}
/// The number of bytes that are represented by this BinaryRef.
/// This is the number of bytes that writeAsBinary() will write.
diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h
index d0086bb6d611..22e2bcf06a6e 100644
--- a/include/llvm/Option/Arg.h
+++ b/include/llvm/Option/Arg.h
@@ -1,9 +1,8 @@
//===- Arg.h - Parsed Argument Classes --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -59,6 +58,11 @@ private:
/// The argument values, as C strings.
SmallVector<const char *, 2> Values;
+ /// If this arg was created through an alias, this is the original alias arg.
+ /// For example, *this might be "-finput-charset=utf-8" and Alias might
+ /// point to an arg representing "/source-charset:utf-8".
+ std::unique_ptr<Arg> Alias;
+
public:
Arg(const Option Opt, StringRef Spelling, unsigned Index,
const Arg *BaseArg = nullptr);
@@ -71,7 +75,15 @@ public:
~Arg();
const Option &getOption() const { return Opt; }
+
+ /// Returns the used prefix and name of the option:
+ /// For `--foo=bar`, returns `--foo=`.
+ /// This is often the wrong function to call:
+ /// * Use `getValue()` to get `bar`.
+ /// * Use `getAsString()` to get a string suitable for printing an Arg in
+ /// a diagnostic.
StringRef getSpelling() const { return Spelling; }
+
unsigned getIndex() const { return Index; }
/// Return the base argument which generated this arg.
@@ -83,6 +95,11 @@ public:
}
void setBaseArg(const Arg *BaseArg) { this->BaseArg = BaseArg; }
+ /// Args are converted to their unaliased form. For args that originally
+ /// came from an alias, this returns the alias the arg was produced from.
+ const Arg* getAlias() const { return Alias.get(); }
+ void setAlias(std::unique_ptr<Arg> Alias) { this->Alias = std::move(Alias); }
+
bool getOwnsValues() const { return OwnsValues; }
void setOwnsValues(bool Value) const { OwnsValues = Value; }
@@ -120,8 +137,10 @@ public:
void print(raw_ostream &O) const;
void dump() const;
- /// Return a formatted version of the argument and
- /// its values, for debugging and diagnostics.
+ /// Return a formatted version of the argument and its values, for
+ /// diagnostics. Since this is for diagnostics, if this Arg was produced
+ /// through an alias, this returns the string representation of the alias
+ /// that the user wrote.
std::string getAsString(const ArgList &Args) const;
};
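As a quick illustration of how these accessors differ, assume an llvm::opt::Arg *A parsed from a hypothetical `--foo=bar` and its owning ArgList Args (both illustrative):

  // A->getSpelling()     -> "--foo="    (used prefix + name only)
  // A->getValue()        -> "bar"       (the value)
  // A->getAsString(Args) -> the user-facing form; if A came from an alias,
  //                         this is the spelling the user actually wrote.
  if (A->getAlias())
    llvm::errs() << "note: " << A->getAsString(Args) << " came from an alias\n";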
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
index 687c8cbb02f9..74bfadcba726 100644
--- a/include/llvm/Option/ArgList.h
+++ b/include/llvm/Option/ArgList.h
@@ -1,9 +1,8 @@
//===- ArgList.h - Argument List Management ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -302,10 +301,12 @@ public:
bool hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg,
bool Default = true) const;
- /// AddLastArg - Render only the last argument match \p Id0, if present.
- void AddLastArg(ArgStringList &Output, OptSpecifier Id0) const;
- void AddLastArg(ArgStringList &Output, OptSpecifier Id0,
- OptSpecifier Id1) const;
+ /// Render only the last argument match \p Id0, if present.
+ template<typename ...OptSpecifiers>
+ void AddLastArg(ArgStringList &Output, OptSpecifiers ...Ids) const {
+ if (Arg *A = getLastArg(Ids...)) // Claims every Arg matching any of Ids.
+ A->render(*this, Output);
+ }
/// AddAllArgsExcept - Render all arguments matching any of the given ids
/// and not matching any of the excluded ids.
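A hedged usage sketch of the new variadic AddLastArg, assuming an llvm::opt::ArgList named Args and illustrative tablegen'd option IDs OPT_ffoo and OPT_fno_foo:

  llvm::opt::ArgStringList CmdArgs;
  // Renders whichever of the two options appears last on the command line and
  // claims every matching Arg, exactly as the template above does.
  Args.AddLastArg(CmdArgs, OPT_ffoo, OPT_fno_foo);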
diff --git a/include/llvm/Option/OptParser.td b/include/llvm/Option/OptParser.td
index 9c373741770b..a68f17a8b10b 100644
--- a/include/llvm/Option/OptParser.td
+++ b/include/llvm/Option/OptParser.td
@@ -1,9 +1,8 @@
//===--- OptParser.td - Common Option Parsing Interfaces ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Option/OptSpecifier.h b/include/llvm/Option/OptSpecifier.h
index 84c3cf8ad534..7a5fcfb18b38 100644
--- a/include/llvm/Option/OptSpecifier.h
+++ b/include/llvm/Option/OptSpecifier.h
@@ -1,9 +1,8 @@
//===- OptSpecifier.h - Option Specifiers -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h
index fdb05d8a15af..5db30436069d 100644
--- a/include/llvm/Option/OptTable.h
+++ b/include/llvm/Option/OptTable.h
@@ -1,9 +1,8 @@
//===- OptTable.h - Option Table --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h
index b09f6043b7a9..33813d28d274 100644
--- a/include/llvm/Option/Option.h
+++ b/include/llvm/Option/Option.h
@@ -1,9 +1,8 @@
//===- Option.h - Abstract Driver Options -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -207,6 +206,11 @@ public:
/// start.
Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+private:
+ Arg *acceptInternal(const ArgList &Args, unsigned &Index,
+ unsigned ArgSize) const;
+
+public:
void print(raw_ostream &O) const;
void dump() const;
};
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 5935a0853d32..329f7eaba73d 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -1,9 +1,8 @@
//===- llvm/Pass.h - Base class for Passes ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index a075eb557472..1228534deb95 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -1,9 +1,8 @@
//===- llvm/PassAnalysisSupport.h - Analysis Pass Support code --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/PassInfo.h b/include/llvm/PassInfo.h
index 2f1ab4d43377..686fc044ebcb 100644
--- a/include/llvm/PassInfo.h
+++ b/include/llvm/PassInfo.h
@@ -1,9 +1,8 @@
//===- llvm/PassInfo.h - Pass Info class ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
index 57462138c5ae..b9a015430c10 100644
--- a/include/llvm/PassRegistry.h
+++ b/include/llvm/PassRegistry.h
@@ -1,9 +1,8 @@
//===- llvm/PassRegistry.h - Pass Information Registry ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index 1bf23dcba50b..ab90217ce4a8 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -1,9 +1,8 @@
//===- llvm/PassSupport.h - Pass Support code -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h
index fa59345a02cf..5e6660599f93 100644
--- a/include/llvm/Passes/PassBuilder.h
+++ b/include/llvm/Passes/PassBuilder.h
@@ -1,9 +1,8 @@
//===- Parsing, selection, and construction of pass pipelines --*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -32,36 +31,85 @@ class ModuleSummaryIndex;
/// A struct capturing PGO tunables.
struct PGOOptions {
- PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "",
- std::string SampleProfileFile = "",
- std::string ProfileRemappingFile = "",
- bool RunProfileGen = false, bool SamplePGOSupport = false)
- : ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile),
- SampleProfileFile(SampleProfileFile),
- ProfileRemappingFile(ProfileRemappingFile),
- RunProfileGen(RunProfileGen),
- SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) {
- assert((RunProfileGen ||
- !SampleProfileFile.empty() ||
- !ProfileUseFile.empty() ||
- SamplePGOSupport) && "Illegal PGOOptions.");
+ enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
+ enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+ PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
+ std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
+ CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false)
+ : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+ ProfileRemappingFile(ProfileRemappingFile), Action(Action),
+ CSAction(CSAction),
+ SamplePGOSupport(SamplePGOSupport || Action == SampleUse) {
+ // Note: we do allow ProfileFile.empty() for Action=IRUse, since LTO can
+ // call back with the IRUse action without a ProfileFile.
+
+ // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+ assert(this->CSAction == NoCSAction ||
+ (this->Action != IRInstr && this->Action != SampleUse));
+
+ // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+ assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+ // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+ // a profile.
+ assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+ // If neither Action nor CSAction, SamplePGOSupport needs to be true.
+ assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+ this->SamplePGOSupport);
}
- std::string ProfileGenFile;
- std::string ProfileUseFile;
- std::string SampleProfileFile;
+ std::string ProfileFile;
+ std::string CSProfileGenFile;
std::string ProfileRemappingFile;
- bool RunProfileGen;
+ PGOAction Action;
+ CSPGOAction CSAction;
bool SamplePGOSupport;
};
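As a concrete (illustrative) instance that satisfies the asserted constraints above, a CSPGO build that consumes an IR profile while emitting context-sensitive instrumentation could be configured as:

  // Action=IRUse (neither IRInstr nor SampleUse) permits a CSAction, and
  // CSAction=CSIRInstr supplies a non-empty CSProfileGenFile.
  PGOOptions Opts("default.profdata", "default_csir.profraw",
                  /*ProfileRemappingFile=*/"", PGOOptions::IRUse,
                  PGOOptions::CSIRInstr);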
+/// Tunable parameters for passes in the default pipelines.
+class PipelineTuningOptions {
+public:
+ /// Constructor sets pipeline tuning defaults based on cl::opts. Each option
+ /// can be set in the PassBuilder when using LLVM as a library.
+ PipelineTuningOptions();
+
+ /// Tuning option to set loop interleaving on/off. Its default value is that
+ /// of the flag: `-interleave-loops`.
+ bool LoopInterleaving;
+
+ /// Tuning option to enable/disable loop vectorization. Its default value is
+ /// that of the flag: `-vectorize-loops`.
+ bool LoopVectorization;
+
+ /// Tuning option to enable/disable SLP vectorization. Its default value
+ /// is that of the flag: `-vectorize-slp`.
+ bool SLPVectorization;
+
+ /// Tuning option to enable/disable loop unrolling. Its default value is true.
+ bool LoopUnrolling;
+
+ /// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
+ /// is that of the flag: `-forget-scev-loop-unroll`.
+ bool ForgetAllSCEVInLoopUnroll;
+
+ /// Tuning option to cap the number of calls to retrieve clobbering accesses in
+ /// MemorySSA, in LICM.
+ unsigned LicmMssaOptCap;
+
+ /// Tuning option to disable promotion to scalars in LICM with MemorySSA, if
+ /// the number of accesses is too large.
+ unsigned LicmMssaNoAccForPromotionCap;
+};
+
/// This class provides access to building LLVM's passes.
///
-/// It's members provide the baseline state available to passes during their
+/// Its members provide the baseline state available to passes during their
/// construction. The \c PassRegistry.def file specifies how to construct all
/// of the built-in passes, and those may reference these members during
/// construction.
class PassBuilder {
TargetMachine *TM;
+ PipelineTuningOptions PTO;
Optional<PGOOptions> PGOOpt;
PassInstrumentationCallbacks *PIC;
@@ -85,9 +133,9 @@ public:
enum class ThinLTOPhase {
/// No ThinLTO behavior needed.
None,
- // ThinLTO prelink (summary) phase.
+ /// ThinLTO prelink (summary) phase.
PreLink,
- // ThinLTO postlink (backend compile) phase.
+ /// ThinLTO postlink (backend compile) phase.
PostLink
};
@@ -178,14 +226,15 @@ public:
};
explicit PassBuilder(TargetMachine *TM = nullptr,
+ PipelineTuningOptions PTO = PipelineTuningOptions(),
Optional<PGOOptions> PGOOpt = None,
PassInstrumentationCallbacks *PIC = nullptr)
- : TM(TM), PGOOpt(PGOOpt), PIC(PIC) {}
+ : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {}
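A minimal sketch of wiring PipelineTuningOptions through this constructor (assumes #include "llvm/Passes/PassBuilder.h"; the option choices are illustrative):

  llvm::PipelineTuningOptions PTO;            // defaults mirror the cl::opts
  PTO.LoopUnrolling = false;                  // e.g. disable unrolling
  PTO.LoopInterleaving = false;
  llvm::PassBuilder PB(/*TM=*/nullptr, PTO);  // PGOOpt and PIC stay defaulted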
/// Cross register the analysis managers through their proxies.
///
/// This is an interface that can be used to cross register each
- // AnalysisManager with all the others analysis managers.
+ /// AnalysisManager with all the other analysis managers.
void crossRegisterProxies(LoopAnalysisManager &LAM,
FunctionAnalysisManager &FAM,
CGSCCAnalysisManager &CGAM,
@@ -275,7 +324,8 @@ public:
/// require some transformations for semantic reasons, they should explicitly
/// build them.
ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool DebugLogging = false);
+ bool DebugLogging = false,
+ bool LTOPreLink = false);
/// Build a per-module default optimization pipeline.
///
@@ -289,7 +339,8 @@ public:
/// require some transformations for semantic reasons, they should explicitly
/// build them.
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging = false);
+ bool DebugLogging = false,
+ bool LTOPreLink = false);
/// Build a pre-link, ThinLTO-targeting default optimization pipeline to
/// a pass manager.
@@ -392,7 +443,7 @@ public:
/// {{@ Parse a textual pass pipeline description into a specific PassManager
///
/// Automatic deduction of an appropriate pass manager stack is not supported.
- /// For example, to insert a loop pass 'lpass' into a FunctinoPassManager,
+ /// For example, to insert a loop pass 'lpass' into a FunctionPassManager,
/// this is the valid pipeline text:
///
/// function(lpass)
@@ -606,9 +657,8 @@ private:
bool VerifyEachPass, bool DebugLogging);
void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
- OptimizationLevel Level, bool RunProfileGen,
- std::string ProfileGenFile,
- std::string ProfileUseFile,
+ OptimizationLevel Level, bool RunProfileGen, bool IsCS,
+ std::string ProfileFile,
std::string ProfileRemappingFile);
void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
diff --git a/include/llvm/Passes/PassPlugin.h b/include/llvm/Passes/PassPlugin.h
index af8f11a7a352..013b7a827c47 100644
--- a/include/llvm/Passes/PassPlugin.h
+++ b/include/llvm/Passes/PassPlugin.h
@@ -1,9 +1,8 @@
//===- llvm/Passes/PassPlugin.h - Public Plugin API -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Passes/StandardInstrumentations.h b/include/llvm/Passes/StandardInstrumentations.h
index 8c6f5e1e22f7..3d3002eecce9 100644
--- a/include/llvm/Passes/StandardInstrumentations.h
+++ b/include/llvm/Passes/StandardInstrumentations.h
@@ -1,9 +1,8 @@
//===- StandardInstrumentations.h ------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -64,6 +63,8 @@ public:
StandardInstrumentations() = default;
void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+ TimePassesHandler &getTimePasses() { return TimePasses; }
};
} // namespace llvm
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index beaa36553287..11758ac4cf2f 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -1,9 +1,8 @@
//===- CoverageMapping.h - Code coverage mapping support --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
index c88c71a6d6f4..57a2aaefd660 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
@@ -1,9 +1,8 @@
//===- CoverageMappingReader.h - Code coverage mapping reader ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,9 +203,15 @@ public:
BinaryCoverageReader(const BinaryCoverageReader &) = delete;
BinaryCoverageReader &operator=(const BinaryCoverageReader &) = delete;
+ static Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
+ create(MemoryBufferRef ObjectBuffer, StringRef Arch,
+ SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers);
+
static Expected<std::unique_ptr<BinaryCoverageReader>>
- create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
- StringRef Arch);
+ createCoverageReaderFromBuffer(StringRef Coverage,
+ InstrProfSymtab &&ProfileNames,
+ uint8_t BytesInAddress,
+ support::endianness Endian);
Error readNextRecord(CoverageMappingRecord &Record) override;
};
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
index 86fb1bdf1773..5f88cacdfcbb 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
@@ -1,9 +1,8 @@
//===- CoverageMappingWriter.h - Code coverage mapping writer ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ProfileData/GCOV.h b/include/llvm/ProfileData/GCOV.h
index a088f63a6915..004ff3f4a2e2 100644
--- a/include/llvm/ProfileData/GCOV.h
+++ b/include/llvm/ProfileData/GCOV.h
@@ -1,9 +1,8 @@
//===- GCOV.h - LLVM coverage tool ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,9 +44,10 @@ enum GCOVVersion { V402, V404, V704 };
/// A struct for passing gcov options between functions.
struct Options {
- Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N)
+ Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N, bool X)
: AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F),
- PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {}
+ PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N),
+ HashFilenames(X) {}
bool AllBlocks;
bool BranchInfo;
@@ -57,6 +57,7 @@ struct Options {
bool UncondBranch;
bool LongFileNames;
bool NoOutput;
+ bool HashFilenames;
};
} // end namespace GCOV
@@ -317,12 +318,6 @@ class GCOVBlock {
uint64_t Count = 0;
};
- struct SortDstEdgesFunctor {
- bool operator()(const GCOVEdge *E1, const GCOVEdge *E2) {
- return E1->Dst.Number < E2->Dst.Number;
- }
- };
-
public:
using EdgeIterator = SmallVectorImpl<GCOVEdge *>::const_iterator;
using BlockVector = SmallVector<const GCOVBlock *, 4>;
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index dc45021fc47d..c7d764ade30d 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -1,9 +1,8 @@
//===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -235,7 +234,7 @@ bool isIRPGOFlagSet(const Module *M);
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);
enum InstrProfValueKind : uint32_t {
-#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value,
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};
@@ -591,6 +590,70 @@ StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
return PGOName.drop_front(S + 1);
}
+// To store the sums of profile count values, or the percentage of
+// the sums of the total count values.
+struct CountSumOrPercent {
+ uint64_t NumEntries;
+ double CountSum;
+ double ValueCounts[IPVK_Last - IPVK_First + 1];
+ CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {}
+ void reset() {
+ NumEntries = 0;
+ CountSum = 0.0f;
+ for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++)
+ ValueCounts[I] = 0.0f;
+ }
+};
+
+// Function level or program level overlap information.
+struct OverlapStats {
+ enum OverlapStatsLevel { ProgramLevel, FunctionLevel };
+ // Sum of the total count values for the base profile.
+ CountSumOrPercent Base;
+ // Sum of the total count values for the test profile.
+ CountSumOrPercent Test;
+ // Overlap score. Should be in the range [0.0, 1.0].
+ CountSumOrPercent Overlap;
+ CountSumOrPercent Mismatch;
+ CountSumOrPercent Unique;
+ OverlapStatsLevel Level;
+ const std::string *BaseFilename;
+ const std::string *TestFilename;
+ StringRef FuncName;
+ uint64_t FuncHash;
+ bool Valid;
+
+ OverlapStats(OverlapStatsLevel L = ProgramLevel)
+ : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0),
+ Valid(false) {}
+
+ void dump(raw_fd_ostream &OS) const;
+
+ void setFuncInfo(StringRef Name, uint64_t Hash) {
+ FuncName = Name;
+ FuncHash = Hash;
+ }
+
+ Error accumuateCounts(const std::string &BaseFilename,
+ const std::string &TestFilename, bool IsCS);
+ void addOneMismatch(const CountSumOrPercent &MismatchFunc);
+ void addOneUnique(const CountSumOrPercent &UniqueFunc);
+
+ static inline double score(uint64_t Val1, uint64_t Val2, double Sum1,
+ double Sum2) {
+ if (Sum1 < 1.0f || Sum2 < 1.0f)
+ return 0.0f;
+ return std::min(Val1 / Sum1, Val2 / Sum2);
+ }
+};
+
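For instance, score(30, 90, 100.0, 200.0) above evaluates to min(30/100, 90/200) = min(0.3, 0.45) = 0.3, while any call with Sum1 or Sum2 below 1.0 returns 0.0.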
+// This is used to filter the functions whose overlap information is
+// to be output.
+struct OverlapFuncFilters {
+ uint64_t ValueCutoff;
+ const std::string NameFilter;
+};
+
struct InstrProfValueSiteRecord {
/// Value profiling data pairs at a given value site.
std::list<InstrProfValueData> ValueData;
@@ -616,6 +679,10 @@ struct InstrProfValueSiteRecord {
function_ref<void(instrprof_error)> Warn);
/// Scale up value profile data counts.
void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
+
+ /// Compute the overlap between this record and the Input record.
+ void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
+ OverlapStats &Overlap, OverlapStats &FuncLevelOverlap);
};
/// Profiling information for a single function.
@@ -704,6 +771,18 @@ struct InstrProfRecord {
/// Clear value data entries
void clearValueData() { ValueData = nullptr; }
+ /// Compute the sums of all counts and store in Sum.
+ void accumuateCounts(CountSumOrPercent &Sum) const;
+
+ /// Compute the overlap between this InstrProfRecord and Other.
+ void overlap(InstrProfRecord &Other, OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff);
+
+ /// Compute the overlap of value profile counts.
+ void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
+ OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap);
+
private:
struct ValueProfData {
std::vector<InstrProfValueSiteRecord> IndirectCallSites;
@@ -768,10 +847,20 @@ struct NamedInstrProfRecord : InstrProfRecord {
StringRef Name;
uint64_t Hash;
+ // We reserve this bit as the flag for context sensitive profile record.
+ static const int CS_FLAG_IN_FUNC_HASH = 60;
+
NamedInstrProfRecord() = default;
NamedInstrProfRecord(StringRef Name, uint64_t Hash,
std::vector<uint64_t> Counts)
: InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
+
+ static bool hasCSFlagInHash(uint64_t FuncHash) {
+ return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
+ }
+ static void setCSFlagInHash(uint64_t &FuncHash) {
+ FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
+ }
};
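A tiny illustrative check of the bit-60 helpers above:

  uint64_t Hash = 0x1234;
  NamedInstrProfRecord::setCSFlagInHash(Hash);              // Hash |= 1ULL << 60
  bool IsCS = NamedInstrProfRecord::hasCSFlagInHash(Hash);  // now true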
uint32_t InstrProfRecord::getNumValueKinds() const {
@@ -1005,6 +1094,8 @@ namespace RawInstrProf {
// from control data struct is changed from raw pointer to Name's MD5 value.
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
// raw header.
+// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
+// sensitive records.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
template <class IntPtrT> inline uint64_t getMagic();
@@ -1041,6 +1132,12 @@ struct Header {
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
int64_t &RangeLast);
-} // end namespace llvm
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
+// aware this is an ir_level profile so it can set the version flag.
+void createIRLevelProfileFlagVar(Module &M, bool IsCS);
+// Create the variable for the profile file name.
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
+
+} // end namespace llvm
#endif // LLVM_PROFILEDATA_INSTRPROF_H
diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc
index 454620ed997a..749781b9ac2d 100644
--- a/include/llvm/ProfileData/InstrProfData.inc
+++ b/include/llvm/ProfileData/InstrProfData.inc
@@ -1,9 +1,8 @@
/*===-- InstrProfData.inc - instr profiling runtime structures -*- C++ -*-=== *\
|*
-|* The LLVM Compiler Infrastructure
-|*
-|* This file is distributed under the University of Illinois Open Source
-|* License. See LICENSE.TXT for details.
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/
/*
@@ -170,7 +169,7 @@ VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx))
/* VALUE_PROF_KIND start */
#ifndef VALUE_PROF_KIND
-#define VALUE_PROF_KIND(Enumerator, Value)
+#define VALUE_PROF_KIND(Enumerator, Value, Descr)
#else
#define INSTR_PROF_DATA_DEFINED
#endif
@@ -183,16 +182,16 @@ VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx))
* For this remapping the ProfData is used. ProfData contains both the function
* name hash and the function address.
*/
-VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0)
+VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0, "indirect call target")
/* For memory intrinsic functions size profiling. */
-VALUE_PROF_KIND(IPVK_MemOPSize, 1)
+VALUE_PROF_KIND(IPVK_MemOPSize, 1, "memory intrinsic functions size")
/* These two kinds must be the last to be
* declared. This is to make sure the string
* array created with the template can be
* indexed with the kind value.
*/
-VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget)
-VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize)
+VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget, "first")
+VALUE_PROF_KIND(IPVK_Last, IPVK_MemOPSize, "last")
#undef VALUE_PROF_KIND
/* VALUE_PROF_KIND end */
@@ -250,22 +249,25 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
#define INSTR_PROF_DATA_DEFINED
INSTR_PROF_SECT_ENTRY(IPSK_data, \
INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \
- INSTR_PROF_QUOTE(INSTR_PROF_DATA_COFF), "__DATA,")
+ INSTR_PROF_DATA_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_cnts, \
INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \
- INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COFF), "__DATA,")
+ INSTR_PROF_CNTS_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_name, \
INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \
- INSTR_PROF_QUOTE(INSTR_PROF_NAME_COFF), "__DATA,")
+ INSTR_PROF_NAME_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vals, \
INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \
- INSTR_PROF_QUOTE(INSTR_PROF_VALS_COFF), "__DATA,")
+ INSTR_PROF_VALS_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \
INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \
- INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COFF), "__DATA,")
+ INSTR_PROF_VNODES_COFF, "__DATA,")
INSTR_PROF_SECT_ENTRY(IPSK_covmap, \
INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \
- INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COFF), "__LLVM_COV,")
+ INSTR_PROF_COVMAP_COFF, "__LLVM_COV,")
+INSTR_PROF_SECT_ENTRY(IPSK_orderfile, \
+ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON), \
+ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COFF), "__DATA,")
#undef INSTR_PROF_SECT_ENTRY
#endif
@@ -636,10 +638,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* version for other variants of profile. We set the lowest bit of the upper 8
* bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
* generated profile, and 0 if this is a Clang FE generated profile.
+ * 1 in bit 57 indicates there are context-sensitive records in the profile.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
+#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
@@ -655,13 +659,17 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define INSTR_PROF_VALS_COMMON __llvm_prf_vals
#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds
#define INSTR_PROF_COVMAP_COMMON __llvm_covmap
-/* Win32 */
-#define INSTR_PROF_DATA_COFF .lprfd
-#define INSTR_PROF_NAME_COFF .lprfn
-#define INSTR_PROF_CNTS_COFF .lprfc
-#define INSTR_PROF_VALS_COFF .lprfv
-#define INSTR_PROF_VNODES_COFF .lprfnd
-#define INSTR_PROF_COVMAP_COFF .lcovmap
+#define INSTR_PROF_ORDERFILE_COMMON __llvm_orderfile
+/* Windows section names. Because these section names contain dollar characters,
+ * they must be quoted.
+ */
+#define INSTR_PROF_DATA_COFF ".lprfd$M"
+#define INSTR_PROF_NAME_COFF ".lprfn$M"
+#define INSTR_PROF_CNTS_COFF ".lprfc$M"
+#define INSTR_PROF_VALS_COFF ".lprfv$M"
+#define INSTR_PROF_VNODES_COFF ".lprfnd$M"
+#define INSTR_PROF_COVMAP_COFF ".lcovmap$M"
+#define INSTR_PROF_ORDERFILE_COFF ".lorderfile$M"
#ifdef _WIN32
/* Runtime section names and name strings. */
@@ -675,32 +683,30 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Value profile nodes section. */
#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF
#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF
+#define INSTR_PROF_ORDERFILE_SECT_NAME INSTR_PROF_ORDERFILE_COFF
#else
/* Runtime section names and name strings. */
-#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COMMON
-#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COMMON
-#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COMMON
+#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON)
+#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON)
+#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON)
/* Array of pointers. Each pointer points to a list
* of value nodes associated with one value site.
*/
-#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COMMON
+#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON)
/* Value profile nodes section. */
-#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COMMON
-#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COMMON
+#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON)
+#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON)
+/* Order file instrumentation. */
+#define INSTR_PROF_ORDERFILE_SECT_NAME \
+ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_COMMON)
#endif
-#define INSTR_PROF_DATA_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
-#define INSTR_PROF_NAME_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
-#define INSTR_PROF_CNTS_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
-#define INSTR_PROF_COVMAP_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME)
-#define INSTR_PROF_VALS_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_VALS_SECT_NAME)
-#define INSTR_PROF_VNODES_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_VNODES_SECT_NAME)
+#define INSTR_PROF_ORDERFILE_BUFFER_NAME _llvm_order_file_buffer
+#define INSTR_PROF_ORDERFILE_BUFFER_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_BUFFER_NAME)
+#define INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME _llvm_order_file_buffer_idx
+#define INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME)
/* Macros to define start/stop section symbol for a given
* section on Linux. For instance
@@ -735,6 +741,12 @@ typedef struct InstrProfValueData {
#endif /* INSTR_PROF_DATA_INC */
+#ifndef INSTR_ORDER_FILE_INC
+// The maximal number of functions: 128*1024 (the buffer size will be 128*4 KB).
+#define INSTR_ORDER_FILE_BUFFER_SIZE 131072
+#define INSTR_ORDER_FILE_BUFFER_BITS 17
+#define INSTR_ORDER_FILE_BUFFER_MASK 0x1ffff
+#endif /* INSTR_ORDER_FILE_INC */
#else
#undef INSTR_PROF_DATA_DEFINED
#endif
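Taken together, the version macros and the new order-file constants above describe a packed version word and a power-of-two ring buffer. The standalone sketch below mirrors those macros as plain constants purely for illustration; the helper names are not part of InstrProfData.inc.

#include <cstdint>

// Low 56 bits carry the format version; bit 56 flags an IR-level profile and
// bit 57 flags context-sensitive (CSIR) records.
constexpr uint64_t VariantMasksAll = 0xff00000000000000ULL;
constexpr uint64_t MaskIRProf = 0x1ULL << 56;
constexpr uint64_t MaskCSIRProf = 0x1ULL << 57;
constexpr uint64_t getRawVersion(uint64_t V) { return V & ~VariantMasksAll; }

// The order-file buffer is a 128*1024-entry ring buffer: a running index is
// reduced to a slot with the 17-bit mask.
constexpr uint32_t OrderFileBufferSize = 131072;
constexpr uint32_t OrderFileBufferMask = 0x1ffff; // OrderFileBufferSize - 1
constexpr uint32_t orderFileSlot(uint32_t RunningIdx) {
  return RunningIdx & OrderFileBufferMask;
}

static_assert(getRawVersion(5 | MaskIRProf | MaskCSIRProf) == 5,
              "variant bits do not leak into the version");
static_assert(orderFileSlot(OrderFileBufferSize + 3) == 3,
              "indices wrap at the buffer size");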
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
index 08d782276117..73751faab88e 100644
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -1,9 +1,8 @@
//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,6 +77,8 @@ public:
virtual bool isIRLevelProfile() const = 0;
+ virtual bool hasCSIRLevelProfile() const = 0;
+
/// Return the PGO symtab. There are three different readers:
/// Raw, Text, and Indexed profile readers. The first two types
   /// of readers are used only by the llvm-profdata tool, while the indexed
@@ -90,6 +91,9 @@ public:
/// compiler.
virtual InstrProfSymtab &getSymtab() = 0;
+ /// Compute the sum of counts and return in Sum.
+ void accumuateCounts(CountSumOrPercent &Sum, bool IsCS);
+
protected:
std::unique_ptr<InstrProfSymtab> Symtab;
@@ -143,6 +147,7 @@ private:
/// Iterator over the profile data.
line_iterator Line;
bool IsIRLevelProfile = false;
+ bool HasCSIRLevelProfile = false;
Error readValueProfileData(InstrProfRecord &Record);
@@ -157,6 +162,8 @@ public:
bool isIRLevelProfile() const override { return IsIRLevelProfile; }
+ bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
+
/// Read the header.
Error readHeader() override;
@@ -213,6 +220,10 @@ public:
return (Version & VARIANT_MASK_IR_PROF) != 0;
}
+ bool hasCSIRLevelProfile() const override {
+ return (Version & VARIANT_MASK_CSIR_PROF) != 0;
+ }
+
InstrProfSymtab &getSymtab() override {
assert(Symtab.get());
return *Symtab.get();
@@ -342,6 +353,7 @@ struct InstrProfReaderIndexBase {
virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
virtual uint64_t getVersion() const = 0;
virtual bool isIRLevelProfile() const = 0;
+ virtual bool hasCSIRLevelProfile() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@@ -386,6 +398,10 @@ public:
return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
}
+ bool hasCSIRLevelProfile() const override {
+ return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
+ }
+
Error populateSymtab(InstrProfSymtab &Symtab) override {
return Symtab.create(HashTable->keys());
}
@@ -413,13 +429,16 @@ private:
std::unique_ptr<InstrProfReaderRemapper> Remapper;
/// Profile summary data.
std::unique_ptr<ProfileSummary> Summary;
+ /// Context sensitive profile summary data.
+ std::unique_ptr<ProfileSummary> CS_Summary;
// Index to the current record in the record array.
unsigned RecordIndex;
// Read the profile summary. Return a pointer pointing to one byte past the
// end of the summary data if it exists or the input \c Cur.
+ // \c UseCS indicates whether to use the context-sensitive profile summary.
const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
- const unsigned char *Cur);
+ const unsigned char *Cur, bool UseCS);
public:
IndexedInstrProfReader(
@@ -433,6 +452,9 @@ public:
/// Return the profile version.
uint64_t getVersion() const { return Index->getVersion(); }
bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
+ bool hasCSIRLevelProfile() const override {
+ return Index->hasCSIRLevelProfile();
+ }
/// Return true if the given buffer is in an indexed instrprof format.
static bool hasFormat(const MemoryBuffer &DataBuffer);
@@ -451,7 +473,16 @@ public:
std::vector<uint64_t> &Counts);
/// Return the maximum of all known function counts.
- uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
+ /// \c UseCS indicates whether to use the context-sensitive count.
+ uint64_t getMaximumFunctionCount(bool UseCS) {
+ if (UseCS) {
+ assert(CS_Summary && "No context sensitive profile summary");
+ return CS_Summary->getMaxFunctionCount();
+ } else {
+ assert(Summary && "No profile summary");
+ return Summary->getMaxFunctionCount();
+ }
+ }
/// Factory method to create an indexed reader.
static Expected<std::unique_ptr<IndexedInstrProfReader>>
@@ -470,7 +501,18 @@ public:
// to be used by llvm-profdata (for dumping). Avoid using this when
// the client is the compiler.
InstrProfSymtab &getSymtab() override;
- ProfileSummary &getSummary() { return *(Summary.get()); }
+
+ /// Return the profile summary.
+ /// \c UseCS indicates whether to use the context-sensitive summary.
+ ProfileSummary &getSummary(bool UseCS) {
+ if (UseCS) {
+ assert(CS_Summary && "No context sensitive summary");
+ return *(CS_Summary.get());
+ } else {
+ assert(Summary && "No profile summary");
+ return *(Summary.get());
+ }
+ }
};
} // end namespace llvm
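A hedged usage sketch for the new context-sensitive accessors on IndexedInstrProfReader declared above; the helper function is illustrative and not part of the header, and the point is simply that the CS summary must only be queried when hasCSIRLevelProfile() says it exists.

#include "llvm/ProfileData/InstrProfReader.h"
using namespace llvm;

// getSummary(true) and getMaximumFunctionCount(true) assert if the profile
// carries no context-sensitive records, so gate them on the flag.
static uint64_t maxFunctionCount(IndexedInstrProfReader &Reader) {
  if (Reader.hasCSIRLevelProfile())
    return Reader.getMaximumFunctionCount(/*UseCS=*/true);
  return Reader.getMaximumFunctionCount(/*UseCS=*/false);
}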
diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h
index 8107ab386fe2..5882fa2781e2 100644
--- a/include/llvm/ProfileData/InstrProfWriter.h
+++ b/include/llvm/ProfileData/InstrProfWriter.h
@@ -1,9 +1,8 @@
//===- InstrProfWriter.h - Instrumented profiling writer --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,7 +33,8 @@ class raw_fd_ostream;
class InstrProfWriter {
public:
using ProfilingData = SmallDenseMap<uint64_t, InstrProfRecord>;
- enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel };
+ // PF_IRLevelWithCS is the profile from context sensitive IR instrumentation.
+ enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS };
private:
bool Sparse;
@@ -75,20 +75,36 @@ public:
std::unique_ptr<MemoryBuffer> writeBuffer();
/// Set the ProfileKind. Report error if mixing FE and IR level profiles.
- Error setIsIRLevelProfile(bool IsIRLevel) {
+  /// \c WithCS indicates if this is for context-sensitive instrumentation.
+ Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) {
if (ProfileKind == PF_Unknown) {
- ProfileKind = IsIRLevel ? PF_IRLevel: PF_FE;
+ if (IsIRLevel)
+ ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel;
+ else
+ ProfileKind = PF_FE;
return Error::success();
}
- return (IsIRLevel == (ProfileKind == PF_IRLevel))
- ? Error::success()
- : make_error<InstrProfError>(
- instrprof_error::unsupported_version);
+
+ if (((ProfileKind != PF_FE) && !IsIRLevel) ||
+ ((ProfileKind == PF_FE) && IsIRLevel))
+ return make_error<InstrProfError>(instrprof_error::unsupported_version);
+
+ // When merging a context-sensitive profile (WithCS == true) with an IRLevel
+ // profile, set the kind to PF_IRLevelWithCS.
+ if (ProfileKind == PF_IRLevel && WithCS)
+ ProfileKind = PF_IRLevelWithCS;
+
+ return Error::success();
}
// Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness);
void setOutputSparse(bool Sparse);
+  // Compute the overlap between this object and Other. The program-level result
+  // is stored in Overlap and the function-level result in FuncLevelOverlap.
+ void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap,
+ const OverlapFuncFilters &FuncFilter);
private:
void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
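The branching in setIsIRLevelProfile() above encodes the merge rules for profile kinds: frontend and IR-level inputs may not be mixed, and a single context-sensitive input promotes the writer to PF_IRLevelWithCS. A small illustrative driver (not part of the header) makes the sequence explicit.

#include "llvm/ProfileData/InstrProfWriter.h"
using namespace llvm;

static Error mergeThreeInputs(InstrProfWriter &Writer) {
  // First input: plain IR-level, the kind becomes PF_IRLevel.
  if (Error E = Writer.setIsIRLevelProfile(/*IsIRLevel=*/true, /*WithCS=*/false))
    return E;
  // Second input: context-sensitive, the kind is promoted to PF_IRLevelWithCS.
  if (Error E = Writer.setIsIRLevelProfile(/*IsIRLevel=*/true, /*WithCS=*/true))
    return E;
  // Third input: a frontend profile now fails with unsupported_version.
  return Writer.setIsIRLevelProfile(/*IsIRLevel=*/false, /*WithCS=*/false);
}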
diff --git a/include/llvm/ProfileData/ProfileCommon.h b/include/llvm/ProfileData/ProfileCommon.h
index 087588f06340..f98a34387fdf 100644
--- a/include/llvm/ProfileData/ProfileCommon.h
+++ b/include/llvm/ProfileData/ProfileCommon.h
@@ -1,9 +1,8 @@
//===- ProfileCommon.h - Common profiling APIs. -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,7 +83,8 @@ public:
SampleProfileSummaryBuilder(std::vector<uint32_t> Cutoffs)
: ProfileSummaryBuilder(std::move(Cutoffs)) {}
- void addRecord(const sampleprof::FunctionSamples &FS);
+ void addRecord(const sampleprof::FunctionSamples &FS,
+ bool isCallsiteSample = false);
std::unique_ptr<ProfileSummary> getSummary();
};
diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h
index 927dfd246878..7fbc857b7230 100644
--- a/include/llvm/ProfileData/SampleProf.h
+++ b/include/llvm/ProfileData/SampleProf.h
@@ -1,9 +1,8 @@
//===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -411,6 +410,34 @@ public:
return getNameInModule(Name, M);
}
+ /// Return the canonical name for a function, taking into account
+ /// suffix elision policy attributes.
+ static StringRef getCanonicalFnName(const Function &F) {
+ static const char *knownSuffixes[] = { ".llvm.", ".part." };
+ auto AttrName = "sample-profile-suffix-elision-policy";
+ auto Attr = F.getFnAttribute(AttrName).getValueAsString();
+ if (Attr == "" || Attr == "all") {
+ return F.getName().split('.').first;
+ } else if (Attr == "selected") {
+ StringRef Cand(F.getName());
+ for (const auto &Suf : knownSuffixes) {
+ StringRef Suffix(Suf);
+ auto It = Cand.rfind(Suffix);
+ if (It == StringRef::npos)
+ return Cand;
+ auto Dit = Cand.rfind('.');
+ if (Dit == It + Suffix.size() - 1)
+ Cand = Cand.substr(0, It);
+ }
+ return Cand;
+ } else if (Attr == "none") {
+ return F.getName();
+ } else {
+ assert(false && "internal error: unknown suffix elision policy");
+ }
+ return F.getName();
+ }
+
/// Translate \p Name into its original name in Module.
/// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
/// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
@@ -466,11 +493,9 @@ public:
/// built in post-thin-link phase and var promotion has been done,
/// we need to add the substring of function name without the suffix
/// into the GUIDToFuncNameMap.
- auto pos = OrigName.find('.');
- if (pos != StringRef::npos) {
- StringRef NewName = OrigName.substr(0, pos);
- GUIDToFuncNameMap.insert({Function::getGUID(NewName), NewName});
- }
+ StringRef CanonName = getCanonicalFnName(F);
+ if (CanonName != OrigName)
+ GUIDToFuncNameMap.insert({Function::getGUID(CanonName), CanonName});
}
CurrentModule = &M;
}
@@ -547,10 +572,9 @@ public:
SampleSorter(const std::map<LocationT, SampleT> &Samples) {
for (const auto &I : Samples)
V.push_back(&I);
- std::stable_sort(V.begin(), V.end(),
- [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
- return A->first < B->first;
- });
+ llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
+ return A->first < B->first;
+ });
}
const SamplesWithLocList &get() const { return V; }
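getCanonicalFnName() added above centralizes suffix handling for sample profiles. Below is a hedged usage sketch; the helper, the attribute values, and the symbol "foo.llvm.1234" are only for illustration of what each policy yields.

#include "llvm/IR/Function.h"
#include "llvm/ProfileData/SampleProf.h"
using namespace llvm;

// For a function whose symbol is "foo.llvm.1234":
//   no attribute or "all" -> "foo"            (cut at the first '.')
//   "selected"            -> "foo"            (".llvm.<n>" is a known suffix)
//   "none"                -> "foo.llvm.1234"  (keep the mangled name as-is)
static StringRef canonicalNameUnderPolicy(Function &F, StringRef Policy) {
  if (!Policy.empty())
    F.addFnAttr("sample-profile-suffix-elision-policy", Policy);
  return sampleprof::FunctionSamples::getCanonicalFnName(F);
}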
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h
index 5cc729e42cc8..969cdea859c9 100644
--- a/include/llvm/ProfileData/SampleProfReader.h
+++ b/include/llvm/ProfileData/SampleProfReader.h
@@ -1,9 +1,8 @@
//===- SampleProfReader.h - Read LLVM sample profile data -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -287,10 +286,11 @@ public:
/// Return the samples collected for function \p F.
FunctionSamples *getSamplesFor(const Function &F) {
- // The function name may have been updated by adding suffix. In sample
- // profile, the function names are all stripped, so we need to strip
- // the function name suffix before matching with profile.
- return getSamplesFor(F.getName().split('.').first);
+ // The function name may have been updated by adding suffix. Call
+ // a helper to (optionally) strip off suffixes so that we can
+ // match against the original function name in the profile.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ return getSamplesFor(CanonName);
}
/// Return the samples collected for function \p F.
diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h
index d5ac6e53e4f7..81e6e3ab0b4a 100644
--- a/include/llvm/ProfileData/SampleProfWriter.h
+++ b/include/llvm/ProfileData/SampleProfWriter.h
@@ -1,9 +1,8 @@
//===- SampleProfWriter.h - Write LLVM sample profile data ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Remarks/Remark.h b/include/llvm/Remarks/Remark.h
new file mode 100644
index 000000000000..05d0ea60accd
--- /dev/null
+++ b/include/llvm/Remarks/Remark.h
@@ -0,0 +1,113 @@
+//===-- llvm/Remarks/Remark.h - The remark type -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an abstraction for handling remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_H
+#define LLVM_REMARKS_REMARK_H
+
+#include "llvm-c/Remarks.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CBindingWrapping.h"
+#include <string>
+
+namespace llvm {
+namespace remarks {
+
+constexpr uint64_t Version = 0;
+
+/// The debug location used to track a remark back to the source file.
+struct RemarkLocation {
+ /// Absolute path of the source file corresponding to this remark.
+ StringRef SourceFilePath;
+ unsigned SourceLine;
+ unsigned SourceColumn;
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RemarkLocation, LLVMRemarkDebugLocRef)
+
+/// A key-value pair with a debug location that is used to display the remarks
+/// at the right place in the source.
+struct Argument {
+ StringRef Key;
+ // FIXME: We might want to be able to store other types than strings here.
+ StringRef Val;
+ // If set, the debug location corresponding to the value.
+ Optional<RemarkLocation> Loc;
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Argument, LLVMRemarkArgRef)
+
+/// The type of the remark.
+enum class Type {
+ Unknown,
+ Passed,
+ Missed,
+ Analysis,
+ AnalysisFPCommute,
+ AnalysisAliasing,
+ Failure,
+ LastTypeValue = Failure
+};
+
+/// A remark type used for both emission and parsing.
+struct Remark {
+ /// The type of the remark.
+ Type RemarkType = Type::Unknown;
+
+ /// Name of the pass that triggers the emission of this remark.
+ StringRef PassName;
+
+ /// Textual identifier for the remark (single-word, camel-case). Can be used
+ /// by external tools reading the output file for remarks to identify the
+ /// remark.
+ StringRef RemarkName;
+
+  /// Mangled name of the function that triggers the emission of this remark.
+ StringRef FunctionName;
+
+ /// The location in the source file of the remark.
+ Optional<RemarkLocation> Loc;
+
+ /// If profile information is available, this is the number of times the
+ /// corresponding code was executed in a profile instrumentation run.
+ Optional<uint64_t> Hotness;
+
+ /// Arguments collected via the streaming interface.
+ SmallVector<Argument, 5> Args;
+
+ Remark() = default;
+ Remark(Remark &&) = default;
+ Remark &operator=(Remark &&) = default;
+
+ /// Return a message composed from the arguments as a string.
+ std::string getArgsAsMsg() const;
+
+ /// Clone this remark to explicitly ask for a copy.
+ Remark clone() const { return *this; }
+
+private:
+ /// In order to avoid unwanted copies, "delete" the copy constructor.
+ /// If a copy is needed, it should be done through `Remark::clone()`.
+ Remark(const Remark &) = default;
+ Remark& operator=(const Remark &) = default;
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Remark, LLVMRemarkEntryRef)
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_H */
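The Remark type above is deliberately move-only; copies have to be requested through clone(). A short construction sketch under that constraint, with placeholder field values:

#include "llvm/Remarks/Remark.h"
using namespace llvm;

static remarks::Remark makeExampleRemark() {
  remarks::Remark R;
  R.RemarkType = remarks::Type::Passed;
  R.PassName = "inline";            // placeholder pass name
  R.RemarkName = "Inlined";         // placeholder remark identifier
  R.FunctionName = "caller";        // placeholder mangled function name
  R.Args.push_back(remarks::Argument{"Callee", "callee", None});
  remarks::Remark Copy = R.clone(); // explicit copy; plain assignment from R
                                    // would not compile (copy ctor is private)
  return Copy;                      // moves remain implicit and cheap
}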
diff --git a/include/llvm/Remarks/RemarkFormat.h b/include/llvm/Remarks/RemarkFormat.h
new file mode 100644
index 000000000000..e167d99d2517
--- /dev/null
+++ b/include/llvm/Remarks/RemarkFormat.h
@@ -0,0 +1,33 @@
+//===-- llvm/Remarks/RemarkFormat.h - The format of remarks -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities to deal with the format of remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_FORMAT_H
+#define LLVM_REMARKS_REMARK_FORMAT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace remarks {
+
+constexpr StringRef Magic("REMARKS", 7);
+
+/// The format used for serializing/deserializing remarks.
+enum class Format { Unknown, YAML };
+
+/// Parse and validate a string for the remark format.
+Expected<Format> parseFormat(StringRef FormatStr);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_FORMAT_H */
diff --git a/include/llvm/Remarks/RemarkParser.h b/include/llvm/Remarks/RemarkParser.h
new file mode 100644
index 000000000000..671e1abe5ec7
--- /dev/null
+++ b/include/llvm/Remarks/RemarkParser.h
@@ -0,0 +1,77 @@
+//===-- llvm/Remarks/RemarkParser.h - Remark parsing interface --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface for parsing remarks in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_PARSER_H
+#define LLVM_REMARKS_REMARK_PARSER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Support/Error.h"
+#include <memory>
+
+namespace llvm {
+namespace remarks {
+
+struct ParserImpl;
+struct ParsedStringTable;
+
+class EndOfFileError : public ErrorInfo<EndOfFileError> {
+public:
+ static char ID;
+
+ EndOfFileError() {}
+
+ void log(raw_ostream &OS) const override { OS << "End of file reached."; }
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+};
+
+/// Parser used to parse a raw buffer to remarks::Remark objects.
+struct Parser {
+ /// The format of the parser.
+ Format ParserFormat;
+
+ Parser(Format ParserFormat) : ParserFormat(ParserFormat) {}
+
+ /// If no error occurs, this returns a valid Remark object.
+ /// If an error of type EndOfFileError occurs, it is safe to recover from it
+ /// by stopping the parsing.
+ /// If any other error occurs, it should be propagated to the user.
+ /// The pointer should never be null.
+ virtual Expected<std::unique_ptr<Remark>> next() = 0;
+
+ virtual ~Parser() = default;
+};
+
+/// In-memory representation of the string table parsed from a buffer (e.g. the
+/// remarks section).
+struct ParsedStringTable {
+ /// The buffer mapped from the section contents.
+ StringRef Buffer;
+ /// Collection of offsets in the buffer for each string entry.
+ SmallVector<size_t, 8> Offsets;
+
+ Expected<StringRef> operator[](size_t Index) const;
+ ParsedStringTable(StringRef Buffer);
+};
+
+Expected<std::unique_ptr<Parser>>
+createRemarkParser(Format ParserFormat, StringRef Buf,
+ Optional<const ParsedStringTable *> StrTab = None);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_PARSER_H */
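The contract of Parser::next() above is that EndOfFileError is the one failure a caller is expected to swallow; everything else should be propagated. A hedged consumption sketch, with the buffer supplied by the caller:

#include "llvm/Remarks/RemarkParser.h"
using namespace llvm;

static Error consumeRemarks(StringRef Buf) {
  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
      remarks::createRemarkParser(remarks::Format::YAML, Buf);
  if (!MaybeParser)
    return MaybeParser.takeError();
  while (true) {
    Expected<std::unique_ptr<remarks::Remark>> MaybeRemark =
        (*MaybeParser)->next();
    if (MaybeRemark) {
      // ... consume **MaybeRemark here ...
      continue;
    }
    Error E = MaybeRemark.takeError();
    if (E.isA<remarks::EndOfFileError>()) {
      consumeError(std::move(E)); // the normal way the stream ends
      return Error::success();
    }
    return E; // a real parse error, hand it back to the caller
  }
}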
diff --git a/include/llvm/Remarks/RemarkSerializer.h b/include/llvm/Remarks/RemarkSerializer.h
new file mode 100644
index 000000000000..def5c2e16620
--- /dev/null
+++ b/include/llvm/Remarks/RemarkSerializer.h
@@ -0,0 +1,68 @@
+//===-- RemarkSerializer.h - Remark serialization interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an interface for serializing remarks to different formats.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_SERIALIZER_H
+#define LLVM_REMARKS_REMARK_SERIALIZER_H
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace remarks {
+
+/// This is the base class for a remark serializer.
+/// It includes support for using a string table while emitting.
+struct Serializer {
+ /// The open raw_ostream that the remark diagnostics are emitted to.
+ raw_ostream &OS;
+ /// The string table containing all the unique strings used in the output.
+ /// The table can be serialized to be consumed after the compilation.
+ Optional<StringTable> StrTab;
+
+ Serializer(raw_ostream &OS) : OS(OS), StrTab() {}
+
+ /// This is just an interface.
+ virtual ~Serializer() = default;
+ virtual void emit(const Remark &Remark) = 0;
+};
+
+/// Whether the serializer should use a string table while emitting.
+enum class UseStringTable { No, Yes };
+
+/// Serialize the remarks to YAML. One remark entry looks like this:
+/// --- !<TYPE>
+/// Pass: <PASSNAME>
+/// Name: <REMARKNAME>
+/// DebugLoc: { File: <SOURCEFILENAME>, Line: <SOURCELINE>,
+/// Column: <SOURCECOLUMN> }
+/// Function: <FUNCTIONNAME>
+/// Args:
+/// - <KEY>: <VALUE>
+/// DebugLoc: { File: <FILE>, Line: <LINE>, Column: <COL> }
+/// ...
+struct YAMLSerializer : public Serializer {
+ /// The YAML streamer.
+ yaml::Output YAMLOutput;
+
+ YAMLSerializer(raw_ostream &OS,
+ UseStringTable UseStringTable = remarks::UseStringTable::No);
+
+ /// Emit a remark to the stream.
+ void emit(const Remark &Remark) override;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_SERIALIZER_H */
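A minimal emission sketch for the YAMLSerializer declared above; each emit() call appends one YAML document of the shape shown in the comment to the stream. The helper is illustrative only.

#include "llvm/Remarks/RemarkSerializer.h"
using namespace llvm;

static void emitRemarks(raw_ostream &OS, ArrayRef<remarks::Remark> Remarks) {
  // Constructed without a string table; pass UseStringTable::Yes to have
  // strings deduplicated into Serializer.StrTab instead.
  remarks::YAMLSerializer Serializer(OS);
  for (const remarks::Remark &R : Remarks)
    Serializer.emit(R);
}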
diff --git a/include/llvm/Remarks/RemarkStringTable.h b/include/llvm/Remarks/RemarkStringTable.h
new file mode 100644
index 000000000000..f9b4fdbbfb8d
--- /dev/null
+++ b/include/llvm/Remarks/RemarkStringTable.h
@@ -0,0 +1,59 @@
+//===-- RemarkStringTable.h - Serializing string table ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to deduplicate and serialize a string table used for
+// generating remarks.
+//
+// For parsing a string table, use ParsedStringTable in RemarkParser.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_STRING_TABLE_H
+#define LLVM_REMARKS_REMARK_STRING_TABLE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace remarks {
+
+/// The string table used for serializing remarks.
+/// This table can, for example, be serialized in a section to be consumed
+/// after the compilation.
+struct StringTable {
+ /// Allocator holding all the memory used by the map.
+ BumpPtrAllocator Allocator;
+ /// The string table containing all the unique strings used in the output.
+  /// It maps a string to a unique ID.
+ StringMap<unsigned, BumpPtrAllocator &> StrTab;
+ /// Total size of the string table when serialized.
+ size_t SerializedSize = 0;
+
+ StringTable() : Allocator(), StrTab(Allocator) {}
+  /// Add a string to the table. It returns a unique ID of the string.
+ std::pair<unsigned, StringRef> add(StringRef Str);
+ /// Serialize the string table to a stream. It is serialized as a little
+ /// endian uint64 (the size of the table in bytes) followed by a sequence of
+ /// NULL-terminated strings, where the N-th string is the string with the ID N
+ /// in the StrTab map.
+ void serialize(raw_ostream &OS) const;
+ /// Serialize the string table to a vector. This allows users to do the actual
+ /// writing to file/memory/other.
+ /// The string with the ID == N should be the N-th element in the vector.
+ std::vector<StringRef> serialize() const;
+};
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_STRING_TABLE_H */
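A short sketch of the StringTable contract documented above: add() deduplicates and hands back a stable ID, and serialize() writes the size prefix followed by the strings in ID order. The driver function and the example strings are placeholders.

#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void buildAndSerialize(raw_ostream &OS) {
  remarks::StringTable Table;
  unsigned FirstID = Table.add("inline").first; // first unique string
  unsigned SameID = Table.add("inline").first;  // duplicate: same ID comes back
  unsigned NextID = Table.add("gvn").first;     // new string: next ID
  (void)FirstID; (void)SameID; (void)NextID;
  // Emits a little-endian uint64 size followed by NUL-terminated strings,
  // the N-th string being the one with ID N.
  Table.serialize(OS);
}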
diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def
index e03297b7c3c3..e152f383b3ec 100644
--- a/include/llvm/Support/AArch64TargetParser.def
+++ b/include/llvm/Support/AArch64TargetParser.def
@@ -1,9 +1,8 @@
//===- AARCH64TargetParser.def - AARCH64 target parsing defines ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,78 +50,92 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
#endif
// FIXME: This would be nicer were it tablegen
-AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
-AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
-AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
-AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
-AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
-AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
-AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
-AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
-AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
-AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
-AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
-AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
-AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
-AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
-AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
-AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
-AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
-AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
-AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
-AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
-AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
-AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
-AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
-AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
+AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
+AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
+AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
+AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
+AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
+AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
+AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
+AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
+AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
+AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
+AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
+AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
+AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
+AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
+AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
+AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
+AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
+AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
+AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
+AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
+AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
+AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
+AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
+AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
+AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
+AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
+AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT)
#endif
AARCH64_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a75", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
+AARCH64_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS))
AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_NONE))
+ (AArch64::AEK_NONE))
AARCH64_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD))
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16))
+AARCH64_CPU_NAME("exynos-m5", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_DOTPROD | AArch64::AEK_FP16))
AARCH64_CPU_NAME("falkor", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_RDM))
+ (AArch64::AEK_CRC | AArch64::AEK_RDM))
AARCH64_CPU_NAME("saphira", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_PROFILE))
+ (AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC))
+ (AArch64::AEK_CRC))
AARCH64_CPU_NAME("thunderx2t99", ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_NONE))
+ (AArch64::AEK_NONE))
AARCH64_CPU_NAME("thunderx", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt88", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt81", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("thunderxt83", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
+ (AArch64::AEK_CRC | AArch64::AEK_PROFILE))
AARCH64_CPU_NAME("tsv110", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
- (AArch64::AEK_PROFILE | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_DOTPROD))
+ (AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
+ AArch64::AEK_PROFILE))
// Invalid CPU
AARCH64_CPU_NAME("invalid", INVALID, FK_INVALID, true, AArch64::AEK_INVALID)
#undef AARCH64_CPU_NAME
diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h
index 76b77d474428..965d38535e74 100644
--- a/include/llvm/Support/AArch64TargetParser.h
+++ b/include/llvm/Support/AArch64TargetParser.h
@@ -1,9 +1,8 @@
//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,6 +49,11 @@ enum ArchExtKind : unsigned {
AEK_SSBS = 1 << 20,
AEK_SB = 1 << 21,
AEK_PREDRES = 1 << 22,
+ AEK_SVE2 = 1 << 23,
+ AEK_SVE2AES = 1 << 24,
+ AEK_SVE2SM4 = 1 << 25,
+ AEK_SVE2SHA3 = 1 << 26,
+ AEK_BITPERM = 1 << 27,
};
enum class ArchKind {
diff --git a/include/llvm/Support/AMDGPUMetadata.h b/include/llvm/Support/AMDGPUMetadata.h
index 84851c07499d..f7f1ec40dde9 100644
--- a/include/llvm/Support/AMDGPUMetadata.h
+++ b/include/llvm/Support/AMDGPUMetadata.h
@@ -1,9 +1,8 @@
//===--- AMDGPUMetadata.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -75,6 +74,7 @@ enum class ValueKind : uint8_t {
HiddenPrintfBuffer = 11,
HiddenDefaultQueue = 12,
HiddenCompletionAction = 13,
+ HiddenMultiGridSyncArg = 14,
Unknown = 0xff
};
@@ -157,6 +157,8 @@ constexpr char Name[] = "Name";
constexpr char TypeName[] = "TypeName";
/// Key for Kernel::Arg::Metadata::mSize.
constexpr char Size[] = "Size";
+/// Key for Kernel::Arg::Metadata::mOffset.
+constexpr char Offset[] = "Offset";
/// Key for Kernel::Arg::Metadata::mAlign.
constexpr char Align[] = "Align";
/// Key for Kernel::Arg::Metadata::mValueKind.
@@ -189,6 +191,8 @@ struct Metadata final {
std::string mTypeName = std::string();
/// Size in bytes. Required.
uint32_t mSize = 0;
+ /// Offset in bytes. Required for code object v3, unused for code object v2.
+ uint32_t mOffset = 0;
/// Alignment in bytes. Required.
uint32_t mAlign = 0;
/// Value kind. Required.
@@ -453,11 +457,30 @@ constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
//===----------------------------------------------------------------------===//
namespace PALMD {
-/// PAL metadata assembler directive.
+/// PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirective[] = ".amd_amdgpu_pal_metadata";
+/// PAL metadata (new MsgPack format) beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amdgpu_pal_metadata";
+
+/// PAL metadata (new MsgPack format) ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_pal_metadata";
+
/// PAL metadata keys.
enum Key : uint32_t {
+ R_2E12_COMPUTE_PGM_RSRC1 = 0x2e12,
+ R_2D4A_SPI_SHADER_PGM_RSRC1_LS = 0x2d4a,
+ R_2D0A_SPI_SHADER_PGM_RSRC1_HS = 0x2d0a,
+ R_2CCA_SPI_SHADER_PGM_RSRC1_ES = 0x2cca,
+ R_2C8A_SPI_SHADER_PGM_RSRC1_GS = 0x2c8a,
+ R_2C4A_SPI_SHADER_PGM_RSRC1_VS = 0x2c4a,
+ R_2C0A_SPI_SHADER_PGM_RSRC1_PS = 0x2c0a,
+ R_2E00_COMPUTE_DISPATCH_INITIATOR = 0x2e00,
+ R_A1B3_SPI_PS_INPUT_ENA = 0xa1b3,
+ R_A1B4_SPI_PS_INPUT_ADDR = 0xa1b4,
+ R_A1B6_SPI_PS_IN_CONTROL = 0xa1b6,
+ R_A2D5_VGT_SHADER_STAGES_EN = 0xa2d5,
+
LS_NUM_USED_VGPRS = 0x10000021,
HS_NUM_USED_VGPRS = 0x10000022,
ES_NUM_USED_VGPRS = 0x10000023,
@@ -483,12 +506,6 @@ enum Key : uint32_t {
CS_SCRATCH_SIZE = 0x1000004a
};
-/// PAL metadata represented as a vector.
-typedef std::vector<uint32_t> Metadata;
-
-/// Converts \p PALMetadata to \p String.
-std::error_code toString(const Metadata &PALMetadata, std::string &String);
-
} // end namespace PALMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/include/llvm/Support/AMDHSAKernelDescriptor.h b/include/llvm/Support/AMDHSAKernelDescriptor.h
index 751699e3a19f..d1c2147536a7 100644
--- a/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -1,9 +1,8 @@
//===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,8 +88,11 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
- COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
- COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
+ COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
+ COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
+ COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
+ COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
+ COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
};
#undef COMPUTE_PGM_RSRC1
@@ -120,6 +122,15 @@ enum : int32_t {
};
#undef COMPUTE_PGM_RSRC2
+// Compute program resource register 3. Must match hardware definition.
+#define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
+enum : int32_t {
+ COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
+ COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
+};
+#undef COMPUTE_PGM_RSRC3
+
// Kernel code properties. Must be kept backwards compatible.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
@@ -131,7 +142,9 @@ enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
- KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
+ KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
+ KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
+ KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
};
#undef KERNEL_CODE_PROPERTY
@@ -141,7 +154,8 @@ struct kernel_descriptor_t {
uint32_t private_segment_fixed_size;
uint8_t reserved0[8];
int64_t kernel_code_entry_byte_offset;
- uint8_t reserved1[24];
+ uint8_t reserved1[20];
+ uint32_t compute_pgm_rsrc3; // GFX10+
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint16_t kernel_code_properties;
@@ -167,6 +181,9 @@ static_assert(
offsetof(kernel_descriptor_t, reserved1) == 24,
"invalid offset for reserved1");
static_assert(
+ offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
+ "invalid offset for compute_pgm_rsrc3");
+static_assert(
offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
"invalid offset for compute_pgm_rsrc1");
static_assert(
diff --git a/include/llvm/Support/ARMAttributeParser.h b/include/llvm/Support/ARMAttributeParser.h
index 919f39721f86..f6c39abb4f21 100644
--- a/include/llvm/Support/ARMAttributeParser.h
+++ b/include/llvm/Support/ARMAttributeParser.h
@@ -1,9 +1,8 @@
//===--- ARMAttributeParser.h - ARM Attribute Information Printer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,6 +53,8 @@ class ARMAttributeParser {
uint32_t &Offset);
void Advanced_SIMD_arch(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
uint32_t &Offset);
+ void MVE_arch(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset);
void PCS_config(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
uint32_t &Offset);
void ABI_PCS_R9_use(ARMBuildAttrs::AttrType Tag, const uint8_t *Data,
diff --git a/include/llvm/Support/ARMBuildAttributes.h b/include/llvm/Support/ARMBuildAttributes.h
index b8a03765a7c0..90481eaa1677 100644
--- a/include/llvm/Support/ARMBuildAttributes.h
+++ b/include/llvm/Support/ARMBuildAttributes.h
@@ -1,9 +1,8 @@
//===-- ARMBuildAttributes.h - ARM Build Attributes -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,6 +67,7 @@ enum AttrType {
MPextension_use = 42, // recoded from 70 (ABI r2.08)
DIV_use = 44,
DSP_extension = 46,
+ MVE_arch = 48,
also_compatible_with = 65,
conformance = 67,
Virtualization_use = 68,
@@ -111,6 +111,7 @@ enum CPUArch {
v8_R = 15, // e.g. Cortex R52
v8_M_Base= 16, // v8_M_Base AArch32
v8_M_Main= 17, // v8_M_Main AArch32
+ v8_1_M_Main=21, // v8_1_M_Main AArch32
};
enum CPUArchProfile { // (=7), uleb128
@@ -152,6 +153,10 @@ enum {
AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted
AllowNeonARMv8_1a = 4,// ARM v8.1-A SIMD was permitted (RDMA)
+ // Tag_MVE_arch, (=48), uleb128
+ AllowMVEInteger = 1, // integer-only MVE was permitted
+ AllowMVEIntegerAndFloat = 2, // both integer and floating point MVE were permitted
+
// Tag_ABI_PCS_R9_use, (=14), uleb128
R9IsGPR = 0, // R9 used as v6 (just another callee-saved register)
   R9IsSB = 1, // R9 used as a global static base register
diff --git a/include/llvm/Support/ARMEHABI.h b/include/llvm/Support/ARMEHABI.h
index 9b052df0a908..3fbb56d65eb8 100644
--- a/include/llvm/Support/ARMEHABI.h
+++ b/include/llvm/Support/ARMEHABI.h
@@ -1,9 +1,8 @@
//===--- ARMEHABI.h - ARM Exception Handling ABI ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def
index 9e844e2b464d..f466b3252748 100644
--- a/include/llvm/Support/ARMTargetParser.def
+++ b/include/llvm/Support/ARMTargetParser.def
@@ -1,9 +1,8 @@
//===- ARMTargetParser.def - ARM target parsing defines ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,8 @@ ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FPUVersion::VFPV4, NeonSupportLevel::None
ARM_FPU("fpv5-d16", FK_FPV5_D16, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::D16)
ARM_FPU("fpv5-sp-d16", FK_FPV5_SP_D16, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::SP_D16)
ARM_FPU("fp-armv8", FK_FP_ARMV8, FPUVersion::VFPV5, NeonSupportLevel::None, FPURestriction::None)
+ARM_FPU("fp-armv8-fullfp16-d16", FK_FP_ARMV8_FULLFP16_D16, FPUVersion::VFPV5_FULLFP16, NeonSupportLevel::None, FPURestriction::D16)
+ARM_FPU("fp-armv8-fullfp16-sp-d16", FK_FP_ARMV8_FULLFP16_SP_D16, FPUVersion::VFPV5_FULLFP16, NeonSupportLevel::None, FPURestriction::SP_D16)
ARM_FPU("neon", FK_NEON, FPUVersion::VFPV3, NeonSupportLevel::Neon, FPURestriction::None)
ARM_FPU("neon-fp16", FK_NEON_FP16, FPUVersion::VFPV3_FP16, NeonSupportLevel::Neon, FPURestriction::None)
ARM_FPU("neon-vfpv4", FK_NEON_VFPV4, FPUVersion::VFPV4, NeonSupportLevel::Neon, FPURestriction::None)
@@ -119,6 +120,8 @@ ARM_ARCH("armv8-m.base", ARMV8MBaseline, "8-M.Baseline", "v8m.base",
ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIVTHUMB)
ARM_ARCH("armv8-m.main", ARMV8MMainline, "8-M.Mainline", "v8m.main",
ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIVTHUMB)
+ARM_ARCH("armv8.1-m.main", ARMV8_1MMainline, "8.1-M.Mainline", "v8.1m.main",
+ ARMBuildAttrs::CPUArch::v8_1_M_Main, FK_FP_ARMV8_FULLFP16_SP_D16, ARM::AEK_HWDIVTHUMB | ARM::AEK_RAS | ARM::AEK_LOB)
// Non-standard Arch names.
ARM_ARCH("iwmmxt", IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE,
FK_NONE, ARM::AEK_NONE)
@@ -145,6 +148,9 @@ ARM_ARCH_EXT_NAME("aes", ARM::AEK_AES, "+aes", "-aes")
ARM_ARCH_EXT_NAME("dotprod", ARM::AEK_DOTPROD, "+dotprod","-dotprod")
ARM_ARCH_EXT_NAME("dsp", ARM::AEK_DSP, "+dsp", "-dsp")
ARM_ARCH_EXT_NAME("fp", ARM::AEK_FP, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("fp.dp", ARM::AEK_FP_DP, nullptr, nullptr)
+ARM_ARCH_EXT_NAME("mve", (ARM::AEK_DSP | ARM::AEK_SIMD), "+mve", "-mve")
+ARM_ARCH_EXT_NAME("mve.fp", (ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp")
ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr)
ARM_ARCH_EXT_NAME("mp", ARM::AEK_MP, nullptr, nullptr)
ARM_ARCH_EXT_NAME("simd", ARM::AEK_SIMD, nullptr, nullptr)
@@ -159,6 +165,7 @@ ARM_ARCH_EXT_NAME("maverick", ARM::AEK_MAVERICK, nullptr, nullptr)
ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr)
ARM_ARCH_EXT_NAME("fp16fml", ARM::AEK_FP16FML, "+fp16fml", "-fp16fml")
ARM_ARCH_EXT_NAME("sb", ARM::AEK_SB, "+sb", "-sb")
+ARM_ARCH_EXT_NAME("lob", ARM::AEK_LOB, "+lob", "-lob")
#undef ARM_ARCH_EXT_NAME
#ifndef ARM_HW_DIV_NAME
@@ -252,6 +259,7 @@ ARM_CPU_NAME("cortex-m4", ARMV7EM, FK_FPV4_SP_D16, true, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-m7", ARMV7EM, FK_FPV5_D16, false, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-m23", ARMV8MBaseline, FK_NONE, false, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-m33", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP)
+ARM_CPU_NAME("cortex-m35p", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP)
ARM_CPU_NAME("cortex-a32", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
@@ -262,12 +270,18 @@ ARM_CPU_NAME("cortex-a72", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cortex-a73", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cortex-a75", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-a76", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m1", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m2", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
+ARM_CPU_NAME("exynos-m5", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("kryo", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", IWMMXT, FK_NONE, true, ARM::AEK_NONE)
diff --git a/include/llvm/Support/ARMTargetParser.h b/include/llvm/Support/ARMTargetParser.h
index 71acc0dc72d0..4b9070dea596 100644
--- a/include/llvm/Support/ARMTargetParser.h
+++ b/include/llvm/Support/ARMTargetParser.h
@@ -1,9 +1,8 @@
//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,13 @@ enum ArchExtKind : unsigned {
AEK_AES = 1 << 16,
AEK_FP16FML = 1 << 17,
AEK_SB = 1 << 18,
+ AEK_SVE2 = 1 << 19,
+ AEK_SVE2AES = 1 << 20,
+ AEK_SVE2SM4 = 1 << 21,
+ AEK_SVE2SHA3 = 1 << 22,
+ AEK_BITPERM = 1 << 23,
+ AEK_FP_DP = 1 << 24,
+ AEK_LOB = 1 << 25,
// Unsupported extensions.
AEK_OS = 0x8000000,
AEK_IWMMXT = 0x10000000,
@@ -127,7 +133,8 @@ enum class FPUVersion {
VFPV3,
VFPV3_FP16,
VFPV4,
- VFPV5
+ VFPV5,
+ VFPV5_FULLFP16,
};
// An FPU name restricts the FPU in one of three ways:
@@ -234,6 +241,8 @@ StringRef getCPUAttr(ArchKind AK);
StringRef getSubArch(ArchKind AK);
StringRef getArchExtName(unsigned ArchExtKind);
StringRef getArchExtFeature(StringRef ArchExt);
+bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
+ std::vector<StringRef> &Features);
StringRef getHWDivName(unsigned HWDivKind);
// Information by Name
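
The new appendArchExtFeatures() hook declared above expands a single architecture-extension name into the backend feature strings for a given CPU/arch pair. A minimal sketch follows; the CPU name and the bare "dotprod" extension spelling are assumptions made for illustration, not something this patch documents.

    #include "llvm/Support/ARMTargetParser.h"
    #include <vector>

    using namespace llvm;

    // Sketch: collect the feature strings implied by enabling "dotprod"
    // on a cortex-a76 (spelled as in the CPU table added above).
    static std::vector<StringRef> expandDotprod() {
      std::vector<StringRef> Features;
      ARM::ArchKind AK = ARM::parseCPUArch("cortex-a76");
      if (!ARM::appendArchExtFeatures("cortex-a76", AK, "dotprod", Features))
        Features.clear(); // extension unknown or invalid for this CPU
      return Features;
    }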
diff --git a/include/llvm/Support/ARMWinEH.h b/include/llvm/Support/ARMWinEH.h
index 60174503ad49..857a0d3814a8 100644
--- a/include/llvm/Support/ARMWinEH.h
+++ b/include/llvm/Support/ARMWinEH.h
@@ -1,9 +1,8 @@
-//===-- llvm/Support/WinARMEH.h - Windows on ARM EH Constants ---*- C++ -*-===//
+//===-- llvm/Support/ARMWinEH.h - Windows on ARM EH Constants ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -383,7 +382,7 @@ struct ExceptionDataRecord {
return ((Data[0] & 0x00400000) >> 22);
}
- uint8_t EpilogueCount() const {
+ uint16_t EpilogueCount() const {
if (HeaderWords(*this) == 1) {
if (isAArch64)
return (Data[0] & 0x07C00000) >> 22;
diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h
index 9e7a62b85e34..d12401f0eb49 100644
--- a/include/llvm/Support/AlignOf.h
+++ b/include/llvm/Support/AlignOf.h
@@ -1,9 +1,8 @@
//===--- AlignOf.h - Portable calculation of type alignment -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index 42d08378a677..09e967b98abc 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -1,9 +1,8 @@
//===- Allocator.h - Simple memory allocation abstraction -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Support/ArrayRecycler.h b/include/llvm/Support/ArrayRecycler.h
index 68696be6bf3d..5256ce80c028 100644
--- a/include/llvm/Support/ArrayRecycler.h
+++ b/include/llvm/Support/ArrayRecycler.h
@@ -1,9 +1,8 @@
//==- llvm/Support/ArrayRecycler.h - Recycling of Arrays ---------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Atomic.h b/include/llvm/Support/Atomic.h
index 552313f0c241..a8445fddc1a8 100644
--- a/include/llvm/Support/Atomic.h
+++ b/include/llvm/Support/Atomic.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Atomic.h - Atomic Operations -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/AtomicOrdering.h b/include/llvm/Support/AtomicOrdering.h
index a679ab30243e..763bc3ea7b28 100644
--- a/include/llvm/Support/AtomicOrdering.h
+++ b/include/llvm/Support/AtomicOrdering.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/AtomicOrdering.h ---Atomic Ordering---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Support/BinaryByteStream.h b/include/llvm/Support/BinaryByteStream.h
index 9808d3b72157..7acce9a03888 100644
--- a/include/llvm/Support/BinaryByteStream.h
+++ b/include/llvm/Support/BinaryByteStream.h
@@ -1,9 +1,8 @@
//===- BinaryByteStream.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
// A BinaryStream which stores data in a single contiguous memory buffer.
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryItemStream.h b/include/llvm/Support/BinaryItemStream.h
index 278723ddf8da..4cd66adcc01a 100644
--- a/include/llvm/Support/BinaryItemStream.h
+++ b/include/llvm/Support/BinaryItemStream.h
@@ -1,9 +1,8 @@
//===- BinaryItemStream.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryStream.h b/include/llvm/Support/BinaryStream.h
index 7677214e48ee..fcf4398550ee 100644
--- a/include/llvm/Support/BinaryStream.h
+++ b/include/llvm/Support/BinaryStream.h
@@ -1,9 +1,8 @@
//===- BinaryStream.h - Base interface for a stream of data -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h
index 7c110fcb6a4b..96d09db69ae5 100644
--- a/include/llvm/Support/BinaryStreamArray.h
+++ b/include/llvm/Support/BinaryStreamArray.h
@@ -1,9 +1,8 @@
//===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryStreamError.h b/include/llvm/Support/BinaryStreamError.h
index 7d9699d53639..cf6e034ffd2c 100644
--- a/include/llvm/Support/BinaryStreamError.h
+++ b/include/llvm/Support/BinaryStreamError.h
@@ -1,9 +1,8 @@
//===- BinaryStreamError.h - Error extensions for Binary Streams *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h
index 392958de30d5..d8fddde66bfa 100644
--- a/include/llvm/Support/BinaryStreamReader.h
+++ b/include/llvm/Support/BinaryStreamReader.h
@@ -1,9 +1,8 @@
//===- BinaryStreamReader.h - Reads objects from a binary stream *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -97,6 +96,18 @@ public:
return Error::success();
}
+ /// Read an unsigned LEB128 encoded value.
+ ///
+ /// \returns a success error code if the data was successfully read, otherwise
+ /// returns an appropriate error code.
+ Error readULEB128(uint64_t &Dest);
+
+ /// Read a signed LEB128 encoded value.
+ ///
+ /// \returns a success error code if the data was successfully read, otherwise
+ /// returns an appropriate error code.
+ Error readSLEB128(int64_t &Dest);
+
/// Read a null terminated string from \p Dest. Whether a copy occurs depends
/// on the implementation of the underlying stream. Updates the stream's
/// offset to point after the newly read data.
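
The readULEB128/readSLEB128 additions above decode LEB128 values straight out of a BinaryStreamReader. A minimal sketch, assuming a little-endian reader over a stack buffer; the byte sequence is the standard DWARF example encoding of 624485.

    #include "llvm/Support/BinaryStreamReader.h"
    #include "llvm/Support/Error.h"
    #include <cassert>

    using namespace llvm;

    static Error readSample() {
      const uint8_t Bytes[] = {0xE5, 0x8E, 0x26}; // ULEB128 of 624485
      BinaryStreamReader Reader(ArrayRef<uint8_t>(Bytes), support::little);
      uint64_t Value = 0;
      if (Error E = Reader.readULEB128(Value))
        return E;                      // propagate stream errors
      assert(Value == 624485 && "unexpected decode");
      return Error::success();
    }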
diff --git a/include/llvm/Support/BinaryStreamRef.h b/include/llvm/Support/BinaryStreamRef.h
index d8dc1392c01c..7427b8da5b43 100644
--- a/include/llvm/Support/BinaryStreamRef.h
+++ b/include/llvm/Support/BinaryStreamRef.h
@@ -1,9 +1,8 @@
//===- BinaryStreamRef.h - A copyable reference to a stream -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/BinaryStreamWriter.h b/include/llvm/Support/BinaryStreamWriter.h
index 6e8a68a30474..86d2389d9182 100644
--- a/include/llvm/Support/BinaryStreamWriter.h
+++ b/include/llvm/Support/BinaryStreamWriter.h
@@ -1,9 +1,8 @@
//===- BinaryStreamWriter.h - Writes objects to a BinaryStream ---*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -80,6 +79,20 @@ public:
return writeInteger<U>(static_cast<U>(Num));
}
+ /// Write the unsigned integer Value to the underlying stream using ULEB128
+ /// encoding.
+ ///
+ /// \returns a success error code if the data was successfully written,
+ /// otherwise returns an appropriate error code.
+ Error writeULEB128(uint64_t Value);
+
+ /// Write the signed integer Value to the underlying stream using SLEB128
+ /// encoding.
+ ///
+ /// \returns a success error code if the data was successfully written,
+ /// otherwise returns an appropriate error code.
+ Error writeSLEB128(int64_t Value);
+
/// Write the string \p Str to the underlying stream followed by a null
/// terminator. On success, updates the offset so that subsequent writes
/// occur at the next unwritten position. \p Str need not be null terminated
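
The matching writer side gains writeULEB128/writeSLEB128. A minimal sketch, assuming a little-endian writer over a fixed stack buffer:

    #include "llvm/Support/BinaryStreamWriter.h"

    using namespace llvm;

    static Error writeSample() {
      uint8_t Buffer[16] = {};
      BinaryStreamWriter Writer(MutableArrayRef<uint8_t>(Buffer),
                                support::little);
      if (Error E = Writer.writeULEB128(624485))
        return E;
      // Buffer now begins 0xE5 0x8E 0x26 and Writer.getOffset() is 3.
      return Error::success();
    }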
diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h
index 4b468f7acb32..18fb60e1904b 100644
--- a/include/llvm/Support/BlockFrequency.h
+++ b/include/llvm/Support/BlockFrequency.h
@@ -1,9 +1,8 @@
//===-------- BlockFrequency.h - Block Frequency Wrapper --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h
index 3a88e71c2480..cd9d369b4f4e 100644
--- a/include/llvm/Support/BranchProbability.h
+++ b/include/llvm/Support/BranchProbability.h
@@ -1,9 +1,8 @@
//===- BranchProbability.h - Branch Probability Wrapper ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -119,6 +118,13 @@ public:
return *this;
}
+ BranchProbability &operator/=(BranchProbability RHS) {
+ assert(N != UnknownN && RHS.N != UnknownN &&
+ "Unknown probability cannot participate in arithmetics.");
+ N = (static_cast<uint64_t>(N) * D + RHS.N / 2) / RHS.N;
+ return *this;
+ }
+
BranchProbability &operator/=(uint32_t RHS) {
assert(N != UnknownN &&
"Unknown probability cannot participate in arithmetics.");
@@ -129,27 +135,38 @@ public:
BranchProbability operator+(BranchProbability RHS) const {
BranchProbability Prob(*this);
- return Prob += RHS;
+ Prob += RHS;
+ return Prob;
}
BranchProbability operator-(BranchProbability RHS) const {
BranchProbability Prob(*this);
- return Prob -= RHS;
+ Prob -= RHS;
+ return Prob;
}
BranchProbability operator*(BranchProbability RHS) const {
BranchProbability Prob(*this);
- return Prob *= RHS;
+ Prob *= RHS;
+ return Prob;
}
BranchProbability operator*(uint32_t RHS) const {
BranchProbability Prob(*this);
- return Prob *= RHS;
+ Prob *= RHS;
+ return Prob;
+ }
+
+ BranchProbability operator/(BranchProbability RHS) const {
+ BranchProbability Prob(*this);
+ Prob /= RHS;
+ return Prob;
}
BranchProbability operator/(uint32_t RHS) const {
BranchProbability Prob(*this);
- return Prob /= RHS;
+ Prob /= RHS;
+ return Prob;
}
bool operator==(BranchProbability RHS) const { return N == RHS.N; }
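
With the new operator/= and operator/ above, one BranchProbability can be rescaled by another. A small sketch of the intent:

    #include "llvm/Support/BranchProbability.h"

    using namespace llvm;

    // Dividing 1/8 by 1/2 yields roughly 1/4 (subject to the fixed-point
    // rounding in operator/= above).
    static BranchProbability rescale() {
      BranchProbability Eighth = BranchProbability::getBranchProbability(1, 8);
      BranchProbability Half = BranchProbability::getBranchProbability(1, 2);
      return Eighth / Half;
    }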
diff --git a/include/llvm/Support/BuryPointer.h b/include/llvm/Support/BuryPointer.h
index 53f1f395b922..276a5b7089c3 100644
--- a/include/llvm/Support/BuryPointer.h
+++ b/include/llvm/Support/BuryPointer.h
@@ -1,9 +1,8 @@
//===- llvm/Support/BuryPointer.h - Memory Manipulation/Leak ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h
index f60f99d376ad..46d6b4e3fa7d 100644
--- a/include/llvm/Support/CBindingWrapping.h
+++ b/include/llvm/Support/CBindingWrapping.h
@@ -1,9 +1,8 @@
-//===- llvm/Support/CBindingWrapph.h - C Interface Wrapping -----*- C++ -*-===//
+//===- llvm/Support/CBindingWrapping.h - C Interface Wrapping ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/CFGUpdate.h b/include/llvm/Support/CFGUpdate.h
index 63c24a3d2a20..eeaf5d0a21ac 100644
--- a/include/llvm/Support/CFGUpdate.h
+++ b/include/llvm/Support/CFGUpdate.h
@@ -1,9 +1,8 @@
//===- CFGUpdate.h - Encode a CFG Edge Update. ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/COM.h b/include/llvm/Support/COM.h
index a2d5a7a68ba9..d59966f849b4 100644
--- a/include/llvm/Support/COM.h
+++ b/include/llvm/Support/COM.h
@@ -1,9 +1,8 @@
//===- llvm/Support/COM.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Support/CRC.h b/include/llvm/Support/CRC.h
new file mode 100644
index 000000000000..6ea8e3edcea4
--- /dev/null
+++ b/include/llvm/Support/CRC.h
@@ -0,0 +1,25 @@
+//===-- llvm/Support/CRC.h - Cyclic Redundancy Check-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic functions for calculating Cyclic Redundancy Check
+// or CRC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CRC_H
+#define LLVM_SUPPORT_CRC_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+/// zlib independent CRC32 calculation.
+uint32_t crc32(uint32_t CRC, StringRef S);
+} // end namespace llvm
+
+#endif
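
The new CRC.h exposes a single free function. A minimal sketch of incremental use; whether the running value can be fed back directly as the seed (zlib-style chaining) is an assumption here, not something the header states.

    #include "llvm/Support/CRC.h"
    #include "llvm/ADT/StringRef.h"
    #include <cstdint>

    static uint32_t checksum() {
      uint32_t CRC = llvm::crc32(0, llvm::StringRef("hello "));
      CRC = llvm::crc32(CRC, llvm::StringRef("world")); // continue the sum
      return CRC;
    }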
diff --git a/include/llvm/Support/CachePruning.h b/include/llvm/Support/CachePruning.h
index cf3f8ec67a52..a72a86439f6a 100644
--- a/include/llvm/Support/CachePruning.h
+++ b/include/llvm/Support/CachePruning.h
@@ -1,9 +1,8 @@
//=- CachePruning.h - Helper to manage the pruning of a cache dir -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Capacity.h b/include/llvm/Support/Capacity.h
index 7460f9825bd3..6b99e0aaa488 100644
--- a/include/llvm/Support/Capacity.h
+++ b/include/llvm/Support/Capacity.h
@@ -1,9 +1,8 @@
//===--- Capacity.h - Generic computation of ADT memory use -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index 3f21e0f9ebc3..46bdedb04cfe 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -144,6 +143,16 @@ template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
typename simplify_type<const Y>::SimpleType>::doit(Val);
}
+// isa_and_nonnull<X> - Functionally identical to isa, except that a null value
+// is accepted.
+//
+template <class X, class Y>
+LLVM_NODISCARD inline bool isa_and_nonnull(const Y &Val) {
+ if (!Val)
+ return false;
+ return isa<X>(Val);
+}
+
//===----------------------------------------------------------------------===//
// cast<x> Support Templates
//===----------------------------------------------------------------------===//
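
isa_and_nonnull<X> behaves like isa<X> but returns false for a null value instead of asserting. A tiny sketch with an illustrative classof-based hierarchy (Shape/Circle are made-up names, not LLVM classes):

    #include "llvm/Support/Casting.h"

    struct Shape {
      enum Kind { SK_Circle, SK_Square } TheKind;
      Shape(Kind K) : TheKind(K) {}
    };
    struct Circle : Shape {
      Circle() : Shape(SK_Circle) {}
      static bool classof(const Shape *S) { return S->TheKind == SK_Circle; }
    };

    static bool isCircle(const Shape *S) {
      // isa<Circle>(S) would assert on a null S; the new helper just
      // returns false.
      return llvm::isa_and_nonnull<Circle>(S);
    }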
diff --git a/include/llvm/Support/CheckedArithmetic.h b/include/llvm/Support/CheckedArithmetic.h
index 039c374136ff..8a50e3d5ddf6 100644
--- a/include/llvm/Support/CheckedArithmetic.h
+++ b/include/llvm/Support/CheckedArithmetic.h
@@ -1,9 +1,8 @@
//==-- llvm/Support/CheckedArithmetic.h - Safe arithmetical operations *- C++ //
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,6 +49,15 @@ checkedAdd(T LHS, T RHS) {
return checkedOp(LHS, RHS, &llvm::APInt::sadd_ov);
}
+/// Subtract two signed integers \p LHS and \p RHS.
+/// \return Optional of sum if no signed overflow occurred,
+/// \c None otherwise.
+template <typename T>
+typename std::enable_if<std::is_signed<T>::value, llvm::Optional<T>>::type
+checkedSub(T LHS, T RHS) {
+ return checkedOp(LHS, RHS, &llvm::APInt::ssub_ov);
+}
+
/// Multiply two signed integers \p LHS and \p RHS.
/// \return Optional of product if no signed overflow occurred,
/// \c None otherwise.
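
checkedSub mirrors checkedAdd: it returns None instead of wrapping on signed overflow. A minimal sketch:

    #include "llvm/Support/CheckedArithmetic.h"
    #include <cstdint>

    // e.g. subOverflows(INT32_MIN, 1) == true
    static bool subOverflows(int32_t A, int32_t B) {
      llvm::Optional<int32_t> R = llvm::checkedSub<int32_t>(A, B);
      return !R.hasValue();
    }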
diff --git a/include/llvm/Support/Chrono.h b/include/llvm/Support/Chrono.h
index 57677e8d5cf1..334ab60835a4 100644
--- a/include/llvm/Support/Chrono.h
+++ b/include/llvm/Support/Chrono.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Chrono.h - Utilities for Timing Manipulation-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -34,21 +33,21 @@ template <typename D = std::chrono::nanoseconds>
using TimePoint = std::chrono::time_point<std::chrono::system_clock, D>;
/// Convert a TimePoint to std::time_t
-LLVM_ATTRIBUTE_ALWAYS_INLINE inline std::time_t toTimeT(TimePoint<> TP) {
+inline std::time_t toTimeT(TimePoint<> TP) {
using namespace std::chrono;
return system_clock::to_time_t(
time_point_cast<system_clock::time_point::duration>(TP));
}
/// Convert a std::time_t to a TimePoint
-LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<std::chrono::seconds>
+inline TimePoint<std::chrono::seconds>
toTimePoint(std::time_t T) {
using namespace std::chrono;
return time_point_cast<seconds>(system_clock::from_time_t(T));
}
/// Convert a std::time_t + nanoseconds to a TimePoint
-LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<>
+inline TimePoint<>
toTimePoint(std::time_t T, uint32_t nsec) {
using namespace std::chrono;
return time_point_cast<nanoseconds>(system_clock::from_time_t(T))
diff --git a/include/llvm/Support/CodeGen.h b/include/llvm/Support/CodeGen.h
index 22e74167266c..a3f423e558cf 100644
--- a/include/llvm/Support/CodeGen.h
+++ b/include/llvm/Support/CodeGen.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/CodeGen.h - CodeGen Concepts ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,13 +18,14 @@ namespace llvm {
// Relocation model types.
namespace Reloc {
- enum Model { Static, PIC_, DynamicNoPIC, ROPI, RWPI, ROPI_RWPI };
+ // Cannot be named PIC due to collision with -DPIC
+ enum Model { Static, PIC_, DynamicNoPIC, ROPI, RWPI, ROPI_RWPI };
}
// Code model types.
namespace CodeModel {
// Sync changes with CodeGenCWrappers.h.
- enum Model { Tiny, Small, Kernel, Medium, Large };
+ enum Model { Tiny, Small, Kernel, Medium, Large };
}
namespace PICLevel {
@@ -50,10 +50,10 @@ namespace llvm {
// Code generation optimization level.
namespace CodeGenOpt {
enum Level {
- None, // -O0
- Less, // -O1
- Default, // -O2, -Os
- Aggressive // -O3
+ None = 0, // -O0
+ Less = 1, // -O1
+ Default = 2, // -O2, -Os
+ Aggressive = 3 // -O3
};
}
diff --git a/include/llvm/Support/CodeGenCoverage.h b/include/llvm/Support/CodeGenCoverage.h
index c863be35b822..0b1af779ffb0 100644
--- a/include/llvm/Support/CodeGenCoverage.h
+++ b/include/llvm/Support/CodeGenCoverage.h
@@ -1,9 +1,8 @@
//== llvm/Support/CodeGenCoverage.h ------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file This file provides rule coverage tracking for tablegen-erated CodeGen.
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index a8ad89384d17..3cc2c3c0121b 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -1,9 +1,8 @@
//===- llvm/Support/CommandLine.h - Command line handler --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -67,7 +66,8 @@ namespace cl {
bool ParseCommandLineOptions(int argc, const char *const *argv,
StringRef Overview = "",
raw_ostream *Errs = nullptr,
- const char *EnvVar = nullptr);
+ const char *EnvVar = nullptr,
+ bool LongOptionsUseDoubleDash = false);
//===----------------------------------------------------------------------===//
// ParseEnvironmentOptions - Environment variable option processing alternate
@@ -159,23 +159,27 @@ enum OptionHidden { // Control whether -help shows this option
// AlwaysPrefix - Only allow the behavior enabled by the Prefix flag and reject
// the Option=Value form.
//
-// Grouping - With this option enabled, multiple letter options are allowed to
-// bunch together with only a single hyphen for the whole group. This allows
-// emulation of the behavior that ls uses for example: ls -la === ls -l -a
-//
enum FormattingFlags {
NormalFormatting = 0x00, // Nothing special
Positional = 0x01, // Is a positional argument, no '-' required
Prefix = 0x02, // Can this option directly prefix its value?
- AlwaysPrefix = 0x03, // Can this option only directly prefix its value?
- Grouping = 0x04 // Can this option group with other options?
+ AlwaysPrefix = 0x03 // Can this option only directly prefix its value?
};
enum MiscFlags { // Miscellaneous flags to adjust argument
CommaSeparated = 0x01, // Should this cl::list split between commas?
PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args?
- Sink = 0x04 // Should this cl::list eat all unknown options?
+ Sink = 0x04, // Should this cl::list eat all unknown options?
+
+ // Grouping - Can this option group with other options?
+ // If this is enabled, multiple letter options are allowed to bunch together
+ // with only a single hyphen for the whole group. This allows emulation
+ // of the behavior that ls uses for example: ls -la === ls -l -a
+ Grouping = 0x08,
+
+ // Default option
+ DefaultOption = 0x10
};
//===----------------------------------------------------------------------===//
@@ -261,26 +265,27 @@ class Option {
// Out of line virtual function to provide home for the class.
virtual void anchor();
- int NumOccurrences = 0; // The number of times specified
+ uint16_t NumOccurrences; // The number of times specified
// Occurrences, HiddenFlag, and Formatting are all enum types but to avoid
// problems with signed enums in bitfields.
- unsigned Occurrences : 3; // enum NumOccurrencesFlag
+ uint16_t Occurrences : 3; // enum NumOccurrencesFlag
// not using the enum type for 'Value' because zero is an implementation
// detail representing the non-value
- unsigned Value : 2;
- unsigned HiddenFlag : 2; // enum OptionHidden
- unsigned Formatting : 3; // enum FormattingFlags
- unsigned Misc : 3;
- unsigned Position = 0; // Position of last occurrence of the option
- unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option.
+ uint16_t Value : 2;
+ uint16_t HiddenFlag : 2; // enum OptionHidden
+ uint16_t Formatting : 2; // enum FormattingFlags
+ uint16_t Misc : 5;
+ uint16_t FullyInitialized : 1; // Has addArgument been called?
+ uint16_t Position; // Position of last occurrence of the option
+ uint16_t AdditionalVals; // Greater than 0 for multi-valued option.
public:
StringRef ArgStr; // The argument string itself (ex: "help", "o")
StringRef HelpStr; // The descriptive text message for -help
StringRef ValueStr; // String describing what the value of this option is
- OptionCategory *Category; // The Category this option belongs to
- SmallPtrSet<SubCommand *, 4> Subs; // The subcommands this option belongs to.
- bool FullyInitialized = false; // Has addArgument been called?
+ SmallVector<OptionCategory *, 1>
+ Categories; // The Categories this option belongs to
+ SmallPtrSet<SubCommand *, 1> Subs; // The subcommands this option belongs to.
inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
return (enum NumOccurrencesFlag)Occurrences;
@@ -306,6 +311,7 @@ public:
bool hasArgStr() const { return !ArgStr.empty(); }
bool isPositional() const { return getFormattingFlag() == cl::Positional; }
bool isSink() const { return getMiscFlags() & cl::Sink; }
+ bool isDefaultOption() const { return getMiscFlags() & cl::DefaultOption; }
bool isConsumeAfter() const {
return getNumOccurrencesFlag() == cl::ConsumeAfter;
@@ -329,14 +335,17 @@ public:
void setFormattingFlag(enum FormattingFlags V) { Formatting = V; }
void setMiscFlag(enum MiscFlags M) { Misc |= M; }
void setPosition(unsigned pos) { Position = pos; }
- void setCategory(OptionCategory &C) { Category = &C; }
+ void addCategory(OptionCategory &C);
void addSubCommand(SubCommand &S) { Subs.insert(&S); }
protected:
explicit Option(enum NumOccurrencesFlag OccurrencesFlag,
enum OptionHidden Hidden)
- : Occurrences(OccurrencesFlag), Value(0), HiddenFlag(Hidden),
- Formatting(NormalFormatting), Misc(0), Category(&GeneralCategory) {}
+ : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0),
+ HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0),
+ FullyInitialized(false), Position(0), AdditionalVals(0) {
+ Categories.push_back(&GeneralCategory);
+ }
inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
@@ -382,7 +391,7 @@ public:
}
inline int getNumOccurrences() const { return NumOccurrences; }
- inline void reset() { NumOccurrences = 0; }
+ void reset();
};
//===----------------------------------------------------------------------===//
@@ -447,7 +456,7 @@ struct cat {
cat(OptionCategory &c) : Category(c) {}
- template <class Opt> void apply(Opt &O) const { O.setCategory(Category); }
+ template <class Opt> void apply(Opt &O) const { O.addCategory(Category); }
};
// sub - Specify the subcommand that this option belongs to.
@@ -823,6 +832,8 @@ class basic_parser_impl { // non-template implementation of basic_parser<t>
public:
basic_parser_impl(Option &) {}
+ virtual ~basic_parser_impl() {}
+
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueRequired;
}
@@ -850,8 +861,6 @@ public:
virtual void anchor();
protected:
- ~basic_parser_impl() = default;
-
// A helper for basic_parser::printOptionDiff.
void printOptionName(const Option &O, size_t GlobalWidth) const;
};
@@ -865,15 +874,12 @@ public:
using OptVal = OptionValue<DataType>;
basic_parser(Option &O) : basic_parser_impl(O) {}
-
-protected:
- ~basic_parser() = default;
};
//--------------------------------------------------
// parser<bool>
//
-template <> class parser<bool> final : public basic_parser<bool> {
+template <> class parser<bool> : public basic_parser<bool> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -900,8 +906,7 @@ extern template class basic_parser<bool>;
//--------------------------------------------------
// parser<boolOrDefault>
-template <>
-class parser<boolOrDefault> final : public basic_parser<boolOrDefault> {
+template <> class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -927,7 +932,7 @@ extern template class basic_parser<boolOrDefault>;
//--------------------------------------------------
// parser<int>
//
-template <> class parser<int> final : public basic_parser<int> {
+template <> class parser<int> : public basic_parser<int> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -949,7 +954,7 @@ extern template class basic_parser<int>;
//--------------------------------------------------
// parser<unsigned>
//
-template <> class parser<unsigned> final : public basic_parser<unsigned> {
+template <> class parser<unsigned> : public basic_parser<unsigned> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -969,11 +974,33 @@ public:
extern template class basic_parser<unsigned>;
//--------------------------------------------------
+// parser<unsigned long>
+//
+template <>
+class parser<unsigned long> final : public basic_parser<unsigned long> {
+public:
+ parser(Option &O) : basic_parser(O) {}
+
+ // parse - Return true on error.
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned long &Val);
+
+ // getValueName - Overload in subclass to provide a better default value.
+ StringRef getValueName() const override { return "ulong"; }
+
+ void printOptionDiff(const Option &O, unsigned long V, OptVal Default,
+ size_t GlobalWidth) const;
+
+ // An out-of-line virtual method to provide a 'home' for this class.
+ void anchor() override;
+};
+
+extern template class basic_parser<unsigned long>;
+
+//--------------------------------------------------
// parser<unsigned long long>
//
template <>
-class parser<unsigned long long> final
- : public basic_parser<unsigned long long> {
+class parser<unsigned long long> : public basic_parser<unsigned long long> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -982,7 +1009,7 @@ public:
unsigned long long &Val);
// getValueName - Overload in subclass to provide a better default value.
- StringRef getValueName() const override { return "uint"; }
+ StringRef getValueName() const override { return "ulong"; }
void printOptionDiff(const Option &O, unsigned long long V, OptVal Default,
size_t GlobalWidth) const;
@@ -996,7 +1023,7 @@ extern template class basic_parser<unsigned long long>;
//--------------------------------------------------
// parser<double>
//
-template <> class parser<double> final : public basic_parser<double> {
+template <> class parser<double> : public basic_parser<double> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1018,7 +1045,7 @@ extern template class basic_parser<double>;
//--------------------------------------------------
// parser<float>
//
-template <> class parser<float> final : public basic_parser<float> {
+template <> class parser<float> : public basic_parser<float> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1040,7 +1067,7 @@ extern template class basic_parser<float>;
//--------------------------------------------------
// parser<std::string>
//
-template <> class parser<std::string> final : public basic_parser<std::string> {
+template <> class parser<std::string> : public basic_parser<std::string> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1065,7 +1092,7 @@ extern template class basic_parser<std::string>;
//--------------------------------------------------
// parser<char>
//
-template <> class parser<char> final : public basic_parser<char> {
+template <> class parser<char> : public basic_parser<char> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1178,7 +1205,11 @@ template <> struct applicator<FormattingFlags> {
};
template <> struct applicator<MiscFlags> {
- static void opt(MiscFlags MF, Option &O) { O.setMiscFlag(MF); }
+ static void opt(MiscFlags MF, Option &O) {
+ assert((MF != Grouping || O.ArgStr.size() == 1) &&
+ "cl::Grouping can only apply to single charater Options.");
+ O.setMiscFlag(MF);
+ }
};
// apply method - Apply modifiers to an option in a type safe way.
@@ -1398,6 +1429,8 @@ template <class DataType, class StorageClass> class list_storage {
public:
list_storage() = default;
+ void clear() {}
+
bool setLocation(Option &O, StorageClass &L) {
if (Location)
return O.error("cl::location(x) specified more than once!");
@@ -1449,6 +1482,10 @@ public:
reference operator[](size_type pos) { return Storage[pos]; }
const_reference operator[](size_type pos) const { return Storage[pos]; }
+ void clear() {
+ Storage.clear();
+ }
+
iterator erase(const_iterator pos) { return Storage.erase(pos); }
iterator erase(const_iterator first, const_iterator last) {
return Storage.erase(first, last);
@@ -1526,7 +1563,10 @@ class list : public Option, public list_storage<DataType, StorageClass> {
void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const override {
}
- void setDefault() override {}
+ void setDefault() override {
+ Positions.clear();
+ list_storage<DataType, StorageClass>::clear();
+ }
void done() {
addArgument();
@@ -1732,7 +1772,10 @@ class alias : public Option {
error("cl::alias must have argument name specified!");
if (!AliasFor)
error("cl::alias must have an cl::aliasopt(option) specified!");
+ if (!Subs.empty())
+ error("cl::alias must not have cl::sub(), aliased option's cl::sub() will be used!");
Subs = AliasFor->Subs;
+ Categories = AliasFor->Categories;
addArgument();
}
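
The CommandLine.h changes above move cl::Grouping into MiscFlags (so single-letter flags can be bunched as "-la" == "-l -a"), allow an option to belong to several categories, and add a parser for unsigned long. A hedged sketch of how these could be combined; the option names and categories are illustrative only.

    #include "llvm/Support/CommandLine.h"

    using namespace llvm;

    static cl::OptionCategory CatA("Listing"), CatB("Common");

    static cl::opt<bool> LongFormat("l", cl::desc("long listing"),
                                    cl::Grouping, cl::cat(CatA), cl::cat(CatB));
    static cl::opt<bool> ShowAll("a", cl::desc("show hidden"),
                                 cl::Grouping, cl::cat(CatA));
    static cl::opt<unsigned long> Limit("limit", cl::desc("entry limit"),
                                        cl::init(0UL)); // new ulong parser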
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 14e4d6e97140..3f4f465f3960 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Compiler.h - Compiler abstraction support --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -255,6 +254,15 @@
#define LLVM_FALLTHROUGH
#endif
+/// LLVM_REQUIRE_CONSTANT_INITIALIZATION - Apply this to globals to ensure that
+/// they are constant initialized.
+#if __has_cpp_attribute(clang::require_constant_initialization)
+#define LLVM_REQUIRE_CONSTANT_INITIALIZATION \
+ [[clang::require_constant_initialization]]
+#else
+#define LLVM_REQUIRE_CONSTANT_INITIALIZATION
+#endif
+
/// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
/// pedantic diagnostics.
#ifdef __GNUC__
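
A one-line sketch of the new macro in use; the global below is illustrative. Under clang the attribute makes the build fail if the variable is not constant-initialized; elsewhere the macro expands to nothing.

    #include "llvm/Support/Compiler.h"

    LLVM_REQUIRE_CONSTANT_INITIALIZATION static const unsigned DefaultAlignment = 8;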
diff --git a/include/llvm/Support/Compression.h b/include/llvm/Support/Compression.h
index f7258f4bf8f8..5bc0e56913fe 100644
--- a/include/llvm/Support/Compression.h
+++ b/include/llvm/Support/Compression.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Compression.h ---Compression----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h
index 6ae56c2470bb..1add185330fa 100644
--- a/include/llvm/Support/ConvertUTF.h
+++ b/include/llvm/Support/ConvertUTF.h
@@ -1,9 +1,8 @@
/*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*==------------------------------------------------------------------------==*/
/*
diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h
index 7b3fd4f882e4..feb449e2899c 100644
--- a/include/llvm/Support/CrashRecoveryContext.h
+++ b/include/llvm/Support/CrashRecoveryContext.h
@@ -1,9 +1,8 @@
//===--- CrashRecoveryContext.h - Crash Recovery ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/DJB.h b/include/llvm/Support/DJB.h
index e03111473362..8a04a324a5dc 100644
--- a/include/llvm/Support/DJB.h
+++ b/include/llvm/Support/DJB.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/DJB.h ---DJB Hash --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 4381b5bf1633..ec01b7d9576a 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -1,9 +1,8 @@
-//===-- llvm/Support/DotGraphTraits.h - Customize .dot output ---*- C++ -*-===//
+//===-- llvm/Support/DOTGraphTraits.h - Customize .dot output ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h
index 2b1639856e79..6b08a2a2a445 100644
--- a/include/llvm/Support/DataExtractor.h
+++ b/include/llvm/Support/DataExtractor.h
@@ -1,9 +1,8 @@
//===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/DataTypes.h b/include/llvm/Support/DataTypes.h
index ad60a5b3f300..a3fcc82531b7 100644
--- a/include/llvm/Support/DataTypes.h
+++ b/include/llvm/Support/DataTypes.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/DataTypes.h - Define fixed size types ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index df86dbb82414..64b730951bda 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Debug.h - Easy way to add debug output ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/DebugCounter.h b/include/llvm/Support/DebugCounter.h
index 6eadd5c6aeff..e7d1fa68f21a 100644
--- a/include/llvm/Support/DebugCounter.h
+++ b/include/llvm/Support/DebugCounter.h
@@ -1,9 +1,8 @@
//===- llvm/Support/DebugCounter.h - Debug counter support ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
index 9563b483f6d5..95d5ba281e22 100644
--- a/include/llvm/Support/DynamicLibrary.h
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/DynamicLibrary.h - Portable Dynamic Library -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index a4d3f4ff793d..d8be94427d7e 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -1,9 +1,8 @@
//===- Endian.h - Utilities for IO with endian specific data ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,10 +203,14 @@ inline void writeAtBitAlignment(void *memory, value_type value,
namespace detail {
-template<typename value_type,
- endianness endian,
- std::size_t alignment>
+template<typename ValueType,
+ endianness Endian,
+ std::size_t Alignment>
struct packed_endian_specific_integral {
+ using value_type = ValueType;
+ static constexpr endianness endian = Endian;
+ static constexpr std::size_t alignment = Alignment;
+
packed_endian_specific_integral() = default;
explicit packed_endian_specific_integral(value_type val) { *this = val; }
@@ -335,6 +338,17 @@ using unaligned_int32_t =
using unaligned_int64_t =
detail::packed_endian_specific_integral<int64_t, native, unaligned>;
+template <typename T>
+using little_t = detail::packed_endian_specific_integral<T, little, unaligned>;
+template <typename T>
+using big_t = detail::packed_endian_specific_integral<T, big, unaligned>;
+
+template <typename T>
+using aligned_little_t =
+ detail::packed_endian_specific_integral<T, little, aligned>;
+template <typename T>
+using aligned_big_t = detail::packed_endian_specific_integral<T, big, aligned>;
+
namespace endian {
template <typename T> inline T read(const void *P, endianness E) {
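
The new little_t/big_t (and aligned_*) alias templates spell out the endianness of each field, so a wire-format struct can be overlaid on raw, possibly unaligned bytes. A minimal sketch; the struct layout is an assumption for illustration.

    #include "llvm/Support/Endian.h"
    #include <cstdint>

    using namespace llvm::support;

    struct WireHeader {
      little_t<uint32_t> Magic;   // 32-bit little-endian, unaligned
      big_t<uint16_t> Version;    // 16-bit big-endian, unaligned
    };

    static uint32_t readMagic(const void *Raw) {
      const auto *H = reinterpret_cast<const WireHeader *>(Raw);
      return H->Magic; // converted to native byte order on read
    }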
diff --git a/include/llvm/Support/EndianStream.h b/include/llvm/Support/EndianStream.h
index 9742e253ad3e..87898038d216 100644
--- a/include/llvm/Support/EndianStream.h
+++ b/include/llvm/Support/EndianStream.h
@@ -1,9 +1,8 @@
//===- EndianStream.h - Stream ops with endian specific data ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Errc.h b/include/llvm/Support/Errc.h
index dce42782a0d3..9be8e5705a54 100644
--- a/include/llvm/Support/Errc.h
+++ b/include/llvm/Support/Errc.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Errc.h - Defines the llvm::errc enum --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Errno.h b/include/llvm/Support/Errno.h
index 8069c3639df3..aedb5fb292b8 100644
--- a/include/llvm/Support/Errno.h
+++ b/include/llvm/Support/Errno.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Errno.h - Portable+convenient errno handling -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h
index ee2cbeec97a8..299fce7a1368 100644
--- a/include/llvm/Support/Error.h
+++ b/include/llvm/Support/Error.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Error.h - Recoverable error handling --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1161,8 +1160,8 @@ private:
/// Create formatted StringError object.
template <typename... Ts>
-Error createStringError(std::error_code EC, char const *Fmt,
- const Ts &... Vals) {
+inline Error createStringError(std::error_code EC, char const *Fmt,
+ const Ts &... Vals) {
std::string Buffer;
raw_string_ostream Stream(Buffer);
Stream << format(Fmt, Vals...);
@@ -1171,18 +1170,27 @@ Error createStringError(std::error_code EC, char const *Fmt,
Error createStringError(std::error_code EC, char const *Msg);
+template <typename... Ts>
+inline Error createStringError(std::errc EC, char const *Fmt,
+ const Ts &... Vals) {
+ return createStringError(std::make_error_code(EC), Fmt, Vals...);
+}
+
/// This class wraps a filename and another Error.
///
/// In some cases, an error needs to live along a 'source' name, in order to
/// show more detailed information to the user.
class FileError final : public ErrorInfo<FileError> {
- friend Error createFileError(std::string, Error);
+ friend Error createFileError(const Twine &, Error);
+ friend Error createFileError(const Twine &, size_t, Error);
public:
void log(raw_ostream &OS) const override {
assert(Err && !FileName.empty() && "Trying to log after takeError().");
OS << "'" << FileName << "': ";
+ if (Line.hasValue())
+ OS << "line " << Line.getValue() << ": ";
Err->log(OS);
}
@@ -1194,29 +1202,51 @@ public:
static char ID;
private:
- FileError(std::string F, std::unique_ptr<ErrorInfoBase> E) {
+ FileError(const Twine &F, Optional<size_t> LineNum,
+ std::unique_ptr<ErrorInfoBase> E) {
assert(E && "Cannot create FileError from Error success value.");
- assert(!F.empty() &&
+ assert(!F.isTriviallyEmpty() &&
"The file name provided to FileError must not be empty.");
- FileName = F;
+ FileName = F.str();
Err = std::move(E);
+ Line = std::move(LineNum);
}
- static Error build(std::string F, Error E) {
- return Error(std::unique_ptr<FileError>(new FileError(F, E.takePayload())));
+ static Error build(const Twine &F, Optional<size_t> Line, Error E) {
+ return Error(
+ std::unique_ptr<FileError>(new FileError(F, Line, E.takePayload())));
}
std::string FileName;
+ Optional<size_t> Line;
std::unique_ptr<ErrorInfoBase> Err;
};
/// Concatenate a source file path and/or name with an Error. The resulting
/// Error is unchecked.
-inline Error createFileError(std::string F, Error E) {
- return FileError::build(F, std::move(E));
+inline Error createFileError(const Twine &F, Error E) {
+ return FileError::build(F, Optional<size_t>(), std::move(E));
+}
+
+/// Concatenate a source file path and/or name with line number and an Error.
+/// The resulting Error is unchecked.
+inline Error createFileError(const Twine &F, size_t Line, Error E) {
+ return FileError::build(F, Optional<size_t>(Line), std::move(E));
+}
+
+/// Concatenate a source file path and/or name with a std::error_code
+/// to form an Error object.
+inline Error createFileError(const Twine &F, std::error_code EC) {
+ return createFileError(F, errorCodeToError(EC));
+}
+
+/// Concatenate a source file path and/or name with line number and
+/// std::error_code to form an Error object.
+inline Error createFileError(const Twine &F, size_t Line, std::error_code EC) {
+ return createFileError(F, Line, errorCodeToError(EC));
}
-Error createFileError(std::string F, ErrorSuccess) = delete;
+Error createFileError(const Twine &F, ErrorSuccess) = delete;
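
A minimal sketch (not part of the header) of how the overloads above compose: the path "config.txt", the line number, and the message text are placeholder values.

  // Format a message from an std::errc value, then attach a file name and a
  // line number. Logging the result prints roughly
  // "'config.txt': line 3: expected integer, got 'abc'".
  Error makeExampleError() {
    Error E = createStringError(std::errc::invalid_argument,
                                "expected integer, got '%s'", "abc");
    return createFileError("config.txt", 3, std::move(E));
  }
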
/// Helper for check-and-exit error handling.
///
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index fec39e59a717..f75c2984a9ff 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -1,9 +1,8 @@
//===- llvm/Support/ErrorHandling.h - Fatal error handling ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h
index e6ce764ad822..8211f4d8a098 100644
--- a/include/llvm/Support/ErrorOr.h
+++ b/include/llvm/Support/ErrorOr.h
@@ -1,9 +1,8 @@
//===- llvm/Support/ErrorOr.h - Error Smart Pointer -------------*- C++ -*-===//
//
-// The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h
index 4061a26e22c5..0cd25a71a3b3 100644
--- a/include/llvm/Support/FileCheck.h
+++ b/include/llvm/Support/FileCheck.h
@@ -1,9 +1,8 @@
//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,9 +36,218 @@ struct FileCheckRequest {
bool VerboseVerbose = false;
};
+//===----------------------------------------------------------------------===//
+// Numeric substitution handling code.
+//===----------------------------------------------------------------------===//
+
+/// Base class representing the AST of a given expression.
+class FileCheckExpressionAST {
+public:
+ virtual ~FileCheckExpressionAST() = default;
+
+ /// Evaluates and \returns the value of the expression represented by this
+ /// AST or an error if evaluation fails.
+ virtual Expected<uint64_t> eval() const = 0;
+};
+
+/// Class representing an unsigned literal in the AST of an expression.
+class FileCheckExpressionLiteral : public FileCheckExpressionAST {
+private:
+ /// Actual value of the literal.
+ uint64_t Value;
+
+public:
+ /// Constructs a literal with the specified value.
+ FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
+
+ /// \returns the literal's value.
+ Expected<uint64_t> eval() const { return Value; }
+};
+
+/// Class to represent an undefined variable error, which quotes that
+/// variable's name when printed.
+class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+private:
+ StringRef VarName;
+
+public:
+ static char ID;
+
+ FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+
+ StringRef getVarName() const { return VarName; }
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ /// Print name of variable associated with this error.
+ void log(raw_ostream &OS) const override {
+ OS << "\"";
+ OS.write_escaped(VarName) << "\"";
+ }
+};
+
+/// Class representing a numeric variable and its associated current value.
+class FileCheckNumericVariable {
+private:
+ /// Name of the numeric variable.
+ StringRef Name;
+
+ /// Value of numeric variable, if defined, or None otherwise.
+ Optional<uint64_t> Value;
+
+ /// Line number where this variable is defined, or None if defined before
+ /// input is parsed. Used to determine whether a variable is defined on the
+ /// same line as a given use.
+ Optional<size_t> DefLineNumber;
+
+public:
+ /// Constructor for a variable \p Name defined at line \p DefLineNumber or
+ /// defined before input is parsed if DefLineNumber is None.
+ FileCheckNumericVariable(StringRef Name,
+ Optional<size_t> DefLineNumber = None)
+ : Name(Name), DefLineNumber(DefLineNumber) {}
+
+ /// \returns name of this numeric variable.
+ StringRef getName() const { return Name; }
+
+ /// \returns this variable's value.
+ Optional<uint64_t> getValue() const { return Value; }
+
+ /// Sets value of this numeric variable, if undefined. Triggers an assertion
+ /// failure if the variable is actually defined.
+ void setValue(uint64_t Value);
+
+ /// Clears value of this numeric variable, regardless of whether it is
+ /// currently defined or not.
+ void clearValue();
+
+ /// \returns the line number where this variable is defined, if any, or None
+ /// if defined before input is parsed.
+ Optional<size_t> getDefLineNumber() { return DefLineNumber; }
+};
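
A minimal sketch (not part of the header; setValue() and clearValue() are defined out of line in the FileCheck implementation, and the name "N" and the values are placeholders) of the lifecycle this interface implies:

  void numericVariableExample() {
    FileCheckNumericVariable NumVar("N", /*DefLineNumber=*/1);
    assert(!NumVar.getValue().hasValue()); // undefined until a match sets it
    NumVar.setValue(42);                   // defined by a successful match
    uint64_t Val = *NumVar.getValue();     // later substitutions read this value
    (void)Val;
    NumVar.clearValue();                   // reset when local variables are cleared
  }
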
+
+/// Class representing the use of a numeric variable in the AST of an
+/// expression.
+class FileCheckNumericVariableUse : public FileCheckExpressionAST {
+private:
+ /// Name of the numeric variable.
+ StringRef Name;
+
+ /// Pointer to the class instance for the variable this use is about.
+ FileCheckNumericVariable *NumericVariable;
+
+public:
+ FileCheckNumericVariableUse(StringRef Name,
+ FileCheckNumericVariable *NumericVariable)
+ : Name(Name), NumericVariable(NumericVariable) {}
+
+ /// \returns the value of the variable referenced by this instance.
+ Expected<uint64_t> eval() const;
+};
+
+/// Type of functions evaluating a given binary operation.
+using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
+
+/// Class representing a single binary operation in the AST of an expression.
+class FileCheckASTBinop : public FileCheckExpressionAST {
+private:
+ /// Left operand.
+ std::unique_ptr<FileCheckExpressionAST> LeftOperand;
+
+ /// Right operand.
+ std::unique_ptr<FileCheckExpressionAST> RightOperand;
+
+ /// Pointer to function that can evaluate this binary operation.
+ binop_eval_t EvalBinop;
+
+public:
+ FileCheckASTBinop(binop_eval_t EvalBinop,
+ std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ std::unique_ptr<FileCheckExpressionAST> RightOp)
+ : EvalBinop(EvalBinop) {
+ LeftOperand = std::move(LeftOp);
+ RightOperand = std::move(RightOp);
+ }
+
+ /// Evaluates the value of the binary operation represented by this AST,
+ /// using EvalBinop on the result of recursively evaluating the operands.
+ /// \returns the expression value or an error if an undefined numeric
+ /// variable is used in one of the operands.
+ Expected<uint64_t> eval() const;
+};
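
A minimal sketch (not part of the header, and assuming the out-of-line eval() definitions from the FileCheck implementation) of how these AST classes combine to evaluate the expression 10+2:

  static uint64_t doAdd(uint64_t A, uint64_t B) { return A + B; }

  Expected<uint64_t> evalTenPlusTwo() {
    auto LHS = std::make_unique<FileCheckExpressionLiteral>(10);
    auto RHS = std::make_unique<FileCheckExpressionLiteral>(2);
    FileCheckASTBinop Add(doAdd, std::move(LHS), std::move(RHS));
    return Add.eval(); // expected to yield 12
  }
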
+
+class FileCheckPatternContext;
+
+/// Class representing a substitution to perform in the RegExStr string.
+class FileCheckSubstitution {
+protected:
+ /// Pointer to a class instance holding, among other things, the table with
+ /// the values of live string variables at the start of any given CHECK line.
+ /// Used for substituting string variables with the text they were defined
+ /// as. Expressions are linked to the numeric variables they use at
+ /// parse time and directly access the value of the numeric variable to
+ /// evaluate their value.
+ FileCheckPatternContext *Context;
+
+ /// The string that needs to be substituted for something else. For a
+ /// string variable this is its name, otherwise this is the whole expression.
+ StringRef FromStr;
+
+  /// Index in RegExStr of where to do the substitution.
+ size_t InsertIdx;
+
+public:
+ FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
+ size_t InsertIdx)
+ : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
+
+ virtual ~FileCheckSubstitution() = default;
+
+ /// \returns the string to be substituted for something else.
+ StringRef getFromString() const { return FromStr; }
+
+ /// \returns the index where the substitution is to be performed in RegExStr.
+ size_t getIndex() const { return InsertIdx; }
+
+ /// \returns a string containing the result of the substitution represented
+ /// by this class instance or an error if substitution failed.
+ virtual Expected<std::string> getResult() const = 0;
+};
+
+class FileCheckStringSubstitution : public FileCheckSubstitution {
+public:
+ FileCheckStringSubstitution(FileCheckPatternContext *Context,
+ StringRef VarName, size_t InsertIdx)
+ : FileCheckSubstitution(Context, VarName, InsertIdx) {}
+
+ /// \returns the text that the string variable in this substitution matched
+ /// when defined, or an error if the variable is undefined.
+ Expected<std::string> getResult() const override;
+};
+
+class FileCheckNumericSubstitution : public FileCheckSubstitution {
+private:
+ /// Pointer to the class representing the expression whose value is to be
+ /// substituted.
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+
+public:
+ FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
+ std::unique_ptr<FileCheckExpressionAST> ExprAST,
+ size_t InsertIdx)
+ : FileCheckSubstitution(Context, Expr, InsertIdx) {
+ ExpressionAST = std::move(ExprAST);
+ }
+
+ /// \returns a string containing the result of evaluating the expression in
+ /// this substitution, or an error if evaluation failed.
+ Expected<std::string> getResult() const override;
+};
//===----------------------------------------------------------------------===//
-// Pattern Handling Code.
+// Pattern handling code.
//===----------------------------------------------------------------------===//
namespace Check {
@@ -78,12 +286,133 @@ public:
int getCount() const { return Count; }
FileCheckType &setCount(int C);
+  /// \returns a description of the check type prefixed with \p Prefix.
std::string getDescription(StringRef Prefix) const;
};
-}
+} // namespace Check
struct FileCheckDiag;
+/// Class holding the FileCheckPattern global state, shared by all patterns:
+/// tables holding values of variables and whether they are defined or not at
+/// any given time in the matching process.
+class FileCheckPatternContext {
+ friend class FileCheckPattern;
+
+private:
+ /// When matching a given pattern, this holds the value of all the string
+ /// variables defined in previous patterns. In a pattern, only the last
+ /// definition for a given variable is recorded in this table.
+  /// Back-references are used for uses that follow an earlier definition
+  /// within the same pattern.
+ StringMap<StringRef> GlobalVariableTable;
+
+ /// Map of all string variables defined so far. Used at parse time to detect
+ /// a name conflict between a numeric variable and a string variable when
+ /// the former is defined on a later line than the latter.
+ StringMap<bool> DefinedVariableTable;
+
+ /// When matching a given pattern, this holds the pointers to the classes
+ /// representing the numeric variables defined in previous patterns. When
+ /// matching a pattern all definitions for that pattern are recorded in the
+ /// NumericVariableDefs table in the FileCheckPattern instance of that
+ /// pattern.
+ StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
+
+ /// Pointer to the class instance representing the @LINE pseudo variable for
+ /// easily updating its value.
+ FileCheckNumericVariable *LineVariable = nullptr;
+
+ /// Vector holding pointers to all parsed numeric variables. Used to
+ /// automatically free them once they are guaranteed to no longer be used.
+ std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
+
+ /// Vector holding pointers to all substitutions. Used to automatically free
+ /// them once they are guaranteed to no longer be used.
+ std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
+
+public:
+ /// \returns the value of string variable \p VarName or an error if no such
+ /// variable has been defined.
+ Expected<StringRef> getPatternVarValue(StringRef VarName);
+
+ /// Defines string and numeric variables from definitions given on the
+ /// command line, passed as a vector of [#]VAR=VAL strings in
+ /// \p CmdlineDefines. \returns an error list containing diagnostics against
+ /// \p SM for all definition parsing failures, if any, or Success otherwise.
+ Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
+ SourceMgr &SM);
+
+  /// Creates the @LINE pseudo variable. Its value is set when patterns are
+  /// being matched.
+ void createLineVariable();
+
+ /// Undefines local variables (variables whose name does not start with a '$'
+ /// sign), i.e. removes them from GlobalVariableTable and from
+ /// GlobalNumericVariableTable and also clears the value of numeric
+ /// variables.
+ void clearLocalVars();
+
+private:
+ /// Makes a new numeric variable and registers it for destruction when the
+ /// context is destroyed.
+ template <class... Types>
+ FileCheckNumericVariable *makeNumericVariable(Types... args);
+
+ /// Makes a new string substitution and registers it for destruction when the
+ /// context is destroyed.
+ FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
+ size_t InsertIdx);
+
+ /// Makes a new numeric substitution and registers it for destruction when
+ /// the context is destroyed.
+ FileCheckSubstitution *
+ makeNumericSubstitution(StringRef ExpressionStr,
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
+ size_t InsertIdx);
+};
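
A minimal sketch (not part of the header; the variable names and values are placeholders) of feeding command-line definitions into a context and reading one back:

  bool defineExampleVariables(SourceMgr &SM) {
    FileCheckPatternContext Ctx;
    Ctx.createLineVariable();               // make @LINE available to patterns
    std::vector<std::string> Defs = {"VAR=hello", "#NUMVAR=42"};
    if (Error E = Ctx.defineCmdlineVariables(Defs, SM)) {
      logAllUnhandledErrors(std::move(E), errs(), "error: ");
      return false;
    }
    if (Expected<StringRef> Val = Ctx.getPatternVarValue("VAR"))
      outs() << "VAR = " << *Val << "\n";   // prints "VAR = hello"
    else
      consumeError(Val.takeError());
    return true;
  }
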
+
+/// Class to represent an error holding a diagnostic with location information
+/// used when printing it.
+class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
+private:
+ SMDiagnostic Diagnostic;
+
+public:
+ static char ID;
+
+ FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ /// Print diagnostic associated with this error when printing the error.
+ void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
+
+ static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
+ return make_error<FileCheckErrorDiagnostic>(
+ SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
+ }
+
+ static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
+ return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg);
+ }
+};
+
+class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
+public:
+ static char ID;
+
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+ /// Print diagnostic associated with this error when printing the error.
+ void log(raw_ostream &OS) const override {
+ OS << "String not found in input";
+ }
+};
+
class FileCheckPattern {
SMLoc PatternLoc;
@@ -95,43 +424,143 @@ class FileCheckPattern {
/// a fixed string to match.
std::string RegExStr;
- /// Entries in this vector map to uses of a variable in the pattern, e.g.
- /// "foo[[bar]]baz". In this case, the RegExStr will contain "foobaz" and
- /// we'll get an entry in this vector that tells us to insert the value of
- /// bar at offset 3.
- std::vector<std::pair<StringRef, unsigned>> VariableUses;
+ /// Entries in this vector represent a substitution of a string variable or
+ /// an expression in the RegExStr regex at match time. For example, in the
+ /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
+ /// RegExStr will contain "foobaz" and we'll get two entries in this vector
+ /// that tells us to insert the value of string variable "bar" at offset 3
+  /// that tell us to insert the value of string variable "bar" at offset 3
+ std::vector<FileCheckSubstitution *> Substitutions;
- /// Maps definitions of variables to their parenthesized capture numbers.
+ /// Maps names of string variables defined in a pattern to the number of
+ /// their parenthesis group in RegExStr capturing their last definition.
+ ///
+ /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
+ /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
+ /// the value captured for QUUX on the earlier line where it was defined, and
+ /// VariableDefs will map "bar" to the third parenthesis group which captures
+ /// the second definition of "bar".
///
- /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to
- /// 1.
+ /// Note: uses std::map rather than StringMap to be able to get the key when
+ /// iterating over values.
std::map<StringRef, unsigned> VariableDefs;
+ /// Structure representing the definition of a numeric variable in a pattern.
+ /// It holds the pointer to the class representing the numeric variable whose
+ /// value is being defined and the number of the parenthesis group in
+ /// RegExStr to capture that value.
+ struct FileCheckNumericVariableMatch {
+ /// Pointer to class representing the numeric variable whose value is being
+ /// defined.
+ FileCheckNumericVariable *DefinedNumericVariable;
+
+ /// Number of the parenthesis group in RegExStr that captures the value of
+ /// this numeric variable definition.
+ unsigned CaptureParenGroup;
+ };
+
+ /// Holds the number of the parenthesis group in RegExStr and pointer to the
+ /// corresponding FileCheckNumericVariable class instance of all numeric
+ /// variable definitions. Used to set the matched value of all those
+ /// variables.
+ StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
+
+ /// Pointer to a class instance holding the global state shared by all
+ /// patterns:
+ /// - separate tables with the values of live string and numeric variables
+ /// respectively at the start of any given CHECK line;
+ /// - table holding whether a string variable has been defined at any given
+ /// point during the parsing phase.
+ FileCheckPatternContext *Context;
+
Check::FileCheckType CheckTy;
- /// Contains the number of line this pattern is in.
- unsigned LineNumber;
+ /// Line number for this CHECK pattern or None if it is an implicit pattern.
+ /// Used to determine whether a variable definition is made on an earlier
+ /// line to the one with this CHECK.
+ Optional<size_t> LineNumber;
public:
- explicit FileCheckPattern(Check::FileCheckType Ty)
- : CheckTy(Ty) {}
+ FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
+ Optional<size_t> Line = None)
+ : Context(Context), CheckTy(Ty), LineNumber(Line) {}
- /// Returns the location in source code.
+ /// \returns the location in source code.
SMLoc getLoc() const { return PatternLoc; }
- bool ParsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
- unsigned LineNumber, const FileCheckRequest &Req);
- size_t Match(StringRef Buffer, size_t &MatchLen,
- StringMap<StringRef> &VariableTable) const;
- void PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
- const StringMap<StringRef> &VariableTable,
- SMRange MatchRange = None) const;
- void PrintFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
- const StringMap<StringRef> &VariableTable,
+ /// \returns the pointer to the global state for all patterns in this
+ /// FileCheck instance.
+ FileCheckPatternContext *getContext() const { return Context; }
+
+ /// \returns whether \p C is a valid first character for a variable name.
+ static bool isValidVarNameStart(char C);
+
+ /// Parsing information about a variable.
+ struct VariableProperties {
+ StringRef Name;
+ bool IsPseudo;
+ };
+
+ /// Parses the string at the start of \p Str for a variable name. \returns
+ /// a VariableProperties structure holding the variable name and whether it
+ /// is the name of a pseudo variable, or an error holding a diagnostic
+  /// against \p SM if parsing fails. If parsing was successful, also strips
+  /// the variable name from \p Str.
+ static Expected<VariableProperties> parseVariable(StringRef &Str,
+ const SourceMgr &SM);
+ /// Parses \p Expr for the name of a numeric variable to be defined at line
+ /// \p LineNumber or before input is parsed if \p LineNumber is None.
+ /// \returns a pointer to the class instance representing that variable,
+ /// creating it if needed, or an error holding a diagnostic against \p SM
+ /// should defining such a variable be invalid.
+ static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
+ StringRef &Expr, FileCheckPatternContext *Context,
+ Optional<size_t> LineNumber, const SourceMgr &SM);
+ /// Parses \p Expr for a numeric substitution block. Parameter
+ /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
+ /// expression. \returns a pointer to the class instance representing the AST
+ /// of the expression whose value must be substituted, or an error holding a
+ /// diagnostic against \p SM if parsing fails. If substitution was
+ /// successful, sets \p DefinedNumericVariable to point to the class
+ /// representing the numeric variable being defined in this numeric
+ /// substitution block, or None if this block does not define any variable.
+ Expected<std::unique_ptr<FileCheckExpressionAST>>
+ parseNumericSubstitutionBlock(
+ StringRef Expr,
+ Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+ bool IsLegacyLineExpr, const SourceMgr &SM) const;
+ /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
+ /// instance accordingly.
+ ///
+ /// \p Prefix provides which prefix is being matched, \p Req describes the
+ /// global options that influence the parsing such as whitespace
+ /// canonicalization, \p SM provides the SourceMgr used for error reports.
+ /// \returns true in case of an error, false otherwise.
+ bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
+ const FileCheckRequest &Req);
+ /// Matches the pattern string against the input buffer \p Buffer
+ ///
+ /// \returns the position that is matched or an error indicating why matching
+ /// failed. If there is a match, updates \p MatchLen with the size of the
+ /// matched string.
+ ///
+ /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
+ /// instance provides the current values of FileCheck string variables and
+ /// is updated if this match defines new values. Likewise, the
+ /// GlobalNumericVariableTable StringMap in the same class provides the
+ /// current values of FileCheck numeric variables and is updated if this
+ /// match defines new numeric values.
+ Expected<size_t> match(StringRef Buffer, size_t &MatchLen,
+ const SourceMgr &SM) const;
+ /// Prints the value of successful substitutions or the name of the undefined
+ /// string or numeric variables preventing a successful substitution.
+ void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
+ SMRange MatchRange = None) const;
+ void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
std::vector<FileCheckDiag> *Diags) const;
bool hasVariable() const {
- return !(VariableUses.empty() && VariableDefs.empty());
+ return !(Substitutions.empty() && VariableDefs.empty());
}
Check::FileCheckType getCheckTy() const { return CheckTy; }
@@ -141,11 +570,40 @@ public:
private:
bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
void AddBackrefToRegEx(unsigned BackrefNum);
- unsigned
- ComputeMatchDistance(StringRef Buffer,
- const StringMap<StringRef> &VariableTable) const;
- bool EvaluateExpression(StringRef Expr, std::string &Value) const;
+ /// Computes an arbitrary estimate for the quality of matching this pattern
+ /// at the start of \p Buffer; a distance of zero should correspond to a
+ /// perfect match.
+ unsigned computeMatchDistance(StringRef Buffer) const;
+ /// Finds the closing sequence of a regex variable usage or definition.
+ ///
+  /// \p Str has to point to the beginning of the definition (right after the
+  /// opening sequence). \p SM holds the SourceMgr used for error reporting.
+ /// \returns the offset of the closing sequence within Str, or npos if it
+ /// was not found.
size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
+
+ /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use.
+ /// \returns the pointer to the class instance representing that variable if
+ /// successful, or an error holding a diagnostic against \p SM otherwise.
+ Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+ parseNumericVariableUse(StringRef Name, bool IsPseudo,
+ const SourceMgr &SM) const;
+ enum class AllowedOperand { LineVar, Literal, Any };
+ /// Parses \p Expr for use of a numeric operand. Accepts both literal values
+ /// and numeric variables, depending on the value of \p AO. \returns the
+ /// class representing that operand in the AST of the expression or an error
+ /// holding a diagnostic against \p SM otherwise.
+ Expected<std::unique_ptr<FileCheckExpressionAST>>
+ parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+ const SourceMgr &SM) const;
+ /// Parses \p Expr for a binary operation. The left operand of this binary
+ /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether
+ /// we are parsing a legacy @LINE expression. \returns the class representing
+ /// the binary operation in the AST of the expression, or an error holding a
+ /// diagnostic against \p SM otherwise.
+ Expected<std::unique_ptr<FileCheckExpressionAST>>
+ parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ bool IsLegacyLineExpr, const SourceMgr &SM) const;
};
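
A minimal sketch (not part of the header; the pattern text, prefix, and input buffer are placeholders, and Check::CheckPlain is assumed to be the plain CHECK kind from the Check namespace above) of the parse-then-match flow this class exposes:

  bool matchExample(SourceMgr &SM, const FileCheckRequest &Req) {
    FileCheckPatternContext Ctx;
    Ctx.createLineVariable();
    FileCheckPattern Pat(Check::FileCheckType(Check::CheckPlain), &Ctx,
                         /*Line=*/1);
    if (Pat.parsePattern("foo [[STR:[0-9]+]] bar", "CHECK", SM, Req))
      return false;                        // parse error, reported against SM
    size_t MatchLen = 0;
    Expected<size_t> Pos = Pat.match("foo 42 bar", MatchLen, SM);
    if (!Pos) {
      consumeError(Pos.takeError());       // e.g. FileCheckNotFoundError
      return false;
    }
    // *Pos is the offset of the match in the buffer; MatchLen is its length.
    return true;
  }
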
//===----------------------------------------------------------------------===//
@@ -223,20 +681,27 @@ struct FileCheckString {
FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
: Pat(P), Prefix(S), Loc(L) {}
+ /// Matches check string and its "not strings" and/or "dag strings".
size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
- size_t &MatchLen, StringMap<StringRef> &VariableTable,
- FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const;
+ size_t &MatchLen, FileCheckRequest &Req,
+ std::vector<FileCheckDiag> *Diags) const;
+ /// Verifies that there is a single line in the given \p Buffer. Errors are
+ /// reported against \p SM.
bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
+ /// Verifies that there is no newline in the given \p Buffer. Errors are
+ /// reported against \p SM.
bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
+ /// Verifies that none of the strings in \p NotStrings are found in the given
+ /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
+ /// \p Diags according to the verbosity level set in \p Req.
bool CheckNot(const SourceMgr &SM, StringRef Buffer,
const std::vector<const FileCheckPattern *> &NotStrings,
- StringMap<StringRef> &VariableTable,
const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const;
+ /// Matches "dag strings" and their mixed "not strings".
size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
std::vector<const FileCheckPattern *> &NotStrings,
- StringMap<StringRef> &VariableTable,
const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const;
};
@@ -245,6 +710,7 @@ struct FileCheckString {
/// use information from the request.
class FileCheck {
FileCheckRequest Req;
+ FileCheckPatternContext PatternContext;
public:
FileCheck(FileCheckRequest Req) : Req(Req) {}
@@ -256,24 +722,27 @@ public:
// library.
Regex buildCheckPrefixRegex();
- /// Read the check file, which specifies the sequence of expected strings.
+ /// Reads the check file from \p Buffer and records the expected strings it
+ /// contains in the \p CheckStrings vector. Errors are reported against
+ /// \p SM.
///
- /// The strings are added to the CheckStrings vector. Returns true in case of
- /// an error, false otherwise.
+ /// Only expected strings whose prefix is one of those listed in \p PrefixRE
+ /// are recorded. \returns true in case of an error, false otherwise.
bool ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
std::vector<FileCheckString> &CheckStrings);
bool ValidateCheckPrefixes();
- /// Canonicalize whitespaces in the file. Line endings are replaced with
+ /// Canonicalizes whitespaces in the file. Line endings are replaced with
/// UNIX-style '\n'.
StringRef CanonicalizeFile(MemoryBuffer &MB,
SmallVectorImpl<char> &OutputBuffer);
- /// Check the input to FileCheck provided in the \p Buffer against the \p
- /// CheckStrings read from the check file.
+ /// Checks the input to FileCheck provided in the \p Buffer against the
+  /// \p CheckStrings read from the check file, and records diagnostics emitted
+ /// in \p Diags. Errors are recorded against \p SM.
///
- /// Returns false if the input fails to satisfy the checks.
+ /// \returns false if the input fails to satisfy the checks.
bool CheckInput(SourceMgr &SM, StringRef Buffer,
ArrayRef<FileCheckString> CheckStrings,
std::vector<FileCheckDiag> *Diags = nullptr);
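
A minimal sketch (not part of the header; buffer registration with the SourceMgr, whitespace canonicalization, and prefix validation are elided) of the driving sequence the FileCheck tool follows with this interface:

  bool runFileCheck(SourceMgr &SM, StringRef CheckFileText, StringRef InputText,
                    FileCheckRequest Req) {
    FileCheck FC(Req);
    Regex PrefixRE = FC.buildCheckPrefixRegex();
    std::vector<FileCheckString> CheckStrings;
    if (FC.ReadCheckFile(SM, CheckFileText, PrefixRE, CheckStrings))
      return false;                        // errors already reported against SM
    return FC.CheckInput(SM, InputText, CheckStrings);
  }
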
diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h
index 68226ca55502..999f551ebf2d 100644
--- a/include/llvm/Support/FileOutputBuffer.h
+++ b/include/llvm/Support/FileOutputBuffer.h
@@ -1,9 +1,8 @@
//=== FileOutputBuffer.h - File Output Buffer -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,11 +32,6 @@ public:
enum {
/// set the 'x' bit on the resulting file
F_executable = 1,
-
- /// the contents of the new file are initialized from the file that exists
- /// at the location (if present). This allows in-place modification of an
- /// existing file.
- F_modify = 2
};
/// Factory method to create an OutputBuffer object which manages a read/write
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index d2042f51d8c1..1bec27bddad9 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -1,9 +1,8 @@
//===- llvm/Support/FileSystem.h - File System OS Concept -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -649,6 +648,19 @@ std::error_code status(const Twine &path, file_status &result,
/// A version for when a file descriptor is already available.
std::error_code status(int FD, file_status &Result);
+#ifdef _WIN32
+/// A version for when a file descriptor is already available.
+std::error_code status(file_t FD, file_status &Result);
+#endif
+
+/// Get file creation mode mask of the process.
+///
+/// @returns Mask reported by umask(2)
+/// @note There is no umask on Windows. This function always returns 0
+/// on Windows. This function does not return an error_code because
+/// umask(2) never fails. It is not thread safe.
+unsigned getUmask();
+
/// Set file permissions.
///
/// @param Path File to set permissions on.
@@ -660,6 +672,11 @@ std::error_code status(int FD, file_status &Result);
/// Otherwise, the file will be marked as read-only.
std::error_code setPermissions(const Twine &Path, perms Permissions);
+/// Version of setPermissions accepting a file descriptor.
+/// TODO Delete the path based overload once we implement the FD based overload
+/// on Windows.
+std::error_code setPermissions(int FD, perms Permissions);
+
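
A small sketch (not part of the header; the file descriptor comes from the caller and the 0666 policy is only an illustration) of how getUmask() and the new FD-based setPermissions() overload might be combined:

  void umaskExample(int FD) {
    // Reproduce the usual 0666 & ~umask creation policy on an already-open
    // descriptor.
    unsigned Mask = sys::fs::getUmask();
    sys::fs::perms P = static_cast<sys::fs::perms>(0666u & ~Mask);
    if (std::error_code EC = sys::fs::setPermissions(FD, P))
      errs() << "setPermissions failed: " << EC.message() << "\n";
  }
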
/// Get file permissions.
///
/// @param Path File to get permissions from.
@@ -765,11 +782,32 @@ enum OpenFlags : unsigned {
OF_UpdateAtime = 16,
};
+/// Creates a potentially unique file name but does not create the file.
+///
+/// Generates a unique path suitable for a temporary file but does not
+/// open or create the file. The name is based on \a Model with '%'
+/// replaced by a random char in [0-9a-f]. If \a MakeAbsolute is true
+/// then the system's temp directory is prepended first. If \a MakeAbsolute
+/// is false the current directory will be used instead.
+///
+/// This function does not check if the file exists. If you want to be sure
+/// that the file does not yet exist, you should use enough '%' characters
+/// in your model to ensure this. Each '%' gives 4 bits of entropy, so you can
+/// use 32 of them to get 128 bits of entropy.
+///
+/// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s
+///
+/// @param Model Name to base unique path off of.
+/// @param ResultPath Set to the file's path.
+/// @param MakeAbsolute Whether to use the system temp directory.
+void createUniquePath(const Twine &Model, SmallVectorImpl<char> &ResultPath,
+ bool MakeAbsolute);
+
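
A minimal sketch (the model string is a placeholder) of generating, but not creating, a unique path with the function declared above:

  void uniquePathExample() {
    SmallString<128> TempPath;
    // Each '%' is replaced with a random hex digit; with MakeAbsolute the
    // system temp directory is prepended.
    sys::fs::createUniquePath("example-%%%%%%%%.tmp", TempPath,
                              /*MakeAbsolute=*/true);
  }
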
/// Create a uniquely named file.
///
/// Generates a unique path suitable for a temporary file and then opens it as a
-/// file. The name is based on \a model with '%' replaced by a random char in
-/// [0-9a-f]. If \a model is not an absolute path, the temporary file will be
+/// file. The name is based on \a Model with '%' replaced by a random char in
+/// [0-9a-f]. If \a Model is not an absolute path, the temporary file will be
/// created in the current directory.
///
/// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s
@@ -932,6 +970,51 @@ Expected<file_t> openNativeFile(const Twine &Name, CreationDisposition Disp,
FileAccess Access, OpenFlags Flags,
unsigned Mode = 0666);
+/// Converts from a Posix file descriptor number to a native file handle.
+/// On Windows, this retrieves the underlying handle. On non-Windows, this is a
+/// no-op.
+file_t convertFDToNativeFile(int FD);
+
+#ifndef _WIN32
+inline file_t convertFDToNativeFile(int FD) { return FD; }
+#endif
+
+/// Return an open handle to standard in. On Unix, this is typically FD 0.
+/// Returns kInvalidFile when the stream is closed.
+file_t getStdinHandle();
+
+/// Return an open handle to standard out. On Unix, this is typically FD 1.
+/// Returns kInvalidFile when the stream is closed.
+file_t getStdoutHandle();
+
+/// Return an open handle to standard error. On Unix, this is typically FD 2.
+/// Returns kInvalidFile when the stream is closed.
+file_t getStderrHandle();
+
+/// Reads \p Buf.size() bytes from \p FileHandle into \p Buf. The number of
+/// bytes actually read is returned in \p BytesRead. On Unix, this is equivalent
+/// to `*BytesRead = ::read(FD, Buf.data(), Buf.size())`, with error reporting.
+/// BytesRead will contain zero when reaching EOF.
+///
+/// @param FileHandle File to read from.
+/// @param Buf Buffer to read into.
+/// @param BytesRead Output parameter of the number of bytes read.
+/// @returns The error, if any, or errc::success.
+std::error_code readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf,
+ size_t *BytesRead);
+
+/// Reads \p Buf.size() bytes from \p FileHandle at offset \p Offset into \p
+/// Buf. If 'pread' is available, this will use that, otherwise it will use
+/// 'lseek'. Bytes requested beyond the end of the file will be zero
+/// initialized.
+///
+/// @param FileHandle File to read from.
+/// @param Buf Buffer to read into.
+/// @param Offset Offset into the file at which the read should occur.
+/// @returns The error, if any, or errc::success.
+std::error_code readNativeFileSlice(file_t FileHandle,
+ MutableArrayRef<char> Buf, size_t Offset);
+
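
A minimal sketch (not part of the header; "input.bin" is a placeholder path) of reading the first bytes of a file through the native-handle API declared above, together with openNativeFileForRead and closeFile declared below:

  Error readExample() {
    Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead("input.bin");
    if (!FD)
      return FD.takeError();
    char Buf[64];
    size_t BytesRead = 0;
    if (std::error_code EC = sys::fs::readNativeFile(
            *FD, MutableArrayRef<char>(Buf, sizeof(Buf)), &BytesRead))
      return errorCodeToError(EC);
    // BytesRead is 0 once end of file is reached.
    if (std::error_code EC = sys::fs::closeFile(*FD))
      return errorCodeToError(EC);
    return Error::success();
  }
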
/// @brief Opens the file with the given name in a write-only or read-write
/// mode, returning its open file descriptor. If the file does not exist, it
/// is created.
@@ -1051,11 +1134,15 @@ openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None,
SmallVectorImpl<char> *RealPath = nullptr);
/// @brief Close the file object. This should be used instead of ::close for
-/// portability.
+/// portability. On error, the caller should assume the file is closed, as is
+/// the case for Process::SafelyCloseFileDescriptor.
///
/// @param F On input, this is the file to close. On output, the file is
/// set to kInvalidFile.
-void closeFile(file_t &F);
+///
+/// @returns An error code if closing the file failed. Typically, an error here
+/// means that the filesystem may have failed to perform some buffered writes.
+std::error_code closeFile(file_t &F);
std::error_code getUniqueID(const Twine Path, UniqueID &Result);
@@ -1085,21 +1172,19 @@ private:
size_t Size;
void *Mapping;
#ifdef _WIN32
- void *FileHandle;
+ sys::fs::file_t FileHandle;
#endif
mapmode Mode;
- std::error_code init(int FD, uint64_t Offset, mapmode Mode);
+ std::error_code init(sys::fs::file_t FD, uint64_t Offset, mapmode Mode);
public:
mapped_file_region() = delete;
mapped_file_region(mapped_file_region&) = delete;
mapped_file_region &operator =(mapped_file_region&) = delete;
- /// \param fd An open file descriptor to map. mapped_file_region takes
- /// ownership if closefd is true. It must have been opended in the correct
- /// mode.
- mapped_file_region(int fd, mapmode mode, size_t length, uint64_t offset,
+ /// \param fd An open file descriptor to map. Does not take ownership of fd.
+ mapped_file_region(sys::fs::file_t fd, mapmode mode, size_t length, uint64_t offset,
std::error_code &ec);
~mapped_file_region();
diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h
index 2ee2c60b9964..16b2206924c3 100644
--- a/include/llvm/Support/FileUtilities.h
+++ b/include/llvm/Support/FileUtilities.h
@@ -1,9 +1,8 @@
//===- llvm/Support/FileUtilities.h - File System Utilities -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h
index bcbd2bec5722..77dcbaebf1a3 100644
--- a/include/llvm/Support/Format.h
+++ b/include/llvm/Support/Format.h
@@ -1,9 +1,8 @@
//===- Format.h - Efficient printf-style formatting for streams -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/FormatAdapters.h b/include/llvm/Support/FormatAdapters.h
index 8320eaad39a9..a0e8cc439191 100644
--- a/include/llvm/Support/FormatAdapters.h
+++ b/include/llvm/Support/FormatAdapters.h
@@ -1,9 +1,8 @@
//===- FormatAdapters.h - Formatters for common LLVM types -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/FormatCommon.h b/include/llvm/Support/FormatCommon.h
index 36fbad296c3f..3c119d12529a 100644
--- a/include/llvm/Support/FormatCommon.h
+++ b/include/llvm/Support/FormatCommon.h
@@ -1,9 +1,8 @@
-//===- FormatAdapters.h - Formatters for common LLVM types -----*- C++ -*-===//
+//===- FormatCommon.h - Formatters for common LLVM types --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/FormatProviders.h b/include/llvm/Support/FormatProviders.h
index 4e57034ff98e..629a4845716a 100644
--- a/include/llvm/Support/FormatProviders.h
+++ b/include/llvm/Support/FormatProviders.h
@@ -1,9 +1,8 @@
//===- FormatProviders.h - Formatters for common LLVM types -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/FormatVariadic.h b/include/llvm/Support/FormatVariadic.h
index b0f582513e07..5bbda9dd626e 100644
--- a/include/llvm/Support/FormatVariadic.h
+++ b/include/llvm/Support/FormatVariadic.h
@@ -1,9 +1,8 @@
//===- FormatVariadic.h - Efficient type-safe string formatting --*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/FormatVariadicDetails.h b/include/llvm/Support/FormatVariadicDetails.h
index e8bd90f50941..e3c185134daa 100644
--- a/include/llvm/Support/FormatVariadicDetails.h
+++ b/include/llvm/Support/FormatVariadicDetails.h
@@ -1,9 +1,8 @@
//===- FormatVariadicDetails.h - Helpers for FormatVariadic.h ----*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h
index 4a135cd23174..b49c8d86531d 100644
--- a/include/llvm/Support/FormattedStream.h
+++ b/include/llvm/Support/FormattedStream.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/FormattedStream.h - Formatted streams ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index b3018bac310a..99620802505b 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -1,9 +1,8 @@
//===- GenericDomTree.h - Generic dominator trees for graphs ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -670,14 +669,12 @@ protected:
// The postdom tree can have a null root if there are no returns.
if (getRootNode()) PrintDomTree<NodeT>(getRootNode(), O, 1);
- if (IsPostDominator) {
- O << "Roots: ";
- for (const NodePtr Block : Roots) {
- Block->printAsOperand(O, false);
- O << " ";
- }
- O << "\n";
+ O << "Roots: ";
+ for (const NodePtr Block : Roots) {
+ Block->printAsOperand(O, false);
+ O << " ";
}
+ O << "\n";
}
public:
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index 971e8305a112..ccceba881718 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -1,9 +1,8 @@
//===- GenericDomTreeConstruction.h - Dominator Calculation ------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -16,9 +15,12 @@
/// Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
/// ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
///
-/// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
-/// out that the theoretically slower O(n*log(n)) implementation is actually
-/// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
+/// The Semi-NCA algorithm runs in O(n^2) worst-case time but is usually
+/// slightly faster than Simple Lengauer-Tarjan in practice.
+///
+/// O(n^2) worst cases happen when the computation of nearest common ancestors
+/// requires O(n) average time, which is very unlikely in the real world. If this
+/// ever turns out to be an issue, consider implementing a hybrid algorithm.
///
/// The file uses the Depth Based Search algorithm to perform incremental
/// updates (insertion and deletions). The implemented algorithm is based on
@@ -255,42 +257,47 @@ struct SemiNCAInfo {
return LastNum;
}
- NodePtr eval(NodePtr VIn, unsigned LastLinked) {
- auto &VInInfo = NodeToInfo[VIn];
- if (VInInfo.DFSNum < LastLinked)
- return VIn;
-
- SmallVector<NodePtr, 32> Work;
- SmallPtrSet<NodePtr, 32> Visited;
-
- if (VInInfo.Parent >= LastLinked)
- Work.push_back(VIn);
-
- while (!Work.empty()) {
- NodePtr V = Work.back();
- auto &VInfo = NodeToInfo[V];
- NodePtr VAncestor = NumToNode[VInfo.Parent];
-
- // Process Ancestor first
- if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
- Work.push_back(VAncestor);
- continue;
- }
- Work.pop_back();
-
- // Update VInfo based on Ancestor info
- if (VInfo.Parent < LastLinked)
- continue;
-
- auto &VAInfo = NodeToInfo[VAncestor];
- NodePtr VAncestorLabel = VAInfo.Label;
- NodePtr VLabel = VInfo.Label;
- if (NodeToInfo[VAncestorLabel].Semi < NodeToInfo[VLabel].Semi)
- VInfo.Label = VAncestorLabel;
- VInfo.Parent = VAInfo.Parent;
- }
-
- return VInInfo.Label;
+ // V is a predecessor of W. eval() returns V if V < W, otherwise the minimum
+ // of sdom(U), where U > W and there is a virtual forest path from U to V. The
+ // virtual forest consists of linked edges of processed vertices.
+ //
+ // We can follow Parent pointers (virtual forest edges) to determine the
+ // ancestor U with minimum sdom(U). But it is slow and thus we employ the path
+ // compression technique to speed up to O(m*log(n)). Theoretically the virtual
+  // compression technique to speed this up to O(m*log(n)). Theoretically the
+  // virtual
+ // O(m*alpha(m,n)) running time. But it requires two auxiliary arrays (Size
+ // and Child) and is unlikely to be faster than the simple implementation.
+ //
+ // For each vertex V, its Label points to the vertex with the minimal sdom(U)
+ // (Semi) in its path from V (included) to NodeToInfo[V].Parent (excluded).
+ NodePtr eval(NodePtr V, unsigned LastLinked,
+ SmallVectorImpl<InfoRec *> &Stack) {
+ InfoRec *VInfo = &NodeToInfo[V];
+ if (VInfo->Parent < LastLinked)
+ return VInfo->Label;
+
+ // Store ancestors except the last (root of a virtual tree) into a stack.
+ assert(Stack.empty());
+ do {
+ Stack.push_back(VInfo);
+ VInfo = &NodeToInfo[NumToNode[VInfo->Parent]];
+ } while (VInfo->Parent >= LastLinked);
+
+ // Path compression. Point each vertex's Parent to the root and update its
+ // Label if any of its ancestors (PInfo->Label) has a smaller Semi.
+ const InfoRec *PInfo = VInfo;
+ const InfoRec *PLabelInfo = &NodeToInfo[PInfo->Label];
+ do {
+ VInfo = Stack.pop_back_val();
+ VInfo->Parent = PInfo->Parent;
+ const InfoRec *VLabelInfo = &NodeToInfo[VInfo->Label];
+ if (PLabelInfo->Semi < VLabelInfo->Semi)
+ VInfo->Label = PInfo->Label;
+ else
+ PLabelInfo = VLabelInfo;
+ PInfo = VInfo;
+ } while (!Stack.empty());
+ return VInfo->Label;
}
// This function requires DFS to be run before calling it.
@@ -304,6 +311,7 @@ struct SemiNCAInfo {
}
// Step #1: Calculate the semidominators of all vertices.
+ SmallVector<InfoRec *, 32> EvalStack;
for (unsigned i = NextDFSNum - 1; i >= 2; --i) {
NodePtr W = NumToNode[i];
auto &WInfo = NodeToInfo[W];
@@ -319,7 +327,7 @@ struct SemiNCAInfo {
if (TN && TN->getLevel() < MinLevel)
continue;
- unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi;
+ unsigned SemiU = NodeToInfo[eval(N, i + 1, EvalStack)].Semi;
if (SemiU < WInfo.Semi) WInfo.Semi = SemiU;
}
}
@@ -620,21 +628,22 @@ struct SemiNCAInfo {
// Helper struct used during edge insertions.
struct InsertionInfo {
- using BucketElementTy = std::pair<unsigned, TreeNodePtr>;
- struct DecreasingLevel {
- bool operator()(const BucketElementTy &First,
- const BucketElementTy &Second) const {
- return First.first > Second.first;
+ struct Compare {
+ bool operator()(TreeNodePtr LHS, TreeNodePtr RHS) const {
+ return LHS->getLevel() < RHS->getLevel();
}
};
- std::priority_queue<BucketElementTy, SmallVector<BucketElementTy, 8>,
- DecreasingLevel>
- Bucket; // Queue of tree nodes sorted by level in descending order.
- SmallDenseSet<TreeNodePtr, 8> Affected;
- SmallDenseMap<TreeNodePtr, unsigned, 8> Visited;
- SmallVector<TreeNodePtr, 8> AffectedQueue;
- SmallVector<TreeNodePtr, 8> VisitedNotAffectedQueue;
+ // Bucket queue of tree nodes ordered by descending level. For simplicity,
+ // we use a priority_queue here.
+ std::priority_queue<TreeNodePtr, SmallVector<TreeNodePtr, 8>,
+ Compare>
+ Bucket;
+ SmallDenseSet<TreeNodePtr, 8> Visited;
+ SmallVector<TreeNodePtr, 8> Affected;
+#ifndef NDEBUG
+ SmallVector<TreeNodePtr, 8> VisitedUnaffected;
+#endif
};
static void InsertEdge(DomTreeT &DT, const BatchUpdatePtr BUI,
@@ -689,6 +698,17 @@ struct SemiNCAInfo {
return true;
}
+ static bool isPermutation(const SmallVectorImpl<NodePtr> &A,
+ const SmallVectorImpl<NodePtr> &B) {
+ if (A.size() != B.size())
+ return false;
+ SmallPtrSet<NodePtr, 4> Set(A.begin(), A.end());
+ for (NodePtr N : B)
+ if (Set.count(N) == 0)
+ return false;
+ return true;
+ }
+
// Updates the set of roots after insertion or deletion. This ensures that
  // roots are the same after a series of updates as when the tree would
// be built from scratch.
@@ -702,9 +722,8 @@ struct SemiNCAInfo {
return;
// Recalculate the set of roots.
- auto Roots = FindRoots(DT, BUI);
- if (DT.Roots.size() != Roots.size() ||
- !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), Roots.begin())) {
+ RootsT Roots = FindRoots(DT, BUI);
+ if (!isPermutation(DT.Roots, Roots)) {
// The roots chosen in the CFG have changed. This is because the
// incremental algorithm does not really know or use the set of roots and
// can make a different (implicit) decision about which node within an
@@ -715,7 +734,6 @@ struct SemiNCAInfo {
// It may be possible to update the tree without recalculating it, but
      // we do not yet know how to do it, and it happens rarely in practice.
CalculateFromScratch(DT, BUI);
- return;
}
}
@@ -737,128 +755,113 @@ struct SemiNCAInfo {
assert(NCD);
LLVM_DEBUG(dbgs() << "\t\tNCA == " << BlockNamePrinter(NCD) << "\n");
- const TreeNodePtr ToIDom = To->getIDom();
+ const unsigned NCDLevel = NCD->getLevel();
- // Nothing affected -- NCA property holds.
- // (Based on the lemma 2.5 from the second paper.)
- if (NCD == To || NCD == ToIDom) return;
+ // Based on Lemma 2.5 from the second paper, after insertion of (From,To), v
+  // is affected iff depth(NCD)+1 < depth(v) and there exists a path P from To
+  // to v on which every vertex w satisfies depth(v) <= depth(w).
+ //
+ // This reduces to a widest path problem (maximizing the depth of the
+ // minimum vertex in the path) which can be solved by a modified version of
+ // Dijkstra with a bucket queue (named depth-based search in the paper).
+
+ // To is in the path, so depth(NCD)+1 < depth(v) <= depth(To). Nothing
+ // affected if this does not hold.
+ if (NCDLevel + 1 >= To->getLevel())
+ return;
- // Identify and collect affected nodes.
InsertionInfo II;
- LLVM_DEBUG(dbgs() << "Marking " << BlockNamePrinter(To)
- << " as affected\n");
- II.Affected.insert(To);
- const unsigned ToLevel = To->getLevel();
- LLVM_DEBUG(dbgs() << "Putting " << BlockNamePrinter(To)
- << " into a Bucket\n");
- II.Bucket.push({ToLevel, To});
+ SmallVector<TreeNodePtr, 8> UnaffectedOnCurrentLevel;
+ II.Bucket.push(To);
+ II.Visited.insert(To);
while (!II.Bucket.empty()) {
- const TreeNodePtr CurrentNode = II.Bucket.top().second;
- const unsigned CurrentLevel = CurrentNode->getLevel();
+ TreeNodePtr TN = II.Bucket.top();
II.Bucket.pop();
- LLVM_DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: "
- << BlockNamePrinter(CurrentNode) << "\n");
-
- II.Visited.insert({CurrentNode, CurrentLevel});
- II.AffectedQueue.push_back(CurrentNode);
+ II.Affected.push_back(TN);
+
+ const unsigned CurrentLevel = TN->getLevel();
+ LLVM_DEBUG(dbgs() << "Mark " << BlockNamePrinter(TN) <<
+                 " as affected, CurrentLevel " << CurrentLevel << "\n");
+
+ assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
+
+ while (true) {
+ // Unlike regular Dijkstra, we have an inner loop to expand more
+ // vertices. The first iteration is for the (affected) vertex popped
+ // from II.Bucket and the rest are for vertices in
+ // UnaffectedOnCurrentLevel, which may eventually expand to affected
+ // vertices.
+ //
+ // Invariant: there is an optimal path from `To` to TN with the minimum
+ // depth being CurrentLevel.
+ for (const NodePtr Succ :
+ ChildrenGetter<IsPostDom>::Get(TN->getBlock(), BUI)) {
+ const TreeNodePtr SuccTN = DT.getNode(Succ);
+ assert(SuccTN &&
+ "Unreachable successor found at reachable insertion");
+ const unsigned SuccLevel = SuccTN->getLevel();
+
+ LLVM_DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
+ << ", level = " << SuccLevel << "\n");
+
+ // There is an optimal path from `To` to Succ with the minimum depth
+ // being min(CurrentLevel, SuccLevel).
+ //
+ // If depth(NCD)+1 < depth(Succ) is not satisfied, Succ is unaffected
+ // and no affected vertex may be reached by a path passing through it.
+ // Stop here. Also, Succ may be visited by other predecessors but the
+ // first visit has the optimal path. Stop if Succ has been visited.
+ if (SuccLevel <= NCDLevel + 1 || !II.Visited.insert(SuccTN).second)
+ continue;
+
+ if (SuccLevel > CurrentLevel) {
+ // Succ is unaffected but it may (transitively) expand to affected
+ // vertices. Store it in UnaffectedOnCurrentLevel.
+ LLVM_DEBUG(dbgs() << "\t\tMarking visited not affected "
+ << BlockNamePrinter(Succ) << "\n");
+ UnaffectedOnCurrentLevel.push_back(SuccTN);
+#ifndef NDEBUG
+ II.VisitedUnaffected.push_back(SuccTN);
+#endif
+ } else {
+ // The condition is satisfied (Succ is affected). Add Succ to the
+ // bucket queue.
+ LLVM_DEBUG(dbgs() << "\t\tAdd " << BlockNamePrinter(Succ)
+ << " to a Bucket\n");
+ II.Bucket.push(SuccTN);
+ }
+ }
- // Discover and collect affected successors of the current node.
- VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II);
+ if (UnaffectedOnCurrentLevel.empty())
+ break;
+ TN = UnaffectedOnCurrentLevel.pop_back_val();
+ LLVM_DEBUG(dbgs() << " Next: " << BlockNamePrinter(TN) << "\n");
+ }
}
// Finish by updating immediate dominators and levels.
UpdateInsertion(DT, BUI, NCD, II);
}
- // Visits an affected node and collect its affected successors.
- static void VisitInsertion(DomTreeT &DT, const BatchUpdatePtr BUI,
- const TreeNodePtr TN, const unsigned RootLevel,
- const TreeNodePtr NCD, InsertionInfo &II) {
- const unsigned NCDLevel = NCD->getLevel();
- LLVM_DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ", RootLevel "
- << RootLevel << "\n");
-
- SmallVector<TreeNodePtr, 8> Stack = {TN};
- assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!");
-
- SmallPtrSet<TreeNodePtr, 8> Processed;
-
- do {
- TreeNodePtr Next = Stack.pop_back_val();
- LLVM_DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n");
-
- for (const NodePtr Succ :
- ChildrenGetter<IsPostDom>::Get(Next->getBlock(), BUI)) {
- const TreeNodePtr SuccTN = DT.getNode(Succ);
- assert(SuccTN && "Unreachable successor found at reachable insertion");
- const unsigned SuccLevel = SuccTN->getLevel();
-
- LLVM_DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ)
- << ", level = " << SuccLevel << "\n");
-
- // Do not process the same node multiple times.
- if (Processed.count(Next) > 0)
- continue;
-
- // Succ dominated by subtree From -- not affected.
- // (Based on the lemma 2.5 from the second paper.)
- if (SuccLevel > RootLevel) {
- LLVM_DEBUG(dbgs() << "\t\tDominated by subtree From\n");
- if (II.Visited.count(SuccTN) != 0) {
- LLVM_DEBUG(dbgs() << "\t\t\talready visited at level "
- << II.Visited[SuccTN] << "\n\t\t\tcurrent level "
- << RootLevel << ")\n");
-
- // A node can be necessary to visit again if we see it again at
- // a lower level than before.
- if (II.Visited[SuccTN] >= RootLevel)
- continue;
- }
-
- LLVM_DEBUG(dbgs() << "\t\tMarking visited not affected "
- << BlockNamePrinter(Succ) << "\n");
- II.Visited.insert({SuccTN, RootLevel});
- II.VisitedNotAffectedQueue.push_back(SuccTN);
- Stack.push_back(SuccTN);
- } else if ((SuccLevel > NCDLevel + 1) &&
- II.Affected.count(SuccTN) == 0) {
- LLVM_DEBUG(dbgs() << "\t\tMarking affected and adding "
- << BlockNamePrinter(Succ) << " to a Bucket\n");
- II.Affected.insert(SuccTN);
- II.Bucket.push({SuccLevel, SuccTN});
- }
- }
-
- Processed.insert(Next);
- } while (!Stack.empty());
- }
-
// Updates immediate dominators and levels after insertion.
static void UpdateInsertion(DomTreeT &DT, const BatchUpdatePtr BUI,
const TreeNodePtr NCD, InsertionInfo &II) {
LLVM_DEBUG(dbgs() << "Updating NCD = " << BlockNamePrinter(NCD) << "\n");
- for (const TreeNodePtr TN : II.AffectedQueue) {
+ for (const TreeNodePtr TN : II.Affected) {
LLVM_DEBUG(dbgs() << "\tIDom(" << BlockNamePrinter(TN)
<< ") = " << BlockNamePrinter(NCD) << "\n");
TN->setIDom(NCD);
}
- UpdateLevelsAfterInsertion(II);
- if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
- }
-
- static void UpdateLevelsAfterInsertion(InsertionInfo &II) {
- LLVM_DEBUG(
- dbgs() << "Updating levels for visited but not affected nodes\n");
+#ifndef NDEBUG
+ for (const TreeNodePtr TN : II.VisitedUnaffected)
+ assert(TN->getLevel() == TN->getIDom()->getLevel() + 1 &&
+ "TN should have been updated by an affected ancestor");
+#endif
- for (const TreeNodePtr TN : II.VisitedNotAffectedQueue) {
- LLVM_DEBUG(dbgs() << "\tlevel(" << BlockNamePrinter(TN) << ") = ("
- << BlockNamePrinter(TN->getIDom()) << ") "
- << TN->getIDom()->getLevel() << " + 1\n");
- TN->UpdateLevel();
- }
+ if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI);
}
// Handles insertion to previously unreachable nodes.
@@ -1182,6 +1185,10 @@ struct SemiNCAInfo {
BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()});
}
+#if 0
+ // FIXME: The LLVM_DEBUG macro only plays well with a modular
+ // build of LLVM when the header is marked as textual, but doing
+ // so causes redefinition errors.
LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n");
LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U
: reverse(BUI.Updates)) {
@@ -1190,6 +1197,7 @@ struct SemiNCAInfo {
dbgs() << "\n";
});
LLVM_DEBUG(dbgs() << "\n");
+#endif
// Recalculate the DominatorTree when the number of updates
// exceeds a threshold, which usually makes direct updating slower than
@@ -1215,8 +1223,13 @@ struct SemiNCAInfo {
static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) {
assert(!BUI.Updates.empty() && "No updates to apply!");
UpdateT CurrentUpdate = BUI.Updates.pop_back_val();
+#if 0
+ // FIXME: The LLVM_DEBUG macro only plays well with a modular
+ // build of LLVM when the header is marked as textual, but doing
+ // so causes redefinition errors.
LLVM_DEBUG(dbgs() << "Applying update: ");
LLVM_DEBUG(CurrentUpdate.dump(); dbgs() << "\n");
+#endif
// Move to the next snapshot of the CFG by removing the reverse-applied
// current update. Since updates are performed in the same order they are
@@ -1270,9 +1283,7 @@ struct SemiNCAInfo {
}
RootsT ComputedRoots = FindRoots(DT, nullptr);
- if (DT.Roots.size() != ComputedRoots.size() ||
- !std::is_permutation(DT.Roots.begin(), DT.Roots.end(),
- ComputedRoots.begin())) {
+ if (!isPermutation(DT.Roots, ComputedRoots)) {
errs() << "Tree has different roots than freshly computed ones!\n";
errs() << "\tPDT roots: ";
for (const NodePtr N : DT.Roots) errs() << BlockNamePrinter(N) << ", ";
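The InsertReachable changes above recast affected-vertex discovery as a widest path problem: after inserting an edge into To, a vertex v is affected exactly when depth(NCD)+1 < depth(v) and the best To->v path, the one maximizing the minimum depth along the path, has minimum depth equal to depth(v). The standalone sketch below demonstrates that criterion on a toy graph with plain STL containers; the patch itself uses a bucket queue plus an inner expansion loop, and none of the names here are LLVM types.

#include <algorithm>
#include <cstdio>
#include <map>
#include <queue>
#include <utility>
#include <vector>

int main() {
  // Toy CFG successors and dominator-tree depth of each node.
  std::map<int, std::vector<int>> Succs = {{1, {2}}, {2, {3, 4}}, {4, {5}}};
  std::map<int, unsigned> Depth = {{1, 4}, {2, 5}, {3, 3}, {4, 6}, {5, 3}};
  const unsigned NCDLevel = 1; // depth of the NCA of the inserted edge
  const int To = 1;            // the real code returns early if NCDLevel + 1 >= Depth[To]

  std::map<int, unsigned> Width; // best achievable minimum depth of a To->v path
  std::priority_queue<std::pair<unsigned, int>> PQ;
  Width[To] = Depth[To];
  PQ.push({Width[To], To});
  while (!PQ.empty()) {
    auto [W, V] = PQ.top();
    PQ.pop();
    if (W < Width[V])
      continue; // stale queue entry
    for (int S : Succs[V]) {
      if (Depth[S] <= NCDLevel + 1)
        continue; // cannot lie on a path to any affected vertex
      unsigned NewW = std::min(W, Depth[S]);
      if (!Width.count(S) || NewW > Width[S]) {
        Width[S] = NewW;
        PQ.push({NewW, S});
      }
    }
  }
  for (auto [V, W] : Width)
    if (W == Depth[V]) // path width equals own depth => affected
      std::printf("affected: %d (depth %u)\n", V, Depth[V]);
}

On this input the sketch reports 1 (To itself), 3, and 5 as affected; node 5 is only reachable through the deeper, unaffected vertices 2 and 4, which is the case the UnaffectedOnCurrentLevel vector handles in the patch.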
diff --git a/include/llvm/Support/GenericIteratedDominanceFrontier.h b/include/llvm/Support/GenericIteratedDominanceFrontier.h
new file mode 100644
index 000000000000..25eb7cd7b6d5
--- /dev/null
+++ b/include/llvm/Support/GenericIteratedDominanceFrontier.h
@@ -0,0 +1,209 @@
+//===- IteratedDominanceFrontier.h - Calculate IDF --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Compute iterated dominance frontiers using a linear time algorithm.
+///
+/// The algorithm used here is based on:
+///
+/// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
+/// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
+/// Programming Languages
+/// POPL '95. ACM, New York, NY, 62-73.
+///
+/// It has been modified to not explicitly use the DJ graph data structure and
+/// to directly compute pruned SSA using per-variable liveness information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_GENERIC_IDF_H
+#define LLVM_SUPPORT_GENERIC_IDF_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/GenericDomTree.h"
+#include <queue>
+
+namespace llvm {
+
+namespace IDFCalculatorDetail {
+
+/// Generic utility class used for getting the children of a basic block.
+/// May be specialized if, for example, one does not want null-pointer
+/// successors to be returned.
+template <class NodeTy, bool IsPostDom> struct ChildrenGetterTy {
+ using NodeRef = typename GraphTraits<NodeTy>::NodeRef;
+ using ChildrenTy = SmallVector<NodeRef, 8>;
+
+ ChildrenTy get(const NodeRef &N);
+};
+
+} // end of namespace IDFCalculatorDetail
+
+/// Determine the iterated dominance frontier, given a set of defining
+/// blocks, and optionally, a set of live-in blocks.
+///
+/// In turn, the results can be used to place phi nodes.
+///
+/// This algorithm is a linear time computation of Iterated Dominance Frontiers,
+/// pruned using the live-in set.
+/// By default, liveness is not used to prune the IDF computation.
+/// The template parameters should be of a CFG block type.
+template <class NodeTy, bool IsPostDom> class IDFCalculatorBase {
+public:
+ using OrderedNodeTy =
+ typename std::conditional<IsPostDom, Inverse<NodeTy *>, NodeTy *>::type;
+ using ChildrenGetterTy =
+ IDFCalculatorDetail::ChildrenGetterTy<NodeTy, IsPostDom>;
+
+ IDFCalculatorBase(DominatorTreeBase<NodeTy, IsPostDom> &DT) : DT(DT) {}
+
+ IDFCalculatorBase(DominatorTreeBase<NodeTy, IsPostDom> &DT,
+ const ChildrenGetterTy &C)
+ : DT(DT), ChildrenGetter(C) {}
+
+ /// Give the IDF calculator the set of blocks in which the value is
+  /// defined. This is equivalent to the set of starting blocks whose IDF
+  /// should be calculated (the result may later be pruned based on liveness).
+ ///
+ /// Note: This set *must* live for the entire lifetime of the IDF calculator.
+ void setDefiningBlocks(const SmallPtrSetImpl<NodeTy *> &Blocks) {
+ DefBlocks = &Blocks;
+ }
+
+ /// Give the IDF calculator the set of blocks in which the value is
+ /// live on entry to the block. This is used to prune the IDF calculation to
+ /// not include blocks where any phi insertion would be dead.
+ ///
+ /// Note: This set *must* live for the entire lifetime of the IDF calculator.
+ void setLiveInBlocks(const SmallPtrSetImpl<NodeTy *> &Blocks) {
+ LiveInBlocks = &Blocks;
+ useLiveIn = true;
+ }
+
+ /// Reset the live-in block set to be empty, and tell the IDF
+ /// calculator to not use liveness anymore.
+ void resetLiveInBlocks() {
+ LiveInBlocks = nullptr;
+ useLiveIn = false;
+ }
+
+ /// Calculate iterated dominance frontiers
+ ///
+ /// This uses the linear-time phi algorithm based on DJ-graphs mentioned in
+ /// the file-level comment. It performs DF->IDF pruning using the live-in
+ /// set, to avoid computing the IDF for blocks where an inserted PHI node
+ /// would be dead.
+ void calculate(SmallVectorImpl<NodeTy *> &IDFBlocks);
+
+private:
+ DominatorTreeBase<NodeTy, IsPostDom> &DT;
+ ChildrenGetterTy ChildrenGetter;
+ bool useLiveIn = false;
+ const SmallPtrSetImpl<NodeTy *> *LiveInBlocks;
+ const SmallPtrSetImpl<NodeTy *> *DefBlocks;
+};
+
+//===----------------------------------------------------------------------===//
+// Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace IDFCalculatorDetail {
+
+template <class NodeTy, bool IsPostDom>
+typename ChildrenGetterTy<NodeTy, IsPostDom>::ChildrenTy
+ChildrenGetterTy<NodeTy, IsPostDom>::get(const NodeRef &N) {
+ using OrderedNodeTy =
+ typename IDFCalculatorBase<NodeTy, IsPostDom>::OrderedNodeTy;
+
+ auto Children = children<OrderedNodeTy>(N);
+ return {Children.begin(), Children.end()};
+}
+
+} // end of namespace IDFCalculatorDetail
+
+template <class NodeTy, bool IsPostDom>
+void IDFCalculatorBase<NodeTy, IsPostDom>::calculate(
+ SmallVectorImpl<NodeTy *> &PHIBlocks) {
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards. We also augment
+ // the level with a DFS number to ensure that the blocks are ordered in a
+ // deterministic way.
+ using DomTreeNodePair =
+ std::pair<DomTreeNodeBase<NodeTy> *, std::pair<unsigned, unsigned>>;
+ using IDFPriorityQueue =
+ std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second>;
+
+ IDFPriorityQueue PQ;
+
+ DT.updateDFSNumbers();
+
+ for (NodeTy *BB : *DefBlocks) {
+ if (DomTreeNodeBase<NodeTy> *Node = DT.getNode(BB))
+ PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
+ }
+
+ SmallVector<DomTreeNodeBase<NodeTy> *, 32> Worklist;
+ SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedPQ;
+ SmallPtrSet<DomTreeNodeBase<NodeTy> *, 32> VisitedWorklist;
+
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNodeBase<NodeTy> *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second.first;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+ VisitedWorklist.insert(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNodeBase<NodeTy> *Node = Worklist.pop_back_val();
+ NodeTy *BB = Node->getBlock();
+      // Succ is the successor in the direction of the IDF calculation: a CFG
+      // successor for forward IDF and a CFG predecessor for reverse IDF.
+ auto DoWork = [&](NodeTy *Succ) {
+ DomTreeNodeBase<NodeTy> *SuccNode = DT.getNode(Succ);
+
+ const unsigned SuccLevel = SuccNode->getLevel();
+ if (SuccLevel > RootLevel)
+ return;
+
+ if (!VisitedPQ.insert(SuccNode).second)
+ return;
+
+ NodeTy *SuccBB = SuccNode->getBlock();
+ if (useLiveIn && !LiveInBlocks->count(SuccBB))
+ return;
+
+ PHIBlocks.emplace_back(SuccBB);
+ if (!DefBlocks->count(SuccBB))
+ PQ.push(std::make_pair(
+ SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
+ };
+
+ for (auto Succ : ChildrenGetter.get(BB))
+ DoWork(Succ);
+
+ for (auto DomChild : *Node) {
+ if (VisitedWorklist.insert(DomChild).second)
+ Worklist.push_back(DomChild);
+ }
+ }
+ }
+}
+
+} // end of namespace llvm
+
+#endif
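A hedged sketch of driving the new generic calculator directly with NodeTy = BasicBlock follows; placePhis and DefBlocks are illustrative names only, and in-tree IR passes would normally use the higher-level IDFCalculator wrapper in llvm/Analysis/IteratedDominanceFrontier.h rather than this header.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h" // GraphTraits<BasicBlock *> for the default children getter
#include "llvm/IR/Dominators.h"
#include "llvm/Support/GenericIteratedDominanceFrontier.h"

using namespace llvm;

// Compute the blocks that would need phi nodes for a value defined in
// DefBlocks (forward dominance).
void placePhis(DominatorTree &DT,
               const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
               SmallVectorImpl<BasicBlock *> &PHIBlocks) {
  IDFCalculatorBase<BasicBlock, /*IsPostDom=*/false> IDF(DT);
  IDF.setDefiningBlocks(DefBlocks); // the set must outlive the calculator
  IDF.calculate(PHIBlocks);         // the iterated dominance frontier
}

Calling setLiveInBlocks before calculate would additionally prune blocks where an inserted phi could never be live.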
diff --git a/include/llvm/Support/GlobPattern.h b/include/llvm/Support/GlobPattern.h
index c9436a13c1a3..66a4cd94c12a 100644
--- a/include/llvm/Support/GlobPattern.h
+++ b/include/llvm/Support/GlobPattern.h
@@ -1,9 +1,8 @@
//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 02d98bec16e2..466a0449e257 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -1,9 +1,8 @@
//===- llvm/Support/GraphWriter.h - Write graph to a .dot file --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index 57c79c0b9fdf..b37cc514c92e 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Host.h - Host machine characteristics --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/InitLLVM.h b/include/llvm/Support/InitLLVM.h
index 0f629c9ac92d..8069859a3e0b 100644
--- a/include/llvm/Support/InitLLVM.h
+++ b/include/llvm/Support/InitLLVM.h
@@ -1,9 +1,8 @@
//===- InitLLVM.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -20,7 +19,10 @@
// 1. Setting up a signal handler so that pretty stack trace is printed out
// if a process crashes.
//
-// 2. If running on Windows, obtain command line arguments using a
+// 2. Setting up the global new-handler, which is called when a memory
+//    allocation attempt fails.
+//
+// 3. If running on Windows, obtain command line arguments using a
// multibyte character-aware API and convert arguments into UTF-8
// encoding, so that you can assume that command line arguments are
// always encoded in UTF-8 on any platform.
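The usual way to get all of the above is to construct an InitLLVM object at the top of main; a minimal sketch:

#include "llvm/Support/InitLLVM.h"

int main(int argc, char **argv) {
  llvm::InitLLVM X(argc, argv); // stack traces, new-handler, UTF-8 argv on Windows
  // ... tool logic using argc/argv ...
  return 0;
}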
diff --git a/include/llvm/Support/ItaniumManglingCanonicalizer.h b/include/llvm/Support/ItaniumManglingCanonicalizer.h
index 34eb9f7deaaf..6920000340d4 100644
--- a/include/llvm/Support/ItaniumManglingCanonicalizer.h
+++ b/include/llvm/Support/ItaniumManglingCanonicalizer.h
@@ -1,9 +1,8 @@
//===--- ItaniumManglingCanonicalizer.h -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/JSON.h b/include/llvm/Support/JSON.h
index 7a04fd52bc50..0ca41097dddd 100644
--- a/include/llvm/Support/JSON.h
+++ b/include/llvm/Support/JSON.h
@@ -1,9 +1,8 @@
//===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
///
@@ -22,6 +21,9 @@
/// - a convention and helpers for mapping between json::Value and user-defined
/// types. See fromJSON(), ObjectMapper, and the class comment on Value.
///
+/// - an output API json::OStream which can emit JSON without materializing
+/// all structures as json::Value.
+///
/// Typically, JSON data would be read from an external source, parsed into
/// a Value, and then converted into some native data structure before doing
/// real work on it. (And vice versa when writing).
@@ -37,7 +39,7 @@
///
/// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it
/// encodes LLVM IR ("bitcode"), but it can be a container for other data.
-/// Low-level reader/writer libraries are in Bitcode/Bitstream*.h
+/// Low-level reader/writer libraries are in Bitstream/Bitstream*.h
///
//===---------------------------------------------------------------------===//
@@ -96,7 +98,7 @@ public:
using iterator = Storage::iterator;
using const_iterator = Storage::const_iterator;
- explicit Object() = default;
+ Object() = default;
// KV is a trivial key-value struct for list-initialization.
// (using std::pair forces extra copies).
struct KV;
@@ -157,7 +159,7 @@ public:
using iterator = std::vector<Value>::iterator;
using const_iterator = std::vector<Value>::const_iterator;
- explicit Array() = default;
+ Array() = default;
explicit Array(std::initializer_list<Value> Elements);
template <typename Collection> explicit Array(const Collection &C) {
for (const auto &V : C)
@@ -180,6 +182,7 @@ public:
bool empty() const { return V.empty(); }
size_t size() const { return V.size(); }
+ void reserve(size_t S) { V.reserve(S); }
void clear() { V.clear(); }
void push_back(const Value &E) { V.push_back(E); }
@@ -310,8 +313,8 @@ public:
create<std::string>(std::move(V));
}
Value(const llvm::SmallVectorImpl<char> &V)
- : Value(std::string(V.begin(), V.end())){};
- Value(const llvm::formatv_object_base &V) : Value(V.str()){};
+ : Value(std::string(V.begin(), V.end())) {}
+ Value(const llvm::formatv_object_base &V) : Value(V.str()) {}
// Strings: types with reference semantics. Must be valid UTF-8.
Value(StringRef V) : Type(T_StringRef) {
create<llvm::StringRef>(V);
@@ -437,11 +440,6 @@ public:
return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
}
- /// Serializes this Value to JSON, writing it to the provided stream.
- /// The formatting is compact (no extra whitespace) and deterministic.
- /// For pretty-printing, use the formatv() format_provider below.
- friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
-
private:
void destroy();
void copyFrom(const Value &M);
@@ -462,9 +460,7 @@ private:
return *static_cast<T *>(Storage);
}
- template <typename Indenter>
- void print(llvm::raw_ostream &, const Indenter &) const;
- friend struct llvm::format_provider<llvm::json::Value>;
+ friend class OStream;
enum ValueType : char {
T_Null,
@@ -481,11 +477,11 @@ private:
mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
std::string, json::Array, json::Object>
Union;
+ friend bool operator==(const Value &, const Value &);
};
bool operator==(const Value &, const Value &);
inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
/// ObjectKey is a used to capture keys in Object. Like Value but:
/// - only strings are allowed
@@ -698,6 +694,154 @@ public:
return llvm::inconvertibleErrorCode();
}
};
+
+/// json::OStream allows writing well-formed JSON without materializing
+/// all structures as json::Value ahead of time.
+/// It's faster, lower-level, and less safe than OS << json::Value.
+///
+/// Only one "top-level" object can be written to a stream.
+/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
+///
+/// json::OStream J(OS);
+/// J.array([&]{
+/// for (const Event &E : Events)
+/// J.object([&] {
+/// J.attribute("timestamp", int64_t(E.Time));
+/// J.attributeArray("participants", [&] {
+/// for (const Participant &P : E.Participants)
+/// J.string(P.toString());
+/// });
+/// });
+/// });
+///
+/// This would produce JSON like:
+///
+/// [
+/// {
+/// "timestamp": 19287398741,
+/// "participants": [
+/// "King Kong",
+/// "Miley Cyrus",
+/// "Cleopatra"
+/// ]
+/// },
+/// ...
+/// ]
+///
+/// The lower level begin/end methods (arrayBegin()) are more flexible but
+/// care must be taken to pair them correctly:
+///
+/// json::OStream J(OS);
+///   J.arrayBegin();
+/// for (const Event &E : Events) {
+/// J.objectBegin();
+/// J.attribute("timestamp", int64_t(E.Time));
+/// J.attributeBegin("participants");
+/// for (const Participant &P : E.Participants)
+/// J.value(P.toString());
+/// J.attributeEnd();
+/// J.objectEnd();
+/// }
+/// J.arrayEnd();
+///
+/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
+/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
+/// an array, and so on.
+/// With asserts disabled, this is undefined behavior.
+class OStream {
+ public:
+ using Block = llvm::function_ref<void()>;
+ // If IndentSize is nonzero, output is pretty-printed.
+ explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
+ : OS(OS), IndentSize(IndentSize) {
+ Stack.emplace_back();
+ }
+ ~OStream() {
+ assert(Stack.size() == 1 && "Unmatched begin()/end()");
+ assert(Stack.back().Ctx == Singleton);
+ assert(Stack.back().HasValue && "Did not write top-level value");
+ }
+
+ /// Flushes the underlying ostream. OStream does not buffer internally.
+ void flush() { OS.flush(); }
+
+ // High level functions to output a value.
+ // Valid at top-level (exactly once), in an attribute value (exactly once),
+ // or in an array (any number of times).
+
+ /// Emit a self-contained value (number, string, vector<string> etc).
+ void value(const Value &V);
+ /// Emit an array whose elements are emitted in the provided Block.
+ void array(Block Contents) {
+ arrayBegin();
+ Contents();
+ arrayEnd();
+ }
+ /// Emit an object whose elements are emitted in the provided Block.
+ void object(Block Contents) {
+ objectBegin();
+ Contents();
+ objectEnd();
+ }
+
+ // High level functions to output object attributes.
+ // Valid only within an object (any number of times).
+
+ /// Emit an attribute whose value is self-contained (number, vector<int> etc).
+ void attribute(llvm::StringRef Key, const Value& Contents) {
+ attributeImpl(Key, [&] { value(Contents); });
+ }
+ /// Emit an attribute whose value is an array with elements from the Block.
+ void attributeArray(llvm::StringRef Key, Block Contents) {
+ attributeImpl(Key, [&] { array(Contents); });
+ }
+ /// Emit an attribute whose value is an object with attributes from the Block.
+ void attributeObject(llvm::StringRef Key, Block Contents) {
+ attributeImpl(Key, [&] { object(Contents); });
+ }
+
+ // Low-level begin/end functions to output arrays, objects, and attributes.
+ // Must be correctly paired. Allowed contexts are as above.
+
+ void arrayBegin();
+ void arrayEnd();
+ void objectBegin();
+ void objectEnd();
+ void attributeBegin(llvm::StringRef Key);
+ void attributeEnd();
+
+ private:
+ void attributeImpl(llvm::StringRef Key, Block Contents) {
+ attributeBegin(Key);
+ Contents();
+ attributeEnd();
+ }
+
+ void valueBegin();
+ void newline();
+
+ enum Context {
+ Singleton, // Top level, or object attribute.
+ Array,
+ Object,
+ };
+ struct State {
+ Context Ctx = Singleton;
+ bool HasValue = false;
+ };
+ llvm::SmallVector<State, 16> Stack; // Never empty.
+ llvm::raw_ostream &OS;
+ unsigned IndentSize;
+ unsigned Indent = 0;
+};
+
+/// Serializes this Value to JSON, writing it to the provided stream.
+/// The formatting is compact (no extra whitespace) and deterministic.
+/// For pretty-printing, use the formatv() format_provider below.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
+ OStream(OS).value(V);
+ return OS;
+}
} // namespace json
/// Allow printing json::Value with formatv().
diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h
index 846d6cea9828..b6fc4e7b9b03 100644
--- a/include/llvm/Support/JamCRC.h
+++ b/include/llvm/Support/JamCRC.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h
index 259df9546c57..07fd94e29a1f 100644
--- a/include/llvm/Support/KnownBits.h
+++ b/include/llvm/Support/KnownBits.h
@@ -1,9 +1,8 @@
//===- llvm/Support/KnownBits.h - Stores known zeros/ones -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,25 +109,36 @@ public:
/// Truncate the underlying known Zero and One bits. This is equivalent
/// to truncating the value we're tracking.
- KnownBits trunc(unsigned BitWidth) {
+ KnownBits trunc(unsigned BitWidth) const {
return KnownBits(Zero.trunc(BitWidth), One.trunc(BitWidth));
}
- /// Zero extends the underlying known Zero and One bits. This is equivalent
- /// to zero extending the value we're tracking.
- KnownBits zext(unsigned BitWidth) {
- return KnownBits(Zero.zext(BitWidth), One.zext(BitWidth));
+ /// Extends the underlying known Zero and One bits.
+ /// By setting ExtendedBitsAreKnownZero=true this will be equivalent to
+ /// zero extending the value we're tracking.
+ /// With ExtendedBitsAreKnownZero=false the extended bits are set to unknown.
+ KnownBits zext(unsigned BitWidth, bool ExtendedBitsAreKnownZero) const {
+ unsigned OldBitWidth = getBitWidth();
+ APInt NewZero = Zero.zext(BitWidth);
+ if (ExtendedBitsAreKnownZero)
+ NewZero.setBitsFrom(OldBitWidth);
+ return KnownBits(NewZero, One.zext(BitWidth));
}
/// Sign extends the underlying known Zero and One bits. This is equivalent
/// to sign extending the value we're tracking.
- KnownBits sext(unsigned BitWidth) {
+ KnownBits sext(unsigned BitWidth) const {
return KnownBits(Zero.sext(BitWidth), One.sext(BitWidth));
}
- /// Zero extends or truncates the underlying known Zero and One bits. This is
- /// equivalent to zero extending or truncating the value we're tracking.
- KnownBits zextOrTrunc(unsigned BitWidth) {
+ /// Extends or truncates the underlying known Zero and One bits. When
+  /// extending, the extended bits can either be set as known zero (if
+ /// ExtendedBitsAreKnownZero=true) or as unknown (if
+ /// ExtendedBitsAreKnownZero=false).
+ KnownBits zextOrTrunc(unsigned BitWidth,
+ bool ExtendedBitsAreKnownZero) const {
+ if (BitWidth > getBitWidth())
+ return zext(BitWidth, ExtendedBitsAreKnownZero);
return KnownBits(Zero.zextOrTrunc(BitWidth), One.zextOrTrunc(BitWidth));
}
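A small worked example of the new ExtendedBitsAreKnownZero parameter (a sketch, not part of the patch): take an i8 value whose bit 2 is known one and bit 0 is known zero.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using llvm::APInt;
using llvm::KnownBits;

void knownBitsExample() {
  KnownBits K(8);
  K.One  = APInt(8, 0x04); // bit 2 known to be 1
  K.Zero = APInt(8, 0x01); // bit 0 known to be 0
  // Zero-extend: the high 8 bits become known zero (Zero == 0xFF01).
  KnownBits Z = K.zext(16, /*ExtendedBitsAreKnownZero=*/true);
  // Any-extend: the high 8 bits stay unknown (Zero == 0x0001).
  KnownBits A = K.zext(16, /*ExtendedBitsAreKnownZero=*/false);
  (void)Z; (void)A;
}

When the target width is larger, zextOrTrunc simply forwards to zext with the same flag.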
@@ -192,6 +202,10 @@ public:
return getBitWidth() - Zero.countPopulation();
}
+ /// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry.
+ static KnownBits computeForAddCarry(
+ const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry);
+
/// Compute known bits resulting from adding LHS and RHS.
static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS,
KnownBits RHS);
diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h
index 9feb07229225..a02b83ca9597 100644
--- a/include/llvm/Support/LEB128.h
+++ b/include/llvm/Support/LEB128.h
@@ -1,9 +1,8 @@
//===- llvm/Support/LEB128.h - [SU]LEB128 utility functions -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -166,6 +165,8 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
int64_t Value = 0;
unsigned Shift = 0;
uint8_t Byte;
+ if (error)
+ *error = nullptr;
do {
if (end && p == end) {
if (error)
@@ -175,11 +176,11 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
return 0;
}
Byte = *p++;
- Value |= (int64_t(Byte & 0x7f) << Shift);
+ Value |= (uint64_t(Byte & 0x7f) << Shift);
Shift += 7;
} while (Byte >= 128);
- // Sign extend negative numbers.
- if (Byte & 0x40)
+ // Sign extend negative numbers if needed.
+ if (Shift < 64 && (Byte & 0x40))
Value |= (-1ULL) << Shift;
if (n)
*n = (unsigned)(p - orig_p);
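For illustration, a sketch of decoding a short negative SLEB128 value with the corrected sign extension: the single byte 0x7E (sign bit 0x40 set, fewer than 64 bits consumed) encodes -2.

#include "llvm/Support/LEB128.h"
#include <cassert>
#include <cstdint>

void slebExample() {
  const uint8_t Buf[] = {0x7e};
  unsigned N = 0;
  int64_t V = llvm::decodeSLEB128(Buf, &N, Buf + sizeof(Buf));
  assert(V == -2 && N == 1); // 0x7e | (-1 << 7) sign-extends to -2
  (void)V;
}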
diff --git a/include/llvm/Support/LineIterator.h b/include/llvm/Support/LineIterator.h
index 892d289976cb..c9f10ca975ae 100644
--- a/include/llvm/Support/LineIterator.h
+++ b/include/llvm/Support/LineIterator.h
@@ -1,9 +1,8 @@
//===- LineIterator.h - Iterator to read a text buffer's lines --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/LockFileManager.h b/include/llvm/Support/LockFileManager.h
index 86db0b2b1020..57e4fbd84cd9 100644
--- a/include/llvm/Support/LockFileManager.h
+++ b/include/llvm/Support/LockFileManager.h
@@ -1,9 +1,8 @@
//===--- LockFileManager.h - File-level locking utility ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_LOCKFILEMANAGER_H
diff --git a/include/llvm/Support/LowLevelTypeImpl.h b/include/llvm/Support/LowLevelTypeImpl.h
index 2a1075c9a48d..0e02b6e7d750 100644
--- a/include/llvm/Support/LowLevelTypeImpl.h
+++ b/include/llvm/Support/LowLevelTypeImpl.h
@@ -1,9 +1,8 @@
//== llvm/Support/LowLevelTypeImpl.h --------------------------- -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,8 +45,8 @@ public:
SizeInBits, /*AddressSpace=*/0};
}
- /// Get a low-level pointer in the given address space (defaulting to 0).
- static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits) {
+ /// Get a low-level pointer in the given address space.
+ static LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
assert(SizeInBits > 0 && "invalid pointer size");
return LLT{/*isPointer=*/true, /*isVector=*/false, /*NumElements=*/0,
SizeInBits, AddressSpace};
@@ -71,6 +70,14 @@ public:
ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
}
+ static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy) {
+ return NumElements == 1 ? ScalarTy : LLT::vector(NumElements, ScalarTy);
+ }
+
+ static LLT scalarOrVector(uint16_t NumElements, unsigned ScalarSize) {
+ return scalarOrVector(NumElements, LLT::scalar(ScalarSize));
+ }
+
explicit LLT(bool isPointer, bool isVector, uint16_t NumElements,
unsigned SizeInBits, unsigned AddressSpace) {
init(isPointer, isVector, NumElements, SizeInBits, AddressSpace);
@@ -104,6 +111,32 @@ public:
return getScalarSizeInBits() * getNumElements();
}
+ /// Returns the total size of the type in bytes, i.e. number of whole bytes
+ /// needed to represent the size in bits. Must only be called on sized types.
+ unsigned getSizeInBytes() const {
+ return (getSizeInBits() + 7) / 8;
+ }
+
+ LLT getScalarType() const {
+ return isVector() ? getElementType() : *this;
+ }
+
+ /// If this type is a vector, return a vector with the same number of elements
+ /// but the new element type. Otherwise, return the new element type.
+ LLT changeElementType(LLT NewEltTy) const {
+ return isVector() ? LLT::vector(getNumElements(), NewEltTy) : NewEltTy;
+ }
+
+  /// If this type is a vector, return a vector with the same number of elements
+  /// but the new element size. Otherwise, return a scalar with the new size.
+  /// Invalid for pointer types; for pointer types, use changeElementType.
+ LLT changeElementSize(unsigned NewEltSize) const {
+ assert(!getScalarType().isPointer() &&
+ "invalid to directly change element size for pointers");
+ return isVector() ? LLT::vector(getNumElements(), NewEltSize)
+ : LLT::scalar(NewEltSize);
+ }
+
unsigned getScalarSizeInBits() const {
assert(RawData != 0 && "Invalid Type");
if (!IsVector) {
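An illustrative sketch of the new LLT helpers added above (the function and variable names are not from the patch):

#include "llvm/Support/LowLevelTypeImpl.h"
using llvm::LLT;

void lltExample() {
  LLT V4S32 = LLT::vector(4, 32);           // <4 x s32>
  LLT V4S16 = V4S32.changeElementSize(16);  // <4 x s16>
  LLT S32   = V4S32.getScalarType();        // s32
  LLT S64   = LLT::scalarOrVector(1, 64);   // a single element collapses to s64
  LLT V3S32 = LLT::scalarOrVector(3, 32);   // <3 x s32>
  unsigned Bytes = V3S32.getSizeInBytes();  // (96 + 7) / 8 == 12
  (void)V4S16; (void)S32; (void)S64; (void)Bytes;
}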
@@ -170,10 +203,10 @@ private:
static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0};
/// * Pointer (isPointer == 1 && isVector == 0):
/// SizeInBits: 16;
- /// AddressSpace: 23;
+ /// AddressSpace: 24;
static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0};
static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{
- 23, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
+ 24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
/// * Vector-of-non-pointer (isPointer == 0 && isVector == 1):
/// NumElements: 16;
/// SizeOfElement: 32;
@@ -183,13 +216,13 @@ private:
/// * Vector-of-pointer (isPointer == 1 && isVector == 1):
/// NumElements: 16;
/// SizeOfElement: 16;
- /// AddressSpace: 23;
+ /// AddressSpace: 24;
static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0};
static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{
16,
PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]};
static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{
- 23, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
+ 24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
uint64_t IsPointer : 1;
uint64_t IsVector : 1;
diff --git a/include/llvm/Support/MSVCErrorWorkarounds.h b/include/llvm/Support/MSVCErrorWorkarounds.h
index 053ecf64d1e9..30e8febae20b 100644
--- a/include/llvm/Support/MSVCErrorWorkarounds.h
+++ b/include/llvm/Support/MSVCErrorWorkarounds.h
@@ -1,9 +1,8 @@
//===--- MSVCErrorWorkarounds.h - Enable future<Error> in MSVC --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/MachineValueType.h b/include/llvm/Support/MachineValueType.h
index 552dea05029c..b94d2c4836cc 100644
--- a/include/llvm/Support/MachineValueType.h
+++ b/include/llvm/Support/MachineValueType.h
@@ -1,9 +1,8 @@
//===- Support/MachineValueType.h - Machine-Level types ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -87,58 +86,65 @@ namespace llvm {
v64i16 = 39, // 64 x i16
v128i16 = 40, //128 x i16
- v1i32 = 41, // 1 x i32
- v2i32 = 42, // 2 x i32
- v4i32 = 43, // 4 x i32
- v8i32 = 44, // 8 x i32
- v16i32 = 45, // 16 x i32
- v32i32 = 46, // 32 x i32
- v64i32 = 47, // 64 x i32
-
- v1i64 = 48, // 1 x i64
- v2i64 = 49, // 2 x i64
- v4i64 = 50, // 4 x i64
- v8i64 = 51, // 8 x i64
- v16i64 = 52, // 16 x i64
- v32i64 = 53, // 32 x i64
-
- v1i128 = 54, // 1 x i128
+ v1i32 = 41, // 1 x i32
+ v2i32 = 42, // 2 x i32
+ v3i32 = 43, // 3 x i32
+ v4i32 = 44, // 4 x i32
+ v5i32 = 45, // 5 x i32
+ v8i32 = 46, // 8 x i32
+ v16i32 = 47, // 16 x i32
+ v32i32 = 48, // 32 x i32
+ v64i32 = 49, // 64 x i32
+ v128i32 = 50, // 128 x i32
+ v256i32 = 51, // 256 x i32
+ v512i32 = 52, // 512 x i32
+ v1024i32 = 53, // 1024 x i32
+ v2048i32 = 54, // 2048 x i32
+
+ v1i64 = 55, // 1 x i64
+ v2i64 = 56, // 2 x i64
+ v4i64 = 57, // 4 x i64
+ v8i64 = 58, // 8 x i64
+ v16i64 = 59, // 16 x i64
+ v32i64 = 60, // 32 x i64
+
+ v1i128 = 61, // 1 x i128
// Scalable integer types
- nxv1i1 = 55, // n x 1 x i1
- nxv2i1 = 56, // n x 2 x i1
- nxv4i1 = 57, // n x 4 x i1
- nxv8i1 = 58, // n x 8 x i1
- nxv16i1 = 59, // n x 16 x i1
- nxv32i1 = 60, // n x 32 x i1
-
- nxv1i8 = 61, // n x 1 x i8
- nxv2i8 = 62, // n x 2 x i8
- nxv4i8 = 63, // n x 4 x i8
- nxv8i8 = 64, // n x 8 x i8
- nxv16i8 = 65, // n x 16 x i8
- nxv32i8 = 66, // n x 32 x i8
-
- nxv1i16 = 67, // n x 1 x i16
- nxv2i16 = 68, // n x 2 x i16
- nxv4i16 = 69, // n x 4 x i16
- nxv8i16 = 70, // n x 8 x i16
- nxv16i16 = 71, // n x 16 x i16
- nxv32i16 = 72, // n x 32 x i16
-
- nxv1i32 = 73, // n x 1 x i32
- nxv2i32 = 74, // n x 2 x i32
- nxv4i32 = 75, // n x 4 x i32
- nxv8i32 = 76, // n x 8 x i32
- nxv16i32 = 77, // n x 16 x i32
- nxv32i32 = 78, // n x 32 x i32
-
- nxv1i64 = 79, // n x 1 x i64
- nxv2i64 = 80, // n x 2 x i64
- nxv4i64 = 81, // n x 4 x i64
- nxv8i64 = 82, // n x 8 x i64
- nxv16i64 = 83, // n x 16 x i64
- nxv32i64 = 84, // n x 32 x i64
+ nxv1i1 = 62, // n x 1 x i1
+ nxv2i1 = 63, // n x 2 x i1
+ nxv4i1 = 64, // n x 4 x i1
+ nxv8i1 = 65, // n x 8 x i1
+ nxv16i1 = 66, // n x 16 x i1
+ nxv32i1 = 67, // n x 32 x i1
+
+ nxv1i8 = 68, // n x 1 x i8
+ nxv2i8 = 69, // n x 2 x i8
+ nxv4i8 = 70, // n x 4 x i8
+ nxv8i8 = 71, // n x 8 x i8
+ nxv16i8 = 72, // n x 16 x i8
+ nxv32i8 = 73, // n x 32 x i8
+
+ nxv1i16 = 74, // n x 1 x i16
+ nxv2i16 = 75, // n x 2 x i16
+ nxv4i16 = 76, // n x 4 x i16
+ nxv8i16 = 77, // n x 8 x i16
+ nxv16i16 = 78, // n x 16 x i16
+ nxv32i16 = 79, // n x 32 x i16
+
+ nxv1i32 = 80, // n x 1 x i32
+ nxv2i32 = 81, // n x 2 x i32
+ nxv4i32 = 82, // n x 4 x i32
+ nxv8i32 = 83, // n x 8 x i32
+ nxv16i32 = 84, // n x 16 x i32
+ nxv32i32 = 85, // n x 32 x i32
+
+ nxv1i64 = 86, // n x 1 x i64
+ nxv2i64 = 87, // n x 2 x i64
+ nxv4i64 = 88, // n x 4 x i64
+ nxv8i64 = 89, // n x 8 x i64
+ nxv16i64 = 90, // n x 16 x i64
+ nxv32i64 = 91, // n x 32 x i64
FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,
@@ -146,31 +152,40 @@ namespace llvm {
FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1,
LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,
- v2f16 = 85, // 2 x f16
- v4f16 = 86, // 4 x f16
- v8f16 = 87, // 8 x f16
- v1f32 = 88, // 1 x f32
- v2f32 = 89, // 2 x f32
- v4f32 = 90, // 4 x f32
- v8f32 = 91, // 8 x f32
- v16f32 = 92, // 16 x f32
- v1f64 = 93, // 1 x f64
- v2f64 = 94, // 2 x f64
- v4f64 = 95, // 4 x f64
- v8f64 = 96, // 8 x f64
-
- nxv2f16 = 97, // n x 2 x f16
- nxv4f16 = 98, // n x 4 x f16
- nxv8f16 = 99, // n x 8 x f16
- nxv1f32 = 100, // n x 1 x f32
- nxv2f32 = 101, // n x 2 x f32
- nxv4f32 = 102, // n x 4 x f32
- nxv8f32 = 103, // n x 8 x f32
- nxv16f32 = 104, // n x 16 x f32
- nxv1f64 = 105, // n x 1 x f64
- nxv2f64 = 106, // n x 2 x f64
- nxv4f64 = 107, // n x 4 x f64
- nxv8f64 = 108, // n x 8 x f64
+ v2f16 = 92, // 2 x f16
+ v4f16 = 93, // 4 x f16
+ v8f16 = 94, // 8 x f16
+ v1f32 = 95, // 1 x f32
+ v2f32 = 96, // 2 x f32
+ v3f32 = 97, // 3 x f32
+ v4f32 = 98, // 4 x f32
+ v5f32 = 99, // 5 x f32
+ v8f32 = 100, // 8 x f32
+ v16f32 = 101, // 16 x f32
+ v32f32 = 102, // 32 x f32
+ v64f32 = 103, // 64 x f32
+ v128f32 = 104, // 128 x f32
+ v256f32 = 105, // 256 x f32
+ v512f32 = 106, // 512 x f32
+ v1024f32 = 107, // 1024 x f32
+ v2048f32 = 108, // 2048 x f32
+ v1f64 = 109, // 1 x f64
+ v2f64 = 110, // 2 x f64
+ v4f64 = 111, // 4 x f64
+ v8f64 = 112, // 8 x f64
+
+ nxv2f16 = 113, // n x 2 x f16
+ nxv4f16 = 114, // n x 4 x f16
+ nxv8f16 = 115, // n x 8 x f16
+ nxv1f32 = 116, // n x 1 x f32
+ nxv2f32 = 117, // n x 2 x f32
+ nxv4f32 = 118, // n x 4 x f32
+ nxv8f32 = 119, // n x 8 x f32
+ nxv16f32 = 120, // n x 16 x f32
+ nxv1f64 = 121, // n x 1 x f64
+ nxv2f64 = 122, // n x 2 x f64
+ nxv4f64 = 123, // n x 4 x f64
+ nxv8f64 = 124, // n x 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = nxv8f64,
@@ -181,25 +196,25 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
- x86mmx = 109, // This is an X86 MMX value
+ x86mmx = 125, // This is an X86 MMX value
- Glue = 110, // This glues nodes together during pre-RA sched
+ Glue = 126, // This glues nodes together during pre-RA sched
- isVoid = 111, // This has no value
+ isVoid = 127, // This has no value
- Untyped = 112, // This value takes a register, but has
+ Untyped = 128, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- ExceptRef = 113, // WebAssembly's except_ref type
+ exnref = 129, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = 114, // This always remains at the end of the list.
+ LAST_VALUETYPE = 130, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
- MAX_ALLOWED_VALUETYPE = 128,
+ MAX_ALLOWED_VALUETYPE = 160,
// A value of type llvm::TokenTy
token = 248,
@@ -464,11 +479,18 @@ namespace llvm {
case nxv32i16: return i16;
case v1i32:
case v2i32:
+ case v3i32:
case v4i32:
+ case v5i32:
case v8i32:
case v16i32:
case v32i32:
case v64i32:
+ case v128i32:
+ case v256i32:
+ case v512i32:
+ case v1024i32:
+ case v2048i32:
case nxv1i32:
case nxv2i32:
case nxv4i32:
@@ -496,9 +518,18 @@ namespace llvm {
case nxv8f16: return f16;
case v1f32:
case v2f32:
+ case v3f32:
case v4f32:
+ case v5f32:
case v8f32:
case v16f32:
+ case v32f32:
+ case v64f32:
+ case v128f32:
+ case v256f32:
+ case v512f32:
+ case v1024f32:
+ case v2048f32:
case nxv1f32:
case nxv2f32:
case nxv4f32:
@@ -519,21 +550,33 @@ namespace llvm {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
- case v1024i1: return 1024;
- case v512i1: return 512;
- case v256i8: return 256;
+ case v2048i32:
+ case v2048f32: return 2048;
+ case v1024i1:
+ case v1024i32:
+ case v1024f32: return 1024;
+ case v512i1:
+ case v512i32:
+ case v512f32: return 512;
+ case v256i8:
+ case v256i32:
+ case v256f32: return 256;
case v128i1:
case v128i8:
- case v128i16: return 128;
+ case v128i16:
+ case v128i32:
+ case v128f32: return 128;
case v64i1:
case v64i8:
case v64i16:
- case v64i32: return 64;
+ case v64i32:
+ case v64f32: return 64;
case v32i1:
case v32i8:
case v32i16:
case v32i32:
case v32i64:
+ case v32f32:
case nxv32i1:
case nxv32i8:
case nxv32i16:
@@ -567,6 +610,8 @@ namespace llvm {
case nxv8f16:
case nxv8f32:
case nxv8f64: return 8;
+ case v5i32:
+ case v5f32: return 5;
case v4i1:
case v4i8:
case v4i16:
@@ -583,6 +628,8 @@ namespace llvm {
case nxv4f16:
case nxv4f32:
case nxv4f64: return 4;
+ case v3i32:
+ case v3f32: return 3;
case v2i1:
case v2i8:
case v2i16:
@@ -693,6 +740,8 @@ namespace llvm {
case nxv2f32:
case nxv1f64: return 64;
case f80 : return 80;
+ case v3i32:
+ case v3f32: return 96;
case f128:
case ppcf128:
case i128:
@@ -712,6 +761,8 @@ namespace llvm {
case nxv8f16:
case nxv4f32:
case nxv2f64: return 128;
+ case v5i32:
+ case v5f32: return 160;
case v32i8:
case v16i16:
case v8i32:
@@ -741,14 +792,26 @@ namespace llvm {
case v64i16:
case v32i32:
case v16i64:
+ case v32f32:
case nxv32i32:
case nxv16i64: return 1024;
case v256i8:
case v128i16:
case v64i32:
case v32i64:
+ case v64f32:
case nxv32i64: return 2048;
- case ExceptRef: return 0; // opaque type
+ case v128i32:
+ case v128f32: return 4096;
+ case v256i32:
+ case v256f32: return 8192;
+ case v512i32:
+ case v512f32: return 16384;
+ case v1024i32:
+ case v1024f32: return 32768;
+ case v2048i32:
+ case v2048f32: return 65536;
+ case exnref: return 0; // opaque type
}
}
@@ -862,13 +925,20 @@ namespace llvm {
if (NumElements == 128) return MVT::v128i16;
break;
case MVT::i32:
- if (NumElements == 1) return MVT::v1i32;
- if (NumElements == 2) return MVT::v2i32;
- if (NumElements == 4) return MVT::v4i32;
- if (NumElements == 8) return MVT::v8i32;
- if (NumElements == 16) return MVT::v16i32;
- if (NumElements == 32) return MVT::v32i32;
- if (NumElements == 64) return MVT::v64i32;
+ if (NumElements == 1) return MVT::v1i32;
+ if (NumElements == 2) return MVT::v2i32;
+ if (NumElements == 3) return MVT::v3i32;
+ if (NumElements == 4) return MVT::v4i32;
+ if (NumElements == 5) return MVT::v5i32;
+ if (NumElements == 8) return MVT::v8i32;
+ if (NumElements == 16) return MVT::v16i32;
+ if (NumElements == 32) return MVT::v32i32;
+ if (NumElements == 64) return MVT::v64i32;
+ if (NumElements == 128) return MVT::v128i32;
+ if (NumElements == 256) return MVT::v256i32;
+ if (NumElements == 512) return MVT::v512i32;
+ if (NumElements == 1024) return MVT::v1024i32;
+ if (NumElements == 2048) return MVT::v2048i32;
break;
case MVT::i64:
if (NumElements == 1) return MVT::v1i64;
@@ -887,11 +957,20 @@ namespace llvm {
if (NumElements == 8) return MVT::v8f16;
break;
case MVT::f32:
- if (NumElements == 1) return MVT::v1f32;
- if (NumElements == 2) return MVT::v2f32;
- if (NumElements == 4) return MVT::v4f32;
- if (NumElements == 8) return MVT::v8f32;
- if (NumElements == 16) return MVT::v16f32;
+ if (NumElements == 1) return MVT::v1f32;
+ if (NumElements == 2) return MVT::v2f32;
+ if (NumElements == 3) return MVT::v3f32;
+ if (NumElements == 4) return MVT::v4f32;
+ if (NumElements == 5) return MVT::v5f32;
+ if (NumElements == 8) return MVT::v8f32;
+ if (NumElements == 16) return MVT::v16f32;
+ if (NumElements == 32) return MVT::v32f32;
+ if (NumElements == 64) return MVT::v64f32;
+ if (NumElements == 128) return MVT::v128f32;
+ if (NumElements == 256) return MVT::v256f32;
+ if (NumElements == 512) return MVT::v512f32;
+ if (NumElements == 1024) return MVT::v1024f32;
+ if (NumElements == 2048) return MVT::v2048f32;
break;
case MVT::f64:
if (NumElements == 1) return MVT::v1f64;
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index b4bf3210cc73..e65bb051f181 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/ManagedStatic.h - Static Global wrapper ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,18 +32,41 @@ template <typename T, size_t N> struct object_deleter<T[N]> {
static void call(void *Ptr) { delete[](T *)Ptr; }
};
+// ManagedStatic must be initialized to zero, and it must *not* have a dynamic
+// initializer because managed statics are often created while running other
+// dynamic initializers. In standard C++11, the best way to accomplish this is
+// with a constexpr default constructor. However, different versions of the
+// Visual C++ compiler have had bugs where, even though the constructor may be
+// constexpr, a dynamic initializer may be emitted depending on optimization
+// settings. For the affected versions of MSVC, use the old linker
+// initialization pattern of not providing a constructor and leaving the fields
+// uninitialized.
+#if !defined(_MSC_VER) || defined(__clang__)
+#define LLVM_USE_CONSTEXPR_CTOR
+#endif
+
/// ManagedStaticBase - Common base class for ManagedStatic instances.
class ManagedStaticBase {
protected:
+#ifdef LLVM_USE_CONSTEXPR_CTOR
+ mutable std::atomic<void *> Ptr{};
+ mutable void (*DeleterFn)(void *) = nullptr;
+ mutable const ManagedStaticBase *Next = nullptr;
+#else
// This should only be used as a static variable, which guarantees that this
// will be zero initialized.
mutable std::atomic<void *> Ptr;
- mutable void (*DeleterFn)(void*);
+ mutable void (*DeleterFn)(void *);
mutable const ManagedStaticBase *Next;
+#endif
void RegisterManagedStatic(void *(*creator)(), void (*deleter)(void*)) const;
public:
+#ifdef LLVM_USE_CONSTEXPR_CTOR
+ constexpr ManagedStaticBase() = default;
+#endif
+
/// isConstructed - Return true if this object has already been constructed.
bool isConstructed() const { return Ptr != nullptr; }
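For context, the usage pattern this class supports (a sketch): the wrapped object is built lazily on first access and destroyed by llvm_shutdown(), which is why the ManagedStatic itself must be constant-initialized.

#include "llvm/Support/ManagedStatic.h"
#include <vector>

static llvm::ManagedStatic<std::vector<int>> GlobalInts;

void record(int V) { GlobalInts->push_back(V); }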
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index b59f21b4998e..249139e824b5 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -52,14 +51,14 @@ enum ZeroBehavior {
namespace detail {
template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
- static std::size_t count(T Val, ZeroBehavior) {
+ static unsigned count(T Val, ZeroBehavior) {
if (!Val)
return std::numeric_limits<T>::digits;
if (Val & 0x1)
return 0;
// Bisection method.
- std::size_t ZeroBits = 0;
+ unsigned ZeroBits = 0;
T Shift = std::numeric_limits<T>::digits >> 1;
T Mask = std::numeric_limits<T>::max() >> Shift;
while (Shift) {
@@ -76,7 +75,7 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
#if __GNUC__ >= 4 || defined(_MSC_VER)
template <typename T> struct TrailingZerosCounter<T, 4> {
- static std::size_t count(T Val, ZeroBehavior ZB) {
+ static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
return 32;
@@ -92,7 +91,7 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T> struct TrailingZerosCounter<T, 8> {
- static std::size_t count(T Val, ZeroBehavior ZB) {
+ static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
return 64;
@@ -117,7 +116,7 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
-std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
static_assert(std::numeric_limits<T>::is_integer &&
!std::numeric_limits<T>::is_signed,
"Only unsigned integral types are allowed.");
@@ -126,12 +125,12 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
namespace detail {
template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
- static std::size_t count(T Val, ZeroBehavior) {
+ static unsigned count(T Val, ZeroBehavior) {
if (!Val)
return std::numeric_limits<T>::digits;
// Bisection method.
- std::size_t ZeroBits = 0;
+ unsigned ZeroBits = 0;
for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
T Tmp = Val >> Shift;
if (Tmp)
@@ -145,7 +144,7 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
#if __GNUC__ >= 4 || defined(_MSC_VER)
template <typename T> struct LeadingZerosCounter<T, 4> {
- static std::size_t count(T Val, ZeroBehavior ZB) {
+ static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
return 32;
@@ -161,7 +160,7 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
#if !defined(_MSC_VER) || defined(_M_X64)
template <typename T> struct LeadingZerosCounter<T, 8> {
- static std::size_t count(T Val, ZeroBehavior ZB) {
+ static unsigned count(T Val, ZeroBehavior ZB) {
if (ZB != ZB_Undefined && Val == 0)
return 64;
@@ -186,7 +185,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
/// valid arguments.
template <typename T>
-std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
static_assert(std::numeric_limits<T>::is_integer &&
!std::numeric_limits<T>::is_signed,
"Only unsigned integral types are allowed.");
@@ -459,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) {
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
/// ZB_Undefined are valid arguments.
template <typename T>
-std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
static_assert(std::numeric_limits<T>::is_integer &&
!std::numeric_limits<T>::is_signed,
"Only unsigned integral types are allowed.");
@@ -475,7 +474,7 @@ std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
/// \param ZB the behavior on an input of all ones. Only ZB_Width and
/// ZB_Undefined are valid arguments.
template <typename T>
-std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
static_assert(std::numeric_limits<T>::is_integer &&
!std::numeric_limits<T>::is_signed,
"Only unsigned integral types are allowed.");
@@ -560,15 +559,20 @@ inline unsigned Log2_64_Ceil(uint64_t Value) {
}
/// Return the greatest common divisor of the values using Euclid's algorithm.
-inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
+template <typename T>
+inline T greatestCommonDivisor(T A, T B) {
while (B) {
- uint64_t T = B;
+ T Tmp = B;
B = A % B;
- A = T;
+ A = Tmp;
}
return A;
}
+inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
+ return greatestCommonDivisor<uint64_t>(A, B);
+}
+
/// This function takes a 64-bit integer and returns the bit equivalent double.
inline double BitsToDouble(uint64_t Bits) {
double D;
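The hunks above change the bit-counting helpers to return unsigned and generalize GreatestCommonDivisor64 into a type-parameterized greatestCommonDivisor. A minimal usage sketch against the updated header, illustrative only:

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void mathExtrasExample() {
  // countTrailingZeros and friends now return unsigned rather than std::size_t.
  unsigned TZ = llvm::countTrailingZeros(0x8u); // 3
  assert(TZ == 3);

  // The GCD helper is now a template over any unsigned integer type; the
  // old 64-bit entry point remains as a thin wrapper.
  assert(llvm::greatestCommonDivisor<uint32_t>(12u, 18u) == 6u);
  assert(llvm::GreatestCommonDivisor64(48, 180) == 12);
}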
diff --git a/include/llvm/Support/MemAlloc.h b/include/llvm/Support/MemAlloc.h
index d06c659cfba6..0e5869141fd3 100644
--- a/include/llvm/Support/MemAlloc.h
+++ b/include/llvm/Support/MemAlloc.h
@@ -1,9 +1,8 @@
//===- MemAlloc.h - Memory allocation functions -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -25,23 +24,41 @@ namespace llvm {
LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_malloc(size_t Sz) {
void *Result = std::malloc(Sz);
- if (Result == nullptr)
+ if (Result == nullptr) {
+ // It is implementation-defined whether allocation occurs if the space
+ // requested is zero (ISO/IEC 9899:2018 7.22.3). Retry, requesting
+ // non-zero, if the space requested was zero.
+ if (Sz == 0)
+ return safe_malloc(1);
report_bad_alloc_error("Allocation failed");
+ }
return Result;
}
LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_calloc(size_t Count,
size_t Sz) {
void *Result = std::calloc(Count, Sz);
- if (Result == nullptr)
+ if (Result == nullptr) {
+ // It is implementation-defined whether allocation occurs if the space
+ // requested is zero (ISO/IEC 9899:2018 7.22.3). Retry, requesting
+ // non-zero, if the space requested was zero.
+ if (Count == 0 || Sz == 0)
+ return safe_malloc(1);
report_bad_alloc_error("Allocation failed");
+ }
return Result;
}
LLVM_ATTRIBUTE_RETURNS_NONNULL inline void *safe_realloc(void *Ptr, size_t Sz) {
void *Result = std::realloc(Ptr, Sz);
- if (Result == nullptr)
+ if (Result == nullptr) {
+ // It is implementation-defined whether allocation occurs if the space
+ // requested is zero (ISO/IEC 9899:2018 7.22.3). Retry, requesting
+ // non-zero, if the space requested was zero.
+ if (Sz == 0)
+ return safe_malloc(1);
report_bad_alloc_error("Allocation failed");
+ }
return Result;
}
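The added branches make the safe_* allocators retry a zero-byte request as a one-byte allocation, so a null return always means a genuine failure. A short sketch of the resulting contract, illustrative only:

#include "llvm/Support/MemAlloc.h"
#include <cstdlib>

void memAllocExample() {
  // A zero-byte request no longer reports a bad-alloc when the C library
  // returns nullptr; it is retried as a one-byte allocation instead.
  void *P = llvm::safe_malloc(0);       // non-null
  void *Q = llvm::safe_realloc(P, 128); // non-null, grows the block
  std::free(Q);
}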
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index fa026d49a61b..6f22dd7080cd 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Memory.h - Memory Support -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,10 @@
#include <system_error>
namespace llvm {
+
+// Forward declare raw_ostream: it is used for debug dumping below.
+class raw_ostream;
+
namespace sys {
/// This class encapsulates the notion of a memory block which has an address
@@ -28,14 +31,18 @@ namespace sys {
/// Memory block abstraction.
class MemoryBlock {
public:
- MemoryBlock() : Address(nullptr), Size(0) { }
- MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
+ MemoryBlock() : Address(nullptr), AllocatedSize(0) {}
+ MemoryBlock(void *addr, size_t allocatedSize)
+ : Address(addr), AllocatedSize(allocatedSize) {}
void *base() const { return Address; }
- size_t size() const { return Size; }
-
+ /// The size as it was allocated. This is always greater or equal to the
+ /// size that was originally requested.
+ size_t allocatedSize() const { return AllocatedSize; }
+
private:
void *Address; ///< Address of first byte of memory area
- size_t Size; ///< Size, in bytes of the memory area
+ size_t AllocatedSize; ///< Size, in bytes of the memory area
+ unsigned Flags = 0;
friend class Memory;
};
@@ -46,9 +53,11 @@ namespace sys {
class Memory {
public:
enum ProtectionFlags {
- MF_READ = 0x1000000,
+ MF_READ = 0x1000000,
MF_WRITE = 0x2000000,
- MF_EXEC = 0x4000000
+ MF_EXEC = 0x4000000,
+ MF_RWE_MASK = 0x7000000,
+ MF_HUGE_HINT = 0x0000001
};
/// This method allocates a block of memory that is suitable for loading
@@ -133,13 +142,22 @@ namespace sys {
Memory::releaseMappedMemory(M);
}
void *base() const { return M.base(); }
- size_t size() const { return M.size(); }
+ /// The size as it was allocated. This is always greater or equal to the
+ /// size that was originally requested.
+ size_t allocatedSize() const { return M.allocatedSize(); }
MemoryBlock getMemoryBlock() const { return M; }
private:
MemoryBlock M;
};
-}
-}
+#ifndef NDEBUG
+ /// Debugging output for Memory::ProtectionFlags.
+ raw_ostream &operator<<(raw_ostream &OS, const Memory::ProtectionFlags &PF);
+
+ /// Debugging output for MemoryBlock.
+ raw_ostream &operator<<(raw_ostream &OS, const MemoryBlock &MB);
+#endif // ifndef NDEBUG
+ } // end namespace sys
+ } // end namespace llvm
#endif
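MemoryBlock now reports allocatedSize() (which may be rounded up to a page multiple) instead of size(), and MF_HUGE_HINT can be OR'ed into the flags. A hedged sketch follows; the allocateMappedMemory/releaseMappedMemory signatures are assumed from the unchanged part of this header:

#include "llvm/Support/Memory.h"
#include <system_error>

void mappedMemoryExample() {
  std::error_code EC;
  llvm::sys::MemoryBlock MB = llvm::sys::Memory::allocateMappedMemory(
      4096, /*NearBlock=*/nullptr,
      llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE, EC);
  if (EC)
    return;
  // allocatedSize() is the size actually reserved; it is >= the 4096 bytes
  // requested and typically a multiple of the system page size.
  (void)MB.allocatedSize();
  llvm::sys::Memory::releaseMappedMemory(MB);
}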
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 8933295d4ea4..b5196cd84cb4 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -1,9 +1,8 @@
//===--- MemoryBuffer.h - Memory Buffer Interface ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,7 +90,7 @@ public:
/// MemoryBuffer. The slice is specified by an \p Offset and \p MapSize.
/// Since this is in the middle of a file, the buffer is not null terminated.
static ErrorOr<std::unique_ptr<MemoryBuffer>>
- getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
+ getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
int64_t Offset, bool IsVolatile = false);
/// Given an already-open file descriptor, read the file and return a
@@ -101,7 +100,7 @@ public:
/// can change outside the user's control, e.g. when libclang tries to parse
/// while the user is editing/updating the file or if the file is on an NFS.
static ErrorOr<std::unique_ptr<MemoryBuffer>>
- getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
+ getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
bool RequiresNullTerminator = true, bool IsVolatile = false);
/// Open the specified memory range as a MemoryBuffer. Note that InputData
@@ -265,7 +264,7 @@ class MemoryBufferRef {
public:
MemoryBufferRef() = default;
- MemoryBufferRef(MemoryBuffer& Buffer)
+ MemoryBufferRef(const MemoryBuffer& Buffer)
: Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {}
MemoryBufferRef(StringRef Buffer, StringRef Identifier)
: Buffer(Buffer), Identifier(Identifier) {}
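With this change a MemoryBufferRef can be built from a const MemoryBuffer, and the open-file factories take sys::fs::file_t rather than a raw int. A small sketch using the in-memory factory; getMemBuffer and getBuffer are assumed from the unchanged part of the header:

#include "llvm/Support/MemoryBuffer.h"
#include <memory>

void memoryBufferRefExample() {
  std::unique_ptr<llvm::MemoryBuffer> Buf =
      llvm::MemoryBuffer::getMemBuffer("key = value\n", "config.ini");
  const llvm::MemoryBuffer &ConstBuf = *Buf;
  // The MemoryBufferRef constructor now accepts a const buffer.
  llvm::MemoryBufferRef Ref(ConstBuf);
  (void)Ref.getBuffer();
}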
diff --git a/include/llvm/Support/MipsABIFlags.h b/include/llvm/Support/MipsABIFlags.h
index 12c350015b21..d3233f645fb9 100644
--- a/include/llvm/Support/MipsABIFlags.h
+++ b/include/llvm/Support/MipsABIFlags.h
@@ -1,9 +1,8 @@
//===--- MipsABIFlags.h - MIPS ABI flags ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Mutex.h b/include/llvm/Support/Mutex.h
index 680d94b24ef5..c3abfc7a7806 100644
--- a/include/llvm/Support/Mutex.h
+++ b/include/llvm/Support/Mutex.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Mutex.h - Mutex Operating System Concept -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/MutexGuard.h b/include/llvm/Support/MutexGuard.h
index 641d64d94988..d86ced145816 100644
--- a/include/llvm/Support/MutexGuard.h
+++ b/include/llvm/Support/MutexGuard.h
@@ -1,9 +1,8 @@
//===-- Support/MutexGuard.h - Acquire/Release Mutex In Scope ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/NativeFormatting.h b/include/llvm/Support/NativeFormatting.h
index 6d1dd7b422fe..825a44c77c00 100644
--- a/include/llvm/Support/NativeFormatting.h
+++ b/include/llvm/Support/NativeFormatting.h
@@ -1,9 +1,8 @@
//===- NativeFormatting.h - Low level formatting helpers ---------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h
index 912e2700d1a0..d84da92aab9b 100644
--- a/include/llvm/Support/OnDiskHashTable.h
+++ b/include/llvm/Support/OnDiskHashTable.h
@@ -1,9 +1,8 @@
//===--- OnDiskHashTable.h - On-Disk Hash Table Implementation --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Support/Options.h b/include/llvm/Support/Options.h
index dd321c6a1984..d02ef85a75bf 100644
--- a/include/llvm/Support/Options.h
+++ b/include/llvm/Support/Options.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Options.h - Debug options support -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Support/Parallel.h b/include/llvm/Support/Parallel.h
index 1462265343be..eab9b492c4a5 100644
--- a/include/llvm/Support/Parallel.h
+++ b/include/llvm/Support/Parallel.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Parallel.h - Parallel algorithms ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -74,8 +73,12 @@ public:
class TaskGroup {
Latch L;
+ bool Parallel;
public:
+ TaskGroup();
+ ~TaskGroup();
+
void spawn(std::function<void()> f);
void sync() const { L.sync(); }
diff --git a/include/llvm/Support/Path.h b/include/llvm/Support/Path.h
index 76de887b7cb4..5c0bee58f188 100644
--- a/include/llvm/Support/Path.h
+++ b/include/llvm/Support/Path.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Path.h - Path Operating System Concept ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/PluginLoader.h b/include/llvm/Support/PluginLoader.h
index bdbb134b28eb..c0c516bdae03 100644
--- a/include/llvm/Support/PluginLoader.h
+++ b/include/llvm/Support/PluginLoader.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/PluginLoader.h - Plugin Loader for Tools ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h
index 1710b57131d1..1e7e5b53ca65 100644
--- a/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/include/llvm/Support/PointerLikeTypeTraits.h
@@ -1,9 +1,8 @@
//===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h
index 4d64fe4ef727..6eb070b2297e 100644
--- a/include/llvm/Support/PrettyStackTrace.h
+++ b/include/llvm/Support/PrettyStackTrace.h
@@ -1,9 +1,8 @@
//===- llvm/Support/PrettyStackTrace.h - Pretty Crash Handling --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,8 +21,22 @@
namespace llvm {
class raw_ostream;
+ /// Enables dumping a "pretty" stack trace when the program crashes.
+ ///
+ /// \see PrettyStackTraceEntry
void EnablePrettyStackTrace();
+ /// Enables (or disables) dumping a "pretty" stack trace when the user sends
+ /// SIGINFO or SIGUSR1 to the current process.
+ ///
+ /// This is a per-thread decision so that a program can choose to print stack
+ /// traces only on a primary thread, or on all threads that use
+ /// PrettyStackTraceEntry.
+ ///
+ /// \see EnablePrettyStackTrace
+ /// \see PrettyStackTraceEntry
+ void EnablePrettyStackTraceOnSigInfoForThisThread(bool ShouldEnable = true);
+
/// PrettyStackTraceEntry - This class is used to represent a frame of the
/// "pretty" stack trace that is dumped when a program crashes. You can define
/// subclasses of this and declare them on the program stack: when they are
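The new EnablePrettyStackTraceOnSigInfoForThisThread hook dumps the pretty stack trace on SIGINFO/SIGUSR1 without terminating the process. A hedged sketch of wiring it into a tool's main; PrettyStackTraceProgram is assumed from the unchanged part of this header:

#include "llvm/Support/PrettyStackTrace.h"

int main(int argc, const char **argv) {
  // Record argv for crash reports produced by the pretty stack trace.
  llvm::PrettyStackTraceProgram X(argc, argv);
  llvm::EnablePrettyStackTrace();
  // Also dump the current pretty stack trace when the user sends SIGINFO
  // (ctrl-t) or SIGUSR1, on this thread only.
  llvm::EnablePrettyStackTraceOnSigInfoForThisThread();
  // ... tool logic ...
  return 0;
}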
diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h
index cb55d41316e3..0f8670d0419c 100644
--- a/include/llvm/Support/Printable.h
+++ b/include/llvm/Support/Printable.h
@@ -1,9 +1,8 @@
//===--- Printable.h - Print function helpers -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
index f9f1cac86278..67e37912519b 100644
--- a/include/llvm/Support/Process.h
+++ b/include/llvm/Support/Process.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Process.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -29,6 +28,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Error.h"
#include <system_error>
namespace llvm {
@@ -42,7 +42,25 @@ namespace sys {
/// current executing process.
class Process {
public:
- static unsigned getPageSize();
+ /// Get the process's page size.
+ /// This may fail if the underlying syscall returns an error. In most cases,
+ /// page size information is used for optimization, and this error can be
+ /// safely discarded by calling consumeError, and an estimated page size
+ /// substituted instead.
+ static Expected<unsigned> getPageSize();
+
+ /// Get the process's estimated page size.
+ /// This function always succeeds, but if the underlying syscall to determine
+ /// the page size fails then this will silently return an estimated page size.
+ /// The estimated page size is guaranteed to be a power of 2.
+ static unsigned getPageSizeEstimate() {
+ if (auto PageSize = getPageSize())
+ return *PageSize;
+ else {
+ consumeError(PageSize.takeError());
+ return 4096;
+ }
+ }
/// Return process memory usage.
/// This static function will return the total amount of memory allocated
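Since getPageSize now returns Expected<unsigned>, callers either handle the error explicitly or fall back to getPageSizeEstimate. A brief sketch of both styles, illustrative only:

#include "llvm/Support/Process.h"

unsigned pageSizeOrDefault() {
  // Best-effort query: never fails, falls back to an estimate on error.
  unsigned Estimate = llvm::sys::Process::getPageSizeEstimate();

  // Explicit handling: the error must be consumed if the call fails.
  if (llvm::Expected<unsigned> PS = llvm::sys::Process::getPageSize())
    return *PS;
  else {
    llvm::consumeError(PS.takeError());
    return Estimate;
  }
}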
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index 1f4dbdce3323..6b2315c5da8d 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Program.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/RWMutex.h b/include/llvm/Support/RWMutex.h
index 5ac3e558999b..9cd57cbd65a1 100644
--- a/include/llvm/Support/RWMutex.h
+++ b/include/llvm/Support/RWMutex.h
@@ -1,9 +1,8 @@
//===- RWMutex.h - Reader/Writer Mutual Exclusion Lock ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/RandomNumberGenerator.h b/include/llvm/Support/RandomNumberGenerator.h
index 1399dab815f8..55d6876cc5e4 100644
--- a/include/llvm/Support/RandomNumberGenerator.h
+++ b/include/llvm/Support/RandomNumberGenerator.h
@@ -1,9 +1,8 @@
//==- llvm/Support/RandomNumberGenerator.h - RNG for diversity ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index 53db2e86d12d..bbd9ae321ae3 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -1,9 +1,8 @@
//==- llvm/Support/Recycler.h - Recycling Allocator --------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/RecyclingAllocator.h b/include/llvm/Support/RecyclingAllocator.h
index 32b033b17946..2c29dacfe212 100644
--- a/include/llvm/Support/RecyclingAllocator.h
+++ b/include/llvm/Support/RecyclingAllocator.h
@@ -1,9 +1,8 @@
//==- llvm/Support/RecyclingAllocator.h - Recycling Allocator ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index d901eb1e3ffb..2d19b10fd890 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -1,9 +1,8 @@
//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 02fd5b9354a1..4d8aa5f1470d 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -1,9 +1,8 @@
//=== Registry.h - Linker-supported plugin registries -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -81,17 +80,17 @@ namespace llvm {
/// Iterators for registry entries.
///
- class iterator {
+ class iterator
+ : public llvm::iterator_facade_base<iterator, std::forward_iterator_tag,
+ const entry> {
const node *Cur;
public:
explicit iterator(const node *N) : Cur(N) {}
bool operator==(const iterator &That) const { return Cur == That.Cur; }
- bool operator!=(const iterator &That) const { return Cur != That.Cur; }
iterator &operator++() { Cur = Cur->Next; return *this; }
const entry &operator*() const { return Cur->Val; }
- const entry *operator->() const { return &Cur->Val; }
};
// begin is not defined here in order to avoid usage of an undefined static
diff --git a/include/llvm/Support/SHA1.h b/include/llvm/Support/SHA1.h
index 1fc60a878f94..87fe94bbd5cd 100644
--- a/include/llvm/Support/SHA1.h
+++ b/include/llvm/Support/SHA1.h
@@ -1,9 +1,8 @@
//==- SHA1.h - SHA1 implementation for LLVM --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code is taken from public domain
diff --git a/include/llvm/Support/SMLoc.h b/include/llvm/Support/SMLoc.h
index c74feff378d6..d8607034ee86 100644
--- a/include/llvm/Support/SMLoc.h
+++ b/include/llvm/Support/SMLoc.h
@@ -1,9 +1,8 @@
//===- SMLoc.h - Source location for use with diagnostics -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/SMTAPI.h b/include/llvm/Support/SMTAPI.h
new file mode 100644
index 000000000000..24dcd124593e
--- /dev/null
+++ b/include/llvm/Support/SMTAPI.h
@@ -0,0 +1,447 @@
+//===- SMTAPI.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic SMT solver API, which will be the base class
+// for every SMT-solver-specific class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SMTAPI_H
+#define LLVM_SUPPORT_SMTAPI_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+namespace llvm {
+
+/// Generic base class for SMT sorts
+class SMTSort {
+public:
+ SMTSort() = default;
+ virtual ~SMTSort() = default;
+
+ /// Returns true if the sort is a bitvector, calls isBitvectorSortImpl().
+ virtual bool isBitvectorSort() const { return isBitvectorSortImpl(); }
+
+ /// Returns true if the sort is a floating-point, calls isFloatSortImpl().
+ virtual bool isFloatSort() const { return isFloatSortImpl(); }
+
+ /// Returns true if the sort is a boolean, calls isBooleanSortImpl().
+ virtual bool isBooleanSort() const { return isBooleanSortImpl(); }
+
+ /// Returns the bitvector size, fails if the sort is not a bitvector
+ /// Calls getBitvectorSortSizeImpl().
+ virtual unsigned getBitvectorSortSize() const {
+ assert(isBitvectorSort() && "Not a bitvector sort!");
+ unsigned Size = getBitvectorSortSizeImpl();
+ assert(Size && "Size is zero!");
+ return Size;
+ };
+
+ /// Returns the floating-point size, fails if the sort is not a floating-point sort.
+ /// Calls getFloatSortSizeImpl().
+ virtual unsigned getFloatSortSize() const {
+ assert(isFloatSort() && "Not a floating-point sort!");
+ unsigned Size = getFloatSortSizeImpl();
+ assert(Size && "Size is zero!");
+ return Size;
+ };
+
+ virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0;
+
+ bool operator<(const SMTSort &Other) const {
+ llvm::FoldingSetNodeID ID1, ID2;
+ Profile(ID1);
+ Other.Profile(ID2);
+ return ID1 < ID2;
+ }
+
+ friend bool operator==(SMTSort const &LHS, SMTSort const &RHS) {
+ return LHS.equal_to(RHS);
+ }
+
+ virtual void print(raw_ostream &OS) const = 0;
+
+ LLVM_DUMP_METHOD void dump() const;
+
+protected:
+ /// Query the SMT solver and returns true if two sorts are equal (same kind
+ /// and bit width). This does not check if the two sorts are the same objects.
+ virtual bool equal_to(SMTSort const &other) const = 0;
+
+ /// Query the SMT solver and checks if a sort is bitvector.
+ virtual bool isBitvectorSortImpl() const = 0;
+
+ /// Query the SMT solver and checks if a sort is floating-point.
+ virtual bool isFloatSortImpl() const = 0;
+
+ /// Query the SMT solver and checks if a sort is boolean.
+ virtual bool isBooleanSortImpl() const = 0;
+
+ /// Query the SMT solver and returns the sort bit width.
+ virtual unsigned getBitvectorSortSizeImpl() const = 0;
+
+ /// Query the SMT solver and returns the sort bit width.
+ virtual unsigned getFloatSortSizeImpl() const = 0;
+};
+
+/// Shared pointer for SMTSorts, used by SMTSolver API.
+using SMTSortRef = const SMTSort *;
+
+/// Generic base class for SMT exprs
+class SMTExpr {
+public:
+ SMTExpr() = default;
+ virtual ~SMTExpr() = default;
+
+ bool operator<(const SMTExpr &Other) const {
+ llvm::FoldingSetNodeID ID1, ID2;
+ Profile(ID1);
+ Other.Profile(ID2);
+ return ID1 < ID2;
+ }
+
+ virtual void Profile(llvm::FoldingSetNodeID &ID) const = 0;
+
+ friend bool operator==(SMTExpr const &LHS, SMTExpr const &RHS) {
+ return LHS.equal_to(RHS);
+ }
+
+ virtual void print(raw_ostream &OS) const = 0;
+
+ LLVM_DUMP_METHOD void dump() const;
+
+protected:
+ /// Query the SMT solver and returns true if two expressions are equal (same
+ /// kind and bit width). This does not check if the two expressions are the same objects.
+ virtual bool equal_to(SMTExpr const &other) const = 0;
+};
+
+/// Shared pointer for SMTExprs, used by SMTSolver API.
+using SMTExprRef = const SMTExpr *;
+
+/// Generic base class for SMT Solvers
+///
+/// This class is responsible for wrapping all sorts and expression generation,
+/// through the mk* methods. It also provides methods to create SMT expressions
+/// straight from clang's AST, through the from* methods.
+class SMTSolver {
+public:
+ SMTSolver() = default;
+ virtual ~SMTSolver() = default;
+
+ LLVM_DUMP_METHOD void dump() const;
+
+ // Returns an appropriate floating-point sort for the given bitwidth.
+ SMTSortRef getFloatSort(unsigned BitWidth) {
+ switch (BitWidth) {
+ case 16:
+ return getFloat16Sort();
+ case 32:
+ return getFloat32Sort();
+ case 64:
+ return getFloat64Sort();
+ case 128:
+ return getFloat128Sort();
+ default:;
+ }
+ llvm_unreachable("Unsupported floating-point bitwidth!");
+ }
+
+ // Returns a boolean sort.
+ virtual SMTSortRef getBoolSort() = 0;
+
+ // Returns an appropriate bitvector sort for the given bitwidth.
+ virtual SMTSortRef getBitvectorSort(const unsigned BitWidth) = 0;
+
+ // Returns a floating-point sort of width 16
+ virtual SMTSortRef getFloat16Sort() = 0;
+
+ // Returns a floating-point sort of width 32
+ virtual SMTSortRef getFloat32Sort() = 0;
+
+ // Returns a floating-point sort of width 64
+ virtual SMTSortRef getFloat64Sort() = 0;
+
+ // Returns a floating-point sort of width 128
+ virtual SMTSortRef getFloat128Sort() = 0;
+
+ // Returns an appropriate sort for the given AST.
+ virtual SMTSortRef getSort(const SMTExprRef &AST) = 0;
+
+ /// Given a constraint, adds it to the solver
+ virtual void addConstraint(const SMTExprRef &Exp) const = 0;
+
+ /// Creates a bitvector addition operation
+ virtual SMTExprRef mkBVAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector subtraction operation
+ virtual SMTExprRef mkBVSub(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector multiplication operation
+ virtual SMTExprRef mkBVMul(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector signed modulus operation
+ virtual SMTExprRef mkBVSRem(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector unsigned modulus operation
+ virtual SMTExprRef mkBVURem(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector signed division operation
+ virtual SMTExprRef mkBVSDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector unsigned division operation
+ virtual SMTExprRef mkBVUDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector logical shift left operation
+ virtual SMTExprRef mkBVShl(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector arithmetic shift right operation
+ virtual SMTExprRef mkBVAshr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector logical shift right operation
+ virtual SMTExprRef mkBVLshr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector negation operation
+ virtual SMTExprRef mkBVNeg(const SMTExprRef &Exp) = 0;
+
+ /// Creates a bitvector not operation
+ virtual SMTExprRef mkBVNot(const SMTExprRef &Exp) = 0;
+
+ /// Creates a bitvector xor operation
+ virtual SMTExprRef mkBVXor(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector or operation
+ virtual SMTExprRef mkBVOr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector and operation
+ virtual SMTExprRef mkBVAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector unsigned less-than operation
+ virtual SMTExprRef mkBVUlt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector signed less-than operation
+ virtual SMTExprRef mkBVSlt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector unsigned greater-than operation
+ virtual SMTExprRef mkBVUgt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector signed greater-than operation
+ virtual SMTExprRef mkBVSgt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector unsigned less-equal-than operation
+ virtual SMTExprRef mkBVUle(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector signed less-equal-than operation
+ virtual SMTExprRef mkBVSle(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector unsigned greater-equal-than operation
+ virtual SMTExprRef mkBVUge(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a bitvector signed greater-equal-than operation
+ virtual SMTExprRef mkBVSge(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a boolean not operation
+ virtual SMTExprRef mkNot(const SMTExprRef &Exp) = 0;
+
+ /// Creates a boolean equality operation
+ virtual SMTExprRef mkEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a boolean and operation
+ virtual SMTExprRef mkAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a boolean or operation
+ virtual SMTExprRef mkOr(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a boolean ite operation
+ virtual SMTExprRef mkIte(const SMTExprRef &Cond, const SMTExprRef &T,
+ const SMTExprRef &F) = 0;
+
+ /// Creates a bitvector sign extension operation
+ virtual SMTExprRef mkBVSignExt(unsigned i, const SMTExprRef &Exp) = 0;
+
+ /// Creates a bitvector zero extension operation
+ virtual SMTExprRef mkBVZeroExt(unsigned i, const SMTExprRef &Exp) = 0;
+
+ /// Creates a bitvector extract operation
+ virtual SMTExprRef mkBVExtract(unsigned High, unsigned Low,
+ const SMTExprRef &Exp) = 0;
+
+ /// Creates a bitvector concat operation
+ virtual SMTExprRef mkBVConcat(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) = 0;
+
+ /// Creates a predicate that checks for overflow in a bitvector addition
+ /// operation
+ virtual SMTExprRef mkBVAddNoOverflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS,
+ bool isSigned) = 0;
+
+ /// Creates a predicate that checks for underflow in a signed bitvector
+ /// addition operation
+ virtual SMTExprRef mkBVAddNoUnderflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) = 0;
+
+ /// Creates a predicate that checks for overflow in a signed bitvector
+ /// subtraction operation
+ virtual SMTExprRef mkBVSubNoOverflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) = 0;
+
+ /// Creates a predicate that checks for underflow in a bitvector subtraction
+ /// operation
+ virtual SMTExprRef mkBVSubNoUnderflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS,
+ bool isSigned) = 0;
+
+ /// Creates a predicate that checks for overflow in a signed bitvector
+ /// division/modulus operation
+ virtual SMTExprRef mkBVSDivNoOverflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) = 0;
+
+ /// Creates a predicate that checks for overflow in a bitvector negation
+ /// operation
+ virtual SMTExprRef mkBVNegNoOverflow(const SMTExprRef &Exp) = 0;
+
+ /// Creates a predicate that checks for overflow in a bitvector multiplication
+ /// operation
+ virtual SMTExprRef mkBVMulNoOverflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS,
+ bool isSigned) = 0;
+
+ /// Creates a predicate that checks for underflow in a signed bitvector
+ /// multiplication operation
+ virtual SMTExprRef mkBVMulNoUnderflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point negation operation
+ virtual SMTExprRef mkFPNeg(const SMTExprRef &Exp) = 0;
+
+ /// Creates a floating-point isInfinite operation
+ virtual SMTExprRef mkFPIsInfinite(const SMTExprRef &Exp) = 0;
+
+ /// Creates a floating-point isNaN operation
+ virtual SMTExprRef mkFPIsNaN(const SMTExprRef &Exp) = 0;
+
+ /// Creates a floating-point isNormal operation
+ virtual SMTExprRef mkFPIsNormal(const SMTExprRef &Exp) = 0;
+
+ /// Creates a floating-point isZero operation
+ virtual SMTExprRef mkFPIsZero(const SMTExprRef &Exp) = 0;
+
+ /// Creates a floating-point multiplication operation
+ virtual SMTExprRef mkFPMul(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point division operation
+ virtual SMTExprRef mkFPDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point remainder operation
+ virtual SMTExprRef mkFPRem(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point addition operation
+ virtual SMTExprRef mkFPAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point subtraction operation
+ virtual SMTExprRef mkFPSub(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point less-than operation
+ virtual SMTExprRef mkFPLt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point greater-than operation
+ virtual SMTExprRef mkFPGt(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point less-than-or-equal operation
+ virtual SMTExprRef mkFPLe(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point greater-than-or-equal operation
+ virtual SMTExprRef mkFPGe(const SMTExprRef &LHS, const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point equality operation
+ virtual SMTExprRef mkFPEqual(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) = 0;
+
+ /// Creates a floating-point conversion from floating-point to floating-point
+ /// operation
+ virtual SMTExprRef mkFPtoFP(const SMTExprRef &From, const SMTSortRef &To) = 0;
+
+ /// Creates a floating-point conversion from signed bitvector to
+ /// floating-point operation
+ virtual SMTExprRef mkSBVtoFP(const SMTExprRef &From,
+ const SMTSortRef &To) = 0;
+
+ /// Creates a floating-point conversion from unsigned bitvector to
+ /// floating-point operation
+ virtual SMTExprRef mkUBVtoFP(const SMTExprRef &From,
+ const SMTSortRef &To) = 0;
+
+ /// Creates a floating-point conversion from floating-point to signed
+ /// bitvector operation
+ virtual SMTExprRef mkFPtoSBV(const SMTExprRef &From, unsigned ToWidth) = 0;
+
+ /// Creates a floating-point conversion from floating-point to unsigned
+ /// bitvector operation
+ virtual SMTExprRef mkFPtoUBV(const SMTExprRef &From, unsigned ToWidth) = 0;
+
+ /// Creates a new symbol, given a name and a sort
+ virtual SMTExprRef mkSymbol(const char *Name, SMTSortRef Sort) = 0;
+
+ // Returns an appropriate floating-point rounding mode.
+ virtual SMTExprRef getFloatRoundingMode() = 0;
+
+ // If a model is available, returns the value of a given bitvector symbol
+ virtual llvm::APSInt getBitvector(const SMTExprRef &Exp, unsigned BitWidth,
+ bool isUnsigned) = 0;
+
+ // If a model is available, returns the value of a given boolean symbol
+ virtual bool getBoolean(const SMTExprRef &Exp) = 0;
+
+ /// Constructs an SMTExprRef from a boolean.
+ virtual SMTExprRef mkBoolean(const bool b) = 0;
+
+ /// Constructs an SMTExprRef from a finite APFloat.
+ virtual SMTExprRef mkFloat(const llvm::APFloat Float) = 0;
+
+ /// Constructs an SMTExprRef from an APSInt and its bit width
+ virtual SMTExprRef mkBitvector(const llvm::APSInt Int, unsigned BitWidth) = 0;
+
+ /// Given an expression, extract the value of this operand in the model.
+ virtual bool getInterpretation(const SMTExprRef &Exp, llvm::APSInt &Int) = 0;
+
+ /// Given an expression, extract the value of this operand in the model.
+ virtual bool getInterpretation(const SMTExprRef &Exp,
+ llvm::APFloat &Float) = 0;
+
+ /// Check if the constraints are satisfiable
+ virtual Optional<bool> check() const = 0;
+
+ /// Push the current solver state
+ virtual void push() = 0;
+
+ /// Pop the previous solver state
+ virtual void pop(unsigned NumStates = 1) = 0;
+
+ /// Reset the solver and remove all constraints.
+ virtual void reset() = 0;
+
+ /// Checks if the solver supports floating-points.
+ virtual bool isFPSupported() = 0;
+
+ virtual void print(raw_ostream &OS) const = 0;
+};
+
+/// Shared pointer for SMTSolvers.
+using SMTSolverRef = std::shared_ptr<SMTSolver>;
+
+/// Convenience method to create a Z3Solver object
+SMTSolverRef CreateZ3Solver();
+
+} // namespace llvm
+
+#endif
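The SMTSolver interface funnels all sort and expression construction through the mk* methods; CreateZ3Solver returns a concrete backend when LLVM is configured with Z3. A hedged sketch that asks whether an 8-bit unsigned value can exceed 200 (assumes a Z3-enabled build):

#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/SMTAPI.h"

void smtExample() {
  llvm::SMTSolverRef Solver = llvm::CreateZ3Solver();

  // Declare an 8-bit bitvector symbol X and assert X >u 200.
  llvm::SMTSortRef BV8 = Solver->getBitvectorSort(8);
  llvm::SMTExprRef X = Solver->mkSymbol("X", BV8);
  llvm::APSInt Limit(llvm::APInt(8, 200), /*isUnsigned=*/true);
  llvm::SMTExprRef C200 = Solver->mkBitvector(Limit, /*BitWidth=*/8);
  Solver->addConstraint(Solver->mkBVUgt(X, C200));

  // check() returns None when the solver cannot decide.
  if (llvm::Optional<bool> Sat = Solver->check())
    (void)*Sat; // true here: X = 201 is a model
}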
diff --git a/include/llvm/Support/SaveAndRestore.h b/include/llvm/Support/SaveAndRestore.h
index 8e11789907ad..3c0333b7119a 100644
--- a/include/llvm/Support/SaveAndRestore.h
+++ b/include/llvm/Support/SaveAndRestore.h
@@ -1,9 +1,8 @@
//===-- SaveAndRestore.h - Utility -------------------------------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Support/ScalableSize.h b/include/llvm/Support/ScalableSize.h
new file mode 100644
index 000000000000..96bf043773a0
--- /dev/null
+++ b/include/llvm/Support/ScalableSize.h
@@ -0,0 +1,43 @@
+//===- ScalableSize.h - Scalable vector size info ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a struct that can be used to query the size of IR types
+// which may be scalable vectors. It provides convenience operators so that
+// it can be used in much the same way as a single scalar value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SCALABLESIZE_H
+#define LLVM_SUPPORT_SCALABLESIZE_H
+
+namespace llvm {
+
+class ElementCount {
+public:
+ unsigned Min; // Minimum number of vector elements.
+ bool Scalable; // If true, NumElements is a multiple of 'Min' determined
+ // at runtime rather than compile time.
+
+ ElementCount(unsigned Min, bool Scalable)
+ : Min(Min), Scalable(Scalable) {}
+
+ ElementCount operator*(unsigned RHS) {
+ return { Min * RHS, Scalable };
+ }
+ ElementCount operator/(unsigned RHS) {
+ return { Min / RHS, Scalable };
+ }
+
+ bool operator==(const ElementCount& RHS) const {
+ return Min == RHS.Min && Scalable == RHS.Scalable;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_SCALABLESIZE_H
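ElementCount pairs a minimum element count with a flag saying whether the real count is a runtime multiple of that minimum, as used for scalable vectors. A quick illustration, using only the operators shown above:

#include "llvm/Support/ScalableSize.h"
#include <cassert>

void elementCountExample() {
  // A scalable vector with a minimum of 4 elements, i.e. <vscale x 4 x ...>.
  llvm::ElementCount EC(4, /*Scalable=*/true);
  // Scaling the count preserves the Scalable flag.
  assert(EC * 2 == llvm::ElementCount(8, true));
  assert(!(EC == llvm::ElementCount(4, false)));
}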
diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h
index 3bd3ccedc42c..552da34f357b 100644
--- a/include/llvm/Support/ScaledNumber.h
+++ b/include/llvm/Support/ScaledNumber.h
@@ -1,9 +1,8 @@
//===- llvm/Support/ScaledNumber.h - Support for scaled numbers -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -887,10 +886,6 @@ template <class DigitsT> void ScaledNumber<DigitsT>::shiftRight(int32_t Shift) {
Digits >>= Shift;
}
-template <typename T> struct isPodLike;
-template <typename T> struct isPodLike<ScaledNumber<T>> {
- static const bool value = true;
-};
} // end namespace llvm
diff --git a/include/llvm/Support/ScopedPrinter.h b/include/llvm/Support/ScopedPrinter.h
index 34c1a287ee10..88daedc8713b 100644
--- a/include/llvm/Support/ScopedPrinter.h
+++ b/include/llvm/Support/ScopedPrinter.h
@@ -1,9 +1,8 @@
-//===-- ScopedPrinter.h ---------------------------------------------------===//
+//===-- ScopedPrinter.h ----------------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h
index f25a04969904..a6b215a24311 100644
--- a/include/llvm/Support/Signals.h
+++ b/include/llvm/Support/Signals.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Signals.h - Signal Handling support ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -66,13 +65,25 @@ namespace sys {
/// This function registers a function to be called when the user "interrupts"
/// the program (typically by pressing ctrl-c). When the user interrupts the
/// program, the specified interrupt function is called instead of the program
- /// being killed, and the interrupt function automatically disabled. Note
- /// that interrupt functions are not allowed to call any non-reentrant
+ /// being killed, and the interrupt function automatically disabled.
+ ///
+ /// Note that interrupt functions are not allowed to call any non-reentrant
/// functions. A null interrupt function pointer disables the current
/// installed function. Note also that the handler may be executed on a
/// different thread on some platforms.
- /// Register a function to be called when ctrl-c is pressed.
void SetInterruptFunction(void (*IF)());
+
+ /// Registers a function to be called when an "info" signal is delivered to
+ /// the process.
+ ///
+ /// On POSIX systems, this will be SIGUSR1; on systems that have it, SIGINFO
+ /// will also be used (typically ctrl-t).
+ ///
+ /// Note that signal handlers are not allowed to call any non-reentrant
+ /// functions. A null function pointer disables the current installed
+ /// function. Note also that the handler may be executed on a different
+ /// thread on some platforms.
+ void SetInfoSignalFunction(void (*Handler)());
} // End sys namespace
} // End llvm namespace
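SetInfoSignalFunction complements SetInterruptFunction: it installs a handler for SIGUSR1 (and SIGINFO where available) instead of ctrl-c. Because the handler may run in signal context, a sketch would only set a flag and do the reporting elsewhere; illustrative only:

#include "llvm/Support/Signals.h"
#include <atomic>

static std::atomic<bool> InfoRequested{false};

void installInfoHandler() {
  // The handler must stay reentrant-safe, so it only records the request;
  // the main loop checks InfoRequested and prints progress when convenient.
  llvm::sys::SetInfoSignalFunction([] { InfoRequested = true; });
}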
diff --git a/include/llvm/Support/Signposts.h b/include/llvm/Support/Signposts.h
new file mode 100644
index 000000000000..b5a8c3d61e3e
--- /dev/null
+++ b/include/llvm/Support/Signposts.h
@@ -0,0 +1,43 @@
+//===-- llvm/Support/Signposts.h - Interval debug annotations ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Some OSes provide profilers that allow applications to provide custom
+/// annotations to the profiler. For example, on Xcode 10 and later 'signposts'
+/// can be emitted by the application and these will be rendered to the Points
+/// of Interest track on the instruments timeline.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SIGNPOSTS_H
+#define LLVM_SUPPORT_SIGNPOSTS_H
+
+namespace llvm {
+class SignpostEmitterImpl;
+class Timer;
+
+/// Manages the emission of signposts into the recording method supported by
+/// the OS.
+class SignpostEmitter {
+ SignpostEmitterImpl *Impl;
+
+public:
+ SignpostEmitter();
+ ~SignpostEmitter();
+
+ bool isEnabled() const;
+
+ /// Begin a signposted interval for the given timer.
+ void startTimerInterval(Timer *T);
+ /// End a signposted interval for the given timer.
+ void endTimerInterval(Timer *T);
+};
+
+} // end namespace llvm
+
+#endif // ifndef LLVM_SUPPORT_SIGNPOSTS_H
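SignpostEmitter brackets a Timer's active interval with OS signposts so it shows up in profilers such as Instruments' Points of Interest track. A minimal sketch, assuming a Timer from llvm/Support/Timer.h:

#include "llvm/Support/Signposts.h"
#include "llvm/Support/Timer.h"

void signpostExample(llvm::Timer &T) {
  llvm::SignpostEmitter Signposts;
  if (Signposts.isEnabled()) {
    Signposts.startTimerInterval(&T); // start of the annotated interval
    // ... timed work ...
    Signposts.endTimerInterval(&T);   // end of the annotated interval
  }
}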
diff --git a/include/llvm/Support/SmallVectorMemoryBuffer.h b/include/llvm/Support/SmallVectorMemoryBuffer.h
index c4a600e7f37d..b63b58e3a8ba 100644
--- a/include/llvm/Support/SmallVectorMemoryBuffer.h
+++ b/include/llvm/Support/SmallVectorMemoryBuffer.h
@@ -1,9 +1,8 @@
//===- SmallVectorMemoryBuffer.h --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Solaris/sys/regset.h b/include/llvm/Support/Solaris/sys/regset.h
index 6a69ebe718a1..6bd98fa826a6 100644
--- a/include/llvm/Support/Solaris/sys/regset.h
+++ b/include/llvm/Support/Solaris/sys/regset.h
@@ -1,9 +1,8 @@
/*===- llvm/Support/Solaris/sys/regset.h ------------------------*- C++ -*-===*
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===----------------------------------------------------------------------===*
*
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 63ac893239d1..aa6026c23d07 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -1,9 +1,8 @@
//===- SourceMgr.h - Manager for Source Buffers & Diagnostics ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -107,6 +106,8 @@ public:
SourceMgr() = default;
SourceMgr(const SourceMgr &) = delete;
SourceMgr &operator=(const SourceMgr &) = delete;
+ SourceMgr(SourceMgr &&) = default;
+ SourceMgr &operator=(SourceMgr &&) = default;
~SourceMgr() = default;
void setIncludeDirs(const std::vector<std::string> &Dirs) {
diff --git a/include/llvm/Support/SpecialCaseList.h b/include/llvm/Support/SpecialCaseList.h
index fd62fc48047b..b7400266f4df 100644
--- a/include/llvm/Support/SpecialCaseList.h
+++ b/include/llvm/Support/SpecialCaseList.h
@@ -1,9 +1,8 @@
//===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This is a utility class used to parse user-provided text files with
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index bb5fd07f0d00..a4f45916f53d 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -1,9 +1,8 @@
//===- StringPool.h - Interned string pool ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h
index 6b77d487333b..c54044e3986c 100644
--- a/include/llvm/Support/StringSaver.h
+++ b/include/llvm/Support/StringSaver.h
@@ -1,9 +1,8 @@
//===- llvm/Support/StringSaver.h -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/SwapByteOrder.h b/include/llvm/Support/SwapByteOrder.h
index 71d3724950ab..06a447a27c2a 100644
--- a/include/llvm/Support/SwapByteOrder.h
+++ b/include/llvm/Support/SwapByteOrder.h
@@ -1,9 +1,8 @@
//===- SwapByteOrder.h - Generic and optimized byte swaps -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include <cstddef>
+#include <type_traits>
#if defined(_MSC_VER) && !defined(_DEBUG)
#include <stdlib.h>
#endif
@@ -116,6 +116,13 @@ inline double getSwappedBytes(double C) {
return out.d;
}
+template <typename T>
+inline typename std::enable_if<std::is_enum<T>::value, T>::type
+getSwappedBytes(T C) {
+ return static_cast<T>(
+ getSwappedBytes(static_cast<typename std::underlying_type<T>::type>(C)));
+}
+
template<typename T>
inline void swapByteOrder(T &Value) {
Value = getSwappedBytes(Value);
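The new enable_if overload lets getSwappedBytes operate on enumeration types by swapping their underlying integer representation. A minimal caller sketch, assuming a hypothetical scoped enum Tag:

    #include "llvm/Support/SwapByteOrder.h"
    #include <cstdint>

    enum class Tag : uint16_t { Header = 0x1234 };

    // Swaps the underlying uint16_t, then casts back to the enum type.
    Tag Swapped = llvm::sys::getSwappedBytes(Tag::Header); // Tag{0x3412}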
diff --git a/include/llvm/Support/SymbolRemappingReader.h b/include/llvm/Support/SymbolRemappingReader.h
index b457b9e817e4..2b9ab570eb8b 100644
--- a/include/llvm/Support/SymbolRemappingReader.h
+++ b/include/llvm/Support/SymbolRemappingReader.h
@@ -1,9 +1,8 @@
//===- SymbolRemappingReader.h - Read symbol remapping file -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h
index bd60793d1554..77deddb9ee1c 100644
--- a/include/llvm/Support/SystemUtils.h
+++ b/include/llvm/Support/SystemUtils.h
@@ -1,9 +1,8 @@
//===- SystemUtils.h - Utilities to do low-level system stuff ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/TarWriter.h b/include/llvm/Support/TarWriter.h
index 639f61b53892..71164e2ef961 100644
--- a/include/llvm/Support/TarWriter.h
+++ b/include/llvm/Support/TarWriter.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/TarWriter.h - Tar archive file creator -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/TargetOpcodes.def b/include/llvm/Support/TargetOpcodes.def
index 3e8193a5cdcf..598c1064efd0 100644
--- a/include/llvm/Support/TargetOpcodes.def
+++ b/include/llvm/Support/TargetOpcodes.def
@@ -1,9 +1,8 @@
//===-- llvm/Support/TargetOpcodes.def - Target Indep Opcodes ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,7 @@
///
HANDLE_TARGET_OPCODE(PHI)
HANDLE_TARGET_OPCODE(INLINEASM)
+HANDLE_TARGET_OPCODE(INLINEASM_BR)
HANDLE_TARGET_OPCODE(CFI_INSTRUCTION)
HANDLE_TARGET_OPCODE(EH_LABEL)
HANDLE_TARGET_OPCODE(GC_LABEL)
@@ -316,6 +316,9 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN)
+// Generic atomic fence
+HANDLE_TARGET_OPCODE(G_FENCE)
+
/// Generic conditional branch instruction.
HANDLE_TARGET_OPCODE(G_BRCOND)
@@ -481,6 +484,27 @@ HANDLE_TARGET_OPCODE(G_UITOFP)
/// Generic FP absolute value.
HANDLE_TARGET_OPCODE(G_FABS)
+/// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This does
+/// not require that X and Y have the same type, just that they are both
+/// floating point. X and the result must have the same type. FCOPYSIGN(f32,
+/// f64) is allowed.
+HANDLE_TARGET_OPCODE(G_FCOPYSIGN)
+
+/// Generic FP canonicalize value.
+HANDLE_TARGET_OPCODE(G_FCANONICALIZE)
+
+/// FP min/max matching libm's fmin/fmax
+HANDLE_TARGET_OPCODE(G_FMINNUM)
+HANDLE_TARGET_OPCODE(G_FMAXNUM)
+
+/// FP min/max matching IEEE-754 2008's minnum/maxnum semantics.
+HANDLE_TARGET_OPCODE(G_FMINNUM_IEEE)
+HANDLE_TARGET_OPCODE(G_FMAXNUM_IEEE)
+
+/// FP min/max matching IEEE-754 2018 draft semantics.
+HANDLE_TARGET_OPCODE(G_FMINIMUM)
+HANDLE_TARGET_OPCODE(G_FMAXIMUM)
+
/// Generic pointer offset
HANDLE_TARGET_OPCODE(G_GEP)
@@ -488,9 +512,24 @@ HANDLE_TARGET_OPCODE(G_GEP)
/// *down* to the given alignment.
HANDLE_TARGET_OPCODE(G_PTR_MASK)
+/// Generic signed integer minimum.
+HANDLE_TARGET_OPCODE(G_SMIN)
+
+/// Generic signed integer maximum.
+HANDLE_TARGET_OPCODE(G_SMAX)
+
+/// Generic unsigned integer minimum.
+HANDLE_TARGET_OPCODE(G_UMIN)
+
+/// Generic unsigned integer maximum.
+HANDLE_TARGET_OPCODE(G_UMAX)
+
/// Generic BRANCH instruction. This is an unconditional branch.
HANDLE_TARGET_OPCODE(G_BR)
+/// Generic branch to jump table entry.
+HANDLE_TARGET_OPCODE(G_BRJT)
+
/// Generic insertelement.
HANDLE_TARGET_OPCODE(G_INSERT_VECTOR_ELT)
@@ -521,18 +560,39 @@ HANDLE_TARGET_OPCODE(G_BSWAP)
/// Floating point ceil.
HANDLE_TARGET_OPCODE(G_FCEIL)
+/// Floating point cosine.
+HANDLE_TARGET_OPCODE(G_FCOS)
+
+/// Floating point sine.
+HANDLE_TARGET_OPCODE(G_FSIN)
+
+/// Floating point square root.
+HANDLE_TARGET_OPCODE(G_FSQRT)
+
+/// Floating point floor.
+HANDLE_TARGET_OPCODE(G_FFLOOR)
+
+/// Floating point round to next integer.
+HANDLE_TARGET_OPCODE(G_FRINT)
+
+/// Floating point round to nearest integer.
+HANDLE_TARGET_OPCODE(G_FNEARBYINT)
+
/// Generic AddressSpaceCast.
HANDLE_TARGET_OPCODE(G_ADDRSPACE_CAST)
/// Generic block address
HANDLE_TARGET_OPCODE(G_BLOCK_ADDR)
+/// Generic jump table address
+HANDLE_TARGET_OPCODE(G_JUMP_TABLE)
+
// TODO: Add more generic opcodes as we move along.
/// Marker for the end of the generic opcode.
/// This is used to check if an opcode is in the range of the
/// generic opcodes.
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_BLOCK_ADDR)
+HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_JUMP_TABLE)
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific post-isel opcode values start here.
diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h
index ace11ed410a3..a7e1a752d081 100644
--- a/include/llvm/Support/TargetParser.h
+++ b/include/llvm/Support/TargetParser.h
@@ -1,9 +1,8 @@
//===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -122,10 +121,15 @@ enum GPUKind : uint32_t {
GK_GFX902 = 61,
GK_GFX904 = 62,
GK_GFX906 = 63,
+ GK_GFX908 = 64,
GK_GFX909 = 65,
+ GK_GFX1010 = 71,
+ GK_GFX1011 = 72,
+ GK_GFX1012 = 73,
+
GK_AMDGCN_FIRST = GK_GFX600,
- GK_AMDGCN_LAST = GK_GFX909,
+ GK_AMDGCN_LAST = GK_GFX1012,
};
/// Instruction set architecture version.
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index 1bafc4e687da..bf75650760d0 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -1,9 +1,8 @@
//===- Support/TargetRegistry.h - Target Registration -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -101,6 +100,11 @@ MCStreamer *createWasmStreamer(MCContext &Ctx,
std::unique_ptr<MCObjectWriter> &&OW,
std::unique_ptr<MCCodeEmitter> &&CE,
bool RelaxAll);
+MCStreamer *createXCOFFStreamer(MCContext &Ctx,
+ std::unique_ptr<MCAsmBackend> &&TAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll);
MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx);
@@ -471,7 +475,7 @@ public:
bool DWARFMustBeAtTheEnd) const {
MCStreamer *S;
switch (T.getObjectFormat()) {
- default:
+ case Triple::UnknownObjectFormat:
llvm_unreachable("Unknown object format");
case Triple::COFF:
assert(T.isOSWindows() && "only Windows COFF is supported");
@@ -505,6 +509,10 @@ public:
S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW),
std::move(Emitter), RelaxAll);
break;
+ case Triple::XCOFF:
+ S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW),
+ std::move(Emitter), RelaxAll);
+ break;
}
if (ObjectTargetStreamerCtorFn)
ObjectTargetStreamerCtorFn(*S, STI);
diff --git a/include/llvm/Support/TargetSelect.h b/include/llvm/Support/TargetSelect.h
index 582785cb69a5..9ffb84c4a570 100644
--- a/include/llvm/Support/TargetSelect.h
+++ b/include/llvm/Support/TargetSelect.h
@@ -1,9 +1,8 @@
//===- TargetSelect.h - Target Selection & Registration ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/TaskQueue.h b/include/llvm/Support/TaskQueue.h
index 49981adb763d..df2ffdee2cc2 100644
--- a/include/llvm/Support/TaskQueue.h
+++ b/include/llvm/Support/TaskQueue.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/TaskQueue.h - A TaskQueue implementation ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/ThreadLocal.h b/include/llvm/Support/ThreadLocal.h
index 885bd18e8356..d6838c15fc34 100644
--- a/include/llvm/Support/ThreadLocal.h
+++ b/include/llvm/Support/ThreadLocal.h
@@ -1,9 +1,8 @@
//===- llvm/Support/ThreadLocal.h - Thread Local Data ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h
index 4fdbd528b212..4bcbaa3142fd 100644
--- a/include/llvm/Support/ThreadPool.h
+++ b/include/llvm/Support/ThreadPool.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/ThreadPool.h - A ThreadPool implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index ba7ece5e72ba..46d413dc487b 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,6 +32,9 @@
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
+#elif defined(LLVM_ON_UNIX) && \
+ ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
+#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif
@@ -165,6 +167,19 @@ void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
/// purposes, and as with setting a thread's name no indication of whether
/// the operation succeeded or failed is returned.
void get_thread_name(SmallVectorImpl<char> &Name);
+
+ enum class ThreadPriority {
+ Background = 0,
+ Default = 1,
+ };
+ /// If priority is Background, tries to lower the current thread's priority
+ /// such that it does not affect foreground tasks significantly. Can be used
+ /// for long-running, latency-insensitive tasks to make sure the CPU is not
+ /// hogged by this task.
+ /// If the priority is Default, tries to restore the current thread's
+ /// priority to the default scheduling priority.
+ enum class SetThreadPriorityResult { FAILURE, SUCCESS };
+ SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
}
#endif
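The new set_thread_priority hook gives long-running worker threads a way to drop to background scheduling and later restore the default. A minimal sketch, assuming it runs on the worker thread of some hypothetical background task:

    #include "llvm/Support/Threading.h"

    void runBackgroundIndexing() {
      // Best effort: lower this thread's priority so foreground work is not starved.
      llvm::set_thread_priority(llvm::ThreadPriority::Background);
      // ... long-running, latency-insensitive work ...
      // Restore the default priority before reusing the thread for other work.
      llvm::set_thread_priority(llvm::ThreadPriority::Default);
    }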
diff --git a/include/llvm/Support/TimeProfiler.h b/include/llvm/Support/TimeProfiler.h
new file mode 100644
index 000000000000..72b6f7180bde
--- /dev/null
+++ b/include/llvm/Support/TimeProfiler.h
@@ -0,0 +1,76 @@
+//===- llvm/Support/TimeProfiler.h - Hierarchical Time Profiler -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TIME_PROFILER_H
+#define LLVM_SUPPORT_TIME_PROFILER_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+struct TimeTraceProfiler;
+extern TimeTraceProfiler *TimeTraceProfilerInstance;
+
+/// Initialize the time trace profiler.
+/// This sets up the global \p TimeTraceProfilerInstance
+/// variable to be the profiler instance.
+void timeTraceProfilerInitialize();
+
+/// Cleanup the time trace profiler, if it was initialized.
+void timeTraceProfilerCleanup();
+
+/// Is the time trace profiler enabled, i.e. initialized?
+inline bool timeTraceProfilerEnabled() {
+ return TimeTraceProfilerInstance != nullptr;
+}
+
+/// Write profiling data to output file.
+/// Data produced is JSON, in Chrome "Trace Event" format, see
+/// https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+void timeTraceProfilerWrite(raw_pwrite_stream &OS);
+
+/// Manually begin a time section, with the given \p Name and \p Detail.
+/// The profiler copies the string data, so the pointers may refer to
+/// temporaries. Time sections can be hierarchical; every Begin must have a
+/// matching End, and such pairs can nest.
+void timeTraceProfilerBegin(StringRef Name, StringRef Detail);
+void timeTraceProfilerBegin(StringRef Name,
+ llvm::function_ref<std::string()> Detail);
+
+/// Manually end the last time section.
+void timeTraceProfilerEnd();
+
+/// The TimeTraceScope is a helper class to call the begin and end functions
+/// of the time trace profiler. It begins the section when it is constructed
+/// and ends it when it is destroyed. If the time profiler is not initialized,
+/// the overhead is a single branch.
+struct TimeTraceScope {
+
+ TimeTraceScope() = delete;
+ TimeTraceScope(const TimeTraceScope &) = delete;
+ TimeTraceScope &operator=(const TimeTraceScope &) = delete;
+ TimeTraceScope(TimeTraceScope &&) = delete;
+ TimeTraceScope &operator=(TimeTraceScope &&) = delete;
+
+ TimeTraceScope(StringRef Name, StringRef Detail) {
+ if (TimeTraceProfilerInstance != nullptr)
+ timeTraceProfilerBegin(Name, Detail);
+ }
+ TimeTraceScope(StringRef Name, llvm::function_ref<std::string()> Detail) {
+ if (TimeTraceProfilerInstance != nullptr)
+ timeTraceProfilerBegin(Name, Detail);
+ }
+ ~TimeTraceScope() {
+ if (TimeTraceProfilerInstance != nullptr)
+ timeTraceProfilerEnd();
+ }
+};
+
+} // end namespace llvm
+
+#endif
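TimeProfiler.h is new in this import; the intended usage is to initialize the global profiler once, wrap interesting phases in TimeTraceScope objects, and dump Chrome trace JSON at shutdown. A minimal sketch, with compileModule standing in for any profiled phase:

    #include "llvm/Support/TimeProfiler.h"

    static void compileModule() {
      // Begins a section on construction, ends it on destruction.
      llvm::TimeTraceScope Scope("compileModule", "example.cpp");
      // ... work measured as one hierarchical section ...
    }

    int main() {
      llvm::timeTraceProfilerInitialize();
      compileModule();
      llvm::timeTraceProfilerWrite(llvm::outs()); // Chrome "Trace Event" JSON
      llvm::timeTraceProfilerCleanup();
    }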
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index a11c3ce3ff22..76c9bc7b6863 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Timer.h - Interval Timing Support ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -206,8 +205,9 @@ public:
Description.assign(NewDescription.begin(), NewDescription.end());
}
- /// Print any started timers in this group.
- void print(raw_ostream &OS);
+ /// Print any started timers in this group, optionally resetting timers after
+ /// printing them.
+ void print(raw_ostream &OS, bool ResetAfterPrint = false);
/// Clear all timers in this group.
void clear();
@@ -234,7 +234,7 @@ private:
friend void PrintStatisticsJSON(raw_ostream &OS);
void addTimer(Timer &T);
void removeTimer(Timer &T);
- void prepareToPrintList();
+ void prepareToPrintList(bool reset_time = false);
void PrintQueuedTimers(raw_ostream &OS);
void printJSONValue(raw_ostream &OS, const PrintRecord &R,
const char *suffix, double Value);
diff --git a/include/llvm/Support/ToolOutputFile.h b/include/llvm/Support/ToolOutputFile.h
index cf3bc2fb0171..a99e327f8db7 100644
--- a/include/llvm/Support/ToolOutputFile.h
+++ b/include/llvm/Support/ToolOutputFile.h
@@ -1,9 +1,8 @@
//===- ToolOutputFile.h - Output files for compiler-like tools -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h
index 490bd94f4cd5..8cf4f7aed7f8 100644
--- a/include/llvm/Support/TrailingObjects.h
+++ b/include/llvm/Support/TrailingObjects.h
@@ -1,9 +1,8 @@
//===--- TrailingObjects.h - Variable-length classes ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Support/TrigramIndex.h b/include/llvm/Support/TrigramIndex.h
index da0b6daf47ed..9351c2db169a 100644
--- a/include/llvm/Support/TrigramIndex.h
+++ b/include/llvm/Support/TrigramIndex.h
@@ -1,9 +1,8 @@
//===-- TrigramIndex.h - a heuristic for SpecialCaseList --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// TrigramIndex implements a heuristic for SpecialCaseList that allows to
diff --git a/include/llvm/Support/TypeName.h b/include/llvm/Support/TypeName.h
index 0eb7ead98b21..236490a25011 100644
--- a/include/llvm/Support/TypeName.h
+++ b/include/llvm/Support/TypeName.h
@@ -1,9 +1,8 @@
//===- TypeName.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/Unicode.h b/include/llvm/Support/Unicode.h
index 983acaf03635..ca17bba2fbb4 100644
--- a/include/llvm/Support/Unicode.h
+++ b/include/llvm/Support/Unicode.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Unicode.h - Unicode character properties -*- C++ -*-=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h
index 3cf4a6d96602..4b59f8a92b76 100644
--- a/include/llvm/Support/UnicodeCharRanges.h
+++ b/include/llvm/Support/UnicodeCharRanges.h
@@ -1,9 +1,8 @@
//===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
diff --git a/include/llvm/Support/UniqueLock.h b/include/llvm/Support/UniqueLock.h
index 91dc911036d5..0a887ad5965d 100644
--- a/include/llvm/Support/UniqueLock.h
+++ b/include/llvm/Support/UniqueLock.h
@@ -1,9 +1,8 @@
//===- Support/UniqueLock.h - Acquire/Release Mutex In Scope ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index 084b901b326c..1e14dfec9a61 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Valgrind.h - Communication with Valgrind ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/VersionTuple.h b/include/llvm/Support/VersionTuple.h
index e85a188e54b4..14736d6b28f0 100644
--- a/include/llvm/Support/VersionTuple.h
+++ b/include/llvm/Support/VersionTuple.h
@@ -1,9 +1,8 @@
//===- VersionTuple.h - Version Number Handling -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Support/VirtualFileSystem.h b/include/llvm/Support/VirtualFileSystem.h
index 61c3d2f46e9c..31c9e851daed 100644
--- a/include/llvm/Support/VirtualFileSystem.h
+++ b/include/llvm/Support/VirtualFileSystem.h
@@ -1,9 +1,8 @@
//===- VirtualFileSystem.h - Virtual File System Layer ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,15 +58,15 @@ public:
Status() = default;
Status(const llvm::sys::fs::file_status &Status);
- Status(StringRef Name, llvm::sys::fs::UniqueID UID,
+ Status(const Twine &Name, llvm::sys::fs::UniqueID UID,
llvm::sys::TimePoint<> MTime, uint32_t User, uint32_t Group,
uint64_t Size, llvm::sys::fs::file_type Type,
llvm::sys::fs::perms Perms);
/// Get a copy of a Status with a different name.
- static Status copyWithNewName(const Status &In, StringRef NewName);
+ static Status copyWithNewName(const Status &In, const Twine &NewName);
static Status copyWithNewName(const llvm::sys::fs::file_status &In,
- StringRef NewName);
+ const Twine &NewName);
/// Returns the name that should be used for this file or directory.
StringRef getName() const { return Name; }
@@ -299,8 +298,16 @@ public:
/// Gets an \p vfs::FileSystem for the 'real' file system, as seen by
/// the operating system.
+/// The working directory is linked to the process's working directory.
+/// (This is usually thread-hostile).
IntrusiveRefCntPtr<FileSystem> getRealFileSystem();
+/// Create an \p vfs::FileSystem for the 'real' file system, as seen by
+/// the operating system.
+/// It has its own working directory, independent of (but initially equal to)
+/// that of the process.
+std::unique_ptr<FileSystem> createPhysicalFileSystem();
+
/// A file system that allows overlaying one \p AbstractFileSystem on top
/// of another.
///
@@ -336,15 +343,24 @@ public:
using iterator = FileSystemList::reverse_iterator;
using const_iterator = FileSystemList::const_reverse_iterator;
+ using reverse_iterator = FileSystemList::iterator;
+ using const_reverse_iterator = FileSystemList::const_iterator;
/// Get an iterator pointing to the most recently added file system.
iterator overlays_begin() { return FSList.rbegin(); }
const_iterator overlays_begin() const { return FSList.rbegin(); }
- /// Get an iterator pointing one-past the least recently added file
- /// system.
+ /// Get an iterator pointing one-past the least recently added file system.
iterator overlays_end() { return FSList.rend(); }
const_iterator overlays_end() const { return FSList.rend(); }
+
+ /// Get an iterator pointing to the least recently added file system.
+ reverse_iterator overlays_rbegin() { return FSList.begin(); }
+ const_reverse_iterator overlays_rbegin() const { return FSList.begin(); }
+
+ /// Get an iterator pointing one-past the most recently added file system.
+ reverse_iterator overlays_rend() { return FSList.end(); }
+ const_reverse_iterator overlays_rend() const { return FSList.end(); }
};
/// By default, this delegates all calls to the underlying file system. This
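The added reverse_iterator aliases make it possible to walk an OverlayFileSystem from the bottom (least recently added) layer to the top, complementing the existing top-down overlays_begin/overlays_end. A minimal sketch, assuming OFS is an existing llvm::vfs::OverlayFileSystem:

    for (auto It = OFS.overlays_rbegin(), End = OFS.overlays_rend(); It != End; ++It) {
      llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> Layer = *It;
      // ... inspect Layer, e.g. Layer->status("/some/path") ...
    }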
diff --git a/include/llvm/Support/Watchdog.h b/include/llvm/Support/Watchdog.h
index 01e1d926eb95..281595e8f272 100644
--- a/include/llvm/Support/Watchdog.h
+++ b/include/llvm/Support/Watchdog.h
@@ -1,9 +1,8 @@
//===--- Watchdog.h - Watchdog timer ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h
index e27bf1b3a1a5..bdd23b41594e 100644
--- a/include/llvm/Support/Win64EH.h
+++ b/include/llvm/Support/Win64EH.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Win64EH.h ---Win64 EH Constants-------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/WindowsError.h b/include/llvm/Support/WindowsError.h
index 63bfe5976546..195405224124 100644
--- a/include/llvm/Support/WindowsError.h
+++ b/include/llvm/Support/WindowsError.h
@@ -1,9 +1,8 @@
//===-- WindowsError.h - Support for mapping windows errors to posix-------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/WithColor.h b/include/llvm/Support/WithColor.h
index 76842d1c3dc8..f4e107581179 100644
--- a/include/llvm/Support/WithColor.h
+++ b/include/llvm/Support/WithColor.h
@@ -1,9 +1,8 @@
//===- WithColor.h ----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/X86DisassemblerDecoderCommon.h b/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 466dd309909a..baf842b12a27 100644
--- a/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -1,9 +1,8 @@
//===-- X86DisassemblerDecoderCommon.h - Disassembler decoder ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,29 +46,23 @@ namespace X86Disassembler {
// Attributes of an instruction that must be known before the opcode can be
// processed correctly. Most of these indicate the presence of particular
// prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
-#define ATTRIBUTE_BITS \
- ENUM_ENTRY(ATTR_NONE, 0x00) \
- ENUM_ENTRY(ATTR_64BIT, (0x1 << 0)) \
- ENUM_ENTRY(ATTR_XS, (0x1 << 1)) \
- ENUM_ENTRY(ATTR_XD, (0x1 << 2)) \
- ENUM_ENTRY(ATTR_REXW, (0x1 << 3)) \
- ENUM_ENTRY(ATTR_OPSIZE, (0x1 << 4)) \
- ENUM_ENTRY(ATTR_ADSIZE, (0x1 << 5)) \
- ENUM_ENTRY(ATTR_VEX, (0x1 << 6)) \
- ENUM_ENTRY(ATTR_VEXL, (0x1 << 7)) \
- ENUM_ENTRY(ATTR_EVEX, (0x1 << 8)) \
- ENUM_ENTRY(ATTR_EVEXL, (0x1 << 9)) \
- ENUM_ENTRY(ATTR_EVEXL2, (0x1 << 10)) \
- ENUM_ENTRY(ATTR_EVEXK, (0x1 << 11)) \
- ENUM_ENTRY(ATTR_EVEXKZ, (0x1 << 12)) \
- ENUM_ENTRY(ATTR_EVEXB, (0x1 << 13))
-
-#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
- ATTRIBUTE_BITS
- ATTR_max
+ ATTR_NONE = 0x00,
+ ATTR_64BIT = 0x1 << 0,
+ ATTR_XS = 0x1 << 1,
+ ATTR_XD = 0x1 << 2,
+ ATTR_REXW = 0x1 << 3,
+ ATTR_OPSIZE = 0x1 << 4,
+ ATTR_ADSIZE = 0x1 << 5,
+ ATTR_VEX = 0x1 << 6,
+ ATTR_VEXL = 0x1 << 7,
+ ATTR_EVEX = 0x1 << 8,
+ ATTR_EVEXL2 = 0x1 << 9,
+ ATTR_EVEXK = 0x1 << 10,
+ ATTR_EVEXKZ = 0x1 << 11,
+ ATTR_EVEXB = 0x1 << 12,
+ ATTR_max = 0x1 << 13,
};
-#undef ENUM_ENTRY
// Combinations of the above attributes that are relevant to instruction
// decode. Although other combinations are possible, they can be reduced to
@@ -394,6 +387,7 @@ enum ModRMDecisionType {
ENUM_ENTRY(ENCODING_IRC, "Immediate for static rounding control") \
ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \
"opcode byte") \
+ ENUM_ENTRY(ENCODING_CC, "Condition code encoded in opcode") \
ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \
"in type") \
ENUM_ENTRY(ENCODING_SI, "Source index; encoded in OpSize/Adsize prefix") \
@@ -415,9 +409,6 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_R32, "4-byte") \
ENUM_ENTRY(TYPE_R64, "8-byte") \
ENUM_ENTRY(TYPE_IMM, "immediate operand") \
- ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
- ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
- ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \
ENUM_ENTRY(TYPE_M, "Memory operand") \
ENUM_ENTRY(TYPE_MVSIBX, "Memory operand using XMM index") \
@@ -432,6 +423,7 @@ enum OperandEncoding {
ENUM_ENTRY(TYPE_YMM, "32-byte") \
ENUM_ENTRY(TYPE_ZMM, "64-byte") \
ENUM_ENTRY(TYPE_VK, "mask register") \
+ ENUM_ENTRY(TYPE_VK_PAIR, "mask register pair") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def
index e9bede545d3f..1749be3b3ae2 100644
--- a/include/llvm/Support/X86TargetParser.def
+++ b/include/llvm/Support/X86TargetParser.def
@@ -1,9 +1,8 @@
//===- X86TargetParser.def - X86 target parsing defines ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,8 @@ X86_VENDOR(VENDOR_AMD, "amd")
#ifndef X86_CPU_TYPE
#define X86_CPU_TYPE(ARCHNAME, ENUM)
#endif
+// The first part of this list must match what is implemented in libgcc and
+// compiler-rt. Clang uses this to know how to implement __builtin_cpu_is.
X86_CPU_TYPE_COMPAT_WITH_ALIAS("bonnell", INTEL_BONNELL, "bonnell", "atom")
X86_CPU_TYPE_COMPAT ("core2", INTEL_CORE2, "core2")
X86_CPU_TYPE_COMPAT ("nehalem", INTEL_COREI7, "corei7")
@@ -80,6 +81,8 @@ X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3)
#define X86_CPU_SUBTYPE(ARCHNAME, ENUM)
#endif
+// The first part of this list must match what is implemented in libgcc and
+// compiler-rt. Clang uses this to know how to implement __builtin_cpu_is.
X86_CPU_SUBTYPE_COMPAT("nehalem", INTEL_COREI7_NEHALEM, "nehalem")
X86_CPU_SUBTYPE_COMPAT("westmere", INTEL_COREI7_WESTMERE, "westmere")
X86_CPU_SUBTYPE_COMPAT("sandybridge", INTEL_COREI7_SANDYBRIDGE, "sandybridge")
@@ -99,14 +102,16 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a
X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake")
X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
+X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2")
+X86_CPU_SUBTYPE_COMPAT("cascadelake", INTEL_COREI7_CASCADELAKE, "cascadelake")
// Entries below this are not in libgcc/compiler-rt.
X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65)
X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45)
-X86_CPU_SUBTYPE ("cascadelake", INTEL_COREI7_CASCADELAKE)
X86_CPU_SUBTYPE ("k6", AMDPENTIUM_K6)
X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62)
X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63)
X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE)
+X86_CPU_SUBTYPE ("cooperlake", INTEL_COREI7_COOPERLAKE)
#undef X86_CPU_SUBTYPE_COMPAT
#undef X86_CPU_SUBTYPE
@@ -161,5 +166,6 @@ X86_FEATURE (65, FEATURE_ADX)
X86_FEATURE (66, FEATURE_EM64T)
X86_FEATURE (67, FEATURE_CLFLUSHOPT)
X86_FEATURE (68, FEATURE_SHA)
+X86_FEATURE (69, FEATURE_AVX512BF16)
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index 5b031a9a4270..3570119a3bfd 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -1,9 +1,8 @@
//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h
index 3d790e96fff7..5181dc56d81d 100644
--- a/include/llvm/Support/YAMLTraits.h
+++ b/include/llvm/Support/YAMLTraits.h
@@ -1,9 +1,8 @@
//===- llvm/Support/YAMLTraits.h --------------------------------*- C++ -*-===//
//
-// The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -102,8 +101,7 @@ template <class T, class Context> struct MappingContextTraits {
/// io.enumCase(value, "green", cGreen);
/// }
/// };
-template<typename T>
-struct ScalarEnumerationTraits {
+template <typename T, typename Enable = void> struct ScalarEnumerationTraits {
// Must provide:
// static void enumeration(IO &io, T &value);
};
@@ -119,8 +117,7 @@ struct ScalarEnumerationTraits {
/// io.bitSetCase(value, "round", flagRound);
/// }
/// };
-template<typename T>
-struct ScalarBitSetTraits {
+template <typename T, typename Enable = void> struct ScalarBitSetTraits {
// Must provide:
// static void bitset(IO &io, T &value);
};
@@ -146,8 +143,7 @@ enum class QuotingType { None, Single, Double };
/// }
/// static QuotingType mustQuote(StringRef) { return QuotingType::Single; }
/// };
-template<typename T>
-struct ScalarTraits {
+template <typename T, typename Enable = void> struct ScalarTraits {
// Must provide:
//
// Function to write the value as a string:
@@ -864,8 +860,8 @@ public:
mapOptionalWithContext(Key, Val, Ctx);
}
- template <typename T>
- void mapOptional(const char *Key, T &Val, const T &Default) {
+ template <typename T, typename DefaultT>
+ void mapOptional(const char *Key, T &Val, const DefaultT &Default) {
EmptyContext Ctx;
mapOptionalWithContext(Key, Val, Default, Ctx);
}
@@ -891,10 +887,13 @@ public:
this->processKey(Key, Val, false, Ctx);
}
- template <typename T, typename Context>
- void mapOptionalWithContext(const char *Key, T &Val, const T &Default,
+ template <typename T, typename Context, typename DefaultT>
+ void mapOptionalWithContext(const char *Key, T &Val, const DefaultT &Default,
Context &Ctx) {
- this->processKeyWithDefault(Key, Val, Default, false, Ctx);
+ static_assert(std::is_convertible<DefaultT, T>::value,
+ "Default type must be implicitly convertible to value type!");
+ this->processKeyWithDefault(Key, Val, static_cast<const T &>(Default),
+ false, Ctx);
}
private:
@@ -978,7 +977,7 @@ yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
bool DoClear;
if ( io.beginBitSetScalar(DoClear) ) {
if ( DoClear )
- Val = static_cast<T>(0);
+ Val = T();
ScalarBitSetTraits<T>::bitset(io, Val);
io.endBitSetScalar();
}
@@ -1243,12 +1242,14 @@ struct ScalarTraits<double> {
static QuotingType mustQuote(StringRef) { return QuotingType::None; }
};
-// For endian types, we just use the existing ScalarTraits for the underlying
-// type. This way endian aware types are supported whenever a ScalarTraits
-// is defined for the underlying type.
+// For endian types, we use existing scalar Traits class for the underlying
+// type. This way endian aware types are supported whenever the traits are
+// defined for the underlying type.
template <typename value_type, support::endianness endian, size_t alignment>
-struct ScalarTraits<support::detail::packed_endian_specific_integral<
- value_type, endian, alignment>> {
+struct ScalarTraits<
+ support::detail::packed_endian_specific_integral<value_type, endian,
+ alignment>,
+ typename std::enable_if<has_ScalarTraits<value_type>::value>::type> {
using endian_type =
support::detail::packed_endian_specific_integral<value_type, endian,
alignment>;
@@ -1269,6 +1270,38 @@ struct ScalarTraits<support::detail::packed_endian_specific_integral<
}
};
+template <typename value_type, support::endianness endian, size_t alignment>
+struct ScalarEnumerationTraits<
+ support::detail::packed_endian_specific_integral<value_type, endian,
+ alignment>,
+ typename std::enable_if<
+ has_ScalarEnumerationTraits<value_type>::value>::type> {
+ using endian_type =
+ support::detail::packed_endian_specific_integral<value_type, endian,
+ alignment>;
+
+ static void enumeration(IO &io, endian_type &E) {
+ value_type V = E;
+ ScalarEnumerationTraits<value_type>::enumeration(io, V);
+ E = V;
+ }
+};
+
+template <typename value_type, support::endianness endian, size_t alignment>
+struct ScalarBitSetTraits<
+ support::detail::packed_endian_specific_integral<value_type, endian,
+ alignment>,
+ typename std::enable_if<has_ScalarBitSetTraits<value_type>::value>::type> {
+ using endian_type =
+ support::detail::packed_endian_specific_integral<value_type, endian,
+ alignment>;
+ static void bitset(IO &io, endian_type &E) {
+ value_type V = E;
+ ScalarBitSetTraits<value_type>::bitset(io, V);
+ E = V;
+ }
+};
+
// Utility for use within MappingTraits<>::mapping() method
// to [de]normalize an object for use with YAML conversion.
template <typename TNorm, typename TFinal>
@@ -1587,8 +1620,9 @@ private:
bool NeedBitValueComma = false;
bool NeedFlowSequenceComma = false;
bool EnumerationMatchFound = false;
- bool NeedsNewLine = false;
bool WriteDefaultValues = false;
+ StringRef Padding;
+ StringRef PaddingBeforeContainer;
};
/// YAML I/O does conversion based on types. But often native data types
@@ -1872,6 +1906,11 @@ struct SequenceTraits<SmallVector<T, N>,
typename std::enable_if<CheckIsBool<
SequenceElementTraits<T>::flow>::value>::type>
: SequenceTraitsImpl<SmallVector<T, N>, SequenceElementTraits<T>::flow> {};
+template <typename T>
+struct SequenceTraits<SmallVectorImpl<T>,
+ typename std::enable_if<CheckIsBool<
+ SequenceElementTraits<T>::flow>::value>::type>
+ : SequenceTraitsImpl<SmallVectorImpl<T>, SequenceElementTraits<T>::flow> {};
// Sequences of fundamental types use flow formatting.
template <typename T>
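The relaxed mapOptional signature above now accepts a default whose type is merely convertible to the mapped value's type, so string literals can default std::string fields directly. A minimal sketch, assuming a hypothetical Config struct:

    #include "llvm/Support/YAMLTraits.h"
    #include <string>

    struct Config { std::string Triple; };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<Config> {
      static void mapping(IO &Io, Config &C) {
        // const char[] default is implicitly convertible to std::string.
        Io.mapOptional("triple", C.Triple, "x86_64-unknown-linux-gnu");
      }
    };
    } // namespace yaml
    } // namespace llvm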
diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h
index b46fd7f730c9..4ecdb17376f1 100644
--- a/include/llvm/Support/circular_raw_ostream.h
+++ b/include/llvm/Support/circular_raw_ostream.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/circular_raw_ostream.h - Buffered streams --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/raw_os_ostream.h b/include/llvm/Support/raw_os_ostream.h
index a983aeb90879..c51a94da3a28 100644
--- a/include/llvm/Support/raw_os_ostream.h
+++ b/include/llvm/Support/raw_os_ostream.h
@@ -1,9 +1,8 @@
//===- raw_os_ostream.h - std::ostream adaptor for raw_ostream --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index d062e716209d..48bb623b0638 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -1,9 +1,8 @@
//===--- raw_ostream.h - Raw output stream ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -346,7 +345,7 @@ public:
explicit raw_pwrite_stream(bool Unbuffered = false)
: raw_ostream(Unbuffered) {}
void pwrite(const char *Ptr, size_t Size, uint64_t Offset) {
-#ifndef NDBEBUG
+#ifndef NDEBUG
uint64_t Pos = tell();
// /dev/null always reports a pos of 0, so we cannot perform this check
// in that case.
diff --git a/include/llvm/Support/raw_sha1_ostream.h b/include/llvm/Support/raw_sha1_ostream.h
index bd55d98b7c1d..3991691796b5 100644
--- a/include/llvm/Support/raw_sha1_ostream.h
+++ b/include/llvm/Support/raw_sha1_ostream.h
@@ -1,9 +1,8 @@
//==- raw_sha1_ostream.h - raw_ostream that computes SHA1 --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/thread.h b/include/llvm/Support/thread.h
index 787a513d6017..084ed16166fe 100644
--- a/include/llvm/Support/thread.h
+++ b/include/llvm/Support/thread.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/thread.h - Wrapper for <thread> ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index e7b8f2517b8a..c8c6a76a90f1 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -1,9 +1,8 @@
//===- llvm/Support/type_traits.h - Simplified type traits ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,35 +24,6 @@
namespace llvm {
-/// isPodLike - This is a type trait that is used to determine whether a given
-/// type can be copied around with memcpy instead of running ctors etc.
-template <typename T>
-struct isPodLike {
- // std::is_trivially_copyable is available in libc++ with clang, libstdc++
- // that comes with GCC 5. MSVC 2015 and newer also have
- // std::is_trivially_copyable.
-#if (__has_feature(is_trivially_copyable) && defined(_LIBCPP_VERSION)) || \
- (defined(__GNUC__) && __GNUC__ >= 5) || defined(_MSC_VER)
- // If the compiler supports the is_trivially_copyable trait use it, as it
- // matches the definition of isPodLike closely.
- static const bool value = std::is_trivially_copyable<T>::value;
-#elif __has_feature(is_trivially_copyable)
- // Use the internal name if the compiler supports is_trivially_copyable but we
- // don't know if the standard library does. This is the case for clang in
- // conjunction with libstdc++ from GCC 4.x.
- static const bool value = __is_trivially_copyable(T);
-#else
- // If we don't know anything else, we can (at least) assume that all non-class
- // types are PODs.
- static const bool value = !std::is_class<T>::value;
-#endif
-};
-
-// std::pair's are pod-like if their elements are.
-template<typename T, typename U>
-struct isPodLike<std::pair<T, U>> {
- static const bool value = isPodLike<T>::value && isPodLike<U>::value;
-};
/// Metafunction that determines whether the given type is either an
/// integral type or an enumeration type, including enum classes.
@@ -120,6 +90,12 @@ template<typename T> union move_construction_triviality_helper {
move_construction_triviality_helper(move_construction_triviality_helper&&) = default;
~move_construction_triviality_helper() = default;
};
+
+template<class T>
+union trivial_helper {
+ T t;
+};
+
} // end namespace detail
/// An implementation of `std::is_trivially_copy_constructible` since we have
@@ -144,6 +120,78 @@ struct is_trivially_move_constructible<T &> : std::true_type {};
template <typename T>
struct is_trivially_move_constructible<T &&> : std::true_type {};
+
+template <typename T>
+struct is_copy_assignable {
+ template<class F>
+ static auto get(F*) -> decltype(std::declval<F &>() = std::declval<const F &>(), std::true_type{});
+ static std::false_type get(...);
+ static constexpr bool value = decltype(get((T*)nullptr))::value;
+};
+
+template <typename T>
+struct is_move_assignable {
+ template<class F>
+ static auto get(F*) -> decltype(std::declval<F &>() = std::declval<F &&>(), std::true_type{});
+ static std::false_type get(...);
+ static constexpr bool value = decltype(get((T*)nullptr))::value;
+};
+
+
+// An implementation of `std::is_trivially_copyable` since STL version
+// is not equally supported by all compilers, especially GCC 4.9.
+// Uniform implementation of this trait is important for ABI compatibility
+// as it has an impact on SmallVector's ABI (among others).
+template <typename T>
+class is_trivially_copyable {
+
+ // copy constructors
+ static constexpr bool has_trivial_copy_constructor =
+ std::is_copy_constructible<detail::trivial_helper<T>>::value;
+ static constexpr bool has_deleted_copy_constructor =
+ !std::is_copy_constructible<T>::value;
+
+ // move constructors
+ static constexpr bool has_trivial_move_constructor =
+ std::is_move_constructible<detail::trivial_helper<T>>::value;
+ static constexpr bool has_deleted_move_constructor =
+ !std::is_move_constructible<T>::value;
+
+ // copy assign
+ static constexpr bool has_trivial_copy_assign =
+ is_copy_assignable<detail::trivial_helper<T>>::value;
+ static constexpr bool has_deleted_copy_assign =
+ !is_copy_assignable<T>::value;
+
+ // move assign
+ static constexpr bool has_trivial_move_assign =
+ is_move_assignable<detail::trivial_helper<T>>::value;
+ static constexpr bool has_deleted_move_assign =
+ !is_move_assignable<T>::value;
+
+ // destructor
+ static constexpr bool has_trivial_destructor =
+ std::is_destructible<detail::trivial_helper<T>>::value;
+
+ public:
+
+ static constexpr bool value =
+ has_trivial_destructor &&
+ (has_deleted_move_assign || has_trivial_move_assign) &&
+ (has_deleted_move_constructor || has_trivial_move_constructor) &&
+ (has_deleted_copy_assign || has_trivial_copy_assign) &&
+ (has_deleted_copy_constructor || has_trivial_copy_constructor);
+
+#ifdef HAVE_STD_IS_TRIVIALLY_COPYABLE
+ static_assert(value == std::is_trivially_copyable<T>::value,
+ "inconsistent behavior between llvm:: and std:: implementation of is_trivially_copyable");
+#endif
+};
+template <typename T>
+class is_trivially_copyable<T*> : public std::true_type {
+};
+
+
} // end namespace llvm
// If the compiler supports detecting whether a class is final, define
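The llvm::is_trivially_copyable trait above combines two detection idioms: wrapping T in a union strips any user-provided special members (the union's copy constructor is trivial when T's is trivial, and deleted otherwise), and an unevaluated assignment expression probes assignability. A minimal self-contained sketch of both idioms using only the standard library; the sketch names are illustrative and not part of this patch:

#include <string>
#include <type_traits>
#include <utility>

namespace sketch {
// A union member suppresses T's user-provided special members: the union's
// copy constructor is trivial when T's is trivial, and deleted otherwise.
template <class T> union trivial_helper { T t; };

template <class T>
struct has_trivial_copy_ctor
    : std::integral_constant<
          bool, std::is_copy_constructible<trivial_helper<T>>::value> {};

// SFINAE probe in the style of llvm::is_copy_assignable: assigning a const
// lvalue of F to an lvalue of F is well-formed only for copy-assignable types.
template <class T> struct is_copy_assignable {
  template <class F>
  static auto get(F *)
      -> decltype(std::declval<F &>() = std::declval<const F &>(),
                  std::true_type{});
  static std::false_type get(...);
  static constexpr bool value = decltype(get(static_cast<T *>(nullptr)))::value;
};
} // namespace sketch

static_assert(sketch::has_trivial_copy_ctor<int>::value,
              "int has a trivial copy constructor");
static_assert(!sketch::has_trivial_copy_ctor<std::string>::value,
              "std::string has a non-trivial copy constructor");
static_assert(sketch::is_copy_assignable<std::string>::value,
              "std::string is copy-assignable");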
diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h
index de4d3bf54782..7c83b6298620 100644
--- a/include/llvm/TableGen/Error.h
+++ b/include/llvm/TableGen/Error.h
@@ -1,9 +1,8 @@
//===- llvm/TableGen/Error.h - tblgen error handling helpers ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/TableGen/Main.h b/include/llvm/TableGen/Main.h
index 670572dc8103..e464cd4d4fb5 100644
--- a/include/llvm/TableGen/Main.h
+++ b/include/llvm/TableGen/Main.h
@@ -1,9 +1,8 @@
//===- llvm/TableGen/Main.h - tblgen entry point ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index e022bc82b4e4..bf7f02208c28 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1,9 +1,8 @@
//===- llvm/TableGen/Record.h - Classes for Table Records -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -316,6 +315,7 @@ protected:
IK_TernOpInit,
IK_UnOpInit,
IK_LastOpInit,
+ IK_CondOpInit,
IK_FoldOpInit,
IK_IsAOpInit,
IK_StringInit,
@@ -623,10 +623,11 @@ public:
class CodeInit : public TypedInit {
StringRef Value;
+ SMLoc Loc;
- explicit CodeInit(StringRef V)
+ explicit CodeInit(StringRef V, const SMLoc &Loc)
: TypedInit(IK_CodeInit, static_cast<RecTy *>(CodeRecTy::get())),
- Value(V) {}
+ Value(V), Loc(Loc) {}
public:
CodeInit(const StringInit &) = delete;
@@ -636,9 +637,10 @@ public:
return I->getKind() == IK_CodeInit;
}
- static CodeInit *get(StringRef);
+ static CodeInit *get(StringRef, const SMLoc &Loc);
StringRef getValue() const { return Value; }
+ const SMLoc &getLoc() const { return Loc; }
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -798,8 +800,9 @@ public:
/// !op (X, Y) - Combine two inits.
class BinOpInit : public OpInit, public FoldingSetNode {
public:
- enum BinaryOp : uint8_t { ADD, AND, OR, SHL, SRA, SRL, LISTCONCAT,
- STRCONCAT, CONCAT, EQ, NE, LE, LT, GE, GT };
+ enum BinaryOp : uint8_t { ADD, MUL, AND, OR, SHL, SRA, SRL, LISTCONCAT,
+ LISTSPLAT, STRCONCAT, CONCAT, EQ, NE, LE, LT, GE,
+ GT };
private:
Init *LHS, *RHS;
@@ -818,6 +821,8 @@ public:
static BinOpInit *get(BinaryOp opc, Init *lhs, Init *rhs,
RecTy *Type);
static Init *getStrConcat(Init *lhs, Init *rhs);
+ static Init *getListConcat(TypedInit *lhs, Init *rhs);
+ static Init *getListSplat(TypedInit *lhs, Init *rhs);
void Profile(FoldingSetNodeID &ID) const;
@@ -912,6 +917,83 @@ public:
std::string getAsString() const override;
};
+/// !cond(condition_1: value_1, ..., condition_n: value_n)
+/// Selects the first value whose condition is true.
+/// Otherwise reports an error.
+class CondOpInit final : public TypedInit, public FoldingSetNode,
+ public TrailingObjects<CondOpInit, Init *> {
+ unsigned NumConds;
+ RecTy *ValType;
+
+ CondOpInit(unsigned NC, RecTy *Type)
+ : TypedInit(IK_CondOpInit, Type),
+ NumConds(NC), ValType(Type) {}
+
+ size_t numTrailingObjects(OverloadToken<Init *>) const {
+ return 2*NumConds;
+ }
+
+public:
+ CondOpInit(const CondOpInit &) = delete;
+ CondOpInit &operator=(const CondOpInit &) = delete;
+
+ static bool classof(const Init *I) {
+ return I->getKind() == IK_CondOpInit;
+ }
+
+ static CondOpInit *get(ArrayRef<Init*> C, ArrayRef<Init*> V,
+ RecTy *Type);
+
+ void Profile(FoldingSetNodeID &ID) const;
+
+ RecTy *getValType() const { return ValType; }
+
+ unsigned getNumConds() const { return NumConds; }
+
+ Init *getCond(unsigned Num) const {
+ assert(Num < NumConds && "Condition number out of range!");
+ return getTrailingObjects<Init *>()[Num];
+ }
+
+ Init *getVal(unsigned Num) const {
+ assert(Num < NumConds && "Val number out of range!");
+ return getTrailingObjects<Init *>()[Num+NumConds];
+ }
+
+ ArrayRef<Init *> getConds() const {
+ return makeArrayRef(getTrailingObjects<Init *>(), NumConds);
+ }
+
+ ArrayRef<Init *> getVals() const {
+ return makeArrayRef(getTrailingObjects<Init *>()+NumConds, NumConds);
+ }
+
+ Init *Fold(Record *CurRec) const;
+
+ Init *resolveReferences(Resolver &R) const override;
+
+ bool isConcrete() const override;
+ bool isComplete() const override;
+ std::string getAsString() const override;
+
+ using const_case_iterator = SmallVectorImpl<Init*>::const_iterator;
+ using const_val_iterator = SmallVectorImpl<Init*>::const_iterator;
+
+ inline const_case_iterator arg_begin() const { return getConds().begin(); }
+ inline const_case_iterator arg_end () const { return getConds().end(); }
+
+ inline size_t case_size () const { return NumConds; }
+ inline bool case_empty() const { return NumConds == 0; }
+
+ inline const_val_iterator name_begin() const { return getVals().begin();}
+ inline const_val_iterator name_end () const { return getVals().end(); }
+
+ inline size_t val_size () const { return NumConds; }
+ inline bool val_empty() const { return NumConds == 0; }
+
+ Init *getBit(unsigned Bit) const override;
+};
+
/// !foldl (a, b, expr, start, lst) - Fold over a list.
class FoldOpInit : public TypedInit, public FoldingSetNode {
private:
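The new CondOpInit stores its conditions and values as a single trailing array of 2*NumConds Init pointers, with getCond(i) and getVal(i) indexing the two halves. A short usage sketch, assuming an already-parsed CondOpInit pointer; the helper function and the choice of output stream are illustrative, not part of this patch:

#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"

// Illustrative only: print each (condition, value) pair of a !cond expression.
static void printCondPairs(const llvm::CondOpInit *CO) {
  for (unsigned I = 0, E = CO->getNumConds(); I != E; ++I) {
    llvm::Init *Cond = CO->getCond(I); // I-th condition
    llvm::Init *Val = CO->getVal(I);   // value chosen when Cond folds to true
    llvm::errs() << Cond->getAsString() << " : " << Val->getAsString() << "\n";
  }
}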
diff --git a/include/llvm/TableGen/SearchableTable.td b/include/llvm/TableGen/SearchableTable.td
index 1089d363eb6f..2680c71218ea 100644
--- a/include/llvm/TableGen/SearchableTable.td
+++ b/include/llvm/TableGen/SearchableTable.td
@@ -1,9 +1,8 @@
//===- SearchableTable.td ----------------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/TableGen/SetTheory.h b/include/llvm/TableGen/SetTheory.h
index 4b32f9e3da8f..35156424b0d3 100644
--- a/include/llvm/TableGen/SetTheory.h
+++ b/include/llvm/TableGen/SetTheory.h
@@ -1,9 +1,8 @@
//===- SetTheory.h - Generate ordered sets from DAG expressions -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h
index 3aa3540d616d..795b7a6d41dc 100644
--- a/include/llvm/TableGen/StringMatcher.h
+++ b/include/llvm/TableGen/StringMatcher.h
@@ -1,9 +1,8 @@
//===- StringMatcher.h - Generate a matcher for input strings ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/TableGen/StringToOffsetTable.h b/include/llvm/TableGen/StringToOffsetTable.h
index 4b11e889ea6c..76ce51893907 100644
--- a/include/llvm/TableGen/StringToOffsetTable.h
+++ b/include/llvm/TableGen/StringToOffsetTable.h
@@ -1,9 +1,8 @@
//===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/TableGen/TableGenBackend.h b/include/llvm/TableGen/TableGenBackend.h
index d226f1f1af7b..a426e4217578 100644
--- a/include/llvm/TableGen/TableGenBackend.h
+++ b/include/llvm/TableGen/TableGenBackend.h
@@ -1,9 +1,8 @@
//===- llvm/TableGen/TableGenBackend.h - Backend utilities ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,8 @@ class raw_ostream;
/// raw_ostream.
void emitSourceFileHeader(StringRef Desc, raw_ostream &OS);
+extern bool TimeRegions;
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Target/CodeGenCWrappers.h b/include/llvm/Target/CodeGenCWrappers.h
index 3ad77c5d5e00..a99546357053 100644
--- a/include/llvm/Target/CodeGenCWrappers.h
+++ b/include/llvm/Target/CodeGenCWrappers.h
@@ -1,9 +1,8 @@
//===- llvm/Target/CodeGenCWrappers.h - CodeGen C Wrappers ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td
index 045fe2520047..45718327b4a7 100644
--- a/include/llvm/Target/GenericOpcodes.td
+++ b/include/llvm/Target/GenericOpcodes.td
@@ -1,9 +1,8 @@
//===-- GenericOpcodes.td - Opcodes used with GlobalISel ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,12 +92,14 @@ def G_BITCAST : GenericInstruction {
let hasSideEffects = 0;
}
+// Only supports scalar result types
def G_CONSTANT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$imm);
let hasSideEffects = 0;
}
+// Only supports scalar result types
def G_FCONSTANT : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins unknown:$imm);
@@ -122,31 +123,31 @@ def G_VAARG : GenericInstruction {
def G_CTLZ : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
+ let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
def G_CTLZ_ZERO_UNDEF : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
+ let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
def G_CTTZ : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
+ let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
def G_CTTZ_ZERO_UNDEF : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
+ let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
def G_CTPOP : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src);
+ let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
@@ -168,6 +169,12 @@ def G_BLOCK_ADDR : GenericInstruction {
let hasSideEffects = 0;
}
+def G_JUMP_TABLE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$jti);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Binary ops.
//------------------------------------------------------------------------------
@@ -255,21 +262,21 @@ def G_XOR : GenericInstruction {
// Generic left-shift.
def G_SHL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type0:$src2);
+ let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
// Generic logical right-shift.
def G_LSHR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type0:$src2);
+ let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
// Generic arithmetic right-shift.
def G_ASHR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
- let InOperandList = (ins type0:$src1, type0:$src2);
+ let InOperandList = (ins type0:$src1, type1:$src2);
let hasSideEffects = 0;
}
@@ -307,6 +314,38 @@ def G_PTR_MASK : GenericInstruction {
let hasSideEffects = 0;
}
+// Generic signed integer minimum.
+def G_SMIN : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic signed integer maximum.
+def G_SMAX : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic unsigned integer minimum.
+def G_UMIN : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic unsigned integer maximum.
+def G_UMAX : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
//------------------------------------------------------------------------------
// Overflow ops
//------------------------------------------------------------------------------
@@ -454,6 +493,74 @@ def G_FABS : GenericInstruction {
let hasSideEffects = 0;
}
+def G_FCOPYSIGN : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type1:$src1);
+ let hasSideEffects = 0;
+}
+
+def G_FCANONICALIZE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = 0;
+}
+
+// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
+// values.
+//
+// In the case where a single input is a NaN (either signaling or quiet),
+// the non-NaN input is returned.
+//
+// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.
+def G_FMINNUM : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+def G_FMAXNUM : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on
+// two values, following the IEEE-754 2008 definition. This differs from
+// FMINNUM/FMAXNUM in the handling of signaling NaNs. If one input is a
+// signaling NaN, returns a quiet NaN.
+def G_FMINNUM_IEEE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+def G_FMAXNUM_IEEE : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0
+// as less than 0.0. While FMINNUM_IEEE/FMAXNUM_IEEE follow IEEE 754-2008
+// semantics, FMINIMUM/FMAXIMUM follow IEEE 754-2018 draft semantics.
+def G_FMINIMUM : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+def G_FMAXIMUM : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
//------------------------------------------------------------------------------
// Floating Point Binary ops.
//------------------------------------------------------------------------------
@@ -554,6 +661,51 @@ def G_FCEIL : GenericInstruction {
let hasSideEffects = 0;
}
+// Floating point cosine of a value.
+def G_FCOS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point sine of a value.
+def G_FSIN : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point square root of a value.
+// This returns NaN for negative nonzero values.
+// NOTE: Unlike libm sqrt(), this never sets errno. In all other respects it's
+// libm-conformant.
+def G_FSQRT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point floor of a value.
+def G_FFLOOR : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point round to next integer.
+def G_FRINT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
+// Floating point round to the nearest integer.
+def G_FNEARBYINT : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = 0;
+}
+
//------------------------------------------------------------------------------
// Opcodes for LLVM Intrinsics
//------------------------------------------------------------------------------
@@ -647,6 +799,12 @@ def G_ATOMICRMW_MIN : G_ATOMICRMW_OP;
def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP;
def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP;
+def G_FENCE : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins i32imm:$ordering, i32imm:$scope);
+ let hasSideEffects = 1;
+}
+
//------------------------------------------------------------------------------
// Variadic ops
//------------------------------------------------------------------------------
@@ -689,7 +847,9 @@ def G_MERGE_VALUES : GenericInstruction {
let hasSideEffects = 0;
}
-/// Create a vector from multiple scalar registers.
+/// Create a vector from multiple scalar registers. No implicit
+/// conversion is performed (i.e. the result element type must be the
+/// same as the type of all source operands).
def G_BUILD_VECTOR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src0, variable_ops);
@@ -759,6 +919,15 @@ def G_BRINDIRECT : GenericInstruction {
let isTerminator = 1;
}
+// Generic branch to jump table entry
+def G_BRJT : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins ptype0:$tbl, unknown:$jti, type1:$idx);
+ let hasSideEffects = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
+}
+
//------------------------------------------------------------------------------
// Vector ops
//------------------------------------------------------------------------------
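The comments on G_FMINNUM/G_FMAXNUM above describe the same contract as libm's fmin/fmax: when exactly one operand is a NaN, the non-NaN operand is returned. A small self-contained C++ check of that behaviour, purely illustrative (it exercises the libm analogue, not the generic opcodes themselves):

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double QNaN = std::numeric_limits<double>::quiet_NaN();
  // One NaN input: the non-NaN operand is returned, matching the
  // G_FMINNUM/G_FMAXNUM description.
  assert(std::fmin(QNaN, 1.0) == 1.0);
  assert(std::fmax(-2.0, QNaN) == -2.0);
  // Both inputs NaN: the result is NaN.
  assert(std::isnan(std::fmax(QNaN, QNaN)));
  return 0;
}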
diff --git a/include/llvm/Target/GlobalISel/RegisterBank.td b/include/llvm/Target/GlobalISel/RegisterBank.td
index 4dfd139e9fb6..51578b66b160 100644
--- a/include/llvm/Target/GlobalISel/RegisterBank.td
+++ b/include/llvm/Target/GlobalISel/RegisterBank.td
@@ -1,9 +1,8 @@
//===- RegisterBank.td - Register bank definitions ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 31d26361260d..6cc58d6521da 100644
--- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -1,9 +1,8 @@
//===- TargetGlobalISel.td - Common code for GlobalISel ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,6 +49,8 @@ def : GINodeEquiv<G_FCONSTANT, fpimm>;
def : GINodeEquiv<G_ADD, add>;
def : GINodeEquiv<G_SUB, sub>;
def : GINodeEquiv<G_MUL, mul>;
+def : GINodeEquiv<G_UMULH, mulhu>;
+def : GINodeEquiv<G_SMULH, mulhs>;
def : GINodeEquiv<G_SDIV, sdiv>;
def : GINodeEquiv<G_UDIV, udiv>;
def : GINodeEquiv<G_SREM, srem>;
@@ -77,6 +78,7 @@ def : GINodeEquiv<G_FREM, frem>;
def : GINodeEquiv<G_FPOW, fpow>;
def : GINodeEquiv<G_FEXP2, fexp2>;
def : GINodeEquiv<G_FLOG2, flog2>;
+def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
// ISD::INTRINSIC_VOID can also be handled with G_INTRINSIC_W_SIDE_EFFECTS.
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
@@ -89,7 +91,19 @@ def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>;
def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
def : GINodeEquiv<G_CTPOP, ctpop>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
+def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
def : GINodeEquiv<G_FCEIL, fceil>;
+def : GINodeEquiv<G_FCOS, fcos>;
+def : GINodeEquiv<G_FSIN, fsin>;
+def : GINodeEquiv<G_FABS, fabs>;
+def : GINodeEquiv<G_FSQRT, fsqrt>;
+def : GINodeEquiv<G_FFLOOR, ffloor>;
+def : GINodeEquiv<G_FRINT, frint>;
+def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
+def : GINodeEquiv<G_SMIN, smin>;
+def : GINodeEquiv<G_SMAX, smax>;
+def : GINodeEquiv<G_UMIN, umin>;
+def : GINodeEquiv<G_UMAX, umax>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
@@ -124,6 +138,7 @@ def : GINodeEquiv<G_ATOMICRMW_MIN, atomic_load_min>;
def : GINodeEquiv<G_ATOMICRMW_MAX, atomic_load_max>;
def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>;
def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>;
+def : GINodeEquiv<G_FENCE, atomic_fence>;
// Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
// Should be used on defs that subclass GIComplexOperandMatcher<>.
diff --git a/include/llvm/Target/GlobalISel/Target.td b/include/llvm/Target/GlobalISel/Target.td
index 6740f404a9d3..538ca65e1162 100644
--- a/include/llvm/Target/GlobalISel/Target.td
+++ b/include/llvm/Target/GlobalISel/Target.td
@@ -1,9 +1,8 @@
//===- Target.td - Define GlobalISel rules -----------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index e4b827babb92..d58662e128e0 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -1,9 +1,8 @@
//===- Target.td - Target Independent TableGen interface ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -122,6 +121,10 @@ class ComposedSubRegIndex<SubRegIndex A, SubRegIndex B>
// this register class when printing.
class RegAltNameIndex {
string Namespace = "";
+
+  // The alternative name set to fall back to if a register's name is not
+  // defined in this set. This allows creating name sets with only a few
+  // alternative names.
+ RegAltNameIndex FallbackRegAltNameIndex = ?;
}
def NoRegAltName : RegAltNameIndex;
@@ -395,11 +398,49 @@ include "llvm/Target/TargetSchedule.td"
class Predicate; // Forward def
+class InstructionEncoding {
+ // Size of encoded instruction.
+ int Size;
+
+  // The "namespace" in which this instruction exists, on targets like ARM
+  // where multiple ISA namespaces exist.
+ string DecoderNamespace = "";
+
+ // List of predicates which will be turned into isel matching code.
+ list<Predicate> Predicates = [];
+
+ string DecoderMethod = "";
+
+ // Is the instruction decoder method able to completely determine if the
+ // given instruction is valid or not. If the TableGen definition of the
+ // instruction specifies bitpattern A??B where A and B are static bits, the
+ // hasCompleteDecoder flag says whether the decoder method fully handles the
+ // ?? space, i.e. if it is a final arbiter for the instruction validity.
+ // If not then the decoder attempts to continue decoding when the decoder
+ // method fails.
+ //
+  // This allows handling situations where the encoding is not fully
+  // orthogonal. Example:
+ // * InstA with bitpattern 0b0000????,
+ // * InstB with bitpattern 0b000000?? but the associated decoder method
+ // DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
+ //
+ // The decoder tries to decode a bitpattern that matches both InstA and
+ // InstB bitpatterns first as InstB (because it is the most specific
+ // encoding). In the default case (hasCompleteDecoder = 1), when
+ // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
+ // hasCompleteDecoder = 0 in InstB, the decoder is informed that
+ // DecodeInstB() is not able to determine if all possible values of ?? are
+ // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
+ // decode the bitpattern as InstA too.
+ bit hasCompleteDecoder = 1;
+}
+
//===----------------------------------------------------------------------===//
// Instruction set description - These classes correspond to the C++ classes in
// the Target/TargetInstrInfo.h file.
//
-class Instruction {
+class Instruction : InstructionEncoding {
string Namespace = "";
dag OutOperandList; // A dag containing the MI def operand list.
@@ -424,10 +465,6 @@ class Instruction {
// from the opcode.
int Size = 0;
- // DecoderNamespace - The "namespace" in which this instruction exists, on
- // targets like ARM which multiple ISA namespaces exist.
- string DecoderNamespace = "";
-
// Code size, for instruction selection.
// FIXME: What does this actually mean?
int CodeSize = 0;
@@ -453,11 +490,16 @@ class Instruction {
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
bit mayLoad = ?; // Is it possible for this inst to read memory?
bit mayStore = ?; // Is it possible for this inst to write memory?
+ bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
bit isCommutable = 0; // Is this 3 operand instruction commutable?
bit isTerminator = 0; // Is this part of the terminator for a basic block?
bit isReMaterializable = 0; // Is this instruction re-materializable?
- bit isPredicable = 0; // Is this instruction predicable?
+ bit isPredicable = 0; // 1 means this instruction is predicable
+ // even if it does not have any operand
+ // tablegen can identify as a predicate
+ bit isUnpredicable = 0; // 1 means this instruction is not predicable
+ // even if it _does_ have a predicate operand
bit hasDelaySlot = 0; // Does this instruction have a delay slot?
bit usesCustomInserter = 0; // Pseudo instr needing special help.
bit hasPostISelHook = 0; // To be *adjusted* after isel by target hook.
@@ -524,31 +566,6 @@ class Instruction {
string DisableEncoding = "";
string PostEncoderMethod = "";
- string DecoderMethod = "";
-
- // Is the instruction decoder method able to completely determine if the
- // given instruction is valid or not. If the TableGen definition of the
- // instruction specifies bitpattern A??B where A and B are static bits, the
- // hasCompleteDecoder flag says whether the decoder method fully handles the
- // ?? space, i.e. if it is a final arbiter for the instruction validity.
- // If not then the decoder attempts to continue decoding when the decoder
- // method fails.
- //
- // This allows to handle situations where the encoding is not fully
- // orthogonal. Example:
- // * InstA with bitpattern 0b0000????,
- // * InstB with bitpattern 0b000000?? but the associated decoder method
- // DecodeInstB() returns Fail when ?? is 0b00 or 0b11.
- //
- // The decoder tries to decode a bitpattern that matches both InstA and
- // InstB bitpatterns first as InstB (because it is the most specific
- // encoding). In the default case (hasCompleteDecoder = 1), when
- // DecodeInstB() returns Fail the bitpattern gets rejected. By setting
- // hasCompleteDecoder = 0 in InstB, the decoder is informed that
- // DecodeInstB() is not able to determine if all possible values of ?? are
- // valid or not. If DecodeInstB() returns Fail the decoder will attempt to
- // decode the bitpattern as InstA too.
- bit hasCompleteDecoder = 1;
/// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc.
bits<64> TSFlags = 0;
@@ -585,6 +602,13 @@ class Instruction {
bit FastISelShouldIgnore = 0;
}
+/// Defines an additional encoding that disassembles to the given instruction.
+/// Like Instruction, the Inst and SoftFail fields are omitted to allow targets
+/// to specify their size.
+class AdditionalEncoding<Instruction I> : InstructionEncoding {
+ Instruction AliasOf = I;
+}
+
/// PseudoInstExpansion - Expansion information for a pseudo-instruction.
/// Which instruction it expands to and how the operands map from the
/// pseudo.
@@ -909,7 +933,7 @@ class InstrInfo {
}
// Standard Pseudo Instructions.
-// This list must match TargetOpcodes.h and CodeGenTarget.cpp.
+// This list must match TargetOpcodes.def.
// Only these instructions are allowed in the TargetOpcode namespace.
// Ensure mayLoad and mayStore have a default value, so as not to break
// targets that set guessInstructionProperties=0. Any local definition of
@@ -934,6 +958,15 @@ def INLINEASM : StandardPseudoInstruction {
let AsmString = "";
let hasSideEffects = 0; // Note side effect is encoded in an operand.
}
+def INLINEASM_BR : StandardPseudoInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "";
+ let hasSideEffects = 0; // Note side effect is encoded in an operand.
+ let isTerminator = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
def CFI_INSTRUCTION : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
@@ -1037,7 +1070,7 @@ def BUNDLE : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "BUNDLE";
- let hasSideEffects = 1;
+ let hasSideEffects = 0;
}
def LIFETIME_START : StandardPseudoInstruction {
let OutOperandList = (outs);
@@ -1174,7 +1207,7 @@ def FENTRY_CALL : StandardPseudoInstruction {
let hasSideEffects = 1;
}
def ICALL_BRANCH_FUNNEL : StandardPseudoInstruction {
- let OutOperandList = (outs unknown:$dst);
+ let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "";
let hasSideEffects = 1;
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index 95d2b4226294..1bc03cf8a49d 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -1,9 +1,8 @@
//===- TargetCallingConv.td - Target Calling Conventions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,15 @@ class CCIfVarArg<CCAction A> : CCIf<"State.isVarArg()", A> {}
/// CCIfNotVarArg - If the current function is not vararg - apply the action
class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}
+/// CCIfPtrAddrSpace - If the top-level parent of the current argument has
+/// pointer type in the specified address-space.
+class CCIfPtrAddrSpace<int AS, CCAction A>
+ : CCIf<"(ArgFlags.isPointer() && ArgFlags.getPointerAddrSpace() == " # AS # ")", A> {}
+
+/// CCIfPtr - If the top-level parent of the current argument has
+/// pointer type in some address-space.
+class CCIfPtr<CCAction A> : CCIf<"ArgFlags.isPointer()", A> {}
+
/// CCAssignToReg - This action matches if there is a register in the specified
/// list that is still available. If so, it assigns the value to the first
/// available register and succeeds.
@@ -160,6 +168,11 @@ class CCDelegateTo<CallingConv cc> : CCAction {
/// that the target supports.
class CallingConv<list<CCAction> actions> {
list<CCAction> Actions = actions;
+
+  /// If true, this calling convention will be emitted as externally visible in
+  /// the llvm namespace instead of as a static function.
+ bit Entry = 0;
+
bit Custom = 0;
}
diff --git a/include/llvm/Target/TargetInstrPredicate.td b/include/llvm/Target/TargetInstrPredicate.td
index 4b2c57b34c2e..5623461c648d 100644
--- a/include/llvm/Target/TargetInstrPredicate.td
+++ b/include/llvm/Target/TargetInstrPredicate.td
@@ -1,9 +1,8 @@
//===- TargetInstrPredicate.td - ---------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/TargetIntrinsicInfo.h b/include/llvm/Target/TargetIntrinsicInfo.h
index 6a92bdee747e..ef571b15153e 100644
--- a/include/llvm/Target/TargetIntrinsicInfo.h
+++ b/include/llvm/Target/TargetIntrinsicInfo.h
@@ -1,9 +1,8 @@
//===-- llvm/Target/TargetIntrinsicInfo.h - Instruction Info ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td
index 182054d8444e..b68ed045520c 100644
--- a/include/llvm/Target/TargetItinerary.td
+++ b/include/llvm/Target/TargetItinerary.td
@@ -1,9 +1,8 @@
//===- TargetItinerary.td - Target Itinerary Description ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index e80f2bf82f26..3a2497bff11e 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -1,9 +1,8 @@
//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -52,6 +51,7 @@ protected:
unsigned PersonalityEncoding = 0;
unsigned LSDAEncoding = 0;
unsigned TTypeEncoding = 0;
+ unsigned CallSiteEncoding = 0;
/// This section contains the static constructor pointer list.
MCSection *StaticCtorSection = nullptr;
@@ -80,6 +80,9 @@ public:
/// Emit the module-level metadata that the platform cares about.
virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {}
+ /// Get the module-level metadata that the platform cares about.
+ virtual void getModuleMetadata(Module &M) {}
+
/// Given a constant with the SectionKind, return a section that it should be
/// placed in.
virtual MCSection *getSectionForConstant(const DataLayout &DL,
@@ -145,6 +148,7 @@ public:
unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
unsigned getLSDAEncoding() const { return LSDAEncoding; }
unsigned getTTypeEncoding() const { return TTypeEncoding; }
+ unsigned getCallSiteEncoding() const { return CallSiteEncoding; }
const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
MCStreamer &Streamer) const;
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 3eafcc25583a..cdf9f8bfd5ea 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -1,9 +1,8 @@
//===-- llvm/Target/TargetMachine.h - Target Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,6 +35,9 @@ class MCSubtargetInfo;
class MCSymbol;
class raw_pwrite_stream;
class PassManagerBuilder;
+struct PerFunctionMIParsingState;
+class SMDiagnostic;
+class SMRange;
class Target;
class TargetIntrinsicInfo;
class TargetIRAnalysis;
@@ -50,6 +52,10 @@ class PassManagerBase;
}
using legacy::PassManagerBase;
+namespace yaml {
+struct MachineFunctionInfo;
+}
+
//===----------------------------------------------------------------------===//
///
/// Primary interface to the complete machine description for the target
@@ -115,6 +121,27 @@ public:
return nullptr;
}
+ /// Allocate and return a default initialized instance of the YAML
+ /// representation for the MachineFunctionInfo.
+ virtual yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const {
+ return nullptr;
+ }
+
+ /// Allocate and initialize an instance of the YAML representation of the
+ /// MachineFunctionInfo.
+ virtual yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const {
+ return nullptr;
+ }
+
+  /// Parse out the target's MachineFunctionInfo from the YAML representation.
+ virtual bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const {
+ return false;
+ }
+
/// This method returns a pointer to the specified type of
/// TargetSubtargetInfo. In debug builds, it verifies that the object being
/// returned is of the correct type.
@@ -363,9 +390,9 @@ inline CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM,
if (CM) {
// By default, targets do not support the tiny and kernel models.
if (*CM == CodeModel::Tiny)
- report_fatal_error("Target does not support the tiny CodeModel");
+ report_fatal_error("Target does not support the tiny CodeModel", false);
if (*CM == CodeModel::Kernel)
- report_fatal_error("Target does not support the kernel CodeModel");
+ report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
return Default;
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index b18101d92833..8cc2a6010879 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -1,9 +1,8 @@
//===-- llvm/Target/TargetOptions.h - Target Options ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -119,7 +118,8 @@ namespace llvm {
NoTrapAfterNoreturn(false), EmulatedTLS(false),
ExplicitEmulatedTLS(false), EnableIPRA(false),
EmitStackSizeSection(false), EnableMachineOutliner(false),
- SupportsDefaultOutlining(false), EmitAddrsig(false) {}
+ SupportsDefaultOutlining(false), EmitAddrsig(false),
+ EnableDebugEntryValues(false) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
/// option is specified on the command line, and should enable debugging
@@ -253,6 +253,9 @@ namespace llvm {
/// Emit address-significance table.
unsigned EmitAddrsig : 1;
+ /// Emit debug info about parameter's entry values.
+ unsigned EnableDebugEntryValues : 1;
+
/// FloatABIType - This setting is set when the -float-abi=xxx option is specified
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
diff --git a/include/llvm/Target/TargetPfmCounters.td b/include/llvm/Target/TargetPfmCounters.td
index dac150f03445..e1d5013c1291 100644
--- a/include/llvm/Target/TargetPfmCounters.td
+++ b/include/llvm/Target/TargetPfmCounters.td
@@ -1,9 +1,8 @@
//===- TargetPfmCounters.td - Target Pfm Counters -*- tablegen ----------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 808e183f5a5f..a36d259df831 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -1,9 +1,8 @@
//===- TargetSchedule.td - Target Independent Scheduling ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,7 +135,7 @@ class ProcResourceKind;
//
// An optional Super resource may be given to model these resources as
// a subset of the more general super resources. Using one of these
-// resources implies using one of the super resoruces.
+// resources implies using one of the super resources.
//
// ProcResourceUnits normally model a few buffered resources within an
// out-of-order engine. Buffered resources may be held for multiple
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index eb5a14bd21b8..b913a054ac2c 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -1,9 +1,8 @@
//===- TargetSelectionDAG.td - Common code for DAG isels ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,7 +124,7 @@ def SDTIntSatNoShOp : SDTypeProfile<1, 2, [ // ssat with no shift
def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
]>;
-def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix
+def SDTIntScaledBinOp : SDTypeProfile<1, 3, [ // smulfix, umulfix
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
@@ -265,6 +264,11 @@ def SDTAtomic3 : SDTypeProfile<1, 3, [
def SDTAtomic2 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
]>;
+
+def SDTFPAtomic2 : SDTypeProfile<1, 2, [
+ SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
+]>;
+
def SDTAtomicStore : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisInt<1>
]>;
@@ -385,7 +389,10 @@ def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>;
def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
+
def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
+def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
+def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
@@ -444,6 +451,11 @@ def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>;
def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>;
+def lround : SDNode<"ISD::LROUND" , SDTFPToIntOp>;
+def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
+def lrint : SDNode<"ISD::LRINT" , SDTFPToIntOp>;
+def llrint : SDNode<"ISD::LLRINT" , SDTFPToIntOp>;
+
def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;
@@ -455,6 +467,53 @@ def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
+def strict_fadd : SDNode<"ISD::STRICT_FADD",
+ SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
+def strict_fsub : SDNode<"ISD::STRICT_FSUB",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_fmul : SDNode<"ISD::STRICT_FMUL",
+ SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
+def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_frem : SDNode<"ISD::STRICT_FREM",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_fma : SDNode<"ISD::STRICT_FMA",
+ SDTFPTernaryOp, [SDNPHasChain]>;
+def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fsin : SDNode<"ISD::STRICT_FSIN",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fcos : SDNode<"ISD::STRICT_FCOS",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fpow : SDNode<"ISD::STRICT_FPOW",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_frint : SDNode<"ISD::STRICT_FRINT",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fceil : SDNode<"ISD::STRICT_FCEIL",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fround : SDNode<"ISD::STRICT_FROUND",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
+ SDTFPBinOp, [SDNPHasChain,
+ SDNPCommutative, SDNPAssociative]>;
+def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM",
+ SDTFPBinOp, [SDNPHasChain,
+ SDNPCommutative, SDNPAssociative]>;
+def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
+ SDTFPRoundOp, [SDNPHasChain]>;
+def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
+ SDTFPExtendOp, [SDNPHasChain]>;
+
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>;
@@ -511,14 +570,19 @@ def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_fadd : SDNode<"ISD::ATOMIC_LOAD_FADD" , SDTFPAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+def atomic_load_fsub : SDNode<"ISD::ATOMIC_LOAD_FSUB" , SDTFPAtomic2,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+
def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
+def masked_st : SDNode<"ISD::MSTORE", SDTMaskedStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
+def masked_ld : SDNode<"ISD::MLOAD", SDTMaskedLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
@@ -673,6 +737,10 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
// cast<StoreSDNode>(N)->isTruncatingStore();
bit IsTruncStore = ?;
+ // cast<MemSDNode>(N)->getAddressSpace() ==
+ // If this empty, accept any address space.
+ list<int> AddressSpaces = ?;
+
// cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic
bit IsAtomicOrderingMonotonic = ?;
// cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire
@@ -698,6 +766,8 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
// cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
// cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
ValueType ScalarMemoryVT = ?;
+
+ // TODO: Add alignment
}
// PatFrag - A version of PatFrags matching only a single fragment.
@@ -771,14 +841,11 @@ class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm>
def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>;
def vtFP : PatLeaf<(vt), [{ return N->getVT().isFloatingPoint(); }]>;
-def immAllOnesV: PatLeaf<(build_vector), [{
- return ISD::isBuildVectorAllOnes(N);
-}]>;
-def immAllZerosV: PatLeaf<(build_vector), [{
- return ISD::isBuildVectorAllZeros(N);
-}]>;
-
-
+// Use ISD::isBuildVectorAllOnes or ISD::isBuildVectorAllZeros to look for
+// the corresponding build_vector. Will look through bitcasts except when used
+// as a pattern root.
+def immAllOnesV; // ISD::isBuildVectorAllOnes
+def immAllZerosV; // ISD::isBuildVectorAllZeros
// Other helper fragments.
def not : PatFrag<(ops node:$in), (xor node:$in, -1)>;
@@ -1163,6 +1230,87 @@ def setle : PatFrag<(ops node:$lhs, node:$rhs),
def setne : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETNE)>;
+// We don't have strict FP extended loads as single DAG nodes, but we can
+// still provide convenience fragments to match those operations.
+def strict_extloadf32 : PatFrag<(ops node:$ptr),
+ (strict_fpextend (f32 (load node:$ptr)))>;
+def strict_extloadf64 : PatFrag<(ops node:$ptr),
+ (strict_fpextend (f64 (load node:$ptr)))>;
+
+// Convenience fragments to match both strict and non-strict fp operations
+def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fadd node:$lhs, node:$rhs),
+ (fadd node:$lhs, node:$rhs)]>;
+def any_fsub : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fsub node:$lhs, node:$rhs),
+ (fsub node:$lhs, node:$rhs)]>;
+def any_fmul : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fmul node:$lhs, node:$rhs),
+ (fmul node:$lhs, node:$rhs)]>;
+def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fdiv node:$lhs, node:$rhs),
+ (fdiv node:$lhs, node:$rhs)]>;
+def any_frem : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_frem node:$lhs, node:$rhs),
+ (frem node:$lhs, node:$rhs)]>;
+def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(strict_fma node:$src1, node:$src2, node:$src3),
+ (fma node:$src1, node:$src2, node:$src3)]>;
+def any_fsqrt : PatFrags<(ops node:$src),
+ [(strict_fsqrt node:$src),
+ (fsqrt node:$src)]>;
+def any_fsin : PatFrags<(ops node:$src),
+ [(strict_fsin node:$src),
+ (fsin node:$src)]>;
+def any_fcos : PatFrags<(ops node:$src),
+ [(strict_fcos node:$src),
+ (fcos node:$src)]>;
+def any_fexp2 : PatFrags<(ops node:$src),
+ [(strict_fexp2 node:$src),
+ (fexp2 node:$src)]>;
+def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fpow node:$lhs, node:$rhs),
+ (fpow node:$lhs, node:$rhs)]>;
+def any_flog2 : PatFrags<(ops node:$src),
+ [(strict_flog2 node:$src),
+ (flog2 node:$src)]>;
+def any_frint : PatFrags<(ops node:$src),
+ [(strict_frint node:$src),
+ (frint node:$src)]>;
+def any_fnearbyint : PatFrags<(ops node:$src),
+ [(strict_fnearbyint node:$src),
+ (fnearbyint node:$src)]>;
+def any_fceil : PatFrags<(ops node:$src),
+ [(strict_fceil node:$src),
+ (fceil node:$src)]>;
+def any_ffloor : PatFrags<(ops node:$src),
+ [(strict_ffloor node:$src),
+ (ffloor node:$src)]>;
+def any_fround : PatFrags<(ops node:$src),
+ [(strict_fround node:$src),
+ (fround node:$src)]>;
+def any_ftrunc : PatFrags<(ops node:$src),
+ [(strict_ftrunc node:$src),
+ (ftrunc node:$src)]>;
+def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fmaxnum node:$lhs, node:$rhs),
+ (fmaxnum node:$lhs, node:$rhs)]>;
+def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fminnum node:$lhs, node:$rhs),
+ (fminnum node:$lhs, node:$rhs)]>;
+def any_fpround : PatFrags<(ops node:$src),
+ [(strict_fpround node:$src),
+ (fpround node:$src)]>;
+def any_fpextend : PatFrags<(ops node:$src),
+ [(strict_fpextend node:$src),
+ (fpextend node:$src)]>;
+def any_extloadf32 : PatFrags<(ops node:$ptr),
+ [(strict_extloadf32 node:$ptr),
+ (extloadf32 node:$ptr)]>;
+def any_extloadf64 : PatFrags<(ops node:$ptr),
+ [(strict_extloadf64 node:$ptr),
+ (extloadf64 node:$ptr)]>;
+
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> {
diff --git a/include/llvm/Testing/Support/Annotations.h b/include/llvm/Testing/Support/Annotations.h
new file mode 100644
index 000000000000..aad1a44f4ec9
--- /dev/null
+++ b/include/llvm/Testing/Support/Annotations.h
@@ -0,0 +1,90 @@
+//===--- Annotations.h - Annotated source code for tests ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TESTING_SUPPORT_ANNOTATIONS_H
+#define LLVM_TESTING_SUPPORT_ANNOTATIONS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include <tuple>
+#include <vector>
+
+namespace llvm {
+
+/// Annotations lets you mark points and ranges inside source code, for tests:
+///
+/// Annotations Example(R"cpp(
+/// int complete() { x.pri^ } // ^ indicates a point
+/// void err() { [["hello" == 42]]; } // [[this is a range]]
+/// $definition^class Foo{}; // points can be named: "definition"
+/// $fail[[static_assert(false, "")]] // ranges can be named too: "fail"
+/// )cpp");
+///
+/// StringRef Code = Example.code(); // annotations stripped.
+/// std::vector<size_t> PP = Example.points(); // all unnamed points
+/// size_t P = Example.point(); // there must be exactly one
+/// llvm::Range R = Example.range("fail"); // find named ranges
+///
+/// Point and range offsets refer to `code()`, which has the annotations
+/// stripped.
+///
+/// Ranges may be nested (and points can be inside ranges), but there's no way
+/// to define general overlapping ranges.
+///
+/// FIXME: the choice of the marking syntax makes it impossible to represent
+/// some of the C++ and Objective C constructs (including common ones
+/// like C++ attributes). We can fix this by:
+/// 1. introducing an escaping mechanism for the special characters,
+/// 2. making characters for marking points and ranges configurable,
+/// 3. changing the syntax to something less commonly used,
+/// 4. ...
+class Annotations {
+public:
+  /// Two offsets pointing to a contiguous substring. End is not included,
+  /// i.e. the range is half-open.
+ struct Range {
+ size_t Begin = 0;
+ size_t End = 0;
+
+ friend bool operator==(const Range &L, const Range &R) {
+ return std::tie(L.Begin, L.End) == std::tie(R.Begin, R.End);
+ }
+ friend bool operator!=(const Range &L, const Range &R) { return !(L == R); }
+ };
+
+ /// Parses the annotations from Text. Crashes if it's malformed.
+ Annotations(llvm::StringRef Text);
+
+ /// The input text with all annotations stripped.
+ /// All points and ranges are relative to this stripped text.
+ llvm::StringRef code() const { return Code; }
+
+ /// Returns the position of the point marked by ^ (or $name^) in the text.
+ /// Crashes if there isn't exactly one.
+ size_t point(llvm::StringRef Name = "") const;
+ /// Returns the position of all points marked by ^ (or $name^) in the text.
+ std::vector<size_t> points(llvm::StringRef Name = "") const;
+
+ /// Returns the location of the range marked by [[ ]] (or $name[[ ]]).
+ /// Crashes if there isn't exactly one.
+ Range range(llvm::StringRef Name = "") const;
+ /// Returns the location of all ranges marked by [[ ]] (or $name[[ ]]).
+ std::vector<Range> ranges(llvm::StringRef Name = "") const;
+
+private:
+ std::string Code;
+ llvm::StringMap<llvm::SmallVector<size_t, 1>> Points;
+ llvm::StringMap<llvm::SmallVector<Range, 1>> Ranges;
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &O,
+ const llvm::Annotations::Range &R);
+
+} // namespace llvm
+
+#endif
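To illustrate how the new header is meant to be used, here is a minimal sketch only (not part of the import); the test name, input string, and expectations are invented:

  #include "llvm/Testing/Support/Annotations.h"
  #include "gtest/gtest.h"

  TEST(AnnotationsExample, PointsAndRanges) {
    llvm::Annotations Example("int x = $val^42; [[return x;]]");
    // code() is the input with the ^ and [[ ]] markers stripped.
    EXPECT_EQ(Example.code(), "int x = 42; return x;");
    // The named point "val" sits right before the literal 42.
    EXPECT_EQ(Example.point("val"), Example.code().find("42"));
    // The single unnamed range covers "return x;".
    llvm::Annotations::Range R = Example.range();
    EXPECT_EQ(Example.code().substr(R.Begin, R.End - R.Begin), "return x;");
  }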
diff --git a/include/llvm/Testing/Support/Error.h b/include/llvm/Testing/Support/Error.h
index 0e5b5403ce87..85328f26440b 100644
--- a/include/llvm/Testing/Support/Error.h
+++ b/include/llvm/Testing/Support/Error.h
@@ -1,9 +1,8 @@
//===- llvm/Testing/Support/Error.h ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Testing/Support/SupportHelpers.h b/include/llvm/Testing/Support/SupportHelpers.h
index b2975ec395d5..38726b1cfaf7 100644
--- a/include/llvm/Testing/Support/SupportHelpers.h
+++ b/include/llvm/Testing/Support/SupportHelpers.h
@@ -1,18 +1,19 @@
//===- Testing/Support/SupportHelpers.h -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
#define LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "gmock/gmock-matchers.h"
#include "gtest/gtest-printers.h"
#include <string>
@@ -54,11 +55,56 @@ void PrintTo(const ExpectedHolder<T> &Item, std::ostream *Out) {
PrintTo(static_cast<const ErrorHolder &>(Item), Out);
}
}
+
+template <class InnerMatcher> class ValueIsMatcher {
+public:
+ explicit ValueIsMatcher(InnerMatcher ValueMatcher)
+ : ValueMatcher(ValueMatcher) {}
+
+ template <class T>
+ operator ::testing::Matcher<const llvm::Optional<T> &>() const {
+ return ::testing::MakeMatcher(
+ new Impl<T>(::testing::SafeMatcherCast<T>(ValueMatcher)));
+ }
+
+ template <class T>
+ class Impl : public ::testing::MatcherInterface<const llvm::Optional<T> &> {
+ public:
+ explicit Impl(const ::testing::Matcher<T> &ValueMatcher)
+ : ValueMatcher(ValueMatcher) {}
+
+ bool MatchAndExplain(const llvm::Optional<T> &Input,
+ testing::MatchResultListener *L) const override {
+ return Input && ValueMatcher.MatchAndExplain(Input.getValue(), L);
+ }
+
+ void DescribeTo(std::ostream *OS) const override {
+ *OS << "has a value that ";
+ ValueMatcher.DescribeTo(OS);
+ }
+ void DescribeNegationTo(std::ostream *OS) const override {
+ *OS << "does not have a value that ";
+ ValueMatcher.DescribeTo(OS);
+ }
+
+ private:
+ testing::Matcher<T> ValueMatcher;
+ };
+
+private:
+ InnerMatcher ValueMatcher;
+};
} // namespace detail
+/// Matches an llvm::Optional<T> with a value that conforms to an inner matcher.
+/// To match llvm::None you could use Eq(llvm::None).
+template <class InnerMatcher>
+detail::ValueIsMatcher<InnerMatcher> ValueIs(const InnerMatcher &ValueMatcher) {
+ return detail::ValueIsMatcher<InnerMatcher>(ValueMatcher);
+}
namespace unittest {
SmallString<128> getInputFileDirectory(const char *Argv0);
-}
+} // namespace unittest
} // namespace llvm
#endif
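A brief usage sketch for the new ValueIs matcher (illustrative only, not part of the import; the values are arbitrary). It composes with ordinary gmock matchers, and llvm::None is matched with Eq(llvm::None) as the comment above suggests:

  #include "llvm/ADT/Optional.h"
  #include "llvm/Testing/Support/SupportHelpers.h"
  #include "gmock/gmock.h"
  #include "gtest/gtest.h"

  TEST(ValueIsExample, MatchesContainedValue) {
    llvm::Optional<int> Present = 42;
    llvm::Optional<int> Missing;
    // Matches when the Optional holds a value satisfying the inner matcher.
    EXPECT_THAT(Present, llvm::ValueIs(::testing::Gt(40)));
    // An empty Optional is checked against llvm::None directly.
    EXPECT_THAT(Missing, ::testing::Eq(llvm::None));
  }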
diff --git a/include/llvm/TextAPI/ELF/ELFStub.h b/include/llvm/TextAPI/ELF/ELFStub.h
index fa54e6f8b711..76b2af121662 100644
--- a/include/llvm/TextAPI/ELF/ELFStub.h
+++ b/include/llvm/TextAPI/ELF/ELFStub.h
@@ -1,9 +1,8 @@
//===- ELFStub.h ------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------------===/
///
diff --git a/include/llvm/TextAPI/ELF/TBEHandler.h b/include/llvm/TextAPI/ELF/TBEHandler.h
index 91521c656fa2..1748fd13f3dc 100644
--- a/include/llvm/TextAPI/ELF/TBEHandler.h
+++ b/include/llvm/TextAPI/ELF/TBEHandler.h
@@ -1,9 +1,8 @@
//===- TBEHandler.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------------===/
///
diff --git a/include/llvm/TextAPI/MachO/Architecture.def b/include/llvm/TextAPI/MachO/Architecture.def
new file mode 100644
index 000000000000..4c695fe18eec
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Architecture.def
@@ -0,0 +1,38 @@
+//===- llvm/TextAPI/MachO/Architecture.def - Architecture -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARCHINFO
+#define ARCHINFO(arch)
+#endif
+
+///
+/// X86 architectures sorted by cpu type and sub type id.
+///
+ARCHINFO(i386, MachO::CPU_TYPE_I386, MachO::CPU_SUBTYPE_I386_ALL)
+ARCHINFO(x86_64, MachO::CPU_TYPE_X86_64, MachO::CPU_SUBTYPE_X86_64_ALL)
+ARCHINFO(x86_64h, MachO::CPU_TYPE_X86_64, MachO::CPU_SUBTYPE_X86_64_H)
+
+
+///
+/// ARM architectures sorted by cpu sub type id.
+///
+ARCHINFO(armv4t, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V4T)
+ARCHINFO(armv6, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V6)
+ARCHINFO(armv5, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ARCHINFO(armv7, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7)
+ARCHINFO(armv7s, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7S)
+ARCHINFO(armv7k, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7K)
+ARCHINFO(armv6m, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V6M)
+ARCHINFO(armv7m, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7M)
+ARCHINFO(armv7em, MachO::CPU_TYPE_ARM, MachO::CPU_SUBTYPE_ARM_V7EM)
+
+
+///
+/// ARM64 architectures sorted by cpu sub type id.
+///
+ARCHINFO(arm64, MachO::CPU_TYPE_ARM64, MachO::CPU_SUBTYPE_ARM64_ALL)
diff --git a/include/llvm/TextAPI/MachO/Architecture.h b/include/llvm/TextAPI/MachO/Architecture.h
new file mode 100644
index 000000000000..055baeb0c0f0
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Architecture.h
@@ -0,0 +1,47 @@
+//===- llvm/TextAPI/MachO/Architecture.h - Architecture ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the architecture enum and helper methods.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace MachO {
+
+/// Defines the architecture slices that are supported by Text-based Stub files.
+enum Architecture : uint8_t {
+#define ARCHINFO(Arch, Type, SubType) AK_##Arch,
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+ AK_unknown, // this has to go last.
+};
+
+/// Convert a CPU Type and Subtype pair to an architecture slice.
+Architecture getArchitectureFromCpuType(uint32_t CPUType, uint32_t CPUSubType);
+
+/// Convert a name to an architecture slice.
+Architecture getArchitectureFromName(StringRef Name);
+
+/// Convert an architecture slice to a string.
+StringRef getArchitectureName(Architecture Arch);
+
+/// Convert an architecture slice to a CPU Type and Subtype pair.
+std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch);
+
+raw_ostream &operator<<(raw_ostream &OS, Architecture Arch);
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
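For context, the .def file above follows the usual X-macro pattern: a client defines ARCHINFO, includes the file, and gets one expansion per architecture. A hypothetical name lookup (not the actual implementation) could be written as:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/TextAPI/MachO/Architecture.h"

  // Hypothetical sketch of a name-to-enum lookup built from Architecture.def;
  // the in-tree getArchitectureFromName presumably does something similar.
  static llvm::MachO::Architecture lookupArchByName(llvm::StringRef Name) {
    using namespace llvm::MachO;
  #define ARCHINFO(Arch, Type, SubType)                                        \
    if (Name == #Arch)                                                         \
      return AK_##Arch;
  #include "llvm/TextAPI/MachO/Architecture.def"
  #undef ARCHINFO
    return AK_unknown;
  }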
diff --git a/include/llvm/TextAPI/MachO/ArchitectureSet.h b/include/llvm/TextAPI/MachO/ArchitectureSet.h
new file mode 100644
index 000000000000..d8dfc7f1af21
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/ArchitectureSet.h
@@ -0,0 +1,159 @@
+//===- llvm/TextAPI/MachO/ArchitectureSet.h - ArchitectureSet ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the architecture set.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_SET_H
+#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_SET_H
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <vector>
+
+namespace llvm {
+namespace MachO {
+
+class ArchitectureSet {
+private:
+ using ArchSetType = uint32_t;
+
+ const static ArchSetType EndIndexVal =
+ std::numeric_limits<ArchSetType>::max();
+ ArchSetType ArchSet{0};
+
+public:
+ constexpr ArchitectureSet() = default;
+ constexpr ArchitectureSet(ArchSetType Raw) : ArchSet(Raw) {}
+ ArchitectureSet(Architecture Arch) : ArchitectureSet() { set(Arch); }
+ ArchitectureSet(const std::vector<Architecture> &Archs);
+
+ void set(Architecture Arch) {
+ if (Arch == AK_unknown)
+ return;
+ ArchSet |= 1U << static_cast<int>(Arch);
+ }
+
+ void clear(Architecture Arch) { ArchSet &= ~(1U << static_cast<int>(Arch)); }
+
+ bool has(Architecture Arch) const {
+ return ArchSet & (1U << static_cast<int>(Arch));
+ }
+
+ bool contains(ArchitectureSet Archs) const {
+ return (ArchSet & Archs.ArchSet) == Archs.ArchSet;
+ }
+
+ size_t count() const;
+
+ bool empty() const { return ArchSet == 0; }
+
+ ArchSetType rawValue() const { return ArchSet; }
+
+ template <typename Ty>
+ class arch_iterator
+ : public std::iterator<std::forward_iterator_tag, Architecture, size_t> {
+ private:
+ ArchSetType Index;
+ Ty *ArchSet;
+
+ void findNextSetBit() {
+ if (Index == EndIndexVal)
+ return;
+ while (++Index < sizeof(Ty) * 8) {
+ if (*ArchSet & (1UL << Index))
+ return;
+ }
+
+ Index = EndIndexVal;
+ }
+
+ public:
+ arch_iterator(Ty *ArchSet, ArchSetType Index = 0)
+ : Index(Index), ArchSet(ArchSet) {
+ if (Index != EndIndexVal && !(*ArchSet & (1UL << Index)))
+ findNextSetBit();
+ }
+
+ Architecture operator*() const { return static_cast<Architecture>(Index); }
+
+ arch_iterator &operator++() {
+ findNextSetBit();
+ return *this;
+ }
+
+ arch_iterator operator++(int) {
+ auto tmp = *this;
+ findNextSetBit();
+ return tmp;
+ }
+
+ bool operator==(const arch_iterator &o) const {
+ return std::tie(Index, ArchSet) == std::tie(o.Index, o.ArchSet);
+ }
+
+ bool operator!=(const arch_iterator &o) const { return !(*this == o); }
+ };
+
+ ArchitectureSet operator&(const ArchitectureSet &o) {
+ return {ArchSet & o.ArchSet};
+ }
+
+ ArchitectureSet operator|(const ArchitectureSet &o) {
+ return {ArchSet | o.ArchSet};
+ }
+
+ ArchitectureSet &operator|=(const ArchitectureSet &o) {
+ ArchSet |= o.ArchSet;
+ return *this;
+ }
+
+ ArchitectureSet &operator|=(const Architecture &Arch) {
+ set(Arch);
+ return *this;
+ }
+
+ bool operator==(const ArchitectureSet &o) const {
+ return ArchSet == o.ArchSet;
+ }
+
+ bool operator!=(const ArchitectureSet &o) const {
+ return ArchSet != o.ArchSet;
+ }
+
+ bool operator<(const ArchitectureSet &o) const { return ArchSet < o.ArchSet; }
+
+ using iterator = arch_iterator<ArchSetType>;
+ using const_iterator = arch_iterator<const ArchSetType>;
+
+ iterator begin() { return {&ArchSet}; }
+ iterator end() { return {&ArchSet, EndIndexVal}; }
+
+ const_iterator begin() const { return {&ArchSet}; }
+ const_iterator end() const { return {&ArchSet, EndIndexVal}; }
+
+ operator std::string() const;
+ operator std::vector<Architecture>() const;
+ void print(raw_ostream &OS) const;
+};
+
+inline ArchitectureSet operator|(const Architecture &lhs,
+ const Architecture &rhs) {
+ return ArchitectureSet(lhs) | ArchitectureSet(rhs);
+}
+
+raw_ostream &operator<<(raw_ostream &OS, ArchitectureSet Set);
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_SET_H
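A short sketch of the bit-set semantics defined above (illustrative only; the chosen architectures are arbitrary):

  #include "llvm/Support/raw_ostream.h"
  #include "llvm/TextAPI/MachO/ArchitectureSet.h"

  void architectureSetExample() {
    using namespace llvm::MachO;
    // Each Architecture occupies one bit; AK_unknown is never recorded.
    ArchitectureSet Archs = AK_x86_64 | AK_arm64;
    Archs |= AK_i386;

    bool HasArm64 = Archs.has(AK_arm64);                                  // true
    bool HasBoth = Archs.contains(ArchitectureSet(AK_i386) | AK_x86_64);  // true

    // Iteration visits only the set bits, in increasing enum order.
    for (Architecture A : Archs)
      llvm::errs() << getArchitectureName(A) << " ";
    llvm::errs() << "\n";
    (void)HasArm64;
    (void)HasBoth;
  }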
diff --git a/include/llvm/TextAPI/MachO/InterfaceFile.h b/include/llvm/TextAPI/MachO/InterfaceFile.h
new file mode 100644
index 000000000000..e722449d52f1
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/InterfaceFile.h
@@ -0,0 +1,436 @@
+//===- llvm/TextAPI/MachO/IntefaceFile.h - TAPI Interface File --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A generic and abstract interface representation for linkable objects. This
+// could be a MachO executable, bundle, dylib, or text-based stub file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_INTERFACE_FILE_H
+#define LLVM_TEXTAPI_MACHO_INTERFACE_FILE_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/TextAPI/MachO/Symbol.h"
+
+namespace llvm {
+namespace MachO {
+
+/// Defines the list of MachO platforms.
+enum class PlatformKind : unsigned {
+ unknown,
+ macOS = MachO::PLATFORM_MACOS,
+ iOS = MachO::PLATFORM_IOS,
+ tvOS = MachO::PLATFORM_TVOS,
+ watchOS = MachO::PLATFORM_WATCHOS,
+ bridgeOS = MachO::PLATFORM_BRIDGEOS,
+};
+
+/// Defines a list of Objective-C constraints.
+enum class ObjCConstraintType : unsigned {
+ /// No constraint.
+ None = 0,
+
+ /// Retain/Release.
+ Retain_Release = 1,
+
+ /// Retain/Release for Simulator.
+ Retain_Release_For_Simulator = 2,
+
+ /// Retain/Release or Garbage Collection.
+ Retain_Release_Or_GC = 3,
+
+ /// Garbage Collection.
+ GC = 4,
+};
+
+// clang-format off
+
+/// Defines the file type this file represents.
+enum FileType : unsigned {
+ /// Invalid file type.
+ Invalid = 0U,
+
+ /// Text-based stub file (.tbd) version 1.0
+ TBD_V1 = 1U << 0,
+
+ /// Text-based stub file (.tbd) version 2.0
+ TBD_V2 = 1U << 1,
+
+ /// Text-based stub file (.tbd) version 3.0
+ TBD_V3 = 1U << 2,
+
+ All = ~0U,
+
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All),
+};
+
+// clang-format on
+
+/// Reference to an interface file.
+class InterfaceFileRef {
+public:
+ InterfaceFileRef() = default;
+
+ InterfaceFileRef(StringRef InstallName) : InstallName(InstallName) {}
+
+ InterfaceFileRef(StringRef InstallName, ArchitectureSet Archs)
+ : InstallName(InstallName), Architectures(Archs) {}
+
+ StringRef getInstallName() const { return InstallName; };
+ void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; }
+ ArchitectureSet getArchitectures() const { return Architectures; }
+ bool hasArchitecture(Architecture Arch) const {
+ return Architectures.has(Arch);
+ }
+
+ bool operator==(const InterfaceFileRef &O) const {
+ return std::tie(InstallName, Architectures) ==
+ std::tie(O.InstallName, O.Architectures);
+ }
+
+ bool operator<(const InterfaceFileRef &O) const {
+ return std::tie(InstallName, Architectures) <
+ std::tie(O.InstallName, O.Architectures);
+ }
+
+private:
+ std::string InstallName;
+ ArchitectureSet Architectures;
+};
+
+} // end namespace MachO.
+
+struct SymbolsMapKey {
+ MachO::SymbolKind Kind;
+ StringRef Name;
+
+ SymbolsMapKey(MachO::SymbolKind Kind, StringRef Name)
+ : Kind(Kind), Name(Name) {}
+};
+template <> struct DenseMapInfo<SymbolsMapKey> {
+ static inline SymbolsMapKey getEmptyKey() {
+ return SymbolsMapKey(MachO::SymbolKind::GlobalSymbol, StringRef{});
+ }
+
+ static inline SymbolsMapKey getTombstoneKey() {
+ return SymbolsMapKey(MachO::SymbolKind::ObjectiveCInstanceVariable,
+ StringRef{});
+ }
+
+ static unsigned getHashValue(const SymbolsMapKey &Key) {
+ return hash_combine(hash_value(Key.Kind), hash_value(Key.Name));
+ }
+
+ static bool isEqual(const SymbolsMapKey &LHS, const SymbolsMapKey &RHS) {
+ return std::tie(LHS.Kind, LHS.Name) == std::tie(RHS.Kind, RHS.Name);
+ }
+};
+
+namespace MachO {
+
+/// Defines the interface file.
+class InterfaceFile {
+public:
+ /// Set the path from which this file was generated (if applicable).
+ ///
+ /// \param Path_ The path to the source file.
+ void setPath(StringRef Path_) { Path = Path_; }
+
+ /// Get the path from which this file was generated (if applicable).
+ ///
+ /// \return The path to the source file or empty.
+ StringRef getPath() const { return Path; }
+
+ /// Set the file type.
+ ///
+ /// This is used by the YAML writer to identify the specification it should
+ /// use for writing the file.
+ ///
+ /// \param Kind The file type.
+ void setFileType(FileType Kind) { FileKind = Kind; }
+
+ /// Get the file type.
+ ///
+ /// \return The file type.
+ FileType getFileType() const { return FileKind; }
+
+ /// Set the platform.
+ void setPlatform(PlatformKind Platform_) { Platform = Platform_; }
+
+ /// Get the platform.
+ PlatformKind getPlatform() const { return Platform; }
+
+  /// Set the architectures supported by this file.
+ void setArchitectures(ArchitectureSet Architectures_) {
+ Architectures = Architectures_;
+ }
+
+  /// Add to the set of architectures supported by this file.
+ void addArchitectures(ArchitectureSet Architectures_) {
+ Architectures |= Architectures_;
+ }
+
+  /// Add an architecture supported by this file.
+ void addArch(Architecture Arch) { Architectures.set(Arch); }
+
+ /// Get the set of supported architectures.
+ ArchitectureSet getArchitectures() const { return Architectures; }
+
+ /// Set the install name of the library.
+ void setInstallName(StringRef InstallName_) { InstallName = InstallName_; }
+
+ /// Get the install name of the library.
+ StringRef getInstallName() const { return InstallName; }
+
+ /// Set the current version of the library.
+ void setCurrentVersion(PackedVersion Version) { CurrentVersion = Version; }
+
+ /// Get the current version of the library.
+ PackedVersion getCurrentVersion() const { return CurrentVersion; }
+
+ /// Set the compatibility version of the library.
+ void setCompatibilityVersion(PackedVersion Version) {
+ CompatibilityVersion = Version;
+ }
+
+ /// Get the compatibility version of the library.
+ PackedVersion getCompatibilityVersion() const { return CompatibilityVersion; }
+
+ /// Set the Swift ABI version of the library.
+ void setSwiftABIVersion(uint8_t Version) { SwiftABIVersion = Version; }
+
+ /// Get the Swift ABI version of the library.
+ uint8_t getSwiftABIVersion() const { return SwiftABIVersion; }
+
+ /// Specify if the library uses two-level namespace (or flat namespace).
+ void setTwoLevelNamespace(bool V = true) { IsTwoLevelNamespace = V; }
+
+ /// Check if the library uses two-level namespace.
+ bool isTwoLevelNamespace() const { return IsTwoLevelNamespace; }
+
+ /// Specify if the library is application extension safe (or not).
+ void setApplicationExtensionSafe(bool V = true) { IsAppExtensionSafe = V; }
+
+ /// Check if the library is application extension safe.
+ bool isApplicationExtensionSafe() const { return IsAppExtensionSafe; }
+
+ /// Set the Objective-C constraint.
+ void setObjCConstraint(ObjCConstraintType Constraint) {
+ ObjcConstraint = Constraint;
+ }
+
+ /// Get the Objective-C constraint.
+ ObjCConstraintType getObjCConstraint() const { return ObjcConstraint; }
+
+ /// Specify if this file was generated during InstallAPI (or not).
+ void setInstallAPI(bool V = true) { IsInstallAPI = V; }
+
+ /// Check if this file was generated during InstallAPI.
+ bool isInstallAPI() const { return IsInstallAPI; }
+
+ /// Set the parent umbrella framework.
+ void setParentUmbrella(StringRef Parent) { ParentUmbrella = Parent; }
+
+ /// Get the parent umbrella framework.
+ StringRef getParentUmbrella() const { return ParentUmbrella; }
+
+ /// Add an allowable client.
+ ///
+ /// Mach-O Dynamic libraries have the concept of allowable clients that are
+  /// checked at static link time. The name of the application or library being
+  /// generated must match one of the allowable clients, or the linker refuses
+  /// to link this library.
+ ///
+ /// \param Name The name of the client that is allowed to link this library.
+ /// \param Architectures The set of architecture for which this applies.
+ void addAllowableClient(StringRef Name, ArchitectureSet Architectures);
+
+ /// Get the list of allowable clients.
+ ///
+ /// \return Returns a list of allowable clients.
+ const std::vector<InterfaceFileRef> &allowableClients() const {
+ return AllowableClients;
+ }
+
+ /// Add a re-exported library.
+ ///
+ /// \param InstallName The name of the library to re-export.
+ /// \param Architectures The set of architecture for which this applies.
+ void addReexportedLibrary(StringRef InstallName,
+ ArchitectureSet Architectures);
+
+ /// Get the list of re-exported libraries.
+ ///
+ /// \return Returns a list of re-exported libraries.
+ const std::vector<InterfaceFileRef> &reexportedLibraries() const {
+ return ReexportedLibraries;
+ }
+
+ /// Add an architecture/UUID pair.
+ ///
+ /// \param Arch The architecture for which this applies.
+ /// \param UUID The UUID of the library for the specified architecture.
+ void addUUID(Architecture Arch, StringRef UUID);
+
+ /// Add an architecture/UUID pair.
+ ///
+ /// \param Arch The architecture for which this applies.
+ /// \param UUID The UUID of the library for the specified architecture.
+ void addUUID(Architecture Arch, uint8_t UUID[16]);
+
+ /// Get the list of architecture/UUID pairs.
+ ///
+ /// \return Returns a list of architecture/UUID pairs.
+ const std::vector<std::pair<Architecture, std::string>> &uuids() const {
+ return UUIDs;
+ }
+
+ /// Add a symbol to the symbols list or extend an existing one.
+ void addSymbol(SymbolKind Kind, StringRef Name, ArchitectureSet Architectures,
+ SymbolFlags Flags = SymbolFlags::None);
+
+ using SymbolMapType = DenseMap<SymbolsMapKey, Symbol *>;
+ struct const_symbol_iterator
+ : public iterator_adaptor_base<
+ const_symbol_iterator, SymbolMapType::const_iterator,
+ std::forward_iterator_tag, const Symbol *, ptrdiff_t,
+ const Symbol *, const Symbol *> {
+ const_symbol_iterator() = default;
+
+ template <typename U>
+ const_symbol_iterator(U &&u)
+ : iterator_adaptor_base(std::forward<U &&>(u)) {}
+
+ reference operator*() const { return I->second; }
+ pointer operator->() const { return I->second; }
+ };
+ using const_symbol_range = iterator_range<const_symbol_iterator>;
+
+ // Custom iterator to return only exported symbols.
+ struct const_export_iterator
+ : public iterator_adaptor_base<
+ const_export_iterator, const_symbol_iterator,
+ std::forward_iterator_tag, const Symbol *> {
+ const_symbol_iterator _end;
+
+ void skipToNextSymbol() {
+ while (I != _end && I->isUndefined())
+ ++I;
+ }
+
+ const_export_iterator() = default;
+ template <typename U>
+ const_export_iterator(U &&it, U &&end)
+ : iterator_adaptor_base(std::forward<U &&>(it)),
+ _end(std::forward<U &&>(end)) {
+ skipToNextSymbol();
+ }
+
+ const_export_iterator &operator++() {
+ ++I;
+ skipToNextSymbol();
+ return *this;
+ }
+
+ const_export_iterator operator++(int) {
+ const_export_iterator tmp(*this);
+ ++(*this);
+ return tmp;
+ }
+ };
+ using const_export_range = llvm::iterator_range<const_export_iterator>;
+
+ // Custom iterator to return only undefined symbols.
+ struct const_undefined_iterator
+ : public iterator_adaptor_base<
+ const_undefined_iterator, const_symbol_iterator,
+ std::forward_iterator_tag, const Symbol *> {
+ const_symbol_iterator _end;
+
+ void skipToNextSymbol() {
+ while (I != _end && !I->isUndefined())
+ ++I;
+ }
+
+ const_undefined_iterator() = default;
+ template <typename U>
+ const_undefined_iterator(U &&it, U &&end)
+ : iterator_adaptor_base(std::forward<U &&>(it)),
+ _end(std::forward<U &&>(end)) {
+ skipToNextSymbol();
+ }
+
+ const_undefined_iterator &operator++() {
+ ++I;
+ skipToNextSymbol();
+ return *this;
+ }
+
+ const_undefined_iterator operator++(int) {
+ const_undefined_iterator tmp(*this);
+ ++(*this);
+ return tmp;
+ }
+ };
+ using const_undefined_range = llvm::iterator_range<const_undefined_iterator>;
+
+ const_symbol_range symbols() const {
+ return {Symbols.begin(), Symbols.end()};
+ }
+ const_export_range exports() const {
+ return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}};
+ }
+ const_undefined_range undefineds() const {
+ return {{Symbols.begin(), Symbols.end()}, {Symbols.end(), Symbols.end()}};
+ }
+
+private:
+ llvm::BumpPtrAllocator Allocator;
+ StringRef copyString(StringRef String) {
+ if (String.empty())
+ return {};
+
+ void *Ptr = Allocator.Allocate(String.size(), 1);
+ memcpy(Ptr, String.data(), String.size());
+ return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+ }
+
+ std::string Path;
+ FileType FileKind;
+ PlatformKind Platform;
+ ArchitectureSet Architectures;
+ std::string InstallName;
+ PackedVersion CurrentVersion;
+ PackedVersion CompatibilityVersion;
+ uint8_t SwiftABIVersion{0};
+ bool IsTwoLevelNamespace{false};
+ bool IsAppExtensionSafe{false};
+ bool IsInstallAPI{false};
+ ObjCConstraintType ObjcConstraint = ObjCConstraintType::None;
+ std::string ParentUmbrella;
+ std::vector<InterfaceFileRef> AllowableClients;
+ std::vector<InterfaceFileRef> ReexportedLibraries;
+ std::vector<std::pair<Architecture, std::string>> UUIDs;
+ SymbolMapType Symbols;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_INTERFACE_FILE_H
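To sketch the intended use of the setters and symbol iterators above (illustrative only; the install name, version, and symbol are made up):

  #include "llvm/TextAPI/MachO/InterfaceFile.h"

  void interfaceFileExample() {
    using namespace llvm::MachO;
    InterfaceFile File;
    File.setFileType(FileType::TBD_V3);
    File.setPlatform(PlatformKind::macOS);
    File.setInstallName("/usr/lib/libExample.dylib");
    File.setCurrentVersion(PackedVersion(1, 2, 3));
    File.addArch(AK_x86_64);
    File.setTwoLevelNamespace();

    // Record one exported global symbol for the x86_64 slice.
    File.addSymbol(SymbolKind::GlobalSymbol, "_example_entry", AK_x86_64);

    // exports() filters out undefined symbols; undefineds() does the opposite.
    for (const Symbol *Sym : File.exports())
      (void)Sym->getName();
  }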
diff --git a/include/llvm/TextAPI/MachO/PackedVersion.h b/include/llvm/TextAPI/MachO/PackedVersion.h
new file mode 100644
index 000000000000..2d0138097dd9
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/PackedVersion.h
@@ -0,0 +1,64 @@
+//===- llvm/TextAPI/MachO/PackedVersion.h - PackedVersion -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the Mach-O packed version format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_PACKED_VERSION_H
+#define LLVM_TEXTAPI_MACHO_PACKED_VERSION_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace MachO {
+
+class PackedVersion {
+ uint32_t Version{0};
+
+public:
+ constexpr PackedVersion() = default;
+ explicit constexpr PackedVersion(uint32_t RawVersion) : Version(RawVersion) {}
+ PackedVersion(unsigned Major, unsigned Minor, unsigned Subminor)
+ : Version((Major << 16) | ((Minor & 0xff) << 8) | (Subminor & 0xff)) {}
+
+ bool empty() const { return Version == 0; }
+
+ /// Retrieve the major version number.
+ unsigned getMajor() const { return Version >> 16; }
+
+ /// Retrieve the minor version number, if provided.
+ unsigned getMinor() const { return (Version >> 8) & 0xff; }
+
+ /// Retrieve the subminor version number, if provided.
+ unsigned getSubminor() const { return Version & 0xff; }
+
+ bool parse32(StringRef Str);
+ std::pair<bool, bool> parse64(StringRef Str);
+
+ bool operator<(const PackedVersion &O) const { return Version < O.Version; }
+
+ bool operator==(const PackedVersion &O) const { return Version == O.Version; }
+
+ bool operator!=(const PackedVersion &O) const { return Version != O.Version; }
+
+ uint32_t rawValue() const { return Version; }
+
+ void print(raw_ostream &OS) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const PackedVersion &Version) {
+ Version.print(OS);
+ return OS;
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_PACKED_VERSION_H
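A worked example of the packing scheme above: the major version occupies the high 16 bits and the minor and subminor one byte each, so 1.2.3 encodes as 0x00010203 (illustrative sketch only):

  #include <cassert>
  #include "llvm/TextAPI/MachO/PackedVersion.h"

  void packedVersionExample() {
    using llvm::MachO::PackedVersion;
    PackedVersion V(1, 2, 3);
    assert(V.rawValue() == 0x00010203u); // (1 << 16) | (2 << 8) | 3
    assert(V.getMajor() == 1 && V.getMinor() == 2 && V.getSubminor() == 3);
    // Minor and subminor are masked to 8 bits by the constructor.
    assert(PackedVersion(1, 256, 0).getMinor() == 0);
  }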
diff --git a/include/llvm/TextAPI/MachO/Symbol.h b/include/llvm/TextAPI/MachO/Symbol.h
new file mode 100644
index 000000000000..3c7ff5e0f4ea
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/Symbol.h
@@ -0,0 +1,96 @@
+//===- llvm/TextAPI/Symbol.h - TAPI Symbol ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_SYMBOL_H
+#define LLVM_TEXTAPI_MACHO_SYMBOL_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+
+namespace llvm {
+namespace MachO {
+
+// clang-format off
+
+/// Symbol flags.
+enum class SymbolFlags : uint8_t {
+ /// No flags
+ None = 0,
+
+ /// Thread-local value symbol
+ ThreadLocalValue = 1U << 0,
+
+ /// Weak defined symbol
+ WeakDefined = 1U << 1,
+
+ /// Weak referenced symbol
+ WeakReferenced = 1U << 2,
+
+ /// Undefined
+ Undefined = 1U << 3,
+
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Undefined),
+};
+
+// clang-format on
+
+enum class SymbolKind : uint8_t {
+ GlobalSymbol,
+ ObjectiveCClass,
+ ObjectiveCClassEHType,
+ ObjectiveCInstanceVariable,
+};
+
+class Symbol {
+public:
+ constexpr Symbol(SymbolKind Kind, StringRef Name,
+ ArchitectureSet Architectures, SymbolFlags Flags)
+ : Name(Name), Architectures(Architectures), Kind(Kind), Flags(Flags) {}
+
+ SymbolKind getKind() const { return Kind; }
+ StringRef getName() const { return Name; }
+ ArchitectureSet getArchitectures() const { return Architectures; }
+ void addArchitectures(ArchitectureSet Archs) { Architectures |= Archs; }
+ SymbolFlags getFlags() const { return Flags; }
+
+ bool isWeakDefined() const {
+ return (Flags & SymbolFlags::WeakDefined) == SymbolFlags::WeakDefined;
+ }
+
+ bool isWeakReferenced() const {
+ return (Flags & SymbolFlags::WeakReferenced) == SymbolFlags::WeakReferenced;
+ }
+
+ bool isThreadLocalValue() const {
+ return (Flags & SymbolFlags::ThreadLocalValue) ==
+ SymbolFlags::ThreadLocalValue;
+ }
+
+ bool isUndefined() const {
+ return (Flags & SymbolFlags::Undefined) == SymbolFlags::Undefined;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(raw_ostream &OS) const;
+ void dump() const { dump(llvm::errs()); }
+#endif
+
+private:
+ StringRef Name;
+ ArchitectureSet Architectures;
+ SymbolKind Kind;
+ SymbolFlags Flags;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_SYMBOL_H
diff --git a/include/llvm/TextAPI/MachO/TextAPIReader.h b/include/llvm/TextAPI/MachO/TextAPIReader.h
new file mode 100644
index 000000000000..6d9c09de5294
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/TextAPIReader.h
@@ -0,0 +1,34 @@
+//===--- TextAPIReader.h - Text API Reader ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_READER_H
+#define LLVM_TEXTAPI_MACHO_READER_H
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace MachO {
+
+class InterfaceFile;
+
+class TextAPIReader {
+public:
+ static Expected<std::unique_ptr<InterfaceFile>>
+ get(std::unique_ptr<MemoryBuffer> InputBuffer);
+
+ static Expected<std::unique_ptr<InterfaceFile>>
+ getUnmanaged(llvm::MemoryBuffer *InputBuffer);
+
+ TextAPIReader() = delete;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_READER_H
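A minimal read-side sketch for the API above (illustrative only; the helper name is invented and error handling is deliberately terse):

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/TextAPI/MachO/InterfaceFile.h"
  #include "llvm/TextAPI/MachO/TextAPIReader.h"

  // Hypothetical helper: load a .tbd file and parse it into an InterfaceFile.
  llvm::Expected<std::unique_ptr<llvm::MachO::InterfaceFile>>
  readStub(llvm::StringRef Path) {
    auto BufferOrErr = llvm::MemoryBuffer::getFile(Path);
    if (!BufferOrErr)
      return llvm::errorCodeToError(BufferOrErr.getError());
    return llvm::MachO::TextAPIReader::get(std::move(*BufferOrErr));
  }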
diff --git a/include/llvm/TextAPI/MachO/TextAPIWriter.h b/include/llvm/TextAPI/MachO/TextAPIWriter.h
new file mode 100644
index 000000000000..2a45bb86a332
--- /dev/null
+++ b/include/llvm/TextAPI/MachO/TextAPIWriter.h
@@ -0,0 +1,29 @@
+//===--- TextAPIWriter.h - Text API Writer ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_WRITER_H
+#define LLVM_TEXTAPI_MACHO_WRITER_H
+
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace MachO {
+
+class InterfaceFile;
+
+class TextAPIWriter {
+public:
+ TextAPIWriter() = delete;
+
+ static Error writeToStream(raw_ostream &os, const InterfaceFile &);
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_WRITER_H
diff --git a/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h b/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h
index 964b0f7620a2..d144f62f1cc1 100644
--- a/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h
+++ b/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h
@@ -1,9 +1,8 @@
//===- DlltoolDriver.h - dlltool.exe-compatible driver ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/ToolDrivers/llvm-lib/LibDriver.h b/include/llvm/ToolDrivers/llvm-lib/LibDriver.h
index a4806ac4ad69..23a2fc348a89 100644
--- a/include/llvm/ToolDrivers/llvm-lib/LibDriver.h
+++ b/include/llvm/ToolDrivers/llvm-lib/LibDriver.h
@@ -1,9 +1,8 @@
//===- llvm-lib/LibDriver.h - lib.exe-compatible driver ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@ namespace llvm {
template <typename T> class ArrayRef;
int libDriverMain(ArrayRef<const char *> ARgs);
+
}
#endif
diff --git a/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index f970acdc741f..887c8807904e 100644
--- a/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -1,9 +1,8 @@
//===- AggressiveInstCombine.h - AggressiveInstCombine pass -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Coroutines.h b/include/llvm/Transforms/Coroutines.h
index 51beb44fdc56..9df3ec0f3ef4 100644
--- a/include/llvm/Transforms/Coroutines.h
+++ b/include/llvm/Transforms/Coroutines.h
@@ -1,9 +1,8 @@
//===-- Coroutines.h - Coroutine Transformations ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Declare accessor functions for coroutine lowering passes.
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 11d363b1200b..de0c80f5b19a 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/IPO.h - Interprocedural Transformations --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -183,6 +182,10 @@ ModulePass *createBlockExtractorPass();
ModulePass *
createBlockExtractorPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
bool EraseFunctions);
+ModulePass *
+createBlockExtractorPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions);
/// createStripDeadPrototypesPass - This pass removes any function declarations
/// (prototypes) that are not used.
diff --git a/include/llvm/Transforms/IPO/AlwaysInliner.h b/include/llvm/Transforms/IPO/AlwaysInliner.h
index b52c0fdbd2c9..64e25230f6da 100644
--- a/include/llvm/Transforms/IPO/AlwaysInliner.h
+++ b/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -1,9 +1,8 @@
//===-- AlwaysInliner.h - Pass to inline "always_inline" functions --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Transforms/IPO/ArgumentPromotion.h b/include/llvm/Transforms/IPO/ArgumentPromotion.h
index 49ca6cc73393..c8afb7bdcd65 100644
--- a/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -1,9 +1,8 @@
//===- ArgumentPromotion.h - Promote by-reference arguments -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/IPO/Attributor.h b/include/llvm/Transforms/IPO/Attributor.h
new file mode 100644
index 000000000000..5dbe21ac5e4e
--- /dev/null
+++ b/include/llvm/Transforms/IPO/Attributor.h
@@ -0,0 +1,789 @@
+//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Attributor: An interprocedural (abstract) "attribute" deduction framework.
+//
+// The Attributor framework is an interprocedural abstract analysis (fixpoint
+// iteration analysis). The goal is to allow easy deduction of new attributes as
+// well as information exchange between abstract attributes in-flight.
+//
+// The Attributor class is the driver and the link between the various abstract
+// attributes. The Attributor will iterate until a fixpoint state is reached by
+// all abstract attributes in-flight, or until it enforces a pessimistic
+// fixpoint because an iteration limit is reached.
+//
+// Abstract attributes, derived from the AbstractAttribute class, actually
+// describe properties of the code. They can correspond to actual LLVM-IR
+// attributes, or they can be more general, ultimately unrelated to LLVM-IR
+// attributes. The latter is useful when an abstract attribute provides
+// information to other abstract attributes in-flight but we might not want to
+// manifest the information. The Attributor allows querying in-flight abstract
+// attributes through the `Attributor::getAAFor` method (see the method
+// description for an example). If the method is used by an abstract attribute
+// P, and it results in an abstract attribute Q, the Attributor will
+// automatically capture a potential dependence from Q to P. This dependence
+// will cause P to be reevaluated whenever Q changes in the future.
+//
+// The Attributor will only reevaluate abstract attributes that might have
+// changed since the last iteration. That means that the Attributor will not
+// revisit all instructions/blocks/functions in the module but only query
+// an update from a subset of the abstract attributes.
+//
+// The update method `AbstractAttribute::updateImpl` is implemented by the
+// specific "abstract attribute" subclasses. The method is invoked whenever the
+// currently assumed state (see the AbstractState class) might not be valid
+// anymore. This can, for example, happen if the state was dependent on another
+// abstract attribute that changed. In every invocation, the update method has
+// to adjust the internal state of an abstract attribute to a point that is
+// justifiable by the underlying IR and the current state of abstract attributes
+// in-flight. Since the IR is given and assumed to be valid, the information
+// derived from it can be assumed to hold. However, information derived from
+// other abstract attributes is conditional on various things. If the justifying
+// state changed, the `updateImpl` has to revisit the situation and potentially
+// find another justification or limit the optimistic assumptions made.
+//
+// Change is the key in this framework. Until a state of no-change, thus a
+// fixpoint, is reached, the Attributor will query the abstract attributes
+// in-flight to re-evaluate their state. If the (current) state is too
+// optimistic, hence it cannot be justified anymore through other abstract
+// attributes or the state of the IR, the state of the abstract attribute will
+// have to change. Generally, we assume abstract attribute state to be a finite
+// height lattice and the update function to be monotone. However, these
+// conditions are not enforced because the iteration limit will guarantee
+// termination. If an optimistic fixpoint is reached, or a pessimistic fix
+// point is enforced after a timeout, the abstract attributes are tasked to
+// manifest their result in the IR for passes to come.
+//
+// Attribute manifestation is not mandatory. If desired, there is support to
+// generate a single LLVM-IR attribute already in the AbstractAttribute base
+// class. In the simplest case, a subclass overloads
+// `AbstractAttribute::getManifestPosition()` and
+// `AbstractAttribute::getAttrKind()` to return the appropriate values. The
+// Attributor manifestation framework will then create and place a new attribute
+// if it is allowed to do so (based on the abstract state). Other use cases can
+// be achieved by overloading other abstract attribute methods.
+//
+//
+// The "mechanics" of adding a new "abstract attribute":
+// - Define a class (transitively) inheriting from AbstractAttribute and one
+// (which could be the same) that (transitively) inherits from AbstractState.
+// For the latter, consider the already available BooleanState and
+// IntegerState if they fit your needs, e.g., you require only a bit-encoding.
+// - Implement all pure methods. Also use overloading if the attribute is not
+// conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
+// an argument, call site argument, function return value, or function. See
+// the class and method descriptions for more information on the two
+// "Abstract" classes and their respective methods.
+// - Register opportunities for the new abstract attribute in the
+// `Attributor::identifyDefaultAbstractAttributes` method if it should be
+// counted as a 'default' attribute.
+// - Add sufficient tests.
+// - Add a Statistics object for bookkeeping. If it is a simple (set of)
+// attribute(s) manifested through the Attributor manifestation framework, see
+// the bookkeeping function in Attributor.cpp.
+// - If instructions with a certain opcode are interesting to the attribute, add
+// that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
+// will make it possible to query all those instructions through the
+// `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
+// need to traverse the IR repeatedly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
+
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct AbstractAttribute;
+struct InformationCache;
+
+class Function;
+
+/// Simple enum class that forces the status to be spelled out explicitly.
+///
+///{
+enum class ChangeStatus {
+ CHANGED,
+ UNCHANGED,
+};
+
+ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
+ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
+///}
+
+/// The fixpoint analysis framework that orchestrates the attribute deduction.
+///
+/// The Attributor provides a general abstract analysis framework (guided
+/// fixpoint iteration) as well as helper functions for the deduction of
+/// (LLVM-IR) attributes. However, other code properties can also be deduced,
+/// propagated, and ultimately manifested through the Attributor framework. This
+/// is particularly useful if these properties interact with attributes and a
+/// co-scheduled deduction helps improve the solution. Even if not, that is, if
+/// attributes/properties are completely isolated, they should use the
+/// Attributor framework to reduce the number of fixpoint iteration frameworks
+/// in the code base. Note that the Attributor design makes sure that isolated
+/// attributes are not impacted, in any way, by others derived at the same time
+/// if there is no cross-reasoning performed.
+///
+/// The public facing interface of the Attributor is kept simple and basically
+/// allows abstract attributes to do one thing: query abstract attributes
+/// in-flight. There are two reasons to do this:
+/// a) The optimistic state of one abstract attribute can justify an
+///       optimistic state of another, allowing the framework to end up with an
+/// optimistic (=best possible) fixpoint instead of one based solely on
+/// information in the IR.
+/// b) This avoids reimplementing various kinds of lookups, e.g., to check
+///       for existing IR attributes, in favor of a single lookup interface
+/// provided by an abstract attribute subclass.
+///
+/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
+/// described in the file comment.
+struct Attributor {
+ ~Attributor() { DeleteContainerPointers(AllAbstractAttributes); }
+
+ /// Run the analyses until a fixpoint is reached or enforced (timeout).
+ ///
+  /// The attributes registered with this Attributor can be used afterwards as
+  /// long as the Attributor is not destroyed (it owns the attributes now).
+ ///
+  /// \returns CHANGED if the IR was changed, otherwise UNCHANGED.
+ ChangeStatus run();
+
+ /// Lookup an abstract attribute of type \p AAType anchored at value \p V and
+ /// argument number \p ArgNo. If no attribute is found and \p V is a call base
+ /// instruction, the called function is tried as a value next. Thus, the
+ /// returned abstract attribute might be anchored at the callee of \p V.
+ ///
+ /// This method is the only (supported) way an abstract attribute can retrieve
+ /// information from another abstract attribute. As an example, take an
+  /// abstract attribute that determines the memory access behavior for an
+ /// argument (readnone, readonly, ...). It should use `getAAFor` to get the
+ /// most optimistic information for other abstract attributes in-flight, e.g.
+ /// the one reasoning about the "captured" state for the argument or the one
+ /// reasoning on the memory access behavior of the function as a whole.
+ template <typename AAType>
+ const AAType *getAAFor(AbstractAttribute &QueryingAA, const Value &V,
+ int ArgNo = -1) {
+ static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+ "Cannot query an attribute with a type not derived from "
+ "'AbstractAttribute'!");
+ assert(AAType::ID != Attribute::None &&
+ "Cannot lookup generic abstract attributes!");
+
+ // Determine the argument number automatically for llvm::Arguments if none
+ // is set. Do not override a given one as it could be a use of the argument
+ // in a call site.
+ if (ArgNo == -1)
+ if (auto *Arg = dyn_cast<Argument>(&V))
+ ArgNo = Arg->getArgNo();
+
+ // If a function was given together with an argument number, perform the
+ // lookup for the actual argument instead. Don't do it for variadic
+ // arguments.
+ if (ArgNo >= 0 && isa<Function>(&V) &&
+ cast<Function>(&V)->arg_size() > (size_t)ArgNo)
+ return getAAFor<AAType>(
+ QueryingAA, *(cast<Function>(&V)->arg_begin() + ArgNo), ArgNo);
+
+ // Lookup the abstract attribute of type AAType. If found, return it after
+ // registering a dependence of QueryingAA on the one returned attribute.
+ const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo});
+ if (AAType *AA = static_cast<AAType *>(
+ KindToAbstractAttributeMap.lookup(AAType::ID))) {
+ // Do not return an attribute with an invalid state. This minimizes checks
+      // at the call sites and allows the fallback below to kick in.
+ if (AA->getState().isValidState()) {
+ QueryMap[AA].insert(&QueryingAA);
+ return AA;
+ }
+ }
+
+ // If no abstract attribute was found and we look for a call site argument,
+ // defer to the actual argument instead.
+ ImmutableCallSite ICS(&V);
+ if (ICS && ICS.getCalledValue())
+ return getAAFor<AAType>(QueryingAA, *ICS.getCalledValue(), ArgNo);
+
+ // No matching attribute found
+ return nullptr;
+ }
+
+ /// Introduce a new abstract attribute into the fixpoint analysis.
+ ///
+ /// Note that ownership of the attribute is given to the Attributor. It will
+  /// invoke delete on the attribute when the Attributor is destroyed.
+ ///
+ /// Attributes are identified by
+ /// (1) their anchored value (see AA.getAnchoredValue()),
+ /// (2) their argument number (\p ArgNo, or Argument::getArgNo()), and
+ /// (3) their default attribute kind (see AAType::ID).
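+ ///
+ /// A registration sketch (illustrative only; `MyCustomAA` stands for any
+ /// user-defined AbstractAttribute subclass and is not part of this header):
+ /// \code
+ ///   auto *AA = new MyCustomAA(F, InfoCache);
+ ///   A.registerAA(*AA); // The Attributor A now owns and will delete *AA.
+ /// \endcode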
+ template <typename AAType> AAType &registerAA(AAType &AA, int ArgNo = -1) {
+ static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
+ "Cannot register an attribute with a type not derived from "
+ "'AbstractAttribute'!");
+
+ // Determine the anchor value and the argument number which are used to
+ // lookup the attribute together with AAType::ID. If passed an argument,
+ // use its argument number but do not override a given one as it could be a
+ // use of the argument at a call site.
+ Value &AnchoredVal = AA.getAnchoredValue();
+ if (ArgNo == -1)
+ if (auto *Arg = dyn_cast<Argument>(&AnchoredVal))
+ ArgNo = Arg->getArgNo();
+
+ // Put the attribute in the lookup map structure and the container we use to
+ // keep track of all attributes.
+ AAMap[{&AnchoredVal, ArgNo}][AAType::ID] = &AA;
+ AllAbstractAttributes.push_back(&AA);
+ return AA;
+ }
+
+ /// Determine opportunities to derive 'default' attributes in \p F and create
+ /// abstract attribute objects for them.
+ ///
+ /// \param F The function that is checked for attribute opportunities.
+ /// \param InfoCache A cache for information queryable by the new attributes.
+ /// \param Whitelist If not null, a set limiting the attribute opportunities.
+ ///
+ /// Note that abstract attribute instances are generally created even if the
+ /// IR already contains the information they would deduce. The most important
+ /// reason for this is the single interface, the one of the abstract attribute
+ /// instance, which can be queried without the need to look at the IR in
+ /// various places.
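+ ///
+ /// A usage sketch restricted to "nonnull" deduction (illustrative only):
+ /// \code
+ ///   DenseSet<unsigned> Whitelist({Attribute::NonNull});
+ ///   A.identifyDefaultAbstractAttributes(F, InfoCache, &Whitelist);
+ /// \endcode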
+ void identifyDefaultAbstractAttributes(
+ Function &F, InformationCache &InfoCache,
+ DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist = nullptr);
+
+ /// Check \p Pred on all function call sites.
+ ///
+ /// This method will evaluate \p Pred on call sites and return
+ /// true if \p Pred holds in every call site. However, this is only possible
+ /// if all call sites are known, which in general requires the function to
+ /// have internal linkage.
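+ ///
+ /// A predicate sketch (illustrative only; the predicate must be a named
+ /// std::function because it is taken by reference, and `A` is assumed to be
+ /// this Attributor):
+ /// \code
+ ///   std::function<bool(CallSite)> Pred = [](CallSite CS) {
+ ///     return CS.getNumArgOperands() <= 4;
+ ///   };
+ ///   bool AllOk = A.checkForAllCallSites(F, Pred, /* RequireAllCallSites */ true);
+ /// \endcode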
+ bool checkForAllCallSites(Function &F, std::function<bool(CallSite)> &Pred,
+ bool RequireAllCallSites);
+
+private:
+ /// The set of all abstract attributes.
+ ///{
+ using AAVector = SmallVector<AbstractAttribute *, 64>;
+ AAVector AllAbstractAttributes;
+ ///}
+
+ /// A nested map to lookup abstract attributes based on the anchored value and
+ /// an argument position (or -1) on the outer level, and attribute kinds
+ /// (Attribute::AttrKind) on the inner level.
+ ///{
+ using KindToAbstractAttributeMap = DenseMap<unsigned, AbstractAttribute *>;
+ DenseMap<std::pair<const Value *, int>, KindToAbstractAttributeMap> AAMap;
+ ///}
+
+ /// A map from abstract attributes to the ones that queried them through calls
+ /// to the getAAFor<...>(...) method.
+ ///{
+ using QueryMapTy =
+ DenseMap<AbstractAttribute *, SetVector<AbstractAttribute *>>;
+ QueryMapTy QueryMap;
+ ///}
+};
+
+/// Data structure to hold cached (LLVM-IR) information.
+///
+/// All attributes are given an InformationCache object at creation time to
+/// avoid inspection of the IR by all of them individually. This default
+/// InformationCache will hold information required by 'default' attributes,
+/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
+/// is called.
+///
+/// If custom abstract attributes, registered manually through
+/// Attributor::registerAA(...), need more information, especially if it is not
+/// reusable, it is advised to inherit from the InformationCache and cast the
+/// instance down in the abstract attributes.
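+///
+/// A downcast sketch (illustrative only; `MyInfoCache` and its contents are
+/// hypothetical and not part of this header):
+/// \code
+///   struct MyInfoCache : public InformationCache {
+///     DenseMap<const Function *, unsigned> ExtraInfo;
+///   };
+///   // Inside a custom abstract attribute that knows it was created with a
+///   // MyInfoCache instance:
+///   auto &MIC = static_cast<MyInfoCache &>(InfoCache);
+/// \endcode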
+struct InformationCache {
+ /// A map type from opcodes to instructions with this opcode.
+ using OpcodeInstMapTy = DenseMap<unsigned, SmallVector<Instruction *, 32>>;
+
+ /// Return the map that relates "interesting" opcodes with all instructions
+ /// with that opcode in \p F.
+ OpcodeInstMapTy &getOpcodeInstMapForFunction(Function &F) {
+ return FuncInstOpcodeMap[&F];
+ }
+
+ /// A vector type to hold instructions.
+ using InstructionVectorTy = std::vector<Instruction *>;
+
+ /// Return the instructions in \p F that may read or write memory.
+ InstructionVectorTy &getReadOrWriteInstsForFunction(Function &F) {
+ return FuncRWInstsMap[&F];
+ }
+
+private:
+ /// A map type from functions to opcode to instruction maps.
+ using FuncInstOpcodeMapTy = DenseMap<Function *, OpcodeInstMapTy>;
+
+ /// A map type from functions to their read or write instructions.
+ using FuncRWInstsMapTy = DenseMap<Function *, InstructionVectorTy>;
+
+ /// A nested map that remembers all instructions in a function with a certain
+ /// instruction opcode (Instruction::getOpcode()).
+ FuncInstOpcodeMapTy FuncInstOpcodeMap;
+
+ /// A map from functions to their instructions that may read or write memory.
+ FuncRWInstsMapTy FuncRWInstsMap;
+
+ /// Give the Attributor access to the members so
+ /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
+ friend struct Attributor;
+};
+
+/// An interface to query the internal state of an abstract attribute.
+///
+/// The abstract state is a minimal interface that allows the Attributor to
+/// communicate with the abstract attributes about their internal state without
+/// enforcing or exposing implementation details, e.g., the (existence of an)
+/// underlying lattice.
+///
+/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
+/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
+/// was reached or (4) a pessimistic fixpoint was enforced.
+///
+/// All methods need to be implemented by the subclass. For the common use case,
+/// a single boolean state or a bit-encoded state, the BooleanState and
+/// IntegerState classes are already provided. An abstract attribute can inherit
+/// from them to get the abstract state interface and additional methods to
+/// directly modify the state if needed. See the class comments for help.
+struct AbstractState {
+ virtual ~AbstractState() {}
+
+ /// Return if this abstract state is in a valid state. If false, no
+ /// information provided should be used.
+ virtual bool isValidState() const = 0;
+
+ /// Return if this abstract state is fixed, thus does not need to be updated
+ /// if information changes as it cannot change itself.
+ virtual bool isAtFixpoint() const = 0;
+
+ /// Indicate that the abstract state should converge to the optimistic state.
+ ///
+ /// This will usually make the optimistically assumed state the
+ /// known-to-be-true state.
+ virtual void indicateOptimisticFixpoint() = 0;
+
+ /// Indicate that the abstract state should converge to the pessimistic state.
+ ///
+ /// This will usually revert the optimistically assumed state to the
+ /// known-to-be-true state.
+ virtual void indicatePessimisticFixpoint() = 0;
+};
+
+/// Simple state with integers encoding.
+///
+/// The interface ensures that the assumed bits are always a subset of the known
+/// bits. Users can only add known bits and, except through adding known bits,
+/// they can only remove assumed bits. This should guarantee monotonicity and
+/// thereby the existence of a fixpoint (if used correctly). The fixpoint is
+/// reached when the assumed and known state/bits are equal. Users can
+/// force/indicate a fixpoint. If an optimistic one is indicated, the known
+/// state will catch up with the assumed one, for a pessimistic fixpoint it is
+/// the other way around.
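+///
+/// A small behavioral sketch (illustrative only; the bit values are arbitrary):
+/// \code
+///   IntegerState S;                 // Best state: all bits assumed, none known.
+///   S.removeAssumedBits(1 << 0);    // Give up the optimistic assumption for bit 0.
+///   S.addKnownBits(1 << 1);         // Bit 1 becomes known (and stays assumed).
+///   S.indicateOptimisticFixpoint(); // Known catches up with assumed.
+/// \endcode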
+struct IntegerState : public AbstractState {
+ /// Underlying integer type; we assume 32 bits to be enough.
+ using base_t = uint32_t;
+
+ /// Initialize the (best) state.
+ IntegerState(base_t BestState = ~0) : Assumed(BestState) {}
+
+ /// Return the worst possible representable state.
+ static constexpr base_t getWorstState() { return 0; }
+
+ /// See AbstractState::isValidState()
+ /// NOTE: For now we simply pretend that the worst possible state is invalid.
+ bool isValidState() const override { return Assumed != getWorstState(); }
+
+ /// See AbstractState::isAtFixpoint()
+ bool isAtFixpoint() const override { return Assumed == Known; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...)
+ void indicateOptimisticFixpoint() override { Known = Assumed; }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...)
+ void indicatePessimisticFixpoint() override { Assumed = Known; }
+
+ /// Return the known state encoding
+ base_t getKnown() const { return Known; }
+
+ /// Return the assumed state encoding.
+ base_t getAssumed() const { return Assumed; }
+
+ /// Return true if the bits set in \p BitsEncoding are "known bits".
+ bool isKnown(base_t BitsEncoding) const {
+ return (Known & BitsEncoding) == BitsEncoding;
+ }
+
+ /// Return true if the bits set in \p BitsEncoding are "assumed bits".
+ bool isAssumed(base_t BitsEncoding) const {
+ return (Assumed & BitsEncoding) == BitsEncoding;
+ }
+
+ /// Add the bits in \p BitsEncoding to the "known bits".
+ IntegerState &addKnownBits(base_t Bits) {
+ // Make sure we never miss any "known bits".
+ Assumed |= Bits;
+ Known |= Bits;
+ return *this;
+ }
+
+ /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
+ IntegerState &removeAssumedBits(base_t BitsEncoding) {
+ // Make sure we never lose any "known bits".
+ Assumed = (Assumed & ~BitsEncoding) | Known;
+ return *this;
+ }
+
+ /// Keep only the "assumed bits" also set in \p BitsEncoding, but keep all
+ /// "known bits".
+ IntegerState &intersectAssumedBits(base_t BitsEncoding) {
+ // Make sure we never lose any "known bits".
+ Assumed = (Assumed & BitsEncoding) | Known;
+ return *this;
+ }
+
+private:
+ /// The known state encoding in an integer of type base_t.
+ base_t Known = getWorstState();
+
+ /// The assumed state encoding in an integer of type base_t.
+ base_t Assumed;
+};
+
+/// Simple wrapper for a single bit (boolean) state.
+struct BooleanState : public IntegerState {
+ BooleanState() : IntegerState(1){};
+};
+
+/// Base struct for all "concrete attribute" deductions.
+///
+/// The abstract attribute is a minimal interface that allows the Attributor to
+/// orchestrate the abstract/fixpoint analysis. The design hides away
+/// implementation choices made for the subclasses but also structures their
+/// implementation and simplifies the use of other abstract attributes in-flight.
+///
+/// To allow easy creation of new attributes, most methods have default
+/// implementations. The ones that do not are generally straightforward, except
+/// `AbstractAttribute::updateImpl` which is the location of most reasoning
+/// associated with the abstract attribute. The update is invoked by the
+/// Attributor in case the situation used to justify the current optimistic
+/// state might have changed. The Attributor determines this automatically
+/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
+///
+/// The `updateImpl` method should inspect the IR and other abstract attributes
+/// in-flight to justify the best possible (=optimistic) state. The actual
+/// implementation is, similar to the underlying abstract state encoding, not
+/// exposed. In the most common case, the `updateImpl` will go through a list of
+/// reasons why its optimistic state is valid given the current information. If
+/// any combination of them holds and is sufficient to justify the current
+/// optimistic state, the method shall return UNCHANGED. If not, the optimistic
+/// state is adjusted to the situation and the method shall return CHANGED.
+///
+/// If the manifestation of the "concrete attribute" deduced by the subclass
+/// differs from the "default" behavior, which is a (set of) LLVM-IR
+/// attribute(s) for an argument, call site argument, function return value, or
+/// function, the `AbstractAttribute::manifest` method should be overridden.
+///
+/// NOTE: If the state obtained via getState() is INVALID, thus if
+/// AbstractAttribute::getState().isValidState() returns false, no
+/// information provided by the methods of this class should be used.
+/// NOTE: The Attributor currently has certain limitations to what we can do.
+/// As a general rule of thumb, "concrete" abstract attributes should *for
+/// now* only perform "backward" information propagation. That means
+/// optimistic information obtained through abstract attributes should
+/// only be used at positions that precede the origin of the information
+/// with regards to the program flow. More practically, information can
+/// *now* be propagated from instructions to their enclosing function, but
+/// *not* from call sites to the called function. The mechanisms to allow
+/// both directions will be added in the future.
+/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
+/// described in the file comment.
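+///
+/// An `updateImpl` sketch (illustrative only; `MyAA` stands for any concrete
+/// subclass and the justification check is elided):
+/// \code
+///   ChangeStatus MyAA::updateImpl(Attributor &A) {
+///     if (/* IR and in-flight attributes still justify the assumed state */ true)
+///       return ChangeStatus::UNCHANGED;
+///     getState().indicatePessimisticFixpoint();
+///     return ChangeStatus::CHANGED;
+///   }
+/// \endcode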
+struct AbstractAttribute {
+
+ /// The positions attributes can be manifested in.
+ enum ManifestPosition {
+ MP_ARGUMENT, ///< An attribute for a function argument.
+ MP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
+ MP_FUNCTION, ///< An attribute for a function as a whole.
+ MP_RETURNED, ///< An attribute for the function return value.
+ };
+
+ /// An abstract attribute associated with \p AssociatedVal and anchored at
+ /// \p AnchoredVal.
+ ///
+ /// \param AssociatedVal The value this abstract attribute is associated with.
+ /// \param AnchoredVal The value this abstract attribute is anchored at.
+ /// \param InfoCache Cached information accessible to the abstract attribute.
+ AbstractAttribute(Value *AssociatedVal, Value &AnchoredVal,
+ InformationCache &InfoCache)
+ : AssociatedVal(AssociatedVal), AnchoredVal(AnchoredVal),
+ InfoCache(InfoCache) {}
+
+ /// An abstract attribute associated with and anchored at \p V.
+ AbstractAttribute(Value &V, InformationCache &InfoCache)
+ : AbstractAttribute(&V, V, InfoCache) {}
+
+ /// Virtual destructor.
+ virtual ~AbstractAttribute() {}
+
+ /// Initialize the state with the information in the Attributor \p A.
+ ///
+ /// This function is called by the Attributor once all abstract attributes
+ /// have been identified. It can and shall be used for tasks like:
+ /// - identify existing knowledge in the IR and use it for the "known state"
+ /// - perform any work that is not going to change over time, e.g., determine
+ /// a subset of the IR, or attributes in-flight, that have to be looked at
+ /// in the `updateImpl` method.
+ virtual void initialize(Attributor &A) {}
+
+ /// Return the internal abstract state for inspection.
+ virtual const AbstractState &getState() const = 0;
+
+ /// Return the value this abstract attribute is anchored with.
+ ///
+ /// The anchored value might not be the associated value if the latter is not
+ /// sufficient to determine where arguments will be manifested. This is mostly
+ /// the case for call site arguments as the value is not sufficient to
+ /// pinpoint them. Instead, we can use the call site as an anchor.
+ ///
+ ///{
+ Value &getAnchoredValue() { return AnchoredVal; }
+ const Value &getAnchoredValue() const { return AnchoredVal; }
+ ///}
+
+ /// Return the llvm::Function surrounding the anchored value.
+ ///
+ ///{
+ Function &getAnchorScope();
+ const Function &getAnchorScope() const;
+ ///}
+
+ /// Return the value this abstract attribute is associated with.
+ ///
+ /// The abstract state usually represents this value.
+ ///
+ ///{
+ virtual Value *getAssociatedValue() { return AssociatedVal; }
+ virtual const Value *getAssociatedValue() const { return AssociatedVal; }
+ ///}
+
+ /// Return the position this abstract state is manifested in.
+ virtual ManifestPosition getManifestPosition() const = 0;
+
+ /// Return the kind that identifies the abstract attribute implementation.
+ virtual Attribute::AttrKind getAttrKind() const = 0;
+
+ /// Return the deduced attributes in \p Attrs.
+ virtual void getDeducedAttributes(SmallVectorImpl<Attribute> &Attrs) const {
+ LLVMContext &Ctx = AnchoredVal.getContext();
+ Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
+ }
+
+ /// Helper functions, for debug purposes only.
+ ///{
+ virtual void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+
+ /// This function should return the "summarized" assumed state as a string.
+ virtual const std::string getAsStr() const = 0;
+ ///}
+
+ /// Allow the Attributor access to the protected methods.
+ friend struct Attributor;
+
+protected:
+ /// Hook for the Attributor to trigger an update of the internal state.
+ ///
+ /// If this attribute is already fixed, this method will return UNCHANGED,
+ /// otherwise it delegates to `AbstractAttribute::updateImpl`.
+ ///
+ /// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
+ ChangeStatus update(Attributor &A);
+
+ /// Hook for the Attributor to trigger the manifestation of the information
+ /// represented by the abstract attribute in the LLVM-IR.
+ ///
+ /// \Return CHANGED if the IR was altered, otherwise UNCHANGED.
+ virtual ChangeStatus manifest(Attributor &A);
+
+ /// Return the internal abstract state for careful modification.
+ virtual AbstractState &getState() = 0;
+
+ /// The actual update/transfer function which has to be implemented by the
+ /// derived classes.
+ ///
+ /// If it is called, the environment has changed and we have to determine if
+ /// the current information is still valid or adjust it otherwise.
+ ///
+ /// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
+ virtual ChangeStatus updateImpl(Attributor &A) = 0;
+
+ /// The value this abstract attribute is associated with.
+ Value *AssociatedVal;
+
+ /// The value this abstract attribute is anchored at.
+ Value &AnchoredVal;
+
+ /// The information cache accessible to this abstract attribute.
+ InformationCache &InfoCache;
+};
+
+/// Forward declarations of output stream operators for debug purposes.
+///
+///{
+raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
+raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
+raw_ostream &operator<<(raw_ostream &OS, AbstractAttribute::ManifestPosition);
+raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
+///}
+
+struct AttributorPass : public PassInfoMixin<AttributorPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+Pass *createAttributorLegacyPass();
+
+/// ----------------------------------------------------------------------------
+/// Abstract Attribute Classes
+/// ----------------------------------------------------------------------------
+
+/// An abstract attribute for the returned values of a function.
+struct AAReturnedValues : public AbstractAttribute {
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAReturnedValues(Function &F, InformationCache &InfoCache)
+ : AbstractAttribute(F, InfoCache) {}
+
+ /// Check \p Pred on all returned values.
+ ///
+ /// This method will evaluate \p Pred on returned values and return
+ /// true if (1) all returned values are known, and (2) \p Pred returned true
+ /// for all returned values.
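+ ///
+ /// A query sketch (illustrative only; `RetValAA` is assumed to point at an
+ /// AAReturnedValues instance, e.g., one obtained via Attributor::getAAFor,
+ /// and the predicate must be a named std::function as it is taken by
+ /// reference):
+ /// \code
+ ///   std::function<bool(Value &)> Pred = [](Value &RV) {
+ ///     return isa<Constant>(RV);
+ ///   };
+ ///   bool AllConstant = RetValAA->checkForallReturnedValues(Pred);
+ /// \endcode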
+ virtual bool
+ checkForallReturnedValues(std::function<bool(Value &)> &Pred) const = 0;
+
+ /// See AbstractAttribute::getAttrKind()
+ Attribute::AttrKind getAttrKind() const override { return ID; }
+
+ /// The identifier used by the Attributor for this class of attributes.
+ static constexpr Attribute::AttrKind ID = Attribute::Returned;
+};
+
+/// An abstract interface for all nounwind attributes.
+struct AANoUnwind : public AbstractAttribute {
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANoUnwind(Value &V, InformationCache &InfoCache)
+ : AbstractAttribute(V, InfoCache) {}
+
+ /// See AbstractAttribute::getAttrKind().
+ Attribute::AttrKind getAttrKind() const override { return ID; }
+
+ static constexpr Attribute::AttrKind ID = Attribute::NoUnwind;
+
+ /// Returns true if nounwind is assumed.
+ virtual bool isAssumedNoUnwind() const = 0;
+
+ /// Returns true if nounwind is known.
+ virtual bool isKnownNoUnwind() const = 0;
+};
+
+/// An abstract interface for all nosync attributes.
+struct AANoSync : public AbstractAttribute {
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANoSync(Value &V, InformationCache &InfoCache)
+ : AbstractAttribute(V, InfoCache) {}
+
+ /// See AbstractAttribute::getAttrKind().
+ Attribute::AttrKind getAttrKind() const override { return ID; }
+
+ static constexpr Attribute::AttrKind ID =
+ Attribute::AttrKind(Attribute::NoSync);
+
+ /// Returns true if "nosync" is assumed.
+ virtual bool isAssumedNoSync() const = 0;
+
+ /// Returns true if "nosync" is known.
+ virtual bool isKnownNoSync() const = 0;
+};
+
+/// An abstract interface for all nonnull attributes.
+struct AANonNull : public AbstractAttribute {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANonNull(Value &V, InformationCache &InfoCache)
+ : AbstractAttribute(V, InfoCache) {}
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANonNull(Value *AssociatedVal, Value &AnchoredValue,
+ InformationCache &InfoCache)
+ : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {}
+
+ /// Return true if we assume that the underlying value is nonnull.
+ virtual bool isAssumedNonNull() const = 0;
+
+ /// Return true if we know that the underlying value is nonnull.
+ virtual bool isKnownNonNull() const = 0;
+
+ /// See AbstractAttribute::getAttrKind().
+ Attribute::AttrKind getAttrKind() const override { return ID; }
+
+ /// The identifier used by the Attributor for this class of attributes.
+ static constexpr Attribute::AttrKind ID = Attribute::NonNull;
+};
+
+/// An abstract attribute for norecurse.
+struct AANoRecurse : public AbstractAttribute {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANoRecurse(Value &V, InformationCache &InfoCache)
+ : AbstractAttribute(V, InfoCache) {}
+
+ /// See AbstractAttribute::getAttrKind()
+ virtual Attribute::AttrKind getAttrKind() const override {
+ return Attribute::NoRecurse;
+ }
+
+ /// Return true if "norecurse" is known.
+ virtual bool isKnownNoRecurse() const = 0;
+
+ /// Return true if "norecurse" is assumed.
+ virtual bool isAssumedNoRecurse() const = 0;
+
+ /// The identifier used by the Attributor for this class of attributes.
+ static constexpr Attribute::AttrKind ID = Attribute::NoRecurse;
+};
+
+/// An abstract attribute for willreturn.
+struct AAWillReturn : public AbstractAttribute {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAWillReturn(Value &V, InformationCache &InfoCache)
+ : AbstractAttribute(V, InfoCache) {}
+
+ /// See AbstractAttribute::getAttrKind()
+ virtual Attribute::AttrKind getAttrKind() const override {
+ return Attribute::WillReturn;
+ }
+
+ /// Return true if "willreturn" is known.
+ virtual bool isKnownWillReturn() const = 0;
+
+ /// Return true if "willreturn" is assumed.
+ virtual bool isAssumedWillReturn() const = 0;
+
+ /// The identifier used by the Attributor for this class of attributes.
+ static constexpr Attribute::AttrKind ID = Attribute::WillReturn;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/include/llvm/Transforms/IPO/CalledValuePropagation.h b/include/llvm/Transforms/IPO/CalledValuePropagation.h
index 352bdc7ac17f..c2626d0867b4 100644
--- a/include/llvm/Transforms/IPO/CalledValuePropagation.h
+++ b/include/llvm/Transforms/IPO/CalledValuePropagation.h
@@ -1,9 +1,8 @@
//===- CalledValuePropagation.h - Propagate called values -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/ConstantMerge.h b/include/llvm/Transforms/IPO/ConstantMerge.h
index e04d3ae1a40e..12d38b5f58fa 100644
--- a/include/llvm/Transforms/IPO/ConstantMerge.h
+++ b/include/llvm/Transforms/IPO/ConstantMerge.h
@@ -1,9 +1,8 @@
//===- ConstantMerge.h - Merge duplicate global constants -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/CrossDSOCFI.h b/include/llvm/Transforms/IPO/CrossDSOCFI.h
index 0979f5b79e86..8440df639729 100644
--- a/include/llvm/Transforms/IPO/CrossDSOCFI.h
+++ b/include/llvm/Transforms/IPO/CrossDSOCFI.h
@@ -1,9 +1,8 @@
//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/include/llvm/Transforms/IPO/DeadArgumentElimination.h
index ba5666f20a9b..73797bc10017 100644
--- a/include/llvm/Transforms/IPO/DeadArgumentElimination.h
+++ b/include/llvm/Transforms/IPO/DeadArgumentElimination.h
@@ -1,9 +1,8 @@
//===- DeadArgumentElimination.h - Eliminate Dead Args ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/ElimAvailExtern.h b/include/llvm/Transforms/IPO/ElimAvailExtern.h
index 94cb954fd2d5..92c319b3cce3 100644
--- a/include/llvm/Transforms/IPO/ElimAvailExtern.h
+++ b/include/llvm/Transforms/IPO/ElimAvailExtern.h
@@ -1,9 +1,8 @@
//===- ElimAvailExtern.h - Optimize Global Variables ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index ff8a6546f059..7379009b2592 100644
--- a/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -1,9 +1,8 @@
//===-- ForceFunctionAttrs.h - Force function attrs for debugging ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/IPO/FunctionAttrs.h b/include/llvm/Transforms/IPO/FunctionAttrs.h
index 901fed7a0fa4..ce61eea05c79 100644
--- a/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -1,9 +1,8 @@
//===- FunctionAttrs.h - Compute function attributes ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h
index c2103b637266..bbf270c400af 100644
--- a/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/include/llvm/Transforms/IPO/FunctionImport.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/IPO/FunctionImport.h - ThinLTO importing -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/IPO/GlobalDCE.h b/include/llvm/Transforms/IPO/GlobalDCE.h
index 7ca241f4645a..c434484d1ae3 100644
--- a/include/llvm/Transforms/IPO/GlobalDCE.h
+++ b/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -1,9 +1,8 @@
//===-- GlobalDCE.h - DCE unreachable internal functions ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/GlobalOpt.h b/include/llvm/Transforms/IPO/GlobalOpt.h
index 5b4878604eab..48a861ff2cf8 100644
--- a/include/llvm/Transforms/IPO/GlobalOpt.h
+++ b/include/llvm/Transforms/IPO/GlobalOpt.h
@@ -1,9 +1,8 @@
//===- GlobalOpt.h - Optimize Global Variables ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/GlobalSplit.h b/include/llvm/Transforms/IPO/GlobalSplit.h
index 56cefb7886fe..690b23a2d785 100644
--- a/include/llvm/Transforms/IPO/GlobalSplit.h
+++ b/include/llvm/Transforms/IPO/GlobalSplit.h
@@ -1,9 +1,8 @@
//===- GlobalSplit.h - global variable splitter -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/HotColdSplitting.h b/include/llvm/Transforms/IPO/HotColdSplitting.h
index 57e9a9e69187..73668844590d 100644
--- a/include/llvm/Transforms/IPO/HotColdSplitting.h
+++ b/include/llvm/Transforms/IPO/HotColdSplitting.h
@@ -1,9 +1,8 @@
//===- HotColdSplitting.h ---- Outline Cold Regions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// This pass outlines cold regions to a separate function.
diff --git a/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index 54e1c243ae27..bb7907fb8ac8 100644
--- a/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -1,9 +1,8 @@
//===-- InferFunctionAttrs.h - Infer implicit function attributes ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Transforms/IPO/Inliner.h b/include/llvm/Transforms/IPO/Inliner.h
index 610e4500e4b1..8202b94d5a93 100644
--- a/include/llvm/Transforms/IPO/Inliner.h
+++ b/include/llvm/Transforms/IPO/Inliner.h
@@ -1,9 +1,8 @@
//===- Inliner.h - Inliner pass and infrastructure --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/IPO/Internalize.h b/include/llvm/Transforms/IPO/Internalize.h
index 45d676d9f77b..6c1e19ef9fe4 100644
--- a/include/llvm/Transforms/IPO/Internalize.h
+++ b/include/llvm/Transforms/IPO/Internalize.h
@@ -1,9 +1,8 @@
//====- Internalize.h - Internalization API ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,11 +21,11 @@
#ifndef LLVM_TRANSFORMS_IPO_INTERNALIZE_H
#define LLVM_TRANSFORMS_IPO_INTERNALIZE_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/PassManager.h"
#include <functional>
-#include <set>
namespace llvm {
class Module;
@@ -45,11 +44,11 @@ class InternalizePass : public PassInfoMixin<InternalizePass> {
/// Internalize GV if it is possible to do so, i.e. it is not externally
/// visible and is not a member of an externally visible comdat.
bool maybeInternalize(GlobalValue &GV,
- const std::set<const Comdat *> &ExternalComdats);
+ const DenseSet<const Comdat *> &ExternalComdats);
/// If GV is part of a comdat and is externally visible, keep track of its
/// comdat so that we don't internalize any of its members.
void checkComdatVisibility(GlobalValue &GV,
- std::set<const Comdat *> &ExternalComdats);
+ DenseSet<const Comdat *> &ExternalComdats);
public:
InternalizePass();
diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h
index bc448386b63d..39b23f5957db 100644
--- a/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -1,9 +1,8 @@
//===- LowerTypeTests.h - type metadata lowering pass -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/PartialInlining.h b/include/llvm/Transforms/IPO/PartialInlining.h
index ec6dd36dae06..3b8297d65987 100644
--- a/include/llvm/Transforms/IPO/PartialInlining.h
+++ b/include/llvm/Transforms/IPO/PartialInlining.h
@@ -1,9 +1,8 @@
//===- PartialInlining.h - Inline parts of functions ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 276306f686ff..63ff00afc2ae 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -1,9 +1,8 @@
// llvm/Transforms/IPO/PassManagerBuilder.h - Build Standard Pass -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -58,7 +57,7 @@ class PassManagerBase;
/// ...
class PassManagerBuilder {
public:
- /// Extensions are passed the builder itself (so they can see how it is
+ /// Extensions are passed to the builder itself (so they can see how it is
/// configured) as well as the pass manager to add stuff to.
typedef std::function<void(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM)>
@@ -113,6 +112,16 @@ public:
/// passes at the end of the main CallGraphSCC passes and before any
/// function simplification passes run by CGPassManager.
EP_CGSCCOptimizerLate,
+
+ /// EP_FullLinkTimeOptimizationEarly - This extension point allows adding
+ /// passes that run at link time, before Full Link Time Optimization.
+ EP_FullLinkTimeOptimizationEarly,
+
+ /// EP_FullLinkTimeOptimizationLast - This extension point allows adding
+ /// passes that run at link time, after Full Link Time Optimization.
+ EP_FullLinkTimeOptimizationLast,
};
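+
+ // A usage sketch for the new full-LTO extension points (illustrative only;
+ // `createMyLTOPreparePass` is a hypothetical pass factory, not part of LLVM):
+ //
+ //   PassManagerBuilder PMB;
+ //   PMB.addExtension(
+ //       PassManagerBuilder::EP_FullLinkTimeOptimizationEarly,
+ //       [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ //         PM.add(createMyLTOPreparePass());
+ //       });
+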
/// The Optimization Level - Specify the basic optimization level.
@@ -143,13 +152,14 @@ public:
const ModuleSummaryIndex *ImportSummary = nullptr;
bool DisableTailCalls;
- bool DisableUnitAtATime;
bool DisableUnrollLoops;
bool SLPVectorize;
bool LoopVectorize;
+ bool LoopsInterleaved;
bool RerollLoops;
bool NewGVN;
bool DisableGVNLoadPRE;
+ bool ForgetAllSCEVInLoopUnroll;
bool VerifyInput;
bool VerifyOutput;
bool MergeFunctions;
@@ -157,9 +167,15 @@ public:
bool PrepareForThinLTO;
bool PerformThinLTO;
bool DivergentTarget;
+ unsigned LicmMssaOptCap;
+ unsigned LicmMssaNoAccForPromotionCap;
/// Enable profile instrumentation pass.
bool EnablePGOInstrGen;
+ /// Enable profile context sensitive instrumentation pass.
+ bool EnablePGOCSInstrGen;
+ /// Enable profile context sensitive profile use pass.
+ bool EnablePGOCSInstrUse;
/// Profile data file name that the instrumentation will be written to.
std::string PGOInstrGen;
/// Path of the profile data file.
@@ -186,7 +202,7 @@ private:
void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const;
void addLTOOptimizationPasses(legacy::PassManagerBase &PM);
void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM);
- void addPGOInstrPasses(legacy::PassManagerBase &MPM);
+ void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS);
void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const;
diff --git a/include/llvm/Transforms/IPO/SCCP.h b/include/llvm/Transforms/IPO/SCCP.h
index fdb7865fbac3..3c40d44ca9de 100644
--- a/include/llvm/Transforms/IPO/SCCP.h
+++ b/include/llvm/Transforms/IPO/SCCP.h
@@ -1,9 +1,8 @@
//===- SCCP.h - Sparse Conditional Constant Propagation ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/SampleProfile.h b/include/llvm/Transforms/IPO/SampleProfile.h
index af4a933ec1f6..a5ad44551bf6 100644
--- a/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/include/llvm/Transforms/IPO/SampleProfile.h
@@ -1,9 +1,8 @@
//===- SampleProfile.h - SamplePGO pass ---------- --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/include/llvm/Transforms/IPO/StripDeadPrototypes.h
index 5a05cd75c9d5..f4a15c36afc9 100644
--- a/include/llvm/Transforms/IPO/StripDeadPrototypes.h
+++ b/include/llvm/Transforms/IPO/StripDeadPrototypes.h
@@ -1,9 +1,8 @@
//===-- StripDeadPrototypes.h - Remove unused function declarations -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h b/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
index bf04bbfe92d8..7acb922b37e1 100644
--- a/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
+++ b/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
@@ -1,9 +1,8 @@
//===- ThinLTOBitcodeWriter.h - Bitcode writing pass for ThinLTO ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index bf2c79b0751e..509fcc867060 100644
--- a/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -1,9 +1,8 @@
//===- WholeProgramDevirt.h - Whole-program devirt pass ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/InstCombine/InstCombine.h b/include/llvm/Transforms/InstCombine/InstCombine.h
index ab25fe08553a..8894d96e591f 100644
--- a/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -1,9 +1,8 @@
//===- InstCombine.h - InstCombine pass -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index f860b4b86555..6c33bdbafbd2 100644
--- a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -1,9 +1,8 @@
//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 017cab0a7750..8b70d2926ae9 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -1,9 +1,8 @@
//===- Transforms/Instrumentation.h - Instrumentation passes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -88,10 +87,14 @@ struct GCOVOptions {
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
GCOVOptions::getDefault());
-// PGO Instrumention
-ModulePass *createPGOInstrumentationGenLegacyPass();
+// PGO Instrumentation. Parameter IsCS indicates if this is the context
+// sensitive instrumentation.
+ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
ModulePass *
-createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""));
+createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""),
+ bool IsCS = false);
+ModulePass *createPGOInstrumentationGenCreateVarLegacyPass(
+ StringRef CSInstrName = StringRef(""));
ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false,
bool SamplePGO = false);
FunctionPass *createPGOMemOPSizeOptLegacyPass();
@@ -133,48 +136,27 @@ struct InstrProfOptions {
// Use atomic profile counter increments.
bool Atomic = false;
+ // Use BFI to guide register promotion
+ bool UseBFIInPromotion = false;
+
// Name of the profile file to use as output
std::string InstrProfileOutput;
InstrProfOptions() = default;
};
-/// Insert frontend instrumentation based profiling.
+/// Insert frontend instrumentation based profiling. Parameter IsCS indicates
+/// if this is the context sensitive instrumentation.
ModulePass *createInstrProfilingLegacyPass(
- const InstrProfOptions &Options = InstrProfOptions());
+ const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false);
-// Insert AddressSanitizer (address sanity checking) instrumentation
-FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
- bool Recover = false,
- bool UseAfterScope = false);
-ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
- bool Recover = false,
- bool UseGlobalsGC = true,
- bool UseOdrIndicator = true);
-
-FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false,
- bool Recover = false);
+ModulePass *createInstrOrderFilePass();
// Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation
ModulePass *createDataFlowSanitizerPass(
const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
-// Options for EfficiencySanitizer sub-tools.
-struct EfficiencySanitizerOptions {
- enum Type {
- ESAN_None = 0,
- ESAN_CacheFrag,
- ESAN_WorkingSet,
- } ToolType = ESAN_None;
-
- EfficiencySanitizerOptions() = default;
-};
-
-// Insert EfficiencySanitizer instrumentation.
-ModulePass *createEfficiencySanitizerPass(
- const EfficiencySanitizerOptions &Options = EfficiencySanitizerOptions());
-
// Options for sanitizer coverage instrumentation.
struct SanitizerCoverageOptions {
enum Type {
diff --git a/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
new file mode 100644
index 000000000000..40007a9b8c53
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -0,0 +1,143 @@
+//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AddressSanitizer class which is a port of the legacy
+// AddressSanitizer pass to use the new PassManager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZERPASS_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_ADDRESSSANITIZERPASS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Frontend-provided metadata for source location.
+struct LocationMetadata {
+ StringRef Filename;
+ int LineNo = 0;
+ int ColumnNo = 0;
+
+ LocationMetadata() = default;
+
+ bool empty() const { return Filename.empty(); }
+ void parse(MDNode *MDN);
+};
+
+/// Frontend-provided metadata for global variables.
+class GlobalsMetadata {
+public:
+ struct Entry {
+ LocationMetadata SourceLoc;
+ StringRef Name;
+ bool IsDynInit = false;
+ bool IsBlacklisted = false;
+
+ Entry() = default;
+ };
+
+ /// Create a default uninitialized GlobalsMetadata instance.
+ GlobalsMetadata() = default;
+
+ /// Create an initialized GlobalsMetadata instance.
+ GlobalsMetadata(Module &M);
+
+ /// Returns metadata entry for a given global.
+ Entry get(GlobalVariable *G) const {
+ auto Pos = Entries.find(G);
+ return (Pos != Entries.end()) ? Pos->second : Entry();
+ }
+
+ /// Handle invalidation from the pass manager.
+ /// These results are never invalidated.
+ bool invalidate(Module &, const PreservedAnalyses &,
+ ModuleAnalysisManager::Invalidator &) {
+ return false;
+ }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
+
+private:
+ DenseMap<GlobalVariable *, Entry> Entries;
+};
+
+/// The ASanGlobalsMetadataAnalysis initializes and returns a GlobalsMetadata
+/// object. More specifically, ASan requires looking at all globals registered
+/// in 'llvm.asan.globals' before running, which only depends on reading module
+/// level metadata. This analysis is required to run before running the
+/// AddressSanitizerPass since it collects that metadata.
+/// The legacy pass manager equivalent of this is ASanGlobalsMetadataLegacyPass.
+class ASanGlobalsMetadataAnalysis
+ : public AnalysisInfoMixin<ASanGlobalsMetadataAnalysis> {
+public:
+ using Result = GlobalsMetadata;
+
+ Result run(Module &, ModuleAnalysisManager &);
+
+private:
+ friend AnalysisInfoMixin<ASanGlobalsMetadataAnalysis>;
+ static AnalysisKey Key;
+};
+
+/// Public interface to the address sanitizer pass for instrumenting code to
+/// check for various memory errors at runtime.
+///
+/// The sanitizer itself is a function pass that works by inserting various
+/// calls to the ASan runtime library functions. The runtime library essentially
+/// replaces malloc() and free() with custom implementations that allow regions
+/// surrounding requested memory to be checked for invalid accesses.
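+///
+/// A minimal new-pass-manager usage sketch (illustrative only; it assumes the
+/// analysis managers are set up, e.g. via PassBuilder, so that the required
+/// ASanGlobalsMetadataAnalysis result is available):
+/// \code
+///   FunctionPassManager FPM;
+///   FPM.addPass(AddressSanitizerPass(/* CompileKernel */ false,
+///                                    /* Recover */ false,
+///                                    /* UseAfterScope */ true));
+/// \endcode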
+class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
+public:
+ explicit AddressSanitizerPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseAfterScope = false);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseAfterScope;
+};
+
+/// Public interface to the address sanitizer module pass for instrumenting code
+/// to check for various memory errors.
+///
+/// This adds 'asan.module_ctor' to 'llvm.global_ctors'. This pass may also
+/// run independently of the function address sanitizer.
+class ModuleAddressSanitizerPass
+ : public PassInfoMixin<ModuleAddressSanitizerPass> {
+public:
+ explicit ModuleAddressSanitizerPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseGlobalGC = true,
+ bool UseOdrIndicator = false);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseGlobalGC;
+ bool UseOdrIndicator;
+};
+
+// Insert AddressSanitizer (address sanity checking) instrumentation
+FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseAfterScope = false);
+ModulePass *createModuleAddressSanitizerLegacyPassPass(
+ bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true,
+ bool UseOdrIndicator = true);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/include/llvm/Transforms/Instrumentation/BoundsChecking.h
index 3d4f62c121c2..120c6a8fb09f 100644
--- a/include/llvm/Transforms/Instrumentation/BoundsChecking.h
+++ b/include/llvm/Transforms/Instrumentation/BoundsChecking.h
@@ -1,9 +1,8 @@
//===- BoundsChecking.h - Bounds checking instrumentation -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Instrumentation/CGProfile.h b/include/llvm/Transforms/Instrumentation/CGProfile.h
index c06c1a28715e..28fd3804dec9 100644
--- a/include/llvm/Transforms/Instrumentation/CGProfile.h
+++ b/include/llvm/Transforms/Instrumentation/CGProfile.h
@@ -1,9 +1,8 @@
//===- Transforms/Instrumentation/CGProfile.h -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h b/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
index 460342d1631b..18b428582046 100644
--- a/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
+++ b/include/llvm/Transforms/Instrumentation/ControlHeightReduction.h
@@ -1,9 +1,8 @@
//===- ControlHeightReduction.h - Control Height Reduction ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
index dd55fbe29eed..b3971e49754e 100644
--- a/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
+++ b/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
@@ -1,9 +1,8 @@
//===- Transforms/Instrumentation/GCOVProfiler.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
new file mode 100644
index 000000000000..e3104eeb1d36
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -0,0 +1,41 @@
+//===--------- Definition of the HWAddressSanitizer class -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hardware AddressSanitizer class which is a port of the
+// legacy HWAddressSanitizer pass to use the new PassManager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZERPASS_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_HWADDRESSSANITIZERPASS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// This is a public interface to the hardware address sanitizer pass for
+/// instrumenting code to check for various memory errors at runtime, similar to
+/// AddressSanitizer but based on partial hardware assistance.
+class HWAddressSanitizerPass : public PassInfoMixin<HWAddressSanitizerPass> {
+public:
+ explicit HWAddressSanitizerPass(bool CompileKernel = false,
+ bool Recover = false);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+private:
+ bool CompileKernel;
+ bool Recover;
+};
+
+FunctionPass *createHWAddressSanitizerLegacyPassPass(bool CompileKernel = false,
+ bool Recover = false);
+
+} // namespace llvm
+
+#endif
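
A short sketch (an illustration, not part of this commit) of adding the new-PM HWASan pass as a module pass, matching the run(Module &, ModuleAnalysisManager &) declaration above:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"

    using namespace llvm;

    void addHWAddressSanitizer(ModulePassManager &MPM) {
      // Recover=true keeps the program running after the first reported error.
      MPM.addPass(HWAddressSanitizerPass(/*CompileKernel=*/false,
                                         /*Recover=*/true));
    }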
diff --git a/include/llvm/Transforms/Instrumentation/InstrOrderFile.h b/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
new file mode 100644
index 000000000000..f1245d8fd785
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
@@ -0,0 +1,28 @@
+//===- InstrOrderFile.h ---- Late IR instrumentation for order file ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRORDERFILE_H
+#define LLVM_TRANSFORMS_INSTRORDERFILE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+
+/// The instrumentation pass for recording function order.
+class InstrOrderFilePass : public PassInfoMixin<InstrOrderFilePass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRORDERFILE_H
diff --git a/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 13fb3db4ae6f..8f76d4a1ce55 100644
--- a/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -1,9 +1,8 @@
//===- Transforms/Instrumentation/InstrProfiling.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -35,8 +34,9 @@ using LoadStorePair = std::pair<Instruction *, Instruction *>;
/// instrumentation pass.
class InstrProfiling : public PassInfoMixin<InstrProfiling> {
public:
- InstrProfiling() = default;
- InstrProfiling(const InstrProfOptions &Options) : Options(Options) {}
+ InstrProfiling() : IsCS(false) {}
+ InstrProfiling(const InstrProfOptions &Options, bool IsCS = false)
+ : Options(Options), IsCS(IsCS) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
bool run(Module &M, const TargetLibraryInfo &TLI);
@@ -61,6 +61,9 @@ private:
GlobalVariable *NamesVar;
size_t NamesSize;
+ // True if this lowering is for context-sensitive instrumentation.
+ bool IsCS;
+
// vector of counter load/store pairs to be register promoted.
std::vector<LoadStorePair> PromotionCandidates;
diff --git a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index 54f0e2f78230..0739d9e58a61 100644
--- a/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -1,9 +1,8 @@
//===- Transforms/Instrumentation/MemorySanitizer.h - MSan Pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,10 +18,18 @@
namespace llvm {
+struct MemorySanitizerOptions {
+ MemorySanitizerOptions() = default;
+ MemorySanitizerOptions(int TrackOrigins, bool Recover, bool Kernel)
+ : TrackOrigins(TrackOrigins), Recover(Recover), Kernel(Kernel) {}
+ int TrackOrigins = 0;
+ bool Recover = false;
+ bool Kernel = false;
+};
+
// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
-FunctionPass *createMemorySanitizerLegacyPassPass(int TrackOrigins = 0,
- bool Recover = false,
- bool EnableKmsan = false);
+FunctionPass *
+createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options = {});
/// A function pass for msan instrumentation.
///
@@ -31,17 +38,12 @@ FunctionPass *createMemorySanitizerLegacyPassPass(int TrackOrigins = 0,
/// yet, the pass inserts the declarations. Otherwise the existing globals are
/// used.
struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
- MemorySanitizerPass(int TrackOrigins = 0, bool Recover = false,
- bool EnableKmsan = false)
- : TrackOrigins(TrackOrigins), Recover(Recover), EnableKmsan(EnableKmsan) {
- }
+ MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
private:
- int TrackOrigins;
- bool Recover;
- bool EnableKmsan;
+ MemorySanitizerOptions Options;
};
}
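
The new MemorySanitizerOptions struct replaces the three loose constructor arguments; a hedged sketch of using it with both pass managers (the surrounding pipeline setup is assumed):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"

    using namespace llvm;

    void addMemorySanitizer(FunctionPassManager &FPM) {
      // Track origins two levels deep, keep running after a report, userspace MSan.
      MemorySanitizerOptions Opts(/*TrackOrigins=*/2, /*Recover=*/true,
                                  /*Kernel=*/false);
      FPM.addPass(MemorySanitizerPass(Opts));
      // Legacy pass manager equivalent (declared above):
      //   createMemorySanitizerLegacyPassPass(Opts);
    }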
diff --git a/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index fdc5df68a669..21cf291d82d1 100644
--- a/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -1,9 +1,8 @@
//===- Transforms/Instrumentation/PGOInstrumentation.h ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,22 +27,46 @@ class Instruction;
class Module;
/// The instrumentation (profile-instr-gen) pass for IR based PGO.
+// We use this pass to create COMDAT profile variables for context-
+// sensitive PGO (CSPGO). A separate pass is needed because CSPGO
+// instrumentation can run after LTO/ThinLTO linking, but the lld
+// linker must see all the COMDAT variables before linking. So this
+// pass always runs before linking for CSPGO.
+class PGOInstrumentationGenCreateVar
+ : public PassInfoMixin<PGOInstrumentationGenCreateVar> {
+public:
+ PGOInstrumentationGenCreateVar(std::string CSInstrName = "")
+ : CSInstrName(CSInstrName) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ std::string CSInstrName;
+};
+
+/// The instrumentation (profile-instr-gen) pass for IR based PGO.
class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> {
public:
+ PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ // True if this is context-sensitive instrumentation.
+ bool IsCS;
};
/// The profile annotation (profile-instr-use) pass for IR based PGO.
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
public:
PGOInstrumentationUse(std::string Filename = "",
- std::string RemappingFilename = "");
+ std::string RemappingFilename = "", bool IsCS = false);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
std::string ProfileFileName;
std::string ProfileRemappingFileName;
+ // True if this is context-sensitive instrumentation.
+ bool IsCS;
};
/// The indirect function call promotion pass.
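
A sketch of the ordering the CSPGO comments above describe (illustrative only; the helper function names here are hypothetical): the COMDAT profile variables are created before the (Thin)LTO link, while the context-sensitive instrumentation itself runs afterwards:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

    using namespace llvm;

    // Pre-link: make sure lld sees all CSPGO COMDAT variables.
    void addCSPGOPreLink(ModulePassManager &MPM, StringRef CSProfileGenFile) {
      MPM.addPass(PGOInstrumentationGenCreateVar(CSProfileGenFile.str()));
    }

    // Post-link: run the actual context-sensitive instrumentation.
    void addCSPGOPostLink(ModulePassManager &MPM) {
      MPM.addPass(PGOInstrumentationGen(/*IsCS=*/true));
    }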
diff --git a/include/llvm/Transforms/Instrumentation/PoisonChecking.h b/include/llvm/Transforms/Instrumentation/PoisonChecking.h
new file mode 100644
index 000000000000..606d3c255359
--- /dev/null
+++ b/include/llvm/Transforms/Instrumentation/PoisonChecking.h
@@ -0,0 +1,25 @@
+//===- PoisonChecking.h - ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct PoisonCheckingPass : public PassInfoMixin<PoisonCheckingPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+}
+
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_POISON_CHECKING_H
diff --git a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index 701e2e6ec89e..b4e7d9924ff6 100644
--- a/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -1,9 +1,8 @@
-//===- Transforms/Instrumentation/MemorySanitizer.h - TSan Pass -----------===//
+//===- Transforms/Instrumentation/ThreadSanitizer.h - TSan Pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/ObjCARC.h b/include/llvm/Transforms/ObjCARC.h
index 1897adc2ffbf..2f114c75e2e2 100644
--- a/include/llvm/Transforms/ObjCARC.h
+++ b/include/llvm/Transforms/ObjCARC.h
@@ -1,9 +1,8 @@
//===-- ObjCARC.h - ObjCARC Scalar Transformations --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 8fcf9296ba47..f9360b5ee2c8 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -1,9 +1,8 @@
//===-- Scalar.h - Scalar Transformations -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -138,6 +137,8 @@ Pass *createIndVarSimplifyPass();
// LICM - This pass is a loop invariant code motion and memory promotion pass.
//
Pass *createLICMPass();
+Pass *createLICMPass(unsigned LicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap);
//===----------------------------------------------------------------------===//
//
@@ -184,11 +185,13 @@ Pass *createLoopInstSimplifyPass();
// LoopUnroll - This pass is a simple loop unrolling pass.
//
Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
- int Threshold = -1, int Count = -1,
- int AllowPartial = -1, int Runtime = -1,
- int UpperBound = -1, int AllowPeeling = -1);
+ bool ForgetAllSCEV = false, int Threshold = -1,
+ int Count = -1, int AllowPartial = -1,
+ int Runtime = -1, int UpperBound = -1,
+ int AllowPeeling = -1);
// Create an unrolling pass for full unrolling that uses exact trip count only.
-Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false);
+Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+ bool ForgetAllSCEV = false);
//===----------------------------------------------------------------------===//
//
@@ -360,9 +363,15 @@ Pass *createLowerGuardIntrinsicPass();
//===----------------------------------------------------------------------===//
//
+// LowerWidenableCondition - Lower widenable condition to i1 true.
+//
+Pass *createLowerWidenableConditionPass();
+
+//===----------------------------------------------------------------------===//
+//
// MergeICmps - Merge integer comparison chains into a memcmp
//
-Pass *createMergeICmpsPass();
+Pass *createMergeICmpsLegacyPass();
//===----------------------------------------------------------------------===//
//
@@ -374,9 +383,10 @@ Pass *createCorrelatedValuePropagationPass();
//
// InferAddressSpaces - Modify users of addrspacecast instructions with values
// in the source address space if using the destination address space is slower
-// on the target.
+// on the target. If AddressSpace is left to its default value, it will be
+// obtained from the TargetTransformInfo.
//
-FunctionPass *createInferAddressSpacesPass();
+FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
extern char &InferAddressSpacesID;
//===----------------------------------------------------------------------===//
@@ -453,6 +463,12 @@ FunctionPass *createLoopDistributePass();
//===----------------------------------------------------------------------===//
//
+// LoopFuse - Fuse loops.
+//
+FunctionPass *createLoopFusePass();
+
+//===----------------------------------------------------------------------===//
+//
// LoopLoadElimination - Perform loop-aware load elimination.
//
FunctionPass *createLoopLoadEliminationPass();
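
For illustration (not part of this commit), the new legacy-PM factory overloads declared in this hunk might be used as below; in practice the returned passes are handed straight to a legacy PassManager, which takes ownership:

    #include "llvm/Pass.h"
    #include "llvm/Transforms/Scalar.h"

    using namespace llvm;

    void createNewScalarPasses() {
      // Unrolling that forgets all SCEV state rather than one loop at a time.
      Pass *Unroll = createSimpleLoopUnrollPass(/*OptLevel=*/2,
                                                /*OnlyWhenForced=*/false,
                                                /*ForgetAllSCEV=*/true);
      // Lower llvm.widenable.condition to the constant i1 true.
      Pass *LWC = createLowerWidenableConditionPass();
      // Rewrite addrspacecast users into address space 0; the default ~0u
      // means the address space is obtained from TargetTransformInfo instead.
      FunctionPass *IAS = createInferAddressSpacesPass(/*AddressSpace=*/0);
      (void)Unroll; (void)LWC; (void)IAS; // normally added to a PassManager
    }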
diff --git a/include/llvm/Transforms/Scalar/ADCE.h b/include/llvm/Transforms/Scalar/ADCE.h
index f98af62c1a76..7d8b7ae68c00 100644
--- a/include/llvm/Transforms/Scalar/ADCE.h
+++ b/include/llvm/Transforms/Scalar/ADCE.h
@@ -1,9 +1,8 @@
//===- ADCE.h - Aggressive dead code elimination ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
index 61975036e9ff..fb1687e1ac5d 100644
--- a/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
+++ b/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h
@@ -1,9 +1,8 @@
//===---- AlignmentFromAssumptions.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/BDCE.h b/include/llvm/Transforms/Scalar/BDCE.h
index d7d2730a8033..996622bccdba 100644
--- a/include/llvm/Transforms/Scalar/BDCE.h
+++ b/include/llvm/Transforms/Scalar/BDCE.h
@@ -1,9 +1,8 @@
//===---- BDCE.cpp - Bit-tracking dead code elimination ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/CallSiteSplitting.h b/include/llvm/Transforms/Scalar/CallSiteSplitting.h
index b2ca2a1c09ae..b6055639e8a8 100644
--- a/include/llvm/Transforms/Scalar/CallSiteSplitting.h
+++ b/include/llvm/Transforms/Scalar/CallSiteSplitting.h
@@ -1,9 +1,8 @@
//===- CallSiteSplitting..h - Callsite Splitting ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h
index ba32e122fa10..6b0fc9c1dd07 100644
--- a/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -1,9 +1,8 @@
//==- ConstantHoisting.h - Prepare code for expensive constants --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,6 +55,7 @@ class DominatorTree;
class Function;
class GlobalVariable;
class Instruction;
+class ProfileSummaryInfo;
class TargetTransformInfo;
/// A private "module" namespace for types and utilities used by
@@ -125,9 +125,10 @@ public:
// Glue for old PM.
bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- BlockFrequencyInfo *BFI, BasicBlock &Entry);
+ BlockFrequencyInfo *BFI, BasicBlock &Entry,
+ ProfileSummaryInfo *PSI);
- void releaseMemory() {
+ void cleanup() {
ClonedCastMap.clear();
ConstIntCandVec.clear();
for (auto MapEntry : ConstGEPCandMap)
@@ -149,6 +150,7 @@ private:
LLVMContext *Ctx;
const DataLayout *DL;
BasicBlock *Entry;
+ ProfileSummaryInfo *PSI;
/// Keeps track of constant candidates found in the function.
using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
diff --git a/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h b/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h
index 20930699b557..25795de5d951 100644
--- a/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h
+++ b/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h
@@ -1,9 +1,8 @@
//===- CorrelatedValuePropagation.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/DCE.h b/include/llvm/Transforms/Scalar/DCE.h
index 273346cf81d9..974e4b20d152 100644
--- a/include/llvm/Transforms/Scalar/DCE.h
+++ b/include/llvm/Transforms/Scalar/DCE.h
@@ -1,9 +1,8 @@
//===- DCE.h - Dead code elimination ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/DeadStoreElimination.h b/include/llvm/Transforms/Scalar/DeadStoreElimination.h
index cfeb21814232..b66b0de90c79 100644
--- a/include/llvm/Transforms/Scalar/DeadStoreElimination.h
+++ b/include/llvm/Transforms/Scalar/DeadStoreElimination.h
@@ -1,9 +1,8 @@
//===- DeadStoreElimination.h - Fast Dead Store Elimination -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/DivRemPairs.h b/include/llvm/Transforms/Scalar/DivRemPairs.h
index 0a4346f33b12..7401e02cb4ab 100644
--- a/include/llvm/Transforms/Scalar/DivRemPairs.h
+++ b/include/llvm/Transforms/Scalar/DivRemPairs.h
@@ -1,9 +1,8 @@
//===- DivRemPairs.h - Hoist/decompose integer division and remainder -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/EarlyCSE.h b/include/llvm/Transforms/Scalar/EarlyCSE.h
index faf03a4ec489..1e7fd71dcbf4 100644
--- a/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -1,9 +1,8 @@
//===- EarlyCSE.h - Simple and fast CSE pass --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/Float2Int.h b/include/llvm/Transforms/Scalar/Float2Int.h
index 206ee980109b..06aeb8322527 100644
--- a/include/llvm/Transforms/Scalar/Float2Int.h
+++ b/include/llvm/Transforms/Scalar/Float2Int.h
@@ -1,9 +1,8 @@
//===-- Float2Int.h - Demote floating point ops to work on integers -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h
index 9827678b89f2..9fe00a9e7f2d 100644
--- a/include/llvm/Transforms/Scalar/GVN.h
+++ b/include/llvm/Transforms/Scalar/GVN.h
@@ -1,9 +1,8 @@
//===- GVN.h - Eliminate redundant values and loads -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h
index 8b346969b1e9..3dc4515f85a1 100644
--- a/include/llvm/Transforms/Scalar/GVNExpression.h
+++ b/include/llvm/Transforms/Scalar/GVNExpression.h
@@ -1,9 +1,8 @@
//===- GVNExpression.h - GVN Expression classes -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/GuardWidening.h b/include/llvm/Transforms/Scalar/GuardWidening.h
index 2bc0940ac715..06dc9ac97bec 100644
--- a/include/llvm/Transforms/Scalar/GuardWidening.h
+++ b/include/llvm/Transforms/Scalar/GuardWidening.h
@@ -1,9 +1,8 @@
//===- GuardWidening.h - ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,7 +16,9 @@
#ifndef LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
#define LLVM_TRANSFORMS_SCALAR_GUARD_WIDENING_H
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
namespace llvm {
@@ -25,6 +26,8 @@ class Function;
struct GuardWideningPass : public PassInfoMixin<GuardWideningPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
}
diff --git a/include/llvm/Transforms/Scalar/IVUsersPrinter.h b/include/llvm/Transforms/Scalar/IVUsersPrinter.h
index fad00d86a95f..a1f20d9ca983 100644
--- a/include/llvm/Transforms/Scalar/IVUsersPrinter.h
+++ b/include/llvm/Transforms/Scalar/IVUsersPrinter.h
@@ -1,9 +1,8 @@
//===- IVUsersPrinter.h - Induction Variable Users Printing -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/IndVarSimplify.h b/include/llvm/Transforms/Scalar/IndVarSimplify.h
index e321c8fc6e9c..3c20537ab76a 100644
--- a/include/llvm/Transforms/Scalar/IndVarSimplify.h
+++ b/include/llvm/Transforms/Scalar/IndVarSimplify.h
@@ -1,9 +1,8 @@
//===- IndVarSimplify.h - Induction Variable Simplification -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h b/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h
index 311c549b8326..b1e700714e51 100644
--- a/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h
+++ b/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h
@@ -1,9 +1,8 @@
//===- InductiveRangeCheckElimination.h - IRCE ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/InstSimplifyPass.h b/include/llvm/Transforms/Scalar/InstSimplifyPass.h
index da79a13eb7cf..0c30b6260536 100644
--- a/include/llvm/Transforms/Scalar/InstSimplifyPass.h
+++ b/include/llvm/Transforms/Scalar/InstSimplifyPass.h
@@ -1,9 +1,8 @@
//===- InstSimplifyPass.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/JumpThreading.h b/include/llvm/Transforms/Scalar/JumpThreading.h
index 9894345645a1..0464d40c45e6 100644
--- a/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -1,9 +1,8 @@
//===- JumpThreading.h - thread control through conditional BBs -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,7 +22,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/ValueHandle.h"
#include <memory>
#include <utility>
diff --git a/include/llvm/Transforms/Scalar/LICM.h b/include/llvm/Transforms/Scalar/LICM.h
index 68ad190c7647..f0ea928abd49 100644
--- a/include/llvm/Transforms/Scalar/LICM.h
+++ b/include/llvm/Transforms/Scalar/LICM.h
@@ -1,9 +1,8 @@
//===- LICM.h - Loop Invariant Code Motion Pass -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,9 +38,21 @@
namespace llvm {
+extern cl::opt<unsigned> SetLicmMssaOptCap;
+extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
+
/// Performs Loop Invariant Code Motion Pass.
class LICMPass : public PassInfoMixin<LICMPass> {
+ unsigned LicmMssaOptCap;
+ unsigned LicmMssaNoAccForPromotionCap;
+
public:
+ LICMPass()
+ : LicmMssaOptCap(SetLicmMssaOptCap),
+ LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
+ LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+ : LicmMssaOptCap(LicmMssaOptCap),
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
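
A hedged sketch of the new LICMPass constructors: the default constructor reads the SetLicmMssa* cl::opt values shown above, while the two-argument form overrides them. createFunctionToLoopPassAdaptor from LoopPassManager.h is assumed for scheduling the loop pass inside a function pipeline, and the cap values are made up for illustration:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/LICM.h"
    #include "llvm/Transforms/Scalar/LoopPassManager.h"

    using namespace llvm;

    void addLICM(FunctionPassManager &FPM) {
      // Explicit MemorySSA caps; LICMPass() would use the command-line defaults.
      FPM.addPass(createFunctionToLoopPassAdaptor(
          LICMPass(/*LicmMssaOptCap=*/100,
                   /*LicmMssaNoAccForPromotionCap=*/250)));
    }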
diff --git a/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h b/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
index e1b33799578b..3f250fc1ce8c 100644
--- a/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
+++ b/include/llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/LoopAccessAnalysisPrinter.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/LoopDataPrefetch.h b/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
index e1ad67ac6fff..9ebd5984cea9 100644
--- a/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
+++ b/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
@@ -1,10 +1,9 @@
//===-------- LoopDataPrefetch.h - Loop Data Prefetching Pass ---*- C++ -*-===//
//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/LoopDeletion.h b/include/llvm/Transforms/Scalar/LoopDeletion.h
index 7b8cb1e115c9..557616e2e6ba 100644
--- a/include/llvm/Transforms/Scalar/LoopDeletion.h
+++ b/include/llvm/Transforms/Scalar/LoopDeletion.h
@@ -1,9 +1,8 @@
//===- LoopDeletion.h - Loop Deletion ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopDistribute.h b/include/llvm/Transforms/Scalar/LoopDistribute.h
index 2bf1c9d696d5..1a82176490c5 100644
--- a/include/llvm/Transforms/Scalar/LoopDistribute.h
+++ b/include/llvm/Transforms/Scalar/LoopDistribute.h
@@ -1,9 +1,8 @@
//===- LoopDistribute.cpp - Loop Distribution Pass --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopFuse.h b/include/llvm/Transforms/Scalar/LoopFuse.h
new file mode 100644
index 000000000000..d3a02db6bd28
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LoopFuse.h
@@ -0,0 +1,30 @@
+//===- LoopFuse.h - Loop Fusion Pass ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the Loop Fusion pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+class LoopFusePass : public PassInfoMixin<LoopFusePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPFUSE_H
diff --git a/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 7added8d2c61..d2fff8bb5743 100644
--- a/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -1,9 +1,8 @@
//===- LoopIdiomRecognize.h - Loop Idiom Recognize Pass ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopInstSimplify.h b/include/llvm/Transforms/Scalar/LoopInstSimplify.h
index 04dc79c3fa57..f6e86d11ed95 100644
--- a/include/llvm/Transforms/Scalar/LoopInstSimplify.h
+++ b/include/llvm/Transforms/Scalar/LoopInstSimplify.h
@@ -1,9 +1,8 @@
//===- LoopInstSimplify.h - Loop Inst Simplify Pass -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopLoadElimination.h b/include/llvm/Transforms/Scalar/LoopLoadElimination.h
index b0514a4a7c98..65b9aabb8f51 100644
--- a/include/llvm/Transforms/Scalar/LoopLoadElimination.h
+++ b/include/llvm/Transforms/Scalar/LoopLoadElimination.h
@@ -1,9 +1,8 @@
//===- LoopLoadElimination.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopPassManager.h b/include/llvm/Transforms/Scalar/LoopPassManager.h
index 46ebb74c413c..61ec58585fd0 100644
--- a/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -1,9 +1,8 @@
//===- LoopPassManager.h - Loop pass management -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/LoopPredication.h b/include/llvm/Transforms/Scalar/LoopPredication.h
index 57398bdb6bd1..252daafab7a3 100644
--- a/include/llvm/Transforms/Scalar/LoopPredication.h
+++ b/include/llvm/Transforms/Scalar/LoopPredication.h
@@ -1,9 +1,8 @@
//===- LoopPredication.h - Guard based loop predication pass ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopRotation.h b/include/llvm/Transforms/Scalar/LoopRotation.h
index ea8d5618e6f7..254e6072906a 100644
--- a/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -1,9 +1,8 @@
//===- LoopRotation.h - Loop Rotation -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h b/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
index 7628c7413eac..2d718592aef5 100644
--- a/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
+++ b/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
@@ -1,9 +1,8 @@
//===- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopSink.h b/include/llvm/Transforms/Scalar/LoopSink.h
index 371a7c8d2c44..234c48cbebc5 100644
--- a/include/llvm/Transforms/Scalar/LoopSink.h
+++ b/include/llvm/Transforms/Scalar/LoopSink.h
@@ -1,9 +1,8 @@
//===- LoopSink.h - Loop Sink Pass ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopStrengthReduce.h b/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
index 62c038a3857d..5cf805bc4939 100644
--- a/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
+++ b/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
@@ -1,9 +1,8 @@
//===- LoopStrengthReduce.h - Loop Strength Reduce Pass ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
index fc69aa361059..7920269b0fb2 100644
--- a/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
+++ b/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
@@ -1,9 +1,8 @@
//===- LoopUnrollAndJamPass.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index e38e983cc9eb..a84d889a83ad 100644
--- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -1,9 +1,8 @@
//===- LoopUnrollPass.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -16,6 +15,8 @@
namespace llvm {
+extern cl::opt<bool> ForgetSCEVInLoopUnroll;
+
class Function;
class Loop;
class LPMUpdater;
@@ -29,9 +30,16 @@ class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> {
/// metadata are considered. All other loops are skipped.
const bool OnlyWhenForced;
+ /// If true, forget all loops when unrolling. If false, forget top-most loop
+ /// of the currently processed loops, which removes one entry at a time from
+ /// the internal SCEV records. For large loops, the former is faster.
+ const bool ForgetSCEV;
+
public:
- explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false)
- : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+ explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
+ bool ForgetSCEV = false)
+ : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+ ForgetSCEV(ForgetSCEV) {}
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
@@ -61,8 +69,15 @@ struct LoopUnrollOptions {
/// metadata are considered. All other loops are skipped.
bool OnlyWhenForced;
- LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false)
- : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {}
+ /// If true, forget all loops when unrolling. If false, forget top-most loop
+ /// of the currently processed loops, which removes one entry at a time from
+ /// the internal SCEV records. For large loops, the former is faster.
+ const bool ForgetSCEV;
+
+ LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false,
+ bool ForgetSCEV = false)
+ : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
+ ForgetSCEV(ForgetSCEV) {}
/// Enables or disables partial unrolling. When disabled only full unrolling
/// is allowed.
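
A sketch of the new ForgetSCEV knob in both unrolling passes; that LoopUnrollPass accepts a LoopUnrollOptions argument is an assumption about the surrounding header (not visible in this hunk), and the loop-pass adaptor comes from LoopPassManager.h:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/LoopPassManager.h"
    #include "llvm/Transforms/Scalar/LoopUnrollPass.h"

    using namespace llvm;

    void addUnrolling(FunctionPassManager &FPM) {
      // Full unrolling as a loop pass, dropping all cached SCEV state.
      FPM.addPass(createFunctionToLoopPassAdaptor(
          LoopFullUnrollPass(/*OptLevel=*/2, /*OnlyWhenForced=*/false,
                             /*ForgetSCEV=*/true)));
      // Partial/runtime unrolling configured through LoopUnrollOptions.
      FPM.addPass(LoopUnrollPass(
          LoopUnrollOptions(/*OptLevel=*/2, /*OnlyWhenForced=*/false,
                            /*ForgetSCEV=*/true)));
    }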
diff --git a/include/llvm/Transforms/Scalar/LowerAtomic.h b/include/llvm/Transforms/Scalar/LowerAtomic.h
index a4a2e7aafe44..40f8ca571f19 100644
--- a/include/llvm/Transforms/Scalar/LowerAtomic.h
+++ b/include/llvm/Transforms/Scalar/LowerAtomic.h
@@ -1,9 +1,8 @@
//===- LowerAtomic.cpp - Lower atomic intrinsics ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
index b6ee6523697c..4e47ff70d557 100644
--- a/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
+++ b/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h
@@ -1,9 +1,8 @@
//===- LowerExpectIntrinsic.h - LowerExpectIntrinsic pass -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h b/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
index a9f19f6b84b4..ce97b9e4c386 100644
--- a/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
+++ b/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
@@ -1,9 +1,8 @@
//===--- LowerGuardIntrinsic.h - Lower the guard intrinsic ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/LowerWidenableCondition.h b/include/llvm/Transforms/Scalar/LowerWidenableCondition.h
new file mode 100644
index 000000000000..7c1e64b8f3a9
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/LowerWidenableCondition.h
@@ -0,0 +1,26 @@
+//===--- LowerWidenableCondition.h - Lower widenable conditions --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.widenable.condition intrinsic to its default
+// value, which is i1 true.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERWIDENABLECONDITION_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERWIDENABLECONDITION_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LowerWidenableConditionPass : PassInfoMixin<LowerWidenableConditionPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+}
+
+#endif //LLVM_TRANSFORMS_SCALAR_LOWERWIDENABLECONDITION_H
diff --git a/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h b/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
index 41b4aada2baa..525174734303 100644
--- a/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
+++ b/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h
@@ -1,9 +1,8 @@
//===-- MakeGuardsExplicit.h - Turn guard intrinsics into guard branches --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 046c808bd051..5386f58b2b82 100644
--- a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -1,9 +1,8 @@
//===- MemCpyOptimizer.h - memcpy optimization ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/MergeICmps.h b/include/llvm/Transforms/Scalar/MergeICmps.h
new file mode 100644
index 000000000000..63bdbf8f4d09
--- /dev/null
+++ b/include/llvm/Transforms/Scalar/MergeICmps.h
@@ -0,0 +1,25 @@
+//===- MergeICmps.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
+#define LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct MergeICmpsPass
+ : PassInfoMixin<MergeICmpsPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_MERGEICMPS_H
diff --git a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index 48df09cdec9e..9071a56532f8 100644
--- a/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -1,9 +1,8 @@
//===- MergedLoadStoreMotion.h - merge and hoist/sink load/stores ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/NaryReassociate.h b/include/llvm/Transforms/Scalar/NaryReassociate.h
index e835bd5f0761..26f5fe185dd5 100644
--- a/include/llvm/Transforms/Scalar/NaryReassociate.h
+++ b/include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -1,9 +1,8 @@
//===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/NewGVN.h b/include/llvm/Transforms/Scalar/NewGVN.h
index 3f7541863a19..1f3680fec79c 100644
--- a/include/llvm/Transforms/Scalar/NewGVN.h
+++ b/include/llvm/Transforms/Scalar/NewGVN.h
@@ -1,9 +1,8 @@
//===- NewGVN.h - Global Value Numbering Pass -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h b/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
index 7f73831e0eb3..fd5a06c5051d 100644
--- a/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
+++ b/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
@@ -1,9 +1,8 @@
//===--- PartiallyInlineLibCalls.h - Partially inline libcalls --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h
index ba7586dffd9d..2db8d8ce309c 100644
--- a/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/include/llvm/Transforms/Scalar/Reassociate.h
@@ -1,9 +1,8 @@
//===- Reassociate.h - Reassociate binary expressions -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,7 +82,14 @@ protected:
static const unsigned GlobalReassociateLimit = 10;
static const unsigned NumBinaryOps =
Instruction::BinaryOpsEnd - Instruction::BinaryOpsBegin;
- DenseMap<std::pair<Value *, Value *>, unsigned> PairMap[NumBinaryOps];
+
+ struct PairMapValue {
+ WeakVH Value1;
+ WeakVH Value2;
+ unsigned Score;
+ bool isValid() const { return Value1 && Value2; }
+ };
+ DenseMap<std::pair<Value *, Value *>, PairMapValue> PairMap[NumBinaryOps];
bool MadeChange;
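Since reassociation can delete the Values a cached pair refers to, the WeakVH handles in PairMapValue let stale cache entries be detected rather than dereferenced. A rough sketch of the lookup pattern this enables inside the pass; Idx, Key and the surrounding code are hypothetical:

  // Only trust a cached score while both handles still point at live values;
  // once either value is deleted the WeakVH goes null and isValid() is false.
  auto It = PairMap[Idx].find(Key);
  unsigned CachedScore = 0;
  if (It != PairMap[Idx].end() && It->second.isValid())
    CachedScore = It->second.Score;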
diff --git a/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h b/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h
index 128f176f4420..12773c16dcc2 100644
--- a/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h
+++ b/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h
@@ -1,9 +1,8 @@
//===- RewriteStatepointsForGC.h - ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/SCCP.h b/include/llvm/Transforms/Scalar/SCCP.h
index 0abbb32fde6a..0ffd983eb3e0 100644
--- a/include/llvm/Transforms/Scalar/SCCP.h
+++ b/include/llvm/Transforms/Scalar/SCCP.h
@@ -1,9 +1,8 @@
//===- SCCP.cpp - Sparse Conditional Constant Propagation -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/SROA.h b/include/llvm/Transforms/Scalar/SROA.h
index b36c6f492be1..864a0cbd9db1 100644
--- a/include/llvm/Transforms/Scalar/SROA.h
+++ b/include/llvm/Transforms/Scalar/SROA.h
@@ -1,9 +1,8 @@
//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -19,7 +18,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Support/Compiler.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/Transforms/Scalar/Scalarizer.h b/include/llvm/Transforms/Scalar/Scalarizer.h
index 1a0b9a2b638c..81363130e2e3 100644
--- a/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -1,9 +1,8 @@
//===- Scalarizer.h --- Scalarize vector operations -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index eed50ec96161..33c1faaeee0b 100644
--- a/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -1,9 +1,8 @@
//===- SimpleLoopUnswitch.h - Hoist loop-invariant control flow -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/SimplifyCFG.h b/include/llvm/Transforms/Scalar/SimplifyCFG.h
index ce0a35fc06bd..f9792d38bbe6 100644
--- a/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -1,9 +1,8 @@
//===- SimplifyCFG.h - Simplify and canonicalize the CFG --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/include/llvm/Transforms/Scalar/Sink.h b/include/llvm/Transforms/Scalar/Sink.h
index f9b3cb0fae39..6cbe964d1580 100644
--- a/include/llvm/Transforms/Scalar/Sink.h
+++ b/include/llvm/Transforms/Scalar/Sink.h
@@ -1,9 +1,8 @@
//===-- Sink.h - Code Sinking -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h b/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
index 4a0bfd754723..3c7dafe71e8e 100644
--- a/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
+++ b/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h
@@ -1,9 +1,8 @@
//===- SpeculateAroundPHIs.h - Speculate around PHIs ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/include/llvm/Transforms/Scalar/SpeculativeExecution.h
index d00e950222a0..14da86483213 100644
--- a/include/llvm/Transforms/Scalar/SpeculativeExecution.h
+++ b/include/llvm/Transforms/Scalar/SpeculativeExecution.h
@@ -1,9 +1,8 @@
//===- SpeculativeExecution.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/TailRecursionElimination.h b/include/llvm/Transforms/Scalar/TailRecursionElimination.h
index 793f9bc152ed..906867644504 100644
--- a/include/llvm/Transforms/Scalar/TailRecursionElimination.h
+++ b/include/llvm/Transforms/Scalar/TailRecursionElimination.h
@@ -1,9 +1,8 @@
//===---- TailRecursionElimination.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 018b22a932e6..2d5942a3f569 100644
--- a/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -1,9 +1,8 @@
//===- WarnMissedTransforms.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils.h b/include/llvm/Transforms/Utils.h
index 378552775c77..6e03453babf1 100644
--- a/include/llvm/Transforms/Utils.h
+++ b/include/llvm/Transforms/Utils.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Utils.h - Utility Transformations --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index eaad06a10819..0b570c0d1342 100644
--- a/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -1,9 +1,8 @@
//===- ASanStackFrameLayout.h - ComputeASanStackFrameLayout -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/AddDiscriminators.h b/include/llvm/Transforms/Utils/AddDiscriminators.h
index 4dad06e6c125..f512c6c06331 100644
--- a/include/llvm/Transforms/Utils/AddDiscriminators.h
+++ b/include/llvm/Transforms/Utils/AddDiscriminators.h
@@ -1,9 +1,8 @@
//===- AddDiscriminators.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 5b16a2c0d0b1..4d861ffe9a31 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -1,9 +1,8 @@
//===- Transform/Utils/BasicBlockUtils.h - BasicBlock Utils -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,9 +17,9 @@
// FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/InstrTypes.h"
#include <cassert>
@@ -36,19 +35,38 @@ class LoopInfo;
class MDNode;
class MemoryDependenceResults;
class MemorySSAUpdater;
+class PostDominatorTree;
class ReturnInst;
class TargetLibraryInfo;
class Value;
+/// Replace the contents of every block in \p BBs with a single unreachable
+/// instruction. If \p Updates is specified, collect all necessary DT updates
+/// into this vector. If \p KeepOneInputPHIs is true, one-input Phis in
+/// successors of blocks being deleted will be preserved.
+void DetatchDeadBlocks(ArrayRef <BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs = false);
+
/// Delete the specified block, which must have no predecessors.
-void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
+void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
+ bool KeepOneInputPHIs = false);
/// Delete the specified blocks from \p BB. The set of deleted blocks must have
/// no predecessors that are not being deleted themselves. \p BBs must have no
/// duplicating blocks. If there are loops among this set of blocks, all
/// relevant loop info updates should be done before this function is called.
-void DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
- DomTreeUpdater *DTU = nullptr);
+/// If \p KeepOneInputPHIs is true, one-input Phis in successors of blocks
+/// being deleted will be preserved.
+void DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs,
+ DomTreeUpdater *DTU = nullptr,
+ bool KeepOneInputPHIs = false);
+
+/// Delete all basic blocks from \p F that are not reachable from its entry
+/// node. If \p KeepOneInputPHIs is true, one-input Phis in successors of
+/// blocks being deleted will be preserved.
+bool EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr,
+ bool KeepOneInputPHIs = false);
/// We know that BB has one predecessor. If there are any single-entry PHI nodes
/// in it, fold them away. This handles the case when all entries to the PHI
@@ -92,24 +110,27 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To);
/// during critical edge splitting.
struct CriticalEdgeSplittingOptions {
DominatorTree *DT;
+ PostDominatorTree *PDT;
LoopInfo *LI;
MemorySSAUpdater *MSSAU;
bool MergeIdenticalEdges = false;
- bool DontDeleteUselessPHIs = false;
+ bool KeepOneInputPHIs = false;
bool PreserveLCSSA = false;
+ bool IgnoreUnreachableDests = false;
CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr,
LoopInfo *LI = nullptr,
- MemorySSAUpdater *MSSAU = nullptr)
- : DT(DT), LI(LI), MSSAU(MSSAU) {}
+ MemorySSAUpdater *MSSAU = nullptr,
+ PostDominatorTree *PDT = nullptr)
+ : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU) {}
CriticalEdgeSplittingOptions &setMergeIdenticalEdges() {
MergeIdenticalEdges = true;
return *this;
}
- CriticalEdgeSplittingOptions &setDontDeleteUselessPHIs() {
- DontDeleteUselessPHIs = true;
+ CriticalEdgeSplittingOptions &setKeepOneInputPHIs() {
+ KeepOneInputPHIs = true;
return *this;
}
@@ -117,6 +138,11 @@ struct CriticalEdgeSplittingOptions {
PreserveLCSSA = true;
return *this;
}
+
+ CriticalEdgeSplittingOptions &setIgnoreUnreachableDests() {
+ IgnoreUnreachableDests = true;
+ return *this;
+ }
};
/// If this edge is a critical edge, insert a new node to split the critical
@@ -259,7 +285,8 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
/// SplitBefore
/// Tail
///
-/// If Unreachable is true, then ThenBlock ends with
+/// If \p ThenBlock is not specified, a new block will be created for it.
+/// If \p Unreachable is true, the newly created block will end with
/// UnreachableInst, otherwise it branches to Tail.
/// Returns the NewBasicBlock's terminator.
///
@@ -268,7 +295,8 @@ Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights = nullptr,
DominatorTree *DT = nullptr,
- LoopInfo *LI = nullptr);
+ LoopInfo *LI = nullptr,
+ BasicBlock *ThenBlock = nullptr);
/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
/// but also creates the ElseBlock.
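A short sketch of the reworked deletion helpers above, assuming F is a Function, DT its DominatorTree, and DeadBBs a list of blocks already known to be dead:

  #include "llvm/Analysis/DomTreeUpdater.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"

  using namespace llvm;

  void removeDeadBlocks(Function &F, DominatorTree &DT,
                        SmallVectorImpl<BasicBlock *> &DeadBBs) {
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    // Delete the collected blocks, keeping one-input PHIs in their successors.
    DeleteDeadBlocks(DeadBBs, &DTU, /*KeepOneInputPHIs=*/true);
    // Then drop whatever is no longer reachable from the entry block.
    EliminateUnreachableBlocks(F, &DTU, /*KeepOneInputPHIs=*/false);
  }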
diff --git a/include/llvm/Transforms/Utils/BreakCriticalEdges.h b/include/llvm/Transforms/Utils/BreakCriticalEdges.h
index 9cc81a176cb6..3644f1ed7a13 100644
--- a/include/llvm/Transforms/Utils/BreakCriticalEdges.h
+++ b/include/llvm/Transforms/Utils/BreakCriticalEdges.h
@@ -1,9 +1,8 @@
//===- BreakCriticalEdges.h - Critical Edge Elimination Pass --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index 28efce6ac3fb..8421c31a36da 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -1,9 +1,8 @@
//===- BuildLibCalls.h - Utility builder for libcalls -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -71,12 +70,22 @@ namespace llvm {
/// Emit a call to the strcpy function to the builder, for the specified
/// pointer arguments.
Value *emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetLibraryInfo *TLI, StringRef Name = "strcpy");
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the stpcpy function to the builder, for the specified
+ /// pointer arguments.
+ Value *emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
/// Emit a call to the strncpy function to the builder, for the specified
/// pointer arguments and length.
Value *emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
- const TargetLibraryInfo *TLI, StringRef Name = "strncpy");
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the stpncpy function to the builder, for the specified
+ /// pointer arguments and length.
+ Value *emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
/// Emit a call to the __memcpy_chk function to the builder. This expects that
/// the Len and ObjSize have type 'intptr_t' and Dst/Src are pointers.
@@ -93,6 +102,47 @@ namespace llvm {
Value *emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI);
+ /// Emit a call to the bcmp function.
+ Value *emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the memccpy function.
+ Value *emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the snprintf function.
+ Value *emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+ ArrayRef<Value *> Args, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the sprintf function.
+ Value *emitSPrintf(Value *Dest, Value *Fmt, ArrayRef<Value *> VariadicArgs,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the strcat function.
+ Value *emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the strlcpy function.
+ Value *emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the strlcat function.
+ Value *emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the strncat function.
+ Value *emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the vsnprintf function.
+ Value *emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI);
+
+ /// Emit a call to the vsprintf function.
+ Value *emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI);
+
/// Emit a call to the unary function named 'Name' (e.g. 'floor'). This
/// function is known to take a single of type matching 'Op' and returns one
/// value with the same type. If 'Op' is a long double, 'l' is added as the
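A hedged sketch of the new libcall emitters, assuming an IRBuilder B already positioned at the insertion point, the module's DataLayout DL, a TargetLibraryInfo TLI, and Value pointers P1, P2, Len, Dst, Src and Size produced earlier:

  // The emitters typically return nullptr when TLI reports the corresponding
  // library function as unavailable, so callers should be ready to bail out.
  Value *Cmp = emitBCmp(P1, P2, Len, B, DL, &TLI);
  Value *Copied = emitStrLCpy(Dst, Src, Size, B, &TLI);
  if (!Cmp || !Copied)
    return false; // hypothetical "no transform performed" path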
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
index 6eca5ed2154e..471055921fa8 100644
--- a/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Utils/BypassSlowDivision.h ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/CallPromotionUtils.h b/include/llvm/Transforms/Utils/CallPromotionUtils.h
index 6e8ece723638..d9d171c6d8bd 100644
--- a/include/llvm/Transforms/Utils/CallPromotionUtils.h
+++ b/include/llvm/Transforms/Utils/CallPromotionUtils.h
@@ -1,9 +1,8 @@
//===- CallPromotionUtils.h - Utilities for call promotion ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/CanonicalizeAliases.h b/include/llvm/Transforms/Utils/CanonicalizeAliases.h
index f23263783fec..8f23a041a24e 100644
--- a/include/llvm/Transforms/Utils/CanonicalizeAliases.h
+++ b/include/llvm/Transforms/Utils/CanonicalizeAliases.h
@@ -1,9 +1,8 @@
//===-- CanonicalizeAliases.h - Alias Canonicalization Pass -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index f5e997324fc8..872ab9cab85c 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -1,9 +1,8 @@
//===- Cloning.h - Clone various parts of LLVM programs ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -230,10 +229,7 @@ public:
/// and all varargs at the callsite will be passed to any calls to
/// ForwardVarArgsTo. The caller of InlineFunction has to make sure any varargs
/// are only used by ForwardVarArgsTo.
-InlineResult InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR = nullptr,
- bool InsertLifetime = true);
-InlineResult InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+InlineResult InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
AAResults *CalleeAAR = nullptr,
bool InsertLifetime = true);
InlineResult InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
@@ -269,6 +265,13 @@ BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB,
ValueToValueMapTy &ValueMapping,
DomTreeUpdater &DTU);
+/// Updates profile information by adjusting the entry count by adding
+/// entryDelta, then scaling callsite information by the new count divided by
+/// the old count. VMap is used during inlining to also update the new clone.
+void updateProfileCallee(
+ Function *Callee, int64_t entryDelta,
+ const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
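A minimal sketch of the consolidated CallBase overload, assuming CB is a CallBase * pointing at a call or invoke whose callee should be inlined:

  InlineFunctionInfo IFI;
  // A single entry point now handles both CallInst and InvokeInst users.
  InlineResult Result = InlineFunction(CB, IFI);
  if (!Result) {
    // Inlining was refused; Result carries the reason.
  }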
diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h
index fee79fdc3bff..9d79ee1633f6 100644
--- a/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -1,9 +1,8 @@
//===- Transform/Utils/CodeExtractor.h - Code extraction util ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@ class BasicBlock;
class BlockFrequency;
class BlockFrequencyInfo;
class BranchProbabilityInfo;
+class AssumptionCache;
class CallInst;
class DominatorTree;
class Function;
@@ -57,6 +57,7 @@ class Value;
const bool AggregateArgs;
BlockFrequencyInfo *BFI;
BranchProbabilityInfo *BPI;
+ AssumptionCache *AC;
// If true, varargs functions can be extracted.
bool AllowVarArgs;
@@ -85,6 +86,7 @@ class Value;
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr,
+ AssumptionCache *AC = nullptr,
bool AllowVarArgs = false, bool AllowAlloca = false,
std::string Suffix = "");
@@ -95,6 +97,7 @@ class Value;
CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false,
BlockFrequencyInfo *BFI = nullptr,
BranchProbabilityInfo *BPI = nullptr,
+ AssumptionCache *AC = nullptr,
std::string Suffix = "");
/// Perform the extraction, returning the new function.
@@ -148,6 +151,16 @@ class Value;
BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
private:
+ struct LifetimeMarkerInfo {
+ bool SinkLifeStart = false;
+ bool HoistLifeEnd = false;
+ Instruction *LifeStart = nullptr;
+ Instruction *LifeEnd = nullptr;
+ };
+
+ LifetimeMarkerInfo getLifetimeMarkers(Instruction *Addr,
+ BasicBlock *ExitBlock) const;
+
void severSplitPHINodesOfEntry(BasicBlock *&Header);
void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
void splitReturnBlocks();
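A sketch of driving the extractor with the new AssumptionCache parameter, assuming Blocks describes a single-entry region of one function and DT/AC are that function's DominatorTree and AssumptionCache (the ".outlined" suffix is just an example):

  CodeExtractor CE(Blocks, &DT, /*AggregateArgs=*/false,
                   /*BFI=*/nullptr, /*BPI=*/nullptr, &AC,
                   /*AllowVarArgs=*/false, /*AllowAlloca=*/false, ".outlined");
  if (CE.isEligible()) {
    // Returns the newly outlined function, or nullptr if extraction failed.
    Function *Outlined = CE.extractCodeRegion();
    (void)Outlined;
  }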
diff --git a/include/llvm/Transforms/Utils/CtorUtils.h b/include/llvm/Transforms/Utils/CtorUtils.h
index 63e564dcb87a..3625ee662b1c 100644
--- a/include/llvm/Transforms/Utils/CtorUtils.h
+++ b/include/llvm/Transforms/Utils/CtorUtils.h
@@ -1,9 +1,8 @@
//===- CtorUtils.h - Helpers for working with global_ctors ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/EntryExitInstrumenter.h b/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
index f50c5c922081..3913693af359 100644
--- a/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
+++ b/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
@@ -1,9 +1,8 @@
//===- EntryExitInstrumenter.h - Function Entry/Exit Instrumentation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/EscapeEnumerator.h b/include/llvm/Transforms/Utils/EscapeEnumerator.h
index 1256dfdaca17..e667796c841b 100644
--- a/include/llvm/Transforms/Utils/EscapeEnumerator.h
+++ b/include/llvm/Transforms/Utils/EscapeEnumerator.h
@@ -1,9 +1,8 @@
//===-- EscapeEnumerator.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/Evaluator.h b/include/llvm/Transforms/Utils/Evaluator.h
index 9908ae6fd393..bffd65f71b2e 100644
--- a/include/llvm/Transforms/Utils/Evaluator.h
+++ b/include/llvm/Transforms/Utils/Evaluator.h
@@ -1,9 +1,8 @@
//===- Evaluator.h - LLVM IR evaluator --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h
index 35ba0950343c..4e2571b1d0b6 100644
--- a/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -1,9 +1,8 @@
//===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/FunctionImportUtils.h b/include/llvm/Transforms/Utils/FunctionImportUtils.h
index e24398b90012..9c2a9ea531ea 100644
--- a/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -1,9 +1,8 @@
//===- FunctionImportUtils.h - Importing support utilities -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,6 +43,11 @@ class FunctionImportGlobalProcessing {
/// to promote any non-renamable values.
SmallPtrSet<GlobalValue *, 8> Used;
+ /// Keep track of any COMDATs that require renaming (because COMDAT
+ /// leader was promoted and renamed). Maps from original COMDAT to one
+ /// with new name.
+ DenseMap<const Comdat *, Comdat *> RenamedComdats;
+
/// Check if we should promote the given local value to global scope.
bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
diff --git a/include/llvm/Transforms/Utils/GlobalStatus.h b/include/llvm/Transforms/Utils/GlobalStatus.h
index 8cc265bdf81d..519593c96766 100644
--- a/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -1,9 +1,8 @@
//===- GlobalStatus.h - Compute status info for globals ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Utils/GuardUtils.h b/include/llvm/Transforms/Utils/GuardUtils.h
index 537045edafe4..3b365c56a5c0 100644
--- a/include/llvm/Transforms/Utils/GuardUtils.h
+++ b/include/llvm/Transforms/Utils/GuardUtils.h
@@ -1,9 +1,8 @@
//===-- GuardUtils.h - Utils for work with guards ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utils that are used to perform transformations related to guards and their
diff --git a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
index b55a9893bcf7..033ea05b77fa 100644
--- a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
+++ b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
@@ -1,9 +1,8 @@
-//===-- ImportedFunctionsInliningStats.h ------------------------*- C++ -*-===//
+//===-- ImportedFunctionsInliningStatistics.h -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Generating inliner statistics for imported functions, mostly useful for
diff --git a/include/llvm/Transforms/Utils/IntegerDivision.h b/include/llvm/Transforms/Utils/IntegerDivision.h
index 5d9927eb51b2..35cae9aa2269 100644
--- a/include/llvm/Transforms/Utils/IntegerDivision.h
+++ b/include/llvm/Transforms/Utils/IntegerDivision.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Utils/IntegerDivision.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/LCSSA.h b/include/llvm/Transforms/Utils/LCSSA.h
index fe717e5f6635..b01c8022a65b 100644
--- a/include/llvm/Transforms/Utils/LCSSA.h
+++ b/include/llvm/Transforms/Utils/LCSSA.h
@@ -1,9 +1,8 @@
//===- LCSSA.h - Loop-closed SSA transform Pass -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h b/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
index c9df532e5794..ff1537ace329 100644
--- a/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
+++ b/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
@@ -1,9 +1,8 @@
//===- LibCallsShrinkWrap.h - Shrink Wrap Library Calls -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index ec8b0eda3641..ff516f230979 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -1,9 +1,8 @@
//===- Local.h - Functions to perform local transformations -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,12 +20,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
@@ -233,7 +231,8 @@ bool FlattenCFG(BasicBlock *BB, AliasAnalysis *AA = nullptr);
/// If this basic block is ONLY a setcc and a branch, and if a predecessor
/// branches to us and one of our successors, fold the setcc into the
/// predecessor and use logical operations to pick the right destination.
-bool FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold = 1);
+bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr,
+ unsigned BonusInstThreshold = 1);
/// This function takes a virtual register computed by an Instruction and
/// replaces it with a slot in the stack frame, allocated via alloca.
@@ -317,7 +316,7 @@ void findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgInsts, Value *V);
/// (between the optional Deref operations). Offset can be negative.
bool replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
- bool DerefBefore, int Offset, bool DerefAfter);
+ uint8_t DIExprFlags, int Offset);
/// Replaces llvm.dbg.declare instruction when the alloca it describes
/// is replaced with a new value. If Deref is true, an additional
@@ -326,8 +325,8 @@ bool replaceDbgDeclare(Value *Address, Value *NewAddress,
/// optional Deref operations). Offset can be negative. The new
/// llvm.dbg.declare is inserted immediately after AI.
bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool DerefBefore,
- int Offset, bool DerefAfter);
+ DIBuilder &Builder, uint8_t DIExprFlags,
+ int Offset);
/// Replaces multiple llvm.dbg.value instructions when the alloca it describes
/// is replaced with a new value. If Offset is non-zero, a constant displacement
@@ -337,11 +336,27 @@ bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
DIBuilder &Builder, int Offset = 0);
+/// Finds the alloca where the value comes from.
+AllocaInst *findAllocaForValue(Value *V,
+ DenseMap<Value *, AllocaInst *> &AllocaForValue);
+
/// Assuming the instruction \p I is going to be deleted, attempt to salvage
/// debug users of \p I by writing the effect of \p I in a DIExpression.
/// Returns true if any debug users were updated.
bool salvageDebugInfo(Instruction &I);
+/// Implementation of salvageDebugInfo, applying only to instructions in
+/// \p Insns, rather than all debug users of \p I.
+bool salvageDebugInfoForDbgValues(Instruction &I,
+ ArrayRef<DbgVariableIntrinsic *> Insns);
+
+/// Given an instruction \p I and DIExpression \p DIExpr operating on it, write
+/// the effects of \p I into the returned DIExpression, or return nullptr if
+/// it cannot be salvaged. \p StackVal: whether DW_OP_stack_value should be
+/// appended to the expression.
+DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr,
+ bool StackVal);
+
/// Point debug users of \p From to \p To or salvage them. Use this function
/// only when replacing all uses of \p From with \p To, with a guarantee that
/// \p From is going to be deleted.
@@ -367,7 +382,8 @@ unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
/// instruction, making it and the rest of the code in the block dead.
unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap,
bool PreserveLCSSA = false,
- DomTreeUpdater *DTU = nullptr);
+ DomTreeUpdater *DTU = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr);
/// Convert the CallInst to InvokeInst with the specified unwind edge basic
/// block. This also splits the basic block where CI is located, because
@@ -426,7 +442,7 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
const BasicBlock *BB);
-/// Return true if the CallSite CS calls a gc leaf function.
+/// Return true if this call calls a gc leaf function.
///
/// A leaf function is a function that does not safepoint the thread during its
/// execution. During a call or invoke to such a function, the callers stack
@@ -434,7 +450,7 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
///
/// Most passes can and should ignore this information, and it is only used
/// during lowering by the GC infrastructure.
-bool callsGCLeafFunction(ImmutableCallSite CS, const TargetLibraryInfo &TLI);
+bool callsGCLeafFunction(const CallBase *Call, const TargetLibraryInfo &TLI);
/// Copy a nonnull metadata node to a new load instruction.
///
@@ -456,8 +472,7 @@ void dropDebugUsers(Instruction &I);
/// \p DomBlock, by moving its instructions to the insertion point \p InsertPt.
///
/// The moved instructions receive the insertion point debug location values
-/// (DILocations) and their debug intrinsic instructions (dbg.values) are
-/// removed.
+/// (DILocations) and their debug intrinsic instructions are removed.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
BasicBlock *BB);
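A minimal sketch of the MemorySSA-aware FoldBranchToCommonDest, assuming BI is a conditional BranchInst and MSSA is the enclosing function's MemorySSA result (MemorySSAUpdater comes from llvm/Analysis/MemorySSAUpdater.h):

  // Keep MemorySSA in sync while the branch is folded into its predecessor.
  MemorySSAUpdater MSSAU(&MSSA);
  bool Folded = FoldBranchToCommonDest(BI, &MSSAU, /*BonusInstThreshold=*/1);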
diff --git a/include/llvm/Transforms/Utils/LoopRotationUtils.h b/include/llvm/Transforms/Utils/LoopRotationUtils.h
index cd5bc4301018..1e80722ed8b8 100644
--- a/include/llvm/Transforms/Utils/LoopRotationUtils.h
+++ b/include/llvm/Transforms/Utils/LoopRotationUtils.h
@@ -1,9 +1,8 @@
//===- LoopRotationUtils.h - Utilities to perform loop rotation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/LoopSimplify.h b/include/llvm/Transforms/Utils/LoopSimplify.h
index 166da2738ffd..2c1df7942f63 100644
--- a/include/llvm/Transforms/Utils/LoopSimplify.h
+++ b/include/llvm/Transforms/Utils/LoopSimplify.h
@@ -1,9 +1,8 @@
//===- LoopSimplify.h - Loop Canonicalization Pass --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,8 @@
namespace llvm {
+class MemorySSAUpdater;
+
/// This pass is responsible for loop canonicalization.
class LoopSimplifyPass : public PassInfoMixin<LoopSimplifyPass> {
public:
@@ -56,9 +57,11 @@ public:
///
/// This takes a potentially un-simplified loop L (and its children) and turns
/// it into a simplified loop nest with preheaders and single backedges. It will
-/// update \c AliasAnalysis and \c ScalarEvolution analyses if they're non-null.
+/// update \c DominatorTree, \c LoopInfo, \c ScalarEvolution and \c MemorySSA
+/// analyses if they're non-null, and LCSSA if \c PreserveLCSSA is true.
bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, bool PreserveLCSSA);
+ AssumptionCache *AC, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA);
} // end namespace llvm
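A sketch of the widened simplifyLoop signature, assuming the caller already holds DT, LI, SE, AC and a MemorySSA result MSSA for the function:

  // Canonicalize L (preheader, single backedge, dedicated exits), keeping
  // MemorySSA up to date through the updater.
  MemorySSAUpdater MSSAU(&MSSA);
  bool Changed = simplifyLoop(L, &DT, &LI, &SE, &AC, &MSSAU,
                              /*PreserveLCSSA=*/true);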
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 8c2527b6ae68..68bdded5cf93 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,6 +40,7 @@ class BasicBlock;
class DataLayout;
class Loop;
class LoopInfo;
+class MemoryAccess;
class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PredicatedScalarEvolution;
@@ -51,7 +51,7 @@ class TargetLibraryInfo;
class TargetTransformInfo;
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA);
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
/// Ensure that all exit blocks of the loop are dedicated exits.
///
@@ -59,7 +59,7 @@ BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
/// predecessors to use a dedicated loop exit block. We update the dominator
/// tree and loop info if provided, and will preserve LCSSA if requested.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
- bool PreserveLCSSA);
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
/// Ensures LCSSA form for every instruction from the Worklist in the scope of
/// innermost containing loop.
@@ -79,7 +79,8 @@ bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
///
/// Looks at all instructions in the loop which have uses outside of the
/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
-/// the loop are rewritten to use this node.
+/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
+/// already.
///
/// LoopInfo and DominatorTree are required and preserved.
///
@@ -100,6 +101,14 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE);
bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE);
+struct SinkAndHoistLICMFlags {
+ bool NoOfMemAccTooLarge;
+ unsigned LicmMssaOptCounter;
+ unsigned LicmMssaOptCap;
+ unsigned LicmMssaNoAccForPromotionCap;
+ bool IsSink;
+};
+
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
@@ -111,7 +120,7 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI,
bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, TargetTransformInfo *, Loop *,
AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *ORE);
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
@@ -124,7 +133,7 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
TargetLibraryInfo *, Loop *, AliasSetTracker *,
MemorySSAUpdater *, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *ORE);
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is infact dead.
@@ -148,14 +157,12 @@ void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// Diagnostics is emitted via \p ORE. It returns changed status.
-bool promoteLoopAccessesToScalars(const SmallSetVector<Value *, 8> &,
- SmallVectorImpl<BasicBlock *> &,
- SmallVectorImpl<Instruction *> &,
- PredIteratorCache &, LoopInfo *,
- DominatorTree *, const TargetLibraryInfo *,
- Loop *, AliasSetTracker *,
- ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *);
+bool promoteLoopAccessesToScalars(
+ const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
+ SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
+ PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
+ Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ OptimizationRemarkEmitter *);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
@@ -277,6 +284,7 @@ void getLoopAnalysisUsage(AnalysisUsage &AU);
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
+ SinkAndHoistLICMFlags *LICMFlags = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
@@ -292,6 +300,7 @@ getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
ArrayRef<Value *> RedOps = None);
/// Generates a vector reduction using shufflevectors to reduce the value.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind
MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
@@ -302,6 +311,7 @@ Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
/// additional information supplied in \p Flags.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *createSimpleTargetReduction(IRBuilder<> &B,
const TargetTransformInfo *TTI,
unsigned Opcode, Value *Src,
@@ -312,6 +322,7 @@ Value *createSimpleTargetReduction(IRBuilder<> &B,
/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
+/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
RecurrenceDescriptor &Desc, Value *Src,
bool NoNaN = false);
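
Because the reduction helpers above now document that fast-math flags are taken from the IRBuilder, a caller sets them on the builder before emitting the reduction. A small sketch (the insertion point InsertPt and vector value VecSrc are assumed, not part of this patch):

  IRBuilder<> Builder(InsertPt);
  FastMathFlags FMF;
  FMF.setFast();                 // allow reassociation for the FP reduction
  Builder.setFastMathFlags(FMF); // these flags are propagated into the reduction
  Value *Rdx = getShuffleReduction(Builder, VecSrc, Instruction::FAdd);

The same applies to createSimpleTargetReduction; for createTargetReduction the flags come from the RecurrenceDescriptor instead.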
diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h
index fcd734b37a1f..355c4d7dc6d8 100644
--- a/include/llvm/Transforms/Utils/LoopVersioning.h
+++ b/include/llvm/Transforms/Utils/LoopVersioning.h
@@ -1,9 +1,8 @@
//===- LoopVersioning.h - Utility to version a loop -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/LowerInvoke.h b/include/llvm/Transforms/Utils/LowerInvoke.h
index 12774c7fd1f7..c1198b08d3de 100644
--- a/include/llvm/Transforms/Utils/LowerInvoke.h
+++ b/include/llvm/Transforms/Utils/LowerInvoke.h
@@ -1,9 +1,8 @@
//===- LowerInvoke.h - Eliminate Invoke instructions ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index 2b7d0f67a324..8e9d7b522c78 100644
--- a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -1,9 +1,8 @@
-//===- llvm/Transforms/Utils/LowerMemintrinsics.h ---------------*- C++ -*-===//
+//===- llvm/Transforms/Utils/LowerMemIntrinsics.h ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/Mem2Reg.h b/include/llvm/Transforms/Utils/Mem2Reg.h
index 407684338a3b..76c1c2c5bffe 100644
--- a/include/llvm/Transforms/Utils/Mem2Reg.h
+++ b/include/llvm/Transforms/Utils/Mem2Reg.h
@@ -1,9 +1,8 @@
//===- Mem2Reg.h - The -mem2reg pass, a wrapper around the Utils lib ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index fee492be2a90..c69af5588741 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -1,9 +1,8 @@
//===-- ModuleUtils.h - Functions to manipulate Modules ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@ namespace llvm {
template <typename T> class ArrayRef;
class Module;
class Function;
+class FunctionCallee;
class GlobalValue;
class GlobalVariable;
class Constant;
@@ -40,20 +40,14 @@ void appendToGlobalCtors(Module &M, Function *F, int Priority,
void appendToGlobalDtors(Module &M, Function *F, int Priority,
Constant *Data = nullptr);
-// Validate the result of Module::getOrInsertFunction called for an interface
-// function of given sanitizer. If the instrumented module defines a function
-// with the same name, their prototypes must match, otherwise
-// getOrInsertFunction returns a bitcast.
-Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast);
-
-Function *declareSanitizerInitFunction(Module &M, StringRef InitName,
- ArrayRef<Type *> InitArgTypes);
+FunctionCallee declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes);
/// Creates sanitizer constructor function, and calls sanitizer's init
/// function from it.
/// \return Returns pair of pointers to constructor, and init functions
/// respectively.
-std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName = StringRef());
@@ -65,10 +59,10 @@ std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
///
/// \return Returns pair of pointers to constructor, and init functions
/// respectively.
-std::pair<Function *, Function *> getOrCreateSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> getOrCreateSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
- function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
StringRef VersionCheckName = StringRef());
// Creates and returns a sanitizer init function without argument if it doesn't
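
An illustrative sketch (not part of this patch) of the updated sanitizer helper: the init function now comes back as a FunctionCallee rather than a bare Function*. The names "mysan.module_ctor" and "__mysan_init" are hypothetical:

  Function *Ctor;
  FunctionCallee InitFn;
  std::tie(Ctor, InitFn) = getOrCreateSanitizerCtorAndInitFunctions(
      M, /*CtorName=*/"mysan.module_ctor", /*InitName=*/"__mysan_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{},
      [&](Function *NewCtor, FunctionCallee /*Init*/) {
        // Register the constructor once it has been created.
        appendToGlobalCtors(M, NewCtor, /*Priority=*/0);
      });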
diff --git a/include/llvm/Transforms/Utils/NameAnonGlobals.h b/include/llvm/Transforms/Utils/NameAnonGlobals.h
index 17fc902eebf8..659ebe33ffa6 100644
--- a/include/llvm/Transforms/Utils/NameAnonGlobals.h
+++ b/include/llvm/Transforms/Utils/NameAnonGlobals.h
@@ -1,9 +1,8 @@
//===-- NameAnonGlobals.h - Anonymous Global Naming Pass --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/PredicateInfo.h b/include/llvm/Transforms/Utils/PredicateInfo.h
index 2fc38089f3f1..da4a5dcc28c0 100644
--- a/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -1,9 +1,8 @@
//===- PredicateInfo.h - Build PredicateInfo ----------------------*-C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h
index 5ddfbe2bf058..b2b4507bbc74 100644
--- a/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -1,9 +1,8 @@
//===- PromoteMemToReg.h - Promote Allocas to Scalars -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index d02607acbbb5..22b2295cc9d7 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -1,9 +1,8 @@
//===- SSAUpdater.h - Unstructured SSA Update Tool --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,7 +147,7 @@ public:
/// Insts is a list of loads and stores to promote, and Name is the basename
/// for the PHIs to insert. After this is complete, the loads and stores are
/// removed from the code.
- void run(const SmallVectorImpl<Instruction *> &Insts) const;
+ void run(const SmallVectorImpl<Instruction *> &Insts);
/// Return true if the specified instruction is in the Inst list.
///
@@ -159,7 +158,7 @@ public:
/// This hook is invoked after all the stores are found and inserted as
/// available values.
- virtual void doExtraRewritesBeforeFinalDeletion() const {}
+ virtual void doExtraRewritesBeforeFinalDeletion() {}
/// Clients can choose to implement this to get notified right before
/// a load is RAUW'd another value.
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 53a608f01804..5d17d6f3d285 100644
--- a/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -1,9 +1,8 @@
//===- SSAUpdaterBulk.h - Unstructured SSA Update Tool ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index cab0f3e71575..ee06893ca660 100644
--- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -1,9 +1,8 @@
//===- SSAUpdaterImpl.h - SSA Updater Implementation ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/SanitizerStats.h b/include/llvm/Transforms/Utils/SanitizerStats.h
index d36e34258a3f..14e8ae045cdd 100644
--- a/include/llvm/Transforms/Utils/SanitizerStats.h
+++ b/include/llvm/Transforms/Utils/SanitizerStats.h
@@ -1,9 +1,8 @@
//===- SanitizerStats.h - Sanitizer statistics gathering -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h
index a1dfed29a22d..dec73ef057e8 100644
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -1,9 +1,8 @@
//===-- llvm/Transforms/Utils/SimplifyIndVar.h - Indvar Utils ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 025bcd44e310..2572094ddac8 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -1,9 +1,8 @@
//===- SimplifyLibCalls.h - Library call simplifier -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,8 @@ class TargetLibraryInfo;
class BasicBlock;
class Function;
class OptimizationRemarkEmitter;
+class BlockFrequencyInfo;
+class ProfileSummaryInfo;
/// This class implements simplifications for calls to fortified library
/// functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to,
@@ -56,14 +57,41 @@ private:
Value *optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemSetChk(CallInst *CI, IRBuilder<> &B);
- // Str/Stp cpy are similar enough to be handled in the same functions.
+ /// Str/Stp cpy are similar enough to be handled in the same functions.
Value *optimizeStrpCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func);
Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilder<> &B, LibFunc Func);
+ Value *optimizeMemCCpyChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeSNPrintfChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeSPrintfChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeStrCatChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeStrLCat(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeStrNCatChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeStrLCpyChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeVSNPrintfChk(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeVSPrintfChk(CallInst *CI, IRBuilder<> &B);
/// Checks whether the call \p CI to a fortified libcall is foldable
/// to the non-fortified version.
+ ///
+ /// \param CI the call to the fortified libcall.
+ ///
+ /// \param ObjSizeOp the index of the object size parameter of this chk
+ /// function. Unlike the operands below, this one is required.
+ ///
+ /// \param SizeOp optionally set to the parameter index of an explicit buffer
+ /// size argument. For instance, set to '2' for __strncpy_chk.
+ ///
+ /// \param StrOp optionally set to the parameter index of the source string
+ /// parameter to strcpy-like functions, where only the strlen of the source
+ /// will be written into the destination.
+ ///
+ /// \param FlagsOp optionally set to the parameter index of a 'flags'
+ /// parameter. These are used by an implementation to opt into stricter
+ /// checking.
bool isFortifiedCallFoldable(CallInst *CI, unsigned ObjSizeOp,
- unsigned SizeOp, bool isString);
+ Optional<unsigned> SizeOp = None,
+ Optional<unsigned> StrOp = None,
+ Optional<unsigned> FlagsOp = None);
};
/// LibCallSimplifier - This class implements a collection of optimizations
@@ -75,6 +103,8 @@ private:
const DataLayout &DL;
const TargetLibraryInfo *TLI;
OptimizationRemarkEmitter &ORE;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
bool UnsafeFPShrink;
function_ref<void(Instruction *, Value *)> Replacer;
function_ref<void(Instruction *)> Eraser;
@@ -102,6 +132,7 @@ public:
LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer =
&replaceAllUsesWithDefault,
function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
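
A construction sketch (not part of this patch): LibCallSimplifier now also takes BlockFrequencyInfo and ProfileSummaryInfo so call simplification can be profile-aware. DL, TLI, ORE, BFI, PSI and the call CI are assumed to be in scope, and optimizeCall(CallInst *) is assumed to remain the entry point:

  LibCallSimplifier Simplifier(DL, TLI, ORE, BFI, PSI);
  if (Value *Simplified = Simplifier.optimizeCall(CI)) {
    // A simplified value was produced; the caller decides how to substitute
    // it for the original call (e.g. RAUW and erase).
  }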
@@ -134,6 +165,8 @@ private:
Value *optimizeStrStr(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemChr(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
diff --git a/include/llvm/Transforms/Utils/SizeOpts.h b/include/llvm/Transforms/Utils/SizeOpts.h
new file mode 100644
index 000000000000..1a052c694e6d
--- /dev/null
+++ b/include/llvm/Transforms/Utils/SizeOpts.h
@@ -0,0 +1,34 @@
+//===- llvm/Transforms/Utils/SizeOpts.h - size optimization -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+#define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+
+namespace llvm {
+
+class BasicBlock;
+class BlockFrequencyInfo;
+class Function;
+class ProfileSummaryInfo;
+
+/// Returns true if function \p F is suggested to be size-optimized based on the
+/// profile.
+bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI);
+/// Returns true if basic block \p BB is suggested to be size-optimized based
+/// on the profile.
+bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
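
A short usage sketch (not part of this patch) for the new helpers; F, PSI and BFI are assumed to be provided by the caller's analyses:

  if (shouldOptimizeForSize(&F, PSI, BFI)) {
    // Prefer the smaller code sequence for the whole function.
  }
  for (BasicBlock &BB : F)
    if (shouldOptimizeForSize(&BB, PSI, BFI)) {
      // This block is profile-cold; favor size over speed here.
    }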
diff --git a/include/llvm/Transforms/Utils/SplitModule.h b/include/llvm/Transforms/Utils/SplitModule.h
index d2c31f2701ac..7839c5d9a589 100644
--- a/include/llvm/Transforms/Utils/SplitModule.h
+++ b/include/llvm/Transforms/Utils/SplitModule.h
@@ -1,9 +1,8 @@
//===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h
index 5f6488e08b5a..ce9dcaf2b74f 100644
--- a/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -1,9 +1,8 @@
//===- SymbolRewriter.h - Symbol Rewriting Pass -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index 222c601ad608..f68534ecd2eb 100644
--- a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -1,9 +1,8 @@
//===-- UnifyFunctionExitNodes.h - Ensure fn's have one return --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index 70e936d75008..593ca26feb98 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Utils/UnrollLoop.h - Unrolling utilities -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,11 +24,13 @@ namespace llvm {
class AssumptionCache;
class BasicBlock;
+class BlockFrequencyInfo;
class DependenceInfo;
class DominatorTree;
class Loop;
class LoopInfo;
class MDNode;
+class ProfileSummaryInfo;
class OptimizationRemarkEmitter;
class ScalarEvolution;
@@ -63,22 +64,31 @@ enum class LoopUnrollResult {
FullyUnrolled
};
-LoopUnrollResult UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- bool Force, bool AllowRuntime,
- bool AllowExpensiveTripCount, bool PreserveCondBr,
- bool PreserveOnlyFirst, unsigned TripMultiple,
- unsigned PeelCount, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA,
- Loop **RemainderLoop = nullptr);
+struct UnrollLoopOptions {
+ unsigned Count;
+ unsigned TripCount;
+ bool Force;
+ bool AllowRuntime;
+ bool AllowExpensiveTripCount;
+ bool PreserveCondBr;
+ bool PreserveOnlyFirst;
+ unsigned TripMultiple;
+ unsigned PeelCount;
+ bool UnrollRemainder;
+ bool ForgetAllSCEV;
+};
+
+LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA, Loop **RemainderLoop = nullptr);
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
- bool PreserveLCSSA,
+ bool ForgetAllSCEV, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, bool PreserveLCSSA,
Loop **ResultLoop = nullptr);
void computePeelCount(Loop *L, unsigned LoopSize,
@@ -109,9 +119,6 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
TargetTransformInfo::UnrollingPreferences &UP,
bool &UseUpperBound);
-BasicBlock *foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT);
-
void remapInstruction(Instruction *I, ValueToValueMapTy &VMap);
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
@@ -121,7 +128,8 @@ void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);
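
An illustrative caller-side sketch (not part of this patch) of the new UnrollLoopOptions bundle that replaces UnrollLoop's long parameter list; the concrete values are placeholders and the analyses are assumed to be in scope:

  UnrollLoopOptions ULO;
  ULO.Count = 4;                      // placeholder unroll factor
  ULO.TripCount = 0;                  // trip count unknown
  ULO.Force = false;
  ULO.AllowRuntime = true;
  ULO.AllowExpensiveTripCount = false;
  ULO.PreserveCondBr = false;
  ULO.PreserveOnlyFirst = false;
  ULO.TripMultiple = 1;
  ULO.PeelCount = 0;
  ULO.UnrollRemainder = false;
  ULO.ForgetAllSCEV = false;
  LoopUnrollResult Result =
      UnrollLoop(L, ULO, LI, SE, DT, AC, ORE, /*PreserveLCSSA=*/true);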
diff --git a/include/llvm/Transforms/Utils/VNCoercion.h b/include/llvm/Transforms/Utils/VNCoercion.h
index 1baa9b66e491..f67b9ed0afdd 100644
--- a/include/llvm/Transforms/Utils/VNCoercion.h
+++ b/include/llvm/Transforms/Utils/VNCoercion.h
@@ -1,9 +1,8 @@
//===- VNCoercion.h - Value Numbering Coercion Utilities --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file This file provides routines used by LLVM's value numbering passes to
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 4ecb23ea1951..1952a210291e 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -1,9 +1,8 @@
//===- ValueMapper.h - Remapping for constants and metadata -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 70f9a2e0741b..88a0e49d0fae 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -1,9 +1,8 @@
//===-- Vectorize.h - Vectorization Transformations -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,8 +109,9 @@ struct VectorizeConfig {
//
// LoopVectorize - Create a loop vectorization pass.
//
-Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced = false,
- bool VectorizeOnlyWhenForced = false);
+Pass *createLoopVectorizePass();
+Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
+ bool VectorizeOnlyWhenForced);
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h b/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
index 6b37d7093c44..f72c76c6f0f2 100644
--- a/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/LoadStoreVectorizer.h
@@ -1,9 +1,8 @@
//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 5c7bba048607..b144006e2628 100644
--- a/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -1,9 +1,8 @@
//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,11 +98,7 @@ public:
OptimizationRemarkEmitter &ORE);
/// Mark the loop L as already vectorized by setting the width to 1.
- void setAlreadyVectorized() {
- IsVectorized.Value = 1;
- Hint Hints[] = {IsVectorized};
- writeHintsToMetadata(Hints);
- }
+ void setAlreadyVectorized();
bool allowVectorization(Function *F, Loop *L,
bool VectorizeOnlyWhenForced) const;
@@ -152,15 +147,6 @@ private:
/// Checks string hint with one operand and set value if valid.
void setHint(StringRef Name, Metadata *Arg);
- /// Create a new hint from name / value pair.
- MDNode *createHintMetadata(StringRef Name, unsigned V) const;
-
- /// Matches metadata with hint name.
- bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
-
- /// Sets current hints into loop metadata, keeping other values intact.
- void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
-
/// The loop these hints belong to.
const Loop *TheLoop;
@@ -219,12 +205,13 @@ class LoopVectorizationLegality {
public:
LoopVectorizationLegality(
Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
- TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
- std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
- OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
- LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
- : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
- ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
+ LoopInfo *LI, OptimizationRemarkEmitter *ORE,
+ LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
+ AssumptionCache *AC)
+ : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
+ GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
@@ -385,18 +372,6 @@ private:
void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
SmallPtrSetImpl<Value *> &AllowedExit);
- /// Create an analysis remark that explains why vectorization failed
- ///
- /// \p RemarkName is the identifier for the remark. If \p I is passed it is
- /// an instruction that prevents vectorization. Otherwise the loop is used
- /// for the location of the remark. \return the remark object that can be
- /// streamed to.
- OptimizationRemarkAnalysis
- createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
- return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- RemarkName, TheLoop, I);
- }
-
/// If an access has a symbolic strides, this maps the pointer value to
/// the stride symbol.
const ValueToValueMap *getSymbolicStrides() {
@@ -407,6 +382,14 @@ private:
return LAI ? &LAI->getSymbolicStrides() : nullptr;
}
+ /// Reports a vectorization illegality: prints \p DebugMsg for debugging
+ /// purposes along with the corresponding optimization remark \p OREMsg (tagged \p ORETag).
+ /// If \p I is passed it is an instruction that prevents vectorization.
+ /// Otherwise the loop is used for the location of the remark.
+ void reportVectorizationFailure(const StringRef DebugMsg,
+ const StringRef OREMsg, const StringRef ORETag,
+ Instruction *I = nullptr) const;
+
/// The loop that we evaluate.
Loop *TheLoop;
@@ -420,6 +403,9 @@ private:
/// unrolling.
PredicatedScalarEvolution &PSE;
+ /// Target Transform Info.
+ TargetTransformInfo *TTI;
+
/// Target Library Info.
TargetLibraryInfo *TLI;
@@ -479,7 +465,7 @@ private:
/// Used to emit an analysis of any legality issues.
LoopVectorizeHints *Hints;
- /// The demanded bits analsyis is used to compute the minimum type size in
+ /// The demanded bits analysis is used to compute the minimum type size in
/// which a reduction can be computed.
DemandedBits *DB;
diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h
index d9c4f7b023c1..d1ec06afb02a 100644
--- a/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -1,9 +1,8 @@
//===- LoopVectorize.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -72,19 +71,63 @@ class Loop;
class LoopAccessInfo;
class LoopInfo;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class ScalarEvolution;
class TargetLibraryInfo;
class TargetTransformInfo;
+extern cl::opt<bool> EnableLoopInterleaving;
+extern cl::opt<bool> EnableLoopVectorization;
+
+struct LoopVectorizeOptions {
+ /// If false, consider all loops for interleaving.
+ /// If true, only loops that explicitly request interleaving are considered.
+ bool InterleaveOnlyWhenForced;
+
+ /// If false, consider all loops for vectorization.
+ /// If true, only loops that explicitly request vectorization are considered.
+ bool VectorizeOnlyWhenForced;
+
+ /// The current defaults when creating the pass with no arguments are:
+ /// EnableLoopInterleaving = true and EnableLoopVectorization = true. This
+ /// means that the interleaving default is consistent with the cl::opt flag, while
+ /// the vectorization default is not.
+ /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
+ /// set to true, and the corresponding change to account for this be made in
+ /// opt.cpp. The initializations below will become:
+ /// InterleaveOnlyWhenForced(!EnableLoopInterleaving)
+ /// VectorizeOnlyWhenForced(!EnableLoopVectorization).
+ LoopVectorizeOptions()
+ : InterleaveOnlyWhenForced(false), VectorizeOnlyWhenForced(false) {}
+ LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
+ bool VectorizeOnlyWhenForced)
+ : InterleaveOnlyWhenForced(InterleaveOnlyWhenForced),
+ VectorizeOnlyWhenForced(VectorizeOnlyWhenForced) {}
+
+ LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
+ InterleaveOnlyWhenForced = Value;
+ return *this;
+ }
+
+ LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
+ VectorizeOnlyWhenForced = Value;
+ return *this;
+ }
+};
+
/// The LoopVectorize Pass.
struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
/// If false, consider all loops for interleaving.
/// If true, only loops that explicitly request interleaving are considered.
- bool InterleaveOnlyWhenForced = false;
+ bool InterleaveOnlyWhenForced;
/// If false, consider all loops for vectorization.
/// If true, only loops that explicitly request vectorization are considered.
- bool VectorizeOnlyWhenForced = false;
+ bool VectorizeOnlyWhenForced;
+
+ LoopVectorizePass(LoopVectorizeOptions Opts = {})
+ : InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced),
+ VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced) {}
ScalarEvolution *SE;
LoopInfo *LI;
@@ -97,6 +140,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
AssumptionCache *AC;
std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
OptimizationRemarkEmitter *ORE;
+ ProfileSummaryInfo *PSI;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -106,7 +150,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
bool processLoop(Loop *L);
};
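
A small sketch (not part of this patch) of configuring the pass through LoopVectorizeOptions under the new pass manager; the createLoopVectorizePass overloads earlier in this patch serve the same purpose for the legacy pass manager:

  FunctionPassManager FPM;
  FPM.addPass(LoopVectorizePass(LoopVectorizeOptions()
                                    .setInterleaveOnlyWhenForced(true)
                                    .setVectorizeOnlyWhenForced(true)));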
diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 3152e8192fc5..ac6afb761d4d 100644
--- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -1,9 +1,8 @@
//===- SLPVectorizer.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
@@ -56,6 +55,8 @@ class BoUpSLP;
} // end namespace slpvectorizer
+extern cl::opt<bool> RunSLPVectorization;
+
struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;
diff --git a/include/llvm/WindowsManifest/WindowsManifestMerger.h b/include/llvm/WindowsManifest/WindowsManifestMerger.h
index 302d3705887b..935c930ad91d 100644
--- a/include/llvm/WindowsManifest/WindowsManifestMerger.h
+++ b/include/llvm/WindowsManifest/WindowsManifestMerger.h
@@ -1,9 +1,8 @@
//===-- WindowsManifestMerger.h ---------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
diff --git a/include/llvm/WindowsResource/ResourceProcessor.h b/include/llvm/WindowsResource/ResourceProcessor.h
index 4ca0a4b05bd0..4e99c05f4cd9 100644
--- a/include/llvm/WindowsResource/ResourceProcessor.h
+++ b/include/llvm/WindowsResource/ResourceProcessor.h
@@ -1,9 +1,8 @@
//===-- ResourceProcessor.h -------------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
diff --git a/include/llvm/WindowsResource/ResourceScriptToken.h b/include/llvm/WindowsResource/ResourceScriptToken.h
index 494ae3222a4b..254121cd318a 100644
--- a/include/llvm/WindowsResource/ResourceScriptToken.h
+++ b/include/llvm/WindowsResource/ResourceScriptToken.h
@@ -1,9 +1,8 @@
//===-- ResourceScriptToken.h -----------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
diff --git a/include/llvm/WindowsResource/ResourceScriptTokenList.h b/include/llvm/WindowsResource/ResourceScriptTokenList.h
index 0beed117c3e7..6b44dccaa35f 100644
--- a/include/llvm/WindowsResource/ResourceScriptTokenList.h
+++ b/include/llvm/WindowsResource/ResourceScriptTokenList.h
@@ -1,9 +1,8 @@
//===-- ResourceScriptTokenList.h -------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/BlockIndexer.h b/include/llvm/XRay/BlockIndexer.h
index b42fa17f3fb7..dafd2b5a5230 100644
--- a/include/llvm/XRay/BlockIndexer.h
+++ b/include/llvm/XRay/BlockIndexer.h
@@ -1,9 +1,8 @@
//===- BlockIndexer.h - FDR Block Indexing Visitor ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/BlockPrinter.h b/include/llvm/XRay/BlockPrinter.h
index bfb21e239517..949258085332 100644
--- a/include/llvm/XRay/BlockPrinter.h
+++ b/include/llvm/XRay/BlockPrinter.h
@@ -1,9 +1,8 @@
//===- BlockPrinter.h - FDR Block Pretty Printer -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/BlockVerifier.h b/include/llvm/XRay/BlockVerifier.h
index 46371c13891a..c848fdf084bc 100644
--- a/include/llvm/XRay/BlockVerifier.h
+++ b/include/llvm/XRay/BlockVerifier.h
@@ -1,9 +1,8 @@
//===- BlockVerifier.h - FDR Block Verifier -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/FDRLogBuilder.h b/include/llvm/XRay/FDRLogBuilder.h
index b5e9ed5c406b..3f49dc6dcb9d 100644
--- a/include/llvm/XRay/FDRLogBuilder.h
+++ b/include/llvm/XRay/FDRLogBuilder.h
@@ -1,9 +1,8 @@
//===- FDRLogBuilder.h - XRay FDR Log Building Utility --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRLOGBUILDER_H_
diff --git a/include/llvm/XRay/FDRRecordConsumer.h b/include/llvm/XRay/FDRRecordConsumer.h
index e856e1540558..4fbb1d41d0da 100644
--- a/include/llvm/XRay/FDRRecordConsumer.h
+++ b/include/llvm/XRay/FDRRecordConsumer.h
@@ -1,9 +1,8 @@
//===- FDRRecordConsumer.h - XRay Flight Data Recorder Mode Records -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDCONSUMER_H_
diff --git a/include/llvm/XRay/FDRRecordProducer.h b/include/llvm/XRay/FDRRecordProducer.h
index efdba2a67b7b..b530a85bc7e1 100644
--- a/include/llvm/XRay/FDRRecordProducer.h
+++ b/include/llvm/XRay/FDRRecordProducer.h
@@ -1,9 +1,8 @@
//===- FDRRecordProducer.h - XRay FDR Mode Record Producer ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INCLUDE_LLVM_XRAY_FDRRECORDPRODUCER_H_
diff --git a/include/llvm/XRay/FDRRecords.h b/include/llvm/XRay/FDRRecords.h
index 8a84f4d0c1fb..a8ce74bd88fb 100644
--- a/include/llvm/XRay/FDRRecords.h
+++ b/include/llvm/XRay/FDRRecords.h
@@ -1,9 +1,8 @@
//===- FDRRecords.h - XRay Flight Data Recorder Mode Records --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/FDRTraceExpander.h b/include/llvm/XRay/FDRTraceExpander.h
index 02a21bed5ce9..f3c36cf4cf66 100644
--- a/include/llvm/XRay/FDRTraceExpander.h
+++ b/include/llvm/XRay/FDRTraceExpander.h
@@ -1,9 +1,8 @@
//===- FDRTraceExpander.h - XRay FDR Mode Log Expander --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/FDRTraceWriter.h b/include/llvm/XRay/FDRTraceWriter.h
index 7b3b5fa25eff..2498877e27c1 100644
--- a/include/llvm/XRay/FDRTraceWriter.h
+++ b/include/llvm/XRay/FDRTraceWriter.h
@@ -1,9 +1,8 @@
//===- FDRTraceWriter.h - XRay FDR Trace Writer -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/FileHeaderReader.h b/include/llvm/XRay/FileHeaderReader.h
index 3b8809bdbb34..1c9681cfd9af 100644
--- a/include/llvm/XRay/FileHeaderReader.h
+++ b/include/llvm/XRay/FileHeaderReader.h
@@ -1,9 +1,8 @@
//===- FileHeaderReader.h - XRay Trace File Header Reading Function -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/Graph.h b/include/llvm/XRay/Graph.h
index a4d34a8a4be3..004681512800 100644
--- a/include/llvm/XRay/Graph.h
+++ b/include/llvm/XRay/Graph.h
@@ -1,9 +1,8 @@
//===-- Graph.h - XRay Graph Class ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/InstrumentationMap.h b/include/llvm/XRay/InstrumentationMap.h
index 42bfca36a20b..5cbe5c44893b 100644
--- a/include/llvm/XRay/InstrumentationMap.h
+++ b/include/llvm/XRay/InstrumentationMap.h
@@ -1,9 +1,8 @@
//===- InstrumentationMap.h - XRay Instrumentation Map ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/Profile.h b/include/llvm/XRay/Profile.h
index 9365630358e8..79d9b53387f3 100644
--- a/include/llvm/XRay/Profile.h
+++ b/include/llvm/XRay/Profile.h
@@ -1,9 +1,8 @@
//===- Profile.h - XRay Profile Abstraction -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/RecordPrinter.h b/include/llvm/XRay/RecordPrinter.h
index 649c64ab6f5c..7c7b7a32c56d 100644
--- a/include/llvm/XRay/RecordPrinter.h
+++ b/include/llvm/XRay/RecordPrinter.h
@@ -1,9 +1,8 @@
//===- RecordPrinter.h - FDR Record Printer -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/Trace.h b/include/llvm/XRay/Trace.h
index 924addd1560d..eb1f03b2a0d4 100644
--- a/include/llvm/XRay/Trace.h
+++ b/include/llvm/XRay/Trace.h
@@ -1,9 +1,8 @@
//===- Trace.h - XRay Trace Abstraction -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/XRayRecord.h b/include/llvm/XRay/XRayRecord.h
index 7685ec95838a..546b02303b6a 100644
--- a/include/llvm/XRay/XRayRecord.h
+++ b/include/llvm/XRay/XRayRecord.h
@@ -1,9 +1,8 @@
//===- XRayRecord.h - XRay Trace Record -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/XRay/YAMLXRayRecord.h b/include/llvm/XRay/YAMLXRayRecord.h
index 6150196ed98d..bc8b03548d6e 100644
--- a/include/llvm/XRay/YAMLXRayRecord.h
+++ b/include/llvm/XRay/YAMLXRayRecord.h
@@ -1,9 +1,8 @@
//===- YAMLXRayRecord.h - XRay Record YAML Support Definitions ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index bcc12534ec85..9c4668e1473c 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -5,6 +5,7 @@ module LLVM_Analysis {
// This is intended for (repeated) textual inclusion.
textual header "Analysis/TargetLibraryInfo.def"
+ textual header "Analysis/VecFuncs.def"
}
module LLVM_AsmParser {
@@ -53,6 +54,7 @@ module LLVM_BinaryFormat {
textual header "BinaryFormat/Dwarf.def"
textual header "BinaryFormat/DynamicTags.def"
textual header "BinaryFormat/MachO.def"
+ textual header "BinaryFormat/MinidumpConstants.def"
textual header "BinaryFormat/ELFRelocs/AArch64.def"
textual header "BinaryFormat/ELFRelocs/AMDGPU.def"
textual header "BinaryFormat/ELFRelocs/ARM.def"
@@ -220,7 +222,7 @@ module LLVM_intrinsic_gen {
module IR_ConstantRange { header "IR/ConstantRange.h" export * }
module IR_Dominators { header "IR/Dominators.h" export * }
module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
- module IR_DomTreeUpdater { header "IR/DomTreeUpdater.h" export * }
+ module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * }
module IR_IRBuilder { header "IR/IRBuilder.h" export * }
module IR_PassManager { header "IR/PassManager.h" export * }
module IR_PredIteratorCache { header "IR/PredIteratorCache.h" export * }
@@ -235,6 +237,7 @@ module LLVM_intrinsic_gen {
}
module IR_IntrinsicInst { header "IR/IntrinsicInst.h" export * }
module IR_PatternMatch { header "IR/PatternMatch.h" export * }
+ module IR_SafepointIRVerifier { header "IR/SafepointIRVerifier.h" export * }
module IR_Statepoint { header "IR/Statepoint.h" export * }
export *
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 3446aef39938..32241e355eb8 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -1,9 +1,8 @@
//==- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -80,12 +79,16 @@ AAResults::~AAResults() {
bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
- // Check if the AA manager itself has been invalidated.
+ // AAResults preserves the AAManager by default, due to the stateless nature
+ // of AliasAnalysis. There is no need to check whether it has been preserved
+ // explicitly. Check if any module dependency was invalidated and caused the
+ // AAManager to be invalidated. Invalidate ourselves in that case.
auto PAC = PA.getChecker<AAManager>();
- if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
- return true; // The manager needs to be blown away, clear everything.
+ if (!PAC.preservedWhenStateless())
+ return true;
- // Check all of the dependencies registered.
+ // Check if any of the function dependencies were invalidated, and invalidate
+ // ourselves in that case.
for (AnalysisKey *ID : AADeps)
if (Inv.invalidate(ID, F, PA))
return true;
@@ -100,8 +103,14 @@ bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
AliasResult AAResults::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
+ AAQueryInfo AAQIP;
+ return alias(LocA, LocB, AAQIP);
+}
+
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB, AAQueryInfo &AAQI) {
for (const auto &AA : AAs) {
- auto Result = AA->alias(LocA, LocB);
+ auto Result = AA->alias(LocA, LocB, AAQI);
if (Result != MayAlias)
return Result;
}
@@ -110,8 +119,14 @@ AliasResult AAResults::alias(const MemoryLocation &LocA,
bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
+ AAQueryInfo AAQIP;
+ return pointsToConstantMemory(Loc, AAQIP, OrLocal);
+}
+
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+ AAQueryInfo &AAQI, bool OrLocal) {
for (const auto &AA : AAs)
- if (AA->pointsToConstantMemory(Loc, OrLocal))
+ if (AA->pointsToConstantMemory(Loc, AAQI, OrLocal))
return true;
return false;
@@ -132,10 +147,16 @@ ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
}
ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(I, Call2, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2,
+ AAQueryInfo &AAQI) {
// We may have two calls.
if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
- return getModRefInfo(Call1, Call2);
+ return getModRefInfo(Call1, Call2, AAQI);
} else if (I->isFenceLike()) {
// If this is a fence, just return ModRef.
return ModRefInfo::ModRef;
@@ -145,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call2, DefLoc);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
if (isModOrRefSet(MR))
return setModAndRef(MR);
}
@@ -154,10 +175,17 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(Call, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc, AAQI));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -215,10 +243,16 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
const CallBase *Call2) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(Call1, Call2, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2, AAQueryInfo &AAQI) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2, AAQI));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -397,6 +431,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) {
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(L, Loc, AAQIP);
+}
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Be conservative in the face of atomic.
if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
return ModRefInfo::ModRef;
@@ -404,7 +444,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(L), Loc);
+ AliasResult AR = alias(MemoryLocation::get(L), Loc, AAQI);
if (AR == NoAlias)
return ModRefInfo::NoModRef;
if (AR == MustAlias)
@@ -416,12 +456,18 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(S, Loc, AAQIP);
+}
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Be conservative in the face of atomic.
if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
return ModRefInfo::ModRef;
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(S), Loc);
+ AliasResult AR = alias(MemoryLocation::get(S), Loc, AAQI);
// If the store address cannot alias the pointer in question, then the
// specified memory cannot be modified by the store.
if (AR == NoAlias)
@@ -429,7 +475,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this store.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
// If the store address aliases the pointer as must alias, set Must.
@@ -442,17 +488,31 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
}
ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(S, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// If we know that the location is a constant memory location, the fence
// cannot modify this location.
- if (Loc.Ptr && pointsToConstantMemory(Loc))
+ if (Loc.Ptr && pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::Ref;
return ModRefInfo::ModRef;
}
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(V, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(V), Loc);
+ AliasResult AR = alias(MemoryLocation::get(V), Loc, AAQI);
// If the va_arg address cannot alias the pointer in question, then the
// specified memory cannot be accessed by the va_arg.
if (AR == NoAlias)
@@ -460,7 +520,7 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
// If the pointer is a pointer to constant memory, then it could not have
// been modified by this va_arg.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
// If the va_arg aliases the pointer as must alias, set Must.
@@ -474,10 +534,17 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(CatchPad, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (Loc.Ptr) {
// If the pointer is a pointer to constant memory,
// then it could not have been modified by this catchpad.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
}
@@ -487,10 +554,17 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(CatchRet, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (Loc.Ptr) {
// If the pointer is a pointer to constant memory,
// then it could not have been modified by this catchpad.
- if (pointsToConstantMemory(Loc))
+ if (pointsToConstantMemory(Loc, AAQI))
return ModRefInfo::NoModRef;
}
@@ -500,12 +574,19 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(CX, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
if (isStrongerThanMonotonic(CX->getSuccessOrdering()))
return ModRefInfo::ModRef;
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(CX), Loc);
+ AliasResult AR = alias(MemoryLocation::get(CX), Loc, AAQI);
// If the cmpxchg address does not alias the location, it does not access
// it.
if (AR == NoAlias)
@@ -521,12 +602,19 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
const MemoryLocation &Loc) {
+ AAQueryInfo AAQIP;
+ return getModRefInfo(RMW, Loc, AAQIP);
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
// Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
if (isStrongerThanMonotonic(RMW->getOrdering()))
return ModRefInfo::ModRef;
if (Loc.Ptr) {
- AliasResult AR = alias(MemoryLocation::get(RMW), Loc);
+ AliasResult AR = alias(MemoryLocation::get(RMW), Loc, AAQI);
// If the atomicrmw address does not alias the location, it does not access
// it.
if (AR == NoAlias)
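
The AliasAnalysis.cpp hunks above all follow one mechanical pattern: each public AAResults query keeps its existing signature, but now constructs a local AAQueryInfo and forwards to a new overload that threads that per-query state (the alias cache and, later in this patch, a captured-pointer cache) through every nested query. The following is a minimal standalone sketch of that delegation shape, with hypothetical names rather than the real AAResults API; it only illustrates the wrapper-plus-stateful-overload structure.

// Illustrative only: the "wrapper builds per-query state and delegates"
// shape used throughout the AAResults changes above. Names are hypothetical.
#include <map>
#include <utility>

enum class Result { No, May, Must };

struct QueryInfo {
  // Per-query memoization, analogous to AAQueryInfo's alias cache.
  std::map<std::pair<const void *, const void *>, Result> Cache;
};

class Analysis {
public:
  // Old entry point: unchanged signature for existing callers.
  Result alias(const void *A, const void *B) {
    QueryInfo QI;            // fresh state for one top-level query
    return alias(A, B, QI);  // delegate to the stateful overload
  }

  // New overload: nested/recursive queries share the same QueryInfo.
  Result alias(const void *A, const void *B, QueryInfo &QI) {
    auto It = QI.Cache.find({A, B});
    if (It != QI.Cache.end())
      return It->second;           // reuse a result from this query tree
    Result R = Result::May;        // placeholder for the real alias walk
    QI.Cache.emplace(std::make_pair(A, B), R);
    return R;
  }
};
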
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 85dd4fe95b33..e83703867e09 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -1,9 +1,8 @@
//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/AliasAnalysisSummary.cpp b/lib/Analysis/AliasAnalysisSummary.cpp
index 2b4879453beb..2f3396a44117 100644
--- a/lib/Analysis/AliasAnalysisSummary.cpp
+++ b/lib/Analysis/AliasAnalysisSummary.cpp
@@ -73,28 +73,28 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs Attr) {
}
Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue,
- CallSite CS) {
+ CallBase &Call) {
auto Index = IValue.Index;
- auto Value = (Index == 0) ? CS.getInstruction() : CS.getArgument(Index - 1);
- if (Value->getType()->isPointerTy())
- return InstantiatedValue{Value, IValue.DerefLevel};
+ auto *V = (Index == 0) ? &Call : Call.getArgOperand(Index - 1);
+ if (V->getType()->isPointerTy())
+ return InstantiatedValue{V, IValue.DerefLevel};
return None;
}
Optional<InstantiatedRelation>
-instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
- auto From = instantiateInterfaceValue(ERelation.From, CS);
+instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call) {
+ auto From = instantiateInterfaceValue(ERelation.From, Call);
if (!From)
return None;
- auto To = instantiateInterfaceValue(ERelation.To, CS);
+ auto To = instantiateInterfaceValue(ERelation.To, Call);
if (!To)
return None;
return InstantiatedRelation{*From, *To, ERelation.Offset};
}
Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
- CallSite CS) {
- auto Value = instantiateInterfaceValue(EAttr.IValue, CS);
+ CallBase &Call) {
+ auto Value = instantiateInterfaceValue(EAttr.IValue, Call);
if (!Value)
return None;
return InstantiatedAttr{*Value, EAttr.Attr};
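
The AliasAnalysisSummary.cpp hunk replaces the old CallSite wrapper with a CallBase reference. The translation is mechanical: CS.getInstruction() becomes &Call, and CS.getArgument(I) becomes Call.getArgOperand(I). A small sketch of that mapping, following the InterfaceValue convention used above (index 0 is the call itself, index N is argument N-1):

// Sketch of the CallSite -> CallBase mapping used in the hunk above.
#include "llvm/IR/InstrTypes.h"  // llvm::CallBase
#include "llvm/IR/Value.h"

using namespace llvm;

static Value *valueAtInterfaceIndex(CallBase &Call, unsigned Index) {
  if (Index == 0)
    return &Call;                        // was CS.getInstruction()
  return Call.getArgOperand(Index - 1);  // was CS.getArgument(Index - 1)
}
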
diff --git a/lib/Analysis/AliasAnalysisSummary.h b/lib/Analysis/AliasAnalysisSummary.h
index fb93a12420f8..fe75b03cedef 100644
--- a/lib/Analysis/AliasAnalysisSummary.h
+++ b/lib/Analysis/AliasAnalysisSummary.h
@@ -1,9 +1,8 @@
//=====- CFLSummary.h - Abstract stratified sets implementation. --------=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -38,7 +37,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstrTypes.h"
#include <bitset>
namespace llvm {
@@ -196,12 +195,13 @@ struct AliasSummary {
SmallVector<ExternalAttribute, 8> RetParamAttributes;
};
-/// This is the result of instantiating InterfaceValue at a particular callsite
+/// This is the result of instantiating InterfaceValue at a particular call
struct InstantiatedValue {
Value *Val;
unsigned DerefLevel;
};
-Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue, CallSite);
+Optional<InstantiatedValue> instantiateInterfaceValue(InterfaceValue IValue,
+ CallBase &Call);
inline bool operator==(InstantiatedValue LHS, InstantiatedValue RHS) {
return LHS.Val == RHS.Val && LHS.DerefLevel == RHS.DerefLevel;
@@ -229,8 +229,8 @@ struct InstantiatedRelation {
InstantiatedValue From, To;
int64_t Offset;
};
-Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
- CallSite);
+Optional<InstantiatedRelation>
+instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call);
/// This is the result of instantiating ExternalAttribute at a particular
/// callsite
@@ -238,8 +238,8 @@ struct InstantiatedAttr {
InstantiatedValue IValue;
AliasAttrs Attr;
};
-Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute,
- CallSite);
+Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
+ CallBase &Call);
}
template <> struct DenseMapInfo<cflaa::InstantiatedValue> {
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index f6ad704cc914..a6e5b9fab558 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -1,9 +1,8 @@
//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,9 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -127,24 +128,24 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
LocationSize Size, const AAMDNodes &AAInfo,
- bool KnownMustAlias) {
+ bool KnownMustAlias, bool SkipSizeUpdate) {
assert(!Entry.hasAliasSet() && "Entry already in set!");
// Check to see if we have to downgrade to _may_ alias.
- if (isMustAlias() && !KnownMustAlias)
+ if (isMustAlias())
if (PointerRec *P = getSomePointer()) {
- AliasAnalysis &AA = AST.getAliasAnalysis();
- AliasResult Result =
- AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
- MemoryLocation(Entry.getValue(), Size, AAInfo));
- if (Result != MustAlias) {
- Alias = SetMayAlias;
- AST.TotalMayAliasSetSize += size();
- } else {
- // First entry of must alias must have maximum size!
+ if (!KnownMustAlias) {
+ AliasAnalysis &AA = AST.getAliasAnalysis();
+ AliasResult Result = AA.alias(
+ MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
+ MemoryLocation(Entry.getValue(), Size, AAInfo));
+ if (Result != MustAlias) {
+ Alias = SetMayAlias;
+ AST.TotalMayAliasSetSize += size();
+ }
+ assert(Result != NoAlias && "Cannot be part of must set!");
+ } else if (!SkipSizeUpdate)
P->updateSizeAndAAInfo(Size, AAInfo);
- }
- assert(Result != NoAlias && "Cannot be part of must set!");
}
Entry.setAliasSet(this);
@@ -184,14 +185,15 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
Access = ModRefAccess;
}
-/// aliasesPointer - Return true if the specified pointer "may" (or must)
-/// alias one of the members in the set.
+/// aliasesPointer - If the specified pointer "may" (or must) alias one of the
+/// members in the set return the appropriate AliasResult. Otherwise return
+/// NoAlias.
///
-bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
- const AAMDNodes &AAInfo,
- AliasAnalysis &AA) const {
+AliasResult AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
+ const AAMDNodes &AAInfo,
+ AliasAnalysis &AA) const {
if (AliasAny)
- return true;
+ return MayAlias;
if (Alias == SetMustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -208,9 +210,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
// If this is a may-alias set, we have to check all of the pointers in the set
// to be sure it doesn't alias the set...
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.alias(MemoryLocation(Ptr, Size, AAInfo),
- MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
- return true;
+ if (AliasResult AR = AA.alias(
+ MemoryLocation(Ptr, Size, AAInfo),
+ MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))
+ return AR;
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
@@ -218,10 +221,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, LocationSize Size,
if (auto *Inst = getUnknownInst(i))
if (isModOrRefSet(
AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo))))
- return true;
+ return MayAlias;
}
- return false;
+ return NoAlias;
}
bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
@@ -288,25 +291,38 @@ void AliasSetTracker::clear() {
AliasSets.clear();
}
-
/// mergeAliasSetsForPointer - Given a pointer, merge all alias sets that may
/// alias the pointer. Return the unified set, or nullptr if no set that aliases
-/// the pointer was found.
+/// the pointer was found. MustAliasAll is updated to true/false if the pointer
+/// is found to MustAlias all the sets it merged.
AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
LocationSize Size,
- const AAMDNodes &AAInfo) {
+ const AAMDNodes &AAInfo,
+ bool &MustAliasAll) {
AliasSet *FoundSet = nullptr;
+ AliasResult AllAR = MustAlias;
for (iterator I = begin(), E = end(); I != E;) {
iterator Cur = I++;
- if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
+ if (Cur->Forward)
+ continue;
+
+ AliasResult AR = Cur->aliasesPointer(Ptr, Size, AAInfo, AA);
+ if (AR == NoAlias)
+ continue;
+
+ AllAR =
+ AliasResult(AllAR & AR); // Possible downgrade to May/Partial, even No
- if (!FoundSet) { // If this is the first alias set ptr can go into.
- FoundSet = &*Cur; // Remember it.
- } else { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
+ if (!FoundSet) {
+ // If this is the first alias set ptr can go into, remember it.
+ FoundSet = &*Cur;
+ } else {
+ // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this);
}
}
+ MustAliasAll = (AllAR == MustAlias);
return FoundSet;
}
@@ -316,10 +332,13 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
iterator Cur = I++;
if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))
continue;
- if (!FoundSet) // If this is the first alias set ptr can go into.
- FoundSet = &*Cur; // Remember it.
- else // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
+ if (!FoundSet) {
+ // If this is the first alias set ptr can go into, remember it.
+ FoundSet = &*Cur;
+ } else {
+ // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*Cur, *this);
+ }
}
return FoundSet;
}
@@ -329,7 +348,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
Value * const Pointer = const_cast<Value*>(MemLoc.Ptr);
const LocationSize Size = MemLoc.Size;
const AAMDNodes &AAInfo = MemLoc.AATags;
-
+
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
if (AliasAnyAS) {
@@ -348,6 +367,7 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
return *AliasAnyAS;
}
+ bool MustAliasAll = false;
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
// If the size changed, we may need to merge several alias sets.
@@ -356,20 +376,21 @@ AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
// is NoAlias, mergeAliasSetsForPointer(undef, ...) will not find the
// the right set for undef, even if it exists.
if (Entry.updateSizeAndAAInfo(Size, AAInfo))
- mergeAliasSetsForPointer(Pointer, Size, AAInfo);
+ mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
- if (AliasSet *AS = mergeAliasSetsForPointer(Pointer, Size, AAInfo)) {
+ if (AliasSet *AS =
+ mergeAliasSetsForPointer(Pointer, Size, AAInfo, MustAliasAll)) {
// Add it to the alias set it aliases.
- AS->addPointer(*this, Entry, Size, AAInfo);
+ AS->addPointer(*this, Entry, Size, AAInfo, MustAliasAll);
return *AS;
}
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
+ AliasSets.back().addPointer(*this, Entry, Size, AAInfo, true);
return AliasSets.back();
}
@@ -422,14 +443,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) {
if (!Inst->mayReadOrWriteMemory())
return; // doesn't alias anything
- AliasSet *AS = findAliasSetForUnknownInst(Inst);
- if (AS) {
+ if (AliasSet *AS = findAliasSetForUnknownInst(Inst)) {
AS->addUnknownInst(Inst, AA);
return;
}
AliasSets.push_back(new AliasSet());
- AS = &AliasSets.back();
- AS->addUnknownInst(Inst, AA);
+ AliasSets.back().addUnknownInst(Inst, AA);
}
void AliasSetTracker::add(Instruction *I) {
@@ -516,6 +535,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
}
}
+void AliasSetTracker::addAllInstructionsInLoopUsingMSSA() {
+ assert(MSSA && L && "MSSA and L must be available");
+ for (const BasicBlock *BB : L->blocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB))
+ for (auto &Access : *Accesses)
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
+ add(MUD->getMemoryInst());
+}
+
// deleteValue method - This method is used to remove a pointer value from the
// AliasSetTracker entirely. It should be used when an instruction is deleted
// from the program to update the AST. If you don't use this, you would have
@@ -563,9 +591,8 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
I = PointerMap.find_as(From);
// Add it to the alias set it aliases...
AliasSet *AS = I->second->getAliasSet(*this);
- AS->addPointer(*this, Entry, I->second->getSize(),
- I->second->getAAInfo(),
- true);
+ AS->addPointer(*this, Entry, I->second->getSize(), I->second->getAAInfo(),
+ true, true);
}
AliasSet &AliasSetTracker::mergeAllAliasSets() {
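
In the mergeAliasSetsForPointer change above, the running result starts at MustAlias and is combined with each set's answer via AliasResult(AllAR & AR). With the usual encoding NoAlias = 0, MayAlias = 1, PartialAlias = 2, MustAlias = 3, bitwise AND acts as a cheap meet: anything combined with MustAlias is unchanged, while mixing MayAlias and PartialAlias collapses to NoAlias, which is why the comment allows a downgrade "even [to] No". A standalone check of that arithmetic (the enumerator values are an assumption, chosen to mirror llvm::AliasResult):

// Standalone illustration of the bitwise-AND "meet" over alias results.
#include <cassert>

enum AliasResult { NoAlias = 0, MayAlias = 1, PartialAlias = 2, MustAlias = 3 };

int main() {
  AliasResult All = MustAlias;            // optimistic start
  All = AliasResult(All & MayAlias);      // Must & May == May
  assert(All == MayAlias);
  All = AliasResult(All & PartialAlias);  // May & Partial == No
  assert(All == NoAlias);
  bool MustAliasAll = (All == MustAlias); // what the caller records
  assert(!MustAliasAll);
  return 0;
}
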
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index bb8742123a0f..d46a8d8e306c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -1,9 +1,8 @@
//===-- Analysis.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index 8bfd24ccf77b..cf2f845dee0a 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -1,9 +1,8 @@
//===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -54,11 +53,11 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) {
return AVIP.first->second;
}
-void AssumptionCache::updateAffectedValues(CallInst *CI) {
+static void findAffectedValues(CallInst *CI,
+ SmallVectorImpl<Value *> &Affected) {
// Note: This code must be kept in-sync with the code in
// computeKnownBitsFromAssume in ValueTracking.
- SmallVector<Value *, 16> Affected;
auto AddAffected = [&Affected](Value *V) {
if (isa<Argument>(V)) {
Affected.push_back(V);
@@ -109,6 +108,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
AddAffectedFromEq(B);
}
}
+}
+
+void AssumptionCache::updateAffectedValues(CallInst *CI) {
+ SmallVector<Value *, 16> Affected;
+ findAffectedValues(CI, Affected);
for (auto &AV : Affected) {
auto &AVV = getOrInsertAffectedValues(AV);
@@ -117,6 +121,18 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
}
}
+void AssumptionCache::unregisterAssumption(CallInst *CI) {
+ SmallVector<Value *, 16> Affected;
+ findAffectedValues(CI, Affected);
+
+ for (auto &AV : Affected) {
+ auto AVI = AffectedValues.find_as(AV);
+ if (AVI != AffectedValues.end())
+ AffectedValues.erase(AVI);
+ }
+ remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; });
+}
+
void AssumptionCache::AffectedValueCallbackVH::deleted() {
auto AVI = AC->AffectedValues.find(getValPtr());
if (AVI != AC->AffectedValues.end())
@@ -241,6 +257,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
return *IP.first->second;
}
+AssumptionCache *AssumptionCacheTracker::lookupAssumptionCache(Function &F) {
+ auto I = AssumptionCaches.find_as(&F);
+ if (I != AssumptionCaches.end())
+ return I->second.get();
+ return nullptr;
+}
+
void AssumptionCacheTracker::verifyAnalysis() const {
// FIXME: In the long term the verifier should not be controllable with a
// flag. We should either fix all passes to correctly update the assumption
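
The AssumptionCache.cpp changes factor the affected-value scan out into findAffectedValues so that the new unregisterAssumption can drop an @llvm.assume call from the cache when a transform erases it, and AssumptionCacheTracker gains lookupAssumptionCache for callers that only want a cache if one already exists. A hedged usage sketch of the new hook, assuming the pass already holds the cache and the call:

// Sketch: how a transform might use unregisterAssumption when deleting
// an @llvm.assume call. AC and Assume are assumed to come from the pass.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static void eraseAssume(AssumptionCache &AC, CallInst *Assume) {
  AC.unregisterAssumption(Assume); // forget cached affected values first
  Assume->eraseFromParent();       // then actually delete the call
}
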
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 332eeaa00e73..3721c99883b8 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -117,25 +116,44 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
/// Returns true if the pointer is to a function-local object that never
/// escapes from the function.
-static bool isNonEscapingLocalObject(const Value *V) {
+static bool isNonEscapingLocalObject(
+ const Value *V,
+ SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr) {
+ SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
+ if (IsCapturedCache) {
+ bool Inserted;
+ std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
+ if (!Inserted)
+ // Found cached result, return it!
+ return CacheIt->second;
+ }
+
// If this is a local allocation, check to see if it escapes.
- if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ if (isa<AllocaInst>(V) || isNoAliasCall(V)) {
// Set StoreCaptures to True so that we can assume in our callers that the
// pointer is not the result of a load instruction. Currently
// PointerMayBeCaptured doesn't have any special analysis for the
// StoreCaptures=false case; if it did, our callers could be refined to be
// more precise.
- return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ if (IsCapturedCache)
+ CacheIt->second = Ret;
+ return Ret;
+ }
// If this is an argument that corresponds to a byval or noalias argument,
// then it has not escaped before entering the function. Check if it escapes
// inside the function.
if (const Argument *A = dyn_cast<Argument>(V))
- if (A->hasByValAttr() || A->hasNoAliasAttr())
+ if (A->hasByValAttr() || A->hasNoAliasAttr()) {
// Note even if the argument is marked nocapture, we still need to check
// for copies made inside the function. The nocapture attribute only
// specifies that there are no copies made that outlive the function.
- return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+ if (IsCapturedCache)
+ CacheIt->second = Ret;
+ return Ret;
+ }
return false;
}
@@ -613,7 +631,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
/// the function, with global constants being considered local to all
/// functions.
bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+ AAQueryInfo &AAQI, bool OrLocal) {
assert(Visited.empty() && "Visited must be cleared after use!");
unsigned MaxLookup = 8;
@@ -623,7 +641,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
if (!Visited.insert(V).second) {
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
// An alloca instruction defines local memory.
@@ -637,7 +655,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// others. GV may even be a declaration, not a definition.
if (!GV->isConstant()) {
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
continue;
}
@@ -655,7 +673,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// Don't bother inspecting phi nodes with many operands.
if (PN->getNumIncomingValues() > MaxLookup) {
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
for (Value *IncValue : PN->incoming_values())
Worklist.push_back(IncValue);
@@ -664,7 +682,7 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// Otherwise be conservative.
Visited.clear();
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
} while (!Worklist.empty() && --MaxLookup);
Visited.clear();
@@ -799,24 +817,25 @@ static bool notDifferentParent(const Value *O1, const Value *O2) {
#endif
AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
// If we have a directly cached entry for these locations, we have recursed
// through this once, so just return the cached results. Notably, when this
// happens, we don't clear the cache.
- auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
- if (CacheIt != AliasCache.end())
+ auto CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocA, LocB));
+ if (CacheIt != AAQI.AliasCache.end())
+ return CacheIt->second;
+
+ CacheIt = AAQI.AliasCache.find(AAQueryInfo::LocPair(LocB, LocA));
+ if (CacheIt != AAQI.AliasCache.end())
return CacheIt->second;
AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
- LocB.Size, LocB.AATags);
- // AliasCache rarely has more than 1 or 2 elements, always use
- // shrink_and_clear so it quickly returns to the inline capacity of the
- // SmallDenseMap if it ever grows larger.
- // FIXME: This should really be shrink_to_inline_capacity_and_clear().
- AliasCache.shrink_and_clear();
+ LocB.Size, LocB.AATags, AAQI);
+
VisitedPhiBBs.clear();
return Alias;
}
@@ -828,7 +847,8 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
/// say much about this query. We do, however, use simple "address taken"
/// analysis on local objects.
ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
assert(notDifferentParent(Call, Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
@@ -855,7 +875,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && Call != Object &&
- isNonEscapingLocalObject(Object)) {
+ isNonEscapingLocalObject(Object, &AAQI.IsCapturedCache)) {
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
@@ -881,11 +901,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking.
- AliasResult AR =
- getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+ AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI),
+ MemoryLocation(Object), AAQI);
if (AR != MustAlias)
IsMustAlias = false;
- // Operand doesnt alias 'Object', continue looking for other aliases
+ // Operand doesn't alias 'Object', continue looking for other aliases
if (AR == NoAlias)
continue;
// Operand aliases 'Object', but call doesn't modify it. Strengthen
@@ -928,7 +948,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
if (isMallocOrCallocLikeFn(Call, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
- if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias)
+ if (getBestAAResults().alias(MemoryLocation(Call), Loc, AAQI) == NoAlias)
return ModRefInfo::NoModRef;
}
@@ -940,11 +960,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
AliasResult SrcAA, DestAA;
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
- Loc)) == MustAlias)
+ Loc, AAQI)) == MustAlias)
// Loc is exactly the memcpy source thus disjoint from memcpy dest.
return ModRefInfo::Ref;
if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
- Loc)) == MustAlias)
+ Loc, AAQI)) == MustAlias)
// The converse case.
return ModRefInfo::Mod;
@@ -1000,11 +1020,12 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
return ModRefInfo::Ref;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) {
+ const CallBase *Call2,
+ AAQueryInfo &AAQI) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
@@ -1020,7 +1041,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
// heap state at the point the guard is issued needs to be consistent in case
// the guard invokes the "deopt" continuation.
- // NB! This function is *not* commutative, so we specical case two
+ // NB! This function is *not* commutative, so we special case two
// possibilities for guard intrinsics.
if (isIntrinsicCall(Call1, Intrinsic::experimental_guard))
@@ -1034,7 +1055,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
: ModRefInfo::NoModRef;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
@@ -1266,11 +1287,10 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
/// We know that V1 is a GEP, but we don't know anything about V2.
/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
/// V2.
-AliasResult
-BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
- const AAMDNodes &V1AAInfo, const Value *V2,
- LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2) {
+AliasResult BasicAAResult::aliasGEP(
+ const GEPOperator *GEP1, LocationSize V1Size, const AAMDNodes &V1AAInfo,
+ const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderlyingV1, const Value *UnderlyingV2, AAQueryInfo &AAQI) {
DecomposedGEP DecompGEP1, DecompGEP2;
unsigned MaxPointerSize = getMaxPointerSize(DL);
DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
@@ -1306,14 +1326,14 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// Do the base pointers alias?
AliasResult BaseAlias =
aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
- UnderlyingV2, LocationSize::unknown(), AAMDNodes());
+ UnderlyingV2, LocationSize::unknown(), AAMDNodes(), AAQI);
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
- AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
- UnderlyingV2, V2Size, V2AAInfo);
+ AliasResult PreciseBaseAlias = aliasCheck(
+ UnderlyingV1, V1Size, V1AAInfo, UnderlyingV2, V2Size, V2AAInfo, AAQI);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
@@ -1368,9 +1388,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
return MayAlias;
- AliasResult R =
- aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2,
- LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2);
+ AliasResult R = aliasCheck(UnderlyingV1, LocationSize::unknown(),
+ AAMDNodes(), V2, LocationSize::unknown(),
+ V2AAInfo, AAQI, nullptr, UnderlyingV2);
if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1504,37 +1524,35 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
/// against another.
-AliasResult BasicAAResult::aliasSelect(const SelectInst *SI,
- LocationSize SISize,
- const AAMDNodes &SIAAInfo,
- const Value *V2, LocationSize V2Size,
- const AAMDNodes &V2AAInfo,
- const Value *UnderV2) {
+AliasResult
+BasicAAResult::aliasSelect(const SelectInst *SI, LocationSize SISize,
+ const AAMDNodes &SIAAInfo, const Value *V2,
+ LocationSize V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2, AAQueryInfo &AAQI) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
- AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
- SI2->getTrueValue(), V2Size, V2AAInfo);
+ AliasResult Alias =
+ aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, SI2->getTrueValue(),
+ V2Size, V2AAInfo, AAQI);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
- SI2->getFalseValue(), V2Size, V2AAInfo);
+ SI2->getFalseValue(), V2Size, V2AAInfo, AAQI);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
- AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
- SISize, SIAAInfo, UnderV2);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
+ SISize, SIAAInfo, AAQI, UnderV2);
if (Alias == MayAlias)
return MayAlias;
- AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo,
- UnderV2);
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(),
+ SISize, SIAAInfo, AAQI, UnderV2);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1544,7 +1562,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
LocationSize V2Size,
const AAMDNodes &V2AAInfo,
- const Value *UnderV2) {
+ const Value *UnderV2, AAQueryInfo &AAQI) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1554,8 +1572,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
- LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
+ AAQueryInfo::LocPair Locs(MemoryLocation(PN, PNSize, PNAAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
// Analyse the PHIs' inputs under the assumption that the PHIs are
@@ -1566,25 +1584,33 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// that causes a MayAlias.
// Pretend the phis do not alias.
AliasResult Alias = NoAlias;
- assert(AliasCache.count(Locs) &&
- "There must exist an entry for the phi node");
- AliasResult OrigAliasResult = AliasCache[Locs];
- AliasCache[Locs] = NoAlias;
+ AliasResult OrigAliasResult;
+ {
+ // Limited lifetime iterator invalidated by the aliasCheck call below.
+ auto CacheIt = AAQI.AliasCache.find(Locs);
+ assert((CacheIt != AAQI.AliasCache.end()) &&
+ "There must exist an entry for the phi node");
+ OrigAliasResult = CacheIt->second;
+ CacheIt->second = NoAlias;
+ }
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2AAInfo);
+ V2Size, V2AAInfo, AAQI);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
}
// Reset if speculation failed.
- if (Alias != NoAlias)
- AliasCache[Locs] = OrigAliasResult;
-
+ if (Alias != NoAlias) {
+ auto Pair =
+ AAQI.AliasCache.insert(std::make_pair(Locs, OrigAliasResult));
+ assert(!Pair.second && "Entry must have existed");
+ Pair.first->second = OrigAliasResult;
+ }
return Alias;
}
@@ -1658,9 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
if (isRecursive)
PNSize = LocationSize::unknown();
- AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
- PNSize, PNAAInfo, UnderV2);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize,
+ PNAAInfo, AAQI, UnderV2);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1673,7 +1698,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
Value *V = V1Srcs[i];
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, AAQI, UnderV2);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1687,7 +1712,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
AAMDNodes V1AAInfo, const Value *V2,
LocationSize V2Size, AAMDNodes V2AAInfo,
- const Value *O1, const Value *O2) {
+ AAQueryInfo &AAQI, const Value *O1,
+ const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size.isZero() || V2Size.isZero())
@@ -1755,9 +1781,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// temporary store the nocapture argument's value in a temporary memory
// location if that memory location doesn't escape. Or it may pass a
// nocapture value to other functions as long as they don't capture it.
- if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ if (isEscapeSource(O1) &&
+ isNonEscapingLocalObject(O2, &AAQI.IsCapturedCache))
return NoAlias;
- if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ if (isEscapeSource(O2) &&
+ isNonEscapingLocalObject(O1, &AAQI.IsCapturedCache))
return NoAlias;
}
@@ -1772,12 +1800,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
- LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
- MemoryLocation(V2, V2Size, V2AAInfo));
+ AAQueryInfo::LocPair Locs(MemoryLocation(V1, V1Size, V1AAInfo),
+ MemoryLocation(V2, V2Size, V2AAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
- std::pair<AliasCacheTy::iterator, bool> Pair =
- AliasCache.insert(std::make_pair(Locs, MayAlias));
+ std::pair<AAQueryInfo::AliasCacheT::iterator, bool> Pair =
+ AAQI.AliasCache.try_emplace(Locs, MayAlias);
if (!Pair.second)
return Pair.first->second;
@@ -1791,9 +1819,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
AliasResult Result =
- aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
- if (Result != MayAlias)
- return AliasCache[Locs] = Result;
+ aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2, AAQI);
+ if (Result != MayAlias) {
+ auto ItInsPair = AAQI.AliasCache.insert(std::make_pair(Locs, Result));
+ assert(!ItInsPair.second && "Entry must have existed");
+ ItInsPair.first->second = Result;
+ return Result;
+ }
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1803,10 +1835,13 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
- V2, V2Size, V2AAInfo, O2);
- if (Result != MayAlias)
- return AliasCache[Locs] = Result;
+ AliasResult Result =
+ aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
+ if (Result != MayAlias) {
+ Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = Result;
+ }
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1817,9 +1852,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result =
- aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
- if (Result != MayAlias)
- return AliasCache[Locs] = Result;
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2, AAQI);
+ if (Result != MayAlias) {
+ Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = Result;
+ }
}
// If both pointers are pointing into the same object and one of them
@@ -1827,14 +1865,19 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
if (O1 == O2)
if (V1Size.isPrecise() && V2Size.isPrecise() &&
(isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
- isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
- return AliasCache[Locs] = PartialAlias;
+ isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation))) {
+ Pair = AAQI.AliasCache.try_emplace(Locs, PartialAlias);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = PartialAlias;
+ }
// Recurse back into the best AA results we have, potentially with refined
// memory locations. We have already ensured that BasicAA has a MayAlias
// cache result for these, so any recursion back into BasicAA won't loop.
- AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
- return AliasCache[Locs] = Result;
+ AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second, AAQI);
+ Pair = AAQI.AliasCache.try_emplace(Locs, Result);
+ assert(!Pair.second && "Entry must have existed");
+ return Pair.first->second = Result;
}
/// Check whether two Values can be considered equivalent.
@@ -1863,7 +1906,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
for (auto *P : VisitedPhiBBs)
- if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
+ if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT, LI))
return false;
return true;
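
The BasicAliasAnalysis.cpp hunks move the per-query caches (alias results plus the new captured-pointer cache) into AAQueryInfo, so they live exactly as long as one top-level query, and isNonEscapingLocalObject now memoizes PointerMayBeCaptured through that cache. The memoization idiom used there, insert a default entry first and return it on a hit, otherwise compute and overwrite through the saved iterator, looks roughly like this in isolation (the predicate is a stand-in):

// Minimal sketch of the insert-then-fill memoization used by
// isNonEscapingLocalObject above.
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Value.h"
#include <tuple>

using namespace llvm;

// Stand-in for the expensive check (PointerMayBeCaptured in the real code).
static bool expensiveCheck(const Value *V) { return V != nullptr; }

static bool cachedCheck(const Value *V,
                        SmallDenseMap<const Value *, bool, 8> *Cache) {
  SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
  if (Cache) {
    bool Inserted;
    std::tie(CacheIt, Inserted) = Cache->insert({V, false});
    if (!Inserted)
      return CacheIt->second;  // cache hit: reuse the earlier answer
  }
  bool Ret = expensiveCheck(V);
  if (Cache)
    CacheIt->second = Ret;     // fill the slot reserved above
  return Ret;
}
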
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index ef27c36517ea..de183bbde173 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,11 +203,12 @@ BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
}
Optional<uint64_t>
-BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const {
+BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB,
+ bool AllowSynthetic) const {
if (!BFI)
return None;
- return BFI->getBlockProfileCount(*getFunction(), BB);
+ return BFI->getBlockProfileCount(*getFunction(), BB, AllowSynthetic);
}
Optional<uint64_t>
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 08ebcc47a807..0db6dd04a7e8 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -1,9 +1,8 @@
//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -558,14 +557,17 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
Optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
- const BlockNode &Node) const {
- return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
+ const BlockNode &Node,
+ bool AllowSynthetic) const {
+ return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency(),
+ AllowSynthetic);
}
Optional<uint64_t>
BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
- uint64_t Freq) const {
- auto EntryCount = F.getEntryCount();
+ uint64_t Freq,
+ bool AllowSynthetic) const {
+ auto EntryCount = F.getEntryCount(AllowSynthetic);
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
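
The BlockFrequencyInfo changes only thread an AllowSynthetic flag down to Function::getEntryCount, so synthetic (inferred) entry counts can optionally be used when converting a block frequency back into a profile count. The conversion itself is the usual proportional scaling, count is approximately EntryCount * BlockFreq / EntryFreq, performed in wide arithmetic to avoid overflow. An arithmetic-only sketch of that scaling, using unsigned __int128 in place of LLVM's 128-bit APInt (the exact rounding and edge handling in LLVM may differ):

// Standalone sketch of the frequency -> count scaling referenced above.
#include <cstdint>

static uint64_t scaleProfileCount(uint64_t EntryCount, uint64_t BlockFreq,
                                  uint64_t EntryFreq) {
  if (EntryFreq == 0)
    return 0;  // no meaningful scaling possible
  unsigned __int128 Wide = (unsigned __int128)EntryCount * BlockFreq;
  return (uint64_t)(Wide / EntryFreq);
}
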
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 7f544b27fe9d..5eb95003f5d8 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
//===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -661,8 +660,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
if (!CI)
return false;
+ auto GetConstantInt = [](Value *V) {
+ if (auto *I = dyn_cast<BitCastInst>(V))
+ return dyn_cast<ConstantInt>(I->getOperand(0));
+ return dyn_cast<ConstantInt>(V);
+ };
+
Value *RHS = CI->getOperand(1);
- ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ ConstantInt *CV = GetConstantInt(RHS);
if (!CV)
return false;
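
The calcZeroHeuristics change lets the zero-comparison heuristic see a ConstantInt even when it is hidden behind a no-op bitcast, by peeking through a BitCastInst before the dyn_cast. The same look-through helper, written as a free function instead of the lambda above:

// The look-through helper added above, restated in isolation.
#include "llvm/IR/Constants.h"     // llvm::ConstantInt
#include "llvm/IR/Instructions.h"  // llvm::BitCastInst
#include "llvm/Support/Casting.h"  // llvm::dyn_cast

using namespace llvm;

static ConstantInt *getConstantIntThroughBitcast(Value *V) {
  if (auto *I = dyn_cast<BitCastInst>(V))
    return dyn_cast<ConstantInt>(I->getOperand(0)); // look through the cast
  return dyn_cast<ConstantInt>(V);
}
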
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index aa880a62b754..18b83d6838cc 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -1,9 +1,8 @@
//===-- CFG.cpp - BasicBlock analysis --------------------------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CFG.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
@@ -120,22 +120,33 @@ static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
return L;
}
-// True if there is a loop which contains both BB1 and BB2.
-static bool loopContainsBoth(const LoopInfo *LI,
- const BasicBlock *BB1, const BasicBlock *BB2) {
- const Loop *L1 = getOutermostLoop(LI, BB1);
- const Loop *L2 = getOutermostLoop(LI, BB2);
- return L1 != nullptr && L1 == L2;
-}
-
bool llvm::isPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
- const DominatorTree *DT, const LoopInfo *LI) {
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
+ const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
DT = nullptr;
+ // We can't skip directly from a block that dominates the stop block if the
+ // exclusion block is potentially in between.
+ if (ExclusionSet && !ExclusionSet->empty())
+ DT = nullptr;
+
+ // Normally any block in a loop is reachable from any other block in a loop,
+ // however excluded blocks might partition the body of a loop to make that
+ // untrue.
+ SmallPtrSet<const Loop *, 8> LoopsWithHoles;
+ if (LI && ExclusionSet) {
+ for (auto BB : *ExclusionSet) {
+ if (const Loop *L = getOutermostLoop(LI, BB))
+ LoopsWithHoles.insert(L);
+ }
+ }
+
+ const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
+
// Limit the number of blocks we visit. The goal is to avoid run-away compile
// times on large CFGs without hampering sensible code. Arbitrarily chosen.
unsigned Limit = 32;
@@ -146,10 +157,23 @@ bool llvm::isPotentiallyReachableFromMany(
continue;
if (BB == StopBB)
return true;
+ if (ExclusionSet && ExclusionSet->count(BB))
+ continue;
if (DT && DT->dominates(BB, StopBB))
return true;
- if (LI && loopContainsBoth(LI, BB, StopBB))
- return true;
+
+ const Loop *Outer = nullptr;
+ if (LI) {
+ Outer = getOutermostLoop(LI, BB);
+ // If we're in a loop with a hole, not all blocks in the loop are
+ // reachable from all other blocks. That implies we can't simply jump to
+ // the loop's exit blocks, as that exit might need to pass through an
+ // excluded block. Clear Outer so we process BB's successors.
+ if (LoopsWithHoles.count(Outer))
+ Outer = nullptr;
+ if (StopLoop && Outer == StopLoop)
+ return true;
+ }
if (!--Limit) {
// We haven't been able to prove it one way or the other. Conservatively
@@ -157,7 +181,7 @@ bool llvm::isPotentiallyReachableFromMany(
return true;
}
- if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) {
+ if (Outer) {
// All blocks in a single loop are reachable from all other blocks. From
// any of these blocks, we can skip directly to the exits of the loop,
// ignoring any other blocks inside the loop body.
@@ -181,11 +205,13 @@ bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
Worklist.push_back(const_cast<BasicBlock*>(A));
return isPotentiallyReachableFromMany(Worklist, const_cast<BasicBlock *>(B),
- DT, LI);
+ nullptr, DT, LI);
}
-bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
- const DominatorTree *DT, const LoopInfo *LI) {
+bool llvm::isPotentiallyReachable(
+ const Instruction *A, const Instruction *B,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
+ const LoopInfo *LI) {
assert(A->getParent()->getParent() == B->getParent()->getParent() &&
"This analysis is function-local!");
@@ -227,11 +253,20 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
}
- if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return true;
- if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return false;
+ if (DT) {
+ if (DT->isReachableFromEntry(A->getParent()) &&
+ !DT->isReachableFromEntry(B->getParent()))
+ return false;
+ if (!ExclusionSet || ExclusionSet->empty()) {
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock() &&
+ DT->isReachableFromEntry(B->getParent()))
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock() &&
+ DT->isReachableFromEntry(A->getParent()))
+ return false;
+ }
+ }
return isPotentiallyReachableFromMany(
- Worklist, const_cast<BasicBlock *>(B->getParent()), DT, LI);
+ Worklist, const_cast<BasicBlock *>(B->getParent()), ExclusionSet, DT, LI);
}
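
The new ExclusionSet parameter introduced above lets callers ask whether one instruction can reach another while pretending some blocks do not exist. A minimal caller-side sketch, assuming only the signatures shown in this hunk (reachesAvoiding is a hypothetical helper, not part of the patch):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"

// Can `From` potentially reach `To` without passing through any block in
// `Blocked`? Passing a null or empty set behaves like the old two-analysis
// overload.
static bool reachesAvoiding(
    const llvm::Instruction *From, const llvm::Instruction *To,
    const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &Blocked,
    const llvm::DominatorTree *DT, const llvm::LoopInfo *LI) {
  return llvm::isPotentiallyReachable(From, To, &Blocked, DT, LI);
}
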
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 6d01e9d5d447..619b675b58d8 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -1,9 +1,8 @@
//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 1c61dd369a05..690e514d4f5c 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -613,7 +612,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
auto Src = InstantiatedValue{Val, I};
// If there's an assignment edge from X to Y, it means Y is reachable from
- // X at S2 and X is reachable from Y at S1
+ // X at S3 and X is reachable from Y at S1
for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
WorkList);
@@ -876,7 +875,8 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
}
AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (LocA.Ptr == LocB.Ptr)
return MustAlias;
@@ -886,11 +886,11 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
// ConstantExpr, but every query needs to have at least one Value tied to a
// Function, and neither GlobalValues nor ConstantExprs are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
AliasResult QueryResult = query(LocA, LocB);
if (QueryResult == MayAlias)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
return QueryResult;
}
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 12121d717433..21842ed36487 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -1,9 +1,8 @@
//===- CFLGraph.h - Abstract stratified sets implementation. -----*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,7 +24,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -154,7 +152,7 @@ public:
}
};
-///A builder class used to create CFLGraph instance from a given function
+/// A builder class used to create CFLGraph instance from a given function
/// The CFL-AA that uses this builder must provide its own type as a template
/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
/// needs a way of obtaining the summary of other functions when callinsts are
@@ -183,24 +181,23 @@ template <typename CFLAA> class CFLGraphBuilder {
static bool hasUsefulEdges(ConstantExpr *CE) {
// ConstantExpr doesn't have terminators, invokes, or fences, so only
- // needs
- // to check for compares.
+ // needs to check for compares.
return CE->getOpcode() != Instruction::ICmp &&
CE->getOpcode() != Instruction::FCmp;
}
// Returns possible functions called by CS into the given SmallVectorImpl.
// Returns true if targets found, false otherwise.
- static bool getPossibleTargets(CallSite CS,
+ static bool getPossibleTargets(CallBase &Call,
SmallVectorImpl<Function *> &Output) {
- if (auto *Fn = CS.getCalledFunction()) {
+ if (auto *Fn = Call.getCalledFunction()) {
Output.push_back(Fn);
return true;
}
// TODO: If the call is indirect, we might be able to enumerate all
- // potential
- // targets of the call and return them, rather than just failing.
+ // potential targets of the call and return them, rather than just
+ // failing.
return false;
}
@@ -294,6 +291,11 @@ template <typename CFLAA> class CFLGraphBuilder {
addAssignEdge(Op2, &Inst);
}
+ void visitUnaryOperator(UnaryOperator &Inst) {
+ auto *Src = Inst.getOperand(0);
+ addAssignEdge(Src, &Inst);
+ }
+
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
auto *Ptr = Inst.getPointerOperand();
auto *Val = Inst.getNewValOperand();
@@ -370,11 +372,11 @@ template <typename CFLAA> class CFLGraphBuilder {
return !Fn->hasExactDefinition();
}
- bool tryInterproceduralAnalysis(CallSite CS,
+ bool tryInterproceduralAnalysis(CallBase &Call,
const SmallVectorImpl<Function *> &Fns) {
assert(Fns.size() > 0);
- if (CS.arg_size() > MaxSupportedArgsInSummary)
+ if (Call.arg_size() > MaxSupportedArgsInSummary)
return false;
// Exit early if we'll fail anyway
@@ -382,7 +384,7 @@ template <typename CFLAA> class CFLGraphBuilder {
if (isFunctionExternal(Fn) || Fn->isVarArg())
return false;
// Fail if the caller does not provide enough arguments
- assert(Fn->arg_size() <= CS.arg_size());
+ assert(Fn->arg_size() <= Call.arg_size());
if (!AA.getAliasSummary(*Fn))
return false;
}
@@ -393,7 +395,7 @@ template <typename CFLAA> class CFLGraphBuilder {
auto &RetParamRelations = Summary->RetParamRelations;
for (auto &Relation : RetParamRelations) {
- auto IRelation = instantiateExternalRelation(Relation, CS);
+ auto IRelation = instantiateExternalRelation(Relation, Call);
if (IRelation.hasValue()) {
Graph.addNode(IRelation->From);
Graph.addNode(IRelation->To);
@@ -403,7 +405,7 @@ template <typename CFLAA> class CFLGraphBuilder {
auto &RetParamAttributes = Summary->RetParamAttributes;
for (auto &Attribute : RetParamAttributes) {
- auto IAttr = instantiateExternalAttribute(Attribute, CS);
+ auto IAttr = instantiateExternalAttribute(Attribute, Call);
if (IAttr.hasValue())
Graph.addNode(IAttr->IValue, IAttr->Attr);
}
@@ -412,37 +414,35 @@ template <typename CFLAA> class CFLGraphBuilder {
return true;
}
- void visitCallSite(CallSite CS) {
- auto Inst = CS.getInstruction();
-
+ void visitCallBase(CallBase &Call) {
// Make sure all arguments and return value are added to the graph first
- for (Value *V : CS.args())
+ for (Value *V : Call.args())
if (V->getType()->isPointerTy())
addNode(V);
- if (Inst->getType()->isPointerTy())
- addNode(Inst);
+ if (Call.getType()->isPointerTy())
+ addNode(&Call);
// Check if Inst is a call to a library function that
// allocates/deallocates on the heap. Those kinds of functions do not
// introduce any aliases.
// TODO: address other common library functions such as realloc(),
// strdup(), etc.
- if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI))
+ if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI))
return;
// TODO: Add support for noalias args/all the other fun function
// attributes that we can tack on.
SmallVector<Function *, 4> Targets;
- if (getPossibleTargets(CS, Targets))
- if (tryInterproceduralAnalysis(CS, Targets))
+ if (getPossibleTargets(Call, Targets))
+ if (tryInterproceduralAnalysis(Call, Targets))
return;
// Because the function is opaque, we need to note that anything
// could have happened to the arguments (unless the function is marked
// readonly or readnone), and that the result could alias just about
// anything, too (unless the result is marked noalias).
- if (!CS.onlyReadsMemory())
- for (Value *V : CS.args()) {
+ if (!Call.onlyReadsMemory())
+ for (Value *V : Call.args()) {
if (V->getType()->isPointerTy()) {
// The argument itself escapes.
Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
@@ -453,12 +453,12 @@ template <typename CFLAA> class CFLGraphBuilder {
}
}
- if (Inst->getType()->isPointerTy()) {
- auto *Fn = CS.getCalledFunction();
+ if (Call.getType()->isPointerTy()) {
+ auto *Fn = Call.getCalledFunction();
if (Fn == nullptr || !Fn->returnDoesNotAlias())
// No need to call addNode() since we've added Inst at the
// beginning of this function and we know it is not a global.
- Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
+ Graph.addAttr(InstantiatedValue{&Call, 0}, getAttrUnknown());
}
}
@@ -559,6 +559,7 @@ template <typename CFLAA> class CFLGraphBuilder {
}
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
@@ -583,6 +584,11 @@ template <typename CFLAA> class CFLGraphBuilder {
break;
}
+ case Instruction::FNeg: {
+ addAssignEdge(CE->getOperand(0), CE);
+ break;
+ }
+
default:
llvm_unreachable("Unknown instruction type encountered!");
}
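
The CFLGraph.h changes above extend the graph builder to the new unary FNeg (both as an instruction and as a constant expression) by modelling it as a plain value copy. A small sketch of the same visitor hook outside CFLGraphBuilder (FlowEdgeCollector and Edges are illustrative names only):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstVisitor.h"

// Record "value flows from A to B" edges; fneg introduces no new memory
// behaviour, so its result simply inherits the operand's aliasing.
struct FlowEdgeCollector : llvm::InstVisitor<FlowEdgeCollector> {
  llvm::SmallVector<std::pair<llvm::Value *, llvm::Value *>, 16> Edges;

  void visitUnaryOperator(llvm::UnaryOperator &I) {
    Edges.emplace_back(I.getOperand(0), &I); // operand -> result, like a copy
  }
};
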
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 30ce13578e54..44b1834f70bf 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index fd2292ced017..a0b3f83cca6a 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -1,9 +1,8 @@
//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -111,6 +110,12 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// ...getContext().yield();
}
+ // Before we mark all of *this* SCC's analyses as preserved below, intersect
+ // this with the cross-SCC preserved analysis set. This is used to allow
+ // CGSCC passes to mutate ancestor SCCs and still trigger proper invalidation
+ // for them.
+ UR.CrossSCCPA.intersect(PA);
+
// Invalidation was handled after each pass in the above loop for the current
// SCC. Therefore, the remaining analysis results in the AnalysisManager are
// preserved. We mark this with a set so that we don't need to inspect each
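
The CrossSCCPA bookkeeping added above is a running set intersection over PreservedAnalyses. A sketch of the underlying operation using only the documented PreservedAnalyses API (the surrounding names are assumptions for illustration):

#include "llvm/IR/PassManager.h"

// Start from "everything preserved" and narrow it as each pass reports its
// result; whatever survives is preserved across the whole run and can be
// trusted when invalidating ancestor SCCs.
struct CrossSCCTracker {
  llvm::PreservedAnalyses CrossSCCPA = llvm::PreservedAnalyses::all();

  void notePassResult(const llvm::PreservedAnalyses &ThisPassPA) {
    CrossSCCPA.intersect(ThisPassPA); // keep only what both sets preserve
  }
};
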
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 0da678e1611b..ec5e94d499be 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -1,9 +1,8 @@
//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,7 +10,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
@@ -64,25 +62,25 @@ void CallGraph::addToCallGraph(Function *F) {
// If this function has external linkage or has its address taken, anything
// could call it.
if (!F->hasLocalLinkage() || F->hasAddressTaken())
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
+ ExternalCallingNode->addCalledFunction(nullptr, Node);
// If this function is not defined in this translation unit, it could call
// anything.
if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode.get());
+ Node->addCalledFunction(nullptr, CallsExternalNode.get());
// Look for calls by this function.
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
- if (auto CS = CallSite(&I)) {
- const Function *Callee = CS.getCalledFunction();
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
- Node->addCalledFunction(CS, CallsExternalNode.get());
+ Node->addCalledFunction(Call, CallsExternalNode.get());
else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+ Node->addCalledFunction(Call, getOrInsertFunction(Callee));
}
}
}
@@ -185,10 +183,10 @@ LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); }
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
/// should be used sparingly.
-void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+void CallGraphNode::removeCallEdgeFor(CallBase &Call) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
- if (I->first == CS.getInstruction()) {
+ if (I->first == &Call) {
I->second->DropRef();
*I = CalledFunctions.back();
CalledFunctions.pop_back();
@@ -228,13 +226,13 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
/// replaceCallEdge - This method replaces the edge in the node for the
/// specified call site with a new one. Note that this method takes linear
/// time, so it should be used sparingly.
-void CallGraphNode::replaceCallEdge(CallSite CS,
- CallSite NewCS, CallGraphNode *NewNode){
+void CallGraphNode::replaceCallEdge(CallBase &Call, CallBase &NewCall,
+ CallGraphNode *NewNode) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
- if (I->first == CS.getInstruction()) {
+ if (I->first == &Call) {
I->second->DropRef();
- I->first = NewCS.getInstruction();
+ I->first = &NewCall;
I->second = NewNode;
NewNode->AddRef();
return;
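
This file is part of the broader CallSite-to-CallBase migration in the import: call edges are now keyed directly on CallBase instructions. A minimal sketch of the replacement scanning idiom, independent of CallGraph itself (forEachCallTarget is a hypothetical helper):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

// Visit the callee of every call/invoke in F. The callee may be null for
// indirect calls, exactly as CallBase::getCalledFunction() reports it.
static void forEachCallTarget(llvm::Function &F,
                              llvm::function_ref<void(llvm::Function *)> Visit) {
  for (llvm::BasicBlock &BB : F)
    for (llvm::Instruction &I : BB)
      if (auto *Call = llvm::dyn_cast<llvm::CallBase>(&I))
        Visit(Call->getCalledFunction());
}
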
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 0aed57a39387..196ef400bc4e 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -1,9 +1,8 @@
//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,6 @@
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Intrinsics.h"
@@ -202,7 +200,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
/// This never happens in checking mode.
bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool CheckingMode) {
- DenseMap<Value*, CallGraphNode*> CallSites;
+ DenseMap<Value *, CallGraphNode *> Calls;
LLVM_DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
<< " nodes:\n";
@@ -231,21 +229,21 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
// If this call site is null, then the function pass deleted the call
// entirely and the WeakTrackingVH nulled it out.
+ auto *Call = dyn_cast_or_null<CallBase>(I->first);
if (!I->first ||
// If we've already seen this call site, then the FunctionPass RAUW'd
// one call with another, which resulted in two "uses" in the edge
// list of the same call.
- CallSites.count(I->first) ||
+ Calls.count(I->first) ||
// If the call edge is not from a call or invoke, or it is a
// instrinsic call, then the function pass RAUW'd a call with
// another value. This can happen when constant folding happens
// of well known functions etc.
- !CallSite(I->first) ||
- (CallSite(I->first).getCalledFunction() &&
- CallSite(I->first).getCalledFunction()->isIntrinsic() &&
- Intrinsic::isLeaf(
- CallSite(I->first).getCalledFunction()->getIntrinsicID()))) {
+ !Call ||
+ (Call->getCalledFunction() &&
+ Call->getCalledFunction()->isIntrinsic() &&
+ Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()))) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -269,15 +267,14 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
continue;
}
- assert(!CallSites.count(I->first) &&
+ assert(!Calls.count(I->first) &&
"Call site occurs in node multiple times");
- CallSite CS(I->first);
- if (CS) {
- Function *Callee = CS.getCalledFunction();
+ if (Call) {
+ Function *Callee = Call->getCalledFunction();
// Ignore intrinsics because they're not really function calls.
if (!Callee || !(Callee->isIntrinsic()))
- CallSites.insert(std::make_pair(I->first, I->second));
+ Calls.insert(std::make_pair(I->first, I->second));
}
++I;
}
@@ -288,23 +285,25 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
- CallSite CS(&I);
- if (!CS) continue;
- Function *Callee = CS.getCalledFunction();
- if (Callee && Callee->isIntrinsic()) continue;
+ auto *Call = dyn_cast<CallBase>(&I);
+ if (!Call)
+ continue;
+ Function *Callee = Call->getCalledFunction();
+ if (Callee && Callee->isIntrinsic())
+ continue;
// If this call site already existed in the callgraph, just verify it
- // matches up to expectations and remove it from CallSites.
- DenseMap<Value*, CallGraphNode*>::iterator ExistingIt =
- CallSites.find(CS.getInstruction());
- if (ExistingIt != CallSites.end()) {
+ // matches up to expectations and remove it from Calls.
+ DenseMap<Value *, CallGraphNode *>::iterator ExistingIt =
+ Calls.find(Call);
+ if (ExistingIt != Calls.end()) {
CallGraphNode *ExistingNode = ExistingIt->second;
- // Remove from CallSites since we have now seen it.
- CallSites.erase(ExistingIt);
+ // Remove from Calls since we have now seen it.
+ Calls.erase(ExistingIt);
// Verify that the callee is right.
- if (ExistingNode->getFunction() == CS.getCalledFunction())
+ if (ExistingNode->getFunction() == Call->getCalledFunction())
continue;
// If we are in checking mode, we are not allowed to actually mutate
@@ -312,7 +311,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// callgraph is less precise than it could be (e.g. an indirect call
// site could be turned direct), don't reject it in checking mode, and
// don't tweak it to be more precise.
- if (CheckingMode && CS.getCalledFunction() &&
+ if (CheckingMode && Call->getCalledFunction() &&
ExistingNode->getFunction() == nullptr)
continue;
@@ -322,7 +321,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// If not, we either went from a direct call to indirect, indirect to
// direct, or direct to different direct.
CallGraphNode *CalleeNode;
- if (Function *Callee = CS.getCalledFunction()) {
+ if (Function *Callee = Call->getCalledFunction()) {
CalleeNode = CG.getOrInsertFunction(Callee);
// Keep track of whether we turned an indirect call into a direct
// one.
@@ -336,7 +335,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
}
// Update the edge target in CGN.
- CGN->replaceCallEdge(CS, CS, CalleeNode);
+ CGN->replaceCallEdge(*Call, *Call, CalleeNode);
MadeChange = true;
continue;
}
@@ -346,7 +345,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// If the call site didn't exist in the CGN yet, add it.
CallGraphNode *CalleeNode;
- if (Function *Callee = CS.getCalledFunction()) {
+ if (Function *Callee = Call->getCalledFunction()) {
CalleeNode = CG.getOrInsertFunction(Callee);
++NumDirectAdded;
} else {
@@ -354,7 +353,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
++NumIndirectAdded;
}
- CGN->addCalledFunction(CS, CalleeNode);
+ CGN->addCalledFunction(Call, CalleeNode);
MadeChange = true;
}
@@ -376,12 +375,12 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
// they are dangling pointers. WeakTrackingVH should save us for this, so
// abort if
// this happens.
- assert(CallSites.empty() && "Dangling pointers found in call sites map");
+ assert(Calls.empty() && "Dangling pointers found in call sites map");
// Periodically do an explicit clear to remove tombstones when processing
// large scc's.
if ((FunctionNo & 15) == 15)
- CallSites.clear();
+ Calls.clear();
}
LLVM_DEBUG(if (MadeChange) {
@@ -682,11 +681,28 @@ Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &OS,
return new PrintCallGraphPass(Banner, OS);
}
+static std::string getDescription(const CallGraphSCC &SCC) {
+ std::string Desc = "SCC (";
+ bool First = true;
+ for (CallGraphNode *CGN : SCC) {
+ if (First)
+ First = false;
+ else
+ Desc += ", ";
+ Function *F = CGN->getFunction();
+ if (F)
+ Desc += F->getName();
+ else
+ Desc += "<<null function>>";
+ }
+ Desc += ")";
+ return Desc;
+}
+
bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC) const {
- return !SCC.getCallGraph().getModule()
- .getContext()
- .getOptPassGate()
- .shouldRunPass(this, SCC);
+ OptPassGate &Gate =
+ SCC.getCallGraph().getModule().getContext().getOptPassGate();
+ return Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(SCC));
}
char DummyCGSCCPass::ID = 0;
diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp
index e7017e77652a..d24cbd104bf6 100644
--- a/lib/Analysis/CallPrinter.cpp
+++ b/lib/Analysis/CallPrinter.cpp
@@ -1,9 +1,8 @@
//===- CallPrinter.cpp - DOT printer for call graph -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 669f4f2835fa..adaa83a6c443 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -1,9 +1,8 @@
//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,14 +101,14 @@ namespace {
SmallVector<BasicBlock*, 32> Worklist;
Worklist.append(succ_begin(BB), succ_end(BB));
- return !isPotentiallyReachableFromMany(Worklist, BB, DT);
+ return !isPotentiallyReachableFromMany(Worklist, BB, nullptr, DT);
}
// If the value is defined in the same basic block as use and BeforeHere,
// there is no need to explore the use if BeforeHere dominates use.
// Check whether there is a path from I to BeforeHere.
if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
- !isPotentiallyReachable(I, BeforeHere, DT))
+ !isPotentiallyReachable(I, BeforeHere, nullptr, DT))
return true;
return false;
@@ -331,14 +330,32 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
AddUses(I);
break;
case Instruction::ICmp: {
- // Don't count comparisons of a no-alias return value against null as
- // captures. This allows us to ignore comparisons of malloc results
- // with null, for example.
- if (ConstantPointerNull *CPN =
- dyn_cast<ConstantPointerNull>(I->getOperand(1)))
+ if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) {
+ // Don't count comparisons of a no-alias return value against null as
+ // captures. This allows us to ignore comparisons of malloc results
+ // with null, for example.
if (CPN->getType()->getAddressSpace() == 0)
if (isNoAliasCall(V->stripPointerCasts()))
break;
+ if (!I->getFunction()->nullPointerIsDefined()) {
+ auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation();
+ // An inbounds GEP can either be a valid pointer (pointing into
+ // or to the end of an allocation), or be null in the default
+ // address space. So for an inbounds GEPs there is no way to let
+ // the pointer escape using clever GEP hacking because doing so
+ // would make the pointer point outside of the allocated object
+ // and thus make the GEP result a poison value.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
+ if (GEP->isInBounds())
+ break;
+ // Comparing a dereferenceable_or_null argument against null
+ // cannot lead to pointer escapes, because if it is not null it
+ // must be a valid (in-bounds) pointer.
+ bool CanBeNull;
+ if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull))
+ break;
+ }
+ }
// Comparison against value stored in global variable. Given the pointer
// does not escape, its value cannot be guessed and stored separately in a
// global variable.
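
The new ICmp cases above widen what capture tracking treats as a non-capturing use when null is not a defined address: comparing an inbounds GEP of the pointer, or a dereferenceable_or_null pointer, against null. A hedged usage sketch (escapesToCaller is a hypothetical wrapper, not an LLVM API):

#include "llvm/Analysis/CaptureTracking.h"

// With this change, an allocation whose only interesting use is an
// `inbounds GEP == null` check is no longer conservatively reported as
// captured, in functions where null is not a valid address.
static bool escapesToCaller(const llvm::Value *Alloc) {
  return llvm::PointerMayBeCaptured(Alloc, /*ReturnCaptures=*/true,
                                    /*StoreCaptures=*/true);
}
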
diff --git a/lib/Analysis/CmpInstAnalysis.cpp b/lib/Analysis/CmpInstAnalysis.cpp
index 27071babec5c..a5757be2c4f4 100644
--- a/lib/Analysis/CmpInstAnalysis.cpp
+++ b/lib/Analysis/CmpInstAnalysis.cpp
@@ -1,9 +1,8 @@
//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 46cc87d2b178..627d955c865f 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -1,9 +1,8 @@
//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,6 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
@@ -126,14 +124,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
continue;
// Special handling for calls.
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&I);
-
- if (const Function *F = CS.getCalledFunction()) {
+ if (const auto *Call = dyn_cast<CallBase>(&I)) {
+ if (const Function *F = Call->getCalledFunction()) {
// If a function is both internal and has a single use, then it is
// extremely likely to get inlined in the future (it was probably
// exposed by an interleaved devirtualization pass).
- if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ if (!Call->isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
++NumInlineCandidates;
// If this call is to function itself, then the function is recursive.
@@ -148,7 +144,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
} else {
// We don't want inline asm to count as a call - that would prevent loop
// unrolling. The argument setup cost is still real, though.
- if (!isa<InlineAsm>(CS.getCalledValue()))
+ if (!Call->isInlineAsm())
++NumCalls;
}
}
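
The CodeMetrics hunk swaps ImmutableCallSite for CallBase and uses CallBase::isInlineAsm() to keep inline asm out of the call count. A tiny sketch of that predicate in isolation (countsAsCall is a hypothetical name):

#include "llvm/IR/InstrTypes.h"

// Inline asm still costs argument setup, but counting it as a call would
// wrongly inhibit loop unrolling, so it is excluded from the call count.
static bool countsAsCall(const llvm::CallBase &Call) {
  return !Call.isInlineAsm();
}
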
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 5da29d6d2372..20231ca78b45 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1,9 +1,8 @@
//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -516,7 +516,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
MapTy = Type::getInt64Ty(C->getContext());
else if (LoadTy->isVectorTy()) {
MapTy = PointerType::getIntNTy(C->getContext(),
- DL.getTypeAllocSizeInBits(LoadTy));
+ DL.getTypeSizeInBits(LoadTy));
} else
return nullptr;
@@ -1000,7 +1000,9 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
const TargetLibraryInfo *TLI) {
Type *DestTy = InstOrCE->getType();
- // Handle easy binops first.
+ if (Instruction::isUnaryOp(Opcode))
+ return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
+
if (Instruction::isBinaryOp(Opcode))
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -1025,15 +1027,18 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Call:
if (auto *F = dyn_cast<Function>(Ops.back())) {
- ImmutableCallSite CS(cast<CallInst>(InstOrCE));
- if (canConstantFoldCallTo(CS, F))
- return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI);
+ const auto *Call = cast<CallBase>(InstOrCE);
+ if (canConstantFoldCallTo(Call, F))
+ return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
}
return nullptr;
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::ExtractValue:
+ return ConstantExpr::getExtractValue(
+ Ops[0], dyn_cast<ExtractValueInst>(InstOrCE)->getIndices());
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
@@ -1263,6 +1268,13 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
+Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+ const DataLayout &DL) {
+ assert(Instruction::isUnaryOp(Opcode));
+
+ return ConstantExpr::get(Opcode, Op);
+}
+
Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
Constant *RHS,
const DataLayout &DL) {
@@ -1367,8 +1379,8 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// Constant Folding for Calls
//
-bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
- if (CS.isNoBuiltin() || CS.isStrictFP())
+bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
+ if (Call->isNoBuiltin() || Call->isStrictFP())
return false;
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
@@ -1414,6 +1426,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::uadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::usub_sat:
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::bitreverse:
@@ -1518,14 +1532,12 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
namespace {
Constant *GetConstantFoldFPValue(double V, Type *Ty) {
- if (Ty->isHalfTy()) {
+ if (Ty->isHalfTy() || Ty->isFloatTy()) {
APFloat APF(V);
bool unused;
- APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
return ConstantFP::get(Ty->getContext(), APF);
}
- if (Ty->isFloatTy())
- return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold half/float/double");
@@ -1641,522 +1653,538 @@ static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
return false;
}
-Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
- ArrayRef<Constant *> Operands,
- const TargetLibraryInfo *TLI,
- ImmutableCallSite CS) {
- if (Operands.size() == 1) {
- if (IntrinsicID == Intrinsic::is_constant) {
- // We know we have a "Constant" argument. But we want to only
- // return true for manifest constants, not those that depend on
- // constants with unknowable values, e.g. GlobalValue or BlockAddress.
- if (isManifestConstant(Operands[0]))
- return ConstantInt::getTrue(Ty->getContext());
- return nullptr;
- }
- if (isa<UndefValue>(Operands[0])) {
- // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
- // ctpop() is between 0 and bitwidth, pick 0 for undef.
- if (IntrinsicID == Intrinsic::cos ||
- IntrinsicID == Intrinsic::ctpop)
- return Constant::getNullValue(Ty);
- if (IntrinsicID == Intrinsic::bswap ||
- IntrinsicID == Intrinsic::bitreverse ||
- IntrinsicID == Intrinsic::launder_invariant_group ||
- IntrinsicID == Intrinsic::strip_invariant_group)
- return Operands[0];
- }
+static Constant *ConstantFoldScalarCall1(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ assert(Operands.size() == 1 && "Wrong number of operands.");
+
+ if (IntrinsicID == Intrinsic::is_constant) {
+ // We know we have a "Constant" argument. But we want to only
+ // return true for manifest constants, not those that depend on
+ // constants with unknowable values, e.g. GlobalValue or BlockAddress.
+ if (isManifestConstant(Operands[0]))
+ return ConstantInt::getTrue(Ty->getContext());
+ return nullptr;
+ }
+ if (isa<UndefValue>(Operands[0])) {
+ // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+ // ctpop() is between 0 and bitwidth, pick 0 for undef.
+ if (IntrinsicID == Intrinsic::cos ||
+ IntrinsicID == Intrinsic::ctpop)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::bswap ||
+ IntrinsicID == Intrinsic::bitreverse ||
+ IntrinsicID == Intrinsic::launder_invariant_group ||
+ IntrinsicID == Intrinsic::strip_invariant_group)
+ return Operands[0];
+ }
- if (isa<ConstantPointerNull>(Operands[0])) {
- // launder(null) == null == strip(null) iff in addrspace 0
- if (IntrinsicID == Intrinsic::launder_invariant_group ||
- IntrinsicID == Intrinsic::strip_invariant_group) {
- // If instruction is not yet put in a basic block (e.g. when cloning
- // a function during inlining), CS caller may not be available.
- // So check CS's BB first before querying CS.getCaller.
- const Function *Caller = CS.getParent() ? CS.getCaller() : nullptr;
- if (Caller &&
- !NullPointerIsDefined(
- Caller, Operands[0]->getType()->getPointerAddressSpace())) {
- return Operands[0];
- }
- return nullptr;
+ if (isa<ConstantPointerNull>(Operands[0])) {
+ // launder(null) == null == strip(null) iff in addrspace 0
+ if (IntrinsicID == Intrinsic::launder_invariant_group ||
+ IntrinsicID == Intrinsic::strip_invariant_group) {
+ // If instruction is not yet put in a basic block (e.g. when cloning
+ // a function during inlining), Call's caller may not be available.
+ // So check Call's BB first before querying Call->getCaller.
+ const Function *Caller =
+ Call->getParent() ? Call->getCaller() : nullptr;
+ if (Caller &&
+ !NullPointerIsDefined(
+ Caller, Operands[0]->getType()->getPointerAddressSpace())) {
+ return Operands[0];
}
+ return nullptr;
}
+ }
- if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
- if (IntrinsicID == Intrinsic::convert_to_fp16) {
- APFloat Val(Op->getValueAPF());
-
- bool lost = false;
- Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
+ if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
+ if (IntrinsicID == Intrinsic::convert_to_fp16) {
+ APFloat Val(Op->getValueAPF());
- return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
- }
+ bool lost = false;
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
- if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
- return nullptr;
+ return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
+ }
- if (IntrinsicID == Intrinsic::round) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToAway);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
- if (IntrinsicID == Intrinsic::floor) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardNegative);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::round) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::ceil) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardPositive);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::floor) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::trunc) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardZero);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::ceil) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::rint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::trunc) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- if (IntrinsicID == Intrinsic::nearbyint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
- }
+ if (IntrinsicID == Intrinsic::rint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- /// We only fold functions with finite arguments. Folding NaN and inf is
- /// likely to be aborted with an exception anyway, and some host libms
- /// have known errors raising exceptions.
- if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
- return nullptr;
+ if (IntrinsicID == Intrinsic::nearbyint) {
+ APFloat V = Op->getValueAPF();
+ V.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
+ }
- /// Currently APFloat versions of these functions do not exist, so we use
- /// the host native double versions. Float versions are not called
- /// directly but for all these it is true (float)(f((double)arg)) ==
- /// f(arg). Long double not supported yet.
- double V = getValueAsDouble(Op);
+ /// We only fold functions with finite arguments. Folding NaN and inf is
+ /// likely to be aborted with an exception anyway, and some host libms
+ /// have known errors raising exceptions.
+ if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+ return nullptr;
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::fabs:
- return ConstantFoldFP(fabs, V, Ty);
- case Intrinsic::log2:
- return ConstantFoldFP(Log2, V, Ty);
- case Intrinsic::log:
- return ConstantFoldFP(log, V, Ty);
- case Intrinsic::log10:
- return ConstantFoldFP(log10, V, Ty);
- case Intrinsic::exp:
- return ConstantFoldFP(exp, V, Ty);
- case Intrinsic::exp2:
- return ConstantFoldFP(exp2, V, Ty);
- case Intrinsic::sin:
- return ConstantFoldFP(sin, V, Ty);
- case Intrinsic::cos:
- return ConstantFoldFP(cos, V, Ty);
- case Intrinsic::sqrt:
- return ConstantFoldFP(sqrt, V, Ty);
- }
+ /// Currently APFloat versions of these functions do not exist, so we use
+ /// the host native double versions. Float versions are not called
+ /// directly but for all these it is true (float)(f((double)arg)) ==
+ /// f(arg). Long double not supported yet.
+ double V = getValueAsDouble(Op);
- if (!TLI)
- return nullptr;
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::fabs:
+ return ConstantFoldFP(fabs, V, Ty);
+ case Intrinsic::log2:
+ return ConstantFoldFP(Log2, V, Ty);
+ case Intrinsic::log:
+ return ConstantFoldFP(log, V, Ty);
+ case Intrinsic::log10:
+ return ConstantFoldFP(log10, V, Ty);
+ case Intrinsic::exp:
+ return ConstantFoldFP(exp, V, Ty);
+ case Intrinsic::exp2:
+ return ConstantFoldFP(exp2, V, Ty);
+ case Intrinsic::sin:
+ return ConstantFoldFP(sin, V, Ty);
+ case Intrinsic::cos:
+ return ConstantFoldFP(cos, V, Ty);
+ case Intrinsic::sqrt:
+ return ConstantFoldFP(sqrt, V, Ty);
+ }
- char NameKeyChar = Name[0];
- if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
- NameKeyChar = Name[2];
-
- switch (NameKeyChar) {
- case 'a':
- if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
- (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
- (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
- (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
- return ConstantFoldFP(acos, V, Ty);
- else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
- (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
- (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
- (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
- return ConstantFoldFP(asin, V, Ty);
- else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
- (Name == "atanf" && TLI->has(LibFunc_atanf)))
- return ConstantFoldFP(atan, V, Ty);
- break;
- case 'c':
- if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
- (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
- return ConstantFoldFP(ceil, V, Ty);
- else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
- (Name == "cosf" && TLI->has(LibFunc_cosf)))
- return ConstantFoldFP(cos, V, Ty);
- else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
- (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
- (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
- return ConstantFoldFP(cosh, V, Ty);
- break;
- case 'e':
- if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
- (Name == "expf" && TLI->has(LibFunc_expf)) ||
- (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
- (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
- return ConstantFoldFP(exp, V, Ty);
- if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
- (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
- (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
- // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
- // C99 library.
- return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
- break;
- case 'f':
- if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
- (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
- return ConstantFoldFP(fabs, V, Ty);
- else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
- (Name == "floorf" && TLI->has(LibFunc_floorf)))
- return ConstantFoldFP(floor, V, Ty);
- break;
- case 'l':
- if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
- (Name == "__log_finite" && V > 0 &&
- TLI->has(LibFunc_log_finite)) ||
- (Name == "__logf_finite" && V > 0 &&
- TLI->has(LibFunc_logf_finite)))
- return ConstantFoldFP(log, V, Ty);
- else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
- (Name == "__log10_finite" && V > 0 &&
- TLI->has(LibFunc_log10_finite)) ||
- (Name == "__log10f_finite" && V > 0 &&
- TLI->has(LibFunc_log10f_finite)))
- return ConstantFoldFP(log10, V, Ty);
- break;
- case 'r':
- if ((Name == "round" && TLI->has(LibFunc_round)) ||
- (Name == "roundf" && TLI->has(LibFunc_roundf)))
- return ConstantFoldFP(round, V, Ty);
- break;
- case 's':
- if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
- (Name == "sinf" && TLI->has(LibFunc_sinf)))
- return ConstantFoldFP(sin, V, Ty);
- else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
- (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
- (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
- return ConstantFoldFP(sinh, V, Ty);
- else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
- (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
- return ConstantFoldFP(sqrt, V, Ty);
- break;
- case 't':
- if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
- (Name == "tanf" && TLI->has(LibFunc_tanf)))
- return ConstantFoldFP(tan, V, Ty);
- else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
- (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
- return ConstantFoldFP(tanh, V, Ty);
- break;
- default:
- break;
- }
+ if (!TLI)
return nullptr;
- }
- if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
- switch (IntrinsicID) {
- case Intrinsic::bswap:
- return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
- case Intrinsic::ctpop:
- return ConstantInt::get(Ty, Op->getValue().countPopulation());
- case Intrinsic::bitreverse:
- return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
- case Intrinsic::convert_from_fp16: {
- APFloat Val(APFloat::IEEEhalf(), Op->getValue());
-
- bool lost = false;
- APFloat::opStatus status = Val.convert(
- Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
-
- // Conversion is always precise.
- (void)status;
- assert(status == APFloat::opOK && !lost &&
- "Precision lost during fp16 constfolding");
-
- return ConstantFP::get(Ty->getContext(), Val);
- }
- default:
- return nullptr;
- }
- }
+ char NameKeyChar = Name[0];
+ if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
+ NameKeyChar = Name[2];
- // Support ConstantVector in case we have an Undef in the top.
- if (isa<ConstantVector>(Operands[0]) ||
- isa<ConstantDataVector>(Operands[0])) {
- auto *Op = cast<Constant>(Operands[0]);
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::x86_sse_cvtss2si:
- case Intrinsic::x86_sse_cvtss2si64:
- case Intrinsic::x86_sse2_cvtsd2si:
- case Intrinsic::x86_sse2_cvtsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty,
- /*IsSigned*/true);
- break;
- case Intrinsic::x86_sse_cvttss2si:
- case Intrinsic::x86_sse_cvttss2si64:
- case Intrinsic::x86_sse2_cvttsd2si:
- case Intrinsic::x86_sse2_cvttsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty,
- /*IsSigned*/true);
- break;
- }
+ switch (NameKeyChar) {
+ case 'a':
+ if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
+ (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
+ (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
+ return ConstantFoldFP(acos, V, Ty);
+ else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
+ (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
+ (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
+ return ConstantFoldFP(asin, V, Ty);
+ else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc_atanf)))
+ return ConstantFoldFP(atan, V, Ty);
+ break;
+ case 'c':
+ if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
+ return ConstantFoldFP(ceil, V, Ty);
+ else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc_cosf)))
+ return ConstantFoldFP(cos, V, Ty);
+ else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
+ (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
+ (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
+ return ConstantFoldFP(cosh, V, Ty);
+ break;
+ case 'e':
+ if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
+ (Name == "expf" && TLI->has(LibFunc_expf)) ||
+ (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
+ (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
+ return ConstantFoldFP(exp, V, Ty);
+ if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
+ (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
+ (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
+ // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
+ // C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
+ break;
+ case 'f':
+ if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
+ return ConstantFoldFP(fabs, V, Ty);
+ else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc_floorf)))
+ return ConstantFoldFP(floor, V, Ty);
+ break;
+ case 'l':
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
+ (Name == "__log_finite" && V > 0 &&
+ TLI->has(LibFunc_log_finite)) ||
+ (Name == "__logf_finite" && V > 0 &&
+ TLI->has(LibFunc_logf_finite)))
+ return ConstantFoldFP(log, V, Ty);
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
+ (Name == "__log10_finite" && V > 0 &&
+ TLI->has(LibFunc_log10_finite)) ||
+ (Name == "__log10f_finite" && V > 0 &&
+ TLI->has(LibFunc_log10f_finite)))
+ return ConstantFoldFP(log10, V, Ty);
+ break;
+ case 'r':
+ if ((Name == "round" && TLI->has(LibFunc_round)) ||
+ (Name == "roundf" && TLI->has(LibFunc_roundf)))
+ return ConstantFoldFP(round, V, Ty);
+ break;
+ case 's':
+ if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc_sinf)))
+ return ConstantFoldFP(sin, V, Ty);
+ else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
+ (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
+ (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
+ return ConstantFoldFP(sinh, V, Ty);
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
+ return ConstantFoldFP(sqrt, V, Ty);
+ break;
+ case 't':
+ if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc_tanf)))
+ return ConstantFoldFP(tan, V, Ty);
+ else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
+ return ConstantFoldFP(tanh, V, Ty);
+ break;
+ default:
+ break;
}
-
return nullptr;
}
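The switch above folds calls such as acos, exp and log at compile time by dispatching on the first character of the callee name, verifying through TargetLibraryInfo that the library routine is actually available, and then evaluating the host's libm implementation on the constant argument (guarding domain restrictions like V > 0 for log). A minimal standalone sketch of the same idea follows; it is not LLVM code, and the helper name foldUnaryLibCall is made up for illustration, assuming the host math library matches the target's.

#include <cmath>
#include <cstdio>
#include <optional>
#include <string>

// Fold a unary math-library call on a constant argument, but only when the
// argument is in the function's domain (the switch above guards log with
// V > 0 and sqrt with V >= 0 in the same way).
static std::optional<double> foldUnaryLibCall(const std::string &Name, double V) {
  if (Name == "exp")
    return std::exp(V);
  if (Name == "log" && V > 0)
    return std::log(V);
  if (Name == "sqrt" && V >= 0)
    return std::sqrt(V);
  return std::nullopt; // Unknown, unavailable, or out of domain: keep the call.
}

int main() {
  if (auto Folded = foldUnaryLibCall("log", 2.0))
    std::printf("folded log(2.0) = %f\n", *Folded);
}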
- if (Operands.size() == 2) {
- if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
- if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
- return nullptr;
- double Op1V = getValueAsDouble(Op1);
-
- if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
- if (Op2->getType() != Op1->getType())
- return nullptr;
+ if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+ switch (IntrinsicID) {
+ case Intrinsic::bswap:
+ return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
+ case Intrinsic::ctpop:
+ return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ case Intrinsic::bitreverse:
+ return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
+ case Intrinsic::convert_from_fp16: {
+ APFloat Val(APFloat::IEEEhalf(), Op->getValue());
+
+ bool lost = false;
+ APFloat::opStatus status = Val.convert(
+ Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
+
+ // Conversion is always precise.
+ (void)status;
+ assert(status == APFloat::opOK && !lost &&
+ "Precision lost during fp16 constfolding");
+
+ return ConstantFP::get(Ty->getContext(), Val);
+ }
+ default:
+ return nullptr;
+ }
+ }
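The integer cases above (bswap, ctpop, bitreverse, convert_from_fp16) fold to a plain bit manipulation of the constant operand. A standalone sketch of the first two, using uint32_t in place of APInt; the helper names are invented for the example.

#include <cstdint>
#include <cstdio>

static uint32_t byteSwap(uint32_t X) {
  // Reverse the four bytes, as llvm.bswap does for a 32-bit constant.
  return (X >> 24) | ((X >> 8) & 0xff00u) | ((X << 8) & 0xff0000u) | (X << 24);
}

static unsigned popCount(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1) // Clear the lowest set bit each iteration.
    ++N;
  return N;
}

int main() {
  std::printf("bswap(0x12345678) = 0x%08x\n", byteSwap(0x12345678u)); // 0x78563412
  std::printf("ctpop(0x12345678) = %u\n", popCount(0x12345678u));     // 13
}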
- double Op2V = getValueAsDouble(Op2);
- if (IntrinsicID == Intrinsic::pow) {
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- }
- if (IntrinsicID == Intrinsic::copysign) {
- APFloat V1 = Op1->getValueAPF();
- const APFloat &V2 = Op2->getValueAPF();
- V1.copySign(V2);
- return ConstantFP::get(Ty->getContext(), V1);
- }
+ // Support ConstantVector in case we have an Undef in the top.
+ if (isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ }
+ }
- if (IntrinsicID == Intrinsic::minnum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
- }
+ return nullptr;
+}
- if (IntrinsicID == Intrinsic::maxnum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
- }
+static Constant *ConstantFoldScalarCall2(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ assert(Operands.size() == 2 && "Wrong number of operands.");
- if (IntrinsicID == Intrinsic::minimum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
- }
+ if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+ if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+ return nullptr;
+ double Op1V = getValueAsDouble(Op1);
- if (IntrinsicID == Intrinsic::maximum) {
- const APFloat &C1 = Op1->getValueAPF();
- const APFloat &C2 = Op2->getValueAPF();
- return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
- }
+ if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+ if (Op2->getType() != Op1->getType())
+ return nullptr;
- if (!TLI)
- return nullptr;
- if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
- (Name == "powf" && TLI->has(LibFunc_powf)) ||
- (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
- (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
- (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
- (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
- (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
- } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
- if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
- return ConstantFP::get(Ty->getContext(),
- APFloat((float)std::pow((float)Op1V,
- (int)Op2C->getZExtValue())));
- if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
- return ConstantFP::get(Ty->getContext(),
- APFloat((float)std::pow((float)Op1V,
- (int)Op2C->getZExtValue())));
- if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
- return ConstantFP::get(Ty->getContext(),
- APFloat((double)std::pow((double)Op1V,
- (int)Op2C->getZExtValue())));
+ double Op2V = getValueAsDouble(Op2);
+ if (IntrinsicID == Intrinsic::pow) {
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ }
+ if (IntrinsicID == Intrinsic::copysign) {
+ APFloat V1 = Op1->getValueAPF();
+ const APFloat &V2 = Op2->getValueAPF();
+ V1.copySign(V2);
+ return ConstantFP::get(Ty->getContext(), V1);
}
- return nullptr;
- }
- if (Operands[0]->getType()->isIntegerTy() &&
- Operands[1]->getType()->isIntegerTy()) {
- const APInt *C0, *C1;
- if (!getConstIntOrUndef(Operands[0], C0) ||
- !getConstIntOrUndef(Operands[1], C1))
- return nullptr;
+ if (IntrinsicID == Intrinsic::minnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
+ }
- switch (IntrinsicID) {
- default: break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- // Even if both operands are undef, we cannot fold muls to undef
- // in the general case. For example, on i2 there are no inputs
- // that would produce { i2 -1, i1 true } as the result.
- if (!C0 || !C1)
- return Constant::getNullValue(Ty);
- LLVM_FALLTHROUGH;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow: {
- if (!C0 || !C1)
- return UndefValue::get(Ty);
+ if (IntrinsicID == Intrinsic::maxnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
+ }
- APInt Res;
- bool Overflow;
- switch (IntrinsicID) {
- default: llvm_unreachable("Invalid case");
- case Intrinsic::sadd_with_overflow:
- Res = C0->sadd_ov(*C1, Overflow);
- break;
- case Intrinsic::uadd_with_overflow:
- Res = C0->uadd_ov(*C1, Overflow);
- break;
- case Intrinsic::ssub_with_overflow:
- Res = C0->ssub_ov(*C1, Overflow);
- break;
- case Intrinsic::usub_with_overflow:
- Res = C0->usub_ov(*C1, Overflow);
- break;
- case Intrinsic::smul_with_overflow:
- Res = C0->smul_ov(*C1, Overflow);
- break;
- case Intrinsic::umul_with_overflow:
- Res = C0->umul_ov(*C1, Overflow);
- break;
- }
- Constant *Ops[] = {
- ConstantInt::get(Ty->getContext(), Res),
- ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
- };
- return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ if (IntrinsicID == Intrinsic::minimum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
}
- case Intrinsic::uadd_sat:
- case Intrinsic::sadd_sat:
- if (!C0 && !C1)
- return UndefValue::get(Ty);
- if (!C0 || !C1)
- return Constant::getAllOnesValue(Ty);
- if (IntrinsicID == Intrinsic::uadd_sat)
- return ConstantInt::get(Ty, C0->uadd_sat(*C1));
- else
- return ConstantInt::get(Ty, C0->sadd_sat(*C1));
- case Intrinsic::usub_sat:
- case Intrinsic::ssub_sat:
- if (!C0 && !C1)
- return UndefValue::get(Ty);
- if (!C0 || !C1)
- return Constant::getNullValue(Ty);
- if (IntrinsicID == Intrinsic::usub_sat)
- return ConstantInt::get(Ty, C0->usub_sat(*C1));
- else
- return ConstantInt::get(Ty, C0->ssub_sat(*C1));
- case Intrinsic::cttz:
- case Intrinsic::ctlz:
- assert(C1 && "Must be constant int");
-
- // cttz(0, 1) and ctlz(0, 1) are undef.
- if (C1->isOneValue() && (!C0 || C0->isNullValue()))
- return UndefValue::get(Ty);
- if (!C0)
- return Constant::getNullValue(Ty);
- if (IntrinsicID == Intrinsic::cttz)
- return ConstantInt::get(Ty, C0->countTrailingZeros());
- else
- return ConstantInt::get(Ty, C0->countLeadingZeros());
+
+ if (IntrinsicID == Intrinsic::maximum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
}
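The four cases just above preserve the distinction between the IEEE-754 minNum/maxNum operations, which return the non-NaN operand when exactly one input is NaN, and minimum/maximum, which propagate the NaN. A standalone sketch of that difference (the signed-zero ordering that the real minimum/maximum also define is omitted here, and the function names are made up):

#include <cmath>
#include <cstdio>

static double minNum(double A, double B) {
  if (std::isnan(A)) return B; // minnum: prefer the non-NaN operand.
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}

static double minimumFn(double A, double B) {
  if (std::isnan(A) || std::isnan(B)) return NAN; // minimum: NaN wins.
  return A < B ? A : B;
}

int main() {
  std::printf("minnum(1.0, NaN)  = %g\n", minNum(1.0, NAN));    // 1
  std::printf("minimum(1.0, NaN) = %g\n", minimumFn(1.0, NAN)); // nan
}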
- return nullptr;
+ if (!TLI)
+ return nullptr;
+ if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
+ (Name == "powf" && TLI->has(LibFunc_powf)) ||
+ (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
+ (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+ if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
+ (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
+ (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+ if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
+ return ConstantFP::get(Ty->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
+ return ConstantFP::get(Ty->getContext(),
+ APFloat((float)std::pow((float)Op1V,
+ (int)Op2C->getZExtValue())));
+ if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
+ return ConstantFP::get(Ty->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
}
+ return nullptr;
+ }
- // Support ConstantVector in case we have an Undef in the top.
- if ((isa<ConstantVector>(Operands[0]) ||
- isa<ConstantDataVector>(Operands[0])) &&
- // Check for default rounding mode.
- // FIXME: Support other rounding modes?
- isa<ConstantInt>(Operands[1]) &&
- cast<ConstantInt>(Operands[1])->getValue() == 4) {
- auto *Op = cast<Constant>(Operands[0]);
+ if (Operands[0]->getType()->isIntegerTy() &&
+ Operands[1]->getType()->isIntegerTy()) {
+ const APInt *C0, *C1;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1))
+ return nullptr;
+
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // Even if both operands are undef, we cannot fold muls to undef
+ // in the general case. For example, on i2 there are no inputs
+ // that would produce { i2 -1, i1 true } as the result.
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ APInt Res;
+ bool Overflow;
switch (IntrinsicID) {
- default: break;
- case Intrinsic::x86_avx512_vcvtss2si32:
- case Intrinsic::x86_avx512_vcvtss2si64:
- case Intrinsic::x86_avx512_vcvtsd2si32:
- case Intrinsic::x86_avx512_vcvtsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty,
- /*IsSigned*/true);
+ default: llvm_unreachable("Invalid case");
+ case Intrinsic::sadd_with_overflow:
+ Res = C0->sadd_ov(*C1, Overflow);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ Res = C0->uadd_ov(*C1, Overflow);
break;
- case Intrinsic::x86_avx512_vcvtss2usi32:
- case Intrinsic::x86_avx512_vcvtss2usi64:
- case Intrinsic::x86_avx512_vcvtsd2usi32:
- case Intrinsic::x86_avx512_vcvtsd2usi64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty,
- /*IsSigned*/false);
+ case Intrinsic::ssub_with_overflow:
+ Res = C0->ssub_ov(*C1, Overflow);
+ break;
+ case Intrinsic::usub_with_overflow:
+ Res = C0->usub_ov(*C1, Overflow);
break;
- case Intrinsic::x86_avx512_cvttss2si:
- case Intrinsic::x86_avx512_cvttss2si64:
- case Intrinsic::x86_avx512_cvttsd2si:
- case Intrinsic::x86_avx512_cvttsd2si64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty,
- /*IsSigned*/true);
+ case Intrinsic::smul_with_overflow:
+ Res = C0->smul_ov(*C1, Overflow);
break;
- case Intrinsic::x86_avx512_cvttss2usi:
- case Intrinsic::x86_avx512_cvttss2usi64:
- case Intrinsic::x86_avx512_cvttsd2usi:
- case Intrinsic::x86_avx512_cvttsd2usi64:
- if (ConstantFP *FPOp =
- dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
- return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty,
- /*IsSigned*/false);
+ case Intrinsic::umul_with_overflow:
+ Res = C0->umul_ov(*C1, Overflow);
break;
}
+ Constant *Ops[] = {
+ ConstantInt::get(Ty->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getAllOnesValue(Ty);
+ if (IntrinsicID == Intrinsic::uadd_sat)
+ return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::usub_sat)
+ return ConstantInt::get(Ty, C0->usub_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ assert(C1 && "Must be constant int");
+
+ // cttz(0, 1) and ctlz(0, 1) are undef.
+ if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ return UndefValue::get(Ty);
+ if (!C0)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::cttz)
+ return ConstantInt::get(Ty, C0->countTrailingZeros());
+ else
+ return ConstantInt::get(Ty, C0->countLeadingZeros());
}
+
return nullptr;
}
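The integer arm of the two-operand folder above turns the *_with_overflow intrinsics into a {result, overflow-bit} struct and the saturating intrinsics into a clamped value. A standalone sketch of both behaviours for 8-bit unsigned addition, with plain integers standing in for APInt and invented helper names:

#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<uint8_t, bool> uaddWithOverflow(uint8_t A, uint8_t B) {
  uint8_t R = static_cast<uint8_t>(A + B);
  return {R, R < A}; // Wrapped result plus an overflow flag, like uadd.with.overflow.
}

static uint8_t uaddSat(uint8_t A, uint8_t B) {
  uint8_t R = static_cast<uint8_t>(A + B);
  return R < A ? 0xff : R; // Clamp to the type maximum, like uadd.sat.
}

int main() {
  auto [R, Ov] = uaddWithOverflow(200, 100);
  std::printf("uadd.with.overflow(200, 100) = {%u, %d}\n",
              static_cast<unsigned>(R), Ov ? 1 : 0);             // {44, 1}
  std::printf("uadd.sat(200, 100) = %u\n",
              static_cast<unsigned>(uaddSat(200, 100)));         // 255
}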
- if (Operands.size() != 3)
- return nullptr;
+ // Support ConstantVector in case we have an Undef in the top.
+ if ((isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) &&
+ // Check for default rounding mode.
+ // FIXME: Support other rounding modes?
+ isa<ConstantInt>(Operands[1]) &&
+ cast<ConstantInt>(Operands[1])->getValue() == 4) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/false);
+ break;
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/false);
+ break;
+ }
+ }
+ return nullptr;
+}
+
+static Constant *ConstantFoldScalarCall3(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ assert(Operands.size() == 3 && "Wrong number of operands.");
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -2179,6 +2207,43 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
+ if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+ if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat: {
+ // This code performs rounding towards negative infinity in case the
+ // result cannot be represented exactly for the given scale. Targets
+ // that do care about rounding should use a target hook for specifying
+ // how rounding should be done, and provide their own folding to be
+ // consistent with rounding. This is the same approach as used by
+ // DAGTypeLegalizer::ExpandIntRes_MULFIX.
+ APInt Lhs = Op1->getValue();
+ APInt Rhs = Op2->getValue();
+ unsigned Scale = Op3->getValue().getZExtValue();
+ unsigned Width = Lhs.getBitWidth();
+ assert(Scale < Width && "Illegal scale.");
+ unsigned ExtendedWidth = Width * 2;
+ APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+ Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+ if (IntrinsicID == Intrinsic::smul_fix_sat) {
+ APInt MaxValue =
+ APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+ APInt MinValue =
+ APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+ Product = APIntOps::smin(Product, MaxValue);
+ Product = APIntOps::smax(Product, MinValue);
+ }
+ return ConstantInt::get(Ty->getContext(),
+ Product.sextOrTrunc(Width));
+ }
+ }
+ }
+ }
+ }
+
if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
const APInt *C0, *C1, *C2;
if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2212,11 +2277,31 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return nullptr;
}
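The smul.fix folding above computes the product in twice the bit width, shifts right by the scale (rounding toward negative infinity, as the comment explains), and for the .sat variant clamps to the signed range before truncating. A standalone sketch for 32-bit operands with a 64-bit intermediate; it is illustrative only, not the LLVM implementation.

#include <algorithm>
#include <cstdint>
#include <cstdio>

static int32_t smulFix(int32_t A, int32_t B, unsigned Scale, bool Saturate) {
  int64_t Product = (static_cast<int64_t>(A) * B) >> Scale; // Arithmetic shift: rounds toward -inf.
  if (Saturate) {
    Product = std::min<int64_t>(Product, INT32_MAX);
    Product = std::max<int64_t>(Product, INT32_MIN);
  }
  return static_cast<int32_t>(Product); // Truncate back to the original width.
}

int main() {
  // Q16.16 fixed point: 1.5 * 2.25 = 3.375, i.e. 0x00036000.
  std::printf("0x%08x\n",
              static_cast<uint32_t>(smulFix(0x00018000, 0x00024000, 16, false)));
}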
-Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
- VectorType *VTy, ArrayRef<Constant *> Operands,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- ImmutableCallSite CS) {
+static Constant *ConstantFoldScalarCall(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ Type *Ty,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+ if (Operands.size() == 1)
+ return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+ if (Operands.size() == 2)
+ return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+ if (Operands.size() == 3)
+ return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
+
+ return nullptr;
+}
+
+static Constant *ConstantFoldVectorCall(StringRef Name,
+ Intrinsic::ID IntrinsicID,
+ VectorType *VTy,
+ ArrayRef<Constant *> Operands,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
SmallVector<Constant *, 4> Result(VTy->getNumElements());
SmallVector<Constant *, 4> Lane(Operands.size());
Type *Ty = VTy->getElementType();
@@ -2263,10 +2348,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
- // These intrinsics use a scalar type for their second argument.
- if (J == 1 &&
- (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz ||
- IntrinsicID == Intrinsic::powi)) {
+ // Some intrinsics use a scalar type for certain arguments.
+ if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
Lane[J] = Operands[J];
continue;
}
@@ -2279,7 +2362,8 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
}
// Use the regular scalar folding to simplify this column.
- Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, CS);
+ Constant *Folded =
+ ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
if (!Folded)
return nullptr;
Result[I] = Folded;
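The loop above folds a vector intrinsic one lane at a time: it gathers the I-th element of each vector operand into Lane, passes operands that are scalars shared by every lane (such as the power of powi or the flag of cttz/ctlz) through unchanged, and runs the ordinary scalar folder on the resulting column. A standalone sketch of the per-lane idea, using powi-style semantics with std::pow:

#include <array>
#include <cmath>
#include <cstdio>

int main() {
  std::array<double, 4> Lanes = {1.0, 2.0, 3.0, 4.0};
  int Exponent = 3; // Scalar operand shared by all lanes, as with llvm.powi.
  std::array<double, 4> Result{};
  for (unsigned I = 0; I < Lanes.size(); ++I)
    Result[I] = std::pow(Lanes[I], Exponent); // Fold one lane at a time.
  for (double R : Result)
    std::printf("%g ", R); // 1 8 27 64
  std::printf("\n");
}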
@@ -2290,11 +2374,10 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
} // end anonymous namespace
-Constant *
-llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
- ArrayRef<Constant *> Operands,
- const TargetLibraryInfo *TLI) {
- if (CS.isNoBuiltin() || CS.isStrictFP())
+Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
+ ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
+ if (Call->isNoBuiltin() || Call->isStrictFP())
return nullptr;
if (!F->hasName())
return nullptr;
@@ -2304,17 +2387,19 @@ llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F,
if (auto *VTy = dyn_cast<VectorType>(Ty))
return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands,
- F->getParent()->getDataLayout(), TLI, CS);
+ F->getParent()->getDataLayout(), TLI, Call);
- return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI, CS);
+ return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
+ Call);
}
-bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
+bool llvm::isMathLibCallNoop(const CallBase *Call,
+ const TargetLibraryInfo *TLI) {
// FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
// (and to some extent ConstantFoldScalarCall).
- if (CS.isNoBuiltin() || CS.isStrictFP())
+ if (Call->isNoBuiltin() || Call->isStrictFP())
return false;
- Function *F = CS.getCalledFunction();
+ Function *F = Call->getCalledFunction();
if (!F)
return false;
@@ -2322,8 +2407,8 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
- if (CS.getNumArgOperands() == 1) {
- if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
+ if (Call->getNumArgOperands() == 1) {
+ if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
case LibFunc_logl:
@@ -2421,9 +2506,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
}
}
- if (CS.getNumArgOperands() == 2) {
- ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0));
- ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1));
+ if (Call->getNumArgOperands() == 2) {
+ ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
+ ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
if (Op0C && Op1C) {
const APFloat &Op0 = Op0C->getValueAPF();
const APFloat &Op1 = Op1C->getValueAPF();
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 3d55bf20bb40..bf0cdbfd0c8b 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -1,9 +1,8 @@
//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 4cafb7da16d3..c1043e446beb 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -1,9 +1,8 @@
//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 34f785fb02be..01b8ff10d355 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -1,9 +1,8 @@
//===- DemandedBits.cpp - Determine demanded bits -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -340,6 +339,8 @@ void DemandedBits::performAnalysis() {
Type *T = J->getType();
if (T->isIntOrIntVectorTy())
AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ else
+ Visited.insert(J);
Worklist.insert(J);
}
}
@@ -355,16 +356,18 @@ void DemandedBits::performAnalysis() {
LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
APInt AOut;
+ bool InputIsKnownDead = false;
if (UserI->getType()->isIntOrIntVectorTy()) {
AOut = AliveBits[UserI];
LLVM_DEBUG(dbgs() << " Alive Out: 0x"
<< Twine::utohexstr(AOut.getLimitedValue()));
+
+ // If all bits of the output are dead, then all bits of the input
+ // are also dead.
+ InputIsKnownDead = !AOut && !isAlwaysLive(UserI);
}
LLVM_DEBUG(dbgs() << "\n");
- if (!UserI->getType()->isIntOrIntVectorTy())
- Visited.insert(UserI);
-
KnownBits Known, Known2;
bool KnownBitsComputed = false;
// Compute the set of alive bits for each operand. These are anded into the
@@ -381,10 +384,7 @@ void DemandedBits::performAnalysis() {
if (T->isIntOrIntVectorTy()) {
unsigned BitWidth = T->getScalarSizeInBits();
APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- // If all bits of the output are dead, then all bits of the input
- // are also dead.
+ if (InputIsKnownDead) {
AB = APInt(BitWidth, 0);
} else {
// Bits of each operand that are used to compute alive bits of the
@@ -403,18 +403,13 @@ void DemandedBits::performAnalysis() {
// If we've added to the set of alive bits (or the operand has not
// been previously visited), then re-queue the operand to be visited
// again.
- APInt ABPrev(BitWidth, 0);
- auto ABI = AliveBits.find(I);
- if (ABI != AliveBits.end())
- ABPrev = ABI->second;
-
- APInt ABNew = AB | ABPrev;
- if (ABNew != ABPrev || ABI == AliveBits.end()) {
- AliveBits[I] = std::move(ABNew);
+ auto Res = AliveBits.try_emplace(I);
+ if (Res.second || (AB |= Res.first->second) != Res.first->second) {
+ Res.first->second = std::move(AB);
Worklist.insert(I);
}
}
- } else if (I && !Visited.count(I)) {
+ } else if (I && Visited.insert(I).second) {
Worklist.insert(I);
}
}
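The DemandedBits hunk above replaces the explicit lookup/compare with a try_emplace-based merge: a newly computed alive-bits mask is OR-ed into the stored entry, and the value is re-queued only on a first visit or when the stored mask actually grows. A standalone sketch of that pattern, with strings standing in for IR values and uint32_t for APInt:

#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::unordered_map<std::string, uint32_t> AliveBits;
  std::vector<std::string> Worklist;

  auto Merge = [&](const std::string &V, uint32_t NewBits) {
    auto Res = AliveBits.try_emplace(V, 0u);
    uint32_t Merged = Res.first->second | NewBits;
    if (Res.second || Merged != Res.first->second) { // First visit or mask grew.
      Res.first->second = Merged;
      Worklist.push_back(V);
    }
  };

  Merge("x", 0x0f); // First visit: queued.
  Merge("x", 0x0c); // Subset of what is already alive: not queued again.
  Merge("x", 0xf0); // Mask grows: queued again.
  std::printf("alive(x) = 0x%02x, queued %zu times\n",
              AliveBits["x"], Worklist.size()); // 0xff, 2 times
}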
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 3f4dfa52e1da..75f269e84f9d 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -1,9 +1,8 @@
//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,6 +109,14 @@ STATISTIC(BanerjeeSuccesses, "Banerjee successes");
static cl::opt<bool>
Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Try to delinearize array references."));
+static cl::opt<bool> DisableDelinearizationChecks(
+ "da-disable-delinearization-checks", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc(
+ "Disable checks that try to statically verify validity of "
+ "delinearized subscripts. Enabling this option may result in incorrect "
+ "dependence vectors for languages that allow the subscript of one "
+ "dimension to underflow or overflow into another dimension."));
//===----------------------------------------------------------------------===//
// basics
@@ -3317,19 +3324,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
// and dst.
// FIXME: It may be better to record these sizes and add them as constraints
// to the dependency checks.
- for (int i = 1; i < size; ++i) {
- if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
- return false;
+ if (!DisableDelinearizationChecks)
+ for (int i = 1; i < size; ++i) {
+ if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+ return false;
- if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
- return false;
+ if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+ return false;
- if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
- return false;
+ if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+ return false;
- if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
- return false;
- }
+ if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+ return false;
+ }
LLVM_DEBUG({
dbgs() << "\nSrcSubscripts: ";
@@ -3369,6 +3377,19 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
}
#endif
+bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check if the analysis itself has been invalidated.
+ auto PAC = PA.getChecker<DependenceAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ return true;
+
+ // Check transitive dependencies.
+ return Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
+ Inv.invalidate<LoopAnalysis>(F, PA);
+}
+
// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
@@ -3510,7 +3531,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// to either Separable or Coupled).
//
// Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
- // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
+ // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty,
// so Pair[3].Group = {0, 1, 3} and Done = false.
//
// Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 7ba23854a3cc..0ccd59ef2bfd 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -1,9 +1,8 @@
//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 8abc0e7d0df9..d9f43dd746ef 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -1,9 +1,8 @@
//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/DomTreeUpdater.cpp b/lib/Analysis/DomTreeUpdater.cpp
index b72c1b77c2ce..49215889cfd6 100644
--- a/lib/IR/DomTreeUpdater.cpp
+++ b/lib/Analysis/DomTreeUpdater.cpp
@@ -1,9 +1,8 @@
//===- DomTreeUpdater.cpp - DomTree/Post DomTree Updater --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,12 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/GenericDomTree.h"
#include <algorithm>
#include <functional>
+#include <utility>
namespace llvm {
@@ -54,41 +55,6 @@ bool DomTreeUpdater::isSelfDominance(
return Update.getFrom() == Update.getTo();
}
-bool DomTreeUpdater::applyLazyUpdate(DominatorTree::UpdateKind Kind,
- BasicBlock *From, BasicBlock *To) {
- assert((DT || PDT) &&
- "Call applyLazyUpdate() when both DT and PDT are nullptrs.");
- assert(Strategy == DomTreeUpdater::UpdateStrategy::Lazy &&
- "Call applyLazyUpdate() with Eager strategy error");
- // Analyze pending updates to determine if the update is unnecessary.
- const DominatorTree::UpdateType Update = {Kind, From, To};
- const DominatorTree::UpdateType Invert = {Kind != DominatorTree::Insert
- ? DominatorTree::Insert
- : DominatorTree::Delete,
- From, To};
- // Only check duplicates in updates that are not applied by both trees.
- auto I =
- PendUpdates.begin() + std::max(PendDTUpdateIndex, PendPDTUpdateIndex);
- const auto E = PendUpdates.end();
-
- assert(I <= E && "Iterator out of range.");
-
- for (; I != E; ++I) {
- if (Update == *I)
- return false; // Discard duplicate updates.
-
- if (Invert == *I) {
- // Update and Invert are both valid (equivalent to a no-op). Remove
- // Invert from PendUpdates and discard the Update.
- PendUpdates.erase(I);
- return false;
- }
- }
-
- PendUpdates.push_back(Update); // Save the valid update.
- return true;
-}
-
void DomTreeUpdater::applyDomTreeUpdates() {
// No pending DomTreeUpdates.
if (Strategy != UpdateStrategy::Lazy || !DT)
@@ -262,31 +228,15 @@ void DomTreeUpdater::validateDeleteBB(BasicBlock *DelBB) {
new UnreachableInst(DelBB->getContext(), DelBB);
}
-void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
- bool ForceRemoveDuplicates) {
+void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates) {
if (!DT && !PDT)
return;
- if (Strategy == UpdateStrategy::Lazy || ForceRemoveDuplicates) {
- SmallVector<DominatorTree::UpdateType, 8> Seen;
+ if (Strategy == UpdateStrategy::Lazy) {
for (const auto U : Updates)
- // For Lazy UpdateStrategy, avoid duplicates to applyLazyUpdate() to save
- // on analysis.
- if (llvm::none_of(
- Seen,
- [U](const DominatorTree::UpdateType S) { return S == U; }) &&
- isUpdateValid(U) && !isSelfDominance(U)) {
- Seen.push_back(U);
- if (Strategy == UpdateStrategy::Lazy)
- applyLazyUpdate(U.getKind(), U.getFrom(), U.getTo());
- }
- if (Strategy == UpdateStrategy::Lazy)
- return;
+ if (!isSelfDominance(U))
+ PendUpdates.push_back(U);
- if (DT)
- DT->applyUpdates(Seen);
- if (PDT)
- PDT->applyUpdates(Seen);
return;
}
@@ -296,6 +246,60 @@ void DomTreeUpdater::applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
PDT->applyUpdates(Updates);
}
+void DomTreeUpdater::applyUpdatesPermissive(
+ ArrayRef<DominatorTree::UpdateType> Updates) {
+ if (!DT && !PDT)
+ return;
+
+ SmallSet<std::pair<BasicBlock *, BasicBlock *>, 8> Seen;
+ SmallVector<DominatorTree::UpdateType, 8> DeduplicatedUpdates;
+ for (const auto U : Updates) {
+ auto Edge = std::make_pair(U.getFrom(), U.getTo());
+ // Because it is illegal to submit updates that have already been applied
+ // and updates to an edge need to be strictly ordered,
+ // it is safe to infer the existence of an edge from the first update
+ // to this edge.
+ // If the first update to an edge is "Delete", it means that the edge
+ // existed before. If the first update to an edge is "Insert", it means
+ // that the edge didn't exist before.
+ //
+ // For example, if the user submits {{Delete, A, B}, {Insert, A, B}},
+  // then, because
+  // 1. it is illegal to submit updates that have already been applied,
+  //    i.e., the user cannot delete a nonexistent edge, and
+  // 2. updates to an edge need to be strictly ordered,
+  // the edge A -> B must initially have existed.
+ // We can then safely ignore future updates to this edge and directly
+ // inspect the current CFG:
+  //    a. If the edge still exists, then, because the user cannot insert an
+  //    existing edge, both {Delete, A, B} and {Insert, A, B} actually happened
+  //    and resulted in a no-op. DTU won't submit any update in this case.
+ // b. If the edge doesn't exist, we can then infer that {Delete, A, B}
+ // actually happened but {Insert, A, B} was an invalid update which never
+ // happened. DTU will submit {Delete, A, B} in this case.
+ if (!isSelfDominance(U) && Seen.count(Edge) == 0) {
+ Seen.insert(Edge);
+ // If the update doesn't appear in the CFG, it means that
+ // either the change isn't made or relevant operations
+ // result in a no-op.
+ if (isUpdateValid(U)) {
+ if (isLazy())
+ PendUpdates.push_back(U);
+ else
+ DeduplicatedUpdates.push_back(U);
+ }
+ }
+ }
+
+ if (Strategy == UpdateStrategy::Lazy)
+ return;
+
+ if (DT)
+ DT->applyUpdates(DeduplicatedUpdates);
+ if (PDT)
+ PDT->applyUpdates(DeduplicatedUpdates);
+}
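The comment block above reasons that the first update submitted for an edge pins down whether that edge originally existed, so later updates to the same edge can be ignored and the surviving update only needs to be checked against the current CFG. A standalone sketch of that deduplicate-then-validate scheme, with strings standing in for basic blocks; for the {Delete, A, B}, {Insert, A, B} example in the comment, nothing is submitted because the edge still exists. This is illustrative only, not the DomTreeUpdater implementation.

#include <cstdio>
#include <set>
#include <string>
#include <utility>
#include <vector>

enum class Kind { Insert, Delete };
struct Update { Kind K; std::string From, To; };

int main() {
  std::vector<Update> Updates = {
      {Kind::Delete, "A", "B"}, {Kind::Insert, "A", "B"}, {Kind::Insert, "A", "A"}};
  std::set<std::pair<std::string, std::string>> CurrentEdges = {{"A", "B"}};

  std::set<std::pair<std::string, std::string>> Seen;
  std::vector<Update> Deduplicated;
  for (const Update &U : Updates) {
    auto Edge = std::make_pair(U.From, U.To);
    if (U.From == U.To || !Seen.insert(Edge).second)
      continue; // Skip self-edges and later updates to an already-seen edge.
    // An update describes a change already made to the CFG, so an Insert is
    // valid only if the edge exists now and a Delete only if it no longer does.
    bool Exists = CurrentEdges.count(Edge) != 0;
    bool Valid = (U.K == Kind::Insert) ? Exists : !Exists;
    if (Valid)
      Deduplicated.push_back(U);
  }
  std::printf("submitted %zu of %zu updates\n",
              Deduplicated.size(), Updates.size()); // 0 of 3
}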
+
DominatorTree &DomTreeUpdater::getDomTree() {
assert(DT && "Invalid acquisition of a null DomTree");
applyDomTreeUpdates();
@@ -332,7 +336,7 @@ void DomTreeUpdater::insertEdge(BasicBlock *From, BasicBlock *To) {
return;
}
- applyLazyUpdate(DominatorTree::Insert, From, To);
+ PendUpdates.push_back({DominatorTree::Insert, From, To});
}
void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
@@ -353,7 +357,7 @@ void DomTreeUpdater::insertEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
return;
}
- applyLazyUpdate(DominatorTree::Insert, From, To);
+ PendUpdates.push_back({DominatorTree::Insert, From, To});
}
void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
@@ -378,7 +382,7 @@ void DomTreeUpdater::deleteEdge(BasicBlock *From, BasicBlock *To) {
return;
}
- applyLazyUpdate(DominatorTree::Delete, From, To);
+ PendUpdates.push_back({DominatorTree::Delete, From, To});
}
void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
@@ -399,7 +403,7 @@ void DomTreeUpdater::deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To) {
return;
}
- applyLazyUpdate(DominatorTree::Delete, From, To);
+ PendUpdates.push_back({DominatorTree::Delete, From, To});
}
void DomTreeUpdater::dropOutOfDateUpdates() {
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index de7f62cf4ecd..f9a554acb7ea 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -1,9 +1,8 @@
//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp
index 0df73aeebbdc..2242541696a4 100644
--- a/lib/Analysis/EHPersonalities.cpp
+++ b/lib/Analysis/EHPersonalities.cpp
@@ -1,9 +1,8 @@
//===- EHPersonalities.cpp - Compute EH-related information ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index b28abcadca4a..0d6c0ffb18a8 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -1,9 +1,8 @@
//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -514,7 +513,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
break;
}
- if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F->isDeclaration() || F->hasOptNone()) {
// Try to get mod/ref behaviour from function attributes.
if (F->doesNotAccessMemory()) {
// Can't do better than that!
@@ -567,7 +566,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Don't prove any properties based on the implementation of an optnone
// function. Function attributes were already used as a best approximation
// above.
- if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone))
+ if (Node->getFunction()->hasOptNone())
continue;
for (Instruction &I : instructions(Node->getFunction())) {
@@ -597,7 +596,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
// All non-call instructions we use the primary predicates for whether
- // thay read or write memory.
+ // they read or write memory.
if (I.mayReadFromMemory())
FI.addModRefInfo(ModRefInfo::Ref);
if (I.mayWriteToMemory())
@@ -791,10 +790,10 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
}
// FIXME: It would be good to handle other obvious no-alias cases here, but
- // it isn't clear how to do so reasonbly without building a small version
+ // it isn't clear how to do so reasonably without building a small version
// of BasicAA into this code. We could recurse into AAResultBase::alias
// here but that seems likely to go poorly as we're inside the
- // implementation of such a query. Until then, just conservatievly retun
+ // implementation of such a query. Until then, just conservatively return
// false.
return false;
} while (!Inputs.empty());
@@ -807,7 +806,8 @@ bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
/// other is some random pointer, we know there cannot be an alias, because the
/// address of the global isn't taken.
AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
// Get the base object these pointers point to.
const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
@@ -882,11 +882,12 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
if ((GV1 || GV2) && GV1 != GV2)
return NoAlias;
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
- const GlobalValue *GV) {
+ const GlobalValue *GV,
+ AAQueryInfo &AAQI) {
if (Call->doesNotAccessMemory())
return ModRefInfo::NoModRef;
ModRefInfo ConservativeResult =
@@ -895,14 +896,15 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
for (auto &A : Call->args()) {
- SmallVector<Value*, 4> Objects;
+ SmallVector<const Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
// All objects must be identified.
if (!all_of(Objects, isIdentifiedObject) &&
// Try ::alias to see if all objects are known not to alias GV.
- !all_of(Objects, [&](Value *V) {
- return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias;
+ !all_of(Objects, [&](const Value *V) {
+ return this->alias(MemoryLocation(V), MemoryLocation(GV), AAQI) ==
+ NoAlias;
}))
return ConservativeResult;
@@ -915,7 +917,8 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
}
ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
ModRefInfo Known = ModRefInfo::ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
@@ -927,11 +930,11 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
- getModRefInfoForArgument(Call, GV));
+ getModRefInfoForArgument(Call, GV, AAQI));
if (!isModOrRefSet(Known))
return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
- return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc));
+ return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI));
}
GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
diff --git a/lib/Analysis/GuardUtils.cpp b/lib/Analysis/GuardUtils.cpp
index 08fa6abeafb5..cad92f6e56bb 100644
--- a/lib/Analysis/GuardUtils.cpp
+++ b/lib/Analysis/GuardUtils.cpp
@@ -1,9 +1,8 @@
//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utils that are used to perform analyses related to guards and their
@@ -19,3 +18,32 @@ bool llvm::isGuard(const User *U) {
using namespace llvm::PatternMatch;
return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
}
+
+bool llvm::isGuardAsWidenableBranch(const User *U) {
+ Value *Condition, *WidenableCondition;
+ BasicBlock *GuardedBB, *DeoptBB;
+ if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
+ DeoptBB))
+ return false;
+ using namespace llvm::PatternMatch;
+ for (auto &Insn : *DeoptBB) {
+ if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
+ return true;
+ if (Insn.mayHaveSideEffects())
+ return false;
+ }
+ return false;
+}
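isGuardAsWidenableBranch above accepts the branch only when its deopt successor reaches an experimental.deoptimize call without any side-effecting instruction before it. A toy standalone version of that scan, with strings standing in for instructions and a deliberately tiny whitelist of side-effect-free ones; names and the whitelist are invented for the example.

#include <cstdio>
#include <string>
#include <vector>

// Walk the candidate deopt block in order: succeed on the deoptimize call,
// bail out on anything that might have side effects before it.
static bool endsInDeoptOnly(const std::vector<std::string> &Block) {
  for (const std::string &Insn : Block) {
    if (Insn == "deoptimize")
      return true;
    if (Insn != "phi" && Insn != "icmp") // Toy side-effect check.
      return false;
  }
  return false;
}

int main() {
  std::printf("%d\n", endsInDeoptOnly({"phi", "deoptimize"}));   // 1
  std::printf("%d\n", endsInDeoptOnly({"store", "deoptimize"})); // 0
}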
+
+bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
+ Value *&WidenableCondition,
+ BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) {
+ using namespace llvm::PatternMatch;
+ if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
+ IfTrueBB, IfFalseBB)))
+ return false;
+  // TODO: At the moment, we only recognize the branch if the WC call is in
+  // this specific position. We should generalize!
+ return match(WidenableCondition,
+ m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+}
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
index aaebc4a481ec..ce285f82f720 100644
--- a/lib/Analysis/IVDescriptors.cpp
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -1,9 +1,8 @@
//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -26,7 +26,6 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -252,6 +251,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
Worklist.push_back(Start);
VisitedInsts.insert(Start);
+ // Start with all flags set because we will intersect this with the reduction
+ // flags from all the reduction operations.
+ FastMathFlags FMF = FastMathFlags::getFast();
+
// A value in the reduction can be used:
// - By the reduction:
// - Reduction operation:
@@ -297,6 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
if (!ReduxDesc.isRecurrence())
return false;
+ if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
+ FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
}
bool IsASelect = isa<SelectInst>(Cur);
@@ -442,7 +447,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// Save the description of this reduction variable.
RecurrenceDescriptor RD(
- RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getMinMaxKind(),
ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
RedDes = RD;
@@ -550,9 +555,8 @@ RecurrenceDescriptor::isConditionalRdxPattern(
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
InstDesc &Prev, bool HasFunNoNaNAttr) {
- bool FP = I->getType()->isFloatingPointTy();
Instruction *UAI = Prev.getUnsafeAlgebraInst();
- if (!UAI && FP && !I->isFast())
+ if (!UAI && isa<FPMathOperator>(I) && !I->hasAllowReassoc())
UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
switch (I->getOpcode()) {
@@ -1010,7 +1014,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
// If we started from an UnknownSCEV, and managed to build an addRecurrence
// only after enabling Assume with PSCEV, this means we may have encountered
// cast instructions that required adding a runtime check in order to
- // guarantee the correctness of the AddRecurence respresentation of the
+  // guarantee the correctness of the AddRecurrence representation of the
// induction.
if (PhiScev != AR && SymbolicPhi) {
SmallVector<Instruction *, 2> Casts;
@@ -1049,6 +1053,13 @@ bool InductionDescriptor::isInductionPHI(
Value *StartValue =
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+
+ BasicBlock *Latch = AR->getLoop()->getLoopLatch();
+ if (!Latch)
+ return false;
+ BinaryOperator *BOp =
+ dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
+
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
// The stride may be a constant or a loop invariant integer value.
@@ -1057,7 +1068,7 @@ bool InductionDescriptor::isInductionPHI(
return false;
if (PhiTy->isIntegerTy()) {
- D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
CastsToIgnore);
return true;
}
@@ -1084,6 +1095,6 @@ bool InductionDescriptor::isInductionPHI(
return false;
auto *StepValue =
SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
return true;
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 609e5e3a1448..681a0cf7e981 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -1,9 +1,8 @@
//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index d6e6e76af03c..6ff840efcb64 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -1,9 +1,8 @@
//===-- IndirectCallPromotionAnalysis.cpp - Find promotion candidates ===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 6ddb3cbc01a3..0dec146e0465 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -1,9 +1,8 @@
//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -37,6 +35,7 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -51,19 +50,19 @@ static cl::opt<int> InlineThreshold(
cl::desc("Control the amount of inlining to perform (default = 225)"));
static cl::opt<int> HintThreshold(
- "inlinehint-threshold", cl::Hidden, cl::init(325),
+ "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
cl::desc("Threshold for inlining functions with inline hint"));
static cl::opt<int>
ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
- cl::init(45),
+ cl::init(45), cl::ZeroOrMore,
cl::desc("Threshold for inlining cold callsites"));
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
- "inlinecold-threshold", cl::Hidden, cl::init(45),
+ "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
cl::desc("Threshold for inlining functions with cold attribute"));
static cl::opt<int>
@@ -77,7 +76,7 @@ static cl::opt<int> LocallyHotCallSiteThreshold(
static cl::opt<int> ColdCallSiteRelFreq(
"cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
- cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
+ cl::desc("Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a callsite to be cold in the absence of "
"profile information."));
@@ -88,7 +87,7 @@ static cl::opt<int> HotCallSiteRelFreq(
"profile information."));
static cl::opt<bool> OptComputeFullInlineCost(
- "inline-cost-full", cl::Hidden, cl::init(false),
+ "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::desc("Compute the full inline cost of a call site even when the cost "
"exceeds the threshold."));
@@ -122,31 +121,43 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr.
- CallSite CandidateCS;
+ CallBase &CandidateCall;
/// Tunable parameters that control the analysis.
const InlineParams &Params;
+ /// Upper bound for the inlining cost. Bonuses are being applied to account
+ /// for speculative "expected profit" of the inlining decision.
int Threshold;
- int Cost;
+
+ /// Inlining cost measured in abstract units, accounts for all the
+ /// instructions expected to be executed for a given function invocation.
+ /// Instructions that are statically proven to be dead based on call-site
+ /// arguments are not counted here.
+ int Cost = 0;
+
bool ComputeFullInlineCost;
- bool IsCallerRecursive;
- bool IsRecursiveCall;
- bool ExposesReturnsTwice;
- bool HasDynamicAlloca;
- bool ContainsNoDuplicateCall;
- bool HasReturn;
- bool HasIndirectBr;
- bool HasUninlineableIntrinsic;
- bool InitsVargArgs;
+ bool IsCallerRecursive = false;
+ bool IsRecursiveCall = false;
+ bool ExposesReturnsTwice = false;
+ bool HasDynamicAlloca = false;
+ bool ContainsNoDuplicateCall = false;
+ bool HasReturn = false;
+ bool HasIndirectBr = false;
+ bool HasUninlineableIntrinsic = false;
+ bool InitsVargArgs = false;
/// Number of bytes allocated statically by the callee.
- uint64_t AllocatedSize;
- unsigned NumInstructions, NumVectorInstructions;
- int VectorBonus, TenPercentVectorBonus;
- // Bonus to be applied when the callee has only one reachable basic block.
- int SingleBBBonus;
+ uint64_t AllocatedSize = 0;
+ unsigned NumInstructions = 0;
+ unsigned NumVectorInstructions = 0;
+
+ /// Bonus to be applied when percentage of vector instructions in callee is
+ /// high (see more details in updateThreshold).
+ int VectorBonus = 0;
+ /// Bonus to be applied when the callee has only one reachable basic block.
+ int SingleBBBonus = 0;
/// While we walk the potentially-inlined instructions, we build up and
/// maintain a mapping of simplified values specific to this callsite. The
@@ -181,7 +192,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// loads.
bool EnableLoadElimination;
SmallPtrSet<Value *, 16> LoadAddrSet;
- int LoadEliminationCost;
+ int LoadEliminationCost = 0;
// Custom simplification helper routines.
bool isAllocaDerivedArg(Value *V);
@@ -196,7 +207,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool isGEPFree(GetElementPtrInst &GEP);
bool canFoldInboundsGEP(GetElementPtrInst &I);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
- bool simplifyCallSite(Function *F, CallSite CS);
+ bool simplifyCallSite(Function *F, CallBase &Call);
template <typename Callable>
bool simplifyInstruction(Instruction &I, Callable Evaluate);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
@@ -216,22 +227,28 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// attributes and callee hotness for PGO builds. The Callee is explicitly
/// passed to support analyzing indirect calls whose target is inferred by
/// analysis.
- void updateThreshold(CallSite CS, Function &Callee);
+ void updateThreshold(CallBase &Call, Function &Callee);
- /// Return true if size growth is allowed when inlining the callee at CS.
- bool allowSizeGrowth(CallSite CS);
+ /// Return true if size growth is allowed when inlining the callee at \p Call.
+ bool allowSizeGrowth(CallBase &Call);
- /// Return true if \p CS is a cold callsite.
- bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+ /// Return true if \p Call is a cold callsite.
+ bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
- /// Return a higher threshold if \p CS is a hot callsite.
- Optional<int> getHotCallSiteThreshold(CallSite CS,
+ /// Return a higher threshold if \p Call is a hot callsite.
+ Optional<int> getHotCallSiteThreshold(CallBase &Call,
BlockFrequencyInfo *CallerBFI);
// Custom analysis routines.
InlineResult analyzeBlock(BasicBlock *BB,
SmallPtrSetImpl<const Value *> &EphValues);
+ /// Handle a capped 'int' increment for Cost.
+ void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
+ assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
+ Cost = (int)std::min(UpperBound, Cost + Inc);
+ }
+
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
void visit(Module *);
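For context on the addCost helper introduced in the hunk above: it clamps the running cost at UpperBound so that repeated increments cannot overflow the signed int counter. A minimal standalone sketch of the same clamping idea, with illustrative names that are not part of the patch:

#include <algorithm>
#include <climits>
#include <cstdint>

// Saturating cost accumulation: never exceed UpperBound, never overflow int.
static int addCostSaturating(int Current, int64_t Inc, int64_t UpperBound = INT_MAX) {
  return static_cast<int>(std::min(UpperBound, static_cast<int64_t>(Current) + Inc));
}
// e.g. addCostSaturating(INT_MAX - 10, 100) yields INT_MAX rather than wrapping.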
@@ -256,11 +273,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitCmpInst(CmpInst &I);
bool visitSub(BinaryOperator &I);
bool visitBinaryOperator(BinaryOperator &I);
+ bool visitFNeg(UnaryOperator &I);
bool visitLoad(LoadInst &I);
bool visitStore(StoreInst &I);
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
- bool visitCallSite(CallSite CS);
+ bool visitCallBase(CallBase &Call);
bool visitReturnInst(ReturnInst &RI);
bool visitBranchInst(BranchInst &BI);
bool visitSelectInst(SelectInst &SI);
@@ -276,38 +294,29 @@ public:
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
- Function &Callee, CallSite CSArg, const InlineParams &Params)
+ Function &Callee, CallBase &Call, const InlineParams &Params)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
- CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
- Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
- Params.ComputeFullInlineCost || ORE),
- IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
- NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
- SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
-
- InlineResult analyzeCall(CallSite CS);
+ CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
+ ComputeFullInlineCost(OptComputeFullInlineCost ||
+ Params.ComputeFullInlineCost || ORE),
+ EnableLoadElimination(true) {}
+
+ InlineResult analyzeCall(CallBase &Call);
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
// Keep a bunch of stats about the cost savings found so we can print them
// out when debugging.
- unsigned NumConstantArgs;
- unsigned NumConstantOffsetPtrArgs;
- unsigned NumAllocaArgs;
- unsigned NumConstantPtrCmps;
- unsigned NumConstantPtrDiffs;
- unsigned NumInstructionsSimplified;
- unsigned SROACostSavings;
- unsigned SROACostSavingsLost;
+ unsigned NumConstantArgs = 0;
+ unsigned NumConstantOffsetPtrArgs = 0;
+ unsigned NumAllocaArgs = 0;
+ unsigned NumConstantPtrCmps = 0;
+ unsigned NumConstantPtrDiffs = 0;
+ unsigned NumInstructionsSimplified = 0;
+ unsigned SROACostSavings = 0;
+ unsigned SROACostSavingsLost = 0;
void dump();
};
@@ -342,7 +351,7 @@ bool CallAnalyzer::lookupSROAArgAndCost(
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
// If we're no longer able to perform SROA we need to undo its cost savings
// and prevent subsequent analysis.
- Cost += CostIt->second;
+ addCost(CostIt->second);
SROACostSavings -= CostIt->second;
SROACostSavingsLost += CostIt->second;
SROAArgCosts.erase(CostIt);
@@ -366,7 +375,7 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
void CallAnalyzer::disableLoadElimination() {
if (EnableLoadElimination) {
- Cost += LoadEliminationCost;
+ addCost(LoadEliminationCost);
LoadEliminationCost = 0;
EnableLoadElimination = false;
}
@@ -701,7 +710,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
- // Propagate constants through ptrtoint.
+ // Propagate constants through casts.
if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
}))
@@ -721,7 +730,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
case Instruction::FPToUI:
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- Cost += InlineConstants::CallPenalty;
+ addCost(InlineConstants::CallPenalty);
break;
default:
break;
@@ -737,14 +746,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
}))
return true;
- // Disable any SROA on the argument to arbitrary unary operators.
+ // Disable any SROA on the argument to arbitrary unary instructions.
disableSROA(Operand);
return false;
}
bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
- return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
+ return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
@@ -769,7 +778,7 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
return false;
}
-bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
+bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
// If the normal destination of the invoke or the parent block of the call
// site is unreachable-terminated, there is little point in inlining this
// unless there is literally zero cost.
@@ -785,21 +794,21 @@ bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
// For now, we are not handling this corner case here as it is rare in real
// code. In future, we should elaborate this based on BPI and BFI in more
// general threshold adjusting heuristics in updateThreshold().
- Instruction *Instr = CS.getInstruction();
- if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
return false;
- } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
+ } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
return false;
return true;
}
-bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+bool CallAnalyzer::isColdCallSite(CallBase &Call,
+ BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's coldness is
// determined based on that.
if (PSI && PSI->hasProfileSummary())
- return PSI->isColdCallSite(CS, CallerBFI);
+ return PSI->isColdCallSite(CallSite(&Call), CallerBFI);
// Otherwise we need BFI to be available.
if (!CallerBFI)
@@ -810,20 +819,21 @@ bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
// complexity is not worth it unless this scaling shows up high in the
// profiles.
const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
- auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteBB = Call.getParent();
auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
auto CallerEntryFreq =
- CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
return CallSiteFreq < CallerEntryFreq * ColdProb;
}
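To make the coldness test above concrete: with the default ColdCallSiteRelFreq of 2, a callsite is treated as cold when its block frequency is below 2% of the caller's entry frequency. A hedged sketch of that comparison with plain integers (names illustrative; BranchProbability's fixed-point scaling is omitted):

#include <cstdint>

// Simplified stand-in for the BranchProbability comparison above.
static bool isColdByFrequency(uint64_t CallSiteFreq, uint64_t CallerEntryFreq,
                              uint64_t ColdRelFreqPercent = 2) {
  return CallSiteFreq * 100 < CallerEntryFreq * ColdRelFreqPercent;
}
// e.g. isColdByFrequency(15, 1000) is true (1.5% < 2%), while
//      isColdByFrequency(25, 1000) is false (2.5% >= 2%).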
Optional<int>
-CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
+CallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
BlockFrequencyInfo *CallerBFI) {
// If global profile summary is available, then callsite's hotness is
// determined based on that.
- if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI))
+ if (PSI && PSI->hasProfileSummary() &&
+ PSI->isHotCallSite(CallSite(&Call), CallerBFI))
return Params.HotCallSiteThreshold;
// Otherwise we need BFI to be available and to have a locally hot callsite
@@ -835,7 +845,7 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
// potentially cache the computation of scaled entry frequency, but the added
// complexity is not worth it unless this scaling shows up high in the
// profiles.
- auto CallSiteBB = CS.getInstruction()->getParent();
+ auto CallSiteBB = Call.getParent();
auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
auto CallerEntryFreq = CallerBFI->getEntryFreq();
if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
@@ -845,14 +855,14 @@ CallAnalyzer::getHotCallSiteThreshold(CallSite CS,
return None;
}
-void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
+void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
- if (!allowSizeGrowth(CS)) {
+ if (!allowSizeGrowth(Call)) {
Threshold = 0;
return;
}
- Function *Caller = CS.getCaller();
+ Function *Caller = Call.getCaller();
// return min(A, B) if B is valid.
auto MinIfValid = [](int A, Optional<int> B) {
@@ -870,15 +880,6 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// basic block at the given callsite context. This is speculatively applied
// and withdrawn if more than one basic block is seen.
//
- // Vector bonuses: We want to more aggressively inline vector-dense kernels
- // and apply this bonus based on the percentage of vector instructions. A
- // bonus is applied if the vector instructions exceed 50% and half that amount
- // is applied if it exceeds 10%. Note that these bonuses are some what
- // arbitrary and evolved over time by accident as much as because they are
- // principled bonuses.
- // FIXME: It would be nice to base the bonus values on something more
- // scientific.
- //
// LastCallToStaticBonus: This large bonus is applied to ensure the inlining
// of the last call to a static function as inlining such functions is
// guaranteed to reduce code size.
@@ -886,7 +887,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// These bonus percentages may be set to 0 based on properties of the caller
// and the callsite.
int SingleBBBonusPercent = 50;
- int VectorBonusPercent = 150;
+ int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
// Lambda to set all the above bonus and bonus percentages to 0.
@@ -898,7 +899,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
// and reduce the threshold if the caller has the necessary attribute.
- if (Caller->optForMinSize()) {
+ if (Caller->hasMinSize()) {
Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
// For minsize, we want to disable the single BB bonus and the vector
// bonuses, but not the last-call-to-static bonus. Inlining the last call to
@@ -906,12 +907,12 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// call/return instructions.
SingleBBBonusPercent = 0;
VectorBonusPercent = 0;
- } else if (Caller->optForSize())
+ } else if (Caller->hasOptSize())
Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
// Adjust the threshold based on inlinehint attribute and profile based
// hotness information if the caller does not have MinSize attribute.
- if (!Caller->optForMinSize()) {
+ if (!Caller->hasMinSize()) {
if (Callee.hasFnAttribute(Attribute::InlineHint))
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
@@ -923,15 +924,15 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// used (which adds hotness metadata to calls) or if caller's
// BlockFrequencyInfo is available.
BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
- auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI);
- if (!Caller->optForSize() && HotCallSiteThreshold) {
+ auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
+ if (!Caller->hasOptSize() && HotCallSiteThreshold) {
LLVM_DEBUG(dbgs() << "Hot callsite.\n");
// FIXME: This should update the threshold only if it exceeds the
// current threshold, but AutoFDO + ThinLTO currently relies on this
// behavior to prevent inlining of hot callsites during ThinLTO
// compile phase.
Threshold = HotCallSiteThreshold.getValue();
- } else if (isColdCallSite(CS, CallerBFI)) {
+ } else if (isColdCallSite(Call, CallerBFI)) {
LLVM_DEBUG(dbgs() << "Cold callsite.\n");
// Do not apply bonuses for a cold callsite including the
// LastCallToStatic bonus. While this bonus might result in code size
@@ -968,7 +969,7 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
VectorBonus = Threshold * VectorBonusPercent / 100;
bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+ F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically. It may seem odd to update
// Cost in updateThreshold, but the bonus depends on the logic in this method.
@@ -1087,10 +1088,34 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
// If the instruction is floating point, and the target says this operation
// is expensive, this may eventually become a library call. Treat the cost
- // as such.
+ // as such. Unless it's fneg which can be implemented with an xor.
+ using namespace llvm::PatternMatch;
if (I.getType()->isFloatingPointTy() &&
- TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
- Cost += InlineConstants::CallPenalty;
+ TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
+ !match(&I, m_FNeg(m_Value())))
+ addCost(InlineConstants::CallPenalty);
+
+ return false;
+}
+
+bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
+ Value *Op = I.getOperand(0);
+ Constant *COp = dyn_cast<Constant>(Op);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Op);
+
+ Value *SimpleV = SimplifyFNegInst(COp ? COp : Op,
+ cast<FPMathOperator>(I).getFastMathFlags(),
+ DL);
+
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+ SimplifiedValues[&I] = C;
+
+ if (SimpleV)
+ return true;
+
+ // Disable any SROA on arguments to arbitrary, unsimplified fneg.
+ disableSROA(Op);
return false;
}
@@ -1173,62 +1198,61 @@ bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
-bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
+bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
// FIXME: Using the instsimplify logic directly for this is inefficient
// because we have to continually rebuild the argument list even when no
// simplifications can be performed. Until that is fixed with remapping
// inside of instsimplify, directly constant fold calls here.
- if (!canConstantFoldCallTo(CS, F))
+ if (!canConstantFoldCallTo(&Call, F))
return false;
// Try to re-map the arguments to constants.
SmallVector<Constant *, 4> ConstantArgs;
- ConstantArgs.reserve(CS.arg_size());
- for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
- ++I) {
- Constant *C = dyn_cast<Constant>(*I);
+ ConstantArgs.reserve(Call.arg_size());
+ for (Value *I : Call.args()) {
+ Constant *C = dyn_cast<Constant>(I);
if (!C)
- C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
if (!C)
return false; // This argument doesn't map to a constant.
ConstantArgs.push_back(C);
}
- if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
- SimplifiedValues[CS.getInstruction()] = C;
+ if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
+ SimplifiedValues[&Call] = C;
return true;
}
return false;
}
-bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
+bool CallAnalyzer::visitCallBase(CallBase &Call) {
+ if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
!F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
}
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
+ if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
ContainsNoDuplicateCall = true;
- if (Function *F = CS.getCalledFunction()) {
+ if (Function *F = Call.getCalledFunction()) {
// When we have a concrete function, first try to simplify it directly.
- if (simplifyCallSite(F, CS))
+ if (simplifyCallSite(F, Call))
return true;
// Next check if it is an intrinsic we know about.
// FIXME: Lift this into part of the InstVisitor.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
switch (II->getIntrinsicID()) {
default:
- if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+ if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
case Intrinsic::load_relative:
// This is normally lowered to 4 LLVM instructions.
- Cost += 3 * InlineConstants::InstrCost;
+ addCost(3 * InlineConstants::InstrCost);
return false;
case Intrinsic::memset:
@@ -1247,7 +1271,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
}
}
- if (F == CS.getInstruction()->getFunction()) {
+ if (F == Call.getFunction()) {
// This flag will fully abort the analysis, so don't bother with anything
// else.
IsRecursiveCall = true;
@@ -1257,34 +1281,34 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
if (TTI.isLoweredToCall(F)) {
// We account for the average 1 instruction per call argument setup
// here.
- Cost += CS.arg_size() * InlineConstants::InstrCost;
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
// Everything other than inline ASM will also have a significant cost
// merely from making the call.
- if (!isa<InlineAsm>(CS.getCalledValue()))
- Cost += InlineConstants::CallPenalty;
+ if (!isa<InlineAsm>(Call.getCalledValue()))
+ addCost(InlineConstants::CallPenalty);
}
- if (!CS.onlyReadsMemory())
+ if (!Call.onlyReadsMemory())
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
}
// Otherwise we're in a very special case -- an indirect function call. See
// if we can be particularly clever about this.
- Value *Callee = CS.getCalledValue();
+ Value *Callee = Call.getCalledValue();
// First, pay the price of the argument setup. We account for the average
// 1 instruction per call argument setup here.
- Cost += CS.arg_size() * InlineConstants::InstrCost;
+ addCost(Call.arg_size() * InlineConstants::InstrCost);
// Next, check if this happens to be an indirect function call to a known
// function in this inline context. If not, we've done all we can.
Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
if (!F) {
- if (!CS.onlyReadsMemory())
+ if (!Call.onlyReadsMemory())
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
}
// If we have a constant that we are calling as a function, we can peer
@@ -1294,9 +1318,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
- CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS,
+ CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
IndirectCallParams);
- if (CA.analyzeCall(CS)) {
+ if (CA.analyzeCall(Call)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
Cost -= std::max(0, CA.getThreshold() - CA.getCost());
@@ -1304,7 +1328,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
if (!F->onlyReadsMemory())
disableLoadElimination();
- return Base::visitCallSite(CS);
+ return Base::visitCallBase(Call);
}
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
@@ -1438,7 +1462,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
(int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
- Cost = CostLowerBound;
+ addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
return false;
}
@@ -1452,7 +1476,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
4 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
+ addCost(JTCost, (int64_t)CostUpperBound);
return false;
}
@@ -1473,7 +1497,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// n + n / 2 - 1 = n * 3 / 2 - 1
if (NumCaseCluster <= 3) {
// Suppose a comparison includes one compare and one conditional branch.
- Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+ addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
return false;
}
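The two switch-costing hunks around this point reduce to simple arithmetic: up to three case clusters are charged one compare plus one conditional branch each, and larger switches are charged for the expected n * 3 / 2 - 1 comparisons of a balanced binary search over n clusters, capped at CostUpperBound. A small sketch of that arithmetic, assuming InstrCost stands in for InlineConstants::InstrCost (illustrative only):

#include <algorithm>
#include <cstdint>

// Illustrative model of the switch lowering cost charged above.
static int64_t switchCost(int64_t NumCaseCluster, int64_t InstrCost,
                          int64_t CostUpperBound) {
  if (NumCaseCluster <= 3)
    return NumCaseCluster * 2 * InstrCost;            // one cmp + one br per cluster
  int64_t ExpectedNumberOfCompare = 3 * NumCaseCluster / 2 - 1;
  return std::min(CostUpperBound, ExpectedNumberOfCompare * 2 * InstrCost);
}
// e.g. switchCost(3, 5, 1000) == 30 and switchCost(8, 5, 1000) == 110.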
@@ -1481,7 +1505,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
int64_t SwitchCost =
ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
+ addCost(SwitchCost, (int64_t)CostUpperBound);
return false;
}
@@ -1574,7 +1598,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (Base::visit(&*I))
++NumInstructionsSimplified;
else
- Cost += InlineConstants::InstrCost;
+ addCost(InlineConstants::InstrCost);
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
@@ -1595,7 +1619,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
- CandidateCS.getInstruction())
+ &CandidateCall)
<< NV("Callee", &F) << " has uninlinable pattern ("
<< NV("InlineResult", IR.message)
<< ") and cost is not fully computed";
@@ -1612,14 +1636,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
- CandidateCS.getInstruction())
+ &CandidateCall)
<< NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
<< ". Cost is not fully computed";
});
return IR;
}
- // Check if we've past the maximum possible threshold so we don't spin in
+ // Check if we've passed the maximum possible threshold so we don't spin in
// huge basic blocks that will never inline.
if (Cost >= Threshold && !ComputeFullInlineCost)
return false;
@@ -1676,7 +1700,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
/// blocks to see if all their incoming edges are dead or not.
void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
- // A CFG edge is dead if the predecessor is dead or the predessor has a
+ // A CFG edge is dead if the predecessor is dead or the predecessor has a
// known successor which is not the one under exam.
return (DeadBlocks.count(Pred) ||
(KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
@@ -1712,7 +1736,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
-InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
+InlineResult CallAnalyzer::analyzeCall(CallBase &Call) {
++NumCallsAnalyzed;
// Perform some tweaks to the cost and threshold based on the direct
@@ -1729,7 +1753,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
assert(NumVectorInstructions == 0);
// Update the threshold based on callsite properties
- updateThreshold(CS, F);
+ updateThreshold(Call, F);
// While Threshold depends on commandline options that can take negative
// values, we want to enforce the invariant that the computed threshold and
@@ -1745,7 +1769,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
// Give out bonuses for the callsite, as the instructions setting them up
// will be gone after inlining.
- Cost -= getCallsiteCost(CS, DL);
+ addCost(-getCallsiteCost(Call, DL));
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1759,14 +1783,11 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
if (F.empty())
return true;
- Function *Caller = CS.getInstruction()->getFunction();
+ Function *Caller = Call.getFunction();
// Check if the caller function is recursive itself.
for (User *U : Caller->users()) {
- CallSite Site(U);
- if (!Site)
- continue;
- Instruction *I = Site.getInstruction();
- if (I->getFunction() == Caller) {
+ CallBase *Call = dyn_cast<CallBase>(U);
+ if (Call && Call->getFunction() == Caller) {
IsCallerRecursive = true;
break;
}
@@ -1774,10 +1795,10 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
- CallSite::arg_iterator CAI = CS.arg_begin();
+ auto CAI = Call.arg_begin();
for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
FAI != FAE; ++FAI, ++CAI) {
- assert(CAI != CS.arg_end());
+ assert(CAI != Call.arg_end());
if (Constant *C = dyn_cast<Constant>(CAI))
SimplifiedValues[&*FAI] = C;
@@ -1826,14 +1847,18 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Disallow inlining a blockaddress. A blockaddress only has defined
- // behavior for an indirect branch in the same function, and we do not
- // currently support inlining indirect branches. But, the inliner may not
- // see an indirect branch that ends up being dead code at a particular call
- // site. If the blockaddress escapes the function, e.g., via a global
- // variable, inlining may lead to an invalid cross-function reference.
+ // Disallow inlining a blockaddress with uses other than strictly callbr.
+ // A blockaddress only has defined behavior for an indirect branch in the
+ // same function, and we do not currently support inlining indirect
+ // branches. But, the inliner may not see an indirect branch that ends up
+ // being dead code at a particular call site. If the blockaddress escapes
+ // the function, e.g., via a global variable, inlining may lead to an
+ // invalid cross-function reference.
+ // FIXME: pr/39560: continue relaxing this overt restriction.
if (BB->hasAddressTaken())
- return "blockaddress";
+ for (User *U : BlockAddress::get(&*BB)->users())
+ if (!isa<CallBrInst>(*U))
+ return "blockaddress used outside of callbr";
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
@@ -1887,7 +1912,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
}
bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+ F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
@@ -1899,7 +1924,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
// size, we penalise any call sites that perform loops. We do this after all
// other costs here, so will likely only be dealing with relatively small
// functions (and hence DT and LI will hopefully be cheap).
- if (Caller->optForMinSize()) {
+ if (Caller->hasMinSize()) {
DominatorTree DT(F);
LoopInfo LI(DT);
int NumLoops = 0;
@@ -1909,7 +1934,7 @@ InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
continue;
NumLoops++;
}
- Cost += NumLoops * InlineConstants::CallPenalty;
+ addCost(NumLoops * InlineConstants::CallPenalty);
}
// We applied the maximum possible vector bonus at the beginning. Now,
@@ -1953,13 +1978,13 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
+int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) {
int Cost = 0;
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (CS.isByValArgument(I)) {
+ for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
+ if (Call.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
// size of the byval type by the target's pointer size.
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
unsigned AS = PTy->getAddressSpace();
unsigned PointerSize = DL.getPointerSizeInBits(AS);
@@ -1987,16 +2012,16 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
}
InlineCost llvm::getInlineCost(
- CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
- return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+ return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
GetAssumptionCache, GetBFI, PSI, ORE);
}
InlineCost llvm::getInlineCost(
- CallSite CS, Function *Callee, const InlineParams &Params,
+ CallBase &Call, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
@@ -2012,9 +2037,9 @@ InlineCost llvm::getInlineCost(
// argument is in the alloca address space (so it is a little bit complicated
// to solve).
unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I)
- if (CS.isByValArgument(I)) {
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+ for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
+ if (Call.isByValArgument(I)) {
+ PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
if (PTy->getAddressSpace() != AllocaAS)
return llvm::InlineCost::getNever("byval arguments without alloca"
" address space");
@@ -2022,20 +2047,21 @@ InlineCost llvm::getInlineCost(
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (CS.hasFnAttr(Attribute::AlwaysInline)) {
- if (isInlineViable(*Callee))
+ if (Call.hasFnAttr(Attribute::AlwaysInline)) {
+ auto IsViable = isInlineViable(*Callee);
+ if (IsViable)
return llvm::InlineCost::getAlways("always inline attribute");
- return llvm::InlineCost::getNever("inapplicable always inline attribute");
+ return llvm::InlineCost::getNever(IsViable.message);
}
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
- Function *Caller = CS.getCaller();
+ Function *Caller = Call.getCaller();
if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
return llvm::InlineCost::getNever("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
- if (Caller->hasFnAttribute(Attribute::OptimizeNone))
+ if (Caller->hasOptNone())
return llvm::InlineCost::getNever("optnone attribute");
// Don't inline a function that treats null pointer as valid into a caller
@@ -2052,15 +2078,15 @@ InlineCost llvm::getInlineCost(
return llvm::InlineCost::getNever("noinline function attribute");
// Don't inline call sites marked noinline.
- if (CS.isNoInline())
+ if (Call.isNoInline())
return llvm::InlineCost::getNever("noinline call site attribute");
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
- CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
- Params);
- InlineResult ShouldInline = CA.analyzeCall(CS);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee,
+ Call, Params);
+ InlineResult ShouldInline = CA.analyzeCall(Call);
LLVM_DEBUG(CA.dump());
@@ -2073,42 +2099,50 @@ InlineCost llvm::getInlineCost(
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
-bool llvm::isInlineViable(Function &F) {
+InlineResult llvm::isInlineViable(Function &F) {
bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- // Disallow inlining of functions which contain indirect branches or
- // blockaddresses.
- if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
- return false;
+ // Disallow inlining of functions which contain indirect branches.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return "contains indirect branches";
+
+ // Disallow inlining of blockaddresses which are used by non-callbr
+ // instructions.
+ if (BI->hasAddressTaken())
+ for (User *U : BlockAddress::get(&*BI)->users())
+ if (!isa<CallBrInst>(*U))
+ return "blockaddress used outside of callbr";
for (auto &II : *BI) {
- CallSite CS(&II);
- if (!CS)
+ CallBase *Call = dyn_cast<CallBase>(&II);
+ if (!Call)
continue;
// Disallow recursive calls.
- if (&F == CS.getCalledFunction())
- return false;
+ if (&F == Call->getCalledFunction())
+ return "recursive call";
// Disallow calls which expose returns-twice to a function not previously
// attributed as such.
- if (!ReturnsTwice && CS.isCall() &&
- cast<CallInst>(CS.getInstruction())->canReturnTwice())
- return false;
+ if (!ReturnsTwice && isa<CallInst>(Call) &&
+ cast<CallInst>(Call)->canReturnTwice())
+ return "exposes returns-twice attribute";
- if (CS.getCalledFunction())
- switch (CS.getCalledFunction()->getIntrinsicID()) {
+ if (Call->getCalledFunction())
+ switch (Call->getCalledFunction()->getIntrinsicID()) {
default:
break;
// Disallow inlining of @llvm.icall.branch.funnel because current
// backend can't separate call targets from call arguments.
case llvm::Intrinsic::icall_branch_funnel:
+ return "disallowed inlining of @llvm.icall.branch.funnel";
// Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
case llvm::Intrinsic::localescape:
+ return "disallowed inlining of @llvm.localescape";
// Disallow inlining of functions that initialize VarArgs with va_start.
case llvm::Intrinsic::vastart:
- return false;
+ return "contains VarArgs initialized with va_start";
}
}
}
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 95ab6ee3db5b..943a99a5f46d 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -1,9 +1,8 @@
//===-- InstCount.cpp - Collects the count of all instructions ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/InstructionPrecedenceTracking.cpp b/lib/Analysis/InstructionPrecedenceTracking.cpp
index 816126f407ca..35190ce3e11a 100644
--- a/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -1,9 +1,8 @@
//===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Implements a class that is able to define some instructions as "special"
@@ -20,6 +19,7 @@
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
using namespace llvm;
@@ -153,5 +153,8 @@ bool ImplicitControlFlowTracking::isSpecialInstruction(
bool MemoryWriteTracking::isSpecialInstruction(
const Instruction *Insn) const {
+ using namespace PatternMatch;
+ if (match(Insn, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+ return false;
return Insn->mayWriteToMemory();
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index ccf907c144f0..e34bf6f4e43f 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1,9 +1,8 @@
//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,8 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -50,6 +51,9 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
+static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
+ const SimplifyQuery &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
@@ -655,32 +659,11 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
- // Even though we don't look through PHI nodes, we could be called on an
- // instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
- Visited.insert(V);
- do {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if ((!AllowNonInbounds && !GEP->isInBounds()) ||
- !GEP->accumulateConstantOffset(DL, Offset))
- break;
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->isInterposable())
- break;
- V = GA->getAliasee();
- } else {
- if (auto CS = CallSite(V))
- if (Value *RV = CS.getReturnedArgOperand()) {
- V = RV;
- continue;
- }
- break;
- }
- assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
- } while (Visited.insert(V).second);
+ V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
+ // As that strip may trace through `addrspacecast`, need to sext or trunc
+ // the offset calculated.
+ IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
+ Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth());
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
if (V->getType()->isVectorTy())
@@ -1841,6 +1824,16 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op1;
}
+ // This is a similar pattern used for checking if a value is a power-of-2:
+ // (A - 1) & A --> 0 (if A is a power-of-2 or 0)
+ // A & (A - 1) --> 0 (if A is a power-of-2 or 0)
+ if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ return Constant::getNullValue(Op1->getType());
+ if (match(Op1, m_Add(m_Specific(Op0), m_AllOnes())) &&
+ isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ return Constant::getNullValue(Op0->getType());
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
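The new fold added above relies on the standard power-of-two identity: if A is zero or a power of two, then (A - 1) & A == 0, so the 'and' simplifies to a null value. A tiny standalone check of that identity (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 2u, 8u, 1024u};
  for (uint32_t A : Vals)
    assert(((A - 1u) & A) == 0u);   // zero or a power of two: the fold applies
  assert(((6u - 1u) & 6u) == 4u);   // 6 is not a power of two: no simplification
  return 0;
}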
@@ -2280,12 +2273,12 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// come from a pointer that cannot overlap with dynamically-allocated
// memory within the lifetime of the current function (allocas, byval
// arguments, globals), then determine the comparison result here.
- SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
+ SmallVector<const Value *, 8> LHSUObjs, RHSUObjs;
GetUnderlyingObjects(LHS, LHSUObjs, DL);
GetUnderlyingObjects(RHS, RHSUObjs, DL);
// Is the set of underlying objects all noalias calls?
- auto IsNAC = [](ArrayRef<Value *> Objects) {
+ auto IsNAC = [](ArrayRef<const Value *> Objects) {
return all_of(Objects, isNoAliasCall);
};
@@ -2295,8 +2288,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// live with the compared-to allocation). For globals, we exclude symbols
// that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
- auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
- return all_of(Objects, [](Value *V) {
+ auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) {
+ return all_of(Objects, [](const Value *V) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
@@ -2472,228 +2465,6 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-/// Many binary operators with a constant operand have an easy-to-compute
-/// range of outputs. This can be used to fold a comparison to always true or
-/// always false.
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper,
- const InstrInfoQuery &IIQ) {
- unsigned Width = Lower.getBitWidth();
- const APInt *C;
- switch (BO.getOpcode()) {
- case Instruction::Add:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
- // FIXME: If we have both nuw and nsw, we should reduce the range further.
- if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
- // 'add nuw x, C' produces [C, UINT_MAX].
- Lower = *C;
- } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
- if (C->isNegative()) {
- // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + *C + 1;
- } else {
- // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
- Lower = APInt::getSignedMinValue(Width) + *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
- }
- }
- break;
-
- case Instruction::And:
- if (match(BO.getOperand(1), m_APInt(C)))
- // 'and x, C' produces [0, C].
- Upper = *C + 1;
- break;
-
- case Instruction::Or:
- if (match(BO.getOperand(1), m_APInt(C)))
- // 'or x, C' produces [C, UINT_MAX].
- Lower = *C;
- break;
-
- case Instruction::AShr:
- if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
- // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
- Lower = APInt::getSignedMinValue(Width).ashr(*C);
- Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
- ShiftAmount = C->countTrailingZeros();
- if (C->isNegative()) {
- // 'ashr C, x' produces [C, C >> (Width-1)]
- Lower = *C;
- Upper = C->ashr(ShiftAmount) + 1;
- } else {
- // 'ashr C, x' produces [C >> (Width-1), C]
- Lower = C->ashr(ShiftAmount);
- Upper = *C + 1;
- }
- }
- break;
-
- case Instruction::LShr:
- if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
- // 'lshr x, C' produces [0, UINT_MAX >> C].
- Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- // 'lshr C, x' produces [C >> (Width-1), C].
- unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
- ShiftAmount = C->countTrailingZeros();
- Lower = C->lshr(ShiftAmount);
- Upper = *C + 1;
- }
- break;
-
- case Instruction::Shl:
- if (match(BO.getOperand(0), m_APInt(C))) {
- if (IIQ.hasNoUnsignedWrap(&BO)) {
- // 'shl nuw C, x' produces [C, C << CLZ(C)]
- Lower = *C;
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
- if (C->isNegative()) {
- // 'shl nsw C, x' produces [C << CLO(C)-1, C]
- unsigned ShiftAmount = C->countLeadingOnes() - 1;
- Lower = C->shl(ShiftAmount);
- Upper = *C + 1;
- } else {
- // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
- unsigned ShiftAmount = C->countLeadingZeros() - 1;
- Lower = *C;
- Upper = C->shl(ShiftAmount) + 1;
- }
- }
- }
- break;
-
- case Instruction::SDiv:
- if (match(BO.getOperand(1), m_APInt(C))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C->isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where C != -1 and C != 0 and C != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (C->countLeadingZeros() < Width - 1) {
- // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
- // where C != -1 and C != 0 and C != 1
- Lower = IntMin.sdiv(*C);
- Upper = IntMax.sdiv(*C);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- if (C->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = *C;
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv C, x' produces [-|C|, |C|].
- Upper = C->abs() + 1;
- Lower = (-Upper) + 1;
- }
- }
- break;
-
- case Instruction::UDiv:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
- // 'udiv x, C' produces [0, UINT_MAX / C].
- Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
- } else if (match(BO.getOperand(0), m_APInt(C))) {
- // 'udiv C, x' produces [0, C].
- Upper = *C + 1;
- }
- break;
-
- case Instruction::SRem:
- if (match(BO.getOperand(1), m_APInt(C))) {
- // 'srem x, C' produces (-|C|, |C|).
- Upper = C->abs();
- Lower = (-Upper) + 1;
- }
- break;
-
- case Instruction::URem:
- if (match(BO.getOperand(1), m_APInt(C)))
- // 'urem x, C' produces [0, C).
- Upper = *C;
- break;
-
- default:
- break;
- }
-}
-
-/// Some intrinsics with a constant operand have an easy-to-compute range of
-/// outputs. This can be used to fold a comparison to always true or always
-/// false.
-static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower,
- APInt &Upper) {
- unsigned Width = Lower.getBitWidth();
- const APInt *C;
- switch (II.getIntrinsicID()) {
- case Intrinsic::uadd_sat:
- // uadd.sat(x, C) produces [C, UINT_MAX].
- if (match(II.getOperand(0), m_APInt(C)) ||
- match(II.getOperand(1), m_APInt(C)))
- Lower = *C;
- break;
- case Intrinsic::sadd_sat:
- if (match(II.getOperand(0), m_APInt(C)) ||
- match(II.getOperand(1), m_APInt(C))) {
- if (C->isNegative()) {
- // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + *C + 1;
- } else {
- // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
- Lower = APInt::getSignedMinValue(Width) + *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
- }
- break;
- case Intrinsic::usub_sat:
- // usub.sat(C, x) produces [0, C].
- if (match(II.getOperand(0), m_APInt(C)))
- Upper = *C + 1;
- // usub.sat(x, C) produces [0, UINT_MAX - C].
- else if (match(II.getOperand(1), m_APInt(C)))
- Upper = APInt::getMaxValue(Width) - *C + 1;
- break;
- case Intrinsic::ssub_sat:
- if (match(II.getOperand(0), m_APInt(C))) {
- if (C->isNegative()) {
- // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
- Lower = APInt::getSignedMinValue(Width);
- Upper = *C - APInt::getSignedMinValue(Width) + 1;
- } else {
- // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
- Lower = *C - APInt::getSignedMaxValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
- } else if (match(II.getOperand(1), m_APInt(C))) {
- if (C->isNegative()) {
- // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
- Lower = APInt::getSignedMinValue(Width) - *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- } else {
- // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) - *C + 1;
- }
- }
- break;
- default:
- break;
- }
-}
-
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const InstrInfoQuery &IIQ) {
Type *ITy = GetCompareTy(RHS); // The return type.
@@ -2721,22 +2492,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
if (RHS_CR.isFullSet())
return ConstantInt::getTrue(ITy);
- // Find the range of possible values for binary operators.
- unsigned Width = C->getBitWidth();
- APInt Lower = APInt(Width, 0);
- APInt Upper = APInt(Width, 0);
- if (auto *BO = dyn_cast<BinaryOperator>(LHS))
- setLimitsForBinOp(*BO, Lower, Upper, IIQ);
- else if (auto *II = dyn_cast<IntrinsicInst>(LHS))
- setLimitsForIntrinsic(*II, Lower, Upper);
-
- ConstantRange LHS_CR =
- Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
-
- if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range))
- LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
-
+ ConstantRange LHS_CR = computeConstantRange(LHS, IIQ.UseInstrInfo);
if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
return ConstantInt::getTrue(ITy);
@@ -3062,44 +2818,6 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0,
- Value *Op1) {
- // We need a comparison with a constant.
- const APInt *C;
- if (!match(Op1, m_APInt(C)))
- return nullptr;
-
- // matchSelectPattern returns the negation part of an abs pattern in SP1.
- // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that
- // constraint, we can't make a contiguous range for the result of abs.
- ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE;
- Value *SP0, *SP1;
- SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor;
- if (SPF == SelectPatternFlavor::SPF_ABS &&
- cast<Instruction>(SP1)->hasNoSignedWrap())
- // The result of abs(X) is >= 0 (with nsw).
- AbsPred = ICmpInst::ICMP_SGE;
- if (SPF == SelectPatternFlavor::SPF_NABS)
- // The result of -abs(X) is <= 0.
- AbsPred = ICmpInst::ICMP_SLE;
-
- if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE)
- return nullptr;
-
- // If there is no intersection between abs/nabs and the range of this icmp,
- // the icmp must be false. If the abs/nabs range is a subset of the icmp
- // range, the icmp must be true.
- APInt Zero = APInt::getNullValue(C->getBitWidth());
- ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero);
- ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C);
- if (AbsRange.intersectWith(CmpRange).isEmptySet())
- return getFalse(GetCompareTy(Op0));
- if (CmpRange.contains(AbsRange))
- return getTrue(GetCompareTy(Op0));
-
- return nullptr;
-}
-
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
@@ -3319,9 +3037,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
+ assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X");
Type *ITy = GetCompareTy(LHS); // The return type.
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ if (isa<UndefValue>(RHS) && ICmpInst::isEquality(Pred))
+ return UndefValue::get(ITy);
+
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
if (LHS == RHS || isa<UndefValue>(RHS))
@@ -3531,9 +3256,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
- if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS))
- return V;
-
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
@@ -3647,6 +3369,8 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Handle fcmp with constant RHS.
+ // TODO: Use match with a specific FP value, so these work with vectors with
+ // undef lanes.
const APFloat *C;
if (match(RHS, m_APFloat(C))) {
// Check whether the constant is an infinity.
@@ -3675,28 +3399,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
}
- if (C->isZero()) {
- switch (Pred) {
- case FCmpInst::FCMP_OGE:
- if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getTrue(RetTy);
- break;
- case FCmpInst::FCMP_UGE:
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getTrue(RetTy);
- break;
- case FCmpInst::FCMP_ULT:
- if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getFalse(RetTy);
- break;
- case FCmpInst::FCMP_OLT:
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return getFalse(RetTy);
- break;
- default:
- break;
- }
- } else if (C->isNegative()) {
+ if (C->isNegative() && !C->isNegZero()) {
assert(!C->isNaN() && "Unexpected NaN constant!");
// TODO: We can catch more cases by using a range check rather than
// relying on CannotBeOrderedLessThanZero.
@@ -3719,6 +3422,67 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
}
}
+
+ // Check comparison of [minnum/maxnum with constant] with other constant.
+ const APFloat *C2;
+ if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) &&
+ C2->compare(*C) == APFloat::cmpLessThan) ||
+ (match(LHS, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_APFloat(C2))) &&
+ C2->compare(*C) == APFloat::cmpGreaterThan)) {
+ bool IsMaxNum =
+ cast<IntrinsicInst>(LHS)->getIntrinsicID() == Intrinsic::maxnum;
+ // The ordered relationship and minnum/maxnum guarantee that we do not
+ // have NaN constants, so ordered/unordered preds are handled the same.
+ switch (Pred) {
+ case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ:
+ // minnum(X, LesserC) == C --> false
+ // maxnum(X, GreaterC) == C --> false
+ return getFalse(RetTy);
+ case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE:
+ // minnum(X, LesserC) != C --> true
+ // maxnum(X, GreaterC) != C --> true
+ return getTrue(RetTy);
+ case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT:
+ // minnum(X, LesserC) >= C --> false
+ // minnum(X, LesserC) > C --> false
+ // maxnum(X, GreaterC) >= C --> true
+ // maxnum(X, GreaterC) > C --> true
+ return ConstantInt::get(RetTy, IsMaxNum);
+ case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT:
+ // minnum(X, LesserC) <= C --> true
+ // minnum(X, LesserC) < C --> true
+ // maxnum(X, GreaterC) <= C --> false
+ // maxnum(X, GreaterC) < C --> false
+ return ConstantInt::get(RetTy, !IsMaxNum);
+ default:
+ // TRUE/FALSE/ORD/UNO should be handled before this.
+ llvm_unreachable("Unexpected fcmp predicate");
+ }
+ }
+ }
+
+ if (match(RHS, m_AnyZeroFP())) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGE:
+ case FCmpInst::FCMP_ULT:
+ // Positive or zero X >= 0.0 --> true
+ // Positive or zero X < 0.0 --> false
+ if ((FMF.noNaNs() || isKnownNeverNaN(LHS, Q.TLI)) &&
+ CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return Pred == FCmpInst::FCMP_OGE ? getTrue(RetTy) : getFalse(RetTy);
+ break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OLT:
+ // Positive or zero or nan X >= 0.0 --> true
+ // Positive or zero or nan X < 0.0 --> false
+ if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy);
+ break;
+ default:
+ break;
+ }
}
// If the comparison is with the result of a select instruction, check whether
@@ -3904,27 +3668,44 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
Pred == ICmpInst::ICMP_EQ))
return V;
- // Test for zero-shift-guard-ops around funnel shifts. These are used to
- // avoid UB from oversized shifts in raw IR rotate patterns, but the
- // intrinsics do not have that problem.
+ // Test for a bogus zero-shift-guard-op around funnel-shift or rotate.
Value *ShAmt;
auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
m_Value(ShAmt)),
m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
m_Value(ShAmt)));
- // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
- // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
// (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
// (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
- if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
- return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
-
- // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
- // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+ if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_EQ)
+ return X;
// (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
// (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
- if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
- return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
+ if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_NE)
+ return X;
+
+ // Test for a zero-shift-guard-op around rotates. These are used to
+ // avoid UB from oversized shifts in raw IR rotate patterns, but the
+ // intrinsics do not have that problem.
+ // We do not allow this transform for the general funnel shift case because
+ // that would not preserve the poison safety of the original code.
+ auto isRotate = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X),
+ m_Deferred(X),
+ m_Value(ShAmt)),
+ m_Intrinsic<Intrinsic::fshr>(m_Value(X),
+ m_Deferred(X),
+ m_Value(ShAmt)));
+ // (ShAmt != 0) ? fshl(X, X, ShAmt) : X --> fshl(X, X, ShAmt)
+ // (ShAmt != 0) ? fshr(X, X, ShAmt) : X --> fshr(X, X, ShAmt)
+ if (match(TrueVal, isRotate) && FalseVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_NE)
+ return TrueVal;
+ // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt)
+ // (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt)
+ if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt &&
+ Pred == ICmpInst::ICMP_EQ)
+ return FalseVal;
}
// Check for other compares that behave like bit test.
@@ -4218,6 +3999,17 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
if (isa<UndefValue>(Idx))
return UndefValue::get(Vec->getType());
+ // Inserting an undef scalar? Assume it is the same value as the existing
+ // vector element.
+ if (isa<UndefValue>(Val))
+ return Vec;
+
+  // If we are extracting a value from a vector and inserting it back into the
+  // same place, the result is the input vector:
+ // insertelt Vec, (extractelt Vec, Idx), Idx --> Vec
+ if (match(Val, m_ExtractElement(m_Specific(Vec), m_Specific(Idx))))
+ return Vec;
+
return nullptr;
}
@@ -4495,6 +4287,33 @@ Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
+static Constant *foldConstant(Instruction::UnaryOps Opcode,
+ Value *&Op, const SimplifyQuery &Q) {
+ if (auto *C = dyn_cast<Constant>(Op))
+ return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
+ return nullptr;
+}
+
+/// Given the operand for an FNeg, see if we can fold the result. If not, this
+/// returns null.
+static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldConstant(Instruction::FNeg, Op, Q))
+ return C;
+
+ Value *X;
+ // fneg (fneg X) ==> X
+ if (match(Op, m_FNeg(m_Value(X))))
+ return X;
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit);
+}
+
static Constant *propagateNaN(Constant *In) {
// If the input is a vector with undef elements, just return a default NaN.
if (!In->isNaN())
@@ -4536,16 +4355,22 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;
- // With nnan: (+/-0.0 - X) + X --> 0.0 (and commuted variant)
+ // With nnan: -X + X --> 0.0 (and commuted variant)
// We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
// Negative zeros are allowed because we always end up with positive zero:
// X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0
// X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
- if (FMF.noNaNs() && (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
- match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
- return ConstantFP::getNullValue(Op0->getType());
+ if (FMF.noNaNs()) {
+ if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
+ match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))
+ return ConstantFP::getNullValue(Op0->getType());
+
+ if (match(Op0, m_FNeg(m_Specific(Op1))) ||
+ match(Op1, m_FNeg(m_Specific(Op0))))
+ return ConstantFP::getNullValue(Op0->getType());
+ }
// (X - Y) + Y --> X
// Y + (X - Y) --> X
@@ -4578,14 +4403,17 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return Op0;
// fsub -0.0, (fsub -0.0, X) ==> X
+ // fsub -0.0, (fneg X) ==> X
Value *X;
if (match(Op0, m_NegZeroFP()) &&
- match(Op1, m_FSub(m_NegZeroFP(), m_Value(X))))
+ match(Op1, m_FNeg(m_Value(X))))
return X;
// fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
+ // fsub 0.0, (fneg X) ==> X if signed zeros are ignored.
if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) &&
- match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))))
+ (match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))) ||
+ match(Op1, m_FNeg(m_Value(X)))))
return X;
// fsub nnan x, x ==> 0.0
@@ -4722,6 +4550,42 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
//=== Helper functions for higher up the class hierarchy.
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FNeg:
+ return simplifyFNegInst(Op, FastMathFlags(), Q, MaxRecurse);
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+}
+
+/// Given the operand for a UnaryOperator, see if we can fold the result.
+/// If not, this returns null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlags when folding the
+/// result. If FastMathFlags are not needed, simply fall back to SimplifyUnOp.
+static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
+ const FastMathFlags &FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FNeg:
+ return simplifyFNegInst(Op, FMF, Q, MaxRecurse);
+ default:
+ return simplifyUnOp(Opcode, Op, Q, MaxRecurse);
+ }
+}
+
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+ return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
+}
+
+Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
+}
+
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
@@ -4885,22 +4749,6 @@ static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
}
-static bool maskIsAllZeroOrUndef(Value *Mask) {
- auto *ConstMask = dyn_cast<Constant>(Mask);
- if (!ConstMask)
- return false;
- if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
- return true;
- for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
- ++I) {
- if (auto *MaskElt = ConstMask->getAggregateElement(I))
- if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
- continue;
- return false;
- }
- return true;
-}
-
static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
const SimplifyQuery &Q) {
// Idempotent functions return the same result when called repeatedly.
@@ -4941,8 +4789,32 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
case Intrinsic::log2:
// log2(exp2(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
- match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) return X;
+ (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
+ match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0),
+ m_Value(X))))) return X;
+ break;
+ case Intrinsic::log10:
+ // log10(pow(10.0, x)) -> x
+ if (Q.CxtI->hasAllowReassoc() &&
+ match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0),
+ m_Value(X)))) return X;
break;
+ case Intrinsic::floor:
+ case Intrinsic::trunc:
+ case Intrinsic::ceil:
+ case Intrinsic::round:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint: {
+ // floor (sitofp x) -> sitofp x
+ // floor (uitofp x) -> uitofp x
+ //
+ // Converting from int always results in a finite integral number or
+ // infinity. For either of those inputs, these rounding functions always
+ // return the same value, so the rounding can be eliminated.
+ if (match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value())))
+ return Op0;
+ break;
+ }
default:
break;
}
@@ -4960,16 +4832,19 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
// X - X -> { 0, false }
if (Op0 == Op1)
return Constant::getNullValue(ReturnType);
- // X - undef -> undef
- // undef - X -> undef
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
- return UndefValue::get(ReturnType);
- break;
+ LLVM_FALLTHROUGH;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // X + undef -> undef
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
- return UndefValue::get(ReturnType);
+ // X - undef -> { undef, false }
+ // undef - X -> { undef, false }
+ // X + undef -> { undef, false }
+  // undef + X -> { undef, false }
+ if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1)) {
+ return ConstantStruct::get(
+ cast<StructType>(ReturnType),
+ {UndefValue::get(ReturnType->getStructElementType(0)),
+ Constant::getNullValue(ReturnType->getStructElementType(1))});
+ }
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
@@ -5085,26 +4960,28 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return nullptr;
}
-template <typename IterTy>
-static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
- const SimplifyQuery &Q) {
+static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
+
// Intrinsics with no operands have some kind of side effect. Don't simplify.
- unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
- if (NumOperands == 0)
+ unsigned NumOperands = Call->getNumArgOperands();
+ if (!NumOperands)
return nullptr;
+ Function *F = cast<Function>(Call->getCalledFunction());
Intrinsic::ID IID = F->getIntrinsicID();
if (NumOperands == 1)
- return simplifyUnaryIntrinsic(F, ArgBegin[0], Q);
+ return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
if (NumOperands == 2)
- return simplifyBinaryIntrinsic(F, ArgBegin[0], ArgBegin[1], Q);
+ return simplifyBinaryIntrinsic(F, Call->getArgOperand(0),
+ Call->getArgOperand(1), Q);
// Handle intrinsics with 3 or more arguments.
switch (IID) {
- case Intrinsic::masked_load: {
- Value *MaskArg = ArgBegin[2];
- Value *PassthruArg = ArgBegin[3];
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_gather: {
+ Value *MaskArg = Call->getArgOperand(2);
+ Value *PassthruArg = Call->getArgOperand(3);
// If the mask is all zeros or undef, the "passthru" argument is the result.
if (maskIsAllZeroOrUndef(MaskArg))
return PassthruArg;
@@ -5112,7 +4989,8 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2];
+ Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1),
+ *ShAmtArg = Call->getArgOperand(2);
// If both operands are undef, the result is undef.
if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
@@ -5120,15 +4998,14 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
// If shift amount is undef, assume it is zero.
if (match(ShAmtArg, m_Undef()))
- return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+ return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
- // TODO: For vectors, we could check each element of a non-splat constant.
APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
if (ShAmtC->urem(BitWidth).isNullValue())
- return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+ return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
}
return nullptr;
}
@@ -5137,58 +5014,36 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
}
-template <typename IterTy>
-static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin,
- IterTy ArgEnd, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
- Type *Ty = V->getType();
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- Ty = PTy->getElementType();
- FunctionType *FTy = cast<FunctionType>(Ty);
+Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+ Value *Callee = Call->getCalledValue();
// call undef -> undef
// call null -> undef
- if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
- return UndefValue::get(FTy->getReturnType());
+ if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
+ return UndefValue::get(Call->getType());
- Function *F = dyn_cast<Function>(V);
+ Function *F = dyn_cast<Function>(Callee);
if (!F)
return nullptr;
if (F->isIntrinsic())
- if (Value *Ret = simplifyIntrinsic(F, ArgBegin, ArgEnd, Q))
+ if (Value *Ret = simplifyIntrinsic(Call, Q))
return Ret;
- if (!canConstantFoldCallTo(CS, F))
+ if (!canConstantFoldCallTo(Call, F))
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
- ConstantArgs.reserve(ArgEnd - ArgBegin);
- for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
- Constant *C = dyn_cast<Constant>(*I);
+ unsigned NumArgs = Call->getNumArgOperands();
+ ConstantArgs.reserve(NumArgs);
+ for (auto &Arg : Call->args()) {
+ Constant *C = dyn_cast<Constant>(&Arg);
if (!C)
return nullptr;
ConstantArgs.push_back(C);
}
- return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
- User::op_iterator ArgBegin, User::op_iterator ArgEnd,
- const SimplifyQuery &Q) {
- return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
- ArrayRef<Value *> Args, const SimplifyQuery &Q) {
- return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
-}
-
-Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
- CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
- return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
- Q, RecursionLimit);
+ return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
/// See if we can compute a simplified version of this instruction.
@@ -5203,6 +5058,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
default:
Result = ConstantFoldInstruction(I, Q.DL, Q.TLI);
break;
+ case Instruction::FNeg:
+ Result = SimplifyFNegInst(I->getOperand(0), I->getFastMathFlags(), Q);
+ break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
I->getFastMathFlags(), Q);
@@ -5327,8 +5185,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
Result = SimplifyPHINode(cast<PHINode>(I), Q);
break;
case Instruction::Call: {
- CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS, Q);
+ Result = SimplifyCall(cast<CallInst>(I), Q);
break;
}
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index 6d5de22cb93f..07d6e27c13be 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -1,9 +1,8 @@
//===- Interval.cpp - Interval class code ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index c777d91b67c6..d12db010db6a 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -1,9 +1,8 @@
//===- IntervalPartition.cpp - Interval Partition module code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
deleted file mode 100644
index 000fe5ddad54..000000000000
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Compute iterated dominance frontiers using a linear time algorithm.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include <queue>
-
-namespace llvm {
-
-template <class NodeTy, bool IsPostDom>
-void IDFCalculator<NodeTy, IsPostDom>::calculate(
- SmallVectorImpl<BasicBlock *> &PHIBlocks) {
- // Use a priority queue keyed on dominator tree level so that inserted nodes
- // are handled from the bottom of the dominator tree upwards. We also augment
- // the level with a DFS number to ensure that the blocks are ordered in a
- // deterministic way.
- typedef std::pair<DomTreeNode *, std::pair<unsigned, unsigned>>
- DomTreeNodePair;
- typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
- less_second> IDFPriorityQueue;
- IDFPriorityQueue PQ;
-
- DT.updateDFSNumbers();
-
- for (BasicBlock *BB : *DefBlocks) {
- if (DomTreeNode *Node = DT.getNode(BB))
- PQ.push({Node, std::make_pair(Node->getLevel(), Node->getDFSNumIn())});
- }
-
- SmallVector<DomTreeNode *, 32> Worklist;
- SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
- SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
-
- while (!PQ.empty()) {
- DomTreeNodePair RootPair = PQ.top();
- PQ.pop();
- DomTreeNode *Root = RootPair.first;
- unsigned RootLevel = RootPair.second.first;
-
- // Walk all dominator tree children of Root, inspecting their CFG edges with
- // targets elsewhere on the dominator tree. Only targets whose level is at
- // most Root's level are added to the iterated dominance frontier of the
- // definition set.
-
- Worklist.clear();
- Worklist.push_back(Root);
- VisitedWorklist.insert(Root);
-
- while (!Worklist.empty()) {
- DomTreeNode *Node = Worklist.pop_back_val();
- BasicBlock *BB = Node->getBlock();
- // Succ is the successor in the direction we are calculating IDF, so it is
- // successor for IDF, and predecessor for Reverse IDF.
- auto DoWork = [&](BasicBlock *Succ) {
- DomTreeNode *SuccNode = DT.getNode(Succ);
-
- // Quickly skip all CFG edges that are also dominator tree edges instead
- // of catching them below.
- if (SuccNode->getIDom() == Node)
- return;
-
- const unsigned SuccLevel = SuccNode->getLevel();
- if (SuccLevel > RootLevel)
- return;
-
- if (!VisitedPQ.insert(SuccNode).second)
- return;
-
- BasicBlock *SuccBB = SuccNode->getBlock();
- if (useLiveIn && !LiveInBlocks->count(SuccBB))
- return;
-
- PHIBlocks.emplace_back(SuccBB);
- if (!DefBlocks->count(SuccBB))
- PQ.push(std::make_pair(
- SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
- };
-
- if (GD) {
- for (auto Pair : children<
- std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>(
- {GD, BB}))
- DoWork(Pair.second);
- } else {
- for (auto *Succ : children<NodeTy>(BB))
- DoWork(Succ);
- }
-
- for (auto DomChild : *Node) {
- if (VisitedWorklist.insert(DomChild).second)
- Worklist.push_back(DomChild);
- }
- }
- }
-}
-
-template class IDFCalculator<BasicBlock *, false>;
-template class IDFCalculator<Inverse<BasicBlock *>, true>;
-}
diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 93c23bca96af..439758560284 100644
--- a/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
//===- LazyBlockFrequencyInfo.cpp - Lazy Block Frequency Analysis ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
index 429b78c3a47e..f2592c26b373 100644
--- a/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
//===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index 3f22ada803c9..797fcf516429 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -1,9 +1,8 @@
//===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -173,6 +172,19 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
}
+ // Externally visible aliases of internal functions are also viable entry
+ // edges to the module.
+ for (auto &A : M.aliases()) {
+ if (A.hasLocalLinkage())
+ continue;
+ if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+ LLVM_DEBUG(dbgs() << " Adding '" << F->getName()
+ << "' with alias '" << A.getName()
+ << "' to entry set of the graph.\n");
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(*F), Edge::Ref);
+ }
+ }
+
// Now add entry nodes for functions reachable via initializers to globals.
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Constant *, 16> Visited;
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 110c085d3f35..542ff709d475 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -1,9 +1,8 @@
//===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -423,10 +422,18 @@ namespace {
BasicBlock *BB);
Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
BasicBlock *BB);
+ bool solveBlockValueBinaryOpImpl(
+ ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+ std::function<ConstantRange(const ConstantRange &,
+ const ConstantRange &)> OpFn);
bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
BasicBlock *BB);
+ bool solveBlockValueOverflowIntrinsic(
+ ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
+ bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
+ BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
ValueLatticeElement &BBLV,
Instruction *BBI);
@@ -625,7 +632,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
// and the like to prove non-nullness, but it's not clear that's worth it
// compile time wise. The context-insensitive value walk done inside
// isKnownNonZero gets most of the profitable cases at much less expense.
- // This does mean that we have a sensativity to where the defining
+ // This does mean that we have a sensitivity to where the defining
// instruction is placed, even if it could legally be hoisted much higher.
// That is unfortunate.
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
@@ -639,6 +646,14 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(Res, BO, BB);
+
+ if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
+ return solveBlockValueOverflowIntrinsic(Res, WO, BB);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(BBI))
+ return solveBlockValueIntrinsic(Res, II, BB);
}
LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -824,7 +839,9 @@ void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
if (!GuardDecl || GuardDecl->use_empty())
return;
- for (Instruction &I : make_range(BBI->getIterator().getReverse(),
+ if (BBI->getIterator() == BBI->getParent()->begin())
+ return;
+ for (Instruction &I : make_range(std::next(BBI->getIterator().getReverse()),
BBI->getParent()->rend())) {
Value *Cond = nullptr;
if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
@@ -892,7 +909,28 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
return true;
}
- // TODO: ABS, NABS from the SelectPatternResult
+ if (SPR.Flavor == SPF_ABS) {
+ if (LHS == SI->getTrueValue()) {
+ BBLV = ValueLatticeElement::getRange(TrueCR.abs());
+ return true;
+ }
+ if (LHS == SI->getFalseValue()) {
+ BBLV = ValueLatticeElement::getRange(FalseCR.abs());
+ return true;
+ }
+ }
+
+ if (SPR.Flavor == SPF_NABS) {
+ ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth()));
+ if (LHS == SI->getTrueValue()) {
+ BBLV = ValueLatticeElement::getRange(Zero.sub(TrueCR.abs()));
+ return true;
+ }
+ if (LHS == SI->getFalseValue()) {
+ BBLV = ValueLatticeElement::getRange(Zero.sub(FalseCR.abs()));
+ return true;
+ }
+ }
}
// Can we constrain the facts about the true and false values by using the
@@ -962,7 +1000,7 @@ Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
const unsigned OperandBitWidth =
DL.getTypeSizeInBits(I->getOperand(Op)->getType());
- ConstantRange Range = ConstantRange(OperandBitWidth);
+ ConstantRange Range = ConstantRange::getFull(OperandBitWidth);
if (hasBlockValue(I->getOperand(Op), BB)) {
ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB);
intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
@@ -1018,56 +1056,83 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
return true;
}
+bool LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
+ ValueLatticeElement &BBLV, Instruction *I, BasicBlock *BB,
+ std::function<ConstantRange(const ConstantRange &,
+ const ConstantRange &)> OpFn) {
+ // Figure out the ranges of the operands. If that fails, use a
+  // conservative range, but apply the transfer rule anyway. This
+ // lets us pick up facts from expressions like "and i32 (call i32
+ // @foo()), 32"
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, I, BB);
+ Optional<ConstantRange> RHSRes = getRangeForOperand(1, I, BB);
+ if (!LHSRes.hasValue() || !RHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
+
+ ConstantRange LHSRange = LHSRes.getValue();
+ ConstantRange RHSRange = RHSRes.getValue();
+ BBLV = ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
+ return true;
+}
+
bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
BinaryOperator *BO,
BasicBlock *BB) {
assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
-
- // Filter out operators we don't know how to reason about before attempting to
- // recurse on our operand(s). This can cut a long search short if we know
- // we're not going to be able to get any useful information anyways.
- switch (BO->getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- // continue into the code below
- break;
- default:
- // Unhandled instructions are overdefined.
+ if (BO->getOpcode() == Instruction::Xor) {
+ // Xor is the only operation not supported by ConstantRange::binaryOp().
LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown binary operator).\n");
BBLV = ValueLatticeElement::getOverdefined();
return true;
- };
-
- // Figure out the ranges of the operands. If that fails, use a
- // conservative range, but apply the transfer rule anyways. This
- // lets us pick up facts from expressions like "and i32 (call i32
- // @foo()), 32"
- Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
- Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
+ }
- if (!LHSRes.hasValue() || !RHSRes.hasValue())
- // More work to do before applying this transfer rule.
- return false;
+ return solveBlockValueBinaryOpImpl(BBLV, BO, BB,
+ [BO](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.binaryOp(BO->getOpcode(), CR2);
+ });
+}
- ConstantRange LHSRange = LHSRes.getValue();
- ConstantRange RHSRange = RHSRes.getValue();
+bool LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(
+ ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB) {
+ return solveBlockValueBinaryOpImpl(BBLV, WO, BB,
+ [WO](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.binaryOp(WO->getBinaryOp(), CR2);
+ });
+}
- // NOTE: We're currently limited by the set of operations that ConstantRange
- // can evaluate symbolically. Enhancing that set will allows us to analyze
- // more definitions.
- Instruction::BinaryOps BinOp = BO->getOpcode();
- BBLV = ValueLatticeElement::getRange(LHSRange.binaryOp(BinOp, RHSRange));
- return true;
+bool LazyValueInfoImpl::solveBlockValueIntrinsic(
+ ValueLatticeElement &BBLV, IntrinsicInst *II, BasicBlock *BB) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.uadd_sat(CR2);
+ });
+ case Intrinsic::usub_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.usub_sat(CR2);
+ });
+ case Intrinsic::sadd_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.sadd_sat(CR2);
+ });
+ case Intrinsic::ssub_sat:
+ return solveBlockValueBinaryOpImpl(BBLV, II, BB,
+ [](const ConstantRange &CR1, const ConstantRange &CR2) {
+ return CR1.ssub_sat(CR2);
+ });
+ default:
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown intrinsic).\n");
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+ }
}
static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
@@ -1133,6 +1198,28 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
return ValueLatticeElement::getOverdefined();
}
+// Handle conditions of the form
+// extractvalue(op.with.overflow(%x, C), 1).
+static ValueLatticeElement getValueFromOverflowCondition(
+ Value *Val, WithOverflowInst *WO, bool IsTrueDest) {
+ // TODO: This only works with a constant RHS for now. We could also compute
+ // the range of the RHS, but this doesn't fit into the current structure of
+ // the edge value calculation.
+ const APInt *C;
+ if (WO->getLHS() != Val || !match(WO->getRHS(), m_APInt(C)))
+ return ValueLatticeElement::getOverdefined();
+
+ // Calculate the possible values of %x for which no overflow occurs.
+ ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
+ WO->getBinaryOp(), *C, WO->getNoWrapKind());
+
+ // If overflow is false, %x is constrained to NWR. If overflow is true, %x is
+  // constrained to its inverse (all values that might cause overflow).
+ if (IsTrueDest)
+ NWR = NWR.inverse();
+ return ValueLatticeElement::getRange(NWR);
+}
+
static ValueLatticeElement
getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
DenseMap<Value*, ValueLatticeElement> &Visited);
@@ -1143,6 +1230,11 @@ getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
return getValueFromICmpCondition(Val, ICI, isTrueDest);
+ if (auto *EVI = dyn_cast<ExtractValueInst>(Cond))
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 1)
+ return getValueFromOverflowCondition(Val, WO, isTrueDest);
+
// Handle conditions in the form of (cond1 && cond2), we know that on the
// true dest path both of the conditions hold. Similarly for conditions of
// the form (cond1 || cond2), we know that on the false dest path neither
@@ -1575,14 +1667,14 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
ValueLatticeElement Result =
getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isUndefined())
- return ConstantRange(Width, /*isFullSet=*/false);
+ return ConstantRange::getEmpty(Width);
if (Result.isConstantRange())
return Result.getConstantRange();
// We represent ConstantInt constants as constant ranges but other kinds
// of integer constants, i.e. ConstantExpr will be tagged as constants
assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
"ConstantInt value must be represented as constantrange");
- return ConstantRange(Width, /*isFullSet=*/true);
+ return ConstantRange::getFull(Width);
}
/// Determine whether the specified value is known to be a
@@ -1614,14 +1706,14 @@ ConstantRange LazyValueInfo::getConstantRangeOnEdge(Value *V,
getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isUndefined())
- return ConstantRange(Width, /*isFullSet=*/false);
+ return ConstantRange::getEmpty(Width);
if (Result.isConstantRange())
return Result.getConstantRange();
// We represent ConstantInt constants as constant ranges but other kinds
// of integer constants, i.e. ConstantExpr will be tagged as constants
assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
"ConstantInt value must be represented as constantrange");
- return ConstantRange(Width, /*isFullSet=*/true);
+ return ConstantRange::getFull(Width);
}
static LazyValueInfo::Tristate
@@ -1711,7 +1803,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
// through would still be correct.
const DataLayout &DL = CxtI->getModule()->getDataLayout();
if (V->getType()->isPointerTy() && C->isNullValue() &&
- isKnownNonZero(V->stripPointerCasts(), DL)) {
+ isKnownNonZero(V->stripPointerCastsSameRepresentation(), DL)) {
if (Pred == ICmpInst::ICMP_EQ)
return LazyValueInfo::False;
else if (Pred == ICmpInst::ICMP_NE)
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 5540859ebdda..52212e1c42aa 100644
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -1,10 +1,9 @@
//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
//Implementation -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 5d0a627f8426..d28b8a189d4b 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -1,9 +1,8 @@
//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -268,10 +267,14 @@ void Lint::visitCallSite(CallSite CS) {
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
AttributeList PAL = CS.getAttributes();
unsigned ArgNo = 0;
- for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE;
+ ++BI, ++ArgNo) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack so we're not really passing the pointer anyway.
- if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+ if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal))
+ continue;
+ // If both arguments are readonly, they have no dependence.
+ if (Formal->onlyReadsMemory() && CS.onlyReadsMemory(ArgNo))
continue;
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasResult Result = AA->alias(*AI, *BI);
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 8129795bc0c1..31da4e9ec783 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -1,9 +1,8 @@
//===- Loads.cpp - Local load analysis ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -126,7 +125,8 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
Visited);
}
-bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+ unsigned Align,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
@@ -134,8 +134,6 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
- Type *VTy = V->getType();
- Type *Ty = VTy->getPointerElementType();
// Require ABI alignment for loads without alignment specification
if (Align == 0)
@@ -146,14 +144,16 @@ bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
SmallPtrSet<const Value *, 32> Visited;
return ::isDereferenceableAndAlignedPointer(
- V, Align, APInt(DL.getIndexTypeSizeInBits(VTy), DL.getTypeStoreSize(Ty)), DL,
- CtxI, DT, Visited);
+ V, Align,
+ APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)),
+ DL, CtxI, DT, Visited);
}
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
+ const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
- return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT);
+ return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT);
}
/// Test if A and B will obviously have the same value.
@@ -198,7 +198,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
///
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom,
const DominatorTree *DT) {
@@ -209,7 +209,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
// If DT is not specified we can't make context-sensitive query
const Instruction* CtxI = DT ? ScanFrom : nullptr;
- if (isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT))
+ if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT))
return true;
int64_t ByteOffset = 0;
@@ -281,9 +281,17 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
Value *AccessedPtr;
unsigned AccessedAlign;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ // Ignore volatile loads. The execution of a volatile load cannot
+ // be used to prove an address is backed by regular memory; it can,
+ // for example, point to an MMIO register.
+ if (LI->isVolatile())
+ continue;
AccessedPtr = LI->getPointerOperand();
AccessedAlign = LI->getAlignment();
} else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Ignore volatile stores (see comment for loads).
+ if (SI->isVolatile())
+ continue;
AccessedPtr = SI->getPointerOperand();
AccessedAlign = SI->getAlignment();
} else
@@ -306,7 +314,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align,
return false;
}
-/// DefMaxInstsToScan - the default number of maximum instructions
+bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+ const DataLayout &DL,
+ Instruction *ScanFrom,
+ const DominatorTree *DT) {
+ APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
+ return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT);
+}
+
+ /// DefMaxInstsToScan - the default maximum number of instructions
/// to scan in the block, used by FindAvailableLoadedValue().
/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
/// threading in part by eliminating partially redundant loads.
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 7f3480f512ab..36bd9a8b7ea7 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1,9 +1,8 @@
//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -843,7 +842,7 @@ void AccessAnalysis::processMemAccesses() {
bool SetHasWrite = false;
// Map of pointers to last access encountered.
- typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
UnderlyingObjToAccessMap ObjToLastAccess;
// Set of access to check after all writes have been processed.
@@ -904,13 +903,13 @@ void AccessAnalysis::processMemAccesses() {
// Create sets of pointers connected by a shared alias set and
// underlying object.
- typedef SmallVector<Value *, 16> ValueVector;
+ typedef SmallVector<const Value *, 16> ValueVector;
ValueVector TempObjects;
GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
LLVM_DEBUG(dbgs()
<< "Underlying objects for pointer " << *Ptr << "\n");
- for (Value *UnderlyingObj : TempObjects) {
+ for (const Value *UnderlyingObj : TempObjects) {
// nullptr never alias, don't join sets for pointer that have "null"
// in their UnderlyingObjects list.
if (isa<ConstantPointerNull>(UnderlyingObj) &&
@@ -1014,7 +1013,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
return 0;
}
- // The accesss function must stride over the innermost loop.
+ // The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
<< *Ptr << " SCEV: " << *AR << "\n");
@@ -1086,7 +1085,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
if (Assume) {
// We can avoid this case by adding a run-time check.
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
- << "inbouds or in address space 0 may wrap:\n"
+ << "inbounds or in address space 0 may wrap:\n"
<< "LAA: Pointer: " << *Ptr << "\n"
<< "LAA: SCEV: " << *AR << "\n"
<< "LAA: Added an overflow assumption\n");
@@ -1145,10 +1144,9 @@ bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
// Sort the memory accesses and keep the order of their uses in UseOrder.
- std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
- [&OffValPairs](unsigned Left, unsigned Right) {
- return OffValPairs[Left].first < OffValPairs[Right].first;
- });
+ llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
+ return OffValPairs[Left].first < OffValPairs[Right].first;
+ });
// Check if the order is consecutive already.
if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
@@ -1346,7 +1344,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// where Step is the absolute stride of the memory accesses in bytes,
// then there is no dependence.
//
- // Ratioanle:
+ // Rationale:
// We basically want to check if the absolute distance (|Dist/Step|)
// is >= the loop iteration count (or > BackedgeTakenCount).
// This is equivalent to the Strong SIV Test (Practical Dependence Testing,
@@ -1369,7 +1367,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
// The dependence distance can be positive/negative, so we sign extend Dist;
// The multiplication of the absolute stride in bytes and the
- // backdgeTakenCount is non-negative, so we zero extend Product.
+ // backedgeTakenCount is non-negative, so we zero extend Product.
if (DistTypeSize > ProductTypeSize)
CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
else
@@ -1780,6 +1778,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
+ bool HasComplexMemInst = false;
+
+ // A runtime check is only legal to insert if there are no convergent calls.
+ HasConvergentOp = false;
+
PtrRtChecking->Pointers.clear();
PtrRtChecking->Need = false;
@@ -1787,8 +1790,25 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
- // Scan the BB and collect legal loads and stores.
+ // Scan the BB and collect legal loads and stores. Also detect any
+ // convergent instructions.
for (Instruction &I : *BB) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (Call->isConvergent())
+ HasConvergentOp = true;
+ }
+
+      // If we have found both a non-vectorizable memory instruction and a
+      // convergent operation in this loop, there is no reason to continue the search.
+ if (HasComplexMemInst && HasConvergentOp) {
+ CanVecMem = false;
+ return;
+ }
+
+ // Avoid hitting recordAnalysis multiple times.
+ if (HasComplexMemInst)
+ continue;
+
// If this is a load, save it. If this instruction can read from memory
// but is not a load, then we quit. Notice that we don't handle function
// calls that read or write.
@@ -1807,12 +1827,18 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
continue;
auto *Ld = dyn_cast<LoadInst>(&I);
- if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ if (!Ld) {
+ recordAnalysis("CantVectorizeInstruction", Ld)
+ << "instruction cannot be vectorized";
+ HasComplexMemInst = true;
+ continue;
+ }
+ if (!Ld->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleLoad", Ld)
<< "read with atomic ordering or volatile read";
LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
- CanVecMem = false;
- return;
+ HasComplexMemInst = true;
+ continue;
}
NumLoads++;
Loads.push_back(Ld);
@@ -1828,15 +1854,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (!St) {
recordAnalysis("CantVectorizeInstruction", St)
<< "instruction cannot be vectorized";
- CanVecMem = false;
- return;
+ HasComplexMemInst = true;
+ continue;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
recordAnalysis("NonSimpleStore", St)
<< "write with atomic ordering or volatile write";
LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
- CanVecMem = false;
- return;
+ HasComplexMemInst = true;
+ continue;
}
NumStores++;
Stores.push_back(St);
@@ -1847,6 +1873,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
} // Next instr.
} // Next block.
+ if (HasComplexMemInst) {
+ CanVecMem = false;
+ return;
+ }
+
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -1964,7 +1995,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
}
LLVM_DEBUG(
- dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
@@ -1999,6 +2030,15 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
}
}
+ if (HasConvergentOp) {
+ recordAnalysis("CantInsertRuntimeCheckWithConvergent")
+ << "cannot add control dependency to convergent operation";
+ LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
+ "would be needed with a convergent operation\n");
+ CanVecMem = false;
+ return;
+ }
+
if (CanVecMem)
LLVM_DEBUG(
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
@@ -2252,7 +2292,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
// Match the types so we can compare the stride and the BETakenCount.
// The Stride can be positive/negative, so we sign extend Stride;
- // The backdgeTakenCount is non-negative, so we zero extend BETakenCount.
+ // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
@@ -2287,6 +2327,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
+ HasConvergentOp(false),
HasDependenceInvolvingLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(AA, LI, TLI, DT);
@@ -2303,6 +2344,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS << "\n";
}
+ if (HasConvergentOp)
+ OS.indent(Depth) << "Has convergent operation in loop\n";
+
if (Report)
OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
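The LoopAccessInfo hunks above record convergent calls so that a loop which would need a runtime pointer check is rejected rather than transformed. A minimal sketch of that detection, using only the Loop and CallBase APIs that appear in the patch (the helper name loopHasConvergentOp is illustrative, not part of LLVM):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Illustrative helper (not part of the patch): true if any call in the loop
// is convergent, in which case inserting a runtime check would add an
// illegal control dependency.
static bool loopHasConvergentOp(const Loop &L) {
  for (BasicBlock *BB : L.blocks())
    for (const Instruction &I : *BB)
      if (const auto *Call = dyn_cast<CallBase>(&I))
        if (Call->isConvergent())
          return true;
  return false;
}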
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index 2a3b29d7fbca..a10a87ce113b 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -1,9 +1,8 @@
//===- LoopAnalysisManager.cpp - Loop analysis management -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,11 +18,6 @@
using namespace llvm;
namespace llvm {
-/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
-cl::opt<bool> EnableMSSALoopDependency(
- "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
- cl::desc("Enable MemorySSA dependency for loop pass manager"));
-
// Explicit template instantiations and specialization definitions for core
// template typedefs.
template class AllAnalysesOn<Loop>;
@@ -147,8 +141,6 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
PA.preserve<ScalarEvolutionAnalysis>();
- if (EnableMSSALoopDependency)
- PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index ef2b1257015c..aa5da0859805 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -1,9 +1,8 @@
//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,8 +17,12 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CFG.h"
@@ -65,15 +68,16 @@ bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); });
}
-bool Loop::makeLoopInvariant(Value *V, bool &Changed,
- Instruction *InsertPt) const {
+bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt,
+ MemorySSAUpdater *MSSAU) const {
if (Instruction *I = dyn_cast<Instruction>(V))
- return makeLoopInvariant(I, Changed, InsertPt);
+ return makeLoopInvariant(I, Changed, InsertPt, MSSAU);
return true; // All non-instructions are loop-invariant.
}
bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
- Instruction *InsertPt) const {
+ Instruction *InsertPt,
+ MemorySSAUpdater *MSSAU) const {
// Test if the value is already loop-invariant.
if (isLoopInvariant(I))
return true;
@@ -94,11 +98,14 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
}
// Don't hoist instructions with loop-variant operands.
for (Value *Operand : I->operands())
- if (!makeLoopInvariant(Operand, Changed, InsertPt))
+ if (!makeLoopInvariant(Operand, Changed, InsertPt, MSSAU))
return false;
// Hoist.
I->moveBefore(InsertPt);
+ if (MSSAU)
+ if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(I))
+ MSSAU->moveToPlace(MUD, InsertPt->getParent(), MemorySSA::End);
// There is a possibility of hoisting this instruction above some arbitrary
// condition. Any metadata defined on it can be control dependent on this
@@ -110,24 +117,37 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
return true;
}
-PHINode *Loop::getCanonicalInductionVariable() const {
+bool Loop::getIncomingAndBackEdge(BasicBlock *&Incoming,
+ BasicBlock *&Backedge) const {
BasicBlock *H = getHeader();
- BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+ Incoming = nullptr;
+ Backedge = nullptr;
pred_iterator PI = pred_begin(H);
assert(PI != pred_end(H) && "Loop must have at least one backedge!");
Backedge = *PI++;
if (PI == pred_end(H))
- return nullptr; // dead loop
+ return false; // dead loop
Incoming = *PI++;
if (PI != pred_end(H))
- return nullptr; // multiple backedges?
+ return false; // multiple backedges?
if (contains(Incoming)) {
if (contains(Backedge))
- return nullptr;
+ return false;
std::swap(Incoming, Backedge);
} else if (!contains(Backedge))
+ return false;
+
+ assert(Incoming && Backedge && "expected non-null incoming and backedges");
+ return true;
+}
+
+PHINode *Loop::getCanonicalInductionVariable() const {
+ BasicBlock *H = getHeader();
+
+ BasicBlock *Incoming = nullptr, *Backedge = nullptr;
+ if (!getIncomingAndBackEdge(Incoming, Backedge))
return nullptr;
// Loop over all of the PHI nodes, looking for a canonical indvar.
@@ -146,6 +166,218 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return nullptr;
}
+/// Get the latch condition instruction.
+static ICmpInst *getLatchCmpInst(const Loop &L) {
+ if (BasicBlock *Latch = L.getLoopLatch())
+ if (BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator()))
+ if (BI->isConditional())
+ return dyn_cast<ICmpInst>(BI->getCondition());
+
+ return nullptr;
+}
+
+/// Return the final value of the loop induction variable if found.
+static Value *findFinalIVValue(const Loop &L, const PHINode &IndVar,
+ const Instruction &StepInst) {
+ ICmpInst *LatchCmpInst = getLatchCmpInst(L);
+ if (!LatchCmpInst)
+ return nullptr;
+
+ Value *Op0 = LatchCmpInst->getOperand(0);
+ Value *Op1 = LatchCmpInst->getOperand(1);
+ if (Op0 == &IndVar || Op0 == &StepInst)
+ return Op1;
+
+ if (Op1 == &IndVar || Op1 == &StepInst)
+ return Op0;
+
+ return nullptr;
+}
+
+Optional<Loop::LoopBounds> Loop::LoopBounds::getBounds(const Loop &L,
+ PHINode &IndVar,
+ ScalarEvolution &SE) {
+ InductionDescriptor IndDesc;
+ if (!InductionDescriptor::isInductionPHI(&IndVar, &L, &SE, IndDesc))
+ return None;
+
+ Value *InitialIVValue = IndDesc.getStartValue();
+ Instruction *StepInst = IndDesc.getInductionBinOp();
+ if (!InitialIVValue || !StepInst)
+ return None;
+
+ const SCEV *Step = IndDesc.getStep();
+ Value *StepInstOp1 = StepInst->getOperand(1);
+ Value *StepInstOp0 = StepInst->getOperand(0);
+ Value *StepValue = nullptr;
+ if (SE.getSCEV(StepInstOp1) == Step)
+ StepValue = StepInstOp1;
+ else if (SE.getSCEV(StepInstOp0) == Step)
+ StepValue = StepInstOp0;
+
+ Value *FinalIVValue = findFinalIVValue(L, IndVar, *StepInst);
+ if (!FinalIVValue)
+ return None;
+
+ return LoopBounds(L, *InitialIVValue, *StepInst, StepValue, *FinalIVValue,
+ SE);
+}
+
+using Direction = Loop::LoopBounds::Direction;
+
+ICmpInst::Predicate Loop::LoopBounds::getCanonicalPredicate() const {
+ BasicBlock *Latch = L.getLoopLatch();
+ assert(Latch && "Expecting valid latch");
+
+ BranchInst *BI = dyn_cast_or_null<BranchInst>(Latch->getTerminator());
+ assert(BI && BI->isConditional() && "Expecting conditional latch branch");
+
+ ICmpInst *LatchCmpInst = dyn_cast<ICmpInst>(BI->getCondition());
+ assert(LatchCmpInst &&
+ "Expecting the latch compare instruction to be a CmpInst");
+
+ // Need to invert the predicate when the first successor is not the loop
+ // header
+ ICmpInst::Predicate Pred = (BI->getSuccessor(0) == L.getHeader())
+ ? LatchCmpInst->getPredicate()
+ : LatchCmpInst->getInversePredicate();
+
+ if (LatchCmpInst->getOperand(0) == &getFinalIVValue())
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+
+ // Need to flip strictness of the predicate when the latch compare instruction
+ // is not using StepInst
+ if (LatchCmpInst->getOperand(0) == &getStepInst() ||
+ LatchCmpInst->getOperand(1) == &getStepInst())
+ return Pred;
+
+ // Cannot flip strictness of NE and EQ
+ if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+ return ICmpInst::getFlippedStrictnessPredicate(Pred);
+
+ Direction D = getDirection();
+ if (D == Direction::Increasing)
+ return ICmpInst::ICMP_SLT;
+
+ if (D == Direction::Decreasing)
+ return ICmpInst::ICMP_SGT;
+
+ // If cannot determine the direction, then unable to find the canonical
+ // predicate
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+Direction Loop::LoopBounds::getDirection() const {
+ if (const SCEVAddRecExpr *StepAddRecExpr =
+ dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&getStepInst())))
+ if (const SCEV *StepRecur = StepAddRecExpr->getStepRecurrence(SE)) {
+ if (SE.isKnownPositive(StepRecur))
+ return Direction::Increasing;
+ if (SE.isKnownNegative(StepRecur))
+ return Direction::Decreasing;
+ }
+
+ return Direction::Unknown;
+}
+
+Optional<Loop::LoopBounds> Loop::getBounds(ScalarEvolution &SE) const {
+ if (PHINode *IndVar = getInductionVariable(SE))
+ return LoopBounds::getBounds(*this, *IndVar, SE);
+
+ return None;
+}
+
+PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
+ if (!isLoopSimplifyForm())
+ return nullptr;
+
+ BasicBlock *Header = getHeader();
+ assert(Header && "Expected a valid loop header");
+ ICmpInst *CmpInst = getLatchCmpInst(*this);
+ if (!CmpInst)
+ return nullptr;
+
+ Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
+ Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+
+ for (PHINode &IndVar : Header->phis()) {
+ InductionDescriptor IndDesc;
+ if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
+ continue;
+
+ Instruction *StepInst = IndDesc.getInductionBinOp();
+
+ // case 1:
+ // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+ // StepInst = IndVar + step
+ // cmp = StepInst < FinalValue
+ if (StepInst == LatchCmpOp0 || StepInst == LatchCmpOp1)
+ return &IndVar;
+
+ // case 2:
+ // IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
+ // StepInst = IndVar + step
+ // cmp = IndVar < FinalValue
+ if (&IndVar == LatchCmpOp0 || &IndVar == LatchCmpOp1)
+ return &IndVar;
+ }
+
+ return nullptr;
+}
+
+bool Loop::getInductionDescriptor(ScalarEvolution &SE,
+ InductionDescriptor &IndDesc) const {
+ if (PHINode *IndVar = getInductionVariable(SE))
+ return InductionDescriptor::isInductionPHI(IndVar, this, &SE, IndDesc);
+
+ return false;
+}
+
+bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+ ScalarEvolution &SE) const {
+ // Located in the loop header
+ BasicBlock *Header = getHeader();
+ if (AuxIndVar.getParent() != Header)
+ return false;
+
+ // No uses outside of the loop
+ for (User *U : AuxIndVar.users())
+ if (const Instruction *I = dyn_cast<Instruction>(U))
+ if (!contains(I))
+ return false;
+
+ InductionDescriptor IndDesc;
+ if (!InductionDescriptor::isInductionPHI(&AuxIndVar, this, &SE, IndDesc))
+ return false;
+
+ // The step instruction opcode should be add or sub.
+ if (IndDesc.getInductionOpcode() != Instruction::Add &&
+ IndDesc.getInductionOpcode() != Instruction::Sub)
+ return false;
+
+ // Incremented by a loop invariant step for each loop iteration
+ return SE.isLoopInvariant(IndDesc.getStep(), this);
+}
+
+bool Loop::isCanonical(ScalarEvolution &SE) const {
+ InductionDescriptor IndDesc;
+ if (!getInductionDescriptor(SE, IndDesc))
+ return false;
+
+ ConstantInt *Init = dyn_cast_or_null<ConstantInt>(IndDesc.getStartValue());
+ if (!Init || !Init->isZero())
+ return false;
+
+ if (IndDesc.getInductionOpcode() != Instruction::Add)
+ return false;
+
+ ConstantInt *Step = IndDesc.getConstIntStepValue();
+ if (!Step || !Step->isOne())
+ return false;
+
+ return true;
+}
+
// Check that 'BB' doesn't have any uses outside of the 'L'
static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
DominatorTree &DT) {
@@ -200,8 +432,11 @@ bool Loop::isLoopSimplifyForm() const {
bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
+ // FIXME: it should be ok to clone CallBrInst's if we correctly update the
+ // operand list to reflect the newly cloned labels.
for (BasicBlock *BB : this->blocks()) {
- if (isa<IndirectBrInst>(BB->getTerminator()))
+ if (isa<IndirectBrInst>(BB->getTerminator()) ||
+ isa<CallBrInst>(BB->getTerminator()))
return false;
for (Instruction &I : *BB)
@@ -242,48 +477,20 @@ void Loop::setLoopID(MDNode *LoopID) const {
assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
"Loop ID should refer to itself");
- BasicBlock *H = getHeader();
- for (BasicBlock *BB : this->blocks()) {
- Instruction *TI = BB->getTerminator();
- for (BasicBlock *Successor : successors(TI)) {
- if (Successor == H) {
- TI->setMetadata(LLVMContext::MD_loop, LoopID);
- break;
- }
- }
- }
+ SmallVector<BasicBlock *, 4> LoopLatches;
+ getLoopLatches(LoopLatches);
+ for (BasicBlock *BB : LoopLatches)
+ BB->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
}
void Loop::setLoopAlreadyUnrolled() {
- MDNode *LoopID = getLoopID();
- // First remove any existing loop unrolling metadata.
- SmallVector<Metadata *, 4> MDs;
- // Reserve first location for self reference to the LoopID metadata node.
- MDs.push_back(nullptr);
-
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- bool IsUnrollMetadata = false;
- MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
- if (MD) {
- const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
- IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
- }
- if (!IsUnrollMetadata)
- MDs.push_back(LoopID->getOperand(i));
- }
- }
-
- // Add unroll(disable) metadata to disable future unrolling.
LLVMContext &Context = getHeader()->getContext();
- SmallVector<Metadata *, 1> DisableOperands;
- DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
- MDNode *DisableNode = MDNode::get(Context, DisableOperands);
- MDs.push_back(DisableNode);
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
+ MDNode *DisableUnrollMD =
+ MDNode::get(Context, MDString::get(Context, "llvm.loop.unroll.disable"));
+ MDNode *LoopID = getLoopID();
+ MDNode *NewLoopID = makePostTransformationMetadata(
+ Context, LoopID, {"llvm.loop.unroll."}, {DisableUnrollMD});
setLoopID(NewLoopID);
}
@@ -761,6 +968,46 @@ bool llvm::isValidAsAccessGroup(MDNode *Node) {
return Node->getNumOperands() == 0 && Node->isDistinct();
}
+MDNode *llvm::makePostTransformationMetadata(LLVMContext &Context,
+ MDNode *OrigLoopID,
+ ArrayRef<StringRef> RemovePrefixes,
+ ArrayRef<MDNode *> AddAttrs) {
+ // First remove any existing loop metadata related to this transformation.
+ SmallVector<Metadata *, 4> MDs;
+
+ // Reserve first location for self reference to the LoopID metadata node.
+ TempMDTuple TempNode = MDNode::getTemporary(Context, None);
+ MDs.push_back(TempNode.get());
+
+ // Remove metadata for the transformation that has been applied or that became
+ // outdated.
+ if (OrigLoopID) {
+ for (unsigned i = 1, ie = OrigLoopID->getNumOperands(); i < ie; ++i) {
+ bool IsVectorMetadata = false;
+ Metadata *Op = OrigLoopID->getOperand(i);
+ if (MDNode *MD = dyn_cast<MDNode>(Op)) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (S)
+ IsVectorMetadata =
+ llvm::any_of(RemovePrefixes, [S](StringRef Prefix) -> bool {
+ return S->getString().startswith(Prefix);
+ });
+ }
+ if (!IsVectorMetadata)
+ MDs.push_back(Op);
+ }
+ }
+
+ // Add metadata to avoid reapplying a transformation, such as
+ // llvm.loop.unroll.disable and llvm.loop.isvectorized.
+ MDs.append(AddAttrs.begin(), AddAttrs.end());
+
+ MDNode *NewLoopID = MDNode::getDistinct(Context, MDs);
+ // Replace the temporary node with a self-reference.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ return NewLoopID;
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
@@ -792,7 +1039,7 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
}
void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
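The induction-variable helpers added to LoopInfo above can be exercised roughly as follows. This is a sketch that assumes a loop in simplified form and an available ScalarEvolution result; only accessors visible in the patch (getInductionVariable, getBounds, getDirection, getCanonicalPredicate) are used, and the function name describeLoopBounds is illustrative:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch only: report what the new bounds helpers can recover for a loop.
static void describeLoopBounds(const Loop &L, ScalarEvolution &SE) {
  if (PHINode *IndVar = L.getInductionVariable(SE))
    errs() << "induction variable: " << *IndVar << "\n";

  if (Optional<Loop::LoopBounds> Bounds = L.getBounds(SE)) {
    if (Bounds->getDirection() == Loop::LoopBounds::Direction::Increasing)
      errs() << "loop counts upward\n";
    if (Bounds->getCanonicalPredicate() == ICmpInst::ICMP_SLT)
      errs() << "canonical latch compare is signed less-than\n";
  }
}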
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index a68f114b83a0..4ab3798039d8 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -1,9 +1,8 @@
//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -210,6 +210,8 @@ bool LPPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
+ llvm::TimeTraceScope LoopPassScope("RunLoopPass", P->getPassName());
+
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
dumpRequiredSet(P);
@@ -384,16 +386,20 @@ void LoopPass::assignPassManager(PMStack &PMS,
LPPM->add(this);
}
+static std::string getDescription(const Loop &L) {
+ return "loop";
+}
+
bool LoopPass::skipLoop(const Loop *L) const {
const Function *F = L->getHeader()->getParent();
if (!F)
return false;
// Check the opt bisect limit.
- LLVMContext &Context = F->getContext();
- if (!Context.getOptPassGate().shouldRunPass(this, *L))
+ OptPassGate &Gate = F->getContext().getOptPassGate();
+ if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(*L)))
return true;
// Check for the OptimizeNone attribute.
- if (F->hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F->hasOptNone()) {
// FIXME: Report this to dbgs() only once per function.
LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' in function "
<< F->getName() << "\n");
diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp
index c8b91a7a1a51..1728b5e9f6d2 100644
--- a/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -1,9 +1,8 @@
//===- LoopUnrollAnalyzer.cpp - Unrolling Effect Estimation -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 907b321b231a..6e1bb50e8893 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -1,9 +1,8 @@
//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index 4a136c5a0c6d..77ebf89d9a08 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -1,9 +1,8 @@
//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,9 +53,10 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
for (auto &I: instructions(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *PO = LI->getPointerOperand();
- if (isDereferenceablePointer(PO, DL))
+ if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+ if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
+ LI->getAlignment(), DL))
DerefAndAligned.insert(PO);
}
}
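The MemDerefPrinter hunk above reflects the updated dereferenceability helpers, which now take the accessed type explicitly. A small sketch of a caller built on the same calls shown in the patch (the helper name loadIsKnownDereferenceable is illustrative):

#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Sketch only: mirrors the calls in the hunk above for a single load.
static bool loadIsKnownDereferenceable(const LoadInst &LI) {
  const DataLayout &DL = LI.getModule()->getDataLayout();
  const Value *Ptr = LI.getPointerOperand();
  // The loaded type is now passed alongside the pointer.
  return isDereferenceableAndAlignedPointer(Ptr, LI.getType(),
                                            LI.getAlignment(), DL);
}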
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 686ad294378c..729dad463657 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -1,9 +1,8 @@
//===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -264,6 +263,19 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, AllocLike, TLI, LookThroughBitCast).hasValue();
}
+/// Tests if a value is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast).hasValue();
+}
+
+/// Tests if a function is a library function that
+/// reallocates memory (e.g., realloc).
+bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
+ return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
@@ -359,19 +371,8 @@ const CallInst *llvm::extractCallocCall(const Value *I,
return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
}
-/// isFreeCall - Returns non-null if the value is a call to the builtin free()
-const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
- bool IsNoBuiltinCall;
- const Function *Callee =
- getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
- if (Callee == nullptr || IsNoBuiltinCall)
- return nullptr;
-
- StringRef FnName = Callee->getName();
- LibFunc TLIFn;
- if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
- return nullptr;
-
+/// isLibFreeFunction - Returns true if the function is a builtin free()
+bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
unsigned ExpectedNumParams;
if (TLIFn == LibFunc_free ||
TLIFn == LibFunc_ZdlPv || // operator delete(void*)
@@ -402,22 +403,39 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t) // delete[](void*, align_val_t, nothrow)
ExpectedNumParams = 3;
else
- return nullptr;
+ return false;
// Check free prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
- FunctionType *FTy = Callee->getFunctionType();
+ FunctionType *FTy = F->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
- return nullptr;
+ return false;
if (FTy->getNumParams() != ExpectedNumParams)
+ return false;
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext()))
+ return false;
+
+ return true;
+}
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
+ bool IsNoBuiltinCall;
+ const Function *Callee =
+ getCalledFunction(I, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+ if (Callee == nullptr || IsNoBuiltinCall)
return nullptr;
- if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
+
+ StringRef FnName = Callee->getName();
+ LibFunc TLIFn;
+ if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return nullptr;
- return dyn_cast<CallInst>(I);
+ return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr;
}
+
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
//
@@ -442,10 +460,10 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
return true;
}
-ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool MustSucceed) {
+Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool MustSucceed) {
assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
"ObjectSize must be a call to llvm.objectsize!");
@@ -462,13 +480,35 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
EvalOptions.NullIsUnknownSize =
cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne();
- // FIXME: Does it make sense to just return a failure value if the size won't
- // fit in the output and `!MustSucceed`?
- uint64_t Size;
auto *ResultType = cast<IntegerType>(ObjectSize->getType());
- if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
- isUIntN(ResultType->getBitWidth(), Size))
- return ConstantInt::get(ResultType, Size);
+ bool StaticOnly = cast<ConstantInt>(ObjectSize->getArgOperand(3))->isZero();
+ if (StaticOnly) {
+ // FIXME: Does it make sense to just return a failure value if the size won't
+ // fit in the output and `!MustSucceed`?
+ uint64_t Size;
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
+ isUIntN(ResultType->getBitWidth(), Size))
+ return ConstantInt::get(ResultType, Size);
+ } else {
+ LLVMContext &Ctx = ObjectSize->getFunction()->getContext();
+ ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, EvalOptions);
+ SizeOffsetEvalType SizeOffsetPair =
+ Eval.compute(ObjectSize->getArgOperand(0));
+
+ if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) {
+ IRBuilder<TargetFolder> Builder(Ctx, TargetFolder(DL));
+ Builder.SetInsertPoint(ObjectSize);
+
+ // If we're outside the end of the object, then we can always access
+ // exactly 0 bytes.
+ Value *ResultSize =
+ Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second);
+ Value *UseZero =
+ Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second);
+ return Builder.CreateSelect(UseZero, ConstantInt::get(ResultType, 0),
+ ResultSize);
+ }
+ }
if (!MustSucceed)
return nullptr;
@@ -684,7 +724,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -743,9 +783,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
- bool RoundToAlign)
- : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
- RoundToAlign(RoundToAlign) {
+ ObjectSizeOpts EvalOpts)
+ : DL(DL), TLI(TLI), Context(Context),
+ Builder(Context, TargetFolder(DL),
+ IRBuilderCallbackInserter(
+ [&](Instruction *I) { InsertedInstructions.insert(I); })),
+ EvalOpts(EvalOpts) {
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
@@ -767,17 +810,21 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
CacheMap.erase(CacheIt);
}
+
+ // Erase any instructions we inserted as part of the traversal.
+ for (Instruction *I : InsertedInstructions) {
+ I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->eraseFromParent();
+ }
}
SeenVals.clear();
+ InsertedInstructions.clear();
return Result;
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOpts ObjSizeOptions;
- ObjSizeOptions.RoundToAlign = RoundToAlign;
-
- ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, EvalOpts);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -916,24 +963,28 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
if (!bothKnown(EdgeData)) {
OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
OffsetPHI->eraseFromParent();
+ InsertedInstructions.erase(OffsetPHI);
SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
SizePHI->eraseFromParent();
+ InsertedInstructions.erase(SizePHI);
return unknown();
}
SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
}
- Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp;
- if ((Tmp = SizePHI->hasConstantValue())) {
+ Value *Size = SizePHI, *Offset = OffsetPHI;
+ if (Value *Tmp = SizePHI->hasConstantValue()) {
Size = Tmp;
SizePHI->replaceAllUsesWith(Size);
SizePHI->eraseFromParent();
+ InsertedInstructions.erase(SizePHI);
}
- if ((Tmp = OffsetPHI->hasConstantValue())) {
+ if (Value *Tmp = OffsetPHI->hasConstantValue()) {
Offset = Tmp;
OffsetPHI->replaceAllUsesWith(Offset);
OffsetPHI->eraseFromParent();
+ InsertedInstructions.erase(OffsetPHI);
}
return std::make_pair(Size, Offset);
}
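For the dynamic llvm.objectsize path added above, the emitted IR computes the bytes remaining between the evaluated offset and the end of the object, clamped at zero. A plain C++ sketch of that arithmetic, for illustration only:

#include <cstdint>

// Mirrors Select(ICmpULT(Size, Offset), 0, Sub(Size, Offset)) from the
// lowering above: once the offset passes the object's end, the accessible
// size is exactly 0 bytes.
static uint64_t remainingObjectBytes(uint64_t Size, uint64_t Offset) {
  return Size < Offset ? 0 : Size - Offset;
}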
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index e22182b99e11..b25b655165d7 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1,9 +1,8 @@
//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -327,7 +326,8 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
+ OrderedBasicBlock *OBB) {
MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -338,7 +338,7 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
}
}
MemDepResult SimpleDep = getSimplePointerDependencyFrom(
- MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
+ MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB);
if (SimpleDep.isDef())
return SimpleDep;
// Non-local invariant group dependency indicates there is non local Def
@@ -439,14 +439,13 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
+ OrderedBasicBlock *OBB) {
bool isInvariantLoad = false;
- if (!Limit) {
- unsigned DefaultLimit = BlockScanLimit;
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
- &DefaultLimit);
- }
+ unsigned DefaultLimit = BlockScanLimit;
+ if (!Limit)
+ Limit = &DefaultLimit;
// We must be careful with atomic accesses, as they may allow another thread
// to touch this location, clobbering it. We are conservative: if the
@@ -488,11 +487,14 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const DataLayout &DL = BB->getModule()->getDataLayout();
- // Create a numbered basic block to lazily compute and cache instruction
+ // If the caller did not provide an ordered basic block,
+ // create one to lazily compute and cache instruction
// positions inside a BB. This is used to provide fast queries for relative
// position between two instructions in a BB and can be used by
// AliasAnalysis::callCapturesBefore.
- OrderedBasicBlock OBB(BB);
+ OrderedBasicBlock OBBTmp(BB);
+ if (!OBB)
+ OBB = &OBBTmp;
// Return "true" if and only if the instruction I is either a non-simple
// load or a non-simple store.
@@ -673,7 +675,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// A release fence requires that all stores complete before it, but does
// not prevent the reordering of following loads or stores 'before' the
// fence. As a result, we look past it when finding a dependency for
- // loads. DSE uses this to find preceeding stores to delete and thus we
+ // loads. DSE uses this to find preceding stores to delete and thus we
// can't bypass the fence if the query instruction is a store.
if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
@@ -683,7 +685,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
if (isModAndRefSet(MR))
- MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
+ MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB);
switch (clearMust(MR)) {
case ModRefInfo::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
@@ -709,7 +711,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
return MemDepResult::getNonFuncLocal();
}
-MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
+MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst,
+ OrderedBasicBlock *OBB) {
Instruction *ScanPos = QueryInst;
// Check for a cached result
@@ -747,8 +750,9 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
- LocalCache = getPointerDependencyFrom(
- MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
+ LocalCache =
+ getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
+ QueryParent, QueryInst, nullptr, OBB);
} else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
bool isReadOnly = AA.onlyReadsMemory(QueryCall);
LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp
index 27e8d72b8e89..163830eee797 100644
--- a/lib/Analysis/MemoryLocation.cpp
+++ b/lib/Analysis/MemoryLocation.cpp
@@ -1,9 +1,8 @@
//===- MemoryLocation.cpp - Memory location descriptions -------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 6a5567ed765b..17f5d9b9f0ad 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -1,9 +1,8 @@
//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,6 +81,11 @@ bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif
+/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
+cl::opt<bool> llvm::EnableMSSALoopDependency(
+ "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
+ cl::desc("Enable MemorySSA dependency for loop pass manager"));
+
static cl::opt<bool, true>
VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
cl::Hidden, cl::desc("Enable verification of MemorySSA."));
@@ -252,10 +256,10 @@ struct ClobberAlias {
// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
// ignored if IsClobber = false.
-static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
- const MemoryLocation &UseLoc,
- const Instruction *UseInst,
- AliasAnalysis &AA) {
+template <typename AliasAnalysisType>
+static ClobberAlias
+instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
+ const Instruction *UseInst, AliasAnalysisType &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
const auto *UseCall = dyn_cast<CallBase>(UseInst);
@@ -300,10 +304,11 @@ static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
return {isModSet(I), AR};
}
+template <typename AliasAnalysisType>
static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
const MemoryUseOrDef *MU,
const MemoryLocOrCall &UseMLOC,
- AliasAnalysis &AA) {
+ AliasAnalysisType &AA) {
// FIXME: This is a temporary hack to allow a single instructionClobbersQuery
// to exist while MemoryLocOrCall is pushed through places.
if (UseMLOC.IsCall)
@@ -346,12 +351,12 @@ struct UpwardsMemoryQuery {
} // end anonymous namespace
static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
- AliasAnalysis &AA) {
+ BatchAAResults &AA) {
Instruction *Inst = MD->getMemoryInst();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_end:
- return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
+ return AA.alias(MemoryLocation(II->getArgOperand(1)), Loc) == MustAlias;
default:
return false;
}
@@ -359,13 +364,14 @@ static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
return false;
}
-static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
+template <typename AliasAnalysisType>
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
- AA.pointsToConstantMemory(cast<LoadInst>(I)->
- getPointerOperand()));
+ AA.pointsToConstantMemory(MemoryLocation(
+ cast<LoadInst>(I)->getPointerOperand())));
}
/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
@@ -381,10 +387,12 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
-static void
+
+template <typename AliasAnalysisType>
+LLVM_ATTRIBUTE_UNUSED static void
checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
const MemoryLocation &StartLoc, const MemorySSA &MSSA,
- const UpwardsMemoryQuery &Query, AliasAnalysis &AA,
+ const UpwardsMemoryQuery &Query, AliasAnalysisType &AA,
bool AllowImpreciseClobber = false) {
assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
@@ -474,7 +482,7 @@ namespace {
/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
/// in one class.
-class ClobberWalker {
+template <class AliasAnalysisType> class ClobberWalker {
/// Save a few bytes by using unsigned instead of size_t.
using ListIndex = unsigned;
@@ -498,9 +506,10 @@ class ClobberWalker {
};
const MemorySSA &MSSA;
- AliasAnalysis &AA;
+ AliasAnalysisType &AA;
DominatorTree &DT;
UpwardsMemoryQuery *Query;
+ unsigned *UpwardWalkLimit;
// Phi optimization bookkeeping
SmallVector<DefPath, 32> Paths;
@@ -539,6 +548,16 @@ class ClobberWalker {
walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
const MemoryAccess *SkipStopAt = nullptr) const {
assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
+ assert(UpwardWalkLimit && "Need a valid walk limit");
+ bool LimitAlreadyReached = false;
+ // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set
+ // it to 1 so that no alias() calls are made; either the loop below returns
+ // in its first iteration, or the limit is set back to 0 if all def chains
+ // are free of MemoryDefs.
+ if (!*UpwardWalkLimit) {
+ *UpwardWalkLimit = 1;
+ LimitAlreadyReached = true;
+ }
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
@@ -548,6 +567,10 @@ class ClobberWalker {
if (auto *MD = dyn_cast<MemoryDef>(Current)) {
if (MSSA.isLiveOnEntryDef(MD))
return {MD, true, MustAlias};
+
+ if (!--*UpwardWalkLimit)
+ return {Current, true, MayAlias};
+
ClobberAlias CA =
instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
if (CA.IsClobber)
@@ -555,6 +578,9 @@ class ClobberWalker {
}
}
+ if (LimitAlreadyReached)
+ *UpwardWalkLimit = 0;
+
assert(isa<MemoryPhi>(Desc.Last) &&
"Ended at a non-clobber that's not a phi?");
return {Desc.Last, false, MayAlias};
@@ -626,10 +652,12 @@ class ClobberWalker {
SkipStopWhere = Query->OriginalAccess;
}
- UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere,
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node,
+ /*StopAt=*/StopWhere,
/*SkipStopAt=*/SkipStopWhere);
if (Res.IsKnownClobber) {
assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
+
// If this wasn't a cache hit, we hit a clobber when walking. That's a
// failure.
TerminatedPath Term{Res.Result, PathIndex};
@@ -662,7 +690,7 @@ class ClobberWalker {
struct generic_def_path_iterator
: public iterator_facade_base<generic_def_path_iterator<T, Walker>,
std::forward_iterator_tag, T *> {
- generic_def_path_iterator() = default;
+ generic_def_path_iterator() {}
generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
T &operator*() const { return curNode(); }
@@ -887,13 +915,19 @@ class ClobberWalker {
}
public:
- ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
+ ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT)
: MSSA(MSSA), AA(AA), DT(DT) {}
+ AliasAnalysisType *getAA() { return &AA; }
/// Finds the nearest clobber for the given query, optimizing phis if
/// possible.
- MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
+ MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
+ unsigned &UpWalkLimit) {
Query = &Q;
+ UpwardWalkLimit = &UpWalkLimit;
+ // Starting limit must be > 0.
+ if (!UpWalkLimit)
+ UpWalkLimit++;
MemoryAccess *Current = Start;
// This walker pretends uses don't exist. If we're handed one, silently grab
@@ -918,13 +952,11 @@ public:
}
#ifdef EXPENSIVE_CHECKS
- if (!Q.SkipSelfAccess)
+ if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
return Result;
}
-
- void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
};
struct RenamePassData {
@@ -947,77 +979,99 @@ struct RenamePassData {
namespace llvm {
-class MemorySSA::ClobberWalkerBase {
- ClobberWalker Walker;
+template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase {
+ ClobberWalker<AliasAnalysisType> Walker;
MemorySSA *MSSA;
public:
- ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D)
+ ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D)
: Walker(*M, *A, *D), MSSA(M) {}
MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
- const MemoryLocation &);
- // Second argument (bool), defines whether the clobber search should skip the
+ const MemoryLocation &,
+ unsigned &);
+ // Third argument (bool), defines whether the clobber search should skip the
// original queried access. If true, there will be a follow-up query searching
// for a clobber access past "self". Note that the Optimized access is not
// updated if a new clobber is found by this SkipSelf search. If this
// additional query becomes heavily used we may decide to cache the result.
// Walker instantiations will decide how to set the SkipSelf bool.
- MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool);
- void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); }
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
};
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own, but the name has been retained for the
/// moment.
+template <class AliasAnalysisType>
class MemorySSA::CachingWalker final : public MemorySSAWalker {
- ClobberWalkerBase *Walker;
+ ClobberWalkerBase<AliasAnalysisType> *Walker;
public:
- CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
+ CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
: MemorySSAWalker(M), Walker(W) {}
~CachingWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, UWL, false);
+ }
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) override;
+ const MemoryLocation &Loc,
+ unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+ }
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+ }
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+ }
void invalidateInfo(MemoryAccess *MA) override {
if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
MUD->resetOptimized();
}
-
- void verify(const MemorySSA *MSSA) override {
- MemorySSAWalker::verify(MSSA);
- Walker->verify(MSSA);
- }
};
+template <class AliasAnalysisType>
class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
- ClobberWalkerBase *Walker;
+ ClobberWalkerBase<AliasAnalysisType> *Walker;
public:
- SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
+ SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
: MemorySSAWalker(M), Walker(W) {}
~SkipSelfWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, UWL, true);
+ }
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) override;
+ const MemoryLocation &Loc,
+ unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
+ }
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, UpwardWalkLimit);
+ }
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override {
+ unsigned UpwardWalkLimit = MaxCheckLimit;
+ return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
+ }
void invalidateInfo(MemoryAccess *MA) override {
if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
MUD->resetOptimized();
}
-
- void verify(const MemorySSA *MSSA) override {
- MemorySSAWalker::verify(MSSA);
- Walker->verify(MSSA);
- }
};
} // end namespace llvm
@@ -1071,6 +1125,8 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
SmallPtrSetImpl<BasicBlock *> &Visited,
bool SkipVisited, bool RenameAllUses) {
+ assert(Root && "Trying to rename accesses in an unreachable block");
+
SmallVector<RenamePassData, 32> WorkStack;
// Skip everything if we already renamed this block and we are skipping.
// Note: You can't sink this into the if, because we need it to occur
@@ -1154,9 +1210,20 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
}
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
- : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
+ : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
SkipWalker(nullptr), NextID(0) {
- buildMemorySSA();
+ // Build MemorySSA using a batch alias analysis. This reuses the internal
+ // state that AA collects during an alias()/getModRefInfo() call. This is
+ // safe because there are no CFG changes while building MemorySSA, and it can
+ // significantly reduce the time spent by the compiler in AA, because we
+ // make queries about all the instructions in the Function.
+ BatchAAResults BatchAA(*AA);
+ buildMemorySSA(BatchAA);
+ // Intentionally leave AA as nullptr while building so we don't accidentally
+ // use non-batch AliasAnalysis.
+ this->AA = AA;
+ // Also create the walker here.
+ getWalker();
}
MemorySSA::~MemorySSA() {
@@ -1193,11 +1260,9 @@ namespace llvm {
/// which is walking bottom-up.
class MemorySSA::OptimizeUses {
public:
- OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
- DominatorTree *DT)
- : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
- Walker = MSSA->getWalker();
- }
+ OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker,
+ BatchAAResults *BAA, DominatorTree *DT)
+ : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}
void optimizeUses();
@@ -1225,8 +1290,8 @@ private:
DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
MemorySSA *MSSA;
- MemorySSAWalker *Walker;
- AliasAnalysis *AA;
+ CachingWalker<BatchAAResults> *Walker;
+ BatchAAResults *AA;
DominatorTree *DT;
};
@@ -1343,11 +1408,12 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
continue;
}
bool FoundClobberResult = false;
+ unsigned UpwardWalkLimit = MaxCheckLimit;
while (UpperBound > LocInfo.LowerBound) {
if (isa<MemoryPhi>(VersionStack[UpperBound])) {
// For phis, use the walker, see where we ended up, go there
- Instruction *UseInst = MU->getMemoryInst();
- MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
+ MemoryAccess *Result =
+ Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
// We are guaranteed to find it or something is wrong
while (VersionStack[UpperBound] != Result) {
assert(UpperBound != 0);
@@ -1423,7 +1489,7 @@ void MemorySSA::placePHINodes(
createMemoryPhi(BB);
}
-void MemorySSA::buildMemorySSA() {
+void MemorySSA::buildMemorySSA(BatchAAResults &BAA) {
// We create an access to represent "live on entry", for things like
// arguments or users of globals, where the memory they use is defined before
// the beginning of the function. We do not actually insert it into the IR.
@@ -1445,7 +1511,7 @@ void MemorySSA::buildMemorySSA() {
AccessList *Accesses = nullptr;
DefsList *Defs = nullptr;
for (Instruction &I : B) {
- MemoryUseOrDef *MUD = createNewAccess(&I);
+ MemoryUseOrDef *MUD = createNewAccess(&I, &BAA);
if (!MUD)
continue;
@@ -1469,9 +1535,9 @@ void MemorySSA::buildMemorySSA() {
SmallPtrSet<BasicBlock *, 16> Visited;
renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
- CachingWalker *Walker = getWalkerImpl();
-
- OptimizeUses(this, Walker, AA, DT).optimizeUses();
+ ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT);
+ CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
+ OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses();
// Mark the uses in unreachable blocks as live on entry, so that they go
// somewhere.
@@ -1482,14 +1548,16 @@ void MemorySSA::buildMemorySSA() {
MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
-MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
+MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
if (!WalkerBase)
- WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+ WalkerBase =
+ llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
- Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get());
+ Walker =
+ llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
return Walker.get();
}
@@ -1498,9 +1566,11 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
return SkipWalker.get();
if (!WalkerBase)
- WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+ WalkerBase =
+ llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
- SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get());
+ SkipWalker =
+ llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
return SkipWalker.get();
}
@@ -1619,7 +1689,7 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
MemoryAccess *Definition,
const MemoryUseOrDef *Template) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
- MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
+ MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template);
assert(
NewAccess != nullptr &&
"Tried to create a memory access for a non-memory touching instruction");
@@ -1642,7 +1712,9 @@ static inline bool isOrdered(const Instruction *I) {
}
/// Helper function to create new memory accesses
+template <typename AliasAnalysisType>
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+ AliasAnalysisType *AAP,
const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
// that it writes arbitrarily. Ignore that fake memory dependency here.
@@ -1657,7 +1729,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
#if !defined(NDEBUG)
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ ModRefInfo ModRef = AAP->getModRefInfo(I, None);
bool DefCheck, UseCheck;
DefCheck = isModSet(ModRef) || isOrdered(I);
UseCheck = isRefSet(ModRef);
@@ -1665,7 +1737,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
#endif
} else {
// Find out what affect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ ModRefInfo ModRef = AAP->getModRefInfo(I, None);
// The isOrdered check is used to ensure that volatiles end up as defs
// (atomics end up as ModRef right now anyway). Until we separate the
// ordering chain from the memory chain, this enables people to see at least
@@ -1718,7 +1790,7 @@ void MemorySSA::removeFromLookups(MemoryAccess *MA) {
MUD->setDefiningAccess(nullptr);
// Invalidate our walker's cache if necessary
if (!isa<MemoryUse>(MA))
- Walker->invalidateInfo(MA);
+ getWalker()->invalidateInfo(MA);
Value *MemoryInst;
if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
@@ -1778,35 +1850,16 @@ void MemorySSA::verifyMemorySSA() const {
verifyDomination(F);
verifyOrdering(F);
verifyDominationNumbers(F);
- Walker->verify(this);
- verifyClobberSanity(F);
-}
-
-/// Check sanity of the clobbering instruction for access MA.
-void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const {
- if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
- if (!MUD->isOptimized())
- return;
- auto *I = MUD->getMemoryInst();
- auto Loc = MemoryLocation::getOrNone(I);
- if (Loc == None)
- return;
- auto *Clobber = MUD->getOptimized();
- UpwardsMemoryQuery Q(I, MUD);
- checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true);
- }
-}
-
-void MemorySSA::verifyClobberSanity(const Function &F) const {
-#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
- for (const BasicBlock &BB : F) {
- const AccessList *Accesses = getBlockAccesses(&BB);
- if (!Accesses)
- continue;
- for (const MemoryAccess &MA : *Accesses)
- checkClobberSanityAccess(&MA);
- }
-#endif
+ // Previously, the verification also checked that the clobbering access
+ // cached by MemorySSA is the same as the clobbering access found by a later
+ // query to AA. This does not hold true in general, due to the current
+ // fragility of BasicAA, which has arbitrary caps on the things it analyzes
+ // before giving up. As a result, correct transformations will lead to BasicAA
+ // returning different alias answers before and after that transformation.
+ // Invalidating MemorySSA is not an option, because the results from BasicAA
+ // can be so random that, in the worst case, we would need to rebuild MemorySSA
+ // from scratch after every transformation, which defeats the purpose of using
+ // it. For such an example, see test4 added in D51960.
}
/// Verify that all of the blocks we believe to have valid domination numbers
@@ -2162,6 +2215,15 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
}
+bool MemorySSAAnalysis::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ auto PAC = PA.getChecker<MemorySSAAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
+ Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA);
+}
+
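With the invalidate() hook above, the cached MemorySSA is dropped unless a pass preserves it together with the analyses it depends on. A minimal sketch of a new-pass-manager pass that keeps the result alive (the pass name is hypothetical; only the preserve calls reflect the logic added here):

PreservedAnalyses MyMSSAPreservingPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  // ... transform F while keeping MSSA up to date via MemorySSAUpdater ...
  PreservedAnalyses PA;
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<AAManager>();
  return PA;
}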
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
OS << "MemorySSA for function: " << F.getName() << "\n";
@@ -2210,8 +2272,11 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
- MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
+template <typename AliasAnalysisType>
+MemoryAccess *
+MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+ MemoryAccess *StartingAccess, const MemoryLocation &Loc,
+ unsigned &UpwardWalkLimit) {
if (isa<MemoryPhi>(StartingAccess))
return StartingAccess;
@@ -2239,7 +2304,8 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
? StartingUseOrDef->getDefiningAccess()
: StartingUseOrDef;
- MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q);
+ MemoryAccess *Clobber =
+ Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -2247,9 +2313,10 @@ MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
return Clobber;
}
+template <typename AliasAnalysisType>
MemoryAccess *
-MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
- bool SkipSelf) {
+MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
+ MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
@@ -2275,7 +2342,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
UpwardsMemoryQuery Q(I, StartingAccess);
- if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
+ if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
StartingAccess->setOptimized(LiveOnEntry);
StartingAccess->setOptimizedAccessType(None);
@@ -2295,7 +2362,7 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
return DefiningAccess;
}
- OptimizedAccess = Walker.findClobber(DefiningAccess, Q);
+ OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
StartingAccess->setOptimized(OptimizedAccess);
if (MSSA->isLiveOnEntryDef(OptimizedAccess))
StartingAccess->setOptimizedAccessType(None);
@@ -2311,10 +2378,10 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
MemoryAccess *Result;
if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
- isa<MemoryDef>(StartingAccess)) {
+ isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) {
assert(isa<MemoryDef>(Q.OriginalAccess));
Q.SkipSelfAccess = true;
- Result = Walker.findClobber(OptimizedAccess, Q);
+ Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit);
} else
Result = OptimizedAccess;
@@ -2325,28 +2392,6 @@ MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
}
MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
- return Walker->getClobberingMemoryAccessBase(MA, false);
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) {
- return Walker->getClobberingMemoryAccessBase(MA, Loc);
-}
-
-MemoryAccess *
-MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
- return Walker->getClobberingMemoryAccessBase(MA, true);
-}
-
-MemoryAccess *
-MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA,
- const MemoryLocation &Loc) {
- return Walker->getClobberingMemoryAccessBase(MA, Loc);
-}
-
-MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
return Use->getDefiningAccess();
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index 6c817d203684..4c1feee7fd9a 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -1,9 +1,8 @@
//===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------===//
//
@@ -73,7 +72,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// potential phi node. This will insert phi nodes if we cycle in order to
// break the cycle and have an operand.
for (auto *Pred : predecessors(BB))
- PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
+ if (MSSA->DT->isReachableFromEntry(Pred))
+ PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
+ else
+ PhiOps.push_back(MSSA->getLiveOnEntryDef());
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we never created a phi yet, that's okay
@@ -157,8 +159,10 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
auto *Defs = MSSA->getWritableBlockDefs(BB);
- if (Defs)
+ if (Defs) {
+ CachedPreviousDef.insert({BB, &*Defs->rbegin()});
return &*Defs->rbegin();
+ }
return getPreviousDefRecursive(BB, CachedPreviousDef);
}
@@ -270,6 +274,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// Also make sure we skip ourselves to avoid self references.
if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
continue;
+ // Defs are automatically unoptimized when the user is set to MD below,
+ // because the isOptimized() call will fail to find the same ID.
U.set(MD);
}
}
@@ -277,6 +283,9 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// and that def is now our defining access.
MD->setDefiningAccess(DefBefore);
+ // Remember the index where we may insert new phis below.
+ unsigned NewPhiIndex = InsertedPHIs.size();
+
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
if (!DefBeforeSameBlock) {
// If there was a local def before us, we must have the same effect it
@@ -290,9 +299,56 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// backwards to find the def. To make that work, we'd have to track whether
// getDefRecursive only ever used the single predecessor case. These types
// of paths also only exist in between CFG simplifications.
+
+ // If this is the first def in the block and this insert is in an arbitrary
+ // place, compute IDF and place phis.
+ auto Iter = MD->getDefsIterator();
+ ++Iter;
+ auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end();
+ if (Iter == IterEnd) {
+ ForwardIDFCalculator IDFs(*MSSA->DT);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+ DefiningBlocks.insert(MD->getBlock());
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
+ for (auto *BBIDF : IDFBlocks)
+ if (!MSSA->getMemoryAccess(BBIDF)) {
+ auto *MPhi = MSSA->createMemoryPhi(BBIDF);
+ NewInsertedPHIs.push_back(MPhi);
+ // Add the phis created into the IDF blocks to NonOptPhis, so they are
+ // not optimized out as trivial by the call to getPreviousDefFromEnd
+ // below. Once they are complete, all these Phis are added to the
+ // FixupList, and removed from NonOptPhis inside fixupDefs().
+ NonOptPhis.insert(MPhi);
+ }
+
+ for (auto &MPhi : NewInsertedPHIs) {
+ auto *BBIDF = MPhi->getBlock();
+ for (auto *Pred : predecessors(BBIDF)) {
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
+ MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef),
+ Pred);
+ }
+ }
+
+ // Re-take the index where we're adding the new phis, because the above
+ // call to getPreviousDefFromEnd may have inserted into InsertedPHIs.
+ NewPhiIndex = InsertedPHIs.size();
+ for (auto &MPhi : NewInsertedPHIs) {
+ InsertedPHIs.push_back(&*MPhi);
+ FixupList.push_back(&*MPhi);
+ }
+ }
+
FixupList.push_back(MD);
}
+ // Remember the index where we stopped inserting new phis above, since the
+ // fixupDefs call in the loop below may insert more that are already minimal.
+ unsigned NewPhiIndexEnd = InsertedPHIs.size();
+
while (!FixupList.empty()) {
unsigned StartingPHISize = InsertedPHIs.size();
fixupDefs(FixupList);
@@ -300,6 +356,12 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// Put any new phis on the fixup list, and process them
FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end());
}
+
+ // Optimize potentially non-minimal phis added in this method.
+ unsigned NewPhiSize = NewPhiIndexEnd - NewPhiIndex;
+ if (NewPhiSize)
+ tryRemoveTrivialPhis(ArrayRef<WeakVH>(&InsertedPHIs[NewPhiIndex], NewPhiSize));
+
// Now that all fixups are done, rename all uses if we are asked.
if (RenameUses) {
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -401,8 +463,8 @@ void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
}
}
-void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
- BasicBlock *To) {
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From,
+ const BasicBlock *To) {
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
bool Found = false;
MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
@@ -420,7 +482,8 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
const ValueToValueMapTy &VMap,
- PhiToDefMap &MPhiMap) {
+ PhiToDefMap &MPhiMap,
+ bool CloneWasSimplified) {
auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
MemoryAccess *InsnDefining = MA;
if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
@@ -450,16 +513,60 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
// instructions. This occurs in LoopRotate when cloning instructions
// from the old header to the old preheader. The cloned instruction may
// also be a simplified Value, not an Instruction (see LoopRotate).
+ // Also in LoopRotate, even when it's an instruction, due to it being
+ // simplified, it may be a Use rather than a Def, so we cannot use MUD as a
+ // template. Calls coming from updateForClonedBlockIntoPred ensure this.
if (Instruction *NewInsn =
dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
- NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+ NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()),
+ CloneWasSimplified ? nullptr : MUD);
MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
}
}
}
}
+void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock(
+ BasicBlock *Header, BasicBlock *Preheader, BasicBlock *BEBlock) {
+ auto *MPhi = MSSA->getMemoryAccess(Header);
+ if (!MPhi)
+ return;
+
+ // Create phi node in the backedge block and populate it with the same
+ // incoming values as MPhi. Skip incoming values coming from Preheader.
+ auto *NewMPhi = MSSA->createMemoryPhi(BEBlock);
+ bool HasUniqueIncomingValue = true;
+ MemoryAccess *UniqueValue = nullptr;
+ for (unsigned I = 0, E = MPhi->getNumIncomingValues(); I != E; ++I) {
+ BasicBlock *IBB = MPhi->getIncomingBlock(I);
+ MemoryAccess *IV = MPhi->getIncomingValue(I);
+ if (IBB != Preheader) {
+ NewMPhi->addIncoming(IV, IBB);
+ if (HasUniqueIncomingValue) {
+ if (!UniqueValue)
+ UniqueValue = IV;
+ else if (UniqueValue != IV)
+ HasUniqueIncomingValue = false;
+ }
+ }
+ }
+
+ // Update incoming edges into MPhi. Remove all but the incoming edge from
+ // Preheader. Add an edge from NewMPhi.
+ auto *AccFromPreheader = MPhi->getIncomingValueForBlock(Preheader);
+ MPhi->setIncomingValue(0, AccFromPreheader);
+ MPhi->setIncomingBlock(0, Preheader);
+ for (unsigned I = MPhi->getNumIncomingValues() - 1; I >= 1; --I)
+ MPhi->unorderedDeleteIncoming(I);
+ MPhi->addIncoming(NewMPhi, BEBlock);
+
+ // If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be
+ // replaced with the unique value.
+ if (HasUniqueIncomingValue)
+ removeMemoryAccess(NewMPhi);
+}
+
void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
ArrayRef<BasicBlock *> ExitBlocks,
const ValueToValueMapTy &VMap,
@@ -543,10 +650,13 @@ void MemorySSAUpdater::updateForClonedBlockIntoPred(
// Defs from BB being used in BB will be replaced with the cloned defs from
// VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
// incoming def into the Phi from P1.
+ // Instructions cloned into the predecessor are in practice sometimes
+ // simplified, so disable the use of the template, and create an access from
+ // scratch.
PhiToDefMap MPhiMap;
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
- cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+ cloneUsesAndDefs(BB, P1, VM, MPhiMap, /*CloneWasSimplified=*/true);
}
template <typename Iter>
@@ -599,7 +709,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
if (!RevDeleteUpdates.empty()) {
// Update for inserted edges: use newDT and snapshot CFG as if deletes had
- // not occured.
+ // not occurred.
// FIXME: This creates a new DT, so it's more expensive to do a mix of
// deletes/inserts vs just inserts. We can do an incremental update on the DT
// to revert deletes, then re-delete the edges. Teaching DT to do this is
@@ -697,7 +807,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
// Map a BB to its predecessors: added + previously existing. To get a
// deterministic order, store predecessors as SetVectors. The order in each
- // will be defined by teh order in Updates (fixed) and the order given by
+ // will be defined by the order in Updates (fixed) and the order given by
// children<> (also fixed). Since we further iterate over these ordered sets,
// we lose the information of multiple edges possibly existing between two
// blocks, so we'll keep an EdgeCount map for that.
@@ -756,15 +866,15 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (auto *BB : NewBlocks)
PredMap.erase(BB);
- SmallVector<BasicBlock *, 8> BlocksToProcess;
SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+ SmallVector<WeakVH, 8> InsertedPhis;
// First create MemoryPhis in all blocks that don't have one. Create in the
// order found in Updates, not in PredMap, to get deterministic numbering.
for (auto &Edge : Updates) {
BasicBlock *BB = Edge.getTo();
if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
- MSSA->createMemoryPhi(BB);
+ InsertedPhis.push_back(MSSA->createMemoryPhi(BB));
}
// Now we'll fill in the MemoryPhis with the right incoming values.
@@ -831,10 +941,6 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (auto *Pred : PrevBlockSet)
for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
NewPhi->addIncoming(DefP1, Pred);
-
- // Insert BB in the set of blocks that now have definition. We'll use this
- // to compute IDF and add Phis there next.
- BlocksToProcess.push_back(BB);
}
// Get all blocks that used to dominate BB and no longer do after adding
@@ -849,22 +955,41 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
}
+ tryRemoveTrivialPhis(InsertedPhis);
+ // Create the set of blocks that now have a definition. We'll use this to
+ // compute IDF and add Phis there next.
+ SmallVector<BasicBlock *, 8> BlocksToProcess;
+ for (auto &VH : InsertedPhis)
+ if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
+ BlocksToProcess.push_back(MPhi->getBlock());
+
// Compute IDF and add Phis in all IDF blocks that do not have one.
SmallVector<BasicBlock *, 32> IDFBlocks;
if (!BlocksToProcess.empty()) {
- ForwardIDFCalculator IDFs(DT);
+ ForwardIDFCalculator IDFs(DT, GD);
SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
BlocksToProcess.end());
IDFs.setDefiningBlocks(DefiningBlocks);
IDFs.calculate(IDFBlocks);
+
+ SmallSetVector<MemoryPhi *, 4> PhisToFill;
+ // First create all needed Phis.
+ for (auto *BBIDF : IDFBlocks)
+ if (!MSSA->getMemoryAccess(BBIDF)) {
+ auto *IDFPhi = MSSA->createMemoryPhi(BBIDF);
+ InsertedPhis.push_back(IDFPhi);
+ PhisToFill.insert(IDFPhi);
+ }
+ // Then update or insert their correct incoming values.
for (auto *BBIDF : IDFBlocks) {
- if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+ auto *IDFPhi = MSSA->getMemoryAccess(BBIDF);
+ assert(IDFPhi && "Phi must exist");
+ if (!PhisToFill.count(IDFPhi)) {
// Update existing Phi.
// FIXME: some updates may be redundant, try to optimize and skip some.
for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
} else {
- IDFPhi = MSSA->createMemoryPhi(BBIDF);
for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
BasicBlock *Pi = Pair.second;
IDFPhi->addIncoming(GetLastDef(Pi), Pi);
@@ -907,6 +1032,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
}
}
}
+ tryRemoveTrivialPhis(InsertedPhis);
}
// Move What before Where in the MemorySSA IR.
@@ -1052,7 +1178,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
}
}
-void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
+void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
assert(!MSSA->isLiveOnEntryDef(MA) &&
"Trying to remove the live on entry def");
// We can only delete phi nodes if they have no uses, or we can replace all
@@ -1071,6 +1197,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
}
+ SmallSetVector<MemoryPhi *, 4> PhisToCheck;
+
// Re-point the uses at our defining access
if (!isa<MemoryUse>(MA) && !MA->use_empty()) {
// Reset optimized on users of this store, and reset the uses.
@@ -1090,6 +1218,9 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
Use &U = *MA->use_begin();
if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser()))
MUD->resetOptimized();
+ if (OptimizePhis)
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U.getUser()))
+ PhisToCheck.insert(MP);
U.set(NewDefTarget);
}
}
@@ -1098,10 +1229,25 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
// are doing things here
MSSA->removeFromLookups(MA);
MSSA->removeFromLists(MA);
+
+ // Optionally optimize Phi uses. This will recursively remove trivial phis.
+ if (!PhisToCheck.empty()) {
+ SmallVector<WeakVH, 16> PhisToOptimize{PhisToCheck.begin(),
+ PhisToCheck.end()};
+ PhisToCheck.clear();
+
+ unsigned PhisSize = PhisToOptimize.size();
+ while (PhisSize-- > 0)
+ if (MemoryPhi *MP =
+ cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) {
+ auto OperRange = MP->operands();
+ tryRemoveTrivialPhi(MP, OperRange);
+ }
+ }
}
void MemorySSAUpdater::removeBlocks(
- const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
+ const SmallSetVector<BasicBlock *, 8> &DeadBlocks) {
// First delete all uses of BB in MemoryPhis.
for (BasicBlock *BB : DeadBlocks) {
Instruction *TI = BB->getTerminator();
@@ -1133,6 +1279,51 @@ void MemorySSAUpdater::removeBlocks(
}
}
+void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
+ for (auto &VH : UpdatedPHIs)
+ if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) {
+ auto OperRange = MPhi->operands();
+ tryRemoveTrivialPhi(MPhi, OperRange);
+ }
+}
+
+void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
+ const BasicBlock *BB = I->getParent();
+ // Remove memory accesses in BB for I and all following instructions.
+ auto BBI = I->getIterator(), BBE = BB->end();
+ // FIXME: If this becomes too expensive, iterate until the first instruction
+ // with a memory access, then iterate over MemoryAccesses.
+ while (BBI != BBE)
+ removeMemoryAccess(&*(BBI++));
+ // Update phis in BB's successors to remove BB.
+ SmallVector<WeakVH, 16> UpdatedPHIs;
+ for (const BasicBlock *Successor : successors(BB)) {
+ removeDuplicatePhiEdgesBetween(BB, Successor);
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(Successor)) {
+ MPhi->unorderedDeleteIncomingBlock(BB);
+ UpdatedPHIs.push_back(MPhi);
+ }
+ }
+ // Optimize trivial phis.
+ tryRemoveTrivialPhis(UpdatedPHIs);
+}
+
+void MemorySSAUpdater::changeCondBranchToUnconditionalTo(const BranchInst *BI,
+ const BasicBlock *To) {
+ const BasicBlock *BB = BI->getParent();
+ SmallVector<WeakVH, 16> UpdatedPHIs;
+ for (const BasicBlock *Succ : successors(BB)) {
+ removeDuplicatePhiEdgesBetween(BB, Succ);
+ if (Succ != To)
+ if (auto *MPhi = MSSA->getMemoryAccess(Succ)) {
+ MPhi->unorderedDeleteIncomingBlock(BB);
+ UpdatedPHIs.push_back(MPhi);
+ }
+ }
+ // Optimize trivial phis.
+ tryRemoveTrivialPhis(UpdatedPHIs);
+}
+
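A hedged sketch of how a transform might use the helper above before it mutates the IR, so the updater still sees the original successors of the branch's block (the function and variable names are assumptions, not taken from this patch):

void foldCondBranchTo(BranchInst *BI, BasicBlock *Keep,
                      MemorySSAUpdater &MSSAU) {
  // Update MemoryPhis while BI's parent still has both successors.
  MSSAU.changeCondBranchToUnconditionalTo(BI, Keep);
  // Now rewrite the terminator itself.
  BranchInst::Create(Keep, /*InsertBefore=*/BI);
  BI->eraseFromParent();
}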
MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
MemorySSA::InsertionPlace Point) {
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 1e321f17d59f..519242759824 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 87f76d43bb1e..e25eb290a665 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -71,6 +70,11 @@ cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
"all-non-critical", "All non-critical edges."),
clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));
+cl::opt<std::string> ModuleSummaryDotFile(
+ "module-summary-dot-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("File to emit dot graph of new summary into."));
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -227,6 +231,13 @@ static bool isNonVolatileLoad(const Instruction *I) {
return false;
}
+static bool isNonVolatileStore(const Instruction *I) {
+ if (const auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+
+ return false;
+}
+
static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, DominatorTree &DT,
@@ -241,7 +252,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
- SetVector<ValueInfo> RefEdges;
+ SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
SetVector<GlobalValue::GUID> TypeTests;
SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
TypeCheckedLoadVCalls;
@@ -254,6 +265,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// list.
findRefEdges(Index, &F, RefEdges, Visited);
std::vector<const Instruction *> NonVolatileLoads;
+ std::vector<const Instruction *> NonVolatileStores;
bool HasInlineAsmMaybeReferencingInternal = false;
for (const BasicBlock &BB : F)
@@ -261,12 +273,34 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (isa<DbgInfoIntrinsic>(I))
continue;
++NumInsts;
- if (isNonVolatileLoad(&I)) {
- // Postpone processing of non-volatile load instructions
- // See comments below
- Visited.insert(&I);
- NonVolatileLoads.push_back(&I);
- continue;
+ // A regular LTO module doesn't participate in ThinLTO import,
+ // so no reference from it can be read- or write-only, since this
+ // would require importing the variable as a local copy.
+ if (IsThinLTO) {
+ if (isNonVolatileLoad(&I)) {
+ // Postpone processing of non-volatile load instructions
+ // See comments below
+ Visited.insert(&I);
+ NonVolatileLoads.push_back(&I);
+ continue;
+ } else if (isNonVolatileStore(&I)) {
+ Visited.insert(&I);
+ NonVolatileStores.push_back(&I);
+ // All references from the second operand of a store (the destination
+ // address) can be considered write-only if they're not referenced by any
+ // non-store instruction. References from the first operand of a store
+ // (the stored value) can't be treated as either read- or write-only,
+ // so we add them to RefEdges as we do with all other instructions
+ // except non-volatile loads.
+ Value *Stored = I.getOperand(0);
+ if (auto *GV = dyn_cast<GlobalValue>(Stored))
+ // findRefEdges will try to examine GV operands, so instead
+ // of calling it we should add GV to RefEdges directly.
+ RefEdges.insert(Index.getOrInsertValueInfo(GV));
+ else if (auto *U = dyn_cast<User>(Stored))
+ findRefEdges(Index, U, RefEdges, Visited);
+ continue;
+ }
}
findRefEdges(Index, &I, RefEdges, Visited);
auto CS = ImmutableCallSite(&I);
@@ -357,24 +391,61 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
- // By now we processed all instructions in a function, except
- // non-volatile loads. All new refs we add in a loop below
- // are obviously constant. All constant refs are grouped in the
- // end of RefEdges vector, so we can use a single integer value
- // to identify them.
- unsigned RefCnt = RefEdges.size();
- for (const Instruction *I : NonVolatileLoads) {
- Visited.erase(I);
- findRefEdges(Index, I, RefEdges, Visited);
- }
- std::vector<ValueInfo> Refs = RefEdges.takeVector();
- // Regular LTO module doesn't participate in ThinLTO import,
- // so no reference from it can be readonly, since this would
- // require importing variable as local copy
- if (IsThinLTO)
- for (; RefCnt < Refs.size(); ++RefCnt)
+ std::vector<ValueInfo> Refs;
+ if (IsThinLTO) {
+ auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
+ SetVector<ValueInfo> &Edges,
+ SmallPtrSet<const User *, 8> &Cache) {
+ for (const auto *I : Instrs) {
+ Cache.erase(I);
+ findRefEdges(Index, I, Edges, Cache);
+ }
+ };
+
+ // By now we have processed all instructions in the function except
+ // non-volatile loads and non-volatile value stores. Let's find
+ // ref edges for both instruction sets.
+ AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
+ // We can add some values to the Visited set when processing load
+ // instructions which are also used by stores in NonVolatileStores.
+ // For example this can happen if we have following code:
+ //
+ // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
+ // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
+ //
+ // After processing loads we'll add the bitcast to the Visited set, and if
+ // we use the same set while processing stores, we'll never see the store
+ // to @bar, and @bar will be mistakenly treated as readonly.
+ SmallPtrSet<const llvm::User *, 8> StoreCache;
+ AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);
+
+ // If both a load and a store instruction reference the same variable,
+ // we won't be able to optimize it. Add all such reference edges
+ // to the RefEdges set.
+ for (auto &VI : StoreRefEdges)
+ if (LoadRefEdges.remove(VI))
+ RefEdges.insert(VI);
+
+ unsigned RefCnt = RefEdges.size();
+ // All new reference edges inserted in the two loops below are either
+ // read- or write-only. They will be grouped at the end of the RefEdges
+ // vector, so we can use a single integer value to identify them.
+ for (auto &VI : LoadRefEdges)
+ RefEdges.insert(VI);
+
+ unsigned FirstWORef = RefEdges.size();
+ for (auto &VI : StoreRefEdges)
+ RefEdges.insert(VI);
+
+ Refs = RefEdges.takeVector();
+ for (; RefCnt < FirstWORef; ++RefCnt)
Refs[RefCnt].setReadOnly();
+ for (; RefCnt < Refs.size(); ++RefCnt)
+ Refs[RefCnt].setWriteOnly();
+ } else {
+ Refs = RefEdges.takeVector();
+ }
// Explicitly add hot edges to enforce importing for designated GUIDs for
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
@@ -387,7 +458,8 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
- /* Live = */ false, F.isDSOLocal());
+ /* Live = */ false, F.isDSOLocal(),
+ F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
FunctionSummary::FFlags FunFlags{
F.hasFnAttribute(Attribute::ReadNone),
F.hasFnAttribute(Attribute::ReadOnly),
@@ -406,26 +478,134 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
Index.addGlobalValueSummary(F, std::move(FuncSummary));
}
+/// Find function pointers referenced within the given vtable initializer
+/// (or subset of an initializer) \p I. The starting offset of \p I within
+/// the vtable initializer is \p StartingOffset. Any discovered function
+/// pointers are added to \p VTableFuncs along with their cumulative offset
+/// within the initializer.
+static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
+ const Module &M, ModuleSummaryIndex &Index,
+ VTableFuncList &VTableFuncs) {
+ // First check if this is a function pointer.
+ if (I->getType()->isPointerTy()) {
+ auto Fn = dyn_cast<Function>(I->stripPointerCasts());
+ // We can disregard __cxa_pure_virtual as a possible call target, as
+ // calls to pure virtuals are UB.
+ if (Fn && Fn->getName() != "__cxa_pure_virtual")
+ VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
+ return;
+ }
+
+ // Walk through the elements in the constant struct or array and recursively
+ // look for virtual function pointers.
+ const DataLayout &DL = M.getDataLayout();
+ if (auto *C = dyn_cast<ConstantStruct>(I)) {
+ StructType *STy = dyn_cast<StructType>(C->getType());
+ assert(STy);
+ const StructLayout *SL = DL.getStructLayout(C->getType());
+
+ for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ auto Offset = SL->getElementOffset(EI - EB);
+ unsigned Op = SL->getElementContainingOffset(Offset);
+ findFuncPointers(cast<Constant>(I->getOperand(Op)),
+ StartingOffset + Offset, M, Index, VTableFuncs);
+ }
+ } else if (auto *C = dyn_cast<ConstantArray>(I)) {
+ ArrayType *ATy = C->getType();
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+ findFuncPointers(cast<Constant>(I->getOperand(i)),
+ StartingOffset + i * EltSize, M, Index, VTableFuncs);
+ }
+ }
+}
+
+// Identify the function pointers referenced by vtable definition \p V.
+static void computeVTableFuncs(ModuleSummaryIndex &Index,
+ const GlobalVariable &V, const Module &M,
+ VTableFuncList &VTableFuncs) {
+ if (!V.isConstant())
+ return;
+
+ findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
+ VTableFuncs);
+
+#ifndef NDEBUG
+ // Validate that the VTableFuncs list is ordered by offset.
+ uint64_t PrevOffset = 0;
+ for (auto &P : VTableFuncs) {
+ // The findFuncPointers traversal should have encountered the
+ // functions in offset order. We need to use ">=" since PrevOffset
+ // starts at 0.
+ assert(P.VTableOffset >= PrevOffset);
+ PrevOffset = P.VTableOffset;
+ }
+#endif
+}
+
+/// Record vtable definition \p V for each type metadata it references.
static void
-computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
- DenseSet<GlobalValue::GUID> &CantBePromoted) {
+recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
+ const GlobalVariable &V,
+ SmallVectorImpl<MDNode *> &Types) {
+ for (MDNode *Type : Types) {
+ auto TypeID = Type->getOperand(1).get();
+
+ uint64_t Offset =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+
+ if (auto *TypeId = dyn_cast<MDString>(TypeID))
+ Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
+ .push_back({Offset, Index.getOrInsertValueInfo(&V)});
+ }
+}
+
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+ const GlobalVariable &V,
+ DenseSet<GlobalValue::GUID> &CantBePromoted,
+ const Module &M,
+ SmallVectorImpl<MDNode *> &Types) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
- /* Live = */ false, V.isDSOLocal());
+ /* Live = */ false, V.isDSOLocal(),
+ V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());
+
+ VTableFuncList VTableFuncs;
+ // If splitting is not enabled, then we compute the summary information
+ // necessary for index-based whole program devirtualization.
+ if (!Index.enableSplitLTOUnit()) {
+ Types.clear();
+ V.getMetadata(LLVMContext::MD_type, Types);
+ if (!Types.empty()) {
+ // Identify the function pointers referenced by this vtable definition.
+ computeVTableFuncs(Index, V, M, VTableFuncs);
+
+ // Record this vtable definition for each type metadata it references.
+ recordTypeIdCompatibleVtableReferences(Index, V, Types);
+ }
+ }
- // Don't mark variables we won't be able to internalize as read-only.
- GlobalVarSummary::GVarFlags VarFlags(
+ // Don't mark variables we won't be able to internalize as read/write-only.
+ bool CanBeInternalized =
!V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
- !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+ !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
+ GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
if (HasBlockAddress)
GVarSummary->setNotEligibleToImport();
+ if (!VTableFuncs.empty())
+ GVarSummary->setVTableFuncs(VTableFuncs);
Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
@@ -434,12 +614,15 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
- /* Live = */ false, A.isDSOLocal());
+ /* Live = */ false, A.isDSOLocal(),
+ A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
auto AS = llvm::make_unique<AliasSummary>(Flags);
auto *Aliasee = A.getBaseObject();
- auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
- assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
- AS->setAliasee(AliaseeSummary);
+ auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
+ assert(AliaseeVI && "Alias expects aliasee summary to be available");
+ assert(AliaseeVI.getSummaryList().size() == 1 &&
+ "Expected a single entry per aliasee in per-module index");
+ AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
if (NonRenamableLocal)
CantBePromoted.insert(A.getGUID());
Index.addGlobalValueSummary(A, std::move(AS));
@@ -507,7 +690,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
/* NotEligibleToImport = */ true,
/* Live = */ true,
- /* Local */ GV->isDSOLocal());
+ /* Local */ GV->isDSOLocal(),
+ GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
CantBePromoted.insert(GV->getGUID());
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
@@ -531,7 +715,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
} else {
std::unique_ptr<GlobalVarSummary> Summary =
llvm::make_unique<GlobalVarSummary>(
- GVFlags, GlobalVarSummary::GVarFlags(),
+ GVFlags, GlobalVarSummary::GVarFlags(false, false),
ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
}
@@ -568,10 +752,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// Compute summaries for all variables defined in module, and save in the
// index.
+ SmallVector<MDNode *, 2> Types;
for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(Index, G, CantBePromoted);
+ computeVariableSummary(Index, G, CantBePromoted, M, Types);
}
// Compute summaries for all aliases defined in module, and save in the
@@ -626,6 +811,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
}
}
+ if (!ModuleSummaryDotFile.empty()) {
+ std::error_code EC;
+ raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None);
+ if (EC)
+ report_fatal_error(Twine("Failed to open dot file ") +
+ ModuleSummaryDotFile + ": " + EC.message() + "\n");
+ Index.exportToDot(OSDot);
+ }
+
return Index;
}
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index 180c38ddacc2..b616cd6f762b 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -1,9 +1,8 @@
//===- MustExecute.cpp - Printer for isGuaranteedToExecute ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -194,7 +193,8 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
SmallPtrSet<const BasicBlock *, 4> Predecessors;
collectTransitivePredecessors(CurLoop, BB, Predecessors);
- // Make sure that all successors of all predecessors of BB are either:
+ // Make sure that all successors of all predecessors of BB that are not
+ // dominated by BB are either:
// 1) BB,
// 2) Also predecessors of BB,
// 3) Exit blocks which are not taken on 1st iteration.
@@ -204,6 +204,12 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
// Predecessor block may throw, so it has a side exit.
if (blockMayThrow(Pred))
return false;
+
+ // BB dominates Pred, so if Pred runs, BB must run.
+ // This is true when Pred is a loop latch.
+ if (DT->dominates(BB, Pred))
+ continue;
+
for (auto *Succ : successors(Pred))
if (CheckedSuccessors.insert(Succ).second &&
Succ != BB && !Predecessors.count(Succ))
diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 95ae1a6e744f..811033e73147 100644
--- a/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -38,9 +37,10 @@ using namespace llvm;
using namespace llvm::objcarc;
AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (!EnableARCOpts)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// First, strip off no-ops, including ObjC-specific no-ops, and try making a
// precise alias query.
@@ -48,7 +48,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
const Value *SB = GetRCIdentityRoot(LocB.Ptr);
AliasResult Result =
AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags),
- MemoryLocation(SB, LocB.Size, LocB.AATags));
+ MemoryLocation(SB, LocB.Size, LocB.AATags), AAQI);
if (Result != MayAlias)
return Result;
@@ -57,7 +57,7 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
const Value *UA = GetUnderlyingObjCPtr(SA, DL);
const Value *UB = GetUnderlyingObjCPtr(SB, DL);
if (UA != SA || UB != SB) {
- Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB));
+ Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB), AAQI);
// We can't use MustAlias or PartialAlias results here because
// GetUnderlyingObjCPtr may return an offsetted pointer value.
if (Result == NoAlias)
@@ -70,22 +70,23 @@ AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA,
}
bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+ AAQueryInfo &AAQI, bool OrLocal) {
if (!EnableARCOpts)
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
// First, strip off no-ops, including ObjC-specific no-ops, and try making
// a precise alias query.
const Value *S = GetRCIdentityRoot(Loc.Ptr);
if (AAResultBase::pointsToConstantMemory(
- MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal))
+ MemoryLocation(S, Loc.Size, Loc.AATags), AAQI, OrLocal))
return true;
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
const Value *U = GetUnderlyingObjCPtr(S, DL);
if (U != S)
- return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal);
+ return AAResultBase::pointsToConstantMemory(MemoryLocation(U), AAQI,
+ OrLocal);
// If that failed, fail. We don't need to chain here, since that's covered
// by the earlier precise query.
@@ -107,9 +108,10 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
}
ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (!EnableARCOpts)
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
switch (GetBasicARCInstKind(Call)) {
case ARCInstKind::Retain:
@@ -128,7 +130,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
break;
}
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp
index d6db6386c38b..56d1cb421225 100644
--- a/lib/Analysis/ObjCARCAnalysisUtils.cpp
+++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index 31c432711834..0e96c6e975c9 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -1,9 +1,8 @@
//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -482,6 +481,41 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
llvm_unreachable("covered switch isn't covered?");
}
+/// Test if the given class represents instructions which do nothing if
+/// passed a global variable.
+bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) {
+ switch (Class) {
+ case ARCInstKind::Retain:
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::ClaimRV:
+ case ARCInstKind::Release:
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ case ARCInstKind::RetainBlock:
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return true;
+ case ARCInstKind::AutoreleasepoolPush:
+ case ARCInstKind::AutoreleasepoolPop:
+ case ARCInstKind::LoadWeakRetained:
+ case ARCInstKind::StoreWeak:
+ case ARCInstKind::InitWeak:
+ case ARCInstKind::LoadWeak:
+ case ARCInstKind::MoveWeak:
+ case ARCInstKind::CopyWeak:
+ case ARCInstKind::DestroyWeak:
+ case ARCInstKind::StoreStrong:
+ case ARCInstKind::IntrinsicUser:
+ case ARCInstKind::CallOrUser:
+ case ARCInstKind::Call:
+ case ARCInstKind::User:
+ case ARCInstKind::None:
+ case ARCInstKind::NoopCast:
+ return false;
+ }
+ llvm_unreachable("covered switch isn't covered?");
+}
+
/// Test if the given class represents instructions which are always safe
/// to mark with the "tail" keyword.
bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp
index 8ece0a2a3ed3..72c40a0be232 100644
--- a/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -1,9 +1,8 @@
//===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 5f4fe0f7dda2..48f2a4020c66 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -1,9 +1,8 @@
//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,3 +85,27 @@ bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
return comesBefore(A, B);
}
+
+void OrderedBasicBlock::eraseInstruction(const Instruction *I) {
+ if (LastInstFound != BB->end() && I == &*LastInstFound) {
+ if (LastInstFound == BB->begin()) {
+ LastInstFound = BB->end();
+ NextInstPos = 0;
+ } else
+ LastInstFound--;
+ }
+
+ NumberedInsts.erase(I);
+}
+
+void OrderedBasicBlock::replaceInstruction(const Instruction *Old,
+ const Instruction *New) {
+ auto OI = NumberedInsts.find(Old);
+ if (OI == NumberedInsts.end())
+ return;
+
+ NumberedInsts.insert({New, OI->second});
+ if (LastInstFound != BB->end() && Old == &*LastInstFound)
+ LastInstFound = New->getIterator();
+ NumberedInsts.erase(Old);
+}
diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp
index 7b155208c02e..458c0a7de6c2 100644
--- a/lib/Analysis/OrderedInstructions.cpp
+++ b/lib/Analysis/OrderedInstructions.cpp
@@ -1,9 +1,8 @@
//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 858f08f6537a..7f77ab146c4c 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -1,9 +1,8 @@
//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/PhiValues.cpp b/lib/Analysis/PhiValues.cpp
index 729227c86697..49749bc44746 100644
--- a/lib/Analysis/PhiValues.cpp
+++ b/lib/Analysis/PhiValues.cpp
@@ -1,9 +1,8 @@
//===- PhiValues.cpp - Phi Value Analysis ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index e6b660fe26d7..4afe22bd5342 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -1,9 +1,8 @@
//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 1d70c75f2e1c..dce19d6d546e 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -1,9 +1,8 @@
//===- ProfileSummaryInfo.cpp - Global profile summary information --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,10 +60,9 @@ static cl::opt<int> ProfileSummaryColdCount(
// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
- auto Compare = [](const ProfileSummaryEntry &Entry, uint64_t Percentile) {
+ auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
return Entry.Cutoff < Percentile;
- };
- auto It = std::lower_bound(DS.begin(), DS.end(), Percentile, Compare);
+ });
// The required percentile has to be <= one of the percentiles in the
// detailed summary.
if (It == DS.end())
@@ -80,7 +78,14 @@ static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
bool ProfileSummaryInfo::computeSummary() {
if (Summary)
return true;
- auto *SummaryMD = M.getProfileSummary();
+ // First try to get context sensitive ProfileSummary.
+ auto *SummaryMD = M.getProfileSummary(/* IsCS */ true);
+ if (SummaryMD) {
+ Summary.reset(ProfileSummary::getFromMD(SummaryMD));
+ return true;
+ }
+ // This will actually return PSK_Instr or PSK_Sample summary.
+ SummaryMD = M.getProfileSummary(/* IsCS */ false);
if (!SummaryMD)
return false;
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
@@ -89,7 +94,8 @@ bool ProfileSummaryInfo::computeSummary() {
Optional<uint64_t>
ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
- BlockFrequencyInfo *BFI) {
+ BlockFrequencyInfo *BFI,
+ bool AllowSynthetic) {
if (!Inst)
return None;
assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
@@ -105,7 +111,7 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
return None;
}
if (BFI)
- return BFI->getBlockProfileCount(Inst->getParent());
+ return BFI->getBlockProfileCount(Inst->getParent(), AllowSynthetic);
return None;
}
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
index 1fdaf4d55b59..9a834ba4866a 100644
--- a/lib/Analysis/PtrUseVisitor.cpp
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -1,9 +1,8 @@
//===- PtrUseVisitor.cpp - InstVisitors over a pointer's uses -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,5 +34,11 @@ bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
if (!IsOffsetKnown)
return false;
- return GEPI.accumulateConstantOffset(DL, Offset);
+ APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0);
+ if (GEPI.accumulateConstantOffset(DL, TmpOffset)) {
+ Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth());
+ return true;
+ }
+
+ return false;
}
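
The adjustOffsetForGEP hunk above no longer hands the visitor's running Offset straight to accumulateConstantOffset; it accumulates at the GEP's index-type width and then sign-extends or truncates before adding, so a width mismatch cannot trip APInt's bit-width assertions. A one-function sketch of that adjustment, assuming only LLVM's APInt (the helper name is made up for illustration):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // Fold an offset computed at the GEP index width into a running offset of a
    // possibly different width.
    void addConstantGEPOffset(APInt &RunningOffset, const APInt &IndexWidthOffset) {
      RunningOffset += IndexWidthOffset.sextOrTrunc(RunningOffset.getBitWidth());
    }
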
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 2bd611350f46..8ba38adfb0d2 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -1,9 +1,8 @@
//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Detects single entry single exit regions in the control flow graph.
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index a101ff109199..6c0d17b45c62 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -1,9 +1,8 @@
//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -279,12 +278,17 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
return new PrintRegionPass(Banner, O);
}
+static std::string getDescription(const Region &R) {
+ return "region";
+}
+
bool RegionPass::skipRegion(Region &R) const {
Function &F = *R.getEntry()->getParent();
- if (!F.getContext().getOptPassGate().shouldRunPass(this, R))
+ OptPassGate &Gate = F.getContext().getOptPassGate();
+ if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(R)))
return true;
- if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F.hasOptNone()) {
// Report this only once per function.
if (R.getEntry() == &F.getEntryBlock())
LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
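
The skipRegion hunk above moves to the string-based OptPassGate interface and consults the gate only when it is enabled, in addition to the optnone check. A rough sketch of the shape of that test, with a hypothetical Gate stand-in rather than the real llvm::OptPassGate API:

    #include <string>

    struct Gate {                       // hypothetical stand-in, not llvm::OptPassGate
      bool Enabled = false;
      bool shouldRunPass(const std::string &Desc) const { return true; }
    };

    bool skipUnit(const Gate &G, bool FunctionHasOptNone, const std::string &Desc) {
      if (G.Enabled && !G.shouldRunPass(Desc))
        return true;                    // vetoed by the gate (e.g. opt-bisect)
      return FunctionHasOptNone;        // otherwise skip only for optnone functions
    }
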
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 5986b8c4e0c3..5bdcb31fbe99 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -1,9 +1,8 @@
//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Print out the region tree of a function using dotty/graphviz.
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e5134f2eeda9..bc2cfd6fcc42 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -203,15 +202,20 @@ static cl::opt<unsigned> MaxConstantEvolvingDepth(
cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
static cl::opt<unsigned>
- MaxExtDepth("scalar-evolution-max-ext-depth", cl::Hidden,
- cl::desc("Maximum depth of recursive SExt/ZExt"),
- cl::init(8));
+ MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"),
+ cl::init(8));
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(8));
+static cl::opt<unsigned>
+ HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden,
+ cl::desc("Size of the expression which is considered huge"),
+ cl::init(4096));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -273,7 +277,9 @@ void SCEV::print(raw_ostream &OS) const {
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
- case scSMaxExpr: {
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
const char *OpStr = nullptr;
switch (NAry->getSCEVType()) {
@@ -281,6 +287,12 @@ void SCEV::print(raw_ostream &OS) const {
case scMulExpr: OpStr = " * "; break;
case scUMaxExpr: OpStr = " umax "; break;
case scSMaxExpr: OpStr = " smax "; break;
+ case scUMinExpr:
+ OpStr = " umin ";
+ break;
+ case scSMinExpr:
+ OpStr = " smin ";
+ break;
}
OS << "(";
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
@@ -349,6 +361,8 @@ Type *SCEV::getType() const {
case scMulExpr:
case scUMaxExpr:
case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
return cast<SCEVNAryExpr>(this)->getType();
case scAddExpr:
return cast<SCEVAddExpr>(this)->getType();
@@ -393,7 +407,7 @@ bool SCEV::isNonConstantNegative() const {
}
SCEVCouldNotCompute::SCEVCouldNotCompute() :
- SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
+ SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {}
bool SCEVCouldNotCompute::classof(const SCEV *S) {
return S->getSCEVType() == scCouldNotCompute;
@@ -422,7 +436,7 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
unsigned SCEVTy, const SCEV *op, Type *ty)
- : SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
+ : SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
const SCEV *op, Type *ty)
@@ -713,7 +727,9 @@ static int CompareSCEVComplexity(
case scAddExpr:
case scMulExpr:
case scSMaxExpr:
- case scUMaxExpr: {
+ case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr: {
const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
@@ -795,11 +811,10 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
// Do the rough sort by complexity.
- std::stable_sort(Ops.begin(), Ops.end(),
- [&](const SCEV *LHS, const SCEV *RHS) {
- return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
- LHS, RHS, DT) < 0;
- });
+ llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) {
+ return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) <
+ 0;
+ });
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
@@ -846,6 +861,17 @@ static inline int sizeOfSCEV(const SCEV *S) {
return F.Size;
}
+/// Returns true if the subtree of \p S contains at least HugeExprThreshold
+/// nodes.
+static bool isHugeExpression(const SCEV *S) {
+ return S->getExpressionSize() >= HugeExprThreshold;
+}
+
+/// Returns true of \p Ops contains a huge SCEV (see definition above).
+static bool hasHugeExpression(ArrayRef<const SCEV *> Ops) {
+ return any_of(Ops, isHugeExpression);
+}
+
namespace {
struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
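
The new isHugeExpression/hasHugeExpression helpers above let the add/mul folding below bail out once any operand's expression tree crosses the scalar-evolution-huge-expr-threshold. The same any_of-over-a-threshold pattern in plain STL, with illustrative names (the real code sizes SCEV nodes, not a plain vector):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    constexpr std::size_t HugeExprThreshold = 4096;  // default of the new cl::opt

    bool hasHugeExpression(const std::vector<std::size_t> &OperandSizes) {
      return std::any_of(OperandSizes.begin(), OperandSizes.end(),
                         [](std::size_t S) { return S >= HugeExprThreshold; });
    }
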
@@ -913,6 +939,8 @@ public:
void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
+ void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
void visitUnknown(const SCEVUnknown *Numerator) {}
void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
@@ -1219,8 +1247,8 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It,
// SCEV Expression folder implementations
//===----------------------------------------------------------------------===//
-const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
- Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
+ unsigned Depth) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
@@ -1241,15 +1269,23 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
- return getTruncateExpr(ST->getOperand(), Ty);
+ return getTruncateExpr(ST->getOperand(), Ty, Depth + 1);
// trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
- return getTruncateOrSignExtend(SS->getOperand(), Ty);
+ return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1);
// trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
- return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
+ return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1);
+
+ if (Depth > MaxCastDepth) {
+ SCEV *S =
+ new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
+ return S;
+ }
// trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and
// trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN),
@@ -1261,7 +1297,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
unsigned numTruncs = 0;
for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2;
++i) {
- const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty);
+ const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1);
if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S))
numTruncs++;
Operands.push_back(S);
@@ -1285,7 +1321,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
for (const SCEV *Op : AddRec->operands())
- Operands.push_back(getTruncateExpr(Op, Ty));
+ Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1));
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
@@ -1619,7 +1655,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
ID.AddPointer(Ty);
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- if (Depth > MaxExtDepth) {
+ if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
@@ -1637,7 +1673,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).zeroExtend(NewBits).contains(
CR.zextOrTrunc(NewBits)))
- return getTruncateOrZeroExtend(X, Ty);
+ return getTruncateOrZeroExtend(X, Ty, Depth);
}
// If the input value is a chrec scev, and we can prove that the value
@@ -1679,9 +1715,9 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
- getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV *RecastedMaxBECount =
- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
+ CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
@@ -1930,7 +1966,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Limit recursion depth.
- if (Depth > MaxExtDepth) {
+ if (Depth > MaxCastDepth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
@@ -1948,7 +1984,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
unsigned NewBits = getTypeSizeInBits(Ty);
if (CR.truncate(TruncBits).signExtend(NewBits).contains(
CR.sextOrTrunc(NewBits)))
- return getTruncateOrSignExtend(X, Ty);
+ return getTruncateOrSignExtend(X, Ty, Depth);
}
if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {
@@ -2023,9 +2059,9 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// Check whether the backedge-taken count can be losslessly casted to
// the addrec's type. The count is always unsigned.
const SCEV *CastedMaxBECount =
- getTruncateOrZeroExtend(MaxBECount, Start->getType());
- const SCEV *RecastedMaxBECount =
- getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
+ getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth);
+ const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(
+ CastedMaxBECount, MaxBECount->getType(), Depth);
if (MaxBECount == RecastedMaxBECount) {
Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
@@ -2295,7 +2331,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
// can't-overflow flags for the operation if possible.
static SCEV::NoWrapFlags
StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
- const SmallVectorImpl<const SCEV *> &Ops,
+ const ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
using namespace std::placeholders;
@@ -2405,7 +2441,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
// Limit recursion calls depth.
- if (Depth > MaxArithDepth)
+ if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateAddExpr(Ops, Flags);
// Okay, check to see if the same value occurs in the operand list more than
@@ -2743,7 +2779,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
@@ -2765,7 +2801,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
@@ -2788,7 +2824,7 @@ ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
@@ -2884,7 +2920,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// Limit recursion calls depth.
- if (Depth > MaxArithDepth)
+ if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateMulExpr(Ops, Flags);
// If there are any constants, fold them together.
@@ -3057,7 +3093,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Limit max number of arguments to avoid creation of unreasonably big
// SCEVAddRecs with very complex operands.
if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
- MaxAddRecSize)
+ MaxAddRecSize || isHugeExpression(AddRec) ||
+ isHugeExpression(OtherAddRec))
continue;
bool Overflow = false;
@@ -3090,7 +3127,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
- const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;
@@ -3493,209 +3530,166 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
return getAddExpr(BaseExpr, TotalOffset, Wrap);
}
-const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
- const SCEV *RHS) {
- SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
- return getSMaxExpr(Ops);
+std::tuple<const SCEV *, FoldingSetNodeID, void *>
+ScalarEvolution::findExistingSCEVInCache(int SCEVType,
+ ArrayRef<const SCEV *> Ops) {
+ FoldingSetNodeID ID;
+ void *IP = nullptr;
+ ID.AddInteger(SCEVType);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ return std::tuple<const SCEV *, FoldingSetNodeID, void *>(
+ UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
}
-const SCEV *
-ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
- assert(!Ops.empty() && "Cannot get empty smax!");
+const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
+ SmallVectorImpl<const SCEV *> &Ops) {
+ assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
- "SCEVSMaxExpr operand types don't match!");
+ "Operand types don't match!");
#endif
+ bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
+ bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
+
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, &LI, DT);
+ // Check if we have created the same expression before.
+ if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
+ return S;
+ }
+
// If there are any constants, fold them together.
unsigned Idx = 0;
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
++Idx;
assert(Idx < Ops.size());
+ auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
+ if (Kind == scSMaxExpr)
+ return APIntOps::smax(LHS, RHS);
+ else if (Kind == scSMinExpr)
+ return APIntOps::smin(LHS, RHS);
+ else if (Kind == scUMaxExpr)
+ return APIntOps::umax(LHS, RHS);
+ else if (Kind == scUMinExpr)
+ return APIntOps::umin(LHS, RHS);
+ llvm_unreachable("Unknown SCEV min/max opcode");
+ };
+
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
ConstantInt *Fold = ConstantInt::get(
- getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
+ getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
Ops[0] = getConstant(Fold);
Ops.erase(Ops.begin()+1); // Erase the folded element
if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
- // If we are left with a constant minimum-int, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+ bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
+ bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
+
+ if (IsMax ? IsMinV : IsMaxV) {
+ // If we are left with a constant minimum(/maximum)-int, strip it off.
Ops.erase(Ops.begin());
--Idx;
- } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
- // If we have an smax with a constant maximum-int, it will always be
- // maximum-int.
- return Ops[0];
+ } else if (IsMax ? IsMaxV : IsMinV) {
+ // If we have a max(/min) with a constant maximum(/minimum)-int,
+ // it will always be the extremum.
+ return LHSC;
}
if (Ops.size() == 1) return Ops[0];
}
- // Find the first SMax
- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
+ // Find the first operation of the same kind
+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
++Idx;
- // Check to see if one of the operands is an SMax. If so, expand its operands
- // onto our operand list, and recurse to simplify.
+ // Check to see if one of the operands is of the same kind. If so, expand its
+ // operands onto our operand list, and recurse to simplify.
if (Idx < Ops.size()) {
- bool DeletedSMax = false;
- while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
+ bool DeletedAny = false;
+ while (Ops[Idx]->getSCEVType() == Kind) {
+ const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
Ops.erase(Ops.begin()+Idx);
- Ops.append(SMax->op_begin(), SMax->op_end());
- DeletedSMax = true;
+ Ops.append(SMME->op_begin(), SMME->op_end());
+ DeletedAny = true;
}
- if (DeletedSMax)
- return getSMaxExpr(Ops);
+ if (DeletedAny)
+ return getMinMaxExpr(Kind, Ops);
}
// Okay, check to see if the same value occurs in the operand list twice. If
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- // X smax Y smax Y --> X smax Y
- // X smax Y --> X, if X is always greater than Y
- if (Ops[i] == Ops[i+1] ||
- isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
- --i; --e;
- } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
- Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
- --i; --e;
+ llvm::CmpInst::Predicate GEPred =
+ IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ llvm::CmpInst::Predicate LEPred =
+ IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
+ llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
+ for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
+ if (Ops[i] == Ops[i + 1] ||
+ isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
+ // X op Y op Y --> X op Y
+ // X op Y --> X, if we know X, Y are ordered appropriately
+ Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
+ --i;
+ --e;
+ } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
+ Ops[i + 1])) {
+ // X op Y --> Y, if we know X, Y are ordered appropriately
+ Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
+ --i;
+ --e;
}
+ }
if (Ops.size() == 1) return Ops[0];
assert(!Ops.empty() && "Reduced smax down to nothing!");
- // Okay, it looks like we really DO need an smax expr. Check to see if we
+ // Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
+ const SCEV *ExistingSCEV;
FoldingSetNodeID ID;
- ID.AddInteger(scSMaxExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
- void *IP = nullptr;
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ void *IP;
+ std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
+ if (ExistingSCEV)
+ return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
+ SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
+ ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
+
UniqueSCEVs.InsertNode(S, IP);
addToLoopUseLists(S);
return S;
}
-const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
- const SCEV *RHS) {
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
- return getUMaxExpr(Ops);
+ return getSMaxExpr(Ops);
}
-const SCEV *
-ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
- assert(!Ops.empty() && "Cannot get empty umax!");
- if (Ops.size() == 1) return Ops[0];
-#ifndef NDEBUG
- Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
- for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
- "SCEVUMaxExpr operand types don't match!");
-#endif
-
- // Sort by complexity, this groups all similar expression types together.
- GroupByComplexity(Ops, &LI, DT);
-
- // If there are any constants, fold them together.
- unsigned Idx = 0;
- if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
- ++Idx;
- assert(Idx < Ops.size());
- while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
- // We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(
- getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
- Ops[0] = getConstant(Fold);
- Ops.erase(Ops.begin()+1); // Erase the folded element
- if (Ops.size() == 1) return Ops[0];
- LHSC = cast<SCEVConstant>(Ops[0]);
- }
-
- // If we are left with a constant minimum-int, strip it off.
- if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
- Ops.erase(Ops.begin());
- --Idx;
- } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
- // If we have an umax with a constant maximum-int, it will always be
- // maximum-int.
- return Ops[0];
- }
-
- if (Ops.size() == 1) return Ops[0];
- }
-
- // Find the first UMax
- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
- ++Idx;
-
- // Check to see if one of the operands is a UMax. If so, expand its operands
- // onto our operand list, and recurse to simplify.
- if (Idx < Ops.size()) {
- bool DeletedUMax = false;
- while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
- Ops.erase(Ops.begin()+Idx);
- Ops.append(UMax->op_begin(), UMax->op_end());
- DeletedUMax = true;
- }
-
- if (DeletedUMax)
- return getUMaxExpr(Ops);
- }
-
- // Okay, check to see if the same value occurs in the operand list twice. If
- // so, delete one. Since we sorted the list, these values are required to
- // be adjacent.
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- // X umax Y umax Y --> X umax Y
- // X umax Y --> X, if X is always greater than Y
- if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
- ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
- Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
- --i; --e;
- } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
- Ops[i + 1])) {
- Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
- --i; --e;
- }
-
- if (Ops.size() == 1) return Ops[0];
+const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ return getMinMaxExpr(scSMaxExpr, Ops);
+}
- assert(!Ops.empty() && "Reduced umax down to nothing!");
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
+ SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+ return getUMaxExpr(Ops);
+}
- // Okay, it looks like we really DO need a umax expr. Check to see if we
- // already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scUMaxExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
- void *IP = nullptr;
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
- UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
- return S;
+const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+ return getMinMaxExpr(scUMaxExpr, Ops);
}
const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
@@ -3705,11 +3699,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
- // ~smax(~x, ~y, ~z) == smin(x, y, z).
- SmallVector<const SCEV *, 2> NotOps;
- for (auto *S : Ops)
- NotOps.push_back(getNotSCEV(S));
- return getNotSCEV(getSMaxExpr(NotOps));
+ return getMinMaxExpr(scSMinExpr, Ops);
}
const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
@@ -3719,16 +3709,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
- assert(!Ops.empty() && "At least one operand must be!");
- // Trivial case.
- if (Ops.size() == 1)
- return Ops[0];
-
- // ~umax(~x, ~y, ~z) == umin(x, y, z).
- SmallVector<const SCEV *, 2> NotOps;
- for (auto *S : Ops)
- NotOps.push_back(getNotSCEV(S));
- return getNotSCEV(getUMaxExpr(NotOps));
+ return getMinMaxExpr(scUMinExpr, Ops);
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
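
The hunks above retire the old trick of building smin/umin as the bitwise-not of an smax/umax of bitwise-nots, since SCEVMinMaxExpr now covers all four kinds directly; getNotSCEV instead folds such negated forms back into the matching min/max. The identity both directions rely on is easy to verify with plain two's-complement integers; a small standalone check (ordinary C++, not SCEV code):

    #include <algorithm>
    #include <cassert>

    int main() {
      // With two's complement, ~a == -1 - a, hence
      //   ~max(~x, ~y) == min(x, y)   and   ~min(~x, ~y) == max(x, y).
      for (int x = -3; x <= 3; ++x)
        for (int y = -3; y <= 3; ++y) {
          assert(~std::max(~x, ~y) == std::min(x, y));
          assert(~std::min(~x, ~y) == std::max(x, y));
        }
      return 0;
    }
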
@@ -3892,7 +3873,7 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursevely
+/// TODO: In reality it is better to check the poison recursively
/// but this is better than nothing.
static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
@@ -3970,12 +3951,45 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
}
+/// If Expr computes ~A, return A else return nullptr
+static const SCEV *MatchNotExpr(const SCEV *Expr) {
+ const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (!Add || Add->getNumOperands() != 2 ||
+ !Add->getOperand(0)->isAllOnesValue())
+ return nullptr;
+
+ const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
+ if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+ !AddRHS->getOperand(0)->isAllOnesValue())
+ return nullptr;
+
+ return AddRHS->getOperand(1);
+}
+
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+ // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
+ if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
+ auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
+ SmallVector<const SCEV *, 2> MatchedOperands;
+ for (const SCEV *Operand : MME->operands()) {
+ const SCEV *Matched = MatchNotExpr(Operand);
+ if (!Matched)
+ return (const SCEV *)nullptr;
+ MatchedOperands.push_back(Matched);
+ }
+ return getMinMaxExpr(
+ SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
+ MatchedOperands);
+ };
+ if (const SCEV *Replaced = MatchMinMaxNegation(MME))
+ return Replaced;
+ }
+
Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
const SCEV *AllOnes =
@@ -4022,29 +4036,28 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth);
}
-const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+ unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
- return getTruncateExpr(V, Ty);
- return getZeroExtendExpr(V, Ty);
+ return getTruncateExpr(V, Ty, Depth);
+ return getZeroExtendExpr(V, Ty, Depth);
}
-const SCEV *
-ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
- Type *Ty) {
+const SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+ unsigned Depth) {
Type *SrcTy = V->getType();
assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
- return getTruncateExpr(V, Ty);
- return getSignExtendExpr(V, Ty);
+ return getTruncateExpr(V, Ty, Depth);
+ return getSignExtendExpr(V, Ty, Depth);
}
const SCEV *
@@ -4530,52 +4543,21 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
break;
- auto *CI = dyn_cast<CallInst>(EVI->getAggregateOperand());
- if (!CI)
+ auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
+ if (!WO)
break;
- if (auto *F = CI->getCalledFunction())
- switch (F->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
- return BinaryOp(Instruction::Add, CI->getArgOperand(0),
- CI->getArgOperand(1));
-
- // Now that we know that all uses of the arithmetic-result component of
- // CI are guarded by the overflow check, we can go ahead and pretend
- // that the arithmetic is non-overflowing.
- if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow)
- return BinaryOp(Instruction::Add, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ true,
- /* IsNUW = */ false);
- else
- return BinaryOp(Instruction::Add, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ false,
- /* IsNUW*/ true);
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- if (!isOverflowIntrinsicNoWrap(cast<IntrinsicInst>(CI), DT))
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1));
-
- // The same reasoning as sadd/uadd above.
- if (F->getIntrinsicID() == Intrinsic::ssub_with_overflow)
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ true,
- /* IsNUW = */ false);
- else
- return BinaryOp(Instruction::Sub, CI->getArgOperand(0),
- CI->getArgOperand(1), /* IsNSW = */ false,
- /* IsNUW = */ true);
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- return BinaryOp(Instruction::Mul, CI->getArgOperand(0),
- CI->getArgOperand(1));
- default:
- break;
- }
- break;
+ Instruction::BinaryOps BinOp = WO->getBinaryOp();
+ bool Signed = WO->isSigned();
+ // TODO: Should add nuw/nsw flags for mul as well.
+ if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
+ return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());
+
+ // Now that we know that all uses of the arithmetic-result component of
+ // WO are guarded by the overflow check, we can go ahead and pretend
+ // that the arithmetic is non-overflowing.
+ return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
+ /* IsNSW = */ Signed, /* IsNUW = */ !Signed);
}
default:
@@ -5009,7 +4991,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags);
return PHISCEV;
}
@@ -5196,6 +5178,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
switch (S->getSCEVType()) {
case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
// These expressions are available if their operand(s) is/are.
return true;
@@ -5551,6 +5535,9 @@ ScalarEvolution::getRangeRef(const SCEV *S,
DenseMap<const SCEV *, ConstantRange> &Cache =
SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
: SignedRanges;
+ ConstantRange::PreferredRangeType RangeType =
+ SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED
+ ? ConstantRange::Unsigned : ConstantRange::Signed;
// See if we've computed this range already.
DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
@@ -5581,53 +5568,60 @@ ScalarEvolution::getRangeRef(const SCEV *S,
ConstantRange X = getRangeRef(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
X = X.add(getRangeRef(Add->getOperand(i), SignHint));
- return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(Add, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint));
- return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(Mul, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
X = X.smax(getRangeRef(SMax->getOperand(i), SignHint));
- return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(SMax, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
X = X.umax(getRangeRef(UMax->getOperand(i), SignHint));
- return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
+ return setRange(UMax, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
return setRange(UDiv, SignHint,
- ConservativeResult.intersectWith(X.udiv(Y)));
+ ConservativeResult.intersectWith(X.udiv(Y), RangeType));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint);
return setRange(ZExt, SignHint,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth),
+ RangeType));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
ConstantRange X = getRangeRef(SExt->getOperand(), SignHint);
return setRange(SExt, SignHint,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ ConservativeResult.intersectWith(X.signExtend(BitWidth),
+ RangeType));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint);
return setRange(Trunc, SignHint,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ ConservativeResult.intersectWith(X.truncate(BitWidth),
+ RangeType));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -5637,7 +5631,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
ConservativeResult = ConservativeResult.intersectWith(
- ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));
+ ConstantRange(C->getAPInt(), APInt(BitWidth, 0)), RangeType);
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
@@ -5651,11 +5645,11 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (AllNonNeg)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt(BitWidth, 0),
- APInt::getSignedMinValue(BitWidth)));
+ APInt::getSignedMinValue(BitWidth)), RangeType);
else if (AllNonPos)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt(BitWidth, 1)));
+ APInt(BitWidth, 1)), RangeType);
}
// TODO: non-affine addrec
@@ -5668,14 +5662,14 @@ ScalarEvolution::getRangeRef(const SCEV *S,
BitWidth);
if (!RangeFromAffine.isFullSet())
ConservativeResult =
- ConservativeResult.intersectWith(RangeFromAffine);
+ ConservativeResult.intersectWith(RangeFromAffine, RangeType);
auto RangeFromFactoring = getRangeViaFactoring(
AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
BitWidth);
if (!RangeFromFactoring.isFullSet())
ConservativeResult =
- ConservativeResult.intersectWith(RangeFromFactoring);
+ ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
}
}
@@ -5686,7 +5680,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// Check if the IR explicitly contains !range metadata.
Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
if (MDRange.hasValue())
- ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(),
+ RangeType);
// Split here to avoid paying the compile-time cost of calling both
// computeKnownBits and ComputeNumSignBits. This restriction can be lifted
@@ -5697,8 +5692,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
if (Known.One != ~Known.Zero + 1)
ConservativeResult =
- ConservativeResult.intersectWith(ConstantRange(Known.One,
- ~Known.Zero + 1));
+ ConservativeResult.intersectWith(
+ ConstantRange(Known.One, ~Known.Zero + 1), RangeType);
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
@@ -5706,7 +5701,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (NS > 1)
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
- APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
+ RangeType);
}
// A range of Phi is a subset of union of all ranges of its input.
@@ -5721,7 +5717,8 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (RangeFromOps.isFullSet())
break;
}
- ConservativeResult = ConservativeResult.intersectWith(RangeFromOps);
+ ConservativeResult =
+ ConservativeResult.intersectWith(RangeFromOps, RangeType);
bool Erased = PendingPhiRanges.erase(Phi);
assert(Erased && "Failed to erase Phi properly?");
(void) Erased;
@@ -5751,7 +5748,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
// FullRange), then we don't know anything about the final range either.
// Return FullRange.
if (StartRange.isFullSet())
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
// If Step is signed and negative, then we use its absolute value, but we also
// note that we're moving in the opposite direction.
@@ -5767,7 +5764,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
// Check if Offset is more than full span of BitWidth. If it is, the
// expression is guaranteed to overflow.
if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
// Offset is by how much the expression can change. Checks above guarantee no
// overflow here.
@@ -5786,7 +5783,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
// range (due to wrap around). This means that the expression can take any
// value in this bitwidth, and we have to return full range.
if (StartRange.contains(MovedBoundary))
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
APInt NewLower =
Descending ? std::move(MovedBoundary) : std::move(StartLower);
@@ -5794,12 +5791,8 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
Descending ? std::move(StartUpper) : std::move(MovedBoundary);
NewUpper += 1;
- // If we end up with full range, return a proper full range.
- if (NewLower == NewUpper)
- return ConstantRange(BitWidth, /* isFullSet = */ true);
-
// No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
- return ConstantRange(std::move(NewLower), std::move(NewUpper));
+ return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
}
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
@@ -5832,7 +5825,7 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
MaxBECountValue, BitWidth, /* Signed = */ false);
// Finally, intersect signed and unsigned ranges.
- return SR.intersectWith(UR);
+ return SR.intersectWith(UR, ConstantRange::Smallest);
}
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
@@ -5916,17 +5909,17 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
SelectPattern StartPattern(*this, BitWidth, Start);
if (!StartPattern.isRecognized())
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
SelectPattern StepPattern(*this, BitWidth, Step);
if (!StepPattern.isRecognized())
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
if (StartPattern.Condition != StepPattern.Condition) {
// We don't handle this case today; but we could, by considering four
// possibilities below instead of two. I'm not sure if there are cases where
// that will help over what getRange already does, though.
- return ConstantRange(BitWidth, /* isFullSet = */ true);
+ return ConstantRange::getFull(BitWidth);
}
// NB! Calling ScalarEvolution::getConstant is fine, but we should not try to
@@ -6128,7 +6121,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// to obey basic rules for definitions dominating uses which this
// analysis depends on.
if (!DT.isReachableFromEntry(I->getParent()))
- return getUnknown(V);
+ return getUnknown(UndefValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
else if (isa<ConstantPointerNull>(V))
@@ -6744,6 +6737,28 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
+void ScalarEvolution::forgetAllLoops() {
+ // This method is intended to forget all info about loops. It should
+ // invalidate caches as if the following happened:
+ // - The trip counts of all loops have changed arbitrarily
+ // - Every llvm::Value has been updated in place to produce a different
+ // result.
+ BackedgeTakenCounts.clear();
+ PredicatedBackedgeTakenCounts.clear();
+ LoopPropertiesCache.clear();
+ ConstantEvolutionLoopExitValue.clear();
+ ValueExprMap.clear();
+ ValuesAtScopes.clear();
+ LoopDispositions.clear();
+ BlockDispositions.clear();
+ UnsignedRanges.clear();
+ SignedRanges.clear();
+ ExprValueMap.clear();
+ HasRecMap.clear();
+ MinTrailingZerosCache.clear();
+ PredicatedSCEVRewrites.clear();
+}
+
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
auto RemoveLoopFromBackedgeMap =
@@ -6972,8 +6987,8 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
- &&ExitCounts,
+ ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
+ ExitCounts,
bool Complete, const SCEV *MaxCount, bool MaxOrZero)
: MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;
@@ -7256,6 +7271,14 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
if (EL0.ExactNotTaken == EL1.ExactNotTaken)
BECount = EL0.ExactNotTaken;
}
+ // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
+ // to be more aggressive when computing BECount than when computing
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+ // to not.
+ if (isa<SCEVCouldNotCompute>(MaxBECount) &&
+ !isa<SCEVCouldNotCompute>(BECount))
+ MaxBECount = getConstant(getUnsignedRangeMax(BECount));
return ExitLimit(BECount, MaxBECount, false,
{&EL0.Predicates, &EL1.Predicates});
@@ -7651,7 +7674,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<LoadInst>(I))
+ isa<LoadInst>(I) || isa<ExtractValueInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
@@ -8075,7 +8098,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
case scSMaxExpr:
case scUMaxExpr:
- break; // TODO: smax, umax.
+ case scSMinExpr:
+ case scUMinExpr:
+ break; // TODO: smax, umax, smin, umin.
}
return nullptr;
}
@@ -8087,44 +8112,64 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// exit value from the loop without using SCEVs.
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
- const Loop *LI = this->LI[I->getParent()];
- if (LI && LI->getParentLoop() == L) // Looking for loop exit value.
- if (PHINode *PN = dyn_cast<PHINode>(I))
- if (PN->getParent() == LI->getHeader()) {
- // Okay, there is no closed form solution for the PHI node. Check
- // to see if the loop that contains it has a known backedge-taken
- // count. If so, we may be able to force computation of the exit
- // value.
- const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
- if (const SCEVConstant *BTCC =
- dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
-
- // This trivial case can show up in some degenerate cases where
- // the incoming IR has not yet been fully simplified.
- if (BTCC->getValue()->isZero()) {
- Value *InitValue = nullptr;
- bool MultipleInitValues = false;
- for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
- if (!LI->contains(PN->getIncomingBlock(i))) {
- if (!InitValue)
- InitValue = PN->getIncomingValue(i);
- else if (InitValue != PN->getIncomingValue(i)) {
- MultipleInitValues = true;
- break;
- }
- }
- if (!MultipleInitValues && InitValue)
- return getSCEV(InitValue);
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ const Loop *LI = this->LI[I->getParent()];
+ // Looking for loop exit value.
+ if (LI && LI->getParentLoop() == L &&
+ PN->getParent() == LI->getHeader()) {
+ // Okay, there is no closed form solution for the PHI node. Check
+ // to see if the loop that contains it has a known backedge-taken
+ // count. If so, we may be able to force computation of the exit
+ // value.
+ const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+ // This trivial case can show up in some degenerate cases where
+ // the incoming IR has not yet been fully simplified.
+ if (BackedgeTakenCount->isZero()) {
+ Value *InitValue = nullptr;
+ bool MultipleInitValues = false;
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (!LI->contains(PN->getIncomingBlock(i))) {
+ if (!InitValue)
+ InitValue = PN->getIncomingValue(i);
+ else if (InitValue != PN->getIncomingValue(i)) {
+ MultipleInitValues = true;
+ break;
}
}
- // Okay, we know how many times the containing loop executes. If
- // this is a constant evolving PHI node, get the final value at
- // the specified iteration number.
- Constant *RV =
- getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
- if (RV) return getSCEV(RV);
}
+ if (!MultipleInitValues && InitValue)
+ return getSCEV(InitValue);
}
+ // Do we have a loop invariant value flowing around the backedge
+ // for a loop which must execute the backedge?
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ isKnownPositive(BackedgeTakenCount) &&
+ PN->getNumIncomingValues() == 2) {
+ unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
+ const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred));
+ if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent()))
+ return OnBackedge;
+ }
+ if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+ // Okay, we know how many times the containing loop executes. If
+ // this is a constant evolving PHI node, get the final value at
+ // the specified iteration number.
+ Constant *RV =
+ getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
+ if (RV) return getSCEV(RV);
+ }
+ }
+
+ // If there is a single-input Phi, evaluate it at our scope. If we can
+ // prove that this replacement does not break LCSSA form, use new value.
+ if (PN->getNumOperands() == 1) {
+ const SCEV *Input = getSCEV(PN->getOperand(0));
+ const SCEV *InputAtScope = getSCEVAtScope(Input, L);
+ // TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
+ // for the simplest case just support constants.
+ if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
+ }
+ }
// Okay, this is an expression that we cannot symbolically evaluate
// into a SCEV. Check to see if it's possible to symbolically evaluate
@@ -8198,13 +8243,11 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
NewOps.push_back(OpAtScope);
}
if (isa<SCEVAddExpr>(Comm))
- return getAddExpr(NewOps);
+ return getAddExpr(NewOps, Comm->getNoWrapFlags());
if (isa<SCEVMulExpr>(Comm))
- return getMulExpr(NewOps);
- if (isa<SCEVSMaxExpr>(Comm))
- return getSMaxExpr(NewOps);
- if (isa<SCEVUMaxExpr>(Comm))
- return getUMaxExpr(NewOps);
+ return getMulExpr(NewOps, Comm->getNoWrapFlags());
+ if (isa<SCEVMinMaxExpr>(Comm))
+ return getMinMaxExpr(Comm->getSCEVType(), NewOps);
llvm_unreachable("Unknown commutative SCEV type!");
}
}
@@ -10045,41 +10088,15 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
getNotSCEV(FoundLHS));
}
-/// If Expr computes ~A, return A else return nullptr
-static const SCEV *MatchNotExpr(const SCEV *Expr) {
- const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
- if (!Add || Add->getNumOperands() != 2 ||
- !Add->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
- if (!AddRHS || AddRHS->getNumOperands() != 2 ||
- !AddRHS->getOperand(0)->isAllOnesValue())
- return nullptr;
-
- return AddRHS->getOperand(1);
-}
-
-/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
-template<typename MaxExprType>
-static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
- const SCEV *Candidate) {
- const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
- if (!MaxExpr) return false;
-
- return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
-}
-
-/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
-template<typename MaxExprType>
-static bool IsMinConsistingOf(ScalarEvolution &SE,
- const SCEV *MaybeMinExpr,
- const SCEV *Candidate) {
- const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
- if (!MaybeMaxExpr)
+/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
+template <typename MinMaxExprType>
+static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
+ const SCEV *Candidate) {
+ const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
+ if (!MinMaxExpr)
return false;
- return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
+ return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
}
static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
@@ -10128,20 +10145,20 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
- // min(A, ...) <= A
- IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
- // A <= max(A, ...)
- IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+ // min(A, ...) <= A
+ IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
- // min(A, ...) <= A
- IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
- // A <= max(A, ...)
- IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+ // min(A, ...) <= A
+ IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
}
llvm_unreachable("covered switch fell through?!");
@@ -10691,13 +10708,10 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit)
: APIntOps::umax(getUnsignedRangeMin(RHS), Limit);
-
- const SCEV *MaxBECount = getCouldNotCompute();
- if (isa<SCEVConstant>(BECount))
- MaxBECount = BECount;
- else
- MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
- getConstant(MinStride), false);
+ const SCEV *MaxBECount = isa<SCEVConstant>(BECount)
+ ? BECount
+ : computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
@@ -10806,8 +10820,6 @@ static inline bool containsUndefs(const SCEV *S) {
return SCEVExprContains(S, [](const SCEV *S) {
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
return isa<UndefValue>(SU->getValue());
- else if (const auto *SC = dyn_cast<SCEVConstant>(S))
- return isa<UndefValue>(SC->getValue());
return false;
});
}
@@ -11402,19 +11414,23 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
- SmallVector<BasicBlock *, 8> ExitBlocks;
- L->getExitBlocks(ExitBlocks);
- if (ExitBlocks.size() != 1)
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() != 1)
OS << "<multiple exits> ";
- if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
- OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L);
- } else {
- OS << "Unpredictable backedge-taken count. ";
- }
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
+ else
+ OS << "Unpredictable backedge-taken count.\n";
- OS << "\n"
- "Loop ";
+ if (ExitingBlocks.size() > 1)
+ for (BasicBlock *ExitingBlock : ExitingBlocks) {
+ OS << " exit count for " << ExitingBlock->getName() << ": "
+ << *SE->getExitCount(L, ExitingBlock) << "\n";
+ }
+
+ OS << "Loop ";
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
@@ -11611,7 +11627,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
- case scSMaxExpr: {
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
bool HasVarying = false;
for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
LoopDisposition D = getLoopDisposition(Op, L);
@@ -11698,7 +11716,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
- case scSMaxExpr: {
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
bool Proper = true;
for (const SCEV *NAryOp : NAry->operands()) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 289d4f8ae49a..96da0a24cddd 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,7 +22,7 @@
using namespace llvm;
AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB, AAQueryInfo &AAQI) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
@@ -86,11 +85,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
AO ? AAMDNodes() : LocA.AATags),
MemoryLocation(BO ? BO : LocB.Ptr,
BO ? LocationSize::unknown() : LocB.Size,
- BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
+ BO ? AAMDNodes() : LocB.AATags),
+ AAQI) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
/// Given an expression, try to find a base value.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index ca5cf1663b83..e8a95d35482c 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,12 +60,10 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// instructions that might be inserted before BIP.
if (BasicBlock::iterator(CI) != IP || BIP == IP) {
// Create a new cast, and leave the old cast in place in case
- // it is being used as an insert point. Clear its operand
- // so that it doesn't hold anything live.
+ // it is being used as an insert point.
Ret = CastInst::Create(Op, V, Ty, "", &*IP);
Ret->takeName(CI);
CI->replaceAllUsesWith(Ret);
- CI->setOperand(0, UndefValue::get(V->getType()));
break;
}
Ret = CI;
@@ -167,9 +164,11 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
}
/// InsertBinop - Insert the specified binary operator, doing a small amount
-/// of work to avoid inserting an obviously redundant operation.
+/// of work to avoid inserting an obviously redundant operation, and hoisting
+/// to an outer loop when the opportunity is there and it is safe.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
- Value *LHS, Value *RHS) {
+ Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
// Fold a binop with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS))
@@ -188,20 +187,22 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
if (isa<DbgInfoIntrinsic>(IP))
ScanLimit++;
- // Conservatively, do not use any instruction which has any of wrap/exact
- // flags installed.
- // TODO: Instead of simply disable poison instructions we can be clever
- // here and match SCEV to this instruction.
- auto canGeneratePoison = [](Instruction *I) {
- if (isa<OverflowingBinaryOperator>(I) &&
- (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
- return true;
+ auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
+ // Ensure that no-wrap flags match.
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
+ return true;
+ if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
+ return true;
+ }
+ // Conservatively, do not use any instruction which has the exact flag
+ // installed.
if (isa<PossiblyExactOperator>(I) && I->isExact())
return true;
return false;
};
if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
- IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
+ IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
return &*IP;
if (IP == BlockBegin) break;
}
@@ -211,19 +212,25 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
SCEVInsertPointGuard Guard(Builder, this);
- // Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
- if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) break;
+ if (IsSafeToHoist) {
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
- // Ok, move up a level.
- Builder.SetInsertPoint(Preheader->getTerminator());
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
+ }
}
// If we haven't found this binop, insert it.
Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
BO->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BO->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BO->setHasNoSignedWrap();
rememberInstruction(BO);
return BO;
@@ -695,7 +702,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
// Emit instructions to add all the operands. Hoist as much as possible
// out of loops, and form meaningful getelementptrs where possible.
@@ -735,7 +742,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
Sum = InsertNoopCastOfTo(Sum, Ty);
- Sum = InsertBinop(Instruction::Sub, Sum, W);
+ Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
++I;
} else {
// A simple add.
@@ -743,7 +751,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
Sum = InsertNoopCastOfTo(Sum, Ty);
// Canonicalize a constant to the RHS.
if (isa<Constant>(Sum)) std::swap(Sum, W);
- Sum = InsertBinop(Instruction::Add, Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
++I;
}
}
@@ -762,7 +771,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
// Sort by loop. Use a stable sort so that constants follow non-constants.
- std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
+ llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
@@ -795,9 +804,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
if (Exponent & 1)
Result = P;
for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
- P = InsertBinop(Instruction::Mul, P, P);
+ P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true);
if (Exponent & BinExp)
- Result = Result ? InsertBinop(Instruction::Mul, Result, P) : P;
+ Result = Result ? InsertBinop(Instruction::Mul, Result, P,
+ SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ true)
+ : P;
}
I = E;
@@ -812,7 +825,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
} else if (I->second->isAllOnesValue()) {
// Instead of doing a multiply by negative one, just do a negate.
Prod = InsertNoopCastOfTo(Prod, Ty);
- Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
++I;
} else {
// A simple mul.
@@ -824,10 +838,16 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
if (match(W, m_Power2(RHS))) {
// Canonicalize Prod*(1<<C) to Prod<<C.
assert(!Ty->isVectorTy() && "vector types are not SCEVable");
+ auto NWFlags = S->getNoWrapFlags();
+ // Clear the nsw flag if the shl would produce a poison value.
+ if (RHS->logBase2() == RHS->getBitWidth() - 1)
+ NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
Prod = InsertBinop(Instruction::Shl, Prod,
- ConstantInt::get(Ty, RHS->logBase2()));
+ ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
+ /*IsSafeToHoist*/ true);
} else {
- Prod = InsertBinop(Instruction::Mul, Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
+ /*IsSafeToHoist*/ true);
}
}
}
@@ -843,11 +863,13 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
const APInt &RHS = SC->getAPInt();
if (RHS.isPowerOf2())
return InsertBinop(Instruction::LShr, LHS,
- ConstantInt::get(Ty, RHS.logBase2()));
+ ConstantInt::get(Ty, RHS.logBase2()),
+ SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}
Value *RHS = expandCodeFor(S->getRHS(), Ty);
- return InsertBinop(Instruction::UDiv, LHS, RHS);
+ return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
+ /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
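For illustration only (not part of the patch): the IsSafeToHoist guard on the UDiv case exists because user code often executes a division only under a non-zero check, and hoisting the expanded division above that check could introduce a divide-by-zero (see PR35406). A minimal C++ sketch of the hazard, with made-up names:

    // The body only runs while n != 0, so the division inside is safe.
    for (unsigned i = 0; i < n; ++i)
      sum += x / n;
    // Hoisting 'x / n' ahead of the loop would evaluate it even when n == 0;
    // hence a UDiv is hoisted only when SE.isKnownNonZero(RHS) holds.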
/// Move parts of Base into Rest to leave Base with the minimal
@@ -1634,7 +1656,8 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
for (int i = S->getNumOperands()-2; i >= 0; --i) {
// In the case of mixed integer and pointer types, do the
// rest of the comparisons as integer.
- if (S->getOperand(i)->getType() != Ty) {
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
@@ -1658,7 +1681,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
for (int i = S->getNumOperands()-2; i >= 0; --i) {
// In the case of mixed integer and pointer types, do the
// rest of the comparisons as integer.
- if (S->getOperand(i)->getType() != Ty) {
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
Ty = SE.getEffectiveSCEVType(Ty);
LHS = InsertNoopCastOfTo(LHS, Ty);
}
@@ -1676,6 +1700,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
return LHS;
}
+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
+
+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
+ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
+ Type *Ty = LHS->getType();
+ for (int i = S->getNumOperands() - 2; i >= 0; --i) {
+ // In the case of mixed integer and pointer types, do the
+ // rest of the comparisons as integer.
+ Type *OpTy = S->getOperand(i)->getType();
+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+ Ty = SE.getEffectiveSCEVType(Ty);
+ LHS = InsertNoopCastOfTo(LHS, Ty);
+ }
+ Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+ Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
+ rememberInstruction(ICmp);
+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
+ rememberInstruction(Sel);
+ LHS = Sel;
+ }
+ // In the case of mixed integer and pointer types, cast the
+ // final result back to the pointer type.
+ if (LHS->getType() != S->getType())
+ LHS = InsertNoopCastOfTo(LHS, S->getType());
+ return LHS;
+}
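For illustration only (not part of the patch), a minimal sketch of how the new SMin/UMin expansion is reached from a caller; DL, A, B, and InsertPt are assumed names:

    // Build a umin SCEV and expand it; the expander emits the icmp/select
    // chain shown in visitUMinExpr above.
    const SCEV *M = SE.getUMinExpr(SE.getSCEV(A), SE.getSCEV(B));
    SCEVExpander Expander(SE, DL, "scev.min");
    Value *Lowered = Expander.expandCodeFor(M, A->getType(), InsertPt);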
+
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
setInsertPoint(IP);
@@ -1732,49 +1806,55 @@ Value *SCEVExpander::expand(const SCEV *S) {
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
Instruction *InsertPt = &*Builder.GetInsertPoint();
- for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
- L = L->getParentLoop())
- if (SE.isLoopInvariant(S, L)) {
- if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader())
- InsertPt = Preheader->getTerminator();
- else {
- // LSR sets the insertion point for AddRec start/step values to the
- // block start to simplify value reuse, even though it's an invalid
- // position. SCEVExpander must correct for this in all cases.
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
- }
- } else {
- // We can move insertion point only if there is no div or rem operations
- // otherwise we are risky to move it over the check for zero denominator.
- auto SafeToHoist = [](const SCEV *S) {
- return !SCEVExprContains(S, [](const SCEV *S) {
- if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
- if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
- // Division by non-zero constants can be hoisted.
- return SC->getValue()->isZero();
- // All other divisions should not be moved as they may be
- // divisions by zero and should be kept within the
- // conditions of the surrounding loops that guard their
- // execution (see PR35406).
- return true;
- }
- return false;
- });
- };
- // If the SCEV is computable at this level, insert it into the header
- // after the PHIs (and after any other instructions that we've inserted
- // there) so that it is guaranteed to dominate any user inside the loop.
- if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
- SafeToHoist(S))
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
- while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
- (isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = &*std::next(InsertPt->getIterator());
+
+ // We can only move the insertion point if there are no div or rem
+ // operations; otherwise we risk moving it past the check for a zero
+ // denominator.
+ auto SafeToHoist = [](const SCEV *S) {
+ return !SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+ // Division by non-zero constants can be hoisted.
+ return SC->getValue()->isZero();
+ // All other divisions should not be moved as they may be
+ // divisions by zero and should be kept within the
+ // conditions of the surrounding loops that guard their
+ // execution (see PR35406).
+ return true;
+ }
+ return false;
+ });
+ };
+ if (SafeToHoist(S)) {
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+ L = L->getParentLoop()) {
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(InsertPt) ||
+ isa<DbgInfoIntrinsic>(InsertPt)))
+ InsertPt = &*std::next(InsertPt->getIterator());
+ break;
}
- break;
}
+ }
+
+ // IndVarSimplify sometimes sets the insertion point at the block start, even
+ // when there are PHIs at that point. We must correct for this.
+ if (isa<PHINode>(*InsertPt))
+ InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
// Check to see if we already expanded this here.
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
@@ -2071,10 +2151,13 @@ bool SCEVExpander::isHighCostExpansionHelper(
if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
// If the divisor is a power of two and the SCEV type fits in a native
- // integer, consider the division cheap irrespective of whether it occurs in
- // the user code since it can be lowered into a right shift.
+ // integer (and the LHS not expensive), consider the division cheap
+ // irrespective of whether it occurs in the user code since it can be
+ // lowered into a right shift.
if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
if (SC->getAPInt().isPowerOf2()) {
+ if (isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed))
+ return true;
const DataLayout &DL =
L->getHeader()->getParent()->getParent()->getDataLayout();
unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -2102,7 +2185,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
// the exit condition.
- if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ if (isa<SCEVMinMaxExpr>(S))
return true;
// Recurse past nary expressions, which commonly occur in the
@@ -2339,6 +2422,24 @@ bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
ScalarEvolution &SE) {
- return isSafeToExpand(S, SE) && SE.dominates(S, InsertionPoint->getParent());
+ if (!isSafeToExpand(S, SE))
+ return false;
+ // We have to prove that the expanded site of S dominates InsertionPoint.
+ // This is easy when not in the same block, but hard when S is an instruction
+ // to be expanded somewhere inside the same block as our insertion point.
+ // What we really need here is something analogous to an OrderedBasicBlock,
+ // but for the moment, we paper over the problem by handling two common and
+ // cheap to check cases.
+ if (SE.properlyDominates(S, InsertionPoint->getParent()))
+ return true;
+ if (SE.dominates(S, InsertionPoint->getParent())) {
+ if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
+ return true;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+ for (const Value *V : InsertionPoint->operand_values())
+ if (V == U->getValue())
+ return true;
+ }
+ return false;
}
}
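For illustration only (not part of the patch), a hedged sketch of using the stricter isSafeToExpandAt as a guard before expanding at a specific instruction; S, I, and DL are assumed names:

    // Only expand when the SCEV is safe to materialize right before I.
    if (isSafeToExpandAt(S, &I, SE)) {
      SCEVExpander Rewriter(SE, DL, "scev.rewrite");
      Value *V = Rewriter.expandCodeFor(S, I.getType(), &I);
      // ... rewrite I in terms of V ...
    }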
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 3740039b8f86..209ae66ca53e 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -1,9 +1,8 @@
//===- ScalarEvolutionNormalization.cpp - See below -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index 9a581fe46afc..094e4a3d5dc8 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -1,9 +1,8 @@
//===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,9 +75,10 @@ public:
} // end anonymous namespace
AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// Get the attached MDNodes.
const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope;
@@ -92,13 +92,14 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
return NoAlias;
// If they may alias, chain to the next AliasAnalysis.
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
if (!mayAliasInScopes(Loc.AATags.Scope,
Call->getMetadata(LLVMContext::MD_noalias)))
@@ -108,13 +109,14 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
Loc.AATags.NoAlias))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) {
+ const CallBase *Call2,
+ AAQueryInfo &AAQI) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
Call2->getMetadata(LLVMContext::MD_noalias)))
@@ -124,7 +126,7 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
Call1->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp
index 66b03845864f..4cf235db86eb 100644
--- a/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/lib/Analysis/StackSafetyAnalysis.cpp
@@ -1,9 +1,8 @@
//===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -416,7 +415,9 @@ class StackSafetyDataFlowAnalysis {
updateOneNode(F.first, F.second);
}
void runDataFlow();
+#ifndef NDEBUG
void verifyFixedPoint();
+#endif
public:
StackSafetyDataFlowAnalysis(
@@ -527,11 +528,13 @@ void StackSafetyDataFlowAnalysis::runDataFlow() {
}
}
+#ifndef NDEBUG
void StackSafetyDataFlowAnalysis::verifyFixedPoint() {
WorkList.clear();
updateAllNodes();
assert(WorkList.empty());
}
+#endif
StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() {
runDataFlow();
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
index 2f20cd12506c..60ea2451b0ef 100644
--- a/lib/Analysis/StratifiedSets.h
+++ b/lib/Analysis/StratifiedSets.h
@@ -1,9 +1,8 @@
//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp
index e1a7e4476d12..3cf248a31142 100644
--- a/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -1,10 +1,9 @@
//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
//--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,14 +218,9 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock *PdBoundBlock) {
assert(JoinBlocks);
- // immediate post dominator (no join block beyond that block)
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@@ -341,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
// already available in cache?
auto ItCached = CachedLoopExitJoins.find(&Loop);
- if (ItCached != CachedLoopExitJoins.end())
+ if (ItCached != CachedLoopExitJoins.end()) {
return *ItCached->second;
+ }
+
+ // don't propagate beyond the immediate post dominator of the loop
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+ while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
+ IpdNode = IpdNode->getIDom();
+ PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+ }
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@@ -366,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
+ // don't propagate beyond the immediate post dominator of the branch
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
diff --git a/lib/Analysis/SyntheticCountsUtils.cpp b/lib/Analysis/SyntheticCountsUtils.cpp
index c2d7bb11a4cf..22766e5f07f5 100644
--- a/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/lib/Analysis/SyntheticCountsUtils.cpp
@@ -1,9 +1,8 @@
//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 4643f75da42d..ef139d3257d2 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -1,9 +1,8 @@
//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
+ clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
+ "IBM MASS vector library"),
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library")));
@@ -50,6 +51,16 @@ static bool hasSinCosPiStret(const Triple &T) {
return true;
}
+static bool hasBcmp(const Triple &TT) {
+ // POSIX removed bcmp() in 2001, but glibc and several other libc
+ // implementations still provide it.
+ if (TT.isOSLinux())
+ return TT.isGNUEnvironment() || TT.isMusl();
+ // Both NetBSD and OpenBSD are planning to remove the function. Windows does
+ // not have it.
+ return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin();
+}
+
/// Initialize the set of available library functions based on the specified
/// target triple. This should be carefully written so that a missing target
/// triple gets a sane set of defaults.
@@ -78,8 +89,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
ShouldSignExtI32Param = false;
// PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
// returns corresponding to C-level ints and unsigned ints.
- if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le ||
- T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) {
+ if (T.isPPC64() || T.getArch() == Triple::sparcv9 ||
+ T.getArch() == Triple::systemz) {
ShouldExtI32Param = true;
ShouldExtI32Return = true;
}
@@ -142,6 +153,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_sincospif_stret);
}
+ if (!hasBcmp(T))
+ TLI.setUnavailable(LibFunc_bcmp);
+
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
!T.isMacOSXVersionLT(10, 7)) {
// x86-32 OSX has a scheme where fwrite and fputs (and some other functions
@@ -153,33 +167,82 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003");
}
- // iprintf and friends are only available on XCore and TCE.
- if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ // iprintf and friends are only available on XCore, TCE, and Emscripten.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce &&
+ T.getOS() != Triple::Emscripten) {
TLI.setUnavailable(LibFunc_iprintf);
TLI.setUnavailable(LibFunc_siprintf);
TLI.setUnavailable(LibFunc_fiprintf);
}
+ // __small_printf and friends are only available on Emscripten.
+ if (T.getOS() != Triple::Emscripten) {
+ TLI.setUnavailable(LibFunc_small_printf);
+ TLI.setUnavailable(LibFunc_small_sprintf);
+ TLI.setUnavailable(LibFunc_small_fprintf);
+ }
+
if (T.isOSWindows() && !T.isOSCygMing()) {
- // Win32 does not support long double
+ // XXX: The earliest documentation available at the moment is for VS2015/VC19:
+ // https://docs.microsoft.com/en-us/cpp/c-runtime-library/floating-point-support?view=vs-2015
+ // XXX: In order to use an MSVCRT older than VC19,
+ // the specific library version must be explicit in the target triple,
+ // e.g., x86_64-pc-windows-msvc18.
+ bool hasPartialC99 = true;
+ if (T.isKnownWindowsMSVCEnvironment()) {
+ unsigned Major, Minor, Micro;
+ T.getEnvironmentVersion(Major, Minor, Micro);
+ hasPartialC99 = (Major == 0 || Major >= 19);
+ }
+
+ // Latest targets support C89 math functions, in part.
+ bool isARM = (T.getArch() == Triple::aarch64 ||
+ T.getArch() == Triple::arm);
+ bool hasPartialFloat = (isARM ||
+ T.getArch() == Triple::x86_64);
+
+ // Win32 does not support float C89 math functions, in general.
+ if (!hasPartialFloat) {
+ TLI.setUnavailable(LibFunc_acosf);
+ TLI.setUnavailable(LibFunc_asinf);
+ TLI.setUnavailable(LibFunc_atan2f);
+ TLI.setUnavailable(LibFunc_atanf);
+ TLI.setUnavailable(LibFunc_ceilf);
+ TLI.setUnavailable(LibFunc_cosf);
+ TLI.setUnavailable(LibFunc_coshf);
+ TLI.setUnavailable(LibFunc_expf);
+ TLI.setUnavailable(LibFunc_floorf);
+ TLI.setUnavailable(LibFunc_fmodf);
+ TLI.setUnavailable(LibFunc_log10f);
+ TLI.setUnavailable(LibFunc_logf);
+ TLI.setUnavailable(LibFunc_modff);
+ TLI.setUnavailable(LibFunc_powf);
+ TLI.setUnavailable(LibFunc_sinf);
+ TLI.setUnavailable(LibFunc_sinhf);
+ TLI.setUnavailable(LibFunc_sqrtf);
+ TLI.setUnavailable(LibFunc_tanf);
+ TLI.setUnavailable(LibFunc_tanhf);
+ }
+ if (!isARM)
+ TLI.setUnavailable(LibFunc_fabsf);
+ TLI.setUnavailable(LibFunc_frexpf);
+ TLI.setUnavailable(LibFunc_ldexpf);
+
+ // Win32 does not support long double C89 math functions.
TLI.setUnavailable(LibFunc_acosl);
TLI.setUnavailable(LibFunc_asinl);
- TLI.setUnavailable(LibFunc_atanl);
TLI.setUnavailable(LibFunc_atan2l);
+ TLI.setUnavailable(LibFunc_atanl);
TLI.setUnavailable(LibFunc_ceill);
- TLI.setUnavailable(LibFunc_copysignl);
TLI.setUnavailable(LibFunc_cosl);
TLI.setUnavailable(LibFunc_coshl);
TLI.setUnavailable(LibFunc_expl);
- TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
TLI.setUnavailable(LibFunc_fabsl);
TLI.setUnavailable(LibFunc_floorl);
- TLI.setUnavailable(LibFunc_fmaxl);
- TLI.setUnavailable(LibFunc_fminl);
TLI.setUnavailable(LibFunc_fmodl);
TLI.setUnavailable(LibFunc_frexpl);
- TLI.setUnavailable(LibFunc_ldexpf);
TLI.setUnavailable(LibFunc_ldexpl);
+ TLI.setUnavailable(LibFunc_log10l);
TLI.setUnavailable(LibFunc_logl);
TLI.setUnavailable(LibFunc_modfl);
TLI.setUnavailable(LibFunc_powl);
@@ -189,81 +252,66 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_tanl);
TLI.setUnavailable(LibFunc_tanhl);
- // Win32 only has C89 math
- TLI.setUnavailable(LibFunc_acosh);
- TLI.setUnavailable(LibFunc_acoshf);
+ // Win32 does not fully support C99 math functions.
+ if (!hasPartialC99) {
+ TLI.setUnavailable(LibFunc_acosh);
+ TLI.setUnavailable(LibFunc_acoshf);
+ TLI.setUnavailable(LibFunc_asinh);
+ TLI.setUnavailable(LibFunc_asinhf);
+ TLI.setUnavailable(LibFunc_atanh);
+ TLI.setUnavailable(LibFunc_atanhf);
+ TLI.setAvailableWithName(LibFunc_cabs, "_cabs");
+ TLI.setUnavailable(LibFunc_cabsf);
+ TLI.setUnavailable(LibFunc_cbrt);
+ TLI.setUnavailable(LibFunc_cbrtf);
+ TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
+ TLI.setAvailableWithName(LibFunc_copysignf, "_copysignf");
+ TLI.setUnavailable(LibFunc_exp2);
+ TLI.setUnavailable(LibFunc_exp2f);
+ TLI.setUnavailable(LibFunc_expm1);
+ TLI.setUnavailable(LibFunc_expm1f);
+ TLI.setUnavailable(LibFunc_fmax);
+ TLI.setUnavailable(LibFunc_fmaxf);
+ TLI.setUnavailable(LibFunc_fmin);
+ TLI.setUnavailable(LibFunc_fminf);
+ TLI.setUnavailable(LibFunc_log1p);
+ TLI.setUnavailable(LibFunc_log1pf);
+ TLI.setUnavailable(LibFunc_log2);
+ TLI.setUnavailable(LibFunc_log2f);
+ TLI.setAvailableWithName(LibFunc_logb, "_logb");
+ if (hasPartialFloat)
+ TLI.setAvailableWithName(LibFunc_logbf, "_logbf");
+ else
+ TLI.setUnavailable(LibFunc_logbf);
+ TLI.setUnavailable(LibFunc_rint);
+ TLI.setUnavailable(LibFunc_rintf);
+ TLI.setUnavailable(LibFunc_round);
+ TLI.setUnavailable(LibFunc_roundf);
+ TLI.setUnavailable(LibFunc_trunc);
+ TLI.setUnavailable(LibFunc_truncf);
+ }
+
+ // Win32 does not support long double C99 math functions.
TLI.setUnavailable(LibFunc_acoshl);
- TLI.setUnavailable(LibFunc_asinh);
- TLI.setUnavailable(LibFunc_asinhf);
TLI.setUnavailable(LibFunc_asinhl);
- TLI.setUnavailable(LibFunc_atanh);
- TLI.setUnavailable(LibFunc_atanhf);
TLI.setUnavailable(LibFunc_atanhl);
- TLI.setUnavailable(LibFunc_cabs);
- TLI.setUnavailable(LibFunc_cabsf);
TLI.setUnavailable(LibFunc_cabsl);
- TLI.setUnavailable(LibFunc_cbrt);
- TLI.setUnavailable(LibFunc_cbrtf);
TLI.setUnavailable(LibFunc_cbrtl);
- TLI.setUnavailable(LibFunc_exp2);
- TLI.setUnavailable(LibFunc_exp2f);
+ TLI.setUnavailable(LibFunc_copysignl);
TLI.setUnavailable(LibFunc_exp2l);
- TLI.setUnavailable(LibFunc_expm1);
- TLI.setUnavailable(LibFunc_expm1f);
TLI.setUnavailable(LibFunc_expm1l);
- TLI.setUnavailable(LibFunc_log2);
- TLI.setUnavailable(LibFunc_log2f);
- TLI.setUnavailable(LibFunc_log2l);
- TLI.setUnavailable(LibFunc_log1p);
- TLI.setUnavailable(LibFunc_log1pf);
+ TLI.setUnavailable(LibFunc_fmaxl);
+ TLI.setUnavailable(LibFunc_fminl);
TLI.setUnavailable(LibFunc_log1pl);
- TLI.setUnavailable(LibFunc_logb);
- TLI.setUnavailable(LibFunc_logbf);
+ TLI.setUnavailable(LibFunc_log2l);
TLI.setUnavailable(LibFunc_logbl);
- TLI.setUnavailable(LibFunc_nearbyint);
- TLI.setUnavailable(LibFunc_nearbyintf);
TLI.setUnavailable(LibFunc_nearbyintl);
- TLI.setUnavailable(LibFunc_rint);
- TLI.setUnavailable(LibFunc_rintf);
TLI.setUnavailable(LibFunc_rintl);
- TLI.setUnavailable(LibFunc_round);
- TLI.setUnavailable(LibFunc_roundf);
TLI.setUnavailable(LibFunc_roundl);
- TLI.setUnavailable(LibFunc_trunc);
- TLI.setUnavailable(LibFunc_truncf);
TLI.setUnavailable(LibFunc_truncl);
- // Win32 provides some C99 math with mangled names
- TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
-
- if (T.getArch() == Triple::x86) {
- // Win32 on x86 implements single-precision math functions as macros
- TLI.setUnavailable(LibFunc_acosf);
- TLI.setUnavailable(LibFunc_asinf);
- TLI.setUnavailable(LibFunc_atanf);
- TLI.setUnavailable(LibFunc_atan2f);
- TLI.setUnavailable(LibFunc_ceilf);
- TLI.setUnavailable(LibFunc_copysignf);
- TLI.setUnavailable(LibFunc_cosf);
- TLI.setUnavailable(LibFunc_coshf);
- TLI.setUnavailable(LibFunc_expf);
- TLI.setUnavailable(LibFunc_floorf);
- TLI.setUnavailable(LibFunc_fminf);
- TLI.setUnavailable(LibFunc_fmaxf);
- TLI.setUnavailable(LibFunc_fmodf);
- TLI.setUnavailable(LibFunc_logf);
- TLI.setUnavailable(LibFunc_log10f);
- TLI.setUnavailable(LibFunc_modff);
- TLI.setUnavailable(LibFunc_powf);
- TLI.setUnavailable(LibFunc_sinf);
- TLI.setUnavailable(LibFunc_sinhf);
- TLI.setUnavailable(LibFunc_sqrtf);
- TLI.setUnavailable(LibFunc_tanf);
- TLI.setUnavailable(LibFunc_tanhf);
- }
-
- // Win32 does *not* provide these functions, but they are
- // generally available on POSIX-compliant systems:
+ // Win32 does not support these functions, but
+ // they are generally available on POSIX-compliant systems.
TLI.setUnavailable(LibFunc_access);
TLI.setUnavailable(LibFunc_bcmp);
TLI.setUnavailable(LibFunc_bcopy);
@@ -318,12 +366,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_utime);
TLI.setUnavailable(LibFunc_utimes);
TLI.setUnavailable(LibFunc_write);
-
- // Win32 does *not* provide provide these functions, but they are
- // specified by C99:
- TLI.setUnavailable(LibFunc_atoll);
- TLI.setUnavailable(LibFunc_frexpf);
- TLI.setUnavailable(LibFunc_llabs);
}
switch (T.getOS()) {
@@ -651,11 +693,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return ((NumParams == 2 || NumParams == 3) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_strcat_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
case LibFunc_strcat:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType());
+ case LibFunc_strncat_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
case LibFunc_strncat:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
@@ -674,6 +726,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy);
+ case LibFunc_strlcat_chk:
+ case LibFunc_strlcpy_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
+ case LibFunc_strlcat:
+ case LibFunc_strlcpy:
+ return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ IsSizeTTy(FTy.getParamType(2));
+
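For reference (not part of the patch), the BSD declarations that the new strlcpy/strlcat cases validate; the _chk variants carry one extra trailing size_t object-size argument, which is what the --NumParams/IsSizeTTy check strips off first:

    size_t strlcpy(char *dst, const char *src, size_t size);
    size_t strlcat(char *dst, const char *src, size_t size);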
case LibFunc_strncpy_chk:
case LibFunc_stpncpy_chk:
--NumParams;
@@ -739,14 +804,32 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_stat:
case LibFunc_statvfs:
case LibFunc_siprintf:
+ case LibFunc_small_sprintf:
case LibFunc_sprintf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32));
+
+ case LibFunc_sprintf_chk:
+ return NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(2)) &&
+ FTy.getParamType(3)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32);
+
case LibFunc_snprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32));
+
+ case LibFunc_snprintf_chk:
+ return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+ IsSizeTTy(FTy.getParamType(1)) &&
+ FTy.getParamType(2)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(3)) &&
+ FTy.getParamType(4)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32);
+
case LibFunc_setitimer:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
@@ -795,6 +878,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(1)->isIntegerTy() &&
IsSizeTTy(FTy.getParamType(2)));
+ case LibFunc_memccpy_chk:
+ --NumParams;
+ if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ return false;
+ LLVM_FALLTHROUGH;
case LibFunc_memccpy:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
case LibFunc_memalign:
@@ -836,6 +924,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_getenv:
case LibFunc_getpwnam:
case LibFunc_iprintf:
+ case LibFunc_small_printf:
case LibFunc_pclose:
case LibFunc_perror:
case LibFunc_printf:
@@ -915,6 +1004,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(1)->isPointerTy());
case LibFunc_fscanf:
case LibFunc_fiprintf:
+ case LibFunc_small_fprintf:
case LibFunc_fprintf:
return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
@@ -961,9 +1051,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vsprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_vsprintf_chk:
+ return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy();
case LibFunc_vsnprintf:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
+ case LibFunc_vsnprintf_chk:
+ return NumParams == 6 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(2)->isIntegerTy(32) &&
+ IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy();
case LibFunc_open:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
case LibFunc_opendir:
@@ -1391,6 +1489,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
LibFunc &F) const {
+ // Intrinsics don't overlap w/libcalls; if our module has a large number of
+ // intrinsics, this ends up being an interesting compile time win since we
+ // avoid string normalization and comparison.
+ if (FDecl.isIntrinsic()) return false;
+
const DataLayout *DL =
FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
return getLibFunc(FDecl.getName(), F) &&
@@ -1430,151 +1533,24 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
switch (VecLib) {
case Accelerate: {
const VecDesc VecFuncs[] = {
- // Floating-Point Arithmetic and Auxiliary Functions
- {"ceilf", "vceilf", 4},
- {"fabsf", "vfabsf", 4},
- {"llvm.fabs.f32", "vfabsf", 4},
- {"floorf", "vfloorf", 4},
- {"sqrtf", "vsqrtf", 4},
- {"llvm.sqrt.f32", "vsqrtf", 4},
-
- // Exponential and Logarithmic Functions
- {"expf", "vexpf", 4},
- {"llvm.exp.f32", "vexpf", 4},
- {"expm1f", "vexpm1f", 4},
- {"logf", "vlogf", 4},
- {"llvm.log.f32", "vlogf", 4},
- {"log1pf", "vlog1pf", 4},
- {"log10f", "vlog10f", 4},
- {"llvm.log10.f32", "vlog10f", 4},
- {"logbf", "vlogbf", 4},
-
- // Trigonometric Functions
- {"sinf", "vsinf", 4},
- {"llvm.sin.f32", "vsinf", 4},
- {"cosf", "vcosf", 4},
- {"llvm.cos.f32", "vcosf", 4},
- {"tanf", "vtanf", 4},
- {"asinf", "vasinf", 4},
- {"acosf", "vacosf", 4},
- {"atanf", "vatanf", 4},
-
- // Hyperbolic Functions
- {"sinhf", "vsinhf", 4},
- {"coshf", "vcoshf", 4},
- {"tanhf", "vtanhf", 4},
- {"asinhf", "vasinhf", 4},
- {"acoshf", "vacoshf", 4},
- {"atanhf", "vatanhf", 4},
+ #define TLI_DEFINE_ACCELERATE_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case MASSV: {
+ const VecDesc VecFuncs[] = {
+ #define TLI_DEFINE_MASSV_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
};
addVectorizableFunctions(VecFuncs);
break;
}
case SVML: {
const VecDesc VecFuncs[] = {
- {"sin", "__svml_sin2", 2},
- {"sin", "__svml_sin4", 4},
- {"sin", "__svml_sin8", 8},
-
- {"sinf", "__svml_sinf4", 4},
- {"sinf", "__svml_sinf8", 8},
- {"sinf", "__svml_sinf16", 16},
-
- {"llvm.sin.f64", "__svml_sin2", 2},
- {"llvm.sin.f64", "__svml_sin4", 4},
- {"llvm.sin.f64", "__svml_sin8", 8},
-
- {"llvm.sin.f32", "__svml_sinf4", 4},
- {"llvm.sin.f32", "__svml_sinf8", 8},
- {"llvm.sin.f32", "__svml_sinf16", 16},
-
- {"cos", "__svml_cos2", 2},
- {"cos", "__svml_cos4", 4},
- {"cos", "__svml_cos8", 8},
-
- {"cosf", "__svml_cosf4", 4},
- {"cosf", "__svml_cosf8", 8},
- {"cosf", "__svml_cosf16", 16},
-
- {"llvm.cos.f64", "__svml_cos2", 2},
- {"llvm.cos.f64", "__svml_cos4", 4},
- {"llvm.cos.f64", "__svml_cos8", 8},
-
- {"llvm.cos.f32", "__svml_cosf4", 4},
- {"llvm.cos.f32", "__svml_cosf8", 8},
- {"llvm.cos.f32", "__svml_cosf16", 16},
-
- {"pow", "__svml_pow2", 2},
- {"pow", "__svml_pow4", 4},
- {"pow", "__svml_pow8", 8},
-
- {"powf", "__svml_powf4", 4},
- {"powf", "__svml_powf8", 8},
- {"powf", "__svml_powf16", 16},
-
- { "__pow_finite", "__svml_pow2", 2 },
- { "__pow_finite", "__svml_pow4", 4 },
- { "__pow_finite", "__svml_pow8", 8 },
-
- { "__powf_finite", "__svml_powf4", 4 },
- { "__powf_finite", "__svml_powf8", 8 },
- { "__powf_finite", "__svml_powf16", 16 },
-
- {"llvm.pow.f64", "__svml_pow2", 2},
- {"llvm.pow.f64", "__svml_pow4", 4},
- {"llvm.pow.f64", "__svml_pow8", 8},
-
- {"llvm.pow.f32", "__svml_powf4", 4},
- {"llvm.pow.f32", "__svml_powf8", 8},
- {"llvm.pow.f32", "__svml_powf16", 16},
-
- {"exp", "__svml_exp2", 2},
- {"exp", "__svml_exp4", 4},
- {"exp", "__svml_exp8", 8},
-
- {"expf", "__svml_expf4", 4},
- {"expf", "__svml_expf8", 8},
- {"expf", "__svml_expf16", 16},
-
- { "__exp_finite", "__svml_exp2", 2 },
- { "__exp_finite", "__svml_exp4", 4 },
- { "__exp_finite", "__svml_exp8", 8 },
-
- { "__expf_finite", "__svml_expf4", 4 },
- { "__expf_finite", "__svml_expf8", 8 },
- { "__expf_finite", "__svml_expf16", 16 },
-
- {"llvm.exp.f64", "__svml_exp2", 2},
- {"llvm.exp.f64", "__svml_exp4", 4},
- {"llvm.exp.f64", "__svml_exp8", 8},
-
- {"llvm.exp.f32", "__svml_expf4", 4},
- {"llvm.exp.f32", "__svml_expf8", 8},
- {"llvm.exp.f32", "__svml_expf16", 16},
-
- {"log", "__svml_log2", 2},
- {"log", "__svml_log4", 4},
- {"log", "__svml_log8", 8},
-
- {"logf", "__svml_logf4", 4},
- {"logf", "__svml_logf8", 8},
- {"logf", "__svml_logf16", 16},
-
- { "__log_finite", "__svml_log2", 2 },
- { "__log_finite", "__svml_log4", 4 },
- { "__log_finite", "__svml_log8", 8 },
-
- { "__logf_finite", "__svml_logf4", 4 },
- { "__logf_finite", "__svml_logf8", 8 },
- { "__logf_finite", "__svml_logf16", 16 },
-
- {"llvm.log.f64", "__svml_log2", 2},
- {"llvm.log.f64", "__svml_log4", 4},
- {"llvm.log.f64", "__svml_log8", 8},
-
- {"llvm.log.f32", "__svml_logf4", 4},
- {"llvm.log.f32", "__svml_logf8", 8},
- {"llvm.log.f32", "__svml_logf16", 16},
+ #define TLI_DEFINE_SVML_VECFUNCS
+ #include "llvm/Analysis/VecFuncs.def"
};
addVectorizableFunctions(VecFuncs);
break;
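For illustration only (not part of the patch): the long inline VecDesc tables are replaced by entries generated from VecFuncs.def. Assuming the usual X-macro convention, each entry there keeps the same {scalar name, vector name, VF} shape as before, e.g.:

    // Hypothetical VecFuncs.def entry (macro name assumed):
    // TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)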
@@ -1589,9 +1565,8 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
if (funcName.empty())
return false;
- std::vector<VecDesc>::const_iterator I = std::lower_bound(
- VectorDescs.begin(), VectorDescs.end(), funcName,
- compareWithScalarFnName);
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(VectorDescs, funcName, compareWithScalarFnName);
return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
}
@@ -1600,8 +1575,8 @@ StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
F = sanitizeFunctionName(F);
if (F.empty())
return F;
- std::vector<VecDesc>::const_iterator I = std::lower_bound(
- VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
if (I->VectorizationFactor == VF)
return I->VectorFnName;
@@ -1616,8 +1591,8 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
if (F.empty())
return F;
- std::vector<VecDesc>::const_iterator I = std::lower_bound(
- ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+ std::vector<VecDesc>::const_iterator I =
+ llvm::lower_bound(ScalarDescs, F, compareWithVectorFnName);
if (I == VectorDescs.end() || StringRef(I->VectorFnName) != F)
return StringRef();
VF = I->VectorizationFactor;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 9151d46c6cce..eb04c34453fb 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,6 +18,8 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopIterator.h"
#include <utility>
using namespace llvm;
@@ -41,6 +42,101 @@ struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
};
}
+bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
+ // If the loop has irreducible control flow, it cannot be converted to a
+ // hardware loop.
+ LoopBlocksRPO RPOT(L);
+ RPOT.perform(&LI);
+ if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
+ return false;
+ return true;
+}
+
+bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
+ LoopInfo &LI, DominatorTree &DT,
+ bool ForceNestedLoop,
+ bool ForceHardwareLoopPHI) {
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ IE = ExitingBlocks.end();
+ I != IE; ++I) {
+ BasicBlock *BB = *I;
+
+ // If we pass the updated counter back through a phi, we need to know
+ // which latch the updated value will be coming from.
+ if (!L->isLoopLatch(BB)) {
+ if (ForceHardwareLoopPHI || CounterInReg)
+ continue;
+ }
+
+ const SCEV *EC = SE.getExitCount(L, BB);
+ if (isa<SCEVCouldNotCompute>(EC))
+ continue;
+ if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+ if (ConstEC->getValue()->isZero())
+ continue;
+ } else if (!SE.isLoopInvariant(EC, L))
+ continue;
+
+ if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
+ continue;
+
+ // If this exiting block is contained in a nested loop, it is not eligible
+ // for insertion of the branch-and-decrement since the inner loop would
+ // end up messing up the value in the CTR.
+ if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
+ continue;
+
+ // We now have a loop-invariant count of loop iterations (which is not the
+ // constant zero) for which we know that this loop will not exit via this
+ // exiting block.
+
+ // We need to make sure that this block will run on every loop iteration.
+ // For this to be true, we must dominate all blocks with backedges. Such
+ // blocks are in-loop predecessors to the header block.
+ bool NotAlways = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PIE = pred_end(L->getHeader());
+ PI != PIE; ++PI) {
+ if (!L->contains(*PI))
+ continue;
+
+ if (!DT.dominates(*I, *PI)) {
+ NotAlways = true;
+ break;
+ }
+ }
+
+ if (NotAlways)
+ continue;
+
+ // Make sure this block ends with a conditional branch.
+ Instruction *TI = BB->getTerminator();
+ if (!TI)
+ continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional())
+ continue;
+
+ ExitBranch = BI;
+ } else
+ continue;
+
+ // Note that this block may not be the loop latch block, even if the loop
+ // has a latch block.
+ ExitBlock = *I;
+ ExitCount = EC;
+ break;
+ }
+
+ if (!ExitBlock)
+ return false;
+ return true;
+}
+
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
@@ -61,15 +157,17 @@ int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
return Cost;
}
-int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const {
- int Cost = TTIImpl->getCallCost(FTy, NumArgs);
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
+ const User *U) const {
+ int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const {
- int Cost = TTIImpl->getCallCost(F, Arguments);
+ ArrayRef<const Value *> Arguments,
+ const User *U) const {
+ int Cost = TTIImpl->getCallCost(F, Arguments, U);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -78,6 +176,10 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
return TTIImpl->getInliningThresholdMultiplier();
}
+int TargetTransformInfo::getInlinerVectorBonusPercent() const {
+ return TTIImpl->getInlinerVectorBonusPercent();
+}
+
int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) const {
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
@@ -89,8 +191,9 @@ int TargetTransformInfo::getExtCost(const Instruction *I,
}
int TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
- int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
+ const User *U) const {
+ int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -128,6 +231,12 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
+bool TargetTransformInfo::isHardwareLoopProfitable(
+ Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
+ return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+}
+
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
@@ -159,10 +268,21 @@ bool TargetTransformInfo::canMacroFuseCmp() const {
return TTIImpl->canMacroFuseCmp();
}
+bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
+ ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) const {
+ return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+}
+
bool TargetTransformInfo::shouldFavorPostInc() const {
return TTIImpl->shouldFavorPostInc();
}
+bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
+ return TTIImpl->shouldFavorBackedgeIndex(L);
+}
+
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
return TTIImpl->isLegalMaskedStore(DataType);
}
@@ -171,6 +291,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedLoad(DataType);
}
+bool TargetTransformInfo::isLegalNTStore(Type *DataType,
+ unsigned Alignment) const {
+ return TTIImpl->isLegalNTStore(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
+ unsigned Alignment) const {
+ return TTIImpl->isLegalNTLoad(DataType, Alignment);
+}
+
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
return TTIImpl->isLegalMaskedGather(DataType);
}
@@ -179,6 +309,14 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
return TTIImpl->isLegalMaskedScatter(DataType);
}
+bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
+ return TTIImpl->isLegalMaskedCompressStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
+ return TTIImpl->isLegalMaskedExpandLoad(DataType);
+}
+
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
@@ -259,9 +397,9 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
-const TargetTransformInfo::MemCmpExpansionOptions *
-TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
- return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
+TargetTransformInfo::MemCmpExpansionOptions
+TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
@@ -570,6 +708,12 @@ int TargetTransformInfo::getAddressComputationCost(Type *Tp,
return Cost;
}
+int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
+ int Cost = TTIImpl->getMemcpyCost(I);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) const {
int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
@@ -688,6 +832,10 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+ return TTIImpl->getGISelRematGlobalCost();
+}
+
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
@@ -1023,6 +1171,16 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
Op1VP, Op2VP, Operands);
}
+ case Instruction::FNeg: {
+ TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+ TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+ Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+ Op2VK = OK_AnyValue;
+ Op2VP = OP_None;
+ SmallVector<const Value *, 2> Operands(I->operand_values());
+ return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+ Op1VP, Op2VP, Operands);
+ }
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
Type *CondTy = SI->getCondition()->getType();
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 4dec53151ed6..879c7172d038 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -1,9 +1,8 @@
//===- Trace.cpp - Implementation of Trace class --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 83974da30a54..3b9040aa0f52 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -368,26 +367,28 @@ static bool isStructPathTBAA(const MDNode *MD) {
}
AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// If accesses may alias, chain to the next AliasAnalysis.
if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
// Otherwise return a definitive result.
return NoAlias;
}
bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ AAQueryInfo &AAQI,
bool OrLocal) {
if (!EnableTBAA)
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
const MDNode *M = Loc.AATags.TBAA;
if (!M)
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
@@ -395,7 +396,7 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return true;
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
FunctionModRefBehavior
@@ -421,29 +422,31 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
}
ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) {
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc, AAQI);
}
ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) {
+ const CallBase *Call2,
+ AAQueryInfo &AAQI) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(Call1, Call2);
+ return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
}
bool MDNode::isTBAAVtableAccess() const {
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index bd13a43b8d46..9311dfbc6eba 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -1,9 +1,8 @@
//===- TypeMetadataUtils.cpp - Utilities related to type metadata ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ValueLattice.cpp b/lib/Analysis/ValueLattice.cpp
index 7de437ca480e..a0115a0eec36 100644
--- a/lib/Analysis/ValueLattice.cpp
+++ b/lib/Analysis/ValueLattice.cpp
@@ -1,9 +1,8 @@
//===- ValueLattice.cpp - Value constraint analysis -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/ValueLatticeUtils.cpp b/lib/Analysis/ValueLatticeUtils.cpp
index 22c9de4fe94d..3f9287e26ce7 100644
--- a/lib/Analysis/ValueLatticeUtils.cpp
+++ b/lib/Analysis/ValueLatticeUtils.cpp
@@ -1,9 +1,8 @@
//===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 0446426c0e66..c70906dcc629 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1,9 +1,8 @@
//===- ValueTracking.cpp - Walk computations to compute properties --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,7 +38,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -617,237 +615,242 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (Depth == MaxDepth)
continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
+ if (!Cmp)
+ continue;
+
Value *A, *B;
- auto m_V = m_CombineOr(m_Specific(V),
- m_CombineOr(m_PtrToInt(m_Specific(V)),
- m_BitCast(m_Specific(V))));
+ auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
CmpInst::Predicate Pred;
uint64_t C;
- // assume(v = a)
- if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- Known.Zero |= RHSKnown.Zero;
- Known.One |= RHSKnown.One;
- // assume(v & b = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits MaskKnown(BitWidth);
- computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
-
- // For those bits in the mask that are known to be one, we can propagate
- // known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & MaskKnown.One;
- Known.One |= RHSKnown.One & MaskKnown.One;
- // assume(~(v & b) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits MaskKnown(BitWidth);
- computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
-
- // For those bits in the mask that are known to be one, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & MaskKnown.One;
- Known.One |= RHSKnown.Zero & MaskKnown.One;
- // assume(v | b = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- // assume(~(v | b) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- // assume(v ^ b = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V. For those bits in B that are known to be one,
- // we can propagate inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- Known.Zero |= RHSKnown.One & BKnown.One;
- Known.One |= RHSKnown.Zero & BKnown.One;
- // assume(~(v ^ b) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- KnownBits BKnown(BitWidth);
- computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V. For those bits in B that are
- // known to be one, we can propagate known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- Known.Zero |= RHSKnown.Zero & BKnown.One;
- Known.One |= RHSKnown.One & BKnown.One;
- // assume(v << c = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- RHSKnown.Zero.lshrInPlace(C);
- Known.Zero |= RHSKnown.Zero;
- RHSKnown.One.lshrInPlace(C);
- Known.One |= RHSKnown.One;
- // assume(~(v << c) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C);
- Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C);
- Known.One |= RHSKnown.Zero;
- // assume(v >> c = a)
- } else if (match(Arg,
- m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.Zero << C;
- Known.One |= RHSKnown.One << C;
- // assume(~(v >> c) = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) &&
- C < BitWidth) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C;
- Known.One |= RHSKnown.Zero << C;
- // assume(v >=_s c) where c is non-negative
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGE &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isNonNegative()) {
- // We know that the sign bit is zero.
- Known.makeNonNegative();
+ switch (Cmp->getPredicate()) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ // assume(v = a)
+ if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ Known.Zero |= RHSKnown.Zero;
+ Known.One |= RHSKnown.One;
+ // assume(v & b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // known bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & MaskKnown.One;
+ Known.One |= RHSKnown.One & MaskKnown.One;
+ // assume(~(v & b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & MaskKnown.One;
+ Known.One |= RHSKnown.Zero & MaskKnown.One;
+ // assume(v | b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ // assume(~(v | b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ // assume(v ^ b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V. For those bits in B that are known to be one,
+ // we can propagate inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ Known.Zero |= RHSKnown.One & BKnown.One;
+ Known.One |= RHSKnown.Zero & BKnown.One;
+ // assume(~(v ^ b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V. For those bits in B that are
+ // known to be one, we can propagate known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ Known.Zero |= RHSKnown.Zero & BKnown.One;
+ Known.One |= RHSKnown.One & BKnown.One;
+ // assume(v << c = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ RHSKnown.Zero.lshrInPlace(C);
+ Known.Zero |= RHSKnown.Zero;
+ RHSKnown.One.lshrInPlace(C);
+ Known.One |= RHSKnown.One;
+ // assume(~(v << c) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ RHSKnown.One.lshrInPlace(C);
+ Known.Zero |= RHSKnown.One;
+ RHSKnown.Zero.lshrInPlace(C);
+ Known.One |= RHSKnown.Zero;
+ // assume(v >> c = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ Known.Zero |= RHSKnown.Zero << C;
+ Known.One |= RHSKnown.One << C;
+ // assume(~(v >> c) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ Known.Zero |= RHSKnown.One << C;
+ Known.One |= RHSKnown.Zero << C;
}
- // assume(v >_s c) where c is at least -1.
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGT &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
- // We know that the sign bit is zero.
- Known.makeNonNegative();
+ break;
+ case ICmpInst::ICMP_SGE:
+ // assume(v >=_s c) where c is non-negative
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+ if (RHSKnown.isNonNegative()) {
+ // We know that the sign bit is zero.
+ Known.makeNonNegative();
+ }
}
- // assume(v <=_s c) where c is negative
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLE &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isNegative()) {
- // We know that the sign bit is one.
- Known.makeNegative();
+ break;
+ case ICmpInst::ICMP_SGT:
+ // assume(v >_s c) where c is at least -1.
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+ if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
+ // We know that the sign bit is zero.
+ Known.makeNonNegative();
+ }
}
- // assume(v <_s c) where c is non-positive
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLT &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- if (RHSKnown.isZero() || RHSKnown.isNegative()) {
- // We know that the sign bit is one.
- Known.makeNegative();
+ break;
+ case ICmpInst::ICMP_SLE:
+ // assume(v <=_s c) where c is negative
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth + 1, Query(Q, I));
+
+ if (RHSKnown.isNegative()) {
+ // We know that the sign bit is one.
+ Known.makeNegative();
+ }
}
- // assume(v <=_u c)
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULE &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- // Whatever high bits in c are zero are known to be zero.
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
- // assume(v <_u c)
- } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULT &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
-
- // If the RHS is known zero, then this assumption must be wrong (nothing
- // is unsigned less than zero). Signal a conflict and get out of here.
- if (RHSKnown.isZero()) {
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- break;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // assume(v <_s c) where c is non-positive
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+ if (RHSKnown.isZero() || RHSKnown.isNegative()) {
+ // We know that the sign bit is one.
+ Known.makeNegative();
+ }
}
-
- // Whatever high bits in c are zero are known to be zero (if c is a power
- // of 2, then one more).
- if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
- else
+ break;
+ case ICmpInst::ICMP_ULE:
+ // assume(v <=_u c)
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero.
Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ // assume(v <_u c)
+ if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+
+ // If the RHS is known zero, then this assumption must be wrong (nothing
+ // is unsigned less than zero). Signal a conflict and get out of here.
+ if (RHSKnown.isZero()) {
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ break;
+ }
+
+ // Whatever high bits in c are zero are known to be zero (if c is a power
+ // of 2, then one more).
+ if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
+ else
+ Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
+ }
+ break;
}
}
@@ -1129,12 +1132,9 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Q.DL.getTypeSizeInBits(ScalarTy);
assert(SrcBitWidth && "SrcBitWidth can't be zero");
- Known = Known.zextOrTrunc(SrcBitWidth);
+ Known = Known.zextOrTrunc(SrcBitWidth, false);
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
- Known = Known.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- Known.Zero.setBitsFrom(SrcBitWidth);
+ Known = Known.zextOrTrunc(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case Instruction::BitCast: {
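
Editor's note: the ZExt hunk above folds the "top bits become known zero" step into the zextOrTrunc call via the ExtendedBitsAreKnownZero flag. A minimal standalone sketch of that semantics, using a simplified Zero/One bit-mask pair instead of the real llvm::KnownBits API:

#include <cassert>
#include <cstdint>
#include <iostream>

// Simplified stand-in for KnownBits: a bit is known-zero if set in Zero,
// known-one if set in One, and unknown otherwise (never both).
struct Known {
  uint32_t Zero = 0;
  uint32_t One = 0;
};

int main() {
  // Suppose an 8-bit value has the form 0b????0101: the low four bits are
  // fully known, the high four are unknown.
  Known Src;
  Src.One = 0x05;  // bits 0 and 2 are known one
  Src.Zero = 0x0A; // bits 1 and 3 are known zero

  // Zero-extend to 16 bits. Every bit introduced by the extension is known
  // to be zero, which is what passing ExtendedBitsAreKnownZero=true encodes.
  Known Ext;
  Ext.One = Src.One;             // known ones are unchanged
  Ext.Zero = Src.Zero | 0xFF00u; // bits 8..15 become known zero
  assert((Ext.Zero & Ext.One) == 0); // no bit is both known zero and one
  std::cout << std::hex << Ext.Zero << " " << Ext.One << "\n"; // ff0a 5
  return 0;
}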
@@ -1527,6 +1527,37 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
break;
}
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat: {
+ bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+
+ // Add: Leading ones of either operand are preserved.
+ // Sub: Leading zeros of LHS and leading ones of RHS are preserved
+ // as leading zeros in the result.
+ unsigned LeadingKnown;
+ if (IsAdd)
+ LeadingKnown = std::max(Known.countMinLeadingOnes(),
+ Known2.countMinLeadingOnes());
+ else
+ LeadingKnown = std::max(Known.countMinLeadingZeros(),
+ Known2.countMinLeadingOnes());
+
+ Known = KnownBits::computeForAddSub(
+ IsAdd, /* NSW */ false, Known, Known2);
+
+ // We select between the operation result and all-ones/zero
+ // respectively, so we can preserve known ones/zeros.
+ if (IsAdd) {
+ Known.One.setHighBits(LeadingKnown);
+ Known.Zero.clearAllBits();
+ } else {
+ Known.Zero.setHighBits(LeadingKnown);
+ Known.One.clearAllBits();
+ }
+ break;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
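
Editor's note: the uadd.sat/usub.sat hunk above reasons about preserved leading bits. That claim can be checked exhaustively at a small bit width; the following standalone sketch (plain C++, 8-bit values, not the LLVM KnownBits API) verifies the "add preserves the leading ones of either operand" half of the comment:

#include <cassert>
#include <cstdint>

// 8-bit unsigned saturating add, matching the semantics of llvm.uadd.sat.i8.
static uint8_t uadd_sat8(uint8_t A, uint8_t B) {
  unsigned S = unsigned(A) + unsigned(B);
  return S > 0xFF ? 0xFF : uint8_t(S);
}

static unsigned leadingOnes8(uint8_t V) {
  unsigned N = 0;
  for (int Bit = 7; Bit >= 0 && (V & (1u << Bit)); --Bit)
    ++N;
  return N;
}

int main() {
  // The saturating sum has at least as many leading ones as either operand:
  // the true sum can only grow the value, and saturation clamps to all-ones.
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      unsigned LA = leadingOnes8(uint8_t(A));
      unsigned LB = leadingOnes8(uint8_t(B));
      unsigned Lead = LA > LB ? LA : LB;
      assert(leadingOnes8(uadd_sat8(uint8_t(A), uint8_t(B))) >= Lead);
    }
  return 0;
}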
@@ -1967,6 +1998,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// Must be non-zero due to null test above.
return true;
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ // See the comment for IntToPtr/PtrToInt instructions below.
+ if (CE->getOpcode() == Instruction::IntToPtr ||
+ CE->getOpcode() == Instruction::PtrToInt)
+ if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <=
+ Q.DL.getTypeSizeInBits(CE->getType()))
+ return isKnownNonZero(CE->getOperand(0), Depth, Q);
+ }
+
// For constant vectors, check that all elements are undefined or known
// non-zero to determine that the whole vector is known non-zero.
if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
@@ -2037,11 +2077,33 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
return true;
+ // Look through bitcast operations, GEPs, and int2ptr instructions as they
+ // do not alter the value, or at least not the nullness property of the
+ // value, e.g., int2ptr is allowed to zero/sign extend the value.
+ //
+ // Note that we have to take special care to avoid looking through
+ // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
+ // as casts that can alter the value, e.g., AddrSpaceCasts.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
return true;
+
+ if (auto *BCO = dyn_cast<BitCastOperator>(V))
+ return isKnownNonZero(BCO->getOperand(0), Depth, Q);
+
+ if (auto *I2P = dyn_cast<IntToPtrInst>(V))
+ if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()) <=
+ Q.DL.getTypeSizeInBits(I2P->getDestTy()))
+ return isKnownNonZero(I2P->getOperand(0), Depth, Q);
}
+ // Similar to int2ptr above, we can look through ptr2int here if the cast
+ // is a no-op or an extend and not a truncate.
+ if (auto *P2I = dyn_cast<PtrToIntInst>(V))
+ if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()) <=
+ Q.DL.getTypeSizeInBits(P2I->getDestTy()))
+ return isKnownNonZero(P2I->getOperand(0), Depth, Q);
+
unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
// X | Y != 0 if X != 0 or Y != 0.
@@ -3082,6 +3144,11 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
case Intrinsic::sqrt:
return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ // If either operand is not NaN, the result is not NaN.
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
+ isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
default:
return false;
}
@@ -3107,7 +3174,7 @@ bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
return true;
}
-Value *llvm::isBytewiseValue(Value *V) {
+Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
// All byte-wide stores are splatable, even of arbitrary variables.
if (V->getType()->isIntegerTy(8))
@@ -3120,6 +3187,10 @@ Value *llvm::isBytewiseValue(Value *V) {
if (isa<UndefValue>(V))
return UndefInt8;
+ const uint64_t Size = DL.getTypeStoreSize(V->getType());
+ if (!Size)
+ return UndefInt8;
+
Constant *C = dyn_cast<Constant>(V);
if (!C) {
// Conceptually, we could handle things like:
@@ -3146,7 +3217,8 @@ Value *llvm::isBytewiseValue(Value *V) {
else if (CFP->getType()->isDoubleTy())
Ty = Type::getInt64Ty(Ctx);
// Don't handle long double formats, which have strange constraints.
- return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
+ return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
+ : nullptr;
}
// We can handle constant integers that are multiple of 8 bits.
@@ -3159,6 +3231,17 @@ Value *llvm::isBytewiseValue(Value *V) {
}
}
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
+ auto PS = DL.getPointerSizeInBits(
+ cast<PointerType>(CE->getType())->getAddressSpace());
+ return isBytewiseValue(
+ ConstantExpr::getIntegerCast(CE->getOperand(0),
+ Type::getIntNTy(Ctx, PS), false),
+ DL);
+ }
+ }
+
auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
if (LHS == RHS)
return LHS;
@@ -3174,20 +3257,15 @@ Value *llvm::isBytewiseValue(Value *V) {
if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
Value *Val = UndefInt8;
for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
- if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
+ if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
return nullptr;
return Val;
}
- if (isa<ConstantVector>(C)) {
- Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
- return Splat ? isBytewiseValue(Splat) : nullptr;
- }
-
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ if (isa<ConstantAggregate>(C)) {
Value *Val = UndefInt8;
for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
- if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+ if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
return nullptr;
return Val;
}
@@ -3363,57 +3441,6 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return nullptr;
}
-/// Analyze the specified pointer to see if it can be expressed as a base
-/// pointer plus a constant offset. Return the base and offset to the caller.
-Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &DL) {
- unsigned BitWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
- APInt ByteOffset(BitWidth, 0);
-
- // We walk up the defs but use a visited set to handle unreachable code. In
- // that case, we stop after accumulating the cycle once (not that it
- // matters).
- SmallPtrSet<Value *, 16> Visited;
- while (Visited.insert(Ptr).second) {
- if (Ptr->getType()->isVectorTy())
- break;
-
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- // If one of the values we have visited is an addrspacecast, then
- // the pointer type of this GEP may be different from the type
- // of the Ptr parameter which was passed to this function. This
- // means when we construct GEPOffset, we need to use the size
- // of GEP's pointer type rather than the size of the original
- // pointer type.
- APInt GEPOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
- if (!GEP->accumulateConstantOffset(DL, GEPOffset))
- break;
-
- APInt OrigByteOffset(ByteOffset);
- ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
- if (ByteOffset.getMinSignedBits() > 64) {
- // Stop traversal if the pointer offset wouldn't fit into int64_t
- // (this should be removed if Offset is updated to an APInt)
- ByteOffset = OrigByteOffset;
- break;
- }
-
- Ptr = GEP->getPointerOperand();
- } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
- Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
- Ptr = cast<Operator>(Ptr)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
- if (GA->isInterposable())
- break;
- Ptr = GA->getAliasee();
- } else {
- break;
- }
- }
- Offset = ByteOffset.getSExtValue();
- return Ptr;
-}
-
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
unsigned CharSize) {
// Make sure the GEP has exactly three arguments.
@@ -3638,7 +3665,9 @@ const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
const CallBase *Call) {
return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
- Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
+ Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
}
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3717,26 +3746,27 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
return V;
}
-void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+void llvm::GetUnderlyingObjects(const Value *V,
+ SmallVectorImpl<const Value *> &Objects,
const DataLayout &DL, LoopInfo *LI,
unsigned MaxLookup) {
- SmallPtrSet<Value *, 4> Visited;
- SmallVector<Value *, 4> Worklist;
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
Worklist.push_back(V);
do {
- Value *P = Worklist.pop_back_val();
+ const Value *P = Worklist.pop_back_val();
P = GetUnderlyingObject(P, DL, MaxLookup);
if (!Visited.insert(P).second)
continue;
- if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ if (auto *SI = dyn_cast<SelectInst>(P)) {
Worklist.push_back(SI->getTrueValue());
Worklist.push_back(SI->getFalseValue());
continue;
}
- if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ if (auto *PN = dyn_cast<PHINode>(P)) {
// If this PHI changes the underlying object in every iteration of the
// loop, don't look through it. Consider:
// int **A;
@@ -3797,10 +3827,10 @@ bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
do {
V = Working.pop_back_val();
- SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+ SmallVector<const Value *, 4> Objs;
+ GetUnderlyingObjects(V, Objs, DL);
- for (Value *V : Objs) {
+ for (const Value *V : Objs) {
if (!Visited.insert(V).second)
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
@@ -3888,7 +3918,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
- LI->getAlignment(), DL, CtxI, DT);
+ LI->getType(), LI->getAlignment(),
+ DL, CtxI, DT);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
@@ -3901,6 +3932,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::VAArg:
case Instruction::Alloca:
case Instruction::Invoke:
+ case Instruction::CallBr:
case Instruction::PHI:
case Instruction::Store:
case Instruction::Ret:
@@ -3926,51 +3958,46 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}
+/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
+static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
+ switch (OR) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return OverflowResult::MayOverflow;
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ return OverflowResult::AlwaysOverflowsLow;
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+ return OverflowResult::AlwaysOverflowsHigh;
+ case ConstantRange::OverflowResult::NeverOverflows:
+ return OverflowResult::NeverOverflows;
+ }
+ llvm_unreachable("Unknown OverflowResult");
+}
+
+/// Combine constant ranges from computeConstantRange() and computeKnownBits().
+static ConstantRange computeConstantRangeIncludingKnownBits(
+ const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
+ KnownBits Known = computeKnownBits(
+ V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
+ ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
+ ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
+ ConstantRange::PreferredRangeType RangeType =
+ ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
+ return CR1.intersectWith(CR2, RangeType);
+}
+
OverflowResult llvm::computeOverflowForUnsignedMul(
const Value *LHS, const Value *RHS, const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- // Multiplying n * m significant bits yields a result of n + m significant
- // bits. If the total number of significant bits does not exceed the
- // result bit width (minus 1), there is no overflow.
- // This means if we have enough leading zero bits in the operands
- // we can guarantee that the result does not overflow.
- // Ref: "Hacker's Delight" by Henry Warren
- unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- KnownBits LHSKnown(BitWidth);
- KnownBits RHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
- UseInstrInfo);
- computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
- UseInstrInfo);
- // Note that underestimating the number of zero bits gives a more
- // conservative answer.
- unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
- RHSKnown.countMinLeadingZeros();
- // First handle the easy case: if we have enough zero bits there's
- // definitely no overflow.
- if (ZeroBits >= BitWidth)
- return OverflowResult::NeverOverflows;
-
- // Get the largest possible values for each operand.
- APInt LHSMax = ~LHSKnown.Zero;
- APInt RHSMax = ~RHSKnown.Zero;
-
- // We know the multiply operation doesn't overflow if the maximum values for
- // each operand will not overflow after we multiply them together.
- bool MaxOverflow;
- (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
- if (!MaxOverflow)
- return OverflowResult::NeverOverflows;
-
- // We know it always overflows if multiplying the smallest possible values for
- // the operands also results in overflow.
- bool MinOverflow;
- (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
- if (MinOverflow)
- return OverflowResult::AlwaysOverflows;
-
- return OverflowResult::MayOverflow;
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
+ ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
+ return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
}
OverflowResult
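
Editor's note: the hunk above replaces hand-rolled known-bits reasoning with a range-based formulation. A simplified standalone sketch of the range test it relies on (one AlwaysOverflows bucket instead of the low/high split used above, plain integer bounds instead of ConstantRange):

#include <cassert>
#include <cstdint>

enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };

// Classify whether A + B can wrap for unsigned 32-bit values known to lie in
// the inclusive ranges [ALo, AHi] and [BLo, BHi]. This mirrors the idea behind
// ConstantRange::unsignedAddMayOverflow: only the extreme points matter.
static OverflowResult unsignedAddOverflow(uint32_t ALo, uint32_t AHi,
                                          uint32_t BLo, uint32_t BHi) {
  // If even the smallest possible sum wraps, overflow always happens.
  if (uint64_t(ALo) + uint64_t(BLo) > UINT32_MAX)
    return OverflowResult::AlwaysOverflows;
  // If even the largest possible sum fits, overflow never happens.
  if (uint64_t(AHi) + uint64_t(BHi) <= UINT32_MAX)
    return OverflowResult::NeverOverflows;
  return OverflowResult::MayOverflow;
}

int main() {
  // Known bits yield such ranges directly: for an unsigned value the minimum
  // is Known.One and the maximum is ~Known.Zero (as fromKnownBits does above).
  assert(unsignedAddOverflow(0, 0x7FFFFFFFu, 0, 0x7FFFFFFFu) ==
         OverflowResult::NeverOverflows);
  assert(unsignedAddOverflow(0x80000000u, 0xFFFFFFFFu, 0x80000000u,
                             0xFFFFFFFFu) == OverflowResult::AlwaysOverflows);
  assert(unsignedAddOverflow(0, 0xFFFFFFFFu, 0, 1) ==
         OverflowResult::MayOverflow);
  return 0;
}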
@@ -4020,69 +4047,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(
const Value *LHS, const Value *RHS, const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
- if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
-
- if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
- // The sign bit is set in both cases: this MUST overflow.
- return OverflowResult::AlwaysOverflows;
- }
-
- if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
- // The sign bit is clear in both cases: this CANNOT overflow.
- return OverflowResult::NeverOverflows;
- }
- }
-
- return OverflowResult::MayOverflow;
-}
-
-/// Return true if we can prove that adding the two values of the
-/// knownbits will not overflow.
-/// Otherwise return false.
-static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
- const KnownBits &RHSKnown) {
- // Addition of two 2's complement numbers having opposite signs will never
- // overflow.
- if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
- (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
- return true;
-
- // If either of the values is known to be non-negative, adding them can only
- // overflow if the second is also non-negative, so we can assume that.
- // Two non-negative numbers will only overflow if there is a carry to the
- // sign bit, so we can check if even when the values are as big as possible
- // there is no overflow to the sign bit.
- if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
- APInt MaxLHS = ~LHSKnown.Zero;
- MaxLHS.clearSignBit();
- APInt MaxRHS = ~RHSKnown.Zero;
- MaxRHS.clearSignBit();
- APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
- return Result.isSignBitClear();
- }
-
- // If either of the values is known to be negative, adding them can only
- // overflow if the second is also negative, so we can assume that.
- // Two negative number will only overflow if there is no carry to the sign
- // bit, so we can check if even when the values are as small as possible
- // there is overflow to the sign bit.
- if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
- APInt MinLHS = LHSKnown.One;
- MinLHS.clearSignBit();
- APInt MinRHS = RHSKnown.One;
- MinRHS.clearSignBit();
- APInt Result = std::move(MinLHS) + std::move(MinRHS);
- return Result.isSignBitSet();
- }
-
- // If we reached here it means that we know nothing about the sign bits.
- // In this case we can't know if there will be an overflow, since by
- // changing the sign bits any two values can be made to overflow.
- return false;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
@@ -4114,30 +4085,35 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
return OverflowResult::NeverOverflows;
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-
- if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
- return OverflowResult::NeverOverflows;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ OverflowResult OR =
+ mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
+ if (OR != OverflowResult::MayOverflow)
+ return OR;
// The remaining code needs Add to be available. Early returns if not so.
if (!Add)
return OverflowResult::MayOverflow;
// If the sign of Add is the same as at least one of the operands, this add
- // CANNOT overflow. This is particularly useful when the sum is
- // @llvm.assume'ed non-negative rather than proved so from analyzing its
- // operands.
+ // CANNOT overflow. If this can be determined from the known bits of the
+ // operands the above signedAddMayOverflow() check will have already done so.
+ // The only other way to improve on the known bits is from an assumption, so
+ // call computeKnownBitsFromAssume() directly.
bool LHSOrRHSKnownNonNegative =
- (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
+ (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
bool LHSOrRHSKnownNegative =
- (LHSKnown.isNegative() || RHSKnown.isNegative());
+ (LHSRange.isAllNegative() || RHSRange.isAllNegative());
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
- KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits AddKnown(LHSRange.getBitWidth());
+ computeKnownBitsFromAssume(
+ Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true));
if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
- (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
+ (AddKnown.isNegative() && LHSOrRHSKnownNegative))
return OverflowResult::NeverOverflows;
- }
}
return OverflowResult::MayOverflow;
@@ -4149,20 +4125,11 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-
- // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
- if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
- return OverflowResult::NeverOverflows;
-
- // If the LHS is non-negative and the RHS negative, we always wrap.
- if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
- return OverflowResult::AlwaysOverflows;
- }
-
- return OverflowResult::MayOverflow;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT);
+ return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
}
OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
@@ -4177,37 +4144,19 @@ OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
return OverflowResult::NeverOverflows;
- KnownBits LHSKnown = computeKnownBits(LHS, DL, 0, AC, CxtI, DT);
-
- KnownBits RHSKnown = computeKnownBits(RHS, DL, 0, AC, CxtI, DT);
-
- // Subtraction of two 2's complement numbers having identical signs will
- // never overflow.
- if ((LHSKnown.isNegative() && RHSKnown.isNegative()) ||
- (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()))
- return OverflowResult::NeverOverflows;
-
- // TODO: implement logic similar to checkRippleForAdd
- return OverflowResult::MayOverflow;
+ ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
+ LHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
+ RHS, /*ForSigned=*/true, DL, /*Depth=*/0, AC, CxtI, DT);
+ return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
}
-bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
const DominatorTree &DT) {
-#ifndef NDEBUG
- auto IID = II->getIntrinsicID();
- assert((IID == Intrinsic::sadd_with_overflow ||
- IID == Intrinsic::uadd_with_overflow ||
- IID == Intrinsic::ssub_with_overflow ||
- IID == Intrinsic::usub_with_overflow ||
- IID == Intrinsic::smul_with_overflow ||
- IID == Intrinsic::umul_with_overflow) &&
- "Not an overflow intrinsic!");
-#endif
-
SmallVector<const BranchInst *, 2> GuardingBranches;
SmallVector<const ExtractValueInst *, 2> Results;
- for (const User *U : II->users()) {
+ for (const User *U : WO->users()) {
if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
@@ -4307,6 +4256,11 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
if (!CS.doesNotThrow())
return false;
+    // A function which doesn't throw and has the "willreturn" attribute will
+    // always return.
+ if (CS.hasFnAttr(Attribute::WillReturn))
+ return true;
+
// Non-throwing call sites can loop infinitely, call exit/pthread_exit
// etc. and thus not return. However, LLVM already assumes that
//
@@ -4325,7 +4279,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// is guaranteed to return.
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
match(I, m_Intrinsic<Intrinsic::assume>()) ||
- match(I, m_Intrinsic<Intrinsic::sideeffect>());
+ match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
+ match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
}
// Other instructions return normally.
@@ -4333,7 +4288,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
}
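As a usage sketch of the willreturn early-exit added above (function name illustrative): a callee carrying both attributes can neither unwind nor run forever, so a call to it always reaches the next instruction.

    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Sketch: nounwind + willreturn on the callee is enough to guarantee
    // fall-through to the successor of the call site.
    static bool calleeAlwaysFallsThrough(const Function &Callee) {
      return Callee.doesNotThrow() &&
             Callee.hasFnAttribute(Attribute::WillReturn);
    }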
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
- // TODO: This is slightly consdervative for invoke instruction since exiting
+  // TODO: This is slightly conservative for invoke instructions since exiting
// via an exception *is* normal control for them.
for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
@@ -4357,6 +4312,8 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
}
bool llvm::propagatesFullPoison(const Instruction *I) {
+ // TODO: This should include all instructions apart from phis, selects and
+ // call-like instructions.
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
@@ -4409,10 +4366,21 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
return I->getOperand(1);
default:
+    // Note: It's really tempting to think that a conditional branch or
+    // switch should be listed here, but that's incorrect. It's not the
+    // branch on poison itself which is UB; it is executing a side-effecting
+    // instruction which follows the branch.
return nullptr;
}
}
+bool llvm::mustTriggerUB(const Instruction *I,
+ const SmallSet<const Value *, 16>& KnownPoison) {
+ auto *NotPoison = getGuaranteedNonFullPoisonOp(I);
+ return (NotPoison && KnownPoison.count(NotPoison));
+}
+
+
bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
// We currently only look for uses of poison values within the same basic
// block, as that makes it easier to guarantee that the uses will be
@@ -4436,8 +4404,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
while (Iter++ < MaxDepth) {
for (auto &I : make_range(Begin, End)) {
if (&I != PoisonI) {
- const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I);
- if (NotPoison != nullptr && YieldsPoison.count(NotPoison))
+ if (mustTriggerUB(&I, YieldsPoison))
return true;
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
return false;
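A sketch of how the new mustTriggerUB() helper composes with the surrounding walk, simplified from programUndefinedIfFullPoison() above; the wrapper name and the poison-propagation step are illustrative:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallSet.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Sketch: scan forward from a poison-producing instruction and report
    // whether UB must be reached before control can leave the block.
    static bool blockMustHitUB(const Instruction *PoisonI) {
      SmallSet<const Value *, 16> YieldsPoison;
      YieldsPoison.insert(PoisonI);
      const BasicBlock *BB = PoisonI->getParent();
      for (const Instruction &I :
           make_range(PoisonI->getIterator(), BB->end())) {
        if (&I == PoisonI)
          continue;
        if (mustTriggerUB(&I, YieldsPoison))
          return true;
        if (!isGuaranteedToTransferExecutionToSuccessor(&I))
          return false;
        // Propagate poison through instructions that forward it.
        if (propagatesFullPoison(&I) &&
            any_of(I.operands(),
                   [&](const Use &U) { return YieldsPoison.count(U.get()); }))
          YieldsPoison.insert(&I);
      }
      return false;
    }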
@@ -4926,6 +4893,10 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
return {SPF_ABS, SPNB_NA, false};
+ // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
+ if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
+ return {SPF_ABS, SPNB_NA, false};
+
// (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
// (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
@@ -5084,11 +5055,19 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
+ CastOp, Depth);
+}
+
+SelectPatternResult llvm::matchDecomposedSelectPattern(
+ CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp, unsigned Depth) {
CmpInst::Predicate Pred = CmpI->getPredicate();
Value *CmpLHS = CmpI->getOperand(0);
Value *CmpRHS = CmpI->getOperand(1);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
FastMathFlags FMF;
if (isa<FPMathOperator>(CmpI))
FMF = CmpI->getFastMathFlags();
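A usage sketch for the new decomposed entry point (helper name illustrative); it is useful when the compare and the two arms are not packaged in a SelectInst yet:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Sketch: recognize min/max-style patterns from loose compare + arms.
    static bool looksLikeMinOrMax(CmpInst *Cmp, Value *TVal, Value *FVal) {
      Value *LHS, *RHS;
      SelectPatternResult R = matchDecomposedSelectPattern(
          Cmp, TVal, FVal, LHS, RHS, /*CastOp=*/nullptr, /*Depth=*/0);
      return SelectPatternResult::isMinOrMax(R.Flavor);
    }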
@@ -5430,3 +5409,298 @@ Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
bool CondIsTrue = TrueBB == ContextBB;
return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);
}
+
+static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
+ APInt &Upper, const InstrInfoQuery &IIQ) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ // FIXME: If we have both nuw and nsw, we should reduce the range further.
+ if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+ // 'add nuw x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
+ if (C->isNegative()) {
+ // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::And:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'and x, C' produces [0, C].
+ Upper = *C + 1;
+ break;
+
+ case Instruction::Or:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'or x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ break;
+
+ case Instruction::AShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
+ Lower = APInt::getSignedMinValue(Width).ashr(*C);
+ Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ unsigned ShiftAmount = Width - 1;
+ if (!C->isNullValue() && IIQ.isExact(&BO))
+ ShiftAmount = C->countTrailingZeros();
+ if (C->isNegative()) {
+ // 'ashr C, x' produces [C, C >> (Width-1)]
+ Lower = *C;
+ Upper = C->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C, x' produces [C >> (Width-1), C]
+ Lower = C->ashr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ }
+ break;
+
+ case Instruction::LShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'lshr x, C' produces [0, UINT_MAX >> C].
+ Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'lshr C, x' produces [C >> (Width-1), C].
+ unsigned ShiftAmount = Width - 1;
+ if (!C->isNullValue() && IIQ.isExact(&BO))
+ ShiftAmount = C->countTrailingZeros();
+ Lower = C->lshr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::Shl:
+ if (match(BO.getOperand(0), m_APInt(C))) {
+ if (IIQ.hasNoUnsignedWrap(&BO)) {
+ // 'shl nuw C, x' produces [C, C << CLZ(C)]
+ Lower = *C;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
+ if (C->isNegative()) {
+ // 'shl nsw C, x' produces [C << CLO(C)-1, C]
+ unsigned ShiftAmount = C->countLeadingOnes() - 1;
+ Lower = C->shl(ShiftAmount);
+ Upper = *C + 1;
+ } else {
+ // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
+ unsigned ShiftAmount = C->countLeadingZeros() - 1;
+ Lower = *C;
+ Upper = C->shl(ShiftAmount) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::SDiv:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C->isAllOnesValue()) {
+        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+        // (INT_MIN sdiv -1 overflows, so INT_MIN is excluded from the range)
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
+ // where C != -1 and C != 0 and C != 1
+ Lower = IntMin.sdiv(*C);
+ Upper = IntMax.sdiv(*C);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
+ }
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ if (C->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C, x' produces [-|C|, |C|].
+ Upper = C->abs() + 1;
+ Lower = (-Upper) + 1;
+ }
+ }
+ break;
+
+ case Instruction::UDiv:
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ // 'udiv x, C' produces [0, UINT_MAX / C].
+ Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'udiv C, x' produces [0, C].
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::SRem:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ // 'srem x, C' produces (-|C|, |C|).
+ Upper = C->abs();
+ Lower = (-Upper) + 1;
+ }
+ break;
+
+ case Instruction::URem:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'urem x, C' produces [0, C).
+ Upper = *C;
+ break;
+
+ default:
+ break;
+ }
+}
+
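A worked example of the [Lower, Upper) convention used above, with illustrative values: for i8 'add nuw %x, 5' the helper leaves Lower = 5 and Upper = 0, and getNonEmpty() then denotes the wrapped set [5, 255].

    #include <cassert>
    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    static void addNuwRangeExample() {
      // Lower = C (= 5), Upper untouched (= 0): [5, 0) == {5, ..., 255} for i8.
      ConstantRange CR = ConstantRange::getNonEmpty(APInt(8, 5), APInt(8, 0));
      assert(CR.getUnsignedMin() == 5 && CR.getUnsignedMax() == 255);
      (void)CR;
    }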
+static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
+ APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ // uadd.sat(x, C) produces [C, UINT_MAX].
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C)))
+ Lower = *C;
+ break;
+ case Intrinsic::sadd_sat:
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ break;
+ case Intrinsic::usub_sat:
+ // usub.sat(C, x) produces [0, C].
+ if (match(II.getOperand(0), m_APInt(C)))
+ Upper = *C + 1;
+ // usub.sat(x, C) produces [0, UINT_MAX - C].
+ else if (match(II.getOperand(1), m_APInt(C)))
+ Upper = APInt::getMaxValue(Width) - *C + 1;
+ break;
+ case Intrinsic::ssub_sat:
+ if (match(II.getOperand(0), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = *C - APInt::getSignedMinValue(Width) + 1;
+ } else {
+ // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+ Lower = *C - APInt::getSignedMaxValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ } else if (match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
+ Lower = APInt::getSignedMinValue(Width) - *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ } else {
+ // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) - *C + 1;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
+ APInt &Upper) {
+ const Value *LHS, *RHS;
+ SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
+ if (R.Flavor == SPF_UNKNOWN)
+ return;
+
+ unsigned BitWidth = SI.getType()->getScalarSizeInBits();
+
+ if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
+ // If the negation part of the abs (in RHS) has the NSW flag,
+ // then the result of abs(X) is [0..SIGNED_MAX],
+ // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
+ Lower = APInt::getNullValue(BitWidth);
+ if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ Upper = APInt::getSignedMaxValue(BitWidth) + 1;
+ else
+ Upper = APInt::getSignedMinValue(BitWidth) + 1;
+ return;
+ }
+
+ if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
+ // The result of -abs(X) is <= 0.
+ Lower = APInt::getSignedMinValue(BitWidth);
+ Upper = APInt(BitWidth, 1);
+ return;
+ }
+
+ const APInt *C;
+ if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
+ return;
+
+ switch (R.Flavor) {
+ case SPF_UMIN:
+ Upper = *C + 1;
+ break;
+ case SPF_UMAX:
+ Lower = *C;
+ break;
+ case SPF_SMIN:
+ Lower = APInt::getSignedMinValue(BitWidth);
+ Upper = *C + 1;
+ break;
+ case SPF_SMAX:
+ Lower = *C;
+ Upper = APInt::getSignedMaxValue(BitWidth) + 1;
+ break;
+ default:
+ break;
+ }
+}
+
+ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
+ assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
+
+ const APInt *C;
+ if (match(V, m_APInt(C)))
+ return ConstantRange(*C);
+
+ InstrInfoQuery IIQ(UseInstrInfo);
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+ APInt Lower = APInt(BitWidth, 0);
+ APInt Upper = APInt(BitWidth, 0);
+ if (auto *BO = dyn_cast<BinaryOperator>(V))
+ setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+ else if (auto *II = dyn_cast<IntrinsicInst>(V))
+ setLimitsForIntrinsic(*II, Lower, Upper);
+ else if (auto *SI = dyn_cast<SelectInst>(V))
+ setLimitsForSelectPattern(*SI, Lower, Upper);
+
+ ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
+
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
+ CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));
+
+ return CR;
+}
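A small usage sketch for the new computeConstantRange() entry point (helper name illustrative):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    // Sketch: true when the inferred range for V provably excludes zero,
    // e.g. for 'or i8 %x, 1' or !range metadata that omits 0.
    static bool rangeExcludesZero(const Value *V) {
      ConstantRange CR = computeConstantRange(V, /*UseInstrInfo=*/true);
      return !CR.contains(APInt::getNullValue(CR.getBitWidth()));
    }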
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 5656a19d7e0d..986756eb2627 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -1,9 +1,8 @@
//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,8 +37,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
cl::init(8));
/// Return true if all of the intrinsic's arguments and return type are scalars
-/// for the scalar form of the intrinsic and vectors for the vector form of the
-/// intrinsic.
+/// for the scalar form of the intrinsic, and vectors for the vector form of the
+/// intrinsic (except operands that are marked as always being scalar by
+/// hasVectorInstrinsicScalarOpd).
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::bswap: // Begin integer bit-manipulation.
@@ -49,6 +49,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix:
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
@@ -74,18 +81,13 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fmuladd:
case Intrinsic::powi:
case Intrinsic::canonicalize:
- case Intrinsic::sadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::usub_sat:
return true;
default:
return false;
}
}
-/// Identifies if the intrinsic has a scalar operand. It check for
-/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
switch (ID) {
@@ -93,6 +95,10 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
case Intrinsic::cttz:
case Intrinsic::powi:
return (ScalarOpdIdx == 1);
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix:
+ return (ScalarOpdIdx == 2);
default:
return false;
}
@@ -300,30 +306,60 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
-/// the input value is (1) a splat constants vector or (2) a sequence
-/// of instructions that broadcast a single value into a vector.
-///
+/// the input value is (1) a splat constant vector or (2) a sequence
+/// of instructions that broadcasts a scalar at element 0.
const llvm::Value *llvm::getSplatValue(const Value *V) {
-
- if (auto *C = dyn_cast<Constant>(V))
- if (isa<VectorType>(V->getType()))
+ if (isa<VectorType>(V->getType()))
+ if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue();
- auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
- if (!ShuffleInst)
- return nullptr;
- // All-zero (or undef) shuffle mask elements.
- for (int MaskElt : ShuffleInst->getShuffleMask())
- if (MaskElt != 0 && MaskElt != -1)
- return nullptr;
- // The first shuffle source is 'insertelement' with index 0.
- auto *InsertEltInst =
- dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
- if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
- !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
- return nullptr;
+ // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
+ Value *Splat;
+ if (match(V, m_ShuffleVector(m_InsertElement(m_Value(), m_Value(Splat),
+ m_ZeroInt()),
+ m_Value(), m_ZeroInt())))
+ return Splat;
- return InsertEltInst->getOperand(1);
+ return nullptr;
+}
+
+// This setting is based on its counterpart in value tracking, but it could be
+// adjusted if needed.
+const unsigned MaxDepth = 6;
+
+bool llvm::isSplatValue(const Value *V, unsigned Depth) {
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+
+ if (isa<VectorType>(V->getType())) {
+ if (isa<UndefValue>(V))
+ return true;
+ // FIXME: Constant splat analysis does not allow undef elements.
+ if (auto *C = dyn_cast<Constant>(V))
+ return C->getSplatValue() != nullptr;
+ }
+
+ // FIXME: Constant splat analysis does not allow undef elements.
+ Constant *Mask;
+ if (match(V, m_ShuffleVector(m_Value(), m_Value(), m_Constant(Mask))))
+ return Mask->getSplatValue() != nullptr;
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth++ == MaxDepth)
+ return false;
+
+ // If both operands of a binop are splats, the result is a splat.
+ Value *X, *Y, *Z;
+ if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
+ return isSplatValue(X, Depth) && isSplatValue(Y, Depth);
+
+ // If all operands of a select are splats, the result is a splat.
+ if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
+ return isSplatValue(X, Depth) && isSplatValue(Y, Depth) &&
+ isSplatValue(Z, Depth);
+
+ // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
+
+ return false;
}
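A sketch contrasting the two queries (helper name illustrative): getSplatValue() returns the broadcast scalar when it can be identified, while the new isSplatValue() only answers whether all lanes are provably equal.

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    // Sketch: return the splatted scalar if known, V itself if it is a splat
    // whose scalar cannot be named, and null otherwise.
    static const Value *classifySplat(const Value *V) {
      if (const Value *Scalar = getSplatValue(V))
        return Scalar;                      // constant splat or inselt+shuffle
      return isSplatValue(V, /*Depth=*/0) ? V : nullptr;
    }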
MapVector<Instruction *, uint64_t>
@@ -711,6 +747,52 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
+ auto *ConstMask = dyn_cast<Constant>(Mask);
+ if (!ConstMask)
+ return false;
+ if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
+ return true;
+ for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ if (auto *MaskElt = ConstMask->getAggregateElement(I))
+ if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+
+bool llvm::maskIsAllOneOrUndef(Value *Mask) {
+ auto *ConstMask = dyn_cast<Constant>(Mask);
+ if (!ConstMask)
+ return false;
+ if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
+ return true;
+ for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
+ ++I) {
+ if (auto *MaskElt = ConstMask->getAggregateElement(I))
+ if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+/// TODO: This is a lot like known bits, but for
+/// vectors. Is there something we can common this with?
+APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
+
+ const unsigned VWidth = cast<VectorType>(Mask->getType())->getNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+ if (auto *CV = dyn_cast<ConstantVector>(Mask))
+ for (unsigned i = 0; i < VWidth; i++)
+ if (CV->getAggregateElement(i)->isNullValue())
+ DemandedElts.clearBit(i);
+ return DemandedElts;
+}
+
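A sketch of how a masked-operation combine might consult the new mask helpers; the enum and function are illustrative, not part of the patch:

    #include "llvm/Analysis/VectorUtils.h"
    using namespace llvm;

    enum class MaskKind { AllFalse, AllTrue, Mixed };

    // Sketch: classify a constant mask so a caller can drop the masked op
    // (AllFalse), use the unmasked form (AllTrue), or keep it as-is (Mixed,
    // where possiblyDemandedEltsInMask() narrows the demanded lanes).
    static MaskKind classifyMask(Value *Mask) {
      if (maskIsAllZeroOrUndef(Mask))
        return MaskKind::AllFalse;
      if (maskIsAllOneOrUndef(Mask))
        return MaskKind::AllTrue;
      return MaskKind::Mixed;
    }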
bool InterleavedAccessInfo::isStrided(int Stride) {
unsigned Factor = std::abs(Stride);
return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
@@ -992,7 +1074,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
// and group member Factor - 1; If the latter doesn't exist we rely on
- // peeling (if it is a non-reveresed accsess -- see Case 3).
+    // peeling (if it is a non-reversed access -- see Case 3).
Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
/*ShouldCheckWrap=*/true)) {
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index eab7ec819536..72d2357c2933 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -1,9 +1,8 @@
//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -571,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(align);
KEYWORD(addrspace);
KEYWORD(section);
+ KEYWORD(partition);
KEYWORD(alias);
KEYWORD(ifunc);
KEYWORD(module);
@@ -650,6 +650,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nobuiltin);
KEYWORD(nocapture);
KEYWORD(noduplicate);
+ KEYWORD(nofree);
KEYWORD(noimplicitfloat);
KEYWORD(noinline);
KEYWORD(norecurse);
@@ -657,6 +658,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nonnull);
KEYWORD(noredzone);
KEYWORD(noreturn);
+ KEYWORD(nosync);
KEYWORD(nocf_check);
KEYWORD(nounwind);
KEYWORD(optforfuzzing);
@@ -677,14 +679,17 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(shadowcallstack);
KEYWORD(sanitize_address);
KEYWORD(sanitize_hwaddress);
+ KEYWORD(sanitize_memtag);
KEYWORD(sanitize_thread);
KEYWORD(sanitize_memory);
KEYWORD(speculative_load_hardening);
KEYWORD(swifterror);
KEYWORD(swiftself);
KEYWORD(uwtable);
+ KEYWORD(willreturn);
KEYWORD(writeonly);
KEYWORD(zeroext);
+ KEYWORD(immarg);
KEYWORD(type);
KEYWORD(opaque);
@@ -706,6 +711,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
KEYWORD(umin);
+ KEYWORD(vscale);
KEYWORD(x);
KEYWORD(blockaddress);
@@ -733,6 +739,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(notEligibleToImport);
KEYWORD(live);
KEYWORD(dsoLocal);
+ KEYWORD(canAutoHide);
KEYWORD(function);
KEYWORD(insts);
KEYWORD(funcFlags);
@@ -749,6 +756,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(critical);
KEYWORD(relbf);
KEYWORD(variable);
+ KEYWORD(vTableFuncs);
+ KEYWORD(virtFunc);
KEYWORD(aliasee);
KEYWORD(refs);
KEYWORD(typeIdInfo);
@@ -761,6 +770,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(offset);
KEYWORD(args);
KEYWORD(typeid);
+ KEYWORD(typeidCompatibleVTable);
KEYWORD(summary);
KEYWORD(typeTestRes);
KEYWORD(kind);
@@ -859,6 +869,7 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(invoke, Invoke);
INSTKEYWORD(resume, Resume);
INSTKEYWORD(unreachable, Unreachable);
+ INSTKEYWORD(callbr, CallBr);
INSTKEYWORD(alloca, Alloca);
INSTKEYWORD(load, Load);
@@ -1047,7 +1058,17 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
- // Check to see if this really is a label afterall, e.g. "-1:".
+ // Check if this is a fully-numeric label:
+ if (isdigit(TokStart[0]) && CurPtr[0] == ':') {
+ uint64_t Val = atoull(TokStart, CurPtr);
+ ++CurPtr; // Skip the colon.
+ if ((unsigned)Val != Val)
+ Error("invalid value number (too large)!");
+ UIntVal = unsigned(Val);
+ return lltok::LabelID;
+ }
+
+ // Check to see if this really is a string label, e.g. "-1:".
if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
if (const char *End = isLabelTail(CurPtr)) {
StrVal.assign(TokStart, End-1);
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 21deb6e08910..4d3a2920e937 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -1,9 +1,8 @@
//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index ee634505581e..87dff6468f2d 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1,9 +1,8 @@
//===-- LLParser.cpp - Parser Class ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -164,6 +163,14 @@ bool LLParser::ValidateEndOfModule() {
AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
AttributeSet::get(Context, FnAttrs));
II->setAttributes(AS);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(V)) {
+ AttributeList AS = CBI->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes());
+ AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ FnAttrs.merge(B);
+ AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
+ AttributeSet::get(Context, FnAttrs));
+ CBI->setAttributes(AS);
} else if (auto *GV = dyn_cast<GlobalVariable>(V)) {
AttrBuilder Attrs(GV->getAttributes());
Attrs.merge(B);
@@ -814,19 +821,26 @@ bool LLParser::ParseSummaryEntry() {
if (!Index)
return SkipModuleSummaryEntry();
+ bool result = false;
switch (Lex.getKind()) {
case lltok::kw_gv:
- return ParseGVEntry(SummaryID);
+ result = ParseGVEntry(SummaryID);
+ break;
case lltok::kw_module:
- return ParseModuleEntry(SummaryID);
+ result = ParseModuleEntry(SummaryID);
+ break;
case lltok::kw_typeid:
- return ParseTypeIdEntry(SummaryID);
+ result = ParseTypeIdEntry(SummaryID);
+ break;
+ case lltok::kw_typeidCompatibleVTable:
+ result = ParseTypeIdCompatibleVtableEntry(SummaryID);
break;
default:
- return Error(Lex.getLoc(), "unexpected summary kind");
+ result = Error(Lex.getLoc(), "unexpected summary kind");
+ break;
}
Lex.setIgnoreColonInIdentifiers(false);
- return false;
+ return result;
}
static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
@@ -845,11 +859,14 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) {
/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// OptionalThreadLocal OptionalUnnamedAddr
-// 'alias|ifunc' IndirectSymbol
+/// 'alias|ifunc' IndirectSymbol IndirectSymbolAttr*
///
/// IndirectSymbol
/// ::= TypeAndValue
///
+/// IndirectSymbolAttr
+/// ::= ',' 'partition' StringConstant
+///
/// Everything through OptionalUnnamedAddr has already been parsed.
///
bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
@@ -949,6 +966,21 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
GA->setUnnamedAddr(UnnamedAddr);
maybeSetDSOLocal(DSOLocal, *GA);
+ // At this point we've parsed everything except for the IndirectSymbolAttrs.
+ // Now parse them if there are any.
+ while (Lex.getKind() == lltok::comma) {
+ Lex.Lex();
+
+ if (Lex.getKind() == lltok::kw_partition) {
+ Lex.Lex();
+ GA->setPartition(Lex.getStrVal());
+ if (ParseToken(lltok::StringConstant, "expected partition string"))
+ return true;
+ } else {
+ return TokError("unknown alias or ifunc property!");
+ }
+ }
+
if (Name.empty())
NumberedVals.push_back(GA.get());
@@ -1084,6 +1116,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
GV->setSection(Lex.getStrVal());
if (ParseToken(lltok::StringConstant, "expected global section string"))
return true;
+ } else if (Lex.getKind() == lltok::kw_partition) {
+ Lex.Lex();
+ GV->setPartition(Lex.getStrVal());
+ if (ParseToken(lltok::StringConstant, "expected partition string"))
+ return true;
} else if (Lex.getKind() == lltok::kw_align) {
unsigned Alignment;
if (ParseOptionalAlignment(Alignment)) return true;
@@ -1243,12 +1280,14 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
+ case lltok::kw_nofree: B.addAttribute(Attribute::NoFree); break;
case lltok::kw_noimplicitfloat:
B.addAttribute(Attribute::NoImplicitFloat); break;
case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break;
case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break;
case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break;
case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break;
+ case lltok::kw_nosync: B.addAttribute(Attribute::NoSync); break;
case lltok::kw_nocf_check: B.addAttribute(Attribute::NoCfCheck); break;
case lltok::kw_norecurse: B.addAttribute(Attribute::NoRecurse); break;
case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break;
@@ -1272,6 +1311,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
B.addAttribute(Attribute::SanitizeAddress); break;
case lltok::kw_sanitize_hwaddress:
B.addAttribute(Attribute::SanitizeHWAddress); break;
+ case lltok::kw_sanitize_memtag:
+ B.addAttribute(Attribute::SanitizeMemTag); break;
case lltok::kw_sanitize_thread:
B.addAttribute(Attribute::SanitizeThread); break;
case lltok::kw_sanitize_memory:
@@ -1281,6 +1322,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
break;
case lltok::kw_strictfp: B.addAttribute(Attribute::StrictFP); break;
case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
+ case lltok::kw_willreturn: B.addAttribute(Attribute::WillReturn); break;
case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
// Error handling.
@@ -1303,6 +1345,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_sret:
case lltok::kw_swifterror:
case lltok::kw_swiftself:
+ case lltok::kw_immarg:
HaveError |=
Error(Lex.getLoc(),
"invalid use of parameter-only attribute on a function");
@@ -1566,7 +1609,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
B.addAlignmentAttr(Alignment);
continue;
}
- case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break;
+ case lltok::kw_byval: {
+ Type *Ty;
+ if (ParseByValWithOptionalType(Ty))
+ return true;
+ B.addByValAttr(Ty);
+ continue;
+ }
case lltok::kw_dereferenceable: {
uint64_t Bytes;
if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes))
@@ -1596,6 +1645,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_swiftself: B.addAttribute(Attribute::SwiftSelf); break;
case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break;
case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
+ case lltok::kw_immarg: B.addAttribute(Attribute::ImmArg); break;
case lltok::kw_alignstack:
case lltok::kw_alwaysinline:
@@ -1620,6 +1670,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_returns_twice:
case lltok::kw_sanitize_address:
case lltok::kw_sanitize_hwaddress:
+ case lltok::kw_sanitize_memtag:
case lltok::kw_sanitize_memory:
case lltok::kw_sanitize_thread:
case lltok::kw_speculative_load_hardening:
@@ -1690,6 +1741,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_sret:
case lltok::kw_swifterror:
case lltok::kw_swiftself:
+ case lltok::kw_immarg:
HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
break;
@@ -1717,6 +1769,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_returns_twice:
case lltok::kw_sanitize_address:
case lltok::kw_sanitize_hwaddress:
+ case lltok::kw_sanitize_memtag:
case lltok::kw_sanitize_memory:
case lltok::kw_sanitize_thread:
case lltok::kw_speculative_load_hardening:
@@ -2417,6 +2470,22 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
return false;
}
+/// ParseByValWithOptionalType
+/// ::= byval
+/// ::= byval(<ty>)
+bool LLParser::ParseByValWithOptionalType(Type *&Result) {
+ Result = nullptr;
+ if (!EatIfPresent(lltok::kw_byval))
+ return true;
+ if (!EatIfPresent(lltok::lparen))
+ return false;
+ if (ParseType(Result))
+ return true;
+ if (!EatIfPresent(lltok::rparen))
+ return Error(Lex.getLoc(), "expected ')'");
+ return false;
+}
+
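For reference, the typed 'byval(<ty>)' form parsed here corresponds to building the attribute with an explicit pointee type; a sketch (function name illustrative):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Sketch: textual 'byval(i32)' maps onto AttrBuilder::addByValAttr().
    static AttributeSet byValI32(LLVMContext &Ctx) {
      AttrBuilder B;
      B.addByValAttr(Type::getInt32Ty(Ctx));
      return AttributeSet::get(Ctx, B);
    }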
/// ParseOptionalOperandBundles
/// ::= /*empty*/
/// ::= '[' OperandBundle [, OperandBundle ]* ']'
@@ -2684,7 +2753,18 @@ bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
/// Type
/// ::= '[' APSINTVAL 'x' Types ']'
/// ::= '<' APSINTVAL 'x' Types '>'
+/// ::= '<' 'vscale' 'x' APSINTVAL 'x' Types '>'
bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
+ bool Scalable = false;
+
+ if (isVector && Lex.getKind() == lltok::kw_vscale) {
+ Lex.Lex(); // consume the 'vscale'
+ if (ParseToken(lltok::kw_x, "expected 'x' after vscale"))
+ return true;
+
+ Scalable = true;
+ }
+
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
Lex.getAPSIntVal().getBitWidth() > 64)
return TokError("expected number in address space");
@@ -2711,7 +2791,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
return Error(TypeLoc, "invalid vector element type");
- Result = VectorType::get(EltTy, unsigned(Size));
+ Result = VectorType::get(EltTy, unsigned(Size), Scalable);
} else {
if (!ArrayType::isValidElementType(EltTy))
return Error(TypeLoc, "invalid array element type");
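The scalable form parsed above maps onto the extra flag on VectorType::get(); a sketch for '<vscale x 4 x i32>' (helper name illustrative):

    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    static VectorType *getScalableV4I32(LLVMContext &Ctx) {
      // '<vscale x 4 x i32>': 4 lanes per multiple of vscale.
      return VectorType::get(Type::getInt32Ty(Ctx), 4, /*Scalable=*/true);
    }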
@@ -2916,13 +2996,27 @@ BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
/// unnamed. If there is an error, this returns null otherwise it returns
/// the block being defined.
BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
- LocTy Loc) {
+ int NameID, LocTy Loc) {
BasicBlock *BB;
- if (Name.empty())
+ if (Name.empty()) {
+ if (NameID != -1 && unsigned(NameID) != NumberedVals.size()) {
+ P.Error(Loc, "label expected to be numbered '" +
+ Twine(NumberedVals.size()) + "'");
+ return nullptr;
+ }
BB = GetBB(NumberedVals.size(), Loc);
- else
+ if (!BB) {
+ P.Error(Loc, "unable to create block numbered '" +
+ Twine(NumberedVals.size()) + "'");
+ return nullptr;
+ }
+ } else {
BB = GetBB(Name, Loc);
- if (!BB) return nullptr; // Already diagnosed error.
+ if (!BB) {
+ P.Error(Loc, "unable to create block named '" + Name + "'");
+ return nullptr;
+ }
+ }
// Move the block to the end of the function. Forward ref'd blocks are
// inserted wherever they happen to be referenced.
@@ -3342,7 +3436,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned Opc = Lex.getUIntVal();
Constant *Val0, *Val1;
Lex.Lex();
- LocTy ModifierLoc = Lex.getLoc();
if (Opc == Instruction::Add || Opc == Instruction::Sub ||
Opc == Instruction::Mul || Opc == Instruction::Shl) {
if (EatIfPresent(lltok::kw_nuw))
@@ -3365,12 +3458,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Val0->getType() != Val1->getType())
return Error(ID.Loc, "operands of constexpr must have same type");
- if (!Val0->getType()->isIntOrIntVectorTy()) {
- if (NUW)
- return Error(ModifierLoc, "nuw only applies to integer operations");
- if (NSW)
- return Error(ModifierLoc, "nsw only applies to integer operations");
- }
// Check that the type is valid for the operator.
switch (Opc) {
case Instruction::Add:
@@ -4640,6 +4727,24 @@ bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) {
return false;
}
+/// ParseDICommonBlock:
+/// ::= !DICommonBlock(scope: !0, file: !2, name: "COMMON name", line: 9)
+bool LLParser::ParseDICommonBlock(MDNode *&Result, bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(scope, MDField, ); \
+ OPTIONAL(declaration, MDField, ); \
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(file, MDField, ); \
+ OPTIONAL(line, LineField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result = GET_OR_DISTINCT(DICommonBlock,
+ (Context, scope.Val, declaration.Val, name.Val,
+ file.Val, line.Val));
+ return false;
+}
+
/// ParseDINamespace:
/// ::= !DINamespace(scope: !0, file: !2, name: "SomeNamespace", line: 9)
bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
@@ -4828,6 +4933,15 @@ bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
return TokError(Twine("invalid DWARF op '") + Lex.getStrVal() + "'");
}
+ if (Lex.getKind() == lltok::DwarfAttEncoding) {
+ if (unsigned Op = dwarf::getAttributeEncoding(Lex.getStrVal())) {
+ Lex.Lex();
+ Elements.push_back(Op);
+ continue;
+ }
+ return TokError(Twine("invalid DWARF attribute encoding '") + Lex.getStrVal() + "'");
+ }
+
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
return TokError("expected unsigned integer");
@@ -5239,6 +5353,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
std::vector<unsigned> FwdRefAttrGrps;
LocTy BuiltinLoc;
std::string Section;
+ std::string Partition;
unsigned Alignment;
std::string GC;
GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
@@ -5255,6 +5370,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
BuiltinLoc) ||
(EatIfPresent(lltok::kw_section) &&
ParseStringConstant(Section)) ||
+ (EatIfPresent(lltok::kw_partition) &&
+ ParseStringConstant(Partition)) ||
parseOptionalComdat(FunctionName, C) ||
ParseOptionalAlignment(Alignment) ||
(EatIfPresent(lltok::kw_gc) &&
@@ -5356,6 +5473,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setUnnamedAddr(UnnamedAddr);
Fn->setAlignment(Alignment);
Fn->setSection(Section);
+ Fn->setPartition(Partition);
Fn->setComdat(C);
Fn->setPersonalityFn(PersonalityFn);
if (!GC.empty()) Fn->setGC(GC);
@@ -5470,20 +5588,23 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
}
/// ParseBasicBlock
-/// ::= LabelStr? Instruction*
+/// ::= (LabelStr|LabelID)? Instruction*
bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// If this basic block starts out with a name, remember it.
std::string Name;
+ int NameID = -1;
LocTy NameLoc = Lex.getLoc();
if (Lex.getKind() == lltok::LabelStr) {
Name = Lex.getStrVal();
Lex.Lex();
+ } else if (Lex.getKind() == lltok::LabelID) {
+ NameID = Lex.getUIntVal();
+ Lex.Lex();
}
- BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+ BasicBlock *BB = PFS.DefineBB(Name, NameID, NameLoc);
if (!BB)
- return Error(NameLoc,
- "unable to create block named '" + Name + "'");
+ return true;
std::string NameStr;
@@ -5567,10 +5688,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
case lltok::kw_catchpad: return ParseCatchPad(Inst, PFS);
case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
+ case lltok::kw_callbr: return ParseCallBr(Inst, PFS);
// Unary Operators.
case lltok::kw_fneg: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParseUnaryOp(Inst, PFS, KeywordVal, 2);
+ int Res = ParseUnaryOp(Inst, PFS, KeywordVal, /*IsFP*/true);
if (Res != 0)
return Res;
if (FMF.any())
@@ -5586,7 +5708,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
bool NSW = EatIfPresent(lltok::kw_nsw);
if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
- if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/false)) return true;
if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
@@ -5598,7 +5720,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_fdiv:
case lltok::kw_frem: {
FastMathFlags FMF = EatFastMathFlagsIfPresent();
- int Res = ParseArithmetic(Inst, PFS, KeywordVal, 2);
+ int Res = ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/true);
if (Res != 0)
return Res;
if (FMF.any())
@@ -5612,13 +5734,14 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_ashr: {
bool Exact = EatIfPresent(lltok::kw_exact);
- if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+ if (ParseArithmetic(Inst, PFS, KeywordVal, /*IsFP*/false)) return true;
if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
return false;
}
case lltok::kw_urem:
- case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+ case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal,
+ /*IsFP*/false);
case lltok::kw_and:
case lltok::kw_or:
case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal);
@@ -5648,7 +5771,19 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_inttoptr:
case lltok::kw_ptrtoint: return ParseCast(Inst, PFS, KeywordVal);
// Other.
- case lltok::kw_select: return ParseSelect(Inst, PFS);
+ case lltok::kw_select: {
+ FastMathFlags FMF = EatFastMathFlagsIfPresent();
+ int Res = ParseSelect(Inst, PFS);
+ if (Res != 0)
+ return Res;
+ if (FMF.any()) {
+ if (!Inst->getType()->isFPOrFPVectorTy())
+ return Error(Loc, "fast-math-flags specified for select without "
+ "floating-point scalar or vector return type");
+ Inst->setFastMathFlags(FMF);
+ }
+ return 0;
+ }
case lltok::kw_va_arg: return ParseVA_Arg(Inst, PFS);
case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
case lltok::kw_insertelement: return ParseInsertElement(Inst, PFS);
@@ -6155,28 +6290,16 @@ bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseUnaryOp
/// ::= UnaryOp TypeAndValue ',' Value
///
-/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
-/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
+/// If IsFP is false, then any integer operand is allowed, if it is true, any fp
+/// operand is allowed.
bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
- unsigned Opc, unsigned OperandType) {
+ unsigned Opc, bool IsFP) {
LocTy Loc; Value *LHS;
if (ParseTypeAndValue(LHS, Loc, PFS))
return true;
- bool Valid;
- switch (OperandType) {
- default: llvm_unreachable("Unknown operand type!");
- case 0: // int or FP.
- Valid = LHS->getType()->isIntOrIntVectorTy() ||
- LHS->getType()->isFPOrFPVectorTy();
- break;
- case 1:
- Valid = LHS->getType()->isIntOrIntVectorTy();
- break;
- case 2:
- Valid = LHS->getType()->isFPOrFPVectorTy();
- break;
- }
+ bool Valid = IsFP ? LHS->getType()->isFPOrFPVectorTy()
+ : LHS->getType()->isIntOrIntVectorTy();
if (!Valid)
return Error(Loc, "invalid operand type for instruction");
@@ -6185,6 +6308,124 @@ bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
return false;
}
+/// ParseCallBr
+/// ::= 'callbr' OptionalCallingConv OptionalAttrs Type Value ParamList
+/// OptionalAttrs OptionalOperandBundles 'to' TypeAndValue
+/// '[' LabelList ']'
+bool LLParser::ParseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
+ LocTy CallLoc = Lex.getLoc();
+ AttrBuilder RetAttrs, FnAttrs;
+ std::vector<unsigned> FwdRefAttrGrps;
+ LocTy NoBuiltinLoc;
+ unsigned CC;
+ Type *RetType = nullptr;
+ LocTy RetTypeLoc;
+ ValID CalleeID;
+ SmallVector<ParamInfo, 16> ArgList;
+ SmallVector<OperandBundleDef, 2> BundleList;
+
+ BasicBlock *DefaultDest;
+ if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
+ ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+ ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
+ ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
+ NoBuiltinLoc) ||
+ ParseOptionalOperandBundles(BundleList, PFS) ||
+ ParseToken(lltok::kw_to, "expected 'to' in callbr") ||
+ ParseTypeAndBasicBlock(DefaultDest, PFS) ||
+ ParseToken(lltok::lsquare, "expected '[' in callbr"))
+ return true;
+
+ // Parse the destination list.
+ SmallVector<BasicBlock *, 16> IndirectDests;
+
+ if (Lex.getKind() != lltok::rsquare) {
+ BasicBlock *DestBB;
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ IndirectDests.push_back(DestBB);
+
+ while (EatIfPresent(lltok::comma)) {
+ if (ParseTypeAndBasicBlock(DestBB, PFS))
+ return true;
+ IndirectDests.push_back(DestBB);
+ }
+ }
+
+ if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+ return true;
+
+ // If RetType is a non-function pointer type, then this is the short syntax
+ // for the call, which means that RetType is just the return type. Infer the
+ // rest of the function argument types from the arguments that are present.
+ FunctionType *Ty = dyn_cast<FunctionType>(RetType);
+ if (!Ty) {
+ // Pull out the types of all of the arguments...
+ std::vector<Type *> ParamTypes;
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+ ParamTypes.push_back(ArgList[i].V->getType());
+
+ if (!FunctionType::isValidReturnType(RetType))
+ return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+ Ty = FunctionType::get(RetType, ParamTypes, false);
+ }
+
+ CalleeID.FTy = Ty;
+
+ // Look up the callee.
+ Value *Callee;
+ if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
+ /*IsCall=*/true))
+ return true;
+
+ if (isa<InlineAsm>(Callee) && !Ty->getReturnType()->isVoidTy())
+ return Error(RetTypeLoc, "asm-goto outputs not supported");
+
+ // Set up the Attribute for the function.
+ SmallVector<Value *, 8> Args;
+ SmallVector<AttributeSet, 8> ArgAttrs;
+
+ // Loop through FunctionType's arguments and ensure they are specified
+ // correctly. Also, gather any parameter attributes.
+ FunctionType::param_iterator I = Ty->param_begin();
+ FunctionType::param_iterator E = Ty->param_end();
+ for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+ Type *ExpectedTy = nullptr;
+ if (I != E) {
+ ExpectedTy = *I++;
+ } else if (!Ty->isVarArg()) {
+ return Error(ArgList[i].Loc, "too many arguments specified");
+ }
+
+ if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+ return Error(ArgList[i].Loc, "argument is not of expected type '" +
+ getTypeString(ExpectedTy) + "'");
+ Args.push_back(ArgList[i].V);
+ ArgAttrs.push_back(ArgList[i].Attrs);
+ }
+
+ if (I != E)
+ return Error(CallLoc, "not enough parameters specified for call");
+
+ if (FnAttrs.hasAlignmentAttr())
+ return Error(CallLoc, "callbr instructions may not have an alignment");
+
+  // Finish off the Attributes and check them
+ AttributeList PAL =
+ AttributeList::get(Context, AttributeSet::get(Context, FnAttrs),
+ AttributeSet::get(Context, RetAttrs), ArgAttrs);
+
+ CallBrInst *CBI =
+ CallBrInst::Create(Ty, Callee, DefaultDest, IndirectDests, Args,
+ BundleList);
+ CBI->setCallingConv(CC);
+ CBI->setAttributes(PAL);
+ ForwardRefAttrGroups[CBI] = FwdRefAttrGrps;
+ Inst = CBI;
+ return false;
+}
+
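A sketch of the textual form this accepts, driven through the existing string parser; the asm-goto-style IR body is an illustrative example, not taken from the patch:

    #include <memory>
    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    static std::unique_ptr<Module> parseCallBrExample(LLVMContext &Ctx) {
      StringRef IR = R"(
        define void @f(i32 %x) {
        entry:
          callbr void asm sideeffect "", "r,X"(i32 %x,
                                                i8* blockaddress(@f, %indirect))
              to label %fallthrough [label %indirect]
        fallthrough:
          ret void
        indirect:
          ret void
        }
      )";
      SMDiagnostic Err;
      return parseAssemblyString(IR, Err, Ctx);
    }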
//===----------------------------------------------------------------------===//
// Binary Operators.
//===----------------------------------------------------------------------===//
@@ -6192,26 +6433,18 @@ bool LLParser::ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS,
/// ParseArithmetic
/// ::= ArithmeticOps TypeAndValue ',' Value
///
-/// If OperandType is 0, then any FP or integer operand is allowed. If it is 1,
-/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
+/// If IsFP is false, then any integer operand is allowed, if it is true, any fp
+/// operand is allowed.
bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
- unsigned Opc, unsigned OperandType) {
+ unsigned Opc, bool IsFP) {
LocTy Loc; Value *LHS, *RHS;
if (ParseTypeAndValue(LHS, Loc, PFS) ||
ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
ParseValue(LHS->getType(), RHS, PFS))
return true;
- bool Valid;
- switch (OperandType) {
- default: llvm_unreachable("Unknown operand type!");
- case 0: // int or FP.
- Valid = LHS->getType()->isIntOrIntVectorTy() ||
- LHS->getType()->isFPOrFPVectorTy();
- break;
- case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
- case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
- }
+ bool Valid = IsFP ? LHS->getType()->isFPOrFPVectorTy()
+ : LHS->getType()->isIntOrIntVectorTy();
if (!Valid)
return Error(Loc, "invalid operand type for instruction");
@@ -6816,6 +7049,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
SyncScope::ID SSID = SyncScope::System;
bool isVolatile = false;
+ bool IsFP = false;
AtomicRMWInst::BinOp Operation;
if (EatIfPresent(lltok::kw_volatile))
@@ -6834,6 +7068,14 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
case lltok::kw_min: Operation = AtomicRMWInst::Min; break;
case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break;
case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break;
+ case lltok::kw_fadd:
+ Operation = AtomicRMWInst::FAdd;
+ IsFP = true;
+ break;
+ case lltok::kw_fsub:
+ Operation = AtomicRMWInst::FSub;
+ IsFP = true;
+ break;
}
Lex.Lex(); // Eat the operation.
@@ -6850,10 +7092,25 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(ValLoc, "atomicrmw value and pointer type do not match");
- if (!Val->getType()->isIntegerTy()) {
- return Error(ValLoc, "atomicrmw " +
- AtomicRMWInst::getOperationName(Operation) +
- " operand must be an integer");
+ if (Operation == AtomicRMWInst::Xchg) {
+ if (!Val->getType()->isIntegerTy() &&
+ !Val->getType()->isFloatingPointTy()) {
+ return Error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer or floating point type");
+ }
+ } else if (IsFP) {
+ if (!Val->getType()->isFloatingPointTy()) {
+ return Error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be a floating point type");
+ }
+ } else {
+ if (!Val->getType()->isIntegerTy()) {
+ return Error(ValLoc, "atomicrmw " +
+ AtomicRMWInst::getOperationName(Operation) +
+ " operand must be an integer");
+ }
}
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
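The floating-point forms accepted above ('atomicrmw fadd' / 'atomicrmw fsub') correspond to the FAdd/FSub binops on the C++ side; a builder sketch (function name illustrative):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Sketch: emit 'atomicrmw fadd float* %Ptr, float %V seq_cst'.
    static Value *emitAtomicFAdd(IRBuilder<> &B, Value *Ptr, Value *V) {
      return B.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, V,
                               AtomicOrdering::SequentiallyConsistent);
    }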
@@ -7249,6 +7506,92 @@ bool LLParser::ParseTypeIdSummary(TypeIdSummary &TIS) {
return false;
}
+static ValueInfo EmptyVI =
+ ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8);
+
+/// TypeIdCompatibleVtableEntry
+/// ::= 'typeidCompatibleVTable' ':' '(' 'name' ':' STRINGCONSTANT ','
+/// TypeIdCompatibleVtableInfo
+/// ')'
+bool LLParser::ParseTypeIdCompatibleVtableEntry(unsigned ID) {
+ assert(Lex.getKind() == lltok::kw_typeidCompatibleVTable);
+ Lex.Lex();
+
+ std::string Name;
+ if (ParseToken(lltok::colon, "expected ':' here") ||
+ ParseToken(lltok::lparen, "expected '(' here") ||
+ ParseToken(lltok::kw_name, "expected 'name' here") ||
+ ParseToken(lltok::colon, "expected ':' here") ||
+ ParseStringConstant(Name))
+ return true;
+
+ TypeIdCompatibleVtableInfo &TI =
+ Index->getOrInsertTypeIdCompatibleVtableSummary(Name);
+ if (ParseToken(lltok::comma, "expected ',' here") ||
+ ParseToken(lltok::kw_summary, "expected 'summary' here") ||
+ ParseToken(lltok::colon, "expected ':' here") ||
+ ParseToken(lltok::lparen, "expected '(' here"))
+ return true;
+
+ IdToIndexMapType IdToIndexMap;
+ // Parse each call edge
+ do {
+ uint64_t Offset;
+ if (ParseToken(lltok::lparen, "expected '(' here") ||
+ ParseToken(lltok::kw_offset, "expected 'offset' here") ||
+ ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(Offset) ||
+ ParseToken(lltok::comma, "expected ',' here"))
+ return true;
+
+ LocTy Loc = Lex.getLoc();
+ unsigned GVId;
+ ValueInfo VI;
+ if (ParseGVReference(VI, GVId))
+ return true;
+
+ // Keep track of the TypeIdCompatibleVtableInfo array index needing a
+ // forward reference. We will save the location of the ValueInfo needing an
+ // update, but can only do so once the std::vector is finalized.
+ if (VI == EmptyVI)
+ IdToIndexMap[GVId].push_back(std::make_pair(TI.size(), Loc));
+ TI.push_back({Offset, VI});
+
+ if (ParseToken(lltok::rparen, "expected ')' in call"))
+ return true;
+ } while (EatIfPresent(lltok::comma));
+
+ // Now that the TI vector is finalized, it is safe to save the locations
+ // of any forward GV references that need updating later.
+ for (auto I : IdToIndexMap) {
+ for (auto P : I.second) {
+ assert(TI[P.first].VTableVI == EmptyVI &&
+ "Forward referenced ValueInfo expected to be empty");
+ auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
+ I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
+ FwdRef.first->second.push_back(
+ std::make_pair(&TI[P.first].VTableVI, P.second));
+ }
+ }
+
+ if (ParseToken(lltok::rparen, "expected ')' here") ||
+ ParseToken(lltok::rparen, "expected ')' here"))
+ return true;
+
+ // Check if this ID was forward referenced, and if so, update the
+ // corresponding GUIDs.
+ auto FwdRefTIDs = ForwardRefTypeIds.find(ID);
+ if (FwdRefTIDs != ForwardRefTypeIds.end()) {
+ for (auto TIDRef : FwdRefTIDs->second) {
+ assert(!*TIDRef.first &&
+ "Forward referenced type id GUID expected to be 0");
+ *TIDRef.first = GlobalValue::getGUID(Name);
+ }
+ ForwardRefTypeIds.erase(FwdRefTIDs);
+ }
+
+ return false;
+}
+
/// TypeTestResolution
/// ::= 'typeTestRes' ':' '(' 'kind' ':'
/// ( 'unsat' | 'byteArray' | 'inline' | 'single' | 'allOnes' ) ','
@@ -7523,9 +7866,13 @@ static const auto FwdVIRef = (GlobalValueSummaryMapTy::value_type *)-8;
static void resolveFwdRef(ValueInfo *Fwd, ValueInfo &Resolved) {
bool ReadOnly = Fwd->isReadOnly();
+ bool WriteOnly = Fwd->isWriteOnly();
+ assert(!(ReadOnly && WriteOnly));
*Fwd = Resolved;
if (ReadOnly)
Fwd->setReadOnly();
+ if (WriteOnly)
+ Fwd->setWriteOnly();
}
/// Stores the given Name/GUID and associated summary into the Index.
@@ -7554,10 +7901,6 @@ void LLParser::AddGlobalValueToIndex(
}
}
- // Add the summary if one was provided.
- if (Summary)
- Index->addGlobalValueSummary(VI, std::move(Summary));
-
// Resolve forward references from calls/refs
auto FwdRefVIs = ForwardRefValueInfos.find(ID);
if (FwdRefVIs != ForwardRefValueInfos.end()) {
@@ -7575,11 +7918,16 @@ void LLParser::AddGlobalValueToIndex(
for (auto AliaseeRef : FwdRefAliasees->second) {
assert(!AliaseeRef.first->hasAliasee() &&
"Forward referencing alias already has aliasee");
- AliaseeRef.first->setAliasee(VI.getSummaryList().front().get());
+ assert(Summary && "Aliasee must be a definition");
+ AliaseeRef.first->setAliasee(VI, Summary.get());
}
ForwardRefAliasees.erase(FwdRefAliasees);
}
+ // Add the summary if one was provided.
+ if (Summary)
+ Index->addGlobalValueSummary(VI, std::move(Summary));
+
// Save the associated ValueInfo for use in later references by ID.
if (ID == NumberedValueInfos.size())
NumberedValueInfos.push_back(VI);
@@ -7683,7 +8031,7 @@ bool LLParser::ParseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
StringRef ModulePath;
GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
/*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
- /*Live=*/false, /*IsLocal=*/false);
+ /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
unsigned InstCount;
std::vector<FunctionSummary::EdgeTy> Calls;
FunctionSummary::TypeIdInfo TypeIdInfo;
@@ -7753,9 +8101,11 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
StringRef ModulePath;
GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
/*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
- /*Live=*/false, /*IsLocal=*/false);
- GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false);
+ /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
+ GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false,
+ /* WriteOnly */ false);
std::vector<ValueInfo> Refs;
+ VTableFuncList VTableFuncs;
if (ParseToken(lltok::colon, "expected ':' here") ||
ParseToken(lltok::lparen, "expected '(' here") ||
ParseModuleReference(ModulePath) ||
@@ -7764,10 +8114,20 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
ParseGVarFlags(GVarFlags))
return true;
- // Parse optional refs field
- if (EatIfPresent(lltok::comma)) {
- if (ParseOptionalRefs(Refs))
- return true;
+ // Parse optional fields
+ while (EatIfPresent(lltok::comma)) {
+ switch (Lex.getKind()) {
+ case lltok::kw_vTableFuncs:
+ if (ParseOptionalVTableFuncs(VTableFuncs))
+ return true;
+ break;
+ case lltok::kw_refs:
+ if (ParseOptionalRefs(Refs))
+ return true;
+ break;
+ default:
+ return Error(Lex.getLoc(), "expected optional variable summary field");
+ }
}
if (ParseToken(lltok::rparen, "expected ')' here"))
@@ -7777,6 +8137,7 @@ bool LLParser::ParseVariableSummary(std::string Name, GlobalValue::GUID GUID,
llvm::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
GS->setModulePath(ModulePath);
+ GS->setVTableFuncs(std::move(VTableFuncs));
AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
ID, std::move(GS));
@@ -7796,7 +8157,7 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
StringRef ModulePath;
GlobalValueSummary::GVFlags GVFlags = GlobalValueSummary::GVFlags(
/*Linkage=*/GlobalValue::ExternalLinkage, /*NotEligibleToImport=*/false,
- /*Live=*/false, /*IsLocal=*/false);
+ /*Live=*/false, /*IsLocal=*/false, /*CanAutoHide=*/false);
if (ParseToken(lltok::colon, "expected ':' here") ||
ParseToken(lltok::lparen, "expected '(' here") ||
ParseModuleReference(ModulePath) ||
@@ -7823,8 +8184,11 @@ bool LLParser::ParseAliasSummary(std::string Name, GlobalValue::GUID GUID,
auto FwdRef = ForwardRefAliasees.insert(
std::make_pair(GVId, std::vector<std::pair<AliasSummary *, LocTy>>()));
FwdRef.first->second.push_back(std::make_pair(AS.get(), Loc));
- } else
- AS->setAliasee(AliaseeVI.getSummaryList().front().get());
+ } else {
+ auto Summary = Index->findSummaryInModule(AliaseeVI, ModulePath);
+ assert(Summary && "Aliasee must be a definition");
+ AS->setAliasee(AliaseeVI, Summary);
+ }
AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
ID, std::move(AS));
@@ -7856,7 +8220,7 @@ bool LLParser::ParseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
do {
- unsigned Val;
+ unsigned Val = 0;
switch (Lex.getKind()) {
case lltok::kw_readNone:
Lex.Lex();
@@ -7994,6 +8358,67 @@ bool LLParser::ParseHotness(CalleeInfo::HotnessType &Hotness) {
return false;
}
+/// OptionalVTableFuncs
+/// := 'vTableFuncs' ':' '(' VTableFunc [',' VTableFunc]* ')'
+/// VTableFunc ::= '(' 'virtFunc' ':' GVReference ',' 'offset' ':' UInt64 ')'
+bool LLParser::ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
+ assert(Lex.getKind() == lltok::kw_vTableFuncs);
+ Lex.Lex();
+
+ if (ParseToken(lltok::colon, "expected ':' in vTableFuncs") ||
+ ParseToken(lltok::lparen, "expected '(' in vTableFuncs"))
+ return true;
+
+ IdToIndexMapType IdToIndexMap;
+ // Parse each virtual function pair
+ do {
+ ValueInfo VI;
+ if (ParseToken(lltok::lparen, "expected '(' in vTableFunc") ||
+ ParseToken(lltok::kw_virtFunc, "expected 'virtFunc' in vTableFunc") ||
+ ParseToken(lltok::colon, "expected ':'"))
+ return true;
+
+ LocTy Loc = Lex.getLoc();
+ unsigned GVId;
+ if (ParseGVReference(VI, GVId))
+ return true;
+
+ uint64_t Offset;
+ if (ParseToken(lltok::comma, "expected comma") ||
+ ParseToken(lltok::kw_offset, "expected offset") ||
+ ParseToken(lltok::colon, "expected ':'") || ParseUInt64(Offset))
+ return true;
+
+ // Keep track of the VTableFuncs array index needing a forward reference.
+ // We will save the location of the ValueInfo needing an update, but
+ // can only do so once the std::vector is finalized.
+ if (VI == EmptyVI)
+ IdToIndexMap[GVId].push_back(std::make_pair(VTableFuncs.size(), Loc));
+ VTableFuncs.push_back({VI, Offset});
+
+ if (ParseToken(lltok::rparen, "expected ')' in vTableFunc"))
+ return true;
+ } while (EatIfPresent(lltok::comma));
+
+ // Now that the VTableFuncs vector is finalized, it is safe to save the
+ // locations of any forward GV references that need updating later.
+ for (auto I : IdToIndexMap) {
+ for (auto P : I.second) {
+ assert(VTableFuncs[P.first].FuncVI == EmptyVI &&
+ "Forward referenced ValueInfo expected to be empty");
+ auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
+ I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
+ FwdRef.first->second.push_back(
+ std::make_pair(&VTableFuncs[P.first].FuncVI, P.second));
+ }
+ }
+
+ if (ParseToken(lltok::rparen, "expected ')' in vTableFuncs"))
+ return true;
+
+ return false;
+}
+
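Following the grammar comment above, a hand-written illustrative instance of the field this routine accepts would be

    vTableFuncs: ((virtFunc: ^2, offset: 16), (virtFunc: ^3, offset: 24))

with the same forward-reference bookkeeping as the typeidCompatibleVTable entries: unresolved ^N references are noted by index and fixed up after the VTableFuncs vector is complete.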
/// OptionalRefs
/// := 'refs' ':' '(' GVReference [',' GVReference]* ')'
bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
@@ -8019,10 +8444,11 @@ bool LLParser::ParseOptionalRefs(std::vector<ValueInfo> &Refs) {
VContexts.push_back(VC);
} while (EatIfPresent(lltok::comma));
- // Sort value contexts so that ones with readonly ValueInfo are at the end
- // of VContexts vector. This is needed to match immutableRefCount() behavior.
+ // Sort value contexts so that ones with writeonly
+ // and readonly ValueInfo are at the end of VContexts vector.
+ // See FunctionSummary::specialRefCounts()
llvm::sort(VContexts, [](const ValueContext &VC1, const ValueContext &VC2) {
- return VC1.VI.isReadOnly() < VC2.VI.isReadOnly();
+ return VC1.VI.getAccessSpecifier() < VC2.VI.getAccessSpecifier();
});
IdToIndexMapType IdToIndexMap;
@@ -8283,41 +8709,55 @@ bool LLParser::ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
/// GVFlags
/// ::= 'flags' ':' '(' 'linkage' ':' OptionalLinkageAux ','
/// 'notEligibleToImport' ':' Flag ',' 'live' ':' Flag ','
-/// 'dsoLocal' ':' Flag ')'
+/// 'dsoLocal' ':' Flag ',' 'canAutoHide' ':' Flag ')'
bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
assert(Lex.getKind() == lltok::kw_flags);
Lex.Lex();
- bool HasLinkage;
if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_linkage, "expected 'linkage' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
- return true;
-
- GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
- assert(HasLinkage && "Linkage not optional in summary entry");
- Lex.Lex();
-
- unsigned Flag;
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_notEligibleToImport,
- "expected 'notEligibleToImport' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseFlag(Flag))
- return true;
- GVFlags.NotEligibleToImport = Flag;
-
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_live, "expected 'live' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseFlag(Flag))
+ ParseToken(lltok::lparen, "expected '(' here"))
return true;
- GVFlags.Live = Flag;
- if (ParseToken(lltok::comma, "expected ',' here") ||
- ParseToken(lltok::kw_dsoLocal, "expected 'dsoLocal' here") ||
- ParseToken(lltok::colon, "expected ':' here") || ParseFlag(Flag))
- return true;
- GVFlags.DSOLocal = Flag;
+ do {
+ unsigned Flag = 0;
+ switch (Lex.getKind()) {
+ case lltok::kw_linkage:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'"))
+ return true;
+ bool HasLinkage;
+ GVFlags.Linkage = parseOptionalLinkageAux(Lex.getKind(), HasLinkage);
+ assert(HasLinkage && "Linkage not optional in summary entry");
+ Lex.Lex();
+ break;
+ case lltok::kw_notEligibleToImport:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ return true;
+ GVFlags.NotEligibleToImport = Flag;
+ break;
+ case lltok::kw_live:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ return true;
+ GVFlags.Live = Flag;
+ break;
+ case lltok::kw_dsoLocal:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ return true;
+ GVFlags.DSOLocal = Flag;
+ break;
+ case lltok::kw_canAutoHide:
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'") || ParseFlag(Flag))
+ return true;
+ GVFlags.CanAutoHide = Flag;
+ break;
+ default:
+ return Error(Lex.getLoc(), "expected gv flag type");
+ }
+ } while (EatIfPresent(lltok::comma));
if (ParseToken(lltok::rparen, "expected ')' here"))
return true;
@@ -8326,24 +8766,41 @@ bool LLParser::ParseGVFlags(GlobalValueSummary::GVFlags &GVFlags) {
}
/// GVarFlags
-/// ::= 'varFlags' ':' '(' 'readonly' ':' Flag ')'
+/// ::= 'varFlags' ':' '(' 'readonly' ':' Flag
+/// ',' 'writeonly' ':' Flag ')'
bool LLParser::ParseGVarFlags(GlobalVarSummary::GVarFlags &GVarFlags) {
assert(Lex.getKind() == lltok::kw_varFlags);
Lex.Lex();
- unsigned Flag;
if (ParseToken(lltok::colon, "expected ':' here") ||
- ParseToken(lltok::lparen, "expected '(' here") ||
- ParseToken(lltok::kw_readonly, "expected 'readonly' here") ||
- ParseToken(lltok::colon, "expected ':' here"))
+ ParseToken(lltok::lparen, "expected '(' here"))
return true;
- ParseFlag(Flag);
- GVarFlags.ReadOnly = Flag;
+ auto ParseRest = [this](unsigned int &Val) {
+ Lex.Lex();
+ if (ParseToken(lltok::colon, "expected ':'"))
+ return true;
+ return ParseFlag(Val);
+ };
- if (ParseToken(lltok::rparen, "expected ')' here"))
- return true;
- return false;
+ do {
+ unsigned Flag = 0;
+ switch (Lex.getKind()) {
+ case lltok::kw_readonly:
+ if (ParseRest(Flag))
+ return true;
+ GVarFlags.MaybeReadOnly = Flag;
+ break;
+ case lltok::kw_writeonly:
+ if (ParseRest(Flag))
+ return true;
+ GVarFlags.MaybeWriteOnly = Flag;
+ break;
+ default:
+ return Error(Lex.getLoc(), "expected gvar flag type");
+ }
+ } while (EatIfPresent(lltok::comma));
+ return ParseToken(lltok::rparen, "expected ')' here");
}
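With both flag parsers now looping over a keyword switch, the fields may appear in any order; an illustrative pair of fields (not from a test case) matching the grammar comments above would be

    flags: (linkage: external, notEligibleToImport: 0, live: 1, dsoLocal: 1, canAutoHide: 0)
    varFlags: (readonly: 1, writeonly: 0)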
/// ModuleReference
@@ -8366,7 +8823,9 @@ bool LLParser::ParseModuleReference(StringRef &ModulePath) {
/// GVReference
/// ::= SummaryID
bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
- bool ReadOnly = EatIfPresent(lltok::kw_readonly);
+ bool WriteOnly = false, ReadOnly = EatIfPresent(lltok::kw_readonly);
+ if (!ReadOnly)
+ WriteOnly = EatIfPresent(lltok::kw_writeonly);
if (ParseToken(lltok::SummaryID, "expected GV ID"))
return true;
@@ -8381,5 +8840,7 @@ bool LLParser::ParseGVReference(ValueInfo &VI, unsigned &GVId) {
if (ReadOnly)
VI.setReadOnly();
+ if (WriteOnly)
+ VI.setWriteOnly();
return false;
}
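And with ParseGVReference also accepting a leading writeonly keyword (mirroring the writeonly handling added to resolveFwdRef earlier in this file), a refs list such as the following illustrative fragment now parses, with the access specifier carried on the ValueInfo:

    refs: (^2, readonly ^3, writeonly ^4)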
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 5a0fc297265d..610e2e262008 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -1,9 +1,8 @@
//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -340,6 +339,7 @@ namespace llvm {
bool ParseFnAttributeValuePairs(AttrBuilder &B,
std::vector<unsigned> &FwdRefAttrGrps,
bool inAttrGrp, LocTy &BuiltinLoc);
+ bool ParseByValWithOptionalType(Type *&Result);
// Module Summary Index Parsing.
bool SkipModuleSummaryEntry();
@@ -369,9 +369,11 @@ namespace llvm {
IdToIndexMapType &IdToIndexMap, unsigned Index);
bool ParseVFuncId(FunctionSummary::VFuncId &VFuncId,
IdToIndexMapType &IdToIndexMap, unsigned Index);
+ bool ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs);
bool ParseOptionalRefs(std::vector<ValueInfo> &Refs);
bool ParseTypeIdEntry(unsigned ID);
bool ParseTypeIdSummary(TypeIdSummary &TIS);
+ bool ParseTypeIdCompatibleVtableEntry(unsigned ID);
bool ParseTypeTestResolution(TypeTestResolution &TTRes);
bool ParseOptionalWpdResolutions(
std::map<uint64_t, WholeProgramDevirtResolution> &WPDResMap);
@@ -446,7 +448,7 @@ namespace llvm {
/// DefineBB - Define the specified basic block, which is either named or
/// unnamed. If there is an error, this returns null otherwise it returns
/// the block being defined.
- BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
+ BasicBlock *DefineBB(const std::string &Name, int NameID, LocTy Loc);
bool resolveForwardRefBlockAddresses();
};
@@ -571,11 +573,12 @@ namespace llvm {
bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
+ bool ParseCallBr(Instruction *&Inst, PerFunctionState &PFS);
bool ParseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
- unsigned OperandType);
+ bool IsFP);
bool ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc,
- unsigned OperandType);
+ bool IsFP);
bool ParseLogical(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
bool ParseCompare(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
bool ParseCast(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index c2e2795a9467..0e9ba4db4742 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -1,9 +1,8 @@
//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,6 +37,7 @@ enum Kind {
bar, // |
colon, // :
+ kw_vscale,
kw_x,
kw_true,
kw_false,
@@ -114,6 +114,7 @@ enum Kind {
kw_align,
kw_addrspace,
kw_section,
+ kw_partition,
kw_alias,
kw_ifunc,
kw_module,
@@ -175,6 +176,7 @@ enum Kind {
kw_argmemonly,
kw_sanitize_address,
kw_sanitize_hwaddress,
+ kw_sanitize_memtag,
kw_builtin,
kw_byval,
kw_inalloca,
@@ -194,6 +196,7 @@ enum Kind {
kw_nobuiltin,
kw_nocapture,
kw_noduplicate,
+ kw_nofree,
kw_noimplicitfloat,
kw_noinline,
kw_norecurse,
@@ -201,6 +204,7 @@ enum Kind {
kw_nonnull,
kw_noredzone,
kw_noreturn,
+ kw_nosync,
kw_nocf_check,
kw_nounwind,
kw_optforfuzzing,
@@ -225,8 +229,10 @@ enum Kind {
kw_swifterror,
kw_swiftself,
kw_uwtable,
+ kw_willreturn,
kw_writeonly,
kw_zeroext,
+ kw_immarg,
kw_type,
kw_opaque,
@@ -328,6 +334,7 @@ enum Kind {
kw_catchret,
kw_catchpad,
kw_cleanuppad,
+ kw_callbr,
kw_alloca,
kw_load,
@@ -363,6 +370,7 @@ enum Kind {
kw_notEligibleToImport,
kw_live,
kw_dsoLocal,
+ kw_canAutoHide,
kw_function,
kw_insts,
kw_funcFlags,
@@ -379,6 +387,8 @@ enum Kind {
kw_critical,
kw_relbf,
kw_variable,
+ kw_vTableFuncs,
+ kw_virtFunc,
kw_aliasee,
kw_refs,
kw_typeIdInfo,
@@ -391,6 +401,7 @@ enum Kind {
kw_offset,
kw_args,
kw_typeid,
+ kw_typeidCompatibleVTable,
kw_summary,
kw_typeTestRes,
kw_kind,
@@ -421,6 +432,7 @@ enum Kind {
kw_varFlags,
// Unsigned Valued tokens (UIntVal).
+ LabelID, // 42:
GlobalID, // @42
LocalVarID, // %42
AttrGrpID, // #42
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 1205dff24e8a..b13c6237f411 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -1,9 +1,8 @@
//===- Parser.cpp - Main dispatch module for the Parser library -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index b789f646b5f6..3f36dff9f55c 100644
--- a/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,98 +20,92 @@ namespace HSAMD {
namespace V3 {
bool MetadataVerifier::verifyScalar(
- msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
- function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
- auto ScalarPtr = dyn_cast<msgpack::ScalarNode>(&Node);
- if (!ScalarPtr)
- return false;
- auto &Scalar = *ScalarPtr;
- // Do not output extraneous tags for types we know from the spec.
- Scalar.IgnoreTag = true;
- if (Scalar.getScalarKind() != SKind) {
+ msgpack::DocNode &Node, msgpack::Type SKind,
+ function_ref<bool(msgpack::DocNode &)> verifyValue) {
+ if (!Node.isScalar())
+ return false;
+ if (Node.getKind() != SKind) {
if (Strict)
return false;
// If we are not strict, we interpret string values as "implicitly typed"
// and attempt to coerce them to the expected type here.
- if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
+ if (Node.getKind() != msgpack::Type::String)
return false;
- std::string StringValue = Scalar.getString();
- Scalar.setScalarKind(SKind);
- if (Scalar.inputYAML(StringValue) != StringRef())
+ StringRef StringValue = Node.getString();
+ Node.fromString(StringValue);
+ if (Node.getKind() != SKind)
return false;
}
if (verifyValue)
- return verifyValue(Scalar);
+ return verifyValue(Node);
return true;
}
-bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
- if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
- if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
+bool MetadataVerifier::verifyInteger(msgpack::DocNode &Node) {
+ if (!verifyScalar(Node, msgpack::Type::UInt))
+ if (!verifyScalar(Node, msgpack::Type::Int))
return false;
return true;
}
bool MetadataVerifier::verifyArray(
- msgpack::Node &Node, function_ref<bool(msgpack::Node &)> verifyNode,
+ msgpack::DocNode &Node, function_ref<bool(msgpack::DocNode &)> verifyNode,
Optional<size_t> Size) {
- auto ArrayPtr = dyn_cast<msgpack::ArrayNode>(&Node);
- if (!ArrayPtr)
+ if (!Node.isArray())
return false;
- auto &Array = *ArrayPtr;
+ auto &Array = Node.getArray();
if (Size && Array.size() != *Size)
return false;
for (auto &Item : Array)
- if (!verifyNode(*Item.get()))
+ if (!verifyNode(Item))
return false;
return true;
}
bool MetadataVerifier::verifyEntry(
- msgpack::MapNode &MapNode, StringRef Key, bool Required,
- function_ref<bool(msgpack::Node &)> verifyNode) {
+ msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::DocNode &)> verifyNode) {
auto Entry = MapNode.find(Key);
if (Entry == MapNode.end())
return !Required;
- return verifyNode(*Entry->second.get());
+ return verifyNode(Entry->second);
}
bool MetadataVerifier::verifyScalarEntry(
- msgpack::MapNode &MapNode, StringRef Key, bool Required,
- msgpack::ScalarNode::ScalarKind SKind,
- function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
- return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
+ msgpack::MapDocNode &MapNode, StringRef Key, bool Required,
+ msgpack::Type SKind,
+ function_ref<bool(msgpack::DocNode &)> verifyValue) {
+ return verifyEntry(MapNode, Key, Required, [=](msgpack::DocNode &Node) {
return verifyScalar(Node, SKind, verifyValue);
});
}
-bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
+bool MetadataVerifier::verifyIntegerEntry(msgpack::MapDocNode &MapNode,
StringRef Key, bool Required) {
- return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
+ return verifyEntry(MapNode, Key, Required, [this](msgpack::DocNode &Node) {
return verifyInteger(Node);
});
}
-bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
- auto ArgsMapPtr = dyn_cast<msgpack::MapNode>(&Node);
- if (!ArgsMapPtr)
+bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
+ if (!Node.isMap())
return false;
- auto &ArgsMap = *ArgsMapPtr;
+ auto &ArgsMap = Node.getMap();
if (!verifyScalarEntry(ArgsMap, ".name", false,
- msgpack::ScalarNode::SK_String))
+ msgpack::Type::String))
return false;
if (!verifyScalarEntry(ArgsMap, ".type_name", false,
- msgpack::ScalarNode::SK_String))
+ msgpack::Type::String))
return false;
if (!verifyIntegerEntry(ArgsMap, ".size", true))
return false;
if (!verifyIntegerEntry(ArgsMap, ".offset", true))
return false;
if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
- msgpack::ScalarNode::SK_String,
- [](msgpack::ScalarNode &SNode) {
+ msgpack::Type::String,
+ [](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("by_value", true)
.Case("global_buffer", true)
@@ -128,12 +121,13 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
.Case("hidden_printf_buffer", true)
.Case("hidden_default_queue", true)
.Case("hidden_completion_action", true)
+ .Case("hidden_multigrid_sync_arg", true)
.Default(false);
}))
return false;
if (!verifyScalarEntry(ArgsMap, ".value_type", true,
- msgpack::ScalarNode::SK_String,
- [](msgpack::ScalarNode &SNode) {
+ msgpack::Type::String,
+ [](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("struct", true)
.Case("i8", true)
@@ -153,8 +147,8 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
return false;
if (!verifyScalarEntry(ArgsMap, ".address_space", false,
- msgpack::ScalarNode::SK_String,
- [](msgpack::ScalarNode &SNode) {
+ msgpack::Type::String,
+ [](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("private", true)
.Case("global", true)
@@ -166,8 +160,8 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
}))
return false;
if (!verifyScalarEntry(ArgsMap, ".access", false,
- msgpack::ScalarNode::SK_String,
- [](msgpack::ScalarNode &SNode) {
+ msgpack::Type::String,
+ [](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("read_only", true)
.Case("write_only", true)
@@ -176,8 +170,8 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
}))
return false;
if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
- msgpack::ScalarNode::SK_String,
- [](msgpack::ScalarNode &SNode) {
+ msgpack::Type::String,
+ [](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("read_only", true)
.Case("write_only", true)
@@ -186,36 +180,35 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
}))
return false;
if (!verifyScalarEntry(ArgsMap, ".is_const", false,
- msgpack::ScalarNode::SK_Boolean))
+ msgpack::Type::Boolean))
return false;
if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
- msgpack::ScalarNode::SK_Boolean))
+ msgpack::Type::Boolean))
return false;
if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
- msgpack::ScalarNode::SK_Boolean))
+ msgpack::Type::Boolean))
return false;
if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
- msgpack::ScalarNode::SK_Boolean))
+ msgpack::Type::Boolean))
return false;
return true;
}
-bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
- auto KernelMapPtr = dyn_cast<msgpack::MapNode>(&Node);
- if (!KernelMapPtr)
+bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) {
+ if (!Node.isMap())
return false;
- auto &KernelMap = *KernelMapPtr;
+ auto &KernelMap = Node.getMap();
if (!verifyScalarEntry(KernelMap, ".name", true,
- msgpack::ScalarNode::SK_String))
+ msgpack::Type::String))
return false;
if (!verifyScalarEntry(KernelMap, ".symbol", true,
- msgpack::ScalarNode::SK_String))
+ msgpack::Type::String))
return false;
if (!verifyScalarEntry(KernelMap, ".language", false,
- msgpack::ScalarNode::SK_String,
- [](msgpack::ScalarNode &SNode) {
+ msgpack::Type::String,
+ [](msgpack::DocNode &SNode) {
return StringSwitch<bool>(SNode.getString())
.Case("OpenCL C", true)
.Case("OpenCL C++", true)
@@ -227,41 +220,41 @@ bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
}))
return false;
if (!verifyEntry(
- KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
+ KernelMap, ".language_version", false, [this](msgpack::DocNode &Node) {
return verifyArray(
Node,
- [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ [this](msgpack::DocNode &Node) { return verifyInteger(Node); }, 2);
}))
return false;
- if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
- return verifyArray(Node, [this](msgpack::Node &Node) {
+ if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::DocNode &Node) {
+ return verifyArray(Node, [this](msgpack::DocNode &Node) {
return verifyKernelArgs(Node);
});
}))
return false;
if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
- [this](msgpack::Node &Node) {
+ [this](msgpack::DocNode &Node) {
return verifyArray(Node,
- [this](msgpack::Node &Node) {
+ [this](msgpack::DocNode &Node) {
return verifyInteger(Node);
},
3);
}))
return false;
if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
- [this](msgpack::Node &Node) {
+ [this](msgpack::DocNode &Node) {
return verifyArray(Node,
- [this](msgpack::Node &Node) {
+ [this](msgpack::DocNode &Node) {
return verifyInteger(Node);
},
3);
}))
return false;
if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
- msgpack::ScalarNode::SK_String))
+ msgpack::Type::String))
return false;
if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
- msgpack::ScalarNode::SK_String))
+ msgpack::Type::String))
return false;
if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
return false;
@@ -287,29 +280,28 @@ bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
return true;
}
-bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
- auto RootMapPtr = dyn_cast<msgpack::MapNode>(&HSAMetadataRoot);
- if (!RootMapPtr)
+bool MetadataVerifier::verify(msgpack::DocNode &HSAMetadataRoot) {
+ if (!HSAMetadataRoot.isMap())
return false;
- auto &RootMap = *RootMapPtr;
+ auto &RootMap = HSAMetadataRoot.getMap();
if (!verifyEntry(
- RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
+ RootMap, "amdhsa.version", true, [this](msgpack::DocNode &Node) {
return verifyArray(
Node,
- [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ [this](msgpack::DocNode &Node) { return verifyInteger(Node); }, 2);
}))
return false;
if (!verifyEntry(
- RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
- return verifyArray(Node, [this](msgpack::Node &Node) {
- return verifyScalar(Node, msgpack::ScalarNode::SK_String);
+ RootMap, "amdhsa.printf", false, [this](msgpack::DocNode &Node) {
+ return verifyArray(Node, [this](msgpack::DocNode &Node) {
+ return verifyScalar(Node, msgpack::Type::String);
});
}))
return false;
if (!verifyEntry(RootMap, "amdhsa.kernels", true,
- [this](msgpack::Node &Node) {
- return verifyArray(Node, [this](msgpack::Node &Node) {
+ [this](msgpack::DocNode &Node) {
+ return verifyArray(Node, [this](msgpack::DocNode &Node) {
return verifyKernel(Node);
});
}))
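A sketch of driving the retyped verifier through the new msgpack::Document API (illustrative only; the helper and its name are hypothetical, and it assumes MetadataVerifier's constructor takes the Strict flag, which is declared in the header rather than in this hunk):

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"

using namespace llvm;

static bool verifyHSANote(StringRef NoteBlob) {
  // Parse the binary MsgPack note into a Document, then verify its root map.
  msgpack::Document Doc;
  if (!Doc.readFromBlob(NoteBlob, /*Multi=*/false))
    return false;
  AMDGPU::HSAMD::V3::MetadataVerifier Verifier(/*Strict=*/true);
  return Verifier.verify(Doc.getRoot());
}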
diff --git a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp
index 46f8056774b7..eb6bd33ce583 100644
--- a/lib/BinaryFormat/Dwarf.cpp
+++ b/lib/BinaryFormat/Dwarf.cpp
@@ -1,9 +1,8 @@
//===-- llvm/BinaryFormat/Dwarf.cpp - Dwarf Framework ------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -144,8 +143,12 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
case DW_OP_##NAME: \
return "DW_OP_" #NAME;
#include "llvm/BinaryFormat/Dwarf.def"
+ case DW_OP_LLVM_convert:
+ return "DW_OP_LLVM_convert";
case DW_OP_LLVM_fragment:
return "DW_OP_LLVM_fragment";
+ case DW_OP_LLVM_tag_offset:
+ return "DW_OP_LLVM_tag_offset";
}
}
@@ -154,7 +157,9 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \
.Case("DW_OP_" #NAME, DW_OP_##NAME)
#include "llvm/BinaryFormat/Dwarf.def"
+ .Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert)
.Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
+ .Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset)
.Default(0);
}
diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp
index 78efa6ec87be..7dfe23690a50 100644
--- a/lib/BinaryFormat/Magic.cpp
+++ b/lib/BinaryFormat/Magic.cpp
@@ -1,9 +1,8 @@
//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -62,6 +61,15 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::wasm_object;
break;
}
+
+ case 0x01:
+ // XCOFF format
+ if (startswith(Magic, "\x01\xDF"))
+ return file_magic::xcoff_object_32;
+ if (startswith(Magic, "\x01\xF7"))
+ return file_magic::xcoff_object_64;
+ break;
+
case 0xDE: // 0x0B17C0DE = BC wrapper
if (startswith(Magic, "\xDE\xC0\x17\x0B"))
return file_magic::bitcode;
@@ -182,7 +190,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
- case 'M': // Possible MS-DOS stub on Windows PE file or MSF/PDB file.
+ case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
+ // Minidump file.
if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
uint32_t off = read32le(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
@@ -192,6 +201,8 @@ file_magic llvm::identify_magic(StringRef Magic) {
}
if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
return file_magic::pdb;
+ if (startswith(Magic, "MDMP"))
+ return file_magic::minidump;
break;
case 0x64: // x86-64 or ARM64 Windows.
diff --git a/lib/BinaryFormat/Minidump.cpp b/lib/BinaryFormat/Minidump.cpp
new file mode 100644
index 000000000000..b618fb157012
--- /dev/null
+++ b/lib/BinaryFormat/Minidump.cpp
@@ -0,0 +1,14 @@
+//===-- Minidump.cpp - Minidump constants and structures ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/Minidump.h"
+
+using namespace llvm::minidump;
+
+constexpr uint32_t Header::MagicSignature;
+constexpr uint16_t Header::MagicVersion;
diff --git a/lib/BinaryFormat/MsgPackDocument.cpp b/lib/BinaryFormat/MsgPackDocument.cpp
new file mode 100644
index 000000000000..e12c54a37ad0
--- /dev/null
+++ b/lib/BinaryFormat/MsgPackDocument.cpp
@@ -0,0 +1,245 @@
+//===-- MsgPackDocument.cpp - MsgPack Document --------------------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements a class that exposes a simple in-memory representation
+/// of a document of MsgPack objects, that can be read from MsgPack, written to
+/// MsgPack, and inspected and modified in memory. This is intended to be a
+/// lighter-weight (in terms of memory allocations) replacement for
+/// MsgPackTypes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/BinaryFormat/MsgPackWriter.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+// Convert this DocNode into an empty array.
+void DocNode::convertToArray() { *this = getDocument()->getArrayNode(); }
+
+// Convert this DocNode into an empty map.
+void DocNode::convertToMap() { *this = getDocument()->getMapNode(); }
+
+/// Find the key in the MapDocNode.
+DocNode::MapTy::iterator MapDocNode::find(StringRef S) {
+ return find(getDocument()->getNode(S));
+}
+
+/// Member access for MapDocNode. The string data must remain valid for the
+/// lifetime of the Document.
+DocNode &MapDocNode::operator[](StringRef S) {
+ return (*this)[getDocument()->getNode(S)];
+}
+
+/// Member access for MapDocNode.
+DocNode &MapDocNode::operator[](DocNode Key) {
+ assert(!Key.isEmpty());
+ MapTy::value_type Entry(Key, DocNode());
+ auto ItAndInserted = Map->insert(Entry);
+ if (ItAndInserted.second) {
+ // Ensure a new element has its KindAndDoc initialized.
+ ItAndInserted.first->second = getDocument()->getNode();
+ }
+ return ItAndInserted.first->second;
+}
+
+/// Array element access. This extends the array if necessary.
+DocNode &ArrayDocNode::operator[](size_t Index) {
+ if (size() <= Index) {
+ // Ensure new elements have their KindAndDoc initialized.
+ Array->resize(Index + 1, getDocument()->getNode());
+ }
+ return (*Array)[Index];
+}
+
+// A level in the document reading stack.
+struct StackLevel {
+ DocNode Node;
+ size_t Length;
+ // Points to map entry when we have just processed a map key.
+ DocNode *MapEntry;
+};
+
+// Read a document from a binary msgpack blob.
+// The blob data must remain valid for the lifetime of this Document (because a
+// string object in the document contains a StringRef into the original blob).
+// If Multi, then this sets root to an array and adds top-level objects to it.
+// If !Multi, then it only reads a single top-level object, even if there are
+// more, and sets root to that.
+// Returns false if failed due to illegal format.
+bool Document::readFromBlob(StringRef Blob, bool Multi) {
+ msgpack::Reader MPReader(Blob);
+ SmallVector<StackLevel, 4> Stack;
+ if (Multi) {
+ // Create the array for multiple top-level objects.
+ Root = getArrayNode();
+ Stack.push_back(StackLevel({Root, (size_t)-1, nullptr}));
+ }
+ do {
+ // On to next element (or key if doing a map key next).
+ // Read the value.
+ Object Obj;
+ if (!MPReader.read(Obj)) {
+ if (Multi && Stack.size() == 1) {
+ // OK to finish here as we've just done a top-level element with Multi
+ break;
+ }
+ return false; // Finished too early
+ }
+ // Convert it into a DocNode.
+ DocNode Node;
+ switch (Obj.Kind) {
+ case Type::Nil:
+ Node = getNode();
+ break;
+ case Type::Int:
+ Node = getNode(Obj.Int);
+ break;
+ case Type::UInt:
+ Node = getNode(Obj.UInt);
+ break;
+ case Type::Boolean:
+ Node = getNode(Obj.Bool);
+ break;
+ case Type::Float:
+ Node = getNode(Obj.Float);
+ break;
+ case Type::String:
+ Node = getNode(Obj.Raw);
+ break;
+ case Type::Map:
+ Node = getMapNode();
+ break;
+ case Type::Array:
+ Node = getArrayNode();
+ break;
+ default:
+ return false; // Raw and Extension not supported
+ }
+
+ // Store it.
+ if (Stack.empty())
+ Root = Node;
+ else if (Stack.back().Node.getKind() == Type::Array) {
+ // Reading an array entry.
+ auto &Array = Stack.back().Node.getArray();
+ Array.push_back(Node);
+ } else {
+ auto &Map = Stack.back().Node.getMap();
+ if (!Stack.back().MapEntry) {
+ // Reading a map key.
+ Stack.back().MapEntry = &Map[Node];
+ } else {
+ // Reading the value for the map key read in the last iteration.
+ *Stack.back().MapEntry = Node;
+ Stack.back().MapEntry = nullptr;
+ }
+ }
+
+ // See if we're starting a new array or map.
+ switch (Node.getKind()) {
+ case msgpack::Type::Array:
+ case msgpack::Type::Map:
+ Stack.push_back(StackLevel({Node, Obj.Length, nullptr}));
+ break;
+ default:
+ break;
+ }
+
+ // Pop finished stack levels.
+ while (!Stack.empty()) {
+ if (Stack.back().Node.getKind() == msgpack::Type::Array) {
+ if (Stack.back().Node.getArray().size() != Stack.back().Length)
+ break;
+ } else {
+ if (Stack.back().MapEntry ||
+ Stack.back().Node.getMap().size() != Stack.back().Length)
+ break;
+ }
+ Stack.pop_back();
+ }
+ } while (!Stack.empty());
+ return true;
+}
+
+struct WriterStackLevel {
+ DocNode Node;
+ DocNode::MapTy::iterator MapIt;
+ DocNode::ArrayTy::iterator ArrayIt;
+ bool OnKey;
+};
+
+/// Write a MsgPack document to a binary MsgPack blob.
+void Document::writeToBlob(std::string &Blob) {
+ Blob.clear();
+ raw_string_ostream OS(Blob);
+ msgpack::Writer MPWriter(OS);
+ SmallVector<WriterStackLevel, 4> Stack;
+ DocNode Node = getRoot();
+ for (;;) {
+ switch (Node.getKind()) {
+ case Type::Array:
+ MPWriter.writeArraySize(Node.getArray().size());
+ Stack.push_back(
+ {Node, DocNode::MapTy::iterator(), Node.getArray().begin(), false});
+ break;
+ case Type::Map:
+ MPWriter.writeMapSize(Node.getMap().size());
+ Stack.push_back(
+ {Node, Node.getMap().begin(), DocNode::ArrayTy::iterator(), true});
+ break;
+ case Type::Nil:
+ MPWriter.writeNil();
+ break;
+ case Type::Boolean:
+ MPWriter.write(Node.getBool());
+ break;
+ case Type::Int:
+ MPWriter.write(Node.getInt());
+ break;
+ case Type::UInt:
+ MPWriter.write(Node.getUInt());
+ break;
+ case Type::String:
+ MPWriter.write(Node.getString());
+ break;
+ default:
+ llvm_unreachable("unhandled msgpack object kind");
+ }
+ // Pop finished stack levels.
+ while (!Stack.empty()) {
+ if (Stack.back().Node.getKind() == Type::Map) {
+ if (Stack.back().MapIt != Stack.back().Node.getMap().end())
+ break;
+ } else {
+ if (Stack.back().ArrayIt != Stack.back().Node.getArray().end())
+ break;
+ }
+ Stack.pop_back();
+ }
+ if (Stack.empty())
+ break;
+ // Get the next value.
+ if (Stack.back().Node.getKind() == Type::Map) {
+ if (Stack.back().OnKey) {
+ // Do the key of a key,value pair in a map.
+ Node = Stack.back().MapIt->first;
+ Stack.back().OnKey = false;
+ } else {
+ Node = Stack.back().MapIt->second;
+ ++Stack.back().MapIt;
+ Stack.back().OnKey = true;
+ }
+ } else {
+ Node = *Stack.back().ArrayIt;
+ ++Stack.back().ArrayIt;
+ }
+ }
+}
+
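A minimal usage sketch for the new msgpack::Document API (not part of the patch; it only uses operations visible above and assumes Document is default-constructible, as the reader/writer methods suggest):

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include <string>

using namespace llvm;

static bool documentRoundTrip() {
  msgpack::Document Doc;
  // Turn the (initially empty) root into a map and add two entries. String
  // literals have static storage, so passing them as StringRef is safe here.
  auto &Root = Doc.getRoot().getMap(/*Convert=*/true);
  Root[StringRef(".name")] = Doc.getNode(StringRef("kernel0"));
  Root[StringRef(".size")] = Doc.getNode(uint64_t(64));

  // Serialize to a binary MsgPack blob, then read it back into a second
  // document. readFromBlob returns false on malformed input.
  std::string Blob;
  Doc.writeToBlob(Blob);
  msgpack::Document Parsed;
  return Parsed.readFromBlob(Blob, /*Multi=*/false);
}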
diff --git a/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/lib/BinaryFormat/MsgPackDocumentYAML.cpp
new file mode 100644
index 000000000000..1d9c81ef8ebc
--- /dev/null
+++ b/lib/BinaryFormat/MsgPackDocumentYAML.cpp
@@ -0,0 +1,249 @@
+//===-- MsgPackDocumentYAML.cpp - MsgPack Document YAML interface -------*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This file implements YAMLIO on a msgpack::Document.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+using namespace msgpack;
+
+namespace {
+
+// Struct used to represent a scalar node. (MapDocNode and ArrayDocNode already
+// exist in MsgPackDocument.h.)
+struct ScalarDocNode : DocNode {
+ ScalarDocNode(DocNode N) : DocNode(N) {}
+
+ /// Get the YAML tag for this ScalarDocNode. This normally returns ""; it only
+ /// returns something else if the result of toString would be ambiguous, e.g.
+ /// a string that parses as a number or boolean.
+ StringRef getYAMLTag() const;
+};
+
+} // namespace
+
+/// Convert this DocNode to a string, assuming it is scalar.
+std::string DocNode::toString() const {
+ std::string S;
+ raw_string_ostream OS(S);
+ switch (getKind()) {
+ case msgpack::Type::String:
+ OS << Raw;
+ break;
+ case msgpack::Type::Nil:
+ break;
+ case msgpack::Type::Boolean:
+ OS << (Bool ? "true" : "false");
+ break;
+ case msgpack::Type::Int:
+ OS << Int;
+ break;
+ case msgpack::Type::UInt:
+ if (getDocument()->getHexMode())
+ OS << format("%#llx", (unsigned long long)UInt);
+ else
+ OS << UInt;
+ break;
+ case msgpack::Type::Float:
+ OS << Float;
+ break;
+ default:
+ llvm_unreachable("not scalar");
+ break;
+ }
+ return OS.str();
+}
+
+/// Convert the StringRef and use it to set this DocNode (assuming scalar). If
+/// it is a string, copy the string into the Document's strings list so we do
+/// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
+StringRef DocNode::fromString(StringRef S, StringRef Tag) {
+ if (Tag == "tag:yaml.org,2002:str")
+ Tag = "";
+ if (Tag == "!int" || Tag == "") {
+ // Try unsigned int then signed int.
+ *this = getDocument()->getNode(uint64_t(0));
+ StringRef Err = yaml::ScalarTraits<uint64_t>::input(S, nullptr, getUInt());
+ if (Err != "") {
+ *this = getDocument()->getNode(int64_t(0));
+ Err = yaml::ScalarTraits<int64_t>::input(S, nullptr, getInt());
+ }
+ if (Err == "" || Tag != "")
+ return Err;
+ }
+ if (Tag == "!nil") {
+ *this = getDocument()->getNode();
+ return "";
+ }
+ if (Tag == "!bool" || Tag == "") {
+ *this = getDocument()->getNode(false);
+ StringRef Err = yaml::ScalarTraits<bool>::input(S, nullptr, getBool());
+ if (Err == "" || Tag != "")
+ return Err;
+ }
+ if (Tag == "!float" || Tag == "") {
+ *this = getDocument()->getNode(0.0);
+ StringRef Err = yaml::ScalarTraits<double>::input(S, nullptr, getFloat());
+ if (Err == "" || Tag != "")
+ return Err;
+ }
+ assert((Tag == "!str" || Tag == "") && "unsupported tag");
+ std::string V;
+ StringRef Err = yaml::ScalarTraits<std::string>::input(S, nullptr, V);
+ if (Err == "")
+ *this = getDocument()->getNode(V, /*Copy=*/true);
+ return Err;
+}
+
+/// Get the YAML tag for this ScalarDocNode. This normally returns ""; it only
+/// returns something else if the result of toString would be ambiguous, e.g.
+/// a string that parses as a number or boolean.
+StringRef ScalarDocNode::getYAMLTag() const {
+ if (getKind() == msgpack::Type::Nil)
+ return "!nil";
+ // Try converting both ways and see if we get the same kind. If not, we need
+ // a tag.
+ ScalarDocNode N = getDocument()->getNode();
+ N.fromString(toString(), "");
+ if (N.getKind() == getKind())
+ return "";
+ // Tolerate signedness of int changing, as tags do not differentiate between
+ // them anyway.
+ if (N.getKind() == msgpack::Type::UInt && getKind() == msgpack::Type::Int)
+ return "";
+ if (N.getKind() == msgpack::Type::Int && getKind() == msgpack::Type::UInt)
+ return "";
+ // We do need a tag.
+ switch (getKind()) {
+ case msgpack::Type::String:
+ return "!str";
+ case msgpack::Type::Int:
+ return "!int";
+ case msgpack::Type::UInt:
+ return "!int";
+ case msgpack::Type::Boolean:
+ return "!bool";
+ case msgpack::Type::Float:
+ return "!float";
+ default:
+ llvm_unreachable("unrecognized kind");
+ }
+}
+
+namespace llvm {
+namespace yaml {
+
+/// YAMLIO for DocNode
+template <> struct PolymorphicTraits<DocNode> {
+
+ static NodeKind getKind(const DocNode &N) {
+ switch (N.getKind()) {
+ case msgpack::Type::Map:
+ return NodeKind::Map;
+ case msgpack::Type::Array:
+ return NodeKind::Sequence;
+ default:
+ return NodeKind::Scalar;
+ }
+ }
+
+ static MapDocNode &getAsMap(DocNode &N) { return N.getMap(/*Convert=*/true); }
+
+ static ArrayDocNode &getAsSequence(DocNode &N) {
+ N.getArray(/*Convert=*/true);
+ return *static_cast<ArrayDocNode *>(&N);
+ }
+
+ static ScalarDocNode &getAsScalar(DocNode &N) {
+ return *static_cast<ScalarDocNode *>(&N);
+ }
+};
+
+/// YAMLIO for ScalarDocNode
+template <> struct TaggedScalarTraits<ScalarDocNode> {
+
+ static void output(const ScalarDocNode &S, void *Ctxt, raw_ostream &OS,
+ raw_ostream &TagOS) {
+ TagOS << S.getYAMLTag();
+ OS << S.toString();
+ }
+
+ static StringRef input(StringRef Str, StringRef Tag, void *Ctxt,
+ ScalarDocNode &S) {
+ return S.fromString(Str, Tag);
+ }
+
+ static QuotingType mustQuote(const ScalarDocNode &S, StringRef ScalarStr) {
+ switch (S.getKind()) {
+ case Type::Int:
+ return ScalarTraits<int64_t>::mustQuote(ScalarStr);
+ case Type::UInt:
+ return ScalarTraits<uint64_t>::mustQuote(ScalarStr);
+ case Type::Nil:
+ return ScalarTraits<StringRef>::mustQuote(ScalarStr);
+ case Type::Boolean:
+ return ScalarTraits<bool>::mustQuote(ScalarStr);
+ case Type::Float:
+ return ScalarTraits<double>::mustQuote(ScalarStr);
+ case Type::Binary:
+ case Type::String:
+ return ScalarTraits<std::string>::mustQuote(ScalarStr);
+ default:
+ llvm_unreachable("unrecognized ScalarKind");
+ }
+ }
+};
+
+/// YAMLIO for MapDocNode
+template <> struct CustomMappingTraits<MapDocNode> {
+
+ static void inputOne(IO &IO, StringRef Key, MapDocNode &M) {
+ ScalarDocNode KeyObj = M.getDocument()->getNode();
+ KeyObj.fromString(Key, "");
+ IO.mapRequired(Key.str().c_str(), M.getMap()[KeyObj]);
+ }
+
+ static void output(IO &IO, MapDocNode &M) {
+ for (auto I : M.getMap()) {
+ IO.mapRequired(I.first.toString().c_str(), I.second);
+ }
+ }
+};
+
+/// YAMLIO for ArrayNode
+template <> struct SequenceTraits<ArrayDocNode> {
+
+ static size_t size(IO &IO, ArrayDocNode &A) { return A.size(); }
+
+ static DocNode &element(IO &IO, ArrayDocNode &A, size_t Index) {
+ return A[Index];
+ }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+/// Convert MsgPack Document to YAML text.
+void msgpack::Document::toYAML(raw_ostream &OS) {
+ yaml::Output Yout(OS);
+ Yout << getRoot();
+}
+
+/// Read YAML text into the MsgPack document. Returns false on failure.
+bool msgpack::Document::fromYAML(StringRef S) {
+ clear();
+ yaml::Input Yin(S);
+ Yin >> getRoot();
+ return !Yin.error();
+}
+
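For completeness, an illustrative round-trip through the YAML layer added above (hypothetical helper, not part of the patch); toYAML emits via the PolymorphicTraits defined in this file, and fromYAML reports success by returning true:

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

using namespace llvm;

static bool yamlRoundTrip(msgpack::Document &Doc) {
  // Render the document as YAML text.
  std::string YAML;
  raw_string_ostream OS(YAML);
  Doc.toYAML(OS);
  OS.flush();

  // Parse the text back into a fresh document (fromYAML clears it first).
  msgpack::Document Parsed;
  return Parsed.fromYAML(YAML);
}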
diff --git a/lib/BinaryFormat/MsgPackReader.cpp b/lib/BinaryFormat/MsgPackReader.cpp
index b510fdba9608..872a6e0e29f8 100644
--- a/lib/BinaryFormat/MsgPackReader.cpp
+++ b/lib/BinaryFormat/MsgPackReader.cpp
@@ -1,9 +1,8 @@
//===- MsgPackReader.cpp - Simple MsgPack reader ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/BinaryFormat/MsgPackTypes.cpp b/lib/BinaryFormat/MsgPackTypes.cpp
deleted file mode 100644
index 4a8f70b10fb8..000000000000
--- a/lib/BinaryFormat/MsgPackTypes.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-//===- MsgPackTypes.cpp - MsgPack Types -------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Implementation of types representing MessagePack "documents".
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/BinaryFormat/MsgPackTypes.h"
-#include "llvm/Support/Error.h"
-
-using namespace llvm;
-using namespace msgpack;
-
-namespace llvm {
-namespace msgpack {
-void ScalarNode::anchor() {}
-void ArrayNode::anchor() {}
-void MapNode::anchor() {}
-}
-}
-
-Expected<OptNodePtr> Node::readArray(Reader &MPReader, size_t Length) {
- auto A = std::make_shared<ArrayNode>();
- for (size_t I = 0; I < Length; ++I) {
- auto OptNodeOrErr = Node::read(MPReader);
- if (auto Err = OptNodeOrErr.takeError())
- return std::move(Err);
- if (!*OptNodeOrErr)
- return make_error<StringError>(
- "Insufficient array elements",
- std::make_error_code(std::errc::invalid_argument));
- A->push_back(std::move(**OptNodeOrErr));
- }
- return OptNodePtr(std::move(A));
-}
-
-Expected<OptNodePtr> Node::readMap(Reader &MPReader, size_t Length) {
- auto M = std::make_shared<MapNode>();
- for (size_t I = 0; I < Length; ++I) {
- auto OptKeyOrErr = Node::read(MPReader);
- if (auto Err = OptKeyOrErr.takeError())
- return std::move(Err);
- if (!*OptKeyOrErr)
- return make_error<StringError>(
- "Insufficient map elements",
- std::make_error_code(std::errc::invalid_argument));
- auto OptValOrErr = Node::read(MPReader);
- if (auto Err = OptValOrErr.takeError())
- return std::move(Err);
- if (!*OptValOrErr)
- return make_error<StringError>(
- "Insufficient map elements",
- std::make_error_code(std::errc::invalid_argument));
- auto *Key = dyn_cast<ScalarNode>((*OptKeyOrErr)->get());
- if (!Key)
- return make_error<StringError>(
- "Only string map keys are supported",
- std::make_error_code(std::errc::invalid_argument));
- if (Key->getScalarKind() != ScalarNode::SK_String)
- return make_error<StringError>(
- "Only string map keys are supported",
- std::make_error_code(std::errc::invalid_argument));
- M->try_emplace(Key->getString(), std::move(**OptValOrErr));
- }
- return OptNodePtr(std::move(M));
-}
-
-Expected<OptNodePtr> Node::read(Reader &MPReader) {
- Object Obj;
-
- auto ContinueOrErr = MPReader.read(Obj);
- if (auto Err = ContinueOrErr.takeError())
- return std::move(Err);
- if (!*ContinueOrErr)
- return None;
-
- switch (Obj.Kind) {
- case Type::Int:
- return OptNodePtr(std::make_shared<ScalarNode>(Obj.Int));
- case Type::UInt:
- return OptNodePtr(std::make_shared<ScalarNode>(Obj.UInt));
- case Type::Nil:
- return OptNodePtr(std::make_shared<ScalarNode>());
- case Type::Boolean:
- return OptNodePtr(std::make_shared<ScalarNode>(Obj.Bool));
- case Type::Float:
- return OptNodePtr(std::make_shared<ScalarNode>(Obj.Float));
- case Type::String:
- return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
- case Type::Binary:
- return OptNodePtr(std::make_shared<ScalarNode>(Obj.Raw));
- case Type::Array:
- return Node::readArray(MPReader, Obj.Length);
- case Type::Map:
- return Node::readMap(MPReader, Obj.Length);
- case Type::Extension:
- return make_error<StringError>(
- "Extension types are not supported",
- std::make_error_code(std::errc::invalid_argument));
- }
- llvm_unreachable("msgpack::Type not handled");
-}
-
-void ScalarNode::destroy() {
- switch (SKind) {
- case SK_String:
- case SK_Binary:
- StringValue.~basic_string();
- break;
- default:
- // POD types do not require destruction
- break;
- }
-}
-
-ScalarNode::ScalarNode(int64_t IntValue)
- : Node(NK_Scalar), SKind(SK_Int), IntValue(IntValue) {}
-
-ScalarNode::ScalarNode(int32_t IntValue)
- : ScalarNode(static_cast<int64_t>(IntValue)) {}
-
-ScalarNode::ScalarNode(uint64_t UIntValue)
- : Node(NK_Scalar), SKind(SK_UInt), UIntValue(UIntValue) {}
-
-ScalarNode::ScalarNode(uint32_t IntValue)
- : ScalarNode(static_cast<uint64_t>(IntValue)) {}
-
-ScalarNode::ScalarNode() : Node(NK_Scalar), SKind(SK_Nil) {}
-
-ScalarNode::ScalarNode(bool BoolValue)
- : Node(NK_Scalar), SKind(SK_Boolean), BoolValue(BoolValue) {}
-
-ScalarNode::ScalarNode(double FloatValue)
- : Node(NK_Scalar), SKind(SK_Float), BoolValue(FloatValue) {}
-
-ScalarNode::ScalarNode(StringRef StringValue)
- : Node(NK_Scalar), SKind(SK_String) {
- new (&this->StringValue) std::string(StringValue);
-}
-
-ScalarNode::ScalarNode(const char *StringValue)
- : ScalarNode(StringRef(StringValue)) {}
-
-ScalarNode::ScalarNode(std::string &&StringValue)
- : Node(NK_Scalar), SKind(SK_String) {
- new (&this->StringValue) std::string(StringValue);
-}
-
-ScalarNode::ScalarNode(MemoryBufferRef BinaryValue)
- : Node(NK_Scalar), SKind(SK_Binary) {
- new (&StringValue) std::string(BinaryValue.getBuffer());
-}
-
-ScalarNode::~ScalarNode() { destroy(); }
-
-ScalarNode &ScalarNode::operator=(ScalarNode &&RHS) {
- destroy();
- switch (SKind = RHS.SKind) {
- case SK_Int:
- IntValue = RHS.IntValue;
- break;
- case SK_UInt:
- UIntValue = RHS.UIntValue;
- break;
- case SK_Boolean:
- BoolValue = RHS.BoolValue;
- break;
- case SK_Float:
- FloatValue = RHS.FloatValue;
- break;
- case SK_String:
- case SK_Binary:
- new (&StringValue) std::string(std::move(RHS.StringValue));
- break;
- case SK_Nil:
- // pass
- break;
- }
- return *this;
-}
-
-StringRef ScalarNode::inputYAML(StringRef ScalarStr) {
- switch (SKind) {
- case SK_Int:
- return yaml::ScalarTraits<int64_t>::input(ScalarStr, nullptr, IntValue);
- case SK_UInt:
- return yaml::ScalarTraits<uint64_t>::input(ScalarStr, nullptr, UIntValue);
- case SK_Nil:
- return StringRef();
- case SK_Boolean:
- return yaml::ScalarTraits<bool>::input(ScalarStr, nullptr, BoolValue);
- case SK_Float:
- return yaml::ScalarTraits<double>::input(ScalarStr, nullptr, FloatValue);
- case SK_Binary:
- case SK_String:
- return yaml::ScalarTraits<std::string>::input(ScalarStr, nullptr,
- StringValue);
- }
- llvm_unreachable("unrecognized ScalarKind");
-}
-
-void ScalarNode::outputYAML(raw_ostream &OS) const {
- switch (SKind) {
- case SK_Int:
- yaml::ScalarTraits<int64_t>::output(IntValue, nullptr, OS);
- break;
- case SK_UInt:
- yaml::ScalarTraits<uint64_t>::output(UIntValue, nullptr, OS);
- break;
- case SK_Nil:
- yaml::ScalarTraits<StringRef>::output("", nullptr, OS);
- break;
- case SK_Boolean:
- yaml::ScalarTraits<bool>::output(BoolValue, nullptr, OS);
- break;
- case SK_Float:
- yaml::ScalarTraits<double>::output(FloatValue, nullptr, OS);
- break;
- case SK_Binary:
- case SK_String:
- yaml::ScalarTraits<std::string>::output(StringValue, nullptr, OS);
- break;
- }
-}
-
-yaml::QuotingType ScalarNode::mustQuoteYAML(StringRef ScalarStr) const {
- switch (SKind) {
- case SK_Int:
- return yaml::ScalarTraits<int64_t>::mustQuote(ScalarStr);
- case SK_UInt:
- return yaml::ScalarTraits<uint64_t>::mustQuote(ScalarStr);
- case SK_Nil:
- return yaml::ScalarTraits<StringRef>::mustQuote(ScalarStr);
- case SK_Boolean:
- return yaml::ScalarTraits<bool>::mustQuote(ScalarStr);
- case SK_Float:
- return yaml::ScalarTraits<double>::mustQuote(ScalarStr);
- case SK_Binary:
- case SK_String:
- return yaml::ScalarTraits<std::string>::mustQuote(ScalarStr);
- }
- llvm_unreachable("unrecognized ScalarKind");
-}
-
-const char *ScalarNode::IntTag = "!int";
-const char *ScalarNode::NilTag = "!nil";
-const char *ScalarNode::BooleanTag = "!bool";
-const char *ScalarNode::FloatTag = "!float";
-const char *ScalarNode::StringTag = "!str";
-const char *ScalarNode::BinaryTag = "!bin";
-
-StringRef ScalarNode::getYAMLTag() const {
- switch (SKind) {
- case SK_Int:
- return IntTag;
- case SK_UInt:
- return IntTag;
- case SK_Nil:
- return NilTag;
- case SK_Boolean:
- return BooleanTag;
- case SK_Float:
- return FloatTag;
- case SK_String:
- return StringTag;
- case SK_Binary:
- return BinaryTag;
- }
- llvm_unreachable("unrecognized ScalarKind");
-}
-
-void ScalarNode::write(Writer &MPWriter) {
- switch (SKind) {
- case SK_Int:
- MPWriter.write(IntValue);
- break;
- case SK_UInt:
- MPWriter.write(UIntValue);
- break;
- case SK_Nil:
- MPWriter.writeNil();
- break;
- case SK_Boolean:
- MPWriter.write(BoolValue);
- break;
- case SK_Float:
- MPWriter.write(FloatValue);
- break;
- case SK_String:
- MPWriter.write(StringValue);
- break;
- case SK_Binary:
- MPWriter.write(MemoryBufferRef(StringValue, ""));
- break;
- }
-}
diff --git a/lib/BinaryFormat/MsgPackWriter.cpp b/lib/BinaryFormat/MsgPackWriter.cpp
index d024bb0fcdb2..b4d70e8f78c1 100644
--- a/lib/BinaryFormat/MsgPackWriter.cpp
+++ b/lib/BinaryFormat/MsgPackWriter.cpp
@@ -1,9 +1,8 @@
//===- MsgPackWriter.cpp - Simple MsgPack writer ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/BinaryFormat/Wasm.cpp b/lib/BinaryFormat/Wasm.cpp
index 94d40bf02a39..d46be481edb3 100644
--- a/lib/BinaryFormat/Wasm.cpp
+++ b/lib/BinaryFormat/Wasm.cpp
@@ -1,16 +1,15 @@
//===-- llvm/BinaryFormat/Wasm.cpp -------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/BinaryFormat/Wasm.h"
-std::string llvm::wasm::toString(wasm::WasmSymbolType type) {
- switch (type) {
+std::string llvm::wasm::toString(wasm::WasmSymbolType Type) {
+ switch (Type) {
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
return "WASM_SYMBOL_TYPE_FUNCTION";
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
@@ -25,8 +24,8 @@ std::string llvm::wasm::toString(wasm::WasmSymbolType type) {
llvm_unreachable("unknown symbol type");
}
-std::string llvm::wasm::relocTypetoString(uint32_t type) {
- switch (type) {
+std::string llvm::wasm::relocTypetoString(uint32_t Type) {
+ switch (Type) {
#define WASM_RELOC(NAME, VALUE) \
case VALUE: \
return #NAME;
@@ -36,3 +35,17 @@ std::string llvm::wasm::relocTypetoString(uint32_t type) {
llvm_unreachable("unknown reloc type");
}
}
+
+bool llvm::wasm::relocTypeHasAddend(uint32_t Type) {
+ switch (Type) {
+ case R_WASM_MEMORY_ADDR_LEB:
+ case R_WASM_MEMORY_ADDR_SLEB:
+ case R_WASM_MEMORY_ADDR_REL_SLEB:
+ case R_WASM_MEMORY_ADDR_I32:
+ case R_WASM_FUNCTION_OFFSET_I32:
+ case R_WASM_SECTION_OFFSET_I32:
+ return true;
+ default:
+ return false;
+ }
+}
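
The relocTypeHasAddend helper added above lets callers test whether a given wasm relocation type carries an addend before trying to use one. A small, hypothetical usage sketch; resolveWasmReloc and its parameters are illustrative only, and the declaration is assumed to live in llvm/BinaryFormat/Wasm.h alongside the other helpers in this file:

#include "llvm/BinaryFormat/Wasm.h"
#include <cstdint>

// Sketch: apply an addend only for relocation types that encode one.
static uint64_t resolveWasmReloc(uint32_t RelocType, uint64_t SymbolValue,
                                 int64_t Addend) {
  if (llvm::wasm::relocTypeHasAddend(RelocType))
    return SymbolValue + Addend;
  return SymbolValue;
}
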
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 3ec45956b3e5..5ac893aef14e 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -1,9 +1,8 @@
//===-- BitReader.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
new file mode 100644
index 000000000000..9c30d563a314
--- /dev/null
+++ b/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -0,0 +1,980 @@
+//===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeAnalyzer.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SHA1.h"
+
+using namespace llvm;
+
+static Error reportError(StringRef Message) {
+ return createStringError(std::errc::illegal_byte_sequence, Message.data());
+}
+
+/// Return a symbolic block name if known, otherwise return None.
+static Optional<const char *> GetBlockName(unsigned BlockID,
+ const BitstreamBlockInfo &BlockInfo,
+ CurStreamTypeType CurStreamType) {
+ // Standard blocks for all bitcode files.
+ if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
+ if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
+ return "BLOCKINFO_BLOCK";
+ return None;
+ }
+
+ // Check to see if we have a blockinfo record for this block, with a name.
+ if (const BitstreamBlockInfo::BlockInfo *Info =
+ BlockInfo.getBlockInfo(BlockID)) {
+ if (!Info->Name.empty())
+ return Info->Name.c_str();
+ }
+
+ if (CurStreamType != LLVMIRBitstream)
+ return None;
+
+ switch (BlockID) {
+ default:
+ return None;
+ case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
+ return "OPERAND_BUNDLE_TAGS_BLOCK";
+ case bitc::MODULE_BLOCK_ID:
+ return "MODULE_BLOCK";
+ case bitc::PARAMATTR_BLOCK_ID:
+ return "PARAMATTR_BLOCK";
+ case bitc::PARAMATTR_GROUP_BLOCK_ID:
+ return "PARAMATTR_GROUP_BLOCK_ID";
+ case bitc::TYPE_BLOCK_ID_NEW:
+ return "TYPE_BLOCK_ID";
+ case bitc::CONSTANTS_BLOCK_ID:
+ return "CONSTANTS_BLOCK";
+ case bitc::FUNCTION_BLOCK_ID:
+ return "FUNCTION_BLOCK";
+ case bitc::IDENTIFICATION_BLOCK_ID:
+ return "IDENTIFICATION_BLOCK_ID";
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ return "VALUE_SYMTAB";
+ case bitc::METADATA_BLOCK_ID:
+ return "METADATA_BLOCK";
+ case bitc::METADATA_KIND_BLOCK_ID:
+ return "METADATA_KIND_BLOCK";
+ case bitc::METADATA_ATTACHMENT_ID:
+ return "METADATA_ATTACHMENT_BLOCK";
+ case bitc::USELIST_BLOCK_ID:
+ return "USELIST_BLOCK_ID";
+ case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
+ return "GLOBALVAL_SUMMARY_BLOCK";
+ case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
+ return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
+ case bitc::MODULE_STRTAB_BLOCK_ID:
+ return "MODULE_STRTAB_BLOCK";
+ case bitc::STRTAB_BLOCK_ID:
+ return "STRTAB_BLOCK";
+ case bitc::SYMTAB_BLOCK_ID:
+ return "SYMTAB_BLOCK";
+ }
+}
+
+/// Return a symbolic code name if known, otherwise return None.
+static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
+ const BitstreamBlockInfo &BlockInfo,
+ CurStreamTypeType CurStreamType) {
+ // Standard blocks for all bitcode files.
+ if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
+ if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
+ switch (CodeID) {
+ default:
+ return None;
+ case bitc::BLOCKINFO_CODE_SETBID:
+ return "SETBID";
+ case bitc::BLOCKINFO_CODE_BLOCKNAME:
+ return "BLOCKNAME";
+ case bitc::BLOCKINFO_CODE_SETRECORDNAME:
+ return "SETRECORDNAME";
+ }
+ }
+ return None;
+ }
+
+ // Check to see if we have a blockinfo record for this record, with a name.
+ if (const BitstreamBlockInfo::BlockInfo *Info =
+ BlockInfo.getBlockInfo(BlockID)) {
+ for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
+ if (Info->RecordNames[i].first == CodeID)
+ return Info->RecordNames[i].second.c_str();
+ }
+
+ if (CurStreamType != LLVMIRBitstream)
+ return None;
+
+#define STRINGIFY_CODE(PREFIX, CODE) \
+ case bitc::PREFIX##_##CODE: \
+ return #CODE;
+ switch (BlockID) {
+ default:
+ return None;
+ case bitc::MODULE_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(MODULE_CODE, VERSION)
+ STRINGIFY_CODE(MODULE_CODE, TRIPLE)
+ STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
+ STRINGIFY_CODE(MODULE_CODE, ASM)
+ STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
+ STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0
+ STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
+ STRINGIFY_CODE(MODULE_CODE, FUNCTION)
+ STRINGIFY_CODE(MODULE_CODE, ALIAS)
+ STRINGIFY_CODE(MODULE_CODE, GCNAME)
+ STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
+ STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
+ STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
+ STRINGIFY_CODE(MODULE_CODE, HASH)
+ }
+ case bitc::IDENTIFICATION_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
+ STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
+ }
+ case bitc::PARAMATTR_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ // FIXME: Should these be different?
+ case bitc::PARAMATTR_CODE_ENTRY_OLD:
+ return "ENTRY";
+ case bitc::PARAMATTR_CODE_ENTRY:
+ return "ENTRY";
+ }
+ case bitc::PARAMATTR_GROUP_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ case bitc::PARAMATTR_GRP_CODE_ENTRY:
+ return "ENTRY";
+ }
+ case bitc::TYPE_BLOCK_ID_NEW:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
+ STRINGIFY_CODE(TYPE_CODE, VOID)
+ STRINGIFY_CODE(TYPE_CODE, FLOAT)
+ STRINGIFY_CODE(TYPE_CODE, DOUBLE)
+ STRINGIFY_CODE(TYPE_CODE, LABEL)
+ STRINGIFY_CODE(TYPE_CODE, OPAQUE)
+ STRINGIFY_CODE(TYPE_CODE, INTEGER)
+ STRINGIFY_CODE(TYPE_CODE, POINTER)
+ STRINGIFY_CODE(TYPE_CODE, ARRAY)
+ STRINGIFY_CODE(TYPE_CODE, VECTOR)
+ STRINGIFY_CODE(TYPE_CODE, X86_FP80)
+ STRINGIFY_CODE(TYPE_CODE, FP128)
+ STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
+ STRINGIFY_CODE(TYPE_CODE, METADATA)
+ STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
+ STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
+ STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
+ STRINGIFY_CODE(TYPE_CODE, FUNCTION)
+ }
+
+ case bitc::CONSTANTS_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(CST_CODE, SETTYPE)
+ STRINGIFY_CODE(CST_CODE, NULL)
+ STRINGIFY_CODE(CST_CODE, UNDEF)
+ STRINGIFY_CODE(CST_CODE, INTEGER)
+ STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
+ STRINGIFY_CODE(CST_CODE, FLOAT)
+ STRINGIFY_CODE(CST_CODE, AGGREGATE)
+ STRINGIFY_CODE(CST_CODE, STRING)
+ STRINGIFY_CODE(CST_CODE, CSTRING)
+ STRINGIFY_CODE(CST_CODE, CE_BINOP)
+ STRINGIFY_CODE(CST_CODE, CE_CAST)
+ STRINGIFY_CODE(CST_CODE, CE_GEP)
+ STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
+ STRINGIFY_CODE(CST_CODE, CE_SELECT)
+ STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
+ STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
+ STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
+ STRINGIFY_CODE(CST_CODE, CE_CMP)
+ STRINGIFY_CODE(CST_CODE, INLINEASM)
+ STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
+ STRINGIFY_CODE(CST_CODE, CE_UNOP)
+ case bitc::CST_CODE_BLOCKADDRESS:
+ return "CST_CODE_BLOCKADDRESS";
+ STRINGIFY_CODE(CST_CODE, DATA)
+ }
+ case bitc::FUNCTION_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
+ STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
+ STRINGIFY_CODE(FUNC_CODE, INST_CAST)
+ STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
+ STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
+ STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
+ STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
+ STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
+ STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
+ STRINGIFY_CODE(FUNC_CODE, INST_CMP)
+ STRINGIFY_CODE(FUNC_CODE, INST_RET)
+ STRINGIFY_CODE(FUNC_CODE, INST_BR)
+ STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
+ STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
+ STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
+ STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
+ STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
+ STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
+ STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
+ STRINGIFY_CODE(FUNC_CODE, INST_PHI)
+ STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
+ STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
+ STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
+ STRINGIFY_CODE(FUNC_CODE, INST_STORE)
+ STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
+ STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
+ STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
+ STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
+ STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
+ STRINGIFY_CODE(FUNC_CODE, INST_CALL)
+ STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
+ STRINGIFY_CODE(FUNC_CODE, INST_GEP)
+ STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
+ STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
+ STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
+ STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
+ STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
+ STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
+ STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
+ }
+ case bitc::VALUE_SYMTAB_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(VST_CODE, ENTRY)
+ STRINGIFY_CODE(VST_CODE, BBENTRY)
+ STRINGIFY_CODE(VST_CODE, FNENTRY)
+ STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
+ }
+ case bitc::MODULE_STRTAB_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(MST_CODE, ENTRY)
+ STRINGIFY_CODE(MST_CODE, HASH)
+ }
+ case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
+ case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(FS, PERMODULE)
+ STRINGIFY_CODE(FS, PERMODULE_PROFILE)
+ STRINGIFY_CODE(FS, PERMODULE_RELBF)
+ STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
+ STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)
+ STRINGIFY_CODE(FS, COMBINED)
+ STRINGIFY_CODE(FS, COMBINED_PROFILE)
+ STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
+ STRINGIFY_CODE(FS, ALIAS)
+ STRINGIFY_CODE(FS, COMBINED_ALIAS)
+ STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
+ STRINGIFY_CODE(FS, VERSION)
+ STRINGIFY_CODE(FS, FLAGS)
+ STRINGIFY_CODE(FS, TYPE_TESTS)
+ STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
+ STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
+ STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
+ STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
+ STRINGIFY_CODE(FS, VALUE_GUID)
+ STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
+ STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
+ STRINGIFY_CODE(FS, TYPE_ID)
+ STRINGIFY_CODE(FS, TYPE_ID_METADATA)
+ }
+ case bitc::METADATA_ATTACHMENT_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(METADATA, ATTACHMENT)
+ }
+ case bitc::METADATA_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(METADATA, STRING_OLD)
+ STRINGIFY_CODE(METADATA, VALUE)
+ STRINGIFY_CODE(METADATA, NODE)
+ STRINGIFY_CODE(METADATA, NAME)
+ STRINGIFY_CODE(METADATA, DISTINCT_NODE)
+ STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
+ STRINGIFY_CODE(METADATA, LOCATION)
+ STRINGIFY_CODE(METADATA, OLD_NODE)
+ STRINGIFY_CODE(METADATA, OLD_FN_NODE)
+ STRINGIFY_CODE(METADATA, NAMED_NODE)
+ STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
+ STRINGIFY_CODE(METADATA, SUBRANGE)
+ STRINGIFY_CODE(METADATA, ENUMERATOR)
+ STRINGIFY_CODE(METADATA, BASIC_TYPE)
+ STRINGIFY_CODE(METADATA, FILE)
+ STRINGIFY_CODE(METADATA, DERIVED_TYPE)
+ STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
+ STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
+ STRINGIFY_CODE(METADATA, COMPILE_UNIT)
+ STRINGIFY_CODE(METADATA, SUBPROGRAM)
+ STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
+ STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
+ STRINGIFY_CODE(METADATA, NAMESPACE)
+ STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
+ STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
+ STRINGIFY_CODE(METADATA, GLOBAL_VAR)
+ STRINGIFY_CODE(METADATA, LOCAL_VAR)
+ STRINGIFY_CODE(METADATA, EXPRESSION)
+ STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
+ STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
+ STRINGIFY_CODE(METADATA, MODULE)
+ STRINGIFY_CODE(METADATA, MACRO)
+ STRINGIFY_CODE(METADATA, MACRO_FILE)
+ STRINGIFY_CODE(METADATA, STRINGS)
+ STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
+ STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
+ STRINGIFY_CODE(METADATA, INDEX_OFFSET)
+ STRINGIFY_CODE(METADATA, INDEX)
+ }
+ case bitc::METADATA_KIND_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ STRINGIFY_CODE(METADATA, KIND)
+ }
+ case bitc::USELIST_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ case bitc::USELIST_CODE_DEFAULT:
+ return "USELIST_CODE_DEFAULT";
+ case bitc::USELIST_CODE_BB:
+ return "USELIST_CODE_BB";
+ }
+
+ case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ case bitc::OPERAND_BUNDLE_TAG:
+ return "OPERAND_BUNDLE_TAG";
+ }
+ case bitc::STRTAB_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ case bitc::STRTAB_BLOB:
+ return "BLOB";
+ }
+ case bitc::SYMTAB_BLOCK_ID:
+ switch (CodeID) {
+ default:
+ return None;
+ case bitc::SYMTAB_BLOB:
+ return "BLOB";
+ }
+ }
+#undef STRINGIFY_CODE
+}
+
+static void printSize(raw_ostream &OS, double Bits) {
+ OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32));
+}
+static void printSize(raw_ostream &OS, uint64_t Bits) {
+ OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8,
+ (unsigned long)(Bits / 32));
+}
+
+static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) {
+ auto tryRead = [&Stream](char &Dest, size_t size) -> Error {
+ if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size))
+ Dest = MaybeWord.get();
+ else
+ return MaybeWord.takeError();
+ return Error::success();
+ };
+
+ char Signature[6];
+ if (Error Err = tryRead(Signature[0], 8))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[1], 8))
+ return std::move(Err);
+
+ // Autodetect the file contents, if it is one we know.
+ if (Signature[0] == 'C' && Signature[1] == 'P') {
+ if (Error Err = tryRead(Signature[2], 8))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[3], 8))
+ return std::move(Err);
+ if (Signature[2] == 'C' && Signature[3] == 'H')
+ return ClangSerializedASTBitstream;
+ } else if (Signature[0] == 'D' && Signature[1] == 'I') {
+ if (Error Err = tryRead(Signature[2], 8))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[3], 8))
+ return std::move(Err);
+ if (Signature[2] == 'A' && Signature[3] == 'G')
+ return ClangSerializedDiagnosticsBitstream;
+ } else {
+ if (Error Err = tryRead(Signature[2], 4))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[3], 4))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[4], 4))
+ return std::move(Err);
+ if (Error Err = tryRead(Signature[5], 4))
+ return std::move(Err);
+ if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 &&
+ Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD)
+ return LLVMIRBitstream;
+ }
+ return UnknownBitstream;
+}
+
+static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O,
+ BitstreamCursor &Stream) {
+ ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes();
+ const unsigned char *BufPtr = (const unsigned char *)Bytes.data();
+ const unsigned char *EndBufPtr = BufPtr + Bytes.size();
+
+ // If we have a wrapper header, parse it and ignore the non-bc file
+ // contents. The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
+ if (Bytes.size() < BWH_HeaderSize)
+ return reportError("Invalid bitcode wrapper header");
+
+ if (O) {
+ unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
+ unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
+ unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
+ unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
+ unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
+
+ O->OS << "<BITCODE_WRAPPER_HEADER"
+ << " Magic=" << format_hex(Magic, 10)
+ << " Version=" << format_hex(Version, 10)
+ << " Offset=" << format_hex(Offset, 10)
+ << " Size=" << format_hex(Size, 10)
+ << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
+ }
+
+ if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
+ return reportError("Invalid bitcode wrapper header");
+ }
+
+ // Use the cursor modified by skipping the wrapper header.
+ Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
+
+ return ReadSignature(Stream);
+}
+
+static bool canDecodeBlob(unsigned Code, unsigned BlockID) {
+ return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS;
+}
+
+Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
+ ArrayRef<uint64_t> Record,
+ StringRef Blob,
+ raw_ostream &OS) {
+ if (Blob.empty())
+ return reportError("Cannot decode empty blob.");
+
+ if (Record.size() != 2)
+ return reportError(
+ "Decoding metadata strings blob needs two record entries.");
+
+ unsigned NumStrings = Record[0];
+ unsigned StringsOffset = Record[1];
+ OS << " num-strings = " << NumStrings << " {\n";
+
+ StringRef Lengths = Blob.slice(0, StringsOffset);
+ SimpleBitstreamCursor R(Lengths);
+ StringRef Strings = Blob.drop_front(StringsOffset);
+ do {
+ if (R.AtEndOfStream())
+ return reportError("bad length");
+
+ Expected<uint32_t> MaybeSize = R.ReadVBR(6);
+ if (!MaybeSize)
+ return MaybeSize.takeError();
+ uint32_t Size = MaybeSize.get();
+ if (Strings.size() < Size)
+ return reportError("truncated chars");
+
+ OS << Indent << " '";
+ OS.write_escaped(Strings.slice(0, Size), /*hex=*/true);
+ OS << "'\n";
+ Strings = Strings.drop_front(Size);
+ } while (--NumStrings);
+
+ OS << Indent << " }";
+ return Error::success();
+}
+
+BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
+ Optional<StringRef> BlockInfoBuffer)
+ : Stream(Buffer) {
+ if (BlockInfoBuffer)
+ BlockInfoStream.emplace(*BlockInfoBuffer);
+}
+
+Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
+ Optional<StringRef> CheckHash) {
+ Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
+ if (!MaybeType)
+ return MaybeType.takeError();
+ else
+ CurStreamType = *MaybeType;
+
+ Stream.setBlockInfo(&BlockInfo);
+
+ // Read block info from BlockInfoStream, if specified.
+ // The block info must be a top-level block.
+ if (BlockInfoStream) {
+ BitstreamCursor BlockInfoCursor(*BlockInfoStream);
+ Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor);
+ if (!H)
+ return H.takeError();
+
+ while (!BlockInfoCursor.AtEndOfStream()) {
+ Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
+ return reportError("Invalid record at top-level in block info file");
+
+ Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID();
+ if (!MaybeBlockID)
+ return MaybeBlockID.takeError();
+ if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
+ Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
+ BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+ if (!MaybeNewBlockInfo)
+ return MaybeNewBlockInfo.takeError();
+ Optional<BitstreamBlockInfo> NewBlockInfo =
+ std::move(MaybeNewBlockInfo.get());
+ if (!NewBlockInfo)
+ return reportError("Malformed BlockInfoBlock in block info file");
+ BlockInfo = std::move(*NewBlockInfo);
+ break;
+ }
+
+ if (Error Err = BlockInfoCursor.SkipBlock())
+ return Err;
+ }
+ }
+
+ // Parse the top-level structure. We only allow blocks at the top-level.
+ while (!Stream.AtEndOfStream()) {
+ Expected<unsigned> MaybeCode = Stream.ReadCode();
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
+ return reportError("Invalid record at top-level");
+
+ Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID();
+ if (!MaybeBlockID)
+ return MaybeBlockID.takeError();
+
+ if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash))
+ return E;
+ ++NumTopBlocks;
+ }
+
+ return Error::success();
+}
+
+void BitcodeAnalyzer::printStats(BCDumpOptions O,
+ Optional<StringRef> Filename) {
+ uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
+ // Print a summary of the read file.
+ O.OS << "Summary ";
+ if (Filename)
+ O.OS << "of " << Filename->data() << ":\n";
+ O.OS << " Total size: ";
+ printSize(O.OS, BufferSizeBits);
+ O.OS << "\n";
+ O.OS << " Stream type: ";
+ switch (CurStreamType) {
+ case UnknownBitstream:
+ O.OS << "unknown\n";
+ break;
+ case LLVMIRBitstream:
+ O.OS << "LLVM IR\n";
+ break;
+ case ClangSerializedASTBitstream:
+ O.OS << "Clang Serialized AST\n";
+ break;
+ case ClangSerializedDiagnosticsBitstream:
+ O.OS << "Clang Serialized Diagnostics\n";
+ break;
+ }
+ O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n";
+ O.OS << "\n";
+
+ // Emit per-block stats.
+ O.OS << "Per-block Summary:\n";
+ for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
+ E = BlockIDStats.end();
+ I != E; ++I) {
+ O.OS << " Block ID #" << I->first;
+ if (Optional<const char *> BlockName =
+ GetBlockName(I->first, BlockInfo, CurStreamType))
+ O.OS << " (" << *BlockName << ")";
+ O.OS << ":\n";
+
+ const PerBlockIDStats &Stats = I->second;
+ O.OS << " Num Instances: " << Stats.NumInstances << "\n";
+ O.OS << " Total Size: ";
+ printSize(O.OS, Stats.NumBits);
+ O.OS << "\n";
+ double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
+ O.OS << " Percent of file: " << format("%2.4f%%", pct) << "\n";
+ if (Stats.NumInstances > 1) {
+ O.OS << " Average Size: ";
+ printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances);
+ O.OS << "\n";
+ O.OS << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
+ << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n";
+ O.OS << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
+ << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n";
+ O.OS << " Tot/Avg Records: " << Stats.NumRecords << "/"
+ << Stats.NumRecords / (double)Stats.NumInstances << "\n";
+ } else {
+ O.OS << " Num SubBlocks: " << Stats.NumSubBlocks << "\n";
+ O.OS << " Num Abbrevs: " << Stats.NumAbbrevs << "\n";
+ O.OS << " Num Records: " << Stats.NumRecords << "\n";
+ }
+ if (Stats.NumRecords) {
+ double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
+ O.OS << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
+ }
+ O.OS << "\n";
+
+ // Print a histogram of the codes we see.
+ if (O.Histogram && !Stats.CodeFreq.empty()) {
+ std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code>
+ for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
+ if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
+ FreqPairs.push_back(std::make_pair(Freq, i));
+ llvm::stable_sort(FreqPairs);
+ std::reverse(FreqPairs.begin(), FreqPairs.end());
+
+ O.OS << "\tRecord Histogram:\n";
+ O.OS << "\t\t Count # Bits b/Rec % Abv Record Kind\n";
+ for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
+ const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
+
+ O.OS << format("\t\t%7d %9lu", RecStats.NumInstances,
+ (unsigned long)RecStats.TotalBits);
+
+ if (RecStats.NumInstances > 1)
+ O.OS << format(" %9.1f",
+ (double)RecStats.TotalBits / RecStats.NumInstances);
+ else
+ O.OS << " ";
+
+ if (RecStats.NumAbbrev)
+ O.OS << format(" %7.2f", (double)RecStats.NumAbbrev /
+ RecStats.NumInstances * 100);
+ else
+ O.OS << " ";
+
+ O.OS << " ";
+ if (Optional<const char *> CodeName = GetCodeName(
+ FreqPairs[i].second, I->first, BlockInfo, CurStreamType))
+ O.OS << *CodeName << "\n";
+ else
+ O.OS << "UnknownCode" << FreqPairs[i].second << "\n";
+ }
+ O.OS << "\n";
+ }
+ }
+}
+
+Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
+ Optional<BCDumpOptions> O,
+ Optional<StringRef> CheckHash) {
+ std::string Indent(IndentLevel * 2, ' ');
+ uint64_t BlockBitStart = Stream.GetCurrentBitNo();
+
+ // Get the statistics for this BlockID.
+ PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
+
+ BlockStats.NumInstances++;
+
+ // BLOCKINFO is a special part of the stream.
+ bool DumpRecords = O.hasValue();
+ if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
+ if (O)
+ O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
+ Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
+ Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+ if (!MaybeNewBlockInfo)
+ return MaybeNewBlockInfo.takeError();
+ Optional<BitstreamBlockInfo> NewBlockInfo =
+ std::move(MaybeNewBlockInfo.get());
+ if (!NewBlockInfo)
+ return reportError("Malformed BlockInfoBlock");
+ BlockInfo = std::move(*NewBlockInfo);
+ if (Error Err = Stream.JumpToBit(BlockBitStart))
+ return Err;
+ // It's not really interesting to dump the contents of the blockinfo
+ // block.
+ DumpRecords = false;
+ }
+
+ unsigned NumWords = 0;
+ if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords))
+ return Err;
+
+ // Keep it for later, when we see a MODULE_HASH record
+ uint64_t BlockEntryPos = Stream.getCurrentByteNo();
+
+ Optional<const char *> BlockName = None;
+ if (DumpRecords) {
+ O->OS << Indent << "<";
+ if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
+ O->OS << *BlockName;
+ else
+ O->OS << "UnknownBlock" << BlockID;
+
+ if (!O->Symbolic && BlockName)
+ O->OS << " BlockID=" << BlockID;
+
+ O->OS << " NumWords=" << NumWords
+ << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
+ }
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Keep the offset to the metadata index if seen.
+ uint64_t MetadataIndexOffset = 0;
+
+ // Read all the records for this block.
+ while (1) {
+ if (Stream.AtEndOfStream())
+ return reportError("Premature end of bitstream");
+
+ uint64_t RecordStartBit = Stream.GetCurrentBitNo();
+
+ Expected<BitstreamEntry> MaybeEntry =
+ Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return reportError("malformed bitcode file");
+ case BitstreamEntry::EndBlock: {
+ uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
+ BlockStats.NumBits += BlockBitEnd - BlockBitStart;
+ if (DumpRecords) {
+ O->OS << Indent << "</";
+ if (BlockName)
+ O->OS << *BlockName << ">\n";
+ else
+ O->OS << "UnknownBlock" << BlockID << ">\n";
+ }
+ return Error::success();
+ }
+
+ case BitstreamEntry::SubBlock: {
+ uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
+ if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash))
+ return E;
+ ++BlockStats.NumSubBlocks;
+ uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
+
+ // Don't include subblock sizes in the size of this block.
+ BlockBitStart += SubBlockBitEnd - SubBlockBitStart;
+ continue;
+ }
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ if (Entry.ID == bitc::DEFINE_ABBREV) {
+ if (Error Err = Stream.ReadAbbrevRecord())
+ return Err;
+ ++BlockStats.NumAbbrevs;
+ continue;
+ }
+
+ Record.clear();
+
+ ++BlockStats.NumRecords;
+
+ StringRef Blob;
+ uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
+ Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ unsigned Code = MaybeCode.get();
+
+ // Increment the # occurrences of this code.
+ if (BlockStats.CodeFreq.size() <= Code)
+ BlockStats.CodeFreq.resize(Code + 1);
+ BlockStats.CodeFreq[Code].NumInstances++;
+ BlockStats.CodeFreq[Code].TotalBits +=
+ Stream.GetCurrentBitNo() - RecordStartBit;
+ if (Entry.ID != bitc::UNABBREV_RECORD) {
+ BlockStats.CodeFreq[Code].NumAbbrev++;
+ ++BlockStats.NumAbbreviatedRecords;
+ }
+
+ if (DumpRecords) {
+ O->OS << Indent << " <";
+ Optional<const char *> CodeName =
+ GetCodeName(Code, BlockID, BlockInfo, CurStreamType);
+ if (CodeName)
+ O->OS << *CodeName;
+ else
+ O->OS << "UnknownCode" << Code;
+ if (!O->Symbolic && CodeName)
+ O->OS << " codeid=" << Code;
+ const BitCodeAbbrev *Abbv = nullptr;
+ if (Entry.ID != bitc::UNABBREV_RECORD) {
+ Abbv = Stream.getAbbrev(Entry.ID);
+ O->OS << " abbrevid=" << Entry.ID;
+ }
+
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ O->OS << " op" << i << "=" << (int64_t)Record[i];
+
+ // If we found a metadata index, let's verify that we had an offset
+ // before and validate its forward reference offset was correct!
+ if (BlockID == bitc::METADATA_BLOCK_ID) {
+ if (Code == bitc::METADATA_INDEX_OFFSET) {
+ if (Record.size() != 2)
+ O->OS << "(Invalid record)";
+ else {
+ auto Offset = Record[0] + (Record[1] << 32);
+ MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
+ }
+ }
+ if (Code == bitc::METADATA_INDEX) {
+ O->OS << " (offset ";
+ if (MetadataIndexOffset == RecordStartBit)
+ O->OS << "match)";
+ else
+ O->OS << "mismatch: " << MetadataIndexOffset << " vs "
+ << RecordStartBit << ")";
+ }
+ }
+
+ // If we found a module hash, let's verify that it matches!
+ if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
+ CheckHash.hasValue()) {
+ if (Record.size() != 5)
+ O->OS << " (invalid)";
+ else {
+ // Recompute the hash and compare it to the one in the bitcode
+ SHA1 Hasher;
+ StringRef Hash;
+ Hasher.update(*CheckHash);
+ {
+ int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
+ auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
+ Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
+ Hash = Hasher.result();
+ }
+ SmallString<20> RecordedHash;
+ RecordedHash.resize(20);
+ int Pos = 0;
+ for (auto &Val : Record) {
+ assert(!(Val >> 32) && "Unexpected high bits set");
+ RecordedHash[Pos++] = (Val >> 24) & 0xFF;
+ RecordedHash[Pos++] = (Val >> 16) & 0xFF;
+ RecordedHash[Pos++] = (Val >> 8) & 0xFF;
+ RecordedHash[Pos++] = (Val >> 0) & 0xFF;
+ }
+ if (Hash == RecordedHash)
+ O->OS << " (match)";
+ else
+ O->OS << " (!mismatch!)";
+ }
+ }
+
+ O->OS << "/>";
+
+ if (Abbv) {
+ for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+ const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+ if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
+ continue;
+ assert(i + 2 == e && "Array op not second to last");
+ std::string Str;
+ bool ArrayIsPrintable = true;
+ for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
+ if (!isPrint(static_cast<unsigned char>(Record[j]))) {
+ ArrayIsPrintable = false;
+ break;
+ }
+ Str += (char)Record[j];
+ }
+ if (ArrayIsPrintable)
+ O->OS << " record string = '" << Str << "'";
+ break;
+ }
+ }
+
+ if (Blob.data()) {
+ if (canDecodeBlob(Code, BlockID)) {
+ if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS))
+ return E;
+ } else {
+ O->OS << " blob data = ";
+ if (O->ShowBinaryBlobs) {
+ O->OS << "'";
+ O->OS.write_escaped(Blob, /*hex=*/true) << "'";
+ } else {
+ bool BlobIsPrintable = true;
+ for (unsigned i = 0, e = Blob.size(); i != e; ++i)
+ if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
+ BlobIsPrintable = false;
+ break;
+ }
+
+ if (BlobIsPrintable)
+ O->OS << "'" << Blob << "'";
+ else
+ O->OS << "unprintable, " << Blob.size() << " bytes.";
+ }
+ }
+ }
+
+ O->OS << "\n";
+ }
+
+ // Make sure that we can skip the current record.
+ if (Error Err = Stream.JumpToBit(CurrentRecordPos))
+ return Err;
+ if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+ ; // Do nothing.
+ else
+ return Skipped.takeError();
+ }
+}
+
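The BitcodeAnalyzer added above packages the record dump and per-block statistics as a reusable class. A rough driver sketch built from the constructor, analyze(), and printStats() signatures visible in this file; the BCDumpOptions constructor and its field defaults are assumptions inferred from how O.OS, O.Histogram, O.Symbolic, and O.ShowBinaryBlobs are used above:

#include "llvm/Bitcode/BitcodeAnalyzer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch: dump all records plus the per-block summary for one bitcode file.
static Error dumpBitcode(StringRef Path) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(Path);
  if (!MBOrErr)
    return errorCodeToError(MBOrErr.getError());

  BCDumpOptions O(outs());   // assumed raw_ostream constructor
  O.Histogram = true;        // also print the per-block record histogram
  O.ShowBinaryBlobs = false; // blobs are printed only when printable

  BitcodeAnalyzer BA((*MBOrErr)->getBuffer(), None);
  if (Error E = BA.analyze(O, None)) // no MODULE_HASH re-verification
    return E;
  BA.printStats(O, Path);
  return Error::success();
}
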
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index fe051e7a9125..29dc7f616392 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1,9 +1,8 @@
//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,7 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
@@ -106,18 +105,25 @@ static Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}
-/// Helper to read the header common to all bitcode files.
-static bool hasValidBitcodeHeader(BitstreamCursor &Stream) {
- // Sniff for the signature.
- if (!Stream.canSkipToPos(4) ||
- Stream.Read(8) != 'B' ||
- Stream.Read(8) != 'C' ||
- Stream.Read(4) != 0x0 ||
- Stream.Read(4) != 0xC ||
- Stream.Read(4) != 0xE ||
- Stream.Read(4) != 0xD)
- return false;
- return true;
+static Error hasInvalidBitcodeHeader(BitstreamCursor &Stream) {
+ if (!Stream.canSkipToPos(4))
+ return createStringError(std::errc::illegal_byte_sequence,
+ "file too small to contain bitcode header");
+ for (unsigned C : {'B', 'C'})
+ if (Expected<SimpleBitstreamCursor::word_t> Res = Stream.Read(8)) {
+ if (Res.get() != C)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "file doesn't start with bitcode header");
+ } else
+ return Res.takeError();
+ for (unsigned C : {0x0, 0xC, 0xE, 0xD})
+ if (Expected<SimpleBitstreamCursor::word_t> Res = Stream.Read(4)) {
+ if (Res.get() != C)
+ return createStringError(std::errc::illegal_byte_sequence,
+ "file doesn't start with bitcode header");
+ } else
+ return Res.takeError();
+ return Error::success();
}
static Expected<BitstreamCursor> initStream(MemoryBufferRef Buffer) {
@@ -134,8 +140,8 @@ static Expected<BitstreamCursor> initStream(MemoryBufferRef Buffer) {
return error("Invalid bitcode wrapper header");
BitstreamCursor Stream(ArrayRef<uint8_t>(BufPtr, BufEnd));
- if (!hasValidBitcodeHeader(Stream))
- return error("Invalid bitcode signature");
+ if (Error Err = hasInvalidBitcodeHeader(Stream))
+ return std::move(Err);
return std::move(Stream);
}
@@ -165,8 +171,8 @@ static void stripTBAA(Module *M) {
/// Read the "IDENTIFICATION_BLOCK_ID" block, do some basic enforcement on the
/// "epoch" encoded in the bitcode, and return the producer name if any.
static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
- if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
+ return std::move(Err);
// Read all the records.
SmallVector<uint64_t, 64> Record;
@@ -174,7 +180,11 @@ static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
std::string ProducerIdentification;
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ BitstreamEntry Entry;
+ if (Expected<BitstreamEntry> Res = Stream.advance())
+ Entry = Res.get();
+ else
+ return Res.takeError();
switch (Entry.Kind) {
default:
@@ -189,8 +199,10 @@ static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
// Read a record.
Record.clear();
- unsigned BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (MaybeBitCode.get()) {
default: // Default behavior: reject
return error("Invalid value");
case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N]
@@ -215,7 +227,12 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
if (Stream.AtEndOfStream())
return "";
- BitstreamEntry Entry = Stream.advance();
+ BitstreamEntry Entry;
+ if (Expected<BitstreamEntry> Res = Stream.advance())
+ Entry = std::move(Res.get());
+ else
+ return Res.takeError();
+
switch (Entry.Kind) {
case BitstreamEntry::EndBlock:
case BitstreamEntry::Error:
@@ -226,25 +243,30 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
return readIdentificationBlock(Stream);
// Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
continue;
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
+ if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+ continue;
+ else
+ return Skipped.takeError();
}
}
}
static Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return std::move(Err);
SmallVector<uint64_t, 64> Record;
// Read all the records for this module.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -258,7 +280,10 @@ static Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
}
// Read a record.
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default:
break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
@@ -281,7 +306,11 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ BitstreamEntry Entry;
+ if (Expected<BitstreamEntry> Res = Stream.advance())
+ Entry = std::move(Res.get());
+ else
+ return Res.takeError();
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -294,20 +323,22 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
return hasObjCCategoryInModule(Stream);
// Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
continue;
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
+ if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+ continue;
+ else
+ return Skipped.takeError();
}
}
}
static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return std::move(Err);
SmallVector<uint64_t, 64> Record;
@@ -315,7 +346,10 @@ static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
// Read all the records for this module.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -329,7 +363,10 @@ static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
}
// Read a record.
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
std::string S;
@@ -348,7 +385,10 @@ static Expected<std::string> readTriple(BitstreamCursor &Stream) {
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -361,13 +401,15 @@ static Expected<std::string> readTriple(BitstreamCursor &Stream) {
return readModuleTriple(Stream);
// Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
continue;
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
+ if (llvm::Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
+ continue;
+ else
+ return Skipped.takeError();
}
}
}
@@ -452,6 +494,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
std::vector<std::string> GCTable;
std::vector<Type*> TypeList;
+ DenseMap<Function *, FunctionType *> FunctionTypes;
BitcodeReaderValueList ValueList;
Optional<MetadataLoader> MDLoader;
std::vector<Comdat *> ComdatList;
@@ -550,12 +593,42 @@ private:
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
- Type *getTypeByID(unsigned ID);
+ /// Map all pointer types within \param Ty to the opaque pointer
+ /// type in the same address space if opaque pointers are being
+ /// used, otherwise nop. This converts a bitcode-reader internal
+ /// type into one suitable for use in a Value.
+ Type *flattenPointerTypes(Type *Ty) {
+ return Ty;
+ }
+
+ /// Given a fully structured pointer type (i.e. not opaque), return
+ /// the flattened form of its element, suitable for use in a Value.
+ Type *getPointerElementFlatType(Type *Ty) {
+ return flattenPointerTypes(cast<PointerType>(Ty)->getElementType());
+ }
+
+ /// Given a fully structured pointer type, get its element type in
+ /// both fully structured form, and flattened form suitable for use
+ /// in a Value.
+ std::pair<Type *, Type *> getPointerElementTypes(Type *FullTy) {
+ Type *ElTy = cast<PointerType>(FullTy)->getElementType();
+ return std::make_pair(ElTy, flattenPointerTypes(ElTy));
+ }
- Value *getFnValueByID(unsigned ID, Type *Ty) {
+ /// Return the flattened type (suitable for use in a Value)
+ /// specified by the given \param ID .
+ Type *getTypeByID(unsigned ID) {
+ return flattenPointerTypes(getFullyStructuredTypeByID(ID));
+ }
+
+ /// Return the fully structured (bitcode-reader internal) type
+ /// corresponding to the given \param ID .
+ Type *getFullyStructuredTypeByID(unsigned ID);
+
+ Value *getFnValueByID(unsigned ID, Type *Ty, Type **FullTy = nullptr) {
if (Ty && Ty->isMetadataTy())
return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
- return ValueList.getValueFwdRef(ID, Ty);
+ return ValueList.getValueFwdRef(ID, Ty, FullTy);
}
Metadata *getFnMetadataByID(unsigned ID) {
@@ -577,7 +650,8 @@ private:
/// Increment Slot past the number of slots used in the record. Return true on
/// failure.
bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Value *&ResVal) {
+ unsigned InstNum, Value *&ResVal,
+ Type **FullTy = nullptr) {
if (Slot == Record.size()) return true;
unsigned ValNo = (unsigned)Record[Slot++];
// Adjust the ValNo, if it was encoded relative to the InstNum.
@@ -586,7 +660,7 @@ private:
if (ValNo < InstNum) {
// If this is not a forward reference, just return the value we already
// have.
- ResVal = getFnValueByID(ValNo, nullptr);
+ ResVal = getFnValueByID(ValNo, nullptr, FullTy);
return ResVal == nullptr;
}
if (Slot == Record.size())
@@ -594,6 +668,8 @@ private:
unsigned TypeNo = (unsigned)Record[Slot++];
ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ if (FullTy)
+ *FullTy = getFullyStructuredTypeByID(TypeNo);
return ResVal == nullptr;
}
@@ -639,6 +715,10 @@ private:
return getFnValueByID(ValNo, Ty);
}
+ /// Upgrades old-style typeless byval attributes by adding the corresponding
+ /// argument's pointee type.
+ void propagateByValTypes(CallBase *CB, ArrayRef<Type *> ArgsFullTys);
+
/// Converts alignment exponent (i.e. power of two (or zero)) to the
/// corresponding alignment to use. If alignment is too large, returns
/// a corresponding error code.
@@ -748,6 +828,9 @@ private:
bool HasRelBF);
Error parseEntireSummary(unsigned ID);
Error parseModuleStringTable();
+ void parseTypeIdCompatibleVtableSummaryRecord(ArrayRef<uint64_t> Record);
+ void parseTypeIdCompatibleVtableInfo(ArrayRef<uint64_t> Record, size_t &Slot,
+ TypeIdCompatibleVtableInfo &TypeId);
std::pair<ValueInfo, GlobalValue::GUID>
getValueInfoFromValueId(unsigned ValueId);
@@ -775,7 +858,7 @@ BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
StringRef ProducerIdentification,
LLVMContext &Context)
: BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
- ValueList(Context) {
+ ValueList(Context, Stream.SizeInBytes()) {
this->ProducerIdentification = ProducerIdentification;
}
@@ -894,13 +977,15 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
// values as live.
bool Live = (RawFlags & 0x2) || Version < 3;
bool Local = (RawFlags & 0x4);
+ bool AutoHide = (RawFlags & 0x8);
- return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local);
+ return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local, AutoHide);
}
// Decode the flags for GlobalVariable in the summary
static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
- return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ? true : false);
+ return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ? true : false,
+ (RawFlags & 0x2) ? true : false);
}
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
@@ -1035,6 +1120,8 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
case bitc::RMW_MIN: return AtomicRMWInst::Min;
case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
+ case bitc::RMW_FADD: return AtomicRMWInst::FAdd;
+ case bitc::RMW_FSUB: return AtomicRMWInst::FSub;
}
}
@@ -1095,7 +1182,7 @@ static void upgradeDLLImportExportLinkage(GlobalValue *GV, unsigned Val) {
}
}
-Type *BitcodeReader::getTypeByID(unsigned ID) {
+Type *BitcodeReader::getFullyStructuredTypeByID(unsigned ID) {
// The type table size is always specified correctly.
if (ID >= TypeList.size())
return nullptr;
@@ -1187,6 +1274,15 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
case Attribute::ShadowCallStack: return 1ULL << 59;
case Attribute::SpeculativeLoadHardening:
return 1ULL << 60;
+ case Attribute::ImmArg:
+ return 1ULL << 61;
+ case Attribute::WillReturn:
+ return 1ULL << 62;
+ case Attribute::NoFree:
+ return 1ULL << 63;
+ case Attribute::NoSync:
+ llvm_unreachable("nosync attribute not supported in raw format");
+ break;
case Attribute::Dereferenceable:
llvm_unreachable("dereferenceable attribute not supported in raw format");
break;
@@ -1200,6 +1296,9 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
case Attribute::AllocSize:
llvm_unreachable("allocsize not supported in raw format");
break;
+ case Attribute::SanitizeMemTag:
+ llvm_unreachable("sanitize_memtag attribute not supported in raw format");
+ break;
}
llvm_unreachable("Unsupported attribute type");
}
@@ -1209,10 +1308,12 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) {
for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
I = Attribute::AttrKind(I + 1)) {
- if (I == Attribute::Dereferenceable ||
+ if (I == Attribute::SanitizeMemTag ||
+ I == Attribute::Dereferenceable ||
I == Attribute::DereferenceableOrNull ||
I == Attribute::ArgMemOnly ||
- I == Attribute::AllocSize)
+ I == Attribute::AllocSize ||
+ I == Attribute::NoSync)
continue;
if (uint64_t A = (Val & getRawAttributeMask(I))) {
if (I == Attribute::Alignment)
@@ -1245,8 +1346,8 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
}
Error BitcodeReader::parseAttributeBlock() {
- if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
+ return Err;
if (!MAttributes.empty())
return error("Invalid multiple blocks");
@@ -1257,7 +1358,10 @@ Error BitcodeReader::parseAttributeBlock() {
// Read all the records.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1272,7 +1376,10 @@ Error BitcodeReader::parseAttributeBlock() {
// Read a record.
Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case bitc::PARAMATTR_CODE_ENTRY_OLD: // ENTRY: [paramidx0, attr0, ...]
@@ -1345,6 +1452,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::NoCapture;
case bitc::ATTR_KIND_NO_DUPLICATE:
return Attribute::NoDuplicate;
+ case bitc::ATTR_KIND_NOFREE:
+ return Attribute::NoFree;
case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT:
return Attribute::NoImplicitFloat;
case bitc::ATTR_KIND_NO_INLINE:
@@ -1365,6 +1474,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
return Attribute::NoReturn;
+ case bitc::ATTR_KIND_NOSYNC:
+ return Attribute::NoSync;
case bitc::ATTR_KIND_NOCF_CHECK:
return Attribute::NoCfCheck;
case bitc::ATTR_KIND_NO_UNWIND:
@@ -1419,10 +1530,16 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::SwiftSelf;
case bitc::ATTR_KIND_UW_TABLE:
return Attribute::UWTable;
+ case bitc::ATTR_KIND_WILLRETURN:
+ return Attribute::WillReturn;
case bitc::ATTR_KIND_WRITEONLY:
return Attribute::WriteOnly;
case bitc::ATTR_KIND_Z_EXT:
return Attribute::ZExt;
+ case bitc::ATTR_KIND_IMMARG:
+ return Attribute::ImmArg;
+ case bitc::ATTR_KIND_SANITIZE_MEMTAG:
+ return Attribute::SanitizeMemTag;
}
}
@@ -1444,8 +1561,8 @@ Error BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) {
}
Error BitcodeReader::parseAttributeGroupBlock() {
- if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
+ return Err;
if (!MAttributeGroups.empty())
return error("Invalid multiple blocks");
@@ -1454,7 +1571,10 @@ Error BitcodeReader::parseAttributeGroupBlock() {
// Read all the records.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1469,7 +1589,10 @@ Error BitcodeReader::parseAttributeGroupBlock() {
// Read a record.
Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...]
@@ -1486,6 +1609,12 @@ Error BitcodeReader::parseAttributeGroupBlock() {
if (Error Err = parseAttrKind(Record[++i], &Kind))
return Err;
+ // Upgrade old-style byval attribute to one with a type, even if it's
+ // nullptr. We will have to insert the real type when we associate
+ // this AttributeList with a function.
+ if (Kind == Attribute::ByVal)
+ B.addByValAttr(nullptr);
+
B.addAttribute(Kind);
} else if (Record[i] == 1) { // Integer attribute
Attribute::AttrKind Kind;
@@ -1501,9 +1630,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
B.addDereferenceableOrNullAttr(Record[++i]);
else if (Kind == Attribute::AllocSize)
B.addAllocSizeAttrFromRawRepr(Record[++i]);
- } else { // String attribute
- assert((Record[i] == 3 || Record[i] == 4) &&
- "Invalid attribute group entry");
+ } else if (Record[i] == 3 || Record[i] == 4) { // String attribute
bool HasValue = (Record[i++] == 4);
SmallString<64> KindStr;
SmallString<64> ValStr;
@@ -1521,6 +1648,15 @@ Error BitcodeReader::parseAttributeGroupBlock() {
}
B.addAttribute(KindStr.str(), ValStr.str());
+ } else {
+ assert((Record[i] == 5 || Record[i] == 6) &&
+ "Invalid attribute group entry");
+ bool HasType = Record[i] == 6;
+ Attribute::AttrKind Kind;
+ if (Error Err = parseAttrKind(Record[++i], &Kind))
+ return Err;
+ if (Kind == Attribute::ByVal)
+ B.addByValAttr(HasType ? getTypeByID(Record[++i]) : nullptr);
}
}
@@ -1532,8 +1668,8 @@ Error BitcodeReader::parseAttributeGroupBlock() {
}
Error BitcodeReader::parseTypeTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
+ return Err;
return parseTypeTableBody();
}
@@ -1549,7 +1685,10 @@ Error BitcodeReader::parseTypeTableBody() {
// Read all the records for this type table.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1567,7 +1706,10 @@ Error BitcodeReader::parseTypeTableBody() {
// Read a record.
Record.clear();
Type *ResultTy = nullptr;
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default:
return error("Invalid value");
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
@@ -1752,7 +1894,8 @@ Error BitcodeReader::parseTypeTableBody() {
return error("Invalid type");
ResultTy = ArrayType::get(ResultTy, Record[0]);
break;
- case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] or
+ // [numelts, eltty, scalable]
if (Record.size() < 2)
return error("Invalid record");
if (Record[0] == 0)
@@ -1760,7 +1903,8 @@ Error BitcodeReader::parseTypeTableBody() {
ResultTy = getTypeByID(Record[1]);
if (!ResultTy || !StructType::isValidElementType(ResultTy))
return error("Invalid type");
- ResultTy = VectorType::get(ResultTy, Record[0]);
+ bool Scalable = Record.size() > 2 ? Record[2] : false;
+ ResultTy = VectorType::get(ResultTy, Record[0], Scalable);
break;
}
@@ -1775,8 +1919,8 @@ Error BitcodeReader::parseTypeTableBody() {
}
Error BitcodeReader::parseOperandBundleTags() {
- if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
+ return Err;
if (!BundleTags.empty())
return error("Invalid multiple blocks");
@@ -1784,7 +1928,10 @@ Error BitcodeReader::parseOperandBundleTags() {
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1799,7 +1946,10 @@ Error BitcodeReader::parseOperandBundleTags() {
// Tags are implicitly mapped to integers by their order.
- if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG)
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ if (MaybeRecord.get() != bitc::OPERAND_BUNDLE_TAG)
return error("Invalid record");
// OPERAND_BUNDLE_TAG: [strchr x N]
@@ -1811,15 +1961,19 @@ Error BitcodeReader::parseOperandBundleTags() {
}
Error BitcodeReader::parseSyncScopeNames() {
- if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID))
+ return Err;
if (!SSIDs.empty())
return error("Invalid multiple synchronization scope names blocks");
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
+
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
@@ -1836,7 +1990,10 @@ Error BitcodeReader::parseSyncScopeNames() {
// Synchronization scope names are implicitly mapped to synchronization
// scope IDs by their order.
- if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME)
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ if (MaybeRecord.get() != bitc::SYNC_SCOPE_NAME)
return error("Invalid record");
SmallString<16> SSN;
@@ -1877,22 +2034,18 @@ Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
/// Helper to note and return the current location, and jump to the given
/// offset.
-static uint64_t jumpToValueSymbolTable(uint64_t Offset,
- BitstreamCursor &Stream) {
+static Expected<uint64_t> jumpToValueSymbolTable(uint64_t Offset,
+ BitstreamCursor &Stream) {
// Save the current parsing location so we can jump back at the end
// of the VST read.
uint64_t CurrentBit = Stream.GetCurrentBitNo();
- Stream.JumpToBit(Offset * 32);
-#ifndef NDEBUG
- // Do some checking if we are in debug mode.
- BitstreamEntry Entry = Stream.advance();
- assert(Entry.Kind == BitstreamEntry::SubBlock);
- assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);
-#else
- // In NDEBUG mode ignore the output so we don't get an unused variable
- // warning.
- Stream.advance();
-#endif
+ if (Error JumpFailed = Stream.JumpToBit(Offset * 32))
+ return std::move(JumpFailed);
+ Expected<BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ assert(MaybeEntry.get().Kind == BitstreamEntry::SubBlock);
+ assert(MaybeEntry.get().ID == bitc::VALUE_SYMTAB_BLOCK_ID);
return CurrentBit;
}
@@ -1917,12 +2070,15 @@ Error BitcodeReader::parseGlobalValueSymbolTable() {
unsigned FuncBitcodeOffsetDelta =
Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
- if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock:
@@ -1935,7 +2091,10 @@ Error BitcodeReader::parseGlobalValueSymbolTable() {
}
Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
case bitc::VST_CODE_FNENTRY: // [valueid, offset]
setDeferredFunctionInfo(FuncBitcodeOffsetDelta,
cast<Function>(ValueList[Record[0]]), Record);
@@ -1952,12 +2111,16 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
// VST (where we want to jump to the VST offset) and the function-level
// VST (where we don't).
if (Offset > 0) {
- CurrentBit = jumpToValueSymbolTable(Offset, Stream);
+ Expected<uint64_t> MaybeCurrentBit = jumpToValueSymbolTable(Offset, Stream);
+ if (!MaybeCurrentBit)
+ return MaybeCurrentBit.takeError();
+ CurrentBit = MaybeCurrentBit.get();
// If this module uses a string table, read this as a module-level VST.
if (UseStrtab) {
if (Error Err = parseGlobalValueSymbolTable())
return Err;
- Stream.JumpToBit(CurrentBit);
+ if (Error JumpFailed = Stream.JumpToBit(CurrentBit))
+ return JumpFailed;
return Error::success();
}
// Otherwise, the VST will be in a similar format to a function-level VST,
@@ -1978,8 +2141,8 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
unsigned FuncBitcodeOffsetDelta =
Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;
- if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
@@ -1989,7 +2152,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
SmallString<128> ValueName;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1997,7 +2163,8 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
return error("Malformed block");
case BitstreamEntry::EndBlock:
if (Offset > 0)
- Stream.JumpToBit(CurrentBit);
+ if (Error JumpFailed = Stream.JumpToBit(CurrentBit))
+ return JumpFailed;
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -2006,7 +2173,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
// Read a record.
Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: unknown type.
break;
case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N]
@@ -2151,17 +2321,21 @@ static APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
}
Error BitcodeReader::parseConstants() {
- if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
// Read all the records for this value table.
Type *CurTy = Type::getInt32Ty(Context);
+ Type *CurFullTy = Type::getInt32Ty(Context);
unsigned NextCstNo = ValueList.size();
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -2184,8 +2358,10 @@ Error BitcodeReader::parseConstants() {
Record.clear();
Type *VoidType = Type::getVoidTy(Context);
Value *V = nullptr;
- unsigned BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (unsigned BitCode = MaybeBitCode.get()) {
default: // Default behavior: unknown constant
case bitc::CST_CODE_UNDEF: // UNDEF
V = UndefValue::get(CurTy);
@@ -2197,7 +2373,8 @@ Error BitcodeReader::parseConstants() {
return error("Invalid record");
if (TypeList[Record[0]] == VoidType)
return error("Invalid constant type");
- CurTy = TypeList[Record[0]];
+ CurFullTy = TypeList[Record[0]];
+ CurTy = flattenPointerTypes(CurFullTy);
continue; // Skip the ValueList manipulation.
case bitc::CST_CODE_NULL: // NULL
V = Constant::getNullValue(CurTy);
@@ -2416,23 +2593,27 @@ Error BitcodeReader::parseConstants() {
InBounds = true;
SmallVector<Constant*, 16> Elts;
+ Type *Elt0FullTy = nullptr;
while (OpNum != Record.size()) {
+ if (!Elt0FullTy)
+ Elt0FullTy = getFullyStructuredTypeByID(Record[OpNum]);
Type *ElTy = getTypeByID(Record[OpNum++]);
if (!ElTy)
return error("Invalid record");
Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
}
- if (PointeeType &&
- PointeeType !=
- cast<PointerType>(Elts[0]->getType()->getScalarType())
- ->getElementType())
- return error("Explicit gep operator type does not match pointee type "
- "of pointer operand");
-
if (Elts.size() < 1)
return error("Invalid gep with no operands");
+ Type *ImplicitPointeeType =
+ getPointerElementFlatType(Elt0FullTy->getScalarType());
+ if (!PointeeType)
+ PointeeType = ImplicitPointeeType;
+ else if (PointeeType != ImplicitPointeeType)
+ return error("Explicit gep operator type does not match pointee type "
+ "of pointer operand");
+
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices,
InBounds, InRangeIndex);
@@ -2560,10 +2741,10 @@ Error BitcodeReader::parseConstants() {
AsmStr += (char)Record[2+i];
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[3+AsmStrSize+i];
- PointerType *PTy = cast<PointerType>(CurTy);
UpgradeInlineAsmString(&AsmStr);
- V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
- AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+ V = InlineAsm::get(
+ cast<FunctionType>(getPointerElementFlatType(CurFullTy)), AsmStr,
+ ConstrStr, HasSideEffects, IsAlignStack);
break;
}
// This version adds support for the asm dialect keywords (e.g.,
@@ -2586,11 +2767,11 @@ Error BitcodeReader::parseConstants() {
AsmStr += (char)Record[2+i];
for (unsigned i = 0; i != ConstStrSize; ++i)
ConstrStr += (char)Record[3+AsmStrSize+i];
- PointerType *PTy = cast<PointerType>(CurTy);
UpgradeInlineAsmString(&AsmStr);
- V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
- AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
- InlineAsm::AsmDialect(AsmDialect));
+ V = InlineAsm::get(
+ cast<FunctionType>(getPointerElementFlatType(CurFullTy)), AsmStr,
+ ConstrStr, HasSideEffects, IsAlignStack,
+ InlineAsm::AsmDialect(AsmDialect));
break;
}
case bitc::CST_CODE_BLOCKADDRESS:{
@@ -2636,20 +2817,25 @@ Error BitcodeReader::parseConstants() {
}
}
- ValueList.assignValue(V, NextCstNo);
+ assert(V->getType() == flattenPointerTypes(CurFullTy) &&
+ "Incorrect fully structured type provided for Constant");
+ ValueList.assignValue(V, NextCstNo, CurFullTy);
++NextCstNo;
}
}
Error BitcodeReader::parseUseLists() {
- if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+ return Err;
// Read all the records.
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -2665,7 +2851,10 @@ Error BitcodeReader::parseUseLists() {
// Read a use list record.
Record.clear();
bool IsBB = false;
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: unknown type.
break;
case bitc::USELIST_CODE_BB:
@@ -2714,15 +2903,16 @@ Error BitcodeReader::rememberAndSkipMetadata() {
DeferredMetadataInfo.push_back(CurBit);
// Skip over the block for now.
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
return Error::success();
}
Error BitcodeReader::materializeMetadata() {
for (uint64_t BitPos : DeferredMetadataInfo) {
// Move the bit stream to the saved position.
- Stream.JumpToBit(BitPos);
+ if (Error JumpFailed = Stream.JumpToBit(BitPos))
+ return JumpFailed;
if (Error Err = MDLoader->parseModuleMetadata())
return Err;
}
@@ -2760,8 +2950,8 @@ Error BitcodeReader::rememberAndSkipFunctionBody() {
DeferredFunctionInfo[Fn] = CurBit;
// Skip over the function block for now.
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
return Error::success();
}
@@ -2786,8 +2976,14 @@ Error BitcodeReader::globalCleanup() {
}
// Look for global variables which need to be renamed.
+ std::vector<std::pair<GlobalVariable *, GlobalVariable *>> UpgradedVariables;
for (GlobalVariable &GV : TheModule->globals())
- UpgradeGlobalVariable(&GV);
+ if (GlobalVariable *Upgraded = UpgradeGlobalVariable(&GV))
+ UpgradedVariables.emplace_back(&GV, Upgraded);
+ for (auto &Pair : UpgradedVariables) {
+ Pair.first->eraseFromParent();
+ TheModule->getGlobalList().push_back(Pair.second);
+ }
// Force deallocation of memory for these vectors to favor the clients that
// want lazy deserialization.
@@ -2802,7 +2998,8 @@ Error BitcodeReader::globalCleanup() {
/// or if we have an anonymous function being materialized, since anonymous
/// functions do not have a name and are therefore not in the VST.
Error BitcodeReader::rememberAndSkipFunctionBodies() {
- Stream.JumpToBit(NextUnreadBit);
+ if (Error JumpFailed = Stream.JumpToBit(NextUnreadBit))
+ return JumpFailed;
if (Stream.AtEndOfStream())
return error("Could not find function in stream");
@@ -2817,7 +3014,11 @@ Error BitcodeReader::rememberAndSkipFunctionBodies() {
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
+
switch (Entry.Kind) {
default:
return error("Expect SubBlock");
@@ -2836,7 +3037,12 @@ Error BitcodeReader::rememberAndSkipFunctionBodies() {
}
bool BitcodeReaderBase::readBlockInfo() {
- Optional<BitstreamBlockInfo> NewBlockInfo = Stream.ReadBlockInfoBlock();
+ Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
+ Stream.ReadBlockInfoBlock();
+ if (!MaybeNewBlockInfo)
+ return true; // FIXME: Handle the error.
+ Optional<BitstreamBlockInfo> NewBlockInfo =
+ std::move(MaybeNewBlockInfo.get());
if (!NewBlockInfo)
return true;
BlockInfo = std::move(*NewBlockInfo);
@@ -2878,14 +3084,16 @@ static void inferDSOLocal(GlobalValue *GV) {
Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
// v1: [pointer type, isconst, initid, linkage, alignment, section,
// visibility, threadlocal, unnamed_addr, externally_initialized,
- // dllstorageclass, comdat, attributes, preemption specifier] (name in VST)
+ // dllstorageclass, comdat, attributes, preemption specifier,
+ // partition strtab offset, partition strtab size] (name in VST)
// v2: [strtab_offset, strtab_size, v1]
StringRef Name;
std::tie(Name, Record) = readNameFromStrtab(Record);
if (Record.size() < 6)
return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
+ Type *FullTy = getFullyStructuredTypeByID(Record[0]);
+ Type *Ty = flattenPointerTypes(FullTy);
if (!Ty)
return error("Invalid record");
bool isConstant = Record[1] & 1;
@@ -2897,7 +3105,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
if (!Ty->isPointerTy())
return error("Invalid type for value");
AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
- Ty = cast<PointerType>(Ty)->getElementType();
+ std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
}
uint64_t RawLinkage = Record[3];
@@ -2943,7 +3151,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
else
upgradeDLLImportExportLinkage(NewGV, RawLinkage);
- ValueList.push_back(NewGV);
+ FullTy = PointerType::get(FullTy, AddressSpace);
+ assert(NewGV->getType() == flattenPointerTypes(FullTy) &&
+ "Incorrect fully specified type for GlobalVariable");
+ ValueList.push_back(NewGV, FullTy);
// Remember which value to use for the global initializer.
if (unsigned InitID = Record[2])
@@ -2969,6 +3180,10 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
}
inferDSOLocal(NewGV);
+ // Check whether we have enough values to read a partition name.
+ if (Record.size() > 15)
+ NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15]));
+
return Error::success();
}
@@ -2982,13 +3197,14 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() < 8)
return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
- if (!Ty)
- return error("Invalid record");
- if (auto *PTy = dyn_cast<PointerType>(Ty))
- Ty = PTy->getElementType();
- auto *FTy = dyn_cast<FunctionType>(Ty);
+ Type *FullFTy = getFullyStructuredTypeByID(Record[0]);
+ Type *FTy = flattenPointerTypes(FullFTy);
if (!FTy)
+ return error("Invalid record");
+ if (isa<PointerType>(FTy))
+ std::tie(FullFTy, FTy) = getPointerElementTypes(FullFTy);
+
+ if (!isa<FunctionType>(FTy))
return error("Invalid type for value");
auto CC = static_cast<CallingConv::ID>(Record[1]);
if (CC & ~CallingConv::MaxID)
@@ -2998,8 +3214,13 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() > 16)
AddrSpace = Record[16];
- Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
- AddrSpace, Name, TheModule);
+ Function *Func =
+ Function::Create(cast<FunctionType>(FTy), GlobalValue::ExternalLinkage,
+ AddrSpace, Name, TheModule);
+
+ assert(Func->getFunctionType() == flattenPointerTypes(FullFTy) &&
+ "Incorrect fully specified type provided for function");
+ FunctionTypes[Func] = cast<FunctionType>(FullFTy);
Func->setCallingConv(CC);
bool isProto = Record[2];
@@ -3007,6 +3228,19 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->setLinkage(getDecodedLinkage(RawLinkage));
Func->setAttributes(getAttributes(Record[4]));
+ // Upgrade any old-style byval without a type by propagating the argument's
+ // pointee type. There should be no opaque pointers where the byval type is
+ // implicit.
+ for (unsigned i = 0; i != Func->arg_size(); ++i) {
+ if (!Func->hasParamAttribute(i, Attribute::ByVal))
+ continue;
+
+ Type *PTy = cast<FunctionType>(FullFTy)->getParamType(i);
+ Func->removeParamAttr(i, Attribute::ByVal);
+ Func->addParamAttr(i, Attribute::getWithByValType(
+ Context, getPointerElementFlatType(PTy)));
+ }
+
unsigned Alignment;
if (Error Err = parseAlignmentValue(Record[5], Alignment))
return Err;
@@ -3058,7 +3292,16 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
}
inferDSOLocal(Func);
- ValueList.push_back(Func);
+ // Record[16] is the address space number.
+
+ // Check whether we have enough values to read a partition name.
+ if (Record.size() > 18)
+ Func->setPartition(StringRef(Strtab.data() + Record[17], Record[18]));
+
+ Type *FullTy = PointerType::get(FullFTy, AddrSpace);
+ assert(Func->getType() == flattenPointerTypes(FullTy) &&
+ "Incorrect fully specified type provided for Function");
+ ValueList.push_back(Func, FullTy);
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with it later.
@@ -3087,7 +3330,8 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
if (Record.size() < (3 + (unsigned)NewRecord))
return error("Invalid record");
unsigned OpNum = 0;
- Type *Ty = getTypeByID(Record[OpNum++]);
+ Type *FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+ Type *Ty = flattenPointerTypes(FullTy);
if (!Ty)
return error("Invalid record");
@@ -3096,7 +3340,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return error("Invalid type for value");
- Ty = PTy->getElementType();
+ std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
AddrSpace = PTy->getAddressSpace();
} else {
AddrSpace = Record[OpNum++];
@@ -3112,6 +3356,9 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
else
NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
nullptr, TheModule);
+
+ assert(NewGA->getValueType() == flattenPointerTypes(FullTy) &&
+ "Incorrect fully structured type provided for GlobalIndirectSymbol");
// Old bitcode files didn't have a visibility field.
// Local linkage must have default visibility.
if (OpNum != Record.size()) {
@@ -3135,23 +3382,37 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
NewGA->setDSOLocal(getDecodedDSOLocal(Record[OpNum++]));
inferDSOLocal(NewGA);
- ValueList.push_back(NewGA);
+ // Check whether we have enough values to read a partition name.
+ if (OpNum + 1 < Record.size()) {
+ NewGA->setPartition(
+ StringRef(Strtab.data() + Record[OpNum], Record[OpNum + 1]));
+ OpNum += 2;
+ }
+
+ FullTy = PointerType::get(FullTy, AddrSpace);
+ assert(NewGA->getType() == flattenPointerTypes(FullTy) &&
+ "Incorrect fully structured type provided for GlobalIndirectSymbol");
+ ValueList.push_back(NewGA, FullTy);
IndirectSymbolInits.push_back(std::make_pair(NewGA, Val));
return Error::success();
}
Error BitcodeReader::parseModule(uint64_t ResumeBit,
bool ShouldLazyLoadMetadata) {
- if (ResumeBit)
- Stream.JumpToBit(ResumeBit);
- else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
+ if (ResumeBit) {
+ if (Error JumpFailed = Stream.JumpToBit(ResumeBit))
+ return JumpFailed;
+ } else if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
// Read all the records for this module.
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -3162,8 +3423,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
case BitstreamEntry::SubBlock:
switch (Entry.ID) {
default: // Skip unknown content.
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
break;
case bitc::BLOCKINFO_BLOCK_ID:
if (readBlockInfo())
@@ -3196,8 +3457,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
// We must have had a VST forward declaration record, which caused
// the parser to jump to and parse the VST earlier.
assert(VSTOffset > 0);
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
}
break;
case bitc::CONSTANTS_BLOCK_ID:
@@ -3249,8 +3510,8 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
// materializing functions. The ResumeBit points to the
// start of the last function block recorded in the
// DeferredFunctionInfo map. Skip it.
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
continue;
}
}
@@ -3294,8 +3555,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
}
// Read a record.
- auto BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (unsigned BitCode = MaybeBitCode.get()) {
default: break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: {
Expected<unsigned> VersionOrErr = parseVersionRecord(Record);
@@ -3407,10 +3670,23 @@ Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
return Error::success();
}
+void BitcodeReader::propagateByValTypes(CallBase *CB,
+ ArrayRef<Type *> ArgsFullTys) {
+ for (unsigned i = 0; i != CB->arg_size(); ++i) {
+ if (!CB->paramHasAttr(i, Attribute::ByVal))
+ continue;
+
+ CB->removeParamAttr(i, Attribute::ByVal);
+ CB->addParamAttr(
+ i, Attribute::getWithByValType(
+ Context, getPointerElementFlatType(ArgsFullTys[i])));
+ }
+}
+
/// Lazily parse the specified function body block.
Error BitcodeReader::parseFunctionBody(Function *F) {
- if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
+ return Err;
// Unexpected unresolved metadata when parsing function.
if (MDLoader->hasFwdRefs())
@@ -3421,9 +3697,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned ModuleMDLoaderSize = MDLoader->size();
// Add all the function arguments to the value table.
- for (Argument &I : F->args())
- ValueList.push_back(&I);
-
+ unsigned ArgNo = 0;
+ FunctionType *FullFTy = FunctionTypes[F];
+ for (Argument &I : F->args()) {
+ assert(I.getType() == flattenPointerTypes(FullFTy->getParamType(ArgNo)) &&
+ "Incorrect fully specified type for Function Argument");
+ ValueList.push_back(&I, FullFTy->getParamType(ArgNo++));
+ }
unsigned NextValueNo = ValueList.size();
BasicBlock *CurBB = nullptr;
unsigned CurBBNo = 0;
@@ -3444,7 +3724,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -3455,8 +3738,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case BitstreamEntry::SubBlock:
switch (Entry.ID) {
default: // Skip unknown content.
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
break;
case bitc::CONSTANTS_BLOCK_ID:
if (Error Err = parseConstants())
@@ -3492,8 +3775,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// Read a record.
Record.clear();
Instruction *I = nullptr;
- unsigned BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Type *FullTy = nullptr;
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (unsigned BitCode = MaybeBitCode.get()) {
default: // Default behavior: reject
return error("Invalid value");
case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks]
@@ -3634,7 +3920,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
OpNum+2 != Record.size())
return error("Invalid record");
- Type *ResTy = getTypeByID(Record[OpNum]);
+ FullTy = getFullyStructuredTypeByID(Record[OpNum]);
+ Type *ResTy = flattenPointerTypes(FullTy);
int Opc = getDecodedCastOpcode(Record[OpNum + 1]);
if (Opc == -1 || !ResTy)
return error("Invalid record");
@@ -3663,22 +3950,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (BitCode == bitc::FUNC_CODE_INST_GEP) {
InBounds = Record[OpNum++];
- Ty = getTypeByID(Record[OpNum++]);
+ FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+ Ty = flattenPointerTypes(FullTy);
} else {
InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD;
Ty = nullptr;
}
Value *BasePtr;
- if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
+ Type *FullBaseTy = nullptr;
+ if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr, &FullBaseTy))
return error("Invalid record");
- if (!Ty)
- Ty = cast<PointerType>(BasePtr->getType()->getScalarType())
- ->getElementType();
- else if (Ty !=
- cast<PointerType>(BasePtr->getType()->getScalarType())
- ->getElementType())
+ if (!Ty) {
+ std::tie(FullTy, Ty) =
+ getPointerElementTypes(FullBaseTy->getScalarType());
+ } else if (Ty != getPointerElementFlatType(FullBaseTy->getScalarType()))
return error(
"Explicit gep type does not match pointee type of pointer operand");
@@ -3691,6 +3978,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx);
+ FullTy = GetElementPtrInst::getGEPReturnType(FullTy, I, GEPIdx);
InstructionList.push_back(I);
if (InBounds)
@@ -3702,7 +3990,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// EXTRACTVAL: [opty, opval, n x indices]
unsigned OpNum = 0;
Value *Agg;
- if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg, &FullTy))
return error("Invalid record");
unsigned RecSize = Record.size();
@@ -3710,26 +3998,25 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("EXTRACTVAL: Invalid instruction with 0 indices");
SmallVector<unsigned, 4> EXTRACTVALIdx;
- Type *CurTy = Agg->getType();
for (; OpNum != RecSize; ++OpNum) {
- bool IsArray = CurTy->isArrayTy();
- bool IsStruct = CurTy->isStructTy();
+ bool IsArray = FullTy->isArrayTy();
+ bool IsStruct = FullTy->isStructTy();
uint64_t Index = Record[OpNum];
if (!IsStruct && !IsArray)
return error("EXTRACTVAL: Invalid type");
if ((unsigned)Index != Index)
return error("Invalid value");
- if (IsStruct && Index >= CurTy->getStructNumElements())
+ if (IsStruct && Index >= FullTy->getStructNumElements())
return error("EXTRACTVAL: Invalid struct index");
- if (IsArray && Index >= CurTy->getArrayNumElements())
+ if (IsArray && Index >= FullTy->getArrayNumElements())
return error("EXTRACTVAL: Invalid array index");
EXTRACTVALIdx.push_back((unsigned)Index);
if (IsStruct)
- CurTy = CurTy->getStructElementType(Index);
+ FullTy = FullTy->getStructElementType(Index);
else
- CurTy = CurTy->getArrayElementType();
+ FullTy = FullTy->getArrayElementType();
}
I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
@@ -3741,7 +4028,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// INSERTVAL: [opty, opval, opty, opval, n x indices]
unsigned OpNum = 0;
Value *Agg;
- if (getValueTypePair(Record, OpNum, NextValueNo, Agg))
+ if (getValueTypePair(Record, OpNum, NextValueNo, Agg, &FullTy))
return error("Invalid record");
Value *Val;
if (getValueTypePair(Record, OpNum, NextValueNo, Val))
@@ -3787,7 +4074,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// handles select i1 ... in old bitcode
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
- if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, &FullTy) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond))
return error("Invalid record");
@@ -3802,7 +4089,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// handles select i1 or select [N x i1]
unsigned OpNum = 0;
Value *TrueVal, *FalseVal, *Cond;
- if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal, &FullTy) ||
popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) ||
getValueTypePair(Record, OpNum, NextValueNo, Cond))
return error("Invalid record");
@@ -3821,18 +4108,24 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
I = SelectInst::Create(Cond, TrueVal, FalseVal);
InstructionList.push_back(I);
+ if (OpNum < Record.size() && isa<FPMathOperator>(I)) {
+ FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]);
+ if (FMF.any())
+ I->setFastMathFlags(FMF);
+ }
break;
}
case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval]
unsigned OpNum = 0;
Value *Vec, *Idx;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec, &FullTy) ||
getValueTypePair(Record, OpNum, NextValueNo, Idx))
return error("Invalid record");
if (!Vec->getType()->isVectorTy())
return error("Invalid type for value");
I = ExtractElementInst::Create(Vec, Idx);
+ FullTy = FullTy->getVectorElementType();
InstructionList.push_back(I);
break;
}
@@ -3840,7 +4133,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval]
unsigned OpNum = 0;
Value *Vec, *Elt, *Idx;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec))
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec, &FullTy))
return error("Invalid record");
if (!Vec->getType()->isVectorTy())
return error("Invalid type for value");
@@ -3856,7 +4149,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval]
unsigned OpNum = 0;
Value *Vec1, *Vec2, *Mask;
- if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, Vec1, &FullTy) ||
popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2))
return error("Invalid record");
@@ -3865,6 +4158,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy())
return error("Invalid type for value");
I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+ FullTy = VectorType::get(FullTy->getVectorElementType(),
+ Mask->getType()->getVectorNumElements());
InstructionList.push_back(I);
break;
}
@@ -3882,6 +4177,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS))
return error("Invalid record");
+ if (OpNum >= Record.size())
+ return error(
+ "Invalid record: operand number exceeded available operands");
+
unsigned PredVal = Record[OpNum];
bool IsFP = LHS->getType()->isFPOrFPVectorTy();
FastMathFlags FMF;
@@ -4168,31 +4467,40 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]);
FunctionType *FTy = nullptr;
- if (CCInfo >> 13 & 1 &&
- !(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
- return error("Explicit invoke type is not a function type");
+ FunctionType *FullFTy = nullptr;
+ if ((CCInfo >> 13) & 1) {
+ FullFTy =
+ dyn_cast<FunctionType>(getFullyStructuredTypeByID(Record[OpNum++]));
+ if (!FullFTy)
+ return error("Explicit invoke type is not a function type");
+ FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+ }
Value *Callee;
- if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee, &FullTy))
return error("Invalid record");
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
if (!CalleeTy)
return error("Callee is not a pointer");
if (!FTy) {
- FTy = dyn_cast<FunctionType>(CalleeTy->getElementType());
- if (!FTy)
+ FullFTy =
+ dyn_cast<FunctionType>(cast<PointerType>(FullTy)->getElementType());
+ if (!FullFTy)
return error("Callee is not of pointer to function type");
- } else if (CalleeTy->getElementType() != FTy)
+ FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+ } else if (getPointerElementFlatType(FullTy) != FTy)
return error("Explicit invoke type does not match pointee type of "
"callee operand");
if (Record.size() < FTy->getNumParams() + OpNum)
return error("Insufficient operands to call");
SmallVector<Value*, 16> Ops;
+ SmallVector<Type *, 16> ArgsFullTys;
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Ops.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
+ ArgsFullTys.push_back(FullFTy->getParamType(i));
if (!Ops.back())
return error("Invalid record");
}
@@ -4204,18 +4512,24 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// Read type/value pairs for varargs params.
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ Type *FullTy;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy))
return error("Invalid record");
Ops.push_back(Op);
+ ArgsFullTys.push_back(FullTy);
}
}
- I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles);
+ I = InvokeInst::Create(FTy, Callee, NormalBB, UnwindBB, Ops,
+ OperandBundles);
+ FullTy = FullFTy->getReturnType();
OperandBundles.clear();
InstructionList.push_back(I);
cast<InvokeInst>(I)->setCallingConv(
static_cast<CallingConv::ID>(CallingConv::MaxID & CCInfo));
cast<InvokeInst>(I)->setAttributes(PAL);
+ propagateByValTypes(cast<CallBase>(I), ArgsFullTys);
+
break;
}
case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
@@ -4227,6 +4541,82 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
+ case bitc::FUNC_CODE_INST_CALLBR: {
+ // CALLBR: [attr, cc, norm, transfs, fty, fnid, args]
+ unsigned OpNum = 0;
+ AttributeList PAL = getAttributes(Record[OpNum++]);
+ unsigned CCInfo = Record[OpNum++];
+
+ BasicBlock *DefaultDest = getBasicBlock(Record[OpNum++]);
+ unsigned NumIndirectDests = Record[OpNum++];
+ SmallVector<BasicBlock *, 16> IndirectDests;
+ for (unsigned i = 0, e = NumIndirectDests; i != e; ++i)
+ IndirectDests.push_back(getBasicBlock(Record[OpNum++]));
+
+ FunctionType *FTy = nullptr;
+ FunctionType *FullFTy = nullptr;
+ if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) {
+ FullFTy =
+ dyn_cast<FunctionType>(getFullyStructuredTypeByID(Record[OpNum++]));
+ if (!FullFTy)
+ return error("Explicit call type is not a function type");
+ FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+ }
+
+ Value *Callee;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee, &FullTy))
+ return error("Invalid record");
+
+ PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+ if (!OpTy)
+ return error("Callee is not a pointer type");
+ if (!FTy) {
+ FullFTy =
+ dyn_cast<FunctionType>(cast<PointerType>(FullTy)->getElementType());
+ if (!FullFTy)
+ return error("Callee is not of pointer to function type");
+ FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+ } else if (getPointerElementFlatType(FullTy) != FTy)
+ return error("Explicit call type does not match pointee type of "
+ "callee operand");
+ if (Record.size() < FTy->getNumParams() + OpNum)
+ return error("Insufficient operands to call");
+
+ SmallVector<Value*, 16> Args;
+ // Read the fixed params.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+ if (FTy->getParamType(i)->isLabelTy())
+ Args.push_back(getBasicBlock(Record[OpNum]));
+ else
+ Args.push_back(getValue(Record, OpNum, NextValueNo,
+ FTy->getParamType(i)));
+ if (!Args.back())
+ return error("Invalid record");
+ }
+
+ // Read type/value pairs for varargs params.
+ if (!FTy->isVarArg()) {
+ if (OpNum != Record.size())
+ return error("Invalid record");
+ } else {
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return error("Invalid record");
+ Args.push_back(Op);
+ }
+ }
+
+ I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args,
+ OperandBundles);
+ FullTy = FullFTy->getReturnType();
+ OperandBundles.clear();
+ InstructionList.push_back(I);
+ cast<CallBrInst>(I)->setCallingConv(
+ static_cast<CallingConv::ID>((0x7ff & CCInfo) >> bitc::CALL_CCONV));
+ cast<CallBrInst>(I)->setAttributes(PAL);
+ break;
+ }
case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
I = new UnreachableInst(Context);
InstructionList.push_back(I);
@@ -4234,7 +4624,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...]
if (Record.size() < 1 || ((Record.size()-1)&1))
return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
+ FullTy = getFullyStructuredTypeByID(Record[0]);
+ Type *Ty = flattenPointerTypes(FullTy);
if (!Ty)
return error("Invalid record");
@@ -4271,7 +4662,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Record.size() < 4)
return error("Invalid record");
}
- Type *Ty = getTypeByID(Record[Idx++]);
+ FullTy = getFullyStructuredTypeByID(Record[Idx++]);
+ Type *Ty = flattenPointerTypes(FullTy);
if (!Ty)
return error("Invalid record");
if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) {
@@ -4324,12 +4716,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
SwiftErrorMask;
bool InAlloca = AlignRecord & InAllocaMask;
bool SwiftError = AlignRecord & SwiftErrorMask;
- Type *Ty = getTypeByID(Record[0]);
+ FullTy = getFullyStructuredTypeByID(Record[0]);
+ Type *Ty = flattenPointerTypes(FullTy);
if ((AlignRecord & ExplicitTypeMask) == 0) {
auto *PTy = dyn_cast_or_null<PointerType>(Ty);
if (!PTy)
return error("Old-style alloca with a non-pointer type");
- Ty = PTy->getElementType();
+ std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
}
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
@@ -4348,29 +4741,34 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
AI->setUsedWithInAlloca(InAlloca);
AI->setSwiftError(SwiftError);
I = AI;
+ FullTy = PointerType::get(FullTy, AS);
InstructionList.push_back(I);
break;
}
case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol]
unsigned OpNum = 0;
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy) ||
(OpNum + 2 != Record.size() && OpNum + 3 != Record.size()))
return error("Invalid record");
+ if (!isa<PointerType>(Op->getType()))
+ return error("Load operand is not a pointer type");
+
Type *Ty = nullptr;
- if (OpNum + 3 == Record.size())
- Ty = getTypeByID(Record[OpNum++]);
+ if (OpNum + 3 == Record.size()) {
+ FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+ Ty = flattenPointerTypes(FullTy);
+ } else
+ std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
+
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
return Err;
- if (!Ty)
- Ty = cast<PointerType>(Op->getType())->getElementType();
unsigned Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align);
-
InstructionList.push_back(I);
break;
}
@@ -4378,17 +4776,22 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// LOADATOMIC: [opty, op, align, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy) ||
(OpNum + 4 != Record.size() && OpNum + 5 != Record.size()))
return error("Invalid record");
+ if (!isa<PointerType>(Op->getType()))
+ return error("Load operand is not a pointer type");
+
Type *Ty = nullptr;
- if (OpNum + 5 == Record.size())
- Ty = getTypeByID(Record[OpNum++]);
+ if (OpNum + 5 == Record.size()) {
+ FullTy = getFullyStructuredTypeByID(Record[OpNum++]);
+ Ty = flattenPointerTypes(FullTy);
+ } else
+ std::tie(FullTy, Ty) = getPointerElementTypes(FullTy);
+
if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
return Err;
- if (!Ty)
- Ty = cast<PointerType>(Op->getType())->getElementType();
AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
if (Ordering == AtomicOrdering::NotAtomic ||
@@ -4402,8 +4805,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
unsigned Align;
if (Error Err = parseAlignmentValue(Record[OpNum], Align))
return Err;
- I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID);
-
+ I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align, Ordering, SSID);
InstructionList.push_back(I);
break;
}
@@ -4411,12 +4813,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol]
unsigned OpNum = 0;
Value *Val, *Ptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ Type *FullTy;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
(BitCode == bitc::FUNC_CODE_INST_STORE
? getValueTypePair(Record, OpNum, NextValueNo, Val)
: popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(),
- Val)) ||
+ getPointerElementFlatType(FullTy), Val)) ||
OpNum + 2 != Record.size())
return error("Invalid record");
@@ -4434,13 +4836,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Val, *Ptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ Type *FullTy;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
!isa<PointerType>(Ptr->getType()) ||
(BitCode == bitc::FUNC_CODE_INST_STOREATOMIC
? getValueTypePair(Record, OpNum, NextValueNo, Val)
: popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(),
- Val)) ||
+ getPointerElementFlatType(FullTy), Val)) ||
OpNum + 4 != Record.size())
return error("Invalid record");
@@ -4468,15 +4870,25 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// failureordering?, isweak?]
unsigned OpNum = 0;
Value *Ptr, *Cmp, *New;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
- (BitCode == bitc::FUNC_CODE_INST_CMPXCHG
- ? getValueTypePair(Record, OpNum, NextValueNo, Cmp)
- : popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(),
- Cmp)) ||
- popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy))
+ return error("Invalid record");
+
+ if (!isa<PointerType>(Ptr->getType()))
+ return error("Cmpxchg operand is not a pointer type");
+
+ if (BitCode == bitc::FUNC_CODE_INST_CMPXCHG) {
+ if (getValueTypePair(Record, OpNum, NextValueNo, Cmp, &FullTy))
+ return error("Invalid record");
+ } else if (popValue(Record, OpNum, NextValueNo,
+ getPointerElementFlatType(FullTy), Cmp))
+ return error("Invalid record");
+ else
+ FullTy = cast<PointerType>(FullTy)->getElementType();
+
+ if (popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) ||
Record.size() < OpNum + 3 || Record.size() > OpNum + 5)
return error("Invalid record");
+
AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]);
if (SuccessOrdering == AtomicOrdering::NotAtomic ||
SuccessOrdering == AtomicOrdering::Unordered)
@@ -4494,6 +4906,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering,
SSID);
+ FullTy = StructType::get(Context, {FullTy, Type::getInt1Ty(Context)});
cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
if (Record.size() < 8) {
@@ -4502,6 +4915,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// expecting the first component of a modern cmpxchg.
CurBB->getInstList().push_back(I);
I = ExtractValueInst::Create(I, 0);
+ FullTy = cast<StructType>(FullTy)->getElementType(0);
} else {
cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum+4]);
}
@@ -4513,11 +4927,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid]
unsigned OpNum = 0;
Value *Ptr, *Val;
- if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
+ if (getValueTypePair(Record, OpNum, NextValueNo, Ptr, &FullTy) ||
!isa<PointerType>(Ptr->getType()) ||
popValue(Record, OpNum, NextValueNo,
- cast<PointerType>(Ptr->getType())->getElementType(), Val) ||
- OpNum+4 != Record.size())
+ getPointerElementFlatType(FullTy), Val) ||
+ OpNum + 4 != Record.size())
return error("Invalid record");
AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]);
if (Operation < AtomicRMWInst::FIRST_BINOP ||
@@ -4529,6 +4943,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]);
I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID);
+ FullTy = getPointerElementFlatType(FullTy);
cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
InstructionList.push_back(I);
break;
@@ -4563,28 +4978,36 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
FunctionType *FTy = nullptr;
- if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 &&
- !(FTy = dyn_cast<FunctionType>(getTypeByID(Record[OpNum++]))))
- return error("Explicit call type is not a function type");
+ FunctionType *FullFTy = nullptr;
+ if ((CCInfo >> bitc::CALL_EXPLICIT_TYPE) & 1) {
+ FullFTy =
+ dyn_cast<FunctionType>(getFullyStructuredTypeByID(Record[OpNum++]));
+ if (!FullFTy)
+ return error("Explicit call type is not a function type");
+ FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+ }
Value *Callee;
- if (getValueTypePair(Record, OpNum, NextValueNo, Callee))
+ if (getValueTypePair(Record, OpNum, NextValueNo, Callee, &FullTy))
return error("Invalid record");
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
if (!OpTy)
return error("Callee is not a pointer type");
if (!FTy) {
- FTy = dyn_cast<FunctionType>(OpTy->getElementType());
- if (!FTy)
+ FullFTy =
+ dyn_cast<FunctionType>(cast<PointerType>(FullTy)->getElementType());
+ if (!FullFTy)
return error("Callee is not of pointer to function type");
- } else if (OpTy->getElementType() != FTy)
+ FTy = cast<FunctionType>(flattenPointerTypes(FullFTy));
+ } else if (getPointerElementFlatType(FullTy) != FTy)
return error("Explicit call type does not match pointee type of "
"callee operand");
if (Record.size() < FTy->getNumParams() + OpNum)
return error("Insufficient operands to call");
SmallVector<Value*, 16> Args;
+ SmallVector<Type*, 16> ArgsFullTys;
// Read the fixed params.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
if (FTy->getParamType(i)->isLabelTy())
@@ -4592,6 +5015,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
else
Args.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
+ ArgsFullTys.push_back(FullFTy->getParamType(i));
if (!Args.back())
return error("Invalid record");
}
@@ -4603,13 +5027,16 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
} else {
while (OpNum != Record.size()) {
Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ Type *FullTy;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op, &FullTy))
return error("Invalid record");
Args.push_back(Op);
+ ArgsFullTys.push_back(FullTy);
}
}
I = CallInst::Create(FTy, Callee, Args, OperandBundles);
+ FullTy = FullFTy->getReturnType();
OperandBundles.clear();
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
@@ -4623,6 +5050,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
TCK = CallInst::TCK_NoTail;
cast<CallInst>(I)->setTailCallKind(TCK);
cast<CallInst>(I)->setAttributes(PAL);
+ propagateByValTypes(cast<CallBase>(I), ArgsFullTys);
if (FMF.any()) {
if (!isa<FPMathOperator>(I))
return error("Fast-math-flags specified for call without "
@@ -4636,7 +5064,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
Type *OpTy = getTypeByID(Record[0]);
Value *Op = getValue(Record, 1, NextValueNo, OpTy);
- Type *ResTy = getTypeByID(Record[2]);
+ FullTy = getFullyStructuredTypeByID(Record[2]);
+ Type *ResTy = flattenPointerTypes(FullTy);
if (!OpTy || !Op || !ResTy)
return error("Invalid record");
I = new VAArgInst(Op, ResTy);
@@ -4686,8 +5115,23 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
// Non-void values get registered in the value table for future use.
- if (I && !I->getType()->isVoidTy())
- ValueList.assignValue(I, NextValueNo++);
+ if (I && !I->getType()->isVoidTy()) {
+ if (!FullTy) {
+ FullTy = I->getType();
+ assert(
+ !FullTy->isPointerTy() && !isa<StructType>(FullTy) &&
+ !isa<ArrayType>(FullTy) &&
+ (!isa<VectorType>(FullTy) ||
+ FullTy->getVectorElementType()->isFloatingPointTy() ||
+ FullTy->getVectorElementType()->isIntegerTy()) &&
+ "Structured types must be assigned with corresponding non-opaque "
+ "pointer type");
+ }
+
+ assert(I->getType() == flattenPointerTypes(FullTy) &&
+ "Incorrect fully structured type provided for Instruction");
+ ValueList.assignValue(I, NextValueNo++, FullTy);
+ }
}
OutOfRecordLoop:
@@ -4769,8 +5213,8 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
return Err;
// Move the bit stream to the saved position of the deferred function body.
- Stream.JumpToBit(DFII->second);
-
+ if (Error JumpFailed = Stream.JumpToBit(DFII->second))
+ return JumpFailed;
if (Error Err = parseFunctionBody(F))
return Err;
F->setIsMaterializable(false);
@@ -4933,10 +5377,13 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
return Error::success();
assert(Offset > 0 && "Expected non-zero VST offset");
- uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream);
+ Expected<uint64_t> MaybeCurrentBit = jumpToValueSymbolTable(Offset, Stream);
+ if (!MaybeCurrentBit)
+ return MaybeCurrentBit.takeError();
+ uint64_t CurrentBit = MaybeCurrentBit.get();
- if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
@@ -4944,7 +5391,10 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
SmallString<128> ValueName;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -4952,7 +5402,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
return error("Malformed block");
case BitstreamEntry::EndBlock:
// Done parsing VST, jump back to wherever we came from.
- Stream.JumpToBit(CurrentBit);
+ if (Error JumpFailed = Stream.JumpToBit(CurrentBit))
+ return JumpFailed;
return Error::success();
case BitstreamEntry::Record:
// The interesting case.
@@ -4961,7 +5412,10 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
// Read a record.
Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records).
break;
case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N]
@@ -5009,8 +5463,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
// At the end of this routine the module Index is populated with a map
// from global value id to GlobalValueSummary objects.
Error ModuleSummaryIndexBitcodeReader::parseModule() {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
DenseMap<unsigned, GlobalValue::LinkageTypes> ValueIdToLinkageMap;
@@ -5018,7 +5472,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
// Read the index for this module.
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -5029,8 +5486,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
case BitstreamEntry::SubBlock:
switch (Entry.ID) {
default: // Skip unknown content.
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
break;
case bitc::BLOCKINFO_BLOCK_ID:
// Need to parse these to get abbrev ids (e.g. for VST)
@@ -5043,8 +5500,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
assert(((SeenValueSymbolTable && VSTOffset > 0) ||
!SeenGlobalValSummary) &&
"Expected early VST parse via VSTOffset record");
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = Stream.SkipBlock())
+ return Err;
break;
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
@@ -5075,8 +5532,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
case BitstreamEntry::Record: {
Record.clear();
- auto BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (MaybeBitCode.get()) {
default:
break; // Default behavior, ignore unknown content.
case bitc::MODULE_CODE_VERSION: {
@@ -5224,32 +5683,66 @@ static void parseTypeIdSummaryRecord(ArrayRef<uint64_t> Record,
parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId);
}
-static void setImmutableRefs(std::vector<ValueInfo> &Refs, unsigned Count) {
- // Read-only refs are in the end of the refs list.
- for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo)
+void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableInfo(
+ ArrayRef<uint64_t> Record, size_t &Slot,
+ TypeIdCompatibleVtableInfo &TypeId) {
+ uint64_t Offset = Record[Slot++];
+ ValueInfo Callee = getValueInfoFromValueId(Record[Slot++]).first;
+ TypeId.push_back({Offset, Callee});
+}
+
+void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
+ ArrayRef<uint64_t> Record) {
+ size_t Slot = 0;
+ TypeIdCompatibleVtableInfo &TypeId =
+ TheIndex.getOrInsertTypeIdCompatibleVtableSummary(
+ {Strtab.data() + Record[Slot],
+ static_cast<size_t>(Record[Slot + 1])});
+ Slot += 2;
+
+ while (Slot < Record.size())
+ parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
+}
+
+static void setSpecialRefs(std::vector<ValueInfo> &Refs, unsigned ROCnt,
+ unsigned WOCnt) {
+ // Readonly and writeonly refs are at the end of the refs list.
+ assert(ROCnt + WOCnt <= Refs.size());
+ unsigned FirstWORef = Refs.size() - WOCnt;
+ unsigned RefNo = FirstWORef - ROCnt;
+ for (; RefNo < FirstWORef; ++RefNo)
Refs[RefNo].setReadOnly();
+ for (; RefNo < Refs.size(); ++RefNo)
+ Refs[RefNo].setWriteOnly();
}
// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
- if (Stream.EnterSubBlock(ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
// Parse version
{
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
+
if (Entry.Kind != BitstreamEntry::Record)
return error("Invalid Summary Block: record for version expected");
- if (Stream.readRecord(Entry.ID, Record) != bitc::FS_VERSION)
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ if (MaybeRecord.get() != bitc::FS_VERSION)
return error("Invalid Summary Block: version expected");
}
const uint64_t Version = Record[0];
const bool IsOldProfileFormat = Version == 1;
- if (Version < 1 || Version > 6)
+ if (Version < 1 || Version > 7)
return error("Invalid summary version " + Twine(Version) +
- ". Version should be in the range [1-6].");
+ ". Version should be in the range [1-7].");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
@@ -5267,7 +5760,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
PendingTypeCheckedLoadConstVCalls;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -5288,8 +5784,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
// in the combined index VST entries). The records also contain
// information used for ThinLTO renaming and importing.
Record.clear();
- auto BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (unsigned BitCode = MaybeBitCode.get()) {
default: // Default behavior: ignore.
break;
case bitc::FS_FLAGS: { // [flags]
@@ -5343,15 +5841,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned InstCount = Record[2];
uint64_t RawFunFlags = 0;
unsigned NumRefs = Record[3];
- unsigned NumImmutableRefs = 0;
+ unsigned NumRORefs = 0, NumWORefs = 0;
int RefListStartIndex = 4;
if (Version >= 4) {
RawFunFlags = Record[3];
NumRefs = Record[4];
RefListStartIndex = 5;
if (Version >= 5) {
- NumImmutableRefs = Record[5];
+ NumRORefs = Record[5];
RefListStartIndex = 6;
+ if (Version >= 7) {
+ NumWORefs = Record[6];
+ RefListStartIndex = 7;
+ }
}
}
@@ -5371,7 +5873,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, HasRelBF);
- setImmutableRefs(Refs, NumImmutableRefs);
+ setSpecialRefs(Refs, NumRORefs, NumWORefs);
auto FS = llvm::make_unique<FunctionSummary>(
Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
@@ -5406,14 +5908,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
// ownership.
AS->setModulePath(getThisModule()->first());
- GlobalValue::GUID AliaseeGUID =
- getValueInfoFromValueId(AliaseeID).first.getGUID();
- auto AliaseeInModule =
- TheIndex.findSummaryInModule(AliaseeGUID, ModulePath);
+ auto AliaseeVI = getValueInfoFromValueId(AliaseeID).first;
+ auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeVI, ModulePath);
if (!AliaseeInModule)
return error("Alias expects aliasee summary to be parsed");
- AS->setAliasee(AliaseeInModule);
- AS->setAliaseeGUID(AliaseeGUID);
+ AS->setAliasee(AliaseeVI, AliaseeInModule);
auto GUID = getValueInfoFromValueId(ValueID);
AS->setOriginalName(GUID.second);
@@ -5425,7 +5924,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned ValueID = Record[0];
uint64_t RawFlags = Record[1];
unsigned RefArrayStart = 2;
- GlobalVarSummary::GVarFlags GVF;
+ GlobalVarSummary::GVarFlags GVF(/* ReadOnly */ false,
+ /* WriteOnly */ false);
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
if (Version >= 5) {
GVF = getDecodedGVarFlags(Record[2]);
@@ -5441,6 +5941,34 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
break;
}
+ // FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags,
+ // numrefs, numrefs x valueid,
+ // n x (valueid, offset)]
+ case bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: {
+ unsigned ValueID = Record[0];
+ uint64_t RawFlags = Record[1];
+ GlobalVarSummary::GVarFlags GVF = getDecodedGVarFlags(Record[2]);
+ unsigned NumRefs = Record[3];
+ unsigned RefListStartIndex = 4;
+ unsigned VTableListStartIndex = RefListStartIndex + NumRefs;
+ auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
+ std::vector<ValueInfo> Refs = makeRefList(
+ ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
+ VTableFuncList VTableFuncs;
+ for (unsigned I = VTableListStartIndex, E = Record.size(); I != E; ++I) {
+ ValueInfo Callee = getValueInfoFromValueId(Record[I]).first;
+ uint64_t Offset = Record[++I];
+ VTableFuncs.push_back({Callee, Offset});
+ }
+ auto VS =
+ llvm::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
+ VS->setModulePath(getThisModule()->first());
+ VS->setVTableFuncs(VTableFuncs);
+ auto GUID = getValueInfoFromValueId(ValueID);
+ VS->setOriginalName(GUID.second);
+ TheIndex.addGlobalValueSummary(GUID.first, std::move(VS));
+ break;
+ }
// FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs,
// numrefs x valueid, n x (valueid)]
// FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs,
@@ -5454,7 +5982,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t RawFunFlags = 0;
uint64_t EntryCount = 0;
unsigned NumRefs = Record[4];
- unsigned NumImmutableRefs = 0;
+ unsigned NumRORefs = 0, NumWORefs = 0;
int RefListStartIndex = 5;
if (Version >= 4) {
@@ -5462,13 +5990,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
RefListStartIndex = 6;
size_t NumRefsIndex = 5;
if (Version >= 5) {
+ unsigned NumRORefsOffset = 1;
RefListStartIndex = 7;
if (Version >= 6) {
NumRefsIndex = 6;
EntryCount = Record[5];
RefListStartIndex = 8;
+ if (Version >= 7) {
+ RefListStartIndex = 9;
+ NumWORefs = Record[8];
+ NumRORefsOffset = 2;
+ }
}
- NumImmutableRefs = Record[RefListStartIndex - 1];
+ NumRORefs = Record[RefListStartIndex - NumRORefsOffset];
}
NumRefs = Record[NumRefsIndex];
}
@@ -5484,7 +6018,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, false);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
- setImmutableRefs(Refs, NumImmutableRefs);
+ setSpecialRefs(Refs, NumRORefs, NumWORefs);
auto FS = llvm::make_unique<FunctionSummary>(
Flags, InstCount, getDecodedFFlags(RawFunFlags), EntryCount,
std::move(Refs), std::move(Edges), std::move(PendingTypeTests),
@@ -5516,12 +6050,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
LastSeenSummary = AS.get();
AS->setModulePath(ModuleIdMap[ModuleId]);
- auto AliaseeGUID =
- getValueInfoFromValueId(AliaseeValueId).first.getGUID();
- auto AliaseeInModule =
- TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath());
- AS->setAliasee(AliaseeInModule);
- AS->setAliaseeGUID(AliaseeGUID);
+ auto AliaseeVI = getValueInfoFromValueId(AliaseeValueId).first;
+ auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeVI, AS->modulePath());
+ AS->setAliasee(AliaseeVI, AliaseeInModule);
ValueInfo VI = getValueInfoFromValueId(ValueID).first;
LastSeenGUID = VI.getGUID();
@@ -5534,7 +6065,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
uint64_t ModuleId = Record[1];
uint64_t RawFlags = Record[2];
unsigned RefArrayStart = 3;
- GlobalVarSummary::GVarFlags GVF;
+ GlobalVarSummary::GVarFlags GVF(/* ReadOnly */ false,
+ /* WriteOnly */ false);
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
if (Version >= 5) {
GVF = getDecodedGVarFlags(Record[3]);
@@ -5610,6 +6142,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_TYPE_ID:
parseTypeIdSummaryRecord(Record, Strtab, TheIndex);
break;
+
+ case bitc::FS_TYPE_ID_METADATA:
+ parseTypeIdCompatibleVtableSummaryRecord(Record);
+ break;
}
}
llvm_unreachable("Exit infinite loop");
@@ -5618,8 +6154,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
// Parse the module string table block into the Index.
// This populates the ModulePathStringTable map in the index.
Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
- if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
@@ -5627,7 +6163,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -5641,7 +6180,10 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
}
Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case bitc::MST_CODE_ENTRY: {
@@ -5707,12 +6249,16 @@ const std::error_category &llvm::BitcodeErrorCategory() {
static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
unsigned Block, unsigned RecordID) {
- if (Stream.EnterSubBlock(Block))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(Block))
+ return std::move(Err);
StringRef Strtab;
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
+
switch (Entry.Kind) {
case BitstreamEntry::EndBlock:
return Strtab;
@@ -5721,14 +6267,18 @@ static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream,
return error("Malformed block");
case BitstreamEntry::SubBlock:
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
break;
case BitstreamEntry::Record:
StringRef Blob;
SmallVector<uint64_t, 1> Record;
- if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID)
+ Expected<unsigned> MaybeRecord =
+ Stream.readRecord(Entry.ID, Record, &Blob);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ if (MaybeRecord.get() == RecordID)
Strtab = Blob;
break;
}
@@ -5764,7 +6314,11 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
if (BCBegin + 8 >= Stream.getBitcodeBytes().size())
return F;
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
+
switch (Entry.Kind) {
case BitstreamEntry::EndBlock:
case BitstreamEntry::Error:
@@ -5774,10 +6328,16 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
uint64_t IdentificationBit = -1ull;
if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8;
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
+
+ {
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ Entry = MaybeEntry.get();
+ }
- Entry = Stream.advance();
if (Entry.Kind != BitstreamEntry::SubBlock ||
Entry.ID != bitc::MODULE_BLOCK_ID)
return error("Malformed block");
@@ -5785,8 +6345,8 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
if (Entry.ID == bitc::MODULE_BLOCK_ID) {
uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8;
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
F.Mods.push_back({Stream.getBitcodeBytes().slice(
BCBegin, Stream.getCurrentByteNo() - BCBegin),
@@ -5834,13 +6394,15 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
continue;
}
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
continue;
}
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
+ if (Expected<unsigned> StreamFailed = Stream.skipRecord(Entry.ID))
+ continue;
+ else
+ return StreamFailed.takeError();
}
}
}
@@ -5860,7 +6422,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
std::string ProducerIdentification;
if (IdentificationBit != -1ull) {
- Stream.JumpToBit(IdentificationBit);
+ if (Error JumpFailed = Stream.JumpToBit(IdentificationBit))
+ return std::move(JumpFailed);
Expected<std::string> ProducerIdentificationOrErr =
readIdentificationBlock(Stream);
if (!ProducerIdentificationOrErr)
@@ -5869,7 +6432,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
ProducerIdentification = *ProducerIdentificationOrErr;
}
- Stream.JumpToBit(ModuleBit);
+ if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+ return std::move(JumpFailed);
auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification,
Context);
@@ -5907,7 +6471,8 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
StringRef ModulePath, uint64_t ModuleId) {
BitstreamCursor Stream(Buffer);
- Stream.JumpToBit(ModuleBit);
+ if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+ return JumpFailed;
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
ModulePath, ModuleId);
@@ -5917,7 +6482,8 @@ Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
// Parse the specified bitcode buffer, returning the function info index.
Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
BitstreamCursor Stream(Buffer);
- Stream.JumpToBit(ModuleBit);
+ if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+ return std::move(JumpFailed);
auto Index = llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index,
@@ -5931,12 +6497,15 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
unsigned ID) {
- if (Stream.EnterSubBlock(ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(ID))
+ return std::move(Err);
SmallVector<uint64_t, 64> Record;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -5953,8 +6522,10 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
// Look for the FS_FLAGS record.
Record.clear();
- auto BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
+ Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeBitCode)
+ return MaybeBitCode.takeError();
+ switch (MaybeBitCode.get()) {
default: // Default behavior: ignore.
break;
case bitc::FS_FLAGS: { // [flags]
@@ -5972,13 +6543,17 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
// Check if the given bitcode buffer contains a global value summary block.
Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
BitstreamCursor Stream(Buffer);
- Stream.JumpToBit(ModuleBit);
+ if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
+ return std::move(JumpFailed);
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return std::move(Err);
while (true) {
- BitstreamEntry Entry = Stream.advance();
+ Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -6007,13 +6582,15 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
}
// Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
+ if (Error Err = Stream.SkipBlock())
+ return std::move(Err);
continue;
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
+ if (Expected<unsigned> StreamFailed = Stream.skipRecord(Entry.ID))
+ continue;
+ else
+ return StreamFailed.takeError();
}
}
}
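
A note on the recurring pattern in the BitcodeReader.cpp hunks above: bool-returning BitstreamCursor calls are replaced with llvm::Error / llvm::Expected<T>, checked and propagated at every call site. Below is a minimal sketch of that idiom, assuming only llvm/Support/Error.h; readWidth and useWidth are hypothetical names, not part of the patch.

#include "llvm/Support/Error.h"
using namespace llvm;

// Hypothetical producer: yields a value on success or an Error on failure,
// mirroring what advance()/readRecord() now return.
static Expected<unsigned> readWidth(bool Valid) {
  if (!Valid)
    return createStringError(inconvertibleErrorCode(), "truncated stream");
  return 32u;
}

// Hypothetical consumer: test the Expected, forward the failure with
// takeError(), and only then unwrap the value with get().
static Error useWidth(bool Valid) {
  Expected<unsigned> MaybeWidth = readWidth(Valid);
  if (!MaybeWidth)
    return MaybeWidth.takeError();
  unsigned Width = MaybeWidth.get();
  (void)Width;
  return Error::success();
}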
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 3289aa0acddd..108f71189585 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1,9 +1,8 @@
//===- MetadataLoader.cpp - Internal BitcodeReader implementation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,7 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -104,7 +103,7 @@ static cl::opt<bool> DisableLazyLoading(
namespace {
-static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
+static int64_t unrotateSign(uint64_t U) { return (U & 1) ? ~(U >> 1) : U >> 1; }
class BitcodeReaderMetadataList {
/// Array of metadata references.
@@ -131,8 +130,15 @@ class BitcodeReaderMetadataList {
LLVMContext &Context;
+ /// Maximum number of valid references. Forward references exceeding the
+ /// maximum must be invalid.
+ unsigned RefsUpperBound;
+
public:
- BitcodeReaderMetadataList(LLVMContext &C) : Context(C) {}
+ BitcodeReaderMetadataList(LLVMContext &C, size_t RefsUpperBound)
+ : Context(C),
+ RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+ RefsUpperBound)) {}
// vector compatibility methods
unsigned size() const { return MetadataPtrs.size(); }
@@ -219,6 +225,10 @@ void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
}
Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
+ // Bail out for a clearly invalid value.
+ if (Idx >= RefsUpperBound)
+ return nullptr;
+
if (Idx >= size())
resize(Idx + 1);
@@ -338,7 +348,7 @@ Metadata *BitcodeReaderMetadataList::resolveTypeRefArray(Metadata *MaybeTuple) {
if (!Tuple || Tuple->isDistinct())
return MaybeTuple;
- // Look through the DITypeRefArray, upgrading each DITypeRef.
+ // Look through the DITypeRefArray, upgrading each DIType *.
SmallVector<Metadata *, 32> Ops;
Ops.reserve(Tuple->getNumOperands());
for (Metadata *MD : Tuple->operands())
@@ -626,9 +636,10 @@ public:
BitcodeReaderValueList &ValueList,
std::function<Type *(unsigned)> getTypeByID,
bool IsImporting)
- : MetadataList(TheModule.getContext()), ValueList(ValueList),
- Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule),
- getTypeByID(std::move(getTypeByID)), IsImporting(IsImporting) {}
+ : MetadataList(TheModule.getContext(), Stream.SizeInBytes()),
+ ValueList(ValueList), Stream(Stream), Context(TheModule.getContext()),
+ TheModule(TheModule), getTypeByID(std::move(getTypeByID)),
+ IsImporting(IsImporting) {}
Error parseMetadata(bool ModuleLevel);
@@ -675,8 +686,12 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
SmallVector<uint64_t, 64> Record;
// Get the abbrevs, and preload record positions to make them lazy-loadable.
while (true) {
- BitstreamEntry Entry = IndexCursor.advanceSkippingSubblocks(
+ Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks(
BitstreamCursor::AF_DontPopBlockAtEnd);
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
+
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
@@ -688,14 +703,22 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// The interesting case.
++NumMDRecordLoaded;
uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
- auto Code = IndexCursor.skipRecord(Entry.ID);
+ Expected<unsigned> MaybeCode = IndexCursor.skipRecord(Entry.ID);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ unsigned Code = MaybeCode.get();
switch (Code) {
case bitc::METADATA_STRINGS: {
// Rewind and parse the strings.
- IndexCursor.JumpToBit(CurrentPos);
+ if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+ return std::move(Err);
StringRef Blob;
Record.clear();
- IndexCursor.readRecord(Entry.ID, Record, &Blob);
+ if (Expected<unsigned> MaybeRecord =
+ IndexCursor.readRecord(Entry.ID, Record, &Blob))
+ ;
+ else
+ return MaybeRecord.takeError();
unsigned NumStrings = Record[0];
MDStringRef.reserve(NumStrings);
auto IndexNextMDString = [&](StringRef Str) {
@@ -708,26 +731,37 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
case bitc::METADATA_INDEX_OFFSET: {
// This is the offset to the index, when we see this we skip all the
// records and load only an index to these.
- IndexCursor.JumpToBit(CurrentPos);
+ if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+ return std::move(Err);
Record.clear();
- IndexCursor.readRecord(Entry.ID, Record);
+ if (Expected<unsigned> MaybeRecord =
+ IndexCursor.readRecord(Entry.ID, Record))
+ ;
+ else
+ return MaybeRecord.takeError();
if (Record.size() != 2)
return error("Invalid record");
auto Offset = Record[0] + (Record[1] << 32);
auto BeginPos = IndexCursor.GetCurrentBitNo();
- IndexCursor.JumpToBit(BeginPos + Offset);
- Entry = IndexCursor.advanceSkippingSubblocks(
- BitstreamCursor::AF_DontPopBlockAtEnd);
+ if (Error Err = IndexCursor.JumpToBit(BeginPos + Offset))
+ return std::move(Err);
+ Expected<BitstreamEntry> MaybeEntry =
+ IndexCursor.advanceSkippingSubblocks(
+ BitstreamCursor::AF_DontPopBlockAtEnd);
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ Entry = MaybeEntry.get();
assert(Entry.Kind == BitstreamEntry::Record &&
"Corrupted bitcode: Expected `Record` when trying to find the "
"Metadata index");
Record.clear();
- auto Code = IndexCursor.readRecord(Entry.ID, Record);
- (void)Code;
- assert(Code == bitc::METADATA_INDEX && "Corrupted bitcode: Expected "
- "`METADATA_INDEX` when trying "
- "to find the Metadata index");
-
+ if (Expected<unsigned> MaybeCode =
+ IndexCursor.readRecord(Entry.ID, Record))
+ assert(MaybeCode.get() == bitc::METADATA_INDEX &&
+ "Corrupted bitcode: Expected `METADATA_INDEX` when trying to "
+ "find the Metadata index");
+ else
+ return MaybeCode.takeError();
// Delta unpack
auto CurrentValue = BeginPos;
GlobalMetadataBitPosIndex.reserve(Record.size());
@@ -743,21 +777,33 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
return error("Corrupted Metadata block");
case bitc::METADATA_NAME: {
// Named metadata need to be materialized now and aren't deferred.
- IndexCursor.JumpToBit(CurrentPos);
+ if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+ return std::move(Err);
Record.clear();
- unsigned Code = IndexCursor.readRecord(Entry.ID, Record);
- assert(Code == bitc::METADATA_NAME);
+
+ unsigned Code;
+ if (Expected<unsigned> MaybeCode =
+ IndexCursor.readRecord(Entry.ID, Record)) {
+ Code = MaybeCode.get();
+ assert(Code == bitc::METADATA_NAME);
+ } else
+ return MaybeCode.takeError();
// Read name of the named metadata.
SmallString<8> Name(Record.begin(), Record.end());
- Code = IndexCursor.ReadCode();
+ if (Expected<unsigned> MaybeCode = IndexCursor.ReadCode())
+ Code = MaybeCode.get();
+ else
+ return MaybeCode.takeError();
// Named Metadata comes in two parts, we expect the name to be followed
// by the node
Record.clear();
- unsigned NextBitCode = IndexCursor.readRecord(Code, Record);
- assert(NextBitCode == bitc::METADATA_NAMED_NODE);
- (void)NextBitCode;
+ if (Expected<unsigned> MaybeNextBitCode =
+ IndexCursor.readRecord(Code, Record))
+ assert(MaybeNextBitCode.get() == bitc::METADATA_NAMED_NODE);
+ else
+ return MaybeNextBitCode.takeError();
// Read named metadata elements.
unsigned Size = Record.size();
@@ -776,9 +822,14 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
// FIXME: we need to do this early because we don't materialize global
// value explicitly.
- IndexCursor.JumpToBit(CurrentPos);
+ if (Error Err = IndexCursor.JumpToBit(CurrentPos))
+ return std::move(Err);
Record.clear();
- IndexCursor.readRecord(Entry.ID, Record);
+ if (Expected<unsigned> MaybeRecord =
+ IndexCursor.readRecord(Entry.ID, Record))
+ ;
+ else
+ return MaybeRecord.takeError();
if (Record.size() % 2 == 0)
return error("Invalid record");
unsigned ValueID = Record[0];
@@ -812,6 +863,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
case bitc::METADATA_LEXICAL_BLOCK:
case bitc::METADATA_LEXICAL_BLOCK_FILE:
case bitc::METADATA_NAMESPACE:
+ case bitc::METADATA_COMMON_BLOCK:
case bitc::METADATA_MACRO:
case bitc::METADATA_MACRO_FILE:
case bitc::METADATA_TEMPLATE_TYPE:
@@ -845,8 +897,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// skip the whole block in case we lazy-load.
auto EntryPos = Stream.GetCurrentBitNo();
- if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
PlaceholderQueue Placeholders;
@@ -871,9 +923,14 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Return at the beginning of the block, since it is easy to skip it
// entirely from there.
Stream.ReadBlockEnd(); // Pop the abbrev block context.
- Stream.JumpToBit(EntryPos);
- if (Stream.SkipBlock())
- return error("Invalid record");
+ if (Error Err = IndexCursor.JumpToBit(EntryPos))
+ return Err;
+ if (Error Err = Stream.SkipBlock()) {
+ // FIXME this drops the error on the floor, which
+ // ThinLTO/X86/debuginfo-cu-import.ll relies on.
+ consumeError(std::move(Err));
+ return Error::success();
+ }
return Error::success();
}
// Couldn't load an index, fallback to loading all the block "old-style".
@@ -883,7 +940,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Read all the records.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -902,10 +962,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
Record.clear();
StringRef Blob;
++NumMDRecordLoaded;
- unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
- if (Error Err =
- parseOneMetadata(Record, Code, Placeholders, Blob, NextMetadataNo))
- return Err;
+ if (Expected<unsigned> MaybeCode =
+ Stream.readRecord(Entry.ID, Record, &Blob)) {
+ if (Error Err = parseOneMetadata(Record, MaybeCode.get(), Placeholders,
+ Blob, NextMetadataNo))
+ return Err;
+ } else
+ return MaybeCode.takeError();
}
}
@@ -930,12 +993,25 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
}
SmallVector<uint64_t, 64> Record;
StringRef Blob;
- IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
- auto Entry = IndexCursor.advanceSkippingSubblocks();
+ if (Error Err = IndexCursor.JumpToBit(
+ GlobalMetadataBitPosIndex[ID - MDStringRef.size()]))
+ report_fatal_error("lazyLoadOneMetadata failed jumping: " +
+ toString(std::move(Err)));
+ Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ // FIXME this drops the error on the floor.
+ report_fatal_error("lazyLoadOneMetadata failed advanceSkippingSubblocks: " +
+ toString(MaybeEntry.takeError()));
+ BitstreamEntry Entry = MaybeEntry.get();
++NumMDRecordLoaded;
- unsigned Code = IndexCursor.readRecord(Entry.ID, Record, &Blob);
- if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, ID))
- report_fatal_error("Can't lazyload MD");
+ if (Expected<unsigned> MaybeCode =
+ IndexCursor.readRecord(Entry.ID, Record, &Blob)) {
+ if (Error Err =
+ parseOneMetadata(Record, MaybeCode.get(), Placeholders, Blob, ID))
+ report_fatal_error("Can't lazyload MD, parseOneMetadata: " +
+ toString(std::move(Err)));
+ } else
+ report_fatal_error("Can't lazyload MD: " + toString(MaybeCode.takeError()));
}
/// Ensure that all forward-references and placeholders are resolved.
@@ -1032,12 +1108,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// Read name of the named metadata.
SmallString<8> Name(Record.begin(), Record.end());
Record.clear();
- Code = Stream.ReadCode();
+ Expected<unsigned> MaybeCode = Stream.ReadCode();
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ Code = MaybeCode.get();
++NumMDRecordLoaded;
- unsigned NextBitCode = Stream.readRecord(Code, Record);
- if (NextBitCode != bitc::METADATA_NAMED_NODE)
- return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
+ if (Expected<unsigned> MaybeNextBitCode = Stream.readRecord(Code, Record)) {
+ if (MaybeNextBitCode.get() != bitc::METADATA_NAMED_NODE)
+ return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
+ } else
+ return MaybeNextBitCode.takeError();
// Read named metadata elements.
unsigned Size = Record.size();
@@ -1407,12 +1488,33 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
return error("Invalid record");
bool HasSPFlags = Record[0] & 4;
- DISubprogram::DISPFlags SPFlags =
- HasSPFlags
- ? static_cast<DISubprogram::DISPFlags>(Record[9])
- : DISubprogram::toSPFlags(
- /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
- /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11]);
+
+ DINode::DIFlags Flags;
+ DISubprogram::DISPFlags SPFlags;
+ if (!HasSPFlags)
+ Flags = static_cast<DINode::DIFlags>(Record[11 + 2]);
+ else {
+ Flags = static_cast<DINode::DIFlags>(Record[11]);
+ SPFlags = static_cast<DISubprogram::DISPFlags>(Record[9]);
+ }
+
+ // Support for old metadata when
+ // subprogram specific flags are placed in DIFlags.
+ const unsigned DIFlagMainSubprogram = 1 << 21;
+ bool HasOldMainSubprogramFlag = Flags & DIFlagMainSubprogram;
+ if (HasOldMainSubprogramFlag)
+ // Remove old DIFlagMainSubprogram from DIFlags.
+ // Note: This assumes that any future use of bit 21 defaults to it
+ // being 0.
+ Flags &= ~static_cast<DINode::DIFlags>(DIFlagMainSubprogram);
+
+ if (HasOldMainSubprogramFlag && HasSPFlags)
+ SPFlags |= DISubprogram::SPFlagMainSubprogram;
+ else if (!HasSPFlags)
+ SPFlags = DISubprogram::toSPFlags(
+ /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
+ /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11],
+ /*DIFlagMainSubprogram*/HasOldMainSubprogramFlag);
// All definitions should be distinct.
IsDistinct = (Record[0] & 1) || (SPFlags & DISubprogram::SPFlagDefinition);
@@ -1456,7 +1558,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
Record[10 + OffsetA], // virtualIndex
HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
- static_cast<DINode::DIFlags>(Record[11 + OffsetA]),// flags
+ Flags, // flags
SPFlags, // SPFlags
HasUnit ? CUorFn : nullptr, // unit
getMDOrNull(Record[13 + OffsetB]), // templateParams
@@ -1508,6 +1610,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
NextMetadataNo++;
break;
}
+ case bitc::METADATA_COMMON_BLOCK: {
+ IsDistinct = Record[0] & 1;
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DICommonBlock,
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), getMDString(Record[3]),
+ getMDOrNull(Record[4]), Record[5])),
+ NextMetadataNo);
+ NextMetadataNo++;
+ break;
+ }
case bitc::METADATA_NAMESPACE: {
// Newer versions of DINamespace dropped file and line.
MDString *Name;
@@ -1831,7 +1944,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
if (R.AtEndOfStream())
return error("Invalid record: metadata strings bad length");
- unsigned Size = R.ReadVBR(6);
+ Expected<uint32_t> MaybeSize = R.ReadVBR(6);
+ if (!MaybeSize)
+ return MaybeSize.takeError();
+ uint32_t Size = MaybeSize.get();
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
@@ -1860,14 +1976,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
/// Parse metadata attachments.
Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
Function &F, const SmallVectorImpl<Instruction *> &InstructionList) {
- if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
PlaceholderQueue Placeholders;
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1884,7 +2003,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
// Read a metadata attachment record.
Record.clear();
++NumMDRecordLoaded;
- switch (Stream.readRecord(Entry.ID, Record)) {
+ Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeRecord)
+ return MaybeRecord.takeError();
+ switch (MaybeRecord.get()) {
default: // Default behavior: ignore.
break;
case bitc::METADATA_ATTACHMENT: {
@@ -1958,14 +2080,17 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKindRecord(
/// Parse the metadata kinds out of the METADATA_KIND_BLOCK.
Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
- if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
- return error("Invalid record");
+ if (Error Err = Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
+ return Err;
SmallVector<uint64_t, 64> Record;
// Read all the records.
while (true) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+ Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1981,8 +2106,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
// Read a record.
Record.clear();
++NumMDRecordLoaded;
- unsigned Code = Stream.readRecord(Entry.ID, Record);
- switch (Code) {
+ Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ switch (MaybeCode.get()) {
default: // Default behavior: ignore.
break;
case bitc::METADATA_KIND: {
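
The METADATA_INDEX_OFFSET handling above ("Delta unpack") rebuilds absolute record bit positions from delta-encoded index entries so records can be lazy-loaded later via JumpToBit. A standalone sketch of that unpacking follows; unpackIndex and its parameters are hypothetical names used only for illustration.

#include <cstdint>
#include <vector>

// Rebuild absolute bit positions from delta-encoded entries, starting at the
// bit offset where the index record itself begins.
std::vector<uint64_t> unpackIndex(uint64_t BeginPos,
                                  const std::vector<uint64_t> &Deltas) {
  std::vector<uint64_t> Positions;
  Positions.reserve(Deltas.size());
  uint64_t Current = BeginPos;
  for (uint64_t Delta : Deltas) {
    Current += Delta; // each entry is the distance from the previous record
    Positions.push_back(Current);
  }
  return Positions;
}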
diff --git a/lib/Bitcode/Reader/MetadataLoader.h b/lib/Bitcode/Reader/MetadataLoader.h
index 07a77a086f32..fe2b20273249 100644
--- a/lib/Bitcode/Reader/MetadataLoader.h
+++ b/lib/Bitcode/Reader/MetadataLoader.h
@@ -1,9 +1,8 @@
//===-- Bitcode/Reader/MetadataLoader.h - Load Metadatas -------*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Bitcode/Reader/ValueList.cpp b/lib/Bitcode/Reader/ValueList.cpp
index b3945a37408f..431995fd40ac 100644
--- a/lib/Bitcode/Reader/ValueList.cpp
+++ b/lib/Bitcode/Reader/ValueList.cpp
@@ -1,9 +1,8 @@
//===- ValueList.cpp - Internal BitcodeReader implementation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -66,15 +65,18 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
} // end namespace llvm
-void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
+void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx, Type *FullTy) {
if (Idx == size()) {
- push_back(V);
+ push_back(V, FullTy);
return;
}
if (Idx >= size())
resize(Idx + 1);
+ assert(FullTypes[Idx] == nullptr || FullTypes[Idx] == FullTy);
+ FullTypes[Idx] = FullTy;
+
WeakTrackingVH &OldV = ValuePtrs[Idx];
if (!OldV) {
OldV = V;
@@ -95,6 +97,10 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
}
Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
+ // Bail out for a clearly invalid value.
+ if (Idx >= RefsUpperBound)
+ return nullptr;
+
if (Idx >= size())
resize(Idx + 1);
@@ -110,9 +116,10 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
return C;
}
-Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
- // Bail out for a clearly invalid value. This would make us call resize(0)
- if (Idx == std::numeric_limits<unsigned>::max())
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty,
+ Type **FullTy) {
+ // Bail out for a clearly invalid value.
+ if (Idx >= RefsUpperBound)
return nullptr;
if (Idx >= size())
@@ -122,6 +129,8 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
// If the types don't match, it's invalid.
if (Ty && Ty != V->getType())
return nullptr;
+ if (FullTy)
+ *FullTy = FullTypes[Idx];
return V;
}
@@ -181,8 +190,8 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() {
NewOp = RealVal;
} else {
// Otherwise, look up the placeholder in ResolveConstants.
- ResolveConstantsTy::iterator It = std::lower_bound(
- ResolveConstants.begin(), ResolveConstants.end(),
+ ResolveConstantsTy::iterator It = llvm::lower_bound(
+ ResolveConstants,
std::pair<Constant *, unsigned>(cast<Constant>(*I), 0));
assert(It != ResolveConstants.end() && It->first == *I);
NewOp = operator[](It->second);
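
getConstantFwdRef and getValueFwdRef above now bail out for indices at or beyond RefsUpperBound, an upper bound on how many values the input could possibly define (seeded from the stream size). A minimal sketch of that guard; FwdRefTable and isPlausible are hypothetical names, not part of the patch.

#include <algorithm>
#include <cstddef>
#include <limits>

// Hypothetical forward-reference table: any index >= UpperBound cannot name a
// real value, so it is rejected before the table would be grown for it.
class FwdRefTable {
  unsigned UpperBound;

public:
  explicit FwdRefTable(size_t StreamSizeInBytes)
      : UpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
                            StreamSizeInBytes)) {}

  bool isPlausible(unsigned Idx) const { return Idx < UpperBound; }
};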
diff --git a/lib/Bitcode/Reader/ValueList.h b/lib/Bitcode/Reader/ValueList.h
index 5ad7899347ad..49900498c294 100644
--- a/lib/Bitcode/Reader/ValueList.h
+++ b/lib/Bitcode/Reader/ValueList.h
@@ -1,9 +1,8 @@
//===-- Bitcode/Reader/ValueList.h - Number values --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,13 @@ class Value;
class BitcodeReaderValueList {
std::vector<WeakTrackingVH> ValuePtrs;
+ /// Struct containing fully-specified copies of the type of each
+ /// value. When pointers are opaque, this will contain non-opaque
+ /// variants so that restructuring instructions can determine their
+ /// type correctly even if being loaded from old bitcode where some
+ /// types are implicit.
+ std::vector<Type *> FullTypes;
+
/// As we resolve forward-referenced constants, we add information about them
/// to this vector. This allows us to resolve them in bulk instead of
/// resolving each reference at a time. See the code in
@@ -40,8 +46,15 @@ class BitcodeReaderValueList {
ResolveConstantsTy ResolveConstants;
LLVMContext &Context;
+ /// Maximum number of valid references. Forward references exceeding the
+ /// maximum must be invalid.
+ unsigned RefsUpperBound;
+
public:
- BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ BitcodeReaderValueList(LLVMContext &C, size_t RefsUpperBound)
+ : Context(C),
+ RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+ RefsUpperBound)) {}
~BitcodeReaderValueList() {
assert(ResolveConstants.empty() && "Constants not resolved?");
@@ -49,12 +62,19 @@ public:
// vector compatibility methods
unsigned size() const { return ValuePtrs.size(); }
- void resize(unsigned N) { ValuePtrs.resize(N); }
- void push_back(Value *V) { ValuePtrs.emplace_back(V); }
+ void resize(unsigned N) {
+ ValuePtrs.resize(N);
+ FullTypes.resize(N);
+ }
+ void push_back(Value *V, Type *Ty) {
+ ValuePtrs.emplace_back(V);
+ FullTypes.emplace_back(Ty);
+ }
void clear() {
assert(ResolveConstants.empty() && "Constants not resolved?");
ValuePtrs.clear();
+ FullTypes.clear();
}
Value *operator[](unsigned i) const {
@@ -63,18 +83,22 @@ public:
}
Value *back() const { return ValuePtrs.back(); }
- void pop_back() { ValuePtrs.pop_back(); }
+ void pop_back() {
+ ValuePtrs.pop_back();
+ FullTypes.pop_back();
+ }
bool empty() const { return ValuePtrs.empty(); }
void shrinkTo(unsigned N) {
assert(N <= size() && "Invalid shrinkTo request!");
ValuePtrs.resize(N);
+ FullTypes.resize(N);
}
Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
- Value *getValueFwdRef(unsigned Idx, Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty, Type **FullTy = nullptr);
- void assignValue(Value *V, unsigned Idx);
+ void assignValue(Value *V, unsigned Idx, Type *FullTy);
/// Once all constants are read, this method bulk resolves any forward
/// references.
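
The ValueList changes above thread a FullTypes vector alongside ValuePtrs and keep the two in lockstep through push_back, resize, pop_back, clear and shrinkTo. A small sketch of that parallel-vector discipline with placeholder element types; the names here are illustrative only.

#include <cassert>
#include <cstddef>
#include <vector>

// Two parallel lists: every mutation touches both, so the i-th entries always
// describe the same value.
struct ParallelLists {
  std::vector<int *> Values;           // stand-in for WeakTrackingVH
  std::vector<const char *> FullTypes; // stand-in for Type *

  void push_back(int *V, const char *Ty) {
    Values.push_back(V);
    FullTypes.push_back(Ty);
  }
  void resize(size_t N) {
    Values.resize(N);
    FullTypes.resize(N);
  }
  void shrinkTo(size_t N) {
    assert(N <= Values.size() && "Invalid shrinkTo request!");
    resize(N);
  }
};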
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 763cd12aa2d7..76ca89147e52 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -1,9 +1,8 @@
//===-- BitWriter.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index ba4f932e2e6d..5c7b970a3a75 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1,9 +1,8 @@
//===- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,8 +24,8 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Bitcode/BitCodes.h"
-#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
@@ -215,7 +214,8 @@ private:
const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
- unsigned FSModRefsAbbrev);
+ unsigned FSModRefsAbbrev,
+ unsigned FSModVTableRefsAbbrev);
void assignValueId(GlobalValue::GUID ValGUID) {
GUIDToValueIdMap[ValGUID] = ++GlobalValueId;
@@ -318,6 +318,8 @@ private:
void writeDILexicalBlockFile(const DILexicalBlockFile *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev);
+ void writeDICommonBlock(const DICommonBlock *N,
+ SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
void writeDINamespace(const DINamespace *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev);
void writeDIMacro(const DIMacro *N, SmallVectorImpl<uint64_t> &Record,
@@ -560,6 +562,8 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::Min: return bitc::RMW_MIN;
case AtomicRMWInst::UMax: return bitc::RMW_UMAX;
case AtomicRMWInst::UMin: return bitc::RMW_UMIN;
+ case AtomicRMWInst::FAdd: return bitc::RMW_FADD;
+ case AtomicRMWInst::FSub: return bitc::RMW_FSUB;
}
}
@@ -635,6 +639,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_CAPTURE;
case Attribute::NoDuplicate:
return bitc::ATTR_KIND_NO_DUPLICATE;
+ case Attribute::NoFree:
+ return bitc::ATTR_KIND_NOFREE;
case Attribute::NoImplicitFloat:
return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT;
case Attribute::NoInline:
@@ -653,6 +659,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_RED_ZONE;
case Attribute::NoReturn:
return bitc::ATTR_KIND_NO_RETURN;
+ case Attribute::NoSync:
+ return bitc::ATTR_KIND_NOSYNC;
case Attribute::NoCfCheck:
return bitc::ATTR_KIND_NOCF_CHECK;
case Attribute::NoUnwind:
@@ -707,10 +715,16 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_SWIFT_SELF;
case Attribute::UWTable:
return bitc::ATTR_KIND_UW_TABLE;
+ case Attribute::WillReturn:
+ return bitc::ATTR_KIND_WILLRETURN;
case Attribute::WriteOnly:
return bitc::ATTR_KIND_WRITEONLY;
case Attribute::ZExt:
return bitc::ATTR_KIND_Z_EXT;
+ case Attribute::ImmArg:
+ return bitc::ATTR_KIND_IMMARG;
+ case Attribute::SanitizeMemTag:
+ return bitc::ATTR_KIND_SANITIZE_MEMTAG;
case Attribute::EndAttrKinds:
llvm_unreachable("Can not encode end-attribute kinds marker.");
case Attribute::None:
@@ -742,7 +756,7 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
Record.push_back(1);
Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
Record.push_back(Attr.getValueAsInt());
- } else {
+ } else if (Attr.isStringAttribute()) {
StringRef Kind = Attr.getKindAsString();
StringRef Val = Attr.getValueAsString();
@@ -753,6 +767,13 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
Record.append(Val.begin(), Val.end());
Record.push_back(0);
}
+ } else {
+ assert(Attr.isTypeAttribute());
+ Type *Ty = Attr.getValueAsType();
+ Record.push_back(Ty ? 6 : 5);
+ Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+ if (Ty)
+ Record.push_back(VE.getTypeID(Attr.getValueAsType()));
}
}
@@ -926,10 +947,13 @@ void ModuleBitcodeWriter::writeTypeTable() {
}
case Type::VectorTyID: {
VectorType *VT = cast<VectorType>(T);
- // VECTOR [numelts, eltty]
+ // VECTOR [numelts, eltty] or
+ // [numelts, eltty, scalable]
Code = bitc::TYPE_CODE_VECTOR;
TypeVals.push_back(VT->getNumElements());
TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+ if (VT->isScalable())
+ TypeVals.push_back(VT->isScalable());
break;
}
}
@@ -991,6 +1015,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
RawFlags |= Flags.NotEligibleToImport; // bool
RawFlags |= (Flags.Live << 1);
RawFlags |= (Flags.DSOLocal << 2);
+ RawFlags |= (Flags.CanAutoHide << 3);
// Linkage don't need to be remapped at that time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
@@ -1001,7 +1026,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
}
static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) {
- uint64_t RawFlags = Flags.ReadOnly;
+ uint64_t RawFlags = Flags.MaybeReadOnly | (Flags.MaybeWriteOnly << 1);
return RawFlags;
}
@@ -1256,7 +1281,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
GV.hasComdat() ||
GV.hasAttributes() ||
- GV.isDSOLocal()) {
+ GV.isDSOLocal() ||
+ GV.hasPartition()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1268,6 +1294,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(VE.getAttributeListID(AL));
Vals.push_back(GV.isDSOLocal());
+ Vals.push_back(addToStrtab(GV.getPartition()));
+ Vals.push_back(GV.getPartition().size());
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
@@ -1305,6 +1333,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(F.isDSOLocal());
Vals.push_back(F.getAddressSpace());
+ Vals.push_back(addToStrtab(F.getPartition()));
+ Vals.push_back(F.getPartition().size());
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -1327,6 +1357,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(getEncodedThreadLocalMode(A));
Vals.push_back(getEncodedUnnamedAddr(A));
Vals.push_back(A.isDSOLocal());
+ Vals.push_back(addToStrtab(A.getPartition()));
+ Vals.push_back(A.getPartition().size());
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
@@ -1345,6 +1377,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(getEncodedLinkage(I));
Vals.push_back(getEncodedVisibility(I));
Vals.push_back(I.isDSOLocal());
+ Vals.push_back(addToStrtab(I.getPartition()));
+ Vals.push_back(I.getPartition().size());
Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals);
Vals.clear();
}
@@ -1683,6 +1717,20 @@ void ModuleBitcodeWriter::writeDILexicalBlockFile(
Record.clear();
}
+void ModuleBitcodeWriter::writeDICommonBlock(const DICommonBlock *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getScope()));
+ Record.push_back(VE.getMetadataOrNullID(N->getDecl()));
+ Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
+ Record.push_back(VE.getMetadataOrNullID(N->getFile()));
+ Record.push_back(N->getLineNo());
+
+ Stream.EmitRecord(bitc::METADATA_COMMON_BLOCK, Record, Abbrev);
+ Record.clear();
+}
+
void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
@@ -2616,12 +2664,16 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.append(IVI->idx_begin(), IVI->idx_end());
break;
}
- case Instruction::Select:
+ case Instruction::Select: {
Code = bitc::FUNC_CODE_INST_VSELECT;
pushValueAndType(I.getOperand(1), InstID, Vals);
pushValue(I.getOperand(2), InstID, Vals);
pushValueAndType(I.getOperand(0), InstID, Vals);
+ uint64_t Flags = getOptimizationFlags(&I);
+ if (Flags != 0)
+ Vals.push_back(Flags);
break;
+ }
case Instruction::ExtractElement:
Code = bitc::FUNC_CODE_INST_EXTRACTELT;
pushValueAndType(I.getOperand(0), InstID, Vals);
@@ -2776,6 +2828,41 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest()));
break;
}
+ case Instruction::CallBr: {
+ const CallBrInst *CBI = cast<CallBrInst>(&I);
+ const Value *Callee = CBI->getCalledValue();
+ FunctionType *FTy = CBI->getFunctionType();
+
+ if (CBI->hasOperandBundles())
+ writeOperandBundles(CBI, InstID);
+
+ Code = bitc::FUNC_CODE_INST_CALLBR;
+
+ Vals.push_back(VE.getAttributeListID(CBI->getAttributes()));
+
+ Vals.push_back(CBI->getCallingConv() << bitc::CALL_CCONV |
+ 1 << bitc::CALL_EXPLICIT_TYPE);
+
+ Vals.push_back(VE.getValueID(CBI->getDefaultDest()));
+ Vals.push_back(CBI->getNumIndirectDests());
+ for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
+ Vals.push_back(VE.getValueID(CBI->getIndirectDest(i)));
+
+ Vals.push_back(VE.getTypeID(FTy));
+ pushValueAndType(Callee, InstID, Vals);
+
+ // Emit value #'s for the fixed parameters.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ pushValue(I.getOperand(i), InstID, Vals); // fixed param.
+
+ // Emit type/value pairs for varargs params.
+ if (FTy->isVarArg()) {
+ for (unsigned i = FTy->getNumParams(), e = CBI->getNumArgOperands();
+ i != e; ++i)
+ pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
+ }
+ break;
+ }
case Instruction::Unreachable:
Code = bitc::FUNC_CODE_INST_UNREACHABLE;
AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
@@ -3528,6 +3615,19 @@ static void writeTypeIdSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
W.second);
}
+static void writeTypeIdCompatibleVtableSummaryRecord(
+ SmallVector<uint64_t, 64> &NameVals, StringTableBuilder &StrtabBuilder,
+ const std::string &Id, const TypeIdCompatibleVtableInfo &Summary,
+ ValueEnumerator &VE) {
+ NameVals.push_back(StrtabBuilder.add(Id));
+ NameVals.push_back(Id.size());
+
+ for (auto &P : Summary) {
+ NameVals.push_back(P.AddressPointOffset);
+ NameVals.push_back(VE.getValueID(P.VTableVI.getValue()));
+ }
+}
+
// Helper to emit a single function summary record.
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
@@ -3538,11 +3638,13 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
FunctionSummary *FS = cast<FunctionSummary>(Summary);
writeFunctionTypeMetadataRecords(Stream, FS);
+ auto SpecialRefCnts = FS->specialRefCounts();
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(getEncodedFFlags(FS->fflags()));
NameVals.push_back(FS->refs().size());
- NameVals.push_back(FS->immutableRefCount());
+ NameVals.push_back(SpecialRefCnts.first); // rorefcnt
+ NameVals.push_back(SpecialRefCnts.second); // worefcnt
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
@@ -3572,7 +3674,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
// and emit them in a summary record.
void ModuleBitcodeWriterBase::writeModuleLevelReferences(
const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals,
- unsigned FSModRefsAbbrev) {
+ unsigned FSModRefsAbbrev, unsigned FSModVTableRefsAbbrev) {
auto VI = Index->getValueInfo(V.getGUID());
if (!VI || VI.getSummaryList().empty()) {
// Only declarations should not have a summary (a declaration might however
@@ -3586,6 +3688,10 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
NameVals.push_back(getEncodedGVarFlags(VS->varflags()));
+ auto VTableFuncs = VS->vTableFuncs();
+ if (!VTableFuncs.empty())
+ NameVals.push_back(VS->refs().size());
+
unsigned SizeBeforeRefs = NameVals.size();
for (auto &RI : VS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
@@ -3593,15 +3699,26 @@ void ModuleBitcodeWriterBase::writeModuleLevelReferences(
// been initialized from a DenseSet.
llvm::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end());
- Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals,
- FSModRefsAbbrev);
+ if (VTableFuncs.empty())
+ Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals,
+ FSModRefsAbbrev);
+ else {
+ // VTableFuncs pairs should already be sorted by offset.
+ for (auto &P : VTableFuncs) {
+ NameVals.push_back(VE.getValueID(P.FuncVI.getValue()));
+ NameVals.push_back(P.VTableOffset);
+ }
+
+ Stream.EmitRecord(bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS, NameVals,
+ FSModVTableRefsAbbrev);
+ }
NameVals.clear();
}
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 6;
+static const uint64_t INDEX_VERSION = 7;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
@@ -3643,7 +3760,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3660,7 +3778,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid [, rel_block_freq])
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3675,6 +3794,17 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ // Abbrev for FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS.
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
+ // numrefs x valueid, n x (valueid , offset)
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned FSModVTableRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
// Abbrev for FS_ALIAS.
Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS));
@@ -3683,6 +3813,16 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ // Abbrev for FS_TYPE_ID_METADATA
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_TYPE_ID_METADATA));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // typeid strtab index
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // typeid length
+ // n x (valueid , offset)
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned TypeIdCompatibleVtableAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -3707,7 +3847,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
// Capture references from GlobalVariable initializers, which are outside
// of a function scope.
for (const GlobalVariable &G : M.globals())
- writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev);
+ writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev,
+ FSModVTableRefsAbbrev);
for (const GlobalAlias &A : M.aliases()) {
auto *Aliasee = A.getBaseObject();
@@ -3725,6 +3866,14 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
NameVals.clear();
}
+ for (auto &S : Index->typeIdCompatibleVtableMap()) {
+ writeTypeIdCompatibleVtableSummaryRecord(NameVals, StrtabBuilder, S.first,
+ S.second, VE);
+ Stream.EmitRecord(bitc::FS_TYPE_ID_METADATA, NameVals,
+ TypeIdCompatibleVtableAbbrev);
+ NameVals.clear();
+ }
+
Stream.ExitBlock();
}
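The abbreviations added above all follow the writer's usual shape: a literal record code, a few scalar fields, then a single Array operand of VBR-8 values that absorbs the variable-length tail, so one abbreviation covers records with any number of refs or (valueid, offset) pairs. A minimal sketch of that pattern with an invented record code (MY_RECORD_CODE, Flags and the Ref values are illustrative only):

    // Define the abbreviation once, up front.
    auto Abbv = std::make_shared<BitCodeAbbrev>();
    Abbv->Add(BitCodeAbbrevOp(MY_RECORD_CODE));          // literal record code
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // fixed scalar field
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));  // variable-length tail...
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // ...of VBR-8 operands
    unsigned MyAbbrev = Stream.EmitAbbrev(std::move(Abbv));

    // Later, emit any number of records against it.
    SmallVector<uint64_t, 8> Vals = {Flags, Ref0, Ref1};
    Stream.EmitRecord(MY_RECORD_CODE, Vals, MyAbbrev);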
@@ -3762,7 +3911,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3776,8 +3926,10 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // entrycount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // rorefcnt
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // worefcnt
// numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
@@ -3825,9 +3977,13 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
NameVals.clear();
};
+ std::set<GlobalValue::GUID> DefOrUseGUIDs;
forEachSummary([&](GVInfo I, bool IsAliasee) {
GlobalValueSummary *S = I.second;
assert(S);
+ DefOrUseGUIDs.insert(I.first);
+ for (const ValueInfo &VI : S->refs())
+ DefOrUseGUIDs.insert(VI.getGUID());
auto ValueId = getValueId(I.first);
assert(ValueId);
@@ -3879,20 +4035,24 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// Fill in below
NameVals.push_back(0); // numrefs
- NameVals.push_back(0); // immutablerefcnt
+ NameVals.push_back(0); // rorefcnt
+ NameVals.push_back(0); // worefcnt
- unsigned Count = 0, ImmutableRefCnt = 0;
+ unsigned Count = 0, RORefCnt = 0, WORefCnt = 0;
for (auto &RI : FS->refs()) {
auto RefValueId = getValueId(RI.getGUID());
if (!RefValueId)
continue;
NameVals.push_back(*RefValueId);
if (RI.isReadOnly())
- ImmutableRefCnt++;
+ RORefCnt++;
+ else if (RI.isWriteOnly())
+ WORefCnt++;
Count++;
}
NameVals[6] = Count;
- NameVals[7] = ImmutableRefCnt;
+ NameVals[7] = RORefCnt;
+ NameVals[8] = WORefCnt;
bool HasProfileData = false;
for (auto &EI : FS->calls()) {
@@ -3968,20 +4128,30 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
if (!Index.cfiFunctionDefs().empty()) {
for (auto &S : Index.cfiFunctionDefs()) {
- NameVals.push_back(StrtabBuilder.add(S));
- NameVals.push_back(S.size());
+ if (DefOrUseGUIDs.count(
+ GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S)))) {
+ NameVals.push_back(StrtabBuilder.add(S));
+ NameVals.push_back(S.size());
+ }
+ }
+ if (!NameVals.empty()) {
+ Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals);
+ NameVals.clear();
}
- Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals);
- NameVals.clear();
}
if (!Index.cfiFunctionDecls().empty()) {
for (auto &S : Index.cfiFunctionDecls()) {
- NameVals.push_back(StrtabBuilder.add(S));
- NameVals.push_back(S.size());
+ if (DefOrUseGUIDs.count(
+ GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S)))) {
+ NameVals.push_back(StrtabBuilder.add(S));
+ NameVals.push_back(S.size());
+ }
+ }
+ if (!NameVals.empty()) {
+ Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals);
+ NameVals.clear();
}
- Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals);
- NameVals.clear();
}
// Walk the GUIDs that were referenced, and write the
@@ -4055,15 +4225,15 @@ void ModuleBitcodeWriter::write() {
// Emit blockinfo, which defines the standard abbreviations etc.
writeBlockInfo();
+ // Emit information describing all of the types in the module.
+ writeTypeTable();
+
// Emit information about attribute groups.
writeAttributeGroupTable();
// Emit information about parameter attributes.
writeAttributeTable();
- // Emit information describing all of the types in the module.
- writeTypeTable();
-
writeComdats();
// Emit top-level description of module, including target triple, inline asm,
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 41212e575f8e..6796cf8cee54 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -1,9 +1,8 @@
//===- BitcodeWriterPass.cpp - Bitcode writing pass -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index deb04f1bb36c..f59c906c7b75 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -1,9 +1,8 @@
//===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -415,10 +414,8 @@ ValueEnumerator::ValueEnumerator(const Module &M,
EnumerateMetadata(&F, MD->getMetadata());
}
EnumerateType(I.getType());
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- EnumerateAttributes(CI->getAttributes());
- else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I))
- EnumerateAttributes(II->getAttributes());
+ if (const auto *Call = dyn_cast<CallBase>(&I))
+ EnumerateAttributes(Call->getAttributes());
// Enumerate metadata attached with this instruction.
MDs.clear();
@@ -752,7 +749,8 @@ void ValueEnumerator::organizeMetadata() {
// Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
// and fix up MetadataMap.
- std::vector<const Metadata *> OldMDs = std::move(MDs);
+ std::vector<const Metadata *> OldMDs;
+ MDs.swap(OldMDs);
MDs.reserve(OldMDs.size());
for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
auto *MD = Order[I].get(OldMDs);
@@ -951,9 +949,11 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
incorporateFunctionMetadata(F);
// Adding function arguments to the value table.
- for (const auto &I : F.args())
+ for (const auto &I : F.args()) {
EnumerateValue(&I);
-
+ if (I.hasAttribute(Attribute::ByVal))
+ EnumerateType(I.getParamByValType());
+ }
FirstFuncConstantID = Values.size();
// Add all function-level constants to the value table.
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 011356c32601..112f0b4a1dc4 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -1,9 +1,8 @@
//===- Bitcode/Writer/ValueEnumerator.h - Number values ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitstream/Reader/BitstreamReader.cpp
index 771cf3d927bc..a4a97ced5457 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitstream/Reader/BitstreamReader.cpp
@@ -1,13 +1,12 @@
//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
@@ -18,9 +17,8 @@ using namespace llvm;
// BitstreamCursor implementation
//===----------------------------------------------------------------------===//
-/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
-/// the block, and return true if the block has an error.
-bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
+/// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
+Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
// Save the current block's state on BlockScope.
BlockScope.push_back(Block(CurCodeSize));
BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
@@ -35,21 +33,39 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
}
// Get the codesize of this block.
- CurCodeSize = ReadVBR(bitc::CodeLenWidth);
- // We can't read more than MaxChunkSize at a time
+ Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
+ if (!MaybeVBR)
+ return MaybeVBR.takeError();
+ CurCodeSize = MaybeVBR.get();
+
if (CurCodeSize > MaxChunkSize)
- return true;
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
+ CurCodeSize);
SkipToFourByteBoundary();
- unsigned NumWords = Read(bitc::BlockSizeWidth);
- if (NumWordsP) *NumWordsP = NumWords;
-
- // Validate that this block is sane.
- return CurCodeSize == 0 || AtEndOfStream();
+ Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
+ if (!MaybeNum)
+ return MaybeNum.takeError();
+ word_t NumWords = MaybeNum.get();
+ if (NumWordsP)
+ *NumWordsP = NumWords;
+
+ if (CurCodeSize == 0)
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "can't enter sub-block: current code size is 0");
+ if (AtEndOfStream())
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "can't enter sub block: already at end of stream");
+
+ return Error::success();
}
-static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
- const BitCodeAbbrevOp &Op) {
+static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
+ const BitCodeAbbrevOp &Op) {
assert(!Op.isLiteral() && "Not to be used with literals!");
// Decode the value as we are commanded.
@@ -64,13 +80,16 @@ static uint64_t readAbbreviatedField(BitstreamCursor &Cursor,
assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
case BitCodeAbbrevOp::Char6:
- return BitCodeAbbrevOp::DecodeChar6(Cursor.Read(6));
+ if (Expected<unsigned> Res = Cursor.Read(6))
+ return BitCodeAbbrevOp::DecodeChar6(Res.get());
+ else
+ return Res.takeError();
}
llvm_unreachable("invalid abbreviation encoding");
}
-static void skipAbbreviatedField(BitstreamCursor &Cursor,
- const BitCodeAbbrevOp &Op) {
+static Error skipAbbreviatedField(BitstreamCursor &Cursor,
+ const BitCodeAbbrevOp &Op) {
assert(!Op.isLiteral() && "Not to be used with literals!");
// Decode the value as we are commanded.
@@ -80,26 +99,43 @@ static void skipAbbreviatedField(BitstreamCursor &Cursor,
llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
- Cursor.Read((unsigned)Op.getEncodingData());
- break;
+ if (Expected<unsigned> Res = Cursor.Read((unsigned)Op.getEncodingData()))
+ break;
+ else
+ return Res.takeError();
case BitCodeAbbrevOp::VBR:
assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
- Cursor.ReadVBR64((unsigned)Op.getEncodingData());
- break;
+ if (Expected<uint64_t> Res =
+ Cursor.ReadVBR64((unsigned)Op.getEncodingData()))
+ break;
+ else
+ return Res.takeError();
case BitCodeAbbrevOp::Char6:
- Cursor.Read(6);
- break;
+ if (Expected<unsigned> Res = Cursor.Read(6))
+ break;
+ else
+ return Res.takeError();
}
+ return ErrorSuccess();
}
/// skipRecord - Read the current record and discard it.
-unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
+Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip unabbreviated records by reading past their entries.
if (AbbrevID == bitc::UNABBREV_RECORD) {
- unsigned Code = ReadVBR(6);
- unsigned NumElts = ReadVBR(6);
+ Expected<uint32_t> MaybeCode = ReadVBR(6);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ unsigned Code = MaybeCode.get();
+ Expected<uint32_t> MaybeVBR = ReadVBR(6);
+ if (!MaybeVBR)
+ return MaybeVBR.get();
+ unsigned NumElts = MaybeVBR.get();
for (unsigned i = 0; i != NumElts; ++i)
- (void)ReadVBR64(6);
+ if (Expected<uint64_t> Res = ReadVBR64(6))
+ ; // Skip!
+ else
+ return Res.takeError();
return Code;
}
@@ -111,8 +147,13 @@ unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
else {
if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
- report_fatal_error("Abbreviation starts with an Array or a Blob");
- Code = readAbbreviatedField(*this, CodeOp);
+ return llvm::createStringError(
+ std::errc::illegal_byte_sequence,
+ "Abbreviation starts with an Array or a Blob");
+ Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ Code = MaybeCode.get();
}
for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
@@ -122,13 +163,17 @@ unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
Op.getEncoding() != BitCodeAbbrevOp::Blob) {
- skipAbbreviatedField(*this, Op);
+ if (Error Err = skipAbbreviatedField(*this, Op))
+ return std::move(Err);
continue;
}
if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
// Array case. Read the number of elements as a vbr6.
- unsigned NumElts = ReadVBR(6);
+ Expected<uint32_t> MaybeNum = ReadVBR(6);
+ if (!MaybeNum)
+ return MaybeNum.takeError();
+ unsigned NumElts = MaybeNum.get();
// Get the element encoding.
assert(i+2 == e && "array op not second to last?");
@@ -141,15 +186,22 @@ unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
report_fatal_error("Array element type can't be an Array or a Blob");
case BitCodeAbbrevOp::Fixed:
assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
- JumpToBit(GetCurrentBitNo() + NumElts * EltEnc.getEncodingData());
+ if (Error Err = JumpToBit(GetCurrentBitNo() +
+ NumElts * EltEnc.getEncodingData()))
+ return std::move(Err);
break;
case BitCodeAbbrevOp::VBR:
assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
for (; NumElts; --NumElts)
- ReadVBR64((unsigned)EltEnc.getEncodingData());
+ if (Expected<uint64_t> Res =
+ ReadVBR64((unsigned)EltEnc.getEncodingData()))
+ ; // Skip!
+ else
+ return Res.takeError();
break;
case BitCodeAbbrevOp::Char6:
- JumpToBit(GetCurrentBitNo() + NumElts * 6);
+ if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
+ return std::move(Err);
break;
}
continue;
@@ -157,7 +209,10 @@ unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
// Blob case. Read the number of bytes as a vbr6.
- unsigned NumElts = ReadVBR(6);
+ Expected<uint32_t> MaybeNum = ReadVBR(6);
+ if (!MaybeNum)
+ return MaybeNum.takeError();
+ unsigned NumElts = MaybeNum.get();
SkipToFourByteBoundary(); // 32-bit alignment
// Figure out where the end of this blob will be including tail padding.
@@ -171,19 +226,30 @@ unsigned BitstreamCursor::skipRecord(unsigned AbbrevID) {
}
// Skip over the blob.
- JumpToBit(NewEnd);
+ if (Error Err = JumpToBit(NewEnd))
+ return std::move(Err);
}
return Code;
}
-unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
- SmallVectorImpl<uint64_t> &Vals,
- StringRef *Blob) {
+Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
+ SmallVectorImpl<uint64_t> &Vals,
+ StringRef *Blob) {
if (AbbrevID == bitc::UNABBREV_RECORD) {
- unsigned Code = ReadVBR(6);
- unsigned NumElts = ReadVBR(6);
+ Expected<uint32_t> MaybeCode = ReadVBR(6);
+ if (!MaybeCode)
+ return MaybeCode.takeError();
+ uint32_t Code = MaybeCode.get();
+ Expected<uint32_t> MaybeNumElts = ReadVBR(6);
+ if (!MaybeNumElts)
+ return MaybeNumElts.takeError();
+ uint32_t NumElts = MaybeNumElts.get();
+
for (unsigned i = 0; i != NumElts; ++i)
- Vals.push_back(ReadVBR64(6));
+ if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
+ Vals.push_back(MaybeVal.get());
+ else
+ return MaybeVal.takeError();
return Code;
}
@@ -199,7 +265,10 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
report_fatal_error("Abbreviation starts with an Array or a Blob");
- Code = readAbbreviatedField(*this, CodeOp);
+ if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
+ Code = MaybeCode.get();
+ else
+ return MaybeCode.takeError();
}
for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
@@ -211,13 +280,19 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
Op.getEncoding() != BitCodeAbbrevOp::Blob) {
- Vals.push_back(readAbbreviatedField(*this, Op));
+ if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
+ Vals.push_back(MaybeVal.get());
+ else
+ return MaybeVal.takeError();
continue;
}
if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
// Array case. Read the number of elements as a vbr6.
- unsigned NumElts = ReadVBR(6);
+ Expected<uint32_t> MaybeNumElts = ReadVBR(6);
+ if (!MaybeNumElts)
+ return MaybeNumElts.takeError();
+ uint32_t NumElts = MaybeNumElts.get();
// Get the element encoding.
if (i + 2 != e)
@@ -233,22 +308,36 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
report_fatal_error("Array element type can't be an Array or a Blob");
case BitCodeAbbrevOp::Fixed:
for (; NumElts; --NumElts)
- Vals.push_back(Read((unsigned)EltEnc.getEncodingData()));
+ if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
+ Read((unsigned)EltEnc.getEncodingData()))
+ Vals.push_back(MaybeVal.get());
+ else
+ return MaybeVal.takeError();
break;
case BitCodeAbbrevOp::VBR:
for (; NumElts; --NumElts)
- Vals.push_back(ReadVBR64((unsigned)EltEnc.getEncodingData()));
+ if (Expected<uint64_t> MaybeVal =
+ ReadVBR64((unsigned)EltEnc.getEncodingData()))
+ Vals.push_back(MaybeVal.get());
+ else
+ return MaybeVal.takeError();
break;
case BitCodeAbbrevOp::Char6:
for (; NumElts; --NumElts)
- Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
+ if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
+ Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
+ else
+ return MaybeVal.takeError();
}
continue;
}
assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
// Blob case. Read the number of bytes as a vbr6.
- unsigned NumElts = ReadVBR(6);
+ Expected<uint32_t> MaybeNumElts = ReadVBR(6);
+ if (!MaybeNumElts)
+ return MaybeNumElts.takeError();
+ uint32_t NumElts = MaybeNumElts.get();
SkipToFourByteBoundary(); // 32-bit alignment
// Figure out where the end of this blob will be including tail padding.
@@ -266,7 +355,8 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
// Otherwise, inform the streamer that we need these bytes in memory. Skip
// over tail padding first, in case jumping to NewEnd invalidates the Blob
// pointer.
- JumpToBit(NewEnd);
+ if (Error Err = JumpToBit(NewEnd))
+ return std::move(Err);
const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
// If we can return a reference to the data, do so to avoid copying it.
@@ -282,19 +372,35 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
return Code;
}
-void BitstreamCursor::ReadAbbrevRecord() {
+Error BitstreamCursor::ReadAbbrevRecord() {
auto Abbv = std::make_shared<BitCodeAbbrev>();
- unsigned NumOpInfo = ReadVBR(5);
+ Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
+ if (!MaybeNumOpInfo)
+ return MaybeNumOpInfo.takeError();
+ unsigned NumOpInfo = MaybeNumOpInfo.get();
for (unsigned i = 0; i != NumOpInfo; ++i) {
- bool IsLiteral = Read(1);
+ Expected<word_t> MaybeIsLiteral = Read(1);
+ if (!MaybeIsLiteral)
+ return MaybeIsLiteral.takeError();
+ bool IsLiteral = MaybeIsLiteral.get();
if (IsLiteral) {
- Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
+ Expected<uint64_t> MaybeOp = ReadVBR64(8);
+ if (!MaybeOp)
+ return MaybeOp.takeError();
+ Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
continue;
}
- BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
+ Expected<word_t> MaybeEncoding = Read(3);
+ if (!MaybeEncoding)
+ return MaybeEncoding.takeError();
+ BitCodeAbbrevOp::Encoding E =
+ (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
if (BitCodeAbbrevOp::hasEncodingData(E)) {
- uint64_t Data = ReadVBR64(5);
+ Expected<uint64_t> MaybeData = ReadVBR64(5);
+ if (!MaybeData)
+ return MaybeData.takeError();
+ uint64_t Data = MaybeData.get();
// As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
// and vbr(0) as a literal zero. This is decoded the same way, and avoids
@@ -318,11 +424,14 @@ void BitstreamCursor::ReadAbbrevRecord() {
if (Abbv->getNumOperandInfos() == 0)
report_fatal_error("Abbrev record with no operands");
CurAbbrevs.push_back(std::move(Abbv));
+
+ return Error::success();
}
-Optional<BitstreamBlockInfo>
+Expected<Optional<BitstreamBlockInfo>>
BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
- if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return None;
+ if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
+ return std::move(Err);
BitstreamBlockInfo NewBlockInfo;
@@ -331,7 +440,11 @@ BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
// Read all the records for this module.
while (true) {
- BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
+ Expected<BitstreamEntry> MaybeEntry =
+ advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
+ if (!MaybeEntry)
+ return MaybeEntry.takeError();
+ BitstreamEntry Entry = MaybeEntry.get();
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
@@ -347,7 +460,8 @@ BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
// Read abbrev records, associate them with CurBID.
if (Entry.ID == bitc::DEFINE_ABBREV) {
if (!CurBlockInfo) return None;
- ReadAbbrevRecord();
+ if (Error Err = ReadAbbrevRecord())
+ return std::move(Err);
// ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
// appropriate BlockInfo.
@@ -358,22 +472,28 @@ BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
// Read a record.
Record.clear();
- switch (readRecord(Entry.ID, Record)) {
- default: break; // Default behavior, ignore unknown content.
- case bitc::BLOCKINFO_CODE_SETBID:
- if (Record.size() < 1) return None;
- CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
- break;
- case bitc::BLOCKINFO_CODE_BLOCKNAME: {
- if (!CurBlockInfo) return None;
- if (!ReadBlockInfoNames)
- break; // Ignore name.
- std::string Name;
- for (unsigned i = 0, e = Record.size(); i != e; ++i)
- Name += (char)Record[i];
- CurBlockInfo->Name = Name;
- break;
- }
+ Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
+ if (!MaybeBlockInfo)
+ return MaybeBlockInfo.takeError();
+ switch (MaybeBlockInfo.get()) {
+ default:
+ break; // Default behavior, ignore unknown content.
+ case bitc::BLOCKINFO_CODE_SETBID:
+ if (Record.size() < 1)
+ return None;
+ CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
+ break;
+ case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+ if (!CurBlockInfo)
+ return None;
+ if (!ReadBlockInfoNames)
+ break; // Ignore name.
+ std::string Name;
+ for (unsigned i = 0, e = Record.size(); i != e; ++i)
+ Name += (char)Record[i];
+ CurBlockInfo->Name = Name;
+ break;
+ }
case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
if (!CurBlockInfo) return None;
if (!ReadBlockInfoNames)
@@ -385,6 +505,6 @@ BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
Name));
break;
}
- }
+ }
}
}
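The BitstreamReader conversion above follows a single idiom throughout: every primitive read now returns Expected<T> (or Error), the caller tests it, forwards failures with takeError(), and replaces the old boolean/report_fatal_error paths with createStringError. A self-contained sketch of that idiom; readCodeWidth and enterBlock are made-up names standing in for the real methods, and the usual llvm/Support/Error.h machinery is assumed:

    Expected<unsigned> readCodeWidth(BitstreamCursor &Cursor) {
      Expected<uint32_t> MaybeWidth = Cursor.ReadVBR(bitc::CodeLenWidth);
      if (!MaybeWidth)
        return MaybeWidth.takeError(); // propagate, never swallow
      return MaybeWidth.get();
    }

    Error enterBlock(BitstreamCursor &Cursor) {
      Expected<unsigned> Width = readCodeWidth(Cursor);
      if (!Width)
        return Width.takeError();
      if (*Width == 0)
        return llvm::createStringError(std::errc::illegal_byte_sequence,
                                       "can't enter block: code size is 0");
      return Error::success();
    }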
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 632ea8e9cdc4..444f618d8b8c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,9 +1,8 @@
//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 5dce3c2499e5..0cf2e6d78f7f 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 37dcb0be824e..c99800659bfd 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 467bcc2edc6f..9247dd844936 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 797f05ee5cf3..d158e70b86ac 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,9 +1,8 @@
//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
///
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
SmallVectorImpl<uint64_t> *Offsets,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
@@ -92,7 +92,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
EI = EB,
EE = STy->element_end();
EI != EE; ++EI)
- ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
+ ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
StartingOffset + SL->getElementOffset(EI - EB));
return;
}
@@ -101,7 +101,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *EltTy = ATy->getElementType();
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
+ ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
StartingOffset + i * EltSize);
return;
}
@@ -110,10 +110,50 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
return;
// Base case: we can get an EVT for this LLVM IR type.
ValueVTs.push_back(TLI.getValueType(DL, Ty));
+ if (MemVTs)
+ MemVTs->push_back(TLI.getMemValueType(DL, Ty));
if (Offsets)
Offsets->push_back(StartingOffset);
}
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
+ StartingOffset);
+}
+
+void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
+ SmallVectorImpl<LLT> &ValueTys,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+ computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+ StartingOffset + SL->getElementOffset(I));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty.isVoidTy())
+ return;
+ // Base case: we can get an LLT for this LLVM IR type.
+ ValueTys.push_back(getLLTForType(Ty, DL));
+ if (Offsets != nullptr)
+ Offsets->push_back(StartingOffset * 8);
+}
+
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
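As a worked example of the flattening the two routines above perform (illustrative, assuming a typical 64-bit DataLayout where i32 and float are both 4 bytes):

    For Ty = { i32, [2 x float] }:
      ComputeValueVTs : ValueVTs = { i32, f32, f32 },  Offsets = { 0, 4, 8 }   (bytes)
      computeValueLLTs: ValueTys = { s32, s32, s32 },  Offsets = { 0, 32, 64 } (bits,
                        via the StartingOffset * 8 in its base case)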
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index d93716287981..b11148595136 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 9011f025f595..f6ef85a5b78f 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/AccelTable.cpp b/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 95875ccb8a0b..b1b7921ea976 100644
--- a/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,10 +55,10 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
for (auto &E : Entries) {
// Unique the entries.
- std::stable_sort(E.second.Values.begin(), E.second.Values.end(),
- [](const AccelTableData *A, const AccelTableData *B) {
- return *A < *B;
- });
+ llvm::stable_sort(E.second.Values,
+ [](const AccelTableData *A, const AccelTableData *B) {
+ return *A < *B;
+ });
E.second.Values.erase(
std::unique(E.second.Values.begin(), E.second.Values.end()),
E.second.Values.end());
@@ -82,10 +81,9 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
// Sort the contents of the buckets by hash value so that hash collisions end
// up together. Stable sort makes testing easier and doesn't cost much more.
for (auto &Bucket : Buckets)
- std::stable_sort(Bucket.begin(), Bucket.end(),
- [](HashData *LHS, HashData *RHS) {
- return LHS->HashValue < RHS->HashValue;
- });
+ llvm::stable_sort(Bucket, [](HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
}
namespace {
@@ -557,8 +555,8 @@ void llvm::emitDWARF5AccelTable(
SmallVector<unsigned, 1> CUIndex(CUs.size());
int Count = 0;
for (const auto &CU : enumerate(CUs)) {
- if (CU.value()->getCUNode()->getNameTableKind() ==
- DICompileUnit::DebugNameTableKind::None)
+ if (CU.value()->getCUNode()->getNameTableKind() !=
+ DICompileUnit::DebugNameTableKind::Default)
continue;
CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
@@ -616,30 +614,10 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
Asm->emitInt32(QualifiedNameHash);
}
-#ifndef _MSC_VER
-// The lines below are rejected by older versions (TBD) of MSVC.
constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
-#else
-// FIXME: Erase this path once the minimum MSCV version has been bumped.
-const SmallVector<AppleAccelTableData::Atom, 4>
- AppleAccelTableOffsetData::Atoms = {
- Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms =
- {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
- Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
- Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
-const SmallVector<AppleAccelTableData::Atom, 4>
- AppleAccelTableStaticOffsetData::Atoms = {
- Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-const SmallVector<AppleAccelTableData::Atom, 4>
- AppleAccelTableStaticTypeData::Atoms = {
- Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
- Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
- Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
-#endif
#ifndef NDEBUG
void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 042243b79259..f11c7de5ed8a 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -24,21 +23,24 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
return IterBool.first->second.Number;
}
-
-void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
+MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
- uint64_t Length = sizeof(uint16_t) // version
- + sizeof(uint8_t) // address_size
- + sizeof(uint8_t) // segment_selector_size
- + AddrSize * Pool.size(); // entries
+ StringRef Prefix = "debug_addr_";
+ MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start");
+ MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
+
Asm.OutStreamer->AddComment("Length of contribution");
- Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.EmitLabelDifference(EndLabel, BeginLabel,
+ 4); // TODO: Support DWARF64 format.
+ Asm.OutStreamer->EmitLabel(BeginLabel);
Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
Asm.OutStreamer->AddComment("Address size");
Asm.emitInt8(AddrSize);
Asm.OutStreamer->AddComment("Segment selector size");
Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
+
+ return EndLabel;
}
// Emit addresses into the section given.
@@ -49,8 +51,10 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
// Start the dwarf addr section.
Asm.OutStreamer->SwitchSection(AddrSection);
+ MCSymbol *EndLabel = nullptr;
+
if (Asm.getDwarfVersion() >= 5)
- emitHeader(Asm, AddrSection);
+ EndLabel = emitHeader(Asm, AddrSection);
// Define the symbol that marks the start of the contribution.
// It is referenced via DW_AT_addr_base.
@@ -67,4 +71,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
for (const MCExpr *Entry : Entries)
Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+
+ if (EndLabel)
+ Asm.OutStreamer->EmitLabel(EndLabel);
}
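The reworked header above drops the hand-computed byte count in favor of the usual begin/end label pair, so the contribution length no longer has to track the entry count manually. A minimal sketch of that pattern using the same streamer calls (symbol names illustrative):

    MCSymbol *Begin = Asm.createTempSymbol("debug_addr_start");
    MCSymbol *End = Asm.createTempSymbol("debug_addr_end");
    Asm.EmitLabelDifference(End, Begin, 4); // length field, resolved by the assembler
    Asm.OutStreamer->EmitLabel(Begin);
    // ... version, address size, segment selector size, then the entries ...
    Asm.OutStreamer->EmitLabel(End);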
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 2209c7eb50ed..f92cf72093ca 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -55,7 +54,7 @@ public:
void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
private:
- void emitHeader(AsmPrinter &Asm, MCSection *Section);
+ MCSymbol *emitHeader(AsmPrinter &Asm, MCSection *Section);
/// Symbol designates the start of the contribution to the address table.
MCSymbol *AddressTableBaseSym = nullptr;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7070451e3330..54f6cc2d5571 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1,9 +1,8 @@
//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,7 +34,6 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -60,7 +58,6 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -80,6 +77,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -101,6 +99,9 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Pass.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -143,9 +144,10 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
-static cl::opt<bool>
- PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
- cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+static cl::opt<bool> EnableRemarksSection(
+ "remarks-section",
+ cl::desc("Emit a section containing remark diagnostics metadata"),
+ cl::init(false));
char AsmPrinter::ID = 0;
@@ -232,6 +234,12 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
S.EmitInstruction(Inst, getSubtargetInfo());
}
+void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
+ assert(DD && "Dwarf debug file is not defined.");
+ assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode.");
+ (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+}
+
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
return OutStreamer->getCurrentSectionOnly();
@@ -252,6 +260,9 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .getModuleMetadata(M);
+
OutStreamer->InitSections(false);
// Emit the version-min deployment target directive if needed.
@@ -300,16 +311,17 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
- Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
- DbgTimerName, DbgTimerDescription,
- CodeViewLineTablesGroupName,
- CodeViewLineTablesGroupDescription));
+ Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this),
+ DbgTimerName, DbgTimerDescription,
+ CodeViewLineTablesGroupName,
+ CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
DD->beginModule();
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription,
- DWARFGroupName, DWARFGroupDescription));
+ Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
+ DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
}
}
@@ -362,14 +374,15 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
if (ES)
- Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,
- DWARFGroupName, DWARFGroupDescription));
+ Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
+ EHTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
if (mdconst::extract_or_null<ConstantInt>(
MMI->getModule()->getModuleFlag("cfguardtable")))
- Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
- CFGuardDescription, DWARFGroupName,
- DWARFGroupDescription));
+ Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName,
+ CFGuardDescription, DWARFGroupName,
+ DWARFGroupDescription);
return false;
}
@@ -483,7 +496,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
const DataLayout &DL = GV->getParent()->getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
@@ -658,6 +671,9 @@ void AsmPrinter::EmitFunctionHeader() {
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+ if (F.hasFnAttribute(Attribute::Cold))
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold);
+
if (isVerbose()) {
F.printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, F.getParent());
@@ -738,74 +754,30 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-/// It returns true iff the sched comment was emitted.
-/// Otherwise it returns false.
-static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
- AsmPrinter *AP) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Check for spills and reloads
- int FI;
-
- const MachineFrameInfo &MFI = MF->getFrameInfo();
- bool Commented = false;
-
- auto getSize =
- [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
- unsigned Size = 0;
- for (auto A : Accesses)
- if (MFI.isSpillSlotObjectIndex(
- cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
- ->getFrameIndex()))
- Size += A->getSize();
- return Size;
- };
// We assume a single instruction only has a spill or reload, not
// both.
- const MachineMemOperand *MMO;
- SmallVector<const MachineMemOperand *, 2> Accesses;
- if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload";
- Commented = true;
- }
- } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
- if (auto Size = getSize(Accesses)) {
- CommentOS << Size << "-byte Folded Reload";
- Commented = true;
- }
- } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill";
- Commented = true;
- }
- } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
- if (auto Size = getSize(Accesses)) {
- CommentOS << Size << "-byte Folded Spill";
- Commented = true;
- }
+ Optional<unsigned> Size;
+ if ((Size = MI.getRestoreSize(TII))) {
+ CommentOS << *Size << "-byte Reload\n";
+ } else if ((Size = MI.getFoldedRestoreSize(TII))) {
+ if (*Size)
+ CommentOS << *Size << "-byte Folded Reload\n";
+ } else if ((Size = MI.getSpillSize(TII))) {
+ CommentOS << *Size << "-byte Spill\n";
+ } else if ((Size = MI.getFoldedSpillSize(TII))) {
+ if (*Size)
+ CommentOS << *Size << "-byte Folded Spill\n";
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
- Commented = true;
- CommentOS << " Reload Reuse";
- }
-
- if (Commented) {
- if (AP->EnablePrintSchedInfo) {
- // If any comment was added above and we need sched info comment then add
- // this new comment just after the above comment w/o "\n" between them.
- CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
- return true;
- }
- CommentOS << "\n";
- }
- return false;
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -1093,10 +1065,8 @@ void AsmPrinter::EmitFunctionBody() {
}
}
- if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
- MachineInstr *MIP = const_cast<MachineInstr *>(&MI);
- MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
- }
+ if (isVerbose())
+ emitComments(MI, OutStreamer->GetCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -1105,11 +1075,13 @@ void AsmPrinter::EmitFunctionBody() {
case TargetOpcode::LOCAL_ESCAPE:
emitFrameAlloc(MI);
break;
+ case TargetOpcode::ANNOTATION_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
break;
case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
EmitInlineAsm(&MI);
break;
case TargetOpcode::DBG_VALUE:
@@ -1266,7 +1238,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
// GlobalVariable or Function, i.e., as GlobalValue.
if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() ||
!GV->isConstant() || !GV->isDiscardableIfUnused() ||
- !dyn_cast<GlobalValue>(GV->getOperand(0)))
+ !isa<GlobalValue>(GV->getOperand(0)))
return false;
// To be a got equivalent, at least one of its users need to be a constant
@@ -1329,9 +1301,19 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
else
assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+ bool IsFunction = GIS.getValueType()->isFunctionTy();
+
+ // Treat bitcasts of functions as functions also. This is important at least
+ // on WebAssembly where object and function addresses can't alias each other.
+ if (!IsFunction)
+ if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+ if (CE->getOpcode() == Instruction::BitCast)
+ IsFunction =
+ CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
- if (GIS.getType()->getPointerElementType()->isFunctionTy()) {
+ if (IsFunction) {
OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
if (isa<GlobalIFunc>(GIS))
OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
@@ -1363,6 +1345,66 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
}
}
+void AsmPrinter::emitRemarksSection(Module &M) {
+ RemarkStreamer *RS = M.getContext().getRemarkStreamer();
+ if (!RS)
+ return;
+ const remarks::Serializer &Serializer = RS->getSerializer();
+
+ // Switch to the right section: .remarks/__remarks.
+ MCSection *RemarksSection =
+ OutContext.getObjectFileInfo()->getRemarksSection();
+ OutStreamer->SwitchSection(RemarksSection);
+
+ // Emit the magic number.
+ OutStreamer->EmitBytes(remarks::Magic);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+
+ // Emit the version number: little-endian uint64_t.
+ // The version number immediately follows the magic string and its '\0'.
+ std::array<char, 8> Version;
+ support::endian::write64le(Version.data(), remarks::Version);
+ OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
+
+ // Emit the string table in the section.
+ // Note: we need to use the streamer here to emit it in the section. We can't
+ // just use the serialize function with a raw_ostream because of the way
+ // MCStreamers work.
+ uint64_t StrTabSize =
+ Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
+ // Emit the total size of the string table (the size itself excluded):
+ // little-endian uint64_t.
+ // The total size is located after the version number.
+ // Note: even if no string table is used, emit 0.
+ std::array<char, 8> StrTabSizeBuf;
+ support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+ OutStreamer->EmitBinaryData(
+ StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+
+ if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
+ std::vector<StringRef> StrTabStrings = StrTab->serialize();
+ // Emit a list of null-terminated strings.
+ // Note: the order is important here: the ID used in the remarks corresponds
+ // to the position of the string in the section.
+ for (StringRef Str : StrTabStrings) {
+ OutStreamer->EmitBytes(Str);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+ }
+ }
+
+ // Emit the null-terminated absolute path to the remark file.
+ // The path follows the string table (or the size field if no table is emitted).
+ StringRef FilenameRef = RS->getFilename();
+ SmallString<128> Filename = FilenameRef;
+ sys::fs::make_absolute(Filename);
+ assert(!Filename.empty() && "The filename can't be empty.");
+ OutStreamer->EmitBytes(Filename);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+}
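// [Editor's sketch, not part of the upstream patch] As written by
// emitRemarksSection() above, the section contains: the remarks::Magic string,
// an explicit '\0', a little-endian uint64_t version, a little-endian uint64_t
// string-table size (0 when no string table is attached), the '\0'-terminated
// string-table entries, and finally the '\0'-terminated absolute path to the
// remark file. A minimal consumer-side sketch of the fixed-size part, where
// readLE64 and parseRemarksMeta are hypothetical helpers, not upstream API:
static uint64_t readLE64(const unsigned char *P) {
  uint64_t V = 0;
  for (int I = 7; I >= 0; --I)
    V = (V << 8) | P[I]; // assemble the little-endian value byte by byte
  return V;
}
static const unsigned char *parseRemarksMeta(const unsigned char *P,
                                             uint64_t &Version,
                                             uint64_t &StrTabSize) {
  while (*P) ++P;           // skip the magic string
  ++P;                      // and its explicit '\0'
  Version = readLE64(P);    P += 8;
  StrTabSize = readLE64(P); P += 8;
  return P;                 // string table (if any), then the remark file path
}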
+
bool AsmPrinter::doFinalization(Module &M) {
// Set the MachineFunction to nullptr so that we can catch attempted
// accesses to MF specific features at the module level and so that
@@ -1394,6 +1436,12 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Emit the remarks section contents.
+ // FIXME: Figure out when is the safest time to emit this section. It should
+ // not come after debug info.
+ if (EnableRemarksSection)
+ emitRemarksSection(M);
+
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
TLOF.emitModuleMetadata(*OutStreamer, M);
@@ -1448,7 +1496,6 @@ bool AsmPrinter::doFinalization(Module &M) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endModule();
- delete HI.Handler;
}
Handlers.clear();
DD = nullptr;
@@ -1592,6 +1639,24 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->EmitAddrsigSym(getSymbol(&GV));
}
+ // Emit symbol partition specifications (ELF only).
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ unsigned UniqueID = 0;
+ for (const GlobalValue &GV : M.global_values()) {
+ if (!GV.hasPartition() || GV.isDeclarationForLinker() ||
+ GV.getVisibility() != GlobalValue::DefaultVisibility)
+ continue;
+
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID));
+ OutStreamer->EmitBytes(GV.getPartition());
+ OutStreamer->EmitZeros(1);
+ OutStreamer->EmitValue(
+ MCSymbolRefExpr::create(getSymbol(&GV), OutContext),
+ MAI->getCodePointerSize());
+ }
+ }
+
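// [Editor's sketch, not part of the upstream patch] Each fragment emitted by
// the loop above holds the '\0'-terminated partition name followed by a
// code-pointer-sized reference to the partitioned symbol. In textual assembly
// this comes out roughly as (names hypothetical, exact directive syntax is
// target and assembler dependent):
//   .section .llvm_sympart,"",@llvm_sympart,unique,1
//   .asciz "part1"              # partition name plus terminating zero byte
//   .quad  partitioned_symbol   # MAI->getCodePointerSize() bytes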
// Allow the target to emit any magic that it wants at the end of the file,
// after everything else has gone out.
EmitEndOfAsmFile(M);
@@ -1628,11 +1693,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
-
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
- ? PrintSchedule
- : STI.supportPrintSchedInfo();
}
namespace {
@@ -1905,8 +1965,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
-/// global in the specified llvm.used list for which emitUsedDirectiveFor
-/// is true, as being used with this directive.
+/// global in the specified llvm.used list.
void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1933,7 +1992,7 @@ struct Structor {
/// priority.
void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
bool isCtor) {
- // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1941,12 +2000,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (!InitList) return; // Not an array!
StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
- // FIXME: Only allow the 3-field form in LLVM 4.0.
- if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3)
- return; // Not an array of two or three elements!
- if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
- !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
- if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U)))
+ if (!ETy || ETy->getNumElements() != 3 ||
+ !isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(2U)))
return; // Not (int, ptr, ptr).
// Gather the structors in a form that's convenient for sorting by priority.
@@ -1962,16 +2019,16 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
Structor &S = Structors.back();
S.Priority = Priority->getLimitedValue(65535);
S.Func = CS->getOperand(1);
- if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue())
+ if (!CS->getOperand(2)->isNullValue())
S.ComdatKey =
dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
}
// Emit the function pointers in the target-specific order
unsigned Align = Log2_32(DL.getPointerPrefAlignment());
- std::stable_sort(Structors.begin(), Structors.end(),
- [](const Structor &L,
- const Structor &R) { return L.Priority < R.Priority; });
+ llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
+ return L.Priority < R.Priority;
+ });
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
@@ -2199,7 +2256,10 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// We can emit the pointer value into this slot if the slot is an
// integer slot equal to the size of the pointer.
- if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+ //
+ // If the pointer is larger than the resultant integer, then
+ // as with Trunc just depend on the assembler to truncate it.
+ if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType()))
return OpExpr;
// Otherwise the pointer is smaller than the resultant integer, mask off
@@ -2740,7 +2800,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) {
const MachineConstantPoolEntry &CPE =
MF->getConstantPool()->getConstants()[CPID];
if (!CPE.isMachineConstantPoolEntry()) {
@@ -2858,7 +2918,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
MCCodePaddingContext &Context) const {
assert(MF != nullptr && "Machine function must be valid");
Context.IsPaddingActive = !MF->hasInlineAsm() &&
- !MF->getFunction().optForSize() &&
+ !MF->getFunction().hasOptSize() &&
TM.getOptLevel() != CodeGenOpt::None;
Context.IsBasicBlockReachableViaFallthrough =
std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
@@ -2918,13 +2978,16 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
// Print the main label for the block.
if (MBB.pred_empty() ||
- (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
+ (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() &&
+ !MBB.hasLabelMustBeEmitted())) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
false);
}
} else {
+ if (isVerbose() && MBB.hasLabelMustBeEmitted())
+ OutStreamer->AddComment("Label of block must be emitted");
OutStreamer->EmitLabel(MBB.getSymbol());
}
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index afce3ad3133b..992e44d95306 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -1,9 +1,8 @@
//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
@@ -43,11 +43,11 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
OutStreamer->EmitSLEB128IntValue(Value);
}
-void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const {
if (isVerbose() && Desc)
OutStreamer->AddComment(Desc);
- OutStreamer->EmitULEB128IntValue(Value);
+ OutStreamer->EmitULEB128IntValue(Value, PadTo);
}
/// Emit something like ".uleb128 Hi-Lo".
@@ -183,6 +183,25 @@ void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());
}
+void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi,
+ const MCSymbol *Lo,
+ unsigned Encoding) const {
+ // The least significant 3 bits specify the width of the encoding
+ if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+ EmitLabelDifferenceAsULEB128(Hi, Lo);
+ else
+ EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitCallSiteValue(uint64_t Value,
+ unsigned Encoding) const {
+ // The least significant 3 bits specify the width of the encoding
+ if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+ EmitULEB128(Value);
+ else
+ OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding));
+}
+
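// [Editor's note, not part of the upstream patch] The low three bits of a
// DWARF exception-handling encoding select the storage format, e.g.
// DW_EH_PE_uleb128 = 0x01, DW_EH_PE_udata4 = 0x03, DW_EH_PE_udata8 = 0x04.
// Only the uleb128 case is variable-length, so both helpers above special-case
// (Encoding & 0x7) == DW_EH_PE_uleb128 and let GetSizeOfEncodedValue() pick
// the fixed width for everything else.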
//===----------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 62103e3107c0..7721e996aca5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -1,9 +1,8 @@
//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -155,15 +153,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
- Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
// Enable lexing Masm binary and hex integer literals in intel inline
// assembly.
if (Dialect == InlineAsm::AD_Intel)
Parser->getLexer().setLexMasmIntegers(true);
- if (MF) {
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
- }
emitInlineAsmStart();
// Don't implicitly switch to the text section before the asm.
@@ -176,9 +169,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, int InlineAsmVariant,
- AsmPrinter *AP, unsigned LocCookie,
- raw_ostream &OS) {
+ MachineModuleInfo *MMI, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
// Switch to the inline assembly variant.
OS << "\t.intel_syntax\n\t";
@@ -270,11 +262,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++OpNo; // Skip over the ID number.
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
- /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
- /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
}
}
if (Error) {
@@ -291,9 +281,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, int InlineAsmVariant,
- int AsmPrinterVariant, AsmPrinter *AP,
- unsigned LocCookie, raw_ostream &OS) {
+ MachineModuleInfo *MMI, int AsmPrinterVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
@@ -435,17 +425,25 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
unsigned OpFlags = MI->getOperand(OpNo).getImm();
++OpNo; // Skip over the ID number.
+ // FIXME: Shouldn't arch-independent output template handling go into
+ // PrintAsmOperand?
if (Modifier[0] == 'l') { // Labels are target independent.
- // FIXME: What if the operand isn't an MBB, report error?
- const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
- Sym->print(OS, AP->MAI);
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ } else if (MI->getOperand(OpNo).isMBB()) {
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else {
+ Error = true;
+ }
} else {
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
- Modifier[0] ? Modifier : nullptr,
- OS);
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ Error = AP->PrintAsmOperand(MI, OpNo,
Modifier[0] ? Modifier : nullptr, OS);
}
}
@@ -515,18 +513,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// The variant of the current asmprinter.
int AsmPrinterVariant = MAI->getAssemblerDialect();
- InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
- if (InlineAsmVariant == InlineAsm::AD_ATT)
- EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
- AP, LocCookie, OS);
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS);
else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
-
- // Reset SanitizeAddress based on the function's attribute.
- MCTargetOptions MCOptions = TM.Options.MCOptions;
- MCOptions.SanitizeAddress =
- MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might give surprising results.
@@ -566,7 +557,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
}
- EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
+ EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
@@ -608,32 +599,50 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
}
}
+void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) {
+ assert(MO.isGlobal() && "caller should check MO.isGlobal");
+ getSymbol(MO.getGlobal())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+}
+
/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
/// instruction, using the specified assembler variant. Targets should
-/// override this to format as appropriate.
+/// override this to format as appropriate for machine specific ExtraCodes
+/// or when the arch-independent handling would be too complex otherwise.
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html
const MachineOperand &MO = MI->getOperand(OpNo);
switch (ExtraCode[0]) {
default:
return true; // Unknown modifier.
+ case 'a': // Print as memory address.
+ if (MO.isReg()) {
+ PrintAsmMemoryOperand(MI, OpNo, nullptr, O);
+ return false;
+ }
+ LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates.
case 'c': // Substitute immediate value without immediate syntax
- if (MO.getType() != MachineOperand::MO_Immediate)
- return true;
- O << MO.getImm();
- return false;
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal()) {
+ PrintSymbolOperand(MO, O);
+ return false;
+ }
+ return true;
case 'n': // Negate the immediate constant.
- if (MO.getType() != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << -MO.getImm();
return false;
case 's': // The GCC deprecated s modifier
- if (MO.getType() != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << ((32 - MO.getImm()) & 31);
return false;
@@ -643,7 +652,6 @@ bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Target doesn't support this yet!
return true;
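// [Editor's sketch, not part of the upstream patch] The generic modifiers
// handled in PrintAsmOperand() above as they appear in user inline asm;
// '.set' directives are used so the expansion is visible without touching any
// registers (assumes GNU-style inline asm, function name is illustrative):
void modifier_examples() {
  asm volatile(".set marker_c, %c0" : : "i"(42)); // %c0 -> "42", no immediate prefix
  asm volatile(".set marker_n, %n0" : : "i"(8));  // %n0 -> "-8"
}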
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 2163cc7e3e11..db2ff458eb2e 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,7 @@ class ByteStreamer {
// For now we're just handling the calls we need for dwarf emission/hashing.
virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
- virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+ virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0;
};
class APByteStreamer final : public ByteStreamer {
@@ -49,7 +48,7 @@ public:
AP.OutStreamer->AddComment(Comment);
AP.EmitSLEB128(DWord);
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
AP.OutStreamer->AddComment(Comment);
AP.EmitULEB128(DWord);
}
@@ -66,7 +65,7 @@ class HashingByteStreamer final : public ByteStreamer {
void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
Hash.addSLEB128(DWord);
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
Hash.addULEB128(DWord);
}
};
@@ -103,9 +102,9 @@ public:
}
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override {
raw_svector_ostream OSE(Buffer);
- unsigned Length = encodeULEB128(DWord, OSE);
+ unsigned Length = encodeULEB128(DWord, OSE, PadTo);
if (GenerateComments) {
Comments.push_back(Comment.str());
// Add some empty comments to keep the Buffer and Comments vectors aligned
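// [Editor's note, not part of the upstream patch] The new PadTo parameter is
// forwarded to llvm::encodeULEB128(), which keeps the continuation bit set and
// pads with 0x80 bytes plus a final 0x00 so the value always occupies exactly
// PadTo bytes. For example, encoding the value 5 with PadTo = 4 produces the
// bytes { 0x85, 0x80, 0x80, 0x00 }, which still decodes as 5. DIEBaseTypeRef
// (added in DIE.cpp further down) relies on this to emit a ULEB128 whose size
// is fixed before the referenced DIE offset is known.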
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8cabad4ad312..932959c311fa 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1,9 +1,8 @@
//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
@@ -51,6 +51,7 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -67,6 +68,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -93,6 +95,26 @@
using namespace llvm;
using namespace llvm::codeview;
+namespace {
+class CVMCAdapter : public CodeViewRecordStreamer {
+public:
+ CVMCAdapter(MCStreamer &OS) : OS(&OS) {}
+
+ void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
+
+ void EmitIntValue(uint64_t Value, unsigned Size) {
+ OS->EmitIntValueInHex(Value, Size);
+ }
+
+ void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); }
+
+ void AddComment(const Twine &T) { OS->AddComment(T); }
+
+private:
+ MCStreamer *OS = nullptr;
+};
+} // namespace
+
static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
switch (Type) {
case Triple::ArchType::x86:
@@ -273,7 +295,7 @@ static const DISubprogram *getQualifiedNameComponents(
StringRef ScopeName = getPrettyScopeName(Scope);
if (!ScopeName.empty())
QualifiedNameComponents.push_back(ScopeName);
- Scope = Scope->getScope().resolve();
+ Scope = Scope->getScope();
}
return ClosestSubprogram;
}
@@ -309,7 +331,7 @@ struct CodeViewDebug::TypeLoweringScope {
};
static std::string getFullyQualifiedName(const DIScope *Ty) {
- const DIScope *Scope = Ty->getScope().resolve();
+ const DIScope *Scope = Ty->getScope();
return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
}
@@ -344,7 +366,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
// MSVC.
StringRef DisplayName = SP->getName().split('<').first;
- const DIScope *Scope = SP->getScope().resolve();
+ const DIScope *Scope = SP->getScope();
TypeIndex TI;
if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) {
// If the scope is a DICompositeType, then this must be a method. Member
@@ -364,8 +386,8 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
return recordTypeIndexForDINode(SP, TI);
}
-static bool isTrivial(const DICompositeType *DCTy) {
- return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial);
+static bool isNonTrivial(const DICompositeType *DCTy) {
+ return ((DCTy->getFlags() & DINode::FlagNonTrivial) == DINode::FlagNonTrivial);
}
static FunctionOptions
@@ -376,16 +398,16 @@ getFunctionOptions(const DISubroutineType *Ty,
const DIType *ReturnTy = nullptr;
if (auto TypeArray = Ty->getTypeArray()) {
if (TypeArray.size())
- ReturnTy = TypeArray[0].resolve();
+ ReturnTy = TypeArray[0];
}
if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
- if (!isTrivial(ReturnDCTy))
+ if (isNonTrivial(ReturnDCTy))
FO |= FunctionOptions::CxxReturnUdt;
}
// DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison.
- if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) {
+ if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) {
FO |= FunctionOptions::Constructor;
// TODO: put the FunctionOptions::ConstructorWithVirtualBases flag.
@@ -582,8 +604,9 @@ void CodeViewDebug::endModule() {
clear();
}
-static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
- unsigned MaxFixedRecordLength = 0xF00) {
+static void
+emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
+ unsigned MaxFixedRecordLength = 0xF00) {
// The maximum CV record length is 0xFF00. Most of the strings we emit appear
// after a fixed length portion of the record. The fixed length portion should
// always be less than 0xF00 (3840) bytes, so truncate the string so that the
@@ -594,6 +617,13 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
OS.EmitBytes(NullTerminatedString);
}
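// [Editor's note, not part of the upstream patch] Working out the numbers from
// the comment above: with a maximum record length of 0xFF00 (65280) bytes and
// at most 0xF00 (3840) bytes of fixed-length fields, roughly 0xF000 (61440)
// bytes remain for the symbol name and its '\0' terminator.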
+static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
+ for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
+ if (EE.Value == TypeKind)
+ return EE.Name;
+ return "";
+}
+
void CodeViewDebug::emitTypeInformation() {
if (TypeTable.empty())
return;
@@ -610,31 +640,55 @@ void CodeViewDebug::emitTypeInformation() {
}
TypeTableCollection Table(TypeTable.records());
+ SmallString<512> CommentBlock;
+ raw_svector_ostream CommentOS(CommentBlock);
+ std::unique_ptr<ScopedPrinter> SP;
+ std::unique_ptr<TypeDumpVisitor> TDV;
+ TypeVisitorCallbackPipeline Pipeline;
+
+ if (OS.isVerboseAsm()) {
+ // Construct a block comment describing the type record, for readability.
+ SP = llvm::make_unique<ScopedPrinter>(CommentOS);
+ SP->setPrefix(CommentPrefix);
+ TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false);
+ Pipeline.addCallbackToPipeline(*TDV);
+ }
+
+ // Emit the type record through the CodeView MCStreamer adapter.
+ CVMCAdapter CVMCOS(OS);
+ TypeRecordMapping typeMapping(CVMCOS);
+ Pipeline.addCallbackToPipeline(typeMapping);
+
Optional<TypeIndex> B = Table.getFirst();
while (B) {
// This will fail if the record data is invalid.
CVType Record = Table.getType(*B);
+ CommentBlock.clear();
+
+ auto RecordLen = Record.length();
+ auto RecordKind = Record.kind();
+ if (OS.isVerboseAsm())
+ CVMCOS.AddComment("Record length");
+ CVMCOS.EmitIntValue(RecordLen - 2, 2);
+ if (OS.isVerboseAsm())
+ CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
+ CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
+
+ Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
+
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+
if (OS.isVerboseAsm()) {
- // Emit a block comment describing the type record for readability.
- SmallString<512> CommentBlock;
- raw_svector_ostream CommentOS(CommentBlock);
- ScopedPrinter SP(CommentOS);
- SP.setPrefix(CommentPrefix);
- TypeDumpVisitor TDV(Table, &SP, false);
-
- Error E = codeview::visitTypeRecord(Record, *B, TDV);
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
// emitRawComment will insert its own tab and comment string before
// the first line, so strip off our first one. It also prints its own
// newline.
OS.emitRawComment(
CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
}
- OS.EmitBinaryData(Record.str_data());
B = Table.getNext(*B);
}
}
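// [Editor's sketch, not part of the upstream patch] A CodeView type record
// starts with a 2-byte length that does not count itself, followed by a 2-byte
// TypeLeafKind, then the payload:
//   uint16_t Length; // bytes that follow, excluding this field
//   uint16_t Kind;   // e.g. LF_POINTER, LF_PROCEDURE, ...
// Record.length() above includes the length field itself, which is why the
// loop emits RecordLen - 2.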
@@ -700,6 +754,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
return SourceLanguage::Java;
case dwarf::DW_LANG_D:
return SourceLanguage::D;
+ case dwarf::DW_LANG_Swift:
+ return SourceLanguage::Swift;
default:
// There's no CodeView representation for this language, and CV doesn't
// have an "unknown" option for the language field, so we'll use MASM,
@@ -973,8 +1029,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// If we have a display name, build the fully qualified name by walking the
// chain of scopes.
if (!SP->getName().empty())
- FuncName =
- getFullyQualifiedName(SP->getScope().resolve(), SP->getName());
+ FuncName = getFullyQualifiedName(SP->getScope(), SP->getName());
// If our DISubprogram name is empty, use the mangled name.
if (FuncName.empty())
@@ -1071,6 +1126,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
endSymbolRecord(AnnotEnd);
}
+ for (auto HeapAllocSite : FI.HeapAllocSites) {
+ MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
+ MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
+
+ // The labels might not be defined if the instruction was replaced
+ // somewhere in the codegen pipeline.
+ if (!BeginLabel->isDefined() || !EndLabel->isDefined())
+ continue;
+
+ DIType *DITy = std::get<2>(HeapAllocSite);
+ MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
+ OS.AddComment("Call site offset");
+ OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
+ OS.AddComment("Call site section index");
+ OS.EmitCOFFSectionIndex(BeginLabel);
+ OS.AddComment("Call instruction length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.AddComment("Type index");
+ OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4);
+ endSymbolRecord(HeapAllocEnd);
+ }
+
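// [Editor's sketch, not part of the upstream patch] Field by field, the fixed
// part of the S_HEAPALLOCSITE record emitted above (inside the framing added
// by begin/endSymbolRecord):
//   uint32_t CallSiteOffset; // section-relative offset of the call (secrel32)
//   uint16_t SectionIndex;   // section of the call instruction
//   uint16_t InstrLen;       // EndLabel - BeginLabel, the call's length
//   uint32_t TypeIndex;      // complete type index of the allocated type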
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
@@ -1118,9 +1195,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// If the variable has an attached offset expression, extract it.
// FIXME: Try to handle DW_OP_deref as well.
int64_t ExprOffset = 0;
- if (VI.Expr)
- if (!VI.Expr->extractIfOffset(ExprOffset))
+ bool Deref = false;
+ if (VI.Expr) {
+ // If there is one DW_OP_deref element, use offset of 0 and keep going.
+ if (VI.Expr->getNumElements() == 1 &&
+ VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref)
+ Deref = true;
+ else if (!VI.Expr->extractIfOffset(ExprOffset))
continue;
+ }
// Get the frame register used and the offset.
unsigned FrameReg = 0;
@@ -1130,6 +1213,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// Calculate the label ranges.
LocalVarDefRange DefRange =
createDefRangeMem(CVReg, FrameOffset + ExprOffset);
+
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
@@ -1140,6 +1224,9 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
LocalVariable Var;
Var.DIVar = VI.Var;
Var.DefRanges.emplace_back(std::move(DefRange));
+ if (Deref)
+ Var.UseReferenceType = true;
+
recordLocalVariable(std::move(Var), Scope);
}
}
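// [Editor's note, not part of the upstream patch] The case handled above is a
// frame-index location whose expression is exactly !DIExpression(DW_OP_deref):
// the stack slot holds a pointer to the value rather than the value itself, so
// the variable is recorded with an offset of zero and UseReferenceType set,
// and it is later described through a reference type.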
@@ -1153,13 +1240,15 @@ static bool needsReferenceType(const DbgVariableLocation &Loc) {
}
void CodeViewDebug::calculateRanges(
- LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) {
+ LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries) {
const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
// Calculate the definition ranges.
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const InsnRange &Range = *I;
- const MachineInstr *DVInst = Range.first;
+ for (auto I = Entries.begin(), E = Entries.end(); I != E; ++I) {
+ const auto &Entry = *I;
+ if (!Entry.isDbgValue())
+ continue;
+ const MachineInstr *DVInst = Entry.getInstr();
assert(DVInst->isDebugValue() && "Invalid History entry");
// FIXME: Find a way to represent constant variables, since they are
// relatively common.
@@ -1186,7 +1275,7 @@ void CodeViewDebug::calculateRanges(
// Start over using that.
Var.UseReferenceType = true;
Var.DefRanges.clear();
- calculateRanges(Var, Ranges);
+ calculateRanges(Var, Entries);
return;
}
@@ -1214,21 +1303,15 @@ void CodeViewDebug::calculateRanges(
}
// Compute the label range.
- const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
- const MCSymbol *End = getLabelAfterInsn(Range.second);
- if (!End) {
- // This range is valid until the next overlapping bitpiece. In the
- // common case, ranges will not be bitpieces, so they will overlap.
- auto J = std::next(I);
- const DIExpression *DIExpr = DVInst->getDebugExpression();
- while (J != E &&
- !DIExpr->fragmentsOverlap(J->first->getDebugExpression()))
- ++J;
- if (J != E)
- End = getLabelBeforeInsn(J->first);
- else
- End = Asm->getFunctionEnd();
- }
+ const MCSymbol *Begin = getLabelBeforeInsn(Entry.getInstr());
+ const MCSymbol *End;
+ if (Entry.getEndIndex() != DbgValueHistoryMap::NoEntry) {
+ auto &EndingEntry = Entries[Entry.getEndIndex()];
+ End = EndingEntry.isDbgValue()
+ ? getLabelBeforeInsn(EndingEntry.getInstr())
+ : getLabelAfterInsn(EndingEntry.getInstr());
+ } else
+ End = Asm->getFunctionEnd();
// If the last range end is our begin, just extend the last range.
// Otherwise make a new range.
@@ -1256,7 +1339,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
const DILocation *InlinedAt = IV.second;
// Instruction ranges, specifying where IV is accessible.
- const auto &Ranges = I.second;
+ const auto &Entries = I.second;
LexicalScope *Scope = nullptr;
if (InlinedAt)
@@ -1270,7 +1353,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
LocalVariable Var;
Var.DIVar = DIVar;
- calculateRanges(Var, Ranges);
+ calculateRanges(Var, Entries);
recordLocalVariable(std::move(Var), Scope);
}
}
@@ -1340,8 +1423,8 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
FPO |= FrameProcedureOptions::SecurityChecks;
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
- if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() &&
- !GV.hasFnAttribute(Attribute::OptimizeNone))
+ if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
+ !GV.hasOptSize() && !GV.hasOptNone())
FPO |= FrameProcedureOptions::OptimizedForSpeed;
// FIXME: Set GuardCfg when it is implemented.
CurFn->FrameProcOpts = FPO;
@@ -1379,7 +1462,7 @@ static bool shouldEmitUdt(const DIType *T) {
// MSVC does not emit UDTs for typedefs that are scoped to classes.
if (T->getTag() == dwarf::DW_TAG_typedef) {
- if (DIScope *Scope = T->getScope().resolve()) {
+ if (DIScope *Scope = T->getScope()) {
switch (Scope->getTag()) {
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_class_type:
@@ -1396,7 +1479,7 @@ static bool shouldEmitUdt(const DIType *T) {
const DIDerivedType *DT = dyn_cast<DIDerivedType>(T);
if (!DT)
return true;
- T = DT->getBaseType().resolve();
+ T = DT->getBaseType();
}
return true;
}
@@ -1409,8 +1492,8 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) {
return;
SmallVector<StringRef, 5> QualifiedNameComponents;
- const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
- Ty->getScope().resolve(), QualifiedNameComponents);
+ const DISubprogram *ClosestSubprogram =
+ getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents);
std::string FullyQualifiedName =
getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
@@ -1479,8 +1562,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
}
TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
- DITypeRef UnderlyingTypeRef = Ty->getBaseType();
- TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
+ TypeIndex UnderlyingTypeIndex = getTypeIndex(Ty->getBaseType());
StringRef TypeName = Ty->getName();
addToUDTs(Ty);
@@ -1496,14 +1578,14 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
}
TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
- DITypeRef ElementTypeRef = Ty->getBaseType();
- TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
+ const DIType *ElementType = Ty->getBaseType();
+ TypeIndex ElementTypeIndex = getTypeIndex(ElementType);
// IndexType is size_t, which depends on the bitness of the target.
TypeIndex IndexType = getPointerSizeInBytes() == 8
? TypeIndex(SimpleTypeKind::UInt64Quad)
: TypeIndex(SimpleTypeKind::UInt32Long);
- uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
+ uint64_t ElementSize = getBaseTypeSize(ElementType) / 8;
// Add subranges to array type.
DINodeArray Elements = Ty->getElements();
@@ -1764,7 +1846,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
break;
}
if (IsModifier)
- BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
+ BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType();
}
// Check if the inner type will use an LF_POINTER record. If so, the
@@ -1797,8 +1879,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
- for (DITypeRef ArgTypeRef : Ty->getTypeArray())
- ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+ for (const DIType *ArgType : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgType));
// MSVC uses type none for variadic argument.
if (ReturnAndArgTypeIndices.size() > 1 &&
@@ -1836,7 +1918,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
unsigned Index = 0;
SmallVector<TypeIndex, 8> ArgTypeIndices;
- TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ if (ReturnAndArgs.size() > Index) {
+ ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+ }
// If the first argument is a pointer type and this isn't a static method,
// treat it as the special 'this' parameter, which is encoded separately from
@@ -1844,7 +1929,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
TypeIndex ThisTypeIndex;
if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
if (const DIDerivedType *PtrTy =
- dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) {
+ dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index])) {
if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
Index++;
@@ -1942,7 +2027,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
// Put the Nested flag on a type if it appears immediately inside a tag type.
// Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
// here. That flag is only set on definitions, and not forward declarations.
- const DIScope *ImmediateScope = Ty->getScope().resolve();
+ const DIScope *ImmediateScope = Ty->getScope();
if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
CO |= ClassOptions::Nested;
@@ -1955,7 +2040,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
CO |= ClassOptions::Scoped;
} else {
for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
- Scope = Scope->getScope().resolve()) {
+ Scope = Scope->getScope()) {
if (isa<DISubprogram>(Scope)) {
CO |= ClassOptions::Scoped;
break;
@@ -2075,7 +2160,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
// succeeds, and drop the member if that fails.
assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
uint64_t Offset = DDTy->getOffsetInBits();
- const DIType *Ty = DDTy->getBaseType().resolve();
+ const DIType *Ty = DDTy->getBaseType();
bool FullyResolved = false;
while (!FullyResolved) {
switch (Ty->getTag()) {
@@ -2083,7 +2168,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
case dwarf::DW_TAG_volatile_type:
// FIXME: we should apply the qualifier types to the indirect fields
// rather than dropping them.
- Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+ Ty = cast<DIDerivedType>(Ty)->getBaseType();
break;
default:
FullyResolved = true;
@@ -2184,6 +2269,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
if (ContainsNestedClass)
CO |= ClassOptions::ContainsNestedClass;
+ // MSVC appears to set this flag by searching any destructor or method with
+ // FunctionOptions::Constructor among the emitted members. Clang AST has all
+ // the members, however special member functions are not yet emitted into
+ // debug information. For now checking a class's non-triviality seems enough.
+ // FIXME: not true for a nested unnamed struct.
+ if (isNonTrivial(Ty))
+ CO |= ClassOptions::HasConstructorOrDestructor;
+
std::string FullName = getFullyQualifiedName(Ty);
uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
@@ -2358,7 +2451,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
// Create nested classes.
for (const DIType *Nested : Info.NestedTypes) {
- NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
+ NestedTypeRecord R(getTypeIndex(Nested), Nested->getName());
ContinuationBuilder.writeMemberType(R);
MemberCount++;
}
@@ -2385,10 +2478,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
return VBPType;
}
-TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
- const DIType *Ty = TypeRef.resolve();
- const DIType *ClassTy = ClassTyRef.resolve();
-
+TypeIndex CodeViewDebug::getTypeIndex(const DIType *Ty, const DIType *ClassTy) {
// The null DIType is the void type. Don't try to hash it.
if (!Ty)
return TypeIndex::Void();
@@ -2431,8 +2521,7 @@ CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
}
-TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
- DIType *Ty = TypeRef.resolve();
+TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(const DIType *Ty) {
PointerRecord PR(getTypeIndex(Ty),
getPointerSizeInBytes() == 8 ? PointerKind::Near64
: PointerKind::Near32,
@@ -2441,9 +2530,7 @@ TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
return TypeTable.writeLeafType(PR);
}
-TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
- const DIType *Ty = TypeRef.resolve();
-
+TypeIndex CodeViewDebug::getCompleteTypeIndex(const DIType *Ty) {
// The null DIType is the void type. Don't try to hash it.
if (!Ty)
return TypeIndex::Void();
@@ -2454,7 +2541,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
if (Ty->getTag() == dwarf::DW_TAG_typedef)
(void)getTypeIndex(Ty);
while (Ty->getTag() == dwarf::DW_TAG_typedef)
- Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+ Ty = cast<DIDerivedType>(Ty)->getBaseType();
// If this is a non-record type, the complete type index is the same as the
// normal type index. Just call getTypeIndex.
@@ -2467,11 +2554,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
return getTypeIndex(Ty);
}
- // Check if we've already translated the complete record type.
const auto *CTy = cast<DICompositeType>(Ty);
- auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
- if (!InsertResult.second)
- return InsertResult.first->second;
TypeLoweringScope S(*this);
@@ -2489,6 +2572,13 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
return FwdDeclTI;
}
+ // Check if we've already translated the complete record type.
+ // Insert the type with a null TypeIndex to signify that the type is currently
+ // being lowered.
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+ if (!InsertResult.second)
+ return InsertResult.first->second;
+
TypeIndex TI;
switch (CTy->getTag()) {
case dwarf::DW_TAG_class_type:
@@ -2799,6 +2889,7 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
}
CurFn->Annotations = MF->getCodeViewAnnotations();
+ CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites();
CurFn->End = Asm->getFunctionEnd();
@@ -2914,10 +3005,19 @@ void CodeViewDebug::collectGlobalVariableInfo() {
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
for (const auto *GVE : CU->getGlobalVariables()) {
+ const DIGlobalVariable *DIGV = GVE->getVariable();
+ const DIExpression *DIE = GVE->getExpression();
+
+ // Emit constant global variables in a global symbol section.
+ if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
+ CVGlobalVariable CVGV = {DIGV, DIE};
+ GlobalVariables.emplace_back(std::move(CVGV));
+ }
+
const auto *GV = GlobalMap.lookup(GVE);
if (!GV || GV->isDeclarationForLinker())
continue;
- const DIGlobalVariable *DIGV = GVE->getVariable();
+
DIScope *Scope = DIGV->getScope();
SmallVector<CVGlobalVariable, 1> *VariableList;
if (Scope && isa<DILocalScope>(Scope)) {
@@ -2932,7 +3032,7 @@ void CodeViewDebug::collectGlobalVariableInfo() {
// Emit this global variable into a COMDAT section.
VariableList = &ComdatVariables;
else
- // Emit this globla variable in a single global symbol section.
+ // Emit this global variable in a single global symbol section.
VariableList = &GlobalVariables;
CVGlobalVariable CVGV = {DIGV, GV};
VariableList->emplace_back(std::move(CVGV));
@@ -2955,13 +3055,14 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
// Second, emit each global that is in a comdat into its own .debug$S
// section along with its own symbol substream.
for (const CVGlobalVariable &CVGV : ComdatVariables) {
- MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>();
+ MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName())));
+ Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ emitDebugInfoForGlobal(CVGV);
endCVSubsection(EndLabel);
}
}
@@ -2981,31 +3082,63 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
// Emit each global variable in the specified array.
void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
for (const CVGlobalVariable &CVGV : Globals) {
- MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
- }
-}
-
-void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
- const GlobalVariable *GV,
- MCSymbol *GVSym) {
- // DataSym record, see SymbolRecord.h for more info. Thread local data
- // happens to have the same format as global data.
- SymbolKind DataSym = GV->isThreadLocal()
- ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
- : SymbolKind::S_GTHREAD32)
- : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
- : SymbolKind::S_GDATA32);
- MCSymbol *DataEnd = beginSymbolRecord(DataSym);
- OS.AddComment("Type");
- OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
- OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
- OS.AddComment("Segment");
- OS.EmitCOFFSectionIndex(GVSym);
- OS.AddComment("Name");
- const unsigned LengthOfDataRecord = 12;
- emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
- endSymbolRecord(DataEnd);
+ emitDebugInfoForGlobal(CVGV);
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ if (const GlobalVariable *GV =
+ CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
+ // DataSym record, see SymbolRecord.h for more info. Thread local data
+ // happens to have the same format as global data.
+ MCSymbol *GVSym = Asm->getSymbol(GV);
+ SymbolKind DataSym = GV->isThreadLocal()
+ ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+ : SymbolKind::S_GTHREAD32)
+ : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+ : SymbolKind::S_GDATA32);
+ MCSymbol *DataEnd = beginSymbolRecord(DataSym);
+ OS.AddComment("Type");
+ OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.AddComment("DataOffset");
+ OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+ OS.AddComment("Segment");
+ OS.EmitCOFFSectionIndex(GVSym);
+ OS.AddComment("Name");
+ const unsigned LengthOfDataRecord = 12;
+ emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+ endSymbolRecord(DataEnd);
+ } else {
+ // FIXME: Currently this only emits the global variables in the IR metadata.
+ // This should also emit enums and static data members.
+ const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
+ assert(DIE->isConstant() &&
+ "Global constant variables must contain a constant expression.");
+ uint64_t Val = DIE->getElement(1);
+
+ MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+ OS.AddComment("Type");
+ OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.AddComment("Value");
+
+ // Encoded integers shouldn't need more than 10 bytes.
+ uint8_t data[10];
+ BinaryStreamWriter Writer(data, llvm::support::endianness::little);
+ CodeViewRecordIO IO(Writer);
+ cantFail(IO.mapEncodedInteger(Val));
+ StringRef SRef((char *)data, Writer.getOffset());
+ OS.EmitBinaryData(SRef);
+
+ OS.AddComment("Name");
+ const DIScope *Scope = DIGV->getScope();
+ // For static data members, get the scope from the declaration.
+ if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
+ DIGV->getRawStaticDataMemberDeclaration()))
+ Scope = MemberDecl->getScope();
+ emitNullTerminatedSymbolName(OS,
+ getFullyQualifiedName(Scope, DIGV->getName()));
+ endSymbolRecord(SConstantEnd);
+ }
}
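// [Editor's note, not part of the upstream patch] CodeView "encoded integers"
// as written by mapEncodedInteger() start with a 2-byte field that is either
// the value itself (when below 0x8000) or a numeric-leaf kind such as
// LF_USHORT/LF_ULONG/LF_UQUADWORD followed by the value, so the worst case is
// 2 + 8 bytes, which is why the 10-byte stack buffer above is sufficient. The
// constant itself is element 1 of the DIExpression, i.e. the operand of
// DW_OP_constu in !DIExpression(DW_OP_constu, <value>, DW_OP_stack_value),
// which is the shape DIExpression::isConstant() recognizes.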
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 21557ed1be35..ce57b789d7fa 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -1,9 +1,8 @@
//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
@@ -101,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
struct CVGlobalVariable {
const DIGlobalVariable *DIGV;
- const GlobalVariable *GV;
+ PointerUnion<const GlobalVariable *, const DIExpression *> GVInfo;
};
struct InlineSite {
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LexicalBlock *, 1> ChildBlocks;
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
+ std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
@@ -223,7 +224,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
void calculateRanges(LocalVariable &Var,
- const DbgValueHistoryMap::InstrRanges &Ranges);
+ const DbgValueHistoryMap::Entries &Entries);
static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
const FunctionInfo &FI,
@@ -313,8 +314,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForGlobals();
void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
- void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
- const GlobalVariable *GV, MCSymbol *GVSym);
+ void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
@@ -373,14 +373,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Translates the DIType to codeview if necessary and returns a type index
/// for it.
- codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
- DITypeRef ClassTyRef = DITypeRef());
+ codeview::TypeIndex getTypeIndex(const DIType *Ty,
+ const DIType *ClassTy = nullptr);
codeview::TypeIndex
getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
const DISubroutineType *SubroutineTy);
- codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
+ codeview::TypeIndex getTypeIndexForReferenceTo(const DIType *Ty);
codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
const DICompositeType *Class);
@@ -419,7 +419,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// use this entry point when generating symbol records. The complete and
/// incomplete type indices only differ for record types. All other types use
/// the same index.
- codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef);
+ codeview::TypeIndex getCompleteTypeIndex(const DIType *Ty);
codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty);
codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty);
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index e27659494f08..f4134da48caa 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -1,9 +1,8 @@
//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -212,7 +211,7 @@ const DIE *DIE::getUnitDie() const {
return nullptr;
}
-const DIEUnit *DIE::getUnit() const {
+DIEUnit *DIE::getUnit() const {
const DIE *UnitDie = getUnitDie();
if (UnitDie)
return UnitDie->Owner.dyn_cast<DIEUnit*>();
@@ -507,6 +506,23 @@ LLVM_DUMP_METHOD
void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
//===----------------------------------------------------------------------===//
+// DIEBaseTypeRef Implementation
+//===----------------------------------------------------------------------===//
+
+void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset();
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+ AP->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+}
+
+unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ return ULEB128PadSize;
+}
+
+LLVM_DUMP_METHOD
+void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index; }
+
+//===----------------------------------------------------------------------===//
// DIEDelta Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index b8f1202494d7..bfac8850a2a6 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -226,7 +225,7 @@ void DIEHash::hashLocList(const DIELocList &LocList) {
DwarfDebug &DD = *AP->getDwarfDebug();
const DebugLocStream &Locs = DD.getDebugLocs();
for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
- DD.emitDebugLocEntry(Streamer, Entry);
+ DD.emitDebugLocEntry(Streamer, Entry, nullptr);
}
// Hash an individual attribute \param Attr based on the type of attribute and
@@ -310,6 +309,7 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
// FIXME: It's uncertain whether or not we should handle this at the moment.
case DIEValue::isExpr:
case DIEValue::isLabel:
+ case DIEValue::isBaseTypeRef:
case DIEValue::isDelta:
llvm_unreachable("Add support for additional value types.");
}
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index dae517ab2c29..2e49514c98be 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 09867822c30a..ddd60575b6c0 100644
--- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -1,15 +1,15 @@
//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -31,51 +31,62 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
+namespace {
+using EntryIndex = DbgValueHistoryMap::EntryIndex;
+}
+
// If @MI is a DBG_VALUE with debug value described by a
// defined register, returns the number of this register.
// In the other case, returns 0.
-static unsigned isDescribedByReg(const MachineInstr &MI) {
+static Register isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
assert(MI.getNumOperands() == 4);
+  // If the location of the variable is an entry value (DW_OP_entry_value),
+  // do not consider it a register location.
+ if (MI.getDebugExpression()->isEntryValue())
+ return 0;
// If location of variable is described using a register (directly or
// indirectly), this register is always a first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
}
-void DbgValueHistoryMap::startInstrRange(InlinedEntity Var,
- const MachineInstr &MI) {
+bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
+ const MachineInstr &MI,
+ EntryIndex &NewIndex) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
assert(MI.isDebugValue() && "not a DBG_VALUE");
- auto &Ranges = VarInstrRanges[Var];
- if (!Ranges.empty() && Ranges.back().second == nullptr &&
- Ranges.back().first->isIdenticalTo(MI)) {
+ auto &Entries = VarEntries[Var];
+ if (!Entries.empty() && Entries.back().isDbgValue() &&
+ !Entries.back().isClosed() &&
+ Entries.back().getInstr()->isIdenticalTo(MI)) {
LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << Ranges.back().first << "\t" << MI << "\n");
- return;
+ << "\t" << Entries.back().getInstr() << "\t" << MI
+ << "\n");
+ return false;
}
- Ranges.push_back(std::make_pair(&MI, nullptr));
+ Entries.emplace_back(&MI, Entry::DbgValue);
+ NewIndex = Entries.size() - 1;
+ return true;
}
-void DbgValueHistoryMap::endInstrRange(InlinedEntity Var,
- const MachineInstr &MI) {
- auto &Ranges = VarInstrRanges[Var];
- // Verify that the current instruction range is not yet closed.
- assert(!Ranges.empty() && Ranges.back().second == nullptr);
- // For now, instruction ranges are not allowed to cross basic block
- // boundaries.
- assert(Ranges.back().first->getParent() == MI.getParent());
- Ranges.back().second = &MI;
+EntryIndex DbgValueHistoryMap::startClobber(InlinedEntity Var,
+ const MachineInstr &MI) {
+ auto &Entries = VarEntries[Var];
+ // If an instruction clobbers multiple registers that the variable is
+ // described by, then we may have already created a clobbering instruction.
+ if (Entries.back().isClobber() && Entries.back().getInstr() == &MI)
+ return Entries.size() - 1;
+ Entries.emplace_back(&MI, Entry::Clobber);
+ return Entries.size() - 1;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const {
- const auto &I = VarInstrRanges.find(Var);
- if (I == VarInstrRanges.end())
- return 0;
- const auto &Ranges = I->second;
- if (Ranges.empty() || Ranges.back().second != nullptr)
- return 0;
- return isDescribedByReg(*Ranges.back().first);
+void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
+ // For now, instruction ranges are not allowed to cross basic block
+ // boundaries.
+ assert(isDbgValue() && "Setting end index for non-debug value");
+ assert(!isClosed() && "End index has already been set");
+ EndIndex = Index;
}
void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
@@ -89,6 +100,12 @@ namespace {
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
+// Keeps track of the debug value entries that are currently live for each
+// inlined entity. As the history map entries are stored in a SmallVector, they
+// may be moved at insertion of new entries, so store indices rather than
+// pointers.
+using DbgValueEntriesMap = std::map<InlinedEntity, SmallSet<EntryIndex, 1>>;
+
} // end anonymous namespace
// Claim that @Var is not described by @RegNo anymore.
@@ -114,16 +131,88 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
VarSet.push_back(Var);
}
+/// Create a clobbering entry and end all open debug value entries
+/// for \p Var that are described by \p RegNo using that entry.
+static void clobberRegEntries(InlinedEntity Var, unsigned RegNo,
+ const MachineInstr &ClobberingInstr,
+ DbgValueEntriesMap &LiveEntries,
+ DbgValueHistoryMap &HistMap) {
+ EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr);
+
+ // Close all entries whose values are described by the register.
+ SmallVector<EntryIndex, 4> IndicesToErase;
+ for (auto Index : LiveEntries[Var]) {
+ auto &Entry = HistMap.getEntry(Var, Index);
+ assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+ if (isDescribedByReg(*Entry.getInstr()) == RegNo) {
+ IndicesToErase.push_back(Index);
+ Entry.endEntry(ClobberIndex);
+ }
+ }
+
+ // Drop all entries that have ended.
+ for (auto Index : IndicesToErase)
+ LiveEntries[Var].erase(Index);
+}
+
+/// Add a new debug value for \p Var. Closes all overlapping debug values.
+static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
+ RegDescribedVarsMap &RegVars,
+ DbgValueEntriesMap &LiveEntries,
+ DbgValueHistoryMap &HistMap) {
+ EntryIndex NewIndex;
+ if (HistMap.startDbgValue(Var, DV, NewIndex)) {
+ SmallDenseMap<unsigned, bool, 4> TrackedRegs;
+
+ // If we have created a new debug value entry, close all preceding
+ // live entries that overlap.
+ SmallVector<EntryIndex, 4> IndicesToErase;
+ const DIExpression *DIExpr = DV.getDebugExpression();
+ for (auto Index : LiveEntries[Var]) {
+ auto &Entry = HistMap.getEntry(Var, Index);
+ assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+ const MachineInstr &DV = *Entry.getInstr();
+ bool Overlaps = DIExpr->fragmentsOverlap(DV.getDebugExpression());
+ if (Overlaps) {
+ IndicesToErase.push_back(Index);
+ Entry.endEntry(NewIndex);
+ }
+ if (unsigned Reg = isDescribedByReg(DV))
+ TrackedRegs[Reg] |= !Overlaps;
+ }
+
+ // If the new debug value is described by a register, add tracking of
+ // that register if it is not already tracked.
+ if (unsigned NewReg = isDescribedByReg(DV)) {
+ if (!TrackedRegs.count(NewReg))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ LiveEntries[Var].insert(NewIndex);
+ TrackedRegs[NewReg] = true;
+ }
+
+ // Drop tracking of registers that are no longer used.
+ for (auto I : TrackedRegs)
+ if (!I.second)
+ dropRegDescribedVar(RegVars, I.first, Var);
+
+ // Drop all entries that have ended, and mark the new entry as live.
+ for (auto Index : IndicesToErase)
+ LiveEntries[Var].erase(Index);
+ LiveEntries[Var].insert(NewIndex);
+ }
+}
+
// Terminate the location range for variables described by register at
// @I by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
RegDescribedVarsMap::iterator I,
DbgValueHistoryMap &HistMap,
+ DbgValueEntriesMap &LiveEntries,
const MachineInstr &ClobberingInstr) {
// Iterate over all variables described by this register and add this
// instruction to their history, clobbering it.
for (const auto &Var : I->second)
- HistMap.endInstrRange(Var, ClobberingInstr);
+ clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap);
RegVars.erase(I);
}
@@ -131,115 +220,25 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
// @RegNo by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
DbgValueHistoryMap &HistMap,
+ DbgValueEntriesMap &LiveEntries,
const MachineInstr &ClobberingInstr) {
const auto &I = RegVars.find(RegNo);
if (I == RegVars.end())
return;
- clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
-}
-
-// Returns the first instruction in @MBB which corresponds to
-// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
-static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
- auto LastMI = MBB.getLastNonDebugInstr();
- if (LastMI == MBB.end() || !LastMI->isReturn())
- return nullptr;
- // Assume that epilogue starts with instruction having the same debug location
- // as the return instruction.
- DebugLoc LastLoc = LastMI->getDebugLoc();
- auto Res = LastMI;
- for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(),
- E = MBB.rend();
- I != E; ++I) {
- if (I->getDebugLoc() != LastLoc)
- return &*Res;
- Res = &*I;
- }
- // If all instructions have the same debug location, assume whole MBB is
- // an epilogue.
- return &*MBB.begin();
-}
-
-// Collect registers that are modified in the function body (their
-// contents is changed outside of the prologue and epilogue).
-static void collectChangingRegs(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- BitVector &Regs) {
- for (const auto &MBB : *MF) {
- auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
-
- for (const auto &MI : MBB) {
- // Avoid looking at prologue or epilogue instructions.
- if (&MI == FirstEpilogueInst)
- break;
- if (MI.getFlag(MachineInstr::FrameSetup))
- continue;
-
- // Look for register defs and register masks. Register masks are
- // typically on calls and they clobber everything not in the mask.
- for (const MachineOperand &MO : MI.operands()) {
- // Skip virtual registers since they are handled by the parent.
- if (MO.isReg() && MO.isDef() && MO.getReg() &&
- !TRI->isVirtualRegister(MO.getReg())) {
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI)
- Regs.set(*AI);
- } else if (MO.isRegMask()) {
- Regs.setBitsNotInMask(MO.getRegMask());
- }
- }
- }
- }
+ clobberRegisterUses(RegVars, I, HistMap, LiveEntries, ClobberingInstr);
}
void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
DbgValueHistoryMap &DbgValues,
DbgLabelInstrMap &DbgLabels) {
- BitVector ChangingRegs(TRI->getNumRegs());
- collectChangingRegs(MF, TRI, ChangingRegs);
-
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ unsigned FrameReg = TRI->getFrameRegister(*MF);
RegDescribedVarsMap RegVars;
+ DbgValueEntriesMap LiveEntries;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isDebugInstr()) {
- // Not a DBG_VALUE instruction. It may clobber registers which describe
- // some variables.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg()) {
- // Ignore call instructions that claim to clobber SP. The AArch64
- // backend does this for aggregate function arguments.
- if (MI.isCall() && MO.getReg() == SP)
- continue;
- // If this is a virtual register, only clobber it since it doesn't
- // have aliases.
- if (TRI->isVirtualRegister(MO.getReg()))
- clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI);
- // If this is a register def operand, it may end a debug value
- // range.
- else {
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI)
- if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, DbgValues, MI);
- }
- } else if (MO.isRegMask()) {
- // If this is a register mask operand, clobber all debug values in
- // non-CSRs.
- for (unsigned I : ChangingRegs.set_bits()) {
- // Don't consider SP to be clobbered by register masks.
- if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
- MO.clobbersPhysReg(I)) {
- clobberRegisterUses(RegVars, I, DbgValues, MI);
- }
- }
- }
- }
- continue;
- }
-
if (MI.isDebugValue()) {
assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
// Use the base variable (without any DW_OP_piece expressions)
@@ -250,13 +249,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
"Expected inlined-at fields to agree");
InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
- if (unsigned PrevReg = DbgValues.getRegisterForVar(Var))
- dropRegDescribedVar(RegVars, PrevReg, Var);
-
- DbgValues.startInstrRange(Var, MI);
-
- if (unsigned NewReg = isDescribedByReg(MI))
- addRegDescribedVar(RegVars, NewReg, Var);
+ handleNewDebugValue(Var, MI, RegVars, LiveEntries, DbgValues);
} else if (MI.isDebugLabel()) {
assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
const DILabel *RawLabel = MI.getDebugLabel();
@@ -268,18 +261,75 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
DbgLabels.addInstr(L, MI);
}
- }
- // Make sure locations for register-described variables are valid only
- // until the end of the basic block (unless it's the last basic block, in
- // which case let their liveness run off to the end of the function).
+ if (MI.isDebugInstr())
+ continue;
+
+ // Not a DBG_VALUE instruction. It may clobber registers which describe
+ // some variables.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Ignore call instructions that claim to clobber SP. The AArch64
+ // backend does this for aggregate function arguments.
+ if (MI.isCall() && MO.getReg() == SP)
+ continue;
+ // If this is a virtual register, only clobber it since it doesn't
+ // have aliases.
+ if (TRI->isVirtualRegister(MO.getReg()))
+ clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
+ MI);
+ // If this is a register def operand, it may end a debug value
+            // range. Ignore frame-register defs in the epilogue and prologue;
+            // we expect debuggers to understand that stack locations are
+            // invalid outside of the function body.
+ else if (MO.getReg() != FrameReg ||
+ (!MI.getFlag(MachineInstr::FrameDestroy) &&
+ !MI.getFlag(MachineInstr::FrameSetup))) {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ clobberRegisterUses(RegVars, *AI, DbgValues, LiveEntries, MI);
+ }
+ } else if (MO.isRegMask()) {
+ // If this is a register mask operand, clobber all debug values in
+ // non-CSRs.
+ SmallVector<unsigned, 32> RegsToClobber;
+ // Don't consider SP to be clobbered by register masks.
+ for (auto It : RegVars) {
+ unsigned int Reg = It.first;
+ if (Reg != SP && TRI->isPhysicalRegister(Reg) &&
+ MO.clobbersPhysReg(Reg))
+ RegsToClobber.push_back(Reg);
+ }
+
+ for (unsigned Reg : RegsToClobber) {
+ clobberRegisterUses(RegVars, Reg, DbgValues, LiveEntries, MI);
+ }
+ }
+ } // End MO loop.
+ } // End instr loop.
+
+ // Make sure locations for all variables are valid only until the end of
+ // the basic block (unless it's the last basic block, in which case let
+ // their liveness run off to the end of the function).
if (!MBB.empty() && &MBB != &MF->back()) {
- for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
- auto CurElem = I++; // CurElem can be erased below.
- if (TRI->isVirtualRegister(CurElem->first) ||
- ChangingRegs.test(CurElem->first))
- clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back());
+ // Iterate over all variables that have open debug values.
+ for (auto &Pair : LiveEntries) {
+ if (Pair.second.empty())
+ continue;
+
+ // Create a clobbering entry.
+ EntryIndex ClobIdx = DbgValues.startClobber(Pair.first, MBB.back());
+
+ // End all entries.
+ for (EntryIndex Idx : Pair.second) {
+ DbgValueHistoryMap::Entry &Ent = DbgValues.getEntry(Pair.first, Idx);
+ assert(Ent.isDbgValue() && !Ent.isClosed());
+ Ent.endEntry(ClobIdx);
+ }
}
+
+ LiveEntries.clear();
+ RegVars.clear();
}
}
}
@@ -289,7 +339,7 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << "DbgValueHistoryMap:\n";
for (const auto &VarRangePair : *this) {
const InlinedEntity &Var = VarRangePair.first;
- const InstrRanges &Ranges = VarRangePair.second;
+ const Entries &Entries = VarRangePair.second;
const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
const DILocation *Location = Var.second;
@@ -304,10 +354,20 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << " --\n";
- for (const InstrRange &Range : Ranges) {
- dbgs() << " Begin: " << *Range.first;
- if (Range.second)
- dbgs() << " End : " << *Range.second;
+ for (const auto &E : enumerate(Entries)) {
+ const auto &Entry = E.value();
+ dbgs() << " Entry[" << E.index() << "]: ";
+ if (Entry.isDbgValue())
+ dbgs() << "Debug value\n";
+ else
+ dbgs() << "Clobber\n";
+ dbgs() << " Instr: " << *Entry.getInstr();
+ if (Entry.isDbgValue()) {
+ if (Entry.getEndIndex() == NoEntry)
+ dbgs() << " - Valid until end of function\n";
+ else
+ dbgs() << " - Closed by Entry[" << Entry.getEndIndex() << "]\n";
+ }
dbgs() << "\n";
}
}
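A minimal, self-contained sketch of the index-based history model introduced above (our own toy types, not the LLVM classes): a DBG_VALUE entry stays open until it is closed by recording the index of the entry that ends it, and open entries are tracked by index because the backing vector may reallocate.

#include <cassert>
#include <cstddef>
#include <vector>

constexpr std::size_t NoEntry = static_cast<std::size_t>(-1);

// One history entry: either a DBG_VALUE or a clobber. A DBG_VALUE entry is
// "closed" by recording the index of the entry that ends its range.
struct HistoryEntry {
  enum Kind { DbgValue, Clobber };
  Kind K;
  int Instr;                 // stand-in for the MachineInstr pointer
  std::size_t EndIndex;

  HistoryEntry(Kind K, int Instr) : K(K), Instr(Instr), EndIndex(NoEntry) {}
  bool isDbgValue() const { return K == DbgValue; }
  bool isClosed() const { return EndIndex != NoEntry; }
  void endEntry(std::size_t Idx) {
    assert(isDbgValue() && !isClosed());
    EndIndex = Idx;          // closed by the entry at index Idx
  }
};

int main() {
  std::vector<HistoryEntry> Entries;

  // Start a debug-value entry; keep its index, not a pointer, because the
  // vector may reallocate when later entries are appended.
  Entries.emplace_back(HistoryEntry::DbgValue, /*Instr=*/1);
  std::size_t Open = Entries.size() - 1;

  // A later clobbering instruction gets its own entry, which closes the
  // still-open debug-value entry by index.
  Entries.emplace_back(HistoryEntry::Clobber, /*Instr=*/2);
  Entries[Open].endEntry(Entries.size() - 1);

  assert(Entries[Open].isClosed() && Entries[Open].EndIndex == 1);
  return 0;
}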
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 551cd36d1984..22f458e4b03e 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -1,9 +1,8 @@
//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -141,10 +140,9 @@ DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
}
/// If this type is derived from a base type then return base type size.
-uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
- DIType *Ty = TyRef.resolve();
+uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
assert(Ty);
- DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
+ const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
if (!DDTy)
return Ty->getSizeInBits();
@@ -155,7 +153,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type)
return DDTy->getSizeInBits();
- DIType *BaseType = DDTy->getBaseType().resolve();
+ DIType *BaseType = DDTy->getBaseType();
if (!BaseType)
return 0;
@@ -212,36 +210,58 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// Request labels for the full history.
for (const auto &I : DbgValues) {
- const auto &Ranges = I.second;
- if (Ranges.empty())
+ const auto &Entries = I.second;
+ if (Entries.empty())
continue;
- // The first mention of a function argument gets the CurrentFnBegin
- // label, so arguments are visible when breaking at function entry.
- const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
+ auto IsDescribedByReg = [](const MachineInstr *MI) {
+ return MI->getOperand(0).isReg() && MI->getOperand(0).getReg();
+ };
+
+ // The first mention of a function argument gets the CurrentFnBegin label,
+ // so arguments are visible when breaking at function entry.
+ //
+ // We do not change the label for values that are described by registers,
+ // as that could place them above their defining instructions. We should
+ // ideally not change the labels for constant debug values either, since
+ // doing that violates the ranges that are calculated in the history map.
+ // However, we currently do not emit debug values for constant arguments
+ // directly at the start of the function, so this code is still useful.
+ const DILocalVariable *DIVar =
+ Entries.front().getInstr()->getDebugVariable();
if (DIVar->isParameter() &&
getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
- LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
- if (Ranges.front().first->getDebugExpression()->isFragment()) {
+ if (!IsDescribedByReg(Entries.front().getInstr()))
+ LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
+ if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
// Mark all non-overlapping initial fragments.
- for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
- const DIExpression *Fragment = I->first->getDebugExpression();
- if (std::all_of(Ranges.begin(), I,
- [&](DbgValueHistoryMap::InstrRange Pred) {
- return !Fragment->fragmentsOverlap(
- Pred.first->getDebugExpression());
+ for (auto I = Entries.begin(); I != Entries.end(); ++I) {
+ if (!I->isDbgValue())
+ continue;
+ const DIExpression *Fragment = I->getInstr()->getDebugExpression();
+ if (std::any_of(Entries.begin(), I,
+ [&](DbgValueHistoryMap::Entry Pred) {
+ return Pred.isDbgValue() &&
+ Fragment->fragmentsOverlap(
+ Pred.getInstr()->getDebugExpression());
}))
- LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
- else
break;
+ // The code that generates location lists for DWARF assumes that the
+ // entries' start labels are monotonically increasing, and since we
+ // don't change the label for fragments that are described by
+ // registers, we must bail out when encountering such a fragment.
+ if (IsDescribedByReg(I->getInstr()))
+ break;
+ LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin();
}
}
}
- for (const auto &Range : Ranges) {
- requestLabelBeforeInsn(Range.first);
- if (Range.second)
- requestLabelAfterInsn(Range.second);
+ for (const auto &Entry : Entries) {
+ if (Entry.isDbgValue())
+ requestLabelBeforeInsn(Entry.getInstr());
+ else
+ requestLabelAfterInsn(Entry.getInstr());
}
}
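The any_of check above asks whether a new fragment overlaps any earlier one. Assuming fragments are plain (bit offset, bit size) pairs, a minimal sketch of that overlap test (our own helper, not the DIExpression API) is:

#include <cassert>

// Two variable fragments, given as (bit offset, bit size), overlap when
// their half-open bit ranges intersect. This mirrors the kind of check used
// when deciding which preceding entries a new fragment interacts with.
struct Fragment { unsigned OffsetInBits, SizeInBits; };

bool fragmentsOverlap(const Fragment &A, const Fragment &B) {
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}

int main() {
  Fragment Low{0, 32}, High{32, 32}, Whole{0, 64};
  assert(!fragmentsOverlap(Low, High)); // disjoint halves of a 64-bit value
  assert(fragmentsOverlap(Low, Whole)); // the full value overlaps both halves
  return 0;
}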
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index befa4b941c8d..17e39b3d3268 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,6 +20,73 @@
namespace llvm {
class AsmPrinter;
+/// A single location or constant.
+class DbgValueLoc {
+ /// Any complex address location expression for this DbgValueLoc.
+ const DIExpression *Expression;
+
+ /// Type of entry that this represents.
+ enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+ enum EntryType EntryKind;
+
+ /// Either a constant,
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constant;
+
+ /// Or a location in the machine frame.
+ MachineLocation Loc;
+
+public:
+ DbgValueLoc(const DIExpression *Expr, int64_t i)
+ : Expression(Expr), EntryKind(E_Integer) {
+ Constant.Int = i;
+ }
+ DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP)
+ : Expression(Expr), EntryKind(E_ConstantFP) {
+ Constant.CFP = CFP;
+ }
+ DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP)
+ : Expression(Expr), EntryKind(E_ConstantInt) {
+ Constant.CIP = CIP;
+ }
+ DbgValueLoc(const DIExpression *Expr, MachineLocation Loc)
+ : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+ assert(cast<DIExpression>(Expr)->isValid());
+ }
+
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() const { return Constant.Int; }
+ const ConstantFP *getConstantFP() const { return Constant.CFP; }
+ const ConstantInt *getConstantInt() const { return Constant.CIP; }
+ MachineLocation getLoc() const { return Loc; }
+ bool isFragment() const { return getExpression()->isFragment(); }
+ bool isEntryVal() const { return getExpression()->isEntryValue(); }
+ const DIExpression *getExpression() const { return Expression; }
+ friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
+ friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ if (isLocation()) {
+ llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
+ if (Loc.isIndirect())
+ llvm::dbgs() << "+0";
+ llvm::dbgs() << "} ";
+ } else if (isConstantInt())
+ Constant.CIP->dump();
+ else if (isConstantFP())
+ Constant.CFP->dump();
+ if (Expression)
+ Expression->dump();
+ }
+#endif
+};
+
/// This struct describes location entries emitted in the .debug_loc
/// section.
class DebugLocEntry {
@@ -28,90 +94,20 @@ class DebugLocEntry {
const MCSymbol *Begin;
const MCSymbol *End;
-public:
- /// A single location or constant.
- struct Value {
- Value(const DIExpression *Expr, int64_t i)
- : Expression(Expr), EntryKind(E_Integer) {
- Constant.Int = i;
- }
- Value(const DIExpression *Expr, const ConstantFP *CFP)
- : Expression(Expr), EntryKind(E_ConstantFP) {
- Constant.CFP = CFP;
- }
- Value(const DIExpression *Expr, const ConstantInt *CIP)
- : Expression(Expr), EntryKind(E_ConstantInt) {
- Constant.CIP = CIP;
- }
- Value(const DIExpression *Expr, MachineLocation Loc)
- : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
- assert(cast<DIExpression>(Expr)->isValid());
- }
-
- /// Any complex address location expression for this Value.
- const DIExpression *Expression;
-
- /// Type of entry that this represents.
- enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
- enum EntryType EntryKind;
-
- /// Either a constant,
- union {
- int64_t Int;
- const ConstantFP *CFP;
- const ConstantInt *CIP;
- } Constant;
-
- // Or a location in the machine frame.
- MachineLocation Loc;
-
- bool isLocation() const { return EntryKind == E_Location; }
- bool isInt() const { return EntryKind == E_Integer; }
- bool isConstantFP() const { return EntryKind == E_ConstantFP; }
- bool isConstantInt() const { return EntryKind == E_ConstantInt; }
- int64_t getInt() const { return Constant.Int; }
- const ConstantFP *getConstantFP() const { return Constant.CFP; }
- const ConstantInt *getConstantInt() const { return Constant.CIP; }
- MachineLocation getLoc() const { return Loc; }
- bool isFragment() const { return getExpression()->isFragment(); }
- const DIExpression *getExpression() const { return Expression; }
- friend bool operator==(const Value &, const Value &);
- friend bool operator<(const Value &, const Value &);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const {
- if (isLocation()) {
- llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
- if (Loc.isIndirect())
- llvm::dbgs() << "+0";
- llvm::dbgs() << "} ";
- }
- else if (isConstantInt())
- Constant.CIP->dump();
- else if (isConstantFP())
- Constant.CFP->dump();
- if (Expression)
- Expression->dump();
- }
-#endif
- };
-
-private:
/// A nonempty list of locations/constants belonging to this entry,
/// sorted by offset.
- SmallVector<Value, 1> Values;
+ SmallVector<DbgValueLoc, 1> Values;
public:
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
- : Begin(B), End(E) {
- Values.push_back(std::move(Val));
+ /// Create a location list entry for the range [\p Begin, \p End).
+ ///
+ /// \param Vals One or more values describing (parts of) the variable.
+ DebugLocEntry(const MCSymbol *Begin, const MCSymbol *End,
+ ArrayRef<DbgValueLoc> Vals)
+ : Begin(Begin), End(End) {
+ addValues(Vals);
}
- /// If this and Next are describing different pieces of the same
- /// variable, merge them by appending Next's values to the current
- /// list of values.
- /// Return true if the merge was successful.
- bool MergeValues(const DebugLocEntry &Next);
-
/// Attempt to merge this DebugLocEntry with Next and return
/// true if the merge was successful. Entries can be merged if they
/// share the same Loc/Constant and if Next immediately follows this
@@ -127,35 +123,36 @@ public:
const MCSymbol *getBeginSym() const { return Begin; }
const MCSymbol *getEndSym() const { return End; }
- ArrayRef<Value> getValues() const { return Values; }
- void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
+ ArrayRef<DbgValueLoc> getValues() const { return Values; }
+ void addValues(ArrayRef<DbgValueLoc> Vals) {
Values.append(Vals.begin(), Vals.end());
sortUniqueValues();
- assert(all_of(Values, [](DebugLocEntry::Value V) {
- return V.isFragment();
- }) && "value must be a piece");
+ assert((Values.size() == 1 || all_of(Values, [](DbgValueLoc V) {
+ return V.isFragment();
+ })) && "must either have a single value or multiple pieces");
}
// Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
llvm::sort(Values);
- Values.erase(
- std::unique(
- Values.begin(), Values.end(), [](const Value &A, const Value &B) {
- return A.getExpression() == B.getExpression();
- }),
- Values.end());
+ Values.erase(std::unique(Values.begin(), Values.end(),
+ [](const DbgValueLoc &A, const DbgValueLoc &B) {
+ return A.getExpression() == B.getExpression();
+ }),
+ Values.end());
}
/// Lower this entry into a DWARF expression.
- void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,
- const DIBasicType *BT);
+ void finalize(const AsmPrinter &AP,
+ DebugLocStream::ListBuilder &List,
+ const DIBasicType *BT,
+ DwarfCompileUnit &TheCU);
};
-/// Compare two Values for equality.
-inline bool operator==(const DebugLocEntry::Value &A,
- const DebugLocEntry::Value &B) {
+/// Compare two DbgValueLocs for equality.
+inline bool operator==(const DbgValueLoc &A,
+ const DbgValueLoc &B) {
if (A.EntryKind != B.EntryKind)
return false;
@@ -163,21 +160,21 @@ inline bool operator==(const DebugLocEntry::Value &A,
return false;
switch (A.EntryKind) {
- case DebugLocEntry::Value::E_Location:
+ case DbgValueLoc::E_Location:
return A.Loc == B.Loc;
- case DebugLocEntry::Value::E_Integer:
+ case DbgValueLoc::E_Integer:
return A.Constant.Int == B.Constant.Int;
- case DebugLocEntry::Value::E_ConstantFP:
+ case DbgValueLoc::E_ConstantFP:
return A.Constant.CFP == B.Constant.CFP;
- case DebugLocEntry::Value::E_ConstantInt:
+ case DbgValueLoc::E_ConstantInt:
return A.Constant.CIP == B.Constant.CIP;
}
llvm_unreachable("unhandled EntryKind");
}
/// Compare two fragments based on their offset.
-inline bool operator<(const DebugLocEntry::Value &A,
- const DebugLocEntry::Value &B) {
+inline bool operator<(const DbgValueLoc &A,
+ const DbgValueLoc &B) {
return A.getExpression()->getFragmentInfo()->OffsetInBits <
B.getExpression()->getFragmentInfo()->OffsetInBits;
}
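The DbgValueLoc class above is a tagged union: one discriminator selects which union member is active, and equality compares only that member. A reduced sketch of the same pattern (illustrative names, not the LLVM class):

#include <cassert>
#include <cstdint>

// Minimal "single location or constant" value: a kind tag plus a union,
// compared member-by-member according to the active kind.
struct ValueLoc {
  enum Kind { Location, Integer } K;
  union {
    int64_t Int;   // constant value
    unsigned Reg;  // stand-in for a MachineLocation register
  };

  static ValueLoc makeInt(int64_t I) { ValueLoc V; V.K = Integer; V.Int = I; return V; }
  static ValueLoc makeReg(unsigned R) { ValueLoc V; V.K = Location; V.Reg = R; return V; }

  friend bool operator==(const ValueLoc &A, const ValueLoc &B) {
    if (A.K != B.K)
      return false;
    return A.K == Integer ? A.Int == B.Int : A.Reg == B.Reg;
  }
};

int main() {
  ValueLoc A = ValueLoc::makeInt(42);
  ValueLoc B = ValueLoc::makeInt(42);
  ValueLoc C = ValueLoc::makeReg(7);
  assert(A == B && !(A == C));
  return 0;
}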
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index 7e8ed7104af3..f483d532ff07 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -1,9 +1,8 @@
//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 8dcf5cbc1889..789291771b5a 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -1,9 +1,8 @@
//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 1990456cc555..207a7284dafa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 1dca3f0fce5b..9548ad9918c1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "DwarfUnit.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -104,7 +104,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// extend .file to support this.
unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
if (!File)
- return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID);
+ return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID);
return Asm->OutStreamer->EmitDwarfFileDirective(
0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
File->getSource(), CUID);
@@ -119,17 +119,19 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
assert(GV);
auto *GVContext = GV->getScope();
- auto *GTy = DD->resolve(GV->getType());
+ const DIType *GTy = GV->getType();
// Construct the context before querying for the existence of the DIE in
// case such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+ auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
+ DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
+ : getOrCreateContextDIE(GVContext);
// Add to map.
DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
DIScope *DeclContext;
if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
- DeclContext = resolve(SDMDecl->getScope());
+ DeclContext = SDMDecl->getScope();
assert(SDMDecl->isStaticMember() && "Expected static member decl");
assert(GV->isDefinition());
// We need the declaration DIE that is in the static member's class.
@@ -137,7 +139,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
// If the global variable's type is different from the one in the class
// member type, assume that it's more specific and also emit it.
- if (GTy != DD->resolve(SDMDecl->getBaseType()))
+ if (GTy != SDMDecl->getBaseType())
addType(*VariableDIE, GTy);
} else {
DeclContext = GV->getScope();
@@ -166,8 +168,16 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addTemplateParams(*VariableDIE, DINodeArray(TP));
// Add location.
+ addLocationAttribute(VariableDIE, GV, GlobalExprs);
+
+ return VariableDIE;
+}
+
+void DwarfCompileUnit::addLocationAttribute(
+ DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
+ Optional<unsigned> NVPTXAddressSpace;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
@@ -201,8 +211,24 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
- if (Expr)
+ if (Expr) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ // sequence for the NVPTX + gdb target.
+ unsigned LocalNVPTXAddressSpace;
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ const DIExpression *NewExpr =
+ DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+ if (NewExpr != Expr) {
+ Expr = NewExpr;
+ NVPTXAddressSpace = LocalNVPTXAddressSpace;
+ }
+ }
DwarfExpr->addFragmentOffset(Expr);
+ }
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
@@ -247,6 +273,15 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DwarfExpr->setMemoryLocationKind();
DwarfExpr->addExpression(Expr);
}
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ const unsigned NVPTX_ADDR_global_space = 5;
+ addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
+ }
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -262,8 +297,25 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DD->useAllLinkageNames())
DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
+}
- return VariableDIE;
+DIE *DwarfCompileUnit::getOrCreateCommonBlock(
+ const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
+
+ if (DIE *NDie = getDIE(CB))
+ return NDie;
+ DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
+ StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
+ addString(NDie, dwarf::DW_AT_name, Name);
+ addGlobalName(Name, NDie, CB->getScope());
+ if (CB->getFile())
+ addSourceLine(NDie, CB->getLineNo(), CB->getFile());
+ if (DIGlobalVariable *V = CB->getDecl())
+ getCU().addLocationAttribute(&NDie, V, GlobalExprs);
+ return &NDie;
}
void DwarfCompileUnit::addRange(RangeSpan Range) {
@@ -491,6 +543,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFile()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+ if (IA->getColumn())
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
IA->getDiscriminator());
@@ -555,36 +609,27 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
return VariableDie;
}
- // Check if variable is described by a DBG_VALUE instruction.
- if (const MachineInstr *DVInsn = DV.getMInsn()) {
- assert(DVInsn->getNumOperands() == 4);
- if (DVInsn->getOperand(0).isReg()) {
- auto RegOp = DVInsn->getOperand(0);
- auto Op1 = DVInsn->getOperand(1);
- // If the second operand is an immediate, this is an indirect value.
- assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
- MachineLocation Location(RegOp.getReg(), Op1.isImm());
- addVariableAddress(DV, *VariableDie, Location);
- } else if (DVInsn->getOperand(0).isImm()) {
- // This variable is described by a single constant.
- // Check whether it has a DIExpression.
+ // Check if variable has a single location description.
+ if (auto *DVal = DV.getValueLoc()) {
+ if (DVal->isLocation())
+ addVariableAddress(DV, *VariableDie, DVal->getLoc());
+ else if (DVal->isInt()) {
auto *Expr = DV.getSingleExpression();
if (Expr && Expr->getNumElements()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.addUnsignedConstant(DVal->getInt());
DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
- addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
- } else if (DVInsn->getOperand(0).isFPImm())
- addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isCImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
- DV.getType());
-
+ addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
+ } else if (DVal->isConstantFP()) {
+ addConstantFPValue(*VariableDie, DVal->getConstantFP());
+ } else if (DVal->isConstantInt()) {
+ addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType());
+ }
return VariableDie;
}
@@ -592,6 +637,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
if (!DV.hasFrameIndexExprs())
return VariableDie;
+ Optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
@@ -603,7 +649,23 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
SmallVector<uint64_t, 8> Ops;
Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Offset);
- Ops.append(Expr->elements_begin(), Expr->elements_end());
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ // sequence for the NVPTX + gdb target.
+ unsigned LocalNVPTXAddressSpace;
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ const DIExpression *NewExpr =
+ DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+ if (NewExpr != Expr) {
+ Expr = NewExpr;
+ NVPTXAddressSpace = LocalNVPTXAddressSpace;
+ }
+ }
+ if (Expr)
+ Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
DwarfExpr.setMemoryLocationKind();
if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
@@ -613,7 +675,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
*Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
DwarfExpr.addExpression(std::move(Cursor));
}
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+ // correctly interpret address space of the variable address.
+ const unsigned NVPTX_ADDR_local_space = 6;
+ addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
+ }
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
return VariableDie;
}
@@ -800,7 +874,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextDIE = &getUnitDie();
getOrCreateSubprogramDIE(SPDecl);
} else {
- ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));
+ ContextDIE = getOrCreateContextDIE(SP->getScope());
// The scope may be shared with a subprogram that has already been
// constructed in another CU, in which case we need to construct this
// subprogram in the same CU.
@@ -849,7 +923,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
insertDIE(Module, IMDie);
DIE *EntityDie;
- auto *Entity = resolve(Module->getEntity());
+ auto *Entity = Module->getEntity();
if (auto *NS = dyn_cast<DINamespace>(Entity))
EntityDie = getOrCreateNameSpace(NS);
else if (auto *M = dyn_cast<DIModule>(Entity))
@@ -958,7 +1032,9 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {
return true;
case DICompileUnit::DebugNameTableKind::Default:
return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
- !CUNode->isDebugDirectivesOnly();
+ !CUNode->isDebugDirectivesOnly() &&
+ DD->getAccelTableKind() != AccelTableKind::Apple &&
+ DD->getDwarfVersion() < 5;
}
llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
@@ -1054,6 +1130,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DwarfExpr.setMemoryLocationKind();
DIExpressionCursor Cursor(DIExpr);
+
+ if (DIExpr->isEntryValue()) {
+ DwarfExpr.setEntryValueFlag();
+ DwarfExpr.addEntryValueExpression(Cursor);
+ }
+
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -1112,7 +1194,7 @@ void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
- auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
+ auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope();
applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
addGlobalName(SP->getName(), SPDie, Context);
}
@@ -1121,6 +1203,10 @@ bool DwarfCompileUnit::isDwoUnit() const {
return DD->useSplitDwarf() && Skeleton;
}
+void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+ constructTypeDIE(D, CTy);
+}
+
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
@@ -1134,3 +1220,27 @@ void DwarfCompileUnit::addAddrTableBase() {
: dwarf::DW_AT_GNU_addr_base,
Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
}
+
+void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
+ Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
+ new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
+}
+
+void DwarfCompileUnit::createBaseTypeDIEs() {
+  // Insert the base_type DIEs directly after the CU so that their offsets will
+  // fit in the fixed-size ULEB128 used inside the location expressions.
+  // Maintain order by iterating backwards and inserting to the front of the
+  // CU's child list.
+ for (auto &Btr : reverse(ExprRefedBaseTypes)) {
+ DIE &Die = getUnitDie().addChildFront(
+ DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type));
+ SmallString<32> Str;
+ addString(Die, dwarf::DW_AT_name,
+ Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
+ "_" + Twine(Btr.BitSize)).toStringRef(Str));
+ addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
+ addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8);
+
+ Btr.Die = &Die;
+ }
+}
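createBaseTypeDIEs places the base_type DIEs right after the CU so their offsets fit in the fixed-size ULEB128 emitted by DIEBaseTypeRef::EmitValue and emitBaseTypeRef. A minimal sketch of such a padded ULEB128 encoder (a hypothetical helper, not an LLVM API):

#include <cassert>
#include <cstdint>
#include <vector>

// Encode Value as ULEB128 padded to exactly PadSize bytes: every byte except
// the last carries the continuation bit, so the encoding has a fixed width
// even when the final value (e.g. a DIE offset) is small or not yet known.
std::vector<uint8_t> encodePaddedULEB128(uint64_t Value, unsigned PadSize) {
  assert(Value < (1ULL << (PadSize * 7)) && "value does not fit in PadSize bytes");
  std::vector<uint8_t> Bytes;
  for (unsigned I = 0; I != PadSize; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I + 1 != PadSize)
      Byte |= 0x80; // continuation bit on every byte except the last
    Bytes.push_back(Byte);
  }
  return Bytes;
}

int main() {
  // 4-byte padded encoding of 5 is 0x85 0x80 0x80 0x00.
  std::vector<uint8_t> B = encodePaddedULEB128(5, 4);
  assert(B.size() == 4 && B[0] == 0x85 && B[1] == 0x80 && B[2] == 0x80 && B[3] == 0x00);
  return 0;
}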
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 9ec22f68c12f..ea980dfda17e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -101,6 +100,8 @@ class DwarfCompileUnit final : public DwarfUnit {
return DU->getAbstractEntities();
}
+ void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
@@ -125,11 +126,27 @@ public:
const DIExpression *Expr;
};
+ struct BaseTypeRef {
+ BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) :
+ BitSize(BitSize), Encoding(Encoding) {}
+ unsigned BitSize;
+ dwarf::TypeKind Encoding;
+ DIE *Die = nullptr;
+ };
+
+ std::vector<BaseTypeRef> ExprRefedBaseTypes;
+
/// Get or create global variable DIE.
DIE *
getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
ArrayRef<GlobalExpr> GlobalExprs);
+ DIE *getOrCreateCommonBlock(const DICommonBlock *CB,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
+ void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
@@ -200,6 +217,8 @@ public:
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
+ void createBaseTypeDIEs();
+
/// Construct a DIE for this subprogram scope.
DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope);
@@ -314,6 +333,8 @@ public:
void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
bool hasDwarfPubSections() const;
+
+ void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1de2ffb6cfa1..71bb2b0858cc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,8 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -162,6 +163,7 @@ static const char *const DWARFGroupName = "dwarf";
static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";
+static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
@@ -177,6 +179,15 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
+void DebugLocDwarfExpression::emitData1(uint8_t Value) {
+ BS.EmitInt8(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+ assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit");
+ BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+}
+
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
// This information is not available while emitting .debug_loc entries.
@@ -185,11 +196,11 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
bool DbgVariable::isBlockByrefVariable() const {
assert(getVariable() && "Invalid complex DbgVariable!");
- return getVariable()->getType().resolve()->isBlockByrefStruct();
+ return getVariable()->getType()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
- DIType *Ty = getVariable()->getType().resolve();
+ DIType *Ty = getVariable()->getType();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
@@ -221,18 +232,55 @@ const DIType *DbgVariable::getType() const {
uint16_t tag = Ty->getTag();
if (tag == dwarf::DW_TAG_pointer_type)
- subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
+ subType = cast<DIDerivedType>(Ty)->getBaseType();
auto Elements = cast<DICompositeType>(subType)->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
auto *DT = cast<DIDerivedType>(Elements[i]);
if (getName() == DT->getName())
- return resolve(DT->getBaseType());
+ return DT->getBaseType();
}
}
return Ty;
}
+/// Get .debug_loc entry for the instruction range starting at MI.
+static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
+ const DIExpression *Expr = MI->getDebugExpression();
+ assert(MI->getNumOperands() == 4);
+ if (MI->getOperand(0).isReg()) {
+ auto RegOp = MI->getOperand(0);
+ auto Op1 = MI->getOperand(1);
+ // If the second operand is an immediate, this is a
+ // register-indirect address.
+ assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
+ MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
+ return DbgValueLoc(Expr, MLoc);
+ }
+ if (MI->getOperand(0).isImm())
+ return DbgValueLoc(Expr, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DbgValueLoc(Expr, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DbgValueLoc(Expr, MI->getOperand(0).getCImm());
+
+ llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
+}
+
+void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
+ assert(FrameIndexExprs.empty() && "Already initialized?");
+ assert(!ValueLoc.get() && "Already initialized?");
+
+ assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+ "Wrong inlined-at");
+
+ ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+ if (auto *E = DbgValue->getDebugExpression())
+ if (E->getNumElements())
+ FrameIndexExprs.push_back({0, E});
+}
+
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
if (FrameIndexExprs.size() == 1)
return FrameIndexExprs;
@@ -252,8 +300,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
}
void DbgVariable::addMMIEntry(const DbgVariable &V) {
- assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
- assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+ assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
+ assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
assert(V.getVariable() == getVariable() && "conflicting variable");
assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
@@ -315,7 +363,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
const Triple &TT = Asm->TM.getTargetTriple();
- // Make sure we know our "debugger tuning." The target option takes
+ // Make sure we know our "debugger tuning". The target option takes
// precedence; fall back to triple-based defaults.
if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
@@ -658,6 +706,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
+ // Create DIEs for function declarations used for call site debug info.
+ for (auto Scope : DIUnit->getRetainedTypes())
+ if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+ NewCU.getOrCreateSubprogramDIE(SP);
+
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
@@ -890,13 +943,6 @@ void DwarfDebug::finalizeModuleInfo() {
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty() &&
- (getDwarfVersion() >= 5 ||
- (SkCU && !empty(TheCU.getUnitDie().children()))))
- U.addAddrTableBase();
-
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
@@ -909,6 +955,13 @@ void DwarfDebug::finalizeModuleInfo() {
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty() &&
+ (getDwarfVersion() >= 5 ||
+ (SkCU && !empty(TheCU.getUnitDie().children()))))
+ U.addAddrTableBase();
+
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
@@ -941,6 +994,11 @@ void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);
+ for (const auto &P : CUMap) {
+ auto &CU = *P.second;
+ CU.createBaseTypeDIEs();
+ }
+
// If we aren't actually generating debug info (check beginModule -
// conditionalized on !DisableDebugInfoPrinting and the presence of the
// llvm.dbg.cu metadata node)
@@ -1059,161 +1117,177 @@ void DwarfDebug::collectVariableInfoFromMFTable(
}
}
-// Get .debug_loc entry for the instruction range starting at MI.
-static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
- const DIExpression *Expr = MI->getDebugExpression();
- assert(MI->getNumOperands() == 4);
- if (MI->getOperand(0).isReg()) {
- auto RegOp = MI->getOperand(0);
- auto Op1 = MI->getOperand(1);
- // If the second operand is an immediate, this is a
- // register-indirect address.
- assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
- MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
- return DebugLocEntry::Value(Expr, MLoc);
- }
- if (MI->getOperand(0).isImm())
- return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
- if (MI->getOperand(0).isFPImm())
- return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
- if (MI->getOperand(0).isCImm())
- return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
-
- llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
-}
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+ const MachineInstr *DbgValue,
+ const MachineInstr *RangeEnd) {
+ assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+ auto MBB = DbgValue->getParent();
+ auto DL = DbgValue->getDebugLoc();
+ auto *LScope = LScopes.findLexicalScope(DL);
+ // Scope doesn't exist; this is a dead DBG_VALUE.
+ if (!LScope)
+ return false;
+ auto &LSRange = LScope->getRanges();
+ if (LSRange.size() == 0)
+ return false;
-/// If this and Next are describing different fragments of the same
-/// variable, merge them by appending Next's values to the current
-/// list of values.
-/// Return true if the merge was successful.
-bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
- if (Begin == Next.Begin) {
- auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
- auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
- if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
+ // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
+ const MachineInstr *LScopeBegin = LSRange.front().first;
+ // Early exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
+ return false;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
return false;
+ }
- // We can only merge entries if none of the fragments overlap any others.
- // In doing so, we can take advantage of the fact that both lists are
- // sorted.
- for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
- for (; j < Next.Values.size(); ++j) {
- int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp(
- cast<DIExpression>(Next.Values[j].Expression));
- if (res == 0) // The two expressions overlap, we can't merge.
- return false;
- // Values[i] is entirely before Next.Values[j],
- // so go back to the next entry of Values.
- else if (res == -1)
- break;
- // Next.Values[j] is entirely before Values[i], so go on to the
- // next entry of Next.Values.
- }
- }
+ // If the range of the DBG_VALUE is open-ended, report success.
+ if (!RangeEnd)
+ return true;
- addValues(Next.Values);
- End = Next.End;
+ // Fail if there are instructions belonging to our scope in another block.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (LScopeEnd->getParent() != MBB)
+ return false;
+
+ // Single, constant DBG_VALUEs in the prologue are promoted to be live
+ // throughout the function. This is a hack, presumably for DWARF v2 and not
+ // necessarily correct. It would be much better to use a dbg.declare instead
+ // if we know the constant is live throughout the scope.
+ if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
return true;
- }
+
return false;
}
/// Build the location list for all DBG_VALUEs in the function that
-/// describe the same variable. If the ranges of several independent
-/// fragments of the same variable overlap partially, split them up and
-/// combine the ranges. The resulting DebugLocEntries are will have
+/// describe the same variable. The resulting DebugLocEntries will have
/// strict monotonically increasing begin addresses and will never
-/// overlap.
+/// overlap. If the resulting list has only one entry that is valid
+/// throughout the variable's scope, return true.
+//
+// See the definition of DbgValueHistoryMap::Entry for an explanation of the
+// different kinds of history map entries. One thing to be aware of is that if
+// a debug value is ended by another entry (rather than being valid until the
+// end of the function), that entry's instruction may or may not be included in
+// the range, depending on if the entry is a clobbering entry (it has an
+// instruction that clobbers one or more preceding locations), or if it is an
+// (overlapping) debug value entry. This distinction can be seen in the example
+// below. The first debug value is ended by the clobbering entry 2, and the
+// second and third debug values are ended by the overlapping debug value entry
+// 4.
//
// Input:
//
-// Ranges History [var, loc, fragment ofs size]
-// 0 | [x, (reg0, fragment 0, 32)]
-// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry
-// 2 | | ...
-// 3 | [clobber reg0]
-// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of
-// x.
+// History map entries [type, end index, mi]
//
-// Output:
+// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)]
+// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)]
+// 2 | | [Clobber, $reg0 = [...], -, -]
+// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)]
+// 4 [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)]
//
-// [0-1] [x, (reg0, fragment 0, 32)]
-// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)]
-// [3-4] [x, (reg1, fragment 32, 32)]
-// [4- ] [x, (mem, fragment 0, 64)]
-void
-DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::InstrRanges &Ranges) {
- SmallVector<DebugLocEntry::Value, 4> OpenRanges;
-
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const MachineInstr *Begin = I->first;
- const MachineInstr *End = I->second;
- assert(Begin->isDebugValue() && "Invalid History entry");
-
- // Check if a variable is inaccessible in this range.
- if (Begin->getNumOperands() > 1 &&
- Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
- OpenRanges.clear();
- continue;
- }
-
- // If this fragment overlaps with any open ranges, truncate them.
- const DIExpression *DIExpr = Begin->getDebugExpression();
- auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
- return DIExpr->fragmentsOverlap(R.getExpression());
- });
+// Output [start, end) [Value...]:
+//
+// [0-1) [(reg0, fragment 0, 32)]
+// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
+// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
+// [4-) [(@g, fragment 0, 96)]
+bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries) {
+ using OpenRange =
+ std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
+ SmallVector<OpenRange, 4> OpenRanges;
+ bool isSafeForSingleLocation = true;
+ const MachineInstr *StartDebugMI = nullptr;
+ const MachineInstr *EndMI = nullptr;
+
+ for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
+ const MachineInstr *Instr = EI->getInstr();
+
+ // Remove all values that are no longer live.
+ size_t Index = std::distance(EB, EI);
+ auto Last =
+ remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
OpenRanges.erase(Last, OpenRanges.end());
- const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
- assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+ // If we are dealing with a clobbering entry, this iteration will result in
+ // a location list entry starting after the clobbering instruction.
+ const MCSymbol *StartLabel =
+ EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr);
+ assert(StartLabel &&
+ "Forgot label before/after instruction starting a range!");
const MCSymbol *EndLabel;
- if (End != nullptr)
- EndLabel = getLabelAfterInsn(End);
- else if (std::next(I) == Ranges.end())
+ if (std::next(EI) == Entries.end()) {
EndLabel = Asm->getFunctionEnd();
+ if (EI->isClobber())
+ EndMI = EI->getInstr();
+ }
+ else if (std::next(EI)->isClobber())
+ EndLabel = getLabelAfterInsn(std::next(EI)->getInstr());
else
- EndLabel = getLabelBeforeInsn(std::next(I)->first);
+ EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr());
assert(EndLabel && "Forgot label after instruction ending a range!");
- LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+ if (EI->isDbgValue())
+ LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n");
+
+ // If this history map entry has a debug value, add that to the list of
+ // open ranges and check if its location is valid for a single value
+ // location.
+ if (EI->isDbgValue()) {
+ // Do not add undef debug values, as they are redundant information in
+ // the location list entries. An undef debug value results in an empty location
+ // description. If there are any non-undef fragments then padding pieces
+ // with empty location descriptions will automatically be inserted, and if
+ // all fragments are undef then the whole location list entry is
+ // redundant.
+ if (!Instr->isUndefDebugValue()) {
+ auto Value = getDebugLocValue(Instr);
+ OpenRanges.emplace_back(EI->getEndIndex(), Value);
+
+ // TODO: Add support for single value fragment locations.
+ if (Instr->getDebugExpression()->isFragment())
+ isSafeForSingleLocation = false;
+
+ if (!StartDebugMI)
+ StartDebugMI = Instr;
+ } else {
+ isSafeForSingleLocation = false;
+ }
+ }
- auto Value = getDebugLocValue(Begin);
+ // Location list entries with empty location descriptions are redundant
+ // information in DWARF, so do not emit those.
+ if (OpenRanges.empty())
+ continue;
// Omit entries with empty ranges as they do not have any effect in DWARF.
if (StartLabel == EndLabel) {
- // If this is a fragment, we must still add the value to the list of
- // open ranges, since it may describe non-overlapping parts of the
- // variable.
- if (DIExpr->isFragment())
- OpenRanges.push_back(Value);
LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
continue;
}
- DebugLocEntry Loc(StartLabel, EndLabel, Value);
- bool couldMerge = false;
-
- // If this is a fragment, it may belong to the current DebugLocEntry.
- if (DIExpr->isFragment()) {
- // Add this value to the list of open ranges.
- OpenRanges.push_back(Value);
-
- // Attempt to add the fragment to the last entry.
- if (!DebugLoc.empty())
- if (DebugLoc.back().MergeValues(Loc))
- couldMerge = true;
- }
-
- if (!couldMerge) {
- // Need to add a new DebugLocEntry. Add all values from still
- // valid non-overlapping fragments.
- if (OpenRanges.size())
- Loc.addValues(OpenRanges);
-
- DebugLoc.push_back(std::move(Loc));
- }
+ SmallVector<DbgValueLoc, 4> Values;
+ for (auto &R : OpenRanges)
+ Values.push_back(R.second);
+ DebugLoc.emplace_back(StartLabel, EndLabel, Values);
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
@@ -1229,6 +1303,9 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
DebugLoc.pop_back();
}
+
+ return DebugLoc.size() == 1 && isSafeForSingleLocation &&
+ validThroughout(LScopes, StartDebugMI, EndMI);
}
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1253,64 +1330,6 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
return ConcreteEntities.back().get();
}
-/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
-/// enclosing lexical scope. The check ensures there are no other instructions
-/// in the same lexical scope preceding the DBG_VALUE and that its range is
-/// either open or otherwise rolls off the end of the scope.
-static bool validThroughout(LexicalScopes &LScopes,
- const MachineInstr *DbgValue,
- const MachineInstr *RangeEnd) {
- assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
- auto MBB = DbgValue->getParent();
- auto DL = DbgValue->getDebugLoc();
- auto *LScope = LScopes.findLexicalScope(DL);
- // Scope doesn't exist; this is a dead DBG_VALUE.
- if (!LScope)
- return false;
- auto &LSRange = LScope->getRanges();
- if (LSRange.size() == 0)
- return false;
-
- // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
- const MachineInstr *LScopeBegin = LSRange.front().first;
- // Early exit if the lexical scope begins outside of the current block.
- if (LScopeBegin->getParent() != MBB)
- return false;
- MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
- for (++Pred; Pred != MBB->rend(); ++Pred) {
- if (Pred->getFlag(MachineInstr::FrameSetup))
- break;
- auto PredDL = Pred->getDebugLoc();
- if (!PredDL || Pred->isMetaInstruction())
- continue;
- // Check whether the instruction preceding the DBG_VALUE is in the same
- // (sub)scope as the DBG_VALUE.
- if (DL->getScope() == PredDL->getScope())
- return false;
- auto *PredScope = LScopes.findLexicalScope(PredDL);
- if (!PredScope || LScope->dominates(PredScope))
- return false;
- }
-
- // If the range of the DBG_VALUE is open-ended, report success.
- if (!RangeEnd)
- return true;
-
- // Fail if there are instructions belonging to our scope in another block.
- const MachineInstr *LScopeEnd = LSRange.back().second;
- if (LScopeEnd->getParent() != MBB)
- return false;
-
- // Single, constant DBG_VALUEs in the prologue are promoted to be live
- // throughout the function. This is a hack, presumably for DWARF v2 and not
- // necessarily correct. It would be much better to use a dbg.declare instead
- // if we know the constant is live throughout the scope.
- if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
- return true;
-
- return false;
-}
-
// Find variables for each lexical scope.
void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
@@ -1324,8 +1343,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
continue;
// Instruction ranges, specifying where IV is accessible.
- const auto &Ranges = I.second;
- if (Ranges.empty())
+ const auto &HistoryMapEntries = I.second;
+ if (HistoryMapEntries.empty())
continue;
LexicalScope *Scope = nullptr;
@@ -1342,15 +1361,24 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
*Scope, LocalVar, IV.second));
- const MachineInstr *MInsn = Ranges.front().first;
+ const MachineInstr *MInsn = HistoryMapEntries.front().getInstr();
assert(MInsn->isDebugValue() && "History must begin with debug value");
// Check if there is a single DBG_VALUE, valid throughout the var's scope.
- if (Ranges.size() == 1 &&
- validThroughout(LScopes, MInsn, Ranges.front().second)) {
- RegVar->initializeDbgValue(MInsn);
- continue;
+ // If the history map contains a single debug value, there may be an
+ // additional entry which clobbers the debug value.
+ size_t HistSize = HistoryMapEntries.size();
+ bool SingleValueWithClobber =
+ HistSize == 2 && HistoryMapEntries[1].isClobber();
+ if (HistSize == 1 || SingleValueWithClobber) {
+ const auto *End =
+ SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
+ if (validThroughout(LScopes, MInsn, End)) {
+ RegVar->initializeDbgValue(MInsn);
+ continue;
+ }
}
+
// Do not emit location lists if .debug_loc section is disabled.
if (!useLocSection())
continue;
@@ -1360,7 +1388,15 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
- buildLocationList(Entries, Ranges);
+ bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
+
+ // Check whether buildLocationList managed to merge all locations to one
+ // that is valid throughout the variable's scope. If so, produce single
+ // value location.
+ if (isValidSingleLocation) {
+ RegVar->initializeDbgValue(Entries[0].getValues()[0]);
+ continue;
+ }
// If the variable has a DIBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
@@ -1370,7 +1406,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
- Entry.finalize(*Asm, List, BT);
+ Entry.finalize(*Asm, List, BT, TheCU);
}
// For each InlinedEntity collected from DBG_LABEL instructions, convert to
@@ -1489,7 +1525,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// We have an explicit location, different from the previous location.
// Don't repeat a line-0 record, but otherwise emit the new location.
// (The new location might be an explicit line 0, which we do emit.)
- if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0)
+ if (DL.getLine() == 0 && LastAsmLine == 0)
return;
unsigned Flags = 0;
if (DL == PrologEndLoc) {
@@ -1521,6 +1557,46 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
return DebugLoc();
}
+/// Register a source line with debug info. Emits the .loc directive that
+/// provides correspondence to the source line list.
+static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
+ const MDNode *S, unsigned Flags, unsigned CUID,
+ uint16_t DwarfVersion,
+ ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) {
+ StringRef Fn;
+ unsigned FileNo = 1;
+ unsigned Discriminator = 0;
+ if (auto *Scope = cast_or_null<DIScope>(S)) {
+ Fn = Scope->getFilename();
+ if (Line != 0 && DwarfVersion >= 4)
+ if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
+ Discriminator = LBF->getDiscriminator();
+
+ FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
+ .getOrCreateSourceID(Scope->getFile());
+ }
+ Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
+ Discriminator, Fn);
+}
+
+DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
+ unsigned CUID) {
+ // Get beginning of function.
+ if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
+ // Ensure the compile unit is created if the function is called before
+ // beginFunction().
+ (void)getOrCreateDwarfCompileUnit(
+ MF.getFunction().getSubprogram()->getUnit());
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
+ ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT,
+ CUID, getDwarfVersion(), getUnits());
+ return PrologEndLoc;
+ }
+ return DebugLoc();
+}
+
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
@@ -1543,13 +1619,8 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
// Record beginning of function.
- PrologEndLoc = findPrologueEndLoc(MF);
- if (PrologEndLoc) {
- // We'd like to list the prologue as "not statements" but GDB behaves
- // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
- recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
- }
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
}
void DwarfDebug::skippedNonDebugFunction() {
@@ -1647,21 +1718,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
- StringRef Fn;
- unsigned FileNo = 1;
- unsigned Discriminator = 0;
- if (auto *Scope = cast_or_null<DIScope>(S)) {
- Fn = Scope->getFilename();
- if (Line != 0 && getDwarfVersion() >= 4)
- if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
- Discriminator = LBF->getDiscriminator();
-
- unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
- FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
- .getOrCreateSourceID(Scope->getFile());
- }
- Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
- Discriminator, Fn);
+ ::recordSourceLine(*Asm, Line, Col, S, Flags,
+ Asm->OutStreamer->getContext().getDwarfCompileUnitID(),
+ getDwarfVersion(), getUnits());
}
//===----------------------------------------------------------------------===//
@@ -1890,17 +1949,59 @@ void DwarfDebug::emitDebugStr() {
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
- const DebugLocStream::Entry &Entry) {
+ const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU) {
auto &&Comments = DebugLocs.getComments(Entry);
auto Comment = Comments.begin();
auto End = Comments.end();
- for (uint8_t Byte : DebugLocs.getBytes(Entry))
- Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
+
+ // The expressions are inserted into a byte stream rather early (see
+ // DwarfExpression::addExpression), so for those ops (e.g. DW_OP_convert)
+ // that need to reference a base_type DIE, the offset of that DIE is not yet
+ // known at that point. To deal with this we instead insert a placeholder
+ // early and then extract it here and replace it with the real reference.
+ unsigned PtrSize = Asm->MAI->getCodePointerSize();
+ DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
+ DebugLocs.getBytes(Entry).size()),
+ Asm->getDataLayout().isLittleEndian(), PtrSize);
+ DWARFExpression Expr(Data, getDwarfVersion(), PtrSize);
+
+ using Encoding = DWARFExpression::Operation::Encoding;
+ uint32_t Offset = 0;
+ for (auto &Op : Expr) {
+ assert(Op.getCode() != dwarf::DW_OP_const_type &&
+ "3 operand ops not yet supported");
+ Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
+ Offset++;
+ for (unsigned I = 0; I < 2; ++I) {
+ if (Op.getDescription().Op[I] == Encoding::SizeNA)
+ continue;
+ if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
+ if (CU) {
+ uint64_t Offset = CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit");
+ Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+ } else {
+ // Emit a reference to the 'generic type'.
+ Asm->EmitULEB128(0, nullptr, ULEB128PadSize);
+ }
+ // Make sure comments stay aligned.
+ for (unsigned J = 0; J < ULEB128PadSize; ++J)
+ if (Comment != End)
+ Comment++;
+ } else {
+ for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
+ Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
+ }
+ Offset = Op.getOperandEndOffset(I);
+ }
+ assert(Offset == Op.getEndOffset());
+ }
}
-static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
- const DebugLocEntry::Value &Value,
- DwarfExpression &DwarfExpr) {
+void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ const DbgValueLoc &Value,
+ DwarfExpression &DwarfExpr) {
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
@@ -1916,6 +2017,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
DIExpressionCursor Cursor(DIExpr);
+
+ if (DIExpr->isEntryValue()) {
+ DwarfExpr.setEntryValueFlag();
+ DwarfExpr.addEntryValueExpression(Cursor);
+ }
+
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -1929,38 +2036,50 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
- const DIBasicType *BT) {
+ const DIBasicType *BT,
+ DwarfCompileUnit &TheCU) {
+ assert(!Values.empty() &&
+ "location list entries without values are redundant");
assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
- DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
- const DebugLocEntry::Value &Value = Values[0];
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU);
+ const DbgValueLoc &Value = Values[0];
if (Value.isFragment()) {
// Emit all fragments that belong to the same variable and range.
- assert(llvm::all_of(Values, [](DebugLocEntry::Value P) {
+ assert(llvm::all_of(Values, [](DbgValueLoc P) {
return P.isFragment();
}) && "all values are expected to be fragments");
assert(std::is_sorted(Values.begin(), Values.end()) &&
"fragments are expected to be sorted");
for (auto Fragment : Values)
- emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
+ DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
assert(Values.size() == 1 && "only fragments may have >1 value");
- emitDebugLocValue(AP, BT, Value, DwarfExpr);
+ DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
}
-void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU) {
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
- Asm->emitInt16(DebugLocs.getBytes(Entry).size());
-
+ if (getDwarfVersion() >= 5)
+ Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+ else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
+ Asm->emitInt16(DebugLocs.getBytes(Entry).size());
+ else {
+ // The entry is too big to fit into 16 bits; drop it as there is nothing we
+ // can do.
+ Asm->emitInt16(0);
+ return;
+ }
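// An aside, not from the patch itself: pre-v5 .debug_loc entries prefix each
// expression with a fixed two-byte length, so an expression longer than
// 0xffff bytes cannot be represented and is emitted here as an empty
// (zero-length) location instead, whereas DWARF v5 location lists use a
// ULEB128 length and have no such limit.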
// Emit the entry.
APByteStreamer Streamer(*Asm);
- emitDebugLocEntry(Streamer, Entry);
+ emitDebugLocEntry(Streamer, Entry, CU);
}
// Emit the common part of the DWARF 5 range/locations list tables header.
@@ -2060,7 +2179,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
}
- emitDebugLocEntryLocation(Entry);
+ emitDebugLocEntryLocation(Entry, CU);
continue;
}
@@ -2081,7 +2200,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
}
- emitDebugLocEntryLocation(Entry);
+ emitDebugLocEntryLocation(Entry, CU);
}
if (IsLocLists) {
@@ -2100,9 +2219,9 @@ void DwarfDebug::emitDebugLoc() {
}
void DwarfDebug::emitDebugLocDWO() {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocDWOSection());
for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->EmitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
@@ -2117,7 +2236,7 @@ void DwarfDebug::emitDebugLocDWO() {
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
- emitDebugLocEntryLocation(Entry);
+ emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
}
@@ -2170,19 +2289,18 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the symbols by offset within the section.
- std::stable_sort(
- List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
-
- // Symbols with no order assigned should be placed at the end.
- // (e.g. section end labels)
- if (IA == 0)
- return false;
- if (IB == 0)
- return true;
- return IA < IB;
- });
+ llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
+ unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+
+ // Symbols with no order assigned should be placed at the end.
+ // (e.g. section end labels)
+ if (IA == 0)
+ return false;
+ if (IB == 0)
+ return true;
+ return IA < IB;
+ });
// Insert a final terminator.
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
@@ -2687,6 +2805,22 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
CU.addDIETypeSignature(RefDie, Signature);
}
+DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
+ : DD(DD),
+ TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) {
+ DD->TypeUnitsUnderConstruction.clear();
+ assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed());
+}
+
+DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
+ DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
+ DD->AddrPool.resetUsedFlag();
+}
+
+DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
+ return NonTypeUnitContext(this);
+}
+
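// A brief usage sketch, not from the patch itself, assuming typical call
// sites: the context behaves as an RAII guard. Entering it moves aside the
// type units currently under construction (asserting that, if any are in
// flight, the address pool has not been used yet); leaving it moves them back
// and resets the pool's "used" flag, so DIEs built in between do not end up
// attributed to an in-flight type unit.
//
//   {
//     DwarfDebug::NonTypeUnitContext Ctx = DD->enterNonTypeUnitContext();
//     // ... construct DIEs that must not land in the type units ...
//   } // type-unit construction state is restored here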
// Add the Name along with its companion DIE to the appropriate accelerator
// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
// AccelTableKind::Apple, we use the table we got as an argument). If
@@ -2699,7 +2833,7 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
- CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
return;
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 8a31e989b289..3ac474e2bdda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "AddressPool.h"
#include "DebugLocStream.h"
+#include "DebugLocEntry.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -52,6 +52,7 @@ class ByteStreamer;
class DebugLocEntry;
class DIE;
class DwarfCompileUnit;
+class DwarfExpression;
class DwarfTypeUnit;
class DwarfUnit;
class LexicalScope;
@@ -111,12 +112,14 @@ public:
///
/// Variables can be created from \c DBG_VALUE instructions. Those whose
/// location changes over time use \a DebugLocListIndex, while those with a
-/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
+/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
class DbgVariable : public DbgEntity {
- unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
- const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
+ /// Offset in DebugLocs.
+ unsigned DebugLocListIndex = ~0u;
+ /// Single value location description.
+ std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
struct FrameIndexExpr {
int FI;
@@ -136,7 +139,7 @@ public:
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!MInsn && "Already initialized?");
+ assert(!ValueLoc.get() && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
@@ -144,35 +147,35 @@ public:
FrameIndexExprs.push_back({FI, E});
}
- /// Initialize from a DBG_VALUE instruction.
- void initializeDbgValue(const MachineInstr *DbgValue) {
+ // Initialize variable's location.
+ void initializeDbgValue(DbgValueLoc Value) {
assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!MInsn && "Already initialized?");
+ assert(!ValueLoc && "Already initialized?");
+ assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
- assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
- assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
- "Wrong inlined-at");
-
- MInsn = DbgValue;
- if (auto *E = DbgValue->getDebugExpression())
+ ValueLoc = llvm::make_unique<DbgValueLoc>(Value);
+ if (auto *E = ValueLoc->getExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
}
+ /// Initialize from a DBG_VALUE instruction.
+ void initializeDbgValue(const MachineInstr *DbgValue);
+
// Accessors.
const DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getEntity());
}
const DIExpression *getSingleExpression() const {
- assert(MInsn && FrameIndexExprs.size() <= 1);
+ assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
StringRef getName() const { return getVariable()->getName(); }
- const MachineInstr *getMInsn() const { return MInsn; }
+ const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
@@ -205,7 +208,7 @@ public:
}
bool hasComplexAddress() const {
- assert(MInsn && "Expected DBG_VALUE, not MMI variable");
+ assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
assert((FrameIndexExprs.empty() ||
(FrameIndexExprs.size() == 1 &&
FrameIndexExprs[0].Expr->getNumElements())) &&
@@ -219,11 +222,6 @@ public:
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgVariableKind;
}
-
-private:
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
};
//===----------------------------------------------------------------------===//
@@ -254,11 +252,6 @@ public:
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgLabelKind;
}
-
-private:
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
};
/// Helper used to pair up a symbol and its DWARF compile unit.
@@ -558,9 +551,11 @@ class DwarfDebug : public DebugHandlerBase {
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
- /// function that describe the same variable.
- void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::InstrRanges &Ranges);
+ /// function that describe the same variable. If the resulting
+ /// list has only one entry that is valid for the entire variable's
+ /// scope, return true.
+ bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -593,6 +588,9 @@ public:
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
+ /// Emits the initial debug location directive.
+ DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
+
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
@@ -604,6 +602,19 @@ public:
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
+ friend class NonTypeUnitContext;
+ class NonTypeUnitContext {
+ DwarfDebug *DD;
+ decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
+ friend class DwarfDebug;
+ NonTypeUnitContext(DwarfDebug *DD);
+ public:
+ NonTypeUnitContext(NonTypeUnitContext&&) = default;
+ ~NonTypeUnitContext();
+ };
+
+ NonTypeUnitContext enterNonTypeUnitContext();
+
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
@@ -680,15 +691,12 @@ public:
/// Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer,
- const DebugLocStream::Entry &Entry);
+ const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU);
/// Emit the location for a debug loc entry, including the size header.
- void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry);
-
- /// Find the MDNode for the given reference.
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
+ void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU);
void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
DIE &Die);
@@ -728,6 +736,10 @@ public:
void addSectionLabel(const MCSymbol *Sym);
const MCSymbol *getSectionLabel(const MCSection *S);
+
+ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ const DbgValueLoc &Value,
+ DwarfExpression &DwarfExpr);
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b57ea8fc6322..24bbf58b91ec 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -1,9 +1,8 @@
//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 19c350afbf17..2858afaa1cf1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "DwarfExpression.h"
+#include "DwarfCompileUnit.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -40,7 +40,7 @@ void DwarfExpression::emitConstu(uint64_t Value) {
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert((LocationKind == Unknown || LocationKind == Register) &&
+ assert((isUnknownLocation() || isRegisterLocation()) &&
"location description already locked down");
LocationKind = Register;
if (DwarfReg < 32) {
@@ -53,7 +53,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert(LocationKind != Register && "location description already locked down");
+ assert(!isRegisterLocation() && "location description already locked down");
if (DwarfReg < 32) {
emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
@@ -184,20 +184,20 @@ void DwarfExpression::addStackValue() {
}
void DwarfExpression::addSignedConstant(int64_t Value) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
+ assert(isImplicitLocation() || isUnknownLocation());
LocationKind = Implicit;
emitOp(dwarf::DW_OP_consts);
emitSigned(Value);
}
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
+ assert(isImplicitLocation() || isUnknownLocation());
LocationKind = Implicit;
emitConstu(Value);
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
+ assert(isImplicitLocation() || isUnknownLocation());
LocationKind = Implicit;
unsigned Size = Value.getBitWidth();
@@ -242,12 +242,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
}
// Handle simple register locations.
- if (LocationKind != Memory && !HasComplexExpression) {
+ if (!isMemoryLocation() && !HasComplexExpression) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
addOpPiece(Reg.Size);
}
+
+ if (isEntryValue() && DwarfVersion >= 4)
+ emitOp(dwarf::DW_OP_stack_value);
+
DwarfRegs.clear();
return true;
}
@@ -296,6 +300,19 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
+void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) {
+ auto Op = ExprCursor.take();
+ assert(Op && Op->getOp() == dwarf::DW_OP_entry_value);
+ assert(!isMemoryLocation() &&
+ "We don't support entry values of memory locations yet");
+
+ if (DwarfVersion >= 5)
+ emitOp(dwarf::DW_OP_entry_value);
+ else
+ emitOp(dwarf::DW_OP_GNU_entry_value);
+ emitUnsigned(Op->getArg(0));
+}
+
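// An illustrative aside, not from the patch itself: in DWARF, both
// DW_OP_entry_value (v5) and DW_OP_GNU_entry_value take a ULEB128 length
// followed by that many bytes of a nested expression; the value emitted above
// is that length, taken from the DW_OP_entry_value operand of the
// DIExpression. For a value that lived in a register at function entry, the
// final form would look roughly like
//
//   DW_OP_entry_value 0x1 DW_OP_reg5
//
// with DW_OP_stack_value appended by addMachineRegExpression for DWARF >= 4.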
/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
while (ExprCursor) {
@@ -319,6 +336,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
maskSubRegister();
+ Optional<DIExpression::ExprOperand> PrevConvertOp = None;
+
while (ExprCursor) {
auto Op = ExprCursor.take();
switch (Op->getOp()) {
@@ -341,7 +360,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
// Emit a DW_OP_stack_value for implicit location descriptions.
- if (LocationKind == Implicit)
+ if (isImplicitLocation())
addStackValue();
// Emit the DW_OP_piece.
@@ -352,7 +371,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
return;
}
case dwarf::DW_OP_plus_uconst:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_plus_uconst);
emitUnsigned(Op->getArg(0));
break;
@@ -373,8 +392,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
emitOp(Op->getOp());
break;
case dwarf::DW_OP_deref:
- assert(LocationKind != Register);
- if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))
+ assert(!isRegisterLocation());
+ if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
LocationKind = Memory;
@@ -382,26 +401,69 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
emitOp(dwarf::DW_OP_deref);
break;
case dwarf::DW_OP_constu:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitConstu(Op->getArg(0));
break;
+ case dwarf::DW_OP_LLVM_convert: {
+ unsigned BitSize = Op->getArg(0);
+ dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
+ if (DwarfVersion >= 5) {
+ emitOp(dwarf::DW_OP_convert);
+ // Reuse the base_type if we already have one in this CU otherwise we
+ // create a new one.
+ unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
+ for (; I != E; ++I)
+ if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
+ CU.ExprRefedBaseTypes[I].Encoding == Encoding)
+ break;
+
+ if (I == E)
+ CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
+
+ // If targeting a location list, simply emit the index into the raw
+ // byte stream as ULEB128; DwarfDebug::emitDebugLocEntry has been
+ // fitted with the means to extract it later.
+ // If targeting an inlined DW_AT_location, insert a DIEBaseTypeRef
+ // (containing the index and a resolve mechanism during emit) into the
+ // DIE value list.
+ emitBaseTypeRef(I);
+ } else {
+ if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) {
+ if (Encoding == dwarf::DW_ATE_signed)
+ emitLegacySExt(PrevConvertOp->getArg(0));
+ else if (Encoding == dwarf::DW_ATE_unsigned)
+ emitLegacyZExt(PrevConvertOp->getArg(0));
+ PrevConvertOp = None;
+ } else {
+ PrevConvertOp = Op;
+ }
+ }
+ break;
+ }
case dwarf::DW_OP_stack_value:
LocationKind = Implicit;
break;
case dwarf::DW_OP_swap:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_swap);
break;
case dwarf::DW_OP_xderef:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_xderef);
break;
+ case dwarf::DW_OP_deref_size:
+ emitOp(dwarf::DW_OP_deref_size);
+ emitData1(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_LLVM_tag_offset:
+ TagOffset = Op->getArg(0);
+ break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
- if (LocationKind == Implicit)
+ if (isImplicitLocation())
// Turn this into an implicit location description.
addStackValue();
}
@@ -437,3 +499,25 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
addOpPiece(FragmentOffset - OffsetInBits);
OffsetInBits = FragmentOffset;
}
+
+void DwarfExpression::emitLegacySExt(unsigned FromBits) {
+ // (((X >> (FromBits - 1)) * (~0)) << FromBits) | X
+ emitOp(dwarf::DW_OP_dup);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits - 1);
+ emitOp(dwarf::DW_OP_shr);
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ emitOp(dwarf::DW_OP_mul);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits);
+ emitOp(dwarf::DW_OP_shl);
+ emitOp(dwarf::DW_OP_or);
+}
+
+void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
+ // (X & ((1 << FromBits) - 1))
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned((1ULL << FromBits) - 1);
+ emitOp(dwarf::DW_OP_and);
+}
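// A worked example, not from the patch itself: for FromBits = 8 and a stack
// value X = 0xff (-1 as a signed 8-bit quantity), the sign-extension sequence
// above computes
//
//   (((0xff >> 7) * ~0) << 8) | 0xff  =  ((1 * ~0) << 8) | 0xff  =  ~0  (i.e. -1)
//
// while the zero-extension sequence simply masks off the low bits:
//
//   0xff & ((1 << 8) - 1)  =  0xff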
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 91568ba6d107..ec2ef6e575f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,7 @@ namespace llvm {
class AsmPrinter;
class APInt;
class ByteStreamer;
-class DwarfUnit;
+class DwarfCompileUnit;
class DIELoc;
class TargetRegisterInfo;
@@ -105,23 +104,56 @@ protected:
const char *Comment;
};
+ DwarfCompileUnit &CU;
+
/// The register location, if any.
SmallVector<Register, 2> DwarfRegs;
/// Current Fragment Offset in Bits.
uint64_t OffsetInBits = 0;
- unsigned DwarfVersion;
/// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
- unsigned SubRegisterSizeInBits = 0;
- unsigned SubRegisterOffsetInBits = 0;
+ unsigned SubRegisterSizeInBits : 16;
+ unsigned SubRegisterOffsetInBits : 16;
/// The kind of location description being produced.
- enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+ enum { Unknown = 0, Register, Memory, Implicit };
+
+ /// The flags of location description being produced.
+ enum { EntryValue = 1 };
+
+ unsigned LocationKind : 3;
+ unsigned LocationFlags : 2;
+ unsigned DwarfVersion : 4;
+
+public:
+ bool isUnknownLocation() const {
+ return LocationKind == Unknown;
+ }
+
+ bool isMemoryLocation() const {
+ return LocationKind == Memory;
+ }
+
+ bool isRegisterLocation() const {
+ return LocationKind == Register;
+ }
+
+ bool isImplicitLocation() const {
+ return LocationKind == Implicit;
+ }
+
+ bool isEntryValue() const {
+ return LocationFlags & EntryValue;
+ }
+ Optional<uint8_t> TagOffset;
+
+protected:
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
/// to represent a subregister.
void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ assert(SizeInBits < 65536 && OffsetInBits < 65536);
SubRegisterSizeInBits = SizeInBits;
SubRegisterOffsetInBits = OffsetInBits;
}
@@ -138,6 +170,10 @@ protected:
/// Emit a raw unsigned value.
virtual void emitUnsigned(uint64_t Value) = 0;
+ virtual void emitData1(uint8_t Value) = 0;
+
+ virtual void emitBaseTypeRef(uint64_t Idx) = 0;
+
/// Emit a normalized unsigned constant.
void emitConstu(uint64_t Value);
@@ -200,7 +236,10 @@ protected:
~DwarfExpression() = default;
public:
- DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
+ DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU)
+ : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0),
+ LocationKind(Unknown), LocationFlags(Unknown),
+ DwarfVersion(DwarfVersion) {}
/// This needs to be called last to commit any pending changes.
void finalize();
@@ -214,15 +253,17 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
- bool isMemoryLocation() const { return LocationKind == Memory; }
- bool isUnknownLocation() const { return LocationKind == Unknown; }
-
/// Lock this down to become a memory location description.
void setMemoryLocationKind() {
- assert(LocationKind == Unknown);
+ assert(isUnknownLocation());
LocationKind = Memory;
}
+ /// Lock this down to become an entry value location.
+ void setEntryValueFlag() {
+ LocationFlags |= EntryValue;
+ }
+
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
@@ -237,6 +278,9 @@ public:
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
+ /// Emit entry value dwarf operation.
+ void addEntryValueExpression(DIExpressionCursor &ExprCursor);
+
/// Emit all remaining operations in the DIExpressionCursor.
///
/// \param FragmentOffsetInBits If this is one fragment out of multiple
@@ -248,6 +292,9 @@ public:
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
/// the fragment described by \c Expr.
void addFragmentOffset(const DIExpression *Expr);
+
+ void emitLegacySExt(unsigned FromBits);
+ void emitLegacyZExt(unsigned FromBits);
};
/// DwarfExpression implementation for .debug_loc entries.
@@ -257,27 +304,30 @@ class DebugLocDwarfExpression final : public DwarfExpression {
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
+ void emitData1(uint8_t Value) override;
+ void emitBaseTypeRef(uint64_t Idx) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
public:
- DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
- : DwarfExpression(DwarfVersion), BS(BS) {}
+ DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU)
+ : DwarfExpression(DwarfVersion, CU), BS(BS) {}
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression final : public DwarfExpression {
const AsmPrinter &AP;
- DwarfUnit &DU;
DIELoc &DIE;
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
+ void emitData1(uint8_t Value) override;
+ void emitBaseTypeRef(uint64_t Idx) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
DIELoc *finalize() {
DwarfExpression::finalize();
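
The hunk above packs the location kind, the new location flags, and the DWARF version into small bitfields and exposes predicate helpers instead of direct enum comparisons. A minimal standalone sketch of that packing pattern follows; the names (PackedLocation and friends) are hypothetical and this is not the imported class, just the same idea in isolation:

// Illustrative sketch only (not part of the imported sources): the same
// bitfield-packing idea as DwarfExpression above, using hypothetical names.
#include <cassert>
#include <cstdio>

class PackedLocation {
  enum { Unknown = 0, Register, Memory, Implicit }; // location kinds
  enum { EntryValue = 1 };                          // location flags
  unsigned Kind : 3;    // one of the kinds above
  unsigned Flags : 2;   // bitmask of flags
  unsigned Version : 4; // e.g. a DWARF version in [2, 5]

public:
  PackedLocation(unsigned V) : Kind(Unknown), Flags(0), Version(V) {}
  void setMemory() { assert(Kind == Unknown); Kind = Memory; }
  void setEntryValue() { Flags |= EntryValue; }
  bool isMemory() const { return Kind == Memory; }
  bool isEntryValue() const { return Flags & EntryValue; }
};

int main() {
  PackedLocation Loc(4);
  Loc.setMemory();
  Loc.setEntryValue();
  std::printf("memory=%d entry=%d size=%zu\n", Loc.isMemory(),
              Loc.isEntryValue(), sizeof(PackedLocation));
  return 0;
}

The three fields fit in one machine word, which is why the setSubRegisterPiece assert above also bounds its operands to 16 bits.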
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 78ccad481411..e3c9095d1343 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -44,6 +43,11 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
if (!S)
return;
+ // Skip CUs that ended up not being needed (split CUs that were abandoned
+ // because they added no information beyond the non-split CU)
+ if (llvm::empty(TheU->getUnitDie().values()))
+ return;
+
Asm->OutStreamer->SwitchSection(S);
TheU->emitHeader(UseOffsets);
Asm->emitDwarfDIE(TheU->getUnitDie());
@@ -63,6 +67,11 @@ void DwarfFile::computeSizeAndOffsets() {
if (TheU->getCUNode()->isDebugDirectivesOnly())
continue;
+ // Skip CUs that ended up not being needed (split CUs that were abandoned
+ // because they added no information beyond the non-split CU)
+ if (llvm::empty(TheU->getUnitDie().values()))
+ return;
+
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 51acca8c1e53..244678ce9dc1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -59,7 +58,6 @@ public:
MCSymbol *getSym() const { return RangeSym; }
const DwarfCompileUnit &getCU() const { return *CU; }
const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
- void addRange(RangeSpan Range) { Ranges.push_back(Range); }
};
class DwarfFile {
@@ -148,7 +146,7 @@ public:
void emitUnits(bool UseOffsets);
/// Emit the given unit to its section.
- void emitUnit(DwarfUnit *U, bool UseOffsets);
+ void emitUnit(DwarfUnit *TheU, bool UseOffsets);
/// Emit a set of abbreviations to the specific section.
void emitAbbrevs(MCSection *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 02016534a774..2a76dcb1b082 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index f484540d8d37..c5f5637fdae3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 80b365f1aa43..991ab94b50ab 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,21 +46,30 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
+ DwarfCompileUnit &CU,
DIELoc &DIE)
- : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
+ : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP),
DIE(DIE) {}
void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
- DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+ CU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
void DIEDwarfExpression::emitSigned(int64_t Value) {
- DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+ CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
- DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+ CU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+}
+
+void DIEDwarfExpression::emitData1(uint8_t Value) {
+ CU.addUInt(DIE, dwarf::DW_FORM_data1, Value);
+}
+
+void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+ CU.addBaseTypeRef(DIE, Idx);
}
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
@@ -285,21 +293,21 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
}
-MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const {
+Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const {
assert(File);
if (DD->getDwarfVersion() < 5)
- return nullptr;
+ return None;
Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
- return nullptr;
+ return None;
// Convert the string checksum to an MD5Result for the streamer.
// The verifier validates the checksum so we assume it's okay.
// An MD5 checksum is 16 bytes.
std::string ChecksumString = fromHex(Checksum->Value);
- void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1);
- memcpy(CKMem, ChecksumString.data(), 16);
- return reinterpret_cast<MD5::MD5Result *>(CKMem);
+ MD5::MD5Result CKMem;
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+ return CKMem;
}
unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
@@ -311,7 +319,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
- getMD5AsBytes(File), File->getSource());
+ getMD5AsBytes(File),
+ Asm->OutContext.getDwarfVersion(),
+ File->getSource());
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -393,7 +403,6 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
return;
unsigned FileID = getOrCreateSourceID(File);
- assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
@@ -462,9 +471,8 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
- DITypeRef Deriv = DTy->getBaseType();
- assert(Deriv && "Expected valid base type");
- return isUnsignedDIType(DD, DD->resolve(Deriv));
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isUnsignedDIType(DD, DTy->getBaseType());
}
auto *BTy = cast<DIBasicType>(Ty);
@@ -523,6 +531,10 @@ void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
}
+void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
+ addConstantValue(Die, isUnsignedDIType(DD, Ty), Val);
+}
+
void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
// FIXME: This is a bit conservative/simple - it emits negative values always
// sign extended to 64 bits rather than minimizing the number of bytes.
@@ -603,8 +615,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getDIE(Context);
}
-DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
- auto *Context = resolve(Ty->getScope());
+DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+ auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
if (DIE *TyDIE = getDIE(Ty))
@@ -619,6 +631,37 @@ DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
return &TyDIE;
}
+DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
+ const DIType *Ty) {
+ // Create new type.
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
+
+ updateAcceleratorTables(Context, Ty, TyDIE);
+
+ if (auto *BT = dyn_cast<DIBasicType>(Ty))
+ constructTypeDIE(TyDIE, BT);
+ else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+ constructTypeDIE(TyDIE, STy);
+ else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
+ (Ty->getRawName() || CTy->getRawIdentifier())) {
+ // Skip updating the accelerator tables since this is not the full type.
+ if (MDString *TypeId = CTy->getRawIdentifier())
+ DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
+ else {
+ auto X = DD->enterNonTypeUnitContext();
+ finishNonUnitTypeDIE(TyDIE, CTy);
+ }
+ return &TyDIE;
+ }
+ constructTypeDIE(TyDIE, CTy);
+ } else {
+ constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
+ }
+
+ return &TyDIE;
+}
+
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
return nullptr;
@@ -627,43 +670,23 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
// DW_TAG_restrict_type is not supported in DWARF2
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
- return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+ return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// DW_TAG_atomic_type is not supported in DWARF < 5
if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
- return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+ return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- auto *Context = resolve(Ty->getScope());
+ auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
- // Create new type.
- DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
-
- updateAcceleratorTables(Context, Ty, TyDIE);
-
- if (auto *BT = dyn_cast<DIBasicType>(Ty))
- constructTypeDIE(TyDIE, BT);
- else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
- constructTypeDIE(TyDIE, STy);
- else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- if (DD->generateTypeUnits() && !Ty->isForwardDecl())
- if (MDString *TypeId = CTy->getRawIdentifier()) {
- DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- // Skip updating the accelerator tables since this is not the full type.
- return &TyDIE;
- }
- constructTypeDIE(TyDIE, CTy);
- } else {
- constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
- }
-
- return &TyDIE;
+ return static_cast<DwarfUnit *>(ContextDIE->getUnit())
+ ->createTypeDIE(Context, *ContextDIE, Ty);
}
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
@@ -679,7 +702,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
- isa<DINamespace>(Context))
+ isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
addGlobalType(Ty, TyDIE, Context);
}
}
@@ -702,8 +725,8 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
SmallVector<const DIScope *, 1> Parents;
while (!isa<DICompileUnit>(Context)) {
Parents.push_back(Context);
- if (Context->getScope())
- Context = resolve(Context->getScope());
+ if (const DIScope *S = Context->getScope())
+ Context = S;
else
// Structure, etc types will have a NULL context if they're at the top
// level.
@@ -754,7 +777,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
- const DIType *FromTy = resolve(DTy->getBaseType());
+ const DIType *FromTy = DTy->getBaseType();
if (FromTy)
addType(Buffer, FromTy);
@@ -770,24 +793,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
- addDIEEntry(
- Buffer, dwarf::DW_AT_containing_type,
- *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType())));
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
- // If DWARF address space value is other than None, add it for pointer and
- // reference types as DW_AT_address_class.
- if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type ||
- Tag == dwarf::DW_TAG_reference_type))
+ // If DWARF address space value is other than None, add it. The IR
+ // verifier checks that DWARF address space only exists for pointer
+ // or reference types.
+ if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
DTy->getDWARFAddressSpace().getValue());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
for (unsigned i = 1, N = Args.size(); i < N; ++i) {
- const DIType *Ty = resolve(Args[i]);
+ const DIType *Ty = Args[i];
if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
@@ -804,7 +826,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
// Add return type. A void return won't have a type.
auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
if (Elements.size())
- if (auto RTy = resolve(Elements[0]))
+ if (auto RTy = Elements[0])
addType(Buffer, RTy);
bool isPrototyped = true;
@@ -875,7 +897,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
- addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
+ addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend);
} else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
} else if (Tag == dwarf::DW_TAG_variant_part) {
@@ -884,7 +906,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
- if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType())))
+ if (isUnsignedDIType(DD, Discriminator->getBaseType()))
addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
else
addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
@@ -898,7 +920,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property->getType())
- addType(ElemDie, resolve(Property->getType()));
+ addType(ElemDie, Property->getType());
addSourceLine(ElemDie, Property);
StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
@@ -924,7 +946,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// inside C++ composite types to point to the base class with the vtable.
// Rust uses DW_AT_containing_type to link a vtable to the type
// for which it was created.
- if (auto *ContainingType = resolve(CTy->getVTableHolder()))
+ if (auto *ContainingType = CTy->getVTableHolder())
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
@@ -994,7 +1016,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE(
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists, it could be void and therefore no type.
if (TP->getType())
- addType(ParamDIE, resolve(TP->getType()));
+ addType(ParamDIE, TP->getType());
if (!TP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
}
@@ -1006,12 +1028,12 @@ void DwarfUnit::constructTemplateValueParameterDIE(
// Add the type if there is one, template template and template parameter
// packs will not have a type.
if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
- addType(ParamDIE, resolve(VP->getType()));
+ addType(ParamDIE, VP->getType());
if (!VP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
- addConstantValue(ParamDIE, CI, resolve(VP->getType()));
+ addConstantValue(ParamDIE, CI, VP->getType());
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// We cannot describe the location of dllimport'd entities: the
// computation of their address requires loads from the IAT.
@@ -1085,7 +1107,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
- Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope()));
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
if (DIE *SPDie = getDIE(SP))
return SPDie;
@@ -1107,7 +1129,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
if (SP->isDefinition())
return &SPDie;
- applySubprogramAttributes(SP, SPDie);
+ static_cast<DwarfUnit *>(SPDie.getUnit())
+ ->applySubprogramAttributes(SP, SPDie);
return &SPDie;
}
@@ -1197,7 +1220,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
if (Args.size())
- if (auto Ty = resolve(Args[0]))
+ if (auto Ty = Args[0])
addType(SPDie, Ty);
unsigned VK = SP->getVirtuality();
@@ -1209,8 +1232,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
}
- ContainingTypeMap.insert(
- std::make_pair(&SPDie, resolve(SP->getContainingType())));
+ ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType()));
}
if (!SP->isDefinition()) {
@@ -1261,6 +1283,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isMainSubprogram())
addFlag(SPDie, dwarf::DW_AT_main_subprogram);
+ if (SP->isPure())
+ addFlag(SPDie, dwarf::DW_AT_pure);
+ if (SP->isElemental())
+ addFlag(SPDie, dwarf::DW_AT_elemental);
+ if (SP->isRecursive())
+ addFlag(SPDie, dwarf::DW_AT_recursive);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
@@ -1310,7 +1338,7 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) {
const uint64_t ActualSize = CTy->getSizeInBits();
// Obtain the size of each element in the vector.
- DIType *BaseTy = CTy->getBaseType().resolve();
+ DIType *BaseTy = CTy->getBaseType();
assert(BaseTy && "Unknown vector element type.");
const uint64_t ElementSize = BaseTy->getSizeInBits();
@@ -1338,7 +1366,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
// Emit the element type.
- addType(Buffer, resolve(CTy->getBaseType()));
+ addType(Buffer, CTy->getBaseType());
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
@@ -1356,7 +1384,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
- const DIType *DTy = resolve(CTy->getBaseType());
+ const DIType *DTy = CTy->getBaseType();
bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy);
if (DTy) {
if (DD->getDwarfVersion() >= 3)
@@ -1365,6 +1393,9 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
+ auto *Context = CTy->getScope();
+ bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+ isa<DINamespace>(Context) || isa<DICommonBlock>(Context);
DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
@@ -1376,6 +1407,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addString(Enumerator, dwarf::DW_AT_name, Name);
auto Value = static_cast<uint64_t>(Enum->getValue());
addConstantValue(Enumerator, IsUnsigned, Value);
+ if (IndexEnumerators)
+ addGlobalName(Name, Enumerator, Context);
}
}
}
@@ -1400,7 +1433,7 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
- if (DIType *Resolved = resolve(DT->getBaseType()))
+ if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
addSourceLine(MemberDie, DT);
@@ -1509,7 +1542,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope()));
+ DIE *ContextDIE = getOrCreateContextDIE(DT->getScope());
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
@@ -1518,7 +1551,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
- const DIType *Ty = resolve(DT->getBaseType());
+ const DIType *Ty = DT->getBaseType();
addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
addType(StaticMemberDIE, Ty);
@@ -1671,3 +1704,11 @@ void DwarfUnit::addLoclistsBase() {
DU->getLoclistsTableBaseSym(),
TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
+
+void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+ addFlag(D, dwarf::DW_AT_declaration);
+ StringRef Name = CTy->getName();
+ if (!Name.empty())
+ addString(D, dwarf::DW_AT_name, Name);
+ getCU().createTypeDIE(CTy);
+}
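
getMD5AsBytes above now returns Optional<MD5::MD5Result> by value instead of a pointer allocated in the MCContext, so callers test for presence rather than for null. A rough standard-C++ sketch of the same value-or-nothing pattern, with hypothetical names (Digest, digestFromHex) and std::optional standing in for llvm::Optional:

// Illustrative sketch only (standard C++, hypothetical names): return an
// optional value instead of a context-allocated pointer, as getMD5AsBytes
// above now does.
#include <array>
#include <cstdio>
#include <optional>
#include <string>

using Digest = std::array<unsigned char, 16>;

// Returns no value when the checksum string is absent or malformed,
// instead of handing back a raw pointer that callers must null-check.
std::optional<Digest> digestFromHex(const std::string &Hex) {
  if (Hex.size() != 32)
    return std::nullopt;
  Digest D{};
  for (size_t I = 0; I < D.size(); ++I)
    D[I] = static_cast<unsigned char>(
        std::stoul(Hex.substr(2 * I, 2), nullptr, 16));
  return D;
}

int main() {
  auto D = digestFromHex("d41d8cd98f00b204e9800998ecf8427e");
  std::printf("have digest: %d\n", D.has_value());
  return 0;
}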
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index a59ebb7c1465..56c934a35ae8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -199,6 +198,7 @@ public:
void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
+ void addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty);
void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
/// Add constant value entry in variable DIE.
@@ -237,6 +237,9 @@ public:
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool SkipSPAttributes = false);
+ /// Creates type DIE with specific context.
+ DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty);
+
/// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *TyNode);
@@ -294,7 +297,10 @@ public:
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
- MD5::MD5Result *getMD5AsBytes(const DIFile *File) const;
+ Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+
+ /// Get context owner's DIE.
+ DIE *createTypeDIE(const DICompositeType *Ty);
protected:
~DwarfUnit();
@@ -306,17 +312,6 @@ protected:
/// create a new ID and insert it in the line table.
virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;
- /// Look in the DwarfDebug map for the MDNode that corresponds to the
- /// reference.
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
-
- /// If this is a named finished type then include it in the list of types for
- /// the accelerator tables.
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
- const DIE &TyDIE);
-
/// Emit the common part of the header for this unit.
void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
@@ -344,6 +339,13 @@ private:
/// Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+ virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0;
+
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
virtual bool isDwoUnit() const = 0;
const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
};
@@ -356,6 +358,7 @@ class DwarfTypeUnit final : public DwarfUnit {
bool UsedLineTable = false;
unsigned getOrCreateSourceID(const DIFile *File) override;
+ void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
bool isDwoUnit() const override;
public:
@@ -365,9 +368,6 @@ public:
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
- /// Get context owner's DIE.
- DIE *createTypeDIE(const DICompositeType *Ty);
-
/// Emit the header for this unit, not including the initial length field.
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 7599121de2b0..99e3687b36b8 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,9 +1,8 @@
//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -379,7 +378,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
unsigned CallSiteEncoding =
- IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
+ IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
+ Asm->getObjFileLowering().getCallSiteEncoding();
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
// Type infos.
@@ -524,24 +524,24 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Offset of the call site relative to the start of the procedure.
if (VerboseAsm)
Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
- Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym);
+ Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" Call between ") +
BeginLabel->getName() + " and " +
EndLabel->getName());
- Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel);
+ Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
// Offset of the landing pad relative to the start of the procedure.
if (!S.LPad) {
if (VerboseAsm)
Asm->OutStreamer->AddComment(" has no landing pad");
- Asm->EmitULEB128(0);
+ Asm->EmitCallSiteValue(0, CallSiteEncoding);
} else {
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" jumps to ") +
S.LPad->LandingPadLabel->getName());
- Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel,
- EHFuncBeginSym);
+ Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
+ CallSiteEncoding);
}
// Offset of the first associated action record, relative to the start of
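
The EHStreamer change above replaces hard-coded ULEB128 label differences with a target-chosen call-site encoding, keeping udata4 for SjLj. A small self-contained sketch, under the assumption that only the size trade-off matters here, of how a ULEB128-encoded offset compares to a fixed 4-byte one:

// Illustrative sketch only (standard C++): fixed 4-byte udata4 versus a
// variable-length ULEB128 encoding of a call-site offset.
#include <cstdint>
#include <cstdio>
#include <vector>

// Encode Value as ULEB128: 7 bits per byte, high bit marks continuation.
std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (Value);
  return Out;
}

int main() {
  uint64_t Offset = 300; // a call-site offset within a function
  std::printf("udata4: 4 bytes, uleb128: %zu bytes\n",
              encodeULEB128(Offset).size());
  return 0;
}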
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index ce912d032c6d..e62cf17a05d4 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -1,9 +1,8 @@
//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 34677ecc9e69..39392b79e960 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -1,9 +1,8 @@
//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3479a00def23..3145cc90dc73 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -1,9 +1,8 @@
//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WasmException.cpp b/lib/CodeGen/AsmPrinter/WasmException.cpp
index 527e5ae50146..444b0ed17b6d 100644
--- a/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,10 +18,10 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'if_except' instruction to denote
+ // This is the symbol used in 'throw' and 'br_on_exn' instruction to denote
// this is a C++ exception. This symbol has to be emitted somewhere once in
// the module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'if_except' instruction in the module, and emit the
+ // least one 'throw' or 'br_on_exn' instruction in the module, and emit the
// symbol only if so.
SmallString<60> NameStr;
Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
diff --git a/lib/CodeGen/AsmPrinter/WasmException.h b/lib/CodeGen/AsmPrinter/WasmException.h
index cbdb42457cf8..1893b6b2df43 100644
--- a/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/lib/CodeGen/AsmPrinter/WasmException.h
@@ -1,9 +1,8 @@
//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 18d37caf57ee..290be81c6baa 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 28f119e35966..def0a59ab007 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -1,9 +1,8 @@
//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index cf8e8c69bc2a..155e91ce61a1 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,6 +109,12 @@ void WinException::beginFunction(const MachineFunction *MF) {
beginFunclet(MF->front(), Asm->CurrentFnSym);
}
+void WinException::markFunctionEnd() {
+ if (isAArch64 && CurrentFuncletEntry &&
+ (shouldEmitMoves || shouldEmitPersonality))
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+}
+
/// endFunction - Gather and emit post-function exception information.
///
void WinException::endFunction(const MachineFunction *MF) {
@@ -129,7 +134,7 @@ void WinException::endFunction(const MachineFunction *MF) {
NonConstMF->tidyLandingPads();
}
- endFunclet();
+ endFuncletImpl();
// endFunclet will emit the necessary .xdata tables for x64 SEH.
if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
@@ -232,6 +237,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
}
void WinException::endFunclet() {
+ if (isAArch64 && CurrentFuncletEntry &&
+ (shouldEmitMoves || shouldEmitPersonality)) {
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ }
+ endFuncletImpl();
+}
+
+void WinException::endFuncletImpl() {
// No funclet to process? Great, we have nothing to do.
if (!CurrentFuncletEntry)
return;
@@ -247,8 +261,6 @@ void WinException::endFunclet() {
// to EmitWinEHHandlerData below can calculate the size of the funclet or
// function.
if (isAArch64) {
- Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
- Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
Asm->OutStreamer->getCurrentSectionOnly());
Asm->OutStreamer->SwitchSection(XData);
@@ -545,15 +557,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
OS.AddComment(Comment);
};
- // Emit a label assignment with the SEH frame offset so we can use it for
- // llvm.eh.recoverfp.
- StringRef FLinkageName =
- GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
- MCSymbol *ParentFrameOffset =
- Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- const MCExpr *MCOffset =
- MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ if (!isAArch64) {
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.eh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ }
// Use the assembler to compute the number of table entries through label
// difference and division.
@@ -936,8 +950,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
int FI = FuncInfo.EHRegNodeFrameIndex;
if (FI != INT_MAX) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- unsigned UnusedReg;
- Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
+ Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI);
}
MCContext &Ctx = Asm->OutContext;
diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h
index 37c796f89765..dc5036302131 100644
--- a/lib/CodeGen/AsmPrinter/WinException.h
+++ b/lib/CodeGen/AsmPrinter/WinException.h
@@ -1,9 +1,8 @@
//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,6 +85,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// only), it is relative to the frame pointer.
int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
+ void endFuncletImpl();
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -100,6 +100,8 @@ public:
/// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
+ void markFunctionEnd() override;
+
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 95581c09dd1c..dc7eaf6a5fe7 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1,9 +1,8 @@
//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -361,7 +360,7 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
const DataLayout &DL) {
- EVT VT = TLI->getValueType(DL, T);
+ EVT VT = TLI->getMemValueType(DL, T);
unsigned BitWidth = VT.getStoreSizeInBits();
assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
return IntegerType::get(T->getContext(), BitWidth);
@@ -382,7 +381,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
- auto *NewLI = Builder.CreateLoad(NewAddr);
+ auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@@ -431,6 +430,9 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
IRBuilder<> Builder(LI);
AtomicOrdering Order = LI->getOrdering();
+ if (Order == AtomicOrdering::Unordered)
+ Order = AtomicOrdering::Monotonic;
+
Value *Addr = LI->getPointerOperand();
Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
Constant *DummyVal = Constant::getNullValue(Ty);
@@ -496,11 +498,26 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Value *Loaded, Value *NewVal,
AtomicOrdering MemOpOrder,
Value *&Success, Value *&NewLoaded) {
+ Type *OrigTy = NewVal->getType();
+
+ // This code can go away when cmpxchg supports FP types.
+ bool NeedBitcast = OrigTy->isFloatingPointTy();
+ if (NeedBitcast) {
+ IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
+ unsigned AS = Addr->getType()->getPointerAddressSpace();
+ Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
+ NewVal = Builder.CreateBitCast(NewVal, IntTy);
+ Loaded = Builder.CreateBitCast(Loaded, IntTy);
+ }
+
Value* Pair = Builder.CreateAtomicCmpXchg(
Addr, Loaded, NewVal, MemOpOrder,
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
Success = Builder.CreateExtractValue(Pair, 1, "success");
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ if (NeedBitcast)
+ NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -535,6 +552,10 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
case AtomicRMWInst::UMin:
NewVal = Builder.CreateICmpULE(Loaded, Inc);
return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::FAdd:
+ return Builder.CreateFAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::FSub:
+ return Builder.CreateFSub(Loaded, Inc, "new");
default:
llvm_unreachable("Unknown atomic op");
}
@@ -564,6 +585,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(AI);
if (ValueSize < MinCASSize) {
+ // TODO: Handle atomicrmw fadd/fsub
+ if (AI->getType()->isFloatingPointTy())
+ return false;
+
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
@@ -1090,11 +1115,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
SuccessOrder != AtomicOrdering::Monotonic &&
SuccessOrder != AtomicOrdering::Acquire &&
- !F->optForMinSize();
+ !F->hasMinSize();
// There's no overhead for sinking the release barrier in a weak cmpxchg, so
// do it even on minsize.
- bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
+ bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
//
@@ -1533,6 +1558,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
// No atomic libcalls are available for max/min/umax/umin.
return {};
}
@@ -1671,16 +1698,25 @@ bool AtomicExpand::expandAtomicOpToLibcall(
}
// 'ptr' argument.
- Value *PtrVal =
- Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
+ // note: This assumes all address spaces share a common libfunc
+ // implementation and that addresses are convertible. For systems without
+ // that property, we'd need to extend this mechanism to support AS-specific
+ // families of atomic intrinsics.
+ auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
+ Value *PtrVal = Builder.CreateBitCast(PointerOperand,
+ Type::getInt8PtrTy(Ctx, PtrTypeAS));
+ PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
Args.push_back(PtrVal);
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
AllocaCASExpected->setAlignment(AllocaAlignment);
+ unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
+
AllocaCASExpected_i8 =
- Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
+ Builder.CreateBitCast(AllocaCASExpected,
+ Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
Args.push_back(AllocaCASExpected_i8);
@@ -1707,8 +1743,9 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
AllocaResult->setAlignment(AllocaAlignment);
+ unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
- Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
Args.push_back(AllocaResult_i8);
}
@@ -1734,7 +1771,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
for (Value *Arg : Args)
ArgTys.push_back(Arg->getType());
FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
- Constant *LibcallFn =
+ FunctionCallee LibcallFn =
M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
CallInst *Call = Builder.CreateCall(LibcallFn, Args);
Call->setAttributes(Attr);
@@ -1749,8 +1786,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// from call}
Type *FinalResultTy = I->getType();
Value *V = UndefValue::get(FinalResultTy);
- Value *ExpectedOut =
- Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
+ Value *ExpectedOut = Builder.CreateAlignedLoad(
+ CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
V = Builder.CreateInsertValue(V, ExpectedOut, 0);
V = Builder.CreateInsertValue(V, Result, 1);
@@ -1760,7 +1797,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (UseSizedLibcall)
V = Builder.CreateBitOrPointerCast(Result, I->getType());
else {
- V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
+ V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
+ AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
}
I->replaceAllUsesWith(V);
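
createCmpXchgInstFun above bitcasts floating-point operands to a same-width integer because cmpxchg does not yet accept FP types, then bitcasts the loaded result back. A standalone sketch of that idea in plain C++ follows, using std::atomic and memcpy-based bit casts; atomicFAdd is a hypothetical helper for illustration, not the pass itself:

// Illustrative sketch only (standard C++, not the pass): compare-and-swap a
// float by reinterpreting its bits as a same-width integer, the same trick
// createCmpXchgInstFun uses above.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t bitsOf(float F) {
  uint32_t I;
  std::memcpy(&I, &F, sizeof(I));
  return I;
}

static float floatOf(uint32_t I) {
  float F;
  std::memcpy(&F, &I, sizeof(F));
  return F;
}

// Atomically add Inc to *Addr, using an integer CAS loop on the bit pattern.
float atomicFAdd(std::atomic<uint32_t> *Addr, float Inc) {
  uint32_t Old = Addr->load();
  while (true) {
    float NewVal = floatOf(Old) + Inc;
    if (Addr->compare_exchange_weak(Old, bitsOf(NewVal)))
      return NewVal; // success: Old held the previous bits
    // failure: Old was refreshed with the current value; retry
  }
}

int main() {
  std::atomic<uint32_t> A(bitsOf(1.5f));
  std::printf("%f\n", atomicFAdd(&A, 2.25f));
  return 0;
}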
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index d11f375b176e..57cefae2066a 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index efbfd5f4ab2c..fb54b5d6c8d8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1,9 +1,8 @@
//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -722,7 +721,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- return EffectiveTailLen >= 2 && MF->getFunction().optForSize() &&
+ return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
(I1 == MBB1->begin() || I2 == MBB2->begin());
}
@@ -1071,31 +1070,29 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
bool MadeChange = false;
- if (!EnableTailMerge) return MadeChange;
+ if (!EnableTailMerge)
+ return MadeChange;
// First find blocks with no successors.
- // Block placement does not create new tail merging opportunities for these
- // blocks.
- if (!AfterBlockPlacement) {
- MergePotentials.clear();
- for (MachineBasicBlock &MBB : MF) {
- if (MergePotentials.size() == TailMergeThreshold)
- break;
- if (!TriedMerging.count(&MBB) && MBB.succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
- }
-
- // If this is a large problem, avoid visiting the same basic blocks
- // multiple times.
+ // Block placement may create new tail merging opportunities for these blocks.
+ MergePotentials.clear();
+ for (MachineBasicBlock &MBB : MF) {
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
-
- // See if we can do any tail merging on those.
- if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
@@ -1183,29 +1180,6 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
}
}
- // Failing case: the only way IBB can be reached from PBB is via
- // exception handling. Happens for landing pads. Would be nice to have
- // a bit in the edge so we didn't have to do all this.
- if (IBB->isEHPad()) {
- MachineFunction::iterator IP = ++PBB->getIterator();
- MachineBasicBlock *PredNextBB = nullptr;
- if (IP != MF.end())
- PredNextBB = &*IP;
- if (!TBB) {
- if (IBB != PredNextBB) // fallthrough
- continue;
- } else if (FBB) {
- if (TBB != IBB && FBB != IBB) // cbr then ubr
- continue;
- } else if (Cond.empty()) {
- if (TBB != IBB) // ubr
- continue;
- } else {
- if (TBB != IBB && IBB != PredNextBB) // cbr
- continue;
- }
- }
-
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
DebugLoc dl = PBB->findBranchDebugLoc();
@@ -1598,7 +1572,7 @@ ReoptimizeBlock:
}
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
- MF.getFunction().optForSize()) {
+ MF.getFunction().hasOptSize()) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// performance. Therefore, only consider this for optsize functions.
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index accd0ab7317b..761ff9c7d54e 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -1,9 +1,8 @@
//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index c092da2b6602..3ad6266d4f35 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -1,9 +1,8 @@
//===- BranchRelaxation.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp
index 210699cbf239..cc4b2caa9bed 100644
--- a/lib/CodeGen/BreakFalseDeps.cpp
+++ b/lib/CodeGen/BreakFalseDeps.cpp
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index 93939e573b7b..bfc10cb3fef2 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -1,9 +1,8 @@
//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp
index c4799855a2b3..1a4d54231cfd 100644
--- a/lib/CodeGen/CFIInstrInserter.cpp
+++ b/lib/CodeGen/CFIInstrInserter.cpp
@@ -1,9 +1,8 @@
//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 02347b9f0b5c..7164fdfb7886 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -1,9 +1,8 @@
//===- CalcSpillWeights.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 3593089b206d..497fcb147849 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -1,9 +1,8 @@
//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 66166482c78b..c37ed57781d4 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,14 +30,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfConverterPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
- initializeExpandISelPseudosPass(Registry);
initializeExpandMemCmpPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
+ initializeFinalizeISelPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
+ initializeHardwareLoopsPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c35f8666fa3c..52b4bbea012b 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1,9 +1,8 @@
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -32,6 +32,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -292,15 +293,16 @@ class TypePromotionTransaction;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
- /// True if CFG is modified in any way.
- bool ModifiedDT;
-
/// True if optimizing for size.
bool OptSize;
/// DataLayout for the Function being processed.
const DataLayout *DL = nullptr;
+ /// Building the dominator tree can be expensive, so we only build it
+ /// lazily and update it when required.
+ std::unique_ptr<DominatorTree> DT;
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -339,6 +341,13 @@ class TypePromotionTransaction;
}
}
+ // Get the DominatorTree, building if necessary.
+ DominatorTree &getDT(Function &F) {
+ if (!DT)
+ DT = llvm::make_unique<DominatorTree>(F);
+ return *DT;
+ }
+
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -355,11 +364,12 @@ class TypePromotionTransaction;
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
- bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
bool placeDbgValues(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -374,8 +384,15 @@ class TypePromotionTransaction;
bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
- bool splitBranchCondition(Function &F);
+ bool splitBranchCondition(Function &F, bool &ModifiedDT);
bool simplifyOffsetableRelocate(Instruction &I);
+
+ bool tryToSinkFreeOperands(Instruction *I);
+ bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+ Intrinsic::ID IID);
+ bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
+ bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
+ bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
};
} // end anonymous namespace
@@ -401,7 +418,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
InsertedInsts.clear();
PromotedInsts.clear();
- ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
TM = &TPC->getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -413,7 +429,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
- OptSize = F.optForSize();
+ OptSize = F.hasOptSize();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -444,8 +460,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
+ bool ModifiedDT = false;
if (!DisableBranchOpts)
- EverMadeChange |= splitBranchCondition(F);
+ EverMadeChange |= splitBranchCondition(F, ModifiedDT);
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
@@ -454,6 +471,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ DT.reset();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -654,6 +672,16 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
BB->getSinglePredecessor()->getSingleSuccessor()))
return false;
+ // Skip merging if the block's successor is also a successor to any callbr
+ // that leads to this block.
+ // FIXME: Is this really needed? Is this a correctness issue?
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+ for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
+ if (DestBB == CBI->getSuccessor(i))
+ return false;
+ }
+
// Try to skip merging if the unique predecessor of BB is terminated by a
// switch or indirect branch instruction, and BB is used as an incoming block
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
@@ -1040,7 +1068,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
return MadeChange;
}
-/// SinkCast - Sink the specified cast instruction into its user blocks
+/// Sink the specified cast instruction into its user blocks.
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -1114,8 +1142,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
// than sinking only nop casts, but is helpful on some platforms.
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
- if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
- ASC->getDestAddressSpace()))
+ if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
+ ASC->getDestAddressSpace()))
return false;
}
@@ -1148,54 +1176,169 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
-/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
-/// possible.
-///
-/// Return true if any changes were made.
-static bool CombineUAddWithOverflow(CmpInst *CI) {
- Value *A, *B;
- Instruction *AddI;
- if (!match(CI,
- m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+ CmpInst *Cmp,
+ Intrinsic::ID IID) {
+ if (BO->getParent() != Cmp->getParent()) {
+ // We used to use a dominator tree here to allow multi-block optimization.
+ // But that was problematic because:
+ // 1. It could cause a perf regression by hoisting the math op into the
+ // critical path.
+ // 2. It could cause a perf regression by creating a value that was live
+ // across multiple blocks and increasing register pressure.
+ // 3. Use of a dominator tree could cause large compile-time regression.
+ // This is because we recompute the DT on every change in the main CGP
+ // run-loop. The recomputing is probably unnecessary in many cases, so if
+ // that was fixed, using a DT here would be ok.
+ return false;
+ }
+
+ // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+ Value *Arg0 = BO->getOperand(0);
+ Value *Arg1 = BO->getOperand(1);
+ if (BO->getOpcode() == Instruction::Add &&
+ IID == Intrinsic::usub_with_overflow) {
+ assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+ Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+ }
+
+ // Insert at the first instruction of the pair.
+ Instruction *InsertPt = nullptr;
+ for (Instruction &Iter : *Cmp->getParent()) {
+ if (&Iter == BO || &Iter == Cmp) {
+ InsertPt = &Iter;
+ break;
+ }
+ }
+ assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
+
+ IRBuilder<> Builder(InsertPt);
+ Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
+ Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+ Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+ BO->replaceAllUsesWith(Math);
+ Cmp->replaceAllUsesWith(OV);
+ BO->eraseFromParent();
+ Cmp->eraseFromParent();
+ return true;
+}
+
+/// Match special-case patterns that check for unsigned add overflow.
+static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
+ BinaryOperator *&Add) {
+ // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
+ // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ if (isa<Constant>(A))
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
+ B = ConstantInt::get(B->getType(), 1);
+ else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
+ B = ConstantInt::get(B->getType(), -1);
+ else
return false;
- Type *Ty = AddI->getType();
- if (!isa<IntegerType>(Ty))
+ // Check the users of the variable operand of the compare looking for an add
+ // with the adjusted constant.
+ for (User *U : A->users()) {
+ if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
+ Add = cast<BinaryOperator>(U);
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Try to combine the compare into a call to the llvm.uadd.with.overflow
+/// intrinsic. Return true if any changes were made.
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
+ bool &ModifiedDT) {
+ Value *A, *B;
+ BinaryOperator *Add;
+ if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+ if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::UADDO,
+ TLI->getValueType(*DL, Add->getType())))
return false;
- // We don't want to move around uses of condition values this late, so we we
+ // We don't want to move around uses of condition values this late, so we
// check if it is legal to create the call to the intrinsic in the basic
- // block containing the icmp:
+ // block containing the icmp.
+ if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
+ return false;
- if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
return false;
-#ifndef NDEBUG
- // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
- // for now:
- if (AddI->hasOneUse())
- assert(*AddI->user_begin() == CI && "expected!");
-#endif
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = true;
+ return true;
+}
+
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
+ bool &ModifiedDT) {
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+ if (isa<Constant>(A) && isa<Constant>(B))
+ return false;
+
+ // Convert (A u> B) to (A u< B) to simplify pattern matching.
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_UGT) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A == 0) is the same as (A u< 1).
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+ B = ConstantInt::get(B->getType(), 1);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A != 0) is the same as (0 u< A).
+ if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ if (Pred != ICmpInst::ICMP_ULT)
+ return false;
+
+ // Walk the users of a variable operand of a compare looking for a subtract or
+ // add with that same operand. Also match the 2nd operand of the compare to
+ // the add/sub, but that may be a negated constant operand of an add.
+ Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+ BinaryOperator *Sub = nullptr;
+ for (User *U : CmpVariableOperand->users()) {
+ // A - B, A u< B --> usubo(A, B)
+ if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+
+ // A + (-C), A u< C (canonicalized form of (sub A, C))
+ const APInt *CmpC, *AddC;
+ if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+ match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+ }
+ if (!Sub)
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+ TLI->getValueType(*DL, Sub->getType())))
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+ return false;
- Module *M = CI->getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-
- auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
-
- DebugLoc Loc = CI->getDebugLoc();
- auto *UAddWithOverflow =
- CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
- UAddWithOverflow->setDebugLoc(Loc);
- auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
- UAdd->setDebugLoc(Loc);
- auto *Overflow =
- ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
- Overflow->setDebugLoc(Loc);
-
- CI->replaceAllUsesWith(Overflow);
- AddI->replaceAllUsesWith(UAdd);
- CI->eraseFromParent();
- AddI->eraseFromParent();
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = true;
return true;
}
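
A stand-alone reference, not part of the patch (the function names here are hypothetical): the C++ idioms below compile to exactly the add/cmp and sub/cmp pairs described in the comments above, and on targets whose shouldFormOverflowOp hook accepts ISD::UADDO/ISD::USUBO the new combineToUAddWithOverflow / combineToUSubWithOverflow helpers fold each pair into a single uadd.with.overflow / usub.with.overflow call, the same intrinsics Clang emits for __builtin_add_overflow and __builtin_sub_overflow on unsigned operands.

#include <cstdint>

// icmp ult (add %a, %b), %a -- the form m_UAddWithOverflow recognizes.
bool add_overflows(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;    // add i32 %A, %B
  return Sum < A; // icmp ult %sum, %A becomes the overflow bit of uadd.with.overflow
}

// sub %a, %b paired with icmp ult %a, %b -- the "A - B, A u< B --> usubo(A, B)" case.
uint32_t saturating_sub(uint32_t A, uint32_t B) {
  uint32_t D = A - B;   // sub i32 %A, %B
  return A < B ? 0 : D; // icmp ult %A, %B becomes the overflow bit of usub.with.overflow
}
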
@@ -1205,18 +1348,19 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
- BasicBlock *DefBB = CI->getParent();
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+ if (TLI.hasMultipleConditionRegisters())
+ return false;
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
- if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+ if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
return false;
// Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
- for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+ for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
UI != E; ) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
@@ -1230,6 +1374,7 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
// Figure out which BB this cmp is used in.
BasicBlock *UserBB = User->getParent();
+ BasicBlock *DefBB = Cmp->getParent();
// If this user is in the same block as the cmp, don't change the cmp.
if (UserBB == DefBB) continue;
@@ -1241,10 +1386,11 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
InsertedCmp =
- CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
- CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+ CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1), "",
+ &*InsertPt);
// Propagate the debug info.
- InsertedCmp->setDebugLoc(CI->getDebugLoc());
+ InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
}
// Replace a use of the cmp with a use of the new cmp.
@@ -1254,19 +1400,22 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty()) {
- CI->eraseFromParent();
+ if (Cmp->use_empty()) {
+ Cmp->eraseFromParent();
MadeChange = true;
}
return MadeChange;
}
-static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
- if (SinkCmpExpression(CI, TLI))
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
+ if (sinkCmpExpression(Cmp, *TLI))
return true;
- if (CombineUAddWithOverflow(CI))
+ if (combineToUAddWithOverflow(Cmp, ModifiedDT))
+ return true;
+
+ if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
return false;
@@ -1301,7 +1450,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
for (auto *U : AndI->users()) {
Instruction *User = cast<Instruction>(U);
- // Only sink for and mask feeding icmp with 0.
+ // Only sink 'and' feeding icmp with 0.
if (!isa<ICmpInst>(User))
return false;
@@ -1704,9 +1853,23 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::experimental_widenable_condition: {
+ // Give up on future widening opportunities so that we can fold away dead
+ // paths and merge blocks before going into block-local instruction
+ // selection.
+ if (II->use_empty()) {
+ II->eraseFromParent();
+ return true;
+ }
+ Constant *RetVal = ConstantInt::getTrue(II->getContext());
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- ConstantInt *RetVal =
+ Value *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
@@ -1735,6 +1898,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
InsertedInsts.insert(ExtVal);
return true;
}
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group: {
Value *ArgVal = II->getArgOperand(0);
@@ -1818,7 +1982,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
if (!TLI)
return false;
@@ -1846,10 +2010,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// return is the first instruction in the block.
if (PN) {
BasicBlock::iterator BI = BB->begin();
- do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
- if (&*BI == BCI)
- // Also skip over the bitcast.
- ++BI;
+ // Skip over debug intrinsics and the bitcast.
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
if (&*BI != RetI)
return false;
} else {
@@ -1865,7 +2027,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
SmallVector<CallInst*, 4> TailCalls;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
- CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Look through bitcasts.
+ Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
+ CallInst *CI = dyn_cast<CallInst>(IncomingVal);
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
TLI->mayBeEmittedAsTailCall(CI) &&
@@ -1929,6 +2093,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg = nullptr;
Value *ScaledReg = nullptr;
Value *OriginalValue = nullptr;
+ bool InBounds = true;
enum FieldName {
NoField = 0x00,
@@ -1940,6 +2105,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
MultipleFields = 0xff
};
+
ExtAddrMode() = default;
void print(raw_ostream &OS) const;
@@ -1958,6 +2124,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
ScaledReg->getType() != other.ScaledReg->getType())
return MultipleFields;
+ // Conservatively reject 'inbounds' mismatches.
+ if (InBounds != other.InBounds)
+ return MultipleFields;
+
// Check each field to see if it differs.
unsigned Result = NoField;
if (BaseReg != other.BaseReg)
@@ -2056,6 +2226,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
OS << "[";
+ if (InBounds)
+ OS << "inbounds ";
if (BaseGV) {
OS << (NeedPlus ? " + " : "")
<< "GV:";
@@ -3126,6 +3298,8 @@ private:
PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
+ SmallSet<PHINode *, 8> MatchedPHIs;
+ MatchedPHIs.insert(PHI);
WorkList.push_back({ PHI, Candidate });
SmallSet<PHIPair, 8> Visited;
while (!WorkList.empty()) {
@@ -3158,8 +3332,10 @@ private:
if (Matcher.count({ FirstPhi, SecondPhi }))
continue;
// So the values are different and do not match. So we need them to
- // match.
- Matcher.insert({ FirstPhi, SecondPhi });
+ // match. (But we register no more than one match per PHI node, so that
+ // we won't later try to replace them twice.)
+ if (!MatchedPHIs.insert(FirstPhi).second)
+ Matcher.insert({ FirstPhi, SecondPhi });
// But we must check it.
WorkList.push_back({ FirstPhi, SecondPhi });
}
@@ -3354,6 +3530,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
@@ -3928,6 +4105,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
+ AddrMode.InBounds = false;
if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
@@ -3954,6 +4132,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::Mul:
case Instruction::Shl: {
// Can only handle X*C and X << C.
+ AddrMode.InBounds = false;
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
if (!RHS || RHS->getBitWidth() > 64)
return false;
@@ -4005,8 +4184,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantOffset == 0 ||
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
return true;
+ }
} else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
ConstantOffset > 0) {
@@ -4020,15 +4202,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
(BaseI && !isa<CastInst>(BaseI) &&
!isa<GetElementPtrInst>(BaseI))) {
- // If the base is an instruction, make sure the GEP is not in the same
- // basic block as the base. If the base is an argument or global
- // value, make sure the GEP is not in the entry block. Otherwise,
- // instruction selection can undo the split. Also make sure the
- // parent block allows inserting non-PHI instructions before the
- // terminator.
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
BasicBlock *Parent =
BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
- if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+ if (!Parent->getTerminator()->isEHPad())
LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
}
}
@@ -4042,6 +4220,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// See if the scale and offset amount is valid for this target.
AddrMode.BaseOffs += ConstantOffset;
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
// Match the base operand of the GEP.
if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4268,7 +4448,7 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
- const bool OptSize = I->getFunction()->optForSize();
+ const bool OptSize = I->getFunction()->hasOptSize();
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
@@ -4556,8 +4736,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
GetElementPtrInst *GEP = LargeOffsetGEP.first;
- if (GEP && GEP->getParent() != MemoryInst->getParent() &&
- !NewGEPBases.count(GEP)) {
+ if (GEP && !NewGEPBases.count(GEP)) {
// If splitting the underlying data structure can reduce the offset of a
// GEP, collect the GEP. Skip the GEPs that are the new bases of
// previously split data structures.
@@ -4727,7 +4906,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr =
+ AddrMode.InBounds
+ ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr")
+ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
@@ -4738,7 +4921,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr =
+ AddrMode.InBounds
+ ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr")
+ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
@@ -5037,7 +5224,6 @@ bool CodeGenPrepare::tryToPromoteExts(
/// Merging redundant sexts when one is dominating the other.
bool CodeGenPrepare::mergeSExts(Function &F) {
- DominatorTree DT(F);
bool Changed = false;
for (auto &Entry : ValToSExtendedUses) {
SExts &Insts = Entry.second;
@@ -5048,7 +5234,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
continue;
bool inserted = false;
for (auto &Pt : CurPts) {
- if (DT.dominates(Inst, Pt)) {
+ if (getDT(F).dominates(Inst, Pt)) {
Pt->replaceAllUsesWith(Inst);
RemovedInsts.insert(Pt);
Pt->removeFromParent();
@@ -5057,7 +5243,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
Changed = true;
break;
}
- if (!DT.dominates(Pt, Inst))
+ if (!getDT(F).dominates(Pt, Inst))
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
@@ -5715,7 +5901,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
static Value *getTrueOrFalseValue(
SelectInst *SI, bool isTrue,
const SmallPtrSet<const Instruction *, 2> &Selects) {
- Value *V;
+ Value *V = nullptr;
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
DefSI = dyn_cast<SelectInst>(V)) {
@@ -5723,9 +5909,44 @@ static Value *getTrueOrFalseValue(
"The condition of DefSI does not match with SI");
V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
}
+
+ assert(V && "Failed to get select true/false value");
return V;
}
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+ assert(Shift->isShift() && "Expected a shift");
+
+ // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+ // general vector shifts, and (3) the shift amount is a select-of-splatted
+ // values, hoist the shifts before the select:
+ // shift Op0, (select Cond, TVal, FVal) -->
+ // select Cond, (shift Op0, TVal), (shift Op0, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Shift->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Shift->getOperand(1),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Shift);
+ BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+ Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+ Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Shift->replaceAllUsesWith(NewSel);
+ Shift->eraseFromParent();
+ return true;
+}
+
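
A minimal sketch of the case the new optimizeShiftInst handles, written with the Clang/GCC vector extension (illustrative only; the rewrite fires only on targets whose isVectorShiftByScalarCheap hook returns true and when the select feeding the shift amount has a single use):

#include <cstdint>

typedef uint32_t v4u __attribute__((vector_size(16)));

// The shift amount is a select between two splatted constants, so the pass
// can hoist the shifts above the select:
//   X << (Flag ? splat(4) : splat(1))  ==>  Flag ? (X << splat(4)) : (X << splat(1))
// letting the backend use two cheap shift-by-scalar instructions.
v4u shift_by_flag(v4u X, bool Flag) {
  v4u Four = {4, 4, 4, 4};
  v4u One = {1, 1, 1, 1};
  return X << (Flag ? Four : One);
}
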
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
@@ -5769,7 +5990,11 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
!isFormingBranchFromSelectProfitable(TTI, TLI, SI))
return false;
- ModifiedDT = true;
+ // The DominatorTree needs to be rebuilt by any consumers after this
+ // transformation. We simply reset here rather than setting the ModifiedDT
+ // flag to avoid restarting the function walk in runOnFunction for each
+ // select optimized.
+ DT.reset();
// Transform a sequence like this:
// start:
@@ -5943,6 +6168,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
InsertedShuffle =
new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
SVI->getOperand(2), "", &*InsertPt);
+ InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
}
UI->replaceUsesOfWith(SVI, InsertedShuffle);
@@ -5958,6 +6184,48 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
+ // If the operands of I can be folded into a target instruction together with
+ // I, duplicate and sink them.
+ SmallVector<Use *, 4> OpsToSink;
+ if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink))
+ return false;
+
+ // OpsToSink can contain multiple uses in a use chain (e.g.
+ // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
+ // uses must come first, which means they are sunk first, temporarily creating
+ // invalid IR. This will be fixed once their dominated users are sunk and
+ // updated.
+ BasicBlock *TargetBB = I->getParent();
+ bool Changed = false;
+ SmallVector<Use *, 4> ToReplace;
+ for (Use *U : OpsToSink) {
+ auto *UI = cast<Instruction>(U->get());
+ if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+ continue;
+ ToReplace.push_back(U);
+ }
+
+ SmallPtrSet<Instruction *, 4> MaybeDead;
+ for (Use *U : ToReplace) {
+ auto *UI = cast<Instruction>(U->get());
+ Instruction *NI = UI->clone();
+ MaybeDead.insert(UI);
+ LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
+ NI->insertBefore(I);
+ InsertedInsts.insert(NI);
+ U->set(NI);
+ Changed = true;
+ }
+
+ // Remove instructions that are dead after sinking.
+ for (auto *I : MaybeDead)
+ if (!I->hasNUsesOrMore(1))
+ I->eraseFromParent();
+
+ return Changed;
+}
+
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
if (!TLI || !DL)
return false;
@@ -6412,14 +6680,17 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
const TargetLowering &TLI) {
// Handle simple but common cases only.
Type *StoreType = SI.getValueOperand()->getType();
- if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+ if (!DL.typeSizeEqualsStoreSize(StoreType) ||
DL.getTypeSizeInBits(StoreType) == 0)
return false;
unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
- if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
- DL.getTypeSizeInBits(SplitStoreType))
+ if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
+ return false;
+
+ // Don't split the store if it is volatile.
+ if (SI.isVolatile())
return false;
// Match the following patterns:
@@ -6658,11 +6929,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
if (InsertedInsts.count(I))
return false;
+ // TODO: Move into the switch on opcode below here.
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+ LargeOffsetGEPMap.erase(P);
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -6700,9 +6973,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (!TLI || !TLI->hasMultipleConditionRegisters())
- return OptimizeCmpExpression(CI, TLI);
+ if (auto *Cmp = dyn_cast<CmpInst>(I))
+ if (TLI && optimizeCmp(Cmp, ModifiedDT))
+ return true;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
@@ -6745,13 +7018,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
EnableAndCmpSinking && TLI)
return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ // TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
-
- return false;
+ if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+ return true;
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -6772,20 +7045,25 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return optimizeCallInst(CI, ModifiedDT);
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return optimizeSelectInst(SI);
-
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
- return optimizeShuffleVectorInst(SVI);
-
- if (auto *Switch = dyn_cast<SwitchInst>(I))
- return optimizeSwitchInst(Switch);
+ if (tryToSinkFreeOperands(I))
+ return true;
- if (isa<ExtractElementInst>(I))
- return optimizeExtractElementInst(I);
+ switch (I->getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return optimizeShiftInst(cast<BinaryOperator>(I));
+ case Instruction::Call:
+ return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+ case Instruction::Select:
+ return optimizeSelectInst(cast<SelectInst>(I));
+ case Instruction::ShuffleVector:
+ return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
+ case Instruction::Switch:
+ return optimizeSwitchInst(cast<SwitchInst>(I));
+ case Instruction::ExtractElement:
+ return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+ }
return false;
}
@@ -6833,7 +7111,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
}
}
}
- MadeChange |= dupRetToEnableTailCallOpts(&BB);
+ MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
return MadeChange;
}
@@ -6909,7 +7187,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
-bool CodeGenPrepare::splitBranchCondition(Function &F) {
+bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
@@ -6983,11 +7261,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
std::swap(TBB, FBB);
// Replace the old BB with the new BB.
- for (PHINode &PN : TBB->phis()) {
- int i;
- while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
- PN.setIncomingBlock(i, TmpBB);
- }
+ TBB->replacePhiUsesWith(&BB, TmpBB);
// Add another incoming edge form the new BB.
for (PHINode &PN : FBB->phis()) {
@@ -7066,10 +7340,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare.
ModifiedDT = true;
-
MadeChange = true;
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5a5960b16130..4144c243a341 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -1,9 +1,8 @@
//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 09c4423a2f05..4e127ce525c8 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 68034afe98d5..b99be5d7a87c 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -1,9 +1,8 @@
//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This class implements a deterministic finite automaton (DFA) based
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index ff44c5660bad..049ce7063307 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -1,9 +1,8 @@
//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,9 +81,11 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
- if (!MRI->use_nodbg_empty(Reg))
- // This def has a non-debug use. Don't delete the instruction!
- return false;
+ for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+ if (&Use != MI)
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
}
}
}
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index c83db476a4de..fe78acf4d80a 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -1,9 +1,8 @@
//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 4586649d17f0..ddd6cec5a178 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -1,9 +1,8 @@
//===- DwarfEHPrepare - Prepare exception handling for code generation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,7 +45,7 @@ namespace {
class DwarfEHPrepare : public FunctionPass {
// RewindFunction - _Unwind_Resume or the target equivalent.
- Constant *RewindFunction = nullptr;
+ FunctionCallee RewindFunction = nullptr;
DominatorTree *DT = nullptr;
const TargetLowering *TLI = nullptr;
@@ -146,7 +145,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
size_t ResumeIndex = 0;
for (auto *RI : Resumes) {
for (auto *LP : CleanupLPads) {
- if (isPotentiallyReachable(LP, RI, DT)) {
+ if (isPotentiallyReachable(LP, RI, nullptr, DT)) {
ResumeReachable.set(ResumeIndex);
break;
}
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 364e1f030942..0a83760befaa 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -1,9 +1,8 @@
//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index 54c53eb16312..486720cadd27 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -1,9 +1,8 @@
//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,7 @@ ViewEdgeBundles("view-edge-bundles", cl::Hidden,
char EdgeBundles::ID = 0;
INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
- /* cfg = */true, /* analysis = */ true)
+ /* cfg = */true, /* is_analysis = */ true)
char &llvm::EdgeBundlesID = EdgeBundles::ID;
diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp
index 458dcf2b0e26..a2dd5eee33b7 100644
--- a/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/lib/CodeGen/ExecutionDomainFix.cpp
@@ -1,9 +1,8 @@
//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -337,11 +336,10 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
// Sorted insertion.
// Enables giving priority to the latest domains during merging.
- auto I = std::upper_bound(
- Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) {
- return RDA->getReachingDef(mi, RC->getRegister(LHS)) <
- RDA->getReachingDef(mi, RC->getRegister(RHS));
- });
+ const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
+ auto I = partition_point(Regs, [&](int I) {
+ return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
+ });
Regs.insert(I, rx);
}
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
index ee7683adbcdd..b425482e6adf 100644
--- a/lib/CodeGen/ExpandMemCmp.cpp
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -1,9 +1,8 @@
//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,14 @@ static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
cl::desc("The number of loads per basic block for inline expansion of "
"memcmp that is only being compared against zero."));
+static cl::opt<unsigned> MaxLoadsPerMemcmp(
+ "max-loads-per-memcmp", cl::Hidden,
+ cl::desc("Set maximum number of loads used in expanded memcmp"));
+
+static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
+ "max-loads-per-memcmp-opt-size", cl::Hidden,
+ cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
+
namespace {
@@ -106,8 +113,7 @@ class MemCmpExpansion {
public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout);
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -196,16 +202,10 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
- : CI(CI),
- Size(Size),
- MaxLoadSize(0),
- NumLoadsNonOneByte(0),
- NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
- IsUsedForZeroCmp(IsUsedForZeroCmp),
- DL(TheDataLayout),
- Builder(CI) {
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+ : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
+ NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+ IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -216,17 +216,17 @@ MemCmpExpansion::MemCmpExpansion(
MaxLoadSize = LoadSizes.front();
// Compute the decomposition.
unsigned GreedyNumLoadsNonOneByte = 0;
- LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
GreedyNumLoadsNonOneByte);
NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
- assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
// If we allow overlapping loads and the load sequence is not already optimal,
// use overlapping loads.
if (Options.AllowOverlappingLoads &&
(LoadSequence.empty() || LoadSequence.size() > 2)) {
unsigned OverlappingNumLoadsNonOneByte = 0;
auto OverlappingLoads = computeOverlappingLoadSequence(
- Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
if (!OverlappingLoads.empty() &&
(LoadSequence.empty() ||
OverlappingLoads.size() < LoadSequence.size())) {
@@ -234,7 +234,7 @@ MemCmpExpansion::MemCmpExpansion(
NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
}
}
- assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
}
unsigned MemCmpExpansion::getNumBlocks() {
@@ -316,7 +316,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
assert(LoadIndex < getNumLoads() &&
"getCompareLoadPairs() called with no remaining loads");
std::vector<Value *> XorList, OrList;
- Value *Diff;
+ Value *Diff = nullptr;
const unsigned NumLoads =
std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
@@ -393,6 +393,8 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
while (OrList.size() != 1) {
OrList = pairWiseOr(OrList);
}
+
+ assert(Diff && "Failed to find comparison diff");
Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
}
@@ -722,7 +724,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
- if (CI->getFunction()->optForMinSize())
+ if (CI->getFunction()->hasMinSize())
return false;
// Early exit from expansion if size is not a constant.
@@ -739,18 +741,21 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
- const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+ auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
+ IsUsedForZeroCmp);
if (!Options) return false;
- const unsigned MaxNumLoads =
- TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
+
+ if (CI->getFunction()->hasOptSize() &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
- ? MemCmpEqZeroNumLoadsPerBlock
- : TLI->getMemcmpEqZeroLoadsPerBlock();
+ if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
- MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
- IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
@@ -824,7 +829,8 @@ bool ExpandMemCmpPass::runOnBlock(
}
LibFunc Func;
if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
- Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) {
+ (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
+ expandMemCmp(CI, TTI, TL, &DL)) {
return true;
}
}
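
The ExpandMemCmp hunks above fold the per-target load limits into the options value returned by enableMemCmpExpansion() and let the cl::opt flags override individual fields before MemCmpExpansion is constructed; they also allow bcmp calls to be expanded the same way as memcmp. Below is a minimal standalone sketch of that override pattern; the struct and flag names are simplified stand-ins for illustration, not the LLVM API.

#include <optional>

// Simplified stand-in for the target-provided expansion options.
struct ExpansionOptions {
  unsigned MaxNumLoads = 0;       // hard cap on generated loads
  unsigned NumLoadsPerBlock = 1;  // loads compared per block in the ==0 case
};

// Stand-ins for the command-line flags; a set value overrides the default.
std::optional<unsigned> FlagMaxLoads;            // ~ MaxLoadsPerMemcmp
std::optional<unsigned> FlagMaxLoadsOptSize;     // ~ MaxLoadsPerMemcmpOptSize
std::optional<unsigned> FlagEqZeroLoadsPerBlock; // ~ MemCmpEqZeroNumLoadsPerBlock

// Loosely mirrors the override order in expandMemCmp(): start from the
// target's options, then apply any explicitly set flags, preferring the
// optimize-for-size variant when the caller has the optsize attribute.
ExpansionOptions resolveOptions(ExpansionOptions Target, bool HasOptSize) {
  if (FlagEqZeroLoadsPerBlock)
    Target.NumLoadsPerBlock = *FlagEqZeroLoadsPerBlock;
  if (HasOptSize && FlagMaxLoadsOptSize)
    Target.MaxNumLoads = *FlagMaxLoadsOptSize;
  if (!HasOptSize && FlagMaxLoads)
    Target.MaxNumLoads = *FlagMaxLoads;
  return Target;
}
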
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index f2a2bcbb94b1..0ab70aff7dc4 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -1,9 +1,8 @@
//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
index 7552ba8cd85d..1069a2423b8b 100644
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -1,9 +1,8 @@
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,9 +29,9 @@ namespace {
unsigned getOpcode(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
return Instruction::FAdd;
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
return Instruction::FMul;
case Intrinsic::experimental_vector_reduce_add:
return Instruction::Add;
@@ -84,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Worklist.push_back(II);
for (auto *II : Worklist) {
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+
+ FastMathFlags FMF =
+ isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+ Intrinsic::ID ID = II->getIntrinsicID();
+ RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+ Value *Rdx = nullptr;
IRBuilder<> Builder(II);
- bool IsOrdered = false;
- Value *Acc = nullptr;
- Value *Vec = nullptr;
- auto ID = II->getIntrinsicID();
- auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
// and it can't be handled by generating a shuffle sequence.
- if (!II->getFastMathFlags().isFast())
- IsOrdered = true;
- Acc = II->getArgOperand(0);
- Vec = II->getArgOperand(1);
- break;
+ Value *Acc = II->getArgOperand(0);
+ Value *Vec = II->getArgOperand(1);
+ if (!FMF.allowReassoc())
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+ else {
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+ Acc, Rdx, "bin.rdx");
+ }
+ } break;
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -110,18 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- Vec = II->getArgOperand(0);
- MRK = getMRK(ID);
- break;
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ Value *Vec = II->getArgOperand(0);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ } break;
default:
continue;
}
- if (!TTI->shouldExpandReduction(II))
- continue;
- Value *Rdx =
- IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
- : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
Changed = true;
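
The rewritten loop above chooses between two expansion strategies for the new experimental_vector_reduce_v2_fadd/fmul intrinsics: an ordered reduction when the reassoc fast-math flag is absent, and a shuffle (tree) reduction plus a final accumulator op when it is present. The scalar illustration below sketches why the flag matters; it assumes a non-empty, power-of-two element count, as the shuffle expansion does.

#include <cstddef>
#include <vector>

// Sequential reduction: preserves floating-point evaluation order, matching
// getOrderedReduction when 'reassoc' is not set.
float orderedFAdd(float Acc, const std::vector<float> &V) {
  for (float X : V)
    Acc += X;
  return Acc;
}

// Pairwise (tree) reduction: halves the vector log2(N) times, matching the
// shuffle expansion, then folds the accumulator in last ("bin.rdx").
// Only legal when reassociation is allowed, since it reorders the additions.
float treeFAdd(float Acc, std::vector<float> V) {
  for (std::size_t Half = V.size() / 2; Half >= 1; Half /= 2) {
    for (std::size_t I = 0; I < Half; ++I)
      V[I] += V[I + Half];
    if (Half == 1)
      break;
  }
  return Acc + V[0];
}
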
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
index 4ddf9f92836c..a122f490884e 100644
--- a/lib/CodeGen/FEntryInserter.cpp
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -1,9 +1,8 @@
//===-- FEntryInsertion.cpp - Patchable prologues for LLVM -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 361558a0e562..600f72d320eb 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -1,9 +1,8 @@
//===- FaultMaps.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/FinalizeISel.cpp
index ec586a2caea3..772d7f71bb37 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/FinalizeISel.cpp
@@ -1,16 +1,16 @@
-//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//===-- llvm/CodeGen/FinalizeISel.cpp ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// Expand Pseudo-instructions produced by ISel. These are usually to allow
-// the expansion to contain control flow, such as a conditional move
-// implemented with a conditional branch and a phi, or an atomic operation
-// implemented with a loop.
+/// This pass expands Pseudo-instructions produced by ISel, fixes register
+/// reservations and may do machine frame information adjustments.
+/// The pseudo instructions are used to allow the expansion to contain control
+/// flow, such as a conditional move implemented with a conditional branch and a
+/// phi, or an atomic operation implemented with a loop.
//
//===----------------------------------------------------------------------===//
@@ -22,13 +22,13 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
-#define DEBUG_TYPE "expand-isel-pseudos"
+#define DEBUG_TYPE "finalize-isel"
namespace {
- class ExpandISelPseudos : public MachineFunctionPass {
+ class FinalizeISel : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- ExpandISelPseudos() : MachineFunctionPass(ID) {}
+ FinalizeISel() : MachineFunctionPass(ID) {}
private:
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -39,12 +39,12 @@ namespace {
};
} // end anonymous namespace
-char ExpandISelPseudos::ID = 0;
-char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
-INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE,
- "Expand ISel Pseudo-instructions", false, false)
+char FinalizeISel::ID = 0;
+char &llvm::FinalizeISelID = FinalizeISel::ID;
+INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
+ "Finalize ISel and expand pseudo-instructions", false, false)
-bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
@@ -70,5 +70,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
}
}
+ TLI->finalizeLowering(MF);
+
return Changed;
}
diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp
index 581cd423f2d4..75f6d0b8f0bf 100644
--- a/lib/CodeGen/FuncletLayout.cpp
+++ b/lib/CodeGen/FuncletLayout.cpp
@@ -1,9 +1,8 @@
//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 1c80556dfef5..9c53550eaa9d 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -1,9 +1,8 @@
//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index bc7beb6f6c2d..500dba9aea37 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -1,9 +1,8 @@
//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index e8ccd84b0b93..90571d090bfb 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -1,9 +1,8 @@
//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -214,7 +213,7 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
}
case Intrinsic::gcread: {
// Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI);
Ld->takeName(CI);
CI->replaceAllUsesWith(Ld);
CI->eraseFromParent();
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6be4c16c6301..43d06b0f82e9 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -1,9 +1,8 @@
//===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 89c525c5ba15..4518dbee1a9f 100644
--- a/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -1,9 +1,8 @@
//===- CSEInfo.cpp ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,8 +27,8 @@ void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
}
/// -----------------------------------------
-/// --------- CSEConfig ---------- ///
-bool CSEConfig::shouldCSEOpc(unsigned Opc) {
+/// --------- CSEConfigFull ---------- ///
+bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
switch (Opc) {
default:
break;
@@ -61,6 +60,17 @@ bool CSEConfig::shouldCSEOpc(unsigned Opc) {
bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
return Opc == TargetOpcode::G_CONSTANT;
}
+
+std::unique_ptr<CSEConfigBase>
+llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+ std::unique_ptr<CSEConfigBase> Config;
+ if (Level == CodeGenOpt::None)
+ Config = make_unique<CSEConfigConstantOnly>();
+ else
+ Config = make_unique<CSEConfigFull>();
+ return Config;
+}
+
/// -----------------------------------------
/// -------- GISelCSEInfo -------------//
@@ -139,7 +149,7 @@ MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
void *&InsertPos) {
handleRecordedInsts();
if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
- LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI;);
return const_cast<MachineInstr *>(Inst->MI);
}
return nullptr;
@@ -158,14 +168,14 @@ void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
if (shouldCSE(MI->getOpcode())) {
TemporaryInsts.insert(MI);
- LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Recording new MI " << *MI);
}
}
void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
auto *UMI = InstrMapping.lookup(MI);
- LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Handling recorded MI " << *MI);
if (UMI) {
// Invalidate this MI.
invalidateUniqueMachineInstr(UMI);
@@ -224,14 +234,14 @@ void GISelCSEInfo::analyze(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
if (!shouldCSE(MI.getOpcode()))
continue;
- LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI);
insertInstr(&MI);
}
}
}
void GISelCSEInfo::releaseMemory() {
- // print();
+ print();
CSEMap.clear();
InstrMapping.clear();
UniqueInstrAllocator.Reset();
@@ -245,11 +255,11 @@ void GISelCSEInfo::releaseMemory() {
}
void GISelCSEInfo::print() {
-#ifndef NDEBUG
- for (auto &It : OpcodeHitTable) {
- dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n";
- };
-#endif
+ LLVM_DEBUG(for (auto &It
+ : OpcodeHitTable) {
+ dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second
+ << "\n";
+ };);
}
/// -----------------------------------------
// ---- Profiling methods for FoldingSetNode --- //
@@ -349,8 +359,9 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
return *this;
}
-GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt,
- bool Recompute) {
+GISelCSEInfo &
+GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt,
+ bool Recompute) {
if (!AlreadyComputed || Recompute) {
Info.setCSEConfig(std::move(CSEOpt));
Info.analyze(*MF);
diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 863efe0c3e34..461bc6038c2c 100644
--- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -40,6 +39,7 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
MachineInstr *MI =
CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
if (MI) {
+ CSEInfo->countOpcodeHit(MI->getOpcode());
auto CurrPos = getInsertPt();
if (!dominates(MI, CurrPos))
CurMBB->splice(CurrPos, CurMBB, MI);
@@ -195,6 +195,12 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
if (!canPerformCSEForOpc(Opc))
return MachineIRBuilder::buildConstant(Res, Val);
+
+ // For vectors, CSE the element only for now.
+ LLT Ty = Res.getLLTTy(*getMRI());
+ if (Ty.isVector())
+ return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
+
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
void *InsertPos = nullptr;
@@ -206,6 +212,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
// Handle generating copies here.
return generateCopiesIfRequired({Res}, MIB);
}
+
MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
return memoizeMI(NewMIB, InsertPos);
}
@@ -215,6 +222,12 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
if (!canPerformCSEForOpc(Opc))
return MachineIRBuilder::buildFConstant(Res, Val);
+
+ // For vectors, CSE the element only for now.
+ LLT Ty = Res.getLLTTy(*getMRI());
+ if (Ty.isVector())
+ return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val));
+
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
void *InsertPos = nullptr;
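
With the hunks above, buildConstant and buildFConstant handle vector destinations by CSE'ing only the scalar element and rebuilding the result as a splat of it. The memoization sketch below is a hypothetical scalar analogue of that idea; the cache and integer ids are illustrative, not GISelCSEInfo's real bookkeeping.

#include <cstdint>
#include <map>
#include <vector>

std::map<std::int64_t, int> ScalarConstantCache; // constant value -> def id
int NextId = 0;

// Scalar constants are memoized and reused on a cache hit.
int buildScalarConstant(std::int64_t Val) {
  auto It = ScalarConstantCache.find(Val);
  if (It != ScalarConstantCache.end())
    return It->second;
  int Id = NextId++;
  ScalarConstantCache.emplace(Val, Id);
  return Id;
}

// Vector constants are not memoized whole; only the element goes through the
// cache, and the vector result is a splat of that element.
std::vector<int> buildSplatVectorConstant(std::int64_t Val, unsigned NumElts) {
  int Elt = buildScalarConstant(Val);
  return std::vector<int>(NumElts, Elt);
}
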
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 724ecedf3b3f..a5d8205a34a8 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -13,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -21,13 +21,17 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#define DEBUG_TYPE "call-lowering"
+
using namespace llvm;
void CallLowering::anchor() {}
-bool CallLowering::lowerCall(
- MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
- ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
+bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
+ ArrayRef<Register> ResRegs,
+ ArrayRef<ArrayRef<Register>> ArgRegs,
+ Register SwiftErrorVReg,
+ std::function<unsigned()> GetCalleeReg) const {
auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
@@ -40,8 +44,8 @@ bool CallLowering::lowerCall(
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
- // We don't currently support swifterror or swiftself args.
- if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf())
+ // We don't currently support swiftself args.
+ if (OrigArg.Flags.isSwiftSelf())
return false;
OrigArgs.push_back(OrigArg);
++i;
@@ -53,11 +57,12 @@ bool CallLowering::lowerCall(
else
Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}};
+ ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
if (!OrigRet.Ty->isVoidTy())
setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
- return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
+ return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs,
+ SwiftErrorVReg);
}
template <typename FuncInfoTy>
@@ -84,7 +89,10 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
- Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+ auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
+ Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
@@ -109,21 +117,78 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const CallInst &FuncInfo) const;
+Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(SrcRegs.size() > 1 && "Nothing to pack");
+
+ const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ LLT PackedLLT = getLLTForType(*PackedTy, DL);
+
+ SmallVector<LLT, 8> LLTs;
+ SmallVector<uint64_t, 8> Offsets;
+ computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+ assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch");
+
+ Register Dst = MRI->createGenericVirtualRegister(PackedLLT);
+ MIRBuilder.buildUndef(Dst);
+ for (unsigned i = 0; i < SrcRegs.size(); ++i) {
+ Register NewDst = MRI->createGenericVirtualRegister(PackedLLT);
+ MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]);
+ Dst = NewDst;
+ }
+
+ return Dst;
+}
+
+void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
+ Type *PackedTy,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+ const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+
+ SmallVector<LLT, 8> LLTs;
+ SmallVector<uint64_t, 8> Offsets;
+ computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+ assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch");
+
+ for (unsigned i = 0; i < DstRegs.size(); ++i)
+ MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
+}
+
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
- const DataLayout &DL = F.getParent()->getDataLayout();
-
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler);
+}
+
+bool CallLowering::handleAssignments(CCState &CCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ MachineIRBuilder &MIRBuilder,
+ ArrayRef<ArgInfo> Args,
+ ValueHandler &Handler) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
- return false;
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (!Handler.isArgumentHandler() || !CurVT.isValid())
+ return false;
+ CurVT = TLI->getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), EVT(CurVT));
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
+ return false;
+ }
}
for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
@@ -137,16 +202,49 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
continue;
}
- if (VA.isRegLoc())
- Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
- else if (VA.isMemLoc()) {
- unsigned Size = VA.getValVT() == MVT::iPTR
- ? DL.getPointerSize()
- : alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ assert(Args[i].Regs.size() == 1 &&
+ "Can't handle multiple virtual regs yet");
+
+ // FIXME: Pack registers if we have more than one.
+ Register ArgReg = Args[i].Regs[0];
+
+ if (VA.isRegLoc()) {
+ MVT OrigVT = MVT::getVT(Args[i].Ty);
+ MVT VAVT = VA.getValVT();
+ if (Handler.isArgumentHandler() && VAVT != OrigVT) {
+ if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
+ return false; // Can't handle this type of arg yet.
+ const LLT VATy(VAVT);
+ Register NewReg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+ Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+ // If it's a vector type, we either need to truncate the elements
+ // or do an unmerge to get the lower block of elements.
+ if (VATy.isVector() &&
+ VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+ const LLT OrigTy(OrigVT);
+ // Just handle the case where the VA type is 2 * original type.
+ if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+ LLVM_DEBUG(dbgs()
+ << "Incoming promoted vector arg has too many elts");
+ return false;
+ }
+ auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+ MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
+ } else {
+ MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+ }
+ } else {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ }
+ } else if (VA.isMemLoc()) {
+ MVT VT = MVT::getVT(Args[i].Ty);
+ unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
+ : alignTo(VT.getSizeInBits(), 8) / 8;
unsigned Offset = VA.getLocMemOffset();
MachinePointerInfo MPO;
- unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
- Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
+ Register StackAddr = Handler.getStackAddress(Size, Offset, MPO);
+ Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA);
} else {
// FIXME: Support byvals and other weirdness
return false;
@@ -155,9 +253,11 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
return true;
}
-unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
+Register CallLowering::ValueHandler::extendRegister(Register ValReg,
CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
+ if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits())
+ return ValReg;
switch (VA.getLocInfo()) {
default: break;
case CCValAssign::Full:
@@ -170,12 +270,12 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
return MIB->getOperand(0).getReg();
}
case CCValAssign::SExt: {
- unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ Register NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildSExt(NewReg, ValReg);
return NewReg;
}
case CCValAssign::ZExt: {
- unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ Register NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildZExt(NewReg, ValReg);
return NewReg;
}
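
The new packRegs/unpackRegs helpers above build a single packed value by inserting each piece at the byte offset reported by computeValueLLTs, and split it back apart with extracts. A plain-integer analogue of that round trip is sketched below, assuming a 64-bit packed value made of two 32-bit parts.

#include <cstdint>

// Packing: start from an empty value (buildUndef in the MIR version) and
// insert each part at its offset (buildInsert).
std::uint64_t pack(std::uint32_t Lo, std::uint32_t Hi) {
  std::uint64_t Packed = 0;
  Packed |= static_cast<std::uint64_t>(Lo);        // offset 0
  Packed |= static_cast<std::uint64_t>(Hi) << 32;  // offset 32
  return Packed;
}

// Unpacking: extract each part from its recorded offset (buildExtract).
void unpack(std::uint64_t Packed, std::uint32_t &Lo, std::uint32_t &Hi) {
  Lo = static_cast<std::uint32_t>(Packed);
  Hi = static_cast<std::uint32_t>(Packed >> 32);
}
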
diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp
index 45b0e36fd7d9..31cb1dbbc9b5 100644
--- a/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,7 +50,7 @@ public:
}
void erasingInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
WorkList.remove(&MI);
}
void createdInstr(MachineInstr &MI) override {
@@ -130,9 +129,10 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
- WorkList.insert(CurMI);
+ WorkList.deferred_insert(CurMI);
}
}
+ WorkList.finalize();
// Main Loop. Process the instructions here.
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1c5670a6dec..9cbf3dd83ff1 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -23,8 +22,8 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
-void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
- unsigned ToReg) const {
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
+ Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
if (MRI.constrainRegAttrs(ToReg, FromReg))
@@ -37,7 +36,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
MachineOperand &FromRegOp,
- unsigned ToReg) const {
+ Register ToReg) const {
assert(FromRegOp.getParent() && "Expected an operand in an MI");
Observer.changingInstr(*FromRegOp.getParent());
@@ -47,6 +46,13 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
+ if (matchCombineCopy(MI)) {
+ applyCombineCopy(MI);
+ return true;
+ }
+ return false;
+}
+bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
return false;
unsigned DstReg = MI.getOperand(0).getReg();
@@ -55,20 +61,18 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
LLT SrcTy = MRI.getType(SrcReg);
// Simple Copy Propagation.
// a(sx) = COPY b(sx) -> Replace all uses of a with b.
- if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
- MI.eraseFromParent();
- replaceRegWith(MRI, DstReg, SrcReg);
+ if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
return true;
- }
return false;
}
+void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+}
namespace {
-struct PreferredTuple {
- LLT Ty; // The result type of the extend.
- unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
- MachineInstr *MI;
-};
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
@@ -127,7 +131,8 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
- std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
+ std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
+ MachineOperand &UseMO)>
Inserter) {
MachineInstr &UseMI = *UseMO.getParent();
@@ -143,26 +148,26 @@ static void InsertInsnsWithoutSideEffectsBeforeUse(
// the def instead of at the start of the block.
if (InsertBB == DefMI.getParent()) {
MachineBasicBlock::iterator InsertPt = &DefMI;
- Inserter(InsertBB, std::next(InsertPt));
+ Inserter(InsertBB, std::next(InsertPt), UseMO);
return;
}
// Otherwise we want the start of the BB
- Inserter(InsertBB, InsertBB->getFirstNonPHI());
+ Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
- struct InsertionPoint {
- MachineOperand *UseMO;
- MachineBasicBlock *InsertIntoBB;
- MachineBasicBlock::iterator InsertBefore;
- InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
- MachineBasicBlock::iterator InsertBefore)
- : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) {
- }
- };
+ PreferredTuple Preferred;
+ if (matchCombineExtendingLoads(MI, Preferred)) {
+ applyCombineExtendingLoads(MI, Preferred);
+ return true;
+ }
+ return false;
+}
+bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
+ PreferredTuple &Preferred) {
// We match the loads and follow the uses to the extend instead of matching
// the extends and following the def to the load. This is because the load
// must remain in the same position for correctness (unless we also add code
@@ -182,6 +187,19 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
if (!LoadValueTy.isScalar())
return false;
+ // Most architectures are going to legalize <s8 loads into at least a 1 byte
+ // load, and the MMOs can only describe memory accesses in multiples of bytes.
+ // If we try to perform extload combining on those, we can end up with
+ // %a(s8) = extload %ptr (load 1 byte from %ptr)
+ // ... which is an illegal extload instruction.
+ if (LoadValueTy.getSizeInBits() < 8)
+ return false;
+
+ // For non power-of-2 types, they will very likely be legalized into multiple
+ // loads. Don't bother trying to match them into extending loads.
+ if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ return false;
+
// Find the preferred type aside from the any-extends (unless it's the only
// one) and non-extending ops. We'll emit an extending load to that type and
// and emit a variant of (extend (trunc X)) for the others according to the
@@ -192,7 +210,7 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
: MI.getOpcode() == TargetOpcode::G_SEXTLOAD
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
- PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
+ Preferred = {LLT(), PreferredOpcode, nullptr};
for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
@@ -211,9 +229,35 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+ return true;
+}
+void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
+ PreferredTuple &Preferred) {
// Rewrite the load to the chosen extending load.
- unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+ Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+
+ // Inserter to insert a truncate back to the original type at a given point
+ // with some basic CSE to limit truncate duplication to one per BB.
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore,
+ MachineOperand &UseMO) {
+ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+ if (PreviouslyEmitted) {
+ Observer.changingInstr(*UseMO.getParent());
+ UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
+ Observer.changedInstr(*UseMO.getParent());
+ return;
+ }
+
+ Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+ Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertIntoBB] = NewMI;
+ replaceRegOpWith(MRI, UseMO, NewDstReg);
+ };
+
Observer.changingInstr(MI);
MI.setDesc(
Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
@@ -223,10 +267,13 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
: TargetOpcode::G_LOAD));
// Rewrite all the uses to fix up the types.
- SmallVector<MachineInstr *, 1> ScheduleForErase;
- SmallVector<InsertionPoint, 4> ScheduleForInsert;
- for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
- MachineInstr *UseMI = UseMO.getParent();
+ auto &LoadValue = MI.getOperand(0);
+ SmallVector<MachineOperand *, 4> Uses;
+ for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
+ Uses.push_back(&UseMO);
+
+ for (auto *UseMO : Uses) {
+ MachineInstr *UseMI = UseMO->getParent();
// If the extend is compatible with the preferred extend then we should fix
// up the type and extend so that it uses the preferred use.
@@ -247,7 +294,8 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// %2:_(s32) = G_SEXTLOAD ...
// ... = ... %2(s32)
replaceRegWith(MRI, UseDstReg, ChosenDstReg);
- ScheduleForErase.push_back(UseMO.getParent());
+ Observer.erasingInstr(*UseMO->getParent());
+ UseMO->getParent()->eraseFromParent();
} else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
// If the preferred size is smaller, then keep the extend but extend
// from the result of the extending load. For example:
@@ -272,59 +320,87 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// %4:_(s8) = G_TRUNC %2:_(s32)
// %3:_(s64) = G_ZEXT %2:_(s8)
// ... = ... %3(s64)
- InsertInsnsWithoutSideEffectsBeforeUse(
- Builder, MI, UseMO,
- [&](MachineBasicBlock *InsertIntoBB,
- MachineBasicBlock::iterator InsertBefore) {
- ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
- });
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
+ InsertTruncAt);
}
continue;
}
// The use is (one of) the uses of the preferred use we chose earlier.
// We're going to update the load to def this value later so just erase
// the old extend.
- ScheduleForErase.push_back(UseMO.getParent());
+ Observer.erasingInstr(*UseMO->getParent());
+ UseMO->getParent()->eraseFromParent();
continue;
}
// The use isn't an extend. Truncate back to the type we originally loaded.
// This is free on many targets.
- InsertInsnsWithoutSideEffectsBeforeUse(
- Builder, MI, UseMO,
- [&](MachineBasicBlock *InsertIntoBB,
- MachineBasicBlock::iterator InsertBefore) {
- ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
- });
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
}
- DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
- for (auto &InsertionInfo : ScheduleForInsert) {
- MachineOperand *UseMO = InsertionInfo.UseMO;
- MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
- MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
-
- MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
- if (PreviouslyEmitted) {
- Observer.changingInstr(*UseMO->getParent());
- UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
- Observer.changedInstr(*UseMO->getParent());
- continue;
- }
-
- Builder.setInsertPt(*InsertIntoBB, InsertBefore);
- unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
- MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
- EmittedInsns[InsertIntoBB] = NewMI;
- replaceRegOpWith(MRI, *UseMO, NewDstReg);
- }
- for (auto &EraseMI : ScheduleForErase) {
- Observer.erasingInstr(*EraseMI);
- EraseMI->eraseFromParent();
- }
MI.getOperand(0).setReg(ChosenDstReg);
Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+ // Try to match the following:
+ // bb1:
+ // %c(s32) = G_ICMP pred, %a, %b
+ // %c1(s1) = G_TRUNC %c(s32)
+ // G_BRCOND %c1, %bb2
+ // G_BR %bb3
+ // bb2:
+ // ...
+ // bb3:
+
+ // The above pattern does not have a fall through to the successor bb2, always
+ // resulting in a branch no matter which path is taken. Here we try to find
+ // and replace that pattern with conditional branch to bb3 and otherwise
+ // fallthrough to bb2.
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator BrIt(MI);
+ if (BrIt == MBB->begin())
+ return false;
+ assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
+
+ MachineInstr *BrCond = &*std::prev(BrIt);
+ if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
+ return false;
+ // Check that the next block is the conditional branch target.
+ if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
+ return false;
+
+ MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+ if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
+ !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
+ return false;
+ return true;
+}
+
+bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
+ if (!matchCombineBr(MI))
+ return false;
+ MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
+ MachineBasicBlock::iterator BrIt(MI);
+ MachineInstr *BrCond = &*std::prev(BrIt);
+ MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+
+ CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+
+ // Invert the G_ICMP condition.
+ Observer.changingInstr(*CmpMI);
+ CmpMI->getOperand(1).setPredicate(InversePred);
+ Observer.changedInstr(*CmpMI);
+
+ // Change the conditional branch target.
+ Observer.changingInstr(*BrCond);
+ BrCond->getOperand(1).setMBB(BrTarget);
+ Observer.changedInstr(*BrCond);
+ MI.eraseFromParent();
return true;
}
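
tryCombineBr above removes the always-taken unconditional branch by inverting the compare feeding the G_BRCOND and pointing the conditional branch at the old G_BR target, so the layout successor is reached by fallthrough: "brcond %c, bb2; br bb3" becomes "brcond %inverted_c, bb3" with bb2 falling through. The sketch below shows only the predicate-inversion core; the four predicates modeled here are an illustrative assumption, standing in for what CmpInst::getInversePredicate provides.

enum class Pred { EQ, NE, SLT, SGE };

// Maps each predicate to its logical inverse, so the branch sense can be
// flipped without changing which path the program takes.
Pred invert(Pred P) {
  switch (P) {
  case Pred::EQ:  return Pred::NE;
  case Pred::NE:  return Pred::EQ;
  case Pred::SLT: return Pred::SGE;
  case Pred::SGE: return Pred::SLT;
  }
  return P; // unreachable for the enumerators above
}
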
diff --git a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index c693acbbf10b..62b903c30b89 100644
--- a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@ void GISelChangeObserver::changingAllUsesOfReg(
void GISelChangeObserver::finishedChangingAllUsesOfReg() {
for (auto *ChangedMI : ChangingAllUsesOfReg)
changedInstr(*ChangedMI);
+ ChangingAllUsesOfReg.clear();
}
RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 00c6a9d63158..e0391e6f6467 100644
--- a/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 95f6274aa068..6e99bdbd8264 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -16,8 +15,11 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -106,9 +108,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
- initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
-}
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
#ifndef NDEBUG
namespace {
@@ -136,7 +136,11 @@ public:
LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
<< " was copied to " << MI);
#endif
- assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+ // We allow insts in the entry block to have a debug loc line of 0 because
+ // they could have originated from constants, and we don't want a jumpy
+ // debug experience.
+ assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
+ MI.getDebugLoc().getLine() == 0) &&
"Line info was not transferred to all instructions");
}
};
@@ -152,36 +156,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-static void computeValueLLTs(const DataLayout &DL, Type &Ty,
- SmallVectorImpl<LLT> &ValueTys,
- SmallVectorImpl<uint64_t> *Offsets = nullptr,
- uint64_t StartingOffset = 0) {
- // Given a struct type, recursively traverse the elements.
- if (StructType *STy = dyn_cast<StructType>(&Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
- computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
- StartingOffset + SL->getElementOffset(I));
- return;
- }
- // Given an array type, recursively traverse the elements.
- if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
- Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
- StartingOffset + i * EltSize);
- return;
- }
- // Interpret void as zero return values.
- if (Ty.isVoidTy())
- return;
- // Base case: we can get an LLT for this LLVM IR type.
- ValueTys.push_back(getLLTForType(Ty, DL));
- if (Offsets != nullptr)
- Offsets->push_back(StartingOffset * 8);
-}
-
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
assert(!VMap.contains(Val) && "Value already allocated in VMap");
@@ -195,7 +169,7 @@ IRTranslator::allocateVRegs(const Value &Val) {
return *Regs;
}
-ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
auto VRegsIt = VMap.findVRegs(Val);
if (VRegsIt != VMap.vregs_end())
return *VRegsIt->second;
@@ -249,7 +223,7 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
if (FrameIndices.find(&AI) != FrameIndices.end())
return FrameIndices[&AI];
- unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
+ unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
unsigned Size =
ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
@@ -311,21 +285,20 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- // FIXME: handle signed/unsigned wrapping flags.
-
// Get or create a virtual register for each value.
// Unless the value is a Constant => loadimm cst?
// or inline constant each time?
// Creation of a virtual register needs to have a size.
- unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
- unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
- unsigned Res = getOrCreateVReg(U);
- auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
+ uint16_t Flags = 0;
if (isa<Instruction>(U)) {
- MachineInstr *FBinOpMI = FBinOp.getInstr();
const Instruction &I = cast<Instruction>(U);
- FBinOpMI->copyIRFlags(I);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
}
+
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
return true;
}
@@ -333,27 +306,38 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
// -0.0 - X --> G_FNEG
if (isa<Constant>(U.getOperand(0)) &&
U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
- .addDef(getOrCreateVReg(U))
- .addUse(getOrCreateVReg(*U.getOperand(1)));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
+ uint16_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+ // Negate the last operand of the FSUB
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags);
return true;
}
return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
}
bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
- .addDef(getOrCreateVReg(U))
- .addUse(getOrCreateVReg(*U.getOperand(1)));
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ uint16_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags);
return true;
}
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
- unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
- unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
- unsigned Res = getOrCreateVReg(U);
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
CmpInst::Predicate Pred =
CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
cast<ConstantExpr>(U).getPredicate());
@@ -366,8 +350,8 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
else {
- auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
- FCmp->copyIRFlags(*CI);
+ MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
+ MachineInstr::copyFlagsFromInstruction(*CI));
}
return true;
@@ -379,15 +363,20 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
Ret = nullptr;
- ArrayRef<unsigned> VRegs;
+ ArrayRef<Register> VRegs;
if (Ret)
VRegs = getOrCreateVRegs(*Ret);
+ Register SwiftErrorVReg = 0;
+ if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
+ SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
+ &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
+ }
+
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
-
- return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -395,7 +384,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
unsigned Succ = 0;
if (!BrInst.isUnconditional()) {
// We want a G_BRCOND to the true BB followed by an unconditional branch.
- unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
+ Register Tst = getOrCreateVReg(*BrInst.getCondition());
const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
MachineBasicBlock &TrueBB = getMBB(TrueTgt);
MIRBuilder.buildBrCond(Tst, TrueBB);
@@ -415,48 +404,429 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
-bool IRTranslator::translateSwitch(const User &U,
- MachineIRBuilder &MIRBuilder) {
- // For now, just translate as a chain of conditional branches.
- // FIXME: could we share most of the logic/code in
- // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
- // At first sight, it seems most of the logic in there is independent of
- // SelectionDAG-specifics and a lot of work went in to optimize switch
- // lowering in there.
-
- const SwitchInst &SwInst = cast<SwitchInst>(U);
- const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
- const BasicBlock *OrigBB = SwInst.getParent();
-
- LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
- for (auto &CaseIt : SwInst.cases()) {
- const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
- const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
- MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
- MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
- const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
- MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
-
- MIRBuilder.buildBrCond(Tst, TrueMBB);
- CurMBB.addSuccessor(&TrueMBB);
- addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
-
- MachineBasicBlock *FalseMBB =
- MF->CreateMachineBasicBlock(SwInst.getParent());
- // Insert the comparison blocks one after the other.
- MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
- MIRBuilder.buildBr(*FalseMBB);
- CurMBB.addSuccessor(FalseMBB);
-
- MIRBuilder.setMBB(*FalseMBB);
- }
- // handle default case
- const BasicBlock *DefaultBB = SwInst.getDefaultDest();
- MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
- MIRBuilder.buildBr(DefaultMBB);
- MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
- CurMBB.addSuccessor(&DefaultMBB);
- addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI) {
+ Src->addSuccessorWithoutProb(Dst);
+ return;
+ }
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+}
+
+BranchProbability
+IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ if (!FuncInfo.BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
+}
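+// For example (a minimal sketch): without BranchProbabilityInfo, the helper
+// above gives a block with three IR successors a probability of 1/3 on every
+// outgoing edge; when BPI is available, its per-edge weights are used instead.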
+
+bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ // Extract cases from the switch.
+ const SwitchInst &SI = cast<SwitchInst>(U);
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (auto &I : SI.cases()) {
+ MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
+ assert(Succ && "Could not find successor mbb in mapping");
+ const ConstantInt *CaseVal = I.getCaseValue();
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+ }
+
+ MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
+ sortAndRangeify(Clusters);
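+ // As a minimal sketch (hypothetical switch with cases 1 -> A, 2 -> A and
+ // 3 -> B), sortAndRangeify folds adjacent cases with the same destination
+ // into range clusters:
+ //   before: [1,1] -> A, [2,2] -> A, [3,3] -> B
+ //   after:  [1,2] -> A, [3,3] -> B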
+
+ MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
+
+ // If there is only the default destination, jump there directly.
+ if (Clusters.empty()) {
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != SwitchMBB->getNextNode())
+ MIB.buildBr(*DefaultMBB);
+ return true;
+ }
+
+ SL->findJumpTables(Clusters, &SI, DefaultMBB);
+
+ LLVM_DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+ // FIXME: At the moment we don't do any splitting optimizations here like
+ // SelectionDAG does, so this worklist only has one entry.
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.back();
+ WorkList.pop_back();
+ if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
+ return false;
+ }
+ return true;
+}
+
+void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
+ MachineBasicBlock *MBB) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ MachineIRBuilder MIB(*MBB->getParent());
+ MIB.setMBB(*MBB);
+ MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
+ auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
+ MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
+}
+
+bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
+ MachineBasicBlock *HeaderBB) {
+ MachineIRBuilder MIB(*HeaderBB->getParent());
+ MIB.setMBB(*HeaderBB);
+ MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+ const Value &SValue = *JTH.SValue;
+ // Subtract the lowest switch case value from the value being switched on.
+ const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
+ Register SwitchOpReg = getOrCreateVReg(SValue);
+ auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
+ auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
+
+ // This value may be smaller or larger than the target's pointer type, and
+ // therefore require extension or truncation.
+ Type *PtrIRTy = SValue.getType()->getPointerTo();
+ const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
+ Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
+
+ JT.Reg = Sub.getReg(0);
+
+ if (JTH.OmitRangeCheck) {
+ if (JT.MBB != HeaderBB->getNextNode())
+ MIB.buildBr(*JT.MBB);
+ return true;
+ }
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
+ auto Cst = getOrCreateVReg(
+ *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
+ Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
+ auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
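+ // A single unsigned comparison suffices here: Sub is the rebased value
+ // (Value - First), so a Value below First wraps around to a large unsigned
+ // number and is caught by the same ICMP_UGT check as a Value above Last.
+ // Sketch for cases 10..13: Value 9 gives Sub == -1 (all ones) > 3, and
+ // Value 14 gives Sub == 4 > 3; both branch to the default block.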
+
+ auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != HeaderBB->getNextNode())
+ BrCond = MIB.buildBr(*JT.MBB);
+ return true;
+}
+
+void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
+ MachineBasicBlock *SwitchBB,
+ MachineIRBuilder &MIB) {
+ Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
+ Register Cond;
+ DebugLoc OldDbgLoc = MIB.getDebugLoc();
+ MIB.setDebugLoc(CB.DbgLoc);
+ MIB.setMBB(*CB.ThisBB);
+
+ if (CB.PredInfo.NoCmp) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+ CB.ThisBB);
+ CB.ThisBB->normalizeSuccProbs();
+ if (CB.TrueBB != CB.ThisBB->getNextNode())
+ MIB.buildBr(*CB.TrueBB);
+ MIB.setDebugLoc(OldDbgLoc);
+ return;
+ }
+
+ const LLT i1Ty = LLT::scalar(1);
+ // Build the compare.
+ if (!CB.CmpMHS) {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ } else {
+ assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE &&
+ "Can only handle ULE ranges");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ Cond =
+ MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+ } else {
+ const LLT &CmpTy = MRI->getType(CmpOpReg);
+ auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
+ auto Diff = MIB.buildConstant(CmpTy, High - Low);
+ Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+ CB.ThisBB);
+
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
+ CB.ThisBB->normalizeSuccProbs();
+
+ // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+ CB.ThisBB);
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == CB.ThisBB->getNextNode()) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ auto True = MIB.buildConstant(i1Ty, 1);
+ Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None)
+ .getReg(0);
+ }
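+ // Sketch of the effect (assuming the original TrueBB is the layout
+ // successor): after the swap, the G_BRCOND below tests the inverted
+ // condition and jumps to the original false destination, while the trailing
+ // G_BR targets the next block in layout and can later be folded into a
+ // plain fall-through.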
+
+ MIB.buildBrCond(Cond, *CB.TrueBB);
+ MIB.buildBr(*CB.FalseBB);
+ MIB.setDebugLoc(OldDbgLoc);
+}
+
+bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB,
+ MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+ BranchProbability DefaultProb = W.DefaultProb;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+
+ // Since the jump table block is separate from the switch block, we need
+ // to keep track of it as a machine predecessor to the default block,
+ // otherwise we lose the phi edges.
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+ CurMBB);
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+ JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ } else {
+ // Also record edges from the jump table block to its successors.
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
+ JumpMBB);
+ }
+ }
+
+ // Skip the range check if the fallthrough block is unreachable.
+ if (FallthroughUnreachable)
+ JTH->OmitRangeCheck = true;
+
+ if (!JTH->OmitRangeCheck)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
+
+ // The jump table header will be inserted into our current block; it will do
+ // the range check and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
+ return false;
+ JTH->Emitted = true;
+ }
+ return true;
+}
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+ Value *Cond,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable,
+ BranchProbability UnhandledProbs,
+ MachineBasicBlock *CurMBB,
+ MachineIRBuilder &MIB,
+ MachineBasicBlock *SwitchMBB) {
+ using namespace SwitchCG;
+ const Value *RHS, *LHS, *MHS;
+ CmpInst::Predicate Pred;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ Pred = CmpInst::ICMP_EQ;
+ LHS = Cond;
+ RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ Pred = CmpInst::ICMP_ULE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // If Fallthrough is unreachable, fold away the comparison.
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
+ CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
+
+ emitSwitchCase(CB, SwitchMBB, MIB);
+ return true;
+}
+
+bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI(W.MBB);
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = &*BBI;
+
+ if (EnableOpts) {
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker, since clusters are guaranteed never to overlap.
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Prob != b.Prob
+ ? a.Prob > b.Prob
+ : a.Low->getValue().slt(b.Low->getValue());
+ });
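+ // For example (hypothetical numbers): clusters with probabilities 40%, 20%
+ // and 40% and Lows 7, 9 and 3 are ordered as (3, 40%), (7, 40%), (9, 20%);
+ // the equal-probability pair falls back to the smaller Low, which keeps the
+ // order deterministic.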
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of probabilities.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
+ --I;
+ if (I->Prob > W.LastCluster->Prob)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ }
+ UnhandledProbs -= I->Prob;
+
+ switch (I->Kind) {
+ case CC_BitTests: {
+ LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
+ return false; // Bit tests currently unimplemented.
+ }
+ case CC_JumpTable: {
+ if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower jump table");
+ return false;
+ }
+ break;
+ }
+ case CC_Range: {
+ if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
+ FallthroughUnreachable, UnhandledProbs,
+ CurMBB, MIB, SwitchMBB)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower switch range");
+ return false;
+ }
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
return true;
}
@@ -465,7 +835,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
MachineIRBuilder &MIRBuilder) {
const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
- const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+ const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
MIRBuilder.buildBrIndirect(Tgt);
// Link successors.
@@ -476,6 +846,14 @@ bool IRTranslator::translateIndirectBr(const User &U,
return true;
}
+static bool isSwiftError(const Value *V) {
+ if (auto Arg = dyn_cast<Argument>(V))
+ return Arg->hasSwiftErrorAttr();
+ if (auto AI = dyn_cast<AllocaInst>(V))
+ return AI->isSwiftError();
+ return false;
+}
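+// A rough sketch of how this is used below: loads and stores through a
+// swifterror argument or alloca are not lowered to G_LOAD/G_STORE at all.
+// The error value lives in a virtual register tracked by SwiftError, so a
+// load of it becomes a plain COPY from that vreg and a store becomes a COPY
+// into a fresh def of it.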
+
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
@@ -486,13 +864,25 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
if (DL->getTypeStoreSize(LI.getType()) == 0)
return true;
- ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+ ArrayRef<Register> Regs = getOrCreateVRegs(LI);
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
- unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+ Register Base = getOrCreateVReg(*LI.getPointerOperand());
+
+ Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+ if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
+ assert(Regs.size() == 1 && "swifterror should be single pointer");
+ Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
+ LI.getPointerOperand());
+ MIRBuilder.buildCopy(Regs[0], VReg);
+ return true;
+ }
+
for (unsigned i = 0; i < Regs.size(); ++i) {
- unsigned Addr = 0;
- MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+ Register Addr;
+ MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(LI);
@@ -515,13 +905,25 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
return true;
- ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+ ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
- unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
+ Register Base = getOrCreateVReg(*SI.getPointerOperand());
+
+ Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+ if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
+ assert(Vals.size() == 1 && "swifterror should be single pointer");
+
+ Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
+ SI.getPointerOperand());
+ MIRBuilder.buildCopy(VReg, Vals[0]);
+ return true;
+ }
for (unsigned i = 0; i < Vals.size(); ++i) {
- unsigned Addr = 0;
- MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+ Register Addr;
+ MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(SI);
@@ -562,10 +964,9 @@ bool IRTranslator::translateExtractValue(const User &U,
MachineIRBuilder &MIRBuilder) {
const Value *Src = U.getOperand(0);
uint64_t Offset = getOffsetFromIndices(U, *DL);
- ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
- unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
- Offsets.begin();
+ unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
auto &DstRegs = allocateVRegs(U);
for (unsigned i = 0; i < DstRegs.size(); ++i)
@@ -580,8 +981,8 @@ bool IRTranslator::translateInsertValue(const User &U,
uint64_t Offset = getOffsetFromIndices(U, *DL);
auto &DstRegs = allocateVRegs(U);
ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
- ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
- ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+ ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
auto InsertedIt = InsertedRegs.begin();
for (unsigned i = 0; i < DstRegs.size(); ++i) {
@@ -596,19 +997,19 @@ bool IRTranslator::translateInsertValue(const User &U,
bool IRTranslator::translateSelect(const User &U,
MachineIRBuilder &MIRBuilder) {
- unsigned Tst = getOrCreateVReg(*U.getOperand(0));
- ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
- ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
- ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+ Register Tst = getOrCreateVReg(*U.getOperand(0));
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
+ ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+ ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
const SelectInst &SI = cast<SelectInst>(U);
- const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+ uint16_t Flags = 0;
+ if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()))
+ Flags = MachineInstr::copyFlagsFromInstruction(*Cmp);
+
for (unsigned i = 0; i < ResRegs.size(); ++i) {
- auto Select =
- MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
- if (Cmp && isa<FPMathOperator>(Cmp)) {
- Select->copyIRFlags(*Cmp);
- }
+ MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]},
+ {Tst, Op0Regs[i], Op1Regs[i]}, Flags);
}
return true;
@@ -619,7 +1020,7 @@ bool IRTranslator::translateBitCast(const User &U,
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
getLLTForType(*U.getType(), *DL)) {
- unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
+ Register SrcReg = getOrCreateVReg(*U.getOperand(0));
auto &Regs = *VMap.getVRegs(U);
// If we already assigned a vreg for this bitcast, we can't change that.
// Emit a copy to satisfy the users we already emitted.
@@ -636,9 +1037,9 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- unsigned Op = getOrCreateVReg(*U.getOperand(0));
- unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
+ Register Op = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op});
return true;
}
@@ -649,7 +1050,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return false;
Value &Op0 = *U.getOperand(0);
- unsigned BaseReg = getOrCreateVReg(Op0);
+ Register BaseReg = getOrCreateVReg(Op0);
Type *PtrIRTy = Op0.getType();
LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
@@ -674,43 +1075,43 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- unsigned OffsetReg =
- getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+ Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0));
BaseReg = NewBaseReg;
Offset = 0;
}
- unsigned IdxReg = getOrCreateVReg(*Idx);
+ Register IdxReg = getOrCreateVReg(*Idx);
if (MRI->getType(IdxReg) != OffsetTy) {
- unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
+ Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
IdxReg = NewIdxReg;
}
// N = N + Idx * ElementSize;
// Avoid doing it for ElementSize of 1.
- unsigned GepOffsetReg;
+ Register GepOffsetReg;
if (ElementSize != 1) {
- unsigned ElementSizeReg =
- getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
-
GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
+ auto ElementSizeMIB = MIRBuilder.buildConstant(
+ getLLTForType(*OffsetIRTy, *DL), ElementSize);
+ MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg);
} else
GepOffsetReg = IdxReg;
- unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
BaseReg = NewBaseReg;
}
}
if (Offset != 0) {
- unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
- MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
+ auto OffsetMIB =
+ MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
+ MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
return true;
}
@@ -721,6 +1122,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
bool IRTranslator::translateMemfunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
unsigned ID) {
+
+ // If the source is undef, then just emit a nop.
+ if (isa<UndefValue>(CI.getArgOperand(1))) {
+ switch (ID) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ return true;
+ default:
+ break;
+ }
+ }
+
LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
Type *DstTy = CI.getArgOperand(0)->getType();
if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
@@ -752,10 +1166,10 @@ bool IRTranslator::translateMemfunc(const CallInst &CI,
return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
MachineOperand::CreateES(Callee),
- CallLowering::ArgInfo(0, CI.getType()), Args);
+ CallLowering::ArgInfo({0}, CI.getType()), Args);
}
-void IRTranslator::getStackGuard(unsigned DstReg,
+void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
@@ -778,7 +1192,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
- ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
MIRBuilder.buildInstr(Op)
.addDef(ResRegs[0])
.addDef(ResRegs[1])
@@ -788,19 +1202,123 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
return true;
}
+unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::bswap:
+ return TargetOpcode::G_BSWAP;
+ case Intrinsic::ceil:
+ return TargetOpcode::G_FCEIL;
+ case Intrinsic::cos:
+ return TargetOpcode::G_FCOS;
+ case Intrinsic::ctpop:
+ return TargetOpcode::G_CTPOP;
+ case Intrinsic::exp:
+ return TargetOpcode::G_FEXP;
+ case Intrinsic::exp2:
+ return TargetOpcode::G_FEXP2;
+ case Intrinsic::fabs:
+ return TargetOpcode::G_FABS;
+ case Intrinsic::copysign:
+ return TargetOpcode::G_FCOPYSIGN;
+ case Intrinsic::minnum:
+ return TargetOpcode::G_FMINNUM;
+ case Intrinsic::maxnum:
+ return TargetOpcode::G_FMAXNUM;
+ case Intrinsic::minimum:
+ return TargetOpcode::G_FMINIMUM;
+ case Intrinsic::maximum:
+ return TargetOpcode::G_FMAXIMUM;
+ case Intrinsic::canonicalize:
+ return TargetOpcode::G_FCANONICALIZE;
+ case Intrinsic::floor:
+ return TargetOpcode::G_FFLOOR;
+ case Intrinsic::fma:
+ return TargetOpcode::G_FMA;
+ case Intrinsic::log:
+ return TargetOpcode::G_FLOG;
+ case Intrinsic::log2:
+ return TargetOpcode::G_FLOG2;
+ case Intrinsic::log10:
+ return TargetOpcode::G_FLOG10;
+ case Intrinsic::nearbyint:
+ return TargetOpcode::G_FNEARBYINT;
+ case Intrinsic::pow:
+ return TargetOpcode::G_FPOW;
+ case Intrinsic::rint:
+ return TargetOpcode::G_FRINT;
+ case Intrinsic::round:
+ return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::sin:
+ return TargetOpcode::G_FSIN;
+ case Intrinsic::sqrt:
+ return TargetOpcode::G_FSQRT;
+ case Intrinsic::trunc:
+ return TargetOpcode::G_INTRINSIC_TRUNC;
+ }
+ return Intrinsic::not_intrinsic;
+}
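+// A minimal sketch of what the table above buys us: a call such as
+//   %r = call float @llvm.sqrt.f32(float %x)
+// needs no bespoke handling; translateSimpleIntrinsic() looks up G_FSQRT here
+// and emits roughly
+//   %r:_(s32) = G_FSQRT %x
+// with the IR fast-math flags copied onto the generic instruction.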
+
+bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
+ Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder) {
+
+ unsigned Op = getSimpleIntrinsicOpcode(ID);
+
+ // Is this a simple intrinsic?
+ if (Op == Intrinsic::not_intrinsic)
+ return false;
+
+ // Yes. Let's translate it.
+ SmallVector<llvm::SrcOp, 4> VRegs;
+ for (auto &Arg : CI.arg_operands())
+ VRegs.push_back(getOrCreateVReg(*Arg));
+
+ MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+}
+
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
+
+ // If this is a simple intrinsic (that is, we just need to add a def of
+ // a vreg, and uses for each arg operand), then translate it.
+ if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
+ return true;
+
switch (ID) {
default:
break;
case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // Stack coloring is not enabled in O0 (which we care about now) so we can
- // drop these. Make sure someone notices when we start compiling at higher
- // opts though.
- if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
- return false;
+ case Intrinsic::lifetime_end: {
+ // No stack colouring in O0, discard region information.
+ if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+ return true;
+
+ unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
+ : TargetOpcode::LIFETIME_END;
+
+ // Get the underlying objects for the location passed on the lifetime
+ // marker.
+ SmallVector<const Value *, 4> Allocas;
+ GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+
+ // Iterate over each underlying object, creating lifetime markers for each
+ // static alloca. Quit if we find a non-static alloca.
+ for (const Value *V : Allocas) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI)
+ continue;
+
+ if (!AI->isStaticAlloca())
+ return true;
+
+ MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
+ }
return true;
+ }
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
assert(DI.getVariable() && "Missing variable");
@@ -848,10 +1366,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Value *Ptr = CI.getArgOperand(0);
unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+ // FIXME: Get alignment
MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
.addUse(getOrCreateVReg(*Ptr))
.addMemOperand(MF->getMachineMemOperand(
- MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+ MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1));
return true;
}
case Intrinsic::dbg_value: {
@@ -868,7 +1387,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
} else if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
} else {
- unsigned Reg = getOrCreateVReg(*V);
+ Register Reg = getOrCreateVReg(*V);
// FIXME: This does not handle register-indirect values at offset 0. The
// direct/indirect thing shouldn't really be handled by something as
// implicit as reg+noreg vs reg+imm in the first place, but it seems
@@ -889,94 +1408,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
- case Intrinsic::pow: {
- auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
- .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
- Pow->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::exp: {
- auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Exp->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::exp2: {
- auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Exp2->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::log: {
- auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Log->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::log2: {
- auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Log2->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::log10: {
- auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Log10->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::fabs: {
- auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Fabs->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::trunc:
- MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- case Intrinsic::round:
- MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- case Intrinsic::fma: {
- auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
- .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
- .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
- FMA->copyIRFlags(CI);
- return true;
- }
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
- unsigned Dst = getOrCreateVReg(CI);
- unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
- unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
- unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+ Register Dst = getOrCreateVReg(CI);
+ Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+ Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+ Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
- auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
- FMA->copyIRFlags(CI);
+ MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
+ MachineInstr::copyFlagsFromInstruction(CI));
} else {
LLT Ty = getLLTForType(*CI.getType(), *DL);
- auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
- FMul->copyIRFlags(CI);
- auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
- FAdd->copyIRFlags(CI);
+ auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1},
+ MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2},
+ MachineInstr::copyFlagsFromInstruction(CI));
}
return true;
}
@@ -986,7 +1436,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateMemfunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
- unsigned Reg = getOrCreateVReg(CI);
+ Register Reg = getOrCreateVReg(CI);
unsigned TypeID = MF->getTypeIDFor(GV);
MIRBuilder.buildConstant(Reg, TypeID);
return true;
@@ -1008,7 +1458,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
case Intrinsic::stackprotector: {
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
getStackGuard(GuardVal, MIRBuilder);
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
@@ -1023,6 +1473,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
PtrTy.getSizeInBits() / 8, 8));
return true;
}
+ case Intrinsic::stacksave: {
+ // Save the stack pointer to the location provided by the intrinsic.
+ Register Reg = getOrCreateVReg(CI);
+ Register StackPtr = MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore();
+
+ // If the target doesn't specify a stack pointer, then fall back.
+ if (!StackPtr)
+ return false;
+
+ MIRBuilder.buildCopy(Reg, StackPtr);
+ return true;
+ }
+ case Intrinsic::stackrestore: {
+ // Restore the stack pointer from the location provided by the intrinsic.
+ Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
+ Register StackPtr = MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore();
+
+ // If the target doesn't specify a stack pointer, then fall back.
+ if (!StackPtr)
+ return false;
+
+ MIRBuilder.buildCopy(StackPtr, Reg);
+ return true;
+ }
case Intrinsic::cttz:
case Intrinsic::ctlz: {
ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
@@ -1037,24 +1515,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
return true;
}
- case Intrinsic::ctpop: {
- MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- }
case Intrinsic::invariant_start: {
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+ Register Undef = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildUndef(Undef);
return true;
}
case Intrinsic::invariant_end:
return true;
- case Intrinsic::ceil:
- MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ case Intrinsic::assume:
+ case Intrinsic::var_annotation:
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, assumptions, and artificial side-effects.
return true;
}
return false;
@@ -1079,34 +1551,6 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
return true;
}
-unsigned IRTranslator::packRegs(const Value &V,
- MachineIRBuilder &MIRBuilder) {
- ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
- ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
- LLT BigTy = getLLTForType(*V.getType(), *DL);
-
- if (Regs.size() == 1)
- return Regs[0];
-
- unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
- MIRBuilder.buildUndef(Dst);
- for (unsigned i = 0; i < Regs.size(); ++i) {
- unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
- MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
- Dst = NewDst;
- }
- return Dst;
-}
-
-void IRTranslator::unpackRegs(const Value &V, unsigned Src,
- MachineIRBuilder &MIRBuilder) {
- ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
- ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
-
- for (unsigned i = 0; i < Regs.size(); ++i)
- MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
-}
-
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
@@ -1126,23 +1570,32 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
}
- bool IsSplitType = valueIsSplit(CI);
if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
- unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
- getLLTForType(*CI.getType(), *DL))
- : getOrCreateVReg(CI);
-
- SmallVector<unsigned, 8> Args;
- for (auto &Arg: CI.arg_operands())
- Args.push_back(packRegs(*Arg, MIRBuilder));
+ ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftInVReg = 0;
+ Register SwiftErrorVReg = 0;
+ for (auto &Arg: CI.arg_operands()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &CI, &MIRBuilder.getMBB(), Arg));
+ Args.emplace_back(makeArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
MF->getFrameInfo().setHasCalls(true);
- bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
- return getOrCreateVReg(*CI.getCalledValue());
- });
+ bool Success =
+ CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
+ [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
- if (IsSplitType)
- unpackRegs(CI, Res, MIRBuilder);
return Success;
}
@@ -1151,35 +1604,39 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
- unsigned Res = 0;
- if (!CI.getType()->isVoidTy()) {
- if (IsSplitType)
- Res =
- MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
- else
- Res = getOrCreateVReg(CI);
- }
+ ArrayRef<Register> ResultRegs;
+ if (!CI.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CI);
+
+ // Ignore the callsite attributes. Backend code is most likely not expecting
+ // an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB =
- MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+ MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+ if (isa<FPMathOperator>(CI))
+ MIB->copyIRFlags(CI);
for (auto &Arg : CI.arg_operands()) {
// Some intrinsics take metadata parameters. Reject them.
if (isa<MetadataAsValue>(Arg))
return false;
- MIB.addUse(packRegs(*Arg, MIRBuilder));
+ ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
+ if (VRegs.size() > 1)
+ return false;
+ MIB.addUse(VRegs[0]);
}
- if (IsSplitType)
- unpackRegs(CI, Res, MIRBuilder);
-
// Add a MachineMemOperand if it is a target mem intrinsic.
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+ unsigned Align = Info.align;
+ if (Align == 0)
+ Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()));
+
uint64_t Size = Info.memVT.getStoreSize();
MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Info.flags, Size, Info.align));
+ Info.flags, Size, Align));
}
return true;
@@ -1215,18 +1672,32 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- unsigned Res =
- MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
- SmallVector<unsigned, 8> Args;
- for (auto &Arg: I.arg_operands())
- Args.push_back(packRegs(*Arg, MIRBuilder));
+ ArrayRef<Register> Res;
+ if (!I.getType()->isVoidTy())
+ Res = getOrCreateVRegs(I);
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftErrorVReg = 0;
+ Register SwiftInVReg = 0;
+ for (auto &Arg : I.arg_operands()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &I, &MIRBuilder.getMBB(), Arg));
+ Args.push_back(makeArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
- if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+ if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
[&]() { return getOrCreateVReg(*I.getCalledValue()); }))
return false;
- unpackRegs(I, Res, MIRBuilder);
-
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
@@ -1241,6 +1712,12 @@ bool IRTranslator::translateInvoke(const User &U,
return true;
}
+bool IRTranslator::translateCallBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: Implement this.
+ return false;
+}
+
bool IRTranslator::translateLandingPad(const User &U,
MachineIRBuilder &MIRBuilder) {
const LandingPadInst &LP = cast<LandingPadInst>(U);
@@ -1270,7 +1747,7 @@ bool IRTranslator::translateLandingPad(const User &U,
.addSym(MF->addLandingPad(&MBB));
LLT Ty = getLLTForType(*LP.getType(), *DL);
- unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+ Register Undef = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildUndef(Undef);
SmallVector<LLT, 2> Tys;
@@ -1279,20 +1756,20 @@ bool IRTranslator::translateLandingPad(const User &U,
assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
// Mark exception register as live in.
- unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
if (!ExceptionReg)
return false;
MBB.addLiveIn(ExceptionReg);
- ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
- unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
if (!SelectorReg)
return false;
MBB.addLiveIn(SelectorReg);
- unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+ Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
MIRBuilder.buildCopy(PtrVReg, SelectorReg);
MIRBuilder.buildCast(ResRegs[1], PtrVReg);
@@ -1304,10 +1781,10 @@ bool IRTranslator::translateAlloca(const User &U,
auto &AI = cast<AllocaInst>(U);
if (AI.isSwiftError())
- return false;
+ return true;
if (AI.isStaticAlloca()) {
- unsigned Res = getOrCreateVReg(AI);
+ Register Res = getOrCreateVReg(AI);
int FI = getOrCreateFrameIndex(AI);
MIRBuilder.buildFrameIndex(Res, FI);
return true;
@@ -1322,29 +1799,29 @@ bool IRTranslator::translateAlloca(const User &U,
unsigned Align =
std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
- unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+ Register NumElts = getOrCreateVReg(*AI.getArraySize());
Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
if (MRI->getType(NumElts) != IntPtrTy) {
- unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+ Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
NumElts = ExtElts;
}
- unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
- unsigned TySize =
+ Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+ Register TySize =
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
LLT PtrTy = getLLTForType(*AI.getType(), *DL);
auto &TLI = *MF->getSubtarget().getTargetLowering();
- unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
- unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+ Register SPTmp = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildCopy(SPTmp, SPReg);
- unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+ Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
// Handle alignment. We have to realign if the allocation granule was smaller
@@ -1357,7 +1834,7 @@ bool IRTranslator::translateAlloca(const User &U,
// Round the size of the allocation up to the stack alignment size
// by add SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
- unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+ Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
AllocTmp = AlignedAlloc;
}
@@ -1387,7 +1864,7 @@ bool IRTranslator::translateInsertElement(const User &U,
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
if (U.getType()->getVectorNumElements() == 1) {
- unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ Register Elt = getOrCreateVReg(*U.getOperand(1));
auto &Regs = *VMap.getVRegs(U);
if (Regs.empty()) {
Regs.push_back(Elt);
@@ -1398,10 +1875,10 @@ bool IRTranslator::translateInsertElement(const User &U,
return true;
}
- unsigned Res = getOrCreateVReg(U);
- unsigned Val = getOrCreateVReg(*U.getOperand(0));
- unsigned Elt = getOrCreateVReg(*U.getOperand(1));
- unsigned Idx = getOrCreateVReg(*U.getOperand(2));
+ Register Res = getOrCreateVReg(U);
+ Register Val = getOrCreateVReg(*U.getOperand(0));
+ Register Elt = getOrCreateVReg(*U.getOperand(1));
+ Register Idx = getOrCreateVReg(*U.getOperand(2));
MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
return true;
}
@@ -1411,7 +1888,7 @@ bool IRTranslator::translateExtractElement(const User &U,
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
- unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+ Register Elt = getOrCreateVReg(*U.getOperand(0));
auto &Regs = *VMap.getVRegs(U);
if (Regs.empty()) {
Regs.push_back(Elt);
@@ -1421,11 +1898,11 @@ bool IRTranslator::translateExtractElement(const User &U,
}
return true;
}
- unsigned Res = getOrCreateVReg(U);
- unsigned Val = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ Register Val = getOrCreateVReg(*U.getOperand(0));
const auto &TLI = *MF->getSubtarget().getTargetLowering();
unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
- unsigned Idx = 0;
+ Register Idx;
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
@@ -1481,11 +1958,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Type *ValType = ResType->Type::getStructElementType(0);
auto Res = getOrCreateVRegs(I);
- unsigned OldValRes = Res[0];
- unsigned SuccessRes = Res[1];
- unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
- unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
- unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());
+ Register OldValRes = Res[0];
+ Register SuccessRes = Res[1];
+ Register Addr = getOrCreateVReg(*I.getPointerOperand());
+ Register Cmp = getOrCreateVReg(*I.getCompareOperand());
+ Register NewVal = getOrCreateVReg(*I.getNewValOperand());
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
@@ -1507,9 +1984,9 @@ bool IRTranslator::translateAtomicRMW(const User &U,
Type *ResType = I.getType();
- unsigned Res = getOrCreateVReg(I);
- unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
- unsigned Val = getOrCreateVReg(*I.getValOperand());
+ Register Res = getOrCreateVReg(I);
+ Register Addr = getOrCreateVReg(*I.getPointerOperand());
+ Register Val = getOrCreateVReg(*I.getValOperand());
unsigned Opcode = 0;
switch (I.getOperation()) {
@@ -1560,6 +2037,14 @@ bool IRTranslator::translateAtomicRMW(const User &U,
return true;
}
+bool IRTranslator::translateFence(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const FenceInst &Fence = cast<FenceInst>(U);
+ MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
+ Fence.getSyncScopeID());
+ return true;
+}
+
void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
DILocationVerifier Verifier;
@@ -1569,27 +2054,20 @@ void IRTranslator::finishPendingPhis() {
for (auto &Phi : PendingPHIs) {
const PHINode *PI = Phi.first;
ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+ MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
EntryBuilder->setDebugLoc(PI->getDebugLoc());
#ifndef NDEBUG
Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG
- // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
- // won't create extra control flow here, otherwise we need to find the
- // dominating predecessor here (or perhaps force the weirder IRTranslators
- // to provide a simple boundary).
- SmallSet<const BasicBlock *, 4> HandledPreds;
-
+ SmallSet<const MachineBasicBlock *, 16> SeenPreds;
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
auto IRPred = PI->getIncomingBlock(i);
- if (HandledPreds.count(IRPred))
- continue;
-
- HandledPreds.insert(IRPred);
- ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
+ ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
- assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
- "incorrect CFG at MachineBasicBlock level");
+ if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
+ continue;
+ SeenPreds.insert(Pred);
for (unsigned j = 0; j < ValRegs.size(); ++j) {
MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
MIB.addUse(ValRegs[j]);
@@ -1611,8 +2089,15 @@ bool IRTranslator::valueIsSplit(const Value &V,
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
- EntryBuilder->setDebugLoc(Inst.getDebugLoc());
- switch(Inst.getOpcode()) {
+ // We only emit constants into the entry block from here. To prevent jumpy
+ // debug behaviour, set the line to 0.
+ if (const DebugLoc &DL = Inst.getDebugLoc())
+ EntryBuilder->setDebugLoc(
+ DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+ else
+ EntryBuilder->setDebugLoc(DebugLoc());
+
+ switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: \
return translate##OPCODE(Inst, *CurBuilder.get());
@@ -1622,7 +2107,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
}
}
-bool IRTranslator::translate(const Constant &C, unsigned Reg) {
+bool IRTranslator::translate(const Constant &C, Register Reg) {
if (auto CI = dyn_cast<ConstantInt>(&C))
EntryBuilder->buildConstant(Reg, *CI);
else if (auto CF = dyn_cast<ConstantFP>(&C))
@@ -1635,7 +2120,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
unsigned NullSize = DL->getTypeSizeInBits(C.getType());
auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
- unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
+ Register ZeroReg = getOrCreateVReg(*ZeroVal);
EntryBuilder->buildCast(Reg, ZeroReg);
} else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder->buildGlobalValue(Reg, GV);
@@ -1645,7 +2130,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
// Return the scalar if it is a <1 x Ty> vector.
if (CAZ->getNumElements() == 1)
return translate(*CAZ->getElementValue(0u), Reg);
- SmallVector<unsigned, 4> Ops;
+ SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
Constant &Elt = *CAZ->getElementValue(i);
Ops.push_back(getOrCreateVReg(Elt));
@@ -1655,7 +2140,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
return translate(*CV->getElementAsConstant(0), Reg);
- SmallVector<unsigned, 4> Ops;
+ SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
Ops.push_back(getOrCreateVReg(Elt));
@@ -1673,7 +2158,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
return translate(*CV->getOperand(0), Reg);
- SmallVector<unsigned, 4> Ops;
+ SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
@@ -1686,6 +2171,17 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
return true;
}
+void IRTranslator::finalizeBasicBlock() {
+ for (auto &JTCase : SL->JTCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!JTCase.first.Emitted)
+ emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
+
+ emitJumpTable(JTCase.second, JTCase.second.MBB);
+ }
+ SL->JTCases.clear();
+}
+
void IRTranslator::finalizeFunction() {
// Release the memory used by the different maps we
// needed during the translation.
@@ -1698,6 +2194,7 @@ void IRTranslator::finalizeFunction() {
// destroying it twice (in ~IRTranslator() and ~LLVMContext())
EntryBuilder.reset();
CurBuilder.reset();
+ FuncInfo.clear();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1710,13 +2207,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Set the CSEConfig and run the analysis.
GISelCSEInfo *CSEInfo = nullptr;
TPC = &getAnalysis<TargetPassConfig>();
- bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
- // Disable CSE for O0.
- bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+ bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
+ ? EnableCSEInIRTranslator
+ : TPC->isGISelCSEEnabled();
+
if (EnableCSE) {
EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
- std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
- CSEInfo = &Wrapper.get(std::move(Config));
+ CSEInfo = &Wrapper.get(TPC->getCSEConfig());
EntryBuilder->setCSEInfo(CSEInfo);
CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
CurBuilder->setCSEInfo(CSEInfo);
@@ -1730,6 +2227,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ FuncInfo.MF = MF;
+ FuncInfo.BPI = nullptr;
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetMachine &TM = MF->getTarget();
+ SL = make_unique<GISelSwitchLowering>(this, FuncInfo);
+ SL->init(TLI, TM, *DL);
+
+ EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
assert(PendingPHIs.empty() && "stale PHIs");
@@ -1749,6 +2254,10 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF->push_back(EntryBB);
EntryBuilder->setMBB(*EntryBB);
+ DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
+ SwiftError.setFunction(CurMF);
+ SwiftError.createEntriesInEntryBlock(DbgLoc);
+
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
auto *&MBB = BBToMBB[&BB];
@@ -1764,20 +2273,25 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
EntryBB->addSuccessor(&getMBB(F.front()));
// Lower the actual args into this basic block.
- SmallVector<unsigned, 8> VRegArgs;
+ SmallVector<ArrayRef<Register>, 8> VRegArgs;
for (const Argument &Arg: F.args()) {
if (DL->getTypeStoreSize(Arg.getType()) == 0)
continue; // Don't handle zero sized types.
- VRegArgs.push_back(
- MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
+ ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
+ VRegArgs.push_back(VRegs);
+
+ if (Arg.hasSwiftErrorAttr()) {
+ assert(VRegs.size() == 1 && "Too many vregs for Swift error");
+ SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
+ }
}
// We don't currently support translating swifterror or swiftself functions.
for (auto &Arg : F.args()) {
- if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+ if (Arg.hasSwiftSelfAttr()) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
- R << "unable to lower arguments due to swifterror/swiftself: "
+ R << "unable to lower arguments due to swiftself: "
<< ore::NV("Prototype", F.getType());
reportTranslationError(*MF, *TPC, *ORE, R);
return false;
@@ -1792,20 +2306,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
- auto ArgIt = F.arg_begin();
- for (auto &VArg : VRegArgs) {
- // If the argument is an unsplit scalar then don't use unpackRegs to avoid
- // creating redundant copies.
- if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
- auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
- assert(VRegs.empty() && "VRegs already populated?");
- VRegs.push_back(VArg);
- } else {
- unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
- }
- ArgIt++;
- }
-
// Need to visit defs before uses when translating instructions.
GISelObserverWrapper WrapperObserver;
if (EnableCSE && CSEInfo)
@@ -1845,6 +2345,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
reportTranslationError(*MF, *TPC, *ORE, R);
return false;
}
+
+ finalizeBasicBlock();
}
#ifndef NDEBUG
WrapperObserver.removeObserver(&Verifier);
@@ -1853,6 +2355,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
finishPendingPhis();
+ SwiftError.propagateVRegs();
+
// Merge the argument lowering and constants block with its single
// successor, the LLVM-IR entry block. We want the basic block to
// be maximal.
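
The IRTranslator hunk above replaces the old "disable CSE at -O0" heuristic: an explicitly given EnableCSEInIRTranslator flag now wins, and otherwise the target's TargetPassConfig default decides. Below is a minimal standalone sketch of that resolution logic, assuming a simplified option type; BoolOption and resolveEnableCSE are illustrative stand-ins, not the real cl::opt or TargetPassConfig API.

    #include <iostream>

    // Simplified stand-in for an llvm::cl::opt<bool>: tracks whether the flag
    // was explicitly given on the command line, in addition to its value.
    struct BoolOption {
      bool Value = false;
      unsigned NumOccurrences = 0;
      unsigned getNumOccurrences() const { return NumOccurrences; }
      operator bool() const { return Value; }
    };

    // Mirrors the shape of the patch: an explicit flag takes precedence,
    // otherwise fall back to the target default (TPC->isGISelCSEEnabled()
    // in the real code).
    bool resolveEnableCSE(const BoolOption &Flag, bool TargetDefault) {
      return Flag.getNumOccurrences() ? static_cast<bool>(Flag) : TargetDefault;
    }

    int main() {
      BoolOption NotGiven;              // flag absent -> use target default
      BoolOption GivenFalse{false, 1};  // explicit "=false" -> forces CSE off
      std::cout << resolveEnableCSE(NotGiven, true) << '\n';   // 1
      std::cout << resolveEnableCSE(GivenFalse, true) << '\n'; // 0
    }
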
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index c83c791327e4..70694fe6b6c8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -50,9 +49,7 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
-InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {
- initializeInstructionSelectPass(*PassRegistry::getPassRegistry());
-}
+InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
@@ -90,10 +87,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
"instruction is not legal", *MI);
return false;
}
-#endif
// FIXME: We could introduce new blocks and will need to fix the outer loop.
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
+#endif
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
@@ -145,8 +142,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-
for (MachineBasicBlock &MBB : MF) {
if (MBB.empty())
continue;
@@ -178,6 +173,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
+#ifndef NDEBUG
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// Now that selection is complete, there are no more generic vregs. Verify
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
@@ -216,7 +213,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
reportGISelFailure(MF, TPC, MORE, R);
return false;
}
-
+#endif
auto &TLI = *MF.getSubtarget().getTargetLowering();
TLI.finalizeLowering(MF);
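
The InstructionSelect changes above move NumBlocks and the TargetRegisterInfo lookup under #ifndef NDEBUG, so the verification-only state is neither computed nor flagged as unused in release builds. A small self-contained sketch of that pattern follows; processBlocks and the doubling loop are placeholders, not the pass's real logic.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Debug-only bookkeeping: record a value before a transformation and
    // assert it is unchanged afterwards, without paying for it in release builds.
    void processBlocks(std::vector<int> &Blocks) {
    #ifndef NDEBUG
      const std::size_t NumBlocks = Blocks.size(); // used only by the assert below
    #endif

      for (int &B : Blocks)
        B *= 2; // stand-in for per-block selection work

    #ifndef NDEBUG
      assert(Blocks.size() == NumBlocks &&
             "selection should not add or remove blocks");
    #endif
    }

    int main() {
      std::vector<int> Blocks{1, 2, 3};
      processBlocks(Blocks);
    }
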
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 38913e4afcba..2ad35b3a72c9 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,16 +41,16 @@ bool InstructionSelector::constrainOperandRegToRegClass(
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return
- constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC,
+ I.getOperand(OpIdx), OpIdx);
}
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
- return *VRegVal == Value;
+ if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ return VRegVal->Value == Value;
return false;
}
@@ -79,6 +78,6 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
std::next(MI.getIterator()) == IntoMI.getIterator())
return true;
- return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
- empty(MI.implicit_operands());
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
}
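
In the InstructionSelector hunk above, isOperandImmEqual switches to getConstantVRegValWithLookThrough, which follows value-preserving definitions (such as copies) back to an underlying constant rather than only inspecting the immediate def. A toy version of the look-through idea is sketched below using a plain register-to-definition map instead of MachineRegisterInfo; DefKind, Def, and getConstantWithLookThrough are illustrative names only.

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <unordered_map>

    enum class DefKind { Constant, Copy, Other };

    struct Def {
      DefKind Kind;
      int64_t Imm = 0;   // valid when Kind == Constant
      unsigned Src = 0;  // valid when Kind == Copy
    };

    // Walk copy chains until a constant definition (or something opaque) is found.
    std::optional<int64_t>
    getConstantWithLookThrough(unsigned Reg,
                               const std::unordered_map<unsigned, Def> &Defs) {
      while (true) {
        auto It = Defs.find(Reg);
        if (It == Defs.end())
          return std::nullopt;
        const Def &D = It->second;
        if (D.Kind == DefKind::Constant)
          return D.Imm;
        if (D.Kind == DefKind::Copy) {
          Reg = D.Src; // look through the copy and keep searching
          continue;
        }
        return std::nullopt; // opaque definition, give up
      }
    }

    int main() {
      std::unordered_map<unsigned, Def> Defs;
      Defs[1] = {DefKind::Constant, 42, 0};
      Defs[2] = {DefKind::Copy, 0, 1}; // %2 = COPY %1
      Defs[3] = {DefKind::Copy, 0, 2}; // %3 = COPY %2
      if (auto V = getConstantWithLookThrough(3, Defs))
        std::cout << *V << '\n';       // prints 42
    }
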
diff --git a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 94eab9ae00c8..601d50e9806f 100644
--- a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/GlobalISel/LegalizerPredicates.cpp - Predicates --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,15 +38,19 @@ LegalityPredicate LegalityPredicates::typePairInSet(
};
}
-LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
+LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
- std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) {
- SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
+ std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
+ SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit;
return [=](const LegalityQuery &Query) {
- TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
- Query.MMODescrs[MMOIdx].SizeInBits};
- return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
- TypesAndMemSize.end();
+ TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
+ Query.MMODescrs[MMOIdx].SizeInBits,
+ Query.MMODescrs[MMOIdx].AlignInBits};
+ return std::find_if(
+ TypesAndMemDesc.begin(), TypesAndMemDesc.end(),
+ [=](const TypePairAndMemDesc &Entry) ->bool {
+ return Match.isCompatible(Entry);
+ }) != TypesAndMemDesc.end();
};
}
@@ -57,10 +60,30 @@ LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) {
};
}
+LegalityPredicate LegalityPredicates::isVector(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isVector();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isPointer();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
+ unsigned AddrSpace) {
+ return [=](const LegalityQuery &Query) {
+ LLT Ty = Query.Types[TypeIdx];
+ return Ty.isPointer() && Ty.getAddressSpace() == AddrSpace;
+ };
+}
+
LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
unsigned Size) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
+ const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
};
}
@@ -68,18 +91,49 @@ LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
unsigned Size) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
+ const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
};
}
+LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.getScalarSizeInBits() < Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltWiderThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.getScalarSizeInBits() > Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return !isPowerOf2_32(QueryTy.getScalarSizeInBits());
+ };
+}
+
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
+ const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
};
}
+LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() ==
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
@@ -88,8 +142,8 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
- return QueryTy.isVector() && isPowerOf2_32(QueryTy.getNumElements());
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
};
}
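
Each factory in LegalityPredicates.cpp returns a lambda that captures its parameters by value and evaluates a property of the LegalityQuery; the new isVector, isPointer, and scalarOrEltWiderThan predicates follow the same shape as widerThan. A stripped-down sketch of that pattern is shown below with a stand-in query type; SimpleType, Query, and the widerThan factory here are illustrative, not the real LLT or LegalityQuery.

    #include <functional>
    #include <iostream>
    #include <vector>

    // Stand-in for LLT: a bit width plus a vector flag.
    struct SimpleType {
      unsigned SizeInBits;
      bool Vector;
    };

    struct Query {
      std::vector<SimpleType> Types; // indexed by type index, as in LegalityQuery
    };

    using Predicate = std::function<bool(const Query &)>;

    // Factory in the style of LegalityPredicates::widerThan: the returned lambda
    // captures TypeIdx and Size and inspects the query only when invoked.
    Predicate widerThan(unsigned TypeIdx, unsigned Size) {
      return [=](const Query &Q) {
        const SimpleType &Ty = Q.Types[TypeIdx];
        return !Ty.Vector && Ty.SizeInBits > Size;
      };
    }

    int main() {
      Predicate P = widerThan(0, 32);
      Query Q{{{64, false}}};
      std::cout << P(Q) << '\n'; // 1: s64 is a scalar wider than 32 bits
    }
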
diff --git a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index a29b32ecdc03..fcbecf90a845 100644
--- a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/GlobalISel/LegalizerMutations.cpp - Mutations ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,25 +26,46 @@ LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx,
};
}
-LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx,
- unsigned Min) {
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
return [=](const LegalityQuery &Query) {
- unsigned NewSizeInBits =
- 1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits());
- if (NewSizeInBits < Min)
- NewSizeInBits = Min;
- return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits));
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+ LLT NewEltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
+ unsigned Min) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits =
+ std::max(1u << Log2_32_Ceil(Ty.getScalarSizeInBits()), Min);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
};
}
LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
- const LLT &VecTy = Query.Types[TypeIdx];
- unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements());
- if (NewNumElements < Min)
- NewNumElements = Min;
- return std::make_pair(
- TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits()));
+ const LLT VecTy = Query.Types[TypeIdx];
+ unsigned NewNumElements =
+ std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min);
+ return std::make_pair(TypeIdx,
+ LLT::vector(NewNumElements, VecTy.getElementType()));
+ };
+}
+
+LegalizeMutation LegalizeMutations::scalarize(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx, Query.Types[TypeIdx].getElementType());
};
}
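
The new widenScalarOrEltToNextPow2 mutation above rounds the scalar (or element) bit width up to the next power of two, with Min as a floor, via std::max(1u << Log2_32_Ceil(bits), Min). The arithmetic is sketched below in isolation, using a hand-rolled ceiling log2 in place of LLVM's Log2_32_Ceil; log2Ceil and nextPow2AtLeast are illustrative names.

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    // Smallest N such that 2^N >= X (ceiling log2), for small positive X.
    unsigned log2Ceil(uint32_t X) {
      unsigned N = 0;
      while ((1u << N) < X)
        ++N;
      return N;
    }

    // Round a bit width up to the next power of two, but never below Min.
    // Mirrors the computation in widenScalarOrEltToNextPow2.
    unsigned nextPow2AtLeast(uint32_t Bits, uint32_t Min) {
      return std::max(1u << log2Ceil(Bits), Min);
    }

    int main() {
      std::cout << nextPow2AtLeast(17, 8) << '\n'; // 32
      std::cout << nextPow2AtLeast(3, 8) << '\n';  // 8 (clamped to Min)
      std::cout << nextPow2AtLeast(64, 8) << '\n'; // 64 (already a power of two)
    }
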
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index 84131e59948c..b5b26bff34bb 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
#include <iterator>
@@ -50,9 +50,7 @@ INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
-Legalizer::Legalizer() : MachineFunctionPass(ID) {
- initializeLegalizerPass(*PassRegistry::getPassRegistry());
-}
+Legalizer::Legalizer() : MachineFunctionPass(ID) { }
void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
@@ -77,6 +75,7 @@ static bool isArtifact(const MachineInstr &MI) {
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_CONCAT_VECTORS:
case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
return true;
}
}
@@ -87,12 +86,15 @@ namespace {
class LegalizerWorkListManager : public GISelChangeObserver {
InstListTy &InstList;
ArtifactListTy &ArtifactList;
+#ifndef NDEBUG
+ SmallVector<MachineInstr *, 4> NewMIs;
+#endif
public:
LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
: InstList(Insts), ArtifactList(Arts) {}
- void createdInstr(MachineInstr &MI) override {
+ void createdOrChangedInstr(MachineInstr &MI) {
// Only legalize pre-isel generic instructions.
// Legalization process could generate Target specific pseudo
// instructions with generic types. Don't record them
@@ -102,7 +104,20 @@ public:
else
InstList.insert(&MI);
}
+ }
+
+ void createdInstr(MachineInstr &MI) override {
LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
+ LLVM_DEBUG(NewMIs.push_back(&MI));
+ createdOrChangedInstr(MI);
+ }
+
+ void printNewInstrs() {
+ LLVM_DEBUG({
+ for (const auto *MI : NewMIs)
+ dbgs() << ".. .. New MI: " << *MI;
+ NewMIs.clear();
+ });
}
void erasingInstr(MachineInstr &MI) override {
@@ -119,7 +134,7 @@ public:
// When insts change, we want to revisit them to legalize them again.
// We'll consider them the same as created.
LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
- createdInstr(MI);
+ createdOrChangedInstr(MI);
}
};
} // namespace
@@ -155,20 +170,22 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
if (!isPreISelGenericOpcode(MI.getOpcode()))
continue;
if (isArtifact(MI))
- ArtifactList.insert(&MI);
+ ArtifactList.deferred_insert(&MI);
else
- InstList.insert(&MI);
+ InstList.deferred_insert(&MI);
}
}
+ ArtifactList.finalize();
+ InstList.finalize();
std::unique_ptr<MachineIRBuilder> MIRBuilder;
GISelCSEInfo *CSEInfo = nullptr;
- bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None;
- // Disable CSE for O0.
- bool EnableCSE = !IsO0 && EnableCSEInLegalizer;
+ bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
+ ? EnableCSEInLegalizer
+ : TPC.isGISelCSEEnabled();
+
if (EnableCSE) {
MIRBuilder = make_unique<CSEMIRBuilder>();
- std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
- CSEInfo = &Wrapper.get(std::move(Config));
+ CSEInfo = &Wrapper.get(TPC.getCSEConfig());
MIRBuilder->setCSEInfo(CSEInfo);
} else
MIRBuilder = make_unique<MachineIRBuilder>();
@@ -210,6 +227,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
"unable to legalize instruction", MI);
return false;
}
+ WorkListObserver.printNewInstrs();
Changed |= Res == LegalizerHelper::Legalized;
}
while (!ArtifactList.empty()) {
@@ -222,7 +240,9 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
- if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
+ if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
+ WrapperObserver)) {
+ WorkListObserver.printNewInstrs();
for (auto *DeadMI : DeadInstructions) {
LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
RemoveDeadInstFromLists(DeadMI);
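
The Legalizer hunks above switch the initial scan to deferred_insert followed by a single finalize() call before the worklists are consumed. As a rough sketch of that scan-then-finalize shape, the toy worklist below buffers deferred insertions and dedupes them at finalize(); this assumes only uniquing and stable order are needed and is not the real GISelWorkList implementation.

    #include <iostream>
    #include <unordered_set>
    #include <vector>

    // Minimal worklist: deferred_insert only buffers; finalize() dedupes and
    // makes the elements visible for pop_back_val().
    template <typename T> class DeferredWorkList {
      std::vector<T> Pending;
      std::vector<T> Items;
      std::unordered_set<T> Seen;

    public:
      void deferred_insert(const T &V) { Pending.push_back(V); }

      void finalize() {
        for (const T &V : Pending)
          if (Seen.insert(V).second)
            Items.push_back(V);
        Pending.clear();
      }

      bool empty() const { return Items.empty(); }

      T pop_back_val() {
        T V = Items.back();
        Items.pop_back();
        return V;
      }
    };

    int main() {
      DeferredWorkList<int> WL;
      WL.deferred_insert(1);
      WL.deferred_insert(2);
      WL.deferred_insert(1); // duplicate, dropped at finalize()
      WL.finalize();
      while (!WL.empty())
        std::cout << WL.pop_back_val() << ' '; // prints "2 1 "
      std::cout << '\n';
    }
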
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b3fc94cdec60..f5cf7fc9bd9b 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,39 @@
using namespace llvm;
using namespace LegalizeActions;
+/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
+///
+/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
+/// with any leftover piece as type \p LeftoverTy
+///
+/// Returns -1 in the first element of the pair if the breakdown is not
+/// satisfiable.
+static std::pair<int, int>
+getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned Size = OrigTy.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned NumParts = Size / NarrowSize;
+ unsigned LeftoverSize = Size - NumParts * NarrowSize;
+ assert(Size > NarrowSize);
+
+ if (LeftoverSize == 0)
+ return {NumParts, 0};
+
+ if (NarrowTy.isVector()) {
+ unsigned EltSize = OrigTy.getScalarSizeInBits();
+ if (LeftoverSize % EltSize != 0)
+ return {-1, -1};
+ LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+ } else {
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ }
+
+ int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
+ return std::make_pair(NumParts, NumLeftover);
+}
+
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
@@ -50,6 +82,10 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
+ if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
+ MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+ return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
+ : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
case Legal:
@@ -70,6 +106,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
case FewerElements:
LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
+ case MoreElements:
+ LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
+ return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
@@ -80,13 +119,103 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
}
}
-void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
- SmallVectorImpl<unsigned> &VRegs) {
+void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs) {
for (int i = 0; i < NumParts; ++i)
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
MIRBuilder.buildUnmerge(VRegs, Reg);
}
+bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
+ LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverRegs) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned RegSize = RegTy.getSizeInBits();
+ unsigned MainSize = MainTy.getSizeInBits();
+ unsigned NumParts = RegSize / MainSize;
+ unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+ // Use an unmerge when possible.
+ if (LeftoverSize == 0) {
+ for (unsigned I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+ return true;
+ }
+
+ if (MainTy.isVector()) {
+ unsigned EltSize = MainTy.getScalarSizeInBits();
+ if (LeftoverSize % EltSize != 0)
+ return false;
+ LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+ } else {
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ }
+
+ // For irregular sizes, extract the individual parts.
+ for (unsigned I = 0; I != NumParts; ++I) {
+ Register NewReg = MRI.createGenericVirtualRegister(MainTy);
+ VRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+ }
+
+ for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+ Offset += LeftoverSize) {
+ Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+ LeftoverRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, Offset);
+ }
+
+ return true;
+}
+
+void LegalizerHelper::insertParts(Register DstReg,
+ LLT ResultTy, LLT PartTy,
+ ArrayRef<Register> PartRegs,
+ LLT LeftoverTy,
+ ArrayRef<Register> LeftoverRegs) {
+ if (!LeftoverTy.isValid()) {
+ assert(LeftoverRegs.empty());
+
+ if (!ResultTy.isVector()) {
+ MIRBuilder.buildMerge(DstReg, PartRegs);
+ return;
+ }
+
+ if (PartTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, PartRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, PartRegs);
+ return;
+ }
+
+ unsigned PartSize = PartTy.getSizeInBits();
+ unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
+
+ Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
+ MIRBuilder.buildUndef(CurResultReg);
+
+ unsigned Offset = 0;
+ for (Register PartReg : PartRegs) {
+ Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
+ MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
+ CurResultReg = NewResultReg;
+ Offset += PartSize;
+ }
+
+ for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
+ // Use the original output register for the final insert to avoid a copy.
+ Register NewResultReg = (I + 1 == E) ?
+ DstReg : MRI.createGenericVirtualRegister(ResultTy);
+
+ MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
+ CurResultReg = NewResultReg;
+ Offset += LeftoverPartSize;
+ }
+}
+
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
@@ -116,6 +245,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_FDIV:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
+ case TargetOpcode::G_FEXP:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
+ case TargetOpcode::G_FEXP2:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
case TargetOpcode::G_FREM:
return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
case TargetOpcode::G_FPOW:
@@ -123,6 +258,32 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_FMA:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
+ case TargetOpcode::G_FSIN:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::SIN_F128
+ : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
+ case TargetOpcode::G_FCOS:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::COS_F128
+ : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
+ case TargetOpcode::G_FLOG10:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::LOG10_F128
+ : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
+ case TargetOpcode::G_FLOG:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::LOG_F128
+ : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
+ case TargetOpcode::G_FLOG2:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::LOG2_F128
+ : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
+ case TargetOpcode::G_FCEIL:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
+ case TargetOpcode::G_FFLOOR:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
}
llvm_unreachable("Unknown libcall function");
}
@@ -214,7 +375,20 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FPOW:
- case TargetOpcode::G_FREM: {
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR: {
+ if (Size > 64) {
+ LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
+ return UnableToLegalize;
+ }
Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
@@ -250,10 +424,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
// FIXME: Support other types
unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
+ if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
return UnableToLegalize;
LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder, Type::getInt32Ty(Ctx),
+ MI, MIRBuilder,
+ ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
if (Status != Legalized)
return Status;
@@ -264,12 +439,12 @@ LegalizerHelper::libcall(MachineInstr &MI) {
// FIXME: Support other types
unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
+ if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
return UnableToLegalize;
LegalizeResult Status = conversionLibcall(
MI, MIRBuilder,
ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
- Type::getInt32Ty(Ctx));
+ FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
if (Status != Legalized)
return Status;
break;
@@ -283,10 +458,6 @@ LegalizerHelper::libcall(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
- // FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
- return UnableToLegalize;
-
MIRBuilder.setInstr(MI);
uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -302,12 +473,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
- SmallVector<unsigned, 2> DstRegs;
+ SmallVector<Register, 2> DstRegs;
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(
MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if(MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
@@ -315,6 +486,38 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_CONSTANT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const APInt &Val = MI.getOperand(1).getCImm()->getValue();
+ unsigned TotalSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts = TotalSize / NarrowSize;
+
+ SmallVector<Register, 4> PartRegs;
+ for (int I = 0; I != NumParts; ++I) {
+ unsigned Offset = I * NarrowSize;
+ auto K = MIRBuilder.buildConstant(NarrowTy,
+ Val.lshr(Offset).trunc(NarrowSize));
+ PartRegs.push_back(K.getReg(0));
+ }
+
+ LLT LeftoverTy;
+ unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
+ SmallVector<Register, 1> LeftoverRegs;
+ if (LeftoverBits != 0) {
+ LeftoverTy = LLT::scalar(LeftoverBits);
+ auto K = MIRBuilder.buildConstant(
+ LeftoverTy,
+ Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
+ LeftoverRegs.push_back(K.getReg(0));
+ }
+
+ insertParts(MI.getOperand(0).getReg(),
+ Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ADD: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
@@ -323,16 +526,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// Expand in terms of carry-setting/consuming G_ADDE instructions.
int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
- SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+ SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
- unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
MIRBuilder.buildConstant(CarryIn, 0);
for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
Src2Regs[i], CarryIn);
@@ -340,67 +543,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
CarryIn = CarryOut;
}
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_EXTRACT: {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- // FIXME: add support for when SizeOp1 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp1 % NarrowSize != 0)
- return UnableToLegalize;
- int NumParts = SizeOp1 / NarrowSize;
-
- SmallVector<unsigned, 2> SrcRegs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
-
- unsigned OpReg = MI.getOperand(0).getReg();
- uint64_t OpStart = MI.getOperand(2).getImm();
- uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- unsigned SrcStart = i * NarrowSize;
-
- if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
- // No part of the extract uses this subregister, ignore it.
- continue;
- } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
- // The entire subregister is extracted, forward the value.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- }
-
- // OpSegStart is where this destination segment would start in OpReg if it
- // extended infinitely in both directions.
- int64_t ExtractOffset;
- uint64_t SegSize;
- if (OpStart < SrcStart) {
- ExtractOffset = 0;
- SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
- } else {
- ExtractOffset = OpStart - SrcStart;
- SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
- }
-
- unsigned SegReg = SrcRegs[i];
- if (ExtractOffset != 0 || SegSize != NarrowSize) {
- // A genuine extract is needed.
- SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
- MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
- }
-
- DstRegs.push_back(SegReg);
- }
-
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if(MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
@@ -408,178 +551,117 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_INSERT: {
+ case TargetOpcode::G_SUB: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
-
- SmallVector<unsigned, 2> SrcRegs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
- unsigned OpReg = MI.getOperand(2).getReg();
- uint64_t OpStart = MI.getOperand(3).getImm();
- uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstStart = i * NarrowSize;
-
- if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
- // No part of the insert affects this subregister, forward the original.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
- // The entire subregister is defined by this insert, forward the new
- // value.
- DstRegs.push_back(OpReg);
- continue;
- }
+ SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
- // OpSegStart is where this destination segment would start in OpReg if it
- // extended infinitely in both directions.
- int64_t ExtractOffset, InsertOffset;
- uint64_t SegSize;
- if (OpStart < DstStart) {
- InsertOffset = 0;
- ExtractOffset = DstStart - OpStart;
- SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
- } else {
- InsertOffset = OpStart - DstStart;
- ExtractOffset = 0;
- SegSize =
- std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
- }
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
+ {Src1Regs[0], Src2Regs[0]});
+ DstRegs.push_back(DstReg);
+ Register BorrowIn = BorrowOut;
+ for (int i = 1; i < NumParts; ++i) {
+ DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
- unsigned SegReg = OpReg;
- if (ExtractOffset != 0 || SegSize != OpSize) {
- // A genuine extract is needed.
- SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
- MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
- }
+ MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
+ {Src1Regs[i], Src2Regs[i], BorrowIn});
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
DstRegs.push_back(DstReg);
+ BorrowIn = BorrowOut;
}
-
- assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_UMULH:
+ return narrowScalarMul(MI, NarrowTy);
+ case TargetOpcode::G_EXTRACT:
+ return narrowScalarExtract(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_INSERT:
+ return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
const auto &MMO = **MI.memoperands_begin();
- // This implementation doesn't work for atomics. Give up instead of doing
- // something invalid.
- if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
- MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- LLT OffsetTy = LLT::scalar(
- MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
- SmallVector<unsigned, 2> DstRegs;
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg = 0;
- unsigned Adjustment = i * NarrowSize / 8;
- unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
-
- MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
- MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
- MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
-
- MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
- Adjustment);
+ if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = **MI.memoperands_begin();
+ MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
+ MIRBuilder.buildAnyExt(DstReg, TmpReg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
- MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
+ return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ }
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
+ bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
- DstRegs.push_back(DstReg);
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = **MI.memoperands_begin();
+ if (MMO.getSizeInBits() == NarrowSize) {
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ } else {
+ unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
+ : TargetOpcode::G_SEXTLOAD;
+ MIRBuilder.buildInstr(ExtLoad)
+ .addDef(TmpReg)
+ .addUse(PtrReg)
+ .addMemOperand(&MMO);
}
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ if (ZExt)
+ MIRBuilder.buildZExt(DstReg, TmpReg);
else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_STORE: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
const auto &MMO = **MI.memoperands_begin();
- // This implementation doesn't work for atomics. Give up instead of doing
- // something invalid.
- if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
- MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+
+ Register SrcReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
- LLT OffsetTy = LLT::scalar(
- MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
- SmallVector<unsigned, 2> SrcRegs;
- extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
-
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = 0;
- unsigned Adjustment = i * NarrowSize / 8;
- unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
-
- MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
- MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
- MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
-
- MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
- Adjustment);
-
- MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_CONSTANT: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
+ unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
+ unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
+ if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- SmallVector<unsigned, 2> DstRegs;
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- ConstantInt *CI =
- ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
- MIRBuilder.buildConstant(DstReg, *CI);
- DstRegs.push_back(DstReg);
+ if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = **MI.memoperands_begin();
+ MIRBuilder.buildTrunc(TmpReg, SrcReg);
+ MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
+ MI.eraseFromParent();
+ return Legalized;
}
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
+
+ return reduceLoadStoreWidth(MI, 0, NarrowTy);
}
+ case TargetOpcode::G_SELECT:
+ return narrowScalarSelect(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR: {
@@ -592,44 +674,112 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// ...
// AN = BinOp<Ty/N> BN, CN
// A = G_MERGE_VALUES A1, ..., AN
+ return narrowScalarBasic(MI, TypeIdx, NarrowTy);
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ return narrowScalarShift(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP:
+ if (TypeIdx != 0)
+ return UnableToLegalize; // TODO
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INTTOPTR:
+ if (TypeIdx != 1)
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- // List the registers where the destination will be scattered.
- SmallVector<unsigned, 2> DstRegs;
- // List the registers where the first argument will be split.
- SmallVector<unsigned, 2> SrcsReg1;
- // List the registers where the second argument will be split.
- SmallVector<unsigned, 2> SrcsReg2;
- // Create all the temporary registers.
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRTOINT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
- DstRegs.push_back(DstReg);
- SrcsReg1.push_back(SrcReg1);
- SrcsReg2.push_back(SrcReg2);
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PHI: {
+ unsigned NumParts = SizeOp0 / NarrowSize;
+ SmallVector<Register, 2> DstRegs;
+ SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
+ DstRegs.resize(NumParts);
+ SrcRegs.resize(MI.getNumOperands() / 2);
+ Observer.changingInstr(MI);
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
+ SrcRegs[i / 2]);
}
- // Explode the big arguments into smaller chunks.
- MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
- MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, MI);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
+ MachineInstrBuilder MIB =
+ MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
+ for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
+ MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
+ }
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ Observer.changedInstr(MI);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx != 2)
+ return UnableToLegalize;
- // Do the operation on each small part.
- for (int i = 0; i < NumParts; ++i)
- MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
- {SrcsReg1[i], SrcsReg2[i]});
+ int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, OpIdx);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_ICMP: {
+ uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ if (NarrowSize * 2 != SrcSize)
+ return UnableToLegalize;
- // Gather the destination registers into the final destination.
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ Observer.changingInstr(MI);
+ Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
+ Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
+
+ Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
+ Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
+
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+ MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
+ MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
+ MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
+ MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+ MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
+ } else {
+ const LLT s1 = LLT::scalar(1);
+ MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+ MachineInstrBuilder CmpHEQ =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+ MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
+ ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
+ }
+ Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
@@ -643,15 +793,322 @@ void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
MO.setReg(ExtB->getOperand(0).getReg());
}
+void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
+ {MO.getReg()});
+ MO.setReg(ExtB->getOperand(0).getReg());
+}
+
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx, unsigned TruncOpcode) {
MachineOperand &MO = MI.getOperand(OpIdx);
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
MO.setReg(DstExt);
}
+void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
+ unsigned OpIdx, unsigned ExtOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
+ MO.setReg(DstTrunc);
+}
+
+void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
+ MO.setReg(DstExt);
+}
+
+void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+
+ LLT OldTy = MRI.getType(MO.getReg());
+ unsigned OldElts = OldTy.getNumElements();
+ unsigned NewElts = MoreTy.getNumElements();
+
+ unsigned NumParts = NewElts / OldElts;
+
+ // Use concat_vectors if the result is a multiple of the number of elements.
+ if (NumParts * OldElts == NewElts) {
+ SmallVector<Register, 8> Parts;
+ Parts.push_back(MO.getReg());
+
+ Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
+ for (unsigned I = 1; I != NumParts; ++I)
+ Parts.push_back(ImpDef);
+
+ auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
+ MO.setReg(Concat.getReg(0));
+ return;
+ }
+
+ Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
+ Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
+ MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
+ MO.setReg(MoreReg);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src1);
+ const int DstSize = DstTy.getSizeInBits();
+ const int SrcSize = SrcTy.getSizeInBits();
+ const int WideSize = WideTy.getSizeInBits();
+ const int NumMerge = (DstSize + WideSize - 1) / WideSize;
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned NumSrc = MI.getNumOperands() - 1;
+ unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
+
+ if (WideSize >= DstSize) {
+ // Directly pack the bits in the target type.
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ const unsigned Offset = (I - 1) * PartSize;
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
+
+ auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
+
+ Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+ MRI.createGenericVirtualRegister(WideTy);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+ auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ }
+
+ if (WideSize > DstSize)
+ MIRBuilder.buildTrunc(DstReg, ResultReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Unmerge the original values to the GCD type, and recombine to the next
+ // multiple greater than the original type.
+ //
+ // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
+ // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
+ // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
+ // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
+ // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
+ // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
+ // %12:_(s12) = G_MERGE_VALUES %10, %11
+ //
+ // Padding with undef if necessary:
+ //
+ // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
+ // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
+ // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
+ // %7:_(s2) = G_IMPLICIT_DEF
+ // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
+ // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
+ // %10:_(s12) = G_MERGE_VALUES %8, %9
+
+ const int GCD = greatestCommonDivisor(SrcSize, WideSize);
+ LLT GCDTy = LLT::scalar(GCD);
+
+ SmallVector<Register, 8> Parts;
+ SmallVector<Register, 8> NewMergeRegs;
+ SmallVector<Register, 8> Unmerges;
+ LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
+
+ // Decompose the original operands if they don't evenly divide.
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ Register SrcReg = MI.getOperand(I).getReg();
+ if (GCD == SrcSize) {
+ Unmerges.push_back(SrcReg);
+ } else {
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
+ Unmerges.push_back(Unmerge.getReg(J));
+ }
+ }
+
+ // Pad with undef to the next size that is a multiple of the requested size.
+ if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
+ Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+ for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
+ Unmerges.push_back(UndefReg);
+ }
+
+ const int PartsPerGCD = WideSize / GCD;
+
+ // Build merges of each piece.
+ ArrayRef<Register> Slicer(Unmerges);
+ for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
+ auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
+ NewMergeRegs.push_back(Merge.getReg(0));
+ }
+
+ // A truncate may be necessary if the requested type doesn't evenly divide the
+ // original result type.
+ if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
+ MIRBuilder.buildMerge(DstReg, NewMergeRegs);
+ } else {
+ auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
+ MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned NumDst = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (!SrcTy.isScalar())
+ return UnableToLegalize;
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst0Reg);
+ if (!DstTy.isScalar())
+ return UnableToLegalize;
+
+ unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
+ LLT NewSrcTy = LLT::scalar(NewSrcSize);
+ unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
+
+ auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
+
+ for (unsigned I = 1; I != NumDst; ++I) {
+ auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
+ auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
+ WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
+ }
+
+ Observer.changingInstr(MI);
+
+ MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
+ for (unsigned I = 0; I != NumDst; ++I)
+ widenScalarDst(MI, WideTy, I);
+
+ Observer.changedInstr(MI);
+
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT DstTy = MRI.getType(DstReg);
+ unsigned Offset = MI.getOperand(2).getImm();
+
+ if (TypeIdx == 0) {
+ if (SrcTy.isVector() || DstTy.isVector())
+ return UnableToLegalize;
+
+ SrcOp Src(SrcReg);
+ if (SrcTy.isPointer()) {
+ // Extracts from pointers can be handled only if they are really just
+ // simple integers.
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+ return UnableToLegalize;
+
+ LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
+ SrcTy = SrcAsIntTy;
+ }
+
+ if (DstTy.isPointer())
+ return UnableToLegalize;
+
+ if (Offset == 0) {
+ // Avoid a shift in the degenerate case.
+ MIRBuilder.buildTrunc(DstReg,
+ MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Do a shift in the source type.
+ LLT ShiftTy = SrcTy;
+ if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+ Src = MIRBuilder.buildAnyExt(WideTy, Src);
+ ShiftTy = WideTy;
+    }
+
+ auto LShr = MIRBuilder.buildLShr(
+ ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
+ MIRBuilder.buildTrunc(DstReg, LShr);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (SrcTy.isScalar()) {
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (!SrcTy.isVector())
+ return UnableToLegalize;
+
+ if (DstTy != SrcTy.getElementType())
+ return UnableToLegalize;
+
+ if (Offset % SrcTy.getScalarSizeInBits() != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+ MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
+ Offset);
+ widenScalarDst(MI, WideTy.getScalarType(), 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MIRBuilder.setInstr(MI);
@@ -659,6 +1116,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_EXTRACT:
+ return widenScalarExtract(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_INSERT:
+ return widenScalarInsert(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_MERGE_VALUES:
+ return widenScalarMergeValues(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO: {
if (TypeIdx == 1)
@@ -690,19 +1155,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP: {
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ Register SrcReg = MI.getOperand(1).getReg();
+
// First ZEXT the input.
- auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
- LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+ auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+ LLT CurTy = MRI.getType(SrcReg);
if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
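+      // e.g. widening a G_CTTZ on s8 to s32: OR in bit 8 so that a zero input
+      // still produces a count of 8 rather than 32.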
auto TopBit =
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
- MIBSrc = MIRBuilder.buildInstr(
- TargetOpcode::G_OR, {WideTy},
- {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+ MIBSrc = MIRBuilder.buildOr(
+ WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
}
+
// Perform the operation at the larger size.
auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
@@ -714,22 +1188,43 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
TargetOpcode::G_SUB, {WideTy},
{MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
}
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
- // Make the original instruction a trunc now, and update its source.
+
+ MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_BSWAP: {
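+    // Byte-swap in the wider type, then shift the swapped value back down.
+    // e.g. a G_BSWAP of s16 widened to s32 becomes
+    //   trunc(bswap(anyext(x)) >> 16).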
Observer.changingInstr(MI);
- MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
- MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+
+ Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+ MI.getOperand(0).setReg(DstExt);
+
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ LLT Ty = MRI.getType(DstReg);
+ unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+ MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
+ MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
+ .addDef(ShrReg)
+ .addUse(DstExt)
+ .addUse(ShiftAmtReg);
+
+ MIRBuilder.buildTrunc(DstReg, ShrReg);
Observer.changedInstr(MI);
return Legalized;
}
-
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
- // Perform operation at larger width (any extension is fine here, high bits
+    // Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
Observer.changingInstr(MI);
@@ -741,16 +1236,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_SHL:
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- // The "number of bits to shift" operand must preserve its value as an
- // unsigned integer:
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarDst(MI, WideTy);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ assert(TypeIdx == 1);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ }
+
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
@@ -759,18 +1262,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- // The "number of bits to shift" operand must preserve its value as an
- // unsigned integer:
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarDst(MI, WideTy);
+
+ if (TypeIdx == 0) {
+ unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
+ TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+
+ widenScalarSrc(MI, WideTy, 1, CvtOp);
+ widenScalarDst(MI, WideTy);
+ } else {
+ assert(TypeIdx == 1);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ }
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
- case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
@@ -788,8 +1301,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
+ bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
// Explicit extension is required here since high bits affect the result.
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
}
Observer.changedInstr(MI);
return Legalized;
@@ -819,23 +1333,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_INSERT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
-
case TargetOpcode::G_LOAD:
- // For some types like i24, we might try to widen to i32. To properly handle
- // this we should be using a dedicated extending load, until then avoid
- // trying to legalize.
- if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
- WideTy.getSizeInBits())
- return UnableToLegalize;
- LLVM_FALLTHROUGH;
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
Observer.changingInstr(MI);
@@ -844,12 +1342,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
case TargetOpcode::G_STORE: {
- if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
- WideTy != LLT::scalar(8))
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (!isPowerOf2_32(Ty.getSizeInBits()))
return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
+
+ unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
+ TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
+ widenScalarSrc(MI, WideTy, 0, ExtType);
+
Observer.changedInstr(MI);
return Legalized;
}
@@ -871,14 +1376,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
bool LosesInfo;
switch (WideTy.getSizeInBits()) {
case 32:
- Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
+ Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
break;
case 64:
- Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
+ Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
break;
default:
- llvm_unreachable("Unhandled fp widen type");
+ return UnableToLegalize;
}
+
+ assert(!LosesInfo && "extend should always be lossless");
+
Observer.changingInstr(MI);
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
@@ -894,7 +1404,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
}
case TargetOpcode::G_BRCOND:
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
Observer.changedInstr(MI);
return Legalized;
@@ -947,23 +1457,103 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ if (TypeIdx == 0) {
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ Observer.changingInstr(MI);
+
+ widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
+ WideTy.getSizeInBits()),
+ 1, TargetOpcode::G_SEXT);
+
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
-
+ }
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ assert(TypeIdx == 0);
+ Observer.changingInstr(MI);
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
+
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INTTOPTR:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRTOINT:
if (TypeIdx != 0)
return UnableToLegalize;
+
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ Observer.changingInstr(MI);
+
+ const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+ widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
+
+ // Avoid changing the result vector type if the source element type was
+ // requested.
+ if (TypeIdx == 1) {
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+ } else {
+ widenScalarDst(MI, WideTy, 0);
+ }
+
Observer.changedInstr(MI);
return Legalized;
}
+ }
}
LegalizerHelper::LegalizeResult
@@ -976,13 +1566,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
- unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
+ Register QuotReg = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
.addDef(QuotReg)
.addUse(MI.getOperand(1).getReg())
.addUse(MI.getOperand(2).getReg());
- unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
+ Register ProdReg = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
ProdReg);
@@ -993,10 +1583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
// result.
- unsigned Res = MI.getOperand(0).getReg();
- unsigned Overflow = MI.getOperand(1).getReg();
- unsigned LHS = MI.getOperand(2).getReg();
- unsigned RHS = MI.getOperand(3).getReg();
+ Register Res = MI.getOperand(0).getReg();
+ Register Overflow = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
MIRBuilder.buildMul(Res, LHS, RHS);
@@ -1004,20 +1594,20 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
? TargetOpcode::G_SMULH
: TargetOpcode::G_UMULH;
- unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
+ Register HiPart = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInstr(Opcode)
.addDef(HiPart)
.addUse(LHS)
.addUse(RHS);
- unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ Register Zero = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(Zero, 0);
// For *signed* multiply, overflow is detected by checking:
// (hi != (lo >> bitwidth-1))
if (Opcode == TargetOpcode::G_SMULH) {
- unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
- unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
+ Register Shifted = MRI.createGenericVirtualRegister(Ty);
+ Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
.addDef(Shifted)
@@ -1035,7 +1625,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// represent them.
if (Ty.isVector())
return UnableToLegalize;
- unsigned Res = MI.getOperand(0).getReg();
+ Register Res = MI.getOperand(0).getReg();
Type *ZeroTy;
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
switch (Ty.getSizeInBits()) {
@@ -1057,10 +1647,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
ConstantFP &ZeroForNegation =
*cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
- MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
- .addDef(Res)
- .addUse(Zero->getOperand(0).getReg())
- .addUse(MI.getOperand(1).getReg());
+ Register SubByReg = MI.getOperand(1).getReg();
+ Register ZeroReg = Zero->getOperand(0).getReg();
+ MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
+ MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
@@ -1070,24 +1660,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
return UnableToLegalize;
- unsigned Res = MI.getOperand(0).getReg();
- unsigned LHS = MI.getOperand(1).getReg();
- unsigned RHS = MI.getOperand(2).getReg();
- unsigned Neg = MRI.createGenericVirtualRegister(Ty);
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Neg = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
- MIRBuilder.buildInstr(TargetOpcode::G_FADD)
- .addDef(Res)
- .addUse(LHS)
- .addUse(Neg);
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
- unsigned OldValRes = MI.getOperand(0).getReg();
- unsigned SuccessRes = MI.getOperand(1).getReg();
- unsigned Addr = MI.getOperand(2).getReg();
- unsigned CmpVal = MI.getOperand(3).getReg();
- unsigned NewVal = MI.getOperand(4).getReg();
+ Register OldValRes = MI.getOperand(0).getReg();
+ Register SuccessRes = MI.getOperand(1).getReg();
+ Register Addr = MI.getOperand(2).getReg();
+ Register CmpVal = MI.getOperand(3).getReg();
+ Register NewVal = MI.getOperand(4).getReg();
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
@@ -1098,8 +1685,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD: {
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned PtrReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
auto &MMO = **MI.memoperands_begin();
@@ -1114,8 +1701,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
if (DstTy.isScalar()) {
- unsigned TmpReg = MRI.createGenericVirtualRegister(
- LLT::scalar(MMO.getSize() /* in bytes */ * 8));
+ Register TmpReg =
+ MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
switch (MI.getOpcode()) {
default:
@@ -1142,15 +1729,27 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTPOP:
return lowerBitCount(MI, TypeIdx, Ty);
+ case G_UADDO: {
+ Register Res = MI.getOperand(0).getReg();
+ Register CarryOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+
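+    // Unsigned addition overflows iff the result wraps around, i.e. iff it is
+    // (unsigned) smaller than either addend; compare against RHS here.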
+ MIRBuilder.buildAdd(Res, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
case G_UADDE: {
- unsigned Res = MI.getOperand(0).getReg();
- unsigned CarryOut = MI.getOperand(1).getReg();
- unsigned LHS = MI.getOperand(2).getReg();
- unsigned RHS = MI.getOperand(3).getReg();
- unsigned CarryIn = MI.getOperand(4).getReg();
+ Register Res = MI.getOperand(0).getReg();
+ Register CarryOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ Register CarryIn = MI.getOperand(4).getReg();
- unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
- unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
+ Register TmpRes = MRI.createGenericVirtualRegister(Ty);
+ Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildAdd(TmpRes, LHS, RHS);
MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
@@ -1160,113 +1759,1325 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case G_USUBO: {
+ Register Res = MI.getOperand(0).getReg();
+ Register BorrowOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+
+ MIRBuilder.buildSub(Res, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_USUBE: {
+ Register Res = MI.getOperand(0).getReg();
+ Register BorrowOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ Register BorrowIn = MI.getOperand(4).getReg();
+
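+    // Res = LHS - RHS - BorrowIn. The subtraction borrows iff LHS < RHS, or
+    // iff LHS == RHS and the incoming borrow is set, hence the select below.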
+ Register TmpRes = MRI.createGenericVirtualRegister(Ty);
+ Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
+ Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
+
+ MIRBuilder.buildSub(TmpRes, LHS, RHS);
+ MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
+ MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
+ MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_UITOFP:
+ return lowerUITOFP(MI, TypeIdx, Ty);
+ case G_SITOFP:
+ return lowerSITOFP(MI, TypeIdx, Ty);
+ case G_SMIN:
+ case G_SMAX:
+ case G_UMIN:
+ case G_UMAX:
+ return lowerMinMax(MI, TypeIdx, Ty);
+ case G_FCOPYSIGN:
+ return lowerFCopySign(MI, TypeIdx, Ty);
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ return lowerFMinNumMaxNum(MI);
}
}
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
+ MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
+ SmallVector<Register, 2> DstRegs;
+
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ Register DstReg = MI.getOperand(0).getReg();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ int NumParts = Size / NarrowSize;
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
+
+ for (int i = 0; i < NumParts; ++i) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(TmpReg);
+ DstRegs.push_back(TmpReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- // FIXME: Don't know how to handle secondary types yet.
+LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ const unsigned Opc = MI.getOpcode();
+ const unsigned NumOps = MI.getNumOperands() - 1;
+ const unsigned NarrowSize = NarrowTy.getSizeInBits();
+ const Register DstReg = MI.getOperand(0).getReg();
+ const unsigned Flags = MI.getFlags();
+ const LLT DstTy = MRI.getType(DstReg);
+ const unsigned Size = DstTy.getSizeInBits();
+ const int NumParts = Size / NarrowSize;
+ const LLT EltTy = DstTy.getElementType();
+ const unsigned EltSize = EltTy.getSizeInBits();
+ const unsigned BitsForNumParts = NarrowSize * NumParts;
+
+ // Check if we have any leftovers. If we do, then only handle the case where
+ // the leftover is one element.
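+  // e.g. splitting a <3 x s32> operation with NarrowTy <2 x s32> leaves a
+  // single s32 element as the leftover piece.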
+ if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
+ return UnableToLegalize;
+
+ if (BitsForNumParts != Size) {
+ Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
+ MIRBuilder.buildUndef(AccumDstReg);
+
+ // Handle the pieces which evenly divide into the requested type with
+ // extract/op/insert sequence.
+ for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
+ SmallVector<SrcOp, 4> SrcOps;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
+ SrcOps.push_back(PartOpReg);
+ }
+
+ Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+
+ Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
+ MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
+ AccumDstReg = PartInsertReg;
+ }
+
+ // Handle the remaining element sized leftover piece.
+ SmallVector<SrcOp, 4> SrcOps;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
+ MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
+ BitsForNumParts);
+ SrcOps.push_back(PartOpReg);
+ }
+
+ Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
+ MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+ MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+
+ SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
+
+ if (NumOps >= 2)
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
+
+ if (NumOps >= 3)
+ extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+ if (NumOps == 1)
+ MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
+ else if (NumOps == 2) {
+ MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
+ } else if (NumOps == 3) {
+ MIRBuilder.buildInstr(Opc, {DstReg},
+ {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
+ }
+
+ DstRegs.push_back(DstReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// Handle splitting vector operations which need to have the same number of
+// elements in each type index, but each type index may have a different element
+// type.
+//
+// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+//
+// Also handles some irregular breakdown cases,
+// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+// s64 = G_SHL s64, s32
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorMultiEltType(
+ MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
if (TypeIdx != 0)
return UnableToLegalize;
- MIRBuilder.setInstr(MI);
- switch (MI.getOpcode()) {
- default:
+ const LLT NarrowTy0 = NarrowTyArg;
+ const unsigned NewNumElts =
+ NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
+
+ const Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT LeftoverTy0;
+
+ // All of the operands need to have the same number of elements, so if we can
+ // determine a type breakdown for the result type, we can for all of the
+ // source types.
+ int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
+ if (NumParts < 0)
return UnableToLegalize;
- case TargetOpcode::G_IMPLICIT_DEF: {
- SmallVector<unsigned, 2> DstRegs;
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Size = MRI.getType(DstReg).getSizeInBits();
- int NumParts = Size / NarrowSize;
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
+ SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+ SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
+ SmallVector<Register, 4> PartRegs, LeftoverRegs;
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ LLT LeftoverTy;
+ Register SrcReg = MI.getOperand(I).getReg();
+ LLT SrcTyI = MRI.getType(SrcReg);
+ LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
+ LLT LeftoverTyI;
+
+ // Split this operand into the requested typed registers, and any leftover
+ // required to reproduce the original type.
+ if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
+ LeftoverRegs))
return UnableToLegalize;
- for (int i = 0; i < NumParts; ++i) {
- unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUndef(TmpReg);
- DstRegs.push_back(TmpReg);
+ if (I == 1) {
+ // For the first operand, create an instruction for each part and setup
+ // the result.
+ for (Register PartReg : PartRegs) {
+ Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
+ .addDef(PartDstReg)
+ .addUse(PartReg));
+ DstRegs.push_back(PartDstReg);
+ }
+
+ for (Register LeftoverReg : LeftoverRegs) {
+ Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
+ NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
+ .addDef(PartDstReg)
+ .addUse(LeftoverReg));
+ LeftoverDstRegs.push_back(PartDstReg);
+ }
+ } else {
+ assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
+
+ // Add the newly created operand splits to the existing instructions. The
+ // odd-sized pieces are ordered after the requested NarrowTyArg sized
+ // pieces.
+ unsigned InstCount = 0;
+ for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
+ NewInsts[InstCount++].addUse(PartRegs[J]);
+ for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
+ NewInsts[InstCount++].addUse(LeftoverRegs[J]);
}
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ PartRegs.clear();
+ LeftoverRegs.clear();
+ }
- MI.eraseFromParent();
- return Legalized;
+ // Insert the newly built operations and rebuild the result register.
+ for (auto &MIB : NewInsts)
+ MIRBuilder.insertInstr(MIB);
+
+ insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT NarrowTy0 = NarrowTy;
+ LLT NarrowTy1;
+ unsigned NumParts;
+
+ if (NarrowTy.isVector()) {
+ // Uneven breakdown not handled.
+ NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+ if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
+ return UnableToLegalize;
+
+    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(),
+                            SrcTy.getElementType().getSizeInBits());
+ } else {
+ NumParts = DstTy.getNumElements();
+ NarrowTy1 = SrcTy.getElementType();
}
- case TargetOpcode::G_ADD: {
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Size = MRI.getType(DstReg).getSizeInBits();
- int NumParts = Size / NarrowSize;
+
+ SmallVector<Register, 4> SrcRegs, DstRegs;
+ extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstReg)
+ .addUse(SrcRegs[I]);
+
+ NewInst->setFlags(MI.getFlags());
+ DstRegs.push_back(DstReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(Src0Reg);
+
+ unsigned NumParts;
+ LLT NarrowTy0, NarrowTy1;
+
+ if (TypeIdx == 0) {
+ unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+ unsigned OldElts = DstTy.getNumElements();
+
+ NarrowTy0 = NarrowTy;
+ NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
+ NarrowTy1 = NarrowTy.isVector() ?
+ LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
+ SrcTy.getElementType();
+
+ } else {
+ unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+ unsigned OldElts = SrcTy.getNumElements();
+
+ NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
+ NarrowTy.getNumElements();
+ NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
+ DstTy.getScalarSizeInBits());
+ NarrowTy1 = NarrowTy;
+ }
+
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (NarrowTy1.isVector() &&
+ NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
+ return UnableToLegalize;
+
+ CmpInst::Predicate Pred
+ = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
+ extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
+ extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ DstRegs.push_back(DstReg);
+
+ if (MI.getOpcode() == TargetOpcode::G_ICMP)
+ MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
+ else {
+ MachineInstr *NewCmp
+ = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
+ NewCmp->setFlags(MI.getFlags());
+ }
+ }
+
+ if (NarrowTy1.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register CondReg = MI.getOperand(1).getReg();
+
+ unsigned NumParts = 0;
+ LLT NarrowTy0, NarrowTy1;
+
+ LLT DstTy = MRI.getType(DstReg);
+ LLT CondTy = MRI.getType(CondReg);
+ unsigned Size = DstTy.getSizeInBits();
+
+ assert(TypeIdx == 0 || CondTy.isVector());
+
+ if (TypeIdx == 0) {
+ NarrowTy0 = NarrowTy;
+ NarrowTy1 = CondTy;
+
+ unsigned NarrowSize = NarrowTy0.getSizeInBits();
// FIXME: Don't know how to handle the situation where the small vectors
// aren't all the same size yet.
if (Size % NarrowSize != 0)
return UnableToLegalize;
- SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+ NumParts = Size / NarrowSize;
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
- DstRegs.push_back(DstReg);
+ // Need to break down the condition type
+ if (CondTy.isVector()) {
+ if (CondTy.getNumElements() == NumParts)
+ NarrowTy1 = CondTy.getElementType();
+ else
+ NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
+ CondTy.getScalarSizeInBits());
+ }
+ } else {
+ NumParts = CondTy.getNumElements();
+ if (NarrowTy.isVector()) {
+ // TODO: Handle uneven breakdown.
+ if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
+ return UnableToLegalize;
+
+ return UnableToLegalize;
+ } else {
+ NarrowTy0 = DstTy.getElementType();
+ NarrowTy1 = NarrowTy;
}
+ }
+
+ SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+ if (CondTy.isVector())
+ extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
+
+ extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
+ extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
+ Src1Regs[i], Src2Regs[i]);
+ DstRegs.push_back(DstReg);
+ }
+
+ if (NarrowTy0.isVector())
MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ const Register DstReg = MI.getOperand(0).getReg();
+ LLT PhiTy = MRI.getType(DstReg);
+ LLT LeftoverTy;
+
+ // All of the operands need to have the same number of elements, so if we can
+ // determine a type breakdown for the result type, we can for all of the
+ // source types.
+ int NumParts, NumLeftover;
+ std::tie(NumParts, NumLeftover)
+ = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
+ if (NumParts < 0)
+ return UnableToLegalize;
+
+ SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
+ SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+ const int TotalNumParts = NumParts + NumLeftover;
+
+ // Insert the new phis in the result block first.
+ for (int I = 0; I != TotalNumParts; ++I) {
+ LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
+ Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
+ NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
+ .addDef(PartDstReg));
+ if (I < NumParts)
+ DstRegs.push_back(PartDstReg);
+ else
+ LeftoverDstRegs.push_back(PartDstReg);
}
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_STORE: {
- bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
- unsigned ValReg = MI.getOperand(0).getReg();
- unsigned AddrReg = MI.getOperand(1).getReg();
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- unsigned Size = MRI.getType(ValReg).getSizeInBits();
- unsigned NumParts = Size / NarrowSize;
-
- SmallVector<unsigned, 8> NarrowRegs;
- if (!IsLoad)
- extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
-
- const LLT OffsetTy =
- LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
- MachineFunction &MF = *MI.getMF();
- MachineMemOperand *MMO = *MI.memoperands_begin();
- for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
- unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
- unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
- unsigned NewAddrReg = 0;
- MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
- MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
- MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
- NarrowTy.getSizeInBits() / 8, Alignment);
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
+ insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
+
+ SmallVector<Register, 4> PartRegs, LeftoverRegs;
+
+ // Insert code to extract the incoming values in each predecessor block.
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ PartRegs.clear();
+ LeftoverRegs.clear();
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+
+ LLT Unused;
+ if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
+ LeftoverRegs))
+ return UnableToLegalize;
+
+ // Add the newly created operand splits to the existing instructions. The
+    // odd-sized pieces are ordered after the requested NarrowTy sized
+ // pieces.
+ for (int J = 0; J != TotalNumParts; ++J) {
+ MachineInstrBuilder MIB = NewInsts[J];
+ MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
+ MIB.addMBB(&OpMBB);
+ }
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ // This implementation doesn't work for atomics. Give up instead of doing
+ // something invalid.
+ if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
+ MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+ return UnableToLegalize;
+
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ Register ValReg = MI.getOperand(0).getReg();
+ Register AddrReg = MI.getOperand(1).getReg();
+ LLT ValTy = MRI.getType(ValReg);
+
+ int NumParts = -1;
+ int NumLeftover = -1;
+ LLT LeftoverTy;
+ SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
+ if (IsLoad) {
+ std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+ } else {
+ if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+ NarrowLeftoverRegs)) {
+ NumParts = NarrowRegs.size();
+ NumLeftover = NarrowLeftoverRegs.size();
+ }
+ }
+
+ if (NumParts == -1)
+ return UnableToLegalize;
+
+ const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+
+ unsigned TotalSize = ValTy.getSizeInBits();
+
+ // Split the load/store into PartTy sized pieces starting at Offset. If this
+  // is a load, return the new registers in ValRegs. For a store, each element
+ // of ValRegs should be PartTy. Returns the next offset that needs to be
+ // handled.
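+  // e.g. a 96-bit load narrowed with s32 becomes three s32 loads from byte
+  // offsets 0, 4 and 8 of the original address.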
+ auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
+ unsigned Offset) -> unsigned {
+ MachineFunction &MF = MIRBuilder.getMF();
+ unsigned PartSize = PartTy.getSizeInBits();
+ for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+ Offset += PartSize, ++Idx) {
+ unsigned ByteSize = PartSize / 8;
+ unsigned ByteOffset = Offset / 8;
+ Register NewAddrReg;
+
+ MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+
if (IsLoad) {
- unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
- NarrowRegs.push_back(Dst);
- MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+ Register Dst = MRI.createGenericVirtualRegister(PartTy);
+ ValRegs.push_back(Dst);
+ MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
} else {
- MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+ MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
}
}
- if (IsLoad) {
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
- else
- MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
- }
+
+ return Offset;
+ };
+
+ unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+
+ // Handle the rest of the register if this isn't an even type breakdown.
+ if (LeftoverTy.isValid())
+ splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+
+ if (IsLoad) {
+ insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
+ LeftoverTy, NarrowLeftoverRegs);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ using namespace TargetOpcode;
+
+ MIRBuilder.setInstr(MI);
+ switch (MI.getOpcode()) {
+ case G_IMPLICIT_DEF:
+ return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_ADD:
+ case G_SUB:
+ case G_MUL:
+ case G_SMULH:
+ case G_UMULH:
+ case G_FADD:
+ case G_FMUL:
+ case G_FSUB:
+ case G_FNEG:
+ case G_FABS:
+ case G_FCANONICALIZE:
+ case G_FDIV:
+ case G_FREM:
+ case G_FMA:
+ case G_FPOW:
+ case G_FEXP:
+ case G_FEXP2:
+ case G_FLOG:
+ case G_FLOG2:
+ case G_FLOG10:
+ case G_FNEARBYINT:
+ case G_FCEIL:
+ case G_FFLOOR:
+ case G_FRINT:
+ case G_INTRINSIC_ROUND:
+ case G_INTRINSIC_TRUNC:
+ case G_FCOS:
+ case G_FSIN:
+ case G_FSQRT:
+ case G_BSWAP:
+ case G_SDIV:
+ case G_SMIN:
+ case G_SMAX:
+ case G_UMIN:
+ case G_UMAX:
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ case G_FMINNUM_IEEE:
+ case G_FMAXNUM_IEEE:
+ case G_FMINIMUM:
+ case G_FMAXIMUM:
+ return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
+ case G_SHL:
+ case G_LSHR:
+ case G_ASHR:
+ case G_CTLZ:
+ case G_CTLZ_ZERO_UNDEF:
+ case G_CTTZ:
+ case G_CTTZ_ZERO_UNDEF:
+ case G_CTPOP:
+ case G_FCOPYSIGN:
+ return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
+ case G_ZEXT:
+ case G_SEXT:
+ case G_ANYEXT:
+ case G_FPEXT:
+ case G_FPTRUNC:
+ case G_SITOFP:
+ case G_UITOFP:
+ case G_FPTOSI:
+ case G_FPTOUI:
+ case G_INTTOPTR:
+ case G_PTRTOINT:
+ case G_ADDRSPACE_CAST:
+ return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+ case G_ICMP:
+ case G_FCMP:
+ return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+ case G_SELECT:
+ return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+ case G_PHI:
+ return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ case G_LOAD:
+ case G_STORE:
+ return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ default:
+ return UnableToLegalize;
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+ const LLT HalfTy, const LLT AmtTy) {
+
+ Register InL = MRI.createGenericVirtualRegister(HalfTy);
+ Register InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+ if (Amt.isNullValue()) {
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
MI.eraseFromParent();
return Legalized;
}
+
+ LLT NVT = HalfTy;
+ unsigned NVTBits = HalfTy.getSizeInBits();
+ unsigned VTBits = 2 * NVTBits;
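+  // e.g. narrowing a 64-bit G_SHL by a constant 40 with 32-bit halves gives
+  // Lo = 0 and Hi = InL << (40 - 32).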
+
+ SrcOp Lo(Register(0)), Hi(Register(0));
+ if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = MIRBuilder.buildShl(NVT, InL,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ } else if (Amt == NVTBits) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = InL;
+ } else {
+ Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
+ auto OrLHS =
+ MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
+ auto OrRHS = MIRBuilder.buildLShr(
+ NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+ Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ }
+ } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildLShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
+ }
+ } else {
+ if (Amt.ugt(VTBits)) {
+ Hi = Lo = MIRBuilder.buildAShr(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
+ }
}
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+// TODO: Optimize if constant shift amount.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
+ LLT RequestedTy) {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, RequestedTy, 2);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ Register Amt = MI.getOperand(2).getReg();
+ LLT ShiftAmtTy = MRI.getType(Amt);
+ const unsigned DstEltSize = DstTy.getScalarSizeInBits();
+ if (DstEltSize % 2 != 0)
+ return UnableToLegalize;
+
+ // Ignore the input type. We can only go to exactly half the size of the
+ // input. If that isn't small enough, the resulting pieces will be further
+ // legalized.
+ const unsigned NewBitSize = DstEltSize / 2;
+ const LLT HalfTy = LLT::scalar(NewBitSize);
+ const LLT CondTy = LLT::scalar(1);
+
+ if (const MachineInstr *KShiftAmt =
+ getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
+ return narrowScalarShiftByConstant(
+ MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+ }
+
+ // TODO: Expand with known bits.
+
+ // Handle the fully general expansion by an unknown amount.
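+  // e.g. for a 64-bit G_LSHR split into 32-bit halves:
+  //   Amt <  32: Lo = (InL >> Amt) | (InH << (32 - Amt)), Hi = InH >> Amt
+  //   Amt >= 32: Lo = InH >> (Amt - 32),                  Hi = 0
+  // Amt == 0 is selected separately so the (32 - Amt) shift never becomes an
+  // out-of-range shift by 32.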
+ auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
+
+ Register InL = MRI.createGenericVirtualRegister(HalfTy);
+ Register InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+ auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
+ auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
+
+ auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
+ auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
+ auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
+
+ Register ResultRegs[2];
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL: {
+ // Short: ShAmt < NewBitSize
+    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
+
+ auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+ auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+ auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
+ auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
+
+ auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
+ auto Hi = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+ auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_ASHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+
+ // Sign of Hi part.
+ auto HiL = MIRBuilder.buildAShr(
+ HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
+
+ auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ default:
+ llvm_unreachable("not a shift");
+ }
+
+ MIRBuilder.buildMerge(DstReg, ResultRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy) {
+ assert(TypeIdx == 0 && "Expecting only Idx 0");
+
+ Observer.changingInstr(MI);
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ moreElementsVectorSrc(MI, MoreTy, I);
+ }
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy) {
+ MIRBuilder.setInstr(MI);
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Observer.changingInstr(MI);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INSERT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_SELECT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PHI:
+ return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ default:
+ return UnableToLegalize;
+ }
+}
+
+void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
+ ArrayRef<Register> Src1Regs,
+ ArrayRef<Register> Src2Regs,
+ LLT NarrowTy) {
+ MachineIRBuilder &B = MIRBuilder;
+ unsigned SrcParts = Src1Regs.size();
+ unsigned DstParts = DstRegs.size();
+
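+  // Sketch, assuming two parts per operand (a = a1:a0, b = b1:b0, each of
+  // NarrowTy width): the loop below builds the schoolbook product
+  //   Dst[0] = lo(a0*b0)
+  //   Dst[1] = hi(a0*b0) + lo(a1*b0) + lo(a0*b1)              (+ carry out)
+  //   Dst[2] = hi(a1*b0) + hi(a0*b1) + lo(a1*b1) + carries    (if requested)
+  // where lo/hi denote G_MUL and G_UMULH of the narrow parts.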
+ unsigned DstIdx = 0; // Low bits of the result.
+ Register FactorSum =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
+ DstRegs[DstIdx] = FactorSum;
+
+ unsigned CarrySumPrevDstIdx;
+ SmallVector<Register, 4> Factors;
+
+ for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
+ // Collect low parts of muls for DstIdx.
+ for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
+ i <= std::min(DstIdx, SrcParts - 1); ++i) {
+ MachineInstrBuilder Mul =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
+ Factors.push_back(Mul.getReg(0));
+ }
+ // Collect high parts of muls from previous DstIdx.
+ for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+ i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+ MachineInstrBuilder Umulh =
+ B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+ Factors.push_back(Umulh.getReg(0));
+ }
+    // Add CarrySum from additions calculated for previous DstIdx.
+ if (DstIdx != 1) {
+ Factors.push_back(CarrySumPrevDstIdx);
+ }
+
+ Register CarrySum;
+ // Add all factors and accumulate all carries into CarrySum.
+ if (DstIdx != DstParts - 1) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+ FactorSum = Uaddo.getReg(0);
+ CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
+ FactorSum = Uaddo.getReg(0);
+ MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
+ CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+ }
+ } else {
+      // Since the value for the next index is not calculated, neither is CarrySum.
+ FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i)
+ FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
+ }
+
+ CarrySumPrevDstIdx = CarrySum;
+ DstRegs[DstIdx] = FactorSum;
+ Factors.clear();
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+
+ LLT Ty = MRI.getType(DstReg);
+ if (Ty.isVector())
+ return UnableToLegalize;
+
+ unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
+ unsigned DstSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
+ return UnableToLegalize;
+
+ unsigned NumDstParts = DstSize / NarrowSize;
+ unsigned NumSrcParts = SrcSize / NarrowSize;
+ bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
+ unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
+
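+  // e.g. narrowing a G_UMULH on s64 with NarrowTy = s32 (sketch): the full
+  // 128-bit product is computed in four s32 parts, and only the upper two
+  // parts are merged into DstReg below.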
+ SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
+ extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+ DstTmpRegs.resize(DstTmpParts);
+ multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
+
+  // Take only the high half of the registers if this is a high multiply.
+ ArrayRef<Register> DstRegs(
+ IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+ int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ // FIXME: add support for when SizeOp1 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp1 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp1 / NarrowSize;
+
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ Register OpReg = MI.getOperand(0).getReg();
+ uint64_t OpStart = MI.getOperand(2).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+    // This source subregister only partially overlaps the extracted range;
+    // compute the offset within it and the number of bits to take from it.
+ int64_t ExtractOffset;
+ uint64_t SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ Register SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
+ int NumParts = SizeOp0 / NarrowSize;
+
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ Register OpReg = MI.getOperand(2).getReg();
+ uint64_t OpStart = MI.getOperand(3).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstStart = i * NarrowSize;
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is defined by this insert, forward the new
+ // value.
+ DstRegs.push_back(OpReg);
+ continue;
+ }
+
+    // The inserted value only partially overlaps this destination subregister;
+    // compute which slice of OpReg to extract and where to insert it.
+ int64_t ExtractOffset, InsertOffset;
+ uint64_t SegSize;
+ if (OpStart < DstStart) {
+ InsertOffset = 0;
+ ExtractOffset = DstStart - OpStart;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+ } else {
+ InsertOffset = OpStart - DstStart;
+ ExtractOffset = 0;
+ SegSize =
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ }
+
+ Register SegReg = OpReg;
+ if (ExtractOffset != 0 || SegSize != OpSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+ }
+
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ DstRegs.push_back(DstReg);
+ }
+
+ assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ assert(MI.getNumOperands() == 3 && TypeIdx == 0);
+
+ SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
+ SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src0Regs, Src0LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
+ Src1Regs, Src1LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {Src0Regs[I], Src1Regs[I]});
+ DstRegs.push_back(Inst->getOperand(0).getReg());
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(
+ MI.getOpcode(),
+ {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
+ DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register CondReg = MI.getOperand(1).getReg();
+ LLT CondTy = MRI.getType(CondReg);
+ if (CondTy.isVector()) // TODO: Handle vselect
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
+ SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src1Regs, Src1LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
+ Src2Regs, Src2LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Select = MIRBuilder.buildSelect(NarrowTy,
+ CondReg, Src1Regs[I], Src2Regs[I]);
+ DstRegs.push_back(Select->getOperand(0).getReg());
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Select = MIRBuilder.buildSelect(
+ LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
+ DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
}
LegalizerHelper::LegalizeResult
@@ -1288,9 +3099,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTLZ: {
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
unsigned Len = Ty.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
// If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
{Ty}, {SrcReg});
@@ -1314,7 +3125,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// return Len - popcount(x);
//
// Ref: "Hacker's Delight" by Henry Warren
- unsigned Op = SrcReg;
+ Register Op = SrcReg;
unsigned NewLen = PowerOf2Ceil(Len);
for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
@@ -1338,9 +3149,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTTZ: {
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
unsigned Len = Ty.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
// If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
// zero.
auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
@@ -1365,8 +3176,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
TargetOpcode::G_AND, {Ty},
{MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
{SrcReg, MIBCstNeg1})});
- if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
- isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
+ if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
MIRBuilder.buildInstr(
TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
@@ -1381,3 +3192,230 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
}
}
+
+// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
+// representation.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+ // unsigned cul2f(ulong u) {
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ // }
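+  //
+  // Worked example (sketch): u = 3 gives lz = 62 and e = 127 + 63 - 62 = 128;
+  // after the shift and mask, u >> 40 = 0x400000, so v = (128 << 23) | 0x400000
+  // = 0x40400000, i.e. 3.0f. Here t = 0, so r = 0 and the result is exact; in
+  // general t holds the 40 discarded bits and drives round-to-nearest-even.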
+
+ auto Zero32 = MIRBuilder.buildConstant(S32, 0);
+ auto Zero64 = MIRBuilder.buildConstant(S64, 0);
+
+ auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
+
+ auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
+ auto Sub = MIRBuilder.buildSub(S32, K, LZ);
+
+ auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
+ auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
+
+ auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
+ auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
+
+ auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
+
+ auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
+ auto T = MIRBuilder.buildAnd(S64, U, Mask1);
+
+ auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
+ auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
+ auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
+
+ auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
+ auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
+ auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
+ auto One = MIRBuilder.buildConstant(S32, 1);
+
+ auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
+ auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
+ auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
+ MIRBuilder.buildAdd(Dst, V, R);
+
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (SrcTy != LLT::scalar(64))
+ return UnableToLegalize;
+
+ if (DstTy == LLT::scalar(32)) {
+ // TODO: SelectionDAG has several alternative expansions to port which may
+    // be more reasonable depending on the available instructions. If a target
+ // has sitofp, does not have CTLZ, or can efficiently use f64 as an
+ // intermediate type, this is probably worse.
+ return lowerU64ToF32BitOps(MI);
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ if (SrcTy != S64)
+ return UnableToLegalize;
+
+ if (DstTy == S32) {
+ // signed cl2f(long l) {
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ // }
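+    //
+    // Worked example (sketch): l = -3 gives s = -1 and (l + s) ^ s = 3, so
+    // r = cul2f(3) = 3.0f and the select below produces -3.0f.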
+ Register L = Src;
+ auto SignBit = MIRBuilder.buildConstant(S64, 63);
+ auto S = MIRBuilder.buildAShr(S64, L, SignBit);
+
+ auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
+ auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
+ auto R = MIRBuilder.buildUITOFP(S32, Xor);
+
+ auto RNeg = MIRBuilder.buildFNeg(S32, R);
+ auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
+ MIRBuilder.buildConstant(S64, 0));
+ MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_SMIN:
+ return CmpInst::ICMP_SLT;
+ case TargetOpcode::G_SMAX:
+ return CmpInst::ICMP_SGT;
+ case TargetOpcode::G_UMIN:
+ return CmpInst::ICMP_ULT;
+ case TargetOpcode::G_UMAX:
+ return CmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("not in integer min/max");
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
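+  // Sketch: e.g. G_SMAX %a, %b becomes
+  //   %c:_(s1) = G_ICMP intpred(sgt), %a, %b
+  //   %dst:_   = G_SELECT %c, %a, %b
+  // (the s1 condition becomes a vector of s1 for vector operands).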
+ const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+ LLT CmpType = MRI.getType(Dst).changeElementSize(1);
+
+ auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
+ MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ const LLT Src0Ty = MRI.getType(Src0);
+ const LLT Src1Ty = MRI.getType(Src1);
+
+ const int Src0Size = Src0Ty.getScalarSizeInBits();
+ const int Src1Size = Src1Ty.getScalarSizeInBits();
+
+ auto SignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getSignMask(Src0Size));
+
+ auto NotSignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+
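+  // Sketch for f32 operands: SignBitMask = 0x80000000 and NotSignBitMask =
+  // 0x7fffffff, so the result is (Src0 & 0x7fffffff) | (Src1 & 0x80000000),
+  // i.e. the magnitude of Src0 with the sign of Src1.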
+ auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
+ MachineInstr *Or;
+
+ if (Src0Ty == Src1Ty) {
+ auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ } else if (Src0Size > Src1Size) {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
+ auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
+ auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
+ auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
+ auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
+ auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
+ auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ }
+
+ // Be careful about setting nsz/nnan/ninf on every instruction, since the
+ // constants are a nan and -0.0, but the final result should preserve
+ // everything.
+ if (unsigned Flags = MI.getFlags())
+ Or->setFlags(Flags);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
+ unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
+ TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ if (!MI.getFlag(MachineInstr::FmNoNans)) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+
+    // Note this must be done here, and not as an optimization combine: in the
+    // absence of a dedicated quiet-sNaN instruction we are using an
+    // omni-purpose G_FCANONICALIZE.
+ if (!isKnownNeverSNaN(Src0, MRI))
+ Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
+
+ if (!isKnownNeverSNaN(Src1, MRI))
+ Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
+ }
+
+  // With any sNaNs quieted (or no NaNs present at all), it's safe to simply
+  // replace this with the IEEE-semantics variant.
+ MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index fa36ede5b976..6e1de95b3277 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,6 +42,45 @@ cl::opt<bool> llvm::DisableGISelLegalityCheck(
cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
cl::Hidden);
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
+ switch (Action) {
+ case Legal:
+ OS << "Legal";
+ break;
+ case NarrowScalar:
+ OS << "NarrowScalar";
+ break;
+ case WidenScalar:
+ OS << "WidenScalar";
+ break;
+ case FewerElements:
+ OS << "FewerElements";
+ break;
+ case MoreElements:
+ OS << "MoreElements";
+ break;
+ case Lower:
+ OS << "Lower";
+ break;
+ case Libcall:
+ OS << "Libcall";
+ break;
+ case Custom:
+ OS << "Custom";
+ break;
+ case Unsupported:
+ OS << "Unsupported";
+ break;
+ case NotFound:
+ OS << "NotFound";
+ break;
+ case UseLegacyRules:
+ OS << "UseLegacyRules";
+ break;
+ }
+ return OS;
+}
+
raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
OS << Opcode << ", Tys={";
for (const auto &Type : Types) {
@@ -59,6 +97,86 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
return OS;
}
+#ifndef NDEBUG
+// Make sure the rule won't (trivially) loop forever.
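+// e.g. a NarrowScalar or WidenScalar rule whose mutation returns the very type
+// it matched on would be re-applied indefinitely without making progress; that
+// is the failure mode this check guards against (illustrative sketch only).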
+static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
+ const std::pair<unsigned, LLT> &Mutation) {
+ switch (Rule.getAction()) {
+ case Custom:
+ case Lower:
+ case MoreElements:
+ case FewerElements:
+ break;
+ default:
+ return Q.Types[Mutation.first] != Mutation.second;
+ }
+ return true;
+}
+
+// Make sure the returned mutation makes sense for the match type.
+static bool mutationIsSane(const LegalizeRule &Rule,
+ const LegalityQuery &Q,
+ std::pair<unsigned, LLT> Mutation) {
+ // If the user wants a custom mutation, then we can't really say much about
+ // it. Return true, and trust that they're doing the right thing.
+ if (Rule.getAction() == Custom)
+ return true;
+
+ const unsigned TypeIdx = Mutation.first;
+ const LLT OldTy = Q.Types[TypeIdx];
+ const LLT NewTy = Mutation.second;
+
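+  // e.g. (illustrative only) NarrowScalar on s64 must produce a strictly
+  // smaller scalar such as s32, and MoreElements on <2 x s32> must keep the
+  // s32 element type while increasing the element count, e.g. to <4 x s32>.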
+ switch (Rule.getAction()) {
+ case FewerElements:
+ case MoreElements: {
+ if (!OldTy.isVector())
+ return false;
+
+ if (NewTy.isVector()) {
+ if (Rule.getAction() == FewerElements) {
+ // Make sure the element count really decreased.
+ if (NewTy.getNumElements() >= OldTy.getNumElements())
+ return false;
+ } else {
+ // Make sure the element count really increased.
+ if (NewTy.getNumElements() <= OldTy.getNumElements())
+ return false;
+ }
+ }
+
+ // Make sure the element type didn't change.
+ return NewTy.getScalarType() == OldTy.getElementType();
+ }
+ case NarrowScalar:
+ case WidenScalar: {
+ if (OldTy.isVector()) {
+ // Number of elements should not change.
+ if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements())
+ return false;
+ } else {
+      // Both types must be scalars.
+ if (NewTy.isVector())
+ return false;
+ }
+
+ if (Rule.getAction() == NarrowScalar) {
+ // Make sure the size really decreased.
+ if (NewTy.getScalarSizeInBits() >= OldTy.getScalarSizeInBits())
+ return false;
+ } else {
+ // Make sure the size really increased.
+ if (NewTy.getScalarSizeInBits() <= OldTy.getScalarSizeInBits())
+ return false;
+ }
+
+ return true;
+ }
+ default:
+ return true;
+ }
+}
+#endif
+
LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
dbgs() << "\n");
@@ -66,17 +184,15 @@ LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
return {LegalizeAction::UseLegacyRules, 0, LLT{}};
}
- for (const auto &Rule : Rules) {
+ for (const LegalizeRule &Rule : Rules) {
if (Rule.match(Query)) {
LLVM_DEBUG(dbgs() << ".. match\n");
std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query);
- LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", "
+ LLVM_DEBUG(dbgs() << ".. .. " << Rule.getAction() << ", "
<< Mutation.first << ", " << Mutation.second << "\n");
- assert((Query.Types[Mutation.first] != Mutation.second ||
- Rule.getAction() == Lower ||
- Rule.getAction() == MoreElements ||
- Rule.getAction() == FewerElements) &&
- "Simple loop detected");
+ assert(mutationIsSane(Rule, Query, Mutation) &&
+ "legality mutation invalid for match");
+ assert(hasNoSimpleLoops(Rule, Query, Mutation) && "Simple loop detected");
return {Rule.getAction(), Mutation.first, Mutation.second};
} else
LLVM_DEBUG(dbgs() << ".. no match\n");
@@ -180,16 +296,14 @@ void LegalizerInfo::computeTables() {
if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
- llvm::sort(ScalarSpecifiedActions.begin(),
- ScalarSpecifiedActions.end());
+ llvm::sort(ScalarSpecifiedActions);
checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
}
// 2. Handle pointer types
for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
- llvm::sort(PointerSpecifiedActions.second.begin(),
- PointerSpecifiedActions.second.end());
+ llvm::sort(PointerSpecifiedActions.second);
checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
       // For pointer types, we assume that there isn't a meaningful way
// to change the number of bits used in the pointer.
@@ -201,8 +315,7 @@ void LegalizerInfo::computeTables() {
// 3. Handle vector types
SizeAndActionsVec ElementSizesSeen;
for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
- llvm::sort(VectorSpecifiedActions.second.begin(),
- VectorSpecifiedActions.second.end());
+ llvm::sort(VectorSpecifiedActions.second);
const uint16_t ElementSize = VectorSpecifiedActions.first;
ElementSizesSeen.push_back({ElementSize, Legal});
checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
@@ -328,9 +441,8 @@ LegalizerInfo::getAction(const LegalityQuery &Query) const {
for (unsigned i = 0; i < Query.Types.size(); ++i) {
auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
if (Action.first != Legal) {
- LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i
- << " Action=" << (unsigned)Action.first << ", "
- << Action.second << "\n");
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action="
+ << Action.first << ", " << Action.second << "\n");
return {Action.first, i, Action.second};
} else
LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
@@ -364,8 +476,9 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
- MemDescrs.push_back(
- {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()});
+ MemDescrs.push_back({8 * MMO->getSize() /* in bits */,
+ 8 * MMO->getAlignment(),
+ MMO->getOrdering()});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
@@ -375,6 +488,14 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
return getAction(MI, MRI).Action == Legal;
}
+bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ auto Action = getAction(MI, MRI).Action;
+ // If the action is custom, it may not necessarily modify the instruction,
+ // so we have to assume it's legal.
+ return Action == Legal || Action == Custom;
+}
+
bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
@@ -423,14 +544,10 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
// Find the last element in Vec that has a bitsize equal to or smaller than
// the requested bit size.
// That is the element just before the first element that is bigger than Size.
- auto VecIt = std::upper_bound(
- Vec.begin(), Vec.end(), Size,
- [](const uint32_t Size, const SizeAndAction lhs) -> bool {
- return Size < lhs.first;
- });
- assert(VecIt != Vec.begin() && "Does Vec not start with size 1?");
- --VecIt;
- int VecIdx = VecIt - Vec.begin();
+ auto It = partition_point(
+ Vec, [=](const SizeAndAction &A) { return A.first <= Size; });
+ assert(It != Vec.begin() && "Does Vec not start with size 1?");
+ int VecIdx = It - Vec.begin() - 1;
LegalizeAction Action = Vec[VecIdx].second;
switch (Action) {
@@ -541,6 +658,12 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
IntermediateType.getScalarSizeInBits())};
}
+bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ return true;
+}
+
/// \pre Type indices of every opcode form a dense set starting from 0.
void LegalizerInfo::verify(const MCInstrInfo &MII) const {
#ifndef NDEBUG
@@ -584,7 +707,8 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB)
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+ if (isPreISelGenericOpcode(MI.getOpcode()) &&
+ !MLI->isLegalOrCustom(MI, MRI))
return &MI;
}
return nullptr;
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index 52b340753a50..3592409710a7 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -1,9 +1,8 @@
//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -11,8 +10,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -21,17 +20,53 @@
using namespace llvm;
char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
- "Move/duplicate certain instructions close to their use", false,
- false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
-Localizer::Localizer() : MachineFunctionPass(ID) {
- initializeLocalizerPass(*PassRegistry::getPassRegistry());
-}
+Localizer::Localizer() : MachineFunctionPass(ID) { }
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
bool Localizer::shouldLocalize(const MachineInstr &MI) {
+ // Assuming a spill and reload of a value has a cost of 1 instruction each,
+ // this helper function computes the maximum number of uses we should consider
+ // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+ // break even in terms of code size when the original MI has 2 users vs
+  // choosing to potentially spill. Any more than 2 users and we have a net code
+ // size increase. This doesn't take into account register pressure though.
+ auto maxUses = [](unsigned RematCost) {
+ // A cost of 1 means remats are basically free.
+ if (RematCost == 1)
+ return UINT_MAX;
+ if (RematCost == 2)
+ return 2U;
+
+ // Remat is too expensive, only sink if there's one user.
+ if (RematCost > 2)
+ return 1U;
+ llvm_unreachable("Unexpected remat cost");
+ };
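+  // e.g. with the arm64 remat cost of 2 mentioned above, maxUses returns 2, so
+  // a G_GLOBAL_VALUE is localized only when it has at most two non-debug uses
+  // (sketch of how the thresholds are applied further below).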
+
+  // Helper to walk through uses and terminate if we've reached a limit. Saves
+  // us spending time traversing uses if all we want to know is whether the
+  // use count is at most MaxUses.
+ auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+ unsigned NumUses = 0;
+ auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+ for (; UI != UE && NumUses < MaxUses; ++UI) {
+ NumUses++;
+ }
+ // If we haven't reached the end yet then there are more than MaxUses users.
+ return UI == UE;
+ };
+
switch (MI.getOpcode()) {
default:
return false;
@@ -40,11 +75,22 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_INTTOPTR:
return true;
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ unsigned RematCost = TTI->getGISelRematGlobalCost();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned MaxUses = maxUses(RematCost);
+ if (MaxUses == UINT_MAX)
+ return true; // Remats are "free" so always localize.
+ bool B = isUsesAtMost(Reg, MaxUses);
+ return B;
+ }
}
}
void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -58,6 +104,107 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
+bool Localizer::localizeInterBlock(MachineFunction &MF,
+ LocalizedSetVecT &LocalizedInstrs) {
+ bool Changed = false;
+ DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+ // Since the IRTranslator only emits constants into the entry block, and the
+ // rest of the GISel pipeline generally emits constants close to their users,
+ // we only localize instructions in the entry block here. This might change if
+ // we start doing CSE across blocks.
+ auto &MBB = MF.front();
+ for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
+ MachineInstr &MI = *RI;
+ if (!shouldLocalize(MI))
+ continue;
+ LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "More than one definition not supported yet");
+ unsigned Reg = MI.getOperand(0).getReg();
+ // Check if all the users of MI are local.
+    // We are going to invalidate the list of use operands, so we
+    // can't use a range iterator.
+ for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+ MOIt != MOItEnd;) {
+ MachineOperand &MOUse = *MOIt++;
+ // Check if the use is already local.
+ MachineBasicBlock *InsertMBB;
+ LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ if (isLocalUse(MOUse, MI, InsertMBB))
+ continue;
+ LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+ Changed = true;
+ auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+ auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+ if (NewVRegIt == MBBWithLocalDef.end()) {
+ // Create the localized instruction.
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+ LocalizedInstrs.insert(LocalizedMI);
+ MachineInstr &UseMI = *MOUse.getParent();
+ if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+ else
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+ LocalizedMI);
+
+ // Set a new register for the definition.
+ unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+ LocalizedMI->getOperand(0).setReg(NewReg);
+ NewVRegIt =
+ MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+ LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ }
+ LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+ << '\n');
+ // Update the user reg.
+ MOUse.setReg(NewVRegIt->second);
+ }
+ }
+ return Changed;
+}
+
+bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
+ bool Changed = false;
+
+  // For each already-localized instruction that has multiple users, scan the
+  // block top-down from its current position until we hit one of those users.
+
+ // FIXME: Consider doing inst duplication if live ranges are very long due to
+ // many users, but this case may be better served by regalloc improvements.
+
+ for (MachineInstr *MI : LocalizedInstrs) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineBasicBlock &MBB = *MI->getParent();
+ // All of the user MIs of this reg.
+ SmallPtrSet<MachineInstr *, 32> Users;
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+ if (!UseMI.isPHI())
+ Users.insert(&UseMI);
+ }
+ // If all the users were PHIs then they're not going to be in our block,
+ // don't try to move this instruction.
+ if (Users.empty())
+ continue;
+
+ MachineBasicBlock::iterator II(MI);
+ ++II;
+ while (II != MBB.end() && !Users.count(&*II))
+ ++II;
+
+ LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
+ << "\n");
+ assert(II != MBB.end() && "Didn't find the user in the MBB");
+ MI->removeFromParent();
+ MBB.insert(II, MI);
+ Changed = true;
+ }
+ return Changed;
+}
+
bool Localizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
@@ -68,62 +215,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
init(MF);
- bool Changed = false;
- // Keep track of the instructions we localized.
- // We won't need to process them if we see them later in the CFG.
- SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
- DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
- // TODO: Do bottom up traversal.
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
- continue;
- LLVM_DEBUG(dbgs() << "Should localize: " << MI);
- assert(MI.getDesc().getNumDefs() == 1 &&
- "More than one definition not supported yet");
- unsigned Reg = MI.getOperand(0).getReg();
- // Check if all the users of MI are local.
- // We are going to invalidation the list of use operands, so we
- // can't use range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
- // Check if the use is already local.
- MachineBasicBlock *InsertMBB;
- LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
- dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
- if (isLocalUse(MOUse, MI, InsertMBB))
- continue;
- LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
- Changed = true;
- auto MBBAndReg = std::make_pair(InsertMBB, Reg);
- auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
- if (NewVRegIt == MBBWithLocalDef.end()) {
- // Create the localized instruction.
- MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
- LocalizedInstrs.insert(LocalizedMI);
- // Don't try to be smart for the insertion point.
- // There is no guarantee that the first seen use is the first
- // use in the block.
- InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
- LocalizedMI);
+ // Keep track of the instructions we localized. We'll do a second pass of
+ // intra-block localization to further reduce live ranges.
+ LocalizedSetVecT LocalizedInstrs;
- // Set a new register for the definition.
- unsigned NewReg =
- MRI->createGenericVirtualRegister(MRI->getType(Reg));
- MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
- LocalizedMI->getOperand(0).setReg(NewReg);
- NewVRegIt =
- MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
- LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
- }
- LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
- << '\n');
- // Update the user reg.
- MOUse.setReg(NewVRegIt->second);
- }
- }
- }
- return Changed;
+ bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+ return Changed |= localizeIntraBlock(LocalizedInstrs);
}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 1f5611061994..b7a73326b85c 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -17,6 +16,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
@@ -87,7 +87,7 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
}
MachineInstrBuilder
-MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+MachineIRBuilder::buildDirectDbgValue(Register Reg, const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -100,7 +100,7 @@ MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
}
MachineInstrBuilder
-MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -160,23 +160,32 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
return MIB.addMetadata(Label);
}
-MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
- assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
- return buildInstr(TargetOpcode::G_FRAME_INDEX)
- .addDef(Res)
- .addFrameIndex(Idx);
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res,
+ int Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_FRAME_INDEX);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addFrameIndex(Idx);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
const GlobalValue *GV) {
- assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
- assert(getMRI()->getType(Res).getAddressSpace() ==
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ assert(Res.getLLTTy(*getMRI()).getAddressSpace() ==
GV->getType()->getAddressSpace() &&
"address space mismatch");
- return buildInstr(TargetOpcode::G_GLOBAL_VALUE)
- .addDef(Res)
- .addGlobalAddress(GV);
+ auto MIB = buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addGlobalAddress(GV);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
+ unsigned JTI) {
+ return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
+ .addJumpTableIndex(JTI);
}
void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
@@ -185,20 +194,28 @@ void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
assert((Res == Op0 && Res == Op1) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert(getMRI()->getType(Res).isPointer() &&
- getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
- assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
+void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
+ const LLT &Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
+MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() &&
+ Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
+ assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_GEP)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
+ auto MIB = buildInstr(TargetOpcode::G_GEP);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Op0.addSrcToMIB(MIB);
+ Op1.addSrcToMIB(MIB);
+ return MIB;
}
Optional<MachineInstrBuilder>
-MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -209,32 +226,43 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
}
Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
- unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy);
-
- buildConstant(TmpReg, Value);
- return buildGEP(Res, Op0, TmpReg);
+ auto Cst = buildConstant(ValueTy, Value);
+ return buildGEP(Res, Op0, Cst.getReg(0));
}
-MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res,
+ const SrcOp &Op0,
uint32_t NumBits) {
- assert(getMRI()->getType(Res).isPointer() &&
- getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
+ assert(Res.getLLTTy(*getMRI()).isPointer() &&
+ Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
- return buildInstr(TargetOpcode::G_PTR_MASK)
- .addDef(Res)
- .addUse(Op0)
- .addImm(NumBits);
+ auto MIB = buildInstr(TargetOpcode::G_PTR_MASK);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Op0.addSrcToMIB(MIB);
+ MIB.addImm(NumBits);
+ return MIB;
}
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
+MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
+ unsigned JTI,
+ Register IndexReg) {
+ assert(getMRI()->getType(TablePtr).isPointer() &&
+ "Table reg must be a pointer");
+ return buildInstr(TargetOpcode::G_BRJT)
+ .addUse(TablePtr)
+ .addJumpTableIndex(JTI)
+ .addUse(IndexReg);
+}
+
MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::COPY, Res, Op);
@@ -243,36 +271,60 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
const ConstantInt &Val) {
LLT Ty = Res.getLLTTy(*getMRI());
+ LLT EltTy = Ty.getScalarType();
+ assert(EltTy.getScalarSizeInBits() == Val.getBitWidth() &&
+ "creating constant with the wrong size");
+
+ if (Ty.isVector()) {
+ auto Const = buildInstr(TargetOpcode::G_CONSTANT)
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addCImm(&Val);
+ return buildSplatVector(Res, Const);
+ }
- assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
-
- const ConstantInt *NewVal = &Val;
- if (Ty.getSizeInBits() != Val.getBitWidth())
- NewVal = ConstantInt::get(getMF().getFunction().getContext(),
- Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
-
- auto MIB = buildInstr(TargetOpcode::G_CONSTANT);
- Res.addDefToMIB(*getMRI(), MIB);
- MIB.addCImm(NewVal);
- return MIB;
+ auto Const = buildInstr(TargetOpcode::G_CONSTANT);
+ Res.addDefToMIB(*getMRI(), Const);
+ Const.addCImm(&Val);
+ return Const;
}
MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
int64_t Val) {
auto IntN = IntegerType::get(getMF().getFunction().getContext(),
- Res.getLLTTy(*getMRI()).getSizeInBits());
+ Res.getLLTTy(*getMRI()).getScalarSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
}
MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
const ConstantFP &Val) {
- assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
+ LLT Ty = Res.getLLTTy(*getMRI());
+ LLT EltTy = Ty.getScalarType();
- auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
- Res.addDefToMIB(*getMRI(), MIB);
- MIB.addFPImm(&Val);
- return MIB;
+ assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
+ == EltTy.getSizeInBits() &&
+ "creating fconstant with the wrong size");
+
+ assert(!Ty.isPointer() && "invalid operand type");
+
+ if (Ty.isVector()) {
+ auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addFPImm(&Val);
+
+ return buildSplatVector(Res, Const);
+ }
+
+ auto Const = buildInstr(TargetOpcode::G_FCONSTANT);
+ Res.addDefToMIB(*getMRI(), Const);
+ Const.addFPImm(&Val);
+ return Const;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const APInt &Val) {
+ ConstantInt *CI = ConstantInt::get(getMF().getFunction().getContext(), Val);
+ return buildConstant(Res, *CI);
}
MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
@@ -280,44 +332,62 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
auto &Ctx = getMF().getFunction().getContext();
auto *CFP =
- ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
+ ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const APFloat &Val) {
+ auto &Ctx = getMF().getFunction().getContext();
+ auto *CFP = ConstantFP::get(Ctx, Val);
+ return buildFConstant(Res, *CFP);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst,
MachineBasicBlock &Dest) {
assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res,
+ const SrcOp &Addr,
MachineMemOperand &MMO) {
return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
}
MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
- unsigned Res,
- unsigned Addr,
+ const DstOp &Res,
+ const SrcOp &Addr,
MachineMemOperand &MMO) {
- assert(getMRI()->getType(Res).isValid() && "invalid operand type");
- assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
+ assert(Res.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+ assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
- return buildInstr(Opcode)
- .addDef(Res)
- .addUse(Addr)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(Opcode);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
+ const SrcOp &Addr,
MachineMemOperand &MMO) {
- assert(getMRI()->getType(Val).isValid() && "invalid operand type");
- assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
+ assert(Val.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+ assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
- return buildInstr(TargetOpcode::G_STORE)
- .addUse(Val)
- .addUse(Addr)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(TargetOpcode::G_STORE);
+ Val.addSrcToMIB(MIB);
+ Addr.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res,
+ const DstOp &CarryOut,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1});
}
MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
@@ -344,6 +414,25 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
}
+unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
+ const auto *TLI = getMF().getSubtarget().getTargetLowering();
+ switch (TLI->getBooleanContents(IsVec, IsFP)) {
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ return TargetOpcode::G_SEXT;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ return TargetOpcode::G_ZEXT;
+ default:
+ return TargetOpcode::G_ANYEXT;
+ }
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
+ const SrcOp &Op,
+ bool IsFP) {
+ unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
+ return buildInstr(ExtOp, Res, Op);
+}
+
MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
const DstOp &Res,
const SrcOp &Op) {
@@ -403,29 +492,32 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
return buildInstr(Opcode, Dst, Src);
}
-MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
+ const SrcOp &Src,
uint64_t Index) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
+
#ifndef NDEBUG
- assert(getMRI()->getType(Src).isValid() && "invalid operand type");
- assert(getMRI()->getType(Res).isValid() && "invalid operand type");
- assert(Index + getMRI()->getType(Res).getSizeInBits() <=
- getMRI()->getType(Src).getSizeInBits() &&
+ assert(SrcTy.isValid() && "invalid operand type");
+ assert(DstTy.isValid() && "invalid operand type");
+ assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() &&
"extracting off end of register");
#endif
- if (getMRI()->getType(Res).getSizeInBits() ==
- getMRI()->getType(Src).getSizeInBits()) {
+ if (DstTy.getSizeInBits() == SrcTy.getSizeInBits()) {
assert(Index == 0 && "insertion past the end of a register");
- return buildCast(Res, Src);
+ return buildCast(Dst, Src);
}
- return buildInstr(TargetOpcode::G_EXTRACT)
- .addDef(Res)
- .addUse(Src)
- .addImm(Index);
+ auto Extract = buildInstr(TargetOpcode::G_EXTRACT);
+ Dst.addDefToMIB(*getMRI(), Extract);
+ Src.addSrcToMIB(Extract);
+ Extract.addImm(Index);
+ return Extract;
}
-void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops,
ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
@@ -454,11 +546,11 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
return;
}
- unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy);
+ Register ResIn = getMRI()->createGenericVirtualRegister(ResTy);
buildUndef(ResIn);
for (unsigned i = 0; i < Ops.size(); ++i) {
- unsigned ResOut = i + 1 == Ops.size()
+ Register ResOut = i + 1 == Ops.size()
? Res
: getMRI()->createGenericVirtualRegister(ResTy);
buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
@@ -471,11 +563,12 @@ MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
}
MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
- ArrayRef<unsigned> Ops) {
+ ArrayRef<Register> Ops) {
// Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
}
@@ -485,31 +578,48 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
+ const SrcOp &Op) {
+ unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
+ SmallVector<Register, 8> TmpVec;
+ for (unsigned I = 0; I != NumReg; ++I)
+ TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
+ return buildUnmerge(TmpVec, Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
const SrcOp &Op) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>,
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<DstOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
- ArrayRef<unsigned> Ops) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
+MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
+ const SrcOp &Src) {
+ SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
MachineInstrBuilder
MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
- ArrayRef<unsigned> Ops) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
@@ -517,16 +627,16 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
}
MachineInstrBuilder
-MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
- unsigned Op, unsigned Index) {
+MachineInstrBuilder MachineIRBuilder::buildInsert(Register Res, Register Src,
+ Register Op, unsigned Index) {
assert(Index + getMRI()->getType(Op).getSizeInBits() <=
getMRI()->getType(Res).getSizeInBits() &&
"insertion past the end of a register");
@@ -544,13 +654,25 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
}
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
- unsigned Res,
+ ArrayRef<Register> ResultRegs,
bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
- if (Res)
- MIB.addDef(Res);
+ for (unsigned ResultReg : ResultRegs)
+ MIB.addDef(ResultReg);
+ MIB.addIntrinsicID(ID);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<DstOp> Results,
+ bool HasSideEffects) {
+ auto MIB =
+ buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+ : TargetOpcode::G_INTRINSIC);
+ for (DstOp Result : Results)
+ Result.addDefToMIB(*getMRI(), MIB);
MIB.addIntrinsicID(ID);
return MIB;
}
@@ -601,8 +723,8 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
}
MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
- unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
- unsigned NewVal, MachineMemOperand &MMO) {
+ Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal,
+ Register NewVal, MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT SuccessResTy = getMRI()->getType(SuccessRes);
@@ -628,8 +750,8 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
+MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
+ Register CmpVal, Register NewVal,
MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
@@ -653,9 +775,9 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
}
MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
- unsigned OldValRes,
- unsigned Addr,
- unsigned Val,
+ Register OldValRes,
+ Register Addr,
+ Register Val,
MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
@@ -675,75 +797,82 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXchg(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAdd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWSub(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAnd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWNand(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
MMO);
}
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes,
- unsigned Addr,
- unsigned Val,
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(Register OldValRes,
+ Register Addr,
+ Register Val,
MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXor(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
+ return buildInstr(TargetOpcode::G_FENCE)
+ .addImm(Ordering)
+ .addImm(Scope);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) {
#ifndef NDEBUG
assert(getMRI()->getType(Res).isPointer() && "invalid res type");
#endif
@@ -803,17 +932,18 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
- case TargetOpcode::G_SHL:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UREM:
- case TargetOpcode::G_SREM: {
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
// All these are binary ops.
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 2 && "Invalid Srcs");
@@ -821,6 +951,17 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()),
SrcOps[1].getLLTTy(*getMRI()));
break;
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 2 && "Invalid Srcs");
+ validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()));
+ break;
+ }
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
@@ -830,7 +971,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()), true);
break;
case TargetOpcode::G_TRUNC:
- case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPTRUNC: {
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 1 && "Invalid Srcs");
validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
@@ -839,10 +980,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::COPY:
assert(DstOps.size() == 1 && "Invalid Dst");
- assert(SrcOps.size() == 1 && "Invalid Srcs");
- assert(DstOps[0].getLLTTy(*getMRI()) == LLT() ||
- SrcOps[0].getLLTTy(*getMRI()) == LLT() ||
- DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI()));
+ // If the caller wants to add a subreg source it has to be done separately,
+ // so we may not have any SrcOps at this point yet.
break;
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
@@ -943,7 +1082,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
- "input scalars do not exactly cover the outpur vector register");
+ "input scalars do not exactly cover the output vector register");
break;
}
case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
@@ -976,7 +1115,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
- "input vectors do not exactly cover the outpur vector register");
+ "input vectors do not exactly cover the output vector register");
break;
}
case TargetOpcode::G_UADDE: {
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index dcc8b7cc23c5..42be88fcf947 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -72,7 +71,6 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
RegBankSelect::RegBankSelect(Mode RunningMode)
: MachineFunctionPass(ID), OptMode(RunningMode) {
- initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
if (RegBankSelectMode != RunningMode)
@@ -110,7 +108,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool RegBankSelect::assignmentMatch(
- unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+ Register Reg, const RegisterBankInfo::ValueMapping &ValMapping,
bool &OnlyAssign) const {
// By default we assume we will have to repair something.
OnlyAssign = false;
@@ -135,34 +133,84 @@ bool RegBankSelect::assignmentMatch(
bool RegBankSelect::repairReg(
MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
RegBankSelect::RepairingPlacement &RepairPt,
- const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
- if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
- return false;
- assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
+ const iterator_range<SmallVectorImpl<Register>::const_iterator> &NewVRegs) {
+
+ assert(ValMapping.NumBreakDowns == (unsigned)size(NewVRegs) &&
+ "need new vreg for each breakdown");
+
// An empty range of new register means no repairing.
assert(!empty(NewVRegs) && "We should not have to repair");
- // Assume we are repairing a use and thus, the original reg will be
- // the source of the repairing.
- unsigned Src = MO.getReg();
- unsigned Dst = *NewVRegs.begin();
-
- // If we repair a definition, swap the source and destination for
- // the repairing.
- if (MO.isDef())
- std::swap(Src, Dst);
-
- assert((RepairPt.getNumInsertPoints() == 1 ||
- TargetRegisterInfo::isPhysicalRegister(Dst)) &&
- "We are about to create several defs for Dst");
-
- // Build the instruction used to repair, then clone it at the right
- // places. Avoiding buildCopy bypasses the check that Src and Dst have the
- // same types because the type is a placeholder when this function is called.
- MachineInstr *MI =
- MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
- LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
- << '\n');
+ MachineInstr *MI;
+ if (ValMapping.NumBreakDowns == 1) {
+ // Assume we are repairing a use and thus, the original reg will be
+ // the source of the repairing.
+ Register Src = MO.getReg();
+ Register Dst = *NewVRegs.begin();
+
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(Src, Dst);
+
+ assert((RepairPt.getNumInsertPoints() == 1 ||
+ TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+ "We are about to create several defs for Dst");
+
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
+ .addDef(Dst)
+ .addUse(Src);
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+ << '\n');
+ } else {
+ // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
+ // sequence.
+ assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported");
+
+ LLT RegTy = MRI->getType(MO.getReg());
+ if (MO.isDef()) {
+ unsigned MergeOp;
+ if (RegTy.isVector()) {
+ if (ValMapping.NumBreakDowns == RegTy.getNumElements())
+ MergeOp = TargetOpcode::G_BUILD_VECTOR;
+ else {
+ assert(
+ (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns ==
+ RegTy.getSizeInBits()) &&
+ (ValMapping.BreakDown[0].Length % RegTy.getScalarSizeInBits() ==
+ 0) &&
+ "don't understand this value breakdown");
+
+ MergeOp = TargetOpcode::G_CONCAT_VECTORS;
+ }
+ } else
+ MergeOp = TargetOpcode::G_MERGE_VALUES;
+
+ auto MergeBuilder =
+ MIRBuilder.buildInstrNoInsert(MergeOp)
+ .addDef(MO.getReg());
+
+ for (Register SrcReg : NewVRegs)
+ MergeBuilder.addUse(SrcReg);
+
+ MI = MergeBuilder;
+ } else {
+ MachineInstrBuilder UnMergeBuilder =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES);
+ for (Register DefReg : NewVRegs)
+ UnMergeBuilder.addDef(DefReg);
+
+ UnMergeBuilder.addUse(MO.getReg());
+ MI = UnMergeBuilder;
+ }
+ }
+
+ if (RepairPt.getNumInsertPoints() != 1)
+ report_fatal_error("need testcase to support multiple insertion points");
+
// TODO:
// Check if MI is legal. if not, we need to legalize all the
// instructions we are going to insert.
@@ -195,7 +243,8 @@ uint64_t RegBankSelect::getRepairCost(
const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
// If MO does not have a register bank, we should have just been
// able to set one unless we have to break the value down.
- assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+ assert(CurRegBank || MO.isDef());
+
// Def: Val <- NewDefs
// Same number of values: copy
// Different number: Val = build_sequence Defs1, Defs2, ...
@@ -206,6 +255,9 @@ uint64_t RegBankSelect::getRepairCost(
// We should remember that this value is available somewhere else to
// coalesce the value.
+ if (ValMapping.NumBreakDowns != 1)
+ return RBI->getBreakDownCost(ValMapping, CurRegBank);
+
if (IsSameNumOfValues) {
const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
// If we repair a definition, swap the source and destination for
@@ -345,7 +397,7 @@ void RegBankSelect::tryAvoidingSplit(
// repairing.
// Check if this is a physical or virtual register.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// We are going to split every outgoing edges.
// Check that this is possible.
@@ -416,7 +468,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
@@ -542,7 +594,7 @@ bool RegBankSelect::applyMapping(
MachineOperand &MO = MI.getOperand(OpIdx);
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
switch (RepairPt.getKind()) {
case RepairingPlacement::Reassign:
@@ -605,7 +657,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
const Function &F = MF.getFunction();
Mode SaveOptMode = OptMode;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasOptNone())
OptMode = Mode::Fast;
init(MF);
@@ -644,8 +696,21 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
"unable to map instruction", MI);
return false;
}
+
+ // It's possible the mapping changed control flow, and moved the following
+ // instruction to a new block, so figure out the new parent.
+ if (MII != End) {
+ MachineBasicBlock *NextInstBB = MII->getParent();
+ if (NextInstBB != MBB) {
+ LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
+ MBB = NextInstBB;
+ MIRBuilder.setMBB(*MBB);
+ End = MBB->end();
+ }
+ }
}
}
+
OptMode = SaveOptMode;
return false;
}
@@ -692,7 +757,7 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
// Check if we can move the insertion point prior to the
// terminators of the predecessor.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
if (It->modifiesRegister(Reg, &TRI)) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 16f67a217ce1..4e41f338934d 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 28404e52d6ea..159422e38878 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -81,7 +80,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
}
const RegisterBank *
-RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
@@ -96,7 +95,7 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
}
const TargetRegisterClass &
-RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg,
+RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"Reg must be a physreg");
@@ -126,7 +125,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
}
const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
- unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+ Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
// If the register already has a class, fallback to MRI::constrainRegClass.
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -181,7 +180,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
// The register bank of Reg is just a side effect of the current
@@ -208,19 +207,49 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
continue;
}
}
- const ValueMapping *ValMapping =
- &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
+
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank);
if (IsCopyLike) {
- OperandsMapping[0] = ValMapping;
+ if (!OperandsMapping[0]) {
+ if (MI.isRegSequence()) {
+ // For reg_sequence, the result size does not match the input.
+ unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(),
+ MRI, TRI);
+ OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank);
+ } else {
+ OperandsMapping[0] = ValMapping;
+ }
+ }
+
+ // The default handling assumes any register bank can be copied to any
+ // other. If this isn't the case, the target should specially deal with
+ // reg_sequence/phi. There may also be unsatisfiable copies.
+ for (; OpIdx != EndIdx; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+ if (AltRegBank &&
+ cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI)))
+ return getInvalidInstructionMapping();
+ }
+
CompleteMapping = true;
break;
}
+
OperandsMapping[OpIdx] = ValMapping;
}
- if (IsCopyLike && !CompleteMapping)
+ if (IsCopyLike && !CompleteMapping) {
// No way to deduce the type from what we have.
return getInvalidInstructionMapping();
+ }
assert(CompleteMapping && "Setting an uncomplete mapping");
return getInstructionMapping(
@@ -363,11 +392,8 @@ RegisterBankInfo::getInstructionMappingImpl(
++NumInstructionMappingsCreated;
auto &InstrMapping = MapOfInstructionMappings[Hash];
- if (IsInvalid)
- InstrMapping = llvm::make_unique<InstructionMapping>();
- else
- InstrMapping = llvm::make_unique<InstructionMapping>(
- ID, Cost, OperandsMapping, NumOperands);
+ InstrMapping = llvm::make_unique<InstructionMapping>(
+ ID, Cost, OperandsMapping, NumOperands);
return *InstrMapping;
}
@@ -382,8 +408,12 @@ RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
RegisterBankInfo::InstructionMappings
RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
InstructionMappings PossibleMappings;
- // Put the default mapping first.
- PossibleMappings.push_back(&getInstrMapping(MI));
+ const auto &Mapping = getInstrMapping(MI);
+ if (Mapping.isValid()) {
+ // Put the default mapping first.
+ PossibleMappings.push_back(&Mapping);
+ }
+
// Then the alternative mapping, if any.
InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
for (const InstructionMapping *AltMapping : AltMappings)
@@ -424,14 +454,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
1 &&
"This mapping is too complex for this function");
- iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
+ iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
if (empty(NewRegs)) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
- unsigned OrigReg = MO.getReg();
- unsigned NewReg = *NewRegs.begin();
+ Register OrigReg = MO.getReg();
+ Register NewReg = *NewRegs.begin();
LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
MO.setReg(NewReg);
LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
@@ -456,7 +486,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
}
}
-unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
+unsigned RegisterBankInfo::getSizeInBits(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -498,6 +528,19 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
OS << "nullptr";
}
+bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
+ if (NumBreakDowns < 2)
+ return true;
+
+ const PartialMapping *First = begin();
+ for (const PartialMapping *Part = First + 1; Part != end(); ++Part) {
+ if (Part->Length != First->Length || Part->RegBank != First->RegBank)
+ return false;
+ }
+
+ return true;
+}
+
bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
@@ -565,7 +608,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
"We should not care about non-reg mapping");
continue;
}
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
assert(getOperandMapping(Idx).isValid() &&
@@ -610,7 +653,7 @@ RegisterBankInfo::OperandsMapper::OperandsMapper(
assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
}
-iterator_range<SmallVectorImpl<unsigned>::iterator>
+iterator_range<SmallVectorImpl<Register>::iterator>
RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
unsigned NumPartialVal =
@@ -626,18 +669,18 @@ RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
for (unsigned i = 0; i < NumPartialVal; ++i)
NewVRegs.push_back(0);
}
- SmallVectorImpl<unsigned>::iterator End =
+ SmallVectorImpl<Register>::iterator End =
getNewVRegsEnd(StartIdx, NumPartialVal);
return make_range(&NewVRegs[StartIdx], End);
}
-SmallVectorImpl<unsigned>::const_iterator
+SmallVectorImpl<Register>::const_iterator
RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
unsigned NumVal) const {
return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
}
-SmallVectorImpl<unsigned>::iterator
+SmallVectorImpl<Register>::iterator
RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
unsigned NumVal) {
assert((NewVRegs.size() == StartIdx + NumVal ||
@@ -649,11 +692,11 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
- iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
+ iterator_range<SmallVectorImpl<Register>::iterator> NewVRegsForOpIdx =
getVRegsMem(OpIdx);
const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
const PartialMapping *PartMap = ValMapping.begin();
- for (unsigned &NewVReg : NewVRegsForOpIdx) {
+ for (Register &NewVReg : NewVRegsForOpIdx) {
assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
// The new registers are always bound to scalar with the right size.
@@ -669,7 +712,7 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
unsigned PartialMapIdx,
- unsigned NewVReg) {
+ Register NewVReg) {
assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns >
PartialMapIdx &&
@@ -681,7 +724,7 @@ void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg;
}
-iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+iterator_range<SmallVectorImpl<Register>::const_iterator>
RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
bool ForDebug) const {
(void)ForDebug;
@@ -693,12 +736,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
unsigned PartMapSize =
getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
- SmallVectorImpl<unsigned>::const_iterator End =
+ SmallVectorImpl<Register>::const_iterator End =
getNewVRegsEnd(StartIdx, PartMapSize);
- iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
+ iterator_range<SmallVectorImpl<Register>::const_iterator> Res =
make_range(&NewVRegs[StartIdx], End);
#ifndef NDEBUG
- for (unsigned VReg : Res)
+ for (Register VReg : Res)
assert((VReg || ForDebug) && "Some registers are uninitialized");
#endif
return Res;
@@ -747,7 +790,7 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
IsFirst = false;
OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
bool IsFirstNewVReg = true;
- for (unsigned VReg : getVRegs(Idx)) {
+ for (Register VReg : getVRegs(Idx)) {
if (!IsFirstNewVReg)
OS << ", ";
IsFirstNewVReg = false;
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 59cbf93e7cd1..766ea1d60bac 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file This file implements the utility functions used by the GlobalISel
@@ -30,16 +29,10 @@ using namespace llvm;
unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
- const RegisterBankInfo &RBI,
- MachineInstr &InsertPt, unsigned Reg,
+ const RegisterBankInfo &RBI, unsigned Reg,
const TargetRegisterClass &RegClass) {
- if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
- unsigned NewReg = MRI.createVirtualRegister(&RegClass);
- BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
- TII.get(TargetOpcode::COPY), NewReg)
- .addReg(Reg);
- return NewReg;
- }
+ if (!RBI.constrainGenericRegister(Reg, RegClass, MRI))
+ return MRI.createVirtualRegister(&RegClass);
return Reg;
}
@@ -47,6 +40,37 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
unsigned llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, MachineInstr &InsertPt,
+ const TargetRegisterClass &RegClass, const MachineOperand &RegMO,
+ unsigned OpIdx) {
+ unsigned Reg = RegMO.getReg();
+ // Assume physical registers are properly constrained.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "PhysReg not implemented");
+
+ unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
+ // If we created a new virtual register because the class is not compatible
+ // then create a copy between the new and the old register.
+ if (ConstrainedReg != Reg) {
+ MachineBasicBlock::iterator InsertIt(&InsertPt);
+ MachineBasicBlock &MBB = *InsertPt.getParent();
+ if (RegMO.isUse()) {
+ BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), ConstrainedReg)
+ .addReg(Reg);
+ } else {
+ assert(RegMO.isDef() && "Must be a definition");
+ BuildMI(MBB, std::next(InsertIt), InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), Reg)
+ .addReg(ConstrainedReg);
+ }
+ }
+ return ConstrainedReg;
+}
+
+unsigned llvm::constrainOperandRegClass(
+ const MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
const MachineOperand &RegMO, unsigned OpIdx) {
unsigned Reg = RegMO.getReg();
@@ -82,7 +106,8 @@ unsigned llvm::constrainOperandRegClass(
// and they never reach this function.
return Reg;
}
- return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
+ RegMO, OpIdx);
}
bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
@@ -184,18 +209,71 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI) {
- MachineInstr *MI = MRI.getVRegDef(VReg);
- if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ Optional<ValueAndVReg> ValAndVReg =
+ getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
+ assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
+ "Value found while looking through instrs");
+ if (!ValAndVReg)
+ return None;
+ return ValAndVReg->Value;
+}
+
+Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
+ unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
+ MachineInstr *MI;
+ while ((MI = MRI.getVRegDef(VReg)) &&
+ MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ SeenOpcodes.push_back(std::make_pair(
+ MI->getOpcode(),
+ MRI.getType(MI->getOperand(0).getReg()).getSizeInBits()));
+ VReg = MI->getOperand(1).getReg();
+ break;
+ case TargetOpcode::COPY:
+ VReg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VReg))
+ return None;
+ break;
+ case TargetOpcode::G_INTTOPTR:
+ VReg = MI->getOperand(1).getReg();
+ break;
+ default:
+ return None;
+ }
+ }
+ if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT ||
+ (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm()))
return None;
- if (MI->getOperand(1).isImm())
- return MI->getOperand(1).getImm();
+ const MachineOperand &CstVal = MI->getOperand(1);
+ unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
+ APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
+ : CstVal.getCImm()->getValue();
+ assert(Val.getBitWidth() == BitWidth &&
+ "Value bitwidth doesn't match definition type");
+ while (!SeenOpcodes.empty()) {
+ std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
+ switch (OpcodeAndSize.first) {
+ case TargetOpcode::G_TRUNC:
+ Val = Val.trunc(OpcodeAndSize.second);
+ break;
+ case TargetOpcode::G_SEXT:
+ Val = Val.sext(OpcodeAndSize.second);
+ break;
+ case TargetOpcode::G_ZEXT:
+ Val = Val.zext(OpcodeAndSize.second);
+ break;
+ }
+ }
- if (MI->getOperand(1).isCImm() &&
- MI->getOperand(1).getCImm()->getBitWidth() <= 64)
- return MI->getOperand(1).getCImm()->getSExtValue();
+ if (Val.getBitWidth() > 64)
+ return None;
- return None;
+ return ValueAndVReg{Val.getSExtValue(), VReg};
}
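+// Illustrative sketch (hypothetical MIR): with LookThroughInstrs set, given
+//   %0:_(s32) = G_CONSTANT i32 -1
+//   %1:_(s64) = G_SEXT %0(s32)
+// a query on %1 walks back through the G_SEXT, records its 64-bit result
+// width, and yields the constant sign-extended to 64 bits together with the
+// vreg defined by the G_CONSTANT.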
const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
@@ -206,8 +284,8 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
return MI->getOperand(1).getFPImm();
}
-llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
- const MachineRegisterInfo &MRI) {
+llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
@@ -219,7 +297,13 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
break;
DefMI = MRI.getVRegDef(SrcReg);
}
- return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+ return DefMI;
+}
+
+llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
+ return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
}
APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
@@ -286,6 +370,31 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
return None;
}
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ bool SNaN) {
+ const MachineInstr *DefMI = MRI.getVRegDef(Val);
+ if (!DefMI)
+ return false;
+
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
+ return true;
+
+ if (SNaN) {
+ // FP operations quiet signaling NaNs. For now, just handle the ones
+ // inserted during legalization.
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCANONICALIZE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
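+// Illustrative sketch (hypothetical MIR): given
+//   %q:_(s32) = G_FPTRUNC %x:_(s64)
+// isKnownNeverNaN(%q, MRI, /*SNaN=*/true) returns true because G_FPTRUNC
+// quiets a signaling NaN input, whereas with SNaN == false it still returns
+// false unless the instruction carries the no-nans flag.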
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index d3364952f244..09201c2e7bae 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -1,9 +1,8 @@
//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -220,11 +219,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
auto &DL = M.getDataLayout();
// FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(),
- [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- return DL.getTypeAllocSize(GV1->getValueType()) <
- DL.getTypeAllocSize(GV2->getValueType());
- });
+ llvm::stable_sort(
+ Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ return DL.getTypeAllocSize(GV1->getValueType()) <
+ DL.getTypeAllocSize(GV2->getValueType());
+ });
// If we want to just blindly group all globals together, do so.
if (!GlobalMergeGroupByUse) {
@@ -331,7 +330,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Function *ParentFn = I->getParent()->getParent();
// If we're only optimizing for size, ignore non-minsize functions.
- if (OnlyOptimizeForSize && !ParentFn->optForMinSize())
+ if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
continue;
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
@@ -386,11 +385,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
//
// Multiply that by the size of the set to give us a crude profitability
// metric.
- std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
- [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
- return UGS1.Globals.count() * UGS1.UsageCount <
- UGS2.Globals.count() * UGS2.UsageCount;
- });
+ llvm::stable_sort(UsedGlobalSets,
+ [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+ return UGS1.Globals.count() * UGS1.UsageCount <
+ UGS2.Globals.count() * UGS2.UsageCount;
+ });
// We can choose to merge all globals together, but ignore globals never used
// with another global. This catches the obviously non-profitable cases of
diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp
new file mode 100644
index 000000000000..5f57cabbe865
--- /dev/null
+++ b/lib/CodeGen/HardwareLoops.cpp
@@ -0,0 +1,463 @@
+//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Insert hardware loop intrinsics into loops which are deemed profitable by
+/// the target, by querying TargetTransformInfo. A hardware loop comprises
+/// two intrinsics: one, outside the loop, to set the loop iteration count and
+/// another, in the exit block, to decrement the counter. The decremented value
+/// can either be carried through the loop via a phi or handled in some opaque
+/// way by the target.
+///
+//===----------------------------------------------------------------------===//
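+// Illustrative sketch (hypothetical IR, simple non-PHI form): a converted
+// loop roughly takes the shape
+//   preheader:
+//     call void @llvm.set.loop.iterations.i32(i32 %count)
+//     br label %loop
+//   loop:
+//     ...
+//     %cont = call i1 @llvm.loop.decrement.i32(i32 1)
+//     br i1 %cont, label %loop, label %exit
+// with the decremented value optionally carried by a phi when the target
+// requests the loop_decrement_reg form.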
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "hardware-loops"
+
+#define HW_LOOPS_NAME "Hardware Loop Insertion"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
+ cl::desc("Force hardware loops intrinsics to be inserted"));
+
+static cl::opt<bool>
+ForceHardwareLoopPHI(
+ "force-hardware-loop-phi", cl::Hidden, cl::init(false),
+ cl::desc("Force hardware loop counter to be updated through a phi"));
+
+static cl::opt<bool>
+ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
+ cl::desc("Force allowance of nested hardware loops"));
+
+static cl::opt<unsigned>
+LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
+ cl::desc("Set the loop decrement value"));
+
+static cl::opt<unsigned>
+CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
+ cl::desc("Set the loop counter bitwidth"));
+
+static cl::opt<bool>
+ForceGuardLoopEntry(
+ "force-hardware-loop-guard", cl::Hidden, cl::init(false),
+ cl::desc("Force generation of loop guard intrinsic"));
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+
+ using TTI = TargetTransformInfo;
+
+ class HardwareLoops : public FunctionPass {
+ public:
+ static char ID;
+
+ HardwareLoops() : FunctionPass(ID) {
+ initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ // Try to convert the given Loop into a hardware loop.
+ bool TryConvertLoop(Loop *L);
+
+ // Given that the target believes the loop to be profitable, try to
+ // convert it.
+ bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
+
+ private:
+ ScalarEvolution *SE = nullptr;
+ LoopInfo *LI = nullptr;
+ const DataLayout *DL = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ DominatorTree *DT = nullptr;
+ bool PreserveLCSSA = false;
+ AssumptionCache *AC = nullptr;
+ TargetLibraryInfo *LibInfo = nullptr;
+ Module *M = nullptr;
+ bool MadeChange = false;
+ };
+
+ class HardwareLoop {
+ // Expand the trip count scev into a value that we can use.
+ Value *InitLoopCount();
+
+ // Insert the set_loop_iteration intrinsic.
+ void InsertIterationSetup(Value *LoopCountInit);
+
+ // Insert the loop_decrement intrinsic.
+ void InsertLoopDec();
+
+ // Insert the loop_decrement_reg intrinsic.
+ Instruction *InsertLoopRegDec(Value *EltsRem);
+
+ // If the target requires the counter value to be updated in the loop,
+ // insert a phi to hold the value. The intended purpose is for use by
+ // loop_decrement_reg.
+ PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
+
+ // Create a new cmp that checks the returned value of loop_decrement*,
+ // and update the exit branch to use it.
+ void UpdateBranch(Value *EltsRem);
+
+ public:
+ HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
+ const DataLayout &DL) :
+ SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
+ ExitCount(Info.ExitCount),
+ CountType(Info.CountType),
+ ExitBranch(Info.ExitBranch),
+ LoopDecrement(Info.LoopDecrement),
+ UsePHICounter(Info.CounterInReg),
+ UseLoopGuard(Info.PerformEntryTest) { }
+
+ void Create();
+
+ private:
+ ScalarEvolution &SE;
+ const DataLayout &DL;
+ Loop *L = nullptr;
+ Module *M = nullptr;
+ const SCEV *ExitCount = nullptr;
+ Type *CountType = nullptr;
+ BranchInst *ExitBranch = nullptr;
+ Value *LoopDecrement = nullptr;
+ bool UsePHICounter = false;
+ bool UseLoopGuard = false;
+ BasicBlock *BeginBB = nullptr;
+ };
+}
+
+char HardwareLoops::ID = 0;
+
+bool HardwareLoops::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
+
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DL = &F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ M = F.getParent();
+
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
+ Loop *L = *I;
+ if (!L->getParentLoop())
+ TryConvertLoop(L);
+ }
+
+ return MadeChange;
+}
+
+// Return true if the search should stop, which will be when an inner loop is
+// converted and the parent loop doesn't support containing a hardware loop.
+bool HardwareLoops::TryConvertLoop(Loop *L) {
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ if (TryConvertLoop(*I))
+ return true; // Stop search.
+
+ HardwareLoopInfo HWLoopInfo(L);
+ if (!HWLoopInfo.canAnalyze(*LI))
+ return false;
+
+ if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
+ ForceHardwareLoops) {
+
+ // Allow overriding of the counter width and loop decrement value.
+ if (CounterBitWidth.getNumOccurrences())
+ HWLoopInfo.CountType =
+ IntegerType::get(M->getContext(), CounterBitWidth);
+
+ if (LoopDecrement.getNumOccurrences())
+ HWLoopInfo.LoopDecrement =
+ ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+
+ MadeChange |= TryConvertLoop(HWLoopInfo);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+ }
+
+ return false;
+}
+
+bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+
+ Loop *L = HWLoopInfo.L;
+ LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
+ ForceHardwareLoopPHI))
+ return false;
+
+ assert(
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
+ "Hardware Loop must have set exit info.");
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // If we don't have a preheader, then insert one.
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+ if (!Preheader)
+ return false;
+
+ HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
+ HWLoop.Create();
+ ++NumHWLoops;
+ return true;
+}
+
+void HardwareLoop::Create() {
+ LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
+
+ Value *LoopCountInit = InitLoopCount();
+ if (!LoopCountInit)
+ return;
+
+ InsertIterationSetup(LoopCountInit);
+
+ if (UsePHICounter || ForceHardwareLoopPHI) {
+ Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
+ Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
+ LoopDec->setOperand(0, EltsRem);
+ UpdateBranch(LoopDec);
+ } else
+ InsertLoopDec();
+
+ // Run through the basic blocks of the loop and see if any of them have dead
+ // PHIs that can be removed.
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
+}
+
+static bool CanGenerateTest(Loop *L, Value *Count) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader->getSinglePredecessor())
+ return false;
+
+ BasicBlock *Pred = Preheader->getSinglePredecessor();
+ if (!isa<BranchInst>(Pred->getTerminator()))
+ return false;
+
+ auto *BI = cast<BranchInst>(Pred->getTerminator());
+ if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
+ return false;
+
+ // Check that the icmp is checking for equality of Count and zero and that
+ // a non-zero value results in entering the loop.
+ auto ICmp = cast<ICmpInst>(BI->getCondition());
+ LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
+ if (!ICmp->isEquality())
+ return false;
+
+ auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
+ if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
+ return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
+ return false;
+ };
+
+ if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
+ return false;
+
+ unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
+ if (BI->getSuccessor(SuccIdx) != Preheader)
+ return false;
+
+ return true;
+}
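+// Illustrative sketch (hypothetical IR): the guard accepted above looks like
+//   %guard = icmp eq i32 %count, 0
+//   br i1 %guard, label %exit, label %preheader
+// i.e. an equality compare of the expanded count against zero whose non-zero
+// edge enters the loop preheader.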
+
+Value *HardwareLoop::InitLoopCount() {
+ LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
+ // Can we replace a conditional branch with an intrinsic that sets the
+ // loop counter and tests that it is not zero?
+
+ SCEVExpander SCEVE(SE, DL, "loopcnt");
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
+
+ ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
+
+ // If we're trying to use the 'test and set' form of the intrinsic, we need
+ // to replace a conditional branch that is controlling entry to the loop. It
+ // is likely (guaranteed?) that the preheader has an unconditional branch to
+ // the loop header, so also check if it has a single predecessor.
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+ SE.getZero(ExitCount->getType()))) {
+ LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
+ UseLoopGuard |= ForceGuardLoopEntry;
+ } else
+ UseLoopGuard = false;
+
+ BasicBlock *BB = L->getLoopPreheader();
+ if (UseLoopGuard && BB->getSinglePredecessor() &&
+ cast<BranchInst>(BB->getTerminator())->isUnconditional())
+ BB = BB->getSinglePredecessor();
+
+ if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
+ << *ExitCount << "\n");
+ return nullptr;
+ }
+
+ Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
+ BB->getTerminator());
+
+ // FIXME: We've expanded Count where we hope to insert the counter setting
+ // intrinsic. But, in the case of the 'test and set' form, we may fall back
+ // to the plain 'set' form, in which case the insertion block is most likely
+ // different. That means there may be instruction(s) in a block that aren't
+ // needed. The isLoopEntryGuardedByCond check tries to avoid this issue, but
+ // it doesn't appear to work in all cases.
+
+ UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
+ BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
+ LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
+ << " - Expanded Count in " << BB->getName() << "\n"
+ << " - Will insert set counter intrinsic into: "
+ << BeginBB->getName() << "\n");
+ return Count;
+}
+
+void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
+ IRBuilder<> Builder(BeginBB->getTerminator());
+ Type *Ty = LoopCountInit->getType();
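+ // Use the 'test and set' intrinsic when we can also rewrite the branch that
+ // guards entry to the loop; otherwise fall back to the plain 'set' form.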
+ Intrinsic::ID ID = UseLoopGuard ?
+ Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
+ Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
+ Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
+
+ // Use the return value of the intrinsic to control the entry of the loop.
+ if (UseLoopGuard) {
+ assert((isa<BranchInst>(BeginBB->getTerminator()) &&
+ cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
+ "Expected conditional branch");
+ auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
+ LoopGuard->setCondition(SetCount);
+ if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
+ LoopGuard->swapSuccessors();
+ }
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
+ << *SetCount << "\n");
+}
+
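+// Replace the condition of the exiting branch with a call to
+// llvm.loop.decrement, which decrements the counter and returns true if the
+// loop should execute another iteration.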
+void HardwareLoop::InsertLoopDec() {
+ IRBuilder<> CondBuilder(ExitBranch);
+
+ Function *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
+ LoopDecrement->getType());
+ Value *Ops[] = { LoopDecrement };
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
+ Value *OldCond = ExitBranch->getCondition();
+ ExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(ExitBranch->getSuccessor(0)))
+ ExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
+}
+
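+// Insert a call to llvm.loop.decrement.reg that decrements EltsRem and
+// returns the updated element count for use by the counter PHI and the exit
+// compare.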
+Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
+ IRBuilder<> CondBuilder(ExitBranch);
+
+ Function *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
+ { EltsRem->getType(), EltsRem->getType(),
+ LoopDecrement->getType()
+ });
+ Value *Ops[] = { EltsRem, LoopDecrement };
+ Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
+ return cast<Instruction>(Call);
+}
+
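+// Build the counter PHI in the loop header: the initial element count flows
+// in from the preheader and the decremented remainder from the latch.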
+PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = ExitBranch->getParent();
+ IRBuilder<> Builder(Header->getFirstNonPHI());
+ PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
+ Index->addIncoming(NumElts, Preheader);
+ Index->addIncoming(EltsRem, Latch);
+ LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
+ return Index;
+}
+
+void HardwareLoop::UpdateBranch(Value *EltsRem) {
+ IRBuilder<> CondBuilder(ExitBranch);
+ Value *NewCond =
+ CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
+ Value *OldCond = ExitBranch->getCondition();
+ ExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(ExitBranch->getSuccessor(0)))
+ ExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+}
+
+INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+
+FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ceeba639ee09..b17a253fe23f 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1,9 +1,8 @@
//===- IfConversion.cpp - Machine code if conversion pass -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1317,7 +1316,7 @@ void IfConverter::AnalyzeBlocks(
AnalyzeBlock(MBB, Tokens);
// Sort to favor more complex ifcvt scheme.
- std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+ llvm::stable_sort(Tokens, IfcvtTokenCmp);
}
/// Returns true either if ToMBB is the next block after MBB or that all the
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index f411ee6745d0..1e82ea659617 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -1,9 +1,8 @@
//===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -181,7 +180,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// Returns AR_NoAlias if \p MI memory operation does not alias with
/// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
/// they may alias and any further memory operation may alias with \p PrevMI.
- AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI);
+ AliasResult areMemoryOpsAliased(const MachineInstr &MI,
+ const MachineInstr *PrevMI) const;
enum SuitabilityResult {
SR_Suitable,
@@ -195,7 +195,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// no sense to continue lookup due to any other instruction will not be able
/// to be used. \p PrevInsts is the set of instruction seen since
/// the explicit null check on \p PointerReg.
- SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI,
+ unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts);
/// Return true if \p FaultingMI can be hoisted from after the
@@ -228,7 +229,8 @@ public:
} // end anonymous namespace
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
- if (MI->isCall() || MI->hasUnmodeledSideEffects())
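+ // Treat instructions that may raise floating-point exceptions like other
+ // side-effecting instructions and refuse to handle them.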
+ if (MI->isCall() || MI->mayRaiseFPException() ||
+ MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
@@ -319,8 +321,8 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
}
ImplicitNullChecks::AliasResult
-ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
- MachineInstr *PrevMI) {
+ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
+ const MachineInstr *PrevMI) const {
// If it is not memory access, skip the check.
if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
return AR_NoAlias;
@@ -357,10 +359,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
}
ImplicitNullChecks::SuitabilityResult
-ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
+ unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
!BaseOp->isReg() || BaseOp->getReg() != PointerReg)
diff --git a/lib/CodeGen/IndirectBrExpandPass.cpp b/lib/CodeGen/IndirectBrExpandPass.cpp
index 7b05ebf820fd..7ac093ba4a71 100644
--- a/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -1,9 +1,8 @@
//===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -149,11 +148,9 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex);
// Now rewrite the blockaddress to an integer constant based on the index.
- // FIXME: We could potentially preserve the uses as arguments to inline asm.
- // This would allow some uses such as diagnostic information in crashes to
- // have higher quality even when this transform is enabled, but would break
- // users that round-trip blockaddresses through inline assembly and then
- // back into an indirectbr.
+ // FIXME: This part doesn't properly recognize other uses of blockaddress
+ // expressions, for instance, where they are used to pass labels to
+ // asm-goto. This part of the pass needs a rework.
BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType()));
}
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 007e9283d833..41ae8061a917 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -1,9 +1,8 @@
//===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,6 +75,10 @@ STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
cl::desc("Disable inline spill hoisting"));
+static cl::opt<bool>
+RestrictStatepointRemat("restrict-statepoint-remat",
+ cl::init(false), cl::Hidden,
+ cl::desc("Restrict remat for statepoint operands"));
namespace {
@@ -215,6 +218,7 @@ private:
void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
void markValueUsed(LiveInterval*, VNInfo*);
+ bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI);
bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
void reMaterializeAll();
@@ -514,6 +518,28 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
} while (!WorkList.empty());
}
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
+ MachineInstr &MI) {
+ if (!RestrictStatepointRemat)
+ return true;
+ // Here's a quick explanation of the problem we're trying to handle:
+ // * There are some pseudo instructions with more vreg uses than there are
+ // physical registers on the machine.
+ // * This is normally handled by spilling the vreg, and folding the reload
+ // into the user instruction. (Thus decreasing the number of used vregs
+ // until the remainder can be assigned to physregs.)
+ // * However, since we may try to spill vregs in any order, we can end up
+ // trying to spill each operand to the instruction, and then rematting it
+ // instead. When that happens, the new live intervals (for the remats) are
+ // expected to be trivially assignable (i.e. RS_Done). However, since we
+ // may have more remats than physregs, we're guaranteed to fail to assign
+ // one.
+ // At the moment, we only handle this for STATEPOINTs since they're the only
+ // pseudo op where we've seen this. If we start seeing other instructions
+ // with the same problem, we need to revisit this.
+ return (MI.getOpcode() != TargetOpcode::STATEPOINT);
+}
+
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
@@ -569,6 +595,14 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
return true;
}
+ // If we can't guarantee that we'll be able to actually assign the new vreg,
+ // we can't remat.
+ if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+ return false;
+ }
+
// Allocate a new register for the remat.
unsigned NewVReg = Edit->createFrom(Original);
@@ -799,11 +833,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (FoldOps.empty())
return false;
- MachineInstrSpan MIS(MI);
+ MachineInstrSpan MIS(MI, MI->getParent());
MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
- : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
+ : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
if (!FoldMI)
return false;
@@ -834,6 +868,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
HSpiller.rmFromMergeableSpills(*MI, FI))
--NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
+ if (MI->isCall())
+ MI->getMF()->updateCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -871,7 +907,7 @@ void InlineSpiller::insertReload(unsigned NewVReg,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- MachineInstrSpan MIS(MI);
+ MachineInstrSpan MIS(MI, &MBB);
TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
MRI.getRegClass(NewVReg), &TRI);
@@ -901,7 +937,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- MachineInstrSpan MIS(MI);
+ MachineInstrSpan MIS(MI, &MBB);
bool IsRealSpill = true;
if (isFullUndefDef(*MI)) {
// Don't spill undef value.
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 82f6e8d8e234..7b50dac4cd1a 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,9 +1,8 @@
//===- InterferenceCache.cpp - Caching per-block interference -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 160e2b16e294..50c6ac62d194 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -1,9 +1,8 @@
//===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index fd2ff162630a..14bc560a561c 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,9 +1,8 @@
//===- InterleavedAccessPass.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -164,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned &Index, unsigned MaxFactor) {
+ unsigned &Index, unsigned MaxFactor,
+ unsigned NumLoadElements) {
if (Mask.size() < 2)
return false;
// Check potential Factors.
- for (Factor = 2; Factor <= MaxFactor; Factor++)
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ // Make sure we don't produce a load wider than the input load.
+ if (Mask.size() * Factor > NumLoadElements)
+ return false;
if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
return true;
+ }
return false;
}
@@ -303,9 +307,10 @@ bool InterleavedAccess::lowerInterleavedLoad(
unsigned Factor, Index;
+ unsigned NumLoadElements = LI->getType()->getVectorNumElements();
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
- MaxFactor))
+ MaxFactor, NumLoadElements))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 989fa164ad2d..9525da849e2a 100644
--- a/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1,9 +1,8 @@
//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -961,6 +960,7 @@ public:
if (!PtrTy) {
Result = Polynomial();
BasePtr = nullptr;
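+ // The value is not a pointer, so it cannot be decomposed; bail out early.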
+ return;
}
unsigned PointerBits =
DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
@@ -1219,7 +1219,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
"interleaved.wide.ptrcast");
// Create the wide load and update the MemorySSA.
- auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(),
+ auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(),
"interleaved.wide.load");
auto MSSAU = MemorySSAUpdater(&MSSA);
MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 707113bd973b..8cbd8bcaeabb 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -1,9 +1,8 @@
//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,39 +23,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-template <class ArgIt>
-static void EnsureFunctionExists(Module &M, const char *Name,
- ArgIt ArgBegin, ArgIt ArgEnd,
- Type *RetTy) {
- // Insert a correctly-typed definition now.
- std::vector<Type *> ParamTys;
- for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
- ParamTys.push_back(I->getType());
- M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
-}
-
-static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
- const char *FName,
- const char *DName, const char *LDName) {
- // Insert definitions for all the floating point types.
- switch((int)Fn.arg_begin()->getType()->getTypeID()) {
- case Type::FloatTyID:
- EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(),
- Type::getFloatTy(M.getContext()));
- break;
- case Type::DoubleTyID:
- EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(),
- Type::getDoubleTy(M.getContext()));
- break;
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(),
- Fn.arg_begin()->getType());
- break;
- }
-}
-
/// This function is used when we want to lower an intrinsic call to a call of
/// an external function. This handles hard cases such as when there was already
/// a prototype for the external function, but that prototype doesn't match the
@@ -72,8 +38,8 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
ParamTys.push_back((*I)->getType());
- Constant* FCache = M->getOrInsertFunction(NewFn,
- FunctionType::get(RetTy, ParamTys, false));
+ FunctionCallee FCache =
+ M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false));
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
@@ -92,75 +58,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
# define setjmp_undefined_for_msvc
#endif
-void IntrinsicLowering::AddPrototypes(Module &M) {
- LLVMContext &Context = M.getContext();
- for (auto &F : M)
- if (F.isDeclaration() && !F.use_empty())
- switch (F.getIntrinsicID()) {
- default: break;
- case Intrinsic::setjmp:
- EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(),
- Type::getInt32Ty(M.getContext()));
- break;
- case Intrinsic::longjmp:
- EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(),
- Type::getVoidTy(M.getContext()));
- break;
- case Intrinsic::siglongjmp:
- EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(),
- Type::getVoidTy(M.getContext()));
- break;
- case Intrinsic::memcpy:
- M.getOrInsertFunction("memcpy",
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context));
- break;
- case Intrinsic::memmove:
- M.getOrInsertFunction("memmove",
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context));
- break;
- case Intrinsic::memset:
- M.getOrInsertFunction("memset",
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt32Ty(M.getContext()),
- DL.getIntPtrType(Context));
- break;
- case Intrinsic::sqrt:
- EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
- break;
- case Intrinsic::sin:
- EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl");
- break;
- case Intrinsic::cos:
- EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl");
- break;
- case Intrinsic::pow:
- EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl");
- break;
- case Intrinsic::log:
- EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl");
- break;
- case Intrinsic::log2:
- EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l");
- break;
- case Intrinsic::log10:
- EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l");
- break;
- case Intrinsic::exp:
- EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl");
- break;
- case Intrinsic::exp2:
- EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l");
- break;
- }
-}
-
/// Emit the code to lower bswap of V before the specified instruction IP.
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
@@ -601,7 +498,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
// Okay, we can do this xform, do so now.
Module *M = CI->getModule();
- Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+ Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 52e832cc38c1..886ae7e94adb 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -202,6 +201,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
return true;
if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
+ if (this->getTargetTriple().isOSAIX()) {
+ // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR
+ // testing to be meaningful, we need to ensure that the symbols created
+ // are MCSymbolXCOFF variants, which requires that
+ // the TargetLoweringObjectFile instance has been initialized.
+ MCContext &Ctx = MMI->getContext();
+ const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
+ .Initialize(Ctx, *this);
+ }
PM.add(createPrintMIRPass(Out));
} else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
return true;
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index f9f33a98a9d1..8a7a41d0f763 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -1,9 +1,8 @@
//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 5b52cc66a297..200ac0ba15bf 100644
--- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===//
///
-/// The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
///
///===---------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index d06821bdfcce..503821537ed9 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -1,9 +1,8 @@
//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index fc0ebea2d36c..a669e64692b9 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -1,9 +1,8 @@
//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -21,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
@@ -35,13 +35,15 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -57,6 +59,7 @@
#include <cstdint>
#include <functional>
#include <queue>
+#include <tuple>
#include <utility>
#include <vector>
@@ -68,12 +71,12 @@ STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
// If @MI is a DBG_VALUE with debug value described by a defined
// register, returns the number of this register. In the other case, returns 0.
-static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
+static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue() && "expected a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
// If location of variable is described using a register (directly
// or indirectly), this register is always a first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
}
namespace {
@@ -86,6 +89,8 @@ private:
BitVector CalleeSavedRegs;
LexicalScopes LS;
+ enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
+
/// Keeps track of lexical scopes associated with a user value's source
/// location.
class UserValueScopes {
@@ -105,51 +110,134 @@ private:
}
};
- /// Based on std::pair so it can be used as an index into a DenseMap.
- using DebugVariableBase =
- std::pair<const DILocalVariable *, const DILocation *>;
- /// A potentially inlined instance of a variable.
- struct DebugVariable : public DebugVariableBase {
- DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
- : DebugVariableBase(Var, InlinedAt) {}
-
- const DILocalVariable *getVar() const { return this->first; }
- const DILocation *getInlinedAt() const { return this->second; }
-
- bool operator<(const DebugVariable &DV) const {
- if (getVar() == DV.getVar())
- return getInlinedAt() < DV.getInlinedAt();
- return getVar() < DV.getVar();
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ /// Storage for identifying a potentially inlined instance of a variable,
+ /// or a fragment thereof.
+ class DebugVariable {
+ const DILocalVariable *Variable;
+ OptFragmentInfo Fragment;
+ const DILocation *InlinedAt;
+
+ /// Fragment that will overlap all other fragments. Used as default when
+ /// caller demands a fragment.
+ static const FragmentInfo DefaultFragment;
+
+ public:
+ DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo,
+ const DILocation *InlinedAt)
+ : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+ DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo,
+ const DILocation *InlinedAt)
+ : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+ DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr,
+ const DILocation *InlinedAt)
+ : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {}
+
+ DebugVariable(const MachineInstr &MI)
+ : DebugVariable(MI.getDebugVariable(),
+ MI.getDebugExpression()->getFragmentInfo(),
+ MI.getDebugLoc()->getInlinedAt()) {}
+
+ const DILocalVariable *getVar() const { return Variable; }
+ const OptFragmentInfo &getFragment() const { return Fragment; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+
+ const FragmentInfo getFragmentDefault() const {
+ return Fragment.getValueOr(DefaultFragment);
+ }
+
+ static bool isFragmentDefault(FragmentInfo &F) {
+ return F == DefaultFragment;
+ }
+
+ bool operator==(const DebugVariable &Other) const {
+ return std::tie(Variable, Fragment, InlinedAt) ==
+ std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
+ }
+
+ bool operator<(const DebugVariable &Other) const {
+ return std::tie(Variable, Fragment, InlinedAt) <
+ std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
}
};
+ friend struct llvm::DenseMapInfo<DebugVariable>;
+
/// A pair of debug variable and value location.
struct VarLoc {
+ // The location at which a spilled variable resides. It consists of a
+ // register and an offset.
+ struct SpillLoc {
+ unsigned SpillBase;
+ int SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
+ }
+ };
+
const DebugVariable Var;
const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
mutable UserValueScopes UVS;
- enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind;
+ enum VarLocKind {
+ InvalidKind = 0,
+ RegisterKind,
+ SpillLocKind,
+ ImmediateKind,
+ EntryValueKind
+ } Kind = InvalidKind;
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
union {
uint64_t RegNo;
+ SpillLoc SpillLocation;
uint64_t Hash;
+ int64_t Immediate;
+ const ConstantFP *FPImm;
+ const ConstantInt *CImm;
} Loc;
- VarLoc(const MachineInstr &MI, LexicalScopes &LS)
- : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
- UVS(MI.getDebugLoc(), LS) {
+ VarLoc(const MachineInstr &MI, LexicalScopes &LS,
+ VarLocKind K = InvalidKind)
+ : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
- Kind = RegisterKind;
+ Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind;
Loc.RegNo = RegNo;
+ } else if (MI.getOperand(0).isImm()) {
+ Kind = ImmediateKind;
+ Loc.Immediate = MI.getOperand(0).getImm();
+ } else if (MI.getOperand(0).isFPImm()) {
+ Kind = ImmediateKind;
+ Loc.FPImm = MI.getOperand(0).getFPImm();
+ } else if (MI.getOperand(0).isCImm()) {
+ Kind = ImmediateKind;
+ Loc.CImm = MI.getOperand(0).getCImm();
}
+ assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) &&
+ "entry values must be register locations");
+ }
+
+ /// The constructor for spill locations.
+ VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset,
+ LexicalScopes &LS)
+ : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+ Kind = SpillLocKind;
+ Loc.SpillLocation = {SpillBase, SpillOffset};
}
+ // Is the Loc field a constant or constant object?
+ bool isConstant() const { return Kind == ImmediateKind; }
+
/// If this variable is described by a register, return it,
/// otherwise return 0.
unsigned isDescribedByReg() const {
@@ -167,17 +255,18 @@ private:
#endif
bool operator==(const VarLoc &Other) const {
- return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
+ return Kind == Other.Kind && Var == Other.Var &&
+ Loc.Hash == Other.Loc.Hash;
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- if (Var == Other.Var)
- return Loc.Hash < Other.Loc.Hash;
- return Var < Other.Var;
+ return std::tie(Var, Kind, Loc.Hash) <
+ std::tie(Other.Var, Other.Kind, Other.Loc.Hash);
}
};
+ using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>;
using VarLocMap = UniqueVector<VarLoc>;
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
@@ -187,26 +276,35 @@ private:
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
+ // Types for recording sets of variable fragments that overlap. For a given
+ // local variable, we record all other fragments of that variable that could
+ // overlap it, to reduce search time.
+ using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+ using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
/// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
/// previous open ranges for the same variable.
class OpenRangesSet {
VarLocSet VarLocs;
- SmallDenseMap<DebugVariableBase, unsigned, 8> Vars;
+ SmallDenseMap<DebugVariable, unsigned, 8> Vars;
+ OverlapMap &OverlappingFragments;
public:
+ OpenRangesSet(OverlapMap &_OLapMap) : OverlappingFragments(_OLapMap) {}
+
const VarLocSet &getVarLocs() const { return VarLocs; }
/// Terminate all open ranges for Var by removing it from the set.
- void erase(DebugVariable Var) {
- auto It = Vars.find(Var);
- if (It != Vars.end()) {
- unsigned ID = It->second;
- VarLocs.reset(ID);
- Vars.erase(It);
- }
- }
+ void erase(DebugVariable Var);
/// Terminate all open ranges listed in \c KillSet by removing
/// them from the set.
@@ -217,7 +315,7 @@ private:
}
/// Insert a new range into the set.
- void insert(unsigned VarLocID, DebugVariableBase Var) {
+ void insert(unsigned VarLocID, DebugVariable Var) {
VarLocs.set(VarLocID);
Vars.insert({Var, VarLocID});
}
@@ -237,24 +335,43 @@ private:
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
unsigned &Reg);
- int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+ /// If a given instruction is identified as a restore from a spill slot,
+ /// return the spill location and set \p Reg to the restored register.
+ Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF,
+ unsigned &Reg);
+ /// Given a spill instruction, extract the register and offset used to
+ /// address the spill location in a target independent way.
+ VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
TransferMap &Transfers, VarLocMap &VarLocIDs,
- unsigned OldVarID, unsigned NewReg = 0);
+ unsigned OldVarID, TransferKind Kind,
+ unsigned NewReg = 0);
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
- void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals,
+ SparseBitVector<> &KillSet);
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- const VarLocMap &VarLocIDs);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals);
bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+
bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, bool transferChanges);
+ TransferMap &Transfers, DebugParamMap &DebugEntryVals,
+ bool transferChanges, OverlapMap &OverlapFragments,
+ VarToFragments &SeenFragments);
+
+ void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
+ OverlapMap &OLapMap);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
@@ -289,10 +406,46 @@ public:
} // end anonymous namespace
+namespace llvm {
+
+template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> {
+ using DV = LiveDebugValues::DebugVariable;
+ using OptFragmentInfo = LiveDebugValues::OptFragmentInfo;
+ using FragmentInfo = LiveDebugValues::FragmentInfo;
+
+ // Empty key: no key should be generated that has no DILocalVariable.
+ static inline DV getEmptyKey() {
+ return DV(nullptr, OptFragmentInfo(), nullptr);
+ }
+
+ // Tombstone key: unlike the empty key, the Optional fragment is engaged.
+ static inline DV getTombstoneKey() {
+ return DV(nullptr, OptFragmentInfo({0, 0}), nullptr);
+ }
+
+ static unsigned getHashValue(const DV &D) {
+ unsigned HV = 0;
+ const OptFragmentInfo &Fragment = D.getFragment();
+ if (Fragment)
+ HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment);
+
+ return hash_combine(D.getVar(), HV, D.getInlinedAt());
+ }
+
+ static bool isEqual(const DV &A, const DV &B) { return A == B; }
+};
+
+} // namespace llvm
+
//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//
+const DIExpression::FragmentInfo
+ LiveDebugValues::DebugVariable::DefaultFragment = {
+ std::numeric_limits<uint64_t>::max(),
+ std::numeric_limits<uint64_t>::min()};
+
char LiveDebugValues::ID = 0;
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
@@ -312,6 +465,39 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+/// Erase a variable from the set of open ranges, and additionally erase any
+/// fragments that may overlap it.
+void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) {
+ // Erasure helper.
+ auto DoErase = [this](DebugVariable VarToErase) {
+ auto It = Vars.find(VarToErase);
+ if (It != Vars.end()) {
+ unsigned ID = It->second;
+ VarLocs.reset(ID);
+ Vars.erase(It);
+ }
+ };
+
+ // Erase the variable/fragment that ends here.
+ DoErase(Var);
+
+ // Extract the fragment. Interpret an empty fragment as one that covers all
+ // possible bits.
+ FragmentInfo ThisFragment = Var.getFragmentDefault();
+
+ // There may be fragments that overlap the designated fragment. Look them up
+ // in the pre-computed overlap map, and erase them too.
+ auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment});
+ if (MapIt != OverlappingFragments.end()) {
+ for (auto Fragment : MapIt->second) {
+ LiveDebugValues::OptFragmentInfo FragmentHolder;
+ if (!DebugVariable::isFragmentDefault(Fragment))
+ FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
+ DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()});
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -339,10 +525,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
#endif
-/// Given a spill instruction, extract the register and offset used to
-/// address the spill location in a target independent way.
-int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
- unsigned &Reg) {
+LiveDebugValues::VarLoc::SpillLoc
+LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
auto MMOI = MI.memoperands_begin();
@@ -351,7 +535,9 @@ int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
"Inconsistent memory operand in spill instruction");
int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
const MachineBasicBlock *MBB = MI.getParent();
- return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ unsigned Reg;
+ int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ return {Reg, Offset};
}
/// End all previous ranges related to @MI and start a new range from @MI
@@ -362,21 +548,72 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
if (!MI.isDebugValue())
return;
const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
const DILocation *DebugLoc = MI.getDebugLoc();
const DILocation *InlinedAt = DebugLoc->getInlinedAt();
assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
"Expected inlined-at fields to agree");
// End all previous ranges of Var.
- DebugVariable V(Var, InlinedAt);
+ DebugVariable V(Var, Expr, InlinedAt);
OpenRanges.erase(V);
// Add the VarLoc to OpenRanges from this DBG_VALUE.
- // TODO: Currently handles DBG_VALUE which has only reg as location.
- if (isDbgValueDescribedByReg(MI)) {
+ unsigned ID;
+ if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
+ MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
+ // Use normal VarLoc constructor for registers and immediates.
VarLoc VL(MI, LS);
- unsigned ID = VarLocIDs.insert(VL);
+ ID = VarLocIDs.insert(VL);
+ OpenRanges.insert(ID, VL.Var);
+ } else if (MI.hasOneMemOperand()) {
+ // It's a stack spill -- fetch spill base and offset.
+ VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+ VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS);
+ ID = VarLocIDs.insert(VL);
OpenRanges.insert(ID, VL.Var);
+ } else {
+ // This must be an undefined location. We should leave OpenRanges closed.
+ assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 &&
+ "Unexpected non-undef DBG_VALUE encountered");
+ }
+}
+
+void LiveDebugValues::emitEntryValues(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals,
+ SparseBitVector<> &KillSet) {
+ MachineFunction *MF = MI.getParent()->getParent();
+ for (unsigned ID : KillSet) {
+ if (!VarLocIDs[ID].Var.getVar()->isParameter())
+ continue;
+
+ const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI;
+
+ // If parameter's DBG_VALUE is not in the map that means we can't
+ // generate parameter's entry value.
+ if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable()))
+ continue;
+
+ auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
+ DIExpression *NewExpr = DIExpression::prepend(
+ ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
+ MachineInstr *EntryValDbgMI =
+ BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(),
+ ParamDebugInstr->isIndirectDebugValue(),
+ ParamDebugInstr->getOperand(0).getReg(),
+ ParamDebugInstr->getDebugVariable(), NewExpr);
+
+ if (ParamDebugInstr->isIndirectDebugValue())
+ EntryValDbgMI->getOperand(1).setImm(
+ ParamDebugInstr->getOperand(1).getImm());
+
+ Transfers.push_back({&MI, EntryValDbgMI});
+ VarLoc VL(*EntryValDbgMI, LS);
+ unsigned EntryValLocID = VarLocIDs.insert(VL);
+ OpenRanges.insert(EntryValLocID, VL.Var);
}
}
@@ -387,51 +624,92 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
/// otherwise it is variable's location on the stack.
void LiveDebugValues::insertTransferDebugPair(
MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
- VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) {
- const MachineInstr *DMI = &VarLocIDs[OldVarID].MI;
+ VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
+ unsigned NewReg) {
+ const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
MachineFunction *MF = MI.getParent()->getParent();
- MachineInstr *NewDMI;
- if (NewReg) {
+ MachineInstr *NewDebugInstr;
+
+ auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
+ &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
+ unsigned LocId = VarLocIDs.insert(VL);
+
+ // Close this variable's previous location range.
+ DebugVariable V(*DebugInstr);
+ OpenRanges.erase(V);
+
+ OpenRanges.insert(LocId, VL.Var);
+ // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
+ // after MI. Keep track of the pairing.
+ TransferDebugPair MIP = {&MI, NewDebugInstr};
+ Transfers.push_back(MIP);
+ };
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(VarLocIDs[OldVarID].Var);
+ switch (Kind) {
+ case TransferKind::TransferCopy: {
+ assert(NewReg &&
+ "No register supplied when handling a copy of a debug value");
// Create a DBG_VALUE instruction to describe the Var in its new
// register location.
- NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(),
- DMI->isIndirectDebugValue(), NewReg,
- DMI->getDebugVariable(), DMI->getDebugExpression());
- if (DMI->isIndirectDebugValue())
- NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ NewDebugInstr = BuildMI(
+ *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(),
+ DebugInstr->isIndirectDebugValue(), NewReg,
+ DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression());
+ if (DebugInstr->isIndirectDebugValue())
+ NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+ VarLoc VL(*NewDebugInstr, LS);
+ ProcessVarLoc(VL, NewDebugInstr);
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
- NewDMI->print(dbgs(), false, false, false, TII));
- } else {
+ NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+ /*SkipOpers*/false, /*SkipDebugLoc*/false,
+ /*AddNewLine*/true, TII));
+ return;
+ }
+ case TransferKind::TransferSpill: {
// Create a DBG_VALUE instruction to describe the Var in its spilled
// location.
- unsigned SpillBase;
- int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
- auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(),
- DIExpression::NoDeref, SpillOffset);
- NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
- DMI->getDebugVariable(), SpillExpr);
+ VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+ auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(),
+ DIExpression::ApplyOffset,
+ SpillLocation.SpillOffset);
+ NewDebugInstr = BuildMI(
+ *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true,
+ SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr);
+ VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase,
+ SpillLocation.SpillOffset, LS);
+ ProcessVarLoc(VL, NewDebugInstr);
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
- NewDMI->print(dbgs(), false, false, false, TII));
+ NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+ /*SkipOpers*/false, /*SkipDebugLoc*/false,
+ /*AddNewLine*/true, TII));
+ return;
}
-
- // The newly created DBG_VALUE instruction NewDMI must be inserted after
- // MI. Keep track of the pairing.
- TransferDebugPair MIP = {&MI, NewDMI};
- Transfers.push_back(MIP);
-
- // End all previous ranges of Var.
- OpenRanges.erase(VarLocIDs[OldVarID].Var);
-
- // Add the VarLoc to OpenRanges.
- VarLoc VL(*NewDMI, LS);
- unsigned LocID = VarLocIDs.insert(VL);
- OpenRanges.insert(LocID, VL.Var);
+ case TransferKind::TransferRestore: {
+ assert(NewReg &&
+ "No register supplied when handling a restore of a debug value");
+ MachineFunction *MF = MI.getMF();
+ DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
+ NewDebugInstr =
+ BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
+ NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
+ VarLoc VL(*NewDebugInstr, LS);
+ ProcessVarLoc(VL, NewDebugInstr);
+ LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
+ NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+ /*SkipOpers*/false, /*SkipDebugLoc*/false,
+ /*AddNewLine*/true, TII));
+ return;
+ }
+ }
+ llvm_unreachable("Invalid transfer kind");
}
/// A definition of a register may mark the end of a range.
-void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- const VarLocMap &VarLocIDs) {
+void LiveDebugValues::transferRegisterDef(
+ MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+ TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
MachineFunction *MF = MI.getMF();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -461,6 +739,13 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
}
}
OpenRanges.erase(KillSet, VarLocIDs);
+
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (TM.Options.EnableDebugEntryValues)
+ emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
+ KillSet);
+ }
}
/// Decide if @MI is a spill instruction and return true if it is. We use 2
@@ -471,24 +756,15 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
/// other spills). We do not handle this yet (more than one memory operand).
bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
- const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- int FI;
SmallVector<const MachineMemOperand*, 1> Accesses;
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
- // To identify a spill instruction, use the same criteria as in AsmPrinter.
- if (!((TII->isStoreToStackSlotPostFE(MI, FI) &&
- FrameInfo.isSpillSlotObjectIndex(FI)) ||
- (TII->hasStoreToStackSlot(MI, Accesses) &&
- llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) {
- return FrameInfo.isSpillSlotObjectIndex(
- cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
- ->getFrameIndex());
- }))))
- return false;
+ if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+ return false; // This is not a spill instruction, since no valid size was
+ // returned from either function.
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
if (!MO.isReg() || !MO.isUse()) {
@@ -525,29 +801,67 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
return false;
}
+Optional<LiveDebugValues::VarLoc::SpillLoc>
+LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!MI.hasOneMemOperand())
+ return None;
+
+ // FIXME: Handle folded restore instructions with more than one memory
+ // operand.
+ if (MI.getRestoreSize(TII)) {
+ Reg = MI.getOperand(0).getReg();
+ return extractSpillBaseRegAndOffset(MI);
+ }
+ return None;
+}
+
/// A spilled register may indicate that we have to end the current range of
/// a variable and create a new one for the spill location.
+/// A restored register may indicate the reverse situation.
/// We don't want to insert any instructions in process(), so we just create
/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
/// It will be inserted into the BB when we're done iterating over the
/// instructions.
-void LiveDebugValues::transferSpillInst(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs,
- TransferMap &Transfers) {
- unsigned Reg;
+void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers) {
MachineFunction *MF = MI.getMF();
- if (!isSpillInstruction(MI, MF, Reg))
- return;
+ TransferKind TKind;
+ unsigned Reg;
+ Optional<VarLoc::SpillLoc> Loc;
- // Check if the register is the location of a debug value.
+ LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+ if (isSpillInstruction(MI, MF, Reg)) {
+ TKind = TransferKind::TransferSpill;
+ LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+ << "\n");
+ } else {
+ if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ return;
+ TKind = TransferKind::TransferRestore;
+ LLVM_DEBUG(dbgs() << "Recognized as restore: "; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+ << "\n");
+ }
+ // Check if the register or spill location is the location of a debug value.
for (unsigned ID : OpenRanges.getVarLocs()) {
- if (VarLocIDs[ID].isDescribedByReg() == Reg) {
+ if (TKind == TransferKind::TransferSpill &&
+ VarLocIDs[ID].isDescribedByReg() == Reg) {
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
- insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID);
- return;
- }
+ } else if (TKind == TransferKind::TransferRestore &&
+ VarLocIDs[ID].Loc.SpillLocation == *Loc) {
+ LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
+ << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ } else
+ continue;
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
+ Reg);
+ return;
}
}
@@ -585,7 +899,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
for (unsigned ID : OpenRanges.getVarLocs()) {
if (VarLocIDs[ID].isDescribedByReg() == SrcReg) {
insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID,
- DestReg);
+ TransferKind::TransferCopy, DestReg);
return;
}
}
@@ -612,20 +926,92 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
});
VarLocSet &VLS = OutLocs[CurMBB];
Changed = VLS |= OpenRanges.getVarLocs();
+ // New OutLocs set may be different due to spill, restore or register
+ // copy instruction processing.
+ if (Changed)
+ VLS = OpenRanges.getVarLocs();
OpenRanges.clear();
return Changed;
}
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// fragment usage.
+/// \param SeenFragments Map from DILocalVariable to all fragments of that
+/// Variable which are known to exist.
+/// \param OverlappingFragments The overlap map being constructed, from one
+/// Var/Fragment pair to a vector of fragments known to overlap.
+void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
+ VarToFragments &SeenFragments,
+ OverlapMap &OverlappingFragments) {
+ DebugVariable MIVar(MI);
+ FragmentInfo ThisFragment = MIVar.getFragmentDefault();
+
+ // If this is the first sighting of this variable, then we are guaranteed
+ // there are currently no overlapping fragments either. Initialize the set
+ // of seen fragments, record no overlaps for the current one, and return.
+ auto SeenIt = SeenFragments.find(MIVar.getVar());
+ if (SeenIt == SeenFragments.end()) {
+ SmallSet<FragmentInfo, 4> OneFragment;
+ OneFragment.insert(ThisFragment);
+ SeenFragments.insert({MIVar.getVar(), OneFragment});
+
+ OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ return;
+ }
+
+ // If this particular Variable/Fragment pair already exists in the overlap
+ // map, it has already been accounted for.
+ auto IsInOLapMap =
+ OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ if (!IsInOLapMap.second)
+ return;
+
+ auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+ auto &AllSeenFragments = SeenIt->second;
+
+ // Otherwise, examine all other seen fragments for this variable, with "this"
+ // fragment being a previously unseen fragment. Record any pair of
+ // overlapping fragments.
+ for (auto &ASeenFragment : AllSeenFragments) {
+ // Does this previously seen fragment overlap?
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+ // Yes: Mark the current fragment as being overlapped.
+ ThisFragmentsOverlaps.push_back(ASeenFragment);
+ // Mark the previously seen fragment as being overlapped by the current
+ // one.
+ auto ASeenFragmentsOverlaps =
+ OverlappingFragments.find({MIVar.getVar(), ASeenFragment});
+ assert(ASeenFragmentsOverlaps != OverlappingFragments.end() &&
+ "Previously seen var fragment has no vector of overlaps");
+ ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+ }
+ }
+
+ AllSeenFragments.insert(ThisFragment);
+}
+
/// This routine creates OpenRanges and OutLocs.
bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, bool transferChanges) {
+ TransferMap &Transfers, DebugParamMap &DebugEntryVals,
+ bool transferChanges,
+ OverlapMap &OverlapFragments,
+ VarToFragments &SeenFragments) {
bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
+ DebugEntryVals);
if (transferChanges) {
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
- transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers);
+ transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
+ } else {
+ // Build up a map of overlapping fragments on the first run through.
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
}
Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
@@ -713,13 +1099,23 @@ bool LiveDebugValues::join(
// new range is started for the var from the mbb's beginning by inserting
// a new DBG_VALUE. process() will end this range however appropriate.
const VarLoc &DiffIt = VarLocIDs[ID];
- const MachineInstr *DMI = &DiffIt.MI;
- MachineInstr *MI =
- BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
- DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(),
- DMI->getDebugVariable(), DMI->getDebugExpression());
- if (DMI->isIndirectDebugValue())
- MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ const MachineInstr *DebugInstr = &DiffIt.MI;
+ MachineInstr *MI = nullptr;
+ if (DiffIt.isConstant()) {
+ MachineOperand MO(DebugInstr->getOperand(0));
+ MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+ DebugInstr->getDesc(), false, MO,
+ DebugInstr->getDebugVariable(),
+ DebugInstr->getDebugExpression());
+ } else {
+ MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+ DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(),
+ DebugInstr->getOperand(0).getReg(),
+ DebugInstr->getDebugVariable(),
+ DebugInstr->getDebugExpression());
+ if (DebugInstr->isIndirectDebugValue())
+ MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+ }
LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
ILS.set(ID);
++NumInserted;
@@ -737,11 +1133,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool OLChanged = false;
bool MBBJoined = false;
- VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
- OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
- VarLocInMBB OutLocs; // Ranges that exist beyond bb.
- VarLocInMBB InLocs; // Ranges that are incoming after joining.
- TransferMap Transfers; // DBG_VALUEs associated with spills.
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ OverlapMap OverlapFragments; // Map of overlapping variable fragments
+ OpenRangesSet OpenRanges(OverlapFragments);
+ // Ranges that are open until end of bb.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ TransferMap Transfers; // DBG_VALUEs associated with spills.
+
+ VarToFragments SeenFragments;
// Blocks which are artificial, i.e. blocks which exclusively contain
// instructions without locations, or with line 0 locations.
@@ -758,15 +1158,61 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
enum : bool { dontTransferChanges = false, transferChanges = true };
+ // Besides checking that the parameter is unmodified, check whether the
+ // DBG_VALUE is inlined, in order to deduce whether the variable it tracks
+ // comes from a different function. If that is the case we can't track its
+ // entry value.
+ auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) {
+ auto *DIVar = MI.getDebugVariable();
+ return DIVar->isParameter() && DIVar->isNotModified() &&
+ !MI.getDebugLoc()->getInlinedAt();
+ };
+
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ unsigned FP = TRI->getFrameRegister(MF);
+ auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
+ return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
+ };
+
+ // Working set of currently collected debug variables mapped to DBG_VALUEs
+ // representing candidates for production of debug entry values.
+ DebugParamMap DebugEntryVals;
+
+ MachineBasicBlock &First_MBB = *(MF.begin());
+ // Only in the case of entry MBB collect DBG_VALUEs representing
+ // function parameters in order to generate debug entry values for them.
+ // Currently, we generate debug entry values only for parameters that are
+ // unmodified throughout the function and located in a register.
+ // TODO: Add support for parameters that are described as fragments.
+ // TODO: Add support for modified arguments that can be expressed
+ //       by using their entry values.
+ // TODO: Add support for local variables that are expressed in terms of
+ //       parameters' entry values.
+ for (auto &MI : First_MBB)
+ if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) &&
+ !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) &&
+ !DebugEntryVals.count(MI.getDebugVariable()) &&
+ !MI.getDebugExpression()->isFragment())
+ DebugEntryVals[MI.getDebugVariable()] = &MI;
+
// Initialize every mbb with OutLocs.
// We are not looking at any spill instructions during the initial pass
// over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
// instructions for spills of registers that are known to be user variables
// within the BB in which the spill occurs.
- for (auto &MBB : MF)
- for (auto &MI : MBB)
- process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- dontTransferChanges);
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals,
+ dontTransferChanges, OverlapFragments, SeenFragments);
+ }
+ // Add any entry DBG_VALUE instructions necessitated by parameter
+ // clobbering.
+ for (auto &TR : Transfers) {
+ MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
+ TR.DebugInst);
+ }
+ Transfers.clear();
+ }
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
if (const DebugLoc &DL = MI.getDebugLoc())
@@ -812,8 +1258,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// examine spill instructions to see whether they spill registers that
// correspond to user variables.
for (auto &MI : *MBB)
- OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- transferChanges);
+ OLChanged |=
+ process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
+ DebugEntryVals, transferChanges, OverlapFragments,
+ SeenFragments);
// Add any DBG_VALUE instructions necessitated by spills.
for (auto &TR : Transfers)
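
The accumulateFragmentMap() hunk above builds, for every (variable, fragment) pair seen in a DBG_VALUE, the list of other fragments of the same variable that it overlaps, so the later dataflow stage can look overlaps up instead of rescanning all fragments. Below is a minimal standalone sketch of that bookkeeping using plain standard containers; the Fragment type, the overlap test, and the VarID key are simplified assumptions, not LLVM's types.

#include <cstdint>
#include <map>
#include <set>
#include <tuple>
#include <utility>
#include <vector>

struct Fragment {
  uint64_t OffsetInBits = 0; // 0/0 stands in for "the whole variable"
  uint64_t SizeInBits = 0;
  bool operator<(const Fragment &O) const {
    return std::tie(OffsetInBits, SizeInBits) <
           std::tie(O.OffsetInBits, O.SizeInBits);
  }
};

// Two fragments overlap if either describes the whole variable or their bit
// ranges intersect.
static bool fragmentsOverlap(const Fragment &A, const Fragment &B) {
  if (A.SizeInBits == 0 || B.SizeInBits == 0)
    return true;
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}

using VarID = unsigned;
static std::map<VarID, std::set<Fragment>> SeenFragments;
static std::map<std::pair<VarID, Fragment>, std::vector<Fragment>> OverlapMap;

// Called once per DBG_VALUE during the initial pass; the dataflow stage then
// only consults OverlapMap instead of re-deriving overlaps.
static void accumulateFragment(VarID Var, const Fragment &This) {
  auto Inserted = OverlapMap.insert({{Var, This}, {}});
  if (!Inserted.second)
    return; // This Var/Fragment pair is already registered.
  auto &ThisOverlaps = Inserted.first->second;
  for (const Fragment &Seen : SeenFragments[Var]) {
    if (fragmentsOverlap(This, Seen)) {
      ThisOverlaps.push_back(Seen);            // new fragment overlaps old one
      OverlapMap[{Var, Seen}].push_back(This); // and the old overlaps the new
    }
  }
  SeenFragments[Var].insert(This);
}
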
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index d0d889782a35..656ec7d4bdfd 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1,9 +1,8 @@
//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -71,6 +71,7 @@ EnableLDV("live-debug-variables", cl::init(true),
cl::desc("Enable the live debug variables pass"), cl::Hidden);
STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+STATISTIC(NumInsertedDebugLabels, "Number of DBG_LABELs inserted");
char LiveDebugVariables::ID = 0;
@@ -166,10 +167,6 @@ class UserValue {
/// Map of slot indices where this value is live.
LocMap locInts;
- /// Set of interval start indexes that have been trimmed to the
- /// lexical scope.
- SmallSet<SlotIndex, 2> trimmedDefs;
-
/// Insert a DBG_VALUE into MBB at Idx for LocNo.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
@@ -339,6 +336,37 @@ public:
void print(raw_ostream &, const TargetRegisterInfo *);
};
+/// A user label is a part of a debug info user label.
+class UserLabel {
+ const DILabel *Label; ///< The debug info label we are part of.
+ DebugLoc dl; ///< The debug location for the label. This is
+ ///< used by dwarf writer to find lexical scope.
+ SlotIndex loc; ///< Slot used by the debug label.
+
+ /// Insert a DBG_LABEL into MBB at Idx.
+ void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+ /// Create a new UserLabel.
+ UserLabel(const DILabel *label, DebugLoc L, SlotIndex Idx)
+ : Label(label), dl(std::move(L)), loc(Idx) {}
+
+ /// Does this UserLabel match the parameters?
+ bool match(const DILabel *L, const DILocation *IA,
+ const SlotIndex Index) const {
+ return Label == L && dl->getInlinedAt() == IA && loc == Index;
+ }
+
+ /// Recreate DBG_LABEL instruction from data structures.
+ void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// Return DebugLoc of this UserLabel.
+ DebugLoc getDebugLoc() { return dl; }
+
+ void print(raw_ostream &, const TargetRegisterInfo *);
+};
+
/// Implementation of the LiveDebugVariables pass.
class LDVImpl {
LiveDebugVariables &pass;
@@ -356,6 +384,9 @@ class LDVImpl {
/// All allocated UserValue instances.
SmallVector<std::unique_ptr<UserValue>, 8> userValues;
+ /// All allocated UserLabel instances.
+ SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
+
/// Map virtual register to eq class leader.
using VRMap = DenseMap<unsigned, UserValue *>;
VRMap virtRegToEqClass;
@@ -379,6 +410,14 @@ class LDVImpl {
/// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
+ /// Add DBG_LABEL instruction to UserLabel.
+ ///
+ /// \param MI DBG_LABEL instruction
+ /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_LABEL instruction should be deleted.
+ bool handleDebugLabel(MachineInstr &MI, SlotIndex Idx);
+
/// Collect and erase all DBG_VALUE instructions, adding a UserValue def
/// for each instruction.
///
@@ -400,6 +439,7 @@ public:
void clear() {
MF = nullptr;
userValues.clear();
+ userLabels.clear();
virtRegToEqClass.clear();
userVarMap.clear();
// Make sure we call emitDebugValues if the machine function was modified.
@@ -445,13 +485,23 @@ static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
CommentOS << " ]";
}
-static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
+static void printExtendedName(raw_ostream &OS, const DINode *Node,
const DILocation *DL) {
- const LLVMContext &Ctx = V->getContext();
- StringRef Res = V->getName();
+ const LLVMContext &Ctx = Node->getContext();
+ StringRef Res;
+ unsigned Line;
+ if (const auto *V = dyn_cast<const DILocalVariable>(Node)) {
+ Res = V->getName();
+ Line = V->getLine();
+ } else if (const auto *L = dyn_cast<const DILabel>(Node)) {
+ Res = L->getName();
+ Line = L->getLine();
+ }
+
if (!Res.empty())
- OS << Res << "," << V->getLine();
- if (auto *InlinedAt = DL->getInlinedAt()) {
+ OS << Res << "," << Line;
+ auto *InlinedAt = DL ? DL->getInlinedAt() : nullptr;
+ if (InlinedAt) {
if (DebugLoc InlinedAtDL = InlinedAt) {
OS << " @[";
printDebugLoc(InlinedAtDL, OS, Ctx);
@@ -461,9 +511,8 @@ static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
}
void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
- auto *DV = cast<DILocalVariable>(Variable);
OS << "!\"";
- printExtendedName(OS, DV, dl);
+ printExtendedName(OS, Variable, dl);
OS << "\"\t";
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
@@ -483,10 +532,22 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
OS << '\n';
}
+void UserLabel::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ OS << "!\"";
+ printExtendedName(OS, Label, dl);
+
+ OS << "\"\t";
+ OS << loc;
+ OS << '\n';
+}
+
void LDVImpl::print(raw_ostream &OS) {
OS << "********** DEBUG VARIABLES **********\n";
- for (unsigned i = 0, e = userValues.size(); i != e; ++i)
- userValues[i]->print(OS, TRI);
+ for (auto &userValue : userValues)
+ userValue->print(OS, TRI);
+ OS << "********** DEBUG LABELS **********\n";
+ for (auto &userLabel : userLabels)
+ userLabel->print(OS, TRI);
}
#endif
@@ -556,7 +617,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
} else {
// The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
// is defined dead at Idx (where Idx is the slot index for the instruction
- // preceeding the DBG_VALUE).
+ // preceding the DBG_VALUE).
const LiveInterval &LI = LIS->getInterval(Reg);
LiveQueryResult LRQ = LI.Query(Idx);
if (!LRQ.valueOutOrDead()) {
@@ -587,6 +648,29 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
return true;
}
+bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
+ // DBG_LABEL label
+ if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) {
+ LLVM_DEBUG(dbgs() << "Can't handle " << MI);
+ return false;
+ }
+
+ // Get or create the UserLabel for label here.
+ const DILabel *Label = MI.getDebugLabel();
+ const DebugLoc &DL = MI.getDebugLoc();
+ bool Found = false;
+ for (auto const &L : userLabels) {
+ if (L->match(Label, DL->getInlinedAt(), Idx)) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found)
+ userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx));
+
+ return true;
+}
+
bool LDVImpl::collectDebugValues(MachineFunction &mf) {
bool Changed = false;
for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
@@ -610,7 +694,8 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
do {
// Only handle DBG_VALUE in handleDebugValue(). Skip all other
// kinds of debug instructions.
- if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) {
+ if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+ (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
MBBI = MBB->erase(MBBI);
Changed = true;
} else
@@ -655,10 +740,8 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
}
// Limited by the next def.
- if (I.valid() && I.start() < Stop) {
+ if (I.valid() && I.start() < Stop)
Stop = I.start();
- ToEnd = false;
- }
// Limited by VNI's live range.
else if (!ToEnd && Kills)
Kills->push_back(Stop);
@@ -826,8 +909,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
++I;
// If the interval also overlaps the start of the "next" (i.e.
- // current) range create a new interval for the remainder (which
- // may be further trimmed).
+ // current) range create a new interval for the remainder
if (RStart < IStop)
I.insert(RStart, IStop, Loc);
}
@@ -837,13 +919,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
if (!I.valid())
return;
- if (I.start() < RStart) {
- // Interval start overlaps range - trim to the scope range.
- I.setStartUnchecked(RStart);
- // Remember that this interval was trimmed.
- trimmedDefs.insert(RStart);
- }
-
// The end of a lexical scope range is the last instruction in the
// range. To convert to an interval we need the index of the
// instruction after it.
@@ -1227,11 +1302,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
bool IsIndirect = Loc.wasIndirect();
if (Spilled) {
- auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref;
+ if (IsIndirect)
+ DIExprFlags |= DIExpression::DerefAfter;
Expr =
- DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref);
+ DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
IsIndirect = true;
}
@@ -1247,6 +1324,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
} while (I != MBB->end());
}
+void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ ++NumInsertedDebugLabels;
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL))
+ .addMetadata(Label);
+}
+
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
@@ -1262,12 +1348,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
bool Spilled = SpillIt != SpillOffsets.end();
unsigned SpillOffset = Spilled ? SpillIt->second : 0;
- // If the interval start was trimmed to the lexical scope insert the
- // DBG_VALUE at the previous index (otherwise it appears after the
- // first instruction in the range).
- if (trimmedDefs.count(Start))
- Start = Start.getPrevIndex();
-
LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
@@ -1295,16 +1375,31 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
}
}
+void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII) {
+ LLVM_DEBUG(dbgs() << "\t" << loc);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator();
+
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB));
+ insertDebugLabel(&*MBB, loc, LIS, TII);
+
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
SpillOffsetMap SpillOffsets;
- for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
- userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
- userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+ for (auto &userValue : userValues) {
+ LLVM_DEBUG(userValue->print(dbgs(), TRI));
+ userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+ userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+ }
+ LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n");
+ for (auto &userLabel : userLabels) {
+ LLVM_DEBUG(userLabel->print(dbgs(), TRI));
+ userLabel->emitDebugLabel(*LIS, *TII);
}
EmitDone = true;
}
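
The handleDebugLabel() hunk above deduplicates DBG_LABELs by reusing a UserLabel whenever one already matches the same (label, inlined-at, slot index) triple. A minimal standalone sketch of that find-or-create step, with simplified stand-in types rather than LLVM's:

#include <memory>
#include <vector>

struct Label {};    // stands in for const DILabel *
struct Location {}; // stands in for the inlined-at DILocation *

struct UserLabel {
  const Label *L;
  const Location *InlinedAt;
  unsigned Index; // stands in for SlotIndex
  bool match(const Label *OtherL, const Location *IA, unsigned Idx) const {
    return L == OtherL && InlinedAt == IA && Index == Idx;
  }
};

static std::vector<std::unique_ptr<UserLabel>> UserLabels;

// One UserLabel per (label, inlined-at, index) triple: reuse an existing
// entry if it matches, otherwise create a new one.
static void handleDebugLabel(const Label *L, const Location *IA, unsigned Idx) {
  for (const auto &UL : UserLabels)
    if (UL->match(L, IA, Idx))
      return; // already tracked
  UserLabels.push_back(std::make_unique<UserLabel>(UserLabel{L, IA, Idx}));
}
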
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 0060399c2b04..0cbe10c6a422 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -1,9 +1,8 @@
//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 2340b6abd87c..70b2a77fe800 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1,9 +1,8 @@
//===- LiveInterval.cpp - Live Interval Representation --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -297,9 +296,7 @@ private:
iterator find(SlotIndex Pos) { return LR->find(Pos); }
- iterator findInsertPos(Segment S) {
- return std::upper_bound(LR->begin(), LR->end(), S.start);
- }
+ iterator findInsertPos(Segment S) { return llvm::upper_bound(*LR, S.start); }
};
//===----------------------------------------------------------------------===//
@@ -880,8 +877,53 @@ void LiveInterval::clearSubRanges() {
SubRanges = nullptr;
}
-void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
- LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+/// For each VNI in \p SR, check whether or not that value defines part
+/// of the mask describe by \p LaneMask and if not, remove that value
+/// from \p SR.
+static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
+ LaneBitmask LaneMask,
+ const SlotIndexes &Indexes,
+ const TargetRegisterInfo &TRI) {
+ // Phys reg should not be tracked at subreg level.
+ // Same for noreg (Reg == 0).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+ return;
+ // Remove the values that don't define those lanes.
+ SmallVector<VNInfo *, 8> ToBeRemoved;
+ for (VNInfo *VNI : SR.valnos) {
+ if (VNI->isUnused())
+ continue;
+ // PHI definitions don't have MI attached, so there is nothing
+ // we can use to strip the VNI.
+ if (VNI->isPHIDef())
+ continue;
+ const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def);
+ assert(MI && "Cannot find the definition of a value");
+ bool hasDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (MOI->getReg() != Reg)
+ continue;
+ if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+ continue;
+ hasDef = true;
+ break;
+ }
+
+ if (!hasDef)
+ ToBeRemoved.push_back(VNI);
+ }
+ for (VNInfo *VNI : ToBeRemoved)
+ SR.removeValNo(VNI);
+
+ assert(!SR.empty() && "At least one value should be defined by this mask");
+}
+
+void LiveInterval::refineSubRanges(
+ BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
+ std::function<void(LiveInterval::SubRange &)> Apply,
+ const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
LaneBitmask ToApply = LaneMask;
for (SubRange &SR : subranges()) {
LaneBitmask SRMask = SR.LaneMask;
@@ -899,6 +941,10 @@ void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
SR.LaneMask = SRMask & ~Matching;
// Create a new subrange for the matching part
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+ // Now that the subrange is split in half, make sure we
+ // only keep in the subranges the VNIs that touch the related half.
+ stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
+ stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
}
Apply(*MatchingRange);
ToApply &= ~Matching;
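
The stripValuesNotDefiningMask() hunk above keeps a subrange value only if the instruction defining it writes at least one lane of the subrange's mask, which prevents stale values from surviving a subrange split. A minimal standalone sketch of that filter, modelling lane masks as plain bitmasks and leaving out LLVM's VNInfo and operand machinery:

#include <cstdint>
#include <vector>

using LaneMask = uint64_t;

struct Def {
  unsigned Reg;          // register written by this operand
  LaneMask WrittenLanes; // lanes of Reg covered by the written sub-register
};

struct ValueInfo {
  std::vector<Def> Defs; // def operands of the defining instruction
};

static bool definesAnyLane(const ValueInfo &VNI, unsigned Reg, LaneMask Mask) {
  for (const Def &D : VNI.Defs)
    if (D.Reg == Reg && (D.WrittenLanes & Mask) != 0)
      return true;
  return false;
}

// Remove every value whose defining instruction does not touch any lane in
// Mask, mirroring the "strip" step that runs after a subrange is split.
static void stripValuesNotDefiningMask(unsigned Reg, LaneMask Mask,
                                       std::vector<ValueInfo> &Values) {
  std::vector<ValueInfo> Kept;
  for (const ValueInfo &V : Values)
    if (definesAnyLane(V, Reg, Mask))
      Kept.push_back(V);
  Values = std::move(Kept);
}
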
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 36428e0335f9..43fa8f2d7157 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,9 +1,8 @@
//===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index 471775f8706b..aa85569063b3 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -1,9 +1,8 @@
//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -901,8 +900,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
// We are going to enumerate all the register mask slots contained in LI.
// Start with a binary search of RegMaskSlots to find a starting point.
- ArrayRef<SlotIndex>::iterator SlotI =
- std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotI = llvm::lower_bound(Slots, LiveI->start);
ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
// No slots in range, LI begins after the last call.
@@ -1371,8 +1369,7 @@ private:
void updateRegMaskSlots() {
SmallVectorImpl<SlotIndex>::iterator RI =
- std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
- OldIdx);
+ llvm::lower_bound(LIS.RegMaskSlots, OldIdx);
assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
"No RegMask at OldIdx.");
*RI = NewIdx.getRegSlot();
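
The two hunks above replace explicit std::lower_bound(C.begin(), C.end(), X) calls with the range-based llvm::lower_bound(C, X) helper from llvm/ADT/STLExtras.h. A sketch of what such a wrapper looks like, written here as a free function rather than LLVM's actual declaration:

#include <algorithm>
#include <iterator>
#include <vector>

// Range-based wrapper: drop the repeated begin()/end() at every call site.
template <typename Range, typename T>
auto range_lower_bound(Range &&R, const T &Value) {
  return std::lower_bound(std::begin(R), std::end(R), Value);
}

int main() {
  std::vector<int> Slots = {1, 4, 9, 16};
  // Equivalent to std::lower_bound(Slots.begin(), Slots.end(), 9).
  auto It = range_lower_bound(Slots, 9);
  return It != Slots.end() && *It == 9 ? 0 : 1;
}
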
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 619643acb6d3..cd3d248ac878 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -1,9 +1,8 @@
//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 70e135ab1aff..d670f28df6ba 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -1,9 +1,8 @@
//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -96,10 +95,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
}
LI.refineSubRanges(*Alloc, SubMask,
- [&MO, this](LiveInterval::SubRange &SR) {
- if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, SR, MO);
- });
+ [&MO, this](LiveInterval::SubRange &SR) {
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ },
+ *Indexes, TRI);
}
// Create the def in the main liverange. We do not have to do this if
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 9f226b154a67..11aea5a3b016 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -1,9 +1,8 @@
//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 8dfe8b68c3af..882e562ba95c 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -1,9 +1,8 @@
//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -232,6 +231,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
return false;
LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
+ if (UseMI->isCall())
+ UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
Dead.push_back(DefMI);
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index f75d513c89f5..8818f1ce0ad9 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -1,9 +1,8 @@
//===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
///===---------------------------------------------------------------------===//
///
diff --git a/lib/CodeGen/LiveRangeUtils.h b/lib/CodeGen/LiveRangeUtils.h
index bd57609c3d84..0e6bfeb0d4a5 100644
--- a/lib/CodeGen/LiveRangeUtils.h
+++ b/lib/CodeGen/LiveRangeUtils.h
@@ -1,9 +1,8 @@
//===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index e72977b02675..ce99e5535c25 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -1,9 +1,8 @@
//===- LiveRegMatrix.cpp - Track register interference --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index c22681385492..6afb7fb7aa11 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -1,9 +1,8 @@
//===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -126,13 +125,15 @@ void LiveRegUnits::addPristines(const MachineFunction &MF) {
void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- if (!MBB.succ_empty()) {
- addPristines(MF);
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addBlockLiveIns(*this, *Succ);
- } else if (MBB.isReturnBlock()) {
- // For the return block: Add all callee saved registers.
+
+ addPristines(MF);
+
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*this, *Succ);
+
+ // For the return block: Add all callee saved registers.
+ if (MBB.isReturnBlock()) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid())
addCalleeSavedRegs(*this, MF);
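
The addLiveOuts() hunk above changes the live-out computation: pristine registers are now always added, the successors' live-ins are always merged in, and callee-saved registers are added on top for return blocks (previously pristines were only added when the block had successors, and return blocks got only the callee-saved registers). A minimal standalone sketch of the resulting rule, with simplified register-set and block types:

#include <set>
#include <vector>

using RegSet = std::set<unsigned>;

struct Block {
  std::vector<const Block *> Successors;
  RegSet LiveIns;
  bool IsReturnBlock = false;
};

static RegSet computeLiveOuts(const Block &B, const RegSet &Pristines,
                              const RegSet &CalleeSaved) {
  RegSet LiveOuts = Pristines;            // always added now
  for (const Block *Succ : B.Successors)  // union of successor live-ins
    LiveOuts.insert(Succ->LiveIns.begin(), Succ->LiveIns.end());
  if (B.IsReturnBlock)                    // plus CSRs when returning
    LiveOuts.insert(CalleeSaved.begin(), CalleeSaved.end());
  return LiveOuts;
}
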
diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp
index 80ecfdb7a507..f55977d72723 100644
--- a/lib/CodeGen/LiveStacks.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -1,9 +1,8 @@
//===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 0b92eab83806..aaff982ef1b0 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -1,9 +1,8 @@
//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -401,7 +400,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/, true/*IsKill*/));
else {
MachineOperand *MO =
- LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI);
bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
// If the last reference is the last def, then it's not used at all.
// That is, unless we are currently processing the last reference itself.
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 795028e97929..b14d76a585f7 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -1,9 +1,8 @@
//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -200,19 +199,27 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI.getStackProtectorIndex() >= 0) {
+ if (MFI.hasStackProtectorIndex()) {
+ int StackProtectorFI = MFI.getStackProtectorIndex();
+
+ // We need to make sure we didn't pre-allocate the stack protector when
+ // doing this.
+ // If we already have a stack protector, this will re-assign it to a slot
+ // that is **not** covering the protected objects.
+ assert(!MFI.isObjectPreAllocated(StackProtectorFI) &&
+ "Stack protector pre-allocated in LocalStackSlotAllocation");
+
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset,
- StackGrowsDown, MaxAlign);
+ AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI.getStackProtectorIndex() == (int)i)
+ if (StackProtectorFI == (int)i)
continue;
switch (MFI.getObjectSSPLayout(i)) {
diff --git a/lib/CodeGen/LoopTraversal.cpp b/lib/CodeGen/LoopTraversal.cpp
index a02d10e09d7d..9490dfc40a82 100644
--- a/lib/CodeGen/LoopTraversal.cpp
+++ b/lib/CodeGen/LoopTraversal.cpp
@@ -1,9 +1,8 @@
//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp
index 1c682e72fa49..ca0daa14fedf 100644
--- a/lib/CodeGen/LowLevelType.cpp
+++ b/lib/CodeGen/LowLevelType.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 36c1d358a9bd..c8cf6abda4fc 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -1,9 +1,8 @@
//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
index f17c23619ed5..f49bc854e23f 100644
--- a/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -1,9 +1,8 @@
//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -105,6 +104,8 @@ INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
"Rename Register Operands Canonically", false, false)
static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
+ if (MF.empty())
+ return {};
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
std::vector<MachineBasicBlock *> RPOList;
for (auto MBB : RPOT) {
@@ -179,6 +180,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
+ std::map<unsigned, MachineInstr *> MultiUserLookup;
+ unsigned UseToBringDefCloserToCount = 0;
std::vector<MachineInstr *> PseudoIdempotentInstructions;
std::vector<unsigned> PhysRegDefs;
for (auto *II : Instructions) {
@@ -254,6 +257,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (Delta < Distance) {
Distance = Delta;
UseToBringDefCloserTo = UseInst;
+ MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
}
}
@@ -293,11 +297,11 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
// Sort the defs for users of multiple defs lexographically.
- for (const auto &E : MultiUsers) {
+ for (const auto &E : MultiUserLookup) {
auto UseI =
std::find_if(MBB->instr_begin(), MBB->instr_end(),
- [&](MachineInstr &MI) -> bool { return &MI == E.first; });
+ [&](MachineInstr &MI) -> bool { return &MI == E.second; });
if (UseI == MBB->instr_end())
continue;
@@ -305,7 +309,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
LLVM_DEBUG(
dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
Changed |= rescheduleLexographically(
- E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; });
+ MultiUsers[E.second], MBB,
+ [&]() -> MachineBasicBlock::iterator { return UseI; });
}
PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
@@ -342,15 +347,23 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
continue;
if (!TargetRegisterInfo::isVirtualRegister(Src))
continue;
+ // Not folding COPY instructions if regbankselect has not set the RCs.
+ // Why are we only considering Register Classes? Because the verifier
+ // sometimes gets upset if the register classes don't match even if the
+ // types do. A future patch might add COPY folding for matching types in
+ // pre-registerbankselect code.
+ if (!MRI.getRegClassOrNull(Dst))
+ continue;
if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
continue;
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- MachineOperand *MO = &*UI;
+ std::vector<MachineOperand *> Uses;
+ for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
+ Uses.push_back(&*UI);
+ for (auto *MO : Uses)
MO->setReg(Src);
- Changed = true;
- }
+ Changed = true;
MI->eraseFromParent();
}
@@ -474,18 +487,14 @@ class NamedVRegCursor {
unsigned virtualVRegNumber;
public:
- NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) {
- unsigned VRegGapIndex = 0;
- const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
- unsigned I = MRI.createIncompleteVirtualRegister();
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
- virtualVRegNumber = E;
- }
+ NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {}
void SkipVRegs() {
unsigned VRegGapIndex = 1;
+ if (!virtualVRegNumber) {
+ VRegGapIndex = 0;
+ virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+ }
const unsigned VR_GAP = (++VRegGapIndex * 1000);
unsigned I = virtualVRegNumber;
@@ -501,14 +510,17 @@ public:
return virtualVRegNumber;
}
- unsigned createVirtualRegister(const TargetRegisterClass *RC) {
+ unsigned createVirtualRegister(unsigned VReg) {
+ if (!virtualVRegNumber)
+ SkipVRegs();
std::string S;
raw_string_ostream OS(S);
OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
OS.flush();
virtualVRegNumber++;
-
- return MRI.createVirtualRegister(RC, OS.str());
+ if (auto RC = MRI.getRegClassOrNull(VReg))
+ return MRI.createVirtualRegister(RC, OS.str());
+ return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
}
};
} // namespace
@@ -558,7 +570,7 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
continue;
}
- auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg));
+ auto Rename = NVC.createVirtualRegister(Reg);
if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
LLVM_DEBUG(dbgs() << "Mapping vreg ";);
@@ -735,14 +747,15 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
// of the MachineBasicBlock so that they are named in the order that we sorted
// them alphabetically. Eventually we wont need SkipVRegs because we will use
// named vregs instead.
- NVC.SkipVRegs();
+ if (IdempotentInstCount)
+ NVC.SkipVRegs();
auto MII = MBB->begin();
for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
MachineInstr &MI = *MII++;
Changed = true;
unsigned vRegToRename = MI.getOperand(0).getReg();
- auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename));
+ auto Rename = NVC.createVirtualRegister(vRegToRename);
std::vector<MachineOperand *> RenameMOs;
for (auto &MO : MRI.reg_operands(vRegToRename)) {
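
The propagateLocalCopies() hunk above first copies the users of Dst into a plain vector and only then rewrites them, because calling setReg() while walking the register's use list can re-link the list under the iterator. A minimal standalone sketch of that snapshot-then-mutate pattern, with a simplified operand and use-list model:

#include <list>
#include <vector>

struct Operand {
  unsigned Reg;
};

static void replaceAllUses(std::list<Operand> &UseList, unsigned Dst,
                           unsigned Src) {
  // Snapshot the uses of Dst first; rewriting them below may re-link or
  // reorder the underlying use list in the real data structure.
  std::vector<Operand *> Uses;
  for (Operand &Op : UseList)
    if (Op.Reg == Dst)
      Uses.push_back(&Op);

  // Now it is safe to rewrite each collected use.
  for (Operand *Op : Uses)
    Op->Reg = Src;
}
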
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 265877c2f5b4..4899bd3f5811 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -1,9 +1,8 @@
//===- MILexer.cpp - Machine instructions lexer implementation ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -205,6 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nuw" , MIToken::kw_nuw)
.Case("nsw" , MIToken::kw_nsw)
.Case("exact" , MIToken::kw_exact)
+ .Case("fpexcept", MIToken::kw_fpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index ceff79087d81..0fe3f9f706db 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -1,9 +1,8 @@
//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,7 @@ struct MIToken {
kw_nuw,
kw_nsw,
kw_exact,
+ kw_fpexcept,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index 6f2d8bb53ac8..c0b800a0b870 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1,9 +1,8 @@
//===- MIParser.cpp - Machine instructions parser implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MIParser.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "MILexer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
@@ -27,6 +26,8 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -81,12 +82,242 @@
using namespace llvm;
+void PerTargetMIParsingState::setTarget(
+ const TargetSubtargetInfo &NewSubtarget) {
+
+ // If the subtarget changed, over conservatively assume everything is invalid.
+ if (&Subtarget == &NewSubtarget)
+ return;
+
+ Names2InstrOpCodes.clear();
+ Names2Regs.clear();
+ Names2RegMasks.clear();
+ Names2SubRegIndices.clear();
+ Names2TargetIndices.clear();
+ Names2DirectTargetFlags.clear();
+ Names2BitmaskTargetFlags.clear();
+ Names2MMOTargetFlags.clear();
+
+ initNames2RegClasses();
+ initNames2RegBanks();
+}
+
+void PerTargetMIParsingState::initNames2Regs() {
+ if (!Names2Regs.empty())
+ return;
+
+ // The '%noreg' register is the register 0.
+ Names2Regs.insert(std::make_pair("noreg", 0));
+ const auto *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+
+ for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+ bool WasInserted =
+ Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+ .second;
+ (void)WasInserted;
+ assert(WasInserted && "Expected registers to be unique case-insensitively");
+ }
+}
+
+bool PerTargetMIParsingState::getRegisterByName(StringRef RegName,
+ unsigned &Reg) {
+ initNames2Regs();
+ auto RegInfo = Names2Regs.find(RegName);
+ if (RegInfo == Names2Regs.end())
+ return true;
+ Reg = RegInfo->getValue();
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2InstrOpCodes() {
+ if (!Names2InstrOpCodes.empty())
+ return;
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
+ Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
+}
+
+bool PerTargetMIParsingState::parseInstrName(StringRef InstrName,
+ unsigned &OpCode) {
+ initNames2InstrOpCodes();
+ auto InstrInfo = Names2InstrOpCodes.find(InstrName);
+ if (InstrInfo == Names2InstrOpCodes.end())
+ return true;
+ OpCode = InstrInfo->getValue();
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2RegMasks() {
+ if (!Names2RegMasks.empty())
+ return;
+ const auto *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
+ ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
+ assert(RegMasks.size() == RegMaskNames.size());
+ for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
+ Names2RegMasks.insert(
+ std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
+}
+
+const uint32_t *PerTargetMIParsingState::getRegMask(StringRef Identifier) {
+ initNames2RegMasks();
+ auto RegMaskInfo = Names2RegMasks.find(Identifier);
+ if (RegMaskInfo == Names2RegMasks.end())
+ return nullptr;
+ return RegMaskInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2SubRegIndices() {
+ if (!Names2SubRegIndices.empty())
+ return;
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
+ Names2SubRegIndices.insert(
+ std::make_pair(TRI->getSubRegIndexName(I), I));
+}
+
+unsigned PerTargetMIParsingState::getSubRegIndex(StringRef Name) {
+ initNames2SubRegIndices();
+ auto SubRegInfo = Names2SubRegIndices.find(Name);
+ if (SubRegInfo == Names2SubRegIndices.end())
+ return 0;
+ return SubRegInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2TargetIndices() {
+ if (!Names2TargetIndices.empty())
+ return;
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices)
+ Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getTargetIndex(StringRef Name, int &Index) {
+ initNames2TargetIndices();
+ auto IndexInfo = Names2TargetIndices.find(Name);
+ if (IndexInfo == Names2TargetIndices.end())
+ return true;
+ Index = IndexInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2DirectTargetFlags() {
+ if (!Names2DirectTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2DirectTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getDirectTargetFlag(StringRef Name,
+ unsigned &Flag) {
+ initNames2DirectTargetFlags();
+ auto FlagInfo = Names2DirectTargetFlags.find(Name);
+ if (FlagInfo == Names2DirectTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2BitmaskTargetFlags() {
+ if (!Names2BitmaskTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2BitmaskTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getBitmaskTargetFlag(StringRef Name,
+ unsigned &Flag) {
+ initNames2BitmaskTargetFlags();
+ auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+ if (FlagInfo == Names2BitmaskTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2MMOTargetFlags() {
+ if (!Names2MMOTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2MMOTargetFlags.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getMMOTargetFlag(StringRef Name,
+ MachineMemOperand::Flags &Flag) {
+ initNames2MMOTargetFlags();
+ auto FlagInfo = Names2MMOTargetFlags.find(Name);
+ if (FlagInfo == Names2MMOTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2RegClasses() {
+ if (!Names2RegClasses.empty())
+ return;
+
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
+ const auto *RC = TRI->getRegClass(I);
+ Names2RegClasses.insert(
+ std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
+ }
+}
+
+void PerTargetMIParsingState::initNames2RegBanks() {
+ if (!Names2RegBanks.empty())
+ return;
+
+ const RegisterBankInfo *RBI = Subtarget.getRegBankInfo();
+ // If the target does not support GlobalISel, we may not have a
+ // register bank info.
+ if (!RBI)
+ return;
+
+ for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
+ const auto &RegBank = RBI->getRegBank(I);
+ Names2RegBanks.insert(
+ std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
+ }
+}
+
+const TargetRegisterClass *
+PerTargetMIParsingState::getRegClass(StringRef Name) {
+ auto RegClassInfo = Names2RegClasses.find(Name);
+ if (RegClassInfo == Names2RegClasses.end())
+ return nullptr;
+ return RegClassInfo->getValue();
+}
+
+const RegisterBank *PerTargetMIParsingState::getRegBank(StringRef Name) {
+ auto RegBankInfo = Names2RegBanks.find(Name);
+ if (RegBankInfo == Names2RegBanks.end())
+ return nullptr;
+ return RegBankInfo->getValue();
+}
+
PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
- SourceMgr &SM, const SlotMapping &IRSlots,
- const Name2RegClassMap &Names2RegClasses,
- const Name2RegBankMap &Names2RegBanks)
- : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses),
- Names2RegBanks(Names2RegBanks) {
+ SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &T)
+ : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) {
}
VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
@@ -137,26 +368,10 @@ class MIParser {
StringRef Source, CurrentSource;
MIToken Token;
PerFunctionMIParsingState &PFS;
- /// Maps from instruction names to op codes.
- StringMap<unsigned> Names2InstrOpCodes;
- /// Maps from register names to registers.
- StringMap<unsigned> Names2Regs;
- /// Maps from register mask names to register masks.
- StringMap<const uint32_t *> Names2RegMasks;
- /// Maps from subregister names to subregister indices.
- StringMap<unsigned> Names2SubRegIndices;
/// Maps from slot numbers to function's unnamed basic blocks.
DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
/// Maps from slot numbers to function's unnamed values.
DenseMap<unsigned, const Value *> Slots2Values;
- /// Maps from target index names to target indices.
- StringMap<int> Names2TargetIndices;
- /// Maps from direct target flag names to the direct target flag values.
- StringMap<unsigned> Names2DirectTargetFlags;
- /// Maps from direct target flag names to the bitmask target flag values.
- StringMap<unsigned> Names2BitmaskTargetFlags;
- /// Maps from MMO target flag names to MMO target flag values.
- StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -281,12 +496,6 @@ private:
/// Otherwise return false.
bool consumeIfPresent(MIToken::TokenKind TokenKind);
- void initNames2InstrOpCodes();
-
- /// Try to convert an instruction name to an opcode. Return true if the
- /// instruction name is invalid.
- bool parseInstrName(StringRef InstrName, unsigned &OpCode);
-
bool parseInstruction(unsigned &OpCode, unsigned &Flags);
bool assignRegisterTies(MachineInstr &MI,
@@ -295,62 +504,11 @@ private:
bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
const MCInstrDesc &MCID);
- void initNames2Regs();
-
- /// Try to convert a register name to a register number. Return true if the
- /// register name is invalid.
- bool getRegisterByName(StringRef RegName, unsigned &Reg);
-
- void initNames2RegMasks();
-
- /// Check if the given identifier is a name of a register mask.
- ///
- /// Return null if the identifier isn't a register mask.
- const uint32_t *getRegMask(StringRef Identifier);
-
- void initNames2SubRegIndices();
-
- /// Check if the given identifier is a name of a subregister index.
- ///
- /// Return 0 if the name isn't a subregister index class.
- unsigned getSubRegIndex(StringRef Name);
-
const BasicBlock *getIRBlock(unsigned Slot);
const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
const Value *getIRValue(unsigned Slot);
- void initNames2TargetIndices();
-
- /// Try to convert a name of target index to the corresponding target index.
- ///
- /// Return true if the name isn't a name of a target index.
- bool getTargetIndex(StringRef Name, int &Index);
-
- void initNames2DirectTargetFlags();
-
- /// Try to convert a name of a direct target flag to the corresponding
- /// target flag.
- ///
- /// Return true if the name isn't a name of a direct flag.
- bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
-
- void initNames2BitmaskTargetFlags();
-
- /// Try to convert a name of a bitmask target flag to the corresponding
- /// target flag.
- ///
- /// Return true if the name isn't a name of a bitmask target flag.
- bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
-
- void initNames2MMOTargetFlags();
-
- /// Try to convert a name of a MachineMemOperand target flag to the
- /// corresponding target flag.
- ///
- /// Return true if the name isn't a name of a target MMO flag.
- bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
-
/// Get or create an MCSymbol for a given name.
MCSymbol *getOrCreateMCSymbol(StringRef Name);
@@ -978,7 +1136,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_reassoc) ||
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
- Token.is(MIToken::kw_exact)) {
+ Token.is(MIToken::kw_exact) ||
+ Token.is(MIToken::kw_fpexcept)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1004,13 +1163,15 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoSWrap;
if (Token.is(MIToken::kw_exact))
Flags |= MachineInstr::IsExact;
+ if (Token.is(MIToken::kw_fpexcept))
+ Flags |= MachineInstr::FPExcept;
lex();
}
if (Token.isNot(MIToken::Identifier))
return error("expected a machine instruction");
StringRef InstrName = Token.stringValue();
- if (parseInstrName(InstrName, OpCode))
+ if (PFS.Target.parseInstrName(InstrName, OpCode))
return error(Twine("unknown machine instruction name '") + InstrName + "'");
lex();
return false;
@@ -1019,7 +1180,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
bool MIParser::parseNamedRegister(unsigned &Reg) {
assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
StringRef Name = Token.stringValue();
- if (getRegisterByName(Name, Reg))
+ if (PFS.Target.getRegisterByName(Name, Reg))
return error(Twine("unknown register name '") + Name + "'");
return false;
}
@@ -1070,21 +1231,20 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
StringRef Name = Token.stringValue();
// Was it a register class?
- auto RCNameI = PFS.Names2RegClasses.find(Name);
- if (RCNameI != PFS.Names2RegClasses.end()) {
+ const TargetRegisterClass *RC = PFS.Target.getRegClass(Name);
+ if (RC) {
lex();
- const TargetRegisterClass &RC = *RCNameI->getValue();
switch (RegInfo.Kind) {
case VRegInfo::UNKNOWN:
case VRegInfo::NORMAL:
RegInfo.Kind = VRegInfo::NORMAL;
- if (RegInfo.Explicit && RegInfo.D.RC != &RC) {
+ if (RegInfo.Explicit && RegInfo.D.RC != RC) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
return error(Loc, Twine("conflicting register classes, previously: ") +
Twine(TRI.getRegClassName(RegInfo.D.RC)));
}
- RegInfo.D.RC = &RC;
+ RegInfo.D.RC = RC;
RegInfo.Explicit = true;
return false;
@@ -1098,10 +1258,9 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
// Should be a register bank or a generic register.
const RegisterBank *RegBank = nullptr;
if (Name != "_") {
- auto RBNameI = PFS.Names2RegBanks.find(Name);
- if (RBNameI == PFS.Names2RegBanks.end())
+ RegBank = PFS.Target.getRegBank(Name);
+ if (!RegBank)
return error(Loc, "expected '_', register class, or register bank name");
- RegBank = RBNameI->getValue();
}
lex();
@@ -1173,7 +1332,7 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
if (Token.isNot(MIToken::Identifier))
return error("expected a subregister index after '.'");
auto Name = Token.stringValue();
- SubReg = getSubRegIndex(Name);
+ SubReg = PFS.Target.getSubRegIndex(Name);
if (!SubReg)
return error(Twine("use of unknown subregister index '") + Name + "'");
lex();
@@ -1341,6 +1500,19 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
return false;
}
+// See the LLT implementation for bit size limits.
+static bool verifyScalarSize(uint64_t Size) {
+ return Size != 0 && isUInt<16>(Size);
+}
+
+static bool verifyVectorElementCount(uint64_t NumElts) {
+ return NumElts != 0 && isUInt<16>(NumElts);
+}
+
+static bool verifyAddrSpace(uint64_t AddrSpace) {
+ return isUInt<24>(AddrSpace);
+}
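+
+// For example, under these limits "s32", "p0", and "<4 x s64>" are accepted,
+// while "s0", a scalar wider than 65535 bits, or an address space of 2^24 or
+// more is rejected with a parse error.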
+
bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.range().front() == 's' || Token.range().front() == 'p') {
StringRef SizeStr = Token.range().drop_front();
@@ -1349,12 +1521,19 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
}
if (Token.range().front() == 's') {
- Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
+ auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+
+ Ty = LLT::scalar(ScalarSize);
lex();
return false;
} else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
- unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyAddrSpace(AS))
+ return error("invalid address space number");
+
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
lex();
return false;
@@ -1369,6 +1548,9 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.isNot(MIToken::IntegerLiteral))
return error(Loc, "expected <M x sN> or <M x pA> for vector type");
uint64_t NumElements = Token.integerValue().getZExtValue();
+ if (!verifyVectorElementCount(NumElements))
+ return error("invalid number of vector elements");
+
lex();
if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
@@ -1381,11 +1563,17 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 's'/'p' type character");
- if (Token.range().front() == 's')
- Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
- else if (Token.range().front() == 'p') {
+ if (Token.range().front() == 's') {
+ auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+ Ty = LLT::scalar(ScalarSize);
+ } else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
- unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyAddrSpace(AS))
+ return error("invalid address space number");
+
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
} else
return error(Loc, "expected <M x sN> or <M x pA> for vector type");
@@ -1625,7 +1813,7 @@ bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::SubRegisterIndex));
StringRef Name = Token.stringValue();
- unsigned SubRegIndex = getSubRegIndex(Token.stringValue());
+ unsigned SubRegIndex = PFS.Target.getSubRegIndex(Token.stringValue());
if (SubRegIndex == 0)
return error(Twine("unknown subregister index '") + Name + "'");
lex();
@@ -1669,6 +1857,11 @@ bool MIParser::parseDIExpression(MDNode *&Expr) {
Elements.push_back(Op);
continue;
}
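+ // DWARF attribute encoding names (e.g. DW_ATE_signed) are also accepted as
+ // DIExpression elements here.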
+ if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) {
+ lex();
+ Elements.push_back(Enc);
+ continue;
+ }
return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
}
@@ -2100,7 +2293,7 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
if (Token.isNot(MIToken::Identifier))
return error("expected the name of the target index");
int Index = 0;
- if (getTargetIndex(Token.stringValue(), Index))
+ if (PFS.Target.getTargetIndex(Token.stringValue(), Index))
return error("use of undefined target index '" + Token.stringValue() + "'");
lex();
if (expectAndConsume(MIToken::rparen))
@@ -2242,7 +2435,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::Error:
return true;
case MIToken::Identifier:
- if (const auto *RegMask = getRegMask(Token.stringValue())) {
+ if (const auto *RegMask = PFS.Target.getRegMask(Token.stringValue())) {
Dest = MachineOperand::CreateRegMask(RegMask);
lex();
break;
@@ -2268,8 +2461,8 @@ bool MIParser::parseMachineOperandAndTargetFlags(
return true;
if (Token.isNot(MIToken::Identifier))
return error("expected the name of the target flag");
- if (getDirectTargetFlag(Token.stringValue(), TF)) {
- if (getBitmaskTargetFlag(Token.stringValue(), TF))
+ if (PFS.Target.getDirectTargetFlag(Token.stringValue(), TF)) {
+ if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), TF))
return error("use of undefined target flag '" + Token.stringValue() +
"'");
}
@@ -2279,7 +2472,7 @@ bool MIParser::parseMachineOperandAndTargetFlags(
if (Token.isNot(MIToken::Identifier))
return error("expected the name of the target flag");
unsigned BitFlag = 0;
- if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+ if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), BitFlag))
return error("use of undefined target flag '" + Token.stringValue() +
"'");
// TODO: Report an error when using a duplicate bit target flag.
@@ -2325,6 +2518,10 @@ bool MIParser::parseAlignment(unsigned &Alignment) {
if (getUnsigned(Alignment))
return true;
lex();
+
+ if (!isPowerOf2_32(Alignment))
+ return error("expected a power-of-2 literal after 'align'");
+
return false;
}
@@ -2436,7 +2633,7 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
break;
case MIToken::StringConstant: {
MachineMemOperand::Flags TF;
- if (getMMOTargetFlag(Token.stringValue(), TF))
+ if (PFS.Target.getMMOTargetFlag(Token.stringValue(), TF))
return error("use of undefined target MMO flag '" + Token.stringValue() +
"'");
Flags |= TF;
@@ -2711,87 +2908,6 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
return false;
}
-void MIParser::initNames2InstrOpCodes() {
- if (!Names2InstrOpCodes.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
- Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
-}
-
-bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) {
- initNames2InstrOpCodes();
- auto InstrInfo = Names2InstrOpCodes.find(InstrName);
- if (InstrInfo == Names2InstrOpCodes.end())
- return true;
- OpCode = InstrInfo->getValue();
- return false;
-}
-
-void MIParser::initNames2Regs() {
- if (!Names2Regs.empty())
- return;
- // The '%noreg' register is the register 0.
- Names2Regs.insert(std::make_pair("noreg", 0));
- const auto *TRI = MF.getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
- bool WasInserted =
- Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
- .second;
- (void)WasInserted;
- assert(WasInserted && "Expected registers to be unique case-insensitively");
- }
-}
-
-bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
- initNames2Regs();
- auto RegInfo = Names2Regs.find(RegName);
- if (RegInfo == Names2Regs.end())
- return true;
- Reg = RegInfo->getValue();
- return false;
-}
-
-void MIParser::initNames2RegMasks() {
- if (!Names2RegMasks.empty())
- return;
- const auto *TRI = MF.getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
- ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
- assert(RegMasks.size() == RegMaskNames.size());
- for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
- Names2RegMasks.insert(
- std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
-}
-
-const uint32_t *MIParser::getRegMask(StringRef Identifier) {
- initNames2RegMasks();
- auto RegMaskInfo = Names2RegMasks.find(Identifier);
- if (RegMaskInfo == Names2RegMasks.end())
- return nullptr;
- return RegMaskInfo->getValue();
-}
-
-void MIParser::initNames2SubRegIndices() {
- if (!Names2SubRegIndices.empty())
- return;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
- Names2SubRegIndices.insert(
- std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I));
-}
-
-unsigned MIParser::getSubRegIndex(StringRef Name) {
- initNames2SubRegIndices();
- auto SubRegInfo = Names2SubRegIndices.find(Name);
- if (SubRegInfo == Names2SubRegIndices.end())
- return 0;
- return SubRegInfo->getValue();
-}
-
static void initSlots2BasicBlocks(
const Function &F,
DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
@@ -2861,86 +2977,6 @@ const Value *MIParser::getIRValue(unsigned Slot) {
return ValueInfo->second;
}
-void MIParser::initNames2TargetIndices() {
- if (!Names2TargetIndices.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Indices = TII->getSerializableTargetIndices();
- for (const auto &I : Indices)
- Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getTargetIndex(StringRef Name, int &Index) {
- initNames2TargetIndices();
- auto IndexInfo = Names2TargetIndices.find(Name);
- if (IndexInfo == Names2TargetIndices.end())
- return true;
- Index = IndexInfo->second;
- return false;
-}
-
-void MIParser::initNames2DirectTargetFlags() {
- if (!Names2DirectTargetFlags.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
- for (const auto &I : Flags)
- Names2DirectTargetFlags.insert(
- std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
- initNames2DirectTargetFlags();
- auto FlagInfo = Names2DirectTargetFlags.find(Name);
- if (FlagInfo == Names2DirectTargetFlags.end())
- return true;
- Flag = FlagInfo->second;
- return false;
-}
-
-void MIParser::initNames2BitmaskTargetFlags() {
- if (!Names2BitmaskTargetFlags.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
- for (const auto &I : Flags)
- Names2BitmaskTargetFlags.insert(
- std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
- initNames2BitmaskTargetFlags();
- auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
- if (FlagInfo == Names2BitmaskTargetFlags.end())
- return true;
- Flag = FlagInfo->second;
- return false;
-}
-
-void MIParser::initNames2MMOTargetFlags() {
- if (!Names2MMOTargetFlags.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
- for (const auto &I : Flags)
- Names2MMOTargetFlags.insert(
- std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getMMOTargetFlag(StringRef Name,
- MachineMemOperand::Flags &Flag) {
- initNames2MMOTargetFlags();
- auto FlagInfo = Names2MMOTargetFlags.find(Name);
- if (FlagInfo == Names2MMOTargetFlags.end())
- return true;
- Flag = FlagInfo->second;
- return false;
-}
-
MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
// FIXME: Currently we can't recognize temporary or local symbols and call all
// of the appropriate forms to create them. However, this handles basic cases
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
deleted file mode 100644
index b06ceb21b740..000000000000
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ /dev/null
@@ -1,125 +0,0 @@
-//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the function that parses the machine instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
-#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/Allocator.h"
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineFunction;
-class MDNode;
-class RegisterBank;
-struct SlotMapping;
-class SMDiagnostic;
-class SourceMgr;
-class StringRef;
-class TargetRegisterClass;
-
-struct VRegInfo {
- enum uint8_t {
- UNKNOWN, NORMAL, GENERIC, REGBANK
- } Kind = UNKNOWN;
- bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
- union {
- const TargetRegisterClass *RC;
- const RegisterBank *RegBank;
- } D;
- unsigned VReg;
- unsigned PreferredReg = 0;
-};
-
-using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
-using Name2RegBankMap = StringMap<const RegisterBank *>;
-
-struct PerFunctionMIParsingState {
- BumpPtrAllocator Allocator;
- MachineFunction &MF;
- SourceMgr *SM;
- const SlotMapping &IRSlots;
- const Name2RegClassMap &Names2RegClasses;
- const Name2RegBankMap &Names2RegBanks;
-
- DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
- DenseMap<unsigned, VRegInfo*> VRegInfos;
- StringMap<VRegInfo*> VRegInfosNamed;
- DenseMap<unsigned, int> FixedStackObjectSlots;
- DenseMap<unsigned, int> StackObjectSlots;
- DenseMap<unsigned, unsigned> ConstantPoolSlots;
- DenseMap<unsigned, unsigned> JumpTableSlots;
-
- PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
- const SlotMapping &IRSlots,
- const Name2RegClassMap &Names2RegClasses,
- const Name2RegBankMap &Names2RegBanks);
-
- VRegInfo &getVRegInfo(unsigned Num);
- VRegInfo &getVRegInfoNamed(StringRef RegName);
-};
-
-/// Parse the machine basic block definitions, and skip the machine
-/// instructions.
-///
-/// This function runs the first parsing pass on the machine function's body.
-/// It parses only the machine basic block definitions and creates the machine
-/// basic blocks in the given machine function.
-///
-/// The machine instructions aren't parsed during the first pass because all
-/// the machine basic blocks aren't defined yet - this makes it impossible to
-/// resolve the machine basic block references.
-///
-/// Return true if an error occurred.
-bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
- StringRef Src, SMDiagnostic &Error);
-
-/// Parse the machine instructions.
-///
-/// This function runs the second parsing pass on the machine function's body.
-/// It skips the machine basic block definitions and parses only the machine
-/// instructions and basic block attributes like liveins and successors.
-///
-/// The second parsing pass assumes that the first parsing pass already ran
-/// on the given source string.
-///
-/// Return true if an error occurred.
-bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseMBBReference(PerFunctionMIParsingState &PFS,
- MachineBasicBlock *&MBB, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseRegisterReference(PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
- StringRef Src, SMDiagnostic &Error);
-
-bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
- VRegInfo *&Info, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
- StringRef Src, SMDiagnostic &Error);
-
-bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
- SMDiagnostic &Error);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 00da92a92ec6..b242934def80 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -1,9 +1,8 @@
//===- MIRParser.cpp - MIR serialization format parser implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIRParser/MIRParser.h"
-#include "MIParser.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
@@ -22,12 +20,14 @@
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -40,6 +40,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Target/TargetMachine.h"
#include <memory>
using namespace llvm;
@@ -54,10 +55,8 @@ class MIRParserImpl {
StringRef Filename;
LLVMContext &Context;
SlotMapping IRSlots;
- /// Maps from register class names to register classes.
- Name2RegClassMap Names2RegClasses;
- /// Maps from register bank names to register banks.
- Name2RegBankMap Names2RegBanks;
+ std::unique_ptr<PerTargetMIParsingState> Target;
+
/// True when the MIR file doesn't have LLVM IR. Dummy IR functions are
/// created and inserted into the given module when this is true.
bool NoLLVMIR = false;
@@ -117,6 +116,9 @@ public:
bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF);
+ bool initializeCallSiteInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
std::vector<CalleeSavedInfo> &CSIInfo,
const yaml::StringValue &RegisterSource,
@@ -151,20 +153,6 @@ private:
SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
SMRange SourceRange);
- void initNames2RegClasses(const MachineFunction &MF);
- void initNames2RegBanks(const MachineFunction &MF);
-
- /// Check if the given identifier is a name of a register class.
- ///
- /// Return null if the name isn't a register class.
- const TargetRegisterClass *getRegClass(const MachineFunction &MF,
- StringRef Name);
-
- /// Check if the given identifier is a name of a register bank.
- ///
- /// Return null if the name isn't a register bank.
- const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
-
void computeFunctionProperties(MachineFunction &MF);
};
@@ -271,8 +259,9 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
/// Create an empty function with the given name.
static Function *createDummyFunction(StringRef Name, Module &M) {
auto &Context = M.getContext();
- Function *F = cast<Function>(M.getOrInsertFunction(
- Name, FunctionType::get(Type::getVoidTy(Context), false)));
+ Function *F =
+ Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
+ Function::ExternalLinkage, Name, M);
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
new UnreachableInst(Context, BB);
return F;
@@ -282,6 +271,11 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
// Parse the yaml.
yaml::MachineFunction YamlMF;
yaml::EmptyContext Ctx;
+
+ const LLVMTargetMachine &TM = MMI.getTarget();
+ YamlMF.MachineFuncInfo = std::unique_ptr<yaml::MachineFunctionInfo>(
+ TM.createDefaultFuncInfoYAML());
+
yaml::yamlize(In, YamlMF, false, Ctx);
if (In.error())
return true;
@@ -346,12 +340,58 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
Properties.set(MachineFunctionProperties::Property::NoVRegs);
}
+bool MIRParserImpl::initializeCallSiteInfo(
+ PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ SMDiagnostic Error;
+ const LLVMTargetMachine &TM = MF.getTarget();
+ for (auto YamlCSInfo : YamlMF.CallSitesInfo) {
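+ // Resolve the call instruction from its (block number, offset) location,
+ // checking that both are in range and that it really is a call.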
+ yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
+ if (MILoc.BlockNum >= MF.size())
+ return error(Twine(MF.getName()) +
+ Twine(" call instruction block out of range.") +
+ " Unable to reference bb:" + Twine(MILoc.BlockNum));
+ auto CallB = std::next(MF.begin(), MILoc.BlockNum);
+ if (MILoc.Offset >= CallB->size())
+ return error(Twine(MF.getName()) +
+ Twine(" call instruction offset out of range.") +
+ "Unable to reference instruction at bb: " +
+ Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset));
+ auto CallI = std::next(CallB->begin(), MILoc.Offset);
+ if (!CallI->isCall())
+ return error(Twine(MF.getName()) +
+ Twine(" call site info should reference call "
+ "instruction. Instruction at bb:") +
+ Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset) +
+ " is not a call instruction");
+ MachineFunction::CallSiteInfo CSInfo;
+ for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error))
+ return error(Error, ArgRegPair.Reg.SourceRange);
+ CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
+ }
+
+ if (TM.Options.EnableDebugEntryValues)
+ MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
+ }
+
+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
+ return error(Twine("Call site info provided but not used"));
+ return false;
+}
+
bool
MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MachineFunction &MF) {
// TODO: Recreate the machine function.
- initNames2RegClasses(MF);
- initNames2RegBanks(MF);
+ if (Target) {
+ // Avoid clearing state if we're using the same subtarget again.
+ Target->setTarget(MF.getSubtarget());
+ } else {
+ Target.reset(new PerTargetMIParsingState(MF.getSubtarget()));
+ }
+
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
@@ -367,8 +407,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.FailedISel)
MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
- PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
- Names2RegBanks);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
@@ -419,8 +458,32 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (setupRegisterInfo(PFS, YamlMF))
return true;
+ if (YamlMF.MachineFuncInfo) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ // Note this is called after the initial constructor of the
+ // MachineFunctionInfo based on the MachineFunction, which may depend on the
+ // IR.
+
+ SMRange SrcRange;
+ if (TM.parseMachineFunctionInfo(*YamlMF.MachineFuncInfo, PFS, Error,
+ SrcRange)) {
+ return error(Error, SrcRange);
+ }
+ }
+
+ // Set the reserved registers after parsing MachineFuncInfo. The target may
+ // have been recording information used to select the reserved registers
+ // there.
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.freezeReservedRegs(MF);
+
computeFunctionProperties(MF);
+ if (initializeCallSiteInfo(PFS, YamlMF))
+ return true;
+
MF.getSubtarget().mirFileLoaded(MF);
MF.verify();
@@ -449,12 +512,12 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
Info.Kind = VRegInfo::GENERIC;
Info.D.RegBank = nullptr;
} else {
- const auto *RC = getRegClass(MF, VReg.Class.Value);
+ const auto *RC = Target->getRegClass(VReg.Class.Value);
if (RC) {
Info.Kind = VRegInfo::NORMAL;
Info.D.RC = RC;
} else {
- const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value);
+ const RegisterBank *RegBank = Target->getRegBank(VReg.Class.Value);
if (!RegBank)
return error(
VReg.Class.SourceRange.Start,
@@ -557,9 +620,6 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
}
}
- // FIXME: This is a temporary workaround until the reserved registers can be
- // serialized.
- MRI.freezeReservedRegs(MF);
return Error;
}
@@ -567,6 +627,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const Function &F = MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
@@ -608,8 +669,12 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
Object.IsImmutable, Object.IsAliased);
else
ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
- MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+
+ if (!TFI->isSupportedStackID(Object.StackID))
+ return error(Object.ID.SourceRange.Start,
+ Twine("StackID is not supported by target"));
MFI.setStackID(ObjectIdx, Object.StackID);
+ MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
ObjectIdx))
.second)
@@ -637,14 +702,17 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
"' isn't defined in the function '" + F.getName() +
"'");
}
+ if (!TFI->isSupportedStackID(Object.StackID))
+ return error(Object.ID.SourceRange.Start,
+ Twine("StackID is not supported by target"));
if (Object.Type == yaml::MachineStackObject::VariableSized)
ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
else
ObjectIdx = MFI.CreateStackObject(
Object.Size, Object.Alignment,
- Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
+ Object.Type == yaml::MachineStackObject::SpillSlot, Alloca,
+ Object.StackID);
MFI.setObjectOffset(ObjectIdx, Object.Offset);
- MFI.setStackID(ObjectIdx, Object.StackID);
if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
.second)
@@ -844,48 +912,6 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
Error.getFixIts());
}
-void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) {
- if (!Names2RegClasses.empty())
- return;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
- const auto *RC = TRI->getRegClass(I);
- Names2RegClasses.insert(
- std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
- }
-}
-
-void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
- if (!Names2RegBanks.empty())
- return;
- const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
- // If the target does not support GlobalISel, we may not have a
- // register bank info.
- if (!RBI)
- return;
- for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
- const auto &RegBank = RBI->getRegBank(I);
- Names2RegBanks.insert(
- std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
- }
-}
-
-const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
- StringRef Name) {
- auto RegClassInfo = Names2RegClasses.find(Name);
- if (RegClassInfo == Names2RegClasses.end())
- return nullptr;
- return RegClassInfo->getValue();
-}
-
-const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
- StringRef Name) {
- auto RegBankInfo = Names2RegBanks.find(Name);
- if (RegBankInfo == Names2RegBanks.end())
- return nullptr;
- return RegBankInfo->getValue();
-}
-
MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
: Impl(std::move(Impl)) {}
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index d9dcc428943f..0a95a0ced0f5 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -1,9 +1,8 @@
//===- MIRPrinter.cpp - MIR serialization format printer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,6 +35,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -129,6 +129,9 @@ public:
const MachineJumpTableInfo &JTI);
void convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF, ModuleSlotTracker &MST);
+ void convertCallSiteObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -212,10 +215,16 @@ void MIRPrinter::print(const MachineFunction &MF) {
MST.incorporateFunction(MF.getFunction());
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
+ convertCallSiteObjects(YamlMF, MF, MST);
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+
+ const TargetMachine &TM = MF.getTarget();
+ YamlMF.MachineFuncInfo =
+ std::unique_ptr<yaml::MachineFunctionInfo>(TM.convertFuncInfoToYAML(MF));
+
raw_string_ostream StrOS(YamlMF.Body.Value.Value);
bool IsNewlineNeeded = false;
for (const auto &MBB : MF) {
@@ -352,7 +361,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// Process fixed stack objects.
unsigned ID = 0;
- for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) {
if (MFI.isDeadObjectIndex(I))
continue;
@@ -364,17 +373,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
- YamlObject.StackID = MFI.getStackID(I);
+ YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
YMF.FixedStackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(
- std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
+ std::make_pair(I, FrameIndexOperand::createFixed(ID)));
}
// Process ordinary stack objects.
ID = 0;
- for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) {
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) {
if (MFI.isDeadObjectIndex(I))
continue;
@@ -391,14 +400,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
- YamlObject.StackID = MFI.getStackID(I);
+ YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
- I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+ I, FrameIndexOperand::create(YamlObject.Name.Value, ID)));
}
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
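+ // Skip entries whose spill slot frame object has been marked dead; they no
+ // longer reference a live stack object.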
+ if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx()))
+ continue;
+
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
if (!CSInfo.isSpilledToReg()) {
@@ -452,6 +464,39 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
}
}
+void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ for (auto CSInfo : MF.getCallSitesInfo()) {
+ yaml::CallSiteInfo YmlCS;
+ yaml::CallSiteInfo::MachineInstrLoc CallLocation;
+
+ // Prepare instruction position.
+ MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator();
+ CallLocation.BlockNum = CallI->getParent()->getNumber();
+ // Get call instruction offset from the beginning of block.
+ CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI);
+ YmlCS.CallLocation = CallLocation;
+ // Construct call arguments and their forwarding register info.
+ for (auto ArgReg : CSInfo.second) {
+ yaml::CallSiteInfo::ArgRegPair YmlArgReg;
+ YmlArgReg.ArgNo = ArgReg.ArgNo;
+ printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI);
+ YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg);
+ }
+ YMF.CallSitesInfo.push_back(YmlCS);
+ }
+
+ // Sort call info by position of call instructions.
+ llvm::sort(YMF.CallSitesInfo.begin(), YMF.CallSitesInfo.end(),
+ [](yaml::CallSiteInfo A, yaml::CallSiteInfo B) {
+ if (A.CallLocation.BlockNum == B.CallLocation.BlockNum)
+ return A.CallLocation.Offset < B.CallLocation.Offset;
+ return A.CallLocation.BlockNum < B.CallLocation.BlockNum;
+ });
+}
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineConstantPool &ConstantPool) {
unsigned ID = 0;
@@ -706,6 +751,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nsw ";
if (MI.getFlag(MachineInstr::IsExact))
OS << "exact ";
+ if (MI.getFlag(MachineInstr::FPExcept))
+ OS << "fpexcept ";
OS << TII->getName(MI.getOpcode());
if (I < E)
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 1a8427430ea0..e032fffd658c 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -1,9 +1,8 @@
//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 03771bc5dae1..4d29e883d879 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -133,8 +132,12 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
instr_iterator First,
instr_iterator Last) {
assert(Parent->getParent() == FromList.Parent->getParent() &&
- "MachineInstr parent mismatch!");
- assert(this != &FromList && "Called without a real transfer...");
+ "cannot transfer MachineInstrs between MachineFunctions");
+
+ // If it's within the same BB, there's nothing to do.
+ if (this == &FromList)
+ return;
+
assert(Parent != FromList.Parent && "Two lists have the same parent?");
// If splicing between two blocks within the same function, just update the
@@ -995,7 +998,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
- if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
LV->getVarInfo(Reg).Kills.push_back(&*I);
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 3459a9f71a73..53a35b7e89c2 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 4fee9c4ea027..639b588766a1 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1,9 +1,8 @@
//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -452,15 +451,28 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
+ bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop);
+ bool hasViableTopFallthrough(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopTop(
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq);
BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
void buildLoopChains(const MachineLoop &L);
void rotateLoop(
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
- const BlockFilterSet &LoopBlockSet);
+ BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
@@ -938,8 +950,8 @@ MachineBlockPlacement::getBestNonConflictingEdges(
// Sort for highest frequency.
auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
- std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
- std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+ llvm::stable_sort(Edges[0], Cmp);
+ llvm::stable_sort(Edges[1], Cmp);
auto BestA = Edges[0].begin();
auto BestB = Edges[1].begin();
// Arrange for the correct answer to be in BestA and BestB
@@ -1527,15 +1539,12 @@ MachineBlockPlacement::selectBestSuccessor(
// profitable than BestSucc. Position is important because we preserve it and
// prefer first best match. Here we aren't comparing in order, so we capture
// the position instead.
- if (DupCandidates.size() != 0) {
- auto cmp =
- [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
- const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
- return std::get<0>(a) > std::get<0>(b);
- };
- std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
- }
- for(auto &Tup : DupCandidates) {
+ llvm::stable_sort(DupCandidates,
+ [](std::tuple<BranchProbability, MachineBasicBlock *> L,
+ std::tuple<BranchProbability, MachineBasicBlock *> R) {
+ return std::get<0>(L) > std::get<0>(R);
+ });
+ for (auto &Tup : DupCandidates) {
BranchProbability DupProb;
MachineBasicBlock *Succ;
std::tie(DupProb, Succ) = Tup;
@@ -1757,63 +1766,238 @@ void MachineBlockPlacement::buildChain(
<< getBlockName(*Chain.begin()) << "\n");
}
-/// Find the best loop top block for layout.
+// If the bottom block BB has only one successor, OldTop, it is usually
+// profitable to move it before OldTop, except in the following case:
+//
+// -->OldTop<-
+// | . |
+// | . |
+// | . |
+// ---Pred |
+// | |
+// BB-----
+//
+// If BB is moved before OldTop, Pred needs a taken branch to BB and can't
+// lay out the other successor below it, so moving BB can't reduce the number
+// of taken branches. In this case we keep the original layout.
+bool
+MachineBlockPlacement::canMoveBottomBlockToTop(
+ const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop) {
+ if (BottomBlock->pred_size() != 1)
+ return true;
+ MachineBasicBlock *Pred = *BottomBlock->pred_begin();
+ if (Pred->succ_size() != 2)
+ return true;
+
+ MachineBasicBlock *OtherBB = *Pred->succ_begin();
+ if (OtherBB == BottomBlock)
+ OtherBB = *Pred->succ_rbegin();
+ if (OtherBB == OldTop)
+ return false;
+
+ return true;
+}
+
+// Find out the possible fall through frequency to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency MaxFreq = 0;
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ // Found a Pred block that can be placed before Top.
+ // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ // Check if Succ can be placed after Pred.
+ // Succ should not be in any chain, or it should be the head of its chain.
+ if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, Top);
+ if (EdgeFreq > MaxFreq)
+ MaxFreq = EdgeFreq;
+ }
+ }
+ }
+ return MaxFreq;
+}
+
+// Compute the fall through gains when moving NewTop before OldTop.
+//
+// In the following diagram, edges marked with "-" are reduced fallthrough and
+// edges marked with "+" are increased fallthrough; this function computes
+//
+// SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+// |
+// | -
+// V
+// --->OldTop
+// | .
+// | .
+// +| . +
+// | Pred --->
+// | |-
+// | V
+// --- NewTop <---
+// |-
+// V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+ const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+ BlockFrequency FallThrough2Exit = 0;
+ if (ExitBB)
+ FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, ExitBB);
+ BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, OldTop);
+
+ // Find the best Pred of NewTop.
+ MachineBasicBlock *BestPred = nullptr;
+ BlockFrequency FallThroughFromPred = 0;
+ for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!PredChain || Pred == *std::prev(PredChain->end())) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, NewTop);
+ if (EdgeFreq > FallThroughFromPred) {
+ FallThroughFromPred = EdgeFreq;
+ BestPred = Pred;
+ }
+ }
+ }
+
+ // If NewTop is not placed after Pred, another successor can be placed
+ // after Pred.
+ BlockFrequency NewFreq = 0;
+ if (BestPred) {
+ for (MachineBasicBlock *Succ : BestPred->successors()) {
+ if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+ continue;
+ if (ComputedEdges.find(Succ) != ComputedEdges.end())
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if ((SuccChain && (Succ != *SuccChain->begin())) ||
+ (SuccChain == BlockToChain[BestPred]))
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, Succ);
+ if (EdgeFreq > NewFreq)
+ NewFreq = EdgeFreq;
+ }
+ BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, NewTop);
+ if (NewFreq > OrigEdgeFreq) {
+ // If NewTop is not the best successor of Pred, then Pred doesn't fall
+ // through to NewTop, so there is no FallThroughFromPred and no NewFreq.
+ NewFreq = 0;
+ FallThroughFromPred = 0;
+ }
+ }
+
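+ // Moving NewTop before OldTop gains the old back edge NewTop->OldTop as a
+ // fall through, plus any new fall through BestPred gets to another
+ // successor; it loses the old fall through into OldTop, the fall through
+ // NewTop->ExitBB, and the fall through from BestPred to NewTop. Only a net
+ // gain is reported; otherwise the result is zero.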
+ BlockFrequency Result = 0;
+ BlockFrequency Gains = BackEdgeFreq + NewFreq;
+ BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+ FallThroughFromPred;
+ if (Gains > Lost)
+ Result = Gains - Lost;
+ return Result;
+}
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// among the predecessors of the old top.
+///
+/// Look for a block which is strictly better than the old top for laying
+/// out before the old top of the loop. This looks for only two patterns:
+///
+/// 1. a block has only one successor, the old loop top
+///
+/// Because such a block will always result in an unconditional jump,
+/// rotating it in front of the old top is always profitable.
+///
+/// 2. a block has two successors, one is the old top, the other is the loop
+/// exit, and it has more than one predecessor
///
-/// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// If such a block is laid out below one of its predecessors P, only P can
+/// fall through to it; all other predecessors need a jump to it, followed by
+/// another conditional jump to the loop header. If it is moved before the
+/// loop header, all its predecessors jump to it and then fall through to the
+/// loop header, so every predecessor except P saves one taken branch.
+/// At the same time, moving it before the old top increases the taken
+/// branches to the loop exit block, so the saved taken branches are weighed
+/// against the increased taken branches to the loop exit block.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
- // Placing the latch block before the header may introduce an extra branch
- // that skips this block the first time the loop is executed, which we want
- // to avoid when optimising for size.
- // FIXME: in theory there is a case that does not introduce a new branch,
- // i.e. when the layout predecessor does not fallthrough to the loop header.
- // In practice this never happens though: there always seems to be a preheader
- // that can fallthrough and that is also placed before the header.
- if (F->getFunction().optForSize())
- return L.getHeader();
-
+MachineBlockPlacement::findBestLoopTopHelper(
+ MachineBasicBlock *OldTop,
+ const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
- BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ BlockChain &HeaderChain = *BlockToChain[OldTop];
if (!LoopBlockSet.count(*HeaderChain.begin()))
- return L.getHeader();
+ return OldTop;
- LLVM_DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+ << "\n");
- BlockFrequency BestPredFreq;
+ BlockFrequency BestGains = 0;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+ for (MachineBasicBlock *Pred : OldTop->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
- LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
+ if (Pred == L.getHeader())
+ continue;
+ LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has "
<< Pred->succ_size() << " successors, ";
MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
- if (Pred->succ_size() > 1)
+ if (Pred->succ_size() > 2)
continue;
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
- if (!BestPred || PredFreq > BestPredFreq ||
- (!(PredFreq < BestPredFreq) &&
- Pred->isLayoutSuccessor(L.getHeader()))) {
+ MachineBasicBlock *OtherBB = nullptr;
+ if (Pred->succ_size() == 2) {
+ OtherBB = *Pred->succ_begin();
+ if (OtherBB == OldTop)
+ OtherBB = *Pred->succ_rbegin();
+ }
+
+ if (!canMoveBottomBlockToTop(Pred, OldTop))
+ continue;
+
+ BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+ LoopBlockSet);
+ if ((Gains > 0) && (Gains > BestGains ||
+ ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
BestPred = Pred;
- BestPredFreq = PredFreq;
+ BestGains = Gains;
}
}
// If no direct predecessor is fine, just use the loop header.
if (!BestPred) {
LLVM_DEBUG(dbgs() << " final top unchanged\n");
- return L.getHeader();
+ return OldTop;
}
// Walk backwards through any straight line of predecessors.
@@ -1826,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
return BestPred;
}
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper until no better
+/// block can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+  // FIXME: in theory there is a case that does not introduce a new branch,
+  // i.e. when the layout predecessor does not fall through to the loop header.
+  // In practice this never happens though: there always seems to be a preheader
+  // that can fall through and that is also placed before the header.
+ if (F->getFunction().hasOptSize())
+ return L.getHeader();
+
+ MachineBasicBlock *OldTop = nullptr;
+ MachineBasicBlock *NewTop = L.getHeader();
+ while (NewTop != OldTop) {
+ OldTop = NewTop;
+ NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+ if (NewTop != OldTop)
+ ComputedEdges[NewTop] = { OldTop, false };
+ }
+ return NewTop;
+}
+
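
A sketch of the fixed-point iteration that findBestLoopTop performs, with ints standing in for MachineBasicBlock pointers and a caller-supplied helper standing in for findBestLoopTopHelper (an assumed simplification, not the LLVM API):

#include <cstdio>
#include <functional>

// Keep asking the helper for a better top until it returns the block it was
// given, i.e. until a fixed point is reached.
int findBestTop(int Header, const std::function<int(int)> &BestTopHelper) {
  int OldTop = -1;
  int NewTop = Header;
  while (NewTop != OldTop) {
    OldTop = NewTop;
    NewTop = BestTopHelper(OldTop);
  }
  return NewTop;
}

int main() {
  // Toy helper: blocks 0..2 each have a strictly better predecessor.
  auto Helper = [](int Top) { return Top < 3 ? Top + 1 : Top; };
  std::printf("%d\n", findBestTop(0, Helper)); // 3
}
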
/// Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
@@ -1833,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
/// fallthrough opportunities.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
+ const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
// within the loop is the best one to layout at the top. However, if the loop
@@ -1944,9 +2157,43 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
<< "\n");
+ ExitFreq = BestExitEdgeFreq;
return ExitingBB;
}
+/// Check if there is a fallthrough to the loop header Top.
+///
+/// 1. Look for a Pred that can be laid out before Top.
+/// 2. Check if Top is the most probable successor of Pred.
+bool
+MachineBlockPlacement::hasViableTopFallthrough(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+      // Found a Pred block that can be placed before Top.
+      // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+        // Check if Succ can be placed after Pred.
+        // Succ must not be in any chain yet, or it must be the head of its chain.
+ if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK)
+ return true;
+ }
+ }
+ return false;
+}
+
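
A simplified model of the "is Top the best successor of Pred" check used above, with plain doubles for branch probabilities and the chain-placement conditions omitted (all names assumed):

#include <cstdio>
#include <utility>
#include <vector>

// Top is a viable fallthrough target of Pred only if no other successor of
// Pred is more probable; otherwise Pred would rather fall through elsewhere.
bool topIsBestSuccessor(const std::vector<std::pair<int, double>> &SuccProbs,
                        int Top) {
  double TopProb = 0.0;
  for (const auto &SP : SuccProbs)
    if (SP.first == Top)
      TopProb = SP.second;
  for (const auto &SP : SuccProbs)
    if (SP.first != Top && SP.second > TopProb)
      return false;
  return true;
}

int main() {
  std::printf("%d\n", topIsBestSuccessor({{1, 0.7}, {2, 0.3}}, 1)); // 1
  std::printf("%d\n", topIsBestSuccessor({{1, 0.2}, {2, 0.8}}, 1)); // 0
}
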
/// Attempt to rotate an exiting block to the bottom of the loop.
///
/// Once we have built a chain, try to rotate it to line up the hot exit block
@@ -1955,6 +2202,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
+ BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
@@ -1966,15 +2214,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (Bottom == ExitingBB)
return;
- bool ViableTopFallthrough = false;
- for (MachineBasicBlock *Pred : Top->predecessors()) {
- BlockChain *PredChain = BlockToChain[Pred];
- if (!LoopBlockSet.count(Pred) &&
- (!PredChain || Pred == *std::prev(PredChain->end()))) {
- ViableTopFallthrough = true;
- break;
- }
- }
+ bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
// If the header has viable fallthrough, check whether the current loop
// bottom is a viable exiting block. If so, bail out as rotating will
@@ -1986,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
(!SuccChain || Succ == *SuccChain->begin()))
return;
}
+
+    // Rotating will destroy the top fallthrough, so make sure the new exit
+    // frequency is larger than the top fallthrough frequency.
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+ if (FallThrough2Top >= ExitFreq)
+ return;
}
BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2041,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
- auto HeaderBB = L.getHeader();
- auto HeaderIter = llvm::find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2062,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// chain head is not the loop header. As we only consider natural loops with
// single header, this computation can be done only once.
BlockFrequency HeaderFallThroughCost(0);
- for (auto *Pred : HeaderBB->predecessors()) {
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+ for (auto *Pred : ChainHeaderBB->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
(!PredChain || Pred == *std::prev(PredChain->end()))) {
- auto EdgeFreq =
- MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+ auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, ChainHeaderBB);
auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
// If the predecessor has only an unconditional jump to the header, we
// need to consider the cost of this jump.
@@ -2117,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// If the current BB is the loop header, we need to take into account the
// cost of the missed fall through edge from outside of the loop to the
// header.
- if (Iter != HeaderIter)
+ if (Iter != LoopChain.begin())
Cost += HeaderFallThroughCost;
// Collect the loop exit cost by summing up frequencies of all exit edges
@@ -2238,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// loop. This will default to the header, but may end up as one of the
// predecessors to the header if there is one which will result in strictly
// fewer branches in the loop body.
- // When we use profile data to rotate the loop, this is unnecessary.
- MachineBasicBlock *LoopTop =
- RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
@@ -2249,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
  // Loops are processed innermost to outermost; make sure we clear
// PreferredLoopExit before processing a new loop.
PreferredLoopExit = nullptr;
+ BlockFrequency ExitFreq;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -2270,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
else
- rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
+ rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
LLVM_DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -2497,8 +2741,8 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction().optForMinSize() ||
- (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
+ if (F->getFunction().hasMinSize() ||
+ (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize()))
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
@@ -2773,7 +3017,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction().optForSize())
+ if (MF.getFunction().hasOptSize())
TailDupSize = 1;
bool PreRegAlloc = false;
TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
@@ -2796,7 +3040,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
- /*AfterBlockPlacement=*/true)) {
+ /*AfterPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
ComputedEdges.clear();
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index e4952aaaba06..d2277ce51746 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6ee8571c28aa..2df6d40d9293 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -1,9 +1,8 @@
//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -50,6 +50,8 @@ using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+STATISTIC(NumPREs, "Number of partially redundant expressions"
+                   " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
@@ -85,6 +87,7 @@ namespace {
void releaseMemory() override {
ScopeMap.clear();
+ PREMap.clear();
Exps.clear();
}
@@ -95,9 +98,12 @@ namespace {
ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
AllocatorTy>;
using ScopeType = ScopedHTType::ScopeTy;
+ using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
unsigned LookAheadLimit = 0;
DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
+ DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
+ PREMap;
ScopedHTType VNT;
SmallVector<MachineInstr *, 64> Exps;
unsigned CurrVN = 0;
@@ -109,22 +115,24 @@ namespace {
MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
- bool &PhysUseDef) const;
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs, bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
- bool &NonLocal) const;
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs, bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
- MachineInstr *CSMI, MachineInstr *MI);
+ MachineBasicBlock *CSBB, MachineInstr *MI);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
- bool ProcessBlock(MachineBasicBlock *MBB);
+ bool ProcessBlockCSE(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
+
+ bool isPRECandidate(MachineInstr *MI);
+ bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
+ bool PerformSimplePRE(MachineDominatorTree *DT);
};
} // end anonymous namespace
@@ -256,9 +264,9 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
/// instruction does not uses a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
- bool &PhysUseDef) const{
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs,
+ bool &PhysUseDef) const {
// First, add all uses to PhysRefs.
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
@@ -278,7 +286,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// (which currently contains only uses), set the PhysUseDef flag.
PhysUseDef = false;
MachineBasicBlock::const_iterator I = MI; I = std::next(I);
- for (const MachineOperand &MO : MI->operands()) {
+ for (const auto &MOP : llvm::enumerate(MI->operands())) {
+ const MachineOperand &MO = MOP.value();
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -293,20 +302,21 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
- PhysDefs.push_back(Reg);
+ PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
}
// Finally, add all defs to PhysRefs as well.
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
- for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
+ ++AI)
PhysRefs.insert(*AI);
return !PhysRefs.empty();
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs,
bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
// not in the same basic block as the given instruction. The only exception
@@ -320,7 +330,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
- if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+ if (MRI->isAllocatable(PhysDefs[i].second) ||
+ MRI->isReserved(PhysDefs[i].second))
// Avoid extending live range of physical registers if they are
      // allocatable or reserved.
return false;
@@ -381,7 +392,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Ignore stuff that we obviously can't move.
if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
- MI->hasUnmodeledSideEffects())
+ MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
return false;
if (MI->mayLoad()) {
@@ -404,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
}
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
-/// common expression that defines Reg.
+/// common expression that defines Reg. CSBB is the basic block where CSReg
+/// is defined.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
- MachineInstr *CSMI, MachineInstr *MI) {
+ MachineBasicBlock *CSBB, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.
// If CSReg is used at all uses of Reg, CSE should not increase register
@@ -432,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
if (TII->isAsCheapAsAMove(*MI)) {
- MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
@@ -487,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
ScopeMap.erase(SI);
}
-bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
@@ -536,7 +547,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
SmallSet<unsigned, 8> PhysRefs;
- SmallVector<unsigned, 2> PhysDefs;
+ PhysDefVector PhysDefs;
bool PhysUseDef = false;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
PhysDefs, PhysUseDef)) {
@@ -597,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
- if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
@@ -635,6 +646,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// we should make sure it is not dead at CSMI.
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
+ for (auto PhysDef : PhysDefs)
+ if (!MI->getOperand(PhysDef.first).isDead())
+ CSMI->getOperand(PhysDef.first).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
// their uses in all the instructions between CSMI and MI.
@@ -663,9 +677,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
while (!PhysDefs.empty()) {
- unsigned LiveIn = PhysDefs.pop_back_val();
- if (!MBB->isLiveIn(LiveIn))
- MBB->addLiveIn(LiveIn);
+ auto LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn.second))
+ MBB->addLiveIn(LiveIn.second);
}
++NumCrossBBCSEs;
}
@@ -734,7 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
- Changed |= ProcessBlock(MBB);
+ Changed |= ProcessBlockCSE(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
ExitScopeIfDone(Node, OpenChildren);
}
@@ -742,6 +756,104 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
return Changed;
}
+// We use stronger checks for PRE candidates than for CSE ones so that they
+// subsume the checks inside ProcessBlockCSE(), not only those inside
+// isCSECandidate(). This helps to exclude instrs created by PRE that won't be
+// CSEd later.
+bool MachineCSE::isPRECandidate(MachineInstr *MI) {
+ if (!isCSECandidate(MI) ||
+ MI->isNotDuplicable() ||
+ MI->mayLoad() ||
+ MI->isAsCheapAsAMove() ||
+ MI->getNumDefs() != 1 ||
+ MI->getNumExplicitDefs() != 1)
+ return false;
+
+ for (auto def : MI->defs())
+ if (!TRI->isVirtualRegister(def.getReg()))
+ return false;
+
+ for (auto use : MI->uses())
+ if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
+ return false;
+
+ return true;
+}
+
+bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isPRECandidate(MI))
+ continue;
+
+ if (!PREMap.count(MI)) {
+ PREMap[MI] = MBB;
+ continue;
+ }
+
+ auto MBB1 = PREMap[MI];
+    assert(
+        !DT->properlyDominates(MBB, MBB1) &&
+        "MBB cannot properly dominate MBB1 during a DFS of the dominator tree!");
+ auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
+ if (!CMBB->isLegalToHoistInto())
+ continue;
+
+    // Two instrs are partially redundant if their basic blocks are reachable
+    // from one to the other but one does not dominate the other.
+ if (CMBB != MBB1) {
+ auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
+ if (BB != nullptr && BB1 != nullptr &&
+ (isPotentiallyReachable(BB1, BB) ||
+ isPotentiallyReachable(BB, BB1))) {
+
+ assert(MI->getOperand(0).isDef() &&
+ "First operand of instr with one explicit def must be this def");
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned NewReg = MRI->cloneVirtualRegister(VReg);
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+ continue;
+ MachineInstr &NewMI =
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+ NewMI.getOperand(0).setReg(NewReg);
+
+ PREMap[MI] = CMBB;
+ ++NumPREs;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+// This simple PRE (partial redundancy elimination) pass doesn't actually
+// eliminate partial redundancy but transforms it into full redundancy,
+// anticipating that the next CSE step will eliminate this created redundancy.
+// If CSE doesn't eliminate it, then the created instruction will remain dead
+// and will be eliminated later by the Remove Dead Machine Instructions pass.
+bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
+ SmallVector<MachineDomTreeNode *, 32> BBs;
+
+ PREMap.clear();
+ bool Changed = false;
+ BBs.push_back(DT->getRootNode());
+ do {
+ auto Node = BBs.pop_back_val();
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+ for (MachineDomTreeNode *Child : Children)
+ BBs.push_back(Child);
+
+ MachineBasicBlock *MBB = Node->getBlock();
+ Changed |= ProcessBlockPRE(DT, MBB);
+
+ } while (!BBs.empty());
+
+ return Changed;
+}
+
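
A toy model of the simple PRE idea described above: when the same expression key is seen in a second block, clone it into a common dominator so that both originals become fully redundant for the following CSE step. The CFG, the dominator query, and all names here are hypothetical simplifications, not MachineCSE's data structures:

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Toy CFG: block 0 dominates blocks 1 and 2; "a+b" appears in both 1 and 2.
  auto CommonDominator = [](int, int) { return 0; };
  std::map<std::string, int> FirstSeenIn;          // expression -> first block
  std::vector<std::pair<std::string, int>> Clones; // clones hoisted by "PRE"

  std::vector<std::pair<int, std::string>> Instrs = {{1, "a+b"}, {2, "a+b"}};
  for (const auto &BlockAndExpr : Instrs) {
    int Block = BlockAndExpr.first;
    const std::string &Expr = BlockAndExpr.second;
    auto It = FirstSeenIn.find(Expr);
    if (It == FirstSeenIn.end()) {
      FirstSeenIn[Expr] = Block;
      continue;
    }
    int CDom = CommonDominator(It->second, Block);
    // Partially redundant: neither occurrence dominates the other, so hoist a
    // copy into the common dominator and let the later CSE remove both.
    if (CDom != It->second)
      Clones.emplace_back(Expr, CDom);
  }
  for (const auto &C : Clones)
    std::printf("cloned '%s' into block %d\n", C.first.c_str(), C.second);
}
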
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -752,5 +864,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
- return PerformCSE(DT->getRootNode());
+ bool ChangedPRE, ChangedCSE;
+ ChangedPRE = PerformSimplePRE(DT);
+ ChangedCSE = PerformCSE(DT->getRootNode());
+ return ChangedPRE || ChangedCSE;
}
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index f51b482e20e3..0584ec0bd2b3 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -1,9 +1,8 @@
//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -559,16 +558,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
continue;
LLVM_DEBUG(if (dump_intrs) {
- dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n";
- for (auto const *InstrPtr : DelInstrs) {
- dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
- InstrPtr->print(dbgs(), false, false, false, TII);
- }
+ dbgs() << "\tFor the Pattern (" << (int)P
+ << ") these instructions could be removed\n";
+ for (auto const *InstrPtr : DelInstrs)
+ InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+ /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
dbgs() << "\tThese instructions could replace the removed ones\n";
- for (auto const *InstrPtr : InsInstrs) {
- dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
- InstrPtr->print(dbgs(), false, false, false, TII);
- }
+ for (auto const *InstrPtr : InsInstrs)
+ InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+ /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
});
bool SubstituteAlways = false;
@@ -641,7 +639,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- OptSize = MF.getFunction().optForSize();
+ OptSize = MF.getFunction().hasOptSize();
LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 19879fe89007..9fc12ac89e12 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1,9 +1,8 @@
//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
index b559e4e513a6..6704298c17d6 100644
--- a/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -1,9 +1,8 @@
//===- MachineDominanceFrontier.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 6b2802626456..1dfba8638c22 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -1,9 +1,8 @@
//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index 0b316871dbdf..bae3a4333bda 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -1,9 +1,8 @@
//===-- MachineFrameInfo.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,7 +56,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
!IsSpillSlot, StackID));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
- ensureMaxAlignment(Alignment);
+ if (StackID == 0)
+ ensureMaxAlignment(Alignment);
return Index;
}
@@ -92,7 +92,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
- /*isSpillSlot=*/false, /*Alloca=*/nullptr,
+ /*IsSpillSlot=*/false, /*Alloca=*/nullptr,
IsAliased));
return -++NumFixedObjects;
}
@@ -142,11 +142,15 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
// should keep in mind that there's tight coupling between the two.
for (int i = getObjectIndexBegin(); i != 0; ++i) {
+    // Only estimate the stack size of the default stack.
+ if (getStackID(i) != TargetStackID::Default)
+ continue;
int FixedOff = -getObjectOffset(i);
if (FixedOff > Offset) Offset = FixedOff;
}
for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
- if (isDeadObjectIndex(i))
+    // Only estimate the stack size of live objects on the default stack.
+ if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default)
continue;
Offset += getObjectSize(i);
unsigned Align = getObjectAlignment(i);
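
A standalone sketch of the estimate this hunk adjusts: only live objects on the default stack (StackID 0) contribute to the estimated stack size. The field names and rounding details are assumptions for illustration, not the MachineFrameInfo API:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Obj { uint64_t Size; uint64_t Align; bool Dead; unsigned StackID; };

uint64_t estimateStackSize(const std::vector<Obj> &Objects) {
  uint64_t Offset = 0;
  uint64_t MaxAlign = 1;
  for (const Obj &O : Objects) {
    if (O.Dead || O.StackID != 0) // skip dead and non-default-stack objects
      continue;
    Offset = (Offset + O.Size + O.Align - 1) / O.Align * O.Align;
    MaxAlign = std::max(MaxAlign, O.Align);
  }
  return (Offset + MaxAlign - 1) / MaxAlign * MaxAlign;
}

int main() {
  // The 16-byte object lives on a non-default stack and the 4-byte object is
  // dead, so only the first 8 bytes are counted.
  std::vector<Obj> Objects = {{8, 8, false, 0}, {16, 16, false, 1}, {4, 4, true, 0}};
  std::printf("%llu\n", (unsigned long long)estimateStackSize(Objects)); // 8
}
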
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 3495319670a5..4df5ce2dcedc 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -1,9 +1,8 @@
//===- MachineFunction.cpp ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,6 +43,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -165,7 +165,7 @@ void MachineFunction::init() {
!F.hasFnAttribute("no-realign-stack");
FrameInfo = new (Allocator) MachineFrameInfo(
getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
- /*ForceRealign=*/CanRealignSP &&
+ /*ForcedRealign=*/CanRealignSP &&
F.hasFnAttribute(Attribute::StackAlignment));
if (F.hasFnAttribute(Attribute::StackAlignment))
@@ -175,7 +175,7 @@ void MachineFunction::init() {
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on F.
- // FIXME: Use Function::optForSize().
+ // FIXME: Use Function::hasOptSize().
if (!F.hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -274,6 +274,12 @@ bool MachineFunction::shouldSplitStack() const {
return getFunction().hasFnAttribute("split-stack");
}
+LLVM_NODISCARD unsigned
+MachineFunction::addFrameInst(const MCCFIInstruction &Inst) {
+ FrameInstructions.push_back(Inst);
+ return FrameInstructions.size() - 1;
+}
+
/// This discards all of the MachineBasicBlock numbers and recomputes them.
/// This guarantees that the MBB numbers are sequential, dense, and match the
/// ordering of the blocks within the function. If a specific MachineBasicBlock
@@ -357,6 +363,13 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
/// ~MachineInstr() destructor must be empty.
void
MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  // Verify that the call site info is in a valid state. This assertion should
+  // be triggered during the implementation of call site info support for a
+  // new architecture. If the assertion is triggered, the back trace will tell
+  // where to insert a call to updateCallSiteInfo().
+ assert((!MI->isCall(MachineInstr::IgnoreBundle) ||
+ CallSitesInfo.find(MI) == CallSitesInfo.end()) &&
+ "Call site info was not updated!");
// Strip it for parts. The operand array and the MI object itself are
// independently recyclable.
if (MI->Operands)
@@ -396,19 +409,18 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
- if (MMO->getValue())
- return new (Allocator)
- MachineMemOperand(MachinePointerInfo(MMO->getValue(),
- MMO->getOffset()+Offset),
- MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+ // If there is no pointer value, the offset isn't tracked so we need to adjust
+ // the base alignment.
+ unsigned Align = PtrInfo.V.isNull()
+ ? MinAlign(MMO->getBaseAlignment(), Offset)
+ : MMO->getBaseAlignment();
+
return new (Allocator)
- MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
- MMO->getOffset()+Offset),
- MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
+ Align, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
}
MachineMemOperand *
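
A sketch of the alignment adjustment this hunk introduces: when the pointer value is unknown, the alignment that can still be guaranteed after folding in Offset is the largest power of two dividing both the base alignment and the offset, i.e. the lowest set bit of (BaseAlign | Offset). The helper below is a standalone stand-in for llvm::MinAlign written from that assumption:

#include <cstdint>
#include <cstdio>

uint64_t minAlign(uint64_t BaseAlign, uint64_t Offset) {
  uint64_t Bits = BaseAlign | Offset;
  return Bits & (~Bits + 1); // isolate the lowest set bit
}

int main() {
  std::printf("%llu\n", (unsigned long long)minAlign(16, 0));  // 16
  std::printf("%llu\n", (unsigned long long)minAlign(16, 4));  // 4
  std::printf("%llu\n", (unsigned long long)minAlign(16, 24)); // 8
}
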
@@ -425,6 +437,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ MachineMemOperand::Flags Flags) {
+ return new (Allocator) MachineMemOperand(
+ MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlignment(),
+ MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
MachineInstr::ExtraInfo *
MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
MCSymbol *PreInstrSymbol,
@@ -802,6 +823,32 @@ try_next:;
return FilterID;
}
+void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) {
+ MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true);
+ MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true);
+ I->setPreInstrSymbol(*this, BeginLabel);
+ I->setPostInstrSymbol(*this, EndLabel);
+
+ DIType *DI = dyn_cast<DIType>(MD);
+ CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI));
+}
+
+void MachineFunction::updateCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ if (!Target.Options.EnableDebugEntryValues || Old == New)
+ return;
+
+  assert(Old->isCall() && (!New || New->isCall()) &&
+         "Call site info refers only to call instructions!");
+ CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old);
+ if (CSIt == CallSitesInfo.end())
+ return;
+ CallSiteInfo CSInfo = std::move(CSIt->second);
+ CallSitesInfo.erase(CSIt);
+ if (New)
+ CallSitesInfo[New] = CSInfo;
+}
+
/// \}
//===----------------------------------------------------------------------===//
@@ -888,9 +935,11 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
OS << "Jump Tables:\n";
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
- OS << printJumpTableEntryReference(i) << ": ";
+ OS << printJumpTableEntryReference(i) << ':';
for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
+ if (i != e)
+ OS << '\n';
}
OS << '\n';
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 5db4e299fa70..0da4cf3fc90c 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -1,9 +1,8 @@
//===-- MachineFunctionPass.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 9c96ba748778..0ea8975cc74c 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -1,9 +1,8 @@
//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 764a84c7e132..e5c398a2d10c 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@@ -50,9 +50,9 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
#ifndef NDEBUG
- bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+ bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
+ Op.getType() == MachineOperand::MO_MCSymbol;
// OpNo now points as the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
// RegMask operands go between the explicit and implicit operands.
assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
- OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+ OpNo < MCID->getNumOperands() || isDebugOp) &&
"Trying to add an operand to a machine instr that is already done!");
#endif
@@ -512,45 +513,65 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
}
+void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
+ const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
+
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning instruction symbols!");
+
+ setPreInstrSymbol(MF, MI.getPreInstrSymbol());
+ setPostInstrSymbol(MF, MI.getPostInstrSymbol());
+}
+
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
  // For now, just return the union of the flags. If the flags get more
// complicated over time, we might need more logic here.
return getFlags() | Other.getFlags();
}
-void MachineInstr::copyIRFlags(const Instruction &I) {
+uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+ uint16_t MIFlags = 0;
// Copy the wrapping flags.
if (const OverflowingBinaryOperator *OB =
dyn_cast<OverflowingBinaryOperator>(&I)) {
if (OB->hasNoSignedWrap())
- setFlag(MachineInstr::MIFlag::NoSWrap);
+ MIFlags |= MachineInstr::MIFlag::NoSWrap;
if (OB->hasNoUnsignedWrap())
- setFlag(MachineInstr::MIFlag::NoUWrap);
+ MIFlags |= MachineInstr::MIFlag::NoUWrap;
}
// Copy the exact flag.
if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
if (PE->isExact())
- setFlag(MachineInstr::MIFlag::IsExact);
+ MIFlags |= MachineInstr::MIFlag::IsExact;
// Copy the fast-math flags.
if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
const FastMathFlags Flags = FP->getFastMathFlags();
if (Flags.noNaNs())
- setFlag(MachineInstr::MIFlag::FmNoNans);
+ MIFlags |= MachineInstr::MIFlag::FmNoNans;
if (Flags.noInfs())
- setFlag(MachineInstr::MIFlag::FmNoInfs);
+ MIFlags |= MachineInstr::MIFlag::FmNoInfs;
if (Flags.noSignedZeros())
- setFlag(MachineInstr::MIFlag::FmNsz);
+ MIFlags |= MachineInstr::MIFlag::FmNsz;
if (Flags.allowReciprocal())
- setFlag(MachineInstr::MIFlag::FmArcp);
+ MIFlags |= MachineInstr::MIFlag::FmArcp;
if (Flags.allowContract())
- setFlag(MachineInstr::MIFlag::FmContract);
+ MIFlags |= MachineInstr::MIFlag::FmContract;
if (Flags.approxFunc())
- setFlag(MachineInstr::MIFlag::FmAfn);
+ MIFlags |= MachineInstr::MIFlag::FmAfn;
if (Flags.allowReassoc())
- setFlag(MachineInstr::MIFlag::FmReassoc);
+ MIFlags |= MachineInstr::MIFlag::FmReassoc;
}
+
+ return MIFlags;
+}
+
+void MachineInstr::copyIRFlags(const Instruction &I) {
+ Flags = copyFlagsFromInstruction(I);
}
bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
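
A small sketch of the refactoring in this hunk: instead of setting each MI flag as it is discovered, the flags are accumulated into a bitmask that the caller stores in one step. The enum values below are hypothetical placeholders, not the real MIFlag encoding:

#include <cstdint>
#include <cstdio>

enum Flag : uint16_t { NoSWrap = 1 << 0, NoUWrap = 1 << 1, IsExact = 1 << 2 };

// Collect all applicable flags into one mask; the caller assigns the mask to
// the instruction instead of mutating it flag by flag.
uint16_t collectFlags(bool HasNSW, bool HasNUW, bool HasExact) {
  uint16_t Flags = 0;
  if (HasNSW)
    Flags |= NoSWrap;
  if (HasNUW)
    Flags |= NoUWrap;
  if (HasExact)
    Flags |= IsExact;
  return Flags;
}

int main() {
  std::printf("0x%x\n", collectFlags(true, false, true)); // 0x5
}
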
@@ -1157,7 +1178,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
}
if (isPosition() || isDebugInstr() || isTerminator() ||
- hasUnmodeledSideEffects())
+ mayRaiseFPException() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,8 +1194,8 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
-bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
- bool UseTBAA) {
+bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
+ bool UseTBAA) const {
const MachineFunction *MF = getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -1304,7 +1325,11 @@ bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();
for (MachineMemOperand *MMO : memoperands()) {
- if (MMO->isVolatile()) return false;
+ if (!MMO->isUnordered())
+ // If the memory operand has ordering side effects, we can't move the
+ // instruction. Such an instruction is technically an invariant load,
+      // but the caller code would need to be updated to expect that.
+ return false;
if (MMO->isStore()) return false;
if (MMO->isInvariant() && MMO->isDereferenceable())
continue;
@@ -1447,7 +1472,7 @@ void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
ModuleSlotTracker MST(M);
if (F)
MST.incorporateFunction(*F);
- print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII);
+ print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, AddNewLine, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
@@ -1519,6 +1544,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nsw ";
if (getFlag(MachineInstr::IsExact))
OS << "exact ";
+ if (getFlag(MachineInstr::FPExcept))
+ OS << "fpexcept ";
// Print the opcode name.
if (TII)
@@ -1905,7 +1932,7 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
void MachineInstr::addRegisterDefined(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo);
+ MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
if (MO)
return;
} else {
@@ -2050,7 +2077,7 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
const DIExpression *Expr = MI.getDebugExpression();
if (MI.isIndirectDebugValue()) {
assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
- Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
return Expr;
}
@@ -2100,3 +2127,54 @@ void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
for (auto *DBI : DbgValues)
DBI->getOperand(0).setReg(Reg);
}
+
+using MMOList = SmallVector<const MachineMemOperand *, 2>;
+
+static unsigned getSpillSlotSize(MMOList &Accesses,
+ const MachineFrameInfo &MFI) {
+ unsigned Size = 0;
+ for (auto A : Accesses)
+ if (MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex()))
+ Size += A->getSize();
+ return Size;
+}
+
+Optional<unsigned>
+MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
+ int FI;
+ if (TII->isStoreToStackSlotPostFE(*this, FI)) {
+ const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+ if (MFI.isSpillSlotObjectIndex(FI))
+ return (*memoperands_begin())->getSize();
+ }
+ return None;
+}
+
+Optional<unsigned>
+MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
+ MMOList Accesses;
+ if (TII->hasStoreToStackSlot(*this, Accesses))
+ return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+ return None;
+}
+
+Optional<unsigned>
+MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
+ int FI;
+ if (TII->isLoadFromStackSlotPostFE(*this, FI)) {
+ const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+ if (MFI.isSpillSlotObjectIndex(FI))
+ return (*memoperands_begin())->getSize();
+ }
+ return None;
+}
+
+Optional<unsigned>
+MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
+ MMOList Accesses;
+ if (TII->hasLoadFromStackSlot(*this, Accesses))
+ return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+ return None;
+}
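
A sketch of the new spill-size helpers above: sum only those accesses whose frame index is a spill slot. The Access struct and its fields are assumptions standing in for MachineMemOperand and the MachineFrameInfo queries:

#include <cstdio>
#include <vector>

struct Access { unsigned Size; bool IsSpillSlot; };

unsigned spillSlotSize(const std::vector<Access> &Accesses) {
  unsigned Size = 0;
  for (const Access &A : Accesses)
    if (A.IsSpillSlot) // only accesses to spill-slot objects are counted
      Size += A.Size;
  return Size;
}

int main() {
  std::vector<Access> Accesses = {{8, true}, {16, false}, {4, true}};
  std::printf("%u\n", spillSlotSize(Accesses)); // 12
}
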
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index ae378cc8c464..32e266e9401e 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 58fd1f238420..1107e609c258 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -1,9 +1,8 @@
//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 2bce59235057..3b8b430d1b0f 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -1,9 +1,8 @@
//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 6ef8de88f8b1..aadcd7319799 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -206,11 +205,11 @@ MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- UsesVAFloatArgument = UsesMorestackAddr = false;
+ UsesMSVCFloatingPoint = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
TheModule = &M;
- DbgInfoAvailable = !empty(M.debug_compile_units());
+ DbgInfoAvailable = !llvm::empty(M.debug_compile_units());
return false;
}
@@ -328,22 +327,3 @@ char FreeMachineFunction::ID;
FunctionPass *llvm::createFreeMachineFunctionPass() {
return new FreeMachineFunction();
}
-
-//===- MMI building helpers -----------------------------------------------===//
-
-void llvm::computeUsesVAFloatArgument(const CallInst &I,
- MachineModuleInfo &MMI) {
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
- if (FT->isVarArg() && !MMI.usesVAFloatArgument()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type *T = I.getArgOperand(i)->getType();
- for (auto i : post_order(T)) {
- if (i->isFloatingPointTy()) {
- MMI.setUsesVAFloatArgument(true);
- return;
- }
- }
- }
- }
-}
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 7b4f64bfe60d..16d24880ebe4 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index 05e51e1873cf..4fa4ea7f6cf5 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,6 +24,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -181,6 +181,19 @@ void MachineOperand::ChangeToES(const char *SymName,
setTargetFlags(TargetFlags);
}
+void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
+ unsigned char TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a global address");
+
+ removeRegFromUses();
+
+ OpKind = MO_GlobalAddress;
+ Contents.OffsetedInfo.Val.GV = GV;
+ setOffset(Offset);
+ setTargetFlags(TargetFlags);
+}
+
void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into an MCSymbol");
@@ -329,7 +342,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_Register:
// Register operands don't have target flags.
- return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+ return hash_combine(MO.getType(), (unsigned)MO.getReg(), MO.getSubReg(), MO.isDef());
case MachineOperand::MO_Immediate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
case MachineOperand::MO_CImmediate:
@@ -348,7 +361,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
case MachineOperand::MO_ExternalSymbol:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
- MO.getSymbolName());
+ StringRef(MO.getSymbolName()));
case MachineOperand::MO_GlobalAddress:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
MO.getOffset());
@@ -994,7 +1007,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
"invalid pointer value");
- assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert(getBaseAlignment() == a && a != 0 && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
AtomicInfo.SSID = static_cast<unsigned>(SSID);
@@ -1125,7 +1138,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printLLVMNameWithoutPrefix(
OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
break;
- case PseudoSourceValue::TargetCustom:
+ default:
// FIXME: This is not necessarily the correct MIR serialization format for
// a custom pseudo source value, but at least it allows
// -print-machineinstrs to work on a target with custom pseudo source
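
The new MachineOperand::ChangeToGA mutator added above mirrors ChangeToES and ChangeToMCSymbol: it asserts that a tied register operand is not being rewritten, drops the register from the use lists, and switches the operand to MO_GlobalAddress. A minimal caller-side sketch, assuming a pass that already has a MachineInstr MI and a GlobalValue *GV in scope (the operand index is purely illustrative):

    // Hypothetical rewrite of operand 1 into a global-address operand.
    MachineOperand &MO = MI.getOperand(1);
    if (!MO.isReg() || !MO.isTied())
      MO.ChangeToGA(GV, /*Offset=*/0, /*TargetFlags=*/0);
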
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 906d5560d568..27db9106b337 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -1,9 +1,8 @@
///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
///
-/// The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
///
///===---------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index ad96c0e579e4..80a235aeaa5c 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -1,9 +1,8 @@
//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -74,8 +73,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
-#include <map>
-#include <sstream>
#include <tuple>
#include <vector>
@@ -1095,19 +1092,15 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
InstructionMapper &Mapper,
unsigned Name) {
- // Create the function name. This should be unique. For now, just hash the
- // module name and include it in the function name plus the number of this
- // function.
- std::ostringstream NameStream;
+ // Create the function name. This should be unique.
// FIXME: We should have a better naming scheme. This should be stable,
// regardless of changes to the outliner's cost model/traversal order.
- NameStream << "OUTLINED_FUNCTION_" << Name;
+ std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
- Function *F = dyn_cast<Function>(
- M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
- assert(F && "Function was null!");
+ Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
+ Function::ExternalLinkage, FunctionName, M);
// NOTE: If this is linkonceodr, then we can take advantage of linker deduping
// which gives us better results when we outline from linkonceodr functions.
@@ -1205,11 +1198,10 @@ bool MachineOutliner::outline(Module &M,
unsigned OutlinedFunctionNum = 0;
// Sort by benefit. The most beneficial functions should be outlined first.
- std::stable_sort(
- FunctionList.begin(), FunctionList.end(),
- [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
- return LHS.getBenefit() > RHS.getBenefit();
- });
+ llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
+ const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
// Walk over each function, outlining them as we go along. Functions are
// outlined greedily, based off the sort above.
@@ -1253,8 +1245,9 @@ bool MachineOutliner::outline(Module &M,
if (MBB.getParent()->getProperties().hasProperty(
MachineFunctionProperties::Property::TracksLiveness)) {
// Helper lambda for adding implicit def operands to the call
- // instruction.
- auto CopyDefs = [&CallInst](MachineInstr &MI) {
+ // instruction. It also updates call site information for moved
+ // code.
+ auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) {
for (MachineOperand &MOP : MI.operands()) {
// Skip over anything that isn't a register.
if (!MOP.isReg())
@@ -1266,13 +1259,16 @@ bool MachineOutliner::outline(Module &M,
MOP.getReg(), true, /* isDef = true */
true /* isImp = true */));
}
+ if (MI.isCall())
+ MI.getMF()->updateCallSiteInfo(&MI);
};
// Copy over the defs in the outlined range.
// First inst in outlined range <-- Anything that's defined in this
// ... .. range has to be added as an
// implicit Last inst in outlined range <-- def to the call
- // instruction.
- std::for_each(CallInst, std::next(EndIt), CopyDefs);
+ // instruction. Also remove call site information for outlined block
+ // of code.
+ std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls);
}
// Erase from the point after where the call was inserted up to, and
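
The outliner no longer round-trips the name through std::ostringstream and getOrInsertFunction; it builds the name with Twine and creates the function directly, so the dyn_cast/assert pair is gone. Reduced to its essentials, the new creation path in createOutlinedFunction is:

    std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
    LLVMContext &C = M.getContext();
    // Function::Create cannot return null, unlike the old getOrInsertFunction cast.
    Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
                                   Function::ExternalLinkage, FunctionName, M);
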
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 4d451bdd7f69..54df522d371a 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -1,9 +1,8 @@
//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -97,6 +96,14 @@ using namespace llvm;
STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
+STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
+STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
+STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
+STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
+STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
+STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
+STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
+STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
@@ -141,6 +148,11 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
cl::ReallyHidden, cl::init(false),
cl::ZeroOrMore, cl::desc("Ignore RecMII"));
+static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
+ cl::init(false));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -180,6 +192,16 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
!EnableSWPOptSize.getPosition())
return false;
+ if (!mf.getSubtarget().enableMachinePipeliner())
+ return false;
+
+ // Cannot pipeline loops without instruction itineraries if we are using
+ // DFA for the pipeliner.
+ if (mf.getSubtarget().useDFAforSMS() &&
+ (!mf.getSubtarget().getInstrItineraryData() ||
+ mf.getSubtarget().getInstrItineraryData()->isEmpty()))
+ return false;
+
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
@@ -211,8 +233,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
}
#endif
- if (!canPipelineLoop(L))
+ setPragmaPipelineOptions(L);
+ if (!canPipelineLoop(L)) {
+ LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
return Changed;
+ }
++NumTrytoPipeline;
@@ -221,6 +246,50 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
return Changed;
}
+void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+ MachineBasicBlock *LBLK = L.getTopBlock();
+
+ if (LBLK == nullptr)
+ return;
+
+ const BasicBlock *BBLK = LBLK->getBasicBlock();
+ if (BBLK == nullptr)
+ return;
+
+ const Instruction *TI = BBLK->getTerminator();
+ if (TI == nullptr)
+ return;
+
+ MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
+ if (LoopID == nullptr)
+ return;
+
+  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+
+ if (MD == nullptr)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+
+ if (S == nullptr)
+ continue;
+
+ if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
+ assert(MD->getNumOperands() == 2 &&
+ "Pipeline initiation interval hint metadata should have two operands.");
+ II_setByPragma =
+ mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+ } else if (S->getString() == "llvm.loop.pipeline.disable") {
+ disabledByPragma = true;
+ }
+ }
+}
+
/// Return true if the loop can be software pipelined. The algorithm is
/// restricted to loops with a single basic block. Make sure that the
/// branch in the loop can be analyzed.
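
setPragmaPipelineOptions above walks the llvm.loop metadata on the loop's terminator and records two hints: llvm.loop.pipeline.initiationinterval (stored in II_setByPragma) and llvm.loop.pipeline.disable (stored in disabledByPragma). Together with the setMII/setMAX_II helpers added later in this patch, the effective intervals reduce to the following sketch (not the literal code):

    // A pragma-supplied initiation interval overrides the computed minimum and
    // also pins the search upper bound; otherwise use res/rec MII plus slack.
    MII    = II_setByPragma > 0 ? II_setByPragma : std::max(ResMII, RecMII);
    MAX_II = II_setByPragma > 0 ? II_setByPragma : MII + 10;
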
@@ -228,21 +297,36 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
if (L.getNumBlocks() != 1)
return false;
+ if (disabledByPragma)
+ return false;
+
// Check if the branch can't be understood because we can't do pipelining
// if that's the case.
LI.TBB = nullptr;
LI.FBB = nullptr;
LI.BrCond.clear();
- if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
+ if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
+ LLVM_DEBUG(
+ dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+ NumFailBranch++;
return false;
+ }
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
+ if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
+ LLVM_DEBUG(
+ dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+ NumFailLoop++;
return false;
+ }
- if (!L.getLoopPreheader())
+ if (!L.getLoopPreheader()) {
+ LLVM_DEBUG(
+ dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+ NumFailPreheader++;
return false;
+ }
// Remove any subregisters from inputs to phi nodes.
preprocessPhiNodes(*L.getHeader());
@@ -286,7 +370,8 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
- SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+ SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
+ II_setByPragma);
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -309,6 +394,20 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
return SMS.hasNewSchedule();
}
+void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
+ if (II_setByPragma > 0)
+ MII = II_setByPragma;
+ else
+ MII = std::max(ResMII, RecMII);
+}
+
+void SwingSchedulerDAG::setMAX_II() {
+ if (II_setByPragma > 0)
+ MAX_II = II_setByPragma;
+ else
+ MAX_II = MII + 10;
+}
+
/// We override the schedule function in ScheduleDAGInstrs to implement the
/// scheduling part of the Swing Modulo Scheduling algorithm.
void SwingSchedulerDAG::schedule() {
@@ -335,17 +434,28 @@ void SwingSchedulerDAG::schedule() {
if (SwpIgnoreRecMII)
RecMII = 0;
- MII = std::max(ResMII, RecMII);
- LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
- << ", res=" << ResMII << ")\n");
+ setMII(ResMII, RecMII);
+ setMAX_II();
+
+ LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
+ << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
// Can't schedule a loop without a valid MII.
- if (MII == 0)
+ if (MII == 0) {
+ LLVM_DEBUG(
+ dbgs()
+ << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+ NumFailZeroMII++;
return;
+ }
// Don't pipeline large loops.
- if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
+ if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
+    LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
+                      << ", we don't pipeline large loops\n");
+ NumFailLargeMaxMII++;
return;
+ }
computeNodeFunctions(NodeSets);
@@ -362,7 +472,7 @@ void SwingSchedulerDAG::schedule() {
}
});
- std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
+ llvm::stable_sort(NodeSets, std::greater<NodeSet>());
groupRemainingNodes(NodeSets);
@@ -383,17 +493,27 @@ void SwingSchedulerDAG::schedule() {
SMSchedule Schedule(Pass.MF);
Scheduled = schedulePipeline(Schedule);
- if (!Scheduled)
+  if (!Scheduled) {
+ LLVM_DEBUG(dbgs() << "No schedule found, return\n");
+ NumFailNoSchedule++;
return;
+ }
unsigned numStages = Schedule.getMaxStageCount();
// No need to generate pipeline if there are no overlapped iterations.
- if (numStages == 0)
+ if (numStages == 0) {
+ LLVM_DEBUG(
+ dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+ NumFailZeroStage++;
return;
-
+ }
// Check that the maximum stage count is less than user-defined limit.
- if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
+ if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
+ LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
+ << " : too many stages, abort\n");
+ NumFailLargeMaxStage++;
return;
+ }
generatePipelinedLoop(Schedule);
++NumPipelined;
@@ -467,7 +587,8 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
/// Return true if the instruction causes a chain between memory
/// references before and after it.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
- return MI.isCall() || MI.hasUnmodeledSideEffects() ||
+ return MI.isCall() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects() ||
(MI.hasOrderedMemoryRef() &&
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
}
@@ -475,16 +596,16 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
/// Return the underlying objects for the memory references of an instruction.
/// This function calls the code in ValueTracking, but first checks that the
/// instruction has a memory operand.
-static void getUnderlyingObjects(MachineInstr *MI,
- SmallVectorImpl<Value *> &Objs,
+static void getUnderlyingObjects(const MachineInstr *MI,
+ SmallVectorImpl<const Value *> &Objs,
const DataLayout &DL) {
if (!MI->hasOneMemOperand())
return;
MachineMemOperand *MM = *MI->memoperands_begin();
if (!MM->getValue())
return;
- GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
- for (Value *V : Objs) {
+ GetUnderlyingObjects(MM->getValue(), Objs, DL);
+ for (const Value *V : Objs) {
if (!isIdentifiedObject(V)) {
Objs.clear();
return;
@@ -498,7 +619,7 @@ static void getUnderlyingObjects(MachineInstr *MI,
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
/// but that code doesn't create loop carried dependences.
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
- MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
Value *UnknownValue =
UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
for (auto &SU : SUnits) {
@@ -506,7 +627,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
if (isDependenceBarrier(MI, AA))
PendingLoads.clear();
else if (MI.mayLoad()) {
- SmallVector<Value *, 4> Objs;
+ SmallVector<const Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
if (Objs.empty())
Objs.push_back(UnknownValue);
@@ -515,12 +636,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
SUs.push_back(&SU);
}
} else if (MI.mayStore()) {
- SmallVector<Value *, 4> Objs;
+ SmallVector<const Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
if (Objs.empty())
Objs.push_back(UnknownValue);
for (auto V : Objs) {
- MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
+ MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
PendingLoads.find(V);
if (I == PendingLoads.end())
continue;
@@ -531,7 +652,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// First, perform the cheaper check that compares the base register.
// If they are the same and the load offset is less than the store
// offset, then mark the dependence as loop carried potentially.
- MachineOperand *BaseOp1, *BaseOp2;
+ const MachineOperand *BaseOp1, *BaseOp2;
int64_t Offset1, Offset2;
if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
@@ -744,27 +865,55 @@ namespace {
// the number of functional unit choices.
struct FuncUnitSorter {
const InstrItineraryData *InstrItins;
+ const MCSubtargetInfo *STI;
DenseMap<unsigned, unsigned> Resources;
- FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+ FuncUnitSorter(const TargetSubtargetInfo &TSI)
+ : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
// Compute the number of functional unit alternatives needed
// at each stage, and take the minimum value. We prioritize the
// instructions by the least number of choices first.
unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
- unsigned schedClass = Inst->getDesc().getSchedClass();
+ unsigned SchedClass = Inst->getDesc().getSchedClass();
unsigned min = UINT_MAX;
- for (const InstrStage *IS = InstrItins->beginStage(schedClass),
- *IE = InstrItins->endStage(schedClass);
- IS != IE; ++IS) {
- unsigned funcUnits = IS->getUnits();
- unsigned numAlternatives = countPopulation(funcUnits);
- if (numAlternatives < min) {
- min = numAlternatives;
- F = funcUnits;
+ if (InstrItins && !InstrItins->isEmpty()) {
+ for (const InstrStage &IS :
+ make_range(InstrItins->beginStage(SchedClass),
+ InstrItins->endStage(SchedClass))) {
+ unsigned funcUnits = IS.getUnits();
+ unsigned numAlternatives = countPopulation(funcUnits);
+ if (numAlternatives < min) {
+ min = numAlternatives;
+ F = funcUnits;
+ }
}
+ return min;
+ }
+ if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc =
+ STI->getSchedModel().getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ // No valid Schedule Class Desc for schedClass, should be
+ // Pseudo/PostRAPseudo
+ return min;
+
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ const MCProcResourceDesc *ProcResource =
+ STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
+ unsigned NumUnits = ProcResource->NumUnits;
+ if (NumUnits < min) {
+ min = NumUnits;
+ F = PRE.ProcResourceIdx;
+ }
+ }
+ return min;
}
- return min;
+ llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
}
// Compute the critical resources needed by the instruction. This
@@ -774,13 +923,34 @@ struct FuncUnitSorter {
// the same, highly used, functional unit have high priority.
void calcCriticalResources(MachineInstr &MI) {
unsigned SchedClass = MI.getDesc().getSchedClass();
- for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
- *IE = InstrItins->endStage(SchedClass);
- IS != IE; ++IS) {
- unsigned FuncUnits = IS->getUnits();
- if (countPopulation(FuncUnits) == 1)
- Resources[FuncUnits]++;
+ if (InstrItins && !InstrItins->isEmpty()) {
+ for (const InstrStage &IS :
+ make_range(InstrItins->beginStage(SchedClass),
+ InstrItins->endStage(SchedClass))) {
+ unsigned FuncUnits = IS.getUnits();
+ if (countPopulation(FuncUnits) == 1)
+ Resources[FuncUnits]++;
+ }
+ return;
+ }
+ if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc =
+ STI->getSchedModel().getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ // No valid Schedule Class Desc for schedClass, should be
+ // Pseudo/PostRAPseudo
+ return;
+
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ Resources[PRE.ProcResourceIdx]++;
+ }
+ return;
}
+ llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
}
/// Return true if IS1 has less priority than IS2.
@@ -803,14 +973,15 @@ struct FuncUnitSorter {
/// to add it to each existing DFA, until a legal space is found. If the
/// instruction cannot be reserved in an existing DFA, we create a new one.
unsigned SwingSchedulerDAG::calculateResMII() {
- SmallVector<DFAPacketizer *, 8> Resources;
+
+ LLVM_DEBUG(dbgs() << "calculateResMII:\n");
+ SmallVector<ResourceManager*, 8> Resources;
MachineBasicBlock *MBB = Loop.getHeader();
- Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
+ Resources.push_back(new ResourceManager(&MF.getSubtarget()));
// Sort the instructions by the number of available choices for scheduling,
// least to most. Use the number of critical resources as the tie breaker.
- FuncUnitSorter FUS =
- FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
+ FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
E = MBB->getFirstTerminator();
I != E; ++I)
@@ -832,33 +1003,40 @@ unsigned SwingSchedulerDAG::calculateResMII() {
// DFA is needed for each cycle.
unsigned NumCycles = getSUnit(MI)->Latency;
unsigned ReservedCycles = 0;
- SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
- SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
+ SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
+ SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
+ LLVM_DEBUG({
+ dbgs() << "Trying to reserve resource for " << NumCycles
+ << " cycles for \n";
+ MI->dump();
+ });
for (unsigned C = 0; C < NumCycles; ++C)
while (RI != RE) {
- if ((*RI++)->canReserveResources(*MI)) {
+ if ((*RI)->canReserveResources(*MI)) {
+ (*RI)->reserveResources(*MI);
++ReservedCycles;
break;
}
+ RI++;
}
- // Start reserving resources using existing DFAs.
- for (unsigned C = 0; C < ReservedCycles; ++C) {
- --RI;
- (*RI)->reserveResources(*MI);
- }
+ LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+ << ", NumCycles:" << NumCycles << "\n");
// Add new DFAs, if needed, to reserve resources.
for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
- DFAPacketizer *NewResource =
- TII->CreateTargetScheduleState(MF.getSubtarget());
+ LLVM_DEBUG(if (SwpDebugResource) dbgs()
+ << "NewResource created to reserve resources"
+ << "\n");
+ ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
assert(NewResource->canReserveResources(*MI) && "Reserve error.");
NewResource->reserveResources(*MI);
Resources.push_back(NewResource);
}
}
int Resmii = Resources.size();
+  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
// Delete the memory for each of the DFAs that were created earlier.
- for (DFAPacketizer *RI : Resources) {
- DFAPacketizer *D = RI;
+ for (ResourceManager *RI : Resources) {
+ ResourceManager *D = RI;
delete D;
}
Resources.clear();
@@ -1517,7 +1695,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
}
}
-/// Add the node to the set, and add all is its connected nodes to the set.
+/// Add the node to the set, and add all of its connected nodes to the set.
void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
SetVector<SUnit *> &NodesAdded) {
NewSet.insert(SU);
@@ -1741,12 +1919,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
/// Process the nodes in the computed order and create the pipelined schedule
/// of the instructions, if possible. Return true if a schedule is found.
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
- if (NodeOrder.empty())
+
+  if (NodeOrder.empty()) {
+    LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n");
return false;
+ }
bool scheduleFound = false;
+ unsigned II = 0;
// Keep increasing II until a valid schedule is found.
- for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+ for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
Schedule.reset();
Schedule.setInitiationInterval(II);
LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
@@ -1767,13 +1949,14 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
II, this);
LLVM_DEBUG({
+ dbgs() << "\n";
dbgs() << "Inst (" << SU->NodeNum << ") ";
SU->getInstr()->dump();
dbgs() << "\n";
});
LLVM_DEBUG({
- dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
- << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
+ dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
+ LateStart, SchedEnd, SchedStart);
});
if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
@@ -1818,7 +2001,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
scheduleFound = Schedule.isValidSchedule(this);
}
- LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+ LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
+ << ")\n");
if (scheduleFound)
Schedule.finalizeSchedule(this);
@@ -1847,6 +2031,10 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
InstrMapTy InstrMap;
SmallVector<MachineBasicBlock *, 4> PrologBBs;
+
+ MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
+ assert(PreheaderBB != nullptr &&
+ "Need to add code to handle loops w/o preheader");
// Generate the prolog instructions that set up the pipeline.
generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
MF.insert(BB->getIterator(), KernelBB);
@@ -1903,7 +2091,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
removeDeadInstructions(KernelBB, EpilogBBs);
// Add branches between prolog and epilog blocks.
- addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
+ addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
// Remove the original loop since it's no longer referenced.
for (auto &I : *BB)
@@ -2242,7 +2430,7 @@ void SwingSchedulerDAG::generateExistingPhis(
// Use the value defined by the Phi, unless we're generating the first
// epilog and the Phi refers to a Phi in a different stage.
else if (VRMap[PrevStage - np].count(Def) &&
- (!LoopDefIsPhi || PrevStage != LastStageNum))
+ (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled)))
PhiOp2 = VRMap[PrevStage - np][Def];
}
@@ -2588,7 +2776,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
/// Create branches from each prolog basic block to the appropriate epilog
/// block. These edges are needed if the loop ends before reaching the
/// kernel.
-void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
+void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
+ MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs,
SMSchedule &Schedule, ValueMapTy *VRMap) {
@@ -2615,8 +2804,8 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
// Check if the LOOP0 has already been removed. If so, then there is no need
// to reduce the trip count.
if (LC != 0)
- LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
- MaxIter);
+ LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
+ PrevInsts, j, MaxIter);
// Record the value of the first trip count, which is used to determine if
// branches and blocks can be removed for constant trip counts.
@@ -2657,7 +2846,7 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
@@ -2698,7 +2887,9 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
return;
SmallVector<MachineMemOperand *, 2> NewMMOs;
for (MachineMemOperand *MMO : NewMI.memoperands()) {
- if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic() ||
+ (MMO->isInvariant() && MMO->isDereferenceable()) ||
(!MMO->getValue())) {
NewMMOs.push_back(MMO);
continue;
@@ -3058,6 +3249,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
// Assume ordered loads and stores may have a loop carried dependence.
if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+ SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;
@@ -3069,7 +3261,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
- MachineOperand *BaseOpS, *BaseOpD;
+ const MachineOperand *BaseOpS, *BaseOpD;
int64_t OffsetS, OffsetD;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
@@ -3097,12 +3289,14 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
// This is the main test, which checks the offset values and the loop
// increment value to determine if the accesses may be loop carried.
- if (OffsetS >= OffsetD)
- return OffsetS + AccessSizeS > DeltaS;
- else
- return OffsetD + AccessSizeD > DeltaD;
+ if (AccessSizeS == MemoryLocation::UnknownSize ||
+ AccessSizeD == MemoryLocation::UnknownSize)
+ return true;
- return true;
+ if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
+ return true;
+
+ return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
}
void SwingSchedulerDAG::postprocessDAG() {
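
The rewritten tail of isLoopCarriedDep above is conservative: unknown access sizes, mismatched per-iteration deltas, or a delta smaller than the access size all report a possible loop-carried dependence, and only then are the access end points compared. Restated as a free-standing predicate (a sketch with a hypothetical wrapper name; the real code sits inside isLoopCarriedDep and relies on MachinePipeliner.cpp's existing includes):

    bool MayBeLoopCarried(int64_t OffsetS, uint64_t AccessSizeS, uint64_t DeltaS,
                          int64_t OffsetD, uint64_t AccessSizeD, uint64_t DeltaD) {
      if (AccessSizeS == MemoryLocation::UnknownSize ||
          AccessSizeD == MemoryLocation::UnknownSize)
        return true;
      if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
        return true;
      // e.g. OffsetS = 0, OffsetD = 4, both sizes 4, deltas 8:
      //   0 + 4 < 4 + 4 holds, so a loop-carried dependence is reported.
      return OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD;
    }
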
@@ -3117,6 +3311,10 @@ void SwingSchedulerDAG::postprocessDAG() {
/// the relative values of StartCycle and EndCycle.
bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
bool forward = true;
+ LLVM_DEBUG({
+ dbgs() << "Trying to insert node between " << StartCycle << " and "
+ << EndCycle << " II: " << II << "\n";
+ });
if (StartCycle > EndCycle)
forward = false;
@@ -3125,8 +3323,9 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
for (int curCycle = StartCycle; curCycle != termCycle;
forward ? ++curCycle : --curCycle) {
- // Add the already scheduled instructions at the specified cycle to the DFA.
- Resources->clearResources();
+ // Add the already scheduled instructions at the specified cycle to the
+ // DFA.
+ ProcItinResources.clearResources();
for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
checkCycle <= LastCycle; checkCycle += II) {
std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
@@ -3136,13 +3335,13 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
I != E; ++I) {
if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
continue;
- assert(Resources->canReserveResources(*(*I)->getInstr()) &&
+ assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
"These instructions have already been scheduled.");
- Resources->reserveResources(*(*I)->getInstr());
+ ProcItinResources.reserveResources(*(*I)->getInstr());
}
}
if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
- Resources->canReserveResources(*SU->getInstr())) {
+ ProcItinResources.canReserveResources(*SU->getInstr())) {
LLVM_DEBUG({
dbgs() << "\tinsert at cycle " << curCycle << " ";
SU->getInstr()->dump();
@@ -3360,6 +3559,14 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
if (Pos < MoveUse)
MoveUse = Pos;
}
+      // We did not handle HW dependences in the previous for loop, and we
+      // normally set Latency = 0 for Anti deps, so nodes in the same cycle
+      // may have an Anti dependence on HW regs.
+ else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ if ((MoveUse == 0) || (Pos < MoveUse))
+ MoveUse = Pos;
+ }
}
for (auto &P : SU->Preds) {
if (P.getSUnit() != *I)
@@ -3523,9 +3730,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
for (SDep &PredEdge : SU->Preds) {
SUnit *PredSU = PredEdge.getSUnit();
- unsigned PredIndex =
- std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
- std::make_pair(PredSU, 0), CompareKey));
+ unsigned PredIndex = std::get<1>(
+ *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
PredBefore = true;
Pred = PredSU;
@@ -3535,9 +3741,13 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
for (SDep &SuccEdge : SU->Succs) {
SUnit *SuccSU = SuccEdge.getSUnit();
- unsigned SuccIndex =
- std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
- std::make_pair(SuccSU, 0), CompareKey));
+ // Do not process a boundary node, it was not included in NodeOrder,
+ // hence not in Indices either, call to std::lower_bound() below will
+ // return Indices.end().
+ if (SuccSU->isBoundaryNode())
+ continue;
+ unsigned SuccIndex = std::get<1>(
+ *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
SuccBefore = true;
Succ = SuccSU;
@@ -3548,9 +3758,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
// instructions in circuits are allowed to be scheduled
// after both a successor and predecessor.
- bool InCircuit = std::any_of(
- Circuits.begin(), Circuits.end(),
- [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+ bool InCircuit = llvm::any_of(
+ Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
if (InCircuit)
LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
else {
@@ -3740,5 +3949,140 @@ LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
#endif
+void ResourceManager::initProcResourceVectors(
+ const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
+ unsigned ProcResourceID = 0;
+
+ // We currently limit the resource kinds to 64 and below so that we can use
+ // uint64_t for Masks
+ assert(SM.getNumProcResourceKinds() < 64 &&
+ "Too many kinds of resources, unsupported");
+ // Create a unique bitmask for every processor resource unit.
+ // Skip resource at index 0, since it always references 'InvalidUnit'.
+ Masks.resize(SM.getNumProcResourceKinds());
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ ProcResourceID++;
+ }
+ // Create a unique bitmask for every processor resource group.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (!Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ for (unsigned U = 0; U < Desc.NumUnits; ++U)
+ Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
+ ProcResourceID++;
+ }
+ LLVM_DEBUG({
+ if (SwpShowResMask) {
+ dbgs() << "ProcResourceDesc:\n";
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
+ dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
+ ProcResource->Name, I, Masks[I],
+ ProcResource->NumUnits);
+ }
+ dbgs() << " -----------------\n";
+ }
+ });
+}
+
+bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
+
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "canReserveResources:\n";
+ });
+ if (UseDFA)
+ return DFAResources->canReserveResources(MID);
+
+ unsigned InsnClass = MID->getSchedClass();
+ const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+ if (!SCDesc->isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+      dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
+ });
+ return true;
+ }
+
+ const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
+ const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
+ for (; I != E; ++I) {
+ if (!I->Cycles)
+ continue;
+ const MCProcResourceDesc *ProcResource =
+ SM.getProcResource(I->ProcResourceIdx);
+ unsigned NumUnits = ProcResource->NumUnits;
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+ ProcResource->Name, I->ProcResourceIdx,
+ ProcResourceCount[I->ProcResourceIdx], NumUnits,
+ I->Cycles);
+ });
+ if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
+ return false;
+ }
+ LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
+ return true;
+}
+
+void ResourceManager::reserveResources(const MCInstrDesc *MID) {
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "reserveResources:\n";
+ });
+ if (UseDFA)
+ return DFAResources->reserveResources(MID);
+ unsigned InsnClass = MID->getSchedClass();
+ const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+ if (!SCDesc->isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+      dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
+ });
+ return;
+ }
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ ++ProcResourceCount[PRE.ProcResourceIdx];
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ const MCProcResourceDesc *ProcResource =
+ SM.getProcResource(PRE.ProcResourceIdx);
+ dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+ ProcResource->Name, PRE.ProcResourceIdx,
+ ProcResourceCount[PRE.ProcResourceIdx],
+ ProcResource->NumUnits, PRE.Cycles);
+ }
+ });
+ }
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "reserveResources: done!\n\n";
+ });
+}
+
+bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
+ return canReserveResources(&MI.getDesc());
+}
+
+void ResourceManager::reserveResources(const MachineInstr &MI) {
+ return reserveResources(&MI.getDesc());
+}
+
+void ResourceManager::clearResources() {
+ if (UseDFA)
+ return DFAResources->clearResources();
+ std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
+}
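
The ResourceManager introduced above replaces the per-cycle DFAPacketizer when the subtarget does not use a DFA for pipelining: it keeps one counter per processor resource and admits an instruction as long as none of the resources written by its sched class is already saturated. Stripped of the DFA branch and debug output, and using the same member names as the patch (STI, SM, SCDesc, ProcResourceCount), the counting path amounts to this sketch:

    // Admission test: every written resource must still have a spare unit.
    bool Fits = true;
    for (const MCWriteProcResEntry &PRE :
         make_range(STI->getWriteProcResBegin(SCDesc),
                    STI->getWriteProcResEnd(SCDesc)))
      if (PRE.Cycles &&
          ProcResourceCount[PRE.ProcResourceIdx] >=
              SM.getProcResource(PRE.ProcResourceIdx)->NumUnits)
        Fits = false;
    // Reservation: bump the counter of every written resource.
    if (Fits)
      for (const MCWriteProcResEntry &PRE :
           make_range(STI->getWriteProcResBegin(SCDesc),
                      STI->getWriteProcResEnd(SCDesc)))
        if (PRE.Cycles)
          ++ProcResourceCount[PRE.ProcResourceIdx];
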
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index 488377998cb3..7f220ed1fd8f 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -1,9 +1,8 @@
//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 2619d8f78276..2961d456be0d 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,9 +1,8 @@
//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 6e5ca45d5e5e..f0fd0405d69d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -155,7 +154,7 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
///
-unsigned
+Register
MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name) {
assert(RegClass && "Cannot create register without RegClass!");
@@ -170,7 +169,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
-unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
+Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
StringRef Name) {
unsigned Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = VRegInfo[VReg].first;
@@ -185,7 +184,7 @@ void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
VRegToType[VReg] = Ty;
}
-unsigned
+Register
MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
// New virtual register number.
unsigned Reg = createIncompleteVirtualRegister(Name);
@@ -424,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
return ++UI == use_nodbg_end();
}
+bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
+ use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
+ if (UI == use_instr_nodbg_end())
+ return false;
+ return ++UI == use_instr_nodbg_end();
+}
+
/// clearKillFlags - Iterate over all the uses of the given register and
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
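
Two related changes in MachineRegisterInfo above: the virtual-register creators now return Register instead of a raw unsigned (Register converts implicitly, so existing callers keep compiling), and hasOneNonDBGUser counts non-debug using instructions where hasOneNonDBGUse counts use operands. A hypothetical caller-side sketch (MRI and RC stand for values a pass would already have):

    Register NewVReg = MRI.createVirtualRegister(RC);
    // True even if the single user reads NewVReg in two of its operands.
    if (MRI.hasOneNonDBGUser(NewVReg)) {
      MachineInstr &OnlyUser = *MRI.use_instr_nodbg_begin(NewVReg);
      (void)OnlyUser; // fold or rewrite the single user here
    }
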
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 542491eabbf2..e8b42047b49f 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -1,9 +1,8 @@
//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 90dad9d399fe..ae1170ad1be6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1,9 +1,8 @@
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -487,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB,
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!MI.isDebugInstr())
+ if (!MI.isDebugInstr()) {
// MBB::size() uses instr_iterator to count. Here we need a bundle to
// count as a single instruction.
++NumRegionInstrs;
+ }
}
- Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
+ // It's possible we found a scheduling region that only has debug
+ // instructions. Don't bother scheduling these.
+ if (NumRegionInstrs != 0)
+ Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
}
if (RegionsTopDown)
@@ -605,23 +608,6 @@ LLVM_DUMP_METHOD void ReadyQueue::dump() const {
// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;
-bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
- return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
-}
-
-bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
- if (SuccSU != &ExitSU) {
- // Do not use WillCreateCycle, it assumes SD scheduling.
- // If Pred is reachable from Succ, then the edge creates a cycle.
- if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
- return false;
- Topo.AddPred(SuccSU, PredDep.getSUnit());
- }
- SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
- // Return true regardless of whether a new edge needed to be inserted.
- return true;
-}
-
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
@@ -762,8 +748,6 @@ void ScheduleDAGMI::schedule() {
// Build the DAG.
buildSchedGraph(AA);
- Topo.InitDAGTopologicalSorting();
-
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
@@ -1212,8 +1196,6 @@ void ScheduleDAGMILive::schedule() {
LLVM_DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
- Topo.InitDAGTopologicalSorting();
-
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
@@ -1484,10 +1466,10 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
- MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+ MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
: SU(su), BaseOp(Op), Offset(ofs) {}
bool operator<(const MemOpInfo &RHS) const {
@@ -1533,7 +1515,7 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
- void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
};
class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1570,10 +1552,10 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
} // end namespace llvm
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
- ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
@@ -1610,9 +1592,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
}
/// Callback from DAG postProcessing to create cluster edges for loads.
-void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
// Map each store chain to a set of dependent MemOps.
@@ -1857,9 +1837,15 @@ SchedBoundary::~SchedBoundary() { delete HazardRec; }
/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
+/// If we are checking after scheduling a node, we should return true when
+/// we just reach the resource limit.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
- unsigned Latency) {
- return (int)(Count - (Latency * LFactor)) > (int)LFactor;
+ unsigned Latency, bool AfterSchedNode) {
+ int ResCntFactor = (int)(Count - (Latency * LFactor));
+ if (AfterSchedNode)
+ return ResCntFactor >= (int)LFactor;
+ else
+ return ResCntFactor > (int)LFactor;
}
void SchedBoundary::reset() {
@@ -1883,6 +1869,7 @@ void SchedBoundary::reset() {
ZoneCritResIdx = 0;
IsResourceLimited = false;
ReservedCycles.clear();
+ ReservedCyclesIndex.clear();
#ifndef NDEBUG
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
@@ -1921,8 +1908,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
SchedModel = smodel;
Rem = rem;
if (SchedModel->hasInstrSchedModel()) {
- ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
- ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+ unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+ ReservedCyclesIndex.resize(ResourceCount);
+ ExecutedResCounts.resize(ResourceCount);
+ unsigned NumUnits = 0;
+
+ for (unsigned i = 0; i < ResourceCount; ++i) {
+ ReservedCyclesIndex[i] = NumUnits;
+ NumUnits += SchedModel->getProcResource(i)->NumUnits;
+ }
+
+ ReservedCycles.resize(NumUnits, InvalidCycle);
}
}
@@ -1943,11 +1939,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
return 0;
}
-/// Compute the next cycle at which the given processor resource can be
-/// scheduled.
-unsigned SchedBoundary::
-getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
- unsigned NextUnreserved = ReservedCycles[PIdx];
+/// Compute the next cycle at which the given processor resource unit
+/// can be scheduled.
+unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
+ unsigned Cycles) {
+ unsigned NextUnreserved = ReservedCycles[InstanceIdx];
// If this resource has never been used, always return cycle zero.
if (NextUnreserved == InvalidCycle)
return 0;
@@ -1957,6 +1953,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
return NextUnreserved;
}
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled. Returns the next cycle and the index of the processor resource
+/// instance in the reserved cycles vector.
+std::pair<unsigned, unsigned>
+SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+ unsigned MinNextUnreserved = InvalidCycle;
+ unsigned InstanceIdx = 0;
+ unsigned StartIndex = ReservedCyclesIndex[PIdx];
+ unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+ assert(NumberOfInstances > 0 &&
+ "Cannot have zero instances of a ProcResource");
+
+ for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
+ ++I) {
+ unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
+ if (MinNextUnreserved > NextUnreserved) {
+ InstanceIdx = I;
+ MinNextUnreserved = NextUnreserved;
+ }
+ }
+ return std::make_pair(MinNextUnreserved, InstanceIdx);
+}
+
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
@@ -1998,14 +2017,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
SchedModel->getWriteProcResEnd(SC))) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
- unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
+ unsigned NRCycle, InstanceIdx;
+ std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
if (NRCycle > CurrCycle) {
#ifndef NDEBUG
MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
- << SchedModel->getResourceName(ResIdx) << "="
- << NRCycle << "c\n");
+ << SchedModel->getResourceName(ResIdx)
+ << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
+ << "=" << NRCycle << "c\n");
return true;
}
}
@@ -2119,7 +2140,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
CheckPending = true;
IsResourceLimited =
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
- getScheduledLatency());
+ getScheduledLatency(), true);
LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
<< '\n');
@@ -2160,10 +2181,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
<< "c\n");
}
// For reserved resources, record the highest cycle using the resource.
- unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+ unsigned NextAvailable, InstanceIdx;
+ std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
- << SchedModel->getProcResource(PIdx)->Name
+ << SchedModel->getResourceName(PIdx)
+ << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
<< " reserved until @" << NextAvailable << "\n");
}
return NextAvailable;
@@ -2179,6 +2202,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
HazardRec->Reset();
}
HazardRec->EmitInstruction(SU);
+ // Scheduling an instruction may have made pending instructions available.
+ CheckPending = true;
}
// checkHazard should prevent scheduling multiple instructions per cycle that
// exceed the issue width.
@@ -2251,12 +2276,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
if (isTop()) {
- ReservedCycles[PIdx] =
- std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
- }
- else
- ReservedCycles[PIdx] = NextCycle;
+ ReservedCycles[InstanceIdx] =
+ std::max(ReservedUntil, NextCycle + PI->Cycles);
+ } else
+ ReservedCycles[InstanceIdx] = NextCycle;
}
}
}
@@ -2282,7 +2308,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// resource limited. If a stall occurred, bumpCycle does this.
IsResourceLimited =
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
- getScheduledLatency());
+ getScheduledLatency(), true);
// Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
// resets CurrMOps. Loop to handle instructions with more MOps than issue in
@@ -2501,7 +2527,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
RemLatency = computeRemLatency(CurrZone);
RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
- OtherCount, RemLatency);
+ OtherCount, RemLatency, false);
}
// Schedule aggressively for latency in PostRA mode. We don't check for
@@ -2741,8 +2767,10 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
// After subtarget overrides, apply command line options.
- if (!EnableRegPressure)
+ if (!EnableRegPressure) {
RegionPolicy.ShouldTrackPressure = false;
+ RegionPolicy.ShouldTrackLaneMasks = false;
+ }
// Check -misched-topdown/bottomup can force or unforce scheduling direction.
// e.g. -misched-bottomup=false allows scheduling in both directions.
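
The MachineScheduler hunks above move from one reservation entry per processor resource to one per resource unit: ReservedCyclesIndex maps a resource to the first of its unit slots in ReservedCycles, and getNextResourceCycle() now also reports which unit becomes free first. A minimal standalone sketch of that selection loop, not part of the patch and with the SchedBoundary state and the Cycles argument simplified away:

  #include <limits>
  #include <utility>
  #include <vector>

  struct ResourceModel {
    std::vector<unsigned> ReservedCycles;      // one entry per resource *unit*
    std::vector<unsigned> ReservedCyclesIndex; // first unit slot per resource
    std::vector<unsigned> NumUnits;            // unit count per resource

    // Scan the units owned by resource PIdx and return the pair
    // {earliest free cycle, index of that unit}, as getNextResourceCycle()
    // does above.
    std::pair<unsigned, unsigned> nextResourceCycle(unsigned PIdx) const {
      unsigned Start = ReservedCyclesIndex[PIdx];
      unsigned MinNext = std::numeric_limits<unsigned>::max();
      unsigned Instance = Start;
      for (unsigned I = Start, E = Start + NumUnits[PIdx]; I != E; ++I) {
        if (ReservedCycles[I] < MinNext) {
          MinNext = ReservedCycles[I];
          Instance = I;
        }
      }
      return {MinNext, Instance};
    }
  };

Instance - ReservedCyclesIndex[PIdx] is the unit number local to the resource, which is what the new '[...]' annotations in the LLVM_DEBUG output above print.
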
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index cdc597db6401..41db2c88ce50 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -1,9 +1,8 @@
//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -585,9 +584,8 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccs.push_back(DTChild->getBlock());
// Sort Successors according to their loop depth or block frequency info.
- std::stable_sort(
- AllSuccs.begin(), AllSuccs.end(),
- [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ llvm::stable_sort(
+ AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
@@ -716,7 +714,7 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
!PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
return false;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
@@ -1203,6 +1201,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
bool Changed = false;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
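
The MachineSink hunk above only switches std::stable_sort on iterators to llvm::stable_sort on the container; the comparator itself is unchanged and its tail lies outside the shown context. As a rough standalone sketch of that ordering, assuming ascending frequency with a loop-depth fallback (an assumption, since the comparator's return statement is not visible here):

  #include <algorithm>
  #include <cstdint>
  #include <vector>

  struct Block {
    uint64_t Freq = 0;     // 0 means "no block frequency info"
    unsigned LoopDepth = 0;
  };

  void sortSuccessors(std::vector<const Block *> &Succs) {
    // A stable sort keeps the original order for blocks the comparator
    // treats as equal, matching the use of stable_sort in the patch.
    std::stable_sort(Succs.begin(), Succs.end(),
                     [](const Block *L, const Block *R) {
                       bool HasFreq = L->Freq != 0 && R->Freq != 0;
                       // Assumed ordering: by frequency when both sides
                       // have it, otherwise by loop depth.
                       return HasFreq ? L->Freq < R->Freq
                                      : L->LoopDepth < R->LoopDepth;
                     });
  }
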
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index e62ed3094651..f9505df4e7f4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 534d3699db29..0ad792ac62cf 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,9 +1,8 @@
//===- MachineVerifier.cpp - Machine Code Verifier ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,7 +218,7 @@ namespace {
bool isAllocatable(unsigned Reg) const {
return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
- !regsReserved.test(Reg);
+ !regsReserved.test(Reg);
}
// Analysis information if available
@@ -231,6 +230,9 @@ namespace {
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
+
+ bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
+ void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineInstrAfter(const MachineInstr *MI);
@@ -838,7 +840,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
if (MI->isTerminator() && !TII->isPredicated(*MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
- } else if (FirstTerminator) {
+ } else if (FirstTerminator && !MI->isDebugEntryValue()) {
report("Non-terminator instruction after the first terminator", MI);
errs() << "First terminator was:\t" << *FirstTerminator;
}
@@ -889,109 +891,150 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
}
}
-void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MI->getNumOperands() < MCID.getNumOperands()) {
- report("Too few operands", MI);
- errs() << MCID.getNumOperands() << " operands expected, but "
- << MI->getNumOperands() << " given.\n";
+/// Check that types are consistent when two operands need to have the same
+/// number of vector elements.
+/// \return true if the types are valid.
+bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
+ const MachineInstr *MI) {
+ if (Ty0.isVector() != Ty1.isVector()) {
+ report("operand types must be all-vector or all-scalar", MI);
+ // Generally we try to report as many issues as possible at once, but in
+ // this case it's not clear what we should be comparing the size of the
+ // scalar with: the size of the whole vector or its lane. Instead of
+ // making an arbitrary choice and emitting a not-so-helpful message, let's
+ // avoid the extra noise and stop here.
+ return false;
}
- if (MI->isPHI()) {
- if (MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs))
- report("Found PHI instruction with NoPHIs property set", MI);
+ if (Ty0.isVector() && Ty0.getNumElements() != Ty1.getNumElements()) {
+ report("operand types must preserve number of vector elements", MI);
+ return false;
+ }
- if (FirstNonPHI)
- report("Found PHI instruction after non-PHI", MI);
- } else if (FirstNonPHI == nullptr)
- FirstNonPHI = MI;
+ return true;
+}
- // Check the tied operands.
- if (MI->isInlineAsm())
- verifyInlineAsm(MI);
+void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
+ if (isFunctionSelected)
+ report("Unexpected generic instruction in a Selected function", MI);
- // Check the MachineMemOperands for basic consistency.
- for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
- E = MI->memoperands_end();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MI->getNumOperands();
+
+ // Check types.
+ SmallVector<LLT, 4> Types;
+ for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps);
I != E; ++I) {
- if ((*I)->isLoad() && !MI->mayLoad())
- report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MI->mayStore())
- report("Missing mayStore flag", MI);
- }
+ if (!MCID.OpInfo[I].isGenericType())
+ continue;
+ // Generic instructions specify type equality constraints between some of
+ // their operands. Make sure these are consistent.
+ size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
+ Types.resize(std::max(TypeIdx + 1, Types.size()));
+
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (!MO->isReg()) {
+ report("generic instruction must use register operands", MI);
+ continue;
+ }
- // Debug values must not have a slot index.
- // Other instructions must have one, unless they are inside a bundle.
- if (LiveInts) {
- bool mapped = !LiveInts->isNotInMIMap(*MI);
- if (MI->isDebugInstr()) {
- if (mapped)
- report("Debug instruction has a slot index", MI);
- } else if (MI->isInsideBundle()) {
- if (mapped)
- report("Instruction inside bundle has a slot index", MI);
+ LLT OpTy = MRI->getType(MO->getReg());
+ // Don't report a type mismatch if there is no actual mismatch, only a
+ // type missing, to reduce noise:
+ if (OpTy.isValid()) {
+ // Only the first valid type for a type index will be printed: don't
+ // overwrite it later so it's always clear which type was expected:
+ if (!Types[TypeIdx].isValid())
+ Types[TypeIdx] = OpTy;
+ else if (Types[TypeIdx] != OpTy)
+ report("Type mismatch in generic instruction", MO, I, OpTy);
} else {
- if (!mapped)
- report("Missing slot index", MI);
+ // Generic instructions must have types attached to their operands.
+ report("Generic instruction is missing a virtual register type", MO, I);
}
}
- if (isPreISelGenericOpcode(MCID.getOpcode())) {
- if (isFunctionSelected)
- report("Unexpected generic instruction in a Selected function", MI);
-
- // Check types.
- SmallVector<LLT, 4> Types;
- for (unsigned I = 0; I < MCID.getNumOperands(); ++I) {
- if (!MCID.OpInfo[I].isGenericType())
- continue;
- // Generic instructions specify type equality constraints between some of
- // their operands. Make sure these are consistent.
- size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
- Types.resize(std::max(TypeIdx + 1, Types.size()));
-
- const MachineOperand *MO = &MI->getOperand(I);
- LLT OpTy = MRI->getType(MO->getReg());
- // Don't report a type mismatch if there is no actual mismatch, only a
- // type missing, to reduce noise:
- if (OpTy.isValid()) {
- // Only the first valid type for a type index will be printed: don't
- // overwrite it later so it's always clear which type was expected:
- if (!Types[TypeIdx].isValid())
- Types[TypeIdx] = OpTy;
- else if (Types[TypeIdx] != OpTy)
- report("Type mismatch in generic instruction", MO, I, OpTy);
- } else {
- // Generic instructions must have types attached to their operands.
- report("Generic instruction is missing a virtual register type", MO, I);
- }
- }
-
- // Generic opcodes must not have physical register operands.
- for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
- const MachineOperand *MO = &MI->getOperand(I);
- if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
- report("Generic instruction cannot have physical register", MO, I);
- }
+ // Generic opcodes must not have physical register operands.
+ for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+ report("Generic instruction cannot have physical register", MO, I);
}
+ // Avoid out of bounds in checks below. This was already reported earlier.
+ if (MI->getNumOperands() < MCID.getNumOperands())
+ return;
+
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
// Verify properties of various specific instruction types
- switch(MI->getOpcode()) {
- default:
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT: {
+ if (MI->getNumOperands() < MCID.getNumOperands())
+ break;
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.isVector())
+ report("Instruction cannot use a vector result type", MI);
+
+ if (MI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ if (!MI->getOperand(1).isCImm()) {
+ report("G_CONSTANT operand must be cimm", MI);
+ break;
+ }
+
+ const ConstantInt *CI = MI->getOperand(1).getCImm();
+ if (CI->getBitWidth() != DstTy.getSizeInBits())
+ report("inconsistent constant size", MI);
+ } else {
+ if (!MI->getOperand(1).isFPImm()) {
+ report("G_FCONSTANT operand must be fpimm", MI);
+ break;
+ }
+ const ConstantFP *CF = MI->getOperand(1).getFPImm();
+
+ if (APFloat::getSizeInBits(CF->getValueAPF().getSemantics()) !=
+ DstTy.getSizeInBits()) {
+ report("inconsistent constant size", MI);
+ }
+ }
+
break;
+ }
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
+ LLT ValTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!PtrTy.isPointer())
+ report("Generic memory instruction must access a pointer", MI);
+
// Generic loads and stores must have a single MachineMemOperand
// describing that access.
- if (!MI->hasOneMemOperand())
+ if (!MI->hasOneMemOperand()) {
report("Generic instruction accessing memory must have one mem operand",
MI);
+ } else {
+ const MachineMemOperand &MMO = **MI->memoperands_begin();
+ if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD ||
+ MI->getOpcode() == TargetOpcode::G_SEXTLOAD) {
+ if (MMO.getSizeInBits() >= ValTy.getSizeInBits())
+ report("Generic extload must have a narrower memory type", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_LOAD) {
+ if (MMO.getSize() > ValTy.getSizeInBytes())
+ report("load memory size cannot exceed result size", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_STORE) {
+ if (ValTy.getSizeInBytes() < MMO.getSize())
+ report("store memory size cannot exceed value size", MI);
+ }
+ }
+
break;
+ }
case TargetOpcode::G_PHI: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
if (!DstTy.isValid() ||
@@ -1009,6 +1052,70 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
MI);
break;
}
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ if (SrcTy.isPointer() != DstTy.isPointer())
+ report("bitcast cannot convert between pointers and other types", MI);
+
+ if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ report("bitcast sizes must match", MI);
+ break;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_ADDRSPACE_CAST: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+ DstTy = DstTy.getScalarType();
+ SrcTy = SrcTy.getScalarType();
+
+ if (MI->getOpcode() == TargetOpcode::G_INTTOPTR) {
+ if (!DstTy.isPointer())
+ report("inttoptr result type must be a pointer", MI);
+ if (SrcTy.isPointer())
+ report("inttoptr source type must not be a pointer", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_PTRTOINT) {
+ if (!SrcTy.isPointer())
+ report("ptrtoint source type must be a pointer", MI);
+ if (DstTy.isPointer())
+ report("ptrtoint result type must not be a pointer", MI);
+ } else {
+ assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST);
+ if (!SrcTy.isPointer() || !DstTy.isPointer())
+ report("addrspacecast types must be pointers", MI);
+ else {
+ if (SrcTy.getAddressSpace() == DstTy.getAddressSpace())
+ report("addrspacecast must convert different address spaces", MI);
+ }
+ }
+
+ break;
+ }
+ case TargetOpcode::G_GEP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid())
+ break;
+
+ if (!PtrTy.getScalarType().isPointer())
+ report("gep first operand must be a pointer", MI);
+
+ if (OffsetTy.getScalarType().isPointer())
+ report("gep offset operand must not be a pointer", MI);
+
+ // TODO: Is the offset allowed to be a scalar when the pointer is a vector?
+ break;
+ }
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
@@ -1021,30 +1128,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// instructions aren't guaranteed to have the right number of operands or
// types attached to them at this point
assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}");
- if (MI->getNumOperands() < MCID.getNumOperands())
- break;
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (!DstTy.isValid() || !SrcTy.isValid())
break;
- LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
- LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy;
+ LLT DstElTy = DstTy.getScalarType();
+ LLT SrcElTy = SrcTy.getScalarType();
if (DstElTy.isPointer() || SrcElTy.isPointer())
report("Generic extend/truncate can not operate on pointers", MI);
- if (DstTy.isVector() != SrcTy.isVector()) {
- report("Generic extend/truncate must be all-vector or all-scalar", MI);
- // Generally we try to report as many issues as possible at once, but in
- // this case it's not clear what should we be comparing the size of the
- // scalar with: the size of the whole vector or its lane. Instead of
- // making an arbitrary choice and emitting not so helpful message, let's
- // avoid the extra noise and stop here.
- break;
- }
- if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())
- report("Generic vector extend/truncate must preserve number of lanes",
- MI);
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
unsigned DstSize = DstElTy.getSizeInBits();
unsigned SrcSize = SrcElTy.getSizeInBits();
switch (MI->getOpcode()) {
@@ -1061,6 +1156,17 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_SELECT: {
+ LLT SelTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT CondTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!SelTy.isValid() || !CondTy.isValid())
+ break;
+
+ // Scalar condition select on a vector is valid.
+ if (CondTy.isVector())
+ verifyVectorElementMatch(SelTy, CondTy, MI);
+ break;
+ }
case TargetOpcode::G_MERGE_VALUES: {
// G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
// e.g. s2N = MERGE sN, sN
@@ -1070,6 +1176,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (DstTy.isVector() || SrcTy.isVector())
report("G_MERGE_VALUES cannot operate on vectors", MI);
+
+ const unsigned NumOps = MI->getNumOperands();
+ if (DstTy.getSizeInBits() != SrcTy.getSizeInBits() * (NumOps - 1))
+ report("G_MERGE_VALUES result size is inconsistent", MI);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ if (MRI->getType(MI->getOperand(I).getReg()) != SrcTy)
+ report("G_MERGE_VALUES source types do not match", MI);
+ }
+
break;
}
case TargetOpcode::G_UNMERGE_VALUES: {
@@ -1092,18 +1208,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// must match the dest vector size.
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
- if (!DstTy.isVector() || SrcEltTy.isVector())
+ if (!DstTy.isVector() || SrcEltTy.isVector()) {
report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+ break;
+ }
+
+ if (DstTy.getElementType() != SrcEltTy)
+ report("G_BUILD_VECTOR result element type must match source type", MI);
+
+ if (DstTy.getNumElements() != MI->getNumOperands() - 1)
+ report("G_BUILD_VECTOR must have an operand for each elemement", MI);
+
for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
if (MRI->getType(MI->getOperand(1).getReg()) !=
MRI->getType(MI->getOperand(i).getReg()))
report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
}
- if (DstTy.getSizeInBits() !=
- SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1))
- report("G_BUILD_VECTOR src operands total size don't match dest "
- "size.",
- MI);
+
break;
}
case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
@@ -1144,6 +1265,176 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("G_CONCAT_VECTOR num dest and source elements should match", MI);
break;
}
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(2).getReg());
+
+ if ((DstTy.isVector() != SrcTy.isVector()) ||
+ (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()))
+ report("Generic vector icmp/fcmp must preserve number of lanes", MI);
+
+ break;
+ }
+ case TargetOpcode::G_EXTRACT: {
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ if (!SrcOp.isReg()) {
+ report("extract source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &OffsetOp = MI->getOperand(2);
+ if (!OffsetOp.isImm()) {
+ report("extract offset must be a constant", MI);
+ break;
+ }
+
+ unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+ if (SrcSize == DstSize)
+ report("extract source must be larger than result", MI);
+
+ if (DstSize + OffsetOp.getImm() > SrcSize)
+ report("extract reads past end of register", MI);
+ break;
+ }
+ case TargetOpcode::G_INSERT: {
+ const MachineOperand &SrcOp = MI->getOperand(2);
+ if (!SrcOp.isReg()) {
+ report("insert source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+ if (!OffsetOp.isImm()) {
+ report("insert offset must be a constant", MI);
+ break;
+ }
+
+ unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+
+ if (DstSize <= SrcSize)
+ report("inserted size must be smaller than total register", MI);
+
+ if (SrcSize + OffsetOp.getImm() > DstSize)
+ report("insert writes past end of register", MI);
+
+ break;
+ }
+ case TargetOpcode::G_JUMP_TABLE: {
+ if (!MI->getOperand(1).isJTI())
+ report("G_JUMP_TABLE source operand must be a jump table index", MI);
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstTy.isPointer())
+ report("G_JUMP_TABLE dest operand must have a pointer type", MI);
+ break;
+ }
+ case TargetOpcode::G_BRJT: {
+ if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+ report("G_BRJT src operand 0 must be a pointer type", MI);
+
+ if (!MI->getOperand(1).isJTI())
+ report("G_BRJT src operand 1 must be a jump table index", MI);
+
+ const auto &IdxOp = MI->getOperand(2);
+ if (!IdxOp.isReg() || MRI->getType(IdxOp.getReg()).isPointer())
+ report("G_BRJT src operand 2 must be a scalar reg type", MI);
+ break;
+ }
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
+ // TODO: Should verify number of def and use operands, but the current
+ // interface requires passing in IR types for mangling.
+ const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
+ if (!IntrIDOp.isIntrinsicID()) {
+ report("G_INTRINSIC first src operand must be an intrinsic ID", MI);
+ break;
+ }
+
+ bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
+ unsigned IntrID = IntrIDOp.getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs
+ = Intrinsic::getAttributes(MF->getFunction().getContext(),
+ static_cast<Intrinsic::ID>(IntrID));
+ bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+ if (NoSideEffects && DeclHasSideEffects) {
+ report("G_INTRINSIC used with intrinsic that accesses memory", MI);
+ break;
+ }
+ if (!NoSideEffects && !DeclHasSideEffects) {
+ report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
+ break;
+ }
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ errs() << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumOperands() << " given.\n";
+ }
+
+ if (MI->isPHI()) {
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
+ if (FirstNonPHI)
+ report("Found PHI instruction after non-PHI", MI);
+ } else if (FirstNonPHI == nullptr)
+ FirstNonPHI = MI;
+
+ // Check the tied operands.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end();
+ I != E; ++I) {
+ if ((*I)->isLoad() && !MI->mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MI->mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one, unless they are inside a bundle.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(*MI);
+ if (MI->isDebugInstr()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ verifyPreISelGenericInstruction(MI);
+ return;
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(*MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+ switch (MI->getOpcode()) {
case TargetOpcode::COPY: {
if (foundErrors)
break;
@@ -1193,7 +1484,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
// TODO: verify we have properly encoded deopt arguments
- };
+ break;
+ }
}
void
@@ -1356,7 +1648,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
if (SubIdx) {
- report("Generic virtual register does not subregister index", MO,
+ report("Generic virtual register does not allow subregister index", MO,
MONum);
return;
}
@@ -1911,6 +2203,10 @@ void MachineVerifier::visitMachineFunctionAfter() {
verifyLiveVariables();
if (LiveInts)
verifyLiveIntervals();
+
+ for (auto CSInfo : MF->getCallSitesInfo())
+ if (!CSInfo.first->isCall())
+ report("Call site info referencing instruction that is not call", MF);
}
void MachineVerifier::verifyLiveVariables() {
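
The largest MachineVerifier change above factors the generic-opcode checks into verifyPreISelGenericInstruction() and pulls the shared vector/scalar shape check into verifyVectorElementMatch(). A tiny standalone restatement of that helper, over an invented toy type rather than LLT, is:

  #include <cstdio>

  struct Ty {
    bool IsVector = false;
    unsigned NumElements = 1; // meaningful only when IsVector
  };

  // Returns true when two operand types are compatible: both scalar, or
  // both vectors with the same element count, mirroring the verifier.
  bool vectorElementsMatch(const Ty &A, const Ty &B) {
    if (A.IsVector != B.IsVector) {
      std::puts("operand types must be all-vector or all-scalar");
      return false;
    }
    if (A.IsVector && A.NumElements != B.NumElements) {
      std::puts("operand types must preserve number of vector elements");
      return false;
    }
    return true;
  }

The helper reports and returns false on the first mismatch so callers stop comparing sizes, which is why the extend/truncate and G_SELECT cases above can drop their open-coded versions of the same check.
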
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 82b6d642c73b..2db1e86905a4 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -1,9 +1,8 @@
//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,7 +36,7 @@ static bool isHazard(const SDep &Dep) {
return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
}
-static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
// between them.
@@ -49,7 +48,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.isCluster())
return false;
// Though the reachability checks above could be made more generic,
- // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid,
+ // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid,
// the extra computation cost makes it less interesting in general cases.
// Create a single weak edge between the adjacent instrs. The only effect is
@@ -118,7 +117,7 @@ namespace {
class MacroFusion : public ScheduleDAGMutation {
ShouldSchedulePredTy shouldScheduleAdjacent;
bool FuseBlock;
- bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+ bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
public:
MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
@@ -129,9 +128,7 @@ public:
} // end anonymous namespace
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
if (FuseBlock)
// For each of the SUnits in the scheduling block, try to fuse the instr in
// it with one in its predecessors.
@@ -145,7 +142,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
/// Implement the fusion of instr pairs in the scheduling DAG,
 /// anchored at the instr in AnchorSU.
-bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) {
const MachineInstr &AnchorMI = *AnchorSU.getInstr();
const TargetInstrInfo &TII = *DAG.TII;
const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 770f6c5b0403..c70b62252139 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -1,9 +1,8 @@
//===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -182,11 +181,12 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
- // for the case SingleValReg taken from copy instr
- MRI->clearKillFlags(SingleValReg);
-
MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
+
+ // The kill flags on OldReg and SingleValReg may no longer be correct.
+ MRI->clearKillFlags(SingleValReg);
+
++NumPHICycles;
Changed = true;
continue;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b9801c6fd97b..948a5835438c 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -1,9 +1,8 @@
//===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index 4e67ff2e5088..3a2cdaf3bd3c 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -1,9 +1,8 @@
//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
index b997d7ac5f4f..0ff3a41f47d3 100644
--- a/lib/CodeGen/PHIEliminationUtils.h
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -1,9 +1,8 @@
//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp
index bc3f2a6e9b5a..e4c73658cb4f 100644
--- a/lib/CodeGen/ParallelCG.cpp
+++ b/lib/CodeGen/ParallelCG.cpp
@@ -1,9 +1,8 @@
//===-- ParallelCG.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index afb4b0a7e174..a3fa1b0ad8ed 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -1,9 +1,8 @@
//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 1d058ccfb633..b918396aa8c5 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1,9 +1,8 @@
//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1307,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
/// Check whether MI is a candidate for folding into a later instruction.
/// We only fold loads to virtual registers and the virtual register defined
-/// has a single use.
+/// has a single user.
bool PeepholeOptimizer::isLoadFoldable(
MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
if (!MI.canFoldAsLoad() || !MI.mayLoad())
@@ -1317,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable(
return false;
unsigned Reg = MI.getOperand(0).getReg();
- // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
+ // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
if (!MI.getOperand(0).getSubReg() &&
TargetRegisterInfo::isVirtualRegister(Reg) &&
- MRI->hasOneNonDBGUse(Reg)) {
+ MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
}
@@ -1778,6 +1777,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.erase(MI);
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
+ if (MI->isCall())
+ MI->getMF()->updateCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
DefMI->eraseFromParent();
MRI->markUsesInDebugValueAsUndef(FoldedReg);
@@ -1826,7 +1827,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
assert(Def->isBitcast() && "Invalid definition");
// Bail if there are effects that a plain copy will not expose.
- if (Def->hasUnmodeledSideEffects())
+ if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
return ValueTrackerResult();
// Bitcasts with more than one def are not supported.
@@ -1901,13 +1902,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
// Check if one of the operand defines the subreg we are interested in.
for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
- if (RegSeqInput.SubIdx == DefSubReg) {
- if (RegSeqInput.SubReg)
- // Bail if we have to compose sub registers.
- return ValueTrackerResult();
-
+ if (RegSeqInput.SubIdx == DefSubReg)
return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
- }
}
// If the subreg we are tracking is super-defined by another subreg,
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index f9d4a9746e41..0a3838617bc5 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index dd0a5fe1b39d..5bea9f2893c9 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -1,9 +1,8 @@
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index b0e9ac03612d..2752e186875c 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -1,9 +1,8 @@
//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,7 +44,7 @@ static bool lowerLoadRelative(Function &F) {
Value *OffsetPtr =
B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
- Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4);
+ Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, 4);
Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
@@ -65,9 +64,9 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
// If we haven't already looked up this function, check to see if the
// program already contains a function with this name.
Module *M = F.getParent();
- Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+ FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
- if (Function* Fn = dyn_cast<Function>(FCache)) {
+ if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) {
Fn->setLinkage(F.getLinkage());
if (setNonLazyBind && !Fn->isWeakForLinker()) {
// If we have Native ARC, set nonlazybind attribute for these APIs for
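
The PreISelIntrinsicLowering hunk reflects the LLVM 9 API change in which Module::getOrInsertFunction() returns a FunctionCallee rather than a Constant*, so the underlying Function must now be recovered through getCallee(). A minimal sketch of the new pattern, assuming LLVM 9+ headers and with an invented helper name:

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/Casting.h"
  using namespace llvm;

  // Returns the declared function, or null if an existing declaration with a
  // mismatched type forced getCallee() to be a bitcast constant expression.
  Function *getOrCreateRuntimeFn(Module &M, StringRef Name,
                                 FunctionType *FnTy) {
    FunctionCallee FC = M.getOrInsertFunction(Name, FnTy);
    return dyn_cast<Function>(FC.getCallee());
  }
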
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 7e9b4af12ee9..b38987ad1c90 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -1,9 +1,8 @@
//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 23754e487a18..d463bee67595 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1,9 +1,8 @@
//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -169,6 +169,46 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// StackObjSet - A set of stack object indexes
using StackObjSet = SmallSetVector<int, 8>;
+using SavedDbgValuesMap =
+ SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>;
+
+/// Stash DBG_VALUEs that describe parameters and which are placed at the start
+/// of the block. Later on, after the prologue code has been emitted, the
+/// stashed DBG_VALUEs will be reinserted at the start of the block.
+static void stashEntryDbgValues(MachineBasicBlock &MBB,
+ SavedDbgValuesMap &EntryDbgValues) {
+ SmallVector<const MachineInstr *, 4> FrameIndexValues;
+
+ for (auto &MI : MBB) {
+ if (!MI.isDebugInstr())
+ break;
+ if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
+ continue;
+ if (MI.getOperand(0).isFI()) {
+ // We can only emit valid locations for frame indices after the frame
+ // setup, so do not stash them away.
+ FrameIndexValues.push_back(&MI);
+ continue;
+ }
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ auto Overlaps = [Var, Expr](const MachineInstr *DV) {
+ return Var == DV->getDebugVariable() &&
+ Expr->fragmentsOverlap(DV->getDebugExpression());
+ };
+ // See if the debug value overlaps with any preceding debug value that will
+ // not be stashed. If that is the case, then we can't stash this value, as
+ // we would then reorder the values at reinsertion.
+ if (llvm::none_of(FrameIndexValues, Overlaps))
+ EntryDbgValues[&MBB].push_back(&MI);
+ }
+
+ // Remove stashed debug values from the block.
+ if (EntryDbgValues.count(&MBB))
+ for (auto *MI : EntryDbgValues[&MBB])
+ MI->removeFromParent();
+}
+
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
@@ -179,8 +219,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
- FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
- TRI->requiresFrameIndexReplacementScavenging(MF);
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
@@ -192,6 +230,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
// place all spills in the entry block, all restores in return blocks.
calculateSaveRestoreBlocks(MF);
+ // Stash away DBG_VALUEs that should not be moved by insertion of prolog code.
+ SavedDbgValuesMap EntryDbgValues;
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ stashEntryDbgValues(*SaveBlock, EntryDbgValues);
+
// Handle CSR spilling and restoring, for targets that need it.
if (MF.getTarget().usesPhysRegsForPEI())
spillCalleeSavedRegs(MF);
@@ -211,6 +254,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
if (!F.hasFnAttribute(Attribute::Naked))
insertPrologEpilogCode(MF);
+ // Reinsert stashed debug values at the start of the entry blocks.
+ for (auto &I : EntryDbgValues)
+ I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
+
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
//
@@ -495,9 +542,16 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
for (const CalleeSavedInfo &CS : CSI) {
// Insert the spill to the stack frame.
unsigned Reg = CS.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
- TRI);
+
+ if (CS.isSpilledToReg()) {
+ BuildMI(SaveBlock, I, DebugLoc(),
+ TII.get(TargetOpcode::COPY), CS.getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI);
+ }
}
}
}
@@ -517,12 +571,17 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
unsigned Reg = CI.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
- assert(I != RestoreBlock.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert
- // multiple instructions.
+ if (CI.isSpilledToReg()) {
+ BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
+ .addReg(CI.getDstReg(), getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ }
}
}
}
@@ -615,10 +674,13 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
SmallVector<int, 16> AllocatedFrameSlots;
// Add fixed objects.
for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
- AllocatedFrameSlots.push_back(i);
+ // StackSlot scavenging is only implemented for the default stack.
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
// Add callee-save objects.
for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
- AllocatedFrameSlots.push_back(i);
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
for (int i : AllocatedFrameSlots) {
// These are converted from int64_t, but they should always fit in int
@@ -740,11 +802,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Skew to be applied to alignment.
unsigned Skew = TFI.getStackAlignmentSkew(MF);
+#ifdef EXPENSIVE_CHECKS
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
+ if (!MFI.isDeadObjectIndex(i) &&
+ MFI.getStackID(i) == TargetStackID::Default)
+ assert(MFI.getObjectAlignment(i) <= MFI.getMaxAlignment() &&
+ "MaxAlignment is invalid");
+#endif
+
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
+
int64_t FixedOff;
if (StackGrowsDown) {
// The maximum distance from the stack pointer is at lower address of
@@ -763,6 +837,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// callee saved registers.
if (StackGrowsDown) {
for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
+
// If the stack grows down, we need to add the size to find the lowest
// address of the object.
Offset += MFI.getObjectSize(i);
@@ -777,6 +855,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
} else if (MaxCSFrameIndex >= MinCSFrameIndex) {
 // Be careful about underflow in comparisons against MinCSFrameIndex.
for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
+
if (MFI.isDeadObjectIndex(i))
continue;
@@ -845,18 +927,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI.getStackProtectorIndex() >= 0) {
+ if (MFI.hasStackProtectorIndex()) {
+ int StackProtectorFI = MFI.getStackProtectorIndex();
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
- Offset, MaxAlign, Skew);
+ // If we need a stack protector, we need to make sure that
+ // LocalStackSlotPass didn't already allocate a slot for it.
+ // If we are told to use the LocalStackAllocationBlock, the stack protector
+ // is expected to be already pre-allocated.
+ if (!MFI.getUseLocalStackAllocationBlock())
+ AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
+ Skew);
+ else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
+ llvm_unreachable(
+ "Stack protector not pre-allocated by LocalStackSlotPass.");
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
- if (MFI.isObjectPreAllocated(i) &&
- MFI.getUseLocalStackAllocationBlock())
+ if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
@@ -864,8 +954,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI.getStackProtectorIndex() == (int)i ||
- EHRegNodeFrameIndex == (int)i)
+ if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
+ continue;
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
continue;
switch (MFI.getObjectSSPLayout(i)) {
@@ -884,6 +976,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
llvm_unreachable("Unexpected SSPLayoutKind.");
}
+ // We expect **all** the protected stack objects to be pre-allocated by
+ // LocalStackSlotPass. If it turns out that PEI still has to allocate some
+ // of them, we may end up messing up the expected order of the objects.
+ if (MFI.getUseLocalStackAllocationBlock() &&
+ !(LargeArrayObjs.empty() && SmallArrayObjs.empty() &&
+ AddrOfObjs.empty()))
+ llvm_unreachable("Found protected stack objects not pre-allocated by "
+ "LocalStackSlotPass.");
+
AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
Offset, MaxAlign, Skew);
AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
@@ -905,11 +1006,13 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI.getStackProtectorIndex() == (int)i ||
- EHRegNodeFrameIndex == (int)i)
+ if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i)
continue;
if (ProtectedObjs.count(i))
continue;
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
// Add the objects that we need to allocate to our working set.
ObjectsToAllocate.push_back(i);
@@ -1026,8 +1129,16 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEI::replaceFrameIndices(MachineFunction &MF) {
- const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
- if (!TFI.needsFrameIndexResolution(MF)) return;
+ const auto &ST = MF.getSubtarget();
+ const TargetFrameLowering &TFI = *ST.getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(MF))
+ return;
+
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+ // Allow the target to determine this after knowing the frame size.
+ FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
+ TRI->requiresFrameIndexReplacementScavenging(MF);
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
@@ -1095,12 +1206,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
+ unsigned FrameIdx = MI.getOperand(0).getIndex();
+ unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
+
int64_t Offset =
- TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+ TFI->getFrameIndexReference(MF, FrameIdx, Reg);
MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
MI.getOperand(0).setIsDebug();
- auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
- DIExpression::NoDeref, Offset);
+
+ const DIExpression *DIExpr = MI.getDebugExpression();
+
+ // If we have a direct DBG_VALUE, and its location expression isn't
+ // currently complex, then adding an offset will morph it into a
+ // complex location that is interpreted as being a memory address.
+ // This changes a pointer-valued variable to dereference that pointer,
+ // which is incorrect. Fix by adding DW_OP_stack_value.
+ unsigned PrependFlags = DIExpression::ApplyOffset;
+ if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+ PrependFlags |= DIExpression::StackValue;
+
+ // If we have a DBG_VALUE that is indirect and has an Implicit location
+ // expression, we need to insert a deref before prepending a Memory
+ // location expression. After doing this we also change the DBG_VALUE
+ // to be direct.
+ if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ bool WithStackValue = true;
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ // Make the DBG_VALUE direct.
+ MI.getOperand(1).ChangeToRegister(0, false);
+ }
+ DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
MI.getOperand(3).setMetadata(DIExpr);
continue;
}
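
stashEntryDbgValues() above holds back parameter DBG_VALUEs at the top of each save block so prologue insertion does not push them behind the frame setup, except where a value overlaps an earlier frame-index-based location, which must stay in place. A standalone sketch of that filtering rule, over invented plain-struct records instead of MachineInstrs and assuming the input already contains only the block's leading debug instructions:

  #include <algorithm>
  #include <string>
  #include <vector>

  struct DbgValue {
    std::string Var;       // variable name
    unsigned FragLo = 0;   // fragment range [FragLo, FragHi) in bits
    unsigned FragHi = ~0u; // defaults cover the whole variable
    bool IsFrameIndex = false;
    bool IsParameter = true;
  };

  // Returns the values that can safely be moved past the prologue:
  // parameter locations that do not overlap any earlier frame-index value.
  std::vector<const DbgValue *>
  stashableEntryValues(const std::vector<DbgValue> &Entry) {
    std::vector<const DbgValue *> FrameIndexValues, Stash;
    for (const DbgValue &DV : Entry) {
      if (!DV.IsParameter)
        continue;
      if (DV.IsFrameIndex) { // must stay until the frame is set up
        FrameIndexValues.push_back(&DV);
        continue;
      }
      auto Overlaps = [&DV](const DbgValue *FI) {
        return FI->Var == DV.Var && FI->FragLo < DV.FragHi &&
               DV.FragLo < FI->FragHi;
      };
      if (std::none_of(FrameIndexValues.begin(), FrameIndexValues.end(),
                       Overlaps))
        Stash.push_back(&DV);
    }
    return Stash;
  }

The overlap test here is a toy interval check standing in for DIExpression::fragmentsOverlap(); the point it illustrates is that stashing and later reinserting an overlapping value would reorder locations for the same variable.
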
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 6ca8d86e3f8e..da3ef4b771f3 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index a9f0a9387297..f05c97ad621e 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -1,9 +1,8 @@
//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index bc28a054c680..1cbe75c27d13 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -1,9 +1,8 @@
//===- RegAllocBase.cpp - Register Allocator Base Class -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -119,16 +119,19 @@ void RegAllocBase::allocatePhysRegs() {
for (MachineRegisterInfo::reg_instr_iterator
I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
I != E; ) {
- MachineInstr *TmpMI = &*(I++);
- if (TmpMI->isInlineAsm()) {
- MI = TmpMI;
+ MI = &*(I++);
+ if (MI->isInlineAsm())
break;
- }
}
- if (MI)
+ if (MI && MI->isInlineAsm()) {
MI->emitError("inline assembly requires more registers than available");
- else
+ } else if (MI) {
+ LLVMContext &Context =
+ MI->getParent()->getParent()->getMMI().getModule()->getContext();
+ Context.emitError("ran out of registers during register allocation");
+ } else {
report_fatal_error("ran out of registers during register allocation");
+ }
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg,
RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 686ffc36e049..6a7cc5ba4308 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -1,9 +1,8 @@
//===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index daeff3fc3963..46f6946f7003 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -1,9 +1,8 @@
//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index eb3a4e481f5d..2ffa5e389f89 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1,9 +1,8 @@
//===- RegAllocFast.cpp - A fast register allocator for debug code --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,6 +101,10 @@ namespace {
DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+ /// Has a bit set for every virtual register for which it was determined
+ /// that it is alive across blocks.
+ BitVector MayLiveAcrossBlocks;
+
/// State of a physical register.
enum RegState {
/// A disabled register is not available for allocation, but an alias may
@@ -152,6 +155,7 @@ namespace {
enum : unsigned {
spillClean = 50,
spillDirty = 100,
+ spillPrefBonus = 20,
spillImpossible = ~0u
};
@@ -204,19 +208,26 @@ namespace {
}
void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ void allocVirtRegUndef(MachineOperand &MO);
MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
unsigned Hint);
LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
unsigned Hint);
- void spillAll(MachineBasicBlock::iterator MI);
+ void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ unsigned traceCopies(unsigned VirtReg) const;
+ unsigned traceCopyChain(unsigned Reg) const;
+
int getStackSpaceFor(unsigned VirtReg);
void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
MCPhysReg AssignedReg, bool Kill);
void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
MCPhysReg PhysReg);
+ bool mayLiveOut(unsigned VirtReg);
+ bool mayLiveIn(unsigned VirtReg);
+
void dumpState();
};
@@ -251,6 +262,53 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
return FrameIdx;
}
+/// Returns false if \p VirtReg is known to not live out of the current block.
+bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
+ if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) {
+ // Cannot be live-out if there are no successors.
+ return !MBB->succ_empty();
+ }
+
+ // If this block loops back to itself, it would be necessary to check whether
+ // the use comes after the def.
+ if (MBB->isSuccessor(MBB)) {
+ MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return true;
+ }
+
+ // See if the first \p Limit uses of the register are all in the current
+ // block.
+ static const unsigned Limit = 8;
+ unsigned C = 0;
+ for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
+ if (UseInst.getParent() != MBB || ++C >= Limit) {
+ MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ // Cannot be live-out if there are no successors.
+ return !MBB->succ_empty();
+ }
+ }
+
+ return false;
+}
+
+/// Returns false if \p VirtReg is known to not be live into the current block.
+bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+ if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg)))
+ return !MBB->pred_empty();
+
+  // See if the first \p Limit defs of the register are all in the current block.
+ static const unsigned Limit = 8;
+ unsigned C = 0;
+ for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+ if (DefInst.getParent() != MBB || ++C >= Limit) {
+ MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return !MBB->pred_empty();
+ }
+ }
+
+ return false;
+}
+
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
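
Editor's aside: the mayLiveOut/mayLiveIn helpers in the hunk above are a bounded scan, not real liveness analysis. The following is a rough self-contained restatement; the Use type and the unconditional conservative answer are simplifications, and the real code additionally checks successor/predecessor blocks and self-loops.

#include <vector>

struct Use { int Block; };                  // stand-in for a use of a virtual register

bool mayLiveAcross(const std::vector<Use> &Uses, int CurBlock,
                   std::vector<bool> &MayLiveAcrossBlocks, unsigned RegIdx) {
  static const unsigned Limit = 8;          // same bound as the pass
  unsigned Seen = 0;
  for (const Use &U : Uses) {
    if (U.Block != CurBlock || ++Seen >= Limit) {
      MayLiveAcrossBlocks[RegIdx] = true;   // remember the conservative answer
      return true;
    }
  }
  return false;                             // every known use is local to CurBlock
}
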
@@ -374,7 +432,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
}
/// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
if (LiveVirtRegs.empty())
return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -382,6 +440,8 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
for (LiveReg &LR : LiveVirtRegs) {
if (!LR.PhysReg)
continue;
+ if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+ continue;
spillVirtReg(MI, LR);
}
LiveVirtRegs.clear();
@@ -558,8 +618,48 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
setPhysRegState(PhysReg, VirtReg);
}
+static bool isCoalescable(const MachineInstr &MI) {
+ return MI.isFullCopy();
+}
+
+unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
+ static const unsigned ChainLengthLimit = 3;
+ unsigned C = 0;
+ do {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+
+ MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
+ if (!VRegDef || !isCoalescable(*VRegDef))
+ return 0;
+ Reg = VRegDef->getOperand(1).getReg();
+ } while (++C <= ChainLengthLimit);
+ return 0;
+}
+
+/// Check if any of \p VirtReg's definitions is a copy. If it is follow the
+/// chain of copies to check whether we reach a physical register we can
+/// coalesce with.
+unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
+ static const unsigned DefLimit = 3;
+ unsigned C = 0;
+ for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
+ if (isCoalescable(MI)) {
+ unsigned Reg = MI.getOperand(1).getReg();
+ Reg = traceCopyChain(Reg);
+ if (Reg != 0)
+ return Reg;
+ }
+
+ if (++C >= DefLimit)
+ break;
+ }
+ return 0;
+}
+
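
Editor's aside: traceCopies/traceCopyChain walk a short chain of full copies looking for a physical-register hint. A self-contained sketch of the same walk follows; CopiedFrom maps a vreg to the source register of its unique defining full copy, and treating ids below PhysRegLimit as physical registers is an assumption of this sketch only.

#include <unordered_map>

constexpr unsigned PhysRegLimit = 64;       // ids below this stand in for physregs
constexpr unsigned ChainLengthLimit = 3;    // same bound as the pass

unsigned traceCopyChainSketch(unsigned Reg,
                              const std::unordered_map<unsigned, unsigned> &CopiedFrom) {
  for (unsigned Step = 0; Step <= ChainLengthLimit; ++Step) {
    if (Reg < PhysRegLimit)
      return Reg;                           // reached a physreg: usable as a hint
    auto It = CopiedFrom.find(Reg);
    if (It == CopiedFrom.end())
      return 0;                             // not defined by a single full copy
    Reg = It->second;                       // follow the copy one step back
  }
  return 0;                                 // chain too long, give up
}
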
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
const unsigned VirtReg = LR.VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
@@ -567,32 +667,54 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
- << " in class " << TRI->getRegClassName(&RC) << '\n');
+ << " in class " << TRI->getRegClassName(&RC)
+ << " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
- MRI->isAllocatable(Hint) && RC.contains(Hint)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Hint0) &&
+ MRI->isAllocatable(Hint0) && RC.contains(Hint0)) {
// Ignore the hint if we would have to spill a dirty register.
- unsigned Cost = calcSpillCost(Hint);
+ unsigned Cost = calcSpillCost(Hint0);
if (Cost < spillDirty) {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+ << '\n');
if (Cost)
- definePhysReg(MI, Hint, regFree);
- assignVirtToPhysReg(LR, Hint);
+ definePhysReg(MI, Hint0, regFree);
+ assignVirtToPhysReg(LR, Hint0);
return;
+ } else {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+ << "occupied\n");
}
+ } else {
+ Hint0 = 0;
}
- // First try to find a completely free register.
- ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
- for (MCPhysReg PhysReg : AllocationOrder) {
- if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
- assignVirtToPhysReg(LR, PhysReg);
+ // Try other hint.
+ unsigned Hint1 = traceCopies(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint1) &&
+ MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+ !isRegUsedInInstr(Hint1)) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint1);
+ if (Cost < spillDirty) {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+ << '\n');
+ if (Cost)
+ definePhysReg(MI, Hint1, regFree);
+ assignVirtToPhysReg(LR, Hint1);
return;
+ } else {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+ << "occupied\n");
}
+ } else {
+ Hint1 = 0;
}
MCPhysReg BestReg = 0;
unsigned BestCost = spillImpossible;
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
for (MCPhysReg PhysReg : AllocationOrder) {
LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
unsigned Cost = calcSpillCost(PhysReg);
@@ -602,6 +724,10 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
assignVirtToPhysReg(LR, PhysReg);
return;
}
+
+ if (PhysReg == Hint1 || PhysReg == Hint0)
+ Cost -= spillPrefBonus;
+
if (Cost < BestCost) {
BestReg = PhysReg;
BestCost = Cost;
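
Editor's aside: the effect of spillPrefBonus is simply to bias the cost comparison toward the two hints. A self-contained sketch of that selection loop; costs are assumed to come from the spillClean/spillDirty enum, so the bonus cannot underflow.

#include <vector>

enum : unsigned { spillClean = 50, spillDirty = 100,
                  spillPrefBonus = 20, spillImpossible = ~0u };

unsigned pickReg(const std::vector<unsigned> &Order,
                 const std::vector<unsigned> &SpillCost,
                 unsigned Hint0, unsigned Hint1) {
  unsigned BestReg = 0, BestCost = spillImpossible;
  for (unsigned PhysReg : Order) {
    unsigned Cost = SpillCost[PhysReg];
    if (Cost == 0)
      return PhysReg;                       // a completely free register wins outright
    if (PhysReg == Hint0 || PhysReg == Hint1)
      Cost -= spillPrefBonus;               // prefer registers we could coalesce with
    if (Cost < BestCost) {
      BestReg = PhysReg;
      BestCost = Cost;
    }
  }
  return BestReg;                           // 0 means nothing was allocatable
}
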
@@ -624,6 +750,31 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
assignVirtToPhysReg(LR, BestReg);
}
+void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
+ assert(MO.isUndef() && "expected undef use");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg");
+
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ MCPhysReg PhysReg;
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ PhysReg = LRI->PhysReg;
+ } else {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ assert(!AllocationOrder.empty() && "Allocation order must not be empty");
+ PhysReg = AllocationOrder[0];
+ }
+
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
+ MO.setSubReg(0);
+ }
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(true);
+}
+
/// Allocates a register for VirtReg and mark it as dirty.
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint) {
@@ -941,12 +1092,23 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Second scan.
// Allocate virtreg uses.
+ bool HasUndefUse = false;
for (unsigned I = 0; I != VirtOpEnd; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ // There is no need to allocate a register for an undef use.
+ continue;
+ }
+
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
+
LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
MCPhysReg PhysReg = LR.PhysReg;
CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
@@ -955,6 +1117,22 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
}
+ // Allocate undef operands. This is a separate step because in a situation
+  // like ` = OP undef %X, %X` both operands need the same register
+  // assignment, so we should perform the normal assignment first.
+ if (HasUndefUse) {
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ assert(MO.isUndef() && "Should only have undef virtreg uses left");
+ allocVirtRegUndef(MO);
+ }
+ }
+
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
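
Editor's aside: the undef-use handling above is a two-pass scheme. A self-contained sketch with a simplified UseOp type shows why the second pass must run after the normal assignments so that `... = OP undef %X, %X` ends up with a single register; the round-robin pick in the first pass is a stand-in for real allocation.

#include <unordered_map>
#include <vector>

struct UseOp { unsigned VReg; bool IsUndef; unsigned PhysReg = 0; };

void allocateUses(std::vector<UseOp> &Uses,
                  const std::vector<unsigned> &AllocationOrder,
                  std::unordered_map<unsigned, unsigned> &Assignment) {
  // First pass: real uses drive the assignment (round-robin stand-in).
  for (UseOp &U : Uses)
    if (!U.IsUndef) {
      auto It = Assignment.find(U.VReg);
      if (It == Assignment.end())
        It = Assignment.emplace(
            U.VReg, AllocationOrder[Assignment.size() % AllocationOrder.size()]).first;
      U.PhysReg = It->second;
    }
  // Second pass: undef uses just follow whatever the real uses got.
  for (UseOp &U : Uses)
    if (U.IsUndef) {
      auto It = Assignment.find(U.VReg);
      U.PhysReg = (It != Assignment.end()) ? It->second : AllocationOrder.front();
    }
}
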
@@ -979,10 +1157,24 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// definitions may be used later on and we do not want to reuse
// those for virtual registers in between.
LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI);
+ spillAll(MI, /*OnlyLiveOut*/ false);
}
// Third scan.
+ // Mark all physreg defs as used before allocating virtreg defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->isAllocatable(Reg))
+ continue;
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ }
+
+ // Fourth scan.
// Allocate defs and collect dead defs.
for (unsigned I = 0; I != DefOpEnd; ++I) {
const MachineOperand &MO = MI.getOperand(I);
@@ -990,11 +1182,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ // We have already dealt with phys regs in the previous scan.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- }
MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
VirtDead.push_back(Reg);
@@ -1089,7 +1279,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Spill all physical registers holding virtual registers now.
LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
- spillAll(MBB.getFirstTerminator());
+ spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
// Erase all the coalesced copies. We are delaying it until now because
// LiveVirtRegs might refer to the instrs.
@@ -1118,6 +1308,8 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
unsigned NumVirtRegs = MRI->getNumVirtRegs();
StackSlotForVirtReg.resize(NumVirtRegs);
LiveVirtRegs.setUniverse(NumVirtRegs);
+ MayLiveAcrossBlocks.clear();
+ MayLiveAcrossBlocks.resize(NumVirtRegs);
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineBasicBlock &MBB : MF)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 81b21b442437..771fc46415db 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1,9 +1,8 @@
//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -138,7 +137,7 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::init(0), cl::Hidden);
static cl::opt<bool> ConsiderLocalIntervalCost(
- "condsider-local-interval-cost", cl::Hidden,
+ "consider-local-interval-cost", cl::Hidden,
cl::desc("Consider the cost of local intervals created by a split "
"candidate when choosing the best split candidate."),
cl::init(false));
@@ -465,7 +464,8 @@ private:
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
- bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&,
+ const SmallVirtRegSet&);
bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost);
@@ -479,9 +479,11 @@ private:
const SmallVirtRegSet &FixedRegisters);
unsigned tryAssign(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<unsigned>&,
+ const SmallVirtRegSet&);
unsigned tryEvict(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&, unsigned = ~0u);
+ SmallVectorImpl<unsigned>&, unsigned,
+ const SmallVirtRegSet&);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg);
@@ -508,7 +510,8 @@ private:
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<unsigned>&,
+ const SmallVirtRegSet&);
unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
SmallVectorImpl<unsigned> &,
SmallVirtRegSet &, unsigned);
@@ -758,7 +761,8 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
Order.rewind();
unsigned PhysReg;
while ((PhysReg = Order.next()))
@@ -776,7 +780,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) {
evictInterference(VirtReg, Hint, NewVRegs);
return Hint;
}
@@ -794,7 +798,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
<< Cost << '\n');
- unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -866,7 +870,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
- bool IsHint, EvictionCost &MaxCost) {
+ bool IsHint, EvictionCost &MaxCost,
+ const SmallVirtRegSet &FixedRegisters) {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
return false;
@@ -896,6 +901,13 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
"Only expecting virtual register interference from query");
+
+ // Do not allow eviction of a virtual register if we are in the middle
+ // of last-chance recoloring and this virtual register is one that we
+ // have scavenged a physical register for.
+ if (FixedRegisters.count(Intf->reg))
+ return false;
+
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
return false;
@@ -1094,7 +1106,8 @@ bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs,
- unsigned CostPerUseLimit) {
+ unsigned CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) {
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
TimePassesIsEnabled);
@@ -1142,7 +1155,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
continue;
}
- if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
continue;
// Best so far.
@@ -2248,8 +2262,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
// Constrain to VirtReg's live range.
- unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
- Uses.front().getRegSlot()) - RMS.begin();
+ unsigned ri =
+ llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
unsigned re = RMS.size();
for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
// Look for Uses[i] <= RMS <= Uses[i+1].
@@ -2444,7 +2458,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned>&NewVRegs) {
+ SmallVectorImpl<unsigned>&NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
if (getStage(VirtReg) >= RS_Spill)
return 0;
@@ -2472,7 +2487,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (SA->didRepairRange()) {
// VirtReg has changed, so all cached queries are invalid.
Matrix->invalidateVirtRegs();
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
return PhysReg;
}
@@ -2611,6 +2626,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
DenseMap<unsigned, unsigned> VirtRegToPhysReg;
// Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
// this recoloring "session".
+ assert(!FixedRegisters.count(VirtReg.reg));
FixedRegisters.insert(VirtReg.reg);
SmallVector<unsigned, 4> CurrentNewVRegs;
@@ -2858,14 +2874,14 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
if (!Instr.isFullCopy())
continue;
// Look for the other end of the copy.
- unsigned OtherReg = Instr.getOperand(0).getReg();
+ Register OtherReg = Instr.getOperand(0).getReg();
if (OtherReg == Reg) {
OtherReg = Instr.getOperand(1).getReg();
if (OtherReg == Reg)
continue;
}
// Get the current assignment.
- unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
? OtherReg
: VRM->getPhys(OtherReg);
// Push the collected information.
@@ -3022,7 +3038,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
// If VirtReg got an assignment, the eviction info is no longre relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
// When NewVRegs is not empty, we may have made decisions such as evicting
@@ -3049,7 +3065,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// get a second chance until they have been split.
if (Stage != RS_Split)
if (unsigned PhysReg =
- tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+ tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
+ FixedRegisters)) {
unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
// If VirtReg has a hint and that hint is broken record this
// virtual register as a recoloring candidate for broken hint.
@@ -3079,7 +3096,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Stage < RS_Spill) {
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
- unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
// If VirtReg got split, the eviction info is no longre relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c19001c8403d..7a5a6c148ed4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,9 +1,8 @@
//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index 66c7c5cd7dbf..b37dfada7101 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -1,9 +1,8 @@
//===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -78,14 +77,48 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
return new RegUsageInfoCollector();
}
+// TODO: Move to a hook somewhere?
+
+// Return true if it is useful to track the used registers for IPRA / no CSR
+// optimizations. This is not useful for entry points, and computing the
+// register usage information is expensive.
+static bool isCallableFunction(const MachineFunction &MF) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_KERNEL:
+ return false;
+ default:
+ return true;
+ }
+}
+
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const LLVMTargetMachine &TM = MF.getTarget();
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
- << " -------------------- \n");
- LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+ << " -------------------- \nFunction Name : "
+ << MF.getName() << '\n');
+
+ // Analyzing the register usage may be expensive on some targets.
+ if (!isCallableFunction(MF)) {
+ LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
+ return false;
+ }
+
+ // If there are no callers, there's no point in computing more precise
+ // register usage here.
+ if (MF.getFunction().use_empty()) {
+ LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
+ return false;
+ }
std::vector<uint32_t> RegMask;
@@ -111,6 +144,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
};
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
+ // FIXME: Rewrite to use regunits.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
// Don't count registers that are saved and restored.
if (SavedRegs.test(PReg))
@@ -136,11 +170,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
<< " function optimized for not having CSR.\n");
}
- for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
- if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
- LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
+ LLVM_DEBUG(
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
+ dbgs() << printReg(PReg, TRI) << " ";
+ }
- LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
+ dbgs() << " \n----------------------------------------\n";
+ );
PRUI.storeUpdateRegUsageInfo(F, RegMask);
@@ -155,38 +192,17 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
// Target will return the set of registers that it saves/restores as needed.
SavedRegs.clear();
TFI.determineCalleeSaves(MF, SavedRegs);
+ if (SavedRegs.none())
+ return;
// Insert subregs.
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- if (SavedRegs.test(Reg))
- for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR)
+ MCPhysReg Reg = CSRegs[i];
+ if (SavedRegs.test(Reg)) {
+ // Save subregisters
+ for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
SavedRegs.set(*SR);
- }
-
- // Insert any register fully saved via subregisters.
- for (const TargetRegisterClass *RC : TRI.regclasses()) {
- if (!RC->CoveredBySubRegs)
- continue;
-
- for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
- if (SavedRegs.test(PReg))
- continue;
-
- // Check if PReg is fully covered by its subregs.
- if (!RC->contains(PReg))
- continue;
-
- // Add PReg to SavedRegs if all subregs are saved.
- bool AllSubRegsSaved = true;
- for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
- if (!SavedRegs.test(*SR)) {
- AllSubRegsSaved = false;
- break;
- }
- if (AllSubRegsSaved)
- SavedRegs.set(PReg);
}
}
}
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index 256de295821d..fc4be82d215e 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -1,9 +1,8 @@
//=--- RegUsageInfoPropagate.cpp - Register Usage Information Propagation --=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index add8faec97d4..530e0cccf1d4 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -1,9 +1,8 @@
//===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,6 +90,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
assert(RC && "no register class given");
RCInfo &RCI = RegClass[RC->getID()];
+ auto &STI = MF->getSubtarget();
// Raw register count, including all reserved regs.
unsigned NumRegs = RC->getNumRegs();
@@ -115,7 +115,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CalleeSavedAliases[PhysReg])
+ if (CalleeSavedAliases[PhysReg] &&
+ !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2a06d5e95fbb..2db6ab454cea 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1,9 +1,8 @@
//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -105,6 +104,19 @@ static cl::opt<unsigned> LateRematUpdateThreshold(
"repeated work. "),
cl::init(100));
+static cl::opt<unsigned> LargeIntervalSizeThreshold(
+ "large-interval-size-threshold", cl::Hidden,
+ cl::desc("If the valnos size of an interval is larger than the threshold, "
+ "it is regarded as a large interval. "),
+ cl::init(100));
+
+static cl::opt<unsigned> LargeIntervalFreqThreshold(
+ "large-interval-freq-threshold", cl::Hidden,
+    cl::desc("For a large interval, if it is coalesced with other live "
+ "intervals many times more than the threshold, stop its "
+ "coalescing to control the compile time. "),
+ cl::init(100));
+
namespace {
class RegisterCoalescer : public MachineFunctionPass,
@@ -153,6 +165,10 @@ namespace {
/// lateLiveIntervalUpdate is called.
DenseSet<unsigned> ToBeUpdated;
+  /// Record how many times a large live interval with many valnos
+  /// has been tried to join with other live intervals.
+ DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
+
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -195,6 +211,11 @@ namespace {
/// Attempt joining two virtual registers. Return true on success.
bool joinVirtRegs(CoalescerPair &CP);
+ /// If a live interval has many valnos and is coalesced with other
+  /// live intervals many times, we regard such a live interval as having
+  /// a high compile-time cost.
+ bool isHighCostLiveInterval(LiveInterval &LI);
+
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
@@ -337,9 +358,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
-static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
- unsigned &Src, unsigned &Dst,
- unsigned &SrcSub, unsigned &DstSub) {
+LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
+ const MachineInstr *MI, unsigned &Src,
+ unsigned &Dst, unsigned &SrcSub,
+ unsigned &DstSub) {
if (MI->isCopy()) {
Dst = MI->getOperand(0).getReg();
DstSub = MI->getOperand(0).getSubReg();
@@ -672,8 +694,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
for (LiveRange::Segment &ASeg : IntA.segments) {
if (ASeg.valno != AValNo) continue;
- LiveInterval::iterator BI =
- std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
+ LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
if (BI != IntB.begin())
--BI;
for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
@@ -903,23 +924,32 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
LaneBitmask MaskA;
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
for (LiveInterval::SubRange &SA : IntA.subranges()) {
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
- assert(ASubValNo != nullptr);
+ // Even if we are dealing with a full copy, some lanes can
+ // still be undefined.
+ // E.g.,
+ // undef A.subLow = ...
+ // B = COPY A <== A.subHigh is undefined here and does
+ // not have a value number.
+ if (!ASubValNo)
+ continue;
MaskA |= SA.LaneMask;
- IntB.refineSubRanges(Allocator, SA.LaneMask,
- [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
- (LiveInterval::SubRange &SR) {
- VNInfo *BSubValNo = SR.empty()
- ? SR.getNextValue(CopyIdx, Allocator)
- : SR.getVNInfoAt(CopyIdx);
- assert(BSubValNo != nullptr);
- auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
- ShrinkB |= P.second;
- if (P.first)
- BSubValNo->def = ASubValNo->def;
- });
+ IntB.refineSubRanges(
+ Allocator, SA.LaneMask,
+ [&Allocator, &SA, CopyIdx, ASubValNo,
+ &ShrinkB](LiveInterval::SubRange &SR) {
+ VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
+ : SR.getVNInfoAt(CopyIdx);
+ assert(BSubValNo != nullptr);
+ auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ ShrinkB |= P.second;
+ if (P.first)
+ BSubValNo->def = ASubValNo->def;
+ },
+ Indexes, *TRI);
}
// Go over all subranges of IntB that have not been covered by IntA,
// and delete the segments starting at CopyIdx. This can happen if
@@ -947,7 +977,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
/// predecessor of BB2, and if B is not redefined on the way from A = B
-/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
+/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
/// execution goes through the path from BB0 to BB2. We may move B = A
/// to the predecessor without such reversed copy.
/// So we will transform the program from:
@@ -1494,7 +1524,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// CoalescerPair may have a new register class with adjusted subreg indices
// at this point.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+ if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ return nullptr;
SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
@@ -1994,19 +2025,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
if (CP.isFlipped()) {
// Physreg is copied into vreg
// %y = COPY %physreg_x
- // ... //< no other def of %x here
+ // ... //< no other def of %physreg_x here
// use %y
// =>
// ...
- // use %x
+ // use %physreg_x
CopyMI = MRI->getVRegDef(SrcReg);
} else {
// VReg is copied into physreg:
// %y = def
- // ... //< no other def or use of %y here
- // %y = COPY %physreg_x
+ // ... //< no other def or use of %physreg_x here
+ // %physreg_x = COPY %y
// =>
- // %y = def
+ // %physreg_x = def
// ...
if (!MRI->hasOneNonDBGUse(SrcReg)) {
LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
@@ -3010,7 +3041,9 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// If a subrange starts at the copy then an undefined value has been
// copied and we must remove that subrange value as well.
VNInfo *ValueOut = Q.valueOutOrDead();
- if (ValueOut != nullptr && Q.valueIn() == nullptr) {
+ if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
+ (V.Identical && V.Resolution == CR_Erase &&
+ ValueOut->def == Def))) {
LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
<< " at " << Def << "\n");
SmallVector<SlotIndex,8> EndPoints;
@@ -3019,7 +3052,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Mark value number as unused.
ValueOut->markUnused();
- if (V.Identical && S.Query(OtherDef).valueOut()) {
+ if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
// If V is identical to V.OtherVNI (and S was live at OtherDef),
// then we can't simply prune V from S. V needs to be replaced
// with V.OtherVNI.
@@ -3241,16 +3274,29 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneBitmask LaneMask,
CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LI.refineSubRanges(Allocator, LaneMask,
- [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
- if (SR.empty()) {
- SR.assign(ToMerge, Allocator);
- } else {
- // joinSubRegRange() destroys the merged range, so we need a copy.
- LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
- }
- });
+ LI.refineSubRanges(
+ Allocator, LaneMask,
+ [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
+ if (SR.empty()) {
+ SR.assign(ToMerge, Allocator);
+ } else {
+ // joinSubRegRange() destroys the merged range, so we need a copy.
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
+ }
+ },
+ *LIS->getSlotIndexes(), *TRI);
+}
+
+bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
+ if (LI.valnos.size() < LargeIntervalSizeThreshold)
+ return false;
+ auto &Counter = LargeLIVisitCounter[LI.reg];
+ if (Counter < LargeIntervalFreqThreshold) {
+ Counter++;
+ return false;
+ }
+ return true;
}
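
Editor's aside: isHighCostLiveInterval is a straightforward compile-time throttle. A self-contained sketch of the same counting scheme; the thresholds mirror the new cl::opt defaults, and NumValNos stands in for LI.valnos.size().

#include <cstddef>
#include <unordered_map>

constexpr std::size_t LargeIntervalSizeThreshold = 100;
constexpr unsigned long LargeIntervalFreqThreshold = 100;

bool isHighCost(unsigned Reg, std::size_t NumValNos,
                std::unordered_map<unsigned, unsigned long> &VisitCount) {
  if (NumValNos < LargeIntervalSizeThreshold)
    return false;                           // small interval: always cheap enough
  unsigned long &Count = VisitCount[Reg];
  if (Count < LargeIntervalFreqThreshold) {
    ++Count;                                // still under budget, allow the join
    return false;
  }
  return true;                              // budget exhausted, skip coalescing
}
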
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
@@ -3265,6 +3311,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
+ if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
+ return false;
+
// First compute NewVNInfo and the simple value mappings.
// Detect impossible conflicts early.
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
@@ -3474,7 +3523,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (!UseTerminalRule)
return false;
unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
- isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
+ if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return false;
// Check if the destination of this copy has any other affinity.
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
// If SrcReg is a physical register, the copy won't be coalesced.
@@ -3498,8 +3548,9 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
continue;
unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
- isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
- OtherSubReg);
+ if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ OtherSubReg))
+ return false;
if (OtherReg == SrcReg)
OtherReg = OtherSrcReg;
// Check if OtherReg is a non-terminal.
@@ -3620,6 +3671,7 @@ void RegisterCoalescer::releaseMemory() {
WorkList.clear();
DeadDefs.clear();
InflateRegs.clear();
+ LargeLIVisitCounter.clear();
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 1a46f6d053e6..f505d46cd338 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,9 +1,8 @@
//===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 1099e468e885..7d9b3aa9b2d7 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -1,9 +1,8 @@
//===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -846,7 +845,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin());
SlotIndex SlotIdx;
- if (RequireIntervals)
+ if (RequireIntervals && !CurrPos->isDebugInstr())
SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
// Open the top of the region using slot indexes.
@@ -856,6 +855,12 @@ void RegPressureTracker::recedeSkipDebugValues() {
void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
recedeSkipDebugValues();
+ if (CurrPos->isDebugValue()) {
+ // It's possible to only have debug_value instructions and hit the start of
+ // the block.
+ assert(CurrPos == MBB->begin());
+ return;
+ }
const MachineInstr &MI = *CurrPos;
RegisterOperands RegOpers;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 3660586c1358..bb19110e6d70 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -1,9 +1,8 @@
//===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -534,7 +533,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
- int SPAdj) {
+ int SPAdj, bool AllowSpill) {
MachineInstr &MI = *I;
const MachineFunction &MF = *MI.getMF();
// Consider all allocatable registers in the register class initially
@@ -565,6 +564,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
+ if (!AllowSpill)
+ return 0;
+
ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
Scavenged.Restore = &*std::prev(UseMI);
@@ -576,7 +578,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
- bool RestoreAfter, int SPAdj) {
+ bool RestoreAfter, int SPAdj,
+ bool AllowSpill) {
const MachineBasicBlock &MBB = *To->getParent();
const MachineFunction &MF = *MBB.getParent();
@@ -590,21 +593,25 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator SpillBefore = P.second;
assert(Reg != 0 && "No register left to scavenge!");
// Found an available register?
- if (SpillBefore != MBB.end()) {
- MachineBasicBlock::iterator ReloadAfter =
- RestoreAfter ? std::next(MBBI) : MBBI;
- MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
- if (ReloadBefore != MBB.end())
- LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
- ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
- Scavenged.Restore = &*std::prev(SpillBefore);
- LiveUnits.removeReg(Reg);
- LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
- << " until " << *SpillBefore);
- } else {
+ if (SpillBefore == MBB.end()) {
LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
- << '\n');
+ << '\n');
+ return Reg;
}
+
+ if (!AllowSpill)
+ return 0;
+
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ if (ReloadBefore != MBB.end())
+ LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+ << " until " << *SpillBefore);
return Reg;
}
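
Editor's aside: the new AllowSpill parameter lets a caller probe for a free register without committing to an emergency spill. A hedged sketch of the intended call pattern; the Scavenge callable here is a stand-in, not the RegScavenger API.

#include <functional>

unsigned scavengeWithFallback(const std::function<unsigned(bool AllowSpill)> &Scavenge) {
  if (unsigned Reg = Scavenge(/*AllowSpill=*/false))
    return Reg;                             // a free register was found, nothing spilled
  return Scavenge(/*AllowSpill=*/true);     // last resort: spill to free one up
}
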
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 6b9880a8913f..6858d7233bc5 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -1,9 +1,8 @@
//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index 156d1c81c238..22cff48c3051 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -1,9 +1,8 @@
//===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index a02302e6ff99..019de6554d2a 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -1,9 +1,8 @@
//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -27,6 +26,7 @@ using namespace llvm;
#define DEBUG_TYPE "reset-machine-function"
STATISTIC(NumFunctionsReset, "Number of functions reset");
+STATISTIC(NumFunctionsVisited, "Number of functions visited");
namespace {
class ResetMachineFunction : public MachineFunctionPass {
@@ -51,6 +51,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ ++NumFunctionsVisited;
// No matter what happened, whether we successfully selected the function
// or not, nothing is going to use the vreg types after us. Make sure they
// disappear.
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index c356fb57ac6d..a6bc7330e2cc 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -1,9 +1,8 @@
//===- SafeStack.cpp - Safe Stack Insertion -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -372,7 +371,7 @@ Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
if (!StackGuardVar)
StackGuardVar =
F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
- return IRB.CreateLoad(StackGuardVar, "StackGuard");
+ return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard");
}
void SafeStack::findInsts(Function &F,
@@ -453,7 +452,8 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
++NumUnsafeStackRestorePoints;
IRB.SetInsertPoint(I->getNextNode());
- Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
+ Value *CurrentTop =
+ DynamicTop ? IRB.CreateLoad(StackPtrTy, DynamicTop) : StaticTop;
IRB.CreateStore(CurrentTop, UnsafeStackPtr);
}
@@ -462,7 +462,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
AllocaInst *StackGuardSlot, Value *StackGuard) {
- Value *V = IRB.CreateLoad(StackGuardSlot);
+ Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot);
Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -475,8 +475,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
/* Unreachable */ true, Weights);
IRBuilder<> IRBFail(CheckTerm);
// FIXME: respect -fsanitize-trap / -ftrap-function here?
- Constant *StackChkFail = F.getParent()->getOrInsertFunction(
- "__stack_chk_fail", IRB.getVoidTy());
+ FunctionCallee StackChkFail =
+ F.getParent()->getOrInsertFunction("__stack_chk_fail", IRB.getVoidTy());
IRBFail.CreateCall(StackChkFail, {});
}
@@ -550,7 +550,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (StackGuardSlot) {
unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
- Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
ConstantInt::get(Int32Ty, -Offset));
Value *NewAI =
IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
@@ -569,14 +569,14 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
- Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
ConstantInt::get(Int32Ty, -Offset));
Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
Arg->getName() + ".unsafe-byval");
// Replace alloc with the new location.
replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
- DIExpression::NoDeref, -Offset, DIExpression::NoDeref);
+ DIExpression::ApplyOffset, -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);
@@ -587,12 +587,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRB.SetInsertPoint(AI);
unsigned Offset = SSL.getObjectOffset(AI);
- uint64_t Size = getStaticAllocaAllocationSize(AI);
- if (Size == 0)
- Size = 1; // Don't create zero-sized stack objects.
-
- replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref,
- -Offset, DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::ApplyOffset,
+ -Offset);
replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
// Replace uses of the alloca with the new location.
@@ -609,20 +605,16 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
InsertBefore = User;
IRBuilder<> IRBUser(InsertBefore);
- Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8*
+ Value *Off = IRBUser.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
ConstantInt::get(Int32Ty, -Offset));
Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
- if (auto *PHI = dyn_cast<PHINode>(User)) {
+ if (auto *PHI = dyn_cast<PHINode>(User))
// PHI nodes may have multiple incoming edges from the same BB (why??),
// all must be updated at once with the same incoming value.
- auto *BB = PHI->getIncomingBlock(U);
- for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I)
- if (PHI->getIncomingBlock(I) == BB)
- PHI->setIncomingValue(I, Replacement);
- } else {
+ PHI->setIncomingValueForBlock(PHI->getIncomingBlock(U), Replacement);
+ else
U.set(Replacement);
- }
}
AI->eraseFromParent();
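The new PHINode::setIncomingValueForBlock call replaces the hand-written loop removed above. A sketch of what that helper does, under the assumption that at least one incoming edge comes from BB; setAllIncomingFrom is an illustrative name.

#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative only: every incoming edge of PN that comes from BB receives
// the same replacement value, which is what the removed loop did by hand.
static void setAllIncomingFrom(PHINode &PN, BasicBlock *BB, Value *NewV) {
  for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I)
    if (PN.getIncomingBlock(I) == BB)
      PN.setIncomingValue(I, NewV);
}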
@@ -637,7 +629,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRB.SetInsertPoint(BasePointer->getNextNode());
Value *StaticTop =
- IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+ IRB.CreateGEP(Int8Ty, BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
"unsafe_stack_static_top");
IRB.CreateStore(StaticTop, UnsafeStackPtr);
return StaticTop;
@@ -660,7 +652,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
uint64_t TySize = DL.getTypeAllocSize(Ty);
Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
- Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+ Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(StackPtrTy, UnsafeStackPtr),
+ IntPtrTy);
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
@@ -682,8 +675,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0,
- DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, 0);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -698,7 +690,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (II->getIntrinsicID() == Intrinsic::stacksave) {
IRBuilder<> IRB(II);
- Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+ Instruction *LI = IRB.CreateLoad(StackPtrTy, UnsafeStackPtr);
LI->takeName(II);
II->replaceAllUsesWith(LI);
II->eraseFromParent();
@@ -727,7 +719,7 @@ void SafeStack::TryInlinePointerAddress() {
if (!isa<CallInst>(UnsafeStackPtr))
return;
- if(F.hasFnAttribute(Attribute::OptimizeNone))
+ if(F.hasOptNone())
return;
CallSite CS(UnsafeStackPtr);
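F.hasOptNone() here (and F.hasOptSize() in the DAGCombiner change further down) are convenience wrappers over the function-attribute checks they replace. A minimal sketch with illustrative helper names:

#include "llvm/IR/Function.h"

using namespace llvm;

// Illustrative only.
static bool skipBecauseOptNone(const Function &F) {
  // Equivalent to F.hasFnAttribute(Attribute::OptimizeNone).
  return F.hasOptNone();
}

static bool tuneForSize(const Function &F) {
  // True for optsize as well as minsize functions; this is what the removed
  // optForSize() call computed.
  return F.hasOptSize();
}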
@@ -783,7 +775,7 @@ bool SafeStack::run() {
if (DISubprogram *SP = F.getSubprogram())
IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
- Value *Fn = F.getParent()->getOrInsertFunction(
+ FunctionCallee Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
UnsafeStackPtr = IRB.CreateCall(Fn);
} else {
@@ -793,7 +785,7 @@ bool SafeStack::run() {
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
Instruction *BasePointer =
- IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+ IRB.CreateLoad(StackPtrTy, UnsafeStackPtr, false, "unsafe_stack_ptr");
assert(BasePointer->getType() == StackPtrTy);
AllocaInst *StackGuardSlot = nullptr;
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 726c38002817..04a5c4b6d892 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -1,9 +1,8 @@
//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h
index 902e63ebeb7e..b696b1b6baed 100644
--- a/lib/CodeGen/SafeStackColoring.h
+++ b/lib/CodeGen/SafeStackColoring.h
@@ -1,9 +1,8 @@
//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp
index 07b6a5d1883b..09964866e4d3 100644
--- a/lib/CodeGen/SafeStackLayout.cpp
+++ b/lib/CodeGen/SafeStackLayout.cpp
@@ -1,9 +1,8 @@
//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h
index ac531d800f6e..349d9a8b595c 100644
--- a/lib/CodeGen/SafeStackLayout.h
+++ b/lib/CodeGen/SafeStackLayout.h
@@ -1,9 +1,8 @@
//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 2684f92b3a93..7776dffb4e9c 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -1,10 +1,9 @@
//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
// intrinsics
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -124,7 +123,7 @@ static bool isConstantIntVector(Value *Mask) {
// %10 = extractelement <16 x i1> %mask, i32 2
// br i1 %10, label %cond.load4, label %else5
//
-static void scalarizeMaskedLoad(CallInst *CI) {
+static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -144,7 +143,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// Short-cut if the mask is all-true.
if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
- Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
return;
@@ -152,9 +151,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// Adjust alignment for the scalar instruction.
AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr fron i8* to EltTy*
+ // Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
@@ -165,11 +164,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult =
- Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load, Idx);
}
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
@@ -184,8 +181,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// br i1 %mask_1, label %cond.load, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
// Create "cond" block
//
@@ -197,11 +193,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
"cond.load");
Builder.SetInsertPoint(InsertPt);
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx));
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
@@ -222,6 +216,8 @@ static void scalarizeMaskedLoad(CallInst *CI) {
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
+
+ ModifiedDT = true;
}
// Translate a masked store intrinsic, like
@@ -250,7 +246,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// store i32 %6, i32* %7
// br label %else2
// . . .
-static void scalarizeMaskedStore(CallInst *CI) {
+static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -276,9 +272,9 @@ static void scalarizeMaskedStore(CallInst *CI) {
// Adjust alignment for the scalar instruction.
AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr fron i8* to EltTy*
+ // Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
@@ -286,9 +282,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
}
CI->eraseFromParent();
@@ -301,8 +296,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// br i1 %mask_1, label %cond.store, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
// Create "cond" block
//
@@ -314,9 +308,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
Builder.SetInsertPoint(InsertPt);
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
// Create "else" block, fill it in the next iteration
@@ -329,6 +322,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
IfBlock = NewIfBlock;
}
CI->eraseFromParent();
+
+ ModifiedDT = true;
}
// Translate a masked gather intrinsic like
@@ -360,13 +355,14 @@ static void scalarizeMaskedStore(CallInst *CI) {
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI) {
+static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
Value *Ptrs = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
VectorType *VecType = cast<VectorType>(CI->getType());
+ Type *EltTy = VecType->getElementType();
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
@@ -385,12 +381,11 @@ static void scalarizeMaskedGather(CallInst *CI) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
LoadInst *Load =
- Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(
- VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
+ Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
}
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
@@ -404,8 +399,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
// br i1 %Mask1, label %cond.load, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
// Create "cond" block
//
@@ -416,13 +411,11 @@ static void scalarizeMaskedGather(CallInst *CI) {
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
LoadInst *Load =
- Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx),
- "Res" + Twine(Idx));
+ Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+ Value *NewVResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -441,6 +434,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
+
+ ModifiedDT = true;
}
// Translate a masked scatter intrinsic, like
@@ -469,7 +464,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
-static void scalarizeMaskedScatter(CallInst *CI) {
+static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -493,12 +488,11 @@ static void scalarizeMaskedScatter(CallInst *CI) {
// Shorten the way if the mask is a vector of constants.
if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
}
CI->eraseFromParent();
@@ -511,8 +505,8 @@ static void scalarizeMaskedScatter(CallInst *CI) {
// %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
// br i1 %Mask1, label %cond.store, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
// Create "cond" block
//
@@ -523,10 +517,8 @@ static void scalarizeMaskedScatter(CallInst *CI) {
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
Builder.SetInsertPoint(InsertPt);
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
// Create "else" block, fill it in the next iteration
@@ -538,6 +530,156 @@ static void scalarizeMaskedScatter(CallInst *CI) {
IfBlock = NewIfBlock;
}
CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
+
+static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Mask = CI->getArgOperand(1);
+ Value *PassThru = CI->getArgOperand(2);
+
+ VectorType *VecType = cast<VectorType>(CI->getType());
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // The result vector
+ Value *VResult = PassThru;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // br i1 %mask_1, label %cond.load, label %else
+ //
+
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Idx);
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1);
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
+
+ // Move the pointer if there are more blocks to come.
+ Value *NewPtr;
+ if ((Idx + 1) != VectorWidth)
+ NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ ResultPhi->addIncoming(NewVResult, CondBlock);
+ ResultPhi->addIncoming(VResult, PrevIfBlock);
+ VResult = ResultPhi;
+
+ // Add a PHI for the pointer if this isn't the last iteration.
+ if ((Idx + 1) != VectorWidth) {
+ PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+ PtrPhi->addIncoming(NewPtr, CondBlock);
+ PtrPhi->addIncoming(Ptr, PrevIfBlock);
+ Ptr = PtrPhi;
+ }
+ }
+
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
+
+static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+
+ VectorType *VecType = cast<VectorType>(Src->getType());
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Type *EltTy = VecType->getVectorElementType();
+
+ unsigned VectorWidth = VecType->getNumElements();
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // br i1 %mask_1, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Builder.CreateAlignedStore(OneElt, Ptr, 1);
+
+ // Move the pointer if there are more blocks to come.
+ Value *NewPtr;
+ if ((Idx + 1) != VectorWidth)
+ NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Add a PHI for the pointer if this isn't the last iteration.
+ if ((Idx + 1) != VectorWidth) {
+ PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+ PtrPhi->addIncoming(NewPtr, CondBlock);
+ PtrPhi->addIncoming(Ptr, PrevIfBlock);
+ Ptr = PtrPhi;
+ }
+ }
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
}
bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
@@ -587,33 +729,35 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
break;
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
- if (!TTI->isLegalMaskedLoad(CI->getType())) {
- scalarizeMaskedLoad(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedLoad(CI->getType()))
+ return false;
+ scalarizeMaskedLoad(CI, ModifiedDT);
+ return true;
case Intrinsic::masked_store:
- if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedStore(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedStore(CI, ModifiedDT);
+ return true;
case Intrinsic::masked_gather:
- if (!TTI->isLegalMaskedGather(CI->getType())) {
- scalarizeMaskedGather(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedGather(CI->getType()))
+ return false;
+ scalarizeMaskedGather(CI, ModifiedDT);
+ return true;
case Intrinsic::masked_scatter:
- if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedScatter(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedScatter(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_expandload:
+ if (TTI->isLegalMaskedExpandLoad(CI->getType()))
+ return false;
+ scalarizeMaskedExpandLoad(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_compressstore:
+ if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedCompressStore(CI, ModifiedDT);
+ return true;
}
}
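Each scalarize* helper now reports through ModifiedDT that it split basic blocks, instead of the caller setting the flag itself, which also covers the two newly handled intrinsics. A sketch of the driver-side pattern this flag supports; runScalarization and optimizeBlock are illustrative stand-ins, not the pass's real entry points.

#include "llvm/IR/Function.h"

using namespace llvm;

// Illustrative stub: expand unsupported masked intrinsics in BB and set
// ModifiedDT when a block was split.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
  (void)BB;
  ModifiedDT = false;
  return false;
}

static bool runScalarization(Function &F) {
  bool EverMadeChange = false;
  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(); I != F.end();) {
      BasicBlock *BB = &*I++;
      bool ModifiedDTOnIteration = false;
      MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
      // The block iterators are stale once the CFG changed; start over.
      if (ModifiedDTOnIteration)
        break;
    }
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}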
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6c135b3d69d6..dc3a11670a16 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -1,9 +1,8 @@
//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -38,6 +38,10 @@ using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
+STATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added");
+STATISTIC(NumTopoInits,
+ "Number of times the topological order has been recomputed");
+
#ifndef NDEBUG
static cl::opt<bool> StressSchedOpt(
"stress-sched", cl::Hidden, cl::init(false),
@@ -458,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
// On insertion of the edge X->Y, the algorithm first marks by calling DFS
// the nodes reachable from Y, and then shifts them using Shift to lie
// immediately after X in Index2Node.
+
+ // Cancel pending updates, mark as valid.
+ Dirty = false;
+ Updates.clear();
+
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
@@ -498,6 +507,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
}
Visited.resize(DAGSize);
+ NumTopoInits++;
#ifndef NDEBUG
// Check correctness of the ordering
@@ -510,6 +520,31 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
#endif
}
+void ScheduleDAGTopologicalSort::FixOrder() {
+ // Recompute from scratch after new nodes have been added.
+ if (Dirty) {
+ InitDAGTopologicalSorting();
+ return;
+ }
+
+ // Otherwise apply updates one-by-one.
+ for (auto &U : Updates)
+ AddPred(U.first, U.second);
+ Updates.clear();
+}
+
+void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
+ // Recomputing the order from scratch is likely more efficient than applying
+ // updates one-by-one for too many updates. The current cut-off is arbitrarily
+ // chosen.
+ Dirty = Dirty || Updates.size() > 10;
+
+ if (Dirty)
+ return;
+
+ Updates.emplace_back(Y, X);
+}
+
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
int UpperBound, LowerBound;
LowerBound = Node2Index[Y->NodeNum];
@@ -524,6 +559,8 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
// Recompute topological indexes.
Shift(Visited, LowerBound, UpperBound);
}
+
+ NumNewPredsAdded++;
}
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
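AddPredQueued and FixOrder make edge insertion lazy: new edges are queued, and the order is only repaired when a query (IsReachable, WillCreateCycle) actually needs it, either by replaying the queue or, past the cut-off, by rebuilding from scratch. A standalone sketch of the same pattern in plain C++; the names and integer node IDs are illustrative, not the ScheduleDAG API.

#include <cstddef>
#include <utility>
#include <vector>

// Illustrative only.
struct LazyTopoOrder {
  std::vector<std::pair<int, int>> Updates; // queued (succ, pred) edges
  bool Dirty = false;

  void addPredQueued(int Succ, int Pred) {
    // Past an arbitrary cut-off, a full recompute is cheaper than replaying
    // the queue edge by edge.
    Dirty = Dirty || Updates.size() > 10;
    if (Dirty)
      return;
    Updates.emplace_back(Succ, Pred);
  }

  void fixOrder() {
    if (Dirty) {
      recomputeFromScratch(); // also drops any pending updates
      return;
    }
    for (auto &U : Updates)
      applyOneEdge(U.first, U.second);
    Updates.clear();
  }

  void recomputeFromScratch() {
    Dirty = false;
    Updates.clear();
    // ... rebuild the topological index here ...
  }

  void applyOneEdge(int Succ, int Pred) {
    (void)Succ;
    (void)Pred;
    // ... shift the affected index range, as AddPred does above ...
  }
};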
@@ -665,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ FixOrder();
// Is SU reachable from TargetSU via successor edges?
if (IsReachable(SU, TargetSU))
return true;
@@ -677,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
+ FixOrder();
// If insertion of the edge SU->TargetSU would create a cycle
// then there is a path from TargetSU to SU.
int UpperBound, LowerBound;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 99406ed1496a..d5ad7e92299d 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1,9 +1,8 @@
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -115,7 +114,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
RemoveKillFlags(RemoveKillFlags),
UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction().getContext()))) {
+ Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -132,7 +131,8 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
const DataLayout &DL) {
auto allMMOsOkay = [&]() {
for (const MachineMemOperand *MMO : MI->memoperands()) {
- if (MMO->isVolatile())
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic())
return false;
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
@@ -743,6 +743,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// done.
Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
+ // Track all instructions that may raise floating-point exceptions.
+ // These do not depend on one other (or normal loads or stores), but
+ // must not be rescheduled across global barriers. Note that we don't
+ // really need a "map" here since we don't track those MIs by value;
+ // using the same Value2SUsMap data type here is simply a matter of
+ // convenience.
+ Value2SUsMap FPExceptions;
+
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
@@ -870,10 +878,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addBarrierChain(Loads);
addBarrierChain(NonAliasStores);
addBarrierChain(NonAliasLoads);
+ addBarrierChain(FPExceptions);
continue;
}
+ // Instructions that may raise FP exceptions may not be moved
+ // across any global barriers.
+ if (MI.mayRaiseFPException()) {
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ FPExceptions.insert(SU, UnknownValue);
+
+ if (FPExceptions.size() >= HugeRegion) {
+ LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
+ Value2SUsMap empty;
+ reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
+ }
+ }
+
// If it's not a store or a variant load, we're done.
if (!MI.mayStore() &&
!(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
@@ -968,6 +992,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Uses.clear();
CurrentVRegDefs.clear();
CurrentVRegUses.clear();
+
+ Topo.MarkDirty();
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
@@ -1089,22 +1115,21 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
if (!MI.isBundled()) {
toggleKills(MRI, LiveRegs, MI, true);
} else {
- MachineBasicBlock::instr_iterator First = MI.getIterator();
- if (MI.isBundle()) {
+ MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
+ if (MI.isBundle())
toggleKills(MRI, LiveRegs, MI, false);
- ++First;
- }
+
// Some targets make the (questionable) assumption that the instructions
// inside the bundle are ordered and consequently only the last use of
// a register inside the bundle can kill it.
- MachineBasicBlock::instr_iterator I = std::next(First);
+ MachineBasicBlock::instr_iterator I = std::next(Bundle);
while (I->isBundledWithSucc())
++I;
do {
if (!I->isDebugInstr())
toggleKills(MRI, LiveRegs, *I, true);
--I;
- } while(I != First);
+ } while (I != Bundle);
}
}
}
@@ -1146,6 +1171,23 @@ std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
+bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+ return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
+bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index ff2085aae865..8d04711f07c6 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 4301372179b8..a9fda56f2dac 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c97721..49c922f560fa 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,9 +1,8 @@
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -112,6 +111,10 @@ static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<unsigned> TokenFactorInlineLimit(
+ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+ cl::desc("Limit the number of operands to inline for Token Factors"));
+
namespace {
class DAGCombiner {
@@ -138,6 +141,10 @@ namespace {
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
+ /// This records all nodes attempted to be added to the worklist since we
+ /// last considered a new worklist entry. Because duplicate nodes are not
+ /// added to the worklist, this can differ from the tail of the worklist.
+ SmallSetVector<SDNode *, 32> PruningList;
/// Set of nodes which have been combined (at least once).
///
@@ -155,6 +162,37 @@ namespace {
AddToWorklist(Node);
}
+ // Prune potentially dangling nodes. This is called after
+ // any visit to a node, but should also be called during a visit after any
+ // failed combine which may have created a DAG node.
+ void clearAddedDanglingWorklistEntries() {
+ // Check any nodes added to the worklist to see if they are prunable.
+ while (!PruningList.empty()) {
+ auto *N = PruningList.pop_back_val();
+ if (N->use_empty())
+ recursivelyDeleteUnusedNodes(N);
+ }
+ }
+
+ SDNode *getNextWorklistEntry() {
+ // Before we do any work, remove nodes that are not in use.
+ clearAddedDanglingWorklistEntries();
+ SDNode *N = nullptr;
+ // The Worklist holds the SDNodes in order, but it may contain null
+ // entries.
+ while (!N && !Worklist.empty()) {
+ N = Worklist.pop_back_val();
+ }
+
+ if (N) {
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
+ }
+ return N;
+ }
+
/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
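The PruningList keeps track of every node touched since the last combine step; getNextWorklistEntry first deletes any of those that ended up unused, so dead nodes created by failed combines never reach a visit. A compact standalone sketch of that discipline; the integer IDs and names are illustrative, not the DAGCombiner members.

#include <unordered_map>
#include <vector>

// Illustrative only.
struct PruningWorklist {
  std::vector<int> Worklist;
  std::unordered_map<int, unsigned> WorklistMap; // for de-duplication
  std::vector<int> PruningList;                  // recently touched items

  // Stand-in for SDNode::use_empty().
  bool isDead(int) const { return false; }

  void add(int N) {
    PruningList.push_back(N);
    if (WorklistMap.emplace(N, Worklist.size()).second)
      Worklist.push_back(N);
  }

  int next() {
    // Drop dead entries first, as clearAddedDanglingWorklistEntries() does.
    while (!PruningList.empty()) {
      int N = PruningList.back();
      PruningList.pop_back();
      if (isDead(N))
        WorklistMap.erase(N);
    }
    while (!Worklist.empty()) {
      int N = Worklist.back();
      Worklist.pop_back();
      if (WorklistMap.erase(N))
        return N; // a live, not-yet-processed entry
    }
    return -1; // worklist exhausted
  }
};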
@@ -162,7 +200,7 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+ ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
MaximumLegalStoreInBits = 0;
for (MVT VT : MVT::all_valuetypes())
@@ -172,6 +210,11 @@ namespace {
MaximumLegalStoreInBits = VT.getSizeInBits();
}
+ void ConsiderForPruning(SDNode *N) {
+ // Mark this for potential pruning.
+ PruningList.insert(N);
+ }
+
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
void AddToWorklist(SDNode *N) {
@@ -183,6 +226,8 @@ namespace {
if (N->getOpcode() == ISD::HANDLENODE)
return;
+ ConsiderForPruning(N);
+
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
}
@@ -190,6 +235,7 @@ namespace {
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
+ PruningList.remove(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
@@ -229,8 +275,15 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt Demanded = APInt::getAllOnesValue(BitWidth);
- return SimplifyDemandedBits(Op, Demanded);
+ APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, DemandedBits);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
}
/// Check the specified vector node value to see if it can be simplified or
@@ -238,12 +291,13 @@ namespace {
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt Demanded = APInt::getAllOnesValue(NumElts);
- return SimplifyDemandedVectorElts(Op, Demanded);
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedVectorElts(Op, DemandedElts);
}
- bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
- bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
@@ -291,15 +345,16 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
- SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitADDLike(SDNode *N);
+ SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDSAT(SDNode *N);
SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
- SDValue visitUADDO(SDNode *N);
+ SDValue visitADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitSUBC(SDNode *N);
- SDValue visitUSUBO(SDNode *N);
+ SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
@@ -316,8 +371,7 @@ namespace {
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
- SDValue visitSMULO(SDNode *N);
- SDValue visitUMULO(SDNode *N);
+ SDValue visitMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
@@ -386,6 +440,7 @@ namespace {
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
SDValue visitSTORE(SDNode *N);
+ SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
@@ -400,13 +455,19 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitVECREDUCE(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
- SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -466,6 +527,7 @@ namespace {
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
+ SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -475,7 +537,8 @@ namespace {
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
- SDValue VecIn2, unsigned LeftIdx);
+ SDValue VecIn2, unsigned LeftIdx,
+ bool DidSplitVec);
SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
@@ -484,7 +547,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
+ bool isAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -642,6 +705,18 @@ public:
}
};
+class WorklistInserter : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistInserter(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ // FIXME: Ideally we could add N to the worklist, but this causes exponential
+ // compile time costs in large DAGs, e.g. Halide.
+ void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
+};
+
} // end anonymous namespace
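WorklistInserter is a SelectionDAG::DAGUpdateListener: the base class registers the listener with the DAG in its constructor and unregisters it in its destructor, so the single stack object declared in Run() below is active for exactly the duration of the combine. A sketch of the same idiom; NodeCounter and Count are illustrative.

#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

namespace {

// Illustrative only: counts nodes created while the listener is in scope.
class NodeCounter : public SelectionDAG::DAGUpdateListener {
  unsigned &Count;

public:
  NodeCounter(SelectionDAG &DAG, unsigned &Count)
      : SelectionDAG::DAGUpdateListener(DAG), Count(Count) {}

  void NodeInserted(SDNode *) override { ++Count; }
};

} // end anonymous namespace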
//===----------------------------------------------------------------------===//
@@ -697,20 +772,23 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
+ bool ForCodeSize,
unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) return 2;
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
// Don't allow anything with multiple uses unless we know it is free.
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- if (!Op.hasOneUse())
- if (!(Op.getOpcode() == ISD::FP_EXTEND &&
- TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
+ if (!Op.hasOneUse() &&
+ !(Op.getOpcode() == ISD::FP_EXTEND &&
+ TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
// Don't recurse exponentially.
- if (Depth > 6) return 0;
+ if (Depth > 6)
+ return 0;
switch (Op.getOpcode()) {
default: return false;
@@ -721,7 +799,25 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
}
case ISD::FADD:
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
@@ -733,15 +829,14 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, Depth + 1))
+ Options, ForCodeSize, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->NoSignedZerosFPMath &&
- !Flags.hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -751,30 +846,31 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, Depth + 1))
+ Options, ForCodeSize, Depth + 1))
return V;
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
}
}
/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, unsigned Depth = 0) {
- const TargetOptions &Options = DAG.getTarget().Options;
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
-
- const SDNodeFlags Flags = Op.getNode()->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = Op->getFlags();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
@@ -783,24 +879,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
V.changeSign();
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
case ISD::FADD:
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+ Depth + 1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(0), Flags);
case ISD::FSUB:
// fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
if (N0CFP->isZero())
return Op.getOperand(1);
@@ -812,28 +925,33 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+ Depth + 1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1), Flags);
+ LegalOperations, ForCodeSize,
+ Depth + 1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, ForCodeSize,
+ Depth + 1));
case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
- Op.getOperand(1));
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
}
}
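isNegatibleForFree and GetNegatedExpression now carry a ForCodeSize flag so that TLI.isFPImmLegal can answer differently when the function is optimized for size (a negated constant that needs an extra constant-pool entry may be fine at -O2 but not at -Os). A small sketch of that query; canMaterializeNegated is an illustrative name.

#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Illustrative only: ask the target whether the negated immediate is still
// cheap enough, taking the size-vs-speed trade-off into account.
static bool canMaterializeNegated(const TargetLowering &TLI, const APFloat &V,
                                  EVT VT, bool ForCodeSize) {
  APFloat Neg = V;
  Neg.changeSign();
  return TLI.isFPImmLegal(Neg, VT, ForCodeSize);
}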
@@ -924,53 +1042,113 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1, SDNodeFlags Flags) {
- // Don't reassociate reductions.
- if (Flags.hasVectorReduction())
- return SDValue();
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL,
+ SDValue N0,
+ SDValue N1) {
+ // Currently this only tries to ensure we don't undo the GEP splits done by
+ // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+ // we check if the following transformation would be problematic:
+ // (load/store (add, (add, x, offset1), offset2)) ->
+ // (load/store (add, x, offset1+offset2)).
- EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
- if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
- return SDValue();
- }
- if (N0.hasOneUse()) {
- // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
- // use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- AddToWorklist(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
- }
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+ return false;
+
+ if (N0.hasOneUse())
+ return false;
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(N1);
+ if (!C1 || !C2)
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt &C2APIntVal = C2->getAPIntValue();
+ if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ return false;
+
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getBitWidth() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N0->uses()) {
+ auto LoadStore = dyn_cast<MemSDNode>(Node);
+ if (LoadStore) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
}
}
- if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
- if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
- return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ return false;
+}
+
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+
+ if (N0.getOpcode() != Opc)
+ return SDValue();
+
+ // Don't reassociate reductions.
+ if (N0->getFlags().hasVectorReduction())
+ return SDValue();
+
+ if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
+ }
+ if (N0.hasOneUse()) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+ if (!OpNode.getNode())
return SDValue();
- }
- if (N1.hasOneUse()) {
- // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
- // use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
- if (!OpNode.getNode())
- return SDValue();
- AddToWorklist(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
- }
+ AddToWorklist(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
+ return SDValue();
+}
+
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
+ // Floating-point reassociation is not allowed without loose FP math.
+ if (N0.getValueType().isFloatingPoint() ||
+ N1.getValueType().isFloatingPoint())
+ if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
+ return SDValue();
+
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+ return Combined;
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+ return Combined;
return SDValue();
}
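To make the two patterns handled here concrete, a small sketch of the algebra on plain integers; the SDValue and FoldConstantArithmetic plumbing is omitted.

#include <cassert>

// (op (op x, c1), c2) -> (op x, (op c1, c2)); the commuted form
// (op c2, (op x, c1)) is covered by calling the helper with swapped operands.
static long reassocAdd(long X, long C1, long C2) { return X + (C1 + C2); }

int main() {
  long X = 7, C1 = 3, C2 = 5;
  assert((X + C1) + C2 == reassocAdd(X, C1, C2)); // first try: (N0, N1)
  assert(C2 + (X + C1) == reassocAdd(X, C1, C2)); // second try: (N1, N0) swapped
}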
@@ -1026,10 +1204,11 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
-bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
return false;
// Revisit the node.
@@ -1048,12 +1227,13 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
- if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
- 0, AssumeSingleUse))
+ if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+ TLO, 0, AssumeSingleUse))
return false;
// Revisit the node.
@@ -1383,6 +1563,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
+ WorklistInserter AddNodes(*this);
+
// Add all the dag nodes to the worklist.
for (SDNode &Node : DAG.allnodes())
AddToWorklist(&Node);
@@ -1392,19 +1574,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
- // While the worklist isn't empty, find a node and try to combine it.
- while (!WorklistMap.empty()) {
- SDNode *N;
- // The Worklist holds the SDNodes in order, but it may contain null entries.
- do {
- N = Worklist.pop_back_val();
- } while (!N);
-
- bool GoodWorklistEntry = WorklistMap.erase(N);
- (void)GoodWorklistEntry;
- assert(GoodWorklistEntry &&
- "Found a worklist entry without a corresponding map entry!");
-
+ // While we have a valid worklist entry node, try to combine it.
+ while (SDNode *N = getNextWorklistEntry()) {
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
@@ -1493,9 +1664,11 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SSUBSAT:
case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
- case ISD::UADDO: return visitUADDO(N);
+ case ISD::SADDO:
+ case ISD::UADDO: return visitADDO(N);
case ISD::SUBC: return visitSUBC(N);
- case ISD::USUBO: return visitUSUBO(N);
+ case ISD::SSUBO:
+ case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
@@ -1509,8 +1682,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
- case ISD::SMULO: return visitSMULO(N);
- case ISD::UMULO: return visitUMULO(N);
+ case ISD::SMULO:
+ case ISD::UMULO: return visitMULO(N);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1590,8 +1763,22 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
}
return SDValue();
}
@@ -1644,7 +1831,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
}
- // If N is a commutative binary node, try eliminate it if the commuted
+ // If N is a commutative binary node, try to eliminate it if the commuted
// version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
@@ -1693,6 +1880,12 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // If the sole user is a token factor, we should make sure we have a
+ // chance to merge them together. This prevents TF chains from inhibiting
+ // optimizations.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+ AddToWorklist(*(N->use_begin()));
+
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1704,8 +1897,19 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // Iterate through token factors. TFs grows when new token factors are
// encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
- SDNode *TF = TFs[i];
+ // Limit number of nodes to inline, to avoid quadratic compile times.
+ // We have to add the outstanding Token Factors to Ops, otherwise we might
+ // drop Ops from the resulting Token Factors.
+ if (Ops.size() > TokenFactorInlineLimit) {
+ for (unsigned j = i; j < TFs.size(); j++)
+ Ops.emplace_back(TFs[j], 0);
+ // Drop unprocessed Token Factors from TFs, so we do not add them to the
+ // combiner worklist later.
+ TFs.resize(i);
+ break;
+ }
+ SDNode *TF = TFs[i];
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
@@ -1719,8 +1923,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
- // Clean up in case the token factor is removed.
- AddToWorklist(Op.getNode());
Changed = true;
break;
}
@@ -1737,6 +1939,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
+ // Re-visit inlined Token Factors, to clean them up in case they have been
+ // removed. Skip the first Token Factor, as this is the current node.
+ for (unsigned i = 1, e = TFs.size(); i < e; i++)
+ AddToWorklist(TFs[i]);
+
// Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
// another operand. In general we must climb to the EntryNode, but we can exit
@@ -1803,6 +2010,8 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
for (const SDValue &Op : CurNode->op_values())
AddToWorklist(i, Op.getNode(), CurOpNumber);
break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
case ISD::CopyFromReg:
case ISD::CopyToReg:
AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
@@ -1831,9 +2040,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (SeenChains.count(Op.getNode()) == 0)
PrunedOps.push_back(Op);
}
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
} else {
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ Result = DAG.getTokenFactor(SDLoc(N), Ops);
}
}
return Result;
@@ -1869,7 +2078,8 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
- assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
+ assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+ "Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
@@ -1940,7 +2150,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
- return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SelectOp->setFlags(BO->getFlags());
+ return SelectOp;
}
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
@@ -1990,6 +2202,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
+ // TODO - support non-uniform vector amounts.
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
@@ -2006,7 +2219,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
EVT VT = ShiftOp.getValueType();
SDValue ShAmt = ShiftOp.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+ if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
return SDValue();
// Eliminate the 'not' by adjusting the shift and add/sub constant:
@@ -2019,7 +2232,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}
-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here; it could, for example, be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
@@ -2058,13 +2274,22 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return N0;
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+ // fold ((A-c1)+c2) -> (A+(c2-c1))
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+ SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
+ N0.getOperand(1).getNode());
+ assert(Sub && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+ }
+
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
+ N0.getOperand(0).getNode());
+ assert(Add && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// add (sext i1 X), 1 -> zext (not i1 X)
@@ -2097,9 +2322,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
- return RADD;
-
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+ if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+ return RADD;
+ }
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
@@ -2116,6 +2342,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
+ // fold ((A-B)+(C-A)) -> (C-B)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N0.getOperand(1));
+
+ // fold ((A-B)+(B-C)) -> (A-C)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N1.getOperand(1));
+
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
@@ -2148,31 +2386,93 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
+ // fold (add (umax X, C), -C) --> (usubsat X, C)
+ if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return (!Max && !Op) ||
+ (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
+ };
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
+ /*AllowUndefs*/ true))
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (isOneOrOneSplat(N1)) {
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
+ // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
+ if (N0.getOpcode() == ISD::ADD ||
+ N0.getOpcode() == ISD::UADDO ||
+ N0.getOpcode() == ISD::SADDO) {
+ SDValue A, Xor;
+
+ if (isBitwiseNot(N0.getOperand(0))) {
+ A = N0.getOperand(1);
+ Xor = N0.getOperand(0);
+ } else if (isBitwiseNot(N0.getOperand(1))) {
+ A = N0.getOperand(0);
+ Xor = N0.getOperand(1);
+ }
+
+ if (Xor)
+ return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
+ }
+
+ // Look for:
+ // add (add x, y), 1
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+ }
+ }
+
+ // (x - y) + -1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isAllOnesOrAllOnesSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
- if (SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
- // fold (add (xor a, -1), 1) -> (sub 0, a)
- if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
-
- if (SDValue Combined = visitADDLike(N0, N1, N))
- return Combined;
-
- if (SDValue Combined = visitADDLike(N1, N0, N))
- return Combined;
-
return SDValue();
}
@@ -2246,6 +2546,10 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
+ EVT VT = V.getNode()->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
+ return SDValue();
+
// If the result is masked, then no matter what kind of bool it is we can
// return. If it isn't, then we need to make sure the bool type is either 0 or
// 1 and not other values.
@@ -2257,7 +2561,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
return SDValue();
}
-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Given the operands of an add/sub operation, see if the 2nd operand is a
+/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
+/// the opcode and bypass the mask operation.
+static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N0.getValueType();
+ if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
+ // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+}
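The identity this helper relies on, checked on concrete 32-bit values; this is only a sketch, with the AssertSext i1 source modeled as a value known to be 0 or -1.

#include <cassert>
#include <cstdint>

int main() {
  // X plays the role of the AssertSext i1 source: every bit equals the sign bit.
  for (int32_t X : {0, -1}) {
    int32_t N0 = 123;
    int32_t Masked = X & 1;          // (and X, 1) is 0 or 1
    assert(N0 + Masked == N0 - X);   // add N0, (and X, 1) --> sub N0, X
    assert(N0 - Masked == N0 + X);   // sub N0, (and X, 1) --> add N0, X
  }
}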
+
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
@@ -2269,21 +2592,42 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndOp0 = N1.getOperand(0);
- unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
- unsigned DestBits = VT.getScalarSizeInBits();
-
- // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
- // and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
- }
+ if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
+ return V;
- // add (sext i1), X -> sub X, (zext i1)
+ // Look for:
+ // add (add x, 1), y
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+ }
+
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
+
+ // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
+ // rather than 'add 0/-1' (the zext should get folded).
+ // add (sext i1 Y), X --> sub X, (zext i1 Y)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
- N0.getOperand(0).getValueType() == MVT::i1 &&
- !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
@@ -2344,8 +2688,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
-static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+static SDValue flipBoolean(SDValue V, const SDLoc &DL,
SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+
SDValue Cst;
switch (TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
@@ -2353,35 +2699,60 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
Cst = DAG.getConstant(1, DL, VT);
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- Cst = DAG.getConstant(-1, DL, VT);
+ Cst = DAG.getAllOnesConstant(DL, VT);
break;
}
return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}
-static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
- if (V.getOpcode() != ISD::XOR) return false;
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (!Const) return false;
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a boolean
+ * no matter what, use flipBoolean.
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool Force) {
+ if (Force && isa<ConstantSDNode>(V))
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+
+ if (V.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
+ if (!Const)
+ return SDValue();
+ EVT VT = V.getValueType();
+
+ bool IsFlip = false;
switch(TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
- return Const->isOne();
+ IsFlip = Const->isOne();
+ break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return Const->isAllOnesValue();
+ IsFlip = Const->isAllOnesValue();
+ break;
case TargetLowering::UndefinedBooleanContent:
- return (Const->getAPIntValue() & 0x01) == 1;
+ IsFlip = (Const->getAPIntValue() & 0x01) == 1;
+ break;
}
- llvm_unreachable("Unsupported boolean content");
+
+ if (IsFlip)
+ return V.getOperand(0);
+ if (Force)
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return SDValue();
}
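A minimal sketch of the flip constant chosen per boolean content, on plain 32-bit values; only the ZeroOrOne and ZeroOrNegativeOne cases are shown, and the enum below is local to the example.

#include <cassert>
#include <cstdint>

enum class BoolContents { ZeroOrOne, ZeroOrNegativeOne };

// flipBoolean XORs with 1 or with all-ones depending on the target's boolean
// representation; extractBooleanFlip recognizes exactly such an XOR and
// returns its first operand, so the flip then costs no new node.
static int32_t flipBool(int32_t V, BoolContents BC) {
  int32_t Cst = (BC == BoolContents::ZeroOrOne) ? 1 : -1;
  return V ^ Cst;
}

int main() {
  assert(flipBool(0, BoolContents::ZeroOrOne) == 1);
  assert(flipBool(1, BoolContents::ZeroOrOne) == 0);
  assert(flipBool(0, BoolContents::ZeroOrNegativeOne) == -1);
  assert(flipBool(-1, BoolContents::ZeroOrNegativeOne) == 0);
}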
-SDValue DAGCombiner::visitUADDO(SDNode *N) {
+SDValue DAGCombiner::visitADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
+ bool IsSigned = (ISD::SADDO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2392,40 +2763,42 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
- return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
- // fold (uaddo x, 0) -> x + no carry out
- if (isNullConstant(N1))
+ // fold (addo x, 0) -> x + no carry out
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
- // If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ if (!IsSigned) {
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
- // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
- if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
- SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
- DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
- }
+ // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
- if (SDValue Combined = visitUADDOLike(N0, N1, N))
- return Combined;
+ if (SDValue Combined = visitUADDOLike(N0, N1, N))
+ return Combined;
- if (SDValue Combined = visitUADDOLike(N1, N0, N))
- return Combined;
+ if (SDValue Combined = visitUADDOLike(N1, N0, N))
+ return Combined;
+ }
return SDValue();
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
- auto VT = N0.getValueType();
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
@@ -2484,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
- EVT CarryVT = CarryIn.getValueType();
-
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2496,16 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
- // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
- if (isBitwiseNot(N0) && isNullConstant(N1) &&
- isBooleanFlip(CarryIn, CarryVT, TLI)) {
- SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
- DAG.getConstant(0, DL, N0.getValueType()),
- N0.getOperand(0), CarryIn.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
- }
-
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
@@ -2515,12 +2877,112 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
+/**
+ * If we are facing some sort of diamond carry propagation pattern, try to
+ * break it up to generate something like:
+ * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in the number of operations required,
+ * but because the carry is now linearized, other transforms can kick in and
+ * optimize the DAG.
+ *
+ * Patterns typically look something like
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (addcarry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (addcarry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ SDValue X, SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
+ if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+ return SDValue();
+ if (Carry1.getOpcode() != ISD::UADDO)
+ return SDValue();
+
+ SDValue Z;
+
+ /**
+ * First look for a suitable Z. It will present itself in the form of
+ * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ */
+ if (Carry0.getOpcode() == ISD::ADDCARRY &&
+ isNullConstant(Carry0.getOperand(1))) {
+ Z = Carry0.getOperand(2);
+ } else if (Carry0.getOpcode() == ISD::UADDO &&
+ isOneConstant(Carry0.getOperand(1))) {
+ EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+ Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+ } else {
+ // We couldn't find a suitable Z.
+ return SDValue();
+ }
+
+  auto cancelDiamond = [&](SDValue A, SDValue B) {
+ SDLoc DL(N);
+ SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+ Combiner.AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+ DAG.getConstant(0, DL, X.getValueType()),
+ NewY.getValue(1));
+ };
+
+ /**
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (addcarry *, 0, Z)
+ */
+ if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+ }
+
+ /**
+ * (addcarry A, 0, Z)
+ * |
+ * Sum
+ * |
+ * (uaddo *, B)
+ */
+ if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+ }
+
+ if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
+ // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+ if (isBitwiseNot(N0))
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+ SDLoc DL(N);
+ SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
+
// Iff the flag result is dead:
// (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+ // or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
- (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+ N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
@@ -2529,35 +2991,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
 * When one of the addcarry arguments is itself a carry, we may be facing
 * a diamond carry propagation, in which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
- *
- * We are trying to get:
- * (addcarry X, 0, (addcarry A, B, Z):Carry)
*/
if (auto Y = getAsCarry(TLI, N1)) {
- /**
- * (uaddo A, B)
- * / \
- * Carry Sum
- * | \
- * | (addcarry *, 0, Z)
- * | /
- * \ Carry
- * | /
- * (addcarry X, *, *)
- */
- if (Y.getOpcode() == ISD::UADDO &&
- CarryIn.getResNo() == 1 &&
- CarryIn.getOpcode() == ISD::ADDCARRY &&
- isNullConstant(CarryIn.getOperand(1)) &&
- CarryIn.getOperand(0) == Y.getValue(0)) {
- auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
- Y.getOperand(0), Y.getOperand(1),
- CarryIn.getOperand(2));
- AddToWorklist(NewY.getNode());
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
- DAG.getConstant(0, SDLoc(N), N0.getValueType()),
- NewY.getValue(1));
- }
+ // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ return R;
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ return R;
}
return SDValue();
@@ -2620,7 +3060,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// -(X >>s 31) -> (X >>u 31)
if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
- if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+ if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
@@ -2662,16 +3102,48 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+ // fold (A+C1)-C2 -> A+(C1-C2)
+ if (N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ }
+
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+ SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N11.getNode());
+ assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
}
+ // fold (A-C1)-C2 -> A-(C1+C2)
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold (c1-A)-c2 -> (c1-c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ }
+
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2728,6 +3200,63 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+ return V;
+
+ // (x - y) - 1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ // Look for:
+ // sub y, (xor x, -1)
+ // And if the target does not like this form then turn into:
+ // add (add x, y), 1
+ if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+ }
+
+ // Hoist one-use addition by non-opaque constant:
+ // (x + C) - y -> (x - y) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+ }
+ // y - (x + C) -> (y - x) - C
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+ }
+ // (x - C) - y -> (x - y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+ }
+ // (C - x) - y -> C - (x + y)
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+ }
+
+ // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
+ // rather than 'sub 0/1' (the sext should get folded).
+ // sub X, (zext i1 Y) --> add X, (sext i1 Y)
+ if (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
+ }
+
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
@@ -2772,7 +3301,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
SDValue ShAmt = N1.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+ if (ShAmtC &&
+ ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
}
@@ -2846,12 +3376,11 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
+ bool IsSigned = (ISD::SSUBO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2861,17 +3390,25 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
- // fold (usubo x, x) -> 0 + no borrow
+ // fold (subo x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
- // fold (usubo x, 0) -> x + no borrow
- if (isNullConstant(N1))
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+  // fold (subo x, c) -> (addo x, -c)
+ if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+
+ // fold (subo x, 0) -> x + no borrow
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (isAllOnesConstant(N0))
+ if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getConstant(0, DL, CarryVT));
@@ -3012,13 +3549,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
MathOp = ISD::SUB;
if (MathOp != ISD::DELETED_NODE) {
- unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
- : (MulC + 1).logBase2();
- assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
- "Not expecting multiply-by-constant that could have simplified");
+ unsigned ShAmt =
+ MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ assert(ShAmt < VT.getScalarSizeInBits() &&
+ "multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(ShAmt, DL, VT));
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
@@ -3069,7 +3606,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -3612,7 +4149,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
- SDLoc DL(N);
unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue LogBase2 = BuildLogBase2(N1, DL);
SDValue SRLAmt = DAG.getNode(
@@ -3753,22 +4289,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSMULO(SDNode *N) {
- // (smulo x, 2) -> (saddo x, x)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (C2->getAPIntValue() == 2)
- return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
- N->getOperand(0), N->getOperand(0));
-
- return SDValue();
-}
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+ bool IsSigned = (ISD::SMULO == N->getOpcode());
-SDValue DAGCombiner::visitUMULO(SDNode *N) {
- // (umulo x, 2) -> (uaddo x, x)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ // (mulo x, 2) -> (addo x, x)
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
- return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
- N->getOperand(0), N->getOperand(0));
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
+ N->getVTList(), N->getOperand(0), N->getOperand(0));
return SDValue();
}
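The (mulo x, 2) -> (addo x, x) rewrite preserves the overflow result as well as the value. A quick spot check, assuming the GCC/Clang overflow builtins are available (they are not used by the DAG code itself):

#include <cassert>
#include <cstdint>

int main() {
  // x * 2 and x + x agree on both the value and the overflow bit.
  for (uint8_t X : {0, 1, 127, 128, 255}) {
    uint8_t M, A;
    bool MulOvf = __builtin_mul_overflow(X, (uint8_t)2, &M);
    bool AddOvf = __builtin_add_overflow(X, X, &A);
    assert(M == A && MulOvf == AddOvf);
  }
}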
@@ -4075,6 +4603,33 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, Or, Zero, CC1);
}
+
+ // Turn compare of constants whose difference is 1 bit into add+and+setcc.
+ // TODO - support non-uniform vector amounts.
+ if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
+ // Match a shared variable operand and 2 non-opaque constant operands.
+ ConstantSDNode *C0 = isConstOrConstSplat(LR);
+ ConstantSDNode *C1 = isConstOrConstSplat(RR);
+ if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+ // Canonicalize larger constant as C0.
+ if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
+ std::swap(C0, C1);
+
+ // The difference of the constants must be a single bit.
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &C1Val = C1->getAPIntValue();
+ if ((C0Val - C1Val).isPowerOf2()) {
+ // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
+ // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
+ SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
+ SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
+ }
+ }
+ }
}
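A concrete instance of the new add+and+setcc fold, exhaustively checked on a small range; the constants 4 and 5 are an arbitrary example pair whose difference is a power of two.

#include <cassert>
#include <cstdint>

int main() {
  // (X != 4 && X != 5)  -->  (((X + (uint32_t)-4) & ~1u) != 0)
  // Here C0 = 5 and C1 = 4, and C0 - C1 == 1 is a power of two, so a single
  // compare replaces the two original ones.
  for (uint32_t X = 0; X < 64; ++X) {
    bool Orig = (X != 4) && (X != 5);
    bool Folded = ((X + (uint32_t)-4) & ~1u) != 0;
    assert(Orig == Folded);
  }
}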
// Canonicalize equivalent operands to LL == RL.
@@ -4259,7 +4814,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
// Ensure that this isn't going to produce an unsupported unaligned access.
if (ShAmt &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- LDST->getAddressSpace(), ShAmt / 8))
+ LDST->getAddressSpace(), ShAmt / 8,
+ LDST->getMemOperand()->getFlags()))
return false;
// It's not possible to generate a constant of extended or untyped type.
@@ -4316,9 +4872,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
SDNode *&NodeToMask) {
// Recursively search for the operands, looking for loads which can be
// narrowed.
- for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
- SDValue Op = N->getOperand(i);
-
+ for (SDValue Op : N->op_values()) {
if (Op.getValueType().isVector())
return false;
@@ -4480,7 +5034,7 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
SDValue N1 = N->getOperand(1);
// Do we actually prefer shifts over mask?
- if (!TLI.preferShiftsToClearExtremeBits(N0))
+ if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
return SDValue();
// Try to match (-1 '[outer] logical shift' y)
@@ -4575,7 +5129,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
@@ -4644,24 +5198,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
- EVT VT = Vector->getValueType(0);
- unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
// the lane size.
- if (BitWidth > SplatBitSize)
- for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
- SplatBitSize < BitWidth;
- SplatBitSize = SplatBitSize * 2)
+ if (EltBitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+ SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
- if (SplatBitSize % BitWidth == 0) {
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ if ((SplatBitSize % EltBitWidth) == 0) {
+ Constant = APInt::getAllOnesValue(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
}
@@ -4773,44 +5325,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getScalarValueSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
+ if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ (ISD::isEXTLoad(N0.getNode()) ||
+ (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getScalarValueSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarSizeInBits())) &&
+ unsigned ExtBitSize = N1.getScalarValueSizeInBits();
+ unsigned MemBitSize = MemVT.getScalarSizeInBits();
+ APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
+ if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
+ SDValue ExtLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -5155,6 +5692,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
+/// OR combines for which the commuted variant will be tried as well.
+static SDValue visitORCommutative(
+ SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == ISD::AND) {
+ // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+ // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+ }
+
+ return SDValue();
+}
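The bitwise identity behind both folds, verified exhaustively on a byte; in the DAG version the inner operand must additionally be recognized as the bitwise-not of Y via isBitwiseNot.

#include <cassert>
#include <cstdint>

int main() {
  // (or (and X, (xor Y, -1)), Y) == (or X, Y): every bit of Y is set in the
  // result either way, and the bits outside Y come straight from X.
  for (uint32_t X = 0; X < 256; ++X)
    for (uint32_t Y = 0; Y < 256; ++Y)
      assert(((X & ~Y) | Y) == (X | Y));
}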
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5284,7 +5838,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -5302,6 +5856,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
+ if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+ return Combined;
+ if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+ return Combined;
+
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
@@ -5318,6 +5877,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // If OR can be rewritten into ADD, try combines based on ADD.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
return SDValue();
}
@@ -5869,6 +6434,213 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
return None;
}
+static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+ return i;
+}
+
+static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+ return BW - i - 1;
+}
+
+// Check if the byte offsets we are looking at match either a big-endian or a
+// little-endian value load. Return true for big endian, false for little
+// endian, and None if the match failed.
+static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+ int64_t FirstOffset) {
+ // The endian can be decided only when it is 2 bytes at least.
+ unsigned Width = ByteOffsets.size();
+ if (Width < 2)
+ return None;
+
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < Width; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+  assert((BigEndian != LittleEndian) && "It should be either big endian or "
+                                        "little endian");
+ return BigEndian;
+}
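The same decision rule on plain vectors, with a few worked inputs; this is a sketch that uses std::optional in place of llvm::Optional.

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

// Byte i of the value must land at base+i (little endian) or at
// base+(Width-1-i) (big endian); anything else is not a contiguous
// load/store of the whole value.
static std::optional<bool> isBigEndianOffsets(const std::vector<int64_t> &Offs,
                                              int64_t First) {
  if (Offs.size() < 2)
    return std::nullopt;
  bool BE = true, LE = true;
  for (size_t i = 0; i < Offs.size(); ++i) {
    int64_t O = Offs[i] - First;
    LE &= (O == (int64_t)i);
    BE &= (O == (int64_t)(Offs.size() - 1 - i));
  }
  if (BE == LE)
    return std::nullopt;  // neither pattern matched
  return BE;
}

int main() {
  assert(isBigEndianOffsets({0, 1, 2, 3}, 0) == std::optional<bool>(false)); // LE
  assert(isBigEndianOffsets({3, 2, 1, 0}, 0) == std::optional<bool>(true));  // BE
  assert(!isBigEndianOffsets({0, 2, 1, 3}, 0).has_value());                  // no match
}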
+
+static SDValue stripTruncAndExt(SDValue Value) {
+ switch (Value.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ return stripTruncAndExt(Value.getOperand(0));
+ }
+ return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+ // Collect all the stores in the chain.
+ SDValue Chain;
+ SmallVector<StoreSDNode *, 8> Stores;
+ for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+ if (Store->getMemoryVT() != MVT::i8 ||
+ Store->isVolatile() || Store->isIndexed())
+ return SDValue();
+ Stores.push_back(Store);
+ Chain = Store->getChain();
+ }
+  // Handle simple types only.
+ unsigned Width = Stores.size();
+ EVT VT = EVT::getIntegerVT(
+ *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ return SDValue();
+
+ // Check if all the bytes of the combined value we are looking at are stored
+  // to the same base address. Collect byte offsets from the base address into
+ // ByteOffsets.
+ SDValue CombinedValue;
+ SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ int64_t FirstOffset = INT64_MAX;
+ StoreSDNode *FirstStore = nullptr;
+ Optional<BaseIndexOffset> Base;
+ for (auto Store : Stores) {
+    // Each store holds a different byte of the CombinedValue. A truncate is
+ // required to get that byte value.
+ SDValue Trunc = Store->getValue();
+ if (Trunc.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+    // A shift operation is required to get the right byte offset, except for
+    // the first byte.
+ int64_t Offset = 0;
+ SDValue Value = Trunc.getOperand(0);
+ if (Value.getOpcode() == ISD::SRL ||
+ Value.getOpcode() == ISD::SRA) {
+ ConstantSDNode *ShiftOffset =
+ dyn_cast<ConstantSDNode>(Value.getOperand(1));
+ // Trying to match the following pattern. The shift offset must be
+ // a constant and a multiple of 8. It is the byte offset in "y".
+ //
+ // x = srl y, offset
+ // i8 z = trunc x
+ // store z, ...
+ if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ return SDValue();
+
+ Offset = ShiftOffset->getSExtValue()/8;
+ Value = Value.getOperand(0);
+ }
+
+ // Stores must share the same combined value with different offsets.
+ if (!CombinedValue)
+ CombinedValue = Value;
+ else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ return SDValue();
+
+    // The trunc and all the extend operations should be stripped to get the
+    // real value being stored.
+ else if (CombinedValue.getValueType() != VT) {
+ if (Value.getValueType() == VT ||
+ Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+ CombinedValue = Value;
+ // Give up if the combined value type is smaller than the store size.
+ if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ return SDValue();
+ }
+
+ // Stores must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Remember the first byte store
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstStore = Store;
+ FirstOffset = ByteOffsetFromBase;
+ }
+    // Map the offset in the store to the offset in the combined value, and
+    // return early if that offset has already been set.
+ if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ return SDValue();
+ ByteOffsets[Offset] = ByteOffsetFromBase;
+ }
+
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ assert(FirstStore && "First store must be set");
+
+  // Check if the byte offsets of the combined value we are looking at match
+  // either a big-endian or a little-endian store order.
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
+  // The node we are looking at matches the pattern; check if we can replace
+  // it with a single store, plus a bswap if needed.
+
+  // If the store needs a byte swap, check if the target supports it.
+ bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstStore->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ if (VT != CombinedValue.getValueType()) {
+ assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+ "Get unexpected store value to combine");
+ CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+ CombinedValue);
+ }
+
+ if (NeedsBswap)
+ CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+ SDValue NewStore =
+ DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+ // Rely on other DAG combine rules to remove the other individual stores.
+ DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+ return NewStore;
+}
+
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
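/// e.g. on a little-endian target,
///   i32 val = zext(*p) | (zext(*(p+1)) << 8) | (zext(*(p+2)) << 16) | (zext(*(p+3)) << 24)
/// is folded into a single i32 load of p (plus a BSWAP when the byte order is reversed).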
@@ -5916,11 +6688,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
- std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
- unsigned BW, unsigned i) { return i; };
- std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
- unsigned BW, unsigned i) { return BW - i - 1; };
-
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -5987,15 +6754,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- bool BigEndian = true, LittleEndian = true;
- for (unsigned i = 0; i < ByteWidth; i++) {
- int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
- LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
- BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
- if (!BigEndian && !LittleEndian)
- return SDValue();
- }
- assert((BigEndian != LittleEndian) && "should be either or");
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
assert(FirstByteProvider && "must be set");
// Ensure that the first byte is loaded from zero offset of the first load.
@@ -6008,7 +6770,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// replace it with a single load and bswap if needed.
// If the load needs a byte swap, check if the target supports it
- bool NeedsBswap = IsBigEndianTarget != BigEndian;
+ bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
@@ -6019,8 +6781,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, FirstLoad->getAddressSpace(),
- FirstLoad->getAlignment(), &Fast);
+ VT, *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
@@ -6160,7 +6921,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+ if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
@@ -6218,6 +6979,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
+
+ // fold (not (neg x)) -> (add X, -1)
+ // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+ // Y is a constant or the subtract has a single use.
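+ // e.g. (xor (sub 0, X), -1) --> (add X, -1), since ~(0 - X) == X - 1.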
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ }
+
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
@@ -6310,11 +7081,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
+/// We are looking for: (shift being one of shl/sra/srl)
+/// shift (binop X, C0), C1
+/// And want to transform into:
+/// binop (shift X, C1), (shift C0, C1)
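+/// e.g. (shl (add X, 3), 2) --> (add (shl X, 2), 12)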
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
+ // The inner binop must be one-use, since we want to replace it.
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -6322,56 +7098,43 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
- bool HighBitSet = false; // Can we transform this if the high bit is set?
-
switch (LHS->getOpcode()) {
- default: return SDValue();
+ default:
+ return SDValue();
case ISD::OR:
case ISD::XOR:
- HighBitSet = false; // We can only transform sra if the high bit is clear.
- break;
case ISD::AND:
- HighBitSet = true; // We can only transform sra if the high bit is set.
break;
case ISD::ADD:
if (N->getOpcode() != ISD::SHL)
return SDValue(); // only shl(add) not sr[al](add).
- HighBitSet = false; // We can only transform sra if the high bit is clear.
break;
}
// We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
- if (!BinOpCst) return SDValue();
+ if (!BinOpCst)
+ return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
- // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
- SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
- bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
- BinOpLHSVal->getOpcode() == ISD::SRA ||
- BinOpLHSVal->getOpcode() == ISD::SRL;
- bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
- BinOpLHSVal->getOpcode() == ISD::SELECT;
-
- if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
- !isCopyOrSelect)
+ // or is copy/select. Enable this in other cases once we figure out when it
+ // is exactly profitable.
+ SDValue BinOpLHSVal = LHS->getOperand(0);
+ bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+ BinOpLHSVal.getOpcode() == ISD::SRA ||
+ BinOpLHSVal.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+ bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+ if (!IsShiftByConstant && !IsCopyOrSelect)
return SDValue();
- if (isCopyOrSelect && N->hasOneUse())
+ if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
EVT VT = N->getValueType(0);
- // If this is a signed shift right, and the high bit is modified by the
- // logical operation, do not perform the transformation. The highBitSet
- // boolean indicates the value of the high bit of the constant which would
- // cause it to be modified for this operation.
- if (N->getOpcode() == ISD::SRA) {
- bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
- if (BinOpRHSSignSet != HighBitSet)
- return SDValue();
- }
-
if (!TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
@@ -6395,11 +7158,12 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
assert(N->getOperand(0).getOpcode() == ISD::AND);
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
- if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ EVT TruncVT = N->getValueType(0);
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+ TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
SDLoc DL(N);
- EVT TruncVT = N->getValueType(0);
SDValue N00 = N->getOperand(0).getOperand(0);
SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
@@ -6431,6 +7195,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
// fold (rot x, c) -> (rot x, c % BitSize)
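// e.g. (rotl i32 X, 37) --> (rotl i32 X, 5)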
+ // TODO - support non-uniform vector amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
if (Cst->getAPIntValue().uge(Bitsize)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
@@ -6476,6 +7241,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return V;
EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
@@ -6506,6 +7272,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
@@ -6517,6 +7284,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
+
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -6524,6 +7292,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -6548,69 +7317,86 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
- EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
- // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
// For this to be valid, the second form must not preserve any of the bits
// that are shifted out by the inner shift in the first form. This means
// the outer shift size must be >= the number of bits added by the ext.
// As a corollary, we don't care what kind of ext it is.
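// e.g. (shl (zext i16 (shl X, 2) to i32), 18) --> (shl (zext i16 X to i32), 20)
// and (shl (zext i16 (shl X, 4) to i32), 30) --> 0 (all result bits shifted out).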
- if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::ANY_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
- if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- APInt c1 = N0Op0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+ EVT InnerVT = N0Op0.getValueType();
+ uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
- EVT InnerShiftVT = N0Op0.getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- if (c2.uge(OpSizeInBits - InnerShiftSize)) {
- SDLoc DL(N0);
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
+ auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true))
+ return DAG.getConstant(0, SDLoc(N), VT);
- return DAG.getNode(
- ISD::SHL, DL, VT,
- DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
- }
+ auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
+ SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
+ Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
}
}
// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
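// e.g. (shl (zext i16 (srl X, 4) to i32), 4) --> (zext i16 (shl (srl X, 4), 4) to i32)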
- if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
- if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 == c2) {
- SDValue NewOp0 = N0.getOperand(0);
- EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDLoc DL(N);
- SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
- NewOp0,
- DAG.getConstant(c2, DL, CountVT));
- AddToWorklist(NewSHL.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
- }
- }
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+ auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2);
+ return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+ NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
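// e.g. (shl (srl exact X, 3), 5) --> (shl X, 2)
//      (shl (srl exact X, 5), 3) --> (srl X, 2)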
+ // TODO - support non-uniform vector shift amounts.
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -6619,9 +7405,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDLoc DL(N);
if (C1 <= C2)
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+ DAG.getConstant(C2 - C1, DL, ShiftVT));
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
- DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+ DAG.getConstant(C1 - C2, DL, ShiftVT));
}
}
@@ -6629,11 +7415,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
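// e.g. for i32: (shl (srl X, 5), 3) --> (and (srl X, 2), 0x3FFFFFF8)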
+ // TODO - drop hasOneUse requirement if c1 == c2?
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- TLI.shouldFoldShiftPairToMask(N, Level)) {
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N0C1->getZExtValue();
- if (c1 < OpSizeInBits) {
+ if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+ uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
SDValue Shift;
@@ -6641,12 +7429,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
Mask <<= c2 - c1;
SDLoc DL(N);
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, DL, N1.getValueType()));
+ DAG.getConstant(c2 - c1, DL, ShiftVT));
} else {
Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, DL, N1.getValueType()));
+ DAG.getConstant(c1 - c2, DL, ShiftVT));
}
SDLoc DL(N0);
return DAG.getNode(ISD::AND, DL, VT, Shift,
@@ -6719,6 +7507,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
@@ -6815,32 +7604,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
+ // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
// fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
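// e.g. (sra (trunc i64 (sra X, 32) to i32), 5) --> (trunc i64 (sra X, 37) to i32)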
+ // TODO - support non-uniform vector shift amounts.
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
- N0.getOperand(0).getOperand(1).hasOneUse() &&
- N1C) {
+ N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
- unsigned LargeShiftVal = LargeShift->getZExtValue();
EVT LargeVT = N0Op0.getValueType();
-
- if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
+ if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
- SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
- getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
- N0Op0.getOperand(0), Amt);
+ SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
+ getShiftAmountTy(LargeVT));
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
// Simplify, based on bits shifted out of the LHS.
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -6872,6 +7661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
@@ -6912,6 +7702,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
@@ -6935,6 +7726,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
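// e.g. for i32 X: (srl (shl X, 3), 3) --> (and X, 0x1FFFFFFF)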
+ // TODO - (srl (shl x, c1), c2).
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
@@ -6945,11 +7737,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
unsigned BitSize = SmallVT.getScalarSizeInBits();
- if (N1C->getZExtValue() >= BitSize)
+ if (N1C->getAPIntValue().uge(BitSize))
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -6970,7 +7763,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
- if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
+ if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
@@ -7021,6 +7814,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold operands of srl based on knowledge that the low bits are not
// demanded.
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -7079,13 +7873,49 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
- // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ auto IsUndefOrZero = [](SDValue V) {
+ return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+ };
+
+ // TODO - support non-uniform vector shift amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
- DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
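+ // e.g. for i32: fshl(0, N1, 8) --> lshr(N1, 24) and fshr(N0, 0, 8) --> shl(N0, 24).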
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
@@ -7096,6 +7926,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
if (N0 == N1 && hasOperation(RotOpc, VT))
return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+ // Simplify, based on bits shifted out of N0/N1.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -7207,11 +8041,14 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
-static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+ SDValue RHS,
+ const TargetLowering &TLI) {
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ TLI.isProfitableToCombineMinNumMaxNum(VT) &&
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
@@ -7364,6 +8201,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
SDLoc DL(N);
+ SDNodeFlags Flags = N->getFlags();
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
@@ -7414,20 +8252,26 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect =
- DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
- InnerSelect, N2);
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect =
- DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
- InnerSelect);
+ InnerSelect, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
@@ -7439,12 +8283,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+ N2, Flags);
}
// Otherwise see if we can optimize the "and" to a better pattern.
- if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
- N2);
+ N2, Flags);
+ }
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -7456,20 +8302,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
- N2_2);
+ N2_2, Flags);
}
}
}
- if (VT0 == MVT::i1) {
- // select (not Cond), N1, N2 -> select Cond, N2, N1
- if (isBitwiseNot(N0))
- return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
}
// Fold selects based on a setcc into other things, such as min/max/abs.
@@ -7481,7 +8329,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
CC, TLI, DAG))
return FMinMax;
@@ -7516,9 +8364,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
- (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
- N0.getOperand(2));
+ (!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+ // Any flags available in a select/setcc fold will be on the setcc as they
+ // migrated from fcmp
+ Flags = N0.getNode()->getFlags();
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+ N2, N0.getOperand(2));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
return SimplifySelect(DL, N0, N1, N2);
}
@@ -7599,14 +8454,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
- SDValue Data = MSC->getValue();
+ SDValue Data = MSC->getValue();
+ SDValue Chain = MSC->getChain();
SDLoc DL(N);
+ // Zap scatters with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MSCATTER data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7624,8 +8484,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
- SDValue Chain = MSC->getChain();
-
EVT MemoryVT = MSC->getMemoryVT();
unsigned Alignment = MSC->getOriginalAlignment();
@@ -7658,15 +8516,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getValue();
+ SDValue Data = MST->getValue();
+ SDValue Chain = MST->getChain();
EVT VT = Data.getValueType();
SDLoc DL(N);
+ // Zap masked stores with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MSTORE data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7680,17 +8543,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Chain = MST->getChain();
SDValue Ptr = MST->getBasePtr();
EVT MemoryVT = MST->getMemoryVT();
unsigned Alignment = MST->getOriginalAlignment();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
-
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -7712,7 +8569,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MMO = DAG.getMachineFunction().getMachineMemOperand(
MST->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
MST->getAAInfo(), MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -7728,13 +8585,17 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDLoc DL(N);
+ // Zap gathers with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MGATHER result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7805,13 +8666,17 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+ MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
SDValue Mask = MLD->getMask();
SDLoc DL(N);
+ // Zap masked loads with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MLOAD result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7839,12 +8704,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
EVT MemoryVT = MLD->getMemoryVT();
unsigned Alignment = MLD->getOriginalAlignment();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -7862,7 +8721,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
@@ -7943,11 +8802,16 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
+ // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
+ return DAG.getSelect(DL, VT, F, N2, N1);
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -7987,11 +8851,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- EVT VT = N->getValueType(0);
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
+ N0.getOperand(1), TLI)) {
if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
return FMinMax;
}
@@ -8080,9 +8943,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2;
} else if (SCC.getOpcode() == ISD::SETCC) {
// Fold to a simpler select_cc
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
- SCC.getOperand(0), SCC.getOperand(1), N2, N3,
- SCC.getOperand(2));
+ SDValue SelectOp = DAG.getNode(
+ ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SelectOp->setFlags(SCC->getFlags());
+ return SelectOp;
}
}
@@ -8148,6 +9013,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
@@ -8158,7 +9024,33 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
- return DAG.getNode(Opcode, SDLoc(N), VT, N0);
+ return DAG.getNode(Opcode, DL, VT, N0);
+
+ // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+ // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ if (N0->getOpcode() == ISD::SELECT) {
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+ (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+ // For any_extend, choose sign extension of the constants to allow a
+ // possible further transform to sign_extend_inreg.i.e.
+ //
+ // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+ // t2: i64 = any_extend t1
+ // -->
+ // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+ // -->
+ // t4: i64 = sign_extend_inreg t3
+ unsigned FoldOpc = Opcode;
+ if (FoldOpc == ISD::ANY_EXTEND)
+ FoldOpc = ISD::SIGN_EXTEND;
+ return DAG.getSelect(DL, VT, N0->getOperand(0),
+ DAG.getNode(FoldOpc, DL, VT, Op1),
+ DAG.getNode(FoldOpc, DL, VT, Op2));
+ }
+ }
// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
@@ -8173,7 +9065,6 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
- SDLoc DL(N);
// For zero-extensions, UNDEF elements still guarantee to have the upper
// bits set to zero.
@@ -8387,6 +9278,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
assert(N->getOpcode() == ISD::ZERO_EXTEND);
EVT VT = N->getValueType(0);
+ EVT OrigVT = N->getOperand(0).getValueType();
+ if (TLI.isZExtFree(OrigVT, VT))
+ return SDValue();
// and/or/xor
SDValue N0 = N->getOperand(0);
@@ -8450,6 +9344,10 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
Load->getValueType(0), ExtLoad);
CombineTo(Load, Trunc, ExtLoad.getValue(1));
}
+
+ // N0 is dead at this point.
+ recursivelyDeleteUnusedNodes(N0.getNode());
+
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
@@ -8509,19 +9407,21 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
: ISD::isZEXTLoad(N0Node);
if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
!ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
- return {};
+ return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
- return {};
+ return SDValue();
SDValue ExtLoad =
DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
Combiner.CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ if (LN0->use_empty())
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -8559,6 +9459,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
Combiner.CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
@@ -8804,6 +9705,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ // Eliminate this sign extend by doing a negation in the destination type:
+ // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isNullOrNullSplat(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+ }
+ // Eliminate this sign extend by doing a decrement in the destination type:
+ // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
return SDValue();
}
@@ -9061,14 +9981,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
SDValue ShAmt = N0.getOperand(1);
- unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
InnerZExt.getOperand(0).getValueSizeInBits();
- if (ShAmtVal > KnownZeroBits)
+ if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
return SDValue();
}
@@ -9162,6 +10081,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -9185,6 +10105,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -9574,14 +10495,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
- return DAG.getNode(ISD::SRA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1));
+ if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1));
}
}
@@ -9667,10 +10588,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
- if (N0.getValueType() == N->getValueType(0))
+ if (SrcVT == VT)
return N0;
// fold (truncate (truncate x)) -> (truncate x)
@@ -9740,7 +10662,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
- EVT SrcVT = N0.getValueType();
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
SDLoc SL(N0);
@@ -9753,7 +10674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
@@ -9771,6 +10692,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Attempt to pre-truncate BUILD_VECTOR sources.
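+ // e.g. (v2i16 (trunc (v2i32 build_vector X, Y)))
+ //        --> (v2i16 build_vector (trunc X), (trunc Y))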
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 8> TruncOps;
+ for (const SDValue &Op : N0->op_values()) {
+ SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+ TruncOps.push_back(TruncOp);
+ }
+ return DAG.getBuildVector(VT, DL, TruncOps);
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -9906,7 +10840,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// When the adde's carry is not used.
if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
- (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ // We only do this for addcarry before operation legalization.
+ ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT))) {
SDLoc SL(N);
auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
@@ -10070,14 +11006,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
- // Only do this before legalize types, since we might create an illegal
- // scalar type. Even if we knew we wouldn't create an illegal scalar type
- // we can only do this before legalize ops, since the target maybe
- // depending on the bitcast.
+ // Only do this before legalize types, unless both types are integer and the
+ // scalar type is legal. Only do this before legalize ops, since the target
+ // may depend on the bitcast.
// First check to see if this is all constant.
- if (!LegalTypes &&
+ // TODO: Support FP bitcasts after legalize types.
+ if (VT.isVector() &&
+ (!LegalTypes ||
+ (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+ TLI.isTypeLegal(VT.getVectorElementType()))) &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
- VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+ cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
@@ -10113,18 +11052,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isOperationLegal(ISD::LOAD, VT)) &&
- TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned OrigAlign = LN0->getAlignment();
- bool Fast = false;
- if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- LN0->getAddressSpace(), OrigAlign, &Fast) &&
- Fast) {
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), OrigAlign,
+ LN0->getPointerInfo(), LN0->getAlignment(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
@@ -11071,15 +12006,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize), Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -11105,8 +12042,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
- // If 'unsafe math' or nnan is enabled, fold lots of things.
- if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ // If nnan is enabled, fold lots of things.
+ if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
@@ -11246,16 +12183,20 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0 == N1) {
// (fsub x, x) -> 0.0
- if (Options.UnsafeFPMath || Flags.hasNoNaNs())
+ if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
// (fsub -0.0, N1) -> -N1
+ // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
+ // FSUB does not specify the sign bit of a NaN. Also note that for
+ // the same reason, the inverse transform is not safe, unless fast math
+ // flags are in play.
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
+ return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
@@ -11273,9 +12214,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize), Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -11319,7 +12261,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath ||
+ if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -11361,14 +12303,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
- GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations),
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize),
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize),
Flags);
}
}
@@ -11506,7 +12452,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// fma (fneg x), K, y -> fma x -K, y
if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+ (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
+ ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
}
@@ -11541,22 +12488,33 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ // TODO: Limit this transform based on optsize/minsize - it always creates at
+ // least 1 extra instruction. But the perf win may be substantial enough
+ // that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
if (!UnsafeMath && !Flags.hasAllowReciprocal())
return SDValue();
- // Skip if current node is a reciprocal.
+ // Skip if current node is a reciprocal/fneg-reciprocal.
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- if (N0CFP && N0CFP->isExactlyValue(1.0))
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
+ if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
SDValue N1 = N->getOperand(1);
unsigned MinUses = TLI.combineRepeatedFPDivisors();
- if (!MinUses || N1->use_size() < MinUses)
+
+ // For splat vectors, scale the number of uses by the splat factor. If we can
+ // convert the division into a scalar op, that will likely be much faster.
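+ // e.g. a single fdiv of a <4 x float> value by a splat divisor counts as 4 uses here.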
+ unsigned NumElts = 1;
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && DAG.isSplatValue(N1))
+ NumElts = VT.getVectorNumElements();
+
+ if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
// Find all FDIV users of the same divisor.
@@ -11573,10 +12531,9 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// Now that we have the actual number of divisor uses, make sure it meets
// the minimum threshold specified by the target.
- if (Users.size() < MinUses)
+ if ((Users.size() * NumElts) < MinUses)
return SDValue();
- EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
@@ -11619,6 +12576,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ if (SDValue V = combineRepeatedFPDivisors(N))
+ return V;
+
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -11634,7 +12594,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT)))
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT), Flags);
}
@@ -11692,21 +12652,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
- GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations),
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize),
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize),
Flags);
}
}
- if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
- return CombineRepeatedDivisors;
-
return SDValue();
}
@@ -11838,18 +12799,24 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
}
- // Try to convert x ** (1/4) into square roots.
+ // Try to convert x ** (1/4) and x ** (3/4) into square roots.
// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
// TODO: This could be extended (using a target hook) to handle smaller
// power-of-2 fractional exponents.
- if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+ bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+ bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+ if (ExponentIs025 || ExponentIs075) {
// pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
// pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+ // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
- if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+
+ // We only need no signed zeros for the 0.25 case.
+ if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
!Flags.hasApproximateFuncs())
return SDValue();
@@ -11859,13 +12826,17 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
- if (DAG.getMachineFunction().getFunction().optForSize())
+ if (DAG.getMachineFunction().getFunction().hasOptSize())
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
- return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ if (ExponentIs025)
+ return SqrtSqrt;
+ // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
}
return SDValue();
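// The arithmetic behind the new 0.75 case: 0.75 = 0.5 + 0.25, so
//   pow(X, 0.75) = pow(X, 0.5) * pow(X, 0.25) --> sqrt(X) * sqrt(sqrt(X))
// As the comments above note, this needs ninf/afn (and nsz only for the 0.25
// form) because the -0.0 and -inf special cases of pow differ from the sqrt
// expansions.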
@@ -11911,6 +12882,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -11968,6 +12943,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -12051,6 +13030,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (fp_to_sint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (fp_to_sint c1fp) -> c1
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
@@ -12062,6 +13045,10 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (fp_to_uint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (fp_to_uint c1fp) -> c1
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
@@ -12250,8 +13237,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options))
- return GetNegatedExpression(N0, DAG, LegalOperations);
+ &DAG.getTarget().Options, ForCodeSize))
+ return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
@@ -12287,7 +13274,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, VT) ||
+ (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(
ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
@@ -12556,6 +13543,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
@@ -12564,6 +13552,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
// [reg +/- reg]
AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
@@ -12653,7 +13642,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Check #2.
if (!isLoad) {
SDValue Val = cast<StoreSDNode>(N)->getValue();
- if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+
+ // Would require a copy.
+ if (Val == BasePtr)
+ return false;
+
+ // Would create a cycle.
+ if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
return false;
}
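// In rough terms: if the stored value is the original base pointer, a
// pre-indexed store would still need the un-incremented pointer, i.e. an
// extra copy; if the stored value is (or is computed from) the updated
// pointer Ptr, folding the increment into the store would make the store
// depend on its own result, creating a cycle. A hypothetical sketch:
//   tmp = p + 4;       // candidate pre-increment
//   *tmp = (int)tmp;   // stored value derived from the updated pointer
// Here the folded store would be an operand of its own address update.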
@@ -13190,7 +14185,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
- SDValue BetterChain = FindBetterChain(N, Chain);
+ SDValue BetterChain = FindBetterChain(LD, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
@@ -13378,7 +14373,7 @@ struct LoadedSlice {
/// Get the alignment of the load used for this slice.
unsigned getAlignment() const {
unsigned Alignment = Origin->getAlignment();
- unsigned Offset = getOffsetFromBase();
+ uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
Alignment = MinAlign(Alignment, Alignment + Offset);
return Alignment;
@@ -13500,9 +14495,11 @@ struct LoadedSlice {
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
- const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+ const TargetRegisterClass *ResRC =
+ TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
const TargetRegisterClass *ArgRC =
- TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+ Use->getOperand(0)->isDivergent());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
@@ -13826,7 +14823,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
// For narrowing to be valid, it must be the case that the load is the
- // immediately preceeding memory operation before the store.
+ // immediately preceding memory operation before the store.
if (LD == Chain.getNode())
; // ok.
else if (Chain->getOpcode() == ISD::TokenFactor &&
@@ -14039,11 +15036,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
- Value.hasOneUse() &&
- Chain == SDValue(Value.getNode(), 1)) {
+ Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
if (!VT.isFloatingPoint() ||
@@ -14073,7 +15068,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
LD->getPointerInfo(), LDAlign);
SDValue NewST =
- DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
+ DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
ST->getPointerInfo(), STAlign);
AddToWorklist(NewLD.getNode());
@@ -14171,14 +15166,14 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
Visited.insert(StoreNodes[i].MemNode);
}
- // don't include nodes that are children
+ // don't include nodes that are children or repeated nodes.
for (unsigned i = 0; i < NumStores; ++i) {
- if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
assert(Chains.size() > 0 && "Chain should have generated a chain");
- return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
+ return DAG.getTokenFactor(StoreDL, Chains);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -14372,15 +15367,19 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
- // The memory operands must not be volatile.
+ // The memory operands must not be volatile/indexed.
if (Ld->isVolatile() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
+ // The memory operands must not be volatile/indexed.
if (Other->isVolatile() || Other->isIndexed())
return false;
- SDValue Val = peekThroughBitcasts(Other->getValue());
+ // Don't mix temporal stores with non-temporal stores.
+ if (St->isNonTemporal() != Other->isNonTemporal())
+ return false;
+ SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
@@ -14388,16 +15387,19 @@ void DAGCombiner::getStoreMergeCandidates(
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
- auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
- // The memory operands must not be volatile.
+ // The memory operands must not be volatile/indexed.
if (OtherLd->isVolatile() || OtherLd->isIndexed())
return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -14406,17 +15408,17 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsConstantSrc) {
if (NoTypeMatch)
return false;
- if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+ if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
}
if (IsExtractVecSrc) {
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
- if (!MemVT.bitsEq(Val.getValueType()))
+ if (!MemVT.bitsEq(OtherBC.getValueType()))
return false;
- if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
- Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
}
Ptr = BaseIndexOffset::match(Other, DAG);
@@ -14441,9 +15443,11 @@ void DAGCombiner::getStoreMergeCandidates(
RootNode = St->getChain().getNode();
+ unsigned NumNodesExplored = 0;
if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
- for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
if (I2.getOperandNo() == 0)
@@ -14454,7 +15458,8 @@ void DAGCombiner::getStoreMergeCandidates(
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
- for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
if (I.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
@@ -14551,6 +15556,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad =
+ IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
return false;
@@ -14652,8 +15660,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = false;
LastLegalType = i + 1;
@@ -14664,8 +15672,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
@@ -14683,8 +15692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(
+ Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
}
@@ -14755,8 +15764,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
@@ -14847,7 +15856,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
// Scan the memory operations on the chain and find the first
@@ -14887,11 +15895,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
@@ -14901,11 +15909,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = false;
@@ -14920,11 +15928,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
@@ -14994,26 +16003,32 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
- MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::Flags LdMMOFlags =
isDereferenceable ? MachineMemOperand::MODereferenceable
: MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ MachineMemOperand::Flags StMMOFlags =
+ IsNonTemporalStore ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad =
DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- FirstLoadAlign, MMOFlags);
+ FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
- FirstLoadAlign, MMOFlags);
+ FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
@@ -15168,16 +16183,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
- TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
- unsigned OrigAlign = ST->getAlignment();
- bool Fast = false;
- if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
- ST->getAddressSpace(), OrigAlign, &Fast) &&
- Fast) {
- return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getPointerInfo(), OrigAlign,
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
- }
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+ DAG, *ST->getMemOperand())) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getPointerInfo(), ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
}
}
@@ -15205,6 +16215,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
+ // Try transforming several stores into STORE (BSWAP).
+ if (SDValue Store = MatchStoreCombine(ST))
+ return Store;
+
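// A rough sketch of the pattern this targets (the expressions are
// hypothetical): four i8 stores that scatter the bytes of one i32 in
// reversed order, e.g.
//   p[0] = x >> 24; p[1] = x >> 16; p[2] = x >> 8; p[3] = x;
// on a little-endian target can be matched into a single
//   store i32 (bswap x), p
// when a legal wide store (and BSWAP, if needed) is available.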
if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
@@ -15221,23 +16235,22 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits());
+
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = DAG.GetDemandedBits(
- Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits()));
+ SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
AddToWorklist(Value.getNode());
- if (Shorter.getNode())
- return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
- Ptr, ST->getMemoryVT(), ST->getMemOperand());
+ if (Shorter)
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
- if (SimplifyDemandedBits(
- Value,
- APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits()))) {
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted) SimplifyDemandedBits will add Value's
// node back to the worklist if necessary, but we also need to re-visit
@@ -15263,25 +16276,55 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
- ST->getMemoryVT() == ST1->getMemoryVT()) {
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
- if (ST1->getValue() == Value) {
- // The store is dead, remove it.
+ !ST1->isVolatile()) {
+ if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the
+ // same location, then the store is dead/noop.
return Chain;
}
- // If this is a store who's preceeding store to the same location
- // and no one other node is chained to that store we can effectively
- // drop the store. Do not remove stores to undef as they may be used as
- // data sinks.
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef()) {
- // ST1 is fully overwritten and can be elided. Combine with it's chain
- // value.
- CombineTo(ST1, ST1->getChain());
- return SDValue();
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+ unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+ // If the preceding store writes to a subset of the current store's
+ // location and no other node is chained to that store, we can
+ // effectively drop the store. Do not remove stores to undef as they may
+ // be used as data sinks.
+ if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+
+ // If ST stores to a subset of preceding store's write set, we may be
+ // able to fold ST's value into the preceding stored value. As we know
+ // the other uses of ST1's chain are unconcerned with ST, this folding
+ // will not affect those nodes.
+ int64_t BitOffset;
+ if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
+ BitOffset)) {
+ SDValue ChainValue = ST1->getValue();
+ if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = C1->getAPIntValue();
+ APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
+ // FIXME: Handle Big-endian mode.
+ if (!DAG.getDataLayout().isBigEndian()) {
+ Val.insertBits(InsertVal, BitOffset);
+ SDValue NewSDVal =
+ DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
+ C1->isTargetOpcode(), C1->isOpaque());
+ SDNode *NewST1 = DAG.UpdateNodeOperands(
+ ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
+ ST1->getOperand(3));
+ return CombineTo(ST, SDValue(NewST1, 0));
+ }
+ }
+ }
+ } // End ST subset of ST1 case.
}
}
}
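// Worked example of the new constant-into-constant folding (little-endian
// only; the values are hypothetical):
//   store i32 0x11223344 to p        ; ST1, the wider preceding store
//   store i8  0xAA       to p + 1    ; ST, fully inside ST1, BitOffset = 8
// ST's constant is truncated/extended to 8 bits and inserted into ST1's
// constant with APInt::insertBits, so ST1 now stores 0x1122AA44 and ST is
// replaced by the updated ST1.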
@@ -15299,7 +16342,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Always perform this optimization before types are legal. If the target
// prefers, also try this after legalization to catch stores that were created
// by intrinsics or other nodes.
- if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
@@ -15333,6 +16376,54 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return ReduceLoadOpStoreWidth(N);
}
+SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
+ const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
+ if (!LifetimeEnd->hasOffset())
+ return SDValue();
+
+ const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
+ LifetimeEnd->getOffset(), false);
+
+ // We walk up the chains to find stores.
+ SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+ if (!Chain.hasOneUse())
+ continue;
+ switch (Chain.getOpcode()) {
+ case ISD::TokenFactor:
+ for (unsigned Nops = Chain.getNumOperands(); Nops;)
+ Chains.push_back(Chain.getOperand(--Nops));
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the node.
+ if (!isAlias(Chain.getNode(), N))
+ Chains.push_back(Chain.getOperand(0));
+ break;
+ case ISD::STORE: {
+ StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
+ if (ST->isVolatile() || ST->isIndexed())
+ continue;
+ const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+ // If we store purely within object bounds just before its lifetime ends,
+ // we can remove the store.
+ if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+ ST->getMemoryVT().getStoreSizeInBits())) {
+ LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+ dbgs() << "\nwithin LIFETIME_END of : ";
+ LifetimeEndBase.dump(); dbgs() << "\n");
+ CombineTo(ST, ST->getChain());
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
@@ -15616,7 +16707,9 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Offset = DAG.getNode(
ISD::MUL, DL, PtrType, Offset,
DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
- MPI = OriginalLoad->getPointerInfo();
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
@@ -15668,14 +16761,15 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
bool LegalOperations) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+ Vec.getNode()->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();
@@ -16073,7 +17167,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx) {
+ unsigned LeftIdx, bool DidSplitVec) {
MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
@@ -16081,17 +17175,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
- unsigned Vec2Offset = 0;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
- // In case both the input vectors are extracted from same base
- // vector we do not need extra addend (Vec2Offset) while
- // computing shuffle mask.
- if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
- Vec2Offset = InVT1.getVectorNumElements();
+ // If we artificially split a vector in two already, then the offsets in the
+ // operands will all be based off of VecIn1, even those in VecIn2.
+ unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
@@ -16214,23 +17303,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// The build vector contains some number of undef elements and exactly
// one other element. That other element must be a zero-extended scalar
// extracted from a vector at a constant index to turn this into a shuffle.
+ // Also, require that the build vector does not implicitly truncate/extend
+ // its elements.
// TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ EVT VT = BV->getValueType(0);
SDValue Zext = BV->getOperand(ZextElt);
if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+ Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
- // The zero-extend must be a multiple of the source size.
+ // The zero-extend must be a multiple of the source size, and we must be
+ // building a vector of the same size as the source of the extract element.
SDValue Extract = Zext.getOperand(0);
unsigned DestSize = Zext.getValueSizeInBits();
unsigned SrcSize = Extract.getValueSizeInBits();
- if (DestSize % SrcSize != 0)
+ if (DestSize % SrcSize != 0 ||
+ Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
return SDValue();
// Create a shuffle mask that will combine the extracted element with zeros
// and undefs.
- int ZextRatio = DestSize / SrcSize;
+ int ZextRatio = DestSize / SrcSize;
int NumMaskElts = NumBVOps * ZextRatio;
SmallVector<int, 32> ShufMask(NumMaskElts, -1);
for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +17355,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
ShufMask);
- return DAG.getBitcast(BV->getValueType(0), Shuf);
+ return DAG.getBitcast(VT, Shuf);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -16316,7 +17411,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
- APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
@@ -16344,6 +17439,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// vector, then split the vector efficiently based on the maximum
// vector access index and adjust the VectorMask and
// VecIn accordingly.
+ bool DidSplitVec = false;
if (VecIn.size() == 2) {
unsigned MaxIndex = 0;
unsigned NearestPow2 = 0;
@@ -16374,6 +17470,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
+ DidSplitVec = true;
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
@@ -16411,7 +17508,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
- VecRight, LeftIdx))
+ VecRight, LeftIdx, DidSplitVec))
Shuffles.push_back(Shuffle);
else
return SDValue();
@@ -16477,18 +17574,20 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
-// TODO: Support sign extend or any extend?
+// TODO: Support sign extend?
// TODO: Allow undef elements?
-// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
+ bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
- if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ unsigned Opc = Op.getOpcode();
+ FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+ if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
@@ -16520,7 +17619,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+ return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+ VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
@@ -16885,14 +17985,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
- unsigned IdentityIndex = i * PartNumElem;
- ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// The extract index must be constant.
if (!CS)
return SDValue();
// Check that we are reading from the identity index.
- if (CS->getZExtValue() != IdentityIndex)
+ unsigned IdentityIndex = i * PartNumElem;
+ if (CS->getAPIntValue() != IdentityIndex)
return SDValue();
}
@@ -16902,12 +18002,59 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = Extract->getOperand(0);
+ unsigned BinOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+ return SDValue();
+
+ SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
+ SDValue Index = Extract->getOperand(1);
+ EVT VT = Extract->getValueType(0);
+
+ // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+ // if the source subvector is the same type as the one being extracted.
+ auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == VT &&
+ (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+ };
+ SDValue Sub0 = GetSubVector(Bop0);
+ SDValue Sub1 = GetSubVector(Bop1);
+
+ // TODO: We could handle the case where only 1 operand is being inserted by
+ // creating an extract of the other operand, but that requires checking
+ // number of uses and/or costs.
+ if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+ return SDValue();
+
+ // We are inserting both operands of the wide binop only to extract back
+ // to the narrow vector size. Eliminate all of the insert/extract:
+ // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
+ return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+ BinOp->getFlags());
+}
+
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ return V;
+
// The extract index must be a constant, so we can map it to a concat operand.
auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!ExtractIndexC)
@@ -16915,8 +18062,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
- if (!ISD::isBinaryOp(BinOp.getNode()))
+ unsigned BOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
// The binop must be a vector type, so we can extract some fraction of it.
@@ -16945,8 +18094,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- unsigned BOpcode = BinOp.getOpcode();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
@@ -16986,35 +18133,35 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
- // TODO: Should we also handle INSERT_SUBVECTOR patterns?
- SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
- SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
- bool ConcatL =
- LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
- bool ConcatR =
- RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
- if (!ConcatL && !ConcatR)
+ auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
+ return V.getOperand(ConcatOpNum);
return SDValue();
+ };
+ SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
+ SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
+
+ if (SubVecL || SubVecR) {
+ // If a binop operand was not the result of a concat, we must extract a
+ // half-sized operand for our new narrow binop:
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
+ SDLoc DL(Extract);
+ SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), IndexC);
- // If one of the binop operands was not the result of a concat, we must
- // extract a half-sized operand for our new narrow binop.
- SDLoc DL(Extract);
-
- // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
- // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
- // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
- SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
- : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
- BinOp.getOperand(0),
- DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+ SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), IndexC);
- SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
- : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
- BinOp.getOperand(1),
- DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
- SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
- return DAG.getBitcast(VT, NarrowBinOp);
+ return SDValue();
}
/// If we are extracting a subvector from a wide vector load, convert to a
@@ -17052,7 +18199,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return NewLd;
}
-SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
@@ -17064,14 +18211,51 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
return NarrowLoad;
+ // Combine an extract of an extract into a single extract_subvector.
+ // ext (ext X, C), 0 --> ext X, C
+ SDValue Index = N->getOperand(1);
+ if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+ if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+ V.getConstantOperandVal(1)) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+ V.getOperand(1));
+ }
+ }
+
+ // Try to move vector bitcast after extract_subv by scaling extraction index:
+ // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+ if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType().isVector()) {
+ SDValue SrcOp = V.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorNumElements();
+ if ((SrcNumElts % DestNumElts) == 0) {
+ unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+ unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NewExtNumElts);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ // TODO - handle (DestNumElts % SrcNumElts) == 0
+ }
+
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
// Only operand 0 is checked as 'concat' assumes all inputs of the same
// type.
- if (V.getOpcode() == ISD::CONCAT_VECTORS &&
- isa<ConstantSDNode>(N->getOperand(1)) &&
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
V.getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
@@ -17084,7 +18268,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// If the input is a build vector. Try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
- if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
@@ -17092,26 +18276,27 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
EVT EltVT = InVT.getVectorElementType();
- EVT ExtractVT = NumElems == 1 ? EltVT :
- EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ EVT ExtractVT = NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElems);
if ((Level < AfterLegalizeDAG ||
(NumElems == 1 ||
TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
- unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
- EltSize;
+ unsigned IdxVal = IdxC->getZExtValue();
+ IdxVal *= NVT.getScalarSizeInBits();
+ IdxVal /= EltSize;
+
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
-
return DAG.getBitcast(NVT, Src);
}
// Extract the pieces from the original build_vector.
- SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
- makeArrayRef(V->op_begin() + IdxVal,
- NumElems));
+ SDValue BuildVec = DAG.getBuildVector(
+ ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
return DAG.getBitcast(NVT, BuildVec);
}
}
@@ -17126,9 +18311,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants.
- auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
-
if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
@@ -17141,7 +18325,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
- N->getOperand(1));
+ Index);
}
}
@@ -17154,6 +18338,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
}
+/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
+/// followed by concatenation. Narrow vector ops may have better performance
+/// than wide ops, and this can unlock further narrowing of other vector ops.
+/// Targets can invert this transform later if it is not profitable.
+static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
+ if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+ N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
+ !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
+ return SDValue();
+
+ // Split the wide shuffle mask into halves. Any mask element that is accessing
+ // operand 1 is offset down to account for narrowing of the vectors.
+ ArrayRef<int> Mask = Shuf->getMask();
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> Mask0(HalfNumElts, -1);
+ SmallVector<int, 16> Mask1(HalfNumElts, -1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
+ if (i < HalfNumElts)
+ Mask0[i] = M;
+ else
+ Mask1[i - HalfNumElts] = M;
+ }
+
+ // Ask the target if this is a valid transform.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ HalfNumElts);
+ if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
+ !TLI.isShuffleMaskLegal(Mask1, HalfVT))
+ return SDValue();
+
+ // shuffle (concat X, undef), (concat Y, undef), Mask -->
+ // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
+ SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
+ SDLoc DL(Shuf);
+ SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
+ SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
+}
+
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -17163,20 +18394,24 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
SmallVector<SDValue, 4> Ops;
EVT ConcatVT = N0.getOperand(0).getValueType();
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ auto IsUndefMaskElt = [](int i) { return i == -1; };
+
// Special case: shuffle(concat(A,B)) can be more efficiently represented
// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
// half vector elements.
if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
- std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
- SVN->getMask().end(), [](int i) { return i == -1; })) {
- N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
+ llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
+ IsUndefMaskElt)) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
+ N0.getOperand(1),
+ Mask.slice(0, NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
@@ -17184,35 +18419,32 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
- // Make sure we're dealing with a copy.
unsigned Begin = I * NumElemsPerConcat;
- bool AllUndef = true, NoUndef = true;
- for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
- if (SVN->getMaskElt(J) >= 0)
- AllUndef = false;
- else
- NoUndef = false;
+ ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
+
+ // Make sure we're dealing with a copy.
+ if (llvm::all_of(SubMask, IsUndefMaskElt)) {
+ Ops.push_back(DAG.getUNDEF(ConcatVT));
+ continue;
}
- if (NoUndef) {
- if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+ int OpIdx = -1;
+ for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
+ if (IsUndefMaskElt(SubMask[i]))
+ continue;
+ if ((SubMask[i] % (int)NumElemsPerConcat) != i)
return SDValue();
-
- for (unsigned J = 1; J != NumElemsPerConcat; ++J)
- if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
- return SDValue();
-
- unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
- if (FirstElt < N0.getNumOperands())
- Ops.push_back(N0.getOperand(FirstElt));
- else
- Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
-
- } else if (AllUndef) {
- Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
- } else { // Mixed with general masks and undefs, can't do optimization.
- return SDValue();
+ int EltOpIdx = SubMask[i] / NumElemsPerConcat;
+ if (0 <= OpIdx && EltOpIdx != OpIdx)
+ return SDValue();
+ OpIdx = EltOpIdx;
}
+ assert(0 <= OpIdx && "Unknown concat_vectors op");
+
+ if (OpIdx < (int)N0.getNumOperands())
+ Ops.push_back(N0.getOperand(OpIdx));
+ else
+ Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
@@ -17278,8 +18510,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
- Op = S.getOperand(0);
+ SDValue Op0 = S.getOperand(0);
+ Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
} else {
// Operand can't be combined - bail out.
return SDValue();
@@ -17433,11 +18665,17 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
-static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
- ShuffleVectorSDNode *Splat,
- SelectionDAG &DAG) {
+static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ if (!Shuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Splat || !Splat->isSplat())
+ return SDValue();
+
+ ArrayRef<int> ShufMask = Shuf->getMask();
ArrayRef<int> SplatMask = Splat->getMask();
- assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+ assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
// Prefer simplifying to the splat-shuffle, if possible. This is legal if
// every undef mask element in the splat-shuffle has a corresponding undef
@@ -17463,13 +18701,13 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
return false;
return true;
};
- if (CanSimplifyToExistingSplat(UserMask, SplatMask))
- return SDValue(Splat, 0);
+ if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
+ return Shuf->getOperand(0);
// Create a new shuffle with a mask that is composed of the two shuffles'
// masks.
SmallVector<int, 32> NewMask;
- for (int Idx : UserMask)
+ for (int Idx : ShufMask)
NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
@@ -17555,6 +18793,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
Op1, Op0.getOperand(1), NewInsIndex);
}
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+ // shuf (shuf0 X, Y, Mask0), undef, Mask
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> Mask = Shuf->getMask();
+ ArrayRef<int> Mask0 = Shuf0->getMask();
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+ // Is the element of the shuffle operand chosen by this shuffle the same as
+ // the element chosen by the shuffle operand itself?
+ if (Mask0[Mask[i]] != Mask0[i])
+ return SDValue();
+ }
+ // Every element of this shuffle is identical to the result of the previous
+ // shuffle, so we can replace this value.
+ return Shuf->getOperand(0);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -17604,19 +18870,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
- // A shuffle of a single vector that is a splat can always be folded.
- if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
- if (N1->isUndef() && N0Shuf->isSplat())
- return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
+ // A shuffle of a single vector that is a splatted value can always be folded.
+ if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+ return V;
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
- SDNode *V = N0.getNode();
+ int SplatIndex = SVN->getSplatIndex();
+ if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+ // splat (vector_bo L, R), Index -->
+ // splat (scalar_bo (extelt L, Index), (extelt R, Index))
+ SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+ SDLoc DL(N);
+ EVT EltVT = VT.getScalarType();
+ SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+ SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+ SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+ SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
+ N0.getNode()->getFlags());
+ SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+ SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+ return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+ }
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look through conversions that change things like v4f32 to v2f64.
+ SDNode *V = N0.getNode();
if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
@@ -17649,7 +18931,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
// Canonicalize any other splat as a build_vector.
- const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SDValue Splatted = V->getOperand(SplatIndex);
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
@@ -17665,6 +18947,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ // This is intentionally placed after demanded elements simplification because
+ // it could eliminate knowledge of undef elements created by this shuffle.
+ if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+ return ShufOp;
+
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
@@ -17704,7 +18991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(M < 0 ? -1 : Scale * M + s);
return NewMask;
};
-
+
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
@@ -17884,6 +19171,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
}
+ if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+ return V;
+
return SDValue();
}
@@ -18006,7 +19296,44 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
- unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Push subvector bitcasts to the output, adjusting the index as we go.
+ // insert_subvector(bitcast(v), bitcast(s), c1)
+ // -> bitcast(insert_subvector(v, s, c2))
+ if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+ N1.getOpcode() == ISD::BITCAST) {
+ SDValue N0Src = peekThroughBitcasts(N0);
+ SDValue N1Src = peekThroughBitcasts(N1);
+ EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+ EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+ if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+ N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+ EVT NewVT;
+ SDLoc DL(N);
+ SDValue NewIdx;
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+ unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+ NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+ } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+ unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+ if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+ }
+ }
+ if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+ SDValue Res = DAG.getBitcast(NewVT, N0Src);
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
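// A rough numeric sketch of the index rescaling above: with 64-bit elements
// viewed as 32-bit elements, the insertion index scales by 64/32 while the
// byte offset of the insertion point is unchanged (values are illustrative).
#include <cassert>
#include <cstdio>

int main() {
  unsigned WideBits = 64, NarrowBits = 32, InsIdx = 3;
  assert(WideBits % NarrowBits == 0);
  unsigned Scale = WideBits / NarrowBits; // 2
  unsigned NewIdx = InsIdx * Scale;       // 6
  // Same byte offset before and after the bitcast is pushed through.
  assert(InsIdx * (WideBits / 8) == NewIdx * (NarrowBits / 8));
  std::printf("InsIdx %u -> NewIdx %u\n", InsIdx, NewIdx);
  return 0;
}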
// Canonicalize insert_subvector dag nodes.
// Example:
@@ -18070,6 +19397,36 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // VECREDUCE over 1-element vector is just an extract.
+ if (VT.getVectorNumElements() == 1) {
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+ return Res;
+ }
+
+ // On a boolean vector an and/or reduction is the same as a umin/umax
+ // reduction. Convert them if the latter is legal while the former isn't.
+ if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+ unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+ ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
+ TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
+ DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
+ }
+
+ return SDValue();
+}
+
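// A small self-contained check of the equivalence used for boolean vectors
// above: when every lane is 0x00 or 0xFF, an AND reduction selects the minimum
// lane and an OR reduction selects the maximum lane (lane values illustrative).
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> Lanes = {0xFF, 0x00, 0xFF, 0xFF};
  uint8_t And = 0xFF, Or = 0x00, Min = 0xFF, Max = 0x00;
  for (uint8_t L : Lanes) {
    And &= L;
    Or |= L;
    Min = L < Min ? L : Min;
    Max = L > Max ? L : Max;
  }
  assert(And == Min && Or == Max);
  std::printf("and=%u min=%u or=%u max=%u\n", (unsigned)And, (unsigned)Min,
              (unsigned)Or, (unsigned)Max);
  return 0;
}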
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -18161,6 +19518,53 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
+/// If a vector binop is performed on splat values, it may be profitable to
+/// extract, scalarize, and insert/splat.
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: Remove/replace the extract cost check? If the elements are available
+ // as scalars, then there may be no extract cost. Should we ask if
+ // inserting a scalar back into a vector is cheap instead?
+ int Index0, Index1;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ if (!Src0 || !Src1 || Index0 != Index1 ||
+ Src0.getValueType().getVectorElementType() != EltVT ||
+ Src1.getValueType().getVectorElementType() != EltVT ||
+ !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue IndexC =
+ DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
+
+ // If all lanes but 1 are undefined, no need to splat the scalar result.
+ // TODO: Keep track of undefs and use that info in the general case.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
+ count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
+ count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
+ // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+ // build_vec ..undef, (bo X, Y), undef...
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
+ Ops[Index0] = ScalarBO;
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
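// A minimal standalone sketch of the identity the splat scalarization above
// exploits: applying a lane-wise binop to two splats equals splatting a single
// scalar binop (the add and the lane values are illustrative placeholders).
#include <array>
#include <cassert>
#include <cstdio>

int main() {
  int X = 7, Y = 5;
  std::array<int, 4> SplatX, SplatY, VecBO, SplatBO;
  SplatX.fill(X);
  SplatY.fill(Y);
  for (int i = 0; i < 4; ++i) VecBO[i] = SplatX[i] + SplatY[i]; // wide vector op
  SplatBO.fill(X + Y);                                          // one scalar op, then splat
  assert(VecBO == SplatBO);
  std::printf("every lane is %d\n", VecBO[0]);
  return 0;
}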
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
@@ -18169,34 +19573,63 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
+ EVT VT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
return Fold;
- // Type legalization might introduce new shuffles in the DAG.
- // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
- // -> (shuffle (VBinOp (A, B)), Undef, Mask).
- if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
- isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
- LHS.getOperand(1).isUndef() &&
- RHS.getOperand(1).isUndef()) {
- ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
- ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
-
- if (SVN0->getMask().equals(SVN1->getMask())) {
- EVT VT = N->getValueType(0);
- SDValue UndefVector = LHS.getOperand(1);
- SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0),
- N->getFlags());
- AddUsersToWorklist(N);
- return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
- SVN0->getMask());
+ // Move unary shuffles with identical masks after a vector binop:
+ // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
+ // --> shuffle (VBinOp A, B), Undef, Mask
+ // This does not require type legality checks because we are creating the
+ // same types of operations that are in the original sequence. We do have to
+ // restrict ops like integer div that have immediate UB (eg, div-by-zero)
+ // though. This code is adapted from the identical transform in instcombine.
+ if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
+ Opcode != ISD::UREM && Opcode != ISD::SREM &&
+ Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+ auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+ if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+ LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+ (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+ SDLoc DL(N);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
+ RHS.getOperand(0), N->getFlags());
+ SDValue UndefV = LHS.getOperand(1);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+ }
+ }
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of insertion may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
+ if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+ RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+ LHS.getOperand(2) == RHS.getOperand(2) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ SDValue X = LHS.getOperand(1);
+ SDValue Y = RHS.getOperand(1);
+ SDValue Z = LHS.getOperand(2);
+ EVT NarrowVT = X.getValueType();
+ if (NarrowVT == Y.getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ // (binop undef, undef) may not return undef, so compute that result.
+ SDLoc DL(N);
+ SDValue VecC =
+ DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
+ SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
}
}
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ return V;
+
return SDValue();
}
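// A minimal standalone check of the shuffle-sinking identity used above for a
// lane-wise op (multiplication here) with a shared unary mask:
//   bo (shuffle A, Mask), (shuffle B, Mask) == shuffle (bo A, B), Mask
// All names and values below are illustrative, not DAGCombiner API.
#include <array>
#include <cassert>
#include <cstdio>

int main() {
  std::array<int, 4> A = {1, 2, 3, 4}, B = {5, 6, 7, 8};
  std::array<int, 4> Mask = {3, 3, 1, 0};

  std::array<int, 4> ShufA, ShufB, LHS, BO, RHS;
  for (int i = 0; i < 4; ++i) ShufA[i] = A[Mask[i]];
  for (int i = 0; i < 4; ++i) ShufB[i] = B[Mask[i]];
  for (int i = 0; i < 4; ++i) LHS[i] = ShufA[i] * ShufB[i]; // binop after the shuffles
  for (int i = 0; i < 4; ++i) BO[i] = A[i] * B[i];
  for (int i = 0; i < 4; ++i) RHS[i] = BO[Mask[i]];          // shuffle after the binop
  assert(LHS == RHS);
  std::printf("lane 0 = %d\n", LHS[0]);
  return 0;
}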
@@ -18214,13 +19647,16 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
+ const SDNodeFlags Flags = N0.getNode()->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
- SCC.getOperand(4));
+ SCC.getOperand(4), Flags);
AddToWorklist(SETCC.getNode());
- return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
- SCC.getOperand(2), SCC.getOperand(3));
+ SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
}
return SCC;
@@ -18305,6 +19741,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
RLD->getPointerInfo().getAddrSpace() != 0 ||
+ // We can't produce a CMOV of a TargetFrameIndex since we won't
+ // generate the address generation required.
+ LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
LLD->getBasePtr().getValueType()))
return false;
@@ -18501,8 +19941,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
// If a constant can be materialized without loads, this does not make sense.
if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
- TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
- TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
return SDValue();
// If both constants have multiple uses, then we won't need to do an extra
@@ -18547,20 +19987,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (N2 == N3) return N2;
EVT CmpOpVT = N0.getValueType();
+ EVT CmpResVT = getSetCCResultType(CmpOpVT);
EVT VT = N2.getValueType();
auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant.
- SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
- false);
- if (SCC.getNode()) AddToWorklist(SCC.getNode());
-
- if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
- // fold select_cc true, x, y -> x
- // fold select_cc false, x, y -> y
- return !SCCC->isNullValue() ? N2 : N3;
+ if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+ AddToWorklist(SCC.getNode());
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !(SCCC->isNullValue()) ? N2 : N3;
+ }
}
if (SDValue V =
@@ -18621,7 +20061,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue Temp, SCC;
// zext (setcc n0, n1)
if (LegalTypes) {
- SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+ SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
if (VT.bitsLT(SCC.getValueType()))
Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
else
@@ -18644,36 +20084,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
getShiftAmountTy(Temp.getValueType())));
}
- // Check to see if this is an integer abs.
- // select_cc setg[te] X, 0, X, -X ->
- // select_cc setgt X, -1, X, -X ->
- // select_cc setl[te] X, 0, -X, X ->
- // select_cc setlt X, 1, -X, X ->
- // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C) {
- ConstantSDNode *SubC = nullptr;
- if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
- (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
- N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
- SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
- else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
- (N1C->isOne() && CC == ISD::SETLT)) &&
- N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
- SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
-
- if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
- SDLoc DL(N0);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
- DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
- DL,
- getShiftAmountTy(CmpOpVT)));
- SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
- AddToWorklist(Shift.getNode());
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
- }
- }
-
// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
@@ -18728,7 +20138,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -18769,7 +20179,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -18821,7 +20231,6 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
AddToWorklist(Est.getNode());
if (Iterations) {
- EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
@@ -18977,7 +20386,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (!Reciprocal) {
// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 for those cases.
- EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
@@ -19020,79 +20428,95 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
- // If they are the same then they must be aliases.
- if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
+bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
- // If they are both volatile then they cannot be reordered.
- if (Op0->isVolatile() && Op1->isVolatile()) return true;
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ SDValue BasePtr;
+ int64_t Offset;
+ Optional<int64_t> NumBytes;
+ MachineMemOperand *MMO;
+ };
- // If one operation reads from invariant memory, and the other may store, they
- // cannot alias. These should really be checking the equivalent of mayWrite,
- // but it only matters for memory nodes other than load /store.
- if (Op0->isInvariant() && Op1->writeMem())
- return false;
+ auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+ if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+ int64_t Offset = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+ Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+ ? C->getSExtValue()
+ : (LSN->getAddressingMode() == ISD::PRE_DEC)
+ ? -1 * C->getSExtValue()
+ : 0;
+ return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+ Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+ LSN->getMemOperand()};
+ }
+ if (const auto *LN = cast<LifetimeSDNode>(N))
+ return {false /*isVolatile*/, LN->getOperand(1),
+ (LN->hasOffset()) ? LN->getOffset() : 0,
+ (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
+ : Optional<int64_t>(),
+ (MachineMemOperand *)nullptr};
+ // Default.
+ return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+ Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+ };
- if (Op1->isInvariant() && Op0->writeMem())
- return false;
+ MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+ MUC1 = getCharacteristics(Op1);
- unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
- unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
-
- // Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
- int64_t PtrDiff;
- if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
- if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
- return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
-
- // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
- // able to calculate their relative offset if at least one arises
- // from an alloca. However, these allocas cannot overlap and we
- // can infer there is no alias.
- if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
- if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // If the base are the same frame index but the we couldn't find a
- // constant offset, (indices are different) be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
- !MFI.isFixedObjectIndex(B->getIndex())))
- return false;
- }
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
- bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
- bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
- bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
- bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
- bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
- bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
-
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
}
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
+ DAG, IsAlias))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
+ // either is not known.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias. These should really be checking the equivalent of mayWrite,
+ // but it only matters for memory nodes other than load/store.
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+
// If we know required SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
// cases created by splitting vector types.
- int64_t SrcValOffset0 = Op0->getSrcValueOffset();
- int64_t SrcValOffset1 = Op1->getSrcValueOffset();
- unsigned OrigAlignment0 = Op0->getOriginalAlignment();
- unsigned OrigAlignment1 = Op1->getOriginalAlignment();
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
+ unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
+ MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
+ *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
- (OffAlign1 + NumBytes1) <= OffAlign0)
+ if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
+ (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
return false;
}
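// A quick numeric sketch of the alignment-based disjointness test above: two
// 4-byte accesses with 16-byte base alignment at offsets 0 and 8 from the same
// underlying value cannot overlap (the numbers here are illustrative).
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  int64_t Align = 16, NumBytes = 4;
  int64_t Off0 = 0, Off1 = 8;       // offsets from the same underlying value
  int64_t OffAlign0 = Off0 % Align; // 0
  int64_t OffAlign1 = Off1 % Align; // 8
  bool NoAlias = (OffAlign0 + NumBytes) <= OffAlign1 ||
                 (OffAlign1 + NumBytes) <= OffAlign0;
  assert(NoAlias);
  std::printf("[%lld,%lld) and [%lld,%lld) do not overlap\n",
              (long long)Off0, (long long)(Off0 + NumBytes),
              (long long)Off1, (long long)(Off1 + NumBytes));
  return 0;
}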
@@ -19105,17 +20529,16 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA &&
- Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
- AliasResult AAResult =
- AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
+ int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
if (AAResult == NoAlias)
return false;
}
@@ -19132,18 +20555,64 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
+ const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
+ // Attempt to improve the chain by a single step.
+ std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+ switch (C.getOpcode()) {
+ case ISD::EntryToken:
+ // No need to mark EntryToken.
+ C = SDValue();
+ return true;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for C.
+ bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+ !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+ if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ // Alias, so stop here.
+ return false;
+ }
+
+ case ISD::CopyFromReg:
+ // Always forward past CopyFromReg.
+ C = C.getOperand(0);
+ return true;
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the memory access.
+ if (!isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+ };
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
+ // Don't bother if we've seen Chain before.
+ if (!Visited.insert(Chain.getNode()).second)
+ continue;
+
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we reach the depth limit.
//
@@ -19156,58 +20625,30 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
return;
}
- // Don't bother if we've been before.
- if (!Visited.insert(Chain.getNode()).second)
- continue;
-
- switch (Chain.getOpcode()) {
- case ISD::EntryToken:
- // Entry token is ideal chain operand, but handled in FindBetterChain.
- break;
-
- case ISD::LOAD:
- case ISD::STORE: {
- // Get alias information for Chain.
- bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
- !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
-
- // If chain is alias then stop here.
- if (!(IsLoad && IsOpLoad) &&
- isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
- Aliases.push_back(Chain);
- } else {
- // Look further up the chain.
- Chains.push_back(Chain.getOperand(0));
- ++Depth;
- }
- break;
- }
-
- case ISD::TokenFactor:
+ if (Chain.getOpcode() == ISD::TokenFactor) {
// We have to check each of the operands of the token factor for "small"
// token factors, so we queue them up. Adding the operands to the queue
// (stack) in reverse order maintains the original order and increases the
// likelihood that getNode will find a matching token factor (CSE.)
if (Chain.getNumOperands() > 16) {
Aliases.push_back(Chain);
- break;
+ continue;
}
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
++Depth;
- break;
-
- case ISD::CopyFromReg:
- // Forward past CopyFromReg.
- Chains.push_back(Chain.getOperand(0));
+ continue;
+ }
+ // Everything else: attempt to improve the chain by a single step.
+ if (ImproveChain(Chain)) {
+ // Updated chain found. Consider the new chain if one exists.
+ if (Chain.getNode())
+ Chains.push_back(Chain);
++Depth;
- break;
-
- default:
- // For all other instructions we will just have to take what we can get.
- Aliases.push_back(Chain);
- break;
+ continue;
}
+ // No improved chain possible; treat as an alias.
+ Aliases.push_back(Chain);
}
}
@@ -19232,13 +20673,15 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return Aliases[0];
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
+ return DAG.getTokenFactor(SDLoc(N), Aliases);
}
+namespace {
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
+} // namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
@@ -19349,7 +20792,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
if (AddNewChain)
TFOps.insert(TFOps.begin(), NewChain);
- SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
AddToWorklist(STChain);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a9a3c44ea0c9..22c23ba877e8 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,9 +1,8 @@
//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -782,7 +781,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
unsigned Reg = getRegForValue(Val);
if (!Reg)
return false;
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
}
}
return true;
@@ -831,8 +830,8 @@ bool FastISel::selectStackmap(const CallInst *I) {
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
for (unsigned i = 0; ScratchRegs[i]; ++i)
Ops.push_back(MachineOperand::CreateReg(
- ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
- /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -942,7 +941,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
CLI.NumResultRegs = 1;
- Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
}
// Add the <id> and <numBytes> constants.
@@ -991,13 +990,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
unsigned Reg = getRegForValue(I->getArgOperand(i));
if (!Reg)
return false;
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
}
}
// Push the arguments from the call instruction.
for (auto Reg : CLI.OutRegs)
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
// Push live variables for the stack map.
if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
@@ -1011,13 +1010,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
for (unsigned i = 0; ScratchRegs[i]; ++i)
Ops.push_back(MachineOperand::CreateReg(
- ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
- /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
// Add implicit defs (return values).
for (auto Reg : CLI.InRegs)
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
- /*IsImpl=*/true));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
+ /*isImp=*/true));
// Insert the patchpoint instruction before the call generated by the target.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
@@ -1045,9 +1044,9 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
- /*IsDef=*/false));
+ /*isDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
- /*IsDef=*/false));
+ /*isDef=*/false));
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
@@ -1064,11 +1063,11 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) {
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
- /*IsDef=*/false));
+ /*isDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
- /*IsDef=*/false));
+ /*isDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
- /*IsDef=*/false));
+ /*isDef=*/false));
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
@@ -1205,9 +1204,11 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (Arg.IsByVal || Arg.IsInAlloca) {
PointerType *Ty = cast<PointerType>(Arg.Ty);
Type *ElementTy = Ty->getElementType();
- unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
- // For ByVal, alignment should come from FE. BE will guess if this info is
- // not there, but there are cases it cannot get right.
+ unsigned FrameSize =
+ DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+ // For ByVal, alignment should come from FE. BE will guess if this info
+ // is not there, but there are cases it cannot get right.
unsigned FrameAlign = Arg.Alignment;
if (!FrameAlign)
FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
@@ -1235,6 +1236,12 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (CLI.NumResultRegs && CLI.CS)
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+ // Set labels for heapallocsite call.
+ if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) {
+ MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+ MF->addCodeViewHeapAllocSite(CLI.Call, MD);
+ }
+
return true;
}
@@ -1304,9 +1311,6 @@ bool FastISel::selectCall(const User *I) {
return true;
}
- MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
- computeUsesVAFloatArgument(*Call, MMI);
-
// Handle intrinsic function calls.
if (const auto *II = dyn_cast<IntrinsicInst>(Call))
return selectIntrinsicCall(II);
@@ -1710,14 +1714,11 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
}
/// Emit an FNeg operation.
-bool FastISel::selectFNeg(const User *I) {
- Value *X;
- if (!match(I, m_FNeg(m_Value(X))))
- return false;
- unsigned OpReg = getRegForValue(X);
+bool FastISel::selectFNeg(const User *I, const Value *In) {
+ unsigned OpReg = getRegForValue(In);
if (!OpReg)
return false;
- bool OpRegIsKill = hasTrivialKill(I);
+ bool OpRegIsKill = hasTrivialKill(In);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(DL, I->getType());
@@ -1804,9 +1805,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub:
+ case Instruction::FSub: {
// FNeg is currently represented in LLVM IR as a special case of FSub.
- return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB);
+ Value *X;
+ if (match(I, m_FNeg(m_Value(X))))
+ return selectFNeg(I, X);
+ return selectBinaryOp(I, ISD::FSUB);
+ }
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
@@ -1836,6 +1841,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
case Instruction::Xor:
return selectBinaryOp(I, ISD::XOR);
+ case Instruction::FNeg:
+ return selectFNeg(I, I->getOperand(0));
+
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
@@ -1869,6 +1877,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return false;
case Instruction::Call:
+ // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ if (TM.getTargetTriple().isOSAIX())
+ return false;
return selectCall(I);
case Instruction::BitCast:
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index fba728625b07..8b1759246b76 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -1,9 +1,8 @@
//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
unsigned StackAlign = TFI->getStackAlignment();
+ DA = DAG->getDivergenceAnalysis();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -151,7 +151,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
auto Iter = CatchObjects.find(AI);
if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
FrameIndex = MF->getFrameInfo().CreateFixedObject(
- TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
+ TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
} else {
FrameIndex =
@@ -322,13 +322,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
NewMap[MBBMap[Src]] = MBBMap[Dst];
}
EHInfo.EHPadUnwindMap = std::move(NewMap);
- NewMap.clear();
- for (auto &KV : EHInfo.ThrowUnwindMap) {
- const auto *Src = KV.first.get<const BasicBlock *>();
- const auto *Dst = KV.second.get<const BasicBlock *>();
- NewMap[MBBMap[Src]] = MBBMap[Dst];
- }
- EHInfo.ThrowUnwindMap = std::move(NewMap);
}
}
@@ -343,6 +336,7 @@ void FunctionLoweringInfo::clear() {
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
+ DescribedArgs.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
RegsWithFixups.clear();
@@ -352,9 +346,9 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
return RegInfo->createVirtualRegister(
- MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -364,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
@@ -377,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = CreateReg(RegisterVT);
+ unsigned R = CreateReg(RegisterVT, isDivergent);
if (!FirstReg) FirstReg = R;
}
}
return FirstReg;
}
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+ return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
+ DA->isDivergent(V));
+}
+
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
/// the register's LiveOutInfo is for a smaller bit width, it is extended to
@@ -400,7 +399,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
if (BitWidth > LOI->Known.getBitWidth()) {
LOI->NumSignBits = 1;
- LOI->Known = LOI->Known.zextOrTrunc(BitWidth);
+ LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */);
}
return LOI;
@@ -526,56 +525,6 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
return VReg;
}
-unsigned
-FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB,
- const Value *Val) {
- auto Key = std::make_pair(MBB, Val);
- auto It = SwiftErrorVRegDefMap.find(Key);
- // If this is the first use of this swifterror value in this basic block,
- // create a new virtual register.
- // After we processed all basic blocks we will satisfy this "upwards exposed
- // use" by inserting a copy or phi at the beginning of this block.
- if (It == SwiftErrorVRegDefMap.end()) {
- auto &DL = MF->getDataLayout();
- const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- auto VReg = MF->getRegInfo().createVirtualRegister(RC);
- SwiftErrorVRegDefMap[Key] = VReg;
- SwiftErrorVRegUpwardsUse[Key] = VReg;
- return VReg;
- } else return It->second;
-}
-
-void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
- const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
- SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) {
- auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
- auto It = SwiftErrorVRegDefUses.find(Key);
- if (It == SwiftErrorVRegDefUses.end()) {
- auto &DL = MF->getDataLayout();
- const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
- SwiftErrorVRegDefUses[Key] = VReg;
- return std::make_pair(VReg, true);
- }
- return std::make_pair(It->second, false);
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
- auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
- auto It = SwiftErrorVRegDefUses.find(Key);
- if (It == SwiftErrorVRegDefUses.end()) {
- unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val);
- SwiftErrorVRegDefUses[Key] = VReg;
- return std::make_pair(VReg, true);
- }
- return std::make_pair(It->second, false);
-}
-
const Value *
FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
if (VirtReg2Value.empty()) {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 6a6114677cc2..9bc07d35dfc5 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1,9 +1,8 @@
//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// Stick to the preferred register classes for legal types.
if (TLI->isTypeLegal(VT))
- UseRC = TLI->getRegClassFor(VT);
+ UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
if (!IsClone && !IsCloned)
for (SDNode *User : Node->uses()) {
@@ -165,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
"Incompatible phys register def and uses!");
DstRC = UseRC;
} else {
- DstRC = TLI->getRegClassFor(VT);
+ DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
}
// If all uses are reading from the src physical register and copying the
@@ -187,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
assert(isNew && "Node emitted out of order - early");
}
-/// getDstOfCopyToRegUse - If the only use of the specified result number of
-/// node is a CopyToReg, return its destination register. Return 0 otherwise.
-unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
- unsigned ResNo) const {
- if (!Node->hasOneUse())
- return 0;
-
- SDNode *User = *Node->use_begin();
- if (User->getOpcode() == ISD::CopyToReg &&
- User->getOperand(2).getNode() == Node &&
- User->getOperand(2).getResNo() == ResNo) {
- unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return Reg;
- }
- return 0;
-}
-
void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
@@ -226,8 +207,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
// type correctly. For example, a 64-bit float (X86::FR64) can't live in
// the 32-bit float super-class (X86::FR32).
if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
- const TargetRegisterClass *VTRC =
- TLI->getRegClassFor(Node->getSimpleValueType(i));
+ const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+ Node->getSimpleValueType(i),
+ (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
if (RC)
VTRC = TRI->getCommonSubClass(RC, VTRC);
if (VTRC)
@@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op,
if (Op.isMachineOpcode() &&
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
- unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
// IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
- if (!VReg) {
- const TargetRegisterClass *RC =
- TLI->getRegClassFor(Op.getSimpleValueType());
- VReg = MRI->createVirtualRegister(RC);
- }
+ const TargetRegisterClass *RC = TLI->getRegClassFor(
+ Op.getSimpleValueType(), Op.getNode()->isDivergent());
+ unsigned VReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
@@ -396,11 +375,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
unsigned VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
- const TargetRegisterClass *OpRC =
- TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
const TargetRegisterClass *IIRC =
II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
: nullptr;
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT)
+ ? TLI->getRegClassFor(OpVT,
+ Op.getNode()->isDivergent() ||
+ (IIRC && TRI->isDivergentRegClass(IIRC)))
+ : nullptr;
if (OpRC && IIRC && OpRC != IIRC &&
TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -465,7 +448,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
}
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- MVT VT, const DebugLoc &DL) {
+ MVT VT, bool isDivergent, const DebugLoc &DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -480,7 +463,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
// VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
// register instead.
- RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
assert(RC && "No legal register class for VT supports that SubIdx");
unsigned NewReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -515,7 +498,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *TRC =
- TLI->getRegClassFor(Node->getSimpleValueType(0));
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
unsigned Reg;
MachineInstr *DefMI;
@@ -549,8 +532,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
if (TargetRegisterInfo::isVirtualRegister(Reg))
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
- Node->getDebugLoc());
-
+ Node->isDivergent(), Node->getDebugLoc());
// Create the destreg if it is missing.
if (VRBase == 0)
VRBase = MRI->createVirtualRegister(TRC);
@@ -585,7 +567,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
//
// There is no constraint on the %src register class.
//
- const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+ const TargetRegisterClass *SRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
@@ -900,6 +883,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasExact())
MI->setFlag(MachineInstr::MIFlag::IsExact);
+
+ if (Flags.hasFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::FPExcept);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -1007,14 +993,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
- unsigned SrcReg;
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ SrcVal.isMachineOpcode() &&
+ SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Instead of building a COPY to that vreg destination, build an
+ // IMPLICIT_DEF instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ break;
+ }
+ unsigned SrcReg;
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
SrcReg = R->getReg();
else
SrcReg = getVR(SrcVal, VRBaseMap);
- unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
@@ -1049,14 +1044,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
- case ISD::INLINEASM: {
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
--NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
- MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::INLINEASM));
+ unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
+ ? TargetOpcode::INLINEASM_BR
+ : TargetOpcode::INLINEASM;
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc));
// Add the asm string as an external symbol operand.
SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
@@ -1137,7 +1136,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// then remove the early-clobber flag.
for (unsigned Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
- MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI);
+ MachineOperand *MO =
+ MIB->findRegisterDefOperand(Reg, false, false, TRI);
assert(MO && "No def operand for clobbered register?");
MO->setIsEarlyClobber(false);
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 701b6368690b..cfe99dd977b5 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -1,9 +1,8 @@
//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
unsigned SrcReg,
DenseMap<SDValue, unsigned> &VRBaseMap);
- /// getDstOfCopyToRegUse - If the only use of the specified result number of
- /// node is a CopyToReg, return its destination register. Return 0 otherwise.
- unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
- unsigned ResNo) const;
-
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
@@ -84,7 +78,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
- const DebugLoc &DL);
+ bool isDivergent, const DebugLoc &DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d3aea37f944d..bf817f00f83d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1,9 +1,8 @@
//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -137,8 +136,6 @@ private:
bool &NeedInvert, const SDLoc &dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
- SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
- unsigned NumOps, bool isSigned, const SDLoc &dl);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
@@ -152,11 +149,17 @@ private:
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
+ SDValue ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl);
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
@@ -489,10 +492,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// If this is an unaligned store and the target doesn't support it,
// expand it.
EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
@@ -542,7 +544,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
+ unsigned LogStWidth = Log2_32(StWidth);
+ assert(LogStWidth < 32);
+ unsigned RoundWidth = 1 << LogStWidth;
assert(RoundWidth < StWidth);
unsigned ExtraWidth = StWidth - RoundWidth;
assert(ExtraWidth < RoundWidth);
@@ -602,11 +606,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
}
@@ -663,13 +666,12 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
EVT MemVT = LD->getMemoryVT();
- unsigned AS = LD->getAddressSpace();
- unsigned Align = LD->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
+ std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
}
break;
}
@@ -756,7 +758,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
} else if (SrcWidth & (SrcWidth - 1)) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ unsigned LogSrcWidth = Log2_32(SrcWidth);
+ assert(LogSrcWidth < 32);
+ unsigned RoundWidth = 1 << LogSrcWidth;
assert(RoundWidth < SrcWidth);
unsigned ExtraWidth = SrcWidth - RoundWidth;
assert(ExtraWidth < RoundWidth);
@@ -853,10 +857,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// If this is an unaligned load and the target doesn't support it,
// expand it.
EVT MemVT = LD->getMemoryVT();
- unsigned AS = LD->getAddressSpace();
- unsigned Align = LD->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
}
}
@@ -994,6 +997,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LLRINT:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -1114,6 +1121,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -1128,7 +1137,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
}
- case ISD::SMULFIX: {
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1142,6 +1153,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(0).getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1386,6 +1413,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
@@ -1723,6 +1751,12 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
/// The resultant code need not be legal.
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl) {
+ return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
+}
+
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl,
+ SDValue Chain) {
// Create the stack frame object.
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@@ -1743,19 +1777,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
// later than DestVT.
SDValue Store;
- if (SrcSize > SlotSize)
- Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store =
- DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
-
+
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
DestAlign);
@@ -2049,41 +2083,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
-/// Generate a libcall taking the given operands as arguments
-/// and returning a result of type RetVT.
-SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
- bool isSigned, const SDLoc &dl) {
- TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
-
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
- std::move(Args))
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned)
- .setIsPostTypeLegalization(true);
-
- std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
-}
-
// Expand a node into a call to a libcall. Similar to
// ExpandLibCall except that the first operand is the in-chain.
std::pair<SDValue, SDValue>
@@ -2160,6 +2159,27 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
+/// Expand the node to a libcall based on the type of its first argument (for
+/// instance lround and its variants).
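+/// For example, an LROUND node whose operand is f64 selects RTLIB::LROUND_F64.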
+SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+
+ return ExpandLibCall(LC, Node, false);
+}
+
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -2530,16 +2550,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
- APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
- for (unsigned J = 0; J != Sz; J += 8) {
- MaskHi4 = MaskHi4 | (0xF0ull << J);
- MaskLo4 = MaskLo4 | (0x0Full << J);
- MaskHi2 = MaskHi2 | (0xCCull << J);
- MaskLo2 = MaskLo2 | (0x33ull << J);
- MaskHi1 = MaskHi1 | (0xAAull << J);
- MaskLo1 = MaskLo1 | (0x55ull << J);
- }
+ APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
+ APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
+ APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
+ APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
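+    // E.g. for Sz == 32 these splats give MaskHi4 == 0xF0F0F0F0 and
+    // MaskLo1 == 0x55555555.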
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
@@ -2593,9 +2609,8 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
switch (VT.getSimpleVT().getScalarType().SimpleTy) {
default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
- Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ // Use a rotate by 8. This can be further expanded if necessary.
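+    // For example, bswap of the i16 value 0xAABB is 0xBBAA, which is exactly a
+    // rotate left by 8.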
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
@@ -2799,12 +2814,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::STRICT_FP_ROUND:
+ Tmp1 = EmitStackConvert(Node->getOperand(1),
+ Node->getValueType(0),
+ Node->getValueType(0), dl, Node->getOperand(0));
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+ return true;
case ISD::FP_ROUND:
case ISD::BITCAST:
- Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::STRICT_FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(1),
+ Node->getOperand(1).getValueType(),
+ Node->getValueType(0), dl, Node->getOperand(0));
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+ return true;
case ISD::FP_EXTEND:
Tmp1 = EmitStackConvert(Node->getOperand(0),
Node->getOperand(0).getValueType(),
@@ -2875,6 +2905,30 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
+ case ISD::LROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+ break;
+ case ISD::LLROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+ break;
+ case ISD::LRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+ break;
+ case ISD::LLRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+ break;
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
@@ -3117,7 +3171,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
+ DAG.getMachineFunction().getFunction().hasOptSize()))
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
@@ -3291,176 +3346,75 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
case ISD::SMULFIX:
- Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
- case ISD::SADDO:
- case ISD::SSUBO: {
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: {
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
- Results.push_back(Sum);
- EVT ResultType = Node->getValueType(1);
- EVT OType = getSetCCResultType(Node->getValueType(0));
-
- SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
-
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- Node->getOpcode() == ISD::SADDO ?
- ISD::SETEQ : ISD::SETNE);
-
- SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
- break;
- }
- case ISD::UADDO:
- case ISD::USUBO: {
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
- bool IsAdd = Node->getOpcode() == ISD::UADDO;
- // If ADD/SUBCARRY is legal, use that instead.
- unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
- if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
- SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
- SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
- { LHS, RHS, CarryIn });
- Results.push_back(SDValue(NodeCarry.getNode(), 0));
- Results.push_back(SDValue(NodeCarry.getNode(), 1));
- break;
- }
+ SDValue Carry = Node->getOperand(2);
+
+ bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
- SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
- LHS.getValueType(), LHS, RHS);
- Results.push_back(Sum);
+ // Initial add of the 2 operands.
+ unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+ EVT VT = LHS.getValueType();
+ SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
- EVT ResultType = Node->getValueType(1);
+ // Initial check for overflow.
+ EVT CarryType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(Node->getValueType(0));
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
- SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
-
- Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
+ SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ // Add of the sum and the carry.
+ SDValue CarryExt =
+ DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+ SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+ // Second check for overflow. If we are adding, we can only overflow if the
+    // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+ // If we are subtracting, we can only overflow if the initial sum is 0 and
+ // the carry is set, resulting in a new sum of all 1s.
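+    // E.g. for i8 ADDCARRY, 0xFF + 0x00 does not trip the first check, but
+    // adding the carry wraps the sum to 0x00, which only this second check
+    // detects.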
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Overflow2 =
+ IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
+ : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
+ Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
+ DAG.getZExtOrTrunc(Carry, dl, SetCCType));
+
+ SDValue ResultCarry =
+ DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
+
+ Results.push_back(Sum2);
+ Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
break;
}
case ISD::UMULO:
case ISD::SMULO: {
- EVT VT = Node->getValueType(0);
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
- SDValue BottomHalf;
- SDValue TopHalf;
- static const unsigned Ops[2][3] =
- { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
- { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
- bool isSigned = Node->getOpcode() == ISD::SMULO;
- if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
- BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
- } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
- BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
- RHS);
- TopHalf = BottomHalf.getValue(1);
- } else if (TLI.isTypeLegal(WideVT)) {
- LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
- RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
- Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
- BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
- DAG.getIntPtrConstant(0, dl));
- TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
- DAG.getIntPtrConstant(1, dl));
- } else {
- // We can fall back to a libcall with an illegal type for the MUL if we
- // have a libcall big enough.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (WideVT == MVT::i16)
- LC = RTLIB::MUL_I16;
- else if (WideVT == MVT::i32)
- LC = RTLIB::MUL_I32;
- else if (WideVT == MVT::i64)
- LC = RTLIB::MUL_I64;
- else if (WideVT == MVT::i128)
- LC = RTLIB::MUL_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
-
- SDValue HiLHS;
- SDValue HiRHS;
- if (isSigned) {
- // The high part is obtained by SRA'ing all but one of the bits of low
- // part.
- unsigned LoSize = VT.getSizeInBits();
- HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- } else {
- HiLHS = DAG.getConstant(0, dl, VT);
- HiRHS = DAG.getConstant(0, dl, VT);
- }
-
- // Here we're passing the 2 arguments explicitly as 4 arguments that are
- // pre-lowered to the correct types. This all depends upon WideVT not
- // being a legal type for the architecture and thus has to be split to
- // two arguments.
- SDValue Ret;
- if(DAG.getDataLayout().isLittleEndian()) {
- // Halves of WideVT are packed into registers in different order
- // depending on platform endianness. This is usually handled by
- // the C calling convention, but we can't defer to it in
- // the legalizer.
- SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
- } else {
- SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
- Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
- }
- assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
- "Ret value is a collection of constituent nodes holding result.");
- BottomHalf = Ret.getOperand(0);
- TopHalf = Ret.getOperand(1);
+ SDValue Result, Overflow;
+ if (TLI.expandMULO(Node, Result, Overflow, DAG)) {
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-
- if (isSigned) {
- Tmp1 = DAG.getConstant(
- VT.getSizeInBits() - 1, dl,
- TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
- Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
- TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
- ISD::SETNE);
- } else {
- TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
- DAG.getConstant(0, dl, VT), ISD::SETNE);
- }
-
- // Truncate the result if SetCC returns a larger type than needed.
- EVT RType = Node->getValueType(1);
- if (RType.getSizeInBits() < TopHalf.getValueSizeInBits())
- TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf);
-
- assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() &&
- "Unexpected result type for S/UMULO legalization");
-
- Results.push_back(BottomHalf);
- Results.push_back(TopHalf);
break;
}
case ISD::BUILD_PAIR: {
@@ -3487,6 +3441,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getConstant(0, dl, Tmp1.getValueType()),
Tmp2, Tmp3, ISD::SETNE);
}
+ Tmp1->setFlags(Node->getFlags());
Results.push_back(Tmp1);
break;
case ISD::BR_JT: {
@@ -3570,7 +3525,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// condition code, create a new SETCC node.
if (Tmp3.getNode())
Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
- Tmp1, Tmp2, Tmp3);
+ Tmp1, Tmp2, Tmp3, Node->getFlags());
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
@@ -3598,6 +3553,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getConstant(TrueValue, dl, VT),
DAG.getConstant(0, dl, VT),
Tmp3);
+ Tmp1->setFlags(Node->getFlags());
Results.push_back(Tmp1);
break;
}
@@ -3617,9 +3573,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
"Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
"expanded.");
- EVT CCVT =
- TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
- SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+ EVT CCVT = getSetCCResultType(CmpVT);
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
break;
}
@@ -3635,6 +3590,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Use the new condition code and swap true and false
Legalized = true;
Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ Tmp1->setFlags(Node->getFlags());
} else {
// If The inverse is not legal, then try to swap the arguments using
// the inverse condition code.
@@ -3644,6 +3600,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// lhs and rhs.
Legalized = true;
Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ Tmp1->setFlags(Node->getFlags());
}
}
@@ -3670,6 +3627,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
Tmp2, Tmp3, Tmp4, CC);
}
+ Tmp1->setFlags(Node->getFlags());
}
Results.push_back(Tmp1);
break;
@@ -3729,6 +3687,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ break;
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -4273,6 +4246,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
// Perform the larger operation, then round down.
Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+ Tmp1->setFlags(Node->getFlags());
if (TruncOp != ISD::FP_ROUND)
Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
else
@@ -4303,8 +4277,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
- Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
- Tmp1, Tmp2, Node->getOperand(2)));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Node->getOperand(2), Node->getFlags()));
break;
}
case ISD::BR_CC: {
@@ -4532,6 +4506,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(CvtVec);
break;
}
+ case ISD::ATOMIC_SWAP: {
+ AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+ SDLoc SL(Node);
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
+ assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+ "unexpected promotion type");
+ assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+ "unexpected atomic_swap with illegal type");
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
+ DAG.getVTList(NVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+ Results.push_back(NewAtomic.getValue(1));
+ break;
+ }
}
// Replace the original node with the legalized result.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4644e9588e7b..b4849b2881e6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1,9 +1,8 @@
//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -104,6 +103,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:
@@ -440,6 +440,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
+
+ EVT FloatVT = N->getValueType(ResNo);
+ if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
+ // Expand Y = FNEG(X) -> Y = X ^ sign mask
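+    // For f32 the softened value is an i32 and the mask is 0x80000000, so only
+    // the sign bit is flipped.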
+ APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+ DAG.getConstant(SignMask, dl, NVT));
+ }
+
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
@@ -763,6 +772,10 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
@@ -1029,6 +1042,61 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
//===----------------------------------------------------------------------===//
// Float Result Expansion
@@ -1562,6 +1630,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break;
case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
@@ -1732,6 +1804,54 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemoryVT(), ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
//===----------------------------------------------------------------------===//
// Float Operand Promotion
//===----------------------------------------------------------------------===//
@@ -1748,6 +1868,8 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
}
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
@@ -1762,6 +1884,10 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
// a part of PromoteFloatResult.
switch (N->getOpcode()) {
default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
@@ -1872,6 +1998,8 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -1880,6 +2008,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FP16_TO_FP:
case ISD::FP_TO_FP16:
default:
+#ifndef NDEBUG
+ dbgs() << "PromoteFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
llvm_unreachable("Do not know how to promote this operator's result!");
case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
@@ -1932,7 +2064,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
-
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
}
if (R.getNode())
@@ -2166,3 +2298,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+ AtomicSDNode *AM = cast<AtomicSDNode>(N);
+ SDLoc SL(N);
+
+ SDValue CastVal = BitConvertToInteger(AM->getVal());
+ EVT CastVT = CastVal.getValueType();
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
+ DAG.getVTList(CastVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+
+ SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+ NewAtomic);
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
+
+ return ResultCast;
+
+}
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5fbc70fce60d..15ac45c37c66 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1,9 +1,8 @@
//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,7 +148,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
- case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -172,6 +174,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Res = PromoteIntRes_VECREDUCE(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -293,21 +307,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
BitConvertToInteger(GetScalarizedVector(InOp)));
break;
case TargetLowering::TypeSplitVector: {
- // For example, i32 = BITCAST v2i16 on alpha. Convert the split
- // pieces of the input into integers and reassemble in the final type.
- SDValue Lo, Hi;
- GetSplitVector(N->getOperand(0), Lo, Hi);
- Lo = BitConvertToInteger(Lo);
- Hi = BitConvertToInteger(Hi);
-
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Lo, Hi);
-
- InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
- EVT::getIntegerVT(*DAG.getContext(),
- NOutVT.getSizeInBits()),
- JoinIntegers(Lo, Hi));
- return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ if (!NOutVT.isVector()) {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ break;
}
case TargetLowering::TypeWidenVector:
// The input is widened to the same size. Convert to the widened value.
@@ -555,7 +572,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getMask(), ExtPassThru, N->getMemoryVT(),
- N->getMemOperand(), ISD::SEXTLOAD);
+ N->getMemOperand(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -582,23 +599,27 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
- // Simply change the return type of the boolean result.
+ // Change the return type of the boolean result while obeying
+ // getSetCCResultType.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
- EVT ValueVTs[] = { N->getValueType(0), NVT };
+ EVT VT = N->getValueType(0);
+ EVT SVT = getSetCCResultType(VT);
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
unsigned NumOps = N->getNumOperands();
assert(NumOps <= 3 && "Too many operands");
if (NumOps == 3)
Ops[2] = N->getOperand(2);
- SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
- DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps));
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
+ makeArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
ReplaceValueWith(SDValue(N, 0), Res);
- return SDValue(Res.getNode(), 1);
+ // Convert to the expected type.
+ return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
@@ -646,12 +667,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// Can just promote the operands then continue with operation.
SDLoc dl(N);
- SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
- SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed =
+ N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT OldType = N->getOperand(0).getValueType();
EVT PromotedType = Op1Promoted.getValueType();
+ unsigned DiffSize =
+ PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
+
+ bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+ if (Saturating) {
+ // Promoting the operand and result values changes the saturation width,
+    // which extends the values that we clamp to on saturation. This could be
+ // resolved by shifting one of the operands the same amount, which would
+ // also shift the result we compare against, then shifting back.
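+    // For instance, promoting an i8 operation to i16 shifts one operand left
+    // by the 8 extra bits and shifts the final product right by 8 again.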
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ }
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
}
@@ -875,7 +923,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
// Calculate the overflow flag: zero extend the arithmetic result from
// the original type.
- SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType());
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
@@ -917,6 +965,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
return SDValue(Res.getNode(), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+ SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
// Promote the overflow bit trivially.
if (ResNo == 1)
@@ -946,9 +999,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
+ unsigned Shift = SmallVT.getScalarSizeInBits();
+ EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
+ TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
- DAG.getIntPtrConstant(SmallVT.getSizeInBits(),
- DL));
+ DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, DL, Hi.getValueType()),
ISD::SETNE);
@@ -1091,7 +1146,21 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
- case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
+
+ case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -1434,24 +1503,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
SDValue Carry = N->getOperand(2);
SDLoc DL(N);
- auto VT = getSetCCResultType(LHS.getValueType());
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT);
- switch (BoolType) {
- case TargetLoweringBase::UndefinedBooleanContent:
- Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT);
- break;
- case TargetLoweringBase::ZeroOrOneBooleanContent:
- Carry = DAG.getZExtOrTrunc(Carry, DL, VT);
- break;
- case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
- Carry = DAG.getSExtOrTrunc(Carry, DL, VT);
- break;
- }
+ Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
@@ -1475,6 +1532,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
+ SDValue Op = SExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Expected integer vector reduction");
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ Op = GetPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT VT = N->getValueType(0);
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+
+ // Result size must be >= element size. If this is not the case after
+ // promotion, also promote the result type and then truncate.
+ SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -1499,7 +1594,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand the result of this operator!");
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
@@ -1518,6 +1614,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
@@ -1526,6 +1623,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break;
+ case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -1613,7 +1712,20 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
- case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2267,6 +2379,25 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
IsOpaque);
}
+void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+
+ // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
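+  // The sign of the full value is the sign bit of the high half, so a single
+  // compare on Hi selects between the original halves and the negated halves.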
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, dl, VT), N0);
+ SDValue NegLo, NegHi;
+ SplitInteger(Neg, NegLo, NegHi);
+
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
+ DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+ Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
+ Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -2361,6 +2492,58 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ if (VT == MVT::f32)
+ LC = RTLIB::LLROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ SDLoc dl(N);
+ EVT RetVT = N->getValueType(0);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ if (VT == MVT::f32)
+ LC = RTLIB::LLRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ SDLoc dl(N);
+ EVT RetVT = N->getValueType(0);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
if (ISD::isNormalLoad(N)) {
@@ -2581,15 +2764,39 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SplitInteger(Result, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+/// This performs an expansion of the integer result for a fixed point
+/// multiplication. The default expansion rounds down towards negative
+/// infinity; targets that care about the rounding mode should specify a
+/// target hook and provide their own expansion or lowering of fixed point
+/// multiplication so that the rounding stays consistent.
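+/// For example, with a scale of 2, 1.5 * 1.5 is 0b0110 * 0b0110 = 0b100100;
+/// shifting the product right by the scale gives 0b1001, i.e. 2.25.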
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ unsigned VTSize = VT.getScalarSizeInBits();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
+ bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+ EVT BoolVT = getSetCCResultType(VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
if (!Scale) {
- SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ SDValue Result;
+ if (!Saturating) {
+ Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else {
+ Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ }
SplitInteger(Result, Lo, Hi);
return;
}
@@ -2600,15 +2807,19 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH)) {
- report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+ report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
return;
}
- unsigned VTSize = VT.getScalarSizeInBits();
unsigned NVTSize = NVT.getScalarSizeInBits();
+ assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+ "the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// Shift whole amount by scale.
@@ -2617,6 +2828,11 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
SDValue ResultHL = Result[2];
SDValue ResultHH = Result[3];
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
// After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
// Let's say we multiply 2 64 bit numbers. The resulting value can be held in
@@ -2645,11 +2861,60 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of
+ // saturation.
+ // The number of overflow bits we can check are VTSize - Scale + 1 (we
+ // include the sign bit). If these top bits are > 0, then we overflowed past
+ // the max value. If these top bits are < -1, then we overflowed past the
+ // min value. Otherwise, we did not overflow.
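+  // E.g. with VTSize == 64 and Scale == 16, OverflowBits == 49: all of HH plus
+  // the top 17 bits of HL must be all zeros or all ones, otherwise we saturate.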
+ if (Saturating) {
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+
+ // HH > 0 or HH == 0 && HL > HLLoMask
+ SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLPos =
+ DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
+
+ // HH < -1 or HH == -1 && HL < HLHiMask
+ SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLNeg =
+ DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
+ }
} else if (Scale == NVTSize) {
// If the scales are equal, Lo and Hi are ResultLH and ResultHL,
// respectively. Avoid shifting to prevent undefined behavior.
Lo = ResultLH;
Hi = ResultHL;
+
+ // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1.
+ // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0.
+ if (Saturating) {
+ SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
+
+ SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
+ }
} else if (Scale < VTSize) {
// If the scale is instead less than the old VT size, but greater than or
// equal to the expanded VT size, the first part of the result (ResultLL) is
@@ -2664,9 +2929,39 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+
+ // This is similar to the saturation handling when Scale < NVTSize, but we
+ // only need to check HH.
+ if (Saturating) {
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ }
+ } else if (Scale == VTSize) {
+ assert(
+ !Signed &&
+ "Only unsigned types can have a scale equal to the operand bit width");
+
+ Lo = ResultHL;
+ Hi = ResultHH;
} else {
- llvm_unreachable(
- "Expected the scale to be less than the width of the operands");
+ llvm_unreachable("Expected the scale to be less than or equal to the width "
+ "of the operands");
+ }
+
+ if (Saturating) {
+ APInt LHMax = APInt::getSignedMaxValue(NVTSize);
+ APInt LLMax = APInt::getAllOnesValue(NVTSize);
+ APInt LHMin = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
}
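
Throughout the expansion above, every saturation test reduces to one question: do the bits of the double-width product above the retained window, VTSize - Scale + 1 of them with the sign bit included, all agree? A minimal standalone sketch of that test for 16-bit operands whose product is held in 32 bits, assuming Scale is at most 15 and an arithmetic right shift; the helper is illustrative only, not an LLVM API:

  #include <cstdint>

  // Returns true when (A * B) >> Scale still fits in a signed 16-bit result,
  // i.e. no saturation is required.
  bool fitsAfterRescale16(int16_t A, int16_t B, unsigned Scale) {
    int32_t Prod = int32_t(A) * int32_t(B);    // the four NVT-sized "parts"
    unsigned OverflowBits = 16 - Scale + 1;    // checked bits, sign included
    int32_t Top = Prod >> (32 - OverflowBits); // arithmetic shift assumed
    return Top == 0 || Top == -1;              // all zeros or all ones
  }

When the test fails, the sign of the product decides whether to clamp toward the minimum or the maximum, which is what the part-by-part SatMin/SatMax selects above encode.
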
@@ -2765,11 +3060,15 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
// Next check to see if the target supports this SHL_PARTS operation or if it
- // will custom expand it.
+ // will custom expand it. Don't lower this to SHL_PARTS when we optimise for
+ // size, but create a libcall instead.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
- if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
- Action == TargetLowering::Custom) {
+ const bool LegalOrCustom =
+ (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom;
+
+ if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
@@ -3145,6 +3444,14 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
}
+void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // TODO: For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
+ // both halves independently.
+ SDValue Res = TLI.expandVecReduce(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
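
ExpandIntRes_VECREDUCE defers to the target-independent expansion and then splits the scalar result. As a plain C++ sketch of what a VECREDUCE_ADD computes, illustrating the semantics only and not expandVecReduce itself:

  #include <cstdint>
  #include <vector>

  // Semantics of VECREDUCE_ADD on an integer vector: fold every element
  // into a single accumulator.
  int64_t vecreduce_add(const std::vector<int64_t> &V) {
    int64_t Acc = 0;
    for (int64_t E : V)
      Acc += E;
    return Acc;
  }
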
+
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -3167,7 +3474,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
+ report_fatal_error("Do not know how to expand this operator's operand!");
case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
@@ -3632,8 +3939,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
- SDValue InOp0 = N->getOperand(0);
- EVT InVT = InOp0.getValueType();
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -3644,6 +3949,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
SDValue BaseIdx = N->getOperand(1);
+ SDValue InOp0 = N->getOperand(0);
+ if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
+ InOp0 = GetPromotedInteger(N->getOperand(0));
+
+ EVT InVT = InOp0.getValueType();
+
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
@@ -3654,7 +3965,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
InVT.getVectorElementType(), N->getOperand(0), Index);
- SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
// Insert the converted element to the new vector.
Ops.push_back(Op);
}
@@ -3809,6 +4120,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
V0, ConvElem, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
+ // The VECREDUCE result size may be larger than the element size, so
+ // we can simply change the result type.
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a9f144c06e9a..14fd5be23ccb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1,9 +1,8 @@
//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -708,6 +707,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
assert((OpIdEntry == 0) && "Node is already promoted!");
OpIdEntry = getTableId(Result);
+ Result->setFlags(Op->getFlags());
DAG.transferDbgValues(Op, Result);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 032000f6cb79..1d489b1b3a33 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1,9 +1,8 @@
//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -345,8 +344,10 @@ private:
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
- SDValue PromoteIntRes_SMULFIX(SDNode *N);
+ SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
+ SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_ABS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -379,7 +380,9 @@ private:
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_SMULFIX(SDNode *N);
+ SDValue PromoteIntOp_MULFIX(SDNode *N);
+ SDValue PromoteIntOp_FPOWI(SDNode *N);
+ SDValue PromoteIntOp_VECREDUCE(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -402,6 +405,7 @@ private:
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -414,6 +418,8 @@ private:
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -435,9 +441,10 @@ private:
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
@@ -548,6 +555,10 @@ private:
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_LROUND(SDNode *N);
+ SDValue SoftenFloatOp_LLROUND(SDNode *N);
+ SDValue SoftenFloatOp_LRINT(SDNode *N);
+ SDValue SoftenFloatOp_LLRINT(SDNode *N);
SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
@@ -607,6 +618,10 @@ private:
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_LROUND(SDNode *N);
+ SDValue ExpandFloatOp_LLROUND(SDNode *N);
+ SDValue ExpandFloatOp_LRINT(SDNode *N);
+ SDValue ExpandFloatOp_LLRINT(SDNode *N);
SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
SDValue ExpandFloatOp_SETCC(SDNode *N);
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -640,6 +655,7 @@ private:
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
SDValue PromoteFloatRes_UNDEF(SDNode *N);
+ SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
@@ -673,6 +689,7 @@ private:
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
+ SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
@@ -680,6 +697,7 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -691,7 +709,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+ SDValue ScalarizeVecRes_MULFIX(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -703,6 +721,8 @@ private:
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -727,8 +747,10 @@ private:
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -745,6 +767,7 @@ private:
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
+ void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
@@ -808,7 +831,9 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
+ SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
@@ -827,9 +852,16 @@ private:
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE(SDNode *N);
+
+ /// Helper function to generate a set of operations to perform
+ /// a vector operation for a wider type.
+ ///
+ SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b9d370441c3e..943f63f46c47 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -1,9 +1,8 @@
//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4923a529c21b..10b8b705869e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1,9 +1,8 @@
//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -141,7 +140,11 @@ class VectorLegalizer {
SDValue ExpandFunnelShift(SDValue Op);
SDValue ExpandROT(SDValue Op);
SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+ SDValue ExpandUADDSUBO(SDValue Op);
+ SDValue ExpandSADDSUBO(SDValue Op);
+ SDValue ExpandMULO(SDValue Op);
SDValue ExpandAddSubSat(SDValue Op);
+ SDValue ExpandFixedPointMul(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -263,7 +266,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LLVM_FALLTHROUGH;
case TargetLowering::Expand:
Changed = true;
- return LegalizeOp(ExpandLoad(Op));
+ return ExpandLoad(Op);
}
}
} else if (Op.getOpcode() == ISD::STORE) {
@@ -288,17 +291,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
case TargetLowering::Expand:
Changed = true;
- return LegalizeOp(ExpandStore(Op));
+ return ExpandStore(Op);
}
}
}
- bool HasVectorValue = false;
- for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
- J != E;
- ++J)
- HasVectorValue |= J->isVector();
- if (!HasVectorValue)
+ bool HasVectorValueOrOp = false;
+ for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
+ HasVectorValueOrOp |= J->isVector();
+ for (const SDValue &Op : Node->op_values())
+ HasVectorValueOrOp |= Op.getValueType().isVector();
+
+ if (!HasVectorValueOrOp)
return TranslateLegalizeResults(Op, Result);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
@@ -329,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -418,6 +424,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMAX:
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
case ISD::FCANONICALIZE:
case ISD::SADDSAT:
case ISD::UADDSAT:
@@ -425,7 +437,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::USUBSAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
- case ISD::SMULFIX: {
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -437,6 +451,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -650,23 +677,21 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LoadChains.push_back(ScalarLoad.getValue(1));
}
- // Extract bits, pack and extend/trunc them into destination type.
- unsigned SrcEltBits = SrcEltVT.getSizeInBits();
- SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
-
unsigned BitOffset = 0;
unsigned WideIdx = 0;
unsigned WideBits = WideVT.getSizeInBits();
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant(
+ APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
+
for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
- SDValue Lo, Hi, ShAmt;
+ assert(BitOffset < WideBits && "Unexpected offset!");
- if (BitOffset < WideBits) {
- ShAmt = DAG.getConstant(
- BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
- Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
- }
+ SDValue ShAmt = DAG.getConstant(
+ BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
BitOffset += SrcEltBits;
if (BitOffset >= WideBits) {
@@ -676,13 +701,13 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ShAmt = DAG.getConstant(
SrcEltBits - BitOffset, dl,
TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
- Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ SDValue Hi =
+ DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
}
}
- if (Hi.getNode())
- Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
switch (ExtType) {
default: llvm_unreachable("Unknown extended-load op!");
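
The reworked loop above packs several narrow source elements into each wide loaded word and recovers element i with a right shift by its bit offset followed by a mask; switching the mask to APInt::getLowBitsSet is what makes it correct for element widths where a plain (1U << SrcEltBits) - 1 would overflow. A standalone sketch of the extraction with a hand-written guard for the 64-bit case; the helper name is hypothetical:

  #include <cstdint>

  // Pull one SrcEltBits-wide element out of a wide loaded word: SRL to the
  // element's bit offset, then AND with a low-bits mask.
  uint64_t extractNarrowElt(uint64_t WideWord, unsigned BitOffset,
                            unsigned SrcEltBits) {
    uint64_t Mask =
        (SrcEltBits == 64) ? ~0ull : ((1ull << SrcEltBits) - 1);
    return (WideWord >> BitOffset) & Mask;
  }
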
@@ -778,11 +803,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::UADDO:
+ case ISD::USUBO:
+ return ExpandUADDSUBO(Op);
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ return ExpandSADDSUBO(Op);
+ case ISD::UMULO:
+ case ISD::SMULO:
+ return ExpandMULO(Op);
case ISD::USUBSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
case ISD::SADDSAT:
return ExpandAddSubSat(Op);
+ case ISD::SMULFIX:
+ case ISD::UMULFIX:
+ return ExpandFixedPointMul(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -808,6 +845,20 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
return ExpandStrictFPOp(Op);
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ return TLI.expandVecReduce(Op.getNode(), DAG);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -898,6 +949,19 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ANY_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
// Build a base mask of undef shuffles.
SmallVector<int, 16> ShuffleMask;
ShuffleMask.resize(NumSrcElements, -1);
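
The guard added above pads a too-small source vector so the shuffle-based expansion sees operands of matching size; the operation itself only ever reads the low destination-count elements. A small standalone sketch of the ZERO_EXTEND_VECTOR_INREG semantics for v8i8 to v4i16, in plain C++ rather than DAG nodes; the function name is made up for illustration:

  #include <array>
  #include <cstdint>

  // ZERO_EXTEND_VECTOR_INREG v8i8 -> v4i16: keep the low four elements and
  // zero-extend each into a wider lane.
  std::array<uint16_t, 4>
  zext_inreg_v8i8_to_v4i16(const std::array<uint8_t, 8> &Src) {
    std::array<uint16_t, 4> Dst{};
    for (size_t i = 0; i < Dst.size(); ++i)
      Dst[i] = Src[i];
    return Dst;
  }
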
@@ -945,6 +1009,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
// Build up a zero vector to blend into this one.
SDValue Zero = DAG.getConstant(0, DL, SrcVT);
@@ -1212,12 +1289,58 @@ SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandMULO(SDValue Op) {
+ SDValue Result, Overflow;
+ if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG))
+ std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode());
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
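
The helpers above expand the two-result overflow nodes: whichever result the caller asked for is returned, and the sibling result is registered as already legalized. Per element, the node's semantics are a wrapping result plus an overflow flag; a minimal sketch of the unsigned-add case in plain C++, with a hypothetical helper name:

  #include <cstdint>
  #include <utility>

  // UADDO per element: value 0 is the wrapping sum, value 1 is the carry-out.
  std::pair<uint32_t, bool> uaddo32(uint32_t A, uint32_t B) {
    uint32_t Sum = A + B;      // wraps modulo 2^32
    bool Overflow = Sum < A;   // carry-out of the addition
    return {Sum, Overflow};
  }
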
SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
return Expanded;
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
@@ -1245,7 +1368,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
if (OperVT.isVector())
Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- EltVT, Oper, Idx);
+ OperVT.getVectorElementType(), Oper, Idx);
Opers.push_back(Oper);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f367e9358576..7e4d52617977 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1,9 +1,8 @@
//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
@@ -69,6 +69,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND_VECTOR_INREG:
R = ScalarizeVecRes_VecInregOp(N);
break;
+ case ISD::ABS:
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -170,10 +171,21 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ R = ScalarizeVecRes_OverflowOp(N, ResNo);
+ break;
case ISD::SMULFIX:
- R = ScalarizeVecRes_SMULFIX(N);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ R = ScalarizeVecRes_MULFIX(N);
break;
}
@@ -197,7 +209,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
@@ -235,6 +247,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
return Result;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
+ unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+
+ SDValue ScalarLHS, ScalarRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
+ ScalarLHS = GetScalarizedVector(N->getOperand(0));
+ ScalarRHS = GetScalarizedVector(N->getOperand(1));
+ } else {
+ SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
+ DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
+ DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
+ ScalarLHS = ElemsLHS[0];
+ ScalarRHS = ElemsRHS[0];
+ }
+
+ SDVTList ScalarVTs = DAG.getVTList(
+ ResVT.getVectorElementType(), OvVT.getVectorElementType());
+ SDNode *ScalarNode = DAG.getNode(
+ N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -275,6 +324,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
NewVT, Op, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { NewVT, MVT::Other },
+ { N->getOperand(0), Op, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
@@ -558,9 +619,27 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
+ case ISD::STRICT_FP_ROUND:
+ Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+ break;
case ISD::FP_ROUND:
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = ScalarizeVecOp_VECREDUCE(N);
+ break;
}
}
@@ -691,6 +770,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Wrong operand for scalarization!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { N->getValueType(0).getVectorElementType(),
+ MVT::Other },
+ { N->getOperand(0), Elt, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ // Result type may be wider than element type.
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
+ return Res;
+}
+
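
Scalarizing a reduction of a <1 x ty> vector reduces to returning the single element, any-extended when the reduction's result type is wider than the element type. A one-line sketch of that case, assuming an i8 element and an i32 reduction result; sign extension is one valid choice for ANY_EXTEND's unspecified high bits:

  #include <cstdint>

  // VECREDUCE_* of a single-element vector is just that element, widened to
  // the reduction's result type if necessary.
  int32_t reduce_v1i8(int8_t Elt) { return int32_t(Elt); }
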
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
@@ -748,6 +849,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::VAARG:
+ SplitVecRes_VAARG(N, Lo, Hi);
+ break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -755,6 +859,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
break;
+ case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
@@ -774,7 +879,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
@@ -859,8 +966,18 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FTRUNC:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
+ break;
case ISD::SMULFIX:
- SplitVecRes_SMULFIX(N, Lo, Hi);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ SplitVecRes_MULFIX(N, Lo, Hi);
break;
}
@@ -899,8 +1016,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
-void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1205,6 +1321,104 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Chain);
}
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ // The results of each unrolled operation, including the chain.
+ EVT ChainVTs[] = {EltVT, MVT::Other};
+ SmallVector<SDValue, 8> Chains;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ Operands[0] = Chain;
+ for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+ DAG.getDataLayout())));
+ } else {
+ Operands[j] = Operand;
+ }
+ }
+ SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+ Scalar.getNode()->setFlags(N->getFlags());
+
+ // Add in the scalar as well as its chain value to the
+ // result vectors.
+ Scalars.push_back(Scalar);
+ Chains.push_back(Scalar.getValue(1));
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(DAG.getUNDEF(EltVT));
+
+ // Build a new factor node to connect the chain back together.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ // Create a new BUILD_VECTOR node
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+ return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
+void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
+ std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
+ std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
+
+ SDValue LoLHS, HiLHS, LoRHS, HiRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
+ GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
+ } else {
+ std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
+ std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
+ }
+
+ unsigned Opcode = N->getOpcode();
+ SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
+ SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
+ SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
+ SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+
+ Lo = SDValue(LoNode, ResNo);
+ Hi = SDValue(HiNode, ResNo);
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo),
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, OtherVT,
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -1344,12 +1558,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
// Split Mask operand
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1381,7 +1589,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
- HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
+ HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
@@ -1496,15 +1704,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
- EVT InVT = N->getOperand(0).getValueType();
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(N->getOperand(0), Lo, Hi);
+ GetSplitVector(N->getOperand(OpNo), Lo, Hi);
else
- std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
+ { N->getOperand(0), Hi });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -1669,6 +1896,26 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
}
+void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDValue SV = N->getOperand(2);
+ SDLoc dl(N);
+
+ const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment(
+ NVT.getTypeForEVT(*DAG.getContext()));
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment);
+ Chain = Hi.getValue(1);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
@@ -1705,6 +1952,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
@@ -1734,6 +1982,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -1775,7 +2024,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (Res.getNode() == N)
return true;
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
@@ -1863,14 +2116,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
- Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi });
+
+ // Build a factor node to remember that this operation is independent
+ // of the other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+ }
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
@@ -1920,7 +2189,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
if (isa<ConstantSDNode>(Idx)) {
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
@@ -2079,12 +2347,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
@@ -2101,7 +2363,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
- HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
+ HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -2343,14 +2605,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc DL(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ }
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
@@ -2472,6 +2746,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_StrictFP(N);
break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ Res = WidenVecRes_OverflowOp(N, ResNo);
+ break;
+
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
@@ -2505,6 +2788,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_ROUND:
+ Res = WidenVecRes_Convert_StrictFP(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2523,13 +2811,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// We're going to widen this vector op to a legal type by padding with undef
// elements. If the wide vector op is eventually going to be expanded to
// scalar libcalls, then unroll into scalar ops now to avoid unnecessary
- // libcalls on the undef elements. We are assuming that if the scalar op
- // requires expanding, then the vector op needs expanding too.
+ // libcalls on the undef elements.
EVT VT = N->getValueType(0);
- if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
- EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
- "Target supports vector op, but scalar requires expansion?");
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
break;
}
@@ -2539,11 +2825,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// any other unary ops.
LLVM_FALLTHROUGH;
+ case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::FNEG:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
@@ -2593,14 +2882,13 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
SDLoc dl(ConcatOps[0]);
EVT WidenEltVT = WidenVT.getVectorElementType();
- int Idx = 0;
// while (Some element of ConcatOps is not of type MaxVT) {
// From the end of ConcatOps, collect elements of the same type and put
// them into an op of the next larger supported type
// }
while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
+ int Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
Idx--;
@@ -2750,7 +3038,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
- return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+ return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
// Since the operation can trap, apply operation on the original vector.
EVT MaxVT = VT;
@@ -2846,6 +3134,58 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
+SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT WideResVT, WideOvVT;
+ SDValue WideLHS, WideRHS;
+
+ // TODO: This might result in a widen/split loop.
+ if (ResNo == 0) {
+ WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT);
+ WideOvVT = EVT::getVectorVT(
+ *DAG.getContext(), OvVT.getVectorElementType(),
+ WideResVT.getVectorNumElements());
+
+ WideLHS = GetWidenedVector(N->getOperand(0));
+ WideRHS = GetWidenedVector(N->getOperand(1));
+ } else {
+ WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT);
+ WideResVT = EVT::getVectorVT(
+ *DAG.getContext(), ResVT.getVectorElementType(),
+ WideOvVT.getVectorNumElements());
+
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ WideLHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(0), Zero);
+ WideRHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(1), Zero);
+ }
+
+ SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT);
+ SDNode *WideNode = DAG.getNode(
+ N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode();
+
+ // Replace the other vector result not being explicitly widened here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
+ SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
+ } else {
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue OtherVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(WideNode, ResNo);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
@@ -2929,6 +3269,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getBuildVector(WidenVT, DL, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
+ SDValue InOp = N->getOperand(1);
+ SDLoc DL(N);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+
+ // FIXME: Optimizations need to be implemented here.
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 32> OpChains;
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue InOp = N->getOperand(0);
@@ -3654,8 +4031,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
return Res;
}
- InOp1 = GetWidenedVector(InOp1);
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ // If the inputs also widen, handle them directly. Otherwise widen by hand.
+ SDValue InOp2 = N->getOperand(1);
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp1 = GetWidenedVector(InOp1);
+ InOp2 = GetWidenedVector(InOp2);
+ } else {
+ InOp1 = DAG.WidenVector(InOp1, SDLoc(N));
+ InOp2 = DAG.WidenVector(InOp2, SDLoc(N));
+ }
// Assume that the input and output will be widened appropriately. If not,
// we will have to unroll it at some point.
@@ -3698,6 +4082,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
@@ -3707,6 +4092,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
@@ -3714,6 +4100,22 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = WidenVecOp_VECREDUCE(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3725,8 +4127,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return true;
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand expansion");
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
@@ -3806,7 +4212,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
- SDValue InOp = N->getOperand(0);
+ SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
@@ -3815,10 +4221,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
// See if a widened result type would be legal, if so widen the node.
+ // FIXME: This isn't safe for StrictFP. Other optimizations are needed here.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
InVT.getVectorNumElements());
- if (TLI.isTypeLegal(WideVT)) {
- SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp);
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -3828,12 +4243,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
- for (unsigned i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(
- Opcode, dl, EltVT,
- DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ if (N->isStrictFPOpcode()) {
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i=0; i < NumElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(
+ Opcode, dl, EltVT,
+ DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
return DAG.getBuildVector(VT, dl, Ops);
}
@@ -3859,6 +4288,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
}
}
+ // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
+ // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
+ // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
+ // having to copy via memory.
+ if (VT.isVector()) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (InWidenSize % EltSize == 0) {
+ unsigned NewNumElts = InWidenSize / EltSize;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+ }
+
return CreateStackStoreLoad(InOp, VT);
}
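The EXTRACT_SUBVECTOR fallback added above reinterprets the widened input at the result's element type and then keeps only the original lanes, instead of spilling through a stack temporary. A minimal conceptual sketch in plain C++ of the same idea (not SelectionDAG API; the lane counts and values are made up, and std::memcpy stands in for the bitcast):

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  // v12i8 widened to v16i8; only the first 12 lanes carry real data.
  std::array<uint8_t, 16> WidenedV16i8{};
  for (unsigned I = 0; I < 12; ++I)
    WidenedV16i8[I] = static_cast<uint8_t>(I + 1);

  // "Bitcast" the whole widened vector to v4i32 ...
  std::array<uint32_t, 4> AsV4i32{};
  std::memcpy(AsV4i32.data(), WidenedV16i8.data(), sizeof(AsV4i32));

  // ... then keep only the first three lanes (the EXTRACT_SUBVECTOR step).
  std::array<uint32_t, 3> V3i32{AsV4i32[0], AsV4i32[1], AsV4i32[2]};
  for (uint32_t Lane : V3i32)
    std::cout << std::hex << Lane << '\n';
}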
@@ -4000,10 +4447,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
- unsigned NumElts;
if (OpNo == 1) {
DataOp = GetWidenedVector(DataOp);
- NumElts = DataOp.getValueType().getVectorNumElements();
+ unsigned NumElts = DataOp.getValueType().getVectorNumElements();
// Widen index.
EVT IndexVT = Index.getValueType();
@@ -4041,8 +4487,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Get a new SETCC node to compare the newly widened operands.
// Only some of the compared elements are legal.
- EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- InOp0.getValueType());
+ EVT SVT = getSetCCResultType(InOp0.getValueType());
// The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
if (VT.getScalarType() == MVT::i1)
SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
@@ -4062,6 +4507,80 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
return PromoteTargetBoolean(CC, VT);
}
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(0));
+ EVT OrigVT = N->getOperand(0).getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+
+ SDValue NeutralElem;
+ switch (N->getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_UMAX:
+ NeutralElem = DAG.getConstant(0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_MUL:
+ NeutralElem = DAG.getConstant(1, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_UMIN:
+ NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_SMAX:
+ NeutralElem = DAG.getConstant(
+ APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_SMIN:
+ NeutralElem = DAG.getConstant(
+ APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FADD:
+ NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMUL:
+ NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMAX:
+ NeutralElem = DAG.getConstantFP(
+ std::numeric_limits<double>::infinity(), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMIN:
+ NeutralElem = DAG.getConstantFP(
+ -std::numeric_limits<double>::infinity(), dl, ElemVT);
+ break;
+ }
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorNumElements();
+ unsigned WideElts = WideVT.getVectorNumElements();
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+}
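WidenVecOp_VECREDUCE works because padding the extra lanes with the reduction's identity element cannot change the reduced value. A small stand-alone C++ sketch of that invariant (plain arrays and std::accumulate, not the DAG; the concrete values are arbitrary):

#include <array>
#include <cassert>
#include <functional>
#include <numeric>

int main() {
  std::array<int, 3> Orig{5, 7, 9};          // the original odd-width input
  std::array<int, 4> WidenedAdd{5, 7, 9, 0}; // padded with 0, the ADD identity
  std::array<int, 4> WidenedMul{5, 7, 9, 1}; // padded with 1, the MUL identity

  // Reducing the widened vector gives the same result as reducing the
  // original, because the padding lane is the neutral element.
  assert(std::accumulate(Orig.begin(), Orig.end(), 0) ==
         std::accumulate(WidenedAdd.begin(), WidenedAdd.end(), 0));
  assert(std::accumulate(Orig.begin(), Orig.end(), 1, std::multiplies<>()) ==
         std::accumulate(WidenedMul.begin(), WidenedMul.end(), 1,
                         std::multiplies<>()));
  return 0;
}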
+
+SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
+ // This only gets called when the left and right inputs and the result are
+ // of a legal odd vector type, and the condition is an illegal i1 vector of
+ // the same odd width that needs widening.
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
+
+ SDValue Cond = GetWidenedVector(N->getOperand(0));
+ SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
+ SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
+ SDLoc DL(N);
+
+ SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
+ LeftIn, RightIn);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
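The same discard-the-padding pattern appears in WidenVecOp_VSELECT: the select is computed at the widened width and the extra lanes are dropped by the final EXTRACT_SUBVECTOR. A conceptual sketch in plain C++ (hypothetical lane values, not LLVM API):

#include <array>
#include <iostream>

int main() {
  constexpr unsigned OrigElts = 3, WideElts = 4;
  std::array<bool, WideElts> Cond{true, false, true, false}; // widened mask
  std::array<int, WideElts> Left{1, 2, 3, -1};    // the padding lanes hold
  std::array<int, WideElts> Right{10, 20, 30, -1}; // don't-care values

  // Lane-wise select at the widened width.
  std::array<int, WideElts> WideSel{};
  for (unsigned I = 0; I < WideElts; ++I)
    WideSel[I] = Cond[I] ? Left[I] : Right[I];

  // Keep only the original lanes -- the trailing EXTRACT_SUBVECTOR.
  for (unsigned I = 0; I < OrigElts; ++I)
    std::cout << WideSel[I] << ' '; // prints: 1 20 3
  std::cout << '\n';
}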
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
@@ -4102,6 +4621,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (MemVTWidth == WidenWidth)
+ return MemVT;
RetVT = MemVT;
break;
}
@@ -4113,7 +4634,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 7f369c746d24..34660e3a48ec 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -1,9 +1,8 @@
//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,6 +84,7 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
case ISD::CopyFromReg: NumberDeps++; break;
case ISD::CopyToReg: break;
case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
}
if (!ScegN->isMachineOpcode())
continue;
@@ -121,6 +121,7 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
case ISD::CopyFromReg: break;
case ISD::CopyToReg: NumberDeps++; break;
case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
}
if (!ScegN->isMachineOpcode())
continue;
@@ -446,6 +447,7 @@ int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
break;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
ResCount += PriorityThree;
break;
}
@@ -548,6 +550,7 @@ void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
NodeNumDefs++;
break;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
NodeNumDefs++;
break;
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index f7566b246f32..65b9d017fc5c 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,7 +135,8 @@ public:
/// dbg.addr is emitted twice.
void clearIsEmitted() { Emitted = false; }
- LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
+ LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
};
/// Holds the information from a dbg_label node through SDISel.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 90e109b022fd..2cb850fa1a3d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -1,9 +1,8 @@
//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -480,7 +479,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
- if (Node->getOpcode() == ISD::INLINEASM) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8d75b8133a30..34b4c8502353 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,9 +1,8 @@
//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -220,6 +219,14 @@ public:
return Topo.WillCreateCycle(SU, TargetSU);
}
+ /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
+ /// This returns true if this is a new predecessor.
+ /// Does *NOT* update the topological ordering! It just queues an update.
+ void AddPredQueued(SUnit *SU, const SDep &D) {
+ Topo.AddPredQueued(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
/// AddPred - adds a predecessor edge to SUnit SU.
/// This returns true if this is a new predecessor.
/// Updates the topological ordering if required.
@@ -267,24 +274,22 @@ private:
void ListScheduleBottomUp();
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
- /// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.InitDAGTopologicalSorting();
+ Topo.MarkDirty();
return NewNode;
}
/// CreateClone - Creates a new SUnit from an existing one.
- /// Updates the topological ordering if required.
SUnit *CreateClone(SUnit *N) {
unsigned NumSUnits = SUnits.size();
SUnit *NewNode = Clone(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.InitDAGTopologicalSorting();
+ Topo.MarkDirty();
return NewNode;
}
@@ -366,7 +371,7 @@ void ScheduleDAGRRList::Schedule() {
BuildSchedGraph(nullptr);
LLVM_DEBUG(dump());
- Topo.InitDAGTopologicalSorting();
+ Topo.MarkDirty();
AvailableQueue->initNodes(SUnits);
@@ -709,6 +714,7 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
// removed.
return;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
// For inline asm, clear the pipeline state.
HazardRec->Reset();
return;
@@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
NewSU = &SUnits[N->getNodeId()];
// If NewSU has already been scheduled, we need to clone it, but this
// negates the benefit to unfolding so just return SU.
- if (NewSU->isScheduled)
+ if (NewSU->isScheduled) {
return SU;
+ }
isNewN = false;
} else {
NewSU = CreateNewSUnit(N);
@@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
for (const SDep &Pred : ChainPreds) {
RemovePred(SU, Pred);
if (isNewLoad)
- AddPred(LoadSU, Pred);
+ AddPredQueued(LoadSU, Pred);
}
for (const SDep &Pred : LoadPreds) {
RemovePred(SU, Pred);
if (isNewLoad)
- AddPred(LoadSU, Pred);
+ AddPredQueued(LoadSU, Pred);
}
for (const SDep &Pred : NodePreds) {
RemovePred(SU, Pred);
- AddPred(NewSU, Pred);
+ AddPredQueued(NewSU, Pred);
}
for (SDep D : NodeSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
- AddPred(SuccDep, D);
+ AddPredQueued(SuccDep, D);
// Balance register pressure.
if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
!D.isCtrl() && NewSU->NumRegDefsLeft > 0)
@@ -1099,7 +1106,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
RemovePred(SuccDep, D);
if (isNewLoad) {
D.setSUnit(LoadSU);
- AddPred(SuccDep, D);
+ AddPredQueued(SuccDep, D);
}
}
@@ -1107,7 +1114,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
// by LoadSU.
SDep D(LoadSU, SDep::Data, 0);
D.setLatency(LoadSU->Latency);
- AddPred(NewSU, D);
+ AddPredQueued(NewSU, D);
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
@@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// New SUnit has the exact same predecessors.
for (SDep &Pred : SU->Preds)
if (!Pred.isArtificial())
- AddPred(NewSU, Pred);
+ AddPredQueued(NewSU, Pred);
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
@@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (SuccSU->isScheduled) {
SDep D = Succ;
D.setSUnit(NewSU);
- AddPred(SuccSU, D);
+ AddPredQueued(SuccSU, D);
D.setSUnit(SU);
DelDeps.push_back(std::make_pair(SuccSU, D));
}
@@ -1230,14 +1237,14 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
if (SuccSU->isScheduled) {
SDep D = Succ;
D.setSUnit(CopyToSU);
- AddPred(SuccSU, D);
+ AddPredQueued(SuccSU, D);
DelDeps.push_back(std::make_pair(SuccSU, Succ));
}
else {
// Avoid scheduling the def-side copy before other successors. Otherwise
// we could introduce another physreg interference on the copy and
// continue inserting copies indefinitely.
- AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
for (auto &DelDep : DelDeps)
@@ -1245,10 +1252,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
SDep FromDep(SU, SDep::Data, Reg);
FromDep.setLatency(SU->Latency);
- AddPred(CopyFromSU, FromDep);
+ AddPredQueued(CopyFromSU, FromDep);
SDep ToDep(CopyFromSU, SDep::Data, 0);
ToDep.setLatency(CopyFromSU->Latency);
- AddPred(CopyToSU, ToDep);
+ AddPredQueued(CopyToSU, ToDep);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(CopyFromSU);
@@ -1348,7 +1355,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
- if (Node->getOpcode() == ISD::INLINEASM) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
@@ -1477,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (CurSU)
return CurSU;
+ // We query the topological order in the loop body, so make sure outstanding
+ // updates are applied before entering it (we only enter the loop if there
+ // are some interferences). If we make changes to the ordering, we exit
+ // the loop.
+
// All candidates are delayed due to live physical reg dependencies.
// Try backtracking, code duplication, or inserting cross class copies
// to resolve it.
@@ -1506,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
}
LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
<< ") to SU(" << TrySU->NodeNum << ")\n");
- AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+ AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
// node is no longer available.
@@ -1560,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
@@ -2939,6 +2952,29 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
(cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
+ SDNode *PredFrameSetup = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (Pred.isCtrl() && Pred.getSUnit()) {
+ // Find the predecessor that is not a data dependence.
+ SDNode *PredND = Pred.getSUnit()->getNode();
+
+ // If PredND is a FrameSetup, we should not pre-schedule the node,
+ // or else, during bottom-up scheduling, ADJCALLSTACKDOWN and
+ // ADJCALLSTACKUP may hold CallResource for too long and prevent
+ // other calls from being scheduled. If there is no other available
+ // node to schedule, the scheduler will try to rename the register
+ // by creating a copy to avoid the conflict, which will fail because
+ // CallResource is not a real physical register.
+ if (PredND && PredND->isMachineOpcode() &&
+ (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+ PredFrameSetup = PredND;
+ break;
+ }
+ }
+ // Skip the node that has a FrameSetup parent.
+ if (PredFrameSetup != nullptr)
+ continue;
+
// Locate the single data predecessor.
SUnit *PredSU = nullptr;
for (const SDep &Pred : SU.Preds)
@@ -2993,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
if (SuccSU != &SU) {
Edge.setSUnit(PredSU);
scheduleDAG->RemovePred(SuccSU, Edge);
- scheduleDAG->AddPred(&SU, Edge);
+ scheduleDAG->AddPredQueued(&SU, Edge);
Edge.setSUnit(&SU);
- scheduleDAG->AddPred(SuccSU, Edge);
+ scheduleDAG->AddPredQueued(SuccSU, Edge);
--i;
}
}
@@ -3077,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
LLVM_DEBUG(dbgs()
<< " Adding a pseudo-two-addr edge from SU #"
<< SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
- scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
+ scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index e258f0a218a5..568c6191e512 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -1,9 +1,8 @@
//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -206,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (!Chain)
return;
+ // Skip any load instruction that has a tied input. There may be an additional
+ // dependency requiring a different order than by increasing offsets, and the
+ // added glue may introduce a cycle.
+ auto hasTiedInput = [this](const SDNode *N) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+ if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+ return true;
+ }
+
+ return false;
+ };
+
// Look for other loads of the same chain. Find loads that are loading from
// the same base pointer and different offsets.
SmallPtrSet<SDNode*, 16> Visited;
@@ -213,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
bool Cluster = false;
SDNode *Base = Node;
+
+ if (hasTiedInput(Base))
+ return;
+
// This algorithm requires a reasonably low use count before finding a match
// to avoid uselessly blowing up compile time in large blocks.
unsigned UseCount = 0;
@@ -223,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
continue;
int64_t Offset1, Offset2;
if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
- Offset1 == Offset2)
+ Offset1 == Offset2 ||
+ hasTiedInput(User)) {
// FIXME: Should be ok if the addresses are identical. But earlier
// optimizations really should have eliminated one of the loads.
continue;
+ }
if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
Offsets.push_back(Offset1);
O2SMap.insert(std::make_pair(Offset2, User));
@@ -741,28 +759,27 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
static void
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
DenseMap<SDValue, unsigned> &VRBaseMap,
- SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
- SmallSet<unsigned, 8> &Seen) {
+ SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
+ SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
unsigned Order = N->getIROrder();
- if (!Order || !Seen.insert(Order).second) {
+ if (!Order || Seen.count(Order)) {
// Process any valid SDDbgValues even if node does not have any order
// assigned.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
return;
}
- MachineBasicBlock *BB = Emitter.getBlock();
- auto IP = Emitter.getInsertPos();
- if (IP == BB->begin() || BB->back().isPHI() ||
- // Fast-isel may have inserted some instructions, in which case the
- // BB->back().isPHI() test will not fire when we want it to.
- std::prev(IP)->isPHI()) {
- // Did not insert any instruction.
- Orders.push_back({Order, (MachineInstr *)nullptr});
- return;
+ // If a new instruction was generated for this Order number, record it.
+ // Otherwise, leave this order number unseen: we will either find later
+ // instructions for it, or leave it unseen if there were no instructions at
+ // all.
+ if (NewInsn) {
+ Seen.insert(Order);
+ Orders.push_back({Order, NewInsn});
}
- Orders.push_back({Order, &*std::prev(IP)});
+ // Even if no instruction was generated, a Value may have become defined via
+ // earlier nodes. Try to process them now.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
@@ -815,6 +832,43 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
SmallSet<unsigned, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
+ // Emit a node, and determine where its first instruction is for debuginfo.
+ // Zero, one, or multiple instructions can be created when emitting a node.
+ auto EmitNode =
+ [&](SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+ // Fetch the instruction prior to this, or end() if nonexistent.
+ auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
+ if (I == BB->begin())
+ return BB->end();
+ else
+ return std::prev(Emitter.getInsertPos());
+ };
+
+ MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
+ Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
+ MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
+
+ // If the iterator did not change, no instructions were inserted.
+ if (Before == After)
+ return nullptr;
+
+ MachineInstr *MI;
+ if (Before == BB->end()) {
+ // There were no prior instructions; the new ones must start at the
+ // beginning of the block.
+ MI = &Emitter.getBlock()->instr_front();
+ } else {
+ // Return first instruction after the pre-existing instructions.
+ MI = &*std::next(Before);
+ }
+
+ if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+ MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+
+ return MI;
+ };
+
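The EmitNode helper above identifies the first instruction a node produced by sampling the predecessor of the insert position before and after emission. A self-contained sketch of that iterator trick, using std::list as a stand-in for a MachineBasicBlock (the strings are placeholders, not real instructions):

#include <iostream>
#include <list>
#include <string>

int main() {
  std::list<std::string> Block{"existing"};
  auto InsertPos = Block.end(); // like Emitter.getInsertPos()

  // Return the entry before I, or end() if there is none.
  auto GetPrev = [&](std::list<std::string>::iterator I) {
    return I == Block.begin() ? Block.end() : std::prev(I);
  };

  auto Before = GetPrev(InsertPos);
  Block.insert(InsertPos, {"new0", "new1"}); // "EmitNode" may add 0..N entries
  auto After = GetPrev(InsertPos);

  if (Before == After) {
    std::cout << "nothing emitted\n"; // iterator unchanged => no instructions
  } else {
    auto FirstNew = (Before == Block.end()) ? Block.begin() : std::next(Before);
    std::cout << "first emitted: " << *FirstNew << '\n'; // prints "new0"
  }
}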
// If this is the first BB, emit byval parameter dbg_value's.
if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
@@ -851,18 +905,18 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
- Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
GluedNodes.pop_back();
}
- Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap);
+ auto NewInsn =
+ EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
- Seen);
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
+ NewInsn);
}
// Insert all the dbg_values which have not already been inserted in source
@@ -873,7 +927,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Sort the source order instructions and use the order to insert debug
// values. Use stable_sort so that DBG_VALUEs are inserted in the same order
// regardless of the host's implementation of std::sort.
- std::stable_sort(Orders.begin(), Orders.end(), less_first());
+ llvm::stable_sort(Orders, less_first());
std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
[](const SDDbgValue *LHS, const SDDbgValue *RHS) {
return LHS->getOrder() < RHS->getOrder();
@@ -887,8 +941,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
unsigned Order = Orders[i].first;
MachineInstr *MI = Orders[i].second;
// Insert all SDDbgValue's whose order(s) are before "Order".
- if (!MI)
- continue;
+ assert(MI);
for (; DI != DE; ++DI) {
if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
break;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 3fa7ad895725..5163b4fa4fd3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -1,9 +1,8 @@
//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 416061475b1a..ab06b55b49fd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -1,9 +1,8 @@
//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 647496c1afcb..5852e693fa9f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,6 +85,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
void SelectionDAG::DAGNodeDeletedListener::anchor() {}
@@ -262,12 +262,7 @@ bool ISD::allOperandsUndef(const SDNode *N) {
// is probably the desired behavior.
if (N->getNumOperands() == 0)
return false;
-
- for (const SDValue &Op : N->op_values())
- if (!Op.isUndef())
- return false;
-
- return true;
+ return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
}
bool ISD::matchUnaryPredicate(SDValue Op,
@@ -299,8 +294,8 @@ bool ISD::matchUnaryPredicate(SDValue Op,
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
- bool AllowUndefs) {
- if (LHS.getValueType() != RHS.getValueType())
+ bool AllowUndefs, bool AllowTypeMismatch) {
+ if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
return false;
// TODO: Add support for scalar UNDEF cases?
@@ -323,8 +318,8 @@ bool ISD::matchBinaryPredicate(
auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
return false;
- if (LHSOp.getValueType() != SVT ||
- LHSOp.getValueType() != RHSOp.getValueType())
+ if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType()))
return false;
if (!Match(LHSCst, RHSCst))
return false;
@@ -518,6 +513,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::TargetFrameIndex:
ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ if (cast<LifetimeSDNode>(N)->hasOffset()) {
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
+ }
+ break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
@@ -834,6 +836,8 @@ void SelectionDAG::InsertNode(SDNode *N) {
N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeInserted(N);
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -1136,6 +1140,18 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
getConstant(Imm, DL, Op.getValueType()));
}
+SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZExtOrTrunc(Op, DL, VT);
+}
+
+SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZeroExtendInReg(Op, DL, VT);
+}
+
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
@@ -1274,6 +1290,12 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
}
+SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
+ const SDLoc &DL, bool LegalTypes) {
+ EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
+ return getConstant(Val, DL, ShiftVT);
+}
+
SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
@@ -1403,7 +1425,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = MF->getFunction().optForSize()
+ Alignment = MF->getFunction().hasOptSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1770,7 +1792,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+ auto *N =
+ newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -1965,10 +1988,30 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case ISD::SETUO:
case ISD::SETUEQ:
case ISD::SETUNE:
- assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ assert(!OpVT.isInteger() && "Illegal setcc for integer!");
break;
}
+ if (OpVT.isInteger()) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ // icmp eq/ne X, undef -> undef.
+ if ((N1.isUndef() || N2.isUndef()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE))
+ return getUNDEF(VT);
+
+ // If both operands are undef, we can return undef for int comparison.
+ // icmp undef, undef -> undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if (N1 == N2)
+ return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
+ }
+
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
const APInt &C2 = N2C->getAPIntValue();
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
@@ -1989,71 +2032,88 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
}
}
}
- if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
- if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
- APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
- switch (Cond) {
- default: break;
- case ISD::SETEQ: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETNE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpLessThan, dl, VT,
- OpVT);
- case ISD::SETLT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
- OpVT);
- case ISD::SETGT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
- VT, OpVT);
- case ISD::SETLE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETGE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual, dl, VT, OpVT);
- case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
- OpVT);
- case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
- OpVT);
- case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan, dl, VT,
- OpVT);
- case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpUnordered, dl, VT,
- OpVT);
- case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
- VT, OpVT);
- case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
- OpVT);
- }
- } else {
- // Ensure that the constant occurs on the RHS.
- ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
- MVT CompVT = N1.getValueType().getSimpleVT();
- if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
- return SDValue();
- return getSetCC(dl, VT, N2, N1, SwappedCond);
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ if (N1CFP && N2CFP) {
+ APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ }
+ } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
+ // Ensure that the constant occurs on the RHS.
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
+ return SDValue();
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
+ } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
+ (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
+ // If an operand is known to be a nan (or undef that could be a nan), we can
+ // fold it.
+ // Choosing NaN for the undef will always make unordered comparison succeed
+ // and ordered comparison fails.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default:
+ llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return getBoolConstant(false, dl, VT, OpVT);
+ case 1: // Known true.
+ return getBoolConstant(true, dl, VT, OpVT);
+ case 2: // Undefined.
+ return getUNDEF(VT);
}
}
@@ -2062,16 +2122,32 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
}
/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+/// the bits specified by DemandedBits are used.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return GetDemandedBits(V, DemandedBits, DemandedElts);
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by DemandedBits are used in the elements specified by
+/// DemandedElts.
+/// TODO: really we should be making this into the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
break;
case ISD::Constant: {
- const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ auto *CV = cast<ConstantSDNode>(V.getNode());
assert(CV && "Const value should be ConstSDNode.");
const APInt &CVal = CV->getAPIntValue();
- APInt NewVal = CVal & Mask;
+ APInt NewVal = CVal & DemandedBits;
if (NewVal != CVal)
return getConstant(NewVal, SDLoc(V), V.getValueType());
break;
@@ -2079,44 +2155,51 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS don't contribute bits to the or, drop them.
- if (MaskedValueIsZero(V.getOperand(0), Mask))
+ if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
return V.getOperand(1);
- if (MaskedValueIsZero(V.getOperand(1), Mask))
+ if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
return V.getOperand(0);
break;
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
break;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
// See if we can recursively simplify the LHS.
unsigned Amt = RHSC->getZExtValue();
// Watch out for shift count overflow though.
- if (Amt >= Mask.getBitWidth())
+ if (Amt >= DemandedBits.getBitWidth())
break;
- APInt NewMask = Mask << Amt;
- if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+ APInt SrcDemandedBits = DemandedBits << Amt;
+ if (SDValue SimplifyLHS =
+ GetDemandedBits(V.getOperand(0), SrcDemandedBits))
return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
V.getOperand(1));
}
break;
case ISD::AND: {
// X & -1 -> X (ignoring bits which aren't demanded).
- ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
- if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue()))
- return V.getOperand(0);
+ // Also handle the case where masked out bits in X are known to be zero.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
+ const APInt &AndVal = RHSC->getAPIntValue();
+ if (DemandedBits.isSubsetOf(AndVal) ||
+ DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
+ AndVal))
+ return V.getOperand(0);
+ }
break;
}
case ISD::ANY_EXTEND: {
SDValue Src = V.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
// Being conservative here - only peek through if we only demand bits in the
- // non-extended source (even though the extended bits are technically undef).
- if (Mask.getActiveBits() > SrcBitWidth)
+ // non-extended source (even though the extended bits are technically
+ // undef).
+ if (DemandedBits.getActiveBits() > SrcBitWidth)
break;
- APInt SrcMask = Mask.trunc(SrcBitWidth);
- if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+ APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
+ if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
@@ -2125,7 +2208,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (Mask.getActiveBits() <= ExVTBits)
+ if (DemandedBits.getActiveBits() <= ExVTBits)
return V.getOperand(0);
break;
@@ -2143,9 +2226,28 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
-bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+/// Mask is known to be zero for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ const APInt &DemandedElts,
unsigned Depth) const {
- return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
+/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
+bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
}
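MaskedValueIsZero reduces to a subset test: the query mask must lie entirely within the bits that computeKnownBits proved to be zero. A plain-integer sketch of that check (fixed 64-bit masks chosen only for illustration, not the APInt API):

#include <cstdint>
#include <iostream>

int main() {
  // Suppose the low nibble of a value is unknown but bits 4..63 are provably
  // zero (e.g. the value came from a zero-extended 4-bit load).
  uint64_t KnownZero = ~0xFULL;

  // A caller asking only about bits 8..31 gets "yes, known zero":
  uint64_t Mask = 0xFFFFFF00ULL;
  bool IsZero = (Mask & ~KnownZero) == 0; // Mask is a subset of KnownZero
  std::cout << std::boolalpha << IsZero << '\n'; // true

  // A mask that touches the unknown low nibble does not:
  uint64_t Mask2 = 0xFFULL;
  std::cout << ((Mask2 & ~KnownZero) == 0) << '\n'; // false
}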
/// isSplatValue - Return true if the vector V has the same value
@@ -2244,28 +2346,50 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
(AllowUndefs || !UndefElts);
}
-/// Helper function that checks to see if a node is a constant or a
-/// build vector of splat constants at least within the demanded elts.
-static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N,
- const APInt &DemandedElts) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
- if (N.getOpcode() != ISD::BUILD_VECTOR)
- return nullptr;
- EVT VT = N.getValueType();
- ConstantSDNode *Cst = nullptr;
- unsigned NumElts = VT.getVectorNumElements();
- assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- if (!DemandedElts[i])
- continue;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i));
- if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) ||
- C->getValueType(0) != VT.getScalarType())
- return nullptr;
- Cst = C;
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+ V = peekThroughExtractSubvectors(V);
+
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (isSplatValue(V, DemandedElts, UndefElts)) {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ return V;
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
}
- return Cst;
+ }
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+ int SplatIdx;
+ if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ getIntPtrConstant(SplatIdx, SDLoc(V)));
+ return SDValue();
}
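For the VECTOR_SHUFFLE case above, a shuffle over two N-element inputs numbers its lanes 0..2N-1, so dividing the splat index by N selects the source operand and the remainder selects the lane within it. A tiny worked example in plain C++ (the index and width are made up):

#include <iostream>

int main() {
  const int NumElts = 4;  // each shuffle input has 4 lanes
  const int SplatIdx = 5; // shuffle mask would be {5, 5, 5, 5}

  int WhichOperand = SplatIdx / NumElts;  // 1 -> the second input vector
  int LaneInOperand = SplatIdx % NumElts; // 1 -> its second lane
  std::cout << "operand " << WhichOperand
            << ", lane " << LaneInOperand << '\n';
}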
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
@@ -2708,8 +2832,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::FSHL:
case ISD::FSHR:
- if (ConstantSDNode *C =
- isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
unsigned Amt = C->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
@@ -2801,8 +2924,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- // If this is a ZEXTLoad and we are looking at the loaded value.
- if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
+ if (ISD::isNON_EXTLoad(LD) && Cst) {
+ // Determine any common known bits from the loaded constant pool value.
+ Type *CstTy = Cst->getType();
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+ // If its a vector splat, then we can (quickly) reuse the scalar path.
+ // NOTE: We assume all elements match and none are UNDEF.
+ if (CstTy->isVectorTy()) {
+ if (const Constant *Splat = Cst->getSplatValue()) {
+ Cst = Splat;
+ CstTy = Cst->getType();
+ }
+ }
+ // TODO - do we need to handle different bitwidths?
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
+ // Iterate across all vector elements finding common known bits.
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ }
+ Known.One.clearAllBits();
+ Known.Zero.clearAllBits();
+ break;
+ }
+ } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+ const APInt &Value = CInt->getValue();
+ Known.One = Value;
+ Known.Zero = ~Value;
+ } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One = Value;
+ Known.Zero = ~Value;
+ }
+ }
+ }
+ } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
@@ -2816,15 +2990,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::ZERO_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
@@ -2845,7 +3016,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ANY_EXTEND: {
Known = computeKnownBits(Op.getOperand(0), Depth+1);
- Known = Known.zext(BitWidth);
+ Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::TRUNCATE: {
@@ -2878,39 +3049,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::SUBC: {
- if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (CLHS->getAPIntValue().isNonNegative()) {
- unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- Known2 = computeKnownBits(Op.getOperand(1), DemandedElts,
- Depth + 1);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((Known2.Zero & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
- // Top bits known zero.
- Known.Zero.setHighBits(NLZ2);
- }
- }
- }
-
- // If low bits are know to be zero in both operands, then we know they are
- // going to be 0 in the result. Both addition and complement operations
- // preserve the low zero bits.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- if (KnownZeroLow == 0)
- break;
-
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
- Known.Zero.setLowBits(KnownZeroLow);
+ Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
+ Known, Known2);
break;
}
case ISD::UADDO:
@@ -2928,34 +3070,26 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
- // Output known-0 bits are known if clear or set in both the low clear bits
- // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
- // low 3 bits clear.
- // Output known-0 bits are also known if the top bits of each input are
- // known to be clear. For example, if one input has the top 10 bits clear
- // and the other has the top 8 bits clear, we know the top 7 bits of the
- // output must be clear.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
- unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+    assert(Op.getResNo() == 0 && "We only compute known bits for the sum here.");
+
+ // With ADDE and ADDCARRY, a carry bit may be added in.
+ KnownBits Carry(1);
+ if (Opcode == ISD::ADDE)
+ // Can't track carry from glue, set carry to unknown.
+ Carry.resetAll();
+ else if (Opcode == ISD::ADDCARRY)
+    // TODO: Compute known bits for the carry operand. It is unclear whether
+    // this is worth the trouble (how often will we find a known carry bit?),
+    // and has not been tested much yet, but something like this might work:
+ // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry = Carry.zextOrTrunc(1, false);
+ Carry.resetAll();
+ else
+ Carry.setAllZero();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
- KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
-
- if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
- // With ADDE and ADDCARRY, a carry bit may be added in, so we can only
- // use this information if we know (at least) that the low two bits are
- // clear. We then return to the caller that the low bit is unknown but
- // that other bits are known zero.
- if (KnownZeroLow >= 2)
- Known.Zero.setBits(1, KnownZeroLow);
- break;
- }
-
- Known.Zero.setLowBits(KnownZeroLow);
- if (KnownZeroHigh > 1)
- Known.Zero.setHighBits(KnownZeroHigh - 1);
+ Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
break;
}
case ISD::SREM:
@@ -3010,21 +3144,20 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::EXTRACT_ELEMENT: {
Known = computeKnownBits(Op.getOperand(0), Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
- const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
- Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
- Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
+ Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+ Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
// Remove high part of known bit mask
- Known = Known.trunc(BitWidth);
+ Known = Known.trunc(EltBitWidth);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
- const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
@@ -3042,7 +3175,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(InVec, Depth + 1);
}
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth);
+ Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -3146,10 +3279,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// the minimum of the clamp min/max range.
bool IsMax = (Opcode == ISD::SMAX);
ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
- if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
- CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
- DemandedElts);
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
if (CstLow && CstHigh) {
if (!IsMax)
std::swap(CstLow, CstHigh);
@@ -3430,7 +3563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C =
- isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
APInt ShiftVal = C->getAPIntValue();
ShiftVal += Tmp;
Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
@@ -3438,7 +3571,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
case ISD::SHL:
if (ConstantSDNode *C =
- isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
if (C->getAPIntValue().uge(VTBits) || // Bad shift.
@@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// the minimum of the clamp min/max range.
bool IsMax = (Opcode == ISD::SMAX);
ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
- if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
- CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
- DemandedElts);
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
if (CstLow && CstHigh) {
if (!IsMax)
std::swap(CstLow, CstHigh);
@@ -3621,7 +3754,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
@@ -3752,13 +3884,43 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
unsigned ExtType = LD->getExtensionType();
switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarSizeInBits();
- return VTBits-Tmp;
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
+ case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp;
+ case ISD::NON_EXTLOAD:
+ if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+ // We only need to handle vectors - computeKnownBits should handle
+ // scalar cases.
+ Type *CstTy = Cst->getType();
+ if (CstTy->isVectorTy() &&
+ (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+ Tmp = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ }
+ // Unknown type. Conservatively assume no bits match sign bit.
+ return 1;
+ }
+ return Tmp;
+ }
+ }
+ break;
}
}
}
@@ -3803,8 +3965,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
return false;
if (Op.getOpcode() == ISD::OR &&
- !MaskedValueIsZero(Op.getOperand(0),
- cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
return false;
return true;
@@ -4013,7 +4174,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
return SDValue();
}
-static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
@@ -4033,6 +4196,31 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
+ // Scan the operands and look for extract operations from a single source
+ // that correspond to insertion at the same location via this concatenation:
+ // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ SDValue Op = Ops[i];
+ unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op.getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ Op.getConstantOperandVal(1) != IdentityIndex) {
+ IsIdentity = false;
+ break;
+ }
+ assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+ "Unexpected identity source vector for concat of extracts");
+ IdentitySrc = Op.getOperand(0);
+ }
+ if (IsIdentity) {
+ assert(IdentitySrc && "Failed to set source vector of extracts");
+ return IdentitySrc;
+ }
+
// A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
// simplified to one big BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
@@ -4288,9 +4476,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (Operand.isUndef())
return getUNDEF(VT);
break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (Operand.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (Operand.isUndef())
+ return getConstantFP(0.0, DL, VT);
+ break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+             "SIGN_EXTEND result type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4307,6 +4509,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+             "ZERO_EXTEND result type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4323,6 +4528,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+             "ANY_EXTEND result type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4350,6 +4558,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+             "TRUNCATE result type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4429,6 +4640,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return Operand.getOperand(0);
break;
case ISD::FNEG:
+ // Negation of an unknown bag of bits is still completely undefined.
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
OpOpcode == ISD::FSUB)
@@ -4513,13 +4728,13 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, const ConstantSDNode *Cst1,
- const ConstantSDNode *Cst2) {
- if (Cst1->isOpaque() || Cst2->isOpaque())
+ EVT VT, const ConstantSDNode *C1,
+ const ConstantSDNode *C2) {
+ if (C1->isOpaque() || C2->isOpaque())
return SDValue();
- std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
- Cst2->getAPIntValue());
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
+ C2->getAPIntValue());
if (!Folded.second)
return SDValue();
return getConstant(Folded.first, DL, VT);
@@ -4532,16 +4747,16 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
return SDValue();
if (!TLI->isOffsetFoldingLegal(GA))
return SDValue();
- const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
- if (!Cst2)
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C2)
return SDValue();
- int64_t Offset = Cst2->getSExtValue();
+ int64_t Offset = C2->getSExtValue();
switch (Opcode) {
case ISD::ADD: break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
- return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
+ return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
GA->getOffset() + uint64_t(Offset));
}
@@ -4571,21 +4786,20 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDNode *Cst1,
- SDNode *Cst2) {
+ EVT VT, SDNode *N1, SDNode *N2) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
- if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+ if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
return getUNDEF(VT);
// Handle the case of two scalars.
- if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
- if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
- SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
+ SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
@@ -4593,19 +4807,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
- return FoldSymbolOffset(Opcode, VT, GA, Cst2);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
+ return FoldSymbolOffset(Opcode, VT, GA, N2);
if (TLI->isCommutativeBinOp(Opcode))
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
- return FoldSymbolOffset(Opcode, VT, GA, Cst1);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
+ return FoldSymbolOffset(Opcode, VT, GA, N1);
// For vectors, extract each constant element and fold them individually.
// Either input may be an undef value.
- auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
- if (!BV1 && !Cst1->isUndef())
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (!BV1 && !N1->isUndef())
return SDValue();
- auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
- if (!BV2 && !Cst2->isUndef())
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ if (!BV2 && !N2->isUndef())
return SDValue();
// If both operands are undef, that's handled the same way as scalars.
if (!BV1 && !BV2)
@@ -4755,6 +4969,64 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
return V;
}
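+/// Attempt to constant fold a floating-point binary operation with the given
+/// opcode, including handling of undef operands; returns an empty SDValue if
+/// no fold is possible.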
+SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDValue N1, SDValue N2) {
+ // TODO: We don't do any constant folding for strict FP opcodes here, but we
+ // should. That will require dealing with a potentially non-default
+ // rounding mode, checking the "opStatus" return value from the APFloat
+ // math calculations, and possibly other variations.
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP && N2CFP) {
+ APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+ switch (Opcode) {
+ case ISD::FADD:
+ C1.add(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FSUB:
+ C1.subtract(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FMUL:
+ C1.multiply(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FDIV:
+ C1.divide(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FREM:
+ C1.mod(C2);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FCOPYSIGN:
+ C1.copySign(C2);
+ return getConstantFP(C1, DL, VT);
+ default: break;
+ }
+ }
+ if (N1CFP && Opcode == ISD::FP_ROUND) {
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ bool Unused;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return getConstantFP(C1, DL, VT);
+ }
+
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+    // If both operands are undef, the result is undef. If one operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+ return SDValue();
+}
+
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -4791,9 +5063,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::CONCAT_VECTORS: {
- // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
@@ -4847,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
+ if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+ return V;
break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
assert(N1.getValueType() == VT &&
@@ -5100,73 +5373,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
- // Constant fold FP operations.
- bool HasFPExceptions = TLI->hasFloatingPointExceptions();
- if (N1CFP) {
- if (N2CFP) {
- APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
- APFloat::opStatus s;
- switch (Opcode) {
- case ISD::FADD:
- s = V1.add(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || s != APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
- break;
- case ISD::FSUB:
- s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || s!=APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
- break;
- case ISD::FMUL:
- s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || s!=APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
- break;
- case ISD::FDIV:
- s = V1.divide(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
- s!=APFloat::opDivByZero)) {
- return getConstantFP(V1, DL, VT);
- }
- break;
- case ISD::FREM :
- s = V1.mod(V2);
- if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
- s!=APFloat::opDivByZero)) {
- return getConstantFP(V1, DL, VT);
- }
- break;
- case ISD::FCOPYSIGN:
- V1.copySign(V2);
- return getConstantFP(V1, DL, VT);
- default: break;
- }
- }
-
- if (Opcode == ISD::FP_ROUND) {
- APFloat V = N1CFP->getValueAPF(); // make copy
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, DL, VT);
- }
- }
-
- switch (Opcode) {
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- // If both operands are undef, the result is undef. If 1 operand is undef,
- // the result is NaN. This should match the behavior of the IR optimizer.
- if (N1.isUndef() && N2.isUndef())
- return getUNDEF(VT);
- if (N1.isUndef() || N2.isUndef())
- return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
- }
+ if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
+ return V;
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
@@ -5261,10 +5469,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
- APFloat::opStatus s =
- V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
- if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ return getConstantFP(V1, DL, VT);
}
break;
}
@@ -5276,9 +5482,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::CONCAT_VECTORS: {
- // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
@@ -5317,6 +5522,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::INSERT_SUBVECTOR: {
+ // Inserting undef into undef is still undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
&& N2.getValueType().isSimple()) {
@@ -5337,6 +5545,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Trivial insertion.
if (VT.getSimpleVT() == N2.getSimpleValueType())
return N2;
+
+ // If this is an insert of an extracted vector into an undef vector, we
+ // can just use the input to the extract.
+ if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+ return N2.getOperand(0);
}
break;
}
@@ -5521,116 +5735,12 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
SrcDelta + G->getOffset());
}
-/// Determines the optimal series of memory ops to replace the memset / memcpy.
-/// Return true if the number of memory ops is below the threshold (Limit).
-/// It returns the types of the sequence of memory ops to perform
-/// memset / memcpy by reference.
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- bool AllowOverlap,
- unsigned DstAS, unsigned SrcAS,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
- "Expecting memcpy / memset source to meet alignment requirement!");
- // If 'SrcAlign' is zero, that means the memory operation does not need to
- // load the value, i.e. memset or memcpy from constant string. Otherwise,
- // it's the inferred alignment of the source. 'DstAlign', on the other hand,
- // is the specified alignment of the memory operation. If it is zero, that
- // means it's possible to change the alignment of the destination.
- // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
- // not need to be loaded.
- EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsMemset, ZeroMemset, MemcpyStrSrc,
- DAG.getMachineFunction());
-
- if (VT == MVT::Other) {
- // Use the largest integer type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater or
- // equal to DstAlign (or zero).
- VT = MVT::i64;
- while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
- !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- assert(VT.isInteger());
-
- // Find the largest legal integer type.
- MVT LVT = MVT::i64;
- while (!TLI.isTypeLegal(LVT))
- LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
- assert(LVT.isInteger());
-
- // If the type we've chosen is larger than the largest legal integer type
- // then use that instead.
- if (VT.bitsGT(LVT))
- VT = LVT;
- }
-
- unsigned NumMemOps = 0;
- while (Size != 0) {
- unsigned VTSize = VT.getSizeInBits() / 8;
- while (VTSize > Size) {
- // For now, only use non-vector load / store's for the left-over pieces.
- EVT NewVT = VT;
- unsigned NewVTSize;
-
- bool Found = false;
- if (VT.isVector() || VT.isFloatingPoint()) {
- NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
- if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
- TLI.isSafeMemOpType(NewVT.getSimpleVT()))
- Found = true;
- else if (NewVT == MVT::i64 &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
- TLI.isSafeMemOpType(MVT::f64)) {
- // i64 is usually not legal on 32-bit targets, but f64 may be.
- NewVT = MVT::f64;
- Found = true;
- }
- }
-
- if (!Found) {
- do {
- NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
- if (NewVT == MVT::i8)
- break;
- } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
- }
- NewVTSize = NewVT.getSizeInBits() / 8;
-
- // If the new VT cannot cover all of the remaining bits, then consider
- // issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
- if (NumMemOps && AllowOverlap && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
- Fast)
- VTSize = Size;
- else {
- VT = NewVT;
- VTSize = NewVTSize;
- }
- }
-
- if (++NumMemOps > Limit)
- return false;
-
- MemOps.push_back(VT);
- Size -= VTSize;
- }
-
- return true;
-}
-
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().optForMinSize();
- return MF.getFunction().optForSize();
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
}
static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
@@ -5665,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
return Chain;
@@ -5691,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
- if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align),
- (isZeroConstant ? 0 : SrcAlign),
- false, false, CopyFromConstant, true,
- DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(),
- DAG, TLI))
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+ (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
+ /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -5851,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
return Chain;
@@ -5871,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Align > SrcAlign)
SrcAlign = Align;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
-
- if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align), SrcAlign,
- false, false, false, false,
- DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(),
- DAG, TLI))
+ // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
+ // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
+ // correct code.
+ bool AllowOverlap = false;
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
+ /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -5956,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
uint64_t Size, unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
+  // FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
return Chain;
@@ -5972,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
- Size, (DstAlignCanChange ? 0 : Align), 0,
- true, IsZeroVal, false, true,
- DstPtrInfo.getAddrSpace(), ~0u,
- DAG, TLI))
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
+ (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
+ /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -6097,9 +6212,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
@@ -6199,9 +6316,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
@@ -6294,16 +6413,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
- Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
Args.push_back(Entry);
Entry.Node = Src;
Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
Args.push_back(Entry);
Entry.Node = Size;
- Entry.Ty = IntPtrTy;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Args.push_back(Entry);
// FIXME: pass in SDLoc
@@ -6384,32 +6502,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getAtomicCmpSwap(
- unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
- SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
- unsigned Alignment, AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
- assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
- Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
- assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
-
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
-
- MachineFunction &MF = getMachineFunction();
-
- // FIXME: Volatile isn't really correct; we should keep track of atomic
- // orderings in the memoperand.
- auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- AAMDNodes(), nullptr, SSID, SuccessOrdering,
- FailureOrdering);
-
- return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
-}
-
SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp,
@@ -6424,35 +6516,6 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
- const Value *PtrVal, unsigned Alignment,
- AtomicOrdering Ordering,
- SyncScope::ID SSID) {
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
-
- MachineFunction &MF = getMachineFunction();
- // An atomic store does not load. An atomic load does not store.
- // (An atomicrmw obviously both loads and stores.)
- // For now, atomics are considered to be volatile always, and they are
- // chained as such.
- // FIXME: Volatile isn't really correct; we should keep track of atomic
- // orderings in the memoperand.
- auto Flags = MachineMemOperand::MOVolatile;
- if (Opcode != ISD::ATOMIC_STORE)
- Flags |= MachineMemOperand::MOLoad;
- if (Opcode != ISD::ATOMIC_LOAD)
- Flags |= MachineMemOperand::MOStore;
-
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
- MemVT.getStoreSize(), Alignment, AAMDNodes(),
- nullptr, SSID, Ordering);
-
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
-}
-
-SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
- SDValue Chain, SDValue Ptr, SDValue Val,
MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
@@ -6465,6 +6528,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_MAX ||
Opcode == ISD::ATOMIC_LOAD_UMIN ||
Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FADD ||
+ Opcode == ISD::ATOMIC_LOAD_FSUB ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
@@ -6502,7 +6567,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
- MachineMemOperand::Flags Flags, unsigned Size) {
+ MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -6511,7 +6576,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
@@ -6557,6 +6622,36 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
return SDValue(N, 0);
}
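+/// Create (or CSE to an existing) LIFETIME_START / LIFETIME_END node covering
+/// the given frame index with the specified size and offset.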
+SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
+ SDValue Chain, int FrameIndex,
+ int64_t Size, int64_t Offset) {
+ const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[2] = {
+ Chain,
+ getFrameIndex(FrameIndex,
+ getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
+ true)};
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(FrameIndex);
+ ID.AddInteger(Size);
+ ID.AddInteger(Offset);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
+ Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
@@ -6875,7 +6970,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -6901,12 +6996,11 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
bool IsTruncating, bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Ops[] = { Chain, Val, Ptr, Mask };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -7057,6 +7151,31 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
return SDValue();
}
+// TODO: Use fast-math-flags to enable more simplifications.
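+/// Try to simplify a floating-point binop to its first operand when the
+/// second operand is a neutral constant (or constant splat); returns an
+/// empty SDValue if no simplification applies.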
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+ ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+ if (!YC)
+ return SDValue();
+
+ // X + -0.0 --> X
+ if (Opcode == ISD::FADD)
+ if (YC->getValueAPF().isNegZero())
+ return X;
+
+ // X - +0.0 --> X
+ if (Opcode == ISD::FSUB)
+ if (YC->getValueAPF().isPosZero())
+ return X;
+
+ // X * 1.0 --> X
+ // X / 1.0 --> X
+ if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
+ if (YC->getValueAPF().isExactlyValue(1.0))
+ return X;
+
+ return SDValue();
+}
+
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
@@ -7098,8 +7217,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
break;
case ISD::CONCAT_VECTORS:
- // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
case ISD::SELECT_CC:
@@ -7629,56 +7747,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
- bool IsUnary = false;
- bool IsTernary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
- case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
- case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
- case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
- case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
- case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
- case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
- case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
- case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
- case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
- case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
- case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
- case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
- case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
- case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
- case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
- case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
- case ISD::STRICT_FNEARBYINT:
- NewOpc = ISD::FNEARBYINT;
- IsUnary = true;
- break;
- case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
- case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
- case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
- case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
- case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
- case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
- }
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; break;
+ case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break;
+ case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break;
+ case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break;
+ case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break;
+ case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break;
+ case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
+ case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
+ case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+ case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
+ case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
+ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
+ case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
+ case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
+ case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+ }
+
+ assert(Node->getNumValues() == 2 && "Unexpected number of results!");
// We're taking this node out of the chain, so we need to re-link things.
SDValue InputChain = Node->getOperand(0);
SDValue OutputChain = SDValue(Node, 1);
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
- SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
- SDNode *Res = nullptr;
- if (IsUnary)
- Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
- else if (IsTernary)
- Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
- Node->getOperand(2),
- Node->getOperand(3)});
- else
- Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
- Node->getOperand(2) });
+ SmallVector<SDValue, 3> Ops;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
+ SDVTList VTs = getVTList(Node->getValueType(0));
+ SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
@@ -7980,9 +8092,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
// DIExpression, we need to mark the expression with a
// DW_OP_stack_value.
auto *DIExpr = DV->getExpression();
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
- DIExpression::NoDeref,
- DIExpression::WithStackValue);
+ DIExpr =
+ DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);
SDDbgValue *Clone =
getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
@@ -8288,19 +8399,17 @@ void SelectionDAG::updateDivergence(SDNode * N)
}
}
-
-void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
DenseMap<SDNode *, unsigned> Degree;
Order.reserve(AllNodes.size());
- for (auto & N : allnodes()) {
+ for (auto &N : allnodes()) {
unsigned NOps = N.getNumOperands();
Degree[&N] = NOps;
if (0 == NOps)
Order.push_back(&N);
}
- for (std::vector<SDNode *>::iterator I = Order.begin();
- I!=Order.end();++I) {
- SDNode * N = *I;
+ for (size_t I = 0; I != Order.size(); ++I) {
+ SDNode *N = Order[I];
for (auto U : N->uses()) {
unsigned &UnsortedOps = Degree[U];
if (0 == --UnsortedOps)
@@ -8310,9 +8419,8 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
}
#ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence()
-{
- std::vector<SDNode*> TopoOrder;
+void SelectionDAG::VerifyDAGDiverence() {
+ std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
const TargetLowering &TLI = getTargetLoweringInfo();
DenseMap<const SDNode *, bool> DivergenceMap;
@@ -8338,7 +8446,6 @@ void SelectionDAG::VerifyDAGDiverence()
}
#endif
-
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list. The Deleted vector is
@@ -8584,14 +8691,24 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
return V;
}
-bool llvm::isBitwiseNot(SDValue V) {
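+/// Look through any EXTRACT_SUBVECTOR nodes and return the underlying source
+/// vector.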
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+ while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ V = V.getOperand(0);
+ return V;
+}
+
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
if (V.getOpcode() != ISD::XOR)
return false;
- ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
- return C && C->isAllOnesValue();
+ V = peekThroughBitcasts(V.getOperand(1));
+ unsigned NumBits = V.getScalarValueSizeInBits();
+ ConstantSDNode *C =
+ isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+ return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
}
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+ bool AllowTruncation) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8599,10 +8716,39 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
- // BuildVectors can truncate their operands. Ignore that case here.
- if (CN && (UndefElements.none() || AllowUndefs) &&
- CN->getValueType(0) == N.getValueType().getScalarType())
- return CN;
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
+ }
+
+ return nullptr;
+}
+
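+/// Variant of isConstOrConstSplat that only considers the demanded vector
+/// elements when looking for a constant splat.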
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs,
+ bool AllowTruncation) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
}
return nullptr;
@@ -8622,9 +8768,26 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
return nullptr;
}
-bool llvm::isNullOrNullSplat(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
+ const APInt &DemandedElts,
+ bool AllowUndefs) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN =
+ BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+ if (CN && (UndefElements.none() || AllowUndefs))
+ return CN;
+ }
+
+ return nullptr;
+}
+
+bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
- ConstantSDNode *C = isConstOrConstSplat(N);
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
return C && C->isNullValue();
}
@@ -8773,17 +8936,12 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
/// isOperand - Return true if this node is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
- for (const SDValue &Op : N->op_values())
- if (*this == Op)
- return true;
- return false;
+ return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
}
bool SDNode::isOperandOf(const SDNode *N) const {
- for (const SDValue &Op : N->op_values())
- if (this == Op.getNode())
- return true;
- return false;
+ return any_of(N->op_values(),
+ [this](SDValue Op) { return this == Op.getNode(); });
}
/// reachesChainWithoutSideEffects - Return true if this operand (which must
@@ -8973,6 +9131,56 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
return getBuildVector(VecVT, dl, Scalars);
}
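+/// Unroll a vector binop that also produces an overflow flag (e.g. UADDO)
+/// into scalar operations, returning the rebuilt result and overflow vectors
+/// as a pair.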
+std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
+ SDNode *N, unsigned ResNE) {
+ unsigned Opcode = N->getOpcode();
+ assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO ||
+ Opcode == ISD::USUBO || Opcode == ISD::SSUBO ||
+ Opcode == ISD::UMULO || Opcode == ISD::SMULO) &&
+ "Expected an overflow opcode");
+
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT ResEltVT = ResVT.getVectorElementType();
+ EVT OvEltVT = OvVT.getVectorElementType();
+ SDLoc dl(N);
+
+ // If ResNE is 0, fully unroll the vector op.
+ unsigned NE = ResVT.getVectorNumElements();
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ SmallVector<SDValue, 8> LHSScalars;
+ SmallVector<SDValue, 8> RHSScalars;
+ ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
+ ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
+
+ EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
+ SDVTList VTs = getVTList(ResEltVT, SVT);
+ SmallVector<SDValue, 8> ResScalars;
+ SmallVector<SDValue, 8> OvScalars;
+ for (unsigned i = 0; i < NE; ++i) {
+ SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
+ SDValue Ov =
+ getSelect(dl, OvEltVT, Res.getValue(1),
+ getBoolConstant(true, dl, OvEltVT, ResVT),
+ getConstant(0, dl, OvEltVT));
+
+ ResScalars.push_back(Res);
+ OvScalars.push_back(Ov);
+ }
+
+ ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
+ OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
+
+ EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
+ EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
+ return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
+ getBuildVector(NewOvVT, dl, OvScalars));
+}
+
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
LoadSDNode *Base,
unsigned Bytes,
@@ -9014,7 +9222,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a direct reference to a stack slot, use information about the
// stack slot's alignment.
- int FrameIdx = 1 << 31;
+ int FrameIdx = INT_MIN;
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
@@ -9025,7 +9233,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
FrameOffset = Ptr.getConstantOperandVal(1);
}
- if (FrameIdx != (1 << 31)) {
+ if (FrameIdx != INT_MIN) {
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
@@ -9065,6 +9273,15 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
return std::make_pair(Lo, Hi);
}
+/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
+ EVT VT = N.getValueType();
+ EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NextPowerOf2(VT.getVectorNumElements()));
+ return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
+ getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+}
+
void SelectionDAG::ExtractVectorElements(SDValue Op,
SmallVectorImpl<SDValue> &Args,
unsigned Start, unsigned Count) {
@@ -9158,13 +9375,20 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
return true;
}
-SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
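+/// Return the value splatted across the demanded elements, ignoring undef and
+/// undemanded lanes; returns an empty SDValue if the demanded elements do not
+/// form a splat.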
+SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
if (UndefElements) {
UndefElements->clear();
UndefElements->resize(getNumOperands());
}
+ assert(getNumOperands() == DemandedElts.getBitWidth() &&
+ "Unexpected vector size");
+ if (!DemandedElts)
+ return SDValue();
SDValue Splatted;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
SDValue Op = getOperand(i);
if (Op.isUndef()) {
if (UndefElements)
@@ -9177,20 +9401,40 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
}
if (!Splatted) {
- assert(getOperand(0).isUndef() &&
+ unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+ assert(getOperand(FirstDemandedIdx).isUndef() &&
"Can only have a splat without a constant for all undefs.");
- return getOperand(0);
+ return getOperand(FirstDemandedIdx);
}
return Splatted;
}
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ return getSplatValue(DemandedElts, UndefElements);
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
}
ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
@@ -9228,7 +9472,10 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
/* search */;
- assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+ // If all elements are undefined, this shuffle can be considered a splat
+ // (although it should eventually get simplified away completely).
+ if (i == e)
+ return true;
// Make sure all remaining elements are either undef or the same as the first
// non-undef value.
@@ -9266,8 +9513,7 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
assert(!Node->OperandList && "Node already has operands");
- assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
- Vals.size() &&
+ assert(SDNode::getMaxNumOperands() >= Vals.size() &&
"too many operands to fit into SDNode");
SDUse *Ops = OperandRecycler.allocate(
ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
@@ -9287,6 +9533,19 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
checkForCycles(Node);
}
+SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
+ SmallVectorImpl<SDValue> &Vals) {
+ size_t Limit = SDNode::getMaxNumOperands();
+ while (Vals.size() > Limit) {
+ unsigned SliceIdx = Vals.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit);
+ SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs);
+ Vals.erase(Vals.begin() + SliceIdx, Vals.end());
+ Vals.emplace_back(NewTF);
+ }
+ return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
+}
+
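
A self-contained sketch of the chunking loop in getTokenFactor: once the pending list exceeds the per-node operand limit, the trailing Limit entries are folded into a single stand-in node and the loop repeats until one node can hold everything. Integers stand in for SDValues here.

    #include <cassert>
    #include <vector>

    // Repeatedly collapse the last `Limit` entries into one placeholder (-1)
    // until the list fits; mirrors the loop shape, not the DAG node creation.
    static size_t buildTokenFactorChain(std::vector<int> &Vals, size_t Limit) {
      size_t NodesBuilt = 0;
      while (Vals.size() > Limit) {
        size_t SliceIdx = Vals.size() - Limit;
        Vals.erase(Vals.begin() + SliceIdx, Vals.end()); // Drop the folded tail...
        Vals.push_back(-1);                              // ...append its stand-in node.
        ++NodesBuilt;
      }
      return NodesBuilt + 1; // Plus the final node over the remaining entries.
    }

    int main() {
      std::vector<int> Vals(100, 0);
      // With a limit of 40 operands, 100 entries need two intermediate nodes
      // (100 -> 61 -> 22) plus the final one: three nodes in total.
      assert(buildTokenFactorChain(Vals, 40) == 3);
      assert(Vals.size() <= 40);
      return 0;
    }
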
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 488bac1a9a80..9592bc30a4e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -25,8 +24,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
  // Conservatively fail if the match failed.
if (!Base.getNode() || !Other.Base.getNode())
return false;
+ if (!hasValidOffset() || !Other.hasValidOffset())
+ return false;
// Initial Offset difference.
- Off = Other.Offset - Offset;
+ Off = *Other.Offset - *Offset;
if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
// Trivial match.
@@ -60,24 +61,110 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // Match non-equal FrameIndexes - If both frame indices are fixed
- // we know their relative offsets and can compare them. Otherwise
- // we must be conservative.
+ // Match FrameIndexes.
if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
- if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+ // Equal FrameIndexes - offsets are directly comparable.
+ if (A->getIndex() == B->getIndex())
+ return true;
+ // Non-equal FrameIndexes - If both frame indices are fixed
+ // we know their relative offsets and can compare them. Otherwise
+ // we must be conservative.
if (MFI.isFixedObjectIndex(A->getIndex()) &&
MFI.isFixedObjectIndex(B->getIndex())) {
Off += MFI.getObjectOffset(B->getIndex()) -
MFI.getObjectOffset(A->getIndex());
return true;
}
+ }
}
return false;
}
+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+ const Optional<int64_t> NumBytes0,
+ const SDNode *Op1,
+ const Optional<int64_t> NumBytes1,
+ const SelectionDAG &DAG, bool &IsAlias) {
+
+ BaseIndexOffset BasePtr0 = match(Op0, DAG);
+ BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+ if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+ return false;
+ int64_t PtrDiff;
+ if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+ BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ IsAlias = !(
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ (*NumBytes0 <= PtrDiff) ||
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff.
+ return true;
+ }
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // Different frame indexes where at least one is not a fixed object refer
+ // to distinct stack objects, so even without a constant offset we know
+ // they cannot alias.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+ // If both bases are identifiable objects (frame index, global or constant
+ // pool) and either their index operands match or the object kinds differ,
+ // we can conclude they do not alias.
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+ (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+ (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+ IsAlias = false;
+ return true;
+ }
+ return false; // Cannot determine whether the pointers alias.
+}
+
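
Because the interval test above is easy to invert by accident, here is the same disjointness arithmetic as a tiny standalone check, with PtrDiff the signed byte distance from the first access to the second:

    #include <cassert>
    #include <cstdint>

    // Byte ranges [0, Size0) and [PtrDiff, PtrDiff + Size1) overlap unless one
    // ends at or before the point where the other begins.
    static bool mayOverlap(int64_t PtrDiff, int64_t Size0, int64_t Size1) {
      return !((Size0 <= PtrDiff) ||     // Second range starts after the first ends.
               (PtrDiff + Size1 <= 0));  // Second range ends before the first starts.
    }

    int main() {
      // An 8-byte store at offset 0 and a 4-byte load at offset 8 are disjoint,
      // but the same load at offset 6 overlaps the last two bytes of the store.
      assert(!mayOverlap(/*PtrDiff=*/8, /*Size0=*/8, /*Size1=*/4));
      assert(mayOverlap(/*PtrDiff=*/6, /*Size0=*/8, /*Size1=*/4));
      // A 4-byte access 4 bytes *before* the first one is disjoint as well.
      assert(!mayOverlap(/*PtrDiff=*/-4, /*Size0=*/8, /*Size1=*/4));
      return 0;
    }
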
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+ const BaseIndexOffset &Other,
+ int64_t OtherBitSize, int64_t &BitOffset) const {
+ int64_t Offset;
+ if (!equalBaseIndex(Other, DAG, Offset))
+ return false;
+ if (Offset >= 0) {
+ // Other is after *this:
+ // [-------*this---------]
+ // [---Other--]
+ // ==Offset==>
+ BitOffset = 8 * Offset;
+ return BitOffset + OtherBitSize <= BitSize;
+ }
+ // Other starts strictly before *this, it cannot be fully contained.
+ // [-------*this---------]
+ // [--Other--]
+ return false;
+}
+
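
The containment condition in contains() restated as a standalone calculation (sizes in bits, offset in bytes, converted exactly as the code does):

    #include <cassert>
    #include <cstdint>

    // Is the other access, starting ByteOffset bytes in and OtherBits wide,
    // fully inside an object of ThisBits bits?
    static bool containsRange(int64_t ThisBits, int64_t ByteOffset,
                              int64_t OtherBits, int64_t &BitOffset) {
      if (ByteOffset < 0)
        return false;                 // Other starts before *this: never contained.
      BitOffset = 8 * ByteOffset;
      return BitOffset + OtherBits <= ThisBits;
    }

    int main() {
      int64_t BitOffset = 0;
      // A 32-bit access 4 bytes into a 128-bit object is contained at bit 32.
      assert(containsRange(128, 4, 32, BitOffset) && BitOffset == 32);
      // A 64-bit access 12 bytes in would run past the end (96 + 64 > 128).
      assert(!containsRange(128, 12, 64, BitOffset));
      return 0;
    }
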
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
- const SelectionDAG &DAG) {
+static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+ const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
// (((B + I*M) + c)) + c ...
@@ -178,3 +265,33 @@ BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
}
return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
}
+
+BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
+ const SelectionDAG &DAG) {
+ if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
+ return matchLSNode(LS0, DAG);
+ if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
+ if (LN->hasOffset())
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),
+ false);
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), false);
+ }
+ return BaseIndexOffset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+LLVM_DUMP_METHOD void BaseIndexOffset::dump() const {
+ print(dbgs());
+}
+
+void BaseIndexOffset::print(raw_ostream& OS) const {
+ OS << "BaseIndexOffset base=[";
+ Base->print(OS);
+ OS << "] index=[";
+ if (Index)
+ Index->print(OS);
+ OS << "] offset=" << Offset;
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 871ab9b29881..e818dd27c05e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -109,6 +109,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -123,6 +124,7 @@
using namespace llvm;
using namespace PatternMatch;
+using namespace SwitchCG;
#define DEBUG_TYPE "isel"
@@ -215,8 +217,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
- unsigned RoundParts = NumParts & (NumParts - 1) ?
- 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundParts =
+ (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
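
A worked instance of the RoundParts/RoundBits computation just above, assuming 32-bit parts: six parts are not a power of two, so the first four are assembled as the power-of-two prefix and the remaining two are handled afterwards.

    #include <cassert>

    static unsigned log2Floor(unsigned V) { // Floor of log2 for V > 0.
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      unsigned NumParts = 6, PartBits = 32;
      // 6 has more than one set bit, so round down to 4 parts for the first step.
      unsigned RoundParts =
          (NumParts & (NumParts - 1)) ? 1u << log2Floor(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      assert(RoundParts == 4 && RoundBits == 128); // The odd 2 parts come after.
      return 0;
    }
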
@@ -322,7 +324,15 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
- llvm_unreachable("Unknown mismatch!");
+ // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+ // then truncating.
+ if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+ ValueVT.bitsLT(PartEVT)) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
@@ -573,7 +583,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
- DAG.getIntPtrConstant(RoundBits, DL));
+ DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
@@ -1003,6 +1014,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
+ SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
void SelectionDAGBuilder::clear() {
@@ -1032,19 +1044,7 @@ SDValue SelectionDAGBuilder::getRoot() {
}
// Otherwise, we have to make a token factor node.
- // If we have >= 2^16 loads then split across multiple token factors as
- // there's a 64k limit on the number of SDNode operands.
- SDValue Root;
- size_t Limit = (1 << 16) - 1;
- while (PendingLoads.size() > Limit) {
- unsigned SliceIdx = PendingLoads.size() - Limit;
- auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
- SDValue NewTF =
- DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
- PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
- PendingLoads.emplace_back(NewTF);
- }
- Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
+ SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
PendingLoads.clear();
DAG.setRoot(Root);
return Root;
@@ -1144,6 +1144,13 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
for (auto &DDIMI : DanglingDebugInfoMap) {
DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+ // If debug info is to be dropped, run it through final checks to see
+ // whether it can be salvaged.
+ for (auto &DDI : DDIV)
+ if (isMatchingDbgValue(DDI))
+ salvageUnresolvedDbgValue(DDI);
+
DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
}
}
@@ -1169,6 +1176,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
+ // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+ // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+ // we couldn't resolve it directly when examining the DbgValue intrinsic
+ // in the first place we should not be more successful here). Unless we
+ // have some test case that proves this to be correct, we should avoid
+ // calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
@@ -1186,12 +1199,173 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
- } else
+ } else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ auto Undef =
+ UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto SDV =
+ DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
}
DDIV.clear();
}
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+ Value *V = DDI.getDI()->getValue();
+ DILocalVariable *Var = DDI.getDI()->getVariable();
+ DIExpression *Expr = DDI.getDI()->getExpression();
+ DebugLoc DL = DDI.getdl();
+ DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+ unsigned SDOrder = DDI.getSDNodeOrder();
+
+ // Currently we consider only dbg.value intrinsics -- we tell the salvager
+ // that DW_OP_stack_value is desired.
+ assert(isa<DbgValueInst>(DDI.getDI()));
+ bool StackValue = true;
+
+ // Can this Value be encoded without any further work?
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+ return;
+
+ // Attempt to salvage back through as many instructions as possible. Bail if
+ // a non-instruction is seen, such as a constant expression or global
+ // variable. FIXME: Further work could recover those too.
+ while (isa<Instruction>(V)) {
+ Instruction &VAsInst = *cast<Instruction>(V);
+ DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+
+ // If we cannot salvage any further, and haven't yet found a suitable debug
+ // expression, bail out.
+ if (!NewExpr)
+ break;
+
+ // New value and expr now represent this debuginfo.
+ V = VAsInst.getOperand(0);
+ Expr = NewExpr;
+
+ // Some kind of simplification occurred: check whether the operand of the
+ // salvaged debug expression can be encoded in this DAG.
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+ LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
+ << DDI.getDI() << "\nBy stripping back to:\n " << V);
+ return;
+ }
+ }
+
+ // This was the final opportunity to salvage this debug information, and it
+ // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+ // any earlier variable location.
+ auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
+ << "\n");
+}
+
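
The salvage loop above, reduced to its shape: starting from a value that could not be encoded directly, keep stepping to the defining operand while a rewritten expression is available, and give up (emitting an undef location) when neither an encodable value nor a further rewrite exists. The structs and strings below are invented purely for this sketch.

    #include <optional>
    #include <string>
    #include <vector>

    struct Def {                     // A value together with what defines it.
      const Def *Operand = nullptr;  // Single operand we can walk back through.
      bool Encodable = false;        // Could a location be emitted for it directly?
      std::optional<std::string> RewriteOp; // Expression piece undoing this def, if any.
    };

    // Expression ops accumulated while walking back to an encodable value, or
    // nullopt if salvaging fails and an undef location must be emitted instead.
    static std::optional<std::vector<std::string>> salvage(const Def *V) {
      std::vector<std::string> Expr;
      while (V) {
        if (V->Encodable)
          return Expr;               // Found a value we can describe directly.
        if (!V->RewriteOp || !V->Operand)
          return std::nullopt;       // Nothing more to strip back through.
        Expr.push_back(*V->RewriteOp);
        V = V->Operand;              // Step to the operand and try again.
      }
      return std::nullopt;
    }

    int main() {
      Def Arg{nullptr, /*Encodable=*/true, std::nullopt};
      Def Add{&Arg, false, std::string("undo add 4")}; // Stand-in rewrite label.
      return salvage(&Add) ? 0 : 1;  // Salvages down to Arg with one expression op.
    }
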
+bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc dl,
+ DebugLoc InstDL, unsigned Order) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
+ SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
+ }
+
+ // If the Value is a frame index, we can create a FrameIndex debug value
+ // without relying on the DAG at all.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ auto SDV =
+ DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
+ /*IsIndirect*/ false, dl, SDNodeOrder);
+ // Do not attach the SDNodeDbgValue to an SDNode: this variable location
+ // is still available even if the SDNode gets optimized out.
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
+ }
+ }
+
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+ return true;
+ SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ return true;
+ }
+
+ // Special rules apply for the first dbg.values of parameter variables in a
+ // function. Identify them by the fact that they reference Argument Values,
+ // that the variables they describe are parameters, and that those parameters
+ // belong to the current function (the location is not inlined). We
+ // need to let them dangle until they get an SDNode.
+ bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
+ !InstDL.getInlinedAt();
+ if (!IsParamOfFunc) {
+ // The value is not used in this block yet (or it would have an SDNode).
+ // We still want the value to appear for the user if possible -- if it has
+ // an associated VReg, we can refer to that instead.
+ auto VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ unsigned Reg = VMI->second;
+ // If this is a PHI node, it may be split up into several MI PHI nodes
+ // (in FunctionLoweringInfo::set).
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType(), None);
+ if (RFV.occupiesMultipleRegs()) {
+ unsigned Offset = 0;
+ unsigned BitsToDescribe = 0;
+ if (auto VarSize = Var->getSizeInBits())
+ BitsToDescribe = *VarSize;
+ if (auto Fragment = Expr->getFragmentInfo())
+ BitsToDescribe = Fragment->SizeInBits;
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ unsigned RegisterSize = RegAndSize.second;
+ // Bail out if all bits are described already.
+ if (Offset >= BitsToDescribe)
+ break;
+ unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+ ? BitsToDescribe - Offset
+ : RegisterSize;
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, FragmentSize);
+ if (!FragmentExpr)
+ continue;
+ SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
+ false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ Offset += RegisterSize;
+ }
+ } else {
+ SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
+
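
The per-register fragment arithmetic used in the multi-register branch above, as a standalone calculation: a variable of BitsToDescribe bits spread over the given register sizes is covered by consecutive fragments, the last of which may be partial (for example, 96 bits over two 64-bit registers become fragments [0,64) and [64,96)).

    #include <cassert>
    #include <utility>
    #include <vector>

    // Returns (bit offset, bit size) pairs, one per register actually needed.
    static std::vector<std::pair<unsigned, unsigned>>
    splitIntoFragments(unsigned BitsToDescribe,
                       const std::vector<unsigned> &RegSizes) {
      std::vector<std::pair<unsigned, unsigned>> Fragments;
      unsigned Offset = 0;
      for (unsigned RegisterSize : RegSizes) {
        if (Offset >= BitsToDescribe)
          break;                              // All bits are described already.
        unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
                                    ? BitsToDescribe - Offset
                                    : RegisterSize;
        Fragments.emplace_back(Offset, FragmentSize);
        Offset += RegisterSize;
      }
      return Fragments;
    }

    int main() {
      auto F = splitIntoFragments(96, {64, 64});
      assert(F.size() == 2);
      assert(F[0] == std::make_pair(0u, 64u));
      assert(F[1] == std::make_pair(64u, 32u)); // Final fragment is partial.
      return 0;
    }
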
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+ // Try to fixup any remaining dangling debug info -- and drop it if we can't.
+ for (auto &Pair : DanglingDebugInfoMap)
+ for (auto &DDI : Pair.second)
+ salvageUnresolvedDbgValue(DDI);
+ clearDanglingDebugInfo();
+}
+
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
@@ -1469,6 +1643,36 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
}
}
+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. If the caught exception is not one of the types the catch pad
+// handles, it will be rethrown by a rethrow instruction.
+static void findWasmUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations. We don't
+ // continue to the unwind destination of the catchswitch for wasm.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ break;
+ } else {
+ continue;
+ }
+ }
+}
+
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -1489,6 +1693,13 @@ static void findUnwindDestinations(
bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
+ if (IsWasmCXX) {
+ findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+ assert(UnwindDests.size() <= 1 &&
+ "There should be at most one unwind destination for wasm");
+ return;
+ }
+
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
BasicBlock *NewEHPadBB = nullptr;
@@ -1501,8 +1712,7 @@ static void findUnwindDestinations(
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
- if (!IsWasmCXX)
- UnwindDests.back().first->setIsEHFuncletEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
@@ -1588,9 +1798,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
- SmallVector<EVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+ &Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -1598,8 +1809,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
- Chains[i] = DAG.getStore(
- Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+
+ SDValue Val = RetOp.getValue(i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
+ Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
@@ -1615,6 +1829,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ I.getOperand(0)->getType(), F->getCallingConv(),
+ /*IsVarArg*/ false);
+
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
@@ -1647,6 +1865,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
if (RetInReg)
Flags.setInReg();
+ if (I.getOperand(0)->getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
+ }
+
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (j == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+
// Propagate extension type if any
if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
@@ -1668,7 +1898,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
- assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
+ assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1677,8 +1907,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
- DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
- &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+ &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
EVT(TLI.getPointerTy(DL))));
}
@@ -1825,7 +2055,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
return;
}
}
@@ -1834,7 +2064,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
@@ -2043,27 +2273,27 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
- assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+ assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
- if (ShouldEmitAsBranches(SwitchCases)) {
- for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
- ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
- ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ if (ShouldEmitAsBranches(SL->SwitchCases)) {
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
}
// Emit the branch for this block.
- visitSwitchCase(SwitchCases[0], BrMBB);
- SwitchCases.erase(SwitchCases.begin());
+ visitSwitchCase(SL->SwitchCases[0], BrMBB);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
return;
}
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
- for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
- FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
- SwitchCases.clear();
+ SL->SwitchCases.clear();
}
}
@@ -2084,6 +2314,20 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
SDValue CondLHS = getValue(CB.CmpLHS);
SDLoc dl = CB.DL;
+ if (CB.CC == ISD::SETTRUE) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ SwitchBB->normalizeSuccProbs();
+ if (CB.TrueBB != NextBlock(SwitchBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(CB.TrueBB)));
+ }
+ return;
+ }
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
+
// Build the setcc now.
if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
@@ -2095,8 +2339,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
- } else
- Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ SDValue CondRHS = getValue(CB.CmpRHS);
+
+ // If a pointer's DAG type is larger than its memory type then the DAG
+ // values are zero-extended. This breaks signed comparisons so truncate
+ // back to the underlying type before doing the compare.
+ if (CondLHS.getValueType() != MemVT) {
+ CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
+ CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
+ }
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
+ }
} else {
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
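
A standalone illustration of why the truncation matters, under the stated assumption that pointers whose DAG type is wider than their memory type are carried around zero-extended: a signed comparison of the widened values can disagree with the signed comparison of the original narrow values.

    #include <cassert>
    #include <cstdint>

    int main() {
      // 32-bit pointer values as seen in memory.
      uint32_t P0 = 0x80000000u, P1 = 0x00000001u;

      // Zero-extended 64-bit DAG values (how the target carries them around).
      int64_t Wide0 = P0, Wide1 = P1;

      // Signed compare on the widened values: P0 looks *greater* than P1...
      assert(!(Wide0 < Wide1));

      // ...but after truncating back to the 32-bit memory type, the signed
      // compare gives the opposite (and intended) answer.
      assert(static_cast<int32_t>(P0) < static_cast<int32_t>(P1));
      return 0;
    }
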
@@ -2147,7 +2401,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
/// visitJumpTable - Emit JumpTable node in the current MBB
-void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
@@ -2162,14 +2416,12 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
-void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
- // Subtract the lowest switch case value from the value being switched on and
- // conditional branch to default mbb if the result is greater than the
- // difference between smallest and largest cases.
+ // Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
@@ -2189,24 +2441,33 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
- // Emit the range check for the jump table, and branch to the default block
- // for the switch statement if the value being switched on exceeds the largest
- // case in the switch.
- SDValue CMP = DAG.getSetCC(
- dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
-
- SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, CopyTo, CMP,
- DAG.getBasicBlock(JT.Default));
-
- // Avoid emitting unnecessary branches to the next block.
- if (JT.MBB != NextBlock(SwitchBB))
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(JT.MBB));
-
- DAG.setRoot(BrCond);
+ if (!JTH.OmitRangeCheck) {
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
+ SDValue CMP = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+ } else {
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+ DAG.getBasicBlock(JT.MBB)));
+ else
+ DAG.setRoot(CopyTo);
+ }
}
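
The range check that may be omitted above reduces to one unsigned comparison after biasing by the smallest case value: anything below First wraps to a huge unsigned number, so a single SETUGT-style test rejects both out-of-range directions. A small standalone check of that arithmetic:

    #include <cassert>
    #include <cstdint>

    // True if Value falls outside [First, Last], using the single biased compare.
    static bool outsideRange(int64_t Value, int64_t First, int64_t Last) {
      uint64_t Sub = static_cast<uint64_t>(Value - First); // The biased index.
      return Sub > static_cast<uint64_t>(Last - First);    // ISD::SETUGT equivalent.
    }

    int main() {
      // Cases 10..13: 9 is below the range, 14 above, both rejected by one test.
      assert(outsideRange(9, 10, 13));
      assert(outsideRange(14, 10, 13));
      assert(!outsideRange(10, 10, 13) && !outsideRange(13, 10, 13));
      return 0;
    }
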
/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
@@ -2215,6 +2476,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
@@ -2227,6 +2489,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
+ if (PtrTy != PtrMemTy)
+ return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
return SDValue(Node, 0);
}
@@ -2242,6 +2506,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
@@ -2254,7 +2519,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
- PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
@@ -2262,27 +2527,26 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
// Retrieve guard check function, nullptr if instrumentation is inlined.
- if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
// Generate a call to that function with the content of the guard slot as
// argument.
- auto *Fn = cast<Function>(GuardCheck);
- FunctionType *FnTy = Fn->getFunctionType();
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
- if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
- .setChain(DAG.getEntryNode())
- .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
- getValue(GuardCheck), std::move(Args));
+ .setChain(DAG.getEntryNode())
+ .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
+ getValue(GuardCheckFn), std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
@@ -2298,9 +2562,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
const Value *IRGuard = TLI.getSDagStackGuard(M);
SDValue GuardPtr = getValue(IRGuard);
- Guard =
- DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
- Align, MachineMemOperand::MOVolatile);
+ Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+ MachinePointerInfo(IRGuard, 0), Align,
+ MachineMemOperand::MOVolatile);
}
// Perform the comparison via a subtract/getsetcc.
@@ -2339,6 +2603,12 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, false, getCurSDLoc(), false, false).second;
+ // On PS4, the "return address" must still be within the calling function,
+ // even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ if (TM.getTargetTriple().isPS4CPU())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
DAG.setRoot(Chain);
}
@@ -2493,6 +2763,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
+ case Intrinsic::wasm_rethrow_in_catch: {
+ // This is usually done in visitTargetIntrinsic, but this intrinsic is
+ // special because it can be invoked, so we manually lower it to a DAG
+ // node here.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(getRoot()); // inchain
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+ SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
+ DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
+ break;
+ }
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
@@ -2528,6 +2812,35 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
+ MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower callbrs with arbitrary operand bundles yet!");
+
+ assert(isa<InlineAsm>(I.getCalledValue()) &&
+ "Only know how to handle inlineasm callbr");
+ visitInlineAsm(&I);
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return);
+ for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
+ MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
+ addSuccessorWithProb(CallBrMBB, Target);
+ }
+ CallBrMBB->normalizeSuccProbs();
+
+ // Drop into default successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
@@ -2585,49 +2898,17 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
setValue(&LP, Res);
}
-void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
-#ifndef NDEBUG
- for (const CaseCluster &CC : Clusters)
- assert(CC.Low == CC.High && "Input clusters must be single-case");
-#endif
-
- llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
- return a.Low->getValue().slt(b.Low->getValue());
- });
-
- // Merge adjacent clusters with the same destination.
- const unsigned N = Clusters.size();
- unsigned DstIndex = 0;
- for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
- CaseCluster &CC = Clusters[SrcIndex];
- const ConstantInt *CaseVal = CC.Low;
- MachineBasicBlock *Succ = CC.MBB;
-
- if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
- (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
- // If this case has the same successor and is a neighbour, merge it into
- // the previous cluster.
- Clusters[DstIndex - 1].High = CaseVal;
- Clusters[DstIndex - 1].Prob += CC.Prob;
- } else {
- std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
- sizeof(Clusters[SrcIndex]));
- }
- }
- Clusters.resize(DstIndex);
-}
-
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
- for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
- if (JTCases[i].first.HeaderBB == First)
- JTCases[i].first.HeaderBB = Last;
+ for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
+ if (SL->JTCases[i].first.HeaderBB == First)
+ SL->JTCases[i].first.HeaderBB = Last;
// Update BitTestCases.
- for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
- if (BitTestCases[i].Parent == First)
- BitTestCases[i].Parent = Last;
+ for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
+ if (SL->BitTestCases[i].Parent == First)
+ SL->BitTestCases[i].Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
@@ -2916,6 +3197,18 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+ // If a pointer's DAG type is larger than its memory type then the DAG values
+ // are zero-extended. This breaks signed comparisons so truncate back to the
+ // underlying type before doing the compare.
+ if (Op1.getValueType() != MemVT) {
+ Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+ Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+ }
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
@@ -2963,6 +3256,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
ISD::NodeType OpCode = Cond.getValueType().isVector() ?
ISD::VSELECT : ISD::SELECT;
+ bool IsUnaryAbs = false;
+
// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
@@ -3023,10 +3318,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
break;
}
break;
+ case SPF_ABS:
+ IsUnaryAbs = true;
+ Opc = ISD::ABS;
+ break;
+ case SPF_NABS:
+ // TODO: we need to produce sub(0, abs(X)).
default: break;
}
- if (Opc != ISD::DELETED_NODE &&
+ if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
(TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
@@ -3039,15 +3340,30 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
RHSVal = getValue(RHS);
BaseOps = {};
}
+
+ if (IsUnaryAbs) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ BaseOps = {};
+ }
}
- for (unsigned i = 0; i != NumValues; ++i) {
- SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
- Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
- Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
- Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
- LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
- Ops);
+ if (IsUnaryAbs) {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ Values[i] =
+ DAG.getNode(OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
+ SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ }
+ } else {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+ Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+ Values[i] = DAG.getNode(
+ OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+ }
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
@@ -3135,18 +3451,26 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
+ auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+ EVT PtrMemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- I.getType());
- setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
@@ -3284,12 +3608,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps1[0] = Src1;
MOps2[0] = Src2;
- Src1 = Src1.isUndef()
- ? DAG.getUNDEF(PaddedVT)
- : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
- Src2 = Src2.isUndef()
- ? DAG.getUNDEF(PaddedVT)
- : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
+ Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
@@ -3498,6 +3818,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
+ auto &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+ MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
@@ -3555,6 +3878,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
+ OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
+
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
@@ -3580,7 +3905,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
N.getValueType(), IdxN,
DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
- SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
+ SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+ IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
@@ -3591,6 +3917,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
}
+ if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+ N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
setValue(&I, N);
}
@@ -3675,16 +4004,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
- bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
+ bool isDereferenceable =
+ isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
- SmallVector<EVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3750,12 +4080,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
MMOFlags |= MachineMemOperand::MODereferenceable;
MMOFlags |= TLI.getMMOFlags(I);
- SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+ SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
MMOFlags, AAInfo, Ranges);
+ Chains[ChainI] = L.getValue(1);
+
+ if (MemVTs[i] != ValueVTs[i])
+ L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
- Chains[ChainI] = L.getValue(1);
}
if (!ConstantMemory) {
@@ -3785,15 +4118,13 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- unsigned VReg; bool CreatedVReg;
- std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
+ unsigned VReg =
+ SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
- if (CreatedVReg)
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3826,8 +4157,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
- FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
- ValueVTs[0]);
+ SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
@@ -3854,10 +4184,10 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
}
- SmallVector<EVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets);
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3899,9 +4229,12 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
- SDValue St = DAG.getStore(
- Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
- MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
+ SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
+ SDValue St =
+ DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
+ Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
@@ -4181,19 +4514,34 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
- AtomicOrdering SuccessOrder = I.getSuccessOrdering();
- AtomicOrdering FailureOrder = I.getFailureOrdering();
+ AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
+ AtomicOrdering FailureOrdering = I.getFailureOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
- SDValue L = DAG.getAtomicCmpSwap(
- ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
- getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
- getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
- /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
+
+ auto Alignment = DAG.getEVTAlignment(MemVT);
+
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, MemVT.getStoreSize(), Alignment,
+ AAMDNodes(), nullptr, SSID, SuccessOrdering,
+ FailureOrdering);
+
+ SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MMO);
SDValue OutChain = L.getValue(2);
@@ -4217,20 +4565,32 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
+ case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
}
- AtomicOrdering Order = I.getOrdering();
+ AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
+ auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
+ auto Alignment = DAG.getEVTAlignment(MemVT);
+
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+ MemVT.getStoreSize(), Alignment, AAMDNodes(),
+ nullptr, SSID, Ordering);
+
SDValue L =
- DAG.getAtomic(NT, dl,
- getValue(I.getValOperand()).getSimpleValueType(),
- InChain,
- getValue(I.getPointerOperand()),
- getValue(I.getValOperand()),
- I.getPointerOperand(),
- /* Alignment=*/ 0, Order, SSID);
+ DAG.getAtomic(NT, dl, MemVT, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getValOperand()),
+ MMO);
SDValue OutChain = L.getValue(1);
@@ -4259,27 +4619,39 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
- I.getAlignment() < VT.getStoreSize())
+ I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
+ auto Flags = MachineMemOperand::MOLoad;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ Flags |= MachineMemOperand::MOInvariant;
+ if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
+ DAG.getDataLayout()))
+ Flags |= MachineMemOperand::MODereferenceable;
+
+ Flags |= TLI.getMMOFlags(I);
+
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- MachineMemOperand::MOVolatile |
- MachineMemOperand::MOLoad,
- VT.getStoreSize(),
+ Flags, MemVT.getStoreSize(),
I.getAlignment() ? I.getAlignment() :
- DAG.getEVTAlignment(VT),
+ DAG.getEVTAlignment(MemVT),
AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
- DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
getValue(I.getPointerOperand()), MMO);
SDValue OutChain = L.getValue(1);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
DAG.setRoot(OutChain);
@@ -4288,25 +4660,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
- AtomicOrdering Order = I.getOrdering();
+ AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT =
- TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlignment() < VT.getStoreSize())
+ if (I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
- SDValue OutChain =
- DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
- InChain,
- getValue(I.getPointerOperand()),
- getValue(I.getValueOperand()),
- I.getPointerOperand(), I.getAlignment(),
- Order, SSID);
+ auto Flags = MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= TLI.getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+ MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
+ nullptr, SSID, Ordering);
+
+ SDValue Val = getValue(I.getValueOperand());
+ if (Val.getValueType() != MemVT)
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+
+ SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+ getValue(I.getPointerOperand()), Val, MMO);
+
DAG.setRoot(OutChain);
}
@@ -4364,10 +4747,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
    // This is a target intrinsic that touches memory
- Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
- Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
- Info.flags, Info.size);
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ Result =
+ DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4889,7 +5274,7 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function &F = DAG.getMachineFunction().getFunction();
- if (!F.optForSize() ||
+ if (!F.hasOptSize() ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
@@ -4952,6 +5337,71 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Arg)
return false;
+ if (!IsDbgDeclare) {
+ // ArgDbgValues are hoisted to the beginning of the entry block. So we
+ // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+ // the entry block.
+ bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+ if (!IsInEntryBlock)
+ return false;
+
+ // ArgDbgValues are hoisted to the beginning of the entry block. So we
+ // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+ // variable that also is a param.
+ //
+ // Although, if we are at the top of the entry block already, we can still
+ // emit using ArgDbgValue. This might catch some situations when the
+ // dbg.value refers to an argument that isn't used in the entry block, so
+ // any CopyToReg node would be optimized out and the only way to express
+ // this DBG_VALUE is by using the physical reg (or FI) as done in this
+ // method. ArgDbgValues are hoisted to the beginning of the entry block. So
+ // we should only emit as ArgDbgValue if the Variable is an argument to the
+ // current function, and the dbg.value intrinsic is found in the entry
+ // block.
+ bool VariableIsFunctionInputArg = Variable->isParameter() &&
+ !DL->getInlinedAt();
+ bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+ if (!IsInPrologue && !VariableIsFunctionInputArg)
+ return false;
+
+ // Here we assume that a function argument on IR level only can be used to
+ // describe one input parameter on source level. If we for example have
+ // source code like this
+ //
+ // struct A { long x, y; };
+ // void foo(struct A a, long b) {
+ // ...
+ // b = a.x;
+ // ...
+ // }
+ //
+ // and IR like this
+ //
+ // define void @foo(i32 %a1, i32 %a2, i32 %b) {
+ // entry:
+ // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %b, "b",
+ // ...
+ // call void @llvm.dbg.value(metadata i32 %a1, "b"
+ // ...
+ //
+ // then the last dbg.value is describing a parameter "b" using a value that
+ // is an argument. But since we have already used %a1 to describe a parameter
+ // we should not handle that last dbg.value here (that would result in an
+ // incorrect hoisting of the DBG_VALUE to the function entry).
+ // Notice that we allow one dbg.value per IR level argument, to accommodate
+ // the situation with fragments above.
+ if (VariableIsFunctionInputArg) {
+ unsigned ArgNo = Arg->getArgNo();
+ if (ArgNo >= FuncInfo.DescribedArgs.size())
+ FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+ else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+ return false;
+ FuncInfo.DescribedArgs.set(ArgNo);
+ }
+ }
+
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -4976,12 +5426,14 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
}
- if (!Op && N.getNode())
+ if (!Op && N.getNode()) {
// Check if frame index is available.
- if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ SDValue LCandidate = peekThroughBitcasts(N);
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
if (FrameIndexSDNode *FINode =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
Op = MachineOperand::CreateFI(FINode->getIndex());
+ }
if (!Op) {
// Check if ValueMap has reg number.
@@ -5055,11 +5507,29 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
# define setjmp_undefined_for_msvc
#endif
-/// Lower the call to the specified intrinsic function. If we want to emit this
-/// as a call to a named external function, return the name. Otherwise, lower it
-/// and return null.
-const char *
-SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::smul_fix:
+ return ISD::SMULFIX;
+ case Intrinsic::umul_fix:
+ return ISD::UMULFIX;
+ default:
+ llvm_unreachable("Unhandled fixed point intrinsic");
+ }
+}
+
+void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
+ const char *FunctionName) {
+ assert(FunctionName && "FunctionName must not be nullptr");
+ SDValue Callee = DAG.getExternalSymbol(
+ FunctionName,
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+ unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
@@ -5069,28 +5539,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
- return nullptr;
- case Intrinsic::vastart: visitVAStart(I); return nullptr;
- case Intrinsic::vaend: visitVAEnd(I); return nullptr;
- case Intrinsic::vacopy: visitVACopy(I); return nullptr;
+ return;
+ case Intrinsic::vastart: visitVAStart(I); return;
+ case Intrinsic::vaend: visitVAEnd(I); return;
+ case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::addressofreturnaddress:
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
+ return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
+ return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
@@ -5101,7 +5571,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
@@ -5111,12 +5581,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
- return nullptr;
+ return;
}
case Intrinsic::setjmp:
- return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+ lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
+ return;
case Intrinsic::longjmp:
- return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+ lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
+ return;
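// (Illustrative aside, not part of the diff.) The &"_setjmp"[!...] expression
// above selects the symbol name without a branch: indexing the string literal
// by 0 keeps the leading underscore, indexing by 1 skips it, and & takes the
// address of that character, e.g.:
//   const char *A = &"_setjmp"[0]; // "_setjmp"
//   const char *B = &"_setjmp"[1]; // "setjmp"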
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
@@ -5135,7 +5607,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
@@ -5149,7 +5621,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
- return nullptr;
+ return;
}
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
@@ -5168,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
- return nullptr;
+ return;
}
case Intrinsic::memcpy_element_unordered_atomic: {
const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
@@ -5186,7 +5658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::memmove_element_unordered_atomic: {
auto &MI = cast<AtomicMemMoveInst>(I);
@@ -5204,7 +5676,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<AtomicMemSetInst>(I);
@@ -5220,7 +5692,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
@@ -5235,7 +5707,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- return nullptr;
+ return;
}
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
@@ -5264,7 +5736,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
}
- return nullptr;
+ return;
}
SDValue &N = NodeMap[Address];
@@ -5286,7 +5758,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
- return nullptr;
+ return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
@@ -5300,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
- return nullptr;
+ return;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(I);
@@ -5310,7 +5782,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgLabel *SDV;
SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
DAG.AddDbgLabel(SDV);
- return nullptr;
+ return;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
@@ -5321,88 +5793,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
dropDanglingDebugInfo(Variable, Expression);
const Value *V = DI.getValue();
if (!V)
- return nullptr;
-
- SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
- isa<ConstantPointerNull>(V)) {
- SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- return nullptr;
- }
-
- // Do not use getValue() in here; we don't want to generate code at
- // this point if it hasn't been done yet.
- SDValue N = NodeMap[V];
- if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
- N = UnusedArgNodeMap[V];
- if (N.getNode()) {
- if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
- return nullptr;
- SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, N.getNode(), false);
- return nullptr;
- }
-
- // PHI nodes have already been selected, so we should know which VReg that
- // is assigns to already.
- if (isa<PHINode>(V)) {
- auto VMI = FuncInfo.ValueMap.find(V);
- if (VMI != FuncInfo.ValueMap.end()) {
- unsigned Reg = VMI->second;
- // The PHI node may be split up into several MI PHI nodes (in
- // FunctionLoweringInfo::set).
- RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), None);
- if (RFV.occupiesMultipleRegs()) {
- unsigned Offset = 0;
- unsigned BitsToDescribe = 0;
- if (auto VarSize = Variable->getSizeInBits())
- BitsToDescribe = *VarSize;
- if (auto Fragment = Expression->getFragmentInfo())
- BitsToDescribe = Fragment->SizeInBits;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
- unsigned RegisterSize = RegAndSize.second;
- // Bail out if all bits are described already.
- if (Offset >= BitsToDescribe)
- break;
- unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
- ? BitsToDescribe - Offset
- : RegisterSize;
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expression, Offset, FragmentSize);
- if (!FragmentExpr)
- continue;
- SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
- false, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- Offset += RegisterSize;
- }
- } else {
- SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
- SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- }
- return nullptr;
- }
- }
+ return;
- // TODO: When we get here we will either drop the dbg.value completely, or
- // we try to move it forward by letting it dangle for awhile. So we should
- // probably add an extra DbgValue to the DAG here, with a reference to
- // "noreg", to indicate that we have lost the debug location for the
- // variable.
+ if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(),
+ SDNodeOrder))
+ return;
- if (!V->use_empty() ) {
- // Do not call getValue(V) yet, as we don't want to generate code.
- // Remember it for later.
- DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
- return nullptr;
- }
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However, in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE, which
+ // we should ideally fill with an extra Undef DBG_VALUE.
- LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
- LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
- return nullptr;
+ DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
+ return;
}
case Intrinsic::eh_typeid_for: {
@@ -5411,7 +5814,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::eh_return_i32:
@@ -5422,15 +5825,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
- return nullptr;
+ return;
case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5438,7 +5841,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
- return nullptr;
+ return;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
@@ -5447,7 +5850,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI.setFunctionContextIndex(FI);
- return nullptr;
+ return;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
@@ -5457,34 +5860,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
- return nullptr;
+ return;
case Intrinsic::masked_gather:
visitMaskedGather(I);
- return nullptr;
+ return;
case Intrinsic::masked_load:
visitMaskedLoad(I);
- return nullptr;
+ return;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
- return nullptr;
+ return;
case Intrinsic::masked_store:
visitMaskedStore(I);
- return nullptr;
+ return;
case Intrinsic::masked_expandload:
visitMaskedLoad(I, true /* IsExpanding */);
- return nullptr;
+ return;
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
- return nullptr;
+ return;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -5496,7 +5899,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShAmt = getValue(I.getArgOperand(1));
if (isa<ConstantSDNode>(ShAmt)) {
visitTargetIntrinsic(I, Intrinsic);
- return nullptr;
+ return;
}
unsigned NewIntrinsic = 0;
EVT ShAmtVT = MVT::v2i32;
@@ -5542,31 +5945,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
getValue(I.getArgOperand(0)), ShAmt);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
- return nullptr;
+ return;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::log2:
setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::log10:
setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::exp2:
setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
@@ -5597,61 +6000,71 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
}
- case Intrinsic::minnum: {
- auto VT = getValue(I.getArgOperand(0)).getValueType();
- unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)
- ? ISD::FMINIMUM
- : ISD::FMINNUM;
- setValue(&I, DAG.getNode(Opc, sdl, VT,
+ case Intrinsic::lround:
+ case Intrinsic::llround:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::lround: Opcode = ISD::LROUND; break;
+ case Intrinsic::llround: Opcode = ISD::LLROUND; break;
+ case Intrinsic::lrint: Opcode = ISD::LRINT; break;
+ case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
+ }
+
+ EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
+ getValue(I.getArgOperand(0))));
+ return;
+ }
+ case Intrinsic::minnum:
+ setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
- }
- case Intrinsic::maxnum: {
- auto VT = getValue(I.getArgOperand(0)).getValueType();
- unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)
- ? ISD::FMAXIMUM
- : ISD::FMAXNUM;
- setValue(&I, DAG.getNode(Opc, sdl, VT,
+ return;
+ case Intrinsic::maxnum:
+ setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
- }
+ return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
- return nullptr;
+ return;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -5671,7 +6084,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
- return nullptr;
+ return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5693,7 +6106,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2)));
setValue(&I, Add);
}
- return nullptr;
+ return;
}
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
@@ -5701,17 +6114,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
MVT::i32))));
- return nullptr;
+ return;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
- return nullptr;
+ return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
- return nullptr;
+ return;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
@@ -5719,25 +6132,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
- return nullptr;
+ return;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
@@ -5745,13 +6158,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
- return nullptr;
+ return;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
- return nullptr;
+ return;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
@@ -5767,7 +6180,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
- return nullptr;
+ return;
}
// When X == Y, this is rotate. If the data type has a power-of-2 size, we
@@ -5777,7 +6190,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
- return nullptr;
+ return;
}
// Some targets only rotate one way. Try the opposite direction.
@@ -5786,7 +6199,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Negate the shift amount because it is safe to ignore the high bits.
SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
- return nullptr;
+ return;
}
// fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
@@ -5796,7 +6209,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
- return nullptr;
+ return;
}
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
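// (Illustrative aside, not part of the diff.) A worked instance of the
// expansion formulas above, assuming i8 operands: fshl(X, Y, 3) becomes
// (X << 3) | (Y >> 5), so the low 3 result bits come from the top 3 bits of Y,
// while fshr(X, Y, 3) becomes (X << 5) | (Y >> 3).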
@@ -5816,39 +6229,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// For fshr, 0-shift returns the 2nd arg (Y).
SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
- return nullptr;
+ return;
}
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
case Intrinsic::uadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
case Intrinsic::ssub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
case Intrinsic::usub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
- case Intrinsic::smul_fix: {
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I,
- DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
- return nullptr;
+ setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1.getValueType(), Op1, Op2, Op3));
+ return;
+ }
+ case Intrinsic::smul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
@@ -5857,26 +6279,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
- return nullptr;
+ return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Result type for @llvm.get.dynamic.area.offset should match PtrTy for
// target.
- if (PtrTy != ResTy)
+ if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
Op);
DAG.setRoot(Op);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::stackguard: {
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5896,7 +6318,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
@@ -5923,7 +6345,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
/* Alignment = */ 0, MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
- return nullptr;
+ return;
}
case Intrinsic::objectsize: {
// If we don't know by now, we're never going to know.
@@ -5940,14 +6362,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getConstant(0, sdl, Ty);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::is_constant:
// If this wasn't constant-folded away by now, then it's not a
// constant.
setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
- return nullptr;
+ return;
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
@@ -5955,12 +6377,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
- return nullptr;
+ return;
case Intrinsic::assume:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
- return nullptr;
+ return;
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
@@ -5971,7 +6393,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
DAG.setRoot(Res);
- return nullptr;
+ return;
}
case Intrinsic::init_trampoline: {
@@ -5988,13 +6410,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
- return nullptr;
+ return;
}
case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::gcroot: {
assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
@@ -6004,19 +6426,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
- return nullptr;
+ return;
}
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
- return nullptr;
+ return;
case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
- return nullptr;
+ return;
case Intrinsic::debugtrap:
case Intrinsic::trap: {
@@ -6028,7 +6450,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
ISD::TRAP : ISD::DEBUGTRAP;
DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
- return nullptr;
+ return;
}
TargetLowering::ArgListTy Args;
@@ -6041,7 +6463,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
- return nullptr;
+ return;
}
case Intrinsic::uadd_with_overflow:
@@ -6063,9 +6485,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ EVT ResultVT = Op1.getValueType();
+ EVT OverflowVT = MVT::i1;
+ if (ResultVT.isVector())
+ OverflowVT = EVT::getVectorVT(
+ *Context, OverflowVT, ResultVT.getVectorNumElements());
+
+ SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
- return nullptr;
+ return;
}
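// (Illustrative aside, not part of the diff.) For a vector overflow intrinsic
// the code above gives each lane its own overflow bit, e.g. with a v4i32
// result:
//   EVT OverflowVT = EVT::getVectorVT(*Context, MVT::i1, /*NumElements=*/4);
//   // OverflowVT is now v4i1, paired with the v4i32 result in the VT list.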
case Intrinsic::prefetch: {
SDValue Ops[5];
@@ -6088,21 +6516,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
PendingLoads.push_back(Result);
Result = getRoot();
DAG.setRoot(Result);
- return nullptr;
+ return;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
- return nullptr;
+ return;
- SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
+ const int64_t ObjectSize =
+ cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
+ Value *const ObjectPtr = I.getArgOperand(1);
+ SmallVector<const Value *, 4> Allocas;
+ GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
- for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
+ for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
- AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+ const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
// Could not find an Alloca.
if (!LifetimeObject)
@@ -6112,49 +6543,50 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// valid frame index.
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
if (SI == FuncInfo.StaticAllocaMap.end())
- return nullptr;
-
- int FI = SI->second;
-
- SDValue Ops[2];
- Ops[0] = getRoot();
- Ops[1] =
- DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
- unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+ return;
- Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
+ const int FrameIndex = SI->second;
+ int64_t Offset;
+ if (GetPointerBaseWithConstantOffset(
+ ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
+ Offset = -1; // Cannot determine offset from alloca to lifetime object.
+ Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
+ Offset);
DAG.setRoot(Res);
}
- return nullptr;
+ return;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
+ return;
case Intrinsic::invariant_end:
// Discard region information.
- return nullptr;
+ return;
case Intrinsic::clear_cache:
- return TLI.getClearCacheBuiltinName();
+ /// FunctionName may be null.
+ if (const char *FunctionName = TLI.getClearCacheBuiltinName())
+ lowerCallToExternalSymbol(I, FunctionName);
+ return;
case Intrinsic::donothing:
// ignore
- return nullptr;
+ return;
case Intrinsic::experimental_stackmap:
visitStackmap(I);
- return nullptr;
+ return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I);
- return nullptr;
+ return;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(ImmutableStatepoint(&I));
- return nullptr;
+ return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
- return nullptr;
+ return;
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
- return nullptr;
+ return;
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
@@ -6182,7 +6614,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
.addFrameIndex(FI);
}
- return nullptr;
+ return;
}
case Intrinsic::localrecover: {
@@ -6211,7 +6643,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
setValue(&I, Add);
- return nullptr;
+ return;
}
case Intrinsic::eh_exceptionpointer:
@@ -6226,7 +6658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
- return nullptr;
+ return;
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
@@ -6234,7 +6666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
- return nullptr;
+ return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
@@ -6257,7 +6689,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
- return nullptr;
+ return;
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
@@ -6265,7 +6697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
- return nullptr;
+ return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
@@ -6292,14 +6724,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
- return nullptr;
+ return;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
- return nullptr;
+ return;
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -6312,11 +6744,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
- return nullptr;
+ return;
case Intrinsic::icall_branch_funnel: {
SmallVector<SDValue, 16> Ops;
- Ops.push_back(DAG.getRoot());
Ops.push_back(getValue(I.getArgOperand(0)));
int64_t Offset;
@@ -6359,20 +6790,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops.push_back(T.Target);
}
+ Ops.push_back(DAG.getRoot()); // Chain
SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
getCurSDLoc(), MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
HasTailCall = true;
- return nullptr;
+ return;
}
case Intrinsic::wasm_landingpad_index:
// Information this intrinsic contained has been transferred to
// MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
// delete it now.
- return nullptr;
+ return;
+
+ case Intrinsic::aarch64_settag:
+ case Intrinsic::aarch64_settag_zero: {
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+ SDValue Val = TSI.EmitTargetCodeForSetTag(
+ DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+ ZeroMemory);
+ DAG.setRoot(Val);
+ setValue(&I, Val);
+ return;
+ }
}
}
@@ -6400,6 +6845,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
+ case Intrinsic::experimental_constrained_fptrunc:
+ Opcode = ISD::STRICT_FP_ROUND;
+ break;
+ case Intrinsic::experimental_constrained_fpext:
+ Opcode = ISD::STRICT_FP_EXTEND;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@@ -6463,7 +6914,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
- if (FPI.isUnaryOp())
+ if (Opcode == ISD::STRICT_FP_ROUND)
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ DAG.getTargetConstant(0, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())) });
+ else if (FPI.isUnaryOp())
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
else if (FPI.isTernaryOp())
@@ -6476,6 +6932,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
+ if (FPI.getExceptionBehavior() !=
+ ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
+ SDNodeFlags Flags;
+ Flags.setFPExcept(true);
+ Result->setFlags(Flags);
+ }
+
assert(Result.getNode()->getNumValues() == 2);
SDValue OutChain = Result.getValue(1);
DAG.setRoot(OutChain);
@@ -6596,11 +7059,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node = DAG.getRegister(FuncInfo
- .getOrCreateSwiftErrorVRegUseAt(
- CS.getInstruction(), FuncInfo.MBB, V)
- .first,
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node = DAG.getRegister(
+ SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -6641,13 +7102,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- unsigned VReg; bool CreatedVReg;
- std::tie(VReg, CreatedVReg) =
- FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
+ unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+ CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
- // We update the virtual register for the actual swifterror argument.
- if (CreatedVReg)
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
DAG.setRoot(CopyNode);
}
}
@@ -6995,10 +7452,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- computeUsesVAFloatArgument(I, MMI);
-
- const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
@@ -7008,9 +7461,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
IID = II->getIntrinsicID(F);
if (IID) {
- RenameFn = visitIntrinsicCall(I, IID);
- if (!RenameFn)
- return;
+ visitIntrinsicCall(I, IID);
+ return;
}
}
@@ -7159,20 +7611,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
}
- SDValue Callee;
- if (!RenameFn)
- Callee = getValue(I.getCalledValue());
- else
- Callee = DAG.getExternalSymbol(
- RenameFn,
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
-
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower calls with arbitrary operand bundles!");
+ SDValue Callee = getValue(I.getCalledValue());
+
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
@@ -7328,8 +7774,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
- Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(MF, SSFI));
+ Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(MF, SSFI),
+ TLI.getMemValueType(DL, Ty));
OpInfo.CallOperand = StackSlot;
return Chain;
@@ -7353,6 +7800,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ // No work to do for memory operations.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ return;
+
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
unsigned AssignedReg;
@@ -7435,7 +7886,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
for (; NumRegs; --NumRegs, ++I) {
assert(I != RC->end() && "Ran out of registers to allocate!");
- auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC);
+ Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
Regs.push_back(R);
}
@@ -7509,9 +7960,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
- bool hasMemory = false;
-
- // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
+ // AsmDialect, MayLoad, MayStore).
+ bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(CS);
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
@@ -7527,7 +7978,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ const Instruction *I = CS.getInstruction();
+ if (isa<CallBrInst>(I) &&
+ (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
+ cast<CallBrInst>(I)->getNumIndirectDests())) {
+ const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
+ OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
+ } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
@@ -7554,8 +8012,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.ConstraintVT = MVT::Other;
}
- if (!hasMemory)
- hasMemory = OpInfo.hasMemory(TLI);
+ if (!HasSideEffect)
+ HasSideEffect = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
// FIXME: Could we compute this on OpInfo rather than T?
@@ -7566,17 +8024,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ExtraInfo.update(T);
}
- SDValue Chain, Flag;
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
- if (hasMemory || IA->hasSideEffects())
- Chain = getRoot();
- else
- Chain = DAG.getRoot();
+ SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+
+ bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+ if (IsCallBr) {
+ // If this is a callbr we need to flush pending exports since inlineasm_br
+ // is a terminator. We need to do this before nodes are glued to
+ // the inlineasm_br node.
+ Chain = getControlRoot();
+ }
- // Second pass over the constraints: compute which constraint option to use
- // and assign registers to constraints that want a specific physreg.
+ // Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
@@ -7612,28 +8073,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.isIndirect = true;
}
- // If this constraint is for a specific register, allocate it before
- // anything else.
- SDISelAsmOperandInfo &RefOpInfo =
- OpInfo.isMatchingInputConstraint()
- ? ConstraintOperands[OpInfo.getMatchedOperand()]
- : OpInfo;
- if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
- }
-
- // Third pass - Loop over all of the operands, assigning virtual or physregs
- // to register class operands.
- for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
- SDISelAsmOperandInfo &RefOpInfo =
- OpInfo.isMatchingInputConstraint()
- ? ConstraintOperands[OpInfo.getMatchedOperand()]
- : OpInfo;
-
- // C_Register operands have already been allocated, Other/Memory don't need
- // to be.
- if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -7653,21 +8092,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.push_back(DAG.getTargetConstant(
ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
- // Loop over all of the inputs, copying the operand values into the
- // appropriate registers and processing the output regs.
- RegsForValue RetValRegs;
-
- // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
- std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
-
+ // Third pass: Loop over operands to prepare DAG-level operands. As part of
+ // this, assign virtual and physical registers for inputs and outputs.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ // Assign Registers.
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : OpInfo;
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
switch (OpInfo.Type) {
case InlineAsm::isOutput:
- if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
- OpInfo.ConstraintType != TargetLowering::C_Register) {
- // Memory output, or 'other' output (e.g. 'X' constraint).
- assert(OpInfo.isIndirect && "Memory output must be indirect operand");
-
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -7680,38 +8119,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
- }
-
- // Otherwise, this is a register or register class output.
-
- // Copy the output from the appropriate register. Find a register that
- // we can use.
- if (OpInfo.AssignedRegs.Regs.empty()) {
- emitInlineAsmError(
- CS, "couldn't allocate output register for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
- return;
- }
+ } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+ !OpInfo.isIndirect) ||
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+ // Otherwise, this outputs to a register (directly for C_Register /
+ // C_RegisterClass, and a target-defined fashion for C_Other). Find a
+ // register that we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(
+ CS, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
- // If this is an indirect operand, store through the pointer after the
- // asm.
- if (OpInfo.isIndirect) {
- IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
- OpInfo.CallOperandVal));
- } else {
- // This is the result value of the call.
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
- // Concatenate this output onto the outputs list.
- RetValRegs.append(OpInfo.AssignedRegs);
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(
+ OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
-
- // Add information to the INLINEASM node to know that this register is
- // set.
- OpInfo.AssignedRegs
- .AddInlineAsmOperands(OpInfo.isEarlyClobber
- ? InlineAsm::Kind_RegDefEarlyClobber
- : InlineAsm::Kind_RegDef,
- false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
break;
case InlineAsm::isInput: {
@@ -7865,98 +8293,117 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
- Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
+ unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
+ Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
- // If this asm returns a register value, copy the result from that register
- // and set it as the value of the call.
- if (!RetValRegs.Regs.empty()) {
- SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
- Chain, &Flag, CS.getInstruction());
-
- llvm::Type *CSResultType = CS.getType();
- unsigned numRet;
- ArrayRef<Type *> ResultTypes;
- SmallVector<SDValue, 1> ResultValues(1);
- if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
- numRet = StructResult->getNumElements();
- assert(Val->getNumOperands() == numRet &&
- "Mismatch in number of output operands in asm result");
- ResultTypes = StructResult->elements();
- ArrayRef<SDUse> ValueUses = Val->ops();
- ResultValues.resize(numRet);
- std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
- [](const SDUse &u) -> SDValue { return u.get(); });
- } else {
- numRet = 1;
- ResultValues[0] = Val;
- ResultTypes = makeArrayRef(CSResultType);
- }
- SmallVector<EVT, 1> ResultVTs(numRet);
- for (unsigned i = 0; i < numRet; i++) {
- EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
- SDValue Val = ResultValues[i];
- assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
- // If the type of the inline asm call site return value is different but
- // has same size as the type of the asm output bitcast it. One example
- // of this is for vectors with different width / number of elements.
- // This can happen for register classes that can contain multiple
- // different value types. The preg or vreg allocated may not have the
- // same VT as was expected.
- //
- // This can also happen for a return value that disagrees with the
- // register class it is put in, eg. a double in a general-purpose
- // register on a 32-bit machine.
- if (ResultVT != Val.getValueType() &&
- ResultVT.getSizeInBits() == Val.getValueSizeInBits())
- Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
- else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
- Val.getValueType().isInteger()) {
- // If a result value was tied to an input value, the computed result
- // may have a wider width than the expected result. Extract the
- // relevant portion.
- Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
- }
+ // Do additional work to generate outputs.
- assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
- ResultVTs[i] = ResultVT;
- ResultValues[i] = Val;
- }
+ SmallVector<EVT, 1> ResultVTs;
+ SmallVector<SDValue, 1> ResultValues;
+ SmallVector<SDValue, 8> OutChains;
- Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(ResultVTs), ResultValues);
- setValue(CS.getInstruction(), Val);
- // Don't need to use this as a chain in this case.
- if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
- return;
- }
+ llvm::Type *CSResultType = CS.getType();
+ ArrayRef<Type *> ResultTypes;
+ if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+ ResultTypes = StructResult->elements();
+ else if (!CSResultType->isVoidTy())
+ ResultTypes = makeArrayRef(CSResultType);
+
+ auto CurResultType = ResultTypes.begin();
+ auto handleRegAssign = [&](SDValue V) {
+ assert(CurResultType != ResultTypes.end() && "Unexpected value");
+ assert((*CurResultType)->isSized() && "Unexpected unsized type");
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+ ++CurResultType;
+      // If the type of the inline asm call site return value is different but
+      // has the same size as the type of the asm output, bitcast it. One example
+      // of this is for vectors with different width / number of elements. This can
+ // happen for register classes that can contain multiple different value
+ // types. The preg or vreg allocated may not have the same VT as was
+ // expected.
+ //
+ // This can also happen for a return value that disagrees with the register
+      // class it is put in, e.g. a double in a general-purpose register on a
+ // 32-bit machine.
+ if (ResultVT != V.getValueType() &&
+ ResultVT.getSizeInBits() == V.getValueSizeInBits())
+ V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+ else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+ V.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result
+ // may have a wider width than the expected result. Extract the
+ // relevant portion.
+ V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+ }
+ assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+ ResultVTs.push_back(ResultVT);
+ ResultValues.push_back(V);
+ };
- std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
+ // Deal with output operands.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.Type == InlineAsm::isOutput) {
+ SDValue Val;
+ // Skip trivial output operands.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ continue;
- // Process indirect outputs, first output all of the flagged copies out of
- // physregs.
- for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
- RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
- const Value *Ptr = IndirectStoresToEmit[i].second;
- SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
- Chain, &Flag, IA);
- StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass:
+ Val = OpInfo.AssignedRegs.getCopyFromRegs(
+ DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+ break;
+ case TargetLowering::C_Other:
+ Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ OpInfo, DAG);
+ break;
+ case TargetLowering::C_Memory:
+ break; // Already handled.
+ case TargetLowering::C_Unknown:
+ assert(false && "Unexpected unknown constraint");
+ }
+
+      // Indirect outputs manifest as stores. Record output chains.
+ if (OpInfo.isIndirect) {
+ const Value *Ptr = OpInfo.CallOperandVal;
+ assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+ SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+ MachinePointerInfo(Ptr));
+ OutChains.push_back(Store);
+ } else {
+        // Generate CopyFromRegs to associated registers.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (Val.getOpcode() == ISD::MERGE_VALUES) {
+ for (const SDValue &V : Val->op_values())
+ handleRegAssign(V);
+ } else
+ handleRegAssign(Val);
+ }
+ }
}
- // Emit the non-flagged stores from the physregs.
- SmallVector<SDValue, 8> OutChains;
- for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
- SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
- getValue(StoresToEmit[i].second),
- MachinePointerInfo(StoresToEmit[i].second));
- OutChains.push_back(Val);
+ // Set results.
+ if (!ResultValues.empty()) {
+ assert(CurResultType == ResultTypes.end() &&
+ "Mismatch in number of ResultTypes");
+ assert(ResultValues.size() == ResultTypes.size() &&
+ "Mismatch in number of output operands in asm result");
+
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(CS.getInstruction(), V);
}
+ // Collect store chains.
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
- DAG.setRoot(Chain);
+  // Only update the DAG root if the inline assembly has a memory effect.
+ if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
+ DAG.setRoot(Chain);
}
void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
@@ -7989,12 +8436,16 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
- getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
- DAG.getSrcValue(I.getOperand(0)),
- DL.getABITypeAlignment(I.getType()));
- setValue(&I, V);
+ SDValue V = DAG.getVAArg(
+ TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
+ getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
+ DL.getABITypeAlignment(I.getType()));
DAG.setRoot(V.getValue(1));
+
+ if (I.getType()->isPointerTy())
+ V = DAG.getPtrExtOrTrunc(
+ V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
+ setValue(&I, V);
}
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
@@ -8021,7 +8472,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return Op;
ConstantRange CR = getConstantRangeFromMetadata(*Range);
- if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
return Op;
APInt Lo = CR.getUnsignedMin();
@@ -8058,7 +8509,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
- TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
+ TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
@@ -8068,21 +8519,21 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
// Attributes for args start at offset 1, after the return attribute.
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
- const Value *V = CS->getOperand(ArgI);
+ const Value *V = Call->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI);
+ Entry.setAttributes(Call, ArgI);
Args.push_back(Entry);
}
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
- .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
- .setDiscardResult(CS->use_empty())
+ .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint);
}
@@ -8093,7 +8544,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
-/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
@@ -8226,8 +8677,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
TargetLowering::CallLoweringInfo CLI(DAG);
- populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
- true);
+ populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
+ NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
@@ -8351,15 +8802,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
FMF = I.getFastMathFlags();
switch (Intrinsic) {
- case Intrinsic::experimental_vector_reduce_fadd:
- if (FMF.isFast())
- Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
break;
- case Intrinsic::experimental_vector_reduce_fmul:
- if (FMF.isFast())
- Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
break;
@@ -8433,8 +8886,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
- SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
- SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+ SmallVector<EVT, 4> OldRetTys;
+ SmallVector<uint64_t, 4> OldOffsets;
+ RetTys.swap(OldRetTys);
+ Offsets.swap(OldOffsets);
+
for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
EVT RetVT = OldRetTys[i];
uint64_t Offset = OldOffsets[i];
@@ -8489,7 +8945,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ ISD::ArgFlagsTy Flags;
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (I == RetTys.size() - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
@@ -8497,9 +8961,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
+ MyFlags.Flags = Flags;
MyFlags.VT = RegisterVT;
MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetTy->isPointerTy()) {
+ MyFlags.Flags.setPointer();
+ MyFlags.Flags.setPointerAddrSpace(
+ cast<PointerType>(CLI.RetTy)->getAddressSpace());
+ }
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
if (CLI.RetZExt)
@@ -8550,6 +9020,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// specify the alignment it wants.
unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
+ if (Args[i].Ty->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Args[i].Ty)->getAddressSpace());
+ }
if (Args[i].IsZExt)
Flags.setZExt();
if (Args[i].IsSExt)
@@ -8587,8 +9062,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (Args[i].IsByVal || Args[i].IsInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
- // For ByVal, alignment should come from FE. BE will guess if this
+
+ unsigned FrameSize = DL.getTypeAllocSize(
+ Args[i].ByValType ? Args[i].ByValType : ElementTy);
+ Flags.setByValSize(FrameSize);
+
// info is not there but there are cases it cannot get right.
unsigned FrameAlign;
if (Args[i].Alignment)
@@ -8619,8 +9097,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// for now.
if (Args[i].IsReturned && !Op.getValueType().isVector() &&
CanLowerReturn) {
- assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
- "unexpected use of 'returned'");
+ assert((CLI.RetTy == Args[i].Ty ||
+ (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
+ CLI.RetTy->getPointerAddressSpace() ==
+ Args[i].Ty->getPointerAddressSpace())) &&
+ RetTys.size() == NumValues && "unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
// either the register MVT and the actual EVT are the same size or that
// the return value and argument are extended in the same way; in these
@@ -9023,7 +9504,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
if (Arg.hasAttribute(Attribute::ByVal))
- FinalType = cast<PointerType>(FinalType)->getElementType();
+ FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg());
for (unsigned Value = 0, NumValues = ValueVTs.size();
@@ -9038,6 +9519,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned OriginalAlignment =
TLI->getABIAlignmentForCallingConv(ArgTy, DL);
+ if (Arg.getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Arg.getType())->getAddressSpace());
+ }
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
@@ -9078,11 +9564,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
- PointerType *Ty = cast<PointerType>(Arg.getType());
- Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
- // For ByVal, alignment should be passed from FE. BE will guess if
- // this info is not there but there are cases it cannot get right.
+ Type *ElementTy = Arg.getParamByValType();
+
+ // For ByVal, size and alignment should be passed from FE. BE will
+ // guess if this info is not there but there are cases it cannot get
+ // right.
+ unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
+ Flags.setByValSize(FrameSize);
+
unsigned FrameAlign;
if (Arg.getParamAlignment())
FrameAlign = Arg.getParamAlignment();
@@ -9263,17 +9752,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
- FuncInfo->SwiftErrorArg, Reg);
+ SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+ Reg);
}
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
- // general. It's also subtly incompatible with the hacks FastISel
- // uses with vregs.
+ // general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
@@ -9354,7 +9842,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
- RegOut = FuncInfo.CreateRegs(C->getType());
+ RegOut = FuncInfo.CreateRegs(C);
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
@@ -9367,7 +9855,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
- Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
@@ -9432,450 +9920,6 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
HasTailCall = true;
}
-uint64_t
-SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
- unsigned First, unsigned Last) const {
- assert(Last >= First);
- const APInt &LowCase = Clusters[First].Low->getValue();
- const APInt &HighCase = Clusters[Last].High->getValue();
- assert(LowCase.getBitWidth() == HighCase.getBitWidth());
-
- // FIXME: A range of consecutive cases has 100% density, but only requires one
- // comparison to lower. We should discriminate against such consecutive ranges
- // in jump tables.
-
- return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
-}
-
-uint64_t SelectionDAGBuilder::getJumpTableNumCases(
- const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
- unsigned Last) const {
- assert(Last >= First);
- assert(TotalCases[Last] >= TotalCases[First]);
- uint64_t NumCases =
- TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
- return NumCases;
-}
-
-bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
- unsigned First, unsigned Last,
- const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB,
- CaseCluster &JTCluster) {
- assert(First <= Last);
-
- auto Prob = BranchProbability::getZero();
- unsigned NumCmps = 0;
- std::vector<MachineBasicBlock*> Table;
- DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
-
- // Initialize probabilities in JTProbs.
- for (unsigned I = First; I <= Last; ++I)
- JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
-
- for (unsigned I = First; I <= Last; ++I) {
- assert(Clusters[I].Kind == CC_Range);
- Prob += Clusters[I].Prob;
- const APInt &Low = Clusters[I].Low->getValue();
- const APInt &High = Clusters[I].High->getValue();
- NumCmps += (Low == High) ? 1 : 2;
- if (I != First) {
- // Fill the gap between this and the previous cluster.
- const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
- assert(PreviousHigh.slt(Low));
- uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
- for (uint64_t J = 0; J < Gap; J++)
- Table.push_back(DefaultMBB);
- }
- uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
- for (uint64_t J = 0; J < ClusterSize; ++J)
- Table.push_back(Clusters[I].MBB);
- JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
- }
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NumDests = JTProbs.size();
- if (TLI.isSuitableForBitTests(
- NumDests, NumCmps, Clusters[First].Low->getValue(),
- Clusters[Last].High->getValue(), DAG.getDataLayout())) {
- // Clusters[First..Last] should be lowered as bit tests instead.
- return false;
- }
-
- // Create the MBB that will load from and jump through the table.
- // Note: We create it here, but it's not inserted into the function yet.
- MachineFunction *CurMF = FuncInfo.MF;
- MachineBasicBlock *JumpTableMBB =
- CurMF->CreateMachineBasicBlock(SI->getParent());
-
- // Add successors. Note: use table order for determinism.
- SmallPtrSet<MachineBasicBlock *, 8> Done;
- for (MachineBasicBlock *Succ : Table) {
- if (Done.count(Succ))
- continue;
- addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
- Done.insert(Succ);
- }
- JumpTableMBB->normalizeSuccProbs();
-
- unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
- ->createJumpTableIndex(Table);
-
- // Set up the jump table info.
- JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
- JumpTableHeader JTH(Clusters[First].Low->getValue(),
- Clusters[Last].High->getValue(), SI->getCondition(),
- nullptr, false);
- JTCases.emplace_back(std::move(JTH), std::move(JT));
-
- JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
- JTCases.size() - 1, Prob);
- return true;
-}
-
-void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
- const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB) {
-#ifndef NDEBUG
- // Clusters must be non-empty, sorted, and only contain Range clusters.
- assert(!Clusters.empty());
- for (CaseCluster &C : Clusters)
- assert(C.Kind == CC_Range);
- for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
- assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
-#endif
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
- return;
-
- const int64_t N = Clusters.size();
- const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
- const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
-
- if (N < 2 || N < MinJumpTableEntries)
- return;
-
- // TotalCases[i]: Total nbr of cases in Clusters[0..i].
- SmallVector<unsigned, 8> TotalCases(N);
- for (unsigned i = 0; i < N; ++i) {
- const APInt &Hi = Clusters[i].High->getValue();
- const APInt &Lo = Clusters[i].Low->getValue();
- TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
- if (i != 0)
- TotalCases[i] += TotalCases[i - 1];
- }
-
- // Cheap case: the whole range may be suitable for jump table.
- uint64_t Range = getJumpTableRange(Clusters,0, N - 1);
- uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
- assert(NumCases < UINT64_MAX / 100);
- assert(Range >= NumCases);
- if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
- CaseCluster JTCluster;
- if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
- Clusters[0] = JTCluster;
- Clusters.resize(1);
- return;
- }
- }
-
- // The algorithm below is not suitable for -O0.
- if (TM.getOptLevel() == CodeGenOpt::None)
- return;
-
- // Split Clusters into minimum number of dense partitions. The algorithm uses
- // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
- // for the Case Statement'" (1994), but builds the MinPartitions array in
- // reverse order to make it easier to reconstruct the partitions in ascending
- // order. In the choice between two optimal partitionings, it picks the one
- // which yields more jump tables.
-
- // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
- SmallVector<unsigned, 8> MinPartitions(N);
- // LastElement[i] is the last element of the partition starting at i.
- SmallVector<unsigned, 8> LastElement(N);
- // PartitionsScore[i] is used to break ties when choosing between two
- // partitionings resulting in the same number of partitions.
- SmallVector<unsigned, 8> PartitionsScore(N);
- // For PartitionsScore, a small number of comparisons is considered as good as
- // a jump table and a single comparison is considered better than a jump
- // table.
- enum PartitionScores : unsigned {
- NoTable = 0,
- Table = 1,
- FewCases = 1,
- SingleCase = 2
- };
-
- // Base case: There is only one way to partition Clusters[N-1].
- MinPartitions[N - 1] = 1;
- LastElement[N - 1] = N - 1;
- PartitionsScore[N - 1] = PartitionScores::SingleCase;
-
- // Note: loop indexes are signed to avoid underflow.
- for (int64_t i = N - 2; i >= 0; i--) {
- // Find optimal partitioning of Clusters[i..N-1].
- // Baseline: Put Clusters[i] into a partition on its own.
- MinPartitions[i] = MinPartitions[i + 1] + 1;
- LastElement[i] = i;
- PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
-
- // Search for a solution that results in fewer partitions.
- for (int64_t j = N - 1; j > i; j--) {
- // Try building a partition from Clusters[i..j].
- uint64_t Range = getJumpTableRange(Clusters, i, j);
- uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
- assert(NumCases < UINT64_MAX / 100);
- assert(Range >= NumCases);
- if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
- unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
- unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
- int64_t NumEntries = j - i + 1;
-
- if (NumEntries == 1)
- Score += PartitionScores::SingleCase;
- else if (NumEntries <= SmallNumberOfEntries)
- Score += PartitionScores::FewCases;
- else if (NumEntries >= MinJumpTableEntries)
- Score += PartitionScores::Table;
-
- // If this leads to fewer partitions, or to the same number of
- // partitions with better score, it is a better partitioning.
- if (NumPartitions < MinPartitions[i] ||
- (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
- MinPartitions[i] = NumPartitions;
- LastElement[i] = j;
- PartitionsScore[i] = Score;
- }
- }
- }
- }
-
- // Iterate over the partitions, replacing some with jump tables in-place.
- unsigned DstIndex = 0;
- for (unsigned First = 0, Last; First < N; First = Last + 1) {
- Last = LastElement[First];
- assert(Last >= First);
- assert(DstIndex <= First);
- unsigned NumClusters = Last - First + 1;
-
- CaseCluster JTCluster;
- if (NumClusters >= MinJumpTableEntries &&
- buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
- Clusters[DstIndex++] = JTCluster;
- } else {
- for (unsigned I = First; I <= Last; ++I)
- std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
- }
- }
- Clusters.resize(DstIndex);
-}
-
-bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
- unsigned First, unsigned Last,
- const SwitchInst *SI,
- CaseCluster &BTCluster) {
- assert(First <= Last);
- if (First == Last)
- return false;
-
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- unsigned NumCmps = 0;
- for (int64_t I = First; I <= Last; ++I) {
- assert(Clusters[I].Kind == CC_Range);
- Dests.set(Clusters[I].MBB->getNumber());
- NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
- }
- unsigned NumDests = Dests.count();
-
- APInt Low = Clusters[First].Low->getValue();
- APInt High = Clusters[Last].High->getValue();
- assert(Low.slt(High));
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
- return false;
-
- APInt LowBound;
- APInt CmpRange;
-
- const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
- assert(TLI.rangeFitsInWord(Low, High, DL) &&
- "Case range must fit in bit mask!");
-
- // Check if the clusters cover a contiguous range such that no value in the
- // range will jump to the default statement.
- bool ContiguousRange = true;
- for (int64_t I = First + 1; I <= Last; ++I) {
- if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
- ContiguousRange = false;
- break;
- }
- }
-
- if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
- // Optimize the case where all the case values fit in a word without having
- // to subtract minValue. In this case, we can optimize away the subtraction.
- LowBound = APInt::getNullValue(Low.getBitWidth());
- CmpRange = High;
- ContiguousRange = false;
- } else {
- LowBound = Low;
- CmpRange = High - Low;
- }
-
- CaseBitsVector CBV;
- auto TotalProb = BranchProbability::getZero();
- for (unsigned i = First; i <= Last; ++i) {
- // Find the CaseBits for this destination.
- unsigned j;
- for (j = 0; j < CBV.size(); ++j)
- if (CBV[j].BB == Clusters[i].MBB)
- break;
- if (j == CBV.size())
- CBV.push_back(
- CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
- CaseBits *CB = &CBV[j];
-
- // Update Mask, Bits and ExtraProb.
- uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
- uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
- assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
- CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
- CB->Bits += Hi - Lo + 1;
- CB->ExtraProb += Clusters[i].Prob;
- TotalProb += Clusters[i].Prob;
- }
-
- BitTestInfo BTI;
- llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
- // Sort by probability first, number of bits second, bit mask third.
- if (a.ExtraProb != b.ExtraProb)
- return a.ExtraProb > b.ExtraProb;
- if (a.Bits != b.Bits)
- return a.Bits > b.Bits;
- return a.Mask < b.Mask;
- });
-
- for (auto &CB : CBV) {
- MachineBasicBlock *BitTestBB =
- FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
- BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
- }
- BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false,
- ContiguousRange, nullptr, nullptr, std::move(BTI),
- TotalProb);
-
- BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
- BitTestCases.size() - 1, TotalProb);
- return true;
-}
-
-void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
- const SwitchInst *SI) {
-// Partition Clusters into as few subsets as possible, where each subset has a
-// range that fits in a machine word and has <= 3 unique destinations.
-
-#ifndef NDEBUG
- // Clusters must be sorted and contain Range or JumpTable clusters.
- assert(!Clusters.empty());
- assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
- for (const CaseCluster &C : Clusters)
- assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
- for (unsigned i = 1; i < Clusters.size(); ++i)
- assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
-#endif
-
- // The algorithm below is not suitable for -O0.
- if (TM.getOptLevel() == CodeGenOpt::None)
- return;
-
- // If target does not have legal shift left, do not emit bit tests at all.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = DAG.getDataLayout();
-
- EVT PTy = TLI.getPointerTy(DL);
- if (!TLI.isOperationLegal(ISD::SHL, PTy))
- return;
-
- int BitWidth = PTy.getSizeInBits();
- const int64_t N = Clusters.size();
-
- // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
- SmallVector<unsigned, 8> MinPartitions(N);
- // LastElement[i] is the last element of the partition starting at i.
- SmallVector<unsigned, 8> LastElement(N);
-
- // FIXME: This might not be the best algorithm for finding bit test clusters.
-
- // Base case: There is only one way to partition Clusters[N-1].
- MinPartitions[N - 1] = 1;
- LastElement[N - 1] = N - 1;
-
- // Note: loop indexes are signed to avoid underflow.
- for (int64_t i = N - 2; i >= 0; --i) {
- // Find optimal partitioning of Clusters[i..N-1].
- // Baseline: Put Clusters[i] into a partition on its own.
- MinPartitions[i] = MinPartitions[i + 1] + 1;
- LastElement[i] = i;
-
- // Search for a solution that results in fewer partitions.
- // Note: the search is limited by BitWidth, reducing time complexity.
- for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
- // Try building a partition from Clusters[i..j].
-
- // Check the range.
- if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
- Clusters[j].High->getValue(), DL))
- continue;
-
- // Check nbr of destinations and cluster types.
- // FIXME: This works, but doesn't seem very efficient.
- bool RangesOnly = true;
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- for (int64_t k = i; k <= j; k++) {
- if (Clusters[k].Kind != CC_Range) {
- RangesOnly = false;
- break;
- }
- Dests.set(Clusters[k].MBB->getNumber());
- }
- if (!RangesOnly || Dests.count() > 3)
- break;
-
- // Check if it's a better partition.
- unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
- if (NumPartitions < MinPartitions[i]) {
- // Found a better partition.
- MinPartitions[i] = NumPartitions;
- LastElement[i] = j;
- }
- }
- }
-
- // Iterate over the partitions, replacing with bit-test clusters in-place.
- unsigned DstIndex = 0;
- for (unsigned First = 0, Last; First < N; First = Last + 1) {
- Last = LastElement[First];
- assert(First <= Last);
- assert(DstIndex <= First);
-
- CaseCluster BitTestCluster;
- if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
- Clusters[DstIndex++] = BitTestCluster;
- } else {
- size_t NumClusters = Last - First + 1;
- std::memmove(&Clusters[DstIndex], &Clusters[First],
- sizeof(Clusters[0]) * NumClusters);
- DstIndex += NumClusters;
- }
- }
- Clusters.resize(DstIndex);
-}
-
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB) {
@@ -9977,10 +10021,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
MachineBasicBlock *Fallthrough;
if (I == W.LastCluster) {
// For the last cluster, fall through to the default destination.
Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
} else {
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
@@ -9992,8 +10039,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
switch (I->Kind) {
case CC_JumpTable: {
// FIXME: Optimize away range check based on pivot comparisons.
- JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
- JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
@@ -10017,7 +10064,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ JTH->OmitRangeCheck = true;
+ }
+
+ if (!JTH->OmitRangeCheck)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -10034,8 +10087,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
break;
}
case CC_BitTests: {
+ // FIXME: If Fallthrough is unreachable, skip the range check.
+
// FIXME: Optimize away range check based on pivot comparisons.
- BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
// The bit test blocks haven't been inserted yet; insert them here.
for (BitTestCase &BTC : BTB->Cases)
@@ -10078,6 +10133,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
RHS = I->High;
}
+ // If Fallthrough is unreachable, fold away the comparison.
+ if (FallthroughUnreachable)
+ CC = ISD::SETTRUE;
+
// The false probability is the sum of all unhandled cases.
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
getCurSDLoc(), I->Prob, UnhandledProbs);
@@ -10085,7 +10144,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
break;
}
@@ -10236,7 +10295,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
}
 // Scale CaseProb after peeling a case with the probability of PeeledCaseProb
@@ -10265,7 +10324,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
// Don't perform if there is only one cluster or optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
TM.getOptLevel() == CodeGenOpt::None ||
- SwitchMBB->getParent()->getFunction().optForMinSize())
+ SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
@@ -10331,38 +10390,6 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// if there are many clusters.
sortAndRangeify(Clusters);
- if (TM.getOptLevel() != CodeGenOpt::None) {
- // Replace an unreachable default with the most popular destination.
- // FIXME: Exploit unreachable default more aggressively.
- bool UnreachableDefault =
- isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
- if (UnreachableDefault && !Clusters.empty()) {
- DenseMap<const BasicBlock *, unsigned> Popularity;
- unsigned MaxPop = 0;
- const BasicBlock *MaxBB = nullptr;
- for (auto I : SI.cases()) {
- const BasicBlock *BB = I.getCaseSuccessor();
- if (++Popularity[BB] > MaxPop) {
- MaxPop = Popularity[BB];
- MaxBB = BB;
- }
- }
- // Set new default.
- assert(MaxPop > 0 && MaxBB);
- DefaultMBB = FuncInfo.MBBMap[MaxBB];
-
- // Remove cases that were pointing to the destination that is now the
- // default.
- CaseClusterVector New;
- New.reserve(Clusters.size());
- for (CaseCluster &CC : Clusters) {
- if (CC.MBB != DefaultMBB)
- New.push_back(CC);
- }
- Clusters = std::move(New);
- }
- }
-
   // The branch probability of the peeled case.
BranchProbability PeeledCaseProb = BranchProbability::getZero();
MachineBasicBlock *PeeledSwitchMBB =
@@ -10380,8 +10407,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- findJumpTables(Clusters, &SI, DefaultMBB);
- findBitTestClusters(Clusters, &SI);
+ SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -10420,7 +10447,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
- !DefaultMBB->getParent()->getFunction().optForMinSize()) {
+ !DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f9cdb69daf7..0072e33f23b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,9 +1,8 @@
//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,11 +17,13 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
@@ -47,6 +48,7 @@ class AtomicRMWInst;
class BasicBlock;
class BranchInst;
class CallInst;
+class CallBrInst;
class CatchPadInst;
class CatchReturnInst;
class CatchSwitchInst;
@@ -76,6 +78,7 @@ class ResumeInst;
class ReturnInst;
class SDDbgValue;
class StoreInst;
+class SwiftErrorValueTracking;
class SwitchInst;
class TargetLibraryInfo;
class TargetMachine;
@@ -91,16 +94,16 @@ class Value;
/// implementation that is parameterized by a TargetLowering object.
///
class SelectionDAGBuilder {
- /// CurInst - The current instruction being visited
+ /// The current instruction being visited.
const Instruction *CurInst = nullptr;
DenseMap<const Value*, SDValue> NodeMap;
- /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used
+  /// Maps argument values for unused arguments. This is used
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
- /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ /// Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
const DbgValueInst* DI = nullptr;
DebugLoc dl;
@@ -116,18 +119,17 @@ class SelectionDAGBuilder {
unsigned getSDNodeOrder() { return SDNodeOrder; }
};
- /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap.
+ /// Helper type for DanglingDebugInfoMap.
typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
- /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
- /// yet seen the referent. We defer handling these until we do see it.
- DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+ /// Keeps track of dbg_values for which we have not yet seen the referent.
+ /// We defer handling these until we do see it.
+ MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
public:
- /// PendingLoads - Loads are not emitted to the program immediately. We bunch
- /// them up and then emit token factor nodes when possible. This allows us to
- /// get simple disambiguation between loads without worrying about alias
- /// analysis.
+ /// Loads are not emitted to the program immediately. We bunch them up and
+ /// then emit token factor nodes when possible. This allows us to get simple
+ /// disambiguation between loads without worrying about alias analysis.
SmallVector<SDValue, 8> PendingLoads;
/// State used while lowering a statepoint sequence (gc_statepoint,
@@ -135,247 +137,37 @@ public:
StatepointLoweringState StatepointLowering;
private:
- /// PendingExports - CopyToReg nodes that copy values to virtual registers
- /// for export to other blocks need to be emitted before any terminator
- /// instruction, but they have no other ordering requirements. We bunch them
- /// up and the emit a single tokenfactor for them just before terminator
- /// instructions.
+ /// CopyToReg nodes that copy values to virtual registers for export to other
+ /// blocks need to be emitted before any terminator instruction, but they have
+  /// no other ordering requirements. We bunch them up and then emit a single
+ /// tokenfactor for them just before terminator instructions.
SmallVector<SDValue, 8> PendingExports;
- /// SDNodeOrder - A unique monotonically increasing number used to order the
- /// SDNodes we create.
+ /// A unique monotonically increasing number used to order the SDNodes we
+ /// create.
unsigned SDNodeOrder;
- enum CaseClusterKind {
- /// A cluster of adjacent case labels with the same destination, or just one
- /// case.
- CC_Range,
- /// A cluster of cases suitable for jump table lowering.
- CC_JumpTable,
- /// A cluster of cases suitable for bit test lowering.
- CC_BitTests
- };
-
- /// A cluster of case labels.
- struct CaseCluster {
- CaseClusterKind Kind;
- const ConstantInt *Low, *High;
- union {
- MachineBasicBlock *MBB;
- unsigned JTCasesIndex;
- unsigned BTCasesIndex;
- };
- BranchProbability Prob;
-
- static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
- MachineBasicBlock *MBB, BranchProbability Prob) {
- CaseCluster C;
- C.Kind = CC_Range;
- C.Low = Low;
- C.High = High;
- C.MBB = MBB;
- C.Prob = Prob;
- return C;
- }
-
- static CaseCluster jumpTable(const ConstantInt *Low,
- const ConstantInt *High, unsigned JTCasesIndex,
- BranchProbability Prob) {
- CaseCluster C;
- C.Kind = CC_JumpTable;
- C.Low = Low;
- C.High = High;
- C.JTCasesIndex = JTCasesIndex;
- C.Prob = Prob;
- return C;
- }
-
- static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
- unsigned BTCasesIndex, BranchProbability Prob) {
- CaseCluster C;
- C.Kind = CC_BitTests;
- C.Low = Low;
- C.High = High;
- C.BTCasesIndex = BTCasesIndex;
- C.Prob = Prob;
- return C;
- }
- };
-
- using CaseClusterVector = std::vector<CaseCluster>;
- using CaseClusterIt = CaseClusterVector::iterator;
-
- struct CaseBits {
- uint64_t Mask = 0;
- MachineBasicBlock* BB = nullptr;
- unsigned Bits = 0;
- BranchProbability ExtraProb;
-
- CaseBits() = default;
- CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
- BranchProbability Prob):
- Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
- };
-
- using CaseBitsVector = std::vector<CaseBits>;
-
- /// Sort Clusters and merge adjacent cases.
- void sortAndRangeify(CaseClusterVector &Clusters);
-
- /// CaseBlock - This structure is used to communicate between
- /// SelectionDAGBuilder and SDISel for the code generation of additional basic
- /// blocks needed by multi-case switch statements.
- struct CaseBlock {
- // CC - the condition code to use for the case block's setcc node
- ISD::CondCode CC;
-
- // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
- // Emit by default LHS op RHS. MHS is used for range comparisons:
- // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
- const Value *CmpLHS, *CmpMHS, *CmpRHS;
-
- // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
- MachineBasicBlock *TrueBB, *FalseBB;
-
- // ThisBB - the block into which to emit the code for the setcc and branches
- MachineBasicBlock *ThisBB;
-
- /// The debug location of the instruction this CaseBlock was
- /// produced from.
- SDLoc DL;
-
- // TrueProb/FalseProb - branch weights.
- BranchProbability TrueProb, FalseProb;
-
- CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle, MachineBasicBlock *truebb,
- MachineBasicBlock *falsebb, MachineBasicBlock *me,
- SDLoc dl,
- BranchProbability trueprob = BranchProbability::getUnknown(),
- BranchProbability falseprob = BranchProbability::getUnknown())
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
- TrueProb(trueprob), FalseProb(falseprob) {}
- };
-
- struct JumpTable {
- /// Reg - the virtual register containing the index of the jump table entry
- //. to jump to.
- unsigned Reg;
- /// JTI - the JumpTableIndex for this jump table in the function.
- unsigned JTI;
- /// MBB - the MBB into which to emit the code for the indirect jump.
- MachineBasicBlock *MBB;
- /// Default - the MBB of the default bb, which is a successor of the range
- /// check MBB. This is when updating PHI nodes in successors.
- MachineBasicBlock *Default;
-
- JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
- MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
- };
- struct JumpTableHeader {
- APInt First;
- APInt Last;
- const Value *SValue;
- MachineBasicBlock *HeaderBB;
- bool Emitted;
-
- JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
- bool E = false)
- : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E) {}
- };
- using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
-
- struct BitTestCase {
- uint64_t Mask;
- MachineBasicBlock *ThisBB;
- MachineBasicBlock *TargetBB;
- BranchProbability ExtraProb;
-
- BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- BranchProbability Prob):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
- };
-
- using BitTestInfo = SmallVector<BitTestCase, 3>;
-
- struct BitTestBlock {
- APInt First;
- APInt Range;
- const Value *SValue;
- unsigned Reg;
- MVT RegVT;
- bool Emitted;
- bool ContiguousRange;
- MachineBasicBlock *Parent;
- MachineBasicBlock *Default;
- BitTestInfo Cases;
- BranchProbability Prob;
- BranchProbability DefaultProb;
-
- BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
- bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
- BitTestInfo C, BranchProbability Pr)
- : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
- RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr) {}
- };
-
- /// Return the range of value in [First..Last].
- uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
- unsigned Last) const;
-
- /// Return the number of cases in [First..Last].
- uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
- unsigned First, unsigned Last) const;
-
- /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
- /// decides it's not a good idea.
- bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
- unsigned Last, const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
-
- /// Find clusters of cases suitable for jump table lowering.
- void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB);
-
- /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
- /// decides it's not a good idea.
- bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
- const SwitchInst *SI, CaseCluster &BTCluster);
-
- /// Find clusters of cases suitable for bit test lowering.
- void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
-
- struct SwitchWorkListItem {
- MachineBasicBlock *MBB;
- CaseClusterIt FirstCluster;
- CaseClusterIt LastCluster;
- const ConstantInt *GE;
- const ConstantInt *LT;
- BranchProbability DefaultProb;
- };
- using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
-
/// Determine the rank by weight of CC in [First,Last]. If CC has more weight
/// than each cluster in the range, its rank is 0.
- static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First,
- CaseClusterIt Last);
+ unsigned caseClusterRank(const SwitchCG::CaseCluster &CC,
+ SwitchCG::CaseClusterIt First,
+ SwitchCG::CaseClusterIt Last);
/// Emit comparison and split W into two subtrees.
- void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
- Value *Cond, MachineBasicBlock *SwitchMBB);
+ void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+ const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+ MachineBasicBlock *SwitchMBB);
/// Lower W.
- void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB);
/// Peel the top probability case if it exceeds the threshold
- MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
- CaseClusterVector &Clusters,
- BranchProbability &PeeledCaseProb);
+ MachineBasicBlock *
+ peelDominantCaseCluster(const SwitchInst &SI,
+ SwitchCG::CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
@@ -588,17 +380,22 @@ public:
AliasAnalysis *AA = nullptr;
const TargetLibraryInfo *LibInfo;
- /// SwitchCases - Vector of CaseBlock structures used to communicate
- /// SwitchInst code generation information.
- std::vector<CaseBlock> SwitchCases;
+ class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
+ public:
+ SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo)
+ : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {}
+
+ virtual void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown()) override {
+ SDB->addSuccessorWithProb(Src, Dst, Prob);
+ }
- /// JTCases - Vector of JumpTable structures used to communicate
- /// SwitchInst code generation information.
- std::vector<JumpTableBlock> JTCases;
+ private:
+ SelectionDAGBuilder *SDB;
+ };
- /// BitTestCases - Vector of BitTestBlock structures used to communicate
- /// SwitchInst code generation information.
- std::vector<BitTestBlock> BitTestCases;
+ std::unique_ptr<SDAGSwitchLowering> SL;
/// A StackProtectorDescriptor structure used to communicate stack protector
/// information in between SelectBasicBlock and FinishBasicBlock.
@@ -608,27 +405,29 @@ public:
// PHI nodes.
DenseMap<const Constant *, unsigned> ConstantsOut;
- /// FuncInfo - Information about the function as a whole.
- ///
+ /// Information about the function as a whole.
FunctionLoweringInfo &FuncInfo;
- /// GFI - Garbage collection metadata for the function.
+ /// Information about the swifterror values used throughout the function.
+ SwiftErrorValueTracking &SwiftError;
+
+ /// Garbage collection metadata for the function.
GCFunctionInfo *GFI;
- /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ /// Map a landing pad to the call site indexes.
DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
- /// HasTailCall - This is set to true if a call in the current
- /// block has been translated as a tail call. In this case,
- /// no subsequent DAG nodes should be created.
+ /// This is set to true if a call in the current block has been translated as
+ /// a tail call. In this case, no subsequent DAG nodes should be created.
bool HasTailCall = false;
LLVMContext *Context;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
- CodeGenOpt::Level ol)
- : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- FuncInfo(funcinfo) {}
+ SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+ SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
@@ -670,20 +469,34 @@ public:
void visit(unsigned Opcode, const User &I);
- /// getCopyFromRegs - If there was virtual register allocated for the value V
- /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+  /// If there was a virtual register allocated for the value V, emit
+  /// CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue getCopyFromRegs(const Value *V, Type *Ty);
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
- /// weill drop that debug info as it isn't valid any longer.
+ /// will drop that debug info as it isn't valid any longer.
void dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr);
- // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
- // generate the debug data structures now that we've seen its definition.
+ /// If we saw an earlier dbg_value referring to V, generate the debug data
+ /// structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ /// For the given dangling debuginfo record, perform last-ditch efforts to
+ /// resolve the debuginfo to something that is represented in this DAG. If
+ /// this cannot be done, produce an Undef debug value record.
+ void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+
+ /// For a given Value, attempt to create and record a SDDbgValue in the
+ /// SelectionDAG.
+ bool handleDebugValue(const Value *V, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc CurDL,
+ DebugLoc InstDL, unsigned Order);
+
+ /// Evict any dangling debug information, attempting to salvage it first.
+ void resolveOrClearDbgInfo();
+
SDValue getValue(const Value *V);
bool findValue(const Value *V) const;
@@ -720,7 +533,7 @@ public:
MachineBasicBlock *SwitchBB,
BranchProbability TProb, BranchProbability FProb,
bool InvertCond);
- bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
@@ -733,7 +546,7 @@ public:
SDValue Op);
void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
- ImmutableCallSite CS, unsigned ArgIdx,
+ const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
Type *ReturnTy, bool IsPatchPoint);
@@ -741,7 +554,7 @@ public:
lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB = nullptr);
- /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
@@ -797,13 +610,13 @@ public:
void LowerStatepoint(ImmutableStatepoint ISP,
const BasicBlock *EHPadBB = nullptr);
- void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
+ void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
const BasicBlock *EHPadBB);
void LowerDeoptimizeCall(const CallInst *CI);
void LowerDeoptimizingReturn();
- void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee,
+ void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
const BasicBlock *EHPadBB,
bool VarArgDisallowed,
bool ForceVoidReturnTy);
@@ -833,25 +646,24 @@ private:
BranchProbability Prob = BranchProbability::getUnknown());
public:
- void visitSwitchCase(CaseBlock &CB,
- MachineBasicBlock *SwitchBB);
+ void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB);
void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB);
void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
- void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
- void visitBitTestCase(BitTestBlock &BB,
- MachineBasicBlock* NextMBB,
- BranchProbability BranchProbToNext,
- unsigned Reg,
- BitTestCase &B,
- MachineBasicBlock *SwitchBB);
- void visitJumpTable(JumpTable &JT);
- void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ void visitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext, unsigned Reg,
+ SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+ void visitJumpTable(SwitchCG::JumpTable &JT);
+ void visitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
+ void visitCallBr(const CallBrInst &I);
void visitResume(const ResumeInst &I);
void visitUnary(const User &I, unsigned Opcode);
@@ -932,7 +744,7 @@ private:
void visitStoreToSwiftError(const StoreInst &I);
void visitInlineAsm(ImmutableCallSite CS);
- const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -982,9 +794,12 @@ private:
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
DIExpression *Expr, const DebugLoc &dl,
unsigned DbgSDNodeOrder);
+
+ /// Lowers CallInst to an external symbol.
+ void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName);
};
-/// RegsForValue - This struct represents the registers (physical or virtual)
+/// This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information about
/// the value. The most common situation is to represent one value at a time,
/// but struct or array values are handled element-wise as multiple values. The
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 43df2abb674b..da3049881d31 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -96,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
@@ -145,6 +145,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getName((Intrinsic::ID)IID, None);
+ else if (!G)
+ return "Unknown intrinsic";
else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
@@ -170,7 +172,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UNDEF: return "undef";
case ISD::MERGE_VALUES: return "merge_values";
case ISD::INLINEASM: return "inlineasm";
+ case ISD::INLINEASM_BR: return "inlineasm_br";
case ISD::EH_LABEL: return "eh_label";
+ case ISD::ANNOTATION_LABEL: return "annotation_label";
case ISD::HANDLENODE: return "handlenode";
// Unary operators
@@ -297,7 +301,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UADDSAT: return "uaddsat";
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
+
case ISD::SMULFIX: return "smulfix";
+ case ISD::SMULFIXSAT: return "smulfixsat";
+ case ISD::UMULFIX: return "umulfix";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -309,9 +316,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
+ case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
+ case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
@@ -321,6 +330,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
+ case ISD::LROUND: return "lround";
+ case ISD::LLROUND: return "llround";
+ case ISD::LRINT: return "lrint";
+ case ISD::LLRINT: return "llrint";
// Control flow instructions
case ISD::BR: return "br";
@@ -650,6 +663,36 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ", " << AM;
OS << ">";
+ } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) {
+ OS << "<";
+
+ printMemOperand(OS, *MLd->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MLd->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MLd->getMemoryVT().getEVTString();
+
+ if (MLd->isExpandingLoad())
+ OS << ", expanding";
+
+ OS << ">";
+ } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MSt->getMemOperand(), G);
+
+ if (MSt->isTruncatingStore())
+ OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+
+ if (MSt->isCompressingStore())
+ OS << ", compressing";
+
+ OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
@@ -675,6 +718,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
<< " -> "
<< ASC->getDestAddressSpace()
<< ']';
+ } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
+ if (LN->hasOffset())
+ OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
}
if (VerboseDAGDumping) {
@@ -684,45 +730,63 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
- OS << "# D:" << isDivergent();
-
- if (!G)
- return;
-
- DILocation *L = getDebugLoc();
- if (!L)
- return;
-
- if (auto *Scope = L->getScope())
- OS << Scope->getFilename();
- else
- OS << "<unknown>";
- OS << ':' << L->getLine();
- if (unsigned C = L->getColumn())
- OS << ':' << C;
-
- for (SDDbgValue *Dbg : G->GetDbgValues(this)) {
- if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
- continue;
- Dbg->dump(OS);
- }
+ OS << " # D:" << isDivergent();
+
+ if (G && !G->GetDbgValues(this).empty()) {
+ OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']';
+ for (SDDbgValue *Dbg : G->GetDbgValues(this))
+ if (!Dbg->isInvalidated())
+ Dbg->print(OS);
+ } else if (getHasDebugValue())
+ OS << " [NoOfDbgValues>0]";
}
}
-LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const {
- OS << " DbgVal";
- if (kind==SDNODE)
- OS << '(' << u.s.ResNo << ')';
- OS << ":\"" << Var->getName() << '"';
+LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
+ OS << " DbgVal(Order=" << getOrder() << ')';
+ if (isInvalidated()) OS << "(Invalidated)";
+ if (isEmitted()) OS << "(Emitted)";
+ switch (getKind()) {
+ case SDNODE:
+ if (getSDNode())
+ OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')';
+ else
+ OS << "(SDNODE)";
+ break;
+ case CONST:
+ OS << "(CONST)";
+ break;
+ case FRAMEIX:
+ OS << "(FRAMEIX=" << getFrameIx() << ')';
+ break;
+ case VREG:
+ OS << "(VREG=" << getVReg() << ')';
+ break;
+ }
+ if (isIndirect()) OS << "(Indirect)";
+ OS << ":\"" << Var->getName() << '"';
#ifndef NDEBUG
- if (Expr->getNumElements())
- Expr->dump();
+ if (Expr->getNumElements())
+ Expr->dump();
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDDbgValue::dump() const {
+ if (isInvalidated())
+ return;
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
/// Return true if this node is so simple that we should just print it inline
/// if it appears as an operand.
-static bool shouldPrintInline(const SDNode &Node) {
+static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) {
+ // Avoid lots of cluttering when inline printing nodes with associated
+ // DbgValues in verbose mode.
+ if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty())
+ return false;
if (Node.getOpcode() == ISD::EntryToken)
return false;
return Node.getNumOperands() == 0;
@@ -731,7 +795,7 @@ static bool shouldPrintInline(const SDNode &Node) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (const SDValue &Op : N->op_values()) {
- if (shouldPrintInline(*Op.getNode()))
+ if (shouldPrintInline(*Op.getNode(), G))
continue;
if (Op.getNode()->hasOneUse())
DumpNodes(Op.getNode(), indent+2, G);
@@ -748,12 +812,24 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const {
I != E; ++I) {
const SDNode *N = &*I;
if (!N->hasOneUse() && N != getRoot().getNode() &&
- (!shouldPrintInline(*N) || N->use_empty()))
+ (!shouldPrintInline(*N, this) || N->use_empty()))
DumpNodes(N, 2, this);
}
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
- dbgs() << "\n\n";
+ dbgs() << "\n";
+
+ if (VerboseDAGDumping) {
+ if (DbgBegin() != DbgEnd())
+ dbgs() << "SDDbgValues:\n";
+ for (auto *Dbg : make_range(DbgBegin(), DbgEnd()))
+ Dbg->dump();
+ if (ByvalParmDbgBegin() != ByvalParmDbgEnd())
+ dbgs() << "Byval SDDbgValues:\n";
+ for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd()))
+ Dbg->dump();
+ }
+ dbgs() << "\n";
}
#endif
@@ -769,7 +845,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
if (!Value.getNode()) {
OS << "<null>";
return false;
- } else if (shouldPrintInline(*Value.getNode())) {
+ } else if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index af5c2433fa2f..bdf9f2c166e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -49,6 +49,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -63,6 +64,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -306,8 +308,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm),
FuncInfo(new FunctionLoweringInfo()),
+ SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
- SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
AA(), GFI(),
OptLevel(OL),
DAGSize(0) {
@@ -323,6 +326,7 @@ SelectionDAGISel::~SelectionDAGISel() {
delete SDB;
delete CurDAG;
delete FuncInfo;
+ delete SwiftError;
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -379,6 +383,30 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
}
}
+static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
+ MachineModuleInfo &MMI) {
+ // Only needed for MSVC
+ if (!TT.isWindowsMSVCEnvironment())
+ return;
+
+ // If it's already set, nothing to do.
+ if (MMI.usesMSVCFloatingPoint())
+ return;
+
+ for (const Instruction &I : instructions(F)) {
+ if (I.getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ for (const auto &Op : I.operands()) {
+ if (Op->getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ }
+ }
+}
+
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// If we already selected that function, we do not need to run SDISel.
if (mf.getProperties().hasProperty(
@@ -421,6 +449,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF, *ORE, this, LibInfo,
getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
+ SwiftError->setFunction(*MF);
// Now get the optional analyzes if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
@@ -474,6 +503,40 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
Fn.getContext().diagnose(DiagFallback);
}
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ // Note: it is important that this happens **before** the call to
+ // EmitLiveInCopies, since implementations can skip copies of unused
+ // registers. If we don't apply the reg fixups before, some registers may
+ // appear as unused and will be skipped, resulting in bad MI.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(),
+ E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ while (true) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E)
+ break;
+ To = J->second;
+ }
+ // Make sure the new register has a sufficiently constrained register class.
+ if (TargetRegisterInfo::isVirtualRegister(From) &&
+ TargetRegisterInfo::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
+ // Replace it.
+
+ // Replacing one register with another won't touch the kill flags.
+ // We need to conservatively clear the kill flags as a kill on the old
+ // register might dominate existing uses of the new register.
+ if (!MRI.use_empty(To))
+ MRI.clearKillFlags(From);
+ MRI.replaceRegWith(From, To);
+ }
+
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
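// A minimal standalone sketch (not part of the imported diff above) of the
// fixup-chasing step in the RegFixups loop added earlier in this hunk: each
// From -> To entry is chased to its ultimate replacement before the rewrite,
// so chains of scheduled replacements collapse to a single target register.
// Plain std:: containers are used here; all names are hypothetical, not LLVM API.
#include <cassert>
#include <unordered_map>

// Follow From -> To mappings until no further replacement is scheduled.
static unsigned resolveFixup(const std::unordered_map<unsigned, unsigned> &Fixups,
                             unsigned Reg) {
  for (auto It = Fixups.find(Reg); It != Fixups.end(); It = Fixups.find(Reg))
    Reg = It->second;
  return Reg;
}

int main() {
  std::unordered_map<unsigned, unsigned> Fixups = {{1, 2}, {2, 5}, {7, 5}};
  assert(resolveFixup(Fixups, 1) == 5); // 1 -> 2 -> 5
  assert(resolveFixup(Fixups, 7) == 5); // single hop
  assert(resolveFixup(Fixups, 3) == 3); // not scheduled for replacement
  return 0;
}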
@@ -507,7 +570,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
bool hasFI = MI->getOperand(0).isFI();
- unsigned Reg =
+ Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
EntryMBB->insert(EntryMBB->begin(), MI);
@@ -590,9 +653,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there is a call to setjmp in the machine function.
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+ // Determine if floating point is used for msvc
+ computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
+
// Replace forward-declared registers with the registers containing
// the desired value.
- MachineRegisterInfo &MRI = MF->getRegInfo();
for (DenseMap<unsigned, unsigned>::iterator
I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
I != E; ++I) {
@@ -663,6 +728,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
HadTailCall = SDB->HasTailCall;
+ SDB->resolveOrClearDbgInfo();
SDB->clear();
// Final step, emit the lowered DAG as machine code.
@@ -713,8 +779,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
StringRef GroupName = "sdag";
StringRef GroupDescription = "Instruction Selection and Scheduling";
std::string BlockName;
- int BlockNumber = -1;
- (void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
#ifndef NDEBUG
TargetTransformInfo &TTI =
@@ -735,7 +799,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ViewSUnitDAGs)
#endif
{
- BlockNumber = FuncInfo->MBB->getNumber();
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
@@ -1092,16 +1155,14 @@ void SelectionDAGISel::DoInstructionSelection() {
#endif
// When we are using non-default rounding modes or FP exception behavior
- // FP operations are represented by StrictFP pseudo-operations. They
- // need to be simplified here so that the target-specific instruction
- // selectors know how to handle them.
- //
- // If the current node is a strict FP pseudo-op, the isStrictFPOp()
- // function will provide the corresponding normal FP opcode to which the
- // node should be mutated.
- //
- // FIXME: The backends need a way to handle FP constraints.
- if (Node->isStrictFPOpcode())
+ // FP operations are represented by StrictFP pseudo-operations. For
+ // targets that do not (yet) understand strict FP operations directly,
+ // we convert them to normal FP opcodes instead at this point. This
+ // will allow them to be handled by existing target-specific instruction
+ // selectors.
+ if (Node->isStrictFPOpcode() &&
+ (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+ != TargetLowering::Legal))
Node = CurDAG->mutateStrictFPToFP(Node);
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
@@ -1228,77 +1289,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
-/// Set up SwiftErrorVals by going through the function. If the function has
-/// swifterror argument, it will be the first entry.
-static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
- FunctionLoweringInfo *FuncInfo) {
- if (!TLI->supportSwiftError())
- return;
-
- FuncInfo->SwiftErrorVals.clear();
- FuncInfo->SwiftErrorVRegDefMap.clear();
- FuncInfo->SwiftErrorVRegUpwardsUse.clear();
- FuncInfo->SwiftErrorVRegDefUses.clear();
- FuncInfo->SwiftErrorArg = nullptr;
-
- // Check if function has a swifterror argument.
- bool HaveSeenSwiftErrorArg = false;
- for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
- AI != AE; ++AI)
- if (AI->hasSwiftErrorAttr()) {
- assert(!HaveSeenSwiftErrorArg &&
- "Must have only one swifterror parameter");
- (void)HaveSeenSwiftErrorArg; // silence warning.
- HaveSeenSwiftErrorArg = true;
- FuncInfo->SwiftErrorArg = &*AI;
- FuncInfo->SwiftErrorVals.push_back(&*AI);
- }
-
- for (const auto &LLVMBB : Fn)
- for (const auto &Inst : LLVMBB) {
- if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
- if (Alloca->isSwiftError())
- FuncInfo->SwiftErrorVals.push_back(Alloca);
- }
-}
-
-static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
- FastISel *FastIS,
- const TargetLowering *TLI,
- const TargetInstrInfo *TII,
- SelectionDAGBuilder *SDB) {
- if (!TLI->supportSwiftError())
- return;
-
- // We only need to do this when we have swifterror parameter or swifterror
- // alloc.
- if (FuncInfo->SwiftErrorVals.empty())
- return;
-
- assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
- "expected to insert into entry block");
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- // We will always generate a copy from the argument. It is always used at
- // least by the 'return' of the swifterror.
- if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
- continue;
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // Assign Undef to Vreg. We construct MI directly to make sure it works
- // with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
- SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
- VReg);
-
- // Keep FastIS informed about the value we just inserted.
- if (FastIS)
- FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
-
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
- }
-}
-
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
@@ -1337,202 +1327,13 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
DIExpression *Expr = DI->getExpression();
if (Offset.getBoolValue())
- Expr = DIExpression::prepend(Expr, DIExpression::NoDeref,
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
Offset.getZExtValue());
MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
}
-/// Propagate swifterror values through the machine function CFG.
-static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
- auto *TLI = FuncInfo->TLI;
- if (!TLI->supportSwiftError())
- return;
-
- // We only need to do this when we have swifterror parameter or swifterror
- // alloc.
- if (FuncInfo->SwiftErrorVals.empty())
- return;
-
- // For each machine basic block in reverse post order.
- ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
- for (MachineBasicBlock *MBB : RPOT) {
- // For each swifterror value in the function.
- for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- auto Key = std::make_pair(MBB, SwiftErrorVal);
- auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
- auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key);
- bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end();
- unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
- bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end();
- assert(!(UpwardsUse && !DownwardDef) &&
- "We can't have an upwards use but no downwards def");
-
- // If there is no upwards exposed use and an entry for the swifterror in
- // the def map for this value we don't need to do anything: We already
- // have a downward def for this basic block.
- if (!UpwardsUse && DownwardDef)
- continue;
-
- // Otherwise we either have an upwards exposed use vreg that we need to
- // materialize or need to forward the downward def from predecessors.
-
- // Check whether we have a single vreg def from all predecessors.
- // Otherwise we need a phi.
- SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
- SmallSet<const MachineBasicBlock*, 8> Visited;
- for (auto *Pred : MBB->predecessors()) {
- if (!Visited.insert(Pred).second)
- continue;
- VRegs.push_back(std::make_pair(
- Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal)));
- if (Pred != MBB)
- continue;
- // We have a self-edge.
- // If there was no upwards use in this basic block there is now one: the
- // phi needs to use it self.
- if (!UpwardsUse) {
- UpwardsUse = true;
- UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
- assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end());
- UUseVReg = UUseIt->second;
- }
- }
-
- // We need a phi node if we have more than one predecessor with different
- // downward defs.
- bool needPHI =
- VRegs.size() >= 1 &&
- std::find_if(
- VRegs.begin(), VRegs.end(),
- [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
- -> bool { return V.second != VRegs[0].second; }) !=
- VRegs.end();
-
- // If there is no upwards exposed used and we don't need a phi just
- // forward the swifterror vreg from the predecessor(s).
- if (!UpwardsUse && !needPHI) {
- assert(!VRegs.empty() &&
- "No predecessors? The entry block should bail out earlier");
- // Just forward the swifterror vreg from the predecessor(s).
- FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second);
- continue;
- }
-
- auto DLoc = isa<Instruction>(SwiftErrorVal)
- ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
- : DebugLoc();
- const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
-
- // If we don't need a phi create a copy to the upward exposed vreg.
- if (!needPHI) {
- assert(UpwardsUse);
- assert(!VRegs.empty() &&
- "No predecessors? Is the Calling Convention correct?");
- unsigned DestReg = UUseVReg;
- BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
- DestReg)
- .addReg(VRegs[0].second);
- continue;
- }
-
- // We need a phi: if there is an upwards exposed use we already have a
- // destination virtual register number otherwise we generate a new one.
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- unsigned PHIVReg =
- UpwardsUse ? UUseVReg
- : FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder SwiftErrorPHI =
- BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
- TII->get(TargetOpcode::PHI), PHIVReg);
- for (auto BBRegPair : VRegs) {
- SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
- }
-
- // We did not have a definition in this block before: store the phi's vreg
- // as this block downward exposed def.
- if (!UpwardsUse)
- FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg);
- }
- }
-}
-
-static void preassignSwiftErrorRegs(const TargetLowering *TLI,
- FunctionLoweringInfo *FuncInfo,
- BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End) {
- if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
- return;
-
- // Iterator over instructions and assign vregs to swifterror defs and uses.
- for (auto It = Begin; It != End; ++It) {
- ImmutableCallSite CS(&*It);
- if (CS) {
- // A call-site with a swifterror argument is both use and def.
- const Value *SwiftErrorAddr = nullptr;
- for (auto &Arg : CS.args()) {
- if (!Arg->isSwiftError())
- continue;
- // Use of swifterror.
- assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
- SwiftErrorAddr = &*Arg;
- assert(SwiftErrorAddr->isSwiftError() &&
- "Must have a swifterror value argument");
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
- &*It, FuncInfo->MBB, SwiftErrorAddr);
- assert(CreatedReg);
- }
- if (!SwiftErrorAddr)
- continue;
-
- // Def of swifterror.
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) =
- FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
- assert(CreatedReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
- // A load is a use.
- } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
- const Value *V = LI->getOperand(0);
- if (!V->isSwiftError())
- continue;
-
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) =
- FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V);
- assert(CreatedReg);
-
- // A store is a def.
- } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
- const Value *SwiftErrorAddr = SI->getOperand(1);
- if (!SwiftErrorAddr->isSwiftError())
- continue;
-
- // Def of swifterror.
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) =
- FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
- assert(CreatedReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
- // A return in a swiferror returning function is a use.
- } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
- const Function *F = R->getParent()->getParent();
- if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
- continue;
-
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
- R, FuncInfo->MBB, FuncInfo->SwiftErrorArg);
- assert(CreatedReg);
- }
- }
-}
-
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
@@ -1542,8 +1343,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
}
- setupSwiftErrorVals(Fn, TLI, FuncInfo);
-
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
// Lower arguments up front. An RPO iteration always visits the entry block
@@ -1589,7 +1388,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
else
FastIS->setLastLocalValue(nullptr);
}
- createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+ bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
+
+ if (FastIS && Inserted)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
processDbgDeclares(FuncInfo);
@@ -1644,7 +1447,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Pre-assign swifterror vregs.
- preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End);
+ SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
@@ -1692,7 +1495,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// to keep track of gc-relocates for a particular gc-statepoint. This is
// done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
// visitGCRelocate.
- if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) {
+ if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) &&
+ !isGCResult(Inst)) {
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
@@ -1712,7 +1516,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
!Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
- R = FuncInfo->CreateRegs(Inst->getType());
+ R = FuncInfo->CreateRegs(Inst);
}
bool HadTailCall = false;
@@ -1799,7 +1603,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
SP.copyToMachineFrameInfo(MF->getFrameInfo());
- propagateSwiftErrorVRegs(FuncInfo);
+ SwiftError->propagateVRegs();
delete FastIS;
SDB->clearDanglingDebugInfo();
@@ -1969,7 +1773,7 @@ SelectionDAGISel::FinishBasicBlock() {
}
// Lower each BitTestBlock.
- for (auto &BTB : SDB->BitTestCases) {
+ for (auto &BTB : SDB->SL->BitTestCases) {
// Lower header first, if it wasn't already lowered
if (!BTB.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
@@ -2050,30 +1854,30 @@ SelectionDAGISel::FinishBasicBlock() {
}
}
}
- SDB->BitTestCases.clear();
+ SDB->SL->BitTestCases.clear();
// If the JumpTable record is filled in, then we need to emit a jump table.
// Updating the PHI nodes is tricky in this case, since we need to determine
// whether the PHI is a successor of the range check MBB or the jump table MBB
- for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
- if (!SDB->JTCases[i].first.Emitted) {
+ if (!SDB->SL->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
- FuncInfo->MBB);
+ SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second,
+ SDB->SL->JTCases[i].first, FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitJumpTable(SDB->JTCases[i].second);
+ SDB->visitJumpTable(SDB->SL->JTCases[i].second);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
@@ -2086,31 +1890,31 @@ SelectionDAGISel::FinishBasicBlock() {
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
- if (PHIBB == SDB->JTCases[i].second.Default)
+ if (PHIBB == SDB->SL->JTCases[i].second.Default)
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
- .addMBB(SDB->JTCases[i].first.HeaderBB);
+ .addMBB(SDB->SL->JTCases[i].first.HeaderBB);
// JT BB. Just iterate over successors here
if (FuncInfo->MBB->isSuccessor(PHIBB))
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
}
}
- SDB->JTCases.clear();
+ SDB->SL->JTCases.clear();
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
- for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
SmallVector<MachineBasicBlock *, 2> Succs;
- Succs.push_back(SDB->SwitchCases[i].TrueBB);
- if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
- Succs.push_back(SDB->SwitchCases[i].FalseBB);
+ Succs.push_back(SDB->SL->SwitchCases[i].TrueBB);
+ if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SL->SwitchCases[i].FalseBB);
// Emit the code. Note that this could result in FuncInfo->MBB being split.
- SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
@@ -2146,7 +1950,7 @@ SelectionDAGISel::FinishBasicBlock() {
}
}
}
- SDB->SwitchCases.clear();
+ SDB->SL->SwitchCases.clear();
}
/// Create the scheduler. If a specific scheduler was specified
@@ -2413,14 +2217,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
-void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
SDLoc DL(N);
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops, DL);
const EVT VTs[] = {MVT::Other, MVT::Glue};
- SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops);
+ SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops);
New->setNodeId(-1);
ReplaceUses(N, New.getNode());
CurDAG->RemoveDeadNode(N);
@@ -2728,6 +2532,14 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ if (2 >= N.getNumOperands())
+ return false;
+ return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2842,6 +2654,9 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
+ case SelectionDAGISel::OPC_CheckChild2CondCode:
+ Result = !::CheckChild2CondCode(Table, Index, N);
+ return Index;
case SelectionDAGISel::OPC_CheckValueType:
Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
SDISel.CurDAG->getDataLayout());
@@ -2970,7 +2785,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
- Select_INLINEASM(NodeToMatch);
+ case ISD::INLINEASM_BR:
+ Select_INLINEASM(NodeToMatch,
+ NodeToMatch->getOpcode() == ISD::INLINEASM_BR);
return;
case ISD::READ_REGISTER:
Select_READ_REGISTER(NodeToMatch);
@@ -3328,6 +3145,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_CheckCondCode:
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
+ case OPC_CheckChild2CondCode:
+ if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
case OPC_CheckValueType:
if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
CurDAG->getDataLayout()))
@@ -3348,6 +3168,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
+ case OPC_CheckImmAllOnesV:
+ if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+ continue;
+ case OPC_CheckImmAllZerosV:
+ if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+ continue;
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3b19bff4743d..cdc09d59f6a4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -1,9 +1,8 @@
//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 3a283bc5fdc0..3a2df6f60593 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 90a1b350fc94..395e9a8a4fc5 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1,9 +1,8 @@
//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -348,16 +347,28 @@ static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
}
+static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
+ FrameIndexSDNode &FI) {
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex());
+ auto MMOFlags = MachineMemOperand::MOStore |
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ auto &MFI = MF.getFrameInfo();
+ return MF.getMachineMemOperand(PtrInfo, MMOFlags,
+ MFI.getObjectSize(FI.getIndex()),
+ MFI.getObjectAlignment(FI.getIndex()));
+}
+
/// Spill a value incoming to the statepoint. It might be either part of
/// vmstate
/// or gcstate. In both cases unconditionally spill it on the stack unless it
/// is a null constant. Return pair with first element being frame index
/// containing saved value and second element with outgoing chain from the
/// emitted store
-static std::pair<SDValue, SDValue>
+static std::tuple<SDValue, SDValue, MachineMemOperand*>
spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
SelectionDAGBuilder &Builder) {
SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+ MachineMemOperand* MMO = nullptr;
// Emit new store if we didn't do it for this ptr before
if (!Loc.getNode()) {
@@ -367,10 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// We use TargetFrameIndex so that isel will not select it into LEA
Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
- // TODO: We can create TokenFactor node instead of
- // chaining stores one after another, this may allow
- // a bit more optimal scheduling for them
-
#ifndef NDEBUG
// Right now we always allocate spill slots that are of the same
// size as the value we're about to spill (the size of spillee can
@@ -382,15 +389,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
"Bad spill: stack slot does not match!");
#endif
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- MachinePointerInfo::getFixedStack(
- Builder.DAG.getMachineFunction(), Index));
+ PtrInfo);
+ MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
+
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
assert(Loc.getNode());
- return std::make_pair(Loc, Chain);
+ return std::make_tuple(Loc, Chain, MMO);
}
/// Lower a single value incoming to a statepoint node. This value can be
@@ -398,7 +408,11 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// case constants and allocas, then fall back to spilling if required.
static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand*> &MemRefs,
SelectionDAGBuilder &Builder) {
+ // Note: We know all of these spills are independent, but don't bother to
+ // exploit that chain wise. DAGCombine will happily do so as needed, so
+ // doing it here would be a small compile time win at most.
SDValue Chain = Builder.getRoot();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
@@ -417,6 +431,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
"Incoming value is a frame index!");
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+
} else if (LiveInOnly) {
// If this value is live in (not live-on-return, or live-through), we can
// treat it the same way patchpoint treats it's "live in" values. We'll
@@ -433,8 +452,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
// need to be optional since it requires a lot of complexity on the
// runtime side which not all would support.
auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
- Ops.push_back(Res.first);
- Chain = Res.second;
+ Ops.push_back(std::get<0>(Res));
+ if (auto *MMO = std::get<2>(Res))
+ MemRefs.push_back(MMO);
+ Chain = std::get<1>(Res);;
}
Builder.DAG.setRoot(Chain);
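// A minimal standalone sketch (not part of the imported diff above) of the
// calling-convention change around spillIncomingStatepointValue: the helper
// now returns a third element, the MachineMemOperand for the spill slot, so
// callers unpack a std::tuple with std::get instead of a std::pair. All types
// and names below are hypothetical stand-ins, not LLVM API.
#include <tuple>

struct SpillLoc {};    // stand-in for the TargetFrameIndex SDValue
struct NodeChain {};   // stand-in for the updated chain SDValue
struct MemOperand {};  // stand-in for MachineMemOperand

// The MemOperand pointer may stay null when the value was already spilled and
// no new store is emitted, mirroring the nullptr-initialised MMO above.
static std::tuple<SpillLoc, NodeChain, MemOperand *> spillValue(bool AlreadySpilled) {
  static MemOperand TheMMO;
  MemOperand *MMO = AlreadySpilled ? nullptr : &TheMMO;
  return std::make_tuple(SpillLoc{}, NodeChain{}, MMO);
}

int main() {
  auto Res = spillValue(/*AlreadySpilled=*/false);
  SpillLoc Loc = std::get<0>(Res);    // operand recorded for the statepoint
  NodeChain Chain = std::get<1>(Res); // chain used for subsequent stores
  if (MemOperand *MMO = std::get<2>(Res))
    (void)MMO;                        // caller appends it to its MemRefs list
  (void)Loc;
  (void)Chain;
  return 0;
}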
@@ -449,7 +470,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
/// will be set to the last value spilled (if any were).
static void
lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
- SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will be:
// deopt argument length, deopt arguments.., gc arguments...
@@ -533,7 +554,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
if (!Incoming.getNode())
Incoming = Builder.getValue(V);
const bool LiveInValue = LiveInDeopt && !isGCValue(V);
- lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
+ lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -544,11 +565,11 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
- Ops, Builder);
+ Ops, MemRefs, Builder);
const Value *Ptr = SI.Ptrs[i];
lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
- Ops, Builder);
+ Ops, MemRefs, Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
@@ -564,6 +585,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
"Incoming value is a frame index!");
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
}
}
@@ -630,7 +655,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredMetaArgs;
- lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this);
+ SmallVector<MachineMemOperand*, 16> MemRefs;
+ lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
// Now that we've emitted the spills, we need to update the root so that the
// call sequence is ordered correctly.
@@ -746,8 +772,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// input. This allows someone else to chain off us as needed.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDNode *StatepointMCNode =
- DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+ MachineSDNode *StatepointMCNode =
+ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+ DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
SDNode *SinkNode = StatepointMCNode;
@@ -799,7 +826,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
void
SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
const BasicBlock *EHPadBB /*= nullptr*/) {
- assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg &&
+ assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
#ifndef NDEBUG
@@ -832,7 +859,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
}
StatepointLoweringInfo SI(DAG);
- populateCallLoweringInfo(SI.CLI, ISP.getCallSite(),
+ populateCallLoweringInfo(SI.CLI, ISP.getCall(),
ImmutableStatepoint::CallArgsBeginPos,
ISP.getNumCallArgs(), ActualCallee,
ISP.getActualReturnType(), false /* IsPatchPoint */);
@@ -859,7 +886,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
const GCResultInst *GCResult = ISP.getGCResult();
Type *RetTy = ISP.getActualReturnType();
if (!RetTy->isVoidTy() && GCResult) {
- if (GCResult->getParent() != ISP.getCallSite().getParent()) {
+ if (GCResult->getParent() != ISP.getCall()->getParent()) {
// Result value will be used in a different basic block so we need to
// export it now. Default exporting mechanism will not work here because
// statepoint call has a different type than the actual call. It means
@@ -871,7 +898,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy,
- ISP.getCallSite().getCallingConv());
+ ISP.getCall()->getCallingConv());
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
@@ -891,22 +918,22 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
}
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
- ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB,
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
bool VarArgDisallowed, bool ForceVoidReturnTy) {
StatepointLoweringInfo SI(DAG);
- unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
+ unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
- SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee,
- ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(),
+ SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
false);
if (!VarArgDisallowed)
- SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg();
+ SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
- auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);
+ auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt);
unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
- auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
SI.ID = SD.StatepointID.getValueOr(DefaultID);
SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
@@ -918,15 +945,14 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
// NB! The GC arguments are deliberately left empty.
if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
- const Instruction *Inst = CS.getInstruction();
- ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
- setValue(Inst, ReturnVal);
+ ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
+ setValue(Call, ReturnVal);
}
}
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
- ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) {
- LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB,
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) {
+ LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB,
/* VarArgDisallowed = */ false,
/* ForceVoidReturnTy = */ false);
}
@@ -986,11 +1012,11 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
}
SDValue SpillSlot =
- DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
+ DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
- // Be conservative: flush all pending loads
- // TODO: Probably we can be less restrictive on this,
- // it may allow more scheduling opportunities.
+ // Note: We know all of these reloads are independent, but don't bother to
+ // exploit that chain wise. DAGCombine will happily do so as needed, so
+ // doing it here would be a small compile time win at most.
SDValue Chain = getRoot();
SDValue SpillLoad =
@@ -1000,7 +1026,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
*DerivedPtrLocation));
- // Again, be conservative, don't emit pending loads
DAG.setRoot(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 372c82a359f6..70507932681d 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -1,9 +1,8 @@
//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -67,13 +66,18 @@ public:
/// before the next statepoint. If we don't see it, we'll report
/// an assertion.
void scheduleRelocCall(const CallInst &RelocCall) {
- PendingGCRelocateCalls.push_back(&RelocCall);
+ // We are not interested in lowering dead instructions.
+ if (!RelocCall.use_empty())
+ PendingGCRelocateCalls.push_back(&RelocCall);
}
/// Remove this gc_relocate from the list we're expecting to see
/// before the next statepoint. If we weren't expecting to see
/// it, we'll report an assertion.
void relocCallVisited(const CallInst &RelocCall) {
+ // We are not interested in lowering dead instructions.
+ if (RelocCall.use_empty())
+ return;
auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
assert(I != PendingGCRelocateCalls.end() &&
"Visited unexpected gcrelocate call");
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a2f05c1e3cef..b260cd91d468 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1,9 +1,8 @@
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -100,19 +99,22 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
-void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
- IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
- IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
- IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
- IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
- IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
- IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
- IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
- IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
- IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
- IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(ArgIdx);
+ IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ Alignment = Call->getParamAlignment(ArgIdx);
+ ByValType = nullptr;
+ if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+ ByValType = Call->getParamByValType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -121,7 +123,8 @@ std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops, bool isSigned,
const SDLoc &dl, bool doesNotReturn,
- bool isReturnValueUsed) const {
+ bool isReturnValueUsed,
+ bool isPostTypeLegalization) const {
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
@@ -147,11 +150,114 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(doesNotReturn)
.setDiscardResult(!isReturnValueUsed)
+ .setIsPostTypeLegalization(isPostTypeLegalization)
.setSExtResult(signExtend)
.setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
+bool
+TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const {
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+ return false;
+
+ EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
+ FuncAttributes);
+
+ if (VT == MVT::Other) {
+ // Use the largest integer type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ VT = MVT::i64;
+ while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+
+ // Find the largest legal integer type.
+ MVT LVT = MVT::i64;
+ while (!isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ // If the type we've chosen is larger than the largest legal integer type
+ // then use that instead.
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
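The new helper above greedily covers Size bytes with the widest type whose alignment constraints are met, then shrinks (or, when misaligned accesses are fast, overlaps) for the tail. A minimal standalone sketch of that greedy covering using plain byte widths instead of EVTs; the width table and function name here are illustrative only and not part of the patch:

#include <cstdint>
#include <cstdio>
#include <vector>

// Greedy memop covering: always pick the widest width that still fits the
// remaining size, so the tail degrades to narrower ops, mirroring the loop
// structure of findOptimalMemOpLowering (without alignment/overlap checks).
static bool coverWithMemOps(std::vector<unsigned> &MemOps, uint64_t Size,
                            unsigned Limit) {
  static const unsigned Widths[] = {16, 8, 4, 2, 1}; // bytes, widest first
  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = 1;
    for (unsigned W : Widths)
      if (W <= Size) { VTSize = W; break; }
    if (++NumMemOps > Limit)
      return false; // too many ops, fall back to the generic lowering
    MemOps.push_back(VTSize);
    Size -= VTSize;
  }
  return true;
}

int main() {
  std::vector<unsigned> Ops;
  if (coverWithMemOps(Ops, 23, /*Limit=*/8))
    for (unsigned W : Ops)
      std::printf("%u-byte op\n", W); // prints 16, 4, 2, 1
  return 0;
}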
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
@@ -346,7 +452,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
TargetLoweringOpt &TLO) const {
- SelectionDAG &DAG = TLO.DAG;
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
@@ -372,8 +477,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
if (!C.isSubsetOf(Demanded)) {
EVT VT = Op.getValueType();
- SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
- SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
@@ -487,6 +592,10 @@ bool TargetLowering::SimplifyDemandedBits(
// Don't know anything.
Known = KnownBits(BitWidth);
+ // Undef operand.
+ if (Op.isUndef())
+ return false;
+
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
@@ -509,40 +618,116 @@ bool TargetLowering::SimplifyDemandedBits(
DemandedElts = APInt::getAllOnesValue(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
- if (!Op.isUndef())
- return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
- return false;
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth == 6) { // Limit search depth.
return false;
}
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
+ case ISD::SCALAR_TO_VECTOR: {
+ if (!DemandedElts[0])
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+ KnownBits SrcKnown;
+ SDValue Src = Op.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
+ return true;
+ Known = SrcKnown.zextOrTrunc(BitWidth, false);
+ break;
+ }
case ISD::BUILD_VECTOR:
- // Collect the known bits that are shared by every constant vector element.
- Known.Zero.setAllBits(); Known.One.setAllBits();
- for (SDValue SrcOp : Op->ops()) {
- if (!isa<ConstantSDNode>(SrcOp)) {
- // We can only handle all constant values - bail out with no known bits.
- Known = KnownBits(BitWidth);
- return false;
- }
- Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
- Known2.Zero = ~Known2.One;
-
- // BUILD_VECTOR can implicitly truncate sources, we must handle this.
- if (Known2.One.getBitWidth() != BitWidth) {
- assert(Known2.getBitWidth() > BitWidth &&
- "Expected BUILD_VECTOR implicit truncation");
- Known2 = Known2.trunc(BitWidth);
+ // Collect the known bits that are shared by every demanded element.
+ // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (getTargetConstantFromLoad(LD)) {
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+
+ // If index isn't constant, assume we need all vector elements AND the
+ // inserted element.
+ APInt DemandedVecElts(DemandedElts);
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+ unsigned Idx = CIdx->getZExtValue();
+ DemandedVecElts.clearBit(Idx);
+
+ // Inserted element is not required.
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ }
+
+ KnownBits KnownScl;
+ unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+ APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ Known = KnownScl.zextOrTrunc(BitWidth, false);
+
+ KnownBits KnownVec;
+ if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
+ Depth + 1))
+ return true;
+
+ if (!!DemandedVecElts) {
+ Known.One &= KnownVec.One;
+ Known.Zero &= KnownVec.Zero;
+ }
+
+ return false;
+ }
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Base = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+
+ // If index isn't constant, assume we need the original demanded base
+ // elements and ALL the inserted subvector elements.
+ APInt BaseElts = DemandedElts;
+ APInt SubElts = APInt::getAllOnesValue(NumSubElts);
+ if (isa<ConstantSDNode>(Op.getOperand(2))) {
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
+ if (Idx.ule(NumElts - NumSubElts)) {
+ unsigned SubIdx = Idx.getZExtValue();
+ SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
}
+ }
- // Known bits are the values that are shared by every element.
- // TODO: support per-element known bits.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ KnownBits KnownSub, KnownBase;
+ if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+ Depth + 1))
+ return true;
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!SubElts) {
+ Known.One &= KnownSub.One;
+ Known.Zero &= KnownSub.Zero;
}
- return false; // Don't fall through, will infinitely loop.
+ if (!!BaseElts) {
+ Known.One &= KnownBase.One;
+ Known.Zero &= KnownBase.Zero;
+ }
+ break;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
@@ -640,11 +825,12 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
- Depth + 1))
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -674,11 +860,12 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
- Depth + 1))
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -705,10 +892,12 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -831,20 +1020,23 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
- if (ShAmt &&
- (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
+ if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
@@ -862,8 +1054,14 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
- Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ // Try shrinking the operation as long as the shift amount will still be
+ // in range.
+ if ((ShAmt < DemandedBits.getActiveBits()) &&
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
@@ -919,12 +1117,16 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ EVT ShiftVT = Op1.getValueType();
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -935,10 +1137,11 @@ bool TargetLowering::SimplifyDemandedBits(
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
- if (ShAmt &&
- (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
@@ -948,7 +1151,7 @@ bool TargetLowering::SimplifyDemandedBits(
Opc = ISD::SHL;
}
- SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
@@ -957,7 +1160,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Compute the new bits that are at the top now.
- if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -978,12 +1182,15 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -996,7 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -1026,6 +1234,55 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::FSHL:
+ case ISD::FSHR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
+
+ // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
+ // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
+ APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
+ APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+
+ Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
+ }
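For a constant shift amount the funnel shift is just a pair of ordinary shifts OR'd together, which is why the demanded bits split into the Demanded0/Demanded1 masks computed above. A standalone sanity check of that decomposition on uint32_t; plain C++, not LLVM API:

#include <cassert>
#include <cstdint>

// fshl(x, y, a): shift the 64-bit concatenation x:y left by a and keep the
// high 32 bits; for 0 < a < 32 this equals (x << a) | (y >> (32 - a)).
static uint32_t fshl32(uint32_t X, uint32_t Y, unsigned A) {
  A %= 32;
  if (A == 0)
    return X; // 0-shift returns the first operand, as handled above
  return (X << A) | (Y >> (32 - A));
}

int main() {
  uint32_t X = 0x12345678, Y = 0x9ABCDEF0;
  unsigned A = 12;
  // If only the low 8 bits of the result are demanded (and A >= 8), they come
  // entirely from Y >> (32 - A), i.e. the Demanded1 = DemandedBits << (32 - A)
  // side of the split; nothing is demanded from X.
  uint32_t Result = fshl32(X, Y, A);
  uint32_t FromY = Y >> (32 - A);
  assert((Result & 0xFF) == (FromY & 0xFF));
  return 0;
}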
+ case ISD::BITREVERSE: {
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedSrcBits = DemandedBits.reverseBits();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.reverseBits();
+ Known.Zero = Known2.Zero.reverseBits();
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1033,8 +1290,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
+ unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
+ bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -1099,79 +1356,116 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
Known.Zero = KnownLo.Zero.zext(BitWidth) |
- KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
+ KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
Known.One = KnownLo.One.zext(BitWidth) |
- KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
+ KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
- case ISD::ZERO_EXTEND: {
+ case ISD::ZERO_EXTEND:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
- if (DemandedBits.getActiveBits() <= InBits)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
APInt InDemandedBits = DemandedBits.trunc(InBits);
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBits);
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
- case ISD::SIGN_EXTEND: {
+ case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
- if (DemandedBits.getActiveBits() <= InBits)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = DemandedBits.trunc(InBits);
InDemandedBits.setBit(InBits - 1);
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
- if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+ if (Known.isNonNegative()) {
+ unsigned Opc =
+ IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
break;
}
- case ISD::SIGN_EXTEND_VECTOR_INREG: {
- // TODO - merge this with SIGN_EXTEND above?
+ case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
-
- APInt InDemandedBits = DemandedBits.trunc(InBits);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
- // If some of the sign extended bits are demanded, we know that the sign
- // bit is demanded.
- if (InBits < DemandedBits.getActiveBits())
- InDemandedBits.setBit(InBits - 1);
+ // If we only need the bottom element then we can just bitcast.
+ // TODO: Handle ANY_EXTEND?
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
- return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- // If the sign bit is known one, the top bits match.
- Known = Known.sext(BitWidth);
- break;
- }
- case ISD::ANY_EXTEND: {
- SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
APInt InDemandedBits = DemandedBits.trunc(InBits);
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known = Known.zext(BitWidth);
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::TRUNCATE: {
@@ -1198,29 +1492,29 @@ bool TargetLowering::SimplifyDemandedBits(
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
- ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
- if (!ShAmt)
+
+ auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
break;
+
SDValue Shift = Src.getOperand(1);
- if (TLO.LegalTypes()) {
- uint64_t ShVal = ShAmt->getZExtValue();
+ uint64_t ShVal = ShAmt->getZExtValue();
+
+ if (TLO.LegalTypes())
Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
- }
- if (ShAmt->getZExtValue() < BitWidth) {
- APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
- OperandBitWidth - BitWidth);
- HighBits.lshrInPlace(ShAmt->getZExtValue());
- HighBits = HighBits.trunc(BitWidth);
-
- if (!(HighBits & DemandedBits)) {
- // None of the shifted in bits are needed. Add a truncate of the
- // shift input, then shift it.
- SDValue NewTrunc =
- TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
- }
+ APInt HighBits =
+ APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
+ HighBits.lshrInPlace(ShVal);
+ HighBits = HighBits.trunc(BitWidth);
+
+ if (!(HighBits & DemandedBits)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
}
break;
}
@@ -1234,8 +1528,8 @@ bool TargetLowering::SimplifyDemandedBits(
// demanded by its users.
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
- Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
+ TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1266,7 +1560,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known = Known2;
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth);
+ Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::BITCAST: {
@@ -1297,40 +1591,68 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
- // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
- // demanding the element if any bits from it are demanded.
+
+ // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
+ // Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
// TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
if (SrcVT.isVector() && NumSrcEltBits > 1 &&
(BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
- auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
- DemandedSubElts = APInt::getNullValue(Scale);
- for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue())
- DemandedSubElts.setBit(i);
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
}
+ }
+
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
return true;
- };
- APInt DemandedSubElts;
- if (GetDemandedSubMask(DemandedSubElts)) {
- unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
+ } else if ((NumSrcEltBits % BitWidth) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / BitWidth;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * BitWidth;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
- APInt KnownUndef, KnownZero;
- if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
- TLO, Depth + 1))
+ if (SrcVT.isVector()) {
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
return true;
}
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
}
+
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- Known = TLO.DAG.computeKnownBits(Op, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
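Both little-endian branches above use the same scale-and-offset mapping between wide and narrow elements, just from opposite directions. A standalone sketch of the index arithmetic for a v8i16 -> v4i32 bitcast; the concrete types are only an example:

#include <cstdio>

// Little-endian bitcast v8i16 -> v4i32: Scale = 32 / 16 = 2, destination
// element j covers source elements j*Scale + i for i in [0, Scale), and the
// source element lands at bit offset i * 16 inside the destination element.
// This is the same mapping as DemandedSrcElts.setBit((j * Scale) + i) above.
int main() {
  const unsigned DstBits = 32, SrcBits = 16;
  const unsigned Scale = DstBits / SrcBits;
  const unsigned NumDstElts = 4;
  for (unsigned j = 0; j != NumDstElts; ++j)
    for (unsigned i = 0; i != Scale; ++i)
      std::printf("dst elt %u bits [%u,%u) <- src elt %u\n", j, i * SrcBits,
                  (i + 1) * SrcBits, j * Scale + i);
  return 0;
}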
@@ -1343,8 +1665,10 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
- if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
+ Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
+ Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1353,8 +1677,8 @@ bool TargetLowering::SimplifyDemandedBits(
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
- Flags);
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
@@ -1431,15 +1755,64 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
+
return Simplified;
}
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+ const APInt &UndefOp0,
+ const APInt &UndefOp1) {
+ EVT VT = BO.getValueType();
+ assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
+ "Vector binop only");
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(UndefOp0.getBitWidth() == NumElts &&
+ UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+ auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+ const APInt &UndefVals) {
+ if (UndefVals[Index])
+ return DAG.getUNDEF(EltVT);
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ // Try hard to make sure that the getNode() call is not creating temporary
+ // nodes. Ignore opaque integers because they do not constant fold.
+ SDValue Elt = BV->getOperand(Index);
+ auto *C = dyn_cast<ConstantSDNode>(Elt);
+ if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+ return Elt;
+ }
+
+ return SDValue();
+ };
+
+ APInt KnownUndef = APInt::getNullValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If both inputs for this element are either constant or undef and match
+ // the element type, compute the constant/undef result for this element of
+ // the vector.
+ // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+ // not handle FP constants. The code within getNode() should be refactored
+ // to avoid the danger of creating a bogus temporary node here.
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+ SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+ if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+ if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+ KnownUndef.setBit(i);
+ }
+ return KnownUndef;
+}
+
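The helper above marks an output lane undef when constant folding its two constant-or-undef input lanes yields undef. A toy scalar model of that per-lane loop; the folding rule used here (undef op anything -> undef) is deliberately simplified and does not claim to match SelectionDAG's exact constant-folding behaviour:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Each lane is either a known constant or "undef"; fold lane-by-lane and
// record which result lanes come out undef, mirroring the loop shape of
// getKnownUndefForVectorBinop.
struct Elt { bool IsUndef; uint64_t Val; };

static std::vector<bool> knownUndefLanes(const std::vector<Elt> &A,
                                         const std::vector<Elt> &B) {
  std::vector<bool> Undef(A.size(), false);
  for (std::size_t i = 0; i != A.size(); ++i)
    Undef[i] = A[i].IsUndef || B[i].IsUndef; // toy fold rule, see note above
  return Undef;
}

int main() {
  std::vector<Elt> A = {{false, 1}, {true, 0}, {false, 3}, {false, 4}};
  std::vector<Elt> B = {{false, 5}, {false, 6}, {true, 0}, {false, 8}};
  std::vector<bool> U = knownUndefLanes(A, B);
  for (std::size_t i = 0; i != U.size(); ++i)
    std::printf("lane %zu: %s\n", i, U[i] ? "undef" : "defined");
  return 0;
}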
bool TargetLowering::SimplifyDemandedVectorElts(
- SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
+ SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
- APInt DemandedElts = DemandedEltMask;
+ APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
assert(VT.getVectorNumElements() == NumElts &&
@@ -1617,7 +1990,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Sub = Op.getOperand(1);
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
- const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
if (Idx.ugt(NumElts - NumSubElts))
break;
unsigned SubIdx = Idx.getZExtValue();
@@ -1786,18 +2159,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
APInt SrcUndef, SrcZero;
SDValue Src = Op.getOperand(0);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
- if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
return true;
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+ if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
+ DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+ // aext - if we just need the bottom element then we can bitcast.
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+ }
+
if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
// zext(undef) upper bits are guaranteed to be zero.
if (DemandedElts.isSubsetOf(KnownUndef))
@@ -1806,6 +2187,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+
+ // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
@@ -1815,17 +2199,38 @@ bool TargetLowering::SimplifyDemandedVectorElts(
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
- KnownZero, TLO, Depth + 1))
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
return true;
- KnownZero &= SrcZero;
- KnownUndef &= SrcUndef;
+
+ KnownZero = ZeroLHS & ZeroRHS;
+ KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+ break;
+ }
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+
+ KnownZero = ZeroLHS;
+ KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
break;
}
+ case ISD::MUL:
case ISD::AND: {
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
@@ -1837,6 +2242,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
+ // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+ // and then handle 'and' nodes with the rest of the binop opcodes.
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
@@ -1864,8 +2271,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
} else {
KnownBits Known;
APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
- if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
- Depth, AssumeSingleUse))
+ if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
+ TLO, Depth, AssumeSingleUse))
return true;
}
break;
@@ -1950,6 +2357,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
+const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
+ return nullptr;
+}
+
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
@@ -2044,10 +2455,9 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
-SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
- ISD::CondCode Cond,
- DAGCombinerInfo &DCI,
- const SDLoc &DL) const {
+SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
// Match these patterns in any of their permutations:
// (X & Y) == Y
// (X & Y) != Y
@@ -2200,6 +2610,49 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
+/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
+/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
+/// handle the commuted versions of these patterns.
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
+ unsigned BOpcode = N0.getOpcode();
+ assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
+ "Unexpected binop");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+ // (X + Y) == X --> Y == 0
+ // (X - Y) == X --> Y == 0
+ // (X ^ Y) == X --> Y == 0
+ SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+ if (X == N1)
+ return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
+
+ if (Y != N1)
+ return SDValue();
+
+ // (X + Y) == Y --> X == 0
+ // (X ^ Y) == Y --> X == 0
+ if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
+
+ // The shift would not be valid if the operands are boolean (i1).
+ if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
+ return SDValue();
+
+ // (X - Y) == Y --> X == Y << 1
+ EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
+ !DCI.isBeforeLegalize());
+ SDValue One = DAG.getConstant(1, DL, ShiftVT);
+ SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(YShl1.getNode());
+ return DAG.getSetCC(DL, VT, X, YShl1, Cond);
+}
+
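The three equality folds introduced above are plain algebraic identities over fixed-width (wrap-around) integers, which is what the DAG nodes model; (X - Y) == Y becomes X == Y << 1 by adding Y to both sides. A quick standalone check, not LLVM API:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678;
  // (X + Y) == X  <=>  Y == 0
  assert(((X + Y) == X) == (Y == 0));
  // (X ^ Y) == X  <=>  Y == 0
  assert(((X ^ Y) == X) == (Y == 0));
  // (X - Y) == X  <=>  Y == 0
  assert(((X - Y) == X) == (Y == 0));
  // (X - Y) == Y  <=>  X == Y << 1   (all modulo 2^32, matching ISD::SUB/SHL)
  assert(((X - Y) == Y) == (X == (uint32_t)(Y << 1)));
  return 0;
}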
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -2209,14 +2662,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SelectionDAG &DAG = DCI.DAG;
EVT OpVT = N0.getValueType();
- // These setcc operations always fold.
- switch (Cond) {
- default: break;
- case ISD::SETFALSE:
- case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
- case ISD::SETTRUE:
- case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
- }
+ // Constant fold or commute setcc.
+ if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
+ return Fold;
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
@@ -2226,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+ // If we have a subtract with the same 2 non-constant operands as this setcc
+ // -- but in reverse order -- then try to commute the operands of this setcc
+ // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+ // instruction on some targets.
+ if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+ DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
+ !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -2235,8 +2694,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
- const APInt &ShAmt
- = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &ShAmt = N0.getConstantOperandAPInt(1);
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt == Log2_32(N0.getValueSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
@@ -2275,7 +2733,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
}
- // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ SDValue Zero = DAG.getConstant(0, dl, CTVT);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+ unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+ return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
}
// (zext x) == C --> x == (trunc C)
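The ctpop expansion above is the classic power-of-two test: x has exactly one bit set iff x is non-zero and clearing its lowest set bit (x & (x - 1)) leaves zero. A brute-force standalone check of the equivalence; plain C++ rather than DAG nodes:

#include <bitset>
#include <cassert>
#include <cstdint>

// (ctpop x) == 1  -->  (x != 0) && ((x & (x - 1)) == 0), as built above with
// an ADD of -1, an AND, and two setcc nodes glued by AND/OR.
static bool isPow2Expansion(uint32_t X) {
  return X != 0 && (X & (X - 1)) == 0;
}

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    bool HasOneBit = std::bitset<32>(X).count() == 1;
    assert(isPow2Expansion(X) == HasOneBit);
  }
  return 0;
}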
@@ -2387,8 +2859,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
- const APInt &Mask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &Mask = N0.getConstantOperandAPInt(1);
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
@@ -2480,7 +2951,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
break;
}
default:
- break; // todo, be more careful with signed comparisons
+ break; // todo, be more careful with signed comparisons
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
@@ -2501,7 +2972,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else {
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
- DAG.getConstant(Imm, dl, Op0Ty));
+ DAG.getConstant(Imm, dl, Op0Ty));
}
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
@@ -2598,6 +3069,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
+ if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
+ KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
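The new urem fold rests on one number-theory fact: every divisor of a power of two is itself a power of two, so if x has at most one bit set and y has at least two, x % y can only be zero when x is zero. A brute-force standalone check of that equivalence (plain integers, no KnownBits):

#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
  // x ranges over values with popcount <= 1, y over values with popcount >= 2.
  for (unsigned k = 0; k <= 16; ++k) {
    uint32_t X = (k == 0) ? 0 : (1u << (k - 1));
    for (uint32_t Y = 3; Y <= 1024; ++Y) {
      if (std::bitset<32>(Y).count() < 2)
        continue;
      // icmp eq (urem x, y), 0  is equivalent to  icmp eq x, 0 here.
      assert(((X % Y) == 0) == (X == 0));
    }
  }
  return 0;
}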
@@ -2805,25 +3288,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- if (isa<ConstantFPSDNode>(N0.getNode())) {
- // Constant fold or commute setcc.
- SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
- if (O.getNode()) return O;
- } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
- // If the RHS of an FP comparison is a constant, simplify it away in
- // some cases.
- if (CFP->getValueAPF().isNaN()) {
- // If an operand is known to be a nan, we can fold it.
- switch (ISD::getUnorderedFlavor(Cond)) {
- default: llvm_unreachable("Unknown flavor!");
- case 0: // Known false.
- return DAG.getBoolConstant(false, dl, VT, OpVT);
- case 1: // Known true.
- return DAG.getBoolConstant(true, dl, VT, OpVT);
- case 2: // Undefined.
- return DAG.getUNDEF(VT);
- }
- }
+ if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
+ auto *CFP = cast<ConstantFPSDNode>(N1);
+ assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
@@ -2883,15 +3350,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
+ assert(!N0.getValueType().isInteger() &&
+ "Integer types should be handled by FoldSetCC");
bool EqTrue = ISD::isTrueWhenEqual(Cond);
-
- // We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger())
- return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
-
unsigned UOF = ISD::getUnorderedFlavor(Cond);
- if (UOF == 2) // FP operators that are undefined on NaNs.
+ if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
if (UOF == unsigned(EqTrue))
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
@@ -2900,7 +3364,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
if (NewCond != Cond &&
(DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(NewCond, N0.getSimpleValueType())))
+ isCondCodeLegal(NewCond, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2969,69 +3433,39 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
- // Simplify (X+Z) == X --> Z == 0
+ // (X+Y) == X --> Y == 0 and similar folds.
// Don't do this if X is an immediate that can fold into a cmp
- // instruction and X+Z has other uses. It could be an induction variable
+ // instruction and X+Y has other uses. It could be an induction variable
// chain, and the transform would increase register pressure.
- if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, dl, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, dl, N0.getValueType()),
- Cond);
- if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- auto &DL = DAG.getDataLayout();
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(
- ISD::SHL, dl, N1.getValueType(), N1,
- DAG.getConstant(1, dl,
- getShiftAmountTy(N1.getValueType(), DL,
- !DCI.isBeforeLegalize())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
- }
- }
- }
+ if (!LegalRHSImm || N0.hasOneUse())
+ if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
+ return V;
}
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
- N1.getOpcode() == ISD::XOR) {
- // Simplify X == (X+Z) --> Z == 0
- if (N1.getOperand(0) == N0)
- return DAG.getSetCC(dl, VT, N1.getOperand(1),
- DAG.getConstant(0, dl, N1.getValueType()), Cond);
- if (N1.getOperand(1) == N0) {
- if (isCommutativeBinOp(N1.getOpcode()))
- return DAG.getSetCC(dl, VT, N1.getOperand(0),
- DAG.getConstant(0, dl, N1.getValueType()), Cond);
- if (N1.getNode()->hasOneUse()) {
- assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
- auto &DL = DAG.getDataLayout();
- // X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(
- ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
- }
- }
- }
+ N1.getOpcode() == ISD::XOR)
+ if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
+ return V;
- if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
+ if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
return V;
}
+ // Fold remainder of division by a constant.
+ if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // When division is cheap or optimizing for minimum size,
+ // fall through to DIVREM creation by skipping this fold.
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+
// Fold away ALL boolean setcc's.
- SDValue Temp;
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
- EVT OpVT = N0.getValueType();
+ SDValue Temp;
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
@@ -3134,18 +3568,18 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
switch (Constraint[0]) {
default: break;
case 'r': return C_RegisterClass;
- case 'm': // memory
- case 'o': // offsetable
- case 'V': // not offsetable
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
return C_Memory;
- case 'i': // Simple Integer or Relocatable Constant
- case 'n': // Simple Integer
- case 'E': // Floating Point Constant
- case 'F': // Floating Point Constant
- case 's': // Relocatable Constant
- case 'p': // Address.
- case 'X': // Allow ANY value.
- case 'I': // Target registers.
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
case 'J':
case 'K':
case 'L':
@@ -3159,7 +3593,7 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
}
}
- if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
return C_Memory;
return C_Register;
@@ -3170,14 +3604,20 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
-const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
if (ConstraintVT.isInteger())
return "r";
if (ConstraintVT.isFloatingPoint())
- return "f"; // works for many targets
+ return "f"; // works for many targets
return nullptr;
}
+SDValue TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) const {
+ return SDValue();
+}
+
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -3191,7 +3631,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
switch (ConstraintLetter) {
default: break;
case 'X': // Allows any operand; labels (basic block) use this.
- if (Op.getOpcode() == ISD::BasicBlock) {
+ if (Op.getOpcode() == ISD::BasicBlock ||
+ Op.getOpcode() == ISD::TargetBlockAddress) {
Ops.push_back(Op);
return;
}
@@ -3199,46 +3640,57 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
- // These operands are interested in values of the form (GV+C), where C may
- // be folded in as an offset of GV, or it may be explicitly added. Also, it
- // is possible and fine if either GV or C are missing.
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
- // If we have "(add GV, C)", pull out GV/C
- if (Op.getOpcode() == ISD::ADD) {
- C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
- if (!C || !GA) {
- C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
- GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
- }
- if (!C || !GA) {
- C = nullptr;
- GA = nullptr;
- }
- }
- // If we find a valid operand, map to the TargetXXX version so that the
- // value itself doesn't get selected.
- if (GA) { // Either &GV or &GV+C
- if (ConstraintLetter != 'n') {
- int64_t Offs = GA->getOffset();
- if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
- C ? SDLoc(C) : SDLoc(),
- Op.getValueType(), Offs));
- }
- return;
- }
- if (C) { // just C, no GV.
- // Simple constants are not allowed for 's'.
- if (ConstraintLetter != 's') {
+ GlobalAddressSDNode *GA;
+ ConstantSDNode *C;
+ BlockAddressSDNode *BA;
+ uint64_t Offset = 0;
+
+ // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
+ // etc., since getelementptr is variadic. We can't use
+ // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
+ // while in this case the GA may be furthest from the root node which is
+ // likely an ISD::ADD.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+ GA->getValueType(0),
+ Offset + GA->getOffset()));
+ return;
+ } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
+ ConstraintLetter != 's') {
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
- Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
+ BooleanContent BCont = getBooleanContents(MVT::i64);
+ ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
+ : ISD::SIGN_EXTEND;
+ int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
+ : C->getSExtValue();
+ Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
SDLoc(C), MVT::i64));
+ return;
+ } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
+ ConstraintLetter != 'n') {
+ Ops.push_back(DAG.getTargetBlockAddress(
+ BA->getBlockAddress(), BA->getValueType(0),
+ Offset + BA->getOffset(), BA->getTargetFlags()));
+ return;
+ } else {
+ const unsigned OpCode = Op.getOpcode();
+ if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+ if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+ Op = Op.getOperand(1);
+ // Subtraction is not commutative.
+ else if (OpCode == ISD::ADD &&
+ (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+ Op = Op.getOperand(0);
+ else
+ return;
+ Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+ continue;
+ }
}
return;
}
@@ -3252,14 +3704,14 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
- return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
- assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+ return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
+ assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
- StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+ StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
- std::pair<unsigned, const TargetRegisterClass*> R =
- std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
// Figure out which register class contains this reg.
for (const TargetRegisterClass *RC : RI->regclasses()) {
@@ -3271,8 +3723,8 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (RegName.equals_lower(RI->getRegAsmName(*I))) {
- std::pair<unsigned, const TargetRegisterClass*> S =
- std::make_pair(*I, RC);
+ std::pair<unsigned, const TargetRegisterClass *> S =
+ std::make_pair(*I, RC);
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
@@ -3321,8 +3773,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- unsigned ResNo = 0; // ResNo - The result number of the next output.
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
ConstraintOperands.emplace_back(std::move(CI));
@@ -3391,7 +3843,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case 64:
case 128:
OpInfo.ConstraintVT =
- MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
@@ -3416,8 +3868,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
for (maIndex = 0; maIndex < maCount; ++maIndex) {
int weightSum = 0;
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
if (OpInfo.Type == InlineAsm::isClobber)
continue;
@@ -3432,7 +3884,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
Input.ConstraintVT.isInteger()) ||
(OpInfo.ConstraintVT.getSizeInBits() !=
Input.ConstraintVT.getSizeInBits())) {
- weightSum = -1; // Can't match.
+ weightSum = -1; // Can't match.
break;
}
}
@@ -3453,8 +3905,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// Now select chosen alternative in each constraint.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
if (cInfo.Type == InlineAsm::isClobber)
continue;
cInfo.selectAlternative(bestMAIndex);
@@ -3464,8 +3916,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// Check and hook up tied operands, choose constraint code to use.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
@@ -3577,9 +4029,9 @@ TargetLowering::ConstraintWeight
weight = CW_Register;
break;
case 'X': // any operand.
- default:
- weight = CW_Default;
- break;
+ default:
+ weight = CW_Default;
+ break;
}
return weight;
}
@@ -3678,6 +4130,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
return;
}
+ if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
+ return;
+
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -3749,12 +4204,12 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- SmallVectorImpl<SDNode *> &Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
- return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue(N, 0); // Lower SDIV as SDIV
return SDValue();
}
@@ -4000,6 +4455,104 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 2> Built;
+ if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
+ // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - Q = floor((2^W - 1) / D)
+ // where W is the width of the common type of N and D.
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ EVT VT = REMNode.getValueType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Add non-uniform constant support.
+ ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!Divisor || !CompTarget || Divisor->isNullValue() ||
+ !CompTarget->isNullValue())
+ return SDValue();
+
+ const APInt &D = Divisor->getAPIntValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ bool DivisorIsEven = (K != 0);
+ APInt D0 = D.lshr(K);
+
+ // The fold is invalid when D0 == 1.
+ // This is reachable because visitSetCC happens before visitREM.
+ if (D0.isOneValue())
+ return SDValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ SDValue PVal = DAG.getConstant(P, DL, VT);
+ SDValue QVal = DAG.getConstant(Q, DL, VT);
+ // (mul N, P)
+ SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal);
+ Created.push_back(Op1.getNode());
+
+ // Rotate right only if D was even.
+ if (DivisorIsEven) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDValue ShAmt =
+ DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout()));
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // UREM: (rotr (mul N, P), K)
+ Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags);
+ Created.push_back(Op1.getNode());
+ }
+
+ // UREM: (setule/setugt (rotr (mul N, P), K), Q)
+ return DAG.getSetCC(DL, SETCCVT, Op1, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+}
+
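// Illustrative standalone sketch of the Hacker's Delight 10-17 test that the
// fold above produces, using plain uint32_t values. Helper names (rotr32,
// isDivisibleBy32) are hypothetical; D is assumed to be a nonzero constant.
#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t X, unsigned K) {
  return K ? (X >> K) | (X << (32 - K)) : X;
}

// Returns true iff N % D == 0 using one multiply, one rotate and one compare.
static bool isDivisibleBy32(uint32_t N, uint32_t D) {
  unsigned K = 0;                       // D = D0 * 2^K with D0 odd
  while (((D >> K) & 1) == 0)
    ++K;
  uint32_t D0 = D >> K;
  uint32_t P = 1;                       // multiplicative inverse of D0 mod 2^32
  for (int I = 0; I < 5; ++I)           // each Newton step doubles correct bits
    P *= 2 - D0 * P;
  uint32_t Q = UINT32_MAX / D;          // floor((2^32 - 1) / D)
  return rotr32(N * P, K) <= Q;
}

int main() {
  for (uint32_t D : {3u, 6u, 10u, 24u})
    for (uint32_t N = 0; N < 1000; ++N)
      assert(isDivisibleBy32(N, D) == (N % D == 0));
}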
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
@@ -4308,7 +4861,7 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -4320,7 +4873,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
EVT IntVT = SrcVT.changeTypeToInteger();
EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
@@ -4544,6 +5097,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
}
+ // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use that
+ // instead if there are no NaNs.
+ if (Node->getFlags().hasNoNaNs()) {
+ unsigned IEEE2018Op =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+ if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
+ return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
+ Node->getOperand(1), Node->getFlags());
+ }
+ }
+
return SDValue();
}
@@ -4771,7 +5335,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
- return DAG.getMergeValues({ Value, NewChain }, SL);
+ return DAG.getMergeValues({Value, NewChain}, SL);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
@@ -4826,7 +5390,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// Store Stride in bytes
unsigned Stride = MemSclVT.getSizeInBits() / 8;
- assert (Stride && "Zero stride!");
+ assert(Stride && "Zero stride!");
// Extract each of the elements from the original vector and save them into
// memory individually.
SmallVector<SDValue, 8> Stores;
@@ -5013,17 +5577,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
auto &MF = DAG.getMachineFunction();
- EVT MemVT = ST->getMemoryVT();
+ EVT StoreMemVT = ST->getMemoryVT();
SDLoc dl(ST);
- if (MemVT.isFloatingPoint() || MemVT.isVector()) {
+ if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
- MemVT.isVector()) {
+ StoreMemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
-
return Result;
}
// Expand to a bitconvert of the value to the integer type of the
@@ -5036,24 +5599,22 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
}
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
- EVT StoredVT = ST->getMemoryVT();
- MVT RegVT =
- getRegisterType(*DAG.getContext(),
- EVT::getIntegerVT(*DAG.getContext(),
- StoredVT.getSizeInBits()));
+ MVT RegVT = getRegisterType(
+ *DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
EVT PtrVT = Ptr.getValueType();
- unsigned StoredBytes = StoredVT.getStoreSize();
+ unsigned StoredBytes = StoreMemVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
- SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+ SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(
Chain, dl, Val, StackPtr,
- MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
EVT StackPtrVT = StackPtr.getValueType();
@@ -5082,17 +5643,17 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
// The last store may be partial. Do a truncating store. On big-endian
// machines this requires an extending load from the stack slot to ensure
// that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (StoredBytes - Offset));
+ EVT LoadMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(
ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo().getWithOffset(Offset), MemVT,
+ ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
MinAlign(ST->getAlignment(), Offset),
ST->getMemOperand()->getFlags(), ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
@@ -5100,18 +5661,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
return Result;
}
- assert(ST->getMemoryVT().isInteger() &&
- !ST->getMemoryVT().isVector() &&
+ assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
"Unaligned store of unknown type.");
// Get the half-size VT
- EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
int NumBits = NewStoredVT.getSizeInBits();
int IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount =
- DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
- DAG.getDataLayout()));
+ SDValue ShiftAmount = DAG.getConstant(
+ NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
@@ -5130,7 +5689,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
SDValue Result =
- DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
return Result;
}
@@ -5242,7 +5801,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
// At last for X86 targets, maybe good for other targets too?
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- MFI.setAdjustsStack(true); // Is this only for X86 target?
+ MFI.setAdjustsStack(true); // Is this only for X86 target?
MFI.setHasCalls(true);
assert((GA->getOffset() == 0) &&
@@ -5282,15 +5841,19 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
EVT VT = LHS.getValueType();
SDLoc dl(Node);
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
// usub.sat(a, b) -> umax(a, b) - b
if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
- if (VT.isVector()) {
- // TODO: Consider not scalarizing here.
- return SDValue();
+ if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+ SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
+ SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
+ return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
}
unsigned OverflowOp;
@@ -5312,96 +5875,410 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
"addition or subtraction node.");
}
- assert(LHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(RHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(LHS.getValueType() == RHS.getValueType() &&
- "Expected both operands to be the same type");
-
- unsigned BitWidth = LHS.getValueSizeInBits();
- EVT ResultType = LHS.getValueType();
- EVT BoolVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
- SDValue Result =
- DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
+ unsigned BitWidth = LHS.getScalarValueSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
+ LHS, RHS);
SDValue SumDiff = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
- SDValue Zero = DAG.getConstant(0, dl, ResultType);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
if (Opcode == ISD::UADDSAT) {
- // Just need to check overflow for SatMax.
- APInt MaxVal = APInt::getMaxValue(BitWidth);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
- return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff);
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS + RHS) | OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+ }
+ // Overflow ? 0xffff.... : (LHS + RHS)
+ return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
} else if (Opcode == ISD::USUBSAT) {
- // Just need to check overflow for SatMin.
- APInt MinVal = APInt::getMinValue(BitWidth);
- SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
- return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff);
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS - RHS) & ~OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+ return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+ }
+ // Overflow ? 0 : (LHS - RHS)
+ return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
} else {
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
APInt MinVal = APInt::getSignedMinValue(BitWidth);
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
- SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
- return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
+ Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
}
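// Illustrative standalone sketch of the branchless unsigned-saturation
// rewrites used above (usub.sat via UMAX, uadd.sat via UMIN on the inverted
// operand), on plain uint32_t values; the function names are hypothetical.
#include <algorithm>
#include <cassert>
#include <cstdint>

// uadd.sat(a, b) -> umin(a, ~b) + b: if a + b would overflow then a > ~b, so
// the min clamps a to ~b and the sum becomes exactly 0xffffffff.
static uint32_t uadd_sat(uint32_t A, uint32_t B) {
  return std::min(A, uint32_t(~B)) + B;
}

// usub.sat(a, b) -> umax(a, b) - b: if a < b, the max replaces a with b and
// the difference becomes exactly 0.
static uint32_t usub_sat(uint32_t A, uint32_t B) {
  return std::max(A, B) - B;
}

int main() {
  assert(uadd_sat(0xfffffff0u, 0x20u) == 0xffffffffu);
  assert(uadd_sat(1u, 2u) == 3u);
  assert(usub_sat(2u, 5u) == 0u);
  assert(usub_sat(5u, 2u) == 3u);
}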
SDValue
-TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
- SelectionDAG &DAG) const {
- assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
- assert(Node->getNumOperands() == 3 &&
- "Expected signed fixed point multiplication to have 3 operands.");
+TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
+ assert((Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::UMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT) &&
+ "Expected a fixed point multiplication opcode");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- assert(LHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(RHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(LHS.getValueType() == RHS.getValueType() &&
- "Expected both operands to be the same type");
-
- unsigned Scale = Node->getConstantOperandVal(2);
EVT VT = LHS.getValueType();
- assert(Scale < VT.getScalarSizeInBits() &&
- "Expected scale to be less than the number of bits.");
+ unsigned Scale = Node->getConstantOperandVal(2);
+ bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ unsigned VTSize = VT.getScalarSizeInBits();
+
+ if (!Scale) {
+ // [us]mul.fix(a, b, 0) -> mul(a, b)
+ if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ }
+ }
- if (!Scale)
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ bool Signed =
+ Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
+ assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+ "Expected scale to be less than the number of bits if signed or at "
+ "most the number of bits if unsigned.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
// Get the upper and lower bits of the result.
SDValue Lo, Hi;
- if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
- SDValue Result =
- DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+ if (isOperationLegalOrCustom(LoHiOp, VT)) {
+ SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
Lo = Result.getValue(0);
Hi = Result.getValue(1);
- } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+ } else if (isOperationLegalOrCustom(HiOp, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+ } else if (VT.isVector()) {
+ return SDValue();
} else {
- report_fatal_error("Unable to expand signed fixed point multiplication.");
+ report_fatal_error("Unable to expand fixed point multiplication.");
}
+ if (Scale == VTSize)
+ // Result is just the top half since we'd be shifting by the width of the
+ // operand.
+ return Hi;
+
// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
- Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy));
- Hi = DAG.getNode(
- ISD::SHL, dl, VT, Hi,
- DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy));
- return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+ SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ if (!Saturating)
+ return Result;
+
+ unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
+ SDValue HiMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
+ SDValue LoMask = DAG.getConstant(
+ APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+
+ Result = DAG.getSelectCC(dl, Hi, LoMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETGT);
+ return DAG.getSelectCC(dl, Hi, HiMask,
+ DAG.getConstant(MinVal, dl, VT), Result,
+ ISD::SETLT);
+}
+
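// Illustrative standalone sketch of the fixed-point multiply expansion above
// for 32-bit operands: the 64-bit product stands in for the Hi:Lo pair from
// MUL_LOHI, and shifting it right by Scale mirrors FSHR(Hi, Lo, Scale).
// smul_fix is a hypothetical name, not an LLVM API.
#include <cassert>
#include <cstdint>

static int32_t smul_fix(int32_t A, int32_t B, unsigned Scale) {
  int64_t Prod = int64_t(A) * int64_t(B);       // double-width product (Hi:Lo)
  return int32_t(uint64_t(Prod) >> Scale);      // keep bits [Scale, Scale+31]
}

int main() {
  // Q16.16: 1.5 * 2.25 == 3.375
  int32_t A = 3 << 15;                          // 1.5  in Q16.16
  int32_t B = 9 << 14;                          // 2.25 in Q16.16
  assert(smul_fix(A, B, 16) == (27 << 13));     // 3.375 in Q16.16
}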
+void TargetLowering::expandUADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::UADDO;
+
+ // If ADD/SUBCARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+ SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+ SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+ { LHS, RHS, CarryIn });
+ Result = SDValue(NodeCarry.getNode(), 0);
+ Overflow = SDValue(NodeCarry.getNode(), 1);
+ return;
+ }
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+}
+
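// Illustrative standalone sketch of the generic UADDO/USUBO check above: the
// overflow flag is a single unsigned compare of the result against the left
// operand (SETULT for add, SETUGT for sub). Function names are hypothetical.
#include <cassert>
#include <cstdint>

static uint32_t uaddo(uint32_t L, uint32_t R, bool &Ov) {
  uint32_t Sum = L + R;
  Ov = Sum < L;                 // wrapped around iff the sum dropped below L
  return Sum;
}

static uint32_t usubo(uint32_t L, uint32_t R, bool &Ov) {
  uint32_t Diff = L - R;
  Ov = Diff > L;                // borrowed iff the difference jumped above L
  return Diff;
}

int main() {
  bool Ov;
  uaddo(0xffffffffu, 1u, Ov);
  assert(Ov);
  usubo(0u, 1u, Ov);
  assert(Ov);
  usubo(5u, 3u, Ov);
  assert(!Ov);
}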
+void TargetLowering::expandSADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::SADDO;
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+
+ // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+ unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
+ if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
+ SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+ return;
+ }
+
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Result >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ IsAdd ? ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+}
+
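// Illustrative standalone sketch of the sign-comparison overflow test above,
// written with explicitly wrapping uint32_t arithmetic; names are
// hypothetical.
#include <cassert>
#include <cstdint>

static bool saddoOverflows(int32_t L, int32_t R) {
  uint32_t Sum = uint32_t(L) + uint32_t(R);
  bool LSign = L >= 0, RSign = R >= 0, SumSign = (Sum >> 31) == 0;
  // Add: overflow iff the operands agree in sign but the sum does not.
  return LSign == RSign && LSign != SumSign;
}

static bool ssuboOverflows(int32_t L, int32_t R) {
  uint32_t Diff = uint32_t(L) - uint32_t(R);
  bool LSign = L >= 0, RSign = R >= 0, DiffSign = (Diff >> 31) == 0;
  // Sub: overflow iff the operands disagree in sign and the result's sign
  // differs from the left operand's.
  return LSign != RSign && LSign != DiffSign;
}

int main() {
  assert(saddoOverflows(INT32_MAX, 1));
  assert(!saddoOverflows(INT32_MAX, -1));
  assert(ssuboOverflows(INT32_MIN, 1));
  assert(!ssuboOverflows(0, 1));
}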
+bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
+ SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+
+ // For power-of-two multiplications we can use a simpler shift expansion.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+ const APInt &C = RHSC->getAPIntValue();
+ // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+ if (C.isPowerOf2()) {
+ // smulo(x, signed_min) is the same as umulo(x, signed_min).
+ bool UseArithShift = isSigned && !C.isMinSignedValue();
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+ Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT,
+ DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+ dl, VT, Result, ShiftAmt),
+ LHS, ISD::SETNE);
+ return true;
+ }
+ }
+
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorNumElements());
+
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (isTypeLegal(WideVT)) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
+ getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
+ } else {
+ if (VT.isVector())
+ return false;
+
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // low part.
+ unsigned LoSize = VT.getSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture, which forces it to be split
+ // into two arguments.
+ SDValue Ret;
+ if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
+ /* doesNotReturn */ false, /* isReturnValueUsed */ true,
+ /* isPostTypeLegalization */ true).first;
+ } else {
+ SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
+ /* doesNotReturn */ false, /* isReturnValueUsed */ true,
+ /* isPostTypeLegalization */ true).first;
+ }
+ assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+ "Ret value is a collection of constituent nodes holding result.");
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Same as above.
+ BottomHalf = Ret.getOperand(0);
+ TopHalf = Ret.getOperand(1);
+ } else {
+ BottomHalf = Ret.getOperand(1);
+ TopHalf = Ret.getOperand(0);
+ }
+ }
+
+ Result = BottomHalf;
+ if (isSigned) {
+ SDValue ShiftAmt = DAG.getConstant(
+ VT.getScalarSizeInBits() - 1, dl,
+ getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
+ } else {
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // Truncate the result if SetCC returns a larger type than needed.
+ EVT RType = Node->getValueType(1);
+ if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+ Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
+
+ assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
+ "Unexpected result type for S/UMULO legalization");
+ return true;
+}
+
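// Illustrative standalone sketch of the widening path above for a 32-bit
// SMULO: multiply in the double-width type, then the operation overflowed iff
// the top half is not just the sign extension of the bottom half. The name
// smulo32 is hypothetical.
#include <cassert>
#include <cstdint>

static int32_t smulo32(int32_t L, int32_t R, bool &Ov) {
  int64_t Wide = int64_t(L) * int64_t(R);
  int32_t Bottom = int32_t(Wide);                 // truncated result
  int32_t Top = int32_t(Wide >> 32);              // high half
  Ov = Top != (Bottom < 0 ? -1 : 0);              // must equal the sign spill
  return Bottom;
}

int main() {
  bool Ov;
  smulo32(46340, 46340, Ov);                      // 2147395600 fits
  assert(!Ov);
  smulo32(46341, 46341, Ov);                      // 2147488281 does not fit
  assert(Ov);
  smulo32(-46341, 46341, Ov);                     // below INT32_MIN
  assert(Ov);
}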
+SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ bool NoNaN = Node->getFlags().hasNoNaNs();
+ unsigned BaseOpcode = 0;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
+ case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
+ case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
+ case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
+ case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
+ case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
+ case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
+ case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
+ case ISD::VECREDUCE_FMAX:
+ BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
+ break;
+ case ISD::VECREDUCE_FMIN:
+ BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
+ break;
+ }
+
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // Try to use a shuffle reduction for power of two vectors.
+ if (VT.isPow2VectorType()) {
+ while (VT.getVectorNumElements() > 1) {
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
+ break;
+
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
+ Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+ VT = HalfVT;
+ }
+ }
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
+
+ SDValue Res = Ops[0];
+ for (unsigned i = 1; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
+
+ // Result type may be wider than element type.
+ if (EltVT != Node->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
+ return Res;
}
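// Illustrative standalone sketch of the reduction strategy above: halve a
// power-of-two vector and combine the two halves until one element is left,
// then scalarize whatever remains. std::vector stands in for the SDValue
// operands; the function name is hypothetical.
#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

static int32_t reduceVector(std::vector<int32_t> V,
                            const std::function<int32_t(int32_t, int32_t)> &Op) {
  // Split-and-combine while the element count is a power of two.
  while (V.size() > 1 && (V.size() & (V.size() - 1)) == 0) {
    size_t Half = V.size() / 2;
    for (size_t I = 0; I != Half; ++I)
      V[I] = Op(V[I], V[I + Half]);       // combine low half with high half
    V.resize(Half);
  }
  // Scalarized remainder (also covers non-power-of-two element counts).
  int32_t Res = V[0];
  for (size_t I = 1; I < V.size(); ++I)
    Res = Op(Res, V[I]);
  return Res;
}

int main() {
  assert(reduceVector({1, 2, 3, 4, 5, 6, 7, 8}, std::plus<int32_t>()) == 36);
  assert(reduceVector({1, 2, 3}, std::plus<int32_t>()) == 6);
}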
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 3e12b32b12d4..17a4d76c4c80 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -1,9 +1,8 @@
//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -313,7 +312,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
AtEntry.SetInsertPoint(IP->getParent(), IP);
// Initialize the map pointer and load the current head of the shadow stack.
- Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *CurrentHead =
+ AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead");
Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
StackEntry, 0, 1, "gc_frame.map");
AtEntry.CreateStore(FrameMap, EntryMapPtr);
@@ -354,7 +354,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
Instruction *EntryNextPtr2 =
CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
"gc_frame.next");
- Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(),
+ EntryNextPtr2, "gc_savedhead");
AtExit->CreateStore(SavedHead, Head);
}
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index d3454ca6ba6a..2db0ea570598 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -1,9 +1,8 @@
//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -259,6 +258,15 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
+ // This prevents premature stack popping when an indirect stack access
+ // occurs. It is overly aggressive for the moment.
+ // TODO: - Obvious non-stack loads and stores, such as global values,
+ // are known to not access the stack.
+ // - Further, data dependency and alias analysis can validate
+ // that loads and stores never derive from the stack pointer.
+ if (MI.mayLoadOrStore())
+ return true;
+
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 5d2669f5ae92..23e5ce0acae8 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,9 +1,8 @@
//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,15 +39,15 @@ class SjLjEHPrepare : public FunctionPass {
Type *doubleUnderDataTy;
Type *doubleUnderJBufTy;
Type *FunctionContextTy;
- Constant *RegisterFn;
- Constant *UnregisterFn;
- Constant *BuiltinSetupDispatchFn;
- Constant *FrameAddrFn;
- Constant *StackAddrFn;
- Constant *StackRestoreFn;
- Constant *LSDAAddrFn;
- Constant *CallSiteFn;
- Constant *FuncCtxFn;
+ FunctionCallee RegisterFn;
+ FunctionCallee UnregisterFn;
+ Function *BuiltinSetupDispatchFn;
+ Function *FrameAddrFn;
+ Function *StackAddrFn;
+ Function *StackRestoreFn;
+ Function *LSDAAddrFn;
+ Function *CallSiteFn;
+ Function *FuncCtxFn;
AllocaInst *FuncCtx;
public:
@@ -190,14 +189,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 0, "exception_gep");
- Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ Value *ExnVal = Builder.CreateLoad(Int32Ty, ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 1, "exn_selector_gep");
- Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+ Value *SelVal =
+ Builder.CreateLoad(Int32Ty, SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
}
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index fccbb8ec91cb..9fff873324d0 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -1,9 +1,8 @@
//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,7 +22,6 @@ INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
"Slot index numbering", false, false)
STATISTIC(NumLocalRenum, "Number of local renumberings");
-STATISTIC(NumGlobalRenum, "Number of global renumberings");
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
@@ -95,7 +93,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// Sort the Idx2MBBMap
- llvm::sort(idx2MBBMap, Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, less_first());
LLVM_DEBUG(mf->print(dbgs(), this));
@@ -145,20 +143,6 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
}
}
-void SlotIndexes::renumberIndexes() {
- // Renumber updates the index of every element of the index list.
- LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
- ++NumGlobalRenum;
-
- unsigned index = 0;
-
- for (IndexList::iterator I = indexList.begin(), E = indexList.end();
- I != E; ++I) {
- I->setIndex(index);
- index += SlotIndex::InstrDist;
- }
-}
-
// Renumber indexes locally after curItr was inserted, but failed to get a new
// index.
void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index f6786b30b21c..11452fdb747a 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -1,9 +1,8 @@
//===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index aa3ac444e0da..aa0e07ef92e3 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -1,9 +1,8 @@
//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 330ee81342b6..66dabf78f873 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index d639f4475301..5c944fe3f6b3 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -1,9 +1,8 @@
//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -521,17 +520,18 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
.addReg(FromReg, 0, SubIdx);
BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange& SR) {
- SR.createDeadDef(Def, Allocator);
- });
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
return Def;
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index bcc8f8cf18bc..86ad3811e3ad 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,9 +1,8 @@
//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index eb8552915e2a..641b54205d62 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1,9 +1,8 @@
//===- StackColoring.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1221,11 +1220,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Sort the slots according to their size. Place unused slots at the end.
// Use stable sort to guarantee deterministic code generation.
- std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
- [this](int LHS, int RHS) {
+ llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
// We use -1 to denote an uninteresting slot. Place these slots at the end.
- if (LHS == -1) return false;
- if (RHS == -1) return true;
+ if (LHS == -1)
+ return false;
+ if (RHS == -1)
+ return true;
// Sort according to size.
return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
});
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 00cf8070be5e..fb2abf3daa7f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -1,9 +1,8 @@
//===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 0676fa2421e8..ae9401b89700 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -1,9 +1,8 @@
//===- StackMaps.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 3b578c7391da..809960c7fdf9 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -1,9 +1,8 @@
//===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -157,40 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-bool StackProtector::HasAddressTaken(const Instruction *AI) {
- for (const User *U : AI->users()) {
- if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (AI == SI->getValueOperand())
- return true;
- } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
- if (AI == SI->getOperand(0))
- return true;
- } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
- // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
- if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
- return true;
- } else if (isa<InvokeInst>(U)) {
- return true;
- } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
- if (HasAddressTaken(SI))
- return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
- // Keep track of what PHI nodes we have already visited to ensure
- // they are only visited once.
- if (VisitedPHIs.insert(PN).second)
- if (HasAddressTaken(PN))
- return true;
- } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- if (HasAddressTaken(GEP))
- return true;
- } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
- if (HasAddressTaken(BI))
- return true;
- }
- }
- return false;
-}
-
/// Search for the first call to the llvm.stackprotector intrinsic and return it
/// if present.
static const CallInst *findStackProtectorIntrinsic(Function &F) {
@@ -298,7 +264,9 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && HasAddressTaken(AI)) {
+ if (Strong && PointerMayBeCaptured(AI,
+ /* ReturnCaptures */ false,
+ /* StoreCaptures */ true)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
@@ -323,7 +291,7 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
IRBuilder<> &B,
bool *SupportsSelectionDAGSP = nullptr) {
if (Value *Guard = TLI->getIRStackGuard(B))
- return B.CreateLoad(Guard, true, "StackGuard");
+ return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
//
@@ -414,15 +382,14 @@ bool StackProtector::InsertStackProtectors() {
// Generate epilogue instrumentation. The epilogue intrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
- if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
IRBuilder<> B(RI);
- LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+ LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
- llvm::Function *Function = cast<llvm::Function>(GuardCheck);
- Call->setAttributes(Function->getAttributes());
- Call->setCallingConv(Function->getCallingConv());
+ Call->setAttributes(GuardCheck->getAttributes());
+ Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
// If we do not support SelectionDAG based tail calls, generate IR level
@@ -474,7 +441,7 @@ bool StackProtector::InsertStackProtectors() {
// Generate the stack protector instructions in the old basic block.
IRBuilder<> B(BB);
Value *Guard = getStackGuard(TLI, M, B);
- LoadInst *LI2 = B.CreateLoad(AI, true);
+ LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
Value *Cmp = B.CreateICmpEQ(Guard, LI2);
auto SuccessProb =
BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -500,14 +467,13 @@ BasicBlock *StackProtector::CreateFailBB() {
IRBuilder<> B(FailBB);
B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
if (Trip.isOSOpenBSD()) {
- Constant *StackChkFail =
- M->getOrInsertFunction("__stack_smash_handler",
- Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context));
+ FunctionCallee StackChkFail = M->getOrInsertFunction(
+ "__stack_smash_handler", Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context));
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
- Constant *StackChkFail =
+ FunctionCallee StackChkFail =
M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
B.CreateCall(StackChkFail, {});
@@ -517,7 +483,7 @@ BasicBlock *StackProtector::CreateFailBB() {
}
bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
- return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());
+ return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator());
}
void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d8c6a249e4da..99b533e10b87 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -1,9 +1,8 @@
//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -243,7 +242,7 @@ void StackSlotColoring::InitializeSlots() {
LLVM_DEBUG(dbgs() << '\n');
// Sort them by weight.
- std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ llvm::stable_sort(SSIntervals, IntervalSorter());
NextColors.resize(AllColors.size());
@@ -348,7 +347,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
- std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ llvm::stable_sort(SSIntervals, IntervalSorter());
#ifndef NDEBUG
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp
new file mode 100644
index 000000000000..96821cadb1b6
--- /dev/null
+++ b/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -0,0 +1,312 @@
+//===-- SwiftErrorValueTracking.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+Register SwiftErrorValueTracking::getOrCreateVReg(const MachineBasicBlock *MBB,
+ const Value *Val) {
+ auto Key = std::make_pair(MBB, Val);
+ auto It = VRegDefMap.find(Key);
+ // If this is the first use of this swifterror value in this basic block,
+ // create a new virtual register.
+ // After we processed all basic blocks we will satisfy this "upwards exposed
+ // use" by inserting a copy or phi at the beginning of this block.
+ if (It == VRegDefMap.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefMap[Key] = VReg;
+ VRegUpwardsUse[Key] = VReg;
+ return VReg;
+ } else
+ return It->second;
+}
+
+void SwiftErrorValueTracking::setCurrentVReg(const MachineBasicBlock *MBB,
+ const Value *Val, Register VReg) {
+ VRegDefMap[std::make_pair(MBB, Val)] = VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegDefAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefUses[Key] = VReg;
+ setCurrentVReg(MBB, Val, VReg);
+ return VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegUseAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ Register VReg = getOrCreateVReg(MBB, Val);
+ VRegDefUses[Key] = VReg;
+ return VReg;
+}
+
+/// Set up SwiftErrorVals by going through the function. If the function has
+/// a swifterror argument, it will be the first entry.
+void SwiftErrorValueTracking::setFunction(MachineFunction &mf) {
+ MF = &mf;
+ Fn = &MF->getFunction();
+ TLI = MF->getSubtarget().getTargetLowering();
+ TII = MF->getSubtarget().getInstrInfo();
+
+ if (!TLI->supportSwiftError())
+ return;
+
+ SwiftErrorVals.clear();
+ VRegDefMap.clear();
+ VRegUpwardsUse.clear();
+ VRegDefUses.clear();
+ SwiftErrorArg = nullptr;
+
+ // Check if function has a swifterror argument.
+ bool HaveSeenSwiftErrorArg = false;
+ for (Function::const_arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+ AI != AE; ++AI)
+ if (AI->hasSwiftErrorAttr()) {
+ assert(!HaveSeenSwiftErrorArg &&
+ "Must have only one swifterror parameter");
+ (void)HaveSeenSwiftErrorArg; // silence warning.
+ HaveSeenSwiftErrorArg = true;
+ SwiftErrorArg = &*AI;
+ SwiftErrorVals.push_back(&*AI);
+ }
+
+ for (const auto &LLVMBB : *Fn)
+ for (const auto &Inst : LLVMBB) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+ if (Alloca->isSwiftError())
+ SwiftErrorVals.push_back(Alloca);
+ }
+}
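As a small illustration of what the setup above collects (names hypothetical): for a function with a swifterror parameter %err and a single alloca marked swifterror, SwiftErrorVals ends up holding the argument first and the alloca second, and SwiftErrorArg points at the argument.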
+
+bool SwiftErrorValueTracking::createEntriesInEntryBlock(DebugLoc DbgLoc) {
+ if (!TLI->supportSwiftError())
+ return false;
+
+ // We only need to do this when we have a swifterror parameter or a
+ // swifterror alloca.
+ if (SwiftErrorVals.empty())
+ return false;
+
+ MachineBasicBlock *MBB = &*MF->begin();
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ bool Inserted = false;
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (SwiftErrorArg && SwiftErrorArg == SwiftErrorVal)
+ continue;
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DbgLoc,
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+
+ setCurrentVReg(MBB, SwiftErrorVal, VReg);
+ Inserted = true;
+ }
+
+ return Inserted;
+}
+
+/// Propagate swifterror values through the machine function CFG.
+void SwiftErrorValueTracking::propagateVRegs() {
+ if (!TLI->supportSwiftError())
+ return;
+
+ // We only need to do this when we have a swifterror parameter or a
+ // swifterror alloca.
+ if (SwiftErrorVals.empty())
+ return;
+
+ // For each machine basic block in reverse post order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // For each swifterror value in the function.
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ auto Key = std::make_pair(MBB, SwiftErrorVal);
+ auto UUseIt = VRegUpwardsUse.find(Key);
+ auto VRegDefIt = VRegDefMap.find(Key);
+ bool UpwardsUse = UUseIt != VRegUpwardsUse.end();
+ Register UUseVReg = UpwardsUse ? UUseIt->second : Register();
+ bool DownwardDef = VRegDefIt != VRegDefMap.end();
+ assert(!(UpwardsUse && !DownwardDef) &&
+ "We can't have an upwards use but no downwards def");
+
+ // If there is no upwards exposed use and there is already an entry for the
+ // swifterror in the def map for this value, we don't need to do anything:
+ // we already have a downward def for this basic block.
+ if (!UpwardsUse && DownwardDef)
+ continue;
+
+ // Otherwise we either have an upwards exposed use vreg that we need to
+ // materialize or need to forward the downward def from predecessors.
+
+ // Check whether we have a single vreg def from all predecessors.
+ // Otherwise we need a phi.
+ SmallVector<std::pair<MachineBasicBlock *, Register>, 4> VRegs;
+ SmallSet<const MachineBasicBlock *, 8> Visited;
+ for (auto *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+ VRegs.push_back(std::make_pair(
+ Pred, getOrCreateVReg(Pred, SwiftErrorVal)));
+ if (Pred != MBB)
+ continue;
+ // We have a self-edge.
+ // If there was no upwards use in this basic block there is now one: the
+ // phi needs to use itself.
+ if (!UpwardsUse) {
+ UpwardsUse = true;
+ UUseIt = VRegUpwardsUse.find(Key);
+ assert(UUseIt != VRegUpwardsUse.end());
+ UUseVReg = UUseIt->second;
+ }
+ }
+
+ // We need a phi node if we have more than one predecessor with different
+ // downward defs.
+ bool needPHI =
+ VRegs.size() >= 1 &&
+ std::find_if(
+ VRegs.begin(), VRegs.end(),
+ [&](const std::pair<const MachineBasicBlock *, Register> &V)
+ -> bool { return V.second != VRegs[0].second; }) !=
+ VRegs.end();
+
+ // If there is no upwards exposed use and we don't need a phi, just
+ // forward the swifterror vreg from the predecessor(s).
+ if (!UpwardsUse && !needPHI) {
+ assert(!VRegs.empty() &&
+ "No predecessors? The entry block should bail out earlier");
+ // Just forward the swifterror vreg from the predecessor(s).
+ setCurrentVReg(MBB, SwiftErrorVal, VRegs[0].second);
+ continue;
+ }
+
+ auto DLoc = isa<Instruction>(SwiftErrorVal)
+ ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ : DebugLoc();
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+
+ // If we don't need a phi create a copy to the upward exposed vreg.
+ if (!needPHI) {
+ assert(UpwardsUse);
+ assert(!VRegs.empty() &&
+ "No predecessors? Is the Calling Convention correct?");
+ Register DestReg = UUseVReg;
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+ DestReg)
+ .addReg(VRegs[0].second);
+ continue;
+ }
+
+ // We need a phi: if there is an upwards exposed use we already have a
+ // destination virtual register number; otherwise we generate a new one.
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register PHIVReg =
+ UpwardsUse ? UUseVReg : MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder PHI =
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (auto BBRegPair : VRegs) {
+ PHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+ }
+
+ // We did not have a definition in this block before: store the phi's vreg
+ // as this block's downward exposed def.
+ if (!UpwardsUse)
+ setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
+ }
+ }
+}
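A concrete (hypothetical) example of the propagation above: suppose block B has two predecessors P1 and P2 whose downward defs of the swifterror value live in different vregs %vr1 and %vr2, and B itself has an upwards exposed use in %vr3. The defs differ, so needPHI is true and the pass places %vr3 = PHI %vr1, P1, %vr2, P2 at the top of B; had both predecessors supplied the same vreg and B had no upwards use, that vreg would simply be forwarded via setCurrentVReg instead.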
+
+void SwiftErrorValueTracking::preassignVRegs(
+ MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
+ if (!TLI->supportSwiftError() || SwiftErrorVals.empty())
+ return;
+
+ // Iterate over instructions and assign vregs to swifterror defs and uses.
+ for (auto It = Begin; It != End; ++It) {
+ ImmutableCallSite CS(&*It);
+ if (CS) {
+ // A call-site with a swifterror argument is both use and def.
+ const Value *SwiftErrorAddr = nullptr;
+ for (auto &Arg : CS.args()) {
+ if (!Arg->isSwiftError())
+ continue;
+ // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+ SwiftErrorAddr = &*Arg;
+ assert(SwiftErrorAddr->isSwiftError() &&
+ "Must have a swifterror value argument");
+ getOrCreateVRegUseAt(&*It, MBB, SwiftErrorAddr);
+ }
+ if (!SwiftErrorAddr)
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A load is a use.
+ } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+ const Value *V = LI->getOperand(0);
+ if (!V->isSwiftError())
+ continue;
+
+ getOrCreateVRegUseAt(LI, MBB, V);
+
+ // A store is a def.
+ } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+ const Value *SwiftErrorAddr = SI->getOperand(1);
+ if (!SwiftErrorAddr->isSwiftError())
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A return in a swifterror-returning function is a use.
+ } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+ const Function *F = R->getParent()->getParent();
+ if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ continue;
+
+ getOrCreateVRegUseAt(R, MBB, SwiftErrorArg);
+ }
+ }
+}
diff --git a/lib/CodeGen/SwitchLoweringUtils.cpp b/lib/CodeGen/SwitchLoweringUtils.cpp
new file mode 100644
index 000000000000..83acf7f80715
--- /dev/null
+++ b/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -0,0 +1,489 @@
+//===- SwitchLoweringUtils.cpp - Switch Lowering --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains switch inst lowering optimizations and utilities for
+// codegen, so that it can be used for both SelectionDAG and GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+
+using namespace llvm;
+using namespace SwitchCG;
+
+uint64_t SwitchCG::getJumpTableRange(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last) {
+ assert(Last >= First);
+ const APInt &LowCase = Clusters[First].Low->getValue();
+ const APInt &HighCase = Clusters[Last].High->getValue();
+ assert(LowCase.getBitWidth() == HighCase.getBitWidth());
+
+ // FIXME: A range of consecutive cases has 100% density, but only requires one
+ // comparison to lower. We should discriminate against such consecutive ranges
+ // in jump tables.
+ return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
+}
+
+uint64_t
+SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last) {
+ assert(Last >= First);
+ assert(TotalCases[Last] >= TotalCases[First]);
+ uint64_t NumCases =
+ TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
+ return NumCases;
+}
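To make the two helpers above concrete, here is a minimal standalone sketch (plain integers, no LLVM types; the case values 1, 2, 3 and 10 are made up for illustration) of the quantities that the later suitability check compares:

#include <cstdint>
#include <iostream>

int main() {
  // Four single-value clusters covering the hypothetical case values 1, 2, 3 and 10.
  uint64_t LowCase = 1, HighCase = 10;
  uint64_t Range = HighCase - LowCase + 1; // 10 jump-table slots would be needed
  uint64_t NumCases = 4;                   // only 4 of those slots hold real cases
  std::cout << "density = " << (100 * NumCases / Range) << "%\n"; // prints 40
}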
+
+void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB) {
+#ifndef NDEBUG
+ // Clusters must be non-empty, sorted, and only contain Range clusters.
+ assert(!Clusters.empty());
+ for (CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range);
+ for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
+ assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ assert(TLI && "TLI not set!");
+ if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
+ return;
+
+ const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
+ const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
+
+ // Bail if not enough cases.
+ const int64_t N = Clusters.size();
+ if (N < 2 || N < MinJumpTableEntries)
+ return;
+
+ // Accumulated number of cases in each cluster and those prior to it.
+ SmallVector<unsigned, 8> TotalCases(N);
+ for (unsigned i = 0; i < N; ++i) {
+ const APInt &Hi = Clusters[i].High->getValue();
+ const APInt &Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
+
+ uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ // Cheap case: the whole range may be suitable for jump table.
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ CaseCluster JTCluster;
+ if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ Clusters[0] = JTCluster;
+ Clusters.resize(1);
+ return;
+ }
+ }
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // Split Clusters into minimum number of dense partitions. The algorithm uses
+ // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
+ // for the Case Statement'" (1994), but builds the MinPartitions array in
+ // reverse order to make it easier to reconstruct the partitions in ascending
+ // order. In the choice between two optimal partitionings, it picks the one
+ // which yields more jump tables.
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+ // PartitionsScore[i] is used to break ties when choosing between two
+ // partitionings resulting in the same number of partitions.
+ SmallVector<unsigned, 8> PartitionsScore(N);
+ // For PartitionsScore, a small number of comparisons is considered as good as
+ // a jump table and a single comparison is considered better than a jump
+ // table.
+ enum PartitionScores : unsigned {
+ NoTable = 0,
+ Table = 1,
+ FewCases = 1,
+ SingleCase = 2
+ };
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+ PartitionsScore[N - 1] = PartitionScores::SingleCase;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; i--) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+ PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
+
+ // Search for a solution that results in fewer partitions.
+ for (int64_t j = N - 1; j > i; j--) {
+ // Try building a partition from Clusters[i..j].
+ Range = getJumpTableRange(Clusters, i, j);
+ NumCases = getJumpTableNumCases(TotalCases, i, j);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+ int64_t NumEntries = j - i + 1;
+
+ if (NumEntries == 1)
+ Score += PartitionScores::SingleCase;
+ else if (NumEntries <= SmallNumberOfEntries)
+ Score += PartitionScores::FewCases;
+ else if (NumEntries >= MinJumpTableEntries)
+ Score += PartitionScores::Table;
+
+ // If this leads to fewer partitions, or to the same number of
+ // partitions with better score, it is a better partitioning.
+ if (NumPartitions < MinPartitions[i] ||
+ (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ PartitionsScore[i] = Score;
+ }
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing some with jump tables in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(Last >= First);
+ assert(DstIndex <= First);
+ unsigned NumClusters = Last - First + 1;
+
+ CaseCluster JTCluster;
+ if (NumClusters >= MinJumpTableEntries &&
+ buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+ Clusters[DstIndex++] = JTCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
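A rough illustration of the partitioning above (the concrete thresholds are target-dependent, so the numbers are only indicative): with clusters for case values 1, 2, 3, 5 and 100, the whole range 1..100 holds only five cases and is too sparse for a single table, but the dynamic program can keep {1, 2, 3, 5} together as one dense partition (range 5, four cases) that becomes a jump table, leaving 100 as a partition of its own handled by an ordinary comparison.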
+
+bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB,
+ CaseCluster &JTCluster) {
+ assert(First <= Last);
+
+ auto Prob = BranchProbability::getZero();
+ unsigned NumCmps = 0;
+ std::vector<MachineBasicBlock*> Table;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
+ for (unsigned I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Prob += Clusters[I].Prob;
+ const APInt &Low = Clusters[I].Low->getValue();
+ const APInt &High = Clusters[I].High->getValue();
+ NumCmps += (Low == High) ? 1 : 2;
+ if (I != First) {
+ // Fill the gap between this and the previous cluster.
+ const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
+ assert(PreviousHigh.slt(Low));
+ uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
+ for (uint64_t J = 0; J < Gap; J++)
+ Table.push_back(DefaultMBB);
+ }
+ uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
+ for (uint64_t J = 0; J < ClusterSize; ++J)
+ Table.push_back(Clusters[I].MBB);
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
+ }
+
+ unsigned NumDests = JTProbs.size();
+ if (TLI->isSuitableForBitTests(NumDests, NumCmps,
+ Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), *DL)) {
+ // Clusters[First..Last] should be lowered as bit tests instead.
+ return false;
+ }
+
+ // Create the MBB that will load from and jump through the table.
+ // Note: We create it here, but it's not inserted into the function yet.
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *JumpTableMBB =
+ CurMF->CreateMachineBasicBlock(SI->getParent());
+
+ // Add successors. Note: use table order for determinism.
+ SmallPtrSet<MachineBasicBlock *, 8> Done;
+ for (MachineBasicBlock *Succ : Table) {
+ if (Done.count(Succ))
+ continue;
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
+ Done.insert(Succ);
+ }
+ JumpTableMBB->normalizeSuccProbs();
+
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI->getJumpTableEncoding())
+ ->createJumpTableIndex(Table);
+
+ // Set up the jump table info.
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
+ JumpTableHeader JTH(Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), SI->getCondition(),
+ nullptr, false);
+ JTCases.emplace_back(std::move(JTH), std::move(JT));
+
+ JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
+ JTCases.size() - 1, Prob);
+ return true;
+}
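For example (destinations hypothetical), clusters [1, 2] -> BB_A and [5, 5] -> BB_B yield a five-entry table BB_A, BB_A, Default, Default, BB_B: the gap between the clusters is filled with DefaultMBB, and the branch probabilities of both clusters are accumulated per destination in JTProbs.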
+
+void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
+ const SwitchInst *SI) {
+ // Partition Clusters into as few subsets as possible, where each subset has a
+ // range that fits in a machine word and has <= 3 unique destinations.
+
+#ifndef NDEBUG
+ // Clusters must be sorted and contain Range or JumpTable clusters.
+ assert(!Clusters.empty());
+ assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
+ for (const CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
+ for (unsigned i = 1; i < Clusters.size(); ++i)
+ assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ EVT PTy = TLI->getPointerTy(*DL);
+ if (!TLI->isOperationLegal(ISD::SHL, PTy))
+ return;
+
+ int BitWidth = PTy.getSizeInBits();
+ const int64_t N = Clusters.size();
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+
+ // FIXME: This might not be the best algorithm for finding bit test clusters.
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; --i) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+
+ // Search for a solution that results in fewer partitions.
+ // Note: the search is limited by BitWidth, reducing time complexity.
+ for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
+ // Try building a partition from Clusters[i..j].
+
+ // Check the range.
+ if (!TLI->rangeFitsInWord(Clusters[i].Low->getValue(),
+ Clusters[j].High->getValue(), *DL))
+ continue;
+
+ // Check nbr of destinations and cluster types.
+ // FIXME: This works, but doesn't seem very efficient.
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ size_t NumClusters = Last - First + 1;
+ std::memmove(&Clusters[DstIndex], &Clusters[First],
+ sizeof(Clusters[0]) * NumClusters);
+ DstIndex += NumClusters;
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ CaseCluster &BTCluster) {
+ assert(First <= Last);
+ if (First == Last)
+ return false;
+
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ unsigned NumCmps = 0;
+ for (int64_t I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Dests.set(Clusters[I].MBB->getNumber());
+ NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ }
+ unsigned NumDests = Dests.count();
+
+ APInt Low = Clusters[First].Low->getValue();
+ APInt High = Clusters[Last].High->getValue();
+ assert(Low.slt(High));
+
+ if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+ return false;
+
+ APInt LowBound;
+ APInt CmpRange;
+
+ const int BitWidth = TLI->getPointerTy(*DL).getSizeInBits();
+ assert(TLI->rangeFitsInWord(Low, High, *DL) &&
+ "Case range must fit in bit mask!");
+
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values fit in a word without having
+ // to subtract minValue. In this case, we can optimize away the subtraction.
+ LowBound = APInt::getNullValue(Low.getBitWidth());
+ CmpRange = High;
+ ContiguousRange = false;
+ } else {
+ LowBound = Low;
+ CmpRange = High - Low;
+ }
+
+ CaseBitsVector CBV;
+ auto TotalProb = BranchProbability::getZero();
+ for (unsigned i = First; i <= Last; ++i) {
+ // Find the CaseBits for this destination.
+ unsigned j;
+ for (j = 0; j < CBV.size(); ++j)
+ if (CBV[j].BB == Clusters[i].MBB)
+ break;
+ if (j == CBV.size())
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
+ CaseBits *CB = &CBV[j];
+
+ // Update Mask, Bits and ExtraProb.
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
+ uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
+ assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
+ CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
+ CB->Bits += Hi - Lo + 1;
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
+ }
+
+ BitTestInfo BTI;
+ llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
+ // Sort by probability first, number of bits second, bit mask third.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
+ if (a.Bits != b.Bits)
+ return a.Bits > b.Bits;
+ return a.Mask < b.Mask;
+ });
+
+ for (auto &CB : CBV) {
+ MachineBasicBlock *BitTestBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
+ }
+ BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
+
+ BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
+ BitTestCases.size() - 1, TotalProb);
+ return true;
+}
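The mask arithmetic in the loop above is easiest to see on a small standalone example (the values are chosen arbitrarily; this is not LLVM code):

#include <cstdint>
#include <cstdio>

int main() {
  // A hypothetical cluster covering values 2..4 relative to LowBound.
  uint64_t Lo = 2, Hi = 4;
  uint64_t Mask = (-1ULL >> (63 - (Hi - Lo))) << Lo; // three consecutive bits, shifted up by Lo
  std::printf("mask = %#llx\n", (unsigned long long)Mask); // prints 0x1c, i.e. 0b11100
}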
+
+void SwitchCG::sortAndRangeify(CaseClusterVector &Clusters) {
+#ifndef NDEBUG
+ for (const CaseCluster &CC : Clusters)
+ assert(CC.Low == CC.High && "Input clusters must be single-case");
+#endif
+
+ llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Merge adjacent clusters with the same destination.
+ const unsigned N = Clusters.size();
+ unsigned DstIndex = 0;
+ for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
+ CaseCluster &CC = Clusters[SrcIndex];
+ const ConstantInt *CaseVal = CC.Low;
+ MachineBasicBlock *Succ = CC.MBB;
+
+ if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
+ (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
+ // If this case has the same successor and is a neighbour, merge it into
+ // the previous cluster.
+ Clusters[DstIndex - 1].High = CaseVal;
+ Clusters[DstIndex - 1].Prob += CC.Prob;
+ } else {
+ std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
+ sizeof(Clusters[SrcIndex]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
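For instance (destination names hypothetical), single-case clusters 1 -> BB_A, 2 -> BB_A, 3 -> BB_A and 7 -> BB_B come out of the sort-and-merge above as two range clusters: [1, 3] -> BB_A, carrying the summed probability of the three merged cases, and [7, 7] -> BB_B.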
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 25cd7802264e..ba348b4a9d41 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -1,9 +1,8 @@
//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index b118c176a897..a0590a8a6cc6 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -1,9 +1,8 @@
//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -435,7 +434,7 @@ void TailDuplicator::duplicateInstruction(
if (NewRC == nullptr)
NewRC = OrigRC;
unsigned NewReg = MRI->createVirtualRegister(NewRC);
- BuildMI(*PredBB, MI, MI->getDebugLoc(),
+ BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
LocalVRMap.erase(VI);
@@ -558,7 +557,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
unsigned MaxDuplicateCount;
if (TailDupSize == 0 &&
TailDuplicateSize.getNumOccurrences() == 0 &&
- MF->getFunction().optForSize())
+ MF->getFunction().hasOptSize())
MaxDuplicateCount = 1;
else if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
@@ -857,11 +856,6 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
}
appendCopies(PredBB, CopyInfos, Copies);
- // Simplify
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
-
NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
// Update the CFG.
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cf78fb5a1f12..9c4483cb240d 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,9 +1,8 @@
//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 2a17af391105..868617ffe14d 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,11 +85,13 @@ static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
/// simple--i.e. not a logical or arithmetic expression--size values without
/// the optional fill value. This is primarily used for creating arbitrary
/// sized inline asm blocks for testing purposes.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
+unsigned TargetInstrInfo::getInlineAsmLength(
+ const char *Str,
+ const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
// Count the number of instructions in the asm.
bool AtInsnStart = true;
unsigned Length = 0;
+ const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0) {
@@ -102,7 +103,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
}
if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- unsigned AddLength = MAI.getMaxInstLength();
+ unsigned AddLength = MaxInstLength;
if (strncmp(Str, ".space", 6) == 0) {
char *EStr;
int SpaceSize;
@@ -136,8 +137,14 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// Save off the debug loc before erasing the instruction.
DebugLoc DL = Tail->getDebugLoc();
- // Remove all the dead instructions from the end of MBB.
- MBB->erase(Tail, MBB->end());
+ // Update call site info and remove all the dead instructions
+ // from the end of MBB.
+ while (Tail != MBB->end()) {
+ auto MI = Tail++;
+ if (MI->isCall())
+ MBB->getParent()->updateCallSiteInfo(&*MI);
+ MBB->erase(MI);
+ }
// If MBB isn't immediately before MBB, insert a branch to it.
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
@@ -162,9 +169,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
- unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
- unsigned Reg1 = MI.getOperand(Idx1).getReg();
- unsigned Reg2 = MI.getOperand(Idx2).getReg();
+ Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+ Register Reg1 = MI.getOperand(Idx1).getReg();
+ Register Reg2 = MI.getOperand(Idx2).getReg();
unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
@@ -523,7 +530,8 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
auto Flags = MachineMemOperand::MONone;
for (unsigned OpIdx : Ops)
Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
@@ -569,7 +577,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
MBB->insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
}
if (NewMI) {
@@ -898,7 +906,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
return true;
// Avoid instructions obviously unsafe for remat.
- if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
+ if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects())
return false;
// Don't remat inline asm. We have no idea how expensive it is
@@ -1010,7 +1019,7 @@ ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "misched");
+ new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
}
// Default implementation of CreateTargetPostRAHazardRecognizer.
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index e86190375642..9b28c1a6c450 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1,9 +1,8 @@
//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,8 +73,8 @@ static cl::opt<unsigned> MinimumJumpTableEntries
cl::desc("Set minimum number of entries to use a jump table."));
static cl::opt<unsigned> MaximumJumpTableSize
- ("max-jump-table-size", cl::init(0), cl::Hidden,
- cl::desc("Set maximum size of jump tables; zero for no limit."));
+ ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
+ cl::desc("Set maximum size of jump tables."));
/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
@@ -124,6 +123,34 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
+ // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
+ if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
+ setLibcallName(RTLIB::ADD_F128, "__addkf3");
+ setLibcallName(RTLIB::SUB_F128, "__subkf3");
+ setLibcallName(RTLIB::MUL_F128, "__mulkf3");
+ setLibcallName(RTLIB::DIV_F128, "__divkf3");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+ setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
+ setLibcallName(RTLIB::UNE_F128, "__nekf2");
+ setLibcallName(RTLIB::OGE_F128, "__gekf2");
+ setLibcallName(RTLIB::OLT_F128, "__ltkf2");
+ setLibcallName(RTLIB::OLE_F128, "__lekf2");
+ setLibcallName(RTLIB::OGT_F128, "__gtkf2");
+ setLibcallName(RTLIB::UO_F128, "__unordkf2");
+ setLibcallName(RTLIB::O_F128, "__unordkf2");
+ }
+
// A few names are different on particular architectures or environments.
if (TT.isOSDarwin()) {
// For f16/f32 conversions, Darwin uses the standard naming scheme, instead
@@ -546,7 +573,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
EnableExtLdPromotion = false;
- HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
BooleanContents = UndefinedBooleanContent;
BooleanFloatContents = UndefinedBooleanContent;
@@ -583,6 +609,14 @@ void TargetLoweringBase::initActions() {
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ if (IntVT.isValid()) {
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
+ AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
+ }
+ }
+
// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.
@@ -617,6 +651,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
+ setOperationAction(ISD::SMULFIXSAT, VT, Expand);
+ setOperationAction(ISD::UMULFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -655,8 +691,51 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
+ // Constrained floating-point operations default to expand.
+ setOperationAction(ISD::STRICT_FADD, VT, Expand);
+ setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+ setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+ setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+ setOperationAction(ISD::STRICT_FREM, VT, Expand);
+ setOperationAction(ISD::STRICT_FMA, VT, Expand);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+ setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+ setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+ setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+ setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+ setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+ setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+ setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+ setOperationAction(ISD::STRICT_FROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
+ setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
+ setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
+
+ // Vector reduction default to expand.
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -688,6 +767,10 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FROUND, VT, Expand);
+ setOperationAction(ISD::LROUND, VT, Expand);
+ setOperationAction(ISD::LLROUND, VT, Expand);
+ setOperationAction(ISD::LRINT, VT, Expand);
+ setOperationAction(ISD::LLRINT, VT, Expand);
}
// Default ISD::TRAP to expand (which turns it into abort).
@@ -700,7 +783,7 @@ void TargetLoweringBase::initActions() {
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
- return MVT::getIntegerVT(8 * DL.getPointerSize(0));
+ return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
@@ -985,16 +1068,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Add a new memory operand for this FI.
assert(MFI.getObjectOffset(FI) != -1);
- auto Flags = MachineMemOperand::MOLoad;
- if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
- Flags |= MachineMemOperand::MOStore;
- Flags |= MachineMemOperand::MOVolatile;
+ // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP and
+ // PATCHPOINT should be updated to do the same (TODO).
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
+ auto Flags = MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags,
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
+ MIB->addMemOperand(MF, MMO);
}
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), Flags,
- MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
- MIB->addMemOperand(MF, MMO);
-
+
// Replace the instruction and update the operand index.
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
@@ -1393,7 +1476,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
}
}
@@ -1409,6 +1492,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
unsigned AddrSpace,
unsigned Alignment,
+ MachineMemOperand::Flags Flags,
bool *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
@@ -1424,7 +1508,15 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
}
// This is a misaligned access.
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO,
+ bool *Fast) const {
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
+ MMO.getAlignment(), MMO.getFlags(), Fast);
}
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
@@ -1447,6 +1539,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case Switch: return 0;
case IndirectBr: return 0;
case Invoke: return 0;
+ case CallBr: return 0;
case Resume: return 0;
case Unreachable: return 0;
case CleanupRet: return 0;
@@ -1580,8 +1673,8 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// thread's unsafe stack pointer.
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
- Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
- StackPtrTy->getPointerTo(0));
+ FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
+ StackPtrTy->getPointerTo(0));
return IRB.CreateCall(Fn);
}
@@ -1656,7 +1749,7 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
return M.getNamedValue("__stack_chk_guard");
}
-Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
return nullptr;
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index cb2fe691d702..4c8f75b237aa 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,6 +218,16 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+ break;
+ case Triple::riscv32:
+ case Triple::riscv64:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
break;
case Triple::sparcv9:
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -272,6 +281,19 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
}
}
+ if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) {
+ auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+
+ Streamer.SwitchSection(S);
+
+ for (const auto &Operand : DependentLibraries->operands()) {
+ Streamer.EmitBytes(
+ cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
+ Streamer.EmitIntValue(0, 1);
+ }
+ }
+
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
@@ -1458,7 +1480,7 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFile::Initialize(Ctx, TM);
const Triple &T = TM.getTargetTriple();
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
@@ -1484,7 +1506,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
unsigned Priority,
const MCSymbol *KeySym,
MCSectionCOFF *Default) {
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
// If the priority is the default, use .CRT$XCU, possibly associative.
if (Priority == 65535)
return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
@@ -1544,9 +1566,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
const Triple &T = TM.getTargetTriple();
- if (!T.isKnownWindowsMSVCEnvironment() &&
- !T.isWindowsItaniumEnvironment() &&
- !T.isWindowsCoreCLREnvironment())
+ if (T.isOSCygMing())
return nullptr;
// Our symbols should exist in address space zero, cowardly no-op if
@@ -1694,8 +1714,11 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
Group = C->getName();
}
- return getContext().getWasmSection(Name, Kind, Group,
- MCContext::GenericSectionID);
+ MCSectionWasm* Section =
+ getContext().getWasmSection(Name, Kind, Group,
+ MCContext::GenericSectionID);
+
+ return Section;
}
static MCSectionWasm *selectWasmSectionForGlobal(
@@ -1724,6 +1747,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
}
+
return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 3c133fb8594e..039748d817ca 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -1,9 +1,8 @@
//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 28126fcf766d..36df02692f86 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -1,9 +1,8 @@
//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/Passes.h"
@@ -408,7 +408,7 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
TM.Options.EnableIPRA = EnableIPRA;
else {
// If not explicitly specified, use target default.
- TM.Options.EnableIPRA = TM.useIPRA();
+ TM.Options.EnableIPRA |= TM.useIPRA();
}
if (TM.Options.EnableIPRA)
@@ -646,7 +646,7 @@ void TargetPassConfig::addIRPasses() {
// into optimally-sized loads and compares. The transforms are enabled by a
// target lowering hook.
if (!DisableMergeICmps)
- addPass(createMergeICmpsPass());
+ addPass(createMergeICmpsLegacyPass());
addPass(createExpandMemCmpPass());
}
@@ -815,6 +815,13 @@ bool TargetPassConfig::addCoreISelPasses() {
} else if (addInstSelector())
return true;
+ // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+ // FinalizeISel.
+ addPass(&FinalizeISelID);
+
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
return false;
}
@@ -874,12 +881,6 @@ void TargetPassConfig::addMachinePasses() {
}
}
- // Print the instruction selected machine code...
- printAndVerify("After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
-
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
@@ -898,13 +899,9 @@ void TargetPassConfig::addMachinePasses() {
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
- addOptimizedRegAlloc(createRegAllocPass(true));
- else {
- if (RegAlloc != &useDefaultRegisterAllocator &&
- RegAlloc != &createFastRegisterAllocator)
- report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
- addFastRegAlloc(createRegAllocPass(false));
- }
+ addOptimizedRegAlloc();
+ else
+ addFastRegAlloc();
// Run post-ra passes.
addPostRegAlloc();
@@ -1039,10 +1036,6 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
llvm_unreachable("Invalid optimize-regalloc state");
}
-/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
- RegisterRegAlloc::Registry;
-
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
@@ -1053,12 +1046,8 @@ defaultRegAlloc("default",
useDefaultRegisterAllocator);
static void initializeDefaultRegisterAllocatorOnce() {
- RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
- if (!Ctor) {
- Ctor = RegAlloc;
+ if (!RegisterRegAlloc::getDefault())
RegisterRegAlloc::setDefault(RegAlloc);
- }
}
/// Instantiate the default register allocator pass for this target for either
@@ -1098,6 +1087,33 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
+bool TargetPassConfig::addRegAssignmentFast() {
+ if (RegAlloc != &useDefaultRegisterAllocator &&
+ RegAlloc != &createFastRegisterAllocator)
+ report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
+
+ addPass(createRegAllocPass(false));
+ return true;
+}
+
+bool TargetPassConfig::addRegAssignmentOptimized() {
+ // Add the selected register allocation pass.
+ addPass(createRegAllocPass(true));
+
+ // Allow targets to change the register assignments before rewriting.
+ addPreRewrite();
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
+ return true;
+}
+
/// Return true if the default global register allocator is in use and
/// has not been overridden on the command line with '-regalloc=...'
bool TargetPassConfig::usingDefaultRegAlloc() const {
@@ -1106,18 +1122,17 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
-void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+void TargetPassConfig::addFastRegAlloc() {
addPass(&PHIEliminationID, false);
addPass(&TwoAddressInstructionPassID, false);
- if (RegAllocPass)
- addPass(RegAllocPass);
+ addRegAssignmentFast();
}
/// Add standard target-independent passes that are tightly coupled with
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
-void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&DetectDeadLanesID, false);
addPass(&ProcessImplicitDefsID, false);
@@ -1149,21 +1164,10 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
- if (RegAllocPass) {
- // Add the selected register allocation pass.
- addPass(RegAllocPass);
-
- // Allow targets to change the register assignments before rewriting.
- addPreRewrite();
-
- // Finally rewrite virtual registers.
- addPass(&VirtRegRewriterID);
-
- // Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- addPass(&StackSlotColoringID);
+ if (addRegAssignmentOptimized()) {
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ addPostRewrite();
// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
@@ -1221,3 +1225,11 @@ bool TargetPassConfig::isGlobalISelAbortEnabled() const {
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
}
+
+bool TargetPassConfig::isGISelCSEEnabled() const {
+ return true;
+}
+
+std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const {
+ return make_unique<CSEConfigBase>();
+}
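
For orientation (not part of the diff): a minimal sketch of how a backend might build on the hooks this change introduces. The target class and its wiring are hypothetical; addRegAssignmentOptimized(), addPreRewrite(), isGISelCSEEnabled(), getCSEConfig() and the pass IDs are the ones added or used above.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include <memory>

namespace {
// Hypothetical target pass config; only hooks relevant to this change shown.
class MyTargetPassConfig : public llvm::TargetPassConfig {
public:
  MyTargetPassConfig(llvm::LLVMTargetMachine &TM, llvm::PassManagerBase &PM)
      : llvm::TargetPassConfig(TM, PM) {}

  // Same shape as the default addRegAssignmentOptimized() above: allocate,
  // let the target adjust assignments (addPreRewrite), then rewrite vregs.
  bool addRegAssignmentOptimized() override {
    addPass(createRegAllocPass(true));
    addPreRewrite();
    addPass(&llvm::VirtRegRewriterID);
    addPass(&llvm::StackSlotColoringID);
    return true;
  }

  // Opt in to GlobalISel CSE with the default configuration.
  bool isGISelCSEEnabled() const override { return true; }
  std::unique_ptr<llvm::CSEConfigBase> getCSEConfig() const override {
    return llvm::make_unique<llvm::CSEConfigBase>();
  }
};
} // end anonymous namespace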
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 661dc18f7a85..f1b2ecf3243b 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,9 +1,8 @@
//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -398,6 +398,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
MRI.getRegAllocationHints(VirtReg);
+ SmallSet<unsigned, 32> HintedRegs;
// First hint may be a target hint.
bool Skip = (Hints_MRI.first != 0);
for (auto Reg : Hints_MRI.second) {
@@ -411,6 +412,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (VRM && isVirtualRegister(Phys))
Phys = VRM->getPhys(Phys);
+ // Don't add the same reg twice (Hints_MRI may contain multiple virtual
+ // registers allocated to the same physreg).
+ if (!HintedRegs.insert(Phys).second)
+ continue;
// Check that Phys is a valid hint in VirtReg's register class.
if (!isPhysicalRegister(Phys))
continue;
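
For orientation (not part of the diff): a small standalone sketch of the de-duplication idiom the hunk above adds. SmallSet::insert() returns a pair whose .second is false when the element was already present, so a duplicate hint is skipped the second time it is seen; the helper name is made up.

#include "llvm/ADT/SmallSet.h"
#include <vector>

// Keep only the first occurrence of each hinted register.
std::vector<unsigned> uniqueHints(const std::vector<unsigned> &Hints) {
  llvm::SmallSet<unsigned, 32> Seen;
  std::vector<unsigned> Out;
  for (unsigned Reg : Hints) {
    if (!Seen.insert(Reg).second) // already seen, skip the duplicate
      continue;
    Out.push_back(Reg);
  }
  return Out;
}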
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 3cff31ad4933..195279719ad4 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -1,9 +1,8 @@
//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index fa29c05fd6c2..59eb2f9c88cb 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -1,9 +1,8 @@
//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,24 +11,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
using namespace llvm;
TargetSubtargetInfo::TargetSubtargetInfo(
const Triple &TT, StringRef CPU, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
- const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
+ const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
const InstrStage *IS, const unsigned *OC, const unsigned *FP)
- : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
+ : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {
}
TargetSubtargetInfo::~TargetSubtargetInfo() = default;
@@ -67,50 +58,4 @@ bool TargetSubtargetInfo::useAA() const {
return false;
}
-static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
- static const char *SchedPrefix = " sched: [";
- std::string Comment;
- raw_string_ostream CS(Comment);
- if (RThroughput != 0.0)
- CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
- << "]";
- else
- CS << SchedPrefix << Latency << ":?]";
- CS.flush();
- return Comment;
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
- if (MI.isPseudo() || MI.isTerminator())
- return std::string();
- // We don't cache TSchedModel because it depends on TargetInstrInfo
- // that could be changed during the compilation
- TargetSchedModel TSchedModel;
- TSchedModel.init(this);
- unsigned Latency = TSchedModel.computeInstrLatency(&MI);
- double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
- return createSchedInfoStr(Latency, RThroughput);
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
- // We don't cache TSchedModel because it depends on TargetInstrInfo
- // that could be changed during the compilation
- TargetSchedModel TSchedModel;
- TSchedModel.init(this);
- unsigned Latency;
- if (TSchedModel.hasInstrSchedModel())
- Latency = TSchedModel.computeInstrLatency(MCI);
- else if (TSchedModel.hasInstrItineraries()) {
- auto *ItinData = TSchedModel.getInstrItineraries();
- Latency = ItinData->getStageLatency(
- getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
- } else
- return std::string();
- double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
- return createSchedInfoStr(Latency, RThroughput);
-}
-
-void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
-}
+void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { }
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 4b72f6a84ca1..43d876646967 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1,9 +1,8 @@
//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1245,8 +1244,13 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
++NumAggrCommuted;
// There might be more than two commutable operands, update BaseOp and
// continue scanning.
+ // FIXME: This assumes that the new instruction's operands are in the
+ // same positions and were simply swapped.
BaseOpReg = OtherOpReg;
BaseOpKilled = OtherOpKilled;
+ // Resamples OpsNum in case the number of operands was reduced. This
+ // happens with X86.
+ OpsNum = MI->getDesc().getNumOperands();
continue;
}
// If this was a commute based on kill, we won't do better continuing.
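
For orientation (not part of the diff): a generic standalone sketch of the pattern behind re-sampling OpsNum above. When the loop body can change how many elements there are, the bound must be re-read on every iteration instead of being cached once; the container and helper here are made up.

#include <cstddef>
#include <vector>

// Remove negative entries in place; Ops.size() is re-evaluated each pass
// because erase() shrinks the vector.
void dropNegativesInPlace(std::vector<int> &Ops) {
  std::size_t I = 0;
  while (I < Ops.size()) {
    if (Ops[I] < 0)
      Ops.erase(Ops.begin() + static_cast<std::ptrdiff_t>(I));
    else
      ++I;
  }
}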
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 5288ca672774..177bab32bccc 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -1,9 +1,8 @@
//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,43 +37,13 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
-static bool eliminateUnreachableBlock(Function &F) {
- df_iterator_default_set<BasicBlock*> Reachable;
-
- // Mark all reachable blocks.
- for (BasicBlock *BB : depth_first_ext(&F, Reachable))
- (void)BB/* Mark all reachable blocks */;
-
- // Loop over all dead blocks, remembering them and deleting all instructions
- // in them.
- std::vector<BasicBlock*> DeadBlocks;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (!Reachable.count(&*I)) {
- BasicBlock *BB = &*I;
- DeadBlocks.push_back(BB);
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
- PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
- BB->getInstList().pop_front();
- }
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
- (*SI)->removePredecessor(BB);
- BB->dropAllReferences();
- }
-
- // Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
- DeadBlocks[i]->eraseFromParent();
- }
-
- return !DeadBlocks.empty();
-}
-
namespace {
class UnreachableBlockElimLegacyPass : public FunctionPass {
bool runOnFunction(Function &F) override {
- return eliminateUnreachableBlock(F);
+ return llvm::EliminateUnreachableBlocks(F);
}
public:
@@ -99,7 +68,7 @@ FunctionPass *llvm::createUnreachableBlockEliminationPass() {
PreservedAnalyses UnreachableBlockElimPass::run(Function &F,
FunctionAnalysisManager &AM) {
- bool Changed = eliminateUnreachableBlock(F);
+ bool Changed = llvm::EliminateUnreachableBlocks(F);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp
index adb7075de651..a911cdcbec9d 100644
--- a/lib/CodeGen/ValueTypes.cpp
+++ b/lib/CodeGen/ValueTypes.cpp
@@ -1,9 +1,8 @@
//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -166,11 +165,18 @@ std::string EVT::getEVTString() const {
case MVT::v128i16: return "v128i16";
case MVT::v1i32: return "v1i32";
case MVT::v2i32: return "v2i32";
+ case MVT::v3i32: return "v3i32";
case MVT::v4i32: return "v4i32";
+ case MVT::v5i32: return "v5i32";
case MVT::v8i32: return "v8i32";
case MVT::v16i32: return "v16i32";
case MVT::v32i32: return "v32i32";
case MVT::v64i32: return "v64i32";
+ case MVT::v128i32: return "v128i32";
+ case MVT::v256i32: return "v256i32";
+ case MVT::v512i32: return "v512i32";
+ case MVT::v1024i32:return "v1024i32";
+ case MVT::v2048i32:return "v2048i32";
case MVT::v1i64: return "v1i64";
case MVT::v2i64: return "v2i64";
case MVT::v4i64: return "v4i64";
@@ -183,16 +189,25 @@ std::string EVT::getEVTString() const {
case MVT::v2f16: return "v2f16";
case MVT::v4f16: return "v4f16";
case MVT::v8f16: return "v8f16";
+ case MVT::v3f32: return "v3f32";
case MVT::v4f32: return "v4f32";
+ case MVT::v5f32: return "v5f32";
case MVT::v8f32: return "v8f32";
case MVT::v16f32: return "v16f32";
+ case MVT::v32f32: return "v32f32";
+ case MVT::v64f32: return "v64f32";
+ case MVT::v128f32: return "v128f32";
+ case MVT::v256f32: return "v256f32";
+ case MVT::v512f32: return "v512f32";
+ case MVT::v1024f32:return "v1024f32";
+ case MVT::v2048f32:return "v2048f32";
case MVT::v1f64: return "v1f64";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64";
case MVT::Metadata:return "Metadata";
case MVT::Untyped: return "Untyped";
- case MVT::ExceptRef: return "ExceptRef";
+ case MVT::exnref : return "exnref";
}
}
@@ -247,11 +262,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128);
case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v3i32: return VectorType::get(Type::getInt32Ty(Context), 3);
case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v5i32: return VectorType::get(Type::getInt32Ty(Context), 5);
case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32);
case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64);
+ case MVT::v128i32: return VectorType::get(Type::getInt32Ty(Context), 128);
+ case MVT::v256i32: return VectorType::get(Type::getInt32Ty(Context), 256);
+ case MVT::v512i32: return VectorType::get(Type::getInt32Ty(Context), 512);
+ case MVT::v1024i32:return VectorType::get(Type::getInt32Ty(Context), 1024);
+ case MVT::v2048i32:return VectorType::get(Type::getInt32Ty(Context), 2048);
case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
@@ -264,9 +286,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v5f32: return VectorType::get(Type::getFloatTy(Context), 5);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
- case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v32f32: return VectorType::get(Type::getFloatTy(Context), 32);
+ case MVT::v64f32: return VectorType::get(Type::getFloatTy(Context), 64);
+ case MVT::v128f32: return VectorType::get(Type::getFloatTy(Context), 128);
+ case MVT::v256f32: return VectorType::get(Type::getFloatTy(Context), 256);
+ case MVT::v512f32: return VectorType::get(Type::getFloatTy(Context), 512);
+ case MVT::v1024f32:return VectorType::get(Type::getFloatTy(Context), 1024);
+ case MVT::v2048f32:return VectorType::get(Type::getFloatTy(Context), 2048);
case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
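
For orientation (not part of the diff): a minimal sketch exercising the EVT helpers extended above on one of the newly named vector types. The function name is made up; the string follows the getEVTString() table and the IR type is <3 x float>.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

// Print "v3f32" and its IR type.
void printV3F32(llvm::LLVMContext &Ctx) {
  llvm::EVT VT(llvm::MVT::v3f32);
  llvm::outs() << VT.getEVTString() << " -> ";
  VT.getTypeForEVT(Ctx)->print(llvm::outs());
  llvm::outs() << "\n";
}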
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ed7bef667e77..4a06704a8876 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -385,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
- if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+ if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
MI.setDesc(TII->get(TargetOpcode::KILL));
LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;
diff --git a/lib/CodeGen/WasmEHPrepare.cpp b/lib/CodeGen/WasmEHPrepare.cpp
index e5002eb95346..865a1cfbf43a 100644
--- a/lib/CodeGen/WasmEHPrepare.cpp
+++ b/lib/CodeGen/WasmEHPrepare.cpp
@@ -1,14 +1,14 @@
//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation is designed for use by code generators which use
-// WebAssembly exception handling scheme.
+// the WebAssembly exception handling scheme. This currently supports C++
+// exceptions.
//
// WebAssembly exception handling uses Windows exception IR for the middle level
// representation. This pass does the following transformation for every
@@ -23,53 +23,20 @@
//
// - After:
// catchpad ...
-// exn = wasm.catch(0); // 0 is a tag for C++
-// wasm.landingpad.index(index);
+// exn = wasm.extract.exception();
// // Only add below in case it's not a single catch (...)
+// wasm.landingpad.index(index);
// __wasm_lpad_context.lpad_index = index;
// __wasm_lpad_context.lsda = wasm.lsda();
// _Unwind_CallPersonality(exn);
-// int selector = __wasm.landingpad_context.selector;
+// selector = __wasm_lpad_context.selector;
// ...
//
-// Also, does the following for a cleanuppad block with a call to
-// __clang_call_terminate():
-// - Before:
-// cleanuppad ...
-// exn = wasm.get.exception();
-// __clang_call_terminate(exn);
-//
-// - After:
-// cleanuppad ...
-// exn = wasm.catch(0); // 0 is a tag for C++
-// __clang_call_terminate(exn);
-//
-//
-// * Background: WebAssembly EH instructions
-// WebAssembly's try and catch instructions are structured as follows:
-// try
-// instruction*
-// catch (C++ tag)
-// instruction*
-// ...
-// catch_all
-// instruction*
-// try_end
-//
-// A catch instruction in WebAssembly does not correspond to a C++ catch clause.
-// In WebAssembly, there is a single catch instruction for all C++ exceptions.
-// There can be more catch instructions for exceptions in other languages, but
-// they are not generated for now. catch_all catches all exceptions including
-// foreign exceptions (e.g. JavaScript). We turn catchpads into catch (C++ tag)
-// and cleanuppads into catch_all, with one exception: cleanuppad with a call to
-// __clang_call_terminate should be both in catch (C++ tag) and catch_all.
-//
//
// * Background: Direct personality function call
// In WebAssembly EH, the VM is responsible for unwinding the stack once an
// exception is thrown. After the stack is unwound, the control flow is
-// transfered to WebAssembly 'catch' instruction, which returns a caught
-// exception object.
+// transferred to WebAssembly 'catch' instruction.
//
// Unwinding the stack is not done by libunwind but the VM, so the personality
// function in libcxxabi cannot be called from libunwind during the unwinding
@@ -137,19 +104,19 @@ class WasmEHPrepare : public FunctionPass {
Value *LSDAField = nullptr; // lsda field
Value *SelectorField = nullptr; // selector
- Function *ThrowF = nullptr; // wasm.throw() intrinsic
- Function *CatchF = nullptr; // wasm.catch.extract() intrinsic
- Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
- Function *LSDAF = nullptr; // wasm.lsda() intrinsic
- Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
- Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
- Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
- Function *ClangCallTermF = nullptr; // __clang_call_terminate() function
+ Function *ThrowF = nullptr; // wasm.throw() intrinsic
+ Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
+ Function *LSDAF = nullptr; // wasm.lsda() intrinsic
+ Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
+ Function *ExtractExnF = nullptr; // wasm.extract.exception() intrinsic
+ Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
+ FunctionCallee CallPersonalityF =
+ nullptr; // _Unwind_CallPersonality() wrapper
bool prepareEHPads(Function &F);
bool prepareThrows(Function &F);
- void prepareEHPad(BasicBlock *BB, unsigned Index);
+ void prepareEHPad(BasicBlock *BB, bool NeedLSDA, unsigned Index = 0);
void prepareTerminateCleanupPad(BasicBlock *BB);
public:
@@ -209,14 +176,12 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
// wasm.throw() intrinsic, which will be lowered to wasm 'throw' instruction.
ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
-
// Insert an unreachable instruction after a call to @llvm.wasm.throw and
// delete all following instructions within the BB, and delete all the dead
// children of the BB as well.
for (User *U : ThrowF->users()) {
- // A call to @llvm.wasm.throw() is only generated from
- // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
- // InvokeInst.
+ // A call to @llvm.wasm.throw() is only generated from __cxa_throw()
+ // builtin call within libcxxabi, and cannot be an InvokeInst.
auto *ThrowI = cast<CallInst>(U);
if (ThrowI->getFunction() != &F)
continue;
@@ -263,8 +228,6 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
"selector_gep");
- // wasm.catch() intinsic, which will be lowered to wasm 'catch' instruction.
- CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
// wasm.landingpad.index() intrinsic, which is to specify landingpad index
LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
// wasm.lsda() intrinsic. Returns the address of LSDA table for the current
@@ -275,14 +238,18 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
- // _Unwind_CallPersonality() wrapper function, which calls the personality
- CallPersonalityF = cast<Function>(M.getOrInsertFunction(
- "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()));
- CallPersonalityF->setDoesNotThrow();
+ // wasm.extract.exception() is the same as wasm.get.exception() but it does
+  // not take a token argument. It will be lowered to an EXTRACT_EXCEPTION
+  // pseudo instruction during instruction selection, which will later be
+  // expanded using the 'br_on_exn' instruction.
+ ExtractExnF =
+ Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception);
- // __clang_call_terminate() function, which is inserted by clang in case a
- // cleanup throws
- ClangCallTermF = M.getFunction("__clang_call_terminate");
+ // _Unwind_CallPersonality() wrapper function, which calls the personality
+ CallPersonalityF = M.getOrInsertFunction(
+ "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
+ if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
+ F->setDoesNotThrow();
unsigned Index = 0;
for (auto *BB : CatchPads) {
@@ -290,60 +257,52 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
// In case of a single catch (...), we don't need to emit LSDA
if (CPI->getNumArgOperands() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue())
- prepareEHPad(BB, -1);
+ prepareEHPad(BB, false);
else
- prepareEHPad(BB, Index++);
+ prepareEHPad(BB, true, Index++);
}
- if (!ClangCallTermF)
- return !CatchPads.empty();
-
- // Cleanuppads will turn into catch_all later, but cleanuppads with a call to
- // __clang_call_terminate() is a special case. __clang_call_terminate() takes
- // an exception object, so we have to duplicate call in both 'catch <C++ tag>'
- // and 'catch_all' clauses. Here we only insert a call to catch; the
- // duplication will be done later. In catch_all, the exception object will be
- // set to null.
+ // Cleanup pads don't need LSDA.
for (auto *BB : CleanupPads)
- for (auto &I : *BB)
- if (auto *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledValue() == ClangCallTermF)
- prepareEHPad(BB, -1);
+ prepareEHPad(BB, false);
return true;
}
-void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
+// Prepare an EH pad for Wasm EH handling. If NeedLSDA is false, Index is
+// ignored.
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
+ unsigned Index) {
assert(BB->isEHPad() && "BB is not an EHPad!");
IRBuilder<> IRB(BB->getContext());
-
IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
- // The argument to wasm.catch() is the tag for C++ exceptions, which we set to
- // 0 for this module.
- // Pseudocode: void *exn = wasm.catch(0);
- Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn");
- // Replace the return value of wasm.get.exception() with the return value from
- // wasm.catch().
+
auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
for (auto &U : FPI->uses()) {
if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
if (CI->getCalledValue() == GetExnF)
GetExnCI = CI;
- else if (CI->getCalledValue() == GetSelectorF)
+ if (CI->getCalledValue() == GetSelectorF)
GetSelectorCI = CI;
}
}
- assert(GetExnCI && "wasm.get.exception() call does not exist");
- GetExnCI->replaceAllUsesWith(Exn);
+  // Cleanup pads without a __clang_call_terminate call do not have any
+  // wasm.get.exception() or wasm.get.ehselector() calls, so there is nothing to do.
+ if (!GetExnCI) {
+ assert(!GetSelectorCI &&
+ "wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
+ return;
+ }
+
+ Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn");
+ GetExnCI->replaceAllUsesWith(ExtractExnCI);
GetExnCI->eraseFromParent();
// In case it is a catchpad with single catch (...) or a cleanuppad, we don't
// need to call personality function because we don't need a selector.
- if (FPI->getNumArgOperands() == 0 ||
- (FPI->getNumArgOperands() == 1 &&
- cast<Constant>(FPI->getArgOperand(0))->isNullValue())) {
+ if (!NeedLSDA) {
if (GetSelectorCI) {
assert(GetSelectorCI->use_empty() &&
"wasm.get.ehselector() still has uses!");
@@ -351,7 +310,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
}
return;
}
- IRB.SetInsertPoint(Exn->getNextNode());
+ IRB.SetInsertPoint(ExtractExnCI->getNextNode());
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
@@ -373,12 +332,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
// Pseudocode: _Unwind_CallPersonality(exn);
- CallInst *PersCI =
- IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI));
+ CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI,
+ OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
// Pseudocode: int selector = __wasm.landingpad_context.selector;
- Instruction *Selector = IRB.CreateLoad(SelectorField, "selector");
+ Instruction *Selector =
+ IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
// Replace the return value from wasm.get.ehselector() with the selector value
// loaded from __wasm_lpad_context.selector.
@@ -388,15 +348,15 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
}
void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+ // If an exception is not caught by a catchpad (i.e., it is a foreign
+ // exception), it will unwind to its parent catchswitch's unwind destination.
+ // We don't record an unwind destination for cleanuppads because every
+ // exception should be caught by it.
for (const auto &BB : *F) {
if (!BB.isEHPad())
continue;
const Instruction *Pad = BB.getFirstNonPHI();
- // If an exception is not caught by a catchpad (i.e., it is a foreign
- // exception), it will unwind to its parent catchswitch's unwind
- // destination. We don't record an unwind destination for cleanuppads
- // because every exception should be caught by it.
if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
if (!UnwindBB)
@@ -409,22 +369,4 @@ void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
}
}
-
- // Record the unwind destination for invoke and cleanupret instructions.
- for (const auto &BB : *F) {
- const Instruction *TI = BB.getTerminator();
- BasicBlock *UnwindBB = nullptr;
- if (const auto *Invoke = dyn_cast<InvokeInst>(TI))
- UnwindBB = Invoke->getUnwindDest();
- else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI))
- UnwindBB = CleanupRet->getUnwindDest();
- if (!UnwindBB)
- continue;
- const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
- if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
- // Currently there should be only one handler per a catchswitch.
- EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin());
- else // cleanuppad
- EHInfo.setThrowUnwindDest(&BB, UnwindBB);
- }
}
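
For orientation (not part of the diff): a hedged sketch of the replace-and-erase pattern prepareEHPad() uses above, written as a free-standing helper with a made-up name. It assumes the intrinsic takes no arguments, as wasm.extract.exception does after this change.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// Replace OldCall (e.g. a wasm.get.exception() call) with a fresh call to
// NewIntrinsic and keep every user pointed at the new value.
llvm::Instruction *replaceWithIntrinsicCall(llvm::CallInst *OldCall,
                                            llvm::Intrinsic::ID NewIntrinsic) {
  llvm::Module *M = OldCall->getModule();
  llvm::Function *F = llvm::Intrinsic::getDeclaration(M, NewIntrinsic);
  llvm::IRBuilder<> IRB(OldCall);          // insert right before OldCall
  llvm::Instruction *NewCall = IRB.CreateCall(F, {}, "exn");
  OldCall->replaceAllUsesWith(NewCall);
  OldCall->eraseFromParent();
  return NewCall;
}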
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6a15240fa6e0..cdf79374e974 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -1,9 +1,8 @@
//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1080,7 +1079,8 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(PN->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
- Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+ Value *V = new LoadInst(PN->getType(), SpillSlot,
+ Twine(PN->getName(), ".wineh.reload"),
&*PHIBlock->getFirstInsertionPt());
PN->replaceAllUsesWith(V);
return SpillSlot;
@@ -1222,14 +1222,16 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Value *&Load = Loads[IncomingBlock];
// Insert the load into the predecessor block
if (!Load)
- Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
- /*Volatile=*/false, IncomingBlock->getTerminator());
+ Load = new LoadInst(V->getType(), SpillSlot,
+ Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, IncomingBlock->getTerminator());
U.set(Load);
} else {
// Reload right before the old use.
- auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
- /*Volatile=*/false, UsingInst);
+ auto *Load = new LoadInst(V->getType(), SpillSlot,
+ Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, UsingInst);
U.set(Load);
}
}
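
For orientation (not part of the diff): the two hunks above adapt to LoadInst constructors that take the loaded type explicitly. A minimal sketch of the new form, with a made-up helper name:

#include "llvm/IR/Instructions.h"

// Reload a value of type Ty from SpillSlot immediately before InsertBefore.
llvm::Value *reloadFromSlot(llvm::Type *Ty, llvm::Value *SpillSlot,
                            llvm::Instruction *InsertBefore) {
  return new llvm::LoadInst(Ty, SpillSlot, "reload",
                            /*isVolatile=*/false, InsertBefore);
}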
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 32a7457c2060..19c59e9542b4 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -1,9 +1,8 @@
//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,6 +110,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
for (auto &MO : T.operands())
MIB.add(MO);
Terminators.push_back(&T);
+ if (T.isCall())
+ MF.updateCallSiteInfo(&T);
}
}
}
diff --git a/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
index 8828671d9be9..86a6f9eebfa2 100644
--- a/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
+++ b/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
@@ -1,9 +1,8 @@
//===- AppendingTypeTableBuilder.cpp --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -50,13 +49,8 @@ Optional<TypeIndex> AppendingTypeTableBuilder::getNext(TypeIndex Prev) {
return Prev;
}
-CVType AppendingTypeTableBuilder::getType(TypeIndex Index) {
- CVType Type;
- Type.RecordData = SeenRecords[Index.toArrayIndex()];
- const RecordPrefix *P =
- reinterpret_cast<const RecordPrefix *>(Type.RecordData.data());
- Type.Type = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
- return Type;
+CVType AppendingTypeTableBuilder::getType(TypeIndex Index) {
+ return CVType(SeenRecords[Index.toArrayIndex()]);
}
StringRef AppendingTypeTableBuilder::getTypeName(TypeIndex Index) {
diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index cbcaa5692828..48b9b0496ffe 100644
--- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -1,9 +1,8 @@
//===- CVSymbolVisitor.cpp --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,7 +20,7 @@ CVSymbolVisitor::CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks)
template <typename T>
static Error visitKnownRecord(CVSymbol &Record,
SymbolVisitorCallbacks &Callbacks) {
- SymbolRecordKind RK = static_cast<SymbolRecordKind>(Record.Type);
+ SymbolRecordKind RK = static_cast<SymbolRecordKind>(Record.kind());
T KnownRecord(RK);
if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
return EC;
@@ -30,7 +29,7 @@ static Error visitKnownRecord(CVSymbol &Record,
static Error finishVisitation(CVSymbol &Record,
SymbolVisitorCallbacks &Callbacks) {
- switch (Record.Type) {
+ switch (Record.kind()) {
default:
if (auto EC = Callbacks.visitUnknownSymbol(Record))
return EC;
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index a4182a3b2fa1..ec4773d571c8 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -1,9 +1,8 @@
//===- CVTypeVisitor.cpp ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,7 +22,7 @@ using namespace llvm::codeview;
template <typename T>
static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) {
- TypeRecordKind RK = static_cast<TypeRecordKind>(Record.Type);
+ TypeRecordKind RK = static_cast<TypeRecordKind>(Record.kind());
T KnownRecord(RK);
if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
return EC;
@@ -97,7 +96,7 @@ CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
: Callbacks(Callbacks) {}
Error CVTypeVisitor::finishVisitation(CVType &Record) {
- switch (Record.Type) {
+ switch (Record.kind()) {
default:
if (auto EC = Callbacks.visitUnknownType(Record))
return EC;
@@ -210,6 +209,14 @@ struct VisitHelper {
}
}
+ VisitHelper(TypeVisitorCallbackPipeline &Callbacks, VisitorDataSource Source)
+ : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) {
+ if (Source == VDS_BytesPresent) {
+ Pipeline = Callbacks;
+ Pipeline.addCallbackToPipelineFront(Deserializer);
+ }
+ }
+
TypeDeserializer Deserializer;
TypeVisitorCallbackPipeline Pipeline;
CVTypeVisitor Visitor;
@@ -223,6 +230,13 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
return V.Visitor.visitTypeRecord(Record, Index);
}
+Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index,
+ TypeVisitorCallbackPipeline &Callbacks,
+ VisitorDataSource Source) {
+ VisitHelper V(Callbacks, Source);
+ return V.Visitor.visitTypeRecord(Record, Index);
+}
+
Error llvm::codeview::visitTypeRecord(CVType &Record,
TypeVisitorCallbacks &Callbacks,
VisitorDataSource Source) {
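
For orientation (not part of the diff): a usage sketch for the pipeline overload of visitTypeRecord() added above. The wrapper name is made up; the pipeline and the VDS_BytesPresent source mirror what VisitHelper does.

#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"

// Run two callback sets over one serialized record; the visitor prepends a
// deserializer because the record's raw bytes are present.
llvm::Error visitOneRecord(llvm::codeview::CVType &Record,
                           llvm::codeview::TypeIndex Index,
                           llvm::codeview::TypeVisitorCallbacks &First,
                           llvm::codeview::TypeVisitorCallbacks &Second) {
  llvm::codeview::TypeVisitorCallbackPipeline Pipeline;
  Pipeline.addCallbackToPipeline(First);
  Pipeline.addCallbackToPipeline(Second);
  return llvm::codeview::visitTypeRecord(Record, Index, Pipeline,
                                         llvm::codeview::VDS_BytesPresent);
}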
diff --git a/lib/DebugInfo/CodeView/CodeViewError.cpp b/lib/DebugInfo/CodeView/CodeViewError.cpp
index 2a9753add311..69390c708f59 100644
--- a/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -1,9 +1,8 @@
//===- CodeViewError.cpp - Error extensions for CodeView --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
using namespace llvm;
using namespace llvm::codeview;
+namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
@@ -39,6 +39,7 @@ public:
llvm_unreachable("Unrecognized cv_error_code");
}
};
+} // namespace
static llvm::ManagedStatic<CodeViewErrorCategory> CodeViewErrCategory;
const std::error_category &llvm::codeview::CVErrorCategory() {
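
For orientation (not part of the diff): the hunk above only gives CodeViewErrorCategory internal linkage. A generic standalone sketch of the same pattern, with made-up names:

#include <string>
#include <system_error>

namespace {
// File-local error category; the anonymous namespace gives it internal
// linkage and avoids clashes with categories defined in other TUs.
class DemoErrorCategory : public std::error_category {
public:
  const char *name() const noexcept override { return "demo.error"; }
  std::string message(int Condition) const override {
    return Condition == 0 ? "success" : "demo failure";
  }
};
} // namespace

static const DemoErrorCategory TheDemoCategory;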
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 4fc14480578e..2f49474115a1 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -1,9 +1,8 @@
//===- CodeViewRecordIO.cpp -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,6 +20,7 @@ Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) {
Limit.MaxLength = MaxLength;
Limit.BeginOffset = getCurrentOffset();
Limits.push_back(Limit);
+ resetStreamedLen();
return Error::success();
}
@@ -35,10 +35,29 @@ Error CodeViewRecordIO::endRecord() {
// we don't know how big the record is until we're finished writing it, so
// even though we don't commit the extraneous data, we still can't guarantee
// we're at the end of the allocated data.
+
+ if (isStreaming()) {
+ // For streaming mode, add padding to align with 4 byte boundaries for each
+ // record
+ uint32_t Align = getStreamedLen() % 4;
+ if (Align == 0)
+ return Error::success();
+
+ int PaddingBytes = 4 - Align;
+ while (PaddingBytes > 0) {
+ char Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes);
+ StringRef BytesSR = StringRef(&Pad, sizeof(Pad));
+ Streamer->EmitBytes(BytesSR);
+ --PaddingBytes;
+ }
+ }
return Error::success();
}
uint32_t CodeViewRecordIO::maxFieldLength() const {
+ if (isStreaming())
+ return 0;
+
assert(!Limits.empty() && "Not in a record!");
// The max length of the next field is the minimum of all lengths that would
@@ -78,8 +97,13 @@ Error CodeViewRecordIO::skipPadding() {
return Reader->skip(BytesToAdvance);
}
-Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes,
+ const Twine &Comment) {
+ if (isStreaming()) {
+ emitComment(Comment);
+ Streamer->EmitBinaryData(toStringRef(Bytes));
+ incrStreamedLen(Bytes.size());
+ } else if (isWriting()) {
if (auto EC = Writer->writeBytes(Bytes))
return EC;
} else {
@@ -89,9 +113,10 @@ Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
return Error::success();
}
-Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
+Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes,
+ const Twine &Comment) {
ArrayRef<uint8_t> BytesRef(Bytes);
- if (auto EC = mapByteVectorTail(BytesRef))
+ if (auto EC = mapByteVectorTail(BytesRef, Comment))
return EC;
if (!isWriting())
Bytes.assign(BytesRef.begin(), BytesRef.end());
@@ -99,22 +124,31 @@ Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
return Error::success();
}
-Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) {
+ if (isStreaming()) {
+ emitComment(Comment);
+ Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex()));
+ incrStreamedLen(sizeof(TypeInd.getIndex()));
+ } else if (isWriting()) {
if (auto EC = Writer->writeInteger(TypeInd.getIndex()))
return EC;
- return Error::success();
+ } else {
+ uint32_t I;
+ if (auto EC = Reader->readInteger(I))
+ return EC;
+ TypeInd.setIndex(I);
}
-
- uint32_t I;
- if (auto EC = Reader->readInteger(I))
- return EC;
- TypeInd.setIndex(I);
return Error::success();
}
-Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value,
+ const Twine &Comment) {
+ if (isStreaming()) {
+ if (Value >= 0)
+ emitEncodedUnsignedInteger(static_cast<uint64_t>(Value), Comment);
+ else
+ emitEncodedSignedInteger(Value, Comment);
+ } else if (isWriting()) {
if (Value >= 0) {
if (auto EC = writeEncodedUnsignedInteger(static_cast<uint64_t>(Value)))
return EC;
@@ -132,8 +166,11 @@ Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
return Error::success();
}
-Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value,
+ const Twine &Comment) {
+ if (isStreaming())
+ emitEncodedUnsignedInteger(Value, Comment);
+ else if (isWriting()) {
if (auto EC = writeEncodedUnsignedInteger(Value))
return EC;
} else {
@@ -145,18 +182,28 @@ Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
return Error::success();
}
-Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value, const Twine &Comment) {
+ if (isStreaming()) {
+ if (Value.isSigned())
+ emitEncodedSignedInteger(Value.getSExtValue(), Comment);
+ else
+ emitEncodedUnsignedInteger(Value.getZExtValue(), Comment);
+ } else if (isWriting()) {
if (Value.isSigned())
return writeEncodedSignedInteger(Value.getSExtValue());
return writeEncodedUnsignedInteger(Value.getZExtValue());
- }
-
- return consume(*Reader, Value);
+ } else
+ return consume(*Reader, Value);
+ return Error::success();
}
-Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapStringZ(StringRef &Value, const Twine &Comment) {
+ if (isStreaming()) {
+ auto NullTerminatedString = StringRef(Value.data(), Value.size() + 1);
+ emitComment(Comment);
+ Streamer->EmitBytes(NullTerminatedString);
+ incrStreamedLen(NullTerminatedString.size());
+ } else if (isWriting()) {
// Truncate if we attempt to write too much.
StringRef S = Value.take_front(maxFieldLength() - 1);
if (auto EC = Writer->writeCString(S))
@@ -168,8 +215,18 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
return Error::success();
}
-Error CodeViewRecordIO::mapGuid(GUID &Guid) {
+Error CodeViewRecordIO::mapGuid(GUID &Guid, const Twine &Comment) {
constexpr uint32_t GuidSize = 16;
+
+ if (isStreaming()) {
+ StringRef GuidSR =
+ StringRef((reinterpret_cast<const char *>(&Guid)), GuidSize);
+ emitComment(Comment);
+ Streamer->EmitBytes(GuidSR);
+ incrStreamedLen(GuidSize);
+ return Error::success();
+ }
+
if (maxFieldLength() < GuidSize)
return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
@@ -185,13 +242,17 @@ Error CodeViewRecordIO::mapGuid(GUID &Guid) {
return Error::success();
}
-Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
- if (isWriting()) {
+Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value,
+ const Twine &Comment) {
+
+ if (!isReading()) {
+ emitComment(Comment);
for (auto V : Value) {
if (auto EC = mapStringZ(V))
return EC;
}
- if (auto EC = Writer->writeInteger<uint8_t>(0))
+ uint8_t FinalZero = 0;
+ if (auto EC = mapInteger(FinalZero))
return EC;
} else {
StringRef S;
@@ -206,6 +267,56 @@ Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
return Error::success();
}
+void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value,
+ const Twine &Comment) {
+ assert(Value < 0 && "Encoded integer is not signed!");
+ if (Value >= std::numeric_limits<int8_t>::min()) {
+ Streamer->EmitIntValue(LF_CHAR, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 1);
+ incrStreamedLen(3);
+ } else if (Value >= std::numeric_limits<int16_t>::min()) {
+ Streamer->EmitIntValue(LF_SHORT, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 2);
+ incrStreamedLen(4);
+ } else if (Value >= std::numeric_limits<int32_t>::min()) {
+ Streamer->EmitIntValue(LF_LONG, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 4);
+ incrStreamedLen(6);
+ } else {
+ Streamer->EmitIntValue(LF_QUADWORD, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 4);
+ incrStreamedLen(6);
+ }
+}
+
+void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value,
+ const Twine &Comment) {
+ if (Value < LF_NUMERIC) {
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 2);
+ incrStreamedLen(2);
+ } else if (Value <= std::numeric_limits<uint16_t>::max()) {
+ Streamer->EmitIntValue(LF_USHORT, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 2);
+ incrStreamedLen(4);
+ } else if (Value <= std::numeric_limits<uint32_t>::max()) {
+ Streamer->EmitIntValue(LF_ULONG, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 4);
+ incrStreamedLen(6);
+ } else {
+ Streamer->EmitIntValue(LF_UQUADWORD, 2);
+ emitComment(Comment);
+ Streamer->EmitIntValue(Value, 8);
+ incrStreamedLen(6);
+ }
+}
+
Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
assert(Value < 0 && "Encoded integer is not signed!");
if (Value >= std::numeric_limits<int8_t>::min()) {
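
For orientation (not part of the diff): a standalone sketch of the variable-width numeric-leaf encoding that emitEncodedUnsignedInteger() above streams out. The marker values are quoted from memory of llvm::codeview's leaf enum and should be treated as assumptions; the helpers are made up.

#include <cstdint>
#include <limits>
#include <vector>

// Assumed CodeView leaf markers (see llvm/DebugInfo/CodeView/CodeView.h).
enum : uint16_t {
  LF_NUMERIC   = 0x8000,
  LF_USHORT    = 0x8002,
  LF_ULONG     = 0x8004,
  LF_UQUADWORD = 0x800a
};

// Append Value to Out as Bytes little-endian bytes.
static void emitLE(std::vector<uint8_t> &Out, uint64_t Value, unsigned Bytes) {
  for (unsigned I = 0; I < Bytes; ++I)
    Out.push_back(static_cast<uint8_t>(Value >> (8 * I)));
}

// Values below LF_NUMERIC are stored inline in two bytes; larger values get
// a two-byte marker followed by a 2-, 4-, or 8-byte payload.
std::vector<uint8_t> encodeUnsignedLeaf(uint64_t Value) {
  std::vector<uint8_t> Out;
  if (Value < LF_NUMERIC) {
    emitLE(Out, Value, 2);
  } else if (Value <= std::numeric_limits<uint16_t>::max()) {
    emitLE(Out, LF_USHORT, 2);
    emitLE(Out, Value, 2);
  } else if (Value <= std::numeric_limits<uint32_t>::max()) {
    emitLE(Out, LF_ULONG, 2);
    emitLE(Out, Value, 4);
  } else {
    emitLE(Out, LF_UQUADWORD, 2);
    emitLE(Out, Value, 8);
  }
  return Out;
}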
diff --git a/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
index f180fc6990fc..799cffb7116e 100644
--- a/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
+++ b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
@@ -66,14 +66,11 @@ void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) {
InjectedSegmentBytes =
ArrayRef<uint8_t>(FLIB, FLIB + sizeof(SegmentInjection));
- CVType Type;
- Type.Type = getTypeLeafKind(RecordKind);
+ // Seed the first record with an appropriate record prefix.
+ RecordPrefix Prefix(getTypeLeafKind(RecordKind));
+ CVType Type(&Prefix, sizeof(Prefix));
cantFail(Mapping.visitTypeBegin(Type));
- // Seed the first trecord with an appropriate record prefix.
- RecordPrefix Prefix;
- Prefix.RecordLen = 0;
- Prefix.RecordKind = Type.Type;
cantFail(SegmentWriter.writeObject(Prefix));
}
@@ -156,14 +153,9 @@ CVType ContinuationRecordBuilder::createSegmentRecord(
MutableArrayRef<uint8_t> Data = Buffer.data();
Data = Data.slice(OffBegin, OffEnd - OffBegin);
- CVType Type;
- Type.Type = getTypeLeafKind(*Kind);
- Type.RecordData = Data;
-
// Write the length to the RecordPrefix, making sure it does not include
// sizeof(RecordPrefix.Length)
RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(Data.data());
- assert(Prefix->RecordKind == Type.Type);
Prefix->RecordLen = Data.size() - sizeof(RecordPrefix::RecordLen);
if (RefersTo.hasValue()) {
@@ -175,12 +167,12 @@ CVType ContinuationRecordBuilder::createSegmentRecord(
CR->IndexRef = RefersTo->getIndex();
}
- return Type;
+ return CVType(Data);
}
std::vector<CVType> ContinuationRecordBuilder::end(TypeIndex Index) {
- CVType Type;
- Type.Type = getTypeLeafKind(*Kind);
+ RecordPrefix Prefix(getTypeLeafKind(*Kind));
+ CVType Type(&Prefix, sizeof(Prefix));
cantFail(Mapping.visitTypeEnd(Type));
// We're now done, and we have a series of segments each beginning at an
diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
index 0f155a95d607..3d28bac00c44 100644
--- a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugChecksumsSubsection.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
index cef27787cfd1..b23410409f88 100644
--- a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugCrossExSubsection.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
index 4001741f560a..dbadafd3aaf3 100644
--- a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugCrossImpSubsection.cpp ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
index 5881bf177a55..be8c32d5b294 100644
--- a/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugFrameDataSubsection.cpp -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
index 077c103a615b..48ec7e4ecdd6 100644
--- a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugInlineeLinesSubsection.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
index 57ad40819fbc..ea16c0a6c671 100644
--- a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugLinesSubsection.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index 9b251f5931b3..63342749918d 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugStringTableSubsection.cpp - CodeView String Table -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugSubsection.cpp b/lib/DebugInfo/CodeView/DebugSubsection.cpp
index 67b428bfa713..3f93463fe6d6 100644
--- a/lib/DebugInfo/CodeView/DebugSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugSubsection.cpp -----------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index 55f343c11e7f..0f704f286ee9 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -1,9 +1,8 @@
//===- DebugSubsectionRecord.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
index 9b824333369b..7968b6a2d757 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp
@@ -1,9 +1,8 @@
//===- DebugSubsectionVisitor.cpp -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
index 60fbf9d747b2..52328967357b 100644
--- a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugSymbolRVASubsection.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
index dc8ba8c929ae..c833103663e4 100644
--- a/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp
@@ -1,9 +1,8 @@
//===- DebugSymbolsSubsection.cpp -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,4 +30,4 @@ Error DebugSymbolsSubsection::commit(BinaryStreamWriter &Writer) const {
void DebugSymbolsSubsection::addSymbol(CVSymbol Symbol) {
Records.push_back(Symbol);
Length += Symbol.length();
-}
\ No newline at end of file
+}
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp
index ef4e42f79ebc..54e68ae4ea9f 100644
--- a/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -1,9 +1,8 @@
//===- EnumTables.cpp - Enum to string conversion tables ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -32,10 +31,20 @@ static const EnumEntry<TypeLeafKind> TypeLeafNames[] = {
#undef CV_TYPE
};
-static const EnumEntry<uint16_t> RegisterNames[] = {
+static const EnumEntry<uint16_t> RegisterNames_X86[] = {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+};
+
+static const EnumEntry<uint16_t> RegisterNames_ARM64[] = {
+#define CV_REGISTERS_ARM64
#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name),
#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
};
static const EnumEntry<uint32_t> PublicSymFlagNames[] = {
@@ -87,6 +96,7 @@ static const EnumEntry<codeview::SourceLanguage> SourceLanguages[] = {
CV_ENUM_ENT(SourceLanguage, ILAsm), CV_ENUM_ENT(SourceLanguage, Java),
CV_ENUM_ENT(SourceLanguage, JScript), CV_ENUM_ENT(SourceLanguage, MSIL),
CV_ENUM_ENT(SourceLanguage, HLSL), CV_ENUM_ENT(SourceLanguage, D),
+ CV_ENUM_ENT(SourceLanguage, Swift),
};
static const EnumEntry<uint32_t> CompileSym2FlagNames[] = {
@@ -171,6 +181,7 @@ static const EnumEntry<unsigned> CPUTypeNames[] = {
CV_ENUM_CLASS_ENT(CPUType, ARM_XMAC),
CV_ENUM_CLASS_ENT(CPUType, ARM_WMMX),
CV_ENUM_CLASS_ENT(CPUType, ARM7),
+ CV_ENUM_CLASS_ENT(CPUType, ARM64),
CV_ENUM_CLASS_ENT(CPUType, Omni),
CV_ENUM_CLASS_ENT(CPUType, Ia64),
CV_ENUM_CLASS_ENT(CPUType, Ia64_2),
@@ -300,8 +311,11 @@ ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames() {
return makeArrayRef(TypeLeafNames);
}
-ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
- return makeArrayRef(RegisterNames);
+ArrayRef<EnumEntry<uint16_t>> getRegisterNames(CPUType Cpu) {
+ if (Cpu == CPUType::ARM64) {
+ return makeArrayRef(RegisterNames_ARM64);
+ }
+ return makeArrayRef(RegisterNames_X86);
}
ArrayRef<EnumEntry<uint32_t>> getPublicSymFlagNames() {
diff --git a/lib/DebugInfo/CodeView/Formatters.cpp b/lib/DebugInfo/CodeView/Formatters.cpp
index b8d89c76da3b..a7a8c7ff82bf 100644
--- a/lib/DebugInfo/CodeView/Formatters.cpp
+++ b/lib/DebugInfo/CodeView/Formatters.cpp
@@ -1,9 +1,8 @@
//===- Formatters.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
index e76f9e12f0af..a7ad1d045f04 100644
--- a/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
+++ b/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
@@ -1,9 +1,8 @@
//===- GlobalTypeTableBuilder.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -53,14 +52,7 @@ Optional<TypeIndex> GlobalTypeTableBuilder::getNext(TypeIndex Prev) {
}
CVType GlobalTypeTableBuilder::getType(TypeIndex Index) {
- CVType Type;
- Type.RecordData = SeenRecords[Index.toArrayIndex()];
- if (!Type.RecordData.empty()) {
- assert(Type.RecordData.size() >= sizeof(RecordPrefix));
- const RecordPrefix *P =
- reinterpret_cast<const RecordPrefix *>(Type.RecordData.data());
- Type.Type = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
- }
+ CVType Type(SeenRecords[Index.toArrayIndex()]);
return Type;
}
diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
index ddcad8c631d7..dc1253b7a39f 100644
--- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
+++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp
@@ -1,9 +1,8 @@
//===- LazyRandomTypeCollection.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/Line.cpp b/lib/DebugInfo/CodeView/Line.cpp
index 4cb766b5fd26..53adc8cac511 100644
--- a/lib/DebugInfo/CodeView/Line.cpp
+++ b/lib/DebugInfo/CodeView/Line.cpp
@@ -1,9 +1,8 @@
//===-- Line.cpp ----------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
index 8aee4aa2e2ae..4d7cd468f3ee 100644
--- a/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
+++ b/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp
@@ -1,9 +1,8 @@
//===- MergingTypeTableBuilder.cpp ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -53,11 +52,7 @@ Optional<TypeIndex> MergingTypeTableBuilder::getNext(TypeIndex Prev) {
}
CVType MergingTypeTableBuilder::getType(TypeIndex Index) {
- CVType Type;
- Type.RecordData = SeenRecords[Index.toArrayIndex()];
- const RecordPrefix *P =
- reinterpret_cast<const RecordPrefix *>(Type.RecordData.data());
- Type.Type = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
+ CVType Type(SeenRecords[Index.toArrayIndex()]);
return Type;
}
diff --git a/lib/DebugInfo/CodeView/RecordName.cpp b/lib/DebugInfo/CodeView/RecordName.cpp
index d868ae237a44..cfaad1581159 100644
--- a/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/lib/DebugInfo/CodeView/RecordName.cpp
@@ -1,9 +1,8 @@
//===- RecordName.cpp ----------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/RecordSerialization.cpp b/lib/DebugInfo/CodeView/RecordSerialization.cpp
index bff9a619a846..e7f032f9c670 100644
--- a/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -1,9 +1,8 @@
//===-- RecordSerialization.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
index d28b7c3c2d83..654c40a7470d 100644
--- a/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -3,13 +3,6 @@
using namespace llvm;
using namespace llvm::codeview;
-static void writeRecordPrefix(BinaryStreamWriter &Writer, TypeLeafKind Kind) {
- RecordPrefix Prefix;
- Prefix.RecordKind = Kind;
- Prefix.RecordLen = 0;
- cantFail(Writer.writeObject(Prefix));
-}
-
static void addPadding(BinaryStreamWriter &Writer) {
uint32_t Align = Writer.getOffset() % 4;
if (Align == 0)
@@ -32,10 +25,12 @@ ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
BinaryStreamWriter Writer(ScratchBuffer, support::little);
TypeRecordMapping Mapping(Writer);
- CVType CVT;
- CVT.Type = static_cast<TypeLeafKind>(Record.getKind());
+ // Write the record prefix first with a dummy length but real kind.
+ RecordPrefix DummyPrefix(uint16_t(Record.getKind()));
+ cantFail(Writer.writeObject(DummyPrefix));
- writeRecordPrefix(Writer, CVT.Type);
+ RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(ScratchBuffer.data());
+ CVType CVT(Prefix, sizeof(RecordPrefix));
cantFail(Mapping.visitTypeBegin(CVT));
cantFail(Mapping.visitKnownRecord(CVT, Record));
@@ -43,8 +38,7 @@ ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
addPadding(Writer);
- RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(ScratchBuffer.data());
-
+ // Update the size and kind after serialization.
Prefix->RecordKind = CVT.kind();
Prefix->RecordLen = Writer.getOffset() - sizeof(uint16_t);
diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
index 85d9dbb8c7df..9e204eec8604 100644
--- a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
+++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp
@@ -1,9 +1,8 @@
//===- StringsAndChecksums.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 04e0bab745d3..27cb7e35234b 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -1,9 +1,8 @@
//===-- SymbolDumper.cpp - CodeView symbol info dumper ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -102,10 +101,10 @@ void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) {
}
Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) {
- W.startLine() << getSymbolKindName(CVR.Type);
+ W.startLine() << getSymbolKindName(CVR.kind());
W.getOStream() << " {\n";
W.indent();
- W.printEnum("Kind", unsigned(CVR.Type), getSymbolTypeNames());
+ W.printEnum("Kind", unsigned(CVR.kind()), getSymbolTypeNames());
return Error::success();
}
@@ -326,7 +325,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) {
W.printEnum("BaseRegister", uint16_t(DefRangeRegisterRel.Hdr.Register),
- getRegisterNames());
+ getRegisterNames(CompilationCPUType));
W.printBoolean("HasSpilledUDTMember",
DefRangeRegisterRel.hasSpilledUDTMember());
W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent());
@@ -340,7 +339,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
W.printEnum("Register", uint16_t(DefRangeRegister.Hdr.Register),
- getRegisterNames());
+ getRegisterNames(CompilationCPUType));
W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
printLocalVariableAddrRange(DefRangeRegister.Range,
DefRangeRegister.getRelocationOffset());
@@ -351,7 +350,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(
Error CVSymbolDumperImpl::visitKnownRecord(
CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
W.printEnum("Register", uint16_t(DefRangeSubfieldRegister.Hdr.Register),
- getRegisterNames());
+ getRegisterNames(CompilationCPUType));
W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent);
printLocalVariableAddrRange(DefRangeSubfieldRegister.Range,
@@ -404,7 +403,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FrameCookie.getRelocationOffset(),
FrameCookie.CodeOffset, &LinkageName);
}
- W.printEnum("Register", uint16_t(FrameCookie.Register), getRegisterNames());
+ W.printEnum("Register", uint16_t(FrameCookie.Register),
+ getRegisterNames(CompilationCPUType));
W.printEnum("CookieKind", uint16_t(FrameCookie.CookieKind),
getFrameCookieKindNames());
W.printHex("Flags", FrameCookie.Flags);
@@ -425,10 +425,10 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
getFrameProcSymFlagNames());
W.printEnum("LocalFramePtrReg",
uint16_t(FrameProc.getLocalFramePtrReg(CompilationCPUType)),
- getRegisterNames());
+ getRegisterNames(CompilationCPUType));
W.printEnum("ParamFramePtrReg",
uint16_t(FrameProc.getParamFramePtrReg(CompilationCPUType)),
- getRegisterNames());
+ getRegisterNames(CompilationCPUType));
return Error::success();
}
@@ -506,7 +506,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
RegisterSym &Register) {
printTypeIndex("Type", Register.Index);
- W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames());
+ W.printEnum("Seg", uint16_t(Register.Register),
+ getRegisterNames(CompilationCPUType));
W.printString("Name", Register.Name);
return Error::success();
}
@@ -600,7 +601,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
RegRelativeSym &RegRel) {
W.printHex("Offset", RegRel.Offset);
printTypeIndex("Type", RegRel.Type);
- W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames());
+ W.printEnum("Register", uint16_t(RegRel.Register),
+ getRegisterNames(CompilationCPUType));
W.printString("VarName", RegRel.Name);
return Error::success();
}
@@ -631,6 +633,18 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
return Error::success();
}
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ AnnotationSym &Annot) {
+ W.printHex("Offset", Annot.CodeOffset);
+ W.printHex("Segment", Annot.Segment);
+
+ ListScope S(W, "Strings");
+ for (StringRef Str : Annot.Strings)
+ W.printString(Str);
+
+ return Error::success();
+}
+
Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
W.printNumber("Length", CVR.length());
return Error::success();
diff --git a/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp b/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
index 01746138ad1f..51a5a9e9243e 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp
@@ -1,9 +1,8 @@
//===- SymbolRecordHelpers.cpp ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index 2af8205cebc3..70889839ef48 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -1,9 +1,8 @@
//===- SymbolRecordMapping.cpp -----------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -472,6 +471,18 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
return Error::success();
}
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ AnnotationSym &Annot) {
+
+ error(IO.mapInteger(Annot.CodeOffset));
+ error(IO.mapInteger(Annot.Segment));
+ error(IO.mapVectorN<uint16_t>(
+ Annot.Strings,
+ [](CodeViewRecordIO &IO, StringRef &S) { return IO.mapStringZ(S); }));
+
+ return Error::success();
+}
+
RegisterId codeview::decodeFramePtrReg(EncodedFramePtrReg EncodedReg,
CPUType CPU) {
assert(unsigned(EncodedReg) < 4);
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 0071ecc85685..de9bb42b1798 100644
--- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -1,9 +1,8 @@
//===- SymbolSerializer.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index f5d3bea43a14..d5fea5ee5e29 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -1,9 +1,8 @@
//===-- TypeDumpVisitor.cpp - CodeView type info dumper ----------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -172,11 +171,11 @@ Error TypeDumpVisitor::visitTypeBegin(CVType &Record) {
}
Error TypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
- W->startLine() << getLeafTypeName(Record.Type);
+ W->startLine() << getLeafTypeName(Record.kind());
W->getOStream() << " (" << HexNumber(Index.getIndex()) << ")";
W->getOStream() << " {\n";
W->indent();
- W->printEnum("TypeLeafKind", unsigned(Record.Type),
+ W->printEnum("TypeLeafKind", unsigned(Record.kind()),
makeArrayRef(LeafTypeNames));
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/TypeHashing.cpp b/lib/DebugInfo/CodeView/TypeHashing.cpp
index 826faef35875..2dbc11a84f0b 100644
--- a/lib/DebugInfo/CodeView/TypeHashing.cpp
+++ b/lib/DebugInfo/CodeView/TypeHashing.cpp
@@ -1,9 +1,8 @@
//===- TypeHashing.cpp -------------------------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -55,10 +54,16 @@ GloballyHashedType::hashType(ArrayRef<uint8_t> RecordData,
reinterpret_cast<const TypeIndex *>(RefData.data()), Ref.Count);
for (TypeIndex TI : Indices) {
ArrayRef<uint8_t> BytesToHash;
- if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) {
+ if (TI.isSimple() || TI.isNoneType()) {
const uint8_t *IndexBytes = reinterpret_cast<const uint8_t *>(&TI);
BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex));
} else {
+ if (TI.toArrayIndex() >= Prev.size() ||
+ Prev[TI.toArrayIndex()].empty()) {
+ // There are references to yet-unhashed records. Suspend hashing for
+ // this record until all the other records are processed.
+ return {};
+ }
BytesToHash = Prev[TI.toArrayIndex()].Hash;
}
S.update(BytesToHash);
diff --git a/lib/DebugInfo/CodeView/TypeIndex.cpp b/lib/DebugInfo/CodeView/TypeIndex.cpp
index 332d67470da5..604d342448d3 100644
--- a/lib/DebugInfo/CodeView/TypeIndex.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndex.cpp
@@ -1,9 +1,8 @@
//===-- TypeIndex.cpp - CodeView type index ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
index 839ab6f0a705..e84e1c9cea78 100644
--- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
+++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp
@@ -1,9 +1,8 @@
//===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
@@ -364,14 +363,16 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
// values. One idea is to define some structures representing these types
// that would allow the use of offsetof().
switch (Kind) {
- case SymbolKind::S_GPROC32:
- case SymbolKind::S_LPROC32:
case SymbolKind::S_GPROC32_ID:
case SymbolKind::S_LPROC32_ID:
case SymbolKind::S_LPROC32_DPC:
case SymbolKind::S_LPROC32_DPC_ID:
Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
break;
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32:
+ Refs.push_back({TiRefKind::TypeRef, 24, 1}); // Type
+ break;
case SymbolKind::S_UDT:
Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
break;
diff --git a/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp b/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
index 2a66474cf5b6..8e632f3be460 100644
--- a/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
@@ -1,9 +1,8 @@
//===- TypeRecordHelpers.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 3203ff64d3b1..47928c2eef64 100644
--- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -1,9 +1,8 @@
//===- TypeRecordMapping.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -22,19 +21,19 @@ struct MapOneMethodRecord {
: IsFromOverloadList(IsFromOverloadList) {}
Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const {
- error(IO.mapInteger(Method.Attrs.Attrs));
+ error(IO.mapInteger(Method.Attrs.Attrs, "AccessSpecifier"));
if (IsFromOverloadList) {
uint16_t Padding = 0;
- error(IO.mapInteger(Padding));
+ error(IO.mapInteger(Padding, "Padding"));
}
- error(IO.mapInteger(Method.Type));
+ error(IO.mapInteger(Method.Type, "Type"));
if (Method.isIntroducingVirtual()) {
- error(IO.mapInteger(Method.VFTableOffset));
- } else if (!IO.isWriting())
+ error(IO.mapInteger(Method.VFTableOffset, "VFTableOffset"));
+ } else if (IO.isReading())
Method.VFTableOffset = -1;
if (!IsFromOverloadList)
- error(IO.mapStringZ(Method.Name));
+ error(IO.mapStringZ(Method.Name, "Name"));
return Error::success();
}
@@ -73,9 +72,12 @@ static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
error(IO.mapStringZ(N));
}
} else {
- error(IO.mapStringZ(Name));
+ // Reading & streaming modes come after writing mode is executed for each
+ // record. Truncating large names is done during writing, so it's not
+ // necessary to do it while reading or streaming.
+ error(IO.mapStringZ(Name, "Name"));
if (HasUniqueName)
- error(IO.mapStringZ(UniqueName));
+ error(IO.mapStringZ(UniqueName, "LinkageName"));
}
return Error::success();
@@ -89,14 +91,18 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR) {
// split with continuation records. All other record types cannot be
// longer than the maximum record length.
Optional<uint32_t> MaxLen;
- if (CVR.Type != TypeLeafKind::LF_FIELDLIST &&
- CVR.Type != TypeLeafKind::LF_METHODLIST)
+ if (CVR.kind() != TypeLeafKind::LF_FIELDLIST &&
+ CVR.kind() != TypeLeafKind::LF_METHODLIST)
MaxLen = MaxRecordLength - sizeof(RecordPrefix);
error(IO.beginRecord(MaxLen));
- TypeKind = CVR.Type;
+ TypeKind = CVR.kind();
return Error::success();
}
+Error TypeRecordMapping::visitTypeBegin(CVType &CVR, TypeIndex Index) {
+ return visitTypeBegin(CVR);
+}
+
Error TypeRecordMapping::visitTypeEnd(CVType &Record) {
assert(TypeKind.hasValue() && "Not in a type mapping!");
assert(!MemberKind.hasValue() && "Still in a member mapping!");
@@ -127,7 +133,7 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
assert(TypeKind.hasValue() && "Not in a type mapping!");
assert(MemberKind.hasValue() && "Not in a member mapping!");
- if (!IO.isWriting()) {
+ if (IO.isReading()) {
if (auto EC = IO.skipPadding())
return EC;
}
@@ -138,33 +144,32 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) {
- error(IO.mapInteger(Record.ModifiedType));
- error(IO.mapEnum(Record.Modifiers));
-
+ error(IO.mapInteger(Record.ModifiedType, "ModifiedType"));
+ error(IO.mapEnum(Record.Modifiers, "Modifiers"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
ProcedureRecord &Record) {
- error(IO.mapInteger(Record.ReturnType));
- error(IO.mapEnum(Record.CallConv));
- error(IO.mapEnum(Record.Options));
- error(IO.mapInteger(Record.ParameterCount));
- error(IO.mapInteger(Record.ArgumentList));
+ error(IO.mapInteger(Record.ReturnType, "ReturnType"));
+ error(IO.mapEnum(Record.CallConv, "CallingConvention"));
+ error(IO.mapEnum(Record.Options, "FunctionOptions"));
+ error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
+ error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
MemberFunctionRecord &Record) {
- error(IO.mapInteger(Record.ReturnType));
- error(IO.mapInteger(Record.ClassType));
- error(IO.mapInteger(Record.ThisType));
- error(IO.mapEnum(Record.CallConv));
- error(IO.mapEnum(Record.Options));
- error(IO.mapInteger(Record.ParameterCount));
- error(IO.mapInteger(Record.ArgumentList));
- error(IO.mapInteger(Record.ThisPointerAdjustment));
+ error(IO.mapInteger(Record.ReturnType, "ReturnType"));
+ error(IO.mapInteger(Record.ClassType, "ClassType"));
+ error(IO.mapInteger(Record.ThisType, "ThisType"));
+ error(IO.mapEnum(Record.CallConv, "CallingConvention"));
+ error(IO.mapEnum(Record.Options, "FunctionOptions"));
+ error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
+ error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
+ error(IO.mapInteger(Record.ThisPointerAdjustment, "ThisAdjustment"));
return Error::success();
}
@@ -172,8 +177,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArgListRecord &Record) {
error(IO.mapVectorN<uint32_t>(
Record.ArgIndices,
- [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
-
+ [](CodeViewRecordIO &IO, TypeIndex &N) {
+ return IO.mapInteger(N, "Argument");
+ },
+ "NumArgs"));
return Error::success();
}
@@ -181,47 +188,50 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
StringListRecord &Record) {
error(IO.mapVectorN<uint32_t>(
Record.StringIndices,
- [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+ [](CodeViewRecordIO &IO, TypeIndex &N) {
+ return IO.mapInteger(N, "Strings");
+ },
+ "NumStrings"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) {
- error(IO.mapInteger(Record.ReferentType));
- error(IO.mapInteger(Record.Attrs));
+ error(IO.mapInteger(Record.ReferentType, "PointeeType"));
+ error(IO.mapInteger(Record.Attrs, "Attributes"));
if (Record.isPointerToMember()) {
- if (!IO.isWriting())
+ if (IO.isReading())
Record.MemberInfo.emplace();
MemberPointerInfo &M = *Record.MemberInfo;
- error(IO.mapInteger(M.ContainingType));
- error(IO.mapEnum(M.Representation));
+ error(IO.mapInteger(M.ContainingType, "ClassType"));
+ error(IO.mapEnum(M.Representation, "Representation"));
}
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArrayRecord &Record) {
- error(IO.mapInteger(Record.ElementType));
- error(IO.mapInteger(Record.IndexType));
- error(IO.mapEncodedInteger(Record.Size));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Record.ElementType, "ElementType"));
+ error(IO.mapInteger(Record.IndexType, "IndexType"));
+ error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
- assert((CVR.Type == TypeLeafKind::LF_STRUCTURE) ||
- (CVR.Type == TypeLeafKind::LF_CLASS) ||
- (CVR.Type == TypeLeafKind::LF_INTERFACE));
-
- error(IO.mapInteger(Record.MemberCount));
- error(IO.mapEnum(Record.Options));
- error(IO.mapInteger(Record.FieldList));
- error(IO.mapInteger(Record.DerivationList));
- error(IO.mapInteger(Record.VTableShape));
- error(IO.mapEncodedInteger(Record.Size));
+ assert((CVR.kind() == TypeLeafKind::LF_STRUCTURE) ||
+ (CVR.kind() == TypeLeafKind::LF_CLASS) ||
+ (CVR.kind() == TypeLeafKind::LF_INTERFACE));
+
+ error(IO.mapInteger(Record.MemberCount, "MemberCount"));
+ error(IO.mapEnum(Record.Options, "Properties"));
+ error(IO.mapInteger(Record.FieldList, "FieldList"));
+ error(IO.mapInteger(Record.DerivationList, "DerivedFrom"));
+ error(IO.mapInteger(Record.VTableShape, "VShape"));
+ error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
Record.hasUniqueName()));
@@ -229,10 +239,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
- error(IO.mapInteger(Record.MemberCount));
- error(IO.mapEnum(Record.Options));
- error(IO.mapInteger(Record.FieldList));
- error(IO.mapEncodedInteger(Record.Size));
+ error(IO.mapInteger(Record.MemberCount, "MemberCount"));
+ error(IO.mapEnum(Record.Options, "Properties"));
+ error(IO.mapInteger(Record.FieldList, "FieldList"));
+ error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
Record.hasUniqueName()));
@@ -240,10 +250,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
- error(IO.mapInteger(Record.MemberCount));
- error(IO.mapEnum(Record.Options));
- error(IO.mapInteger(Record.UnderlyingType));
- error(IO.mapInteger(Record.FieldList));
+ error(IO.mapInteger(Record.MemberCount, "NumEnumerators"));
+ error(IO.mapEnum(Record.Options, "Properties"));
+ error(IO.mapInteger(Record.UnderlyingType, "UnderlyingType"));
+ error(IO.mapInteger(Record.FieldList, "FieldListType"));
error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
Record.hasUniqueName()));
@@ -251,9 +261,9 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, BitFieldRecord &Record) {
- error(IO.mapInteger(Record.Type));
- error(IO.mapInteger(Record.BitSize));
- error(IO.mapInteger(Record.BitOffset));
+ error(IO.mapInteger(Record.Type, "Type"));
+ error(IO.mapInteger(Record.BitSize, "BitSize"));
+ error(IO.mapInteger(Record.BitOffset, "BitOffset"));
return Error::success();
}
@@ -261,10 +271,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, BitFieldRecord &Record) {
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
VFTableShapeRecord &Record) {
uint16_t Size;
- if (IO.isWriting()) {
+ if (!IO.isReading()) {
ArrayRef<VFTableSlotKind> Slots = Record.getSlots();
Size = Slots.size();
- error(IO.mapInteger(Size));
+ error(IO.mapInteger(Size, "VFEntryCount"));
for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
@@ -288,61 +298,64 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, VFTableRecord &Record) {
- error(IO.mapInteger(Record.CompleteClass));
- error(IO.mapInteger(Record.OverriddenVFTable));
- error(IO.mapInteger(Record.VFPtrOffset));
+ error(IO.mapInteger(Record.CompleteClass, "CompleteClass"));
+ error(IO.mapInteger(Record.OverriddenVFTable, "OverriddenVFTable"));
+ error(IO.mapInteger(Record.VFPtrOffset, "VFPtrOffset"));
uint32_t NamesLen = 0;
- if (IO.isWriting()) {
+ if (!IO.isReading()) {
for (auto Name : Record.MethodNames)
NamesLen += Name.size() + 1;
}
error(IO.mapInteger(NamesLen));
error(IO.mapVectorTail(
Record.MethodNames,
- [](CodeViewRecordIO &IO, StringRef &S) { return IO.mapStringZ(S); }));
+ [](CodeViewRecordIO &IO, StringRef &S) {
+ return IO.mapStringZ(S, "MethodName");
+ },
+ "VFTableName"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, StringIdRecord &Record) {
- error(IO.mapInteger(Record.Id));
- error(IO.mapStringZ(Record.String));
+ error(IO.mapInteger(Record.Id, "Id"));
+ error(IO.mapStringZ(Record.String, "StringData"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
UdtSourceLineRecord &Record) {
- error(IO.mapInteger(Record.UDT));
- error(IO.mapInteger(Record.SourceFile));
- error(IO.mapInteger(Record.LineNumber));
+ error(IO.mapInteger(Record.UDT, "UDT"));
+ error(IO.mapInteger(Record.SourceFile, "SourceFile"));
+ error(IO.mapInteger(Record.LineNumber, "LineNumber"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
UdtModSourceLineRecord &Record) {
- error(IO.mapInteger(Record.UDT));
- error(IO.mapInteger(Record.SourceFile));
- error(IO.mapInteger(Record.LineNumber));
- error(IO.mapInteger(Record.Module));
+ error(IO.mapInteger(Record.UDT, "UDT"));
+ error(IO.mapInteger(Record.SourceFile, "SourceFile"));
+ error(IO.mapInteger(Record.LineNumber, "LineNumber"));
+ error(IO.mapInteger(Record.Module, "Module"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, FuncIdRecord &Record) {
- error(IO.mapInteger(Record.ParentScope));
- error(IO.mapInteger(Record.FunctionType));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Record.ParentScope, "ParentScope"));
+ error(IO.mapInteger(Record.FunctionType, "FunctionType"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
MemberFuncIdRecord &Record) {
- error(IO.mapInteger(Record.ClassType));
- error(IO.mapInteger(Record.FunctionType));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Record.ClassType, "ClassType"));
+ error(IO.mapInteger(Record.FunctionType, "FunctionType"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
@@ -351,7 +364,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
BuildInfoRecord &Record) {
error(IO.mapVectorN<uint16_t>(
Record.ArgIndices,
- [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+ [](CodeViewRecordIO &IO, TypeIndex &N) {
+ return IO.mapInteger(N, "Argument");
+ },
+ "NumArgs"));
return Error::success();
}
@@ -360,7 +376,7 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
MethodOverloadListRecord &Record) {
// TODO: Split the list into multiple records if it's longer than 64KB, using
// a subrecord of TypeRecordKind::Index to chain the records together.
- error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true)));
+ error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true), "Method"));
return Error::success();
}
@@ -374,22 +390,22 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
TypeServer2Record &Record) {
- error(IO.mapGuid(Record.Guid));
- error(IO.mapInteger(Record.Age));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapGuid(Record.Guid, "Guid"));
+ error(IO.mapInteger(Record.Age, "Age"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) {
- error(IO.mapEnum(Record.Mode));
+ error(IO.mapEnum(Record.Mode, "Mode"));
return Error::success();
}
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
BaseClassRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs));
- error(IO.mapInteger(Record.Type));
- error(IO.mapEncodedInteger(Record.Offset));
+ error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ error(IO.mapInteger(Record.Type, "BaseType"));
+ error(IO.mapEncodedInteger(Record.Offset, "BaseOffset"));
return Error::success();
}
@@ -399,27 +415,27 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
error(IO.mapInteger(Record.Attrs.Attrs));
// FIXME: Handle full APInt such as __int128.
- error(IO.mapEncodedInteger(Record.Value));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapEncodedInteger(Record.Value, "EnumValue"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
DataMemberRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs));
- error(IO.mapInteger(Record.Type));
- error(IO.mapEncodedInteger(Record.FieldOffset));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ error(IO.mapInteger(Record.Type, "Type"));
+ error(IO.mapEncodedInteger(Record.FieldOffset, "FieldOffset"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
OverloadedMethodRecord &Record) {
- error(IO.mapInteger(Record.NumOverloads));
- error(IO.mapInteger(Record.MethodList));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Record.NumOverloads, "MethodCount"));
+ error(IO.mapInteger(Record.MethodList, "MethodListIndex"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
@@ -434,9 +450,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
NestedTypeRecord &Record) {
uint16_t Padding = 0;
- error(IO.mapInteger(Padding));
- error(IO.mapInteger(Record.Type));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Padding, "Padding"));
+ error(IO.mapInteger(Record.Type, "Type"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
@@ -444,9 +460,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
StaticDataMemberRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs));
- error(IO.mapInteger(Record.Type));
- error(IO.mapStringZ(Record.Name));
+ error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ error(IO.mapInteger(Record.Type, "Type"));
+ error(IO.mapStringZ(Record.Name, "Name"));
return Error::success();
}
@@ -454,11 +470,11 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
VirtualBaseClassRecord &Record) {
- error(IO.mapInteger(Record.Attrs.Attrs));
- error(IO.mapInteger(Record.BaseType));
- error(IO.mapInteger(Record.VBPtrType));
- error(IO.mapEncodedInteger(Record.VBPtrOffset));
- error(IO.mapEncodedInteger(Record.VTableIndex));
+ error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+ error(IO.mapInteger(Record.BaseType, "BaseType"));
+ error(IO.mapInteger(Record.VBPtrType, "VBPtrType"));
+ error(IO.mapEncodedInteger(Record.VBPtrOffset, "VBPtrOffset"));
+ error(IO.mapEncodedInteger(Record.VTableIndex, "VBTableIndex"));
return Error::success();
}
@@ -466,8 +482,8 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
VFPtrRecord &Record) {
uint16_t Padding = 0;
- error(IO.mapInteger(Padding));
- error(IO.mapInteger(Record.Type));
+ error(IO.mapInteger(Padding, "Padding"));
+ error(IO.mapInteger(Record.Type, "Type"));
return Error::success();
}
@@ -475,23 +491,23 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
ListContinuationRecord &Record) {
uint16_t Padding = 0;
- error(IO.mapInteger(Padding));
- error(IO.mapInteger(Record.ContinuationIndex));
+ error(IO.mapInteger(Padding, "Padding"));
+ error(IO.mapInteger(Record.ContinuationIndex, "ContinuationIndex"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
PrecompRecord &Precomp) {
- error(IO.mapInteger(Precomp.StartTypeIndex));
- error(IO.mapInteger(Precomp.TypesCount));
- error(IO.mapInteger(Precomp.Signature));
- error(IO.mapStringZ(Precomp.PrecompFilePath));
+ error(IO.mapInteger(Precomp.StartTypeIndex, "StartIndex"));
+ error(IO.mapInteger(Precomp.TypesCount, "Count"));
+ error(IO.mapInteger(Precomp.Signature, "Signature"));
+ error(IO.mapStringZ(Precomp.PrecompFilePath, "PrecompFile"));
return Error::success();
}
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
EndPrecompRecord &EndPrecomp) {
- error(IO.mapInteger(EndPrecomp.Signature));
+ error(IO.mapInteger(EndPrecomp.Signature, "Signature"));
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index bae11ce6a6a1..aba0e96d606e 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -1,9 +1,8 @@
//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
index cf951baa5111..e13068b5b1eb 100644
--- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp
+++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp
@@ -1,9 +1,8 @@
//===- TypeTableCollection.cpp -------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -37,11 +36,7 @@ Optional<TypeIndex> TypeTableCollection::getNext(TypeIndex Prev) {
CVType TypeTableCollection::getType(TypeIndex Index) {
assert(Index.toArrayIndex() < Records.size());
- ArrayRef<uint8_t> Bytes = Records[Index.toArrayIndex()];
- const RecordPrefix *Prefix =
- reinterpret_cast<const RecordPrefix *>(Bytes.data());
- TypeLeafKind Kind = static_cast<TypeLeafKind>(uint16_t(Prefix->RecordKind));
- return CVType(Kind, Bytes);
+ return CVType(Records[Index.toArrayIndex()]);
}
StringRef TypeTableCollection::getTypeName(TypeIndex Index) {
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index f49ab40fad9a..f4dd79937608 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -1,9 +1,8 @@
//===- DWARFAbbreviationDeclaration.cpp -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -164,11 +163,11 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
for (const auto &Spec : AttributeSpecs) {
if (*MatchAttrIndex == AttrIndex) {
// We have arrived at the attribute to extract, extract if from Offset.
+ if (Spec.isImplicitConst())
+ return DWARFFormValue::createFromSValue(Spec.Form,
+ Spec.getImplicitConstValue());
+
DWARFFormValue FormValue(Spec.Form);
- if (Spec.isImplicitConst()) {
- FormValue.setSValue(Spec.getImplicitConstValue());
- return FormValue;
- }
if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U))
return FormValue;
}
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 54daf34ff253..0721efb40f6a 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -1,9 +1,8 @@
//===- DWARFAcceleratorTable.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -42,7 +41,7 @@ static Atom formatAtom(unsigned Atom) { return {Atom}; }
DWARFAcceleratorTable::~DWARFAcceleratorTable() = default;
-llvm::Error AppleAcceleratorTable::extract() {
+Error AppleAcceleratorTable::extract() {
uint32_t Offset = 0;
// Check that we can at least read the header.
@@ -377,7 +376,7 @@ void DWARFDebugNames::Header::dump(ScopedPrinter &W) const {
W.startLine() << "Augmentation: '" << AugmentationString << "'\n";
}
-llvm::Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
+Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS,
uint32_t *Offset) {
// Check that we can read the fixed-size part.
if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1))
@@ -519,6 +518,7 @@ Error DWARFDebugNames::NameIndex::extract() {
"Duplicate abbreviation code.");
}
}
+
DWARFDebugNames::Entry::Entry(const NameIndex &NameIdx, const Abbrev &Abbr)
: NameIdx(&NameIdx), Abbr(&Abbr) {
// This merely creates form values. It is up to the caller
@@ -585,13 +585,14 @@ uint32_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const {
uint32_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const {
assert(TU < Hdr.LocalTypeUnitCount);
- uint32_t Offset = CUsBase + Hdr.CompUnitCount * 4;
+ uint32_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU);
return Section.AccelSection.getRelocatedValue(4, &Offset);
}
uint64_t DWARFDebugNames::NameIndex::getForeignTUSignature(uint32_t TU) const {
assert(TU < Hdr.ForeignTypeUnitCount);
- uint32_t Offset = CUsBase + (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) * 4;
+ uint32_t Offset =
+ CUsBase + 4 * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU;
return Section.AccelSection.getU64(&Offset);
}
@@ -754,11 +755,11 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const {
dumpName(W, NTE, None);
}
-llvm::Error DWARFDebugNames::extract() {
+Error DWARFDebugNames::extract() {
uint32_t Offset = 0;
while (AccelSection.isValidOffset(Offset)) {
NameIndex Next(*this, Offset);
- if (llvm::Error E = Next.extract())
+ if (Error E = Next.extract())
return E;
Offset = Next.getNextUnitOffset();
NameIndices.push_back(std::move(Next));
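
The two offset fixes above follow the DWARF v5 .debug_names layout: after the header comes the CU list (4-byte offsets), then the local TU list (4-byte offsets), then the foreign TU list (8-byte signatures), so the requested TU index has to be scaled and added in. A small standalone sketch of the corrected arithmetic, with hypothetical helper names:

// Illustrative sketch of the corrected index arithmetic; helper names are
// hypothetical, and the widths (4-byte offsets, 8-byte signatures) follow DWARF v5.
#include <cstdint>

uint64_t localTUOffsetPos(uint64_t CUsBase, uint64_t CompUnitCount, uint64_t TU) {
  return CUsBase + 4 * (CompUnitCount + TU);
}

uint64_t foreignTUSignaturePos(uint64_t CUsBase, uint64_t CompUnitCount,
                               uint64_t LocalTypeUnitCount, uint64_t TU) {
  return CUsBase + 4 * (CompUnitCount + LocalTypeUnitCount) + 8 * TU;
}
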
diff --git a/lib/DebugInfo/DWARF/DWARFAddressRange.cpp b/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
index 86c8d19c02f4..ef6da08d34aa 100644
--- a/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAddressRange.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugAranges.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 00a23b3898fa..74cce42466dd 100644
--- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -1,9 +1,8 @@
//===-- DWARFCompileUnit.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index e6620ee3dd1d..5ede9bf59619 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1,9 +1,8 @@
//===- DWARFContext.cpp ---------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -37,11 +36,12 @@
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/RelocVisitor.h"
+#include "llvm/Object/RelocationResolver.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
@@ -102,7 +102,8 @@ static ContributionCollection
collectContributionData(DWARFContext::unit_iterator_range Units) {
ContributionCollection Contributions;
for (const auto &U : Units)
- Contributions.push_back(U->getStringOffsetsTableContribution());
+ if (const auto &C = U->getStringOffsetsTableContribution())
+ Contributions.push_back(C);
// Sort the contributions so that any invalid ones are placed at
// the start of the contributions vector. This way they are reported
// first.
@@ -158,9 +159,9 @@ static void dumpDWARFv5StringOffsetsSection(
// Detect overlapping contributions.
if (Offset > ContributionHeader) {
- OS << "error: overlapping contributions to string offsets table in "
- "section ."
- << SectionName << ".\n";
+ WithColor::error()
+ << "overlapping contributions to string offsets table in section ."
+ << SectionName << ".\n";
return;
}
// Report a gap in the table.
@@ -269,11 +270,11 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData,
}
// Dump the .debug_rnglists or .debug_rnglists.dwo section (DWARF v5).
-static void
-dumpRnglistsSection(raw_ostream &OS, DWARFDataExtractor &rnglistData,
- llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
- LookupPooledAddress,
- DIDumpOptions DumpOpts) {
+static void dumpRnglistsSection(
+ raw_ostream &OS, DWARFDataExtractor &rnglistData,
+ llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
+ LookupPooledAddress,
+ DIDumpOptions DumpOpts) {
uint32_t Offset = 0;
while (rnglistData.isValidOffset(Offset)) {
llvm::DWARFDebugRnglistTable Rnglists;
@@ -926,6 +927,9 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
DWARFDie DIE = Worklist.back();
Worklist.pop_back();
+ if (!DIE.isValid())
+ continue;
+
if (DIE.getTag() == DW_TAG_lexical_block &&
DIE.addressRangeContainsAddress(Address)) {
Result.BlockDIE = DIE;
@@ -939,6 +943,8 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
return Result;
}
+/// TODO: change input parameter from "uint64_t Address"
+/// into "SectionedAddress Address"
static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
uint64_t Address,
FunctionNameKind Kind,
@@ -967,36 +973,155 @@ static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
return FoundResult;
}
-DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
+static Optional<uint64_t> getTypeSize(DWARFDie Type, uint64_t PointerSize) {
+ if (auto SizeAttr = Type.find(DW_AT_byte_size))
+ if (Optional<uint64_t> Size = SizeAttr->getAsUnsignedConstant())
+ return Size;
+
+ switch (Type.getTag()) {
+ case DW_TAG_pointer_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_rvalue_reference_type:
+ return PointerSize;
+ case DW_TAG_ptr_to_member_type: {
+ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type))
+ if (BaseType.getTag() == DW_TAG_subroutine_type)
+ return 2 * PointerSize;
+ return PointerSize;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ case DW_TAG_restrict_type:
+ case DW_TAG_typedef: {
+ if (DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type))
+ return getTypeSize(BaseType, PointerSize);
+ break;
+ }
+ case DW_TAG_array_type: {
+ DWARFDie BaseType = Type.getAttributeValueAsReferencedDie(DW_AT_type);
+ if (!BaseType)
+ return Optional<uint64_t>();
+ Optional<uint64_t> BaseSize = getTypeSize(BaseType, PointerSize);
+ if (!BaseSize)
+ return Optional<uint64_t>();
+ uint64_t Size = *BaseSize;
+ for (DWARFDie Child : Type) {
+ if (Child.getTag() != DW_TAG_subrange_type)
+ continue;
+
+ if (auto ElemCountAttr = Child.find(DW_AT_count))
+ if (Optional<uint64_t> ElemCount =
+ ElemCountAttr->getAsUnsignedConstant())
+ Size *= *ElemCount;
+ if (auto UpperBoundAttr = Child.find(DW_AT_upper_bound))
+ if (Optional<int64_t> UpperBound =
+ UpperBoundAttr->getAsSignedConstant()) {
+ int64_t LowerBound = 0;
+ if (auto LowerBoundAttr = Child.find(DW_AT_lower_bound))
+ LowerBound = LowerBoundAttr->getAsSignedConstant().getValueOr(0);
+ Size *= *UpperBound - LowerBound + 1;
+ }
+ }
+ return Size;
+ }
+ default:
+ break;
+ }
+ return Optional<uint64_t>();
+}
+
+void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
+ DWARFDie Die, std::vector<DILocal> &Result) {
+ if (Die.getTag() == DW_TAG_variable ||
+ Die.getTag() == DW_TAG_formal_parameter) {
+ DILocal Local;
+ if (auto NameAttr = Subprogram.find(DW_AT_name))
+ if (Optional<const char *> Name = NameAttr->getAsCString())
+ Local.FunctionName = *Name;
+ if (auto LocationAttr = Die.find(DW_AT_location))
+ if (Optional<ArrayRef<uint8_t>> Location = LocationAttr->getAsBlock())
+ if (!Location->empty() && (*Location)[0] == DW_OP_fbreg)
+ Local.FrameOffset =
+ decodeSLEB128(Location->data() + 1, nullptr, Location->end());
+ if (auto TagOffsetAttr = Die.find(DW_AT_LLVM_tag_offset))
+ Local.TagOffset = TagOffsetAttr->getAsUnsignedConstant();
+
+ if (auto Origin =
+ Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
+ Die = Origin;
+ if (auto NameAttr = Die.find(DW_AT_name))
+ if (Optional<const char *> Name = NameAttr->getAsCString())
+ Local.Name = *Name;
+ if (auto Type = Die.getAttributeValueAsReferencedDie(DW_AT_type))
+ Local.Size = getTypeSize(Type, getCUAddrSize());
+ if (auto DeclFileAttr = Die.find(DW_AT_decl_file)) {
+ if (const auto *LT = CU->getContext().getLineTableForUnit(CU))
+ LT->getFileNameByIndex(
+ DeclFileAttr->getAsUnsignedConstant().getValue(),
+ CU->getCompilationDir(),
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+ Local.DeclFile);
+ }
+ if (auto DeclLineAttr = Die.find(DW_AT_decl_line))
+ Local.DeclLine = DeclLineAttr->getAsUnsignedConstant().getValue();
+
+ Result.push_back(Local);
+ return;
+ }
+
+ if (Die.getTag() == DW_TAG_inlined_subroutine)
+ if (auto Origin =
+ Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
+ Subprogram = Origin;
+
+ for (auto Child : Die)
+ addLocalsForDie(CU, Subprogram, Child, Result);
+}
+
+std::vector<DILocal>
+DWARFContext::getLocalsForAddress(object::SectionedAddress Address) {
+ std::vector<DILocal> Result;
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ if (!CU)
+ return Result;
+
+ DWARFDie Subprogram = CU->getSubroutineForAddress(Address.Address);
+ if (Subprogram.isValid())
+ addLocalsForDie(CU, Subprogram, Subprogram, Result);
+ return Result;
+}
+
+DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Spec) {
DILineInfo Result;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
if (!CU)
return Result;
- getFunctionNameAndStartLineForAddress(CU, Address, Spec.FNKind,
- Result.FunctionName,
- Result.StartLine);
+
+ getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
+ Result.FunctionName, Result.StartLine);
if (Spec.FLIKind != FileLineInfoKind::None) {
- if (const DWARFLineTable *LineTable = getLineTableForUnit(CU))
- LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
- Spec.FLIKind, Result);
+ if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) {
+ LineTable->getFileLineInfoForAddress(
+ {Address.Address, Address.SectionIndex}, CU->getCompilationDir(),
+ Spec.FLIKind, Result);
+ }
}
return Result;
}
-DILineInfoTable
-DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
- DILineInfoSpecifier Spec) {
+DILineInfoTable DWARFContext::getLineInfoForAddressRange(
+ object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Spec) {
DILineInfoTable Lines;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
if (!CU)
return Lines;
std::string FunctionName = "<invalid>";
uint32_t StartLine = 0;
- getFunctionNameAndStartLineForAddress(CU, Address, Spec.FNKind, FunctionName,
- StartLine);
+ getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind,
+ FunctionName, StartLine);
// If the Specifier says we don't need FileLineInfo, just
// return the top-most function at the starting address.
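
As a worked example of the getTypeSize recursion added above: for a DW_TAG_array_type describing int a[3][5] on a target with 8-byte pointers, the element type contributes DW_AT_byte_size = 4 and the two DW_TAG_subrange_type children contribute counts 3 and 5 (either from DW_AT_count, or from DW_AT_upper_bound 2 and 4 with the default lower bound of 0), giving 4 * 3 * 5 = 60 bytes; a DW_TAG_ptr_to_member_type whose referenced type is a DW_TAG_subroutine_type would instead report 2 * 8 = 16 bytes.
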
@@ -1004,7 +1129,7 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
DILineInfo Result;
Result.FunctionName = FunctionName;
Result.StartLine = StartLine;
- Lines.push_back(std::make_pair(Address, Result));
+ Lines.push_back(std::make_pair(Address.Address, Result));
return Lines;
}
@@ -1012,8 +1137,10 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
// Get the index of row we're looking for in the line table.
std::vector<uint32_t> RowVector;
- if (!LineTable->lookupAddressRange(Address, Size, RowVector))
+ if (!LineTable->lookupAddressRange({Address.Address, Address.SectionIndex},
+ Size, RowVector)) {
return Lines;
+ }
for (uint32_t RowIndex : RowVector) {
// Take file number and line/column from the row.
@@ -1025,33 +1152,33 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
Result.Line = Row.Line;
Result.Column = Row.Column;
Result.StartLine = StartLine;
- Lines.push_back(std::make_pair(Row.Address, Result));
+ Lines.push_back(std::make_pair(Row.Address.Address, Result));
}
return Lines;
}
DIInliningInfo
-DWARFContext::getInliningInfoForAddress(uint64_t Address,
+DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Spec) {
DIInliningInfo InliningInfo;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
if (!CU)
return InliningInfo;
const DWARFLineTable *LineTable = nullptr;
SmallVector<DWARFDie, 4> InlinedChain;
- CU->getInlinedChainForAddress(Address, InlinedChain);
+ CU->getInlinedChainForAddress(Address.Address, InlinedChain);
if (InlinedChain.size() == 0) {
// If there is no DIE for address (e.g. it is in unavailable .dwo file),
// try to at least get file/line info from symbol table.
if (Spec.FLIKind != FileLineInfoKind::None) {
DILineInfo Frame;
LineTable = getLineTableForUnit(CU);
- if (LineTable &&
- LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
- Spec.FLIKind, Frame))
+ if (LineTable && LineTable->getFileLineInfoForAddress(
+ {Address.Address, Address.SectionIndex},
+ CU->getCompilationDir(), Spec.FLIKind, Frame))
InliningInfo.addFrame(Frame);
}
return InliningInfo;
@@ -1073,8 +1200,9 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
LineTable = getLineTableForUnit(CU);
// For the topmost routine, get file/line info from line table.
if (LineTable)
- LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
- Spec.FLIKind, Frame);
+ LineTable->getFileLineInfoForAddress(
+ {Address.Address, Address.SectionIndex}, CU->getCompilationDir(),
+ Spec.FLIKind, Frame);
} else {
// Otherwise, use call file, call line and call column from
// previous DIE in inlined chain.
@@ -1402,8 +1530,14 @@ public:
// Try to obtain an already relocated version of this section.
// Else use the unrelocated section from the object file. We'll have to
// apply relocations ourselves later.
- if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data))
- Section.getContents(Data);
+ if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) {
+ Expected<StringRef> E = Section.getContents();
+ if (E)
+ Data = *E;
+ else
+ // maybeDecompress below will error.
+ consumeError(E.takeError());
+ }
if (auto Err = maybeDecompress(Section, Name, Data)) {
ErrorPolicy EP = HandleError(createError(
@@ -1495,6 +1629,9 @@ public:
// Symbol to [address, section index] cache mapping.
std::map<SymbolRef, SymInfo> AddrCache;
+ bool (*Supports)(uint64_t);
+ RelocationResolver Resolver;
+ std::tie(Supports, Resolver) = getRelocationResolver(Obj);
for (const RelocationRef &Reloc : Section.relocations()) {
// FIXME: it's not clear how to correctly handle scattered
// relocations.
@@ -1509,9 +1646,31 @@ public:
continue;
}
- object::RelocVisitor V(Obj);
- uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address);
- if (V.error()) {
+ // Check if Resolver can handle this relocation type early so as not to
+ // handle invalid cases in DWARFDataExtractor.
+ //
+ // TODO Don't store Resolver in every RelocAddrEntry.
+ if (Supports && Supports(Reloc.getType())) {
+ auto I = Map->try_emplace(
+ Reloc.getOffset(),
+ RelocAddrEntry{SymInfoOrErr->SectionIndex, Reloc,
+ SymInfoOrErr->Address,
+ Optional<object::RelocationRef>(), 0, Resolver});
+ // If we didn't successfully insert that's because we already had a
+ // relocation for that offset. Store it as a second relocation in the
+ // same RelocAddrEntry instead.
+ if (!I.second) {
+ RelocAddrEntry &entry = I.first->getSecond();
+ if (entry.Reloc2) {
+ ErrorPolicy EP = HandleError(createError(
+ "At most two relocations per offset are supported"));
+ if (EP == ErrorPolicy::Halt)
+ return;
+ }
+ entry.Reloc2 = Reloc;
+ entry.SymbolValue2 = SymInfoOrErr->Address;
+ }
+ } else {
SmallString<32> Type;
Reloc.getTypeName(Type);
ErrorPolicy EP = HandleError(
@@ -1519,10 +1678,7 @@ public:
errorCodeToError(object_error::parse_failed)));
if (EP == ErrorPolicy::Halt)
return;
- continue;
}
- RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val};
- Map->insert({Reloc.getOffset(), Rel});
}
}
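
The rewritten loop above keys the relocation map by offset and, when a second relocation targets the same offset, stores it in the same entry instead of dropping it (with a hard limit of two). A reduced sketch of that try_emplace-and-fall-back pattern, using a simplified entry type rather than the real RelocAddrEntry:

// Illustrative sketch: a simplified entry holding up to two relocations per offset.
#include <cstdint>
#include <map>
#include <optional>

struct EntrySketch {
  uint64_t SymbolValue = 0;
  std::optional<uint64_t> SymbolValue2; // second relocation, if any
};

// Returns false if a third relocation hits an already-full offset.
bool addReloc(std::map<uint64_t, EntrySketch> &Map, uint64_t Offset, uint64_t SymVal) {
  auto I = Map.try_emplace(Offset, EntrySketch{SymVal, std::nullopt});
  if (I.second)
    return true;                 // first relocation seen for this offset
  EntrySketch &E = I.first->second;
  if (E.SymbolValue2)
    return false;                // at most two relocations per offset are supported
  E.SymbolValue2 = SymVal;
  return true;
}
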
diff --git a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
index 03e317461396..b9adf8cb1d99 100644
--- a/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -1,9 +1,8 @@
//===- DWARFDataExtractor.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -16,15 +15,19 @@ using namespace llvm;
uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
uint64_t *SecNdx) const {
if (SecNdx)
- *SecNdx = -1ULL;
+ *SecNdx = object::SectionedAddress::UndefSection;
if (!Section)
return getUnsigned(Off, Size);
- Optional<RelocAddrEntry> Rel = Obj->find(*Section, *Off);
- if (!Rel)
- return getUnsigned(Off, Size);
+ Optional<RelocAddrEntry> E = Obj->find(*Section, *Off);
+ uint64_t A = getUnsigned(Off, Size);
+ if (!E)
+ return A;
if (SecNdx)
- *SecNdx = Rel->SectionIndex;
- return getUnsigned(Off, Size) + Rel->Value;
+ *SecNdx = E->SectionIndex;
+ uint64_t R = E->Resolver(E->Reloc, E->SymbolValue, A);
+ if (E->Reloc2)
+ R = E->Resolver(*E->Reloc2, E->SymbolValue2, R);
+ return R;
}
Optional<uint64_t>
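
getRelocatedValue now always reads the raw storage value first and then lets the resolver combine it with the symbol value; when a second relocation is attached to the entry, its resolver runs on the output of the first. A minimal sketch of that chaining with generic function objects (which concrete relocation types need such pairs, e.g. ADD/SUB-style relocations, is target-dependent and not shown in this diff):

// Illustrative sketch: applying one or two resolver steps to a raw stored value.
#include <cstdint>
#include <functional>
#include <optional>
#include <utility>

using ResolverSketch = std::function<uint64_t(uint64_t SymbolValue, uint64_t Value)>;

uint64_t relocate(uint64_t RawValue, const ResolverSketch &First, uint64_t Sym1,
                  const std::optional<std::pair<ResolverSketch, uint64_t>> &Second) {
  uint64_t R = First(Sym1, RawValue);
  if (Second)
    R = Second->first(Second->second, R);  // second relocation refines the first result
  return R;
}
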
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 4830c36a8ee7..31b324e5eb27 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugAbbrev.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -84,12 +83,12 @@ void DWARFDebugAbbrev::parse() const {
if (!Data)
return;
uint32_t Offset = 0;
- DWARFAbbreviationDeclarationSet AbbrDecls;
auto I = AbbrDeclSets.begin();
while (Data->isValidOffset(Offset)) {
while (I != AbbrDeclSets.end() && I->first < Offset)
++I;
uint32_t CUAbbrOffset = Offset;
+ DWARFAbbreviationDeclarationSet AbbrDecls;
if (!AbbrDecls.extract(*Data, &Offset))
break;
AbbrDeclSets.insert(I, std::make_pair(CUAbbrOffset, std::move(AbbrDecls)));
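
Moving the DWARFAbbreviationDeclarationSet declaration inside the loop matters because each iteration std::moves the set into AbbrDeclSets; reusing one moved-from object across iterations risks carrying stale state into the next extract. A generic sketch of the hazard and the fix, not tied to the DWARF classes:

// Illustrative sketch: declare the scratch object per iteration when it is moved out.
#include <string>
#include <utility>
#include <vector>

void collect(std::vector<std::string> &Out, int N) {
  for (int I = 0; I < N; ++I) {
    std::string Scratch;          // fresh, well-defined state every iteration
    Scratch.append("entry ").append(std::to_string(I));
    Out.push_back(std::move(Scratch));
    // Had Scratch been declared outside the loop, it would be moved-from here,
    // and code that only appends (rather than resets) could see leftover state.
  }
}
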
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index 22759bfac26c..58626539bba4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugAddr.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -148,28 +147,13 @@ void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
HeaderData.Length, HeaderData.Version, HeaderData.AddrSize,
HeaderData.SegSize);
- static const char *Fmt32 = "0x%8.8" PRIx64;
- static const char *Fmt64 = "0x%16.16" PRIx64;
- std::string AddrFmt = "\n";
- std::string AddrFmtVerbose = " => ";
- if (HeaderData.AddrSize == 4) {
- AddrFmt.append(Fmt32);
- AddrFmtVerbose.append(Fmt32);
- }
- else {
- AddrFmt.append(Fmt64);
- AddrFmtVerbose.append(Fmt64);
- }
-
if (Addrs.size() > 0) {
- OS << "Addrs: [";
- for (uint64_t Addr : Addrs) {
- OS << format(AddrFmt.c_str(), Addr);
- if (DumpOpts.Verbose)
- OS << format(AddrFmtVerbose.c_str(),
- Addr + HeaderOffset + sizeof(HeaderData));
- }
- OS << "\n]\n";
+ const char *AddrFmt = (HeaderData.AddrSize == 4) ? "0x%8.8" PRIx64 "\n"
+ : "0x%16.16" PRIx64 "\n";
+ OS << "Addrs: [\n";
+ for (uint64_t Addr : Addrs)
+ OS << format(AddrFmt, Addr);
+ OS << "]\n";
}
}
diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index b9ef6905912a..6551b61accb8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugArangeSet.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index e8c5dec821b4..6460c9feeab8 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugAranges.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -115,20 +114,9 @@ void DWARFDebugAranges::construct() {
}
uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const {
- if (!Aranges.empty()) {
- Range range(Address);
- RangeCollIterator begin = Aranges.begin();
- RangeCollIterator end = Aranges.end();
- RangeCollIterator pos =
- std::lower_bound(begin, end, range);
-
- if (pos != end && pos->containsAddress(Address)) {
- return pos->CUOffset;
- } else if (pos != begin) {
- --pos;
- if (pos->containsAddress(Address))
- return pos->CUOffset;
- }
- }
+ RangeCollIterator It =
+ partition_point(Aranges, [=](Range R) { return R.HighPC() <= Address; });
+ if (It != Aranges.end() && It->LowPC <= Address)
+ return It->CUOffset;
return -1U;
}
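
llvm::partition_point wraps std::partition_point: given ranges sorted by end address, it returns the first range whose HighPC is greater than the query, so membership only needs a LowPC check afterwards. The same idiom replaces hand-rolled lower_bound searches in several files of this diff. A standalone sketch with plain structs:

// Illustrative sketch: find the interval containing Address in a sorted vector.
#include <algorithm>
#include <cstdint>
#include <vector>

struct Interval { uint64_t LowPC, HighPC; };  // half-open [LowPC, HighPC)

// Intervals must be sorted by HighPC; returns the index or -1 if none contains Address.
int findContaining(const std::vector<Interval> &Sorted, uint64_t Address) {
  auto It = std::partition_point(
      Sorted.begin(), Sorted.end(),
      [=](const Interval &I) { return I.HighPC <= Address; });
  if (It != Sorted.end() && It->LowPC <= Address)
    return int(It - Sorted.begin());
  return -1;
}
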
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index ba55ffc28174..b3f23366f2a2 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugFrame.h - Parsing of .debug_frame ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -267,7 +266,7 @@ void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI,
case OT_Expression:
assert(Instr.Expression && "missing DWARFExpression object");
OS << " ";
- Instr.Expression->print(OS, MRI, IsEH);
+ Instr.Expression->print(OS, MRI, nullptr, IsEH);
break;
}
}
@@ -301,7 +300,7 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor);
OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister);
if (Personality)
- OS << format(" Personality Address: %08x\n", *Personality);
+ OS << format(" Personality Address: %016" PRIx64 "\n", *Personality);
if (!AugmentationData.empty()) {
OS << " Augmentation data: ";
for (uint8_t Byte : AugmentationData)
@@ -320,7 +319,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
(uint32_t)InitialLocation,
(uint32_t)InitialLocation + (uint32_t)AddressRange);
if (LSDAAddress)
- OS << format(" LSDA Address: %08x\n", *LSDAAddress);
+ OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress);
CFIs.dump(OS, MRI, IsEH);
OS << "\n";
}
@@ -533,10 +532,9 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) {
}
FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const {
- auto It =
- std::lower_bound(Entries.begin(), Entries.end(), Offset,
- [](const std::unique_ptr<FrameEntry> &E,
- uint64_t Offset) { return E->getOffset() < Offset; });
+ auto It = partition_point(Entries, [=](const std::unique_ptr<FrameEntry> &E) {
+ return E->getOffset() < Offset;
+ });
if (It != Entries.end() && (*It)->getOffset() == Offset)
return It->get();
return nullptr;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 976bc4651ae6..d8a755e90df4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugInfoEntry.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 1d621ff244f3..a1cb1e8582ed 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugLine.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -67,6 +66,26 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType(
DWARFDebugLine::Prologue::Prologue() { clear(); }
+bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const {
+ uint16_t DwarfVersion = getVersion();
+ assert(DwarfVersion != 0 &&
+ "line table prologue has no dwarf version information");
+ if (DwarfVersion >= 5)
+ return FileIndex < FileNames.size();
+ return FileIndex != 0 && FileIndex <= FileNames.size();
+}
+
+const llvm::DWARFDebugLine::FileNameEntry &
+DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const {
+ uint16_t DwarfVersion = getVersion();
+ assert(DwarfVersion != 0 &&
+ "line table prologue has no dwarf version information");
+ // In DWARF v5 the file names are 0-indexed.
+ if (DwarfVersion >= 5)
+ return FileNames[Index];
+ return FileNames[Index - 1];
+}
+
void DWARFDebugLine::Prologue::clear() {
TotalLength = PrologueLength = 0;
SegSelectorSize = 0;
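
The hasFileAtIndex/getFileNameEntry pair added above centralizes the version split in file numbering: DWARF v5 file name tables are 0-based (entry 0 is the primary source file), so index 0 is valid and maps to FileNames[0], whereas v4 and earlier tables are 1-based, so index 0 is invalid and index 1 maps to FileNames[0].
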
@@ -145,8 +164,8 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
StringRef S = DebugLineData.getCStrRef(OffsetPtr);
if (S.empty())
break;
- DWARFFormValue Dir(dwarf::DW_FORM_string);
- Dir.setPValue(S.data());
+ DWARFFormValue Dir =
+ DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, S.data());
IncludeDirectories.push_back(Dir);
}
@@ -155,8 +174,8 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData,
if (Name.empty())
break;
DWARFDebugLine::FileNameEntry FileEntry;
- FileEntry.Name.setForm(dwarf::DW_FORM_string);
- FileEntry.Name.setPValue(Name.data());
+ FileEntry.Name =
+ DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name.data());
FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
@@ -281,11 +300,11 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
const uint64_t PrologueOffset = *OffsetPtr;
clear();
- TotalLength = DebugLineData.getU32(OffsetPtr);
+ TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr);
if (TotalLength == UINT32_MAX) {
FormParams.Format = dwarf::DWARF64;
TotalLength = DebugLineData.getU64(OffsetPtr);
- } else if (TotalLength >= 0xffffff00) {
+ } else if (TotalLength >= 0xfffffff0) {
return createStringError(errc::invalid_argument,
"parsing line table prologue at offset 0x%8.8" PRIx64
" unsupported reserved unit length found of value 0x%8.8" PRIx64,
@@ -306,7 +325,8 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
SegSelectorSize = DebugLineData.getU8(OffsetPtr);
}
- PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength());
+ PrologueLength =
+ DebugLineData.getRelocatedValue(sizeofPrologueLength(), OffsetPtr);
const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr;
MinInstLength = DebugLineData.getU8(OffsetPtr);
if (getVersion() >= 4)
@@ -348,13 +368,15 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
DWARFDebugLine::Row::Row(bool DefaultIsStmt) { reset(DefaultIsStmt); }
void DWARFDebugLine::Row::postAppend() {
+ Discriminator = 0;
BasicBlock = false;
PrologueEnd = false;
EpilogueBegin = false;
}
void DWARFDebugLine::Row::reset(bool DefaultIsStmt) {
- Address = 0;
+ Address.Address = 0;
+ Address.SectionIndex = object::SectionedAddress::UndefSection;
Line = 1;
Column = 0;
File = 1;
@@ -374,7 +396,7 @@ void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) {
}
void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
- OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
+ OS << format("0x%16.16" PRIx64 " %6u %6u", Address.Address, Line, Column)
<< format(" %6u %3u %13u ", File, Isa, Discriminator)
<< (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "")
<< (PrologueEnd ? " prologue_end" : "")
@@ -387,6 +409,7 @@ DWARFDebugLine::Sequence::Sequence() { reset(); }
void DWARFDebugLine::Sequence::reset() {
LowPC = 0;
HighPC = 0;
+ SectionIndex = object::SectionedAddress::UndefSection;
FirstRowIndex = 0;
LastRowIndex = 0;
Empty = true;
@@ -423,19 +446,20 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() {
Sequence.reset();
}
-void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t Offset) {
+void DWARFDebugLine::ParsingState::appendRowToMatrix() {
+ unsigned RowNumber = LineTable->Rows.size();
if (Sequence.Empty) {
// Record the beginning of instruction sequence.
Sequence.Empty = false;
- Sequence.LowPC = Row.Address;
+ Sequence.LowPC = Row.Address.Address;
Sequence.FirstRowIndex = RowNumber;
}
- ++RowNumber;
LineTable->appendRow(Row);
if (Row.EndSequence) {
// Record the end of instruction sequence.
- Sequence.HighPC = Row.Address;
- Sequence.LastRowIndex = RowNumber;
+ Sequence.HighPC = Row.Address.Address;
+ Sequence.LastRowIndex = RowNumber + 1;
+ Sequence.SectionIndex = Row.Address.SectionIndex;
if (Sequence.isValid())
LineTable->appendSequence(Sequence);
Sequence.reset();
@@ -538,7 +562,7 @@ Error DWARFDebugLine::LineTable::parse(
// address is that of the byte after the last target machine instruction
// of the sequence.
State.Row.EndSequence = true;
- State.appendRowToMatrix(*OffsetPtr);
+ State.appendRowToMatrix();
if (OS) {
*OS << "\n";
OS->indent(12);
@@ -566,9 +590,10 @@ Error DWARFDebugLine::LineTable::parse(
ExtOffset, DebugLineData.getAddressSize(),
Len - 1);
}
- State.Row.Address = DebugLineData.getRelocatedAddress(OffsetPtr);
+ State.Row.Address.Address = DebugLineData.getRelocatedAddress(
+ OffsetPtr, &State.Row.Address.SectionIndex);
if (OS)
- *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address);
+ *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address);
break;
case DW_LNE_define_file:
@@ -595,8 +620,8 @@ Error DWARFDebugLine::LineTable::parse(
{
FileNameEntry FileEntry;
const char *Name = DebugLineData.getCStr(OffsetPtr);
- FileEntry.Name.setForm(dwarf::DW_FORM_string);
- FileEntry.Name.setPValue(Name);
+ FileEntry.Name =
+ DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name);
FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr);
FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr);
FileEntry.Length = DebugLineData.getULEB128(OffsetPtr);
@@ -637,15 +662,14 @@ Error DWARFDebugLine::LineTable::parse(
// Standard Opcodes
case DW_LNS_copy:
// Takes no arguments. Append a row to the matrix using the
- // current values of the state-machine registers. Then set
- // the basic_block register to false.
- State.appendRowToMatrix(*OffsetPtr);
+ // current values of the state-machine registers.
if (OS) {
*OS << "\n";
OS->indent(12);
State.Row.dump(*OS);
*OS << "\n";
}
+ State.appendRowToMatrix();
break;
case DW_LNS_advance_pc:
@@ -655,7 +679,7 @@ Error DWARFDebugLine::LineTable::parse(
{
uint64_t AddrOffset =
DebugLineData.getULEB128(OffsetPtr) * Prologue.MinInstLength;
- State.Row.Address += AddrOffset;
+ State.Row.Address.Address += AddrOffset;
if (OS)
*OS << " (" << AddrOffset << ")";
}
@@ -713,7 +737,7 @@ Error DWARFDebugLine::LineTable::parse(
uint8_t AdjustOpcode = 255 - Prologue.OpcodeBase;
uint64_t AddrOffset =
(AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength;
- State.Row.Address += AddrOffset;
+ State.Row.Address.Address += AddrOffset;
if (OS)
*OS
<< format(" (0x%16.16" PRIx64 ")", AddrOffset);
@@ -731,11 +755,11 @@ Error DWARFDebugLine::LineTable::parse(
// requires the use of DW_LNS_advance_pc. Such assemblers, however,
// can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
{
- uint16_t PCOffset = DebugLineData.getU16(OffsetPtr);
- State.Row.Address += PCOffset;
+ uint16_t PCOffset = DebugLineData.getRelocatedValue(2, OffsetPtr);
+ State.Row.Address.Address += PCOffset;
if (OS)
*OS
- << format(" (0x%16.16" PRIx64 ")", PCOffset);
+ << format(" (0x%4.4" PRIx16 ")", PCOffset);
}
break;
@@ -815,18 +839,16 @@ Error DWARFDebugLine::LineTable::parse(
int32_t LineOffset =
Prologue.LineBase + (AdjustOpcode % Prologue.LineRange);
State.Row.Line += LineOffset;
- State.Row.Address += AddrOffset;
+ State.Row.Address.Address += AddrOffset;
if (OS) {
- *OS << "address += " << ((uint32_t)AdjustOpcode)
- << ", line += " << LineOffset << "\n";
+ *OS << "address += " << AddrOffset << ", line += " << LineOffset
+ << "\n";
OS->indent(12);
State.Row.dump(*OS);
}
- State.appendRowToMatrix(*OffsetPtr);
- // Reset discriminator to 0.
- State.Row.Discriminator = 0;
+ State.appendRowToMatrix();
}
if(OS)
*OS << "\n";
@@ -839,7 +861,7 @@ Error DWARFDebugLine::LineTable::parse(
// Sort all sequences so that address lookup will work faster.
if (!Sequences.empty()) {
- llvm::sort(Sequences, Sequence::orderByLowPC);
+ llvm::sort(Sequences, Sequence::orderByHighPC);
// Note: actually, instruction address ranges of sequences should not
// overlap (in shared objects and executables). If they do, the address
// lookup would still work, though, but result would be ambiguous.
@@ -851,74 +873,88 @@ Error DWARFDebugLine::LineTable::parse(
return Error::success();
}
-uint32_t
-DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &Seq,
- uint64_t Address) const {
+uint32_t DWARFDebugLine::LineTable::findRowInSeq(
+ const DWARFDebugLine::Sequence &Seq,
+ object::SectionedAddress Address) const {
if (!Seq.containsPC(Address))
return UnknownRowIndex;
- // Search for instruction address in the rows describing the sequence.
- // Rows are stored in a vector, so we may use arithmetical operations with
- // iterators.
+ assert(Seq.SectionIndex == Address.SectionIndex);
+ // In some cases, e.g. first instruction in a function, the compiler generates
+ // two entries, both with the same address. We want the last one.
+ //
+ // In general we want a non-empty range: the last row whose address is less
+ // than or equal to Address. This can be computed as upper_bound - 1.
DWARFDebugLine::Row Row;
Row.Address = Address;
RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex;
RowIter LastRow = Rows.begin() + Seq.LastRowIndex;
- LineTable::RowIter RowPos = std::lower_bound(
- FirstRow, LastRow, Row, DWARFDebugLine::Row::orderByAddress);
- if (RowPos == LastRow) {
- return Seq.LastRowIndex - 1;
- }
- uint32_t Index = Seq.FirstRowIndex + (RowPos - FirstRow);
- if (RowPos->Address > Address) {
- if (RowPos == FirstRow)
- return UnknownRowIndex;
- else
- Index--;
- }
- return Index;
+ assert(FirstRow->Address.Address <= Row.Address.Address &&
+ Row.Address.Address < LastRow[-1].Address.Address);
+ RowIter RowPos = std::upper_bound(FirstRow + 1, LastRow - 1, Row,
+ DWARFDebugLine::Row::orderByAddress) -
+ 1;
+ assert(Seq.SectionIndex == RowPos->Address.SectionIndex);
+ return RowPos - Rows.begin();
}
-uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t Address) const {
- if (Sequences.empty())
- return UnknownRowIndex;
+uint32_t DWARFDebugLine::LineTable::lookupAddress(
+ object::SectionedAddress Address) const {
+
+ // Search for relocatable addresses
+ uint32_t Result = lookupAddressImpl(Address);
+
+ if (Result != UnknownRowIndex ||
+ Address.SectionIndex == object::SectionedAddress::UndefSection)
+ return Result;
+
+ // Search for absolute addresses
+ Address.SectionIndex = object::SectionedAddress::UndefSection;
+ return lookupAddressImpl(Address);
+}
+
+uint32_t DWARFDebugLine::LineTable::lookupAddressImpl(
+ object::SectionedAddress Address) const {
// First, find an instruction sequence containing the given address.
DWARFDebugLine::Sequence Sequence;
- Sequence.LowPC = Address;
- SequenceIter FirstSeq = Sequences.begin();
- SequenceIter LastSeq = Sequences.end();
- SequenceIter SeqPos = std::lower_bound(
- FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC);
- DWARFDebugLine::Sequence FoundSeq;
- if (SeqPos == LastSeq) {
- FoundSeq = Sequences.back();
- } else if (SeqPos->LowPC == Address) {
- FoundSeq = *SeqPos;
- } else {
- if (SeqPos == FirstSeq)
- return UnknownRowIndex;
- FoundSeq = *(SeqPos - 1);
- }
- return findRowInSeq(FoundSeq, Address);
+ Sequence.SectionIndex = Address.SectionIndex;
+ Sequence.HighPC = Address.Address;
+ SequenceIter It = llvm::upper_bound(Sequences, Sequence,
+ DWARFDebugLine::Sequence::orderByHighPC);
+ if (It == Sequences.end() || It->SectionIndex != Address.SectionIndex)
+ return UnknownRowIndex;
+ return findRowInSeq(*It, Address);
}
bool DWARFDebugLine::LineTable::lookupAddressRange(
- uint64_t Address, uint64_t Size, std::vector<uint32_t> &Result) const {
+ object::SectionedAddress Address, uint64_t Size,
+ std::vector<uint32_t> &Result) const {
+
+ // Search for relocatable addresses
+ if (lookupAddressRangeImpl(Address, Size, Result))
+ return true;
+
+ if (Address.SectionIndex == object::SectionedAddress::UndefSection)
+ return false;
+
+ // Search for absolute addresses
+ Address.SectionIndex = object::SectionedAddress::UndefSection;
+ return lookupAddressRangeImpl(Address, Size, Result);
+}
+
+bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
+ object::SectionedAddress Address, uint64_t Size,
+ std::vector<uint32_t> &Result) const {
if (Sequences.empty())
return false;
- uint64_t EndAddr = Address + Size;
+ uint64_t EndAddr = Address.Address + Size;
// First, find an instruction sequence containing the given address.
DWARFDebugLine::Sequence Sequence;
- Sequence.LowPC = Address;
- SequenceIter FirstSeq = Sequences.begin();
+ Sequence.SectionIndex = Address.SectionIndex;
+ Sequence.HighPC = Address.Address;
SequenceIter LastSeq = Sequences.end();
- SequenceIter SeqPos = std::lower_bound(
- FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC);
- if (SeqPos == LastSeq || SeqPos->LowPC != Address) {
- if (SeqPos == FirstSeq)
- return false;
- SeqPos--;
- }
- if (!SeqPos->containsPC(Address))
+ SequenceIter SeqPos = llvm::upper_bound(
+ Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC);
+ if (SeqPos == LastSeq || !SeqPos->containsPC(Address))
return false;
SequenceIter StartPos = SeqPos;
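
The reworked findRowInSeq/lookupAddress above rely on one sorted-range idiom: sequences are ordered by HighPC, so upper_bound finds the first sequence that can still contain the address, and within a sequence the wanted row is the last one whose address is less than or equal to the query, i.e. upper_bound minus one, which also picks the later of two rows sharing the same address. A standalone sketch of that "last element <= key" lookup:

// Illustrative sketch: pick the last row whose address is <= Target.
#include <algorithm>
#include <cstdint>
#include <vector>

// RowAddrs must be sorted ascending; returns -1 if Target precedes the first row.
int lastRowAtOrBefore(const std::vector<uint64_t> &RowAddrs, uint64_t Target) {
  auto It = std::upper_bound(RowAddrs.begin(), RowAddrs.end(), Target);
  if (It == RowAddrs.begin())
    return -1;
  return int(It - RowAddrs.begin()) - 1;  // with duplicates, lands on the last equal entry
}
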
@@ -935,7 +971,8 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
FirstRowIndex = findRowInSeq(CurSeq, Address);
// Figure out the last row in the range.
- uint32_t LastRowIndex = findRowInSeq(CurSeq, EndAddr - 1);
+ uint32_t LastRowIndex =
+ findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex});
if (LastRowIndex == UnknownRowIndex)
LastRowIndex = CurSeq.LastRowIndex - 1;
@@ -952,15 +989,11 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
return true;
}
-bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
- return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
-}
-
Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex,
FileLineInfoKind Kind) const {
- if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
+ if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex))
return None;
- const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+ const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex);
if (Optional<const char *> source = Entry.Source.getAsCString())
return StringRef(*source);
return None;
@@ -974,13 +1007,13 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
sys::path::is_absolute(Path, sys::path::Style::windows);
}
-bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
- const char *CompDir,
- FileLineInfoKind Kind,
- std::string &Result) const {
+bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
+ StringRef CompDir,
+ FileLineInfoKind Kind,
+ std::string &Result) const {
if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
return false;
- const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+ const FileNameEntry &Entry = getFileNameEntry(FileIndex);
StringRef FileName = Entry.Name.getAsCString().getValue();
if (Kind != FileLineInfoKind::AbsoluteFilePath ||
isPathAbsoluteOnWindowsOrPosix(FileName)) {
@@ -989,21 +1022,22 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
}
SmallString<16> FilePath;
- uint64_t IncludeDirIndex = Entry.DirIdx;
StringRef IncludeDir;
// Be defensive about the contents of Entry.
- if (IncludeDirIndex > 0 &&
- IncludeDirIndex <= Prologue.IncludeDirectories.size())
- IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1]
- .getAsCString()
- .getValue();
-
- // We may still need to append compilation directory of compile unit.
- // We know that FileName is not absolute, the only way to have an
- // absolute path at this point would be if IncludeDir is absolute.
- if (CompDir && Kind == FileLineInfoKind::AbsoluteFilePath &&
- !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
- sys::path::append(FilePath, CompDir);
+ if (getVersion() >= 5) {
+ if (Entry.DirIdx < IncludeDirectories.size())
+ IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
+ } else {
+ if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size())
+ IncludeDir =
+ IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue();
+
+ // We may still need to append compilation directory of compile unit.
+ // We know that FileName is not absolute, the only way to have an
+ // absolute path at this point would be if IncludeDir is absolute.
+ if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
+ sys::path::append(FilePath, CompDir);
+ }
// sys::path::append skips empty strings.
sys::path::append(FilePath, IncludeDir, FileName);
@@ -1012,8 +1046,8 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
}
bool DWARFDebugLine::LineTable::getFileLineInfoForAddress(
- uint64_t Address, const char *CompDir, FileLineInfoKind Kind,
- DILineInfo &Result) const {
+ object::SectionedAddress Address, const char *CompDir,
+ FileLineInfoKind Kind, DILineInfo &Result) const {
// Get the index of row we're looking for in the line table.
uint32_t RowIndex = lookupAddress(Address);
if (RowIndex == -1U)
@@ -1058,7 +1092,7 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data,
}
bool DWARFDebugLine::Prologue::totalLengthIsValid() const {
- return TotalLength == 0xffffffff || TotalLength < 0xffffff00;
+ return TotalLength == 0xffffffff || TotalLength < 0xfffffff0;
}
DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext(
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index f8b5ff6ec8fb..6d8f4bee77c4 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugLoc.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,15 +30,16 @@ using namespace llvm;
// non-LLVM tools.
static void dumpExpression(raw_ostream &OS, ArrayRef<char> Data,
bool IsLittleEndian, unsigned AddressSize,
- const MCRegisterInfo *MRI) {
+ const MCRegisterInfo *MRI, DWARFUnit *U) {
DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()),
IsLittleEndian, AddressSize);
- DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI);
+ DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U);
}
void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
unsigned AddressSize,
const MCRegisterInfo *MRI,
+ DWARFUnit *U,
uint64_t BaseAddress,
unsigned Indent) const {
for (const Entry &E : Entries) {
@@ -51,15 +51,14 @@ void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian,
BaseAddress + E.End);
OS << ": ";
- dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI);
+ dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U);
}
}
DWARFDebugLoc::LocationList const *
DWARFDebugLoc::getLocationListAtOffset(uint64_t Offset) const {
- auto It = std::lower_bound(
- Locations.begin(), Locations.end(), Offset,
- [](const LocationList &L, uint64_t Offset) { return L.Offset < Offset; });
+ auto It = partition_point(
+ Locations, [=](const LocationList &L) { return L.Offset < Offset; });
if (It != Locations.end() && It->Offset == Offset)
return &(*It);
return nullptr;
@@ -69,7 +68,7 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
Optional<uint64_t> Offset) const {
auto DumpLocationList = [&](const LocationList &L) {
OS << format("0x%8.8x: ", L.Offset);
- L.dump(OS, IsLittleEndian, AddressSize, MRI, 0, 12);
+ L.dump(OS, IsLittleEndian, AddressSize, MRI, nullptr, 0, 12);
OS << "\n\n";
};
@@ -184,7 +183,8 @@ DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
}
if (Kind != dwarf::DW_LLE_base_address) {
- unsigned Bytes = Data.getU16(Offset);
+ unsigned Bytes =
+ Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
// A single location description describing the location of the object...
StringRef str = Data.getData().substr(*Offset, Bytes);
*Offset += Bytes;
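
The Version >= 5 branch above reads the location description length as a ULEB128 rather than a fixed 2-byte value, matching how DWARF v5 encodes counted location descriptions in .debug_loclists. For reference, a minimal unsigned-LEB128 decoder (a standalone sketch, not the llvm::DataExtractor API):

// Illustrative sketch: decode an unsigned LEB128 value starting at P, advancing P past it.
#include <cstdint>

uint64_t decodeULEB128Sketch(const uint8_t *&P) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  uint8_t Byte;
  do {
    Byte = *P++;
    Value |= uint64_t(Byte & 0x7f) << Shift;  // low 7 bits carry payload
    Shift += 7;
  } while (Byte & 0x80);                      // high bit set means more bytes follow
  return Value;
}
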
@@ -212,9 +212,8 @@ void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) {
DWARFDebugLoclists::LocationList const *
DWARFDebugLoclists::getLocationListAtOffset(uint64_t Offset) const {
- auto It = std::lower_bound(
- Locations.begin(), Locations.end(), Offset,
- [](const LocationList &L, uint64_t Offset) { return L.Offset < Offset; });
+ auto It = partition_point(
+ Locations, [=](const LocationList &L) { return L.Offset < Offset; });
if (It != Locations.end() && It->Offset == Offset)
return &(*It);
return nullptr;
@@ -224,6 +223,7 @@ void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
bool IsLittleEndian,
unsigned AddressSize,
const MCRegisterInfo *MRI,
+ DWARFUnit *U,
unsigned Indent) const {
for (const Entry &E : Entries) {
switch (E.Kind) {
@@ -253,7 +253,7 @@ void DWARFDebugLoclists::LocationList::dump(raw_ostream &OS, uint64_t BaseAddr,
llvm_unreachable("unreachable locations list kind");
}
- dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI);
+ dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI, U);
}
}
@@ -262,7 +262,7 @@ void DWARFDebugLoclists::dump(raw_ostream &OS, uint64_t BaseAddr,
Optional<uint64_t> Offset) const {
auto DumpLocationList = [&](const LocationList &L) {
OS << format("0x%8.8x: ", L.Offset);
- L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, /*Indent=*/12);
+ L.dump(OS, BaseAddr, IsLittleEndian, AddressSize, MRI, nullptr, /*Indent=*/12);
OS << "\n\n";
};
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 6d789c3027a5..3317a778cc70 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugMacro.cpp ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index abd1ad59a9c1..963ec64f5e91 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugPubTable.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index dfb913000a46..d8df81a0aa0b 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugRangesList.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -69,7 +68,7 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const {
}
DWARFAddressRangesVector DWARFDebugRangeList::getAbsoluteRanges(
- llvm::Optional<SectionedAddress> BaseAddr) const {
+ llvm::Optional<object::SectionedAddress> BaseAddr) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
index 60c6eb30857f..5ac3326f6681 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp
@@ -1,9 +1,8 @@
//===- DWARFDebugRnglists.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -113,9 +112,8 @@ Error RangeListEntry::extract(DWARFDataExtractor Data, uint32_t End,
return Error::success();
}
-DWARFAddressRangesVector
-DWARFDebugRnglist::getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
- DWARFUnit &U) const {
+DWARFAddressRangesVector DWARFDebugRnglist::getAbsoluteRanges(
+ llvm::Optional<object::SectionedAddress> BaseAddr, DWARFUnit &U) const {
DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
if (RLE.EntryKind == dwarf::DW_RLE_end_of_list)
@@ -175,7 +173,7 @@ DWARFDebugRnglist::getAbsoluteRanges(llvm::Optional<SectionedAddress> BaseAddr,
void RangeListEntry::dump(
raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength,
uint64_t &CurrentBase, DIDumpOptions DumpOpts,
- llvm::function_ref<Optional<SectionedAddress>(uint32_t)>
+ llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
LookupPooledAddress) const {
auto PrintRawEntry = [](raw_ostream &OS, const RangeListEntry &Entry,
uint8_t AddrSize, DIDumpOptions DumpOpts) {
@@ -203,7 +201,6 @@ void RangeListEntry::dump(
case dwarf::DW_RLE_end_of_list:
OS << (DumpOpts.Verbose ? "" : "<End of list>");
break;
- // case dwarf::DW_RLE_base_addressx:
case dwarf::DW_RLE_base_addressx: {
if (auto SA = LookupPooledAddress(Value0))
CurrentBase = SA->Address;
@@ -240,7 +237,7 @@ void RangeListEntry::dump(
Start = SA->Address;
DWARFAddressRange(Start, Start + Value1).dump(OS, AddrSize, DumpOpts);
break;
- } break;
+ }
default:
llvm_unreachable("Unsupported range list encoding");
}
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index 81ef0c8c7aec..d638dc4239f4 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -1,9 +1,8 @@
//===- DWARFDie.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -87,7 +86,7 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()),
Ctx.isLittleEndian(), 0);
DWARFExpression(Data, U->getVersion(), U->getAddressByteSize())
- .print(OS, MRI);
+ .print(OS, MRI, U);
return;
}
@@ -101,10 +100,10 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
auto LL = DebugLoc.parseOneLocationList(Data, &Offset);
if (LL) {
uint64_t BaseAddr = 0;
- if (Optional<SectionedAddress> BA = U->getBaseAddress())
+ if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
BaseAddr = BA->Address;
- LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, BaseAddr,
- Indent);
+ LL->dump(OS, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI, U,
+ BaseAddr, Indent);
} else
OS << "error extracting location list.";
return;
@@ -126,12 +125,12 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue,
Data, &Offset, UseLocLists ? U->getVersion() : 4);
uint64_t BaseAddr = 0;
- if (Optional<SectionedAddress> BA = U->getBaseAddress())
+ if (Optional<object::SectionedAddress> BA = U->getBaseAddress())
BaseAddr = BA->Address;
if (LL)
LL->dump(OS, BaseAddr, Ctx.isLittleEndian(), Obj.getAddressSize(), MRI,
- Indent);
+ U, Indent);
else
OS << "error extracting location list.";
}
@@ -279,11 +278,7 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
OS << formatv(" [{0}]", Form);
DWARFUnit *U = Die.getDwarfUnit();
- DWARFFormValue formValue(Form);
-
- if (!formValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr,
- U->getFormParams(), U))
- return;
+ DWARFFormValue FormValue = DWARFFormValue::createFromUnit(Form, U, OffsetPtr);
OS << "\t(";
@@ -294,35 +289,33 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
Color = HighlightColor::String;
if (const auto *LT = U->getContext().getLineTableForUnit(U))
if (LT->getFileNameByIndex(
- formValue.getAsUnsignedConstant().getValue(),
+ FormValue.getAsUnsignedConstant().getValue(),
U->getCompilationDir(),
DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
File = '"' + File + '"';
Name = File;
}
- } else if (Optional<uint64_t> Val = formValue.getAsUnsignedConstant())
+ } else if (Optional<uint64_t> Val = FormValue.getAsUnsignedConstant())
Name = AttributeValueString(Attr, *Val);
if (!Name.empty())
WithColor(OS, Color) << Name;
else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line)
- OS << *formValue.getAsUnsignedConstant();
+ OS << *FormValue.getAsUnsignedConstant();
else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose &&
- formValue.getAsUnsignedConstant()) {
+ FormValue.getAsUnsignedConstant()) {
if (DumpOpts.ShowAddresses) {
// Print the actual address rather than the offset.
uint64_t LowPC, HighPC, Index;
if (Die.getLowAndHighPC(LowPC, HighPC, Index))
OS << format("0x%016" PRIx64, HighPC);
else
- formValue.dump(OS, DumpOpts);
+ FormValue.dump(OS, DumpOpts);
}
- } else if (Attr == DW_AT_location || Attr == DW_AT_frame_base ||
- Attr == DW_AT_data_member_location ||
- Attr == DW_AT_GNU_call_site_value)
- dumpLocation(OS, formValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts);
+ } else if (DWARFAttribute::mayHaveLocationDescription(Attr))
+ dumpLocation(OS, FormValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts);
else
- formValue.dump(OS, DumpOpts);
+ FormValue.dump(OS, DumpOpts);
std::string Space = DumpOpts.ShowAddresses ? " " : "";
@@ -331,25 +324,25 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
// interesting. These attributes are handled below.
if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) {
if (const char *Name =
- Die.getAttributeValueAsReferencedDie(formValue).getName(
+ Die.getAttributeValueAsReferencedDie(FormValue).getName(
DINameKind::LinkageName))
OS << Space << "\"" << Name << '\"';
} else if (Attr == DW_AT_type) {
OS << Space << "\"";
- dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(formValue));
+ dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(FormValue));
OS << '"';
} else if (Attr == DW_AT_APPLE_property_attribute) {
- if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
+ if (Optional<uint64_t> OptVal = FormValue.getAsUnsignedConstant())
dumpApplePropertyAttribute(OS, *OptVal);
} else if (Attr == DW_AT_ranges) {
const DWARFObject &Obj = Die.getDwarfUnit()->getContext().getDWARFObj();
// For DW_FORM_rnglistx we need to dump the offset separately, since
// we have only dumped the index so far.
- if (formValue.getForm() == DW_FORM_rnglistx)
+ if (FormValue.getForm() == DW_FORM_rnglistx)
if (auto RangeListOffset =
- U->getRnglistOffset(*formValue.getAsSectionOffset())) {
- DWARFFormValue FV(dwarf::DW_FORM_sec_offset);
- FV.setUValue(*RangeListOffset);
+ U->getRnglistOffset(*FormValue.getAsSectionOffset())) {
+ DWARFFormValue FV = DWARFFormValue::createFromUValue(
+ dwarf::DW_FORM_sec_offset, *RangeListOffset);
FV.dump(OS, DumpOpts);
}
if (auto RangesOrError = Die.getAddressRanges())
@@ -403,6 +396,7 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
// DWARF. This corresponds to following the DW_AT_abstract_origin and
// DW_AT_specification just once.
SmallSet<DWARFDie, 3> Seen;
+ Seen.insert(*this);
while (!Worklist.empty()) {
DWARFDie Die = Worklist.back();
@@ -411,19 +405,16 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
if (!Die.isValid())
continue;
- if (Seen.count(Die))
- continue;
-
- Seen.insert(Die);
-
if (auto Value = Die.find(Attrs))
return Value;
if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
- Worklist.push_back(D);
+ if (Seen.insert(D).second)
+ Worklist.push_back(D);
if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_specification))
- Worklist.push_back(D);
+ if (Seen.insert(D).second)
+ Worklist.push_back(D);
}
return None;
@@ -438,9 +429,11 @@ DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
DWARFDie
DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const {
- if (auto SpecRef = toReference(V)) {
- if (auto SpecUnit = U->getUnitVector().getUnitForOffset(*SpecRef))
- return SpecUnit->getDIEForOffset(*SpecRef);
+ if (auto SpecRef = V.getAsRelativeReference()) {
+ if (SpecRef->Unit)
+ return SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() + SpecRef->Offset);
+ if (auto SpecUnit = U->getUnitVector().getUnitForOffset(SpecRef->Offset))
+ return SpecUnit->getDIEForOffset(SpecRef->Offset);
}
return DWARFDie();
}
@@ -560,10 +553,12 @@ void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
/// Helper to dump a DIE with all of its parents, but no siblings.
static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent,
- DIDumpOptions DumpOpts) {
+ DIDumpOptions DumpOpts, unsigned Depth = 0) {
if (!Die)
return Indent;
- Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts);
+ if (DumpOpts.ParentRecurseDepth > 0 && Depth >= DumpOpts.ParentRecurseDepth)
+ return Indent;
+ Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts, Depth + 1);
Die.dump(OS, Indent, DumpOpts);
return Indent + 2;
}
@@ -611,8 +606,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
}
DWARFDie child = getFirstChild();
- if (DumpOpts.ShowChildren && DumpOpts.RecurseDepth > 0 && child) {
- DumpOpts.RecurseDepth--;
+ if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) {
+ DumpOpts.ChildRecurseDepth--;
DIDumpOptions ChildDumpOpts = DumpOpts;
ChildDumpOpts.ShowParents = false;
while (child) {
@@ -668,7 +663,7 @@ iterator_range<DWARFDie::attribute_iterator> DWARFDie::attributes() const {
}
DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End)
- : Die(D), AttrValue(0), Index(0) {
+ : Die(D), Index(0) {
auto AbbrDecl = Die.getAbbreviationDeclarationPtr();
assert(AbbrDecl && "Must have abbreviation declaration");
if (End) {
@@ -690,18 +685,15 @@ void DWARFDie::attribute_iterator::updateForIndex(
AttrValue.Attr = AbbrDecl.getAttrByIndex(Index);
// Add the previous byte size of any previous attribute value.
AttrValue.Offset += AttrValue.ByteSize;
- AttrValue.Value.setForm(AbbrDecl.getFormByIndex(Index));
uint32_t ParseOffset = AttrValue.Offset;
auto U = Die.getDwarfUnit();
assert(U && "Die must have valid DWARF unit");
- bool b = AttrValue.Value.extractValue(U->getDebugInfoExtractor(),
- &ParseOffset, U->getFormParams(), U);
- (void)b;
- assert(b && "extractValue cannot fail on fully parsed DWARF");
+ AttrValue.Value = DWARFFormValue::createFromUnit(
+ AbbrDecl.getFormByIndex(Index), U, &ParseOffset);
AttrValue.ByteSize = ParseOffset - AttrValue.Offset;
} else {
assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only");
- AttrValue.clear();
+ AttrValue = {};
}
}
@@ -710,3 +702,39 @@ DWARFDie::attribute_iterator &DWARFDie::attribute_iterator::operator++() {
updateForIndex(*AbbrDecl, Index + 1);
return *this;
}
+
+bool DWARFAttribute::mayHaveLocationDescription(dwarf::Attribute Attr) {
+ switch (Attr) {
+ // From the DWARF v5 specification.
+ case DW_AT_location:
+ case DW_AT_byte_size:
+ case DW_AT_bit_size:
+ case DW_AT_string_length:
+ case DW_AT_lower_bound:
+ case DW_AT_return_addr:
+ case DW_AT_bit_stride:
+ case DW_AT_upper_bound:
+ case DW_AT_count:
+ case DW_AT_data_member_location:
+ case DW_AT_frame_base:
+ case DW_AT_segment:
+ case DW_AT_static_link:
+ case DW_AT_use_location:
+ case DW_AT_vtable_elem_location:
+ case DW_AT_allocated:
+ case DW_AT_associated:
+ case DW_AT_byte_stride:
+ case DW_AT_rank:
+ case DW_AT_call_value:
+ case DW_AT_call_origin:
+ case DW_AT_call_target:
+ case DW_AT_call_target_clobbered:
+ case DW_AT_call_data_location:
+ case DW_AT_call_data_value:
+ // Extensions.
+ case DW_AT_GNU_call_site_value:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/lib/DebugInfo/DWARF/DWARFExpression.cpp b/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 2df4456053fb..470d4b5364b4 100644
--- a/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -1,13 +1,13 @@
//===-- DWARFExpression.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Format.h"
@@ -97,6 +97,11 @@ static DescVector getDescriptions() {
Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB);
+ Descriptions[DW_OP_GNU_entry_value] = Desc(Op::Dwarf4, Op::SizeLEB);
+
+ Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef);
+ Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB);
+
return Descriptions;
}
@@ -152,17 +157,21 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
case Operation::SizeAddr:
if (AddressSize == 8) {
Operands[Operand] = Data.getU64(&Offset);
- } else {
- assert(AddressSize == 4);
+ } else if (AddressSize == 4) {
Operands[Operand] = Data.getU32(&Offset);
+ } else {
+ assert(AddressSize == 2);
+ Operands[Operand] = Data.getU16(&Offset);
}
break;
case Operation::SizeRefAddr:
if (getRefAddrSize(AddressSize, Version) == 8) {
Operands[Operand] = Data.getU64(&Offset);
- } else {
- assert(getRefAddrSize(AddressSize, Version) == 4);
+ } else if (getRefAddrSize(AddressSize, Version) == 4) {
Operands[Operand] = Data.getU32(&Offset);
+ } else {
+ assert(getRefAddrSize(AddressSize, Version) == 2);
+ Operands[Operand] = Data.getU16(&Offset);
}
break;
case Operation::SizeLEB:
@@ -171,6 +180,9 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
else
Operands[Operand] = Data.getULEB128(&Offset);
break;
+ case Operation::BaseTypeRef:
+ Operands[Operand] = Data.getULEB128(&Offset);
+ break;
case Operation::SizeBlock:
// We need a size, so this cannot be the first operand
if (Operand == 0)
@@ -182,6 +194,8 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version,
default:
llvm_unreachable("Unknown DWARFExpression Op size");
}
+
+ OperandEndOffsets[Operand] = Offset;
}
EndOffset = Offset;
@@ -222,6 +236,7 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode,
bool DWARFExpression::Operation::print(raw_ostream &OS,
const DWARFExpression *Expr,
const MCRegisterInfo *RegInfo,
+ DWARFUnit *U,
bool isEH) {
if (Error) {
OS << "<decoding error>";
@@ -245,14 +260,25 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
if (Size == Operation::SizeNA)
break;
- if (Size == Operation::SizeBlock) {
+ if (Size == Operation::BaseTypeRef && U) {
+ auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
+ if (Die && Die.getTag() == dwarf::DW_TAG_base_type) {
+ OS << format(" (0x%08x)", U->getOffset() + Operands[Operand]);
+ if (auto Name = Die.find(dwarf::DW_AT_name))
+ OS << " \"" << Name->getAsCString() << "\"";
+ } else {
+ OS << format(" <invalid base_type ref: 0x%" PRIx64 ">",
+ Operands[Operand]);
+ }
+ } else if (Size == Operation::SizeBlock) {
uint32_t Offset = Operands[Operand];
for (unsigned i = 0; i < Operands[Operand - 1]; ++i)
OS << format(" 0x%02x", Expr->Data.getU8(&Offset));
} else {
if (Signed)
OS << format(" %+" PRId64, (int64_t)Operands[Operand]);
- else
+ else if (Opcode != DW_OP_entry_value &&
+ Opcode != DW_OP_GNU_entry_value)
OS << format(" 0x%" PRIx64, Operands[Operand]);
}
}
@@ -260,17 +286,60 @@ bool DWARFExpression::Operation::print(raw_ostream &OS,
}
void DWARFExpression::print(raw_ostream &OS, const MCRegisterInfo *RegInfo,
- bool IsEH) const {
+ DWARFUnit *U, bool IsEH) const {
+ uint32_t EntryValExprSize = 0;
for (auto &Op : *this) {
- if (!Op.print(OS, this, RegInfo, IsEH)) {
+ if (!Op.print(OS, this, RegInfo, U, IsEH)) {
uint32_t FailOffset = Op.getEndOffset();
while (FailOffset < Data.getData().size())
OS << format(" %02x", Data.getU8(&FailOffset));
return;
}
+
+ if (Op.getCode() == DW_OP_entry_value ||
+ Op.getCode() == DW_OP_GNU_entry_value) {
+ OS << "(";
+ EntryValExprSize = Op.getRawOperand(0);
+ continue;
+ }
+
+ if (EntryValExprSize) {
+ EntryValExprSize--;
+ if (EntryValExprSize == 0)
+ OS << ")";
+ }
+
if (Op.getEndOffset() < Data.getData().size())
OS << ", ";
}
}
+bool DWARFExpression::Operation::verify(DWARFUnit *U) {
+
+ for (unsigned Operand = 0; Operand < 2; ++Operand) {
+ unsigned Size = Desc.Op[Operand];
+
+ if (Size == Operation::SizeNA)
+ break;
+
+ if (Size == Operation::BaseTypeRef) {
+ auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
+ if (!Die || Die.getTag() != dwarf::DW_TAG_base_type) {
+ Error = true;
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool DWARFExpression::verify(DWARFUnit *U) {
+ for (auto &Op : *this)
+ if (!Op.verify(U))
+ return false;
+
+ return true;
+}
+
} // namespace llvm
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 7719fea63120..290d35511cdb 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -1,9 +1,8 @@
//===- DWARFFormValue.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -78,6 +77,34 @@ static const DWARFFormValue::FormClass DWARF5FormClasses[] = {
};
+DWARFFormValue DWARFFormValue::createFromSValue(dwarf::Form F, int64_t V) {
+ return DWARFFormValue(F, ValueType(V));
+}
+
+DWARFFormValue DWARFFormValue::createFromUValue(dwarf::Form F, uint64_t V) {
+ return DWARFFormValue(F, ValueType(V));
+}
+
+DWARFFormValue DWARFFormValue::createFromPValue(dwarf::Form F, const char *V) {
+ return DWARFFormValue(F, ValueType(V));
+}
+
+DWARFFormValue DWARFFormValue::createFromBlockValue(dwarf::Form F,
+ ArrayRef<uint8_t> D) {
+ ValueType V;
+ V.uval = D.size();
+ V.data = D.data();
+ return DWARFFormValue(F, V);
+}
+
+DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U,
+ uint32_t *OffsetPtr) {
+ DWARFFormValue FormValue(F);
+ FormValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr,
+ U->getFormParams(), U);
+ return FormValue;
+}
+
bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
uint32_t *OffsetPtr,
const dwarf::FormParams Params) {
@@ -193,13 +220,17 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
default:
break;
}
- // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset.
- // Don't check for DWARF version here, as some producers may still do this
- // by mistake. Also accept DW_FORM_[line_]strp since these are
- // .debug_[line_]str section offsets.
- return (Form == DW_FORM_data4 || Form == DW_FORM_data8 ||
- Form == DW_FORM_strp || Form == DW_FORM_line_strp) &&
- FC == FC_SectionOffset;
+
+ if (FC == FC_SectionOffset) {
+ if (Form == DW_FORM_strp || Form == DW_FORM_line_strp)
+ return true;
+ // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section
+ // offset. If we don't have a DWARFUnit, default to the old behavior.
+ if (Form == DW_FORM_data4 || Form == DW_FORM_data8)
+ return !U || U->getVersion() <= 3;
+ }
+
+ return false;
}
bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
@@ -268,7 +299,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
case DW_FORM_data8:
case DW_FORM_ref8:
case DW_FORM_ref_sup8:
- Value.uval = Data.getU64(OffsetPtr);
+ Value.uval = Data.getRelocatedValue(8, OffsetPtr);
break;
case DW_FORM_data16:
// Treat this like a 16-byte block.
@@ -323,7 +354,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
StringRef Str = Data.getData().substr(*OffsetPtr, Value.uval);
Value.data = nullptr;
if (!Str.empty()) {
- Value.data = reinterpret_cast<const uint8_t *>(Str.data());
+ Value.data = Str.bytes_begin();
*OffsetPtr += Value.uval;
}
}
@@ -333,7 +364,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS,
DIDumpOptions DumpOpts,
- SectionedAddress SA) const {
+ object::SectionedAddress SA) const {
OS << format("0x%016" PRIx64, SA.Address);
dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts,
SA.SectionIndex);
@@ -370,12 +401,14 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
case DW_FORM_addrx3:
case DW_FORM_addrx4:
case DW_FORM_GNU_addr_index: {
- Optional<SectionedAddress> A = U->getAddrOffsetSectionItem(UValue);
+ if (U == nullptr) {
+ OS << "<invalid dwarf unit>";
+ break;
+ }
+ Optional<object::SectionedAddress> A = U->getAddrOffsetSectionItem(UValue);
if (!A || DumpOpts.Verbose)
AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue);
- if (U == nullptr)
- OS << "<invalid dwarf unit>";
- else if (A)
+ if (A)
dumpSectionedAddress(AddrOS, DumpOpts, *A);
else
OS << "<no .debug_addr section>";
@@ -591,14 +624,15 @@ Optional<uint64_t> DWARFFormValue::getAsAddress() const {
return SA->Address;
return None;
}
-Optional<SectionedAddress> DWARFFormValue::getAsSectionedAddress() const {
+Optional<object::SectionedAddress>
+DWARFFormValue::getAsSectionedAddress() const {
if (!isFormClass(FC_Address))
return None;
if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx) {
uint32_t Index = Value.uval;
if (!U)
return None;
- Optional<SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
+ Optional<object::SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
if (!SA)
return None;
return SA;
@@ -607,6 +641,12 @@ Optional<SectionedAddress> DWARFFormValue::getAsSectionedAddress() const {
}
Optional<uint64_t> DWARFFormValue::getAsReference() const {
+ if (auto R = getAsRelativeReference())
+ return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset;
+ return None;
+}
+
+Optional<DWARFFormValue::UnitOffset> DWARFFormValue::getAsRelativeReference() const {
if (!isFormClass(FC_Reference))
return None;
switch (Form) {
@@ -617,11 +657,11 @@ Optional<uint64_t> DWARFFormValue::getAsReference() const {
case DW_FORM_ref_udata:
if (!U)
return None;
- return Value.uval + U->getOffset();
+ return UnitOffset{const_cast<DWARFUnit*>(U), Value.uval};
case DW_FORM_ref_addr:
case DW_FORM_ref_sig8:
case DW_FORM_GNU_ref_alt:
- return Value.uval;
+ return UnitOffset{nullptr, Value.uval};
default:
return None;
}
diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index 1abd931e3b8b..f5f975578082 100644
--- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -1,9 +1,8 @@
//===- DWARFGdbIndex.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -121,7 +120,7 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
return false;
CuListOffset = Data.getU32(&Offset);
- uint32_t CuTypesOffset = Data.getU32(&Offset);
+ TuListOffset = Data.getU32(&Offset);
AddressAreaOffset = Data.getU32(&Offset);
SymbolTableOffset = Data.getU32(&Offset);
ConstantPoolOffset = Data.getU32(&Offset);
@@ -129,7 +128,7 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
if (Offset != CuListOffset)
return false;
- uint32_t CuListSize = (CuTypesOffset - CuListOffset) / 16;
+ uint32_t CuListSize = (TuListOffset - CuListOffset) / 16;
CuList.reserve(CuListSize);
for (uint32_t i = 0; i < CuListSize; ++i) {
uint64_t CuOffset = Data.getU64(&Offset);
@@ -139,7 +138,7 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
// CU Types are no longer needed as DWARF skeleton type units never made it
// into the standard.
- uint32_t TuListSize = (AddressAreaOffset - CuTypesOffset) / 24;
+ uint32_t TuListSize = (AddressAreaOffset - TuListOffset) / 24;
TuList.resize(TuListSize);
for (uint32_t I = 0; I < TuListSize; ++I) {
uint64_t CuOffset = Data.getU64(&Offset);
diff --git a/lib/DebugInfo/DWARF/DWARFListTable.cpp b/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 462c036d73ad..e38e706227da 100644
--- a/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -1,9 +1,8 @@
//===- DWARFListTable.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -26,7 +25,7 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
"%s table length at offset 0x%" PRIx32,
SectionName.data(), *OffsetPtr);
// TODO: Add support for DWARF64.
- HeaderData.Length = Data.getU32(OffsetPtr);
+ HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr);
if (HeaderData.Length == 0xffffffffu)
return createStringError(errc::not_supported,
"DWARF64 is not supported in %s at offset 0x%" PRIx32,
@@ -74,7 +73,7 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
Data.setAddressSize(HeaderData.AddrSize);
for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I)
- Offsets.push_back(Data.getU32(OffsetPtr));
+ Offsets.push_back(Data.getRelocatedValue(4, OffsetPtr));
return Error::success();
}
diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 00be75e1a94d..844920ba5b11 100644
--- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -1,9 +1,8 @@
//===- DWARFTypeUnit.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 80234665bdeb..b74acf60c747 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1,9 +1,8 @@
//===- DWARFUnit.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -198,7 +197,7 @@ DWARFDataExtractor DWARFUnit::getDebugInfoExtractor() const {
getAddressByteSize());
}
-Optional<SectionedAddress>
+Optional<object::SectionedAddress>
DWARFUnit::getAddrOffsetSectionItem(uint32_t Index) const {
if (IsDWO) {
auto R = Context.info_section_units();
@@ -242,17 +241,21 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
IndexEntry = Entry;
if (!IndexEntry && Index)
IndexEntry = Index->getFromOffset(*offset_ptr);
- Length = debug_info.getU32(offset_ptr);
- // FIXME: Support DWARF64.
- unsigned SizeOfLength = 4;
+ Length = debug_info.getRelocatedValue(4, offset_ptr);
FormParams.Format = DWARF32;
+ unsigned SizeOfLength = 4;
+ if (Length == 0xffffffff) {
+ Length = debug_info.getU64(offset_ptr);
+ FormParams.Format = DWARF64;
+ SizeOfLength = 8;
+ }
FormParams.Version = debug_info.getU16(offset_ptr);
if (FormParams.Version >= 5) {
UnitType = debug_info.getU8(offset_ptr);
FormParams.AddrSize = debug_info.getU8(offset_ptr);
- AbbrOffset = debug_info.getU32(offset_ptr);
+ AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
} else {
- AbbrOffset = debug_info.getRelocatedValue(4, offset_ptr);
+ AbbrOffset = debug_info.getRelocatedValue(FormParams.getDwarfOffsetByteSize(), offset_ptr);
FormParams.AddrSize = debug_info.getU8(offset_ptr);
// Fake a unit type based on the section type. This isn't perfect,
// but distinguishing compile and type units is generally enough.
@@ -432,12 +435,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// which may differ from the unit's format.
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
isLittleEndian, 0);
- if (IsDWO)
- StringOffsetsTableContribution =
- determineStringOffsetsTableContributionDWO(DA);
- else if (getVersion() >= 5)
- StringOffsetsTableContribution =
- determineStringOffsetsTableContribution(DA);
+ if (IsDWO || getVersion() >= 5) {
+ auto StringOffsetOrError =
+ IsDWO ? determineStringOffsetsTableContributionDWO(DA)
+ : determineStringOffsetsTableContribution(DA);
+ if (!StringOffsetOrError) {
+ WithColor::error() << "invalid contribution to string offsets table in section .debug_str_offsets[.dwo]: "
+ << toString(StringOffsetOrError.takeError()) << '\n';
+ } else {
+ StringOffsetsTableContribution = *StringOffsetOrError;
+ }
+ }
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
@@ -634,7 +642,7 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address,
// First, find the subroutine that contains the given address (the leaf
// of inlined chain).
DWARFDie SubroutineDIE =
- (DWO ? DWO.get() : this)->getSubroutineForAddress(Address);
+ (DWO ? *DWO : *this).getSubroutineForAddress(Address);
if (!SubroutineDIE)
return;
@@ -745,7 +753,7 @@ const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
return Abbrevs;
}
-llvm::Optional<SectionedAddress> DWARFUnit::getBaseAddress() {
+llvm::Optional<object::SectionedAddress> DWARFUnit::getBaseAddress() {
if (BaseAddr)
return BaseAddr;
@@ -755,7 +763,7 @@ llvm::Optional<SectionedAddress> DWARFUnit::getBaseAddress() {
return BaseAddr;
}
-Optional<StrOffsetsContributionDescriptor>
+Expected<StrOffsetsContributionDescriptor>
StrOffsetsContributionDescriptor::validateContributionSize(
DWARFDataExtractor &DA) {
uint8_t EntrySize = getDwarfOffsetByteSize();
@@ -766,58 +774,94 @@ StrOffsetsContributionDescriptor::validateContributionSize(
if (ValidationSize >= Size)
if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize))
return *this;
- return None;
+ return createStringError(errc::invalid_argument, "length exceeds section size");
}
// Look for a DWARF64-formatted contribution to the string offsets table
// starting at a given offset and record it in a descriptor.
-static Optional<StrOffsetsContributionDescriptor>
+static Expected<StrOffsetsContributionDescriptor>
parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 16))
- return None;
+ return createStringError(errc::invalid_argument, "section offset exceeds section size");
if (DA.getU32(&Offset) != 0xffffffff)
- return None;
+ return createStringError(errc::invalid_argument, "32 bit contribution referenced from a 64 bit unit");
uint64_t Size = DA.getU64(&Offset);
uint8_t Version = DA.getU16(&Offset);
(void)DA.getU16(&Offset); // padding
// The encoded length includes the 2-byte version field and the 2-byte
// padding, so we need to subtract them out when we populate the descriptor.
- return {{Offset, Size - 4, Version, DWARF64}};
+ return StrOffsetsContributionDescriptor(Offset, Size - 4, Version, DWARF64);
}
// Look for a DWARF32-formatted contribution to the string offsets table
// starting at a given offset and record it in a descriptor.
-static Optional<StrOffsetsContributionDescriptor>
+static Expected<StrOffsetsContributionDescriptor>
parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) {
if (!DA.isValidOffsetForDataOfSize(Offset, 8))
- return None;
+ return createStringError(errc::invalid_argument, "section offset exceeds section size");
+
uint32_t ContributionSize = DA.getU32(&Offset);
if (ContributionSize >= 0xfffffff0)
- return None;
+ return createStringError(errc::invalid_argument, "invalid length");
+
uint8_t Version = DA.getU16(&Offset);
(void)DA.getU16(&Offset); // padding
// The encoded length includes the 2-byte version field and the 2-byte
// padding, so we need to subtract them out when we populate the descriptor.
- return {{Offset, ContributionSize - 4, Version, DWARF32}};
+ return StrOffsetsContributionDescriptor(Offset, ContributionSize - 4, Version,
+ DWARF32);
+}
+
+static Expected<StrOffsetsContributionDescriptor>
+parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA,
+ llvm::dwarf::DwarfFormat Format,
+ uint64_t Offset) {
+ StrOffsetsContributionDescriptor Desc;
+ switch (Format) {
+ case dwarf::DwarfFormat::DWARF64: {
+ if (Offset < 16)
+ return createStringError(errc::invalid_argument, "insufficient space for 64 bit header prefix");
+ auto DescOrError = parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
+ if (!DescOrError)
+ return DescOrError.takeError();
+ Desc = *DescOrError;
+ break;
+ }
+ case dwarf::DwarfFormat::DWARF32: {
+ if (Offset < 8)
+ return createStringError(errc::invalid_argument, "insufficient space for 32 bit header prefix");
+ auto DescOrError = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
+ if (!DescOrError)
+ return DescOrError.takeError();
+ Desc = *DescOrError;
+ break;
+ }
+ }
+ return Desc.validateContributionSize(DA);
}
-Optional<StrOffsetsContributionDescriptor>
+Expected<Optional<StrOffsetsContributionDescriptor>>
DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) {
- auto Offset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base), 0);
- Optional<StrOffsetsContributionDescriptor> Descriptor;
- // Attempt to find a DWARF64 contribution 16 bytes before the base.
- if (Offset >= 16)
- Descriptor =
- parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16);
- // Try to find a DWARF32 contribution 8 bytes before the base.
- if (!Descriptor && Offset >= 8)
- Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8);
- return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
-}
-
-Optional<StrOffsetsContributionDescriptor>
+ uint64_t Offset;
+ if (IsDWO) {
+ Offset = 0;
+ if (DA.getData().data() == nullptr)
+ return None;
+ } else {
+ auto OptOffset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base));
+ if (!OptOffset)
+ return None;
+ Offset = *OptOffset;
+ }
+ auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+ if (!DescOrError)
+ return DescOrError.takeError();
+ return *DescOrError;
+}
+
+Expected<Optional<StrOffsetsContributionDescriptor>>
DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
uint64_t Offset = 0;
auto IndexEntry = Header.getIndexEntry();
@@ -826,19 +870,24 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
if (C)
Offset = C->Offset;
if (getVersion() >= 5) {
+ if (DA.getData().data() == nullptr)
+ return None;
+ Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
// Look for a valid contribution at the given offset.
- auto Descriptor =
- parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset);
- if (!Descriptor)
- Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset);
- return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor;
+ auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+ if (!DescOrError)
+ return DescOrError.takeError();
+ return *DescOrError;
}
// Prior to DWARF v5, we derive the contribution size from the
// index table (in a package file). In a .dwo file it is simply
// the length of the string offsets section.
if (!IndexEntry)
- return {{0, StringOffsetSection.Data.size(), 4, DWARF32}};
+ return {
+ Optional<StrOffsetsContributionDescriptor>(
+ {0, StringOffsetSection.Data.size(), 4, DWARF32})};
if (C)
- return {{C->Offset, C->Length, 4, DWARF32}};
+ return {Optional<StrOffsetsContributionDescriptor>(
+ {C->Offset, C->Length, 4, DWARF32})};
return None;
}
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 84b6c4b81817..047c63461ccf 100644
--- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -1,9 +1,8 @@
//===- DWARFUnitIndex.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -173,10 +172,9 @@ DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
E2->Contributions[InfoColumn].Offset;
});
}
- auto I =
- llvm::upper_bound(OffsetLookup, Offset, [&](uint32_t Offset, Entry *E2) {
- return Offset < E2->Contributions[InfoColumn].Offset;
- });
+ auto I = partition_point(OffsetLookup, [&](Entry *E2) {
+ return E2->Contributions[InfoColumn].Offset <= Offset;
+ });
if (I == OffsetLookup.begin())
return nullptr;
--I;
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index f8370178b627..c2b3189514a8 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -1,9 +1,8 @@
//===- DWARFVerifier.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
@@ -61,55 +60,47 @@ DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
}
bool DWARFVerifier::DieRangeInfo::contains(const DieRangeInfo &RHS) const {
- // Both list of ranges are sorted so we can make this fast.
-
- if (Ranges.empty() || RHS.Ranges.empty())
- return false;
-
- // Since the ranges are sorted we can advance where we start searching with
- // this object's ranges as we traverse RHS.Ranges.
- auto End = Ranges.end();
- auto Iter = findRange(RHS.Ranges.front());
+ auto I1 = Ranges.begin(), E1 = Ranges.end();
+ auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
+ if (I2 == E2)
+ return true;
- // Now linearly walk the ranges in this object and see if they contain each
- // ranges from RHS.Ranges.
- for (const auto &R : RHS.Ranges) {
- while (Iter != End) {
- if (Iter->contains(R))
- break;
- ++Iter;
+ DWARFAddressRange R = *I2;
+ while (I1 != E1) {
+ bool Covered = I1->LowPC <= R.LowPC;
+ if (R.LowPC == R.HighPC || (Covered && R.HighPC <= I1->HighPC)) {
+ if (++I2 == E2)
+ return true;
+ R = *I2;
+ continue;
}
- if (Iter == End)
+ if (!Covered)
return false;
+ if (R.LowPC < I1->HighPC)
+ R.LowPC = I1->HighPC;
+ ++I1;
}
- return true;
+ return false;
}
bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
- if (Ranges.empty() || RHS.Ranges.empty())
- return false;
-
- auto End = Ranges.end();
- auto Iter = findRange(RHS.Ranges.front());
- for (const auto &R : RHS.Ranges) {
- if (Iter == End)
- return false;
- if (R.HighPC <= Iter->LowPC)
- continue;
- while (Iter != End) {
- if (Iter->intersects(R))
- return true;
- ++Iter;
- }
+ auto I1 = Ranges.begin(), E1 = Ranges.end();
+ auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
+ while (I1 != E1 && I2 != E2) {
+ if (I1->intersects(*I2))
+ return true;
+ if (I1->LowPC < I2->LowPC)
+ ++I1;
+ else
+ ++I2;
}
-
return false;
}
bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
uint32_t *Offset, unsigned UnitIndex,
uint8_t &UnitType, bool &isUnitDWARF64) {
- uint32_t AbbrOffset, Length;
+ uint64_t AbbrOffset, Length;
uint8_t AddrSize = 0;
uint16_t Version;
bool Success = true;
@@ -123,22 +114,19 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
uint32_t OffsetStart = *Offset;
Length = DebugInfoData.getU32(Offset);
if (Length == UINT32_MAX) {
+ Length = DebugInfoData.getU64(Offset);
isUnitDWARF64 = true;
- OS << format(
- "Unit[%d] is in 64-bit DWARF format; cannot verify from this point.\n",
- UnitIndex);
- return false;
}
Version = DebugInfoData.getU16(Offset);
if (Version >= 5) {
UnitType = DebugInfoData.getU8(Offset);
AddrSize = DebugInfoData.getU8(Offset);
- AbbrOffset = DebugInfoData.getU32(Offset);
+ AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
ValidType = dwarf::isUnitType(UnitType);
} else {
UnitType = 0;
- AbbrOffset = DebugInfoData.getU32(Offset);
+ AbbrOffset = isUnitDWARF64 ? DebugInfoData.getU64(Offset) : DebugInfoData.getU32(Offset);
AddrSize = DebugInfoData.getU8(Offset);
}
@@ -166,7 +154,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
if (!ValidAddrSize)
note() << "The address size is unsupported.\n";
}
- *Offset = OffsetStart + Length + 4;
+ *Offset = OffsetStart + Length + (isUnitDWARF64 ? 12 : 4);
return Success;
}
@@ -179,21 +167,11 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
if (Die.getTag() == DW_TAG_null)
continue;
- bool HasTypeAttr = false;
for (auto AttrValue : Die.attributes()) {
NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
- HasTypeAttr |= (AttrValue.Attr == DW_AT_type);
}
- if (!HasTypeAttr && (Die.getTag() == DW_TAG_formal_parameter ||
- Die.getTag() == DW_TAG_variable ||
- Die.getTag() == DW_TAG_array_type)) {
- error() << "DIE with tag " << TagString(Die.getTag())
- << " is missing type attribute:\n";
- dump(Die) << '\n';
- NumUnitErrors++;
- }
NumUnitErrors += verifyDebugInfoCallSite(Die);
}
@@ -281,19 +259,12 @@ bool DWARFVerifier::handleDebugAbbrev() {
OS << "Verifying .debug_abbrev...\n";
const DWARFObject &DObj = DCtx.getDWARFObj();
- bool noDebugAbbrev = DObj.getAbbrevSection().empty();
- bool noDebugAbbrevDWO = DObj.getAbbrevDWOSection().empty();
-
- if (noDebugAbbrev && noDebugAbbrevDWO) {
- return true;
- }
-
unsigned NumErrors = 0;
- if (!noDebugAbbrev)
+ if (!DObj.getAbbrevSection().empty())
NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrev());
-
- if (!noDebugAbbrevDWO)
+ if (!DObj.getAbbrevDWOSection().empty())
NumErrors += verifyAbbrevSection(DCtx.getDebugAbbrevDWO());
+
return NumErrors == 0;
}
@@ -503,7 +474,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
return Op.isError();
});
- if (Error)
+ if (Error || !Expression.verify(U))
ReportError("DIE contains invalid DWARF expression:");
};
if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
@@ -629,7 +600,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
dump(Die) << '\n';
break;
}
- // Check that the index is within the bounds of the section.
+ // Check that the index is within the bounds of the section.
unsigned ItemSize = DieCU->getDwarfStringOffsetsByteSize();
// Use a 64-bit type to calculate the offset to guard against overflow.
uint64_t Offset =
@@ -664,9 +635,9 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() {
// getting the DIE by offset and emitting an error
OS << "Verifying .debug_info references...\n";
unsigned NumErrors = 0;
- for (auto Pair : ReferenceToDIEOffsets) {
- auto Die = DCtx.getDIEForOffset(Pair.first);
- if (Die)
+ for (const std::pair<uint64_t, std::set<uint32_t>> &Pair :
+ ReferenceToDIEOffsets) {
+ if (DCtx.getDIEForOffset(Pair.first))
continue;
++NumErrors;
error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
@@ -731,7 +702,6 @@ void DWARFVerifier::verifyDebugLineRows() {
continue;
// Verify prologue.
- uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size();
uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size();
uint32_t FileIndex = 1;
StringMap<uint16_t> FullPathMap;
@@ -773,7 +743,7 @@ void DWARFVerifier::verifyDebugLineRows() {
uint32_t RowIndex = 0;
for (const auto &Row : LineTable->Rows) {
// Verify row address.
- if (Row.Address < PrevAddress) {
+ if (Row.Address.Address < PrevAddress) {
++NumDebugLineErrors;
error() << ".debug_line["
<< format("0x%08" PRIx64,
@@ -789,13 +759,16 @@ void DWARFVerifier::verifyDebugLineRows() {
}
// Verify file index.
- if (Row.File > MaxFileIndex) {
+ if (!LineTable->hasFileAtIndex(Row.File)) {
++NumDebugLineErrors;
+ bool isDWARF5 = LineTable->Prologue.getVersion() >= 5;
error() << ".debug_line["
<< format("0x%08" PRIx64,
*toSectionOffset(Die.find(DW_AT_stmt_list)))
<< "][" << RowIndex << "] has invalid file index " << Row.File
- << " (valid values are [1," << MaxFileIndex << "]):\n";
+ << " (valid values are [" << (isDWARF5 ? "0," : "1,")
+ << LineTable->Prologue.FileNames.size()
+ << (isDWARF5 ? ")" : "]") << "):\n";
DWARFDebugLine::Row::dumpTableHeader(OS);
Row.dump(OS);
OS << '\n';
@@ -803,7 +776,7 @@ void DWARFVerifier::verifyDebugLineRows() {
if (Row.EndSequence)
PrevAddress = 0;
else
- PrevAddress = Row.Address;
+ PrevAddress = Row.Address.Address;
++RowIndex;
}
}
diff --git a/lib/DebugInfo/GSYM/FunctionInfo.cpp b/lib/DebugInfo/GSYM/FunctionInfo.cpp
new file mode 100644
index 000000000000..55c36a55b4be
--- /dev/null
+++ b/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -0,0 +1,22 @@
+//===- FunctionInfo.cpp -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+
+using namespace llvm;
+using namespace gsym;
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
+ OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): "
+ << "Name=" << HEX32(FI.Name) << '\n';
+ for (const auto &Line : FI.Lines)
+ OS << Line << '\n';
+ OS << FI.Inline;
+ return OS;
+}
diff --git a/lib/DebugInfo/GSYM/InlineInfo.cpp b/lib/DebugInfo/GSYM/InlineInfo.cpp
new file mode 100644
index 000000000000..781c1755241d
--- /dev/null
+++ b/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -0,0 +1,59 @@
+//===- InlineInfo.cpp -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include <algorithm>
+#include <inttypes.h>
+
+using namespace llvm;
+using namespace gsym;
+
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const InlineInfo &II) {
+ if (!II.isValid())
+ return OS;
+ bool First = true;
+ for (auto Range : II.Ranges) {
+ if (First)
+ First = false;
+ else
+ OS << ' ';
+ OS << Range;
+ }
+ OS << " Name = " << HEX32(II.Name) << ", CallFile = " << II.CallFile
+ << ", CallLine = " << II.CallFile << '\n';
+ for (const auto &Child : II.Children)
+ OS << Child;
+ return OS;
+}
+
+static bool getInlineStackHelper(const InlineInfo &II, uint64_t Addr,
+ std::vector<const InlineInfo *> &InlineStack) {
+ if (II.Ranges.contains(Addr)) {
+ // If this is the top level that represents the concrete function,
+ // there will be no name and we should clear the inline stack. Otherwise
+ // we have found an inline call stack that we need to insert.
+ if (II.Name != 0)
+ InlineStack.insert(InlineStack.begin(), &II);
+ for (const auto &Child : II.Children) {
+ if (::getInlineStackHelper(Child, Addr, InlineStack))
+ break;
+ }
+ return !InlineStack.empty();
+ }
+ return false;
+}
+
+llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr) const {
+ InlineArray Result;
+ if (getInlineStackHelper(*this, Addr, Result))
+ return Result;
+ return llvm::None;
+}
diff --git a/lib/DebugInfo/GSYM/Range.cpp b/lib/DebugInfo/GSYM/Range.cpp
new file mode 100644
index 000000000000..ca61984dacbd
--- /dev/null
+++ b/lib/DebugInfo/GSYM/Range.cpp
@@ -0,0 +1,55 @@
+//===- Range.cpp ------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include <algorithm>
+#include <inttypes.h>
+
+using namespace llvm;
+using namespace gsym;
+
+
+void AddressRanges::insert(AddressRange Range) {
+ if (Range.size() == 0)
+ return;
+
+ auto It = llvm::upper_bound(Ranges, Range);
+ auto It2 = It;
+ while (It2 != Ranges.end() && It2->Start < Range.End)
+ ++It2;
+ if (It != It2) {
+ Range.End = std::max(Range.End, It2[-1].End);
+ It = Ranges.erase(It, It2);
+ }
+ if (It != Ranges.begin() && Range.Start < It[-1].End)
+ It[-1].End = std::max(It[-1].End, Range.End);
+ else
+ Ranges.insert(It, Range);
+}
+
+bool AddressRanges::contains(uint64_t Addr) const {
+ auto It = std::partition_point(
+ Ranges.begin(), Ranges.end(),
+ [=](const AddressRange &R) { return R.Start <= Addr; });
+ return It != Ranges.begin() && Addr < It[-1].End;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) {
+ return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")";
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRanges &AR) {
+ size_t Size = AR.size();
+ for (size_t I = 0; I < Size; ++I) {
+ if (I)
+ OS << ' ';
+ OS << AR[I];
+ }
+ return OS;
+}
diff --git a/lib/DebugInfo/MSF/MSFBuilder.cpp b/lib/DebugInfo/MSF/MSFBuilder.cpp
index 71609919558a..c6fe764ab7e0 100644
--- a/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -1,9 +1,8 @@
//===- MSFBuilder.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/MSF/MSFCommon.cpp b/lib/DebugInfo/MSF/MSFCommon.cpp
index d398304375ac..fb4f0700059c 100644
--- a/lib/DebugInfo/MSF/MSFCommon.cpp
+++ b/lib/DebugInfo/MSF/MSFCommon.cpp
@@ -1,9 +1,8 @@
//===- MSFCommon.cpp - Common types and functions for MSF files -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/MSF/MSFError.cpp b/lib/DebugInfo/MSF/MSFError.cpp
index bfac6bebba3f..b368b802c564 100644
--- a/lib/DebugInfo/MSF/MSFError.cpp
+++ b/lib/DebugInfo/MSF/MSFError.cpp
@@ -1,9 +1,8 @@
//===- MSFError.cpp - Error extensions for MSF files ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
using namespace llvm;
using namespace llvm::msf;
+namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
@@ -39,6 +39,7 @@ public:
llvm_unreachable("Unrecognized msf_error_code");
}
};
+} // namespace
static llvm::ManagedStatic<MSFErrorCategory> MSFCategory;
const std::error_category &llvm::msf::MSFErrCategory() { return *MSFCategory; }
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index dec28eb30697..df925771f0d9 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -1,9 +1,8 @@
//===- MappedBlockStream.cpp - Reads stream data from an MSF file ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIADataStream.cpp b/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
index 6a10513fad97..8a806f298d0f 100644
--- a/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIADataStream.cpp
@@ -1,9 +1,8 @@
//===- DIADataStream.cpp - DIA implementation of IPDBDataStream -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
index d2451f13e6cb..e4cb4daf94b1 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumDebugStreams.cpp - DIA Debug Stream Enumerator impl -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
index f873f3525df5..8a181b448a27 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumFrameData.cpp ---------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
index 6c361b81e33d..7226ab2ba0a0 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumSourceFiles.cpp - DIA Source File Enumerator impl ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
index 0820d9dc7c9f..6f1d7733fb2d 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumLineNumbers.cpp - DIA Line Number Enumerator impl ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
index 90c857aa5713..4f9b232a024a 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumSectionContribs.cpp ---------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
index 06595e7ec1c8..943e9e1b4d58 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumSourceFiles.cpp - DIA Source File Enumerator impl ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
index 48bc32767e6c..5153596d52ae 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp
@@ -1,9 +1,8 @@
//==- DIAEnumSymbols.cpp - DIA Symbol Enumerator impl ------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
index 6fa096156d48..335b575d6542 100644
--- a/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp
@@ -1,9 +1,8 @@
//===- DIAEnumTables.cpp - DIA Table Enumerator Impl ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp b/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
index 533cce7923c0..7975156b1abd 100644
--- a/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp
@@ -1,9 +1,8 @@
//===- DIAFrameData.cpp - DIA impl. of IPDBFrameData -------------- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp b/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
index 1d642f221d79..032b230b5faa 100644
--- a/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
@@ -1,9 +1,8 @@
//===- DIAInjectedSource.cpp - DIA impl for IPDBInjectedSource --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -42,11 +41,11 @@ std::string DIAInjectedSource::getVirtualFileName() const {
&IDiaInjectedSource::get_virtualFilename);
}
-PDB_SourceCompression DIAInjectedSource::getCompression() const {
+uint32_t DIAInjectedSource::getCompression() const {
DWORD Compression = 0;
if (S_OK != SourceFile->get_sourceCompression(&Compression))
return PDB_SourceCompression::None;
- return static_cast<PDB_SourceCompression>(Compression);
+ return static_cast<uint32_t>(Compression);
}
std::string DIAInjectedSource::getCode() const {
diff --git a/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp b/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
index b19be6b595ab..3af02ea36c7b 100644
--- a/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp
@@ -1,9 +1,8 @@
//===- DIALineNumber.cpp - DIA implementation of IPDBLineNumber -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index cd4d00a13b18..a8ae076e1d6c 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -1,9 +1,8 @@
//===- DIARawSymbol.cpp - DIA implementation of IPDBRawSymbol ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
index 8e233ca15161..e2d928f2c4b2 100644
--- a/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASectionContrib.cpp
@@ -1,9 +1,8 @@
//===- DIASectionContrib.cpp - DIA impl. of IPDBSectionContrib ---- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp
index bd375e172ac0..4e0b8587c613 100644
--- a/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -1,9 +1,8 @@
//===- DIASession.cpp - DIA implementation of IPDBSession -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
diff --git a/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp b/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
index d3e408166a87..21e757c3a060 100644
--- a/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp
@@ -1,9 +1,8 @@
//===- DIASourceFile.cpp - DIA implementation of IPDBSourceFile -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/DIA/DIATable.cpp b/lib/DebugInfo/PDB/DIA/DIATable.cpp
index 6017081b2cb6..33d74abd740e 100644
--- a/lib/DebugInfo/PDB/DIA/DIATable.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIATable.cpp
@@ -1,9 +1,8 @@
//===- DIATable.cpp - DIA implementation of IPDBTable -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/GenericError.cpp b/lib/DebugInfo/PDB/GenericError.cpp
index 256952073e88..70dc094c42ec 100644
--- a/lib/DebugInfo/PDB/GenericError.cpp
+++ b/lib/DebugInfo/PDB/GenericError.cpp
@@ -1,9 +1,8 @@
//===- Error.cpp - system_error extensions for PDB --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
using namespace llvm;
using namespace llvm::pdb;
+namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
@@ -40,6 +40,7 @@ public:
llvm_unreachable("Unrecognized generic_error_code");
}
};
+} // namespace
static llvm::ManagedStatic<PDBErrorCategory> PDBCategory;
const std::error_category &llvm::pdb::PDBErrCategory() { return *PDBCategory; }
diff --git a/lib/DebugInfo/PDB/IPDBSourceFile.cpp b/lib/DebugInfo/PDB/IPDBSourceFile.cpp
index 8cb1fbef51f4..113ee04bab95 100644
--- a/lib/DebugInfo/PDB/IPDBSourceFile.cpp
+++ b/lib/DebugInfo/PDB/IPDBSourceFile.cpp
@@ -1,9 +1,8 @@
//===- IPDBSourceFile.cpp - base interface for a PDB source file ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
index 931ac7bb81db..5095efcdee3c 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
@@ -1,9 +1,8 @@
//===- DbiModuleDescriptor.cpp - PDB module information -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index ab93efc839a9..20b6c6142547 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -1,9 +1,8 @@
//===- DbiModuleDescriptorBuilder.cpp - PDB Mod Info Creation ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -104,7 +103,6 @@ uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const {
}
void DbiModuleDescriptorBuilder::finalize() {
- Layout.SC.Imod = Layout.Mod;
Layout.FileNameOffs = 0; // TODO: Fix this
Layout.Flags = 0; // TODO: Fix this
Layout.C11Bytes = 0;
@@ -117,12 +115,15 @@ void DbiModuleDescriptorBuilder::finalize() {
// This value includes both the signature field as well as the record bytes
// from the symbol stream.
- Layout.SymBytes = SymbolByteSize + sizeof(uint32_t);
+ Layout.SymBytes =
+ Layout.ModDiStream == kInvalidStreamIndex ? 0 : getNextSymbolOffset();
}
Error DbiModuleDescriptorBuilder::finalizeMsfLayout() {
this->Layout.ModDiStream = kInvalidStreamIndex;
uint32_t C13Size = calculateC13DebugInfoSize();
+ if (!C13Size && !SymbolByteSize)
+ return Error::success();
auto ExpectedSN =
MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize, C13Size));
if (!ExpectedSN)
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
index eea70b229c67..5cf014e881cd 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -1,9 +1,8 @@
//===- DbiModuleList.cpp - PDB module information list --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 60ac17b655a7..4eb16804171d 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -1,9 +1,8 @@
//===- DbiStream.cpp - PDB Dbi Stream (Stream 3) Access -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -127,8 +126,10 @@ Error DbiStream::reload(PDBFile *Pdb) {
return EC;
if (auto EC = initializeSectionMapData())
return EC;
- if (auto EC = initializeFpoRecords(Pdb))
+ if (auto EC = initializeOldFpoRecords(Pdb))
return EC;
+ if (auto EC = initializeNewFpoRecords(Pdb))
+ return EC;
if (Reader.bytesRemaining() > 0)
return make_error<RawError>(raw_error_code::corrupt_file,
@@ -201,8 +202,16 @@ FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() const {
return SectionHeaders;
}
-FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() {
- return FpoRecords;
+bool DbiStream::hasOldFpoRecords() const { return OldFpoStream != nullptr; }
+
+FixedStreamArray<object::FpoData> DbiStream::getOldFpoRecords() const {
+ return OldFpoRecords;
+}
+
+bool DbiStream::hasNewFpoRecords() const { return NewFpoStream != nullptr; }
+
+const DebugFrameDataSubsectionRef &DbiStream::getNewFpoRecords() const {
+ return NewFpoRecords;
}
const DbiModuleList &DbiStream::modules() const { return Modules; }
@@ -247,22 +256,15 @@ Error DbiStream::initializeSectionContributionData() {
// Initializes this->SectionHeaders.
Error DbiStream::initializeSectionHeadersData(PDBFile *Pdb) {
- if (!Pdb)
- return Error::success();
-
- if (DbgStreams.size() == 0)
- return Error::success();
+ Expected<std::unique_ptr<msf::MappedBlockStream>> ExpectedStream =
+ createIndexedStreamForHeaderType(Pdb, DbgHeaderType::SectionHdr);
+ if (auto EC = ExpectedStream.takeError())
+ return EC;
- uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr);
- if (StreamNum == kInvalidStreamIndex)
+ auto &SHS = *ExpectedStream;
+ if (!SHS)
return Error::success();
- if (StreamNum >= Pdb->getNumStreams())
- return make_error<RawError>(raw_error_code::no_stream);
-
- auto SHS = MappedBlockStream::createIndexedStream(
- Pdb->getMsfLayout(), Pdb->getMsfBuffer(), StreamNum, Pdb->getAllocator());
-
size_t StreamLen = SHS->getLength();
if (StreamLen % sizeof(object::coff_section))
return make_error<RawError>(raw_error_code::corrupt_file,
@@ -279,39 +281,65 @@ Error DbiStream::initializeSectionHeadersData(PDBFile *Pdb) {
}
// Initializes this->Fpos.
-Error DbiStream::initializeFpoRecords(PDBFile *Pdb) {
- if (!Pdb)
- return Error::success();
-
- if (DbgStreams.size() == 0)
- return Error::success();
-
- uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::NewFPO);
+Error DbiStream::initializeOldFpoRecords(PDBFile *Pdb) {
+ Expected<std::unique_ptr<msf::MappedBlockStream>> ExpectedStream =
+ createIndexedStreamForHeaderType(Pdb, DbgHeaderType::FPO);
+ if (auto EC = ExpectedStream.takeError())
+ return EC;
- // This means there is no FPO data.
- if (StreamNum == kInvalidStreamIndex)
+ auto &FS = *ExpectedStream;
+ if (!FS)
return Error::success();
- if (StreamNum >= Pdb->getNumStreams())
- return make_error<RawError>(raw_error_code::no_stream);
-
- auto FS = MappedBlockStream::createIndexedStream(
- Pdb->getMsfLayout(), Pdb->getMsfBuffer(), StreamNum, Pdb->getAllocator());
-
size_t StreamLen = FS->getLength();
if (StreamLen % sizeof(object::FpoData))
return make_error<RawError>(raw_error_code::corrupt_file,
- "Corrupted New FPO stream.");
+ "Corrupted Old FPO stream.");
size_t NumRecords = StreamLen / sizeof(object::FpoData);
BinaryStreamReader Reader(*FS);
- if (auto EC = Reader.readArray(FpoRecords, NumRecords))
+ if (auto EC = Reader.readArray(OldFpoRecords, NumRecords))
return make_error<RawError>(raw_error_code::corrupt_file,
- "Corrupted New FPO stream.");
- FpoStream = std::move(FS);
+ "Corrupted Old FPO stream.");
+ OldFpoStream = std::move(FS);
return Error::success();
}
+Error DbiStream::initializeNewFpoRecords(PDBFile *Pdb) {
+ Expected<std::unique_ptr<msf::MappedBlockStream>> ExpectedStream =
+ createIndexedStreamForHeaderType(Pdb, DbgHeaderType::NewFPO);
+ if (auto EC = ExpectedStream.takeError())
+ return EC;
+
+ auto &FS = *ExpectedStream;
+ if (!FS)
+ return Error::success();
+
+ if (auto EC = NewFpoRecords.initialize(*FS))
+ return EC;
+
+ NewFpoStream = std::move(FS);
+ return Error::success();
+}
+
+Expected<std::unique_ptr<msf::MappedBlockStream>>
+DbiStream::createIndexedStreamForHeaderType(PDBFile *Pdb,
+ DbgHeaderType Type) const {
+ if (!Pdb)
+ return nullptr;
+
+ if (DbgStreams.empty())
+ return nullptr;
+
+ uint32_t StreamNum = getDebugStreamIndex(Type);
+
+ // This means there is no such stream.
+ if (StreamNum == kInvalidStreamIndex)
+ return nullptr;
+
+ return Pdb->safelyCreateIndexedStream(StreamNum);
+}
+
BinarySubstreamRef DbiStream::getSectionContributionData() const {
return SecContrSubstream;
}
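
A minimal usage sketch for the split FPO accessors introduced above, assuming a DbiStream `Dbi` that has already been reload()ed from a PDB; it only counts records from both tables, and the function name is illustrative:

#include "llvm/DebugInfo/PDB/Native/DbiStream.h"

size_t countFpoRecords(llvm::pdb::DbiStream &Dbi) {
  size_t N = 0;
  // Old-style records come from the DbgHeaderType::FPO stream.
  if (Dbi.hasOldFpoRecords())
    N += Dbi.getOldFpoRecords().size();
  // New-style frame data comes from the DbgHeaderType::NewFPO stream.
  if (Dbi.hasNewFpoRecords())
    for (const auto &FD : Dbi.getNewFpoRecords()) {
      (void)FD; // only counting here
      ++N;
    }
  return N;
}
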
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 094216ea800a..b7ade0072ee5 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -1,9 +1,8 @@
//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/EnumTables.cpp b/lib/DebugInfo/PDB/Native/EnumTables.cpp
index b3837dc72e5b..f5125393695b 100644
--- a/lib/DebugInfo/PDB/Native/EnumTables.cpp
+++ b/lib/DebugInfo/PDB/Native/EnumTables.cpp
@@ -1,9 +1,8 @@
//===- EnumTables.cpp - Enum to string conversion tables --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 57da7003da2b..8ed5b8b44c59 100644
--- a/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -1,9 +1,8 @@
//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,14 +30,14 @@ using namespace llvm::pdb;
using namespace llvm::codeview;
struct llvm::pdb::GSIHashStreamBuilder {
- struct UdtDenseMapInfo {
+ struct SymbolDenseMapInfo {
static inline CVSymbol getEmptyKey() {
static CVSymbol Empty;
return Empty;
}
static inline CVSymbol getTombstoneKey() {
- static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
- ArrayRef<uint8_t>());
+ static CVSymbol Tombstone(
+ DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
return Tombstone;
}
static unsigned getHashValue(const CVSymbol &Val) {
@@ -51,7 +50,7 @@ struct llvm::pdb::GSIHashStreamBuilder {
std::vector<CVSymbol> Records;
uint32_t StreamIndex;
- llvm::DenseSet<CVSymbol, UdtDenseMapInfo> UdtHashes;
+ llvm::DenseSet<CVSymbol, SymbolDenseMapInfo> SymbolHashes;
std::vector<PSHashRecord> HashRecords;
std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
std::vector<support::ulittle32_t> HashBuckets;
@@ -67,8 +66,8 @@ struct llvm::pdb::GSIHashStreamBuilder {
CodeViewContainer::Pdb));
}
void addSymbol(const CVSymbol &Symbol) {
- if (Symbol.kind() == S_UDT) {
- auto Iter = UdtHashes.insert(Symbol);
+ if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) {
+ auto Iter = SymbolHashes.insert(Symbol);
if (!Iter.second)
return;
}
@@ -263,8 +262,7 @@ static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
SymOffsets.push_back(SymOffset);
SymOffset += Sym.length();
}
- std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
- comparePubSymByAddrAndName);
+ llvm::stable_sort(PublicsByAddr, comparePubSymByAddrAndName);
// Fill in the symbol offsets in the appropriate order.
std::vector<ulittle32_t> AddrMap;
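
The widened deduplication above (S_UDT and now also S_CONSTANT) relies on the usual DenseSet/DenseMapInfo contract; a standalone sketch of that pattern with an illustrative record type, not the real CVSymbol plumbing:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include <cstdint>

// Illustrative stand-in for a symbol record keyed by its raw bytes.
struct Record {
  llvm::ArrayRef<uint8_t> Data;
};

struct RecordDenseMapInfo {
  static Record getEmptyKey() {
    return {llvm::DenseMapInfo<llvm::ArrayRef<uint8_t>>::getEmptyKey()};
  }
  static Record getTombstoneKey() {
    return {llvm::DenseMapInfo<llvm::ArrayRef<uint8_t>>::getTombstoneKey()};
  }
  static unsigned getHashValue(const Record &R) {
    return llvm::hash_combine_range(R.Data.begin(), R.Data.end());
  }
  static bool isEqual(const Record &A, const Record &B) {
    return A.Data == B.Data;
  }
};

// Mirrors addSymbol(): a second byte-identical record is dropped.
bool insertOnce(llvm::DenseSet<Record, RecordDenseMapInfo> &Seen,
                const Record &R) {
  return Seen.insert(R).second;
}
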
diff --git a/lib/DebugInfo/PDB/Native/GlobalsStream.cpp b/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
index e36319566821..f27d60f46815 100644
--- a/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
@@ -1,9 +1,8 @@
//===- GlobalsStream.cpp - PDB Index of Symbols by Name ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/DebugInfo/PDB/Native/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp
index 61188ece2dcb..b5c139ecbec0 100644
--- a/lib/DebugInfo/PDB/Native/Hash.cpp
+++ b/lib/DebugInfo/PDB/Native/Hash.cpp
@@ -1,9 +1,8 @@
//===- Hash.cpp - PDB Hash Functions --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/HashTable.cpp b/lib/DebugInfo/PDB/Native/HashTable.cpp
index cfabc9cd1ad8..dfdcdf1f4eaf 100644
--- a/lib/DebugInfo/PDB/Native/HashTable.cpp
+++ b/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -1,9 +1,8 @@
//===- HashTable.cpp - PDB Hash Table -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp
index 973a520ffca9..f41bb32d69af 100644
--- a/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -1,9 +1,8 @@
//===- InfoStream.cpp - PDB Info Stream (Stream 1) Access -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 3b5a2accdba6..42daa7cae799 100644
--- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -1,9 +1,8 @@
//===- InfoStreamBuilder.cpp - PDB Info Stream Creation ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
new file mode 100644
index 000000000000..3f4101db7b93
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
@@ -0,0 +1,65 @@
+//===- InjectedSourceStream.cpp - PDB Headerblock Stream Access -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
+
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+InjectedSourceStream::InjectedSourceStream(
+ std::unique_ptr<MappedBlockStream> Stream)
+ : Stream(std::move(Stream)) {}
+
+Error InjectedSourceStream::reload(const PDBStringTable &Strings) {
+ BinaryStreamReader Reader(*Stream);
+
+ if (auto EC = Reader.readObject(Header))
+ return EC;
+
+ if (Header->Version !=
+ static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid headerblock header version");
+
+ if (auto EC = InjectedSourceTable.load(Reader))
+ return EC;
+
+ for (const auto& Entry : *this) {
+ if (Entry.second.Size != sizeof(SrcHeaderBlockEntry))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+                                  "Invalid headerblock entry size");
+ if (Entry.second.Version !=
+ static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+                                  "Invalid headerblock entry version");
+
+ // Check that all name references are valid.
+ auto Name = Strings.getStringForID(Entry.second.FileNI);
+ if (!Name)
+ return Name.takeError();
+ auto ObjName = Strings.getStringForID(Entry.second.ObjNI);
+ if (!ObjName)
+ return ObjName.takeError();
+ auto VName = Strings.getStringForID(Entry.second.VFileNI);
+ if (!VName)
+ return VName.takeError();
+ }
+
+ assert(Reader.bytesRemaining() == 0);
+ return Error::success();
+}
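
A hedged sketch of how the validated table might be consumed once loaded; PDBFile::getInjectedSourceStream() is added later in this diff, and the printing here is illustrative only:

#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error listInjectedFiles(llvm::pdb::PDBFile &File) {
  auto ISS = File.getInjectedSourceStream();
  if (!ISS)
    return ISS.takeError();
  auto Strings = File.getStringTable();
  if (!Strings)
    return Strings.takeError();
  for (const auto &Entry : *ISS) {
    // reload() has already verified that every name index resolves.
    llvm::outs() << llvm::cantFail(Strings->getStringForID(Entry.second.FileNI))
                 << " (" << uint32_t(Entry.second.FileSize) << " bytes)\n";
  }
  return llvm::Error::success();
}
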
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index 8c97f4a012f0..1445f0bd9e1b 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -1,9 +1,8 @@
//===- ModuleDebugStream.cpp - PDB Module Info Stream Access --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,6 +14,7 @@
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
@@ -37,6 +37,17 @@ ModuleDebugStreamRef::~ModuleDebugStreamRef() = default;
Error ModuleDebugStreamRef::reload() {
BinaryStreamReader Reader(*Stream);
+ if (Mod.getModuleStreamIndex() != llvm::pdb::kInvalidStreamIndex) {
+ if (Error E = reloadSerialize(Reader))
+ return E;
+ }
+ if (Reader.bytesRemaining() > 0)
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Unexpected bytes in module stream.");
+ return Error::success();
+}
+
+Error ModuleDebugStreamRef::reloadSerialize(BinaryStreamReader &Reader) {
uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize();
uint32_t C11Size = Mod.getC11LineInfoByteSize();
uint32_t C13Size = Mod.getC13LineInfoByteSize();
@@ -72,10 +83,6 @@ Error ModuleDebugStreamRef::reload() {
return EC;
if (auto EC = Reader.readSubstream(GlobalRefsSubstream, GlobalRefsSize))
return EC;
- if (Reader.bytesRemaining() > 0)
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Unexpected bytes in module stream.");
-
return Error::success();
}
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index a4eaed90837d..4a88391494cd 100644
--- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -1,9 +1,8 @@
//===- NamedStreamMap.cpp - PDB Named Stream Map --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,6 +34,7 @@ uint16_t NamedStreamMapTraits::hashLookupKey(StringRef S) const {
// Here, the type HASH is a typedef of unsigned short.
// ** It is not a bug that we truncate the result of hashStringV1, in fact
// it is a bug if we do not! **
+ // See NMTNI::hash() in the reference implementation.
return static_cast<uint16_t>(hashStringV1(S));
}
@@ -46,8 +46,7 @@ uint32_t NamedStreamMapTraits::lookupKeyToStorageKey(StringRef S) {
return NS->appendStringData(S);
}
-NamedStreamMap::NamedStreamMap()
- : HashTraits(*this), OffsetIndexMap(1, HashTraits) {}
+NamedStreamMap::NamedStreamMap() : HashTraits(*this), OffsetIndexMap(1) {}
Error NamedStreamMap::load(BinaryStreamReader &Stream) {
uint32_t StringBufferSize;
@@ -99,7 +98,7 @@ uint32_t NamedStreamMap::hashString(uint32_t Offset) const {
}
bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const {
- auto Iter = OffsetIndexMap.find_as(Stream);
+ auto Iter = OffsetIndexMap.find_as(Stream, HashTraits);
if (Iter == OffsetIndexMap.end())
return false;
StreamNo = (*Iter).second;
@@ -123,5 +122,5 @@ uint32_t NamedStreamMap::appendStringData(StringRef S) {
}
void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) {
- OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo));
+ OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo), HashTraits);
}
diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
index efa70b0e7bd8..39ae84acba20 100644
--- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -1,9 +1,8 @@
//===- NativeCompilandSymbol.cpp - Native impl for compilands ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp b/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
index 6eece3df2db3..54646867bc5f 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp
@@ -1,9 +1,8 @@
//==- NativeEnumGlobals.cpp - Native Global Enumerator impl ------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
new file mode 100644
index 000000000000..f17ff5bb01f2
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -0,0 +1,120 @@
+//==- NativeEnumInjectedSources.cpp - Native Injected Source Enumerator --*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
+
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+
+namespace llvm {
+namespace pdb {
+
+namespace {
+
+Expected<std::string> readStreamData(BinaryStream &Stream, uint32_t Limit) {
+ uint32_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
+ std::string Result;
+ Result.reserve(DataLength);
+ while (Offset < DataLength) {
+ ArrayRef<uint8_t> Data;
+ if (auto E = Stream.readLongestContiguousChunk(Offset, Data))
+ return std::move(E);
+ Data = Data.take_front(DataLength - Offset);
+ Offset += Data.size();
+ Result += toStringRef(Data);
+ }
+ return Result;
+}
+
+class NativeInjectedSource final : public IPDBInjectedSource {
+ const SrcHeaderBlockEntry &Entry;
+ const PDBStringTable &Strings;
+ PDBFile &File;
+
+public:
+ NativeInjectedSource(const SrcHeaderBlockEntry &Entry,
+ PDBFile &File, const PDBStringTable &Strings)
+ : Entry(Entry), Strings(Strings), File(File) {}
+
+ uint32_t getCrc32() const override { return Entry.CRC; }
+ uint64_t getCodeByteSize() const override { return Entry.FileSize; }
+
+ std::string getFileName() const override {
+ auto Name = Strings.getStringForID(Entry.FileNI);
+ assert(Name && "InjectedSourceStream should have rejected this");
+ return *Name;
+ }
+
+ std::string getObjectFileName() const override {
+ auto ObjName = Strings.getStringForID(Entry.ObjNI);
+ assert(ObjName && "InjectedSourceStream should have rejected this");
+ return *ObjName;
+ }
+
+ std::string getVirtualFileName() const override {
+ auto VName = Strings.getStringForID(Entry.VFileNI);
+ assert(VName && "InjectedSourceStream should have rejected this");
+ return *VName;
+ }
+
+ uint32_t getCompression() const override { return Entry.Compression; }
+
+ std::string getCode() const override {
+ // Get name of stream storing the data.
+ auto VName = Strings.getStringForID(Entry.VFileNI);
+ assert(VName && "InjectedSourceStream should have rejected this");
+ std::string StreamName = ("/src/files/" + *VName).str();
+
+ // Find stream with that name and read its data.
+ // FIXME: Consider validating (or even loading) all this in
+ // InjectedSourceStream so that no error can happen here.
+ auto ExpectedFileStream = File.safelyCreateNamedStream(StreamName);
+ if (!ExpectedFileStream) {
+ consumeError(ExpectedFileStream.takeError());
+ return "(failed to open data stream)";
+ }
+
+ auto Data = readStreamData(**ExpectedFileStream, Entry.FileSize);
+ if (!Data) {
+ consumeError(Data.takeError());
+ return "(failed to read data)";
+ }
+ return *Data;
+ }
+};
+
+} // namespace
+
+NativeEnumInjectedSources::NativeEnumInjectedSources(
+ PDBFile &File, const InjectedSourceStream &IJS,
+ const PDBStringTable &Strings)
+ : File(File), Stream(IJS), Strings(Strings), Cur(Stream.begin()) {}
+
+uint32_t NativeEnumInjectedSources::getChildCount() const {
+ return static_cast<uint32_t>(Stream.size());
+}
+
+std::unique_ptr<IPDBInjectedSource>
+NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const {
+ if (N >= getChildCount())
+ return nullptr;
+ return make_unique<NativeInjectedSource>(std::next(Stream.begin(), N)->second,
+ File, Strings);
+}
+
+std::unique_ptr<IPDBInjectedSource> NativeEnumInjectedSources::getNext() {
+ if (Cur == Stream.end())
+ return nullptr;
+ return make_unique<NativeInjectedSource>((Cur++)->second, File, Strings);
+}
+
+void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); }
+
+}
+}
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
index 6e4d56443a07..c6621924b516 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -1,9 +1,8 @@
//==- NativeEnumModules.cpp - Native Symbol Enumerator impl ------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp b/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
index 288a9128147a..ac217df1ee48 100644
--- a/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp
@@ -1,9 +1,8 @@
//==- NativeEnumTypes.cpp - Native Type Enumerator impl ----------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
index 6dde5d08a500..3f393409129b 100644
--- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -1,9 +1,8 @@
//===- NativeExeSymbol.cpp - native impl for PDBSymbolExe -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 62950cb3e52a..8e43cf24495a 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -1,9 +1,8 @@
//===- NativeRawSymbol.cpp - Native implementation of IPDBRawSymbol -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 7807e312365c..8a49cb1c5963 100644
--- a/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -1,9 +1,8 @@
//===- NativeSession.cpp - Native implementation of IPDBSession -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
@@ -192,7 +192,17 @@ std::unique_ptr<IPDBEnumTables> NativeSession::getEnumTables() const {
std::unique_ptr<IPDBEnumInjectedSources>
NativeSession::getInjectedSources() const {
- return nullptr;
+ auto ISS = Pdb->getInjectedSourceStream();
+ if (!ISS) {
+ consumeError(ISS.takeError());
+ return nullptr;
+ }
+ auto Strings = Pdb->getStringTable();
+ if (!Strings) {
+ consumeError(Strings.takeError());
+ return nullptr;
+ }
+ return make_unique<NativeEnumInjectedSources>(*Pdb, *ISS, *Strings);
}
std::unique_ptr<IPDBEnumSectionContribs>
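
An end-to-end sketch of the consumer side this change enables, using the existing native reader entry point; the path handling and printing are illustrative:

#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error dumpInjectedSources(llvm::StringRef PdbPath) {
  std::unique_ptr<llvm::pdb::IPDBSession> Session;
  if (auto E = llvm::pdb::loadDataForPDB(llvm::pdb::PDB_ReaderType::Native,
                                         PdbPath, Session))
    return E;
  auto Sources = Session->getInjectedSources();
  if (!Sources) // still null when the PDB has no /src/headerblock stream
    return llvm::Error::success();
  while (auto IS = Sources->getNext())
    llvm::outs() << IS->getFileName() << " -> " << IS->getCode().size()
                 << " bytes of source\n";
  return llvm::Error::success();
}
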
diff --git a/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
index 6ebb8cae3a65..704c1254afbf 100644
--- a/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
@@ -1,9 +1,8 @@
//===- NativeSymbolEnumerator.cpp - info about enumerators ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
index a52561728a98..80d455ad66e9 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp
@@ -1,9 +1,8 @@
//===- NativeTypeArray.cpp - info about arrays ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp b/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
index 7b0f13f3c075..a08663aa91ba 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
@@ -1,9 +1,8 @@
//===- NativeTypeBuiltin.cpp -------------------------------------- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
index 37176fe083b9..9f5e86281a23 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp
@@ -1,9 +1,8 @@
//===- NativeTypeEnum.cpp - info about enum type ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
index a9590fffdb87..405303469c18 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
@@ -1,9 +1,8 @@
//===- NativeTypeFunctionSig.cpp - info about function signature -*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp b/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
index bd8ecb6c4007..32dcfc235954 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp
@@ -1,9 +1,8 @@
//===- NativeTypePointer.cpp - info about pointer type ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
index 3abf91dcc6a3..be67846c0b24 100644
--- a/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp
@@ -1,9 +1,8 @@
//===- NativeTypeUDT.cpp - info about class/struct type ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index a1f8786ff12f..983031dfcb78 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -1,9 +1,8 @@
//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,6 +14,7 @@
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -234,7 +234,8 @@ ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
return ContainerLayout.DirectoryBlocks;
}
-std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
+std::unique_ptr<MappedBlockStream>
+PDBFile::createIndexedStream(uint16_t SN) const {
if (SN == kInvalidStreamIndex)
return nullptr;
return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
@@ -259,8 +260,8 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
if (!DbiS)
return DbiS.takeError();
- auto GlobalS = safelyCreateIndexedStream(
- ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
+ auto GlobalS =
+ safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
if (!GlobalS)
return GlobalS.takeError();
auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
@@ -273,7 +274,7 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
Expected<InfoStream &> PDBFile::getPDBInfoStream() {
if (!Info) {
- auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
+ auto InfoS = safelyCreateIndexedStream(StreamPDB);
if (!InfoS)
return InfoS.takeError();
auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
@@ -286,7 +287,7 @@ Expected<InfoStream &> PDBFile::getPDBInfoStream() {
Expected<DbiStream &> PDBFile::getPDBDbiStream() {
if (!Dbi) {
- auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
+ auto DbiS = safelyCreateIndexedStream(StreamDBI);
if (!DbiS)
return DbiS.takeError();
auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
@@ -299,7 +300,7 @@ Expected<DbiStream &> PDBFile::getPDBDbiStream() {
Expected<TpiStream &> PDBFile::getPDBTpiStream() {
if (!Tpi) {
- auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
+ auto TpiS = safelyCreateIndexedStream(StreamTPI);
if (!TpiS)
return TpiS.takeError();
auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
@@ -315,7 +316,7 @@ Expected<TpiStream &> PDBFile::getPDBIpiStream() {
if (!hasPDBIpiStream())
return make_error<RawError>(raw_error_code::no_stream);
- auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
+ auto IpiS = safelyCreateIndexedStream(StreamIPI);
if (!IpiS)
return IpiS.takeError();
auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
@@ -332,8 +333,8 @@ Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
if (!DbiS)
return DbiS.takeError();
- auto PublicS = safelyCreateIndexedStream(
- ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
+ auto PublicS =
+ safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
if (!PublicS)
return PublicS.takeError();
auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
@@ -351,8 +352,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
return DbiS.takeError();
uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
- auto SymbolS =
- safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
+ auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
if (!SymbolS)
return SymbolS.takeError();
@@ -366,17 +366,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
Expected<PDBStringTable &> PDBFile::getStringTable() {
if (!Strings) {
- auto IS = getPDBInfoStream();
- if (!IS)
- return IS.takeError();
-
- Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
- if (!ExpectedNSI)
- return ExpectedNSI.takeError();
- uint32_t NameStreamIndex = *ExpectedNSI;
-
- auto NS =
- safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
+ auto NS = safelyCreateNamedStream("/names");
if (!NS)
return NS.takeError();
@@ -391,6 +381,24 @@ Expected<PDBStringTable &> PDBFile::getStringTable() {
return *Strings;
}
+Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
+ if (!InjectedSources) {
+ auto IJS = safelyCreateNamedStream("/src/headerblock");
+ if (!IJS)
+ return IJS.takeError();
+
+ auto Strings = getStringTable();
+ if (!Strings)
+ return Strings.takeError();
+
+ auto IJ = llvm::make_unique<InjectedSourceStream>(std::move(*IJS));
+ if (auto EC = IJ->reload(*Strings))
+ return std::move(EC);
+ InjectedSources = std::move(IJ);
+ }
+ return *InjectedSources;
+}
+
uint32_t PDBFile::getPointerSize() {
auto DbiS = getPDBDbiStream();
if (!DbiS)
@@ -459,16 +467,41 @@ bool PDBFile::hasPDBStringTable() {
return true;
}
+bool PDBFile::hasPDBInjectedSourceStream() {
+ auto IS = getPDBInfoStream();
+ if (!IS)
+ return false;
+ Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
+ if (!ExpectedNSI) {
+ consumeError(ExpectedNSI.takeError());
+ return false;
+ }
+ assert(*ExpectedNSI < getNumStreams());
+ return true;
+}
+
/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
/// stream with that index actually exists. If it does not, the return value
/// will have a RawError with code raw_error_code::no_stream. Else, the return
/// value will contain the stream returned by createIndexedStream().
Expected<std::unique_ptr<MappedBlockStream>>
-PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
- BinaryStreamRef MsfData,
- uint32_t StreamIndex) const {
+PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
if (StreamIndex >= getNumStreams())
+ // This rejects kInvalidStreamIndex with an error as well.
return make_error<RawError>(raw_error_code::no_stream);
- return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
- Allocator);
+ return createIndexedStream(StreamIndex);
+}
+
+Expected<std::unique_ptr<MappedBlockStream>>
+PDBFile::safelyCreateNamedStream(StringRef Name) {
+ auto IS = getPDBInfoStream();
+ if (!IS)
+ return IS.takeError();
+
+ Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
+ if (!ExpectedNSI)
+ return ExpectedNSI.takeError();
+ uint32_t NameStreamIndex = *ExpectedNSI;
+
+ return safelyCreateIndexedStream(NameStreamIndex);
}
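
For illustration, a minimal caller sketch (not part of the patch; the helper name reportStreamSize is made up) of the simplified single-argument API, following the same Expected<> handling that getPDBTpiStream() above and TpiStream::reload() further down use:

#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::pdb;

// Hypothetical caller: open stream `Idx` with the new single-argument helper
// and report its size. An out-of-range index (including kInvalidStreamIndex)
// comes back as raw_error_code::no_stream, so one Expected<> check suffices.
static Error reportStreamSize(PDBFile &File, uint32_t Idx, raw_ostream &OS) {
  auto S = File.safelyCreateIndexedStream(Idx);
  if (!S)
    return S.takeError();
  BinaryStreamReader Reader(**S);
  OS << "stream " << Idx << ": " << Reader.bytesRemaining() << " bytes\n";
  return Error::success();
}
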
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index e0ceb7499ee5..8f5a048ea4b5 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -1,9 +1,8 @@
//===- PDBFileBuilder.cpp - PDB File Creation -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,7 +34,7 @@ using namespace llvm::support;
PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator)
: Allocator(Allocator), InjectedSourceHashTraits(Strings),
- InjectedSourceTable(2, InjectedSourceHashTraits) {}
+ InjectedSourceTable(2) {}
PDBFileBuilder::~PDBFileBuilder() {}
@@ -190,7 +189,8 @@ Error PDBFileBuilder::finalizeMsfLayout() {
static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne);
Entry.CRC = CRC.getCRC();
StringRef VName = getStringTableBuilder().getStringForId(IS.VNameIndex);
- InjectedSourceTable.set_as(VName, std::move(Entry));
+ InjectedSourceTable.set_as(VName, std::move(Entry),
+ InjectedSourceHashTraits);
}
uint32_t SrcHeaderBlockSize =
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index afeea32043dd..2be1656e06bb 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -1,9 +1,8 @@
//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index d9dcabf3d958..f7f36901e4d4 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -1,9 +1,8 @@
//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,7 +26,13 @@ StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
: Table(&Table) {}
uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
- return Table->getIdForString(S);
+ // The reference implementation doesn't include code for /src/headerblock
+ // handling, but it can only read natvis entries in lld's PDB files if
+ // this hash function truncates the hash to 16 bits.
+ // PDB/include/misc.h in the reference implementation has a hashSz() function
+ // that returns an unsigned short, which seems to be what's being used for
+ // /src/headerblock.
+ return static_cast<uint16_t>(Table->getIdForString(S));
}
StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
@@ -50,63 +55,75 @@ StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
return Strings.getStringForId(Id);
}
-// This is a precomputed list of Buckets given the specified number of
-// strings. Matching the reference algorithm exactly is not strictly
-// necessary for correctness, but it helps when comparing LLD's PDBs with
-// Microsoft's PDBs so as to eliminate superfluous differences.
-static std::map<uint32_t, uint32_t> StringsToBuckets = {
- {1, 2},
- {2, 4},
- {4, 7},
- {6, 11},
- {9, 17},
- {13, 26},
- {20, 40},
- {31, 61},
- {46, 92},
- {70, 139},
- {105, 209},
- {157, 314},
- {236, 472},
- {355, 709},
- {532, 1064},
- {799, 1597},
- {1198, 2396},
- {1798, 3595},
- {2697, 5393},
- {4045, 8090},
- {6068, 12136},
- {9103, 18205},
- {13654, 27308},
- {20482, 40963},
- {30723, 61445},
- {46084, 92168},
- {69127, 138253},
- {103690, 207380},
- {155536, 311071},
- {233304, 466607},
- {349956, 699911},
- {524934, 1049867},
- {787401, 1574801},
- {1181101, 2362202},
- {1771652, 3543304},
- {2657479, 5314957},
- {3986218, 7972436},
- {5979328, 11958655},
- {8968992, 17937983},
- {13453488, 26906975},
- {20180232, 40360463},
- {30270348, 60540695},
- {45405522, 90811043},
- {68108283, 136216565},
- {102162424, 204324848},
- {153243637, 306487273},
- {229865455, 459730910},
- {344798183, 689596366},
- {517197275, 1034394550},
- {775795913, 1551591826}};
-
static uint32_t computeBucketCount(uint32_t NumStrings) {
+ // This is a precomputed list of Buckets given the specified number of
+ // strings. Matching the reference algorithm exactly is not strictly
+ // necessary for correctness, but it helps when comparing LLD's PDBs with
+ // Microsoft's PDBs so as to eliminate superfluous differences.
+ // The reference implementation does (in nmt.h, NMT::grow()):
+ // unsigned StringCount = 0;
+ // unsigned BucketCount = 1;
+ // fn insert() {
+ // ++StringCount;
+ // if (BucketCount * 3 / 4 < StringCount)
+ // BucketCount = BucketCount * 3 / 2 + 1;
+ // }
+ // This list contains all StringCount, BucketCount pairs where BucketCount was
+ // just incremented. It ends before the first BucketCount entry where
+ // BucketCount * 3 would overflow a 32-bit unsigned int.
+ static std::map<uint32_t, uint32_t> StringsToBuckets = {
+ {0, 1},
+ {1, 2},
+ {2, 4},
+ {4, 7},
+ {6, 11},
+ {9, 17},
+ {13, 26},
+ {20, 40},
+ {31, 61},
+ {46, 92},
+ {70, 139},
+ {105, 209},
+ {157, 314},
+ {236, 472},
+ {355, 709},
+ {532, 1064},
+ {799, 1597},
+ {1198, 2396},
+ {1798, 3595},
+ {2697, 5393},
+ {4045, 8090},
+ {6068, 12136},
+ {9103, 18205},
+ {13654, 27308},
+ {20482, 40963},
+ {30723, 61445},
+ {46084, 92168},
+ {69127, 138253},
+ {103690, 207380},
+ {155536, 311071},
+ {233304, 466607},
+ {349956, 699911},
+ {524934, 1049867},
+ {787401, 1574801},
+ {1181101, 2362202},
+ {1771652, 3543304},
+ {2657479, 5314957},
+ {3986218, 7972436},
+ {5979328, 11958655},
+ {8968992, 17937983},
+ {13453488, 26906975},
+ {20180232, 40360463},
+ {30270348, 60540695},
+ {45405522, 90811043},
+ {68108283, 136216565},
+ {102162424, 204324848},
+ {153243637, 306487273},
+ {229865455, 459730910},
+ {344798183, 689596366},
+ {517197275, 1034394550},
+ {775795913, 1551591826},
+ {1163693870, 2327387740}};
auto Entry = StringsToBuckets.lower_bound(NumStrings);
assert(Entry != StringsToBuckets.end());
return Entry->second;
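
The quoted growth rule is enough to regenerate this table. Below is a small stand-alone sketch (assuming the quoted NMT::grow() behaviour is exact) that prints the (StringCount, BucketCount) pairs, stopping where BucketCount * 3 would no longer fit in 32 bits; note that the table in the hunk above keeps one further entry past that literal cut-off.

#include <cstdint>
#include <cstdio>

int main() {
  std::printf("{0, 1},\n"); // seed entry added by this patch
  uint64_t BucketCount = 1;
  while (BucketCount * 3 <= UINT32_MAX) {
    // Smallest insertion count that trips `BucketCount * 3 / 4 < StringCount`.
    uint64_t GrowAt = BucketCount * 3 / 4 + 1;
    BucketCount = BucketCount * 3 / 2 + 1;
    std::printf("{%llu, %llu},\n", (unsigned long long)GrowAt,
                (unsigned long long)BucketCount);
  }
  return 0;
}

The output starts {1, 2}, {2, 4}, {4, 7}, {6, 11} and continues through {775795913, 1551591826}, matching the entries above.
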
diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index f6466eb80464..a33bf03bf8fb 100644
--- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -1,9 +1,8 @@
//===- PublicsStream.cpp - PDB Public Symbol Stream -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/DebugInfo/PDB/Native/RawError.cpp b/lib/DebugInfo/PDB/Native/RawError.cpp
index dec9797088f2..ed6cf0839675 100644
--- a/lib/DebugInfo/PDB/Native/RawError.cpp
+++ b/lib/DebugInfo/PDB/Native/RawError.cpp
@@ -5,6 +5,7 @@
using namespace llvm;
using namespace llvm::pdb;
+namespace {
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
@@ -44,6 +45,7 @@ public:
llvm_unreachable("Unrecognized raw_error_code");
}
};
+} // namespace
static llvm::ManagedStatic<RawErrorCategory> RawCategory;
const std::error_category &llvm::pdb::RawErrCategory() { return *RawCategory; }
diff --git a/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/lib/DebugInfo/PDB/Native/SymbolStream.cpp
index 2d8d04ceca4d..003840b6e67e 100644
--- a/lib/DebugInfo/PDB/Native/SymbolStream.cpp
+++ b/lib/DebugInfo/PDB/Native/SymbolStream.cpp
@@ -1,9 +1,8 @@
//===- SymbolStream.cpp - PDB Symbol Stream Access ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 18708826ffc7..b21b82bf76fd 100644
--- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -1,9 +1,8 @@
//===- TpiHashing.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index f234d446e6a0..8ee7f897b8bb 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -1,9 +1,8 @@
//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -79,14 +78,13 @@ Error TpiStream::reload() {
// Hash indices, hash values, etc come from the hash stream.
if (Header->HashStreamIndex != kInvalidStreamIndex) {
- if (Header->HashStreamIndex >= Pdb.getNumStreams())
+ auto HS = Pdb.safelyCreateIndexedStream(Header->HashStreamIndex);
+ if (!HS) {
+ consumeError(HS.takeError());
return make_error<RawError>(raw_error_code::corrupt_file,
"Invalid TPI hash stream index.");
-
- auto HS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex,
- Pdb.getAllocator());
- BinaryStreamReader HSR(*HS);
+ }
+ BinaryStreamReader HSR(**HS);
// There should be a hash value for every type record, or no hashes at all.
uint32_t NumHashValues =
@@ -111,7 +109,7 @@ Error TpiStream::reload() {
return EC;
}
- HashStream = std::move(HS);
+ HashStream = std::move(*HS);
}
Types = llvm::make_unique<LazyRandomTypeCollection>(
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 8dd30018028e..6b308453c2de 100644
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -1,9 +1,8 @@
//===- TpiStreamBuilder.cpp - -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -77,7 +76,7 @@ Error TpiStreamBuilder::finalize() {
H->HashStreamIndex = HashStreamIndex;
H->HashAuxStreamIndex = kInvalidStreamIndex;
H->HashKeySize = sizeof(ulittle32_t);
- H->NumHashBuckets = MinTpiHashBuckets;
+ H->NumHashBuckets = MaxTpiHashBuckets - 1;
// Recall that hash values go into a completely different stream identified by
// the `HashStreamIndex` field of the `TpiStreamHeader`. Therefore, the data
@@ -130,7 +129,7 @@ Error TpiStreamBuilder::finalizeMsfLayout() {
ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeHashes.size());
MutableArrayRef<ulittle32_t> HashBuffer(H, TypeHashes.size());
for (uint32_t I = 0; I < TypeHashes.size(); ++I) {
- HashBuffer[I] = TypeHashes[I] % MinTpiHashBuckets;
+ HashBuffer[I] = TypeHashes[I] % (MaxTpiHashBuckets - 1);
}
ArrayRef<uint8_t> Bytes(
reinterpret_cast<const uint8_t *>(HashBuffer.data()),
@@ -153,9 +152,12 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = Writer.writeObject(*Header))
return EC;
- for (auto Rec : TypeRecords)
+ for (auto Rec : TypeRecords) {
+ assert(!Rec.empty()); // An empty record will not write anything, but it
+ // would shift all offsets from here on.
if (auto EC = Writer.writeBytes(Rec))
return EC;
+ }
if (HashStreamIndex != kInvalidStreamIndex) {
auto HVS = WritableMappedBlockStream::createIndexedStream(
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index fc1ad8bcd7cd..e7b968cb7bea 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -1,9 +1,8 @@
//===- PDB.cpp - base header file for creating a PDB reader ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp
index df0feac2bc40..e452f1d4ced7 100644
--- a/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/lib/DebugInfo/PDB/PDBContext.cpp
@@ -1,9 +1,8 @@
//===-- PDBContext.cpp ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===/
@@ -31,14 +30,14 @@ PDBContext::PDBContext(const COFFObjectFile &Object,
void PDBContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){}
-DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
+DILineInfo PDBContext::getLineInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Specifier) {
DILineInfo Result;
- Result.FunctionName = getFunctionName(Address, Specifier.FNKind);
+ Result.FunctionName = getFunctionName(Address.Address, Specifier.FNKind);
uint32_t Length = 1;
std::unique_ptr<PDBSymbol> Symbol =
- Session->findSymbolByAddress(Address, PDB_SymType::None);
+ Session->findSymbolByAddress(Address.Address, PDB_SymType::None);
if (auto Func = dyn_cast_or_null<PDBSymbolFunc>(Symbol.get())) {
Length = Func->getLength();
} else if (auto Data = dyn_cast_or_null<PDBSymbolData>(Symbol.get())) {
@@ -47,7 +46,7 @@ DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
// If we couldn't find a symbol, then just assume 1 byte, so that we get
// only the line number of the first instruction.
- auto LineNumbers = Session->findLineNumbersByAddress(Address, Length);
+ auto LineNumbers = Session->findLineNumbersByAddress(Address.Address, Length);
if (!LineNumbers || LineNumbers->getChildCount() == 0)
return Result;
@@ -64,26 +63,27 @@ DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
}
DILineInfoTable
-PDBContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
+PDBContext::getLineInfoForAddressRange(object::SectionedAddress Address,
+ uint64_t Size,
DILineInfoSpecifier Specifier) {
if (Size == 0)
return DILineInfoTable();
DILineInfoTable Table;
- auto LineNumbers = Session->findLineNumbersByAddress(Address, Size);
+ auto LineNumbers = Session->findLineNumbersByAddress(Address.Address, Size);
if (!LineNumbers || LineNumbers->getChildCount() == 0)
return Table;
while (auto LineInfo = LineNumbers->getNext()) {
- DILineInfo LineEntry =
- getLineInfoForAddress(LineInfo->getVirtualAddress(), Specifier);
+ DILineInfo LineEntry = getLineInfoForAddress(
+ {LineInfo->getVirtualAddress(), Address.SectionIndex}, Specifier);
Table.push_back(std::make_pair(LineInfo->getVirtualAddress(), LineEntry));
}
return Table;
}
DIInliningInfo
-PDBContext::getInliningInfoForAddress(uint64_t Address,
+PDBContext::getInliningInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Specifier) {
DIInliningInfo InlineInfo;
DILineInfo Frame = getLineInfoForAddress(Address, Specifier);
@@ -91,6 +91,11 @@ PDBContext::getInliningInfoForAddress(uint64_t Address,
return InlineInfo;
}
+std::vector<DILocal>
+PDBContext::getLocalsForAddress(object::SectionedAddress Address) {
+ return std::vector<DILocal>();
+}
+
std::string PDBContext::getFunctionName(uint64_t Address,
DINameKind NameKind) const {
if (NameKind == DINameKind::None)
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index 0d8af232cd92..354a99476c4b 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -1,9 +1,8 @@
//===- PDBExtras.cpp - helper functions and classes for PDBs --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -118,13 +117,37 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_DataKind &Data) {
}
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
- const codeview::RegisterId &Reg) {
- switch (Reg) {
-#define CV_REGISTER(name, val) case codeview::RegisterId::name: OS << #name; return OS;
+ const llvm::codeview::CPURegister &CpuReg) {
+ if (CpuReg.Cpu == llvm::codeview::CPUType::ARM64) {
+ switch (CpuReg.Reg) {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val) \
+ case codeview::RegisterId::name: \
+ OS << #name; \
+ return OS;
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+ default:
+ break;
+ }
+ } else {
+ switch (CpuReg.Reg) {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val) \
+ case codeview::RegisterId::name: \
+ OS << #name; \
+ return OS;
#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+ default:
+ break;
+ }
}
- OS << static_cast<int>(Reg);
+ OS << static_cast<int>(CpuReg.Reg);
return OS;
}
@@ -193,6 +216,7 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_Lang &Lang) {
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, MSIL, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, HLSL, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, D, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Swift, OS)
}
return OS;
}
@@ -296,14 +320,17 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
return OS;
}
-raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
- const PDB_SourceCompression &Compression) {
+raw_ostream &llvm::pdb::dumpPDBSourceCompression(raw_ostream &OS,
+ uint32_t Compression) {
switch (Compression) {
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, None, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, Huffman, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, LZ, OS)
CASE_OUTPUT_ENUM_CLASS_STR(PDB_SourceCompression, RunLengthEncoded, "RLE",
OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, DotNet, OS)
+ default:
+ OS << "Unknown (" << Compression << ")";
}
return OS;
}
diff --git a/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
index 951909295d13..8eb3311b09e3 100644
--- a/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
+++ b/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -1,9 +1,8 @@
//===- PDBInterfaceAnchors.h - defines class anchor functions ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Class anchors are necessary per the LLVM Coding style guide, to ensure that
diff --git a/lib/DebugInfo/PDB/PDBSymDumper.cpp b/lib/DebugInfo/PDB/PDBSymDumper.cpp
index 2f819312e54e..0956a32f4a49 100644
--- a/lib/DebugInfo/PDB/PDBSymDumper.cpp
+++ b/lib/DebugInfo/PDB/PDBSymDumper.cpp
@@ -1,9 +1,8 @@
//===- PDBSymDumper.cpp - ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbol.cpp
index d492edafdafe..34c8ac41d45b 100644
--- a/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbol.cpp - base class for user-facing symbol types --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index cb1a9bee8024..0fa83efb7ae0 100644
--- a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolAnnotation.cpp - --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index 13eec9734d02..9452282a8817 100644
--- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolBlock.cpp - -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index bbc5e6dd2a17..9b2883546305 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolCompiland.cpp - compiland details ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -91,16 +90,16 @@ std::string PDBSymbolCompiland::getSourceFileFullPath() const {
PDB_Lang Lang = Details ? Details->getLanguage() : PDB_Lang::Cpp;
auto SrcFiles = Session.getSourceFilesForCompiland(*this);
if (SrcFiles) {
- bool LangC = (Lang == PDB_Lang::Cpp || Lang == PDB_Lang::C);
while (auto File = SrcFiles->getNext()) {
std::string FileName = File->getFileName();
auto file_extension = sys::path::extension(FileName);
if (StringSwitch<bool>(file_extension.lower())
- .Case(".cpp", LangC)
- .Case(".c", LangC)
- .Case(".cc", LangC)
- .Case(".cxx", LangC)
+ .Case(".cpp", Lang == PDB_Lang::Cpp)
+ .Case(".cc", Lang == PDB_Lang::Cpp)
+ .Case(".cxx", Lang == PDB_Lang::Cpp)
+ .Case(".c", Lang == PDB_Lang::C)
.Case(".asm", Lang == PDB_Lang::Masm)
+ .Case(".swift", Lang == PDB_Lang::Swift)
.Default(false))
return File->getFileName();
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index bdd8535a3ef3..0d86dfe1e632 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolCompilandDetails.cpp - compiland details --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index f88df2df6be4..61f119405fd9 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolCompilandEnv.cpp - compiland env variables ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index 10a21806adb6..6c9a4aa76c3d 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolCustom.cpp - compiler-specific types ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolData.cpp b/lib/DebugInfo/PDB/PDBSymbolData.cpp
index 7de94670bcb3..d2b82111ccd5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolData.cpp - PDB data (e.g. variable) accessors ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index eb409412af59..c85756c43e47 100644
--- a/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolExe.cpp - ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 75063cb3e7f8..7c3ba981fd6b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolFunc.cpp - --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index af8aafa7be96..66433dc17b49 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolFuncDebugEnd.cpp - ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index 77b510873bea..fe32c93c0121 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolFuncDebugStart.cpp - ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index c802b97925e6..1fffe69a0c83 100644
--- a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolLabel.cpp - -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index a2dd2ab92dd9..08697683f641 100644
--- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolPublicSymbol.cpp - ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index d227e3a7a60c..6483858183e5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolThunk.cpp - -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index a2064d1ac1eb..a0d521abe43f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeArray.cpp - ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index f0376c05557f..08467059b5e1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeBaseClass.cpp - -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index a9f59e5f9d4d..a0dd9ef601c0 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeBuiltin.cpp - ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index cfb347fbac55..6723894c90ea 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeCustom.cpp - --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 4eb48997635a..4a25a391f278 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -1,10 +1,9 @@
//===- PDBSymbolTypeDimension.cpp - --------------------------------*- C++
//-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index 2e88d9eb284a..b9fdf6aec811 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeEnum.cpp - --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index 00d2d51aa8a7..4ffea42cbb0a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeFriend.cpp - --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index 0399e110d592..683e93548fb1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeFunctionArg.cpp - --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index c0564d3941dd..292320a6fe6d 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeFunctionSig.cpp - --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index 1faaf9c67a2c..e80e6c716572 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeManaged.cpp - ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index cf5a369116a9..462fc315359b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypePointer.cpp -----------------------------------*- C++ -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 1838f1612b49..70749d9bf5f5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeTypedef.cpp ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index 2f5222f34fe4..d302c29a3bec 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeUDT.cpp - --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index 0262f91e8336..4e2a45116d51 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeVTable.cpp - --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index 16c3a3606981..78957620e083 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolTypeVTableShape.cpp - ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index 7bcf9457a2b6..650d01183171 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolUnknown.cpp - -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index ecf2126f8802..74afbdb18086 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -1,9 +1,8 @@
//===- PDBSymbolUsingNamespace.cpp - ------------------- --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index 5f4390bbaf12..acb1599480b0 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -1,9 +1,8 @@
//===- UDTLayout.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
index c1e2536d6e20..b2bfef251485 100644
--- a/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -1,9 +1,8 @@
//===- lib/DebugInfo/Symbolize/DIPrinter.cpp ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cmath>
@@ -78,8 +78,13 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
std::string Filename = Info.FileName;
if (Filename == kDILineInfoBadString)
Filename = kBadString;
+ else if (Basenames)
+ Filename = llvm::sys::path::filename(Filename);
if (!Verbose) {
- OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+ OS << Filename << ":" << Info.Line;
+ if (Style == OutputStyle::LLVM)
+ OS << ":" << Info.Column;
+ OS << "\n";
printContext(Filename, Info.Line);
return;
}
@@ -117,5 +122,28 @@ DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
return *this;
}
+DIPrinter &DIPrinter::operator<<(const DILocal &Local) {
+ OS << Local.FunctionName << '\n';
+ OS << Local.Name << '\n';
+ if (Local.DeclFile.empty())
+ OS << "??";
+ else
+ OS << Local.DeclFile;
+ OS << ':' << Local.DeclLine << '\n';
+ if (Local.FrameOffset)
+ OS << *Local.FrameOffset << ' ';
+ else
+ OS << "?? ";
+ if (Local.Size)
+ OS << *Local.Size << ' ';
+ else
+ OS << "?? ";
+ if (Local.TagOffset)
+ OS << *Local.TagOffset << '\n';
+ else
+ OS << "??\n";
+ return *this;
+}
+
} // end namespace symbolize
} // end namespace llvm
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 08be524ab464..2765bf44d504 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -1,9 +1,8 @@
//===- SymbolizableObjectFile.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,8 +42,9 @@ getDILineInfoSpecifier(FunctionNameKind FNKind) {
}
ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
-SymbolizableObjectFile::create(object::ObjectFile *Obj,
+SymbolizableObjectFile::create(const object::ObjectFile *Obj,
std::unique_ptr<DIContext> DICtx) {
+ assert(DICtx);
std::unique_ptr<SymbolizableObjectFile> res(
new SymbolizableObjectFile(Obj, std::move(DICtx)));
std::unique_ptr<DataExtractor> OpdExtractor;
@@ -54,13 +54,13 @@ SymbolizableObjectFile::create(object::ObjectFile *Obj,
if (Obj->getArch() == Triple::ppc64) {
for (section_iterator Section : Obj->sections()) {
StringRef Name;
- StringRef Data;
if (auto EC = Section->getName(Name))
return EC;
if (Name == ".opd") {
- if (auto EC = Section->getContents(Data))
- return EC;
- OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
+ Expected<StringRef> E = Section->getContents();
+ if (!E)
+ return errorToErrorCode(E.takeError());
+ OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
Obj->getBytesInAddress()));
OpdAddress = Section->getAddress();
break;
@@ -79,10 +79,30 @@ SymbolizableObjectFile::create(object::ObjectFile *Obj,
if (auto EC = res->addCoffExportSymbols(CoffObj))
return EC;
}
+
+ std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
+ &Os = res->Objects;
+ auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
+ // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
+ // pick the one with the largest Size. This helps us avoid symbols with no
+ // size information (Size=0).
+ llvm::sort(S);
+ auto I = S.begin(), E = S.end(), J = S.begin();
+ while (I != E) {
+ auto OI = I;
+ while (++I != E && OI->first.Addr == I->first.Addr) {
+ }
+ *J++ = I[-1];
+ }
+ S.erase(J, S.end());
+ };
+ Uniquify(Fs);
+ Uniquify(Os);
+
return std::move(res);
}
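
For reference, here is the same keep-the-largest-per-address idea in stand-alone form (hypothetical types, not LLVM's SymbolDesc), so the iterator handling in the Uniquify lambda above is easier to follow:

#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Each entry is ((address, size), name); sorting the pairs puts entries with
// the same address in ascending size order, so the last one in a run is the
// largest and zero-sized duplicates are dropped.
using Sym = std::pair<std::pair<uint64_t, uint64_t>, std::string>;

static void uniquifyByAddress(std::vector<Sym> &S) {
  std::sort(S.begin(), S.end());
  auto J = S.begin();
  for (auto I = S.begin(), E = S.end(); I != E;) {
    auto Run = I;
    while (++I != E && I->first.first == Run->first.first) {
    }
    *J++ = I[-1]; // keep the last (largest-size) entry of the run
  }
  S.erase(J, S.end());
}

Given {{0x401000, 0}, "f"} and {{0x401000, 24}, "f"}, only the 24-byte entry survives.
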
-SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
+SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
std::unique_ptr<DIContext> DICtx)
: Module(Obj), DebugInfoContext(std::move(DICtx)) {}
@@ -128,7 +148,7 @@ std::error_code SymbolizableObjectFile::addCoffExportSymbols(
uint64_t SymbolStart = ImageBase + Export.Offset;
uint64_t SymbolSize = NextOffset - Export.Offset;
SymbolDesc SD = {SymbolStart, SymbolSize};
- Functions.insert(std::make_pair(SD, Export.Name));
+ Functions.emplace_back(SD, Export.Name);
}
return std::error_code();
}
@@ -137,6 +157,11 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
uint64_t SymbolSize,
DataExtractor *OpdExtractor,
uint64_t OpdAddress) {
+ // Avoid adding symbols from an unknown/undefined section.
+ const ObjectFile *Obj = Symbol.getObject();
+ Expected<section_iterator> Sec = Symbol.getSection();
+ if (!Sec || (Obj && Obj->section_end() == *Sec))
+ return std::error_code();
Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
if (!SymbolTypeOrErr)
return errorToErrorCode(SymbolTypeOrErr.takeError());
@@ -170,7 +195,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
// with same address size. Make sure we choose the correct one.
auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
SymbolDesc SD = { SymbolAddress, SymbolSize };
- M.insert(std::make_pair(SD, SymbolName));
+ M.emplace_back(SD, SymbolName);
return std::error_code();
}
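The new early return in addSymbol() skips symbols whose section iterator equals the object's section_end(), i.e. undefined or absolute symbols. A hedged sketch of the same filter applied while walking an ObjectFile with the libObject API (the function name is made up):

    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;
    using namespace llvm::object;

    static void visitDefinedSymbols(const ObjectFile &Obj) {
      for (const SymbolRef &Sym : Obj.symbols()) {
        Expected<section_iterator> SecOrErr = Sym.getSection();
        if (!SecOrErr) {
          consumeError(SecOrErr.takeError());
          continue;
        }
        // Same test as above: undefined symbols report section_end().
        if (*SecOrErr == Obj.section_end())
          continue;
        // ... record Sym ...
      }
    }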
@@ -191,12 +216,10 @@ bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
std::string &Name,
uint64_t &Addr,
uint64_t &Size) const {
- const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
- if (SymbolMap.empty())
- return false;
- SymbolDesc SD = { Address, Address };
- auto SymbolIterator = SymbolMap.upper_bound(SD);
- if (SymbolIterator == SymbolMap.begin())
+ const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
+ std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
+ auto SymbolIterator = llvm::upper_bound(Symbols, SD);
+ if (SymbolIterator == Symbols.begin())
return false;
--SymbolIterator;
if (SymbolIterator->first.Size != 0 &&
@@ -218,19 +241,21 @@ bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
isa<DWARFContext>(DebugInfoContext.get());
}
-DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
- FunctionNameKind FNKind,
- bool UseSymbolTable) const {
- DILineInfo LineInfo;
- if (DebugInfoContext) {
- LineInfo = DebugInfoContext->getLineInfoForAddress(
- ModuleOffset, getDILineInfoSpecifier(FNKind));
- }
+DILineInfo
+SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
+ FunctionNameKind FNKind,
+ bool UseSymbolTable) const {
+ if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
+ ModuleOffset.SectionIndex =
+ getModuleSectionIndexForAddress(ModuleOffset.Address);
+ DILineInfo LineInfo = DebugInfoContext->getLineInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifier(FNKind));
+
// Override function name from symbol table if necessary.
if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
std::string FunctionName;
uint64_t Start, Size;
- if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+ if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
FunctionName, Start, Size)) {
LineInfo.FunctionName = FunctionName;
}
@@ -239,12 +264,14 @@ DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
}
DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
- uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
- DIInliningInfo InlinedContext;
+ object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
+ bool UseSymbolTable) const {
+ if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
+ ModuleOffset.SectionIndex =
+ getModuleSectionIndexForAddress(ModuleOffset.Address);
+ DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+ ModuleOffset, getDILineInfoSpecifier(FNKind));
- if (DebugInfoContext)
- InlinedContext = DebugInfoContext->getInliningInfoForAddress(
- ModuleOffset, getDILineInfoSpecifier(FNKind));
// Make sure there is at least one frame in context.
if (InlinedContext.getNumberOfFrames() == 0)
InlinedContext.addFrame(DILineInfo());
@@ -253,7 +280,7 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
std::string FunctionName;
uint64_t Start, Size;
- if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+ if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
FunctionName, Start, Size)) {
InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
->FunctionName = FunctionName;
@@ -263,9 +290,34 @@ DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
return InlinedContext;
}
-DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
+DIGlobal SymbolizableObjectFile::symbolizeData(
+ object::SectionedAddress ModuleOffset) const {
DIGlobal Res;
- getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
- Res.Size);
+ getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
+ Res.Start, Res.Size);
return Res;
}
+
+std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
+ object::SectionedAddress ModuleOffset) const {
+ if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
+ ModuleOffset.SectionIndex =
+ getModuleSectionIndexForAddress(ModuleOffset.Address);
+ return DebugInfoContext->getLocalsForAddress(ModuleOffset);
+}
+
+/// Search for the first occurrence of the specified Address in the ObjectFile.
+uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
+ uint64_t Address) const {
+
+ for (SectionRef Sec : Module->sections()) {
+ if (!Sec.isText() || Sec.isVirtual())
+ continue;
+
+ if (Address >= Sec.getAddress() &&
+ Address < Sec.getAddress() + Sec.getSize())
+ return Sec.getIndex();
+ }
+
+ return object::SectionedAddress::UndefSection;
+}
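getModuleSectionIndexForAddress() lets callers pass object::SectionedAddress::UndefSection when only a raw offset is known; the symbolize* entry points above then resolve the section index before querying the DIContext. A small sketch of building such an address (the helper name is made up):

    #include "llvm/Object/ObjectFile.h"
    #include <cstdint>

    static llvm::object::SectionedAddress makeModuleOffset(uint64_t Addr) {
      llvm::object::SectionedAddress A;
      A.Address = Addr;
      // Let the symbolizer pick the section containing Addr.
      A.SectionIndex = llvm::object::SectionedAddress::UndefSection;
      return A;
    }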
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 216cca8de4f5..9cab94178c1b 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -1,9 +1,8 @@
//===- SymbolizableObjectFile.h ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,14 +31,17 @@ namespace symbolize {
class SymbolizableObjectFile : public SymbolizableModule {
public:
static ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
- create(object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
+ create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
- DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind,
+ DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset,
+ FunctionNameKind FNKind,
bool UseSymbolTable) const override;
- DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
+ DIInliningInfo symbolizeInlinedCode(object::SectionedAddress ModuleOffset,
FunctionNameKind FNKind,
bool UseSymbolTable) const override;
- DIGlobal symbolizeData(uint64_t ModuleOffset) const override;
+ DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override;
+ std::vector<DILocal>
+ symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
// Return true if this is a 32-bit x86 PE COFF module.
bool isWin32Module() const override;
@@ -63,7 +65,10 @@ private:
uint64_t OpdAddress = 0);
std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj);
- object::ObjectFile *Module;
+  /// Search for the first occurrence of the specified Address in the ObjectFile.
+ uint64_t getModuleSectionIndexForAddress(uint64_t Address) const;
+
+ const object::ObjectFile *Module;
std::unique_ptr<DIContext> DebugInfoContext;
struct SymbolDesc {
@@ -72,14 +77,14 @@ private:
// the following symbol.
uint64_t Size;
- friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
- return s1.Addr < s2.Addr;
+ bool operator<(const SymbolDesc &RHS) const {
+ return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size;
}
};
- std::map<SymbolDesc, StringRef> Functions;
- std::map<SymbolDesc, StringRef> Objects;
+ std::vector<std::pair<SymbolDesc, StringRef>> Functions;
+ std::vector<std::pair<SymbolDesc, StringRef>> Objects;
- SymbolizableObjectFile(object::ObjectFile *Obj,
+ SymbolizableObjectFile(const object::ObjectFile *Obj,
std::unique_ptr<DIContext> DICtx);
};
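With Functions and Objects now sorted vectors, the (Addr, Size) operator< above is what makes the upper_bound probe in getNameFromSymbolTable() well defined: querying with {Address, UINT64_MAX} lands just past every symbol starting at or before Address. A sketch of that lookup with illustrative types:

    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <utility>
    #include <vector>

    struct Desc {
      uint64_t Addr, Size;
      bool operator<(const Desc &RHS) const {
        return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size;
      }
    };

    static const std::string *
    findByAddress(const std::vector<std::pair<Desc, std::string>> &Syms,
                  uint64_t Address) {
      std::pair<Desc, std::string> Key{{Address, UINT64_MAX}, std::string()};
      auto It = std::upper_bound(Syms.begin(), Syms.end(), Key);
      if (It == Syms.begin())
        return nullptr;
      --It;
      // Reject a candidate whose recorded size says Address lies past its end.
      if (It->first.Size != 0 && Address >= It->first.Addr + It->first.Size)
        return nullptr;
      return &It->second;
    }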
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
index 59a85d6c3fcf..6a619f8f2f37 100644
--- a/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -1,9 +1,8 @@
//===-- LLVMSymbolize.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,7 +16,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
@@ -25,6 +23,7 @@
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DataExtractor.h"
@@ -34,7 +33,6 @@
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cassert>
-#include <cstdlib>
#include <cstring>
#if defined(_MSC_VER)
@@ -54,14 +52,8 @@ namespace llvm {
namespace symbolize {
Expected<DILineInfo>
-LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
- uint64_t ModuleOffset, StringRef DWPName) {
- SymbolizableModule *Info;
- if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName))
- Info = InfoOrErr.get();
- else
- return InfoOrErr.takeError();
-
+LLVMSymbolizer::symbolizeCodeCommon(SymbolizableModule *Info,
+ object::SectionedAddress ModuleOffset) {
// A null module means an error has already been reported. Return an empty
// result.
if (!Info)
@@ -70,7 +62,7 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
// If the user is giving us relative addresses, add the preferred base of the
// object to the offset before we do the query. It's what DIContext expects.
if (Opts.RelativeAddresses)
- ModuleOffset += Info->getModulePreferredBase();
+ ModuleOffset.Address += Info->getModulePreferredBase();
DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
Opts.UseSymbolTable);
@@ -79,11 +71,37 @@ LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
return LineInfo;
}
+Expected<DILineInfo>
+LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj,
+ object::SectionedAddress ModuleOffset) {
+ StringRef ModuleName = Obj.getFileName();
+ auto I = Modules.find(ModuleName);
+ if (I != Modules.end())
+ return symbolizeCodeCommon(I->second.get(), ModuleOffset);
+
+ std::unique_ptr<DIContext> Context =
+ DWARFContext::create(Obj, nullptr, DWARFContext::defaultErrorHandler);
+ Expected<SymbolizableModule *> InfoOrErr =
+ createModuleInfo(&Obj, std::move(Context), ModuleName);
+ if (!InfoOrErr)
+ return InfoOrErr.takeError();
+ return symbolizeCodeCommon(*InfoOrErr, ModuleOffset);
+}
+
+Expected<DILineInfo>
+LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
+ object::SectionedAddress ModuleOffset) {
+ Expected<SymbolizableModule *> InfoOrErr = getOrCreateModuleInfo(ModuleName);
+ if (!InfoOrErr)
+ return InfoOrErr.takeError();
+ return symbolizeCodeCommon(*InfoOrErr, ModuleOffset);
+}
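A hedged usage sketch for the new section-aware symbolizeCode() overload that takes a module path; the path and offset below are made up:

    #include "llvm/DebugInfo/Symbolize/Symbolize.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::symbolize;

    static void printLineInfo() {
      LLVMSymbolizer Symbolizer;
      object::SectionedAddress Addr;
      Addr.Address = 0x401000; // hypothetical module offset
      Addr.SectionIndex = object::SectionedAddress::UndefSection;
      Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode("a.out", Addr);
      if (!ResOrErr) {
        logAllUnhandledErrors(ResOrErr.takeError(), errs(), "symbolize: ");
        return;
      }
      errs() << ResOrErr->FunctionName << " at " << ResOrErr->FileName << ":"
             << ResOrErr->Line << "\n";
    }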
+
Expected<DIInliningInfo>
LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
- uint64_t ModuleOffset, StringRef DWPName) {
+ object::SectionedAddress ModuleOffset) {
SymbolizableModule *Info;
- if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName))
+ if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
Info = InfoOrErr.get();
else
return InfoOrErr.takeError();
@@ -96,7 +114,7 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
// If the user is giving us relative addresses, add the preferred base of the
// object to the offset before we do the query. It's what DIContext expects.
if (Opts.RelativeAddresses)
- ModuleOffset += Info->getModulePreferredBase();
+ ModuleOffset.Address += Info->getModulePreferredBase();
DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
@@ -109,8 +127,9 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
return InlinedContext;
}
-Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
- uint64_t ModuleOffset) {
+Expected<DIGlobal>
+LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
+ object::SectionedAddress ModuleOffset) {
SymbolizableModule *Info;
if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
Info = InfoOrErr.get();
@@ -126,7 +145,7 @@ Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
// the object to the offset before we do the query. It's what DIContext
// expects.
if (Opts.RelativeAddresses)
- ModuleOffset += Info->getModulePreferredBase();
+ ModuleOffset.Address += Info->getModulePreferredBase();
DIGlobal Global = Info->symbolizeData(ModuleOffset);
if (Opts.Demangle)
@@ -134,6 +153,29 @@ Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
return Global;
}
+Expected<std::vector<DILocal>>
+LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName,
+ object::SectionedAddress ModuleOffset) {
+ SymbolizableModule *Info;
+ if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
+ Info = InfoOrErr.get();
+ else
+ return InfoOrErr.takeError();
+
+ // A null module means an error has already been reported. Return an empty
+ // result.
+ if (!Info)
+ return std::vector<DILocal>();
+
+ // If the user is giving us relative addresses, add the preferred base of
+ // the object to the offset before we do the query. It's what DIContext
+ // expects.
+ if (Opts.RelativeAddresses)
+ ModuleOffset.Address += Info->getModulePreferredBase();
+
+ return Info->symbolizeFrame(ModuleOffset);
+}
+
void LLVMSymbolizer::flush() {
ObjectForUBPathAndArch.clear();
BinaryForPath.clear();
@@ -163,42 +205,45 @@ bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
MemoryBuffer::getFileOrSTDIN(Path);
if (!MB)
return false;
- return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
+ return CRCHash == llvm::crc32(0, MB.get()->getBuffer());
}
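checkFileCRC() now uses LLVM's own CRC-32 implementation instead of depending on zlib. A standalone sketch of the same gnu_debuglink-style check, using the llvm::crc32 overload visible above (the file name is hypothetical):

    #include "llvm/Support/CRC.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <cstdint>

    static bool debugFileMatches(llvm::StringRef Path, uint32_t ExpectedCRC) {
      auto BufOrErr = llvm::MemoryBuffer::getFileOrSTDIN(Path);
      if (!BufOrErr)
        return false;
      // gnu_debuglink stores the CRC of the separate debug file; compare it.
      return llvm::crc32(0, (*BufOrErr)->getBuffer()) == ExpectedCRC;
    }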
bool findDebugBinary(const std::string &OrigPath,
const std::string &DebuglinkName, uint32_t CRCHash,
+ const std::string &FallbackDebugPath,
std::string &Result) {
- std::string OrigRealPath = OrigPath;
-#if defined(HAVE_REALPATH)
- if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
- OrigRealPath = RP;
- free(RP);
- }
-#endif
- SmallString<16> OrigDir(OrigRealPath);
+ SmallString<16> OrigDir(OrigPath);
llvm::sys::path::remove_filename(OrigDir);
SmallString<16> DebugPath = OrigDir;
- // Try /path/to/original_binary/debuglink_name
+ // Try relative/path/to/original_binary/debuglink_name
llvm::sys::path::append(DebugPath, DebuglinkName);
if (checkFileCRC(DebugPath, CRCHash)) {
Result = DebugPath.str();
return true;
}
- // Try /path/to/original_binary/.debug/debuglink_name
+ // Try relative/path/to/original_binary/.debug/debuglink_name
DebugPath = OrigDir;
llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
if (checkFileCRC(DebugPath, CRCHash)) {
Result = DebugPath.str();
return true;
}
+ // Make the path absolute so that lookups will go to
+ // "/usr/lib/debug/full/path/to/debug", not
+ // "/usr/lib/debug/to/debug"
+ llvm::sys::fs::make_absolute(OrigDir);
+ if (!FallbackDebugPath.empty()) {
+ // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
+ DebugPath = FallbackDebugPath;
+ } else {
#if defined(__NetBSD__)
- // Try /usr/libdata/debug/path/to/original_binary/debuglink_name
- DebugPath = "/usr/libdata/debug";
+ // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
+ DebugPath = "/usr/libdata/debug";
#else
- // Try /usr/lib/debug/path/to/original_binary/debuglink_name
- DebugPath = "/usr/lib/debug";
+ // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
+ DebugPath = "/usr/lib/debug";
#endif
+ }
llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
DebuglinkName);
if (checkFileCRC(DebugPath, CRCHash)) {
@@ -217,9 +262,12 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
Section.getName(Name);
Name = Name.substr(Name.find_first_not_of("._"));
if (Name == "gnu_debuglink") {
- StringRef Data;
- Section.getContents(Data);
- DataExtractor DE(Data, Obj->isLittleEndian(), 0);
+ Expected<StringRef> ContentsOrErr = Section.getContents();
+ if (!ContentsOrErr) {
+ consumeError(ContentsOrErr.takeError());
+ return false;
+ }
+ DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0);
uint32_t Offset = 0;
if (const char *DebugNameStr = DE.getCStr(&Offset)) {
// 4-byte align the offset.
@@ -284,7 +332,8 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
std::string DebugBinaryPath;
if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
return nullptr;
- if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
+ if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath,
+ DebugBinaryPath))
return nullptr;
auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
if (!DbgObjOrErr) {
@@ -298,15 +347,14 @@ ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
Expected<LLVMSymbolizer::ObjectPair>
LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
const std::string &ArchName) {
- const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
- if (I != ObjectPairForPathArch.end()) {
+ auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
+ if (I != ObjectPairForPathArch.end())
return I->second;
- }
auto ObjOrErr = getOrCreateObject(Path, ArchName);
if (!ObjOrErr) {
- ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName),
- ObjectPair(nullptr, nullptr)));
+ ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName),
+ ObjectPair(nullptr, nullptr));
return ObjOrErr.takeError();
}
@@ -321,46 +369,43 @@ LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
if (!DbgObj)
DbgObj = Obj;
ObjectPair Res = std::make_pair(Obj, DbgObj);
- ObjectPairForPathArch.insert(
- std::make_pair(std::make_pair(Path, ArchName), Res));
+ ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
return Res;
}
Expected<ObjectFile *>
LLVMSymbolizer::getOrCreateObject(const std::string &Path,
const std::string &ArchName) {
- const auto &I = BinaryForPath.find(Path);
- Binary *Bin = nullptr;
- if (I == BinaryForPath.end()) {
+ Binary *Bin;
+ auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>());
+ if (!Pair.second) {
+ Bin = Pair.first->second.getBinary();
+ } else {
Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
- if (!BinOrErr) {
- BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>()));
+ if (!BinOrErr)
return BinOrErr.takeError();
- }
- Bin = BinOrErr->getBinary();
- BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
- } else {
- Bin = I->second.getBinary();
+ Pair.first->second = std::move(BinOrErr.get());
+ Bin = Pair.first->second.getBinary();
}
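The rewritten lookup above uses a single emplace() to both probe and reserve the cache slot, and deliberately leaves the empty placeholder behind when createBinary() fails so the same path is not retried. A generic sketch of that idiom with illustrative types:

    #include <map>
    #include <memory>
    #include <string>

    struct Thing {};

    // Stand-in for a fallible factory such as createBinary().
    static std::unique_ptr<Thing> makeThing(const std::string &Key) {
      return Key.empty() ? nullptr : std::make_unique<Thing>();
    }

    static Thing *
    getOrCreate(std::map<std::string, std::unique_ptr<Thing>> &Cache,
                const std::string &Key) {
      auto Pair = Cache.emplace(Key, nullptr);
      if (!Pair.second)
        return Pair.first->second.get(); // hit, possibly a cached failure
      Pair.first->second = makeThing(Key); // miss: fill the reserved slot
      return Pair.first->second.get();
    }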
if (!Bin)
return static_cast<ObjectFile *>(nullptr);
if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
- const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
- if (I != ObjectForUBPathAndArch.end()) {
+ auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
+ if (I != ObjectForUBPathAndArch.end())
return I->second.get();
- }
+
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
UB->getObjectForArch(ArchName);
if (!ObjOrErr) {
- ObjectForUBPathAndArch.insert(std::make_pair(
- std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>()));
+ ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
+ std::unique_ptr<ObjectFile>());
return ObjOrErr.takeError();
}
ObjectFile *Res = ObjOrErr->get();
- ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
- std::move(ObjOrErr.get())));
+ ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
+ std::move(ObjOrErr.get()));
return Res;
}
if (Bin->isObject()) {
@@ -370,12 +415,28 @@ LLVMSymbolizer::getOrCreateObject(const std::string &Path,
}
Expected<SymbolizableModule *>
-LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
- StringRef DWPName) {
- const auto &I = Modules.find(ModuleName);
- if (I != Modules.end()) {
+LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj,
+ std::unique_ptr<DIContext> Context,
+ StringRef ModuleName) {
+ auto InfoOrErr =
+ SymbolizableObjectFile::create(Obj, std::move(Context));
+ std::unique_ptr<SymbolizableModule> SymMod;
+ if (InfoOrErr)
+ SymMod = std::move(*InfoOrErr);
+ auto InsertResult =
+ Modules.insert(std::make_pair(ModuleName, std::move(SymMod)));
+ assert(InsertResult.second);
+ if (std::error_code EC = InfoOrErr.getError())
+ return errorCodeToError(EC);
+ return InsertResult.first->second.get();
+}
+
+Expected<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+ auto I = Modules.find(ModuleName);
+ if (I != Modules.end())
return I->second.get();
- }
+
std::string BinaryName = ModuleName;
std::string ArchName = Opts.DefaultArch;
size_t ColonPos = ModuleName.find_last_of(':');
@@ -390,8 +451,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
if (!ObjectsOrErr) {
// Failed to find valid object file.
- Modules.insert(
- std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
+ Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
return ObjectsOrErr.takeError();
}
ObjectPair Objects = ObjectsOrErr.get();
@@ -408,8 +468,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
std::unique_ptr<IPDBSession> Session;
if (auto Err = loadDataForEXE(PDB_ReaderType::DIA,
Objects.first->getFileName(), Session)) {
- Modules.insert(
- std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
+ Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>());
// Return along the PDB filename to provide more context
return createFileError(PDBFileName, std::move(Err));
}
@@ -417,20 +476,10 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
}
}
if (!Context)
- Context = DWARFContext::create(*Objects.second, nullptr,
- DWARFContext::defaultErrorHandler, DWPName);
- assert(Context);
- auto InfoOrErr =
- SymbolizableObjectFile::create(Objects.first, std::move(Context));
- std::unique_ptr<SymbolizableModule> SymMod;
- if (InfoOrErr)
- SymMod = std::move(InfoOrErr.get());
- auto InsertResult =
- Modules.insert(std::make_pair(ModuleName, std::move(SymMod)));
- assert(InsertResult.second);
- if (auto EC = InfoOrErr.getError())
- return errorCodeToError(EC);
- return InsertResult.first->second.get();
+ Context =
+ DWARFContext::create(*Objects.second, nullptr,
+ DWARFContext::defaultErrorHandler, Opts.DWPName);
+ return createModuleInfo(Objects.first, std::move(Context), ModuleName);
}
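getOrCreateModuleInfo() now delegates construction and caching to createModuleInfo(), building the DWARF context with the same DWARFContext::create() call used elsewhere in this patch. A minimal sketch of that context-creation step (error handling elided):

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Object/ObjectFile.h"
    #include <memory>

    static std::unique_ptr<llvm::DIContext>
    makeDWARFContext(const llvm::object::ObjectFile &Obj) {
      return llvm::DWARFContext::create(Obj, nullptr,
                                        llvm::DWARFContext::defaultErrorHandler);
    }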
namespace {
diff --git a/lib/Demangle/Demangle.cpp b/lib/Demangle/Demangle.cpp
new file mode 100644
index 000000000000..5f921537b9bd
--- /dev/null
+++ b/lib/Demangle/Demangle.cpp
@@ -0,0 +1,36 @@
+//===-- Demangle.cpp - Common demangling functions ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains definitions of common demangling functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include <cstdlib>
+
+static bool isItaniumEncoding(const std::string &MangledName) {
+ size_t Pos = MangledName.find_first_not_of('_');
+ // A valid Itanium encoding requires 1-4 leading underscores, followed by 'Z'.
+ return Pos > 0 && Pos <= 4 && MangledName[Pos] == 'Z';
+}
+
+std::string llvm::demangle(const std::string &MangledName) {
+ char *Demangled;
+ if (isItaniumEncoding(MangledName))
+ Demangled = itaniumDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
+ else
+ Demangled =
+ microsoftDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
+
+ if (!Demangled)
+ return MangledName;
+
+ std::string Ret = Demangled;
+ free(Demangled);
+ return Ret;
+}
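A hedged usage sketch for the llvm::demangle() dispatcher defined above; the mangled strings are just examples and the exact demangled spellings may vary:

    #include "llvm/Demangle/Demangle.h"
    #include <iostream>

    int main() {
      // Leading underscores followed by 'Z': routed to itaniumDemangle(),
      // producing something like "foo(int)".
      std::cout << llvm::demangle("_Z3fooi") << "\n";
      // Leading '?': routed to microsoftDemangle().
      std::cout << llvm::demangle("?foo@@YAXH@Z") << "\n";
      // Anything neither demangler accepts is returned unchanged.
      std::cout << llvm::demangle("plain_name") << "\n";
      return 0;
    }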
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index b2de0be2b70c..5c99c70e3cc6 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -1,9 +1,8 @@
//===------------------------- ItaniumDemangle.cpp ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Demangle/MicrosoftDemangle.cpp b/lib/Demangle/MicrosoftDemangle.cpp
index 51ffa0bff7f3..bf7d77638f34 100644
--- a/lib/Demangle/MicrosoftDemangle.cpp
+++ b/lib/Demangle/MicrosoftDemangle.cpp
@@ -1,9 +1,8 @@
//===- MicrosoftDemangle.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,7 +17,7 @@
#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"
@@ -59,14 +58,18 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
// what.
break;
default:
- Error = true;
- return false;
+ // isMemberPointer() is called only if isPointerType() returns true,
+ // and it rejects other prefixes.
+ DEMANGLE_UNREACHABLE;
}
// If it starts with a number, then 6 indicates a non-member function
// pointer, and 8 indicates a member function pointer.
if (startsWithDigit(MangledName)) {
- assert(MangledName[0] == '6' || MangledName[0] == '8');
+ if (MangledName[0] != '6' && MangledName[0] != '8') {
+ Error = true;
+ return false;
+ }
return (MangledName[0] == '8');
}
@@ -76,7 +79,10 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
MangledName.consumeFront('I'); // restrict
MangledName.consumeFront('F'); // unaligned
- assert(!MangledName.empty());
+ if (MangledName.empty()) {
+ Error = true;
+ return false;
+ }
// The next value should be either ABCD (non-member) or QRST (member).
switch (MangledName.front()) {
@@ -136,8 +142,6 @@ consumeSpecialIntrinsicKind(StringView &MangledName) {
static bool startsWithLocalScopePattern(StringView S) {
if (!S.consumeFront('?'))
return false;
- if (S.size() < 2)
- return false;
size_t End = S.find('?');
if (End == StringView::npos)
@@ -234,10 +238,10 @@ demanglePointerCVQualifiers(StringView &MangledName) {
case 'S':
return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
PointerAffinity::Pointer);
- default:
- assert(false && "Ty is not a pointer type!");
}
- return std::make_pair(Q_None, PointerAffinity::Pointer);
+ // This function is only called if isPointerType() returns true,
+ // and it only returns true for the six cases listed above.
+ DEMANGLE_UNREACHABLE;
}
StringView Demangler::copyString(StringView Borrowed) {
@@ -265,12 +269,16 @@ Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
NI->Name = "`RTTI Complete Object Locator'";
break;
default:
- LLVM_BUILTIN_UNREACHABLE;
+ DEMANGLE_UNREACHABLE;
}
QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
SpecialTableSymbolNode *STSN = Arena.alloc<SpecialTableSymbolNode>();
STSN->Name = QN;
bool IsMember = false;
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
char Front = MangledName.popFront();
if (Front != '6' && Front != '7') {
Error = true;
@@ -284,9 +292,10 @@ Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
}
LocalStaticGuardVariableNode *
-Demangler::demangleLocalStaticGuard(StringView &MangledName) {
+Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
LocalStaticGuardIdentifierNode *LSGI =
Arena.alloc<LocalStaticGuardIdentifierNode>();
+ LSGI->IsThread = IsThread;
QualifiedNameNode *QN = demangleNameScopeChain(MangledName, LSGI);
LocalStaticGuardVariableNode *LSGVN =
Arena.alloc<LocalStaticGuardVariableNode>();
@@ -379,11 +388,11 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
if (MangledName.consumeFront('?'))
IsKnownStaticDataMember = true;
- QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
+ SymbolNode *Symbol = demangleDeclarator(MangledName);
+ if (Error)
+ return nullptr;
- SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
FunctionSymbolNode *FSN = nullptr;
- Symbol->Name = QN;
if (Symbol->kind() == NodeKind::VariableSymbol) {
DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
@@ -401,7 +410,8 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
}
FSN = demangleFunctionEncoding(MangledName);
- FSN->Name = synthesizeQualifiedName(Arena, DSIN);
+ if (FSN)
+ FSN->Name = synthesizeQualifiedName(Arena, DSIN);
} else {
if (IsKnownStaticDataMember) {
// This was supposed to be a static data member, but we got a function.
@@ -419,10 +429,10 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
- if (SIK == SpecialIntrinsicKind::None)
- return nullptr;
switch (SIK) {
+ case SpecialIntrinsicKind::None:
+ return nullptr;
case SpecialIntrinsicKind::StringLiteralSymbol:
return demangleStringLiteral(MangledName);
case SpecialIntrinsicKind::Vftable:
@@ -433,7 +443,9 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
case SpecialIntrinsicKind::VcallThunk:
return demangleVcallThunkNode(MangledName);
case SpecialIntrinsicKind::LocalStaticGuard:
- return demangleLocalStaticGuard(MangledName);
+ return demangleLocalStaticGuard(MangledName, /*IsThread=*/false);
+ case SpecialIntrinsicKind::LocalStaticThreadGuard:
+ return demangleLocalStaticGuard(MangledName, /*IsThread=*/true);
case SpecialIntrinsicKind::RttiTypeDescriptor: {
TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
if (Error)
@@ -453,11 +465,16 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
case SpecialIntrinsicKind::RttiBaseClassDescriptor:
return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
case SpecialIntrinsicKind::DynamicInitializer:
- return demangleInitFiniStub(MangledName, false);
+ return demangleInitFiniStub(MangledName, /*IsDestructor=*/false);
case SpecialIntrinsicKind::DynamicAtexitDestructor:
- return demangleInitFiniStub(MangledName, true);
- default:
+ return demangleInitFiniStub(MangledName, /*IsDestructor=*/true);
+ case SpecialIntrinsicKind::Typeof:
+ case SpecialIntrinsicKind::UdtReturning:
+    // It's unclear which tools produce these manglings, so demangling
+ // support is not (yet?) implemented.
break;
+ case SpecialIntrinsicKind::Unknown:
+ DEMANGLE_UNREACHABLE; // Never returned by consumeSpecialIntrinsicKind.
}
Error = true;
return nullptr;
@@ -467,11 +484,15 @@ IdentifierNode *
Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
assert(MangledName.startsWith('?'));
MangledName = MangledName.dropFront();
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
if (MangledName.consumeFront("__"))
return demangleFunctionIdentifierCode(
MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
- else if (MangledName.consumeFront("_"))
+ if (MangledName.consumeFront("_"))
return demangleFunctionIdentifierCode(MangledName,
FunctionIdentifierCodeGroup::Under);
return demangleFunctionIdentifierCode(MangledName,
@@ -497,16 +518,22 @@ LiteralOperatorIdentifierNode *
Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
LiteralOperatorIdentifierNode *N =
Arena.alloc<LiteralOperatorIdentifierNode>();
- N->Name = demangleSimpleString(MangledName, false);
+ N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
return N;
}
-static IntrinsicFunctionKind
-translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
+IntrinsicFunctionKind
+Demangler::translateIntrinsicFunctionCode(char CH,
+ FunctionIdentifierCodeGroup Group) {
+ using IFK = IntrinsicFunctionKind;
+ if (!(CH >= '0' && CH <= '9') && !(CH >= 'A' && CH <= 'Z')) {
+ Error = true;
+ return IFK::None;
+ }
+
// Not all ? identifiers are intrinsics *functions*. This function only maps
// operator codes for the special functions, all others are handled elsewhere,
// hence the IFK::None entries in the table.
- using IFK = IntrinsicFunctionKind;
static IFK Basic[36] = {
IFK::None, // ?0 # Foo::Foo()
IFK::None, // ?1 # Foo::~Foo()
@@ -606,8 +633,8 @@ translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
// iter
IFK::None, // ?__J local static thread guard
IFK::None, // ?__K operator ""_name
- IFK::CoAwait, // ?__L co_await
- IFK::None, // ?__M <unused>
+ IFK::CoAwait, // ?__L operator co_await
+ IFK::Spaceship, // ?__M operator<=>
IFK::None, // ?__N <unused>
IFK::None, // ?__O <unused>
IFK::None, // ?__P <unused>
@@ -632,12 +659,16 @@ translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group) {
case FunctionIdentifierCodeGroup::DoubleUnder:
return DoubleUnder[Index];
}
- LLVM_BUILTIN_UNREACHABLE;
+ DEMANGLE_UNREACHABLE;
}
IdentifierNode *
Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
FunctionIdentifierCodeGroup Group) {
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
switch (Group) {
case FunctionIdentifierCodeGroup::Basic:
switch (char CH = MangledName.popFront()) {
@@ -650,7 +681,6 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
return Arena.alloc<IntrinsicFunctionIdentifierNode>(
translateIntrinsicFunctionCode(CH, Group));
}
- break;
case FunctionIdentifierCodeGroup::Under:
return Arena.alloc<IntrinsicFunctionIdentifierNode>(
translateIntrinsicFunctionCode(MangledName.popFront(), Group));
@@ -663,13 +693,17 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
translateIntrinsicFunctionCode(CH, Group));
}
}
- // No Mangling Yet: Spaceship, // operator<=>
- return nullptr;
+ DEMANGLE_UNREACHABLE;
}
SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
QualifiedNameNode *Name) {
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
+
// Read a variable.
switch (MangledName.front()) {
case '0':
@@ -680,8 +714,6 @@ SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
StorageClass SC = demangleVariableStorageClass(MangledName);
return demangleVariableEncoding(MangledName, SC);
}
- case '8':
- return nullptr;
}
FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
@@ -689,23 +721,74 @@ SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
ConversionOperatorIdentifierNode *COIN =
static_cast<ConversionOperatorIdentifierNode *>(UQN);
- COIN->TargetType = FSN->Signature->ReturnType;
+ if (FSN)
+ COIN->TargetType = FSN->Signature->ReturnType;
}
return FSN;
}
+SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
+ // What follows is a main symbol name. This may include namespaces or class
+ // back references.
+ QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
+ if (Error)
+ return nullptr;
+
+ SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
+ if (Error)
+ return nullptr;
+ Symbol->Name = QN;
+
+ IdentifierNode *UQN = QN->getUnqualifiedIdentifier();
+ if (UQN->kind() == NodeKind::ConversionOperatorIdentifier) {
+ ConversionOperatorIdentifierNode *COIN =
+ static_cast<ConversionOperatorIdentifierNode *>(UQN);
+ if (!COIN->TargetType) {
+ Error = true;
+ return nullptr;
+ }
+ }
+ return Symbol;
+}
+
+SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
+ assert(MangledName.startsWith("??@"));
+  // This is an MD5 mangled name. We can't demangle it; just return the
+ // mangled name.
+ // An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
+ size_t MD5Last = MangledName.find('@', strlen("??@"));
+ if (MD5Last == StringView::npos) {
+ Error = true;
+ return nullptr;
+ }
+ const char *Start = MangledName.begin();
+ MangledName = MangledName.dropFront(MD5Last + 1);
+
+ // There are two additional special cases for MD5 names:
+ // 1. For complete object locators where the object name is long enough
+ // for the object to have an MD5 name, the complete object locator is
+ // called ??@...@??_R4@ (with a trailing "??_R4@" instead of the usual
+  //    leading "??_R4"). This is handled here.
+ // 2. For catchable types, in versions of MSVC before 2015 (<1900) or after
+ // 2017.2 (>= 1914), the catchable type mangling is _CT??@...@??@...@8
+  //    instead of _CT??@...@8 with just one MD5 name. Since we don't yet
+ // demangle catchable types anywhere, this isn't handled for MD5 names
+ // either.
+ MangledName.consumeFront("??_R4@");
+
+ StringView MD5(Start, MangledName.begin());
+ SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
+ S->Name = synthesizeQualifiedName(Arena, MD5);
+
+ return S;
+}
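demangleMD5Name() only validates the shape described in the comments above and passes the name through undecoded. For reference, such a name is "??@", 32 characters of digest, and a closing '@'; the example below is made up:

    #include <cassert>
    #include <string>

    int main() {
      const std::string MD5Name = "??@0123456789abcdef0123456789abcdef@";
      assert(MD5Name.compare(0, 3, "??@") == 0);          // leading "??@"
      assert(MD5Name.find('@', 3) == MD5Name.size() - 1); // terminating '@'
      assert(MD5Name.size() == 3 + 32 + 1);               // 32-character digest
      return 0;
    }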
+
// Parser entry point.
SymbolNode *Demangler::parse(StringView &MangledName) {
- // We can't demangle MD5 names, just output them as-is.
- // Also, MSVC-style mangled symbols must start with '?'.
- if (MangledName.startsWith("??@")) {
- // This is an MD5 mangled name. We can't demangle it, just return the
- // mangled name.
- SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
- S->Name = synthesizeQualifiedName(Arena, MangledName);
- return S;
- }
+ if (MangledName.startsWith("??@"))
+ return demangleMD5Name(MangledName);
+ // MSVC-style mangled symbols must start with '?'.
if (!MangledName.startsWith('?')) {
Error = true;
return nullptr;
@@ -718,21 +801,7 @@ SymbolNode *Demangler::parse(StringView &MangledName) {
if (SymbolNode *SI = demangleSpecialIntrinsic(MangledName))
return SI;
- // What follows is a main symbol name. This may include namespaces or class
- // back references.
- QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
- if (Error)
- return nullptr;
-
- SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
- if (Symbol) {
- Symbol->Name = QN;
- }
-
- if (Error)
- return nullptr;
-
- return Symbol;
+ return demangleDeclarator(MangledName);
}
TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
@@ -759,6 +828,9 @@ VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
VSN->SC = SC;
+ if (Error)
+ return nullptr;
+
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
switch (VSN->Type->kind()) {
@@ -797,7 +869,7 @@ VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
// <number> ::= [?] <non-negative integer>
//
// <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
-// ::= <hex digit>+ @ # when Numbrer == 0 or >= 10
+// ::= <hex digit>+ @ # when Number == 0 or >= 10
//
// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
@@ -906,8 +978,18 @@ Demangler::demangleTemplateInstantiationName(StringView &MangledName,
if (Error)
return nullptr;
- if (NBB & NBB_Template)
+ if (NBB & NBB_Template) {
+ // NBB_Template is only set for types and non-leaf names ("a::" in "a::b").
+    // Structors and conversion operators only make sense in a leaf name, so
+ // reject them in NBB_Template contexts.
+ if (Identifier->kind() == NodeKind::ConversionOperatorIdentifier ||
+ Identifier->kind() == NodeKind::StructorIdentifier) {
+ Error = true;
+ return nullptr;
+ }
+
memorizeIdentifier(Identifier);
+ }
return Identifier;
}
@@ -931,6 +1013,7 @@ static uint8_t rebasedHexDigitToNumber(char C) {
}
uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
+ assert(!MangledName.empty());
if (!MangledName.startsWith('?'))
return MangledName.popFront();
@@ -988,7 +1071,7 @@ wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
uint8_t C1, C2;
C1 = demangleCharLiteral(MangledName);
- if (Error)
+ if (Error || MangledName.empty())
goto WCharLiteralError;
C2 = demangleCharLiteral(MangledName);
if (Error)
@@ -1007,10 +1090,8 @@ static void writeHexDigit(char *Buffer, uint8_t Digit) {
}
static void outputHex(OutputStream &OS, unsigned C) {
- if (C == 0) {
- OS << "\\x00";
- return;
- }
+  assert(C != 0);
+
// It's easier to do the math if we can work from right to left, but we need
// to print the numbers from left to right. So render this into a temporary
// buffer first, then output the temporary buffer. Each byte is of the form
@@ -1019,23 +1100,26 @@ static void outputHex(OutputStream &OS, unsigned C) {
char TempBuffer[17];
::memset(TempBuffer, 0, sizeof(TempBuffer));
- constexpr int MaxPos = 15;
+ constexpr int MaxPos = sizeof(TempBuffer) - 1;
- int Pos = MaxPos - 1;
+ int Pos = MaxPos - 1; // TempBuffer[MaxPos] is the terminating \0.
while (C != 0) {
for (int I = 0; I < 2; ++I) {
writeHexDigit(&TempBuffer[Pos--], C % 16);
C /= 16;
}
- TempBuffer[Pos--] = 'x';
- TempBuffer[Pos--] = '\\';
- assert(Pos >= 0);
}
+ TempBuffer[Pos--] = 'x';
+ assert(Pos >= 0);
+ TempBuffer[Pos--] = '\\';
OS << StringView(&TempBuffer[Pos + 1]);
}
static void outputEscapedChar(OutputStream &OS, unsigned C) {
switch (C) {
+ case '\0': // nul
+ OS << "\\0";
+ return;
case '\'': // single quote
OS << "\\\'";
return;
@@ -1100,8 +1184,11 @@ static unsigned countEmbeddedNulls(const uint8_t *StringBytes,
return Result;
}
+// A mangled (non-wide) string literal stores the total length of the string it
+// refers to (passed in NumBytes), and it contains up to 32 bytes of actual text
+// (passed in StringBytes, NumChars).
static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
- unsigned NumBytes) {
+ uint64_t NumBytes) {
assert(NumBytes > 0);
// If the number of bytes is odd, this is guaranteed to be a char string.
@@ -1113,7 +1200,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
// 2-byte, or 4-byte null terminator.
if (NumBytes < 32) {
unsigned TrailingNulls = countTrailingNullBytes(StringBytes, NumChars);
- if (TrailingNulls >= 4)
+ if (TrailingNulls >= 4 && NumBytes % 4 == 0)
return 4;
if (TrailingNulls >= 2)
return 2;
@@ -1127,7 +1214,7 @@ static unsigned guessCharByteSize(const uint8_t *StringBytes, unsigned NumChars,
// perfect and is biased towards languages that have ascii alphabets, but this
// was always going to be best effort since the encoding is lossy.
unsigned Nulls = countEmbeddedNulls(StringBytes, NumChars);
- if (Nulls >= 2 * NumChars / 3)
+ if (Nulls >= 2 * NumChars / 3 && NumBytes % 4 == 0)
return 4;
if (Nulls >= NumChars / 3)
return 2;
@@ -1178,6 +1265,11 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
+ // Must happen before the first `goto StringLiteralError`.
+ if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ // FIXME: Propagate out-of-memory as an error?
+ std::terminate();
+
// Prefix indicating the beginning of a string literal
if (!MangledName.consumeFront("@_"))
goto StringLiteralError;
@@ -1188,7 +1280,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
switch (MangledName.popFront()) {
case '1':
IsWcharT = true;
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
case '0':
break;
default:
@@ -1197,7 +1289,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
// Encoded Length
std::tie(StringByteSize, IsNegative) = demangleNumber(MangledName);
- if (Error || IsNegative)
+ if (Error || IsNegative || StringByteSize < (IsWcharT ? 2 : 1))
goto StringLiteralError;
// CRC 32 (always 8 characters plus a terminator)
@@ -1209,16 +1301,14 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
if (MangledName.empty())
goto StringLiteralError;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
- // FIXME: Propagate out-of-memory as an error?
- std::terminate();
if (IsWcharT) {
Result->Char = CharKind::Wchar;
if (StringByteSize > 64)
Result->IsTruncated = true;
while (!MangledName.consumeFront('@')) {
- assert(StringByteSize >= 2);
+ if (MangledName.size() < 2)
+ goto StringLiteralError;
wchar_t W = demangleWcharLiteral(MangledName);
if (StringByteSize != 2 || Result->IsTruncated)
outputEscapedChar(OS, W);
@@ -1234,7 +1324,8 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
unsigned BytesDecoded = 0;
while (!MangledName.consumeFront('@')) {
- assert(StringByteSize >= 1);
+ if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
+ goto StringLiteralError;
StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
}
@@ -1255,7 +1346,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
Result->Char = CharKind::Char32;
break;
default:
- LLVM_BUILTIN_UNREACHABLE;
+ DEMANGLE_UNREACHABLE;
}
const unsigned NumChars = BytesDecoded / CharBytes;
for (unsigned CharIndex = 0; CharIndex < NumChars; ++CharIndex) {
@@ -1274,15 +1365,20 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
StringLiteralError:
Error = true;
+ std::free(OS.getBuffer());
return nullptr;
}
+// Returns MangledName's prefix before the first '@', or an error if
+// MangledName contains no '@' or the prefix has length 0.
StringView Demangler::demangleSimpleString(StringView &MangledName,
bool Memorize) {
StringView S;
for (size_t i = 0; i < MangledName.size(); ++i) {
if (MangledName[i] != '@')
continue;
+ if (i == 0)
+ break;
S = MangledName.substr(0, i);
MangledName = MangledName.dropFront(i + 1);
@@ -1319,8 +1415,10 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
MangledName.consumeFront('?');
- auto Number = demangleNumber(MangledName);
- assert(!Number.second);
+ uint64_t Number = 0;
+ bool IsNegative = false;
+ std::tie(Number, IsNegative) = demangleNumber(MangledName);
+ assert(!IsNegative);
// One ? to terminate the number
MangledName.consumeFront('?');
@@ -1338,7 +1436,7 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
OS << '`';
Scope->output(OS, OF_Default);
OS << '\'';
- OS << "::`" << Number.first << "'";
+ OS << "::`" << Number << "'";
OS << '\0';
char *Result = OS.getBuffer();
Identifier->Name = copyString(Result);
@@ -1349,7 +1447,8 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
// Parses a type name in the form of A@B@C@@ which represents C::B::A.
QualifiedNameNode *
Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
- IdentifierNode *Identifier = demangleUnqualifiedTypeName(MangledName, true);
+ IdentifierNode *Identifier =
+ demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
if (Error)
return nullptr;
assert(Identifier);
@@ -1381,9 +1480,12 @@ Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
return nullptr;
if (Identifier->kind() == NodeKind::StructorIdentifier) {
+ if (QN->Components->Count < 2) {
+ Error = true;
+ return nullptr;
+ }
StructorIdentifierNode *SIN =
static_cast<StructorIdentifierNode *>(Identifier);
- assert(QN->Components->Count >= 2);
Node *ClassNode = QN->Components->Nodes[QN->Components->Count - 2];
SIN->Class = static_cast<IdentifierNode *>(ClassNode);
}
@@ -1415,7 +1517,7 @@ Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
return demangleTemplateInstantiationName(MangledName, NBB);
if (MangledName.startsWith('?'))
return demangleFunctionIdentifierCode(MangledName);
- return demangleSimpleName(MangledName, (NBB & NBB_Simple) != 0);
+ return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
}
IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
@@ -1431,7 +1533,7 @@ IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
if (startsWithLocalScopePattern(MangledName))
return demangleLocallyScopedNamePiece(MangledName);
- return demangleSimpleName(MangledName, true);
+ return demangleSimpleName(MangledName, /*Memorize=*/true);
}
static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
@@ -1489,11 +1591,11 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
case 'C':
return FuncClass(FC_Private | FC_Static);
case 'D':
- return FuncClass(FC_Private | FC_Static);
+ return FuncClass(FC_Private | FC_Static | FC_Far);
case 'E':
return FuncClass(FC_Private | FC_Virtual);
case 'F':
- return FuncClass(FC_Private | FC_Virtual);
+ return FuncClass(FC_Private | FC_Virtual | FC_Far);
case 'G':
return FuncClass(FC_Private | FC_StaticThisAdjust);
case 'H':
@@ -1538,7 +1640,8 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
FuncClass VFlag = FC_VirtualThisAdjust;
if (MangledName.consumeFront('R'))
VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
-
+ if (MangledName.empty())
+ break;
switch (MangledName.popFront()) {
case '0':
return FuncClass(FC_Private | FC_Virtual | VFlag);
@@ -1561,6 +1664,11 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
}
CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
+ if (MangledName.empty()) {
+ Error = true;
+ return CallingConv::None;
+ }
+
switch (MangledName.popFront()) {
case 'A':
case 'B':
@@ -1591,7 +1699,7 @@ CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
}
StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
- assert(std::isdigit(MangledName.front()));
+ assert(MangledName.front() >= '0' && MangledName.front() <= '4');
switch (MangledName.popFront()) {
case '0':
@@ -1605,12 +1713,15 @@ StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
case '4':
return StorageClass::FunctionLocalStatic;
}
- Error = true;
- return StorageClass::None;
+ DEMANGLE_UNREACHABLE;
}
std::pair<Qualifiers, bool>
Demangler::demangleQualifiers(StringView &MangledName) {
+ if (MangledName.empty()) {
+ Error = true;
+ return std::make_pair(Q_None, false);
+ }
switch (MangledName.popFront()) {
// Member qualifiers
@@ -1649,6 +1760,11 @@ TypeNode *Demangler::demangleType(StringView &MangledName,
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
}
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
+
TypeNode *Ty = nullptr;
if (isTagType(MangledName))
Ty = demangleClassType(MangledName);
@@ -1710,7 +1826,7 @@ FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
if (!IsStructor)
FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
- FTy->Params = demangleFunctionParameterList(MangledName);
+ FTy->Params = demangleFunctionParameterList(MangledName, FTy->IsVariadic);
FTy->IsNoexcept = demangleThrowSpecification(MangledName);
@@ -1723,6 +1839,11 @@ Demangler::demangleFunctionEncoding(StringView &MangledName) {
if (MangledName.consumeFront("$$J0"))
ExtraFlags = FC_ExternC;
+ if (MangledName.empty()) {
+ Error = true;
+ return nullptr;
+ }
+
FuncClass FC = demangleFunctionClass(MangledName);
FC = FuncClass(ExtraFlags | FC);
@@ -1750,6 +1871,10 @@ Demangler::demangleFunctionEncoding(StringView &MangledName) {
bool HasThisQuals = !(FC & (FC_Global | FC_Static));
FSN = demangleFunctionType(MangledName, HasThisQuals);
}
+
+ if (Error)
+ return nullptr;
+
if (TTN) {
*static_cast<FunctionSignatureNode *>(TTN) = *FSN;
FSN = TTN;
@@ -1766,7 +1891,7 @@ CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
MangledName.popFront();
CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
- CTN->Identifier = demangleUnqualifiedTypeName(MangledName, true);
+ CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
if (!MangledName.consumeFront('@'))
Error = true;
if (Error)
@@ -1820,6 +1945,8 @@ PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Uint64);
case 'W':
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Wchar);
+ case 'Q':
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char8);
case 'S':
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
case 'U':
@@ -1846,7 +1973,7 @@ TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
TT = Arena.alloc<TagTypeNode>(TagKind::Class);
break;
case 'W':
- if (MangledName.popFront() != '4') {
+ if (!MangledName.consumeFront('4')) {
Error = true;
return nullptr;
}
@@ -1890,6 +2017,8 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
+ // isMemberPointer() only returns true if there is at least one character
+ // after the qualifiers.
if (MangledName.consumeFront("8")) {
Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
Pointer->Pointee = demangleFunctionType(MangledName, true);
@@ -1897,11 +2026,12 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
Qualifiers PointeeQuals = Q_None;
bool IsMember = false;
std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
- assert(IsMember);
+ assert(IsMember || Error);
Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
- Pointer->Pointee->Quals = PointeeQuals;
+ if (Pointer->Pointee)
+ Pointer->Pointee->Quals = PointeeQuals;
}
return Pointer;
@@ -1938,7 +2068,7 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
for (uint64_t I = 0; I < Rank; ++I) {
uint64_t D = 0;
std::tie(D, IsNegative) = demangleNumber(MangledName);
- if (IsNegative) {
+ if (Error || IsNegative) {
Error = true;
return nullptr;
}
@@ -1963,12 +2093,12 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
return ATy;
}
-// Reads a function or a template parameters.
-NodeArrayNode *
-Demangler::demangleFunctionParameterList(StringView &MangledName) {
+// Reads a function's parameters.
+NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
+ bool &IsVariadic) {
// Empty parameter list.
if (MangledName.consumeFront('X'))
- return {};
+ return nullptr;
NodeList *Head = Arena.alloc<NodeList>();
NodeList **Current = &Head;
@@ -1981,7 +2111,7 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
size_t N = MangledName[0] - '0';
if (N >= Backrefs.FunctionParamCount) {
Error = true;
- return {};
+ return nullptr;
}
MangledName = MangledName.dropFront();
@@ -2012,7 +2142,7 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
}
if (Error)
- return {};
+ return nullptr;
NodeArrayNode *NA = nodeListToNodeArray(Arena, Head, Count);
// A non-empty parameter list is terminated by either 'Z' (variadic) parameter
@@ -2022,13 +2152,11 @@ Demangler::demangleFunctionParameterList(StringView &MangledName) {
return NA;
if (MangledName.consumeFront('Z')) {
- // This is a variadic parameter list. We probably need a variadic node to
- // append to the end.
+ IsVariadic = true;
return NA;
}
- Error = true;
- return {};
+ DEMANGLE_UNREACHABLE;
}
NodeArrayNode *
@@ -2037,7 +2165,7 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
NodeList **Current = &Head;
size_t Count = 0;
- while (!Error && !MangledName.startsWith('@')) {
+ while (!MangledName.startsWith('@')) {
if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
// parameter pack separator
@@ -2070,12 +2198,16 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
MangledName = MangledName.dropFront();
// 1 - single inheritance <name>
// H - multiple inheritance <name> <number>
- // I - virtual inheritance <name> <number> <number> <number>
+ // I - virtual inheritance <name> <number> <number>
// J - unspecified inheritance <name> <number> <number> <number>
char InheritanceSpecifier = MangledName.popFront();
SymbolNode *S = nullptr;
if (MangledName.startsWith('?')) {
S = parse(MangledName);
+ if (Error || !S->Name) {
+ Error = true;
+ return nullptr;
+ }
memorizeIdentifier(S->Name->getUnqualifiedIdentifier());
}
@@ -2083,20 +2215,19 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
case 'J':
TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
demangleSigned(MangledName);
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
case 'I':
TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
demangleSigned(MangledName);
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
case 'H':
TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
demangleSigned(MangledName);
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
case '1':
break;
default:
- Error = true;
- break;
+ DEMANGLE_UNREACHABLE;
}
TPRN->Affinity = PointerAffinity::Pointer;
TPRN->Symbol = S;
@@ -2117,18 +2248,15 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
case 'G':
TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
demangleSigned(MangledName);
- LLVM_FALLTHROUGH;
+ DEMANGLE_FALLTHROUGH;
case 'F':
TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
demangleSigned(MangledName);
TPRN->ThunkOffsets[TPRN->ThunkOffsetCount++] =
demangleSigned(MangledName);
- LLVM_FALLTHROUGH;
- case '0':
break;
default:
- Error = true;
- break;
+ DEMANGLE_UNREACHABLE;
}
TPRN->IsMemberPointer = true;
@@ -2148,15 +2276,14 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
Current = &TP.Next;
}
- if (Error)
- return nullptr;
+ // The loop above returns nullptr on Error.
+ assert(!Error);
// Template parameter lists cannot be variadic, so it can only be terminated
- // by @.
- if (MangledName.consumeFront('@'))
- return nodeListToNodeArray(Arena, Head, Count);
- Error = true;
- return nullptr;
+ // by @ (as opposed to 'Z' in the function parameter case).
+ assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
+ MangledName.consumeFront('@');
+ return nodeListToNodeArray(Arena, Head, Count);
}
void Demangler::dumpBackReferences() {
diff --git a/lib/Demangle/MicrosoftDemangleNodes.cpp b/lib/Demangle/MicrosoftDemangleNodes.cpp
index 622f8e75e351..63ca475ec1fe 100644
--- a/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -1,9 +1,8 @@
//===- MicrosoftDemangle.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "llvm/Demangle/Compiler.h"
+#include "llvm/Demangle/DemangleConfig.h"
#include "llvm/Demangle/Utility.h"
#include <cctype>
#include <string>
@@ -35,21 +34,20 @@ static void outputSpaceIfNecessary(OutputStream &OS) {
OS << " ";
}
-static bool outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+static void outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
switch (Q) {
case Q_Const:
OS << "const";
- return true;
+ break;
case Q_Volatile:
OS << "volatile";
- return true;
+ break;
case Q_Restrict:
OS << "__restrict";
- return true;
+ break;
default:
break;
}
- return false;
}
static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
@@ -131,6 +129,7 @@ void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char, "char");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Schar, "signed char");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Uchar, "unsigned char");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char8, "char8_t");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char16, "char16_t");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Char32, "char32_t");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Short, "short");
@@ -338,8 +337,9 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
"`vector vbase copy constructor iterator'");
OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, ManVectorVbaseCopyCtorIter,
"`managed vector vbase copy constructor iterator'");
- OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CoAwait, "co_await");
- OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Spaceship, "operator <=>");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, CoAwait,
+ "operator co_await");
+ OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, Spaceship, "operator<=>");
case IntrinsicFunctionKind::MaxIntrinsic:
case IntrinsicFunctionKind::None:
break;
@@ -349,7 +349,10 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
OutputFlags Flags) const {
- OS << "`local static guard'";
+ if (IsThread)
+ OS << "`local static thread guard'";
+ else
+ OS << "`local static guard'";
if (ScopeIndex > 0)
OS << "{" << ScopeIndex << "}";
}
@@ -411,6 +414,12 @@ void FunctionSignatureNode::outputPost(OutputStream &OS,
Params->output(OS, Flags);
else
OS << "void";
+
+ if (IsVariadic) {
+ if (OS.back() != '(')
+ OS << ", ";
+ OS << "...";
+ }
OS << ")";
}
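// Illustrative note (a sketch of the behaviour above, not from the upstream
// sources): for a variadic signature with named parameters, Params->output()
// has already written e.g. "int", OS.back() is no longer '(', and the list
// prints as "(int, ...)". For a signature whose only parameter is the
// ellipsis, the parameter output writes nothing, OS.back() is still '(', and
// the list prints as "(...)" with no stray comma.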
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index ae96c7f5955f..1c6c0406d048 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -1,9 +1,8 @@
//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1020,32 +1019,6 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
return Result;
}
-/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
-/// with the integer held in IntVal.
-static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
- unsigned StoreBytes) {
- assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
- const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
-
- if (sys::IsLittleEndianHost) {
- // Little-endian host - the source is ordered from LSB to MSB. Order the
- // destination from LSB to MSB: Do a straight copy.
- memcpy(Dst, Src, StoreBytes);
- } else {
- // Big-endian host - the source is an array of 64 bit words ordered from
- // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
- // from MSB to LSB: Reverse the word order, but not the bytes in a word.
- while (StoreBytes > sizeof(uint64_t)) {
- StoreBytes -= sizeof(uint64_t);
- // May not be aligned so use memcpy.
- memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
- Src += sizeof(uint64_t);
- }
-
- memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
- }
-}
-
void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
GenericValue *Ptr, Type *Ty) {
const unsigned StoreBytes = getDataLayout().getTypeStoreSize(Ty);
@@ -1093,33 +1066,6 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
}
-/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
-/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
-static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
- assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
- uint8_t *Dst = reinterpret_cast<uint8_t *>(
- const_cast<uint64_t *>(IntVal.getRawData()));
-
- if (sys::IsLittleEndianHost)
- // Little-endian host - the destination must be ordered from LSB to MSB.
- // The source is ordered from LSB to MSB: Do a straight copy.
- memcpy(Dst, Src, LoadBytes);
- else {
- // Big-endian - the destination is an array of 64 bit words ordered from
- // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
- // ordered from MSB to LSB: Reverse the word order, but not the bytes in
- // a word.
- while (LoadBytes > sizeof(uint64_t)) {
- LoadBytes -= sizeof(uint64_t);
- // May not be aligned so use memcpy.
- memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
- Dst += sizeof(uint64_t);
- }
-
- memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
- }
-}
-
/// FIXME: document
///
void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 3be4bec566a0..c741fe2b3778 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -1,9 +1,8 @@
//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 8204f5a90268..08d20156a590 100644
--- a/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -1,9 +1,8 @@
//===----- GDBRegistrationListener.cpp - Registers objects with GDB -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index e9051c198506..1ebc820a8b49 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -1,9 +1,8 @@
//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -142,13 +141,25 @@ void IntelJITEventListener::notifyObjectLoaded(
uint64_t Addr = *AddrOrErr;
uint64_t Size = P.second;
+ auto SecOrErr = Sym.getSection();
+ if (!SecOrErr) {
+ // TODO: Actually report errors helpfully.
+ consumeError(SecOrErr.takeError());
+ continue;
+ }
+ object::section_iterator Sec = *SecOrErr;
+ if (Sec == Obj.section_end())
+ continue;
+ uint64_t Index = Sec->getIndex();
+
// Record this address in a local vector
Functions.push_back((void*)Addr);
// Build the function loaded notification message
iJIT_Method_Load FunctionMessage =
FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size);
- DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ DILineInfoTable Lines =
+ Context->getLineInfoForAddressRange({Addr, Index}, Size);
DILineInfoTable::iterator Begin = Lines.begin();
DILineInfoTable::iterator End = Lines.end();
for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
index 777d0f179cb5..68699c6a2200 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h
@@ -1,9 +1,8 @@
//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
index 61d8cc75d9f2..16ce672150cc 100644
--- a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h
@@ -1,9 +1,8 @@
/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===*
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===----------------------------------------------------------------------===*
*
diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
index 5df752f66f10..15008fe93e60 100644
--- a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
+++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h
@@ -1,9 +1,8 @@
/*===-- ittnotify_types.h - JIT Profiling API internal types--------*- C -*-===*
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===----------------------------------------------------------------------===*
*
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
index bc8fea148749..074e0735628a 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c
@@ -1,9 +1,8 @@
/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===*
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===----------------------------------------------------------------------===*
*
diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
index efd2b1a33f75..ba627b430ff1 100644
--- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
+++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h
@@ -1,9 +1,8 @@
/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===*
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===----------------------------------------------------------------------===*
*
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 98dca1102759..51f31d3d5d8f 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1,9 +1,8 @@
//===-- Execution.cpp - Implement code to simulate the program ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,6 +43,60 @@ static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
}
//===----------------------------------------------------------------------===//
+// Unary Instruction Implementations
+//===----------------------------------------------------------------------===//
+
+static void executeFNegInst(GenericValue &Dest, GenericValue Src, Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::FloatTyID:
+ Dest.FloatVal = -Src.FloatVal;
+ break;
+ case Type::DoubleTyID:
+ Dest.DoubleVal = -Src.DoubleVal;
+ break;
+ default:
+ llvm_unreachable("Unhandled type for FNeg instruction");
+ }
+}
+
+void Interpreter::visitUnaryOperator(UnaryOperator &I) {
+ ExecutionContext &SF = ECStack.back();
+ Type *Ty = I.getOperand(0)->getType();
+ GenericValue Src = getOperandValue(I.getOperand(0), SF);
+ GenericValue R; // Result
+
+ // First process vector operation
+ if (Ty->isVectorTy()) {
+ R.AggregateVal.resize(Src.AggregateVal.size());
+
+ switch(I.getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to handle this unary operator");
+ break;
+ case Instruction::FNeg:
+ if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+ R.AggregateVal[i].FloatVal = -Src.AggregateVal[i].FloatVal;
+ } else if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) {
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+ R.AggregateVal[i].DoubleVal = -Src.AggregateVal[i].DoubleVal;
+ } else {
+ llvm_unreachable("Unhandled type for FNeg instruction");
+ }
+ break;
+ }
+ } else {
+ switch (I.getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to handle this unary operator");
+ break;
+ case Instruction::FNeg: executeFNegInst(R, Src, Ty); break;
+ }
+ }
+ SetValue(&I, R, SF);
+}
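// Illustrative note (not from the upstream sources): this handles the one
// unary operator LLVM IR defines at this point, fneg, for float and double
// scalars and for vectors of those element types, e.g.
//
//   %y = fneg float %x
//   %w = fneg <4 x double> %v
//
// Any other operand type falls into the llvm_unreachable paths above.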
+
+//===----------------------------------------------------------------------===//
// Binary Instruction Implementations
//===----------------------------------------------------------------------===//
@@ -2113,7 +2166,7 @@ void Interpreter::run() {
// Track the number of dynamic instructions executed.
++NumDynamicInsts;
- LLVM_DEBUG(dbgs() << "About to interpret: " << I);
+ LLVM_DEBUG(dbgs() << "About to interpret: " << I << "\n");
visit(I); // Dispatch to one of the visit* methods...
}
}
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 334fcacf8078..c3a2ccc582c9 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -1,9 +1,8 @@
//===-- ExternalFunctions.cpp - Implement External Functions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index 9818adfff82e..5727f7adb49c 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -1,9 +1,8 @@
//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 33542e7e43ad..e72d778317d6 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -1,9 +1,8 @@
//===-- Interpreter.h ------------------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,6 +124,7 @@ public:
void visitSwitchInst(SwitchInst &I);
void visitIndirectBrInst(IndirectBrInst &I);
+ void visitUnaryOperator(UnaryOperator &I);
void visitBinaryOperator(BinaryOperator &I);
void visitICmpInst(ICmpInst &I);
void visitFCmpInst(FCmpInst &I);
diff --git a/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
new file mode 100644
index 000000000000..1271ad962b38
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
@@ -0,0 +1,82 @@
+//===--- BasicGOTAndStubsBuilder.h - Generic GOT/Stub creation --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A base for simple GOT and stub creation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_JITLINK_BASICGOTANDSTUBSBUILDER_H
+#define LLVM_LIB_EXECUTIONENGINE_JITLINK_BASICGOTANDSTUBSBUILDER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+template <typename BuilderImpl> class BasicGOTAndStubsBuilder {
+public:
+ BasicGOTAndStubsBuilder(AtomGraph &G) : G(G) {}
+
+ void run() {
+ // We're going to be adding new atoms, but we don't want to iterate over
+ // the newly added ones, so just copy the existing atoms out.
+ std::vector<DefinedAtom *> DAs(G.defined_atoms().begin(),
+ G.defined_atoms().end());
+
+ for (auto *DA : DAs)
+ for (auto &E : DA->edges())
+ if (impl().isGOTEdge(E))
+ impl().fixGOTEdge(E, getGOTEntryAtom(E.getTarget()));
+ else if (impl().isExternalBranchEdge(E))
+ impl().fixExternalBranchEdge(E, getStubAtom(E.getTarget()));
+ }
+
+protected:
+ Atom &getGOTEntryAtom(Atom &Target) {
+ assert(Target.hasName() && "GOT edge cannot point to anonymous target");
+
+ auto GOTEntryI = GOTEntries.find(Target.getName());
+
+ // Build the entry if it doesn't exist.
+ if (GOTEntryI == GOTEntries.end()) {
+ auto &GOTEntry = impl().createGOTEntry(Target);
+ GOTEntryI =
+ GOTEntries.insert(std::make_pair(Target.getName(), &GOTEntry)).first;
+ }
+
+ assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry atom");
+ return *GOTEntryI->second;
+ }
+
+ Atom &getStubAtom(Atom &Target) {
+ assert(Target.hasName() &&
+ "External branch edge can not point to an anonymous target");
+ auto StubI = Stubs.find(Target.getName());
+
+ if (StubI == Stubs.end()) {
+ auto &StubAtom = impl().createStub(Target);
+ StubI = Stubs.insert(std::make_pair(Target.getName(), &StubAtom)).first;
+ }
+
+ assert(StubI != Stubs.end() && "Could not get stub atom");
+ return *StubI->second;
+ }
+
+ AtomGraph &G;
+
+private:
+ BuilderImpl &impl() { return static_cast<BuilderImpl &>(*this); }
+
+ DenseMap<StringRef, DefinedAtom *> GOTEntries;
+ DenseMap<StringRef, DefinedAtom *> Stubs;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_JITLINK_BASICGOTANDSTUBSBUILDER_H
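// Illustrative sketch (assumed shape, not from the upstream sources): a
// concrete BuilderImpl supplies the six members that impl() dispatches to
// above. The signatures below are inferred from those call sites; a real
// target-specific builder defines them against its own edge kinds.
//
//   class MyGOTAndStubsBuilder
//       : public BasicGOTAndStubsBuilder<MyGOTAndStubsBuilder> {
//   public:
//     MyGOTAndStubsBuilder(AtomGraph &G) : BasicGOTAndStubsBuilder(G) {}
//     bool isGOTEdge(Edge &E) const;             // does E need a GOT entry?
//     void fixGOTEdge(Edge &E, Atom &GOTEntry);  // retarget E at its entry
//     bool isExternalBranchEdge(Edge &E) const;  // does E need a stub?
//     void fixExternalBranchEdge(Edge &E, Atom &Stub);
//     DefinedAtom &createGOTEntry(Atom &Target); // materialize one GOT slot
//     DefinedAtom &createStub(Atom &Target);     // materialize one stub
//   };
//
//   // Usage: MyGOTAndStubsBuilder(G).run();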
diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
new file mode 100644
index 000000000000..25f0e9040ffe
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -0,0 +1,544 @@
+//===-------- JITLink_EHFrameSupport.cpp - JITLink eh-frame utils ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EHFrameSupportImpl.h"
+
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/Support/DynamicLibrary.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+EHFrameParser::EHFrameParser(AtomGraph &G, Section &EHFrameSection,
+ StringRef EHFrameContent,
+ JITTargetAddress EHFrameAddress,
+ Edge::Kind FDEToCIERelocKind,
+ Edge::Kind FDEToTargetRelocKind)
+ : G(G), EHFrameSection(EHFrameSection), EHFrameContent(EHFrameContent),
+ EHFrameAddress(EHFrameAddress),
+ EHFrameReader(EHFrameContent, G.getEndianness()),
+ FDEToCIERelocKind(FDEToCIERelocKind),
+ FDEToTargetRelocKind(FDEToTargetRelocKind) {}
+
+Error EHFrameParser::atomize() {
+ while (!EHFrameReader.empty()) {
+ size_t RecordOffset = EHFrameReader.getOffset();
+
+ LLVM_DEBUG({
+ dbgs() << "Processing eh-frame record at "
+ << format("0x%016" PRIx64, EHFrameAddress + RecordOffset)
+ << " (offset " << RecordOffset << ")\n";
+ });
+
+ size_t CIELength = 0;
+ uint32_t CIELengthField;
+ if (auto Err = EHFrameReader.readInteger(CIELengthField))
+ return Err;
+
+ // Process CIE length/extended-length fields to build the atom.
+ //
+ // The value of these fields describes the length of the *rest* of the CIE
+ // (not including data up to the end of the field itself) so we have to
+ // bump CIELength to include the data up to the end of the field: 4 bytes
+ // for Length, or 12 bytes (4 bytes + 8 bytes) for ExtendedLength.
+ if (CIELengthField == 0) // Length 0 means end of __eh_frame section.
+ break;
+
+ // If the regular length field's value is 0xffffffff, use extended length.
+ if (CIELengthField == 0xffffffff) {
+ uint64_t CIEExtendedLengthField;
+ if (auto Err = EHFrameReader.readInteger(CIEExtendedLengthField))
+ return Err;
+ if (CIEExtendedLengthField > EHFrameReader.bytesRemaining())
+ return make_error<JITLinkError>("CIE record extends past the end of "
+ "the __eh_frame section");
+ if (CIEExtendedLengthField + 12 > std::numeric_limits<size_t>::max())
+ return make_error<JITLinkError>("CIE record too large to process");
+ CIELength = CIEExtendedLengthField + 12;
+ } else {
+ if (CIELengthField > EHFrameReader.bytesRemaining())
+ return make_error<JITLinkError>("CIE record extends past the end of "
+ "the __eh_frame section");
+ CIELength = CIELengthField + 4;
+ }
+
+ LLVM_DEBUG(dbgs() << " length: " << CIELength << "\n");
+
+ // Add an atom for this record.
+ CurRecordAtom = &G.addAnonymousAtom(
+ EHFrameSection, EHFrameAddress + RecordOffset, G.getPointerSize());
+ CurRecordAtom->setContent(EHFrameContent.substr(RecordOffset, CIELength));
+
+ // Read the CIE Pointer.
+ size_t CIEPointerAddress = EHFrameAddress + EHFrameReader.getOffset();
+ uint32_t CIEPointer;
+ if (auto Err = EHFrameReader.readInteger(CIEPointer))
+ return Err;
+
+ // Based on the CIE pointer value, parse this as a CIE or FDE record.
+ if (CIEPointer == 0) {
+ if (auto Err = processCIE())
+ return Err;
+ } else {
+ if (auto Err = processFDE(CIEPointerAddress, CIEPointer))
+ return Err;
+ }
+
+ EHFrameReader.setOffset(RecordOffset + CIELength);
+ }
+
+ return Error::success();
+}
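// Worked example for the length handling above (not from the upstream
// sources): a record whose 4-byte length field reads 0x1C spans 0x1C + 4 =
// 0x20 bytes in total; a field of 0xffffffff means an 8-byte extended length
// follows and the record spans <extended length> + 12 bytes (the 4 + 8 bytes
// of length fields plus the payload); a field of 0 terminates the __eh_frame
// section.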
+
+Expected<EHFrameParser::AugmentationInfo>
+EHFrameParser::parseAugmentationString() {
+ AugmentationInfo AugInfo;
+ uint8_t NextChar;
+ uint8_t *NextField = &AugInfo.Fields[0];
+
+ if (auto Err = EHFrameReader.readInteger(NextChar))
+ return std::move(Err);
+
+ while (NextChar != 0) {
+ switch (NextChar) {
+ case 'z':
+ AugInfo.AugmentationDataPresent = true;
+ break;
+ case 'e':
+ if (auto Err = EHFrameReader.readInteger(NextChar))
+ return std::move(Err);
+ if (NextChar != 'h')
+ return make_error<JITLinkError>("Unrecognized substring e" +
+ Twine(NextChar) +
+ " in augmentation string");
+ AugInfo.EHDataFieldPresent = true;
+ break;
+ case 'L':
+ case 'P':
+ case 'R':
+ *NextField++ = NextChar;
+ break;
+ default:
+ return make_error<JITLinkError>("Unrecognized character " +
+ Twine(NextChar) +
+ " in augmentation string");
+ }
+
+ if (auto Err = EHFrameReader.readInteger(NextChar))
+ return std::move(Err);
+ }
+
+ return std::move(AugInfo);
+}
+
+Expected<JITTargetAddress> EHFrameParser::readAbsolutePointer() {
+ static_assert(sizeof(JITTargetAddress) == sizeof(uint64_t),
+ "Result must be able to hold a uint64_t");
+ JITTargetAddress Addr;
+ if (G.getPointerSize() == 8) {
+ if (auto Err = EHFrameReader.readInteger(Addr))
+ return std::move(Err);
+ } else if (G.getPointerSize() == 4) {
+ uint32_t Addr32;
+ if (auto Err = EHFrameReader.readInteger(Addr32))
+ return std::move(Err);
+ Addr = Addr32;
+ } else
+ llvm_unreachable("Pointer size is not 32-bit or 64-bit");
+ return Addr;
+}
+
+Error EHFrameParser::processCIE() {
+ // Use the dwarf namespace for convenient access to pointer encoding
+ // constants.
+ using namespace dwarf;
+
+ LLVM_DEBUG(dbgs() << " Record is CIE\n");
+
+ CIEInformation CIEInfo(*CurRecordAtom);
+
+ uint8_t Version = 0;
+ if (auto Err = EHFrameReader.readInteger(Version))
+ return Err;
+
+ if (Version != 0x01)
+ return make_error<JITLinkError>("Bad CIE version " + Twine(Version) +
+ " (should be 0x01) in eh-frame");
+
+ auto AugInfo = parseAugmentationString();
+ if (!AugInfo)
+ return AugInfo.takeError();
+
+ // Skip the EH Data field if present.
+ if (AugInfo->EHDataFieldPresent)
+ if (auto Err = EHFrameReader.skip(G.getPointerSize()))
+ return Err;
+
+ // Read and sanity check the code alignment factor.
+ {
+ uint64_t CodeAlignmentFactor = 0;
+ if (auto Err = EHFrameReader.readULEB128(CodeAlignmentFactor))
+ return Err;
+ if (CodeAlignmentFactor != 1)
+ return make_error<JITLinkError>("Unsupported CIE code alignment factor " +
+ Twine(CodeAlignmentFactor) +
+ " (expected 1)");
+ }
+
+ // Read and sanity check the data alignment factor.
+ {
+ int64_t DataAlignmentFactor = 0;
+ if (auto Err = EHFrameReader.readSLEB128(DataAlignmentFactor))
+ return Err;
+ if (DataAlignmentFactor != -8)
+ return make_error<JITLinkError>("Unsupported CIE data alignment factor " +
+ Twine(DataAlignmentFactor) +
+ " (expected -8)");
+ }
+
+ // Skip the return address register field.
+ if (auto Err = EHFrameReader.skip(1))
+ return Err;
+
+ uint64_t AugmentationDataLength = 0;
+ if (auto Err = EHFrameReader.readULEB128(AugmentationDataLength))
+ return Err;
+
+ uint32_t AugmentationDataStartOffset = EHFrameReader.getOffset();
+
+ uint8_t *NextField = &AugInfo->Fields[0];
+ while (uint8_t Field = *NextField++) {
+ switch (Field) {
+ case 'L': {
+ CIEInfo.FDEsHaveLSDAField = true;
+ uint8_t LSDAPointerEncoding;
+ if (auto Err = EHFrameReader.readInteger(LSDAPointerEncoding))
+ return Err;
+ if (LSDAPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
+ return make_error<JITLinkError>(
+ "Unsupported LSDA pointer encoding " +
+ formatv("{0:x2}", LSDAPointerEncoding) + " in CIE at " +
+ formatv("{0:x16}", CurRecordAtom->getAddress()));
+ break;
+ }
+ case 'P': {
+ uint8_t PersonalityPointerEncoding = 0;
+ if (auto Err = EHFrameReader.readInteger(PersonalityPointerEncoding))
+ return Err;
+ if (PersonalityPointerEncoding !=
+ (DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4))
+ return make_error<JITLinkError>(
+ "Unspported personality pointer "
+ "encoding " +
+ formatv("{0:x2}", PersonalityPointerEncoding) + " in CIE at " +
+ formatv("{0:x16}", CurRecordAtom->getAddress()));
+ uint32_t PersonalityPointerAddress;
+ if (auto Err = EHFrameReader.readInteger(PersonalityPointerAddress))
+ return Err;
+ break;
+ }
+ case 'R': {
+ uint8_t FDEPointerEncoding;
+ if (auto Err = EHFrameReader.readInteger(FDEPointerEncoding))
+ return Err;
+ if (FDEPointerEncoding != (DW_EH_PE_pcrel | DW_EH_PE_absptr))
+ return make_error<JITLinkError>(
+ "Unsupported FDE address pointer "
+ "encoding " +
+ formatv("{0:x2}", FDEPointerEncoding) + " in CIE at " +
+ formatv("{0:x16}", CurRecordAtom->getAddress()));
+ break;
+ }
+ default:
+ llvm_unreachable("Invalid augmentation string field");
+ }
+ }
+
+ if (EHFrameReader.getOffset() - AugmentationDataStartOffset >
+ AugmentationDataLength)
+ return make_error<JITLinkError>("Read past the end of the augmentation "
+ "data while parsing fields");
+
+ assert(!CIEInfos.count(CurRecordAtom->getAddress()) &&
+ "Multiple CIEs recorded at the same address?");
+ CIEInfos[CurRecordAtom->getAddress()] = std::move(CIEInfo);
+
+ return Error::success();
+}
+
+Error EHFrameParser::processFDE(JITTargetAddress CIEPointerAddress,
+ uint32_t CIEPointer) {
+ LLVM_DEBUG(dbgs() << " Record is FDE\n");
+
+ LLVM_DEBUG({
+ dbgs() << " CIE pointer: "
+ << format("0x%016" PRIx64, CIEPointerAddress - CIEPointer) << "\n";
+ });
+
+ auto CIEInfoItr = CIEInfos.find(CIEPointerAddress - CIEPointer);
+ if (CIEInfoItr == CIEInfos.end())
+ return make_error<JITLinkError>(
+ "FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()) +
+ " points to non-existant CIE at " +
+ formatv("{0:x16}", CIEPointerAddress - CIEPointer));
+ auto &CIEInfo = CIEInfoItr->second;
+
+ // The CIEPointer looks good. Add a relocation.
+ CurRecordAtom->addEdge(FDEToCIERelocKind,
+ CIEPointerAddress - CurRecordAtom->getAddress(),
+ *CIEInfo.CIEAtom, 0);
+
+ // Read and sanity check the PC-start pointer and size.
+ JITTargetAddress PCBeginAddress = EHFrameAddress + EHFrameReader.getOffset();
+
+ auto PCBeginDelta = readAbsolutePointer();
+ if (!PCBeginDelta)
+ return PCBeginDelta.takeError();
+
+ JITTargetAddress PCBegin = PCBeginAddress + *PCBeginDelta;
+ LLVM_DEBUG({
+ dbgs() << " PC begin: " << format("0x%016" PRIx64, PCBegin) << "\n";
+ });
+
+ auto *TargetAtom = G.getAtomByAddress(PCBegin);
+
+ if (!TargetAtom)
+ return make_error<JITLinkError>("FDE PC-begin " +
+ formatv("{0:x16}", PCBegin) +
+ " does not point at atom");
+
+ if (TargetAtom->getAddress() != PCBegin)
+ return make_error<JITLinkError>(
+ "FDE PC-begin " + formatv("{0:x16}", PCBegin) +
+ " does not point to start of atom at " +
+ formatv("{0:x16}", TargetAtom->getAddress()));
+
+ LLVM_DEBUG(dbgs() << " FDE target: " << *TargetAtom << "\n");
+
+ // The PC-start pointer and size look good. Add relocations.
+ CurRecordAtom->addEdge(FDEToTargetRelocKind,
+ PCBeginAddress - CurRecordAtom->getAddress(),
+ *TargetAtom, 0);
+
+ // Add a keep-alive relocation from the function to the FDE to ensure it is
+ // not dead stripped.
+ TargetAtom->addEdge(Edge::KeepAlive, 0, *CurRecordAtom, 0);
+
+ // Skip over the PC range size field.
+ if (auto Err = EHFrameReader.skip(G.getPointerSize()))
+ return Err;
+
+ if (CIEInfo.FDEsHaveLSDAField) {
+ uint64_t AugmentationDataSize;
+ if (auto Err = EHFrameReader.readULEB128(AugmentationDataSize))
+ return Err;
+ if (AugmentationDataSize != G.getPointerSize())
+ return make_error<JITLinkError>(
+ "Unexpected FDE augmentation data size (expected " +
+ Twine(G.getPointerSize()) + ", got " + Twine(AugmentationDataSize) +
+ ") for FDE at " + formatv("{0:x16}", CurRecordAtom->getAddress()));
+ JITTargetAddress LSDAAddress = EHFrameAddress + EHFrameReader.getOffset();
+ auto LSDADelta = readAbsolutePointer();
+ if (!LSDADelta)
+ return LSDADelta.takeError();
+
+ JITTargetAddress LSDA = LSDAAddress + *LSDADelta;
+
+ auto *LSDAAtom = G.getAtomByAddress(LSDA);
+
+ if (!LSDAAtom)
+ return make_error<JITLinkError>("FDE LSDA " + formatv("{0:x16}", LSDA) +
+ " does not point at atom");
+
+ if (LSDAAtom->getAddress() != LSDA)
+ return make_error<JITLinkError>(
+ "FDE LSDA " + formatv("{0:x16}", LSDA) +
+ " does not point to start of atom at " +
+ formatv("{0:x16}", LSDAAtom->getAddress()));
+
+ LLVM_DEBUG(dbgs() << " FDE LSDA: " << *LSDAAtom << "\n");
+
+ // LSDA looks good. Add relocations.
+ CurRecordAtom->addEdge(FDEToTargetRelocKind,
+ LSDAAddress - CurRecordAtom->getAddress(), *LSDAAtom,
+ 0);
+ }
+
+ return Error::success();
+}
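// Worked example (not from the upstream sources): the FDE's CIE pointer is a
// backwards offset from the pointer field itself, so the lookup above uses
// CIEPointerAddress - CIEPointer. For instance, a CIE pointer value of 0x20
// read at address 0x1020 names the CIE record at address 0x1000; the edge
// added from the FDE then carries the pointer field's offset within the FDE
// atom and targets that CIE's atom.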
+
+Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
+ StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
+ Edge::Kind FDEToCIERelocKind,
+ Edge::Kind FDEToTargetRelocKind) {
+ return EHFrameParser(G, EHFrameSection, EHFrameContent, EHFrameAddress,
+ FDEToCIERelocKind, FDEToTargetRelocKind)
+ .atomize();
+}
+
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
+ !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+extern "C" void __register_frame(const void *);
+extern "C" void __deregister_frame(const void *);
+
+Error registerFrameWrapper(const void *P) {
+ __register_frame(P);
+ return Error::success();
+}
+
+Error deregisterFrameWrapper(const void *P) {
+ __deregister_frame(P);
+ return Error::success();
+}
+
+#else
+
+// The building compiler does not have __(de)register_frame but
+// it may be found at runtime in a dynamically-loaded library.
+// For example, this happens when building LLVM with Visual C++
+// but using the MingW runtime.
+static Error registerFrameWrapper(const void *P) {
+ static void((*RegisterFrame)(const void *)) = 0;
+
+ if (!RegisterFrame)
+ *(void **)&RegisterFrame =
+ llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("__register_frame");
+
+ if (RegisterFrame) {
+ RegisterFrame(P);
+ return Error::success();
+ }
+
+ return make_error<JITLinkError>("could not register eh-frame: "
+ "__register_frame function not found");
+}
+
+static Error deregisterFrameWrapper(const void *P) {
+ static void((*DeregisterFrame)(const void *)) = 0;
+
+ if (!DeregisterFrame)
+ *(void **)&DeregisterFrame =
+ llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "__deregister_frame");
+
+ if (DeregisterFrame) {
+ DeregisterFrame(P);
+ return Error::success();
+ }
+
+ return make_error<JITLinkError>("could not deregister eh-frame: "
+ "__deregister_frame function not found");
+}
+#endif
+
+#ifdef __APPLE__
+
+template <typename HandleFDEFn>
+Error walkAppleEHFrameSection(const char *const SectionStart,
+ HandleFDEFn HandleFDE) {
+ const char *CurCFIRecord = SectionStart;
+ uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+
+ while (Size != 0) {
+ const char *OffsetField = CurCFIRecord + (Size == 0xffffffff ? 12 : 4);
+ if (Size == 0xffffffff)
+ Size = *reinterpret_cast<const uint64_t *>(CurCFIRecord + 4) + 12;
+ else
+ Size += 4;
+ uint32_t Offset = *reinterpret_cast<const uint32_t *>(OffsetField);
+ if (Offset != 0)
+ if (auto Err = HandleFDE(CurCFIRecord))
+ return Err;
+
+ LLVM_DEBUG({
+ dbgs() << "Registering eh-frame section:\n";
+ dbgs() << "Processing " << (Offset ? "FDE" : "CIE") << " @"
+ << (void *)CurCFIRecord << ": [";
+ for (unsigned I = 0; I < Size; ++I)
+ dbgs() << format(" 0x%02" PRIx8, *(CurCFIRecord + I));
+ dbgs() << " ]\n";
+ });
+ CurCFIRecord += Size;
+
+ Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
+ }
+
+ return Error::success();
+}
+
+#endif // __APPLE__
+
+Error registerEHFrameSection(const void *EHFrameSectionAddr) {
+#ifdef __APPLE__
+ // On Darwin __register_frame has to be called for each FDE entry.
+ return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+ registerFrameWrapper);
+#else
+ // On Linux __register_frame takes a single argument:
+ // a pointer to the start of the .eh_frame section.
+
+ // How can it find the end? Because crtendS.o is linked
+ // in and it has an .eh_frame section with four zero chars.
+ return registerFrameWrapper(EHFrameSectionAddr);
+#endif
+}
+
+Error deregisterEHFrameSection(const void *EHFrameSectionAddr) {
+#ifdef __APPLE__
+ return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
+ deregisterFrameWrapper);
+#else
+ return deregisterFrameWrapper(EHFrameSectionAddr);
+#endif
+}
+
+EHFrameRegistrar::~EHFrameRegistrar() {}
+
+InProcessEHFrameRegistrar &InProcessEHFrameRegistrar::getInstance() {
+ static InProcessEHFrameRegistrar Instance;
+ return Instance;
+}
+
+InProcessEHFrameRegistrar::InProcessEHFrameRegistrar() {}
+
+AtomGraphPassFunction
+createEHFrameRecorderPass(const Triple &TT,
+ StoreFrameAddressFunction StoreFrameAddress) {
+ const char *EHFrameSectionName = nullptr;
+ if (TT.getObjectFormat() == Triple::MachO)
+ EHFrameSectionName = "__eh_frame";
+ else
+ EHFrameSectionName = ".eh_frame";
+
+ auto RecordEHFrame = [EHFrameSectionName,
+ StoreFrameAddress](AtomGraph &G) -> Error {
+ // Search for a non-empty eh-frame and record the address of the first atom
+ // in it.
+ JITTargetAddress Addr = 0;
+ if (auto *S = G.findSectionByName(EHFrameSectionName))
+ Addr = S->getRange().getStart();
+ StoreFrameAddress(Addr);
+ return Error::success();
+ };
+
+ return RecordEHFrame;
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
new file mode 100644
index 000000000000..d679edef7ea6
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/EHFrameSupportImpl.h
@@ -0,0 +1,72 @@
+//===------- EHFrameSupportImpl.h - JITLink eh-frame utils ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EHFrame registration support for JITLink.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORTIMPL_H
+#define LLVM_LIB_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORTIMPL_H
+
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/BinaryStreamReader.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// A generic parser for eh-frame sections.
+///
+/// Adds atoms representing CIE and FDE entries, using the given FDE-to-CIE and
+/// FDEToTarget relocation kinds.
+class EHFrameParser {
+public:
+ EHFrameParser(AtomGraph &G, Section &EHFrameSection, StringRef EHFrameContent,
+ JITTargetAddress EHFrameAddress, Edge::Kind FDEToCIERelocKind,
+ Edge::Kind FDEToTargetRelocKind);
+ Error atomize();
+
+private:
+ struct AugmentationInfo {
+ bool AugmentationDataPresent = false;
+ bool EHDataFieldPresent = false;
+ uint8_t Fields[4] = {0x0, 0x0, 0x0, 0x0};
+ };
+
+ Expected<AugmentationInfo> parseAugmentationString();
+ Expected<JITTargetAddress> readAbsolutePointer();
+ Error processCIE();
+ Error processFDE(JITTargetAddress CIEPointerAddress, uint32_t CIEPointer);
+
+ struct CIEInformation {
+ CIEInformation() = default;
+ CIEInformation(DefinedAtom &CIEAtom) : CIEAtom(&CIEAtom) {}
+ DefinedAtom *CIEAtom = nullptr;
+ bool FDEsHaveLSDAField = false;
+ };
+
+ AtomGraph &G;
+ Section &EHFrameSection;
+ StringRef EHFrameContent;
+ JITTargetAddress EHFrameAddress;
+ BinaryStreamReader EHFrameReader;
+ DefinedAtom *CurRecordAtom = nullptr;
+ DenseMap<JITTargetAddress, CIEInformation> CIEInfos;
+ Edge::Kind FDEToCIERelocKind;
+ Edge::Kind FDEToTargetRelocKind;
+};
+
+Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
+ StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
+ Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORTIMPL_H
diff --git a/lib/ExecutionEngine/JITLink/JITLink.cpp b/lib/ExecutionEngine/JITLink/JITLink.cpp
new file mode 100644
index 000000000000..9d0a7459dc09
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -0,0 +1,172 @@
+//===------------- JITLink.cpp - Core Run-time JIT linker APIs ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+#define DEBUG_TYPE "jitlink"
+
+namespace {
+
+enum JITLinkErrorCode { GenericJITLinkError = 1 };
+
+// FIXME: This class is only here to support the transition to llvm::Error. It
+// will be removed once this transition is complete. Clients should prefer to
+// deal with the Error value directly, rather than converting to error_code.
+class JITLinkerErrorCategory : public std::error_category {
+public:
+ const char *name() const noexcept override { return "runtimedyld"; }
+
+ std::string message(int Condition) const override {
+ switch (static_cast<JITLinkErrorCode>(Condition)) {
+ case GenericJITLinkError:
+ return "Generic JITLink error";
+ }
+ llvm_unreachable("Unrecognized JITLinkErrorCode");
+ }
+};
+
+static ManagedStatic<JITLinkerErrorCategory> JITLinkerErrorCategory;
+
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+char JITLinkError::ID = 0;
+
+void JITLinkError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
+
+std::error_code JITLinkError::convertToErrorCode() const {
+ return std::error_code(GenericJITLinkError, *JITLinkerErrorCategory);
+}
+
+const StringRef getGenericEdgeKindName(Edge::Kind K) {
+ switch (K) {
+ case Edge::Invalid:
+ return "INVALID RELOCATION";
+ case Edge::KeepAlive:
+ return "Keep-Alive";
+ case Edge::LayoutNext:
+ return "Layout-Next";
+ default:
+ llvm_unreachable("Unrecognized relocation kind");
+ }
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const Atom &A) {
+ OS << "<";
+ if (A.getName().empty())
+ OS << "anon@" << format("0x%016" PRIx64, A.getAddress());
+ else
+ OS << A.getName();
+ OS << " [";
+ if (A.isDefined()) {
+ auto &DA = static_cast<const DefinedAtom &>(A);
+ OS << " section=" << DA.getSection().getName();
+ if (DA.isLive())
+ OS << " live";
+ if (DA.shouldDiscard())
+ OS << " should-discard";
+ } else
+ OS << " external";
+ OS << " ]>";
+ return OS;
+}
+
+void printEdge(raw_ostream &OS, const Atom &FixupAtom, const Edge &E,
+ StringRef EdgeKindName) {
+ OS << "edge@" << formatv("{0:x16}", FixupAtom.getAddress() + E.getOffset())
+ << ": " << FixupAtom << " + " << E.getOffset() << " -- " << EdgeKindName
+ << " -> " << E.getTarget() << " + " << E.getAddend();
+}
+
+Section::~Section() {
+ for (auto *DA : DefinedAtoms)
+ DA->~DefinedAtom();
+}
+
+void AtomGraph::dump(raw_ostream &OS,
+ std::function<StringRef(Edge::Kind)> EdgeKindToName) {
+ if (!EdgeKindToName)
+ EdgeKindToName = [](Edge::Kind K) { return StringRef(); };
+
+ OS << "Defined atoms:\n";
+ for (auto *DA : defined_atoms()) {
+ OS << " " << format("0x%016" PRIx64, DA->getAddress()) << ": " << *DA
+ << "\n";
+ for (auto &E : DA->edges()) {
+ OS << " ";
+ StringRef EdgeName = (E.getKind() < Edge::FirstRelocation
+ ? getGenericEdgeKindName(E.getKind())
+ : EdgeKindToName(E.getKind()));
+
+ if (!EdgeName.empty())
+ printEdge(OS, *DA, E, EdgeName);
+ else {
+ auto EdgeNumberString = std::to_string(E.getKind());
+ printEdge(OS, *DA, E, EdgeNumberString);
+ }
+ OS << "\n";
+ }
+ }
+
+ OS << "Absolute atoms:\n";
+ for (auto *A : absolute_atoms())
+ OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A
+ << "\n";
+
+ OS << "External atoms:\n";
+ for (auto *A : external_atoms())
+ OS << " " << format("0x%016" PRIx64, A->getAddress()) << ": " << *A
+ << "\n";
+}
+
+JITLinkContext::~JITLinkContext() {}
+
+bool JITLinkContext::shouldAddDefaultTargetPasses(const Triple &TT) const {
+ return true;
+}
+
+AtomGraphPassFunction JITLinkContext::getMarkLivePass(const Triple &TT) const {
+ return AtomGraphPassFunction();
+}
+
+Error JITLinkContext::modifyPassConfig(const Triple &TT,
+ PassConfiguration &Config) {
+ return Error::success();
+}
+
+Error markAllAtomsLive(AtomGraph &G) {
+ for (auto *DA : G.defined_atoms())
+ DA->setLive(true);
+ return Error::success();
+}
+
+void jitLink(std::unique_ptr<JITLinkContext> Ctx) {
+ auto Magic = identify_magic(Ctx->getObjectBuffer().getBuffer());
+ switch (Magic) {
+ case file_magic::macho_object:
+ return jitLink_MachO(std::move(Ctx));
+ default:
+ Ctx->notifyFailed(make_error<JITLinkError>("Unsupported file format"));
+ };
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
new file mode 100644
index 000000000000..96e074da122b
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -0,0 +1,481 @@
+//===--------- JITLinkGeneric.cpp - Generic JIT linker utilities ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic JITLinker utility class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITLinkGeneric.h"
+#include "EHFrameSupportImpl.h"
+
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+JITLinkerBase::~JITLinkerBase() {}
+
+void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
+
+ // Build the atom graph.
+ if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer()))
+ G = std::move(*GraphOrErr);
+ else
+ return Ctx->notifyFailed(GraphOrErr.takeError());
+ assert(G && "Graph should have been created by buildGraph above");
+
+ // Prune and optimize the graph.
+ if (auto Err = runPasses(Passes.PrePrunePasses, *G))
+ return Ctx->notifyFailed(std::move(Err));
+
+ LLVM_DEBUG({
+ dbgs() << "Atom graph \"" << G->getName() << "\" pre-pruning:\n";
+ dumpGraph(dbgs());
+ });
+
+ prune(*G);
+
+ LLVM_DEBUG({
+ dbgs() << "Atom graph \"" << G->getName() << "\" post-pruning:\n";
+ dumpGraph(dbgs());
+ });
+
+ // Run post-pruning passes.
+ if (auto Err = runPasses(Passes.PostPrunePasses, *G))
+ return Ctx->notifyFailed(std::move(Err));
+
+ // Sort atoms into segments.
+ layOutAtoms();
+
+ // Allocate memory for segments.
+ if (auto Err = allocateSegments(Layout))
+ return Ctx->notifyFailed(std::move(Err));
+
+ // Notify client that the defined atoms have been assigned addresses.
+ Ctx->notifyResolved(*G);
+
+ auto ExternalSymbols = getExternalSymbolNames();
+
+ // We're about to hand off ownership of ourself to the continuation. Grab a
+ // pointer to the context so that we can call it to initiate the lookup.
+ //
+ // FIXME: Once callee expressions are defined to be sequenced before argument
+ // expressions (c++17) we can simplify all this to:
+ //
+ // Ctx->lookup(std::move(UnresolvedExternals),
+ // [Self=std::move(Self)](Expected<AsyncLookupResult> Result) {
+ // Self->linkPhase2(std::move(Self), std::move(Result));
+ // });
+ //
+ // FIXME: Use move capture once we have c++14.
+ auto *TmpCtx = Ctx.get();
+ auto *UnownedSelf = Self.release();
+ auto Phase2Continuation =
+ [UnownedSelf](Expected<AsyncLookupResult> LookupResult) {
+ std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
+ UnownedSelf->linkPhase2(std::move(Self), std::move(LookupResult));
+ };
+ TmpCtx->lookup(std::move(ExternalSymbols), std::move(Phase2Continuation));
+}
+
+void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
+ Expected<AsyncLookupResult> LR) {
+ // If the lookup failed, bail out.
+ if (!LR)
+ return deallocateAndBailOut(LR.takeError());
+
+ // Assign addresses to external atoms.
+ applyLookupResult(*LR);
+
+ LLVM_DEBUG({
+ dbgs() << "Atom graph \"" << G->getName() << "\" before copy-and-fixup:\n";
+ dumpGraph(dbgs());
+ });
+
+ // Copy atom content to working memory and fix up.
+ if (auto Err = copyAndFixUpAllAtoms(Layout, *Alloc))
+ return deallocateAndBailOut(std::move(Err));
+
+ LLVM_DEBUG({
+ dbgs() << "Atom graph \"" << G->getName() << "\" after copy-and-fixup:\n";
+ dumpGraph(dbgs());
+ });
+
+ if (auto Err = runPasses(Passes.PostFixupPasses, *G))
+ return deallocateAndBailOut(std::move(Err));
+
+ // FIXME: Use move capture once we have c++14.
+ auto *UnownedSelf = Self.release();
+ auto Phase3Continuation = [UnownedSelf](Error Err) {
+ std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
+ UnownedSelf->linkPhase3(std::move(Self), std::move(Err));
+ };
+
+ Alloc->finalizeAsync(std::move(Phase3Continuation));
+}
+
+void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err) {
+ if (Err)
+ return deallocateAndBailOut(std::move(Err));
+ Ctx->notifyFinalized(std::move(Alloc));
+}
+
+Error JITLinkerBase::runPasses(AtomGraphPassList &Passes, AtomGraph &G) {
+ for (auto &P : Passes)
+ if (auto Err = P(G))
+ return Err;
+ return Error::success();
+}
+
+void JITLinkerBase::layOutAtoms() {
+ // Group sections by protections, and whether or not they're zero-fill.
+ for (auto &S : G->sections()) {
+
+ // Skip empty sections.
+ if (S.atoms_empty())
+ continue;
+
+ auto &SL = Layout[S.getProtectionFlags()];
+ if (S.isZeroFill())
+ SL.ZeroFillSections.push_back(SegmentLayout::SectionLayout(S));
+ else
+ SL.ContentSections.push_back(SegmentLayout::SectionLayout(S));
+ }
+
+ // Sort sections within the layout by ordinal.
+ {
+ auto CompareByOrdinal = [](const SegmentLayout::SectionLayout &LHS,
+ const SegmentLayout::SectionLayout &RHS) {
+ return LHS.S->getSectionOrdinal() < RHS.S->getSectionOrdinal();
+ };
+ for (auto &KV : Layout) {
+ auto &SL = KV.second;
+ std::sort(SL.ContentSections.begin(), SL.ContentSections.end(),
+ CompareByOrdinal);
+ std::sort(SL.ZeroFillSections.begin(), SL.ZeroFillSections.end(),
+ CompareByOrdinal);
+ }
+ }
+
+ // Add atoms to the sections.
+ for (auto &KV : Layout) {
+ auto &SL = KV.second;
+ for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections}) {
+ for (auto &SI : *SIList) {
+ // First build the set of layout-heads (i.e. "heads" of layout-next
+ // chains) by copying the section atoms, then eliminating any that
+ // appear as layout-next targets.
+ DenseSet<DefinedAtom *> LayoutHeads;
+ for (auto *DA : SI.S->atoms())
+ LayoutHeads.insert(DA);
+
+ for (auto *DA : SI.S->atoms())
+ if (DA->hasLayoutNext())
+ LayoutHeads.erase(&DA->getLayoutNext());
+
+ // Next, sort the layout heads by address order.
+ std::vector<DefinedAtom *> OrderedLayoutHeads;
+ OrderedLayoutHeads.reserve(LayoutHeads.size());
+ for (auto *DA : LayoutHeads)
+ OrderedLayoutHeads.push_back(DA);
+
+ // Now sort the list of layout heads by address.
+ std::sort(OrderedLayoutHeads.begin(), OrderedLayoutHeads.end(),
+ [](const DefinedAtom *LHS, const DefinedAtom *RHS) {
+ return LHS->getAddress() < RHS->getAddress();
+ });
+
+ // Now populate the SI.Atoms field by appending each of the chains.
+ for (auto *DA : OrderedLayoutHeads) {
+ SI.Atoms.push_back(DA);
+ while (DA->hasLayoutNext()) {
+ auto &Next = DA->getLayoutNext();
+ SI.Atoms.push_back(&Next);
+ DA = &Next;
+ }
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Segment ordering:\n";
+ for (auto &KV : Layout) {
+ dbgs() << " Segment "
+ << static_cast<sys::Memory::ProtectionFlags>(KV.first) << ":\n";
+ auto &SL = KV.second;
+ for (auto &SIEntry :
+ {std::make_pair(&SL.ContentSections, "content sections"),
+ std::make_pair(&SL.ZeroFillSections, "zero-fill sections")}) {
+ auto &SIList = *SIEntry.first;
+ dbgs() << " " << SIEntry.second << ":\n";
+ for (auto &SI : SIList) {
+ dbgs() << " " << SI.S->getName() << ":\n";
+ for (auto *DA : SI.Atoms)
+ dbgs() << " " << *DA << "\n";
+ }
+ }
+ }
+ });
+}
+
+Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
+
+ // Compute segment sizes and allocate memory.
+ LLVM_DEBUG(dbgs() << "JIT linker requesting: { ");
+ JITLinkMemoryManager::SegmentsRequestMap Segments;
+ for (auto &KV : Layout) {
+ auto &Prot = KV.first;
+ auto &SegLayout = KV.second;
+
+ // Calculate segment content size.
+ size_t SegContentSize = 0;
+ for (auto &SI : SegLayout.ContentSections) {
+ assert(!SI.S->atoms_empty() && "Sections in layout must not be empty");
+ assert(!SI.Atoms.empty() && "Section layouts must not be empty");
+
+ // Bump to section alignment before processing atoms.
+ SegContentSize = alignTo(SegContentSize, SI.S->getAlignment());
+
+ for (auto *DA : SI.Atoms) {
+ SegContentSize = alignTo(SegContentSize, DA->getAlignment());
+ SegContentSize += DA->getSize();
+ }
+ }
+
+ // Get segment content alignment.
+ unsigned SegContentAlign = 1;
+ if (!SegLayout.ContentSections.empty()) {
+ auto &FirstContentSection = SegLayout.ContentSections.front();
+ SegContentAlign =
+ std::max(FirstContentSection.S->getAlignment(),
+ FirstContentSection.Atoms.front()->getAlignment());
+ }
+
+ // Calculate segment zero-fill size.
+ uint64_t SegZeroFillSize = 0;
+ for (auto &SI : SegLayout.ZeroFillSections) {
+ assert(!SI.S->atoms_empty() && "Sections in layout must not be empty");
+ assert(!SI.Atoms.empty() && "Section layouts must not be empty");
+
+ // Bump to section alignment before processing atoms.
+ SegZeroFillSize = alignTo(SegZeroFillSize, SI.S->getAlignment());
+
+ for (auto *DA : SI.Atoms) {
+ SegZeroFillSize = alignTo(SegZeroFillSize, DA->getAlignment());
+ SegZeroFillSize += DA->getSize();
+ }
+ }
+
+ // Calculate segment zero-fill alignment.
+ uint32_t SegZeroFillAlign = 1;
+
+ if (!SegLayout.ZeroFillSections.empty()) {
+ auto &FirstZeroFillSection = SegLayout.ZeroFillSections.front();
+ SegZeroFillAlign =
+ std::max(FirstZeroFillSection.S->getAlignment(),
+ FirstZeroFillSection.Atoms.front()->getAlignment());
+ }
+
+ if (SegContentSize == 0)
+ SegContentAlign = SegZeroFillAlign;
+
+ if (SegContentAlign % SegZeroFillAlign != 0)
+ return make_error<JITLinkError>("First content atom alignment does not "
+ "accommodate first zero-fill atom "
+ "alignment");
+
+ Segments[Prot] = {SegContentSize, SegContentAlign, SegZeroFillSize,
+ SegZeroFillAlign};
+
+ LLVM_DEBUG({
+ dbgs() << (&KV == &*Layout.begin() ? "" : "; ")
+ << static_cast<sys::Memory::ProtectionFlags>(Prot) << ": "
+ << SegContentSize << " content bytes (alignment "
+ << SegContentAlign << ") + " << SegZeroFillSize
+ << " zero-fill bytes (alignment " << SegZeroFillAlign << ")";
+ });
+ }
+ LLVM_DEBUG(dbgs() << " }\n");
+
+ if (auto AllocOrErr = Ctx->getMemoryManager().allocate(Segments))
+ Alloc = std::move(*AllocOrErr);
+ else
+ return AllocOrErr.takeError();
+
+ LLVM_DEBUG({
+ dbgs() << "JIT linker got working memory:\n";
+ for (auto &KV : Layout) {
+ auto Prot = static_cast<sys::Memory::ProtectionFlags>(KV.first);
+ dbgs() << " " << Prot << ": "
+ << (const void *)Alloc->getWorkingMemory(Prot).data() << "\n";
+ }
+ });
+
+ // Update atom target addresses.
+ for (auto &KV : Layout) {
+ auto &Prot = KV.first;
+ auto &SL = KV.second;
+
+ JITTargetAddress AtomTargetAddr =
+ Alloc->getTargetMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
+
+ for (auto *SIList : {&SL.ContentSections, &SL.ZeroFillSections})
+ for (auto &SI : *SIList) {
+ AtomTargetAddr = alignTo(AtomTargetAddr, SI.S->getAlignment());
+ for (auto *DA : SI.Atoms) {
+ AtomTargetAddr = alignTo(AtomTargetAddr, DA->getAlignment());
+ DA->setAddress(AtomTargetAddr);
+ AtomTargetAddr += DA->getSize();
+ }
+ }
+ }
+
+ return Error::success();
+}
+
+DenseSet<StringRef> JITLinkerBase::getExternalSymbolNames() const {
+ // Identify unresolved external atoms.
+ DenseSet<StringRef> UnresolvedExternals;
+ for (auto *DA : G->external_atoms()) {
+ assert(DA->getAddress() == 0 &&
+ "External has already been assigned an address");
+ assert(DA->getName() != StringRef() && DA->getName() != "" &&
+ "Externals must be named");
+ UnresolvedExternals.insert(DA->getName());
+ }
+ return UnresolvedExternals;
+}
+
+void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
+ for (auto &KV : Result) {
+ Atom &A = G->getAtomByName(KV.first);
+ assert(A.getAddress() == 0 && "Atom already resolved");
+ A.setAddress(KV.second.getAddress());
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Externals after applying lookup result:\n";
+ for (auto *A : G->external_atoms())
+ dbgs() << " " << A->getName() << ": "
+ << formatv("{0:x16}", A->getAddress()) << "\n";
+ });
+ assert(llvm::all_of(G->external_atoms(),
+ [](Atom *A) { return A->getAddress() != 0; }) &&
+ "All atoms should have been resolved by this point");
+}
+
+void JITLinkerBase::deallocateAndBailOut(Error Err) {
+ assert(Err && "Should not be bailing out on success value");
+ assert(Alloc && "can not call deallocateAndBailOut before allocation");
+ Ctx->notifyFailed(joinErrors(std::move(Err), Alloc->deallocate()));
+}
+
+void JITLinkerBase::dumpGraph(raw_ostream &OS) {
+ assert(G && "Graph is not set yet");
+ G->dump(OS, [this](Edge::Kind K) { return getEdgeKindName(K); });
+}
+
+void prune(AtomGraph &G) {
+ std::vector<DefinedAtom *> Worklist;
+ DenseMap<DefinedAtom *, std::vector<Edge *>> EdgesToUpdate;
+
+ // Build the initial worklist from all atoms initially live.
+ for (auto *DA : G.defined_atoms()) {
+ if (!DA->isLive() || DA->shouldDiscard())
+ continue;
+
+ for (auto &E : DA->edges()) {
+ if (!E.getTarget().isDefined())
+ continue;
+
+ auto &EDT = static_cast<DefinedAtom &>(E.getTarget());
+
+ if (EDT.shouldDiscard())
+ EdgesToUpdate[&EDT].push_back(&E);
+ else if (E.isKeepAlive() && !EDT.isLive())
+ Worklist.push_back(&EDT);
+ }
+ }
+
+ // Propagate live flags to all atoms reachable from the initial live set.
+ while (!Worklist.empty()) {
+ DefinedAtom &NextLive = *Worklist.back();
+ Worklist.pop_back();
+
+ assert(!NextLive.shouldDiscard() &&
+ "should-discard nodes should never make it into the worklist");
+
+ // If this atom has already been marked as live, skip it. (Should-discard
+ // atoms never reach the worklist, per the assertion above.)
+ if (NextLive.isLive())
+ continue;
+
+ // Otherwise mark it as live and add to the worklist any non-live atoms
+ // that it points to.
+ NextLive.setLive(true);
+
+ for (auto &E : NextLive.edges()) {
+ if (!E.getTarget().isDefined())
+ continue;
+
+ auto &EDT = static_cast<DefinedAtom &>(E.getTarget());
+
+ if (EDT.shouldDiscard())
+ EdgesToUpdate[&EDT].push_back(&E);
+ else if (E.isKeepAlive() && !EDT.isLive())
+ Worklist.push_back(&EDT);
+ }
+ }
+
+ // Collect atoms to remove, then remove them from the graph.
+ std::vector<DefinedAtom *> AtomsToRemove;
+ for (auto *DA : G.defined_atoms())
+ if (DA->shouldDiscard() || !DA->isLive())
+ AtomsToRemove.push_back(DA);
+
+ LLVM_DEBUG(dbgs() << "Pruning atoms:\n");
+ for (auto *DA : AtomsToRemove) {
+ LLVM_DEBUG(dbgs() << " " << *DA << "... ");
+
+ // Check whether we need to replace this atom with an external atom.
+ //
+ // We replace if all of the following hold:
+ // (1) The atom is marked should-discard,
+ // (2) it has live edges (i.e. edges from live atoms) pointing to it.
+ //
+ // Otherwise we simply delete the atom.
+
+ G.removeDefinedAtom(*DA);
+
+ auto EdgesToUpdateItr = EdgesToUpdate.find(DA);
+ if (EdgesToUpdateItr != EdgesToUpdate.end()) {
+ auto &ExternalReplacement = G.addExternalAtom(DA->getName());
+ for (auto *EdgeToUpdate : EdgesToUpdateItr->second)
+ EdgeToUpdate->setTarget(ExternalReplacement);
+ LLVM_DEBUG(dbgs() << "replaced with " << ExternalReplacement << "\n");
+ } else
+ LLVM_DEBUG(dbgs() << "deleted\n");
+ }
+
+ // Finally, discard any absolute symbols that were marked should-discard.
+ {
+ std::vector<Atom *> AbsoluteAtomsToRemove;
+ for (auto *A : G.absolute_atoms())
+ if (A->shouldDiscard() || !A->isLive())
+ AbsoluteAtomsToRemove.push_back(A);
+ for (auto *A : AbsoluteAtomsToRemove)
+ G.removeAbsoluteAtom(*A);
+ }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
new file mode 100644
index 000000000000..e6fd6e38f7a6
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -0,0 +1,256 @@
+//===------ JITLinkGeneric.h - Generic JIT linker utilities -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic JITLinker utilities. E.g. graph pruning, eh-frame parsing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
+#define LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+
+class MemoryBufferRef;
+
+namespace jitlink {
+
+/// Base class for a JIT linker.
+///
+/// A JITLinkerBase instance links one object file into an ongoing JIT
+/// session. Symbol resolution and finalization operations are pluggable,
+/// and called using continuation passing (passing a continuation for the
+/// remaining linker work) to allow them to be performed asynchronously.
+class JITLinkerBase {
+public:
+ JITLinkerBase(std::unique_ptr<JITLinkContext> Ctx, PassConfiguration Passes)
+ : Ctx(std::move(Ctx)), Passes(std::move(Passes)) {
+ assert(this->Ctx && "Ctx can not be null");
+ }
+
+ virtual ~JITLinkerBase();
+
+protected:
+ struct SegmentLayout {
+ using SectionAtomsList = std::vector<DefinedAtom *>;
+ struct SectionLayout {
+ SectionLayout(Section &S) : S(&S) {}
+
+ Section *S;
+ SectionAtomsList Atoms;
+ };
+
+ using SectionLayoutList = std::vector<SectionLayout>;
+
+ SectionLayoutList ContentSections;
+ SectionLayoutList ZeroFillSections;
+ };
+
+ using SegmentLayoutMap = DenseMap<unsigned, SegmentLayout>;
+
+ // Phase 1:
+ // 1.1: Build atom graph
+ // 1.2: Run pre-prune passes
+ // 1.3: Prune graph
+ // 1.4: Run post-prune passes
+ // 1.5: Sort atoms into segments
+ // 1.6: Allocate segment memory
+ // 1.7: Identify externals and make an async call to resolve function
+ void linkPhase1(std::unique_ptr<JITLinkerBase> Self);
+
+ // Phase 2:
+ // 2.1: Apply resolution results
+ // 2.2: Fix up atom contents
+ // 2.3: Call OnResolved callback
+ // 2.4: Make an async call to transfer and finalize memory.
+ void linkPhase2(std::unique_ptr<JITLinkerBase> Self,
+ Expected<AsyncLookupResult> LookupResult);
+
+ // Phase 3:
+ // 3.1: Call OnFinalized callback, handing off allocation.
+ void linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err);
+
+ // Build a graph from the given object buffer.
+ // To be implemented by the client.
+ virtual Expected<std::unique_ptr<AtomGraph>>
+ buildGraph(MemoryBufferRef ObjBuffer) = 0;
+
+ // For debug dumping of the atom graph.
+ virtual StringRef getEdgeKindName(Edge::Kind K) const = 0;
+
+private:
+ // Run all passes in the given pass list, bailing out immediately if any pass
+ // returns an error.
+ Error runPasses(AtomGraphPassList &Passes, AtomGraph &G);
+
+ // Copy atom contents and apply relocations.
+ // Implemented in JITLinker.
+ virtual Error
+ copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout,
+ JITLinkMemoryManager::Allocation &Alloc) const = 0;
+
+ void layOutAtoms();
+ Error allocateSegments(const SegmentLayoutMap &Layout);
+ DenseSet<StringRef> getExternalSymbolNames() const;
+ void applyLookupResult(AsyncLookupResult LR);
+ void deallocateAndBailOut(Error Err);
+
+ void dumpGraph(raw_ostream &OS);
+
+ std::unique_ptr<JITLinkContext> Ctx;
+ PassConfiguration Passes;
+ std::unique_ptr<AtomGraph> G;
+ SegmentLayoutMap Layout;
+ std::unique_ptr<JITLinkMemoryManager::Allocation> Alloc;
+};
+
+template <typename LinkerImpl> class JITLinker : public JITLinkerBase {
+public:
+ using JITLinkerBase::JITLinkerBase;
+
+ /// Link constructs a LinkerImpl instance and calls linkPhase1.
+ /// Link should be called with the constructor arguments for LinkerImpl, which
+ /// will be forwarded to the constructor.
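+ /// Concrete linkers expose this entry point directly; for example,
+ /// jitLink_MachO_x86_64 (MachO_x86_64.cpp) calls
+ /// MachOJITLinker_x86_64::link(std::move(Ctx), std::move(Config)).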
+ template <typename... ArgTs> static void link(ArgTs &&... Args) {
+ auto L = llvm::make_unique<LinkerImpl>(std::forward<ArgTs>(Args)...);
+
+ // Ownership of the linker is passed into the linker's doLink function to
+ // allow it to be passed on to async continuations.
+ //
+ // FIXME: Remove LTmp once we have c++17.
+ // C++17 sequencing rules guarantee that function name expressions are
+ // sequenced before arguments, so L->linkPhase1(std::move(L), ...) will be
+ // well formed.
+ auto &LTmp = *L;
+ LTmp.linkPhase1(std::move(L));
+ }
+
+private:
+ const LinkerImpl &impl() const {
+ return static_cast<const LinkerImpl &>(*this);
+ }
+
+ Error
+ copyAndFixUpAllAtoms(const SegmentLayoutMap &Layout,
+ JITLinkMemoryManager::Allocation &Alloc) const override {
+ LLVM_DEBUG(dbgs() << "Copying and fixing up atoms:\n");
+ for (auto &KV : Layout) {
+ auto &Prot = KV.first;
+ auto &SegLayout = KV.second;
+
+ auto SegMem = Alloc.getWorkingMemory(
+ static_cast<sys::Memory::ProtectionFlags>(Prot));
+ char *LastAtomEnd = SegMem.data();
+ char *AtomDataPtr = LastAtomEnd;
+
+ LLVM_DEBUG({
+ dbgs() << " Processing segment "
+ << static_cast<sys::Memory::ProtectionFlags>(Prot) << " [ "
+ << (const void *)SegMem.data() << " .. "
+ << (const void *)((char *)SegMem.data() + SegMem.size())
+ << " ]\n Processing content sections:\n";
+ });
+
+ for (auto &SI : SegLayout.ContentSections) {
+ LLVM_DEBUG(dbgs() << " " << SI.S->getName() << ":\n");
+
+ AtomDataPtr += alignmentAdjustment(AtomDataPtr, SI.S->getAlignment());
+
+ LLVM_DEBUG({
+ dbgs() << " Bumped atom pointer to " << (const void *)AtomDataPtr
+ << " to meet section alignment "
+ << " of " << SI.S->getAlignment() << "\n";
+ });
+
+ for (auto *DA : SI.Atoms) {
+
+ // Align.
+ AtomDataPtr += alignmentAdjustment(AtomDataPtr, DA->getAlignment());
+ LLVM_DEBUG({
+ dbgs() << " Bumped atom pointer to "
+ << (const void *)AtomDataPtr << " to meet alignment of "
+ << DA->getAlignment() << "\n";
+ });
+
+ // Zero pad up to alignment.
+ LLVM_DEBUG({
+ if (LastAtomEnd != AtomDataPtr)
+ dbgs() << " Zero padding from " << (const void *)LastAtomEnd
+ << " to " << (const void *)AtomDataPtr << "\n";
+ });
+ while (LastAtomEnd != AtomDataPtr)
+ *LastAtomEnd++ = 0;
+
+ // Copy initial atom content.
+ LLVM_DEBUG({
+ dbgs() << " Copying atom " << *DA << " content, "
+ << DA->getContent().size() << " bytes, from "
+ << (const void *)DA->getContent().data() << " to "
+ << (const void *)AtomDataPtr << "\n";
+ });
+ memcpy(AtomDataPtr, DA->getContent().data(), DA->getContent().size());
+
+ // Apply fixups.
+ LLVM_DEBUG(dbgs() << " Applying fixups.\n");
+ for (auto &E : DA->edges()) {
+
+ // Skip non-relocation edges.
+ if (!E.isRelocation())
+ continue;
+
+ // Dispatch to LinkerImpl for fixup.
+ if (auto Err = impl().applyFixup(*DA, E, AtomDataPtr))
+ return Err;
+ }
+
+ // Point the atom's content to the fixed up buffer.
+ DA->setContent(StringRef(AtomDataPtr, DA->getContent().size()));
+
+ // Update atom end pointer.
+ LastAtomEnd = AtomDataPtr + DA->getContent().size();
+ AtomDataPtr = LastAtomEnd;
+ }
+ }
+
+ // Zero pad the rest of the segment.
+ LLVM_DEBUG({
+ dbgs() << " Zero padding end of segment from "
+ << (const void *)LastAtomEnd << " to "
+ << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
+ });
+ while (LastAtomEnd != SegMem.data() + SegMem.size())
+ *LastAtomEnd++ = 0;
+ }
+
+ return Error::success();
+ }
+};
+
+/// Dead strips and replaces discarded definitions with external atoms.
+///
+/// Finds the set of nodes reachable from any node initially marked live
+/// (nodes marked should-discard are treated as not live, even if they are
+/// reachable). All nodes not marked as live at the end of this process are
+/// deleted. Nodes that are live but marked should-discard are replaced with
+/// external atoms, and all edges to them are re-written.
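+/// Pruning happens during phase 1 of the generic link algorithm, between the
+/// pre-prune and post-prune pass lists (see the phase overview in
+/// JITLinkerBase above).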
+void prune(AtomGraph &G);
+
+Error addEHFrame(AtomGraph &G, Section &EHFrameSection,
+ StringRef EHFrameContent, JITTargetAddress EHFrameAddress,
+ Edge::Kind FDEToCIERelocKind, Edge::Kind FDEToTargetRelocKind);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#undef DEBUG_TYPE // "jitlink"
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_JITLINKGENERIC_H
diff --git a/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
new file mode 100644
index 000000000000..267307cfde05
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -0,0 +1,105 @@
+//===--- JITLinkMemoryManager.cpp - JITLinkMemoryManager implementation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/Support/Process.h"
+
+namespace llvm {
+namespace jitlink {
+
+JITLinkMemoryManager::~JITLinkMemoryManager() = default;
+JITLinkMemoryManager::Allocation::~Allocation() = default;
+
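+// Illustrative usage sketch: a JITLinkContext's getMemoryManager() may return
+// an InProcessMemoryManager; the generic linker calls allocate() with one
+// segment request per protection-flag group, copies and fixes up atom content
+// in the returned working memory, and finally calls finalizeAsync() to apply
+// protections and hand the allocation off to the context.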
+Expected<std::unique_ptr<JITLinkMemoryManager::Allocation>>
+InProcessMemoryManager::allocate(const SegmentsRequestMap &Request) {
+
+ using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
+
+ // Local class for allocation.
+ class IPMMAlloc : public Allocation {
+ public:
+ IPMMAlloc(AllocationMap SegBlocks) : SegBlocks(std::move(SegBlocks)) {}
+ MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
+ assert(SegBlocks.count(Seg) && "No allocation for segment");
+ return {static_cast<char *>(SegBlocks[Seg].base()),
+ SegBlocks[Seg].allocatedSize()};
+ }
+ JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
+ assert(SegBlocks.count(Seg) && "No allocation for segment");
+ return reinterpret_cast<JITTargetAddress>(SegBlocks[Seg].base());
+ }
+ void finalizeAsync(FinalizeContinuation OnFinalize) override {
+ OnFinalize(applyProtections());
+ }
+ Error deallocate() override {
+ for (auto &KV : SegBlocks)
+ if (auto EC = sys::Memory::releaseMappedMemory(KV.second))
+ return errorCodeToError(EC);
+ return Error::success();
+ }
+
+ private:
+ Error applyProtections() {
+ for (auto &KV : SegBlocks) {
+ auto &Prot = KV.first;
+ auto &Block = KV.second;
+ if (auto EC = sys::Memory::protectMappedMemory(Block, Prot))
+ return errorCodeToError(EC);
+ if (Prot & sys::Memory::MF_EXEC)
+ sys::Memory::InvalidateInstructionCache(Block.base(),
+ Block.allocatedSize());
+ }
+ return Error::success();
+ }
+
+ AllocationMap SegBlocks;
+ };
+
+ AllocationMap Blocks;
+ const sys::Memory::ProtectionFlags ReadWrite =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+
+ for (auto &KV : Request) {
+ auto &Seg = KV.second;
+
+ if (Seg.getContentAlignment() > sys::Process::getPageSizeEstimate())
+ return make_error<StringError>("Cannot request higher than page "
+ "alignment",
+ inconvertibleErrorCode());
+
+ if (sys::Process::getPageSizeEstimate() % Seg.getContentAlignment() != 0)
+ return make_error<StringError>("Page size is not a multiple of "
+ "alignment",
+ inconvertibleErrorCode());
+
+ uint64_t ZeroFillStart =
+ alignTo(Seg.getContentSize(), Seg.getZeroFillAlignment());
+ uint64_t SegmentSize = ZeroFillStart + Seg.getZeroFillSize();
+
+ std::error_code EC;
+ auto SegMem =
+ sys::Memory::allocateMappedMemory(SegmentSize, nullptr, ReadWrite, EC);
+
+ if (EC)
+ return errorCodeToError(EC);
+
+ // Zero out the zero-fill memory.
+ memset(static_cast<char *>(SegMem.base()) + ZeroFillStart, 0,
+ Seg.getZeroFillSize());
+
+ // Record the block for this segment.
+ Blocks[KV.first] = std::move(SegMem);
+ }
+ return std::unique_ptr<InProcessMemoryManager::Allocation>(
+ new IPMMAlloc(std::move(Blocks)));
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachO.cpp b/lib/ExecutionEngine/JITLink/MachO.cpp
new file mode 100644
index 000000000000..15995b8ce98f
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -0,0 +1,78 @@
+//===-------------- MachO.cpp - JIT linker function for MachO -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO jit-link function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx) {
+
+ // We don't want to do full MachO validation here. Just parse enough of the
+ // header to find out what MachO linker to use.
+
+ StringRef Data = Ctx->getObjectBuffer().getBuffer();
+ if (Data.size() < 4) {
+ Ctx->notifyFailed(make_error<JITLinkError>("Truncated MachO buffer"));
+ return;
+ }
+
+ uint32_t Magic;
+ memcpy(&Magic, Data.data(), sizeof(uint32_t));
+ LLVM_DEBUG({
+ dbgs() << "jitLink_MachO: magic = " << format("0x%08" PRIx32, Magic)
+ << ", identifier = \""
+ << Ctx->getObjectBuffer().getBufferIdentifier() << "\"\n";
+ });
+
+ if (Magic == MachO::MH_MAGIC || Magic == MachO::MH_CIGAM) {
+ Ctx->notifyFailed(
+ make_error<JITLinkError>("MachO 32-bit platforms not supported"));
+ return;
+ } else if (Magic == MachO::MH_MAGIC_64 || Magic == MachO::MH_CIGAM_64) {
+ MachO::mach_header_64 Header;
+
+ memcpy(&Header, Data.data(), sizeof(MachO::mach_header_64));
+ if (Magic == MachO::MH_CIGAM_64)
+ swapStruct(Header);
+
+ LLVM_DEBUG({
+ dbgs() << "jitLink_MachO: cputype = "
+ << format("0x%08" PRIx32, Header.cputype)
+ << ", cpusubtype = " << format("0x%08" PRIx32, Header.cpusubtype)
+ << "\n";
+ });
+
+ switch (Header.cputype) {
+ case MachO::CPU_TYPE_X86_64:
+ return jitLink_MachO_x86_64(std::move(Ctx));
+ }
+ Ctx->notifyFailed(make_error<JITLinkError>("MachO-64 CPU type not valid"));
+ return;
+ }
+
+ Ctx->notifyFailed(make_error<JITLinkError>("MachO magic not valid"));
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
new file mode 100644
index 000000000000..1501c7ad0bc5
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.cpp
@@ -0,0 +1,411 @@
+//=--------- MachOAtomGraphBuilder.cpp - MachO AtomGraph builder ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic MachO AtomGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOAtomGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+MachOAtomGraphBuilder::~MachOAtomGraphBuilder() {}
+
+Expected<std::unique_ptr<AtomGraph>> MachOAtomGraphBuilder::buildGraph() {
+ if (auto Err = parseSections())
+ return std::move(Err);
+
+ if (auto Err = addAtoms())
+ return std::move(Err);
+
+ if (auto Err = addRelocations())
+ return std::move(Err);
+
+ return std::move(G);
+}
+
+MachOAtomGraphBuilder::MachOAtomGraphBuilder(const object::MachOObjectFile &Obj)
+ : Obj(Obj),
+ G(llvm::make_unique<AtomGraph>(Obj.getFileName(), getPointerSize(Obj),
+ getEndianness(Obj))) {}
+
+void MachOAtomGraphBuilder::addCustomAtomizer(StringRef SectionName,
+ CustomAtomizeFunction Atomizer) {
+ assert(!CustomAtomizeFunctions.count(SectionName) &&
+ "Custom atomizer for this section already exists");
+ CustomAtomizeFunctions[SectionName] = std::move(Atomizer);
+}
+
+bool MachOAtomGraphBuilder::areLayoutLocked(const Atom &A, const Atom &B) {
+ // If these atoms are the same then they're trivially "locked".
+ if (&A == &B)
+ return true;
+
+ // If A and B are different, check whether either is undefined (in which
+ // case they are not locked).
+ if (!A.isDefined() || !B.isDefined())
+ return false;
+
+ // A and B are different, but they're both defined atoms. We need to check
+ // whether they're part of the same alt_entry chain.
+ auto &DA = static_cast<const DefinedAtom &>(A);
+ auto &DB = static_cast<const DefinedAtom &>(B);
+
+ auto AStartItr = AltEntryStarts.find(&DA);
+ if (AStartItr == AltEntryStarts.end()) // If A is not in a chain bail out.
+ return false;
+
+ auto BStartItr = AltEntryStarts.find(&DB);
+ if (BStartItr == AltEntryStarts.end()) // If B is not in a chain bail out.
+ return false;
+
+ // A and B are layout locked if they're in the same chain.
+ return AStartItr->second == BStartItr->second;
+}
+
+unsigned
+MachOAtomGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
+ return Obj.is64Bit() ? 8 : 4;
+}
+
+support::endianness
+MachOAtomGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
+ return Obj.isLittleEndian() ? support::little : support::big;
+}
+
+MachOAtomGraphBuilder::MachOSection &MachOAtomGraphBuilder::getCommonSection() {
+ if (!CommonSymbolsSection) {
+ auto Prot = static_cast<sys::Memory::ProtectionFlags>(
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE);
+ auto &GenericSection = G->createSection("<common>", 1, Prot, true);
+ CommonSymbolsSection = MachOSection(GenericSection);
+ }
+ return *CommonSymbolsSection;
+}
+
+Error MachOAtomGraphBuilder::parseSections() {
+ for (auto &SecRef : Obj.sections()) {
+ assert((SecRef.getAlignment() <= std::numeric_limits<uint32_t>::max()) &&
+ "Section alignment does not fit in 32 bits");
+
+ StringRef Name;
+ if (auto EC = SecRef.getName(Name))
+ return errorCodeToError(EC);
+
+ unsigned SectionIndex = SecRef.getIndex() + 1;
+
+ uint32_t Align = SecRef.getAlignment();
+ if (!isPowerOf2_32(Align))
+ return make_error<JITLinkError>("Section " + Name +
+ " has non-power-of-2 "
+ "alignment");
+
+ // FIXME: Get real section permissions
+ // How, exactly, on MachO?
+ sys::Memory::ProtectionFlags Prot;
+ if (SecRef.isText())
+ Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ else
+ Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+
+ auto &GenericSection = G->createSection(Name, Align, Prot, SecRef.isBSS());
+
+ LLVM_DEBUG({
+ dbgs() << "Adding section " << Name << ": "
+ << format("0x%016" PRIx64, SecRef.getAddress())
+ << ", align: " << SecRef.getAlignment() << "\n";
+ });
+
+ assert(!Sections.count(SectionIndex) && "Section index already in use");
+
+ auto &MachOSec =
+ Sections
+ .try_emplace(SectionIndex, GenericSection, SecRef.getAddress(),
+ SecRef.getAlignment())
+ .first->second;
+
+ if (!SecRef.isVirtual()) {
+ // If this section has content then record it.
+ Expected<StringRef> Content = SecRef.getContents();
+ if (!Content)
+ return Content.takeError();
+ if (Content->size() != SecRef.getSize())
+ return make_error<JITLinkError>("Section content size does not match "
+ "declared size for " +
+ Name);
+ MachOSec.setContent(*Content);
+ } else {
+ // If this is a zero-fill section then just record the size.
+ MachOSec.setZeroFill(SecRef.getSize());
+ }
+
+ uint32_t SectionFlags =
+ Obj.is64Bit() ? Obj.getSection64(SecRef.getRawDataRefImpl()).flags
+ : Obj.getSection(SecRef.getRawDataRefImpl()).flags;
+
+ MachOSec.setNoDeadStrip(SectionFlags & MachO::S_ATTR_NO_DEAD_STRIP);
+ }
+
+ return Error::success();
+}
+
+// Adds atoms with identified start addresses (but not lengths) for all named
+// atoms.
+// Also, for every non-empty section without a custom atomizer, ensures that
+// an atom exists at offset zero of the section, adding an anonymous atom
+// covering the leading range if necessary.
+Error MachOAtomGraphBuilder::addNonCustomAtoms() {
+ using AddrToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
+ DenseMap<MachOSection *, AddrToAtomMap> SecToAtoms;
+
+ DenseMap<MachOSection *, unsigned> FirstOrdinal;
+ std::vector<DefinedAtom *> AltEntryAtoms;
+
+ DenseSet<StringRef> ProcessedSymbols; // Used to check for duplicate defs.
+
+ for (auto SymI = Obj.symbol_begin(), SymE = Obj.symbol_end(); SymI != SymE;
+ ++SymI) {
+ object::SymbolRef Sym(SymI->getRawDataRefImpl(), &Obj);
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+
+ // Bail out on duplicate definitions: There should never be more than one
+ // definition for a symbol in a given object file.
+ if (ProcessedSymbols.count(*Name))
+ return make_error<JITLinkError>("Duplicate definition within object: " +
+ *Name);
+ else
+ ProcessedSymbols.insert(*Name);
+
+ auto Addr = Sym.getAddress();
+ if (!Addr)
+ return Addr.takeError();
+
+ auto SymType = Sym.getType();
+ if (!SymType)
+ return SymType.takeError();
+
+ auto Flags = Sym.getFlags();
+
+ if (Flags & object::SymbolRef::SF_Undefined) {
+ LLVM_DEBUG(dbgs() << "Adding undef atom \"" << *Name << "\"\n");
+ G->addExternalAtom(*Name);
+ continue;
+ } else if (Flags & object::SymbolRef::SF_Absolute) {
+ LLVM_DEBUG(dbgs() << "Adding absolute \"" << *Name << "\" addr: "
+ << format("0x%016" PRIx64, *Addr) << "\n");
+ auto &A = G->addAbsoluteAtom(*Name, *Addr);
+ A.setGlobal(Flags & object::SymbolRef::SF_Global);
+ A.setExported(Flags & object::SymbolRef::SF_Exported);
+ A.setWeak(Flags & object::SymbolRef::SF_Weak);
+ continue;
+ } else if (Flags & object::SymbolRef::SF_Common) {
+ LLVM_DEBUG({
+ dbgs() << "Adding common \"" << *Name
+ << "\" addr: " << format("0x%016" PRIx64, *Addr) << "\n";
+ });
+ auto &A =
+ G->addCommonAtom(getCommonSection().getGenericSection(), *Name, *Addr,
+ std::max(Sym.getAlignment(), 1U),
+ Obj.getCommonSymbolSize(Sym.getRawDataRefImpl()));
+ A.setGlobal(Flags & object::SymbolRef::SF_Global);
+ A.setExported(Flags & object::SymbolRef::SF_Exported);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "Adding defined atom \"" << *Name << "\"\n");
+
+ // This symbol is neither undefined, absolute, nor common, so it must be
+ // defined in this object. Get its section index.
+ auto SecItr = Sym.getSection();
+ if (!SecItr)
+ return SecItr.takeError();
+
+ uint64_t SectionIndex = (*SecItr)->getIndex() + 1;
+
+ LLVM_DEBUG(dbgs() << " to section index " << SectionIndex << "\n");
+
+ auto SecByIndexItr = Sections.find(SectionIndex);
+ if (SecByIndexItr == Sections.end())
+ return make_error<JITLinkError>("Unrecognized section index in macho");
+
+ auto &Sec = SecByIndexItr->second;
+
+ auto &DA = G->addDefinedAtom(Sec.getGenericSection(), *Name, *Addr,
+ std::max(Sym.getAlignment(), 1U));
+
+ DA.setGlobal(Flags & object::SymbolRef::SF_Global);
+ DA.setExported(Flags & object::SymbolRef::SF_Exported);
+ DA.setWeak(Flags & object::SymbolRef::SF_Weak);
+
+ DA.setCallable(*SymType & object::SymbolRef::ST_Function);
+
+ // Check NDesc flags.
+ {
+ uint16_t NDesc = 0;
+ if (Obj.is64Bit())
+ NDesc = Obj.getSymbol64TableEntry(SymI->getRawDataRefImpl()).n_desc;
+ else
+ NDesc = Obj.getSymbolTableEntry(SymI->getRawDataRefImpl()).n_desc;
+
+ // Record atom for alt-entry post-processing (where the layout-next
+ // constraints will be added).
+ if (NDesc & MachO::N_ALT_ENTRY)
+ AltEntryAtoms.push_back(&DA);
+
+ // If this atom has a no-dead-strip attr attached then mark it live.
+ if (NDesc & MachO::N_NO_DEAD_STRIP)
+ DA.setLive(true);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " Added " << *Name
+ << " addr: " << format("0x%016" PRIx64, *Addr)
+ << ", align: " << DA.getAlignment()
+ << ", section: " << Sec.getGenericSection().getName() << "\n";
+ });
+
+ auto &SecAtoms = SecToAtoms[&Sec];
+ SecAtoms[DA.getAddress() - Sec.getAddress()] = &DA;
+ }
+
+ // Add anonymous atoms.
+ for (auto &KV : Sections) {
+ auto &S = KV.second;
+
+ // Skip empty sections.
+ if (S.empty())
+ continue;
+
+ // Skip sections with custom handling.
+ if (CustomAtomizeFunctions.count(S.getName()))
+ continue;
+
+ auto SAI = SecToAtoms.find(&S);
+
+ // If S is not in the SecToAtoms map then it contained no named atom. Add
+ // one anonymous atom to cover the whole section.
+ if (SAI == SecToAtoms.end()) {
+ SecToAtoms[&S][0] = &G->addAnonymousAtom(
+ S.getGenericSection(), S.getAddress(), S.getAlignment());
+ continue;
+ }
+
+ // Otherwise, check whether this section had an atom covering offset zero.
+ // If not, add one.
+ auto &SecAtoms = SAI->second;
+ if (!SecAtoms.count(0))
+ SecAtoms[0] = &G->addAnonymousAtom(S.getGenericSection(), S.getAddress(),
+ S.getAlignment());
+ }
+
+ LLVM_DEBUG(dbgs() << "MachOGraphBuilder setting atom content\n");
+
+ // Set atom contents and any section-based flags.
+ for (auto &KV : SecToAtoms) {
+ auto &S = *KV.first;
+ auto &SecAtoms = KV.second;
+
+ // Iterate the atoms in reverse order and set up their contents.
+ JITTargetAddress LastAtomAddr = S.getSize();
+ for (auto I = SecAtoms.rbegin(), E = SecAtoms.rend(); I != E; ++I) {
+ auto Offset = I->first;
+ auto &A = *I->second;
+ LLVM_DEBUG({
+ dbgs() << " " << A << " to [ " << S.getAddress() + Offset << " .. "
+ << S.getAddress() + LastAtomAddr << " ]\n";
+ });
+
+ if (S.isZeroFill())
+ A.setZeroFill(LastAtomAddr - Offset);
+ else
+ A.setContent(S.getContent().substr(Offset, LastAtomAddr - Offset));
+
+ // If the section has no-dead-strip set then mark the atom as live.
+ if (S.isNoDeadStrip())
+ A.setLive(true);
+
+ LastAtomAddr = Offset;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Adding alt-entry starts\n");
+
+ // Sort alt-entry atoms by address in ascending order.
+ llvm::sort(AltEntryAtoms.begin(), AltEntryAtoms.end(),
+ [](const DefinedAtom *LHS, const DefinedAtom *RHS) {
+ return LHS->getAddress() < RHS->getAddress();
+ });
+
+ // Process alt-entry atoms in address order to build the table of alt-entry
+ // atoms to alt-entry chain starts.
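+ // For example, if atoms A, B and C are laid out contiguously and B and C
+ // carry N_ALT_ENTRY, processing B adds { A -> A, B -> A }, and processing C
+ // then adds { C -> A } via B's existing entry.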
+ for (auto *DA : AltEntryAtoms) {
+ assert(!AltEntryStarts.count(DA) && "Duplicate entry in AltEntryStarts");
+
+ // DA is an alt-entry atom. Look for the predecessor atom that it is locked
+ // to, bailing out if we do not find one.
+ auto AltEntryPred = G->findAtomByAddress(DA->getAddress() - 1);
+ if (!AltEntryPred)
+ return AltEntryPred.takeError();
+
+ // Add a LayoutNext edge from the predecessor to this atom.
+ AltEntryPred->setLayoutNext(*DA);
+
+ // Check to see whether the predecessor itself is an alt-entry atom.
+ auto AltEntryStartItr = AltEntryStarts.find(&*AltEntryPred);
+ if (AltEntryStartItr != AltEntryStarts.end()) {
+ // If the predecessor was an alt-entry atom then re-use its value.
+ LLVM_DEBUG({
+ dbgs() << " " << *DA << " -> " << *AltEntryStartItr->second
+ << " (based on existing entry for " << *AltEntryPred << ")\n";
+ });
+ AltEntryStarts[DA] = AltEntryStartItr->second;
+ } else {
+ // If the predecessor does not have an entry then add an entry for this
+ // atom (i.e. the alt_entry atom) and a self-reference entry for the
+ // predecessor atom that is the start of this chain.
+ LLVM_DEBUG({
+ dbgs() << " " << *AltEntryPred << " -> " << *AltEntryPred << "\n"
+ << " " << *DA << " -> " << *AltEntryPred << "\n";
+ });
+ AltEntryStarts[&*AltEntryPred] = &*AltEntryPred;
+ AltEntryStarts[DA] = &*AltEntryPred;
+ }
+ }
+
+ return Error::success();
+}
+
+Error MachOAtomGraphBuilder::addAtoms() {
+ // Add all named atoms.
+ if (auto Err = addNonCustomAtoms())
+ return Err;
+
+ // Process special sections.
+ for (auto &KV : Sections) {
+ auto &S = KV.second;
+ auto HI = CustomAtomizeFunctions.find(S.getGenericSection().getName());
+ if (HI != CustomAtomizeFunctions.end()) {
+ auto &Atomize = HI->second;
+ if (auto Err = Atomize(S))
+ return Err;
+ }
+ }
+
+ return Error::success();
+}
+
+} // end namespace jitlink
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
new file mode 100644
index 000000000000..72d441b24d06
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachOAtomGraphBuilder.h
@@ -0,0 +1,138 @@
+//===----- MachOAtomGraphBuilder.h - MachO AtomGraph builder ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic MachO AtomGraph building code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
+#define LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+#include "JITLinkGeneric.h"
+
+#include "llvm/Object/MachO.h"
+
+namespace llvm {
+namespace jitlink {
+
+class MachOAtomGraphBuilder {
+public:
+ virtual ~MachOAtomGraphBuilder();
+ Expected<std::unique_ptr<AtomGraph>> buildGraph();
+
+protected:
+ using OffsetToAtomMap = std::map<JITTargetAddress, DefinedAtom *>;
+
+ class MachOSection {
+ public:
+ MachOSection() = default;
+
+ /// Create a MachO section with the given address and alignment.
+ MachOSection(Section &GenericSection, JITTargetAddress Address,
+ unsigned Alignment)
+ : Address(Address), GenericSection(&GenericSection),
+ Alignment(Alignment) {}
+
+ /// Create a section without address, content or size (used for common
+ /// symbol sections).
+ MachOSection(Section &GenericSection) : GenericSection(&GenericSection) {}
+
+ Section &getGenericSection() const {
+ assert(GenericSection && "Section is null");
+ return *GenericSection;
+ }
+
+ StringRef getName() const {
+ assert(GenericSection && "No generic section attached");
+ return GenericSection->getName();
+ }
+
+ MachOSection &setContent(StringRef Content) {
+ assert(!ContentPtr && !Size && "Content/zeroFill already set");
+ ContentPtr = Content.data();
+ Size = Content.size();
+ return *this;
+ }
+
+ MachOSection &setZeroFill(uint64_t Size) {
+ assert(!ContentPtr && !this->Size && "Content/zeroFill already set");
+ this->Size = Size;
+ return *this;
+ }
+
+ bool isZeroFill() const { return !ContentPtr; }
+
+ bool empty() const { return getSize() == 0; }
+
+ size_t getSize() const { return Size; }
+
+ StringRef getContent() const {
+ assert(ContentPtr && "getContent() called on zero-fill section");
+ return {ContentPtr, static_cast<size_t>(Size)};
+ }
+
+ JITTargetAddress getAddress() const { return Address; }
+
+ unsigned getAlignment() const { return Alignment; }
+
+ MachOSection &setNoDeadStrip(bool NoDeadStrip) {
+ this->NoDeadStrip = NoDeadStrip;
+ return *this;
+ }
+
+ bool isNoDeadStrip() const { return NoDeadStrip; }
+
+ private:
+ JITTargetAddress Address = 0;
+ Section *GenericSection = nullptr;
+ const char *ContentPtr = nullptr;
+ uint64_t Size = 0;
+ unsigned Alignment = 0;
+ bool NoDeadStrip = false;
+ };
+
+ using CustomAtomizeFunction = std::function<Error(MachOSection &S)>;
+
+ MachOAtomGraphBuilder(const object::MachOObjectFile &Obj);
+
+ AtomGraph &getGraph() const { return *G; }
+
+ const object::MachOObjectFile &getObject() const { return Obj; }
+
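+ /// Register a custom atomizer for the named section. For example, the
+ /// x86-64 builder (MachO_x86_64.cpp) registers one for "__eh_frame" that
+ /// builds that section's atoms via addEHFrame rather than the default
+ /// named-atom handling.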
+ void addCustomAtomizer(StringRef SectionName, CustomAtomizeFunction Atomizer);
+
+ virtual Error addRelocations() = 0;
+
+ /// Returns true if Atom A and Atom B are at a fixed offset from one another
+ /// (i.e. if they're part of the same alt-entry chain).
+ bool areLayoutLocked(const Atom &A, const Atom &B);
+
+private:
+ static unsigned getPointerSize(const object::MachOObjectFile &Obj);
+ static support::endianness getEndianness(const object::MachOObjectFile &Obj);
+
+ MachOSection &getCommonSection();
+
+ Error parseSections();
+ Error addNonCustomAtoms();
+ Error addAtoms();
+
+ const object::MachOObjectFile &Obj;
+ std::unique_ptr<AtomGraph> G;
+ DenseMap<const DefinedAtom *, const DefinedAtom *> AltEntryStarts;
+ DenseMap<unsigned, MachOSection> Sections;
+ StringMap<CustomAtomizeFunction> CustomAtomizeFunctions;
+ Optional<MachOSection> CommonSymbolsSection;
+};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LIB_EXECUTIONENGINE_JITLINK_MACHOATOMGRAPHBUILDER_H
diff --git a/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
new file mode 100644
index 000000000000..4010678c6d33
--- /dev/null
+++ b/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -0,0 +1,608 @@
+//===---- MachO_x86_64.cpp -JIT linker implementation for MachO/x86-64 ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO/x86-64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
+
+#include "BasicGOTAndStubsBuilder.h"
+#include "MachOAtomGraphBuilder.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::MachO_x86_64_Edges;
+
+namespace {
+
+class MachOAtomGraphBuilder_x86_64 : public MachOAtomGraphBuilder {
+public:
+ MachOAtomGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
+ : MachOAtomGraphBuilder(Obj),
+ NumSymbols(Obj.getSymtabLoadCommand().nsyms) {
+ addCustomAtomizer("__eh_frame", [this](MachOSection &EHFrameSection) {
+ return addEHFrame(getGraph(), EHFrameSection.getGenericSection(),
+ EHFrameSection.getContent(),
+ EHFrameSection.getAddress(), NegDelta32, Delta64);
+ });
+ }
+
+private:
+ static Expected<MachOX86RelocationKind>
+ getRelocationKind(const MachO::relocation_info &RI) {
+ switch (RI.r_type) {
+ case MachO::X86_64_RELOC_UNSIGNED:
+ if (!RI.r_pcrel && RI.r_length == 3)
+ return RI.r_extern ? Pointer64 : Pointer64Anon;
+ break;
+ case MachO::X86_64_RELOC_SIGNED:
+ if (RI.r_pcrel && RI.r_length == 2)
+ return RI.r_extern ? PCRel32 : PCRel32Anon;
+ break;
+ case MachO::X86_64_RELOC_BRANCH:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return Branch32;
+ break;
+ case MachO::X86_64_RELOC_GOT_LOAD:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return PCRel32GOTLoad;
+ break;
+ case MachO::X86_64_RELOC_GOT:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return PCRel32GOT;
+ break;
+ case MachO::X86_64_RELOC_SUBTRACTOR:
+ // SUBTRACTOR must be non-pc-rel, extern, with length 2 or 3.
+ // Initially represent SUBTRACTOR relocations with 'Delta<W>'. They may
+ // be turned into NegDelta<W> by parsePairRelocation.
+ if (!RI.r_pcrel && RI.r_extern) {
+ if (RI.r_length == 2)
+ return Delta32;
+ else if (RI.r_length == 3)
+ return Delta64;
+ }
+ break;
+ case MachO::X86_64_RELOC_SIGNED_1:
+ if (RI.r_pcrel && RI.r_length == 2)
+ return RI.r_extern ? PCRel32Minus1 : PCRel32Minus1Anon;
+ break;
+ case MachO::X86_64_RELOC_SIGNED_2:
+ if (RI.r_pcrel && RI.r_length == 2)
+ return RI.r_extern ? PCRel32Minus2 : PCRel32Minus2Anon;
+ break;
+ case MachO::X86_64_RELOC_SIGNED_4:
+ if (RI.r_pcrel && RI.r_length == 2)
+ return RI.r_extern ? PCRel32Minus4 : PCRel32Minus4Anon;
+ break;
+ case MachO::X86_64_RELOC_TLV:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return PCRel32TLV;
+ break;
+ }
+
+ return make_error<JITLinkError>(
+ "Unsupported x86-64 relocation: address=" +
+ formatv("{0:x8}", RI.r_address) +
+ ", symbolnum=" + formatv("{0:x6}", RI.r_symbolnum) +
+ ", kind=" + formatv("{0:x1}", RI.r_type) +
+ ", pc_rel=" + (RI.r_pcrel ? "true" : "false") +
+ ", extern= " + (RI.r_extern ? "true" : "false") +
+ ", length=" + formatv("{0:d}", RI.r_length));
+ }
+
+ Expected<Atom &> findAtomBySymbolIndex(const MachO::relocation_info &RI) {
+ auto &Obj = getObject();
+ if (RI.r_symbolnum >= NumSymbols)
+ return make_error<JITLinkError>("Symbol index out of range");
+ auto SymI = Obj.getSymbolByIndex(RI.r_symbolnum);
+ auto Name = SymI->getName();
+ if (!Name)
+ return Name.takeError();
+ return getGraph().getAtomByName(*Name);
+ }
+
+ MachO::relocation_info
+ getRelocationInfo(const object::relocation_iterator RelItr) {
+ MachO::any_relocation_info ARI =
+ getObject().getRelocation(RelItr->getRawDataRefImpl());
+ MachO::relocation_info RI;
+ memcpy(&RI, &ARI, sizeof(MachO::relocation_info));
+ return RI;
+ }
+
+ using PairRelocInfo = std::tuple<MachOX86RelocationKind, Atom *, uint64_t>;
+
+ // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, returns the
+ // edge kind, target atom, and addend to be used.
+ Expected<PairRelocInfo>
+ parsePairRelocation(DefinedAtom &AtomToFix, Edge::Kind SubtractorKind,
+ const MachO::relocation_info &SubRI,
+ JITTargetAddress FixupAddress, const char *FixupContent,
+ object::relocation_iterator &UnsignedRelItr,
+ object::relocation_iterator &RelEnd) {
+ using namespace support;
+
+ assert(((SubtractorKind == Delta32 && SubRI.r_length == 2) ||
+ (SubtractorKind == Delta64 && SubRI.r_length == 3)) &&
+ "Subtractor kind should match length");
+ assert(SubRI.r_extern && "SUBTRACTOR reloc symbol should be extern");
+ assert(!SubRI.r_pcrel && "SUBTRACTOR reloc should not be PCRel");
+
+ if (UnsignedRelItr == RelEnd)
+ return make_error<JITLinkError>("x86_64 SUBTRACTOR without paired "
+ "UNSIGNED relocation");
+
+ auto UnsignedRI = getRelocationInfo(UnsignedRelItr);
+
+ if (SubRI.r_address != UnsignedRI.r_address)
+ return make_error<JITLinkError>("x86_64 SUBTRACTOR and paired UNSIGNED "
+ "point to different addresses");
+
+ if (SubRI.r_length != UnsignedRI.r_length)
+ return make_error<JITLinkError>("length of x86_64 SUBTRACTOR and paired "
+ "UNSIGNED reloc must match");
+
+ auto FromAtom = findAtomBySymbolIndex(SubRI);
+ if (!FromAtom)
+ return FromAtom.takeError();
+
+ // Read the current fixup value.
+ uint64_t FixupValue = 0;
+ if (SubRI.r_length == 3)
+ FixupValue = *(const little64_t *)FixupContent;
+ else
+ FixupValue = *(const little32_t *)FixupContent;
+
+ // Find 'ToAtom' using symbol number or address, depending on whether the
+ // paired UNSIGNED relocation is extern.
+ Atom *ToAtom = nullptr;
+ if (UnsignedRI.r_extern) {
+ // Find target atom by symbol index.
+ if (auto ToAtomOrErr = findAtomBySymbolIndex(UnsignedRI))
+ ToAtom = &*ToAtomOrErr;
+ else
+ return ToAtomOrErr.takeError();
+ } else {
+ if (auto ToAtomOrErr = getGraph().findAtomByAddress(FixupValue))
+ ToAtom = &*ToAtomOrErr;
+ else
+ return ToAtomOrErr.takeError();
+ FixupValue -= ToAtom->getAddress();
+ }
+
+ MachOX86RelocationKind DeltaKind;
+ Atom *TargetAtom;
+ uint64_t Addend;
+ if (areLayoutLocked(AtomToFix, *FromAtom)) {
+ TargetAtom = ToAtom;
+ DeltaKind = (SubRI.r_length == 3) ? Delta64 : Delta32;
+ Addend = FixupValue + (FixupAddress - FromAtom->getAddress());
+ // FIXME: handle extern 'from'.
+ } else if (areLayoutLocked(AtomToFix, *ToAtom)) {
+ TargetAtom = &*FromAtom;
+ DeltaKind = (SubRI.r_length == 3) ? NegDelta64 : NegDelta32;
+ Addend = FixupValue - (FixupAddress - ToAtom->getAddress());
+ } else {
+ // AtomToFix was neither FromAtom nor ToAtom.
+ return make_error<JITLinkError>("SUBTRACTOR relocation must fix up "
+ "either 'A' or 'B' (or an atom in one "
+ "of their alt-entry groups)");
+ }
+
+ return PairRelocInfo(DeltaKind, TargetAtom, Addend);
+ }
+
+ Error addRelocations() override {
+ using namespace support;
+ auto &G = getGraph();
+ auto &Obj = getObject();
+
+ for (auto &S : Obj.sections()) {
+
+ JITTargetAddress SectionAddress = S.getAddress();
+
+ for (auto RelItr = S.relocation_begin(), RelEnd = S.relocation_end();
+ RelItr != RelEnd; ++RelItr) {
+
+ MachO::relocation_info RI = getRelocationInfo(RelItr);
+
+ // Sanity check the relocation kind.
+ auto Kind = getRelocationKind(RI);
+ if (!Kind)
+ return Kind.takeError();
+
+ // Find the address of the value to fix up.
+ JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
+
+ LLVM_DEBUG({
+ dbgs() << "Processing relocation at "
+ << format("0x%016" PRIx64, FixupAddress) << "\n";
+ });
+
+ // Find the atom that the fixup points to.
+ DefinedAtom *AtomToFix = nullptr;
+ {
+ auto AtomToFixOrErr = G.findAtomByAddress(FixupAddress);
+ if (!AtomToFixOrErr)
+ return AtomToFixOrErr.takeError();
+ AtomToFix = &*AtomToFixOrErr;
+ }
+
+ if (FixupAddress + static_cast<JITTargetAddress>(1ULL << RI.r_length) >
+ AtomToFix->getAddress() + AtomToFix->getContent().size())
+ return make_error<JITLinkError>(
+ "Relocation content extends past end of fixup atom");
+
+ // Get a pointer to the fixup content.
+ const char *FixupContent = AtomToFix->getContent().data() +
+ (FixupAddress - AtomToFix->getAddress());
+
+ // The target atom and addend will be populated by the switch below.
+ Atom *TargetAtom = nullptr;
+ uint64_t Addend = 0;
+
+ switch (*Kind) {
+ case Branch32:
+ case PCRel32:
+ case PCRel32GOTLoad:
+ case PCRel32GOT:
+ if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
+ TargetAtom = &*TargetAtomOrErr;
+ else
+ return TargetAtomOrErr.takeError();
+ Addend = *(const ulittle32_t *)FixupContent;
+ break;
+ case Pointer64:
+ if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
+ TargetAtom = &*TargetAtomOrErr;
+ else
+ return TargetAtomOrErr.takeError();
+ Addend = *(const ulittle64_t *)FixupContent;
+ break;
+ case Pointer64Anon: {
+ JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
+ if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
+ TargetAtom = &*TargetAtomOrErr;
+ else
+ return TargetAtomOrErr.takeError();
+ Addend = TargetAddress - TargetAtom->getAddress();
+ break;
+ }
+ case PCRel32Minus1:
+ case PCRel32Minus2:
+ case PCRel32Minus4:
+ if (auto TargetAtomOrErr = findAtomBySymbolIndex(RI))
+ TargetAtom = &*TargetAtomOrErr;
+ else
+ return TargetAtomOrErr.takeError();
+ Addend = *(const ulittle32_t *)FixupContent +
+ (1 << (*Kind - PCRel32Minus1));
+ break;
+ case PCRel32Anon: {
+ JITTargetAddress TargetAddress =
+ FixupAddress + 4 + *(const ulittle32_t *)FixupContent;
+ if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
+ TargetAtom = &*TargetAtomOrErr;
+ else
+ return TargetAtomOrErr.takeError();
+ Addend = TargetAddress - TargetAtom->getAddress();
+ break;
+ }
+ case PCRel32Minus1Anon:
+ case PCRel32Minus2Anon:
+ case PCRel32Minus4Anon: {
+ JITTargetAddress Delta =
+ static_cast<JITTargetAddress>(1ULL << (*Kind - PCRel32Minus1Anon));
+ JITTargetAddress TargetAddress =
+ FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent;
+ if (auto TargetAtomOrErr = G.findAtomByAddress(TargetAddress))
+ TargetAtom = &*TargetAtomOrErr;
+ else
+ return TargetAtomOrErr.takeError();
+ Addend = TargetAddress - TargetAtom->getAddress();
+ break;
+ }
+ case Delta32:
+ case Delta64: {
+ // We use Delta32/Delta64 to represent SUBTRACTOR relocations.
+ // parsePairRelocation handles the paired reloc, and returns the
+ // edge kind to be used (either Delta32/Delta64, or
+ // NegDelta32/NegDelta64, depending on the direction of the
+ // subtraction) along with the addend.
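+ // For example, a field that should hold "ToAtom - FromAtom" (plus any
+ // constant already stored at the fixup): if the fixup lies in FromAtom's
+ // alt-entry group the pair becomes a Delta edge targeting ToAtom; if it
+ // lies in ToAtom's group it becomes a NegDelta edge targeting FromAtom.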
+ auto PairInfo =
+ parsePairRelocation(*AtomToFix, *Kind, RI, FixupAddress,
+ FixupContent, ++RelItr, RelEnd);
+ if (!PairInfo)
+ return PairInfo.takeError();
+ std::tie(*Kind, TargetAtom, Addend) = *PairInfo;
+ assert(TargetAtom && "No target atom from parsePairRelocation?");
+ break;
+ }
+ default:
+ llvm_unreachable("Special relocation kind should not appear in "
+ "mach-o file");
+ }
+
+ LLVM_DEBUG({
+ Edge GE(*Kind, FixupAddress - AtomToFix->getAddress(), *TargetAtom,
+ Addend);
+ printEdge(dbgs(), *AtomToFix, GE,
+ getMachOX86RelocationKindName(*Kind));
+ dbgs() << "\n";
+ });
+ AtomToFix->addEdge(*Kind, FixupAddress - AtomToFix->getAddress(),
+ *TargetAtom, Addend);
+ }
+ }
+ return Error::success();
+ }
+
+ unsigned NumSymbols = 0;
+};
+
+class MachO_x86_64_GOTAndStubsBuilder
+ : public BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder> {
+public:
+ MachO_x86_64_GOTAndStubsBuilder(AtomGraph &G)
+ : BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder>(G) {}
+
+ bool isGOTEdge(Edge &E) const {
+ return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad;
+ }
+
+ DefinedAtom &createGOTEntry(Atom &Target) {
+ auto &GOTEntryAtom = G.addAnonymousAtom(getGOTSection(), 0x0, 8);
+ GOTEntryAtom.setContent(
+ StringRef(reinterpret_cast<const char *>(NullGOTEntryContent), 8));
+ GOTEntryAtom.addEdge(Pointer64, 0, Target, 0);
+ return GOTEntryAtom;
+ }
+
+ void fixGOTEdge(Edge &E, Atom &GOTEntry) {
+ assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) &&
+ "Not a GOT edge?");
+ E.setKind(PCRel32);
+ E.setTarget(GOTEntry);
+ // Leave the edge addend as-is.
+ }
+
+ bool isExternalBranchEdge(Edge &E) {
+ return E.getKind() == Branch32 && !E.getTarget().isDefined();
+ }
+
+ DefinedAtom &createStub(Atom &Target) {
+ auto &StubAtom = G.addAnonymousAtom(getStubsSection(), 0x0, 2);
+ StubAtom.setContent(
+ StringRef(reinterpret_cast<const char *>(StubContent), 6));
+
+ // Re-use GOT entries for stub targets.
+ auto &GOTEntryAtom = getGOTEntryAtom(Target);
+ StubAtom.addEdge(PCRel32, 2, GOTEntryAtom, 0);
+
+ return StubAtom;
+ }
+
+ void fixExternalBranchEdge(Edge &E, Atom &Stub) {
+ assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
+ assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?");
+ E.setTarget(Stub);
+ }
+
+private:
+ Section &getGOTSection() {
+ if (!GOTSection)
+ GOTSection = &G.createSection("$__GOT", 8, sys::Memory::MF_READ, false);
+ return *GOTSection;
+ }
+
+ Section &getStubsSection() {
+ if (!StubsSection) {
+ auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ StubsSection = &G.createSection("$__STUBS", 8, StubsProt, false);
+ }
+ return *StubsSection;
+ }
+
+ static const uint8_t NullGOTEntryContent[8];
+ static const uint8_t StubContent[6];
+ Section *GOTSection = nullptr;
+ Section *StubsSection = nullptr;
+};
+
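+// NullGOTEntryContent is an 8-byte null slot; createGOTEntry() attaches a
+// Pointer64 edge so the target's final address is written into it at fixup
+// time. StubContent encodes "jmpq *0(%rip)" (FF 25 <disp32>); createStub()
+// attaches a PCRel32 edge at offset 2 so the displacement ends up pointing at
+// the stub's (re-used) GOT entry.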
+const uint8_t MachO_x86_64_GOTAndStubsBuilder::NullGOTEntryContent[8] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+const uint8_t MachO_x86_64_GOTAndStubsBuilder::StubContent[6] = {
+ 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00};
+} // namespace
+
+namespace llvm {
+namespace jitlink {
+
+class MachOJITLinker_x86_64 : public JITLinker<MachOJITLinker_x86_64> {
+ friend class JITLinker<MachOJITLinker_x86_64>;
+
+public:
+ MachOJITLinker_x86_64(std::unique_ptr<JITLinkContext> Ctx,
+ PassConfiguration PassConfig)
+ : JITLinker(std::move(Ctx), std::move(PassConfig)) {}
+
+private:
+ StringRef getEdgeKindName(Edge::Kind R) const override {
+ return getMachOX86RelocationKindName(R);
+ }
+
+ Expected<std::unique_ptr<AtomGraph>>
+ buildGraph(MemoryBufferRef ObjBuffer) override {
+ auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjBuffer);
+ if (!MachOObj)
+ return MachOObj.takeError();
+ return MachOAtomGraphBuilder_x86_64(**MachOObj).buildGraph();
+ }
+
+ static Error targetOutOfRangeError(const Atom &A, const Edge &E) {
+ std::string ErrMsg;
+ {
+ raw_string_ostream ErrStream(ErrMsg);
+ ErrStream << "Relocation target out of range: ";
+ printEdge(ErrStream, A, E, getMachOX86RelocationKindName(E.getKind()));
+ ErrStream << "\n";
+ }
+ return make_error<JITLinkError>(std::move(ErrMsg));
+ }
+
+ Error applyFixup(DefinedAtom &A, const Edge &E, char *AtomWorkingMem) const {
+ using namespace support;
+
+ char *FixupPtr = AtomWorkingMem + E.getOffset();
+ JITTargetAddress FixupAddress = A.getAddress() + E.getOffset();
+
+ switch (E.getKind()) {
+ case Branch32:
+ case PCRel32:
+ case PCRel32Anon: {
+ int64_t Value =
+ E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return targetOutOfRangeError(A, E);
+ *(little32_t *)FixupPtr = Value;
+ break;
+ }
+ case Pointer64:
+ case Pointer64Anon: {
+ uint64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(ulittle64_t *)FixupPtr = Value;
+ break;
+ }
+ case PCRel32Minus1:
+ case PCRel32Minus2:
+ case PCRel32Minus4: {
+ int Delta = 4 + (1 << (E.getKind() - PCRel32Minus1));
+ int64_t Value =
+ E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend();
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return targetOutOfRangeError(A, E);
+ *(little32_t *)FixupPtr = Value;
+ break;
+ }
+ case PCRel32Minus1Anon:
+ case PCRel32Minus2Anon:
+ case PCRel32Minus4Anon: {
+ int Delta = 4 + (1 << (E.getKind() - PCRel32Minus1Anon));
+ int64_t Value =
+ E.getTarget().getAddress() - (FixupAddress + Delta) + E.getAddend();
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return targetOutOfRangeError(A, E);
+ *(little32_t *)FixupPtr = Value;
+ break;
+ }
+ case Delta32:
+ case Delta64:
+ case NegDelta32:
+ case NegDelta64: {
+ int64_t Value;
+ if (E.getKind() == Delta32 || E.getKind() == Delta64)
+ Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+ else
+ Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
+
+ if (E.getKind() == Delta32 || E.getKind() == NegDelta32) {
+ if (Value < std::numeric_limits<int32_t>::min() ||
+ Value > std::numeric_limits<int32_t>::max())
+ return targetOutOfRangeError(A, E);
+ *(little32_t *)FixupPtr = Value;
+ } else
+ *(little64_t *)FixupPtr = Value;
+ break;
+ }
+ default:
+ llvm_unreachable("Unrecognized edge kind");
+ }
+
+ return Error::success();
+ }
+
+ uint64_t NullValue = 0;
+};
+
+void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
+ PassConfiguration Config;
+ Triple TT("x86_64-apple-macosx");
+
+ if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ // Add a mark-live pass.
+ if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllAtomsLive);
+
+ // Add an in-place GOT/Stubs pass.
+ Config.PostPrunePasses.push_back([](AtomGraph &G) -> Error {
+ MachO_x86_64_GOTAndStubsBuilder(G).run();
+ return Error::success();
+ });
+ }
+
+ if (auto Err = Ctx->modifyPassConfig(TT, Config))
+ return Ctx->notifyFailed(std::move(Err));
+
+ // Construct a JITLinker and run the link function.
+ MachOJITLinker_x86_64::link(std::move(Ctx), std::move(Config));
+}
+
+StringRef getMachOX86RelocationKindName(Edge::Kind R) {
+ switch (R) {
+ case Branch32:
+ return "Branch32";
+ case Pointer64:
+ return "Pointer64";
+ case Pointer64Anon:
+ return "Pointer64Anon";
+ case PCRel32:
+ return "PCRel32";
+ case PCRel32Minus1:
+ return "PCRel32Minus1";
+ case PCRel32Minus2:
+ return "PCRel32Minus2";
+ case PCRel32Minus4:
+ return "PCRel32Minus4";
+ case PCRel32Anon:
+ return "PCRel32Anon";
+ case PCRel32Minus1Anon:
+ return "PCRel32Minus1Anon";
+ case PCRel32Minus2Anon:
+ return "PCRel32Minus2Anon";
+ case PCRel32Minus4Anon:
+ return "PCRel32Minus4Anon";
+ case PCRel32GOTLoad:
+ return "PCRel32GOTLoad";
+ case PCRel32GOT:
+ return "PCRel32GOT";
+ case PCRel32TLV:
+ return "PCRel32TLV";
+ case Delta32:
+ return "Delta32";
+ case Delta64:
+ return "Delta64";
+ case NegDelta32:
+ return "NegDelta32";
+ case NegDelta64:
+ return "NegDelta64";
+ default:
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
+ }
+}
+
+} // end namespace jitlink
+} // end namespace llvm
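In applyFixup above, every PC-relative kind reduces to Target - (FixupAddress + Delta) + Addend followed by a 32-bit range check: Delta is 4 for Branch32/PCRel32, and 4 + (1 << (Kind - PCRel32Minus1)) yields 5, 6 and 8 for the Minus1/Minus2/Minus4 variants, which account for extra instruction bytes that follow the fixup. A standalone sketch of that computation, with illustrative names (not from the patch):

#include <cstdint>
#include <limits>
#include <optional>

enum PCRelKind { PCRel32 = 0, PCRel32Minus1, PCRel32Minus2, PCRel32Minus4 };

// Returns the 32-bit PC-relative value, or nullopt when the target is out of
// range (the case applyFixup reports via targetOutOfRangeError).
std::optional<int32_t> computePCRel32(PCRelKind K, uint64_t TargetAddr,
                                      uint64_t FixupAddr, int64_t Addend) {
  int Delta = (K == PCRel32) ? 4 : 4 + (1 << (K - PCRel32Minus1));
  int64_t Value = static_cast<int64_t>(TargetAddr) -
                  static_cast<int64_t>(FixupAddr + Delta) + Addend;
  if (Value < std::numeric_limits<int32_t>::min() ||
      Value > std::numeric_limits<int32_t>::max())
    return std::nullopt;
  return static_cast<int32_t>(Value);
}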
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index ffc6707e1488..08815b7a80ae 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -1,9 +1,8 @@
//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 1119e138720f..77097fc0d17e 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -1,9 +1,8 @@
//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 21af6b585c41..2ad9d24555f3 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -1,9 +1,8 @@
//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
index b473ac3faf4c..1a2667736926 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -1,9 +1,8 @@
//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 241eb3600da7..99bf53bc3afa 100644
--- a/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -1,9 +1,8 @@
//===----- CompileOnDemandLayer.cpp - Lazily emit IR on first call --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/CompileUtils.cpp b/lib/ExecutionEngine/Orc/CompileUtils.cpp
new file mode 100644
index 000000000000..d46b6fcf9a5f
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/CompileUtils.cpp
@@ -0,0 +1,86 @@
+//===------ CompileUtils.cpp - Utilities for compiling IR in the JIT ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SmallVectorMemoryBuffer.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+
+namespace llvm {
+namespace orc {
+
+/// Compile a Module to an ObjectFile.
+SimpleCompiler::CompileResult SimpleCompiler::operator()(Module &M) {
+ CompileResult CachedObject = tryToLoadFromObjectCache(M);
+ if (CachedObject)
+ return CachedObject;
+
+ SmallVector<char, 0> ObjBufferSV;
+
+ {
+ raw_svector_ostream ObjStream(ObjBufferSV);
+
+ legacy::PassManager PM;
+ MCContext *Ctx;
+ if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
+ llvm_unreachable("Target does not support MC emission.");
+ PM.run(M);
+ }
+
+ auto ObjBuffer = llvm::make_unique<SmallVectorMemoryBuffer>(
+ std::move(ObjBufferSV),
+ "<in memory object compiled from " + M.getModuleIdentifier() + ">");
+
+ auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
+
+ if (Obj) {
+ notifyObjectCompiled(M, *ObjBuffer);
+ return std::move(ObjBuffer);
+ }
+
+ // TODO: Actually report errors helpfully.
+ consumeError(Obj.takeError());
+ return nullptr;
+}
+
+SimpleCompiler::CompileResult
+SimpleCompiler::tryToLoadFromObjectCache(const Module &M) {
+ if (!ObjCache)
+ return CompileResult();
+
+ return ObjCache->getObject(&M);
+}
+
+void SimpleCompiler::notifyObjectCompiled(const Module &M,
+ const MemoryBuffer &ObjBuffer) {
+ if (ObjCache)
+ ObjCache->notifyObjectCompiled(&M, ObjBuffer.getMemBufferRef());
+}
+
+ConcurrentIRCompiler::ConcurrentIRCompiler(JITTargetMachineBuilder JTMB,
+ ObjectCache *ObjCache)
+ : JTMB(std::move(JTMB)), ObjCache(ObjCache) {}
+
+std::unique_ptr<MemoryBuffer> ConcurrentIRCompiler::operator()(Module &M) {
+ auto TM = cantFail(JTMB.createTargetMachine());
+ SimpleCompiler C(*TM, ObjCache);
+ return C(M);
+}
+
+} // end namespace orc
+} // end namespace llvm
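SimpleCompiler above captures the compile-to-memory pattern (a legacy::PassManager running addPassesToEmitMC into a SmallVectorMemoryBuffer), and ConcurrentIRCompiler builds a fresh TargetMachine per call so it can be invoked from several threads. A hedged usage sketch, assuming a configured TargetMachine TM and a Module M already exist; the helper itself is illustrative, only SimpleCompiler comes from the code above:

#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetMachine.h"

// Illustrative helper: compile M with the SimpleCompiler defined above.
std::unique_ptr<llvm::MemoryBuffer> compileToObject(llvm::TargetMachine &TM,
                                                    llvm::Module &M) {
  llvm::orc::SimpleCompiler Compile(TM, /*ObjCache=*/nullptr);
  auto ObjBuffer = Compile(M); // runs the codegen pipeline shown above
  return ObjBuffer;            // nullptr if object creation failed
}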
diff --git a/lib/ExecutionEngine/Orc/Core.cpp b/lib/ExecutionEngine/Orc/Core.cpp
index 73c0bcdf7d28..dac37e030e0c 100644
--- a/lib/ExecutionEngine/Orc/Core.cpp
+++ b/lib/ExecutionEngine/Orc/Core.cpp
@@ -1,9 +1,8 @@
//===--- Core.cpp - Core ORC APIs (MaterializationUnit, JITDylib, etc.) ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,17 +26,17 @@ namespace {
#ifndef NDEBUG
-cl::opt<bool> PrintHidden("debug-orc-print-hidden", cl::init(false),
+cl::opt<bool> PrintHidden("debug-orc-print-hidden", cl::init(true),
cl::desc("debug print hidden symbols defined by "
"materialization units"),
cl::Hidden);
-cl::opt<bool> PrintCallable("debug-orc-print-callable", cl::init(false),
+cl::opt<bool> PrintCallable("debug-orc-print-callable", cl::init(true),
cl::desc("debug print callable symbols defined by "
"materialization units"),
cl::Hidden);
-cl::opt<bool> PrintData("debug-orc-print-data", cl::init(false),
+cl::opt<bool> PrintData("debug-orc-print-data", cl::init(true),
cl::desc("debug print data symbols defined by "
"materialization units"),
cl::Hidden);
@@ -134,8 +133,6 @@ struct PrintSymbolMapElemsMatchingCLOpts {
namespace llvm {
namespace orc {
- SymbolStringPool::PoolMapEntry SymbolStringPtr::Tombstone(0);
-
char FailedToMaterialize::ID = 0;
char SymbolsNotFound::ID = 0;
char SymbolsCouldNotBeRemoved::ID = 0;
@@ -222,6 +219,31 @@ raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) {
return OS;
}
+raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases) {
+ OS << "{";
+ for (auto &KV : Aliases)
+ OS << " " << *KV.first << ": " << KV.second.Aliasee << " "
+ << KV.second.AliasFlags;
+ OS << " }\n";
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) {
+ switch (S) {
+ case SymbolState::Invalid:
+ return OS << "Invalid";
+ case SymbolState::NeverSearched:
+ return OS << "Never-Searched";
+ case SymbolState::Materializing:
+ return OS << "Materializing";
+ case SymbolState::Resolved:
+ return OS << "Resolved";
+ case SymbolState::Ready:
+ return OS << "Ready";
+ }
+ llvm_unreachable("Invalid state");
+}
+
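The SymbolState values printed above form a strictly ordered progression (NeverSearched, Materializing, Resolved, Ready), and the rest of the patch tests readiness with a single comparison: a query is satisfied for a symbol as soon as getState() >= RequiredState. A minimal standalone mirror of that ordering, for illustration only:

#include <cassert>

enum class State { Invalid, NeverSearched, Materializing, Resolved, Ready };

// A query that only needs State::Resolved is satisfied by Resolved or Ready,
// but not by a symbol that is still materializing.
bool meetsRequiredState(State SymState, State Required) {
  return SymState >= Required;
}

int main() {
  assert(meetsRequiredState(State::Ready, State::Resolved));
  assert(!meetsRequiredState(State::Materializing, State::Resolved));
  return 0;
}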
FailedToMaterialize::FailedToMaterialize(SymbolNameSet Symbols)
: Symbols(std::move(Symbols)) {
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
@@ -262,85 +284,46 @@ void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const {
}
AsynchronousSymbolQuery::AsynchronousSymbolQuery(
- const SymbolNameSet &Symbols, SymbolsResolvedCallback NotifySymbolsResolved,
- SymbolsReadyCallback NotifySymbolsReady)
- : NotifySymbolsResolved(std::move(NotifySymbolsResolved)),
- NotifySymbolsReady(std::move(NotifySymbolsReady)) {
- NotYetResolvedCount = NotYetReadyCount = Symbols.size();
+ const SymbolNameSet &Symbols, SymbolState RequiredState,
+ SymbolsResolvedCallback NotifyComplete)
+ : NotifyComplete(std::move(NotifyComplete)), RequiredState(RequiredState) {
+ assert(RequiredState >= SymbolState::Resolved &&
+ "Cannot query for a symbols that have not reached the resolve state "
+ "yet");
+
+ OutstandingSymbolsCount = Symbols.size();
for (auto &S : Symbols)
ResolvedSymbols[S] = nullptr;
}
-void AsynchronousSymbolQuery::resolve(const SymbolStringPtr &Name,
- JITEvaluatedSymbol Sym) {
+void AsynchronousSymbolQuery::notifySymbolMetRequiredState(
+ const SymbolStringPtr &Name, JITEvaluatedSymbol Sym) {
auto I = ResolvedSymbols.find(Name);
assert(I != ResolvedSymbols.end() &&
"Resolving symbol outside the requested set");
assert(I->second.getAddress() == 0 && "Redundantly resolving symbol Name");
I->second = std::move(Sym);
- --NotYetResolvedCount;
-}
-
-void AsynchronousSymbolQuery::handleFullyResolved() {
- assert(NotYetResolvedCount == 0 && "Not fully resolved?");
-
- if (!NotifySymbolsResolved) {
- // handleFullyResolved may be called by handleFullyReady (see comments in
- // that method), in which case this is a no-op, so bail out.
- assert(!NotifySymbolsReady &&
- "NotifySymbolsResolved already called or an error occurred");
- return;
- }
-
- auto TmpNotifySymbolsResolved = std::move(NotifySymbolsResolved);
- NotifySymbolsResolved = SymbolsResolvedCallback();
- TmpNotifySymbolsResolved(std::move(ResolvedSymbols));
-}
-
-void AsynchronousSymbolQuery::notifySymbolReady() {
- assert(NotYetReadyCount != 0 && "All symbols already emitted");
- --NotYetReadyCount;
+ --OutstandingSymbolsCount;
}
-void AsynchronousSymbolQuery::handleFullyReady() {
- assert(NotifySymbolsReady &&
- "NotifySymbolsReady already called or an error occurred");
+void AsynchronousSymbolQuery::handleComplete() {
+ assert(OutstandingSymbolsCount == 0 &&
+ "Symbols remain, handleComplete called prematurely");
- auto TmpNotifySymbolsReady = std::move(NotifySymbolsReady);
- NotifySymbolsReady = SymbolsReadyCallback();
-
- if (NotYetResolvedCount == 0 && NotifySymbolsResolved) {
- // The NotifyResolved callback of one query must have caused this query to
- // become ready (i.e. there is still a handleFullyResolved callback waiting
- // to be made back up the stack). Fold the handleFullyResolved call into
- // this one before proceeding. This will cause the call further up the
- // stack to become a no-op.
- handleFullyResolved();
- }
-
- assert(QueryRegistrations.empty() &&
- "Query is still registered with some symbols");
- assert(!NotifySymbolsResolved && "Resolution not applied yet");
- TmpNotifySymbolsReady(Error::success());
+ auto TmpNotifyComplete = std::move(NotifyComplete);
+ NotifyComplete = SymbolsResolvedCallback();
+ TmpNotifyComplete(std::move(ResolvedSymbols));
}
-bool AsynchronousSymbolQuery::canStillFail() {
- return (NotifySymbolsResolved || NotifySymbolsReady);
-}
+bool AsynchronousSymbolQuery::canStillFail() { return !!NotifyComplete; }
void AsynchronousSymbolQuery::handleFailed(Error Err) {
assert(QueryRegistrations.empty() && ResolvedSymbols.empty() &&
- NotYetResolvedCount == 0 && NotYetReadyCount == 0 &&
+ OutstandingSymbolsCount == 0 &&
"Query should already have been abandoned");
- if (NotifySymbolsResolved) {
- NotifySymbolsResolved(std::move(Err));
- NotifySymbolsResolved = SymbolsResolvedCallback();
- } else {
- assert(NotifySymbolsReady && "Failed after both callbacks issued?");
- NotifySymbolsReady(std::move(Err));
- }
- NotifySymbolsReady = SymbolsReadyCallback();
+ NotifyComplete(std::move(Err));
+ NotifyComplete = SymbolsResolvedCallback();
}
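With the separate resolved/ready callbacks gone, AsynchronousSymbolQuery now behaves like a countdown: each notifySymbolMetRequiredState call fills in one ResolvedSymbols entry and decrements OutstandingSymbolsCount, and handleComplete hands the finished map to the single NotifyComplete callback. A standalone sketch of that shape with illustrative types (not the Orc classes):

#include <cstdint>
#include <functional>
#include <initializer_list>
#include <map>
#include <string>
#include <utility>

struct MiniQuery {
  std::map<std::string, uint64_t> Resolved; // symbol name -> address
  size_t Outstanding = 0;
  std::function<void(std::map<std::string, uint64_t>)> NotifyComplete;

  MiniQuery(std::initializer_list<std::string> Names,
            std::function<void(std::map<std::string, uint64_t>)> OnComplete)
      : NotifyComplete(std::move(OnComplete)) {
    for (const auto &N : Names)
      Resolved[N] = 0;
    Outstanding = Resolved.size();
  }

  // Called once per symbol when it reaches the query's required state.
  void notifySymbolMet(const std::string &Name, uint64_t Addr) {
    Resolved[Name] = Addr;
    if (--Outstanding == 0)
      NotifyComplete(std::move(Resolved)); // the handleComplete step
  }
};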
void AsynchronousSymbolQuery::addQueryDependence(JITDylib &JD,
@@ -363,8 +346,7 @@ void AsynchronousSymbolQuery::removeQueryDependence(
void AsynchronousSymbolQuery::detach() {
ResolvedSymbols.clear();
- NotYetResolvedCount = 0;
- NotYetReadyCount = 0;
+ OutstandingSymbolsCount = 0;
for (auto &KV : QueryRegistrations)
KV.first->detachQueryHelper(*this, KV.second);
QueryRegistrations.clear();
@@ -374,11 +356,6 @@ MaterializationResponsibility::MaterializationResponsibility(
JITDylib &JD, SymbolFlagsMap SymbolFlags, VModuleKey K)
: JD(JD), SymbolFlags(std::move(SymbolFlags)), K(std::move(K)) {
assert(!this->SymbolFlags.empty() && "Materializing nothing?");
-
-#ifndef NDEBUG
- for (auto &KV : this->SymbolFlags)
- KV.second |= JITSymbolFlags::Materializing;
-#endif
}
MaterializationResponsibility::~MaterializationResponsibility() {
@@ -390,16 +367,15 @@ SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
return JD.getRequestedSymbols(SymbolFlags);
}
-void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
- LLVM_DEBUG(dbgs() << "In " << JD.getName() << " resolving " << Symbols
- << "\n");
+void MaterializationResponsibility::notifyResolved(const SymbolMap &Symbols) {
+ LLVM_DEBUG({
+ dbgs() << "In " << JD.getName() << " resolving " << Symbols << "\n";
+ });
#ifndef NDEBUG
for (auto &KV : Symbols) {
auto I = SymbolFlags.find(KV.first);
assert(I != SymbolFlags.end() &&
"Resolving symbol outside this responsibility set");
- assert(I->second.isMaterializing() && "Duplicate resolution");
- I->second &= ~JITSymbolFlags::Materializing;
if (I->second.isWeak())
assert(I->second == (KV.second.getFlags() | JITSymbolFlags::Weak) &&
"Resolving symbol with incorrect flags");
@@ -412,12 +388,11 @@ void MaterializationResponsibility::resolve(const SymbolMap &Symbols) {
JD.resolve(Symbols);
}
-void MaterializationResponsibility::emit() {
-#ifndef NDEBUG
- for (auto &KV : SymbolFlags)
- assert(!KV.second.isMaterializing() &&
- "Failed to resolve symbol before emission");
-#endif // NDEBUG
+void MaterializationResponsibility::notifyEmitted() {
+
+ LLVM_DEBUG({
+ dbgs() << "In " << JD.getName() << " emitting " << SymbolFlags << "\n";
+ });
JD.emit(SymbolFlags);
SymbolFlags.clear();
@@ -429,19 +404,19 @@ Error MaterializationResponsibility::defineMaterializing(
// It's ok if we hit a duplicate here: In that case the new version will be
// discarded, and the JITDylib::defineMaterializing method will return a
// duplicate symbol error.
- for (auto &KV : NewSymbolFlags) {
- auto I = SymbolFlags.insert(KV).first;
- (void)I;
-#ifndef NDEBUG
- I->second |= JITSymbolFlags::Materializing;
-#endif
- }
+ for (auto &KV : NewSymbolFlags)
+ SymbolFlags.insert(KV);
return JD.defineMaterializing(NewSymbolFlags);
}
void MaterializationResponsibility::failMaterialization() {
+ LLVM_DEBUG({
+ dbgs() << "In " << JD.getName() << " failing materialization for "
+ << SymbolFlags << "\n";
+ });
+
SymbolNameSet FailedSymbols;
for (auto &KV : SymbolFlags)
FailedSymbols.insert(KV.first);
@@ -510,8 +485,8 @@ StringRef AbsoluteSymbolsMaterializationUnit::getName() const {
void AbsoluteSymbolsMaterializationUnit::materialize(
MaterializationResponsibility R) {
- R.resolve(Symbols);
- R.emit();
+ R.notifyResolved(Symbols);
+ R.notifyEmitted();
}
void AbsoluteSymbolsMaterializationUnit::discard(const JITDylib &JD,
@@ -559,6 +534,14 @@ void ReExportsMaterializationUnit::materialize(
Aliases.erase(I);
}
+ LLVM_DEBUG({
+ ES.runSessionLocked([&]() {
+ dbgs() << "materializing reexports: target = " << TgtJD.getName()
+ << ", source = " << SrcJD.getName() << " " << RequestedAliases
+ << "\n";
+ });
+ });
+
if (!Aliases.empty()) {
if (SourceJD)
R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported));
@@ -641,7 +624,7 @@ void ReExportsMaterializationUnit::materialize(
}
};
- auto OnResolve = [QueryInfo](Expected<SymbolMap> Result) {
+ auto OnComplete = [QueryInfo](Expected<SymbolMap> Result) {
if (Result) {
SymbolMap ResolutionMap;
for (auto &KV : QueryInfo->Aliases) {
@@ -650,8 +633,8 @@ void ReExportsMaterializationUnit::materialize(
ResolutionMap[KV.first] = JITEvaluatedSymbol(
(*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
}
- QueryInfo->R.resolve(ResolutionMap);
- QueryInfo->R.emit();
+ QueryInfo->R.notifyResolved(ResolutionMap);
+ QueryInfo->R.notifyEmitted();
} else {
auto &ES = QueryInfo->R.getTargetJITDylib().getExecutionSession();
ES.reportError(Result.takeError());
@@ -659,10 +642,8 @@ void ReExportsMaterializationUnit::materialize(
}
};
- auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
-
ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols,
- std::move(OnResolve), std::move(OnReady),
+ SymbolState::Resolved, std::move(OnComplete),
std::move(RegisterDependencies));
}
}
@@ -687,17 +668,20 @@ Expected<SymbolAliasMap>
buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) {
auto Flags = SourceJD.lookupFlags(Symbols);
- if (Flags.size() != Symbols.size()) {
+ if (!Flags)
+ return Flags.takeError();
+
+ if (Flags->size() != Symbols.size()) {
SymbolNameSet Unresolved = Symbols;
- for (auto &KV : Flags)
+ for (auto &KV : *Flags)
Unresolved.erase(KV.first);
return make_error<SymbolsNotFound>(std::move(Unresolved));
}
SymbolAliasMap Result;
for (auto &Name : Symbols) {
- assert(Flags.count(Name) && "Missing entry in flags map");
- Result[Name] = SymbolAliasMapEntry(Name, Flags[Name]);
+ assert(Flags->count(Name) && "Missing entry in flags map");
+ Result[Name] = SymbolAliasMapEntry(Name, (*Flags)[Name]);
}
return Result;
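lookupFlags now returns Expected<SymbolFlagsMap>, so callers such as buildSimpleReexportsAliasMap must either unwrap the value or forward the failure with takeError, as the hunk above shows. A small standalone sketch of that propagation pattern (the function names are illustrative):

#include "llvm/Support/Error.h"

llvm::Expected<int> mayFail(bool Fail) {
  if (Fail)
    return llvm::make_error<llvm::StringError>("lookup failed",
                                               llvm::inconvertibleErrorCode());
  return 42;
}

llvm::Expected<int> caller(bool Fail) {
  auto V = mayFail(Fail);
  if (!V)
    return V.takeError(); // mirrors `return Flags.takeError();` above
  return *V + 1;          // success path: use the unwrapped value
}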
@@ -709,14 +693,17 @@ ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD,
: SourceJD(SourceJD), MatchNonExported(MatchNonExported),
Allow(std::move(Allow)) {}
-SymbolNameSet ReexportsGenerator::operator()(JITDylib &JD,
- const SymbolNameSet &Names) {
+Expected<SymbolNameSet>
+ReexportsGenerator::operator()(JITDylib &JD, const SymbolNameSet &Names) {
orc::SymbolNameSet Added;
orc::SymbolAliasMap AliasMap;
auto Flags = SourceJD.lookupFlags(Names);
- for (auto &KV : Flags) {
+ if (!Flags)
+ return Flags.takeError();
+
+ for (auto &KV : *Flags) {
if (Allow && !Allow(KV.first))
continue;
AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second);
@@ -731,21 +718,19 @@ SymbolNameSet ReexportsGenerator::operator()(JITDylib &JD,
Error JITDylib::defineMaterializing(const SymbolFlagsMap &SymbolFlags) {
return ES.runSessionLocked([&]() -> Error {
- std::vector<SymbolMap::iterator> AddedSyms;
+ std::vector<SymbolTable::iterator> AddedSyms;
for (auto &KV : SymbolFlags) {
- SymbolMap::iterator EntryItr;
+ SymbolTable::iterator EntryItr;
bool Added;
- auto NewFlags = KV.second;
- NewFlags |= JITSymbolFlags::Materializing;
-
- std::tie(EntryItr, Added) = Symbols.insert(
- std::make_pair(KV.first, JITEvaluatedSymbol(0, NewFlags)));
+ std::tie(EntryItr, Added) =
+ Symbols.insert(std::make_pair(KV.first, SymbolTableEntry(KV.second)));
- if (Added)
+ if (Added) {
AddedSyms.push_back(EntryItr);
- else {
+ EntryItr->second.setState(SymbolState::Materializing);
+ } else {
// Remove any symbols already added.
for (auto &SI : AddedSyms)
Symbols.erase(SI);
@@ -769,9 +754,10 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
for (auto &KV : MU->getSymbols()) {
auto SymI = Symbols.find(KV.first);
assert(SymI != Symbols.end() && "Replacing unknown symbol");
- assert(!SymI->second.getFlags().isLazy() &&
- SymI->second.getFlags().isMaterializing() &&
- "Can not replace symbol that is not materializing");
+ assert(SymI->second.isInMaterializationPhase() &&
+ "Can not call replace on a symbol that is not materializing");
+ assert(!SymI->second.hasMaterializerAttached() &&
+ "Symbol should not have materializer attached already");
assert(UnmaterializedInfos.count(KV.first) == 0 &&
"Symbol being replaced should have no UnmaterializedInfo");
}
@@ -782,7 +768,7 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
for (auto &KV : MU->getSymbols()) {
auto MII = MaterializingInfos.find(KV.first);
if (MII != MaterializingInfos.end()) {
- if (!MII->second.PendingQueries.empty())
+ if (MII->second.hasQueriesPending())
return std::move(MU);
}
}
@@ -790,16 +776,15 @@ void JITDylib::replace(std::unique_ptr<MaterializationUnit> MU) {
// Otherwise, make MU responsible for all the symbols.
auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU));
for (auto &KV : UMI->MU->getSymbols()) {
- assert(!KV.second.isLazy() &&
- "Lazy flag should be managed internally.");
- assert(!KV.second.isMaterializing() &&
- "Materializing flags should be managed internally.");
-
auto SymI = Symbols.find(KV.first);
- JITSymbolFlags ReplaceFlags = KV.second;
- ReplaceFlags |= JITSymbolFlags::Lazy;
- SymI->second = JITEvaluatedSymbol(SymI->second.getAddress(),
- std::move(ReplaceFlags));
+ assert(SymI->second.getState() == SymbolState::Materializing &&
+ "Can not replace a symbol that is not materializing");
+ assert(!SymI->second.hasMaterializerAttached() &&
+ "Can not replace a symbol that has a materializer attached");
+ assert(UnmaterializedInfos.count(KV.first) == 0 &&
+ "Unexpected materializer entry in map");
+ SymI->second.setAddress(SymI->second.getAddress());
+ SymI->second.setMaterializerAttached(true);
UnmaterializedInfos[KV.first] = UMI;
}
@@ -817,14 +802,14 @@ JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
for (auto &KV : SymbolFlags) {
assert(Symbols.count(KV.first) && "JITDylib does not cover this symbol?");
- assert(Symbols.find(KV.first)->second.getFlags().isMaterializing() &&
- "getRequestedSymbols can only be called for materializing "
- "symbols");
+ assert(Symbols.find(KV.first)->second.isInMaterializationPhase() &&
+ "getRequestedSymbols can only be called for symbols that have "
+ "started materializing");
auto I = MaterializingInfos.find(KV.first);
if (I == MaterializingInfos.end())
continue;
- if (!I->second.PendingQueries.empty())
+ if (I->second.hasQueriesPending())
RequestedSymbols.insert(KV.first);
}
@@ -835,9 +820,8 @@ JITDylib::getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const {
void JITDylib::addDependencies(const SymbolStringPtr &Name,
const SymbolDependenceMap &Dependencies) {
assert(Symbols.count(Name) && "Name not in symbol table");
- assert((Symbols[Name].getFlags().isLazy() ||
- Symbols[Name].getFlags().isMaterializing()) &&
- "Symbol is not lazy or materializing");
+ assert(Symbols[Name].isInMaterializationPhase() &&
+ "Can not add dependencies for a symbol that is not materializing");
auto &MI = MaterializingInfos[Name];
assert(!MI.IsEmitted && "Can not add dependencies to an emitted symbol");
@@ -852,9 +836,8 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
// Assert that this symbol exists and has not been emitted already.
auto SymI = OtherJITDylib.Symbols.find(OtherSymbol);
assert(SymI != OtherJITDylib.Symbols.end() &&
- (SymI->second.getFlags().isLazy() ||
- SymI->second.getFlags().isMaterializing()) &&
- "Dependency on emitted symbol");
+ (SymI->second.getState() != SymbolState::Ready &&
+ "Dependency on emitted symbol"));
#endif
auto &OtherMI = OtherJITDylib.MaterializingInfos[OtherSymbol];
@@ -873,54 +856,52 @@ void JITDylib::addDependencies(const SymbolStringPtr &Name,
}
void JITDylib::resolve(const SymbolMap &Resolved) {
- auto FullyResolvedQueries = ES.runSessionLocked([&, this]() {
- AsynchronousSymbolQuerySet FullyResolvedQueries;
+ auto CompletedQueries = ES.runSessionLocked([&, this]() {
+ AsynchronousSymbolQuerySet CompletedQueries;
for (const auto &KV : Resolved) {
auto &Name = KV.first;
auto Sym = KV.second;
- assert(!Sym.getFlags().isLazy() && !Sym.getFlags().isMaterializing() &&
- "Materializing flags should be managed internally");
-
auto I = Symbols.find(Name);
assert(I != Symbols.end() && "Symbol not found");
- assert(!I->second.getFlags().isLazy() &&
- I->second.getFlags().isMaterializing() &&
+ assert(!I->second.hasMaterializerAttached() &&
+ "Resolving symbol with materializer attached?");
+ assert(I->second.getState() == SymbolState::Materializing &&
"Symbol should be materializing");
assert(I->second.getAddress() == 0 && "Symbol has already been resolved");
assert((Sym.getFlags() & ~JITSymbolFlags::Weak) ==
- (JITSymbolFlags::stripTransientFlags(I->second.getFlags()) &
- ~JITSymbolFlags::Weak) &&
+ (I->second.getFlags() & ~JITSymbolFlags::Weak) &&
"Resolved flags should match the declared flags");
// Once resolved, symbols can never be weak.
JITSymbolFlags ResolvedFlags = Sym.getFlags();
ResolvedFlags &= ~JITSymbolFlags::Weak;
- ResolvedFlags |= JITSymbolFlags::Materializing;
- I->second = JITEvaluatedSymbol(Sym.getAddress(), ResolvedFlags);
+ I->second.setAddress(Sym.getAddress());
+ I->second.setFlags(ResolvedFlags);
+ I->second.setState(SymbolState::Resolved);
auto &MI = MaterializingInfos[Name];
- for (auto &Q : MI.PendingQueries) {
- Q->resolve(Name, Sym);
- if (Q->isFullyResolved())
- FullyResolvedQueries.insert(Q);
+ for (auto &Q : MI.takeQueriesMeeting(SymbolState::Resolved)) {
+ Q->notifySymbolMetRequiredState(Name, Sym);
+ if (Q->isComplete())
+ CompletedQueries.insert(std::move(Q));
}
}
- return FullyResolvedQueries;
+ return CompletedQueries;
});
- for (auto &Q : FullyResolvedQueries) {
- assert(Q->isFullyResolved() && "Q not fully resolved");
- Q->handleFullyResolved();
+ for (auto &Q : CompletedQueries) {
+ assert(Q->isComplete() && "Q not completed");
+ Q->handleComplete();
}
}
void JITDylib::emit(const SymbolFlagsMap &Emitted) {
- auto FullyReadyQueries = ES.runSessionLocked([&, this]() {
- AsynchronousSymbolQuerySet ReadyQueries;
+ auto CompletedQueries = ES.runSessionLocked([&, this]() {
+ AsynchronousSymbolQuerySet CompletedQueries;
for (const auto &KV : Emitted) {
const auto &Name = KV.first;
@@ -962,20 +943,22 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
DependantMI.UnemittedDependencies.empty()) {
assert(DependantMI.Dependants.empty() &&
"Dependants should be empty by now");
- for (auto &Q : DependantMI.PendingQueries) {
- Q->notifySymbolReady();
- if (Q->isFullyReady())
- ReadyQueries.insert(Q);
- Q->removeQueryDependence(DependantJD, DependantName);
- }
// Since this dependant is now ready, we erase its MaterializingInfo
// and update its materializing state.
- assert(DependantJD.Symbols.count(DependantName) &&
+ auto DependantSymI = DependantJD.Symbols.find(DependantName);
+ assert(DependantSymI != DependantJD.Symbols.end() &&
"Dependant has no entry in the Symbols table");
- auto &DependantSym = DependantJD.Symbols[DependantName];
- DependantSym.setFlags(DependantSym.getFlags() &
- ~JITSymbolFlags::Materializing);
+ DependantSymI->second.setState(SymbolState::Ready);
+
+ for (auto &Q : DependantMI.takeQueriesMeeting(SymbolState::Ready)) {
+ Q->notifySymbolMetRequiredState(
+ DependantName, DependantSymI->second.getSymbol());
+ if (Q->isComplete())
+ CompletedQueries.insert(Q);
+ Q->removeQueryDependence(DependantJD, DependantName);
+ }
+
DependantJD.MaterializingInfos.erase(DependantMII);
}
}
@@ -984,26 +967,25 @@ void JITDylib::emit(const SymbolFlagsMap &Emitted) {
MI.IsEmitted = true;
if (MI.UnemittedDependencies.empty()) {
- for (auto &Q : MI.PendingQueries) {
- Q->notifySymbolReady();
- if (Q->isFullyReady())
- ReadyQueries.insert(Q);
+ auto SymI = Symbols.find(Name);
+ assert(SymI != Symbols.end() && "Symbol has no entry in Symbols table");
+ SymI->second.setState(SymbolState::Ready);
+ for (auto &Q : MI.takeQueriesMeeting(SymbolState::Ready)) {
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+ if (Q->isComplete())
+ CompletedQueries.insert(Q);
Q->removeQueryDependence(*this, Name);
}
- assert(Symbols.count(Name) &&
- "Symbol has no entry in the Symbols table");
- auto &Sym = Symbols[Name];
- Sym.setFlags(Sym.getFlags() & ~JITSymbolFlags::Materializing);
MaterializingInfos.erase(MII);
}
}
- return ReadyQueries;
+ return CompletedQueries;
});
- for (auto &Q : FullyReadyQueries) {
- assert(Q->isFullyReady() && "Q is not fully ready");
- Q->handleFullyReady();
+ for (auto &Q : CompletedQueries) {
+ assert(Q->isComplete() && "Q is not complete");
+ Q->handleComplete();
}
}
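Both resolve and emit above follow the same discipline: completed queries are collected inside runSessionLocked, but handleComplete only runs after the lock is released, so user callbacks can safely re-enter the session. A standalone sketch of that collect-then-notify pattern, with illustrative types:

#include <functional>
#include <mutex>
#include <utility>
#include <vector>

struct MiniSession {
  std::mutex M;
  std::vector<std::function<void()>> CompletedCallbacks;

  void notifyCompleted() {
    std::vector<std::function<void()>> ToRun;
    {
      std::lock_guard<std::mutex> Lock(M); // the runSessionLocked part
      ToRun = std::move(CompletedCallbacks);
      CompletedCallbacks.clear();
    }
    // Callbacks run outside the lock and may re-enter the session freely.
    for (auto &F : ToRun)
      F();
  }
};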
@@ -1013,6 +995,7 @@ void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
auto FailedQueriesToNotify = ES.runSessionLocked([&, this]() {
AsynchronousSymbolQuerySet FailedQueries;
+ std::vector<MaterializingInfosMap::iterator> MIIsToRemove;
for (auto &Name : FailedSymbols) {
auto I = Symbols.find(Name);
@@ -1026,17 +1009,40 @@ void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
if (MII == MaterializingInfos.end())
continue;
+ // Remove this symbol from the dependants list of any dependencies.
+ for (auto &KV : MII->second.UnemittedDependencies) {
+ auto *DependencyJD = KV.first;
+ auto &Dependencies = KV.second;
+ for (auto &DependencyName : Dependencies) {
+ auto DependencyMII =
+ DependencyJD->MaterializingInfos.find(DependencyName);
+ assert(DependencyMII != DependencyJD->MaterializingInfos.end() &&
+ "Unemitted dependency must have a MaterializingInfo entry");
+ assert(DependencyMII->second.Dependants.count(this) &&
+ "Dependency's dependants list does not contain this JITDylib");
+ assert(DependencyMII->second.Dependants[this].count(Name) &&
+ "Dependency's dependants list does not contain dependant");
+ DependencyMII->second.Dependants[this].erase(Name);
+ }
+ }
+
// Copy all the queries to the FailedQueries list, then abandon them.
// This has to be a copy, and the copy has to come before the abandon
// operation: Each Q.detach() call will reach back into this
// PendingQueries list to remove Q.
- for (auto &Q : MII->second.PendingQueries)
+ for (auto &Q : MII->second.pendingQueries())
FailedQueries.insert(Q);
- for (auto &Q : FailedQueries)
- Q->detach();
+ MIIsToRemove.push_back(std::move(MII));
+ }
+
+ // Detach failed queries.
+ for (auto &Q : FailedQueries)
+ Q->detach();
- assert(MII->second.PendingQueries.empty() &&
+ // Remove the MaterializingInfos.
+ for (auto &MII : MIIsToRemove) {
+ assert(!MII->second.hasQueriesPending() &&
"Queries remain after symbol was failed");
MaterializingInfos.erase(MII);
@@ -1052,9 +1058,11 @@ void JITDylib::notifyFailed(const SymbolNameSet &FailedSymbols) {
void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder,
bool SearchThisJITDylibFirst,
bool MatchNonExportedInThisDylib) {
- if (SearchThisJITDylibFirst && NewSearchOrder.front().first != this)
- NewSearchOrder.insert(NewSearchOrder.begin(),
- {this, MatchNonExportedInThisDylib});
+ if (SearchThisJITDylibFirst) {
+ if (NewSearchOrder.empty() || NewSearchOrder.front().first != this)
+ NewSearchOrder.insert(NewSearchOrder.begin(),
+ {this, MatchNonExportedInThisDylib});
+ }
ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); });
}
@@ -1092,7 +1100,7 @@ void JITDylib::removeFromSearchOrder(JITDylib &JD) {
Error JITDylib::remove(const SymbolNameSet &Names) {
return ES.runSessionLocked([&]() -> Error {
using SymbolMaterializerItrPair =
- std::pair<SymbolMap::iterator, UnmaterializedInfosMap::iterator>;
+ std::pair<SymbolTable::iterator, UnmaterializedInfosMap::iterator>;
std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
SymbolNameSet Missing;
SymbolNameSet Materializing;
@@ -1107,13 +1115,14 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
}
// Note symbol materializing.
- if (I->second.getFlags().isMaterializing()) {
+ if (I->second.isInMaterializationPhase()) {
Materializing.insert(Name);
continue;
}
- auto UMII = I->second.getFlags().isLazy() ? UnmaterializedInfos.find(Name)
- : UnmaterializedInfos.end();
+ auto UMII = I->second.hasMaterializerAttached()
+ ? UnmaterializedInfos.find(Name)
+ : UnmaterializedInfos.end();
SymbolsToRemove.push_back(std::make_pair(I, UMII));
}
@@ -1143,16 +1152,23 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
});
}
-SymbolFlagsMap JITDylib::lookupFlags(const SymbolNameSet &Names) {
- return ES.runSessionLocked([&, this]() {
+Expected<SymbolFlagsMap> JITDylib::lookupFlags(const SymbolNameSet &Names) {
+ return ES.runSessionLocked([&, this]() -> Expected<SymbolFlagsMap> {
SymbolFlagsMap Result;
auto Unresolved = lookupFlagsImpl(Result, Names);
- if (DefGenerator && !Unresolved.empty()) {
- auto NewDefs = DefGenerator(*this, Unresolved);
- if (!NewDefs.empty()) {
- auto Unresolved2 = lookupFlagsImpl(Result, NewDefs);
+ if (!Unresolved)
+ return Unresolved.takeError();
+
+ if (DefGenerator && !Unresolved->empty()) {
+ auto NewDefs = DefGenerator(*this, *Unresolved);
+ if (!NewDefs)
+ return NewDefs.takeError();
+ if (!NewDefs->empty()) {
+ auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs);
+ if (!Unresolved2)
+ return Unresolved2.takeError();
(void)Unresolved2;
- assert(Unresolved2.empty() &&
+ assert(Unresolved2->empty() &&
"All fallback defs should have been found by lookupFlagsImpl");
}
};
@@ -1160,41 +1176,42 @@ SymbolFlagsMap JITDylib::lookupFlags(const SymbolNameSet &Names) {
});
}
-SymbolNameSet JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
- const SymbolNameSet &Names) {
+Expected<SymbolNameSet> JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags,
+ const SymbolNameSet &Names) {
SymbolNameSet Unresolved;
for (auto &Name : Names) {
auto I = Symbols.find(Name);
-
- if (I == Symbols.end()) {
+ if (I != Symbols.end()) {
+ assert(!Flags.count(Name) && "Symbol already present in Flags map");
+ Flags[Name] = I->second.getFlags();
+ } else
Unresolved.insert(Name);
- continue;
- }
-
- assert(!Flags.count(Name) && "Symbol already present in Flags map");
- Flags[Name] = JITSymbolFlags::stripTransientFlags(I->second.getFlags());
}
return Unresolved;
}
-void JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- SymbolNameSet &Unresolved, bool MatchNonExported,
- MaterializationUnitList &MUs) {
+Error JITDylib::lodgeQuery(std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ SymbolNameSet &Unresolved, bool MatchNonExported,
+ MaterializationUnitList &MUs) {
assert(Q && "Query can not be null");
lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs);
if (DefGenerator && !Unresolved.empty()) {
auto NewDefs = DefGenerator(*this, Unresolved);
- if (!NewDefs.empty()) {
- for (auto &D : NewDefs)
+ if (!NewDefs)
+ return NewDefs.takeError();
+ if (!NewDefs->empty()) {
+ for (auto &D : *NewDefs)
Unresolved.erase(D);
- lodgeQueryImpl(Q, NewDefs, MatchNonExported, MUs);
- assert(NewDefs.empty() &&
+ lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs);
+ assert(NewDefs->empty() &&
"All fallback defs should have been found by lookupImpl");
}
}
+
+ return Error::success();
}
void JITDylib::lodgeQueryImpl(
@@ -1204,6 +1221,7 @@ void JITDylib::lodgeQueryImpl(
std::vector<SymbolStringPtr> ToRemove;
for (auto Name : Unresolved) {
+
// Search for the name in Symbols. Skip it if not found.
auto SymI = Symbols.find(Name);
if (SymI == Symbols.end())
@@ -1213,20 +1231,22 @@ void JITDylib::lodgeQueryImpl(
if (!SymI->second.getFlags().isExported() && !MatchNonExported)
continue;
- // If we matched against Name in JD, mark it to be removed from the Unresolved
- // set.
+ // If we matched against Name in JD, mark it to be removed from the
+ // Unresolved set.
ToRemove.push_back(Name);
- // If the symbol has an address then resolve it.
- if (SymI->second.getAddress() != 0)
- Q->resolve(Name, SymI->second);
+ // If this symbol already meets the required state for then notify the
+ // query and continue.
+ if (SymI->second.getState() >= Q->getRequiredState()) {
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+ continue;
+ }
- // If the symbol is lazy, get the MaterialiaztionUnit for it.
- if (SymI->second.getFlags().isLazy()) {
+ // Otherwise this symbol does not yet meet the required state. Check whether
+ // it has a materializer attached, and if so prepare to run it.
+ if (SymI->second.hasMaterializerAttached()) {
assert(SymI->second.getAddress() == 0 &&
- "Lazy symbol should not have a resolved address");
- assert(!SymI->second.getFlags().isMaterializing() &&
- "Materializing and lazy should not both be set");
+ "Symbol not resolved but already has address?");
auto UMII = UnmaterializedInfos.find(Name);
assert(UMII != UnmaterializedInfos.end() &&
"Lazy symbol should have UnmaterializedInfo");
@@ -1237,27 +1257,20 @@ void JITDylib::lodgeQueryImpl(
// materializing state.
for (auto &KV : MU->getSymbols()) {
auto SymK = Symbols.find(KV.first);
- auto Flags = SymK->second.getFlags();
- Flags &= ~JITSymbolFlags::Lazy;
- Flags |= JITSymbolFlags::Materializing;
- SymK->second.setFlags(Flags);
+ SymK->second.setMaterializerAttached(false);
+ SymK->second.setState(SymbolState::Materializing);
UnmaterializedInfos.erase(KV.first);
}
// Add MU to the list of MaterializationUnits to be materialized.
MUs.push_back(std::move(MU));
- } else if (!SymI->second.getFlags().isMaterializing()) {
- // The symbol is neither lazy nor materializing, so it must be
- // ready. Notify the query and continue.
- Q->notifySymbolReady();
- continue;
}
// Add the query to the PendingQueries list.
- assert(SymI->second.getFlags().isMaterializing() &&
+ assert(SymI->second.isInMaterializationPhase() &&
"By this line the symbol should be materializing");
auto &MI = MaterializingInfos[Name];
- MI.PendingQueries.push_back(Q);
+ MI.addQuery(Q);
Q->addQueryDependence(*this, Name);
}
@@ -1266,40 +1279,43 @@ void JITDylib::lodgeQueryImpl(
Unresolved.erase(Name);
}
-SymbolNameSet JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
- SymbolNameSet Names) {
+Expected<SymbolNameSet>
+JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
+ SymbolNameSet Names) {
assert(Q && "Query can not be null");
ES.runOutstandingMUs();
- LookupImplActionFlags ActionFlags = None;
+ bool QueryComplete = false;
std::vector<std::unique_ptr<MaterializationUnit>> MUs;
SymbolNameSet Unresolved = std::move(Names);
- ES.runSessionLocked([&, this]() {
- ActionFlags = lookupImpl(Q, MUs, Unresolved);
+ auto Err = ES.runSessionLocked([&, this]() -> Error {
+ QueryComplete = lookupImpl(Q, MUs, Unresolved);
if (DefGenerator && !Unresolved.empty()) {
- assert(ActionFlags == None &&
- "ActionFlags set but unresolved symbols remain?");
+ assert(!QueryComplete && "query complete but unresolved symbols remain?");
auto NewDefs = DefGenerator(*this, Unresolved);
- if (!NewDefs.empty()) {
- for (auto &D : NewDefs)
+ if (!NewDefs)
+ return NewDefs.takeError();
+ if (!NewDefs->empty()) {
+ for (auto &D : *NewDefs)
Unresolved.erase(D);
- ActionFlags = lookupImpl(Q, MUs, NewDefs);
- assert(NewDefs.empty() &&
+ QueryComplete = lookupImpl(Q, MUs, *NewDefs);
+ assert(NewDefs->empty() &&
"All fallback defs should have been found by lookupImpl");
}
}
+ return Error::success();
});
- assert((MUs.empty() || ActionFlags == None) &&
- "If action flags are set, there should be no work to do (so no MUs)");
+ if (Err)
+ return std::move(Err);
- if (ActionFlags & NotifyFullyResolved)
- Q->handleFullyResolved();
+ assert((MUs.empty() || !QueryComplete) &&
+ "If action flags are set, there should be no work to do (so no MUs)");
- if (ActionFlags & NotifyFullyReady)
- Q->handleFullyReady();
+ if (QueryComplete)
+ Q->handleComplete();
// FIXME: Swap back to the old code below once RuntimeDyld works with
// callbacks from asynchronous queries.
@@ -1318,13 +1334,13 @@ SymbolNameSet JITDylib::legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q,
return Unresolved;
}
-JITDylib::LookupImplActionFlags
-JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
- std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
- SymbolNameSet &Unresolved) {
- LookupImplActionFlags ActionFlags = None;
- std::vector<SymbolStringPtr> ToRemove;
+bool JITDylib::lookupImpl(
+ std::shared_ptr<AsynchronousSymbolQuery> &Q,
+ std::vector<std::unique_ptr<MaterializationUnit>> &MUs,
+ SymbolNameSet &Unresolved) {
+ bool QueryComplete = false;
+ std::vector<SymbolStringPtr> ToRemove;
for (auto Name : Unresolved) {
// Search for the name in Symbols. Skip it if not found.
@@ -1335,19 +1351,17 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
// If we found Name, mark it to be removed from the Unresolved set.
ToRemove.push_back(Name);
- // If the symbol has an address then resolve it.
- if (SymI->second.getAddress() != 0) {
- Q->resolve(Name, SymI->second);
- if (Q->isFullyResolved())
- ActionFlags |= NotifyFullyResolved;
+ if (SymI->second.getState() >= Q->getRequiredState()) {
+ Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol());
+ if (Q->isComplete())
+ QueryComplete = true;
+ continue;
}
// If the symbol is lazy, get the MaterializationUnit for it.
- if (SymI->second.getFlags().isLazy()) {
+ if (SymI->second.hasMaterializerAttached()) {
assert(SymI->second.getAddress() == 0 &&
"Lazy symbol should not have a resolved address");
- assert(!SymI->second.getFlags().isMaterializing() &&
- "Materializing and lazy should not both be set");
auto UMII = UnmaterializedInfos.find(Name);
assert(UMII != UnmaterializedInfos.end() &&
"Lazy symbol should have UnmaterializedInfo");
@@ -1358,29 +1372,21 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
// materializing state.
for (auto &KV : MU->getSymbols()) {
auto SymK = Symbols.find(KV.first);
- auto Flags = SymK->second.getFlags();
- Flags &= ~JITSymbolFlags::Lazy;
- Flags |= JITSymbolFlags::Materializing;
- SymK->second.setFlags(Flags);
+ assert(SymK != Symbols.end() && "Missing symbol table entry");
+ SymK->second.setState(SymbolState::Materializing);
+ SymK->second.setMaterializerAttached(false);
UnmaterializedInfos.erase(KV.first);
}
// Add MU to the list of MaterializationUnits to be materialized.
MUs.push_back(std::move(MU));
- } else if (!SymI->second.getFlags().isMaterializing()) {
- // The symbol is neither lazy nor materializing, so it must be ready.
- // Notify the query and continue.
- Q->notifySymbolReady();
- if (Q->isFullyReady())
- ActionFlags |= NotifyFullyReady;
- continue;
}
// Add the query to the PendingQueries list.
- assert(SymI->second.getFlags().isMaterializing() &&
+ assert(SymI->second.isInMaterializationPhase() &&
"By this line the symbol should be materializing");
auto &MI = MaterializingInfos[Name];
- MI.PendingQueries.push_back(Q);
+ MI.addQuery(Q);
Q->addQueryDependence(*this, Name);
}
@@ -1388,7 +1394,7 @@ JITDylib::lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q,
for (auto &Name : ToRemove)
Unresolved.erase(Name);
- return ActionFlags;
+ return QueryComplete;
}
void JITDylib::dump(raw_ostream &OS) {
@@ -1405,21 +1411,19 @@ void JITDylib::dump(raw_ostream &OS) {
for (auto &KV : Symbols) {
OS << " \"" << *KV.first << "\": ";
if (auto Addr = KV.second.getAddress())
- OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags();
+ OS << format("0x%016" PRIx64, Addr) << ", " << KV.second.getFlags()
+ << " ";
else
- OS << "<not resolved>";
- if (KV.second.getFlags().isLazy() ||
- KV.second.getFlags().isMaterializing()) {
- OS << " (";
- if (KV.second.getFlags().isLazy()) {
- auto I = UnmaterializedInfos.find(KV.first);
- assert(I != UnmaterializedInfos.end() &&
- "Lazy symbol should have UnmaterializedInfo");
- OS << " Lazy (MU=" << I->second->MU.get() << ")";
- }
- if (KV.second.getFlags().isMaterializing())
- OS << " Materializing";
- OS << ", " << KV.second.getFlags() << " )\n";
+ OS << "<not resolved> ";
+
+ OS << KV.second.getState();
+
+ if (KV.second.hasMaterializerAttached()) {
+ OS << " (Materializer ";
+ auto I = UnmaterializedInfos.find(KV.first);
+ assert(I != UnmaterializedInfos.end() &&
+ "Lazy symbol should have UnmaterializedInfo");
+ OS << I->second->MU.get() << ")\n";
} else
OS << "\n";
}
@@ -1430,10 +1434,10 @@ void JITDylib::dump(raw_ostream &OS) {
OS << " \"" << *KV.first << "\":\n"
<< " IsEmitted = " << (KV.second.IsEmitted ? "true" : "false")
<< "\n"
- << " " << KV.second.PendingQueries.size()
+ << " " << KV.second.pendingQueries().size()
<< " pending queries: { ";
- for (auto &Q : KV.second.PendingQueries)
- OS << Q.get() << " ";
+ for (const auto &Q : KV.second.pendingQueries())
+ OS << Q.get() << " (" << Q->getRequiredState() << ") ";
OS << "}\n Dependants:\n";
for (auto &KV2 : KV.second.Dependants)
OS << " " << KV2.first->getName() << ": " << KV2.second << "\n";
@@ -1444,6 +1448,51 @@ void JITDylib::dump(raw_ostream &OS) {
});
}
+void JITDylib::MaterializingInfo::addQuery(
+ std::shared_ptr<AsynchronousSymbolQuery> Q) {
+
+ auto I = std::lower_bound(
+ PendingQueries.rbegin(), PendingQueries.rend(), Q->getRequiredState(),
+ [](const std::shared_ptr<AsynchronousSymbolQuery> &V, SymbolState S) {
+ return V->getRequiredState() <= S;
+ });
+ PendingQueries.insert(I.base(), std::move(Q));
+}
+
+void JITDylib::MaterializingInfo::removeQuery(
+ const AsynchronousSymbolQuery &Q) {
+ // FIXME: Implement 'find_as' for shared_ptr<T>/T*.
+ auto I =
+ std::find_if(PendingQueries.begin(), PendingQueries.end(),
+ [&Q](const std::shared_ptr<AsynchronousSymbolQuery> &V) {
+ return V.get() == &Q;
+ });
+ assert(I != PendingQueries.end() &&
+ "Query is not attached to this MaterializingInfo");
+ PendingQueries.erase(I);
+}
+
+JITDylib::AsynchronousSymbolQueryList
+JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) {
+ AsynchronousSymbolQueryList Result;
+ while (!PendingQueries.empty()) {
+ if (PendingQueries.back()->getRequiredState() > RequiredState)
+ break;
+
+ Result.push_back(std::move(PendingQueries.back()));
+ PendingQueries.pop_back();
+ }
+
+ return Result;
+}
+
+JITDylib::AsynchronousSymbolQueryList
+JITDylib::MaterializingInfo::takeAllQueries() {
+ AsynchronousSymbolQueryList Result;
+ std::swap(Result, PendingQueries);
+ return Result;
+}
+
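MaterializingInfo::addQuery above keeps PendingQueries ordered by required state, with the easiest-to-satisfy queries at the back of the vector, so takeQueriesMeeting can pop satisfied queries off the end without scanning the whole list. A standalone sketch of the same invariant using plain integers for the states (illustration only):

#include <algorithm>
#include <vector>

struct PendingList {
  // Kept sorted in descending required-state order, so the smallest
  // (easiest to satisfy) requirements are at the back.
  std::vector<int> RequiredStates;

  void addQuery(int Required) {
    auto I = std::lower_bound(RequiredStates.rbegin(), RequiredStates.rend(),
                              Required,
                              [](int V, int S) { return V <= S; });
    RequiredStates.insert(I.base(), Required);
  }

  // Pops every query whose requirement is met once ReachedState is reached.
  std::vector<int> takeQueriesMeeting(int ReachedState) {
    std::vector<int> Result;
    while (!RequiredStates.empty() && RequiredStates.back() <= ReachedState) {
      Result.push_back(RequiredStates.back());
      RequiredStates.pop_back();
    }
    return Result;
  }
};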
JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
: ES(ES), JITDylibName(std::move(Name)) {
SearchOrder.push_back({this, true});
@@ -1451,77 +1500,52 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
Error JITDylib::defineImpl(MaterializationUnit &MU) {
SymbolNameSet Duplicates;
- SymbolNameSet MUDefsOverridden;
-
- struct ExistingDefOverriddenEntry {
- SymbolMap::iterator ExistingDefItr;
- JITSymbolFlags NewFlags;
- };
- std::vector<ExistingDefOverriddenEntry> ExistingDefsOverridden;
-
- for (auto &KV : MU.getSymbols()) {
- assert(!KV.second.isLazy() && "Lazy flag should be managed internally.");
- assert(!KV.second.isMaterializing() &&
- "Materializing flags should be managed internally.");
+ std::vector<SymbolStringPtr> ExistingDefsOverridden;
+ std::vector<SymbolStringPtr> MUDefsOverridden;
- SymbolMap::iterator EntryItr;
- bool Added;
+ for (const auto &KV : MU.getSymbols()) {
+ auto I = Symbols.find(KV.first);
- auto NewFlags = KV.second;
- NewFlags |= JITSymbolFlags::Lazy;
-
- std::tie(EntryItr, Added) = Symbols.insert(
- std::make_pair(KV.first, JITEvaluatedSymbol(0, NewFlags)));
-
- if (!Added) {
+ if (I != Symbols.end()) {
if (KV.second.isStrong()) {
- if (EntryItr->second.getFlags().isStrong() ||
- (EntryItr->second.getFlags() & JITSymbolFlags::Materializing))
+ if (I->second.getFlags().isStrong() ||
+ I->second.getState() > SymbolState::NeverSearched)
Duplicates.insert(KV.first);
- else
- ExistingDefsOverridden.push_back({EntryItr, NewFlags});
+ else {
+ assert(I->second.getState() == SymbolState::NeverSearched &&
+ "Overridden existing def should be in the never-searched "
+ "state");
+ ExistingDefsOverridden.push_back(KV.first);
+ }
} else
- MUDefsOverridden.insert(KV.first);
+ MUDefsOverridden.push_back(KV.first);
}
}
- if (!Duplicates.empty()) {
- // We need to remove the symbols we added.
- for (auto &KV : MU.getSymbols()) {
- if (Duplicates.count(KV.first))
- continue;
-
- bool Found = false;
- for (const auto &EDO : ExistingDefsOverridden)
- if (EDO.ExistingDefItr->first == KV.first)
- Found = true;
-
- if (!Found)
- Symbols.erase(KV.first);
- }
-
- // FIXME: Return all duplicates.
+ // If there were any duplicate definitions then bail out.
+ if (!Duplicates.empty())
return make_error<DuplicateDefinition>(**Duplicates.begin());
- }
- // Update flags on existing defs and call discard on their materializers.
- for (auto &EDO : ExistingDefsOverridden) {
- assert(EDO.ExistingDefItr->second.getFlags().isLazy() &&
- !EDO.ExistingDefItr->second.getFlags().isMaterializing() &&
- "Overridden existing def should be in the Lazy state");
+ // Discard any overridden defs in this MU.
+ for (auto &S : MUDefsOverridden)
+ MU.doDiscard(*this, S);
- EDO.ExistingDefItr->second.setFlags(EDO.NewFlags);
+ // Discard existing overridden defs.
+ for (auto &S : ExistingDefsOverridden) {
- auto UMII = UnmaterializedInfos.find(EDO.ExistingDefItr->first);
+ auto UMII = UnmaterializedInfos.find(S);
assert(UMII != UnmaterializedInfos.end() &&
"Overridden existing def should have an UnmaterializedInfo");
-
- UMII->second->MU->doDiscard(*this, EDO.ExistingDefItr->first);
+ UMII->second->MU->doDiscard(*this, S);
}
- // Discard overridden symbols povided by MU.
- for (auto &Sym : MUDefsOverridden)
- MU.doDiscard(*this, Sym);
+ // Finally, add the defs from this MU.
+ for (auto &KV : MU.getSymbols()) {
+ auto &SymEntry = Symbols[KV.first];
+ SymEntry.setFlags(KV.second);
+ SymEntry.setState(SymbolState::NeverSearched);
+ SymEntry.setMaterializerAttached(true);
+ }
return Error::success();
}
@@ -1532,17 +1556,7 @@ void JITDylib::detachQueryHelper(AsynchronousSymbolQuery &Q,
assert(MaterializingInfos.count(QuerySymbol) &&
"QuerySymbol does not have MaterializingInfo");
auto &MI = MaterializingInfos[QuerySymbol];
-
- auto IdenticalQuery =
- [&](const std::shared_ptr<AsynchronousSymbolQuery> &R) {
- return R.get() == &Q;
- };
-
- auto I = std::find_if(MI.PendingQueries.begin(), MI.PendingQueries.end(),
- IdenticalQuery);
- assert(I != MI.PendingQueries.end() &&
- "Query Q should be in the PendingQueries list for QuerySymbol");
- MI.PendingQueries.erase(I);
+ MI.removeQuery(Q);
}
}
@@ -1582,8 +1596,18 @@ JITDylib &ExecutionSession::getMainJITDylib() {
return runSessionLocked([this]() -> JITDylib & { return *JDs.front(); });
}
+JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
+ return runSessionLocked([&, this]() -> JITDylib * {
+ for (auto &JD : JDs)
+ if (JD->getName() == Name)
+ return JD.get();
+ return nullptr;
+ });
+}
+
JITDylib &ExecutionSession::createJITDylib(std::string Name,
bool AddToMainDylibSearchOrder) {
+ assert(!getJITDylibByName(Name) && "JITDylib with that name already exists");
return runSessionLocked([&, this]() -> JITDylib & {
JDs.push_back(
std::unique_ptr<JITDylib>(new JITDylib(*this, std::move(Name))));
@@ -1610,74 +1634,36 @@ void ExecutionSession::legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err) {
Expected<SymbolMap> ExecutionSession::legacyLookup(
LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names,
- bool WaitUntilReady, RegisterDependenciesFunction RegisterDependencies) {
+ SymbolState RequiredState,
+ RegisterDependenciesFunction RegisterDependencies) {
#if LLVM_ENABLE_THREADS
// In the threaded case we use promises to return the results.
std::promise<SymbolMap> PromisedResult;
- std::mutex ErrMutex;
Error ResolutionError = Error::success();
- std::promise<void> PromisedReady;
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
+ auto NotifyComplete = [&](Expected<SymbolMap> R) {
if (R)
PromisedResult.set_value(std::move(*R));
else {
- {
- ErrorAsOutParameter _(&ResolutionError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ResolutionError = R.takeError();
- }
+ ErrorAsOutParameter _(&ResolutionError);
+ ResolutionError = R.takeError();
PromisedResult.set_value(SymbolMap());
}
};
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- if (Err) {
- ErrorAsOutParameter _(&ReadyError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ReadyError = std::move(Err);
- }
- PromisedReady.set_value();
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
-
#else
SymbolMap Result;
Error ResolutionError = Error::success();
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
+ auto NotifyComplete = [&](Expected<SymbolMap> R) {
ErrorAsOutParameter _(&ResolutionError);
if (R)
Result = std::move(*R);
else
ResolutionError = R.takeError();
};
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- ErrorAsOutParameter _(&ReadyError);
- if (Err)
- ReadyError = std::move(Err);
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
#endif
auto Query = std::make_shared<AsynchronousSymbolQuery>(
- Names, std::move(OnResolve), std::move(OnReady));
+ Names, RequiredState, std::move(NotifyComplete));
// FIXME: This should be run session locked along with the registration code
// and error reporting below.
SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names));
@@ -1701,39 +1687,13 @@ Expected<SymbolMap> ExecutionSession::legacyLookup(
#if LLVM_ENABLE_THREADS
auto ResultFuture = PromisedResult.get_future();
auto Result = ResultFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
- }
-
- if (WaitUntilReady) {
- auto ReadyFuture = PromisedReady.get_future();
- ReadyFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ReadyError)
- return std::move(ReadyError);
- }
- } else
- cantFail(std::move(ReadyError));
-
+ if (ResolutionError)
+ return std::move(ResolutionError);
return std::move(Result);
#else
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
+ if (ResolutionError)
return std::move(ResolutionError);
- }
-
- if (ReadyError)
- return std::move(ReadyError);
return Result;
#endif
@@ -1741,9 +1701,16 @@ Expected<SymbolMap> ExecutionSession::legacyLookup(
void ExecutionSession::lookup(
const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols,
- SymbolsResolvedCallback OnResolve, SymbolsReadyCallback OnReady,
+ SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete,
RegisterDependenciesFunction RegisterDependencies) {
+ LLVM_DEBUG({
+ runSessionLocked([&]() {
+ dbgs() << "Looking up " << Symbols << " in " << SearchOrder
+ << " (required state: " << RequiredState << ")\n";
+ });
+ });
+
// lookup can be re-entered recursively if running on a single thread. Run any
// outstanding MUs in case this query depends on them, otherwise this lookup
// will starve waiting for a result from an MU that is stuck in the queue.
@@ -1751,38 +1718,32 @@ void ExecutionSession::lookup(
auto Unresolved = std::move(Symbols);
std::map<JITDylib *, MaterializationUnitList> CollectedMUsMap;
- auto Q = std::make_shared<AsynchronousSymbolQuery>(
- Unresolved, std::move(OnResolve), std::move(OnReady));
- bool QueryIsFullyResolved = false;
- bool QueryIsFullyReady = false;
- bool QueryFailed = false;
-
- runSessionLocked([&]() {
- for (auto &KV : SearchOrder) {
- assert(KV.first && "JITDylibList entries must not be null");
- assert(!CollectedMUsMap.count(KV.first) &&
- "JITDylibList should not contain duplicate entries");
-
- auto &JD = *KV.first;
- auto MatchNonExported = KV.second;
- JD.lodgeQuery(Q, Unresolved, MatchNonExported, CollectedMUsMap[&JD]);
- }
+ auto Q = std::make_shared<AsynchronousSymbolQuery>(Unresolved, RequiredState,
+ std::move(NotifyComplete));
+ bool QueryComplete = false;
+
+ auto LodgingErr = runSessionLocked([&]() -> Error {
+ auto LodgeQuery = [&]() -> Error {
+ for (auto &KV : SearchOrder) {
+ assert(KV.first && "JITDylibList entries must not be null");
+ assert(!CollectedMUsMap.count(KV.first) &&
+ "JITDylibList should not contain duplicate entries");
+
+ auto &JD = *KV.first;
+ auto MatchNonExported = KV.second;
+ if (auto Err = JD.lodgeQuery(Q, Unresolved, MatchNonExported,
+ CollectedMUsMap[&JD]))
+ return Err;
+ }
- if (Unresolved.empty()) {
- // Query lodged successfully.
+ if (!Unresolved.empty())
+ return make_error<SymbolsNotFound>(std::move(Unresolved));
- // Record whether this query is fully ready / resolved. We will use
- // this to call handleFullyResolved/handleFullyReady outside the session
- // lock.
- QueryIsFullyResolved = Q->isFullyResolved();
- QueryIsFullyReady = Q->isFullyReady();
+ return Error::success();
+ };
- // Call the register dependencies function.
- if (RegisterDependencies && !Q->QueryRegistrations.empty())
- RegisterDependencies(Q->QueryRegistrations);
- } else {
- // Query failed due to unresolved symbols.
- QueryFailed = true;
+ if (auto Err = LodgeQuery()) {
+ // Query failed.
// Disconnect the query from its dependencies.
Q->detach();
@@ -1791,19 +1752,32 @@ void ExecutionSession::lookup(
for (auto &KV : CollectedMUsMap)
for (auto &MU : KV.second)
KV.first->replace(std::move(MU));
+
+ return Err;
}
+
+ // Query lodged successfully.
+
+ // Record whether this query is complete. We will use this to call
+ // handleComplete outside the session lock.
+ QueryComplete = Q->isComplete();
+
+ // Call the register dependencies function.
+ if (RegisterDependencies && !Q->QueryRegistrations.empty())
+ RegisterDependencies(Q->QueryRegistrations);
+
+ return Error::success();
});
- if (QueryFailed) {
- Q->handleFailed(make_error<SymbolsNotFound>(std::move(Unresolved)));
+ if (LodgingErr) {
+ Q->handleFailed(std::move(LodgingErr));
return;
- } else {
- if (QueryIsFullyResolved)
- Q->handleFullyResolved();
- if (QueryIsFullyReady)
- Q->handleFullyReady();
}
+ if (QueryComplete)
+ Q->handleComplete();
+
// Move the MUs to the OutstandingMUs list, then materialize.
{
std::lock_guard<std::recursive_mutex> Lock(OutstandingMUsMutex);
@@ -1816,113 +1790,55 @@ void ExecutionSession::lookup(
runOutstandingMUs();
}
-Expected<SymbolMap> ExecutionSession::lookup(
- const JITDylibSearchList &SearchOrder, const SymbolNameSet &Symbols,
- RegisterDependenciesFunction RegisterDependencies, bool WaitUntilReady) {
+Expected<SymbolMap>
+ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
+ const SymbolNameSet &Symbols,
+ SymbolState RequiredState,
+ RegisterDependenciesFunction RegisterDependencies) {
#if LLVM_ENABLE_THREADS
// In the threaded case we use promises to return the results.
std::promise<SymbolMap> PromisedResult;
- std::mutex ErrMutex;
Error ResolutionError = Error::success();
- std::promise<void> PromisedReady;
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
+
+ auto NotifyComplete = [&](Expected<SymbolMap> R) {
if (R)
PromisedResult.set_value(std::move(*R));
else {
- {
- ErrorAsOutParameter _(&ResolutionError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ResolutionError = R.takeError();
- }
+ ErrorAsOutParameter _(&ResolutionError);
+ ResolutionError = R.takeError();
PromisedResult.set_value(SymbolMap());
}
};
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- if (Err) {
- ErrorAsOutParameter _(&ReadyError);
- std::lock_guard<std::mutex> Lock(ErrMutex);
- ReadyError = std::move(Err);
- }
- PromisedReady.set_value();
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
-
#else
SymbolMap Result;
Error ResolutionError = Error::success();
- Error ReadyError = Error::success();
- auto OnResolve = [&](Expected<SymbolMap> R) {
+ auto NotifyComplete = [&](Expected<SymbolMap> R) {
ErrorAsOutParameter _(&ResolutionError);
if (R)
Result = std::move(*R);
else
ResolutionError = R.takeError();
};
-
- std::function<void(Error)> OnReady;
- if (WaitUntilReady) {
- OnReady = [&](Error Err) {
- ErrorAsOutParameter _(&ReadyError);
- if (Err)
- ReadyError = std::move(Err);
- };
- } else {
- OnReady = [&](Error Err) {
- if (Err)
- reportError(std::move(Err));
- };
- }
#endif
// Perform the asynchronous lookup.
- lookup(SearchOrder, Symbols, OnResolve, OnReady, RegisterDependencies);
+ lookup(SearchOrder, Symbols, RequiredState, NotifyComplete,
+ RegisterDependencies);
#if LLVM_ENABLE_THREADS
auto ResultFuture = PromisedResult.get_future();
auto Result = ResultFuture.get();
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
- return std::move(ResolutionError);
- }
- }
-
- if (WaitUntilReady) {
- auto ReadyFuture = PromisedReady.get_future();
- ReadyFuture.get();
-
- {
- std::lock_guard<std::mutex> Lock(ErrMutex);
- if (ReadyError)
- return std::move(ReadyError);
- }
- } else
- cantFail(std::move(ReadyError));
+ if (ResolutionError)
+ return std::move(ResolutionError);
return std::move(Result);
#else
- if (ResolutionError) {
- // ReadyError will never be assigned. Consume the success value.
- cantFail(std::move(ReadyError));
+ if (ResolutionError)
return std::move(ResolutionError);
- }
-
- if (ReadyError)
- return std::move(ReadyError);
return Result;
#endif
@@ -1933,8 +1849,8 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder,
SymbolStringPtr Name) {
SymbolNameSet Names({Name});
- if (auto ResultMap = lookup(SearchOrder, std::move(Names),
- NoDependenciesToRegister, true)) {
+ if (auto ResultMap = lookup(SearchOrder, std::move(Names), SymbolState::Ready,
+ NoDependenciesToRegister)) {
assert(ResultMap->size() == 1 && "Unexpected number of results");
assert(ResultMap->count(Name) && "Missing result for symbol");
return std::move(ResultMap->begin()->second);
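The hunks above replace the separate OnResolve/OnReady callbacks with a single notify-complete callback keyed on a required SymbolState. A minimal sketch (not part of the patch) of how a caller might use the reworked blocking lookups, assuming an existing ExecutionSession ES and JITDylib JD; "main" is a placeholder symbol name and process() a hypothetical consumer:

  // Single-symbol convenience overload: blocks until the symbol is Ready.
  if (auto Sym = ES.lookup(JITDylibSearchList({{&JD, true}}), ES.intern("main")))
    process(*Sym);                        // hypothetical consumer
  else
    ES.reportError(Sym.takeError());

  // Set-based overload: the required state replaces the old WaitUntilReady flag.
  SymbolNameSet Names({ES.intern("main")});
  auto Result = ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names),
                          SymbolState::Ready, NoDependenciesToRegister);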
diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 7c3c50b4d6e5..f7fc5f8f1797 100644
--- a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -1,9 +1,8 @@
//===---- ExecutionUtils.cpp - Utilities for executing functions in Orc ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -130,8 +129,7 @@ Error CtorDtorRunner::run() {
auto &ES = JD.getExecutionSession();
if (auto CtorDtorMap =
- ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names),
- NoDependenciesToRegister, true)) {
+ ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names))) {
for (auto &KV : CtorDtorsByPriority) {
for (auto &Name : KV.second) {
assert(CtorDtorMap->count(Name) && "No entry for Name");
@@ -140,13 +138,10 @@ Error CtorDtorRunner::run() {
CtorDtor();
}
}
+ CtorDtorsByPriority.clear();
return Error::success();
} else
return CtorDtorMap.takeError();
-
- CtorDtorsByPriority.clear();
-
- return Error::success();
}
void LocalCXXRuntimeOverridesBase::runDestructors() {
@@ -179,22 +174,24 @@ Error LocalCXXRuntimeOverrides::enable(JITDylib &JD,
}
DynamicLibrarySearchGenerator::DynamicLibrarySearchGenerator(
- sys::DynamicLibrary Dylib, const DataLayout &DL, SymbolPredicate Allow)
+ sys::DynamicLibrary Dylib, char GlobalPrefix, SymbolPredicate Allow)
: Dylib(std::move(Dylib)), Allow(std::move(Allow)),
- GlobalPrefix(DL.getGlobalPrefix()) {}
+ GlobalPrefix(GlobalPrefix) {}
Expected<DynamicLibrarySearchGenerator>
-DynamicLibrarySearchGenerator::Load(const char *FileName, const DataLayout &DL,
+DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix,
SymbolPredicate Allow) {
std::string ErrMsg;
auto Lib = sys::DynamicLibrary::getPermanentLibrary(FileName, &ErrMsg);
if (!Lib.isValid())
return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
- return DynamicLibrarySearchGenerator(std::move(Lib), DL, std::move(Allow));
+ return DynamicLibrarySearchGenerator(std::move(Lib), GlobalPrefix,
+ std::move(Allow));
}
-SymbolNameSet DynamicLibrarySearchGenerator::
-operator()(JITDylib &JD, const SymbolNameSet &Names) {
+Expected<SymbolNameSet>
+DynamicLibrarySearchGenerator::operator()(JITDylib &JD,
+ const SymbolNameSet &Names) {
orc::SymbolNameSet Added;
orc::SymbolMap NewSymbols;
@@ -210,7 +207,8 @@ operator()(JITDylib &JD, const SymbolNameSet &Names) {
if (HasGlobalPrefix && (*Name).front() != GlobalPrefix)
continue;
- std::string Tmp((*Name).data() + (HasGlobalPrefix ? 1 : 0), (*Name).size());
+ std::string Tmp((*Name).data() + HasGlobalPrefix,
+ (*Name).size() - HasGlobalPrefix);
if (void *Addr = Dylib.getAddressOfSymbol(Tmp.c_str())) {
Added.insert(Name);
NewSymbols[Name] = JITEvaluatedSymbol(
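The DynamicLibrarySearchGenerator changes above take the global symbol prefix as a plain char rather than deriving it from a DataLayout, and the prefix-stripping code now copies only the remaining bytes of the name. A hedged sketch of constructing the generator with the new interface, assuming a DataLayout DL for the target and leaving the Allow predicate at its default; "libm.so" is a placeholder path:

  auto Gen = DynamicLibrarySearchGenerator::Load("libm.so", DL.getGlobalPrefix());
  if (!Gen)
    return Gen.takeError();   // propagate the failure to the caller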
diff --git a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
index d952d1be70da..81dfc02f55b2 100644
--- a/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
+++ b/lib/ExecutionEngine/Orc/IRCompileLayer.cpp
@@ -1,9 +1,8 @@
//===--------------- IRCompileLayer.cpp - IR Compiling Layer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
index 7bc0d696e3ac..e3519284613e 100644
--- a/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
+++ b/lib/ExecutionEngine/Orc/IRTransformLayer.cpp
@@ -1,9 +1,8 @@
//===-------------- IRTransformLayer.cpp - IR Transform Layer -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 82000ec5b32b..cc3656fe5dc5 100644
--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -1,9 +1,8 @@
//===---- IndirectionUtils.cpp - Utilities for call indirection in Orc ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,8 +37,8 @@ private:
void materialize(MaterializationResponsibility R) override {
SymbolMap Result;
Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
- R.resolve(Result);
- R.emit();
+ R.notifyResolved(Result);
+ R.notifyEmitted();
}
void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {
@@ -238,11 +237,11 @@ void makeStub(Function &F, Value &ImplPointer) {
Module &M = *F.getParent();
BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", &F);
IRBuilder<> Builder(EntryBlock);
- LoadInst *ImplAddr = Builder.CreateLoad(&ImplPointer);
+ LoadInst *ImplAddr = Builder.CreateLoad(F.getType(), &ImplPointer);
std::vector<Value*> CallArgs;
for (auto &A : F.args())
CallArgs.push_back(&A);
- CallInst *Call = Builder.CreateCall(ImplAddr, CallArgs);
+ CallInst *Call = Builder.CreateCall(F.getFunctionType(), ImplAddr, CallArgs);
Call->setTailCall();
Call->setAttributes(F.getAttributes());
if (F.getReturnType()->isVoidTy())
diff --git a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 4af09d196ff9..df23547a9de3 100644
--- a/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -1,9 +1,8 @@
//===----- JITTargetMachineBuilder.cpp - Build TargetMachines for JIT -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/LLJIT.cpp b/lib/ExecutionEngine/Orc/LLJIT.cpp
index e2089f9106bd..b120691faf07 100644
--- a/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -1,58 +1,37 @@
//===--------- LLJIT.cpp - An ORC-based JIT for compiling LLVM IR ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Mangler.h"
-namespace {
+namespace llvm {
+namespace orc {
- // A SimpleCompiler that owns its TargetMachine.
- class TMOwningSimpleCompiler : public llvm::orc::SimpleCompiler {
- public:
- TMOwningSimpleCompiler(std::unique_ptr<llvm::TargetMachine> TM)
- : llvm::orc::SimpleCompiler(*TM), TM(std::move(TM)) {}
- private:
- // FIXME: shared because std::functions (and thus
- // IRCompileLayer::CompileFunction) are not moveable.
- std::shared_ptr<llvm::TargetMachine> TM;
- };
+Error LLJITBuilderState::prepareForConstruction() {
-} // end anonymous namespace
+ if (!JTMB) {
+ if (auto JTMBOrErr = JITTargetMachineBuilder::detectHost())
+ JTMB = std::move(*JTMBOrErr);
+ else
+ return JTMBOrErr.takeError();
+ }
-namespace llvm {
-namespace orc {
+ return Error::success();
+}
LLJIT::~LLJIT() {
if (CompileThreads)
CompileThreads->wait();
}
-Expected<std::unique_ptr<LLJIT>>
-LLJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
- unsigned NumCompileThreads) {
-
- if (NumCompileThreads == 0) {
- // If NumCompileThreads == 0 then create a single-threaded LLJIT instance.
- auto TM = JTMB.createTargetMachine();
- if (!TM)
- return TM.takeError();
- return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
- std::move(*TM), std::move(DL)));
- }
-
- return std::unique_ptr<LLJIT>(new LLJIT(llvm::make_unique<ExecutionSession>(),
- std::move(JTMB), std::move(DL),
- NumCompileThreads));
-}
-
Error LLJIT::defineAbsolute(StringRef Name, JITEvaluatedSymbol Sym) {
auto InternedName = ES->intern(Name);
SymbolMap Symbols({{InternedName, Sym}});
@@ -65,13 +44,13 @@ Error LLJIT::addIRModule(JITDylib &JD, ThreadSafeModule TSM) {
if (auto Err = applyDataLayout(*TSM.getModule()))
return Err;
- return CompileLayer.add(JD, std::move(TSM), ES->allocateVModule());
+ return CompileLayer->add(JD, std::move(TSM), ES->allocateVModule());
}
Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
assert(Obj && "Can not add null object");
- return ObjLinkingLayer.add(JD, std::move(Obj), ES->allocateVModule());
+ return ObjLinkingLayer->add(JD, std::move(Obj), ES->allocateVModule());
}
Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
@@ -79,42 +58,76 @@ Expected<JITEvaluatedSymbol> LLJIT::lookupLinkerMangled(JITDylib &JD,
return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name));
}
-LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES,
- std::unique_ptr<TargetMachine> TM, DataLayout DL)
- : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
- ObjLinkingLayer(
- *this->ES,
- []() { return llvm::make_unique<SectionMemoryManager>(); }),
- CompileLayer(*this->ES, ObjLinkingLayer,
- TMOwningSimpleCompiler(std::move(TM))),
- CtorRunner(Main), DtorRunner(Main) {}
-
-LLJIT::LLJIT(std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
- DataLayout DL, unsigned NumCompileThreads)
- : ES(std::move(ES)), Main(this->ES->getMainJITDylib()), DL(std::move(DL)),
- ObjLinkingLayer(
- *this->ES,
- []() { return llvm::make_unique<SectionMemoryManager>(); }),
- CompileLayer(*this->ES, ObjLinkingLayer,
- ConcurrentIRCompiler(std::move(JTMB))),
- CtorRunner(Main), DtorRunner(Main) {
- assert(NumCompileThreads != 0 &&
- "Multithreaded LLJIT instance can not be created with 0 threads");
-
- // Move modules to new contexts when they're emitted so that we can compile
- // them in parallel.
- CompileLayer.setCloneToNewContextOnEmit(true);
-
- // Create a thread pool to compile on and set the execution session
- // dispatcher to use the thread pool.
- CompileThreads = llvm::make_unique<ThreadPool>(NumCompileThreads);
- this->ES->setDispatchMaterialization(
- [this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
- // FIXME: Switch to move capture once we have c++14.
- auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
- auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
- CompileThreads->async(std::move(Work));
- });
+std::unique_ptr<ObjectLayer>
+LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) {
+
+ // If the config state provided an ObjectLinkingLayer factory then use it.
+ if (S.CreateObjectLinkingLayer)
+ return S.CreateObjectLinkingLayer(ES);
+
+ // Otherwise default to creating an RTDyldObjectLinkingLayer that constructs
+ // a new SectionMemoryManager for each object.
+ auto GetMemMgr = []() { return llvm::make_unique<SectionMemoryManager>(); };
+ return llvm::make_unique<RTDyldObjectLinkingLayer>(ES, std::move(GetMemMgr));
+}
+
+Expected<IRCompileLayer::CompileFunction>
+LLJIT::createCompileFunction(LLJITBuilderState &S,
+ JITTargetMachineBuilder JTMB) {
+
+ // If there is a custom compile function creator set, then use it.
+ if (S.CreateCompileFunction)
+ return S.CreateCompileFunction(std::move(JTMB));
+
+ // Otherwise default to creating a SimpleCompiler, or ConcurrentIRCompiler,
+ // depending on the number of threads requested.
+ if (S.NumCompileThreads > 0)
+ return ConcurrentIRCompiler(std::move(JTMB));
+
+ auto TM = JTMB.createTargetMachine();
+ if (!TM)
+ return TM.takeError();
+
+ return TMOwningSimpleCompiler(std::move(*TM));
+}
+
+LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
+ : ES(S.ES ? std::move(S.ES) : llvm::make_unique<ExecutionSession>()),
+ Main(this->ES->getMainJITDylib()), DL(""), CtorRunner(Main),
+ DtorRunner(Main) {
+
+ ErrorAsOutParameter _(&Err);
+
+ ObjLinkingLayer = createObjectLinkingLayer(S, *ES);
+
+ if (auto DLOrErr = S.JTMB->getDefaultDataLayoutForTarget())
+ DL = std::move(*DLOrErr);
+ else {
+ Err = DLOrErr.takeError();
+ return;
+ }
+
+ {
+ auto CompileFunction = createCompileFunction(S, std::move(*S.JTMB));
+ if (!CompileFunction) {
+ Err = CompileFunction.takeError();
+ return;
+ }
+ CompileLayer = llvm::make_unique<IRCompileLayer>(
+ *ES, *ObjLinkingLayer, std::move(*CompileFunction));
+ }
+
+ if (S.NumCompileThreads > 0) {
+ CompileLayer->setCloneToNewContextOnEmit(true);
+ CompileThreads = llvm::make_unique<ThreadPool>(S.NumCompileThreads);
+ ES->setDispatchMaterialization(
+ [this](JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
+ // FIXME: Switch to move capture once we have c++14.
+ auto SharedMU = std::shared_ptr<MaterializationUnit>(std::move(MU));
+ auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); };
+ CompileThreads->async(std::move(Work));
+ });
+ }
}
std::string LLJIT::mangle(StringRef UnmangledName) {
@@ -143,35 +156,11 @@ void LLJIT::recordCtorDtors(Module &M) {
DtorRunner.add(getDestructors(M));
}
-Expected<std::unique_ptr<LLLazyJIT>>
-LLLazyJIT::Create(JITTargetMachineBuilder JTMB, DataLayout DL,
- JITTargetAddress ErrorAddr, unsigned NumCompileThreads) {
- auto ES = llvm::make_unique<ExecutionSession>();
-
- const Triple &TT = JTMB.getTargetTriple();
-
- auto LCTMgr = createLocalLazyCallThroughManager(TT, *ES, ErrorAddr);
- if (!LCTMgr)
- return LCTMgr.takeError();
-
- auto ISMBuilder = createLocalIndirectStubsManagerBuilder(TT);
- if (!ISMBuilder)
- return make_error<StringError>(
- std::string("No indirect stubs manager builder for ") + TT.str(),
- inconvertibleErrorCode());
-
- if (NumCompileThreads == 0) {
- auto TM = JTMB.createTargetMachine();
- if (!TM)
- return TM.takeError();
- return std::unique_ptr<LLLazyJIT>(
- new LLLazyJIT(std::move(ES), std::move(*TM), std::move(DL),
- std::move(*LCTMgr), std::move(ISMBuilder)));
- }
-
- return std::unique_ptr<LLLazyJIT>(new LLLazyJIT(
- std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads,
- std::move(*LCTMgr), std::move(ISMBuilder)));
+Error LLLazyJITBuilderState::prepareForConstruction() {
+ if (auto Err = LLJITBuilderState::prepareForConstruction())
+ return Err;
+ TT = JTMB->getTargetTriple();
+ return Error::success();
}
Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
@@ -182,28 +171,55 @@ Error LLLazyJIT::addLazyIRModule(JITDylib &JD, ThreadSafeModule TSM) {
recordCtorDtors(*TSM.getModule());
- return CODLayer.add(JD, std::move(TSM), ES->allocateVModule());
+ return CODLayer->add(JD, std::move(TSM), ES->allocateVModule());
}
-LLLazyJIT::LLLazyJIT(
- std::unique_ptr<ExecutionSession> ES, std::unique_ptr<TargetMachine> TM,
- DataLayout DL, std::unique_ptr<LazyCallThroughManager> LCTMgr,
- std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
- : LLJIT(std::move(ES), std::move(TM), std::move(DL)),
- LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
- CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
- std::move(ISMBuilder)) {}
-
-LLLazyJIT::LLLazyJIT(
- std::unique_ptr<ExecutionSession> ES, JITTargetMachineBuilder JTMB,
- DataLayout DL, unsigned NumCompileThreads,
- std::unique_ptr<LazyCallThroughManager> LCTMgr,
- std::function<std::unique_ptr<IndirectStubsManager>()> ISMBuilder)
- : LLJIT(std::move(ES), std::move(JTMB), std::move(DL), NumCompileThreads),
- LCTMgr(std::move(LCTMgr)), TransformLayer(*this->ES, CompileLayer),
- CODLayer(*this->ES, TransformLayer, *this->LCTMgr,
- std::move(ISMBuilder)) {
- CODLayer.setCloneToNewContextOnEmit(true);
+LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
+
+ // If LLJIT construction failed then bail out.
+ if (Err)
+ return;
+
+ ErrorAsOutParameter _(&Err);
+
+ // Take/Create the lazy-compile callthrough manager.
+ if (S.LCTMgr)
+ LCTMgr = std::move(S.LCTMgr);
+ else {
+ if (auto LCTMgrOrErr = createLocalLazyCallThroughManager(
+ S.TT, *ES, S.LazyCompileFailureAddr))
+ LCTMgr = std::move(*LCTMgrOrErr);
+ else {
+ Err = LCTMgrOrErr.takeError();
+ return;
+ }
+ }
+
+ // Take/Create the indirect stubs manager builder.
+ auto ISMBuilder = std::move(S.ISMBuilder);
+
+ // If none was provided, try to build one.
+ if (!ISMBuilder)
+ ISMBuilder = createLocalIndirectStubsManagerBuilder(S.TT);
+
+ // No luck. Bail out.
+ if (!ISMBuilder) {
+ Err = make_error<StringError>("Could not construct "
+ "IndirectStubsManagerBuilder for target " +
+ S.TT.str(),
+ inconvertibleErrorCode());
+ return;
+ }
+
+ // Create the transform layer.
+ TransformLayer = llvm::make_unique<IRTransformLayer>(*ES, *CompileLayer);
+
+ // Create the COD layer.
+ CODLayer = llvm::make_unique<CompileOnDemandLayer>(
+ *ES, *TransformLayer, *LCTMgr, std::move(ISMBuilder));
+
+ if (S.NumCompileThreads > 0)
+ CODLayer->setCloneToNewContextOnEmit(true);
}
} // End namespace orc.
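The LLJIT.cpp changes above retire the static Create factories in favour of a builder-state constructor. A hedged sketch of the builder-style entry point this enables, assuming the LLJITBuilder helper declared in LLJIT.h (not shown in this patch) and an existing JITDylib JD and ThreadSafeModule TSM; ExitOnError is used only to keep the sketch short. With no JITTargetMachineBuilder supplied, prepareForConstruction falls back to detectHost():

  ExitOnError ExitOnErr;
  auto J = ExitOnErr(LLJITBuilder().create());     // host target detected by default
  ExitOnErr(J->addIRModule(JD, std::move(TSM)));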
diff --git a/lib/ExecutionEngine/Orc/Layer.cpp b/lib/ExecutionEngine/Orc/Layer.cpp
index 11af76825e9f..3ed2dabf4545 100644
--- a/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/lib/ExecutionEngine/Orc/Layer.cpp
@@ -1,9 +1,8 @@
//===-------------------- Layer.cpp - Layer interfaces --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -88,17 +87,15 @@ void BasicIRLayerMaterializationUnit::materialize(
#ifndef NDEBUG
auto &ES = R.getTargetJITDylib().getExecutionSession();
+ auto &N = R.getTargetJITDylib().getName();
#endif // NDEBUG
auto Lock = TSM.getContextLock();
- LLVM_DEBUG(ES.runSessionLocked([&]() {
- dbgs() << "Emitting, for " << R.getTargetJITDylib().getName() << ", "
- << *this << "\n";
- }););
+ LLVM_DEBUG(ES.runSessionLocked(
+ [&]() { dbgs() << "Emitting, for " << N << ", " << *this << "\n"; }););
L.emit(std::move(R), std::move(TSM));
LLVM_DEBUG(ES.runSessionLocked([&]() {
- dbgs() << "Finished emitting, for " << R.getTargetJITDylib().getName()
- << ", " << *this << "\n";
+ dbgs() << "Finished emitting, for " << N << ", " << *this << "\n";
}););
}
diff --git a/lib/ExecutionEngine/Orc/LazyReexports.cpp b/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 55f4a7c5afce..fc8205845654 100644
--- a/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -1,9 +1,8 @@
//===---------- LazyReexports.cpp - Utilities for lazy reexports ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -52,18 +51,15 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) {
SymbolName = I->second.second;
}
- auto LookupResult = ES.lookup(JITDylibSearchList({{SourceJD, true}}),
- {SymbolName}, NoDependenciesToRegister, true);
+ auto LookupResult =
+ ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName);
if (!LookupResult) {
ES.reportError(LookupResult.takeError());
return ErrorHandlerAddr;
}
- assert(LookupResult->size() == 1 && "Unexpected number of results");
- assert(LookupResult->count(SymbolName) && "Unexpected result");
-
- auto ResolvedAddr = LookupResult->begin()->second.getAddress();
+ auto ResolvedAddr = LookupResult->getAddress();
std::shared_ptr<NotifyResolvedFunction> NotifyResolved = nullptr;
{
@@ -182,8 +178,8 @@ void LazyReexportsMaterializationUnit::materialize(
for (auto &Alias : RequestedAliases)
Stubs[Alias.first] = ISManager.findStub(*Alias.first, false);
- R.resolve(Stubs);
- R.emit();
+ R.notifyResolved(Stubs);
+ R.notifyEmitted();
}
void LazyReexportsMaterializationUnit::discard(const JITDylib &JD,
diff --git a/lib/ExecutionEngine/Orc/Legacy.cpp b/lib/ExecutionEngine/Orc/Legacy.cpp
index ddb72544b770..ce6368b57a89 100644
--- a/lib/ExecutionEngine/Orc/Legacy.cpp
+++ b/lib/ExecutionEngine/Orc/Legacy.cpp
@@ -1,9 +1,8 @@
//===------- Legacy.cpp - Adapters for ExecutionEngine API interop --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -37,8 +36,7 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols,
};
auto Q = std::make_shared<AsynchronousSymbolQuery>(
- InternedSymbols, OnResolvedWithUnwrap,
- [this](Error Err) { ES.reportError(std::move(Err)); });
+ InternedSymbols, SymbolState::Resolved, OnResolvedWithUnwrap);
auto Unresolved = R.lookup(Q, InternedSymbols);
if (Unresolved.empty()) {
diff --git a/lib/ExecutionEngine/Orc/NullResolver.cpp b/lib/ExecutionEngine/Orc/NullResolver.cpp
index 922fc6f021ce..5b4345b870bb 100644
--- a/lib/ExecutionEngine/Orc/NullResolver.cpp
+++ b/lib/ExecutionEngine/Orc/NullResolver.cpp
@@ -1,9 +1,8 @@
//===---------- NullResolver.cpp - Reject symbol lookup requests ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
new file mode 100644
index 000000000000..def0b300eca1
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -0,0 +1,483 @@
+//===------- ObjectLinkingLayer.cpp - JITLink backed ORC ObjectLayer ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+
+#include <vector>
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::orc;
+
+namespace llvm {
+namespace orc {
+
+class ObjectLinkingLayerJITLinkContext final : public JITLinkContext {
+public:
+ ObjectLinkingLayerJITLinkContext(ObjectLinkingLayer &Layer,
+ MaterializationResponsibility MR,
+ std::unique_ptr<MemoryBuffer> ObjBuffer)
+ : Layer(Layer), MR(std::move(MR)), ObjBuffer(std::move(ObjBuffer)) {}
+
+ JITLinkMemoryManager &getMemoryManager() override { return Layer.MemMgr; }
+
+ MemoryBufferRef getObjectBuffer() const override {
+ return ObjBuffer->getMemBufferRef();
+ }
+
+ void notifyFailed(Error Err) override {
+ Layer.getExecutionSession().reportError(std::move(Err));
+ MR.failMaterialization();
+ }
+
+ void lookup(const DenseSet<StringRef> &Symbols,
+ JITLinkAsyncLookupContinuation LookupContinuation) override {
+
+ JITDylibSearchList SearchOrder;
+ MR.getTargetJITDylib().withSearchOrderDo(
+ [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
+
+ auto &ES = Layer.getExecutionSession();
+
+ SymbolNameSet InternedSymbols;
+ for (auto &S : Symbols)
+ InternedSymbols.insert(ES.intern(S));
+
+ // OnResolve -- De-intern the symbols and pass the result to the linker.
+ // FIXME: Capture LookupContinuation by move once we have c++14.
+ auto SharedLookupContinuation =
+ std::make_shared<JITLinkAsyncLookupContinuation>(
+ std::move(LookupContinuation));
+ auto OnResolve = [SharedLookupContinuation](Expected<SymbolMap> Result) {
+ if (!Result)
+ (*SharedLookupContinuation)(Result.takeError());
+ else {
+ AsyncLookupResult LR;
+ for (auto &KV : *Result)
+ LR[*KV.first] = KV.second;
+ (*SharedLookupContinuation)(std::move(LR));
+ }
+ };
+
+ ES.lookup(SearchOrder, std::move(InternedSymbols), SymbolState::Resolved,
+ std::move(OnResolve), [this](const SymbolDependenceMap &Deps) {
+ registerDependencies(Deps);
+ });
+ }
+
+ void notifyResolved(AtomGraph &G) override {
+ auto &ES = Layer.getExecutionSession();
+
+ SymbolFlagsMap ExtraSymbolsToClaim;
+ bool AutoClaim = Layer.AutoClaimObjectSymbols;
+
+ SymbolMap InternedResult;
+ for (auto *DA : G.defined_atoms())
+ if (DA->hasName() && DA->isGlobal()) {
+ auto InternedName = ES.intern(DA->getName());
+ JITSymbolFlags Flags;
+
+ if (DA->isExported())
+ Flags |= JITSymbolFlags::Exported;
+ if (DA->isWeak())
+ Flags |= JITSymbolFlags::Weak;
+ if (DA->isCallable())
+ Flags |= JITSymbolFlags::Callable;
+ if (DA->isCommon())
+ Flags |= JITSymbolFlags::Common;
+
+ InternedResult[InternedName] =
+ JITEvaluatedSymbol(DA->getAddress(), Flags);
+ if (AutoClaim && !MR.getSymbols().count(InternedName)) {
+ assert(!ExtraSymbolsToClaim.count(InternedName) &&
+ "Duplicate symbol to claim?");
+ ExtraSymbolsToClaim[InternedName] = Flags;
+ }
+ }
+
+ for (auto *A : G.absolute_atoms())
+ if (A->hasName()) {
+ auto InternedName = ES.intern(A->getName());
+ JITSymbolFlags Flags;
+ Flags |= JITSymbolFlags::Absolute;
+ if (A->isWeak())
+ Flags |= JITSymbolFlags::Weak;
+ if (A->isCallable())
+ Flags |= JITSymbolFlags::Callable;
+ InternedResult[InternedName] =
+ JITEvaluatedSymbol(A->getAddress(), Flags);
+ if (AutoClaim && !MR.getSymbols().count(InternedName)) {
+ assert(!ExtraSymbolsToClaim.count(InternedName) &&
+ "Duplicate symbol to claim?");
+ ExtraSymbolsToClaim[InternedName] = Flags;
+ }
+ }
+
+ if (!ExtraSymbolsToClaim.empty())
+ if (auto Err = MR.defineMaterializing(ExtraSymbolsToClaim))
+ return notifyFailed(std::move(Err));
+
+ MR.notifyResolved(InternedResult);
+
+ Layer.notifyLoaded(MR);
+ }
+
+ void notifyFinalized(
+ std::unique_ptr<JITLinkMemoryManager::Allocation> A) override {
+
+ if (auto Err = Layer.notifyEmitted(MR, std::move(A))) {
+ Layer.getExecutionSession().reportError(std::move(Err));
+ MR.failMaterialization();
+
+ return;
+ }
+ MR.notifyEmitted();
+ }
+
+ AtomGraphPassFunction getMarkLivePass(const Triple &TT) const override {
+ return [this](AtomGraph &G) { return markResponsibilitySymbolsLive(G); };
+ }
+
+ Error modifyPassConfig(const Triple &TT, PassConfiguration &Config) override {
+ // Add passes to mark duplicate defs as should-discard, and to walk the
+ // atom graph to build the symbol dependence graph.
+ Config.PrePrunePasses.push_back(
+ [this](AtomGraph &G) { return markSymbolsToDiscard(G); });
+ Config.PostPrunePasses.push_back(
+ [this](AtomGraph &G) { return computeNamedSymbolDependencies(G); });
+
+ Layer.modifyPassConfig(MR, TT, Config);
+
+ return Error::success();
+ }
+
+private:
+ using AnonAtomNamedDependenciesMap =
+ DenseMap<const DefinedAtom *, SymbolNameSet>;
+
+ Error markSymbolsToDiscard(AtomGraph &G) {
+ auto &ES = Layer.getExecutionSession();
+ for (auto *DA : G.defined_atoms())
+ if (DA->isWeak() && DA->hasName()) {
+ auto S = ES.intern(DA->getName());
+ auto I = MR.getSymbols().find(S);
+ if (I == MR.getSymbols().end())
+ DA->setShouldDiscard(true);
+ }
+
+ for (auto *A : G.absolute_atoms())
+ if (A->isWeak() && A->hasName()) {
+ auto S = ES.intern(A->getName());
+ auto I = MR.getSymbols().find(S);
+ if (I == MR.getSymbols().end())
+ A->setShouldDiscard(true);
+ }
+
+ return Error::success();
+ }
+
+ Error markResponsibilitySymbolsLive(AtomGraph &G) const {
+ auto &ES = Layer.getExecutionSession();
+ for (auto *DA : G.defined_atoms())
+ if (DA->hasName() &&
+ MR.getSymbols().count(ES.intern(DA->getName())))
+ DA->setLive(true);
+ return Error::success();
+ }
+
+ Error computeNamedSymbolDependencies(AtomGraph &G) {
+ auto &ES = MR.getTargetJITDylib().getExecutionSession();
+ auto AnonDeps = computeAnonDeps(G);
+
+ for (auto *DA : G.defined_atoms()) {
+
+ // Skip anonymous and non-global atoms: we do not need dependencies for
+ // these.
+ if (!DA->hasName() || !DA->isGlobal())
+ continue;
+
+ auto DAName = ES.intern(DA->getName());
+ SymbolNameSet &DADeps = NamedSymbolDeps[DAName];
+
+ for (auto &E : DA->edges()) {
+ auto &TA = E.getTarget();
+
+ if (TA.hasName())
+ DADeps.insert(ES.intern(TA.getName()));
+ else {
+ assert(TA.isDefined() && "Anonymous atoms must be defined");
+ auto &DTA = static_cast<DefinedAtom &>(TA);
+ auto I = AnonDeps.find(&DTA);
+ if (I != AnonDeps.end())
+ for (auto &S : I->second)
+ DADeps.insert(S);
+ }
+ }
+ }
+
+ return Error::success();
+ }
+
+ AnonAtomNamedDependenciesMap computeAnonDeps(AtomGraph &G) {
+
+ auto &ES = MR.getTargetJITDylib().getExecutionSession();
+ AnonAtomNamedDependenciesMap DepMap;
+
+ // For all anonymous atoms:
+ // (1) Add their named dependencies.
+ // (2) Add them to the worklist for further iteration if they depend on
+ // any other anonymous atoms.
+ struct WorklistEntry {
+ WorklistEntry(DefinedAtom *DA, DenseSet<DefinedAtom *> DAAnonDeps)
+ : DA(DA), DAAnonDeps(std::move(DAAnonDeps)) {}
+
+ DefinedAtom *DA = nullptr;
+ DenseSet<DefinedAtom *> DAAnonDeps;
+ };
+ std::vector<WorklistEntry> Worklist;
+ for (auto *DA : G.defined_atoms())
+ if (!DA->hasName()) {
+ auto &DANamedDeps = DepMap[DA];
+ DenseSet<DefinedAtom *> DAAnonDeps;
+
+ for (auto &E : DA->edges()) {
+ auto &TA = E.getTarget();
+ if (TA.hasName())
+ DANamedDeps.insert(ES.intern(TA.getName()));
+ else {
+ assert(TA.isDefined() && "Anonymous atoms must be defined");
+ DAAnonDeps.insert(static_cast<DefinedAtom *>(&TA));
+ }
+ }
+
+ if (!DAAnonDeps.empty())
+ Worklist.push_back(WorklistEntry(DA, std::move(DAAnonDeps)));
+ }
+
+ // Loop over all anonymous atoms with anonymous dependencies, propagating
+ // their respective *named* dependencies. Iterate until we hit a stable
+ // state.
+ bool Changed;
+ do {
+ Changed = false;
+ for (auto &WLEntry : Worklist) {
+ auto *DA = WLEntry.DA;
+ auto &DANamedDeps = DepMap[DA];
+ auto &DAAnonDeps = WLEntry.DAAnonDeps;
+
+ for (auto *TA : DAAnonDeps) {
+ auto I = DepMap.find(TA);
+ if (I != DepMap.end())
+ for (const auto &S : I->second)
+ Changed |= DANamedDeps.insert(S).second;
+ }
+ }
+ } while (Changed);
+
+ return DepMap;
+ }
+
+ void registerDependencies(const SymbolDependenceMap &QueryDeps) {
+ for (auto &NamedDepsEntry : NamedSymbolDeps) {
+ auto &Name = NamedDepsEntry.first;
+ auto &NameDeps = NamedDepsEntry.second;
+ SymbolDependenceMap SymbolDeps;
+
+ for (const auto &QueryDepsEntry : QueryDeps) {
+ JITDylib &SourceJD = *QueryDepsEntry.first;
+ const SymbolNameSet &Symbols = QueryDepsEntry.second;
+ auto &DepsForJD = SymbolDeps[&SourceJD];
+
+ for (const auto &S : Symbols)
+ if (NameDeps.count(S))
+ DepsForJD.insert(S);
+
+ if (DepsForJD.empty())
+ SymbolDeps.erase(&SourceJD);
+ }
+
+ MR.addDependencies(Name, SymbolDeps);
+ }
+ }
+
+ ObjectLinkingLayer &Layer;
+ MaterializationResponsibility MR;
+ std::unique_ptr<MemoryBuffer> ObjBuffer;
+ DenseMap<SymbolStringPtr, SymbolNameSet> NamedSymbolDeps;
+};
+
+ObjectLinkingLayer::Plugin::~Plugin() {}
+
+ObjectLinkingLayer::ObjectLinkingLayer(ExecutionSession &ES,
+ JITLinkMemoryManager &MemMgr)
+ : ObjectLayer(ES), MemMgr(MemMgr) {}
+
+ObjectLinkingLayer::~ObjectLinkingLayer() {
+ if (auto Err = removeAllModules())
+ getExecutionSession().reportError(std::move(Err));
+}
+
+void ObjectLinkingLayer::emit(MaterializationResponsibility R,
+ std::unique_ptr<MemoryBuffer> O) {
+ assert(O && "Object must not be null");
+ jitLink(llvm::make_unique<ObjectLinkingLayerJITLinkContext>(
+ *this, std::move(R), std::move(O)));
+}
+
+void ObjectLinkingLayer::modifyPassConfig(MaterializationResponsibility &MR,
+ const Triple &TT,
+ PassConfiguration &PassConfig) {
+ for (auto &P : Plugins)
+ P->modifyPassConfig(MR, TT, PassConfig);
+}
+
+void ObjectLinkingLayer::notifyLoaded(MaterializationResponsibility &MR) {
+ for (auto &P : Plugins)
+ P->notifyLoaded(MR);
+}
+
+Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
+ AllocPtr Alloc) {
+ Error Err = Error::success();
+ for (auto &P : Plugins)
+ Err = joinErrors(std::move(Err), P->notifyEmitted(MR));
+
+ if (Err)
+ return Err;
+
+ {
+ std::lock_guard<std::mutex> Lock(LayerMutex);
+ UntrackedAllocs.push_back(std::move(Alloc));
+ }
+
+ return Error::success();
+}
+
+Error ObjectLinkingLayer::removeModule(VModuleKey K) {
+ Error Err = Error::success();
+
+ for (auto &P : Plugins)
+ Err = joinErrors(std::move(Err), P->notifyRemovingModule(K));
+
+ AllocPtr Alloc;
+
+ {
+ std::lock_guard<std::mutex> Lock(LayerMutex);
+ auto AllocItr = TrackedAllocs.find(K);
+ Alloc = std::move(AllocItr->second);
+ TrackedAllocs.erase(AllocItr);
+ }
+
+ assert(Alloc && "No allocation for key K");
+
+ return joinErrors(std::move(Err), Alloc->deallocate());
+}
+
+Error ObjectLinkingLayer::removeAllModules() {
+
+ Error Err = Error::success();
+
+ for (auto &P : Plugins)
+ Err = joinErrors(std::move(Err), P->notifyRemovingAllModules());
+
+ std::vector<AllocPtr> Allocs;
+ {
+ std::lock_guard<std::mutex> Lock(LayerMutex);
+ Allocs = std::move(UntrackedAllocs);
+
+ for (auto &KV : TrackedAllocs)
+ Allocs.push_back(std::move(KV.second));
+
+ TrackedAllocs.clear();
+ }
+
+ while (!Allocs.empty()) {
+ Err = joinErrors(std::move(Err), Allocs.back()->deallocate());
+ Allocs.pop_back();
+ }
+
+ return Err;
+}
+
+EHFrameRegistrationPlugin::EHFrameRegistrationPlugin(
+ jitlink::EHFrameRegistrar &Registrar)
+ : Registrar(Registrar) {}
+
+void EHFrameRegistrationPlugin::modifyPassConfig(
+ MaterializationResponsibility &MR, const Triple &TT,
+ PassConfiguration &PassConfig) {
+ assert(!InProcessLinks.count(&MR) && "Link for MR already being tracked?");
+
+ PassConfig.PostFixupPasses.push_back(
+ createEHFrameRecorderPass(TT, [this, &MR](JITTargetAddress Addr) {
+ if (Addr)
+ InProcessLinks[&MR] = Addr;
+ }));
+}
+
+Error EHFrameRegistrationPlugin::notifyEmitted(
+ MaterializationResponsibility &MR) {
+
+ auto EHFrameAddrItr = InProcessLinks.find(&MR);
+ if (EHFrameAddrItr == InProcessLinks.end())
+ return Error::success();
+
+ auto EHFrameAddr = EHFrameAddrItr->second;
+ assert(EHFrameAddr && "eh-frame addr to register can not be null");
+
+ InProcessLinks.erase(EHFrameAddrItr);
+ if (auto Key = MR.getVModuleKey())
+ TrackedEHFrameAddrs[Key] = EHFrameAddr;
+ else
+ UntrackedEHFrameAddrs.push_back(EHFrameAddr);
+
+ return Registrar.registerEHFrames(EHFrameAddr);
+}
+
+Error EHFrameRegistrationPlugin::notifyRemovingModule(VModuleKey K) {
+ auto EHFrameAddrItr = TrackedEHFrameAddrs.find(K);
+ if (EHFrameAddrItr == TrackedEHFrameAddrs.end())
+ return Error::success();
+
+ auto EHFrameAddr = EHFrameAddrItr->second;
+ assert(EHFrameAddr && "Tracked eh-frame addr must not be null");
+
+ TrackedEHFrameAddrs.erase(EHFrameAddrItr);
+
+ return Registrar.deregisterEHFrames(EHFrameAddr);
+}
+
+Error EHFrameRegistrationPlugin::notifyRemovingAllModules() {
+
+ std::vector<JITTargetAddress> EHFrameAddrs = std::move(UntrackedEHFrameAddrs);
+ EHFrameAddrs.reserve(EHFrameAddrs.size() + TrackedEHFrameAddrs.size());
+
+ for (auto &KV : TrackedEHFrameAddrs)
+ EHFrameAddrs.push_back(KV.second);
+
+ TrackedEHFrameAddrs.clear();
+
+ Error Err = Error::success();
+
+ while (!EHFrameAddrs.empty()) {
+ auto EHFrameAddr = EHFrameAddrs.back();
+ assert(EHFrameAddr && "Untracked eh-frame addr must not be null");
+ EHFrameAddrs.pop_back();
+ Err = joinErrors(std::move(Err), Registrar.deregisterEHFrames(EHFrameAddr));
+ }
+
+ return Err;
+}
+
+} // End namespace orc.
+} // End namespace llvm.
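A hypothetical sketch of wiring the new JITLink-backed layer into a session. ES is an existing ExecutionSession, MemMgr an assumed jitlink::JITLinkMemoryManager implementation, and Registrar an assumed jitlink::EHFrameRegistrar; the addPlugin call that populates the Plugins list is assumed from the header and does not appear in this file:

  ObjectLinkingLayer ObjLayer(ES, MemMgr);
  ObjLayer.addPlugin(llvm::make_unique<EHFrameRegistrationPlugin>(Registrar));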
diff --git a/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp b/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
index 825f53204736..815517321b76 100644
--- a/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
+++ b/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp
@@ -1,9 +1,8 @@
//===---------- ObjectTransformLayer.cpp - Object Transform Layer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index aa4055542426..8ed23de419d1 100644
--- a/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -1,9 +1,8 @@
//===------------- OrcABISupport.cpp - ABI specific support code ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -148,7 +147,7 @@ Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
const unsigned StubSize = IndirectStubsInfo::StubSize;
// Emit at least MinStubs, rounded up to fill the pages allocated.
- unsigned PageSize = sys::Process::getPageSize();
+ static const unsigned PageSize = sys::Process::getPageSizeEstimate();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
@@ -230,7 +229,7 @@ Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
const unsigned StubSize = IndirectStubsInfo::StubSize;
// Emit at least MinStubs, rounded up to fill the pages allocated.
- unsigned PageSize = sys::Process::getPageSize();
+ static const unsigned PageSize = sys::Process::getPageSizeEstimate();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
@@ -498,7 +497,7 @@ Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
const unsigned StubSize = IndirectStubsInfo::StubSize;
// Emit at least MinStubs, rounded up to fill the pages allocated.
- unsigned PageSize = sys::Process::getPageSize();
+ static const unsigned PageSize = sys::Process::getPageSizeEstimate();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
@@ -684,7 +683,7 @@ Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
const unsigned StubSize = IndirectStubsInfo::StubSize;
// Emit at least MinStubs, rounded up to fill the pages allocated.
- unsigned PageSize = sys::Process::getPageSize();
+ static const unsigned PageSize = sys::Process::getPageSizeEstimate();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
@@ -930,7 +929,7 @@ Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
const unsigned StubSize = IndirectStubsInfo::StubSize;
// Emit at least MinStubs, rounded up to fill the pages allocated.
- unsigned PageSize = sys::Process::getPageSize();
+ static const unsigned PageSize = sys::Process::getPageSizeEstimate();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
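The stub-block sizing in these hunks now uses a cached page-size estimate. A small worked example of the rounding, assuming a 4096-byte page estimate, an illustrative 8-byte stub, and MinStubs == 100: the block is rounded up to whole pages, then refilled with as many stubs as fit.

  static const unsigned PageSize = 4096;   // stand-in for sys::Process::getPageSizeEstimate()
  const unsigned StubSize = 8;             // illustrative stub size
  unsigned MinStubs = 100;
  unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;  // == 1
  unsigned NumStubs = (NumPages * PageSize) / StubSize;                     // == 512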
diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
index 6dea64a6e78f..28c8479abba4 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp
+++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp
@@ -1,9 +1,8 @@
//===----------- OrcCBindings.cpp - C bindings for the Orc APIs -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 817a4b89bfb0..98129e1690d2 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -1,9 +1,8 @@
//===- OrcCBindingsStack.h - Orc JIT stack for C bindings -----*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -154,8 +153,8 @@ private:
for (auto &S : Symbols) {
if (auto Sym = findSymbol(*S)) {
if (auto Addr = Sym.getAddress()) {
- Query->resolve(S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
- Query->notifySymbolReady();
+ Query->notifySymbolMetRequiredState(
+ S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
} else {
Stack.ES.legacyFailQuery(*Query, Addr.takeError());
return orc::SymbolNameSet();
@@ -167,11 +166,8 @@ private:
UnresolvedSymbols.insert(S);
}
- if (Query->isFullyResolved())
- Query->handleFullyResolved();
-
- if (Query->isFullyReady())
- Query->handleFullyReady();
+ if (Query->isComplete())
+ Query->handleComplete();
return UnresolvedSymbols;
}
@@ -215,28 +211,31 @@ public:
IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
: CCMgr(createCompileCallbackManager(TM, ES)), DL(TM.createDataLayout()),
IndirectStubsMgr(IndirectStubsMgrBuilder()),
- ObjectLayer(ES,
- [this](orc::VModuleKey K) {
- auto ResolverI = Resolvers.find(K);
- assert(ResolverI != Resolvers.end() &&
- "No resolver for module K");
- auto Resolver = std::move(ResolverI->second);
- Resolvers.erase(ResolverI);
- return ObjLayerT::Resources{
- std::make_shared<SectionMemoryManager>(), Resolver};
- },
- nullptr,
- [this](orc::VModuleKey K, const object::ObjectFile &Obj,
- const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
- this->notifyFinalized(K, Obj, LoadedObjInfo);
- },
- [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
- this->notifyFreed(K, Obj);
- }),
- CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+ ObjectLayer(
+ AcknowledgeORCv1Deprecation, ES,
+ [this](orc::VModuleKey K) {
+ auto ResolverI = Resolvers.find(K);
+ assert(ResolverI != Resolvers.end() &&
+ "No resolver for module K");
+ auto Resolver = std::move(ResolverI->second);
+ Resolvers.erase(ResolverI);
+ return ObjLayerT::Resources{
+ std::make_shared<SectionMemoryManager>(), Resolver};
+ },
+ nullptr,
+ [this](orc::VModuleKey K, const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
+ this->notifyFinalized(K, Obj, LoadedObjInfo);
+ },
+ [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
+ this->notifyFreed(K, Obj);
+ }),
+ CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+ orc::SimpleCompiler(TM)),
CODLayer(createCODLayer(ES, CompileLayer, CCMgr.get(),
std::move(IndirectStubsMgrBuilder), Resolvers)),
CXXRuntimeOverrides(
+ AcknowledgeORCv1Deprecation,
[this](const std::string &S) { return mangle(S); }) {}
Error shutdown() {
@@ -312,7 +311,8 @@ public:
// Run the static constructors, and save the static destructor runner for
// execution when the JIT is torn down.
- orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), K);
+ orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(
+ AcknowledgeORCv1Deprecation, std::move(CtorNames), K);
if (auto Err = CtorRunner.runViaLayer(*this))
return std::move(Err);
@@ -469,7 +469,7 @@ private:
return nullptr;
return llvm::make_unique<CODLayerT>(
- ES, CompileLayer,
+ AcknowledgeORCv1Deprecation, ES, CompileLayer,
[&Resolvers](orc::VModuleKey K) {
auto ResolverI = Resolvers.find(K);
assert(ResolverI != Resolvers.end() && "No resolver for module K");
diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp
index f4102b359a6b..e6e9a095319c 100644
--- a/lib/ExecutionEngine/Orc/OrcError.cpp
+++ b/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -1,9 +1,8 @@
//===---------------- OrcError.cpp - Error codes for ORC ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index 617bc2fc64b5..772a9c2c4ab2 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -1,9 +1,8 @@
//===-------- OrcMCJITReplacement.cpp - Orc-based MCJIT replacement -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -128,7 +127,8 @@ void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
auto &CtorDtorsMap = isDtors ? UnexecutedDestructors : UnexecutedConstructors;
for (auto &KV : CtorDtorsMap)
- cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
+ cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(
+ AcknowledgeORCv1Deprecation, std::move(KV.second), KV.first)
.runViaLayer(LazyEmitLayer));
CtorDtorsMap.clear();
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 36e7e83a8bab..169dc8f1d02b 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -1,9 +1,8 @@
//===- OrcMCJITReplacement.h - Orc based MCJIT replacement ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -177,8 +176,8 @@ class OrcMCJITReplacement : public ExecutionEngine {
for (auto &S : Symbols) {
if (auto Sym = M.findMangledSymbol(*S)) {
if (auto Addr = Sym.getAddress()) {
- Query->resolve(S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
- Query->notifySymbolReady();
+ Query->notifySymbolMetRequiredState(
+ S, JITEvaluatedSymbol(*Addr, Sym.getFlags()));
NewSymbolsResolved = true;
} else {
M.ES.legacyFailQuery(*Query, Addr.takeError());
@@ -190,8 +189,8 @@ class OrcMCJITReplacement : public ExecutionEngine {
} else {
if (auto Sym2 = M.ClientResolver->findSymbol(*S)) {
if (auto Addr = Sym2.getAddress()) {
- Query->resolve(S, JITEvaluatedSymbol(*Addr, Sym2.getFlags()));
- Query->notifySymbolReady();
+ Query->notifySymbolMetRequiredState(
+ S, JITEvaluatedSymbol(*Addr, Sym2.getFlags()));
NewSymbolsResolved = true;
} else {
M.ES.legacyFailQuery(*Query, Addr.takeError());
@@ -205,11 +204,8 @@ class OrcMCJITReplacement : public ExecutionEngine {
}
}
- if (NewSymbolsResolved && Query->isFullyResolved())
- Query->handleFullyResolved();
-
- if (NewSymbolsResolved && Query->isFullyReady())
- Query->handleFullyReady();
+ if (NewSymbolsResolved && Query->isComplete())
+ Query->handleComplete();
return UnresolvedSymbols;
}
@@ -236,24 +232,24 @@ public:
OrcMCJITReplacement(std::shared_ptr<MCJITMemoryManager> MemMgr,
std::shared_ptr<LegacyJITSymbolResolver> ClientResolver,
std::unique_ptr<TargetMachine> TM)
- : ExecutionEngine(TM->createDataLayout()),
- TM(std::move(TM)),
+ : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
MemMgr(
std::make_shared<MCJITReplacementMemMgr>(*this, std::move(MemMgr))),
Resolver(std::make_shared<LinkingORCResolver>(*this)),
ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
NotifyFinalized(*this),
ObjectLayer(
- ES,
+ AcknowledgeORCv1Deprecation, ES,
[this](VModuleKey K) {
return ObjectLayerT::Resources{this->MemMgr, this->Resolver};
},
NotifyObjectLoaded, NotifyFinalized),
- CompileLayer(ObjectLayer, SimpleCompiler(*this->TM),
+ CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+ SimpleCompiler(*this->TM),
[this](VModuleKey K, std::unique_ptr<Module> M) {
Modules.push_back(std::move(M));
}),
- LazyEmitLayer(CompileLayer) {}
+ LazyEmitLayer(AcknowledgeORCv1Deprecation, CompileLayer) {}
static void Register() {
OrcMCJITReplacementCtor = createOrcMCJITReplacement;
diff --git a/lib/ExecutionEngine/Orc/RPCUtils.cpp b/lib/ExecutionEngine/Orc/RPCUtils.cpp
index 2a7ab5ca8180..367b3639f841 100644
--- a/lib/ExecutionEngine/Orc/RPCUtils.cpp
+++ b/lib/ExecutionEngine/Orc/RPCUtils.cpp
@@ -1,9 +1,8 @@
//===--------------- RPCUtils.cpp - RPCUtils implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 299d76183cd4..b22ecd5f80a1 100644
--- a/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -1,9 +1,8 @@
//===-- RTDyldObjectLinkingLayer.cpp - RuntimeDyld backed ORC ObjectLayer -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -42,9 +41,6 @@ public:
OnResolved(Result);
};
- // We're not waiting for symbols to be ready. Just log any errors.
- auto OnReady = [&ES](Error Err) { ES.reportError(std::move(Err)); };
-
// Register dependencies for all symbols contained in this set.
auto RegisterDependencies = [&](const SymbolDependenceMap &Deps) {
MR.addDependenciesForAll(Deps);
@@ -53,8 +49,8 @@ public:
JITDylibSearchList SearchOrder;
MR.getTargetJITDylib().withSearchOrderDo(
[&](const JITDylibSearchList &JDs) { SearchOrder = JDs; });
- ES.lookup(SearchOrder, InternedSymbols, OnResolvedWithUnwrap, OnReady,
- RegisterDependencies);
+ ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved,
+ OnResolvedWithUnwrap, RegisterDependencies);
}
Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) {
@@ -78,11 +74,8 @@ namespace llvm {
namespace orc {
RTDyldObjectLinkingLayer::RTDyldObjectLinkingLayer(
- ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager,
- NotifyLoadedFunction NotifyLoaded, NotifyEmittedFunction NotifyEmitted)
- : ObjectLayer(ES), GetMemoryManager(GetMemoryManager),
- NotifyLoaded(std::move(NotifyLoaded)),
- NotifyEmitted(std::move(NotifyEmitted)) {}
+ ExecutionSession &ES, GetMemoryManagerFunction GetMemoryManager)
+ : ObjectLayer(ES), GetMemoryManager(GetMemoryManager) {}
void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
std::unique_ptr<MemoryBuffer> O) {
@@ -96,7 +89,13 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
auto &ES = getExecutionSession();
- auto Obj = object::ObjectFile::createObjectFile(*O);
+ // Create a MemoryBufferRef backed MemoryBuffer (i.e. shallow) copy of the
+ // underlying buffer to pass into RuntimeDyld. This allows us to hold
+ // ownership of the real underlying buffer and return it to the user once
+ // the object has been emitted.
+ auto ObjBuffer = MemoryBuffer::getMemBuffer(O->getMemBufferRef(), false);
+
+ auto Obj = object::ObjectFile::createObjectFile(*ObjBuffer);
if (!Obj) {
getExecutionSession().reportError(Obj.takeError());
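
The shallow-copy comment above is worth spelling out: MemoryBuffer::getMemBuffer(MemBufferRef, false) wraps the existing storage without copying it, so the original unique_ptr keeps ownership while RuntimeDyld works on the view. A small sketch, assuming llvm/Support/MemoryBuffer.h and <cassert>:

    // Owner holds the real bytes; View is a non-owning window onto them.
    std::unique_ptr<MemoryBuffer> Owner =
        MemoryBuffer::getMemBufferCopy("object bytes", "example.o");
    std::unique_ptr<MemoryBuffer> View = MemoryBuffer::getMemBuffer(
        Owner->getMemBufferRef(), /*RequiresNullTerminator=*/false);
    assert(View->getBufferStart() == Owner->getBufferStart() &&
           "shallow copy shares storage");
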
@@ -134,13 +133,8 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
JITDylibSearchOrderResolver Resolver(*SharedR);
- /* Thoughts on proper cross-dylib weak symbol handling:
- *
- * Change selection of canonical defs to be a manually triggered process, and
- * add a 'canonical' bit to symbol definitions. When canonical def selection
- * is triggered, sweep the JITDylibs to mark defs as canonical, discard
- * duplicate defs.
- */
+ // FIXME: Switch to move-capture for the 'O' buffer once we have C++14.
+ MemoryBuffer *UnownedObjBuffer = O.release();
jitLinkForORC(
**Obj, std::move(O), *MemMgr, Resolver, ProcessAllSections,
[this, K, SharedR, &Obj, InternalSymbols](
@@ -149,8 +143,9 @@ void RTDyldObjectLinkingLayer::emit(MaterializationResponsibility R,
return onObjLoad(K, *SharedR, **Obj, std::move(LoadedObjInfo),
ResolvedSymbols, *InternalSymbols);
},
- [this, K, SharedR](Error Err) {
- onObjEmit(K, *SharedR, std::move(Err));
+ [this, K, SharedR, UnownedObjBuffer](Error Err) {
+ std::unique_ptr<MemoryBuffer> ObjBuffer(UnownedObjBuffer);
+ onObjEmit(K, std::move(ObjBuffer), *SharedR, std::move(Err));
});
}
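
The release()/re-wrap dance flagged by the FIXME above is the standard C++11 workaround for the missing lambda move-capture. A hedged sketch of the idiom in isolation, with hypothetical names and the usual llvm/Support/MemoryBuffer.h and Error.h headers assumed:

    std::unique_ptr<MemoryBuffer> Buf =
        MemoryBuffer::getMemBufferCopy("payload");
    MemoryBuffer *Unowned = Buf.release();       // give up ownership for capture
    auto OnDone = [Unowned](Error Err) {
      std::unique_ptr<MemoryBuffer> Reowned(Unowned); // ownership restored here
      consumeError(std::move(Err));
    };
    OnDone(Error::success());                    // Reowned frees the buffer
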
@@ -177,7 +172,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
auto I = R.getSymbols().find(InternedName);
if (OverrideObjectFlags && I != R.getSymbols().end())
- Flags = JITSymbolFlags::stripTransientFlags(I->second);
+ Flags = I->second;
else if (AutoClaimObjectSymbols && I == R.getSymbols().end())
ExtraSymbolsToClaim[InternedName] = Flags;
}
@@ -189,7 +184,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
if (auto Err = R.defineMaterializing(ExtraSymbolsToClaim))
return Err;
- R.resolve(Symbols);
+ R.notifyResolved(Symbols);
if (NotifyLoaded)
NotifyLoaded(K, Obj, *LoadedObjInfo);
@@ -197,20 +192,29 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
return Error::success();
}
-void RTDyldObjectLinkingLayer::onObjEmit(VModuleKey K,
- MaterializationResponsibility &R,
- Error Err) {
+void RTDyldObjectLinkingLayer::onObjEmit(
+ VModuleKey K, std::unique_ptr<MemoryBuffer> ObjBuffer,
+ MaterializationResponsibility &R, Error Err) {
if (Err) {
getExecutionSession().reportError(std::move(Err));
R.failMaterialization();
return;
}
- R.emit();
+ R.notifyEmitted();
if (NotifyEmitted)
- NotifyEmitted(K);
+ NotifyEmitted(K, std::move(ObjBuffer));
}
+LegacyRTDyldObjectLinkingLayer::LegacyRTDyldObjectLinkingLayer(
+ ExecutionSession &ES, ResourcesGetter GetResources,
+ NotifyLoadedFtor NotifyLoaded, NotifyFinalizedFtor NotifyFinalized,
+ NotifyFreedFtor NotifyFreed)
+ : ES(ES), GetResources(std::move(GetResources)),
+ NotifyLoaded(std::move(NotifyLoaded)),
+ NotifyFinalized(std::move(NotifyFinalized)),
+ NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
index 9525b168fbd3..4cb7376758a7 100644
--- a/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
+++ b/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -1,10 +1,9 @@
//===-- ThreadSafeModule.cpp - Thread safe Module, Context, and Utilities
//h-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index f195d0282998..5606421a3cb0 100644
--- a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -1,9 +1,8 @@
//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -266,16 +265,22 @@ void PerfJITEventListener::notifyObjectLoaded(
consumeError(AddrOrErr.takeError());
continue;
}
- uint64_t Addr = *AddrOrErr;
uint64_t Size = P.second;
+ object::SectionedAddress Address;
+ Address.Address = *AddrOrErr;
+
+ uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+ if (auto SectOrErr = Sym.getSection())
+ if (*SectOrErr != Obj.section_end())
+ SectionIndex = SectOrErr.get()->getIndex();
// According to spec debugging info has to come before loading the
 // corresponding code load.
DILineInfoTable Lines = Context->getLineInfoForAddressRange(
- Addr, Size, FileLineInfoKind::AbsoluteFilePath);
+ {*AddrOrErr, SectionIndex}, Size, FileLineInfoKind::AbsoluteFilePath);
- NotifyDebug(Addr, Lines);
- NotifyCode(Name, Addr, Size);
+ NotifyDebug(*AddrOrErr, Lines);
+ NotifyCode(Name, *AddrOrErr, Size);
}
Dumpstream->flush();
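
For context on the change above: DWARF address-range queries now take an object::SectionedAddress rather than a bare integer, so the listener carries the section index alongside the symbol address (UndefSection when it cannot be determined). A sketch mirroring the updated call, with illustrative address and size values; Context and FileLineInfoKind are the ones already used in this file:

    object::SectionedAddress Address;
    Address.Address = 0x401000;                       // symbol load address
    Address.SectionIndex = object::SectionedAddress::UndefSection;
    DILineInfoTable Lines = Context->getLineInfoForAddressRange(
        Address, /*Size=*/64, FileLineInfoKind::AbsoluteFilePath);
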
@@ -336,8 +341,8 @@ bool PerfJITEventListener::OpenMarker() {
//
// Mapping must be PROT_EXEC to ensure it is captured by perf record
// even when not using -d option.
- MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
- MAP_PRIVATE, DumpFd, 0);
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSizeEstimate(),
+ PROT_READ | PROT_EXEC, MAP_PRIVATE, DumpFd, 0);
if (MarkerAddr == MAP_FAILED) {
errs() << "could not mmap JIT marker\n";
@@ -350,7 +355,7 @@ void PerfJITEventListener::CloseMarker() {
if (!MarkerAddr)
return;
- munmap(MarkerAddr, sys::Process::getPageSize());
+ munmap(MarkerAddr, sys::Process::getPageSizeEstimate());
MarkerAddr = nullptr;
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 0553c217c2a2..4e2d0f422f39 100644
--- a/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -1,9 +1,8 @@
//===----------- JITSymbol.cpp - JITSymbol class implementation -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index 75d4c2b5134e..46604ff4000c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -1,9 +1,8 @@
//===-- RTDyldMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,8 +32,9 @@ namespace llvm {
RTDyldMemoryManager::~RTDyldMemoryManager() {}
// Determine whether we can register EH tables.
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
- !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__))
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \
+ !(defined(_AIX) && defined(__ibmxl__)) && !defined(__SEH__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
#define HAVE_EHTABLE_SUPPORT 1
#else
#define HAVE_EHTABLE_SUPPORT 0
@@ -48,7 +48,7 @@ extern "C" void __deregister_frame(void *);
// it may be found at runtime in a dynamically-loaded library.
// For example, this happens when building LLVM with Visual C++
// but using the MingW runtime.
-void __register_frame(void *p) {
+static void __register_frame(void *p) {
static bool Searched = false;
static void((*rf)(void *)) = 0;
@@ -61,7 +61,7 @@ void __register_frame(void *p) {
rf(p);
}
-void __deregister_frame(void *p) {
+static void __deregister_frame(void *p) {
static bool Searched = false;
static void((*df)(void *)) = 0;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 53cb782c55c4..e26e6ce45db4 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1,9 +1,8 @@
//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "RuntimeDyldCOFF.h"
-#include "RuntimeDyldCheckerImpl.h"
#include "RuntimeDyldELF.h"
#include "RuntimeDyldImpl.h"
#include "RuntimeDyldMachO.h"
@@ -376,10 +374,55 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
else
return IOrErr.takeError();
- // If there is an attached checker, notify it about the stubs for this
- // section so that they can be verified.
- if (Checker)
- Checker->registerStubMap(Obj.getFileName(), SectionID, Stubs);
+ // If there is a NotifyStubEmitted callback set, call it to register any
+ // stubs created for this section.
+ if (NotifyStubEmitted) {
+ StringRef FileName = Obj.getFileName();
+ StringRef SectionName = Sections[SectionID].getName();
+ for (auto &KV : Stubs) {
+
+ auto &VR = KV.first;
+ uint64_t StubAddr = KV.second;
+
+ // If this is a named stub, just call NotifyStubEmitted.
+ if (VR.SymbolName) {
+ NotifyStubEmitted(FileName, SectionName, VR.SymbolName, SectionID,
+ StubAddr);
+ continue;
+ }
+
+ // Otherwise we will have to try a reverse lookup on the global symbol table.
+ for (auto &GSTMapEntry : GlobalSymbolTable) {
+ StringRef SymbolName = GSTMapEntry.first();
+ auto &GSTEntry = GSTMapEntry.second;
+ if (GSTEntry.getSectionID() == VR.SectionID &&
+ GSTEntry.getOffset() == VR.Offset) {
+ NotifyStubEmitted(FileName, SectionName, SymbolName, SectionID,
+ StubAddr);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Process remaining sections
+ if (ProcessAllSections) {
+ LLVM_DEBUG(dbgs() << "Process remaining sections:\n");
+ for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
+ SI != SE; ++SI) {
+
+ /* Ignore already loaded sections */
+ if (LocalSections.find(*SI) != LocalSections.end())
+ continue;
+
+ bool IsCode = SI->isText();
+ if (auto SectionIDOrErr =
+ findOrEmitSection(Obj, *SI, IsCode, LocalSections))
+ LLVM_DEBUG(dbgs() << "\tSectionID: " << (*SectionIDOrErr) << "\n");
+ else
+ return SectionIDOrErr.takeError();
+ }
}
// Give the subclasses a chance to tie-up any loose ends.
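
The new NotifyStubEmitted hook replaces the checker registration removed above. Judging from the call sites, the callback receives the file name, section name, symbol name, section ID and stub address. A hedged usage sketch, assuming the public RuntimeDyld wrapper exposes a matching setter (only the impl-side setter is visible in this hunk) and llvm/Support/Debug.h and Format.h for the output helpers; RTDyld is a RuntimeDyld instance:

    RTDyld.setNotifyStubEmitted([](StringRef FileName, StringRef SectionName,
                                   StringRef SymbolName, unsigned SectionID,
                                   uint64_t StubAddr) {
      dbgs() << FileName << '/' << SectionName << ": stub for " << SymbolName
             << " in section " << SectionID << " at "
             << format_hex(StubAddr, 18) << "\n";
    });
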
@@ -497,7 +540,14 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
return errorCodeToError(EC);
uint64_t StubBufSize = computeSectionStubBufSize(Obj, Section);
- uint64_t SectionSize = DataSize + StubBufSize;
+
+ uint64_t PaddingSize = 0;
+ if (Name == ".eh_frame")
+ PaddingSize += 4;
+ if (StubBufSize != 0)
+ PaddingSize += getStubAlignment() - 1;
+
+ uint64_t SectionSize = DataSize + PaddingSize + StubBufSize;
// The .eh_frame section (at least on Linux) needs an extra four bytes
// padded
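
A worked example of the new size computation above, with illustrative numbers: for a .eh_frame section with DataSize = 100, a non-empty stub buffer, and a stub alignment of 8, PaddingSize = 4 + (8 - 1) = 11, so SectionSize = 100 + 11 + StubBufSize. The extra slack covers both the four-byte .eh_frame terminator and the worst-case realignment of the stub area.
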
@@ -703,9 +753,6 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
Addr += Size;
}
- if (Checker)
- Checker->registerSection(Obj.getFileName(), SectionID);
-
return Error::success();
}
@@ -725,6 +772,11 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
bool IsReadOnly = isReadOnlyData(Section);
uint64_t DataSize = Section.getSize();
+ // An alignment of 0 (at least with ELF) is identical to an alignment of 1,
+ // while being more "polite". Other formats do not support 0-aligned sections
+ // anyway, so we should guarantee that the alignment is always at least 1.
+ Alignment = std::max(1u, Alignment);
+
StringRef Name;
if (auto EC = Section.getName(Name))
return errorCodeToError(EC);
@@ -747,18 +799,19 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
if (!IsVirtual && !IsZeroInit) {
// In either case, set the location of the unrelocated section in memory,
// since we still process relocations for it even if we're not applying them.
- if (auto EC = Section.getContents(data))
- return errorCodeToError(EC);
+ if (Expected<StringRef> E = Section.getContents())
+ data = *E;
+ else
+ return E.takeError();
pData = data.data();
}
- // Code section alignment needs to be at least as high as stub alignment or
- // padding calculations may by incorrect when the section is remapped to a
- // higher alignment.
- if (IsCode) {
+ // If there are any stubs then the section alignment needs to be at least as
+ // high as stub alignment or padding calculations may be incorrect when the
+ // section is remapped.
+ if (StubBufSize != 0) {
Alignment = std::max(Alignment, getStubAlignment());
- if (StubBufSize > 0)
- PaddingSize += getStubAlignment() - 1;
+ PaddingSize += getStubAlignment() - 1;
}
// Some sections, such as debug info, don't need to be loaded for execution.
@@ -789,7 +842,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
// Align DataSize to stub alignment if we have any stubs (PaddingSize will
// have been increased above to account for this).
if (StubBufSize > 0)
- DataSize &= ~(getStubAlignment() - 1);
+ DataSize &= -(uint64_t)getStubAlignment();
}
LLVM_DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: "
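
The mask rewrite above is subtle enough to deserve a note: for a power-of-two alignment A, -(uint64_t)A equals ~(uint64_t)(A - 1), but the old spelling built the mask in 32 bits (getStubAlignment() returns unsigned) and zero-extended it, which clears the upper half of a 64-bit DataSize. A small illustration with made-up values:

    uint64_t DataSize = 0x100000010ULL;           // needs more than 32 bits
    unsigned A = 8;                               // stub alignment
    uint64_t OldMasked = DataSize & ~(A - 1);     // 0x10: upper bits lost
    uint64_t NewMasked = DataSize & -(uint64_t)A; // 0x100000010: preserved
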
@@ -817,9 +870,6 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
if (!IsRequired)
Sections.back().setLoadAddress(0);
- if (Checker)
- Checker->registerSection(Obj.getFileName(), SectionID);
-
return SectionID;
}
@@ -1202,42 +1252,43 @@ RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr,
// permissions are applied.
Dyld = nullptr;
ProcessAllSections = false;
- Checker = nullptr;
}
RuntimeDyld::~RuntimeDyld() {}
static std::unique_ptr<RuntimeDyldCOFF>
-createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
- JITSymbolResolver &Resolver, bool ProcessAllSections,
- RuntimeDyldCheckerImpl *Checker) {
+createRuntimeDyldCOFF(
+ Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ RuntimeDyld::NotifyStubEmittedFunction NotifyStubEmitted) {
std::unique_ptr<RuntimeDyldCOFF> Dyld =
RuntimeDyldCOFF::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
- Dyld->setRuntimeDyldChecker(Checker);
+ Dyld->setNotifyStubEmitted(std::move(NotifyStubEmitted));
return Dyld;
}
static std::unique_ptr<RuntimeDyldELF>
createRuntimeDyldELF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
JITSymbolResolver &Resolver, bool ProcessAllSections,
- RuntimeDyldCheckerImpl *Checker) {
+ RuntimeDyld::NotifyStubEmittedFunction NotifyStubEmitted) {
std::unique_ptr<RuntimeDyldELF> Dyld =
RuntimeDyldELF::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
- Dyld->setRuntimeDyldChecker(Checker);
+ Dyld->setNotifyStubEmitted(std::move(NotifyStubEmitted));
return Dyld;
}
static std::unique_ptr<RuntimeDyldMachO>
-createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
- JITSymbolResolver &Resolver,
- bool ProcessAllSections,
- RuntimeDyldCheckerImpl *Checker) {
+createRuntimeDyldMachO(
+ Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ JITSymbolResolver &Resolver,
+ bool ProcessAllSections,
+ RuntimeDyld::NotifyStubEmittedFunction NotifyStubEmitted) {
std::unique_ptr<RuntimeDyldMachO> Dyld =
RuntimeDyldMachO::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
- Dyld->setRuntimeDyldChecker(Checker);
+ Dyld->setNotifyStubEmitted(std::move(NotifyStubEmitted));
return Dyld;
}
@@ -1247,15 +1298,16 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (Obj.isELF())
Dyld =
createRuntimeDyldELF(static_cast<Triple::ArchType>(Obj.getArch()),
- MemMgr, Resolver, ProcessAllSections, Checker);
+ MemMgr, Resolver, ProcessAllSections,
+ std::move(NotifyStubEmitted));
else if (Obj.isMachO())
Dyld = createRuntimeDyldMachO(
static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
- ProcessAllSections, Checker);
+ ProcessAllSections, std::move(NotifyStubEmitted));
else if (Obj.isCOFF())
Dyld = createRuntimeDyldCOFF(
static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
- ProcessAllSections, Checker);
+ ProcessAllSections, std::move(NotifyStubEmitted));
else
report_fatal_error("Incompatible object format!");
}
@@ -1274,6 +1326,11 @@ void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
return Dyld->getSymbolLocalAddress(Name);
}
+unsigned RuntimeDyld::getSymbolSectionID(StringRef Name) const {
+ assert(Dyld && "No RuntimeDyld instance attached");
+ return Dyld->getSymbolSectionID(Name);
+}
+
JITEvaluatedSymbol RuntimeDyld::getSymbol(StringRef Name) const {
if (!Dyld)
return nullptr;
@@ -1312,6 +1369,16 @@ void RuntimeDyld::finalizeWithMemoryManagerLocking() {
}
}
+StringRef RuntimeDyld::getSectionContent(unsigned SectionID) const {
+ assert(Dyld && "No Dyld instance attached");
+ return Dyld->getSectionContent(SectionID);
+}
+
+uint64_t RuntimeDyld::getSectionLoadAddress(unsigned SectionID) const {
+ assert(Dyld && "No Dyld instance attached");
+ return Dyld->getSectionLoadAddress(SectionID);
+}
+
void RuntimeDyld::registerEHFrames() {
if (Dyld)
Dyld->registerEHFrames();
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 340ddaab186d..d4e3b0ba7670 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -1,9 +1,8 @@
//===-- RuntimeDyldCOFF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
index 729a358fa0ea..4efd18a2e6c5 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldCOFF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 6eb6256080ff..ec31ea4e573c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -1,23 +1,21 @@
//===--- RuntimeDyldChecker.cpp - RuntimeDyld tester framework --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "RuntimeDyldCheckerImpl.h"
-#include "RuntimeDyldImpl.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Path.h"
#include <cctype>
-#include <future>
#include <memory>
#include <utility>
@@ -321,22 +319,22 @@ private:
return std::make_pair(EvalResult(NextPC), RemainingExpr);
}
- // Evaluate a call to stub_addr.
+ // Evaluate a call to stub_addr/got_addr.
// Look up and return the address of the stub for the given
// (<file name>, <section name>, <symbol name>) tuple.
// On success, returns a pair containing the stub address, plus the expression
// remaining to be evaluated.
- std::pair<EvalResult, StringRef> evalStubAddr(StringRef Expr,
- ParseContext PCtx) const {
+ std::pair<EvalResult, StringRef>
+ evalStubOrGOTAddr(StringRef Expr, ParseContext PCtx, bool IsStubAddr) const {
if (!Expr.startswith("("))
return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
StringRef RemainingExpr = Expr.substr(1).ltrim();
// Handle file-name specially, as it may contain characters that aren't
// legal for symbols.
- StringRef FileName;
+ StringRef StubContainerName;
size_t ComaIdx = RemainingExpr.find(',');
- FileName = RemainingExpr.substr(0, ComaIdx).rtrim();
+ StubContainerName = RemainingExpr.substr(0, ComaIdx).rtrim();
RemainingExpr = RemainingExpr.substr(ComaIdx).ltrim();
if (!RemainingExpr.startswith(","))
@@ -344,14 +342,6 @@ private:
unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
RemainingExpr = RemainingExpr.substr(1).ltrim();
- StringRef SectionName;
- std::tie(SectionName, RemainingExpr) = parseSymbol(RemainingExpr);
-
- if (!RemainingExpr.startswith(","))
- return std::make_pair(
- unexpectedToken(RemainingExpr, Expr, "expected ','"), "");
- RemainingExpr = RemainingExpr.substr(1).ltrim();
-
StringRef Symbol;
std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
@@ -362,8 +352,8 @@ private:
uint64_t StubAddr;
std::string ErrorMsg = "";
- std::tie(StubAddr, ErrorMsg) = Checker.getStubAddrFor(
- FileName, SectionName, Symbol, PCtx.IsInsideLoad);
+ std::tie(StubAddr, ErrorMsg) = Checker.getStubOrGOTAddrFor(
+ StubContainerName, Symbol, PCtx.IsInsideLoad, IsStubAddr);
if (ErrorMsg != "")
return std::make_pair(EvalResult(ErrorMsg), "");
@@ -423,7 +413,9 @@ private:
else if (Symbol == "next_pc")
return evalNextPC(RemainingExpr, PCtx);
else if (Symbol == "stub_addr")
- return evalStubAddr(RemainingExpr, PCtx);
+ return evalStubOrGOTAddr(RemainingExpr, PCtx, true);
+ else if (Symbol == "got_addr")
+ return evalStubOrGOTAddr(RemainingExpr, PCtx, false);
else if (Symbol == "section_addr")
return evalSectionAddr(RemainingExpr, PCtx);
@@ -534,6 +526,11 @@ private:
uint64_t LoadAddr = LoadAddrExprResult.getValue();
+ // If there is no error but the content pointer is null then this is a
+ // zero-fill symbol/section.
+ if (LoadAddr == 0)
+ return std::make_pair(0, RemainingExpr);
+
return std::make_pair(
EvalResult(Checker.readMemoryAtAddr(LoadAddr, ReadSize)),
RemainingExpr);
@@ -666,27 +663,29 @@ private:
bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
MCDisassembler *Dis = Checker.Disassembler;
- StringRef SectionMem = Checker.getSubsectionStartingAt(Symbol);
- ArrayRef<uint8_t> SectionBytes(
- reinterpret_cast<const uint8_t *>(SectionMem.data()),
- SectionMem.size());
+ StringRef SymbolMem = Checker.getSymbolContent(Symbol);
+ ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin(), SymbolMem.size());
MCDisassembler::DecodeStatus S =
- Dis->getInstruction(Inst, Size, SectionBytes, 0, nulls(), nulls());
+ Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls(), nulls());
return (S == MCDisassembler::Success);
}
};
}
-RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld,
- MCDisassembler *Disassembler,
- MCInstPrinter *InstPrinter,
- raw_ostream &ErrStream)
- : RTDyld(RTDyld), Disassembler(Disassembler), InstPrinter(InstPrinter),
- ErrStream(ErrStream) {
- RTDyld.Checker = this;
-}
+RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(
+ IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
+ GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
+ GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
+ MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+ raw_ostream &ErrStream)
+ : IsSymbolValid(std::move(IsSymbolValid)),
+ GetSymbolInfo(std::move(GetSymbolInfo)),
+ GetSectionInfo(std::move(GetSectionInfo)),
+ GetStubInfo(std::move(GetStubInfo)), GetGOTInfo(std::move(GetGOTInfo)),
+ Endianness(Endianness), Disassembler(Disassembler),
+ InstPrinter(InstPrinter), ErrStream(ErrStream) {}
bool RuntimeDyldCheckerImpl::check(StringRef CheckExpr) const {
CheckExpr = CheckExpr.trim();
@@ -731,242 +730,134 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
return DidAllTestsPass && (NumRules != 0);
}
-Expected<JITSymbolResolver::LookupResult> RuntimeDyldCheckerImpl::lookup(
- const JITSymbolResolver::LookupSet &Symbols) const {
-
-#ifdef _MSC_VER
- using ExpectedLookupResult = MSVCPExpected<JITSymbolResolver::LookupResult>;
-#else
- using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
-#endif
-
- auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
- auto ResultF = ResultP->get_future();
-
- getRTDyld().Resolver.lookup(
- Symbols, [=](Expected<JITSymbolResolver::LookupResult> Result) {
- ResultP->set_value(std::move(Result));
- });
- return ResultF.get();
-}
-
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
- if (getRTDyld().getSymbol(Symbol))
- return true;
- auto Result = lookup({Symbol});
+ return IsSymbolValid(Symbol);
+}
- if (!Result) {
- logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
- return false;
+uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
+ auto SymInfo = GetSymbolInfo(Symbol);
+ if (!SymInfo) {
+ logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+ return 0;
}
- assert(Result->count(Symbol) && "Missing symbol result");
- return true;
-}
+ if (SymInfo->isZeroFill())
+ return 0;
-uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
return static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(getRTDyld().getSymbolLocalAddress(Symbol)));
+ reinterpret_cast<uintptr_t>(SymInfo->getContent().data()));
}
uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
- if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
- return InternalSymbol.getAddress();
-
- auto Result = lookup({Symbol});
- if (!Result) {
- logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: ");
+ auto SymInfo = GetSymbolInfo(Symbol);
+ if (!SymInfo) {
+ logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
return 0;
}
- auto I = Result->find(Symbol);
- assert(I != Result->end() && "Missing symbol result");
- return I->second.getAddress();
+
+ return SymInfo->getTargetAddress();
}
uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr,
unsigned Size) const {
uintptr_t PtrSizedAddr = static_cast<uintptr_t>(SrcAddr);
assert(PtrSizedAddr == SrcAddr && "Linker memory pointer out-of-range.");
- uint8_t *Src = reinterpret_cast<uint8_t*>(PtrSizedAddr);
- return getRTDyld().readBytesUnaligned(Src, Size);
+ void *Ptr = reinterpret_cast<void*>(PtrSizedAddr);
+
+ switch (Size) {
+ case 1:
+ return support::endian::read<uint8_t>(Ptr, Endianness);
+ case 2:
+ return support::endian::read<uint16_t>(Ptr, Endianness);
+ case 4:
+ return support::endian::read<uint32_t>(Ptr, Endianness);
+ case 8:
+ return support::endian::read<uint64_t>(Ptr, Endianness);
+ }
+ llvm_unreachable("Unsupported read size");
}
-
-std::pair<const RuntimeDyldCheckerImpl::SectionAddressInfo*, std::string>
-RuntimeDyldCheckerImpl::findSectionAddrInfo(StringRef FileName,
- StringRef SectionName) const {
-
- auto SectionMapItr = Stubs.find(FileName);
- if (SectionMapItr == Stubs.end()) {
- std::string ErrorMsg = "File '";
- ErrorMsg += FileName;
- ErrorMsg += "' not found. ";
- if (Stubs.empty())
- ErrorMsg += "No stubs registered.";
- else {
- ErrorMsg += "Available files are:";
- for (const auto& StubEntry : Stubs) {
- ErrorMsg += " '";
- ErrorMsg += StubEntry.first;
- ErrorMsg += "'";
- }
- }
- ErrorMsg += "\n";
- return std::make_pair(nullptr, ErrorMsg);
+StringRef RuntimeDyldCheckerImpl::getSymbolContent(StringRef Symbol) const {
+ auto SymInfo = GetSymbolInfo(Symbol);
+ if (!SymInfo) {
+ logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+ return StringRef();
}
-
- auto SectionInfoItr = SectionMapItr->second.find(SectionName);
- if (SectionInfoItr == SectionMapItr->second.end())
- return std::make_pair(nullptr,
- ("Section '" + SectionName + "' not found in file '" +
- FileName + "'\n").str());
-
- return std::make_pair(&SectionInfoItr->second, std::string(""));
+ return SymInfo->getContent();
}
std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getSectionAddr(
StringRef FileName, StringRef SectionName, bool IsInsideLoad) const {
- const SectionAddressInfo *SectionInfo = nullptr;
- {
- std::string ErrorMsg;
- std::tie(SectionInfo, ErrorMsg) =
- findSectionAddrInfo(FileName, SectionName);
- if (ErrorMsg != "")
- return std::make_pair(0, ErrorMsg);
- }
-
- unsigned SectionID = SectionInfo->SectionID;
- uint64_t Addr;
- if (IsInsideLoad)
- Addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(
- getRTDyld().Sections[SectionID].getAddress()));
- else
- Addr = getRTDyld().Sections[SectionID].getLoadAddress();
-
- return std::make_pair(Addr, std::string(""));
-}
-
-std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubAddrFor(
- StringRef FileName, StringRef SectionName, StringRef SymbolName,
- bool IsInsideLoad) const {
-
- const SectionAddressInfo *SectionInfo = nullptr;
- {
- std::string ErrorMsg;
- std::tie(SectionInfo, ErrorMsg) =
- findSectionAddrInfo(FileName, SectionName);
- if (ErrorMsg != "")
- return std::make_pair(0, ErrorMsg);
+ auto SecInfo = GetSectionInfo(FileName, SectionName);
+ if (!SecInfo) {
+ std::string ErrMsg;
+ {
+ raw_string_ostream ErrMsgStream(ErrMsg);
+ logAllUnhandledErrors(SecInfo.takeError(), ErrMsgStream,
+ "RTDyldChecker: ");
+ }
+ return std::make_pair(0, std::move(ErrMsg));
}
- unsigned SectionID = SectionInfo->SectionID;
- const StubOffsetsMap &SymbolStubs = SectionInfo->StubOffsets;
- auto StubOffsetItr = SymbolStubs.find(SymbolName);
- if (StubOffsetItr == SymbolStubs.end())
- return std::make_pair(0,
- ("Stub for symbol '" + SymbolName + "' not found. "
- "If '" + SymbolName + "' is an internal symbol this "
- "may indicate that the stub target offset is being "
- "computed incorrectly.\n").str());
+ // If this address is being looked up in "load" mode, return the content
+ // pointer, otherwise return the target address.
- uint64_t StubOffset = StubOffsetItr->second;
+ uint64_t Addr = 0;
- uint64_t Addr;
if (IsInsideLoad) {
- uintptr_t SectionBase = reinterpret_cast<uintptr_t>(
- getRTDyld().Sections[SectionID].getAddress());
- Addr = static_cast<uint64_t>(SectionBase) + StubOffset;
- } else {
- uint64_t SectionBase = getRTDyld().Sections[SectionID].getLoadAddress();
- Addr = SectionBase + StubOffset;
- }
-
- return std::make_pair(Addr, std::string(""));
-}
-
-StringRef
-RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const {
- RTDyldSymbolTable::const_iterator pos =
- getRTDyld().GlobalSymbolTable.find(Name);
- if (pos == getRTDyld().GlobalSymbolTable.end())
- return StringRef();
- const auto &SymInfo = pos->second;
- uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID());
- return StringRef(reinterpret_cast<const char *>(SectionAddr) +
- SymInfo.getOffset(),
- getRTDyld().Sections[SymInfo.getSectionID()].getSize() -
- SymInfo.getOffset());
-}
-
-Optional<uint64_t>
-RuntimeDyldCheckerImpl::getSectionLoadAddress(void *LocalAddress) const {
- for (auto &S : getRTDyld().Sections) {
- if (S.getAddress() == LocalAddress)
- return S.getLoadAddress();
- }
- return Optional<uint64_t>();
-}
-
-void RuntimeDyldCheckerImpl::registerSection(
- StringRef FilePath, unsigned SectionID) {
- StringRef FileName = sys::path::filename(FilePath);
- const SectionEntry &Section = getRTDyld().Sections[SectionID];
- StringRef SectionName = Section.getName();
+ if (SecInfo->isZeroFill())
+ Addr = 0;
+ else
+ Addr = pointerToJITTargetAddress(SecInfo->getContent().data());
+ } else
+ Addr = SecInfo->getTargetAddress();
- Stubs[FileName][SectionName].SectionID = SectionID;
+ return std::make_pair(Addr, "");
}
-void RuntimeDyldCheckerImpl::registerStubMap(
- StringRef FilePath, unsigned SectionID,
- const RuntimeDyldImpl::StubMap &RTDyldStubs) {
- StringRef FileName = sys::path::filename(FilePath);
- const SectionEntry &Section = getRTDyld().Sections[SectionID];
- StringRef SectionName = Section.getName();
-
- Stubs[FileName][SectionName].SectionID = SectionID;
+std::pair<uint64_t, std::string> RuntimeDyldCheckerImpl::getStubOrGOTAddrFor(
+ StringRef StubContainerName, StringRef SymbolName, bool IsInsideLoad,
+ bool IsStubAddr) const {
- for (auto &StubMapEntry : RTDyldStubs) {
- std::string SymbolName = "";
+ auto StubInfo = IsStubAddr ? GetStubInfo(StubContainerName, SymbolName)
+ : GetGOTInfo(StubContainerName, SymbolName);
- if (StubMapEntry.first.SymbolName)
- SymbolName = StubMapEntry.first.SymbolName;
- else {
- // If this is a (Section, Offset) pair, do a reverse lookup in the
- // global symbol table to find the name.
- for (auto &GSTEntry : getRTDyld().GlobalSymbolTable) {
- const auto &SymInfo = GSTEntry.second;
- if (SymInfo.getSectionID() == StubMapEntry.first.SectionID &&
- SymInfo.getOffset() ==
- static_cast<uint64_t>(StubMapEntry.first.Offset)) {
- SymbolName = GSTEntry.first();
- break;
- }
- }
+ if (!StubInfo) {
+ std::string ErrMsg;
+ {
+ raw_string_ostream ErrMsgStream(ErrMsg);
+ logAllUnhandledErrors(StubInfo.takeError(), ErrMsgStream,
+ "RTDyldChecker: ");
}
-
- if (SymbolName != "")
- Stubs[FileName][SectionName].StubOffsets[SymbolName] =
- StubMapEntry.second;
+ return std::make_pair((uint64_t)0, std::move(ErrMsg));
}
-}
-RuntimeDyldChecker::RuntimeDyldChecker(RuntimeDyld &RTDyld,
- MCDisassembler *Disassembler,
- MCInstPrinter *InstPrinter,
- raw_ostream &ErrStream)
- : Impl(make_unique<RuntimeDyldCheckerImpl>(RTDyld, Disassembler,
- InstPrinter, ErrStream)) {}
+ uint64_t Addr = 0;
-RuntimeDyldChecker::~RuntimeDyldChecker() {}
+ if (IsInsideLoad) {
+ if (StubInfo->isZeroFill())
+ return std::make_pair((uint64_t)0, "Detected zero-filled stub/GOT entry");
+ Addr = pointerToJITTargetAddress(StubInfo->getContent().data());
+ } else
+ Addr = StubInfo->getTargetAddress();
-RuntimeDyld& RuntimeDyldChecker::getRTDyld() {
- return Impl->RTDyld;
+ return std::make_pair(Addr, "");
}
-const RuntimeDyld& RuntimeDyldChecker::getRTDyld() const {
- return Impl->RTDyld;
-}
+RuntimeDyldChecker::RuntimeDyldChecker(
+ IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
+ GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
+ GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
+ MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+ raw_ostream &ErrStream)
+ : Impl(::llvm::make_unique<RuntimeDyldCheckerImpl>(
+ std::move(IsSymbolValid), std::move(GetSymbolInfo),
+ std::move(GetSectionInfo), std::move(GetStubInfo),
+ std::move(GetGOTInfo), Endianness, Disassembler, InstPrinter,
+ ErrStream)) {}
+
+RuntimeDyldChecker::~RuntimeDyldChecker() {}
bool RuntimeDyldChecker::check(StringRef CheckExpr) const {
return Impl->check(CheckExpr);
@@ -982,8 +873,3 @@ RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName,
bool LocalAddress) {
return Impl->getSectionAddr(FileName, SectionName, LocalAddress);
}
-
-Optional<uint64_t>
-RuntimeDyldChecker::getSectionLoadAddress(void *LocalAddress) const {
- return Impl->getSectionLoadAddress(LocalAddress);
-}
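
One consequence of the checker rewrite above: readMemoryAtAddr no longer funnels through RuntimeDyldImpl::readBytesUnaligned but reads through the endianness handed to the checker, using llvm/Support/Endian.h. A minimal sketch of that primitive in isolation:

    #include "llvm/Support/Endian.h"
    #include <cstdint>

    uint32_t readWord(const void *Ptr, llvm::support::endianness E) {
      // Reads four bytes at Ptr, byte-swapping as needed for endianness E.
      return llvm::support::endian::read<uint32_t>(Ptr, E);
    }
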
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index 6da1a68d06d6..ac9d4d460217 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldCheckerImpl.h -- RuntimeDyld test framework --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -16,14 +15,22 @@ namespace llvm {
class RuntimeDyldCheckerImpl {
friend class RuntimeDyldChecker;
- friend class RuntimeDyldImpl;
friend class RuntimeDyldCheckerExprEval;
- friend class RuntimeDyldELF;
+
+ using IsSymbolValidFunction =
+ RuntimeDyldChecker::IsSymbolValidFunction;
+ using GetSymbolInfoFunction = RuntimeDyldChecker::GetSymbolInfoFunction;
+ using GetSectionInfoFunction = RuntimeDyldChecker::GetSectionInfoFunction;
+ using GetStubInfoFunction = RuntimeDyldChecker::GetStubInfoFunction;
+ using GetGOTInfoFunction = RuntimeDyldChecker::GetGOTInfoFunction;
public:
- RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler,
- MCInstPrinter *InstPrinter,
- llvm::raw_ostream &ErrStream);
+ RuntimeDyldCheckerImpl(
+ IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
+ GetSectionInfoFunction GetSectionInfo, GetStubInfoFunction GetStubInfo,
+ GetGOTInfoFunction GetGOTInfo, support::endianness Endianness,
+ MCDisassembler *Disassembler, MCInstPrinter *InstPrinter,
+ llvm::raw_ostream &ErrStream);
bool check(StringRef CheckExpr) const;
bool checkAllRulesInBuffer(StringRef RulePrefix, MemoryBuffer *MemBuf) const;
@@ -31,15 +38,6 @@ public:
private:
// StubMap typedefs.
- typedef std::map<std::string, uint64_t> StubOffsetsMap;
- struct SectionAddressInfo {
- uint64_t SectionID;
- StubOffsetsMap StubOffsets;
- };
- typedef std::map<std::string, SectionAddressInfo> SectionMap;
- typedef std::map<std::string, SectionMap> StubMap;
-
- RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
Expected<JITSymbolResolver::LookupResult>
lookup(const JITSymbolResolver::LookupSet &Symbols) const;
@@ -49,32 +47,27 @@ private:
uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
uint64_t readMemoryAtAddr(uint64_t Addr, unsigned Size) const;
- std::pair<const SectionAddressInfo*, std::string> findSectionAddrInfo(
- StringRef FileName,
- StringRef SectionName) const;
+ StringRef getSymbolContent(StringRef Symbol) const;
std::pair<uint64_t, std::string> getSectionAddr(StringRef FileName,
StringRef SectionName,
bool IsInsideLoad) const;
- std::pair<uint64_t, std::string> getStubAddrFor(StringRef FileName,
- StringRef SectionName,
- StringRef Symbol,
- bool IsInsideLoad) const;
- StringRef getSubsectionStartingAt(StringRef Name) const;
+ std::pair<uint64_t, std::string>
+ getStubOrGOTAddrFor(StringRef StubContainerName, StringRef Symbol,
+ bool IsInsideLoad, bool IsStubAddr) const;
Optional<uint64_t> getSectionLoadAddress(void *LocalAddr) const;
- void registerSection(StringRef FilePath, unsigned SectionID);
- void registerStubMap(StringRef FilePath, unsigned SectionID,
- const RuntimeDyldImpl::StubMap &RTDyldStubs);
-
- RuntimeDyld &RTDyld;
+ IsSymbolValidFunction IsSymbolValid;
+ GetSymbolInfoFunction GetSymbolInfo;
+ GetSectionInfoFunction GetSectionInfo;
+ GetStubInfoFunction GetStubInfo;
+ GetGOTInfoFunction GetGOTInfo;
+ support::endianness Endianness;
MCDisassembler *Disassembler;
MCInstPrinter *InstPrinter;
llvm::raw_ostream &ErrStream;
-
- StubMap Stubs;
};
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 226ee715e18b..60041a45e2b8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1,9 +1,8 @@
//===-- RuntimeDyldELF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1857,9 +1856,6 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
Sections[GOTSectionID] =
SectionEntry(".got", Addr, TotalSize, TotalSize, 0);
- if (Checker)
- Checker->registerSection(Obj.getFileName(), GOTSectionID);
-
// For now, initialize all GOT entries to zero. We'll fill them in as
// needed when GOT-based relocations are applied.
memset(Addr, 0, TotalSize);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index f37bd0bbaea6..ef0784e2273b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,7 +60,7 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);
- unsigned getMaxStubSize() override {
+ unsigned getMaxStubSize() const override {
if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
return 20; // movz; movk; movk; movk; br
if (Arch == Triple::arm || Arch == Triple::thumb)
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 4c650e09ac1f..68b3468fbc9d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -241,7 +240,6 @@ typedef StringMap<SymbolTableEntry> RTDyldSymbolTable;
class RuntimeDyldImpl {
friend class RuntimeDyld::LoadedObjectInfo;
- friend class RuntimeDyldCheckerImpl;
protected:
static const unsigned AbsoluteSymbolSection = ~0U;
@@ -251,9 +249,6 @@ protected:
// The symbol resolver to use for external symbols.
JITSymbolResolver &Resolver;
- // Attached RuntimeDyldChecker instance. Null if no instance attached.
- RuntimeDyldCheckerImpl *Checker;
-
// A list of all sections emitted by the dynamic linker. These sections are
// referenced in the code by means of their index in this list - SectionID.
typedef SmallVector<SectionEntry, 64> SectionList;
@@ -313,20 +308,16 @@ protected:
// the end of the list while the list is being processed.
sys::Mutex lock;
- virtual unsigned getMaxStubSize() = 0;
+ using NotifyStubEmittedFunction =
+ RuntimeDyld::NotifyStubEmittedFunction;
+ NotifyStubEmittedFunction NotifyStubEmitted;
+
+ virtual unsigned getMaxStubSize() const = 0;
virtual unsigned getStubAlignment() = 0;
bool HasError;
std::string ErrorStr;
- uint64_t getSectionLoadAddress(unsigned SectionID) const {
- return Sections[SectionID].getLoadAddress();
- }
-
- uint8_t *getSectionAddress(unsigned SectionID) const {
- return Sections[SectionID].getAddress();
- }
-
void writeInt16BE(uint8_t *Addr, uint16_t Value) {
if (IsTargetLittleEndian)
sys::swapByteOrder(Value);
@@ -472,7 +463,7 @@ protected:
public:
RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr,
JITSymbolResolver &Resolver)
- : MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr),
+ : MemMgr(MemMgr), Resolver(Resolver),
ProcessAllSections(false), HasError(false) {
}
@@ -482,13 +473,22 @@ public:
this->ProcessAllSections = ProcessAllSections;
}
- void setRuntimeDyldChecker(RuntimeDyldCheckerImpl *Checker) {
- this->Checker = Checker;
- }
-
virtual std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &Obj) = 0;
+ uint64_t getSectionLoadAddress(unsigned SectionID) const {
+ return Sections[SectionID].getLoadAddress();
+ }
+
+ uint8_t *getSectionAddress(unsigned SectionID) const {
+ return Sections[SectionID].getAddress();
+ }
+
+ StringRef getSectionContent(unsigned SectionID) const {
+ return StringRef(reinterpret_cast<char *>(Sections[SectionID].getAddress()),
+ Sections[SectionID].getStubOffset() + getMaxStubSize());
+ }
+
uint8_t* getSymbolLocalAddress(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
@@ -502,6 +502,13 @@ public:
return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
}
+ unsigned getSymbolSectionID(StringRef Name) const {
+ auto GSTItr = GlobalSymbolTable.find(Name);
+ if (GSTItr == GlobalSymbolTable.end())
+ return ~0U;
+ return GSTItr->second.getSectionID();
+ }
+
JITEvaluatedSymbol getSymbol(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
@@ -560,6 +567,10 @@ public:
virtual bool isCompatibleFile(const ObjectFile &Obj) const = 0;
+ void setNotifyStubEmitted(NotifyStubEmittedFunction NotifyStubEmitted) {
+ this->NotifyStubEmitted = std::move(NotifyStubEmitted);
+ }
+
virtual void registerEHFrames();
void deregisterEHFrames();
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index d47fcd45be88..202c3ca1c507 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -1,9 +1,8 @@
//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index d71ca4e54953..650e7b79fbb8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldMachO.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
index dd65051edad7..40910bea0c36 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
@@ -1,9 +1,8 @@
//===--- RuntimeDyldCOFFI386.h --- COFF/X86_64 specific code ---*- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,16 +27,16 @@ public:
JITSymbolResolver &Resolver)
: RuntimeDyldCOFF(MM, Resolver) {}
- unsigned getMaxStubSize() override {
+ unsigned getMaxStubSize() const override {
return 8; // 2-byte jmp instruction + 32-bit relative address + 2 byte pad
}
unsigned getStubAlignment() override { return 1; }
- Expected<relocation_iterator>
+ Expected<object::relocation_iterator>
processRelocationRef(unsigned SectionID,
- relocation_iterator RelI,
- const ObjectFile &Obj,
+ object::relocation_iterator RelI,
+ const object::ObjectFile &Obj,
ObjSectionToIDMap &ObjSectionToID,
StubMap &Stubs) override {
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 8723dd0fd0ea..bb2e9626e0b0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -1,9 +1,8 @@
//===--- RuntimeDyldCOFFThumb.h --- COFF/Thumb specific code ---*- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,9 +21,10 @@
namespace llvm {
-static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
- section_iterator Section) {
- Expected<SymbolRef::Type> SymTypeOrErr = Symbol->getType();
+static bool isThumbFunc(object::symbol_iterator Symbol,
+ const object::ObjectFile &Obj,
+ object::section_iterator Section) {
+ Expected<object::SymbolRef::Type> SymTypeOrErr = Symbol->getType();
if (!SymTypeOrErr) {
std::string Buf;
raw_string_ostream OS(Buf);
@@ -33,12 +33,14 @@ static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
report_fatal_error(Buf);
}
- if (*SymTypeOrErr != SymbolRef::ST_Function)
+ if (*SymTypeOrErr != object::SymbolRef::ST_Function)
return false;
// We check the IMAGE_SCN_MEM_16BIT flag in the section of the symbol to tell
// if it's thumb or not
- return cast<COFFObjectFile>(Obj).getCOFFSection(*Section)->Characteristics &
+ return cast<object::COFFObjectFile>(Obj)
+ .getCOFFSection(*Section)
+ ->Characteristics &
COFF::IMAGE_SCN_MEM_16BIT;
}
@@ -48,16 +50,16 @@ public:
JITSymbolResolver &Resolver)
: RuntimeDyldCOFF(MM, Resolver) {}
- unsigned getMaxStubSize() override {
+ unsigned getMaxStubSize() const override {
return 16; // 8-byte load instructions, 4-byte jump, 4-byte padding
}
unsigned getStubAlignment() override { return 1; }
- Expected<relocation_iterator>
+ Expected<object::relocation_iterator>
processRelocationRef(unsigned SectionID,
- relocation_iterator RelI,
- const ObjectFile &Obj,
+ object::relocation_iterator RelI,
+ const object::ObjectFile &Obj,
ObjSectionToIDMap &ObjSectionToID,
StubMap &Stubs) override {
auto Symbol = RelI->getSymbol();
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index aee5f6dc3746..d2d74534cf90 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldCOFFX86_64.h --- COFF/X86_64 specific code ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,7 +61,7 @@ public:
unsigned getStubAlignment() override { return 1; }
// 2-byte jmp instruction + 32-bit relative address + 64-bit absolute jump
- unsigned getMaxStubSize() override { return 14; }
+ unsigned getMaxStubSize() const override { return 14; }
// The target location for the relocation is described by RE.SectionID and
// RE.Offset. RE.SectionID can be used to find the SectionEntry. Each
@@ -187,21 +186,21 @@ public:
return std::make_tuple(Offset, RelType, Addend);
}
- Expected<relocation_iterator>
+ Expected<object::relocation_iterator>
processRelocationRef(unsigned SectionID,
- relocation_iterator RelI,
- const ObjectFile &Obj,
+ object::relocation_iterator RelI,
+ const object::ObjectFile &Obj,
ObjSectionToIDMap &ObjSectionToID,
StubMap &Stubs) override {
// If possible, find the symbol referred to in the relocation,
// and the section that contains it.
- symbol_iterator Symbol = RelI->getSymbol();
+ object::symbol_iterator Symbol = RelI->getSymbol();
if (Symbol == Obj.symbol_end())
report_fatal_error("Unknown symbol in relocation");
auto SectionOrError = Symbol->getSection();
if (!SectionOrError)
return SectionOrError.takeError();
- section_iterator SecI = *SectionOrError;
+ object::section_iterator SecI = *SectionOrError;
// If there is no section, this must be an external reference.
const bool IsExtern = SecI == Obj.section_end();
@@ -280,11 +279,11 @@ public:
UnregisteredEHFrameSections.clear();
}
- Error finalizeLoad(const ObjectFile &Obj,
+ Error finalizeLoad(const object::ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) override {
// Look for and record the EH frame section IDs.
for (const auto &SectionPair : SectionMap) {
- const SectionRef &Section = SectionPair.first;
+ const object::SectionRef &Section = SectionPair.first;
StringRef Name;
if (auto EC = Section.getName(Name))
return errorCodeToError(EC);
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
index 3a166b40af2d..17cbe612fb43 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
@@ -1,9 +1,8 @@
//===-- RuntimeDyldELFMips.cpp ---- ELF/Mips specific code. -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
index f53b9e6bd75a..14fb36f070f8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldELFMips.h ---- ELF/Mips specific code. -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 2a619c549cfa..f2ee1b06d494 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldMachOAArch64.h -- MachO/AArch64 specific code. -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,7 +26,7 @@ public:
JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
- unsigned getMaxStubSize() override { return 8; }
+ unsigned getMaxStubSize() const override { return 8; }
unsigned getStubAlignment() override { return 8; }
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index ab7cd2bdae15..3bec8b979f7d 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -1,9 +1,8 @@
//===----- RuntimeDyldMachOARM.h ---- MachO/ARM specific code. ----*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,7 +29,7 @@ public:
JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
- unsigned getMaxStubSize() override { return 8; }
+ unsigned getMaxStubSize() const override { return 8; }
unsigned getStubAlignment() override { return 4; }
@@ -225,7 +224,7 @@ public:
HighInsn = (HighInsn & 0xf800) | ((Value >> 12) & 0x7ff);
uint16_t LowInsn = readBytesUnaligned(LocalAddress + 2, 2);
- assert((LowInsn & 0xf800) != 0xf8000 &&
+ assert((LowInsn & 0xf800) == 0xf800 &&
"Unrecognized thumb branch encoding (BR22 low bits)");
LowInsn = (LowInsn & 0xf800) | ((Value >> 1) & 0x7ff);
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index d384d70b8b0f..f0de27ba14bb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -1,9 +1,8 @@
//===---- RuntimeDyldMachOI386.h ---- MachO/I386 specific code. ---*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,7 +26,7 @@ public:
JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
- unsigned getMaxStubSize() override { return 0; }
+ unsigned getMaxStubSize() const override { return 0; }
unsigned getStubAlignment() override { return 1; }
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 9732ea6a0cd2..28febbdb948c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -1,9 +1,8 @@
//===-- RuntimeDyldMachOX86_64.h ---- MachO/X86_64 specific code. -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,9 +26,9 @@ public:
JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
- unsigned getMaxStubSize() override { return 8; }
+ unsigned getMaxStubSize() const override { return 8; }
- unsigned getStubAlignment() override { return 1; }
+ unsigned getStubAlignment() override { return 8; }
Expected<relocation_iterator>
processRelocationRef(unsigned SectionID, relocation_iterator RelI,
diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp
index 05ab4a074e37..925049b2a1b4 100644
--- a/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -1,9 +1,8 @@
//===- SectionMemoryManager.cpp - Memory manager for MCJIT/RtDyld *- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -65,9 +64,9 @@ uint8_t *SectionMemoryManager::allocateSection(
// Look in the list of free memory regions and use a block there if one
// is available.
for (FreeMemBlock &FreeMB : MemGroup.FreeMem) {
- if (FreeMB.Free.size() >= RequiredSize) {
+ if (FreeMB.Free.allocatedSize() >= RequiredSize) {
Addr = (uintptr_t)FreeMB.Free.base();
- uintptr_t EndOfBlock = Addr + FreeMB.Free.size();
+ uintptr_t EndOfBlock = Addr + FreeMB.Free.allocatedSize();
// Align the address.
Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
@@ -116,7 +115,7 @@ uint8_t *SectionMemoryManager::allocateSection(
// Remember that we allocated this memory
MemGroup.AllocatedMem.push_back(MB);
Addr = (uintptr_t)MB.base();
- uintptr_t EndOfBlock = Addr + MB.size();
+ uintptr_t EndOfBlock = Addr + MB.allocatedSize();
// Align the address.
Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
@@ -173,12 +172,12 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) {
}
static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) {
- static const size_t PageSize = sys::Process::getPageSize();
+ static const size_t PageSize = sys::Process::getPageSizeEstimate();
size_t StartOverlap =
(PageSize - ((uintptr_t)M.base() % PageSize)) % PageSize;
- size_t TrimmedSize = M.size();
+ size_t TrimmedSize = M.allocatedSize();
TrimmedSize -= StartOverlap;
TrimmedSize -= TrimmedSize % PageSize;
@@ -186,8 +185,9 @@ static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) {
TrimmedSize);
assert(((uintptr_t)Trimmed.base() % PageSize) == 0);
- assert((Trimmed.size() % PageSize) == 0);
- assert(M.base() <= Trimmed.base() && Trimmed.size() <= M.size());
+ assert((Trimmed.allocatedSize() % PageSize) == 0);
+ assert(M.base() <= Trimmed.base() &&
+ Trimmed.allocatedSize() <= M.allocatedSize());
return Trimmed;
}
@@ -210,17 +210,19 @@ SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
}
// Remove all blocks which are now empty
- MemGroup.FreeMem.erase(
- remove_if(MemGroup.FreeMem,
- [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }),
- MemGroup.FreeMem.end());
+ MemGroup.FreeMem.erase(remove_if(MemGroup.FreeMem,
+ [](FreeMemBlock &FreeMB) {
+ return FreeMB.Free.allocatedSize() == 0;
+ }),
+ MemGroup.FreeMem.end());
return std::error_code();
}
void SectionMemoryManager::invalidateInstructionCache() {
for (sys::MemoryBlock &Block : CodeMem.PendingMem)
- sys::Memory::InvalidateInstructionCache(Block.base(), Block.size());
+ sys::Memory::InvalidateInstructionCache(Block.base(),
+ Block.allocatedSize());
}
SectionMemoryManager::~SectionMemoryManager() {
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 9626b8d3ffa3..0d9c6cfa0908 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -1,9 +1,8 @@
//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp
index a70dad37dfcf..63d31c035390 100644
--- a/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/lib/FuzzMutate/FuzzerCLI.cpp
@@ -1,9 +1,8 @@
//===-- FuzzerCLI.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/FuzzMutate/IRMutator.cpp b/lib/FuzzMutate/IRMutator.cpp
index 40e402cdadef..2fc65981f1db 100644
--- a/lib/FuzzMutate/IRMutator.cpp
+++ b/lib/FuzzMutate/IRMutator.cpp
@@ -1,9 +1,8 @@
//===-- IRMutator.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/FuzzMutate/OpDescriptor.cpp b/lib/FuzzMutate/OpDescriptor.cpp
index 1c5d8f606aea..67d44be8b699 100644
--- a/lib/FuzzMutate/OpDescriptor.cpp
+++ b/lib/FuzzMutate/OpDescriptor.cpp
@@ -1,9 +1,8 @@
//===-- OpDescriptor.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/FuzzMutate/Operations.cpp b/lib/FuzzMutate/Operations.cpp
index b842f6d64fb1..cf55d09caf7e 100644
--- a/lib/FuzzMutate/Operations.cpp
+++ b/lib/FuzzMutate/Operations.cpp
@@ -1,9 +1,8 @@
//===-- Operations.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/FuzzMutate/RandomIRBuilder.cpp b/lib/FuzzMutate/RandomIRBuilder.cpp
index 337184535558..1295714839e8 100644
--- a/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -1,9 +1,8 @@
//===-- RandomIRBuilder.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,7 +53,8 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
IP = ++I->getIterator();
assert(IP != BB.end() && "guaranteed by the findPointer");
}
- auto *NewLoad = new LoadInst(Ptr, "L", &*IP);
+ auto *NewLoad = new LoadInst(
+ cast<PointerType>(Ptr->getType())->getElementType(), Ptr, "L", &*IP);
// Only sample this load if it really matches the descriptor
if (Pred.matches(Srcs, NewLoad))
diff --git a/lib/IR/AbstractCallSite.cpp b/lib/IR/AbstractCallSite.cpp
new file mode 100644
index 000000000000..b7a81030f41c
--- /dev/null
+++ b/lib/IR/AbstractCallSite.cpp
@@ -0,0 +1,134 @@
+//===-- AbstractCallSite.cpp - Implementation of abstract call sites ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements abstract call sites which unify the interface for
+// direct, indirect, and callback call sites.
+//
+// For more information see:
+// https://llvm.org/devmtg/2018-10/talk-abstracts.html#talk20
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "abstract-call-sites"
+
+STATISTIC(NumCallbackCallSites, "Number of callback call sites created");
+STATISTIC(NumDirectAbstractCallSites,
+ "Number of direct abstract call sites created");
+STATISTIC(NumInvalidAbstractCallSitesUnknownUse,
+ "Number of invalid abstract call sites created (unknown use)");
+STATISTIC(NumInvalidAbstractCallSitesUnknownCallee,
+ "Number of invalid abstract call sites created (unknown callee)");
+STATISTIC(NumInvalidAbstractCallSitesNoCallback,
+ "Number of invalid abstract call sites created (no callback)");
+
+/// Create an abstract call site from a use.
+AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) {
+
+ // First handle unknown users.
+ if (!CS) {
+
+ // If the use is actually in a constant cast expression which itself
+ // has only one use, we look through the constant cast expression.
+ // This happens by updating the use @p U to the use of the constant
+ // cast expression and afterwards re-initializing CS accordingly.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U->getUser()))
+ if (CE->getNumUses() == 1 && CE->isCast()) {
+ U = &*CE->use_begin();
+ CS = CallSite(U->getUser());
+ }
+
+ if (!CS) {
+ NumInvalidAbstractCallSitesUnknownUse++;
+ return;
+ }
+ }
+
+ // Then handle direct or indirect calls. Thus, if U is the callee of the
+ // call site CS it is not a callback and we are done.
+ if (CS.isCallee(U)) {
+ NumDirectAbstractCallSites++;
+ return;
+ }
+
+ // If we cannot identify the broker function we cannot create a callback and
+ // invalidate the abstract call site.
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee) {
+ NumInvalidAbstractCallSitesUnknownCallee++;
+ CS = CallSite();
+ return;
+ }
+
+ MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback);
+ if (!CallbackMD) {
+ NumInvalidAbstractCallSitesNoCallback++;
+ CS = CallSite();
+ return;
+ }
+
+ unsigned UseIdx = CS.getArgumentNo(U);
+ MDNode *CallbackEncMD = nullptr;
+ for (const MDOperand &Op : CallbackMD->operands()) {
+ MDNode *OpMD = cast<MDNode>(Op.get());
+ auto *CBCalleeIdxAsCM = cast<ConstantAsMetadata>(OpMD->getOperand(0));
+ uint64_t CBCalleeIdx =
+ cast<ConstantInt>(CBCalleeIdxAsCM->getValue())->getZExtValue();
+ if (CBCalleeIdx != UseIdx)
+ continue;
+ CallbackEncMD = OpMD;
+ break;
+ }
+
+ if (!CallbackEncMD) {
+ NumInvalidAbstractCallSitesNoCallback++;
+ CS = CallSite();
+ return;
+ }
+
+ NumCallbackCallSites++;
+
+ assert(CallbackEncMD->getNumOperands() >= 2 && "Incomplete !callback metadata");
+
+ unsigned NumCallOperands = CS.getNumArgOperands();
+ // Skip the var-arg flag at the end when reading the metadata.
+ for (unsigned u = 0, e = CallbackEncMD->getNumOperands() - 1; u < e; u++) {
+ Metadata *OpAsM = CallbackEncMD->getOperand(u).get();
+ auto *OpAsCM = cast<ConstantAsMetadata>(OpAsM);
+ assert(OpAsCM->getType()->isIntegerTy(64) &&
+ "Malformed !callback metadata");
+
+ int64_t Idx = cast<ConstantInt>(OpAsCM->getValue())->getSExtValue();
+ assert(-1 <= Idx && Idx <= NumCallOperands &&
+ "Out-of-bounds !callback metadata index");
+
+ CI.ParameterEncoding.push_back(Idx);
+ }
+
+ if (!Callee->isVarArg())
+ return;
+
+ Metadata *VarArgFlagAsM =
+ CallbackEncMD->getOperand(CallbackEncMD->getNumOperands() - 1).get();
+ auto *VarArgFlagAsCM = cast<ConstantAsMetadata>(VarArgFlagAsM);
+ assert(VarArgFlagAsCM->getType()->isIntegerTy(1) &&
+ "Malformed !callback metadata var-arg flag");
+
+ if (VarArgFlagAsCM->getValue()->isNullValue())
+ return;
+
+ // Add all variadic arguments at the end.
+ for (unsigned u = Callee->arg_size(); u < NumCallOperands; u++)
+ CI.ParameterEncoding.push_back(u);
+}
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index a5dc623e1a30..eb5760daecb3 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1,9 +1,8 @@
//===- AsmWriter.cpp - Printing LLVM as an assembly file ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -621,7 +620,10 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
}
case Type::VectorTyID: {
VectorType *PTy = cast<VectorType>(Ty);
- OS << "<" << PTy->getNumElements() << " x ";
+ OS << "<";
+ if (PTy->isScalable())
+ OS << "vscale x ";
+ OS << PTy->getNumElements() << " x ";
print(PTy->getElementType(), OS);
OS << '>';
return;
@@ -1038,6 +1040,9 @@ void SlotTracker::processIndex() {
TidIter != TheIndex->typeIds().end(); TidIter++)
CreateTypeIdSlot(TidIter->second.first);
+ for (auto &TId : TheIndex->typeIdCompatibleVtableMap())
+ CreateGUIDSlot(GlobalValue::getGUID(TId.first));
+
ST_DEBUG("end processIndex!\n");
}
@@ -2002,6 +2007,19 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
Out << ")";
}
+static void writeDICommonBlock(raw_ostream &Out, const DICommonBlock *N,
+ TypePrinting *TypePrinter, SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DICommonBlock(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMetadata("scope", N->getRawScope(), false);
+ Printer.printMetadata("declaration", N->getRawDecl(), false);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("file", N->getRawFile());
+ Printer.printInt("line", N->getLineNo());
+ Out << ")";
+}
+
static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
@@ -2124,8 +2142,13 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
assert(!OpStr.empty() && "Expected valid opcode");
Out << FS << OpStr;
- for (unsigned A = 0, AE = I->getNumArgs(); A != AE; ++A)
- Out << FS << I->getArg(A);
+ if (I->getOp() == dwarf::DW_OP_LLVM_convert) {
+ Out << FS << I->getArg(0);
+ Out << FS << dwarf::AttributeEncodingString(I->getArg(1));
+ } else {
+ for (unsigned A = 0, AE = I->getNumArgs(); A != AE; ++A)
+ Out << FS << I->getArg(A);
+ }
}
} else {
for (const auto &I : N->getElements())
@@ -2393,6 +2416,7 @@ public:
void printGlobalVarSummary(const GlobalVarSummary *GS);
void printFunctionSummary(const FunctionSummary *FS);
void printTypeIdSummary(const TypeIdSummary &TIS);
+ void printTypeIdCompatibleVtableSummary(const TypeIdCompatibleVtableInfo &TI);
void printTypeTestResolution(const TypeTestResolution &TTRes);
void printArgs(const std::vector<uint64_t> &Args);
void printWPDRes(const WholeProgramDevirtResolution &WPDRes);
@@ -2695,6 +2719,15 @@ void AssemblyWriter::printModuleSummaryIndex() {
printTypeIdSummary(TidIter->second.second);
Out << ") ; guid = " << TidIter->first << "\n";
}
+
+ // Print the TypeIdCompatibleVtableMap entries.
+ for (auto &TId : TheIndex->typeIdCompatibleVtableMap()) {
+ auto GUID = GlobalValue::getGUID(TId.first);
+ Out << "^" << Machine.getGUIDSlot(GUID)
+ << " = typeidCompatibleVTable: (name: \"" << TId.first << "\"";
+ printTypeIdCompatibleVtableSummary(TId.second);
+ Out << ") ; guid = " << GUID << "\n";
+ }
}
static const char *
@@ -2777,6 +2810,19 @@ void AssemblyWriter::printTypeIdSummary(const TypeIdSummary &TIS) {
Out << ")";
}
+void AssemblyWriter::printTypeIdCompatibleVtableSummary(
+ const TypeIdCompatibleVtableInfo &TI) {
+ Out << ", summary: (";
+ FieldSeparator FS;
+ for (auto &P : TI) {
+ Out << FS;
+ Out << "(offset: " << P.AddressPointOffset << ", ";
+ Out << "^" << Machine.getGUIDSlot(P.VTableVI.getGUID());
+ Out << ")";
+ }
+ Out << ")";
+}
+
void AssemblyWriter::printArgs(const std::vector<uint64_t> &Args) {
Out << "args: (";
FieldSeparator FS;
@@ -2845,7 +2891,21 @@ void AssemblyWriter::printAliasSummary(const AliasSummary *AS) {
}
void AssemblyWriter::printGlobalVarSummary(const GlobalVarSummary *GS) {
- Out << ", varFlags: (readonly: " << GS->VarFlags.ReadOnly << ")";
+ Out << ", varFlags: (readonly: " << GS->VarFlags.MaybeReadOnly << ", "
+ << "writeonly: " << GS->VarFlags.MaybeWriteOnly << ")";
+
+ auto VTableFuncs = GS->vTableFuncs();
+ if (!VTableFuncs.empty()) {
+ Out << ", vTableFuncs: (";
+ FieldSeparator FS;
+ for (auto &P : VTableFuncs) {
+ Out << FS;
+ Out << "(virtFunc: ^" << Machine.getGUIDSlot(P.FuncVI.getGUID())
+ << ", offset: " << P.VTableOffset;
+ Out << ")";
+ }
+ Out << ")";
+ }
}
static std::string getLinkageName(GlobalValue::LinkageTypes LT) {
@@ -3024,6 +3084,7 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
Out << ", notEligibleToImport: " << GVFlags.NotEligibleToImport;
Out << ", live: " << GVFlags.Live;
Out << ", dsoLocal: " << GVFlags.DSOLocal;
+ Out << ", canAutoHide: " << GVFlags.CanAutoHide;
Out << ")";
if (Summary.getSummaryKind() == GlobalValueSummary::AliasKind)
@@ -3041,6 +3102,8 @@ void AssemblyWriter::printSummary(const GlobalValueSummary &Summary) {
Out << FS;
if (Ref.isReadOnly())
Out << "readonly ";
+ else if (Ref.isWriteOnly())
+ Out << "writeonly ";
Out << "^" << Machine.getGUIDSlot(Ref.getGUID());
}
Out << ")";
@@ -3229,6 +3292,12 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printEscapedString(GV->getSection(), Out);
Out << '"';
}
+ if (GV->hasPartition()) {
+ Out << ", partition \"";
+ printEscapedString(GV->getPartition(), Out);
+ Out << '"';
+ }
+
maybePrintComdat(Out, *GV);
if (GV->getAlignment())
Out << ", align " << GV->getAlignment();
@@ -3280,6 +3349,12 @@ void AssemblyWriter::printIndirectSymbol(const GlobalIndirectSymbol *GIS) {
writeOperand(IS, !isa<ConstantExpr>(IS));
}
+ if (GIS->hasPartition()) {
+ Out << ", partition \"";
+ printEscapedString(GIS->getPartition(), Out);
+ Out << '"';
+ }
+
printInfoComment(*GIS);
Out << '\n';
}
@@ -3420,6 +3495,11 @@ void AssemblyWriter::printFunction(const Function *F) {
printEscapedString(F->getSection(), Out);
Out << '"';
}
+ if (F->hasPartition()) {
+ Out << " partition \"";
+ printEscapedString(F->getPartition(), Out);
+ Out << '"';
+ }
maybePrintComdat(Out, *F);
if (F->getAlignment())
Out << " align " << F->getAlignment();
@@ -3478,23 +3558,24 @@ void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) {
/// printBasicBlock - This member is called for each basic block in a method.
void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
+ bool IsEntryBlock = BB == &BB->getParent()->getEntryBlock();
if (BB->hasName()) { // Print out the label if it exists...
Out << "\n";
PrintLLVMName(Out, BB->getName(), LabelPrefix);
Out << ':';
- } else if (!BB->use_empty()) { // Don't print block # of no uses...
- Out << "\n; <label>:";
+ } else if (!IsEntryBlock) {
+ Out << "\n";
int Slot = Machine.getLocalSlot(BB);
if (Slot != -1)
Out << Slot << ":";
else
- Out << "<badref>";
+ Out << "<badref>:";
}
if (!BB->getParent()) {
Out.PadToColumn(50);
Out << "; Error: Block without parent!";
- } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
+ } else if (!IsEntryBlock) {
// Output predecessors for the block.
Out.PadToColumn(50);
Out << ";";
@@ -3837,6 +3918,51 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(II->getNormalDest(), true);
Out << " unwind ";
writeOperand(II->getUnwindDest(), true);
+ } else if (const CallBrInst *CBI = dyn_cast<CallBrInst>(&I)) {
+ Operand = CBI->getCalledValue();
+ FunctionType *FTy = CBI->getFunctionType();
+ Type *RetTy = FTy->getReturnType();
+ const AttributeList &PAL = CBI->getAttributes();
+
+ // Print the calling convention being used.
+ if (CBI->getCallingConv() != CallingConv::C) {
+ Out << " ";
+ PrintCallingConv(CBI->getCallingConv(), Out);
+ }
+
+ if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
+
+ // If possible, print out the short form of the callbr instruction. We can
+ // only do this if the first argument is a pointer to a nonvararg function,
+ // and if the return type is not a pointer to a function.
+ //
+ Out << ' ';
+ TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out);
+ Out << ' ';
+ writeOperand(Operand, false);
+ Out << '(';
+ for (unsigned op = 0, Eop = CBI->getNumArgOperands(); op < Eop; ++op) {
+ if (op)
+ Out << ", ";
+ writeParamOperand(CBI->getArgOperand(op), PAL.getParamAttributes(op));
+ }
+
+ Out << ')';
+ if (PAL.hasAttributes(AttributeList::FunctionIndex))
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+ writeOperandBundles(CBI);
+
+ Out << "\n to ";
+ writeOperand(CBI->getDefaultDest(), true);
+ Out << " [";
+ for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i) {
+ if (i != 0)
+ Out << ", ";
+ writeOperand(CBI->getIndirectDest(i), true);
+ }
+ Out << ']';
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Out << ' ';
if (AI->isUsedWithInAlloca())
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index bb0c072e4781..f989fa3b910e 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -1,9 +1,8 @@
//===- AttributeImpl.h - Attribute Internals --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -30,6 +29,7 @@
namespace llvm {
class LLVMContext;
+class Type;
//===----------------------------------------------------------------------===//
/// \class
@@ -42,7 +42,8 @@ protected:
enum AttrEntryKind {
EnumAttrEntry,
IntAttrEntry,
- StringAttrEntry
+ StringAttrEntry,
+ TypeAttrEntry,
};
AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
@@ -57,6 +58,7 @@ public:
bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
bool isIntAttribute() const { return KindID == IntAttrEntry; }
bool isStringAttribute() const { return KindID == StringAttrEntry; }
+ bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
bool hasAttribute(Attribute::AttrKind A) const;
bool hasAttribute(StringRef Kind) const;
@@ -67,16 +69,20 @@ public:
StringRef getKindAsString() const;
StringRef getValueAsString() const;
+ Type *getValueAsType() const;
+
/// Used when sorting the attributes.
bool operator<(const AttributeImpl &AI) const;
void Profile(FoldingSetNodeID &ID) const {
if (isEnumAttribute())
- Profile(ID, getKindAsEnum(), 0);
+ Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
else if (isIntAttribute())
Profile(ID, getKindAsEnum(), getValueAsInt());
- else
+ else if (isStringAttribute())
Profile(ID, getKindAsString(), getValueAsString());
+ else
+ Profile(ID, getKindAsEnum(), getValueAsType());
}
static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
@@ -89,6 +95,12 @@ public:
ID.AddString(Kind);
if (!Values.empty()) ID.AddString(Values);
}
+
+ static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+ Type *Ty) {
+ ID.AddInteger(Kind);
+ ID.AddPointer(Ty);
+ }
};
//===----------------------------------------------------------------------===//
@@ -146,6 +158,18 @@ public:
StringRef getStringValue() const { return Val; }
};
+class TypeAttributeImpl : public EnumAttributeImpl {
+ virtual void anchor();
+
+ Type *Ty;
+
+public:
+ TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+ : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+ Type *getTypeValue() const { return Ty; }
+};
+
//===----------------------------------------------------------------------===//
/// \class
/// This class represents a group of attributes that apply to one
@@ -155,9 +179,9 @@ class AttributeSetNode final
private TrailingObjects<AttributeSetNode, Attribute> {
friend TrailingObjects;
- /// Bitset with a bit for each available attribute Attribute::AttrKind.
- uint64_t AvailableAttrs;
unsigned NumAttrs; ///< Number of attributes in this node.
+ /// Bitset with a bit for each available attribute Attribute::AttrKind.
+ uint8_t AvailableAttrs[12] = {};
AttributeSetNode(ArrayRef<Attribute> Attrs);
@@ -176,7 +200,7 @@ public:
unsigned getNumAttributes() const { return NumAttrs; }
bool hasAttribute(Attribute::AttrKind Kind) const {
- return AvailableAttrs & ((uint64_t)1) << Kind;
+ return AvailableAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
}
bool hasAttribute(StringRef Kind) const;
bool hasAttributes() const { return NumAttrs != 0; }
@@ -190,6 +214,7 @@ public:
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp) const;
+ Type *getByValType() const;
using iterator = const Attribute *;
@@ -219,10 +244,10 @@ class AttributeListImpl final
friend TrailingObjects;
private:
- /// Bitset with a bit for each available attribute Attribute::AttrKind.
- uint64_t AvailableFunctionAttrs;
LLVMContext &Context;
unsigned NumAttrSets; ///< Number of entries in this set.
+ /// Bitset with a bit for each available attribute Attribute::AttrKind.
+ uint8_t AvailableFunctionAttrs[12] = {};
// Helper fn for TrailingObjects class.
size_t numTrailingObjects(OverloadToken<AttributeSet>) { return NumAttrSets; }
@@ -242,7 +267,7 @@ public:
/// Return true if the AttributeSet or the FunctionIndex has an
/// enum attribute of the given kind.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
- return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
+ return AvailableFunctionAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
}
using iterator = const AttributeSet *;
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index ff46debb7a9e..bb90bcd7dd74 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1,9 +1,8 @@
//===- Attributes.cpp - Implement AttributesList --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -122,6 +121,27 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) {
return Attribute(PA);
}
+Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind,
+ Type *Ty) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ FoldingSetNodeID ID;
+ ID.AddInteger(Kind);
+ ID.AddPointer(Ty);
+
+ void *InsertPoint;
+ AttributeImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (!PA) {
+ // If we didn't find any existing attributes of the same shape then create a
+ // new one and insert it.
+ PA = new TypeAttributeImpl(Kind, Ty);
+ pImpl->AttrsSet.InsertNode(PA, InsertPoint);
+ }
+
+ // Return the Attribute that we found or created.
+ return Attribute(PA);
+}
+
Attribute Attribute::getWithAlignment(LLVMContext &Context, uint64_t Align) {
assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
assert(Align <= 0x40000000 && "Alignment too large.");
@@ -147,6 +167,10 @@ Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context,
return get(Context, DereferenceableOrNull, Bytes);
}
+Attribute Attribute::getWithByValType(LLVMContext &Context, Type *Ty) {
+ return get(Context, ByVal, Ty);
+}
+
Attribute
Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg) {
@@ -171,9 +195,13 @@ bool Attribute::isStringAttribute() const {
return pImpl && pImpl->isStringAttribute();
}
+bool Attribute::isTypeAttribute() const {
+ return pImpl && pImpl->isTypeAttribute();
+}
+
Attribute::AttrKind Attribute::getKindAsEnum() const {
if (!pImpl) return None;
- assert((isEnumAttribute() || isIntAttribute()) &&
+ assert((isEnumAttribute() || isIntAttribute() || isTypeAttribute()) &&
"Invalid attribute type to get the kind as an enum!");
return pImpl->getKindAsEnum();
}
@@ -199,6 +227,14 @@ StringRef Attribute::getValueAsString() const {
return pImpl->getValueAsString();
}
+Type *Attribute::getValueAsType() const {
+ if (!pImpl) return {};
+ assert(isTypeAttribute() &&
+ "Invalid attribute type to get the value as a type!");
+ return pImpl->getValueAsType();
+}
+
+
bool Attribute::hasAttribute(AttrKind Kind) const {
return (pImpl && pImpl->hasAttribute(Kind)) || (!pImpl && Kind == None);
}
@@ -247,14 +283,14 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "sanitize_address";
if (hasAttribute(Attribute::SanitizeHWAddress))
return "sanitize_hwaddress";
+ if (hasAttribute(Attribute::SanitizeMemTag))
+ return "sanitize_memtag";
if (hasAttribute(Attribute::AlwaysInline))
return "alwaysinline";
if (hasAttribute(Attribute::ArgMemOnly))
return "argmemonly";
if (hasAttribute(Attribute::Builtin))
return "builtin";
- if (hasAttribute(Attribute::ByVal))
- return "byval";
if (hasAttribute(Attribute::Convergent))
return "convergent";
if (hasAttribute(Attribute::SwiftError))
@@ -287,6 +323,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "nocapture";
if (hasAttribute(Attribute::NoDuplicate))
return "noduplicate";
+ if (hasAttribute(Attribute::NoFree))
+ return "nofree";
if (hasAttribute(Attribute::NoImplicitFloat))
return "noimplicitfloat";
if (hasAttribute(Attribute::NoInline))
@@ -299,6 +337,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "noredzone";
if (hasAttribute(Attribute::NoReturn))
return "noreturn";
+ if (hasAttribute(Attribute::NoSync))
+ return "nosync";
+ if (hasAttribute(Attribute::WillReturn))
+ return "willreturn";
if (hasAttribute(Attribute::NoCfCheck))
return "nocf_check";
if (hasAttribute(Attribute::NoRecurse))
@@ -351,6 +393,21 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "zeroext";
if (hasAttribute(Attribute::Cold))
return "cold";
+ if (hasAttribute(Attribute::ImmArg))
+ return "immarg";
+
+ if (hasAttribute(Attribute::ByVal)) {
+ std::string Result;
+ Result += "byval";
+ if (Type *Ty = getValueAsType()) {
+ raw_string_ostream OS(Result);
+ Result += '(';
+ Ty->print(OS, false, true);
+ OS.flush();
+ Result += ')';
+ }
+ return Result;
+ }
// FIXME: These should be output like this:
//
@@ -450,6 +507,8 @@ void IntAttributeImpl::anchor() {}
void StringAttributeImpl::anchor() {}
+void TypeAttributeImpl::anchor() {}
+
bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
if (isStringAttribute()) return false;
return getKindAsEnum() == A;
@@ -461,7 +520,7 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const {
}
Attribute::AttrKind AttributeImpl::getKindAsEnum() const {
- assert(isEnumAttribute() || isIntAttribute());
+ assert(isEnumAttribute() || isIntAttribute() || isTypeAttribute());
return static_cast<const EnumAttributeImpl *>(this)->getEnumKind();
}
@@ -480,6 +539,11 @@ StringRef AttributeImpl::getValueAsString() const {
return static_cast<const StringAttributeImpl *>(this)->getStringValue();
}
+Type *AttributeImpl::getValueAsType() const {
+ assert(isTypeAttribute());
+ return static_cast<const TypeAttributeImpl *>(this)->getTypeValue();
+}
+
bool AttributeImpl::operator<(const AttributeImpl &AI) const {
// This sorts the attributes with Attribute::AttrKinds coming first (sorted
// relative to their enum value) and then strings.
@@ -487,10 +551,23 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
if (AI.isEnumAttribute()) return getKindAsEnum() < AI.getKindAsEnum();
if (AI.isIntAttribute()) return true;
if (AI.isStringAttribute()) return true;
+ if (AI.isTypeAttribute()) return true;
+ }
+
+ if (isTypeAttribute()) {
+ if (AI.isEnumAttribute()) return false;
+ if (AI.isTypeAttribute()) {
+ assert(getKindAsEnum() != AI.getKindAsEnum() &&
+ "Comparison of types would be unstable");
+ return getKindAsEnum() < AI.getKindAsEnum();
+ }
+ if (AI.isIntAttribute()) return true;
+ if (AI.isStringAttribute()) return true;
}
if (isIntAttribute()) {
if (AI.isEnumAttribute()) return false;
+ if (AI.isTypeAttribute()) return false;
if (AI.isIntAttribute()) {
if (getKindAsEnum() == AI.getKindAsEnum())
return getValueAsInt() < AI.getValueAsInt();
@@ -499,7 +576,9 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
if (AI.isStringAttribute()) return true;
}
+ assert(isStringAttribute());
if (AI.isEnumAttribute()) return false;
+ if (AI.isTypeAttribute()) return false;
if (AI.isIntAttribute()) return false;
if (getKindAsString() == AI.getKindAsString())
return getValueAsString() < AI.getValueAsString();
@@ -607,6 +686,10 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
}
+Type *AttributeSet::getByValType() const {
+ return SetNode ? SetNode->getByValType() : nullptr;
+}
+
std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
return SetNode ? SetNode->getAllocSizeArgs()
: std::pair<unsigned, Optional<unsigned>>(0, 0);
@@ -637,13 +720,18 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const {
//===----------------------------------------------------------------------===//
AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
- : AvailableAttrs(0), NumAttrs(Attrs.size()) {
+ : NumAttrs(Attrs.size()) {
// There's memory after the node where we can store the entries in.
llvm::copy(Attrs, getTrailingObjects<Attribute>());
+ static_assert(Attribute::EndAttrKinds <=
+ sizeof(AvailableAttrs) * CHAR_BIT,
+ "Too many attributes");
+
for (const auto I : *this) {
if (!I.isStringAttribute()) {
- AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
+ Attribute::AttrKind Kind = I.getKindAsEnum();
+ AvailableAttrs[Kind / 8] |= 1ULL << (Kind % 8);
}
}
}
@@ -690,6 +778,9 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
Attribute Attr;
switch (Kind) {
+ case Attribute::ByVal:
+ Attr = Attribute::getWithByValType(C, B.getByValType());
+ break;
case Attribute::Alignment:
Attr = Attribute::getWithAlignment(C, B.getAlignment());
break;
@@ -759,6 +850,13 @@ unsigned AttributeSetNode::getStackAlignment() const {
return 0;
}
+Type *AttributeSetNode::getByValType() const {
+ for (const auto I : *this)
+ if (I.hasAttribute(Attribute::ByVal))
+ return I.getValueAsType();
+ return 0;
+}
+
uint64_t AttributeSetNode::getDereferenceableBytes() const {
for (const auto I : *this)
if (I.hasAttribute(Attribute::Dereferenceable))
@@ -805,7 +903,7 @@ static constexpr unsigned attrIdxToArrayIdx(unsigned Index) {
AttributeListImpl::AttributeListImpl(LLVMContext &C,
ArrayRef<AttributeSet> Sets)
- : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) {
+ : Context(C), NumAttrSets(Sets.size()) {
assert(!Sets.empty() && "pointless AttributeListImpl");
// There's memory after the node where we can store the entries in.
@@ -818,8 +916,10 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C,
static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U,
"function should be stored in slot 0");
for (const auto I : Sets[0]) {
- if (!I.isStringAttribute())
- AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum();
+ if (!I.isStringAttribute()) {
+ Attribute::AttrKind Kind = I.getKindAsEnum();
+ AvailableFunctionAttrs[Kind / 8] |= 1ULL << (Kind % 8);
+ }
}
}
@@ -1257,6 +1357,11 @@ unsigned AttributeList::getParamAlignment(unsigned ArgNo) const {
return getAttributes(ArgNo + FirstArgIndex).getAlignment();
}
+Type *AttributeList::getParamByValType(unsigned Index) const {
+ return getAttributes(Index+FirstArgIndex).getByValType();
+}
+
+
unsigned AttributeList::getStackAlignment(unsigned Index) const {
return getAttributes(Index).getStackAlignment();
}
@@ -1335,6 +1440,7 @@ void AttrBuilder::clear() {
TargetDepAttrs.clear();
Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
AllocSizeArgs = 0;
+ ByValType = nullptr;
}
AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
@@ -1359,6 +1465,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
Alignment = Attr.getAlignment();
else if (Kind == Attribute::StackAlignment)
StackAlignment = Attr.getStackAlignment();
+ else if (Kind == Attribute::ByVal)
+ ByValType = Attr.getValueAsType();
else if (Kind == Attribute::Dereferenceable)
DerefBytes = Attr.getDereferenceableBytes();
else if (Kind == Attribute::DereferenceableOrNull)
@@ -1381,6 +1489,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
Alignment = 0;
else if (Val == Attribute::StackAlignment)
StackAlignment = 0;
+ else if (Val == Attribute::ByVal)
+ ByValType = nullptr;
else if (Val == Attribute::Dereferenceable)
DerefBytes = 0;
else if (Val == Attribute::DereferenceableOrNull)
@@ -1463,6 +1573,12 @@ AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
return *this;
}
+AttrBuilder &AttrBuilder::addByValAttr(Type *Ty) {
+ Attrs[Attribute::ByVal] = true;
+ ByValType = Ty;
+ return *this;
+}
+
AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
// FIXME: What if both have alignments, but they don't match?!
if (!Alignment)
@@ -1480,6 +1596,9 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
if (!AllocSizeArgs)
AllocSizeArgs = B.AllocSizeArgs;
+ if (!ByValType)
+ ByValType = B.ByValType;
+
Attrs |= B.Attrs;
for (auto I : B.td_attrs())
@@ -1505,6 +1624,9 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
if (B.AllocSizeArgs)
AllocSizeArgs = 0;
+ if (B.ByValType)
+ ByValType = nullptr;
+
Attrs &= ~B.Attrs;
for (auto I : B.td_attrs())
@@ -1564,7 +1686,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
return false;
return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
- DerefBytes == B.DerefBytes;
+ DerefBytes == B.DerefBytes && ByValType == B.ByValType;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index b2eb8b09982e..a2d820352825 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -1,9 +1,8 @@
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -199,14 +198,14 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
- Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
- Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
- Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
- Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
- Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
- Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
- Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
- Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
+ Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
+ Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
+ Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
+ Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
+ Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
+ Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
+ Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
+ Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
@@ -216,7 +215,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.cvtusi2sd" || // Added in 7.0
Name.startswith("avx512.mask.permvar.") || // Added in 7.0
- Name.startswith("avx512.mask.permvar.") || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
@@ -300,6 +298,11 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
+ Name.startswith("avx512.mask.conflict.") || // Added in 9.0
+ Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
+ Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
+ Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
+ Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -338,6 +341,16 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
+ Name.startswith("avx512.mask.expand.b") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.w") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.d") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.q") || // Added in 9.0
+ Name.startswith("avx512.mask.expand.p") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.b") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.w") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.d") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.q") || // Added in 9.0
+ Name.startswith("avx512.mask.compress.p") || // Added in 9.0
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
@@ -362,8 +375,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
- (Name.startswith("xop.vpcom") && // Added in 3.2
- F->arg_size() == 2) ||
+ Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
Name.startswith("xop.vprot") || // Added in 8.0
Name.startswith("avx512.prol") || // Added in 8.0
Name.startswith("avx512.pror") || // Added in 8.0
@@ -373,8 +385,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("avx512.mask.prol.") || // Added in 8.0
Name.startswith("avx512.ptestm") || //Added in 6.0
Name.startswith("avx512.ptestnm") || //Added in 6.0
- Name.startswith("sse2.pavg") || // Added in 6.0
- Name.startswith("avx2.pavg") || // Added in 6.0
Name.startswith("avx512.mask.pavg")) // Added in 6.0
return true;
@@ -469,12 +479,23 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
}
}
+ if (Name == "seh.recoverfp") {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+ return true;
+ }
+
return false;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
+ // Upgrade intrinsics "clang.arc.use" which doesn't start with "llvm.".
+ if (F->getName() == "clang.arc.use") {
+ NewFn = nullptr;
+ return true;
+ }
+
// Quickly eliminate it, if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 8 || !Name.startswith("llvm."))
@@ -544,9 +565,16 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
- if (Name == "x86.seh.recoverfp") {
- NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
- return true;
+ if (Name.startswith("aarch64.neon.addp")) {
+ if (F->arg_size() != 2)
+ break; // Invalid IR.
+ auto fArgs = F->getFunctionType()->params();
+ VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
+ if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::aarch64_neon_faddp, fArgs);
+ return true;
+ }
}
break;
}
@@ -574,6 +602,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
+ case 'e': {
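+ // Rewrite the old experimental.vector.reduce.fadd/fmul names to the v2
+ // variants, which are overloaded on both the result and the vector type.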
+ SmallVector<StringRef, 2> Groups;
+ Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
+ if (R.match(Name, &Groups)) {
+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
+ if (Groups[1] == "fadd")
+ ID = Intrinsic::experimental_vector_reduce_v2_fadd;
+ if (Groups[1] == "fmul")
+ ID = Intrinsic::experimental_vector_reduce_v2_fmul;
+
+ if (ID != Intrinsic::not_intrinsic) {
+ rename(F);
+ auto Args = F->getFunctionType()->params();
+ Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+ return true;
+ }
+ }
+ break;
+ }
case 'i':
case 'l': {
bool IsLifetimeStart = Name.startswith("lifetime.start");
@@ -716,6 +764,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
.Cases("clz.ll", "popc.ll", "h2f", true)
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
+ .StartsWith("atomic.load.add.f32.p", true)
+ .StartsWith("atomic.load.add.f64.p", true)
.Default(false);
if (Expand) {
NewFn = nullptr;
@@ -729,7 +779,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// address space.
if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
- if (F->arg_size() == 2 ||
+ if (F->arg_size() == 2 || F->arg_size() == 3 ||
F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
@@ -777,9 +827,35 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
return Upgraded;
}
-bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
- // Nothing to do yet.
- return false;
+GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
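+ // Upgrade two-element llvm.global_ctors / llvm.global_dtors entries to the
+ // three-element form by appending a null associated-data pointer.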
+ if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
+ GV->getName() == "llvm.global_dtors")) ||
+ !GV->hasInitializer())
+ return nullptr;
+ ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
+ if (!ATy)
+ return nullptr;
+ StructType *STy = dyn_cast<StructType>(ATy->getElementType());
+ if (!STy || STy->getNumElements() != 2)
+ return nullptr;
+
+ LLVMContext &C = GV->getContext();
+ IRBuilder<> IRB(C);
+ auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
+ IRB.getInt8PtrTy());
+ Constant *Init = GV->getInitializer();
+ unsigned N = Init->getNumOperands();
+ std::vector<Constant *> NewCtors(N);
+ for (unsigned i = 0; i != N; ++i) {
+ auto Ctor = cast<Constant>(Init->getOperand(i));
+ NewCtors[i] = ConstantStruct::get(
+ EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
+ Constant::getNullValue(IRB.getInt8PtrTy()));
+ }
+ Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
+
+ return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
+ NewInit, GV->getName());
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
@@ -1053,6 +1129,45 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
return Res;
}
+static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
+ bool IsSigned) {
+ Type *Ty = CI.getType();
+ Value *LHS = CI.getArgOperand(0);
+ Value *RHS = CI.getArgOperand(1);
+
+ CmpInst::Predicate Pred;
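+ // Immediates 0-5 select an integer predicate; 6 and 7 fold to constant
+ // false/true. The i1 compare result is sign-extended back to the vector type.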
+ switch (Imm) {
+ case 0x0:
+ Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ break;
+ case 0x1:
+ Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ break;
+ case 0x2:
+ Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ break;
+ case 0x3:
+ Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+ break;
+ case 0x4:
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case 0x5:
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case 0x6:
+ return Constant::getNullValue(Ty); // FALSE
+ case 0x7:
+ return Constant::getAllOnesValue(Ty); // TRUE
+ default:
+ llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
+ }
+
+ Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
+ Value *Ext = Builder.CreateSExt(Cmp, Ty);
+ return Ext;
+}
+
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
bool IsShiftRight, bool ZeroMask) {
Type *Ty = CI.getType();
@@ -1110,16 +1225,16 @@ static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
Value *Ptr, Value *Passthru, Value *Mask,
bool Aligned) {
+ Type *ValTy = Passthru->getType();
// Cast the pointer to the right type.
- Ptr = Builder.CreateBitCast(Ptr,
- llvm::PointerType::getUnqual(Passthru->getType()));
+ Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
unsigned Align =
Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
// If the mask is all ones just emit a regular store.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
- return Builder.CreateAlignedLoad(Ptr, Align);
+ return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = Passthru->getType()->getVectorNumElements();
@@ -1462,6 +1577,36 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
+ } else if (Name.startswith("conflict.")) {
+ if (Name[9] == 'd' && VecWidth == 128)
+ IID = Intrinsic::x86_avx512_conflict_d_128;
+ else if (Name[9] == 'd' && VecWidth == 256)
+ IID = Intrinsic::x86_avx512_conflict_d_256;
+ else if (Name[9] == 'd' && VecWidth == 512)
+ IID = Intrinsic::x86_avx512_conflict_d_512;
+ else if (Name[9] == 'q' && VecWidth == 128)
+ IID = Intrinsic::x86_avx512_conflict_q_128;
+ else if (Name[9] == 'q' && VecWidth == 256)
+ IID = Intrinsic::x86_avx512_conflict_q_256;
+ else if (Name[9] == 'q' && VecWidth == 512)
+ IID = Intrinsic::x86_avx512_conflict_q_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+ } else if (Name.startswith("pavg.")) {
+ if (Name[5] == 'b' && VecWidth == 128)
+ IID = Intrinsic::x86_sse2_pavg_b;
+ else if (Name[5] == 'b' && VecWidth == 256)
+ IID = Intrinsic::x86_avx2_pavg_b;
+ else if (Name[5] == 'b' && VecWidth == 512)
+ IID = Intrinsic::x86_avx512_pavg_b_512;
+ else if (Name[5] == 'w' && VecWidth == 128)
+ IID = Intrinsic::x86_sse2_pavg_w;
+ else if (Name[5] == 'w' && VecWidth == 256)
+ IID = Intrinsic::x86_avx2_pavg_w;
+ else if (Name[5] == 'w' && VecWidth == 512)
+ IID = Intrinsic::x86_avx512_pavg_w_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
} else
return false;
@@ -1503,6 +1648,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Get the Function's name.
StringRef Name = F->getName();
+ // clang.arc.use is an old name for llvm.arc.clang.arc.use. It is dropped
+ // from the upgrader because the optimizer now only recognizes intrinsics for
+ // ARC runtime calls.
+ if (Name == "clang.arc.use") {
+ CI->eraseFromParent();
+ return;
+ }
+
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
@@ -1917,38 +2070,47 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Name == "avx.cvtdq2.ps.256" ||
Name.startswith("avx512.mask.cvtdq2pd.") ||
Name.startswith("avx512.mask.cvtudq2pd.") ||
- Name == "avx512.mask.cvtdq2ps.128" ||
- Name == "avx512.mask.cvtdq2ps.256" ||
- Name == "avx512.mask.cvtudq2ps.128" ||
- Name == "avx512.mask.cvtudq2ps.256" ||
- Name == "avx512.mask.cvtqq2pd.128" ||
- Name == "avx512.mask.cvtqq2pd.256" ||
- Name == "avx512.mask.cvtuqq2pd.128" ||
- Name == "avx512.mask.cvtuqq2pd.256" ||
+ Name.startswith("avx512.mask.cvtdq2ps.") ||
+ Name.startswith("avx512.mask.cvtudq2ps.") ||
+ Name.startswith("avx512.mask.cvtqq2pd.") ||
+ Name.startswith("avx512.mask.cvtuqq2pd.") ||
+ Name == "avx512.mask.cvtqq2ps.256" ||
+ Name == "avx512.mask.cvtqq2ps.512" ||
+ Name == "avx512.mask.cvtuqq2ps.256" ||
+ Name == "avx512.mask.cvtuqq2ps.512" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvt.ps2.pd.256" ||
Name == "avx512.mask.cvtps2pd.128" ||
Name == "avx512.mask.cvtps2pd.256")) {
Type *DstTy = CI->getType();
Rep = CI->getArgOperand(0);
+ Type *SrcTy = Rep->getType();
unsigned NumDstElts = DstTy->getVectorNumElements();
- if (NumDstElts < Rep->getType()->getVectorNumElements()) {
+ if (NumDstElts < SrcTy->getVectorNumElements()) {
assert(NumDstElts == 2 && "Unexpected vector size");
uint32_t ShuffleMask[2] = { 0, 1 };
Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
}
- bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
+ bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
if (IsPS2PD)
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
- else if (IsUnsigned)
- Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
- else
- Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
+ else if (CI->getNumArgOperands() == 4 &&
+ (!isa<ConstantInt>(CI->getArgOperand(3)) ||
+ cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
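+ // A rounding-mode argument other than 4 (CUR_DIRECTION) cannot be expressed
+ // as a plain [us]itofp, so call the explicit rounding conversion intrinsics.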
+ Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
+ : Intrinsic::x86_avx512_sitofp_round;
+ Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
+ { DstTy, SrcTy });
+ Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
+ } else {
+ Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
+ : Builder.CreateSIToFP(Rep, DstTy, "cvt");
+ }
- if (CI->getNumArgOperands() == 3)
+ if (CI->getNumArgOperands() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
@@ -1989,52 +2151,56 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Intrinsic::masked_compressstore,
ResultTy);
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
+ } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
+ Name.startswith("avx512.mask.expand."))) {
+ Type *ResultTy = CI->getType();
+
+ Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
+ ResultTy->getVectorNumElements());
+
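+ // The character right after "avx512.mask." distinguishes compress from expand.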
+ bool IsCompress = Name[12] == 'c';
+ Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
+ : Intrinsic::x86_avx512_mask_expand;
+ Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
+ Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
+ MaskVec });
} else if (IsX86 && Name.startswith("xop.vpcom")) {
- Intrinsic::ID intID;
- if (Name.endswith("ub"))
- intID = Intrinsic::x86_xop_vpcomub;
- else if (Name.endswith("uw"))
- intID = Intrinsic::x86_xop_vpcomuw;
- else if (Name.endswith("ud"))
- intID = Intrinsic::x86_xop_vpcomud;
- else if (Name.endswith("uq"))
- intID = Intrinsic::x86_xop_vpcomuq;
- else if (Name.endswith("b"))
- intID = Intrinsic::x86_xop_vpcomb;
- else if (Name.endswith("w"))
- intID = Intrinsic::x86_xop_vpcomw;
- else if (Name.endswith("d"))
- intID = Intrinsic::x86_xop_vpcomd;
- else if (Name.endswith("q"))
- intID = Intrinsic::x86_xop_vpcomq;
+ bool IsSigned;
+ if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
+ Name.endswith("uq"))
+ IsSigned = false;
+ else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
+ Name.endswith("q"))
+ IsSigned = true;
else
llvm_unreachable("Unknown suffix");
- Name = Name.substr(9); // strip off "xop.vpcom"
unsigned Imm;
- if (Name.startswith("lt"))
- Imm = 0;
- else if (Name.startswith("le"))
- Imm = 1;
- else if (Name.startswith("gt"))
- Imm = 2;
- else if (Name.startswith("ge"))
- Imm = 3;
- else if (Name.startswith("eq"))
- Imm = 4;
- else if (Name.startswith("ne"))
- Imm = 5;
- else if (Name.startswith("false"))
- Imm = 6;
- else if (Name.startswith("true"))
- Imm = 7;
- else
- llvm_unreachable("Unknown condition");
+ if (CI->getNumArgOperands() == 3) {
+ Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ } else {
+ Name = Name.substr(9); // strip off "xop.vpcom"
+ if (Name.startswith("lt"))
+ Imm = 0;
+ else if (Name.startswith("le"))
+ Imm = 1;
+ else if (Name.startswith("gt"))
+ Imm = 2;
+ else if (Name.startswith("ge"))
+ Imm = 3;
+ else if (Name.startswith("eq"))
+ Imm = 4;
+ else if (Name.startswith("ne"))
+ Imm = 5;
+ else if (Name.startswith("false"))
+ Imm = 6;
+ else if (Name.startswith("true"))
+ Imm = 7;
+ else
+ llvm_unreachable("Unknown condition");
+ }
- Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
- Rep =
- Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
- Builder.getInt8(Imm)});
+ Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
@@ -2103,6 +2269,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
+ } else if (Name == "avx512.mask.pmov.qd.256" ||
+ Name == "avx512.mask.pmov.qd.512" ||
+ Name == "avx512.mask.pmov.wb.256" ||
+ Name == "avx512.mask.pmov.wb.512") {
+ Type *Ty = CI->getArgOperand(1)->getType();
+ Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128")) {
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
@@ -2111,7 +2285,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
- Value *Load = Builder.CreateAlignedLoad(Op, 1);
+ Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
if (NumSrcElts == 2)
Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
{ 0, 1, 0, 1 });
@@ -2857,28 +3031,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Convert the type of the pointer to a pointer to the stored type.
Value *BC =
Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
- LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
+ LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
LI->setMetadata(M->getMDKindID("nontemporal"), Node);
Rep = LI;
- } else if (IsX86 &&
- (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
- Name.startswith("avx512.mask.pavg"))) {
- // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
- // llvm.x86.avx512.mask.pavg.b/w
- Value *A = CI->getArgOperand(0);
- Value *B = CI->getArgOperand(1);
- VectorType *ZextType = VectorType::getExtendedElementVectorType(
- cast<VectorType>(A->getType()));
- Value *ExtendedA = Builder.CreateZExt(A, ZextType);
- Value *ExtendedB = Builder.CreateZExt(B, ZextType);
- Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
- Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
- Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
- Rep = Builder.CreateTrunc(ShiftR, A->getType());
- if (CI->getNumArgOperands() > 2) {
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- }
} else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
Name.startswith("fma.vfmsub.") ||
Name.startswith("fma.vfnmadd.") ||
@@ -3273,6 +3428,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+ } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
+ Name.startswith("atomic.load.add.f64.p"))) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+ Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
+ AtomicOrdering::SequentiallyConsistent);
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
@@ -3334,7 +3495,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
DefaultCase();
return;
}
-
+ case Intrinsic::experimental_vector_reduce_v2_fmul: {
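+ // The v2 reductions take an explicit start value: fast-math calls get the
+ // identity (1.0 for fmul, 0.0 for fadd below); others forward the old
+ // accumulator operand.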
+ SmallVector<Value *, 2> Args;
+ if (CI->isFast())
+ Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
+ else
+ Args.push_back(CI->getOperand(0));
+ Args.push_back(CI->getOperand(1));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+ break;
+ }
+ case Intrinsic::experimental_vector_reduce_v2_fadd: {
+ SmallVector<Value *, 2> Args;
+ if (CI->isFast())
+ Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
+ else
+ Args.push_back(CI->getOperand(0));
+ Args.push_back(CI->getOperand(1));
+ NewCall = Builder.CreateCall(NewFn, Args);
+ cast<Instruction>(NewCall)->copyFastMathFlags(CI);
+ break;
+ }
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
@@ -3371,8 +3553,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
? Builder.getFalse()
: CI->getArgOperand(2);
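+ // Calls predating the 'dynamic' flag default it to false.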
+ Value *Dynamic =
+ CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
NewCall = Builder.CreateCall(
- NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
+ NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
break;
}
@@ -3649,8 +3833,8 @@ bool llvm::UpgradeDebugInfo(Module &M) {
bool llvm::UpgradeRetainReleaseMarker(Module &M) {
bool Changed = false;
- NamedMDNode *ModRetainReleaseMarker =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
+ const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
+ NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
if (ModRetainReleaseMarker) {
MDNode *Op = ModRetainReleaseMarker->getOperand(0);
if (Op) {
@@ -3660,11 +3844,11 @@ bool llvm::UpgradeRetainReleaseMarker(Module &M) {
ID->getString().split(ValueComp, "#");
if (ValueComp.size() == 2) {
std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
- Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
- ModRetainReleaseMarker->setOperand(0,
- MDNode::get(M.getContext(), Ops));
- Changed = true;
+ ID = MDString::get(M.getContext(), NewValue);
}
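+ // Store the (rewritten) marker string as a module flag and drop the old
+ // named-metadata node.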
+ M.addModuleFlag(Module::Error, MarkerKey, ID);
+ M.eraseNamedMetadata(ModRetainReleaseMarker);
+ Changed = true;
}
}
}
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 375924360dda..34410712645d 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -1,9 +1,8 @@
//===-- BasicBlock.cpp - Implement BasicBlock related methods -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -300,7 +299,7 @@ iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
/// called while the predecessor still refers to this block.
///
void BasicBlock::removePredecessor(BasicBlock *Pred,
- bool DontDeleteUselessPHIs) {
+ bool KeepOneInputPHIs) {
assert((hasNUsesOrMore(16)||// Reduce cost of this assertion for complex CFGs.
find(pred_begin(this), pred_end(this), Pred) != pred_end(this)) &&
"removePredecessor: BB is not a predecessor!");
@@ -331,11 +330,11 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
}
// <= Two predecessors BEFORE I remove one?
- if (max_idx <= 2 && !DontDeleteUselessPHIs) {
+ if (max_idx <= 2 && !KeepOneInputPHIs) {
// Yup, loop through and nuke the PHI nodes
while (PHINode *PN = dyn_cast<PHINode>(&front())) {
// Remove the predecessor first.
- PN->removeIncomingValue(Pred, !DontDeleteUselessPHIs);
+ PN->removeIncomingValue(Pred, !KeepOneInputPHIs);
// If the PHI _HAD_ two uses, replace PHI node with its now *single* value
if (max_idx == 2) {
@@ -360,7 +359,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
// If all incoming values to the Phi are the same, we can replace the Phi
// with that value.
Value* PNV = nullptr;
- if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
+ if (!KeepOneInputPHIs && (PNV = PN->hasConstantValue()))
if (PNV != PN) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
@@ -426,41 +425,37 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
// Now we must loop through all of the successors of the New block (which
// _were_ the successors of the 'this' block), and update any PHI nodes in
// successors. If there were PHI nodes in the successors, then they need to
- // know that incoming branches will be from New, not from Old.
+ // know that incoming branches will be from New, not from Old (this).
//
- for (succ_iterator I = succ_begin(New), E = succ_end(New); I != E; ++I) {
- // Loop over any phi nodes in the basic block, updating the BB field of
- // incoming values...
- BasicBlock *Successor = *I;
- for (auto &PN : Successor->phis()) {
- int Idx = PN.getBasicBlockIndex(this);
- while (Idx != -1) {
- PN.setIncomingBlock((unsigned)Idx, New);
- Idx = PN.getBasicBlockIndex(this);
- }
- }
- }
+ New->replaceSuccessorsPhiUsesWith(this, New);
return New;
}
-void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+void BasicBlock::replacePhiUsesWith(BasicBlock *Old, BasicBlock *New) {
+ // N.B. This might not be a complete BasicBlock, so don't assume
+ // that it ends with a non-phi instruction.
+ for (iterator II = begin(), IE = end(); II != IE; ++II) {
+ PHINode *PN = dyn_cast<PHINode>(II);
+ if (!PN)
+ break;
+ PN->replaceIncomingBlockWith(Old, New);
+ }
+}
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *Old,
+ BasicBlock *New) {
Instruction *TI = getTerminator();
if (!TI)
// Cope with being called on a BasicBlock that doesn't have a terminator
// yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
return;
- for (BasicBlock *Succ : successors(TI)) {
- // N.B. Succ might not be a complete BasicBlock, so don't assume
- // that it ends with a non-phi instruction.
- for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) {
- PHINode *PN = dyn_cast<PHINode>(II);
- if (!PN)
- break;
- int i;
- while ((i = PN->getBasicBlockIndex(this)) >= 0)
- PN->setIncomingBlock(i, New);
- }
- }
+ llvm::for_each(successors(TI), [Old, New](BasicBlock *Succ) {
+ Succ->replacePhiUsesWith(Old, New);
+ });
+}
+
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+ this->replaceSuccessorsPhiUsesWith(this, New);
}
/// Return true if this basic block is a landing pad. I.e., it's
diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp
index 3b1f7d62cdae..c9f715daf457 100644
--- a/lib/IR/Comdat.cpp
+++ b/lib/IR/Comdat.cpp
@@ -1,9 +1,8 @@
//===- Comdat.cpp - Implement Metadata classes ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 57de6b042303..835fbb3443b8 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1,9 +1,8 @@
//===- ConstantFold.cpp - LLVM constant folder ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/ErrorHandling.h"
@@ -268,19 +268,20 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
if (!Amt)
return nullptr;
- unsigned ShAmt = Amt->getZExtValue();
+ APInt ShAmt = Amt->getValue();
// Cannot analyze non-byte shifts.
if ((ShAmt & 7) != 0)
return nullptr;
- ShAmt >>= 3;
+ ShAmt.lshrInPlace(3);
// If the extract is known to be all zeros, return zero.
- if (ByteStart >= CSize-ShAmt)
- return Constant::getNullValue(IntegerType::get(CE->getContext(),
- ByteSize*8));
+ if (ShAmt.uge(CSize - ByteStart))
+ return Constant::getNullValue(
+ IntegerType::get(CE->getContext(), ByteSize * 8));
// If the extract is known to be fully in the input, extract it.
- if (ByteStart+ByteSize+ShAmt <= CSize)
- return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
+ if (ShAmt.ule(CSize - (ByteStart + ByteSize)))
+ return ExtractConstantBytes(CE->getOperand(0),
+ ByteStart + ShAmt.getZExtValue(), ByteSize);
// TODO: Handle the 'partially zero' case.
return nullptr;
@@ -290,19 +291,20 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
if (!Amt)
return nullptr;
- unsigned ShAmt = Amt->getZExtValue();
+ APInt ShAmt = Amt->getValue();
// Cannot analyze non-byte shifts.
if ((ShAmt & 7) != 0)
return nullptr;
- ShAmt >>= 3;
+ ShAmt.lshrInPlace(3);
// If the extract is known to be all zeros, return zero.
- if (ByteStart+ByteSize <= ShAmt)
- return Constant::getNullValue(IntegerType::get(CE->getContext(),
- ByteSize*8));
+ if (ShAmt.uge(ByteStart + ByteSize))
+ return Constant::getNullValue(
+ IntegerType::get(CE->getContext(), ByteSize * 8));
// If the extract is known to be fully in the input, extract it.
- if (ByteStart >= ShAmt)
- return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
+ if (ShAmt.ule(ByteStart))
+ return ExtractConstantBytes(CE->getOperand(0),
+ ByteStart - ShAmt.getZExtValue(), ByteSize);
// TODO: Handle the 'partially zero' case.
return nullptr;
@@ -916,6 +918,52 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
return ConstantVector::get(Result);
}
+Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) {
+ assert(Instruction::isUnaryOp(Opcode) && "Non-unary instruction detected");
+
+ // Handle scalar UndefValue. Vectors are always evaluated per element.
+ bool HasScalarUndef = !C->getType()->isVectorTy() && isa<UndefValue>(C);
+
+ if (HasScalarUndef) {
+ switch (static_cast<Instruction::UnaryOps>(Opcode)) {
+ case Instruction::FNeg:
+ return C; // -undef -> undef
+ case Instruction::UnaryOpsEnd:
+ llvm_unreachable("Invalid UnaryOp");
+ }
+ }
+
+ // Constant should not be UndefValue, unless these are vector constants.
+ assert(!HasScalarUndef && "Unexpected UndefValue");
+ // We only have FP UnaryOps right now.
+ assert(!isa<ConstantInt>(C) && "Unexpected Integer UnaryOp");
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ const APFloat &CV = CFP->getValueAPF();
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::FNeg:
+ return ConstantFP::get(C->getContext(), neg(CV));
+ }
+ } else if (VectorType *VTy = dyn_cast<VectorType>(C->getType())) {
+ // Fold each element and create a vector constant from those constants.
+ SmallVector<Constant*, 16> Result;
+ Type *Ty = IntegerType::get(VTy->getContext(), 32);
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *ExtractIdx = ConstantInt::get(Ty, i);
+ Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx);
+
+ Result.push_back(ConstantExpr::get(Opcode, Elt));
+ }
+
+ return ConstantVector::get(Result);
+ }
+
+ // We don't know how to fold this.
+ return nullptr;
+}
+
Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
Constant *C2) {
assert(Instruction::isBinaryOp(Opcode) && "Non-binary instruction detected");
@@ -1077,10 +1125,29 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
isa<GlobalValue>(CE1->getOperand(0))) {
GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
- // Functions are at least 4-byte aligned.
- unsigned GVAlign = GV->getAlignment();
- if (isa<Function>(GV))
- GVAlign = std::max(GVAlign, 4U);
+ unsigned GVAlign;
+
+ if (Module *TheModule = GV->getParent()) {
+ GVAlign = GV->getPointerAlignment(TheModule->getDataLayout());
+
+ // If the function alignment is not specified then assume that it
+ // is 4.
+ // This is dangerous; on x86, the alignment of the pointer
+ // corresponds to the alignment of the function, but might be less
+ // than 4 if it isn't explicitly specified.
+ // However, a fix for this behaviour was reverted because it
+ // increased code size (see https://reviews.llvm.org/D55115)
+ // FIXME: This code should be deleted once existing targets have
+ // appropriate defaults
+ if (GVAlign == 0U && isa<Function>(GV))
+ GVAlign = 4U;
+ } else if (isa<Function>(GV)) {
+ // Without a datalayout we have to assume the worst case: that the
+ // function pointer isn't aligned at all.
+ GVAlign = 0U;
+ } else {
+ GVAlign = GV->getAlignment();
+ }
if (GVAlign > 1) {
unsigned DstWidth = CI2->getType()->getBitWidth();
@@ -1360,8 +1427,9 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
assert(V1->getType() == V2->getType() &&
"Cannot compare values of different types!");
- // Handle degenerate case quickly
- if (V1 == V2) return FCmpInst::FCMP_OEQ;
+ // We do not know if a constant expression will evaluate to a number or NaN.
+ // Therefore, we can only say that the relation is unordered or equal.
+ if (V1 == V2) return FCmpInst::FCMP_UEQ;
if (!isa<ConstantExpr>(V1)) {
if (!isa<ConstantExpr>(V2)) {
@@ -1552,7 +1620,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
case Instruction::ZExt:
case Instruction::SExt:
// We can't evaluate floating point casts or truncations.
- if (CE1Op0->getType()->isFloatingPointTy())
+ if (CE1Op0->getType()->isFPOrFPVectorTy())
break;
// If the cast is not actually changing bits, and the second operand is a
@@ -1856,7 +1924,6 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
default: llvm_unreachable("Unknown relation!");
case FCmpInst::FCMP_UNO:
case FCmpInst::FCMP_ORD:
- case FCmpInst::FCMP_UEQ:
case FCmpInst::FCMP_UNE:
case FCmpInst::FCMP_ULT:
case FCmpInst::FCMP_UGT:
@@ -1902,6 +1969,13 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
else if (pred == FCmpInst::FCMP_ONE || pred == FCmpInst::FCMP_UNE)
Result = 1;
break;
+ case FCmpInst::FCMP_UEQ: // We know that C1 == C2 || isUnordered(C1, C2).
+ // We can only partially decide this relation.
+ if (pred == FCmpInst::FCMP_ONE)
+ Result = 0;
+ else if (pred == FCmpInst::FCMP_UEQ)
+ Result = 1;
+ break;
}
// If we evaluated the result, return it now.
@@ -1981,11 +2055,13 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// If the right hand side is a bitcast, try using its inverse to simplify
// it by moving it to the left hand side. We can't do this if it would turn
- // a vector compare into a scalar compare or visa versa.
+ // a vector compare into a scalar compare or vice versa, or if it would turn
+ // the operands into FP values.
if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
Constant *CE2Op0 = CE2->getOperand(0);
if (CE2->getOpcode() == Instruction::BitCast &&
- CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) {
+ CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy() &&
+ !CE2Op0->getType()->isFPOrFPVectorTy()) {
Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
}
@@ -2072,7 +2148,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
if (Idxs.empty()) return C;
Type *GEPTy = GetElementPtrInst::getGEPReturnType(
- C, makeArrayRef((Value *const *)Idxs.data(), Idxs.size()));
+ PointeeTy, C, makeArrayRef((Value *const *)Idxs.data(), Idxs.size()));
if (isa<UndefValue>(C))
return UndefValue::get(GEPTy);
diff --git a/lib/IR/ConstantFold.h b/lib/IR/ConstantFold.h
index 2d8de1132b96..9ad6e14e9e40 100644
--- a/lib/IR/ConstantFold.h
+++ b/lib/IR/ConstantFold.h
@@ -1,9 +1,8 @@
//===-- ConstantFolding.h - Internal Constant Folding Interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,6 +43,7 @@ template <typename T> class ArrayRef;
ArrayRef<unsigned> Idxs);
Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
ArrayRef<unsigned> Idxs);
+ Constant *ConstantFoldUnaryInstruction(unsigned Opcode, Constant *V);
Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
Constant *V2);
Constant *ConstantFoldCompareInstruction(unsigned short predicate,
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index 39a0b13c4e0c..920fdc01a14f 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -1,9 +1,8 @@
//===- ConstantRange.cpp - ConstantRange implementation -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -54,6 +54,26 @@ ConstantRange::ConstantRange(APInt L, APInt U)
"Lower == Upper, but they aren't min or max value!");
}
+ConstantRange ConstantRange::fromKnownBits(const KnownBits &Known,
+ bool IsSigned) {
+ assert(!Known.hasConflict() && "Expected valid KnownBits");
+
+ if (Known.isUnknown())
+ return getFull(Known.getBitWidth());
+
+ // For unsigned ranges, or signed ranges with known sign bit, create a simple
+ // range between the smallest and largest possible value.
+ if (!IsSigned || Known.isNegative() || Known.isNonNegative())
+ return ConstantRange(Known.One, ~Known.Zero + 1);
+
+ // If we don't know the sign bit, pick the lower bound as a negative number
+ // and the upper bound as a non-negative one.
+ APInt Lower = Known.One, Upper = ~Known.Zero;
+ Lower.setSignBit();
+ Upper.clearSignBit();
+ return ConstantRange(Lower, Upper + 1);
+}
+
ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
const ConstantRange &CR) {
if (CR.isEmptySet())
@@ -68,55 +88,39 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
case CmpInst::ICMP_NE:
if (CR.isSingleElement())
return ConstantRange(CR.getUpper(), CR.getLower());
- return ConstantRange(W);
+ return getFull(W);
case CmpInst::ICMP_ULT: {
APInt UMax(CR.getUnsignedMax());
if (UMax.isMinValue())
- return ConstantRange(W, /* empty */ false);
+ return getEmpty(W);
return ConstantRange(APInt::getMinValue(W), std::move(UMax));
}
case CmpInst::ICMP_SLT: {
APInt SMax(CR.getSignedMax());
if (SMax.isMinSignedValue())
- return ConstantRange(W, /* empty */ false);
+ return getEmpty(W);
return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax));
}
- case CmpInst::ICMP_ULE: {
- APInt UMax(CR.getUnsignedMax());
- if (UMax.isMaxValue())
- return ConstantRange(W);
- return ConstantRange(APInt::getMinValue(W), std::move(UMax) + 1);
- }
- case CmpInst::ICMP_SLE: {
- APInt SMax(CR.getSignedMax());
- if (SMax.isMaxSignedValue())
- return ConstantRange(W);
- return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax) + 1);
- }
+ case CmpInst::ICMP_ULE:
+ return getNonEmpty(APInt::getMinValue(W), CR.getUnsignedMax() + 1);
+ case CmpInst::ICMP_SLE:
+ return getNonEmpty(APInt::getSignedMinValue(W), CR.getSignedMax() + 1);
case CmpInst::ICMP_UGT: {
APInt UMin(CR.getUnsignedMin());
if (UMin.isMaxValue())
- return ConstantRange(W, /* empty */ false);
+ return getEmpty(W);
return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W));
}
case CmpInst::ICMP_SGT: {
APInt SMin(CR.getSignedMin());
if (SMin.isMaxSignedValue())
- return ConstantRange(W, /* empty */ false);
+ return getEmpty(W);
return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W));
}
- case CmpInst::ICMP_UGE: {
- APInt UMin(CR.getUnsignedMin());
- if (UMin.isMinValue())
- return ConstantRange(W);
- return ConstantRange(std::move(UMin), APInt::getNullValue(W));
- }
- case CmpInst::ICMP_SGE: {
- APInt SMin(CR.getSignedMin());
- if (SMin.isMinSignedValue())
- return ConstantRange(W);
- return ConstantRange(std::move(SMin), APInt::getSignedMinValue(W));
- }
+ case CmpInst::ICMP_UGE:
+ return getNonEmpty(CR.getUnsignedMin(), APInt::getNullValue(W));
+ case CmpInst::ICMP_SGE:
+ return getNonEmpty(CR.getSignedMin(), APInt::getSignedMinValue(W));
}
}
@@ -176,146 +180,106 @@ bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
return Success;
}
+/// Exact mul nuw region for single element RHS.
+static ConstantRange makeExactMulNUWRegion(const APInt &V) {
+ unsigned BitWidth = V.getBitWidth();
+ if (V == 0)
+ return ConstantRange::getFull(V.getBitWidth());
+
+ return ConstantRange::getNonEmpty(
+ APIntOps::RoundingUDiv(APInt::getMinValue(BitWidth), V,
+ APInt::Rounding::UP),
+ APIntOps::RoundingUDiv(APInt::getMaxValue(BitWidth), V,
+ APInt::Rounding::DOWN) + 1);
+}
+
+/// Exact mul nsw region for single element RHS.
+static ConstantRange makeExactMulNSWRegion(const APInt &V) {
+ // Handle the special cases 0, -1 and 1. See the comment at the end of the
+ // function for why -1 and 1 are treated separately.
+ unsigned BitWidth = V.getBitWidth();
+ if (V == 0 || V.isOneValue())
+ return ConstantRange::getFull(BitWidth);
+
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ // e.g. Returning [-127, 127], represented as [-127, -128).
+ if (V.isAllOnesValue())
+ return ConstantRange(-MaxValue, MinValue);
+
+ APInt Lower, Upper;
+ if (V.isNegative()) {
+ Lower = APIntOps::RoundingSDiv(MaxValue, V, APInt::Rounding::UP);
+ Upper = APIntOps::RoundingSDiv(MinValue, V, APInt::Rounding::DOWN);
+ } else {
+ Lower = APIntOps::RoundingSDiv(MinValue, V, APInt::Rounding::UP);
+ Upper = APIntOps::RoundingSDiv(MaxValue, V, APInt::Rounding::DOWN);
+ }
+ // The ConstantRange ctor takes a half-open interval [Lower, Upper + 1).
+ // Upper + 1 is guaranteed not to overflow, because |divisor| > 1. 0, -1,
+ // and 1 are already handled as special cases.
+ return ConstantRange(Lower, Upper + 1);
+}
+
ConstantRange
ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
const ConstantRange &Other,
unsigned NoWrapKind) {
using OBO = OverflowingBinaryOperator;
- // Computes the intersection of CR0 and CR1. It is different from
- // intersectWith in that the ConstantRange returned will only contain elements
- // in both CR0 and CR1 (i.e. SubsetIntersect(X, Y) is a *subset*, proper or
- // not, of both X and Y).
- auto SubsetIntersect =
- [](const ConstantRange &CR0, const ConstantRange &CR1) {
- return CR0.inverse().unionWith(CR1.inverse()).inverse();
- };
-
assert(Instruction::isBinaryOp(BinOp) && "Binary operators only!");
assert((NoWrapKind == OBO::NoSignedWrap ||
- NoWrapKind == OBO::NoUnsignedWrap ||
- NoWrapKind == (OBO::NoUnsignedWrap | OBO::NoSignedWrap)) &&
+ NoWrapKind == OBO::NoUnsignedWrap) &&
"NoWrapKind invalid!");
+ bool Unsigned = NoWrapKind == OBO::NoUnsignedWrap;
unsigned BitWidth = Other.getBitWidth();
- ConstantRange Result(BitWidth);
switch (BinOp) {
default:
- // Conservative answer: empty set
- return ConstantRange(BitWidth, false);
+ llvm_unreachable("Unsupported binary op");
- case Instruction::Add:
- if (auto *C = Other.getSingleElement())
- if (C->isNullValue())
- // Full set: nothing signed / unsigned wraps when added to 0.
- return ConstantRange(BitWidth);
- if (NoWrapKind & OBO::NoUnsignedWrap)
- Result =
- SubsetIntersect(Result, ConstantRange(APInt::getNullValue(BitWidth),
- -Other.getUnsignedMax()));
- if (NoWrapKind & OBO::NoSignedWrap) {
- const APInt &SignedMin = Other.getSignedMin();
- const APInt &SignedMax = Other.getSignedMax();
- if (SignedMax.isStrictlyPositive())
- Result = SubsetIntersect(
- Result,
- ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt::getSignedMinValue(BitWidth) - SignedMax));
- if (SignedMin.isNegative())
- Result = SubsetIntersect(
- Result,
- ConstantRange(APInt::getSignedMinValue(BitWidth) - SignedMin,
- APInt::getSignedMinValue(BitWidth)));
- }
- return Result;
+ case Instruction::Add: {
+ if (Unsigned)
+ return getNonEmpty(APInt::getNullValue(BitWidth),
+ -Other.getUnsignedMax());
+
+ APInt SignedMinVal = APInt::getSignedMinValue(BitWidth);
+ APInt SMin = Other.getSignedMin(), SMax = Other.getSignedMax();
+ return getNonEmpty(
+ SMin.isNegative() ? SignedMinVal - SMin : SignedMinVal,
+ SMax.isStrictlyPositive() ? SignedMinVal - SMax : SignedMinVal);
+ }
- case Instruction::Sub:
- if (auto *C = Other.getSingleElement())
- if (C->isNullValue())
- // Full set: nothing signed / unsigned wraps when subtracting 0.
- return ConstantRange(BitWidth);
- if (NoWrapKind & OBO::NoUnsignedWrap)
- Result =
- SubsetIntersect(Result, ConstantRange(Other.getUnsignedMax(),
- APInt::getMinValue(BitWidth)));
- if (NoWrapKind & OBO::NoSignedWrap) {
- const APInt &SignedMin = Other.getSignedMin();
- const APInt &SignedMax = Other.getSignedMax();
- if (SignedMax.isStrictlyPositive())
- Result = SubsetIntersect(
- Result,
- ConstantRange(APInt::getSignedMinValue(BitWidth) + SignedMax,
- APInt::getSignedMinValue(BitWidth)));
- if (SignedMin.isNegative())
- Result = SubsetIntersect(
- Result,
- ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt::getSignedMinValue(BitWidth) + SignedMin));
- }
- return Result;
- case Instruction::Mul: {
- if (NoWrapKind == (OBO::NoSignedWrap | OBO::NoUnsignedWrap)) {
- return SubsetIntersect(
- makeGuaranteedNoWrapRegion(BinOp, Other, OBO::NoSignedWrap),
- makeGuaranteedNoWrapRegion(BinOp, Other, OBO::NoUnsignedWrap));
- }
+ case Instruction::Sub: {
+ if (Unsigned)
+ return getNonEmpty(Other.getUnsignedMax(), APInt::getMinValue(BitWidth));
- // Equivalent to calling makeGuaranteedNoWrapRegion() on [V, V+1).
- const bool Unsigned = NoWrapKind == OBO::NoUnsignedWrap;
- const auto makeSingleValueRegion = [Unsigned,
- BitWidth](APInt V) -> ConstantRange {
- // Handle special case for 0, -1 and 1. See the last for reason why we
- // specialize -1 and 1.
- if (V == 0 || V.isOneValue())
- return ConstantRange(BitWidth, true);
-
- APInt MinValue, MaxValue;
- if (Unsigned) {
- MinValue = APInt::getMinValue(BitWidth);
- MaxValue = APInt::getMaxValue(BitWidth);
- } else {
- MinValue = APInt::getSignedMinValue(BitWidth);
- MaxValue = APInt::getSignedMaxValue(BitWidth);
- }
- // e.g. Returning [-127, 127], represented as [-127, -128).
- if (!Unsigned && V.isAllOnesValue())
- return ConstantRange(-MaxValue, MinValue);
-
- APInt Lower, Upper;
- if (!Unsigned && V.isNegative()) {
- Lower = APIntOps::RoundingSDiv(MaxValue, V, APInt::Rounding::UP);
- Upper = APIntOps::RoundingSDiv(MinValue, V, APInt::Rounding::DOWN);
- } else if (Unsigned) {
- Lower = APIntOps::RoundingUDiv(MinValue, V, APInt::Rounding::UP);
- Upper = APIntOps::RoundingUDiv(MaxValue, V, APInt::Rounding::DOWN);
- } else {
- Lower = APIntOps::RoundingSDiv(MinValue, V, APInt::Rounding::UP);
- Upper = APIntOps::RoundingSDiv(MaxValue, V, APInt::Rounding::DOWN);
- }
- if (Unsigned) {
- Lower = Lower.zextOrSelf(BitWidth);
- Upper = Upper.zextOrSelf(BitWidth);
- } else {
- Lower = Lower.sextOrSelf(BitWidth);
- Upper = Upper.sextOrSelf(BitWidth);
- }
- // ConstantRange ctor take a half inclusive interval [Lower, Upper + 1).
- // Upper + 1 is guanranteed not to overflow, because |divisor| > 1. 0, -1,
- // and 1 are already handled as special cases.
- return ConstantRange(Lower, Upper + 1);
- };
+ APInt SignedMinVal = APInt::getSignedMinValue(BitWidth);
+ APInt SMin = Other.getSignedMin(), SMax = Other.getSignedMax();
+ return getNonEmpty(
+ SMax.isStrictlyPositive() ? SignedMinVal + SMax : SignedMinVal,
+ SMin.isNegative() ? SignedMinVal + SMin : SignedMinVal);
+ }
+ case Instruction::Mul:
if (Unsigned)
- return makeSingleValueRegion(Other.getUnsignedMax());
+ return makeExactMulNUWRegion(Other.getUnsignedMax());
- return SubsetIntersect(makeSingleValueRegion(Other.getSignedMin()),
- makeSingleValueRegion(Other.getSignedMax()));
- }
+ return makeExactMulNSWRegion(Other.getSignedMin())
+ .intersectWith(makeExactMulNSWRegion(Other.getSignedMax()));
}
}
+ConstantRange ConstantRange::makeExactNoWrapRegion(Instruction::BinaryOps BinOp,
+ const APInt &Other,
+ unsigned NoWrapKind) {
+ // makeGuaranteedNoWrapRegion() is exact for single-element ranges, as
+ // "for all" and "for any" coincide in this case.
+ return makeGuaranteedNoWrapRegion(BinOp, ConstantRange(Other), NoWrapKind);
+}
+
bool ConstantRange::isFullSet() const {
return Lower == Upper && Lower.isMaxValue();
}
@@ -325,20 +289,19 @@ bool ConstantRange::isEmptySet() const {
}
bool ConstantRange::isWrappedSet() const {
+ return Lower.ugt(Upper) && !Upper.isNullValue();
+}
+
+bool ConstantRange::isUpperWrapped() const {
return Lower.ugt(Upper);
}
bool ConstantRange::isSignWrappedSet() const {
- return contains(APInt::getSignedMaxValue(getBitWidth())) &&
- contains(APInt::getSignedMinValue(getBitWidth()));
+ return Lower.sgt(Upper) && !Upper.isMinSignedValue();
}
-APInt ConstantRange::getSetSize() const {
- if (isFullSet())
- return APInt::getOneBitSet(getBitWidth()+1, getBitWidth());
-
- // This is also correct for wrapped sets.
- return (Upper - Lower).zext(getBitWidth()+1);
+bool ConstantRange::isUpperSignWrapped() const {
+ return Lower.sgt(Upper);
}
bool
@@ -362,26 +325,41 @@ ConstantRange::isSizeLargerThan(uint64_t MaxSize) const {
return (Upper - Lower).ugt(MaxSize);
}
+bool ConstantRange::isAllNegative() const {
+ // Empty set is all negative, full set is not.
+ if (isEmptySet())
+ return true;
+ if (isFullSet())
+ return false;
+
+ return !isUpperSignWrapped() && !Upper.isStrictlyPositive();
+}
+
+bool ConstantRange::isAllNonNegative() const {
+ // Empty and full set are automatically treated correctly.
+ return !isSignWrappedSet() && Lower.isNonNegative();
+}
+
APInt ConstantRange::getUnsignedMax() const {
- if (isFullSet() || isWrappedSet())
+ if (isFullSet() || isUpperWrapped())
return APInt::getMaxValue(getBitWidth());
return getUpper() - 1;
}
APInt ConstantRange::getUnsignedMin() const {
- if (isFullSet() || (isWrappedSet() && !getUpper().isNullValue()))
+ if (isFullSet() || isWrappedSet())
return APInt::getMinValue(getBitWidth());
return getLower();
}
APInt ConstantRange::getSignedMax() const {
- if (isFullSet() || Lower.sgt(Upper))
+ if (isFullSet() || isUpperSignWrapped())
return APInt::getSignedMaxValue(getBitWidth());
return getUpper() - 1;
}
APInt ConstantRange::getSignedMin() const {
- if (isFullSet() || (Lower.sgt(Upper) && !getUpper().isMinSignedValue()))
+ if (isFullSet() || isSignWrappedSet())
return APInt::getSignedMinValue(getBitWidth());
return getLower();
}
@@ -390,7 +368,7 @@ bool ConstantRange::contains(const APInt &V) const {
if (Lower == Upper)
return isFullSet();
- if (!isWrappedSet())
+ if (!isUpperWrapped())
return Lower.ule(V) && V.ult(Upper);
return Lower.ule(V) || V.ult(Upper);
}
@@ -399,14 +377,14 @@ bool ConstantRange::contains(const ConstantRange &Other) const {
if (isFullSet() || Other.isEmptySet()) return true;
if (isEmptySet() || Other.isFullSet()) return false;
- if (!isWrappedSet()) {
- if (Other.isWrappedSet())
+ if (!isUpperWrapped()) {
+ if (Other.isUpperWrapped())
return false;
return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper);
}
- if (!Other.isWrappedSet())
+ if (!Other.isUpperWrapped())
return Other.getUpper().ule(Upper) ||
Lower.ule(Other.getLower());
@@ -425,7 +403,28 @@ ConstantRange ConstantRange::difference(const ConstantRange &CR) const {
return intersectWith(CR.inverse());
}
-ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
+static ConstantRange getPreferredRange(
+ const ConstantRange &CR1, const ConstantRange &CR2,
+ ConstantRange::PreferredRangeType Type) {
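+ // Prefer whichever range is not wrapped for the requested signedness; if
+ // both or neither wrap, fall back to the smaller of the two.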
+ if (Type == ConstantRange::Unsigned) {
+ if (!CR1.isWrappedSet() && CR2.isWrappedSet())
+ return CR1;
+ if (CR1.isWrappedSet() && !CR2.isWrappedSet())
+ return CR2;
+ } else if (Type == ConstantRange::Signed) {
+ if (!CR1.isSignWrappedSet() && CR2.isSignWrappedSet())
+ return CR1;
+ if (CR1.isSignWrappedSet() && !CR2.isSignWrappedSet())
+ return CR2;
+ }
+
+ if (CR1.isSizeStrictlySmallerThan(CR2))
+ return CR1;
+ return CR2;
+}
+
+ConstantRange ConstantRange::intersectWith(const ConstantRange &CR,
+ PreferredRangeType Type) const {
assert(getBitWidth() == CR.getBitWidth() &&
"ConstantRange types don't agree!");
@@ -433,100 +432,134 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if ( isEmptySet() || CR.isFullSet()) return *this;
if (CR.isEmptySet() || isFullSet()) return CR;
- if (!isWrappedSet() && CR.isWrappedSet())
- return CR.intersectWith(*this);
+ if (!isUpperWrapped() && CR.isUpperWrapped())
+ return CR.intersectWith(*this, Type);
- if (!isWrappedSet() && !CR.isWrappedSet()) {
+ if (!isUpperWrapped() && !CR.isUpperWrapped()) {
if (Lower.ult(CR.Lower)) {
+ // L---U : this
+ // L---U : CR
if (Upper.ule(CR.Lower))
- return ConstantRange(getBitWidth(), false);
+ return getEmpty();
+ // L---U : this
+ // L---U : CR
if (Upper.ult(CR.Upper))
return ConstantRange(CR.Lower, Upper);
+ // L-------U : this
+ // L---U : CR
return CR;
}
+ // L---U : this
+ // L-------U : CR
if (Upper.ult(CR.Upper))
return *this;
+ // L-----U : this
+ // L-----U : CR
if (Lower.ult(CR.Upper))
return ConstantRange(Lower, CR.Upper);
- return ConstantRange(getBitWidth(), false);
+ // L---U : this
+ // L---U : CR
+ return getEmpty();
}
- if (isWrappedSet() && !CR.isWrappedSet()) {
+ if (isUpperWrapped() && !CR.isUpperWrapped()) {
if (CR.Lower.ult(Upper)) {
+ // ------U L--- : this
+ // L--U : CR
if (CR.Upper.ult(Upper))
return CR;
+ // ------U L--- : this
+ // L------U : CR
if (CR.Upper.ule(Lower))
return ConstantRange(CR.Lower, Upper);
- if (isSizeStrictlySmallerThan(CR))
- return *this;
- return CR;
+ // ------U L--- : this
+ // L----------U : CR
+ return getPreferredRange(*this, CR, Type);
}
if (CR.Lower.ult(Lower)) {
+ // --U L---- : this
+ // L--U : CR
if (CR.Upper.ule(Lower))
- return ConstantRange(getBitWidth(), false);
+ return getEmpty();
+ // --U L---- : this
+ // L------U : CR
return ConstantRange(Lower, CR.Upper);
}
+
+ // --U L------ : this
+ // L--U : CR
return CR;
}
if (CR.Upper.ult(Upper)) {
- if (CR.Lower.ult(Upper)) {
- if (isSizeStrictlySmallerThan(CR))
- return *this;
- return CR;
- }
+ // ------U L-- : this
+ // --U L------ : CR
+ if (CR.Lower.ult(Upper))
+ return getPreferredRange(*this, CR, Type);
+ // ----U L-- : this
+ // --U L---- : CR
if (CR.Lower.ult(Lower))
return ConstantRange(Lower, CR.Upper);
+ // ----U L---- : this
+ // --U L-- : CR
return CR;
}
if (CR.Upper.ule(Lower)) {
+ // --U L-- : this
+ // ----U L---- : CR
if (CR.Lower.ult(Lower))
return *this;
+ // --U L---- : this
+ // ----U L-- : CR
return ConstantRange(CR.Lower, Upper);
}
- if (isSizeStrictlySmallerThan(CR))
- return *this;
- return CR;
+
+ // --U L------ : this
+ // ------U L-- : CR
+ return getPreferredRange(*this, CR, Type);
}
-ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
+ConstantRange ConstantRange::unionWith(const ConstantRange &CR,
+ PreferredRangeType Type) const {
assert(getBitWidth() == CR.getBitWidth() &&
"ConstantRange types don't agree!");
if ( isFullSet() || CR.isEmptySet()) return *this;
if (CR.isFullSet() || isEmptySet()) return CR;
- if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this);
+ if (!isUpperWrapped() && CR.isUpperWrapped())
+ return CR.unionWith(*this, Type);
- if (!isWrappedSet() && !CR.isWrappedSet()) {
- if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) {
- // If the two ranges are disjoint, find the smaller gap and bridge it.
- APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
- if (d1.ult(d2))
- return ConstantRange(Lower, CR.Upper);
- return ConstantRange(CR.Lower, Upper);
- }
+ if (!isUpperWrapped() && !CR.isUpperWrapped()) {
+ // L---U and L---U : this
+ // L---U L---U : CR
+ // result in one of
+ // L---------U
+ // -----U L-----
+ if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower))
+ return getPreferredRange(
+ ConstantRange(Lower, CR.Upper), ConstantRange(CR.Lower, Upper), Type);
APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper;
if (L.isNullValue() && U.isNullValue())
- return ConstantRange(getBitWidth());
+ return getFull();
return ConstantRange(std::move(L), std::move(U));
}
- if (!CR.isWrappedSet()) {
+ if (!CR.isUpperWrapped()) {
// ------U L----- and ------U L----- : this
// L--U L--U : CR
if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower))
@@ -535,26 +568,25 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
// ------U L----- : this
// L---------U : CR
if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper))
- return ConstantRange(getBitWidth());
+ return getFull();
// ----U L---- : this
// L---U : CR
- // <d1> <d2>
- if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) {
- APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
- if (d1.ult(d2))
- return ConstantRange(Lower, CR.Upper);
- return ConstantRange(CR.Lower, Upper);
- }
+ // results in one of
+ // ----------U L----
+ // ----U L----------
+ if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower))
+ return getPreferredRange(
+ ConstantRange(Lower, CR.Upper), ConstantRange(CR.Lower, Upper), Type);
// ----U L----- : this
// L----U : CR
- if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper))
+ if (Upper.ult(CR.Lower) && Lower.ule(CR.Upper))
return ConstantRange(CR.Lower, Upper);
// ------U L---- : this
// L-----U : CR
- assert(CR.Lower.ult(Upper) && CR.Upper.ult(Lower) &&
+ assert(CR.Lower.ule(Upper) && CR.Upper.ult(Lower) &&
"ConstantRange::unionWith missed a case with one range wrapped");
return ConstantRange(Lower, CR.Upper);
}
@@ -562,7 +594,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
// ------U L---- and ------U L---- : this
// -U L----------- and ------------U L : CR
if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
- return ConstantRange(getBitWidth());
+ return getFull();
APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
APInt U = CR.Upper.ugt(Upper) ? CR.Upper : Upper;
@@ -588,7 +620,7 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
if (getBitWidth() == ResultBitWidth)
return *this;
else
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
case Instruction::UIToFP: {
// TODO: use input range if available
auto BW = getBitWidth();
@@ -608,17 +640,17 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
case Instruction::IntToPtr:
case Instruction::PtrToInt:
case Instruction::AddrSpaceCast:
- // Conservatively return full set.
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ // Conservatively return the full set.
+ return getFull();
};
}
ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
- if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+ if (isEmptySet()) return getEmpty(DstTySize);
unsigned SrcTySize = getBitWidth();
assert(SrcTySize < DstTySize && "Not a value extension");
- if (isFullSet() || isWrappedSet()) {
+ if (isFullSet() || isUpperWrapped()) {
// Change into [0, 1 << src bit width)
APInt LowerExt(DstTySize, 0);
if (!Upper) // special case: [X, 0) -- not really wrapping around
@@ -631,7 +663,7 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const {
}
ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
- if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false);
+ if (isEmptySet()) return getEmpty(DstTySize);
unsigned SrcTySize = getBitWidth();
assert(SrcTySize < DstTySize && "Not a value extension");
@@ -651,9 +683,9 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
assert(getBitWidth() > DstTySize && "Not a value truncation");
if (isEmptySet())
- return ConstantRange(DstTySize, /*isFullSet=*/false);
+ return getEmpty(DstTySize);
if (isFullSet())
- return ConstantRange(DstTySize, /*isFullSet=*/true);
+ return getFull(DstTySize);
APInt LowerDiv(Lower), UpperDiv(Upper);
ConstantRange Union(DstTySize, /*isFullSet=*/false);
@@ -661,12 +693,12 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
// Analyze wrapped sets in their two parts: [0, Upper) \/ [Lower, MaxValue]
// We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and
// then we do the union with [MaxValue, Upper)
- if (isWrappedSet()) {
+ if (isUpperWrapped()) {
// If Upper is greater than or equal to MaxValue(DstTy), it covers the whole
// truncated range.
if (Upper.getActiveBits() > DstTySize ||
Upper.countTrailingOnes() == DstTySize)
- return ConstantRange(DstTySize, /*isFullSet=*/true);
+ return getFull(DstTySize);
Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
UpperDiv.setAllBits();
@@ -699,7 +731,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
UpperDiv.trunc(DstTySize)).unionWith(Union);
}
- return ConstantRange(DstTySize, /*isFullSet=*/true);
+ return getFull(DstTySize);
}
ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
@@ -733,6 +765,12 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
return multiply(Other);
case Instruction::UDiv:
return udiv(Other);
+ case Instruction::SDiv:
+ return sdiv(Other);
+ case Instruction::URem:
+ return urem(Other);
+ case Instruction::SRem:
+ return srem(Other);
case Instruction::Shl:
return shl(Other);
case Instruction::LShr:
@@ -752,39 +790,36 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
case Instruction::FMul:
return multiply(Other);
default:
- // Conservatively return full set.
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ // Conservatively return the full set.
+ return getFull();
}
}
ConstantRange
ConstantRange::add(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
if (isFullSet() || Other.isFullSet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
APInt NewLower = getLower() + Other.getLower();
APInt NewUpper = getUpper() + Other.getUpper() - 1;
if (NewLower == NewUpper)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper));
if (X.isSizeStrictlySmallerThan(*this) ||
X.isSizeStrictlySmallerThan(Other))
// We've wrapped, therefore, full set.
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
return X;
}
ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const {
// Calculate the subset of this range such that "X + Other" is
// guaranteed not to wrap (overflow) for all X in this subset.
- // makeGuaranteedNoWrapRegion will produce an exact NSW range since we are
- // passing a single element range.
- auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(BinaryOperator::Add,
- ConstantRange(Other),
- OverflowingBinaryOperator::NoSignedWrap);
+ auto NSWRange = ConstantRange::makeExactNoWrapRegion(
+ BinaryOperator::Add, Other, OverflowingBinaryOperator::NoSignedWrap);
auto NSWConstrainedRange = intersectWith(NSWRange);
return NSWConstrainedRange.add(ConstantRange(Other));
@@ -793,20 +828,20 @@ ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const {
ConstantRange
ConstantRange::sub(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
if (isFullSet() || Other.isFullSet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
APInt NewLower = getLower() - Other.getUpper() + 1;
APInt NewUpper = getUpper() - Other.getLower();
if (NewLower == NewUpper)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper));
if (X.isSizeStrictlySmallerThan(*this) ||
X.isSizeStrictlySmallerThan(Other))
// We've wrapped, therefore, full set.
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
return X;
}
@@ -818,7 +853,7 @@ ConstantRange::multiply(const ConstantRange &Other) const {
// range according to the greatest power-of-two factor of the single element.
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
// Multiplication is signedness-independent. However different ranges can be
// obtained depending on how the input ranges are treated. These different
@@ -840,7 +875,7 @@ ConstantRange::multiply(const ConstantRange &Other) const {
// from one positive number to another which is as good as we can generate.
// In this case, skip the extra work of generating signed ranges which aren't
// going to be better than this range.
- if (!UR.isWrappedSet() &&
+ if (!UR.isUpperWrapped() &&
(UR.getUpper().isNonNegative() || UR.getUpper().isMinSignedValue()))
return UR;
@@ -869,12 +904,10 @@ ConstantRange::smax(const ConstantRange &Other) const {
// X smax Y is: range(smax(X_smin, Y_smin),
// smax(X_smax, Y_smax))
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin());
APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1;
- if (NewU == NewL)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- return ConstantRange(std::move(NewL), std::move(NewU));
+ return getNonEmpty(std::move(NewL), std::move(NewU));
}
ConstantRange
@@ -882,12 +915,10 @@ ConstantRange::umax(const ConstantRange &Other) const {
// X umax Y is: range(umax(X_umin, Y_umin),
// umax(X_umax, Y_umax))
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1;
- if (NewU == NewL)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- return ConstantRange(std::move(NewL), std::move(NewU));
+ return getNonEmpty(std::move(NewL), std::move(NewU));
}
ConstantRange
@@ -895,12 +926,10 @@ ConstantRange::smin(const ConstantRange &Other) const {
// X smin Y is: range(smin(X_smin, Y_smin),
// smin(X_smax, Y_smax))
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
APInt NewL = APIntOps::smin(getSignedMin(), Other.getSignedMin());
APInt NewU = APIntOps::smin(getSignedMax(), Other.getSignedMax()) + 1;
- if (NewU == NewL)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- return ConstantRange(std::move(NewL), std::move(NewU));
+ return getNonEmpty(std::move(NewL), std::move(NewU));
}
ConstantRange
@@ -908,20 +937,16 @@ ConstantRange::umin(const ConstantRange &Other) const {
// X umin Y is: range(umin(X_umin, Y_umin),
// umin(X_umax, Y_umax))
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
APInt NewL = APIntOps::umin(getUnsignedMin(), Other.getUnsignedMin());
APInt NewU = APIntOps::umin(getUnsignedMax(), Other.getUnsignedMax()) + 1;
- if (NewU == NewL)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- return ConstantRange(std::move(NewL), std::move(NewU));
+ return getNonEmpty(std::move(NewL), std::move(NewU));
}
ConstantRange
ConstantRange::udiv(const ConstantRange &RHS) const {
if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
- if (RHS.isFullSet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getEmpty();
APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
@@ -936,52 +961,186 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
}
APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1;
+ return getNonEmpty(std::move(Lower), std::move(Upper));
+}
- // If the LHS is Full and the RHS is a wrapped interval containing 1 then
- // this could occur.
- if (Lower == Upper)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
+ // We split up the LHS and RHS into positive and negative components
+ // and then also compute the positive and negative components of the result
+ // separately by combining division results with the appropriate signs.
+ APInt Zero = APInt::getNullValue(getBitWidth());
+ APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+ ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin);
+ ConstantRange NegFilter(SignedMin, Zero);
+ ConstantRange PosL = intersectWith(PosFilter);
+ ConstantRange NegL = intersectWith(NegFilter);
+ ConstantRange PosR = RHS.intersectWith(PosFilter);
+ ConstantRange NegR = RHS.intersectWith(NegFilter);
+
+ ConstantRange PosRes = getEmpty();
+ if (!PosL.isEmptySet() && !PosR.isEmptySet())
+ // pos / pos = pos.
+ PosRes = ConstantRange(PosL.Lower.sdiv(PosR.Upper - 1),
+ (PosL.Upper - 1).sdiv(PosR.Lower) + 1);
+
+ if (!NegL.isEmptySet() && !NegR.isEmptySet()) {
+ // neg / neg = pos.
+ //
+ // We need to deal with one tricky case here: SignedMin / -1 is UB on the
+ // IR level, so we'll want to exclude this case when calculating bounds.
+ // (For APInts the operation is well-defined and yields SignedMin.) We
+ // handle this by dropping either SignedMin from the LHS or -1 from the RHS.
+ APInt Lo = (NegL.Upper - 1).sdiv(NegR.Lower);
+ if (NegL.Lower.isMinSignedValue() && NegR.Upper.isNullValue()) {
+ // Remove -1 from the RHS. Skip if it's the only element, as this would
+ // leave us with an empty set.
+ if (!NegR.Lower.isAllOnesValue()) {
+ APInt AdjNegRUpper;
+ if (RHS.Lower.isAllOnesValue())
+ // Negative part of [-1, X] without -1 is [SignedMin, X].
+ AdjNegRUpper = RHS.Upper;
+ else
+ // [X, -1] without -1 is [X, -2].
+ AdjNegRUpper = NegR.Upper - 1;
+
+ PosRes = PosRes.unionWith(
+ ConstantRange(Lo, NegL.Lower.sdiv(AdjNegRUpper - 1) + 1));
+ }
+ // Remove SignedMin from the LHS. Skip if it's the only element, as this
+ // would leave us with an empty set.
+ if (NegL.Upper != SignedMin + 1) {
+ APInt AdjNegLLower;
+ if (Upper == SignedMin + 1)
+ // Negative part of [X, SignedMin] without SignedMin is [X, -1].
+ AdjNegLLower = Lower;
+ else
+ // [SignedMin, X] without SignedMin is [SignedMin + 1, X].
+ AdjNegLLower = NegL.Lower + 1;
+
+ PosRes = PosRes.unionWith(
+ ConstantRange(std::move(Lo),
+ AdjNegLLower.sdiv(NegR.Upper - 1) + 1));
+ }
+ } else {
+ PosRes = PosRes.unionWith(
+ ConstantRange(std::move(Lo), NegL.Lower.sdiv(NegR.Upper - 1) + 1));
+ }
+ }
+
+ ConstantRange NegRes = getEmpty();
+ if (!PosL.isEmptySet() && !NegR.isEmptySet())
+ // pos / neg = neg.
+ NegRes = ConstantRange((PosL.Upper - 1).sdiv(NegR.Upper - 1),
+ PosL.Lower.sdiv(NegR.Lower) + 1);
+
+ if (!NegL.isEmptySet() && !PosR.isEmptySet())
+ // neg / pos = neg.
+ NegRes = NegRes.unionWith(
+ ConstantRange(NegL.Lower.sdiv(PosR.Lower),
+ (NegL.Upper - 1).sdiv(PosR.Upper - 1) + 1));
+
+ // Prefer a non-wrapping signed range here.
+ ConstantRange Res = NegRes.unionWith(PosRes, PreferredRangeType::Signed);
+
+ // Preserve the zero that we dropped when splitting the LHS by sign.
+ if (contains(Zero) && (!PosR.isEmptySet() || !NegR.isEmptySet()))
+ Res = Res.unionWith(ConstantRange(Zero));
+ return Res;
+}
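
A small worked sketch of what the sign-splitting above computes (illustrative, same headers assumed); the bounds follow from dividing the extreme values of each signed part:

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void sdivDemo() {
      // {-4..-1} divided by {2..3}: every quotient lies in {-2..0}.
      ConstantRange L(APInt(8, -4, /*isSigned=*/true), APInt(8, 0));
      ConstantRange R(APInt(8, 2), APInt(8, 4));
      ConstantRange Q = L.sdiv(R);
      // Expected: Q.getSignedMin() == -2 and Q.getSignedMax() == 0.
      (void)Q;
    }
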
+
+ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
+ return getEmpty();
+
+ // L % R for L < R is L.
+ if (getUnsignedMax().ult(RHS.getUnsignedMin()))
+ return *this;
+
+ // L % R is <= L and < R.
+ APInt Upper = APIntOps::umin(getUnsignedMax(), RHS.getUnsignedMax() - 1) + 1;
+ return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(Upper));
+}
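
As a worked instance of the "remainder is <= L and < R" bound above (illustrative sketch, same headers assumed):

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void uremDemo() {
      // {10..19} urem {1..7}: the remainder is always below 7, so expect [0, 7).
      ConstantRange LHS(APInt(8, 10), APInt(8, 20));
      ConstantRange RHS(APInt(8, 1), APInt(8, 8));
      ConstantRange Rem = LHS.urem(RHS);
      // Expected: Rem.getUnsignedMax() == 6.
      (void)Rem;
    }
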
+
+ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
+ if (isEmptySet() || RHS.isEmptySet())
+ return getEmpty();
+
+ ConstantRange AbsRHS = RHS.abs();
+ APInt MinAbsRHS = AbsRHS.getUnsignedMin();
+ APInt MaxAbsRHS = AbsRHS.getUnsignedMax();
+
+ // Modulus by zero is UB.
+ if (MaxAbsRHS.isNullValue())
+ return getEmpty();
+
+ if (MinAbsRHS.isNullValue())
+ ++MinAbsRHS;
+
+ APInt MinLHS = getSignedMin(), MaxLHS = getSignedMax();
+
+ if (MinLHS.isNonNegative()) {
+ // L % R for L < R is L.
+ if (MaxLHS.ult(MinAbsRHS))
+ return *this;
+
+ // L % R is <= L and < R.
+ APInt Upper = APIntOps::umin(MaxLHS, MaxAbsRHS - 1) + 1;
+ return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(Upper));
+ }
+
+ // Same basic logic as above, but the result is negative.
+ if (MaxLHS.isNegative()) {
+ if (MinLHS.ugt(-MinAbsRHS))
+ return *this;
+
+ APInt Lower = APIntOps::umax(MinLHS, -MaxAbsRHS + 1);
+ return ConstantRange(std::move(Lower), APInt(getBitWidth(), 1));
+ }
+
+ // LHS range crosses zero.
+ APInt Lower = APIntOps::umax(MinLHS, -MaxAbsRHS + 1);
+ APInt Upper = APIntOps::umin(MaxLHS, MaxAbsRHS - 1) + 1;
return ConstantRange(std::move(Lower), std::move(Upper));
}
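
And a sketch of the all-negative case handled above, where the remainder keeps the sign of the dividend (illustrative values, same headers assumed):

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void sremDemo() {
      // {-6..-1} srem {4}: the remainders are -2, -1, 0, -3, -2, -1, all in {-3..0}.
      ConstantRange LHS(APInt(8, -6, /*isSigned=*/true), APInt(8, 0));
      ConstantRange RHS(APInt(8, 4));
      ConstantRange Rem = LHS.srem(RHS);
      // Expected: Rem.getSignedMin() == -3 and Rem.getSignedMax() == 0.
      (void)Rem;
    }
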
ConstantRange
ConstantRange::binaryAnd(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
// TODO: replace this with something less conservative
APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
- if (umin.isAllOnesValue())
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
+ return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
}
ConstantRange
ConstantRange::binaryOr(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
// TODO: replace this with something less conservative
APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
- if (umax.isNullValue())
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- return ConstantRange(std::move(umax), APInt::getNullValue(getBitWidth()));
+ return getNonEmpty(std::move(umax), APInt::getNullValue(getBitWidth()));
}
ConstantRange
ConstantRange::shl(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
APInt max = getUnsignedMax();
APInt Other_umax = Other.getUnsignedMax();
+ // If the maximum shift amount is zero, the shift has no effect;
+ // return the original range.
+ if (Other_umax.isNullValue())
+ return *this;
// there's overflow!
- if (Other_umax.uge(max.countLeadingZeros()))
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ if (Other_umax.ugt(max.countLeadingZeros()))
+ return getFull();
// FIXME: implement the other tricky cases
@@ -995,20 +1154,17 @@ ConstantRange::shl(const ConstantRange &Other) const {
ConstantRange
ConstantRange::lshr(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()) + 1;
APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
- if (min == max)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-
- return ConstantRange(std::move(min), std::move(max));
+ return getNonEmpty(std::move(min), std::move(max));
}
ConstantRange
ConstantRange::ashr(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
// May straddle zero, so handle both positive and negative cases.
// 'PosMax' is the upper bound of the result of the ashr
@@ -1053,20 +1209,196 @@ ConstantRange::ashr(const ConstantRange &Other) const {
min = NegMin;
max = PosMax;
}
- if (min == max)
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getNonEmpty(std::move(min), std::move(max));
+}
+
+ConstantRange ConstantRange::uadd_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt NewL = getUnsignedMin().uadd_sat(Other.getUnsignedMin());
+ APInt NewU = getUnsignedMax().uadd_sat(Other.getUnsignedMax()) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
+}
+
+ConstantRange ConstantRange::sadd_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt NewL = getSignedMin().sadd_sat(Other.getSignedMin());
+ APInt NewU = getSignedMax().sadd_sat(Other.getSignedMax()) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
+}
+
+ConstantRange ConstantRange::usub_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt NewL = getUnsignedMin().usub_sat(Other.getUnsignedMax());
+ APInt NewU = getUnsignedMax().usub_sat(Other.getUnsignedMin()) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
+}
- return ConstantRange(std::move(min), std::move(max));
+ConstantRange ConstantRange::ssub_sat(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt NewL = getSignedMin().ssub_sat(Other.getSignedMax());
+ APInt NewU = getSignedMax().ssub_sat(Other.getSignedMin()) + 1;
+ return getNonEmpty(std::move(NewL), std::move(NewU));
}
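
A quick sketch of the new saturating helpers (illustrative, same headers assumed); when every element saturates, the result collapses to a single value:

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void uaddSatDemo() {
      ConstantRange A(APInt(8, 250), APInt(8, 0));  // {250..255}
      ConstantRange B(APInt(8, 10), APInt(8, 21));  // {10..20}
      ConstantRange S = A.uadd_sat(B);
      // Every sum clamps to 255, so S should be the single element 255.
      (void)S;
    }
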
ConstantRange ConstantRange::inverse() const {
if (isFullSet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ return getEmpty();
if (isEmptySet())
- return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ return getFull();
return ConstantRange(Upper, Lower);
}
+ConstantRange ConstantRange::abs() const {
+ if (isEmptySet())
+ return getEmpty();
+
+ if (isSignWrappedSet()) {
+ APInt Lo;
+ // Check whether the range crosses zero.
+ if (Upper.isStrictlyPositive() || !Lower.isStrictlyPositive())
+ Lo = APInt::getNullValue(getBitWidth());
+ else
+ Lo = APIntOps::umin(Lower, -Upper + 1);
+
+ // SignedMin is included in the result range.
+ return ConstantRange(Lo, APInt::getSignedMinValue(getBitWidth()) + 1);
+ }
+
+ APInt SMin = getSignedMin(), SMax = getSignedMax();
+
+ // All non-negative.
+ if (SMin.isNonNegative())
+ return *this;
+
+ // All negative.
+ if (SMax.isNegative())
+ return ConstantRange(-SMax, -SMin + 1);
+
+ // Range crosses zero.
+ return ConstantRange(APInt::getNullValue(getBitWidth()),
+ APIntOps::umax(-SMin, SMax) + 1);
+}
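
A sketch of the zero-crossing case of abs() above (illustrative, same headers assumed):

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void absDemo() {
      // {-3..4} crosses zero, so abs() should yield {0..4}, i.e. [0, 5).
      ConstantRange CR(APInt(8, -3, /*isSigned=*/true), APInt(8, 5));
      ConstantRange A = CR.abs();
      // Expected: A.getUnsignedMin() == 0 and A.getUnsignedMax() == 4.
      (void)A;
    }
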
+
+ConstantRange::OverflowResult ConstantRange::unsignedAddMayOverflow(
+ const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return OverflowResult::MayOverflow;
+
+ APInt Min = getUnsignedMin(), Max = getUnsignedMax();
+ APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
+
+ // a u+ b overflows high iff a u> ~b.
+ if (Min.ugt(~OtherMin))
+ return OverflowResult::AlwaysOverflowsHigh;
+ if (Max.ugt(~OtherMax))
+ return OverflowResult::MayOverflow;
+ return OverflowResult::NeverOverflows;
+}
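
A sketch of the rule in the comment above ("a u+ b overflows high iff a u> ~b"), with illustrative values and the same headers assumed:

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void uaddOverflowDemo() {
      ConstantRange X(APInt(8, 200), APInt(8, 211)); // {200..210}
      ConstantRange Y(APInt(8, 100), APInt(8, 121)); // {100..120}
      // Min(X) = 200 u> ~Min(Y) = 155, so every sum exceeds 255; the expected
      // result is OverflowResult::AlwaysOverflowsHigh.
      auto Res = X.unsignedAddMayOverflow(Y);
      (void)Res;
    }
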
+
+ConstantRange::OverflowResult ConstantRange::signedAddMayOverflow(
+ const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return OverflowResult::MayOverflow;
+
+ APInt Min = getSignedMin(), Max = getSignedMax();
+ APInt OtherMin = Other.getSignedMin(), OtherMax = Other.getSignedMax();
+
+ APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+ APInt SignedMax = APInt::getSignedMaxValue(getBitWidth());
+
+ // a s+ b overflows high iff a s>=0 && b s>= 0 && a s> smax - b.
+ // a s+ b overflows low iff a s< 0 && b s< 0 && a s< smin - b.
+ if (Min.isNonNegative() && OtherMin.isNonNegative() &&
+ Min.sgt(SignedMax - OtherMin))
+ return OverflowResult::AlwaysOverflowsHigh;
+ if (Max.isNegative() && OtherMax.isNegative() &&
+ Max.slt(SignedMin - OtherMax))
+ return OverflowResult::AlwaysOverflowsLow;
+
+ if (Max.isNonNegative() && OtherMax.isNonNegative() &&
+ Max.sgt(SignedMax - OtherMax))
+ return OverflowResult::MayOverflow;
+ if (Min.isNegative() && OtherMin.isNegative() &&
+ Min.slt(SignedMin - OtherMin))
+ return OverflowResult::MayOverflow;
+
+ return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::unsignedSubMayOverflow(
+ const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return OverflowResult::MayOverflow;
+
+ APInt Min = getUnsignedMin(), Max = getUnsignedMax();
+ APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
+
+ // a u- b overflows low iff a u< b.
+ if (Max.ult(OtherMin))
+ return OverflowResult::AlwaysOverflowsLow;
+ if (Min.ult(OtherMax))
+ return OverflowResult::MayOverflow;
+ return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::signedSubMayOverflow(
+ const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return OverflowResult::MayOverflow;
+
+ APInt Min = getSignedMin(), Max = getSignedMax();
+ APInt OtherMin = Other.getSignedMin(), OtherMax = Other.getSignedMax();
+
+ APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
+ APInt SignedMax = APInt::getSignedMaxValue(getBitWidth());
+
+ // a s- b overflows high iff a s>=0 && b s< 0 && a s> smax + b.
+ // a s- b overflows low iff a s< 0 && b s>= 0 && a s< smin + b.
+ if (Min.isNonNegative() && OtherMax.isNegative() &&
+ Min.sgt(SignedMax + OtherMax))
+ return OverflowResult::AlwaysOverflowsHigh;
+ if (Max.isNegative() && OtherMin.isNonNegative() &&
+ Max.slt(SignedMin + OtherMin))
+ return OverflowResult::AlwaysOverflowsLow;
+
+ if (Max.isNonNegative() && OtherMin.isNegative() &&
+ Max.sgt(SignedMax + OtherMin))
+ return OverflowResult::MayOverflow;
+ if (Min.isNegative() && OtherMax.isNonNegative() &&
+ Min.slt(SignedMin + OtherMax))
+ return OverflowResult::MayOverflow;
+
+ return OverflowResult::NeverOverflows;
+}
+
+ConstantRange::OverflowResult ConstantRange::unsignedMulMayOverflow(
+ const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return OverflowResult::MayOverflow;
+
+ APInt Min = getUnsignedMin(), Max = getUnsignedMax();
+ APInt OtherMin = Other.getUnsignedMin(), OtherMax = Other.getUnsignedMax();
+ bool Overflow;
+
+ (void) Min.umul_ov(OtherMin, Overflow);
+ if (Overflow)
+ return OverflowResult::AlwaysOverflowsHigh;
+
+ (void) Max.umul_ov(OtherMax, Overflow);
+ if (Overflow)
+ return OverflowResult::MayOverflow;
+
+ return OverflowResult::NeverOverflows;
+}
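
A matching sketch for the multiplication query (illustrative, same headers assumed):

    #include "llvm/IR/ConstantRange.h"
    using namespace llvm;

    void umulOverflowDemo() {
      ConstantRange X(APInt(8, 16), APInt(8, 21)); // {16..20}
      ConstantRange Y(APInt(8, 20), APInt(8, 31)); // {20..30}
      // Even the smallest product, 16 * 20 = 320, exceeds 255, so the expected
      // answer is OverflowResult::AlwaysOverflowsHigh.
      auto Res = X.unsignedMulMayOverflow(Y);
      (void)Res;
    }
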
+
void ConstantRange::print(raw_ostream &OS) const {
if (isFullSet())
OS << "full-set";
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index d36967fdcfe1..ff551da29ae6 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -1,9 +1,8 @@
//===-- Constants.cpp - Implement Constant nodes --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -261,6 +260,16 @@ bool Constant::containsUndefElement() const {
return false;
}
+bool Constant::containsConstantExpression() const {
+ if (!getType()->isVectorTy())
+ return false;
+ for (unsigned i = 0, e = getType()->getVectorNumElements(); i != e; ++i)
+ if (isa<ConstantExpr>(getAggregateElement(i)))
+ return true;
+
+ return false;
+}
+
/// Constructor to create a '0' constant of arbitrary type.
Constant *Constant::getNullValue(Type *Ty) {
switch (Ty->getTypeID()) {
@@ -1821,7 +1830,8 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C, unsigned Flags,
}
#endif
- // TODO: Try to constant fold operation.
+ if (Constant *FC = ConstantFoldUnaryInstruction(Opcode, C))
+ return FC;
if (OnlyIfReducedTy == C->getType())
return nullptr;
@@ -1846,51 +1856,31 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create an integer operation on a non-integer type!");
break;
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create a floating-point operation on a "
- "non-floating-point type!");
- break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
case Instruction::FDiv:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
- case Instruction::URem:
- case Instruction::SRem:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert(C1->getType()->isIntOrIntVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
- break;
case Instruction::FRem:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create an arithmetic operation on a non-arithmetic type!");
+ "Tried to create a floating-point operation on a "
+ "non-floating-point type!");
break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create a logical operation on a non-integral type!");
break;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- assert(C1->getType() == C2->getType() && "Op types should be identical!");
assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create a shift operation on a non-integer type!");
break;
@@ -1900,7 +1890,7 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
#endif
if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
- return FC; // Fold a few common cases.
+ return FC;
if (OnlyIfReducedTy == C1->getType())
return nullptr;
@@ -2226,7 +2216,7 @@ Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
Constant *ConstantExpr::getFNeg(Constant *C) {
assert(C->getType()->isFPOrFPVectorTy() &&
"Cannot FNEG a non-floating-point value!");
- return getFSub(ConstantFP::getZeroValueForNegation(C->getType()), C);
+ return get(Instruction::FNeg, C);
}
Constant *ConstantExpr::getNot(Constant *C) {
@@ -2567,7 +2557,7 @@ Constant *ConstantDataArray::getFP(LLVMContext &Context,
Constant *ConstantDataArray::getString(LLVMContext &Context,
StringRef Str, bool AddNull) {
if (!AddNull) {
- const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data());
+ const uint8_t *Data = Str.bytes_begin();
return get(Context, makeArrayRef(Data, Str.size()));
}
@@ -3015,7 +3005,8 @@ Instruction *ConstantExpr::getAsInstruction() {
case Instruction::FCmp:
return CmpInst::Create((Instruction::OtherOps)getOpcode(),
(CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]);
-
+ case Instruction::FNeg:
+ return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
BinaryOperator *BO =
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index eac171397084..7614dab9f15d 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -1,9 +1,8 @@
//===-- ConstantsContext.h - Constants-related Context Interals -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 815797f4b7ea..310935b5213a 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -1,9 +1,8 @@
//===-- Core.cpp ----------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1037,6 +1036,16 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
/*--.. Operations on metadata nodes ........................................--*/
+LLVMMetadataRef LLVMMDStringInContext2(LLVMContextRef C, const char *Str,
+ size_t SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMMetadataRef LLVMMDNodeInContext2(LLVMContextRef C, LLVMMetadataRef *MDs,
+ size_t Count) {
+ return wrap(MDNode::get(*unwrap(C), ArrayRef<Metadata*>(unwrap(MDs), Count)));
+}
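
A minimal usage sketch of the new metadata-ref variants (usable from C or C++; assumes llvm-c/Core.h):

    #include "llvm-c/Core.h"

    void mdDemo(void) {
      LLVMContextRef Ctx = LLVMContextCreate();
      LLVMMetadataRef Str = LLVMMDStringInContext2(Ctx, "answer", 6);
      LLVMMetadataRef Node = LLVMMDNodeInContext2(Ctx, &Str, 1);
      (void)Node;
      LLVMContextDispose(Ctx);
    }
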
+
LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
unsigned SLen) {
LLVMContext &Context = *unwrap(C);
@@ -1200,15 +1209,17 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name,
const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) {
if (!Length) return nullptr;
StringRef S;
- if (const auto *I = unwrap<Instruction>(Val)) {
- S = I->getDebugLoc()->getDirectory();
- } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ if (const auto *I = dyn_cast<Instruction>(unwrap(Val))) {
+ if (const auto &DL = I->getDebugLoc()) {
+ S = DL->getDirectory();
+ }
+ } else if (const auto *GV = dyn_cast<GlobalVariable>(unwrap(Val))) {
SmallVector<DIGlobalVariableExpression *, 1> GVEs;
GV->getDebugInfo(GVEs);
if (GVEs.size())
if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
S = DGV->getDirectory();
- } else if (const auto *F = unwrap<Function>(Val)) {
+ } else if (const auto *F = dyn_cast<Function>(unwrap(Val))) {
if (const DISubprogram *DSP = F->getSubprogram())
S = DSP->getDirectory();
} else {
@@ -1222,15 +1233,17 @@ const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) {
const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) {
if (!Length) return nullptr;
StringRef S;
- if (const auto *I = unwrap<Instruction>(Val)) {
- S = I->getDebugLoc()->getFilename();
- } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ if (const auto *I = dyn_cast<Instruction>(unwrap(Val))) {
+ if (const auto &DL = I->getDebugLoc()) {
+ S = DL->getFilename();
+ }
+ } else if (const auto *GV = dyn_cast<GlobalVariable>(unwrap(Val))) {
SmallVector<DIGlobalVariableExpression *, 1> GVEs;
GV->getDebugInfo(GVEs);
if (GVEs.size())
if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
S = DGV->getFilename();
- } else if (const auto *F = unwrap<Function>(Val)) {
+ } else if (const auto *F = dyn_cast<Function>(unwrap(Val))) {
if (const DISubprogram *DSP = F->getSubprogram())
S = DSP->getFilename();
} else {
@@ -1243,15 +1256,17 @@ const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) {
unsigned LLVMGetDebugLocLine(LLVMValueRef Val) {
unsigned L = 0;
- if (const auto *I = unwrap<Instruction>(Val)) {
- L = I->getDebugLoc()->getLine();
- } else if (const auto *GV = unwrap<GlobalVariable>(Val)) {
+ if (const auto *I = dyn_cast<Instruction>(unwrap(Val))) {
+ if (const auto &DL = I->getDebugLoc()) {
+ L = DL->getLine();
+ }
+ } else if (const auto *GV = dyn_cast<GlobalVariable>(unwrap(Val))) {
SmallVector<DIGlobalVariableExpression *, 1> GVEs;
GV->getDebugInfo(GVEs);
if (GVEs.size())
if (const DIGlobalVariable *DGV = GVEs[0]->getVariable())
L = DGV->getLine();
- } else if (const auto *F = unwrap<Function>(Val)) {
+ } else if (const auto *F = dyn_cast<Function>(unwrap(Val))) {
if (const DISubprogram *DSP = F->getSubprogram())
L = DSP->getLine();
} else {
@@ -1263,9 +1278,9 @@ unsigned LLVMGetDebugLocLine(LLVMValueRef Val) {
unsigned LLVMGetDebugLocColumn(LLVMValueRef Val) {
unsigned C = 0;
- if (const auto *I = unwrap<Instruction>(Val))
- if (const auto &L = I->getDebugLoc())
- C = L->getColumn();
+ if (const auto *I = dyn_cast<Instruction>(unwrap(Val)))
+ if (const auto &DL = I->getDebugLoc())
+ C = DL->getColumn();
return C;
}
@@ -2330,6 +2345,10 @@ const char *LLVMIntrinsicCopyOverloadedName(unsigned ID,
return strdup(Str.c_str());
}
+unsigned LLVMLookupIntrinsicID(const char *Name, size_t NameLen) {
+ return Function::lookupIntrinsicID({Name, NameLen});
+}
+
LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID) {
auto IID = llvm_map_to_intrinsic_id(ID);
return llvm::Intrinsic::isOverloaded(IID);
@@ -2464,6 +2483,71 @@ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
A->addAttr(Attribute::getWithAlignment(A->getContext(), align));
}
+/*--.. Operations on ifuncs ................................................--*/
+
+LLVMValueRef LLVMAddGlobalIFunc(LLVMModuleRef M,
+ const char *Name, size_t NameLen,
+ LLVMTypeRef Ty, unsigned AddrSpace,
+ LLVMValueRef Resolver) {
+ return wrap(GlobalIFunc::create(unwrap(Ty), AddrSpace,
+ GlobalValue::ExternalLinkage,
+ StringRef(Name, NameLen),
+ unwrap<Constant>(Resolver), unwrap(M)));
+}
+
+LLVMValueRef LLVMGetNamedGlobalIFunc(LLVMModuleRef M,
+ const char *Name, size_t NameLen) {
+ return wrap(unwrap(M)->getNamedIFunc(StringRef(Name, NameLen)));
+}
+
+LLVMValueRef LLVMGetFirstGlobalIFunc(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::ifunc_iterator I = Mod->ifunc_begin();
+ if (I == Mod->ifunc_end())
+ return nullptr;
+ return wrap(&*I);
+}
+
+LLVMValueRef LLVMGetLastGlobalIFunc(LLVMModuleRef M) {
+ Module *Mod = unwrap(M);
+ Module::ifunc_iterator I = Mod->ifunc_end();
+ if (I == Mod->ifunc_begin())
+ return nullptr;
+ return wrap(&*--I);
+}
+
+LLVMValueRef LLVMGetNextGlobalIFunc(LLVMValueRef IFunc) {
+ GlobalIFunc *GIF = unwrap<GlobalIFunc>(IFunc);
+ Module::ifunc_iterator I(GIF);
+ if (++I == GIF->getParent()->ifunc_end())
+ return nullptr;
+ return wrap(&*I);
+}
+
+LLVMValueRef LLVMGetPreviousGlobalIFunc(LLVMValueRef IFunc) {
+ GlobalIFunc *GIF = unwrap<GlobalIFunc>(IFunc);
+ Module::ifunc_iterator I(GIF);
+ if (I == GIF->getParent()->ifunc_begin())
+ return nullptr;
+ return wrap(&*--I);
+}
+
+LLVMValueRef LLVMGetGlobalIFuncResolver(LLVMValueRef IFunc) {
+ return wrap(unwrap<GlobalIFunc>(IFunc)->getResolver());
+}
+
+void LLVMSetGlobalIFuncResolver(LLVMValueRef IFunc, LLVMValueRef Resolver) {
+ unwrap<GlobalIFunc>(IFunc)->setResolver(unwrap<Constant>(Resolver));
+}
+
+void LLVMEraseGlobalIFunc(LLVMValueRef IFunc) {
+ unwrap<GlobalIFunc>(IFunc)->eraseFromParent();
+}
+
+void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc) {
+ unwrap<GlobalIFunc>(IFunc)->removeFromParent();
+}
+
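
A rough usage sketch for the new ifunc bindings; the module M, the type FnTy, and the resolver name are placeholders, not part of the patch:

    #include "llvm-c/Core.h"

    void ifuncDemo(LLVMModuleRef M, LLVMTypeRef FnTy) {
      // Assumes a resolver function named "my_resolver" already exists in M.
      LLVMValueRef Resolver = LLVMGetNamedFunction(M, "my_resolver");
      LLVMValueRef IFunc =
          LLVMAddGlobalIFunc(M, "my_ifunc", 8, FnTy, /*AddrSpace=*/0, Resolver);
      // The resolver can be queried or replaced later.
      LLVMSetGlobalIFuncResolver(IFunc, LLVMGetGlobalIFuncResolver(IFunc));
    }
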
/*--.. Operations on basic blocks ..........................................--*/
LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) {
@@ -2541,6 +2625,20 @@ LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C,
return wrap(llvm::BasicBlock::Create(*unwrap(C), Name));
}
+void LLVMInsertExistingBasicBlockAfterInsertBlock(LLVMBuilderRef Builder,
+ LLVMBasicBlockRef BB) {
+ BasicBlock *ToInsert = unwrap(BB);
+ BasicBlock *CurBB = unwrap(Builder)->GetInsertBlock();
+ assert(CurBB && "current insertion point is invalid!");
+ CurBB->getParent()->getBasicBlockList().insertAfter(CurBB->getIterator(),
+ ToInsert);
+}
+
+void LLVMAppendExistingBasicBlock(LLVMValueRef Fn,
+ LLVMBasicBlockRef BB) {
+ unwrap<Function>(Fn)->getBasicBlockList().push_back(unwrap(BB));
+}
+
LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
LLVMValueRef FnRef,
const char *Name) {
@@ -2924,6 +3022,17 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
/*--.. Metadata builders ...................................................--*/
+LLVMMetadataRef LLVMGetCurrentDebugLocation2(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation().getAsMDNode());
+}
+
+void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc) {
+ if (Loc)
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(unwrap<MDNode>(Loc)));
+ else
+ unwrap(Builder)->SetCurrentDebugLocation(DebugLoc());
+}
+
void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
MDNode *Loc =
L ? cast<MDNode>(unwrap<MetadataAsValue>(L)->getMetadata()) : nullptr;
@@ -2940,6 +3049,17 @@ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
}
+void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder,
+ LLVMMetadataRef FPMathTag) {
+
+ unwrap(Builder)->setDefaultFPMathTag(FPMathTag
+ ? unwrap<MDNode>(FPMathTag)
+ : nullptr);
+}
+
+LLVMMetadataRef LLVMBuilderGetDefaultFPMathTag(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getDefaultFPMathTag());
+}
/*--.. Instruction builders ................................................--*/
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index fb81634a2868..2493c6cbe532 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -1,9 +1,8 @@
//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -167,8 +166,8 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
if (Line)
assert(File && "Source location has line number but no file");
unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size();
- auto *M =
- DIImportedEntity::get(C, Tag, Context, DINodeRef(NS), File, Line, Name);
+ auto *M = DIImportedEntity::get(C, Tag, Context, cast_or_null<DINode>(NS),
+ File, Line, Name);
if (EntitiesCount < C.pImpl->DIImportedEntitys.size())
// A new Imported Entity was just added to the context.
// Add it to the Imported Modules list.
@@ -806,6 +805,13 @@ DISubprogram *DIBuilder::createMethod(
return SP;
}
+DICommonBlock *DIBuilder::createCommonBlock(
+ DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, DIFile *File,
+ unsigned LineNo) {
+ return DICommonBlock::get(
+ VMContext, Scope, Decl, Name, File, LineNo);
+}
+
DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
bool ExportSymbols) {
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index 63c24b5ee7af..6e0ebbd4a730 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -1,9 +1,8 @@
//===- DataLayout.cpp - Data size & alignment routines ---------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -185,6 +184,8 @@ void DataLayout::reset(StringRef Desc) {
AllocaAddrSpace = 0;
StackNaturalAlign = 0;
ProgramAddrSpace = 0;
+ FunctionPtrAlign = 0;
+ TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
ManglingMode = MM_None;
NonIntegralAddressSpaces.clear();
@@ -380,6 +381,22 @@ void DataLayout::parseSpecifier(StringRef Desc) {
StackNaturalAlign = inBytes(getInt(Tok));
break;
}
+ case 'F': {
+ switch (Tok.front()) {
+ case 'i':
+ TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
+ break;
+ case 'n':
+ TheFunctionPtrAlignType = FunctionPtrAlignType::MultipleOfFunctionAlign;
+ break;
+ default:
+ report_fatal_error("Unknown function pointer alignment type in "
+ "datalayout string");
+ }
+ Tok = Tok.substr(1);
+ FunctionPtrAlign = inBytes(getInt(Tok));
+ break;
+ }
case 'P': { // Function address space.
ProgramAddrSpace = getAddrSpace(Tok);
break;
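
The new 'F' specifier is parsed out of the datalayout string; a sketch of what such strings could look like (the surrounding components are illustrative only):

    #include "llvm/IR/DataLayout.h"
    using namespace llvm;

    void dataLayoutDemo() {
      // "Fi8":  function pointers may be aligned independently of functions,
      //         with an ABI alignment of 8 bits (one byte).
      DataLayout Independent("e-Fi8-i64:64");
      // "Fn16": function pointer alignment is a multiple of the function's own
      //         alignment, with an ABI alignment of 16 bits.
      DataLayout Multiple("e-Fn16-i64:64");
      (void)Independent; (void)Multiple;
    }
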
@@ -433,6 +450,8 @@ bool DataLayout::operator==(const DataLayout &Other) const {
AllocaAddrSpace == Other.AllocaAddrSpace &&
StackNaturalAlign == Other.StackNaturalAlign &&
ProgramAddrSpace == Other.ProgramAddrSpace &&
+ FunctionPtrAlign == Other.FunctionPtrAlign &&
+ TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
Alignments == Other.Alignments && Pointers == Other.Pointers;
@@ -444,12 +463,9 @@ DataLayout::AlignmentsTy::iterator
DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType,
uint32_t BitWidth) {
auto Pair = std::make_pair((unsigned)AlignType, BitWidth);
- return std::lower_bound(Alignments.begin(), Alignments.end(), Pair,
- [](const LayoutAlignElem &LHS,
- const std::pair<unsigned, uint32_t> &RHS) {
- return std::tie(LHS.AlignType, LHS.TypeBitWidth) <
- std::tie(RHS.first, RHS.second);
- });
+ return partition_point(Alignments, [=](const LayoutAlignElem &E) {
+ return std::make_pair(E.AlignType, E.TypeBitWidth) < Pair;
+ });
}
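
The std::lower_bound call is replaced with llvm::partition_point, which returns the first element for which the predicate is false; a standalone sketch of the idiom:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    void partitionPointDemo() {
      std::vector<int> V = {1, 3, 5, 8, 10};
      // V is partitioned by "x < 8"; partition_point returns the iterator to 8.
      auto It = llvm::partition_point(V, [](int X) { return X < 8; });
      (void)It;
    }
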
void
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 9fa31773b598..ce47ef207434 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -1,9 +1,8 @@
//===- DebugInfo.cpp - Debug Information Helper Classes -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,7 +81,7 @@ void DebugInfoFinder::processCompileUnit(DICompileUnit *CU) {
continue;
auto *GV = DIG->getVariable();
processScope(GV->getScope());
- processType(GV->getType().resolve());
+ processType(GV->getType());
}
for (auto *ET : CU->getEnumTypes())
processType(ET);
@@ -92,7 +91,7 @@ void DebugInfoFinder::processCompileUnit(DICompileUnit *CU) {
else
processSubprogram(cast<DISubprogram>(RT));
for (auto *Import : CU->getImportedEntities()) {
- auto *Entity = Import->getEntity().resolve();
+ auto *Entity = Import->getEntity();
if (auto *T = dyn_cast<DIType>(Entity))
processType(T);
else if (auto *SP = dyn_cast<DISubprogram>(Entity))
@@ -125,14 +124,14 @@ void DebugInfoFinder::processLocation(const Module &M, const DILocation *Loc) {
void DebugInfoFinder::processType(DIType *DT) {
if (!addType(DT))
return;
- processScope(DT->getScope().resolve());
+ processScope(DT->getScope());
if (auto *ST = dyn_cast<DISubroutineType>(DT)) {
- for (DITypeRef Ref : ST->getTypeArray())
- processType(Ref.resolve());
+ for (DIType *Ref : ST->getTypeArray())
+ processType(Ref);
return;
}
if (auto *DCT = dyn_cast<DICompositeType>(DT)) {
- processType(DCT->getBaseType().resolve());
+ processType(DCT->getBaseType());
for (Metadata *D : DCT->getElements()) {
if (auto *T = dyn_cast<DIType>(D))
processType(T);
@@ -142,7 +141,7 @@ void DebugInfoFinder::processType(DIType *DT) {
return;
}
if (auto *DDT = dyn_cast<DIDerivedType>(DT)) {
- processType(DDT->getBaseType().resolve());
+ processType(DDT->getBaseType());
}
}
@@ -175,7 +174,7 @@ void DebugInfoFinder::processScope(DIScope *Scope) {
void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
if (!addSubprogram(SP))
return;
- processScope(SP->getScope().resolve());
+ processScope(SP->getScope());
// Some of the users, e.g. CloneFunctionInto / CloneModule, need to set up a
// ValueMap containing identity mappings for all of the DICompileUnit's, not
// just DISubprogram's, referenced from anywhere within the Function being
@@ -188,9 +187,9 @@ void DebugInfoFinder::processSubprogram(DISubprogram *SP) {
processType(SP->getType());
for (auto *Element : SP->getTemplateParams()) {
if (auto *TType = dyn_cast<DITemplateTypeParameter>(Element)) {
- processType(TType->getType().resolve());
+ processType(TType->getType());
} else if (auto *TVal = dyn_cast<DITemplateValueParameter>(Element)) {
- processType(TVal->getType().resolve());
+ processType(TVal->getType());
}
}
}
@@ -208,7 +207,7 @@ void DebugInfoFinder::processDeclare(const Module &M,
if (!NodesSeen.insert(DV).second)
return;
processScope(DV->getScope());
- processType(DV->getType().resolve());
+ processType(DV->getType());
}
void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
@@ -223,7 +222,7 @@ void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
if (!NodesSeen.insert(DV).second)
return;
processScope(DV->getScope());
- processType(DV->getType().resolve());
+ processType(DV->getType());
}
bool DebugInfoFinder::addType(DIType *DT) {
@@ -429,7 +428,8 @@ private:
StringRef LinkageName = MDS->getName().empty() ? MDS->getLinkageName() : "";
DISubprogram *Declaration = nullptr;
auto *Type = cast_or_null<DISubroutineType>(map(MDS->getType()));
- DITypeRef ContainingType(map(MDS->getContainingType()));
+ DIType *ContainingType =
+ cast_or_null<DIType>(map(MDS->getContainingType()));
auto *Unit = cast_or_null<DICompileUnit>(map(MDS->getUnit()));
auto Variables = nullptr;
auto TemplateParams = nullptr;
@@ -900,6 +900,43 @@ LLVMMetadataRef LLVMDILocationGetScope(LLVMMetadataRef Location) {
return wrap(unwrapDI<DILocation>(Location)->getScope());
}
+LLVMMetadataRef LLVMDILocationGetInlinedAt(LLVMMetadataRef Location) {
+ return wrap(unwrapDI<DILocation>(Location)->getInlinedAt());
+}
+
+LLVMMetadataRef LLVMDIScopeGetFile(LLVMMetadataRef Scope) {
+ return wrap(unwrapDI<DIScope>(Scope)->getFile());
+}
+
+const char *LLVMDIFileGetDirectory(LLVMMetadataRef File, unsigned *Len) {
+ auto Dir = unwrapDI<DIFile>(File)->getDirectory();
+ *Len = Dir.size();
+ return Dir.data();
+}
+
+const char *LLVMDIFileGetFilename(LLVMMetadataRef File, unsigned *Len) {
+ auto Name = unwrapDI<DIFile>(File)->getFilename();
+ *Len = Name.size();
+ return Name.data();
+}
+
+const char *LLVMDIFileGetSource(LLVMMetadataRef File, unsigned *Len) {
+ if (auto Src = unwrapDI<DIFile>(File)->getSource()) {
+ *Len = Src->size();
+ return Src->data();
+ }
+ *Len = 0;
+ return "";
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateEnumerator(LLVMDIBuilderRef Builder,
+ const char *Name, size_t NameLen,
+ int64_t Value,
+ LLVMBool IsUnsigned) {
+ return wrap(unwrap(Builder)->createEnumerator({Name, NameLen}, Value,
+ IsUnsigned != 0));
+}
+
LLVMMetadataRef LLVMDIBuilderCreateEnumerationType(
LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, const char *Name,
size_t NameLen, LLVMMetadataRef File, unsigned LineNumber,
@@ -1237,6 +1274,27 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
nullptr, AlignInBits));
}
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetVariable(LLVMMetadataRef GVE) {
+ return wrap(unwrapDI<DIGlobalVariableExpression>(GVE)->getVariable());
+}
+
+LLVMMetadataRef LLVMDIGlobalVariableExpressionGetExpression(
+ LLVMMetadataRef GVE) {
+ return wrap(unwrapDI<DIGlobalVariableExpression>(GVE)->getExpression());
+}
+
+LLVMMetadataRef LLVMDIVariableGetFile(LLVMMetadataRef Var) {
+ return wrap(unwrapDI<DIVariable>(Var)->getFile());
+}
+
+LLVMMetadataRef LLVMDIVariableGetScope(LLVMMetadataRef Var) {
+ return wrap(unwrapDI<DIVariable>(Var)->getScope());
+}
+
+unsigned LLVMDIVariableGetLine(LLVMMetadataRef Var) {
+ return unwrapDI<DIVariable>(Var)->getLine();
+}
+
LLVMMetadataRef LLVMTemporaryMDNode(LLVMContextRef Ctx, LLVMMetadataRef *Data,
size_t Count) {
return wrap(
@@ -1348,6 +1406,21 @@ void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
}
+unsigned LLVMDISubprogramGetLine(LLVMMetadataRef Subprogram) {
+ return unwrapDI<DISubprogram>(Subprogram)->getLine();
+}
+
+LLVMMetadataRef LLVMInstructionGetDebugLoc(LLVMValueRef Inst) {
+ return wrap(unwrap<Instruction>(Inst)->getDebugLoc().getAsMDNode());
+}
+
+void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc) {
+ if (Loc)
+ unwrap<Instruction>(Inst)->setDebugLoc(DebugLoc(unwrap<MDNode>(Loc)));
+ else
+ unwrap<Instruction>(Inst)->setDebugLoc(DebugLoc());
+}
+
LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) {
switch(unwrap(Metadata)->getMetadataID()) {
#define HANDLE_METADATA_LEAF(CLASS) \
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 92f3f21f754c..900df27d1d33 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -1,9 +1,8 @@
//===- DebugInfoMetadata.cpp - Implement debug info metadata --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,7 +88,7 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
DILocation *L = LocA->getInlinedAt();
while (S) {
Locations.insert(std::make_pair(S, L));
- S = S->getScope().resolve();
+ S = S->getScope();
if (!S && L) {
S = L->getScope();
L = L->getInlinedAt();
@@ -101,7 +100,7 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
while (S) {
if (Locations.count(std::make_pair(S, L)))
break;
- S = S->getScope().resolve();
+ S = S->getScope();
if (!S && L) {
S = L->getScope();
L = L->getInlinedAt();
@@ -210,7 +209,7 @@ DINode::DIFlags DINode::splitFlags(DIFlags Flags,
return Flags;
}
-DIScopeRef DIScope::getScope() const {
+DIScope *DIScope::getScope() const {
if (auto *T = dyn_cast<DIType>(this))
return T->getScope();
@@ -223,6 +222,9 @@ DIScopeRef DIScope::getScope() const {
if (auto *NS = dyn_cast<DINamespace>(this))
return NS->getScope();
+ if (auto *CB = dyn_cast<DICommonBlock>(this))
+ return CB->getScope();
+
if (auto *M = dyn_cast<DIModule>(this))
return M->getScope();
@@ -238,6 +240,8 @@ StringRef DIScope::getName() const {
return SP->getName();
if (auto *NS = dyn_cast<DINamespace>(this))
return NS->getName();
+ if (auto *CB = dyn_cast<DICommonBlock>(this))
+ return CB->getName();
if (auto *M = dyn_cast<DIModule>(this))
return M->getName();
assert((isa<DILexicalBlockBase>(this) || isa<DIFile>(this) ||
@@ -695,6 +699,17 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops);
}
+DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope,
+ Metadata *Decl, MDString *Name,
+ Metadata *File, unsigned LineNo,
+ StorageType Storage, bool ShouldCreate) {
+ assert(isCanonical(Name) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DICommonBlock, (Scope, Decl, Name, File, LineNo));
+ // The nullptr is for DIScope's File operand. This should be refactored.
+ Metadata *Ops[] = {Scope, Decl, Name, File};
+ DEFINE_GETIMPL_STORE(DICommonBlock, (LineNo), Ops);
+}
+
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, MDString *ConfigurationMacros,
MDString *IncludePath, MDString *ISysRoot,
@@ -814,10 +829,14 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context,
unsigned DIExpression::ExprOperand::getSize() const {
switch (getOp()) {
+ case dwarf::DW_OP_LLVM_convert:
case dwarf::DW_OP_LLVM_fragment:
return 3;
case dwarf::DW_OP_constu:
+ case dwarf::DW_OP_deref_size:
case dwarf::DW_OP_plus_uconst:
+ case dwarf::DW_OP_LLVM_tag_offset:
+ case dwarf::DW_OP_entry_value:
return 2;
default:
return 1;
@@ -858,6 +877,15 @@ bool DIExpression::isValid() const {
return false;
break;
}
+ case dwarf::DW_OP_entry_value: {
+ // An entry value operator must appear at the beginning, and the size
+ // of the following expression should be 1, because we currently support
+ // only entry values of a simple register location.
+ return I->get() == expr_op_begin()->get() && I->getArg(0) == 1 &&
+ getNumElements() == 2;
+ }
+ case dwarf::DW_OP_LLVM_convert:
+ case dwarf::DW_OP_LLVM_tag_offset:
case dwarf::DW_OP_constu:
case dwarf::DW_OP_plus_uconst:
case dwarf::DW_OP_plus:
@@ -872,6 +900,7 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_shr:
case dwarf::DW_OP_shra:
case dwarf::DW_OP_deref:
+ case dwarf::DW_OP_deref_size:
case dwarf::DW_OP_xderef:
case dwarf::DW_OP_lit0:
case dwarf::DW_OP_not:
@@ -882,6 +911,42 @@ bool DIExpression::isValid() const {
return true;
}
+bool DIExpression::isImplicit() const {
+ unsigned N = getNumElements();
+ if (isValid() && N > 0) {
+ switch (getElement(N-1)) {
+ case dwarf::DW_OP_stack_value:
+ case dwarf::DW_OP_LLVM_tag_offset:
+ return true;
+ case dwarf::DW_OP_LLVM_fragment:
+ return N > 1 && getElement(N-2) == dwarf::DW_OP_stack_value;
+ default: break;
+ }
+ }
+ return false;
+}
+
+bool DIExpression::isComplex() const {
+ if (!isValid())
+ return false;
+
+ if (getNumElements() == 0)
+ return false;
+
+ // If there are any elements other than fragment or tag_offset, then some
+ // kind of complex computation occurs.
+ for (const auto &It : expr_ops()) {
+ switch (It.getOp()) {
+ case dwarf::DW_OP_LLVM_tag_offset:
+ case dwarf::DW_OP_LLVM_fragment:
+ continue;
+ default: return true;
+ }
+ }
+
+ return false;
+}
+
Optional<DIExpression::FragmentInfo>
DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) {
for (auto I = Start; I != End; ++I)
@@ -929,25 +994,53 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
return false;
}
-DIExpression *DIExpression::prepend(const DIExpression *Expr, bool DerefBefore,
- int64_t Offset, bool DerefAfter,
- bool StackValue) {
+const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr,
+ unsigned &AddrClass) {
+ const unsigned PatternSize = 4;
+ if (Expr->Elements.size() >= PatternSize &&
+ Expr->Elements[PatternSize - 4] == dwarf::DW_OP_constu &&
+ Expr->Elements[PatternSize - 2] == dwarf::DW_OP_swap &&
+ Expr->Elements[PatternSize - 1] == dwarf::DW_OP_xderef) {
+ AddrClass = Expr->Elements[PatternSize - 3];
+
+ if (Expr->Elements.size() == PatternSize)
+ return nullptr;
+ return DIExpression::get(Expr->getContext(),
+ makeArrayRef(&*Expr->Elements.begin(),
+ Expr->Elements.size() - PatternSize));
+ }
+ return Expr;
+}
+
+DIExpression *DIExpression::prepend(const DIExpression *Expr, uint8_t Flags,
+ int64_t Offset) {
SmallVector<uint64_t, 8> Ops;
- if (DerefBefore)
+ if (Flags & DIExpression::DerefBefore)
Ops.push_back(dwarf::DW_OP_deref);
appendOffset(Ops, Offset);
- if (DerefAfter)
+ if (Flags & DIExpression::DerefAfter)
Ops.push_back(dwarf::DW_OP_deref);
- return prependOpcodes(Expr, Ops, StackValue);
+ bool StackValue = Flags & DIExpression::StackValue;
+ bool EntryValue = Flags & DIExpression::EntryValue;
+
+ return prependOpcodes(Expr, Ops, StackValue, EntryValue);
}
DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr,
SmallVectorImpl<uint64_t> &Ops,
- bool StackValue) {
+ bool StackValue,
+ bool EntryValue) {
assert(Expr && "Can't prepend ops to this expression");
+ if (EntryValue) {
+ Ops.push_back(dwarf::DW_OP_entry_value);
+ // Add the size info needed for the entry value expression,
+ // plus one for the target register operand.
+ Ops.push_back(Expr->getNumElements() + 1);
+ }
+
// If there are no ops to prepend, do not even add the DW_OP_stack_value.
if (Ops.empty())
StackValue = false;
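
A minimal caller-side sketch of the flags-based prepend API introduced above; the helper name, the flag combination, and the offset are illustrative and not taken from this change:

    #include "llvm/IR/DebugInfoMetadata.h"
    using namespace llvm;

    // Rewrite a variable's location expression: dereference the base pointer,
    // apply a byte offset, and mark the result as a stack-value location.
    static DIExpression *addDerefAndOffset(const DIExpression *Expr,
                                           int64_t Offset) {
      return DIExpression::prepend(
          Expr, DIExpression::DerefBefore | DIExpression::StackValue, Offset);
    }
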
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index 10ec98ac7e6c..14d1396f1543 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -1,9 +1,8 @@
//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/DiagnosticHandler.cpp b/lib/IR/DiagnosticHandler.cpp
index 8f972785cf91..2fe634803894 100644
--- a/lib/IR/DiagnosticHandler.cpp
+++ b/lib/IR/DiagnosticHandler.cpp
@@ -1,9 +1,8 @@
//===- DiagnosticHandler.h - DiagnosticHandler class for LLVM -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index dc957ab7dad9..4a8e3cca3493 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -1,9 +1,8 @@
//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -373,83 +372,3 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
-
-namespace llvm {
-namespace yaml {
-
-void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
- IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
- assert(io.outputting() && "input not yet implemented");
-
- if (io.mapTag("!Passed",
- (OptDiag->getKind() == DK_OptimizationRemark ||
- OptDiag->getKind() == DK_MachineOptimizationRemark)))
- ;
- else if (io.mapTag(
- "!Missed",
- (OptDiag->getKind() == DK_OptimizationRemarkMissed ||
- OptDiag->getKind() == DK_MachineOptimizationRemarkMissed)))
- ;
- else if (io.mapTag(
- "!Analysis",
- (OptDiag->getKind() == DK_OptimizationRemarkAnalysis ||
- OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis)))
- ;
- else if (io.mapTag("!AnalysisFPCommute",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisFPCommute))
- ;
- else if (io.mapTag("!AnalysisAliasing",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisAliasing))
- ;
- else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure))
- ;
- else
- llvm_unreachable("Unknown remark type");
-
- // These are read-only for now.
- DiagnosticLocation DL = OptDiag->getLocation();
- StringRef FN =
- GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName());
-
- StringRef PassName(OptDiag->PassName);
- io.mapRequired("Pass", PassName);
- io.mapRequired("Name", OptDiag->RemarkName);
- if (!io.outputting() || DL.isValid())
- io.mapOptional("DebugLoc", DL);
- io.mapRequired("Function", FN);
- io.mapOptional("Hotness", OptDiag->Hotness);
- io.mapOptional("Args", OptDiag->Args);
-}
-
-template <> struct MappingTraits<DiagnosticLocation> {
- static void mapping(IO &io, DiagnosticLocation &DL) {
- assert(io.outputting() && "input not yet implemented");
-
- StringRef File = DL.getRelativePath();
- unsigned Line = DL.getLine();
- unsigned Col = DL.getColumn();
-
- io.mapRequired("File", File);
- io.mapRequired("Line", Line);
- io.mapRequired("Column", Col);
- }
-
- static const bool flow = true;
-};
-
-// Implement this as a mapping for now to get proper quotation for the value.
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
- static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
- assert(io.outputting() && "input not yet implemented");
- io.mapRequired(A.Key.data(), A.Val);
- if (A.Loc.isValid())
- io.mapOptional("DebugLoc", A.Loc);
- }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
diff --git a/lib/IR/DiagnosticPrinter.cpp b/lib/IR/DiagnosticPrinter.cpp
index ee2df9e24f93..496bd18e78e2 100644
--- a/lib/IR/DiagnosticPrinter.cpp
+++ b/lib/IR/DiagnosticPrinter.cpp
@@ -1,9 +1,8 @@
//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index cf9f5759ba53..910a41050b94 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -1,9 +1,8 @@
//===- Dominators.cpp - Dominator Calculation -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index a88478b89bfc..dc28d22548dd 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -1,9 +1,8 @@
//===- Function.cpp - Implement the Global object classes -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,6 +113,11 @@ unsigned Argument::getParamAlignment() const {
return getParent()->getParamAlignment(getArgNo());
}
+Type *Argument::getParamByValType() const {
+ assert(getType()->isPointerTy() && "Only pointers have byval types");
+ return getParent()->getParamByValType(getArgNo());
+}
+
uint64_t Argument::getDereferenceableBytes() const {
assert(getType()->isPointerTy() &&
"Only pointers have dereferenceable bytes");
@@ -146,6 +150,10 @@ bool Argument::hasStructRetAttr() const {
return hasAttribute(Attribute::StructRet);
}
+bool Argument::hasInRegAttr() const {
+ return hasAttribute(Attribute::InReg);
+}
+
bool Argument::hasReturnedAttr() const {
return hasAttribute(Attribute::Returned);
}
@@ -186,6 +194,10 @@ bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
return getParent()->hasParamAttribute(getArgNo(), Kind);
}
+Attribute Argument::getAttribute(Attribute::AttrKind Kind) const {
+ return getParent()->getParamAttribute(getArgNo(), Kind);
+}
+
//===----------------------------------------------------------------------===//
// Helper Methods in Function
//===----------------------------------------------------------------------===//
@@ -521,9 +533,8 @@ static ArrayRef<const char *> findTargetSubtable(StringRef Name) {
// Drop "llvm." and take the first dotted component. That will be the target
// if this is target specific.
StringRef Target = Name.drop_front(5).split('.').first;
- auto It = std::lower_bound(Targets.begin(), Targets.end(), Target,
- [](const IntrinsicTargetInfo &TI,
- StringRef Target) { return TI.Name < Target; });
+ auto It = partition_point(
+ Targets, [=](const IntrinsicTargetInfo &TI) { return TI.Name < Target; });
// We've either found the target or just fall back to the generic set, which
// is always first.
const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0];
@@ -688,7 +699,8 @@ enum IIT_Info {
IIT_STRUCT6 = 38,
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
- IIT_F128 = 41
+ IIT_F128 = 41,
+ IIT_VEC_ELEMENT = 42
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -853,6 +865,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
DecodeIITType(NextElt, Infos, OutputTable);
return;
}
+ case IIT_VEC_ELEMENT: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecElementArgument,
+ ArgInfo));
+ return;
+ }
}
llvm_unreachable("unhandled");
}
@@ -949,10 +967,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::SameVecWidthArgument: {
Type *EltTy = DecodeFixedType(Infos, Tys, Context);
Type *Ty = Tys[D.getArgumentNumber()];
- if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (auto *VTy = dyn_cast<VectorType>(Ty))
return VectorType::get(EltTy, VTy->getNumElements());
- }
- llvm_unreachable("unhandled");
+ return EltTy;
}
case IITDescriptor::PtrToArgument: {
Type *Ty = Tys[D.getArgumentNumber()];
@@ -966,6 +983,12 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
Type *EltTy = VTy->getVectorElementType();
return PointerType::getUnqual(EltTy);
}
+ case IITDescriptor::VecElementArgument: {
+ Type *Ty = Tys[D.getArgumentNumber()];
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return VTy->getElementType();
+ llvm_unreachable("Expected an argument of Vector Type");
+ }
case IITDescriptor::VecOfAnyPtrsToElt:
// Return the overloaded type (which determines the pointers address space)
return Tys[D.getOverloadArgNumber()];
@@ -1020,9 +1043,10 @@ bool Intrinsic::isLeaf(ID id) {
Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
// There can never be multiple globals with the same name of different types,
// because intrinsics must be a specific type.
- return
- cast<Function>(M->getOrInsertFunction(getName(id, Tys),
- getType(M->getContext(), id, Tys)));
+ return cast<Function>(
+ M->getOrInsertFunction(getName(id, Tys),
+ getType(M->getContext(), id, Tys))
+ .getCallee());
}
// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
@@ -1035,12 +1059,26 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
-bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
- SmallVectorImpl<Type*> &ArgTys) {
+using DeferredIntrinsicMatchPair =
+ std::pair<Type *, ArrayRef<Intrinsic::IITDescriptor>>;
+
+static bool matchIntrinsicType(
+ Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ SmallVectorImpl<Type *> &ArgTys,
+ SmallVectorImpl<DeferredIntrinsicMatchPair> &DeferredChecks,
+ bool IsDeferredCheck) {
using namespace Intrinsic;
// If we ran out of descriptors, there are too many arguments.
if (Infos.empty()) return true;
+
+ // Do this before slicing off the 'front' part
+ auto InfosRef = Infos;
+ auto DeferCheck = [&DeferredChecks, &InfosRef](Type *T) {
+ DeferredChecks.emplace_back(T, InfosRef);
+ return false;
+ };
+
IITDescriptor D = Infos.front();
Infos = Infos.slice(1);
@@ -1058,12 +1096,14 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
case IITDescriptor::Vector: {
VectorType *VT = dyn_cast<VectorType>(Ty);
return !VT || VT->getNumElements() != D.Vector_Width ||
- matchIntrinsicType(VT->getElementType(), Infos, ArgTys);
+ matchIntrinsicType(VT->getElementType(), Infos, ArgTys,
+ DeferredChecks, IsDeferredCheck);
}
case IITDescriptor::Pointer: {
PointerType *PT = dyn_cast<PointerType>(Ty);
return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace ||
- matchIntrinsicType(PT->getElementType(), Infos, ArgTys);
+ matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
+ DeferredChecks, IsDeferredCheck);
}
case IITDescriptor::Struct: {
@@ -1072,35 +1112,40 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
return true;
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
- if (matchIntrinsicType(ST->getElementType(i), Infos, ArgTys))
+ if (matchIntrinsicType(ST->getElementType(i), Infos, ArgTys,
+ DeferredChecks, IsDeferredCheck))
return true;
return false;
}
case IITDescriptor::Argument:
- // Two cases here - If this is the second occurrence of an argument, verify
- // that the later instance matches the previous instance.
+ // If this is the second occurrence of an argument,
+ // verify that the later instance matches the previous instance.
if (D.getArgumentNumber() < ArgTys.size())
return Ty != ArgTys[D.getArgumentNumber()];
- // Otherwise, if this is the first instance of an argument, record it and
- // verify the "Any" kind.
- assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error");
- ArgTys.push_back(Ty);
+ if (D.getArgumentNumber() > ArgTys.size() ||
+ D.getArgumentKind() == IITDescriptor::AK_MatchType)
+ return IsDeferredCheck || DeferCheck(Ty);
- switch (D.getArgumentKind()) {
- case IITDescriptor::AK_Any: return false; // Success
- case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
- case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
- case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
- case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
- }
- llvm_unreachable("all argument kinds not covered");
+ assert(D.getArgumentNumber() == ArgTys.size() && !IsDeferredCheck &&
+ "Table consistency error");
+ ArgTys.push_back(Ty);
+
+ switch (D.getArgumentKind()) {
+ case IITDescriptor::AK_Any: return false; // Success
+ case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
+ case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
+ case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
+ case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
+ default: break;
+ }
+ llvm_unreachable("all argument kinds not covered");
case IITDescriptor::ExtendArgument: {
- // This may only be used when referring to a previous vector argument.
+ // If this is a forward reference, defer the check for later.
if (D.getArgumentNumber() >= ArgTys.size())
- return true;
+ return IsDeferredCheck || DeferCheck(Ty);
Type *NewTy = ArgTys[D.getArgumentNumber()];
if (VectorType *VTy = dyn_cast<VectorType>(NewTy))
@@ -1113,9 +1158,9 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
return Ty != NewTy;
}
case IITDescriptor::TruncArgument: {
- // This may only be used when referring to a previous vector argument.
+ // If this is a forward reference, defer the check for later.
if (D.getArgumentNumber() >= ArgTys.size())
- return true;
+ return IsDeferredCheck || DeferCheck(Ty);
Type *NewTy = ArgTys[D.getArgumentNumber()];
if (VectorType *VTy = dyn_cast<VectorType>(NewTy))
@@ -1128,34 +1173,42 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
return Ty != NewTy;
}
case IITDescriptor::HalfVecArgument:
- // This may only be used when referring to a previous vector argument.
+ // If this is a forward reference, defer the check for later.
return D.getArgumentNumber() >= ArgTys.size() ||
!isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
VectorType::getHalfElementsVectorType(
cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
case IITDescriptor::SameVecWidthArgument: {
- if (D.getArgumentNumber() >= ArgTys.size())
- return true;
- VectorType * ReferenceType =
- dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
- VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
- if (!ThisArgType || !ReferenceType ||
- (ReferenceType->getVectorNumElements() !=
- ThisArgType->getVectorNumElements()))
+ if (D.getArgumentNumber() >= ArgTys.size()) {
+ // Defer check and subsequent check for the vector element type.
+ Infos = Infos.slice(1);
+ return IsDeferredCheck || DeferCheck(Ty);
+ }
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ auto *ThisArgType = dyn_cast<VectorType>(Ty);
+ // Both must be vectors of the same number of elements or neither.
+ if ((ReferenceType != nullptr) != (ThisArgType != nullptr))
return true;
- return matchIntrinsicType(ThisArgType->getVectorElementType(),
- Infos, ArgTys);
+ Type *EltTy = Ty;
+ if (ThisArgType) {
+ if (ReferenceType->getVectorNumElements() !=
+ ThisArgType->getVectorNumElements())
+ return true;
+ EltTy = ThisArgType->getVectorElementType();
+ }
+ return matchIntrinsicType(EltTy, Infos, ArgTys, DeferredChecks,
+ IsDeferredCheck);
}
case IITDescriptor::PtrToArgument: {
if (D.getArgumentNumber() >= ArgTys.size())
- return true;
+ return IsDeferredCheck || DeferCheck(Ty);
Type * ReferenceType = ArgTys[D.getArgumentNumber()];
PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
}
case IITDescriptor::PtrToElt: {
if (D.getArgumentNumber() >= ArgTys.size())
- return true;
+ return IsDeferredCheck || DeferCheck(Ty);
VectorType * ReferenceType =
dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
@@ -1165,15 +1218,20 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
}
case IITDescriptor::VecOfAnyPtrsToElt: {
unsigned RefArgNumber = D.getRefArgNumber();
+ if (RefArgNumber >= ArgTys.size()) {
+ if (IsDeferredCheck)
+ return true;
+ // If forward referencing, already add the pointer-vector type and
+ // defer the checks for later.
+ ArgTys.push_back(Ty);
+ return DeferCheck(Ty);
+ }
- // This may only be used when referring to a previous argument.
- if (RefArgNumber >= ArgTys.size())
- return true;
-
- // Record the overloaded type
- assert(D.getOverloadArgNumber() == ArgTys.size() &&
- "Table consistency error");
- ArgTys.push_back(Ty);
+ if (!IsDeferredCheck){
+ assert(D.getOverloadArgNumber() == ArgTys.size() &&
+ "Table consistency error");
+ ArgTys.push_back(Ty);
+ }
// Verify the overloaded type "matches" the Ref type.
// i.e. Ty is a vector with the same width as Ref.
@@ -1191,10 +1249,42 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
return ThisArgEltTy->getElementType() !=
ReferenceType->getVectorElementType();
}
+ case IITDescriptor::VecElementArgument: {
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return IsDeferredCheck ? true : DeferCheck(Ty);
+ auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ return !ReferenceType || Ty != ReferenceType->getElementType();
+ }
}
llvm_unreachable("unhandled");
}
+Intrinsic::MatchIntrinsicTypesResult
+Intrinsic::matchIntrinsicSignature(FunctionType *FTy,
+ ArrayRef<Intrinsic::IITDescriptor> &Infos,
+ SmallVectorImpl<Type *> &ArgTys) {
+ SmallVector<DeferredIntrinsicMatchPair, 2> DeferredChecks;
+ if (matchIntrinsicType(FTy->getReturnType(), Infos, ArgTys, DeferredChecks,
+ false))
+ return MatchIntrinsicTypes_NoMatchRet;
+
+ unsigned NumDeferredReturnChecks = DeferredChecks.size();
+
+ for (auto Ty : FTy->params())
+ if (matchIntrinsicType(Ty, Infos, ArgTys, DeferredChecks, false))
+ return MatchIntrinsicTypes_NoMatchArg;
+
+ for (unsigned I = 0, E = DeferredChecks.size(); I != E; ++I) {
+ DeferredIntrinsicMatchPair &Check = DeferredChecks[I];
+ if (matchIntrinsicType(Check.first, Check.second, ArgTys, DeferredChecks,
+ true))
+ return I < NumDeferredReturnChecks ? MatchIntrinsicTypes_NoMatchRet
+ : MatchIntrinsicTypes_NoMatchArg;
+ }
+
+ return MatchIntrinsicTypes_Match;
+}
+
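
A sketch of driving the new two-phase matcher from outside; the helper name is made up, but the table setup mirrors the remangling code later in this hunk:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    // Returns true if an intrinsic declaration's type matches its table entry.
    static bool signatureMatches(Function *F) {
      SmallVector<Intrinsic::IITDescriptor, 8> Table;
      Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Table);
      ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
      SmallVector<Type *, 4> ArgTys;
      // MatchIntrinsicTypes_Match (0) means success; the other enumerators say
      // whether the return type or an argument failed, after deferred checks.
      return Intrinsic::matchIntrinsicSignature(F->getFunctionType(), TableRef,
                                                ArgTys) ==
             Intrinsic::MatchIntrinsicTypes_Match;
    }
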
bool
Intrinsic::matchIntrinsicVarArg(bool isVarArg,
ArrayRef<Intrinsic::IITDescriptor> &Infos) {
@@ -1228,13 +1318,8 @@ Optional<Function*> Intrinsic::remangleIntrinsicFunction(Function *F) {
getIntrinsicInfoTableEntries(ID, Table);
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
- // If we encounter any problems matching the signature with the descriptor
- // just give up remangling. It's up to verifier to report the discrepancy.
- if (Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, ArgTys))
+ if (Intrinsic::matchIntrinsicSignature(FTy, TableRef, ArgTys))
return None;
- for (auto Ty : FTy->params())
- if (Intrinsic::matchIntrinsicType(Ty, TableRef, ArgTys))
- return None;
if (Intrinsic::matchIntrinsicVarArg(FTy->isVarArg(), TableRef))
return None;
}
@@ -1378,7 +1463,7 @@ void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type,
setEntryCount(ProfileCount(Count, Type), Imports);
}
-ProfileCount Function::getEntryCount() const {
+ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
MDNode *MD = getMetadata(LLVMContext::MD_prof);
if (MD && MD->getOperand(0))
if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0))) {
@@ -1390,7 +1475,8 @@ ProfileCount Function::getEntryCount() const {
if (Count == (uint64_t)-1)
return ProfileCount::getInvalid();
return ProfileCount(Count, PCT_Real);
- } else if (MDS->getString().equals("synthetic_function_entry_count")) {
+ } else if (AllowSynthetic &&
+ MDS->getString().equals("synthetic_function_entry_count")) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
uint64_t Count = CI->getValue().getZExtValue();
return ProfileCount(Count, PCT_Synthetic);
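
A small sketch of the new opt-in for synthetic entry counts; the helper name is illustrative, and the ProfileCount accessors come from the header rather than this diff:

    #include "llvm/IR/Function.h"
    using namespace llvm;

    static uint64_t entryCountOrZero(const Function &F) {
      // Pass true to also accept "synthetic_function_entry_count" metadata;
      // the default (false) preserves the old behaviour of real counts only.
      Function::ProfileCount PC = F.getEntryCount(/*AllowSynthetic=*/true);
      return PC.hasValue() ? PC.getCount() : 0;
    }
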
diff --git a/lib/IR/GVMaterializer.cpp b/lib/IR/GVMaterializer.cpp
index 706926d1b981..35397309a103 100644
--- a/lib/IR/GVMaterializer.cpp
+++ b/lib/IR/GVMaterializer.cpp
@@ -1,9 +1,8 @@
//===-- GVMaterializer.cpp - Base implementation for GV materializers -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index cbd6450a20c9..e2bfc0420bc5 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -1,9 +1,8 @@
//===-- Globals.cpp - Implement the GlobalValue & GlobalVariable class ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,6 +67,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
setUnnamedAddr(Src->getUnnamedAddr());
setDLLStorageClass(Src->getDLLStorageClass());
setDSOLocal(Src->isDSOLocal());
+ setPartition(Src->getPartition());
}
void GlobalValue::removeFromParent() {
@@ -181,6 +181,28 @@ const Comdat *GlobalValue::getComdat() const {
return cast<GlobalObject>(this)->getComdat();
}
+StringRef GlobalValue::getPartition() const {
+ if (!hasPartition())
+ return "";
+ return getContext().pImpl->GlobalValuePartitions[this];
+}
+
+void GlobalValue::setPartition(StringRef S) {
+ // Do nothing if we're clearing the partition and it is already empty.
+ if (!hasPartition() && S.empty())
+ return;
+
+ // Get or create a stable partition name string and put it in the table in the
+ // context.
+ if (!S.empty())
+ S = getContext().pImpl->Saver.save(S);
+ getContext().pImpl->GlobalValuePartitions[this] = S;
+
+ // Update the HasPartition field. Setting the partition to the empty string
+ // means this global no longer has a partition.
+ HasPartition = !S.empty();
+}
+
StringRef GlobalObject::getSectionImpl() const {
assert(hasSection());
return getContext().pImpl->GlobalObjectSections[this];
@@ -193,9 +215,8 @@ void GlobalObject::setSection(StringRef S) {
// Get or create a stable section name string and put it in the table in the
// context.
- if (!S.empty()) {
- S = getContext().pImpl->SectionStrings.insert(S).first->first();
- }
+ if (!S.empty())
+ S = getContext().pImpl->Saver.save(S);
getContext().pImpl->GlobalObjectSections[this] = S;
// Update the HasSectionHashEntryBit. Setting the section to the empty string
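
A small sketch of the partition accessors added above; the partition name "hot" is made up for illustration:

    #include "llvm/IR/GlobalValue.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void tagHotGlobal(GlobalValue &GV) {
      GV.setPartition("hot");   // name is saved in the context's string saver
      if (GV.hasPartition())
        errs() << GV.getName() << " -> " << GV.getPartition() << "\n";
      GV.setPartition("");      // clearing the string also clears HasPartition
    }
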
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index a98189956770..0c6461c9078f 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -1,9 +1,8 @@
//===- IRBuilder.cpp - Builder for LLVM Instrs ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -72,7 +71,7 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
return BCI;
}
-static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+static CallInst *createCallHelper(Function *Callee, ArrayRef<Value *> Ops,
IRBuilderBase *Builder,
const Twine &Name = "",
Instruction *FMFSource = nullptr) {
@@ -84,7 +83,7 @@ static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
return CI;
}
-static InvokeInst *createInvokeHelper(Value *Invokee, BasicBlock *NormalDest,
+static InvokeInst *createInvokeHelper(Function *Invokee, BasicBlock *NormalDest,
BasicBlock *UnwindDest,
ArrayRef<Value *> Ops,
IRBuilderBase *Builder,
@@ -105,7 +104,7 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
Type *Tys[] = { Ptr->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
+ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -135,7 +134,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
Value *Ops[] = {Ptr, Val, Size, getInt32(ElementSize)};
Type *Tys[] = {Ptr->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(
+ Function *TheFn = Intrinsic::getDeclaration(
M, Intrinsic::memset_element_unordered_atomic, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -167,7 +166,7 @@ CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -208,7 +207,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(
+ Function *TheFn = Intrinsic::getDeclaration(
M, Intrinsic::memcpy_element_unordered_atomic, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -247,7 +246,7 @@ CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign,
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
+ Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -284,7 +283,7 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(
+ Function *TheFn = Intrinsic::getDeclaration(
M, Intrinsic::memmove_element_unordered_atomic, Tys);
CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -314,7 +313,7 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
Value *Src) {
Module *M = Builder->GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Src};
- Type *Tys[] = { Src->getType()->getVectorElementType(), Src->getType() };
+ Type *Tys[] = { Src->getType() };
auto Decl = Intrinsic::getDeclaration(M, ID, Tys);
return createCallHelper(Decl, Ops, Builder);
}
@@ -322,20 +321,18 @@ static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID,
CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
Module *M = GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Acc, Src};
- Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(),
- Src->getType()};
+ Type *Tys[] = {Acc->getType(), Src->getType()};
auto Decl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_vector_reduce_fadd, Tys);
+ M, Intrinsic::experimental_vector_reduce_v2_fadd, Tys);
return createCallHelper(Decl, Ops, this);
}
CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
Module *M = GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Acc, Src};
- Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(),
- Src->getType()};
+ Type *Tys[] = {Acc->getType(), Src->getType()};
auto Decl = Intrinsic::getDeclaration(
- M, Intrinsic::experimental_vector_reduce_fmul, Tys);
+ M, Intrinsic::experimental_vector_reduce_v2_fmul, Tys);
return createCallHelper(Decl, Ops, this);
}
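
Caller-side usage is unchanged by the switch to the v2 reduction intrinsics; a sketch with illustrative names:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Sum the lanes of a floating-point vector, starting from an accumulator.
    // After this change the call lowers to llvm.experimental.vector.reduce.v2.fadd.*.
    static Value *emitFAddReduce(IRBuilder<> &Builder, Value *Acc, Value *Vec) {
      return Builder.CreateFAddReduce(Acc, Vec);
    }
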
@@ -409,8 +406,8 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
"lifetime.start requires the size to be an i64");
Value *Ops[] = { Size, Ptr };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start,
- { Ptr->getType() });
+ Function *TheFn =
+ Intrinsic::getDeclaration(M, Intrinsic::lifetime_start, {Ptr->getType()});
return createCallHelper(TheFn, Ops, this);
}
@@ -425,8 +422,8 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
"lifetime.end requires the size to be an i64");
Value *Ops[] = { Size, Ptr };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end,
- { Ptr->getType() });
+ Function *TheFn =
+ Intrinsic::getDeclaration(M, Intrinsic::lifetime_end, {Ptr->getType()});
return createCallHelper(TheFn, Ops, this);
}
@@ -445,7 +442,7 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
// Fill in the single overloaded type: memory object type.
Type *ObjectPtr[1] = {Ptr->getType()};
Module *M = BB->getParent()->getParent();
- Value *TheFn =
+ Function *TheFn =
Intrinsic::getDeclaration(M, Intrinsic::invariant_start, ObjectPtr);
return createCallHelper(TheFn, Ops, this);
}
@@ -456,7 +453,7 @@ CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
Value *Ops[] = { Cond };
Module *M = BB->getParent()->getParent();
- Value *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
+ Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume);
return createCallHelper(FnAssume, Ops, this);
}
@@ -508,7 +505,7 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id,
ArrayRef<Type *> OverloadedTypes,
const Twine &Name) {
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
+ Function *TheFn = Intrinsic::getDeclaration(M, Id, OverloadedTypes);
return createCallHelper(TheFn, Ops, this, Name);
}
@@ -709,7 +706,7 @@ CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint,
Intrinsic::ID ID = Intrinsic::experimental_gc_result;
Module *M = BB->getParent()->getParent();
Type *Types[] = {ResultType};
- Value *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
+ Function *FnGCResult = Intrinsic::getDeclaration(M, ID, Types);
Value *Args[] = {Statepoint};
return createCallHelper(FnGCResult, Args, this, Name);
@@ -722,8 +719,8 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
const Twine &Name) {
Module *M = BB->getParent()->getParent();
Type *Types[] = {ResultType};
- Value *FnGCRelocate =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+ Function *FnGCRelocate =
+ Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
Value *Args[] = {Statepoint,
getInt32(BaseOffset),
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 43010220b9f3..35b06135a828 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -1,9 +1,8 @@
//===--- IRPrintingPasses.cpp - Module and Function printing passes -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 4623f69bd9a3..99da7caaccf0 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -1,9 +1,8 @@
//===- InlineAsm.cpp - Implement the InlineAsm class ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index d861b5288592..ba5629d1662b 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -1,9 +1,8 @@
//===-- Instruction.cpp - Implement the Instruction class -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -139,8 +138,10 @@ void Instruction::dropPoisonGeneratingFlags() {
cast<GetElementPtrInst>(this)->setIsInBounds(false);
break;
}
+ // TODO: FastMathFlags!
}
+
bool Instruction::isExact() const {
return cast<PossiblyExactOperator>(this)->isExact();
}
@@ -302,6 +303,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case CatchRet: return "catchret";
case CatchPad: return "catchpad";
case CatchSwitch: return "catchswitch";
+ case CallBr: return "callbr";
// Standard unary operators...
case FNeg: return "fneg";
@@ -406,6 +408,10 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes() &&
CI->hasIdenticalOperandBundleSchema(*cast<InvokeInst>(I2));
+ if (const CallBrInst *CI = dyn_cast<CallBrInst>(I1))
+ return CI->getCallingConv() == cast<CallBrInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallBrInst>(I2)->getAttributes() &&
+ CI->hasIdenticalOperandBundleSchema(*cast<CallBrInst>(I2));
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
@@ -516,9 +522,9 @@ bool Instruction::mayReadFromMemory() const {
case Instruction::CatchRet:
return true;
case Instruction::Call:
- return !cast<CallInst>(this)->doesNotAccessMemory();
case Instruction::Invoke:
- return !cast<InvokeInst>(this)->doesNotAccessMemory();
+ case Instruction::CallBr:
+ return !cast<CallBase>(this)->doesNotAccessMemory();
case Instruction::Store:
return !cast<StoreInst>(this)->isUnordered();
}
@@ -536,9 +542,9 @@ bool Instruction::mayWriteToMemory() const {
case Instruction::CatchRet:
return true;
case Instruction::Call:
- return !cast<CallInst>(this)->onlyReadsMemory();
case Instruction::Invoke:
- return !cast<InvokeInst>(this)->onlyReadsMemory();
+ case Instruction::CallBr:
+ return !cast<CallBase>(this)->onlyReadsMemory();
case Instruction::Load:
return !cast<LoadInst>(this)->isUnordered();
}
@@ -671,6 +677,13 @@ void Instruction::setSuccessor(unsigned idx, BasicBlock *B) {
llvm_unreachable("not a terminator");
}
+void Instruction::replaceSuccessorWith(BasicBlock *OldBB, BasicBlock *NewBB) {
+ for (unsigned Idx = 0, NumSuccessors = Instruction::getNumSuccessors();
+ Idx != NumSuccessors; ++Idx)
+ if (getSuccessor(Idx) == OldBB)
+ setSuccessor(Idx, NewBB);
+}
+
Instruction *Instruction::cloneImpl() const {
llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
}
@@ -731,52 +744,9 @@ Instruction *Instruction::clone() const {
return New;
}
-void Instruction::updateProfWeight(uint64_t S, uint64_t T) {
- auto *ProfileData = getMetadata(LLVMContext::MD_prof);
- if (ProfileData == nullptr)
- return;
-
- auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
- if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
- !ProfDataName->getString().equals("VP")))
- return;
-
- MDBuilder MDB(getContext());
- SmallVector<Metadata *, 3> Vals;
- Vals.push_back(ProfileData->getOperand(0));
- APInt APS(128, S), APT(128, T);
- if (ProfDataName->getString().equals("branch_weights"))
- for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
- // Using APInt::div may be expensive, but most cases should fit 64 bits.
- APInt Val(128,
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i))
- ->getValue()
- .getZExtValue());
- Val *= APS;
- Vals.push_back(MDB.createConstant(
- ConstantInt::get(Type::getInt64Ty(getContext()),
- Val.udiv(APT).getLimitedValue())));
- }
- else if (ProfDataName->getString().equals("VP"))
- for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
- // The first value is the key of the value profile, which will not change.
- Vals.push_back(ProfileData->getOperand(i));
- // Using APInt::div may be expensive, but most cases should fit 64 bits.
- APInt Val(128,
- mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
- ->getValue()
- .getZExtValue());
- Val *= APS;
- Vals.push_back(MDB.createConstant(
- ConstantInt::get(Type::getInt64Ty(getContext()),
- Val.udiv(APT).getLimitedValue())));
- }
- setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
-}
-
void Instruction::setProfWeight(uint64_t W) {
- assert((isa<CallInst>(this) || isa<InvokeInst>(this)) &&
- "Can only set weights for call and invoke instrucitons");
+ assert(isa<CallBase>(this) &&
+ "Can only set weights for call-like instructions");
SmallVector<uint32_t, 1> Weights;
Weights.push_back(W);
MDBuilder MDB(getContext());
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 06b46724a87f..2e7cad103c12 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1,9 +1,8 @@
//===- Instructions.cpp - Implement the LLVM instructions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
@@ -45,6 +45,12 @@
using namespace llvm;
+static cl::opt<bool> SwitchInstProfUpdateWrapperStrict(
+ "switch-inst-prof-update-wrapper-strict", cl::Hidden,
+ cl::desc("Assert that prof branch_weights metadata is valid when creating "
+ "an instance of SwitchInstProfUpdateWrapper"),
+ cl::init(false));
+
//===----------------------------------------------------------------------===//
// AllocaInst Class
//===----------------------------------------------------------------------===//
@@ -257,6 +263,11 @@ void LandingPadInst::addClause(Constant *Val) {
Function *CallBase::getCaller() { return getParent()->getParent(); }
+unsigned CallBase::getNumSubclassExtraOperandsDynamic() const {
+ assert(getOpcode() == Instruction::CallBr && "Unexpected opcode!");
+ return cast<CallBrInst>(this)->getNumIndirectDests() + 1;
+}
+
bool CallBase::isIndirectCall() const {
const Value *V = getCalledValue();
if (isa<Function>(V) || isa<Constant>(V))
@@ -267,6 +278,21 @@ bool CallBase::isIndirectCall() const {
return true;
}
+/// Tests if this call site must be tail call optimized. Only a CallInst can
+/// be tail call optimized.
+bool CallBase::isMustTailCall() const {
+ if (auto *CI = dyn_cast<CallInst>(this))
+ return CI->isMustTailCall();
+ return false;
+}
+
+/// Tests if this call site is marked as a tail call.
+bool CallBase::isTailCall() const {
+ if (auto *CI = dyn_cast<CallInst>(this))
+ return CI->isTailCall();
+ return false;
+}
+
Intrinsic::ID CallBase::getIntrinsicID() const {
if (auto *F = getCalledFunction())
return F->getIntrinsicID();
@@ -429,8 +455,8 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
Instruction *InsertPt) {
std::vector<Value *> Args(CI->arg_begin(), CI->arg_end());
- auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(),
- InsertPt);
+ auto *NewCI = CallInst::Create(CI->getFunctionType(), CI->getCalledValue(),
+ Args, OpB, CI->getName(), InsertPt);
NewCI->setTailCallKind(CI->getTailCallKind());
NewCI->setCallingConv(CI->getCallingConv());
NewCI->SubclassOptionalData = CI->SubclassOptionalData;
@@ -439,14 +465,57 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
return NewCI;
}
+// Update the profile weight for a call instruction by scaling it using the
+// ratio of S/T. The meaning of "branch_weights" metadata for a call
+// instruction is transferred to represent the call count.
+void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
+ auto *ProfileData = getMetadata(LLVMContext::MD_prof);
+ if (ProfileData == nullptr)
+ return;
+ auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+ if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
+ !ProfDataName->getString().equals("VP")))
+ return;
+ if (T == 0) {
+ LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
+ "div by 0. Ignoring. Likely the function "
+ << getParent()->getParent()->getName()
+ << " has 0 entry count, and contains call instructions "
+ "with non-zero prof info.");
+ return;
+ }
-
-
-
-
-
+ MDBuilder MDB(getContext());
+ SmallVector<Metadata *, 3> Vals;
+ Vals.push_back(ProfileData->getOperand(0));
+ APInt APS(128, S), APT(128, T);
+ if (ProfDataName->getString().equals("branch_weights") &&
+ ProfileData->getNumOperands() > 0) {
+ // Using APInt::div may be expensive, but most cases should fit 64 bits.
+ APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
+ ->getValue()
+ .getZExtValue());
+ Val *= APS;
+ Vals.push_back(MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(getContext()), Val.udiv(APT).getLimitedValue())));
+ } else if (ProfDataName->getString().equals("VP"))
+ for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
+ // The first value is the key of the value profile, which will not change.
+ Vals.push_back(ProfileData->getOperand(i));
+ // Using APInt::div may be expensive, but most cases should fit 64 bits.
+ APInt Val(128,
+ mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
+ ->getValue()
+ .getZExtValue());
+ Val *= APS;
+ Vals.push_back(MDB.createConstant(
+ ConstantInt::get(Type::getInt64Ty(getContext()),
+ Val.udiv(APT).getLimitedValue())));
+ }
+ setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
+}
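
A sketch of the intended use, scaling a duplicated call site's count by the ratio of new to old entry counts; the helper and variable names are illustrative:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Rescale the call's "branch_weights"/"VP" profile metadata by
    // NewEntryCount / OldEntryCount. A zero denominator is ignored (see the
    // guard above), leaving the metadata untouched.
    static void rescaleCallCount(CallInst *CI, uint64_t NewEntryCount,
                                 uint64_t OldEntryCount) {
      CI->updateProfWeight(NewEntryCount, OldEntryCount);
    }
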
/// IsConstantOne - Return true only if val is constant int 1
static bool IsConstantOne(Value *val) {
@@ -503,7 +572,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
BasicBlock *BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
Module *M = BB->getParent()->getParent();
Type *BPTy = Type::getInt8PtrTy(BB->getContext());
- Value *MallocFunc = MallocF;
+ FunctionCallee MallocFunc = MallocF;
if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy);
@@ -527,7 +596,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
}
}
MCall->setTailCall();
- if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ if (Function *F = dyn_cast<Function>(MallocFunc.getCallee())) {
MCall->setCallingConv(F->getCallingConv());
if (!F->returnDoesNotAlias())
F->setReturnDoesNotAlias();
@@ -600,7 +669,7 @@ static Instruction *createFree(Value *Source,
Type *VoidTy = Type::getVoidTy(M->getContext());
Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
- Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy);
+ FunctionCallee FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy);
CallInst *Result = nullptr;
Value *PtrCast = Source;
if (InsertBefore) {
@@ -613,7 +682,7 @@ static Instruction *createFree(Value *Source,
Result = CallInst::Create(FreeFunc, PtrCast, Bundles, "");
}
Result->setTailCall();
- if (Function *F = dyn_cast<Function>(FreeFunc))
+ if (Function *F = dyn_cast<Function>(FreeFunc.getCallee()))
Result->setCallingConv(F->getCallingConv());
return Result;
@@ -697,9 +766,9 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef<OperandBundleDef> OpB,
Instruction *InsertPt) {
std::vector<Value *> Args(II->arg_begin(), II->arg_end());
- auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(),
- II->getUnwindDest(), Args, OpB,
- II->getName(), InsertPt);
+ auto *NewII = InvokeInst::Create(II->getFunctionType(), II->getCalledValue(),
+ II->getNormalDest(), II->getUnwindDest(),
+ Args, OpB, II->getName(), InsertPt);
NewII->setCallingConv(II->getCallingConv());
NewII->SubclassOptionalData = II->SubclassOptionalData;
NewII->setAttributes(II->getAttributes());
@@ -713,6 +782,76 @@ LandingPadInst *InvokeInst::getLandingPadInst() const {
}
//===----------------------------------------------------------------------===//
+// CallBrInst Implementation
+//===----------------------------------------------------------------------===//
+
+void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough,
+ ArrayRef<BasicBlock *> IndirectDests,
+ ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr) {
+ this->FTy = FTy;
+
+ assert((int)getNumOperands() ==
+ ComputeNumOperands(Args.size(), IndirectDests.size(),
+ CountBundleInputs(Bundles)) &&
+ "NumOperands not set up?");
+ NumIndirectDests = IndirectDests.size();
+ setDefaultDest(Fallthrough);
+ for (unsigned i = 0; i != NumIndirectDests; ++i)
+ setIndirectDest(i, IndirectDests[i]);
+ setCalledOperand(Fn);
+
+#ifndef NDEBUG
+ assert(((Args.size() == FTy->getNumParams()) ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
+ "Calling a function with bad signature");
+
+ for (unsigned i = 0, e = Args.size(); i != e; i++)
+ assert((i >= FTy->getNumParams() ||
+ FTy->getParamType(i) == Args[i]->getType()) &&
+ "Calling a function with a bad signature!");
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+
+ auto It = populateBundleOperandInfos(Bundles, Args.size());
+ (void)It;
+ assert(It + 2 + IndirectDests.size() == op_end() && "Should add up!");
+
+ setName(NameStr);
+}
+
+CallBrInst::CallBrInst(const CallBrInst &CBI)
+ : CallBase(CBI.Attrs, CBI.FTy, CBI.getType(), Instruction::CallBr,
+ OperandTraits<CallBase>::op_end(this) - CBI.getNumOperands(),
+ CBI.getNumOperands()) {
+ setCallingConv(CBI.getCallingConv());
+ std::copy(CBI.op_begin(), CBI.op_end(), op_begin());
+ std::copy(CBI.bundle_op_info_begin(), CBI.bundle_op_info_end(),
+ bundle_op_info_begin());
+ SubclassOptionalData = CBI.SubclassOptionalData;
+ NumIndirectDests = CBI.NumIndirectDests;
+}
+
+CallBrInst *CallBrInst::Create(CallBrInst *CBI, ArrayRef<OperandBundleDef> OpB,
+ Instruction *InsertPt) {
+ std::vector<Value *> Args(CBI->arg_begin(), CBI->arg_end());
+
+ auto *NewCBI = CallBrInst::Create(CBI->getFunctionType(),
+ CBI->getCalledValue(),
+ CBI->getDefaultDest(),
+ CBI->getIndirectDests(),
+ Args, OpB, CBI->getName(), InsertPt);
+ NewCBI->setCallingConv(CBI->getCallingConv());
+ NewCBI->SubclassOptionalData = CBI->SubclassOptionalData;
+ NewCBI->setAttributes(CBI->getAttributes());
+ NewCBI->setDebugLoc(CBI->getDebugLoc());
+ NewCBI->NumIndirectDests = CBI->NumIndirectDests;
+ return NewCBI;
+}
+
+//===----------------------------------------------------------------------===//
// ReturnInst Implementation
//===----------------------------------------------------------------------===//
@@ -1408,6 +1547,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) {
return "umax";
case AtomicRMWInst::UMin:
return "umin";
+ case AtomicRMWInst::FAdd:
+ return "fadd";
+ case AtomicRMWInst::FSub:
+ return "fsub";
case AtomicRMWInst::BAD_BINOP:
return "<invalid operation>";
}
@@ -1666,6 +1809,25 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
setName(Name);
}
+void ShuffleVectorInst::commute() {
+ int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+ int NumMaskElts = getMask()->getType()->getVectorNumElements();
+ SmallVector<Constant*, 16> NewMask(NumMaskElts);
+ Type *Int32Ty = Type::getInt32Ty(getContext());
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int MaskElt = getMaskValue(i);
+ if (MaskElt == -1) {
+ NewMask[i] = UndefValue::get(Int32Ty);
+ continue;
+ }
+ assert(MaskElt >= 0 && MaskElt < 2 * NumOpElts && "Out-of-range mask");
+ MaskElt = (MaskElt < NumOpElts) ? MaskElt + NumOpElts : MaskElt - NumOpElts;
+ NewMask[i] = ConstantInt::get(Int32Ty, MaskElt);
+ }
+ Op<2>() = ConstantVector::get(NewMask);
+ Op<0>().swap(Op<1>());
+}
+
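
A sketch of what commute() does to an existing shuffle; the concrete types and mask are illustrative:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Given   %s = shufflevector <4 x i32> %A, <4 x i32> %B, <2 x i32> <i32 0, i32 5>
    // commute() swaps the operands and remaps each mask element into the other
    // operand: %s = shufflevector <4 x i32> %B, <4 x i32> %A, <2 x i32> <i32 4, i32 1>
    // (undef mask lanes stay undef).
    static void commuteShuffle(ShuffleVectorInst &SVI) { SVI.commute(); }
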
bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const Value *Mask) {
// V1 and V2 must be vectors of the same type.
@@ -3714,6 +3876,141 @@ void SwitchInst::growOperands() {
growHungoffUses(ReservedSpace);
}
+MDNode *
+SwitchInstProfUpdateWrapper::getProfBranchWeightsMD(const SwitchInst &SI) {
+ if (MDNode *ProfileData = SI.getMetadata(LLVMContext::MD_prof))
+ if (auto *MDName = dyn_cast<MDString>(ProfileData->getOperand(0)))
+ if (MDName->getString() == "branch_weights")
+ return ProfileData;
+ return nullptr;
+}
+
+MDNode *SwitchInstProfUpdateWrapper::buildProfBranchWeightsMD() {
+ assert(State == Changed && "called only if metadata has changed");
+
+ if (!Weights)
+ return nullptr;
+
+ assert(SI.getNumSuccessors() == Weights->size() &&
+ "num of prof branch_weights must accord with num of successors");
+
+ bool AllZeroes =
+ all_of(Weights.getValue(), [](uint32_t W) { return W == 0; });
+
+ if (AllZeroes || Weights.getValue().size() < 2)
+ return nullptr;
+
+ return MDBuilder(SI.getParent()->getContext()).createBranchWeights(*Weights);
+}
+
+void SwitchInstProfUpdateWrapper::init() {
+ MDNode *ProfileData = getProfBranchWeightsMD(SI);
+ if (!ProfileData) {
+ State = Initialized;
+ return;
+ }
+
+ if (ProfileData->getNumOperands() != SI.getNumSuccessors() + 1) {
+ State = Invalid;
+ if (SwitchInstProfUpdateWrapperStrict)
+ llvm_unreachable("number of prof branch_weights metadata operands does "
+ "not correspond to number of succesors");
+ return;
+ }
+
+ SmallVector<uint32_t, 8> Weights;
+ for (unsigned CI = 1, CE = SI.getNumSuccessors(); CI <= CE; ++CI) {
+ ConstantInt *C = mdconst::extract<ConstantInt>(ProfileData->getOperand(CI));
+ uint32_t CW = C->getValue().getZExtValue();
+ Weights.push_back(CW);
+ }
+ State = Initialized;
+ this->Weights = std::move(Weights);
+}
+
+SwitchInst::CaseIt
+SwitchInstProfUpdateWrapper::removeCase(SwitchInst::CaseIt I) {
+ if (Weights) {
+ assert(SI.getNumSuccessors() == Weights->size() &&
+ "num of prof branch_weights must accord with num of successors");
+ State = Changed;
+ // Copy the last case to the place of the removed one and shrink.
+ // This is tightly coupled with the way SwitchInst::removeCase() removes
+ // the cases in SwitchInst::removeCase(CaseIt).
+ Weights.getValue()[I->getCaseIndex() + 1] = Weights.getValue().back();
+ Weights.getValue().pop_back();
+ }
+ return SI.removeCase(I);
+}
+
+void SwitchInstProfUpdateWrapper::addCase(
+ ConstantInt *OnVal, BasicBlock *Dest,
+ SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
+ SI.addCase(OnVal, Dest);
+
+ if (State == Invalid)
+ return;
+
+ if (!Weights && W && *W) {
+ State = Changed;
+ Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
+ Weights.getValue()[SI.getNumSuccessors() - 1] = *W;
+ } else if (Weights) {
+ State = Changed;
+ Weights.getValue().push_back(W ? *W : 0);
+ }
+ if (Weights)
+ assert(SI.getNumSuccessors() == Weights->size() &&
+ "num of prof branch_weights must accord with num of successors");
+}
+
+SymbolTableList<Instruction>::iterator
+SwitchInstProfUpdateWrapper::eraseFromParent() {
+ // Instruction is erased. Mark as unchanged to not touch it in the destructor.
+ if (State != Invalid) {
+ State = Initialized;
+ if (Weights)
+ Weights->resize(0);
+ }
+ return SI.eraseFromParent();
+}
+
+SwitchInstProfUpdateWrapper::CaseWeightOpt
+SwitchInstProfUpdateWrapper::getSuccessorWeight(unsigned idx) {
+ if (!Weights)
+ return None;
+ return Weights.getValue()[idx];
+}
+
+void SwitchInstProfUpdateWrapper::setSuccessorWeight(
+ unsigned idx, SwitchInstProfUpdateWrapper::CaseWeightOpt W) {
+ if (!W || State == Invalid)
+ return;
+
+ if (!Weights && *W)
+ Weights = SmallVector<uint32_t, 8>(SI.getNumSuccessors(), 0);
+
+ if (Weights) {
+ auto &OldW = Weights.getValue()[idx];
+ if (*W != OldW) {
+ State = Changed;
+ OldW = *W;
+ }
+ }
+}
+
+SwitchInstProfUpdateWrapper::CaseWeightOpt
+SwitchInstProfUpdateWrapper::getSuccessorWeight(const SwitchInst &SI,
+ unsigned idx) {
+ if (MDNode *ProfileData = getProfBranchWeightsMD(SI))
+ if (ProfileData->getNumOperands() == SI.getNumSuccessors() + 1)
+ return mdconst::extract<ConstantInt>(ProfileData->getOperand(idx + 1))
+ ->getValue()
+ .getZExtValue();
+
+ return None;
+}
+
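A minimal usage sketch for the wrapper above (hypothetical transform code; SI, OnVal and NewDest are assumed to exist and the relevant headers to be included). The weight vector tracks the case list, and the updated branch_weights metadata is expected to be written back when the wrapper goes out of scope:

  SwitchInstProfUpdateWrapper SIW(SI);
  SIW.addCase(OnVal, NewDest, /*W=*/10);    // weight recorded only if SI carries prof data
  if (SwitchInstProfUpdateWrapper::CaseWeightOpt W0 = SIW.getSuccessorWeight(0))
    SIW.setSuccessorWeight(0, *W0 + 1);     // marks the wrapper as Changed
  SIW.removeCase(SI.case_begin());          // the weight shrinks together with the case list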
//===----------------------------------------------------------------------===//
// IndirectBrInst Implementation
//===----------------------------------------------------------------------===//
@@ -3978,6 +4275,14 @@ InvokeInst *InvokeInst::cloneImpl() const {
return new(getNumOperands()) InvokeInst(*this);
}
+CallBrInst *CallBrInst::cloneImpl() const {
+ if (hasOperandBundles()) {
+ unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo);
+ return new (getNumOperands(), DescriptorBytes) CallBrInst(*this);
+ }
+ return new (getNumOperands()) CallBrInst(*this);
+}
+
ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); }
CleanupReturnInst *CleanupReturnInst::cloneImpl() const {
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index df3a38ac147f..7a042326f67f 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -1,9 +1,8 @@
//===-- IntrinsicInst.cpp - Intrinsic Instruction Wrappers ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -103,45 +103,94 @@ Value *InstrProfIncrementInst::getStep() const {
return ConstantInt::get(Type::getInt64Ty(Context), 1);
}
-ConstrainedFPIntrinsic::RoundingMode
+Optional<ConstrainedFPIntrinsic::RoundingMode>
ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
- return rmInvalid;
- StringRef RoundingArg = cast<MDString>(MD)->getString();
+ return None;
+ return StrToRoundingMode(cast<MDString>(MD)->getString());
+}
+Optional<ConstrainedFPIntrinsic::RoundingMode>
+ConstrainedFPIntrinsic::StrToRoundingMode(StringRef RoundingArg) {
// For dynamic rounding mode, we use round to nearest but we will set the
// 'exact' SDNodeFlag so that the value will not be rounded.
- return StringSwitch<RoundingMode>(RoundingArg)
+ return StringSwitch<Optional<RoundingMode>>(RoundingArg)
.Case("round.dynamic", rmDynamic)
.Case("round.tonearest", rmToNearest)
.Case("round.downward", rmDownward)
.Case("round.upward", rmUpward)
.Case("round.towardzero", rmTowardZero)
- .Default(rmInvalid);
+ .Default(None);
+}
+
+Optional<StringRef>
+ConstrainedFPIntrinsic::RoundingModeToStr(RoundingMode UseRounding) {
+ Optional<StringRef> RoundingStr = None;
+ switch (UseRounding) {
+ case ConstrainedFPIntrinsic::rmDynamic:
+ RoundingStr = "round.dynamic";
+ break;
+ case ConstrainedFPIntrinsic::rmToNearest:
+ RoundingStr = "round.tonearest";
+ break;
+ case ConstrainedFPIntrinsic::rmDownward:
+ RoundingStr = "round.downward";
+ break;
+ case ConstrainedFPIntrinsic::rmUpward:
+ RoundingStr = "round.upward";
+ break;
+ case ConstrainedFPIntrinsic::rmTowardZero:
+    RoundingStr = "round.towardzero";
+ break;
+ }
+ return RoundingStr;
}
-ConstrainedFPIntrinsic::ExceptionBehavior
+Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
- return ebInvalid;
- StringRef ExceptionArg = cast<MDString>(MD)->getString();
- return StringSwitch<ExceptionBehavior>(ExceptionArg)
+ return None;
+ return StrToExceptionBehavior(cast<MDString>(MD)->getString());
+}
+
+Optional<ConstrainedFPIntrinsic::ExceptionBehavior>
+ConstrainedFPIntrinsic::StrToExceptionBehavior(StringRef ExceptionArg) {
+ return StringSwitch<Optional<ExceptionBehavior>>(ExceptionArg)
.Case("fpexcept.ignore", ebIgnore)
.Case("fpexcept.maytrap", ebMayTrap)
.Case("fpexcept.strict", ebStrict)
- .Default(ebInvalid);
+ .Default(None);
+}
+
+Optional<StringRef>
+ConstrainedFPIntrinsic::ExceptionBehaviorToStr(ExceptionBehavior UseExcept) {
+ Optional<StringRef> ExceptStr = None;
+ switch (UseExcept) {
+ case ConstrainedFPIntrinsic::ebStrict:
+ ExceptStr = "fpexcept.strict";
+ break;
+ case ConstrainedFPIntrinsic::ebIgnore:
+ ExceptStr = "fpexcept.ignore";
+ break;
+ case ConstrainedFPIntrinsic::ebMayTrap:
+ ExceptStr = "fpexcept.maytrap";
+ break;
+ }
+ return ExceptStr;
}
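A short round-trip sketch for the helpers above (CFPI is assumed to point at a constrained FP intrinsic; errs() comes from raw_ostream.h). Both getters now return None instead of the old rmInvalid/ebInvalid sentinels, so callers must unwrap the Optional:

  if (Optional<ConstrainedFPIntrinsic::RoundingMode> RM = CFPI->getRoundingMode())
    if (Optional<StringRef> Str = ConstrainedFPIntrinsic::RoundingModeToStr(*RM))
      errs() << "rounding: " << *Str << "\n";
  if (Optional<ConstrainedFPIntrinsic::ExceptionBehavior> EB = CFPI->getExceptionBehavior())
    errs() << "may trap: " << (*EB != ConstrainedFPIntrinsic::ebIgnore) << "\n";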
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
return false;
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
@@ -169,3 +218,42 @@ bool ConstrainedFPIntrinsic::isTernaryOp() const {
}
}
+Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
+ switch (getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ return Instruction::Add;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ return Instruction::Sub;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ return Instruction::Mul;
+ default:
+ llvm_unreachable("Invalid intrinsic");
+ }
+}
+
+bool BinaryOpIntrinsic::isSigned() const {
+ switch (getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned BinaryOpIntrinsic::getNoWrapKind() const {
+ if (isSigned())
+ return OverflowingBinaryOperator::NoSignedWrap;
+ else
+ return OverflowingBinaryOperator::NoUnsignedWrap;
+}
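A hedged sketch of how a pass might use these helpers to handle the overflow and saturating intrinsics uniformly (I is an arbitrary instruction being visited):

  if (auto *BO = dyn_cast<BinaryOpIntrinsic>(&I)) {
    Instruction::BinaryOps Opc = BO->getBinaryOp();   // Add, Sub or Mul
    unsigned NoWrap = BO->getNoWrapKind();            // NoSignedWrap or NoUnsignedWrap
    bool Signed = BO->isSigned();
    // ... fold or simplify based on Opc, NoWrap and Signed ...
  }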
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 944d8265151d..e1cdf6b539db 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -1,9 +1,8 @@
//===-- LLVMContext.cpp - Implement LLVMContext ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -62,6 +62,8 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
{MD_callees, "callees"},
{MD_irr_loop, "irr_loop"},
{MD_access_group, "llvm.access.group"},
+ {MD_callback, "callback"},
+ {MD_preserve_access_index, "llvm.preserve.access.index"},
};
for (auto &MDKind : MDKinds) {
@@ -160,12 +162,15 @@ uint64_t LLVMContext::getDiagnosticsHotnessThreshold() const {
return pImpl->DiagnosticsHotnessThreshold;
}
-yaml::Output *LLVMContext::getDiagnosticsOutputFile() {
- return pImpl->DiagnosticsOutputFile.get();
+RemarkStreamer *LLVMContext::getRemarkStreamer() {
+ return pImpl->RemarkDiagStreamer.get();
}
-
-void LLVMContext::setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F) {
- pImpl->DiagnosticsOutputFile = std::move(F);
+const RemarkStreamer *LLVMContext::getRemarkStreamer() const {
+ return const_cast<LLVMContext *>(this)->getRemarkStreamer();
+}
+void LLVMContext::setRemarkStreamer(
+ std::unique_ptr<RemarkStreamer> RemarkStreamer) {
+ pImpl->RemarkDiagStreamer = std::move(RemarkStreamer);
}
DiagnosticHandler::DiagnosticHandlerTy
@@ -228,14 +233,10 @@ LLVMContext::getDiagnosticMessagePrefix(DiagnosticSeverity Severity) {
}
void LLVMContext::diagnose(const DiagnosticInfo &DI) {
- if (auto *OptDiagBase = dyn_cast<DiagnosticInfoOptimizationBase>(&DI)) {
- yaml::Output *Out = getDiagnosticsOutputFile();
- if (Out) {
- // For remarks the << operator takes a reference to a pointer.
- auto *P = const_cast<DiagnosticInfoOptimizationBase *>(OptDiagBase);
- *Out << P;
- }
- }
+ if (auto *OptDiagBase = dyn_cast<DiagnosticInfoOptimizationBase>(&DI))
+ if (RemarkStreamer *RS = getRemarkStreamer())
+ RS->emit(*OptDiagBase);
+
// If there is a report handler, use it.
if (pImpl->DiagHandler &&
(!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) &&
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index 3c34ca55c224..c6ab2c6f213a 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -1,9 +1,8 @@
//===- LLVMContextImpl.cpp - Implement LLVMContextImpl --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 2d120869860a..78cf707e0e74 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1,9 +1,8 @@
//===- LLVMContextImpl.h - The LLVMContextImpl opaque class -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,16 +30,17 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/StringSaver.h"
#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cassert>
@@ -789,6 +789,31 @@ template <> struct MDNodeKeyImpl<DINamespace> {
}
};
+template <> struct MDNodeKeyImpl<DICommonBlock> {
+ Metadata *Scope;
+ Metadata *Decl;
+ MDString *Name;
+ Metadata *File;
+ unsigned LineNo;
+
+ MDNodeKeyImpl(Metadata *Scope, Metadata *Decl, MDString *Name,
+ Metadata *File, unsigned LineNo)
+ : Scope(Scope), Decl(Decl), Name(Name), File(File), LineNo(LineNo) {}
+ MDNodeKeyImpl(const DICommonBlock *N)
+ : Scope(N->getRawScope()), Decl(N->getRawDecl()), Name(N->getRawName()),
+ File(N->getRawFile()), LineNo(N->getLineNo()) {}
+
+ bool isKeyOf(const DICommonBlock *RHS) const {
+ return Scope == RHS->getRawScope() && Decl == RHS->getRawDecl() &&
+ Name == RHS->getRawName() && File == RHS->getRawFile() &&
+ LineNo == RHS->getLineNo();
+ }
+
+ unsigned getHashValue() const {
+ return hash_combine(Scope, Decl, Name, File, LineNo);
+ }
+};
+
template <> struct MDNodeKeyImpl<DIModule> {
Metadata *Scope;
MDString *Name;
@@ -1227,7 +1252,7 @@ public:
bool RespectDiagnosticFilters = false;
bool DiagnosticsHotnessRequested = false;
uint64_t DiagnosticsHotnessThreshold = 0;
- std::unique_ptr<yaml::Output> DiagnosticsOutputFile;
+ std::unique_ptr<RemarkStreamer> RemarkDiagStreamer;
LLVMContext::YieldCallbackTy YieldCallback = nullptr;
void *YieldOpaqueHandle = nullptr;
@@ -1296,9 +1321,8 @@ public:
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
- /// TypeAllocator - All dynamically allocated types are allocated from this.
- /// They live forever until the context is torn down.
- BumpPtrAllocator TypeAllocator;
+ BumpPtrAllocator Alloc;
+ UniqueStringSaver Saver{Alloc};
DenseMap<unsigned, IntegerType*> IntegerTypes;
@@ -1310,7 +1334,7 @@ public:
unsigned NamedStructTypesUniqueID = 0;
DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
- DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+ DenseMap<std::pair<Type *, ElementCount>, VectorType*> VectorTypes;
DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
@@ -1332,8 +1356,8 @@ public:
/// Collection of per-GlobalObject sections used in this context.
DenseMap<const GlobalObject *, StringRef> GlobalObjectSections;
- /// Stable collection of section strings.
- StringSet<> SectionStrings;
+ /// Collection of per-GlobalValue partitions used in this context.
+ DenseMap<const GlobalValue *, StringRef> GlobalValuePartitions;
/// DiscriminatorTable - This table maps file:line locations to an
/// integer representing the next DWARF path discriminator to assign to
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index 01d14f17bba5..c575d6e782b9 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -1,9 +1,8 @@
//===- LegacyPassManager.cpp - LLVM Pass Infrastructure Implementation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -310,7 +310,7 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
OS << "value";
OS << " '";
- V->printAsOperand(OS, /*PrintTy=*/false, M);
+ V->printAsOperand(OS, /*PrintType=*/false, M);
OS << "'\n";
}
@@ -1629,10 +1629,14 @@ bool FPPassManager::runOnFunction(Function &F) {
FunctionSize = F.getInstructionCount();
}
+ llvm::TimeTraceScope FunctionScope("OptFunction", F.getName());
+
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
bool LocalChanged = false;
+ llvm::TimeTraceScope PassScope("RunPass", FP->getPassName());
+
dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
dumpRequiredSet(FP);
@@ -1669,12 +1673,14 @@ bool FPPassManager::runOnFunction(Function &F) {
recordAvailableAnalysis(FP);
removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
}
+
return Changed;
}
bool FPPassManager::runOnModule(Module &M) {
bool Changed = false;
+ llvm::TimeTraceScope TimeScope("OptModule", M.getName());
for (Function &F : M)
Changed |= runOnFunction(F);
@@ -1707,6 +1713,8 @@ bool FPPassManager::doFinalization(Module &M) {
/// the module, and if so, return true.
bool
MPPassManager::runOnModule(Module &M) {
+ llvm::TimeTraceScope TimeScope("OptModule", M.getName());
+
bool Changed = false;
// Initialize on-the-fly passes
@@ -1719,14 +1727,12 @@ MPPassManager::runOnModule(Module &M) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
Changed |= getContainedPass(Index)->doInitialization(M);
- unsigned InstrCount, ModuleCount = 0;
+ unsigned InstrCount;
StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
// Collect the initial size of the module.
- if (EmitICRemark) {
+ if (EmitICRemark)
InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
- ModuleCount = InstrCount;
- }
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
@@ -1744,7 +1750,7 @@ MPPassManager::runOnModule(Module &M) {
LocalChanged |= MP->runOnModule(M);
if (EmitICRemark) {
// Update the size of the module.
- ModuleCount = M.getInstructionCount();
+ unsigned ModuleCount = M.getInstructionCount();
if (ModuleCount != InstrCount) {
int64_t Delta = static_cast<int64_t>(ModuleCount) -
static_cast<int64_t>(InstrCount);
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index 3fa541f1b535..14bcb3a29b07 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -1,9 +1,8 @@
//===---- llvm/MDBuilder.cpp - Builder for LLVM metadata ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -69,9 +68,7 @@ MDNode *MDBuilder::createFunctionEntryCount(
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
if (Imports) {
SmallVector<GlobalValue::GUID, 2> OrderID(Imports->begin(), Imports->end());
- std::stable_sort(OrderID.begin(), OrderID.end(),
- [] (GlobalValue::GUID A, GlobalValue::GUID B) {
- return A < B;});
+ llvm::stable_sort(OrderID);
for (auto ID : OrderID)
Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID)));
}
@@ -107,6 +104,52 @@ MDNode *MDBuilder::createCallees(ArrayRef<Function *> Callees) {
return MDNode::get(Context, Ops);
}
+MDNode *MDBuilder::createCallbackEncoding(unsigned CalleeArgNo,
+ ArrayRef<int> Arguments,
+ bool VarArgArePassed) {
+ SmallVector<Metadata *, 4> Ops;
+
+ Type *Int64 = Type::getInt64Ty(Context);
+ Ops.push_back(createConstant(ConstantInt::get(Int64, CalleeArgNo)));
+
+ for (int ArgNo : Arguments)
+ Ops.push_back(createConstant(ConstantInt::get(Int64, ArgNo, true)));
+
+ Type *Int1 = Type::getInt1Ty(Context);
+ Ops.push_back(createConstant(ConstantInt::get(Int1, VarArgArePassed)));
+
+ return MDNode::get(Context, Ops);
+}
+
+MDNode *MDBuilder::mergeCallbackEncodings(MDNode *ExistingCallbacks,
+ MDNode *NewCB) {
+ if (!ExistingCallbacks)
+ return MDNode::get(Context, {NewCB});
+
+ auto *NewCBCalleeIdxAsCM = cast<ConstantAsMetadata>(NewCB->getOperand(0));
+ uint64_t NewCBCalleeIdx =
+ cast<ConstantInt>(NewCBCalleeIdxAsCM->getValue())->getZExtValue();
+ (void)NewCBCalleeIdx;
+
+ SmallVector<Metadata *, 4> Ops;
+ unsigned NumExistingOps = ExistingCallbacks->getNumOperands();
+ Ops.resize(NumExistingOps + 1);
+
+ for (unsigned u = 0; u < NumExistingOps; u++) {
+ Ops[u] = ExistingCallbacks->getOperand(u);
+
+ auto *OldCBCalleeIdxAsCM = cast<ConstantAsMetadata>(Ops[u]);
+ uint64_t OldCBCalleeIdx =
+ cast<ConstantInt>(OldCBCalleeIdxAsCM->getValue())->getZExtValue();
+ (void)OldCBCalleeIdx;
+ assert(NewCBCalleeIdx != OldCBCalleeIdx &&
+ "Cannot map a callback callee index twice!");
+ }
+
+ Ops[NumExistingOps] = NewCB;
+ return MDNode::get(Context, Ops);
+}
+
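A hedged sketch of building !callback metadata with the helpers above; the argument numbers are made up, and BrokerFn/Ctx are a hypothetical broker function and its context:

  MDBuilder MDB(Ctx);
  // Operand 2 of the broker is the callback; its callee is passed broker arguments 0 and 3.
  MDNode *CB = MDB.createCallbackEncoding(/*CalleeArgNo=*/2, /*Arguments=*/{0, 3},
                                          /*VarArgArePassed=*/false);
  MDNode *Existing = BrokerFn->getMetadata(LLVMContext::MD_callback);
  BrokerFn->setMetadata(LLVMContext::MD_callback,
                        MDB.mergeCallbackEncodings(Existing, CB));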
MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) {
// To ensure uniqueness the root node is self-referential.
auto Dummy = MDNode::getTemporary(Context, None);
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index be3086cfcf05..d73f748b0584 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -1,9 +1,8 @@
//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -187,7 +186,7 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
return;
- if (TT.isKnownWindowsMSVCEnvironment())
+ if (TT.isWindowsMSVCEnvironment())
OS << " /EXPORT:";
else
OS << " -export:";
@@ -206,7 +205,7 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
}
if (!GV->getValueType()->isFunctionTy()) {
- if (TT.isKnownWindowsMSVCEnvironment())
+ if (TT.isWindowsMSVCEnvironment())
OS << ",DATA";
else
OS << ",data";
@@ -215,7 +214,7 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
const Triple &T, Mangler &M) {
- if (!T.isKnownWindowsMSVCEnvironment())
+ if (!T.isWindowsMSVCEnvironment())
return;
OS << " /INCLUDE:";
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 5536c2497f1e..748a2238e642 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -1,9 +1,8 @@
//===- Metadata.cpp - Implement Metadata classes --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1180,10 +1179,7 @@ void MDGlobalAttachmentMap::getAll(
// Sort the resulting array so it is stable with respect to metadata IDs. We
// need to preserve the original insertion order though.
- std::stable_sort(
- Result.begin(), Result.end(),
- [](const std::pair<unsigned, MDNode *> &A,
- const std::pair<unsigned, MDNode *> &B) { return A.first < B.first; });
+ llvm::stable_sort(Result, less_first());
}
void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
diff --git a/lib/IR/MetadataImpl.h b/lib/IR/MetadataImpl.h
index b9137460bd20..b4188dd7d3ee 100644
--- a/lib/IR/MetadataImpl.h
+++ b/lib/IR/MetadataImpl.h
@@ -1,9 +1,8 @@
//===- MetadataImpl.h - Helpers for implementing metadata -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 93f27304424f..dbf4035ac7c1 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -1,9 +1,8 @@
//===- Module.cpp - Implement the Module class ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -141,8 +140,8 @@ void Module::getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const {
// it. This is nice because it allows most passes to get away with not handling
// the symbol table directly for this common task.
//
-Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
- AttributeList AttributeList) {
+FunctionCallee Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
+ AttributeList AttributeList) {
// See if we have a definition for the specified function already.
GlobalValue *F = getNamedValue(Name);
if (!F) {
@@ -152,21 +151,20 @@ Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
New->setAttributes(AttributeList);
FunctionList.push_back(New);
- return New; // Return the new prototype.
+ return {Ty, New}; // Return the new prototype.
}
// If the function exists but has the wrong type, return a bitcast to the
// right type.
auto *PTy = PointerType::get(Ty, F->getAddressSpace());
if (F->getType() != PTy)
- return ConstantExpr::getBitCast(F, PTy);
+ return {Ty, ConstantExpr::getBitCast(F, PTy)};
// Otherwise, we just found the existing function or a prototype.
- return F;
+ return {Ty, F};
}
-Constant *Module::getOrInsertFunction(StringRef Name,
- FunctionType *Ty) {
+FunctionCallee Module::getOrInsertFunction(StringRef Name, FunctionType *Ty) {
return getOrInsertFunction(Name, Ty, AttributeList());
}
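Callers now receive a FunctionCallee carrying the intended function type alongside the callee. A minimal sketch, assuming a Module M, a context Ctx, an IRBuilder Builder and a string operand Str:

  FunctionType *FT = FunctionType::get(Type::getInt32Ty(Ctx),
                                       {Type::getInt8PtrTy(Ctx)}, /*isVarArg=*/true);
  FunctionCallee Printf = M.getOrInsertFunction("printf", FT);
  Builder.CreateCall(Printf, {Str});  // works whether or not a bitcast was inserted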
@@ -533,12 +531,16 @@ void Module::setCodeModel(CodeModel::Model CL) {
addModuleFlag(ModFlagBehavior::Error, "Code Model", CL);
}
-void Module::setProfileSummary(Metadata *M) {
- addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
+void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) {
+ if (Kind == ProfileSummary::PSK_CSInstr)
+ addModuleFlag(ModFlagBehavior::Error, "CSProfileSummary", M);
+ else
+ addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M);
}
-Metadata *Module::getProfileSummary() {
- return getModuleFlag("ProfileSummary");
+Metadata *Module::getProfileSummary(bool IsCS) {
+ return (IsCS ? getModuleFlag("CSProfileSummary")
+ : getModuleFlag("ProfileSummary"));
}
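A short sketch of the kind-aware API (PS and CSPS are hypothetical ProfileSummary objects and Ctx the module's context):

  M.setProfileSummary(PS->getMD(Ctx), ProfileSummary::PSK_Instr);
  M.setProfileSummary(CSPS->getMD(Ctx), ProfileSummary::PSK_CSInstr);
  Metadata *CSMD = M.getProfileSummary(/*IsCS=*/true);  // reads the "CSProfileSummary" flag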
void Module::setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB) {
diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp
index 46b88cd31779..9f347d8da01d 100644
--- a/lib/IR/ModuleSummaryIndex.cpp
+++ b/lib/IR/ModuleSummaryIndex.cpp
@@ -1,9 +1,8 @@
//===-- ModuleSummaryIndex.cpp - Module Summary Index ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,9 +23,12 @@ using namespace llvm;
STATISTIC(ReadOnlyLiveGVars,
"Number of live global variables marked read only");
+STATISTIC(WriteOnlyLiveGVars,
+ "Number of live global variables marked write only");
FunctionSummary FunctionSummary::ExternalNode =
FunctionSummary::makeDummyFunctionSummary({});
+
bool ValueInfo::isDSOLocal() const {
// Need to check all summaries are local in case of hash collisions.
return getSummaryList().size() &&
@@ -36,15 +38,27 @@ bool ValueInfo::isDSOLocal() const {
});
}
-// Gets the number of immutable refs in RefEdgeList
-unsigned FunctionSummary::immutableRefCount() const {
- // Here we take advantage of having all readonly references
+bool ValueInfo::canAutoHide() const {
+ // Can only auto hide if all copies are eligible to auto hide.
+ return getSummaryList().size() &&
+ llvm::all_of(getSummaryList(),
+ [](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return Summary->canAutoHide();
+ });
+}
+
+// Gets the number of readonly and writeonly refs in RefEdgeList
+std::pair<unsigned, unsigned> FunctionSummary::specialRefCounts() const {
+ // Here we take advantage of having all readonly and writeonly references
// located in the end of the RefEdgeList.
auto Refs = refs();
- unsigned ImmutableRefCnt = 0;
- for (int I = Refs.size() - 1; I >= 0 && Refs[I].isReadOnly(); --I)
- ImmutableRefCnt++;
- return ImmutableRefCnt;
+ unsigned RORefCnt = 0, WORefCnt = 0;
+ int I;
+ for (I = Refs.size() - 1; I >= 0 && Refs[I].isWriteOnly(); --I)
+ WORefCnt++;
+ for (; I >= 0 && Refs[I].isReadOnly(); --I)
+ RORefCnt++;
+ return {RORefCnt, WORefCnt};
}
// Collect for the given module the list of function it defines
@@ -66,17 +80,6 @@ void ModuleSummaryIndex::collectDefinedFunctionsForModule(
}
}
-// Collect for each module the list of function it defines (GUID -> Summary).
-void ModuleSummaryIndex::collectDefinedGVSummariesPerModule(
- StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries) const {
- for (auto &GlobalList : *this) {
- auto GUID = GlobalList.first;
- for (auto &Summary : GlobalList.second.SummaryList) {
- ModuleToDefinedGVSummaries[Summary->modulePath()][GUID] = Summary.get();
- }
- }
-}
-
GlobalValueSummary *
ModuleSummaryIndex::getGlobalValueSummary(uint64_t ValueGUID,
bool PerModuleIndex) const {
@@ -101,48 +104,56 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
return false;
}
-static void propagateConstantsToRefs(GlobalValueSummary *S) {
- // If reference is not readonly then referenced summary is not
- // readonly either. Note that:
+static void propagateAttributesToRefs(GlobalValueSummary *S) {
+ // If reference is not readonly or writeonly then referenced summary is not
+ // read/writeonly either. Note that:
// - All references from GlobalVarSummary are conservatively considered as
- // not readonly. Tracking them properly requires more complex analysis
- // then we have now.
+ // not readonly or writeonly. Tracking them properly requires more complex
+  //   analysis than we have now.
//
// - AliasSummary objects have no refs at all so this function is a no-op
// for them.
for (auto &VI : S->refs()) {
- if (VI.isReadOnly()) {
- // We only mark refs as readonly when computing function summaries on
- // analysis phase.
- assert(isa<FunctionSummary>(S));
- continue;
- }
+ assert(VI.getAccessSpecifier() == 0 || isa<FunctionSummary>(S));
for (auto &Ref : VI.getSummaryList())
- // If references to alias is not readonly then aliasee is not readonly
- if (auto *GVS = dyn_cast<GlobalVarSummary>(Ref->getBaseObject()))
- GVS->setReadOnly(false);
+ // If references to alias is not read/writeonly then aliasee
+ // is not read/writeonly
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(Ref->getBaseObject())) {
+ if (!VI.isReadOnly())
+ GVS->setReadOnly(false);
+ if (!VI.isWriteOnly())
+ GVS->setWriteOnly(false);
+ }
}
}
-// Do the constant propagation in combined index.
-// The goal of constant propagation is internalization of readonly
-// variables. To determine which variables are readonly and which
-// are not we take following steps:
-// - During analysis we speculatively assign readonly attribute to
-// all variables which can be internalized. When computing function
-// summary we also assign readonly attribute to a reference if
-// function doesn't modify referenced variable.
+// Do the access attribute propagation in combined index.
+// The goal of attribute propagation is internalization of readonly (RO)
+// or writeonly (WO) variables. To determine which variables are RO or WO
+// and which are not we take following steps:
+// - During analysis we speculatively assign readonly and writeonly
+// attribute to all variables which can be internalized. When computing
+// function summary we also assign readonly or writeonly attribute to a
+// reference if function doesn't modify referenced variable (readonly)
+// or doesn't read it (writeonly).
+//
+// - After computing dead symbols in combined index we do the attribute
+// propagation. During this step we:
+// a. clear RO and WO attributes from variables which are preserved or
+// can't be imported
+// b. clear RO and WO attributes from variables referenced by any global
+// variable initializer
+// c. clear RO attribute from variable referenced by a function when
+// reference is not readonly
+// d. clear WO attribute from variable referenced by a function when
+// reference is not writeonly
//
-// - After computing dead symbols in combined index we do the constant
-// propagation. During this step we clear readonly attribute from
-// all variables which:
-// a. are preserved or can't be imported
-// b. referenced by any global variable initializer
-// c. referenced by a function and reference is not readonly
+// Because of (c, d) we don't internalize variables read by function A
+// and modified by function B.
//
// Internalization itself happens in the backend after import is finished
-// See internalizeImmutableGVs.
-void ModuleSummaryIndex::propagateConstants(
+// See internalizeGVsAfterImport.
+void ModuleSummaryIndex::propagateAttributes(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
for (auto &P : *this)
for (auto &S : P.second.SummaryList) {
@@ -150,29 +161,36 @@ void ModuleSummaryIndex::propagateConstants(
// We don't examine references from dead objects
continue;
- // Global variable can't be marked read only if it is not eligible
- // to import since we need to ensure that all external references
- // get a local (imported) copy. It also can't be marked read only
- // if it or any alias (since alias points to the same memory) are
- // preserved or notEligibleToImport, since either of those means
- // there could be writes that are not visible (because preserved
- // means it could have external to DSO writes, and notEligibleToImport
- // means it could have writes via inline assembly leading it to be
- // in the @llvm.*used).
+ // Global variable can't be marked read/writeonly if it is not eligible
+ // to import since we need to ensure that all external references get
+ // a local (imported) copy. It also can't be marked read/writeonly if
+ // it or any alias (since alias points to the same memory) are preserved
+ // or notEligibleToImport, since either of those means there could be
+ // writes (or reads in case of writeonly) that are not visible (because
+ // preserved means it could have external to DSO writes or reads, and
+ // notEligibleToImport means it could have writes or reads via inline
+ // assembly leading it to be in the @llvm.*used).
if (auto *GVS = dyn_cast<GlobalVarSummary>(S->getBaseObject()))
// Here we intentionally pass S.get() not GVS, because S could be
// an alias.
- if (!canImportGlobalVar(S.get()) || GUIDPreservedSymbols.count(P.first))
+ if (!canImportGlobalVar(S.get()) ||
+ GUIDPreservedSymbols.count(P.first)) {
GVS->setReadOnly(false);
- propagateConstantsToRefs(S.get());
+ GVS->setWriteOnly(false);
+ }
+ propagateAttributesToRefs(S.get());
}
if (llvm::AreStatisticsEnabled())
for (auto &P : *this)
if (P.second.SummaryList.size())
if (auto *GVS = dyn_cast<GlobalVarSummary>(
P.second.SummaryList[0]->getBaseObject()))
- if (isGlobalValueLive(GVS) && GVS->isReadOnly())
- ReadOnlyLiveGVars++;
+ if (isGlobalValueLive(GVS)) {
+ if (GVS->maybeReadOnly())
+ ReadOnlyLiveGVars++;
+ if (GVS->maybeWriteOnly())
+ WriteOnlyLiveGVars++;
+ }
}
// TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
@@ -335,14 +353,21 @@ static void defineExternalNode(raw_ostream &OS, const char *Pfx,
static bool hasReadOnlyFlag(const GlobalValueSummary *S) {
if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
- return GVS->isReadOnly();
+ return GVS->maybeReadOnly();
+ return false;
+}
+
+static bool hasWriteOnlyFlag(const GlobalValueSummary *S) {
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
+ return GVS->maybeWriteOnly();
return false;
}
void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
std::vector<Edge> CrossModuleEdges;
DenseMap<GlobalValue::GUID, std::vector<uint64_t>> NodeMap;
- StringMap<GVSummaryMapTy> ModuleToDefinedGVS;
+ using GVSOrderedMapTy = std::map<GlobalValue::GUID, GlobalValueSummary *>;
+ std::map<StringRef, GVSOrderedMapTy> ModuleToDefinedGVS;
collectDefinedGVSummariesPerModule(ModuleToDefinedGVS);
// Get node identifier in form MXXX_<GUID>. The MXXX prefix is required,
@@ -359,12 +384,14 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
// 0 - alias
// 1 - reference
// 2 - constant reference
- // Other value: (hotness - 3).
- TypeOrHotness += 3;
+ // 3 - writeonly reference
+ // Other value: (hotness - 4).
+ TypeOrHotness += 4;
static const char *EdgeAttrs[] = {
" [style=dotted]; // alias",
" [style=dashed]; // ref",
" [style=dashed,color=forestgreen]; // const-ref",
+ " [style=dashed,color=violetred]; // writeOnly-ref",
" // call (hotness : Unknown)",
" [color=blue]; // call (hotness : Cold)",
" // call (hotness : None)",
@@ -379,12 +406,12 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
OS << "digraph Summary {\n";
for (auto &ModIt : ModuleToDefinedGVS) {
- auto ModId = getModuleId(ModIt.first());
- OS << " // Module: " << ModIt.first() << "\n";
+ auto ModId = getModuleId(ModIt.first);
+ OS << " // Module: " << ModIt.first << "\n";
OS << " subgraph cluster_" << std::to_string(ModId) << " {\n";
OS << " style = filled;\n";
OS << " color = lightgrey;\n";
- OS << " label = \"" << sys::path::filename(ModIt.first()) << "\";\n";
+ OS << " label = \"" << sys::path::filename(ModIt.first) << "\";\n";
OS << " node [style=filled,fillcolor=lightblue];\n";
auto &GVSMap = ModIt.second;
@@ -409,7 +436,13 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
A.add("shape", "Mrecord", "variable");
if (Flags.Live && hasReadOnlyFlag(SummaryIt.second))
A.addComment("immutable");
+ if (Flags.Live && hasWriteOnlyFlag(SummaryIt.second))
+ A.addComment("writeOnly");
}
+ if (Flags.DSOLocal)
+ A.addComment("dsoLocal");
+ if (Flags.CanAutoHide)
+ A.addComment("canAutoHide");
auto VI = getValueInfo(SummaryIt.first);
A.add("label", getNodeLabel(VI, SummaryIt.second));
@@ -426,20 +459,11 @@ void ModuleSummaryIndex::exportToDot(raw_ostream &OS) const {
for (auto &SummaryIt : GVSMap) {
auto *GVS = SummaryIt.second;
for (auto &R : GVS->refs())
- Draw(SummaryIt.first, R.getGUID(), R.isReadOnly() ? -1 : -2);
+ Draw(SummaryIt.first, R.getGUID(),
+ R.isWriteOnly() ? -1 : (R.isReadOnly() ? -2 : -3));
if (auto *AS = dyn_cast_or_null<AliasSummary>(SummaryIt.second)) {
- GlobalValue::GUID AliaseeId;
- if (AS->hasAliaseeGUID())
- AliaseeId = AS->getAliaseeGUID();
- else {
- auto AliaseeOrigId = AS->getAliasee().getOriginalName();
- AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
- if (!AliaseeId)
- AliaseeId = AliaseeOrigId;
- }
-
- Draw(SummaryIt.first, AliaseeId, -3);
+ Draw(SummaryIt.first, AS->getAliaseeGUID(), -4);
continue;
}
diff --git a/lib/IR/Operator.cpp b/lib/IR/Operator.cpp
index 5b4c7524b672..8ba68674d50e 100644
--- a/lib/IR/Operator.cpp
+++ b/lib/IR/Operator.cpp
@@ -1,9 +1,8 @@
//===-- Operator.cpp - Implement the LLVM operators -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp
index c79e1fc2b0b4..3104b90f3070 100644
--- a/lib/IR/OptBisect.cpp
+++ b/lib/IR/OptBisect.cpp
@@ -1,9 +1,8 @@
//===- OptBisect.cpp - LLVM Bisect support ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,13 +14,6 @@
#include "llvm/IR/OptBisect.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -47,73 +39,10 @@ static void printPassMessage(const StringRef &Name, int PassNum,
<< "(" << PassNum << ") " << Name << " on " << TargetDesc << "\n";
}
-static std::string getDescription(const Module &M) {
- return "module (" + M.getName().str() + ")";
-}
-
-static std::string getDescription(const Function &F) {
- return "function (" + F.getName().str() + ")";
-}
-
-static std::string getDescription(const BasicBlock &BB) {
- return "basic block (" + BB.getName().str() + ") in function (" +
- BB.getParent()->getName().str() + ")";
-}
-
-static std::string getDescription(const Loop &L) {
- // FIXME: Move into LoopInfo so we can get a better description
- // (and avoid a circular dependency between IR and Analysis).
- return "loop";
-}
-
-static std::string getDescription(const Region &R) {
- // FIXME: Move into RegionInfo so we can get a better description
- // (and avoid a circular dependency between IR and Analysis).
- return "region";
-}
-
-static std::string getDescription(const CallGraphSCC &SCC) {
- // FIXME: Move into CallGraphSCCPass to avoid circular dependency between
- // IR and Analysis.
- std::string Desc = "SCC (";
- bool First = true;
- for (CallGraphNode *CGN : SCC) {
- if (First)
- First = false;
- else
- Desc += ", ";
- Function *F = CGN->getFunction();
- if (F)
- Desc += F->getName();
- else
- Desc += "<<null function>>";
- }
- Desc += ")";
- return Desc;
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Module &U) {
- return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Function &U) {
- return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const BasicBlock &U) {
- return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Region &U) {
- return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
-
-bool OptBisect::shouldRunPass(const Pass *P, const Loop &U) {
- return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
-}
+bool OptBisect::shouldRunPass(const Pass *P, StringRef IRDescription) {
+ assert(BisectEnabled);
-bool OptBisect::shouldRunPass(const Pass *P, const CallGraphSCC &U) {
- return !BisectEnabled || checkPass(P->getPassName(), getDescription(U));
+ return checkPass(P->getPassName(), IRDescription);
}
bool OptBisect::checkPass(const StringRef PassName,
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
index a1dc17882493..699a7e17c0cb 100644
--- a/lib/IR/Pass.cpp
+++ b/lib/IR/Pass.cpp
@@ -1,9 +1,8 @@
//===- Pass.cpp - LLVM Pass Infrastructure Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,8 +55,13 @@ PassManagerType ModulePass::getPotentialPassManagerType() const {
return PMT_ModulePassManager;
}
+static std::string getDescription(const Module &M) {
+ return "module (" + M.getName().str() + ")";
+}
+
bool ModulePass::skipModule(Module &M) const {
- return !M.getContext().getOptPassGate().shouldRunPass(this, M);
+ OptPassGate &Gate = M.getContext().getOptPassGate();
+ return Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(M));
}
bool Pass::mustPreserveAnalysisID(char &AID) const {
@@ -155,11 +159,16 @@ PassManagerType FunctionPass::getPotentialPassManagerType() const {
return PMT_FunctionPassManager;
}
+static std::string getDescription(const Function &F) {
+ return "function (" + F.getName().str() + ")";
+}
+
bool FunctionPass::skipFunction(const Function &F) const {
- if (!F.getContext().getOptPassGate().shouldRunPass(this, F))
+ OptPassGate &Gate = F.getContext().getOptPassGate();
+ if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(F)))
return true;
- if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F.hasOptNone()) {
LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName() << "' on function "
<< F.getName() << "\n");
return true;
@@ -186,13 +195,19 @@ bool BasicBlockPass::doFinalization(Function &) {
return false;
}
+static std::string getDescription(const BasicBlock &BB) {
+ return "basic block (" + BB.getName().str() + ") in function (" +
+ BB.getParent()->getName().str() + ")";
+}
+
bool BasicBlockPass::skipBasicBlock(const BasicBlock &BB) const {
const Function *F = BB.getParent();
if (!F)
return false;
- if (!F->getContext().getOptPassGate().shouldRunPass(this, BB))
+ OptPassGate &Gate = F->getContext().getOptPassGate();
+ if (Gate.isEnabled() && !Gate.shouldRunPass(this, getDescription(BB)))
return true;
- if (F->hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F->hasOptNone()) {
// Report this only once per function.
if (&BB == &F->getEntryBlock())
LLVM_DEBUG(dbgs() << "Skipping pass '" << getPassName()
diff --git a/lib/IR/PassInstrumentation.cpp b/lib/IR/PassInstrumentation.cpp
index 5aa2bc6d895e..49cc6ec04d90 100644
--- a/lib/IR/PassInstrumentation.cpp
+++ b/lib/IR/PassInstrumentation.cpp
@@ -1,9 +1,8 @@
//===- PassInstrumentation.cpp - Pass Instrumentation interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index 47fdfedfdde8..cde9b873795e 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -1,9 +1,8 @@
//===- PassManager.cpp - Infrastructure for managing & running IR passes --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
index b0f1a9928725..92c188b11898 100644
--- a/lib/IR/PassRegistry.cpp
+++ b/lib/IR/PassRegistry.cpp
@@ -1,9 +1,8 @@
//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/PassTimingInfo.cpp b/lib/IR/PassTimingInfo.cpp
index 40b3977ecbd9..9cc44ea05fee 100644
--- a/lib/IR/PassTimingInfo.cpp
+++ b/lib/IR/PassTimingInfo.cpp
@@ -1,9 +1,8 @@
//===- PassTimingInfo.cpp - LLVM Pass Timing Implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,7 +77,8 @@ public:
static void init();
/// Prints out timing information and then resets the timers.
- void print();
+ /// By default it uses the stream created by CreateInfoOutputFile().
+ void print(raw_ostream *OutStream = nullptr);
/// Returns the timer for the specified pass if it exists.
Timer *getPassTimer(Pass *, PassInstanceID);
@@ -112,7 +112,9 @@ void PassTimingInfo::init() {
}
/// Prints out timing information and then resets the timers.
-void PassTimingInfo::print() { TG.print(*CreateInfoOutputFile()); }
+void PassTimingInfo::print(raw_ostream *OutStream) {
+ TG.print(OutStream ? *OutStream : *CreateInfoOutputFile(), true);
+}
Timer *PassTimingInfo::newPassTimer(StringRef PassID, StringRef PassDesc) {
unsigned &num = PassIDCountMap[PassID];
@@ -154,9 +156,9 @@ Timer *getPassTimer(Pass *P) {
/// If timing is enabled, report the times collected up to now and then reset
/// them.
-void reportAndResetTimings() {
+void reportAndResetTimings(raw_ostream *OutStream) {
if (legacy::PassTimingInfo::TheTimeInfo)
- legacy::PassTimingInfo::TheTimeInfo->print();
+ legacy::PassTimingInfo::TheTimeInfo->print(OutStream);
}
//===----------------------------------------------------------------------===//
@@ -182,7 +184,15 @@ Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
TimePassesHandler::TimePassesHandler(bool Enabled)
: TG("pass", "... Pass execution timing report ..."), Enabled(Enabled) {}
-void TimePassesHandler::print() { TG.print(*CreateInfoOutputFile()); }
+void TimePassesHandler::setOutStream(raw_ostream &Out) {
+ OutStream = &Out;
+}
+
+void TimePassesHandler::print() {
+ if (!Enabled)
+ return;
+ TG.print(OutStream ? *OutStream : *CreateInfoOutputFile(), true);
+}
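A hedged sketch of redirecting the timing report into an in-memory buffer instead of the default stream from CreateInfoOutputFile():

  std::string Buf;
  raw_string_ostream OS(Buf);
  TimePassesHandler Timing(/*Enabled=*/true);
  Timing.setOutStream(OS);
  // ... run the pipeline with Timing hooked into the pass instrumentation ...
  Timing.print();  // the report now lands in Buf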
LLVM_DUMP_METHOD void TimePassesHandler::dump() const {
dbgs() << "Dumping timers for " << getTypeName<TimePassesHandler>()
diff --git a/lib/IR/ProfileSummary.cpp b/lib/IR/ProfileSummary.cpp
index 491fe834df9a..11d95ac19be6 100644
--- a/lib/IR/ProfileSummary.cpp
+++ b/lib/IR/ProfileSummary.cpp
@@ -1,9 +1,8 @@
//===- ProfileSummary.cpp - Profile summary support ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,8 +21,6 @@
using namespace llvm;
-const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"};
-
// Return an MDTuple with two elements. The first element is a string Key and
// the second is a uint64_t Value.
static Metadata *getKeyValMD(LLVMContext &Context, const char *Key,
@@ -69,6 +66,7 @@ Metadata *ProfileSummary::getDetailedSummaryMD(LLVMContext &Context) {
// "SampleProfile"). The rest of the elements of the outer MDTuple are specific
// to the kind of profile summary as returned by getFormatSpecificMD.
Metadata *ProfileSummary::getMD(LLVMContext &Context) {
+ const char *KindStr[3] = {"InstrProf", "CSInstrProf", "SampleProfile"};
Metadata *Components[] = {
getKeyValMD(Context, "ProfileFormat", KindStr[PSK]),
getKeyValMD(Context, "TotalCount", getTotalCount()),
@@ -154,6 +152,9 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
else if (isKeyValuePair(dyn_cast_or_null<MDTuple>(FormatMD), "ProfileFormat",
"InstrProf"))
SummaryKind = PSK_Instr;
+ else if (isKeyValuePair(dyn_cast_or_null<MDTuple>(FormatMD), "ProfileFormat",
+ "CSInstrProf"))
+ SummaryKind = PSK_CSInstr;
else
return nullptr;
diff --git a/lib/IR/RemarkStreamer.cpp b/lib/IR/RemarkStreamer.cpp
new file mode 100644
index 000000000000..5b4c7e72b479
--- /dev/null
+++ b/lib/IR/RemarkStreamer.cpp
@@ -0,0 +1,154 @@
+//===- llvm/IR/RemarkStreamer.cpp - Remark Streamer ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the remark outputting as part of
+// LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/RemarkStreamer.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Remarks/RemarkFormat.h"
+
+using namespace llvm;
+
+RemarkStreamer::RemarkStreamer(StringRef Filename,
+ std::unique_ptr<remarks::Serializer> Serializer)
+ : Filename(Filename), PassFilter(), Serializer(std::move(Serializer)) {
+ assert(!Filename.empty() && "This needs to be a real filename.");
+}
+
+Error RemarkStreamer::setFilter(StringRef Filter) {
+ Regex R = Regex(Filter);
+ std::string RegexError;
+ if (!R.isValid(RegexError))
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ RegexError.data());
+ PassFilter = std::move(R);
+ return Error::success();
+}
+
+/// DiagnosticKind -> remarks::Type
+static remarks::Type toRemarkType(enum DiagnosticKind Kind) {
+ switch (Kind) {
+ default:
+ return remarks::Type::Unknown;
+ case DK_OptimizationRemark:
+ case DK_MachineOptimizationRemark:
+ return remarks::Type::Passed;
+ case DK_OptimizationRemarkMissed:
+ case DK_MachineOptimizationRemarkMissed:
+ return remarks::Type::Missed;
+ case DK_OptimizationRemarkAnalysis:
+ case DK_MachineOptimizationRemarkAnalysis:
+ return remarks::Type::Analysis;
+ case DK_OptimizationRemarkAnalysisFPCommute:
+ return remarks::Type::AnalysisFPCommute;
+ case DK_OptimizationRemarkAnalysisAliasing:
+ return remarks::Type::AnalysisAliasing;
+ case DK_OptimizationFailure:
+ return remarks::Type::Failure;
+ }
+}
+
+/// DiagnosticLocation -> remarks::RemarkLocation.
+static Optional<remarks::RemarkLocation>
+toRemarkLocation(const DiagnosticLocation &DL) {
+ if (!DL.isValid())
+ return None;
+ StringRef File = DL.getRelativePath();
+ unsigned Line = DL.getLine();
+ unsigned Col = DL.getColumn();
+ return remarks::RemarkLocation{File, Line, Col};
+}
+
+/// LLVM Diagnostic -> Remark
+remarks::Remark
+RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) {
+ remarks::Remark R; // The result.
+ R.RemarkType = toRemarkType(static_cast<DiagnosticKind>(Diag.getKind()));
+ R.PassName = Diag.getPassName();
+ R.RemarkName = Diag.getRemarkName();
+ R.FunctionName =
+ GlobalValue::dropLLVMManglingEscape(Diag.getFunction().getName());
+ R.Loc = toRemarkLocation(Diag.getLocation());
+ R.Hotness = Diag.getHotness();
+
+ for (const DiagnosticInfoOptimizationBase::Argument &Arg : Diag.getArgs()) {
+ R.Args.emplace_back();
+ R.Args.back().Key = Arg.Key;
+ R.Args.back().Val = Arg.Val;
+ R.Args.back().Loc = toRemarkLocation(Arg.Loc);
+ }
+
+ return R;
+}
+
+void RemarkStreamer::emit(const DiagnosticInfoOptimizationBase &Diag) {
+ if (Optional<Regex> &Filter = PassFilter)
+ if (!Filter->match(Diag.getPassName()))
+ return;
+
+ // First, convert the diagnostic to a remark.
+ remarks::Remark R = toRemark(Diag);
+ // Then, emit the remark through the serializer.
+ Serializer->emit(R);
+}
+
+char RemarkSetupFileError::ID = 0;
+char RemarkSetupPatternError::ID = 0;
+char RemarkSetupFormatError::ID = 0;
+
+static std::unique_ptr<remarks::Serializer>
+formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) {
+ switch (RemarksFormat) {
+ default:
+ llvm_unreachable("Unknown remark serializer format.");
+ return nullptr;
+ case remarks::Format::YAML:
+ return llvm::make_unique<remarks::YAMLSerializer>(OS);
+ };
+}
+
+Expected<std::unique_ptr<ToolOutputFile>>
+llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+ StringRef RemarksPasses, StringRef RemarksFormat,
+ bool RemarksWithHotness,
+ unsigned RemarksHotnessThreshold) {
+ if (RemarksWithHotness)
+ Context.setDiagnosticsHotnessRequested(true);
+
+ if (RemarksHotnessThreshold)
+ Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
+
+ if (RemarksFilename.empty())
+ return nullptr;
+
+ std::error_code EC;
+ auto RemarksFile =
+ llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
+ // We don't use llvm::FileError here because some diagnostics want the file
+ // name separately.
+ if (EC)
+ return make_error<RemarkSetupFileError>(errorCodeToError(EC));
+
+ Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
+ if (Error E = Format.takeError())
+ return make_error<RemarkSetupFormatError>(std::move(E));
+
+ Context.setRemarkStreamer(llvm::make_unique<RemarkStreamer>(
+ RemarksFilename, formatToSerializer(*Format, RemarksFile->os())));
+
+ if (!RemarksPasses.empty())
+ if (Error E = Context.getRemarkStreamer()->setFilter(RemarksPasses))
+ return make_error<RemarkSetupPatternError>(std::move(E));
+
+ return std::move(RemarksFile);
+}
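The new streamer above is driven through llvm::setupOptimizationRemarks. A minimal sketch of how a tool might call it, assuming the usual LLVM support headers are available; the file name, pass filter, and helper name are illustrative only:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/RemarkStreamer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ToolOutputFile.h"

static llvm::Error enableRemarks(llvm::LLVMContext &Ctx) {
  // Request YAML remarks for passes matching "inline", with hotness info.
  llvm::Expected<std::unique_ptr<llvm::ToolOutputFile>> FileOrErr =
      llvm::setupOptimizationRemarks(Ctx, "remarks.yaml", "inline", "yaml",
                                     /*RemarksWithHotness=*/true,
                                     /*RemarksHotnessThreshold=*/0);
  if (llvm::Error E = FileOrErr.takeError())
    return E;
  // A null ToolOutputFile means no remarks file was requested.
  if (*FileOrErr)
    (*FileOrErr)->keep(); // Otherwise the output file is removed on exit.
  return llvm::Error::success();
}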
diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp
index 12ada1320225..7f3dea5e6a6d 100644
--- a/lib/IR/SafepointIRVerifier.cpp
+++ b/lib/IR/SafepointIRVerifier.cpp
@@ -1,9 +1,8 @@
//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -198,6 +197,17 @@ protected:
static void Verify(const Function &F, const DominatorTree &DT,
const CFGDeadness &CD);
+namespace llvm {
+PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ CFGDeadness CD;
+ CD.processFunction(F, DT);
+ Verify(F, DT, CD);
+ return PreservedAnalyses::all();
+}
+}
+
namespace {
struct SafepointIRVerifier : public FunctionPass {
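The new pass-manager entry point added above can be scheduled like any other function pass. A minimal sketch, assuming a FunctionAnalysisManager that already has DominatorTreeAnalysis (and the other standard analyses) registered:

#include "llvm/IR/PassManager.h"
#include "llvm/IR/SafepointIRVerifier.h"

void runSafepointVerifier(llvm::Function &F,
                          llvm::FunctionAnalysisManager &FAM) {
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::SafepointIRVerifierPass());
  // The pass pulls the dominator tree from FAM, builds CFG deadness
  // information, and runs the same Verify() used by the legacy pass.
  FPM.run(F, FAM);
}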
diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp
index 18efee2177c3..fce89b42e9bf 100644
--- a/lib/IR/Statepoint.cpp
+++ b/lib/IR/Statepoint.cpp
@@ -1,9 +1,8 @@
//===-- IR/Statepoint.cpp -- gc.statepoint utilities --- -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,21 +17,15 @@
using namespace llvm;
-static const Function *getCalledFunction(ImmutableCallSite CS) {
- if (!CS.getInstruction())
- return nullptr;
- return CS.getCalledFunction();
-}
-
-bool llvm::isStatepoint(ImmutableCallSite CS) {
- if (auto *F = getCalledFunction(CS))
+bool llvm::isStatepoint(const CallBase *Call) {
+ if (auto *F = Call->getCalledFunction())
return F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint;
return false;
}
bool llvm::isStatepoint(const Value *V) {
- if (auto CS = ImmutableCallSite(V))
- return isStatepoint(CS);
+ if (auto *Call = dyn_cast<CallBase>(V))
+ return isStatepoint(Call);
return false;
}
@@ -40,23 +33,21 @@ bool llvm::isStatepoint(const Value &V) {
return isStatepoint(&V);
}
-bool llvm::isGCRelocate(ImmutableCallSite CS) {
- return CS.getInstruction() && isa<GCRelocateInst>(CS.getInstruction());
+bool llvm::isGCRelocate(const CallBase *Call) {
+ return isa<GCRelocateInst>(Call);
}
bool llvm::isGCRelocate(const Value *V) {
- if (auto CS = ImmutableCallSite(V))
- return isGCRelocate(CS);
+ if (auto *Call = dyn_cast<CallBase>(V))
+ return isGCRelocate(Call);
return false;
}
-bool llvm::isGCResult(ImmutableCallSite CS) {
- return CS.getInstruction() && isa<GCResultInst>(CS.getInstruction());
-}
+bool llvm::isGCResult(const CallBase *Call) { return isa<GCResultInst>(Call); }
bool llvm::isGCResult(const Value *V) {
- if (auto CS = ImmutableCallSite(V))
- return isGCResult(CS);
+ if (auto *Call = dyn_cast<CallBase>(V))
+ return isGCResult(Call);
return false;
}
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index d4ad1eba33c6..f399c823d6fb 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -1,9 +1,8 @@
//===-- llvm/SymbolTableListTraitsImpl.h - Implementation ------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,7 +83,8 @@ void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
SymbolTableListTraits &L2, iterator first, iterator last) {
// We only have to do work here if transferring instructions between BBs
ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
- assert(NewIP != OldIP && "Expected different list owners");
+ if (NewIP == OldIP)
+ return;
// We only have to update symbol table entries if we are transferring the
// instructions to a different symtab object...
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 0fb079c5ab73..8ece7f223dd2 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -1,9 +1,8 @@
//===- Type.cpp - Implement the Type class --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -256,7 +255,7 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
if (!Entry)
- Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+ Entry = new (C.pImpl->Alloc) IntegerType(C, NumBits);
return Entry;
}
@@ -308,7 +307,7 @@ FunctionType *FunctionType::get(Type *ReturnType,
if (Insertion.second) {
// The function type was not found. Allocate one and update FunctionTypes
// in-place.
- FT = (FunctionType *)pImpl->TypeAllocator.Allocate(
+ FT = (FunctionType *)pImpl->Alloc.Allocate(
sizeof(FunctionType) + sizeof(Type *) * (Params.size() + 1),
alignof(FunctionType));
new (FT) FunctionType(ReturnType, Params, isVarArg);
@@ -354,7 +353,7 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
if (Insertion.second) {
// The struct type was not found. Allocate one and update AnonStructTypes
// in-place.
- ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST = new (Context.pImpl->Alloc) StructType(Context);
ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
ST->setBody(ETypes, isPacked);
*Insertion.first = ST;
@@ -380,7 +379,7 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
return;
}
- ContainedTys = Elements.copy(getContext().pImpl->TypeAllocator).data();
+ ContainedTys = Elements.copy(getContext().pImpl->Alloc).data();
}
void StructType::setName(StringRef Name) {
@@ -435,7 +434,7 @@ void StructType::setName(StringRef Name) {
// StructType Helper functions.
StructType *StructType::create(LLVMContext &Context, StringRef Name) {
- StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ StructType *ST = new (Context.pImpl->Alloc) StructType(Context);
if (!Name.empty())
ST->setName(Name);
return ST;
@@ -505,6 +504,8 @@ StringRef StructType::getName() const {
}
bool StructType::isValidElementType(Type *ElemTy) {
+ if (auto *VTy = dyn_cast<VectorType>(ElemTy))
+ return !VTy->isScalable();
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
!ElemTy->isTokenTy();
@@ -586,11 +587,13 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
if (!Entry)
- Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+ Entry = new (pImpl->Alloc) ArrayType(ElementType, NumElements);
return Entry;
}
bool ArrayType::isValidElementType(Type *ElemTy) {
+ if (auto *VTy = dyn_cast<VectorType>(ElemTy))
+ return !VTy->isScalable();
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
!ElemTy->isTokenTy();
@@ -600,21 +603,20 @@ bool ArrayType::isValidElementType(Type *ElemTy) {
// VectorType Implementation
//===----------------------------------------------------------------------===//
-VectorType::VectorType(Type *ElType, unsigned NumEl)
- : SequentialType(VectorTyID, ElType, NumEl) {}
+VectorType::VectorType(Type *ElType, ElementCount EC)
+ : SequentialType(VectorTyID, ElType, EC.Min), Scalable(EC.Scalable) {}
-VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
- assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+VectorType *VectorType::get(Type *ElementType, ElementCount EC) {
+ assert(EC.Min > 0 && "#Elements of a VectorType must be greater than 0");
assert(isValidElementType(ElementType) && "Element type of a VectorType must "
"be an integer, floating point, or "
"pointer type.");
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
VectorType *&Entry = ElementType->getContext().pImpl
- ->VectorTypes[std::make_pair(ElementType, NumElements)];
-
+ ->VectorTypes[std::make_pair(ElementType, EC)];
if (!Entry)
- Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+ Entry = new (pImpl->Alloc) VectorType(ElementType, EC);
return Entry;
}
@@ -638,7 +640,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
: CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
if (!Entry)
- Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+ Entry = new (CImpl->Alloc) PointerType(EltTy, AddressSpace);
return Entry;
}
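The VectorType changes above replace the plain element count with ElementCount so that fixed and scalable vectors share one factory, and the isValidElementType updates keep scalable vectors out of aggregates. A minimal sketch, assuming an existing LLVMContext:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

void vectorTypeExamples(llvm::LLVMContext &Ctx) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  // Fixed-width <4 x i32>.
  llvm::VectorType *Fixed =
      llvm::VectorType::get(I32, llvm::ElementCount(4, /*Scalable=*/false));
  // Scalable <vscale x 4 x i32>; its size is only known at run time.
  llvm::VectorType *Scalable =
      llvm::VectorType::get(I32, llvm::ElementCount(4, /*Scalable=*/true));
  // Scalable vectors are rejected as array (and struct) element types.
  bool FixedOk = llvm::ArrayType::isValidElementType(Fixed);       // true
  bool ScalableOk = llvm::ArrayType::isValidElementType(Scalable); // false
  (void)FixedOk;
  (void)ScalableOk;
}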
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index e9af78c71bfd..2e2c194860cd 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -1,9 +1,8 @@
//===- TypeFinder.cpp - Implement the TypeFinder class --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp
index cae845d99fe5..18c61757ee84 100644
--- a/lib/IR/Use.cpp
+++ b/lib/IR/Use.cpp
@@ -1,9 +1,8 @@
//===-- Use.cpp - Implement the Use class ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp
index 041593f20b57..33a3686c94a1 100644
--- a/lib/IR/User.cpp
+++ b/lib/IR/User.cpp
@@ -1,9 +1,8 @@
//===-- User.cpp - Implement the User class -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 80b993c89f7f..b7f77dc3043e 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -1,9 +1,8 @@
//===-- Value.cpp - Implement the Value class -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -58,7 +57,8 @@ Value::Value(Type *ty, unsigned scid)
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
// constructed.
- if (SubclassID == Instruction::Call || SubclassID == Instruction::Invoke)
+ if (SubclassID == Instruction::Call || SubclassID == Instruction::Invoke ||
+ SubclassID == Instruction::CallBr)
assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
"invalid CallInst type!");
else if (SubclassID != BasicBlockVal &&
@@ -460,6 +460,7 @@ namespace {
enum PointerStripKind {
PSK_ZeroIndices,
PSK_ZeroIndicesAndAliases,
+ PSK_ZeroIndicesAndAliasesSameRepresentation,
PSK_ZeroIndicesAndAliasesAndInvariantGroups,
PSK_InBoundsConstantIndices,
PSK_InBounds
@@ -479,6 +480,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
switch (StripKind) {
case PSK_ZeroIndicesAndAliases:
+ case PSK_ZeroIndicesAndAliasesSameRepresentation:
case PSK_ZeroIndicesAndAliasesAndInvariantGroups:
case PSK_ZeroIndices:
if (!GEP->hasAllZeroIndices())
@@ -494,8 +496,12 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
break;
}
V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (StripKind != PSK_ZeroIndicesAndAliasesSameRepresentation &&
Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
+ // TODO: If we know an address space cast will not change the
+ // representation we could look through it here as well.
V = cast<Operator>(V)->getOperand(0);
} else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
if (StripKind == PSK_ZeroIndices || GA->isInterposable())
@@ -530,6 +536,11 @@ const Value *Value::stripPointerCasts() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
}
+const Value *Value::stripPointerCastsSameRepresentation() const {
+ return stripPointerCastsAndOffsets<
+ PSK_ZeroIndicesAndAliasesSameRepresentation>(this);
+}
+
const Value *Value::stripPointerCastsNoFollowAliases() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
}
@@ -544,13 +555,13 @@ const Value *Value::stripPointerCastsAndInvariantGroups() const {
}
const Value *
-Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
- APInt &Offset) const {
- if (!getType()->isPointerTy())
+Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
+ bool AllowNonInbounds) const {
+ if (!getType()->isPtrOrPtrVectorTy())
return this;
- assert(Offset.getBitWidth() == DL.getIndexSizeInBits(cast<PointerType>(
- getType())->getAddressSpace()) &&
+ unsigned BitWidth = Offset.getBitWidth();
+ assert(BitWidth == DL.getIndexTypeSizeInBits(getType()) &&
"The offset bit width does not match the DL specification.");
// Even though we don't look through PHI nodes, we could be called on an
@@ -560,27 +571,39 @@ Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
const Value *V = this;
do {
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds())
+ // If in-bounds was requested, we do not strip non-in-bounds GEPs.
+ if (!AllowNonInbounds && !GEP->isInBounds())
return V;
- APInt GEPOffset(Offset);
+
+ // If one of the values we have visited is an addrspacecast, then
+ // the pointer type of this GEP may be different from the type
+ // of the Ptr parameter which was passed to this function. This
+ // means when we construct GEPOffset, we need to use the size
+ // of GEP's pointer type rather than the size of the original
+ // pointer type.
+ APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
return V;
- Offset = GEPOffset;
+
+ // Stop traversal if the pointer offset wouldn't fit in the bit-width
+ // provided by the Offset argument. This can happen due to AddrSpaceCast
+ // stripping.
+ if (GEPOffset.getMinSignedBits() > BitWidth)
+ return V;
+
+ Offset += GEPOffset.sextOrTrunc(BitWidth);
V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ } else if (Operator::getOpcode(V) == Instruction::BitCast ||
+ Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
- V = GA->getAliasee();
- } else {
- if (const auto *Call = dyn_cast<CallBase>(V))
- if (const Value *RV = Call->getReturnedArgOperand()) {
+ if (!GA->isInterposable())
+ V = GA->getAliasee();
+ } else if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (const Value *RV = Call->getReturnedArgOperand())
V = RV;
- continue;
- }
-
- return V;
}
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
return V;
@@ -648,10 +671,14 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
unsigned Align = 0;
if (auto *GO = dyn_cast<GlobalObject>(this)) {
- // Don't make any assumptions about function pointer alignment. Some
- // targets use the LSBs to store additional information.
- if (isa<Function>(GO))
- return 0;
+ if (isa<Function>(GO)) {
+ switch (DL.getFunctionPtrAlignType()) {
+ case DataLayout::FunctionPtrAlignType::Independent:
+ return DL.getFunctionPtrAlign();
+ case DataLayout::FunctionPtrAlignType::MultipleOfFunctionAlign:
+ return std::max(DL.getFunctionPtrAlign(), GO->getAlignment());
+ }
+ }
Align = GO->getAlignment();
if (Align == 0) {
if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
@@ -931,7 +958,7 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
<< Old->getName() << " to " << *New->getType() << " %"
<< New->getName() << "\n";
llvm_unreachable(
- "A weak tracking value handle still pointed to the old value!\n");
+ "A weak tracking value handle still pointed to the old value!\n");
default:
break;
}
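The generalized stripAndAccumulateConstantOffsets above accepts any pointer or pointer-vector value and can optionally look through non-inbounds GEPs. A minimal sketch of a caller; the helper name is illustrative, and the Offset width must match the index type size for the pointer's type:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

const llvm::Value *baseAndOffset(const llvm::Value *Ptr,
                                 const llvm::DataLayout &DL,
                                 llvm::APInt &Offset) {
  // Size the accumulator for the pointer's index type, as the assert requires.
  Offset = llvm::APInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  return Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                /*AllowNonInbounds=*/true);
}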
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index f4bea5604043..417ec045071d 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -1,9 +1,8 @@
//===- ValueSymbolTable.cpp - Implement the ValueSymbolTable class --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 30e77b92009f..9346c8bda75d 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -1,9 +1,8 @@
//===-- Verifier.cpp - Implement the Module Verifier -----------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -467,6 +466,7 @@ private:
void visitReturnInst(ReturnInst &RI);
void visitSwitchInst(SwitchInst &SI);
void visitIndirectBrInst(IndirectBrInst &BI);
+ void visitCallBrInst(CallBrInst &CBI);
void visitSelectInst(SelectInst &SI);
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
@@ -500,7 +500,7 @@ private:
const Value *V);
void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V);
void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
- const Value *V);
+ const Value *V, bool IsIntrinsic);
void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
void visitConstantExprsRecursively(const Constant *EntryC);
@@ -641,18 +641,18 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
PointerType *FuncPtrTy =
FunctionType::get(Type::getVoidTy(Context), false)->
getPointerTo(DL.getProgramAddressSpace());
- // FIXME: Reject the 2-field form in LLVM 4.0.
Assert(STy &&
(STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
STy->getTypeAtIndex(1) == FuncPtrTy,
"wrong type for intrinsic global variable", &GV);
- if (STy->getNumElements() == 3) {
- Type *ETy = STy->getTypeAtIndex(2);
- Assert(ETy->isPointerTy() &&
- cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
- "wrong type for intrinsic global variable", &GV);
- }
+ Assert(STy->getNumElements() == 3,
+ "the third field of the element type is mandatory, "
+ "specify i8* null to migrate from the obsoleted 2-field form");
+ Type *ETy = STy->getTypeAtIndex(2);
+ Assert(ETy->isPointerTy() &&
+ cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
+ "wrong type for intrinsic global variable", &GV);
}
}
@@ -691,6 +691,13 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
"DIGlobalVariableExpression");
}
+ // Scalable vectors cannot be global variables, since we don't know
+ // the runtime size. If the global is a struct or an array containing
+ // scalable vectors, that will be caught by the isValidElementType methods
+ // in StructType or ArrayType instead.
+ if (auto *VTy = dyn_cast<VectorType>(GV.getValueType()))
+ Assert(!VTy->isScalable(), "Globals cannot contain scalable vectors", &GV);
+
if (!GV.hasInitializer()) {
visitGlobalValue(GV);
return;
@@ -926,7 +933,8 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
if (N.getDWARFAddressSpace()) {
AssertDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
- N.getTag() == dwarf::DW_TAG_reference_type,
+ N.getTag() == dwarf::DW_TAG_reference_type ||
+ N.getTag() == dwarf::DW_TAG_rvalue_reference_type,
"DWARF address space only applies to pointer or reference types",
&N);
}
@@ -1156,6 +1164,14 @@ void Verifier::visitDILexicalBlockFile(const DILexicalBlockFile &N) {
visitDILexicalBlockBase(N);
}
+void Verifier::visitDICommonBlock(const DICommonBlock &N) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_common_block, "invalid tag", &N);
+ if (auto *S = N.getRawScope())
+ AssertDI(isa<DIScope>(S), "invalid scope ref", &N, S);
+ if (auto *S = N.getRawDecl())
+ AssertDI(isa<DIGlobalVariable>(S), "invalid declaration", &N, S);
+}
+
void Verifier::visitDINamespace(const DINamespace &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
if (auto *S = N.getRawScope())
@@ -1224,7 +1240,6 @@ void Verifier::visitDIGlobalVariable(const DIGlobalVariable &N) {
visitDIVariable(N);
AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
- AssertDI(!N.getName().empty(), "missing global variable name", &N);
AssertDI(isType(N.getRawType()), "invalid type ref", &N, N.getRawType());
AssertDI(N.getType(), "missing global variable type", &N);
if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
@@ -1478,9 +1493,12 @@ void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) {
static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
switch (Kind) {
case Attribute::NoReturn:
+ case Attribute::NoSync:
+ case Attribute::WillReturn:
case Attribute::NoCfCheck:
case Attribute::NoUnwind:
case Attribute::NoInline:
+ case Attribute::NoFree:
case Attribute::AlwaysInline:
case Attribute::OptimizeForSize:
case Attribute::StackProtect:
@@ -1498,6 +1516,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
case Attribute::ReturnsTwice:
case Attribute::SanitizeAddress:
case Attribute::SanitizeHWAddress:
+ case Attribute::SanitizeMemTag:
case Attribute::SanitizeThread:
case Attribute::SanitizeMemory:
case Attribute::MinSize:
@@ -1562,6 +1581,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
verifyAttributeTypes(Attrs, /*IsFunction=*/false, V);
+ if (Attrs.hasAttribute(Attribute::ImmArg)) {
+ Assert(Attrs.getNumAttributes() == 1,
+ "Attribute 'immarg' is incompatible with other attributes", V);
+ }
+
// Check for mutually incompatible attributes. Only inreg is compatible with
// sret.
unsigned AttrCount = 0;
@@ -1616,6 +1640,11 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
"'noinline and alwaysinline' are incompatible!",
V);
+ if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
+ Assert(Attrs.getByValType() == cast<PointerType>(Ty)->getElementType(),
+ "Attribute 'byval' type does not match parameter!", V);
+ }
+
AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
"Wrong types for attribute: " +
@@ -1649,7 +1678,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
// Check parameter attributes against a function type.
// The value V is printed in error messages.
void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
- const Value *V) {
+ const Value *V, bool IsIntrinsic) {
if (Attrs.isEmpty())
return;
@@ -1686,6 +1715,11 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
Type *Ty = FT->getParamType(i);
AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
+ if (!IsIntrinsic) {
+ Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
+ "immarg attribute only applies to intrinsics",V);
+ }
+
verifyParameterAttrs(ArgAttrs, Ty, V);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
@@ -1904,16 +1938,8 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
"reordering restrictions required by safepoint semantics",
Call);
- const Value *IDV = Call.getArgOperand(0);
- Assert(isa<ConstantInt>(IDV), "gc.statepoint ID must be a constant integer",
- Call);
-
- const Value *NumPatchBytesV = Call.getArgOperand(1);
- Assert(isa<ConstantInt>(NumPatchBytesV),
- "gc.statepoint number of patchable bytes must be a constant integer",
- Call);
const int64_t NumPatchBytes =
- cast<ConstantInt>(NumPatchBytesV)->getSExtValue();
+ cast<ConstantInt>(Call.getArgOperand(1))->getSExtValue();
assert(isInt<32>(NumPatchBytes) && "NumPatchBytesV is an i32!");
Assert(NumPatchBytes >= 0,
"gc.statepoint number of patchable bytes must be "
@@ -1926,12 +1952,7 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
"gc.statepoint callee must be of function pointer type", Call, Target);
FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
- const Value *NumCallArgsV = Call.getArgOperand(3);
- Assert(isa<ConstantInt>(NumCallArgsV),
- "gc.statepoint number of arguments to underlying call "
- "must be constant integer",
- Call);
- const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
+ const int NumCallArgs = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
Assert(NumCallArgs >= 0,
"gc.statepoint number of arguments to underlying call "
"must be positive",
@@ -1950,10 +1971,8 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
Assert(NumCallArgs == NumParams,
"gc.statepoint mismatch in number of call args", Call);
- const Value *FlagsV = Call.getArgOperand(4);
- Assert(isa<ConstantInt>(FlagsV),
- "gc.statepoint flags must be constant integer", Call);
- const uint64_t Flags = cast<ConstantInt>(FlagsV)->getZExtValue();
+ const uint64_t Flags
+ = cast<ConstantInt>(Call.getArgOperand(4))->getZExtValue();
Assert((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0,
"unknown flag used in gc.statepoint flags argument", Call);
@@ -2043,7 +2062,7 @@ void Verifier::verifyFrameRecoverIndices() {
unsigned MaxRecoveredIndex = Counts.second.second;
Assert(MaxRecoveredIndex <= EscapedObjectCount,
"all indices passed to llvm.localrecover must be less than the "
- "number of arguments passed ot llvm.localescape in the parent "
+ "number of arguments passed to llvm.localescape in the parent "
"function",
F);
}
@@ -2130,8 +2149,11 @@ void Verifier::visitFunction(const Function &F) {
Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
"Attribute after last parameter!", &F);
+ bool isLLVMdotName = F.getName().size() >= 5 &&
+ F.getName().substr(0, 5) == "llvm.";
+
// Check function attributes.
- verifyFunctionAttrs(FT, Attrs, &F);
+ verifyFunctionAttrs(FT, Attrs, &F, isLLVMdotName);
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
@@ -2170,9 +2192,6 @@ void Verifier::visitFunction(const Function &F) {
break;
}
- bool isLLVMdotName = F.getName().size() >= 5 &&
- F.getName().substr(0, 5) == "llvm.";
-
// Check that the argument values match the function type for this function...
unsigned i = 0;
for (const Argument &Arg : F.args()) {
@@ -2220,8 +2239,11 @@ void Verifier::visitFunction(const Function &F) {
MDs.empty() ? nullptr : MDs.front().second);
} else if (F.isDeclaration()) {
for (const auto &I : MDs) {
- AssertDI(I.first != LLVMContext::MD_dbg,
- "function declaration may not have a !dbg attachment", &F);
+ // This is used for call site debug information.
+ AssertDI(I.first != LLVMContext::MD_dbg ||
+ !cast<DISubprogram>(I.second)->isDistinct(),
+ "function declaration may only have a unique !dbg attachment",
+ &F);
Assert(I.first != LLVMContext::MD_prof,
"function declaration may not have a !prof attachment", &F);
@@ -2299,36 +2321,44 @@ void Verifier::visitFunction(const Function &F) {
// FIXME: Check this incrementally while visiting !dbg attachments.
// FIXME: Only check when N is the canonical subprogram for F.
SmallPtrSet<const MDNode *, 32> Seen;
- for (auto &BB : F)
- for (auto &I : BB) {
- // Be careful about using DILocation here since we might be dealing with
- // broken code (this is the Verifier after all).
- DILocation *DL =
- dyn_cast_or_null<DILocation>(I.getDebugLoc().getAsMDNode());
- if (!DL)
- continue;
- if (!Seen.insert(DL).second)
- continue;
+ auto VisitDebugLoc = [&](const Instruction &I, const MDNode *Node) {
+ // Be careful about using DILocation here since we might be dealing with
+ // broken code (this is the Verifier after all).
+ const DILocation *DL = dyn_cast_or_null<DILocation>(Node);
+ if (!DL)
+ return;
+ if (!Seen.insert(DL).second)
+ return;
- Metadata *Parent = DL->getRawScope();
- AssertDI(Parent && isa<DILocalScope>(Parent),
- "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
- Parent);
- DILocalScope *Scope = DL->getInlinedAtScope();
- if (Scope && !Seen.insert(Scope).second)
- continue;
+ Metadata *Parent = DL->getRawScope();
+ AssertDI(Parent && isa<DILocalScope>(Parent),
+ "DILocation's scope must be a DILocalScope", N, &F, &I, DL,
+ Parent);
+ DILocalScope *Scope = DL->getInlinedAtScope();
+ if (Scope && !Seen.insert(Scope).second)
+ return;
- DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
+ DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr;
- // Scope and SP could be the same MDNode and we don't want to skip
- // validation in that case
- if (SP && ((Scope != SP) && !Seen.insert(SP).second))
- continue;
+ // Scope and SP could be the same MDNode and we don't want to skip
+ // validation in that case
+ if (SP && ((Scope != SP) && !Seen.insert(SP).second))
+ return;
- // FIXME: Once N is canonical, check "SP == &N".
- AssertDI(SP->describes(&F),
- "!dbg attachment points at wrong subprogram for function", N, &F,
- &I, DL, Scope, SP);
+ // FIXME: Once N is canonical, check "SP == &N".
+ AssertDI(SP->describes(&F),
+ "!dbg attachment points at wrong subprogram for function", N, &F,
+ &I, DL, Scope, SP);
+ };
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ VisitDebugLoc(I, I.getDebugLoc().getAsMDNode());
+ // The llvm.loop annotations also contain two DILocations.
+ if (auto MD = I.getMetadata(LLVMContext::MD_loop))
+ for (unsigned i = 1; i < MD->getNumOperands(); ++i)
+ VisitDebugLoc(I, dyn_cast_or_null<MDNode>(MD->getOperand(i)));
+ if (BrokenDebugInfo)
+ return;
}
}
@@ -2451,6 +2481,26 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
visitTerminator(BI);
}
+void Verifier::visitCallBrInst(CallBrInst &CBI) {
+ Assert(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!",
+ &CBI);
+ Assert(CBI.getType()->isVoidTy(), "Callbr return value is not supported!",
+ &CBI);
+ for (unsigned i = 0, e = CBI.getNumSuccessors(); i != e; ++i)
+ Assert(CBI.getSuccessor(i)->getType()->isLabelTy(),
+ "Callbr successors must all have pointer type!", &CBI);
+ for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) {
+ Assert(i >= CBI.getNumArgOperands() || !isa<BasicBlock>(CBI.getOperand(i)),
+ "Using an unescaped label as a callbr argument!", &CBI);
+ if (isa<BasicBlock>(CBI.getOperand(i)))
+ for (unsigned j = i + 1; j != e; ++j)
+ Assert(CBI.getOperand(i) != CBI.getOperand(j),
+ "Duplicate callbr destination!", &CBI);
+ }
+
+ visitTerminator(CBI);
+}
+
void Verifier::visitSelectInst(SelectInst &SI) {
Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
SI.getOperand(2)),
@@ -2780,17 +2830,21 @@ void Verifier::visitCallBase(CallBase &Call) {
Assert(verifyAttributeCount(Attrs, Call.arg_size()),
"Attribute after last parameter!", Call);
+ bool IsIntrinsic = Call.getCalledFunction() &&
+ Call.getCalledFunction()->getName().startswith("llvm.");
+
+ Function *Callee
+ = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
+
if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) {
// Don't allow speculatable on call sites, unless the underlying function
// declaration is also speculatable.
- Function *Callee =
- dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
Assert(Callee && Callee->isSpeculatable(),
"speculatable attribute may not apply to call sites", Call);
}
// Verify call attributes.
- verifyFunctionAttrs(FTy, Attrs, &Call);
+ verifyFunctionAttrs(FTy, Attrs, &Call, IsIntrinsic);
// Conservatively check the inalloca argument.
// We have a bug if we can find that there is an underlying alloca without
@@ -2805,7 +2859,7 @@ void Verifier::visitCallBase(CallBase &Call) {
// For each argument of the callsite, if it has the swifterror argument,
// make sure the underlying alloca/parameter it comes from has a swifterror as
// well.
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
if (Call.paramHasAttr(i, Attribute::SwiftError)) {
Value *SwiftErrorArg = Call.getArgOperand(i);
if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
@@ -2822,6 +2876,21 @@ void Verifier::visitCallBase(CallBase &Call) {
Call);
}
+ if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) {
+ // Don't allow immarg on call sites, unless the underlying declaration
+ // also has the matching immarg.
+ Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
+ "immarg may not apply only to call sites",
+ Call.getArgOperand(i), Call);
+ }
+
+ if (Call.paramHasAttr(i, Attribute::ImmArg)) {
+ Value *ArgVal = Call.getArgOperand(i);
+ Assert(isa<ConstantInt>(ArgVal) || isa<ConstantFP>(ArgVal),
+ "immarg operand has non-immediate parameter", ArgVal, Call);
+ }
+ }
+
if (FTy->isVarArg()) {
// FIXME? is 'nest' even legal here?
bool SawNest = false;
@@ -2871,8 +2940,7 @@ void Verifier::visitCallBase(CallBase &Call) {
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (!Call.getCalledFunction() ||
- !Call.getCalledFunction()->getName().startswith("llvm.")) {
+ if (!IsIntrinsic) {
for (Type *ParamTy : FTy->params()) {
Assert(!ParamTy->isMetadataTy(),
"Function has metadata parameter but isn't an intrinsic", Call);
@@ -3236,7 +3304,7 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
unsigned NumRanges = NumOperands / 2;
Assert(NumRanges >= 1, "It should have at least one range!", Range);
- ConstantRange LastRange(1); // Dummy initial value
+ ConstantRange LastRange(1, true); // Dummy initial value
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Low =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i));
@@ -3431,10 +3499,22 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
- Assert(ElTy->isIntegerTy(), "atomicrmw " +
- AtomicRMWInst::getOperationName(Op) +
- " operand must have integer type!",
- &RMWI, ElTy);
+ if (Op == AtomicRMWInst::Xchg) {
+ Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
+ AtomicRMWInst::getOperationName(Op) +
+ " operand must have integer or floating point type!",
+ &RMWI, ElTy);
+ } else if (AtomicRMWInst::isFPOperation(Op)) {
+ Assert(ElTy->isFloatingPointTy(), "atomicrmw " +
+ AtomicRMWInst::getOperationName(Op) +
+ " operand must have floating point type!",
+ &RMWI, ElTy);
+ } else {
+ Assert(ElTy->isIntegerTy(), "atomicrmw " +
+ AtomicRMWInst::getOperationName(Op) +
+ " operand must have integer type!",
+ &RMWI, ElTy);
+ }
checkAtomicMemAccessSize(ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,
@@ -3886,7 +3966,7 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
}
// Quick check whether the def has already been encountered in the same block.
- // PHI nodes are not checked to prevent accepting preceeding PHIs, because PHI
+ // PHI nodes are not checked to prevent accepting preceding PHIs, because PHI
// uses are defined to happen on the incoming edge, not at the instruction.
//
// FIXME: If this operand is a MetadataAsValue (wrapping a LocalAsMetadata)
@@ -3981,7 +4061,8 @@ void Verifier::visitInstruction(Instruction &I) {
F->getIntrinsicID() == Intrinsic::coro_destroy ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
- F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
+ F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
+ F->getIntrinsicID() == Intrinsic::wasm_rethrow_in_catch,
"Cannot invoke an intrinsic other than donothing, patchpoint, "
"statepoint, coro_resume or coro_destroy",
&I);
@@ -4095,14 +4176,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
getIntrinsicInfoTableEntries(ID, Table);
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+ // Walk the descriptors to extract overloaded types.
SmallVector<Type *, 4> ArgTys;
- Assert(!Intrinsic::matchIntrinsicType(IFTy->getReturnType(),
- TableRef, ArgTys),
+ Intrinsic::MatchIntrinsicTypesResult Res =
+ Intrinsic::matchIntrinsicSignature(IFTy, TableRef, ArgTys);
+ Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchRet,
"Intrinsic has incorrect return type!", IF);
- for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
- Assert(!Intrinsic::matchIntrinsicType(IFTy->getParamType(i),
- TableRef, ArgTys),
- "Intrinsic has incorrect argument type!", IF);
+ Assert(Res != Intrinsic::MatchIntrinsicTypes_NoMatchArg,
+ "Intrinsic has incorrect argument type!", IF);
// Verify if the intrinsic call matches the vararg property.
if (IsVarArg)
@@ -4149,19 +4230,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"an array");
break;
}
- case Intrinsic::ctlz: // llvm.ctlz
- case Intrinsic::cttz: // llvm.cttz
- Assert(isa<ConstantInt>(Call.getArgOperand(1)),
- "is_zero_undef argument of bit counting intrinsics must be a "
- "constant int",
- Call);
- break;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -4211,9 +4287,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"alignment of arg 1 of memory intrinsic must be 0 or a power of 2",
Call);
}
- Assert(isa<ConstantInt>(Call.getArgOperand(3)),
- "isvolatile argument of memory intrinsics must be a constant int",
- Call);
+
break;
}
case Intrinsic::memcpy_element_unordered_atomic:
@@ -4222,11 +4296,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
const auto *AMI = cast<AtomicMemIntrinsic>(&Call);
ConstantInt *ElementSizeCI =
- dyn_cast<ConstantInt>(AMI->getRawElementSizeInBytes());
- Assert(ElementSizeCI,
- "element size of the element-wise unordered atomic memory "
- "intrinsic must be a constant int",
- Call);
+ cast<ConstantInt>(AMI->getRawElementSizeInBytes());
const APInt &ElementSizeVal = ElementSizeCI->getValue();
Assert(ElementSizeVal.isPowerOf2(),
"element size of the element-wise atomic memory intrinsic "
@@ -4281,28 +4351,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Call);
break;
case Intrinsic::prefetch:
- Assert(isa<ConstantInt>(Call.getArgOperand(1)) &&
- isa<ConstantInt>(Call.getArgOperand(2)) &&
- cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
+ Assert(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 4,
"invalid arguments to llvm.prefetch", Call);
break;
case Intrinsic::stackprotector:
Assert(isa<AllocaInst>(Call.getArgOperand(1)->stripPointerCasts()),
"llvm.stackprotector parameter #2 must resolve to an alloca.", Call);
break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- Assert(isa<ConstantInt>(Call.getArgOperand(0)),
- "size argument of memory use markers must be a constant integer",
- Call);
- break;
- case Intrinsic::invariant_end:
- Assert(isa<ConstantInt>(Call.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", Call);
- break;
-
case Intrinsic::localescape: {
BasicBlock *BB = Call.getParent();
Assert(BB == &BB->getParent()->front(),
@@ -4327,9 +4383,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"llvm.localrecover first "
"argument must be function defined in this module",
Call);
- auto *IdxArg = dyn_cast<ConstantInt>(Call.getArgOperand(2));
- Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int",
- Call);
+ auto *IdxArg = cast<ConstantInt>(Call.getArgOperand(2));
auto &Entry = FrameEscapeInfo[Fn];
Entry.second = unsigned(
std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
@@ -4484,11 +4538,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Call);
Value *Ptr = Call.getArgOperand(0);
- // Value *Alignment = Call.getArgOperand(1);
+ ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(1));
Value *Mask = Call.getArgOperand(2);
Value *PassThru = Call.getArgOperand(3);
Assert(Mask->getType()->isVectorTy(), "masked_load: mask must be vector",
Call);
+ Assert(Alignment->getValue().isPowerOf2(),
+ "masked_load: alignment must be a power of 2", Call);
// DataTy is the overloaded type
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
@@ -4504,10 +4560,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::masked_store: {
Value *Val = Call.getArgOperand(0);
Value *Ptr = Call.getArgOperand(1);
- // Value *Alignment = Call.getArgOperand(2);
+ ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(2));
Value *Mask = Call.getArgOperand(3);
Assert(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
Call);
+ Assert(Alignment->getValue().isPowerOf2(),
+ "masked_store: alignment must be a power of 2", Call);
// DataTy is the overloaded type
Type *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
@@ -4563,22 +4621,41 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"of ints");
break;
}
- case Intrinsic::smul_fix: {
+ case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
- "first operand of smul_fix must be an int type or vector "
+ "first operand of [us]mul_fix[_sat] must be an int type or vector "
"of ints");
Assert(Op2->getType()->isIntOrIntVectorTy(),
- "second operand of smul_fix must be an int type or vector "
+ "second operand of [us]mul_fix_[sat] must be an int type or vector "
"of ints");
- auto *Op3 = dyn_cast<ConstantInt>(Call.getArgOperand(2));
- Assert(Op3, "third argument of smul_fix must be a constant integer");
+ auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
Assert(Op3->getType()->getBitWidth() <= 32,
- "third argument of smul_fix must fit within 32 bits");
- Assert(Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
- "the scale of smul_fix must be less than the width of the operands");
+ "third argument of [us]mul_fix[_sat] must fit within 32 bits");
+
+ if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat) {
+ Assert(
+ Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
+ "the scale of smul_fix[_sat] must be less than the width of the operands");
+ } else {
+ Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
+ "the scale of umul_fix[_sat] must be less than or equal to the width of "
+ "the operands");
+ }
+ break;
+ }
+ case Intrinsic::lround:
+ case Intrinsic::llround:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint: {
+ Type *ValTy = Call.getArgOperand(0)->getType();
+ Type *ResultTy = Call.getType();
+ Assert(!ValTy->isVectorTy() && !ResultTy->isVectorTy(),
+ "Intrinsic does not support vectors", &Call);
break;
}
};
@@ -4605,17 +4682,109 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
unsigned NumOperands = FPI.getNumArgOperands();
- Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
- (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
- "invalid arguments for constrained FP intrinsic", &FPI);
- Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
- "invalid exception behavior argument", &FPI);
- Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
- "invalid rounding mode argument", &FPI);
- Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid,
- "invalid rounding mode argument", &FPI);
- Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid,
- "invalid exception behavior argument", &FPI);
+ bool HasExceptionMD = false;
+ bool HasRoundingMD = false;
+ switch (FPI.getIntrinsicID()) {
+ case Intrinsic::experimental_constrained_sqrt:
+ case Intrinsic::experimental_constrained_sin:
+ case Intrinsic::experimental_constrained_cos:
+ case Intrinsic::experimental_constrained_exp:
+ case Intrinsic::experimental_constrained_exp2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_rint:
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
+ Assert((NumOperands == 3), "invalid arguments for constrained FP intrinsic",
+ &FPI);
+ HasExceptionMD = true;
+ HasRoundingMD = true;
+ break;
+
+ case Intrinsic::experimental_constrained_fma:
+ Assert((NumOperands == 5), "invalid arguments for constrained FP intrinsic",
+ &FPI);
+ HasExceptionMD = true;
+ HasRoundingMD = true;
+ break;
+
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_pow:
+ case Intrinsic::experimental_constrained_powi:
+ case Intrinsic::experimental_constrained_maxnum:
+ case Intrinsic::experimental_constrained_minnum:
+ Assert((NumOperands == 4), "invalid arguments for constrained FP intrinsic",
+ &FPI);
+ HasExceptionMD = true;
+ HasRoundingMD = true;
+ break;
+
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext: {
+ if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
+ Assert((NumOperands == 3),
+ "invalid arguments for constrained FP intrinsic", &FPI);
+ HasRoundingMD = true;
+ } else {
+ Assert((NumOperands == 2),
+ "invalid arguments for constrained FP intrinsic", &FPI);
+ }
+ HasExceptionMD = true;
+
+ Value *Operand = FPI.getArgOperand(0);
+ Type *OperandTy = Operand->getType();
+ Value *Result = &FPI;
+ Type *ResultTy = Result->getType();
+ Assert(OperandTy->isFPOrFPVectorTy(),
+ "Intrinsic first argument must be FP or FP vector", &FPI);
+ Assert(ResultTy->isFPOrFPVectorTy(),
+ "Intrinsic result must be FP or FP vector", &FPI);
+ Assert(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
+ "Intrinsic first argument and result disagree on vector use", &FPI);
+ if (OperandTy->isVectorTy()) {
+ auto *OperandVecTy = cast<VectorType>(OperandTy);
+ auto *ResultVecTy = cast<VectorType>(ResultTy);
+ Assert(OperandVecTy->getNumElements() == ResultVecTy->getNumElements(),
+ "Intrinsic first argument and result vector lengths must be equal",
+ &FPI);
+ }
+ if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
+ Assert(OperandTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits(),
+ "Intrinsic first argument's type must be larger than result type",
+ &FPI);
+ } else {
+ Assert(OperandTy->getScalarSizeInBits() < ResultTy->getScalarSizeInBits(),
+ "Intrinsic first argument's type must be smaller than result type",
+ &FPI);
+ }
+ }
+ break;
+
+ default:
+ llvm_unreachable("Invalid constrained FP intrinsic!");
+ }
+
+ // If a non-metadata argument is passed in a metadata slot then the
+ // error will be caught earlier when the incorrect argument doesn't
+ // match the specification in the intrinsic call table. Thus, no
+ // argument type check is needed here.
+
+ if (HasExceptionMD) {
+ Assert(FPI.getExceptionBehavior().hasValue(),
+ "invalid exception behavior argument", &FPI);
+ }
+ if (HasRoundingMD) {
+ Assert(FPI.getRoundingMode().hasValue(),
+ "invalid rounding mode argument", &FPI);
+ }
}
void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) {
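Among the many Verifier changes above, the atomicrmw check is relaxed so that xchg (and the dedicated FP operations) may use floating-point element types. A small sketch of IR construction that the updated rule accepts; the builder and function setup are assumed to exist, and the names are illustrative:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

void buildFloatXchg(llvm::IRBuilder<> &B, llvm::Value *FloatPtr,
                    llvm::Value *FloatVal, llvm::Function &F) {
  // Previously every atomicrmw operand had to be an integer; xchg on a
  // float/double location now verifies cleanly.
  B.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, FloatPtr, FloatVal,
                    llvm::AtomicOrdering::SequentiallyConsistent);
  // verifyFunction returns true (and prints to the stream) if it finds errors.
  llvm::verifyFunction(F, &llvm::errs());
}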
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
index 36bbf719bb61..7ca6c2fca52a 100644
--- a/lib/IRReader/IRReader.cpp
+++ b/lib/IRReader/IRReader.cpp
@@ -1,9 +1,8 @@
//===---- IRReader.cpp - Reader for LLVM IR files -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,9 +29,9 @@ static const char *const TimeIRParsingGroupDescription = "LLVM IR Parsing";
static const char *const TimeIRParsingName = "parse";
static const char *const TimeIRParsingDescription = "Parse IR";
-static std::unique_ptr<Module>
-getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
- LLVMContext &Context, bool ShouldLazyLoadMetadata) {
+std::unique_ptr<Module>
+llvm::getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
+ LLVMContext &Context, bool ShouldLazyLoadMetadata) {
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
Expected<std::unique_ptr<Module>> ModuleOrErr = getOwningLazyBitcodeModule(
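With getLazyIRModule now exported from the llvm namespace, out-of-tree tools can lazily materialize a module themselves. A minimal sketch, with the input path purely illustrative:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"

std::unique_ptr<llvm::Module> loadLazily(llvm::LLVMContext &Ctx) {
  auto BufOrErr = llvm::MemoryBuffer::getFile("input.bc");
  if (!BufOrErr)
    return nullptr;
  llvm::SMDiagnostic Err;
  // Function bodies (and, optionally, metadata) are materialized on demand.
  return llvm::getLazyIRModule(std::move(*BufOrErr), Err, Ctx,
                               /*ShouldLazyLoadMetadata=*/true);
}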
diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp
index 089e77e742eb..000ab91dba7c 100644
--- a/lib/LTO/Caching.cpp
+++ b/lib/LTO/Caching.cpp
@@ -1,9 +1,8 @@
//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,21 +38,23 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
SmallString<64> EntryPath;
sys::path::append(EntryPath, CacheDirectoryPath, "llvmcache-" + Key);
// First, see if we have a cache hit.
- int FD;
SmallString<64> ResultPath;
- std::error_code EC = sys::fs::openFileForRead(
- Twine(EntryPath), FD, sys::fs::OF_UpdateAtime, &ResultPath);
- if (!EC) {
+ Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
+ Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath);
+ std::error_code EC;
+ if (FDOrErr) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
- MemoryBuffer::getOpenFile(FD, EntryPath,
- /*FileSize*/ -1,
- /*RequiresNullTerminator*/ false);
- close(FD);
+ MemoryBuffer::getOpenFile(*FDOrErr, EntryPath,
+ /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false);
+ sys::fs::closeFile(*FDOrErr);
if (MBOrErr) {
AddBuffer(Task, std::move(*MBOrErr));
return AddStreamFn();
}
EC = MBOrErr.getError();
+ } else {
+ EC = errorToErrorCode(FDOrErr.takeError());
}
// On Windows we can fail to open a cache file with a permission denied
@@ -87,9 +88,9 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
// Open the file first to avoid racing with a cache pruner.
ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
- MemoryBuffer::getOpenFile(TempFile.FD, TempFile.TmpName,
- /*FileSize*/ -1,
- /*RequiresNullTerminator*/ false);
+ MemoryBuffer::getOpenFile(
+ sys::fs::convertFDToNativeFile(TempFile.FD), TempFile.TmpName,
+ /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
if (!MBOrErr)
report_fatal_error(Twine("Failed to open new cache file ") +
TempFile.TmpName + ": " +
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 3a955060deaa..64506890956a 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -1,9 +1,8 @@
//===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,9 +20,11 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/LTO/LTOBackend.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
@@ -186,12 +187,15 @@ void llvm::computeLTOCacheKey(
auto AddUsedThings = [&](GlobalValueSummary *GS) {
if (!GS) return;
AddUnsigned(GS->isLive());
+ AddUnsigned(GS->canAutoHide());
for (const ValueInfo &VI : GS->refs()) {
AddUnsigned(VI.isDSOLocal());
AddUsedCfiGlobal(VI.getGUID());
}
- if (auto *GVS = dyn_cast<GlobalVarSummary>(GS))
- AddUnsigned(GVS->isReadOnly());
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(GS)) {
+ AddUnsigned(GVS->maybeReadOnly());
+ AddUnsigned(GVS->maybeWriteOnly());
+ }
if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
for (auto &TT : FS->type_tests())
UsedTypeIds.insert(TT);
@@ -294,13 +298,13 @@ void llvm::computeLTOCacheKey(
}
static void thinLTOResolvePrevailingGUID(
- GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
- DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
+ ValueInfo VI, DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
- recordNewLinkage) {
- for (auto &S : GVSummaryList) {
+ recordNewLinkage,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ for (auto &S : VI.getSummaryList()) {
GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
// Ignore local and appending linkage values since the linker
// doesn't resolve them.
@@ -315,17 +319,29 @@ static void thinLTOResolvePrevailingGUID(
// ensure a copy is kept to satisfy the exported reference.
// FIXME: We may want to split the compile time and correctness
// aspects into separate routines.
- if (isPrevailing(GUID, S.get())) {
- if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
+ if (isPrevailing(VI.getGUID(), S.get())) {
+ if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) {
S->setLinkage(GlobalValue::getWeakLinkage(
GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
+ // The kept copy is eligible for auto-hiding (hidden visibility) if all
+ // copies were (i.e. they were all linkonce_odr global unnamed addr).
+ // If any copy is not (e.g. it was originally weak_odr), then the symbol
+ // must remain externally available (e.g. a weak_odr from an explicitly
+ // instantiated template). Additionally, if it is in the
+ // GUIDPreservedSymbols set, that means that it is visible outside
+ // the summary (e.g. in a native object or a bitcode file without
+ // summary), and in that case we cannot hide it as it isn't possible to
+ // check all copies.
+ S->setCanAutoHide(VI.canAutoHide() &&
+ !GUIDPreservedSymbols.count(VI.getGUID()));
+ }
}
// Alias and aliasee can't be turned into available_externally.
else if (!isa<AliasSummary>(S.get()) &&
!GlobalInvolvedWithAlias.count(S.get()))
S->setLinkage(GlobalValue::AvailableExternallyLinkage);
if (S->linkage() != OriginalLinkage)
- recordNewLinkage(S->modulePath(), GUID, S->linkage());
+ recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
}
}
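In the hunk above, the prevailing linkonce_odr copy is only marked auto-hide eligible when every summarized copy was eligible and the symbol is not in the preserved set. A restatement of that predicate as a free function, for readability only (not part of the commit):

    #include "llvm/ADT/DenseSet.h"
    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/ModuleSummaryIndex.h"
    using namespace llvm;

    // Hidden visibility is only safe when no copy ever needed an externally
    // visible address (VI.canAutoHide()) and nothing outside the summary -- a
    // native object or a bitcode file without a summary -- may still reference
    // the symbol (the preserved-GUID set).
    static bool mayAutoHide(ValueInfo VI,
                            const DenseSet<GlobalValue::GUID> &PreservedGUIDs) {
      return VI.canAutoHide() && !PreservedGUIDs.count(VI.getGUID());
    }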
@@ -340,7 +356,8 @@ void llvm::thinLTOResolvePrevailingInIndex(
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
- recordNewLinkage) {
+ recordNewLinkage,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
// We won't optimize the globals that are referenced by an alias for now
// Ideally we should turn the alias into a global and duplicate the definition
// when needed.
@@ -351,9 +368,17 @@ void llvm::thinLTOResolvePrevailingInIndex(
GlobalInvolvedWithAlias.insert(&AS->getAliasee());
for (auto &I : Index)
- thinLTOResolvePrevailingGUID(I.second.SummaryList, I.first,
- GlobalInvolvedWithAlias, isPrevailing,
- recordNewLinkage);
+ thinLTOResolvePrevailingGUID(Index.getValueInfo(I), GlobalInvolvedWithAlias,
+ isPrevailing, recordNewLinkage,
+ GUIDPreservedSymbols);
+}
+
+static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) {
+ if (auto *VarSummary = dyn_cast<GlobalVarSummary>(GVS->getBaseObject()))
+ return !VarSummary->maybeReadOnly() && !VarSummary->maybeWriteOnly() &&
+ (VarSummary->linkage() == GlobalValue::WeakODRLinkage ||
+ VarSummary->linkage() == GlobalValue::LinkOnceODRLinkage);
+ return false;
}
static void thinLTOInternalizeAndPromoteGUID(
@@ -370,7 +395,13 @@ static void thinLTOInternalizeAndPromoteGUID(
S->linkage() != GlobalValue::AppendingLinkage &&
// We can't internalize available_externally globals because this
// can break function pointer equality.
- S->linkage() != GlobalValue::AvailableExternallyLinkage)
+ S->linkage() != GlobalValue::AvailableExternallyLinkage &&
+ // Functions and read-only variables with linkonce_odr and
+ // weak_odr linkage can be internalized. We can't internalize
+ // linkonce_odr and weak_odr variables which are both modified
+ // and read somewhere in the program because reads and writes
+ // will become inconsistent.
+ !isWeakObjectWithRWAccess(S.get()))
S->setLinkage(GlobalValue::InternalLinkage);
}
}
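The isWeakObjectWithRWAccess() guard added above exists because internalizing a weak ODR variable per module would split one logical object into several. A made-up C++ example (not from this commit) of the situation it protects:

    // counter.h -- an inline variable lowers to a linkonce_odr global.
    inline int Counter = 0;
    inline void bump() { ++Counter; }   // writes Counter

    // a.cpp
    #include "counter.h"
    void touchA() { bump(); }

    // b.cpp
    #include "counter.h"
    int readB() { return Counter; }     // reads Counter

If ThinLTO internalized Counter separately into a.o and b.o, the write in touchA() and the read in readB() would no longer refer to the same object. So variables that are both read and written somewhere in the program keep their weak ODR linkage, while read-only or write-only variables and functions remain safe to internalize.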
@@ -397,6 +428,7 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
File->TargetTriple = FOrErr->TheReader.getTargetTriple();
File->SourceFileName = FOrErr->TheReader.getSourceFileName();
File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
+ File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
File->ComdatTable = FOrErr->TheReader.getComdatTable();
for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
@@ -419,6 +451,11 @@ StringRef InputFile::getName() const {
return Mods[0].getModuleIdentifier();
}
+BitcodeModule &InputFile::getSingleBitcodeModule() {
+ assert(Mods.size() == 1 && "Expect only one bitcode module");
+ return Mods[0];
+}
+
LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
Config &Conf)
: ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
@@ -809,6 +846,45 @@ unsigned LTO::getMaxTasks() const {
return RegularLTO.ParallelCodeGenParallelismLevel + ThinLTO.ModuleMap.size();
}
+// If only some of the modules were split, we cannot correctly handle
+// code that contains type tests or type checked loads.
+Error LTO::checkPartiallySplit() {
+ if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
+ return Error::success();
+
+ Function *TypeTestFunc = RegularLTO.CombinedModule->getFunction(
+ Intrinsic::getName(Intrinsic::type_test));
+ Function *TypeCheckedLoadFunc = RegularLTO.CombinedModule->getFunction(
+ Intrinsic::getName(Intrinsic::type_checked_load));
+
+ // First check if there are type tests / type checked loads in the
+ // merged regular LTO module IR.
+ if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
+ (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
+ return make_error<StringError>(
+ "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
+ inconvertibleErrorCode());
+
+ // Otherwise check if there are any recorded in the combined summary from the
+ // ThinLTO modules.
+ for (auto &P : ThinLTO.CombinedIndex) {
+ for (auto &S : P.second.SummaryList) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ if (!FS->type_test_assume_vcalls().empty() ||
+ !FS->type_checked_load_vcalls().empty() ||
+ !FS->type_test_assume_const_vcalls().empty() ||
+ !FS->type_checked_load_const_vcalls().empty() ||
+ !FS->type_tests().empty())
+ return make_error<StringError>(
+ "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
+ inconvertibleErrorCode());
+ }
+ }
+ return Error::success();
+}
+
Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
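checkPartiallySplit() above refuses to proceed when only some inputs were built with split LTO units but type tests or type-checked loads are still present. The summary-side half of that check boils down to the query below, restated as a free function for clarity (illustrative, not code from the commit):

    #include "llvm/IR/ModuleSummaryIndex.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // True if any function summary in the combined index recorded CFI /
    // whole-program-devirtualization metadata, which only works when every
    // module was split consistently (hence the "recompile with
    // -fsplit-lto-unit" error).
    static bool summaryHasTypeMetadata(const ModuleSummaryIndex &Index) {
      for (const auto &P : Index)
        for (const auto &S : P.second.SummaryList)
          if (const auto *FS = dyn_cast<FunctionSummary>(S.get()))
            if (!FS->type_tests().empty() ||
                !FS->type_test_assume_vcalls().empty() ||
                !FS->type_checked_load_vcalls().empty() ||
                !FS->type_test_assume_const_vcalls().empty() ||
                !FS->type_checked_load_const_vcalls().empty())
              return true;
      return false;
    }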
@@ -840,20 +916,25 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
isPrevailing, Conf.OptLevel > 0);
// Setup output file to emit statistics.
- std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
- if (!Conf.StatsFile.empty()) {
- EnableStatistics(false);
- std::error_code EC;
- StatsFile =
- llvm::make_unique<ToolOutputFile>(Conf.StatsFile, EC, sys::fs::F_None);
- if (EC)
- return errorCodeToError(EC);
- StatsFile->keep();
- }
+ auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
+ if (!StatsFileOrErr)
+ return StatsFileOrErr.takeError();
+ std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
+
+ // Finalize linking of regular LTO modules containing summaries now that
+ // we have computed liveness information.
+ for (auto &M : RegularLTO.ModsWithSummaries)
+ if (Error Err = linkRegularLTO(std::move(M),
+ /*LivenessFromIndex=*/true))
+ return Err;
+
+ // Ensure we don't have inconsistently split LTO units with type tests.
+ if (Error Err = checkPartiallySplit())
+ return Err;
Error Result = runRegularLTO(AddStream);
if (!Result)
- Result = runThinLTO(AddStream, Cache);
+ Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
if (StatsFile)
PrintStatisticsJSON(StatsFile->os());
@@ -862,11 +943,6 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
}
Error LTO::runRegularLTO(AddStreamFn AddStream) {
- for (auto &M : RegularLTO.ModsWithSummaries)
- if (Error Err = linkRegularLTO(std::move(M),
- /*LivenessFromIndex=*/true))
- return Err;
-
// Make sure commons have the right size/alignment: we kept the largest from
// all the prevailing when adding the inputs, and we apply it here.
const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
@@ -1161,7 +1237,8 @@ ThinBackend lto::createWriteIndexesThinBackend(
};
}
-Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
+Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
if (ThinLTO.ModuleMap.empty())
return Error::success();
@@ -1243,7 +1320,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
thinLTOResolvePrevailingInIndex(ThinLTO.CombinedIndex, isPrevailing,
- recordNewLinkage);
+ recordNewLinkage, GUIDPreservedSymbols);
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
@@ -1264,25 +1341,37 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) {
}
Expected<std::unique_ptr<ToolOutputFile>>
-lto::setupOptimizationRemarks(LLVMContext &Context,
- StringRef LTORemarksFilename,
- bool LTOPassRemarksWithHotness, int Count) {
- if (LTOPassRemarksWithHotness)
- Context.setDiagnosticsHotnessRequested(true);
- if (LTORemarksFilename.empty())
- return nullptr;
-
- std::string Filename = LTORemarksFilename;
- if (Count != -1)
+lto::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
+ StringRef RemarksPasses, StringRef RemarksFormat,
+ bool RemarksWithHotness, int Count) {
+ std::string Filename = RemarksFilename;
+ if (!Filename.empty() && Count != -1)
Filename += ".thin." + llvm::utostr(Count) + ".yaml";
+ auto ResultOrErr = llvm::setupOptimizationRemarks(
+ Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness);
+ if (Error E = ResultOrErr.takeError())
+ return std::move(E);
+
+ if (*ResultOrErr)
+ (*ResultOrErr)->keep();
+
+ return ResultOrErr;
+}
+
+Expected<std::unique_ptr<ToolOutputFile>>
+lto::setupStatsFile(StringRef StatsFilename) {
+ // Setup output file to emit statistics.
+ if (StatsFilename.empty())
+ return nullptr;
+
+ llvm::EnableStatistics(false);
std::error_code EC;
- auto DiagnosticFile =
- llvm::make_unique<ToolOutputFile>(Filename, EC, sys::fs::F_None);
+ auto StatsFile =
+ llvm::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::F_None);
if (EC)
return errorCodeToError(EC);
- Context.setDiagnosticsOutputFile(
- llvm::make_unique<yaml::Output>(DiagnosticFile->os()));
- DiagnosticFile->keep();
- return std::move(DiagnosticFile);
+
+ StatsFile->keep();
+ return std::move(StatsFile);
}
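The new lto::setupStatsFile() helper is consumed elsewhere in this import (in LTO::run() above and in LTOCodeGenerator.cpp below). A condensed sketch of the calling convention; the output file name is made up, and the declaration is assumed to live next to setupOptimizationRemarks() in llvm/LTO/LTO.h:

    #include "llvm/ADT/Statistic.h"
    #include "llvm/LTO/LTO.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/ToolOutputFile.h"
    using namespace llvm;

    static Error emitStats() {
      // An empty filename yields a null ToolOutputFile, which simply disables
      // the JSON statistics output; the helper already calls keep() on success.
      Expected<std::unique_ptr<ToolOutputFile>> StatsFileOrErr =
          lto::setupStatsFile("lto-stats.json");
      if (!StatsFileOrErr)
        return StatsFileOrErr.takeError();
      std::unique_ptr<ToolOutputFile> StatsFile = std::move(*StatsFileOrErr);

      // ... run the LTO pipeline / codegen here ...

      if (StatsFile)
        PrintStatisticsJSON(StatsFile->os());
      return Error::success();
    }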
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index 926c419e34a8..7456e7175163 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -1,9 +1,8 @@
//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -33,9 +33,9 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -155,10 +155,17 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
const ModuleSummaryIndex *ImportSummary) {
Optional<PGOOptions> PGOOpt;
if (!Conf.SampleProfile.empty())
- PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping,
- false, true);
+ PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
+ PGOOptions::SampleUse, PGOOptions::NoCSAction, true);
+ else if (Conf.RunCSIRInstr) {
+ PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
+ PGOOptions::IRUse, PGOOptions::CSIRInstr);
+ } else if (!Conf.CSIRProfile.empty()) {
+ PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
+ PGOOptions::IRUse, PGOOptions::CSIRUse);
+ }
- PassBuilder PB(TM, PGOOpt);
+ PassBuilder PB(TM, PipelineTuningOptions(), PGOOpt);
AAManager AA;
// Parse a custom AA pipeline if asked to.
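The profile-mode selection above is a small decision ladder: sample profiles take precedence, then context-sensitive IR instrumentation, then use of an existing context-sensitive profile. A stand-alone restatement (the lto::Config fields are the ones this hunk reads; the helper itself is illustrative):

    #include "llvm/ADT/Optional.h"
    #include "llvm/LTO/Config.h"
    #include "llvm/Passes/PassBuilder.h"
    using namespace llvm;

    static Optional<PGOOptions> selectPGOOptions(const lto::Config &Conf) {
      if (!Conf.SampleProfile.empty())
        return PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
                          PGOOptions::SampleUse, PGOOptions::NoCSAction,
                          /*SamplePGOSupport=*/true);
      if (Conf.RunCSIRInstr)
        return PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
                          PGOOptions::IRUse, PGOOptions::CSIRInstr);
      if (!Conf.CSIRProfile.empty())
        return PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
                          PGOOptions::IRUse, PGOOptions::CSIRUse);
      return None; // No PGO requested.
    }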
@@ -274,6 +281,11 @@ static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
PMB.SLPVectorize = true;
PMB.OptLevel = Conf.OptLevel;
PMB.PGOSampleUse = Conf.SampleProfile;
+ PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr;
+ if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) {
+ PMB.EnablePGOCSInstrUse = true;
+ PMB.PGOInstrUse = Conf.CSIRProfile;
+ }
if (IsThinLTO)
PMB.populateThinLTOPassManager(passes);
else
@@ -302,7 +314,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
return;
std::unique_ptr<ToolOutputFile> DwoOut;
- SmallString<1024> DwoFile(Conf.DwoPath);
+ SmallString<1024> DwoFile(Conf.SplitDwarfOutput);
if (!Conf.DwoDir.empty()) {
std::error_code EC;
if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
@@ -311,11 +323,12 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
DwoFile = Conf.DwoDir;
sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
- }
+ TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
+ } else
+ TM->Options.MCOptions.SplitDwarfFile = Conf.SplitDwarfFile;
if (!DwoFile.empty()) {
std::error_code EC;
- TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
if (EC)
report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
@@ -419,7 +432,8 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupOptimizationRemarks(
- Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
+ Mod->getContext(), C.RemarksFilename, C.RemarksPasses, C.RemarksFormat,
+ C.RemarksWithHotness);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
@@ -473,7 +487,8 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupOptimizationRemarks(
- Mod.getContext(), Conf.RemarksFilename, Conf.RemarksWithHotness, Task);
+ Mod.getContext(), Conf.RemarksFilename, Conf.RemarksPasses,
+ Conf.RemarksFormat, Conf.RemarksWithHotness, Task);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 3b63bbc7e256..6bb3bfaefc9c 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -1,9 +1,8 @@
//===-LTOCodeGenerator.cpp - LLVM Link Time Optimizer ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/LTO/LTO.h"
@@ -81,15 +81,31 @@ cl::opt<bool> LTODiscardValueNames(
#endif
cl::Hidden);
-cl::opt<std::string>
- LTORemarksFilename("lto-pass-remarks-output",
- cl::desc("Output filename for pass remarks"),
- cl::value_desc("filename"));
-
-cl::opt<bool> LTOPassRemarksWithHotness(
+cl::opt<bool> RemarksWithHotness(
"lto-pass-remarks-with-hotness",
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+
+cl::opt<std::string>
+ RemarksFilename("lto-pass-remarks-output",
+ cl::desc("Output filename for pass remarks"),
+ cl::value_desc("filename"));
+
+cl::opt<std::string>
+ RemarksPasses("lto-pass-remarks-filter",
+ cl::desc("Only record optimization remarks from passes whose "
+ "names match the given regular expression"),
+ cl::value_desc("regex"));
+
+cl::opt<std::string> RemarksFormat(
+ "lto-pass-remarks-format",
+ cl::desc("The format used for serializing remarks (default: YAML)"),
+ cl::value_desc("format"), cl::init("yaml"));
+
+cl::opt<std::string> LTOStatsFile(
+ "lto-stats-file",
+ cl::desc("Save statistics to the specified file"),
+ cl::Hidden);
}
LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context)
@@ -120,6 +136,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeArgPromotionPass(R);
initializeJumpThreadingPass(R);
initializeSROALegacyPassPass(R);
+ initializeAttributorLegacyPassPass(R);
initializePostOrderFunctionAttrsLegacyPassPass(R);
initializeReversePostOrderFunctionAttrsLegacyPassPass(R);
initializeGlobalsAAWrapperPassPass(R);
@@ -505,14 +522,23 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
if (!this->determineTarget())
return false;
- auto DiagFileOrErr = lto::setupOptimizationRemarks(
- Context, LTORemarksFilename, LTOPassRemarksWithHotness);
+ auto DiagFileOrErr =
+ lto::setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
+ RemarksFormat, RemarksWithHotness);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("Can't get an output file for the remarks");
}
DiagnosticOutputFile = std::move(*DiagFileOrErr);
+ // Setup output file to emit statistics.
+ auto StatsFileOrErr = lto::setupStatsFile(LTOStatsFile);
+ if (!StatsFileOrErr) {
+ errs() << "Error: " << toString(StatsFileOrErr.takeError()) << "\n";
+ report_fatal_error("Can't get an output file for the statistics");
+ }
+ StatsFile = std::move(StatsFileOrErr.get());
+
// We always run the verifier once on the merged module, the `DisableVerify`
// parameter only applies to subsequent verify.
verifyMergedModuleOnce();
@@ -579,9 +605,13 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) {
[&]() { return createTargetMachine(); }, FileType,
ShouldRestoreGlobalsLinkage);
- // If statistics were requested, print them out after codegen.
- if (llvm::AreStatisticsEnabled())
- llvm::PrintStatistics();
+ // If statistics were requested, save them to the specified file or
+ // print them out after codegen.
+ if (StatsFile)
+ PrintStatisticsJSON(StatsFile->os());
+ else if (AreStatisticsEnabled())
+ PrintStatistics();
+
reportAndResetTimings();
finishOptimizationRemarks();
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 0d40d49dbe39..7ffe7bf84ba8 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -1,9 +1,8 @@
//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -131,7 +130,8 @@ LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path,
size_t map_size, off_t offset,
const TargetOptions &options) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
+ MemoryBuffer::getOpenFileSlice(sys::fs::convertFDToNativeFile(fd), path,
+ map_size, offset);
if (std::error_code EC = BufferOrErr.getError()) {
Context.emitError(EC.message());
return EC;
@@ -646,6 +646,32 @@ void LTOModule::parseMetadata() {
continue;
emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M);
}
+}
+
+lto::InputFile *LTOModule::createInputFile(const void *buffer,
+ size_t buffer_size, const char *path,
+ std::string &outErr) {
+ StringRef Data((const char *)buffer, buffer_size);
+ MemoryBufferRef BufferRef(Data, path);
+
+ Expected<std::unique_ptr<lto::InputFile>> ObjOrErr =
+ lto::InputFile::create(BufferRef);
+
+ if (ObjOrErr)
+ return ObjOrErr->release();
+
+ outErr = std::string(path) +
+ ": Could not read LTO input file: " + toString(ObjOrErr.takeError());
+ return nullptr;
+}
+
+size_t LTOModule::getDependentLibraryCount(lto::InputFile *input) {
+ return input->getDependentLibraries().size();
+}
- // Add other interesting metadata here.
+const char *LTOModule::getDependentLibrary(lto::InputFile *input, size_t index,
+ size_t *size) {
+ StringRef S = input->getDependentLibraries()[index];
+ *size = S.size();
+ return S.data();
}
diff --git a/lib/LTO/SummaryBasedOptimizations.cpp b/lib/LTO/SummaryBasedOptimizations.cpp
index bcdd984daa58..e919fd530fb0 100644
--- a/lib/LTO/SummaryBasedOptimizations.cpp
+++ b/lib/LTO/SummaryBasedOptimizations.cpp
@@ -1,9 +1,8 @@
//==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index d9ec68fe3eb5..1c52218836ca 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1,9 +1,8 @@
//===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
@@ -70,8 +70,10 @@ using namespace llvm;
namespace llvm {
// Flags -discard-value-names, defined in LTOCodeGenerator.cpp
extern cl::opt<bool> LTODiscardValueNames;
-extern cl::opt<std::string> LTORemarksFilename;
-extern cl::opt<bool> LTOPassRemarksWithHotness;
+extern cl::opt<std::string> RemarksFilename;
+extern cl::opt<std::string> RemarksPasses;
+extern cl::opt<bool> RemarksWithHotness;
+extern cl::opt<std::string> RemarksFormat;
}
namespace {
@@ -135,14 +137,13 @@ static void computePrevailingCopies(
}
}
-static StringMap<MemoryBufferRef>
-generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
- StringMap<MemoryBufferRef> ModuleMap;
- for (auto &ModuleBuffer : Modules) {
- assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
- ModuleMap.end() &&
+static StringMap<lto::InputFile *>
+generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) {
+ StringMap<lto::InputFile *> ModuleMap;
+ for (auto &M : Modules) {
+ assert(ModuleMap.find(M->getName()) == ModuleMap.end() &&
"Expect unique Buffer Identifier");
- ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
+ ModuleMap[M->getName()] = M.get();
}
return ModuleMap;
}
@@ -175,18 +176,19 @@ static void verifyLoadedModule(Module &TheModule) {
}
}
-static std::unique_ptr<Module>
-loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
- bool Lazy, bool IsImporting) {
+static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input,
+ LLVMContext &Context,
+ bool Lazy,
+ bool IsImporting) {
+ auto &Mod = Input->getSingleBitcodeModule();
SMDiagnostic Err;
Expected<std::unique_ptr<Module>> ModuleOrErr =
- Lazy
- ? getLazyBitcodeModule(Buffer, Context,
- /* ShouldLazyLoadMetadata */ true, IsImporting)
- : parseBitcodeFile(Buffer, Context);
+ Lazy ? Mod.getLazyModule(Context,
+ /* ShouldLazyLoadMetadata */ true, IsImporting)
+ : Mod.parseModule(Context);
if (!ModuleOrErr) {
handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
- SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(),
+ SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(),
SourceMgr::DK_Error, EIB.message());
Err.print("ThinLTO", errs());
});
@@ -194,16 +196,17 @@ loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
}
if (!Lazy)
verifyLoadedModule(*ModuleOrErr.get());
- return std::move(ModuleOrErr.get());
+ return std::move(*ModuleOrErr);
}
static void
crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
- StringMap<MemoryBufferRef> &ModuleMap,
+ StringMap<lto::InputFile*> &ModuleMap,
const FunctionImporter::ImportMapTy &ImportList) {
auto Loader = [&](StringRef Identifier) {
- return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(),
- /*Lazy=*/true, /*IsImporting*/ true);
+ auto &Input = ModuleMap[Identifier];
+ return loadModuleFromInput(Input, TheModule.getContext(),
+ /*Lazy=*/true, /*IsImporting*/ true);
};
FunctionImporter Importer(Index, Loader);
@@ -248,6 +251,15 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
PM.run(TheModule);
}
+static void
+addUsedSymbolToPreservedGUID(const lto::InputFile &File,
+ DenseSet<GlobalValue::GUID> &PreservedGUID) {
+ for (const auto &Sym : File.symbols()) {
+ if (Sym.isUsed())
+ PreservedGUID.insert(GlobalValue::getGUID(Sym.getIRName()));
+ }
+}
+
// Convert the PreservedSymbols map from "Name" based to "GUID" based.
static DenseSet<GlobalValue::GUID>
computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
@@ -337,17 +349,14 @@ public:
ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
if (EntryPath.empty())
return std::error_code();
- int FD;
SmallString<64> ResultPath;
- std::error_code EC = sys::fs::openFileForRead(
- Twine(EntryPath), FD, sys::fs::OF_UpdateAtime, &ResultPath);
- if (EC)
- return EC;
- ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
- MemoryBuffer::getOpenFile(FD, EntryPath,
- /*FileSize*/ -1,
- /*RequiresNullTerminator*/ false);
- close(FD);
+ Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
+ Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath);
+ if (!FDOrErr)
+ return errorToErrorCode(FDOrErr.takeError());
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile(
+ *FDOrErr, EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
+ sys::fs::closeFile(*FDOrErr);
return MBOrErr;
}
@@ -381,7 +390,7 @@ public:
static std::unique_ptr<MemoryBuffer>
ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
- StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
+ StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
@@ -447,7 +456,8 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
static void resolvePrevailingInIndex(
ModuleSummaryIndex &Index,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
- &ResolvedODR) {
+ &ResolvedODR,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
computePrevailingCopies(Index, PrevailingCopy);
@@ -466,7 +476,8 @@ static void resolvePrevailingInIndex(
ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
};
- thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage);
+ thinLTOResolvePrevailingInIndex(Index, isPrevailing, recordNewLinkage,
+ GUIDPreservedSymbols);
}
// Initialize the TargetMachine builder for a given Triple
@@ -488,15 +499,14 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
} // end anonymous namespace
void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
- ThinLTOBuffer Buffer(Data, Identifier);
- LLVMContext Context;
- StringRef TripleStr;
- ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
- Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
+ MemoryBufferRef Buffer(Data, Identifier);
- if (TripleOrErr)
- TripleStr = *TripleOrErr;
+ auto InputOrError = lto::InputFile::create(Buffer);
+ if (!InputOrError)
+ report_fatal_error("ThinLTO cannot create input file: " +
+ toString(InputOrError.takeError()));
+ auto TripleStr = (*InputOrError)->getTargetTriple();
Triple TheTriple(TripleStr);
if (Modules.empty())
@@ -508,7 +518,7 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple)));
}
- Modules.push_back(Buffer);
+ Modules.emplace_back(std::move(*InputOrError));
}
void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
@@ -549,9 +559,10 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
llvm::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
uint64_t NextModuleId = 0;
- for (auto &ModuleBuffer : Modules) {
- if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(),
- *CombinedIndex, NextModuleId++)) {
+ for (auto &Mod : Modules) {
+ auto &M = Mod->getSingleBitcodeModule();
+ if (Error Err =
+ M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) {
// FIXME diagnose
logAllUnhandledErrors(
std::move(Err), errs(),
@@ -593,8 +604,8 @@ static void computeDeadSymbolsInIndex(
* Perform promotion and renaming of exported internal functions.
* Index is updated to reflect linkage changes from weak resolution.
*/
-void ThinLTOCodeGenerator::promote(Module &TheModule,
- ModuleSummaryIndex &Index) {
+void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
+ const lto::InputFile &File) {
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -606,6 +617,9 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ // Add used symbol to the preserved symbols.
+ addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
@@ -617,7 +631,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
// Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- resolvePrevailingInIndex(Index, ResolvedODR);
+ resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
thinLTOResolvePrevailingInModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
@@ -633,7 +647,8 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
* Perform cross-module importing for the module identified by ModuleIdentifier.
*/
void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
- ModuleSummaryIndex &Index) {
+ ModuleSummaryIndex &Index,
+ const lto::InputFile &File) {
auto ModuleMap = generateModuleMap(Modules);
auto ModuleCount = Index.modulePaths().size();
@@ -645,6 +660,8 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
@@ -663,7 +680,8 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
*/
void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
Module &TheModule, ModuleSummaryIndex &Index,
- std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
+ std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
+ const lto::InputFile &File) {
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -675,6 +693,8 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
@@ -693,7 +713,8 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
* Emit the list of files needed for importing into module.
*/
void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
- ModuleSummaryIndex &Index) {
+ ModuleSummaryIndex &Index,
+ const lto::InputFile &File) {
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -705,6 +726,8 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
PreservedSymbols, Triple(TheModule.getTargetTriple()));
+ addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
@@ -727,10 +750,12 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
}
/**
- * Perform internalization. Index is updated to reflect linkage changes.
+ * Perform internalization. Runs promote and internalization together.
+ * Index is updated to reflect linkage changes.
*/
void ThinLTOCodeGenerator::internalize(Module &TheModule,
- ModuleSummaryIndex &Index) {
+ ModuleSummaryIndex &Index,
+ const lto::InputFile &File) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
@@ -739,6 +764,8 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
auto GUIDPreservedSymbols =
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
+
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -758,8 +785,20 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
if (ExportList.empty() && GUIDPreservedSymbols.empty())
return;
- // Internalization
+ // Resolve prevailing symbols
+ StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+ resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols);
+
+ // Promote the exported values in the index, so that they are promoted
+ // in the module.
internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index);
+
+ promoteModule(TheModule, Index);
+
+ // Internalization
+ thinLTOResolvePrevailingInModule(
+ TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+
thinLTOInternalizeModule(TheModule,
ModuleToDefinedGVSummaries[ModuleIdentifier]);
}
@@ -777,11 +816,13 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
/// Write out the generated object file, either from CacheEntryPath or from
/// OutputBuffer, preferring hard-link when possible.
/// Returns the path to the generated file in SavedObjectsDirectoryPath.
-static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
- StringRef SavedObjectsDirectoryPath,
- const MemoryBuffer &OutputBuffer) {
+std::string
+ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
+ const MemoryBuffer &OutputBuffer) {
+ auto ArchName = TMBuilder.TheTriple.getArchName();
SmallString<128> OutputPath(SavedObjectsDirectoryPath);
- llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
+ llvm::sys::path::append(OutputPath,
+ Twine(count) + "." + ArchName + ".thinlto.o");
OutputPath.c_str(); // Ensure the string is null terminated.
if (sys::fs::exists(OutputPath))
sys::fs::remove(OutputPath);
@@ -830,23 +871,22 @@ void ThinLTOCodeGenerator::run() {
// Perform only parallel codegen and return.
ThreadPool Pool;
int count = 0;
- for (auto &ModuleBuffer : Modules) {
+ for (auto &Mod : Modules) {
Pool.async([&](int count) {
LLVMContext Context;
Context.setDiscardValueNames(LTODiscardValueNames);
// Parse module now
- auto TheModule =
- loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
- /*IsImporting*/ false);
+ auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
+ /*IsImporting*/ false);
// CodeGen
auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create());
if (SavedObjectsDirectoryPath.empty())
ProducedBinaries[count] = std::move(OutputBuffer);
else
- ProducedBinaryFiles[count] = writeGeneratedObject(
- count, "", SavedObjectsDirectoryPath, *OutputBuffer);
+ ProducedBinaryFiles[count] =
+ writeGeneratedObject(count, "", *OutputBuffer);
}, count++);
}
@@ -881,6 +921,10 @@ void ThinLTOCodeGenerator::run() {
auto GUIDPreservedSymbols =
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
+ // Add used symbols from inputs to the preserved symbols.
+ for (const auto &M : Modules)
+ addUsedSymbolToPreservedGUID(*M, GUIDPreservedSymbols);
+
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
@@ -902,7 +946,7 @@ void ThinLTOCodeGenerator::run() {
// Resolve prevailing symbols, this has to be computed early because it
// impacts the caching.
- resolvePrevailingInIndex(*Index, ResolvedODR);
+ resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols);
// Use global summary-based analysis to identify symbols that can be
// internalized (because they aren't exported or preserved as per callback).
@@ -913,7 +957,7 @@ void ThinLTOCodeGenerator::run() {
// GVSummary and ResolvedODR maps to enable threaded access to these maps
// below.
for (auto &Module : Modules) {
- auto ModuleIdentifier = Module.getBufferIdentifier();
+ auto ModuleIdentifier = Module->getName();
ExportLists[ModuleIdentifier];
ImportLists[ModuleIdentifier];
ResolvedODR[ModuleIdentifier];
@@ -927,8 +971,10 @@ void ThinLTOCodeGenerator::run() {
ModulesOrdering.resize(Modules.size());
std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
- auto LSize = Modules[LeftIndex].getBuffer().size();
- auto RSize = Modules[RightIndex].getBuffer().size();
+ auto LSize =
+ Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
+ auto RSize =
+ Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
return LSize > RSize;
});
@@ -936,9 +982,9 @@ void ThinLTOCodeGenerator::run() {
{
ThreadPool Pool(ThreadCount);
for (auto IndexCount : ModulesOrdering) {
- auto &ModuleBuffer = Modules[IndexCount];
+ auto &Mod = Modules[IndexCount];
Pool.async([&](int count) {
- auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
+ auto ModuleIdentifier = Mod->getName();
auto &ExportList = ExportLists[ModuleIdentifier];
auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier];
@@ -963,8 +1009,7 @@ void ThinLTOCodeGenerator::run() {
ProducedBinaries[count] = std::move(ErrOrBuffer.get());
else
ProducedBinaryFiles[count] = writeGeneratedObject(
- count, CacheEntryPath, SavedObjectsDirectoryPath,
- *ErrOrBuffer.get());
+ count, CacheEntryPath, *ErrOrBuffer.get());
return;
}
}
@@ -973,7 +1018,8 @@ void ThinLTOCodeGenerator::run() {
Context.setDiscardValueNames(LTODiscardValueNames);
Context.enableDebugTypeODRUniquing();
auto DiagFileOrErr = lto::setupOptimizationRemarks(
- Context, LTORemarksFilename, LTOPassRemarksWithHotness, count);
+ Context, RemarksFilename, RemarksPasses, RemarksFormat,
+ RemarksWithHotness, count);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("ThinLTO: Can't get an output file for the "
@@ -981,9 +1027,8 @@ void ThinLTOCodeGenerator::run() {
}
// Parse module now
- auto TheModule =
- loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
- /*IsImporting*/ false);
+ auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
+ /*IsImporting*/ false);
// Save temps: original file.
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
@@ -1021,7 +1066,7 @@ void ThinLTOCodeGenerator::run() {
return;
}
ProducedBinaryFiles[count] = writeGeneratedObject(
- count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
+ count, CacheEntryPath, *OutputBuffer);
}, IndexCount);
}
}
diff --git a/lib/LTO/UpdateCompilerUsed.cpp b/lib/LTO/UpdateCompilerUsed.cpp
index 00482dee6e10..6434f902088d 100644
--- a/lib/LTO/UpdateCompilerUsed.cpp
+++ b/lib/LTO/UpdateCompilerUsed.cpp
@@ -1,9 +1,8 @@
//==-LTOInternalize.cpp - LLVM Link Time Optimizer Internalization Utility -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/LineEditor/LineEditor.cpp b/lib/LineEditor/LineEditor.cpp
index 533a928b2dfd..57e62bd64871 100644
--- a/lib/LineEditor/LineEditor.cpp
+++ b/lib/LineEditor/LineEditor.cpp
@@ -1,9 +1,8 @@
//===-- LineEditor.cpp - line editor --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index afbc57abfcc0..37515d93ed50 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -1,9 +1,8 @@
//===- lib/Linker/IRMover.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -403,6 +402,7 @@ class IRLinker {
DenseSet<GlobalValue *> ValuesToLink;
std::vector<GlobalValue *> Worklist;
+ std::vector<std::pair<GlobalValue *, Value*>> RAUWWorklist;
void maybeAdd(GlobalValue *GV) {
if (ValuesToLink.insert(GV).second)
@@ -489,12 +489,24 @@ class IRLinker {
void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
+ /// Replace all types in the source AttributeList with the
+ /// corresponding destination type.
+ AttributeList mapAttributeTypes(LLVMContext &C, AttributeList Attrs);
+
/// Functions that take care of cloning a specific global value type
/// into the destination module.
GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
Function *copyFunctionProto(const Function *SF);
GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA);
+ /// Perform "replace all uses with" operations. These work items need to be
+ /// performed as part of materialization, but we postpone them to happen after
+ /// materialization is done. The materializer called by ValueMapper is not
+ /// expected to delete constants, as ValueMapper is holding pointers to some
+ /// of them, but constant destruction may be indirectly triggered by RAUW.
+ /// Hence, the need to move this out of the materialization call chain.
+ void flushRAUWWorklist();
+
/// When importing for ThinLTO, prevent importing of types listed on
/// the DICompileUnit that we don't need a copy of in the importing
/// module.
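The RAUWWorklist member and the comment above describe a deferral pattern: replacements discovered during materialization are queued and replayed once the ValueMapper walk has finished. The pattern in miniature, independent of IRMover (illustrative, not part of the commit):

    #include "llvm/IR/GlobalValue.h"
    #include "llvm/IR/Value.h"
    #include <utility>
    #include <vector>
    using namespace llvm;

    // RAUW can transitively destroy constants (e.g. a ConstantExpr whose last
    // use disappears), and the mapper may still hold bare pointers to them.
    // Queueing the (old, new) pairs and flushing after materialization keeps
    // those pointers valid.
    struct DeferredRAUW {
      std::vector<std::pair<GlobalValue *, Value *>> Worklist;

      void schedule(GlobalValue *Old, Value *New) {
        Worklist.emplace_back(Old, New);
      }

      void flush() {
        for (auto &Item : Worklist) {
          Item.first->replaceAllUsesWith(Item.second);
          Item.first->eraseFromParent();
        }
        Worklist.clear();
      }
    };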
@@ -620,6 +632,21 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
return NewDGV;
}
+AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
+ for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+ if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+ Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+ if (!Ty)
+ continue;
+
+ Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+ Attrs = Attrs.addAttribute(
+ C, i, Attribute::getWithByValType(C, TypeMap.get(Ty)));
+ }
+ }
+ return Attrs;
+}
+
/// Link the function in the source module into the destination module if
/// needed, setting up mapping information.
Function *IRLinker::copyFunctionProto(const Function *SF) {
@@ -629,6 +656,7 @@ Function *IRLinker::copyFunctionProto(const Function *SF) {
Function::Create(TypeMap.get(SF->getFunctionType()),
GlobalValue::ExternalLinkage, SF->getName(), &DstM);
F->copyAttributesFrom(SF);
+ F->setAttributes(mapAttributeTypes(F->getContext(), F->getAttributes()));
return F;
}
@@ -884,8 +912,8 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
// Replace any uses of the two global variables with uses of the new
// global.
if (DstGV) {
- DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
- DstGV->eraseFromParent();
+ RAUWWorklist.push_back(
+ std::make_pair(DstGV, ConstantExpr::getBitCast(NG, DstGV->getType())));
}
return Ret;
@@ -984,9 +1012,12 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
}
if (DGV && NewGV != DGV) {
- DGV->replaceAllUsesWith(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewGV, DGV->getType()));
- DGV->eraseFromParent();
+ // Schedule "replace all uses with" to happen after materializing is
+ // done. It is not safe to do it now, since ValueMapper may be holding
+ // pointers to constants that will get deleted if RAUW runs.
+ RAUWWorklist.push_back(std::make_pair(
+ DGV,
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewGV, DGV->getType())));
}
return C;
@@ -1044,6 +1075,18 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
return Error::success();
}
+void IRLinker::flushRAUWWorklist() {
+ for (const auto Elem : RAUWWorklist) {
+ GlobalValue *Old;
+ Value *New;
+ std::tie(Old, New) = Elem;
+
+ Old->replaceAllUsesWith(New);
+ Old->eraseFromParent();
+ }
+ RAUWWorklist.clear();
+}
+
void IRLinker::prepareCompileUnitsForImport() {
NamedMDNode *SrcCompileUnits = SrcM->getNamedMetadata("llvm.dbg.cu");
if (!SrcCompileUnits)
@@ -1200,7 +1243,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
if (SrcBehaviorValue == Module::Override &&
SrcOp->getOperand(2) != DstOp->getOperand(2))
return stringErr("linking module flags '" + ID->getString() +
- "': IDs have conflicting override values");
+ "': IDs have conflicting override values in '" +
+ SrcM->getModuleIdentifier() + "' and '" +
+ DstM.getModuleIdentifier() + "'");
continue;
} else if (SrcBehaviorValue == Module::Override) {
// Update the destination flag to that of the source.
@@ -1211,7 +1256,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
// Diagnose inconsistent merge behavior types.
if (SrcBehaviorValue != DstBehaviorValue)
return stringErr("linking module flags '" + ID->getString() +
- "': IDs have conflicting behaviors");
+ "': IDs have conflicting behaviors in '" +
+ SrcM->getModuleIdentifier() + "' and '" +
+ DstM.getModuleIdentifier() + "'");
auto replaceDstValue = [&](MDNode *New) {
Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
@@ -1229,7 +1276,9 @@ Error IRLinker::linkModuleFlagsMetadata() {
// Emit an error if the values differ.
if (SrcOp->getOperand(2) != DstOp->getOperand(2))
return stringErr("linking module flags '" + ID->getString() +
- "': IDs have conflicting values");
+ "': IDs have conflicting values in '" +
+ SrcM->getModuleIdentifier() + "' and '" +
+ DstM.getModuleIdentifier() + "'");
continue;
}
case Module::Warning: {
@@ -1369,6 +1418,7 @@ Error IRLinker::run() {
Mapper.mapValue(*GV);
if (FoundError)
return std::move(*FoundError);
+ flushRAUWWorklist();
}
// Note that we are done linking global value bodies. This prevents
diff --git a/lib/Linker/LinkDiagnosticInfo.h b/lib/Linker/LinkDiagnosticInfo.h
index d91f19c69aac..30c16abaf509 100644
--- a/lib/Linker/LinkDiagnosticInfo.h
+++ b/lib/Linker/LinkDiagnosticInfo.h
@@ -1,9 +1,8 @@
//===- LinkDiagnosticInfo.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 25f31a3401a6..a18f4cc25bcc 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -1,9 +1,8 @@
//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index 18277a225640..8cba6b3281a5 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -1,9 +1,8 @@
//===- ConstantPools.cpp - ConstantPool class -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 89f3b30cddd6..2c68723a12f8 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -426,7 +425,8 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
W.OS << char(ELF::EV_CURRENT); // e_ident[EI_VERSION]
// e_ident[EI_OSABI]
W.OS << char(OWriter.TargetObjectWriter->getOSABI());
- W.OS << char(0); // e_ident[EI_ABIVERSION]
+ // e_ident[EI_ABIVERSION]
+ W.OS << char(OWriter.TargetObjectWriter->getABIVersion());
W.OS.write_zeros(ELF::EI_NIDENT - ELF::EI_PAD);
@@ -463,7 +463,7 @@ void ELFWriter::writeHeader(const MCAssembler &Asm) {
uint64_t ELFWriter::SymbolValue(const MCSymbol &Sym,
const MCAsmLayout &Layout) {
- if (Sym.isCommon() && Sym.isExternal())
+ if (Sym.isCommon() && (Sym.isTargetCommon() || Sym.isExternal()))
return Sym.getCommonAlignment();
uint64_t Res;
@@ -577,6 +577,10 @@ bool ELFWriter::isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol,
bool Used, bool Renamed) {
if (Symbol.isVariable()) {
const MCExpr *Expr = Symbol.getVariableValue();
+ // Target Expressions that are always inlined do not appear in the symtab
+ if (const auto *T = dyn_cast<MCTargetExpr>(Expr))
+ if (T->inlineAssignedExpr())
+ return false;
if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr)) {
if (Ref->getKind() == MCSymbolRefExpr::VK_WEAKREF)
return false;
@@ -656,8 +660,12 @@ void ELFWriter::computeSymbolTable(
if (Symbol.isAbsolute()) {
MSD.SectionIndex = ELF::SHN_ABS;
} else if (Symbol.isCommon()) {
- assert(!Local);
- MSD.SectionIndex = ELF::SHN_COMMON;
+ if (Symbol.isTargetCommon()) {
+ MSD.SectionIndex = Symbol.getIndex();
+ } else {
+ assert(!Local);
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ }
} else if (Symbol.isUndefined()) {
if (isSignature && !Used) {
MSD.SectionIndex = RevGroupMap.lookup(&Symbol);
@@ -710,7 +718,7 @@ void ELFWriter::computeSymbolTable(
if (HasLargeSectionIndex) {
MCSectionELF *SymtabShndxSection =
- Ctx.getELFSection(".symtab_shndxr", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
+ Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0, 4, "");
SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection);
SymtabShndxSection->setAlignment(4);
}
@@ -882,12 +890,16 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
return;
}
- if (ZlibStyle)
+ if (ZlibStyle) {
// Set the compressed flag. That is zlib style.
Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED);
- else
+ // Alignment field should reflect the requirements of
+ // the compressed section header.
+ Section.setAlignment(is64Bit() ? 8 : 4);
+ } else {
// Add "z" prefix to section name. This is zlib-gnu style.
MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str());
+ }
W.OS << CompressedContents;
}
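The new alignment value matches the compression header that a zlib-style (SHF_COMPRESSED) section begins with: the 64-bit header contains 8-byte fields, the 32-bit one only 4-byte fields. For reference, the generic-ABI layouts paraphrased as plain structs (not part of this diff):

    #include <cstdint>

    struct Elf32_Chdr {
      uint32_t ch_type;      // Compression algorithm; ELFCOMPRESS_ZLIB == 1.
      uint32_t ch_size;      // Uncompressed section size.
      uint32_t ch_addralign; // Uncompressed section alignment.
    };                       // 4-byte alignment.

    struct Elf64_Chdr {
      uint32_t ch_type;
      uint32_t ch_reserved;
      uint64_t ch_size;
      uint64_t ch_addralign;
    };                       // 8-byte alignment, hence setAlignment(is64Bit() ? 8 : 4).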
@@ -1271,18 +1283,25 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
// This is the first place we are able to copy this information.
Alias->setExternal(Symbol.isExternal());
Alias->setBinding(Symbol.getBinding());
+ Alias->setOther(Symbol.getOther());
if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
continue;
- // FIXME: produce a better error message.
+ // FIXME: Get source locations for these errors or diagnose them earlier.
if (Symbol.isUndefined() && Rest.startswith("@@") &&
- !Rest.startswith("@@@"))
- report_fatal_error("A @@ version cannot be undefined");
+ !Rest.startswith("@@@")) {
+ Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName +
+ " must be defined");
+ continue;
+ }
- if (Renames.count(&Symbol) && Renames[&Symbol] != Alias)
- report_fatal_error(llvm::Twine("Multiple symbol versions defined for ") +
- Symbol.getName());
+ if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) {
+ Asm.getContext().reportError(
+ SMLoc(), llvm::Twine("multiple symbol versions defined for ") +
+ Symbol.getName());
+ continue;
+ }
Renames.insert(std::make_pair(&Symbol, Alias));
}
@@ -1358,6 +1377,12 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
return true;
}
+ // Keep symbol type for a local ifunc because it may result in an IRELATIVE
+ // reloc that the dynamic loader will use to resolve the address at startup
+ // time.
+ if (Sym->getType() == ELF::STT_GNU_IFUNC)
+ return true;
+
// If a relocation points to a mergeable section, we have to be careful.
// If the offset is zero, a relocation with the section will encode the
// same information. With a non-zero offset, the situation is different.
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 92d3a8a2645f..9b1102cbe7d1 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -1,9 +1,8 @@
//===- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,6 +16,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCWasmObjectWriter.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -44,6 +44,9 @@ MCAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
case Triple::Wasm:
return createWasmObjectWriter(cast<MCWasmObjectTargetWriter>(std::move(TW)),
OS);
+ case Triple::XCOFF:
+ return createXCOFFObjectWriter(
+ cast<MCXCOFFObjectTargetWriter>(std::move(TW)), OS);
default:
llvm_unreachable("unexpected object format");
}
@@ -65,6 +68,7 @@ Optional<MCFixupKind> MCAsmBackend::getFixupKind(StringRef Name) const {
const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static const MCFixupKindInfo Builtins[] = {
+ {"FK_NONE", 0, 0, 0},
{"FK_Data_1", 0, 8, 0},
{"FK_Data_2", 0, 16, 0},
{"FK_Data_4", 0, 32, 0},
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 30f22d2d68f4..71e51e320f8b 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===- MCAsmInfo.cpp - Asm Info -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -71,6 +70,10 @@ MCAsmInfo::MCAsmInfo() {
MCAsmInfo::~MCAsmInfo() = default;
+void MCAsmInfo::addInitialFrameState(const MCCFIInstruction &Inst) {
+ InitialFrameState.push_back(Inst);
+}
+
bool MCAsmInfo::isSectionAtomizableBySymbols(const MCSection &Section) const {
return false;
}
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 15886eb619b9..9f19d163f57b 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -1,9 +1,8 @@
//===- MCAsmInfoCOFF.cpp - COFF asm properties ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index c74840982fb7..62bc5b8c9418 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -1,9 +1,8 @@
//===- MCAsmInfoDarwin.cpp - Darwin asm properties ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp
index b0dc43c6c868..a5e8aff7f129 100644
--- a/lib/MC/MCAsmInfoELF.cpp
+++ b/lib/MC/MCAsmInfoELF.cpp
@@ -1,9 +1,8 @@
//===- MCAsmInfoELF.cpp - ELF asm properties ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/MCAsmInfoWasm.cpp b/lib/MC/MCAsmInfoWasm.cpp
index d448664baa14..ce6ec7ef211e 100644
--- a/lib/MC/MCAsmInfoWasm.cpp
+++ b/lib/MC/MCAsmInfoWasm.cpp
@@ -1,9 +1,8 @@
//===-- MCAsmInfoWasm.cpp - Wasm asm properties -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@ void MCAsmInfoWasm::anchor() {}
MCAsmInfoWasm::MCAsmInfoWasm() {
HasIdentDirective = true;
+ HasNoDeadStrip = true;
WeakRefDirective = "\t.weak\t";
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
diff --git a/lib/MC/MCAsmInfoXCOFF.cpp b/lib/MC/MCAsmInfoXCOFF.cpp
new file mode 100644
index 000000000000..74c21f0c9e6d
--- /dev/null
+++ b/lib/MC/MCAsmInfoXCOFF.cpp
@@ -0,0 +1,18 @@
+//===- MC/MCAsmInfoXCOFF.cpp - XCOFF asm properties ------------ *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoXCOFF.h"
+
+using namespace llvm;
+
+void MCAsmInfoXCOFF::anchor() {}
+
+MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
+ IsLittleEndian = false;
+ HasDotTypeDotSizeDirective = false;
+}
diff --git a/lib/MC/MCAsmMacro.cpp b/lib/MC/MCAsmMacro.cpp
index 7e89c03c6c6b..ba4fb7d4f387 100644
--- a/lib/MC/MCAsmMacro.cpp
+++ b/lib/MC/MCAsmMacro.cpp
@@ -1,9 +1,8 @@
//===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e017103070bf..7a2b0b8a1220 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -108,10 +107,7 @@ public:
void AddComment(const Twine &T, bool EOL = true) override;
/// Add a comment showing the encoding of an instruction.
- /// If PrintSchedInfo is true, then the comment sched:[x:y] will be added to
- /// the output if supported by the target.
- void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
- bool PrintSchedInfo);
+ void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
/// Return a raw_ostream that comments can be written to.
/// Unlike AddComment, you are required to terminate comments with \n if you
@@ -192,6 +188,7 @@ public:
void EmitValueImpl(const MCExpr *Value, unsigned Size,
SMLoc Loc = SMLoc()) override;
void EmitIntValue(uint64_t Value, unsigned Size) override;
+ void EmitIntValueInHex(uint64_t Value, unsigned Size) override;
void EmitULEB128Value(const MCExpr *Value) override;
@@ -227,11 +224,11 @@ public:
Expected<unsigned> tryEmitDwarfFileDirective(unsigned FileNo,
StringRef Directory,
StringRef Filename,
- MD5::MD5Result *Checksum = 0,
+ Optional<MD5::MD5Result> Checksum = None,
Optional<StringRef> Source = None,
unsigned CUID = 0) override;
void emitDwarfFile0Directive(StringRef Directory, StringRef Filename,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
unsigned CUID = 0) override;
void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -312,8 +309,7 @@ public:
void emitCGProfileEntry(const MCSymbolRefExpr *From,
const MCSymbolRefExpr *To, uint64_t Count) override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool PrintSchedInfo) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitBundleAlignMode(unsigned AlignPow2) override;
void EmitBundleLock(bool AlignToEnd) override;
@@ -546,6 +542,7 @@ static const char *getPlatformName(MachO::PlatformType Type) {
case MachO::PLATFORM_TVOS: return "tvos";
case MachO::PLATFORM_WATCHOS: return "watchos";
case MachO::PLATFORM_BRIDGEOS: return "bridgeos";
+ case MachO::PLATFORM_MACCATALYST: return "macCatalyst";
case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator";
case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator";
case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
@@ -657,6 +654,9 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// .weak_reference
case MCSA_WeakReference: OS << MAI->getWeakRefDirective(); break;
case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
+ case MCSA_Cold:
+ // Assemblers currently do not support a .cold directive.
+ return false;
}
Symbol->print(OS, MAI);
@@ -924,6 +924,10 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
EmitValue(MCConstantExpr::create(Value, getContext()), Size);
}
+void MCAsmStreamer::EmitIntValueInHex(uint64_t Value, unsigned Size) {
+ EmitValue(MCConstantExpr::create(Value, getContext(), true), Size);
+}
+
void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
SMLoc Loc) {
assert(Size <= 8 && "Invalid size");
@@ -1153,7 +1157,7 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
static void printDwarfFileDirective(unsigned FileNo, StringRef Directory,
StringRef Filename,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
bool UseDwarfDirectory,
raw_svector_ostream &OS) {
@@ -1186,13 +1190,14 @@ static void printDwarfFileDirective(unsigned FileNo, StringRef Directory,
Expected<unsigned> MCAsmStreamer::tryEmitDwarfFileDirective(
unsigned FileNo, StringRef Directory, StringRef Filename,
- MD5::MD5Result *Checksum, Optional<StringRef> Source, unsigned CUID) {
+ Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, unsigned CUID) {
assert(CUID == 0 && "multiple CUs not supported by MCAsmStreamer");
MCDwarfLineTable &Table = getContext().getMCDwarfLineTable(CUID);
unsigned NumFiles = Table.getMCDwarfFiles().size();
Expected<unsigned> FileNoOrErr =
- Table.tryGetFile(Directory, Filename, Checksum, Source, FileNo);
+ Table.tryGetFile(Directory, Filename, Checksum, Source,
+ getContext().getDwarfVersion(), FileNo);
if (!FileNoOrErr)
return FileNoOrErr.takeError();
FileNo = FileNoOrErr.get();
@@ -1214,7 +1219,7 @@ Expected<unsigned> MCAsmStreamer::tryEmitDwarfFileDirective(
void MCAsmStreamer::emitDwarfFile0Directive(StringRef Directory,
StringRef Filename,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
unsigned CUID) {
assert(CUID == 0);
@@ -1737,8 +1742,7 @@ void MCAsmStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
}
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- bool PrintSchedInfo) {
+ const MCSubtargetInfo &STI) {
raw_ostream &OS = GetCommentOS();
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
@@ -1817,11 +1821,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
}
}
- OS << "]";
- // If we are not going to add fixup or schedule comments after this point
- // then we have to end the current comment line with "\n".
- if (Fixups.size() || !PrintSchedInfo)
- OS << "\n";
+ OS << "]\n";
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
@@ -1833,18 +1833,15 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- bool PrintSchedInfo) {
+ const MCSubtargetInfo &STI) {
assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
- AddEncodingComment(Inst, STI, PrintSchedInfo);
+ AddEncodingComment(Inst, STI);
// Show the MCInst if enabled.
if (ShowInst) {
- if (PrintSchedInfo)
- GetCommentOS() << "\n";
Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
GetCommentOS() << "\n";
}
@@ -1854,12 +1851,6 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
else
InstPrinter->printInst(&Inst, OS, "", STI);
- if (PrintSchedInfo) {
- std::string SI = STI.getSchedInfoStr(Inst);
- if (!SI.empty())
- GetCommentOS() << SI;
- }
-
StringRef Comments = CommentToEmit;
if (Comments.size() && Comments.back() != '\n')
GetCommentOS() << "\n";
@@ -1927,7 +1918,7 @@ void MCAsmStreamer::FinishImpl() {
// Emit the label for the line table, if requested - since the rest of the
// line table will be defined by .loc/.file directives, and not emitted
// directly, the label is the only work required here.
- auto &Tables = getContext().getMCDwarfLineTables();
+ const auto &Tables = getContext().getMCDwarfLineTables();
if (!Tables.empty()) {
assert(Tables.size() == 1 && "asm output only supports one line table");
if (auto *Label = Tables.begin()->second.getLabel()) {
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index cde6a93a1647..c4f4d4c2870e 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -323,6 +322,13 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCAlignFragment &AF = cast<MCAlignFragment>(F);
unsigned Offset = Layout.getFragmentOffset(&AF);
unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+
+ // Insert extra Nops for code alignment if the target defines the
+ // shouldInsertExtraNopBytesForCodeAlign target hook.
+ if (AF.getParent()->UseCodeAlign() && AF.hasEmitNops() &&
+ getBackend().shouldInsertExtraNopBytesForCodeAlign(AF, Size))
+ return Size;
+
// If we are padding with nops, force the padding to be larger than the
// minimum nop size.
if (Size > 0 && AF.hasEmitNops()) {
@@ -805,7 +811,8 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
if (isa<MCEncodedFragment>(&Frag) &&
isa<MCCompactEncodedInstFragment>(&Frag))
continue;
- if (!isa<MCEncodedFragment>(&Frag) && !isa<MCCVDefRangeFragment>(&Frag))
+ if (!isa<MCEncodedFragment>(&Frag) && !isa<MCCVDefRangeFragment>(&Frag) &&
+ !isa<MCAlignFragment>(&Frag))
continue;
ArrayRef<MCFixup> Fixups;
MutableArrayRef<char> Contents;
@@ -826,6 +833,13 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
} else if (auto *FragWithFixups = dyn_cast<MCDwarfLineAddrFragment>(&Frag)) {
Fixups = FragWithFixups->getFixups();
Contents = FragWithFixups->getContents();
+ } else if (auto *AF = dyn_cast<MCAlignFragment>(&Frag)) {
+ // Insert fixup type for code alignment if the target defines the
+ // shouldInsertFixupForCodeAlign target hook.
+ if (Sec.UseCodeAlign() && AF->hasEmitNops()) {
+ getBackend().shouldInsertFixupForCodeAlign(*this, Layout, *AF);
+ }
+ continue;
} else
llvm_unreachable("Unknown fragment with fixups!");
for (const MCFixup &Fixup : Fixups) {
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
index ca69478ed10d..0d114f12d58c 100644
--- a/lib/MC/MCCodeEmitter.cpp
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===- MCCodeEmitter.cpp - Instruction Encoding ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCCodePadder.cpp b/lib/MC/MCCodePadder.cpp
index 57547814e595..27a62f95a529 100644
--- a/lib/MC/MCCodePadder.cpp
+++ b/lib/MC/MCCodePadder.cpp
@@ -1,9 +1,8 @@
//===- MCCodePadder.cpp - Target MC Code Padder ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp
index 978ac789c31e..1a71b542bd06 100644
--- a/lib/MC/MCCodeView.cpp
+++ b/lib/MC/MCCodeView.cpp
@@ -1,9 +1,8 @@
//===- MCCodeView.h - Machine Code CodeView support -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index fab517075c5a..0dc2e2d37caf 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,17 +26,20 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
@@ -85,6 +87,7 @@ void MCContext::reset() {
COFFAllocator.DestroyAll();
ELFAllocator.DestroyAll();
MachOAllocator.DestroyAll();
+ XCOFFAllocator.DestroyAll();
MCSubtargetAllocator.DestroyAll();
UsedNames.clear();
@@ -106,6 +109,7 @@ void MCContext::reset() {
ELFUniquingMap.clear();
COFFUniquingMap.clear();
WasmUniquingMap.clear();
+ XCOFFUniquingMap.clear();
NextID.clear();
AllowTemporaryLabels = true;
@@ -161,6 +165,8 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
return new (Name, *this) MCSymbolMachO(Name, IsTemporary);
case MCObjectFileInfo::IsWasm:
return new (Name, *this) MCSymbolWasm(Name, IsTemporary);
+ case MCObjectFileInfo::IsXCOFF:
+ return new (Name, *this) MCSymbolXCOFF(Name, IsTemporary);
}
}
return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name,
@@ -459,14 +465,6 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
BeginSymName);
}
-MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
- COFFSectionKey T{Section, "", 0, GenericSectionID};
- auto Iter = COFFUniquingMap.find(T);
- if (Iter == COFFUniquingMap.end())
- return nullptr;
- return Iter->second;
-}
-
MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec,
const MCSymbol *KeySym,
unsigned UniqueID) {
@@ -531,6 +529,38 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
return Result;
}
+MCSectionXCOFF *MCContext::getXCOFFSection(StringRef Section,
+ XCOFF::StorageMappingClass SMC,
+ SectionKind Kind,
+ const char *BeginSymName) {
+ // Do the lookup. If we have a hit, return it.
+ auto IterBool = XCOFFUniquingMap.insert(
+ std::make_pair(XCOFFSectionKey{Section.str(), SMC}, nullptr));
+ auto &Entry = *IterBool.first;
+ if (!IterBool.second)
+ return Entry.second;
+
+ // Otherwise, return a new section.
+ StringRef CachedName = Entry.first.SectionName;
+
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
+
+ MCSectionXCOFF *Result = new (XCOFFAllocator.Allocate())
+ MCSectionXCOFF(CachedName, SMC, Kind, Begin);
+ Entry.second = Result;
+
+ auto *F = new MCDataFragment();
+ Result->getFragmentList().insert(Result->begin(), F);
+ F->setParent(Result);
+
+ if (Begin)
+ Begin->setFragment(F);
+
+ return Result;
+}
+
MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) {
return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI);
}
@@ -566,6 +596,42 @@ void MCContext::RemapDebugPaths() {
// Dwarf Management
//===----------------------------------------------------------------------===//
+void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) {
+ // MCDwarf needs the root file as well as the compilation directory.
+ // If we find a '.file 0' directive, it will supersede these values.
+ Optional<MD5::MD5Result> Cksum;
+ if (getDwarfVersion() >= 5) {
+ MD5 Hash;
+ MD5::MD5Result Sum;
+ Hash.update(Buffer);
+ Hash.final(Sum);
+ Cksum = Sum;
+ }
+ // Canonicalize the root filename. It cannot be empty, and should not
+ // repeat the compilation dir.
+ // The MCContext ctor initializes MainFileName to the name associated with
+ // the SrcMgr's main file ID, which might be the same as InputFileName (and
+ // possibly include directory components).
+ // Or, MainFileName might have been overridden by a -main-file-name option,
+ // which is supposed to be just a base filename with no directory component.
+ // So, if the InputFileName and MainFileName are not equal, assume
+ // MainFileName is a substitute basename and replace the last component.
+ SmallString<1024> FileNameBuf = InputFileName;
+ if (FileNameBuf.empty() || FileNameBuf == "-")
+ FileNameBuf = "<stdin>";
+ if (!getMainFileName().empty() && FileNameBuf != getMainFileName()) {
+ llvm::sys::path::remove_filename(FileNameBuf);
+ llvm::sys::path::append(FileNameBuf, getMainFileName());
+ }
+ StringRef FileName = FileNameBuf;
+ if (FileName.consume_front(getCompilationDir()))
+ if (llvm::sys::path::is_separator(FileName.front()))
+ FileName = FileName.drop_front();
+ assert(!FileName.empty());
+ setMCLineTableRootFile(
+ /*CUID=*/0, getCompilationDir(), FileName, Cksum, None);
+}
+
/// getDwarfFile - takes a file name and number to place in the dwarf file and
/// directory tables. If the file number has already been allocated it is an
/// error and zero is returned and the client reports the error, else the
@@ -573,11 +639,12 @@ void MCContext::RemapDebugPaths() {
Expected<unsigned> MCContext::getDwarfFile(StringRef Directory,
StringRef FileName,
unsigned FileNumber,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
unsigned CUID) {
MCDwarfLineTable &Table = MCDwarfLineTablesCUMap[CUID];
- return Table.tryGetFile(Directory, FileName, Checksum, Source, FileNumber);
+ return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion,
+ FileNumber);
}
/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
@@ -585,7 +652,7 @@ Expected<unsigned> MCContext::getDwarfFile(StringRef Directory,
bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
const MCDwarfLineTable &LineTable = getMCDwarfLineTable(CUID);
if (FileNumber == 0)
- return getDwarfVersion() >= 5 && LineTable.hasRootFile();
+ return getDwarfVersion() >= 5;
if (FileNumber >= LineTable.getMCDwarfFiles().size())
return false;
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index ad0a39991c53..21bdc2eaea3e 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -1,9 +1,8 @@
//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -53,31 +52,32 @@ LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
if (!TheTarget)
return nullptr;
- const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TT);
+ std::unique_ptr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT));
if (!MRI)
return nullptr;
// Get the assembler info needed to setup the MCContext.
- const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TT);
+ std::unique_ptr<const MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT));
if (!MAI)
return nullptr;
- const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+ std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
return nullptr;
- const MCSubtargetInfo *STI =
- TheTarget->createMCSubtargetInfo(TT, CPU, Features);
+ std::unique_ptr<const MCSubtargetInfo> STI(
+ TheTarget->createMCSubtargetInfo(TT, CPU, Features));
if (!STI)
return nullptr;
// Set up the MCContext for creating symbols and MCExpr's.
- MCContext *Ctx = new MCContext(MAI, MRI, nullptr);
+ std::unique_ptr<MCContext> Ctx(new MCContext(MAI.get(), MRI.get(), nullptr));
if (!Ctx)
return nullptr;
// Set up disassembler.
- MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI, *Ctx);
+ std::unique_ptr<MCDisassembler> DisAsm(
+ TheTarget->createMCDisassembler(*STI, *Ctx));
if (!DisAsm)
return nullptr;
@@ -87,19 +87,20 @@ LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU,
return nullptr;
std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
- TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo)));
+ TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx.get(), std::move(RelInfo)));
DisAsm->setSymbolizer(std::move(Symbolizer));
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
- MCInstPrinter *IP = TheTarget->createMCInstPrinter(
- Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI);
+ std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+ Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI));
if (!IP)
return nullptr;
- LLVMDisasmContext *DC =
- new LLVMDisasmContext(TT, DisInfo, TagType, GetOpInfo, SymbolLookUp,
- TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP);
+ LLVMDisasmContext *DC = new LLVMDisasmContext(
+ TT, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, std::move(MAI),
+ std::move(MRI), std::move(STI), std::move(MII), std::move(Ctx),
+ std::move(DisAsm), std::move(IP));
if (!DC)
return nullptr;
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index f638fdc781d7..e5aab53a7613 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -1,9 +1,8 @@
//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,24 +82,22 @@ public:
SmallString<128> CommentsToEmit;
raw_svector_ostream CommentStream;
- LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
- LLVMOpInfoCallback getOpInfo,
- LLVMSymbolLookupCallback symbolLookUp,
- const Target *theTarget, const MCAsmInfo *mAI,
- const MCRegisterInfo *mRI, const MCSubtargetInfo *mSI,
- const MCInstrInfo *mII, llvm::MCContext *ctx,
- const MCDisassembler *disAsm, MCInstPrinter *iP)
- : TripleName(std::move(tripleName)), DisInfo(disInfo), TagType(tagType),
- GetOpInfo(getOpInfo), SymbolLookUp(symbolLookUp), TheTarget(theTarget),
- Options(0), CommentStream(CommentsToEmit) {
- MAI.reset(mAI);
- MRI.reset(mRI);
- MSI.reset(mSI);
- MII.reset(mII);
- Ctx.reset(ctx);
- DisAsm.reset(disAsm);
- IP.reset(iP);
- }
+ LLVMDisasmContext(std::string TripleName, void *DisInfo, int TagType,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp,
+ const Target *TheTarget,
+ std::unique_ptr<const MCAsmInfo> &&MAI,
+ std::unique_ptr<const MCRegisterInfo> &&MRI,
+ std::unique_ptr<const MCSubtargetInfo> &&MSI,
+ std::unique_ptr<const MCInstrInfo> &&MII,
+ std::unique_ptr<const llvm::MCContext> &&Ctx,
+ std::unique_ptr<const MCDisassembler> &&DisAsm,
+ std::unique_ptr<MCInstPrinter> &&IP)
+ : TripleName(std::move(TripleName)), DisInfo(DisInfo), TagType(TagType),
+ GetOpInfo(GetOpInfo), SymbolLookUp(SymbolLookUp), TheTarget(TheTarget),
+ MAI(std::move(MAI)), MRI(std::move(MRI)), MSI(std::move(MSI)),
+ MII(std::move(MII)), Ctx(std::move(Ctx)), DisAsm(std::move(DisAsm)),
+ IP(std::move(IP)), Options(0), CommentStream(CommentsToEmit) {}
const std::string &getTripleName() const { return TripleName; }
void *getDisInfo() const { return DisInfo; }
int getTagType() const { return TagType; }
diff --git a/lib/MC/MCDisassembler/MCDisassembler.cpp b/lib/MC/MCDisassembler/MCDisassembler.cpp
index 2f1275d00b86..063f7e706024 100644
--- a/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -1,13 +1,14 @@
//===- MCDisassembler.cpp - Disassembler interface ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -15,6 +16,13 @@ using namespace llvm;
MCDisassembler::~MCDisassembler() = default;
+MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart(
+ StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream, raw_ostream &CStream) const {
+ Size = 0;
+ return MCDisassembler::Success;
+}
+
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
uint64_t Offset,
diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 1969c5dc66ab..7befef86303c 100644
--- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -1,9 +1,8 @@
//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 8f932a3f0d48..64e216e0051d 100644
--- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -1,9 +1,8 @@
//===-- MCRelocationInfo.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCDisassembler/MCSymbolizer.cpp b/lib/MC/MCDisassembler/MCSymbolizer.cpp
index 78e611e3ddda..8214a196afb1 100644
--- a/lib/MC/MCDisassembler/MCSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCSymbolizer.cpp
@@ -1,9 +1,8 @@
//===-- llvm/MC/MCSymbolizer.cpp - MCSymbolizer class ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 38b02694d81d..aae6fdf90931 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -260,7 +259,7 @@ void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS,
void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
MCSection *Section) const {
- if (Header.MCDwarfFiles.empty())
+ if (!HasSplitLineTable)
return;
Optional<MCDwarfLineStr> NoLineStr(None);
MCOS.SwitchSection(Section);
@@ -362,10 +361,10 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
}
MCOS->EmitULEB128IntValue(DwarfFile.DirIndex); // Directory number.
if (EmitMD5) {
- MD5::MD5Result *Cksum = DwarfFile.Checksum;
+ const MD5::MD5Result &Cksum = *DwarfFile.Checksum;
MCOS->EmitBinaryData(
- StringRef(reinterpret_cast<const char *>(Cksum->Bytes.data()),
- Cksum->Bytes.size()));
+ StringRef(reinterpret_cast<const char *>(Cksum.Bytes.data()),
+ Cksum.Bytes.size()));
}
if (HasSource) {
if (LineStr)
@@ -379,8 +378,7 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
}
void MCDwarfLineTableHeader::emitV5FileDirTables(
- MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr,
- StringRef CtxCompilationDir) const {
+ MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr) const {
// The directory format, which is just a list of the directory paths. In a
// non-split object, these are references to .debug_line_str; in a split
// object, they are inline strings.
@@ -390,8 +388,9 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
: dwarf::DW_FORM_string);
MCOS->EmitULEB128IntValue(MCDwarfDirs.size() + 1);
// Try not to emit an empty compilation directory.
- const StringRef CompDir =
- CompilationDir.empty() ? CtxCompilationDir : StringRef(CompilationDir);
+ const StringRef CompDir = CompilationDir.empty()
+ ? MCOS->getContext().getCompilationDir()
+ : StringRef(CompilationDir);
if (LineStr) {
// Record path strings, emit references here.
LineStr->emitRef(MCOS, CompDir);
@@ -431,10 +430,14 @@ void MCDwarfLineTableHeader::emitV5FileDirTables(
: dwarf::DW_FORM_string);
}
// Then the counted list of files. The root file is file #0, then emit the
- // files as provide by .file directives. To accommodate assembler source
- // written for DWARF v4 but trying to emit v5, if we didn't see a root file
- // explicitly, replicate file #1.
- MCOS->EmitULEB128IntValue(MCDwarfFiles.size());
+ // files as provided by .file directives.
+ // MCDwarfFiles has an unused element [0] so use size() not size()+1.
+ // But sometimes MCDwarfFiles is empty, in which case we still emit one file.
+ MCOS->EmitULEB128IntValue(MCDwarfFiles.empty() ? 1 : MCDwarfFiles.size());
+ // To accommodate assembler source written for DWARF v4 but trying to emit
+ // v5: If we didn't see a root file explicitly, replicate file #1.
+ assert((!RootFile.Name.empty() || MCDwarfFiles.size() >= 1) &&
+ "No root file and no .file directives");
emitOneV5FileEntry(MCOS, RootFile.Name.empty() ? MCDwarfFiles[1] : RootFile,
HasAllMD5, HasSource, LineStr);
for (unsigned i = 1; i < MCDwarfFiles.size(); ++i)
@@ -506,7 +509,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
// Put out the directory and file tables. The formats vary depending on
// the version.
if (LineTableVersion >= 5)
- emitV5FileDirTables(MCOS, LineStr, context.getCompilationDir());
+ emitV5FileDirTables(MCOS, LineStr);
else
emitV2FileDirTables(MCOS);
@@ -533,17 +536,27 @@ void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS,
Expected<unsigned> MCDwarfLineTable::tryGetFile(StringRef &Directory,
StringRef &FileName,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
+ uint16_t DwarfVersion,
unsigned FileNumber) {
- return Header.tryGetFile(Directory, FileName, Checksum, Source, FileNumber);
+ return Header.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion,
+ FileNumber);
+}
+
+bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory,
+ StringRef &FileName, Optional<MD5::MD5Result> Checksum) {
+ if (RootFile.Name.empty() || RootFile.Name != FileName.data())
+ return false;
+ return RootFile.Checksum == Checksum;
}
Expected<unsigned>
MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
StringRef &FileName,
- MD5::MD5Result *Checksum,
- Optional<StringRef> &Source,
+ Optional<MD5::MD5Result> Checksum,
+ Optional<StringRef> Source,
+ uint16_t DwarfVersion,
unsigned FileNumber) {
if (Directory == CompilationDir)
Directory = "";
@@ -555,9 +568,11 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
// Keep track of whether any or all files have an MD5 checksum.
// If any files have embedded source, they all must.
if (MCDwarfFiles.empty()) {
- trackMD5Usage(Checksum);
+ trackMD5Usage(Checksum.hasValue());
HasSource = (Source != None);
}
+ if (isRootFile(RootFile, Directory, FileName, Checksum) && DwarfVersion >= 5)
+ return 0;
if (FileNumber == 0) {
// File numbers start with 1 and/or after any file numbers
// allocated by inline-assembler .file directives.
@@ -603,11 +618,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
// For FileNames with no directories a DirIndex of 0 is used.
DirIndex = 0;
} else {
- DirIndex = 0;
- for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) {
- if (Directory == MCDwarfDirs[DirIndex])
- break;
- }
+ DirIndex = llvm::find(MCDwarfDirs, Directory) - MCDwarfDirs.begin();
if (DirIndex >= MCDwarfDirs.size())
MCDwarfDirs.push_back(Directory);
// The DirIndex is one based, as DirIndex of 0 is used for FileNames with
@@ -620,7 +631,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
File.Name = FileName;
File.DirIndex = DirIndex;
File.Checksum = Checksum;
- trackMD5Usage(Checksum);
+ trackMD5Usage(Checksum.hasValue());
File.Source = Source;
if (Source)
HasSource = true;
@@ -755,9 +766,7 @@ bool MCDwarfLineAddr::FixedEncode(MCContext &Context,
*Offset = OS.tell();
*Size = AddrSize;
SetDelta = false;
- std::vector<uint8_t> FillData;
- FillData.insert(FillData.begin(), AddrSize, 0);
- OS.write(reinterpret_cast<char *>(FillData.data()), AddrSize);
+ OS.write_zeros(AddrSize);
} else {
OS << char(dwarf::DW_LNS_fixed_advance_pc);
// Generate fixup for 2-bytes address delta.
@@ -1007,9 +1016,15 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
MCOS->EmitBytes(MCDwarfDirs[0]);
MCOS->EmitBytes(sys::path::get_separator());
}
- const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles =
- MCOS->getContext().getMCDwarfFiles();
- MCOS->EmitBytes(MCDwarfFiles[1].Name);
+ const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles = context.getMCDwarfFiles();
+ // MCDwarfFiles might be empty if we have an empty source file.
+ // If it's not empty, [0] is unused and [1] is the first actual file.
+ assert(MCDwarfFiles.empty() || MCDwarfFiles.size() >= 2);
+ const MCDwarfFile &RootFile =
+ MCDwarfFiles.empty()
+ ? context.getMCDwarfLineTable(/*CUID=*/0).getRootFile()
+ : MCDwarfFiles[1];
+ MCOS->EmitBytes(RootFile.Name);
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_comp_dir, the working directory the assembly was done in.
@@ -1754,6 +1769,20 @@ struct CIEKey {
IsSimple(Frame.IsSimple), RAReg(Frame.RAReg),
IsBKeyFrame(Frame.IsBKeyFrame) {}
+ StringRef PersonalityName() const {
+ if (!Personality)
+ return StringRef();
+ return Personality->getName();
+ }
+
+ bool operator<(const CIEKey &Other) const {
+ return std::make_tuple(PersonalityName(), PersonalityEncoding, LsdaEncoding,
+ IsSignalFrame, IsSimple, RAReg) <
+ std::make_tuple(Other.PersonalityName(), Other.PersonalityEncoding,
+ Other.LsdaEncoding, Other.IsSignalFrame,
+ Other.IsSimple, Other.RAReg);
+ }
+
const MCSymbol *Personality;
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
@@ -1831,7 +1860,16 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
const MCSymbol *DummyDebugKey = nullptr;
bool CanOmitDwarf = MOFI->getOmitDwarfIfHaveCompactUnwind();
- for (auto I = FrameArray.begin(), E = FrameArray.end(); I != E;) {
+ // Sort the FDEs by their corresponding CIE before we emit them.
+ // This isn't technically necessary according to the DWARF standard,
+ // but the Android libunwindstack rejects eh_frame sections where
+ // an FDE refers to a CIE other than the closest previous CIE.
+ std::vector<MCDwarfFrameInfo> FrameArrayX(FrameArray.begin(), FrameArray.end());
+ llvm::stable_sort(FrameArrayX,
+ [](const MCDwarfFrameInfo &X, const MCDwarfFrameInfo &Y) {
+ return CIEKey(X) < CIEKey(Y);
+ });
+ for (auto I = FrameArrayX.begin(), E = FrameArrayX.end(); I != E;) {
const MCDwarfFrameInfo &Frame = *I;
++I;
if (CanOmitDwarf && Frame.CompactUnwindEncoding !=
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index ff53dd7299c1..a81eab9ca296 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -1,9 +1,8 @@
//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,8 +12,9 @@ using namespace llvm;
MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_,
uint16_t EMachine_,
- bool HasRelocationAddend_)
- : OSABI(OSABI_), EMachine(EMachine_),
+ bool HasRelocationAddend_,
+ uint8_t ABIVersion_)
+ : OSABI(OSABI_), ABIVersion(ABIVersion_), EMachine(EMachine_),
HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {}
bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 95b48e6abc74..245dd063004f 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCELFStreamer.cpp - ELF Object Output -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -202,6 +201,7 @@ bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
// In the future it might be worth trying to make these operations more well
// defined.
switch (Attribute) {
+ case MCSA_Cold:
case MCSA_LazyReference:
case MCSA_Reference:
case MCSA_SymbolResolver:
@@ -400,6 +400,8 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
case MCSymbolRefExpr::VK_INDNTPOFF:
case MCSymbolRefExpr::VK_NTPOFF:
case MCSymbolRefExpr::VK_GOTNTPOFF:
+ case MCSymbolRefExpr::VK_TLSCALL:
+ case MCSymbolRefExpr::VK_TLSDESC:
case MCSymbolRefExpr::VK_TLSGD:
case MCSymbolRefExpr::VK_TLSLD:
case MCSymbolRefExpr::VK_TLSLDM:
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 3c022199145f..ab53ed42778e 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -1,14 +1,14 @@
//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCExpr.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -43,10 +43,15 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
switch (getKind()) {
case MCExpr::Target:
return cast<MCTargetExpr>(this)->printImpl(OS, MAI);
- case MCExpr::Constant:
- OS << cast<MCConstantExpr>(*this).getValue();
+ case MCExpr::Constant: {
+ auto Value = cast<MCConstantExpr>(*this).getValue();
+ auto PrintInHex = cast<MCConstantExpr>(*this).useHexFormat();
+ if (PrintInHex)
+ OS << "0x" << Twine::utohexstr(Value);
+ else
+ OS << Value;
return;
-
+ }
case MCExpr::SymbolRef: {
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
const MCSymbol &Sym = SRE.getSymbol();
@@ -161,8 +166,9 @@ const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
return new (Ctx) MCUnaryExpr(Opc, Expr, Loc);
}
-const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
- return new (Ctx) MCConstantExpr(Value);
+const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx,
+ bool PrintInHex) {
+ return new (Ctx) MCConstantExpr(Value, PrintInHex);
}
/* *** */
@@ -303,15 +309,16 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_LD_PLT: return "LDPLT";
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
- case VK_WebAssembly_FUNCTION: return "FUNCTION";
- case VK_WebAssembly_GLOBAL: return "GLOBAL";
- case VK_WebAssembly_TYPEINDEX: return "TYPEINDEX";
- case VK_WebAssembly_EVENT: return "EVENT";
+ case VK_WASM_TYPEINDEX: return "TYPEINDEX";
+ case VK_WASM_MBREL: return "MBREL";
+ case VK_WASM_TBREL: return "TBREL";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
case VK_AMDGPU_REL32_HI: return "rel32@hi";
case VK_AMDGPU_REL64: return "rel64";
+ case VK_AMDGPU_ABS32_LO: return "abs32@lo";
+ case VK_AMDGPU_ABS32_HI: return "abs32@hi";
}
llvm_unreachable("Invalid variant kind");
}
@@ -419,15 +426,16 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("lo8", VK_AVR_LO8)
.Case("hi8", VK_AVR_HI8)
.Case("hlo8", VK_AVR_HLO8)
- .Case("function", VK_WebAssembly_FUNCTION)
- .Case("global", VK_WebAssembly_GLOBAL)
- .Case("typeindex", VK_WebAssembly_TYPEINDEX)
- .Case("event", VK_WebAssembly_EVENT)
+ .Case("typeindex", VK_WASM_TYPEINDEX)
+ .Case("tbrel", VK_WASM_TBREL)
+ .Case("mbrel", VK_WASM_MBREL)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
.Case("rel32@hi", VK_AMDGPU_REL32_HI)
.Case("rel64", VK_AMDGPU_REL64)
+ .Case("abs32@lo", VK_AMDGPU_ABS32_LO)
+ .Case("abs32@hi", VK_AMDGPU_ABS32_HI)
.Default(VK_Invalid);
}
@@ -559,6 +567,11 @@ static void AttemptToFoldSymbolOffsetDifference(
if (Asm->isThumbFunc(&SA))
Addend |= 1;
+ // If the symbol is labeled as micromips, set the low bit to ensure a
+ // correct offset in .gcc_except_table.
+ if (Asm->getBackend().isMicroMips(&SA))
+ Addend |= 1;
+
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = nullptr;
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
index d22b117972bf..ae5bd65507bc 100644
--- a/lib/MC/MCFragment.cpp
+++ b/lib/MC/MCFragment.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCFragment.cpp - Assembler Fragment Implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 64f111fc7114..f6f6edee5822 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 9296fcedb72b..159f4070fe9f 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -1,9 +1,8 @@
//===- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -22,10 +21,14 @@ using namespace llvm;
void llvm::dumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) {
static const char hex_rep[] = "0123456789abcdef";
+ bool First = true;
for (char i: bytes) {
+ if (First)
+ First = false;
+ else
+ OS << ' ';
OS << hex_rep[(i & 0xF0) >> 4];
OS << hex_rep[i & 0xF];
- OS << ' ';
}
}
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 8223f3a5c66f..eca87f940bf5 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -1,9 +1,8 @@
//===- MCInstrAnalysis.cpp - InstrDesc target hooks -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp
index 53cba864a85d..d54aeba89edc 100644
--- a/lib/MC/MCInstrDesc.cpp
+++ b/lib/MC/MCInstrDesc.cpp
@@ -1,9 +1,8 @@
//===------ llvm/MC/MCInstrDesc.cpp- Instruction Descriptors --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp
index c376c83274ef..66ee73c5bbb3 100644
--- a/lib/MC/MCLabel.cpp
+++ b/lib/MC/MCLabel.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCLabel.cpp - MCLabel implementation ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp
index 2f8581470ea6..9ab321872b11 100644
--- a/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/lib/MC/MCLinkerOptimizationHint.cpp
@@ -1,9 +1,8 @@
//===- llvm/MC/MCLinkerOptimizationHint.cpp ----- LOH handling ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index b30317e74672..613f255a4ea4 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -1,9 +1,8 @@
//===- MCMachOStreamer.cpp - MachO Streamer -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -387,6 +386,10 @@ bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Sym,
Symbol->setWeakDefinition();
Symbol->setWeakReference();
break;
+
+ case MCSA_Cold:
+ Symbol->setCold();
+ break;
}
return true;
diff --git a/lib/MC/MCMachObjectTargetWriter.cpp b/lib/MC/MCMachObjectTargetWriter.cpp
index 8809a3c320f8..a57b8a7ac0ff 100644
--- a/lib/MC/MCMachObjectTargetWriter.cpp
+++ b/lib/MC/MCMachObjectTargetWriter.cpp
@@ -1,9 +1,8 @@
//===- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 4e97e7550bcb..8452317c8c6b 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 9e35355d06e0..9f555abe1404 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -1,9 +1,8 @@
//===-- MCObjectFileInfo.cpp - Object File Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,6 +18,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
using namespace llvm;
@@ -291,6 +291,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
FaultMapSection = Ctx->getMachOSection("__LLVM_FAULTMAPS", "__llvm_faultmaps",
0, SectionKind::getMetadata());
+ RemarksSection = Ctx->getMachOSection(
+ "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
+
TLSExtraDataSection = TLSTLVSection;
}
@@ -476,6 +479,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0);
+
+ RemarksSection =
+ Ctx->getELFSection(".remarks", ELF::SHT_PROGBITS, ELF::SHF_EXCLUDE);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -756,6 +762,15 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
// TODO: Define more sections.
}
+void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
+ // The default csect for program code. Functions without a specified section
+ // get placed into this csect. The choice of csect name is not a property of
+ // the ABI or object file format. For example, the XL compiler uses an unnamed
+ // csect for program code.
+ TextSection = Ctx->getXCOFFSection(
+ ".text", XCOFF::StorageMappingClass::XMC_PR, SectionKind::getText());
+}
+
void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
MCContext &ctx,
bool LargeCodeModel) {
@@ -802,6 +817,10 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
Env = IsWasm;
initWasmMCObjectFileInfo(TT);
break;
+ case Triple::XCOFF:
+ Env = IsXCOFF;
+ initXCOFFMCObjectFileInfo(TT);
+ break;
case Triple::UnknownObjectFormat:
report_fatal_error("Cannot initialize MC for unknown object file format.");
break;
@@ -817,6 +836,7 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
case Triple::MachO:
case Triple::COFF:
case Triple::Wasm:
+ case Triple::XCOFF:
case Triple::UnknownObjectFormat:
report_fatal_error("Cannot get DWARF comdat section for this object file "
"format: not implemented.");
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 6ec705bdddb7..1587d8498666 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -315,7 +314,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
}
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst);
EmitInstructionImpl(Inst, STI);
getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst);
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 98ac48a23f91..a058bbe0ba0b 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 2b0d20f9b8e2..9155ae05d29d 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -1,9 +1,8 @@
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,8 +61,6 @@ int AsmLexer::getNextChar() {
return (unsigned char)*CurPtr++;
}
-/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
-///
/// The leading integral digit sequence and dot should have already been
/// consumed, some or all of the fractional digit sequence *can* have been
/// consumed.
@@ -72,13 +69,16 @@ AsmToken AsmLexer::LexFloatLiteral() {
while (isDigit(*CurPtr))
++CurPtr;
- // Check for exponent; we intentionally accept a slighlty wider set of
- // literals here and rely on the upstream client to reject invalid ones (e.g.,
- // "1e+").
- if (*CurPtr == 'e' || *CurPtr == 'E') {
+ if (*CurPtr == '-' || *CurPtr == '+')
+ return ReturnError(CurPtr, "Invalid sign in float literal");
+
+ // Check for exponent
+ if ((*CurPtr == 'e' || *CurPtr == 'E')) {
++CurPtr;
+
if (*CurPtr == '-' || *CurPtr == '+')
++CurPtr;
+
while (isDigit(*CurPtr))
++CurPtr;
}
@@ -146,8 +146,9 @@ AsmToken AsmLexer::LexIdentifier() {
// Disambiguate a .1243foo identifier from a floating literal.
while (isDigit(*CurPtr))
++CurPtr;
- if (*CurPtr == 'e' || *CurPtr == 'E' ||
- !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
+
+ if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) ||
+ *CurPtr == 'e' || *CurPtr == 'E')
return LexFloatLiteral();
}
@@ -327,8 +328,9 @@ AsmToken AsmLexer::LexDigit() {
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
bool isHex = Radix == 16;
// Check for floating point literals.
- if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
- ++CurPtr;
+ if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
+ if (*CurPtr == '.')
+ ++CurPtr;
return LexFloatLiteral();
}
@@ -557,7 +559,7 @@ AsmToken AsmLexer::LexToken() {
AsmToken TokenBuf[2];
MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
size_t num = peekTokens(Buf, true);
- // There cannot be a space preceeding this
+ // There cannot be a space preceding this
if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
TokenBuf[1].is(AsmToken::String)) {
CurPtr = TokStart; // reset curPtr;
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index cf42a6f7075b..084f6a7a2e14 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -1,9 +1,8 @@
//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -159,12 +158,16 @@ private:
/// The values from the last parsed cpp hash file line comment if any.
struct CppHashInfoTy {
StringRef Filename;
- int64_t LineNumber = 0;
+ int64_t LineNumber;
SMLoc Loc;
- unsigned Buf = 0;
+ unsigned Buf;
+ CppHashInfoTy() : Filename(), LineNumber(0), Loc(), Buf(0) {}
};
CppHashInfoTy CppHashInfo;
+ /// The filename from the first cpp hash file line comment, if any.
+ StringRef FirstCppHashFilename;
+
/// List of forward directional labels for diagnosis at the end.
SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
@@ -426,6 +429,7 @@ private:
DK_WEAK_DEFINITION,
DK_WEAK_REFERENCE,
DK_WEAK_DEF_CAN_BE_HIDDEN,
+ DK_COLD,
DK_COMM,
DK_COMMON,
DK_LCOMM,
@@ -709,6 +713,9 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
case MCObjectFileInfo::IsWasm:
PlatformParser.reset(createWasmAsmParser());
break;
+ case MCObjectFileInfo::IsXCOFF:
+ // TODO: Need to implement createXCOFFAsmParser for XCOFF format.
+ break;
}
PlatformParser->Initialize(*this);
@@ -844,9 +851,20 @@ bool AsmParser::enabledGenDwarfForAssembly() {
// If we haven't encountered any .file directives (which would imply that
// the assembler source was produced with debug info already) then emit one
// describing the assembler source file itself.
- if (getContext().getGenDwarfFileNumber() == 0)
+ if (getContext().getGenDwarfFileNumber() == 0) {
+ // Use the first #line directive for this, if any. It's preprocessed, so
+ // there is no checksum, and of course no source directive.
+ if (!FirstCppHashFilename.empty())
+ getContext().setMCLineTableRootFile(/*CUID=*/0,
+ getContext().getCompilationDir(),
+ FirstCppHashFilename,
+ /*Cksum=*/None, /*Source=*/None);
+ const MCDwarfFile &RootFile =
+ getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective(
- 0, StringRef(), getContext().getMainFileName()));
+ /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
+ RootFile.Checksum, RootFile.Source));
+ }
return true;
}
@@ -1983,6 +2001,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveSymbolAttribute(MCSA_WeakReference);
case DK_WEAK_DEF_CAN_BE_HIDDEN:
return parseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
+ case DK_COLD:
+ return parseDirectiveSymbolAttribute(MCSA_Cold);
case DK_COMM:
case DK_COMMON:
return parseDirectiveComm(/*IsLocal=*/false);
@@ -2275,11 +2295,14 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
// Get rid of the enclosing quotes.
Filename = Filename.substr(1, Filename.size() - 2);
- // Save the SMLoc, Filename and LineNumber for later use by diagnostics.
+ // Save the SMLoc, Filename and LineNumber for later use by diagnostics
+ // and possibly DWARF file info.
CppHashInfo.Loc = L;
CppHashInfo.Filename = Filename;
CppHashInfo.LineNumber = LineNumber;
CppHashInfo.Buf = CurBuffer;
+ if (FirstCppHashFilename.empty())
+ FirstCppHashFilename = Filename;
return false;
}
@@ -3364,26 +3387,28 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
}
if (FileNumber == -1) {
- if (!getContext().getAsmInfo()->hasSingleParameterDotFile())
- return Error(DirectiveLoc,
- "target does not support '.file' without a number");
- getStreamer().EmitFileDirective(Filename);
+ // Ignore the directive if there is no number and the target doesn't support
+ // numberless .file directives. This allows some portability of assembler
+ // between different object file formats.
+ if (getContext().getAsmInfo()->hasSingleParameterDotFile())
+ getStreamer().EmitFileDirective(Filename);
} else {
// In case there is a -g option as well as debug info from directive .file,
// we turn off the -g option, directly use the existing debug info instead.
- // Also reset any implicit ".file 0" for the assembler source.
+ // Throw away any implicit file table for the assembler source.
if (Ctx.getGenDwarfForAssembly()) {
- Ctx.getMCDwarfLineTable(0).resetRootFile();
+ Ctx.getMCDwarfLineTable(0).resetFileTable();
Ctx.setGenDwarfForAssembly(false);
}
- MD5::MD5Result *CKMem = nullptr;
+ Optional<MD5::MD5Result> CKMem;
if (HasMD5) {
- CKMem = (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1);
+ MD5::MD5Result Sum;
for (unsigned i = 0; i != 8; ++i) {
- CKMem->Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
- CKMem->Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+ Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+ Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
}
+ CKMem = Sum;
}
if (HasSource) {
char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
@@ -3399,7 +3424,6 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
FileNumber, Directory, Filename, CKMem, Source);
if (!FileNumOrErr)
return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
- FileNumber = FileNumOrErr.get();
}
// Alert the user if there are some .file directives with MD5 and some not.
// But only do that once.
@@ -5035,9 +5059,9 @@ bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
MCSymbol *Sym = getContext().lookupSymbol(Name);
if (expect_defined)
- TheCondState.CondMet = (Sym && !Sym->isUndefined());
+ TheCondState.CondMet = (Sym && !Sym->isUndefined(false));
else
- TheCondState.CondMet = (!Sym || Sym->isUndefined());
+ TheCondState.CondMet = (!Sym || Sym->isUndefined(false));
TheCondState.Ignore = !TheCondState.CondMet;
}
@@ -5223,6 +5247,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".weak_definition"] = DK_WEAK_DEFINITION;
DirectiveKindMap[".weak_reference"] = DK_WEAK_REFERENCE;
DirectiveKindMap[".weak_def_can_be_hidden"] = DK_WEAK_DEF_CAN_BE_HIDDEN;
+ DirectiveKindMap[".cold"] = DK_COLD;
DirectiveKindMap[".comm"] = DK_COMM;
DirectiveKindMap[".common"] = DK_COMMON;
DirectiveKindMap[".lcomm"] = DK_LCOMM;
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 388304a72395..1217ea99e465 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -1,9 +1,8 @@
//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index cd99112292a9..1160934dc62c 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1,9 +1,8 @@
//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -1149,6 +1148,7 @@ static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
case MachO::PLATFORM_TVOS: return Triple::TvOS;
case MachO::PLATFORM_WATCHOS: return Triple::WatchOS;
case MachO::PLATFORM_BRIDGEOS: /* silence warning */ break;
+ case MachO::PLATFORM_MACCATALYST: return Triple::IOS;
case MachO::PLATFORM_IOSSIMULATOR: /* silence warning */ break;
case MachO::PLATFORM_TVOSSIMULATOR: /* silence warning */ break;
case MachO::PLATFORM_WATCHOSSIMULATOR: /* silence warning */ break;
@@ -1169,6 +1169,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
.Case("ios", MachO::PLATFORM_IOS)
.Case("tvos", MachO::PLATFORM_TVOS)
.Case("watchos", MachO::PLATFORM_WATCHOS)
+ .Case("macCatalyst", MachO::PLATFORM_MACCATALYST)
.Default(0);
if (Platform == 0)
return Error(PlatformLoc, "unknown platform name");
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d568f7a71eeb..a55bdd5364cb 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -1,9 +1,8 @@
//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -616,6 +615,10 @@ EndStmt:
Type = ELF::SHT_LLVM_LINKER_OPTIONS;
else if (TypeName == "llvm_call_graph_profile")
Type = ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
+ else if (TypeName == "llvm_dependent_libraries")
+ Type = ELF::SHT_LLVM_DEPENDENT_LIBRARIES;
+ else if (TypeName == "llvm_sympart")
+ Type = ELF::SHT_LLVM_SYMPART;
else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 10960fc69633..497055bc1760 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -1,9 +1,8 @@
//===- MCAsmLexer.cpp - Abstract Asm Lexer Interface ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index efedcdc5a314..41a1ee555d6f 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -1,9 +1,8 @@
//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp
index 031f473dc5fe..18d18f0cf6ed 100644
--- a/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -1,9 +1,8 @@
//===- MCAsmParserExtension.cpp - Asm Parser Hooks ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp
index a0c06c9d5018..940f26d4750b 100644
--- a/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -1,9 +1,8 @@
//===-- MCTargetAsmParser.cpp - Target Assembly Parser --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/WasmAsmParser.cpp b/lib/MC/MCParser/WasmAsmParser.cpp
index 93bb0cb3c72e..28d4459fecd4 100644
--- a/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/lib/MC/MCParser/WasmAsmParser.cpp
@@ -1,9 +1,8 @@
//===- WasmAsmParser.cpp - Wasm Assembly Parser -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// --
//
@@ -22,6 +21,7 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
@@ -32,8 +32,8 @@ using namespace llvm;
namespace {
class WasmAsmParser : public MCAsmParserExtension {
- MCAsmParser *Parser;
- MCAsmLexer *Lexer;
+ MCAsmParser *Parser = nullptr;
+ MCAsmLexer *Lexer = nullptr;
template<bool (WasmAsmParser::*HandlerMethod)(StringRef, SMLoc)>
void addDirectiveHandler(StringRef Directive) {
@@ -44,9 +44,7 @@ class WasmAsmParser : public MCAsmParserExtension {
}
public:
- WasmAsmParser() : Parser(nullptr), Lexer(nullptr) {
- BracketExpressionsSupported = true;
- }
+ WasmAsmParser() { BracketExpressionsSupported = true; }
void Initialize(MCAsmParser &P) override {
Parser = &P;
@@ -58,21 +56,31 @@ public:
addDirectiveHandler<&WasmAsmParser::parseSectionDirective>(".section");
addDirectiveHandler<&WasmAsmParser::parseDirectiveSize>(".size");
addDirectiveHandler<&WasmAsmParser::parseDirectiveType>(".type");
+ addDirectiveHandler<&WasmAsmParser::ParseDirectiveIdent>(".ident");
+ addDirectiveHandler<
+ &WasmAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ addDirectiveHandler<
+ &WasmAsmParser::ParseDirectiveSymbolAttribute>(".local");
+ addDirectiveHandler<
+ &WasmAsmParser::ParseDirectiveSymbolAttribute>(".internal");
+ addDirectiveHandler<
+ &WasmAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
}
- bool Error(const StringRef &msg, const AsmToken &tok) {
- return Parser->Error(tok.getLoc(), msg + tok.getString());
+ bool error(const StringRef &Msg, const AsmToken &Tok) {
+ return Parser->Error(Tok.getLoc(), Msg + Tok.getString());
}
- bool IsNext(AsmToken::TokenKind Kind) {
- auto ok = Lexer->is(Kind);
- if (ok) Lex();
- return ok;
+ bool isNext(AsmToken::TokenKind Kind) {
+ auto Ok = Lexer->is(Kind);
+ if (Ok)
+ Lex();
+ return Ok;
}
- bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
- if (!IsNext(Kind))
- return Error(std::string("Expected ") + KindName + ", instead got: ",
+ bool expect(AsmToken::TokenKind Kind, const char *KindName) {
+ if (!isNext(Kind))
+ return error(std::string("Expected ") + KindName + ", instead got: ",
Lexer->getTok());
return false;
}
@@ -82,9 +90,65 @@ public:
return false;
}
+ bool parseSectionFlags(StringRef FlagStr, bool &Passive) {
+ SmallVector<StringRef, 2> Flags;
+ // If there are no flags, keep Flags empty
+ FlagStr.split(Flags, ",", -1, false);
+ for (auto &Flag : Flags) {
+ if (Flag == "passive")
+ Passive = true;
+ else
+ return error("Expected section flags, instead got: ", Lexer->getTok());
+ }
+ return false;
+ }
+
bool parseSectionDirective(StringRef, SMLoc) {
- // FIXME: .section currently no-op.
- while (Lexer->isNot(AsmToken::EndOfStatement)) Parser->Lex();
+ StringRef Name;
+ if (Parser->parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (expect(AsmToken::Comma, ","))
+ return true;
+
+ if (Lexer->isNot(AsmToken::String))
+ return error("expected string in directive, instead got: ", Lexer->getTok());
+
+ auto Kind = StringSwitch<Optional<SectionKind>>(Name)
+ .StartsWith(".data", SectionKind::getData())
+ .StartsWith(".rodata", SectionKind::getReadOnly())
+ .StartsWith(".text", SectionKind::getText())
+ .StartsWith(".custom_section", SectionKind::getMetadata())
+ .StartsWith(".bss", SectionKind::getBSS())
+ // See use of .init_array in WasmObjectWriter and
+ // TargetLoweringObjectFileWasm
+ .StartsWith(".init_array", SectionKind::getData())
+ .Default(Optional<SectionKind>());
+ if (!Kind.hasValue())
+ return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name);
+
+ MCSectionWasm *Section = getContext().getWasmSection(Name, Kind.getValue());
+
+ // Update section flags if present in this .section directive
+ bool Passive = false;
+ if (parseSectionFlags(getTok().getStringContents(), Passive))
+ return true;
+
+ if (Passive) {
+ if (!Section->isWasmData())
+ return Parser->Error(getTok().getLoc(),
+ "Only data sections can be passive");
+ Section->setPassive();
+ }
+
+ Lex();
+
+ if (expect(AsmToken::Comma, ",") || expect(AsmToken::At, "@") ||
+ expect(AsmToken::EndOfStatement, "eol"))
+ return true;
+
+ auto WS = getContext().getWasmSection(Name, Kind.getValue());
+ getStreamer().SwitchSection(WS);
return false;
}
@@ -95,16 +159,15 @@ public:
if (Parser->parseIdentifier(Name))
return TokError("expected identifier in directive");
auto Sym = getContext().getOrCreateSymbol(Name);
- if (Lexer->isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
+ if (expect(AsmToken::Comma, ","))
+ return true;
const MCExpr *Expr;
if (Parser->parseExpression(Expr))
return true;
- if (Lexer->isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
- Lex();
- // MCWasmStreamer implements this.
+ if (expect(AsmToken::EndOfStatement, "eol"))
+ return true;
+ // This is done automatically by the assembler for functions currently,
+ // so this is only currently needed for data sections:
getStreamer().emitELFSize(Sym, Expr);
return false;
}
@@ -113,24 +176,71 @@ public:
// This could be the start of a function, check if followed by
// "label,@function"
if (!Lexer->is(AsmToken::Identifier))
- return Error("Expected label after .type directive, got: ",
+ return error("Expected label after .type directive, got: ",
Lexer->getTok());
auto WasmSym = cast<MCSymbolWasm>(
getStreamer().getContext().getOrCreateSymbol(
Lexer->getTok().getString()));
Lex();
- if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
+ if (!(isNext(AsmToken::Comma) && isNext(AsmToken::At) &&
Lexer->is(AsmToken::Identifier)))
- return Error("Expected label,@type declaration, got: ", Lexer->getTok());
+ return error("Expected label,@type declaration, got: ", Lexer->getTok());
auto TypeName = Lexer->getTok().getString();
if (TypeName == "function")
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
else if (TypeName == "global")
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ else if (TypeName == "object")
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
else
- return Error("Unknown WASM symbol type: ", Lexer->getTok());
+ return error("Unknown WASM symbol type: ", Lexer->getTok());
Lex();
- return Expect(AsmToken::EndOfStatement, "EOL");
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ // FIXME: Shared with ELF.
+ /// ParseDirectiveIdent
+ /// ::= .ident string
+ bool ParseDirectiveIdent(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("unexpected token in '.ident' directive");
+ StringRef Data = getTok().getIdentifier();
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.ident' directive");
+ Lex();
+ getStreamer().EmitIdent(Data);
+ return false;
+ }
+
+ // FIXME: Shared with ELF.
+ /// ParseDirectiveSymbolAttribute
+ /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
+ bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
+ .Case(".weak", MCSA_Weak)
+ .Case(".local", MCSA_Local)
+ .Case(".hidden", MCSA_Hidden)
+ .Case(".internal", MCSA_Internal)
+ .Case(".protected", MCSA_Protected)
+ .Default(MCSA_Invalid);
+ assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ while (true) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+ Lex();
+ return false;
}
};
diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp
index 5abae5379867..4273b876b7bb 100644
--- a/lib/MC/MCRegisterInfo.cpp
+++ b/lib/MC/MCRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- MC/MCRegisterInfo.cpp - Target Register Description ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/MC/MCSchedule.cpp b/lib/MC/MCSchedule.cpp
index 929bd7f6046c..1fc5ec5e975f 100644
--- a/lib/MC/MCSchedule.cpp
+++ b/lib/MC/MCSchedule.cpp
@@ -1,9 +1,8 @@
//===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -150,3 +149,19 @@ MCSchedModel::getReciprocalThroughput(unsigned SchedClass,
// that it can execute at the maximum default issue width.
return 1.0 / DefaultIssueWidth;
}
+
+unsigned
+MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
+ unsigned WriteResourceID) {
+ if (Entries.empty())
+ return 0;
+
+ int DelayCycles = 0;
+ for (const MCReadAdvanceEntry &E : Entries) {
+ if (E.WriteResourceID != WriteResourceID)
+ continue;
+ DelayCycles = std::min(DelayCycles, E.Cycles);
+ }
+
+ return std::abs(DelayCycles);
+}
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index d4f11d10136a..2c892ab81608 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index c861963eec8a..f0c06f70bd73 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,8 +14,6 @@
using namespace llvm;
-MCSectionCOFF::~MCSectionCOFF() = default; // anchor.
-
// ShouldOmitSectionDirective - Decides whether a '.section' directive
// should be printed before the section name
bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 7ee1694ebbf7..efe504b2024c 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,8 +17,6 @@
using namespace llvm;
-MCSectionELF::~MCSectionELF() = default; // anchor.
-
// Decides whether a '.section' directive
// should be printed before the section name.
bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
@@ -155,6 +152,10 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "llvm_linker_options";
else if (Type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE)
OS << "llvm_call_graph_profile";
+ else if (Type == ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
+ OS << "llvm_dependent_libraries";
+ else if (Type == ELF::SHT_LLVM_SYMPART)
+ OS << "llvm_sympart";
else
report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
" for section " + getSectionName());
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index f40237231a2f..0fd89dcbe5fa 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCSectionWasm.cpp b/lib/MC/MCSectionWasm.cpp
index 626027a24f97..8633c10a73fd 100644
--- a/lib/MC/MCSectionWasm.cpp
+++ b/lib/MC/MCSectionWasm.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSectionWasm.cpp - Wasm Code Section Representation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,11 +14,9 @@
using namespace llvm;
-MCSectionWasm::~MCSectionWasm() {} // anchor.
-
// Decides whether a '.section' directive
// should be printed before the section name.
-bool MCSectionWasm::ShouldOmitSectionDirective(StringRef Name,
+bool MCSectionWasm::shouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
return MAI.shouldOmitSectionDirective(Name);
}
@@ -51,7 +48,7 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
- if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ if (shouldOmitSectionDirective(SectionName, MAI)) {
OS << '\t' << getSectionName();
if (Subsection) {
OS << '\t';
@@ -65,7 +62,8 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
printName(OS, getSectionName());
OS << ",\"";
- // TODO: Print section flags.
+ if (IsPassive)
+ OS << "passive";
OS << '"';
diff --git a/lib/MC/MCSectionXCOFF.cpp b/lib/MC/MCSectionXCOFF.cpp
new file mode 100644
index 000000000000..d1a637345024
--- /dev/null
+++ b/lib/MC/MCSectionXCOFF.cpp
@@ -0,0 +1,33 @@
+//===- lib/MC/MCSectionXCOFF.cpp - XCOFF Code Section Representation ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCSectionXCOFF::~MCSectionXCOFF() = default;
+
+void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ raw_ostream &OS,
+ const MCExpr *Subsection) const {
+ if (getKind().isText()) {
+ OS << "\t.csect " << getSectionName() << "["
+ << "PR"
+ << "]" << '\n';
+ return;
+ }
+
+ report_fatal_error("Printing for this SectionKind is unimplemented.");
+}
+
+bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); }
+
+bool MCSectionXCOFF::isVirtualSection() const { return !getKind().isCommon(); }
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 6a8471bc61b4..decbb96817e3 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -108,6 +107,11 @@ raw_ostream &MCStreamer::GetCommentOS() {
return nulls();
}
+unsigned MCStreamer::getNumFrameInfos() { return DwarfFrameInfos.size(); }
+ArrayRef<MCDwarfFrameInfo> MCStreamer::getDwarfFrameInfos() const {
+ return DwarfFrameInfos;
+}
+
void MCStreamer::emitRawComment(const Twine &T, bool TabPrefix) {}
void MCStreamer::addExplicitComment(const Twine &T) {}
@@ -136,10 +140,10 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
-void MCStreamer::EmitULEB128IntValue(uint64_t Value) {
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned PadTo) {
SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
- encodeULEB128(Value, OSE);
+ encodeULEB128(Value, OSE, PadTo);
EmitBytes(OSE.str());
}
@@ -205,7 +209,7 @@ void MCStreamer::EmitZeros(uint64_t NumBytes) {
Expected<unsigned>
MCStreamer::tryEmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
StringRef Filename,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
unsigned CUID) {
return getContext().getDwarfFile(Directory, Filename, FileNo, Checksum,
@@ -214,7 +218,7 @@ MCStreamer::tryEmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
void MCStreamer::emitDwarfFile0Directive(StringRef Directory,
StringRef Filename,
- MD5::MD5Result *Checksum,
+ Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
unsigned CUID) {
getContext().setMCLineTableRootFile(CUID, Directory, Filename, Checksum,
@@ -953,8 +957,7 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
}
}
-void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) {
+void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {
// Scan for values.
for (unsigned i = Inst.getNumOperands(); i--;)
if (Inst.getOperand(i).isExpr())
@@ -1074,6 +1077,15 @@ void MCStreamer::EmitVersionForTarget(const Triple &Target,
unsigned Major;
unsigned Minor;
unsigned Update;
+ if (Target.isMacCatalystEnvironment()) {
+ // Mac Catalyst always uses the build version load command.
+ Target.getiOSVersion(Major, Minor, Update);
+ assert(Major && "A non-zero major version is expected");
+ EmitBuildVersion(MachO::PLATFORM_MACCATALYST, Major, Minor, Update,
+ SDKVersion);
+ return;
+ }
+
MCVersionMinType VersionType;
if (Target.isWatchOS()) {
VersionType = MCVM_WatchOSVersionMin;
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index f6167826fae2..5fd48d9e1010 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -1,9 +1,8 @@
//===- MCSubtargetInfo.cpp - Subtarget Information ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,6 +12,7 @@
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -20,11 +20,178 @@
using namespace llvm;
+/// Find KV in array using binary search.
+template <typename T>
+static const T *Find(StringRef S, ArrayRef<T> A) {
+ // Binary search the array
+ auto F = llvm::lower_bound(A, S);
+ // If not found then return NULL
+ if (F == A.end() || StringRef(F->Key) != S) return nullptr;
+ // Return the found array item
+ return F;
+}
+
+/// For each feature that is (transitively) implied by this feature, set it.
+static
+void SetImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ // OR the Implies bits in outside the loop. This allows the Implies for CPUs
+ // which might imply features not in FeatureTable to use this.
+ Bits |= Implies;
+ for (const SubtargetFeatureKV &FE : FeatureTable)
+ if (Implies.test(FE.Value))
+ SetImpliedBits(Bits, FE.Implies.getAsBitset(), FeatureTable);
+}
+
+/// For each feature that (transitively) implies this feature, clear it.
+static
+void ClearImpliedBits(FeatureBitset &Bits, unsigned Value,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ for (const SubtargetFeatureKV &FE : FeatureTable) {
+ if (FE.Implies.getAsBitset().test(Value)) {
+ Bits.reset(FE.Value);
+ ClearImpliedBits(Bits, FE.Value, FeatureTable);
+ }
+ }
+}
+
+static void ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ assert(SubtargetFeatures::hasFlag(Feature) &&
+ "Feature flags should start with '+' or '-'");
+
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(SubtargetFeatures::StripFlag(Feature), FeatureTable);
+ // If there is a match
+ if (FeatureEntry) {
+ // Enable/disable feature in bits
+ if (SubtargetFeatures::isEnabled(Feature)) {
+ Bits.set(FeatureEntry->Value);
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry->Implies.getAsBitset(), FeatureTable);
+ } else {
+ Bits.reset(FeatureEntry->Value);
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry->Value, FeatureTable);
+ }
+ } else {
+ errs() << "'" << Feature << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+}
+
+/// Return the length of the longest entry in the table.
+template <typename T>
+static size_t getLongestEntryLength(ArrayRef<T> Table) {
+ size_t MaxLen = 0;
+ for (auto &I : Table)
+ MaxLen = std::max(MaxLen, std::strlen(I.Key));
+ return MaxLen;
+}
+
+/// Display help for feature and mcpu choices.
+static void Help(ArrayRef<SubtargetSubTypeKV> CPUTable,
+ ArrayRef<SubtargetFeatureKV> FeatTable) {
+ // the static variable ensures that the help information only gets
+ // printed once even though a target machine creates multiple subtargets
+ static bool PrintOnce = false;
+ if (PrintOnce) {
+ return;
+ }
+
+ // Determine the length of the longest CPU and Feature entries.
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable);
+
+ // Print the CPU table.
+ errs() << "Available CPUs for this target:\n\n";
+ for (auto &CPU : CPUTable)
+ errs() << format(" %-*s - Select the %s processor.\n", MaxCPULen, CPU.Key,
+ CPU.Key);
+ errs() << '\n';
+
+ // Print the Feature table.
+ errs() << "Available features for this target:\n\n";
+ for (auto &Feature : FeatTable)
+ errs() << format(" %-*s - %s.\n", MaxFeatLen, Feature.Key, Feature.Desc);
+ errs() << '\n';
+
+ errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
+ "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+
+ PrintOnce = true;
+}
+
+/// Display help for mcpu choices only
+static void cpuHelp(ArrayRef<SubtargetSubTypeKV> CPUTable) {
+ // the static variable ensures that the help information only gets
+ // printed once even though a target machine creates multiple subtargets
+ static bool PrintOnce = false;
+ if (PrintOnce) {
+ return;
+ }
+
+ // Print the CPU table.
+ errs() << "Available CPUs for this target:\n\n";
+ for (auto &CPU : CPUTable)
+ errs() << "\t" << CPU.Key << "\n";
+ errs() << '\n';
+
+ errs() << "Use -mcpu or -mtune to specify the target's processor.\n"
+ "For example, clang --target=aarch64-unknown-linux-gui "
+ "-mcpu=cortex-a35\n";
+
+ PrintOnce = true;
+}
+
static FeatureBitset getFeatures(StringRef CPU, StringRef FS,
- ArrayRef<SubtargetFeatureKV> ProcDesc,
+ ArrayRef<SubtargetSubTypeKV> ProcDesc,
ArrayRef<SubtargetFeatureKV> ProcFeatures) {
SubtargetFeatures Features(FS);
- return Features.getFeatureBits(CPU, ProcDesc, ProcFeatures);
+
+ if (ProcDesc.empty() || ProcFeatures.empty())
+ return FeatureBitset();
+
+ assert(std::is_sorted(std::begin(ProcDesc), std::end(ProcDesc)) &&
+ "CPU table is not sorted");
+ assert(std::is_sorted(std::begin(ProcFeatures), std::end(ProcFeatures)) &&
+ "CPU features table is not sorted");
+ // Resulting bits
+ FeatureBitset Bits;
+
+ // Check if help is needed
+ if (CPU == "help")
+ Help(ProcDesc, ProcFeatures);
+
+ // Find CPU entry if CPU name is specified.
+ else if (!CPU.empty()) {
+ const SubtargetSubTypeKV *CPUEntry = Find(CPU, ProcDesc);
+
+ // If there is a match
+ if (CPUEntry) {
+ // Set the features implied by this CPU feature, if any.
+ SetImpliedBits(Bits, CPUEntry->Implies.getAsBitset(), ProcFeatures);
+ } else {
+ errs() << "'" << CPU << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ }
+ }
+
+ // Iterate through each feature
+ for (const std::string &Feature : Features.getFeatures()) {
+ // Check for help
+ if (Feature == "+help")
+ Help(ProcDesc, ProcFeatures);
+ else if (Feature == "+cpuHelp")
+ cpuHelp(ProcDesc);
+ else
+ ApplyFeatureFlag(Bits, Feature, ProcFeatures);
+ }
+
+ return Bits;
}
void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
@@ -41,12 +208,12 @@ void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) {
MCSubtargetInfo::MCSubtargetInfo(
const Triple &TT, StringRef C, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
- const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
+ const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
const InstrStage *IS, const unsigned *OC, const unsigned *FP)
: TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD),
- ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL),
+ WriteProcResTable(WPR), WriteLatencyTable(WL),
ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) {
InitMCProcessorInfo(CPU, FS);
}
@@ -61,13 +228,50 @@ FeatureBitset MCSubtargetInfo::ToggleFeature(const FeatureBitset &FB) {
return FeatureBits;
}
-FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef FS) {
- SubtargetFeatures::ToggleFeature(FeatureBits, FS, ProcFeatures);
+FeatureBitset MCSubtargetInfo::SetFeatureBitsTransitively(
+ const FeatureBitset &FB) {
+ SetImpliedBits(FeatureBits, FB, ProcFeatures);
+ return FeatureBits;
+}
+
+FeatureBitset MCSubtargetInfo::ClearFeatureBitsTransitively(
+ const FeatureBitset &FB) {
+ for (unsigned I = 0, E = FB.size(); I < E; I++) {
+ if (FB[I]) {
+ FeatureBits.reset(I);
+ ClearImpliedBits(FeatureBits, I, ProcFeatures);
+ }
+ }
+ return FeatureBits;
+}
+
+FeatureBitset MCSubtargetInfo::ToggleFeature(StringRef Feature) {
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(SubtargetFeatures::StripFlag(Feature), ProcFeatures);
+ // If there is a match
+ if (FeatureEntry) {
+ if (FeatureBits.test(FeatureEntry->Value)) {
+ FeatureBits.reset(FeatureEntry->Value);
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(FeatureBits, FeatureEntry->Value, ProcFeatures);
+ } else {
+ FeatureBits.set(FeatureEntry->Value);
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(FeatureBits, FeatureEntry->Implies.getAsBitset(),
+ ProcFeatures);
+ }
+ } else {
+ errs() << "'" << Feature << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+
return FeatureBits;
}
FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) {
- SubtargetFeatures::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
+ ::ApplyFeatureFlag(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
@@ -75,37 +279,30 @@ bool MCSubtargetInfo::checkFeatures(StringRef FS) const {
SubtargetFeatures T(FS);
FeatureBitset Set, All;
for (std::string F : T.getFeatures()) {
- SubtargetFeatures::ApplyFeatureFlag(Set, F, ProcFeatures);
+ ::ApplyFeatureFlag(Set, F, ProcFeatures);
if (F[0] == '-')
F[0] = '+';
- SubtargetFeatures::ApplyFeatureFlag(All, F, ProcFeatures);
+ ::ApplyFeatureFlag(All, F, ProcFeatures);
}
return (FeatureBits & All) == Set;
}
const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
- assert(ProcSchedModels && "Processor machine model not available!");
-
- ArrayRef<SubtargetInfoKV> SchedModels(ProcSchedModels, ProcDesc.size());
-
- assert(std::is_sorted(SchedModels.begin(), SchedModels.end(),
- [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) {
- return strcmp(LHS.Key, RHS.Key) < 0;
- }) &&
+ assert(std::is_sorted(ProcDesc.begin(), ProcDesc.end()) &&
"Processor machine model table is not sorted");
// Find entry
- auto Found =
- std::lower_bound(SchedModels.begin(), SchedModels.end(), CPU);
- if (Found == SchedModels.end() || StringRef(Found->Key) != CPU) {
+ const SubtargetSubTypeKV *CPUEntry = Find(CPU, ProcDesc);
+
+ if (!CPUEntry) {
if (CPU != "help") // Don't error if the user asked for help.
errs() << "'" << CPU
<< "' is not a recognized processor for this target"
<< " (ignoring processor)\n";
return MCSchedModel::GetDefaultSchedModel();
}
- assert(Found->Value && "Missing processor SchedModel value");
- return *(const MCSchedModel *)Found->Value;
+ assert(CPUEntry->SchedModel && "Missing processor SchedModel value");
+ return *CPUEntry->SchedModel;
}
InstrItineraryData
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 5502c658f565..67cab9a92722 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
index 12c724f6b1ee..a07c56c64f84 100644
--- a/lib/MC/MCSymbolELF.cpp
+++ b/lib/MC/MCSymbolELF.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCSymbolELF.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -66,7 +65,7 @@ void MCSymbolELF::setBinding(unsigned Binding) const {
unsigned MCSymbolELF::getBinding() const {
if (isBindingSet()) {
- uint32_t Val = (getFlags() & (0x3 << ELF_STB_Shift)) >> ELF_STB_Shift;
+ uint32_t Val = (Flags >> ELF_STB_Shift) & 3;
switch (Val) {
default:
llvm_unreachable("Invalid value");
@@ -126,7 +125,7 @@ void MCSymbolELF::setType(unsigned Type) const {
}
unsigned MCSymbolELF::getType() const {
- uint32_t Val = (getFlags() & (0x7 << ELF_STT_Shift)) >> ELF_STT_Shift;
+ uint32_t Val = (Flags >> ELF_STT_Shift) & 7;
switch (Val) {
default:
llvm_unreachable("Invalid value");
@@ -156,9 +155,7 @@ void MCSymbolELF::setVisibility(unsigned Visibility) {
}
unsigned MCSymbolELF::getVisibility() const {
- unsigned Visibility = (getFlags() & (0x3 << ELF_STV_Shift)) >> ELF_STV_Shift;
- assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL ||
- Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED);
+ unsigned Visibility = (Flags >> ELF_STV_Shift) & 3;
return Visibility;
}
@@ -171,7 +168,7 @@ void MCSymbolELF::setOther(unsigned Other) {
}
unsigned MCSymbolELF::getOther() const {
- unsigned Other = (getFlags() & (0x7 << ELF_STO_Shift)) >> ELF_STO_Shift;
+ unsigned Other = (Flags >> ELF_STO_Shift) & 7;
return Other << 5;
}
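
Both the old and the new accessors extract the same bit-field; the new form shifts first and then masks with the field width instead of masking against a shifted constant. A small self-contained sketch of this pack/unpack pattern, using invented shift positions rather than the real ELF_STB/STT/STV shift constants:

#include <cassert>
#include <cstdint>

// Hypothetical field layout: binding in bits 0-1, type in bits 2-4.
constexpr unsigned BindingShift = 0, TypeShift = 2;

constexpr uint32_t pack(uint32_t Binding, uint32_t Type) {
  return (Binding << BindingShift) | (Type << TypeShift);
}
constexpr uint32_t getBinding(uint32_t Flags) { return (Flags >> BindingShift) & 3; }
constexpr uint32_t getType(uint32_t Flags) { return (Flags >> TypeShift) & 7; }

int main() {
  uint32_t Flags = pack(/*Binding=*/2, /*Type=*/5);
  assert(getBinding(Flags) == 2 && getType(Flags) == 5);
  return 0;
}
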
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
index b85e53db5d61..96bb094134fe 100644
--- a/lib/MC/MCTargetOptions.cpp
+++ b/lib/MC/MCTargetOptions.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCTargetOptions.cpp - MC Target Options ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,12 +12,11 @@
using namespace llvm;
MCTargetOptions::MCTargetOptions()
- : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
- MCFatalWarnings(false), MCNoWarn(false), MCNoDeprecatedWarn(false),
- MCSaveTempLabels(false), MCUseDwarfDirectory(false),
- MCIncrementalLinkerCompatible(false), MCPIECopyRelocations(false),
- ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
- PreserveAsmComments(true) {}
+ : MCRelaxAll(false), MCNoExecStack(false), MCFatalWarnings(false),
+ MCNoWarn(false), MCNoDeprecatedWarn(false), MCSaveTempLabels(false),
+ MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
+ MCPIECopyRelocations(false), ShowMCEncoding(false), ShowMCInst(false),
+ AsmVerbose(false), PreserveAsmComments(true) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
index 7e03913aa680..81da47b2eced 100644
--- a/lib/MC/MCValue.cpp
+++ b/lib/MC/MCValue.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp
index 59082a160caf..e46257823e34 100644
--- a/lib/MC/MCWasmObjectTargetWriter.cpp
+++ b/lib/MC/MCWasmObjectTargetWriter.cpp
@@ -1,9 +1,8 @@
//===-- MCWasmObjectTargetWriter.cpp - Wasm Target Writer Subclass --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,8 +10,8 @@
using namespace llvm;
-MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_)
- : Is64Bit(Is64Bit_) {}
+MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit)
+ : Is64Bit(Is64Bit) {}
// Pin the vtable to this object file
MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default;
diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp
index d2a152058b90..86fa72197855 100644
--- a/lib/MC/MCWasmStreamer.cpp
+++ b/lib/MC/MCWasmStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCWasmStreamer.cpp - Wasm Object Output ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,15 +34,15 @@
using namespace llvm;
-MCWasmStreamer::~MCWasmStreamer() {}
+MCWasmStreamer::~MCWasmStreamer() = default; // anchor.
void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) {
flushPendingLabels(DF, DF->getContents().size());
- for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
- EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
+ for (unsigned I = 0, E = EF->getFixups().size(); I != E; ++I) {
+ EF->getFixups()[I].setOffset(EF->getFixups()[I].getOffset() +
DF->getContents().size());
- DF->getFixups().push_back(EF->getFixups()[i]);
+ DF->getFixups().push_back(EF->getFixups()[I]);
}
if (DF->getSubtargetInfo() == nullptr && EF->getSubtargetInfo())
DF->setHasInstructions(*EF->getSubtargetInfo());
@@ -119,6 +118,11 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
break;
case MCSA_ELF_TypeObject:
+ case MCSA_Cold:
+ break;
+
+ case MCSA_NoDeadStrip:
+ Symbol->setExported();
break;
default:
@@ -179,9 +183,9 @@ void MCWasmStreamer::EmitInstToData(const MCInst &Inst,
MCDataFragment *DF = getOrCreateDataFragment();
// Add the fixups and data.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
- DF->getFixups().push_back(Fixups[i]);
+ for (unsigned I = 0, E = Fixups.size(); I != E; ++I) {
+ Fixups[I].setOffset(Fixups[I].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[I]);
}
DF->setHasInstructions(STI);
DF->getContents().append(Code.begin(), Code.end());
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index 0724b109e1a1..4e9a29667097 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCWin64EH.cpp - MCWin64EH implementation --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -256,8 +255,12 @@ static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS,
MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context),
MCSymbolRefExpr::create(RHS, Context), Context);
MCObjectStreamer *OS = (MCObjectStreamer *)(&Streamer);
+ // It should normally be possible to calculate the length of a function
+ // at this point, but it might not be possible in the presence of certain
+ // unusual constructs, like an inline asm with an alignment directive.
int64_t value;
- Diff->evaluateAsAbsolute(value, OS->getAssembler());
+ if (!Diff->evaluateAsAbsolute(value, OS->getAssembler()))
+ report_fatal_error("Failed to evaluate function length in SEH unwind info");
return value;
}
@@ -453,6 +456,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin,
}
}
+// Returns the epilog symbol of an epilog with the exact same unwind code
+// sequence, if it exists. Otherwise, returns nullptr.
+// EpilogInstrs - Unwind codes for the current epilog.
+// Epilogs - Epilogs that potentially match the current epilog.
+static MCSymbol*
+FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
+ const std::vector<MCSymbol *>& Epilogs,
+ const WinEH::FrameInfo *info) {
+ for (auto *EpilogStart : Epilogs) {
+ auto InstrsIter = info->EpilogMap.find(EpilogStart);
+ assert(InstrsIter != info->EpilogMap.end() &&
+ "Epilog not found in EpilogMap");
+ const auto &Instrs = InstrsIter->second;
+
+ if (Instrs.size() != EpilogInstrs.size())
+ continue;
+
+ bool Match = true;
+ for (unsigned i = 0; i < Instrs.size(); ++i)
+ if (Instrs[i].Operation != EpilogInstrs[i].Operation ||
+ Instrs[i].Offset != EpilogInstrs[i].Offset ||
+ Instrs[i].Register != EpilogInstrs[i].Register) {
+ Match = false;
+ break;
+ }
+
+ if (Match)
+ return EpilogStart;
+ }
+ return nullptr;
+}
+
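
FindMatchingEpilog lets the ARM64 unwind emitter reuse the unwind codes of an earlier, byte-for-byte identical epilog instead of emitting them twice. A rough standalone sketch of that deduplication, with simplified stand-ins for WinEH::Instruction and MCSymbol:

#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Instr {
  unsigned Op;
  int Offset;
  unsigned Reg;
  bool operator==(const Instr &O) const {
    return Op == O.Op && Offset == O.Offset && Reg == O.Reg;
  }
};

int main() {
  // Two epilogs with identical unwind codes ("ep0", "ep1"), one that differs.
  std::map<std::string, std::vector<Instr>> EpilogMap = {
      {"ep0", {{1, 16, 29}, {2, 0, 0}}},
      {"ep1", {{1, 16, 29}, {2, 0, 0}}},
      {"ep2", {{1, 32, 29}, {2, 0, 0}}}};

  std::map<std::string, unsigned> EpilogOffset; // offset into the code words
  std::vector<std::string> Added;               // epilogs emitted so far
  unsigned TotalCodeBytes = 0;

  for (auto &KV : EpilogMap) {
    // Look for an already-emitted epilog with the same code sequence.
    std::string Match;
    for (auto &Prev : Added)
      if (EpilogMap[Prev] == KV.second) {
        Match = Prev;
        break;
      }

    if (!Match.empty()) {
      EpilogOffset[KV.first] = EpilogOffset[Match]; // share the earlier codes
    } else {
      EpilogOffset[KV.first] = TotalCodeBytes;
      TotalCodeBytes += KV.second.size(); // pretend each code is one byte
      Added.push_back(KV.first);
    }
  }

  assert(EpilogOffset["ep1"] == EpilogOffset["ep0"]);
  assert(EpilogOffset["ep2"] != EpilogOffset["ep0"]);
  return 0;
}
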
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -467,22 +502,71 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
streamer.EmitLabel(Label);
info->Symbol = Label;
- uint32_t FuncLength = 0x0;
- if (info->FuncletOrFuncEnd)
- FuncLength = (uint32_t)GetAbsDifference(streamer, info->FuncletOrFuncEnd,
- info->Begin);
- FuncLength /= 4;
+ int64_t RawFuncLength;
+ if (!info->FuncletOrFuncEnd) {
+ // FIXME: This is very wrong; we emit SEH data which covers zero bytes
+ // of code. But otherwise test/MC/AArch64/seh.s crashes.
+ RawFuncLength = 0;
+ } else {
+ // FIXME: GetAbsDifference tries to compute the length of the function
+ // immediately, before the whole file is emitted, but in general
+ // that's impossible: the size in bytes of certain assembler directives
+ // like .align and .fill is not known until the whole file is parsed and
+ // relaxations are applied. Currently, GetAbsDifference fails with a fatal
+ // error in that case. (We mostly don't hit this because inline assembly
+ // specifying those directives is rare, and we don't normally try to
+ // align loops on AArch64.)
+ //
+ // There are two potential approaches to delaying the computation. One,
+ // we could emit something like ".word (endfunc-beginfunc)/4+0x10800000",
+ // as long as we have some conservative estimate we could use to prove
+ // that we don't need to split the unwind data. Emitting the constant
+ // is straightforward, but there's no existing code for estimating the
+ // size of the function.
+ //
+ // The other approach would be to use a dedicated, relaxable fragment,
+ // which could grow to accommodate splitting the unwind data if
+ // necessary. This is more straightforward, since it automatically works
+ // without any new infrastructure, and it's consistent with how we handle
+ // relaxation in other contexts. But it would require some refactoring
+ // to move parts of the pdata/xdata emission into the implementation of
+ // a fragment. We could probably continue to encode the unwind codes
+ // here, but we'd have to emit the pdata, the xdata header, and the
+ // epilogue scopes later, since they depend on whether we need to
+ // split the unwind data.
+ RawFuncLength = GetAbsDifference(streamer, info->FuncletOrFuncEnd,
+ info->Begin);
+ }
+ if (RawFuncLength > 0xFFFFF)
+ report_fatal_error("SEH unwind data splitting not yet implemented");
+ uint32_t FuncLength = (uint32_t)RawFuncLength / 4;
uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions);
uint32_t TotalCodeBytes = PrologCodeBytes;
// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
+ // Epilogs processed so far.
+ std::vector<MCSymbol *> AddedEpilogs;
+
for (auto &I : info->EpilogMap) {
MCSymbol *EpilogStart = I.first;
auto &EpilogInstrs = I.second;
uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
- EpilogInfo[EpilogStart] = TotalCodeBytes;
- TotalCodeBytes += CodeBytes;
+
+ MCSymbol* MatchingEpilog =
+ FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+ if (MatchingEpilog) {
+ assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+ "Duplicate epilog not found");
+ EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
+ // Clear the unwind codes in the EpilogMap, so that they don't get output
+ // in the logic below.
+ EpilogInstrs.clear();
+ } else {
+ EpilogInfo[EpilogStart] = TotalCodeBytes;
+ TotalCodeBytes += CodeBytes;
+ AddedEpilogs.push_back(EpilogStart);
+ }
}
// Code Words, Epilog count, E, X, Vers, Function Length
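
Per the ARM64 exception-handling document linked above, the first .xdata word packs (from low to high bits) Function Length, Vers, X, E, Epilog Count, and Code Words. A hedged sketch of that packing, assuming the field widths given there (18/2/1/1/5/5 bits); the 0xFFFFF limit checked above corresponds to the 18-bit length field once the byte count is divided by 4:

#include <cassert>
#include <cstdint>

// Assumed layout from the ARM64 exception-handling docs:
//   FunctionLength : 18 bits, Vers : 2, X : 1, E : 1,
//   EpilogCount    : 5 bits,  CodeWords : 5.
static uint32_t packXDataHeader(uint32_t FuncLength, uint32_t Vers, uint32_t X,
                                uint32_t E, uint32_t EpilogCount,
                                uint32_t CodeWords) {
  return (FuncLength & 0x3FFFF) | (Vers << 18) | (X << 20) | (E << 21) |
         (EpilogCount << 22) | (CodeWords << 27);
}

int main() {
  // A 1024-byte function: the length is stored in units of 4 bytes.
  uint32_t Hdr = packXDataHeader(/*FuncLength=*/1024 / 4, /*Vers=*/0, /*X=*/0,
                                 /*E=*/0, /*EpilogCount=*/2, /*CodeWords=*/3);
  assert((Hdr & 0x3FFFF) == 256);
  assert(((Hdr >> 22) & 0x1F) == 2 && ((Hdr >> 27) & 0x1F) == 3);
  return 0;
}
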
diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp
index 7b1dc7abf708..04d5f100a2ff 100644
--- a/lib/MC/MCWinCOFFStreamer.cpp
+++ b/lib/MC/MCWinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
//===- llvm/MC/MCWinCOFFStreamer.cpp --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -256,7 +255,7 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
auto *Symbol = cast<MCSymbolCOFF>(S);
const Triple &T = getContext().getObjectFileInfo()->getTargetTriple();
- if (T.isKnownWindowsMSVCEnvironment()) {
+ if (T.isWindowsMSVCEnvironment()) {
if (ByteAlignment > 32)
report_fatal_error("alignment is limited to 32-bytes");
@@ -268,7 +267,7 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
Symbol->setExternal(true);
Symbol->setCommon(Size, ByteAlignment);
- if (!T.isKnownWindowsMSVCEnvironment() && ByteAlignment > 1) {
+ if (!T.isWindowsMSVCEnvironment() && ByteAlignment > 1) {
SmallString<128> Directive;
raw_svector_ostream OS(Directive);
const MCObjectFileInfo *MFI = getContext().getObjectFileInfo();
diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp
index a5d0f5a2cb75..e58a0b2cf654 100644
--- a/lib/MC/MCWinEH.cpp
+++ b/lib/MC/MCWinEH.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MCWinEH.cpp - Windows EH implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCXCOFFObjectTargetWriter.cpp b/lib/MC/MCXCOFFObjectTargetWriter.cpp
new file mode 100644
index 000000000000..504e333cb2d4
--- /dev/null
+++ b/lib/MC/MCXCOFFObjectTargetWriter.cpp
@@ -0,0 +1,16 @@
+//===- MCXCOFFObjectTargetWriter.cpp - XCOFF Target Writer Subclass -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+MCXCOFFObjectTargetWriter::MCXCOFFObjectTargetWriter(bool Is64Bit)
+ : Is64Bit(Is64Bit) {}
+
+MCXCOFFObjectTargetWriter::~MCXCOFFObjectTargetWriter() = default;
diff --git a/lib/MC/MCXCOFFStreamer.cpp b/lib/MC/MCXCOFFStreamer.cpp
new file mode 100644
index 000000000000..071de024a3fa
--- /dev/null
+++ b/lib/MC/MCXCOFFStreamer.cpp
@@ -0,0 +1,59 @@
+//===- lib/MC/MCXCOFFStreamer.cpp - XCOFF Object Output -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits XCOFF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCXCOFFStreamer.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> OW,
+ std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCObjectStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(Emitter)) {}
+
+bool MCXCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ report_fatal_error("Symbol attributes not implemented for XCOFF.");
+}
+
+void MCXCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ report_fatal_error("Emiting common symbols not implemented for XCOFF.");
+}
+
+void MCXCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment,
+ SMLoc Loc) {
+ report_fatal_error("Zero fill not implemented for XCOFF.");
+}
+
+void MCXCOFFStreamer::EmitInstToData(const MCInst &Inst,
+ const MCSubtargetInfo &) {
+ report_fatal_error("Instruction emission not implemented for XCOFF.");
+}
+
+MCStreamer *llvm::createXCOFFStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll) {
+ MCXCOFFStreamer *S = new MCXCOFFStreamer(Context, std::move(MAB),
+ std::move(OW), std::move(CE));
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 2fa65658ccfa..f0ceb86b25af 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -449,11 +449,25 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand(
assert(W.OS.tell() - Start == Size);
}
+static bool isFixupTargetValid(const MCValue &Target) {
+ // Target is (LHS - RHS + cst).
+ // We don't support the form where LHS is null: -RHS + cst
+ if (!Target.getSymA() && Target.getSymB())
+ return false;
+ return true;
+}
+
void MachObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
+ if (!isFixupTargetValid(Target)) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation expression");
+ return;
+ }
+
TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
Target, FixedValue);
}
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index de40a7728d3f..cb3db8e2268c 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -1,9 +1,8 @@
//===- StringTableBuilder.cpp - String table building utility -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -160,6 +159,13 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) {
if (K == MachO)
Size = alignTo(Size, 4); // Pad to multiple of 4.
+
+ // The first byte in an ELF string table must be null, according to the ELF
+ // specification. In 'initSize()' we reserved the first byte to hold null for
+ // this purpose and here we actually add the string to allow 'getOffset()' to
+ // be called on an empty string.
+ if (K == ELF)
+ StringIndexMap[CachedHashStringRef("")] = 0;
}
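
The new map entry makes getOffset("") legal for ELF: offset 0 holds the mandatory leading NUL byte, which doubles as the empty string. A tiny illustration of that layout, independent of StringTableBuilder:

#include <cassert>
#include <cstring>
#include <string>

int main() {
  // An ELF string table is a blob of NUL-terminated strings whose
  // first byte is required to be NUL.
  std::string Blob("\0foo\0bar\0", 9);

  size_t EmptyOff = 0;           // "" lives at offset 0
  size_t FooOff = 1, BarOff = 5; // offsets of the other entries

  assert(std::strcmp(Blob.data() + EmptyOff, "") == 0);
  assert(std::strcmp(Blob.data() + FooOff, "foo") == 0);
  assert(std::strcmp(Blob.data() + BarOff, "bar") == 0);
  return 0;
}
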
void StringTableBuilder::clear() {
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index b69af24b531e..c4dd77359b24 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -1,9 +1,8 @@
//===- SubtargetFeature.cpp - CPU characteristics Implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -20,7 +18,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -32,31 +29,8 @@
using namespace llvm;
-/// Determine if a feature has a flag; '+' or '-'
-static inline bool hasFlag(StringRef Feature) {
- assert(!Feature.empty() && "Empty string");
- // Get first character
- char Ch = Feature[0];
- // Check if first character is '+' or '-' flag
- return Ch == '+' || Ch =='-';
-}
-
-/// Return string stripped of flag.
-static inline std::string StripFlag(StringRef Feature) {
- return hasFlag(Feature) ? Feature.substr(1) : Feature;
-}
-
-/// Return true if enable flag; '+'.
-static inline bool isEnabled(StringRef Feature) {
- assert(!Feature.empty() && "Empty string");
- // Get first character
- char Ch = Feature[0];
- // Check if first character is '+' for enabled
- return Ch == '+';
-}
-
/// Splits a string of comma separated items in to a vector of strings.
-static void Split(std::vector<std::string> &V, StringRef S) {
+void SubtargetFeatures::Split(std::vector<std::string> &V, StringRef S) {
SmallVector<StringRef, 3> Tmp;
S.split(Tmp, ',', -1, false /* KeepEmpty */);
V.assign(Tmp.begin(), Tmp.end());
@@ -70,48 +44,6 @@ void SubtargetFeatures::AddFeature(StringRef String, bool Enable) {
: (Enable ? "+" : "-") + String.lower());
}
-/// Find KV in array using binary search.
-static const SubtargetFeatureKV *Find(StringRef S,
- ArrayRef<SubtargetFeatureKV> A) {
- // Binary search the array
- auto F = std::lower_bound(A.begin(), A.end(), S);
- // If not found then return NULL
- if (F == A.end() || StringRef(F->Key) != S) return nullptr;
- // Return the found array item
- return F;
-}
-
-/// Return the length of the longest entry in the table.
-static size_t getLongestEntryLength(ArrayRef<SubtargetFeatureKV> Table) {
- size_t MaxLen = 0;
- for (auto &I : Table)
- MaxLen = std::max(MaxLen, std::strlen(I.Key));
- return MaxLen;
-}
-
-/// Display help for feature choices.
-static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
- ArrayRef<SubtargetFeatureKV> FeatTable) {
- // Determine the length of the longest CPU and Feature entries.
- unsigned MaxCPULen = getLongestEntryLength(CPUTable);
- unsigned MaxFeatLen = getLongestEntryLength(FeatTable);
-
- // Print the CPU table.
- errs() << "Available CPUs for this target:\n\n";
- for (auto &CPU : CPUTable)
- errs() << format(" %-*s - %s.\n", MaxCPULen, CPU.Key, CPU.Desc);
- errs() << '\n';
-
- // Print the Feature table.
- errs() << "Available features for this target:\n\n";
- for (auto &Feature : FeatTable)
- errs() << format(" %-*s - %s.\n", MaxFeatLen, Feature.Key, Feature.Desc);
- errs() << '\n';
-
- errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
- "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
-}
-
SubtargetFeatures::SubtargetFeatures(StringRef Initial) {
// Break up string into separate features
Split(Features, Initial);
@@ -121,136 +53,6 @@ std::string SubtargetFeatures::getString() const {
return join(Features.begin(), Features.end(), ",");
}
-/// For each feature that is (transitively) implied by this feature, set it.
-static
-void SetImpliedBits(FeatureBitset &Bits, const SubtargetFeatureKV &FeatureEntry,
- ArrayRef<SubtargetFeatureKV> FeatureTable) {
- for (const SubtargetFeatureKV &FE : FeatureTable) {
- if (FeatureEntry.Value == FE.Value) continue;
-
- if ((FeatureEntry.Implies & FE.Value).any()) {
- Bits |= FE.Value;
- SetImpliedBits(Bits, FE, FeatureTable);
- }
- }
-}
-
-/// For each feature that (transitively) implies this feature, clear it.
-static
-void ClearImpliedBits(FeatureBitset &Bits,
- const SubtargetFeatureKV &FeatureEntry,
- ArrayRef<SubtargetFeatureKV> FeatureTable) {
- for (const SubtargetFeatureKV &FE : FeatureTable) {
- if (FeatureEntry.Value == FE.Value) continue;
-
- if ((FE.Implies & FeatureEntry.Value).any()) {
- Bits &= ~FE.Value;
- ClearImpliedBits(Bits, FE, FeatureTable);
- }
- }
-}
-
-void
-SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
- ArrayRef<SubtargetFeatureKV> FeatureTable) {
- // Find feature in table.
- const SubtargetFeatureKV *FeatureEntry =
- Find(StripFlag(Feature), FeatureTable);
- // If there is a match
- if (FeatureEntry) {
- if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
- Bits &= ~FeatureEntry->Value;
- // For each feature that implies this, clear it.
- ClearImpliedBits(Bits, *FeatureEntry, FeatureTable);
- } else {
- Bits |= FeatureEntry->Value;
-
- // For each feature that this implies, set it.
- SetImpliedBits(Bits, *FeatureEntry, FeatureTable);
- }
- } else {
- errs() << "'" << Feature << "' is not a recognized feature for this target"
- << " (ignoring feature)\n";
- }
-}
-
-void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
- ArrayRef<SubtargetFeatureKV> FeatureTable) {
- assert(hasFlag(Feature));
-
- // Find feature in table.
- const SubtargetFeatureKV *FeatureEntry =
- Find(StripFlag(Feature), FeatureTable);
- // If there is a match
- if (FeatureEntry) {
- // Enable/disable feature in bits
- if (isEnabled(Feature)) {
- Bits |= FeatureEntry->Value;
-
- // For each feature that this implies, set it.
- SetImpliedBits(Bits, *FeatureEntry, FeatureTable);
- } else {
- Bits &= ~FeatureEntry->Value;
-
- // For each feature that implies this, clear it.
- ClearImpliedBits(Bits, *FeatureEntry, FeatureTable);
- }
- } else {
- errs() << "'" << Feature << "' is not a recognized feature for this target"
- << " (ignoring feature)\n";
- }
-}
-
-FeatureBitset
-SubtargetFeatures::getFeatureBits(StringRef CPU,
- ArrayRef<SubtargetFeatureKV> CPUTable,
- ArrayRef<SubtargetFeatureKV> FeatureTable) {
- if (CPUTable.empty() || FeatureTable.empty())
- return FeatureBitset();
-
- assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
- "CPU table is not sorted");
- assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
- "CPU features table is not sorted");
- // Resulting bits
- FeatureBitset Bits;
-
- // Check if help is needed
- if (CPU == "help")
- Help(CPUTable, FeatureTable);
-
- // Find CPU entry if CPU name is specified.
- else if (!CPU.empty()) {
- const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable);
-
- // If there is a match
- if (CPUEntry) {
- // Set base feature bits
- Bits = CPUEntry->Value;
-
- // Set the feature implied by this CPU feature, if any.
- for (auto &FE : FeatureTable) {
- if ((CPUEntry->Value & FE.Value).any())
- SetImpliedBits(Bits, FE, FeatureTable);
- }
- } else {
- errs() << "'" << CPU << "' is not a recognized processor for this target"
- << " (ignoring processor)\n";
- }
- }
-
- // Iterate through each feature
- for (const std::string &Feature : Features) {
- // Check for help
- if (Feature == "+help")
- Help(CPUTable, FeatureTable);
-
- ApplyFeatureFlag(Bits, Feature, FeatureTable);
- }
-
- return Bits;
-}
-
void SubtargetFeatures::print(raw_ostream &OS) const {
for (auto &F : Features)
OS << F << " ";
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 0cca3757be90..098343cd0107 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,7 +40,7 @@ namespace {
// When we create the indirect function table we start at 1, so that there is
// an empty slot at 0 and therefore calling a null function pointer will trap.
-static const uint32_t kInitialTableOffset = 1;
+static const uint32_t InitialTableOffset = 1;
// For patching purposes, we need to remember where each section starts, both
// for patching up the section size field, and for patching up references to
@@ -61,7 +60,7 @@ struct SectionBookkeeping {
// TODO: Consider using wasm::WasmSignature directly instead.
struct WasmSignature {
// Support empty and tombstone instances, needed by DenseMap.
- enum { Plain, Empty, Tombstone } State;
+ enum { Plain, Empty, Tombstone } State = Plain;
// The return types of the function.
SmallVector<wasm::ValType, 1> Returns;
@@ -69,8 +68,6 @@ struct WasmSignature {
// The parameter types of the function.
SmallVector<wasm::ValType, 4> Params;
- WasmSignature() : State(Plain) {}
-
bool operator==(const WasmSignature &Other) const {
return State == Other.State && Returns == Other.Returns &&
Params == Other.Params;
@@ -109,9 +106,10 @@ struct WasmSignatureDenseMapInfo {
struct WasmDataSegment {
MCSectionWasm *Section;
StringRef Name;
+ uint32_t InitFlags;
uint32_t Offset;
uint32_t Alignment;
- uint32_t Flags;
+ uint32_t LinkerFlags;
SmallVector<char, 4> Data;
};
@@ -149,18 +147,7 @@ struct WasmRelocationEntry {
: Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
FixupSection(FixupSection) {}
- bool hasAddend() const {
- switch (Type) {
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
- case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
- case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
- return true;
- default:
- return false;
- }
- }
+ bool hasAddend() const { return wasm::relocTypeHasAddend(Type); }
void print(raw_ostream &Out) const {
Out << wasm::relocTypetoString(Type) << " Off=" << Offset
@@ -173,7 +160,7 @@ struct WasmRelocationEntry {
#endif
};
-static const uint32_t INVALID_INDEX = -1;
+static const uint32_t InvalidIndex = -1;
struct WasmCustomSection {
@@ -185,7 +172,7 @@ struct WasmCustomSection {
WasmCustomSection(StringRef Name, MCSectionWasm *Section)
: Name(Name), Section(Section), OutputContentsOffset(0),
- OutputIndex(INVALID_INDEX) {}
+ OutputIndex(InvalidIndex) {}
};
#if !defined(NDEBUG)
@@ -195,6 +182,33 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
}
#endif
+// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// to allow patching.
+static void writePatchableLEB(raw_pwrite_stream &Stream, uint32_t X,
+ uint64_t Offset) {
+ uint8_t Buffer[5];
+ unsigned SizeLen = encodeULEB128(X, Buffer, 5);
+ assert(SizeLen == 5);
+ Stream.pwrite((char *)Buffer, SizeLen, Offset);
+}
+
+// Write X as a signed LEB value at offset Offset in Stream, padded
+// to allow patching.
+static void writePatchableSLEB(raw_pwrite_stream &Stream, int32_t X,
+ uint64_t Offset) {
+ uint8_t Buffer[5];
+ unsigned SizeLen = encodeSLEB128(X, Buffer, 5);
+ assert(SizeLen == 5);
+ Stream.pwrite((char *)Buffer, SizeLen, Offset);
+}
+
+// Write X as a plain integer value at offset Offset in Stream.
+static void writeI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+ uint8_t Buffer[4];
+ support::endian::write32le(Buffer, X);
+ Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
+}
+
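
These helpers depend on encodeULEB128/encodeSLEB128 padding the value to exactly 5 bytes, so a 32-bit quantity can be patched in place once it is finally known; startSection/endSection use the same trick for the section size field. A rough standalone sketch of a padded ULEB128 write and in-place patch, without the LLVM stream classes:

#include <cassert>
#include <cstdint>
#include <vector>

// Encode X as exactly 5 ULEB128 bytes (padding with continuation bits),
// so the slot can be rewritten later without moving anything.
static void writePaddedULEB128(std::vector<uint8_t> &Out, size_t Offset,
                               uint32_t X) {
  for (int I = 0; I < 5; ++I) {
    uint8_t Byte = X & 0x7F;
    X >>= 7;
    if (I != 4)
      Byte |= 0x80; // continuation bit keeps the encoding 5 bytes long
    Out[Offset + I] = Byte;
  }
}

static uint32_t readULEB128(const std::vector<uint8_t> &In, size_t Offset) {
  uint32_t Value = 0;
  for (int I = 0; I < 5; ++I)
    Value |= uint32_t(In[Offset + I] & 0x7F) << (7 * I);
  return Value;
}

int main() {
  std::vector<uint8_t> Section(16, 0);
  writePaddedULEB128(Section, 0, 0);   // reserve: size not known yet
  // ... emit the section contents ...
  writePaddedULEB128(Section, 0, 300); // patch the real size in place
  assert(readULEB128(Section, 0) == 300);
  return 0;
}
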
class WasmObjectWriter : public MCObjectWriter {
support::endian::Writer W;
@@ -218,12 +232,15 @@ class WasmObjectWriter : public MCObjectWriter {
// Maps function/global symbols to the function/global/event/section index
// space.
DenseMap<const MCSymbolWasm *, uint32_t> WasmIndices;
+ DenseMap<const MCSymbolWasm *, uint32_t> GOTIndices;
// Maps data symbols to the Wasm segment and offset/size with the segment.
DenseMap<const MCSymbolWasm *, wasm::WasmDataReference> DataLocations;
// Stores output data (index, relocations, content offset) for custom
// section.
std::vector<WasmCustomSection> CustomSections;
+ std::unique_ptr<WasmCustomSection> ProducersSection;
+ std::unique_ptr<WasmCustomSection> TargetFeaturesSection;
// Relocations for fixing up references in the custom sections.
DenseMap<const MCSectionWasm *, std::vector<WasmRelocationEntry>>
CustomSectionsRelocations;
@@ -233,7 +250,6 @@ class WasmObjectWriter : public MCObjectWriter {
DenseMap<WasmSignature, uint32_t, WasmSignatureDenseMapInfo> SignatureIndices;
SmallVector<WasmSignature, 4> Signatures;
- SmallVector<WasmGlobal, 4> Globals;
SmallVector<WasmDataSegment, 4> DataSegments;
unsigned NumFunctionImports = 0;
unsigned NumGlobalImports = 0;
@@ -242,9 +258,6 @@ class WasmObjectWriter : public MCObjectWriter {
// TargetObjectWriter wrappers.
bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const {
- return TargetObjectWriter->getRelocType(Target, Fixup);
- }
void startSection(SectionBookkeeping &Section, unsigned SectionId);
void startCustomSection(SectionBookkeeping &Section, StringRef Name);
@@ -255,20 +268,21 @@ public:
raw_pwrite_stream &OS)
: W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {}
- ~WasmObjectWriter() override;
-
private:
void reset() override {
CodeRelocations.clear();
DataRelocations.clear();
TypeIndices.clear();
WasmIndices.clear();
+ GOTIndices.clear();
TableIndices.clear();
DataLocations.clear();
+ CustomSections.clear();
+ ProducersSection.reset();
+ TargetFeaturesSection.reset();
CustomSectionsRelocations.clear();
SignatureIndices.clear();
Signatures.clear();
- Globals.clear();
DataSegments.clear();
SectionFunctions.clear();
NumFunctionImports = 0;
@@ -298,9 +312,9 @@ private:
void writeImportSection(ArrayRef<wasm::WasmImport> Imports, uint32_t DataSize,
uint32_t NumElements);
void writeFunctionSection(ArrayRef<WasmFunction> Functions);
- void writeGlobalSection();
void writeExportSection(ArrayRef<wasm::WasmExport> Exports);
void writeElemSection(ArrayRef<uint32_t> TableElems);
+ void writeDataCountSection();
void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
ArrayRef<WasmFunction> Functions);
void writeDataSection();
@@ -311,7 +325,8 @@ private:
ArrayRef<wasm::WasmSymbolInfo> SymbolInfos,
ArrayRef<std::pair<uint16_t, uint32_t>> InitFuncs,
const std::map<StringRef, std::vector<WasmComdatEntry>> &Comdats);
- void writeCustomSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ void writeCustomSection(WasmCustomSection &CustomSection,
+ const MCAssembler &Asm, const MCAsmLayout &Layout);
void writeCustomRelocSections();
void
updateCustomSectionRelocations(const SmallVector<WasmFunction, 4> &Functions,
@@ -330,8 +345,6 @@ private:
} // end anonymous namespace
-WasmObjectWriter::~WasmObjectWriter() {}
-
// Write out a section header and a patchable section size field.
void WasmObjectWriter::startSection(SectionBookkeeping &Section,
unsigned SectionId) {
@@ -342,7 +355,7 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section,
// The section size. We don't know the size yet, so reserve enough space
// for any 32-bit value; we'll patch it later.
- encodeULEB128(UINT32_MAX, W.OS);
+ encodeULEB128(0, W.OS, 5);
// The position where the section starts, for measuring its size.
Section.ContentsOffset = W.OS.tell();
@@ -368,7 +381,13 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
// Now that the section is complete and we know how big it is, patch up the
// section size field at the start of the section.
void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
- uint64_t Size = W.OS.tell() - Section.PayloadOffset;
+ uint64_t Size = W.OS.tell();
+ // /dev/null doesn't support seek/tell and can report offset of 0.
+ // Simply skip this patching in that case.
+ if (!Size)
+ return;
+
+ Size -= Section.PayloadOffset;
if (uint32_t(Size) != Size)
report_fatal_error("section size does not fit in a uint32_t");
@@ -376,11 +395,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
// Write the final section size to the payload_len field, which follows
// the section id byte.
- uint8_t Buffer[16];
- unsigned SizeLen = encodeULEB128(Size, Buffer, 5);
- assert(SizeLen == 5);
- static_cast<raw_pwrite_stream &>(W.OS).pwrite((char *)Buffer, SizeLen,
- Section.SizeOffset);
+ writePatchableLEB(static_cast<raw_pwrite_stream &>(W.OS), Size,
+ Section.SizeOffset);
}
// Emit the Wasm header.
@@ -479,15 +495,15 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
// be negative and don't wrap.
FixedValue = 0;
- unsigned Type = getRelocType(Target, Fixup);
+ unsigned Type = TargetObjectWriter->getRelocType(Target, Fixup);
assert(!IsPCRel);
assert(SymA);
// Absolute offset within a section or a function.
// Currently only supported for metadata sections.
// See: test/MC/WebAssembly/blockaddress.ll
- if (Type == wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32 ||
- Type == wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32) {
+ if (Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
+ Type == wasm::R_WASM_SECTION_OFFSET_I32) {
if (!FixupSection.getKind().isMetadata())
report_fatal_error("relocations for function or section offsets are "
"only supported in metadata sections");
@@ -505,9 +521,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
SymA = cast<MCSymbolWasm>(SectionSymbol);
}
- // Relocation other than R_WEBASSEMBLY_TYPE_INDEX_LEB are required to be
+ // Relocation other than R_WASM_TYPE_INDEX_LEB are required to be
// against a named symbol.
- if (Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) {
+ if (Type != wasm::R_WASM_TYPE_INDEX_LEB) {
if (SymA->getName().empty())
report_fatal_error("relocations against un-named temporaries are not yet "
"supported by wasm");
@@ -515,6 +531,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
SymA->setUsedInReloc();
}
+ if (RefA->getKind() == MCSymbolRefExpr::VK_GOT)
+ SymA->setUsedInGOT();
+
WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");
@@ -529,40 +548,14 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
}
}
-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
-// to allow patching.
-static void WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X,
- uint64_t Offset) {
- uint8_t Buffer[5];
- unsigned SizeLen = encodeULEB128(X, Buffer, 5);
- assert(SizeLen == 5);
- Stream.pwrite((char *)Buffer, SizeLen, Offset);
-}
-
-// Write X as an signed LEB value at offset Offset in Stream, padded
-// to allow patching.
-static void WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X,
- uint64_t Offset) {
- uint8_t Buffer[5];
- unsigned SizeLen = encodeSLEB128(X, Buffer, 5);
- assert(SizeLen == 5);
- Stream.pwrite((char *)Buffer, SizeLen, Offset);
-}
-
-// Write X as a plain integer value at offset Offset in Stream.
-static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
- uint8_t Buffer[4];
- support::endian::write32le(Buffer, X);
- Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
-}
-
-static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) {
- if (Symbol.isVariable()) {
- const MCExpr *Expr = Symbol.getVariableValue();
+static const MCSymbolWasm *resolveSymbol(const MCSymbolWasm &Symbol) {
+ const MCSymbolWasm* Ret = &Symbol;
+ while (Ret->isVariable()) {
+ const MCExpr *Expr = Ret->getVariableValue();
auto *Inner = cast<MCSymbolRefExpr>(Expr);
- return cast<MCSymbolWasm>(&Inner->getSymbol());
+ Ret = cast<MCSymbolWasm>(&Inner->getSymbol());
}
- return &Symbol;
+ return Ret;
}
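
resolveSymbol now follows a whole chain of variable symbols (an alias whose value is another alias) rather than looking through a single level. A simplified sketch of the same loop over a plain alias map, with made-up symbol names; it assumes the chain is acyclic, as the real code does:

#include <cassert>
#include <map>
#include <string>

// Follow aliases until we reach a symbol that is not itself an alias.
static std::string resolve(const std::map<std::string, std::string> &Aliases,
                           std::string Sym) {
  for (auto It = Aliases.find(Sym); It != Aliases.end();
       It = Aliases.find(Sym))
    Sym = It->second;
  return Sym;
}

int main() {
  // weak_alias -> impl_alias -> real_impl (two levels deep).
  std::map<std::string, std::string> Aliases = {
      {"weak_alias", "impl_alias"}, {"impl_alias", "real_impl"}};
  assert(resolve(Aliases, "weak_alias") == "real_impl");
  assert(resolve(Aliases, "real_impl") == "real_impl");
  return 0;
}
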
// Compute a value to write into the code at the location covered
@@ -571,36 +564,41 @@ static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) {
// useable.
uint32_t
WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
+ if (RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB && !RelEntry.Symbol->isGlobal()) {
+ assert(GOTIndices.count(RelEntry.Symbol) > 0 && "symbol not found in GOT index space");
+ return GOTIndices[RelEntry.Symbol];
+ }
+
switch (RelEntry.Type) {
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
+ case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_I32: {
// Provisional value is table address of the resolved symbol itself
- const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol);
+ const MCSymbolWasm *Sym = resolveSymbol(*RelEntry.Symbol);
assert(Sym->isFunction());
return TableIndices[Sym];
}
- case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case wasm::R_WASM_TYPE_INDEX_LEB:
// Provisional value is same as the index
return getRelocationIndexValue(RelEntry);
- case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+ case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_GLOBAL_INDEX_LEB:
+ case wasm::R_WASM_EVENT_INDEX_LEB:
// Provisional value is function/global/event Wasm index
- if (!WasmIndices.count(RelEntry.Symbol))
- report_fatal_error("symbol not found in wasm index space: " +
- RelEntry.Symbol->getName());
+ assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space");
return WasmIndices[RelEntry.Symbol];
- case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
- case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32: {
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32: {
const auto &Section =
static_cast<const MCSectionWasm &>(RelEntry.Symbol->getSection());
return Section.getSectionOffset() + RelEntry.Addend;
}
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: {
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB: {
// Provisional value is address of the global
- const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol);
+ const MCSymbolWasm *Sym = resolveSymbol(*RelEntry.Symbol);
// For undefined symbols, use zero
if (!Sym->isDefined())
return 0;
@@ -654,7 +652,7 @@ static void addData(SmallVectorImpl<char> &DataBytes,
uint32_t
WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) {
- if (RelEntry.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) {
+ if (RelEntry.Type == wasm::R_WASM_TYPE_INDEX_LEB) {
if (!TypeIndices.count(RelEntry.Symbol))
report_fatal_error("symbol not found in type index space: " +
RelEntry.Symbol->getName());
@@ -678,22 +676,24 @@ void WasmObjectWriter::applyRelocations(
uint32_t Value = getProvisionalValue(RelEntry);
switch (RelEntry.Type) {
- case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
- WritePatchableLEB(Stream, Value, Offset);
+ case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_TYPE_INDEX_LEB:
+ case wasm::R_WASM_GLOBAL_INDEX_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_EVENT_INDEX_LEB:
+ writePatchableLEB(Stream, Value, Offset);
break;
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
- case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
- case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
- WriteI32(Stream, Value, Offset);
+ case wasm::R_WASM_TABLE_INDEX_I32:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32:
+ writeI32(Stream, Value, Offset);
break;
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- WritePatchableSLEB(Stream, Value, Offset);
+ case wasm::R_WASM_TABLE_INDEX_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
+ writePatchableSLEB(Stream, Value, Offset);
break;
default:
llvm_unreachable("invalid relocation type");
@@ -783,26 +783,6 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
endSection(Section);
}
-void WasmObjectWriter::writeGlobalSection() {
- if (Globals.empty())
- return;
-
- SectionBookkeeping Section;
- startSection(Section, wasm::WASM_SEC_GLOBAL);
-
- encodeULEB128(Globals.size(), W.OS);
- for (const WasmGlobal &Global : Globals) {
- writeValueType(static_cast<wasm::ValType>(Global.Type.Type));
- W.OS << char(Global.Type.Mutable);
-
- W.OS << char(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(Global.InitialValue, W.OS);
- W.OS << char(wasm::WASM_OPCODE_END);
- }
-
- endSection(Section);
-}
-
void WasmObjectWriter::writeEventSection(ArrayRef<wasm::WasmEventType> Events) {
if (Events.empty())
return;
@@ -848,7 +828,7 @@ void WasmObjectWriter::writeElemSection(ArrayRef<uint32_t> TableElems) {
// init expr for starting offset
W.OS << char(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(kInitialTableOffset, W.OS);
+ encodeSLEB128(InitialTableOffset, W.OS);
W.OS << char(wasm::WASM_OPCODE_END);
encodeULEB128(TableElems.size(), W.OS);
@@ -858,6 +838,16 @@ void WasmObjectWriter::writeElemSection(ArrayRef<uint32_t> TableElems) {
endSection(Section);
}
+void WasmObjectWriter::writeDataCountSection() {
+ if (DataSegments.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_DATACOUNT);
+ encodeULEB128(DataSegments.size(), W.OS);
+ endSection(Section);
+}
+
void WasmObjectWriter::writeCodeSection(const MCAssembler &Asm,
const MCAsmLayout &Layout,
ArrayRef<WasmFunction> Functions) {
@@ -899,10 +889,14 @@ void WasmObjectWriter::writeDataSection() {
encodeULEB128(DataSegments.size(), W.OS); // count
for (const WasmDataSegment &Segment : DataSegments) {
- encodeULEB128(0, W.OS); // memory index
- W.OS << char(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(Segment.Offset, W.OS); // offset
- W.OS << char(wasm::WASM_OPCODE_END);
+ encodeULEB128(Segment.InitFlags, W.OS); // flags
+ if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
+ encodeULEB128(0, W.OS); // memory index
+ if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+ W.OS << char(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(Segment.Offset, W.OS); // offset
+ W.OS << char(wasm::WASM_OPCODE_END);
+ }
encodeULEB128(Segment.Data.size(), W.OS); // size
Segment.Section->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
W.OS << Segment.Data; // data
@@ -928,9 +922,8 @@ void WasmObjectWriter::writeRelocSection(
// order, but for the code section we combine many MC sections into single
// wasm section, and this order is determined by the order of Asm.Symbols()
// not the sections order.
- std::stable_sort(
- Relocs.begin(), Relocs.end(),
- [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) {
+ llvm::stable_sort(
+ Relocs, [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) {
return (A.Offset + A.FixupSection->getSectionOffset()) <
(B.Offset + B.FixupSection->getSectionOffset());
});
@@ -982,7 +975,8 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
case wasm::WASM_SYMBOL_TYPE_GLOBAL:
case wasm::WASM_SYMBOL_TYPE_EVENT:
encodeULEB128(Sym.ElementIndex, W.OS);
- if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0)
+ if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 ||
+ (Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
writeString(Sym.Name);
break;
case wasm::WASM_SYMBOL_TYPE_DATA:
@@ -1012,7 +1006,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
for (const WasmDataSegment &Segment : DataSegments) {
writeString(Segment.Name);
encodeULEB128(Segment.Alignment, W.OS);
- encodeULEB128(Segment.Flags, W.OS);
+ encodeULEB128(Segment.LinkerFlags, W.OS);
}
endSection(SubSection);
}
@@ -1045,25 +1039,24 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
endSection(Section);
}
-void WasmObjectWriter::writeCustomSections(const MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- for (auto &CustomSection : CustomSections) {
- SectionBookkeeping Section;
- auto *Sec = CustomSection.Section;
- startCustomSection(Section, CustomSection.Name);
+void WasmObjectWriter::writeCustomSection(WasmCustomSection &CustomSection,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ SectionBookkeeping Section;
+ auto *Sec = CustomSection.Section;
+ startCustomSection(Section, CustomSection.Name);
- Sec->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
- Asm.writeSectionData(W.OS, Sec, Layout);
+ Sec->setSectionOffset(W.OS.tell() - Section.ContentsOffset);
+ Asm.writeSectionData(W.OS, Sec, Layout);
- CustomSection.OutputContentsOffset = Section.ContentsOffset;
- CustomSection.OutputIndex = Section.Index;
+ CustomSection.OutputContentsOffset = Section.ContentsOffset;
+ CustomSection.OutputIndex = Section.Index;
- endSection(Section);
+ endSection(Section);
- // Apply fixups.
- auto &Relocations = CustomSectionsRelocations[CustomSection.Section];
- applyRelocations(Relocations, CustomSection.OutputContentsOffset);
- }
+ // Apply fixups.
+ auto &Relocations = CustomSectionsRelocations[CustomSection.Section];
+ applyRelocations(Relocations, CustomSection.OutputContentsOffset);
}
uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) {
@@ -1082,7 +1075,7 @@ void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) {
assert(Symbol.isFunction());
WasmSignature S;
- const MCSymbolWasm *ResolvedSym = ResolveSymbol(Symbol);
+ const MCSymbolWasm *ResolvedSym = resolveSymbol(Symbol);
if (auto *Sig = ResolvedSym->getSignature()) {
S.Returns = Sig->Returns;
S.Params = Sig->Params;
@@ -1143,7 +1136,6 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
uint64_t StartOffset = W.OS.tell();
LLVM_DEBUG(dbgs() << "WasmObjectWriter::writeObject\n");
- MCContext &Ctx = Asm.getContext();
// Collect information from the available symbols.
SmallVector<WasmFunction, 4> Functions;
@@ -1159,22 +1151,18 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// For now, always emit the memory import, since loads and stores are not
// valid without it. In the future, we could perhaps be more clever and omit
// it if there are no loads or stores.
- MCSymbolWasm *MemorySym =
- cast<MCSymbolWasm>(Ctx.getOrCreateSymbol("__linear_memory"));
wasm::WasmImport MemImport;
- MemImport.Module = MemorySym->getModuleName();
- MemImport.Field = MemorySym->getName();
+ MemImport.Module = "env";
+ MemImport.Field = "__linear_memory";
MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY;
Imports.push_back(MemImport);
// For now, always emit the table section, since indirect calls are not
// valid without it. In the future, we could perhaps be more clever and omit
// it if there are no indirect calls.
- MCSymbolWasm *TableSym =
- cast<MCSymbolWasm>(Ctx.getOrCreateSymbol("__indirect_function_table"));
wasm::WasmImport TableImport;
- TableImport.Module = TableSym->getModuleName();
- TableImport.Field = TableSym->getName();
+ TableImport.Module = "env";
+ TableImport.Field = "__indirect_function_table";
TableImport.Kind = wasm::WASM_EXTERNAL_TABLE;
TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF;
Imports.push_back(TableImport);
@@ -1200,39 +1188,60 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
if (!WS.isDefined() && !WS.isComdat()) {
if (WS.isFunction()) {
wasm::WasmImport Import;
- Import.Module = WS.getModuleName();
- Import.Field = WS.getName();
+ Import.Module = WS.getImportModule();
+ Import.Field = WS.getImportName();
Import.Kind = wasm::WASM_EXTERNAL_FUNCTION;
Import.SigIndex = getFunctionType(WS);
Imports.push_back(Import);
+ assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = NumFunctionImports++;
} else if (WS.isGlobal()) {
if (WS.isWeak())
report_fatal_error("undefined global symbol cannot be weak");
wasm::WasmImport Import;
- Import.Module = WS.getModuleName();
- Import.Field = WS.getName();
+ Import.Field = WS.getImportName();
Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+ Import.Module = WS.getImportModule();
Import.Global = WS.getGlobalType();
Imports.push_back(Import);
+ assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = NumGlobalImports++;
} else if (WS.isEvent()) {
if (WS.isWeak())
report_fatal_error("undefined event symbol cannot be weak");
wasm::WasmImport Import;
- Import.Module = WS.getModuleName();
- Import.Field = WS.getName();
+ Import.Module = WS.getImportModule();
+ Import.Field = WS.getImportName();
Import.Kind = wasm::WASM_EXTERNAL_EVENT;
Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
Import.Event.SigIndex = getEventType(WS);
Imports.push_back(Import);
+ assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = NumEventImports++;
}
}
}
+ // Add imports for GOT globals
+ for (const MCSymbol &S : Asm.symbols()) {
+ const auto &WS = static_cast<const MCSymbolWasm &>(S);
+ if (WS.isUsedInGOT()) {
+ wasm::WasmImport Import;
+ if (WS.isFunction())
+ Import.Module = "GOT.func";
+ else
+ Import.Module = "GOT.mem";
+ Import.Field = WS.getName();
+ Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+ Import.Global = {wasm::WASM_TYPE_I32, true};
+ Imports.push_back(Import);
+ assert(GOTIndices.count(&WS) == 0);
+ GOTIndices[&WS] = NumGlobalImports++;
+ }
+ }
+
// Populate DataSegments and CustomSections, which must be done before
// populating DataLocations.
for (MCSection &Sec : Asm) {
@@ -1253,11 +1262,13 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
DataSegments.emplace_back();
WasmDataSegment &Segment = DataSegments.back();
Segment.Name = SectionName;
+ Segment.InitFlags =
+ Section.getPassive() ? (uint32_t)wasm::WASM_SEGMENT_IS_PASSIVE : 0;
Segment.Offset = DataSize;
Segment.Section = &Section;
addData(Segment.Data, Section);
Segment.Alignment = Log2_32(Section.getAlignment());
- Segment.Flags = 0;
+ Segment.LinkerFlags = 0;
DataSize += Segment.Data.size();
Section.setSegmentIndex(SegmentIndex);
@@ -1282,6 +1293,18 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
report_fatal_error("section name and begin symbol should match: " +
Twine(SectionName));
}
+
+ // Separate out the producers and target features sections
+ if (Name == "producers") {
+ ProducersSection = llvm::make_unique<WasmCustomSection>(Name, &Section);
+ continue;
+ }
+ if (Name == "target_features") {
+ TargetFeaturesSection =
+ llvm::make_unique<WasmCustomSection>(Name, &Section);
+ continue;
+ }
+
CustomSections.emplace_back(Name, &Section);
}
}
@@ -1313,7 +1336,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
report_fatal_error(
"function sections must contain one function each");
- if (WS.getSize() == 0)
+ if (WS.getSize() == nullptr)
report_fatal_error(
"function symbols must have a size set with .size");
@@ -1338,7 +1361,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n");
} else if (WS.isData()) {
- if (WS.isTemporary() && !WS.getSize())
+ if (!isInSymtab(WS))
continue;
if (!WS.isDefined()) {
@@ -1384,11 +1407,12 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
wasm::WasmEventType Event;
Event.SigIndex = getEventType(WS);
Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION;
+ assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = Index;
Events.push_back(Event);
} else {
// An import; the index was assigned above.
- Index = WasmIndices.find(&WS)->second;
+ assert(WasmIndices.count(&WS) > 0);
}
LLVM_DEBUG(dbgs() << " -> event index: " << WasmIndices.find(&WS)->second
<< "\n");
@@ -1410,16 +1434,17 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Find the target symbol of this weak alias and export that index
const auto &WS = static_cast<const MCSymbolWasm &>(S);
- const MCSymbolWasm *ResolvedSym = ResolveSymbol(WS);
+ const MCSymbolWasm *ResolvedSym = resolveSymbol(WS);
LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym
<< "'\n");
- if (WS.isFunction()) {
+ if (ResolvedSym->isFunction()) {
assert(WasmIndices.count(ResolvedSym) > 0);
uint32_t WasmIndex = WasmIndices.find(ResolvedSym)->second;
+ assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = WasmIndex;
LLVM_DEBUG(dbgs() << " -> index:" << WasmIndex << "\n");
- } else if (WS.isData()) {
+ } else if (ResolvedSym->isData()) {
assert(DataLocations.count(ResolvedSym) > 0);
const wasm::WasmDataReference &Ref =
DataLocations.find(ResolvedSym)->second;
@@ -1434,7 +1459,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
for (const MCSymbol &S : Asm.symbols()) {
const auto &WS = static_cast<const MCSymbolWasm &>(S);
if (!isInSymtab(WS)) {
- WS.setIndex(INVALID_INDEX);
+ WS.setIndex(InvalidIndex);
continue;
}
LLVM_DEBUG(dbgs() << "adding to symtab: " << WS << "\n");
@@ -1448,6 +1473,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL;
if (WS.isUndefined())
Flags |= wasm::WASM_SYMBOL_UNDEFINED;
+ if (WS.isExported())
+ Flags |= wasm::WASM_SYMBOL_EXPORTED;
+ if (WS.getName() != WS.getImportName())
+ Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
wasm::WasmSymbolInfo Info;
Info.Name = WS.getName();
@@ -1469,13 +1498,13 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
// Functions referenced by a relocation need to put in the table. This is
// purely to make the object file's provisional values readable, and is
// ignored by the linker, which re-calculates the relocations itself.
- if (Rel.Type != wasm::R_WEBASSEMBLY_TABLE_INDEX_I32 &&
- Rel.Type != wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB)
+ if (Rel.Type != wasm::R_WASM_TABLE_INDEX_I32 &&
+ Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB)
return;
assert(Rel.Symbol->isFunction());
- const MCSymbolWasm &WS = *ResolveSymbol(*Rel.Symbol);
+ const MCSymbolWasm &WS = *resolveSymbol(*Rel.Symbol);
uint32_t FunctionIndex = WasmIndices.find(&WS)->second;
- uint32_t TableIndex = TableElems.size() + kInitialTableOffset;
+ uint32_t TableIndex = TableElems.size() + InitialTableOffset;
if (TableIndices.try_emplace(&WS, TableIndex).second) {
LLVM_DEBUG(dbgs() << " -> adding " << WS.getName()
<< " to table: " << TableIndex << "\n");
@@ -1534,25 +1563,26 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
const auto &DataFrag = cast<MCDataFragment>(Frag);
const SmallVectorImpl<char> &Contents = DataFrag.getContents();
for (const uint8_t *
- p = (const uint8_t *)Contents.data(),
- *end = (const uint8_t *)Contents.data() + Contents.size();
- p != end; ++p) {
- if (*p != 0)
+ P = (const uint8_t *)Contents.data(),
+ *End = (const uint8_t *)Contents.data() + Contents.size();
+ P != End; ++P) {
+ if (*P != 0)
report_fatal_error("non-symbolic data in .init_array section");
}
for (const MCFixup &Fixup : DataFrag.getFixups()) {
assert(Fixup.getKind() ==
MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
const MCExpr *Expr = Fixup.getValue();
- auto *Sym = dyn_cast<MCSymbolRefExpr>(Expr);
- if (!Sym)
+ auto *SymRef = dyn_cast<MCSymbolRefExpr>(Expr);
+ if (!SymRef)
report_fatal_error("fixups in .init_array should be symbol references");
- if (Sym->getKind() != MCSymbolRefExpr::VK_WebAssembly_FUNCTION)
- report_fatal_error("symbols in .init_array should be for functions");
- if (Sym->getSymbol().getIndex() == INVALID_INDEX)
+ const auto &TargetSym = cast<const MCSymbolWasm>(SymRef->getSymbol());
+ if (TargetSym.getIndex() == InvalidIndex)
report_fatal_error("symbols in .init_array should exist in symbtab");
+ if (!TargetSym.isFunction())
+ report_fatal_error("symbols in .init_array should be for functions");
InitFuncs.push_back(
- std::make_pair(Priority, Sym->getSymbol().getIndex()));
+ std::make_pair(Priority, TargetSym.getIndex()));
}
}
@@ -1564,17 +1594,22 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
writeFunctionSection(Functions);
// Skip the "table" section; we import the table instead.
// Skip the "memory" section; we import the memory instead.
- writeGlobalSection();
writeEventSection(Events);
writeExportSection(Exports);
writeElemSection(TableElems);
+ writeDataCountSection();
writeCodeSection(Asm, Layout, Functions);
writeDataSection();
- writeCustomSections(Asm, Layout);
+ for (auto &CustomSection : CustomSections)
+ writeCustomSection(CustomSection, Asm, Layout);
writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats);
writeRelocSection(CodeSectionIndex, "CODE", CodeRelocations);
writeRelocSection(DataSectionIndex, "DATA", DataRelocations);
writeCustomRelocSections();
+ if (ProducersSection)
+ writeCustomSection(*ProducersSection, Asm, Layout);
+ if (TargetFeaturesSection)
+ writeCustomSection(*TargetFeaturesSection, Asm, Layout);
// TODO: Translate the .comment section to the output.
return W.OS.tell() - StartOffset;
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index b774852eabe6..0e6c05bc726d 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===- llvm/MC/WinCOFFObjectWriter.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -388,7 +387,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
Sym->Aux[0].AuxType = ATWeakExternal;
Sym->Aux[0].Aux.WeakExternal.TagIndex = 0;
Sym->Aux[0].Aux.WeakExternal.Characteristics =
- COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS;
} else {
if (!Base)
Sym->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
diff --git a/lib/MC/XCOFFObjectWriter.cpp b/lib/MC/XCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..9b9a7b6c118c
--- /dev/null
+++ b/lib/MC/XCOFFObjectWriter.cpp
@@ -0,0 +1,94 @@
+//===-- lib/MC/XCOFFObjectWriter.cpp - XCOFF file writer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements XCOFF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class XCOFFObjectWriter : public MCObjectWriter {
+ support::endian::Writer W;
+ std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter;
+
+ void executePostLayoutBinding(MCAssembler &, const MCAsmLayout &) override;
+
+ void recordRelocation(MCAssembler &, const MCAsmLayout &, const MCFragment *,
+ const MCFixup &, MCValue, uint64_t &) override;
+
+ uint64_t writeObject(MCAssembler &, const MCAsmLayout &) override;
+
+public:
+ XCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS);
+};
+
+XCOFFObjectWriter::XCOFFObjectWriter(
+ std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
+ : W(OS, support::big), TargetObjectWriter(std::move(MOTW)) {}
+
+void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &,
+ const MCAsmLayout &) {
+ // TODO Implement once we have sections and symbols to handle.
+}
+
+void XCOFFObjectWriter::recordRelocation(MCAssembler &, const MCAsmLayout &,
+ const MCFragment *, const MCFixup &,
+ MCValue, uint64_t &) {
+ report_fatal_error("XCOFF relocations not supported.");
+}
+
+uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &) {
+ // We always emit a timestamp of 0 for reproducibility, so ensure incremental
+ // linking is not enabled, since, as with Windows COFF, such a timestamp is
+ // incompatible with incremental linking of XCOFF.
+ if (Asm.isIncrementalLinkerCompatible())
+ report_fatal_error("Incremental linking not supported for XCOFF.");
+
+ if (TargetObjectWriter->is64Bit())
+ report_fatal_error("64-bit XCOFF object files are not supported yet.");
+
+ uint64_t StartOffset = W.OS.tell();
+
+ // TODO FIXME Assign section numbers/finalize sections.
+
+ // TODO FIXME Finalize symbols.
+
+ // Magic.
+ W.write<uint16_t>(0x01df);
+ // Number of sections.
+ W.write<uint16_t>(0);
+ // Timestamp field. For reproducible output we write a 0, which represents no
+ // timestamp.
+ W.write<int32_t>(0);
+ // Byte Offset to the start of the symbol table.
+ W.write<uint32_t>(0);
+ // Number of entries in the symbol table.
+ W.write<int32_t>(0);
+ // Size of the optional header.
+ W.write<uint16_t>(0);
+ // Flags.
+ W.write<uint16_t>(0);
+
+ return W.OS.tell() - StartOffset;
+}
+
+} // end anonymous namespace
+
+std::unique_ptr<MCObjectWriter>
+llvm::createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS) {
+ return llvm::make_unique<XCOFFObjectWriter>(std::move(MOTW), OS);
+}
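
The writeObject above emits only the 20-byte XCOFF32 file header, with every field except the magic zeroed. A standalone sketch of the same layout, assuming small hand-written big-endian helpers in place of support::endian::Writer:

    // Illustrative only: the 20-byte XCOFF32 file header with zeroed fields,
    // as written above. The big-endian helpers are invented for this sketch.
    #include <cstdint>
    #include <fstream>
    #include <vector>

    static void writeBE16(std::vector<uint8_t> &Out, uint16_t V) {
      Out.push_back(uint8_t(V >> 8));
      Out.push_back(uint8_t(V));
    }
    static void writeBE32(std::vector<uint8_t> &Out, uint32_t V) {
      Out.push_back(uint8_t(V >> 24));
      Out.push_back(uint8_t(V >> 16));
      Out.push_back(uint8_t(V >> 8));
      Out.push_back(uint8_t(V));
    }

    int main() {
      std::vector<uint8_t> Header;
      writeBE16(Header, 0x01df); // f_magic: XCOFF32
      writeBE16(Header, 0);      // f_nscns: number of sections
      writeBE32(Header, 0);      // f_timdat: zero for reproducible output
      writeBE32(Header, 0);      // f_symptr: byte offset of the symbol table
      writeBE32(Header, 0);      // f_nsyms: number of symbol table entries
      writeBE16(Header, 0);      // f_opthdr: size of the optional header
      writeBE16(Header, 0);      // f_flags
      // Header.size() is now 20, the size of an XCOFF32 file header.
      std::ofstream OS("empty.xcoff", std::ios::binary);
      OS.write(reinterpret_cast<const char *>(Header.data()),
               static_cast<std::streamsize>(Header.size()));
      return 0;
    }
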
diff --git a/lib/MCA/Context.cpp b/lib/MCA/Context.cpp
index c1b197dfe2e6..f0e8dfab8680 100644
--- a/lib/MCA/Context.cpp
+++ b/lib/MCA/Context.cpp
@@ -1,9 +1,8 @@
//===---------------------------- Context.cpp -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -22,6 +21,7 @@
#include "llvm/MCA/Stages/DispatchStage.h"
#include "llvm/MCA/Stages/EntryStage.h"
#include "llvm/MCA/Stages/ExecuteStage.h"
+#include "llvm/MCA/Stages/MicroOpQueueStage.h"
#include "llvm/MCA/Stages/RetireStage.h"
namespace llvm {
@@ -43,7 +43,8 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
auto Fetch = llvm::make_unique<EntryStage>(SrcMgr);
auto Dispatch = llvm::make_unique<DispatchStage>(STI, MRI, Opts.DispatchWidth,
*RCU, *PRF);
- auto Execute = llvm::make_unique<ExecuteStage>(*HWS);
+ auto Execute =
+ llvm::make_unique<ExecuteStage>(*HWS, Opts.EnableBottleneckAnalysis);
auto Retire = llvm::make_unique<RetireStage>(*RCU, *PRF);
// Pass the ownership of all the hardware units to this Context.
@@ -55,6 +56,9 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB,
// Build the pipeline.
auto StagePipeline = llvm::make_unique<Pipeline>();
StagePipeline->appendStage(std::move(Fetch));
+ if (Opts.MicroOpQueueSize)
+ StagePipeline->appendStage(llvm::make_unique<MicroOpQueueStage>(
+ Opts.MicroOpQueueSize, Opts.DecodersThroughput));
StagePipeline->appendStage(std::move(Dispatch));
StagePipeline->appendStage(std::move(Execute));
StagePipeline->appendStage(std::move(Retire));
diff --git a/lib/MCA/HWEventListener.cpp b/lib/MCA/HWEventListener.cpp
index 4a0e5b1754dd..58b2e0329222 100644
--- a/lib/MCA/HWEventListener.cpp
+++ b/lib/MCA/HWEventListener.cpp
@@ -1,9 +1,8 @@
//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/MCA/HardwareUnits/HardwareUnit.cpp b/lib/MCA/HardwareUnits/HardwareUnit.cpp
index edd32b9c0c1a..69f793796ec7 100644
--- a/lib/MCA/HardwareUnits/HardwareUnit.cpp
+++ b/lib/MCA/HardwareUnits/HardwareUnit.cpp
@@ -1,9 +1,8 @@
//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/MCA/HardwareUnits/LSUnit.cpp b/lib/MCA/HardwareUnits/LSUnit.cpp
index 8895eb392b60..ac1a6a36547b 100644
--- a/lib/MCA/HardwareUnits/LSUnit.cpp
+++ b/lib/MCA/HardwareUnits/LSUnit.cpp
@@ -1,9 +1,8 @@
//===----------------------- LSUnit.cpp --------------------------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -22,67 +21,133 @@
namespace llvm {
namespace mca {
-LSUnit::LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
- bool AssumeNoAlias)
- : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {
+LSUnitBase::LSUnitBase(const MCSchedModel &SM, unsigned LQ, unsigned SQ,
+ bool AssumeNoAlias)
+ : LQSize(LQ), SQSize(SQ), UsedLQEntries(0), UsedSQEntries(0),
+ NoAlias(AssumeNoAlias), NextGroupID(1) {
if (SM.hasExtraProcessorInfo()) {
const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
- if (!LQ_Size && EPI.LoadQueueID) {
+ if (!LQSize && EPI.LoadQueueID) {
const MCProcResourceDesc &LdQDesc = *SM.getProcResource(EPI.LoadQueueID);
- LQ_Size = LdQDesc.BufferSize;
+ LQSize = LdQDesc.BufferSize;
}
- if (!SQ_Size && EPI.StoreQueueID) {
+ if (!SQSize && EPI.StoreQueueID) {
const MCProcResourceDesc &StQDesc = *SM.getProcResource(EPI.StoreQueueID);
- SQ_Size = StQDesc.BufferSize;
+ SQSize = StQDesc.BufferSize;
}
}
}
-#ifndef NDEBUG
-void LSUnit::dump() const {
- dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n';
- dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n';
- dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n';
- dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n';
-}
-#endif
-
-void LSUnit::assignLQSlot(unsigned Index) {
- assert(!isLQFull());
- assert(LoadQueue.count(Index) == 0);
+LSUnitBase::~LSUnitBase() {}
- LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot <Idx=" << Index
- << ",slot=" << LoadQueue.size() << ">\n");
- LoadQueue.insert(Index);
+void LSUnitBase::cycleEvent() {
+ for (const std::pair<unsigned, std::unique_ptr<MemoryGroup>> &G : Groups)
+ G.second->cycleEvent();
}
-void LSUnit::assignSQSlot(unsigned Index) {
- assert(!isSQFull());
- assert(StoreQueue.count(Index) == 0);
-
- LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot <Idx=" << Index
- << ",slot=" << StoreQueue.size() << ">\n");
- StoreQueue.insert(Index);
+#ifndef NDEBUG
+void LSUnitBase::dump() const {
+ dbgs() << "[LSUnit] LQ_Size = " << getLoadQueueSize() << '\n';
+ dbgs() << "[LSUnit] SQ_Size = " << getStoreQueueSize() << '\n';
+ dbgs() << "[LSUnit] NextLQSlotIdx = " << getUsedLQEntries() << '\n';
+ dbgs() << "[LSUnit] NextSQSlotIdx = " << getUsedSQEntries() << '\n';
+ dbgs() << "\n";
+ for (const auto &GroupIt : Groups) {
+ const MemoryGroup &Group = *GroupIt.second;
+ dbgs() << "[LSUnit] Group (" << GroupIt.first << "): "
+ << "[ #Preds = " << Group.getNumPredecessors()
+ << ", #GIssued = " << Group.getNumExecutingPredecessors()
+ << ", #GExecuted = " << Group.getNumExecutedPredecessors()
+ << ", #Inst = " << Group.getNumInstructions()
+ << ", #IIssued = " << Group.getNumExecuting()
+ << ", #IExecuted = " << Group.getNumExecuted() << '\n';
+ }
}
+#endif
-void LSUnit::dispatch(const InstRef &IR) {
+unsigned LSUnit::dispatch(const InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
unsigned IsMemBarrier = Desc.HasSideEffects;
assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!");
- const unsigned Index = IR.getSourceIndex();
- if (Desc.MayLoad) {
- if (IsMemBarrier)
- LoadBarriers.insert(Index);
- assignLQSlot(Index);
- }
+ if (Desc.MayLoad)
+ assignLQSlot();
+ if (Desc.MayStore)
+ assignSQSlot();
if (Desc.MayStore) {
+ // Always create a new group for store operations.
+
+ // A store may not pass a previous store or store barrier.
+ unsigned NewGID = createMemoryGroup();
+ MemoryGroup &NewGroup = getGroup(NewGID);
+ NewGroup.addInstruction();
+
+ // A store may not pass a previous load or load barrier.
+ unsigned ImmediateLoadDominator =
+ std::max(CurrentLoadGroupID, CurrentLoadBarrierGroupID);
+ if (ImmediateLoadDominator) {
+ MemoryGroup &IDom = getGroup(ImmediateLoadDominator);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << ImmediateLoadDominator
+ << ") --> (" << NewGID << ")\n");
+ IDom.addSuccessor(&NewGroup);
+ }
+ if (CurrentStoreGroupID) {
+ MemoryGroup &StoreGroup = getGroup(CurrentStoreGroupID);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+ << ") --> (" << NewGID << ")\n");
+ StoreGroup.addSuccessor(&NewGroup);
+ }
+
+ CurrentStoreGroupID = NewGID;
+ if (Desc.MayLoad) {
+ CurrentLoadGroupID = NewGID;
+ if (IsMemBarrier)
+ CurrentLoadBarrierGroupID = NewGID;
+ }
+
+ return NewGID;
+ }
+
+ assert(Desc.MayLoad && "Expected a load!");
+
+ // Always create a new memory group if this is the first load of the sequence.
+
+ // A load may not pass a previous store unless flag 'NoAlias' is set.
+ // A load may pass a previous load.
+ // A younger load cannot pass an older load barrier.
+ // A load barrier cannot pass an older load.
+ bool ShouldCreateANewGroup = !CurrentLoadGroupID || IsMemBarrier ||
+ CurrentLoadGroupID <= CurrentStoreGroupID ||
+ CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
+ if (ShouldCreateANewGroup) {
+ unsigned NewGID = createMemoryGroup();
+ MemoryGroup &NewGroup = getGroup(NewGID);
+ NewGroup.addInstruction();
+
+ if (!assumeNoAlias() && CurrentStoreGroupID) {
+ MemoryGroup &StGroup = getGroup(CurrentStoreGroupID);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentStoreGroupID
+ << ") --> (" << NewGID << ")\n");
+ StGroup.addSuccessor(&NewGroup);
+ }
+ if (CurrentLoadBarrierGroupID) {
+ MemoryGroup &LdGroup = getGroup(CurrentLoadBarrierGroupID);
+ LLVM_DEBUG(dbgs() << "[LSUnit]: GROUP DEP: (" << CurrentLoadBarrierGroupID
+ << ") --> (" << NewGID << ")\n");
+ LdGroup.addSuccessor(&NewGroup);
+ }
+
+ CurrentLoadGroupID = NewGID;
if (IsMemBarrier)
- StoreBarriers.insert(Index);
- assignSQSlot(Index);
+ CurrentLoadBarrierGroupID = NewGID;
+ return NewGID;
}
+
+ MemoryGroup &Group = getGroup(CurrentLoadGroupID);
+ Group.addInstruction();
+ return CurrentLoadGroupID;
}
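
The rewritten LSUnit::dispatch above replaces per-index load/store queues with memory groups: a store always opens a new group ordered after the current load and store groups, while a load reuses the current load group unless a barrier or an intervening store forces a new one. A reduced model of just that group-ID decision, assuming the dependency edges and queue accounting are out of scope:

    // Illustrative only: the group-ID decision from LSUnit::dispatch above,
    // without the MemoryGroup dependency edges or queue accounting.
    #include <cstdio>

    struct ToyLSU {
      unsigned NextGroupID = 1;
      unsigned CurrentLoadGroupID = 0;
      unsigned CurrentLoadBarrierGroupID = 0;
      unsigned CurrentStoreGroupID = 0;

      unsigned dispatch(bool MayLoad, bool MayStore, bool IsMemBarrier) {
        if (MayStore) {
          // A store always opens a new group, ordered after prior loads/stores.
          unsigned NewGID = NextGroupID++;
          CurrentStoreGroupID = NewGID;
          if (MayLoad) {
            CurrentLoadGroupID = NewGID;
            if (IsMemBarrier)
              CurrentLoadBarrierGroupID = NewGID;
          }
          return NewGID;
        }
        // Pure load: reuse the current load group unless a barrier, or a more
        // recent store/barrier group, forces a new one.
        bool NewGroup = !CurrentLoadGroupID || IsMemBarrier ||
                        CurrentLoadGroupID <= CurrentStoreGroupID ||
                        CurrentLoadGroupID <= CurrentLoadBarrierGroupID;
        if (NewGroup) {
          unsigned NewGID = NextGroupID++;
          CurrentLoadGroupID = NewGID;
          if (IsMemBarrier)
            CurrentLoadBarrierGroupID = NewGID;
          return NewGID;
        }
        return CurrentLoadGroupID;
      }
    };

    int main() {
      ToyLSU LSU;
      std::printf("load  -> group %u\n", LSU.dispatch(true, false, false));  // 1
      std::printf("load  -> group %u\n", LSU.dispatch(true, false, false));  // 1
      std::printf("store -> group %u\n", LSU.dispatch(false, true, false));  // 2
      std::printf("load  -> group %u\n", LSU.dispatch(true, false, false));  // 3
      return 0;
    }
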
LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
@@ -94,95 +159,46 @@ LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const {
return LSUnit::LSU_AVAILABLE;
}
-bool LSUnit::isReady(const InstRef &IR) const {
+void LSUnitBase::onInstructionExecuted(const InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
- const unsigned Index = IR.getSourceIndex();
bool IsALoad = Desc.MayLoad;
bool IsAStore = Desc.MayStore;
- assert((IsALoad || IsAStore) && "Not a memory operation!");
- assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
- assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
-
- if (IsALoad && !LoadBarriers.empty()) {
- unsigned LoadBarrierIndex = *LoadBarriers.begin();
- // A younger load cannot pass a older load barrier.
- if (Index > LoadBarrierIndex)
- return false;
- // A load barrier cannot pass a older load.
- if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
- return false;
- }
+ assert((IsALoad || IsAStore) && "Expected a memory operation!");
- if (IsAStore && !StoreBarriers.empty()) {
- unsigned StoreBarrierIndex = *StoreBarriers.begin();
- // A younger store cannot pass a older store barrier.
- if (Index > StoreBarrierIndex)
- return false;
- // A store barrier cannot pass a older store.
- if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
- return false;
- }
-
- // A load may not pass a previous store unless flag 'NoAlias' is set.
- // A load may pass a previous load.
- if (NoAlias && IsALoad)
- return true;
-
- if (StoreQueue.size()) {
- // A load may not pass a previous store.
- // A store may not pass a previous store.
- if (Index > *StoreQueue.begin())
- return false;
+ unsigned GroupID = IR.getInstruction()->getLSUTokenID();
+ auto It = Groups.find(GroupID);
+ It->second->onInstructionExecuted();
+ if (It->second->isExecuted()) {
+ Groups.erase(It);
}
- // Okay, we are older than the oldest store in the queue.
- // If there are no pending loads, then we can say for sure that this
- // instruction is ready.
- if (isLQEmpty())
- return true;
-
- // Check if there are no older loads.
- if (Index <= *LoadQueue.begin())
- return true;
-
- // There is at least one younger load.
- //
- // A store may not pass a previous load.
- // A load may pass a previous load.
- return !IsAStore;
-}
-
-void LSUnit::onInstructionExecuted(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- const unsigned Index = IR.getSourceIndex();
- bool IsALoad = Desc.MayLoad;
- bool IsAStore = Desc.MayStore;
-
if (IsALoad) {
- if (LoadQueue.erase(Index)) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the load queue.\n");
- }
- if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) {
- LLVM_DEBUG(
- dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the set of load barriers.\n");
- LoadBarriers.erase(Index);
- }
+ UsedLQEntries--;
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+ << " has been removed from the load queue.\n");
}
if (IsAStore) {
- if (StoreQueue.erase(Index)) {
- LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the store queue.\n");
- }
+ UsedSQEntries--;
+ LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << IR.getSourceIndex()
+ << " has been removed from the store queue.\n");
+ }
+}
- if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) {
- LLVM_DEBUG(
- dbgs() << "[LSUnit]: Instruction idx=" << Index
- << " has been removed from the set of store barriers.\n");
- StoreBarriers.erase(Index);
- }
+void LSUnit::onInstructionExecuted(const InstRef &IR) {
+ const Instruction &IS = *IR.getInstruction();
+ if (!IS.isMemOp())
+ return;
+
+ LSUnitBase::onInstructionExecuted(IR);
+ unsigned GroupID = IS.getLSUTokenID();
+ if (!isValidGroupID(GroupID)) {
+ if (GroupID == CurrentLoadGroupID)
+ CurrentLoadGroupID = 0;
+ if (GroupID == CurrentStoreGroupID)
+ CurrentStoreGroupID = 0;
+ if (GroupID == CurrentLoadBarrierGroupID)
+ CurrentLoadBarrierGroupID = 0;
}
}
diff --git a/lib/MCA/HardwareUnits/RegisterFile.cpp b/lib/MCA/HardwareUnits/RegisterFile.cpp
index 22977e5ded65..86a888ea8cae 100644
--- a/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -1,9 +1,8 @@
//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -189,7 +188,7 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
if (OtherWS && (OtherWrite.getSourceIndex() != Write.getSourceIndex())) {
// This partial write has a false dependency on RenameAs.
assert(!IsEliminated && "Unexpected partial update!");
- OtherWS->addUser(&WS);
+ OtherWS->addUser(OtherWrite.getSourceIndex(), &WS);
}
}
}
@@ -331,30 +330,25 @@ bool RegisterFile::tryEliminateMove(WriteState &WS, ReadState &RS) {
if (RMT.AllowZeroMoveEliminationOnly && !IsZeroMove)
return false;
- MCPhysReg FromReg = RS.getRegisterID();
- MCPhysReg ToReg = WS.getRegisterID();
-
// Construct an alias.
- MCPhysReg AliasReg = FromReg;
- if (RRIFrom.RenameAs)
- AliasReg = RRIFrom.RenameAs;
+ MCPhysReg AliasedReg =
+ RRIFrom.RenameAs ? RRIFrom.RenameAs : RS.getRegisterID();
+ MCPhysReg AliasReg = RRITo.RenameAs ? RRITo.RenameAs : WS.getRegisterID();
- const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasReg].second;
+ const RegisterRenamingInfo &RMAlias = RegisterMappings[AliasedReg].second;
if (RMAlias.AliasRegID)
- AliasReg = RMAlias.AliasRegID;
+ AliasedReg = RMAlias.AliasRegID;
- if (AliasReg != ToReg) {
- RegisterMappings[ToReg].second.AliasRegID = AliasReg;
- for (MCSubRegIterator I(ToReg, &MRI); I.isValid(); ++I)
- RegisterMappings[*I].second.AliasRegID = AliasReg;
- }
+ RegisterMappings[AliasReg].second.AliasRegID = AliasedReg;
+ for (MCSubRegIterator I(AliasReg, &MRI); I.isValid(); ++I)
+ RegisterMappings[*I].second.AliasRegID = AliasedReg;
- RMT.NumMoveEliminated++;
if (IsZeroMove) {
WS.setWriteZero();
RS.setReadZero();
}
WS.setEliminated();
+ RMT.NumMoveEliminated++;
return true;
}
@@ -402,7 +396,7 @@ void RegisterFile::collectWrites(const ReadState &RS,
}
void RegisterFile::addRegisterRead(ReadState &RS,
- SmallVectorImpl<WriteRef> &Defs) const {
+ const MCSubtargetInfo &STI) const {
unsigned RegID = RS.getRegisterID();
const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second;
RS.setPRF(RRI.IndexPlusCost.first);
@@ -411,8 +405,23 @@ void RegisterFile::addRegisterRead(ReadState &RS,
if (ZeroRegisters[RS.getRegisterID()])
RS.setReadZero();
- collectWrites(RS, Defs);
- RS.setDependentWrites(Defs.size());
+
+ SmallVector<WriteRef, 4> DependentWrites;
+ collectWrites(RS, DependentWrites);
+ RS.setDependentWrites(DependentWrites.size());
+
+ // We know that this read depends on all the writes in DependentWrites.
+ // For each write, check if we have ReadAdvance information, and use it
+ // to figure out in how many cycles this read becomes available.
+ const ReadDescriptor &RD = RS.getDescriptor();
+ const MCSchedModel &SM = STI.getSchedModel();
+ const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
+ for (WriteRef &WR : DependentWrites) {
+ WriteState &WS = *WR.getWriteState();
+ unsigned WriteResID = WS.getWriteResourceID();
+ int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
+ WS.addUser(WR.getSourceIndex(), &RS, ReadAdvance);
+ }
}
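
The addRegisterRead hunk above folds the old updateRAWDependencies logic into the register file: each dependent write registers the read as a user, and the scheduling model's ReadAdvance value shortens the wait. The arithmetic is a clamped subtraction, shown here in isolation with made-up numbers:

    // Illustrative only: the clamped subtraction behind ReadAdvance.
    #include <algorithm>
    #include <cstdio>

    // Effective wait for a read depending on a write that completes in
    // WriteCyclesLeft cycles, when the scheduling model grants ReadAdvance
    // cycles of early forwarding for this operand.
    static unsigned effectiveReadDelay(int WriteCyclesLeft, int ReadAdvance) {
      return static_cast<unsigned>(std::max(0, WriteCyclesLeft - ReadAdvance));
    }

    int main() {
      std::printf("%u\n", effectiveReadDelay(5, 0)); // 5: no forwarding
      std::printf("%u\n", effectiveReadDelay(5, 3)); // 2: operand forwarded early
      std::printf("%u\n", effectiveReadDelay(5, 7)); // 0: clamped, never negative
      return 0;
    }
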
unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp
index 2039b58e8ee5..06f2476353d6 100644
--- a/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -1,9 +1,8 @@
//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -24,16 +23,10 @@ namespace mca {
#define DEBUG_TYPE "llvm-mca"
ResourceStrategy::~ResourceStrategy() = default;
-// Returns the index of the highest bit set. For resource masks, the position of
-// the highest bit set can be used to construct a resource mask identifier.
-static unsigned getResourceStateIndex(uint64_t Mask) {
- return std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask);
-}
-
static uint64_t selectImpl(uint64_t CandidateMask,
uint64_t &NextInSequenceMask) {
// The upper bit set in CandidateMask identifies our next candidate resource.
- CandidateMask = 1ULL << (getResourceStateIndex(CandidateMask) - 1);
+ CandidateMask = 1ULL << getResourceStateIndex(CandidateMask);
NextInSequenceMask &= (CandidateMask | (CandidateMask - 1));
return CandidateMask;
}
@@ -75,7 +68,7 @@ ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index,
BufferSize(Desc.BufferSize), IsAGroup(countPopulation(ResourceMask) > 1) {
if (IsAGroup) {
ResourceSizeMask =
- ResourceMask ^ 1ULL << (getResourceStateIndex(ResourceMask) - 1);
+ ResourceMask ^ 1ULL << getResourceStateIndex(ResourceMask);
} else {
ResourceSizeMask = (1ULL << Desc.NumUnits) - 1;
}
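
Several ResourceManager hunks drop the old "- 1" adjustment, which suggests getResourceStateIndex now yields the zero-based index of a mask's highest set bit; for a group, that top bit is the group's own identity and the lower bits are its member units. A standalone illustration of the convention, assuming a plain C++ reimplementation of the index helper:

    // Illustrative only: getResourceStateIndex re-implemented as "index of the
    // highest set bit"; this is not the LLVM helper itself.
    #include <cstdint>
    #include <cstdio>

    static unsigned highestBitIndex(uint64_t Mask) {
      unsigned Index = 0;
      while (Mask >>= 1)
        ++Index;
      return Index;
    }

    int main() {
      // Two resource units plus a group containing both: the group's identity
      // is its top bit, the lower bits name its member units.
      uint64_t UnitA = 1ULL << 0;
      uint64_t UnitB = 1ULL << 1;
      uint64_t Group = (1ULL << 2) | UnitA | UnitB;

      uint64_t GroupIdentityBit = 1ULL << highestBitIndex(Group);
      uint64_t GroupMembers = Group ^ GroupIdentityBit;

      std::printf("identity bit: %llu\n", (unsigned long long)GroupIdentityBit); // 4
      std::printf("member mask : %llu\n", (unsigned long long)GroupMembers);     // 3
      return 0;
    }
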
@@ -116,13 +109,21 @@ getStrategyFor(const ResourceState &RS) {
}
ResourceManager::ResourceManager(const MCSchedModel &SM)
- : Resources(SM.getNumProcResourceKinds()),
- Strategies(SM.getNumProcResourceKinds()),
- Resource2Groups(SM.getNumProcResourceKinds(), 0),
- ProcResID2Mask(SM.getNumProcResourceKinds()) {
+ : Resources(SM.getNumProcResourceKinds() - 1),
+ Strategies(SM.getNumProcResourceKinds() - 1),
+ Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
+ ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
+ ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
+ ProcResUnitMask(0), ReservedResourceGroups(0) {
computeProcResourceMasks(SM, ProcResID2Mask);
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ // initialize vector ResIndex2ProcResID.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ unsigned Index = getResourceStateIndex(ProcResID2Mask[I]);
+ ResIndex2ProcResID[Index] = I;
+ }
+
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
uint64_t Mask = ProcResID2Mask[I];
unsigned Index = getResourceStateIndex(Mask);
Resources[Index] =
@@ -130,14 +131,16 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
Strategies[Index] = getStrategyFor(*Resources[Index]);
}
- for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
uint64_t Mask = ProcResID2Mask[I];
unsigned Index = getResourceStateIndex(Mask);
const ResourceState &RS = *Resources[Index];
- if (!RS.isAResourceGroup())
+ if (!RS.isAResourceGroup()) {
+ ProcResUnitMask |= Mask;
continue;
+ }
- uint64_t GroupMaskIdx = 1ULL << (Index - 1);
+ uint64_t GroupMaskIdx = 1ULL << Index;
Mask -= GroupMaskIdx;
while (Mask) {
// Extract lowest set isolated bit.
@@ -147,6 +150,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
Mask ^= Unit;
}
}
+
+ AvailableProcResUnits = ProcResUnitMask;
}
void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
@@ -158,7 +163,7 @@ void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
}
unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const {
- return Resources[getResourceStateIndex(Mask)]->getProcResourceID();
+ return ResIndex2ProcResID[getResourceStateIndex(Mask)];
}
unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const {
@@ -200,6 +205,8 @@ void ResourceManager::use(const ResourceRef &RR) {
if (RS.isReady())
return;
+ AvailableProcResUnits ^= RR.first;
+
// Notify groups that RR.first is no longer available.
uint64_t Users = Resource2Groups[RSID];
while (Users) {
@@ -214,19 +221,22 @@ void ResourceManager::use(const ResourceRef &RR) {
}
void ResourceManager::release(const ResourceRef &RR) {
- ResourceState &RS = *Resources[getResourceStateIndex(RR.first)];
+ unsigned RSID = getResourceStateIndex(RR.first);
+ ResourceState &RS = *Resources[RSID];
bool WasFullyUsed = !RS.isReady();
RS.releaseSubResource(RR.second);
if (!WasFullyUsed)
return;
- for (std::unique_ptr<ResourceState> &Res : Resources) {
- ResourceState &Current = *Res;
- if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first)
- continue;
+ AvailableProcResUnits ^= RR.first;
- if (Current.containsResource(RR.first))
- Current.releaseSubResource(RR.first);
+ // Notify groups that RR.first is now available again.
+ uint64_t Users = Resource2Groups[RSID];
+ while (Users) {
+ unsigned GroupIndex = getResourceStateIndex(Users & (-Users));
+ ResourceState &CurrentUser = *Resources[GroupIndex];
+ CurrentUser.releaseSubResource(RR.first);
+ Users &= Users - 1;
}
}
@@ -260,13 +270,19 @@ void ResourceManager::releaseBuffers(ArrayRef<uint64_t> Buffers) {
Resources[getResourceStateIndex(R)]->releaseBuffer();
}
-bool ResourceManager::canBeIssued(const InstrDesc &Desc) const {
- return all_of(
- Desc.Resources, [&](const std::pair<uint64_t, const ResourceUsage> &E) {
- unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
- unsigned Index = getResourceStateIndex(E.first);
- return Resources[Index]->isReady(NumUnits);
- });
+uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
+ uint64_t BusyResourceMask = 0;
+ for (const std::pair<uint64_t, const ResourceUsage> &E : Desc.Resources) {
+ unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
+ unsigned Index = getResourceStateIndex(E.first);
+ if (!Resources[Index]->isReady(NumUnits))
+ BusyResourceMask |= E.first;
+ }
+
+ BusyResourceMask &= ProcResUnitMask;
+ if (BusyResourceMask)
+ return BusyResourceMask;
+ return Desc.UsedProcResGroups & ReservedResourceGroups;
}
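
checkAvailability above replaces the boolean canBeIssued with a mask of busy resources, so a zero result still means the instruction can issue while a non-zero result names the bottleneck units for later analysis. A toy version of that boolean-to-mask change, assuming invented types:

    // Illustrative only: "can this issue?" becomes a mask of busy resources;
    // an all-zero result still means "issue now".
    #include <cstdint>
    #include <vector>

    struct ToyResource {
      uint64_t Mask; // identifying bit of the resource
      bool Ready;    // whether enough units are free this cycle
    };

    static uint64_t checkAvailability(const std::vector<ToyResource> &Required) {
      uint64_t BusyResourceMask = 0;
      for (const ToyResource &R : Required)
        if (!R.Ready)
          BusyResourceMask |= R.Mask;
      return BusyResourceMask; // non-zero: the bits name the bottleneck units
    }

    int main() {
      uint64_t Busy = checkAvailability({{1ULL << 3, true}, {1ULL << 5, false}});
      return Busy == (1ULL << 5) ? 0 : 1; // only the busy unit's bit is set
    }
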
void ResourceManager::issueInstruction(
@@ -317,14 +333,20 @@ void ResourceManager::cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed) {
}
void ResourceManager::reserveResource(uint64_t ResourceID) {
- ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
- assert(!Resource.isReserved());
+ const unsigned Index = getResourceStateIndex(ResourceID);
+ ResourceState &Resource = *Resources[Index];
+ assert(Resource.isAResourceGroup() && !Resource.isReserved() &&
+ "Unexpected resource found!");
Resource.setReserved();
+ ReservedResourceGroups ^= 1ULL << Index;
}
void ResourceManager::releaseResource(uint64_t ResourceID) {
- ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)];
+ const unsigned Index = getResourceStateIndex(ResourceID);
+ ResourceState &Resource = *Resources[Index];
Resource.clearReserved();
+ if (Resource.isAResourceGroup())
+ ReservedResourceGroups ^= 1ULL << Index;
}
} // namespace mca
diff --git a/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/lib/MCA/HardwareUnits/RetireControlUnit.cpp
index de9f24552c38..068c5062ccdf 100644
--- a/lib/MCA/HardwareUnits/RetireControlUnit.cpp
+++ b/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -1,9 +1,8 @@
//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/MCA/HardwareUnits/Scheduler.cpp b/lib/MCA/HardwareUnits/Scheduler.cpp
index 355ef79d06a6..0f0f2ffb8325 100644
--- a/lib/MCA/HardwareUnits/Scheduler.cpp
+++ b/lib/MCA/HardwareUnits/Scheduler.cpp
@@ -1,9 +1,8 @@
//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,10 +37,13 @@ void Scheduler::dump() const {
}
#endif
-Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
+Scheduler::Status Scheduler::isAvailable(const InstRef &IR) {
const InstrDesc &Desc = IR.getInstruction()->getDesc();
- switch (Resources->canBeDispatched(Desc.Buffers)) {
+ ResourceStateEvent RSE = Resources->canBeDispatched(Desc.Buffers);
+ HadTokenStall = RSE != RS_BUFFER_AVAILABLE;
+
+ switch (RSE) {
case ResourceStateEvent::RS_BUFFER_UNAVAILABLE:
return Scheduler::SC_BUFFERS_FULL;
case ResourceStateEvent::RS_RESERVED:
@@ -51,7 +53,10 @@ Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const {
}
// Give lower priority to LSUnit stall events.
- switch (LSU.isAvailable(IR)) {
+ LSUnit::Status LSS = LSU.isAvailable(IR);
+ HadTokenStall = LSS != LSUnit::LSU_AVAILABLE;
+
+ switch (LSS) {
case LSUnit::LSU_LQUEUE_FULL:
return Scheduler::SC_LOAD_QUEUE_FULL;
case LSUnit::LSU_SQUEUE_FULL:
@@ -75,7 +80,15 @@ void Scheduler::issueInstructionImpl(
// Notify the instruction that it started executing.
// This updates the internal state of each write.
- IS->execute();
+ IS->execute(IR.getSourceIndex());
+
+ IS->computeCriticalRegDep();
+
+ if (IS->isMemOp()) {
+ LSU.onInstructionIssued(IR);
+ const MemoryGroup &Group = LSU.getGroup(IS->getLSUTokenID());
+ IS->setCriticalMemDep(Group.getCriticalPredecessor());
+ }
if (IS->isExecuting())
IssuedSet.emplace_back(IR);
@@ -87,9 +100,11 @@ void Scheduler::issueInstructionImpl(
void Scheduler::issueInstruction(
InstRef &IR,
SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedResources,
+ SmallVectorImpl<InstRef> &PendingInstructions,
SmallVectorImpl<InstRef> &ReadyInstructions) {
const Instruction &Inst = *IR.getInstruction();
bool HasDependentUsers = Inst.hasDependentUsers();
+ HasDependentUsers |= Inst.isMemOp() && LSU.hasDependentUsers(IR);
Resources->releaseBuffers(Inst.getDesc().Buffers);
issueInstructionImpl(IR, UsedResources);
@@ -98,12 +113,49 @@ void Scheduler::issueInstruction(
// this same cycle if operands have ReadAdvance entries. Promote those
// instructions to the ReadySet and notify the caller that those are ready.
if (HasDependentUsers)
- promoteToReadySet(ReadyInstructions);
+ if (promoteToPendingSet(PendingInstructions))
+ promoteToReadySet(ReadyInstructions);
+}
+
+bool Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+ // Scan the set of waiting instructions and promote them to the
+ // ready set if operands are all ready.
+ unsigned PromotedElements = 0;
+ for (auto I = PendingSet.begin(), E = PendingSet.end(); I != E;) {
+ InstRef &IR = *I;
+ if (!IR)
+ break;
+
+ // Check if there are unsolved register dependencies.
+ Instruction &IS = *IR.getInstruction();
+ if (!IS.isReady() && !IS.updatePending()) {
+ ++I;
+ continue;
+ }
+ // Check if there are unsolved memory dependencies.
+ if (IS.isMemOp() && !LSU.isReady(IR)) {
+ ++I;
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " promoted to the READY set.\n");
+
+ Ready.emplace_back(IR);
+ ReadySet.emplace_back(IR);
+
+ IR.invalidate();
+ ++PromotedElements;
+ std::iter_swap(I, E - PromotedElements);
+ }
+
+ PendingSet.resize(PendingSet.size() - PromotedElements);
+ return PromotedElements;
}
-void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
+bool Scheduler::promoteToPendingSet(SmallVectorImpl<InstRef> &Pending) {
// Scan the set of waiting instructions and promote them to the
- // ready queue if operands are all ready.
+ // pending set if operands are all ready.
unsigned RemovedElements = 0;
for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) {
InstRef &IR = *I;
@@ -111,19 +163,23 @@ void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
break;
// Check if this instruction is now ready. In case, force
- // a transition in state using method 'update()'.
+ // a transition in state using method 'updateDispatched()'.
Instruction &IS = *IR.getInstruction();
- if (!IS.isReady())
- IS.update();
+ if (IS.isDispatched() && !IS.updateDispatched()) {
+ ++I;
+ continue;
+ }
- // Check if there are still unsolved data dependencies.
- if (!isReady(IR)) {
+ if (IS.isMemOp() && LSU.isWaiting(IR)) {
++I;
continue;
}
- Ready.emplace_back(IR);
- ReadySet.emplace_back(IR);
+ LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR
+ << " promoted to the PENDING set.\n");
+
+ Pending.emplace_back(IR);
+ PendingSet.emplace_back(IR);
IR.invalidate();
++RemovedElements;
@@ -131,16 +187,21 @@ void Scheduler::promoteToReadySet(SmallVectorImpl<InstRef> &Ready) {
}
WaitSet.resize(WaitSet.size() - RemovedElements);
+ return RemovedElements;
}
InstRef Scheduler::select() {
unsigned QueueIndex = ReadySet.size();
for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) {
- const InstRef &IR = ReadySet[I];
+ InstRef &IR = ReadySet[I];
if (QueueIndex == ReadySet.size() ||
Strategy->compare(IR, ReadySet[QueueIndex])) {
- const InstrDesc &D = IR.getInstruction()->getDesc();
- if (Resources->canBeIssued(D))
+ Instruction &IS = *IR.getInstruction();
+ uint64_t BusyResourceMask = Resources->checkAvailability(IS.getDesc());
+ if (BusyResourceMask)
+ IS.setCriticalResourceMask(BusyResourceMask);
+ BusyResourceUnits |= BusyResourceMask;
+ if (!BusyResourceMask)
QueueIndex = I;
}
}
@@ -180,22 +241,51 @@ void Scheduler::updateIssuedSet(SmallVectorImpl<InstRef> &Executed) {
IssuedSet.resize(IssuedSet.size() - RemovedElements);
}
+uint64_t Scheduler::analyzeResourcePressure(SmallVectorImpl<InstRef> &Insts) {
+ Insts.insert(Insts.end(), ReadySet.begin(), ReadySet.end());
+ return BusyResourceUnits;
+}
+
+void Scheduler::analyzeDataDependencies(SmallVectorImpl<InstRef> &RegDeps,
+ SmallVectorImpl<InstRef> &MemDeps) {
+ const auto EndIt = PendingSet.end() - NumDispatchedToThePendingSet;
+ for (const InstRef &IR : make_range(PendingSet.begin(), EndIt)) {
+ const Instruction &IS = *IR.getInstruction();
+ if (Resources->checkAvailability(IS.getDesc()))
+ continue;
+
+ if (IS.isMemOp() && LSU.isPending(IR))
+ MemDeps.emplace_back(IR);
+
+ if (IS.isPending())
+ RegDeps.emplace_back(IR);
+ }
+}
+
void Scheduler::cycleEvent(SmallVectorImpl<ResourceRef> &Freed,
SmallVectorImpl<InstRef> &Executed,
+ SmallVectorImpl<InstRef> &Pending,
SmallVectorImpl<InstRef> &Ready) {
+ LSU.cycleEvent();
+
// Release consumed resources.
Resources->cycleEvent(Freed);
- // Propagate the cycle event to the 'Issued' and 'Wait' sets.
for (InstRef &IR : IssuedSet)
IR.getInstruction()->cycleEvent();
-
updateIssuedSet(Executed);
+ for (InstRef &IR : PendingSet)
+ IR.getInstruction()->cycleEvent();
+
for (InstRef &IR : WaitSet)
IR.getInstruction()->cycleEvent();
+ promoteToPendingSet(Pending);
promoteToReadySet(Ready);
+
+ NumDispatchedToThePendingSet = 0;
+ BusyResourceUnits = 0;
}
bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
@@ -208,21 +298,31 @@ bool Scheduler::mustIssueImmediately(const InstRef &IR) const {
return Desc.MustIssueImmediately;
}
-void Scheduler::dispatch(const InstRef &IR) {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
+bool Scheduler::dispatch(InstRef &IR) {
+ Instruction &IS = *IR.getInstruction();
+ const InstrDesc &Desc = IS.getDesc();
Resources->reserveBuffers(Desc.Buffers);
// If necessary, reserve queue entries in the load-store unit (LSU).
- bool IsMemOp = Desc.MayLoad || Desc.MayStore;
- if (IsMemOp)
- LSU.dispatch(IR);
+ if (IS.isMemOp())
+ IS.setLSUTokenID(LSU.dispatch(IR));
- if (!isReady(IR)) {
+ if (IS.isDispatched() || (IS.isMemOp() && LSU.isWaiting(IR))) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n");
WaitSet.push_back(IR);
- return;
+ return false;
+ }
+
+ if (IS.isPending() || (IS.isMemOp() && LSU.isPending(IR))) {
+ LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR
+ << " to the PendingSet\n");
+ PendingSet.push_back(IR);
+ ++NumDispatchedToThePendingSet;
+ return false;
}
+ assert(IS.isReady() && (!IS.isMemOp() || LSU.isReady(IR)) &&
+ "Unexpected internal state found!");
// Don't add a zero-latency instruction to the Ready queue.
// A zero-latency instruction doesn't consume any scheduler resources. That is
// because it doesn't need to be executed, and it is often removed at register
@@ -235,12 +335,8 @@ void Scheduler::dispatch(const InstRef &IR) {
LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n");
ReadySet.push_back(IR);
}
-}
-bool Scheduler::isReady(const InstRef &IR) const {
- const InstrDesc &Desc = IR.getInstruction()->getDesc();
- bool IsMemOp = Desc.MayLoad || Desc.MayStore;
- return IR.getInstruction()->isReady() && (!IsMemOp || LSU.isReady(IR));
+ return true;
}
} // namespace mca
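
The Scheduler::dispatch rewrite above routes each dispatched instruction into the wait, pending, or ready set depending on how far its register and memory dependencies have progressed. A compressed model of that routing decision, assuming plain flags in place of the Instruction and LSUnit queries:

    // Illustrative only: plain flags stand in for the Instruction and LSUnit
    // queries used by Scheduler::dispatch above.
    #include <cstdio>

    enum class SetKind { Wait, Pending, Ready };

    static SetKind route(bool RegsDispatched, bool RegsPending, bool IsMemOp,
                         bool LSUWaiting, bool LSUPending) {
      if (RegsDispatched || (IsMemOp && LSUWaiting))
        return SetKind::Wait;    // register inputs (or memory) not started yet
      if (RegsPending || (IsMemOp && LSUPending))
        return SetKind::Pending; // started, but not yet ready to issue
      return SetKind::Ready;
    }

    int main() {
      // A load whose registers are ready but whose memory group is still
      // pending lands in the pending set rather than the ready set.
      SetKind K = route(/*RegsDispatched=*/false, /*RegsPending=*/false,
                        /*IsMemOp=*/true, /*LSUWaiting=*/false,
                        /*LSUPending=*/true);
      std::printf("%s\n", K == SetKind::Pending ? "pending" : "other");
      return 0;
    }
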
diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp
index d2d65e55537c..829920366c90 100644
--- a/lib/MCA/InstrBuilder.cpp
+++ b/lib/MCA/InstrBuilder.cpp
@@ -1,9 +1,8 @@
//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -66,6 +65,17 @@ static void initializeUsedResources(InstrDesc &ID,
for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
+ if (!PRE->Cycles) {
+#ifndef NDEBUG
+ WithColor::warning()
+ << "Ignoring invalid write of zero cycles on processor resource "
+ << PR.Name << "\n";
+ WithColor::note() << "found in scheduling class " << SCDesc.Name
+ << " (write index #" << I << ")\n";
+#endif
+ continue;
+ }
+
uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
if (PR.BufferSize < 0) {
AllInOrderResources = false;
@@ -98,14 +108,14 @@ static void initializeUsedResources(InstrDesc &ID,
});
uint64_t UsedResourceUnits = 0;
+ uint64_t UsedResourceGroups = 0;
// Remove cycles contributed by smaller resources.
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
ResourcePlusCycles &A = Worklist[I];
if (!A.second.size()) {
- A.second.NumUnits = 0;
- A.second.setReserved();
- ID.Resources.emplace_back(A);
+ assert(countPopulation(A.first) > 1 && "Expected a group!");
+ UsedResourceGroups |= PowerOf2Floor(A.first);
continue;
}
@@ -116,6 +126,7 @@ static void initializeUsedResources(InstrDesc &ID,
} else {
// Remove the leading 1 from the resource group mask.
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ UsedResourceGroups |= (A.first ^ NormalizedMask);
}
for (unsigned J = I + 1; J < E; ++J) {
@@ -128,6 +139,9 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
+ ID.UsedProcResUnits = UsedResourceUnits;
+ ID.UsedProcResGroups = UsedResourceGroups;
+
// A SchedWrite may specify a number of cycles in which a resource group
// is reserved. For example (on target x86; cpu Haswell):
//
@@ -180,10 +194,15 @@ static void initializeUsedResources(InstrDesc &ID,
LLVM_DEBUG({
for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
- dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+ dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
+ << "Reserved=" << R.second.isReserved() << ", "
+ << "#Units=" << R.second.NumUnits << ", "
<< "cy=" << R.second.size() << '\n';
for (const uint64_t R : ID.Buffers)
dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
+ dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+ dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
+ << '\n';
});
}
@@ -533,6 +552,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
// Create a new empty descriptor.
std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
ID->NumMicroOps = SCDesc.NumMicroOps;
+ ID->SchedClassID = SchedClassID;
if (MCDesc.isCall() && FirstCallInst) {
// We don't correctly model calls.
@@ -572,7 +592,6 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
return std::move(Err);
// Now add the new descriptor.
- SchedClassID = MCDesc.getSchedClass();
bool IsVariadic = MCDesc.isVariadic();
if (!IsVariadic && !IsVariant) {
Descriptors[MCI.getOpcode()] = std::move(ID);
diff --git a/lib/MCA/Instruction.cpp b/lib/MCA/Instruction.cpp
index 057e95ca9990..001842bca318 100644
--- a/lib/MCA/Instruction.cpp
+++ b/lib/MCA/Instruction.cpp
@@ -1,9 +1,8 @@
//===--------------------- Instruction.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,16 @@
namespace llvm {
namespace mca {
-void ReadState::writeStartEvent(unsigned Cycles) {
+void WriteState::writeStartEvent(unsigned IID, unsigned RegID,
+ unsigned Cycles) {
+ CRD.IID = IID;
+ CRD.RegID = RegID;
+ CRD.Cycles = Cycles;
+ DependentWriteCyclesLeft = Cycles;
+ DependentWrite = nullptr;
+}
+
+void ReadState::writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
assert(DependentWrites);
assert(CyclesLeft == UNKNOWN_CYCLES);
@@ -29,7 +37,12 @@ void ReadState::writeStartEvent(unsigned Cycles) {
// The HW is forced to do some extra bookkeeping to track of all the
// dependent writes, and implement a merging scheme for the partial writes.
--DependentWrites;
- TotalCycles = std::max(TotalCycles, Cycles);
+ if (TotalCycles < Cycles) {
+ CRD.IID = IID;
+ CRD.RegID = RegID;
+ CRD.Cycles = Cycles;
+ TotalCycles = Cycles;
+ }
if (!DependentWrites) {
CyclesLeft = TotalCycles;
@@ -37,7 +50,7 @@ void ReadState::writeStartEvent(unsigned Cycles) {
}
}
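
The writeStartEvent changes above thread an instruction identifier and register through so that each read can remember its critical, longest-latency producer. A self-contained toy of that bookkeeping, assuming a struct merely modeled on the real CriticalDependency:

    // Illustrative only: a read that depends on several writes remembers the
    // one that delays it the longest.
    #include <cstdio>

    struct ToyCriticalDependency {
      unsigned IID;    // instruction identifier of the critical write
      unsigned RegID;  // register it writes
      unsigned Cycles; // cycles this read must wait because of it
    };

    struct ToyReadState {
      unsigned DependentWrites = 0;
      unsigned TotalCycles = 0;
      ToyCriticalDependency CRD = {0, 0, 0};

      void writeStartEvent(unsigned IID, unsigned RegID, unsigned Cycles) {
        --DependentWrites;
        if (TotalCycles < Cycles) {
          CRD = {IID, RegID, Cycles};
          TotalCycles = Cycles;
        }
      }
    };

    int main() {
      ToyReadState RS;
      RS.DependentWrites = 2;
      RS.writeStartEvent(/*IID=*/7, /*RegID=*/3, /*Cycles=*/2);
      RS.writeStartEvent(/*IID=*/9, /*RegID=*/5, /*Cycles=*/6);
      // The second write dominates: this read is critically dependent on IID 9.
      std::printf("critical: IID=%u RegID=%u Cycles=%u\n", RS.CRD.IID,
                  RS.CRD.RegID, RS.CRD.Cycles);
      return 0;
    }
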
-void WriteState::onInstructionIssued() {
+void WriteState::onInstructionIssued(unsigned IID) {
assert(CyclesLeft == UNKNOWN_CYCLES);
// Update the number of cycles left based on the WriteDescriptor info.
CyclesLeft = getLatency();
@@ -47,34 +60,30 @@ void WriteState::onInstructionIssued() {
for (const std::pair<ReadState *, int> &User : Users) {
ReadState *RS = User.first;
unsigned ReadCycles = std::max(0, CyclesLeft - User.second);
- RS->writeStartEvent(ReadCycles);
+ RS->writeStartEvent(IID, RegisterID, ReadCycles);
}
// Notify any writes that are in a false dependency with this write.
if (PartialWrite)
- PartialWrite->writeStartEvent(CyclesLeft);
+ PartialWrite->writeStartEvent(IID, RegisterID, CyclesLeft);
}
-void WriteState::addUser(ReadState *User, int ReadAdvance) {
+void WriteState::addUser(unsigned IID, ReadState *User, int ReadAdvance) {
// If CyclesLeft is different than -1, then we don't need to
// update the list of users. We can just notify the user with
// the actual number of cycles left (which may be zero).
if (CyclesLeft != UNKNOWN_CYCLES) {
unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance);
- User->writeStartEvent(ReadCycles);
+ User->writeStartEvent(IID, RegisterID, ReadCycles);
return;
}
- if (llvm::find_if(Users, [&User](const std::pair<ReadState *, int> &Use) {
- return Use.first == User;
- }) == Users.end()) {
- Users.emplace_back(User, ReadAdvance);
- }
+ Users.emplace_back(User, ReadAdvance);
}
-void WriteState::addUser(WriteState *User) {
+void WriteState::addUser(unsigned IID, WriteState *User) {
if (CyclesLeft != UNKNOWN_CYCLES) {
- User->writeStartEvent(std::max(0, CyclesLeft));
+ User->writeStartEvent(IID, RegisterID, std::max(0, CyclesLeft));
return;
}
@@ -126,16 +135,37 @@ void WriteRef::dump() const {
}
#endif
+const CriticalDependency &Instruction::computeCriticalRegDep() {
+ if (CriticalRegDep.Cycles)
+ return CriticalRegDep;
+
+ unsigned MaxLatency = 0;
+ for (const WriteState &WS : getDefs()) {
+ const CriticalDependency &WriteCRD = WS.getCriticalRegDep();
+ if (WriteCRD.Cycles > MaxLatency)
+ CriticalRegDep = WriteCRD;
+ }
+
+ for (const ReadState &RS : getUses()) {
+ const CriticalDependency &ReadCRD = RS.getCriticalRegDep();
+ if (ReadCRD.Cycles > MaxLatency)
+ CriticalRegDep = ReadCRD;
+ }
+
+ return CriticalRegDep;
+}
+
void Instruction::dispatch(unsigned RCUToken) {
assert(Stage == IS_INVALID);
- Stage = IS_AVAILABLE;
+ Stage = IS_DISPATCHED;
RCUTokenID = RCUToken;
// Check if input operands are already available.
- update();
+ if (updateDispatched())
+ updatePending();
}
-void Instruction::execute() {
+void Instruction::execute(unsigned IID) {
assert(Stage == IS_READY);
Stage = IS_EXECUTING;
@@ -143,7 +173,7 @@ void Instruction::execute() {
CyclesLeft = getLatency();
for (WriteState &WS : getDefs())
- WS.onInstructionIssued();
+ WS.onInstructionIssued(IID);
// Transition to the "executed" stage if this is a zero-latency instruction.
if (!CyclesLeft)
@@ -156,30 +186,49 @@ void Instruction::forceExecuted() {
Stage = IS_EXECUTED;
}
-void Instruction::update() {
- assert(isDispatched() && "Unexpected instruction stage found!");
+bool Instruction::updatePending() {
+ assert(isPending() && "Unexpected instruction stage found!");
if (!all_of(getUses(), [](const ReadState &Use) { return Use.isReady(); }))
- return;
+ return false;
+
+ // A partial register write cannot complete before a dependent write.
+ if (!all_of(getDefs(), [](const WriteState &Def) { return Def.isReady(); }))
+ return false;
+
+ Stage = IS_READY;
+ return true;
+}
+
+bool Instruction::updateDispatched() {
+ assert(isDispatched() && "Unexpected instruction stage found!");
+
+ if (!all_of(getUses(), [](const ReadState &Use) {
+ return Use.isPending() || Use.isReady();
+ }))
+ return false;
// A partial register write cannot complete before a dependent write.
- auto IsDefReady = [&](const WriteState &Def) {
- if (!Def.getDependentWrite()) {
- unsigned CyclesLeft = Def.getDependentWriteCyclesLeft();
- return !CyclesLeft || CyclesLeft < getLatency();
- }
+ if (!all_of(getDefs(),
+ [](const WriteState &Def) { return !Def.getDependentWrite(); }))
return false;
- };
- if (all_of(getDefs(), IsDefReady))
- Stage = IS_READY;
+ Stage = IS_PENDING;
+ return true;
+}
+
+void Instruction::update() {
+ if (isDispatched())
+ updateDispatched();
+ if (isPending())
+ updatePending();
}
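
The hunks above split the old update() into updateDispatched() and updatePending(), inserting a PENDING stage between DISPATCHED and READY. A reduced model of that state machine, assuming booleans in place of the ReadState/WriteState queries:

    // Illustrative only: DISPATCHED -> PENDING -> READY. updateDispatched()
    // needs every input to have started; updatePending() needs every input
    // to be fully ready.
    #include <cassert>
    #include <cstdio>

    enum class Stage { Dispatched, Pending, Ready };

    struct ToyInstruction {
      Stage S = Stage::Dispatched;
      bool InputsStarted = false; // all dependent writes have at least issued
      bool InputsReady = false;   // all dependent writes have completed

      bool updateDispatched() {
        assert(S == Stage::Dispatched);
        if (!InputsStarted)
          return false;
        S = Stage::Pending;
        return true;
      }
      bool updatePending() {
        assert(S == Stage::Pending);
        if (!InputsReady)
          return false;
        S = Stage::Ready;
        return true;
      }
      void update() {
        if (S == Stage::Dispatched && !updateDispatched())
          return;
        if (S == Stage::Pending)
          updatePending();
      }
    };

    int main() {
      ToyInstruction I;
      I.update();             // inputs not started: stays DISPATCHED
      I.InputsStarted = true;
      I.update();             // moves to PENDING
      I.InputsReady = true;
      I.update();             // moves to READY
      std::printf("%s\n", I.S == Stage::Ready ? "ready" : "not ready");
      return 0;
    }
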
void Instruction::cycleEvent() {
if (isReady())
return;
- if (isDispatched()) {
+ if (isDispatched() || isPending()) {
for (ReadState &Use : getUses())
Use.cycleEvent();
diff --git a/lib/MCA/Pipeline.cpp b/lib/MCA/Pipeline.cpp
index 4c0e37c9ba7e..22b9d0799f77 100644
--- a/lib/MCA/Pipeline.cpp
+++ b/lib/MCA/Pipeline.cpp
@@ -1,9 +1,8 @@
//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -64,9 +63,10 @@ Error Pipeline::runCycle() {
Err = FirstStage.execute(IR);
// Update stages in preparation for a new cycle.
- for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) {
- const std::unique_ptr<Stage> &S = *I;
+ for (const std::unique_ptr<Stage> &S : Stages) {
Err = S->cycleEnd();
+ if (Err)
+ break;
}
return Err;
diff --git a/lib/MCA/Stages/DispatchStage.cpp b/lib/MCA/Stages/DispatchStage.cpp
index 7fb4eb6a1c0e..7334a268e9a6 100644
--- a/lib/MCA/Stages/DispatchStage.cpp
+++ b/lib/MCA/Stages/DispatchStage.cpp
@@ -1,9 +1,8 @@
//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -26,6 +25,16 @@
namespace llvm {
namespace mca {
+DispatchStage::DispatchStage(const MCSubtargetInfo &Subtarget,
+ const MCRegisterInfo &MRI,
+ unsigned MaxDispatchWidth, RetireControlUnit &R,
+ RegisterFile &F)
+ : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth),
+ CarryOver(0U), CarriedOver(), STI(Subtarget), RCU(R), PRF(F) {
+ if (!DispatchWidth)
+ DispatchWidth = Subtarget.getSchedModel().IssueWidth;
+}
+
void DispatchStage::notifyInstructionDispatched(const InstRef &IR,
ArrayRef<unsigned> UsedRegs,
unsigned UOps) const {
@@ -60,28 +69,10 @@ bool DispatchStage::checkRCU(const InstRef &IR) const {
}
bool DispatchStage::canDispatch(const InstRef &IR) const {
- return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR);
-}
-
-void DispatchStage::updateRAWDependencies(ReadState &RS,
- const MCSubtargetInfo &STI) {
- SmallVector<WriteRef, 4> DependentWrites;
-
- // Collect all the dependent writes, and update RS internal state.
- PRF.addRegisterRead(RS, DependentWrites);
-
- // We know that this read depends on all the writes in DependentWrites.
- // For each write, check if we have ReadAdvance information, and use it
- // to figure out in how many cycles this read becomes available.
- const ReadDescriptor &RD = RS.getDescriptor();
- const MCSchedModel &SM = STI.getSchedModel();
- const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID);
- for (WriteRef &WR : DependentWrites) {
- WriteState &WS = *WR.getWriteState();
- unsigned WriteResID = WS.getWriteResourceID();
- int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID);
- WS.addUser(&RS, ReadAdvance);
- }
+ bool CanDispatch = checkRCU(IR);
+ CanDispatch &= checkPRF(IR);
+ CanDispatch &= checkNextStage(IR);
+ return CanDispatch;
}
Error DispatchStage::dispatch(InstRef IR) {
@@ -104,11 +95,11 @@ Error DispatchStage::dispatch(InstRef IR) {
AvailableEntries = 0;
// Check if this is an optimizable reg-reg move.
- bool IsEliminated = false;
if (IS.isOptimizableMove()) {
assert(IS.getDefs().size() == 1 && "Expected a single output!");
assert(IS.getUses().size() == 1 && "Expected a single input!");
- IsEliminated = PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]);
+ if (PRF.tryEliminateMove(IS.getDefs()[0], IS.getUses()[0]))
+ IS.setEliminated();
}
// A dependency-breaking instruction doesn't have to wait on the register
@@ -120,9 +111,9 @@ Error DispatchStage::dispatch(InstRef IR) {
//
// We also don't update data dependencies for instructions that have been
// eliminated at register renaming stage.
- if (!IsEliminated) {
+ if (!IS.isEliminated()) {
for (ReadState &RS : IS.getUses())
- updateRAWDependencies(RS, STI);
+ PRF.addRegisterRead(RS, STI);
}
// By default, a dependency-breaking zero-idiom is expected to be optimized
diff --git a/lib/MCA/Stages/EntryStage.cpp b/lib/MCA/Stages/EntryStage.cpp
index 3325bb36f5af..d2f5613a0fb6 100644
--- a/lib/MCA/Stages/EntryStage.cpp
+++ b/lib/MCA/Stages/EntryStage.cpp
@@ -1,9 +1,8 @@
//===---------------------- EntryStage.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -19,7 +18,9 @@
namespace llvm {
namespace mca {
-bool EntryStage::hasWorkToComplete() const { return CurrentInstruction; }
+bool EntryStage::hasWorkToComplete() const {
+ return static_cast<bool>(CurrentInstruction);
+}
bool EntryStage::isAvailable(const InstRef & /* unused */) const {
if (CurrentInstruction)
diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp
index e78327763fa1..a2b361fcd1bf 100644
--- a/lib/MCA/Stages/ExecuteStage.cpp
+++ b/lib/MCA/Stages/ExecuteStage.cpp
@@ -1,9 +1,8 @@
//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -53,8 +52,11 @@ bool ExecuteStage::isAvailable(const InstRef &IR) const {
Error ExecuteStage::issueInstruction(InstRef &IR) {
SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> Used;
+ SmallVector<InstRef, 4> Pending;
SmallVector<InstRef, 4> Ready;
- HWS.issueInstruction(IR, Used, Ready);
+
+ HWS.issueInstruction(IR, Used, Pending, Ready);
+ NumIssuedOpcodes += IR.getInstruction()->getDesc().NumMicroOps;
notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
@@ -66,6 +68,9 @@ Error ExecuteStage::issueInstruction(InstRef &IR) {
return S;
}
+ for (const InstRef &I : Pending)
+ notifyInstructionPending(I);
+
for (const InstRef &I : Ready)
notifyInstructionReady(I);
return ErrorSuccess();
@@ -87,9 +92,12 @@ Error ExecuteStage::issueReadyInstructions() {
Error ExecuteStage::cycleStart() {
SmallVector<ResourceRef, 8> Freed;
SmallVector<InstRef, 4> Executed;
+ SmallVector<InstRef, 4> Pending;
SmallVector<InstRef, 4> Ready;
- HWS.cycleEvent(Freed, Executed, Ready);
+ HWS.cycleEvent(Freed, Executed, Pending, Ready);
+ NumDispatchedOpcodes = 0;
+ NumIssuedOpcodes = 0;
for (const ResourceRef &RR : Freed)
notifyResourceAvailable(RR);
@@ -101,12 +109,53 @@ Error ExecuteStage::cycleStart() {
return S;
}
+ for (const InstRef &IR : Pending)
+ notifyInstructionPending(IR);
+
for (const InstRef &IR : Ready)
notifyInstructionReady(IR);
return issueReadyInstructions();
}
+Error ExecuteStage::cycleEnd() {
+ if (!EnablePressureEvents)
+ return ErrorSuccess();
+
+ // Always conservatively report any backpressure events if the dispatch logic
+ // was stalled due to unavailable scheduler resources.
+ if (!HWS.hadTokenStall() && NumDispatchedOpcodes <= NumIssuedOpcodes)
+ return ErrorSuccess();
+
+ SmallVector<InstRef, 8> Insts;
+ uint64_t Mask = HWS.analyzeResourcePressure(Insts);
+ if (Mask) {
+ LLVM_DEBUG(dbgs() << "[E] Backpressure increased because of unavailable "
+ "pipeline resources: "
+ << format_hex(Mask, 16) << '\n');
+ HWPressureEvent Ev(HWPressureEvent::RESOURCES, Insts, Mask);
+ notifyEvent(Ev);
+ }
+
+ SmallVector<InstRef, 8> RegDeps;
+ SmallVector<InstRef, 8> MemDeps;
+ HWS.analyzeDataDependencies(RegDeps, MemDeps);
+ if (RegDeps.size()) {
+ LLVM_DEBUG(
+ dbgs() << "[E] Backpressure increased by register dependencies\n");
+ HWPressureEvent Ev(HWPressureEvent::REGISTER_DEPS, RegDeps);
+ notifyEvent(Ev);
+ }
+
+ if (MemDeps.size()) {
+ LLVM_DEBUG(dbgs() << "[E] Backpressure increased by memory dependencies\n");
+ HWPressureEvent Ev(HWPressureEvent::MEMORY_DEPS, MemDeps);
+ notifyEvent(Ev);
+ }
+
+ return ErrorSuccess();
+}
+
#ifndef NDEBUG
static void verifyInstructionEliminated(const InstRef &IR) {
const Instruction &Inst = *IR.getInstruction();
@@ -124,6 +173,7 @@ Error ExecuteStage::handleInstructionEliminated(InstRef &IR) {
#ifndef NDEBUG
verifyInstructionEliminated(IR);
#endif
+ notifyInstructionPending(IR);
notifyInstructionReady(IR);
notifyInstructionIssued(IR, {});
IR.getInstruction()->forceExecuted();
@@ -147,10 +197,18 @@ Error ExecuteStage::execute(InstRef &IR) {
// BufferSize=0 as reserved. Resources with a buffer size of zero will only
// be released after MCIS is issued, and all the ResourceCycles for those
// units have been consumed.
- HWS.dispatch(IR);
+ bool IsReadyInstruction = HWS.dispatch(IR);
+ const Instruction &Inst = *IR.getInstruction();
+ NumDispatchedOpcodes += Inst.getDesc().NumMicroOps;
notifyReservedOrReleasedBuffers(IR, /* Reserved */ true);
- if (!HWS.isReady(IR))
+
+ if (!IsReadyInstruction) {
+ if (Inst.isPending())
+ notifyInstructionPending(IR);
return ErrorSuccess();
+ }
+
+ notifyInstructionPending(IR);
// If we did not return early, then the scheduler is ready for execution.
notifyInstructionReady(IR);
@@ -170,6 +228,12 @@ void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) const {
HWInstructionEvent(HWInstructionEvent::Executed, IR));
}
+void ExecuteStage::notifyInstructionPending(const InstRef &IR) const {
+ LLVM_DEBUG(dbgs() << "[E] Instruction Pending: #" << IR << '\n');
+ notifyEvent<HWInstructionEvent>(
+ HWInstructionEvent(HWInstructionEvent::Pending, IR));
+}
+
void ExecuteStage::notifyInstructionReady(const InstRef &IR) const {
LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n');
notifyEvent<HWInstructionEvent>(
@@ -189,9 +253,10 @@ void ExecuteStage::notifyInstructionIssued(
LLVM_DEBUG({
dbgs() << "[E] Instruction Issued: #" << IR << '\n';
for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
+ assert(Resource.second.getDenominator() == 1 && "Invalid cycles!");
dbgs() << "[E] Resource Used: [" << Resource.first.first << '.'
<< Resource.first.second << "], ";
- dbgs() << "cycles: " << Resource.second << '\n';
+ dbgs() << "cycles: " << Resource.second.getNumerator() << '\n';
}
});
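The new ExecuteStage::cycleEnd() only raises HWPressureEvents when the cycle fell behind: either the scheduler ran out of tokens, or more micro-ops were dispatched than issued. A small sketch of that gate with hypothetical values; shouldReportPressure is an invented helper, not an llvm-mca function:

#include <cassert>

static bool shouldReportPressure(bool HadTokenStall, unsigned Dispatched,
                                 unsigned Issued) {
  // Report only if dispatch stalled on scheduler tokens, or this cycle
  // accepted more micro-ops than it managed to issue.
  return HadTokenStall || Dispatched > Issued;
}

int main() {
  assert(!shouldReportPressure(false, 4, 4)); // kept up: stay quiet
  assert(shouldReportPressure(false, 6, 2));  // backlog grew: report
  assert(shouldReportPressure(true, 0, 0));   // token stall: always report
}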
diff --git a/lib/MCA/Stages/InstructionTables.cpp b/lib/MCA/Stages/InstructionTables.cpp
index f918c183aa5a..adeefb45ec2d 100644
--- a/lib/MCA/Stages/InstructionTables.cpp
+++ b/lib/MCA/Stages/InstructionTables.cpp
@@ -1,9 +1,8 @@
//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/MCA/Stages/MicroOpQueueStage.cpp b/lib/MCA/Stages/MicroOpQueueStage.cpp
new file mode 100644
index 000000000000..cb3e4c6979a4
--- /dev/null
+++ b/lib/MCA/Stages/MicroOpQueueStage.cpp
@@ -0,0 +1,70 @@
+//===---------------------- MicroOpQueueStage.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the MicroOpQueueStage.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MCA/Stages/MicroOpQueueStage.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+Error MicroOpQueueStage::moveInstructions() {
+ InstRef IR = Buffer[CurrentInstructionSlotIdx];
+ while (IR && checkNextStage(IR)) {
+ if (llvm::Error Val = moveToTheNextStage(IR))
+ return Val;
+
+ Buffer[CurrentInstructionSlotIdx].invalidate();
+ unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+ CurrentInstructionSlotIdx += NormalizedOpcodes;
+ CurrentInstructionSlotIdx %= Buffer.size();
+ AvailableEntries += NormalizedOpcodes;
+ IR = Buffer[CurrentInstructionSlotIdx];
+ }
+
+ return llvm::ErrorSuccess();
+}
+
+MicroOpQueueStage::MicroOpQueueStage(unsigned Size, unsigned IPC,
+ bool ZeroLatencyStage)
+ : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), MaxIPC(IPC),
+ CurrentIPC(0), IsZeroLatencyStage(ZeroLatencyStage) {
+ Buffer.resize(Size ? Size : 1);
+ AvailableEntries = Buffer.size();
+}
+
+Error MicroOpQueueStage::execute(InstRef &IR) {
+ Buffer[NextAvailableSlotIdx] = IR;
+ unsigned NormalizedOpcodes = getNormalizedOpcodes(IR);
+ NextAvailableSlotIdx += NormalizedOpcodes;
+ NextAvailableSlotIdx %= Buffer.size();
+ AvailableEntries -= NormalizedOpcodes;
+ ++CurrentIPC;
+ return llvm::ErrorSuccess();
+}
+
+Error MicroOpQueueStage::cycleStart() {
+ CurrentIPC = 0;
+ if (!IsZeroLatencyStage)
+ return moveInstructions();
+ return llvm::ErrorSuccess();
+}
+
+Error MicroOpQueueStage::cycleEnd() {
+ if (IsZeroLatencyStage)
+ return moveInstructions();
+ return llvm::ErrorSuccess();
+}
+
+} // namespace mca
+} // namespace llvm
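MicroOpQueueStage models its queue as a fixed-size circular buffer: execute() claims as many slots as the instruction has micro-ops and advances the write index modulo the buffer size, while moveInstructions() releases those slots and advances the read index the same way. A sketch of that index bookkeeping on a plain std::vector, with invented variable names and values:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> Buffer(4, 0);                     // 4-entry queue
  unsigned NextSlot = 0, CurrentSlot = 0, Available = 4;

  // "execute": store one instruction worth 3 micro-ops, claim 3 slots.
  unsigned Uops = 3;
  Buffer[NextSlot] = 42;
  NextSlot = (NextSlot + Uops) % Buffer.size();
  Available -= Uops;

  // "moveInstructions": hand it to the next stage, release the same slots.
  assert(Buffer[CurrentSlot] == 42);
  Buffer[CurrentSlot] = 0;
  CurrentSlot = (CurrentSlot + Uops) % Buffer.size();
  Available += Uops;

  assert(NextSlot == 3 && CurrentSlot == 3 && Available == 4);
}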
diff --git a/lib/MCA/Stages/RetireStage.cpp b/lib/MCA/Stages/RetireStage.cpp
index d6bcc518662f..e1789dd7fa2a 100644
--- a/lib/MCA/Stages/RetireStage.cpp
+++ b/lib/MCA/Stages/RetireStage.cpp
@@ -1,9 +1,8 @@
//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/MCA/Stages/Stage.cpp b/lib/MCA/Stages/Stage.cpp
index 38191645e736..ed512ac9711c 100644
--- a/lib/MCA/Stages/Stage.cpp
+++ b/lib/MCA/Stages/Stage.cpp
@@ -1,9 +1,8 @@
//===---------------------- Stage.cpp ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/MCA/Support.cpp b/lib/MCA/Support.cpp
index 335953e10481..ce1f0f6f211b 100644
--- a/lib/MCA/Support.cpp
+++ b/lib/MCA/Support.cpp
@@ -1,9 +1,8 @@
//===--------------------- Support.cpp --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -21,6 +20,22 @@ namespace mca {
#define DEBUG_TYPE "llvm-mca"
+ResourceCycles &ResourceCycles::operator+=(const ResourceCycles &RHS) {
+ if (Denominator == RHS.Denominator)
+ Numerator += RHS.Numerator;
+ else {
+ // Create a common denominator for LHS and RHS by calculating the least
+ // common multiple from the GCD.
+ unsigned GCD = GreatestCommonDivisor64(Denominator, RHS.Denominator);
+ unsigned LCM = (Denominator * RHS.Denominator) / GCD;
+ unsigned LHSNumerator = Numerator * (LCM / Denominator);
+ unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
+ Numerator = LHSNumerator + RHSNumerator;
+ Denominator = LCM;
+ }
+ return *this;
+}
+
void computeProcResourceMasks(const MCSchedModel &SM,
MutableArrayRef<uint64_t> Masks) {
unsigned ProcResourceID = 0;
@@ -57,8 +72,9 @@ void computeProcResourceMasks(const MCSchedModel &SM,
<< "\n");
for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &Desc = *SM.getProcResource(I);
- LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
- << '\n');
+ LLVM_DEBUG(dbgs() << '[' << format_decimal(I, 2) << "] " << " - "
+ << format_hex(Masks[I], 16) << " - "
+ << Desc.Name << '\n');
}
#endif
}
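ResourceCycles::operator+= adds two cycle counts kept as fractions by rescaling both onto the least common multiple of their denominators, computed from the GCD. A sketch of the same arithmetic using std::gcd and hypothetical values; this is not the llvm-mca type:

#include <cassert>
#include <numeric>

int main() {
  unsigned Num = 1, Den = 2;        // 1/2 cycle on a resource
  unsigned RHSNum = 1, RHSDen = 3;  // plus 1/3 cycle

  unsigned GCD = std::gcd(Den, RHSDen);
  unsigned LCM = (Den * RHSDen) / GCD;                // common denominator: 6
  Num = Num * (LCM / Den) + RHSNum * (LCM / RHSDen);  // 3 + 2
  Den = LCM;

  assert(Num == 5 && Den == 6);                       // 1/2 + 1/3 == 5/6
}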
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 8ec115a5566c..49e66f46ab3f 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -1,9 +1,8 @@
//===- Archive.cpp - ar File Format implementation ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -512,7 +511,7 @@ Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
StringRef Name = NameOrErr.get();
Expected<StringRef> Buf = getBuffer();
if (!Buf)
- return Buf.takeError();
+ return createFileError(Name, Buf.takeError());
return MemoryBufferRef(*Buf, Name);
}
@@ -779,19 +778,18 @@ Archive::child_iterator Archive::child_begin(Error &Err,
return child_end();
if (SkipInternal)
- return child_iterator(Child(this, FirstRegularData,
- FirstRegularStartOfFile),
- &Err);
+ return child_iterator::itr(
+ Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
const char *Loc = Data.getBufferStart() + strlen(Magic);
Child C(this, Loc, &Err);
if (Err)
return child_end();
- return child_iterator(C, &Err);
+ return child_iterator::itr(C, Err);
}
Archive::child_iterator Archive::child_end() const {
- return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
+ return child_iterator::end(Child(nullptr, nullptr, nullptr));
}
StringRef Archive::Symbol::getName() const {
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index da93602cbb28..228f6b40c5ec 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -1,9 +1,8 @@
//===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,7 +48,6 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
return BufOrErr.takeError();
NewArchiveMember M;
- assert(M.IsNew == false);
M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false);
M.MemberName = M.Buf->getBufferIdentifier();
if (!Deterministic) {
@@ -76,10 +74,11 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
bool Deterministic) {
sys::fs::file_status Status;
- int FD;
- if (auto EC = sys::fs::openFileForRead(FileName, FD))
- return errorCodeToError(EC);
- assert(FD != -1);
+ auto FDOrErr = sys::fs::openNativeFileForRead(FileName);
+ if (!FDOrErr)
+ return FDOrErr.takeError();
+ sys::fs::file_t FD = *FDOrErr;
+ assert(FD != sys::fs::kInvalidFile);
if (auto EC = sys::fs::status(FD, Status))
return errorCodeToError(EC);
@@ -95,11 +94,10 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
if (!MemberBufferOrErr)
return errorCodeToError(MemberBufferOrErr.getError());
- if (close(FD) != 0)
- return errorCodeToError(std::error_code(errno, std::generic_category()));
+ if (auto EC = sys::fs::closeFile(FD))
+ return errorCodeToError(EC);
NewArchiveMember M;
- M.IsNew = true;
M.Buf = std::move(*MemberBufferOrErr);
M.MemberName = M.Buf->getBufferIdentifier();
if (!Deterministic) {
@@ -192,35 +190,6 @@ static bool useStringTable(bool Thin, StringRef Name) {
return Thin || Name.size() >= 16 || Name.contains('/');
}
-// Compute the relative path from From to To.
-static std::string computeRelativePath(StringRef From, StringRef To) {
- if (sys::path::is_absolute(From) || sys::path::is_absolute(To))
- return To;
-
- StringRef DirFrom = sys::path::parent_path(From);
- auto FromI = sys::path::begin(DirFrom);
- auto ToI = sys::path::begin(To);
- while (*FromI == *ToI) {
- ++FromI;
- ++ToI;
- }
-
- SmallString<128> Relative;
- for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
- sys::path::append(Relative, "..");
-
- for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
- sys::path::append(Relative, *ToI);
-
-#ifdef _WIN32
- // Replace backslashes with slashes so that the path is portable between *nix
- // and Windows.
- std::replace(Relative.begin(), Relative.end(), '\\', '/');
-#endif
-
- return Relative.str();
-}
-
static bool is64BitKind(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
@@ -235,27 +204,11 @@ static bool is64BitKind(object::Archive::Kind Kind) {
llvm_unreachable("not supported for writing");
}
-static void addToStringTable(raw_ostream &Out, StringRef ArcName,
- const NewArchiveMember &M, bool Thin) {
- StringRef ID = M.Buf->getBufferIdentifier();
- if (Thin) {
- if (M.IsNew)
- Out << computeRelativePath(ArcName, ID);
- else
- Out << ID;
- } else
- Out << M.MemberName;
- Out << "/\n";
-}
-
-static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
- raw_ostream &StringTable,
- StringMap<uint64_t> &MemberNames,
- object::Archive::Kind Kind, bool Thin,
- StringRef ArcName, const NewArchiveMember &M,
- sys::TimePoint<std::chrono::seconds> ModTime,
- unsigned Size) {
-
+static void
+printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
+ StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind,
+ bool Thin, const NewArchiveMember &M,
+ sys::TimePoint<std::chrono::seconds> ModTime, unsigned Size) {
if (isBSDLike(Kind))
return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size);
@@ -266,12 +219,12 @@ static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
uint64_t NamePos;
if (Thin) {
NamePos = StringTable.tell();
- addToStringTable(StringTable, ArcName, M, Thin);
+ StringTable << M.MemberName << "/\n";
} else {
auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
if (Insertion.second) {
Insertion.first->second = StringTable.tell();
- addToStringTable(StringTable, ArcName, M, Thin);
+ StringTable << M.MemberName << "/\n";
}
NamePos = Insertion.first->second;
}
@@ -424,8 +377,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
if (!isArchiveSymbol(S))
continue;
Ret.push_back(SymNames.tell());
- if (auto EC = S.printName(SymNames))
- return errorCodeToError(EC);
+ if (Error E = S.printName(SymNames))
+ return std::move(E);
SymNames << '\0';
}
return Ret;
@@ -433,8 +386,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
- object::Archive::Kind Kind, bool Thin, StringRef ArcName,
- bool Deterministic, ArrayRef<NewArchiveMember> NewMembers) {
+ object::Archive::Kind Kind, bool Thin, bool Deterministic,
+ ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
// This ignores the symbol table, but we only need the value mod 8 and the
@@ -521,8 +474,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
else
ModTime = M.ModTime;
- printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
- M, ModTime, Buf.getBufferSize() + MemberPadding);
+ printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
+ ModTime, Buf.getBufferSize() + MemberPadding);
Out.flush();
Expected<std::vector<unsigned>> Symbols =
@@ -541,11 +494,53 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
return Ret;
}
-Error llvm::writeArchive(StringRef ArcName,
- ArrayRef<NewArchiveMember> NewMembers,
- bool WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin,
- std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
+namespace llvm {
+
+static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) {
+ SmallString<128> Ret = P;
+ std::error_code Err = sys::fs::make_absolute(Ret);
+ if (Err)
+ return Err;
+ sys::path::remove_dots(Ret, /*removedotdot*/ true);
+ return Ret;
+}
+
+// Compute the relative path from From to To.
+Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
+ ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To);
+ ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From);
+ if (!PathToOrErr || !DirFromOrErr)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+
+ const SmallString<128> &PathTo = *PathToOrErr;
+ const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr);
+
+ // Can't construct a relative path between different roots
+ if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom))
+ return sys::path::convert_to_slash(PathTo);
+
+ // Skip common prefixes
+ auto FromTo =
+ std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom),
+ sys::path::begin(PathTo));
+ auto FromI = FromTo.first;
+ auto ToI = FromTo.second;
+
+ // Construct relative path
+ SmallString<128> Relative;
+ for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
+ sys::path::append(Relative, sys::path::Style::posix, "..");
+
+ for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI)
+ sys::path::append(Relative, sys::path::Style::posix, *ToI);
+
+ return Relative.str();
+}
+
+Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
+ bool WriteSymtab, object::Archive::Kind Kind,
+ bool Deterministic, bool Thin,
+ std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
SmallString<0> SymNamesBuf;
@@ -554,7 +549,7 @@ Error llvm::writeArchive(StringRef ArcName,
raw_svector_ostream StringTable(StringTableBuf);
Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
- StringTable, SymNames, Kind, Thin, ArcName, Deterministic, NewMembers);
+ StringTable, SymNames, Kind, Thin, Deterministic, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
@@ -631,3 +626,5 @@ Error llvm::writeArchive(StringRef ArcName,
return Temp->keep(ArcName);
}
+
+} // namespace llvm
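computeArchiveRelativePath canonicalizes both paths, drops their common prefix, and rebuilds the member path relative to the archive's directory with POSIX separators, so a thin archive stays relocatable. A lexical sketch of the intended result using std::filesystem on hypothetical paths; the LLVM code itself uses llvm::sys::path:

#include <cassert>
#include <filesystem>

int main() {
  namespace fs = std::filesystem;
  // A thin archive refers to its members relative to the archive directory.
  fs::path Archive = "build/lib/libfoo.a";
  fs::path Member = "build/obj/foo.o";
  fs::path Rel = Member.lexically_relative(Archive.parent_path());
  assert(Rel == fs::path("../obj/foo.o"));
}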
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index fe41987f5c27..a953c1d8cb80 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -1,9 +1,8 @@
//===- Binary.cpp - A generic binary file ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "llvm/Object/Archive.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Object/Minidump.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/Error.h"
@@ -69,6 +69,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::coff_import_library:
case file_magic::pecoff_executable:
case file_magic::bitcode:
+ case file_magic::xcoff_object_32:
+ case file_magic::xcoff_object_64:
case file_magic::wasm_object:
return ObjectFile::createSymbolicFile(Buffer, Type, Context);
case file_magic::macho_universal_binary:
@@ -82,6 +84,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::coff_cl_gl_object:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
+ case file_magic::minidump:
+ return MinidumpFile::create(Buffer);
}
llvm_unreachable("Unexpected Binary File Type");
}
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
index dc11cc4bcffe..ff4a799be60c 100644
--- a/lib/Object/COFFImportFile.cpp
+++ b/lib/Object/COFFImportFile.cpp
@@ -1,9 +1,8 @@
//===- COFFImportFile.cpp - COFF short import file implementation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -496,7 +495,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
// COFF Header
coff_file_header Header{
- u16(0),
+ u16(Machine),
u16(NumberOfSections),
u32(0),
u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))),
@@ -596,7 +595,10 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
ImportType = IMPORT_CONST;
StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName;
- ImportNameType NameType = getNameType(SymbolName, E.Name, Machine, MinGW);
+ ImportNameType NameType = E.Noname
+ ? IMPORT_ORDINAL
+ : getNameType(SymbolName, E.Name,
+ Machine, MinGW);
Expected<std::string> Name = E.ExtName.empty()
? SymbolName
: replace(SymbolName, E.Name, E.ExtName);
diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp
index c703071b86e0..64d4cf0efda2 100644
--- a/lib/Object/COFFModuleDefinition.cpp
+++ b/lib/Object/COFFModuleDefinition.cpp
@@ -1,9 +1,8 @@
//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index fc1deeba339a..854664e679df 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -1,9 +1,8 @@
//===- COFFObjectFile.cpp - COFF object file implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -270,10 +269,9 @@ void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const {
Ref.p = reinterpret_cast<uintptr_t>(Sec);
}
-std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref,
- StringRef &Result) const {
+Expected<StringRef> COFFObjectFile::getSectionName(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- return getSectionName(Sec, Result);
+ return getSectionName(Sec);
}
uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const {
@@ -294,13 +292,13 @@ uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const {
return getSectionSize(toSec(Ref));
}
-std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref,
- StringRef &Result) const {
+Expected<ArrayRef<uint8_t>>
+COFFObjectFile::getSectionContents(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
ArrayRef<uint8_t> Res;
- std::error_code EC = getSectionContents(Sec, Res);
- Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
- return EC;
+ if (Error E = getSectionContents(Sec, Res))
+ return std::move(E);
+ return Res;
}
uint64_t COFFObjectFile::getSectionAlignment(DataRefImpl Ref) const {
@@ -1075,8 +1073,8 @@ uint32_t COFFObjectFile::getSymbolIndex(COFFSymbolRef Symbol) const {
return Index;
}
-std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
- StringRef &Res) const {
+Expected<StringRef>
+COFFObjectFile::getSectionName(const coff_section *Sec) const {
StringRef Name;
if (Sec->Name[COFF::NameSize - 1] == 0)
// Null terminated, let ::strlen figure out the length.
@@ -1090,17 +1088,18 @@ std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
uint32_t Offset;
if (Name.startswith("//")) {
if (decodeBase64StringEntry(Name.substr(2), Offset))
- return object_error::parse_failed;
+ return createStringError(object_error::parse_failed,
+ "inalid section name");
} else {
if (Name.substr(1).getAsInteger(10, Offset))
- return object_error::parse_failed;
+ return createStringError(object_error::parse_failed,
+ "invalid section name");
}
if (std::error_code EC = getString(Offset, Name))
- return EC;
+ return errorCodeToError(EC);
}
- Res = Name;
- return std::error_code();
+ return Name;
}
uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
@@ -1119,22 +1118,21 @@ uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const {
return Sec->SizeOfRawData;
}
-std::error_code
-COFFObjectFile::getSectionContents(const coff_section *Sec,
- ArrayRef<uint8_t> &Res) const {
+Error COFFObjectFile::getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const {
// In COFF, a virtual section won't have any in-file
// content, so the file pointer to the content will be zero.
if (Sec->PointerToRawData == 0)
- return std::error_code();
+ return Error::success();
// The only thing that we need to verify is that the contents is contained
// within the file bounds. We don't need to make sure it doesn't cover other
// data, as there's nothing that says that is not allowed.
uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
uint32_t SectionSize = getSectionSize(Sec);
if (checkOffset(Data, ConStart, SectionSize))
- return object_error::parse_failed;
+ return make_error<BinaryError>();
Res = makeArrayRef(reinterpret_cast<const uint8_t *>(ConStart), SectionSize);
- return std::error_code();
+ return Error::success();
}
const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
@@ -1237,6 +1235,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_REL32);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A);
@@ -1244,6 +1243,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_PAIR);
default:
return "Unknown";
}
@@ -1267,6 +1267,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR64);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH19);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_BRANCH14);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_REL32);
default:
return "Unknown";
}
@@ -1455,7 +1456,7 @@ std::error_code DelayImportDirectoryEntryRef::getName(StringRef &Result) const {
std::error_code DelayImportDirectoryEntryRef::
getDelayImportTable(const delay_import_directory_table_entry *&Result) const {
- Result = Table;
+ Result = &Table[Index];
return std::error_code();
}
diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp
index 53f084d7620e..ec15e6f69ada 100644
--- a/lib/Object/Decompressor.cpp
+++ b/lib/Object/Decompressor.cpp
@@ -1,9 +1,8 @@
//===-- Decompressor.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index cf8313f88f93..8660b1a64bdd 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -1,9 +1,8 @@
//===- ELF.cpp - ELF object file implementation ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -220,8 +219,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
switch (Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO);
STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS);
- STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF);
+ STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
}
break;
default:
@@ -254,6 +253,8 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LINKER_OPTIONS);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_CALL_GRAPH_PROFILE);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ADDRSIG);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_DEPENDENT_LIBRARIES);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_SYMPART);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
@@ -425,7 +426,7 @@ ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const {
}
template <class ELFT>
-const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
+std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
uint64_t Type) const {
#define DYNAMIC_STRINGIFY_ENUM(tag, value) \
case value: \
@@ -433,12 +434,21 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#define DYNAMIC_TAG(n, v)
switch (Arch) {
+ case ELF::EM_AARCH64:
+ switch (Type) {
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+ }
+ break;
+
case ELF::EM_HEXAGON:
switch (Type) {
#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
#include "llvm/BinaryFormat/DynamicTags.def"
#undef HEXAGON_DYNAMIC_TAG
}
+ break;
case ELF::EM_MIPS:
switch (Type) {
@@ -446,6 +456,7 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#include "llvm/BinaryFormat/DynamicTags.def"
#undef MIPS_DYNAMIC_TAG
}
+ break;
case ELF::EM_PPC64:
switch (Type) {
@@ -453,10 +464,12 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#include "llvm/BinaryFormat/DynamicTags.def"
#undef PPC64_DYNAMIC_TAG
}
+ break;
}
#undef DYNAMIC_TAG
switch (Type) {
// Now handle all dynamic tags except the architecture specific ones
+#define AARCH64_DYNAMIC_TAG(name, value)
#define MIPS_DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG(name, value)
@@ -465,18 +478,19 @@ const char *ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#define DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
#include "llvm/BinaryFormat/DynamicTags.def"
#undef DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG
#undef MIPS_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG
#undef DYNAMIC_TAG_MARKER
#undef DYNAMIC_STRINGIFY_ENUM
default:
- return "unknown";
+ return "<unknown:>0x" + utohexstr(Type, true);
}
}
template <class ELFT>
-const char *ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const {
+std::string ELFFile<ELFT>::getDynamicTagAsString(uint64_t Type) const {
return getDynamicTagAsString(getHeader()->e_machine, Type);
}
@@ -523,12 +537,15 @@ Expected<typename ELFT::DynRange> ELFFile<ELFT>::dynamicEntries() const {
}
if (Dyn.empty())
+ // TODO: this error is untested.
return createError("invalid empty dynamic section");
if (DynSecSize % sizeof(Elf_Dyn) != 0)
+ // TODO: this error is untested.
return createError("malformed dynamic section");
if (Dyn.back().d_tag != ELF::DT_NULL)
+ // TODO: this error is untested.
return createError("dynamic sections must be DT_NULL terminated");
return Dyn;
@@ -553,12 +570,14 @@ Expected<const uint8_t *> ELFFile<ELFT>::toMappedAddr(uint64_t VAddr) const {
});
if (I == LoadSegments.begin())
- return createError("Virtual address is not in any segment");
+ return createError("virtual address is not in any segment: 0x" +
+ Twine::utohexstr(VAddr));
--I;
const Elf_Phdr &Phdr = **I;
uint64_t Delta = VAddr - Phdr.p_vaddr;
if (Delta >= Phdr.p_filesz)
- return createError("Virtual address is not in any segment");
+ return createError("virtual address is not in any segment: 0x" +
+ Twine::utohexstr(VAddr));
return base() + Phdr.p_offset + Delta;
}
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 9fb3a55ac7b1..c7b715793048 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -1,9 +1,8 @@
//===- ELFObjectFile.cpp - ELF object file implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,6 +35,16 @@
using namespace llvm;
using namespace object;
+const EnumEntry<unsigned> llvm::object::ElfSymbolTypes[NumElfSymbolTypes] = {
+ {"None", "NOTYPE", ELF::STT_NOTYPE},
+ {"Object", "OBJECT", ELF::STT_OBJECT},
+ {"Function", "FUNC", ELF::STT_FUNC},
+ {"Section", "SECTION", ELF::STT_SECTION},
+ {"File", "FILE", ELF::STT_FILE},
+ {"Common", "COMMON", ELF::STT_COMMON},
+ {"TLS", "TLS", ELF::STT_TLS},
+ {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}};
+
ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source)
: ObjectFile(Type, Source) {}
@@ -139,8 +148,7 @@ SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const {
SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
SubtargetFeatures Features;
ARMAttributeParser Attributes;
- std::error_code EC = getBuildAttributes(Attributes);
- if (EC)
+ if (Error E = getBuildAttributes(Attributes))
return SubtargetFeatures();
// both ARMv7-M and R have to support thumb hardware div
@@ -186,9 +194,9 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
default:
break;
case ARMBuildAttrs::Not_Allowed:
- Features.AddFeature("vfp2", false);
- Features.AddFeature("vfp3", false);
- Features.AddFeature("vfp4", false);
+ Features.AddFeature("vfp2d16sp", false);
+ Features.AddFeature("vfp3d16sp", false);
+ Features.AddFeature("vfp4d16sp", false);
break;
case ARMBuildAttrs::AllowFPv2:
Features.AddFeature("vfp2");
@@ -222,6 +230,24 @@ SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
}
}
+ if (Attributes.hasAttribute(ARMBuildAttrs::MVE_arch)) {
+ switch(Attributes.getAttributeValue(ARMBuildAttrs::MVE_arch)) {
+ default:
+ break;
+ case ARMBuildAttrs::Not_Allowed:
+ Features.AddFeature("mve", false);
+ Features.AddFeature("mve.fp", false);
+ break;
+ case ARMBuildAttrs::AllowMVEInteger:
+ Features.AddFeature("mve.fp", false);
+ Features.AddFeature("mve");
+ break;
+ case ARMBuildAttrs::AllowMVEIntegerAndFloat:
+ Features.AddFeature("mve.fp");
+ break;
+ }
+ }
+
if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) {
switch(Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) {
default:
@@ -270,8 +296,7 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
return;
ARMAttributeParser Attributes;
- std::error_code EC = getBuildAttributes(Attributes);
- if (EC)
+ if (Error E = getBuildAttributes(Attributes))
return;
std::string Triple;
@@ -370,12 +395,13 @@ ELFObjectFileBase::getPltAddresses() const {
}
if (!Plt || !RelaPlt || !GotPlt)
return {};
- StringRef PltContents;
- if (Plt->getContents(PltContents))
+ Expected<StringRef> PltContents = Plt->getContents();
+ if (!PltContents) {
+ consumeError(PltContents.takeError());
return {};
- ArrayRef<uint8_t> PltBytes((const uint8_t *)PltContents.data(),
- Plt->getSize());
- auto PltEntries = MIA->findPltEntries(Plt->getAddress(), PltBytes,
+ }
+ auto PltEntries = MIA->findPltEntries(Plt->getAddress(),
+ arrayRefFromStringRef(*PltContents),
GotPlt->getAddress(), Triple);
// Build a map from GOT entry virtual address to PLT entry virtual address.
DenseMap<uint64_t, uint64_t> GotToPlt;
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
index 6fa23e06c409..010c5b42dac2 100644
--- a/lib/Object/Error.cpp
+++ b/lib/Object/Error.cpp
@@ -1,9 +1,8 @@
//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -79,18 +78,15 @@ const std::error_category &object::object_category() {
}
llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
- if (auto Err2 =
- handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
- // Try to handle 'M'. If successful, return a success value from
- // the handler.
- if (M->convertToErrorCode() == object_error::invalid_file_type)
- return Error::success();
+ return handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
+ // Try to handle 'M'. If successful, return a success value from
+ // the handler.
+ if (M->convertToErrorCode() == object_error::invalid_file_type)
+ return Error::success();
- // We failed to handle 'M' - return it from the handler.
- // This value will be passed back from catchErrors and
- // wind up in Err2, where it will be returned from this function.
- return Error(std::move(M));
- }))
- return Err2;
- return Err;
+ // We failed to handle 'M' - return it from the handler.
+ // This value will be passed back from catchErrors and
+ // wind up in Err2, where it will be returned from this function.
+ return Error(std::move(M));
+ });
}
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index 1ecb26d60bce..636f1521262f 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -1,9 +1,8 @@
//===- IRObjectFile.cpp - IR object file implementation ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,10 +42,9 @@ void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
Symb.p += sizeof(ModuleSymbolTable::Symbol);
}
-std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const {
+Error IRObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const {
SymTab.printSymbolName(OS, getSym(Symb));
- return std::error_code();
+ return Error::success();
}
uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
@@ -76,10 +74,12 @@ Expected<MemoryBufferRef>
IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
for (const SectionRef &Sec : Obj.sections()) {
if (Sec.isBitcode()) {
- StringRef SecContents;
- if (std::error_code EC = Sec.getContents(SecContents))
- return errorCodeToError(EC);
- return MemoryBufferRef(SecContents, Obj.getFileName());
+ Expected<StringRef> Contents = Sec.getContents();
+ if (!Contents)
+ return Contents.takeError();
+ if (Contents->size() <= 1)
+ return errorCodeToError(object_error::bitcode_section_not_found);
+ return MemoryBufferRef(*Contents, Obj.getFileName());
}
}
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index 344d565349c0..e4282b9d6bd3 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -1,9 +1,8 @@
//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -90,6 +89,8 @@ struct Builder {
std::string COFFLinkerOpts;
raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
+ std::vector<storage::Str> DependentLibraries;
+
void setStr(storage::Str &S, StringRef Value) {
S.Offset = StrtabBuilder.add(Value);
S.Size = Value.size();
@@ -141,6 +142,20 @@ Error Builder::addModule(Module *M) {
}
}
+ if (TT.isOSBinFormatELF()) {
+ if (auto E = M->materializeMetadata())
+ return E;
+ if (NamedMDNode *N = M->getNamedMetadata("llvm.dependent-libraries")) {
+ for (MDNode *MDOptions : N->operands()) {
+ const auto OperandStr =
+ cast<MDString>(cast<MDNode>(MDOptions)->getOperand(0))->getString();
+ storage::Str Specifier;
+ setStr(Specifier, OperandStr);
+ DependentLibraries.emplace_back(Specifier);
+ }
+ }
+ }
+
for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
if (Error Err = addSymbol(Msymtab, Used, Msym))
return Err;
@@ -313,7 +328,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
writeRange(Hdr.Comdats, Comdats);
writeRange(Hdr.Symbols, Syms);
writeRange(Hdr.Uncommons, Uncommons);
-
+ writeRange(Hdr.DependentLibraries, DependentLibraries);
*reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
return Error::success();
}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index ce4d1cf92e20..5aec844003c0 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1,9 +1,8 @@
//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -58,6 +57,12 @@ namespace {
} // end anonymous namespace
+static const std::array<StringRef, 17> validArchs = {
+ "i386", "x86_64", "x86_64h", "armv4t", "arm", "armv5e",
+ "armv6", "armv6m", "armv7", "armv7em", "armv7k", "armv7m",
+ "armv7s", "arm64", "arm64_32", "ppc", "ppc64",
+};
+
static Error malformedError(const Twine &Msg) {
return make_error<GenericBinaryError>("truncated or malformed object (" +
Msg + ")",
@@ -292,7 +297,10 @@ static Error parseSegmentLoadCommand(
for (unsigned J = 0; J < S.nsects; ++J) {
const char *Sec = getSectionPtr(Obj, Load, J);
Sections.push_back(Sec);
- Section s = getStruct<Section>(Obj, Sec);
+ auto SectionOrErr = getStructOrErr<Section>(Obj, Sec);
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
+ Section s = SectionOrErr.get();
if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
Obj.getHeader().filetype != MachO::MH_DSYM &&
s.flags != MachO::S_ZEROFILL &&
@@ -402,8 +410,10 @@ static Error checkSymtabCommand(const MachOObjectFile &Obj,
" LC_SYMTAB cmdsize too small");
if (*SymtabLoadCmd != nullptr)
return malformedError("more than one LC_SYMTAB command");
- MachO::symtab_command Symtab =
- getStruct<MachO::symtab_command>(Obj, Load.Ptr);
+ auto SymtabOrErr = getStructOrErr<MachO::symtab_command>(Obj, Load.Ptr);
+ if (!SymtabOrErr)
+ return SymtabOrErr.takeError();
+ MachO::symtab_command Symtab = SymtabOrErr.get();
if (Symtab.cmdsize != sizeof(MachO::symtab_command))
return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) +
" has incorrect cmdsize");
@@ -458,8 +468,11 @@ static Error checkDysymtabCommand(const MachOObjectFile &Obj,
" LC_DYSYMTAB cmdsize too small");
if (*DysymtabLoadCmd != nullptr)
return malformedError("more than one LC_DYSYMTAB command");
- MachO::dysymtab_command Dysymtab =
- getStruct<MachO::dysymtab_command>(Obj, Load.Ptr);
+ auto DysymtabOrErr =
+ getStructOrErr<MachO::dysymtab_command>(Obj, Load.Ptr);
+ if (!DysymtabOrErr)
+ return DysymtabOrErr.takeError();
+ MachO::dysymtab_command Dysymtab = DysymtabOrErr.get();
if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command))
return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) +
" has incorrect cmdsize");
@@ -589,8 +602,11 @@ static Error checkLinkeditDataCommand(const MachOObjectFile &Obj,
CmdName + " cmdsize too small");
if (*LoadCmd != nullptr)
return malformedError("more than one " + Twine(CmdName) + " command");
- MachO::linkedit_data_command LinkData =
- getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr);
+ auto LinkDataOrError =
+ getStructOrErr<MachO::linkedit_data_command>(Obj, Load.Ptr);
+ if (!LinkDataOrError)
+ return LinkDataOrError.takeError();
+ MachO::linkedit_data_command LinkData = LinkDataOrError.get();
if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command))
return malformedError(Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " has incorrect cmdsize");
@@ -624,8 +640,11 @@ static Error checkDyldInfoCommand(const MachOObjectFile &Obj,
if (*LoadCmd != nullptr)
return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY "
"command");
- MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(Obj, Load.Ptr);
+ auto DyldInfoOrErr =
+ getStructOrErr<MachO::dyld_info_command>(Obj, Load.Ptr);
+ if (!DyldInfoOrErr)
+ return DyldInfoOrErr.takeError();
+ MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command))
return malformedError(Twine(CmdName) + " command " +
Twine(LoadCommandIndex) + " has incorrect cmdsize");
@@ -715,7 +734,10 @@ static Error checkDylibCommand(const MachOObjectFile &Obj,
if (Load.C.cmdsize < sizeof(MachO::dylib_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " cmdsize too small");
- MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr);
+ auto CommandOrErr = getStructOrErr<MachO::dylib_command>(Obj, Load.Ptr);
+ if (!CommandOrErr)
+ return CommandOrErr.takeError();
+ MachO::dylib_command D = CommandOrErr.get();
if (D.dylib.name < sizeof(MachO::dylib_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " name.offset field too small, not past "
@@ -761,7 +783,10 @@ static Error checkDyldCommand(const MachOObjectFile &Obj,
if (Load.C.cmdsize < sizeof(MachO::dylinker_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " cmdsize too small");
- MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr);
+ auto CommandOrErr = getStructOrErr<MachO::dylinker_command>(Obj, Load.Ptr);
+ if (!CommandOrErr)
+ return CommandOrErr.takeError();
+ MachO::dylinker_command D = CommandOrErr.get();
if (D.name < sizeof(MachO::dylinker_command))
return malformedError("load command " + Twine(LoadCommandIndex) + " " +
CmdName + " name.offset field too small, not past "
@@ -806,7 +831,10 @@ static Error checkNoteCommand(const MachOObjectFile &Obj,
if (Load.C.cmdsize != sizeof(MachO::note_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_NOTE has incorrect cmdsize");
- MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr);
+ auto NoteCmdOrErr = getStructOrErr<MachO::note_command>(Obj, Load.Ptr);
+ if (!NoteCmdOrErr)
+ return NoteCmdOrErr.takeError();
+ MachO::note_command Nt = NoteCmdOrErr.get();
uint64_t FileSize = Obj.getData().size();
if (Nt.offset > FileSize)
return malformedError("offset field of LC_NOTE command " +
@@ -829,8 +857,11 @@ parseBuildVersionCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
SmallVectorImpl<const char*> &BuildTools,
uint32_t LoadCommandIndex) {
- MachO::build_version_command BVC =
- getStruct<MachO::build_version_command>(Obj, Load.Ptr);
+ auto BVCOrErr =
+ getStructOrErr<MachO::build_version_command>(Obj, Load.Ptr);
+ if (!BVCOrErr)
+ return BVCOrErr.takeError();
+ MachO::build_version_command BVC = BVCOrErr.get();
if (Load.C.cmdsize !=
sizeof(MachO::build_version_command) +
BVC.ntools * sizeof(MachO::build_tool_version))
@@ -851,7 +882,10 @@ static Error checkRpathCommand(const MachOObjectFile &Obj,
if (Load.C.cmdsize < sizeof(MachO::rpath_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_RPATH cmdsize too small");
- MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr);
+ auto ROrErr = getStructOrErr<MachO::rpath_command>(Obj, Load.Ptr);
+ if (!ROrErr)
+ return ROrErr.takeError();
+ MachO::rpath_command R = ROrErr.get();
if (R.path < sizeof(MachO::rpath_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_RPATH path.offset field too small, not past "
@@ -904,8 +938,11 @@ static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
if (Load.C.cmdsize < sizeof(MachO::linker_option_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
" LC_LINKER_OPTION cmdsize too small");
- MachO::linker_option_command L =
- getStruct<MachO::linker_option_command>(Obj, Load.Ptr);
+ auto LinkOptionOrErr =
+ getStructOrErr<MachO::linker_option_command>(Obj, Load.Ptr);
+ if (!LinkOptionOrErr)
+ return LinkOptionOrErr.takeError();
+ MachO::linker_option_command L = LinkOptionOrErr.get();
// Make sure the count of strings is correct.
const char *string = (const char *)Load.Ptr +
sizeof(struct MachO::linker_option_command);
@@ -919,6 +956,10 @@ static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
if (left > 0) {
i++;
uint32_t NullPos = StringRef(string, left).find('\0');
+ if (0xffffffff == NullPos)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_LINKER_OPTION string #" + Twine(i) +
+ " is not NULL terminated");
uint32_t len = std::min(NullPos, left) + 1;
string += len;
left -= len;
@@ -965,8 +1006,11 @@ static Error checkThreadCommand(const MachOObjectFile &Obj,
if (Load.C.cmdsize < sizeof(MachO::thread_command))
return malformedError("load command " + Twine(LoadCommandIndex) +
CmdName + " cmdsize too small");
- MachO::thread_command T =
- getStruct<MachO::thread_command>(Obj, Load.Ptr);
+ auto ThreadCommandOrErr =
+ getStructOrErr<MachO::thread_command>(Obj, Load.Ptr);
+ if (!ThreadCommandOrErr)
+ return ThreadCommandOrErr.takeError();
+ MachO::thread_command T = ThreadCommandOrErr.get();
const char *state = Load.Ptr + sizeof(MachO::thread_command);
const char *end = Load.Ptr + T.cmdsize;
uint32_t nflavor = 0;
@@ -1097,7 +1141,8 @@ static Error checkThreadCommand(const MachOObjectFile &Obj,
"flavor number " + Twine(nflavor) + " in " +
CmdName + " command");
}
- } else if (cputype == MachO::CPU_TYPE_ARM64) {
+ } else if (cputype == MachO::CPU_TYPE_ARM64 ||
+ cputype == MachO::CPU_TYPE_ARM64_32) {
if (flavor == MachO::ARM_THREAD_STATE64) {
if (count != MachO::ARM_THREAD_STATE64_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
@@ -1156,8 +1201,10 @@ static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj,
" LC_TWOLEVEL_HINTS has incorrect cmdsize");
if (*LoadCmd != nullptr)
return malformedError("more than one LC_TWOLEVEL_HINTS command");
- MachO::twolevel_hints_command Hints =
- getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+ auto HintsOrErr = getStructOrErr<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+  if (!HintsOrErr)
+ return HintsOrErr.takeError();
+ MachO::twolevel_hints_command Hints = HintsOrErr.get();
uint64_t FileSize = Obj.getData().size();
if (Hints.offset > FileSize)
return malformedError("offset field of LC_TWOLEVEL_HINTS command " +
@@ -1658,36 +1705,35 @@ Error MachOObjectFile::checkSymbolTable() const {
} else {
MachO::nlist STE = getSymbolTableEntry(SymDRI);
NType = STE.n_type;
- NType = STE.n_type;
NSect = STE.n_sect;
NDesc = STE.n_desc;
NStrx = STE.n_strx;
NValue = STE.n_value;
}
- if ((NType & MachO::N_STAB) == 0 &&
- (NType & MachO::N_TYPE) == MachO::N_SECT) {
- if (NSect == 0 || NSect > Sections.size())
- return malformedError("bad section index: " + Twine((int)NSect) +
- " for symbol at index " + Twine(SymbolIndex));
- }
- if ((NType & MachO::N_STAB) == 0 &&
- (NType & MachO::N_TYPE) == MachO::N_INDR) {
- if (NValue >= S.strsize)
- return malformedError("bad n_value: " + Twine((int)NValue) + " past "
- "the end of string table, for N_INDR symbol at "
- "index " + Twine(SymbolIndex));
- }
- if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
- (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
- (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
- uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
- if (LibraryOrdinal != 0 &&
- LibraryOrdinal != MachO::EXECUTABLE_ORDINAL &&
- LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL &&
- LibraryOrdinal - 1 >= Libraries.size() ) {
- return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) +
- " for symbol at index " + Twine(SymbolIndex));
+ if ((NType & MachO::N_STAB) == 0) {
+ if ((NType & MachO::N_TYPE) == MachO::N_SECT) {
+ if (NSect == 0 || NSect > Sections.size())
+ return malformedError("bad section index: " + Twine((int)NSect) +
+ " for symbol at index " + Twine(SymbolIndex));
+ }
+ if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
+ if (NValue >= S.strsize)
+ return malformedError("bad n_value: " + Twine((int)NValue) + " past "
+ "the end of string table, for N_INDR symbol at "
+ "index " + Twine(SymbolIndex));
}
+ if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
+ (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
+ (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
+ uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
+ if (LibraryOrdinal != 0 &&
+ LibraryOrdinal != MachO::EXECUTABLE_ORDINAL &&
+ LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL &&
+ LibraryOrdinal - 1 >= Libraries.size() ) {
+ return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) +
+ " for symbol at index " + Twine(SymbolIndex));
+ }
+ }
}
if (NStrx >= S.strsize)
return malformedError("bad string table index: " + Twine((int)NStrx) +
@@ -1861,11 +1907,9 @@ void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const {
Sec.d.a++;
}
-std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec,
- StringRef &Result) const {
+Expected<StringRef> MachOObjectFile::getSectionName(DataRefImpl Sec) const {
ArrayRef<char> Raw = getSectionRawName(Sec);
- Result = parseSegmentOrSectionName(Raw.data());
- return std::error_code();
+ return parseSegmentOrSectionName(Raw.data());
}
uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const {
@@ -1907,8 +1951,8 @@ uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
return SectSize;
}
-std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
- StringRef &Res) const {
+Expected<ArrayRef<uint8_t>>
+MachOObjectFile::getSectionContents(DataRefImpl Sec) const {
uint32_t Offset;
uint64_t Size;
@@ -1922,8 +1966,7 @@ std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
Size = Sect.size;
}
- Res = this->getData().substr(Offset, Size);
- return std::error_code();
+ return arrayRefFromStringRef(getData().substr(Offset, Size));
}
uint64_t MachOObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -1998,9 +2041,8 @@ bool MachOObjectFile::isSectionVirtual(DataRefImpl Sec) const {
bool MachOObjectFile::isSectionBitcode(DataRefImpl Sec) const {
StringRef SegmentName = getSectionFinalSegmentName(Sec);
- StringRef SectName;
- if (!getSectionName(Sec, SectName))
- return (SegmentName == "__LLVM" && SectName == "__bitcode");
+ if (Expected<StringRef> NameOrErr = getSectionName(Sec))
+ return (SegmentName == "__LLVM" && *NameOrErr == "__bitcode");
return false;
}
@@ -2172,7 +2214,8 @@ void MachOObjectFile::getRelocationTypeName(
res = Table[RType];
break;
}
- case Triple::aarch64: {
+ case Triple::aarch64:
+ case Triple::aarch64_32: {
static const char *const Table[] = {
"ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR",
"ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21",
@@ -2242,9 +2285,18 @@ uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
// one of the two following forms:
// libFoo.A.dylib
// libFoo.dylib
+//
// The library may have a suffix trailing the name Foo of the form:
// libFoo_profile.A.dylib
// libFoo_profile.dylib
+// These dyld image suffixes are separated from the short name by a '_'
+// character. Because the '_' character is commonly used to separate words in
+// filenames, guessLibraryShortName() cannot reliably separate a dylib's short
+// name from an arbitrary image suffix; imagine if both the short name and the
+// suffix contain an '_' character! To better deal with this ambiguity,
+// guessLibraryShortName() will recognize only "_debug" and "_profile" as valid
+// Suffix values. Calling code needs to be tolerant of guessLibraryShortName()
+// guessing incorrectly.
//
// The Name of the dynamic library is also recognized as a library name if it
// has the following form:
@@ -2252,7 +2304,6 @@ uint8_t MachOObjectFile::getRelocationLength(DataRefImpl Rel) const {
//
// If the Name of the dynamic library is none of the forms above then a NULL
// StringRef is returned.
-//
StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
bool &isFramework,
StringRef &Suffix) {
@@ -2272,7 +2323,10 @@ StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
Idx = Foo.rfind('_');
if (Idx != Foo.npos && Foo.size() >= 2) {
Suffix = Foo.slice(Idx, Foo.npos);
- Foo = Foo.slice(0, Idx);
+ if (Suffix != "_debug" && Suffix != "_profile")
+ Suffix = StringRef();
+ else
+ Foo = Foo.slice(0, Idx);
}
// First look for the form Foo.framework/Foo
@@ -2333,10 +2387,14 @@ guess_library:
else
b = b+1;
// ignore any suffix after an underbar like Foo_profile.A.dylib
- Idx = Name.find('_', b);
+ Idx = Name.rfind('_');
if (Idx != Name.npos && Idx != b) {
Lib = Name.slice(b, Idx);
Suffix = Name.slice(Idx, a);
+ if (Suffix != "_debug" && Suffix != "_profile") {
+ Suffix = StringRef();
+ Lib = Name.slice(b, a);
+ }
}
else
Lib = Name.slice(b, a);
@@ -2381,8 +2439,11 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
// all the Libraries.
if (LibrariesShortNames.size() == 0) {
for (unsigned i = 0; i < Libraries.size(); i++) {
- MachO::dylib_command D =
- getStruct<MachO::dylib_command>(*this, Libraries[i]);
+ auto CommandOrErr =
+ getStructOrErr<MachO::dylib_command>(*this, Libraries[i]);
+ if (!CommandOrErr)
+ return object_error::parse_failed;
+ MachO::dylib_command D = CommandOrErr.get();
if (D.dylib.name >= D.cmdsize)
return object_error::parse_failed;
const char *P = (const char *)(Libraries[i]) + D.dylib.name;
@@ -2485,6 +2546,8 @@ StringRef MachOObjectFile::getFileFormatName() const {
return "Mach-O 32-bit i386";
case MachO::CPU_TYPE_ARM:
return "Mach-O arm";
+ case MachO::CPU_TYPE_ARM64_32:
+ return "Mach-O arm64 (ILP32)";
case MachO::CPU_TYPE_POWERPC:
return "Mach-O 32-bit ppc";
default:
@@ -2514,6 +2577,8 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
return Triple::arm;
case MachO::CPU_TYPE_ARM64:
return Triple::aarch64;
+ case MachO::CPU_TYPE_ARM64_32:
+ return Triple::aarch64_32;
case MachO::CPU_TYPE_POWERPC:
return Triple::ppc;
case MachO::CPU_TYPE_POWERPC64:
@@ -2620,6 +2685,17 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
default:
return Triple();
}
+ case MachO::CPU_TYPE_ARM64_32:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_ARM64_32_V8:
+ if (McpuDefault)
+ *McpuDefault = "cyclone";
+ if (ArchFlag)
+ *ArchFlag = "arm64_32";
+ return Triple("arm64_32-apple-darwin");
+ default:
+ return Triple();
+ }
case MachO::CPU_TYPE_POWERPC:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_POWERPC_ALL:
@@ -2648,26 +2724,12 @@ Triple MachOObjectFile::getHostArch() {
}
bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
- return StringSwitch<bool>(ArchFlag)
- .Case("i386", true)
- .Case("x86_64", true)
- .Case("x86_64h", true)
- .Case("armv4t", true)
- .Case("arm", true)
- .Case("armv5e", true)
- .Case("armv6", true)
- .Case("armv6m", true)
- .Case("armv7", true)
- .Case("armv7em", true)
- .Case("armv7k", true)
- .Case("armv7m", true)
- .Case("armv7s", true)
- .Case("arm64", true)
- .Case("ppc", true)
- .Case("ppc64", true)
- .Default(false);
+ return std::find(validArchs.cbegin(), validArchs.cend(), ArchFlag) !=
+ validArchs.cend();
}
+ArrayRef<StringRef> MachOObjectFile::getValidArchs() { return validArchs; }
+
Triple::ArchType MachOObjectFile::getArch() const {
return getArch(getCPUType(*this));
}
@@ -3102,8 +3164,8 @@ void MachORebaseEntry::moveNext() {
moveToEnd();
return;
}
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
Twine(error) + " for opcode at: 0x" +
@@ -3127,8 +3189,8 @@ void MachORebaseEntry::moveNext() {
moveToEnd();
return;
}
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
" for opcode at: 0x" +
@@ -3142,8 +3204,8 @@ void MachORebaseEntry::moveNext() {
SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " +
Twine(error) + " for opcode at: 0x" +
@@ -3152,8 +3214,8 @@ void MachORebaseEntry::moveNext() {
return;
}
SegmentOffset += ImmValue * PointerSize;
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- false);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E =
malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED "
@@ -3169,15 +3231,6 @@ void MachORebaseEntry::moveNext() {
SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
- if (error) {
- *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " +
- Twine(error) + " for opcode at: 0x" +
- Twine::utohexstr(OpcodeStart - Opcodes.begin()));
- moveToEnd();
- return;
- }
AdvanceAmount = PointerSize;
Skip = 0;
Count = ImmValue;
@@ -3185,8 +3238,8 @@ void MachORebaseEntry::moveNext() {
RemainingLoopCount = ImmValue - 1;
else
RemainingLoopCount = 0;
- error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
- SegmentIndex, SegmentOffset);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " +
Twine(error) + " for opcode at: 0x" +
@@ -3203,15 +3256,6 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
- if (error) {
- *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
- Twine(error) + " for opcode at: 0x" +
- Twine::utohexstr(OpcodeStart - Opcodes.begin()));
- moveToEnd();
- return;
- }
AdvanceAmount = PointerSize;
Skip = 0;
Count = readULEB128(&error);
@@ -3226,8 +3270,8 @@ void MachORebaseEntry::moveNext() {
RemainingLoopCount = Count - 1;
else
RemainingLoopCount = 0;
- error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
- SegmentIndex, SegmentOffset);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
Twine(error) + " for opcode at: 0x" +
@@ -3244,15 +3288,6 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
- if (error) {
- *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
- Twine(error) + " for opcode at: 0x" +
- Twine::utohexstr(OpcodeStart - Opcodes.begin()));
- moveToEnd();
- return;
- }
Skip = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
@@ -3264,8 +3299,8 @@ void MachORebaseEntry::moveNext() {
AdvanceAmount = Skip + PointerSize;
Count = 1;
RemainingLoopCount = 0;
- error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
- SegmentIndex, SegmentOffset);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
Twine(error) + " for opcode at: 0x" +
@@ -3282,16 +3317,6 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
- error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
- true);
- if (error) {
- *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
- "ULEB " +
- Twine(error) + " for opcode at: 0x" +
- Twine::utohexstr(OpcodeStart - Opcodes.begin()));
- moveToEnd();
- return;
- }
Count = readULEB128(&error);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
@@ -3316,8 +3341,8 @@ void MachORebaseEntry::moveNext() {
}
AdvanceAmount = Skip + PointerSize;
- error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
- SegmentIndex, SegmentOffset);
+ error = O->RebaseEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize, Count, Skip);
if (error) {
*E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
"ULEB " +
@@ -3624,7 +3649,8 @@ void MachOBindEntry::moveNext() {
moveToEnd();
return;
}
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
Twine(error) + " for opcode at: 0x" +
@@ -3648,7 +3674,8 @@ void MachOBindEntry::moveNext() {
moveToEnd();
return;
}
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " + Twine(error) +
" for opcode at: 0x" +
@@ -3664,7 +3691,8 @@ void MachOBindEntry::moveNext() {
case MachO::BIND_OPCODE_DO_BIND:
AdvanceAmount = PointerSize;
RemainingLoopCount = 0;
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) +
" for opcode at: 0x" +
@@ -3701,7 +3729,8 @@ void MachOBindEntry::moveNext() {
moveToEnd();
return;
}
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " +
Twine(error) + " for opcode at: 0x" +
@@ -3737,8 +3766,8 @@ void MachOBindEntry::moveNext() {
       // Note, this is not really an error until the next bind, but it makes
       // no sense for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not to be followed
       // by another bind operation.
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset +
- AdvanceAmount, false);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset +
+ AdvanceAmount, PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding "
"ULEB) " +
@@ -3764,7 +3793,8 @@ void MachOBindEntry::moveNext() {
moveToEnd();
return;
}
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize);
if (error) {
*E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " +
Twine(error) + " for opcode at: 0x" +
@@ -3792,8 +3822,8 @@ void MachOBindEntry::moveNext() {
}
AdvanceAmount = ImmValue * PointerSize + PointerSize;
RemainingLoopCount = 0;
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset +
- AdvanceAmount, false);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset +
+ AdvanceAmount, PointerSize);
if (error) {
*E =
malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
@@ -3839,15 +3869,6 @@ void MachOBindEntry::moveNext() {
moveToEnd();
return;
}
- error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
- if (error) {
- *E =
- malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " +
- Twine(error) + " for opcode at: 0x" +
- Twine::utohexstr(OpcodeStart - Opcodes.begin()));
- moveToEnd();
- return;
- }
if (SymbolName == StringRef()) {
*E = malformedError(
"for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
@@ -3866,8 +3887,8 @@ void MachOBindEntry::moveNext() {
moveToEnd();
return;
}
- error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize,
- SegmentIndex, SegmentOffset);
+ error = O->BindEntryCheckSegAndOffsets(SegmentIndex, SegmentOffset,
+ PointerSize, Count, Skip);
if (error) {
*E =
malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB " +
@@ -3990,53 +4011,40 @@ BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) {
MaxSegIndex = CurSegIndex;
}
-// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to
-// validate a MachOBindEntry or MachORebaseEntry.
-const char * BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex,
- uint64_t SegOffset,
- bool endInvalid) {
+// For use with a SegIndex, SegOffset, and PointerSize triple in
+// MachOBindEntry::moveNext() to validate a MachOBindEntry or MachORebaseEntry.
+//
+// Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists
+// that fully contains a pointer at that location. Multiple fixups in a bind
+// (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can
+// be tested via the Count and Skip parameters.
+const char * BindRebaseSegInfo::checkSegAndOffsets(int32_t SegIndex,
+ uint64_t SegOffset,
+ uint8_t PointerSize,
+ uint32_t Count,
+ uint32_t Skip) {
if (SegIndex == -1)
return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB";
if (SegIndex >= MaxSegIndex)
return "bad segIndex (too large)";
- for (const SectionInfo &SI : Sections) {
- if (SI.SegmentIndex != SegIndex)
- continue;
- if (SI.OffsetInSegment > SegOffset)
- continue;
- if (SegOffset > (SI.OffsetInSegment + SI.Size))
- continue;
- if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size))
- continue;
- return nullptr;
- }
- return "bad segOffset, too large";
-}
-
-// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for
-// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in
-// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for
-// REBASE_OPCODE_DO_*_TIMES* opcodes. The SegIndex and SegOffset must have
-// been already checked.
-const char * BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip,
- uint8_t PointerSize,
- int32_t SegIndex,
- uint64_t SegOffset) {
- const SectionInfo &SI = findSection(SegIndex, SegOffset);
- uint64_t addr = SI.SegmentStartAddress + SegOffset;
- if (addr >= SI.Address + SI.Size)
- return "bad segOffset, too large";
- uint64_t i = 0;
- if (Count > 1)
- i = (Skip + PointerSize) * (Count - 1);
- else if (Count == 1)
- i = Skip + PointerSize;
- if (addr + i >= SI.Address + SI.Size) {
- // For rebase opcodes they can step from one section to another.
- uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress;
- const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false);
- if (error)
- return "bad count and skip, too large";
+ for (uint32_t i = 0; i < Count; ++i) {
+ uint32_t Start = SegOffset + i * (PointerSize + Skip);
+ uint32_t End = Start + PointerSize;
+ bool Found = false;
+ for (const SectionInfo &SI : Sections) {
+ if (SI.SegmentIndex != SegIndex)
+ continue;
+      if (SI.OffsetInSegment <= Start &&
+          Start < SI.OffsetInSegment + SI.Size) {
+        if (End <= SI.OffsetInSegment + SI.Size) {
+          Found = true;
+          break;
+        } else {
+          return "bad offset, extends beyond section boundary";
+        }
+ }
+ }
+ if (!Found)
+ return "bad offset, not in section";
}
return nullptr;
}
@@ -4514,8 +4522,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
- MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ auto DyldInfoOrErr =
+ getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ if (!DyldInfoOrErr)
+ return None;
+ MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off));
return makeArrayRef(Ptr, DyldInfo.rebase_size);
@@ -4525,8 +4536,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
- MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ auto DyldInfoOrErr =
+ getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ if (!DyldInfoOrErr)
+ return None;
+ MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off));
return makeArrayRef(Ptr, DyldInfo.bind_size);
@@ -4536,8 +4550,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
- MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ auto DyldInfoOrErr =
+ getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ if (!DyldInfoOrErr)
+ return None;
+ MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off));
return makeArrayRef(Ptr, DyldInfo.weak_bind_size);
@@ -4547,8 +4564,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
if (!DyldInfoLoadCmd)
return None;
- MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ auto DyldInfoOrErr =
+ getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ if (!DyldInfoOrErr)
+ return None;
+ MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off));
return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
@@ -4558,8 +4578,11 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
if (!DyldInfoLoadCmd)
return None;
- MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ auto DyldInfoOrErr =
+ getStructOrErr<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
+ if (!DyldInfoOrErr)
+ return None;
+ MachO::dyld_info_command DyldInfo = DyldInfoOrErr.get();
const uint8_t *Ptr =
reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off));
return makeArrayRef(Ptr, DyldInfo.export_size);
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 309708e9b37c..b3f0993412c6 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -1,9 +1,8 @@
//===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Object/Minidump.cpp b/lib/Object/Minidump.cpp
new file mode 100644
index 000000000000..7b5b21558699
--- /dev/null
+++ b/lib/Object/Minidump.cpp
@@ -0,0 +1,137 @@
+//===- Minidump.cpp - Minidump object file implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Minidump.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::minidump;
+
+Optional<ArrayRef<uint8_t>>
+MinidumpFile::getRawStream(minidump::StreamType Type) const {
+ auto It = StreamMap.find(Type);
+ if (It != StreamMap.end())
+ return getRawStream(Streams[It->second]);
+ return None;
+}
+
+Expected<std::string> MinidumpFile::getString(size_t Offset) const {
+ // Minidump strings consist of a 32-bit length field, which gives the size of
+ // the string in *bytes*. This is followed by the actual string encoded in
+ // UTF16.
+ auto ExpectedSize =
+ getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1);
+ if (!ExpectedSize)
+ return ExpectedSize.takeError();
+ size_t Size = (*ExpectedSize)[0];
+ if (Size % 2 != 0)
+ return createError("String size not even");
+ Size /= 2;
+ if (Size == 0)
+ return "";
+
+ Offset += sizeof(support::ulittle32_t);
+ auto ExpectedData =
+ getDataSliceAs<support::ulittle16_t>(getData(), Offset, Size);
+ if (!ExpectedData)
+ return ExpectedData.takeError();
+
+ SmallVector<UTF16, 32> WStr(Size);
+ copy(*ExpectedData, WStr.begin());
+
+ std::string Result;
+ if (!convertUTF16ToUTF8String(WStr, Result))
+ return createError("String decoding failed");
+
+ return Result;
+}
+
+template <typename T>
+Expected<ArrayRef<T>> MinidumpFile::getListStream(StreamType Stream) const {
+ auto OptionalStream = getRawStream(Stream);
+ if (!OptionalStream)
+ return createError("No such stream");
+ auto ExpectedSize =
+ getDataSliceAs<support::ulittle32_t>(*OptionalStream, 0, 1);
+ if (!ExpectedSize)
+ return ExpectedSize.takeError();
+
+ size_t ListSize = ExpectedSize.get()[0];
+
+ size_t ListOffset = 4;
+ // Some producers insert additional padding bytes to align the list to an
+ // 8-byte boundary. Check for that by comparing the list size with the overall
+ // stream size.
+ if (ListOffset + sizeof(T) * ListSize < OptionalStream->size())
+ ListOffset = 8;
+
+ return getDataSliceAs<T>(*OptionalStream, ListOffset, ListSize);
+}
+template Expected<ArrayRef<Module>>
+ MinidumpFile::getListStream(StreamType) const;
+template Expected<ArrayRef<Thread>>
+ MinidumpFile::getListStream(StreamType) const;
+template Expected<ArrayRef<MemoryDescriptor>>
+ MinidumpFile::getListStream(StreamType) const;
+
+Expected<ArrayRef<uint8_t>>
+MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
+ // Check for overflow.
+ if (Offset + Size < Offset || Offset + Size < Size ||
+ Offset + Size > Data.size())
+ return createEOFError();
+ return Data.slice(Offset, Size);
+}
+
+Expected<std::unique_ptr<MinidumpFile>>
+MinidumpFile::create(MemoryBufferRef Source) {
+ ArrayRef<uint8_t> Data = arrayRefFromStringRef(Source.getBuffer());
+ auto ExpectedHeader = getDataSliceAs<minidump::Header>(Data, 0, 1);
+ if (!ExpectedHeader)
+ return ExpectedHeader.takeError();
+
+ const minidump::Header &Hdr = (*ExpectedHeader)[0];
+ if (Hdr.Signature != Header::MagicSignature)
+ return createError("Invalid signature");
+ if ((Hdr.Version & 0xffff) != Header::MagicVersion)
+ return createError("Invalid version");
+
+ auto ExpectedStreams = getDataSliceAs<Directory>(Data, Hdr.StreamDirectoryRVA,
+ Hdr.NumberOfStreams);
+ if (!ExpectedStreams)
+ return ExpectedStreams.takeError();
+
+ DenseMap<StreamType, std::size_t> StreamMap;
+ for (const auto &Stream : llvm::enumerate(*ExpectedStreams)) {
+ StreamType Type = Stream.value().Type;
+ const LocationDescriptor &Loc = Stream.value().Location;
+
+ auto ExpectedStream = getDataSlice(Data, Loc.RVA, Loc.DataSize);
+ if (!ExpectedStream)
+ return ExpectedStream.takeError();
+
+ if (Type == StreamType::Unused && Loc.DataSize == 0) {
+ // Ignore dummy streams. This is technically ill-formed, but a number of
+ // existing minidumps seem to contain such streams.
+ continue;
+ }
+
+ if (Type == DenseMapInfo<StreamType>::getEmptyKey() ||
+ Type == DenseMapInfo<StreamType>::getTombstoneKey())
+ return createError("Cannot handle one of the minidump streams");
+
+ // Update the directory map, checking for duplicate stream types.
+ if (!StreamMap.try_emplace(Type, Stream.index()).second)
+ return createError("Duplicate stream type");
+ }
+
+ return std::unique_ptr<MinidumpFile>(
+ new MinidumpFile(Source, Hdr, *ExpectedStreams, std::move(StreamMap)));
+}
diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp
index 33ce7d8109fb..d1e0ce5edae1 100644
--- a/lib/Object/ModuleSymbolTable.cpp
+++ b/lib/Object/ModuleSymbolTable.cpp
@@ -1,9 +1,8 @@
//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index f5de2e1d5ce2..d84798cc6dd0 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -1,9 +1,8 @@
//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,9 @@
#include "llvm-c/Object.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/MachOUniversal.h"
using namespace llvm;
using namespace object;
@@ -58,6 +59,121 @@ wrap(const relocation_iterator *SI) {
(const_cast<relocation_iterator*>(SI));
}
+/*--.. Operations on binary files ..........................................--*/
+
+LLVMBinaryRef LLVMCreateBinary(LLVMMemoryBufferRef MemBuf,
+ LLVMContextRef Context,
+ char **ErrorMessage) {
+ auto maybeContext = Context ? unwrap(Context) : nullptr;
+ Expected<std::unique_ptr<Binary>> ObjOrErr(
+ createBinary(unwrap(MemBuf)->getMemBufferRef(), maybeContext));
+ if (!ObjOrErr) {
+ *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str());
+ return nullptr;
+ }
+
+ return wrap(ObjOrErr.get().release());
+}
+
+LLVMMemoryBufferRef LLVMBinaryCopyMemoryBuffer(LLVMBinaryRef BR) {
+ auto Buf = unwrap(BR)->getMemoryBufferRef();
+ return wrap(llvm::MemoryBuffer::getMemBuffer(
+ Buf.getBuffer(), Buf.getBufferIdentifier(),
+ /*RequiresNullTerminator*/false).release());
+}
+
+void LLVMDisposeBinary(LLVMBinaryRef BR) {
+ delete unwrap(BR);
+}
+
+LLVMBinaryType LLVMBinaryGetType(LLVMBinaryRef BR) {
+ class BinaryTypeMapper final : public Binary {
+ public:
+ static LLVMBinaryType mapBinaryTypeToLLVMBinaryType(unsigned Kind) {
+ switch (Kind) {
+ case ID_Archive:
+ return LLVMBinaryTypeArchive;
+ case ID_MachOUniversalBinary:
+ return LLVMBinaryTypeMachOUniversalBinary;
+ case ID_COFFImportFile:
+ return LLVMBinaryTypeCOFFImportFile;
+ case ID_IR:
+ return LLVMBinaryTypeIR;
+ case ID_WinRes:
+ return LLVMBinaryTypeWinRes;
+ case ID_COFF:
+ return LLVMBinaryTypeCOFF;
+ case ID_ELF32L:
+ return LLVMBinaryTypeELF32L;
+ case ID_ELF32B:
+ return LLVMBinaryTypeELF32B;
+ case ID_ELF64L:
+ return LLVMBinaryTypeELF64L;
+ case ID_ELF64B:
+ return LLVMBinaryTypeELF64B;
+ case ID_MachO32L:
+ return LLVMBinaryTypeMachO32L;
+ case ID_MachO32B:
+ return LLVMBinaryTypeMachO32B;
+ case ID_MachO64L:
+ return LLVMBinaryTypeMachO64L;
+ case ID_MachO64B:
+ return LLVMBinaryTypeMachO64B;
+ case ID_Wasm:
+ return LLVMBinaryTypeWasm;
+ case ID_StartObjects:
+ case ID_EndObjects:
+ llvm_unreachable("Marker types are not valid binary kinds!");
+ default:
+ llvm_unreachable("Unknown binary kind!");
+ }
+ }
+ };
+ return BinaryTypeMapper::mapBinaryTypeToLLVMBinaryType(unwrap(BR)->getType());
+}
+
+LLVMBinaryRef LLVMMachOUniversalBinaryCopyObjectForArch(LLVMBinaryRef BR,
+ const char *Arch,
+ size_t ArchLen,
+ char **ErrorMessage) {
+ auto universal = cast<MachOUniversalBinary>(unwrap(BR));
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr(
+ universal->getObjectForArch({Arch, ArchLen}));
+ if (!ObjOrErr) {
+ *ErrorMessage = strdup(toString(ObjOrErr.takeError()).c_str());
+ return nullptr;
+ }
+ return wrap(ObjOrErr.get().release());
+}
+
+LLVMSectionIteratorRef LLVMObjectFileCopySectionIterator(LLVMBinaryRef BR) {
+ auto OF = cast<ObjectFile>(unwrap(BR));
+ auto sections = OF->sections();
+ if (sections.begin() == sections.end())
+ return nullptr;
+ return wrap(new section_iterator(sections.begin()));
+}
+
+LLVMBool LLVMObjectFileIsSectionIteratorAtEnd(LLVMBinaryRef BR,
+ LLVMSectionIteratorRef SI) {
+ auto OF = cast<ObjectFile>(unwrap(BR));
+ return (*unwrap(SI) == OF->section_end()) ? 1 : 0;
+}
+
+LLVMSymbolIteratorRef LLVMObjectFileCopySymbolIterator(LLVMBinaryRef BR) {
+ auto OF = cast<ObjectFile>(unwrap(BR));
+ auto symbols = OF->symbols();
+ if (symbols.begin() == symbols.end())
+ return nullptr;
+ return wrap(new symbol_iterator(symbols.begin()));
+}
+
+LLVMBool LLVMObjectFileIsSymbolIteratorAtEnd(LLVMBinaryRef BR,
+ LLVMSymbolIteratorRef SI) {
+ auto OF = cast<ObjectFile>(unwrap(BR));
+ return (*unwrap(SI) == OF->symbol_end()) ? 1 : 0;
+}
+
// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf));
@@ -146,10 +262,10 @@ uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
}
const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
- StringRef ret;
- if (std::error_code ec = (*unwrap(SI))->getContents(ret))
- report_fatal_error(ec.message());
- return ret.data();
+ if (Expected<StringRef> E = (*unwrap(SI))->getContents())
+ return E->data();
+ else
+ report_fatal_error(E.takeError());
}
uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index cf63b89adc12..101f5dcc0821 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -1,9 +1,8 @@
//===- ObjectFile.cpp - File format independent object file ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,21 +56,19 @@ uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const {
return getSymbolValueImpl(Ref);
}
-std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const {
+Error ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const {
Expected<StringRef> Name = getSymbolName(Symb);
if (!Name)
- return errorToErrorCode(Name.takeError());
+ return Name.takeError();
OS << *Name;
- return std::error_code();
+ return Error::success();
}
uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; }
bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
- StringRef SectName;
- if (!getSectionName(Sec, SectName))
- return SectName == ".llvmbc";
+ if (Expected<StringRef> NameOrErr = getSectionName(Sec))
+ return *NameOrErr == ".llvmbc";
return false;
}
@@ -128,6 +125,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
case file_magic::macho_universal_binary:
case file_magic::windows_resource:
case file_magic::pdb:
+ case file_magic::minidump:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::elf:
case file_magic::elf_relocatable:
@@ -151,6 +149,10 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) {
case file_magic::coff_import_library:
case file_magic::pecoff_executable:
return createCOFFObjectFile(Object);
+ case file_magic::xcoff_object_32:
+ return createXCOFFObjectFile(Object, Binary::ID_XCOFF32);
+ case file_magic::xcoff_object_64:
+ return createXCOFFObjectFile(Object, Binary::ID_XCOFF64);
case file_magic::wasm_object:
return createWasmObjectFile(Object);
}
diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp
index 1f57867dd21a..f39a6c28ed50 100644
--- a/lib/Object/RecordStreamer.cpp
+++ b/lib/Object/RecordStreamer.cpp
@@ -1,9 +1,8 @@
//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -83,7 +82,7 @@ RecordStreamer::const_iterator RecordStreamer::begin() {
RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
void RecordStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
MCStreamer::EmitInstruction(Inst, STI);
}
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
index 3d5ae59b58fe..c8b75bcc6d1d 100644
--- a/lib/Object/RecordStreamer.h
+++ b/lib/Object/RecordStreamer.h
@@ -1,9 +1,8 @@
//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -47,8 +46,7 @@ private:
public:
RecordStreamer(MCContext &Context, const Module &M);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
@@ -56,6 +54,15 @@ public:
unsigned ByteAlignment, SMLoc Loc = SMLoc()) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
+
+ // Ignore COFF-specific directives; we do not need any information from them,
+ // but the default implementation of these methods crashes, so we override
+ // them with versions that do nothing.
+ void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void EmitCOFFSymbolStorageClass(int StorageClass) override {}
+ void EmitCOFFSymbolType(int Type) override {}
+ void EndCOFFSymbolDef() override {}
+
/// Record .symver aliases for later processing.
void emitELFSymverDirective(StringRef AliasName,
const MCSymbol *Aliasee) override;
diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp
new file mode 100644
index 000000000000..0a243f32e12c
--- /dev/null
+++ b/lib/Object/RelocationResolver.cpp
@@ -0,0 +1,550 @@
+//===- RelocationResolver.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities to resolve relocations in object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/RelocationResolver.h"
+
+namespace llvm {
+namespace object {
+
+static int64_t getELFAddend(RelocationRef R) {
+ Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend();
+ handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) {
+ report_fatal_error(EI.message());
+ });
+ return *AddendOrErr;
+}
+
+static bool supportsX86_64(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_X86_64_NONE:
+ case ELF::R_X86_64_64:
+ case ELF::R_X86_64_DTPOFF32:
+ case ELF::R_X86_64_DTPOFF64:
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveX86_64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_X86_64_NONE:
+ return A;
+ case ELF::R_X86_64_64:
+ case ELF::R_X86_64_DTPOFF32:
+ case ELF::R_X86_64_DTPOFF64:
+ return S + getELFAddend(R);
+ case ELF::R_X86_64_PC32:
+ return S + getELFAddend(R) - R.getOffset();
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S:
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsAArch64(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_AARCH64_ABS32:
+ case ELF::R_AARCH64_ABS64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_AARCH64_ABS32:
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ case ELF::R_AARCH64_ABS64:
+ return S + getELFAddend(R);
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsBPF(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_BPF_64_32:
+ case ELF::R_BPF_64_64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_BPF_64_32:
+ return S & 0xFFFFFFFF;
+ case ELF::R_BPF_64_64:
+ return S;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsMips64(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_MIPS_32:
+ case ELF::R_MIPS_64:
+ case ELF::R_MIPS_TLS_DTPREL64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveMips64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_MIPS_32:
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ case ELF::R_MIPS_64:
+ return S + getELFAddend(R);
+ case ELF::R_MIPS_TLS_DTPREL64:
+ return S + getELFAddend(R) - 0x8000;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsPPC64(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_PPC64_ADDR32:
+ case ELF::R_PPC64_ADDR64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_PPC64_ADDR32:
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ case ELF::R_PPC64_ADDR64:
+ return S + getELFAddend(R);
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsSystemZ(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_390_32:
+ case ELF::R_390_64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveSystemZ(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_390_32:
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ case ELF::R_390_64:
+ return S + getELFAddend(R);
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsSparc64(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_SPARC_32:
+ case ELF::R_SPARC_64:
+ case ELF::R_SPARC_UA32:
+ case ELF::R_SPARC_UA64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveSparc64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_SPARC_32:
+ case ELF::R_SPARC_64:
+ case ELF::R_SPARC_UA32:
+ case ELF::R_SPARC_UA64:
+ return S + getELFAddend(R);
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsAmdgpu(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_AMDGPU_ABS32:
+ case ELF::R_AMDGPU_ABS64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveAmdgpu(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_AMDGPU_ABS32:
+ case ELF::R_AMDGPU_ABS64:
+ return S + getELFAddend(R);
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsX86(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_386_NONE:
+ case ELF::R_386_32:
+ case ELF::R_386_PC32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_386_NONE:
+ return A;
+ case ELF::R_386_32:
+ return S + A;
+ case ELF::R_386_PC32:
+ return S - R.getOffset() + A;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsPPC32(uint64_t Type) {
+ return Type == ELF::R_PPC_ADDR32;
+}
+
+static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) {
+ if (R.getType() == ELF::R_PPC_ADDR32)
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsARM(uint64_t Type) {
+ return Type == ELF::R_ARM_ABS32;
+}
+
+static uint64_t resolveARM(RelocationRef R, uint64_t S, uint64_t A) {
+ if (R.getType() == ELF::R_ARM_ABS32)
+ return (S + A) & 0xFFFFFFFF;
+ llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsAVR(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_AVR_16:
+ case ELF::R_AVR_32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveAVR(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case ELF::R_AVR_16:
+ return (S + getELFAddend(R)) & 0xFFFF;
+ case ELF::R_AVR_32:
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsLanai(uint64_t Type) {
+ return Type == ELF::R_LANAI_32;
+}
+
+static uint64_t resolveLanai(RelocationRef R, uint64_t S, uint64_t A) {
+ if (R.getType() == ELF::R_LANAI_32)
+ return (S + getELFAddend(R)) & 0xFFFFFFFF;
+ llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsMips32(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_MIPS_32:
+ case ELF::R_MIPS_TLS_DTPREL32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveMips32(RelocationRef R, uint64_t S, uint64_t A) {
+  // FIXME: Take into account implicit addends to get correct results.
+ uint32_t Rel = R.getType();
+ if (Rel == ELF::R_MIPS_32)
+ return (S + A) & 0xFFFFFFFF;
+ if (Rel == ELF::R_MIPS_TLS_DTPREL32)
+ return (S + A) & 0xFFFFFFFF;
+ llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsSparc32(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_SPARC_32:
+ case ELF::R_SPARC_UA32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveSparc32(RelocationRef R, uint64_t S, uint64_t A) {
+ uint32_t Rel = R.getType();
+ if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32)
+ return S + getELFAddend(R);
+ return A;
+}
+
+static bool supportsHexagon(uint64_t Type) {
+ return Type == ELF::R_HEX_32;
+}
+
+static uint64_t resolveHexagon(RelocationRef R, uint64_t S, uint64_t A) {
+ if (R.getType() == ELF::R_HEX_32)
+ return S + getELFAddend(R);
+ llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsRISCV(uint64_t Type) {
+ switch (Type) {
+ case ELF::R_RISCV_NONE:
+ case ELF::R_RISCV_32:
+ case ELF::R_RISCV_64:
+ case ELF::R_RISCV_ADD8:
+ case ELF::R_RISCV_SUB8:
+ case ELF::R_RISCV_ADD16:
+ case ELF::R_RISCV_SUB16:
+ case ELF::R_RISCV_ADD32:
+ case ELF::R_RISCV_SUB32:
+ case ELF::R_RISCV_ADD64:
+ case ELF::R_RISCV_SUB64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) {
+ int64_t RA = getELFAddend(R);
+ switch (R.getType()) {
+ case ELF::R_RISCV_NONE:
+ return A;
+ case ELF::R_RISCV_32:
+ return (S + RA) & 0xFFFFFFFF;
+ case ELF::R_RISCV_64:
+ return S + RA;
+ case ELF::R_RISCV_ADD8:
+ return (A + (S + RA)) & 0xFF;
+ case ELF::R_RISCV_SUB8:
+ return (A - (S + RA)) & 0xFF;
+ case ELF::R_RISCV_ADD16:
+ return (A + (S + RA)) & 0xFFFF;
+ case ELF::R_RISCV_SUB16:
+ return (A - (S + RA)) & 0xFFFF;
+ case ELF::R_RISCV_ADD32:
+ return (A + (S + RA)) & 0xFFFFFFFF;
+ case ELF::R_RISCV_SUB32:
+ return (A - (S + RA)) & 0xFFFFFFFF;
+ case ELF::R_RISCV_ADD64:
+ return (A + (S + RA));
+ case ELF::R_RISCV_SUB64:
+ return (A - (S + RA));
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsCOFFX86(uint64_t Type) {
+ switch (Type) {
+ case COFF::IMAGE_REL_I386_SECREL:
+ case COFF::IMAGE_REL_I386_DIR32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveCOFFX86(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case COFF::IMAGE_REL_I386_SECREL:
+ case COFF::IMAGE_REL_I386_DIR32:
+ return (S + A) & 0xFFFFFFFF;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsCOFFX86_64(uint64_t Type) {
+ switch (Type) {
+ case COFF::IMAGE_REL_AMD64_SECREL:
+ case COFF::IMAGE_REL_AMD64_ADDR64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveCOFFX86_64(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case COFF::IMAGE_REL_AMD64_SECREL:
+ return (S + A) & 0xFFFFFFFF;
+ case COFF::IMAGE_REL_AMD64_ADDR64:
+ return S + A;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
+static bool supportsMachOX86_64(uint64_t Type) {
+ return Type == MachO::X86_64_RELOC_UNSIGNED;
+}
+
+static uint64_t resolveMachOX86_64(RelocationRef R, uint64_t S, uint64_t A) {
+ if (R.getType() == MachO::X86_64_RELOC_UNSIGNED)
+ return S;
+ llvm_unreachable("Invalid relocation type");
+}
+
+static bool supportsWasm32(uint64_t Type) {
+ switch (Type) {
+ case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_TABLE_INDEX_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_I32:
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_TYPE_INDEX_LEB:
+ case wasm::R_WASM_GLOBAL_INDEX_LEB:
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32:
+ case wasm::R_WASM_EVENT_INDEX_LEB:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) {
+ switch (R.getType()) {
+ case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_TABLE_INDEX_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_I32:
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_TYPE_INDEX_LEB:
+ case wasm::R_WASM_GLOBAL_INDEX_LEB:
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32:
+ case wasm::R_WASM_EVENT_INDEX_LEB:
+    // For a wasm section, its offset is at 0, so ignore Value.
+ return A;
+ default:
+ llvm_unreachable("Invalid relocation type");
+ }
+}
+
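+// Picks a (supports, resolve) function pair for the object's file format,
+// word size, and architecture; unsupported targets yield {nullptr, nullptr}.
+// Illustrative caller sketch (hypothetical variable names):
+//   auto SR = getRelocationResolver(Obj);
+//   if (SR.first && SR.first(Reloc.getType()))
+//     uint64_t Resolved = SR.second(Reloc, SymbolValue, ValueInPlace);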
+std::pair<bool (*)(uint64_t), RelocationResolver>
+getRelocationResolver(const ObjectFile &Obj) {
+ if (Obj.isCOFF()) {
+ if (Obj.getBytesInAddress() == 8)
+ return {supportsCOFFX86_64, resolveCOFFX86_64};
+ return {supportsCOFFX86, resolveCOFFX86};
+ } else if (Obj.isELF()) {
+ if (Obj.getBytesInAddress() == 8) {
+ switch (Obj.getArch()) {
+ case Triple::x86_64:
+ return {supportsX86_64, resolveX86_64};
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ return {supportsAArch64, resolveAArch64};
+ case Triple::bpfel:
+ case Triple::bpfeb:
+ return {supportsBPF, resolveBPF};
+ case Triple::mips64el:
+ case Triple::mips64:
+ return {supportsMips64, resolveMips64};
+ case Triple::ppc64le:
+ case Triple::ppc64:
+ return {supportsPPC64, resolvePPC64};
+ case Triple::systemz:
+ return {supportsSystemZ, resolveSystemZ};
+ case Triple::sparcv9:
+ return {supportsSparc64, resolveSparc64};
+ case Triple::amdgcn:
+ return {supportsAmdgpu, resolveAmdgpu};
+ case Triple::riscv64:
+ return {supportsRISCV, resolveRISCV};
+ default:
+ return {nullptr, nullptr};
+ }
+ }
+
+ // 32-bit object file
+ assert(Obj.getBytesInAddress() == 4 &&
+ "Invalid word size in object file");
+
+ switch (Obj.getArch()) {
+ case Triple::x86:
+ return {supportsX86, resolveX86};
+ case Triple::ppc:
+ return {supportsPPC32, resolvePPC32};
+ case Triple::arm:
+ case Triple::armeb:
+ return {supportsARM, resolveARM};
+ case Triple::avr:
+ return {supportsAVR, resolveAVR};
+ case Triple::lanai:
+ return {supportsLanai, resolveLanai};
+ case Triple::mipsel:
+ case Triple::mips:
+ return {supportsMips32, resolveMips32};
+ case Triple::sparc:
+ return {supportsSparc32, resolveSparc32};
+ case Triple::hexagon:
+ return {supportsHexagon, resolveHexagon};
+ case Triple::riscv32:
+ return {supportsRISCV, resolveRISCV};
+ default:
+ return {nullptr, nullptr};
+ }
+ } else if (Obj.isMachO()) {
+ if (Obj.getArch() == Triple::x86_64)
+ return {supportsMachOX86_64, resolveMachOX86_64};
+ return {nullptr, nullptr};
+ } else if (Obj.isWasm()) {
+ if (Obj.getArch() == Triple::wasm32)
+ return {supportsWasm32, resolveWasm32};
+ return {nullptr, nullptr};
+ }
+
+ llvm_unreachable("Invalid object file");
+}
+
+} // namespace object
+} // namespace llvm
diff --git a/lib/Object/SymbolSize.cpp b/lib/Object/SymbolSize.cpp
index 004fb1b07546..bdf4dc55cf3c 100644
--- a/lib/Object/SymbolSize.cpp
+++ b/lib/Object/SymbolSize.cpp
@@ -1,9 +1,8 @@
//===- SymbolSize.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp
index 3e998a2682b8..2b152b7d8da3 100644
--- a/lib/Object/SymbolicFile.cpp
+++ b/lib/Object/SymbolicFile.cpp
@@ -1,9 +1,8 @@
//===- SymbolicFile.cpp - Interface that only provides symbols ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,6 +52,7 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
case file_magic::macho_universal_binary:
case file_magic::windows_resource:
case file_magic::pdb:
+ case file_magic::minidump:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::elf:
case file_magic::elf_executable:
@@ -69,6 +69,8 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type,
case file_magic::macho_dsym_companion:
case file_magic::macho_kext_bundle:
case file_magic::pecoff_executable:
+ case file_magic::xcoff_object_32:
+ case file_magic::xcoff_object_64:
case file_magic::wasm_object:
return ObjectFile::createObjectFile(Object, Type);
case file_magic::coff_import_library:
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index d84cb48c9fbd..82aa1830dced 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -1,15 +1,15 @@
//===- WasmObjectFile.cpp - Wasm object file implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
@@ -131,24 +131,24 @@ static int64_t readLEB128(WasmObjectFile::ReadContext &Ctx) {
}
static uint8_t readVaruint1(WasmObjectFile::ReadContext &Ctx) {
- int64_t result = readLEB128(Ctx);
- if (result > VARUINT1_MAX || result < 0)
+ int64_t Result = readLEB128(Ctx);
+ if (Result > VARUINT1_MAX || Result < 0)
report_fatal_error("LEB is outside Varuint1 range");
- return result;
+ return Result;
}
static int32_t readVarint32(WasmObjectFile::ReadContext &Ctx) {
- int64_t result = readLEB128(Ctx);
- if (result > INT32_MAX || result < INT32_MIN)
+ int64_t Result = readLEB128(Ctx);
+ if (Result > INT32_MAX || Result < INT32_MIN)
report_fatal_error("LEB is outside Varint32 range");
- return result;
+ return Result;
}
static uint32_t readVaruint32(WasmObjectFile::ReadContext &Ctx) {
- uint64_t result = readULEB128(Ctx);
- if (result > UINT32_MAX)
+ uint64_t Result = readULEB128(Ctx);
+ if (Result > UINT32_MAX)
report_fatal_error("LEB is outside Varuint32 range");
- return result;
+ return Result;
}
static int64_t readVarint64(WasmObjectFile::ReadContext &Ctx) {
@@ -255,7 +255,7 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
}
ReadContext Ctx;
- Ctx.Start = getPtr(0);
+ Ctx.Start = getData().bytes_begin();
Ctx.Ptr = Ctx.Start + 4;
Ctx.End = Ctx.Start + getData().size();
@@ -316,14 +316,17 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
return parseCodeSection(Ctx);
case wasm::WASM_SEC_DATA:
return parseDataSection(Ctx);
+ case wasm::WASM_SEC_DATACOUNT:
+ return parseDataCountSection(Ctx);
default:
- return make_error<GenericBinaryError>("Bad section type",
- object_error::parse_failed);
+ return make_error<GenericBinaryError>(
+ "Invalid section type: " + Twine(Sec.Type), object_error::parse_failed);
}
}
Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
// See https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+ HasDylinkSection = true;
DylinkInfo.MemorySize = readVaruint32(Ctx);
DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
DylinkInfo.TableSize = readVaruint32(Ctx);
@@ -418,17 +421,17 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
if (Count > DataSegments.size())
return make_error<GenericBinaryError>("Too many segment names",
object_error::parse_failed);
- for (uint32_t i = 0; i < Count; i++) {
- DataSegments[i].Data.Name = readString(Ctx);
- DataSegments[i].Data.Alignment = readVaruint32(Ctx);
- DataSegments[i].Data.Flags = readVaruint32(Ctx);
+ for (uint32_t I = 0; I < Count; I++) {
+ DataSegments[I].Data.Name = readString(Ctx);
+ DataSegments[I].Data.Alignment = readVaruint32(Ctx);
+ DataSegments[I].Data.LinkerFlags = readVaruint32(Ctx);
}
break;
}
case wasm::WASM_INIT_FUNCS: {
uint32_t Count = readVaruint32(Ctx);
LinkingData.InitFunctions.reserve(Count);
- for (uint32_t i = 0; i < Count; i++) {
+ for (uint32_t I = 0; I < Count; I++) {
wasm::WasmInitFunc Init;
Init.Priority = readVaruint32(Ctx);
Init.Symbol = readVaruint32(Ctx);
@@ -505,9 +508,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Function.SymbolName = Info.Name;
} else {
wasm::WasmImport &Import = *ImportedFunctions[Info.ElementIndex];
+ if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+ Info.Name = readString(Ctx);
+ else
+ Info.Name = Import.Field;
Signature = &Signatures[Import.SigIndex];
- Info.Name = Import.Field;
- Info.Module = Import.Module;
+ Info.ImportName = Import.Field;
+ Info.ImportModule = Import.Module;
}
break;
@@ -530,8 +537,13 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Global.SymbolName = Info.Name;
} else {
wasm::WasmImport &Import = *ImportedGlobals[Info.ElementIndex];
- Info.Name = Import.Field;
+ if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+ Info.Name = readString(Ctx);
+ else
+ Info.Name = Import.Field;
GlobalType = &Import.Global;
+ Info.ImportName = Import.Field;
+ Info.ImportModule = Import.Module;
}
break;
@@ -585,9 +597,14 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
} else {
wasm::WasmImport &Import = *ImportedEvents[Info.ElementIndex];
+ if ((Info.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0)
+ Info.Name = readString(Ctx);
+ else
+ Info.Name = Import.Field;
EventType = &Import.Event;
Signature = &Signatures[EventType->SigIndex];
- Info.Name = Import.Field;
+ Info.ImportName = Import.Field;
+ Info.ImportModule = Import.Module;
}
break;
}
@@ -659,6 +676,77 @@ Error WasmObjectFile::parseLinkingSectionComdat(ReadContext &Ctx) {
return Error::success();
}
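+// Parses the custom "producers" section. Field names must be unique and must
+// be one of "language", "processed-by", or "sdk"; within a field, a producer
+// name may appear only once.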
+Error WasmObjectFile::parseProducersSection(ReadContext &Ctx) {
+ llvm::SmallSet<StringRef, 3> FieldsSeen;
+ uint32_t Fields = readVaruint32(Ctx);
+ for (size_t I = 0; I < Fields; ++I) {
+ StringRef FieldName = readString(Ctx);
+ if (!FieldsSeen.insert(FieldName).second)
+ return make_error<GenericBinaryError>(
+ "Producers section does not have unique fields",
+ object_error::parse_failed);
+ std::vector<std::pair<std::string, std::string>> *ProducerVec = nullptr;
+ if (FieldName == "language") {
+ ProducerVec = &ProducerInfo.Languages;
+ } else if (FieldName == "processed-by") {
+ ProducerVec = &ProducerInfo.Tools;
+ } else if (FieldName == "sdk") {
+ ProducerVec = &ProducerInfo.SDKs;
+ } else {
+ return make_error<GenericBinaryError>(
+ "Producers section field is not named one of language, processed-by, "
+ "or sdk",
+ object_error::parse_failed);
+ }
+ uint32_t ValueCount = readVaruint32(Ctx);
+ llvm::SmallSet<StringRef, 8> ProducersSeen;
+ for (size_t J = 0; J < ValueCount; ++J) {
+ StringRef Name = readString(Ctx);
+ StringRef Version = readString(Ctx);
+ if (!ProducersSeen.insert(Name).second) {
+ return make_error<GenericBinaryError>(
+ "Producers section contains repeated producer",
+ object_error::parse_failed);
+ }
+ ProducerVec->emplace_back(Name, Version);
+ }
+ }
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("Producers section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
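+// Parses the custom "target_features" section. Each entry carries a policy
+// prefix (used, required, or disallowed) followed by a feature name, and
+// repeated feature names are rejected.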
+Error WasmObjectFile::parseTargetFeaturesSection(ReadContext &Ctx) {
+ llvm::SmallSet<std::string, 8> FeaturesSeen;
+ uint32_t FeatureCount = readVaruint32(Ctx);
+ for (size_t I = 0; I < FeatureCount; ++I) {
+ wasm::WasmFeatureEntry Feature;
+ Feature.Prefix = readUint8(Ctx);
+ switch (Feature.Prefix) {
+ case wasm::WASM_FEATURE_PREFIX_USED:
+ case wasm::WASM_FEATURE_PREFIX_REQUIRED:
+ case wasm::WASM_FEATURE_PREFIX_DISALLOWED:
+ break;
+ default:
+ return make_error<GenericBinaryError>("Unknown feature policy prefix",
+ object_error::parse_failed);
+ }
+ Feature.Name = readString(Ctx);
+ if (!FeaturesSeen.insert(Feature.Name).second)
+ return make_error<GenericBinaryError>(
+ "Target features section contains repeated feature \"" +
+ Feature.Name + "\"",
+ object_error::parse_failed);
+ TargetFeatures.push_back(Feature);
+ }
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>(
+ "Target features section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
uint32_t SectionIndex = readVaruint32(Ctx);
if (SectionIndex >= Sections.size())
@@ -678,43 +766,49 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
PreviousOffset = Reloc.Offset;
Reloc.Index = readVaruint32(Ctx);
switch (Reloc.Type) {
- case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
- case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_TABLE_INDEX_SLEB:
+ case wasm::R_WASM_TABLE_INDEX_I32:
+ case wasm::R_WASM_TABLE_INDEX_REL_SLEB:
if (!isValidFunctionSymbol(Reloc.Index))
return make_error<GenericBinaryError>("Bad relocation function index",
object_error::parse_failed);
break;
- case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
+ case wasm::R_WASM_TYPE_INDEX_LEB:
if (Reloc.Index >= Signatures.size())
return make_error<GenericBinaryError>("Bad relocation type index",
object_error::parse_failed);
break;
- case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
- if (!isValidGlobalSymbol(Reloc.Index))
+ case wasm::R_WASM_GLOBAL_INDEX_LEB:
+ // R_WASM_GLOBAL_INDEX_LEB relocations can be used against function and data
+ // symbols to refer to their GOT entries.
+ if (!isValidGlobalSymbol(Reloc.Index) &&
+ !isValidDataSymbol(Reloc.Index) &&
+ !isValidFunctionSymbol(Reloc.Index))
return make_error<GenericBinaryError>("Bad relocation global index",
object_error::parse_failed);
break;
- case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB:
+ case wasm::R_WASM_EVENT_INDEX_LEB:
if (!isValidEventSymbol(Reloc.Index))
return make_error<GenericBinaryError>("Bad relocation event index",
object_error::parse_failed);
break;
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
if (!isValidDataSymbol(Reloc.Index))
return make_error<GenericBinaryError>("Bad relocation data index",
object_error::parse_failed);
Reloc.Addend = readVarint32(Ctx);
break;
- case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
if (!isValidFunctionSymbol(Reloc.Index))
return make_error<GenericBinaryError>("Bad relocation function index",
object_error::parse_failed);
Reloc.Addend = readVarint32(Ctx);
break;
- case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32:
if (!isValidSectionSymbol(Reloc.Index))
return make_error<GenericBinaryError>("Bad relocation section index",
object_error::parse_failed);
@@ -730,10 +824,10 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
// also shouldn't overlap a function/element boundary, but we don't bother
// to check that.
uint64_t Size = 5;
- if (Reloc.Type == wasm::R_WEBASSEMBLY_TABLE_INDEX_I32 ||
- Reloc.Type == wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32 ||
- Reloc.Type == wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32 ||
- Reloc.Type == wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32)
+ if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I32 ||
+ Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I32 ||
+ Reloc.Type == wasm::R_WASM_SECTION_OFFSET_I32 ||
+ Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32)
Size = 4;
if (Reloc.Offset + Size > EndOffset)
return make_error<GenericBinaryError>("Bad relocation offset",
@@ -757,6 +851,12 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
} else if (Sec.Name == "linking") {
if (Error Err = parseLinkingSection(Ctx))
return Err;
+ } else if (Sec.Name == "producers") {
+ if (Error Err = parseProducersSection(Ctx))
+ return Err;
+ } else if (Sec.Name == "target_features") {
+ if (Error Err = parseTargetFeaturesSection(Ctx))
+ return Err;
} else if (Sec.Name.startswith("reloc.")) {
if (Error Err = parseRelocSection(Sec.Name, Ctx))
return Err;
@@ -799,7 +899,7 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
Imports.reserve(Count);
- for (uint32_t i = 0; i < Count; i++) {
+ for (uint32_t I = 0; I < Count; I++) {
wasm::WasmImport Im;
Im.Module = readString(Ctx);
Im.Field = readString(Ctx);
@@ -925,7 +1025,7 @@ Error WasmObjectFile::parseEventSection(ReadContext &Ctx) {
Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
Exports.reserve(Count);
- for (uint32_t i = 0; i < Count; i++) {
+ for (uint32_t I = 0; I < Count; I++) {
wasm::WasmExport Ex;
Ex.Name = readString(Ctx);
Ex.Kind = readUint8(Ctx);
@@ -1010,6 +1110,12 @@ wasm::WasmFunction &WasmObjectFile::getDefinedFunction(uint32_t Index) {
return Functions[Index - NumImportedFunctions];
}
+const wasm::WasmFunction &
+WasmObjectFile::getDefinedFunction(uint32_t Index) const {
+ assert(isDefinedFunctionIndex(Index));
+ return Functions[Index - NumImportedFunctions];
+}
+
wasm::WasmGlobal &WasmObjectFile::getDefinedGlobal(uint32_t Index) {
assert(isDefinedGlobalIndex(Index));
return Globals[Index - NumImportedGlobals];
@@ -1097,12 +1203,22 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
DataSection = Sections.size();
uint32_t Count = readVaruint32(Ctx);
+ if (DataCount && Count != DataCount.getValue())
+ return make_error<GenericBinaryError>(
+ "Number of data segments does not match DataCount section");
DataSegments.reserve(Count);
while (Count--) {
WasmSegment Segment;
- Segment.Data.MemoryIndex = readVaruint32(Ctx);
- if (Error Err = readInitExpr(Segment.Data.Offset, Ctx))
- return Err;
+ Segment.Data.InitFlags = readVaruint32(Ctx);
+ Segment.Data.MemoryIndex = (Segment.Data.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX)
+ ? readVaruint32(Ctx) : 0;
+ if ((Segment.Data.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+ if (Error Err = readInitExpr(Segment.Data.Offset, Ctx))
+ return Err;
+ } else {
+ Segment.Data.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ Segment.Data.Offset.Value.Int32 = 0;
+ }
uint32_t Size = readVaruint32(Ctx);
if (Size > (size_t)(Ctx.End - Ctx.Ptr))
return make_error<GenericBinaryError>("Invalid segment size",
@@ -1111,7 +1227,7 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
// The rest of these Data fields are set later, when reading in the linking
// metadata section.
Segment.Data.Alignment = 0;
- Segment.Data.Flags = 0;
+ Segment.Data.LinkerFlags = 0;
Segment.Data.Comdat = UINT32_MAX;
Segment.SectionOffset = Ctx.Ptr - Ctx.Start;
Ctx.Ptr += Size;
@@ -1123,15 +1239,16 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
return Error::success();
}
-const uint8_t *WasmObjectFile::getPtr(size_t Offset) const {
- return reinterpret_cast<const uint8_t *>(getData().data() + Offset);
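+// The DataCount section records the expected number of data segments;
+// parseDataSection checks its own segment count against this value.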
+Error WasmObjectFile::parseDataCountSection(ReadContext &Ctx) {
+ DataCount = readVaruint32(Ctx);
+ return Error::success();
}
const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const {
return Header;
}
-void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; }
+void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.b++; }
uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
uint32_t Result = SymbolRef::SF_None;
@@ -1153,18 +1270,20 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
basic_symbol_iterator WasmObjectFile::symbol_begin() const {
DataRefImpl Ref;
- Ref.d.a = 0;
+ Ref.d.a = 1; // Arbitrary non-zero value so that Ref.p is non-null
+ Ref.d.b = 0; // Symbol index
return BasicSymbolRef(Ref, this);
}
basic_symbol_iterator WasmObjectFile::symbol_end() const {
DataRefImpl Ref;
- Ref.d.a = Symbols.size();
+ Ref.d.a = 1; // Arbitrary non-zero value so that Ref.p is non-null
+ Ref.d.b = Symbols.size(); // Symbol index
return BasicSymbolRef(Ref, this);
}
const WasmSymbol &WasmObjectFile::getWasmSymbol(const DataRefImpl &Symb) const {
- return Symbols[Symb.d.a];
+ return Symbols[Symb.d.b];
}
const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const {
@@ -1176,7 +1295,12 @@ Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
}
Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
- return getSymbolValue(Symb);
+ auto &Sym = getWasmSymbol(Symb);
+ if (Sym.Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION &&
+ isDefinedFunctionIndex(Sym.Info.ElementIndex))
+ return getDefinedFunction(Sym.Info.ElementIndex).CodeSectionOffset;
+ else
+ return getSymbolValue(Symb);
}
uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol &Sym) const {
@@ -1265,13 +1389,11 @@ WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; }
-std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
- StringRef &Res) const {
+Expected<StringRef> WasmObjectFile::getSectionName(DataRefImpl Sec) const {
const WasmSection &S = Sections[Sec.d.a];
#define ECase(X) \
case wasm::WASM_SEC_##X: \
- Res = #X; \
- break
+ return #X;
switch (S.Type) {
ECase(TYPE);
ECase(IMPORT);
@@ -1285,14 +1407,13 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
ECase(ELEM);
ECase(CODE);
ECase(DATA);
+ ECase(DATACOUNT);
case wasm::WASM_SEC_CUSTOM:
- Res = S.Name;
- break;
+ return S.Name;
default:
- return object_error::invalid_section_index;
+ return createStringError(object_error::invalid_section_index, "");
}
#undef ECase
- return std::error_code();
}
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
@@ -1306,14 +1427,12 @@ uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const {
return S.Content.size();
}
-std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec,
- StringRef &Res) const {
+Expected<ArrayRef<uint8_t>>
+WasmObjectFile::getSectionContents(DataRefImpl Sec) const {
const WasmSection &S = Sections[Sec.d.a];
// This will never fail since wasm sections can never be empty (user-sections
// must have a name and non-user sections each have a defined structure).
- Res = StringRef(reinterpret_cast<const char *>(S.Content.data()),
- S.Content.size());
- return std::error_code();
+ return S.Content;
}
uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const {
@@ -1362,11 +1481,11 @@ uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const {
symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Ref) const {
const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
- if (Rel.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB)
+ if (Rel.Type == wasm::R_WASM_TYPE_INDEX_LEB)
return symbol_end();
DataRefImpl Sym;
- Sym.d.a = Rel.Index;
- Sym.d.b = 0;
+ Sym.d.a = 1;
+ Sym.d.b = Rel.Index;
return symbol_iterator(SymbolRef(Sym, this));
}
@@ -1453,7 +1572,8 @@ int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
.StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
.Case("name", WASM_SEC_ORDER_NAME)
.Case("producers", WASM_SEC_ORDER_PRODUCERS)
- .Default(-1);
+ .Case("target_features", WASM_SEC_ORDER_TARGET_FEATURES)
+ .Default(WASM_SEC_ORDER_NONE);
case wasm::WASM_SEC_TYPE:
return WASM_SEC_ORDER_TYPE;
case wasm::WASM_SEC_IMPORT:
@@ -1481,19 +1601,73 @@ int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
case wasm::WASM_SEC_EVENT:
return WASM_SEC_ORDER_EVENT;
default:
- llvm_unreachable("invalid section");
+ return WASM_SEC_ORDER_NONE;
}
}
+// Represents the edges in a directed graph where any node B reachable from node
+// A is not allowed to appear before A in the section ordering, but may appear
+// afterward.
+int WasmSectionOrderChecker::DisallowedPredecessors[WASM_NUM_SEC_ORDERS][WASM_NUM_SEC_ORDERS] = {
+ {}, // WASM_SEC_ORDER_NONE
+ {WASM_SEC_ORDER_TYPE, WASM_SEC_ORDER_IMPORT}, // WASM_SEC_ORDER_TYPE,
+ {WASM_SEC_ORDER_IMPORT, WASM_SEC_ORDER_FUNCTION}, // WASM_SEC_ORDER_IMPORT,
+ {WASM_SEC_ORDER_FUNCTION, WASM_SEC_ORDER_TABLE}, // WASM_SEC_ORDER_FUNCTION,
+ {WASM_SEC_ORDER_TABLE, WASM_SEC_ORDER_MEMORY}, // WASM_SEC_ORDER_TABLE,
+ {WASM_SEC_ORDER_MEMORY, WASM_SEC_ORDER_GLOBAL}, // WASM_SEC_ORDER_MEMORY,
+ {WASM_SEC_ORDER_GLOBAL, WASM_SEC_ORDER_EVENT}, // WASM_SEC_ORDER_GLOBAL,
+ {WASM_SEC_ORDER_EVENT, WASM_SEC_ORDER_EXPORT}, // WASM_SEC_ORDER_EVENT,
+ {WASM_SEC_ORDER_EXPORT, WASM_SEC_ORDER_START}, // WASM_SEC_ORDER_EXPORT,
+ {WASM_SEC_ORDER_START, WASM_SEC_ORDER_ELEM}, // WASM_SEC_ORDER_START,
+ {WASM_SEC_ORDER_ELEM, WASM_SEC_ORDER_DATACOUNT}, // WASM_SEC_ORDER_ELEM,
+ {WASM_SEC_ORDER_DATACOUNT, WASM_SEC_ORDER_CODE}, // WASM_SEC_ORDER_DATACOUNT,
+ {WASM_SEC_ORDER_CODE, WASM_SEC_ORDER_DATA}, // WASM_SEC_ORDER_CODE,
+ {WASM_SEC_ORDER_DATA, WASM_SEC_ORDER_LINKING}, // WASM_SEC_ORDER_DATA,
+
+ // Custom Sections
+ {WASM_SEC_ORDER_DYLINK, WASM_SEC_ORDER_TYPE}, // WASM_SEC_ORDER_DYLINK,
+ {WASM_SEC_ORDER_LINKING, WASM_SEC_ORDER_RELOC, WASM_SEC_ORDER_NAME}, // WASM_SEC_ORDER_LINKING,
+ {}, // WASM_SEC_ORDER_RELOC (can be repeated),
+ {WASM_SEC_ORDER_NAME, WASM_SEC_ORDER_PRODUCERS}, // WASM_SEC_ORDER_NAME,
+ {WASM_SEC_ORDER_PRODUCERS, WASM_SEC_ORDER_TARGET_FEATURES}, // WASM_SEC_ORDER_PRODUCERS,
+ {WASM_SEC_ORDER_TARGET_FEATURES} // WASM_SEC_ORDER_TARGET_FEATURES
+};
+
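+// Decides whether a section with this order may appear at the current point:
+// starting from Order, walk the DisallowedPredecessors graph and reject the
+// section if any reachable predecessor has already been seen.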
bool WasmSectionOrderChecker::isValidSectionOrder(unsigned ID,
StringRef CustomSectionName) {
int Order = getSectionOrder(ID, CustomSectionName);
- if (Order == -1) // Skip unknown sections
+ if (Order == WASM_SEC_ORDER_NONE)
return true;
- // There can be multiple "reloc." sections. Otherwise there shouldn't be any
- // duplicate section orders.
- bool IsValid = (LastOrder == Order && Order == WASM_SEC_ORDER_RELOC) ||
- LastOrder < Order;
- LastOrder = Order;
- return IsValid;
+
+ // Disallowed predecessors we need to check for
+ SmallVector<int, WASM_NUM_SEC_ORDERS> WorkList;
+
+ // Keep track of completed checks to avoid repeating work
+ bool Checked[WASM_NUM_SEC_ORDERS] = {};
+
+ int Curr = Order;
+ while (true) {
+ // Add new disallowed predecessors to work list
+ for (size_t I = 0;; ++I) {
+ int Next = DisallowedPredecessors[Curr][I];
+ if (Next == WASM_SEC_ORDER_NONE)
+ break;
+ if (Checked[Next])
+ continue;
+ WorkList.push_back(Next);
+ Checked[Next] = true;
+ }
+
+ if (WorkList.empty())
+ break;
+
+ // Consider next disallowed predecessor
+ Curr = WorkList.pop_back_val();
+ if (Seen[Curr])
+ return false;
+ }
+
+ // Have not seen any disallowed predecessors
+ Seen[Order] = true;
+ return true;
}
diff --git a/lib/Object/WindowsMachineFlag.cpp b/lib/Object/WindowsMachineFlag.cpp
new file mode 100644
index 000000000000..f7f2b20ae1a2
--- /dev/null
+++ b/lib/Object/WindowsMachineFlag.cpp
@@ -0,0 +1,44 @@
+//===- WindowsMachineFlag.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Functions for implementing the /machine: flag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/WindowsMachineFlag.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/COFF.h"
+
+using namespace llvm;
+
+// Returns /machine's value.
+COFF::MachineTypes llvm::getMachineType(StringRef S) {
+ return StringSwitch<COFF::MachineTypes>(S.lower())
+ .Cases("x64", "amd64", COFF::IMAGE_FILE_MACHINE_AMD64)
+ .Cases("x86", "i386", COFF::IMAGE_FILE_MACHINE_I386)
+ .Case("arm", COFF::IMAGE_FILE_MACHINE_ARMNT)
+ .Case("arm64", COFF::IMAGE_FILE_MACHINE_ARM64)
+ .Default(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
+}
+
+StringRef llvm::machineToStr(COFF::MachineTypes MT) {
+ switch (MT) {
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ return "arm";
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return "arm64";
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return "x64";
+ case COFF::IMAGE_FILE_MACHINE_I386:
+ return "x86";
+ default:
+ llvm_unreachable("unknown machine type");
+ }
+}
diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp
index 65413dd8bea1..d76e1231684c 100644
--- a/lib/Object/WindowsResource.cpp
+++ b/lib/Object/WindowsResource.cpp
@@ -1,9 +1,8 @@
//===-- WindowsResource.cpp -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
#include <ctime>
#include <queue>
#include <system_error>
@@ -46,11 +46,12 @@ WindowsResource::WindowsResource(MemoryBufferRef Source)
support::little);
}
+// static
Expected<std::unique_ptr<WindowsResource>>
WindowsResource::createWindowsResource(MemoryBufferRef Source) {
if (Source.getBufferSize() < WIN_RES_MAGIC_SIZE + WIN_RES_NULL_ENTRY_SIZE)
return make_error<GenericBinaryError>(
- "File too small to be a resource file",
+ Source.getBufferIdentifier() + ": too small to be a resource file",
object_error::invalid_file_type);
std::unique_ptr<WindowsResource> Ret(new WindowsResource(Source));
return std::move(Ret);
@@ -58,14 +59,14 @@ WindowsResource::createWindowsResource(MemoryBufferRef Source) {
Expected<ResourceEntryRef> WindowsResource::getHeadEntry() {
if (BBS.getLength() < sizeof(WinResHeaderPrefix) + sizeof(WinResHeaderSuffix))
- return make_error<EmptyResError>(".res contains no entries",
+ return make_error<EmptyResError>(getFileName() + " contains no entries",
object_error::unexpected_eof);
return ResourceEntryRef::create(BinaryStreamRef(BBS), this);
}
ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref,
const WindowsResource *Owner)
- : Reader(Ref) {}
+ : Reader(Ref), Owner(Owner) {}
Expected<ResourceEntryRef>
ResourceEntryRef::create(BinaryStreamRef BSR, const WindowsResource *Owner) {
@@ -108,7 +109,8 @@ Error ResourceEntryRef::loadNext() {
RETURN_IF_ERROR(Reader.readObject(Prefix));
if (Prefix->HeaderSize < MIN_HEADER_SIZE)
- return make_error<GenericBinaryError>("Header size is too small.",
+ return make_error<GenericBinaryError>(Owner->getFileName() +
+ ": header size too small",
object_error::parse_failed);
RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType));
@@ -128,7 +130,78 @@ Error ResourceEntryRef::loadNext() {
WindowsResourceParser::WindowsResourceParser() : Root(false) {}
-Error WindowsResourceParser::parse(WindowsResource *WR) {
+void printResourceTypeName(uint16_t TypeID, raw_ostream &OS) {
+ switch (TypeID) {
+ case 1: OS << "CURSOR (ID 1)"; break;
+ case 2: OS << "BITMAP (ID 2)"; break;
+ case 3: OS << "ICON (ID 3)"; break;
+ case 4: OS << "MENU (ID 4)"; break;
+ case 5: OS << "DIALOG (ID 5)"; break;
+ case 6: OS << "STRINGTABLE (ID 6)"; break;
+ case 7: OS << "FONTDIR (ID 7)"; break;
+ case 8: OS << "FONT (ID 8)"; break;
+ case 9: OS << "ACCELERATOR (ID 9)"; break;
+ case 10: OS << "RCDATA (ID 10)"; break;
+ case 11: OS << "MESSAGETABLE (ID 11)"; break;
+ case 12: OS << "GROUP_CURSOR (ID 12)"; break;
+ case 14: OS << "GROUP_ICON (ID 14)"; break;
+ case 16: OS << "VERSIONINFO (ID 16)"; break;
+ case 17: OS << "DLGINCLUDE (ID 17)"; break;
+ case 19: OS << "PLUGPLAY (ID 19)"; break;
+ case 20: OS << "VXD (ID 20)"; break;
+ case 21: OS << "ANICURSOR (ID 21)"; break;
+ case 22: OS << "ANIICON (ID 22)"; break;
+ case 23: OS << "HTML (ID 23)"; break;
+ case 24: OS << "MANIFEST (ID 24)"; break;
+ default: OS << "ID " << TypeID; break;
+ }
+}
+
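+// Converts UTF-16LE data to UTF-8. On big-endian hosts a byte-swapped BOM is
+// prepended so convertUTF16ToUTF8String interprets the input as little-endian.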
+static bool convertUTF16LEToUTF8String(ArrayRef<UTF16> Src, std::string &Out) {
+ if (!sys::IsBigEndianHost)
+ return convertUTF16ToUTF8String(Src, Out);
+
+ std::vector<UTF16> EndianCorrectedSrc;
+ EndianCorrectedSrc.resize(Src.size() + 1);
+ llvm::copy(Src, EndianCorrectedSrc.begin() + 1);
+ EndianCorrectedSrc[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
+ return convertUTF16ToUTF8String(makeArrayRef(EndianCorrectedSrc), Out);
+}
+
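+// Builds a diagnostic that identifies the duplicated resource by type, name,
+// and language, along with the two input files that both define it.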
+static std::string makeDuplicateResourceError(
+ const ResourceEntryRef &Entry, StringRef File1, StringRef File2) {
+ std::string Ret;
+ raw_string_ostream OS(Ret);
+
+ OS << "duplicate resource:";
+
+ OS << " type ";
+ if (Entry.checkTypeString()) {
+ std::string UTF8;
+ if (!convertUTF16LEToUTF8String(Entry.getTypeString(), UTF8))
+ UTF8 = "(failed conversion from UTF16)";
+ OS << '\"' << UTF8 << '\"';
+ } else
+ printResourceTypeName(Entry.getTypeID(), OS);
+
+ OS << "/name ";
+ if (Entry.checkNameString()) {
+ std::string UTF8;
+ if (!convertUTF16LEToUTF8String(Entry.getNameString(), UTF8))
+ UTF8 = "(failed conversion from UTF16)";
+ OS << '\"' << UTF8 << '\"';
+ } else {
+ OS << "ID " << Entry.getNameID();
+ }
+
+ OS << "/language " << Entry.getLanguage() << ", in " << File1 << " and in "
+ << File2;
+
+ return OS.str();
+}
+
+Error WindowsResourceParser::parse(WindowsResource *WR,
+ std::vector<std::string> &Duplicates) {
auto EntryOrErr = WR->getHeadEntry();
if (!EntryOrErr) {
auto E = EntryOrErr.takeError();
@@ -153,7 +226,14 @@ Error WindowsResourceParser::parse(WindowsResource *WR) {
bool IsNewTypeString = false;
bool IsNewNameString = false;
- Root.addEntry(Entry, IsNewTypeString, IsNewNameString);
+ TreeNode* Node;
+ bool IsNewNode = Root.addEntry(Entry, InputFilenames.size(),
+ IsNewTypeString, IsNewNameString, Node);
+ InputFilenames.push_back(WR->getFileName());
+ if (!IsNewNode) {
+ Duplicates.push_back(makeDuplicateResourceError(
+ Entry, InputFilenames[Node->Origin], WR->getFileName()));
+ }
if (IsNewTypeString)
StringTable.push_back(Entry.getTypeString());
@@ -172,12 +252,14 @@ void WindowsResourceParser::printTree(raw_ostream &OS) const {
Root.print(Writer, "Resource Tree");
}
-void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
+bool WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry,
+ uint32_t Origin,
bool &IsNewTypeString,
- bool &IsNewNameString) {
+ bool &IsNewNameString,
+ TreeNode *&Result) {
TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString);
TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString);
- NameNode.addLanguageNode(Entry);
+ return NameNode.addLanguageNode(Entry, Origin, Result);
}
WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) {
@@ -187,10 +269,11 @@ WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) {
WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion,
uint16_t MinorVersion,
- uint32_t Characteristics)
+ uint32_t Characteristics,
+ uint32_t Origin)
: IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion),
- Characteristics(Characteristics) {
- DataIndex = DataCount++;
+ Characteristics(Characteristics), Origin(Origin) {
+ DataIndex = DataCount++;
}
std::unique_ptr<WindowsResourceParser::TreeNode>
@@ -206,44 +289,52 @@ WindowsResourceParser::TreeNode::createIDNode() {
std::unique_ptr<WindowsResourceParser::TreeNode>
WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion,
uint16_t MinorVersion,
- uint32_t Characteristics) {
+ uint32_t Characteristics,
+ uint32_t Origin) {
return std::unique_ptr<TreeNode>(
- new TreeNode(MajorVersion, MinorVersion, Characteristics));
+ new TreeNode(MajorVersion, MinorVersion, Characteristics, Origin));
}
WindowsResourceParser::TreeNode &
WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry,
bool &IsNewTypeString) {
if (Entry.checkTypeString())
- return addChild(Entry.getTypeString(), IsNewTypeString);
+ return addNameChild(Entry.getTypeString(), IsNewTypeString);
else
- return addChild(Entry.getTypeID());
+ return addIDChild(Entry.getTypeID());
}
WindowsResourceParser::TreeNode &
WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry,
bool &IsNewNameString) {
if (Entry.checkNameString())
- return addChild(Entry.getNameString(), IsNewNameString);
+ return addNameChild(Entry.getNameString(), IsNewNameString);
else
- return addChild(Entry.getNameID());
+ return addIDChild(Entry.getNameID());
}
-WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addLanguageNode(
- const ResourceEntryRef &Entry) {
- return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(),
- Entry.getMinorVersion(), Entry.getCharacteristics());
+bool WindowsResourceParser::TreeNode::addLanguageNode(
+ const ResourceEntryRef &Entry, uint32_t Origin, TreeNode *&Result) {
+ return addDataChild(Entry.getLanguage(), Entry.getMajorVersion(),
+ Entry.getMinorVersion(), Entry.getCharacteristics(),
+ Origin, Result);
}
-WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild(
- uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion,
- uint32_t Characteristics) {
+bool WindowsResourceParser::TreeNode::addDataChild(
+ uint32_t ID, uint16_t MajorVersion, uint16_t MinorVersion,
+ uint32_t Characteristics, uint32_t Origin, TreeNode *&Result) {
+ auto NewChild =
+ createDataNode(MajorVersion, MinorVersion, Characteristics, Origin);
+ auto ElementInserted = IDChildren.emplace(ID, std::move(NewChild));
+ Result = ElementInserted.first->second.get();
+ return ElementInserted.second;
+}
+
+WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addIDChild(
+ uint32_t ID) {
auto Child = IDChildren.find(ID);
if (Child == IDChildren.end()) {
- auto NewChild =
- IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics)
- : createIDNode();
+ auto NewChild = createIDNode();
WindowsResourceParser::TreeNode &Node = *NewChild;
IDChildren.emplace(ID, std::move(NewChild));
return Node;
@@ -252,19 +343,10 @@ WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild(
}
WindowsResourceParser::TreeNode &
-WindowsResourceParser::TreeNode::addChild(ArrayRef<UTF16> NameRef,
- bool &IsNewString) {
+WindowsResourceParser::TreeNode::addNameChild(ArrayRef<UTF16> NameRef,
+ bool &IsNewString) {
std::string NameString;
- ArrayRef<UTF16> CorrectedName;
- std::vector<UTF16> EndianCorrectedName;
- if (sys::IsBigEndianHost) {
- EndianCorrectedName.resize(NameRef.size() + 1);
- llvm::copy(NameRef, EndianCorrectedName.begin() + 1);
- EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED;
- CorrectedName = makeArrayRef(EndianCorrectedName);
- } else
- CorrectedName = NameRef;
- convertUTF16ToUTF8String(CorrectedName, NameString);
+ convertUTF16LEToUTF8String(NameRef, NameString);
auto Child = StringChildren.find(NameString);
if (Child == StringChildren.end()) {
@@ -318,13 +400,13 @@ class WindowsResourceCOFFWriter {
public:
WindowsResourceCOFFWriter(COFF::MachineTypes MachineType,
const WindowsResourceParser &Parser, Error &E);
- std::unique_ptr<MemoryBuffer> write();
+ std::unique_ptr<MemoryBuffer> write(uint32_t TimeDateStamp);
private:
void performFileLayout();
void performSectionOneLayout();
void performSectionTwoLayout();
- void writeCOFFHeader();
+ void writeCOFFHeader(uint32_t TimeDateStamp);
void writeFirstSectionHeader();
void writeSecondSectionHeader();
void writeFirstSection();
@@ -360,7 +442,8 @@ WindowsResourceCOFFWriter::WindowsResourceCOFFWriter(
Data(Parser.getData()), StringTable(Parser.getStringTable()) {
performFileLayout();
- OutputBuffer = WritableMemoryBuffer::getNewMemBuffer(FileSize);
+ OutputBuffer = WritableMemoryBuffer::getNewMemBuffer(
+ FileSize, "internal .obj file created from .res files");
}
void WindowsResourceCOFFWriter::performFileLayout() {
@@ -417,17 +500,11 @@ void WindowsResourceCOFFWriter::performSectionTwoLayout() {
FileSize = alignTo(FileSize, SECTION_ALIGNMENT);
}
-static std::time_t getTime() {
- std::time_t Now = time(nullptr);
- if (Now < 0 || !isUInt<32>(Now))
- return UINT32_MAX;
- return Now;
-}
-
-std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() {
+std::unique_ptr<MemoryBuffer>
+WindowsResourceCOFFWriter::write(uint32_t TimeDateStamp) {
BufferStart = OutputBuffer->getBufferStart();
- writeCOFFHeader();
+ writeCOFFHeader(TimeDateStamp);
writeFirstSectionHeader();
writeSecondSectionHeader();
writeFirstSection();
@@ -438,16 +515,17 @@ std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() {
return std::move(OutputBuffer);
}
-void WindowsResourceCOFFWriter::writeCOFFHeader() {
+void WindowsResourceCOFFWriter::writeCOFFHeader(uint32_t TimeDateStamp) {
// Write the COFF header.
auto *Header = reinterpret_cast<coff_file_header *>(BufferStart);
Header->Machine = MachineType;
Header->NumberOfSections = 2;
- Header->TimeDateStamp = getTime();
+ Header->TimeDateStamp = TimeDateStamp;
Header->PointerToSymbolTable = SymbolTableOffset;
- // One symbol for every resource plus 2 for each section and @feat.00
+ // One symbol for every resource plus 2 for each section and 1 for @feat.00
Header->NumberOfSymbols = Data.size() + 5;
Header->SizeOfOptionalHeader = 0;
+ // cvtres.exe sets 32BIT_MACHINE even for 64-bit machine types. Match it.
Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE;
}
@@ -712,12 +790,13 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
Expected<std::unique_ptr<MemoryBuffer>>
writeWindowsResourceCOFF(COFF::MachineTypes MachineType,
- const WindowsResourceParser &Parser) {
+ const WindowsResourceParser &Parser,
+ uint32_t TimeDateStamp) {
Error E = Error::success();
WindowsResourceCOFFWriter Writer(MachineType, Parser, E);
if (E)
return std::move(E);
- return Writer.write();
+ return Writer.write(TimeDateStamp);
}
} // namespace object
diff --git a/lib/Object/XCOFFObjectFile.cpp b/lib/Object/XCOFFObjectFile.cpp
new file mode 100644
index 000000000000..602b7357986a
--- /dev/null
+++ b/lib/Object/XCOFFObjectFile.cpp
@@ -0,0 +1,584 @@
+//===--- XCOFFObjectFile.cpp - XCOFF object file implementation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the XCOFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstddef>
+#include <cstring>
+
+namespace llvm {
+namespace object {
+
+// Checks that [Ptr, Ptr + Size) bytes fall inside the memory buffer
+// 'M'. Returns a pointer to the underlying object on success.
+template <typename T>
+static Expected<const T *> getObject(MemoryBufferRef M, const void *Ptr,
+ const uint64_t Size = sizeof(T)) {
+ uintptr_t Addr = uintptr_t(Ptr);
+ if (std::error_code EC = Binary::checkOffset(M, Addr, Size))
+ return errorCodeToError(EC);
+ return reinterpret_cast<const T *>(Addr);
+}
+
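+// Returns Base + Offset as a raw integer address without dereferencing it.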
+static uintptr_t getWithOffset(uintptr_t Base, ptrdiff_t Offset) {
+ return reinterpret_cast<uintptr_t>(reinterpret_cast<const char *>(Base) +
+ Offset);
+}
+
+template <typename T> static const T *viewAs(uintptr_t in) {
+ return reinterpret_cast<const T *>(in);
+}
+
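+// Produces a StringRef of at most Size bytes, stopping early at the first
+// embedded NUL byte if one is present.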
+static StringRef generateStringRef(const char *Name, uint64_t Size) {
+ auto NulCharPtr = static_cast<const char *>(memchr(Name, '\0', Size));
+ return NulCharPtr ? StringRef(Name, NulCharPtr - Name)
+ : StringRef(Name, Size);
+}
+
+void XCOFFObjectFile::checkSectionAddress(uintptr_t Addr,
+ uintptr_t TableAddress) const {
+ if (Addr < TableAddress)
+ report_fatal_error("Section header outside of section header table.");
+
+ uintptr_t Offset = Addr - TableAddress;
+ if (Offset >= getSectionHeaderSize() * getNumberOfSections())
+ report_fatal_error("Section header outside of section header table.");
+
+ if (Offset % getSectionHeaderSize() != 0)
+ report_fatal_error(
+ "Section header pointer does not point to a valid section header.");
+}
+
+const XCOFFSectionHeader32 *
+XCOFFObjectFile::toSection32(DataRefImpl Ref) const {
+ assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+#ifndef NDEBUG
+ checkSectionAddress(Ref.p, getSectionHeaderTableAddress());
+#endif
+ return viewAs<XCOFFSectionHeader32>(Ref.p);
+}
+
+const XCOFFSectionHeader64 *
+XCOFFObjectFile::toSection64(DataRefImpl Ref) const {
+ assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+#ifndef NDEBUG
+ checkSectionAddress(Ref.p, getSectionHeaderTableAddress());
+#endif
+ return viewAs<XCOFFSectionHeader64>(Ref.p);
+}
+
+const XCOFFSymbolEntry *XCOFFObjectFile::toSymbolEntry(DataRefImpl Ref) const {
+ assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
+ assert(Ref.p != 0 && "Symbol table pointer cannot be null!");
+ auto SymEntPtr = viewAs<XCOFFSymbolEntry>(Ref.p);
+ return SymEntPtr;
+}
+
+const XCOFFFileHeader32 *XCOFFObjectFile::fileHeader32() const {
+ assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+ return static_cast<const XCOFFFileHeader32 *>(FileHeader);
+}
+
+const XCOFFFileHeader64 *XCOFFObjectFile::fileHeader64() const {
+ assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+ return static_cast<const XCOFFFileHeader64 *>(FileHeader);
+}
+
+const XCOFFSectionHeader32 *
+XCOFFObjectFile::sectionHeaderTable32() const {
+ assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+ return static_cast<const XCOFFSectionHeader32 *>(SectionHeaderTable);
+}
+
+const XCOFFSectionHeader64 *
+XCOFFObjectFile::sectionHeaderTable64() const {
+ assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+ return static_cast<const XCOFFSectionHeader64 *>(SectionHeaderTable);
+}
+
+void XCOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
+ const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+ SymEntPtr += SymEntPtr->NumberOfAuxEntries + 1;
+ Symb.p = reinterpret_cast<uintptr_t>(SymEntPtr);
+}
+
+Expected<StringRef> XCOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
+ const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+
+ if (SymEntPtr->NameInStrTbl.Magic != XCOFFSymbolEntry::NAME_IN_STR_TBL_MAGIC)
+ return generateStringRef(SymEntPtr->SymbolName, XCOFF::SymbolNameSize);
+
+ // A storage class value with the high-order bit on indicates that the name is
+ // a symbolic debugger stabstring.
+ if (SymEntPtr->StorageClass & 0x80)
+ return StringRef("Unimplemented Debug Name");
+
+ uint32_t Offset = SymEntPtr->NameInStrTbl.Offset;
+ // The byte offset is relative to the start of the string table
+ // or .debug section. A byte offset value of 0 is a null or zero-length symbol
+ // name. A byte offset in the range 1 to 3 (inclusive) points into the length
+ // field; as a soft-error recovery mechanism, we treat such cases as having an
+ // offset of 0.
+ if (Offset < 4)
+ return StringRef(nullptr, 0);
+
+ if (StringTable.Data != nullptr && StringTable.Size > Offset)
+ return (StringTable.Data + Offset);
+
+ return make_error<GenericBinaryError>("Symbol Name parse failed",
+ object_error::parse_failed);
+}
+
+Expected<uint64_t> XCOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+ uint64_t Result = 0;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
+ return toSymbolEntry(Symb)->Value;
+}
+
+uint64_t XCOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
+ uint64_t Result = 0;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+Expected<SymbolRef::Type>
+XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
+ llvm_unreachable("Not yet implemented!");
+ return SymbolRef::ST_Other;
+}
+
+Expected<section_iterator>
+XCOFFObjectFile::getSymbolSection(DataRefImpl Symb) const {
+ const XCOFFSymbolEntry *SymEntPtr = toSymbolEntry(Symb);
+ int16_t SectNum = SymEntPtr->SectionNumber;
+
+ if (isReservedSectionNumber(SectNum))
+ return section_end();
+
+ Expected<DataRefImpl> ExpSec = getSectionByNum(SectNum);
+ if (!ExpSec)
+ return ExpSec.takeError();
+
+ return section_iterator(SectionRef(ExpSec.get(), this));
+}
+
+void XCOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const {
+ const char *Ptr = reinterpret_cast<const char *>(Sec.p);
+ Sec.p = reinterpret_cast<uintptr_t>(Ptr + getSectionHeaderSize());
+}
+
+Expected<StringRef> XCOFFObjectFile::getSectionName(DataRefImpl Sec) const {
+ return generateStringRef(getSectionNameInternal(Sec), XCOFF::SectionNameSize);
+}
+
+uint64_t XCOFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
+ // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t
+ // with MSVC.
+ if (is64Bit())
+ return toSection64(Sec)->VirtualAddress;
+
+ return toSection32(Sec)->VirtualAddress;
+}
+
+uint64_t XCOFFObjectFile::getSectionIndex(DataRefImpl Sec) const {
+ // Section numbers in XCOFF are numbered beginning at 1. A section number of
+ // zero is used to indicate that a symbol is being imported or is undefined.
+ if (is64Bit())
+ return toSection64(Sec) - sectionHeaderTable64() + 1;
+ else
+ return toSection32(Sec) - sectionHeaderTable32() + 1;
+}
+
+uint64_t XCOFFObjectFile::getSectionSize(DataRefImpl Sec) const {
+ // Avoid ternary due to failure to convert the ubig32_t value to a uint64_t
+ // with MSVC.
+ if (is64Bit())
+ return toSection64(Sec)->SectionSize;
+
+ return toSection32(Sec)->SectionSize;
+}
+
+Expected<ArrayRef<uint8_t>>
+XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
+ llvm_unreachable("Not yet implemented!");
+}
+
+uint64_t XCOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
+ uint64_t Result = 0;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+bool XCOFFObjectFile::isSectionCompressed(DataRefImpl Sec) const {
+ bool Result = false;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+bool XCOFFObjectFile::isSectionText(DataRefImpl Sec) const {
+ return getSectionFlags(Sec) & XCOFF::STYP_TEXT;
+}
+
+bool XCOFFObjectFile::isSectionData(DataRefImpl Sec) const {
+ uint32_t Flags = getSectionFlags(Sec);
+ return Flags & (XCOFF::STYP_DATA | XCOFF::STYP_TDATA);
+}
+
+bool XCOFFObjectFile::isSectionBSS(DataRefImpl Sec) const {
+ uint32_t Flags = getSectionFlags(Sec);
+ return Flags & (XCOFF::STYP_BSS | XCOFF::STYP_TBSS);
+}
+
+bool XCOFFObjectFile::isSectionVirtual(DataRefImpl Sec) const {
+ bool Result = false;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+relocation_iterator XCOFFObjectFile::section_rel_begin(DataRefImpl Sec) const {
+ llvm_unreachable("Not yet implemented!");
+ return relocation_iterator(RelocationRef());
+}
+
+relocation_iterator XCOFFObjectFile::section_rel_end(DataRefImpl Sec) const {
+ llvm_unreachable("Not yet implemented!");
+ return relocation_iterator(RelocationRef());
+}
+
+void XCOFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
+ llvm_unreachable("Not yet implemented!");
+ return;
+}
+
+uint64_t XCOFFObjectFile::getRelocationOffset(DataRefImpl Rel) const {
+ llvm_unreachable("Not yet implemented!");
+ uint64_t Result = 0;
+ return Result;
+}
+
+symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
+ llvm_unreachable("Not yet implemented!");
+ return symbol_iterator(SymbolRef());
+}
+
+uint64_t XCOFFObjectFile::getRelocationType(DataRefImpl Rel) const {
+ llvm_unreachable("Not yet implemented!");
+ uint64_t Result = 0;
+ return Result;
+}
+
+void XCOFFObjectFile::getRelocationTypeName(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+ llvm_unreachable("Not yet implemented!");
+ return;
+}
+
+uint32_t XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
+ uint32_t Result = 0;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+basic_symbol_iterator XCOFFObjectFile::symbol_begin() const {
+ assert(!is64Bit() && "64-bit support not implemented yet.");
+ DataRefImpl SymDRI;
+ SymDRI.p = reinterpret_cast<uintptr_t>(SymbolTblPtr);
+ return basic_symbol_iterator(SymbolRef(SymDRI, this));
+}
+
+basic_symbol_iterator XCOFFObjectFile::symbol_end() const {
+ assert(!is64Bit() && "64-bit support not implemented yet.");
+ DataRefImpl SymDRI;
+ SymDRI.p = reinterpret_cast<uintptr_t>(
+ SymbolTblPtr + getLogicalNumberOfSymbolTableEntries32());
+ return basic_symbol_iterator(SymbolRef(SymDRI, this));
+}
+
+section_iterator XCOFFObjectFile::section_begin() const {
+ DataRefImpl DRI;
+ DRI.p = getSectionHeaderTableAddress();
+ return section_iterator(SectionRef(DRI, this));
+}
+
+section_iterator XCOFFObjectFile::section_end() const {
+ DataRefImpl DRI;
+ DRI.p = getWithOffset(getSectionHeaderTableAddress(),
+ getNumberOfSections() * getSectionHeaderSize());
+ return section_iterator(SectionRef(DRI, this));
+}
+
+uint8_t XCOFFObjectFile::getBytesInAddress() const { return is64Bit() ? 8 : 4; }
+
+StringRef XCOFFObjectFile::getFileFormatName() const {
+ return is64Bit() ? "aix5coff64-rs6000" : "aixcoff-rs6000";
+}
+
+Triple::ArchType XCOFFObjectFile::getArch() const {
+ return is64Bit() ? Triple::ppc64 : Triple::ppc;
+}
+
+SubtargetFeatures XCOFFObjectFile::getFeatures() const {
+ llvm_unreachable("Not yet implemented!");
+ return SubtargetFeatures();
+}
+
+bool XCOFFObjectFile::isRelocatableObject() const {
+ bool Result = false;
+ llvm_unreachable("Not yet implemented!");
+ return Result;
+}
+
+Expected<uint64_t> XCOFFObjectFile::getStartAddress() const {
+ // TODO FIXME Should get from auxiliary_header->o_entry when support for the
+ // auxiliary_header is added.
+ return 0;
+}
+
+size_t XCOFFObjectFile::getFileHeaderSize() const {
+ return is64Bit() ? sizeof(XCOFFFileHeader64) : sizeof(XCOFFFileHeader32);
+}
+
+size_t XCOFFObjectFile::getSectionHeaderSize() const {
+ return is64Bit() ? sizeof(XCOFFSectionHeader64) :
+ sizeof(XCOFFSectionHeader32);
+}
+
+bool XCOFFObjectFile::is64Bit() const {
+ return Binary::ID_XCOFF64 == getType();
+}
+
+uint16_t XCOFFObjectFile::getMagic() const {
+ return is64Bit() ? fileHeader64()->Magic : fileHeader32()->Magic;
+}
+
+Expected<DataRefImpl> XCOFFObjectFile::getSectionByNum(int16_t Num) const {
+ if (Num <= 0 || Num > getNumberOfSections())
+ return errorCodeToError(object_error::invalid_section_index);
+
+ DataRefImpl DRI;
+ DRI.p = getWithOffset(getSectionHeaderTableAddress(),
+ getSectionHeaderSize() * (Num - 1));
+ return DRI;
+}
+
+Expected<StringRef>
+XCOFFObjectFile::getSymbolSectionName(const XCOFFSymbolEntry *SymEntPtr) const {
+ assert(!is64Bit() && "Symbol table support not implemented for 64-bit.");
+ int16_t SectionNum = SymEntPtr->SectionNumber;
+
+ switch (SectionNum) {
+ case XCOFF::N_DEBUG:
+ return "N_DEBUG";
+ case XCOFF::N_ABS:
+ return "N_ABS";
+ case XCOFF::N_UNDEF:
+ return "N_UNDEF";
+ default:
+ Expected<DataRefImpl> SecRef = getSectionByNum(SectionNum);
+ if (SecRef)
+ return generateStringRef(getSectionNameInternal(SecRef.get()),
+ XCOFF::SectionNameSize);
+ return SecRef.takeError();
+ }
+}
+
+bool XCOFFObjectFile::isReservedSectionNumber(int16_t SectionNumber) {
+ return (SectionNumber <= 0 && SectionNumber >= -2);
+}
+
+uint16_t XCOFFObjectFile::getNumberOfSections() const {
+ return is64Bit() ? fileHeader64()->NumberOfSections
+ : fileHeader32()->NumberOfSections;
+}
+
+int32_t XCOFFObjectFile::getTimeStamp() const {
+ return is64Bit() ? fileHeader64()->TimeStamp : fileHeader32()->TimeStamp;
+}
+
+uint16_t XCOFFObjectFile::getOptionalHeaderSize() const {
+ return is64Bit() ? fileHeader64()->AuxHeaderSize
+ : fileHeader32()->AuxHeaderSize;
+}
+
+uint32_t XCOFFObjectFile::getSymbolTableOffset32() const {
+ return fileHeader32()->SymbolTableOffset;
+}
+
+int32_t XCOFFObjectFile::getRawNumberOfSymbolTableEntries32() const {
+ // As far as the symbol table size is concerned, a negative value in this
+ // field is treated as 0. However, since this field is also used for
+ // printing, we don't want to truncate any negative values.
+ return fileHeader32()->NumberOfSymTableEntries;
+}
+
+uint32_t XCOFFObjectFile::getLogicalNumberOfSymbolTableEntries32() const {
+ return (fileHeader32()->NumberOfSymTableEntries >= 0
+ ? fileHeader32()->NumberOfSymTableEntries
+ : 0);
+}
+
+uint64_t XCOFFObjectFile::getSymbolTableOffset64() const {
+ return fileHeader64()->SymbolTableOffset;
+}
+
+uint32_t XCOFFObjectFile::getNumberOfSymbolTableEntries64() const {
+ return fileHeader64()->NumberOfSymTableEntries;
+}
+
+uint16_t XCOFFObjectFile::getFlags() const {
+ return is64Bit() ? fileHeader64()->Flags : fileHeader32()->Flags;
+}
+
+const char *XCOFFObjectFile::getSectionNameInternal(DataRefImpl Sec) const {
+ return is64Bit() ? toSection64(Sec)->Name : toSection32(Sec)->Name;
+}
+
+uintptr_t XCOFFObjectFile::getSectionHeaderTableAddress() const {
+ return reinterpret_cast<uintptr_t>(SectionHeaderTable);
+}
+
+int32_t XCOFFObjectFile::getSectionFlags(DataRefImpl Sec) const {
+ return is64Bit() ? toSection64(Sec)->Flags : toSection32(Sec)->Flags;
+}
+
+XCOFFObjectFile::XCOFFObjectFile(unsigned int Type, MemoryBufferRef Object)
+ : ObjectFile(Type, Object) {
+ assert(Type == Binary::ID_XCOFF32 || Type == Binary::ID_XCOFF64);
+}
+
+ArrayRef<XCOFFSectionHeader64> XCOFFObjectFile::sections64() const {
+ assert(is64Bit() && "64-bit interface called for non 64-bit file.");
+ const XCOFFSectionHeader64 *TablePtr = sectionHeaderTable64();
+ return ArrayRef<XCOFFSectionHeader64>(TablePtr,
+ TablePtr + getNumberOfSections());
+}
+
+ArrayRef<XCOFFSectionHeader32> XCOFFObjectFile::sections32() const {
+ assert(!is64Bit() && "32-bit interface called for non 32-bit file.");
+ const XCOFFSectionHeader32 *TablePtr = sectionHeaderTable32();
+ return ArrayRef<XCOFFSectionHeader32>(TablePtr,
+ TablePtr + getNumberOfSections());
+}
+
+Expected<XCOFFStringTable>
+XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
+ // If there is a string table, then the buffer must contain at least 4 bytes
+ // for the string table's size. Not having a string table is not an error.
+ if (auto EC = Binary::checkOffset(
+ Obj->Data, reinterpret_cast<uintptr_t>(Obj->base() + Offset), 4))
+ return XCOFFStringTable{0, nullptr};
+
+ // Read the size out of the buffer.
+ uint32_t Size = support::endian::read32be(Obj->base() + Offset);
+
+  // If the size is 4 or less, the string table contains only its size field
+  // and no string data.
+ if (Size <= 4)
+ return XCOFFStringTable{4, nullptr};
+
+ auto StringTableOrErr =
+ getObject<char>(Obj->Data, Obj->base() + Offset, Size);
+ if (Error E = StringTableOrErr.takeError())
+ return std::move(E);
+
+ const char *StringTablePtr = StringTableOrErr.get();
+ if (StringTablePtr[Size - 1] != '\0')
+ return errorCodeToError(object_error::string_table_non_null_end);
+
+ return XCOFFStringTable{Size, StringTablePtr};
+}
+
+Expected<std::unique_ptr<XCOFFObjectFile>>
+XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
+ // Can't use make_unique because of the private constructor.
+ std::unique_ptr<XCOFFObjectFile> Obj;
+ Obj.reset(new XCOFFObjectFile(Type, MBR));
+
+ uint64_t CurOffset = 0;
+ const auto *Base = Obj->base();
+ MemoryBufferRef Data = Obj->Data;
+
+ // Parse file header.
+ auto FileHeaderOrErr =
+ getObject<void>(Data, Base + CurOffset, Obj->getFileHeaderSize());
+ if (Error E = FileHeaderOrErr.takeError())
+ return std::move(E);
+ Obj->FileHeader = FileHeaderOrErr.get();
+
+ CurOffset += Obj->getFileHeaderSize();
+  // TODO FIXME We don't have support for an optional header yet, so just skip
+  // past it.
+ CurOffset += Obj->getOptionalHeaderSize();
+
+ // Parse the section header table if it is present.
+ if (Obj->getNumberOfSections()) {
+ auto SecHeadersOrErr = getObject<void>(Data, Base + CurOffset,
+ Obj->getNumberOfSections() *
+ Obj->getSectionHeaderSize());
+ if (Error E = SecHeadersOrErr.takeError())
+ return std::move(E);
+ Obj->SectionHeaderTable = SecHeadersOrErr.get();
+ }
+
+  // 64-bit objects support only the file header and section headers for now.
+ if (Obj->is64Bit())
+ return std::move(Obj);
+
+  // If there is no symbol table, we are done parsing the memory buffer.
+ if (Obj->getLogicalNumberOfSymbolTableEntries32() == 0)
+ return std::move(Obj);
+
+ // Parse symbol table.
+ CurOffset = Obj->fileHeader32()->SymbolTableOffset;
+ uint64_t SymbolTableSize = (uint64_t)(sizeof(XCOFFSymbolEntry)) *
+ Obj->getLogicalNumberOfSymbolTableEntries32();
+ auto SymTableOrErr =
+ getObject<XCOFFSymbolEntry>(Data, Base + CurOffset, SymbolTableSize);
+ if (Error E = SymTableOrErr.takeError())
+ return std::move(E);
+ Obj->SymbolTblPtr = SymTableOrErr.get();
+ CurOffset += SymbolTableSize;
+
+  // Parse string table.
+ Expected<XCOFFStringTable> StringTableOrErr =
+ parseStringTable(Obj.get(), CurOffset);
+ if (Error E = StringTableOrErr.takeError())
+ return std::move(E);
+ Obj->StringTable = StringTableOrErr.get();
+
+ return std::move(Obj);
+}
+
+Expected<std::unique_ptr<ObjectFile>>
+ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef,
+ unsigned FileType) {
+ return XCOFFObjectFile::create(FileType, MemBufRef);
+}
+
+StringRef XCOFFSectionHeader32::getName() const {
+ return generateStringRef(Name, XCOFF::SectionNameSize);
+}
+
+StringRef XCOFFSectionHeader64::getName() const {
+ return generateStringRef(Name, XCOFF::SectionNameSize);
+}
+
+} // namespace object
+} // namespace llvm
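
The XCOFF reader added above exposes a small query surface (getFileFormatName, is64Bit, getNumberOfSections) once an object has been created through the usual ObjectFile factory. A minimal sketch of driving it, assuming the factory's file-magic detection and the dyn_cast support declared in XCOFFObjectFile.h are in place; the helper name is illustrative and not taken from the patch:

    #include "llvm/Object/XCOFFObjectFile.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    // Print a one-line summary for an XCOFF input; other formats and unreadable
    // inputs are silently ignored in this sketch.
    static void dumpXCOFFSummary(llvm::MemoryBufferRef MBR) {
      using namespace llvm;
      using namespace llvm::object;
      Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
          ObjectFile::createObjectFile(MBR);
      if (!ObjOrErr) {
        consumeError(ObjOrErr.takeError());
        return;
      }
      if (const auto *XObj = dyn_cast<XCOFFObjectFile>(ObjOrErr->get()))
        outs() << XObj->getFileFormatName() << ", "
               << (XObj->is64Bit() ? "64-bit" : "32-bit") << ", "
               << XObj->getNumberOfSections() << " sections\n";
    }
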
diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp
index fdd94f4054e1..b5154467f11a 100644
--- a/lib/ObjectYAML/COFFYAML.cpp
+++ b/lib/ObjectYAML/COFFYAML.cpp
@@ -1,9 +1,8 @@
//===- COFFYAML.cpp - COFF YAMLIO implementation --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -188,6 +187,7 @@ void ScalarEnumerationTraits<COFF::RelocationTypesARM>::enumeration(
ECase(IMAGE_REL_ARM_TOKEN);
ECase(IMAGE_REL_ARM_BLX24);
ECase(IMAGE_REL_ARM_BLX11);
+ ECase(IMAGE_REL_ARM_REL32);
ECase(IMAGE_REL_ARM_SECTION);
ECase(IMAGE_REL_ARM_SECREL);
ECase(IMAGE_REL_ARM_MOV32A);
@@ -195,6 +195,7 @@ void ScalarEnumerationTraits<COFF::RelocationTypesARM>::enumeration(
ECase(IMAGE_REL_ARM_BRANCH20T);
ECase(IMAGE_REL_ARM_BRANCH24T);
ECase(IMAGE_REL_ARM_BLX23T);
+ ECase(IMAGE_REL_ARM_PAIR);
}
void ScalarEnumerationTraits<COFF::RelocationTypesARM64>::enumeration(
@@ -216,6 +217,7 @@ void ScalarEnumerationTraits<COFF::RelocationTypesARM64>::enumeration(
ECase(IMAGE_REL_ARM64_ADDR64);
ECase(IMAGE_REL_ARM64_BRANCH19);
ECase(IMAGE_REL_ARM64_BRANCH14);
+ ECase(IMAGE_REL_ARM64_REL32);
}
void ScalarEnumerationTraits<COFF::WindowsSubsystem>::enumeration(
@@ -576,6 +578,12 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
else if (Sec.Name == ".debug$H")
IO.mapOptional("GlobalHashes", Sec.DebugH);
+ // Uninitialized sections, such as .bss, typically have no data, but the size
+ // is carried in SizeOfRawData, even though PointerToRawData is zero.
+ if (Sec.SectionData.binary_size() == 0 &&
+ NC->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+ IO.mapOptional("SizeOfRawData", Sec.Header.SizeOfRawData);
+
IO.mapOptional("Relocations", Sec.Relocations);
}
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index 4deeae878013..eeebb694589b 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -1,9 +1,8 @@
//===- CodeViewYAMLDebugSections.cpp - CodeView YAMLIO debug sections -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 713e9a710e94..227107c051dd 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -1,9 +1,8 @@
//===- CodeViewYAMLSymbols.cpp - CodeView YAMLIO Symbol implementation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,7 +147,7 @@ void ScalarEnumerationTraits<CPUType>::enumeration(IO &io, CPUType &Cpu) {
}
void ScalarEnumerationTraits<RegisterId>::enumeration(IO &io, RegisterId &Reg) {
- auto RegNames = getRegisterNames();
+ auto RegNames = getRegisterNames(CPUType::X64);
for (const auto &E : RegNames) {
io.enumCase(Reg, E.Name.str().c_str(), static_cast<RegisterId>(E.Value));
}
@@ -249,7 +248,7 @@ struct UnknownSymbolRecord : public SymbolRecordBase {
uint8_t *Buffer = Allocator.Allocate<uint8_t>(TotalLen);
::memcpy(Buffer, &Prefix, sizeof(RecordPrefix));
::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size());
- return CVSymbol(Kind, ArrayRef<uint8_t>(Buffer, TotalLen));
+ return CVSymbol(ArrayRef<uint8_t>(Buffer, TotalLen));
}
Error fromCodeViewSymbol(CVSymbol CVS) override {
@@ -554,6 +553,12 @@ template <> void SymbolRecordImpl<UsingNamespaceSym>::map(IO &IO) {
IO.mapRequired("Namespace", Symbol.Name);
}
+template <> void SymbolRecordImpl<AnnotationSym>::map(IO &IO) {
+ IO.mapOptional("Offset", Symbol.CodeOffset, 0U);
+ IO.mapOptional("Segment", Symbol.Segment, uint16_t(0));
+ IO.mapRequired("Strings", Symbol.Strings);
+}
+
} // end namespace detail
} // end namespace CodeViewYAML
} // end namespace llvm
diff --git a/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp b/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
index ed117059560f..e921ae1e7d8d 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp
@@ -1,9 +1,8 @@
//===- CodeViewYAMLTypeHashing.cpp - CodeView YAMLIO type hashing ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
index 791b115dc492..a5e3ce1e71e8 100644
--- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp
@@ -1,9 +1,8 @@
//===- CodeViewYAMLTypes.cpp - CodeView YAMLIO types implementation -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,7 +98,7 @@ template <typename T> struct LeafRecordImpl : public LeafRecordBase {
CVType toCodeViewRecord(AppendingTypeTableBuilder &TS) const override {
TS.writeLeafType(Record);
- return CVType(Kind, TS.records().back());
+ return CVType(TS.records().back());
}
mutable T Record;
@@ -497,7 +496,7 @@ CVType LeafRecordImpl<FieldListRecord>::toCodeViewRecord(
Member.Member->writeTo(CRB);
}
TS.insertRecord(CRB);
- return CVType(Kind, TS.records().back());
+ return CVType(TS.records().back());
}
void MappingTraits<OneMethodRecord>::mapping(IO &io, OneMethodRecord &Record) {
diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp
index f23fa1237600..2ae66997cf59 100644
--- a/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/lib/ObjectYAML/DWARFEmitter.cpp
@@ -1,9 +1,8 @@
//===- DWARFEmitter - Convert YAML to DWARF binary data -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/ObjectYAML/DWARFVisitor.cpp b/lib/ObjectYAML/DWARFVisitor.cpp
index e6114c85ac0b..ecb5967ac532 100644
--- a/lib/ObjectYAML/DWARFVisitor.cpp
+++ b/lib/ObjectYAML/DWARFVisitor.cpp
@@ -1,9 +1,8 @@
//===--- DWARFVisitor.cpp ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ObjectYAML/DWARFVisitor.h b/lib/ObjectYAML/DWARFVisitor.h
index 5489031dc331..50e88aa7a26b 100644
--- a/lib/ObjectYAML/DWARFVisitor.h
+++ b/lib/ObjectYAML/DWARFVisitor.h
@@ -1,9 +1,8 @@
//===--- DWARFVisitor.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ObjectYAML/DWARFYAML.cpp b/lib/ObjectYAML/DWARFYAML.cpp
index d6c09e1a35d7..bb3b1422eb62 100644
--- a/lib/ObjectYAML/DWARFYAML.cpp
+++ b/lib/ObjectYAML/DWARFYAML.cpp
@@ -1,9 +1,8 @@
//===- DWARFYAML.cpp - DWARF YAMLIO implementation ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index 215d6bdd091e..7497154c757d 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -1,9 +1,8 @@
//===- ELFYAML.cpp - ELF YAMLIO implementation ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -233,8 +232,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFCLASS>::enumeration(
void ScalarEnumerationTraits<ELFYAML::ELF_ELFDATA>::enumeration(
IO &IO, ELFYAML::ELF_ELFDATA &Value) {
#define ECase(X) IO.enumCase(Value, #X, ELF::X)
- // Since the semantics of ELFDATANONE is "invalid", just don't accept it
- // here.
+ // ELFDATANONE is an invalid data encoding, but we accept it because
+ // we want to be able to produce invalid binaries for the tests.
+ ECase(ELFDATANONE);
ECase(ELFDATA2LSB);
ECase(ELFDATA2MSB);
#undef ECase
@@ -410,7 +410,11 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX902, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX908, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;
@@ -447,7 +451,6 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_GROUP);
ECase(SHT_SYMTAB_SHNDX);
ECase(SHT_RELR);
- ECase(SHT_LOOS);
ECase(SHT_ANDROID_REL);
ECase(SHT_ANDROID_RELA);
ECase(SHT_ANDROID_RELR);
@@ -455,13 +458,12 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_LLVM_LINKER_OPTIONS);
ECase(SHT_LLVM_CALL_GRAPH_PROFILE);
ECase(SHT_LLVM_ADDRSIG);
+ ECase(SHT_LLVM_DEPENDENT_LIBRARIES);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
ECase(SHT_GNU_verdef);
ECase(SHT_GNU_verneed);
ECase(SHT_GNU_versym);
- ECase(SHT_HIOS);
- ECase(SHT_LOPROC);
switch (Object->Header.Machine) {
case ELF::EM_ARM:
ECase(SHT_ARM_EXIDX);
@@ -479,6 +481,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
case ELF::EM_MIPS:
ECase(SHT_MIPS_REGINFO);
ECase(SHT_MIPS_OPTIONS);
+ ECase(SHT_MIPS_DWARF);
ECase(SHT_MIPS_ABIFLAGS);
break;
default:
@@ -486,6 +489,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
break;
}
#undef ECase
+ IO.enumFallback<Hex32>(Value);
}
void ScalarBitSetTraits<ELFYAML::ELF_PF>::bitset(IO &IO,
@@ -552,6 +556,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
ECase(SHN_COMMON);
ECase(SHN_XINDEX);
ECase(SHN_HIRESERVE);
+ ECase(SHN_AMDGPU_LDS);
ECase(SHN_HEXAGON_SCOMMON);
ECase(SHN_HEXAGON_SCOMMON_1);
ECase(SHN_HEXAGON_SCOMMON_2);
@@ -561,6 +566,17 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHN>::enumeration(
IO.enumFallback<Hex32>(Value);
}
+void ScalarEnumerationTraits<ELFYAML::ELF_STB>::enumeration(
+ IO &IO, ELFYAML::ELF_STB &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(STB_LOCAL);
+ ECase(STB_GLOBAL);
+ ECase(STB_WEAK);
+ ECase(STB_GNU_UNIQUE);
+#undef ECase
+ IO.enumFallback<Hex8>(Value);
+}
+
void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
IO &IO, ELFYAML::ELF_STT &Value) {
#define ECase(X) IO.enumCase(Value, #X, ELF::X)
@@ -573,6 +589,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
ECase(STT_TLS);
ECase(STT_GNU_IFUNC);
#undef ECase
+ IO.enumFallback<Hex8>(Value);
}
void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
@@ -661,6 +678,74 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
IO.enumFallback<Hex32>(Value);
}
+void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
+ IO &IO, ELFYAML::ELF_DYNTAG &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
+
+// Disable architecture-specific tags by default. We might enable them below.
+#define AARCH64_DYNAMIC_TAG(name, value)
+#define MIPS_DYNAMIC_TAG(name, value)
+#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG(name, value)
+#define PPC64_DYNAMIC_TAG(name, value)
+// Ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
+#define DYNAMIC_TAG_MARKER(name, value)
+
+#define STRINGIFY(X) (#X)
+#define DYNAMIC_TAG(X, Y) IO.enumCase(Value, STRINGIFY(DT_##X), ELF::DT_##X);
+ switch (Object->Header.Machine) {
+ case ELF::EM_AARCH64:
+#undef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+#define AARCH64_DYNAMIC_TAG(name, value)
+ break;
+ case ELF::EM_MIPS:
+#undef MIPS_DYNAMIC_TAG
+#define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef MIPS_DYNAMIC_TAG
+#define MIPS_DYNAMIC_TAG(name, value)
+ break;
+ case ELF::EM_HEXAGON:
+#undef HEXAGON_DYNAMIC_TAG
+#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef HEXAGON_DYNAMIC_TAG
+#define HEXAGON_DYNAMIC_TAG(name, value)
+ break;
+ case ELF::EM_PPC:
+#undef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC_DYNAMIC_TAG
+#define PPC_DYNAMIC_TAG(name, value)
+ break;
+ case ELF::EM_PPC64:
+#undef PPC64_DYNAMIC_TAG
+#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC64_DYNAMIC_TAG
+#define PPC64_DYNAMIC_TAG(name, value)
+ break;
+ default:
+#include "llvm/BinaryFormat/DynamicTags.def"
+ break;
+ }
+#undef AARCH64_DYNAMIC_TAG
+#undef MIPS_DYNAMIC_TAG
+#undef HEXAGON_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG
+#undef PPC64_DYNAMIC_TAG
+#undef DYNAMIC_TAG_MARKER
+#undef STRINGIFY
+#undef DYNAMIC_TAG
+
+ IO.enumFallback<Hex64>(Value);
+}
+
void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_REG>::enumeration(
IO &IO, ELFYAML::MIPS_AFL_REG &Value) {
#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X)
@@ -758,6 +843,11 @@ void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
IO.mapRequired("Machine", FileHdr.Machine);
IO.mapOptional("Flags", FileHdr.Flags, ELFYAML::ELF_EF(0));
IO.mapOptional("Entry", FileHdr.Entry, Hex64(0));
+
+ IO.mapOptional("SHEntSize", FileHdr.SHEntSize);
+ IO.mapOptional("SHOffset", FileHdr.SHOffset);
+ IO.mapOptional("SHNum", FileHdr.SHNum);
+ IO.mapOptional("SHStrNdx", FileHdr.SHStrNdx);
}
void MappingTraits<ELFYAML::ProgramHeader>::mapping(
@@ -768,6 +858,9 @@ void MappingTraits<ELFYAML::ProgramHeader>::mapping(
IO.mapOptional("VAddr", Phdr.VAddr, Hex64(0));
IO.mapOptional("PAddr", Phdr.PAddr, Hex64(0));
IO.mapOptional("Align", Phdr.Align);
+ IO.mapOptional("FileSize", Phdr.FileSize);
+ IO.mapOptional("MemSize", Phdr.MemSize);
+ IO.mapOptional("Offset", Phdr.Offset);
}
namespace {
@@ -788,12 +881,13 @@ struct NormalizedOther {
void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Name", Symbol.Name, StringRef());
+ IO.mapOptional("NameIndex", Symbol.NameIndex);
IO.mapOptional("Type", Symbol.Type, ELFYAML::ELF_STT(0));
IO.mapOptional("Section", Symbol.Section, StringRef());
IO.mapOptional("Index", Symbol.Index);
+ IO.mapOptional("Binding", Symbol.Binding, ELFYAML::ELF_STB(0));
IO.mapOptional("Value", Symbol.Value, Hex64(0));
IO.mapOptional("Size", Symbol.Size, Hex64(0));
-
MappingNormalization<NormalizedOther, uint8_t> Keys(IO, Symbol.Other);
IO.mapOptional("Visibility", Keys->Visibility, ELFYAML::ELF_STV(0));
IO.mapOptional("Other", Keys->Other, ELFYAML::ELF_STO(0));
@@ -801,40 +895,44 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
StringRef MappingTraits<ELFYAML::Symbol>::validate(IO &IO,
ELFYAML::Symbol &Symbol) {
- if (Symbol.Index && Symbol.Section.data()) {
+ if (Symbol.Index && Symbol.Section.data())
return "Index and Section cannot both be specified for Symbol";
- }
- if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX)) {
+ if (Symbol.Index && *Symbol.Index == ELFYAML::ELF_SHN(ELF::SHN_XINDEX))
return "Large indexes are not supported";
- }
- if (Symbol.Index && *Symbol.Index < ELFYAML::ELF_SHN(ELF::SHN_LORESERVE)) {
- return "Use a section name to define which section a symbol is defined in";
- }
+ if (Symbol.NameIndex && !Symbol.Name.empty())
+ return "Name and NameIndex cannot both be specified for Symbol";
return StringRef();
}
-void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping(
- IO &IO, ELFYAML::LocalGlobalWeakSymbols &Symbols) {
- IO.mapOptional("Local", Symbols.Local);
- IO.mapOptional("Global", Symbols.Global);
- IO.mapOptional("Weak", Symbols.Weak);
-}
-
static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("Name", Section.Name, StringRef());
IO.mapRequired("Type", Section.Type);
- IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0));
+ IO.mapOptional("Flags", Section.Flags);
IO.mapOptional("Address", Section.Address, Hex64(0));
IO.mapOptional("Link", Section.Link, StringRef());
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
IO.mapOptional("EntSize", Section.EntSize);
- IO.mapOptional("Info", Section.Info, StringRef());
+
+ // obj2yaml does not dump these fields. They are expected to be empty when we
+ // are producing YAML, because yaml2obj sets appropriate values for sh_offset
+ // and sh_size automatically when they are not explicitly defined.
+ assert(!IO.outputting() ||
+ (!Section.ShOffset.hasValue() && !Section.ShSize.hasValue()));
+ IO.mapOptional("ShOffset", Section.ShOffset);
+ IO.mapOptional("ShSize", Section.ShSize);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::DynamicSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Entries", Section.Entries);
+ IO.mapOptional("Content", Section.Content);
}
static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
commonSectionMapping(IO, Section);
IO.mapOptional("Content", Section.Content);
- IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size()));
+ IO.mapOptional("Size", Section.Size);
+ IO.mapOptional("Info", Section.Info);
}
static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
@@ -842,14 +940,33 @@ static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) {
IO.mapOptional("Size", Section.Size, Hex64(0));
}
+static void sectionMapping(IO &IO, ELFYAML::VerdefSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Info", Section.Info);
+ IO.mapRequired("Entries", Section.Entries);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Entries", Section.Entries);
+}
+
+static void sectionMapping(IO &IO, ELFYAML::VerneedSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Info", Section.Info);
+ IO.mapRequired("Dependencies", Section.VerneedV);
+}
+
static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
commonSectionMapping(IO, Section);
+ IO.mapOptional("Info", Section.RelocatableSec, StringRef());
IO.mapOptional("Relocations", Section.Relocations);
}
-static void groupSectionMapping(IO &IO, ELFYAML::Group &group) {
- commonSectionMapping(IO, group);
- IO.mapRequired("Members", group.Members);
+static void groupSectionMapping(IO &IO, ELFYAML::Group &Group) {
+ commonSectionMapping(IO, Group);
+ IO.mapOptional("Info", Group.Signature, StringRef());
+ IO.mapRequired("Members", Group.Members);
}
void MappingTraits<ELFYAML::SectionOrType>::mapping(
@@ -891,6 +1008,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
IO.mapRequired("Type", sectionType);
switch (sectionType) {
+ case ELF::SHT_DYNAMIC:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::DynamicSection());
+ sectionMapping(IO, *cast<ELFYAML::DynamicSection>(Section.get()));
+ break;
case ELF::SHT_REL:
case ELF::SHT_RELA:
if (!IO.outputting())
@@ -912,6 +1034,21 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
Section.reset(new ELFYAML::MipsABIFlags());
sectionMapping(IO, *cast<ELFYAML::MipsABIFlags>(Section.get()));
break;
+ case ELF::SHT_GNU_verdef:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::VerdefSection());
+ sectionMapping(IO, *cast<ELFYAML::VerdefSection>(Section.get()));
+ break;
+ case ELF::SHT_GNU_versym:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::SymverSection());
+ sectionMapping(IO, *cast<ELFYAML::SymverSection>(Section.get()));
+ break;
+ case ELF::SHT_GNU_verneed:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::VerneedSection());
+ sectionMapping(IO, *cast<ELFYAML::VerneedSection>(Section.get()));
+ break;
default:
if (!IO.outputting())
Section.reset(new ELFYAML::RawContentSection());
@@ -922,9 +1059,12 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
IO &io, std::unique_ptr<ELFYAML::Section> &Section) {
const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(Section.get());
- if (!RawSection || RawSection->Size >= RawSection->Content.binary_size())
- return StringRef();
- return "Section size must be greater or equal to the content size";
+ if (!RawSection)
+ return {};
+ if (RawSection->Size && RawSection->Content &&
+ (uint64_t)(*RawSection->Size) < RawSection->Content->binary_size())
+ return "Section size must be greater than or equal to the content size";
+ return {};
}
namespace {
@@ -952,6 +1092,44 @@ struct NormalizedMips64RelType {
} // end anonymous namespace
+void MappingTraits<ELFYAML::DynamicEntry>::mapping(IO &IO,
+ ELFYAML::DynamicEntry &Rel) {
+ assert(IO.getContext() && "The IO context is not initialized");
+
+ IO.mapRequired("Tag", Rel.Tag);
+ IO.mapRequired("Value", Rel.Val);
+}
+
+void MappingTraits<ELFYAML::VerdefEntry>::mapping(IO &IO,
+ ELFYAML::VerdefEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+
+ IO.mapRequired("Version", E.Version);
+ IO.mapRequired("Flags", E.Flags);
+ IO.mapRequired("VersionNdx", E.VersionNdx);
+ IO.mapRequired("Hash", E.Hash);
+ IO.mapRequired("Names", E.VerNames);
+}
+
+void MappingTraits<ELFYAML::VerneedEntry>::mapping(IO &IO,
+ ELFYAML::VerneedEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+
+ IO.mapRequired("Version", E.Version);
+ IO.mapRequired("File", E.File);
+ IO.mapRequired("Entries", E.AuxV);
+}
+
+void MappingTraits<ELFYAML::VernauxEntry>::mapping(IO &IO,
+ ELFYAML::VernauxEntry &E) {
+ assert(IO.getContext() && "The IO context is not initialized");
+
+ IO.mapRequired("Name", E.Name);
+ IO.mapRequired("Hash", E.Hash);
+ IO.mapRequired("Flags", E.Flags);
+ IO.mapRequired("Other", E.Other);
+}
+
void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
ELFYAML::Relocation &Rel) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index e00a4ea93074..d12f12cf4435 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -1,9 +1,8 @@
//===- MachOYAML.cpp - MachO YAMLIO implementation ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ObjectYAML/MinidumpYAML.cpp b/lib/ObjectYAML/MinidumpYAML.cpp
new file mode 100644
index 000000000000..f5f2acd0cc4b
--- /dev/null
+++ b/lib/ObjectYAML/MinidumpYAML.cpp
@@ -0,0 +1,673 @@
+//===- MinidumpYAML.cpp - Minidump YAMLIO implementation ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/MinidumpYAML.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ConvertUTF.h"
+
+using namespace llvm;
+using namespace llvm::MinidumpYAML;
+using namespace llvm::minidump;
+
+namespace {
+/// A helper class to manage the placement of various structures into the final
+/// minidump binary. Space for objects can be allocated via various allocate***
+/// methods, while the final minidump file is written by calling the writeTo
+/// method. The plain versions of allocation functions take a reference to the
+/// data which is to be written (and hence the data must be available until
+/// writeTo is called), while the "New" versions allocate the data in an
+/// allocator-managed buffer, which is available until the allocator object is
+/// destroyed. For both kinds of functions, it is possible to modify the
+/// data for which the space has been "allocated" until the final writeTo call.
+/// This is useful for "linking" the allocated structures via their offsets.
+class BlobAllocator {
+public:
+ size_t tell() const { return NextOffset; }
+
+ size_t allocateCallback(size_t Size,
+ std::function<void(raw_ostream &)> Callback) {
+ size_t Offset = NextOffset;
+ NextOffset += Size;
+ Callbacks.push_back(std::move(Callback));
+ return Offset;
+ }
+
+ size_t allocateBytes(ArrayRef<uint8_t> Data) {
+ return allocateCallback(
+ Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
+ }
+
+ size_t allocateBytes(yaml::BinaryRef Data) {
+ return allocateCallback(Data.binary_size(), [Data](raw_ostream &OS) {
+ Data.writeAsBinary(OS);
+ });
+ }
+
+ template <typename T> size_t allocateArray(ArrayRef<T> Data) {
+ return allocateBytes({reinterpret_cast<const uint8_t *>(Data.data()),
+ sizeof(T) * Data.size()});
+ }
+
+ template <typename T, typename RangeType>
+ std::pair<size_t, MutableArrayRef<T>>
+ allocateNewArray(const iterator_range<RangeType> &Range);
+
+ template <typename T> size_t allocateObject(const T &Data) {
+ return allocateArray(makeArrayRef(Data));
+ }
+
+ template <typename T, typename... Types>
+ std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
+ T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
+ return {allocateObject(*Object), Object};
+ }
+
+ size_t allocateString(StringRef Str);
+
+ void writeTo(raw_ostream &OS) const;
+
+private:
+ size_t NextOffset = 0;
+
+ BumpPtrAllocator Temporaries;
+ std::vector<std::function<void(raw_ostream &)>> Callbacks;
+};
+} // namespace
+
+template <typename T, typename RangeType>
+std::pair<size_t, MutableArrayRef<T>>
+BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
+ size_t Num = std::distance(Range.begin(), Range.end());
+ MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
+ std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
+ return {allocateArray(Array), Array};
+}
+
+size_t BlobAllocator::allocateString(StringRef Str) {
+ SmallVector<UTF16, 32> WStr;
+ bool OK = convertUTF8ToUTF16String(Str, WStr);
+ assert(OK && "Invalid UTF8 in Str?");
+ (void)OK;
+
+ // The utf16 string is null-terminated, but the terminator is not counted in
+ // the string size.
+ WStr.push_back(0);
+ size_t Result =
+ allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
+ allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
+ return Result;
+}
+
+void BlobAllocator::writeTo(raw_ostream &OS) const {
+ size_t BeginOffset = OS.tell();
+ for (const auto &Callback : Callbacks)
+ Callback(OS);
+ assert(OS.tell() == BeginOffset + NextOffset &&
+ "Callbacks wrote an unexpected number of bytes.");
+ (void)BeginOffset;
+}
+
+/// Perform an optional yaml-mapping of an endian-aware type EndianType. The
+/// only purpose of this function is to avoid casting the Default value to the
+/// endian type.
+template <typename EndianType>
+static inline void mapOptional(yaml::IO &IO, const char *Key, EndianType &Val,
+ typename EndianType::value_type Default) {
+ IO.mapOptional(Key, Val, EndianType(Default));
+}
+
+/// Yaml-map an endian-aware type EndianType as some other type MapType.
+template <typename MapType, typename EndianType>
+static inline void mapRequiredAs(yaml::IO &IO, const char *Key,
+ EndianType &Val) {
+ MapType Mapped = static_cast<typename EndianType::value_type>(Val);
+ IO.mapRequired(Key, Mapped);
+ Val = static_cast<typename EndianType::value_type>(Mapped);
+}
+
+/// Perform an optional yaml-mapping of an endian-aware type EndianType as some
+/// other type MapType.
+template <typename MapType, typename EndianType>
+static inline void mapOptionalAs(yaml::IO &IO, const char *Key, EndianType &Val,
+ MapType Default) {
+ MapType Mapped = static_cast<typename EndianType::value_type>(Val);
+ IO.mapOptional(Key, Mapped, Default);
+ Val = static_cast<typename EndianType::value_type>(Mapped);
+}
+
+namespace {
+/// Return the appropriate yaml Hex type for a given endian-aware type.
+template <typename EndianType> struct HexType;
+template <> struct HexType<support::ulittle16_t> { using type = yaml::Hex16; };
+template <> struct HexType<support::ulittle32_t> { using type = yaml::Hex32; };
+template <> struct HexType<support::ulittle64_t> { using type = yaml::Hex64; };
+} // namespace
+
+/// Yaml-map an endian-aware type as an appropriately-sized hex value.
+template <typename EndianType>
+static inline void mapRequiredHex(yaml::IO &IO, const char *Key,
+ EndianType &Val) {
+ mapRequiredAs<typename HexType<EndianType>::type>(IO, Key, Val);
+}
+
+/// Perform an optional yaml-mapping of an endian-aware type as an
+/// appropriately-sized hex value.
+template <typename EndianType>
+static inline void mapOptionalHex(yaml::IO &IO, const char *Key,
+ EndianType &Val,
+ typename EndianType::value_type Default) {
+ mapOptionalAs<typename HexType<EndianType>::type>(IO, Key, Val, Default);
+}
+
+Stream::~Stream() = default;
+
+Stream::StreamKind Stream::getKind(StreamType Type) {
+ switch (Type) {
+ case StreamType::MemoryList:
+ return StreamKind::MemoryList;
+ case StreamType::ModuleList:
+ return StreamKind::ModuleList;
+ case StreamType::SystemInfo:
+ return StreamKind::SystemInfo;
+ case StreamType::LinuxCPUInfo:
+ case StreamType::LinuxProcStatus:
+ case StreamType::LinuxLSBRelease:
+ case StreamType::LinuxCMDLine:
+ case StreamType::LinuxMaps:
+ case StreamType::LinuxProcStat:
+ case StreamType::LinuxProcUptime:
+ return StreamKind::TextContent;
+ case StreamType::ThreadList:
+ return StreamKind::ThreadList;
+ default:
+ return StreamKind::RawContent;
+ }
+}
+
+std::unique_ptr<Stream> Stream::create(StreamType Type) {
+ StreamKind Kind = getKind(Type);
+ switch (Kind) {
+ case StreamKind::MemoryList:
+ return llvm::make_unique<MemoryListStream>();
+ case StreamKind::ModuleList:
+ return llvm::make_unique<ModuleListStream>();
+ case StreamKind::RawContent:
+ return llvm::make_unique<RawContentStream>(Type);
+ case StreamKind::SystemInfo:
+ return llvm::make_unique<SystemInfoStream>();
+ case StreamKind::TextContent:
+ return llvm::make_unique<TextContentStream>(Type);
+ case StreamKind::ThreadList:
+ return llvm::make_unique<ThreadListStream>();
+ }
+ llvm_unreachable("Unhandled stream kind!");
+}
+
+void yaml::ScalarEnumerationTraits<ProcessorArchitecture>::enumeration(
+ IO &IO, ProcessorArchitecture &Arch) {
+#define HANDLE_MDMP_ARCH(CODE, NAME) \
+ IO.enumCase(Arch, #NAME, ProcessorArchitecture::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ IO.enumFallback<Hex16>(Arch);
+}
+
+void yaml::ScalarEnumerationTraits<OSPlatform>::enumeration(IO &IO,
+ OSPlatform &Plat) {
+#define HANDLE_MDMP_PLATFORM(CODE, NAME) \
+ IO.enumCase(Plat, #NAME, OSPlatform::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ IO.enumFallback<Hex32>(Plat);
+}
+
+void yaml::ScalarEnumerationTraits<StreamType>::enumeration(IO &IO,
+ StreamType &Type) {
+#define HANDLE_MDMP_STREAM_TYPE(CODE, NAME) \
+ IO.enumCase(Type, #NAME, StreamType::NAME);
+#include "llvm/BinaryFormat/MinidumpConstants.def"
+ IO.enumFallback<Hex32>(Type);
+}
+
+void yaml::MappingTraits<CPUInfo::ArmInfo>::mapping(IO &IO,
+ CPUInfo::ArmInfo &Info) {
+ mapRequiredHex(IO, "CPUID", Info.CPUID);
+ mapOptionalHex(IO, "ELF hwcaps", Info.ElfHWCaps, 0);
+}
+
+namespace {
+template <std::size_t N> struct FixedSizeHex {
+ FixedSizeHex(uint8_t (&Storage)[N]) : Storage(Storage) {}
+
+ uint8_t (&Storage)[N];
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <std::size_t N> struct ScalarTraits<FixedSizeHex<N>> {
+ static void output(const FixedSizeHex<N> &Fixed, void *, raw_ostream &OS) {
+ OS << toHex(makeArrayRef(Fixed.Storage));
+ }
+
+ static StringRef input(StringRef Scalar, void *, FixedSizeHex<N> &Fixed) {
+ if (!all_of(Scalar, isHexDigit))
+ return "Invalid hex digit in input";
+ if (Scalar.size() < 2 * N)
+ return "String too short";
+ if (Scalar.size() > 2 * N)
+ return "String too long";
+ copy(fromHex(Scalar), Fixed.Storage);
+ return "";
+ }
+
+ static QuotingType mustQuote(StringRef S) { return QuotingType::None; }
+};
+} // namespace yaml
+} // namespace llvm
+void yaml::MappingTraits<CPUInfo::OtherInfo>::mapping(
+ IO &IO, CPUInfo::OtherInfo &Info) {
+ FixedSizeHex<sizeof(Info.ProcessorFeatures)> Features(Info.ProcessorFeatures);
+ IO.mapRequired("Features", Features);
+}
+
+namespace {
+/// A type which only accepts strings of a fixed size for yaml conversion.
+template <std::size_t N> struct FixedSizeString {
+ FixedSizeString(char (&Storage)[N]) : Storage(Storage) {}
+
+ char (&Storage)[N];
+};
+} // namespace
+
+namespace llvm {
+namespace yaml {
+template <std::size_t N> struct ScalarTraits<FixedSizeString<N>> {
+ static void output(const FixedSizeString<N> &Fixed, void *, raw_ostream &OS) {
+ OS << StringRef(Fixed.Storage, N);
+ }
+
+ static StringRef input(StringRef Scalar, void *, FixedSizeString<N> &Fixed) {
+ if (Scalar.size() < N)
+ return "String too short";
+ if (Scalar.size() > N)
+ return "String too long";
+ copy(Scalar, Fixed.Storage);
+ return "";
+ }
+
+ static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
+};
+} // namespace yaml
+} // namespace llvm
+
+void yaml::MappingTraits<CPUInfo::X86Info>::mapping(IO &IO,
+ CPUInfo::X86Info &Info) {
+ FixedSizeString<sizeof(Info.VendorID)> VendorID(Info.VendorID);
+ IO.mapRequired("Vendor ID", VendorID);
+
+ mapRequiredHex(IO, "Version Info", Info.VersionInfo);
+ mapRequiredHex(IO, "Feature Info", Info.FeatureInfo);
+ mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0);
+}
+
+void yaml::MappingTraits<VSFixedFileInfo>::mapping(IO &IO,
+ VSFixedFileInfo &Info) {
+ mapOptionalHex(IO, "Signature", Info.Signature, 0);
+ mapOptionalHex(IO, "Struct Version", Info.StructVersion, 0);
+ mapOptionalHex(IO, "File Version High", Info.FileVersionHigh, 0);
+ mapOptionalHex(IO, "File Version Low", Info.FileVersionLow, 0);
+ mapOptionalHex(IO, "Product Version High", Info.ProductVersionHigh, 0);
+ mapOptionalHex(IO, "Product Version Low", Info.ProductVersionLow, 0);
+ mapOptionalHex(IO, "File Flags Mask", Info.FileFlagsMask, 0);
+ mapOptionalHex(IO, "File Flags", Info.FileFlags, 0);
+ mapOptionalHex(IO, "File OS", Info.FileOS, 0);
+ mapOptionalHex(IO, "File Type", Info.FileType, 0);
+ mapOptionalHex(IO, "File Subtype", Info.FileSubtype, 0);
+ mapOptionalHex(IO, "File Date High", Info.FileDateHigh, 0);
+ mapOptionalHex(IO, "File Date Low", Info.FileDateLow, 0);
+}
+
+void yaml::MappingTraits<ModuleListStream::entry_type>::mapping(
+ IO &IO, ModuleListStream::entry_type &M) {
+ mapRequiredHex(IO, "Base of Image", M.Entry.BaseOfImage);
+ mapRequiredHex(IO, "Size of Image", M.Entry.SizeOfImage);
+ mapOptionalHex(IO, "Checksum", M.Entry.Checksum, 0);
+ IO.mapOptional("Time Date Stamp", M.Entry.TimeDateStamp,
+ support::ulittle32_t(0));
+ IO.mapRequired("Module Name", M.Name);
+ IO.mapOptional("Version Info", M.Entry.VersionInfo, VSFixedFileInfo());
+ IO.mapRequired("CodeView Record", M.CvRecord);
+ IO.mapOptional("Misc Record", M.MiscRecord, yaml::BinaryRef());
+ mapOptionalHex(IO, "Reserved0", M.Entry.Reserved0, 0);
+ mapOptionalHex(IO, "Reserved1", M.Entry.Reserved1, 0);
+}
+
+static void streamMapping(yaml::IO &IO, RawContentStream &Stream) {
+ IO.mapOptional("Content", Stream.Content);
+ IO.mapOptional("Size", Stream.Size, Stream.Content.binary_size());
+}
+
+static StringRef streamValidate(RawContentStream &Stream) {
+ if (Stream.Size.value < Stream.Content.binary_size())
+ return "Stream size must be greater or equal to the content size";
+ return "";
+}
+
+void yaml::MappingTraits<MemoryListStream::entry_type>::mapping(
+ IO &IO, MemoryListStream::entry_type &Range) {
+ MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping(
+ IO, Range.Entry, Range.Content);
+}
+
+static void streamMapping(yaml::IO &IO, MemoryListStream &Stream) {
+ IO.mapRequired("Memory Ranges", Stream.Entries);
+}
+
+static void streamMapping(yaml::IO &IO, ModuleListStream &Stream) {
+ IO.mapRequired("Modules", Stream.Entries);
+}
+
+static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
+ SystemInfo &Info = Stream.Info;
+ IO.mapRequired("Processor Arch", Info.ProcessorArch);
+ mapOptional(IO, "Processor Level", Info.ProcessorLevel, 0);
+ mapOptional(IO, "Processor Revision", Info.ProcessorRevision, 0);
+ IO.mapOptional("Number of Processors", Info.NumberOfProcessors, 0);
+ IO.mapOptional("Product type", Info.ProductType, 0);
+ mapOptional(IO, "Major Version", Info.MajorVersion, 0);
+ mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
+ mapOptional(IO, "Build Number", Info.BuildNumber, 0);
+ IO.mapRequired("Platform ID", Info.PlatformId);
+ IO.mapOptional("CSD Version", Stream.CSDVersion, "");
+ mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
+ mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
+ switch (static_cast<ProcessorArchitecture>(Info.ProcessorArch)) {
+ case ProcessorArchitecture::X86:
+ case ProcessorArchitecture::AMD64:
+ IO.mapOptional("CPU", Info.CPU.X86);
+ break;
+ case ProcessorArchitecture::ARM:
+ case ProcessorArchitecture::ARM64:
+ IO.mapOptional("CPU", Info.CPU.Arm);
+ break;
+ default:
+ IO.mapOptional("CPU", Info.CPU.Other);
+ break;
+ }
+}
+
+static void streamMapping(yaml::IO &IO, TextContentStream &Stream) {
+ IO.mapOptional("Text", Stream.Text);
+}
+
+void yaml::MappingContextTraits<MemoryDescriptor, yaml::BinaryRef>::mapping(
+ IO &IO, MemoryDescriptor &Memory, BinaryRef &Content) {
+ mapRequiredHex(IO, "Start of Memory Range", Memory.StartOfMemoryRange);
+ IO.mapRequired("Content", Content);
+}
+
+void yaml::MappingTraits<ThreadListStream::entry_type>::mapping(
+ IO &IO, ThreadListStream::entry_type &T) {
+ mapRequiredHex(IO, "Thread Id", T.Entry.ThreadId);
+ mapOptionalHex(IO, "Suspend Count", T.Entry.SuspendCount, 0);
+ mapOptionalHex(IO, "Priority Class", T.Entry.PriorityClass, 0);
+ mapOptionalHex(IO, "Priority", T.Entry.Priority, 0);
+ mapOptionalHex(IO, "Environment Block", T.Entry.EnvironmentBlock, 0);
+ IO.mapRequired("Context", T.Context);
+ IO.mapRequired("Stack", T.Entry.Stack, T.Stack);
+}
+
+static void streamMapping(yaml::IO &IO, ThreadListStream &Stream) {
+ IO.mapRequired("Threads", Stream.Entries);
+}
+
+void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
+ yaml::IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S) {
+ StreamType Type;
+ if (IO.outputting())
+ Type = S->Type;
+ IO.mapRequired("Type", Type);
+
+ if (!IO.outputting())
+ S = MinidumpYAML::Stream::create(Type);
+ switch (S->Kind) {
+ case MinidumpYAML::Stream::StreamKind::MemoryList:
+ streamMapping(IO, llvm::cast<MemoryListStream>(*S));
+ break;
+ case MinidumpYAML::Stream::StreamKind::ModuleList:
+ streamMapping(IO, llvm::cast<ModuleListStream>(*S));
+ break;
+ case MinidumpYAML::Stream::StreamKind::RawContent:
+ streamMapping(IO, llvm::cast<RawContentStream>(*S));
+ break;
+ case MinidumpYAML::Stream::StreamKind::SystemInfo:
+ streamMapping(IO, llvm::cast<SystemInfoStream>(*S));
+ break;
+ case MinidumpYAML::Stream::StreamKind::TextContent:
+ streamMapping(IO, llvm::cast<TextContentStream>(*S));
+ break;
+ case MinidumpYAML::Stream::StreamKind::ThreadList:
+ streamMapping(IO, llvm::cast<ThreadListStream>(*S));
+ break;
+ }
+}
+
+StringRef yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
+ yaml::IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S) {
+ switch (S->Kind) {
+ case MinidumpYAML::Stream::StreamKind::RawContent:
+ return streamValidate(cast<RawContentStream>(*S));
+ case MinidumpYAML::Stream::StreamKind::MemoryList:
+ case MinidumpYAML::Stream::StreamKind::ModuleList:
+ case MinidumpYAML::Stream::StreamKind::SystemInfo:
+ case MinidumpYAML::Stream::StreamKind::TextContent:
+ case MinidumpYAML::Stream::StreamKind::ThreadList:
+ return "";
+ }
+ llvm_unreachable("Fully covered switch above!");
+}
+
+void yaml::MappingTraits<Object>::mapping(IO &IO, Object &O) {
+ IO.mapTag("!minidump", true);
+ mapOptionalHex(IO, "Signature", O.Header.Signature, Header::MagicSignature);
+ mapOptionalHex(IO, "Version", O.Header.Version, Header::MagicVersion);
+ mapOptionalHex(IO, "Flags", O.Header.Flags, 0);
+ IO.mapRequired("Streams", O.Streams);
+}
+
+static LocationDescriptor layout(BlobAllocator &File, yaml::BinaryRef Data) {
+ return {support::ulittle32_t(Data.binary_size()),
+ support::ulittle32_t(File.allocateBytes(Data))};
+}
+
+static void layout(BlobAllocator &File, MemoryListStream::entry_type &Range) {
+ Range.Entry.Memory = layout(File, Range.Content);
+}
+
+static void layout(BlobAllocator &File, ModuleListStream::entry_type &M) {
+ M.Entry.ModuleNameRVA = File.allocateString(M.Name);
+
+ M.Entry.CvRecord = layout(File, M.CvRecord);
+ M.Entry.MiscRecord = layout(File, M.MiscRecord);
+}
+
+static void layout(BlobAllocator &File, ThreadListStream::entry_type &T) {
+ T.Entry.Stack.Memory = layout(File, T.Stack);
+ T.Entry.Context = layout(File, T.Context);
+}
+
+template <typename EntryT>
+static size_t layout(BlobAllocator &File,
+ MinidumpYAML::detail::ListStream<EntryT> &S) {
+
+ File.allocateNewObject<support::ulittle32_t>(S.Entries.size());
+ for (auto &E : S.Entries)
+ File.allocateObject(E.Entry);
+
+ size_t DataEnd = File.tell();
+
+  // Lay out the auxiliary data (which is not a part of the stream).
+ for (auto &E : S.Entries)
+ layout(File, E);
+
+ return DataEnd;
+}
+
+static Directory layout(BlobAllocator &File, Stream &S) {
+ Directory Result;
+ Result.Type = S.Type;
+ Result.Location.RVA = File.tell();
+ Optional<size_t> DataEnd;
+ switch (S.Kind) {
+ case Stream::StreamKind::MemoryList:
+ DataEnd = layout(File, cast<MemoryListStream>(S));
+ break;
+ case Stream::StreamKind::ModuleList:
+ DataEnd = layout(File, cast<ModuleListStream>(S));
+ break;
+ case Stream::StreamKind::RawContent: {
+ RawContentStream &Raw = cast<RawContentStream>(S);
+ File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
+ Raw.Content.writeAsBinary(OS);
+ assert(Raw.Content.binary_size() <= Raw.Size);
+ OS << std::string(Raw.Size - Raw.Content.binary_size(), '\0');
+ });
+ break;
+ }
+ case Stream::StreamKind::SystemInfo: {
+ SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
+ File.allocateObject(SystemInfo.Info);
+ // The CSD string is not a part of the stream.
+ DataEnd = File.tell();
+ SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
+ break;
+ }
+ case Stream::StreamKind::TextContent:
+ File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
+ break;
+ case Stream::StreamKind::ThreadList:
+ DataEnd = layout(File, cast<ThreadListStream>(S));
+ break;
+ }
+ // If DataEnd is not set, we assume everything we generated is a part of the
+ // stream.
+ Result.Location.DataSize =
+ DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
+ return Result;
+}
+
+void MinidumpYAML::writeAsBinary(Object &Obj, raw_ostream &OS) {
+ BlobAllocator File;
+ File.allocateObject(Obj.Header);
+
+ std::vector<Directory> StreamDirectory(Obj.Streams.size());
+ Obj.Header.StreamDirectoryRVA =
+ File.allocateArray(makeArrayRef(StreamDirectory));
+ Obj.Header.NumberOfStreams = StreamDirectory.size();
+
+ for (auto &Stream : enumerate(Obj.Streams))
+ StreamDirectory[Stream.index()] = layout(File, *Stream.value());
+
+ File.writeTo(OS);
+}
+
+Error MinidumpYAML::writeAsBinary(StringRef Yaml, raw_ostream &OS) {
+ yaml::Input Input(Yaml);
+ Object Obj;
+ Input >> Obj;
+ if (std::error_code EC = Input.error())
+ return errorCodeToError(EC);
+
+ writeAsBinary(Obj, OS);
+ return Error::success();
+}
+
+Expected<std::unique_ptr<Stream>>
+Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
+ StreamKind Kind = getKind(StreamDesc.Type);
+ switch (Kind) {
+ case StreamKind::MemoryList: {
+ auto ExpectedList = File.getMemoryList();
+ if (!ExpectedList)
+ return ExpectedList.takeError();
+ std::vector<MemoryListStream::entry_type> Ranges;
+ for (const MemoryDescriptor &MD : *ExpectedList) {
+ auto ExpectedContent = File.getRawData(MD.Memory);
+ if (!ExpectedContent)
+ return ExpectedContent.takeError();
+ Ranges.push_back({MD, *ExpectedContent});
+ }
+ return llvm::make_unique<MemoryListStream>(std::move(Ranges));
+ }
+ case StreamKind::ModuleList: {
+ auto ExpectedList = File.getModuleList();
+ if (!ExpectedList)
+ return ExpectedList.takeError();
+ std::vector<ModuleListStream::entry_type> Modules;
+ for (const Module &M : *ExpectedList) {
+ auto ExpectedName = File.getString(M.ModuleNameRVA);
+ if (!ExpectedName)
+ return ExpectedName.takeError();
+ auto ExpectedCv = File.getRawData(M.CvRecord);
+ if (!ExpectedCv)
+ return ExpectedCv.takeError();
+ auto ExpectedMisc = File.getRawData(M.MiscRecord);
+ if (!ExpectedMisc)
+ return ExpectedMisc.takeError();
+ Modules.push_back(
+ {M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc});
+ }
+ return llvm::make_unique<ModuleListStream>(std::move(Modules));
+ }
+ case StreamKind::RawContent:
+ return llvm::make_unique<RawContentStream>(StreamDesc.Type,
+ File.getRawStream(StreamDesc));
+ case StreamKind::SystemInfo: {
+ auto ExpectedInfo = File.getSystemInfo();
+ if (!ExpectedInfo)
+ return ExpectedInfo.takeError();
+ auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
+ if (!ExpectedCSDVersion)
+      return ExpectedCSDVersion.takeError();
+ return llvm::make_unique<SystemInfoStream>(*ExpectedInfo,
+ std::move(*ExpectedCSDVersion));
+ }
+ case StreamKind::TextContent:
+ return llvm::make_unique<TextContentStream>(
+ StreamDesc.Type, toStringRef(File.getRawStream(StreamDesc)));
+ case StreamKind::ThreadList: {
+ auto ExpectedList = File.getThreadList();
+ if (!ExpectedList)
+ return ExpectedList.takeError();
+ std::vector<ThreadListStream::entry_type> Threads;
+ for (const Thread &T : *ExpectedList) {
+ auto ExpectedStack = File.getRawData(T.Stack.Memory);
+ if (!ExpectedStack)
+ return ExpectedStack.takeError();
+ auto ExpectedContext = File.getRawData(T.Context);
+ if (!ExpectedContext)
+ return ExpectedContext.takeError();
+ Threads.push_back({T, *ExpectedStack, *ExpectedContext});
+ }
+ return llvm::make_unique<ThreadListStream>(std::move(Threads));
+ }
+ }
+ llvm_unreachable("Unhandled stream kind!");
+}
+
+Expected<Object> Object::create(const object::MinidumpFile &File) {
+ std::vector<std::unique_ptr<Stream>> Streams;
+ Streams.reserve(File.streams().size());
+ for (const Directory &StreamDesc : File.streams()) {
+ auto ExpectedStream = Stream::create(StreamDesc, File);
+ if (!ExpectedStream)
+ return ExpectedStream.takeError();
+ Streams.push_back(std::move(*ExpectedStream));
+ }
+ return Object(File.header(), std::move(Streams));
+}
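
Taken together, the mapping traits and the layout helpers above mean a complete minidump can be produced from its YAML description through the writeAsBinary(StringRef, raw_ostream &) entry point defined in this file. A minimal sketch, assuming the caller already holds the YAML text; the wrapper function and its error banner are illustrative only:

    #include "llvm/ObjectYAML/MinidumpYAML.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    // Parse a YAML minidump description and write the binary form to stdout,
    // returning a non-zero exit code on a YAML or conversion error.
    static int yamlToMinidump(llvm::StringRef Yaml) {
      if (llvm::Error E = llvm::MinidumpYAML::writeAsBinary(Yaml, llvm::outs())) {
        llvm::logAllUnhandledErrors(std::move(E), llvm::errs(), "error: ");
        return 1;
      }
      return 0;
    }
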
diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp
index 850c1a5a06c0..7f636f4eabac 100644
--- a/lib/ObjectYAML/ObjectYAML.cpp
+++ b/lib/ObjectYAML/ObjectYAML.cpp
@@ -1,9 +1,8 @@
//===- ObjectYAML.cpp - YAML utilities for object files -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,6 +32,7 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
MappingTraits<MachOYAML::UniversalBinary>::mapping(IO,
*ObjectFile.FatMachO);
} else {
+ Input &In = (Input &)IO;
if (IO.mapTag("!ELF")) {
ObjectFile.Elf.reset(new ELFYAML::Object());
MappingTraits<ELFYAML::Object>::mapping(IO, *ObjectFile.Elf);
@@ -46,18 +46,18 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
ObjectFile.FatMachO.reset(new MachOYAML::UniversalBinary());
MappingTraits<MachOYAML::UniversalBinary>::mapping(IO,
*ObjectFile.FatMachO);
+ } else if (IO.mapTag("!minidump")) {
+ ObjectFile.Minidump.reset(new MinidumpYAML::Object());
+ MappingTraits<MinidumpYAML::Object>::mapping(IO, *ObjectFile.Minidump);
} else if (IO.mapTag("!WASM")) {
ObjectFile.Wasm.reset(new WasmYAML::Object());
MappingTraits<WasmYAML::Object>::mapping(IO, *ObjectFile.Wasm);
- } else {
- Input &In = (Input &)IO;
- std::string Tag = In.getCurrentNode()->getRawTag();
- if (Tag.empty())
+ } else if (const Node *N = In.getCurrentNode()) {
+ if (N->getRawTag().empty())
IO.setError("YAML Object File missing document type tag!");
else
- IO.setError(
- Twine("YAML Object File unsupported document type tag '") +
- Twine(Tag) + Twine("'!"));
+ IO.setError("YAML Object File unsupported document type tag '" +
+ N->getRawTag() + "'!");
}
}
}
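
The mapping above dispatches on the YAML document tag, adding !minidump alongside the existing !ELF and !WASM tags, and now reports a tailored error when the tag is missing or unknown. A rough standalone sketch of that dispatch shape; the std::map and lambdas are illustrative stand-ins, not the YAMLIO mechanism itself.

```cpp
// Sketch only: tag-based dispatch with the same error messages as the patch.
#include <functional>
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::function<void()>> Parsers = {
      {"!ELF", [] { std::cout << "parse as ELF\n"; }},
      {"!minidump", [] { std::cout << "parse as minidump\n"; }},  // new here
      {"!WASM", [] { std::cout << "parse as wasm\n"; }},
  };

  std::string Tag = "!minidump";  // would come from the YAML document node
  auto It = Parsers.find(Tag);
  if (It == Parsers.end())
    std::cout << (Tag.empty()
                      ? std::string("YAML Object File missing document type tag!")
                      : "YAML Object File unsupported document type tag '" +
                            Tag + "'!")
              << "\n";
  else
    It->second();
}
```
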
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 47bf853e0d3e..88491d955c49 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -1,9 +1,8 @@
//===- WasmYAML.cpp - Wasm YAMLIO implementation --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,20 @@ static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) {
IO.mapOptional("Comdats", Section.Comdats);
}
+static void sectionMapping(IO &IO, WasmYAML::ProducersSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapOptional("Languages", Section.Languages);
+ IO.mapOptional("Tools", Section.Tools);
+ IO.mapOptional("SDKs", Section.SDKs);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::TargetFeaturesSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("Features", Section.Features);
+}
+
static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) {
commonSectionMapping(IO, Section);
IO.mapRequired("Name", Section.Name);
@@ -140,6 +153,11 @@ static void sectionMapping(IO &IO, WasmYAML::DataSection &Section) {
IO.mapRequired("Segments", Section.Segments);
}
+static void sectionMapping(IO &IO, WasmYAML::DataCountSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Count", Section.Count);
+}
+
void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
IO &IO, std::unique_ptr<WasmYAML::Section> &Section) {
WasmYAML::SectionType SectionType;
@@ -169,6 +187,14 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
if (!IO.outputting())
Section.reset(new WasmYAML::NameSection());
sectionMapping(IO, *cast<WasmYAML::NameSection>(Section.get()));
+ } else if (SectionName == "producers") {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::ProducersSection());
+ sectionMapping(IO, *cast<WasmYAML::ProducersSection>(Section.get()));
+ } else if (SectionName == "target_features") {
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::TargetFeaturesSection());
+ sectionMapping(IO, *cast<WasmYAML::TargetFeaturesSection>(Section.get()));
} else {
if (!IO.outputting())
Section.reset(new WasmYAML::CustomSection(SectionName));
@@ -236,6 +262,11 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
Section.reset(new WasmYAML::DataSection());
sectionMapping(IO, *cast<WasmYAML::DataSection>(Section.get()));
break;
+ case wasm::WASM_SEC_DATACOUNT:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::DataCountSection());
+ sectionMapping(IO, *cast<WasmYAML::DataCountSection>(Section.get()));
+ break;
default:
llvm_unreachable("Unknown section type");
}
@@ -257,6 +288,7 @@ void ScalarEnumerationTraits<WasmYAML::SectionType>::enumeration(
ECase(ELEM);
ECase(CODE);
ECase(DATA);
+ ECase(DATACOUNT);
#undef ECase
}
@@ -293,6 +325,27 @@ void MappingTraits<WasmYAML::NameEntry>::mapping(
IO.mapRequired("Name", NameEntry.Name);
}
+void MappingTraits<WasmYAML::ProducerEntry>::mapping(
+ IO &IO, WasmYAML::ProducerEntry &ProducerEntry) {
+ IO.mapRequired("Name", ProducerEntry.Name);
+ IO.mapRequired("Version", ProducerEntry.Version);
+}
+
+void ScalarEnumerationTraits<WasmYAML::FeaturePolicyPrefix>::enumeration(
+ IO &IO, WasmYAML::FeaturePolicyPrefix &Kind) {
+#define ECase(X) IO.enumCase(Kind, #X, wasm::WASM_FEATURE_PREFIX_##X);
+ ECase(USED);
+ ECase(REQUIRED);
+ ECase(DISALLOWED);
+#undef ECase
+}
+
+void MappingTraits<WasmYAML::FeatureEntry>::mapping(
+ IO &IO, WasmYAML::FeatureEntry &FeatureEntry) {
+ IO.mapRequired("Prefix", FeatureEntry.Prefix);
+ IO.mapRequired("Name", FeatureEntry.Name);
+}
+
void MappingTraits<WasmYAML::SegmentInfo>::mapping(
IO &IO, WasmYAML::SegmentInfo &SegmentInfo) {
IO.mapRequired("Index", SegmentInfo.Index);
@@ -386,8 +439,18 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
void MappingTraits<WasmYAML::DataSegment>::mapping(
IO &IO, WasmYAML::DataSegment &Segment) {
IO.mapOptional("SectionOffset", Segment.SectionOffset);
- IO.mapRequired("MemoryIndex", Segment.MemoryIndex);
- IO.mapRequired("Offset", Segment.Offset);
+ IO.mapRequired("InitFlags", Segment.InitFlags);
+ if (Segment.InitFlags & wasm::WASM_SEGMENT_HAS_MEMINDEX) {
+ IO.mapRequired("MemoryIndex", Segment.MemoryIndex);
+ } else {
+ Segment.MemoryIndex = 0;
+ }
+ if ((Segment.InitFlags & wasm::WASM_SEGMENT_IS_PASSIVE) == 0) {
+ IO.mapRequired("Offset", Segment.Offset);
+ } else {
+ Segment.Offset.Opcode = wasm::WASM_OPCODE_I32_CONST;
+ Segment.Offset.Value.Int32 = 0;
+ }
IO.mapRequired("Content", Segment.Content);
}
@@ -421,7 +484,8 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
WasmYAML::SymbolInfo &Info) {
IO.mapRequired("Index", Info.Index);
IO.mapRequired("Kind", Info.Kind);
- IO.mapRequired("Name", Info.Name);
+ if (Info.Kind != wasm::WASM_SYMBOL_TYPE_SECTION)
+ IO.mapRequired("Name", Info.Name);
IO.mapRequired("Flags", Info.Flags);
if (Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION) {
IO.mapRequired("Function", Info.ElementIndex);
@@ -469,6 +533,8 @@ void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
// BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT);
BCaseMask(VISIBILITY_MASK, VISIBILITY_HIDDEN);
BCaseMask(UNDEFINED, UNDEFINED);
+ BCaseMask(EXPORTED, EXPORTED);
+ BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME);
#undef BCaseMask
}
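
The new DataSegment mapping keys two fields off InitFlags: MemoryIndex is only read when the HAS_MEMINDEX bit is set, and an explicit Offset is only read for active (non-passive) segments, with passive segments getting a synthesized i32.const 0. A small sketch of that conditional-field logic; the flag constants are illustrative stand-ins for the values defined in llvm/BinaryFormat/Wasm.h.

```cpp
// Sketch only: conditional fields driven by segment init flags.
#include <cstdint>
#include <iostream>

// Illustrative constants; the real values live in llvm/BinaryFormat/Wasm.h.
constexpr uint32_t SEGMENT_IS_PASSIVE = 0x1;
constexpr uint32_t SEGMENT_HAS_MEMINDEX = 0x2;

struct DataSegment {
  uint32_t InitFlags = 0;
  uint32_t MemoryIndex = 0;
  int32_t OffsetI32Const = 0;
};

void describe(const DataSegment &S) {
  if (S.InitFlags & SEGMENT_HAS_MEMINDEX)
    std::cout << "MemoryIndex: " << S.MemoryIndex << "\n";
  else
    std::cout << "MemoryIndex defaults to 0\n";

  if ((S.InitFlags & SEGMENT_IS_PASSIVE) == 0)
    std::cout << "Offset: i32.const " << S.OffsetI32Const << "\n";
  else
    std::cout << "passive segment: offset synthesized as i32.const 0\n";
}

int main() {
  describe({/*InitFlags=*/0, /*MemoryIndex=*/0, /*Offset=*/16});  // active
  describe({/*InitFlags=*/SEGMENT_IS_PASSIVE, 0, 0});             // passive
}
```
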
diff --git a/lib/ObjectYAML/XCOFFYAML.cpp b/lib/ObjectYAML/XCOFFYAML.cpp
new file mode 100644
index 000000000000..982e6aecbb98
--- /dev/null
+++ b/lib/ObjectYAML/XCOFFYAML.cpp
@@ -0,0 +1,109 @@
+//===-- XCOFFYAML.cpp - XCOFF YAMLIO implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of XCOFF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/XCOFFYAML.h"
+#include "llvm/BinaryFormat/XCOFF.h"
+#include <string.h>
+
+namespace llvm {
+namespace XCOFFYAML {
+
+Object::Object() { memset(&Header, 0, sizeof(Header)); }
+
+} // namespace XCOFFYAML
+
+namespace yaml {
+
+void ScalarEnumerationTraits<XCOFF::StorageClass>::enumeration(
+ IO &IO, XCOFF::StorageClass &Value) {
+#define ECase(X) IO.enumCase(Value, #X, XCOFF::X)
+ ECase(C_NULL);
+ ECase(C_AUTO);
+ ECase(C_EXT);
+ ECase(C_STAT);
+ ECase(C_REG);
+ ECase(C_EXTDEF);
+ ECase(C_LABEL);
+ ECase(C_ULABEL);
+ ECase(C_MOS);
+ ECase(C_ARG);
+ ECase(C_STRTAG);
+ ECase(C_MOU);
+ ECase(C_UNTAG);
+ ECase(C_TPDEF);
+ ECase(C_USTATIC);
+ ECase(C_ENTAG);
+ ECase(C_MOE);
+ ECase(C_REGPARM);
+ ECase(C_FIELD);
+ ECase(C_BLOCK);
+ ECase(C_FCN);
+ ECase(C_EOS);
+ ECase(C_FILE);
+ ECase(C_LINE);
+ ECase(C_ALIAS);
+ ECase(C_HIDDEN);
+ ECase(C_HIDEXT);
+ ECase(C_BINCL);
+ ECase(C_EINCL);
+ ECase(C_INFO);
+ ECase(C_WEAKEXT);
+ ECase(C_DWARF);
+ ECase(C_GSYM);
+ ECase(C_LSYM);
+ ECase(C_PSYM);
+ ECase(C_RSYM);
+ ECase(C_RPSYM);
+ ECase(C_STSYM);
+ ECase(C_TCSYM);
+ ECase(C_BCOMM);
+ ECase(C_ECOML);
+ ECase(C_ECOMM);
+ ECase(C_DECL);
+ ECase(C_ENTRY);
+ ECase(C_FUN);
+ ECase(C_BSTAT);
+ ECase(C_ESTAT);
+ ECase(C_GTLS);
+ ECase(C_STTLS);
+ ECase(C_EFCN);
+#undef ECase
+}
+
+void MappingTraits<XCOFFYAML::FileHeader>::mapping(
+ IO &IO, XCOFFYAML::FileHeader &FileHdr) {
+ IO.mapRequired("MagicNumber", FileHdr.Magic);
+ IO.mapRequired("NumberOfSections", FileHdr.NumberOfSections);
+ IO.mapRequired("CreationTime", FileHdr.TimeStamp);
+ IO.mapRequired("OffsetToSymbolTable", FileHdr.SymbolTableOffset);
+ IO.mapRequired("EntriesInSymbolTable", FileHdr.NumberOfSymTableEntries);
+ IO.mapRequired("AuxiliaryHeaderSize", FileHdr.AuxHeaderSize);
+ IO.mapRequired("Flags", FileHdr.Flags);
+}
+
+void MappingTraits<XCOFFYAML::Symbol>::mapping(IO &IO, XCOFFYAML::Symbol &S) {
+ IO.mapRequired("Name", S.SymbolName);
+ IO.mapRequired("Value", S.Value);
+ IO.mapRequired("Section", S.SectionName);
+ IO.mapRequired("Type", S.Type);
+ IO.mapRequired("StorageClass", S.StorageClass);
+ IO.mapRequired("NumberOfAuxEntries", S.NumberOfAuxEntries);
+}
+
+void MappingTraits<XCOFFYAML::Object>::mapping(IO &IO, XCOFFYAML::Object &Obj) {
+ IO.mapTag("!XCOFF", true);
+ IO.mapRequired("FileHeader", Obj.Header);
+ IO.mapRequired("Symbols", Obj.Symbols);
+}
+
+} // namespace yaml
+} // namespace llvm
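
The StorageClass enumeration above relies on the ECase macro so that each YAML scalar name is literally the C++ enumerator name. A standalone approximation of that X-macro pattern, using a plain function instead of llvm::yaml::IO.

```cpp
// Sketch only: macro-generated enum-to-name mapping, one line per enumerator.
#include <iostream>
#include <string>

enum class StorageClass { C_NULL, C_EXT, C_STAT, C_FILE };

std::string toYAML(StorageClass V) {
#define ECASE(X)                                                               \
  if (V == StorageClass::X)                                                    \
    return #X;
  ECASE(C_NULL)
  ECASE(C_EXT)
  ECASE(C_STAT)
  ECASE(C_FILE)
#undef ECASE
  return "<unknown>";
}

int main() { std::cout << toYAML(StorageClass::C_EXT) << "\n"; }
```
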
diff --git a/lib/ObjectYAML/YAML.cpp b/lib/ObjectYAML/YAML.cpp
index 67b5764eadaa..6eba16e36c2a 100644
--- a/lib/ObjectYAML/YAML.cpp
+++ b/lib/ObjectYAML/YAML.cpp
@@ -1,9 +1,8 @@
//===- YAML.cpp - YAMLIO utilities for object files -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,7 @@ StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
// TODO: Can we improve YAMLIO to permit a more accurate diagnostic here?
// (e.g. a caret pointing to the offending character).
for (unsigned I = 0, N = Scalar.size(); I != N; ++I)
- if (!isxdigit(Scalar[I]))
+ if (!llvm::isHexDigit(Scalar[I]))
return "BinaryRef hex string must contain only hex digits.";
Val = yaml::BinaryRef(Scalar);
return {};
@@ -44,8 +43,9 @@ void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
return;
}
for (unsigned I = 0, N = Data.size(); I != N; I += 2) {
- uint8_t Byte;
- StringRef((const char *)&Data[I], 2).getAsInteger(16, Byte);
+ uint8_t Byte = llvm::hexDigitValue(Data[I]);
+ Byte <<= 4;
+ Byte |= llvm::hexDigitValue(Data[I + 1]);
OS.write(Byte);
}
}
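
The writeAsBinary change replaces the per-byte getAsInteger call with direct nibble assembly: two already-validated hex characters become one byte, high nibble first. A worked standalone sketch, with a local hexDigit helper standing in for llvm::hexDigitValue.

```cpp
// Sketch only: decode a hex string two characters at a time into bytes.
#include <cstdint>
#include <iostream>
#include <string>

static uint8_t hexDigit(char C) {
  if (C >= '0' && C <= '9') return C - '0';
  if (C >= 'a' && C <= 'f') return C - 'a' + 10;
  if (C >= 'A' && C <= 'F') return C - 'A' + 10;
  return 0;  // the patch validates with isHexDigit before this point
}

int main() {
  std::string Data = "DEADBEEF";
  for (size_t I = 0, N = Data.size(); I != N; I += 2) {
    uint8_t Byte = hexDigit(Data[I]);   // high nibble
    Byte <<= 4;
    Byte |= hexDigit(Data[I + 1]);      // low nibble
    std::cout << unsigned(Byte) << " "; // prints 222 173 190 239
  }
  std::cout << "\n";
}
```
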
diff --git a/lib/OptRemarks/OptRemarksParser.cpp b/lib/OptRemarks/OptRemarksParser.cpp
deleted file mode 100644
index 0478d2bfbfa6..000000000000
--- a/lib/OptRemarks/OptRemarksParser.cpp
+++ /dev/null
@@ -1,368 +0,0 @@
-//===- OptRemarksParser.cpp -----------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides utility methods used by clients that want to use the
-// parser for optimization remarks in LLVM.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm-c/OptRemarks.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm;
-
-namespace {
-struct RemarkParser {
- /// Source manager for better error messages.
- SourceMgr SM;
- /// Stream for yaml parsing.
- yaml::Stream Stream;
- /// Storage for the error stream.
- std::string ErrorString;
- /// The error stream.
- raw_string_ostream ErrorStream;
- /// Iterator in the YAML stream.
- yaml::document_iterator DI;
- /// The parsed remark (if any).
- Optional<LLVMOptRemarkEntry> LastRemark;
- /// Temporary parsing buffer for the arguments.
- SmallVector<LLVMOptRemarkArg, 8> TmpArgs;
- /// The state used by the parser to parse a remark entry. Invalidated with
- /// every call to `parseYAMLElement`.
- struct ParseState {
- /// Temporary parsing buffer for the arguments.
- SmallVectorImpl<LLVMOptRemarkArg> *Args;
- StringRef Type;
- StringRef Pass;
- StringRef Name;
- StringRef Function;
- /// Optional.
- Optional<StringRef> File;
- Optional<unsigned> Line;
- Optional<unsigned> Column;
- Optional<unsigned> Hotness;
-
- ParseState(SmallVectorImpl<LLVMOptRemarkArg> &Args) : Args(&Args) {}
- /// Use Args only as a **temporary** buffer.
- ~ParseState() { Args->clear(); }
- };
-
- ParseState State;
-
- /// Set to `true` if we had any errors during parsing.
- bool HadAnyErrors = false;
-
- RemarkParser(StringRef Buf)
- : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
- DI(Stream.begin()), LastRemark(), TmpArgs(), State(TmpArgs) {
- SM.setDiagHandler(RemarkParser::HandleDiagnostic, this);
- }
-
- /// Parse a YAML element.
- Error parseYAMLElement(yaml::Document &Remark);
-
-private:
- /// Parse one key to a string.
- /// otherwise.
- Error parseKey(StringRef &Result, yaml::KeyValueNode &Node);
- /// Parse one value to a string.
- Error parseValue(StringRef &Result, yaml::KeyValueNode &Node);
- /// Parse one value to an unsigned.
- Error parseValue(Optional<unsigned> &Result, yaml::KeyValueNode &Node);
- /// Parse a debug location.
- Error parseDebugLoc(Optional<StringRef> &File, Optional<unsigned> &Line,
- Optional<unsigned> &Column, yaml::KeyValueNode &Node);
- /// Parse an argument.
- Error parseArg(SmallVectorImpl<LLVMOptRemarkArg> &TmpArgs, yaml::Node &Node);
-
- /// Handle a diagnostic from the YAML stream. Records the error in the
- /// RemarkParser class.
- static void HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
- assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
- auto *Parser = static_cast<RemarkParser *>(Ctx);
- Diag.print(/*ProgName=*/nullptr, Parser->ErrorStream, /*ShowColors*/ false,
- /*ShowKindLabels*/ true);
- }
-};
-
-class ParseError : public ErrorInfo<ParseError> {
-public:
- static char ID;
-
- ParseError(StringRef Message, yaml::Node &Node)
- : Message(Message), Node(Node) {}
-
- void log(raw_ostream &OS) const override { OS << Message; }
- std::error_code convertToErrorCode() const override {
- return inconvertibleErrorCode();
- }
-
- StringRef getMessage() const { return Message; }
- yaml::Node &getNode() const { return Node; }
-
-private:
- StringRef Message; // No need to hold a full copy of the buffer.
- yaml::Node &Node;
-};
-
-char ParseError::ID = 0;
-
-static LLVMOptRemarkStringRef toOptRemarkStr(StringRef Str) {
- return {Str.data(), static_cast<uint32_t>(Str.size())};
-}
-
-Error RemarkParser::parseKey(StringRef &Result, yaml::KeyValueNode &Node) {
- auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey());
- if (!Key)
- return make_error<ParseError>("key is not a string.", Node);
-
- Result = Key->getRawValue();
- return Error::success();
-}
-
-Error RemarkParser::parseValue(StringRef &Result, yaml::KeyValueNode &Node) {
- auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
- if (!Value)
- return make_error<ParseError>("expected a value of scalar type.", Node);
- Result = Value->getRawValue();
-
- if (Result.front() == '\'')
- Result = Result.drop_front();
-
- if (Result.back() == '\'')
- Result = Result.drop_back();
-
- return Error::success();
-}
-
-Error RemarkParser::parseValue(Optional<unsigned> &Result,
- yaml::KeyValueNode &Node) {
- SmallVector<char, 4> Tmp;
- auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
- if (!Value)
- return make_error<ParseError>("expected a value of scalar type.", Node);
- unsigned UnsignedValue = 0;
- if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
- return make_error<ParseError>("expected a value of integer type.", *Value);
- Result = UnsignedValue;
- return Error::success();
-}
-
-Error RemarkParser::parseDebugLoc(Optional<StringRef> &File,
- Optional<unsigned> &Line,
- Optional<unsigned> &Column,
- yaml::KeyValueNode &Node) {
- auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
- if (!DebugLoc)
- return make_error<ParseError>("expected a value of mapping type.", Node);
-
- for (yaml::KeyValueNode &DLNode : *DebugLoc) {
- StringRef KeyName;
- if (Error E = parseKey(KeyName, DLNode))
- return E;
- if (KeyName == "File") {
- File = StringRef(); // Set the optional to contain a default constructed
- // value, to be passed to the parsing function.
- if (Error E = parseValue(*File, DLNode))
- return E;
- } else if (KeyName == "Column") {
- if (Error E = parseValue(Column, DLNode))
- return E;
- } else if (KeyName == "Line") {
- if (Error E = parseValue(Line, DLNode))
- return E;
- } else {
- return make_error<ParseError>("unknown entry in DebugLoc map.", DLNode);
- }
- }
-
- // If any of the debug loc fields is missing, return an error.
- if (!File || !Line || !Column)
- return make_error<ParseError>("DebugLoc node incomplete.", Node);
-
- return Error::success();
-}
-
-Error RemarkParser::parseArg(SmallVectorImpl<LLVMOptRemarkArg> &Args,
- yaml::Node &Node) {
- auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
- if (!ArgMap)
- return make_error<ParseError>("expected a value of mapping type.", Node);
-
- StringRef ValueStr;
- StringRef KeyStr;
- Optional<StringRef> File;
- Optional<unsigned> Line;
- Optional<unsigned> Column;
-
- for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
- StringRef KeyName;
- if (Error E = parseKey(KeyName, ArgEntry))
- return E;
-
- // Try to parse debug locs.
- if (KeyName == "DebugLoc") {
- // Can't have multiple DebugLoc entries per argument.
- if (File || Line || Column)
- return make_error<ParseError>(
- "only one DebugLoc entry is allowed per argument.", ArgEntry);
-
- if (Error E = parseDebugLoc(File, Line, Column, ArgEntry))
- return E;
- continue;
- }
-
- // If we already have a string, error out.
- if (!ValueStr.empty())
- return make_error<ParseError>(
- "only one string entry is allowed per argument.", ArgEntry);
-
- // Try to parse a string.
- if (Error E = parseValue(ValueStr, ArgEntry))
- return E;
-
- // Keep the key from the string.
- KeyStr = KeyName;
- }
-
- if (KeyStr.empty())
- return make_error<ParseError>("argument key is missing.", *ArgMap);
- if (ValueStr.empty())
- return make_error<ParseError>("argument value is missing.", *ArgMap);
-
- Args.push_back(LLVMOptRemarkArg{
- toOptRemarkStr(KeyStr), toOptRemarkStr(ValueStr),
- LLVMOptRemarkDebugLoc{toOptRemarkStr(File.getValueOr(StringRef())),
- Line.getValueOr(0), Column.getValueOr(0)}});
-
- return Error::success();
-}
-
-Error RemarkParser::parseYAMLElement(yaml::Document &Remark) {
- // Parsing a new remark, clear the previous one.
- LastRemark = None;
- State = ParseState(TmpArgs);
-
- auto *Root = dyn_cast<yaml::MappingNode>(Remark.getRoot());
- if (!Root)
- return make_error<ParseError>("document root is not of mapping type.",
- *Remark.getRoot());
-
- State.Type = Root->getRawTag();
-
- for (yaml::KeyValueNode &RemarkField : *Root) {
- StringRef KeyName;
- if (Error E = parseKey(KeyName, RemarkField))
- return E;
-
- if (KeyName == "Pass") {
- if (Error E = parseValue(State.Pass, RemarkField))
- return E;
- } else if (KeyName == "Name") {
- if (Error E = parseValue(State.Name, RemarkField))
- return E;
- } else if (KeyName == "Function") {
- if (Error E = parseValue(State.Function, RemarkField))
- return E;
- } else if (KeyName == "Hotness") {
- if (Error E = parseValue(State.Hotness, RemarkField))
- return E;
- } else if (KeyName == "DebugLoc") {
- if (Error E =
- parseDebugLoc(State.File, State.Line, State.Column, RemarkField))
- return E;
- } else if (KeyName == "Args") {
- auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
- if (!Args)
- return make_error<ParseError>("wrong value type for key.", RemarkField);
-
- for (yaml::Node &Arg : *Args)
- if (Error E = parseArg(*State.Args, Arg))
- return E;
- } else {
- return make_error<ParseError>("unknown key.", RemarkField);
- }
- }
-
- // If the YAML parsing failed, don't even continue parsing. We might
- // encounter malformed YAML.
- if (Stream.failed())
- return make_error<ParseError>("YAML parsing failed.", *Remark.getRoot());
-
- // Check if any of the mandatory fields are missing.
- if (State.Type.empty() || State.Pass.empty() || State.Name.empty() ||
- State.Function.empty())
- return make_error<ParseError>("Type, Pass, Name or Function missing.",
- *Remark.getRoot());
-
- LastRemark = LLVMOptRemarkEntry{
- toOptRemarkStr(State.Type),
- toOptRemarkStr(State.Pass),
- toOptRemarkStr(State.Name),
- toOptRemarkStr(State.Function),
- LLVMOptRemarkDebugLoc{toOptRemarkStr(State.File.getValueOr(StringRef())),
- State.Line.getValueOr(0),
- State.Column.getValueOr(0)},
- State.Hotness.getValueOr(0),
- static_cast<uint32_t>(State.Args->size()),
- State.Args->data()};
-
- return Error::success();
-}
-} // namespace
-
-// Create wrappers for C Binding types (see CBindingWrapping.h).
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(RemarkParser, LLVMOptRemarkParserRef)
-
-extern "C" LLVMOptRemarkParserRef LLVMOptRemarkParserCreate(const void *Buf,
- uint64_t Size) {
- return wrap(
- new RemarkParser(StringRef(static_cast<const char *>(Buf), Size)));
-}
-
-extern "C" LLVMOptRemarkEntry *
-LLVMOptRemarkParserGetNext(LLVMOptRemarkParserRef Parser) {
- RemarkParser &TheParser = *unwrap(Parser);
- // Check for EOF.
- if (TheParser.HadAnyErrors || TheParser.DI == TheParser.Stream.end())
- return nullptr;
-
- // Try to parse an entry.
- if (Error E = TheParser.parseYAMLElement(*TheParser.DI)) {
- handleAllErrors(std::move(E), [&](const ParseError &PE) {
- TheParser.Stream.printError(&PE.getNode(),
- Twine(PE.getMessage()) + Twine('\n'));
- TheParser.HadAnyErrors = true;
- });
- return nullptr;
- }
-
- // Move on.
- ++TheParser.DI;
-
- // Return the just-parsed remark.
- if (Optional<LLVMOptRemarkEntry> &Entry = TheParser.LastRemark)
- return &*Entry;
- return nullptr;
-}
-
-extern "C" LLVMBool LLVMOptRemarkParserHasError(LLVMOptRemarkParserRef Parser) {
- return unwrap(Parser)->HadAnyErrors;
-}
-
-extern "C" const char *
-LLVMOptRemarkParserGetErrorMessage(LLVMOptRemarkParserRef Parser) {
- return unwrap(Parser)->ErrorStream.str().c_str();
-}
-
-extern "C" void LLVMOptRemarkParserDispose(LLVMOptRemarkParserRef Parser) {
- delete unwrap(Parser);
-}
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
index 4ce40e3ab26c..ea382b347345 100644
--- a/lib/Option/Arg.cpp
+++ b/lib/Option/Arg.cpp
@@ -1,9 +1,8 @@
//===- Arg.cpp - Argument Implementations ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -67,6 +66,9 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); }
#endif
std::string Arg::getAsString(const ArgList &Args) const {
+ if (Alias)
+ return Alias->getAsString(Args);
+
SmallString<256> Res;
raw_svector_ostream OS(Res);
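
With the new early return, an Arg produced for an alias prints the spelling the user actually wrote rather than the canonical option it was rewritten to. A toy sketch of that delegation; the struct here is illustrative, not llvm::opt::Arg.

```cpp
// Sketch only: prefer the user-written (alias) spelling in diagnostics.
#include <iostream>
#include <memory>
#include <string>

struct Arg {
  std::string Spelling;
  std::unique_ptr<Arg> Alias;  // the user-written form, if this Arg was unaliased

  std::string getAsString() const {
    if (Alias)
      return Alias->getAsString();  // print what the user typed
    return Spelling;
  }
};

int main() {
  Arg Canonical{"--output="};
  Canonical.Alias = std::make_unique<Arg>(Arg{"-o", nullptr});
  std::cout << Canonical.getAsString() << "\n";  // prints "-o"
}
```
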
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index 8a7d59d24366..f37c142da69b 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -1,9 +1,8 @@
//===- ArgList.cpp - Argument List Management -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -96,21 +95,6 @@ std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
return std::vector<std::string>(Values.begin(), Values.end());
}
-void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id) const {
- if (Arg *A = getLastArg(Id)) {
- A->claim();
- A->render(*this, Output);
- }
-}
-
-void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0,
- OptSpecifier Id1) const {
- if (Arg *A = getLastArg(Id0, Id1)) {
- A->claim();
- A->render(*this, Output);
- }
-}
-
void ArgList::AddAllArgsExcept(ArgStringList &Output,
ArrayRef<OptSpecifier> Ids,
ArrayRef<OptSpecifier> ExcludeIds) const {
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 312ff7808759..5833d03069f8 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -1,9 +1,8 @@
//===- OptTable.cpp - Option Table Implementation -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -252,59 +251,69 @@ unsigned OptTable::findNearest(StringRef Option, std::string &NearestString,
unsigned MinimumLength) const {
assert(!Option.empty());
- // Consider each option as a candidate, finding the closest match.
+ // Consider each [option prefix + option name] pair as a candidate, finding
+ // the closest match.
unsigned BestDistance = UINT_MAX;
for (const Info &CandidateInfo :
ArrayRef<Info>(OptionInfos).drop_front(FirstSearchableIndex)) {
StringRef CandidateName = CandidateInfo.Name;
- // Ignore option candidates with empty names, such as "--", or names
- // that do not meet the minimum length.
+ // We can eliminate some option prefix/name pairs as candidates right away:
+ // * Ignore option candidates with empty names, such as "--", or names
+ // that do not meet the minimum length.
if (CandidateName.empty() || CandidateName.size() < MinimumLength)
continue;
- // If FlagsToInclude were specified, ignore options that don't include
- // those flags.
+ // * If FlagsToInclude were specified, ignore options that don't include
+ // those flags.
if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude))
continue;
- // Ignore options that contain the FlagsToExclude.
+ // * Ignore options that contain the FlagsToExclude.
if (CandidateInfo.Flags & FlagsToExclude)
continue;
- // Ignore positional argument option candidates (which do not
- // have prefixes).
+ // * Ignore positional argument option candidates (which do not
+ // have prefixes).
if (!CandidateInfo.Prefixes)
continue;
- // Find the most appropriate prefix. For example, if a user asks for
- // "--helm", suggest "--help" over "-help".
- StringRef Prefix = CandidateInfo.Prefixes[0];
- for (int P = 1; CandidateInfo.Prefixes[P]; P++) {
- if (Option.startswith(CandidateInfo.Prefixes[P]))
- Prefix = CandidateInfo.Prefixes[P];
- }
- // Check if the candidate ends with a character commonly used when
+ // Now check if the candidate ends with a character commonly used when
// delimiting an option from its value, such as '=' or ':'. If it does,
// attempt to split the given option based on that delimiter.
- std::string Delimiter = "";
- char Last = CandidateName.back();
- if (Last == '=' || Last == ':')
- Delimiter = std::string(1, Last);
-
StringRef LHS, RHS;
- if (Delimiter.empty())
- LHS = Option;
- else
+ char Last = CandidateName.back();
+ bool CandidateHasDelimiter = Last == '=' || Last == ':';
+ std::string NormalizedName = Option;
+ if (CandidateHasDelimiter) {
std::tie(LHS, RHS) = Option.split(Last);
+ NormalizedName = LHS;
+ if (Option.find(Last) == LHS.size())
+ NormalizedName += Last;
+ }
- std::string NormalizedName =
- (LHS.drop_front(Prefix.size()) + Delimiter).str();
- unsigned Distance =
- CandidateName.edit_distance(NormalizedName, /*AllowReplacements=*/true,
- /*MaxEditDistance=*/BestDistance);
- if (Distance < BestDistance) {
- BestDistance = Distance;
- NearestString = (Prefix + CandidateName + RHS).str();
+ // Consider each possible prefix for each candidate to find the most
+ // appropriate one. For example, if a user asks for "--helm", suggest
+ // "--help" over "-help".
+ for (int P = 0;
+ const char *const CandidatePrefix = CandidateInfo.Prefixes[P]; P++) {
+ std::string Candidate = (CandidatePrefix + CandidateName).str();
+ StringRef CandidateRef = Candidate;
+ unsigned Distance =
+ CandidateRef.edit_distance(NormalizedName, /*AllowReplacements=*/true,
+ /*MaxEditDistance=*/BestDistance);
+ if (RHS.empty() && CandidateHasDelimiter) {
+ // The Candidate ends with a = or : delimiter, but the option passed in
+ // didn't contain the delimiter (or doesn't have anything after it).
+ // In that case, penalize the correction: `-nodefaultlibs` is more
+ // likely to be a spello for `-nodefaultlib` than `-nodefaultlib:` even
+ // though both have an unmodified editing distance of 1, since the
+ // latter would need an argument.
+ ++Distance;
+ }
+ if (Distance < BestDistance) {
+ BestDistance = Distance;
+ NearestString = (Candidate + RHS).str();
+ }
}
}
return BestDistance;
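
The reworked findNearest scores every prefix+name candidate by edit distance, adding a one-point penalty when the candidate ends in '=' or ':' but the input supplies no value after such a delimiter, so -nodefaultlibs suggests -nodefaultlib rather than -nodefaultlib:. A self-contained sketch of that scoring; the Levenshtein routine and penalty check are simplified, and all names are illustrative.

```cpp
// Sketch only: nearest-option suggestion with a delimiter penalty.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

static unsigned editDistance(const std::string &A, const std::string &B) {
  std::vector<unsigned> Row(B.size() + 1);
  for (size_t J = 0; J <= B.size(); ++J) Row[J] = J;
  for (size_t I = 1; I <= A.size(); ++I) {
    unsigned Prev = Row[0];
    Row[0] = I;
    for (size_t J = 1; J <= B.size(); ++J) {
      unsigned Cur = Row[J];
      Row[J] = std::min({Row[J] + 1, Row[J - 1] + 1,
                         Prev + (A[I - 1] != B[J - 1] ? 1u : 0u)});
      Prev = Cur;
    }
  }
  return Row[B.size()];
}

int main() {
  std::string Input = "-nodefaultlibs";
  std::vector<std::string> Candidates = {"-nodefaultlib", "-nodefaultlib:"};

  std::string Best;
  unsigned BestDistance = ~0u;
  for (const std::string &C : Candidates) {
    unsigned D = editDistance(C, Input);
    bool HasDelimiter = C.back() == '=' || C.back() == ':';
    if (HasDelimiter && Input.find(C.back()) == std::string::npos)
      ++D;  // penalize candidates that would still need a value
            // (simplified from the patch's RHS.empty() check)
    if (D < BestDistance) {
      BestDistance = D;
      Best = C;
    }
  }
  std::cout << "nearest: " << Best << " (distance " << BestDistance << ")\n";
}
```
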
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index f9d8a5e54043..9abc9fdce4c7 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -1,9 +1,8 @@
//===- Option.cpp - Abstract Driver Options -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -107,49 +106,23 @@ bool Option::matches(OptSpecifier Opt) const {
return false;
}
-Arg *Option::accept(const ArgList &Args,
- unsigned &Index,
- unsigned ArgSize) const {
- const Option &UnaliasedOption = getUnaliasedOption();
- StringRef Spelling;
- // If the option was an alias, get the spelling from the unaliased one.
- if (getID() == UnaliasedOption.getID()) {
- Spelling = StringRef(Args.getArgString(Index), ArgSize);
- } else {
- Spelling = Args.MakeArgString(Twine(UnaliasedOption.getPrefix()) +
- Twine(UnaliasedOption.getName()));
- }
-
+Arg *Option::acceptInternal(const ArgList &Args, unsigned &Index,
+ unsigned ArgSize) const {
+ StringRef Spelling = StringRef(Args.getArgString(Index), ArgSize);
switch (getKind()) {
case FlagClass: {
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
-
- Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
- if (getAliasArgs()) {
- const char *Val = getAliasArgs();
- while (*Val != '\0') {
- A->getValues().push_back(Val);
-
- // Move past the '\0' to the next argument.
- Val += strlen(Val) + 1;
- }
- }
-
- if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs())
- // A Flag alias for a Joined option must provide an argument.
- A->getValues().push_back("");
-
- return A;
+ return new Arg(*this, Spelling, Index++);
}
case JoinedClass: {
const char *Value = Args.getArgString(Index) + ArgSize;
- return new Arg(UnaliasedOption, Spelling, Index++, Value);
+ return new Arg(*this, Spelling, Index++, Value);
}
case CommaJoinedClass: {
// Always matches.
const char *Str = Args.getArgString(Index) + ArgSize;
- Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+ Arg *A = new Arg(*this, Spelling, Index++);
// Parse out the comma separated values.
const char *Prev = Str;
@@ -185,8 +158,7 @@ Arg *Option::accept(const ArgList &Args,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(UnaliasedOption, Spelling,
- Index - 2, Args.getArgString(Index - 1));
+ return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
case MultiArgClass: {
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
@@ -197,8 +169,8 @@ Arg *Option::accept(const ArgList &Args,
if (Index > Args.getNumInputArgStrings())
return nullptr;
- Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
- Args.getArgString(Index - getNumArgs()));
+ Arg *A = new Arg(*this, Spelling, Index - 1 - getNumArgs(),
+ Args.getArgString(Index - getNumArgs()));
for (unsigned i = 1; i != getNumArgs(); ++i)
A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
return A;
@@ -217,8 +189,7 @@ Arg *Option::accept(const ArgList &Args,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(UnaliasedOption, Spelling,
- Index - 2, Args.getArgString(Index - 1));
+ return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
}
case JoinedAndSeparateClass:
// Always matches.
@@ -227,7 +198,7 @@ Arg *Option::accept(const ArgList &Args,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(UnaliasedOption, Spelling, Index - 2,
+ return new Arg(*this, Spelling, Index - 2,
Args.getArgString(Index - 2) + ArgSize,
Args.getArgString(Index - 1));
case RemainingArgsClass: {
@@ -235,14 +206,14 @@ Arg *Option::accept(const ArgList &Args,
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
- Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+ Arg *A = new Arg(*this, Spelling, Index++);
while (Index < Args.getNumInputArgStrings() &&
Args.getArgString(Index) != nullptr)
A->getValues().push_back(Args.getArgString(Index++));
return A;
}
case RemainingArgsJoinedClass: {
- Arg *A = new Arg(UnaliasedOption, Spelling, Index);
+ Arg *A = new Arg(*this, Spelling, Index);
if (ArgSize != strlen(Args.getArgString(Index))) {
// An inexact match means there is a joined arg.
A->getValues().push_back(Args.getArgString(Index) + ArgSize);
@@ -258,3 +229,62 @@ Arg *Option::accept(const ArgList &Args,
llvm_unreachable("Invalid option kind!");
}
}
+
+Arg *Option::accept(const ArgList &Args,
+ unsigned &Index,
+ unsigned ArgSize) const {
+ std::unique_ptr<Arg> A(acceptInternal(Args, Index, ArgSize));
+ if (!A)
+ return nullptr;
+
+ const Option &UnaliasedOption = getUnaliasedOption();
+ if (getID() == UnaliasedOption.getID())
+ return A.release();
+
+ // "A" is an alias for a different flag. For most clients it's more convenient
+ // if this function returns unaliased Args, so create an unaliased arg for
+ // returning.
+
+ // This creates a completely new Arg object for the unaliased Arg because
+ // the alias and the unaliased arg can have different Kinds and different
+ // Values (due to AliasArgs<>).
+
+ // Get the spelling from the unaliased option.
+ StringRef UnaliasedSpelling = Args.MakeArgString(
+ Twine(UnaliasedOption.getPrefix()) + Twine(UnaliasedOption.getName()));
+
+ // It's a bit weird that the aliased and unaliased arg share one index, but
+ // the index is mostly used as a memory optimization in render().
+ // Due to this, ArgList::getArgString(A->getIndex()) will always return the
+ // spelling of the aliased arg, while A->getSpelling() returns either the
+ // unaliased or the aliased spelling, depending on which Arg it's called on.
+ Arg *UnaliasedA = new Arg(UnaliasedOption, UnaliasedSpelling, A->getIndex());
+ Arg *RawA = A.get();
+ UnaliasedA->setAlias(std::move(A));
+
+ if (getKind() != FlagClass) {
+ // Values are usually owned by the ArgList. The exception is
+ // CommaJoined flags, where the Arg owns the values. For aliased flags,
+ // make the unaliased Arg the owner of the values.
+ // FIXME: There aren't many uses of CommaJoined -- try removing
+ // CommaJoined in favor of just calling StringRef::split(',') instead.
+ UnaliasedA->getValues() = RawA->getValues();
+ UnaliasedA->setOwnsValues(RawA->getOwnsValues());
+ RawA->setOwnsValues(false);
+ return UnaliasedA;
+ }
+
+ // FlagClass aliases can have AliasArgs<>; add those to the unaliased arg.
+ if (const char *Val = getAliasArgs()) {
+ while (*Val != '\0') {
+ UnaliasedA->getValues().push_back(Val);
+
+ // Move past the '\0' to the next argument.
+ Val += strlen(Val) + 1;
+ }
+ }
+ if (UnaliasedOption.getKind() == JoinedClass && !getAliasArgs())
+ // A Flag alias for a Joined option must provide an argument.
+ UnaliasedA->getValues().push_back("");
+ return UnaliasedA;
+}
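
The tail of the new accept keeps the old AliasArgs<> handling: alias values are stored as a single buffer of '\0'-separated strings that ends with an empty string, and each entry is appended to the unaliased Arg. A tiny sketch of that walk.

```cpp
// Sketch only: iterate a '\0'-separated alias-argument buffer.
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  // e.g. an alias that expands to two canonical values.
  const char AliasArgs[] = "-foo\0-bar\0";  // trailing '\0' ends the list

  std::vector<const char *> Values;
  for (const char *Val = AliasArgs; *Val != '\0'; Val += std::strlen(Val) + 1)
    Values.push_back(Val);  // move past the '\0' to the next argument

  for (const char *V : Values)
    std::cout << V << "\n";  // prints "-foo" then "-bar"
}
```
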
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 5ec94ea6f40a..e2b2a2b25268 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -1,9 +1,8 @@
//===- Parsing, selection, and construction of pass pipelines -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -57,6 +56,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
@@ -65,6 +65,7 @@
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/IPO/CalledValuePropagation.h"
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
@@ -89,14 +90,18 @@
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/CGProfile.h"
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
-#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
#include "llvm/Transforms/Scalar/BDCE.h"
@@ -120,6 +125,7 @@
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFuse.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
@@ -134,9 +140,11 @@
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
@@ -181,10 +189,6 @@ static cl::opt<bool>
cl::Hidden, cl::ZeroOrMore,
cl::desc("Run NewGVN instead of GVN"));
-static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-npm-earlycse-memssa", cl::init(true), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = on)"));
-
static cl::opt<bool> EnableGVNHoist(
"enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
@@ -205,11 +209,26 @@ static cl::opt<bool> EnableSyntheticCounts(
static Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
+// This option is used to simplify testing SampleFDO optimizations for
+// profile loading.
static cl::opt<bool>
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
+PipelineTuningOptions::PipelineTuningOptions() {
+ LoopInterleaving = EnableLoopInterleaving;
+ LoopVectorization = EnableLoopVectorization;
+ SLPVectorization = RunSLPVectorization;
+ LoopUnrolling = true;
+ ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+ LicmMssaOptCap = SetLicmMssaOptCap;
+ LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+}
+
extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableOrderFileInstrumentation;
+
+extern cl::opt<bool> FlattenedProfileUsed;
static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
switch (Level) {
@@ -371,7 +390,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(SROA());
// Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA));
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
// Hoisting of scalars and load expressions.
if (EnableGVNHoist)
@@ -401,7 +420,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// For PGO use pipeline, try to optimize memory intrinsics such as memcpy
// using the size value profile. Don't perform this when optimizing for size.
- if (PGOOpt && !PGOOpt->ProfileUseFile.empty() &&
+ if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
!isOptimizingForSize(Level))
FPM.addPass(PGOMemOPSizeOpt());
@@ -432,7 +451,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
- LPM1.addPass(LICMPass());
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
LPM1.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(IndVarSimplifyPass());
LPM2.addPass(LoopIdiomRecognizePass());
@@ -444,9 +463,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Do not enable unrolling in PreLinkThinLTO phase during sample PGO
// because it changes IR to make profile annotation in the backend compile
// inaccurate.
- if (Phase != ThinLTOPhase::PreLink ||
- !PGOOpt || PGOOpt->SampleProfileFile.empty())
- LPM2.addPass(LoopFullUnrollPass(Level));
+ if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse) &&
+ PTO.LoopUnrolling)
+ LPM2.addPass(
+ LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll));
for (auto &C : LoopOptimizerEndEPCallbacks)
C(LPM2, Level);
@@ -492,7 +513,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ DebugLogging));
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
@@ -505,7 +528,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
invokePeepholeEPCallbacks(FPM, Level);
if (EnableCHR && Level == O3 && PGOOpt &&
- (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty()))
+ (PGOOpt->Action == PGOOptions::IRUse ||
+ PGOOpt->Action == PGOOptions::SampleUse))
FPM.addPass(ControlHeightReductionPass());
return FPM;
@@ -513,15 +537,15 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
PassBuilder::OptimizationLevel Level,
- bool RunProfileGen,
- std::string ProfileGenFile,
- std::string ProfileUseFile,
+ bool RunProfileGen, bool IsCS,
+ std::string ProfileFile,
std::string ProfileRemappingFile) {
// Generally running simplification passes and the inliner with a high
// threshold results in smaller executables, but there may be cases where
// the size grows, so let's be conservative here and skip this simplification
- // at -Os/Oz.
- if (!isOptimizingForSize(Level)) {
+ // at -Os/Oz. We will not do this inlining for context-sensitive PGO (when
+ // IsCS is true).
+ if (!isOptimizingForSize(Level) && !IsCS) {
InlineParams IP;
// In the old pass manager, this is a cl::opt. Should this still be one?
@@ -554,7 +578,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(GlobalDCEPass());
if (RunProfileGen) {
- MPM.addPass(PGOInstrumentationGen());
+ MPM.addPass(PGOInstrumentationGen(IsCS));
FunctionPassManager FPM;
FPM.addPass(
@@ -563,14 +587,17 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
// Add the profile lowering pass.
InstrProfOptions Options;
- if (!ProfileGenFile.empty())
- Options.InstrProfileOutput = ProfileGenFile;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
Options.DoCounterPromotion = true;
- MPM.addPass(InstrProfiling(Options));
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
+ } else if (!ProfileFile.empty()) {
+ MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
-
- if (!ProfileUseFile.empty())
- MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile));
}
static InlineParams
@@ -587,6 +614,32 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
bool DebugLogging) {
ModulePassManager MPM(DebugLogging);
+ bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+ // In ThinLTO mode, when flattened profile is used, all the available
+ // profile information will be annotated in PreLink phase so there is
+ // no need to load the profile again in PostLink.
+ bool LoadSampleProfile =
+ HasSampleProfile &&
+ !(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink);
+
+ // During the ThinLTO backend phase we perform early indirect call promotion
+ // here, before globalopt. Otherwise imported available_externally functions
+ // look unreferenced and are removed. If we are going to load the sample
+ // profile then defer until later.
+ // TODO: See if we can move later and consolidate with the location where
+ // we perform ICP when we are loading a sample profile.
+ // TODO: We pass HasSampleProfile (whether there was a sample profile file
+ // passed to the compilation) to the SamplePGO flag of ICP. This is used to
+ // determine whether the new direct calls are annotated with prof metadata.
+ // Ideally this should be determined from whether the IR is annotated with
+ // a sample profile, and not from whether a sample profile was provided on
+ // the command line. E.g. for flattened profiles where we will not be reloading
+ // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+ // provide the sample profile file.
+ if (Phase == ThinLTOPhase::PostLink && !LoadSampleProfile)
+ MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
MPM.addPass(InferFunctionAttrsPass());
@@ -607,17 +660,19 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// More details about SamplePGO design can be found in:
// https://research.google.com/pubs/pub45290.html
// FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
- if (PGOOpt && !PGOOpt->SampleProfileFile.empty() &&
- Phase == ThinLTOPhase::PostLink)
+ if (LoadSampleProfile)
EarlyFPM.addPass(InstCombinePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
- if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
+ if (LoadSampleProfile) {
// Annotate sample profile right after early FPM to ensure freshness of
// the debug info.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
Phase == ThinLTOPhase::PreLink));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the ThinLTO backend.
if (Phase != ThinLTOPhase::PreLink)
@@ -626,7 +681,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// imported available_externally functions look unreferenced and are
// removed.
MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink,
- true));
+ true /* SamplePGO */));
}
// Interprocedural constant propagation now that basic cleanup has occurred
@@ -664,12 +719,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Add all the requested passes for instrumentation PGO, if requested.
if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
- (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) {
- addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
- PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile,
+ (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse)) {
+ addPGOInstrPasses(MPM, DebugLogging, Level,
+ /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+ /* IsCS */ false, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
+ if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+ PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
// Synthesize function entry counts for non-PGO compilation.
if (EnableSyntheticCounts && !PGOOpt)
@@ -700,8 +760,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
// because it makes profile annotation in the backend inaccurate.
InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == ThinLTOPhase::PreLink &&
- PGOOpt && !PGOOpt->SampleProfileFile.empty())
+ if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
MainCGPipeline.addPass(InlinerPass(IP));
@@ -718,11 +778,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
- // We only want to do hot cold splitting once for ThinLTO, during the
- // post-link ThinLTO.
- if (EnableHotColdSplit && Phase != ThinLTOPhase::PreLink)
- MPM.addPass(HotColdSplittingPass());
-
for (auto &C : CGSCCOptimizerLateEPCallbacks)
C(MainCGPipeline, Level);
@@ -738,9 +793,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
return MPM;
}
-ModulePassManager
-PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
+ OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) {
ModulePassManager MPM(DebugLogging);
// Optimize globals now that the module is fully simplified.
@@ -759,14 +813,34 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// available externally globals. Eventually they will be suppressed during
// codegen, but eliminating here enables more opportunity for GlobalDCE as it
// may make globals referenced by available external functions dead and saves
- // running remaining passes on the eliminated functions.
- MPM.addPass(EliminateAvailableExternallyPass());
+ // running remaining passes on the eliminated functions. These should be
+ // preserved during prelinking for link-time inlining decisions.
+ if (!LTOPreLink)
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ if (EnableOrderFileInstrumentation)
+ MPM.addPass(InstrOrderFilePass());
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
+ // Do a post-inline PGO instrumentation and use pass. This is a
+ // context-sensitive PGO pass. We don't want to do this in the LTOPreLink
+ // phase, as cross-module inlining has not been done yet. The
+ // context-sensitive instrumentation is added after all inlining is done.
+ if (!LTOPreLink && PGOOpt) {
+ if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+ /* IsCS */ true, PGOOpt->CSProfileGenFile,
+ PGOOpt->ProfileRemappingFile);
+ else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+ addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+ /* IsCS */ true, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ }
+
// Re-require GloblasAA here prior to function passes. This is particularly
// useful as the above will have inlined, DCE'ed, and function-attr
// propagated everything. We should at this point have a reasonably minimal
@@ -799,7 +873,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(LoopDistributePass());
// Now run the core loop vectorizer.
- OptimizePM.addPass(LoopVectorizePass());
+ OptimizePM.addPass(LoopVectorizePass(
+ LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
@@ -824,7 +899,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
sinkCommonInsts(true)));
// Optimize parallel scalar instruction chains into SIMD instructions.
- OptimizePM.addPass(SLPVectorizerPass());
+ if (PTO.SLPVectorization)
+ OptimizePM.addPass(SLPVectorizerPass());
OptimizePM.addPass(InstCombinePass());
@@ -839,16 +915,26 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
- OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
+ if (PTO.LoopUnrolling)
+ OptimizePM.addPass(LoopUnrollPass(
+ LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll)));
OptimizePM.addPass(WarnMissedTransformationsPass());
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ DebugLogging));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
OptimizePM.addPass(AlignmentFromAssumptionsPass());
+ // Split out cold code. Splitting is done late to avoid hiding context from
+ // other optimizations and inadvertently regressing performance. The tradeoff
+ // is that this has a higher code size cost than splitting early.
+ if (EnableHotColdSplit && !LTOPreLink)
+ MPM.addPass(HotColdSplittingPass());
+
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
// canonicalization pass that enables other optimizations. As a result,
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM
@@ -869,7 +955,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// Optimize PHIs by speculating around them when profitable. Note that this
// pass needs to be run after any PRE or similar pass as it is essentially
- // inserting redudnancies into the progrem. This even includes SimplifyCFG.
+ // inserting redundancies into the program. This even includes SimplifyCFG.
OptimizePM.addPass(SpeculateAroundPHIsPass());
for (auto &C : OptimizerLastEPCallbacks)
@@ -892,7 +978,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ bool DebugLogging, bool LTOPreLink) {
assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
@@ -912,7 +998,7 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
DebugLogging));
// Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink));
return MPM;
}
@@ -974,22 +1060,19 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
//
// Also, WPD has access to more precise information than ICP and can
// devirtualize more effectively, so it should operate on the IR first.
+ //
+ // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+ // metadata and intrinsics.
MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
}
+ if (Level == O0)
+ return MPM;
+
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
- // During the ThinLTO backend phase we perform early indirect call promotion
- // here, before globalopt. Otherwise imported available_externally functions
- // look unreferenced and are removed.
- // FIXME: move this into buildModuleSimplificationPipeline to merge the logic
- // with SamplePGO.
- if (!PGOOpt || PGOOpt->SampleProfileFile.empty())
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
- false /* SamplePGO */));
-
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink,
DebugLogging));
@@ -1005,20 +1088,31 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
bool DebugLogging) {
assert(Level != O0 && "Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
- return buildPerModuleDefaultPipeline(Level, DebugLogging);
+ return buildPerModuleDefaultPipeline(Level, DebugLogging,
+ /* LTOPreLink */true);
}
ModulePassManager
PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
ModuleSummaryIndex *ExportSummary) {
- assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
- if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) {
+ if (Level == O0) {
+ // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+ // metadata and intrinsics.
+ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ return MPM;
+ }
+
+ if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
// Load sample profile before running the LTO optimization pipeline.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile,
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
false /* ThinLTOPhase::PreLink */));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
// Remove unused virtual tables to improve the quality of code generated by
@@ -1042,7 +1136,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
 // This two-step promotion is to save compile time. For LTO, it should
 // produce the same result as if we only did promotion here.
MPM.addPass(PGOIndirectCallPromotion(
- true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
+ true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
@@ -1062,7 +1156,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
- // Use inragne annotations on GEP indices to split globals where beneficial.
+ // Use in-range annotations on GEP indices to split globals where beneficial.
MPM.addPass(GlobalSplitPass());
// Run whole program optimization of virtual call when the list of callees
@@ -1124,9 +1218,26 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
FPM.addPass(JumpThreadingPass());
+ // Do a post-inline PGO instrumentation and use pass. This is a context
+ // sensitive PGO pass.
+ if (PGOOpt) {
+ if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+ /* IsCS */ true, PGOOpt->CSProfileGenFile,
+ PGOOpt->ProfileRemappingFile);
+ else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+ addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+ /* IsCS */ true, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ }
+
// Break up allocas
FPM.addPass(SROA());
+ // LTO provides additional opportunities for tail call elimination due to
+ // link-time inlining and visibility of the nocapture attribute.
+ FPM.addPass(TailCallElimPass());
+
// Run a few AA driver optimizations here and now to cleanup the code.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
@@ -1138,7 +1249,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// FIXME: once we fix LoopPass Manager, add LICM here.
// FIXME: once we provide support for enabling MLSM, add it here.
- // FIXME: once we provide support for enabling NewGVN, add it here.
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
else
@@ -1151,7 +1261,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
MainFPM.addPass(DSEPass());
// FIXME: at this point, we run a bunch of loop passes:
- // indVarSimplify, loopDeletion, loopInterchange, loopUnrool,
+ // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
 // loopVectorize. Enable them once the remaining issues with LPM
 // are sorted out.
@@ -1186,6 +1296,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// CFI is disabled.
MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Enable splitting late in the FullLTO post-link pipeline. This is done in
+ // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+ if (EnableHotColdSplit)
+ MPM.addPass(HotColdSplittingPass());
+
// Add late LTO optimization passes.
// Delete basic blocks, which optimization passes may have killed.
MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
@@ -1196,7 +1311,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
// Now that we have optimized the program, discard unreachable functions.
MPM.addPass(GlobalDCEPass());
- // FIXME: Enable MergeFuncs, conditionally, after ported, maybe.
+ // FIXME: Maybe enable MergeFuncs conditionally after it's ported.
return MPM;
}
@@ -1326,6 +1441,107 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
return UnrollOpts;
}
+Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
+ MemorySanitizerOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == "recover") {
+ Result.Recover = true;
+ } else if (ParamName == "kernel") {
+ Result.Kernel = true;
+ } else if (ParamName.consume_front("track-origins=")) {
+ if (ParamName.getAsInteger(0, Result.TrackOrigins))
+ return make_error<StringError>(
+ formatv("invalid argument to MemorySanitizer pass track-origins "
+ "parameter: '{0}' ",
+ ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ } else {
+ return make_error<StringError>(
+ formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
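
The parsers added above (and the two that follow) all apply the same pattern: split the parameter string on ';', peel an optional "no-" or "key=" prefix from each piece, and reject anything unrecognized. As a minimal, self-contained sketch of that pattern in standard C++ (std::string_view instead of llvm::StringRef; OptionsSketch and parseOptionsSketch are illustrative names, not LLVM APIs):

#include <charconv>
#include <optional>
#include <string_view>

// Illustrative re-implementation of the split-on-';' parameter parsing used
// by the parsers above; not LLVM code.
struct OptionsSketch {
  bool Recover = false;
  int TrackOrigins = 0;
};

static std::optional<OptionsSketch> parseOptionsSketch(std::string_view Params) {
  OptionsSketch Result;
  while (!Params.empty()) {
    size_t Pos = Params.find(';');
    std::string_view Name = Params.substr(0, Pos);
    Params = (Pos == std::string_view::npos) ? std::string_view()
                                             : Params.substr(Pos + 1);
    if (Name == "recover") {
      Result.Recover = true;
    } else if (Name.rfind("track-origins=", 0) == 0) {
      Name.remove_prefix(std::string_view("track-origins=").size());
      if (std::from_chars(Name.data(), Name.data() + Name.size(),
                          Result.TrackOrigins).ec != std::errc())
        return std::nullopt; // malformed integer value
    } else {
      return std::nullopt; // unknown parameter name
    }
  }
  return Result;
}

The LLVM parsers return Expected<...> with a formatted StringError instead of std::nullopt, but the control flow is the same.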
+
+/// Parser of parameters for SimplifyCFG pass.
+Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
+ SimplifyCFGOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "forward-switch-cond") {
+ Result.forwardSwitchCondToPhi(Enable);
+ } else if (ParamName == "switch-to-lookup") {
+ Result.convertSwitchToLookupTable(Enable);
+ } else if (ParamName == "keep-loops") {
+ Result.needCanonicalLoops(Enable);
+ } else if (ParamName == "sink-common-insts") {
+ Result.sinkCommonInsts(Enable);
+ } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
+ APInt BonusInstThreshold;
+ if (ParamName.getAsInteger(0, BonusInstThreshold))
+ return make_error<StringError>(
+ formatv("invalid argument to SimplifyCFG pass bonus-threshold "
+ "parameter: '{0}' ",
+ ParamName).str(),
+ inconvertibleErrorCode());
+ Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
+ } else {
+ return make_error<StringError>(
+ formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
+/// Parser of parameters for LoopVectorize pass.
+Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
+ LoopVectorizeOptions Opts;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "interleave-forced-only") {
+ Opts.setInterleaveOnlyWhenForced(Enable);
+ } else if (ParamName == "vectorize-forced-only") {
+ Opts.setVectorizeOnlyWhenForced(Enable);
+ } else {
+ return make_error<StringError>(
+ formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Opts;
+}
+
+Expected<bool> parseLoopUnswitchOptions(StringRef Params) {
+ bool Result = false;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "nontrivial") {
+ Result = Enable;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
@@ -1447,6 +1663,9 @@ static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -1834,6 +2053,14 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
LPM.addPass(CREATE_PASS); \
return Error::success(); \
}
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ LPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
+ }
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
LPM.addPass(RequireAnalysisPass< \
diff --git a/lib/Passes/PassPlugin.cpp b/lib/Passes/PassPlugin.cpp
index bf38fdb842e7..ceefa25a703b 100644
--- a/lib/Passes/PassPlugin.cpp
+++ b/lib/Passes/PassPlugin.cpp
@@ -1,9 +1,8 @@
//===- lib/Passes/PassPluginLoader.cpp - Load Plugins for New PM Passes ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
index 771d2f5b212a..347f75870eb3 100644
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -1,9 +1,8 @@
//===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@ MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
MODULE_ANALYSIS("verify", VerifierAnalysis())
MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
#ifndef MODULE_ALIAS_ANALYSIS
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
@@ -41,6 +41,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
#define MODULE_PASS(NAME, CREATE_PASS)
#endif
MODULE_PASS("always-inline", AlwaysInlinerPass())
+MODULE_PASS("attributor", AttributorPass())
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
@@ -54,8 +55,11 @@ MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
+MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
+MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
+MODULE_PASS("instrorderfile", InstrOrderFilePass())
MODULE_PASS("instrprof", InstrProfiling())
MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
@@ -82,6 +86,9 @@ MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
MODULE_PASS("verify", VerifierPass())
+MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
+MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("poison-checking", PoisonCheckingPass())
#undef MODULE_PASS
#ifndef CGSCC_ANALYSIS
@@ -178,6 +185,7 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
+FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
@@ -186,6 +194,7 @@ FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
+FUNCTION_PASS("mergeicmps", MergeICmpsPass())
FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
@@ -194,8 +203,8 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass())
FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
+FUNCTION_PASS("loop-fuse", LoopFusePass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
-FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
@@ -215,7 +224,6 @@ FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
FUNCTION_PASS("reassociate", ReassociatePass())
FUNCTION_PASS("scalarizer", ScalarizerPass())
FUNCTION_PASS("sccp", SCCPPass())
-FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
@@ -228,10 +236,14 @@ FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
+FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("msan", MemorySanitizerPass())
+FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false))
+FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
+FUNCTION_PASS("msan", MemorySanitizerPass({}))
+FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
FUNCTION_PASS("tsan", ThreadSanitizerPass())
#undef FUNCTION_PASS
@@ -239,8 +251,25 @@ FUNCTION_PASS("tsan", ThreadSanitizerPass())
#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#endif
FUNCTION_PASS_WITH_PARAMS("unroll",
- [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); },
- parseLoopUnrollOptions)
+ [](LoopUnrollOptions Opts) {
+ return LoopUnrollPass(Opts);
+ },
+ parseLoopUnrollOptions)
+FUNCTION_PASS_WITH_PARAMS("msan",
+ [](MemorySanitizerOptions Opts) {
+ return MemorySanitizerPass(Opts);
+ },
+ parseMSanPassOptions)
+FUNCTION_PASS_WITH_PARAMS("simplify-cfg",
+ [](SimplifyCFGOptions Opts) {
+ return SimplifyCFGPass(Opts);
+ },
+ parseSimplifyCFGOptions)
+FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
+ [](LoopVectorizeOptions Opts) {
+ return LoopVectorizePass(Opts);
+ },
+ parseLoopVectorizeOptions)
#undef FUNCTION_PASS_WITH_PARAMS
#ifndef LOOP_ANALYSIS
@@ -269,8 +298,18 @@ LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("irce", IRCEPass())
LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("unroll-full", LoopFullUnrollPass())
-LOOP_PASS("unswitch", SimpleLoopUnswitchPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
LOOP_PASS("loop-predication", LoopPredicationPass())
+LOOP_PASS("guard-widening", GuardWideningPass())
#undef LOOP_PASS
+
+#ifndef LOOP_PASS_WITH_PARAMS
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#endif
+LOOP_PASS_WITH_PARAMS("unswitch",
+ [](bool NonTrivial) {
+ return SimpleLoopUnswitchPass(NonTrivial);
+ },
+ parseLoopUnswitchOptions)
+#undef LOOP_PASS_WITH_PARAMS
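
With these registry entries, the parameterized forms become reachable through the textual pipeline syntax. A hedged sketch of driving them via PassBuilder follows; the analysis-manager setup is the usual new-pass-manager boilerplate, and the pipeline string is only an example of the syntax these parsers accept, not a recommended configuration:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

// Sketch: parse a pipeline that exercises the new parameterized entries.
static Error runSketchPipeline(Module &M) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  if (Error E = PB.parsePassPipeline(
          MPM, "function(simplify-cfg<bonus-inst-threshold=2;no-keep-loops>,"
               "loop(unswitch<nontrivial>))"))
    return E;
  MPM.run(M, MAM);
  return Error::success();
}

The same strings work with opt's -passes= option, since opt funnels them through parsePassPipeline.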
diff --git a/lib/Passes/StandardInstrumentations.cpp b/lib/Passes/StandardInstrumentations.cpp
index a1dfc39d472c..5cf0ca8e28f6 100644
--- a/lib/Passes/StandardInstrumentations.cpp
+++ b/lib/Passes/StandardInstrumentations.cpp
@@ -1,9 +1,8 @@
//===- Standard pass instrumentations handling ----------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index b2dde3406a63..afd6618e7cb3 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -1,9 +1,8 @@
//===- CoverageMapping.cpp - Code coverage mapping support ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -286,11 +285,14 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
if (std::error_code EC = CovMappingBufOrErr.getError())
return errorCodeToError(EC);
StringRef Arch = Arches.empty() ? StringRef() : Arches[File.index()];
- auto CoverageReaderOrErr =
- BinaryCoverageReader::create(CovMappingBufOrErr.get(), Arch);
- if (Error E = CoverageReaderOrErr.takeError())
+ MemoryBufferRef CovMappingBufRef =
+ CovMappingBufOrErr.get()->getMemBufferRef();
+ auto CoverageReadersOrErr =
+ BinaryCoverageReader::create(CovMappingBufRef, Arch, Buffers);
+ if (Error E = CoverageReadersOrErr.takeError())
return std::move(E);
- Readers.push_back(std::move(CoverageReaderOrErr.get()));
+ for (auto &Reader : CoverageReadersOrErr.get())
+ Readers.push_back(std::move(Reader));
Buffers.push_back(std::move(CovMappingBufOrErr.get()));
}
return load(Readers, *ProfileReader);
diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index ee48256bc2e5..e193e10f91d9 100644
--- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -1,9 +1,8 @@
//===- CoverageMappingReader.cpp - Code coverage mapping reader -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/Object/Error.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/COFF.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -59,7 +59,7 @@ Error RawCoverageReader::readULEB128(uint64_t &Result) {
if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
unsigned N = 0;
- Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ Result = decodeULEB128(Data.bytes_begin(), &N);
if (N > Data.size())
return make_error<CoverageMapError>(coveragemap_error::malformed);
Data = Data.substr(N);
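
readULEB128 above (and loadTestingFormat further down) decode LEB128-encoded lengths straight from the section bytes. For readers unfamiliar with the encoding, here is a standalone decoding sketch; it is not LLVM's decodeULEB128, just an illustration of the format:

#include <cstdint>

// Minimal ULEB128 decoder sketch: each byte contributes 7 payload bits,
// least-significant group first; the high bit marks continuation.
static uint64_t decodeULEB128Sketch(const uint8_t *P, unsigned *N) {
  uint64_t Value = 0;
  unsigned Shift = 0, Read = 0;
  uint8_t Byte;
  do {
    Byte = P[Read++];
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  if (N)
    *N = Read; // number of bytes consumed, as the &N out-parameter above
  return Value;
}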
@@ -348,9 +348,18 @@ Expected<bool> RawCoverageMappingDummyChecker::isDummy() {
}
Error InstrProfSymtab::create(SectionRef &Section) {
- if (auto EC = Section.getContents(Data))
- return errorCodeToError(EC);
+ Expected<StringRef> DataOrErr = Section.getContents();
+ if (!DataOrErr)
+ return DataOrErr.takeError();
+ Data = *DataOrErr;
Address = Section.getAddress();
+
+ // If this is a linked PE/COFF file, then we have to skip over the null byte
+ // that is allocated in the .lprfn$A section in the LLVM profiling runtime.
+ const ObjectFile *Obj = Section.getObject();
+ if (isa<COFFObjectFile>(Obj) && !Obj->isRelocatableObject())
+ Data = Data.drop_front(1);
+
return Error::success();
}
@@ -577,35 +586,65 @@ static Error readCoverageMappingData(
static const char *TestingFormatMagic = "llvmcovmtestdata";
-static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
- StringRef &CoverageMapping,
- uint8_t &BytesInAddress,
- support::endianness &Endian) {
- BytesInAddress = 8;
- Endian = support::endianness::little;
+Expected<std::unique_ptr<BinaryCoverageReader>>
+BinaryCoverageReader::createCoverageReaderFromBuffer(
+ StringRef Coverage, InstrProfSymtab &&ProfileNames, uint8_t BytesInAddress,
+ support::endianness Endian) {
+ std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
+ Reader->ProfileNames = std::move(ProfileNames);
+ if (BytesInAddress == 4 && Endian == support::endianness::little) {
+ if (Error E =
+ readCoverageMappingData<uint32_t, support::endianness::little>(
+ Reader->ProfileNames, Coverage, Reader->MappingRecords,
+ Reader->Filenames))
+ return std::move(E);
+ } else if (BytesInAddress == 4 && Endian == support::endianness::big) {
+ if (Error E = readCoverageMappingData<uint32_t, support::endianness::big>(
+ Reader->ProfileNames, Coverage, Reader->MappingRecords,
+ Reader->Filenames))
+ return std::move(E);
+ } else if (BytesInAddress == 8 && Endian == support::endianness::little) {
+ if (Error E =
+ readCoverageMappingData<uint64_t, support::endianness::little>(
+ Reader->ProfileNames, Coverage, Reader->MappingRecords,
+ Reader->Filenames))
+ return std::move(E);
+ } else if (BytesInAddress == 8 && Endian == support::endianness::big) {
+ if (Error E = readCoverageMappingData<uint64_t, support::endianness::big>(
+ Reader->ProfileNames, Coverage, Reader->MappingRecords,
+ Reader->Filenames))
+ return std::move(E);
+ } else
+ return make_error<CoverageMapError>(coveragemap_error::malformed);
+ return std::move(Reader);
+}
+
+static Expected<std::unique_ptr<BinaryCoverageReader>>
+loadTestingFormat(StringRef Data) {
+ uint8_t BytesInAddress = 8;
+ support::endianness Endian = support::endianness::little;
Data = Data.substr(StringRef(TestingFormatMagic).size());
if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
unsigned N = 0;
- auto ProfileNamesSize =
- decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ uint64_t ProfileNamesSize = decodeULEB128(Data.bytes_begin(), &N);
if (N > Data.size())
return make_error<CoverageMapError>(coveragemap_error::malformed);
Data = Data.substr(N);
if (Data.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
N = 0;
- uint64_t Address =
- decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N);
+ uint64_t Address = decodeULEB128(Data.bytes_begin(), &N);
if (N > Data.size())
return make_error<CoverageMapError>(coveragemap_error::malformed);
Data = Data.substr(N);
if (Data.size() < ProfileNamesSize)
return make_error<CoverageMapError>(coveragemap_error::malformed);
+ InstrProfSymtab ProfileNames;
if (Error E = ProfileNames.create(Data.substr(0, ProfileNamesSize), Address))
- return E;
- CoverageMapping = Data.substr(ProfileNamesSize);
+ return std::move(E);
+ StringRef CoverageMapping = Data.substr(ProfileNamesSize);
// Skip the padding bytes because coverage map data has an alignment of 8.
if (CoverageMapping.empty())
return make_error<CoverageMapError>(coveragemap_error::truncated);
@@ -613,29 +652,32 @@ static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
if (CoverageMapping.size() < Pad)
return make_error<CoverageMapError>(coveragemap_error::malformed);
CoverageMapping = CoverageMapping.substr(Pad);
- return Error::success();
+ return BinaryCoverageReader::createCoverageReaderFromBuffer(
+ CoverageMapping, std::move(ProfileNames), BytesInAddress, Endian);
}
static Expected<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) {
+ // On COFF, the object file section name may end in "$M". This tells the
+ // linker to sort these sections between "$A" and "$Z". The linker removes the
+ // dollar and everything after it in the final binary. Do the same to match.
+ bool IsCOFF = isa<COFFObjectFile>(OF);
+ auto stripSuffix = [IsCOFF](StringRef N) {
+ return IsCOFF ? N.split('$').first : N;
+ };
+ Name = stripSuffix(Name);
+
StringRef FoundName;
for (const auto &Section : OF.sections()) {
if (auto EC = Section.getName(FoundName))
return errorCodeToError(EC);
- if (FoundName == Name)
+ if (stripSuffix(FoundName) == Name)
return Section;
}
return make_error<CoverageMapError>(coveragemap_error::no_data_found);
}
-static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
- InstrProfSymtab &ProfileNames,
- StringRef &CoverageMapping,
- uint8_t &BytesInAddress,
- support::endianness &Endian, StringRef Arch) {
- auto BinOrErr = createBinary(ObjectBuffer);
- if (!BinOrErr)
- return BinOrErr.takeError();
- auto Bin = std::move(BinOrErr.get());
+static Expected<std::unique_ptr<BinaryCoverageReader>>
+loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch) {
std::unique_ptr<ObjectFile> OF;
if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
// If we have a universal binary, try to look up the object for the
@@ -655,9 +697,10 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
return make_error<CoverageMapError>(coveragemap_error::malformed);
// The coverage uses native pointer sizes for the object it's written in.
- BytesInAddress = OF->getBytesInAddress();
- Endian = OF->isLittleEndian() ? support::endianness::little
- : support::endianness::big;
+ uint8_t BytesInAddress = OF->getBytesInAddress();
+ support::endianness Endian = OF->isLittleEndian()
+ ? support::endianness::little
+ : support::endianness::big;
// Look for the sections that we are interested in.
auto ObjFormat = OF->getTripleObjectFormat();
@@ -665,63 +708,101 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
lookupSection(*OF, getInstrProfSectionName(IPSK_name, ObjFormat,
/*AddSegmentInfo=*/false));
if (auto E = NamesSection.takeError())
- return E;
+ return std::move(E);
auto CoverageSection =
lookupSection(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat,
/*AddSegmentInfo=*/false));
if (auto E = CoverageSection.takeError())
- return E;
+ return std::move(E);
// Get the contents of the given sections.
- if (auto EC = CoverageSection->getContents(CoverageMapping))
- return errorCodeToError(EC);
+ auto CoverageMappingOrErr = CoverageSection->getContents();
+ if (!CoverageMappingOrErr)
+ return CoverageMappingOrErr.takeError();
+
+ InstrProfSymtab ProfileNames;
if (Error E = ProfileNames.create(*NamesSection))
- return E;
+ return std::move(E);
- return Error::success();
+ return BinaryCoverageReader::createCoverageReaderFromBuffer(
+ CoverageMappingOrErr.get(), std::move(ProfileNames), BytesInAddress,
+ Endian);
}
-Expected<std::unique_ptr<BinaryCoverageReader>>
-BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
- StringRef Arch) {
- std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
+Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
+BinaryCoverageReader::create(
+ MemoryBufferRef ObjectBuffer, StringRef Arch,
+ SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers) {
+ std::vector<std::unique_ptr<BinaryCoverageReader>> Readers;
- StringRef Coverage;
- uint8_t BytesInAddress;
- support::endianness Endian;
- Error E = Error::success();
- consumeError(std::move(E));
- if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
+ if (ObjectBuffer.getBuffer().startswith(TestingFormatMagic)) {
// This is a special format used for testing.
- E = loadTestingFormat(ObjectBuffer->getBuffer(), Reader->ProfileNames,
- Coverage, BytesInAddress, Endian);
- else
- E = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Reader->ProfileNames,
- Coverage, BytesInAddress, Endian, Arch);
- if (E)
- return std::move(E);
+ auto ReaderOrErr = loadTestingFormat(ObjectBuffer.getBuffer());
+ if (!ReaderOrErr)
+ return ReaderOrErr.takeError();
+ Readers.push_back(std::move(ReaderOrErr.get()));
+ return std::move(Readers);
+ }
- if (BytesInAddress == 4 && Endian == support::endianness::little)
- E = readCoverageMappingData<uint32_t, support::endianness::little>(
- Reader->ProfileNames, Coverage, Reader->MappingRecords,
- Reader->Filenames);
- else if (BytesInAddress == 4 && Endian == support::endianness::big)
- E = readCoverageMappingData<uint32_t, support::endianness::big>(
- Reader->ProfileNames, Coverage, Reader->MappingRecords,
- Reader->Filenames);
- else if (BytesInAddress == 8 && Endian == support::endianness::little)
- E = readCoverageMappingData<uint64_t, support::endianness::little>(
- Reader->ProfileNames, Coverage, Reader->MappingRecords,
- Reader->Filenames);
- else if (BytesInAddress == 8 && Endian == support::endianness::big)
- E = readCoverageMappingData<uint64_t, support::endianness::big>(
- Reader->ProfileNames, Coverage, Reader->MappingRecords,
- Reader->Filenames);
- else
- return make_error<CoverageMapError>(coveragemap_error::malformed);
- if (E)
- return std::move(E);
- return std::move(Reader);
+ auto BinOrErr = createBinary(ObjectBuffer);
+ if (!BinOrErr)
+ return BinOrErr.takeError();
+ std::unique_ptr<Binary> Bin = std::move(BinOrErr.get());
+
+ // MachO universal binaries which contain archives need to be treated as
+ // archives, not as regular binaries.
+ if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
+ for (auto &ObjForArch : Universal->objects()) {
+ // Skip slices within the universal binary which target the wrong arch.
+ std::string ObjArch = ObjForArch.getArchFlagName();
+ if (Arch != ObjArch)
+ continue;
+
+ auto ArchiveOrErr = ObjForArch.getAsArchive();
+ if (!ArchiveOrErr) {
+ // If this is not an archive, try treating it as a regular object.
+ consumeError(ArchiveOrErr.takeError());
+ break;
+ }
+
+ return BinaryCoverageReader::create(
+ ArchiveOrErr.get()->getMemoryBufferRef(), Arch, ObjectFileBuffers);
+ }
+ }
+
+ // Load coverage out of archive members.
+ if (auto *Ar = dyn_cast<Archive>(Bin.get())) {
+ Error Err = Error::success();
+ for (auto &Child : Ar->children(Err)) {
+ Expected<MemoryBufferRef> ChildBufOrErr = Child.getMemoryBufferRef();
+ if (!ChildBufOrErr)
+ return ChildBufOrErr.takeError();
+
+ auto ChildReadersOrErr = BinaryCoverageReader::create(
+ ChildBufOrErr.get(), Arch, ObjectFileBuffers);
+ if (!ChildReadersOrErr)
+ return ChildReadersOrErr.takeError();
+ for (auto &Reader : ChildReadersOrErr.get())
+ Readers.push_back(std::move(Reader));
+ }
+ if (Err)
+ return std::move(Err);
+
+ // Thin archives reference object files outside of the archive file, i.e.
+ // files which reside in memory not owned by the caller. Transfer ownership
+ // to the caller.
+ if (Ar->isThin())
+ for (auto &Buffer : Ar->takeThinBuffers())
+ ObjectFileBuffers.push_back(std::move(Buffer));
+
+ return std::move(Readers);
+ }
+
+ auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch);
+ if (!ReaderOrErr)
+ return ReaderOrErr.takeError();
+ Readers.push_back(std::move(ReaderOrErr.get()));
+ return std::move(Readers);
}
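
Because a single input may now expand into several objects (universal binaries, archives, thin archives), create() returns a vector of readers and hands thin-archive buffers back to the caller. A hedged caller sketch against the new signature shown above (the helper name is illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
using namespace llvm;
using namespace llvm::coverage;

// Sketch: count how many coverage readers one object/archive buffer yields.
static Expected<size_t> countCoverageReaders(MemoryBufferRef ObjBuf,
                                             StringRef Arch) {
  SmallVector<std::unique_ptr<MemoryBuffer>, 4> OwnedBuffers;
  auto ReadersOrErr = BinaryCoverageReader::create(ObjBuf, Arch, OwnedBuffers);
  if (!ReadersOrErr)
    return ReadersOrErr.takeError();
  // OwnedBuffers must outlive the readers: thin-archive members are
  // transferred to the caller, as the implementation above documents.
  return ReadersOrErr->size();
}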
Error BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) {
diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index bb3f4f854e04..432b20f217ca 100644
--- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -1,9 +1,8 @@
//===- CoverageMappingWriter.cpp - Code coverage mapping writer -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,15 +124,14 @@ void CoverageMappingWriter::write(raw_ostream &OS) {
// Sort the regions in an ascending order by the file id and the starting
// location. Sort by region kinds to ensure stable order for tests.
- std::stable_sort(
- MappingRegions.begin(), MappingRegions.end(),
- [](const CounterMappingRegion &LHS, const CounterMappingRegion &RHS) {
- if (LHS.FileID != RHS.FileID)
- return LHS.FileID < RHS.FileID;
- if (LHS.startLoc() != RHS.startLoc())
- return LHS.startLoc() < RHS.startLoc();
- return LHS.Kind < RHS.Kind;
- });
+ llvm::stable_sort(MappingRegions, [](const CounterMappingRegion &LHS,
+ const CounterMappingRegion &RHS) {
+ if (LHS.FileID != RHS.FileID)
+ return LHS.FileID < RHS.FileID;
+ if (LHS.startLoc() != RHS.startLoc())
+ return LHS.startLoc() < RHS.startLoc();
+ return LHS.Kind < RHS.Kind;
+ });
// Write out the fileid -> filename mapping.
encodeULEB128(VirtualFileMapping.size(), OS);
diff --git a/lib/ProfileData/GCOV.cpp b/lib/ProfileData/GCOV.cpp
index b687346a2c05..fa4e433d7aa6 100644
--- a/lib/ProfileData/GCOV.cpp
+++ b/lib/ProfileData/GCOV.cpp
@@ -1,9 +1,8 @@
//===- GCOV.cpp - LLVM coverage tool --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <system_error>
@@ -396,10 +396,10 @@ void GCOVBlock::addCount(size_t DstEdgeNo, uint64_t N) {
/// sortDstEdges - Sort destination edges by block number, nop if already
/// sorted. This is required for printing branch info in the correct order.
void GCOVBlock::sortDstEdges() {
- if (!DstEdgesAreSorted) {
- SortDstEdgesFunctor SortEdges;
- std::stable_sort(DstEdges.begin(), DstEdges.end(), SortEdges);
- }
+ if (!DstEdgesAreSorted)
+ llvm::stable_sort(DstEdges, [](const GCOVEdge *E1, const GCOVEdge *E2) {
+ return E1->Dst.Number < E2->Dst.Number;
+ });
}
/// collectLineCounts - Collect line counts. This must be used after
@@ -687,7 +687,15 @@ std::string FileInfo::getCoveragePath(StringRef Filename,
if (Options.LongFileNames && !Filename.equals(MainFilename))
CoveragePath =
mangleCoveragePath(MainFilename, Options.PreservePaths) + "##";
- CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths) + ".gcov";
+ CoveragePath += mangleCoveragePath(Filename, Options.PreservePaths);
+ if (Options.HashFilenames) {
+ MD5 Hasher;
+ MD5::MD5Result Result;
+ Hasher.update(Filename.str());
+ Hasher.final(Result);
+ CoveragePath += "##" + Result.digest().str().str();
+ }
+ CoveragePath += ".gcov";
return CoveragePath;
}
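
When Options.HashFilenames is set, the mangled path gains a "##<md5>" suffix before ".gcov". A small helper sketch using llvm::MD5 the same way the branch above does (the function name is illustrative, not part of GCOV.cpp):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <string>
using namespace llvm;

// Sketch: produce "<mangled>##<md5-of-filename>.gcov".
static std::string hashedCoverageName(const std::string &MangledPath,
                                      StringRef Filename) {
  MD5 Hasher;
  MD5::MD5Result Result;
  Hasher.update(Filename);
  Hasher.final(Result);
  return MangledPath + "##" + Result.digest().str().str() + ".gcov";
}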
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index aaa8000ff2f9..510fd9887d9a 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -1,9 +1,8 @@
//===- InstrProf.cpp - Instrumented profiling format support --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -363,16 +363,15 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
finalizeSymtab();
- auto Result =
- std::lower_bound(AddrToMD5Map.begin(), AddrToMD5Map.end(), Address,
- [](const std::pair<uint64_t, uint64_t> &LHS,
- uint64_t RHS) { return LHS.first < RHS; });
+ auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
+ return A.first < Address;
+ });
// Raw function pointer collected by value profiler may be from
// external functions that are not instrumented. They won't have
// mapping data to be used by the deserializer. Force the value to
// be 0 in this case.
- if (Result != AddrToMD5Map.end() && Result->first == Address)
- return (uint64_t)Result->second;
+ if (It != AddrToMD5Map.end() && It->first == Address)
+ return (uint64_t)It->second;
return 0;
}
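
The change above replaces a std::lower_bound call with llvm::partition_point, which takes a unary predicate instead of a comparator. The same lookup written against the standard library, as a self-contained illustration of the equivalence:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Sketch: find the first pair whose key is >= Address, as the lookup above
// does; unmapped addresses deliberately yield 0, matching the code above.
static uint64_t lookupHashSketch(
    const std::vector<std::pair<uint64_t, uint64_t>> &Map, uint64_t Address) {
  auto It = std::partition_point(
      Map.begin(), Map.end(),
      [=](const std::pair<uint64_t, uint64_t> &A) { return A.first < Address; });
  if (It != Map.end() && It->first == Address)
    return It->second;
  return 0;
}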
@@ -435,9 +434,8 @@ Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
}
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
- const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
- const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
- NameStrings.size());
+ const uint8_t *P = NameStrings.bytes_begin();
+ const uint8_t *EndP = NameStrings.bytes_end();
while (P < EndP) {
uint32_t N;
uint64_t UncompressedSize = decodeULEB128(P, &N);
@@ -480,6 +478,126 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
return Error::success();
}
+void InstrProfRecord::accumuateCounts(CountSumOrPercent &Sum) const {
+ uint64_t FuncSum = 0;
+ Sum.NumEntries += Counts.size();
+ for (size_t F = 0, E = Counts.size(); F < E; ++F)
+ FuncSum += Counts[F];
+ Sum.CountSum += FuncSum;
+
+ for (uint32_t VK = IPVK_First; VK <= IPVK_Last; ++VK) {
+ uint64_t KindSum = 0;
+ uint32_t NumValueSites = getNumValueSites(VK);
+ for (size_t I = 0; I < NumValueSites; ++I) {
+ uint32_t NV = getNumValueDataForSite(VK, I);
+ std::unique_ptr<InstrProfValueData[]> VD = getValueForSite(VK, I);
+ for (uint32_t V = 0; V < NV; V++)
+ KindSum += VD[V].Count;
+ }
+ Sum.ValueCounts[VK] += KindSum;
+ }
+}
+
+void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord &Input,
+ uint32_t ValueKind,
+ OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap) {
+ this->sortByTargetValues();
+ Input.sortByTargetValues();
+ double Score = 0.0f, FuncLevelScore = 0.0f;
+ auto I = ValueData.begin();
+ auto IE = ValueData.end();
+ auto J = Input.ValueData.begin();
+ auto JE = Input.ValueData.end();
+ while (I != IE && J != JE) {
+ if (I->Value == J->Value) {
+ Score += OverlapStats::score(I->Count, J->Count,
+ Overlap.Base.ValueCounts[ValueKind],
+ Overlap.Test.ValueCounts[ValueKind]);
+ FuncLevelScore += OverlapStats::score(
+ I->Count, J->Count, FuncLevelOverlap.Base.ValueCounts[ValueKind],
+ FuncLevelOverlap.Test.ValueCounts[ValueKind]);
+ ++I;
+ } else if (I->Value < J->Value) {
+ ++I;
+ continue;
+ }
+ ++J;
+ }
+ Overlap.Overlap.ValueCounts[ValueKind] += Score;
+ FuncLevelOverlap.Overlap.ValueCounts[ValueKind] += FuncLevelScore;
+}
+
+// Compute the overlap of value profile data of this record with Other.
+void InstrProfRecord::overlapValueProfData(uint32_t ValueKind,
+ InstrProfRecord &Other,
+ OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap) {
+ uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+ assert(ThisNumValueSites == Other.getNumValueSites(ValueKind));
+ if (!ThisNumValueSites)
+ return;
+
+ std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+ getOrCreateValueSitesForKind(ValueKind);
+ MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
+ Other.getValueSitesForKind(ValueKind);
+ for (uint32_t I = 0; I < ThisNumValueSites; I++)
+ ThisSiteRecords[I].overlap(OtherSiteRecords[I], ValueKind, Overlap,
+ FuncLevelOverlap);
+}
+
+void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap,
+ uint64_t ValueCutoff) {
+ // The function-level CountSum for Other should already be computed and nonzero.
+ assert(FuncLevelOverlap.Test.CountSum >= 1.0f);
+ accumuateCounts(FuncLevelOverlap.Base);
+ bool Mismatch = (Counts.size() != Other.Counts.size());
+
+ // Check if the value profiles mismatch.
+ if (!Mismatch) {
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+ uint32_t ThisNumValueSites = getNumValueSites(Kind);
+ uint32_t OtherNumValueSites = Other.getNumValueSites(Kind);
+ if (ThisNumValueSites != OtherNumValueSites) {
+ Mismatch = true;
+ break;
+ }
+ }
+ }
+ if (Mismatch) {
+ Overlap.addOneMismatch(FuncLevelOverlap.Test);
+ return;
+ }
+
+ // Compute overlap for value counts.
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ overlapValueProfData(Kind, Other, Overlap, FuncLevelOverlap);
+
+ double Score = 0.0;
+ uint64_t MaxCount = 0;
+ // Compute overlap for edge counts.
+ for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
+ Score += OverlapStats::score(Counts[I], Other.Counts[I],
+ Overlap.Base.CountSum, Overlap.Test.CountSum);
+ MaxCount = std::max(Other.Counts[I], MaxCount);
+ }
+ Overlap.Overlap.CountSum += Score;
+ Overlap.Overlap.NumEntries += 1;
+
+ if (MaxCount >= ValueCutoff) {
+ double FuncScore = 0.0;
+ for (size_t I = 0, E = Other.Counts.size(); I < E; ++I)
+ FuncScore += OverlapStats::score(Counts[I], Other.Counts[I],
+ FuncLevelOverlap.Base.CountSum,
+ FuncLevelOverlap.Test.CountSum);
+ FuncLevelOverlap.Overlap.CountSum = FuncScore;
+ FuncLevelOverlap.Overlap.NumEntries = Other.Counts.size();
+ FuncLevelOverlap.Valid = true;
+ }
+}
+
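
The merge loops above accumulate OverlapStats::score over matching target values and over edge counts. As a rough, clearly hypothetical illustration of what such a normalized-overlap score can look like (the real OverlapStats::score is declared in llvm/ProfileData/InstrProf.h and may differ in detail):

#include <algorithm>
#include <cstdint>

// Hypothetical scoring helper, for illustration only.
static double overlapScoreSketch(uint64_t C1, uint64_t C2, double Sum1,
                                 double Sum2) {
  if (Sum1 < 1.0 || Sum2 < 1.0)
    return 0.0;
  // Normalize each count by its profile's total weight and credit the
  // smaller fraction as the overlapping portion.
  return std::min(C1 / Sum1, C2 / Sum2);
}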
void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
uint64_t Weight,
function_ref<void(instrprof_error)> Warn) {
@@ -1012,4 +1130,153 @@ void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart,
assert(RangeLast >= RangeStart);
}
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
+// aware this is an ir_level profile so it can set the version flag.
+void createIRLevelProfileFlagVar(Module &M, bool IsCS) {
+ const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
+ Type *IntTy64 = Type::getInt64Ty(M.getContext());
+ uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
+ if (IsCS)
+ ProfileVersion |= VARIANT_MASK_CSIR_PROF;
+ auto IRLevelVersionVariable = new GlobalVariable(
+ M, IntTy64, true, GlobalValue::WeakAnyLinkage,
+ Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
+ IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
+ Triple TT(M.getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
+ IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
+ }
+}
+
+// Create the variable for the profile file name.
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
+ if (InstrProfileOutput.empty())
+ return;
+ Constant *ProfileNameConst =
+ ConstantDataArray::getString(M.getContext(), InstrProfileOutput, true);
+ GlobalVariable *ProfileNameVar = new GlobalVariable(
+ M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
+ ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
+ Triple TT(M.getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
+ ProfileNameVar->setComdat(M.getOrInsertComdat(
+ StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
+ }
+}
+
+Error OverlapStats::accumuateCounts(const std::string &BaseFilename,
+ const std::string &TestFilename,
+ bool IsCS) {
+ auto getProfileSum = [IsCS](const std::string &Filename,
+ CountSumOrPercent &Sum) -> Error {
+ auto ReaderOrErr = InstrProfReader::create(Filename);
+ if (Error E = ReaderOrErr.takeError()) {
+ return E;
+ }
+ auto Reader = std::move(ReaderOrErr.get());
+ Reader->accumuateCounts(Sum, IsCS);
+ return Error::success();
+ };
+ auto Ret = getProfileSum(BaseFilename, Base);
+ if (Ret)
+ return Ret;
+ Ret = getProfileSum(TestFilename, Test);
+ if (Ret)
+ return Ret;
+ this->BaseFilename = &BaseFilename;
+ this->TestFilename = &TestFilename;
+ Valid = true;
+ return Error::success();
+}
+
+void OverlapStats::addOneMismatch(const CountSumOrPercent &MismatchFunc) {
+ Mismatch.NumEntries += 1;
+ Mismatch.CountSum += MismatchFunc.CountSum / Test.CountSum;
+ for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
+ if (Test.ValueCounts[I] >= 1.0f)
+ Mismatch.ValueCounts[I] +=
+ MismatchFunc.ValueCounts[I] / Test.ValueCounts[I];
+ }
+}
+
+void OverlapStats::addOneUnique(const CountSumOrPercent &UniqueFunc) {
+ Unique.NumEntries += 1;
+ Unique.CountSum += UniqueFunc.CountSum / Test.CountSum;
+ for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
+ if (Test.ValueCounts[I] >= 1.0f)
+ Unique.ValueCounts[I] += UniqueFunc.ValueCounts[I] / Test.ValueCounts[I];
+ }
+}
+
+void OverlapStats::dump(raw_fd_ostream &OS) const {
+ if (!Valid)
+ return;
+
+ const char *EntryName =
+ (Level == ProgramLevel ? "functions" : "edge counters");
+ if (Level == ProgramLevel) {
+ OS << "Profile overlap infomation for base_profile: " << *BaseFilename
+ << " and test_profile: " << *TestFilename << "\nProgram level:\n";
+ } else {
+ OS << "Function level:\n"
+ << " Function: " << FuncName << " (Hash=" << FuncHash << ")\n";
+ }
+
+ OS << " # of " << EntryName << " overlap: " << Overlap.NumEntries << "\n";
+ if (Mismatch.NumEntries)
+ OS << " # of " << EntryName << " mismatch: " << Mismatch.NumEntries
+ << "\n";
+ if (Unique.NumEntries)
+ OS << " # of " << EntryName
+ << " only in test_profile: " << Unique.NumEntries << "\n";
+
+ OS << " Edge profile overlap: " << format("%.3f%%", Overlap.CountSum * 100)
+ << "\n";
+ if (Mismatch.NumEntries)
+ OS << " Mismatched count percentage (Edge): "
+ << format("%.3f%%", Mismatch.CountSum * 100) << "\n";
+ if (Unique.NumEntries)
+ OS << " Percentage of Edge profile only in test_profile: "
+ << format("%.3f%%", Unique.CountSum * 100) << "\n";
+ OS << " Edge profile base count sum: " << format("%.0f", Base.CountSum)
+ << "\n"
+ << " Edge profile test count sum: " << format("%.0f", Test.CountSum)
+ << "\n";
+
+ for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
+ if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f)
+ continue;
+ char ProfileKindName[20];
+ switch (I) {
+ case IPVK_IndirectCallTarget:
+ strncpy(ProfileKindName, "IndirectCall", 19);
+ break;
+ case IPVK_MemOPSize:
+ strncpy(ProfileKindName, "MemOP", 19);
+ break;
+ default:
+ snprintf(ProfileKindName, 19, "VP[%d]", I);
+ break;
+ }
+ OS << " " << ProfileKindName
+ << " profile overlap: " << format("%.3f%%", Overlap.ValueCounts[I] * 100)
+ << "\n";
+ if (Mismatch.NumEntries)
+ OS << " Mismatched count percentage (" << ProfileKindName
+ << "): " << format("%.3f%%", Mismatch.ValueCounts[I] * 100) << "\n";
+ if (Unique.NumEntries)
+ OS << " Percentage of " << ProfileKindName
+ << " profile only in test_profile: "
+ << format("%.3f%%", Unique.ValueCounts[I] * 100) << "\n";
+ OS << " " << ProfileKindName
+ << " profile base count sum: " << format("%.0f", Base.ValueCounts[I])
+ << "\n"
+ << " " << ProfileKindName
+ << " profile test count sum: " << format("%.0f", Test.ValueCounts[I])
+ << "\n";
+ }
+}
+
} // end namespace llvm
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index eaf0eb04bfbf..fec1c152991c 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -1,9 +1,8 @@
//===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,7 +62,7 @@ InstrProfReader::create(const Twine &Path) {
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
// Sanity check the buffer.
- if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
+ if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
if (Buffer->getBufferSize() == 0)
@@ -114,7 +113,7 @@ Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer) {
// Sanity check the buffer.
- if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<unsigned>::max())
+ if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
// Create the reader.
@@ -163,7 +162,10 @@ Error TextInstrProfReader::readHeader() {
IsIRInstr = true;
else if (Str.equals_lower("fe"))
IsIRInstr = false;
- else
+ else if (Str.equals_lower("csir")) {
+ IsIRInstr = true;
+ HasCSIRLevelProfile = true;
+ } else
return error(instrprof_error::bad_header);
++Line;
@@ -734,7 +736,7 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
const unsigned char *
IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
- const unsigned char *Cur) {
+ const unsigned char *Cur, bool UseCS) {
using namespace IndexedInstrProf;
using namespace support;
@@ -761,10 +763,13 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
Ent.NumBlocks);
}
+ std::unique_ptr<llvm::ProfileSummary> &Summary =
+ UseCS ? this->CS_Summary : this->Summary;
+
// initialize InstrProfSummary using the SummaryData from disk.
- this->Summary = llvm::make_unique<ProfileSummary>(
- ProfileSummary::PSK_Instr, DetailedSummary,
- SummaryData->get(Summary::TotalBlockCount),
+ Summary = llvm::make_unique<ProfileSummary>(
+ UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
+ DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
SummaryData->get(Summary::MaxBlockCount),
SummaryData->get(Summary::MaxInternalBlockCount),
SummaryData->get(Summary::MaxFunctionCount),
@@ -806,7 +811,11 @@ Error IndexedInstrProfReader::readHeader() {
IndexedInstrProf::ProfVersion::CurrentVersion)
return error(instrprof_error::unsupported_version);
- Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur);
+ Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
+ /* UseCS */ false);
+ if (FormatVersion & VARIANT_MASK_CSIR_PROF)
+ Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
+ /* UseCS */ true);
// Read the hash type and start offset.
IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
@@ -891,3 +900,17 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
}
return success();
}
+
+void InstrProfReader::accumuateCounts(CountSumOrPercent &Sum, bool IsCS) {
+ uint64_t NumFuncs = 0;
+ for (const auto &Func : *this) {
+ if (isIRLevelProfile()) {
+ bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
+ if (FuncIsCS != IsCS)
+ continue;
+ }
+ Func.accumuateCounts(Sum);
+ ++NumFuncs;
+ }
+ Sum.NumEntries = NumFuncs;
+}
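
A hedged driver sketch for the new accumuateCounts() entry point above. It assumes the ProfileData headers and that CountSumOrPercent (the struct this patch introduces) default-constructs to zeroed totals; the helper name is illustrative:

#include "llvm/ADT/Twine.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
using namespace llvm;

// Sketch: sum the edge counts of one profile, optionally restricted to
// context-sensitive functions, via accumuateCounts().
static Expected<double> totalEdgeCountSum(const Twine &Path, bool IsCS) {
  auto ReaderOrErr = InstrProfReader::create(Path);
  if (Error E = ReaderOrErr.takeError())
    return std::move(E);
  CountSumOrPercent Sum; // assumed zero-initialized by its default constructor
  ReaderOrErr.get()->accumuateCounts(Sum, IsCS);
  return Sum.CountSum;
}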
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 18b9deec158f..4ca2defd26da 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -1,9 +1,8 @@
//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,6 +101,7 @@ public:
support::endianness ValueProfDataEndianness = support::little;
InstrProfSummaryBuilder *SummaryBuilder;
+ InstrProfSummaryBuilder *CSSummaryBuilder;
InstrProfRecordWriterTrait() = default;
@@ -143,7 +143,10 @@ public:
endian::Writer LE(Out, little);
for (const auto &ProfileData : *V) {
const InstrProfRecord &ProfRecord = ProfileData.second;
- SummaryBuilder->addRecord(ProfRecord);
+ if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
+ CSSummaryBuilder->addRecord(ProfRecord);
+ else
+ SummaryBuilder->addRecord(ProfRecord);
LE.write<uint64_t>(ProfileData.first); // Function hash
LE.write<uint64_t>(ProfRecord.Counts.size());
@@ -184,6 +187,40 @@ void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
addRecord(Name, Hash, std::move(I), Weight, Warn);
}
+void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
+ OverlapStats &Overlap,
+ OverlapStats &FuncLevelOverlap,
+ const OverlapFuncFilters &FuncFilter) {
+ auto Name = Other.Name;
+ auto Hash = Other.Hash;
+ Other.accumuateCounts(FuncLevelOverlap.Test);
+ if (FunctionData.find(Name) == FunctionData.end()) {
+ Overlap.addOneUnique(FuncLevelOverlap.Test);
+ return;
+ }
+ if (FuncLevelOverlap.Test.CountSum < 1.0f) {
+ Overlap.Overlap.NumEntries += 1;
+ return;
+ }
+ auto &ProfileDataMap = FunctionData[Name];
+ bool NewFunc;
+ ProfilingData::iterator Where;
+ std::tie(Where, NewFunc) =
+ ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
+ if (NewFunc) {
+ Overlap.addOneMismatch(FuncLevelOverlap.Test);
+ return;
+ }
+ InstrProfRecord &Dest = Where->second;
+
+ uint64_t ValueCutoff = FuncFilter.ValueCutoff;
+ if (!FuncFilter.NameFilter.empty() &&
+ Name.find(FuncFilter.NameFilter) != Name.npos)
+ ValueCutoff = 0;
+
+ Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
+}
+
void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
InstrProfRecord &&I, uint64_t Weight,
function_ref<void(Error)> Warn) {
@@ -254,6 +291,8 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
InfoObj->SummaryBuilder = &ISB;
+ InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
+ InfoObj->CSSummaryBuilder = &CSISB;
// Populate the hash table generator.
for (const auto &I : FunctionData)
@@ -265,6 +304,10 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
if (ProfileKind == PF_IRLevel)
Header.Version |= VARIANT_MASK_IR_PROF;
+ if (ProfileKind == PF_IRLevelWithCS) {
+ Header.Version |= VARIANT_MASK_IR_PROF;
+ Header.Version |= VARIANT_MASK_CSIR_PROF;
+ }
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
Header.HashOffset = 0;
@@ -288,6 +331,14 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
uint64_t SummaryOffset = OS.tell();
for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
OS.write(0);
+ uint64_t CSSummaryOffset = 0;
+ uint64_t CSSummarySize = 0;
+ if (ProfileKind == PF_IRLevelWithCS) {
+ CSSummaryOffset = OS.tell();
+ CSSummarySize = SummarySize / sizeof(uint64_t);
+ for (unsigned I = 0; I < CSSummarySize; I++)
+ OS.write(0);
+ }
// Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
@@ -301,13 +352,25 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
setSummary(TheSummary.get(), *PS);
InfoObj->SummaryBuilder = nullptr;
+ // For Context Sensitive summary.
+ std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
+ if (ProfileKind == PF_IRLevelWithCS) {
+ TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
+ std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
+ setSummary(TheCSSummary.get(), *CSPS);
+ }
+ InfoObj->CSSummaryBuilder = nullptr;
+
// Now do the final patch:
PatchItem PatchItems[] = {
// Patch the Header.HashOffset field.
{HashTableStartFieldOffset, &HashTableStart, 1},
// Patch the summary data.
{SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
- (int)(SummarySize / sizeof(uint64_t))}};
+ (int)(SummarySize / sizeof(uint64_t))},
+ {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
+ (int)CSSummarySize}};
+
OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems));
}
@@ -328,7 +391,7 @@ std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
}
static const char *ValueProfKindStr[] = {
-#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator,
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
#include "llvm/ProfileData/InstrProfData.inc"
};
@@ -376,15 +439,33 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
if (ProfileKind == PF_IRLevel)
OS << "# IR level Instrumentation Flag\n:ir\n";
+ else if (ProfileKind == PF_IRLevelWithCS)
+ OS << "# CSIR level Instrumentation Flag\n:csir\n";
InstrProfSymtab Symtab;
- for (const auto &I : FunctionData)
- if (shouldEncodeData(I.getValue()))
+
+ using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
+ using RecordType = std::pair<StringRef, FuncPair>;
+ SmallVector<RecordType, 4> OrderedFuncData;
+
+ for (const auto &I : FunctionData) {
+ if (shouldEncodeData(I.getValue())) {
if (Error E = Symtab.addFuncName(I.getKey()))
return E;
-
- for (const auto &I : FunctionData)
- if (shouldEncodeData(I.getValue()))
for (const auto &Func : I.getValue())
- writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS);
+ OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
+ }
+ }
+
+ llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
+ return std::tie(A.first, A.second.first) <
+ std::tie(B.first, B.second.first);
+ });
+
+ for (const auto &record : OrderedFuncData) {
+ const StringRef &Name = record.first;
+ const FuncPair &Func = record.second;
+ writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
+ }
+
return Error::success();
}
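
The rewritten writeText above first gathers (name, hash) pairs and sorts them so that the text output is deterministic across runs. A small self-contained sketch of that ordering, using the same std::tie comparison on a plain vector (the names and hashes here are made up for illustration):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

int main() {
  // Records are ordered by (function name, function hash) before emission.
  using RecordKey = std::pair<std::string, uint64_t>;
  std::vector<RecordKey> Records = {
      {"main", 42}, {"bar", 7}, {"bar", 3}, {"foo", 1}};

  std::sort(Records.begin(), Records.end(),
            [](const RecordKey &A, const RecordKey &B) {
              return std::tie(A.first, A.second) < std::tie(B.first, B.second);
            });

  for (const RecordKey &R : Records)
    std::cout << R.first << " " << R.second << "\n"; // bar 3, bar 7, foo 1, main 42
}
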
diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp
index 3a8462fd9b0d..4d5b00935742 100644
--- a/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -1,9 +1,8 @@
//=-- ProfilesummaryBuilder.cpp - Profile summary computation ---------------=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,12 +44,17 @@ void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
// To compute the detailed summary, we consider each line containing samples as
// equivalent to a block with a count in the instrumented profile.
void SampleProfileSummaryBuilder::addRecord(
- const sampleprof::FunctionSamples &FS) {
- NumFunctions++;
- if (FS.getHeadSamples() > MaxFunctionCount)
- MaxFunctionCount = FS.getHeadSamples();
+ const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
+ if (!isCallsiteSample) {
+ NumFunctions++;
+ if (FS.getHeadSamples() > MaxFunctionCount)
+ MaxFunctionCount = FS.getHeadSamples();
+ }
for (const auto &I : FS.getBodySamples())
addCount(I.second.getSamples());
+ for (const auto &I : FS.getCallsiteSamples())
+ for (const auto &CS : I.second)
+ addRecord(CS.second, true);
}
// The argument to this method is a vector of cutoff percentages and the return
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
index 1a124415f179..e17865cd15a4 100644
--- a/lib/ProfileData/SampleProf.cpp
+++ b/lib/ProfileData/SampleProf.cpp
@@ -1,9 +1,8 @@
//=-- SampleProf.cpp - Sample profiling format support --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
index a68d1e9d3ab0..192b6c711562 100644
--- a/lib/ProfileData/SampleProfReader.cpp
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -1,9 +1,8 @@
//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -594,8 +593,8 @@ std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
FuncsToUse.clear();
for (auto &F : M) {
- StringRef Fname = F.getName().split('.').first;
- FuncsToUse.insert(Fname);
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ FuncsToUse.insert(CanonName);
}
}
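
collectFuncsToUse now keys the set on a canonical function name instead of naively splitting at the first '.'. A hedged standalone sketch of one plausible canonicalization, stripping a compiler-added ".llvm.<hash>" suffix while keeping other dotted suffixes; the exact rules are whatever FunctionSamples::getCanonicalFnName implements and may differ.

#include <iostream>
#include <string>

// Hypothetical canonicalization for illustration only: drop a trailing
// ".llvm.<hash>" suffix, keep everything else (e.g. "foo.cold.1").
static std::string getCanonicalName(const std::string &Name) {
  std::string::size_type Pos = Name.find(".llvm.");
  return Pos == std::string::npos ? Name : Name.substr(0, Pos);
}

int main() {
  std::cout << getCanonicalName("_ZL3foov.llvm.123456789") << "\n"; // _ZL3foov
  std::cout << getCanonicalName("bar.cold.1") << "\n";              // bar.cold.1
}
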
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
index b1c669ec31c4..8b876e0aa5d9 100644
--- a/lib/ProfileData/SampleProfWriter.cpp
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -1,9 +1,8 @@
//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,9 +50,8 @@ SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
for (const auto &I : ProfileMap)
V.push_back(std::make_pair(I.getKey(), &I.second));
- std::stable_sort(
- V.begin(), V.end(),
- [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
+ llvm::stable_sort(
+ V, [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
if (A.second->getTotalSamples() == B.second->getTotalSamples())
return A.first > B.first;
return A.second->getTotalSamples() > B.second->getTotalSamples();
diff --git a/lib/Remarks/Remark.cpp b/lib/Remarks/Remark.cpp
new file mode 100644
index 000000000000..401ac514b011
--- /dev/null
+++ b/lib/Remarks/Remark.cpp
@@ -0,0 +1,132 @@
+//===- Remark.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark type and the C API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/Remark.h"
+#include "llvm-c/Remarks.h"
+#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+std::string Remark::getArgsAsMsg() const {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ for (const Argument &Arg : Args)
+ OS << Arg.Val;
+ return OS.str();
+}
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(StringRef, LLVMRemarkStringRef)
+
+extern "C" const char *LLVMRemarkStringGetData(LLVMRemarkStringRef String) {
+ return unwrap(String)->data();
+}
+
+extern "C" uint32_t LLVMRemarkStringGetLen(LLVMRemarkStringRef String) {
+ return unwrap(String)->size();
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkDebugLocGetSourceFilePath(LLVMRemarkDebugLocRef DL) {
+ return wrap(&unwrap(DL)->SourceFilePath);
+}
+
+extern "C" uint32_t LLVMRemarkDebugLocGetSourceLine(LLVMRemarkDebugLocRef DL) {
+ return unwrap(DL)->SourceLine;
+}
+
+extern "C" uint32_t
+LLVMRemarkDebugLocGetSourceColumn(LLVMRemarkDebugLocRef DL) {
+ return unwrap(DL)->SourceColumn;
+}
+
+extern "C" LLVMRemarkStringRef LLVMRemarkArgGetKey(LLVMRemarkArgRef Arg) {
+ return wrap(&unwrap(Arg)->Key);
+}
+
+extern "C" LLVMRemarkStringRef LLVMRemarkArgGetValue(LLVMRemarkArgRef Arg) {
+ return wrap(&unwrap(Arg)->Val);
+}
+
+extern "C" LLVMRemarkDebugLocRef
+LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg) {
+ if (const Optional<RemarkLocation> &Loc = unwrap(Arg)->Loc)
+ return wrap(&*Loc);
+ return nullptr;
+}
+
+extern "C" void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark) {
+ delete unwrap(Remark);
+}
+
+extern "C" LLVMRemarkType LLVMRemarkEntryGetType(LLVMRemarkEntryRef Remark) {
+ // Assume here that the enums can be converted both ways.
+ return static_cast<LLVMRemarkType>(unwrap(Remark)->RemarkType);
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkEntryGetPassName(LLVMRemarkEntryRef Remark) {
+ return wrap(&unwrap(Remark)->PassName);
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkEntryGetRemarkName(LLVMRemarkEntryRef Remark) {
+ return wrap(&unwrap(Remark)->RemarkName);
+}
+
+extern "C" LLVMRemarkStringRef
+LLVMRemarkEntryGetFunctionName(LLVMRemarkEntryRef Remark) {
+ return wrap(&unwrap(Remark)->FunctionName);
+}
+
+extern "C" LLVMRemarkDebugLocRef
+LLVMRemarkEntryGetDebugLoc(LLVMRemarkEntryRef Remark) {
+ if (const Optional<RemarkLocation> &Loc = unwrap(Remark)->Loc)
+ return wrap(&*Loc);
+ return nullptr;
+}
+
+extern "C" uint64_t LLVMRemarkEntryGetHotness(LLVMRemarkEntryRef Remark) {
+ if (const Optional<uint64_t> &Hotness = unwrap(Remark)->Hotness)
+ return *Hotness;
+ return 0;
+}
+
+extern "C" uint32_t LLVMRemarkEntryGetNumArgs(LLVMRemarkEntryRef Remark) {
+ return unwrap(Remark)->Args.size();
+}
+
+extern "C" LLVMRemarkArgRef
+LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark) {
+ ArrayRef<Argument> Args = unwrap(Remark)->Args;
+ // No arguments to iterate on.
+ if (Args.empty())
+ return NULL;
+ return reinterpret_cast<LLVMRemarkArgRef>(
+ const_cast<Argument *>(Args.begin()));
+}
+
+extern "C" LLVMRemarkArgRef
+LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef ArgIt, LLVMRemarkEntryRef Remark) {
+ // No more arguments to iterate on.
+ if (ArgIt == NULL)
+ return NULL;
+
+ auto It = (ArrayRef<Argument>::const_iterator)ArgIt;
+ auto Next = std::next(It);
+ if (Next == unwrap(Remark)->Args.end())
+ return NULL;
+
+ return reinterpret_cast<LLVMRemarkArgRef>(const_cast<Argument *>(Next));
+}
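
The argument accessors above form a simple forward-iteration protocol: LLVMRemarkEntryGetFirstArg returns the first argument or NULL, and LLVMRemarkEntryGetNextArg advances until it returns NULL. A short usage sketch, assuming Remark is a valid LLVMRemarkEntryRef obtained from a parser (see LLVMRemarkParserGetNext below):

#include "llvm-c/Remarks.h"
#include <cstdio>

// Print every "Key: Value" argument attached to a remark entry.
static void printArgs(LLVMRemarkEntryRef Remark) {
  for (LLVMRemarkArgRef Arg = LLVMRemarkEntryGetFirstArg(Remark); Arg;
       Arg = LLVMRemarkEntryGetNextArg(Arg, Remark)) {
    LLVMRemarkStringRef Key = LLVMRemarkArgGetKey(Arg);
    LLVMRemarkStringRef Val = LLVMRemarkArgGetValue(Arg);
    std::printf("%.*s: %.*s\n", (int)LLVMRemarkStringGetLen(Key),
                LLVMRemarkStringGetData(Key), (int)LLVMRemarkStringGetLen(Val),
                LLVMRemarkStringGetData(Val));
  }
}
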
diff --git a/lib/Remarks/RemarkFormat.cpp b/lib/Remarks/RemarkFormat.cpp
new file mode 100644
index 000000000000..bcd0f753ff64
--- /dev/null
+++ b/lib/Remarks/RemarkFormat.cpp
@@ -0,0 +1,30 @@
+//===- RemarkFormat.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of utilities to handle the different remark formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {
+ auto Result = StringSwitch<Format>(FormatStr)
+ .Cases("", "yaml", Format::YAML)
+ .Default(Format::Unknown);
+
+ if (Result == Format::Unknown)
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark serializer format: '%s'",
+ FormatStr.data());
+
+ return Result;
+}
diff --git a/lib/Remarks/RemarkParser.cpp b/lib/Remarks/RemarkParser.cpp
new file mode 100644
index 000000000000..f67464073bd1
--- /dev/null
+++ b/lib/Remarks/RemarkParser.cpp
@@ -0,0 +1,119 @@
+//===- RemarkParser.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for remark diagnostics in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkParser.h"
+#include "YAMLRemarkParser.h"
+#include "llvm-c/Remarks.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CBindingWrapping.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+char EndOfFileError::ID = 0;
+
+ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
+ while (!InBuffer.empty()) {
+ // Strings are separated by '\0' bytes.
+ std::pair<StringRef, StringRef> Split = InBuffer.split('\0');
+ // We only store the offset from the beginning of the buffer.
+ Offsets.push_back(Split.first.data() - Buffer.data());
+ InBuffer = Split.second;
+ }
+}
+
+Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {
+ if (Index >= Offsets.size())
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "String with index %u is out of bounds (size = %u).", Index,
+ Offsets.size());
+
+ size_t Offset = Offsets[Index];
+ // If it's the last offset, we can't use the next offset to know the size of
+ // the string.
+ size_t NextOffset =
+ (Index == Offsets.size() - 1) ? Buffer.size() : Offsets[Index + 1];
+ return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
+}
+
+Expected<std::unique_ptr<Parser>>
+llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf,
+ Optional<const ParsedStringTable *> StrTab) {
+ switch (ParserFormat) {
+ case Format::YAML:
+ return llvm::make_unique<YAMLRemarkParser>(Buf, StrTab);
+ case Format::Unknown:
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+ }
+ llvm_unreachable("unknown format");
+}
+
+// Wrapper that holds the state needed to interact with the C API.
+struct CParser {
+ std::unique_ptr<Parser> TheParser;
+ Optional<std::string> Err;
+
+ CParser(Format ParserFormat, StringRef Buf,
+ Optional<const ParsedStringTable *> StrTab = None)
+ : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {}
+
+ void handleError(Error E) { Err.emplace(toString(std::move(E))); }
+ bool hasError() const { return Err.hasValue(); }
+ const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
+};
+
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef)
+
+extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
+ uint64_t Size) {
+ return wrap(new CParser(Format::YAML,
+ StringRef(static_cast<const char *>(Buf), Size)));
+}
+
+extern "C" LLVMRemarkEntryRef
+LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) {
+ CParser &TheCParser = *unwrap(Parser);
+ remarks::Parser &TheParser = *TheCParser.TheParser;
+
+ Expected<std::unique_ptr<Remark>> MaybeRemark = TheParser.next();
+ if (Error E = MaybeRemark.takeError()) {
+ if (E.isA<EndOfFileError>()) {
+ consumeError(std::move(E));
+ return nullptr;
+ }
+
+ // Handle the error. Allow it to be checked through HasError and
+ // GetErrorMessage.
+ TheCParser.handleError(std::move(E));
+ return nullptr;
+ }
+
+ // Valid remark.
+ return wrap(MaybeRemark->release());
+}
+
+extern "C" LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser) {
+ return unwrap(Parser)->hasError();
+}
+
+extern "C" const char *
+LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser) {
+ return unwrap(Parser)->getMessage();
+}
+
+extern "C" void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser) {
+ delete unwrap(Parser);
+}
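
Putting the C API above together, a client creates a parser over an in-memory buffer, drains remarks until GetNext returns NULL, and then checks whether the NULL meant end-of-file or a parse error. A hedged usage sketch with an illustrative YAML buffer:

#include "llvm-c/Remarks.h"
#include <cstdio>
#include <cstring>

int main() {
  // Illustrative buffer; real inputs come from -fsave-optimization-record
  // style YAML files.
  const char *Buf = "--- !Missed\n"
                    "Pass: inline\n"
                    "Name: NoDefinition\n"
                    "Function: foo\n"
                    "...\n";
  LLVMRemarkParserRef Parser =
      LLVMRemarkParserCreateYAML(Buf, std::strlen(Buf));
  while (LLVMRemarkEntryRef Remark = LLVMRemarkParserGetNext(Parser)) {
    LLVMRemarkStringRef Name = LLVMRemarkEntryGetRemarkName(Remark);
    std::printf("remark: %.*s\n", (int)LLVMRemarkStringGetLen(Name),
                LLVMRemarkStringGetData(Name));
    LLVMRemarkEntryDispose(Remark);
  }
  if (LLVMRemarkParserHasError(Parser))
    std::printf("error: %s\n", LLVMRemarkParserGetErrorMessage(Parser));
  LLVMRemarkParserDispose(Parser);
  return 0;
}
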
diff --git a/lib/Remarks/RemarkStringTable.cpp b/lib/Remarks/RemarkStringTable.cpp
new file mode 100644
index 000000000000..984aa5b33b48
--- /dev/null
+++ b/lib/Remarks/RemarkStringTable.cpp
@@ -0,0 +1,48 @@
+//===- RemarkStringTable.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the Remark string table used at remark generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkStringTable.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+std::pair<unsigned, StringRef> StringTable::add(StringRef Str) {
+ size_t NextID = StrTab.size();
+ auto KV = StrTab.insert({Str, NextID});
+ // If it's a new string, add it to the final size.
+ if (KV.second)
+ SerializedSize += KV.first->first().size() + 1; // +1 for the '\0'
+ // Can be either NextID or the previous ID if the string is already there.
+ return {KV.first->second, KV.first->first()};
+}
+
+void StringTable::serialize(raw_ostream &OS) const {
+ // Emit the number of strings.
+ uint64_t StrTabSize = SerializedSize;
+ support::endian::write(OS, StrTabSize, support::little);
+ // Emit the sequence of strings.
+ for (StringRef Str : serialize()) {
+ OS << Str;
+ // Explicitly emit a '\0'.
+ OS.write('\0');
+ }
+}
+
+std::vector<StringRef> StringTable::serialize() const {
+ std::vector<StringRef> Strings{StrTab.size()};
+ for (const auto &KV : StrTab)
+ Strings[KV.second] = KV.first();
+ return Strings;
+}
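
The writer emits strings as a single '\0'-separated blob, and ParsedStringTable (in RemarkParser.cpp above) recovers them by recording only the offset of each entry. A standalone sketch of that round trip on plain std::string data:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Build the blob: strings concatenated with '\0' separators.
  std::vector<std::string> Strings = {"pass", "remark", "function"};
  std::string Blob;
  for (const std::string &S : Strings)
    Blob += S + '\0';

  // Reader side: record only the offset of each entry into the blob.
  std::vector<size_t> Offsets;
  for (size_t Pos = 0; Pos < Blob.size(); Pos = Blob.find('\0', Pos) + 1)
    Offsets.push_back(Pos);

  // Recover entry #1 ("remark") from its offset and the next offset.
  size_t Index = 1;
  size_t End = (Index + 1 < Offsets.size()) ? Offsets[Index + 1] : Blob.size();
  std::cout << Blob.substr(Offsets[Index], End - Offsets[Index] - 1) << "\n";
}
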
diff --git a/lib/Remarks/YAMLRemarkParser.cpp b/lib/Remarks/YAMLRemarkParser.cpp
new file mode 100644
index 000000000000..ed78b7ba5d95
--- /dev/null
+++ b/lib/Remarks/YAMLRemarkParser.cpp
@@ -0,0 +1,327 @@
+//===- YAMLRemarkParser.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility methods used by clients that want to use the
+// parser for remark diagnostics in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#include "YAMLRemarkParser.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Remarks/RemarkParser.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+char YAMLParseError::ID = 0;
+
+static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
+ assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
+ std::string &Message = *static_cast<std::string *>(Ctx);
+ assert(Message.empty() && "Expected an empty string.");
+ raw_string_ostream OS(Message);
+ Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
+ /*ShowKindLabels*/ true);
+ OS << '\n';
+ OS.flush();
+}
+
+YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
+ yaml::Stream &Stream, yaml::Node &Node) {
+ // 1) Set up a diagnostic handler to avoid errors being printed out to
+ // stderr.
+ // 2) Use the stream to print the error with the associated node.
+ // 3) The stream will use the source manager to print the error, which will
+ // call the diagnostic handler.
+ // 4) The diagnostic handler will stream the error directly into this object's
+ // Message member, which is used when logging is asked for.
+ auto OldDiagHandler = SM.getDiagHandler();
+ auto OldDiagCtx = SM.getDiagContext();
+ SM.setDiagHandler(handleDiagnostic, &Message);
+ Stream.printError(&Node, Twine(Msg) + Twine('\n'));
+ // Restore the old handlers.
+ SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
+}
+
+static SourceMgr setupSM(std::string &LastErrorMessage) {
+ SourceMgr SM;
+ SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
+ return SM;
+}
+
+YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
+ Optional<const ParsedStringTable *> StrTab)
+ : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(),
+ SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
+
+Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
+ return make_error<YAMLParseError>(Message, SM, Stream, Node);
+}
+
+Error YAMLRemarkParser::error() {
+ if (LastErrorMessage.empty())
+ return Error::success();
+ Error E = make_error<YAMLParseError>(LastErrorMessage);
+ LastErrorMessage.clear();
+ return E;
+}
+
+Expected<std::unique_ptr<Remark>>
+YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
+ if (Error E = error())
+ return std::move(E);
+
+ yaml::Node *YAMLRoot = RemarkEntry.getRoot();
+ if (!YAMLRoot) {
+ return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "not a valid YAML file.");
+ }
+
+ auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
+ if (!Root)
+ return error("document root is not of mapping type.", *YAMLRoot);
+
+ std::unique_ptr<Remark> Result = llvm::make_unique<Remark>();
+ Remark &TheRemark = *Result;
+
+ // First, the type. It needs special handling since it is not part of the
+ // key-value stream.
+ Expected<Type> T = parseType(*Root);
+ if (!T)
+ return T.takeError();
+ else
+ TheRemark.RemarkType = *T;
+
+ // Then, parse the fields, one by one.
+ for (yaml::KeyValueNode &RemarkField : *Root) {
+ Expected<StringRef> MaybeKey = parseKey(RemarkField);
+ if (!MaybeKey)
+ return MaybeKey.takeError();
+ StringRef KeyName = *MaybeKey;
+
+ if (KeyName == "Pass") {
+ if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+ TheRemark.PassName = *MaybeStr;
+ else
+ return MaybeStr.takeError();
+ } else if (KeyName == "Name") {
+ if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+ TheRemark.RemarkName = *MaybeStr;
+ else
+ return MaybeStr.takeError();
+ } else if (KeyName == "Function") {
+ if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+ TheRemark.FunctionName = *MaybeStr;
+ else
+ return MaybeStr.takeError();
+ } else if (KeyName == "Hotness") {
+ if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
+ TheRemark.Hotness = *MaybeU;
+ else
+ return MaybeU.takeError();
+ } else if (KeyName == "DebugLoc") {
+ if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
+ TheRemark.Loc = *MaybeLoc;
+ else
+ return MaybeLoc.takeError();
+ } else if (KeyName == "Args") {
+ auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+ if (!Args)
+ return error("wrong value type for key.", RemarkField);
+
+ for (yaml::Node &Arg : *Args) {
+ if (Expected<Argument> MaybeArg = parseArg(Arg))
+ TheRemark.Args.push_back(*MaybeArg);
+ else
+ return MaybeArg.takeError();
+ }
+ } else {
+ return error("unknown key.", RemarkField);
+ }
+ }
+
+ // Check if any of the mandatory fields are missing.
+ if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
+ TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
+ return error("Type, Pass, Name or Function missing.",
+ *RemarkEntry.getRoot());
+
+ return std::move(Result);
+}
+
+Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
+ auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
+ .Case("!Passed", remarks::Type::Passed)
+ .Case("!Missed", remarks::Type::Missed)
+ .Case("!Analysis", remarks::Type::Analysis)
+ .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
+ .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
+ .Case("!Failure", remarks::Type::Failure)
+ .Default(remarks::Type::Unknown);
+ if (Type == remarks::Type::Unknown)
+ return error("expected a remark tag.", Node);
+ return Type;
+}
+
+Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
+ if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
+ return Key->getRawValue();
+
+ return error("key is not a string.", Node);
+}
+
+Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return error("expected a value of scalar type.", Node);
+ StringRef Result;
+ if (!StrTab) {
+ Result = Value->getRawValue();
+ } else {
+ // If we have a string table, parse it as an unsigned.
+ unsigned StrID = 0;
+ if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
+ StrID = *MaybeStrID;
+ else
+ return MaybeStrID.takeError();
+
+ if (Expected<StringRef> Str = (**StrTab)[StrID])
+ Result = *Str;
+ else
+ return Str.takeError();
+ }
+
+ if (Result.front() == '\'')
+ Result = Result.drop_front();
+
+ if (Result.back() == '\'')
+ Result = Result.drop_back();
+
+ return Result;
+}
+
+Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
+ SmallVector<char, 4> Tmp;
+ auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
+ if (!Value)
+ return error("expected a value of scalar type.", Node);
+ unsigned UnsignedValue = 0;
+ if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
+ return error("expected a value of integer type.", *Value);
+ return UnsignedValue;
+}
+
+Expected<RemarkLocation>
+YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
+ auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
+ if (!DebugLoc)
+ return error("expected a value of mapping type.", Node);
+
+ Optional<StringRef> File;
+ Optional<unsigned> Line;
+ Optional<unsigned> Column;
+
+ for (yaml::KeyValueNode &DLNode : *DebugLoc) {
+ Expected<StringRef> MaybeKey = parseKey(DLNode);
+ if (!MaybeKey)
+ return MaybeKey.takeError();
+ StringRef KeyName = *MaybeKey;
+
+ if (KeyName == "File") {
+ if (Expected<StringRef> MaybeStr = parseStr(DLNode))
+ File = *MaybeStr;
+ else
+ return MaybeStr.takeError();
+ } else if (KeyName == "Column") {
+ if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
+ Column = *MaybeU;
+ else
+ return MaybeU.takeError();
+ } else if (KeyName == "Line") {
+ if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
+ Line = *MaybeU;
+ else
+ return MaybeU.takeError();
+ } else {
+ return error("unknown entry in DebugLoc map.", DLNode);
+ }
+ }
+
+ // If any of the debug loc fields is missing, return an error.
+ if (!File || !Line || !Column)
+ return error("DebugLoc node incomplete.", Node);
+
+ return RemarkLocation{*File, *Line, *Column};
+}
+
+Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
+ auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
+ if (!ArgMap)
+ return error("expected a value of mapping type.", Node);
+
+ Optional<StringRef> KeyStr;
+ Optional<StringRef> ValueStr;
+ Optional<RemarkLocation> Loc;
+
+ for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
+ Expected<StringRef> MaybeKey = parseKey(ArgEntry);
+ if (!MaybeKey)
+ return MaybeKey.takeError();
+ StringRef KeyName = *MaybeKey;
+
+ // Try to parse debug locs.
+ if (KeyName == "DebugLoc") {
+ // Can't have multiple DebugLoc entries per argument.
+ if (Loc)
+ return error("only one DebugLoc entry is allowed per argument.",
+ ArgEntry);
+
+ if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
+ Loc = *MaybeLoc;
+ continue;
+ } else
+ return MaybeLoc.takeError();
+ }
+
+ // If we already have a string, error out.
+ if (ValueStr)
+ return error("only one string entry is allowed per argument.", ArgEntry);
+
+ // Try to parse the value.
+ if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
+ ValueStr = *MaybeStr;
+ else
+ return MaybeStr.takeError();
+
+ // Keep the key from the string.
+ KeyStr = KeyName;
+ }
+
+ if (!KeyStr)
+ return error("argument key is missing.", *ArgMap);
+ if (!ValueStr)
+ return error("argument value is missing.", *ArgMap);
+
+ return Argument{*KeyStr, *ValueStr, Loc};
+}
+
+Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
+ if (YAMLIt == Stream.end())
+ return make_error<EndOfFileError>();
+
+ Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
+ if (!MaybeResult) {
+ // Avoid garbage input, set the iterator to the end.
+ YAMLIt = Stream.end();
+ return MaybeResult.takeError();
+ }
+
+ ++YAMLIt;
+
+ return std::move(*MaybeResult);
+}
diff --git a/lib/Remarks/YAMLRemarkParser.h b/lib/Remarks/YAMLRemarkParser.h
new file mode 100644
index 000000000000..cea76e63e75c
--- /dev/null
+++ b/lib/Remarks/YAMLRemarkParser.h
@@ -0,0 +1,96 @@
+//===-- YAMLRemarkParser.h - Parser for YAML remarks ------------*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the YAML remark parser.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_YAML_REMARK_PARSER_H
+#define LLVM_REMARKS_YAML_REMARK_PARSER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkParser.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+namespace remarks {
+
+class YAMLParseError : public ErrorInfo<YAMLParseError> {
+public:
+ static char ID;
+
+ YAMLParseError(StringRef Message, SourceMgr &SM, yaml::Stream &Stream,
+ yaml::Node &Node);
+
+ YAMLParseError(StringRef Message) : Message(Message) {}
+
+ void log(raw_ostream &OS) const override { OS << Message; }
+ std::error_code convertToErrorCode() const override {
+ return inconvertibleErrorCode();
+ }
+
+private:
+ std::string Message;
+};
+
+/// Regular YAML to Remark parser.
+struct YAMLRemarkParser : public Parser {
+ /// The string table used for parsing strings.
+ Optional<const ParsedStringTable *> StrTab;
+ /// Last error message that can come from the YAML parser diagnostics.
+ /// We need this for catching errors in the constructor.
+ std::string LastErrorMessage;
+ /// Source manager for better error messages.
+ SourceMgr SM;
+ /// Stream for yaml parsing.
+ yaml::Stream Stream;
+ /// Iterator in the YAML stream.
+ yaml::document_iterator YAMLIt;
+
+ YAMLRemarkParser(StringRef Buf,
+ Optional<const ParsedStringTable *> StrTab = None);
+
+ Expected<std::unique_ptr<Remark>> next() override;
+
+ static bool classof(const Parser *P) {
+ return P->ParserFormat == Format::YAML;
+ }
+
+private:
+ /// Create a YAMLParseError error from an existing error generated by the YAML
+ /// parser.
+ /// If there is no error, this returns Success.
+ Error error();
+ /// Create a YAMLParseError error referencing a specific node.
+ Error error(StringRef Message, yaml::Node &Node);
+ /// Parse a YAML remark to a remarks::Remark object.
+ Expected<std::unique_ptr<Remark>> parseRemark(yaml::Document &Remark);
+ /// Parse the type of a remark to an enum type.
+ Expected<Type> parseType(yaml::MappingNode &Node);
+ /// Parse one key to a string.
+ Expected<StringRef> parseKey(yaml::KeyValueNode &Node);
+ /// Parse one value to a string.
+ Expected<StringRef> parseStr(yaml::KeyValueNode &Node);
+ /// Parse one value to an unsigned.
+ Expected<unsigned> parseUnsigned(yaml::KeyValueNode &Node);
+ /// Parse a debug location.
+ Expected<RemarkLocation> parseDebugLoc(yaml::KeyValueNode &Node);
+ /// Parse an argument.
+ Expected<Argument> parseArg(yaml::Node &Node);
+};
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_YAML_REMARK_PARSER_H */
diff --git a/lib/Remarks/YAMLRemarkSerializer.cpp b/lib/Remarks/YAMLRemarkSerializer.cpp
new file mode 100644
index 000000000000..d64ae8e12ab0
--- /dev/null
+++ b/lib/Remarks/YAMLRemarkSerializer.cpp
@@ -0,0 +1,167 @@
+//===- YAMLRemarkSerializer.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the YAML remark serializer using
+// LLVM's YAMLTraits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkSerializer.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+cl::opt<bool> RemarksYAMLStringTable(
+ "remarks-yaml-string-table", cl::init(false), cl::Hidden,
+ cl::desc("Enable the usage of a string table with YAML remarks."));
+
+// Use the same keys whether we use a string table or not (respectively, T is an
+// unsigned or a StringRef).
+template <typename T>
+static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName,
+ Optional<RemarkLocation> RL, T FunctionName,
+ Optional<uint64_t> Hotness,
+ ArrayRef<Argument> Args) {
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", RemarkName);
+ io.mapOptional("DebugLoc", RL);
+ io.mapRequired("Function", FunctionName);
+ io.mapOptional("Hotness", Hotness);
+ io.mapOptional("Args", Args);
+}
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<remarks::Remark *> {
+ static void mapping(IO &io, remarks::Remark *&Remark) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (io.mapTag("!Passed", (Remark->RemarkType == Type::Passed)))
+ ;
+ else if (io.mapTag("!Missed", (Remark->RemarkType == Type::Missed)))
+ ;
+ else if (io.mapTag("!Analysis", (Remark->RemarkType == Type::Analysis)))
+ ;
+ else if (io.mapTag("!AnalysisFPCommute",
+ (Remark->RemarkType == Type::AnalysisFPCommute)))
+ ;
+ else if (io.mapTag("!AnalysisAliasing",
+ (Remark->RemarkType == Type::AnalysisAliasing)))
+ ;
+ else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure)))
+ ;
+ else
+ llvm_unreachable("Unknown remark type");
+
+ if (Optional<StringTable> &StrTab =
+ reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+ unsigned PassID = StrTab->add(Remark->PassName).first;
+ unsigned NameID = StrTab->add(Remark->RemarkName).first;
+ unsigned FunctionID = StrTab->add(Remark->FunctionName).first;
+ mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID,
+ Remark->Hotness, Remark->Args);
+ } else {
+ mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc,
+ Remark->FunctionName, Remark->Hotness, Remark->Args);
+ }
+ }
+};
+
+template <> struct MappingTraits<RemarkLocation> {
+ static void mapping(IO &io, RemarkLocation &RL) {
+ assert(io.outputting() && "input not yet implemented");
+
+ StringRef File = RL.SourceFilePath;
+ unsigned Line = RL.SourceLine;
+ unsigned Col = RL.SourceColumn;
+
+ if (Optional<StringTable> &StrTab =
+ reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+ unsigned FileID = StrTab->add(File).first;
+ io.mapRequired("File", FileID);
+ } else {
+ io.mapRequired("File", File);
+ }
+
+ io.mapRequired("Line", Line);
+ io.mapRequired("Column", Col);
+ }
+
+ static const bool flow = true;
+};
+
+/// Helper struct for multiline string block literals. Use this type to preserve
+/// newlines in strings.
+struct StringBlockVal {
+ StringRef Value;
+ StringBlockVal(const std::string &Value) : Value(Value) {}
+};
+
+template <> struct BlockScalarTraits<StringBlockVal> {
+ static void output(const StringBlockVal &S, void *Ctx, raw_ostream &OS) {
+ return ScalarTraits<StringRef>::output(S.Value, Ctx, OS);
+ }
+
+ static StringRef input(StringRef Scalar, void *Ctx, StringBlockVal &S) {
+ return ScalarTraits<StringRef>::input(Scalar, Ctx, S.Value);
+ }
+};
+
+/// ArrayRef is not really compatible with the YAMLTraits. Everything should be
+/// immutable in an ArrayRef, while the SequenceTraits expect a mutable version
+/// for inputting, but we're only using the outputting capabilities here.
+/// This is a hack, but still nicer than having to manually call the YAMLIO
+/// internal methods.
+/// Keep this in this file so that it doesn't get misused from YAMLTraits.h.
+template <typename T> struct SequenceTraits<ArrayRef<T>> {
+ static size_t size(IO &io, ArrayRef<T> &seq) { return seq.size(); }
+ static Argument &element(IO &io, ArrayRef<T> &seq, size_t index) {
+ assert(io.outputting() && "input not yet implemented");
+ // The assert above should make this "safer" to satisfy the YAMLTraits.
+ return const_cast<T &>(seq[index]);
+ }
+};
+
+/// Implement this as a mapping for now to get proper quotation for the value.
+template <> struct MappingTraits<Argument> {
+ static void mapping(IO &io, Argument &A) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (Optional<StringTable> &StrTab =
+ reinterpret_cast<YAMLSerializer *>(io.getContext())->StrTab) {
+ auto ValueID = StrTab->add(A.Val).first;
+ io.mapRequired(A.Key.data(), ValueID);
+ } else if (StringRef(A.Val).count('\n') > 1) {
+ StringBlockVal S(A.Val);
+ io.mapRequired(A.Key.data(), S);
+ } else {
+ io.mapRequired(A.Key.data(), A.Val);
+ }
+ io.mapOptional("DebugLoc", A.Loc);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(Argument)
+
+YAMLSerializer::YAMLSerializer(raw_ostream &OS, UseStringTable UseStringTable)
+ : Serializer(OS), YAMLOutput(OS, reinterpret_cast<void *>(this)) {
+ if (UseStringTable == remarks::UseStringTable::Yes || RemarksYAMLStringTable)
+ StrTab.emplace();
+}
+
+void YAMLSerializer::emit(const Remark &Remark) {
+ // Again, YAMLTraits expect a non-const object for inputting, but we're not
+ // using that here.
+ auto R = const_cast<remarks::Remark *>(&Remark);
+ YAMLOutput << R;
+}
diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp
index e897137df680..df4caa1f07fd 100644
--- a/lib/Support/AArch64TargetParser.cpp
+++ b/lib/Support/AArch64TargetParser.cpp
@@ -1,9 +1,8 @@
//===-- AArch64TargetParser - Parser for AArch64 features -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,6 +88,16 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+rdm");
if (Extensions & AEK_SVE)
Features.push_back("+sve");
+ if (Extensions & AEK_SVE2)
+ Features.push_back("+sve2");
+ if (Extensions & AEK_SVE2AES)
+ Features.push_back("+sve2-aes");
+ if (Extensions & AEK_SVE2SM4)
+ Features.push_back("+sve2-sm4");
+ if (Extensions & AEK_SVE2SHA3)
+ Features.push_back("+sve2-sha3");
+ if (Extensions & AEK_BITPERM)
+ Features.push_back("+bitperm");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");
diff --git a/lib/Support/AMDGPUMetadata.cpp b/lib/Support/AMDGPUMetadata.cpp
index a04bfc2ea299..5f8102299f47 100644
--- a/lib/Support/AMDGPUMetadata.cpp
+++ b/lib/Support/AMDGPUMetadata.cpp
@@ -1,9 +1,8 @@
//===--- AMDGPUMetadata.cpp -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -66,6 +65,8 @@ struct ScalarEnumerationTraits<ValueKind> {
YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
YIO.enumCase(EN, "HiddenCompletionAction",
ValueKind::HiddenCompletionAction);
+ YIO.enumCase(EN, "HiddenMultiGridSyncArg",
+ ValueKind::HiddenMultiGridSyncArg);
}
};
@@ -219,19 +220,5 @@ std::error_code toString(Metadata HSAMetadata, std::string &String) {
}
} // end namespace HSAMD
-
-namespace PALMD {
-
-std::error_code toString(const Metadata &PALMetadata, std::string &String) {
- raw_string_ostream Stream(String);
- for (auto I = PALMetadata.begin(), E = PALMetadata.end(); I != E; ++I) {
- Stream << Twine(I == PALMetadata.begin() ? " 0x" : ",0x");
- Stream << Twine::utohexstr(*I);
- }
- Stream.flush();
- return std::error_code();
-}
-
-} // end namespace PALMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index e9e429c8031b..b79baf1834a7 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1,9 +1,8 @@
//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,6 +113,42 @@ namespace llvm {
static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
53 + 53, 128};
+ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
+ switch (S) {
+ case S_IEEEhalf:
+ return IEEEhalf();
+ case S_IEEEsingle:
+ return IEEEsingle();
+ case S_IEEEdouble:
+ return IEEEdouble();
+ case S_x87DoubleExtended:
+ return x87DoubleExtended();
+ case S_IEEEquad:
+ return IEEEquad();
+ case S_PPCDoubleDouble:
+ return PPCDoubleDouble();
+ }
+ llvm_unreachable("Unrecognised floating semantics");
+ }
+
+ APFloatBase::Semantics
+ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
+ if (&Sem == &llvm::APFloat::IEEEhalf())
+ return S_IEEEhalf;
+ else if (&Sem == &llvm::APFloat::IEEEsingle())
+ return S_IEEEsingle;
+ else if (&Sem == &llvm::APFloat::IEEEdouble())
+ return S_IEEEdouble;
+ else if (&Sem == &llvm::APFloat::x87DoubleExtended())
+ return S_x87DoubleExtended;
+ else if (&Sem == &llvm::APFloat::IEEEquad())
+ return S_IEEEquad;
+ else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
+ return S_PPCDoubleDouble;
+ else
+ llvm_unreachable("Unknown floating semantics");
+ }
+
const fltSemantics &APFloatBase::IEEEhalf() {
return semIEEEhalf;
}
@@ -199,7 +234,10 @@ readExponent(StringRef::iterator begin, StringRef::iterator end)
const unsigned int overlargeExponent = 24000; /* FIXME. */
StringRef::iterator p = begin;
- assert(p != end && "Exponent has no digits");
+ // Treat no exponent as 0 to match binutils
+ if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
+ return 0;
+ }
isNegative = (*p == '-');
if (*p == '-' || *p == '+') {
@@ -4416,8 +4454,9 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
return;
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
+ const fltSemantics& S = F.getSemantics();
new (&Double)
- DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
+ DoubleAPFloat(Semantics, APFloat(std::move(F), S),
APFloat(semIEEEdouble));
return;
}
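
readExponent now tolerates an exponent that is missing or consists of a bare sign, treating it as 0 to match binutils instead of asserting. A standalone sketch of that lenient parse (a simplified decimal-only version, not the APFloat code path):

#include <cctype>
#include <iostream>
#include <string>

// Lenient exponent parse: an empty string or a bare '+'/'-' yields 0.
static int readExponent(const std::string &Exp) {
  size_t I = 0;
  bool Negative = false;
  if (I < Exp.size() && (Exp[I] == '+' || Exp[I] == '-')) {
    Negative = (Exp[I] == '-');
    ++I;
  }
  if (I == Exp.size())
    return 0; // no digits: treat as exponent 0
  int Value = 0;
  for (; I < Exp.size() && std::isdigit((unsigned char)Exp[I]); ++I)
    Value = Value * 10 + (Exp[I] - '0');
  return Negative ? -Value : Value;
}

int main() {
  std::cout << readExponent("") << " " << readExponent("-") << " "
            << readExponent("+12") << "\n"; // 0 0 12
}
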
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index a5f4f98c489a..43173311cd80 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1,9 +1,8 @@
//===-- APInt.cpp - Implement APInt class ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -483,10 +482,13 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
APInt tmp(sufficient, StringRef(p, slen), radix);
// Compute how many bits are required. If the log is infinite, assume we need
- // just bit.
+ // just bit. If the log is exact and value is negative, then the value is
+ // MinSignedValue with (log + 1) bits.
unsigned log = tmp.logBase2();
if (log == (unsigned)-1) {
return isNegative + 1;
+ } else if (isNegative && tmp.isPowerOf2()) {
+ return isNegative + log;
} else {
return isNegative + log + 1;
}
@@ -1096,6 +1098,8 @@ APInt APInt::sqrt() const {
/// however we simplify it to speed up calculating only the inverse, and take
/// advantage of div+rem calculations. We also use some tricks to avoid copying
/// (potentially large) APInts around.
+/// WARNING: a value of '0' may be returned,
+/// signifying that no multiplicative inverse exists!
APInt APInt::multiplicativeInverse(const APInt& modulo) const {
assert(ult(modulo) && "This APInt must be smaller than the modulo");
@@ -1915,12 +1919,19 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
}
APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
- APInt Res = *this * RHS;
+ if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
+ Overflow = true;
+ return *this * RHS;
+ }
- if (*this != 0 && RHS != 0)
- Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
- else
- Overflow = false;
+ APInt Res = lshr(1) * RHS;
+ Overflow = Res.isNegative();
+ Res <<= 1;
+ if ((*this)[0]) {
+ Res += RHS;
+ if (Res.ult(RHS))
+ Overflow = true;
+ }
return Res;
}
@@ -2923,3 +2934,56 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
return X;
}
+
+/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
+/// with the integer held in IntVal.
+void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
+ unsigned StoreBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
+ const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
+
+ if (sys::IsLittleEndianHost) {
+ // Little-endian host - the source is ordered from LSB to MSB. Order the
+ // destination from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, StoreBytes);
+ } else {
+ // Big-endian host - the source is an array of 64 bit words ordered from
+ // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
+ // from MSB to LSB: Reverse the word order, but not the bytes in a word.
+ while (StoreBytes > sizeof(uint64_t)) {
+ StoreBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
+ Src += sizeof(uint64_t);
+ }
+
+ memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
+ }
+}
+
+/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
+/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
+void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+ assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
+ uint8_t *Dst = reinterpret_cast<uint8_t *>(
+ const_cast<uint64_t *>(IntVal.getRawData()));
+
+ if (sys::IsLittleEndianHost)
+ // Little-endian host - the destination must be ordered from LSB to MSB.
+ // The source is ordered from LSB to MSB: Do a straight copy.
+ memcpy(Dst, Src, LoadBytes);
+ else {
+ // Big-endian - the destination is an array of 64 bit words ordered from
+ // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
+ // ordered from MSB to LSB: Reverse the word order, but not the bytes in
+ // a word.
+ while (LoadBytes > sizeof(uint64_t)) {
+ LoadBytes -= sizeof(uint64_t);
+ // May not be aligned so use memcpy.
+ memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
+ Dst += sizeof(uint64_t);
+ }
+
+ memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
+ }
+}
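
The new umul_ov avoids the two divisions of the old overflow check: if the operands' significant bits already cannot fit in the bit width, overflow is certain; otherwise a halved multiply exposes the bit that would be carried out before the final shift and add. A 64-bit standalone analogue of that logic (using the GCC/Clang __builtin_clzll intrinsic), not the APInt implementation itself:

#include <cstdint>
#include <iostream>

static uint64_t umul_ov(uint64_t A, uint64_t B, bool &Overflow) {
  // Fast path: too many significant bits combined, overflow is guaranteed.
  int LZ = (A ? __builtin_clzll(A) : 64) + (B ? __builtin_clzll(B) : 64);
  if (LZ + 2 <= 64) {
    Overflow = true;
    return A * B;
  }
  // Multiply A/2 by B; a set top bit means the doubled result overflows.
  uint64_t Res = (A >> 1) * B;
  Overflow = (Res >> 63) != 0;
  Res <<= 1;
  if (A & 1) {
    Res += B;
    if (Res < B) // wrapped while adding back the low bit's contribution
      Overflow = true;
  }
  return Res;
}

int main() {
  bool Overflow = false;
  std::cout << umul_ov(3, 5, Overflow) << " overflow=" << Overflow << "\n";
  umul_ov(UINT64_MAX, 2, Overflow);
  std::cout << "overflow=" << Overflow << "\n";
}
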
diff --git a/lib/Support/APSInt.cpp b/lib/Support/APSInt.cpp
index 46c0f70ff66b..7c48880f96ea 100644
--- a/lib/Support/APSInt.cpp
+++ b/lib/Support/APSInt.cpp
@@ -1,9 +1,8 @@
//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,18 +22,18 @@ APSInt::APSInt(StringRef Str) {
// (Over-)estimate the required number of bits.
unsigned NumBits = ((Str.size() * 64) / 19) + 2;
- APInt Tmp(NumBits, Str, /*Radix=*/10);
+ APInt Tmp(NumBits, Str, /*radix=*/10);
if (Str[0] == '-') {
unsigned MinBits = Tmp.getMinSignedBits();
if (MinBits > 0 && MinBits < NumBits)
Tmp = Tmp.trunc(MinBits);
- *this = APSInt(Tmp, /*IsUnsigned=*/false);
+ *this = APSInt(Tmp, /*isUnsigned=*/false);
return;
}
unsigned ActiveBits = Tmp.getActiveBits();
if (ActiveBits > 0 && ActiveBits < NumBits)
Tmp = Tmp.trunc(ActiveBits);
- *this = APSInt(Tmp, /*IsUnsigned=*/true);
+ *this = APSInt(Tmp, /*isUnsigned=*/true);
}
void APSInt::Profile(FoldingSetNodeID& ID) const {
diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp
index 1f98ac2f40ba..df50fff720cd 100644
--- a/lib/Support/ARMAttributeParser.cpp
+++ b/lib/Support/ARMAttributeParser.cpp
@@ -1,9 +1,8 @@
//===--- ARMAttributeParser.cpp - ARM Attribute Information Printer -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,6 +37,7 @@ ARMAttributeParser::DisplayRoutines[] = {
ATTRIBUTE_HANDLER(FP_arch),
ATTRIBUTE_HANDLER(WMMX_arch),
ATTRIBUTE_HANDLER(Advanced_SIMD_arch),
+ ATTRIBUTE_HANDLER(MVE_arch),
ATTRIBUTE_HANDLER(PCS_config),
ATTRIBUTE_HANDLER(ABI_PCS_R9_use),
ATTRIBUTE_HANDLER(ABI_PCS_RW_data),
@@ -133,7 +133,9 @@ void ARMAttributeParser::CPU_arch(AttrType Tag, const uint8_t *Data,
static const char *const Strings[] = {
"Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6",
"ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
- "ARM v7E-M", "ARM v8"
+ "ARM v7E-M", "ARM v8", nullptr,
+ "ARM v8-M Baseline", "ARM v8-M Mainline", nullptr, nullptr, nullptr,
+ "ARM v8.1-M Mainline"
};
uint64_t Value = ParseInteger(Data, Offset);
@@ -214,6 +216,18 @@ void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data,
PrintAttribute(Tag, Value, ValueDesc);
}
+void ARMAttributeParser::MVE_arch(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "MVE integer", "MVE integer and float"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data,
uint32_t &Offset) {
static const char *const Strings[] = {
@@ -682,7 +696,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
}
void ARMAttributeParser::Parse(ArrayRef<uint8_t> Section, bool isLittle) {
- size_t Offset = 1;
+ uint64_t Offset = 1;
unsigned SectionNumber = 0;
while (Offset < Section.size()) {
@@ -695,6 +709,12 @@ void ARMAttributeParser::Parse(ArrayRef<uint8_t> Section, bool isLittle) {
SW->indent();
}
+ if (SectionLength == 0 || (SectionLength + Offset) > Section.size()) {
+ errs() << "invalid subsection length " << SectionLength << " at offset "
+ << Offset << "\n";
+ return;
+ }
+
ParseSubsection(Section.data() + Offset, SectionLength);
Offset = Offset + SectionLength;
diff --git a/lib/Support/ARMBuildAttrs.cpp b/lib/Support/ARMBuildAttrs.cpp
index 8f18e9eb24ed..d0c4fb792cb8 100644
--- a/lib/Support/ARMBuildAttrs.cpp
+++ b/lib/Support/ARMBuildAttrs.cpp
@@ -1,9 +1,8 @@
//===-- ARMBuildAttrs.cpp - ARM Build Attributes --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -29,6 +28,7 @@ const struct {
{ ARMBuildAttrs::FP_arch, "Tag_FP_arch" },
{ ARMBuildAttrs::WMMX_arch, "Tag_WMMX_arch" },
{ ARMBuildAttrs::Advanced_SIMD_arch, "Tag_Advanced_SIMD_arch" },
+ { ARMBuildAttrs::MVE_arch, "Tag_MVE_arch" },
{ ARMBuildAttrs::PCS_config, "Tag_PCS_config" },
{ ARMBuildAttrs::ABI_PCS_R9_use, "Tag_ABI_PCS_R9_use" },
{ ARMBuildAttrs::ABI_PCS_RW_data, "Tag_ABI_PCS_RW_data" },
diff --git a/lib/Support/ARMTargetParser.cpp b/lib/Support/ARMTargetParser.cpp
index 07294b0c09a3..be948cfc95d4 100644
--- a/lib/Support/ARMTargetParser.cpp
+++ b/lib/Support/ARMTargetParser.cpp
@@ -1,9 +1,8 @@
//===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,6 +77,7 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
case ArchKind::ARMV8R:
case ArchKind::ARMV8MBaseline:
case ArchKind::ARMV8MMainline:
+ case ArchKind::ARMV8_1MMainline:
return 8;
case ArchKind::INVALID:
return 0;
@@ -94,6 +94,7 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
case ArchKind::ARMV7EM:
case ArchKind::ARMV8MMainline:
case ArchKind::ARMV8MBaseline:
+ case ArchKind::ARMV8_1MMainline:
return ProfileKind::M;
case ArchKind::ARMV7R:
case ArchKind::ARMV8R:
@@ -152,6 +153,7 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8r", "v8-r")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
+ .Case("v8.1m.main", "v8.1-m.main")
.Default(Arch);
}
@@ -160,77 +162,63 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
if (FPUKind >= FK_LAST || FPUKind == FK_INVALID)
return false;
- // fp-only-sp and d16 subtarget features are independent of each other, so we
- // must enable/disable both.
- switch (FPUNames[FPUKind].Restriction) {
- case FPURestriction::SP_D16:
- Features.push_back("+fp-only-sp");
- Features.push_back("+d16");
- break;
- case FPURestriction::D16:
- Features.push_back("-fp-only-sp");
- Features.push_back("+d16");
- break;
- case FPURestriction::None:
- Features.push_back("-fp-only-sp");
- Features.push_back("-d16");
- break;
- }
-
- // FPU version subtarget features are inclusive of lower-numbered ones, so
- // enable the one corresponding to this version and disable all that are
- // higher. We also have to make sure to disable fp16 when vfp4 is disabled,
- // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16.
- switch (FPUNames[FPUKind].FPUVer) {
- case FPUVersion::VFPV5:
- Features.push_back("+fp-armv8");
- break;
- case FPUVersion::VFPV4:
- Features.push_back("+vfp4");
- Features.push_back("-fp-armv8");
- break;
- case FPUVersion::VFPV3_FP16:
- Features.push_back("+vfp3");
- Features.push_back("+fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case FPUVersion::VFPV3:
- Features.push_back("+vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case FPUVersion::VFPV2:
- Features.push_back("+vfp2");
- Features.push_back("-vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
- case FPUVersion::NONE:
- Features.push_back("-vfp2");
- Features.push_back("-vfp3");
- Features.push_back("-fp16");
- Features.push_back("-vfp4");
- Features.push_back("-fp-armv8");
- break;
+ static const struct FPUFeatureNameInfo {
+ const char *PlusName, *MinusName;
+ FPUVersion MinVersion;
+ FPURestriction MaxRestriction;
+ } FPUFeatureInfoList[] = {
+ // We have to specify the + and - versions of the name in full so
+ // that we can return them as static StringRefs.
+ //
+ // Also, the SubtargetFeatures ending in just "sp" are listed here
+ // under FPURestriction::None, which is the only FPURestriction in
+ // which they would be valid (since FPURestriction::SP doesn't
+ // exist).
+
+ {"+fpregs", "-fpregs", FPUVersion::VFPV2, FPURestriction::SP_D16},
+ {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::None},
+ {"+vfp2d16", "-vfp2d16", FPUVersion::VFPV2, FPURestriction::D16},
+ {"+vfp2d16sp", "-vfp2d16sp", FPUVersion::VFPV2, FPURestriction::SP_D16},
+ {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::None},
+ {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None},
+ {"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16},
+ {"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16},
+ {"+vfp3sp", "-vfp3sp", FPUVersion::VFPV3, FPURestriction::None},
+ {"+fp16", "-fp16", FPUVersion::VFPV3_FP16, FPURestriction::SP_D16},
+ {"+vfp4", "-vfp4", FPUVersion::VFPV4, FPURestriction::None},
+ {"+vfp4d16", "-vfp4d16", FPUVersion::VFPV4, FPURestriction::D16},
+ {"+vfp4d16sp", "-vfp4d16sp", FPUVersion::VFPV4, FPURestriction::SP_D16},
+ {"+vfp4sp", "-vfp4sp", FPUVersion::VFPV4, FPURestriction::None},
+ {"+fp-armv8", "-fp-armv8", FPUVersion::VFPV5, FPURestriction::None},
+ {"+fp-armv8d16", "-fp-armv8d16", FPUVersion::VFPV5, FPURestriction::D16},
+ {"+fp-armv8d16sp", "-fp-armv8d16sp", FPUVersion::VFPV5, FPURestriction::SP_D16},
+ {"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None},
+ {"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16},
+ {"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16},
+ {"+d32", "-d32", FPUVersion::VFPV2, FPURestriction::None},
+ };
+
+ for (const auto &Info: FPUFeatureInfoList) {
+ if (FPUNames[FPUKind].FPUVer >= Info.MinVersion &&
+ FPUNames[FPUKind].Restriction <= Info.MaxRestriction)
+ Features.push_back(Info.PlusName);
+ else
+ Features.push_back(Info.MinusName);
}
- // crypto includes neon, so we handle this similarly to FPU version.
- switch (FPUNames[FPUKind].NeonSupport) {
- case NeonSupportLevel::Crypto:
- Features.push_back("+neon");
- Features.push_back("+crypto");
- break;
- case NeonSupportLevel::Neon:
- Features.push_back("+neon");
- Features.push_back("-crypto");
- break;
- case NeonSupportLevel::None:
- Features.push_back("-neon");
- Features.push_back("-crypto");
- break;
+ static const struct NeonFeatureNameInfo {
+ const char *PlusName, *MinusName;
+ NeonSupportLevel MinSupportLevel;
+ } NeonFeatureInfoList[] = {
+ {"+neon", "-neon", NeonSupportLevel::Neon},
+ {"+crypto", "-crypto", NeonSupportLevel::Crypto},
+ };
+
+ for (const auto &Info: NeonFeatureInfoList) {
+ if (FPUNames[FPUKind].NeonSupport >= Info.MinSupportLevel)
+ Features.push_back(Info.PlusName);
+ else
+ Features.push_back(Info.MinusName);
}
return true;
@@ -249,7 +237,7 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
return EndianKind::LITTLE;
}
- if (Arch.startswith("aarch64"))
+ if (Arch.startswith("aarch64") || Arch.startswith("aarch64_32"))
return EndianKind::LITTLE;
return EndianKind::INVALID;
@@ -290,8 +278,12 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
StringRef Error = "";
// Begins with "arm" / "thumb", move past it.
- if (A.startswith("arm64"))
+ if (A.startswith("arm64_32"))
+ offset = 8;
+ else if (A.startswith("arm64"))
offset = 5;
+ else if (A.startswith("aarch64_32"))
+ offset = 10;
else if (A.startswith("arm"))
offset = 3;
else if (A.startswith("thumb"))
@@ -417,30 +409,12 @@ bool ARM::getExtensionFeatures(unsigned Extensions,
if (Extensions == AEK_INVALID)
return false;
- if (Extensions & AEK_CRC)
- Features.push_back("+crc");
- else
- Features.push_back("-crc");
-
- if (Extensions & AEK_DSP)
- Features.push_back("+dsp");
- else
- Features.push_back("-dsp");
-
- if (Extensions & AEK_FP16FML)
- Features.push_back("+fp16fml");
- else
- Features.push_back("-fp16fml");
-
- if (Extensions & AEK_RAS)
- Features.push_back("+ras");
- else
- Features.push_back("-ras");
-
- if (Extensions & AEK_DOTPROD)
- Features.push_back("+dotprod");
- else
- Features.push_back("-dotprod");
+ for (const auto AE : ARCHExtNames) {
+ if ((Extensions & AE.ID) == AE.ID && AE.Feature)
+ Features.push_back(AE.Feature);
+ else if (AE.NegFeature)
+ Features.push_back(AE.NegFeature);
+ }
return getHWDivFeatures(Extensions, Features);
}
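
A self-contained sketch, with made-up masks and names, of the table-driven pattern the new getExtensionFeatures() loop uses in place of one if/else per extension:

#include <cstdint>
#include <cstdio>
#include <vector>

// One table row per extension: {bitmask, +feature, -feature}. The masks and
// names here are illustrative, not the real AEK_* values or ARCHExtNames.
struct ExtRow {
  uint64_t ID;
  const char *Feature, *NegFeature;
};

static void collectExtFeatures(uint64_t Extensions,
                               std::vector<const char *> &Features) {
  static const ExtRow Rows[] = {
      {1u << 0, "+crc", "-crc"},
      {1u << 1, "+dsp", "-dsp"},
      {1u << 2, "+ras", "-ras"},
  };
  for (const ExtRow &Row : Rows) {
    if ((Extensions & Row.ID) == Row.ID && Row.Feature)
      Features.push_back(Row.Feature);
    else if (Row.NegFeature)
      Features.push_back(Row.NegFeature);
  }
}

int main() {
  std::vector<const char *> Features;
  collectExtFeatures((1u << 0) | (1u << 2), Features); // crc + ras enabled
  for (const char *F : Features)
    std::printf("%s\n", F);
}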
@@ -469,22 +443,99 @@ StringRef ARM::getArchExtName(unsigned ArchExtKind) {
return StringRef();
}
-StringRef ARM::getArchExtFeature(StringRef ArchExt) {
- if (ArchExt.startswith("no")) {
- StringRef ArchExtBase(ArchExt.substr(2));
- for (const auto AE : ARCHExtNames) {
- if (AE.NegFeature && ArchExtBase == AE.getName())
- return StringRef(AE.NegFeature);
- }
+static bool stripNegationPrefix(StringRef &Name) {
+ if (Name.startswith("no")) {
+ Name = Name.substr(2);
+ return true;
}
+ return false;
+}
+
+StringRef ARM::getArchExtFeature(StringRef ArchExt) {
+ bool Negated = stripNegationPrefix(ArchExt);
for (const auto AE : ARCHExtNames) {
if (AE.Feature && ArchExt == AE.getName())
- return StringRef(AE.Feature);
+ return StringRef(Negated ? AE.NegFeature : AE.Feature);
}
return StringRef();
}
+static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
+ const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind];
+
+ // If the input FPU already supports double-precision, then there
+ // isn't any different FPU we can return here.
+ //
+ // The current available FPURestriction values are None (no
+ // restriction), D16 (only 16 d-regs) and SP_D16 (16 d-regs
+ // and single precision only); there's no value representing
+ // SP restriction without D16. So this test just means 'is it
+ // SP only?'.
+ if (InputFPU.Restriction != ARM::FPURestriction::SP_D16)
+ return ARM::FK_INVALID;
+
+ // Otherwise, look for an FPU entry with all the same fields, except
+ // that SP_D16 has been replaced with just D16, representing adding
+ // double precision and not changing anything else.
+ for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) {
+ if (CandidateFPU.FPUVer == InputFPU.FPUVer &&
+ CandidateFPU.NeonSupport == InputFPU.NeonSupport &&
+ CandidateFPU.Restriction == ARM::FPURestriction::D16) {
+ return CandidateFPU.ID;
+ }
+ }
+
+ // nothing found
+ return ARM::FK_INVALID;
+}
+
+static unsigned getAEKID(StringRef ArchExtName) {
+ for (const auto AE : ARM::ARCHExtNames)
+ if (AE.getName() == ArchExtName)
+ return AE.ID;
+ return ARM::AEK_INVALID;
+}
+
+bool ARM::appendArchExtFeatures(
+ StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
+ std::vector<StringRef> &Features) {
+
+ size_t StartingNumFeatures = Features.size();
+ const bool Negated = stripNegationPrefix(ArchExt);
+ unsigned ID = getAEKID(ArchExt);
+
+ if (ID == AEK_INVALID)
+ return false;
+
+ for (const auto AE : ARCHExtNames) {
+ if (Negated && (AE.ID & ID) == ID && AE.NegFeature)
+ Features.push_back(AE.NegFeature);
+ else if (AE.ID == ID && AE.Feature)
+ Features.push_back(AE.Feature);
+ }
+
+ if (CPU == "")
+ CPU = "generic";
+
+ if (ArchExt == "fp" || ArchExt == "fp.dp") {
+ unsigned FPUKind;
+ if (ArchExt == "fp.dp") {
+ if (Negated) {
+ Features.push_back("-fp64");
+ return true;
+ }
+ FPUKind = findDoublePrecisionFPU(getDefaultFPU(CPU, AK));
+ } else if (Negated) {
+ FPUKind = ARM::FK_NONE;
+ } else {
+ FPUKind = getDefaultFPU(CPU, AK);
+ }
+ return ARM::getFPUFeatures(FPUKind, Features);
+ }
+ return StartingNumFeatures != Features.size();
+}
+
StringRef ARM::getHWDivName(unsigned HWDivKind) {
for (const auto D : HWDivNames) {
if (HWDivKind == D.ID)
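
A self-contained sketch of the selection rule behind the new FPUFeatureInfoList table: a feature is enabled when the FPU's version is at least the row's minimum and its restriction is no tighter than the row's maximum. The enums, rows, and collect() below are simplified stand-ins, not the definitions from ARMTargetParser.def:

#include <cstdio>
#include <vector>

// Simplified stand-ins; the real enums are ordered so that >= / <= express
// "at least this FPU version" and "no more restricted than this".
enum class Version { NONE, VFPV2, VFPV3, VFPV4, VFPV5 };
enum class Restriction { None, D16, SP_D16 }; // least to most restricted

struct FeatureRow {
  const char *Plus, *Minus;
  Version MinVersion;
  Restriction MaxRestriction;
};

static void collect(Version V, Restriction R,
                    std::vector<const char *> &Features) {
  static const FeatureRow Rows[] = {
      {"+fpregs", "-fpregs", Version::VFPV2, Restriction::SP_D16},
      {"+vfp2", "-vfp2", Version::VFPV2, Restriction::None},
      {"+vfp3d16", "-vfp3d16", Version::VFPV3, Restriction::D16},
      {"+fp-armv8", "-fp-armv8", Version::VFPV5, Restriction::None},
      {"+fp64", "-fp64", Version::VFPV2, Restriction::D16},
  };
  for (const FeatureRow &Row : Rows)
    Features.push_back(V >= Row.MinVersion && R <= Row.MaxRestriction
                           ? Row.Plus
                           : Row.Minus);
}

int main() {
  std::vector<const char *> Features;
  collect(Version::VFPV3, Restriction::D16, Features); // e.g. a vfp3-d16 FPU
  for (const char *F : Features)
    std::printf("%s\n", F);
}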
diff --git a/lib/Support/ARMWinEH.cpp b/lib/Support/ARMWinEH.cpp
index 03c150f1150b..831f95cd4b0b 100644
--- a/lib/Support/ARMWinEH.cpp
+++ b/lib/Support/ARMWinEH.cpp
@@ -1,9 +1,8 @@
//===-- ARMWinEH.cpp - Windows on ARM EH Support Functions ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index f48edac0598c..718d3fc0d8e1 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -1,9 +1,8 @@
//===--- Allocator.cpp - Simple memory allocation abstraction -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 7328a93052cc..f6865405c2b8 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -1,9 +1,8 @@
//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/BinaryStreamError.cpp b/lib/Support/BinaryStreamError.cpp
index cdc811d78d63..f22523f09ac8 100644
--- a/lib/Support/BinaryStreamError.cpp
+++ b/lib/Support/BinaryStreamError.cpp
@@ -1,9 +1,8 @@
//===- BinaryStreamError.cpp - Error extensions for streams -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
index e00527f2519e..b17786593bde 100644
--- a/lib/Support/BinaryStreamReader.cpp
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -1,9 +1,8 @@
//===- BinaryStreamReader.cpp - Reads objects from a binary stream --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#include "llvm/Support/BinaryStreamError.h"
#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/LEB128.h"
using namespace llvm;
using endianness = llvm::support::endianness;
@@ -41,6 +41,36 @@ Error BinaryStreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
return Error::success();
}
+Error BinaryStreamReader::readULEB128(uint64_t &Dest) {
+ SmallVector<uint8_t, 10> EncodedBytes;
+ ArrayRef<uint8_t> NextByte;
+
+ // Copy the encoded ULEB into the buffer.
+ do {
+ if (auto Err = readBytes(NextByte, 1))
+ return Err;
+ EncodedBytes.push_back(NextByte[0]);
+ } while (NextByte[0] & 0x80);
+
+ Dest = decodeULEB128(EncodedBytes.begin(), nullptr, EncodedBytes.end());
+ return Error::success();
+}
+
+Error BinaryStreamReader::readSLEB128(int64_t &Dest) {
+ SmallVector<uint8_t, 10> EncodedBytes;
+ ArrayRef<uint8_t> NextByte;
+
+ // Copy the encoded SLEB into the buffer.
+ do {
+ if (auto Err = readBytes(NextByte, 1))
+ return Err;
+ EncodedBytes.push_back(NextByte[0]);
+ } while (NextByte[0] & 0x80);
+
+ Dest = decodeSLEB128(EncodedBytes.begin(), nullptr, EncodedBytes.end());
+ return Error::success();
+}
+
Error BinaryStreamReader::readCString(StringRef &Dest) {
uint32_t OriginalOffset = getOffset();
uint32_t FoundOffset = 0;
@@ -146,4 +176,4 @@ BinaryStreamReader::split(uint32_t Off) const {
BinaryStreamReader W1{First};
BinaryStreamReader W2{Second};
return std::make_pair(W1, W2);
-} \ No newline at end of file
+}
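
A minimal ULEB128 decoder sketch showing the byte layout that readULEB128() above relies on (7 payload bits per byte, low bits first, high bit set while more bytes follow); it is a standalone illustration, not LLVM's decodeULEB128():

#include <cstdint>
#include <cstdio>
#include <vector>

// Each byte contributes 7 payload bits; a set high bit means "more bytes
// follow", which is also the stop condition of the readULEB128() loop above.
static uint64_t decodeULEB(const std::vector<uint8_t> &Bytes) {
  uint64_t Value = 0;
  unsigned Shift = 0;
  for (uint8_t B : Bytes) {
    Value |= uint64_t(B & 0x7F) << Shift;
    Shift += 7;
    if (!(B & 0x80))
      break;
  }
  return Value;
}

int main() {
  // 624485 encodes as E5 8E 26 in ULEB128.
  std::vector<uint8_t> Encoded = {0xE5, 0x8E, 0x26};
  std::printf("%llu\n", (unsigned long long)decodeULEB(Encoded)); // 624485
}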
diff --git a/lib/Support/BinaryStreamRef.cpp b/lib/Support/BinaryStreamRef.cpp
index bdc0f54bf25a..6bcc504ffad5 100644
--- a/lib/Support/BinaryStreamRef.cpp
+++ b/lib/Support/BinaryStreamRef.cpp
@@ -1,9 +1,8 @@
//===- BinaryStreamRef.cpp - ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp
index bfad1280b929..986e18da281d 100644
--- a/lib/Support/BinaryStreamWriter.cpp
+++ b/lib/Support/BinaryStreamWriter.cpp
@@ -1,9 +1,8 @@
//===- BinaryStreamWriter.cpp - Writes objects to a BinaryStream ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "llvm/Support/BinaryStreamError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
+#include "llvm/Support/LEB128.h"
using namespace llvm;
@@ -32,6 +32,18 @@ Error BinaryStreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
return Error::success();
}
+Error BinaryStreamWriter::writeULEB128(uint64_t Value) {
+ uint8_t EncodedBytes[10] = {0};
+ unsigned Size = encodeULEB128(Value, &EncodedBytes[0]);
+ return writeBytes({EncodedBytes, Size});
+}
+
+Error BinaryStreamWriter::writeSLEB128(int64_t Value) {
+ uint8_t EncodedBytes[10] = {0};
+ unsigned Size = encodeSLEB128(Value, &EncodedBytes[0]);
+ return writeBytes({EncodedBytes, Size});
+}
+
Error BinaryStreamWriter::writeCString(StringRef Str) {
if (auto EC = writeFixedString(Str))
return EC;
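
The matching encoder sketch for writeULEB128() above; the fixed ten-byte scratch buffer suffices because a 64-bit value needs at most ceil(64/7) = 10 LEB128 bytes. Again a standalone illustration, not LLVM's encodeULEB128():

#include <cstdint>
#include <cstdio>

// Emit 7 bits at a time, low bits first, setting the continuation bit on every
// byte except the last.
static unsigned encodeULEB(uint64_t Value, uint8_t Out[10]) {
  unsigned N = 0;
  do {
    uint8_t Byte = Value & 0x7F;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out[N++] = Byte;
  } while (Value != 0);
  return N;
}

int main() {
  uint8_t Buf[10];
  unsigned N = encodeULEB(624485, Buf); // expect E5 8E 26
  for (unsigned I = 0; I < N; ++I)
    std::printf("%02X ", Buf[I]);
  std::printf("\n");
}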
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
index 34fcbde23a28..2b63294f3789 100644
--- a/lib/Support/BlockFrequency.cpp
+++ b/lib/Support/BlockFrequency.cpp
@@ -1,9 +1,8 @@
//====--------------- lib/Support/BlockFrequency.cpp -----------*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index 31dee9561f49..195e2d58d8e1 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -1,9 +1,8 @@
//===-------------- lib/Support/BranchProbability.cpp -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,10 +88,6 @@ static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) {
// Carry.
Upper32 += Mid32 < Mid32Partial;
- // Check for overflow.
- if (Upper32 >= D)
- return UINT64_MAX;
-
uint64_t Rem = (uint64_t(Upper32) << 32) | Mid32;
uint64_t UpperQ = Rem / D;
diff --git a/lib/Support/BuryPointer.cpp b/lib/Support/BuryPointer.cpp
index 6c988b4a0ab2..435f89010d41 100644
--- a/lib/Support/BuryPointer.cpp
+++ b/lib/Support/BuryPointer.cpp
@@ -1,9 +1,8 @@
//===- BuryPointer.cpp - Memory Manipulation/Leak ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/COM.cpp b/lib/Support/COM.cpp
index 97cd085853b0..f37b95ba8651 100644
--- a/lib/Support/COM.cpp
+++ b/lib/Support/COM.cpp
@@ -1,9 +1,8 @@
//===-- COM.cpp - Implement COM utility classes -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/CRC.cpp b/lib/Support/CRC.cpp
new file mode 100644
index 000000000000..fd98f3a24003
--- /dev/null
+++ b/lib/Support/CRC.cpp
@@ -0,0 +1,68 @@
+//===--- CRC.cpp - Cyclic Redundancy Check implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm::crc32 function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CRC.h"
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Threading.h"
+#include <array>
+
+using namespace llvm;
+
+#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H
+using CRC32Table = std::array<uint32_t, 256>;
+
+static void initCRC32Table(CRC32Table *Tbl) {
+ auto Shuffle = [](uint32_t V) {
+ return (V & 1) ? (V >> 1) ^ 0xEDB88320U : V >> 1;
+ };
+
+ for (size_t I = 0; I < Tbl->size(); ++I) {
+ uint32_t V = Shuffle(I);
+ V = Shuffle(V);
+ V = Shuffle(V);
+ V = Shuffle(V);
+ V = Shuffle(V);
+ V = Shuffle(V);
+ V = Shuffle(V);
+ (*Tbl)[I] = Shuffle(V);
+ }
+}
+
+uint32_t llvm::crc32(uint32_t CRC, StringRef S) {
+ static llvm::once_flag InitFlag;
+ static CRC32Table Tbl;
+ llvm::call_once(InitFlag, initCRC32Table, &Tbl);
+
+ const uint8_t *P = reinterpret_cast<const uint8_t *>(S.data());
+ size_t Len = S.size();
+ CRC ^= 0xFFFFFFFFU;
+ for (; Len >= 8; Len -= 8) {
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ }
+ while (Len--)
+ CRC = Tbl[(CRC ^ *P++) & 0xFF] ^ (CRC >> 8);
+ return CRC ^ 0xFFFFFFFFU;
+}
+#else
+#include <zlib.h>
+uint32_t llvm::crc32(uint32_t CRC, StringRef S) {
+ return ::crc32(CRC, (const Bytef *)S.data(), S.size());
+}
+#endif
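
A bit-at-a-time reference for the same reflected CRC-32 (polynomial 0xEDB88320) that the table-driven fallback above computes; it is only a cross-check sketch, and the "123456789" -> 0xCBF43926 result is the standard CRC-32 check value:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Processes one bit per step instead of one byte per table lookup, but applies
// the same pre/post inversion and the same reflected polynomial.
static uint32_t crc32Reference(uint32_t CRC, const char *Data, size_t Len) {
  CRC ^= 0xFFFFFFFFU;
  for (size_t I = 0; I < Len; ++I) {
    CRC ^= (uint8_t)Data[I];
    for (int Bit = 0; Bit < 8; ++Bit)
      CRC = (CRC & 1) ? (CRC >> 1) ^ 0xEDB88320U : CRC >> 1;
  }
  return CRC ^ 0xFFFFFFFFU;
}

int main() {
  const char *Msg = "123456789";
  std::printf("%08X\n", crc32Reference(0, Msg, std::strlen(Msg))); // CBF43926
}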
diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp
index a0aa6024b3ed..9813eec0e433 100644
--- a/lib/Support/CachePruning.cpp
+++ b/lib/Support/CachePruning.cpp
@@ -1,9 +1,8 @@
//===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,15 +35,8 @@ struct FileInfo {
/// Used to determine which files to prune first. Also used to determine
/// set membership, so must take into account all fields.
bool operator<(const FileInfo &Other) const {
- if (Time < Other.Time)
- return true;
- else if (Other.Time < Time)
- return false;
- if (Other.Size < Size)
- return true;
- else if (Size < Other.Size)
- return false;
- return Path < Other.Path;
+ return std::tie(Time, Other.Size, Path) <
+ std::tie(Other.Time, Size, Other.Path);
}
};
} // anonymous namespace
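
A small sketch of the std::tie comparison used in the new FileInfo::operator<: placing Other.Size on the left-hand tuple and Size on the right flips the sort direction for that one field, so files order by ascending time, then descending size, then ascending path. Entry and the driver below are illustrative, not the cache-pruning types:

#include <algorithm>
#include <cstdio>
#include <string>
#include <tuple>
#include <vector>

struct Entry {
  long Time;
  unsigned long Size;
  std::string Path;
  bool operator<(const Entry &Other) const {
    // Same shape as the FileInfo comparison: Size operands are swapped to
    // sort that field in descending order.
    return std::tie(Time, Other.Size, Path) <
           std::tie(Other.Time, Size, Other.Path);
  }
};

int main() {
  std::vector<Entry> Files = {{10, 100, "a"}, {10, 300, "b"}, {5, 50, "c"}};
  std::sort(Files.begin(), Files.end());
  for (const Entry &E : Files) // c (oldest), then b (bigger), then a
    std::printf("%s time=%ld size=%lu\n", E.Path.c_str(), E.Time, E.Size);
}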
diff --git a/lib/Support/Chrono.cpp b/lib/Support/Chrono.cpp
index a2626a89eb63..8c28d45d8822 100644
--- a/lib/Support/Chrono.cpp
+++ b/lib/Support/Chrono.cpp
@@ -1,9 +1,8 @@
//===- Support/Chrono.cpp - Utilities for Timing Manipulation ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/CodeGenCoverage.cpp b/lib/Support/CodeGenCoverage.cpp
index 811020e3254a..f39eb7533b43 100644
--- a/lib/Support/CodeGenCoverage.cpp
+++ b/lib/Support/CodeGenCoverage.cpp
@@ -1,9 +1,8 @@
//===- lib/Support/CodeGenCoverage.cpp -------------------------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index f7290b54dcf3..25510fa58ff5 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1,9 +1,8 @@
//===-- CommandLine.cpp - Command line parser implementation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,7 @@ template class basic_parser<bool>;
template class basic_parser<boolOrDefault>;
template class basic_parser<int>;
template class basic_parser<unsigned>;
+template class basic_parser<unsigned long>;
template class basic_parser<unsigned long long>;
template class basic_parser<double>;
template class basic_parser<float>;
@@ -79,6 +79,7 @@ void parser<bool>::anchor() {}
void parser<boolOrDefault>::anchor() {}
void parser<int>::anchor() {}
void parser<unsigned>::anchor() {}
+void parser<unsigned long>::anchor() {}
void parser<unsigned long long>::anchor() {}
void parser<double>::anchor() {}
void parser<float>::anchor() {}
@@ -87,8 +88,47 @@ void parser<char>::anchor() {}
//===----------------------------------------------------------------------===//
+static StringRef ArgPrefix = " -";
+static StringRef ArgPrefixLong = " --";
+static StringRef ArgHelpPrefix = " - ";
+
+static size_t argPlusPrefixesSize(StringRef ArgName) {
+ size_t Len = ArgName.size();
+ if (Len == 1)
+ return Len + ArgPrefix.size() + ArgHelpPrefix.size();
+ return Len + ArgPrefixLong.size() + ArgHelpPrefix.size();
+}
+
+static StringRef argPrefix(StringRef ArgName) {
+ if (ArgName.size() == 1)
+ return ArgPrefix;
+ return ArgPrefixLong;
+}
+
+// Option predicates...
+static inline bool isGrouping(const Option *O) {
+ return O->getMiscFlags() & cl::Grouping;
+}
+static inline bool isPrefixedOrGrouping(const Option *O) {
+ return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
+ O->getFormattingFlag() == cl::AlwaysPrefix;
+}
+
+
namespace {
+class PrintArg {
+ StringRef ArgName;
+public:
+ PrintArg(StringRef ArgName) : ArgName(ArgName) {}
+ friend raw_ostream &operator<<(raw_ostream &OS, const PrintArg&);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const PrintArg& Arg) {
+ OS << argPrefix(Arg.ArgName) << Arg.ArgName;
+ return OS;
+}
+
class CommandLineParser {
public:
// Globals for name and overview of program. Program name is not a string to
@@ -99,6 +139,11 @@ public:
// This collects additional help to be printed.
std::vector<StringRef> MoreHelp;
+ // This collects Options added with the cl::DefaultOption flag. Since they can
+ // be overridden, they are not added to the appropriate SubCommands until
+ // ParseCommandLineOptions actually runs.
+ SmallVector<Option*, 4> DefaultOptions;
+
// This collects the different option categories that have been registered.
SmallPtrSet<OptionCategory *, 16> RegisteredOptionCategories;
@@ -113,7 +158,8 @@ public:
void ResetAllOptionOccurrences();
bool ParseCommandLineOptions(int argc, const char *const *argv,
- StringRef Overview, raw_ostream *Errs = nullptr);
+ StringRef Overview, raw_ostream *Errs = nullptr,
+ bool LongOptionsUseDoubleDash = false);
void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) {
if (Opt.hasArgStr())
@@ -147,6 +193,11 @@ public:
void addOption(Option *O, SubCommand *SC) {
bool HadErrors = false;
if (O->hasArgStr()) {
+ // If it's a DefaultOption, check to make sure it isn't already there.
+ if (O->isDefaultOption() &&
+ SC->OptionsMap.find(O->ArgStr) != SC->OptionsMap.end())
+ return;
+
// Add argument to the argument map!
if (!SC->OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) {
errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr
@@ -186,7 +237,12 @@ public:
}
}
- void addOption(Option *O) {
+ void addOption(Option *O, bool ProcessDefaultOption = false) {
+ if (!ProcessDefaultOption && O->isDefaultOption()) {
+ DefaultOptions.push_back(O);
+ return;
+ }
+
if (O->Subs.empty()) {
addOption(O, &*TopLevelSubCommand);
} else {
@@ -202,8 +258,12 @@ public:
OptionNames.push_back(O->ArgStr);
SubCommand &Sub = *SC;
- for (auto Name : OptionNames)
- Sub.OptionsMap.erase(Name);
+ auto End = Sub.OptionsMap.end();
+ for (auto Name : OptionNames) {
+ auto I = Sub.OptionsMap.find(Name);
+ if (I != End && I->getValue() == O)
+ Sub.OptionsMap.erase(I);
+ }
if (O->getFormattingFlag() == cl::Positional)
for (auto Opt = Sub.PositionalOpts.begin();
@@ -267,8 +327,13 @@ public:
if (O->Subs.empty())
updateArgStr(O, NewName, &*TopLevelSubCommand);
else {
- for (auto SC : O->Subs)
- updateArgStr(O, NewName, SC);
+ if (O->isInAllSubCommands()) {
+ for (auto SC : RegisteredSubCommands)
+ updateArgStr(O, NewName, SC);
+ } else {
+ for (auto SC : O->Subs)
+ updateArgStr(O, NewName, SC);
+ }
}
}
@@ -332,12 +397,21 @@ public:
AllSubCommands->reset();
registerSubCommand(&*TopLevelSubCommand);
registerSubCommand(&*AllSubCommands);
+
+ DefaultOptions.clear();
}
private:
SubCommand *ActiveSubCommand;
Option *LookupOption(SubCommand &Sub, StringRef &Arg, StringRef &Value);
+ Option *LookupLongOption(SubCommand &Sub, StringRef &Arg, StringRef &Value,
+ bool LongOptionsUseDoubleDash, bool HaveDoubleDash) {
+ Option *Opt = LookupOption(Sub, Arg, Value);
+ if (Opt && LongOptionsUseDoubleDash && !HaveDoubleDash && !isGrouping(Opt))
+ return nullptr;
+ return Opt;
+ }
SubCommand *LookupSubCommand(StringRef Name);
};
@@ -365,6 +439,26 @@ void Option::setArgStr(StringRef S) {
GlobalParser->updateArgStr(this, S);
assert((S.empty() || S[0] != '-') && "Option can't start with '-");
ArgStr = S;
+ if (ArgStr.size() == 1)
+ setMiscFlag(Grouping);
+}
+
+void Option::addCategory(OptionCategory &C) {
+ assert(!Categories.empty() && "Categories cannot be empty.");
+ // Maintain backward compatibility by replacing the default GeneralCategory
+ // if it's still set. Otherwise, just add the new one. The GeneralCategory
+ // must be explicitly added if you want multiple categories that include it.
+ if (&C != &GeneralCategory && Categories[0] == &GeneralCategory)
+ Categories[0] = &C;
+ else if (find(Categories, &C) == Categories.end())
+ Categories.push_back(&C);
+}
+
+void Option::reset() {
+ NumOccurrences = 0;
+ setDefault();
+ if (isDefaultOption())
+ removeArgument();
}
// Initialise the general option category.
@@ -374,7 +468,11 @@ void OptionCategory::registerCategory() {
GlobalParser->registerCategory(this);
}
-// A special subcommand representing no subcommand
+// A special subcommand representing no subcommand. It is particularly important
+// that this ManagedStatic uses constant initialization and not dynamic
+// initialization because it is referenced from cl::opt constructors, which run
+// dynamically in an arbitrary order.
+LLVM_REQUIRE_CONSTANT_INITIALIZATION
ManagedStatic<SubCommand> llvm::cl::TopLevelSubCommand;
// A special subcommand that can be used to put an option into all subcommands.
@@ -599,15 +697,6 @@ static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy);
}
-// Option predicates...
-static inline bool isGrouping(const Option *O) {
- return O->getFormattingFlag() == cl::Grouping;
-}
-static inline bool isPrefixedOrGrouping(const Option *O) {
- return isGrouping(O) || O->getFormattingFlag() == cl::Prefix ||
- O->getFormattingFlag() == cl::AlwaysPrefix;
-}
-
// getOptionPred - Check to see if there are any options that satisfy the
// specified predicate with names that are the prefixes in Name. This is
// checked by progressively stripping characters off of the name, checking to
@@ -617,8 +706,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) {
static Option *getOptionPred(StringRef Name, size_t &Length,
bool (*Pred)(const Option *),
const StringMap<Option *> &OptionsMap) {
-
StringMap<Option *>::const_iterator OMI = OptionsMap.find(Name);
+ if (OMI != OptionsMap.end() && !Pred(OMI->getValue()))
+ OMI = OptionsMap.end();
// Loop while we haven't found an option and Name still has at least two
// characters in it (so that the next iteration will not be the empty
@@ -626,6 +716,8 @@ static Option *getOptionPred(StringRef Name, size_t &Length,
while (OMI == OptionsMap.end() && Name.size() > 1) {
Name = Name.substr(0, Name.size() - 1); // Chop off the last character.
OMI = OptionsMap.find(Name);
+ if (OMI != OptionsMap.end() && !Pred(OMI->getValue()))
+ OMI = OptionsMap.end();
}
if (OMI != OptionsMap.end() && Pred(OMI->second)) {
@@ -652,40 +744,46 @@ HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
if (!PGOpt)
return nullptr;
- // If the option is a prefixed option, then the value is simply the
- // rest of the name... so fall through to later processing, by
- // setting up the argument name flags and value fields.
- if (PGOpt->getFormattingFlag() == cl::Prefix ||
- PGOpt->getFormattingFlag() == cl::AlwaysPrefix) {
- Value = Arg.substr(Length);
+ do {
+ StringRef MaybeValue =
+ (Length < Arg.size()) ? Arg.substr(Length) : StringRef();
Arg = Arg.substr(0, Length);
assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt);
- return PGOpt;
- }
- // This must be a grouped option... handle them now. Grouping options can't
- // have values.
- assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+ // cl::Prefix options do not preserve '=' when used separately.
+ // The behavior for them with grouped options should be the same.
+ if (MaybeValue.empty() || PGOpt->getFormattingFlag() == cl::AlwaysPrefix ||
+ (PGOpt->getFormattingFlag() == cl::Prefix && MaybeValue[0] != '=')) {
+ Value = MaybeValue;
+ return PGOpt;
+ }
- do {
- // Move current arg name out of Arg into OneArgName.
- StringRef OneArgName = Arg.substr(0, Length);
- Arg = Arg.substr(Length);
-
- // Because ValueRequired is an invalid flag for grouped arguments,
- // we don't need to pass argc/argv in.
- assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired &&
- "Option can not be cl::Grouping AND cl::ValueRequired!");
+ if (MaybeValue[0] == '=') {
+ Value = MaybeValue.substr(1);
+ return PGOpt;
+ }
+
+ // This must be a grouped option.
+ assert(isGrouping(PGOpt) && "Broken getOptionPred!");
+
+ // Grouping options inside a group can't have values.
+ if (PGOpt->getValueExpectedFlag() == cl::ValueRequired) {
+ ErrorParsing |= PGOpt->error("may not occur within a group!");
+ return nullptr;
+ }
+
+ // Because the value for the option is not required, we don't need to pass
+ // argc/argv in.
int Dummy = 0;
- ErrorParsing |=
- ProvideOption(PGOpt, OneArgName, StringRef(), 0, nullptr, Dummy);
+ ErrorParsing |= ProvideOption(PGOpt, Arg, StringRef(), 0, nullptr, Dummy);
// Get the next grouping option.
+ Arg = MaybeValue;
PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
- } while (PGOpt && Length != Arg.size());
+ } while (PGOpt);
- // Return the last option with Arg cut down to just the last one.
- return PGOpt;
+ // We could not find a grouping option in the remainder of Arg.
+ return nullptr;
}
static bool RequiresValue(const Option *O) {
@@ -869,6 +967,13 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
// QUOTED state means that it's reading a token quoted by double quotes.
if (State == QUOTED) {
if (C == '"') {
+ if (I < (E - 1) && Src[I + 1] == '"') {
+ // Consecutive double-quotes inside a quoted string implies one
+ // double-quote.
+ Token.push_back('"');
+ I = I + 1;
+ continue;
+ }
State = UNQUOTED;
continue;
}
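
A stripped-down scanner showing just the rule this hunk adds to the Windows tokenizer: inside a double-quoted token, two consecutive quotes produce one literal quote rather than closing the string. Backslash handling and the rest of the state machine are omitted:

#include <cstdio>
#include <string>

static std::string unquote(const std::string &Src) {
  std::string Token;
  bool Quoted = false;
  for (size_t I = 0; I < Src.size(); ++I) {
    char C = Src[I];
    if (C != '"') {
      Token.push_back(C);
      continue;
    }
    if (Quoted && I + 1 < Src.size() && Src[I + 1] == '"') {
      Token.push_back('"'); // "" inside quotes -> one literal "
      ++I;
      continue;
    }
    Quoted = !Quoted; // opening or closing quote, not copied
  }
  return Token;
}

int main() {
  std::printf("%s\n", unquote("\"a \"\"b\"\" c\"").c_str()); // prints: a "b" c
}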
@@ -992,41 +1097,84 @@ static bool ExpandResponseFile(StringRef FName, StringSaver &Saver,
bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &Argv,
bool MarkEOLs, bool RelativeNames) {
- unsigned RspFiles = 0;
bool AllExpanded = true;
+ struct ResponseFileRecord {
+ const char *File;
+ size_t End;
+ };
+
+ // To detect recursive response files, we maintain a stack of files and the
+ // position of the last argument in the file. This position is updated
+ // dynamically as we recursively expand files.
+ SmallVector<ResponseFileRecord, 3> FileStack;
+
+ // Push a dummy entry that represents the initial command line, removing
+ // the need to check for an empty list.
+ FileStack.push_back({"", Argv.size()});
// Don't cache Argv.size() because it can change.
for (unsigned I = 0; I != Argv.size();) {
+ while (I == FileStack.back().End) {
+ // Passing the end of a file's argument list, so we can remove it from the
+ // stack.
+ FileStack.pop_back();
+ }
+
const char *Arg = Argv[I];
// Check if it is an EOL marker
if (Arg == nullptr) {
++I;
continue;
}
+
if (Arg[0] != '@') {
++I;
continue;
}
- // If we have too many response files, leave some unexpanded. This avoids
- // crashing on self-referential response files.
- if (RspFiles++ > 20)
- return false;
+ const char *FName = Arg + 1;
+ auto IsEquivalent = [FName](const ResponseFileRecord &RFile) {
+ return sys::fs::equivalent(RFile.File, FName);
+ };
+
+ // Check for recursive response files.
+ if (std::any_of(FileStack.begin() + 1, FileStack.end(), IsEquivalent)) {
+ // This file is recursive, so we leave it in the argument stream and
+ // move on.
+ AllExpanded = false;
+ ++I;
+ continue;
+ }
// Replace this response file argument with the tokenization of its
// contents. Nested response files are expanded in subsequent iterations.
SmallVector<const char *, 0> ExpandedArgv;
- if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv,
- MarkEOLs, RelativeNames)) {
+ if (!ExpandResponseFile(FName, Saver, Tokenizer, ExpandedArgv, MarkEOLs,
+ RelativeNames)) {
// We couldn't read this file, so we leave it in the argument stream and
// move on.
AllExpanded = false;
++I;
continue;
}
+
+ for (ResponseFileRecord &Record : FileStack) {
+ // Increase the end of all active records by the number of newly expanded
+ // arguments, minus the response file itself.
+ Record.End += ExpandedArgv.size() - 1;
+ }
+
+ FileStack.push_back({FName, I + ExpandedArgv.size()});
Argv.erase(Argv.begin() + I);
Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end());
}
+
+ // If successful, the top of the file stack will mark the end of the Argv
+ // stream. A failure here indicates a bug in the stack popping logic above.
+ // Note that FileStack may have more than one element at this point because we
+ // don't have a chance to pop the stack when encountering recursive files at
+ // the end of the stream, so seeing that doesn't indicate a bug.
+ assert(FileStack.size() > 0 && Argv.size() == FileStack.back().End);
return AllExpanded;
}
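
A toy model of the new recursion guard: a stack of (file, end-of-its-arguments) records whose End fields grow as arguments are spliced in, and an @file that is already on the stack is left unexpanded. File contents are faked with a map here; the real code tokenizes response files read from disk:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

static bool expand(std::vector<std::string> &Argv,
                   const std::map<std::string, std::vector<std::string>> &FS) {
  struct Record { std::string File; size_t End; };
  // Dummy record for the initial command line, as in the hunk above.
  std::vector<Record> Stack = {{"", Argv.size()}};
  bool AllExpanded = true;

  for (size_t I = 0; I != Argv.size();) {
    while (I == Stack.back().End)
      Stack.pop_back(); // left the argument range of the most recent file
    if (Argv[I].empty() || Argv[I][0] != '@') { ++I; continue; }

    std::string Name = Argv[I].substr(1);
    bool Recursive = false;
    for (size_t S = 1; S < Stack.size(); ++S)
      Recursive |= Stack[S].File == Name;
    auto It = FS.find(Name);
    if (Recursive || It == FS.end()) { AllExpanded = false; ++I; continue; }

    const std::vector<std::string> &Body = It->second;
    for (Record &R : Stack)
      R.End += Body.size() - 1; // splice grows every enclosing range
    Stack.push_back({Name, I + Body.size()});
    Argv.erase(Argv.begin() + I);
    Argv.insert(Argv.begin() + I, Body.begin(), Body.end());
  }
  return AllExpanded;
}

int main() {
  std::map<std::string, std::vector<std::string>> FS = {
      {"a.rsp", {"-x", "@b.rsp"}}, {"b.rsp", {"@a.rsp", "-y"}}};
  std::vector<std::string> Argv = {"tool", "@a.rsp"};
  bool OK = expand(Argv, FS);
  for (const std::string &A : Argv)
    std::printf("%s ", A.c_str()); // tool -x @a.rsp -y
  std::printf("\nall expanded: %s\n", OK ? "yes" : "no");
}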
@@ -1071,7 +1219,8 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
StringRef Overview, raw_ostream *Errs,
- const char *EnvVar) {
+ const char *EnvVar,
+ bool LongOptionsUseDoubleDash) {
SmallVector<const char *, 20> NewArgv;
BumpPtrAllocator A;
StringSaver Saver(A);
@@ -1091,7 +1240,7 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
// Parse all options.
return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview,
- Errs);
+ Errs, LongOptionsUseDoubleDash);
}
void CommandLineParser::ResetAllOptionOccurrences() {
@@ -1106,7 +1255,8 @@ void CommandLineParser::ResetAllOptionOccurrences() {
bool CommandLineParser::ParseCommandLineOptions(int argc,
const char *const *argv,
StringRef Overview,
- raw_ostream *Errs) {
+ raw_ostream *Errs,
+ bool LongOptionsUseDoubleDash) {
assert(hasOptions() && "No options specified!");
// Expand response files.
@@ -1152,6 +1302,10 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
auto &SinkOpts = ChosenSubCommand->SinkOpts;
auto &OptionsMap = ChosenSubCommand->OptionsMap;
+ for (auto O: DefaultOptions) {
+ addOption(O, true);
+ }
+
if (ConsumeAfterOpt) {
assert(PositionalOpts.size() > 0 &&
"Cannot specify cl::ConsumeAfter without a positional argument!");
@@ -1212,6 +1366,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
std::string NearestHandlerString;
StringRef Value;
StringRef ArgName = "";
+ bool HaveDoubleDash = false;
// Check to see if this is a positional argument. This argument is
// considered to be positional if it doesn't start with '-', if it is "-"
@@ -1249,26 +1404,31 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
// option is another positional argument. If so, treat it as an argument,
// otherwise feed it to the eating positional.
ArgName = StringRef(argv[i] + 1);
- // Eat leading dashes.
- while (!ArgName.empty() && ArgName[0] == '-')
+ // Eat second dash.
+ if (!ArgName.empty() && ArgName[0] == '-') {
+ HaveDoubleDash = true;
ArgName = ArgName.substr(1);
+ }
- Handler = LookupOption(*ChosenSubCommand, ArgName, Value);
+ Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value,
+ LongOptionsUseDoubleDash, HaveDoubleDash);
if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
ProvidePositionalOption(ActivePositionalArg, StringRef(argv[i]), i);
continue; // We are done!
}
-
} else { // We start with a '-', must be an argument.
ArgName = StringRef(argv[i] + 1);
- // Eat leading dashes.
- while (!ArgName.empty() && ArgName[0] == '-')
+ // Eat second dash.
+ if (!ArgName.empty() && ArgName[0] == '-') {
+ HaveDoubleDash = true;
ArgName = ArgName.substr(1);
+ }
- Handler = LookupOption(*ChosenSubCommand, ArgName, Value);
+ Handler = LookupLongOption(*ChosenSubCommand, ArgName, Value,
+ LongOptionsUseDoubleDash, HaveDoubleDash);
// Check to see if this "option" is really a prefixed or grouped argument.
- if (!Handler)
+ if (!Handler && !(LongOptionsUseDoubleDash && HaveDoubleDash))
Handler = HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing,
OptionsMap);
@@ -1282,12 +1442,12 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
if (!Handler) {
if (SinkOpts.empty()) {
*Errs << ProgramName << ": Unknown command line argument '" << argv[i]
- << "'. Try: '" << argv[0] << " -help'\n";
+ << "'. Try: '" << argv[0] << " --help'\n";
if (NearestHandler) {
// If we know a near match, report it as well.
- *Errs << ProgramName << ": Did you mean '-" << NearestHandlerString
- << "'?\n";
+ *Errs << ProgramName << ": Did you mean '"
+ << PrintArg(NearestHandlerString) << "'?\n";
}
ErrorParsing = true;
@@ -1321,14 +1481,14 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
<< " positional argument" << (NumPositionalRequired > 1 ? "s" : "")
- << ": See: " << argv[0] << " -help\n";
+ << ": See: " << argv[0] << " --help\n";
ErrorParsing = true;
} else if (!HasUnlimitedPositionals &&
PositionalVals.size() > PositionalOpts.size()) {
*Errs << ProgramName << ": Too many positional arguments specified!\n"
<< "Can specify at most " << PositionalOpts.size()
- << " positional arguments: See: " << argv[0] << " -help\n";
+ << " positional arguments: See: " << argv[0] << " --help\n";
ErrorParsing = true;
} else if (!ConsumeAfterOpt) {
@@ -1441,7 +1601,7 @@ bool Option::error(const Twine &Message, StringRef ArgName, raw_ostream &Errs) {
if (ArgName.empty())
Errs << HelpStr; // Be nice for positional arguments
else
- Errs << GlobalParser->ProgramName << ": for the -" << ArgName;
+ Errs << GlobalParser->ProgramName << ": for the " << PrintArg(ArgName);
Errs << " option: " << Message << "\n";
return true;
@@ -1484,12 +1644,16 @@ static StringRef getValueStr(const Option &O, StringRef DefaultMsg) {
//
// Return the width of the option tag for printing...
-size_t alias::getOptionWidth() const { return ArgStr.size() + 6; }
+size_t alias::getOptionWidth() const {
+ return argPlusPrefixesSize(ArgStr);
+}
void Option::printHelpStr(StringRef HelpStr, size_t Indent,
- size_t FirstLineIndentedBy) {
+ size_t FirstLineIndentedBy) {
+ assert(Indent >= FirstLineIndentedBy);
std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
- outs().indent(Indent - FirstLineIndentedBy) << " - " << Split.first << "\n";
+ outs().indent(Indent - FirstLineIndentedBy)
+ << ArgHelpPrefix << Split.first << "\n";
while (!Split.second.empty()) {
Split = Split.second.split('\n');
outs().indent(Indent) << Split.first << "\n";
@@ -1498,8 +1662,8 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
- outs() << " -" << ArgStr;
- printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6);
+ outs() << PrintArg(ArgStr);
+ printHelpStr(HelpStr, GlobalWidth, argPlusPrefixesSize(ArgStr));
}
//===----------------------------------------------------------------------===//
@@ -1511,7 +1675,7 @@ void alias::printOptionInfo(size_t GlobalWidth) const {
// Return the width of the option tag for printing...
size_t basic_parser_impl::getOptionWidth(const Option &O) const {
- size_t Len = O.ArgStr.size();
+ size_t Len = argPlusPrefixesSize(O.ArgStr);
auto ValName = getValueName();
if (!ValName.empty()) {
size_t FormattingLen = 3;
@@ -1520,7 +1684,7 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const {
Len += getValueStr(O, ValName).size() + FormattingLen;
}
- return Len + 6;
+ return Len;
}
// printOptionInfo - Print out information about this option. The
@@ -1528,7 +1692,7 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const {
//
void basic_parser_impl::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
- outs() << " -" << O.ArgStr;
+ outs() << PrintArg(O.ArgStr);
auto ValName = getValueName();
if (!ValName.empty()) {
@@ -1544,7 +1708,7 @@ void basic_parser_impl::printOptionInfo(const Option &O,
void basic_parser_impl::printOptionName(const Option &O,
size_t GlobalWidth) const {
- outs() << " -" << O.ArgStr;
+ outs() << PrintArg(O.ArgStr);
outs().indent(GlobalWidth - O.ArgStr.size());
}
@@ -1603,6 +1767,16 @@ bool parser<unsigned>::parse(Option &O, StringRef ArgName, StringRef Arg,
return false;
}
+// parser<unsigned long> implementation
+//
+bool parser<unsigned long>::parse(Option &O, StringRef ArgName, StringRef Arg,
+ unsigned long &Value) {
+
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for ulong argument!");
+ return false;
+}
+
// parser<unsigned long long> implementation
//
bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
@@ -1610,7 +1784,7 @@ bool parser<unsigned long long>::parse(Option &O, StringRef ArgName,
unsigned long long &Value) {
if (Arg.getAsInteger(0, Value))
- return O.error("'" + Arg + "' value invalid for uint argument!");
+ return O.error("'" + Arg + "' value invalid for ullong argument!");
return false;
}
@@ -1652,12 +1826,29 @@ unsigned generic_parser_base::findOption(StringRef Name) {
return e;
}
+static StringRef EqValue = "=<value>";
+static StringRef EmptyOption = "<empty>";
+static StringRef OptionPrefix = " =";
+static size_t OptionPrefixesSize = OptionPrefix.size() + ArgHelpPrefix.size();
+
+static bool shouldPrintOption(StringRef Name, StringRef Description,
+ const Option &O) {
+ return O.getValueExpectedFlag() != ValueOptional || !Name.empty() ||
+ !Description.empty();
+}
+
// Return the width of the option tag for printing...
size_t generic_parser_base::getOptionWidth(const Option &O) const {
if (O.hasArgStr()) {
- size_t Size = O.ArgStr.size() + 6;
- for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
- Size = std::max(Size, getOption(i).size() + 8);
+ size_t Size =
+ argPlusPrefixesSize(O.ArgStr) + EqValue.size();
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ StringRef Name = getOption(i);
+ if (!shouldPrintOption(Name, getDescription(i), O))
+ continue;
+ size_t NameSize = Name.empty() ? EmptyOption.size() : Name.size();
+ Size = std::max(Size, NameSize + OptionPrefixesSize);
+ }
return Size;
} else {
size_t BaseSize = 0;
@@ -1673,20 +1864,46 @@ size_t generic_parser_base::getOptionWidth(const Option &O) const {
void generic_parser_base::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
if (O.hasArgStr()) {
- outs() << " -" << O.ArgStr;
- Option::printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6);
+ // When the value is optional, first print a line just describing the
+ // option without values.
+ if (O.getValueExpectedFlag() == ValueOptional) {
+ for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+ if (getOption(i).empty()) {
+ outs() << PrintArg(O.ArgStr);
+ Option::printHelpStr(O.HelpStr, GlobalWidth,
+ argPlusPrefixesSize(O.ArgStr));
+ break;
+ }
+ }
+ }
+ outs() << PrintArg(O.ArgStr) << EqValue;
+ Option::printHelpStr(O.HelpStr, GlobalWidth,
+ EqValue.size() +
+ argPlusPrefixesSize(O.ArgStr));
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
- size_t NumSpaces = GlobalWidth - getOption(i).size() - 8;
- outs() << " =" << getOption(i);
- outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
+ StringRef OptionName = getOption(i);
+ StringRef Description = getDescription(i);
+ if (!shouldPrintOption(OptionName, Description, O))
+ continue;
+ assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize);
+ size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize;
+ outs() << OptionPrefix << OptionName;
+ if (OptionName.empty()) {
+ outs() << EmptyOption;
+ assert(NumSpaces >= EmptyOption.size());
+ NumSpaces -= EmptyOption.size();
+ }
+ if (!Description.empty())
+ outs().indent(NumSpaces) << ArgHelpPrefix << " " << Description;
+ outs() << '\n';
}
} else {
if (!O.HelpStr.empty())
outs() << " " << O.HelpStr << '\n';
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
- auto Option = getOption(i);
- outs() << " -" << Option;
+ StringRef Option = getOption(i);
+ outs() << " " << PrintArg(Option);
Option::printHelpStr(getDescription(i), GlobalWidth, Option.size() + 8);
}
}
@@ -1700,7 +1917,7 @@ static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff
void generic_parser_base::printGenericOptionDiff(
const Option &O, const GenericOptionValue &Value,
const GenericOptionValue &Default, size_t GlobalWidth) const {
- outs() << " -" << O.ArgStr;
+ outs() << " " << PrintArg(O.ArgStr);
outs().indent(GlobalWidth - O.ArgStr.size());
unsigned NumOpts = getNumOptions();
@@ -1750,6 +1967,7 @@ PRINT_OPT_DIFF(bool)
PRINT_OPT_DIFF(boolOrDefault)
PRINT_OPT_DIFF(int)
PRINT_OPT_DIFF(unsigned)
+PRINT_OPT_DIFF(unsigned long)
PRINT_OPT_DIFF(unsigned long long)
PRINT_OPT_DIFF(double)
PRINT_OPT_DIFF(float)
@@ -1919,7 +2137,7 @@ public:
printSubCommands(Subs, MaxSubLen);
outs() << "\n";
outs() << " Type \"" << GlobalParser->ProgramName
- << " <subcommand> -help\" to get more help on a specific "
+ << " <subcommand> --help\" to get more help on a specific "
"subcommand";
}
@@ -1986,9 +2204,11 @@ protected:
// options within categories will also be alphabetically sorted.
for (size_t I = 0, E = Opts.size(); I != E; ++I) {
Option *Opt = Opts[I].second;
- assert(CategorizedOptions.count(Opt->Category) > 0 &&
- "Option has an unregistered category");
- CategorizedOptions[Opt->Category].push_back(Opt);
+ for (auto &Cat : Opt->Categories) {
+ assert(CategorizedOptions.count(Cat) > 0 &&
+ "Option has an unregistered category");
+ CategorizedOptions[Cat].push_back(Opt);
+ }
}
// Now do printing.
@@ -1996,7 +2216,7 @@ protected:
Category = SortedCategories.begin(),
E = SortedCategories.end();
Category != E; ++Category) {
- // Hide empty categories for -help, but show for -help-hidden.
+ // Hide empty categories for --help, but show for --help-hidden.
const auto &CategoryOptions = CategorizedOptions[*Category];
bool IsEmptyCategory = CategoryOptions.empty();
if (!ShowHidden && IsEmptyCategory)
@@ -2012,7 +2232,7 @@ protected:
else
outs() << "\n";
- // When using -help-hidden explicitly state if the category has no
+ // When using --help-hidden explicitly state if the category has no
// options associated with it.
if (IsEmptyCategory) {
outs() << " This option category has no options.\n";
@@ -2062,11 +2282,11 @@ static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter,
static cl::OptionCategory GenericCategory("Generic Options");
// Define uncategorized help printers.
-// -help-list is hidden by default because if Option categories are being used
-// then -help behaves the same as -help-list.
+// --help-list is hidden by default because if Option categories are being used
+// then --help behaves the same as --help-list.
static cl::opt<HelpPrinter, true, parser<bool>> HLOp(
"help-list",
- cl::desc("Display list of available options (-help-list-hidden for more)"),
+ cl::desc("Display list of available options (--help-list-hidden for more)"),
cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed,
cl::cat(GenericCategory), cl::sub(*AllSubCommands));
@@ -2080,10 +2300,13 @@ static cl::opt<HelpPrinter, true, parser<bool>>
// behaviour at runtime depending on whether one or more Option categories have
// been declared.
static cl::opt<HelpPrinterWrapper, true, parser<bool>>
- HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+ HOp("help", cl::desc("Display available options (--help-hidden for more)"),
cl::location(WrappedNormalPrinter), cl::ValueDisallowed,
cl::cat(GenericCategory), cl::sub(*AllSubCommands));
+static cl::alias HOpA("h", cl::desc("Alias for --help"), cl::aliasopt(HOp),
+ cl::DefaultOption);
+
static cl::opt<HelpPrinterWrapper, true, parser<bool>>
HHOp("help-hidden", cl::desc("Display all available options"),
cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed,
@@ -2108,7 +2331,7 @@ void HelpPrinterWrapper::operator=(bool Value) {
// registered then it is useful to show the categorized help instead of
// uncategorized help.
if (GlobalParser->RegisteredOptionCategories.size() > 1) {
- // unhide -help-list option so user can have uncategorized output if they
+ // unhide --help-list option so user can have uncategorized output if they
// want it.
HLOp.setHiddenFlag(NotHidden);
@@ -2242,21 +2465,21 @@ cl::getRegisteredSubcommands() {
void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) {
for (auto &I : Sub.OptionsMap) {
- if (I.second->Category != &Category &&
- I.second->Category != &GenericCategory)
- I.second->setHiddenFlag(cl::ReallyHidden);
+ for (auto &Cat : I.second->Categories) {
+ if (Cat != &Category &&
+ Cat != &GenericCategory)
+ I.second->setHiddenFlag(cl::ReallyHidden);
+ }
}
}
void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories,
SubCommand &Sub) {
- auto CategoriesBegin = Categories.begin();
- auto CategoriesEnd = Categories.end();
for (auto &I : Sub.OptionsMap) {
- if (std::find(CategoriesBegin, CategoriesEnd, I.second->Category) ==
- CategoriesEnd &&
- I.second->Category != &GenericCategory)
- I.second->setHiddenFlag(cl::ReallyHidden);
+ for (auto &Cat : I.second->Categories) {
+ if (find(Categories, Cat) == Categories.end() && Cat != &GenericCategory)
+ I.second->setHiddenFlag(cl::ReallyHidden);
+ }
}
}
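
The two HideUnrelatedOptions overloads above now walk each option's new Categories list instead of a single Category member. As a usage illustration only (a minimal sketch, not part of this patch; the tool name and options are made up), this is how a tool typically restricts --help output to its own category:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::OptionCategory MyToolCategory("my-tool options");
static cl::opt<bool> Fast("fast", cl::desc("Enable the fast path"),
                          cl::cat(MyToolCategory));

int main(int argc, char **argv) {
  // Options outside MyToolCategory (other than generic ones such as --help)
  // get marked ReallyHidden by the loop shown in the hunk above.
  cl::HideUnrelatedOptions(MyToolCategory);
  cl::ParseCommandLineOptions(argc, argv, "my-tool\n");
  return Fast ? 0 : 1;
}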
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
index 95261d4aad23..97d5ffaadf82 100644
--- a/lib/Support/Compression.cpp
+++ b/lib/Support/Compression.cpp
@@ -1,9 +1,8 @@
//===--- Compression.cpp - Compression implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp
index 8f02fae4f558..e24a918c5c89 100644
--- a/lib/Support/ConvertUTF.cpp
+++ b/lib/Support/ConvertUTF.cpp
@@ -1,9 +1,8 @@
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
*
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===------------------------------------------------------------------------=*/
/*
diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp
index 6cb4f6376250..eb4ead6b46b4 100644
--- a/lib/Support/ConvertUTFWrapper.cpp
+++ b/lib/Support/ConvertUTFWrapper.cpp
@@ -1,9 +1,8 @@
//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----===
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index be4b5c3e01c3..c2459256f8fe 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -1,9 +1,8 @@
//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index bd9f98b0b82d..4675fe3a9401 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -1,9 +1,8 @@
//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
//
// The algorithm we use attempts to exploit the dependency information by
diff --git a/lib/Support/DJB.cpp b/lib/Support/DJB.cpp
index 905dcf1b7e81..f06af7dfde44 100644
--- a/lib/Support/DJB.cpp
+++ b/lib/Support/DJB.cpp
@@ -1,9 +1,8 @@
//===-- Support/DJB.cpp ---DJB Hash -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -58,29 +57,26 @@ static UTF32 foldCharDwarf(UTF32 C) {
return sys::unicode::foldCharSimple(C);
}
-static uint32_t caseFoldingDjbHashCharSlow(StringRef &Buffer, uint32_t H) {
- UTF32 C = chopOneUTF32(Buffer);
-
- C = foldCharDwarf(C);
-
- std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
- StringRef Folded = toUTF8(C, Storage);
- return djbHash(Folded, H);
+static Optional<uint32_t> fastCaseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+ bool AllASCII = true;
+ for (unsigned char C : Buffer) {
+ H = H * 33 + ('A' <= C && C <= 'Z' ? C - 'A' + 'a' : C);
+ AllASCII &= C <= 0x7f;
+ }
+ if (AllASCII)
+ return H;
+ return None;
}
uint32_t llvm::caseFoldingDjbHash(StringRef Buffer, uint32_t H) {
+ if (Optional<uint32_t> Result = fastCaseFoldingDjbHash(Buffer, H))
+ return *Result;
+
+ std::array<UTF8, UNI_MAX_UTF8_BYTES_PER_CODE_POINT> Storage;
while (!Buffer.empty()) {
- unsigned char C = Buffer.front();
- if (LLVM_LIKELY(C <= 0x7f)) {
- // US-ASCII, encoded as one character in utf-8.
- // This is by far the most common case, so handle this specially.
- if (C >= 'A' && C <= 'Z')
- C = 'a' + (C - 'A'); // fold uppercase into lowercase
- H = (H << 5) + H + C;
- Buffer = Buffer.drop_front();
- continue;
- }
- H = caseFoldingDjbHashCharSlow(Buffer, H);
+ UTF32 C = foldCharDwarf(chopOneUTF32(Buffer));
+ StringRef Folded = toUTF8(C, Storage);
+ H = djbHash(Folded, H);
}
return H;
}
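
The rewritten caseFoldingDjbHash above first runs an ASCII-only fast path (hash every byte with the classic h = h * 33 + c step, folding 'A'..'Z' to lower case inline) and falls back to per-code-point Unicode folding only when a byte above 0x7f is seen. A self-contained sketch of that fast path, assuming the conventional DJB seed of 5381:

#include <cstdint>
#include <optional>
#include <string>

// ASCII-only case-folding DJB hash: returns no value when a non-ASCII byte is
// found, signalling that the caller must use the Unicode-aware slow path.
std::optional<uint32_t> asciiCaseFoldingDjb(const std::string &S,
                                            uint32_t H = 5381) {
  for (unsigned char C : S) {
    if (C > 0x7f)
      return std::nullopt;     // needs full Unicode case folding
    if (C >= 'A' && C <= 'Z')
      C = C - 'A' + 'a';       // fold upper case into lower case
    H = H * 33 + C;            // classic DJB step: h = h * 33 + c
  }
  return H;
}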
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 0199b300ba72..673bbb4d06f4 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -1,9 +1,8 @@
//===-- DataExtractor.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/LEB128.h"
using namespace llvm;
template <typename T>
@@ -146,47 +146,29 @@ StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const {
}
uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
- uint64_t result = 0;
- if (Data.empty())
+ assert(*offset_ptr <= Data.size());
+
+ const char *error;
+ unsigned bytes_read;
+ uint64_t result = decodeULEB128(
+ reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
+ reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
+ if (error)
return 0;
-
- unsigned shift = 0;
- uint32_t offset = *offset_ptr;
- uint8_t byte = 0;
-
- while (isValidOffset(offset)) {
- byte = Data[offset++];
- result |= uint64_t(byte & 0x7f) << shift;
- shift += 7;
- if ((byte & 0x80) == 0)
- break;
- }
-
- *offset_ptr = offset;
+ *offset_ptr += bytes_read;
return result;
}
int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const {
- int64_t result = 0;
- if (Data.empty())
+ assert(*offset_ptr <= Data.size());
+
+ const char *error;
+ unsigned bytes_read;
+ int64_t result = decodeSLEB128(
+ reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
+ reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
+ if (error)
return 0;
-
- unsigned shift = 0;
- uint32_t offset = *offset_ptr;
- uint8_t byte = 0;
-
- while (isValidOffset(offset)) {
- byte = Data[offset++];
- result |= uint64_t(byte & 0x7f) << shift;
- shift += 7;
- if ((byte & 0x80) == 0)
- break;
- }
-
- // Sign bit of byte is 2nd high order bit (0x40)
- if (shift < 64 && (byte & 0x40))
- result |= -(1ULL << shift);
-
- *offset_ptr = offset;
+ *offset_ptr += bytes_read;
return result;
}
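
Both getULEB128 and getSLEB128 now delegate to the bounds- and error-checked decoders from llvm/Support/LEB128.h instead of the hand-rolled loops deleted above. The encoding itself is simple: each byte carries 7 payload bits, least-significant group first, and the high bit marks continuation. A self-contained sketch of an unsigned decode, mirroring the removed loop:

#include <cstddef>
#include <cstdint>

// Minimal ULEB128 decode (no overflow checking, like the original loop).
uint64_t decodeUleb128(const uint8_t *P, const uint8_t *End, size_t &Read) {
  uint64_t Result = 0;
  unsigned Shift = 0;
  Read = 0;
  while (P + Read != End) {
    uint8_t Byte = P[Read++];
    Result |= uint64_t(Byte & 0x7f) << Shift; // low 7 bits are payload
    Shift += 7;
    if ((Byte & 0x80) == 0)                   // high bit clear: last byte
      break;
  }
  return Result;
}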
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 1a70017fee32..737cd576ed80 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -1,9 +1,8 @@
//===-- Debug.cpp - An easy way to add debug output to your code ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp
index 50ea4e9ce0c6..6aee69f43405 100644
--- a/lib/Support/DeltaAlgorithm.cpp
+++ b/lib/Support/DeltaAlgorithm.cpp
@@ -1,9 +1,8 @@
//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DeltaAlgorithm.h"
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 530e92d99a90..d23716016fb2 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -1,9 +1,8 @@
//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp
index 2149f21281d3..d18231c6ebf5 100644
--- a/lib/Support/Errno.cpp
+++ b/lib/Support/Errno.cpp
@@ -1,9 +1,8 @@
//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h"
#include "llvm/Support/raw_ostream.h"
#include <string.h>
diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp
index 30bfc3e6d2fb..72bc08af2ddb 100644
--- a/lib/Support/Error.cpp
+++ b/lib/Support/Error.cpp
@@ -1,9 +1,8 @@
//===----- lib/Support/Error.cpp - Error and associated utilities ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 21712c5c039e..0f13f7a536f1 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -1,9 +1,8 @@
//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -187,25 +186,13 @@ static void out_of_memory_new_handler() {
llvm::report_bad_alloc_error("Allocation failed");
}
-// Installs new handler that causes crash on allocation failure. It does not
-// need to be called explicitly, if this file is linked to application, because
-// in this case it is called during construction of 'new_handler_installer'.
+// Installs new handler that causes crash on allocation failure. It is called by
+// InitLLVM.
void llvm::install_out_of_memory_new_handler() {
- static bool out_of_memory_new_handler_installed = false;
- if (!out_of_memory_new_handler_installed) {
- std::set_new_handler(out_of_memory_new_handler);
- out_of_memory_new_handler_installed = true;
- }
+ std::new_handler old = std::set_new_handler(out_of_memory_new_handler);
+ (void)old;
+ assert(old == nullptr && "new-handler already installed");
}
-
-// Static object that causes installation of 'out_of_memory_new_handler' before
-// execution of 'main'.
-static class NewHandlerInstaller {
-public:
- NewHandlerInstaller() {
- install_out_of_memory_new_handler();
- }
-} new_handler_installer;
#endif
void llvm::llvm_unreachable_internal(const char *msg, const char *file,
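
With the static NewHandlerInstaller removed, nothing installs the out-of-memory new-handler automatically any more; per the updated comment, InitLLVM is expected to call install_out_of_memory_new_handler(). A minimal sketch of the usual tool entry point (assuming the standard InitLLVM constructor; the corresponding InitLLVM change is not part of this hunk):

#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/raw_ostream.h"

int main(int argc, char **argv) {
  // InitLLVM does the common per-tool setup and, per the comment above, is
  // now the place where install_out_of_memory_new_handler() gets called.
  llvm::InitLLVM X(argc, argv);
  llvm::outs() << "tool body goes here\n";
  return 0;
}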
diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp
index 37986c96c081..e0f17787bdf8 100644
--- a/lib/Support/FileCheck.cpp
+++ b/lib/Support/FileCheck.cpp
@@ -1,9 +1,8 @@
//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,18 +24,303 @@
using namespace llvm;
-/// Parses the given string into the Pattern.
-///
-/// \p Prefix provides which prefix is being matched, \p SM provides the
-/// SourceMgr used for error reports, and \p LineNumber is the line number in
-/// the input file from which the pattern string was read. Returns true in
-/// case of an error, false otherwise.
-bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
- SourceMgr &SM, unsigned LineNumber,
- const FileCheckRequest &Req) {
+void FileCheckNumericVariable::setValue(uint64_t NewValue) {
+ assert(!Value && "Overwriting numeric variable's value is not allowed");
+ Value = NewValue;
+}
+
+void FileCheckNumericVariable::clearValue() {
+ if (!Value)
+ return;
+ Value = None;
+}
+
+Expected<uint64_t> FileCheckNumericVariableUse::eval() const {
+ Optional<uint64_t> Value = NumericVariable->getValue();
+ if (Value)
+ return *Value;
+ return make_error<FileCheckUndefVarError>(Name);
+}
+
+Expected<uint64_t> FileCheckASTBinop::eval() const {
+ Expected<uint64_t> LeftOp = LeftOperand->eval();
+ Expected<uint64_t> RightOp = RightOperand->eval();
+
+ // Bubble up any error (e.g. undefined variables) in the recursive
+ // evaluation.
+ if (!LeftOp || !RightOp) {
+ Error Err = Error::success();
+ if (!LeftOp)
+ Err = joinErrors(std::move(Err), LeftOp.takeError());
+ if (!RightOp)
+ Err = joinErrors(std::move(Err), RightOp.takeError());
+ return std::move(Err);
+ }
+
+ return EvalBinop(*LeftOp, *RightOp);
+}
+
+Expected<std::string> FileCheckNumericSubstitution::getResult() const {
+ Expected<uint64_t> EvaluatedValue = ExpressionAST->eval();
+ if (!EvaluatedValue)
+ return EvaluatedValue.takeError();
+ return utostr(*EvaluatedValue);
+}
+
+Expected<std::string> FileCheckStringSubstitution::getResult() const {
+ // Look up the value and escape it so that we can put it into the regex.
+ Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
+ if (!VarVal)
+ return VarVal.takeError();
+ return Regex::escape(*VarVal);
+}
+
+bool FileCheckPattern::isValidVarNameStart(char C) {
+ return C == '_' || isalpha(C);
+}
+
+Expected<FileCheckPattern::VariableProperties>
+FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
+ if (Str.empty())
+ return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name");
+
+ bool ParsedOneChar = false;
+ unsigned I = 0;
+ bool IsPseudo = Str[0] == '@';
+
+ // Global vars start with '$'.
+ if (Str[0] == '$' || IsPseudo)
+ ++I;
+
+ for (unsigned E = Str.size(); I != E; ++I) {
+ if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
+ return FileCheckErrorDiagnostic::get(SM, Str, "invalid variable name");
+
+ // Variable names are composed of alphanumeric characters and underscores.
+ if (Str[I] != '_' && !isalnum(Str[I]))
+ break;
+ ParsedOneChar = true;
+ }
+
+ StringRef Name = Str.take_front(I);
+ Str = Str.substr(I);
+ return VariableProperties {Name, IsPseudo};
+}
+
+// StringRef holding all characters considered as horizontal whitespaces by
+// FileCheck input canonicalization.
+StringRef SpaceChars = " \t";
+
+// Parsing helper function that strips the first character in S and returns it.
+static char popFront(StringRef &S) {
+ char C = S.front();
+ S = S.drop_front();
+ return C;
+}
+
+char FileCheckUndefVarError::ID = 0;
+char FileCheckErrorDiagnostic::ID = 0;
+char FileCheckNotFoundError::ID = 0;
+
+Expected<FileCheckNumericVariable *>
+FileCheckPattern::parseNumericVariableDefinition(
+ StringRef &Expr, FileCheckPatternContext *Context,
+ Optional<size_t> LineNumber, const SourceMgr &SM) {
+ Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
+ if (!ParseVarResult)
+ return ParseVarResult.takeError();
+ StringRef Name = ParseVarResult->Name;
+
+ if (ParseVarResult->IsPseudo)
+ return FileCheckErrorDiagnostic::get(
+ SM, Name, "definition of pseudo numeric variable unsupported");
+
+ // Detect collisions between string and numeric variables when the latter
+ // is created later than the former.
+ if (Context->DefinedVariableTable.find(Name) !=
+ Context->DefinedVariableTable.end())
+ return FileCheckErrorDiagnostic::get(
+ SM, Name, "string variable with name '" + Name + "' already exists");
+
+ Expr = Expr.ltrim(SpaceChars);
+ if (!Expr.empty())
+ return FileCheckErrorDiagnostic::get(
+ SM, Expr, "unexpected characters after numeric variable name");
+
+ FileCheckNumericVariable *DefinedNumericVariable;
+ auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
+ if (VarTableIter != Context->GlobalNumericVariableTable.end())
+ DefinedNumericVariable = VarTableIter->second;
+ else
+ DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber);
+
+ return DefinedNumericVariable;
+}
+
+Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
+ const SourceMgr &SM) const {
+ if (IsPseudo && !Name.equals("@LINE"))
+ return FileCheckErrorDiagnostic::get(
+ SM, Name, "invalid pseudo numeric variable '" + Name + "'");
+
+ // Numeric variable definitions and uses are parsed in the order in which
+ // they appear in the CHECK patterns. For each definition, the pointer to the
+ // class instance of the corresponding numeric variable definition is stored
+ // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
+ // we get below is null, it means no such variable was defined before. When
+ // that happens, we create a dummy variable so that parsing can continue. All
+ // uses of undefined variables, whether string or numeric, are then diagnosed
+ // in printSubstitutions() after failing to match.
+ auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
+ FileCheckNumericVariable *NumericVariable;
+ if (VarTableIter != Context->GlobalNumericVariableTable.end())
+ NumericVariable = VarTableIter->second;
+ else {
+ NumericVariable = Context->makeNumericVariable(Name);
+ Context->GlobalNumericVariableTable[Name] = NumericVariable;
+ }
+
+ Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
+ if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
+ return FileCheckErrorDiagnostic::get(
+ SM, Name,
+ "numeric variable '" + Name + "' defined on the same line as used");
+
+ return llvm::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable);
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+ const SourceMgr &SM) const {
+ if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
+ // Try to parse as a numeric variable use.
+ Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+ parseVariable(Expr, SM);
+ if (ParseVarResult)
+ return parseNumericVariableUse(ParseVarResult->Name,
+ ParseVarResult->IsPseudo, SM);
+ if (AO == AllowedOperand::LineVar)
+ return ParseVarResult.takeError();
+ // Ignore the error and retry parsing as a literal.
+ consumeError(ParseVarResult.takeError());
+ }
+
+ // Otherwise, parse it as a literal.
+ uint64_t LiteralValue;
+ if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
+ return llvm::make_unique<FileCheckExpressionLiteral>(LiteralValue);
+
+ return FileCheckErrorDiagnostic::get(SM, Expr,
+ "invalid operand format '" + Expr + "'");
+}
+
+static uint64_t add(uint64_t LeftOp, uint64_t RightOp) {
+ return LeftOp + RightOp;
+}
+
+static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
+ return LeftOp - RightOp;
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseBinop(StringRef &Expr,
+ std::unique_ptr<FileCheckExpressionAST> LeftOp,
+ bool IsLegacyLineExpr, const SourceMgr &SM) const {
+ Expr = Expr.ltrim(SpaceChars);
+ if (Expr.empty())
+ return std::move(LeftOp);
+
+ // Check if this is a supported operation and select a function to perform
+ // it.
+ SMLoc OpLoc = SMLoc::getFromPointer(Expr.data());
+ char Operator = popFront(Expr);
+ binop_eval_t EvalBinop;
+ switch (Operator) {
+ case '+':
+ EvalBinop = add;
+ break;
+ case '-':
+ EvalBinop = sub;
+ break;
+ default:
+ return FileCheckErrorDiagnostic::get(
+ SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
+ }
+
+ // Parse right operand.
+ Expr = Expr.ltrim(SpaceChars);
+ if (Expr.empty())
+ return FileCheckErrorDiagnostic::get(SM, Expr,
+ "missing operand in expression");
+ // The second operand in a legacy @LINE expression is always a literal.
+ AllowedOperand AO =
+ IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
+ Expected<std::unique_ptr<FileCheckExpressionAST>> RightOpResult =
+ parseNumericOperand(Expr, AO, SM);
+ if (!RightOpResult)
+ return RightOpResult;
+
+ Expr = Expr.ltrim(SpaceChars);
+ return llvm::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp),
+ std::move(*RightOpResult));
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseNumericSubstitutionBlock(
+ StringRef Expr,
+ Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
+ bool IsLegacyLineExpr, const SourceMgr &SM) const {
+ // Parse the numeric variable definition.
+ DefinedNumericVariable = None;
+ size_t DefEnd = Expr.find(':');
+ if (DefEnd != StringRef::npos) {
+ StringRef DefExpr = Expr.substr(0, DefEnd);
+ StringRef UseExpr = Expr.substr(DefEnd + 1);
+
+ UseExpr = UseExpr.ltrim(SpaceChars);
+ if (!UseExpr.empty())
+ return FileCheckErrorDiagnostic::get(
+ SM, UseExpr,
+ "unexpected string after variable definition: '" + UseExpr + "'");
+
+ DefExpr = DefExpr.ltrim(SpaceChars);
+ Expected<FileCheckNumericVariable *> ParseResult =
+ parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM);
+ if (!ParseResult)
+ return ParseResult.takeError();
+ DefinedNumericVariable = *ParseResult;
+
+ return nullptr;
+ }
+
+ // Parse the expression itself.
+ Expr = Expr.ltrim(SpaceChars);
+ // The first operand in a legacy @LINE expression is always the @LINE pseudo
+ // variable.
+ AllowedOperand AO =
+ IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
+ Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+ parseNumericOperand(Expr, AO, SM);
+ while (ParseResult && !Expr.empty()) {
+ ParseResult =
+ parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM);
+ // Legacy @LINE expressions only allow 2 operands.
+ if (ParseResult && IsLegacyLineExpr && !Expr.empty())
+ return FileCheckErrorDiagnostic::get(
+ SM, Expr,
+ "unexpected characters at end of expression '" + Expr + "'");
+ }
+ if (!ParseResult)
+ return ParseResult;
+ return std::move(*ParseResult);
+}
+
+bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
+ SourceMgr &SM,
+ const FileCheckRequest &Req) {
bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
- this->LineNumber = LineNumber;
PatternLoc = SMLoc::getFromPointer(PatternStr.data());
if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
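
The new parsing code above accepts numeric substitution blocks alongside the existing string ones. As a usage illustration only (a hypothetical check file, not taken from this patch): a definition has the form [[#VAR:]] and matches [0-9]+, a use may add or subtract another operand, and [[@LINE+N]] remains the accepted legacy spelling:

; Hypothetical check file (not from this patch) exercising the new syntax:
; CHECK: allocated [[#SIZE:]] bytes
; CHECK: freed [[#SIZE]] bytes
; CHECK: header plus payload is [[#SIZE+16]] bytes
; CHECK: diagnostic expected on line [[@LINE+1]]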
@@ -112,95 +396,164 @@ bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
continue;
}
- // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
- // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
- // second form is [[foo]] which is a reference to foo. The variable name
- // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
- // it. This is to catch some common errors.
+ // String and numeric substitution blocks. String substitution blocks come
+ // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
+ // other regex) and assigns it to the string variable 'foo'. The latter
+ // substitutes foo's value. Numeric substitution blocks work the same way
+ // as string ones, but start with a '#' sign after the double brackets.
+ // Both string and numeric variable names must satisfy the regular
+ // expression "[a-zA-Z_][0-9a-zA-Z_]*" to be valid, as this helps catch
+ // some common errors.
if (PatternStr.startswith("[[")) {
+ StringRef UnparsedPatternStr = PatternStr.substr(2);
// Find the closing bracket pair ending the match. End is going to be an
// offset relative to the beginning of the match string.
- size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
+ size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
+ StringRef MatchStr = UnparsedPatternStr.substr(0, End);
+ bool IsNumBlock = MatchStr.consume_front("#");
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
SourceMgr::DK_Error,
- "invalid named regex reference, no ]] found");
+ "Invalid substitution block, no ]] found");
return true;
}
+ // Strip the substitution block we are parsing. End points to the start
+ // of the "]]" closing the expression so account for it in computing the
+ // index of the first unparsed character.
+ PatternStr = UnparsedPatternStr.substr(End + 2);
+
+ bool IsDefinition = false;
+ // Whether the substitution block is a legacy use of @LINE with string
+ // substitution block syntax.
+ bool IsLegacyLineExpr = false;
+ StringRef DefName;
+ StringRef SubstStr;
+ StringRef MatchRegexp;
+ size_t SubstInsertIdx = RegExStr.size();
+
+ // Parse string variable or legacy @LINE expression.
+ if (!IsNumBlock) {
+ size_t VarEndIdx = MatchStr.find(":");
+ size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
+ if (SpacePos != StringRef::npos) {
+ SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
+ SourceMgr::DK_Error, "unexpected whitespace");
+ return true;
+ }
- StringRef MatchStr = PatternStr.substr(2, End);
- PatternStr = PatternStr.substr(End + 4);
-
- // Get the regex name (e.g. "foo").
- size_t NameEnd = MatchStr.find(':');
- StringRef Name = MatchStr.substr(0, NameEnd);
+ // Get the name (e.g. "foo") and verify it is well formed.
+ StringRef OrigMatchStr = MatchStr;
+ Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+ parseVariable(MatchStr, SM);
+ if (!ParseVarResult) {
+ logAllUnhandledErrors(ParseVarResult.takeError(), errs());
+ return true;
+ }
+ StringRef Name = ParseVarResult->Name;
+ bool IsPseudo = ParseVarResult->IsPseudo;
- if (Name.empty()) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
- "invalid name in named regex: empty name");
- return true;
- }
+ IsDefinition = (VarEndIdx != StringRef::npos);
+ if (IsDefinition) {
+ if ((IsPseudo || !MatchStr.consume_front(":"))) {
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ SourceMgr::DK_Error,
+ "invalid name in string variable definition");
+ return true;
+ }
- // Verify that the name/expression is well formed. FileCheck currently
- // supports @LINE, @LINE+number, @LINE-number expressions. The check here
- // is relaxed, more strict check is performed in \c EvaluateExpression.
- bool IsExpression = false;
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
- if (i == 0) {
- if (Name[i] == '$') // Global vars start with '$'
- continue;
- if (Name[i] == '@') {
- if (NameEnd != StringRef::npos) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
- SourceMgr::DK_Error,
- "invalid name in named regex definition");
- return true;
- }
- IsExpression = true;
- continue;
+ // Detect collisions between string and numeric variables when the
+ // former is created later than the latter.
+ if (Context->GlobalNumericVariableTable.find(Name) !=
+ Context->GlobalNumericVariableTable.end()) {
+ SM.PrintMessage(
+ SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "numeric variable with name '" + Name + "' already exists");
+ return true;
}
- }
- if (Name[i] != '_' && !isalnum(Name[i]) &&
- (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data() + i),
- SourceMgr::DK_Error, "invalid name in named regex");
- return true;
+ DefName = Name;
+ MatchRegexp = MatchStr;
+ } else {
+ if (IsPseudo) {
+ MatchStr = OrigMatchStr;
+ IsLegacyLineExpr = IsNumBlock = true;
+ } else
+ SubstStr = Name;
}
}
- // Name can't start with a digit.
- if (isdigit(static_cast<unsigned char>(Name[0]))) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
- "invalid name in named regex");
- return true;
+ // Parse numeric substitution block.
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+ Optional<FileCheckNumericVariable *> DefinedNumericVariable;
+ if (IsNumBlock) {
+ Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+ parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
+ IsLegacyLineExpr, SM);
+ if (!ParseResult) {
+ logAllUnhandledErrors(ParseResult.takeError(), errs());
+ return true;
+ }
+ ExpressionAST = std::move(*ParseResult);
+ if (DefinedNumericVariable) {
+ IsDefinition = true;
+ DefName = (*DefinedNumericVariable)->getName();
+ MatchRegexp = StringRef("[0-9]+");
+ } else
+ SubstStr = MatchStr;
}
- // Handle [[foo]].
- if (NameEnd == StringRef::npos) {
- // Handle variables that were defined earlier on the same line by
- // emitting a backreference.
- if (VariableDefs.find(Name) != VariableDefs.end()) {
- unsigned VarParenNum = VariableDefs[Name];
- if (VarParenNum < 1 || VarParenNum > 9) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+ // Handle substitutions: [[foo]] and [[#<foo expr>]].
+ if (!IsDefinition) {
+ // Handle substitution of string variables that were defined earlier on
+ // the same line by emitting a backreference. Expressions do not
+ // support substituting a numeric variable defined on the same line.
+ if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
+ unsigned CaptureParenGroup = VariableDefs[SubstStr];
+ if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
+ SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
SourceMgr::DK_Error,
"Can't back-reference more than 9 variables");
return true;
}
- AddBackrefToRegEx(VarParenNum);
+ AddBackrefToRegEx(CaptureParenGroup);
} else {
- VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+ // Handle substitution of string variables ([[<var>]]) defined in
+ // previous CHECK patterns, and substitution of expressions.
+ FileCheckSubstitution *Substitution =
+ IsNumBlock
+ ? Context->makeNumericSubstitution(
+ SubstStr, std::move(ExpressionAST), SubstInsertIdx)
+ : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
+ Substitutions.push_back(Substitution);
}
continue;
}
- // Handle [[foo:.*]].
- VariableDefs[Name] = CurParen;
+ // Handle variable definitions: [[<def>:(...)]] and
+ // [[#(...)<def>:(...)]].
+ if (IsNumBlock) {
+ FileCheckNumericVariableMatch NumericVariableDefinition = {
+ *DefinedNumericVariable, CurParen};
+ NumericVariableDefs[DefName] = NumericVariableDefinition;
+ // This store is done here rather than in match() to allow
+ // parseNumericVariableUse() to get the pointer to the class instance
+ // of the right variable definition corresponding to a given numeric
+ // variable use.
+ Context->GlobalNumericVariableTable[DefName] = *DefinedNumericVariable;
+ } else {
+ VariableDefs[DefName] = CurParen;
+ // Mark the string variable as defined to detect collisions between
+ // string and numeric variables in parseNumericVariableUse() and
+ // DefineCmdlineVariables() when the latter is created later than the
+ // former. We cannot reuse GlobalVariableTable for this by populating
+ // it with an empty string since we would then lose the ability to
+ // detect the use of an undefined variable in match().
+ Context->DefinedVariableTable[DefName] = true;
+ }
RegExStr += '(';
++CurParen;
- if (AddRegExToRegEx(MatchStr.substr(NameEnd + 1), CurParen, SM))
+ if (AddRegExToRegEx(MatchRegexp, CurParen, SM))
return true;
RegExStr += ')';
@@ -243,37 +596,8 @@ void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
RegExStr += Backref;
}
-/// Evaluates expression and stores the result to \p Value.
-///
-/// Returns true on success and false when the expression has invalid syntax.
-bool FileCheckPattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
- // The only supported expression is @LINE([\+-]\d+)?
- if (!Expr.startswith("@LINE"))
- return false;
- Expr = Expr.substr(StringRef("@LINE").size());
- int Offset = 0;
- if (!Expr.empty()) {
- if (Expr[0] == '+')
- Expr = Expr.substr(1);
- else if (Expr[0] != '-')
- return false;
- if (Expr.getAsInteger(10, Offset))
- return false;
- }
- Value = llvm::itostr(LineNumber + Offset);
- return true;
-}
-
-/// Matches the pattern string against the input buffer \p Buffer
-///
-/// This returns the position that is matched or npos if there is no match. If
-/// there is a match, the size of the matched string is returned in \p
-/// MatchLen.
-///
-/// The \p VariableTable StringMap provides the current values of filecheck
-/// variables and is updated if this match defines new values.
-size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
- StringMap<StringRef> &VariableTable) const {
+Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
+ const SourceMgr &SM) const {
// If this is the EOF pattern, match it immediately.
if (CheckTy == Check::CheckEOF) {
MatchLen = 0;
@@ -283,58 +607,76 @@ size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
// If this is a fixed string pattern, just match it now.
if (!FixedStr.empty()) {
MatchLen = FixedStr.size();
- return Buffer.find(FixedStr);
+ size_t Pos = Buffer.find(FixedStr);
+ if (Pos == StringRef::npos)
+ return make_error<FileCheckNotFoundError>();
+ return Pos;
}
// Regex match.
- // If there are variable uses, we need to create a temporary string with the
+ // If there are substitutions, we need to create a temporary string with the
// actual value.
StringRef RegExToMatch = RegExStr;
std::string TmpStr;
- if (!VariableUses.empty()) {
+ if (!Substitutions.empty()) {
TmpStr = RegExStr;
-
- unsigned InsertOffset = 0;
- for (const auto &VariableUse : VariableUses) {
- std::string Value;
-
- if (VariableUse.first[0] == '@') {
- if (!EvaluateExpression(VariableUse.first, Value))
- return StringRef::npos;
- } else {
- StringMap<StringRef>::iterator it =
- VariableTable.find(VariableUse.first);
- // If the variable is undefined, return an error.
- if (it == VariableTable.end())
- return StringRef::npos;
-
- // Look up the value and escape it so that we can put it into the regex.
- Value += Regex::escape(it->second);
+ if (LineNumber)
+ Context->LineVariable->setValue(*LineNumber);
+
+ size_t InsertOffset = 0;
+ // Substitute all string variables and expressions whose values are only
+ // now known. Use of string variables defined on the same line are handled
+ // by back-references.
+ for (const auto &Substitution : Substitutions) {
+ // Substitute and check for failure (e.g. use of undefined variable).
+ Expected<std::string> Value = Substitution->getResult();
+ if (!Value) {
+ Context->LineVariable->clearValue();
+ return Value.takeError();
}
// Plop it into the regex at the adjusted offset.
- TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
- Value.begin(), Value.end());
- InsertOffset += Value.size();
+ TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
+ Value->begin(), Value->end());
+ InsertOffset += Value->size();
}
// Match the newly constructed regex.
RegExToMatch = TmpStr;
+ Context->LineVariable->clearValue();
}
SmallVector<StringRef, 4> MatchInfo;
if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
- return StringRef::npos;
+ return make_error<FileCheckNotFoundError>();
// Successful regex match.
assert(!MatchInfo.empty() && "Didn't get any match");
StringRef FullMatch = MatchInfo[0];
- // If this defines any variables, remember their values.
+ // If this defines any string variables, remember their values.
for (const auto &VariableDef : VariableDefs) {
assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
- VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
+ Context->GlobalVariableTable[VariableDef.first] =
+ MatchInfo[VariableDef.second];
+ }
+
+ // If this defines any numeric variables, remember their values.
+ for (const auto &NumericVariableDef : NumericVariableDefs) {
+ const FileCheckNumericVariableMatch &NumericVariableMatch =
+ NumericVariableDef.getValue();
+ unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
+ assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
+ FileCheckNumericVariable *DefinedNumericVariable =
+ NumericVariableMatch.DefinedNumericVariable;
+
+ StringRef MatchedValue = MatchInfo[CaptureParenGroup];
+ uint64_t Val;
+ if (MatchedValue.getAsInteger(10, Val))
+ return FileCheckErrorDiagnostic::get(SM, MatchedValue,
+ "Unable to represent numeric value");
+ DefinedNumericVariable->setValue(Val);
}
// Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
@@ -345,13 +687,7 @@ size_t FileCheckPattern::Match(StringRef Buffer, size_t &MatchLen,
return FullMatch.data() - Buffer.data() + MatchStartSkip;
}
-
-/// Computes an arbitrary estimate for the quality of matching this pattern at
-/// the start of \p Buffer; a distance of zero should correspond to a perfect
-/// match.
-unsigned
-FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
- const StringMap<StringRef> &VariableTable) const {
+unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const {
// Just compute the number of matching characters. For regular expressions, we
// just compare against the regex itself and hope for the best.
//
@@ -368,38 +704,36 @@ FileCheckPattern::ComputeMatchDistance(StringRef Buffer,
return BufferPrefix.edit_distance(ExampleString);
}
-void FileCheckPattern::PrintVariableUses(const SourceMgr &SM, StringRef Buffer,
- const StringMap<StringRef> &VariableTable,
- SMRange MatchRange) const {
- // If this was a regular expression using variables, print the current
- // variable values.
- if (!VariableUses.empty()) {
- for (const auto &VariableUse : VariableUses) {
+void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
+ SMRange MatchRange) const {
+ // Print what we know about substitutions.
+ if (!Substitutions.empty()) {
+ for (const auto &Substitution : Substitutions) {
SmallString<256> Msg;
raw_svector_ostream OS(Msg);
- StringRef Var = VariableUse.first;
- if (Var[0] == '@') {
- std::string Value;
- if (EvaluateExpression(Var, Value)) {
- OS << "with expression \"";
- OS.write_escaped(Var) << "\" equal to \"";
- OS.write_escaped(Value) << "\"";
- } else {
- OS << "uses incorrect expression \"";
- OS.write_escaped(Var) << "\"";
- }
+ Expected<std::string> MatchedValue = Substitution->getResult();
+
+ // Substitution failed or is not known at match time, print the undefined
+ // variables it uses.
+ if (!MatchedValue) {
+ bool UndefSeen = false;
+ handleAllErrors(MatchedValue.takeError(),
+ [](const FileCheckNotFoundError &E) {},
+ // Handled in PrintNoMatch().
+ [](const FileCheckErrorDiagnostic &E) {},
+ [&](const FileCheckUndefVarError &E) {
+ if (!UndefSeen) {
+ OS << "uses undefined variable(s):";
+ UndefSeen = true;
+ }
+ OS << " ";
+ E.log(OS);
+ });
} else {
- StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
-
- // Check for undefined variable references.
- if (it == VariableTable.end()) {
- OS << "uses undefined variable \"";
- OS.write_escaped(Var) << "\"";
- } else {
- OS << "with variable \"";
- OS.write_escaped(Var) << "\" equal to \"";
- OS.write_escaped(it->second) << "\"";
- }
+ // Substitution succeeded. Print substituted value.
+ OS << "with \"";
+ OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
+ OS.write_escaped(*MatchedValue) << "\"";
}
if (MatchRange.isValid())
@@ -430,9 +764,8 @@ static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
return Range;
}
-void FileCheckPattern::PrintFuzzyMatch(
+void FileCheckPattern::printFuzzyMatch(
const SourceMgr &SM, StringRef Buffer,
- const StringMap<StringRef> &VariableTable,
std::vector<FileCheckDiag> *Diags) const {
// Attempt to find the closest/best fuzzy match. Usually an error happens
// because some string in the output didn't exactly match. In these cases, we
@@ -454,7 +787,7 @@ void FileCheckPattern::PrintFuzzyMatch(
// Compute the "quality" of this match as an arbitrary combination of the
// match distance and the number of lines skipped to get to this match.
- unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
+ unsigned Distance = computeMatchDistance(Buffer.substr(i));
double Quality = Distance + (NumLinesForward / 100.);
if (Quality < BestQuality || Best == StringRef::npos) {
@@ -478,11 +811,39 @@ void FileCheckPattern::PrintFuzzyMatch(
}
}
-/// Finds the closing sequence of a regex variable usage or definition.
-///
-/// \p Str has to point in the beginning of the definition (right after the
-/// opening sequence). Returns the offset of the closing sequence within Str,
-/// or npos if it was not found.
+Expected<StringRef>
+FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
+ auto VarIter = GlobalVariableTable.find(VarName);
+ if (VarIter == GlobalVariableTable.end())
+ return make_error<FileCheckUndefVarError>(VarName);
+
+ return VarIter->second;
+}
+
+template <class... Types>
+FileCheckNumericVariable *
+FileCheckPatternContext::makeNumericVariable(Types... args) {
+ NumericVariables.push_back(
+ llvm::make_unique<FileCheckNumericVariable>(args...));
+ return NumericVariables.back().get();
+}
+
+FileCheckSubstitution *
+FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
+ size_t InsertIdx) {
+ Substitutions.push_back(
+ llvm::make_unique<FileCheckStringSubstitution>(this, VarName, InsertIdx));
+ return Substitutions.back().get();
+}
+
+FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution(
+ StringRef ExpressionStr,
+ std::unique_ptr<FileCheckExpressionAST> ExpressionAST, size_t InsertIdx) {
+ Substitutions.push_back(llvm::make_unique<FileCheckNumericSubstitution>(
+ this, ExpressionStr, std::move(ExpressionAST), InsertIdx));
+ return Substitutions.back().get();
+}
+
size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
// Offset keeps track of the current offset within the input Str
size_t Offset = 0;
@@ -521,11 +882,8 @@ size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
return StringRef::npos;
}
-/// Canonicalize whitespaces in the file. Line endings are replaced with
-/// UNIX-style '\n'.
-StringRef
-llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
- SmallVectorImpl<char> &OutputBuffer) {
+StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
+ SmallVectorImpl<char> &OutputBuffer) {
OutputBuffer.reserve(MB.getBufferSize());
for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
@@ -581,7 +939,6 @@ Check::FileCheckType &Check::FileCheckType::setCount(int C) {
return *this;
}
-// Get a description of the type.
std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
switch (Kind) {
case Check::CheckNone:
@@ -674,7 +1031,7 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
return Loc;
}
-/// Search the buffer for the first prefix in the prefix regular expression.
+/// Searches the buffer for the first prefix in the prefix regular expression.
///
/// This searches the buffer using the provided regular expression, however it
/// enforces constraints beyond that:
@@ -683,7 +1040,7 @@ static size_t SkipWord(StringRef Str, size_t Loc) {
/// 2) The found prefix must be followed by a valid check type suffix using \c
/// FindCheckType above.
///
-/// Returns a pair of StringRefs into the Buffer, which combines:
+/// \returns a pair of StringRefs into the Buffer, which combines:
/// - the first match of the regular expression to satisfy these two is
/// returned,
/// otherwise an empty StringRef is returned to indicate failure.
@@ -744,13 +1101,24 @@ FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
return {StringRef(), StringRef()};
}
-/// Read the check file, which specifies the sequence of expected strings.
-///
-/// The strings are added to the CheckStrings vector. Returns true in case of
-/// an error, false otherwise.
-bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
- Regex &PrefixRE,
- std::vector<FileCheckString> &CheckStrings) {
+void FileCheckPatternContext::createLineVariable() {
+ assert(!LineVariable && "@LINE pseudo numeric variable already created");
+ StringRef LineName = "@LINE";
+ LineVariable = makeNumericVariable(LineName);
+ GlobalNumericVariableTable[LineName] = LineVariable;
+}
+
+bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
+ std::vector<FileCheckString> &CheckStrings) {
+ Error DefineError =
+ PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM);
+ if (DefineError) {
+ logAllUnhandledErrors(std::move(DefineError), errs());
+ return true;
+ }
+
+ PatternContext.createLineVariable();
+
std::vector<FileCheckPattern> ImplicitNegativeChecks;
for (const auto &PatternString : Req.ImplicitCheckNot) {
// Create a buffer with fake command line content in order to display the
@@ -764,9 +1132,10 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
- ImplicitNegativeChecks.push_back(FileCheckPattern(Check::CheckNot));
- ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
- "IMPLICIT-CHECK", SM, 0, Req);
+ ImplicitNegativeChecks.push_back(
+ FileCheckPattern(Check::CheckNot, &PatternContext));
+ ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
+ "IMPLICIT-CHECK", SM, Req);
}
std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
@@ -827,8 +1196,8 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
// Parse the pattern.
- FileCheckPattern P(CheckTy);
- if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
+ FileCheckPattern P(CheckTy, &PatternContext, LineNumber);
+ if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req))
return true;
// Verify that CHECK-LABEL lines do not define or use variables
@@ -871,8 +1240,9 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
// Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
// prefix as a filler for the error message.
if (!DagNotMatches.empty()) {
- CheckStrings.emplace_back(FileCheckPattern(Check::CheckEOF), *Req.CheckPrefixes.begin(),
- SMLoc::getFromPointer(Buffer.data()));
+ CheckStrings.emplace_back(
+ FileCheckPattern(Check::CheckEOF, &PatternContext, LineNumber + 1),
+ *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
}
@@ -897,20 +1267,27 @@ bool llvm::FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer,
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
- int MatchedCount, StringRef Buffer,
- StringMap<StringRef> &VariableTable, size_t MatchPos,
+ int MatchedCount, StringRef Buffer, size_t MatchPos,
size_t MatchLen, const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) {
+ bool PrintDiag = true;
if (ExpectedMatch) {
if (!Req.Verbose)
return;
if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
return;
+ // Due to their verbosity, we don't print verbose diagnostics here if we're
+ // gathering them for a different rendering, but we always print other
+ // diagnostics.
+ PrintDiag = !Diags;
}
SMRange MatchRange = ProcessMatchResult(
ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
: FileCheckDiag::MatchFoundButExcluded,
SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
+ if (!PrintDiag)
+ return;
+
std::string Message = formatv("{0}: {1} string found in input",
Pat.getCheckTy().getDescription(Prefix),
(ExpectedMatch ? "expected" : "excluded"))
@@ -922,65 +1299,87 @@ static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
{MatchRange});
- Pat.PrintVariableUses(SM, Buffer, VariableTable, MatchRange);
+ Pat.printSubstitutions(SM, Buffer, MatchRange);
}
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
const FileCheckString &CheckStr, int MatchedCount,
- StringRef Buffer, StringMap<StringRef> &VariableTable,
- size_t MatchPos, size_t MatchLen, FileCheckRequest &Req,
+ StringRef Buffer, size_t MatchPos, size_t MatchLen,
+ FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) {
PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
- MatchedCount, Buffer, VariableTable, MatchPos, MatchLen, Req,
- Diags);
+ MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
}
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
StringRef Prefix, SMLoc Loc,
const FileCheckPattern &Pat, int MatchedCount,
- StringRef Buffer, StringMap<StringRef> &VariableTable,
- bool VerboseVerbose,
- std::vector<FileCheckDiag> *Diags) {
- if (!ExpectedMatch && !VerboseVerbose)
+ StringRef Buffer, bool VerboseVerbose,
+ std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
+ assert(MatchErrors && "Called on successful match");
+ bool PrintDiag = true;
+ if (!ExpectedMatch) {
+ if (!VerboseVerbose) {
+ consumeError(std::move(MatchErrors));
+ return;
+ }
+ // Due to their verbosity, we don't print verbose diagnostics here if we're
+ // gathering them for a different rendering, but we always print other
+ // diagnostics.
+ PrintDiag = !Diags;
+ }
+
+ // If the current position is at the end of a line, advance to the start of
+ // the next line.
+ Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
+ SMRange SearchRange = ProcessMatchResult(
+ ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
+ : FileCheckDiag::MatchNoneAndExcluded,
+ SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+ if (!PrintDiag) {
+ consumeError(std::move(MatchErrors));
return;
+ }
+
+ MatchErrors =
+ handleErrors(std::move(MatchErrors),
+ [](const FileCheckErrorDiagnostic &E) { E.log(errs()); });
+
+ // No problem matching the string per se.
+ if (!MatchErrors)
+ return;
+ consumeError(std::move(MatchErrors));
- // Otherwise, we have an error, emit an error message.
+ // Print "not found" diagnostic.
std::string Message = formatv("{0}: {1} string not found in input",
Pat.getCheckTy().getDescription(Prefix),
(ExpectedMatch ? "expected" : "excluded"))
.str();
if (Pat.getCount() > 1)
Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
-
SM.PrintMessage(
Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
- // Print the "scanning from here" line. If the current position is at the
- // end of a line, advance to the start of the next line.
- Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
- SMRange SearchRange = ProcessMatchResult(
- ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
- : FileCheckDiag::MatchNoneAndExcluded,
- SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
+ // Print the "scanning from here" line.
SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
// Allow the pattern to print additional information if desired.
- Pat.PrintVariableUses(SM, Buffer, VariableTable);
+ Pat.printSubstitutions(SM, Buffer);
if (ExpectedMatch)
- Pat.PrintFuzzyMatch(SM, Buffer, VariableTable, Diags);
+ Pat.printFuzzyMatch(SM, Buffer, Diags);
}
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
const FileCheckString &CheckStr, int MatchedCount,
- StringRef Buffer, StringMap<StringRef> &VariableTable,
- bool VerboseVerbose,
- std::vector<FileCheckDiag> *Diags) {
+ StringRef Buffer, bool VerboseVerbose,
+ std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
- MatchedCount, Buffer, VariableTable, VerboseVerbose, Diags);
+ MatchedCount, Buffer, VerboseVerbose, Diags,
+ std::move(MatchErrors));
}
-/// Count the number of newlines in the specified range.
+/// Counts the number of newlines in the specified range.
static unsigned CountNumNewlinesBetween(StringRef Range,
const char *&FirstNewLine) {
unsigned NumNewLines = 0;
@@ -1003,10 +1402,8 @@ static unsigned CountNumNewlinesBetween(StringRef Range,
}
}
-/// Match check string and its "not strings" and/or "dag strings".
size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
bool IsLabelScanMode, size_t &MatchLen,
- StringMap<StringRef> &VariableTable,
FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const {
size_t LastPos = 0;
@@ -1018,7 +1415,7 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
// over the block again (including the last CHECK-LABEL) in normal mode.
if (!IsLabelScanMode) {
// Match "dag strings" (with mixed "not strings" if any).
- LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable, Req, Diags);
+ LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
if (LastPos == StringRef::npos)
return StringRef::npos;
}
@@ -1033,18 +1430,19 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
size_t CurrentMatchLen;
// get a match at current start point
- size_t MatchPos = Pat.Match(MatchBuffer, CurrentMatchLen, VariableTable);
- if (i == 1)
- FirstMatchPos = LastPos + MatchPos;
+ Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM);
// report
- if (MatchPos == StringRef::npos) {
- PrintNoMatch(true, SM, *this, i, MatchBuffer, VariableTable,
- Req.VerboseVerbose, Diags);
+ if (!MatchResult) {
+ PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags,
+ MatchResult.takeError());
return StringRef::npos;
}
- PrintMatch(true, SM, *this, i, MatchBuffer, VariableTable, MatchPos,
- CurrentMatchLen, Req, Diags);
+ size_t MatchPos = *MatchResult;
+ PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
+ Diags);
+ if (i == 1)
+ FirstMatchPos = LastPos + MatchPos;
// move start point after the match
LastMatchEnd += MatchPos + CurrentMatchLen;
@@ -1079,14 +1477,13 @@ size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
// If this match had "not strings", verify that they don't exist in the
// skipped region.
- if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+ if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
return StringRef::npos;
}
return FirstMatchPos;
}
-/// Verify there is a single line in the given buffer.
bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
if (Pat.getCheckTy() != Check::CheckNext &&
Pat.getCheckTy() != Check::CheckEmpty)
@@ -1097,12 +1494,6 @@ bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
// Count the number of newlines between the previous match and this one.
- assert(Buffer.data() !=
- SM.getMemoryBuffer(SM.FindBufferContainingLoc(
- SMLoc::getFromPointer(Buffer.data())))
- ->getBufferStart() &&
- "CHECK-NEXT and CHECK-EMPTY can't be the first check in a file");
-
const char *FirstNewLine = nullptr;
unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
@@ -1132,18 +1523,11 @@ bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
return false;
}
-/// Verify there is no newline in the given buffer.
bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
if (Pat.getCheckTy() != Check::CheckSame)
return false;
// Count the number of newlines between the previous match and this one.
- assert(Buffer.data() !=
- SM.getMemoryBuffer(SM.FindBufferContainingLoc(
- SMLoc::getFromPointer(Buffer.data())))
- ->getBufferStart() &&
- "CHECK-SAME can't be the first check in a file");
-
const char *FirstNewLine = nullptr;
unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
@@ -1161,26 +1545,25 @@ bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
return false;
}
-/// Verify there's no "not strings" in the given buffer.
bool FileCheckString::CheckNot(
const SourceMgr &SM, StringRef Buffer,
const std::vector<const FileCheckPattern *> &NotStrings,
- StringMap<StringRef> &VariableTable, const FileCheckRequest &Req,
- std::vector<FileCheckDiag> *Diags) const {
+ const FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const {
for (const FileCheckPattern *Pat : NotStrings) {
assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
size_t MatchLen = 0;
- size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
+ Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM);
- if (Pos == StringRef::npos) {
+ if (!MatchResult) {
PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
- VariableTable, Req.VerboseVerbose, Diags);
+ Req.VerboseVerbose, Diags, MatchResult.takeError());
continue;
}
+ size_t Pos = *MatchResult;
- PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, VariableTable,
- Pos, MatchLen, Req, Diags);
+ PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
+ Req, Diags);
return true;
}
@@ -1188,11 +1571,9 @@ bool FileCheckString::CheckNot(
return false;
}
-/// Match "dag strings" and their mixed "not strings".
size_t
FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
std::vector<const FileCheckPattern *> &NotStrings,
- StringMap<StringRef> &VariableTable,
const FileCheckRequest &Req,
std::vector<FileCheckDiag> *Diags) const {
if (DagNotStrings.empty())
@@ -1233,19 +1614,20 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
// CHECK-DAG group.
for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
StringRef MatchBuffer = Buffer.substr(MatchPos);
- size_t MatchPosBuf = Pat.Match(MatchBuffer, MatchLen, VariableTable);
+ Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM);
// With a group of CHECK-DAGs, a single mismatching means the match on
// that group of CHECK-DAGs fails immediately.
- if (MatchPosBuf == StringRef::npos) {
+ if (!MatchResult) {
PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
- VariableTable, Req.VerboseVerbose, Diags);
+ Req.VerboseVerbose, Diags, MatchResult.takeError());
return StringRef::npos;
}
+ size_t MatchPosBuf = *MatchResult;
// Re-calc it as the offset relative to the start of the original string.
MatchPos += MatchPosBuf;
if (Req.VerboseVerbose)
- PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer,
- VariableTable, MatchPos, MatchLen, Req, Diags);
+ PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
+ MatchLen, Req, Diags);
MatchRange M{MatchPos, MatchPos + MatchLen};
if (Req.AllowDeprecatedDagOverlap) {
// We don't need to track all matches in this mode, so we just maintain
@@ -1276,20 +1658,24 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
break;
}
if (Req.VerboseVerbose) {
- SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
- SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
- SMRange OldRange(OldStart, OldEnd);
- SM.PrintMessage(OldStart, SourceMgr::DK_Note,
- "match discarded, overlaps earlier DAG match here",
- {OldRange});
- if (Diags)
+ // Due to their verbosity, we don't print verbose diagnostics here if
+ // we're gathering them for a different rendering, but we always print
+ // other diagnostics.
+ if (!Diags) {
+ SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
+ SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
+ SMRange OldRange(OldStart, OldEnd);
+ SM.PrintMessage(OldStart, SourceMgr::DK_Note,
+ "match discarded, overlaps earlier DAG match here",
+ {OldRange});
+ } else
Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
}
MatchPos = MI->End;
}
if (!Req.VerboseVerbose)
- PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, VariableTable,
- MatchPos, MatchLen, Req, Diags);
+ PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
+ MatchLen, Req, Diags);
// Handle the end of a CHECK-DAG group.
if (std::next(PatItr) == PatEnd ||
@@ -1300,7 +1686,7 @@ FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
// region.
StringRef SkippedRegion =
Buffer.slice(StartPos, MatchRanges.begin()->Pos);
- if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable, Req, Diags))
+ if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
return StringRef::npos;
// Clear "not strings".
NotStrings.clear();
@@ -1322,7 +1708,7 @@ static bool ValidateCheckPrefix(StringRef CheckPrefix) {
return Validator.match(CheckPrefix);
}
-bool llvm::FileCheck::ValidateCheckPrefixes() {
+bool FileCheck::ValidateCheckPrefixes() {
StringSet<> PrefixSet;
for (StringRef Prefix : Req.CheckPrefixes) {
@@ -1340,12 +1726,7 @@ bool llvm::FileCheck::ValidateCheckPrefixes() {
return true;
}
-// Combines the check prefixes into a single regex so that we can efficiently
-// scan for any of the set.
-//
-// The semantics are that the longest-match wins which matches our regex
-// library.
-Regex llvm::FileCheck::buildCheckPrefixRegex() {
+Regex FileCheck::buildCheckPrefixRegex() {
// I don't think there's a way to specify an initial value for cl::list,
// so if nothing was specified, add the default
if (Req.CheckPrefixes.empty())
@@ -1364,32 +1745,152 @@ Regex llvm::FileCheck::buildCheckPrefixRegex() {
return Regex(PrefixRegexStr);
}
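
buildCheckPrefixRegex joins all check prefixes into one alternation so the input can be scanned once for any of them; the regex library's longest-match semantics mean that, for example, "CHECK-NEXT" is preferred over "CHECK" when both could match. A hedged sketch of that idea with hard-coded prefixes (the real implementation builds the alternation from Req.CheckPrefixes; the helper name here is hypothetical):

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
using namespace llvm;

// Returns the prefix that matched at the start of Line, or an empty StringRef.
static StringRef matchCheckPrefix(StringRef Line) {
  Regex PrefixRE("CHECK-NEXT|CHECK");
  SmallVector<StringRef, 1> Matches;
  if (!PrefixRE.match(Line, &Matches))
    return StringRef();
  return Matches[0]; // Longest alternative wins: "CHECK-NEXT:" yields it.
}
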
-// Remove local variables from \p VariableTable. Global variables
-// (start with '$') are preserved.
-static void ClearLocalVars(StringMap<StringRef> &VariableTable) {
- SmallVector<StringRef, 16> LocalVars;
- for (const auto &Var : VariableTable)
- if (Var.first()[0] != '$')
- LocalVars.push_back(Var.first());
+Error FileCheckPatternContext::defineCmdlineVariables(
+ std::vector<std::string> &CmdlineDefines, SourceMgr &SM) {
+ assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
+ "Overriding defined variable with command-line variable definitions");
+
+ if (CmdlineDefines.empty())
+ return Error::success();
+
+ // Create a string representing the vector of command-line definitions. Each
+ // definition is on its own line and prefixed with a definition number to
+ // clarify which definition a given diagnostic corresponds to.
+ unsigned I = 0;
+ Error Errs = Error::success();
+ std::string CmdlineDefsDiag;
+ StringRef Prefix1 = "Global define #";
+ StringRef Prefix2 = ": ";
+ for (StringRef CmdlineDef : CmdlineDefines)
+ CmdlineDefsDiag +=
+ (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str();
+
+ // Create a buffer with fake command-line content in order to display
+ // parsing diagnostics with location information and to point to the
+ // global definition with invalid syntax.
+ std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
+ MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
+ StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
+ SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
+
+ SmallVector<StringRef, 4> CmdlineDefsDiagVec;
+ CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/,
+ false /*KeepEmpty*/);
+ for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) {
+ unsigned DefStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size();
+ StringRef CmdlineDef = CmdlineDefDiag.substr(DefStart);
+ size_t EqIdx = CmdlineDef.find('=');
+ if (EqIdx == StringRef::npos) {
+ Errs = joinErrors(
+ std::move(Errs),
+ FileCheckErrorDiagnostic::get(
+ SM, CmdlineDef, "missing equal sign in global definition"));
+ continue;
+ }
- for (const auto &Var : LocalVars)
- VariableTable.erase(Var);
+ // Numeric variable definition.
+ if (CmdlineDef[0] == '#') {
+ StringRef CmdlineName = CmdlineDef.substr(1, EqIdx - 1);
+ Expected<FileCheckNumericVariable *> ParseResult =
+ FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this,
+ None, SM);
+ if (!ParseResult) {
+ Errs = joinErrors(std::move(Errs), ParseResult.takeError());
+ continue;
+ }
+
+ StringRef CmdlineVal = CmdlineDef.substr(EqIdx + 1);
+ uint64_t Val;
+ if (CmdlineVal.getAsInteger(10, Val)) {
+ Errs = joinErrors(std::move(Errs),
+ FileCheckErrorDiagnostic::get(
+ SM, CmdlineVal,
+ "invalid value in numeric variable definition '" +
+ CmdlineVal + "'"));
+ continue;
+ }
+ FileCheckNumericVariable *DefinedNumericVariable = *ParseResult;
+ DefinedNumericVariable->setValue(Val);
+
+ // Record this variable definition.
+ GlobalNumericVariableTable[DefinedNumericVariable->getName()] =
+ DefinedNumericVariable;
+ } else {
+ // String variable definition.
+ std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
+ StringRef CmdlineName = CmdlineNameVal.first;
+ StringRef OrigCmdlineName = CmdlineName;
+ Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+ FileCheckPattern::parseVariable(CmdlineName, SM);
+ if (!ParseVarResult) {
+ Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
+ continue;
+ }
+ // Check that CmdlineName does not denote a pseudo variable and that it is
+ // only composed of the parsed variable name. This catches cases like
+ // "FOO+2" in a "FOO+2=10" definition.
+ if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
+ Errs = joinErrors(std::move(Errs),
+ FileCheckErrorDiagnostic::get(
+ SM, OrigCmdlineName,
+ "invalid name in string variable definition '" +
+ OrigCmdlineName + "'"));
+ continue;
+ }
+ StringRef Name = ParseVarResult->Name;
+
+ // Detect collisions between string and numeric variables when the former
+ // is created later than the latter.
+ if (GlobalNumericVariableTable.find(Name) !=
+ GlobalNumericVariableTable.end()) {
+ Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get(
+ SM, Name,
+ "numeric variable with name '" +
+ Name + "' already exists"));
+ continue;
+ }
+ GlobalVariableTable.insert(CmdlineNameVal);
+ // Mark the string variable as defined to detect collisions between
+ // string and numeric variables in defineCmdlineVariables when the latter
+ // is created later than the former. We cannot reuse GlobalVariableTable
+ // for this by populating it with an empty string since we would then
+ // lose the ability to detect the use of an undefined variable in
+ // match().
+ DefinedVariableTable[Name] = true;
+ }
+ }
+
+ return Errs;
}
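
Each command-line define is handled above in one of two forms: "#NAME=value" defines a numeric variable (the value must parse as an integer), while "NAME=value" defines a string variable; a missing '=' or a malformed name is reported against the synthesized "Global defines" buffer. A small illustration of that split, written against plain StringRef rather than the FileCheck classes (the helper below is hypothetical):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Classifies a single define payload the way defineCmdlineVariables does.
static void classifyDefine(StringRef Def) {
  if (Def.find('=') == StringRef::npos) {
    errs() << "missing equal sign in global definition\n";
    return;
  }
  bool Numeric = Def.consume_front("#"); // '#' selects a numeric variable.
  std::pair<StringRef, StringRef> NameVal = Def.split('=');
  outs() << (Numeric ? "numeric" : "string") << " variable '" << NameVal.first
         << "' = '" << NameVal.second << "'\n";
}

// classifyDefine("FOO=bar")  -> string variable 'FOO' = 'bar'
// classifyDefine("#NUM=42")  -> numeric variable 'NUM' = '42'
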
-/// Check the input to FileCheck provided in the \p Buffer against the \p
-/// CheckStrings read from the check file.
-///
-/// Returns false if the input fails to satisfy the checks.
-bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
- ArrayRef<FileCheckString> CheckStrings,
- std::vector<FileCheckDiag> *Diags) {
- bool ChecksFailed = false;
+void FileCheckPatternContext::clearLocalVars() {
+ SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
+ for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
+ if (Var.first()[0] != '$')
+ LocalPatternVars.push_back(Var.first());
+
+ // Numeric substitution reads the value of a variable directly, not via
+ // GlobalNumericVariableTable. Therefore, we clear local variables by
+ // clearing their value which will lead to a numeric substitution failure. We
+ // also mark the variable for removal from GlobalNumericVariableTable since
+ // this is what defineCmdlineVariables checks to decide that no global
+ // variable has been defined.
+ for (const auto &Var : GlobalNumericVariableTable)
+ if (Var.first()[0] != '$') {
+ Var.getValue()->clearValue();
+ LocalNumericVars.push_back(Var.first());
+ }
- /// VariableTable - This holds all the current filecheck variables.
- StringMap<StringRef> VariableTable;
+ for (const auto &Var : LocalPatternVars)
+ GlobalVariableTable.erase(Var);
+ for (const auto &Var : LocalNumericVars)
+ GlobalNumericVariableTable.erase(Var);
+}
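
The scoping rule implemented by clearLocalVars: at each CHECK-LABEL boundary (with --enable-var-scope), variables whose names do not start with '$' are treated as local and discarded, while '$'-prefixed globals survive; numeric locals additionally have their value cleared so a later numeric substitution fails. The string-variable half is essentially the old ClearLocalVars helper; a minimal sketch of it, assuming a plain StringMap:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

static void clearLocalStringVars(StringMap<StringRef> &Vars) {
  SmallVector<StringRef, 16> Locals;
  for (const auto &Var : Vars)
    if (!Var.first().startswith("$")) // '$' marks a global variable.
      Locals.push_back(Var.first());
  for (StringRef Name : Locals)
    Vars.erase(Name);
}
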
- for (const auto& Def : Req.GlobalDefines)
- VariableTable.insert(StringRef(Def).split('='));
+bool FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
+ ArrayRef<FileCheckString> CheckStrings,
+ std::vector<FileCheckDiag> *Diags) {
+ bool ChecksFailed = false;
unsigned i = 0, j = 0, e = CheckStrings.size();
while (true) {
@@ -1405,10 +1906,10 @@ bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
// Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
size_t MatchLabelLen = 0;
- size_t MatchLabelPos = CheckLabelStr.Check(
- SM, Buffer, true, MatchLabelLen, VariableTable, Req, Diags);
+ size_t MatchLabelPos =
+ CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
if (MatchLabelPos == StringRef::npos)
- // Immediately bail of CHECK-LABEL fails, nothing else we can do.
+ // Immediately bail if CHECK-LABEL fails, nothing else we can do.
return false;
CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
@@ -1416,8 +1917,11 @@ bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
++j;
}
- if (Req.EnableVarScope)
- ClearLocalVars(VariableTable);
+ // Do not clear the first region as it's the one before the first
+ // CHECK-LABEL and it would clear variables defined on the command-line
+ // before they get used.
+ if (i != 0 && Req.EnableVarScope)
+ PatternContext.clearLocalVars();
for (; i != j; ++i) {
const FileCheckString &CheckStr = CheckStrings[i];
@@ -1425,8 +1929,8 @@ bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
// Check each string within the scanned region, including a second check
// of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
size_t MatchLen = 0;
- size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
- VariableTable, Req, Diags);
+ size_t MatchPos =
+ CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
if (MatchPos == StringRef::npos) {
ChecksFailed = true;
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index b8223126227d..3d6b569f2993 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -1,9 +1,8 @@
//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,18 +75,26 @@ private:
// output file on commit(). This is used only when we cannot use OnDiskBuffer.
class InMemoryBuffer : public FileOutputBuffer {
public:
- InMemoryBuffer(StringRef Path, MemoryBlock Buf, unsigned Mode)
- : FileOutputBuffer(Path), Buffer(Buf), Mode(Mode) {}
+ InMemoryBuffer(StringRef Path, MemoryBlock Buf, std::size_t BufSize,
+ unsigned Mode)
+ : FileOutputBuffer(Path), Buffer(Buf), BufferSize(BufSize),
+ Mode(Mode) {}
uint8_t *getBufferStart() const override { return (uint8_t *)Buffer.base(); }
uint8_t *getBufferEnd() const override {
- return (uint8_t *)Buffer.base() + Buffer.size();
+ return (uint8_t *)Buffer.base() + BufferSize;
}
- size_t getBufferSize() const override { return Buffer.size(); }
+ size_t getBufferSize() const override { return BufferSize; }
Error commit() override {
+ if (FinalPath == "-") {
+ llvm::outs() << StringRef((const char *)Buffer.base(), BufferSize);
+ llvm::outs().flush();
+ return Error::success();
+ }
+
using namespace sys::fs;
int FD;
std::error_code EC;
@@ -95,12 +102,14 @@ public:
openFileForWrite(FinalPath, FD, CD_CreateAlways, OF_None, Mode))
return errorCodeToError(EC);
raw_fd_ostream OS(FD, /*shouldClose=*/true, /*unbuffered=*/true);
- OS << StringRef((const char *)Buffer.base(), Buffer.size());
+ OS << StringRef((const char *)Buffer.base(), BufferSize);
return Error::success();
}
private:
+ // Buffer may actually contain a larger memory block than BufferSize bytes.
OwningMemoryBlock Buffer;
+ size_t BufferSize;
unsigned Mode;
};
} // namespace
@@ -112,43 +121,42 @@ createInMemoryBuffer(StringRef Path, size_t Size, unsigned Mode) {
Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
if (EC)
return errorCodeToError(EC);
- return llvm::make_unique<InMemoryBuffer>(Path, MB, Mode);
+ return llvm::make_unique<InMemoryBuffer>(Path, MB, Size, Mode);
}
-static Expected<std::unique_ptr<OnDiskBuffer>>
-createOnDiskBuffer(StringRef Path, size_t Size, bool InitExisting,
- unsigned Mode) {
+static Expected<std::unique_ptr<FileOutputBuffer>>
+createOnDiskBuffer(StringRef Path, size_t Size, unsigned Mode) {
Expected<fs::TempFile> FileOrErr =
fs::TempFile::create(Path + ".tmp%%%%%%%", Mode);
if (!FileOrErr)
return FileOrErr.takeError();
fs::TempFile File = std::move(*FileOrErr);
- if (InitExisting) {
- if (auto EC = sys::fs::copy_file(Path, File.FD))
- return errorCodeToError(EC);
- } else {
#ifndef _WIN32
- // On Windows, CreateFileMapping (the mmap function on Windows)
- // automatically extends the underlying file. We don't need to
- // extend the file beforehand. _chsize (ftruncate on Windows) is
- // pretty slow just like it writes specified amount of bytes,
- // so we should avoid calling that function.
- if (auto EC = fs::resize_file(File.FD, Size)) {
- consumeError(File.discard());
- return errorCodeToError(EC);
- }
-#endif
+ // On Windows, CreateFileMapping (the mmap function on Windows)
+ // automatically extends the underlying file. We don't need to
+ // extend the file beforehand. _chsize (ftruncate on Windows) is
+ // pretty slow because it writes out the specified number of bytes,
+ // so we should avoid calling that function.
+ if (auto EC = fs::resize_file(File.FD, Size)) {
+ consumeError(File.discard());
+ return errorCodeToError(EC);
}
+#endif
// Mmap it.
std::error_code EC;
auto MappedFile = llvm::make_unique<fs::mapped_file_region>(
- File.FD, fs::mapped_file_region::readwrite, Size, 0, EC);
+ fs::convertFDToNativeFile(File.FD), fs::mapped_file_region::readwrite,
+ Size, 0, EC);
+
+ // mmap(2) can fail if the underlying filesystem does not support it.
+ // If that happens, we fall back to an in-memory buffer as a last resort.
if (EC) {
consumeError(File.discard());
- return errorCodeToError(EC);
+ return createInMemoryBuffer(Path, Size, Mode);
}
+
return llvm::make_unique<OnDiskBuffer>(Path, std::move(File),
std::move(MappedFile));
}
@@ -156,6 +164,10 @@ createOnDiskBuffer(StringRef Path, size_t Size, bool InitExisting,
// Create an instance of FileOutputBuffer.
Expected<std::unique_ptr<FileOutputBuffer>>
FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
+ // Handle "-" as stdout just like llvm::raw_ostream does.
+ if (Path == "-")
+ return createInMemoryBuffer("-", Size, /*Mode=*/0);
+
unsigned Mode = fs::all_read | fs::all_write;
if (Flags & F_executable)
Mode |= fs::all_exe;
@@ -163,15 +175,6 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
fs::file_status Stat;
fs::status(Path, Stat);
- if ((Flags & F_modify) && Size == size_t(-1)) {
- if (Stat.type() == fs::file_type::regular_file)
- Size = Stat.getSize();
- else if (Stat.type() == fs::file_type::file_not_found)
- return errorCodeToError(errc::no_such_file_or_directory);
- else
- return errorCodeToError(errc::invalid_argument);
- }
-
// Usually, we want to create OnDiskBuffer to create a temporary file in
// the same directory as the destination file and atomically replace it
// via rename(2).
@@ -186,7 +189,7 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) {
case fs::file_type::regular_file:
case fs::file_type::file_not_found:
case fs::file_type::status_error:
- return createOnDiskBuffer(Path, Size, !!(Flags & F_modify), Mode);
+ return createOnDiskBuffer(Path, Size, Mode);
default:
return createInMemoryBuffer(Path, Size, Mode);
}
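
Two behavioral changes in FileOutputBuffer above are worth illustrating: Path == "-" now routes through an in-memory buffer whose commit() writes to stdout, and a failed mmap(2) falls back to the in-memory path instead of erroring out. A hedged usage sketch (the helper name and the explicit flags value are assumptions, not part of this patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <cstring>
using namespace llvm;

// Writes Payload either to a regular file or, with Path == "-", to stdout.
static Error writeBlob(StringRef Path, StringRef Payload) {
  Expected<std::unique_ptr<FileOutputBuffer>> BufOrErr =
      FileOutputBuffer::create(Path, Payload.size(), /*Flags=*/0);
  if (!BufOrErr)
    return BufOrErr.takeError();
  std::unique_ptr<FileOutputBuffer> &Buf = *BufOrErr;
  std::memcpy(Buf->getBufferStart(), Payload.data(), Payload.size());
  return Buf->commit(); // For "-" this flushes the buffer to stdout.
}
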
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 39dbefff5b70..62eb7bfda195 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -1,9 +1,8 @@
//===- Support/FileUtilities.cpp - File System Utilities ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index ee69a64ac97b..ce6f196e1060 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -1,9 +1,8 @@
//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/FormatVariadic.cpp b/lib/Support/FormatVariadic.cpp
index 1f3505d5f74f..f9e89f69b528 100644
--- a/lib/Support/FormatVariadic.cpp
+++ b/lib/Support/FormatVariadic.cpp
@@ -1,9 +1,8 @@
//===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
#include "llvm/Support/FormatVariadic.h"
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index b0cb06c1daa2..4eb747038bb9 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -1,9 +1,8 @@
//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/GlobPattern.cpp b/lib/Support/GlobPattern.cpp
index 4ea110301f16..6011be86d77f 100644
--- a/lib/Support/GlobPattern.cpp
+++ b/lib/Support/GlobPattern.cpp
@@ -1,9 +1,8 @@
//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 9335daffc3e2..c689a81925d4 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -1,9 +1,8 @@
//===- GraphWriter.cpp - Implements GraphWriter support routines ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Hashing.cpp b/lib/Support/Hashing.cpp
index 7de25cec7371..1b20a670434f 100644
--- a/lib/Support/Hashing.cpp
+++ b/lib/Support/Hashing.cpp
@@ -1,9 +1,8 @@
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index d5a688c7fb9b..d491912bdc0c 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -1,9 +1,8 @@
//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -193,6 +192,8 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0xd07", "cortex-a57")
.Case("0xd08", "cortex-a72")
.Case("0xd09", "cortex-a73")
+ .Case("0xd0a", "cortex-a75")
+ .Case("0xd0b", "cortex-a76")
.Default("generic");
}
@@ -236,6 +237,10 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
.Case("0x211", "kryo")
.Case("0x800", "cortex-a73")
.Case("0x801", "cortex-a73")
+ .Case("0x802", "cortex-a73")
+ .Case("0x803", "cortex-a73")
+ .Case("0x804", "cortex-a73")
+ .Case("0x805", "cortex-a73")
.Case("0xc00", "falkor")
.Case("0xc01", "saphira")
.Default("generic");
@@ -310,6 +315,8 @@ StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+ if (Id >= 8561 && HaveVectorSupport)
+ return "arch13";
if (Id >= 3906 && HaveVectorSupport)
return "z14";
if (Id >= 2964 && HaveVectorSupport)
@@ -331,7 +338,19 @@ StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
return "generic";
#else
- uint8_t insns[40] __attribute__ ((aligned (8))) =
+ uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
+ /* BPF_MOV64_IMM(BPF_REG_0, 0) */
+ { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ /* BPF_MOV64_IMM(BPF_REG_2, 1) */
+ 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
+ /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
+ 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
+ /* BPF_MOV64_IMM(BPF_REG_0, 1) */
+ 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
+ /* BPF_EXIT_INSN() */
+ 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+ uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
/* BPF_MOV64_IMM(BPF_REG_0, 0) */
{ 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
/* BPF_MOV64_IMM(BPF_REG_2, 1) */
@@ -356,10 +375,23 @@ StringRef sys::detail::getHostCPUNameForBPF() {
} attr = {};
attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
attr.insn_cnt = 5;
- attr.insns = (uint64_t)insns;
+ attr.insns = (uint64_t)v3_insns;
attr.license = (uint64_t)"DUMMY";
- int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
+ int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
+ sizeof(attr));
+ if (fd >= 0) {
+ close(fd);
+ return "v3";
+ }
+
+ /* Clear the whole attr in case its content changed by syscall. */
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
+ attr.insn_cnt = 5;
+ attr.insns = (uint64_t)v2_insns;
+ attr.license = (uint64_t)"DUMMY";
+ fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
if (fd >= 0) {
close(fd);
return "v2";
@@ -637,10 +669,10 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
// Skylake:
- case 0x4e: // Skylake mobile
- case 0x5e: // Skylake desktop
- case 0x8e: // Kaby Lake mobile
- case 0x9e: // Kaby Lake desktop
+ case 0x4e: // Skylake mobile
+ case 0x5e: // Skylake desktop
+ case 0x8e: // Kaby Lake mobile
+ case 0x9e: // Kaby Lake desktop
*Type = X86::INTEL_COREI7; // "skylake"
*Subtype = X86::INTEL_COREI7_SKYLAKE;
break;
@@ -648,7 +680,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// Skylake Xeon:
case 0x55:
*Type = X86::INTEL_COREI7;
- *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
+ if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64)))
+ *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake"
+ else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32)))
+ *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake"
+ else
+ *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
break;
// Cannonlake:
@@ -657,6 +694,20 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake"
break;
+ // Icelake:
+ case 0x7d:
+ case 0x7e:
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
+ break;
+
+ // Icelake Xeon:
+ case 0x6a:
+ case 0x6c:
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
+ break;
+
case 0x1c: // Most 45 nm Intel Atom processors
case 0x26: // 45 nm Atom Lincroft
case 0x27: // 32 nm Atom Medfield
@@ -682,9 +733,14 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 0x7a:
*Type = X86::INTEL_GOLDMONT_PLUS;
break;
+ case 0x86:
+ *Type = X86::INTEL_TREMONT;
+ break;
+
case 0x57:
*Type = X86::INTEL_KNL; // knl
break;
+
case 0x85:
*Type = X86::INTEL_KNM; // knm
break;
@@ -702,6 +758,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
}
+ if (Features3 & (1 << (X86::FEATURE_AVX512BF16 - 64))) {
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_COOPERLAKE;
+ break;
+ }
+
if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_CASCADELAKE;
@@ -892,7 +954,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break; // "btver2"
case 23:
*Type = X86::AMDFAM17H;
- *Subtype = X86::AMDFAM17H_ZNVER1;
+ if (Model >= 0x30 && Model <= 0x3f) {
+ *Subtype = X86::AMDFAM17H_ZNVER2;
+ break; // "znver2"; 30h-3fh: Zen2
+ }
+ if (Model <= 0x0f) {
+ *Subtype = X86::AMDFAM17H_ZNVER1;
+ break; // "znver1"; 00h-0Fh: Zen1
+ }
break;
default:
break; // "generic"
@@ -1233,8 +1302,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
+ Features["cx8"] = (EDX >> 8) & 1;
Features["cmov"] = (EDX >> 15) & 1;
Features["mmx"] = (EDX >> 23) & 1;
+ Features["fxsr"] = (EDX >> 24) & 1;
Features["sse"] = (EDX >> 25) & 1;
Features["sse2"] = (EDX >> 26) & 1;
@@ -1298,6 +1369,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
+ Features["mpx"] = HasLeaf7 && ((EBX >> 14) & 1);
// AVX512 is only supported if the OS supports the context save for it.
Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
@@ -1329,6 +1401,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
+ Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
// There are two CPUID leafs which have information associated with the pconfig
// instruction:
@@ -1341,6 +1414,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// detecting features using the "-march=native" flag.
// For more info, see X86 ISA docs.
Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
+ bool HasLeaf7Subleaf1 =
+ MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
diff --git a/lib/Support/InitLLVM.cpp b/lib/Support/InitLLVM.cpp
index c008d0455c99..0d7d7fcc8cb6 100644
--- a/lib/Support/InitLLVM.cpp
+++ b/lib/Support/InitLLVM.cpp
@@ -1,9 +1,8 @@
//===-- InitLLVM.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -24,6 +23,7 @@ using namespace llvm::sys;
InitLLVM::InitLLVM(int &Argc, const char **&Argv) : StackPrinter(Argc, Argv) {
sys::PrintStackTraceOnErrorSignal(Argv[0]);
+ install_out_of_memory_new_handler();
#ifdef _WIN32
// We use UTF-8 as the internal character encoding. On Windows,
diff --git a/lib/Support/IntEqClasses.cpp b/lib/Support/IntEqClasses.cpp
index cb6e3a19e8d3..4a976dcefc65 100644
--- a/lib/Support/IntEqClasses.cpp
+++ b/lib/Support/IntEqClasses.cpp
@@ -1,9 +1,8 @@
//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/IntervalMap.cpp b/lib/Support/IntervalMap.cpp
index e11a7f2eb843..f15c7c9403c3 100644
--- a/lib/Support/IntervalMap.cpp
+++ b/lib/Support/IntervalMap.cpp
@@ -1,9 +1,8 @@
//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/ItaniumManglingCanonicalizer.cpp b/lib/Support/ItaniumManglingCanonicalizer.cpp
index e55dcd761809..da6514f7170b 100644
--- a/lib/Support/ItaniumManglingCanonicalizer.cpp
+++ b/lib/Support/ItaniumManglingCanonicalizer.cpp
@@ -1,9 +1,8 @@
//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -22,6 +21,7 @@ using namespace llvm;
using llvm::itanium_demangle::ForwardTemplateReference;
using llvm::itanium_demangle::Node;
using llvm::itanium_demangle::NodeKind;
+using llvm::itanium_demangle::StringView;
namespace {
struct FoldingSetNodeIDBuilder {
diff --git a/lib/Support/JSON.cpp b/lib/Support/JSON.cpp
index d468013fb94a..95e5ed654277 100644
--- a/lib/Support/JSON.cpp
+++ b/lib/Support/JSON.cpp
@@ -1,9 +1,8 @@
//=== JSON.cpp - JSON value, parsing and serialization - C++ -----------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
@@ -182,6 +181,12 @@ bool operator==(const Value &L, const Value &R) {
case Value::Boolean:
return *L.getAsBoolean() == *R.getAsBoolean();
case Value::Number:
+ // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+ // The same integer must convert to the same double, per the standard.
+ // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
+ // So we avoid floating point promotion for exact comparisons.
+ if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
+ return L.getAsInteger() == R.getAsInteger();
return *L.getAsNumber() == *R.getAsNumber();
case Value::String:
return *L.getAsString() == *R.getAsString();
@@ -555,9 +560,6 @@ std::string fixUTF8(llvm::StringRef S) {
return Res;
}
-} // namespace json
-} // namespace llvm
-
static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
OS << '\"';
for (unsigned char C : S) {
@@ -588,106 +590,129 @@ static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
OS << '\"';
}
-enum IndenterAction {
- Indent,
- Outdent,
- Newline,
- Space,
-};
-
-// Prints JSON. The indenter can be used to control formatting.
-template <typename Indenter>
-void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const {
- switch (Type) {
- case T_Null:
+void llvm::json::OStream::value(const Value &V) {
+ switch (V.kind()) {
+ case Value::Null:
+ valueBegin();
OS << "null";
- break;
- case T_Boolean:
- OS << (as<bool>() ? "true" : "false");
- break;
- case T_Double:
- OS << format("%.*g", std::numeric_limits<double>::max_digits10,
- as<double>());
- break;
- case T_Integer:
- OS << as<int64_t>();
- break;
- case T_StringRef:
- quote(OS, as<StringRef>());
- break;
- case T_String:
- quote(OS, as<std::string>());
- break;
- case T_Object: {
- bool Comma = false;
- OS << '{';
- I(Indent);
- for (const auto *P : sortedElements(as<json::Object>())) {
- if (Comma)
- OS << ',';
- Comma = true;
- I(Newline);
- quote(OS, P->first);
- OS << ':';
- I(Space);
- P->second.print(OS, I);
- }
- I(Outdent);
- if (Comma)
- I(Newline);
- OS << '}';
- break;
+ return;
+ case Value::Boolean:
+ valueBegin();
+ OS << (*V.getAsBoolean() ? "true" : "false");
+ return;
+ case Value::Number:
+ valueBegin();
+ if (V.Type == Value::T_Integer)
+ OS << *V.getAsInteger();
+ else
+ OS << format("%.*g", std::numeric_limits<double>::max_digits10,
+ *V.getAsNumber());
+ return;
+ case Value::String:
+ valueBegin();
+ quote(OS, *V.getAsString());
+ return;
+ case Value::Array:
+ return array([&] {
+ for (const Value &E : *V.getAsArray())
+ value(E);
+ });
+ case Value::Object:
+ return object([&] {
+ for (const Object::value_type *E : sortedElements(*V.getAsObject()))
+ attribute(E->first, E->second);
+ });
}
- case T_Array: {
- bool Comma = false;
- OS << '[';
- I(Indent);
- for (const auto &E : as<json::Array>()) {
- if (Comma)
- OS << ',';
- Comma = true;
- I(Newline);
- E.print(OS, I);
- }
- I(Outdent);
- if (Comma)
- I(Newline);
- OS << ']';
- break;
+}
+
+void llvm::json::OStream::valueBegin() {
+ assert(Stack.back().Ctx != Object && "Only attributes allowed here");
+ if (Stack.back().HasValue) {
+ assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
+ OS << ',';
+ }
+ if (Stack.back().Ctx == Array)
+ newline();
+ Stack.back().HasValue = true;
+}
+
+void llvm::json::OStream::newline() {
+ if (IndentSize) {
+ OS.write('\n');
+ OS.indent(Indent);
}
+}
+
+void llvm::json::OStream::arrayBegin() {
+ valueBegin();
+ Stack.emplace_back();
+ Stack.back().Ctx = Array;
+ Indent += IndentSize;
+ OS << '[';
+}
+
+void llvm::json::OStream::arrayEnd() {
+ assert(Stack.back().Ctx == Array);
+ Indent -= IndentSize;
+ if (Stack.back().HasValue)
+ newline();
+ OS << ']';
+ Stack.pop_back();
+ assert(!Stack.empty());
+}
+
+void llvm::json::OStream::objectBegin() {
+ valueBegin();
+ Stack.emplace_back();
+ Stack.back().Ctx = Object;
+ Indent += IndentSize;
+ OS << '{';
+}
+
+void llvm::json::OStream::objectEnd() {
+ assert(Stack.back().Ctx == Object);
+ Indent -= IndentSize;
+ if (Stack.back().HasValue)
+ newline();
+ OS << '}';
+ Stack.pop_back();
+ assert(!Stack.empty());
+}
+
+void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
+ assert(Stack.back().Ctx == Object);
+ if (Stack.back().HasValue)
+ OS << ',';
+ newline();
+ Stack.back().HasValue = true;
+ Stack.emplace_back();
+ Stack.back().Ctx = Singleton;
+ if (LLVM_LIKELY(isUTF8(Key))) {
+ quote(OS, Key);
+ } else {
+ assert(false && "Invalid UTF-8 in attribute key");
+ quote(OS, fixUTF8(Key));
}
+ OS.write(':');
+ if (IndentSize)
+ OS.write(' ');
+}
+
+void llvm::json::OStream::attributeEnd() {
+ assert(Stack.back().Ctx == Singleton);
+ assert(Stack.back().HasValue && "Attribute must have a value");
+ Stack.pop_back();
+ assert(Stack.back().Ctx == Object);
}
+} // namespace json
+} // namespace llvm
+
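
The new json::OStream replaces the recursive print()/Indenter machinery with an explicit streaming writer: valueBegin() inserts commas and newlines, while arrayBegin/objectBegin/attributeBegin maintain the context stack that the assertions check. A hedged usage sketch (the constructor is assumed to take the output stream and an indent size):

#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void emitExample(raw_ostream &OS) {
  json::OStream J(OS, /*IndentSize=*/2);
  J.object([&] {
    J.attribute("tool", "FileCheck");
    J.attribute("passed", true);
    J.attribute("prefixes", json::Array{"CHECK", "CHECK-NEXT", "CHECK-DAG"});
  });
  // With IndentSize 0 the same calls produce compact, single-line output.
}
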
void llvm::format_provider<llvm::json::Value>::format(
const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
- if (Options.empty()) {
- OS << E;
- return;
- }
unsigned IndentAmount = 0;
- if (Options.getAsInteger(/*Radix=*/10, IndentAmount))
+ if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
llvm_unreachable("json::Value format options should be an integer");
- unsigned IndentLevel = 0;
- E.print(OS, [&](IndenterAction A) {
- switch (A) {
- case Newline:
- OS << '\n';
- OS.indent(IndentLevel);
- break;
- case Space:
- OS << ' ';
- break;
- case Indent:
- IndentLevel += IndentAmount;
- break;
- case Outdent:
- IndentLevel -= IndentAmount;
- break;
- };
- });
+ json::OStream(OS, IndentAmount).value(E);
}
-llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) {
- E.print(OS, [](IndenterAction A) { /*ignore*/ });
- return OS;
-}
diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp
index 17c55f565e08..e043a3c33c28 100644
--- a/lib/Support/JamCRC.cpp
+++ b/lib/Support/JamCRC.cpp
@@ -1,9 +1,8 @@
//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/KnownBits.cpp b/lib/Support/KnownBits.cpp
index ac790ebed352..a6c591fca312 100644
--- a/lib/Support/KnownBits.cpp
+++ b/lib/Support/KnownBits.cpp
@@ -1,9 +1,8 @@
//===-- KnownBits.cpp - Stores known zeros/ones ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,18 +15,14 @@
using namespace llvm;
-KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
- const KnownBits &LHS, KnownBits RHS) {
- // Carry in a 1 for a subtract, rather than 0.
- bool CarryIn = false;
- if (!Add) {
- // Sum = LHS + ~RHS + 1
- std::swap(RHS.Zero, RHS.One);
- CarryIn = true;
- }
+static KnownBits computeForAddCarry(
+ const KnownBits &LHS, const KnownBits &RHS,
+ bool CarryZero, bool CarryOne) {
+ assert(!(CarryZero && CarryOne) &&
+ "Carry can't be zero and one at the same time");
- APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + CarryIn;
- APInt PossibleSumOne = LHS.One + RHS.One + CarryIn;
+ APInt PossibleSumZero = ~LHS.Zero + ~RHS.Zero + !CarryZero;
+ APInt PossibleSumOne = LHS.One + RHS.One + CarryOne;
// Compute known bits of the carry.
APInt CarryKnownZero = ~(PossibleSumZero ^ LHS.Zero ^ RHS.Zero);
@@ -46,9 +41,32 @@ KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
KnownBits KnownOut;
KnownOut.Zero = ~std::move(PossibleSumZero) & Known;
KnownOut.One = std::move(PossibleSumOne) & Known;
+ return KnownOut;
+}
+
+KnownBits KnownBits::computeForAddCarry(
+ const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry) {
+ assert(Carry.getBitWidth() == 1 && "Carry must be 1-bit");
+ return ::computeForAddCarry(
+ LHS, RHS, Carry.Zero.getBoolValue(), Carry.One.getBoolValue());
+}
+
+KnownBits KnownBits::computeForAddSub(bool Add, bool NSW,
+ const KnownBits &LHS, KnownBits RHS) {
+ KnownBits KnownOut;
+ if (Add) {
+ // Sum = LHS + RHS + 0
+ KnownOut = ::computeForAddCarry(
+ LHS, RHS, /*CarryZero*/true, /*CarryOne*/false);
+ } else {
+ // Sum = LHS + ~RHS + 1
+ std::swap(RHS.Zero, RHS.One);
+ KnownOut = ::computeForAddCarry(
+ LHS, RHS, /*CarryZero*/false, /*CarryOne*/true);
+ }
// Are we still trying to solve for the sign bit?
- if (!Known.isSignBitSet()) {
+ if (!KnownOut.isNegative() && !KnownOut.isNonNegative()) {
if (NSW) {
// Adding two non-negative numbers, or subtracting a negative number from
// a non-negative one, can't wrap into negative.
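
The refactoring above models subtraction as LHS + ~RHS + 1, so computeForAddSub(/*Add=*/false, ...) swaps RHS.Zero/RHS.One and feeds a known-one carry into the shared computeForAddCarry helper. A small, hedged illustration with fully known 4-bit operands (the function name is illustrative; expected results worked out by hand):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static void knownBitsAddSubExample() {
  KnownBits LHS(4), RHS(4);
  LHS.One = APInt(4, 6); LHS.Zero = ~LHS.One; // LHS is exactly 0b0110
  RHS.One = APInt(4, 1); RHS.Zero = ~RHS.One; // RHS is exactly 0b0001
  KnownBits Sum =
      KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false, LHS, RHS);
  // Sum is exactly 0b0111 (7): Sum.One == 7, Sum.Zero == 8.
  KnownBits Diff =
      KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false, LHS, RHS);
  // Diff is exactly 0b0101 (5): Diff.One == 5, Diff.Zero == 10.
}
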
diff --git a/lib/Support/LEB128.cpp b/lib/Support/LEB128.cpp
index 449626f2d451..d41b673e9c8a 100644
--- a/lib/Support/LEB128.cpp
+++ b/lib/Support/LEB128.cpp
@@ -1,9 +1,8 @@
//===- LEB128.cpp - LEB128 utility functions implementation -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/LineIterator.cpp b/lib/Support/LineIterator.cpp
index 5baa1a37f385..164436a2c48e 100644
--- a/lib/Support/LineIterator.cpp
+++ b/lib/Support/LineIterator.cpp
@@ -1,9 +1,8 @@
//===- LineIterator.cpp - Implementation of line iteration ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index c166230ba3a3..10181192afbd 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -1,9 +1,8 @@
//===--- LockFileManager.cpp - File-level Locking Utility------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/LowLevelType.cpp b/lib/Support/LowLevelType.cpp
index cb2187405d6b..fe77cb3db413 100644
--- a/lib/Support/LowLevelType.cpp
+++ b/lib/Support/LowLevelType.cpp
@@ -1,9 +1,8 @@
//===-- llvm/Support/LowLevelType.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,14 +17,14 @@ using namespace llvm;
LLT::LLT(MVT VT) {
if (VT.isVector()) {
- init(/*isPointer=*/false, VT.getVectorNumElements() > 1,
+ init(/*IsPointer=*/false, VT.getVectorNumElements() > 1,
VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(),
/*AddressSpace=*/0);
} else if (VT.isValid()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
assert(VT.getSizeInBits() != 0 && "invalid zero-sized type");
- init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0,
+ init(/*IsPointer=*/false, /*IsVector=*/false, /*NumElements=*/0,
VT.getSizeInBits(), /*AddressSpace=*/0);
} else {
IsPointer = false;
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 74f71a385027..28ceb1a70e42 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -1,9 +1,8 @@
//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/MathExtras.cpp b/lib/Support/MathExtras.cpp
index ba0924540ceb..87c7101c424b 100644
--- a/lib/Support/MathExtras.cpp
+++ b/lib/Support/MathExtras.cpp
@@ -1,9 +1,8 @@
//===-- MathExtras.cpp - Implement the MathExtras header --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp
index c245eedd2c16..581484268cd8 100644
--- a/lib/Support/Memory.cpp
+++ b/lib/Support/Memory.cpp
@@ -1,9 +1,8 @@
//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,10 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Valgrind.h"
+#ifndef NDEBUG
+#include "llvm/Support/raw_ostream.h"
+#endif // ifndef NDEBUG
+
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
#include "Unix/Memory.inc"
@@ -23,3 +26,28 @@
#ifdef _WIN32
#include "Windows/Memory.inc"
#endif
+
+#ifndef NDEBUG
+
+namespace llvm {
+namespace sys {
+
+raw_ostream &operator<<(raw_ostream &OS, const Memory::ProtectionFlags &PF) {
+ assert((PF & ~(Memory::MF_READ | Memory::MF_WRITE | Memory::MF_EXEC)) == 0 &&
+ "Unrecognized flags");
+
+ return OS << (PF & Memory::MF_READ ? 'R' : '-')
+ << (PF & Memory::MF_WRITE ? 'W' : '-')
+ << (PF & Memory::MF_EXEC ? 'X' : '-');
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const MemoryBlock &MB) {
+ return OS << "[ " << MB.base() << " .. "
+ << (void *)((char *)MB.base() + MB.allocatedSize()) << " ] ("
+ << MB.allocatedSize() << " bytes)";
+}
+
+} // end namespace sys
+} // end namespace llvm
+
+#endif // ifndef NDEBUG
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index ef9159bac284..d0e5bb154c1a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -1,9 +1,8 @@
//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -183,7 +182,7 @@ class MemoryBufferMMapFile : public MB {
}
public:
- MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
+ MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
uint64_t Offset, std::error_code &EC)
: MFR(FD, MB::Mapmode, getLegalMapSize(Len, Offset),
getLegalMapOffset(Offset), EC) {
@@ -209,16 +208,16 @@ public:
}
static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
-getMemoryBufferForStream(int FD, const Twine &BufferName) {
+getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
const ssize_t ChunkSize = 4096*4;
SmallString<ChunkSize> Buffer;
- ssize_t ReadBytes;
+ size_t ReadBytes;
// Read into Buffer until we hit EOF.
do {
Buffer.reserve(Buffer.size() + ChunkSize);
- ReadBytes = sys::RetryAfterSignal(-1, ::read, FD, Buffer.end(), ChunkSize);
- if (ReadBytes == -1)
- return std::error_code(errno, std::generic_category());
+ if (auto EC = sys::fs::readNativeFile(
+ FD, makeMutableArrayRef(Buffer.end(), ChunkSize), &ReadBytes))
+ return EC;
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
@@ -235,7 +234,7 @@ MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
-getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
+getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
bool IsVolatile);
@@ -243,15 +242,14 @@ template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) {
- int FD;
- std::error_code EC = sys::fs::openFileForRead(Filename, FD, sys::fs::OF_None);
-
- if (EC)
- return EC;
-
+ Expected<sys::fs::file_t> FDOrErr =
+ sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
+ if (!FDOrErr)
+ return errorToErrorCode(FDOrErr.takeError());
+ sys::fs::file_t FD = *FDOrErr;
auto Ret = getOpenFileImpl<MB>(FD, Filename, FileSize, MapSize, Offset,
RequiresNullTerminator, IsVolatile);
- close(FD);
+ sys::fs::closeFile(FD);
return Ret;
}
@@ -305,7 +303,7 @@ WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
return SB;
}
-static bool shouldUseMmap(int FD,
+static bool shouldUseMmap(sys::fs::file_t FD,
size_t FileSize,
size_t MapSize,
off_t Offset,
@@ -363,12 +361,11 @@ static bool shouldUseMmap(int FD,
static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
uint64_t Offset) {
- int FD;
- std::error_code EC = sys::fs::openFileForReadWrite(
- Filename, FD, sys::fs::CD_OpenExisting, sys::fs::OF_None);
-
- if (EC)
- return EC;
+ Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
+ Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
+ if (!FDOrErr)
+ return errorToErrorCode(FDOrErr.takeError());
+ sys::fs::file_t FD = *FDOrErr;
// Default is to map the full file.
if (MapSize == uint64_t(-1)) {
@@ -392,6 +389,7 @@ getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
MapSize = FileSize;
}
+ std::error_code EC;
std::unique_ptr<WriteThroughMemoryBuffer> Result(
new (NamedBufferAlloc(Filename))
MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
@@ -415,10 +413,10 @@ WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
-getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
+getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
bool IsVolatile) {
- static int PageSize = sys::Process::getPageSize();
+ static int PageSize = sys::Process::getPageSizeEstimate();
// Default is to map the full file.
if (MapSize == uint64_t(-1)) {
@@ -460,45 +458,20 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
return make_error_code(errc::not_enough_memory);
}
- char *BufPtr = Buf.get()->getBufferStart();
-
- size_t BytesLeft = MapSize;
-#ifndef HAVE_PREAD
- if (lseek(FD, Offset, SEEK_SET) == -1)
- return std::error_code(errno, std::generic_category());
-#endif
-
- while (BytesLeft) {
-#ifdef HAVE_PREAD
- ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
- MapSize - BytesLeft + Offset);
-#else
- ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
-#endif
- if (NumRead == -1) {
- // Error while reading.
- return std::error_code(errno, std::generic_category());
- }
- if (NumRead == 0) {
- memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
- break;
- }
- BytesLeft -= NumRead;
- BufPtr += NumRead;
- }
+ sys::fs::readNativeFileSlice(FD, Buf->getBuffer(), Offset);
return std::move(Buf);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
+MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
bool RequiresNullTerminator, bool IsVolatile) {
return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
RequiresNullTerminator, IsVolatile);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
-MemoryBuffer::getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
+MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
int64_t Offset, bool IsVolatile) {
assert(MapSize != uint64_t(-1));
return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
@@ -512,18 +485,19 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
// fallback if it fails.
sys::ChangeStdinToBinary();
- return getMemoryBufferForStream(0, "<stdin>");
+ return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getFileAsStream(const Twine &Filename) {
- int FD;
- std::error_code EC = sys::fs::openFileForRead(Filename, FD, sys::fs::OF_None);
- if (EC)
- return EC;
+ Expected<sys::fs::file_t> FDOrErr =
+ sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
+ if (!FDOrErr)
+ return errorToErrorCode(FDOrErr.takeError());
+ sys::fs::file_t FD = *FDOrErr;
ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
getMemoryBufferForStream(FD, Filename);
- close(FD);
+ sys::fs::closeFile(FD);
return Ret;
}
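
For context, a small sketch (an illustration, not code from the patch) of the native-handle pattern that these hunks migrate MemoryBuffer to; the helper name openWithNativeHandle is invented for the example and simply mirrors what getFileAux() now does internally.

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;

// Open a file through the platform-neutral handle API, hand it to MemoryBuffer,
// then close the native handle again.
static ErrorOr<std::unique_ptr<MemoryBuffer>> openWithNativeHandle(StringRef Path) {
  Expected<sys::fs::file_t> FDOrErr =
      sys::fs::openNativeFileForRead(Path, sys::fs::OF_None);
  if (!FDOrErr)
    return errorToErrorCode(FDOrErr.takeError());
  sys::fs::file_t FD = *FDOrErr;
  auto BufOrErr = MemoryBuffer::getOpenFile(FD, Path, /*FileSize=*/-1);
  sys::fs::closeFile(FD);
  return BufOrErr;
}
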
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 7138c7a4b984..69b7b8126ab1 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -1,9 +1,8 @@
//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/NativeFormatting.cpp b/lib/Support/NativeFormatting.cpp
index 85b4bfb81568..3731e0c56359 100644
--- a/lib/Support/NativeFormatting.cpp
+++ b/lib/Support/NativeFormatting.cpp
@@ -1,9 +1,8 @@
//===- NativeFormatting.cpp - Low level formatting helpers -------*- C++-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/Optional.cpp b/lib/Support/Optional.cpp
new file mode 100644
index 000000000000..2425739c845d
--- /dev/null
+++ b/lib/Support/Optional.cpp
@@ -0,0 +1,14 @@
+//===- Optional.cpp - Optional values ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
+
+llvm::raw_ostream &llvm::operator<<(raw_ostream &OS, NoneType) {
+ return OS << "None";
+}
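
Purely illustrative: with the matching declaration in llvm/ADT/Optional.h (added in the same change), NoneType values can now be streamed directly.

#include "llvm/ADT/Optional.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::Optional<int> X;               // empty Optional
  llvm::outs() << llvm::None << "\n";  // prints "None" via the new operator
  if (!X)
    llvm::outs() << "X is empty\n";
  return 0;
}
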
diff --git a/lib/Support/Options.cpp b/lib/Support/Options.cpp
index 71258450efa6..770b7381c20e 100644
--- a/lib/Support/Options.cpp
+++ b/lib/Support/Options.cpp
@@ -1,9 +1,8 @@
//===- llvm/Support/Options.cpp - Debug options support ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp
index 1844003b9d3d..621bccbf2a4c 100644
--- a/lib/Support/Parallel.cpp
+++ b/lib/Support/Parallel.cpp
@@ -1,9 +1,8 @@
//===- llvm/Support/Parallel.cpp - Parallel algorithms --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,7 +17,9 @@
#include <stack>
#include <thread>
-using namespace llvm;
+namespace llvm {
+namespace parallel {
+namespace detail {
namespace {
@@ -119,11 +120,28 @@ Executor *Executor::getDefaultExecutor() {
#endif
}
-void parallel::detail::TaskGroup::spawn(std::function<void()> F) {
- L.inc();
- Executor::getDefaultExecutor()->add([&, F] {
+static std::atomic<int> TaskGroupInstances;
+
+// Latch::sync() called by the dtor may cause one thread to block. It is a
+// deadlock if all threads in the default executor are blocked. To prevent the
+// deadlock, only allow the first TaskGroup to run tasks in parallel. In the
+// scenario of nested parallel_for_each(), only the outermost one runs in parallel.
+TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
+TaskGroup::~TaskGroup() { --TaskGroupInstances; }
+
+void TaskGroup::spawn(std::function<void()> F) {
+ if (Parallel) {
+ L.inc();
+ Executor::getDefaultExecutor()->add([&, F] {
+ F();
+ L.dec();
+ });
+ } else {
F();
- L.dec();
- });
+ }
}
+
+} // namespace detail
+} // namespace parallel
+} // namespace llvm
#endif // LLVM_ENABLE_THREADS
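
A hedged usage sketch of the behaviour described in the comment above (illustration only; TaskGroup lives in the internal llvm::parallel::detail namespace and this assumes an LLVM built with LLVM_ENABLE_THREADS): the outermost group may run tasks in parallel, while any group created during its lifetime runs its tasks inline.

#include "llvm/Support/Parallel.h"
#include "llvm/Support/raw_ostream.h"
#include <atomic>

using namespace llvm;

int main() {
  std::atomic<int> Sum{0};
  {
    // The destructor waits for all spawned tasks before leaving the scope.
    parallel::detail::TaskGroup TG;
    for (int I = 1; I <= 4; ++I)
      TG.spawn([&Sum, I] { Sum += I; });
    // A TaskGroup constructed here, while TG is alive, would run its tasks
    // inline instead of on the executor, avoiding the deadlock noted above.
  }
  outs() << "sum = " << Sum.load() << "\n"; // 10
  return 0;
}
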
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 5ce2f50ebdaa..c49260125dba 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -1,9 +1,8 @@
//===-- Path.cpp - Implement OS Path Concept ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -170,25 +169,6 @@ createUniqueEntity(const Twine &Model, int &ResultFD,
SmallVectorImpl<char> &ResultPath, bool MakeAbsolute,
unsigned Mode, FSEntity Type,
sys::fs::OpenFlags Flags = sys::fs::OF_None) {
- SmallString<128> ModelStorage;
- Model.toVector(ModelStorage);
-
- if (MakeAbsolute) {
- // Make model absolute by prepending a temp directory if it's not already.
- if (!sys::path::is_absolute(Twine(ModelStorage))) {
- SmallString<128> TDir;
- sys::path::system_temp_directory(true, TDir);
- sys::path::append(TDir, Twine(ModelStorage));
- ModelStorage.swap(TDir);
- }
- }
-
- // From here on, DO NOT modify model. It may be needed if the randomly chosen
- // path already exists.
- ResultPath = ModelStorage;
- // Null terminate.
- ResultPath.push_back(0);
- ResultPath.pop_back();
// Limit the number of attempts we make, so that we don't infinite loop. E.g.
// "permission denied" could be for a specific file (so we retry with a
@@ -196,13 +176,7 @@ createUniqueEntity(const Twine &Model, int &ResultFD,
// Checking which is racy, so we try a number of times, then give up.
std::error_code EC;
for (int Retries = 128; Retries > 0; --Retries) {
- // Replace '%' with random chars.
- for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
- if (ModelStorage[i] == '%')
- ResultPath[i] =
- "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
- }
-
+ sys::fs::createUniquePath(Model, ResultPath, MakeAbsolute);
// Try to open + create the file.
switch (Type) {
case FS_File: {
@@ -323,7 +297,8 @@ reverse_iterator rbegin(StringRef Path, Style style) {
I.Path = Path;
I.Position = Path.size();
I.S = style;
- return ++I;
+ ++I;
+ return I;
}
reverse_iterator rend(StringRef Path) {
@@ -763,6 +738,32 @@ std::error_code getUniqueID(const Twine Path, UniqueID &Result) {
return std::error_code();
}
+void createUniquePath(const Twine &Model, SmallVectorImpl<char> &ResultPath,
+ bool MakeAbsolute) {
+ SmallString<128> ModelStorage;
+ Model.toVector(ModelStorage);
+
+ if (MakeAbsolute) {
+ // Make model absolute by prepending a temp directory if it's not already.
+ if (!sys::path::is_absolute(Twine(ModelStorage))) {
+ SmallString<128> TDir;
+ sys::path::system_temp_directory(true, TDir);
+ sys::path::append(TDir, Twine(ModelStorage));
+ ModelStorage.swap(TDir);
+ }
+ }
+
+ ResultPath = ModelStorage;
+ ResultPath.push_back(0);
+ ResultPath.pop_back();
+
+ // Replace '%' with random chars.
+ for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) {
+ if (ModelStorage[i] == '%')
+ ResultPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15];
+ }
+}
+
std::error_code createUniqueFile(const Twine &Model, int &ResultFd,
SmallVectorImpl<char> &ResultPath,
unsigned Mode) {
@@ -959,6 +960,7 @@ static std::error_code copy_file_internal(int ReadFD, int WriteFD) {
return std::error_code();
}
+#ifndef __APPLE__
std::error_code copy_file(const Twine &From, const Twine &To) {
int ReadFD, WriteFD;
if (std::error_code EC = openFileForRead(From, ReadFD, OF_None))
@@ -976,6 +978,7 @@ std::error_code copy_file(const Twine &From, const Twine &To) {
return EC;
}
+#endif
std::error_code copy_file(const Twine &From, int ToFD) {
int ReadFD;
@@ -1122,6 +1125,7 @@ TempFile &TempFile::operator=(TempFile &&Other) {
TmpName = std::move(Other.TmpName);
FD = Other.FD;
Other.Done = true;
+ Other.FD = -1;
return *this;
}
@@ -1129,26 +1133,27 @@ TempFile::~TempFile() { assert(Done); }
Error TempFile::discard() {
Done = true;
- std::error_code RemoveEC;
-// On windows closing will remove the file.
-#ifndef _WIN32
- // Always try to close and remove.
- if (!TmpName.empty()) {
- RemoveEC = fs::remove(TmpName);
- sys::DontRemoveFileOnSignal(TmpName);
- }
-#endif
-
- if (!RemoveEC)
- TmpName = "";
-
if (FD != -1 && close(FD) == -1) {
std::error_code EC = std::error_code(errno, std::generic_category());
return errorCodeToError(EC);
}
FD = -1;
+#ifdef _WIN32
+ // On windows closing will remove the file.
+ TmpName = "";
+ return Error::success();
+#else
+ // Always try to close and remove.
+ std::error_code RemoveEC;
+ if (!TmpName.empty()) {
+ RemoveEC = fs::remove(TmpName);
+ sys::DontRemoveFileOnSignal(TmpName);
+ if (!RemoveEC)
+ TmpName = "";
+ }
return errorCodeToError(RemoveEC);
+#endif
}
Error TempFile::keep(const Twine &Name) {
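
An illustrative sketch of the createUniquePath() helper factored out above (declared in llvm/Support/FileSystem.h in the same change); it only expands the '%' placeholders and does not create anything on disk.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  SmallString<128> Path;
  // Each '%' becomes a random hex digit; MakeAbsolute prepends the system
  // temp directory when the model is relative.
  sys::fs::createUniquePath("temp-%%%%%%%%.o", Path, /*MakeAbsolute=*/true);
  outs() << Path << "\n";
  return 0;
}
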
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
index 358137f08f5f..6fe195ffda7a 100644
--- a/lib/Support/PluginLoader.cpp
+++ b/lib/Support/PluginLoader.cpp
@@ -1,9 +1,8 @@
//===-- PluginLoader.cpp - Implement -load command line option ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 206de91ae239..aec00baec0e3 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -1,9 +1,8 @@
//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,12 +14,14 @@
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm-c/ErrorHandling.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Watchdog.h"
#include "llvm/Support/raw_ostream.h"
+#include <atomic>
#include <cstdarg>
#include <cstdio>
#include <tuple>
@@ -34,7 +35,7 @@ using namespace llvm;
// If backtrace support is not enabled, compile out support for pretty stack
// traces. This has the secondary effect of not requiring thread local storage
// when backtrace support is disabled.
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
// We need a thread local pointer to manage the stack of our stack trace
// objects, but we *really* cannot tolerate destructors running and do not want
@@ -42,6 +43,22 @@ using namespace llvm;
// thread-local variable.
static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr;
+// The use of 'volatile' here is to ensure that any particular thread always
+// reloads the value of the counter. The 'std::atomic' allows us to specify that
+// this variable is accessed in an unsynchronized way (it's not actually
+// synchronizing). This does technically mean that the value may not appear to
+// be the same across threads running simultaneously on different CPUs, but in
+// practice the worst that will happen is that we won't print a stack trace when
+// we could have.
+//
+// This is initialized to 1 because 0 is used as a sentinel for "not enabled on
+// the current thread". If the user happens to overflow an 'unsigned' with
+// SIGINFO requests, it's possible that some threads will stop responding to it,
+// but the program won't crash.
+static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter =
+ ATOMIC_VAR_INIT(1);
+static LLVM_THREAD_LOCAL unsigned ThreadLocalSigInfoGenerationCounter = 0;
+
namespace llvm {
PrettyStackTraceEntry *ReverseStackTrace(PrettyStackTraceEntry *Head) {
PrettyStackTraceEntry *Prev = nullptr;
@@ -57,8 +74,9 @@ static void PrintStack(raw_ostream &OS) {
// to fail if we crashed due to stack overflow), we do an up-front pass to
// reverse the stack, then print it, then reverse it again.
unsigned ID = 0;
- PrettyStackTraceEntry *ReversedStack =
- llvm::ReverseStackTrace(PrettyStackTraceHead);
+ SaveAndRestore<PrettyStackTraceEntry *> SavedStack{PrettyStackTraceHead,
+ nullptr};
+ PrettyStackTraceEntry *ReversedStack = ReverseStackTrace(SavedStack.get());
for (const PrettyStackTraceEntry *Entry = ReversedStack; Entry;
Entry = Entry->getNextEntry()) {
OS << ID++ << ".\t";
@@ -68,7 +86,10 @@ static void PrintStack(raw_ostream &OS) {
llvm::ReverseStackTrace(ReversedStack);
}
-/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+/// Print the current stack trace to the specified stream.
+///
+/// Marked NOINLINE so it can be called from debuggers.
+LLVM_ATTRIBUTE_NOINLINE
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
if (!PrettyStackTraceHead) return;
@@ -128,11 +149,24 @@ static void CrashHandler(void *) {
#endif
}
-// defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
-#endif
+static void printForSigInfoIfNeeded() {
+ unsigned CurrentSigInfoGeneration =
+ GlobalSigInfoGenerationCounter.load(std::memory_order_relaxed);
+ if (ThreadLocalSigInfoGenerationCounter == 0 ||
+ ThreadLocalSigInfoGenerationCounter == CurrentSigInfoGeneration) {
+ return;
+ }
+
+ PrintCurStackTrace(errs());
+ ThreadLocalSigInfoGenerationCounter = CurrentSigInfoGeneration;
+}
+
+#endif // ENABLE_BACKTRACES
PrettyStackTraceEntry::PrettyStackTraceEntry() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
+ // Handle SIGINFO first, because we haven't finished constructing yet.
+ printForSigInfoIfNeeded();
// Link ourselves.
NextEntry = PrettyStackTraceHead;
PrettyStackTraceHead = this;
@@ -140,10 +174,12 @@ PrettyStackTraceEntry::PrettyStackTraceEntry() {
}
PrettyStackTraceEntry::~PrettyStackTraceEntry() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
assert(PrettyStackTraceHead == this &&
"Pretty stack trace entry destruction is out of order");
PrettyStackTraceHead = NextEntry;
+ // Handle SIGINFO first, because we already started destructing.
+ printForSigInfoIfNeeded();
#endif
}
@@ -175,7 +211,7 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << '\n';
}
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
static bool RegisterCrashPrinter() {
sys::AddSignalHandler(CrashHandler, nullptr);
return false;
@@ -183,15 +219,37 @@ static bool RegisterCrashPrinter() {
#endif
void llvm::EnablePrettyStackTrace() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
// The first time this is called, we register the crash printer.
static bool HandlerRegistered = RegisterCrashPrinter();
(void)HandlerRegistered;
#endif
}
+void llvm::EnablePrettyStackTraceOnSigInfoForThisThread(bool ShouldEnable) {
+#if ENABLE_BACKTRACES
+ if (!ShouldEnable) {
+ ThreadLocalSigInfoGenerationCounter = 0;
+ return;
+ }
+
+ // The first time this is called, we register the SIGINFO handler.
+ static bool HandlerRegistered = []{
+ sys::SetInfoSignalFunction([]{
+ GlobalSigInfoGenerationCounter.fetch_add(1, std::memory_order_relaxed);
+ });
+ return false;
+ }();
+ (void)HandlerRegistered;
+
+ // Next, enable it for the current thread.
+ ThreadLocalSigInfoGenerationCounter =
+ GlobalSigInfoGenerationCounter.load(std::memory_order_relaxed);
+#endif
+}
+
const void *llvm::SavePrettyStackState() {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
return PrettyStackTraceHead;
#else
return nullptr;
@@ -199,7 +257,7 @@ const void *llvm::SavePrettyStackState() {
}
void llvm::RestorePrettyStackState(const void *Top) {
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
+#if ENABLE_BACKTRACES
PrettyStackTraceHead =
static_cast<PrettyStackTraceEntry *>(const_cast<void *>(Top));
#endif
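
A minimal sketch of how a tool might opt in to the new SIGINFO behaviour (illustration only, not from the patch).

#include "llvm/Support/PrettyStackTrace.h"

using namespace llvm;

int main(int argc, char **argv) {
  // Print the pretty stack trace on crashes, as before.
  EnablePrettyStackTrace();
  PrettyStackTraceProgram X(argc, argv);

  // New in this change: also dump the current pretty stack trace when the
  // process receives SIGINFO (Ctrl-T on BSDs/macOS) or SIGUSR1 where SIGINFO
  // is unavailable.
  EnablePrettyStackTraceOnSigInfoForThisThread(/*ShouldEnable=*/true);

  // ... long-running work ...
  return 0;
}
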
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index f32355aefbb7..5b6471008159 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -1,9 +1,8 @@
//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 63cdcdaabee9..0a9363c59fc6 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -1,9 +1,8 @@
//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 8b6d74e49f31..7ce856b716c6 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -1,9 +1,8 @@
//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp
index df0d87fab021..09fad1979985 100644
--- a/lib/Support/RandomNumberGenerator.cpp
+++ b/lib/Support/RandomNumberGenerator.cpp
@@ -1,9 +1,8 @@
//===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,13 +26,9 @@ using namespace llvm;
#define DEBUG_TYPE "rng"
-// Tracking BUG: 19665
-// http://llvm.org/bugs/show_bug.cgi?id=19665
-//
-// Do not change to cl::opt<uint64_t> since this silently breaks argument parsing.
-static cl::opt<unsigned long long>
- Seed("rng-seed", cl::value_desc("seed"), cl::Hidden,
- cl::desc("Seed for the random number generator"), cl::init(0));
+static cl::opt<uint64_t> Seed("rng-seed", cl::value_desc("seed"), cl::Hidden,
+ cl::desc("Seed for the random number generator"),
+ cl::init(0));
RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) {
LLVM_DEBUG(if (Seed == 0) dbgs()
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 48caab131526..4c1b07038024 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -1,9 +1,8 @@
//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/SHA1.cpp b/lib/Support/SHA1.cpp
index 3007a78d5e22..47a5f07fbe7b 100644
--- a/lib/Support/SHA1.cpp
+++ b/lib/Support/SHA1.cpp
@@ -1,9 +1,8 @@
//====- SHA1.cpp - Private copy of the SHA1 implementation ---*- C++ -* ======//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp
index 807c9fa521de..54d4cc33410b 100644
--- a/lib/Support/ScaledNumber.cpp
+++ b/lib/Support/ScaledNumber.cpp
@@ -1,9 +1,8 @@
//==- lib/Support/ScaledNumber.cpp - Support for scaled numbers -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp
index 333f492d4589..173a07f009d2 100644
--- a/lib/Support/Signals.cpp
+++ b/lib/Support/Signals.cpp
@@ -1,9 +1,8 @@
//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -132,8 +131,8 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
// If we don't know argv0 or the address of main() at this point, try
// to guess it anyway (it's possible on some platforms).
std::string MainExecutableName =
- Argv0.empty() ? sys::fs::getMainExecutable(nullptr, nullptr)
- : (std::string)Argv0;
+ sys::fs::exists(Argv0) ? (std::string)Argv0
+ : sys::fs::getMainExecutable(nullptr, nullptr);
BumpPtrAllocator Allocator;
StringSaver StrPool(Allocator);
std::vector<const char *> Modules(Depth, nullptr);
diff --git a/lib/Support/Signposts.cpp b/lib/Support/Signposts.cpp
new file mode 100644
index 000000000000..d456f41d2fa6
--- /dev/null
+++ b/lib/Support/Signposts.cpp
@@ -0,0 +1,119 @@
+//===-- Signposts.cpp - Interval debug annotations ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Signposts.h"
+#include "llvm/Support/Timer.h"
+
+#include "llvm/Config/config.h"
+#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+#include "llvm/ADT/DenseMap.h"
+#include <os/signpost.h>
+#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
+
+using namespace llvm;
+
+#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+namespace {
+os_log_t *LogCreator() {
+ os_log_t *X = new os_log_t;
+ *X = os_log_create("org.llvm.signposts", OS_LOG_CATEGORY_POINTS_OF_INTEREST);
+ return X;
+}
+void LogDeleter(os_log_t *X) {
+ os_release(*X);
+ delete X;
+}
+} // end anonymous namespace
+
+namespace llvm {
+class SignpostEmitterImpl {
+ using LogPtrTy =
+ std::unique_ptr<os_log_t, std::function<void(os_log_t *)>>;
+ using LogTy = LogPtrTy::element_type;
+
+ LogPtrTy SignpostLog;
+ DenseMap<const Timer *, os_signpost_id_t> Signposts;
+
+ LogTy &getLogger() const { return *SignpostLog; }
+ os_signpost_id_t getSignpostForTimer(const Timer *T) {
+ const auto &I = Signposts.find(T);
+ if (I != Signposts.end())
+ return I->second;
+
+ const auto &Inserted = Signposts.insert(
+ std::make_pair(T, os_signpost_id_make_with_pointer(getLogger(), T)));
+ return Inserted.first->second;
+ }
+
+public:
+ SignpostEmitterImpl() : SignpostLog(LogCreator(), LogDeleter), Signposts() {}
+
+ bool isEnabled() const { return os_signpost_enabled(*SignpostLog); }
+
+ void startTimerInterval(Timer *T) {
+ if (isEnabled()) {
+ // Both strings used here are required to be constant literal strings
+ os_signpost_interval_begin(getLogger(), getSignpostForTimer(T),
+ "Pass Timers", "Begin %s",
+ T->getName().c_str());
+ }
+ }
+
+ void endTimerInterval(Timer *T) {
+ if (isEnabled()) {
+ // Both strings used here are required to be constant literal strings
+ os_signpost_interval_end(getLogger(), getSignpostForTimer(T),
+ "Pass Timers", "End %s", T->getName().c_str());
+ }
+ }
+};
+} // end namespace llvm
+#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
+
+#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+#define HAVE_ANY_SIGNPOST_IMPL 1
+#endif
+
+SignpostEmitter::SignpostEmitter() {
+#if HAVE_ANY_SIGNPOST_IMPL
+ Impl = new SignpostEmitterImpl();
+#else // if HAVE_ANY_SIGNPOST_IMPL
+ Impl = nullptr;
+#endif // if !HAVE_ANY_SIGNPOST_IMPL
+}
+
+SignpostEmitter::~SignpostEmitter() {
+#if HAVE_ANY_SIGNPOST_IMPL
+ delete Impl;
+#endif // if HAVE_ANY_SIGNPOST_IMPL
+}
+
+bool SignpostEmitter::isEnabled() const {
+#if HAVE_ANY_SIGNPOST_IMPL
+ return Impl->isEnabled();
+#else
+ return false;
+#endif // if !HAVE_ANY_SIGNPOST_IMPL
+}
+
+void SignpostEmitter::startTimerInterval(Timer *T) {
+#if HAVE_ANY_SIGNPOST_IMPL
+ if (Impl == nullptr)
+ return;
+ return Impl->startTimerInterval(T);
+#endif // if !HAVE_ANY_SIGNPOST_IMPL
+}
+
+void SignpostEmitter::endTimerInterval(Timer *T) {
+#if HAVE_ANY_SIGNPOST_IMPL
+ if (Impl == nullptr)
+ return;
+ Impl->endTimerInterval(T);
+#endif // if !HAVE_ANY_SIGNPOST_IMPL
+}
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index fed4a17d6635..f60464c8e756 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index 1070c6672edc..36f0a81f6b00 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -1,9 +1,8 @@
//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index a55ad881d012..2a241f18c362 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -1,9 +1,8 @@
//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -96,14 +95,9 @@ unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
T PtrOffset = static_cast<T>(PtrDiff);
- // std::lower_bound returns the first EOL offset that's not-less-than
- // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
- // (including if PtrOffset refers to the EOL itself). If there's no such
- // EOL, returns end().
- auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset);
-
- // Lines count from 1, so add 1 to the distance from the 0th line.
- return (1 + (EOL - Offsets->begin()));
+ // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
+ // the line number.
+ return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1;
}
SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
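
To make the new computation concrete, a standalone sketch (illustration only) of the same lower_bound-based line lookup over a plain vector of end-of-line offsets.

#include "llvm/ADT/STLExtras.h"
#include <cstdint>
#include <vector>

// Offsets holds the buffer offset of each '\n'; the line of a given offset is
// one plus the number of EOLs strictly before it.
static unsigned lineNumberFor(const std::vector<uint32_t> &Offsets,
                              uint32_t PtrOffset) {
  return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
}
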
diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index bf807e66e02c..96e09f9552bb 100644
--- a/lib/Support/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -1,9 +1,8 @@
//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index d57300a75d1d..e4f0535d21aa 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -1,9 +1,8 @@
//===-- Statistic.cpp - Easy way to expose stats information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,8 +135,7 @@ bool llvm::AreStatisticsEnabled() {
}
void StatisticInfo::sort() {
- std::stable_sort(Stats.begin(), Stats.end(),
- [](const Statistic *LHS, const Statistic *RHS) {
+ llvm::stable_sort(Stats, [](const Statistic *LHS, const Statistic *RHS) {
if (int Cmp = std::strcmp(LHS->getDebugType(), RHS->getDebugType()))
return Cmp < 0;
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 386d74a47983..bf28b2be5657 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -1,9 +1,8 @@
//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index c1f707ce50a5..6b5ea020dd46 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -1,9 +1,8 @@
//===--- StringMap.cpp - String Hash table map implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index c591857c415d..82351017b8cc 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -1,9 +1,8 @@
//===-- StringPool.cpp - Interned string pool -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index f0349260e22f..4bafc4ec7181 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -1,9 +1,8 @@
//===-- StringRef.cpp - Lightweight String References ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/StringSaver.cpp b/lib/Support/StringSaver.cpp
index bf0ac8de9821..f7ccfb97ea79 100644
--- a/lib/Support/StringSaver.cpp
+++ b/lib/Support/StringSaver.cpp
@@ -1,9 +1,8 @@
//===-- StringSaver.cpp ---------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/SymbolRemappingReader.cpp b/lib/Support/SymbolRemappingReader.cpp
index 264c890ce8f1..1caf0947216e 100644
--- a/lib/Support/SymbolRemappingReader.cpp
+++ b/lib/Support/SymbolRemappingReader.cpp
@@ -1,9 +1,8 @@
//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index 7fa6ae3f6199..47e0c72ec7c1 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -1,9 +1,8 @@
//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/TarWriter.cpp b/lib/Support/TarWriter.cpp
index 5b4d554befe4..6136e9219767 100644
--- a/lib/Support/TarWriter.cpp
+++ b/lib/Support/TarWriter.cpp
@@ -1,9 +1,8 @@
//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index bdc0dc52c5e2..d213b9a8c6af 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -1,9 +1,8 @@
//===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
-constexpr GPUInfo AMDGCNGPUs[33] = {
+constexpr GPUInfo AMDGCNGPUs[37] = {
// Name Canonical Kind Features
// Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
@@ -98,7 +97,11 @@ constexpr GPUInfo AMDGCNGPUs[33] = {
{{"gfx902"}, {"gfx902"}, GK_GFX902, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx908"}, {"gfx908"}, GK_GFX908, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -170,30 +173,36 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
}
AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
- if (GPU == "generic")
- return {7, 0, 0};
-
AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
- if (AK == AMDGPU::GPUKind::GK_NONE)
+ if (AK == AMDGPU::GPUKind::GK_NONE) {
+ if (GPU == "generic-hsa")
+ return {7, 0, 0};
+ if (GPU == "generic")
+ return {6, 0, 0};
return {0, 0, 0};
+ }
switch (AK) {
- case GK_GFX600: return {6, 0, 0};
- case GK_GFX601: return {6, 0, 1};
- case GK_GFX700: return {7, 0, 0};
- case GK_GFX701: return {7, 0, 1};
- case GK_GFX702: return {7, 0, 2};
- case GK_GFX703: return {7, 0, 3};
- case GK_GFX704: return {7, 0, 4};
- case GK_GFX801: return {8, 0, 1};
- case GK_GFX802: return {8, 0, 2};
- case GK_GFX803: return {8, 0, 3};
- case GK_GFX810: return {8, 1, 0};
- case GK_GFX900: return {9, 0, 0};
- case GK_GFX902: return {9, 0, 2};
- case GK_GFX904: return {9, 0, 4};
- case GK_GFX906: return {9, 0, 6};
- case GK_GFX909: return {9, 0, 9};
- default: return {0, 0, 0};
+ case GK_GFX600: return {6, 0, 0};
+ case GK_GFX601: return {6, 0, 1};
+ case GK_GFX700: return {7, 0, 0};
+ case GK_GFX701: return {7, 0, 1};
+ case GK_GFX702: return {7, 0, 2};
+ case GK_GFX703: return {7, 0, 3};
+ case GK_GFX704: return {7, 0, 4};
+ case GK_GFX801: return {8, 0, 1};
+ case GK_GFX802: return {8, 0, 2};
+ case GK_GFX803: return {8, 0, 3};
+ case GK_GFX810: return {8, 1, 0};
+ case GK_GFX900: return {9, 0, 0};
+ case GK_GFX902: return {9, 0, 2};
+ case GK_GFX904: return {9, 0, 4};
+ case GK_GFX906: return {9, 0, 6};
+ case GK_GFX908: return {9, 0, 8};
+ case GK_GFX909: return {9, 0, 9};
+ case GK_GFX1010: return {10, 1, 0};
+ case GK_GFX1011: return {10, 1, 1};
+ case GK_GFX1012: return {10, 1, 2};
+ default: return {0, 0, 0};
}
}
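
For reference, a small sketch querying one of the newly added targets through the public API (assuming the Major/Minor/Stepping fields of AMDGPU::IsaVersion).

#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // gfx1010 is one of the entries added to the table above.
  AMDGPU::IsaVersion V = AMDGPU::getIsaVersion("gfx1010");
  outs() << V.Major << "." << V.Minor << "." << V.Stepping << "\n"; // 10.1.0
  return 0;
}
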
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index bb63891cd713..1f9c3bbf8229 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -1,9 +1,8 @@
//===--- TargetRegistry.cpp - Target registration -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index f6e4a652302c..44e6223cf17b 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -1,9 +1,8 @@
//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp
index d0212ca13467..40982d777914 100644
--- a/lib/Support/ThreadPool.cpp
+++ b/lib/Support/ThreadPool.cpp
@@ -1,9 +1,8 @@
//==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index fcb1030e1ab4..e5899a60f4db 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -1,9 +1,8 @@
//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/TimeProfiler.cpp b/lib/Support/TimeProfiler.cpp
new file mode 100644
index 000000000000..bc2340815645
--- /dev/null
+++ b/lib/Support/TimeProfiler.cpp
@@ -0,0 +1,199 @@
+//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hierarchical time profiler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include <cassert>
+#include <chrono>
+#include <string>
+#include <vector>
+
+using namespace std::chrono;
+
+namespace llvm {
+
+static cl::opt<unsigned> TimeTraceGranularity(
+ "time-trace-granularity",
+ cl::desc(
+ "Minimum time granularity (in microseconds) traced by time profiler"),
+ cl::init(500));
+
+TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
+
+typedef duration<steady_clock::rep, steady_clock::period> DurationType;
+typedef std::pair<size_t, DurationType> CountAndDurationType;
+typedef std::pair<std::string, CountAndDurationType>
+ NameAndCountAndDurationType;
+
+struct Entry {
+ time_point<steady_clock> Start;
+ DurationType Duration;
+ std::string Name;
+ std::string Detail;
+
+ Entry(time_point<steady_clock> &&S, DurationType &&D, std::string &&N,
+ std::string &&Dt)
+ : Start(std::move(S)), Duration(std::move(D)), Name(std::move(N)),
+ Detail(std::move(Dt)){};
+};
+
+struct TimeTraceProfiler {
+ TimeTraceProfiler() {
+ StartTime = steady_clock::now();
+ }
+
+ void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
+ Stack.emplace_back(steady_clock::now(), DurationType{}, std::move(Name),
+ Detail());
+ }
+
+ void end() {
+ assert(!Stack.empty() && "Must call begin() first");
+ auto &E = Stack.back();
+ E.Duration = steady_clock::now() - E.Start;
+
+    // Only include sections longer than TimeTraceGranularity microseconds.
+ if (duration_cast<microseconds>(E.Duration).count() > TimeTraceGranularity)
+ Entries.emplace_back(E);
+
+ // Track total time taken by each "name", but only the topmost levels of
+ // them; e.g. if there's a template instantiation that instantiates other
+ // templates from within, we only want to add the topmost one. "topmost"
+ // happens to be the ones that don't have any currently open entries above
+    // them.
+ if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
+ return Val.Name == E.Name;
+ }) == Stack.rend()) {
+ auto &CountAndTotal = CountAndTotalPerName[E.Name];
+ CountAndTotal.first++;
+ CountAndTotal.second += E.Duration;
+ }
+
+ Stack.pop_back();
+ }
+
+ void Write(raw_pwrite_stream &OS) {
+ assert(Stack.empty() &&
+ "All profiler sections should be ended when calling Write");
+ json::OStream J(OS);
+ J.objectBegin();
+ J.attributeBegin("traceEvents");
+ J.arrayBegin();
+
+ // Emit all events for the main flame graph.
+ for (const auto &E : Entries) {
+ auto StartUs = duration_cast<microseconds>(E.Start - StartTime).count();
+ auto DurUs = duration_cast<microseconds>(E.Duration).count();
+
+ J.object([&]{
+ J.attribute("pid", 1);
+ J.attribute("tid", 0);
+ J.attribute("ph", "X");
+ J.attribute("ts", StartUs);
+ J.attribute("dur", DurUs);
+ J.attribute("name", E.Name);
+ J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
+ });
+ }
+
+    // Emit totals by section name as additional "thread" events, sorted
+    // longest first.
+ int Tid = 1;
+ std::vector<NameAndCountAndDurationType> SortedTotals;
+ SortedTotals.reserve(CountAndTotalPerName.size());
+ for (const auto &E : CountAndTotalPerName)
+ SortedTotals.emplace_back(E.getKey(), E.getValue());
+
+ llvm::sort(SortedTotals.begin(), SortedTotals.end(),
+ [](const NameAndCountAndDurationType &A,
+ const NameAndCountAndDurationType &B) {
+ return A.second.second > B.second.second;
+ });
+ for (const auto &E : SortedTotals) {
+ auto DurUs = duration_cast<microseconds>(E.second.second).count();
+ auto Count = CountAndTotalPerName[E.first].first;
+
+ J.object([&]{
+ J.attribute("pid", 1);
+ J.attribute("tid", Tid);
+ J.attribute("ph", "X");
+ J.attribute("ts", 0);
+ J.attribute("dur", DurUs);
+ J.attribute("name", "Total " + E.first);
+ J.attributeObject("args", [&] {
+ J.attribute("count", int64_t(Count));
+ J.attribute("avg ms", int64_t(DurUs / Count / 1000));
+ });
+ });
+
+ ++Tid;
+ }
+
+ // Emit metadata event with process name.
+ J.object([&] {
+ J.attribute("cat", "");
+ J.attribute("pid", 1);
+ J.attribute("tid", 0);
+ J.attribute("ts", 0);
+ J.attribute("ph", "M");
+ J.attribute("name", "process_name");
+ J.attributeObject("args", [&] { J.attribute("name", "clang"); });
+ });
+
+ J.arrayEnd();
+ J.attributeEnd();
+ J.objectEnd();
+ }
+
+ SmallVector<Entry, 16> Stack;
+ SmallVector<Entry, 128> Entries;
+ StringMap<CountAndDurationType> CountAndTotalPerName;
+ time_point<steady_clock> StartTime;
+};
+
+void timeTraceProfilerInitialize() {
+ assert(TimeTraceProfilerInstance == nullptr &&
+ "Profiler should not be initialized");
+ TimeTraceProfilerInstance = new TimeTraceProfiler();
+}
+
+void timeTraceProfilerCleanup() {
+ delete TimeTraceProfilerInstance;
+ TimeTraceProfilerInstance = nullptr;
+}
+
+void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
+ assert(TimeTraceProfilerInstance != nullptr &&
+ "Profiler object can't be null");
+ TimeTraceProfilerInstance->Write(OS);
+}
+
+void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
+ if (TimeTraceProfilerInstance != nullptr)
+ TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; });
+}
+
+void timeTraceProfilerBegin(StringRef Name,
+ llvm::function_ref<std::string()> Detail) {
+ if (TimeTraceProfilerInstance != nullptr)
+ TimeTraceProfilerInstance->begin(Name, Detail);
+}
+
+void timeTraceProfilerEnd() {
+ if (TimeTraceProfilerInstance != nullptr)
+ TimeTraceProfilerInstance->end();
+}
+
+} // namespace llvm
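For orientation (this sketch is not part of the patch), the free functions above are the whole public surface of the profiler. A hypothetical driver that brackets two phases and dumps the Chrome trace-event JSON could look like this, assuming the declarations live in llvm/Support/TimeProfiler.h:

#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"

void runWithTimeTrace(llvm::raw_pwrite_stream &TraceOS) {
  llvm::timeTraceProfilerInitialize();

  llvm::timeTraceProfilerBegin("Frontend", "parse all inputs");
  // ... parsing work ...
  llvm::timeTraceProfilerEnd();

  llvm::timeTraceProfilerBegin("Backend", "codegen");
  // ... codegen work ...
  llvm::timeTraceProfilerEnd();

  // Write() emits one "X" (complete) event per recorded section, per-name
  // totals as extra tids, and a trailing "M" metadata event naming the
  // process, all inside a single {"traceEvents": [...]} object.
  llvm::timeTraceProfilerWrite(TraceOS);
  llvm::timeTraceProfilerCleanup();
}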
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 82f5810dd107..2a7ff1eaaf63 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -1,9 +1,8 @@
//===-- Timer.cpp - Interval Timing Support -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/Signposts.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <limits>
@@ -40,6 +40,9 @@ static std::string &getLibSupportInfoOutputFilename() {
static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+/// Allows llvm::Timer to emit signposts when supported.
+static ManagedStatic<SignpostEmitter> Signposts;
+
namespace {
static cl::opt<bool>
TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
@@ -134,6 +137,7 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
void Timer::startTimer() {
assert(!Running && "Cannot start a running timer");
Running = Triggered = true;
+ Signposts->startTimerInterval(this);
StartTime = TimeRecord::getCurrentTime(true);
}
@@ -142,6 +146,7 @@ void Timer::stopTimer() {
Running = false;
Time += TimeRecord::getCurrentTime(false);
Time -= StartTime;
+ Signposts->endTimerInterval(this);
}
void Timer::clear() {
@@ -342,7 +347,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
TimersToPrint.clear();
}
-void TimerGroup::prepareToPrintList() {
+void TimerGroup::prepareToPrintList(bool ResetTime) {
// See if any of our timers were started, if so add them to TimersToPrint.
for (Timer *T = FirstTimer; T; T = T->Next) {
if (!T->hasTriggered()) continue;
@@ -352,15 +357,20 @@ void TimerGroup::prepareToPrintList() {
TimersToPrint.emplace_back(T->Time, T->Name, T->Description);
+ if (ResetTime)
+ T->clear();
+
if (WasRunning)
T->startTimer();
}
}
-void TimerGroup::print(raw_ostream &OS) {
- sys::SmartScopedLock<true> L(*TimerLock);
-
- prepareToPrintList();
+void TimerGroup::print(raw_ostream &OS, bool ResetAfterPrint) {
+ {
+ // After preparing the timers we can free the lock
+ sys::SmartScopedLock<true> L(*TimerLock);
+ prepareToPrintList(ResetAfterPrint);
+ }
// If any timers were started, print the group.
if (!TimersToPrint.empty())
@@ -400,7 +410,7 @@ void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R,
const char *TimerGroup::printJSONValues(raw_ostream &OS, const char *delim) {
sys::SmartScopedLock<true> L(*TimerLock);
- prepareToPrintList();
+ prepareToPrintList(false);
for (const PrintRecord &R : TimersToPrint) {
OS << delim;
delim = ",\n";
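A rough usage sketch, not part of the patch, of the new ResetAfterPrint flag threaded through prepareToPrintList() above: printing with reset drains the accumulated times so a periodic report does not double count earlier iterations.

#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"

void reportPerIteration(llvm::TimerGroup &TG) {
  for (int I = 0; I < 3; ++I) {
    // ... one iteration of timed work using timers in TG ...
    TG.print(llvm::errs(), /*ResetAfterPrint=*/true); // zeroes the timers
  }
  TG.print(llvm::errs(), /*ResetAfterPrint=*/false);  // now prints nothing new
}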
diff --git a/lib/Support/ToolOutputFile.cpp b/lib/Support/ToolOutputFile.cpp
index e12d9e824f7e..ed3a247f0115 100644
--- a/lib/Support/ToolOutputFile.cpp
+++ b/lib/Support/ToolOutputFile.cpp
@@ -1,9 +1,8 @@
//===--- ToolOutputFile.cpp - Implement the ToolOutputFile class --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/TrigramIndex.cpp b/lib/Support/TrigramIndex.cpp
index 721763c88525..94810b56db8e 100644
--- a/lib/Support/TrigramIndex.cpp
+++ b/lib/Support/TrigramIndex.cpp
@@ -1,9 +1,8 @@
//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 26d9327f6208..d419463e6a5e 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -1,9 +1,8 @@
//===--- Triple.cpp - Target triple helper class --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,6 +22,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case aarch64: return "aarch64";
case aarch64_be: return "aarch64_be";
+ case aarch64_32: return "aarch64_32";
case arm: return "arm";
case armeb: return "armeb";
case arc: return "arc";
@@ -81,7 +81,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
return StringRef();
case aarch64:
- case aarch64_be: return "aarch64";
+ case aarch64_be:
+ case aarch64_32: return "aarch64";
case arc: return "arc";
@@ -209,6 +210,7 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case HermitCore: return "hermit";
case Hurd: return "hurd";
case WASI: return "wasi";
+ case Emscripten: return "emscripten";
}
llvm_unreachable("Invalid OSType");
@@ -226,6 +228,8 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case CODE16: return "code16";
case EABI: return "eabi";
case EABIHF: return "eabihf";
+ case ELFv1: return "elfv1";
+ case ELFv2: return "elfv2";
case Android: return "android";
case Musl: return "musl";
case MuslEABI: return "musleabi";
@@ -235,6 +239,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case Cygnus: return "cygnus";
case CoreCLR: return "coreclr";
case Simulator: return "simulator";
+ case MacABI: return "macabi";
}
llvm_unreachable("Invalid EnvironmentType!");
@@ -260,8 +265,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return StringSwitch<Triple::ArchType>(Name)
.Case("aarch64", aarch64)
.Case("aarch64_be", aarch64_be)
+ .Case("aarch64_32", aarch64_32)
.Case("arc", arc)
.Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
+ .Case("arm64_32", aarch64_32)
.Case("arm", arm)
.Case("armeb", armeb)
.Case("avr", avr)
@@ -389,8 +396,10 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("xscaleeb", Triple::armeb)
.Case("aarch64", Triple::aarch64)
.Case("aarch64_be", Triple::aarch64_be)
+ .Case("aarch64_32", Triple::aarch64_32)
.Case("arc", Triple::arc)
.Case("arm64", Triple::aarch64)
+ .Case("arm64_32", Triple::aarch64_32)
.Case("arm", Triple::arm)
.Case("armeb", Triple::armeb)
.Case("thumb", Triple::thumb)
@@ -507,6 +516,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("hermit", Triple::HermitCore)
.StartsWith("hurd", Triple::Hurd)
.StartsWith("wasi", Triple::WASI)
+ .StartsWith("emscripten", Triple::Emscripten)
.Default(Triple::UnknownOS);
}
@@ -514,6 +524,8 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
.StartsWith("eabihf", Triple::EABIHF)
.StartsWith("eabi", Triple::EABI)
+ .StartsWith("elfv1", Triple::ELFv1)
+ .StartsWith("elfv2", Triple::ELFv2)
.StartsWith("gnuabin32", Triple::GNUABIN32)
.StartsWith("gnuabi64", Triple::GNUABI64)
.StartsWith("gnueabihf", Triple::GNUEABIHF)
@@ -530,11 +542,15 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("cygnus", Triple::Cygnus)
.StartsWith("coreclr", Triple::CoreCLR)
.StartsWith("simulator", Triple::Simulator)
+ .StartsWith("macabi", Triple::MacABI)
.Default(Triple::UnknownEnvironment);
}
static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
return StringSwitch<Triple::ObjectFormatType>(EnvironmentName)
+    // "xcoff" must come before "coff" because of the order-dependent
+ // pattern matching.
+ .EndsWith("xcoff", Triple::XCOFF)
.EndsWith("coff", Triple::COFF)
.EndsWith("elf", Triple::ELF)
.EndsWith("macho", Triple::MachO)
@@ -611,6 +627,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8m_baseline;
case ARM::ArchKind::ARMV8MMainline:
return Triple::ARMSubArch_v8m_mainline;
+ case ARM::ArchKind::ARMV8_1MMainline:
+ return Triple::ARMSubArch_v8_1m_mainline;
default:
return Triple::NoSubArch;
}
@@ -623,6 +641,7 @@ static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
case Triple::ELF: return "elf";
case Triple::MachO: return "macho";
case Triple::Wasm: return "wasm";
+ case Triple::XCOFF: return "xcoff";
}
llvm_unreachable("unknown object format type");
}
@@ -631,6 +650,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
switch (T.getArch()) {
case Triple::UnknownArch:
case Triple::aarch64:
+ case Triple::aarch64_32:
case Triple::arm:
case Triple::thumb:
case Triple::x86:
@@ -687,6 +707,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::ppc64:
if (T.isOSDarwin())
return Triple::MachO;
+ else if (T.isOSAIX())
+ return Triple::XCOFF;
return Triple::ELF;
case Triple::wasm32:
@@ -1212,6 +1234,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::msp430:
return 16;
+ case llvm::Triple::aarch64_32:
case llvm::Triple::arc:
case llvm::Triple::arm:
case llvm::Triple::armeb:
@@ -1292,6 +1315,7 @@ Triple Triple::get32BitArchVariant() const {
T.setArch(UnknownArch);
break;
+ case Triple::aarch64_32:
case Triple::amdil:
case Triple::hsail:
case Triple::spir:
@@ -1383,6 +1407,7 @@ Triple Triple::get64BitArchVariant() const {
// Already 64-bit.
break;
+ case Triple::aarch64_32: T.setArch(Triple::aarch64); break;
case Triple::arm: T.setArch(Triple::aarch64); break;
case Triple::armeb: T.setArch(Triple::aarch64_be); break;
case Triple::le32: T.setArch(Triple::le64); break;
@@ -1493,6 +1518,7 @@ Triple Triple::getLittleEndianArchVariant() const {
bool Triple::isLittleEndian() const {
switch (getArch()) {
case Triple::aarch64:
+ case Triple::aarch64_32:
case Triple::amdgcn:
case Triple::amdil64:
case Triple::amdil:
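The following sketch (the triples are illustrative, not from the patch) exercises the new pieces wired up above: arm64_32/aarch64_32 as a 32-bit AArch64 variant, the macabi environment, and XCOFF as the default object format on AIX.

#include "llvm/ADT/Triple.h"
#include <cassert>

void checkNewTripleSupport() {
  llvm::Triple Watch("arm64_32-apple-watchos");
  assert(Watch.getArch() == llvm::Triple::aarch64_32);
  assert(Watch.isArch32Bit()); // aarch64_32 is an ILP32 target

  llvm::Triple Catalyst("x86_64-apple-ios13.0-macabi");
  assert(Catalyst.getEnvironment() == llvm::Triple::MacABI);

  llvm::Triple Aix("powerpc64-ibm-aix");
  assert(Aix.getObjectFormat() == llvm::Triple::XCOFF); // new AIX default
}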
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 4726c8ab7494..fbbcd8848f1c 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -1,9 +1,8 @@
//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/Unicode.cpp b/lib/Support/Unicode.cpp
index b719bd826dc1..4d195069682b 100644
--- a/lib/Support/Unicode.cpp
+++ b/lib/Support/Unicode.cpp
@@ -1,9 +1,8 @@
//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Unix/COM.inc b/lib/Support/Unix/COM.inc
index 5b71de74ebf3..03a690ac3766 100644
--- a/lib/Support/Unix/COM.inc
+++ b/lib/Support/Unix/COM.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Unix/COM.inc - Unix COM Implementation -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc
index 029451f347e8..a2a379963de0 100644
--- a/lib/Support/Unix/DynamicLibrary.inc
+++ b/lib/Support/Unix/DynamicLibrary.inc
@@ -1,9 +1,8 @@
//===- Unix/DynamicLibrary.cpp - Unix DL Implementation ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index b65f84bf4444..17d78dc18be7 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,6 +49,23 @@ static std::string updateTripleOSVersion(std::string TargetTripleString) {
TargetTripleString += "-darwin";
TargetTripleString += getOSVersion();
}
+ // On AIX, the AIX version and release should be that of the current host
+  // unless the version has already been specified.
+ if (Triple(LLVM_HOST_TRIPLE).getOS() == Triple::AIX) {
+ Triple TT(TargetTripleString);
+ if (TT.getOS() == Triple::AIX && !TT.getOSMajorVersion()) {
+ struct utsname name;
+ if (uname(&name) != -1) {
+ std::string NewOSName = Triple::getOSTypeName(Triple::AIX);
+ NewOSName += name.version;
+ NewOSName += '.';
+ NewOSName += name.release;
+ NewOSName += ".0.0";
+ TT.setOSName(NewOSName);
+ return TT.str();
+ }
+ }
+ }
return TargetTripleString;
}
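As a concrete illustration (the host values are assumed, not taken from the patch): on an AIX 7.2 machine uname(2) reports version "7" and release "2", so an unversioned aix triple picks up an OS component built like this:

#include <sys/utsname.h>
#include <string>

std::string aixOSComponent(std::string OSName /* e.g. "aix" */) {
  struct utsname Name;
  if (uname(&Name) != -1) {
    OSName += Name.version; // "7"
    OSName += '.';
    OSName += Name.release; // "2"
    OSName += ".0.0";
  }
  return OSName; // "aix7.2.0.0"
}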
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index adbfff2f59a5..a0927da50e48 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -1,9 +1,8 @@
//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,7 +45,7 @@ extern "C" void __clear_cache(void *, void*);
namespace {
int getPosixProtectionFlags(unsigned Flags) {
- switch (Flags) {
+ switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
case llvm::sys::Memory::MF_READ:
return PROT_READ;
case llvm::sys::Memory::MF_WRITE:
@@ -59,14 +58,13 @@ int getPosixProtectionFlags(unsigned Flags) {
llvm::sys::Memory::MF_EXEC:
return PROT_READ | PROT_WRITE | PROT_EXEC;
case llvm::sys::Memory::MF_EXEC:
-#if defined(__FreeBSD__)
+#if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \
+ defined(_POWER) || defined(_ARCH_PPC))
// On PowerPC, having an executable page that has no read permission
// can have unintended consequences. The function InvalidateInstruction-
// Cache uses instructions dcbf and icbi, both of which are treated by
// the processor as loads. If the page has no read permissions,
// executing these instructions will result in a segmentation fault.
- // Somehow, this problem is not present on Linux, but it does happen
- // on FreeBSD.
return PROT_READ | PROT_EXEC;
#else
return PROT_EXEC;
@@ -92,19 +90,24 @@ Memory::allocateMappedMemory(size_t NumBytes,
if (NumBytes == 0)
return MemoryBlock();
- static const size_t PageSize = Process::getPageSize();
- const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
-
- int fd = -1;
-
- int MMFlags = MAP_PRIVATE |
-#ifdef MAP_ANONYMOUS
- MAP_ANONYMOUS
+  // On platforms that have it, we can use MAP_ANON to get a memory-mapped
+  // page without file backing; on strictly POSIX platforms we fall back to
+  // opening /dev/zero instead.
+ int fd;
+#if defined(MAP_ANON)
+ fd = -1;
#else
- MAP_ANON
+ fd = open("/dev/zero", O_RDWR);
+ if (fd == -1) {
+ EC = std::error_code(errno, std::generic_category());
+ return MemoryBlock();
+ }
#endif
- ; // Ends statement above
+ int MMFlags = MAP_PRIVATE;
+#if defined(MAP_ANON)
+ MMFlags |= MAP_ANON;
+#endif
int Protect = getPosixProtectionFlags(PFlags);
#if defined(__NetBSD__) && defined(PROT_MPROTECT)
@@ -113,23 +116,39 @@ Memory::allocateMappedMemory(size_t NumBytes,
// Use any near hint and the page size to set a page-aligned starting address
uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
- NearBlock->size() : 0;
+ NearBlock->allocatedSize() : 0;
+ static const size_t PageSize = Process::getPageSizeEstimate();
+ const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
+
if (Start && Start % PageSize)
Start += PageSize - Start % PageSize;
- void *Addr = ::mmap(reinterpret_cast<void*>(Start), PageSize*NumPages,
- Protect, MMFlags, fd, 0);
+ // FIXME: Handle huge page requests (MF_HUGE_HINT).
+ void *Addr = ::mmap(reinterpret_cast<void *>(Start), PageSize*NumPages, Protect,
+ MMFlags, fd, 0);
if (Addr == MAP_FAILED) {
- if (NearBlock) //Try again without a near hint
+ if (NearBlock) { //Try again without a near hint
+#if !defined(MAP_ANON)
+ close(fd);
+#endif
return allocateMappedMemory(NumBytes, nullptr, PFlags, EC);
+ }
EC = std::error_code(errno, std::generic_category());
+#if !defined(MAP_ANON)
+ close(fd);
+#endif
return MemoryBlock();
}
+#if !defined(MAP_ANON)
+ close(fd);
+#endif
+
MemoryBlock Result;
Result.Address = Addr;
- Result.Size = NumPages*PageSize;
+ Result.AllocatedSize = PageSize*NumPages;
+ Result.Flags = PFlags;
// Rely on protectMappedMemory to invalidate instruction cache.
if (PFlags & MF_EXEC) {
@@ -143,22 +162,22 @@ Memory::allocateMappedMemory(size_t NumBytes,
std::error_code
Memory::releaseMappedMemory(MemoryBlock &M) {
- if (M.Address == nullptr || M.Size == 0)
+ if (M.Address == nullptr || M.AllocatedSize == 0)
return std::error_code();
- if (0 != ::munmap(M.Address, M.Size))
+ if (0 != ::munmap(M.Address, M.AllocatedSize))
return std::error_code(errno, std::generic_category());
M.Address = nullptr;
- M.Size = 0;
+ M.AllocatedSize = 0;
return std::error_code();
}
std::error_code
Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
- static const size_t PageSize = Process::getPageSize();
- if (M.Address == nullptr || M.Size == 0)
+ static const size_t PageSize = Process::getPageSizeEstimate();
+ if (M.Address == nullptr || M.AllocatedSize == 0)
return std::error_code();
if (!Flags)
@@ -166,7 +185,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
int Protect = getPosixProtectionFlags(Flags);
uintptr_t Start = alignAddr((uint8_t *)M.Address - PageSize + 1, PageSize);
- uintptr_t End = alignAddr((uint8_t *)M.Address + M.Size, PageSize);
+ uintptr_t End = alignAddr((uint8_t *)M.Address + M.AllocatedSize, PageSize);
bool InvalidateCache = (Flags & MF_EXEC);
@@ -179,7 +198,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
if (Result != 0)
return std::error_code(errno, std::generic_category());
- Memory::InvalidateInstructionCache(M.Address, M.Size);
+ Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize);
InvalidateCache = false;
}
#endif
@@ -190,7 +209,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
return std::error_code(errno, std::generic_category());
if (InvalidateCache)
- Memory::InvalidateInstructionCache(M.Address, M.Size);
+ Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize);
return std::error_code();
}
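Outside the LLVM wrappers, the anonymous-mapping fallback introduced above boils down to the following pattern (a self-contained sketch, not the patch's code): prefer MAP_ANON when the platform defines it, otherwise map /dev/zero and close the descriptor once the mapping exists.

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>

void *mapAnonymous(size_t Size) {
  int Flags = MAP_PRIVATE;
  int FD = -1;
#if defined(MAP_ANON)
  Flags |= MAP_ANON;
#else
  FD = open("/dev/zero", O_RDWR); // strictly-POSIX fallback
  if (FD == -1)
    return nullptr;
#endif
  void *Addr = mmap(nullptr, Size, PROT_READ | PROT_WRITE, Flags, FD, 0);
#if !defined(MAP_ANON)
  close(FD); // the mapping keeps its own reference to /dev/zero
#endif
  return Addr == MAP_FAILED ? nullptr : Addr;
}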
diff --git a/lib/Support/Unix/Mutex.inc b/lib/Support/Unix/Mutex.inc
index fe6b17041457..2c982b38d6ff 100644
--- a/lib/Support/Unix/Mutex.inc
+++ b/lib/Support/Unix/Mutex.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index d7cc0d627d09..e80880c6b3cb 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Unix/Path.inc - Unix Path Implementation ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,6 +37,7 @@
#ifdef __APPLE__
#include <mach-o/dyld.h>
#include <sys/attr.h>
+#include <copyfile.h>
#elif defined(__DragonFly__)
#include <sys/mount.h>
#endif
@@ -56,7 +56,7 @@
#include <sys/types.h>
#if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && \
- !defined(__linux__) && !defined(__FreeBSD_kernel__)
+ !defined(__linux__) && !defined(__FreeBSD_kernel__) && !defined(_AIX)
#include <sys/statvfs.h>
#define STATVFS statvfs
#define FSTATVFS fstatvfs
@@ -77,6 +77,14 @@
#endif
#endif
#include <sys/vfs.h>
+#elif defined(_AIX)
+#include <sys/statfs.h>
+
+// <sys/vmount.h> depends on `uint` to be a typedef from <sys/types.h> to
+// `uint_t`; however, <sys/types.h> does not always declare `uint`. We provide
+// the typedef prior to including <sys/vmount.h> to work around this issue.
+typedef uint_t uint;
+#include <sys/vmount.h>
#else
#include <sys/mount.h>
#endif
@@ -108,7 +116,11 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
struct stat sb;
char fullpath[PATH_MAX];
- snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
+ int chars = snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
+ // We cannot write PATH_MAX characters because the string will be terminated
+ // with a null character. Fail if truncation happened.
+ if (chars >= PATH_MAX)
+ return 1;
if (!realpath(fullpath, ret))
return 1;
if (stat(fullpath, &sb) != 0)
@@ -120,8 +132,6 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
static char *
getprogpath(char ret[PATH_MAX], const char *bin)
{
- char *pv, *s, *t;
-
/* First approach: absolute path. */
if (bin[0] == '/') {
if (test_dir(ret, "/", bin) == 0)
@@ -140,18 +150,21 @@ getprogpath(char ret[PATH_MAX], const char *bin)
}
/* Third approach: $PATH */
+ char *pv;
if ((pv = getenv("PATH")) == nullptr)
return nullptr;
- s = pv = strdup(pv);
- if (!pv)
+ char *s = strdup(pv);
+ if (!s)
return nullptr;
- while ((t = strsep(&s, ":")) != nullptr) {
+ char *state;
+ for (char *t = strtok_r(s, ":", &state); t != nullptr;
+ t = strtok_r(nullptr, ":", &state)) {
if (test_dir(ret, t, bin) == 0) {
- free(pv);
+ free(s);
return ret;
}
}
- free(pv);
+ free(s);
return nullptr;
}
#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__
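The strsep-to-strtok_r change above is the usual re-entrant $PATH walk; a minimal standalone sketch follows (the access(2) check stands in for the patch's test_dir helper). Note that strtok_r, unlike strsep, also skips empty path components.

#include <cstdlib>
#include <cstring>
#include <string>
#include <unistd.h>

bool findInPath(const char *Bin, std::string &Out) {
  const char *PathEnv = getenv("PATH");
  if (!PathEnv)
    return false;
  char *Copy = strdup(PathEnv);
  if (!Copy)
    return false;
  bool Found = false;
  char *State;
  for (char *Dir = strtok_r(Copy, ":", &State); Dir;
       Dir = strtok_r(nullptr, ":", &State)) {
    std::string Candidate = std::string(Dir) + "/" + Bin;
    if (access(Candidate.c_str(), X_OK) == 0) { // stand-in for test_dir()
      Out = Candidate;
      Found = true;
      break;
    }
  }
  free(Copy);
  return Found;
}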
@@ -173,8 +186,21 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__minix) || defined(__DragonFly__) || \
defined(__FreeBSD_kernel__) || defined(_AIX)
+ StringRef curproc("/proc/curproc/file");
char exe_path[PATH_MAX];
-
+ // /proc is not mounted by default under FreeBSD, but gives more accurate
+ // information than argv[0] when it is.
+ if (sys::fs::exists(curproc)) {
+ ssize_t len = readlink(curproc.str().c_str(), exe_path, sizeof(exe_path));
+ if (len > 0) {
+ // Null terminate the string for realpath. readlink never null
+ // terminates its output.
+ len = std::min(len, ssize_t(sizeof(exe_path) - 1));
+ exe_path[len] = '\0';
+ return exe_path;
+ }
+ }
+ // If we don't have procfs mounted, fall back to argv[0]
if (getprogpath(exe_path, argv0) != NULL)
return exe_path;
#elif defined(__linux__) || defined(__CYGWIN__)
@@ -196,20 +222,20 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// the program, and not the eventual binary file. Therefore, call realpath
// so this behaves the same on all platforms.
#if _POSIX_VERSION >= 200112 || defined(__GLIBC__)
- char *real_path = realpath(exe_path, NULL);
- std::string ret = std::string(real_path);
- free(real_path);
- return ret;
+ if (char *real_path = realpath(exe_path, NULL)) {
+ std::string ret = std::string(real_path);
+ free(real_path);
+ return ret;
+ }
#else
char real_path[MAXPATHLEN];
- realpath(exe_path, real_path);
- return std::string(real_path);
+ if (realpath(exe_path, real_path))
+ return std::string(real_path);
#endif
- } else {
- // Fall back to the classical detection.
- if (getprogpath(exe_path, argv0))
- return exe_path;
}
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0))
+ return exe_path;
#elif defined(HAVE_DLFCN_H) && defined(HAVE_DLADDR)
// Use dladdr to get executable path if available.
Dl_info DLInfo;
@@ -246,7 +272,7 @@ uint32_t file_status::getLinkCount() const {
ErrorOr<space_info> disk_space(const Twine &Path) {
struct STATVFS Vfs;
- if (::STATVFS(Path.str().c_str(), &Vfs))
+ if (::STATVFS(const_cast<char *>(Path.str().c_str()), &Vfs))
return std::error_code(errno, std::generic_category());
auto FrSize = STATVFS_F_FRSIZE(Vfs);
space_info SpaceInfo;
@@ -398,6 +424,9 @@ static bool is_local_impl(struct STATVFS &Vfs) {
#elif defined(__Fuchsia__)
// Fuchsia doesn't yet support remote filesystem mounts.
return true;
+#elif defined(__EMSCRIPTEN__)
+ // Emscripten doesn't currently support remote filesystem mounts.
+ return true;
#elif defined(__HAIKU__)
// Haiku doesn't expose this information.
return false;
@@ -406,6 +435,40 @@ static bool is_local_impl(struct STATVFS &Vfs) {
StringRef fstype(Vfs.f_basetype);
// NFS is the only non-local fstype??
return !fstype.equals("nfs");
+#elif defined(_AIX)
+ // Call mntctl; try more than twice in case of timing issues with a concurrent
+ // mount.
+ int Ret;
+ size_t BufSize = 2048u;
+ std::unique_ptr<char[]> Buf;
+ int Tries = 3;
+ while (Tries--) {
+ Buf = llvm::make_unique<char[]>(BufSize);
+ Ret = mntctl(MCTL_QUERY, BufSize, Buf.get());
+ if (Ret != 0)
+ break;
+ BufSize = *reinterpret_cast<unsigned int *>(Buf.get());
+ Buf.reset();
+ }
+
+ if (Ret == -1)
+ // There was an error; "remote" is the conservative answer.
+ return false;
+
+ // Look for the correct vmount entry.
+ char *CurObjPtr = Buf.get();
+ while (Ret--) {
+ struct vmount *Vp = reinterpret_cast<struct vmount *>(CurObjPtr);
+ static_assert(sizeof(Vfs.f_fsid) == sizeof(Vp->vmt_fsid),
+ "fsid length mismatch");
+ if (memcmp(&Vfs.f_fsid, &Vp->vmt_fsid, sizeof Vfs.f_fsid) == 0)
+ return (Vp->vmt_flags & MNT_REMOTE) == 0;
+
+ CurObjPtr += Vp->vmt_length;
+ }
+
+ // vmount entry not found; "remote" is the conservative answer.
+ return false;
#else
return !!(STATVFS_F_FLAG(Vfs) & MNT_LOCAL);
#endif
@@ -413,7 +476,7 @@ static bool is_local_impl(struct STATVFS &Vfs) {
std::error_code is_local(const Twine &Path, bool &Result) {
struct STATVFS Vfs;
- if (::STATVFS(Path.str().c_str(), &Vfs))
+ if (::STATVFS(const_cast<char *>(Path.str().c_str()), &Vfs))
return std::error_code(errno, std::generic_category());
Result = is_local_impl(Vfs);
@@ -447,7 +510,12 @@ std::error_code resize_file(int FD, uint64_t Size) {
// If we have posix_fallocate use it. Unlike ftruncate it always allocates
// space, so we get an error if the disk is full.
if (int Err = ::posix_fallocate(FD, 0, Size)) {
- if (Err != EINVAL && Err != EOPNOTSUPP)
+#ifdef _AIX
+ constexpr int NotSupportedError = ENOTSUP;
+#else
+ constexpr int NotSupportedError = EOPNOTSUPP;
+#endif
+ if (Err != EINVAL && Err != NotSupportedError)
return std::error_code(Err, std::generic_category());
}
#endif
@@ -626,6 +694,14 @@ std::error_code status(int FD, file_status &Result) {
return fillStatus(StatRet, Status, Result);
}
+unsigned getUmask() {
+  // Choose an arbitrary new mask and reset the umask to the old mask.
+  // umask(2) never fails, so ignore the return of the second call.
+ unsigned Mask = ::umask(0);
+ (void) ::umask(Mask);
+ return Mask;
+}
+
std::error_code setPermissions(const Twine &Path, perms Permissions) {
SmallString<128> PathStorage;
StringRef P = Path.toNullTerminatedStringRef(PathStorage);
@@ -635,6 +711,12 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
return std::error_code();
}
+std::error_code setPermissions(int FD, perms Permissions) {
+ if (::fchmod(FD, Permissions))
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
+}
+
std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
TimePoint<> ModificationTime) {
#if defined(HAVE_FUTIMENS)
@@ -722,7 +804,7 @@ const char *mapped_file_region::const_data() const {
}
int mapped_file_region::alignment() {
- return Process::getPageSize();
+ return Process::getPageSizeEstimate();
}
std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
@@ -910,9 +992,54 @@ Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags,
return ResultFD;
}
-void closeFile(file_t &F) {
- ::close(F);
+file_t getStdinHandle() { return 0; }
+file_t getStdoutHandle() { return 1; }
+file_t getStderrHandle() { return 2; }
+
+std::error_code readNativeFile(file_t FD, MutableArrayRef<char> Buf,
+ size_t *BytesRead) {
+ *BytesRead = sys::RetryAfterSignal(-1, ::read, FD, Buf.data(), Buf.size());
+ if (ssize_t(*BytesRead) == -1)
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
+}
+
+std::error_code readNativeFileSlice(file_t FD, MutableArrayRef<char> Buf,
+ size_t Offset) {
+ char *BufPtr = Buf.data();
+ size_t BytesLeft = Buf.size();
+
+#ifndef HAVE_PREAD
+ // If we don't have pread, seek to Offset.
+ if (lseek(FD, Offset, SEEK_SET) == -1)
+ return std::error_code(errno, std::generic_category());
+#endif
+
+ while (BytesLeft) {
+#ifdef HAVE_PREAD
+ ssize_t NumRead = sys::RetryAfterSignal(-1, ::pread, FD, BufPtr, BytesLeft,
+ Buf.size() - BytesLeft + Offset);
+#else
+ ssize_t NumRead = sys::RetryAfterSignal(-1, ::read, FD, BufPtr, BytesLeft);
+#endif
+ if (NumRead == -1) {
+ // Error while reading.
+ return std::error_code(errno, std::generic_category());
+ }
+ if (NumRead == 0) {
+ memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
+ break;
+ }
+ BytesLeft -= NumRead;
+ BufPtr += NumRead;
+ }
+ return std::error_code();
+}
+
+std::error_code closeFile(file_t &F) {
+ file_t TmpF = F;
F = kInvalidFile;
+ return Process::SafelyCloseFileDescriptor(TmpF);
}
template <typename T>
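A possible caller of the new sliced-read helper added above (the offset and size are made up): read 4 KiB starting at byte 65536 of an already-open file. On hosts without pread the helper seeks first, so the file offset should not be relied upon afterwards.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/FileSystem.h"
#include <vector>

std::error_code readSliceAt64K(llvm::sys::fs::file_t FD) {
  std::vector<char> Buf(4096);
  return llvm::sys::fs::readNativeFileSlice(
      FD, llvm::MutableArrayRef<char>(Buf.data(), Buf.size()),
      /*Offset=*/65536);
}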
@@ -1063,5 +1190,37 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
} // end namespace path
+namespace fs {
+
+#ifdef __APPLE__
+/// This implementation tries to perform an APFS CoW clone of the file,
+/// which can be much faster and uses less space.
+/// Unfortunately fcopyfile(3) does not support COPYFILE_CLONE, so the
+/// file descriptor variant of this function still uses the default
+/// implementation.
+std::error_code copy_file(const Twine &From, const Twine &To) {
+ uint32_t Flag = COPYFILE_DATA;
+#if __has_builtin(__builtin_available)
+ if (__builtin_available(macos 10.12, *)) {
+ bool IsSymlink;
+ if (std::error_code Error = is_symlink_file(From, IsSymlink))
+ return Error;
+ // COPYFILE_CLONE clones the symlink instead of following it
+    // and returns EEXIST if the target file already exists.
+ if (!IsSymlink && !exists(To))
+ Flag = COPYFILE_CLONE;
+ }
+#endif
+ int Status =
+ copyfile(From.str().c_str(), To.str().c_str(), /* State */ NULL, Flag);
+
+ if (Status == 0)
+ return std::error_code();
+ return std::error_code(errno, std::generic_category());
+}
+#endif // __APPLE__
+
+} // end namespace fs
+
} // end namespace sys
} // end namespace llvm
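A hypothetical caller of the Darwin copy_file override above (the paths are invented): on macOS 10.12+ a non-symlink copy to a destination that does not yet exist is attempted as an APFS clone, and everything else falls back to a plain data copy.

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

void cloneCacheFile(const char *From, const char *To) {
  if (std::error_code EC = llvm::sys::fs::copy_file(From, To))
    llvm::errs() << "copy failed: " << EC.message() << "\n";
}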
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 3185f45a3a61..4115ee396582 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -1,9 +1,8 @@
//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,10 +32,7 @@
#if HAVE_SIGNAL_H
#include <signal.h>
#endif
-// DragonFlyBSD, and OpenBSD have deprecated <malloc.h> for
-// <stdlib.h> instead. Unix.h includes this for us already.
-#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \
- !defined(__OpenBSD__)
+#if defined(HAVE_MALLINFO)
#include <malloc.h>
#endif
#if defined(HAVE_MALLCTL)
@@ -73,7 +69,7 @@ static std::pair<std::chrono::microseconds, std::chrono::microseconds> getRUsage
// On Cygwin, getpagesize() returns 64k(AllocationGranularity) and
// offset in mmap(3) should be aligned to the AllocationGranularity.
-unsigned Process::getPageSize() {
+Expected<unsigned> Process::getPageSize() {
#if defined(HAVE_GETPAGESIZE)
static const int page_size = ::getpagesize();
#elif defined(HAVE_SYSCONF)
@@ -81,6 +77,9 @@ unsigned Process::getPageSize() {
#else
#error Cannot get the page size on this machine
#endif
+ if (page_size == -1)
+ return errorCodeToError(std::error_code(errno, std::generic_category()));
+
return static_cast<unsigned>(page_size);
}
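Callers of the now-fallible Process::getPageSize() have to unwrap an Expected; a sketch of the pattern follows (the 4096 fallback is an assumption, not from the patch). Code that can tolerate a guess should instead use the non-failing getPageSizeEstimate() that the rest of this patch switches to.

#include "llvm/Support/Error.h"
#include "llvm/Support/Process.h"

unsigned pageSizeOrDefault() {
  if (llvm::Expected<unsigned> Size = llvm::sys::Process::getPageSize())
    return *Size;
  else {
    llvm::consumeError(Size.takeError()); // e.g. sysconf() failed
    return 4096;                          // assumed fallback value
  }
}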
@@ -292,7 +291,8 @@ static unsigned getColumns(int FileID) {
unsigned Columns = 0;
-#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) \
+ && !(defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE))
// Try to determine the width of the terminal.
struct winsize ws;
if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index d0abc3763e82..c4123a64046f 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -246,12 +245,16 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
Envp = const_cast<const char **>(*_NSGetEnviron());
#endif
- // Explicitly initialized to prevent what appears to be a valgrind false
- // positive.
- pid_t PID = 0;
- int Err = posix_spawn(&PID, Program.str().c_str(), FileActions,
- /*attrp*/ nullptr, const_cast<char **>(Argv),
- const_cast<char **>(Envp));
+ constexpr int maxRetries = 8;
+ int retries = 0;
+ pid_t PID;
+ int Err;
+ do {
+ PID = 0; // Make Valgrind happy.
+ Err = posix_spawn(&PID, Program.str().c_str(), FileActions,
+ /*attrp*/ nullptr, const_cast<char **>(Argv),
+ const_cast<char **>(Envp));
+ } while (Err == EINTR && ++retries < maxRetries);
if (FileActions)
posix_spawn_file_actions_destroy(FileActions);
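The retry loop added around posix_spawn is an instance of a generic bounded EINTR-retry idiom; a small sketch (the callable and retry count are illustrative, not LLVM API):

#include <cerrno>

// F returns an errno-style code, 0 on success (like posix_spawn).
template <typename Fn> int retryOnEINTR(Fn F, int MaxRetries = 8) {
  int Err;
  int Attempts = 0;
  do {
    Err = F();
  } while (Err == EINTR && ++Attempts < MaxRetries);
  return Err;
}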
diff --git a/lib/Support/Unix/RWMutex.inc b/lib/Support/Unix/RWMutex.inc
index 85a104334a27..8b47dfa0f85c 100644
--- a/lib/Support/Unix/RWMutex.inc
+++ b/lib/Support/Unix/RWMutex.inc
@@ -1,9 +1,8 @@
//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index ad88d5e96906..634c16aa36c7 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -1,9 +1,8 @@
//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,6 +42,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/UniqueLock.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -81,10 +81,13 @@
using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
+static RETSIGTYPE InfoSignalHandler(int Sig); // defined below.
+using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
-using InterruptFunctionType = void (*)();
-static std::atomic<InterruptFunctionType> InterruptFunction =
+static std::atomic<SignalHandlerFunctionType> InterruptFunction =
+ ATOMIC_VAR_INIT(nullptr);
+static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
ATOMIC_VAR_INIT(nullptr);
namespace {
@@ -200,15 +203,15 @@ struct FilesToRemoveCleanup {
static StringRef Argv0;
-// Signals that represent requested termination. There's no bug or failure, or
-// if there is, it's not our direct responsibility. For whatever reason, our
-// continued execution is no longer desirable.
+/// Signals that represent requested termination. There's no bug or failure, or
+/// if there is, it's not our direct responsibility. For whatever reason, our
+/// continued execution is no longer desirable.
static const int IntSigs[] = {
- SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+ SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR2
};
-// Signals that represent that we have a bug, and our prompt termination has
-// been ordered.
+/// Signals that represent that we have a bug, and our prompt termination has
+/// been ordered.
static const int KillSigs[] = {
SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT
#ifdef SIGSYS
@@ -225,11 +228,24 @@ static const int KillSigs[] = {
#endif
};
+/// Signals that represent requests for status.
+static const int InfoSigs[] = {
+ SIGUSR1
+#ifdef SIGINFO
+ , SIGINFO
+#endif
+};
+
+static const size_t NumSigs =
+ array_lengthof(IntSigs) + array_lengthof(KillSigs) +
+ array_lengthof(InfoSigs);
+
+
static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
static struct {
struct sigaction SA;
int SigNo;
-} RegisteredSignalInfo[array_lengthof(IntSigs) + array_lengthof(KillSigs)];
+} RegisteredSignalInfo[NumSigs];
#if defined(HAVE_SIGALTSTACK)
// Hold onto both the old and new alternate signal stack so that it's not
@@ -277,15 +293,24 @@ static void RegisterHandlers() { // Not signal-safe.
// be able to reliably handle signals due to stack overflow.
CreateSigAltStack();
- auto registerHandler = [&](int Signal) {
+ enum class SignalKind { IsKill, IsInfo };
+ auto registerHandler = [&](int Signal, SignalKind Kind) {
unsigned Index = NumRegisteredSignals.load();
assert(Index < array_lengthof(RegisteredSignalInfo) &&
"Out of space for signal handlers!");
struct sigaction NewHandler;
- NewHandler.sa_handler = SignalHandler;
- NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK;
+ switch (Kind) {
+ case SignalKind::IsKill:
+ NewHandler.sa_handler = SignalHandler;
+ NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK;
+ break;
+ case SignalKind::IsInfo:
+ NewHandler.sa_handler = InfoSignalHandler;
+ NewHandler.sa_flags = SA_ONSTACK;
+ break;
+ }
sigemptyset(&NewHandler.sa_mask);
// Install the new handler, save the old one in RegisteredSignalInfo.
@@ -295,9 +320,11 @@ static void RegisterHandlers() { // Not signal-safe.
};
for (auto S : IntSigs)
- registerHandler(S);
+ registerHandler(S, SignalKind::IsKill);
for (auto S : KillSigs)
- registerHandler(S);
+ registerHandler(S, SignalKind::IsKill);
+ for (auto S : InfoSigs)
+ registerHandler(S, SignalKind::IsInfo);
}
static void UnregisterHandlers() {
@@ -357,6 +384,12 @@ static RETSIGTYPE SignalHandler(int Sig) {
#endif
}
+static RETSIGTYPE InfoSignalHandler(int Sig) {
+ SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno);
+ if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction)
+ CurrentInfoFunction();
+}
+
void llvm::sys::RunInterruptHandlers() {
RemoveFilesToRemove();
}
@@ -366,6 +399,11 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
RegisterHandlers();
}
+void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
+ InfoSignalFunction.exchange(Handler);
+ RegisterHandlers();
+}
+
// The public API
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
@@ -540,11 +578,8 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
else OS << d;
free(d);
- // FIXME: When we move to C++11, use %t length modifier. It's not in
- // C++03 and causes gcc to issue warnings. Losing the upper 32 bits of
- // the stack offset for a stack dump isn't likely to cause any problems.
- OS << format(" + %u",(unsigned)((char*)StackTrace[i]-
- (char*)dlinfo.dli_saddr));
+ OS << format(" + %tu", (static_cast<const char*>(StackTrace[i])-
+ static_cast<const char*>(dlinfo.dli_saddr)));
}
OS << '\n';
}
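A possible consumer (not part of the patch) of the info-signal hook added to Signals.inc above: register a status callback that runs when the process receives SIGUSR1, or SIGINFO where it exists (Ctrl-T on the BSDs). The callback runs in signal context, so here it only flips an atomic flag that the main loop polls; the declaration is assumed to live in llvm/Support/Signals.h alongside SetInterruptFunction.

#include "llvm/Support/Signals.h"
#include <atomic>

static std::atomic<bool> StatusRequested{false};

void installStatusHandler() {
  llvm::sys::SetInfoSignalFunction([] { StatusRequested = true; });
}

void pollStatusRequest() {
  if (StatusRequested.exchange(false)) {
    // ... print current progress to stderr ...
  }
}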
diff --git a/lib/Support/Unix/ThreadLocal.inc b/lib/Support/Unix/ThreadLocal.inc
index a6564f0fa281..a402ae980424 100644
--- a/lib/Support/Unix/ThreadLocal.inc
+++ b/lib/Support/Unix/ThreadLocal.inc
@@ -1,9 +1,8 @@
//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Unix/Threading.inc b/lib/Support/Unix/Threading.inc
index 2d49ce1ad747..ed9a96563055 100644
--- a/lib/Support/Unix/Threading.inc
+++ b/lib/Support/Unix/Threading.inc
@@ -1,9 +1,8 @@
//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -203,6 +202,12 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
::pthread_getname_np(::pthread_self(), buf, len);
Name.append(buf, buf + strlen(buf));
+#elif defined(__OpenBSD__)
+ constexpr uint32_t len = get_max_thread_name_length_impl();
+ char buf[len];
+ ::pthread_get_name_np(::pthread_self(), buf, len);
+
+ Name.append(buf, buf + strlen(buf));
#elif defined(__linux__)
#if HAVE_PTHREAD_GETNAME_NP
constexpr uint32_t len = get_max_thread_name_length_impl();
@@ -212,3 +217,42 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
#endif
#endif
}
+
+SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
+#if defined(__linux__) && defined(SCHED_IDLE)
+ // Some *really* old glibcs are missing SCHED_IDLE.
+ // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
+ // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
+ sched_param priority;
+ // For each of the above policies, param->sched_priority must be 0.
+ priority.sched_priority = 0;
+ // SCHED_IDLE for running very low priority background jobs.
+ // SCHED_OTHER the standard round-robin time-sharing policy;
+ return !pthread_setschedparam(
+ pthread_self(),
+ Priority == ThreadPriority::Background ? SCHED_IDLE : SCHED_OTHER,
+ &priority)
+ ? SetThreadPriorityResult::SUCCESS
+ : SetThreadPriorityResult::FAILURE;
+#elif defined(__APPLE__)
+ // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/getpriority.2.html
+ // When setting a thread into background state the scheduling priority is set
+ // to lowest value, disk and network IO are throttled. Network IO will be
+ // throttled for any sockets the thread opens after going into background
+ // state. Any previously opened sockets are not affected.
+
+ // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/getiopolicy_np.3.html
+ // I/Os with THROTTLE policy are called THROTTLE I/Os. If a THROTTLE I/O
+ // request occurs within a small time window (usually a fraction of a second)
+ // of another NORMAL I/O request, the thread that issues the THROTTLE I/O is
+ // forced to sleep for a certain interval. This slows down the thread that
+ // issues the THROTTLE I/O so that NORMAL I/Os can utilize most of the disk
+ // I/O bandwidth.
+ return !setpriority(PRIO_DARWIN_THREAD, 0,
+ Priority == ThreadPriority::Background ? PRIO_DARWIN_BG
+ : 0)
+ ? SetThreadPriorityResult::SUCCESS
+ : SetThreadPriorityResult::FAILURE;
+#endif
+ return SetThreadPriorityResult::FAILURE;
+}
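A possible call site for the new set_thread_priority() (the surrounding worker is invented, and the enumerator names Background/Default are assumed from this release's Threading.h): drop a background thread into the idle or throttled class for the duration of low-priority work.

#include "llvm/Support/Threading.h"

void runLowPriorityWork() {
  using llvm::ThreadPriority;
  llvm::set_thread_priority(ThreadPriority::Background);
  // ... SCHED_IDLE on Linux, PRIO_DARWIN_BG (throttled I/O) on Darwin ...
  llvm::set_thread_priority(ThreadPriority::Default);
}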
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
index 0c5d4de556d5..86309b0567f5 100644
--- a/lib/Support/Unix/Unix.h
+++ b/lib/Support/Unix/Unix.h
@@ -1,9 +1,8 @@
//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,7 @@
//=== is guaranteed to work on all UNIX variants.
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Errno.h"
#include <algorithm>
diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc
index f4253391d952..b363ef779560 100644
--- a/lib/Support/Unix/Watchdog.inc
+++ b/lib/Support/Unix/Watchdog.inc
@@ -1,9 +1,8 @@
//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
index 8d852a67c075..886cb6ba3311 100644
--- a/lib/Support/Valgrind.cpp
+++ b/lib/Support/Valgrind.cpp
@@ -1,9 +1,8 @@
//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/VersionTuple.cpp b/lib/Support/VersionTuple.cpp
index 3f219bfbedfa..60b59424fbb4 100644
--- a/lib/Support/VersionTuple.cpp
+++ b/lib/Support/VersionTuple.cpp
@@ -1,9 +1,8 @@
//===- VersionTuple.cpp - Version Number Handling ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/VirtualFileSystem.cpp b/lib/Support/VirtualFileSystem.cpp
index f2a8a1bb27af..5d3480e97148 100644
--- a/lib/Support/VirtualFileSystem.cpp
+++ b/lib/Support/VirtualFileSystem.cpp
@@ -1,9 +1,8 @@
//===- VirtualFileSystem.cpp - Virtual File System Layer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,8 +56,10 @@
using namespace llvm;
using namespace llvm::vfs;
+using llvm::sys::fs::file_t;
using llvm::sys::fs::file_status;
using llvm::sys::fs::file_type;
+using llvm::sys::fs::kInvalidFile;
using llvm::sys::fs::perms;
using llvm::sys::fs::UniqueID;
@@ -67,19 +68,19 @@ Status::Status(const file_status &Status)
User(Status.getUser()), Group(Status.getGroup()), Size(Status.getSize()),
Type(Status.type()), Perms(Status.permissions()) {}
-Status::Status(StringRef Name, UniqueID UID, sys::TimePoint<> MTime,
+Status::Status(const Twine &Name, UniqueID UID, sys::TimePoint<> MTime,
uint32_t User, uint32_t Group, uint64_t Size, file_type Type,
perms Perms)
- : Name(Name), UID(UID), MTime(MTime), User(User), Group(Group), Size(Size),
- Type(Type), Perms(Perms) {}
+ : Name(Name.str()), UID(UID), MTime(MTime), User(User), Group(Group),
+ Size(Size), Type(Type), Perms(Perms) {}
-Status Status::copyWithNewName(const Status &In, StringRef NewName) {
+Status Status::copyWithNewName(const Status &In, const Twine &NewName) {
return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
In.getUser(), In.getGroup(), In.getSize(), In.getType(),
In.getPermissions());
}
-Status Status::copyWithNewName(const file_status &In, StringRef NewName) {
+Status Status::copyWithNewName(const file_status &In, const Twine &NewName) {
return Status(NewName, In.getUniqueID(), In.getLastModificationTime(),
In.getUser(), In.getGroup(), In.getSize(), In.type(),
In.permissions());
@@ -171,15 +172,15 @@ namespace {
class RealFile : public File {
friend class RealFileSystem;
- int FD;
+ file_t FD;
Status S;
std::string RealName;
- RealFile(int FD, StringRef NewName, StringRef NewRealPathName)
+ RealFile(file_t FD, StringRef NewName, StringRef NewRealPathName)
: FD(FD), S(NewName, {}, {}, {}, {}, {},
llvm::sys::fs::file_type::status_error, {}),
RealName(NewRealPathName.str()) {
- assert(FD >= 0 && "Invalid or inactive file descriptor");
+ assert(FD != kInvalidFile && "Invalid or inactive file descriptor");
}
public:
@@ -199,7 +200,7 @@ public:
RealFile::~RealFile() { close(); }
ErrorOr<Status> RealFile::status() {
- assert(FD != -1 && "cannot stat closed file");
+ assert(FD != kInvalidFile && "cannot stat closed file");
if (!S.isStatusKnown()) {
file_status RealStatus;
if (std::error_code EC = sys::fs::status(FD, RealStatus))
@@ -216,22 +217,41 @@ ErrorOr<std::string> RealFile::getName() {
ErrorOr<std::unique_ptr<MemoryBuffer>>
RealFile::getBuffer(const Twine &Name, int64_t FileSize,
bool RequiresNullTerminator, bool IsVolatile) {
- assert(FD != -1 && "cannot get buffer for closed file");
+ assert(FD != kInvalidFile && "cannot get buffer for closed file");
return MemoryBuffer::getOpenFile(FD, Name, FileSize, RequiresNullTerminator,
IsVolatile);
}
std::error_code RealFile::close() {
- std::error_code EC = sys::Process::SafelyCloseFileDescriptor(FD);
- FD = -1;
+ std::error_code EC = sys::fs::closeFile(FD);
+ FD = kInvalidFile;
return EC;
}
namespace {
-/// The file system according to your operating system.
+/// A file system according to your operating system.
+/// It may track the process's working directory, or it may maintain its own.
+///
+/// Currently, its own working directory is emulated by storing the path and
+/// sending absolute paths to llvm::sys::fs:: functions.
+/// A more principled approach would be to push this down a level, modelling
+/// the working dir as an llvm::sys::fs::WorkingDir or similar.
+/// This would enable the use of openat()-style functions on some platforms.
class RealFileSystem : public FileSystem {
public:
+ explicit RealFileSystem(bool LinkCWDToProcess) {
+ if (!LinkCWDToProcess) {
+ SmallString<128> PWD, RealPWD;
+ if (llvm::sys::fs::current_path(PWD))
+ return; // Awful, but nothing to do here.
+ if (llvm::sys::fs::real_path(PWD, RealPWD))
+ WD = {PWD, PWD};
+ else
+ WD = {PWD, RealPWD};
+ }
+ }
+
ErrorOr<Status> status(const Twine &Path) override;
ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override;
directory_iterator dir_begin(const Twine &Dir, std::error_code &EC) override;
@@ -243,72 +263,95 @@ public:
SmallVectorImpl<char> &Output) const override;
private:
- mutable std::mutex CWDMutex;
- mutable std::string CWDCache;
+ // If this FS has its own working dir, use it to make Path absolute.
+ // The returned twine is safe to use as long as both Storage and Path live.
+ Twine adjustPath(const Twine &Path, SmallVectorImpl<char> &Storage) const {
+ if (!WD)
+ return Path;
+ Path.toVector(Storage);
+ sys::fs::make_absolute(WD->Resolved, Storage);
+ return Storage;
+ }
+
+ struct WorkingDirectory {
+ // The current working directory, without symlinks resolved. (echo $PWD).
+ SmallString<128> Specified;
+ // The current working directory, with links resolved. (readlink .).
+ SmallString<128> Resolved;
+ };
+ Optional<WorkingDirectory> WD;
};
} // namespace
ErrorOr<Status> RealFileSystem::status(const Twine &Path) {
+ SmallString<256> Storage;
sys::fs::file_status RealStatus;
- if (std::error_code EC = sys::fs::status(Path, RealStatus))
+ if (std::error_code EC =
+ sys::fs::status(adjustPath(Path, Storage), RealStatus))
return EC;
- return Status::copyWithNewName(RealStatus, Path.str());
+ return Status::copyWithNewName(RealStatus, Path);
}
ErrorOr<std::unique_ptr<File>>
RealFileSystem::openFileForRead(const Twine &Name) {
- int FD;
- SmallString<256> RealName;
- if (std::error_code EC =
- sys::fs::openFileForRead(Name, FD, sys::fs::OF_None, &RealName))
- return EC;
- return std::unique_ptr<File>(new RealFile(FD, Name.str(), RealName.str()));
+ SmallString<256> RealName, Storage;
+ Expected<file_t> FDOrErr = sys::fs::openNativeFileForRead(
+ adjustPath(Name, Storage), sys::fs::OF_None, &RealName);
+ if (!FDOrErr)
+ return errorToErrorCode(FDOrErr.takeError());
+ return std::unique_ptr<File>(
+ new RealFile(*FDOrErr, Name.str(), RealName.str()));
}
llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const {
- std::lock_guard<std::mutex> Lock(CWDMutex);
- if (!CWDCache.empty())
- return CWDCache;
- SmallString<256> Dir;
+ if (WD)
+ return WD->Specified.str();
+
+ SmallString<128> Dir;
if (std::error_code EC = llvm::sys::fs::current_path(Dir))
return EC;
- CWDCache = Dir.str();
- return CWDCache;
+ return Dir.str();
}
std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
- // FIXME: chdir is thread hostile; on the other hand, creating the same
- // behavior as chdir is complex: chdir resolves the path once, thus
- // guaranteeing that all subsequent relative path operations work
- // on the same path the original chdir resulted in. This makes a
- // difference for example on network filesystems, where symlinks might be
- // switched during runtime of the tool. Fixing this depends on having a
- // file system abstraction that allows openat() style interactions.
- if (auto EC = llvm::sys::fs::set_current_path(Path))
- return EC;
-
- // Invalidate cache.
- std::lock_guard<std::mutex> Lock(CWDMutex);
- CWDCache.clear();
+ if (!WD)
+ return llvm::sys::fs::set_current_path(Path);
+
+ SmallString<128> Absolute, Resolved, Storage;
+ adjustPath(Path, Storage).toVector(Absolute);
+ bool IsDir;
+ if (auto Err = llvm::sys::fs::is_directory(Absolute, IsDir))
+ return Err;
+ if (!IsDir)
+ return std::make_error_code(std::errc::not_a_directory);
+ if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved))
+ return Err;
+ WD = {Absolute, Resolved};
return std::error_code();
}
std::error_code RealFileSystem::isLocal(const Twine &Path, bool &Result) {
- return llvm::sys::fs::is_local(Path, Result);
+ SmallString<256> Storage;
+ return llvm::sys::fs::is_local(adjustPath(Path, Storage), Result);
}
std::error_code
RealFileSystem::getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) const {
- return llvm::sys::fs::real_path(Path, Output);
+ SmallString<256> Storage;
+ return llvm::sys::fs::real_path(adjustPath(Path, Storage), Output);
}
IntrusiveRefCntPtr<FileSystem> vfs::getRealFileSystem() {
- static IntrusiveRefCntPtr<FileSystem> FS = new RealFileSystem();
+ static IntrusiveRefCntPtr<FileSystem> FS(new RealFileSystem(true));
return FS;
}
+std::unique_ptr<FileSystem> vfs::createPhysicalFileSystem() {
+ return llvm::make_unique<RealFileSystem>(false);
+}
+
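As an illustration of the new entry point (not part of the patch itself): a client can hold a physical file system whose working directory is independent of the process, with relative paths resolved through adjustPath() against the FS-local directory. The "/tmp" and "some-file.txt" values below are placeholders, and the include lines are assumptions about the usual headers.

// Sketch only; assumes #include "llvm/Support/VirtualFileSystem.h" and
// #include "llvm/Support/raw_ostream.h".
std::unique_ptr<llvm::vfs::FileSystem> FS = llvm::vfs::createPhysicalFileSystem();
if (std::error_code EC = FS->setCurrentWorkingDirectory("/tmp"))   // placeholder dir
  llvm::errs() << "cannot set working directory: " << EC.message() << "\n";
// Relative lookups go through the FS-local working directory, not the process CWD.
llvm::ErrorOr<llvm::vfs::Status> S = FS->status("some-file.txt");   // placeholder file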
namespace {
class RealFSDirIter : public llvm::vfs::detail::DirIterImpl {
@@ -334,7 +377,9 @@ public:
directory_iterator RealFileSystem::dir_begin(const Twine &Dir,
std::error_code &EC) {
- return directory_iterator(std::make_shared<RealFSDirIter>(Dir, EC));
+ SmallString<128> Storage;
+ return directory_iterator(
+ std::make_shared<RealFSDirIter>(adjustPath(Dir, Storage), EC));
}
//===-----------------------------------------------------------------------===/
@@ -511,7 +556,7 @@ public:
/// Return the \p Status for this node. \p RequestedName should be the name
/// through which the caller referred to this node. It will override
/// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
- Status getStatus(StringRef RequestedName) const {
+ Status getStatus(const Twine &RequestedName) const {
return Status::copyWithNewName(Stat, RequestedName);
}
llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }
@@ -585,7 +630,7 @@ public:
/// Return the \p Status for this node. \p RequestedName should be the name
/// through which the caller referred to this node. It will override
/// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
- Status getStatus(StringRef RequestedName) const {
+ Status getStatus(const Twine &RequestedName) const {
return Status::copyWithNewName(Stat, RequestedName);
}
InMemoryNode *getChild(StringRef Name) {
@@ -619,7 +664,7 @@ public:
};
namespace {
-Status getNodeStatus(const InMemoryNode *Node, StringRef RequestedName) {
+Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) {
if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node))
return Dir->getStatus(RequestedName);
if (auto File = dyn_cast<detail::InMemoryFile>(Node))
@@ -817,7 +862,7 @@ bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
auto Node = lookupInMemoryNode(*this, Root.get(), Path);
if (Node)
- return detail::getNodeStatus(*Node, Path.str());
+ return detail::getNodeStatus(*Node, Path);
return Node.getError();
}
@@ -1237,7 +1282,7 @@ class llvm::vfs::RedirectingFileSystemParser {
EntryArrayContents;
std::string ExternalContentsPath;
std::string Name;
- yaml::Node *NameValueNode;
+ yaml::Node *NameValueNode = nullptr;
auto UseExternalName =
RedirectingFileSystem::RedirectingFileEntry::NK_NotSet;
RedirectingFileSystem::EntryKind Kind;
@@ -1633,7 +1678,7 @@ static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames,
Status ExternalStatus) {
Status S = ExternalStatus;
if (!UseExternalNames)
- S = Status::copyWithNewName(S, Path.str());
+ S = Status::copyWithNewName(S, Path);
S.IsVFSMapped = true;
return S;
}
@@ -1650,7 +1695,7 @@ ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path,
return S;
} else { // directory
auto *DE = cast<RedirectingFileSystem::RedirectingDirectoryEntry>(E);
- return Status::copyWithNewName(DE->getStatus(), Path.str());
+ return Status::copyWithNewName(DE->getStatus(), Path);
}
}
diff --git a/lib/Support/Watchdog.cpp b/lib/Support/Watchdog.cpp
index be55e3122e70..246f3dc7a0ca 100644
--- a/lib/Support/Watchdog.cpp
+++ b/lib/Support/Watchdog.cpp
@@ -1,9 +1,8 @@
//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/COM.inc b/lib/Support/Windows/COM.inc
index 54f3ecf28ec2..002182bc3939 100644
--- a/lib/Support/Windows/COM.inc
+++ b/lib/Support/Windows/COM.inc
@@ -1,9 +1,8 @@
//==- llvm/Support/Windows/COM.inc - Windows COM Implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 1d47f0848a6d..71b206c4cf9e 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -1,9 +1,8 @@
//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index 58c4dc5d678f..21b947f26df3 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Win32/Host.inc ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index 318e65aaa9ee..a67f9c7d0f35 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -1,9 +1,8 @@
//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,7 +22,7 @@
namespace {
DWORD getWindowsProtectionFlags(unsigned Flags) {
- switch (Flags) {
+ switch (Flags & llvm::sys::Memory::MF_RWE_MASK) {
// Contrary to what you might expect, the Windows page protection flags
// are not a bitwise combination of RWX values
case llvm::sys::Memory::MF_READ:
@@ -48,6 +47,9 @@ DWORD getWindowsProtectionFlags(unsigned Flags) {
return PAGE_NOACCESS;
}
+// While we'd be happy to allocate single pages, the Windows allocation
+// granularity may be larger than a single page (in practice, it is 64K),
+// so mapping less than that will create an unreachable fragment of memory.
size_t getAllocationGranularity() {
SYSTEM_INFO Info;
::GetSystemInfo(&Info);
@@ -57,6 +59,38 @@ size_t getAllocationGranularity() {
return Info.dwAllocationGranularity;
}
+// Large/huge memory pages need explicit process permissions in order to be
+// used. See https://blogs.msdn.microsoft.com/oldnewthing/20110128-00/?p=11643
+// Also, large pages need to be manually enabled on your OS. If all this is
+// successful, we return the minimal large memory page size.
+static size_t enableProcessLargePages() {
+ HANDLE Token = 0;
+ size_t LargePageMin = GetLargePageMinimum();
+ if (LargePageMin)
+ OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY,
+ &Token);
+ if (!Token)
+ return 0;
+ LUID Luid;
+ if (!LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &Luid)) {
+ CloseHandle(Token);
+ return 0;
+ }
+ TOKEN_PRIVILEGES TP{};
+ TP.PrivilegeCount = 1;
+ TP.Privileges[0].Luid = Luid;
+ TP.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+ if (!AdjustTokenPrivileges(Token, FALSE, &TP, 0, 0, 0)) {
+ CloseHandle(Token);
+ return 0;
+ }
+ DWORD E = GetLastError();
+ CloseHandle(Token);
+ if (E == ERROR_SUCCESS)
+ return LargePageMin;
+ return 0;
+}
+
} // namespace
namespace llvm {
@@ -75,22 +109,23 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
if (NumBytes == 0)
return MemoryBlock();
- // While we'd be happy to allocate single pages, the Windows allocation
- // granularity may be larger than a single page (in practice, it is 64K)
- // so mapping less than that will create an unreachable fragment of memory.
- // Avoid using one-time initialization of static locals here, since they
- // aren't thread safe with MSVC.
- static volatile size_t GranularityCached;
- size_t Granularity = GranularityCached;
- if (Granularity == 0) {
- Granularity = getAllocationGranularity();
- GranularityCached = Granularity;
+ static size_t DefaultGranularity = getAllocationGranularity();
+ static size_t LargePageGranularity = enableProcessLargePages();
+
+ DWORD AllocType = MEM_RESERVE | MEM_COMMIT;
+ bool HugePages = false;
+ size_t Granularity = DefaultGranularity;
+
+ if ((Flags & MF_HUGE_HINT) && LargePageGranularity > 0) {
+ AllocType |= MEM_LARGE_PAGES;
+ HugePages = true;
+ Granularity = LargePageGranularity;
}
- const size_t NumBlocks = (NumBytes+Granularity-1)/Granularity;
+ size_t NumBlocks = (NumBytes + Granularity - 1) / Granularity;
uintptr_t Start = NearBlock ? reinterpret_cast<uintptr_t>(NearBlock->base()) +
- NearBlock->size()
+ NearBlock->allocatedSize()
: 0;
// If the requested address is not aligned to the allocation granularity,
@@ -100,13 +135,13 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
DWORD Protect = getWindowsProtectionFlags(Flags);
- void *PA = ::VirtualAlloc(reinterpret_cast<void*>(Start),
- NumBlocks*Granularity,
- MEM_RESERVE | MEM_COMMIT, Protect);
+ size_t AllocSize = NumBlocks * Granularity;
+ void *PA = ::VirtualAlloc(reinterpret_cast<void *>(Start),
+ AllocSize, AllocType, Protect);
if (PA == NULL) {
- if (NearBlock) {
- // Try again without the NearBlock hint
- return allocateMappedMemory(NumBytes, NULL, Flags, EC);
+ if (NearBlock || HugePages) {
+ // Try again without the NearBlock hint and without large memory pages
+ return allocateMappedMemory(NumBytes, NULL, Flags & ~MF_HUGE_HINT, EC);
}
EC = mapWindowsError(::GetLastError());
return MemoryBlock();
@@ -114,40 +149,41 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
MemoryBlock Result;
Result.Address = PA;
- Result.Size = NumBlocks*Granularity;
+ Result.AllocatedSize = AllocSize;
+ Result.Flags = (Flags & ~MF_HUGE_HINT) | (HugePages ? MF_HUGE_HINT : 0);
if (Flags & MF_EXEC)
- Memory::InvalidateInstructionCache(Result.Address, Result.Size);
+ Memory::InvalidateInstructionCache(Result.Address, AllocSize);
return Result;
}
std::error_code Memory::releaseMappedMemory(MemoryBlock &M) {
- if (M.Address == 0 || M.Size == 0)
+ if (M.Address == 0 || M.AllocatedSize == 0)
return std::error_code();
if (!VirtualFree(M.Address, 0, MEM_RELEASE))
return mapWindowsError(::GetLastError());
M.Address = 0;
- M.Size = 0;
+ M.AllocatedSize = 0;
return std::error_code();
}
std::error_code Memory::protectMappedMemory(const MemoryBlock &M,
unsigned Flags) {
- if (M.Address == 0 || M.Size == 0)
+ if (M.Address == 0 || M.AllocatedSize == 0)
return std::error_code();
DWORD Protect = getWindowsProtectionFlags(Flags);
DWORD OldFlags;
- if (!VirtualProtect(M.Address, M.Size, Protect, &OldFlags))
+ if (!VirtualProtect(M.Address, M.AllocatedSize, Protect, &OldFlags))
return mapWindowsError(::GetLastError());
if (Flags & MF_EXEC)
- Memory::InvalidateInstructionCache(M.Address, M.Size);
+ Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize);
return std::error_code();
}
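A minimal sketch (not part of the patch) of how a caller opts into the new large-page path: MF_HUGE_HINT is only a hint, and the retry above falls back to regular pages when the privilege or page size is unavailable.

std::error_code EC;
llvm::sys::MemoryBlock MB = llvm::sys::Memory::allocateMappedMemory(
    2 << 20, /*NearBlock=*/nullptr,
    llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE |
        llvm::sys::Memory::MF_HUGE_HINT,
    EC);
if (!EC) {
  // MB.allocatedSize() reflects the granularity actually used (64K or the
  // large-page minimum).
  llvm::sys::Memory::releaseMappedMemory(MB);
}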
diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc
index 0af145ec9a4e..b55b14febf2c 100644
--- a/lib/Support/Windows/Mutex.inc
+++ b/lib/Support/Windows/Mutex.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index d34aa763124c..5704930aeecc 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -1,9 +1,8 @@
//===- llvm/Support/Windows/Path.inc - Windows Path Impl --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -735,6 +734,14 @@ std::error_code status(int FD, file_status &Result) {
return getStatus(FileHandle, Result);
}
+std::error_code status(file_t FileHandle, file_status &Result) {
+ return getStatus(FileHandle, Result);
+}
+
+unsigned getUmask() {
+ return 0;
+}
+
std::error_code setPermissions(const Twine &Path, perms Permissions) {
SmallVector<wchar_t, 128> PathUTF16;
if (std::error_code EC = widenPath(Path, PathUTF16))
@@ -766,6 +773,11 @@ std::error_code setPermissions(const Twine &Path, perms Permissions) {
return std::error_code();
}
+std::error_code setPermissions(int FD, perms Permissions) {
+ // FIXME Not implemented.
+ return std::make_error_code(std::errc::not_supported);
+}
+
std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
TimePoint<> ModificationTime) {
FILETIME AccessFT = toFILETIME(AccessTime);
@@ -776,10 +788,9 @@ std::error_code setLastAccessAndModificationTime(int FD, TimePoint<> AccessTime,
return std::error_code();
}
-std::error_code mapped_file_region::init(int FD, uint64_t Offset,
- mapmode Mode) {
+std::error_code mapped_file_region::init(sys::fs::file_t OrigFileHandle,
+ uint64_t Offset, mapmode Mode) {
this->Mode = Mode;
- HANDLE OrigFileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
if (OrigFileHandle == INVALID_HANDLE_VALUE)
return make_error_code(errc::bad_file_descriptor);
@@ -846,8 +857,9 @@ std::error_code mapped_file_region::init(int FD, uint64_t Offset,
return std::error_code();
}
-mapped_file_region::mapped_file_region(int fd, mapmode mode, size_t length,
- uint64_t offset, std::error_code &ec)
+mapped_file_region::mapped_file_region(sys::fs::file_t fd, mapmode mode,
+ size_t length, uint64_t offset,
+ std::error_code &ec)
: Size(length), Mapping() {
ec = init(fd, offset, mode);
if (ec)
@@ -1197,9 +1209,73 @@ Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags,
return Result;
}
-void closeFile(file_t &F) {
- ::CloseHandle(F);
+file_t convertFDToNativeFile(int FD) {
+ return reinterpret_cast<HANDLE>(::_get_osfhandle(FD));
+}
+
+file_t getStdinHandle() { return ::GetStdHandle(STD_INPUT_HANDLE); }
+file_t getStdoutHandle() { return ::GetStdHandle(STD_OUTPUT_HANDLE); }
+file_t getStderrHandle() { return ::GetStdHandle(STD_ERROR_HANDLE); }
+
+std::error_code readNativeFileImpl(file_t FileHandle, char *BufPtr, size_t BytesToRead,
+ size_t *BytesRead, OVERLAPPED *Overlap) {
+ // ReadFile can only read 2GB at a time. The caller should check the number of
+ // bytes and read in a loop until termination.
+ DWORD BytesToRead32 =
+ std::min(size_t(std::numeric_limits<DWORD>::max()), BytesToRead);
+ DWORD BytesRead32 = 0;
+ bool Success =
+ ::ReadFile(FileHandle, BufPtr, BytesToRead32, &BytesRead32, Overlap);
+ *BytesRead = BytesRead32;
+ if (!Success) {
+ DWORD Err = ::GetLastError();
+ // Pipe EOF is not an error.
+ if (Err == ERROR_BROKEN_PIPE)
+ return std::error_code();
+ return mapWindowsError(Err);
+ }
+ return std::error_code();
+}
+
+std::error_code readNativeFile(file_t FileHandle, MutableArrayRef<char> Buf,
+ size_t *BytesRead) {
+ return readNativeFileImpl(FileHandle, Buf.data(), Buf.size(), BytesRead,
+ /*Overlap=*/nullptr);
+}
+
+std::error_code readNativeFileSlice(file_t FileHandle,
+ MutableArrayRef<char> Buf, size_t Offset) {
+ char *BufPtr = Buf.data();
+ size_t BytesLeft = Buf.size();
+
+ while (BytesLeft) {
+ uint64_t CurOff = Buf.size() - BytesLeft + Offset;
+ OVERLAPPED Overlapped = {};
+ Overlapped.Offset = uint32_t(CurOff);
+ Overlapped.OffsetHigh = uint32_t(uint64_t(CurOff) >> 32);
+
+ size_t BytesRead = 0;
+ if (auto EC = readNativeFileImpl(FileHandle, BufPtr, BytesLeft, &BytesRead,
+ &Overlapped))
+ return EC;
+
+ // Once we reach EOF, zero the remaining bytes in the buffer.
+ if (BytesRead == 0) {
+ memset(BufPtr, 0, BytesLeft);
+ break;
+ }
+ BytesLeft -= BytesRead;
+ BufPtr += BytesRead;
+ }
+ return std::error_code();
+}
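A sketch of the caller-side loop the 2GB comment above asks for (not part of the patch): read an entire file through the native handle, stopping once readNativeFile() reports zero bytes. The helper name and the consumeError() handling are placeholders.

// Assumes #include "llvm/Support/FileSystem.h", "llvm/Support/Error.h",
// and "llvm/ADT/SmallVector.h".
static void readWholeFileExample(const llvm::Twine &Path) {
  llvm::Expected<llvm::sys::fs::file_t> FD =
      llvm::sys::fs::openNativeFileForRead(Path, llvm::sys::fs::OF_None);
  if (!FD) {
    llvm::consumeError(FD.takeError());   // real code would report this
    return;
  }
  llvm::SmallVector<char, 0> Data;
  char Chunk[4096];
  for (;;) {
    size_t NumRead = 0;
    if (llvm::sys::fs::readNativeFile(
            *FD, llvm::MutableArrayRef<char>(Chunk, sizeof(Chunk)), &NumRead))
      break;                              // I/O error
    if (NumRead == 0)
      break;                              // EOF (broken pipe is mapped to 0 above)
    Data.append(Chunk, Chunk + NumRead);
  }
  llvm::sys::fs::closeFile(*FD);
}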
+
+std::error_code closeFile(file_t &F) {
+ file_t TmpF = F;
F = kInvalidFile;
+ if (!::CloseHandle(TmpF))
+ return mapWindowsError(::GetLastError());
+ return std::error_code();
}
std::error_code remove_directories(const Twine &path, bool IgnoreErrors) {
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 2b2d79231434..4b91f9f7fc66 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -1,9 +1,8 @@
//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,7 +56,7 @@ static unsigned computePageSize() {
return static_cast<unsigned>(info.dwPageSize);
}
-unsigned Process::getPageSize() {
+Expected<unsigned> Process::getPageSize() {
static unsigned Ret = computePageSize();
return Ret;
}
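Since getPageSize() now returns Expected<unsigned>, callers have to handle the error path explicitly; a minimal sketch (not part of the patch):

if (llvm::Expected<unsigned> PageSize = llvm::sys::Process::getPageSize())
  llvm::outs() << "page size: " << *PageSize << "\n";
else
  llvm::consumeError(PageSize.takeError());   // real code would propagate this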
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index c037956603f2..0f54e59ee55b 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -1,9 +1,8 @@
//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
index 5eb9351eee52..8df9bc394160 100644
--- a/lib/Support/Windows/RWMutex.inc
+++ b/lib/Support/Windows/RWMutex.inc
@@ -1,9 +1,8 @@
//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 41eb5e593aa5..6a820ef22b1e 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -1,9 +1,8 @@
//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -557,6 +556,10 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
LeaveCriticalSection(&CriticalSection);
}
+void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
+ // Unimplemented.
+}
+
/// Add a function to be called when a signal is delivered to the process. The
/// handler can have a cookie passed to it to identify what instance of the
diff --git a/lib/Support/Windows/ThreadLocal.inc b/lib/Support/Windows/ThreadLocal.inc
index 8be1c3ecfbb9..1e0ed955e9ab 100644
--- a/lib/Support/Windows/ThreadLocal.inc
+++ b/lib/Support/Windows/ThreadLocal.inc
@@ -1,9 +1,8 @@
//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/Threading.inc b/lib/Support/Windows/Threading.inc
index 0bd92f66c6b8..96649472cc90 100644
--- a/lib/Support/Windows/Threading.inc
+++ b/lib/Support/Windows/Threading.inc
@@ -1,9 +1,8 @@
//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -107,3 +106,19 @@ void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
// value.
Name.clear();
}
+
+SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
+ // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
+  // THREAD_MODE_BACKGROUND_BEGIN: begin background processing mode. The system
+  // lowers the resource scheduling priorities of the thread so that it can
+  // perform background work without significantly affecting activity in the
+  // foreground.
+  // THREAD_MODE_BACKGROUND_END: end background processing mode. The system
+  // restores the resource scheduling priorities of the thread as they were
+  // before the thread entered background processing mode.
+ return SetThreadPriority(GetCurrentThread(),
+ Priority == ThreadPriority::Background
+ ? THREAD_MODE_BACKGROUND_BEGIN
+ : THREAD_MODE_BACKGROUND_END)
+ ? SetThreadPriorityResult::SUCCESS
+ : SetThreadPriorityResult::FAILURE;
+}
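Usage is a one-liner on the current thread; for example (not part of the patch, and ThreadPriority::Default is assumed from llvm/Support/Threading.h):

llvm::set_thread_priority(llvm::ThreadPriority::Background);
// ... long-running, low-priority work ...
llvm::set_thread_priority(llvm::ThreadPriority::Default);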
diff --git a/lib/Support/Windows/Watchdog.inc b/lib/Support/Windows/Watchdog.inc
index fab2bdf2a941..a362c999de76 100644
--- a/lib/Support/Windows/Watchdog.inc
+++ b/lib/Support/Windows/Watchdog.inc
@@ -1,9 +1,8 @@
//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index 979cc5d01390..fed9b2f462ef 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -1,9 +1,8 @@
//===- WindowsSupport.h - Common Windows Include File -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/WithColor.cpp b/lib/Support/WithColor.cpp
index cf4c10956f21..345dd9cf3949 100644
--- a/lib/Support/WithColor.cpp
+++ b/lib/Support/WithColor.cpp
@@ -1,9 +1,8 @@
//===- WithColor.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 9ef1410b99a5..9b2fe9c4418a 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -1,9 +1,8 @@
//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index b9bbee7883c6..09eb36943de9 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -1,9 +1,8 @@
//===- lib/Support/YAMLTraits.cpp -----------------------------------------===//
//
-// The LLVM Linker
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -114,6 +113,11 @@ const Node *Input::getCurrentNode() const {
}
bool Input::mapTag(StringRef Tag, bool Default) {
+ // CurrentNode can be null if setCurrentDocument() was unable to
+ // parse the document because it was invalid or empty.
+ if (!CurrentNode)
+ return false;
+
std::string foundTag = CurrentNode->_node->getVerbatimTag();
if (foundTag.empty()) {
// If no tag found and 'Tag' is the default, say it was found.
@@ -442,7 +446,8 @@ bool Output::outputting() {
void Output::beginMapping() {
StateStack.push_back(inMapFirstKey);
- NeedsNewLine = true;
+ PaddingBeforeContainer = Padding;
+ Padding = "\n";
}
bool Output::mapTag(StringRef Tag, bool Use) {
@@ -470,7 +475,7 @@ bool Output::mapTag(StringRef Tag, bool Use) {
}
// Tags inside maps in sequences should act as keys in the map from a
// formatting perspective, so we always want a newline in a sequence.
- NeedsNewLine = true;
+ Padding = "\n";
}
}
return Use;
@@ -478,8 +483,12 @@ bool Output::mapTag(StringRef Tag, bool Use) {
void Output::endMapping() {
// If we did not map anything, we should explicitly emit an empty map
- if (StateStack.back() == inMapFirstKey)
+ if (StateStack.back() == inMapFirstKey) {
+ Padding = PaddingBeforeContainer;
+ newLineCheck();
output("{}");
+ Padding = "\n";
+ }
StateStack.pop_back();
}
@@ -544,14 +553,19 @@ void Output::endDocuments() {
unsigned Output::beginSequence() {
StateStack.push_back(inSeqFirstElement);
- NeedsNewLine = true;
+ PaddingBeforeContainer = Padding;
+ Padding = "\n";
return 0;
}
void Output::endSequence() {
// If we did not emit anything, we should explicitly emit an empty sequence
- if (StateStack.back() == inSeqFirstElement)
+ if (StateStack.back() == inSeqFirstElement) {
+ Padding = PaddingBeforeContainer;
+ newLineCheck();
output("[]");
+ Padding = "\n";
+ }
StateStack.pop_back();
}
@@ -661,11 +675,6 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
return;
}
- unsigned i = 0;
- unsigned j = 0;
- unsigned End = S.size();
- const char *Base = S.data();
-
const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\"";
output(Quote); // Starting quote.
@@ -673,11 +682,16 @@ void Output::scalarString(StringRef &S, QuotingType MustQuote) {
// present, and will be escaped using a variety of unicode-scalar and special short-form
// escapes. This is handled in yaml::escape.
if (MustQuote == QuotingType::Double) {
- output(yaml::escape(Base, /* EscapePrintable= */ false));
+ output(yaml::escape(S, /* EscapePrintable= */ false));
outputUpToEndOfLine(Quote);
return;
}
+ unsigned i = 0;
+ unsigned j = 0;
+ unsigned End = S.size();
+ const char *Base = S.data();
+
// When using single-quoted strings, any single quote ' must be doubled to be escaped.
while (j < End) {
if (S[j] == '\'') { // Escape quotes.
@@ -742,7 +756,7 @@ void Output::outputUpToEndOfLine(StringRef s) {
output(s);
if (StateStack.empty() || (!inFlowSeqAnyElement(StateStack.back()) &&
!inFlowMapAnyKey(StateStack.back())))
- NeedsNewLine = true;
+ Padding = "\n";
}
void Output::outputNewLine() {
@@ -755,11 +769,13 @@ void Output::outputNewLine() {
//
void Output::newLineCheck() {
- if (!NeedsNewLine)
+ if (Padding != "\n") {
+ output(Padding);
+ Padding = {};
return;
- NeedsNewLine = false;
-
+ }
outputNewLine();
+ Padding = {};
if (StateStack.size() == 0)
return;
@@ -793,9 +809,9 @@ void Output::paddedKey(StringRef key) {
output(":");
const char *spaces = " ";
if (key.size() < strlen(spaces))
- output(&spaces[key.size()]);
+ Padding = &spaces[key.size()];
else
- output(" ");
+ Padding = " ";
}
void Output::flowKey(StringRef Key) {
diff --git a/lib/Support/Z3Solver.cpp b/lib/Support/Z3Solver.cpp
new file mode 100644
index 000000000000..f1a6fdf87cf2
--- /dev/null
+++ b/lib/Support/Z3Solver.cpp
@@ -0,0 +1,900 @@
+//== Z3Solver.cpp -----------------------------------------------*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/SMTAPI.h"
+#include <set>
+
+using namespace llvm;
+
+#if LLVM_WITH_Z3
+
+#include <z3.h>
+
+namespace {
+
+/// Configuration class for Z3
+class Z3Config {
+ friend class Z3Context;
+
+ Z3_config Config;
+
+public:
+ Z3Config() : Config(Z3_mk_config()) {
+ // Enable model finding
+ Z3_set_param_value(Config, "model", "true");
+ // Disable proof generation
+ Z3_set_param_value(Config, "proof", "false");
+ // Set timeout to 15000ms = 15s
+ Z3_set_param_value(Config, "timeout", "15000");
+ }
+
+ ~Z3Config() { Z3_del_config(Config); }
+}; // end class Z3Config
+
+// Function used to report errors
+void Z3ErrorHandler(Z3_context Context, Z3_error_code Error) {
+ llvm::report_fatal_error("Z3 error: " +
+ llvm::Twine(Z3_get_error_msg(Context, Error)));
+}
+
+/// Wrapper for Z3 context
+class Z3Context {
+public:
+ Z3_context Context;
+
+ Z3Context() {
+ Context = Z3_mk_context_rc(Z3Config().Config);
+ // The error function is set here because the context is the first object
+ // created by the backend
+ Z3_set_error_handler(Context, Z3ErrorHandler);
+ }
+
+ virtual ~Z3Context() {
+ Z3_del_context(Context);
+ Context = nullptr;
+ }
+}; // end class Z3Context
+
+/// Wrapper for Z3 Sort
+class Z3Sort : public SMTSort {
+ friend class Z3Solver;
+
+ Z3Context &Context;
+
+ Z3_sort Sort;
+
+public:
+ /// Default constructor, mainly used by make_shared
+ Z3Sort(Z3Context &C, Z3_sort ZS) : Context(C), Sort(ZS) {
+ Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+ }
+
+ /// Override implicit copy constructor for correct reference counting.
+ Z3Sort(const Z3Sort &Other) : Context(Other.Context), Sort(Other.Sort) {
+ Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+ }
+
+  /// Override implicit copy assignment operator for correct reference
+ /// counting.
+ Z3Sort &operator=(const Z3Sort &Other) {
+ Z3_inc_ref(Context.Context, reinterpret_cast<Z3_ast>(Other.Sort));
+ Z3_dec_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+ Sort = Other.Sort;
+ return *this;
+ }
+
+ Z3Sort(Z3Sort &&Other) = delete;
+ Z3Sort &operator=(Z3Sort &&Other) = delete;
+
+ ~Z3Sort() {
+ if (Sort)
+ Z3_dec_ref(Context.Context, reinterpret_cast<Z3_ast>(Sort));
+ }
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ ID.AddInteger(
+ Z3_get_ast_id(Context.Context, reinterpret_cast<Z3_ast>(Sort)));
+ }
+
+ bool isBitvectorSortImpl() const override {
+ return (Z3_get_sort_kind(Context.Context, Sort) == Z3_BV_SORT);
+ }
+
+ bool isFloatSortImpl() const override {
+ return (Z3_get_sort_kind(Context.Context, Sort) == Z3_FLOATING_POINT_SORT);
+ }
+
+ bool isBooleanSortImpl() const override {
+ return (Z3_get_sort_kind(Context.Context, Sort) == Z3_BOOL_SORT);
+ }
+
+ unsigned getBitvectorSortSizeImpl() const override {
+ return Z3_get_bv_sort_size(Context.Context, Sort);
+ }
+
+ unsigned getFloatSortSizeImpl() const override {
+ return Z3_fpa_get_ebits(Context.Context, Sort) +
+ Z3_fpa_get_sbits(Context.Context, Sort);
+ }
+
+ bool equal_to(SMTSort const &Other) const override {
+ return Z3_is_eq_sort(Context.Context, Sort,
+ static_cast<const Z3Sort &>(Other).Sort);
+ }
+
+ void print(raw_ostream &OS) const override {
+ OS << Z3_sort_to_string(Context.Context, Sort);
+ }
+}; // end class Z3Sort
+
+static const Z3Sort &toZ3Sort(const SMTSort &S) {
+ return static_cast<const Z3Sort &>(S);
+}
+
+class Z3Expr : public SMTExpr {
+ friend class Z3Solver;
+
+ Z3Context &Context;
+
+ Z3_ast AST;
+
+public:
+ Z3Expr(Z3Context &C, Z3_ast ZA) : SMTExpr(), Context(C), AST(ZA) {
+ Z3_inc_ref(Context.Context, AST);
+ }
+
+ /// Override implicit copy constructor for correct reference counting.
+ Z3Expr(const Z3Expr &Copy) : SMTExpr(), Context(Copy.Context), AST(Copy.AST) {
+ Z3_inc_ref(Context.Context, AST);
+ }
+
+  /// Override implicit copy assignment operator for correct reference
+ /// counting.
+ Z3Expr &operator=(const Z3Expr &Other) {
+ Z3_inc_ref(Context.Context, Other.AST);
+ Z3_dec_ref(Context.Context, AST);
+ AST = Other.AST;
+ return *this;
+ }
+
+ Z3Expr(Z3Expr &&Other) = delete;
+ Z3Expr &operator=(Z3Expr &&Other) = delete;
+
+ ~Z3Expr() {
+ if (AST)
+ Z3_dec_ref(Context.Context, AST);
+ }
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ ID.AddInteger(Z3_get_ast_id(Context.Context, AST));
+ }
+
+ /// Comparison of AST equality, not model equivalence.
+ bool equal_to(SMTExpr const &Other) const override {
+ assert(Z3_is_eq_sort(Context.Context, Z3_get_sort(Context.Context, AST),
+ Z3_get_sort(Context.Context,
+ static_cast<const Z3Expr &>(Other).AST)) &&
+           "ASTs must have the same sort");
+ return Z3_is_eq_ast(Context.Context, AST,
+ static_cast<const Z3Expr &>(Other).AST);
+ }
+
+ void print(raw_ostream &OS) const override {
+ OS << Z3_ast_to_string(Context.Context, AST);
+ }
+}; // end class Z3Expr
+
+static const Z3Expr &toZ3Expr(const SMTExpr &E) {
+ return static_cast<const Z3Expr &>(E);
+}
+
+class Z3Model {
+ friend class Z3Solver;
+
+ Z3Context &Context;
+
+ Z3_model Model;
+
+public:
+ Z3Model(Z3Context &C, Z3_model ZM) : Context(C), Model(ZM) {
+ Z3_model_inc_ref(Context.Context, Model);
+ }
+
+ Z3Model(const Z3Model &Other) = delete;
+ Z3Model(Z3Model &&Other) = delete;
+ Z3Model &operator=(Z3Model &Other) = delete;
+ Z3Model &operator=(Z3Model &&Other) = delete;
+
+ ~Z3Model() {
+ if (Model)
+ Z3_model_dec_ref(Context.Context, Model);
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << Z3_model_to_string(Context.Context, Model);
+ }
+
+ LLVM_DUMP_METHOD void dump() const { print(llvm::errs()); }
+}; // end class Z3Model
+
+/// Get the corresponding IEEE floating-point type for a given bitwidth.
+static const llvm::fltSemantics &getFloatSemantics(unsigned BitWidth) {
+ switch (BitWidth) {
+ default:
+ llvm_unreachable("Unsupported floating-point semantics!");
+ break;
+ case 16:
+ return llvm::APFloat::IEEEhalf();
+ case 32:
+ return llvm::APFloat::IEEEsingle();
+ case 64:
+ return llvm::APFloat::IEEEdouble();
+ case 128:
+ return llvm::APFloat::IEEEquad();
+ }
+}
+
+// Determine whether two float semantics are equivalent
+static bool areEquivalent(const llvm::fltSemantics &LHS,
+ const llvm::fltSemantics &RHS) {
+ return (llvm::APFloat::semanticsPrecision(LHS) ==
+ llvm::APFloat::semanticsPrecision(RHS)) &&
+ (llvm::APFloat::semanticsMinExponent(LHS) ==
+ llvm::APFloat::semanticsMinExponent(RHS)) &&
+ (llvm::APFloat::semanticsMaxExponent(LHS) ==
+ llvm::APFloat::semanticsMaxExponent(RHS)) &&
+ (llvm::APFloat::semanticsSizeInBits(LHS) ==
+ llvm::APFloat::semanticsSizeInBits(RHS));
+}
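A sketch of driving this backend through the generic SMT interface (not part of the patch). CreateZ3Solver() and SMTSolver::check() are assumed to be declared in llvm/Support/SMTAPI.h, and the factory is only available when LLVM is built with LLVM_WITH_Z3.

void solverExample() {
  llvm::SMTSolverRef Solver = llvm::CreateZ3Solver();
  llvm::SMTSortRef BV8 = Solver->getBitvectorSort(8);
  llvm::SMTExprRef X = Solver->mkSymbol("x", BV8);
  // Constrain x < 42 (unsigned compare) and ask for satisfiability.
  llvm::SMTExprRef FortyTwo = Solver->mkBitvector(
      llvm::APSInt(llvm::APInt(8, 42), /*isUnsigned=*/true), /*BitWidth=*/8);
  Solver->addConstraint(Solver->mkBVUlt(X, FortyTwo));
  llvm::Optional<bool> Sat = Solver->check();   // expected to be satisfiable
  (void)Sat;
}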
+
+class Z3Solver : public SMTSolver {
+ friend class Z3ConstraintManager;
+
+ Z3Context Context;
+
+ Z3_solver Solver;
+
+ // Cache Sorts
+ std::set<Z3Sort> CachedSorts;
+
+ // Cache Exprs
+ std::set<Z3Expr> CachedExprs;
+
+public:
+ Z3Solver() : Solver(Z3_mk_simple_solver(Context.Context)) {
+ Z3_solver_inc_ref(Context.Context, Solver);
+ }
+
+ Z3Solver(const Z3Solver &Other) = delete;
+ Z3Solver(Z3Solver &&Other) = delete;
+ Z3Solver &operator=(Z3Solver &Other) = delete;
+ Z3Solver &operator=(Z3Solver &&Other) = delete;
+
+ ~Z3Solver() {
+ if (Solver)
+ Z3_solver_dec_ref(Context.Context, Solver);
+ }
+
+ void addConstraint(const SMTExprRef &Exp) const override {
+ Z3_solver_assert(Context.Context, Solver, toZ3Expr(*Exp).AST);
+ }
+
+  // Given an SMTSort, adds/retrieves it from the cache and returns
+ // an SMTSortRef to the SMTSort in the cache
+ SMTSortRef newSortRef(const SMTSort &Sort) {
+ auto It = CachedSorts.insert(toZ3Sort(Sort));
+ return &(*It.first);
+ }
+
+  // Given an SMTExpr, adds/retrieves it from the cache and returns
+ // an SMTExprRef to the SMTExpr in the cache
+ SMTExprRef newExprRef(const SMTExpr &Exp) {
+ auto It = CachedExprs.insert(toZ3Expr(Exp));
+ return &(*It.first);
+ }
+
+ SMTSortRef getBoolSort() override {
+ return newSortRef(Z3Sort(Context, Z3_mk_bool_sort(Context.Context)));
+ }
+
+ SMTSortRef getBitvectorSort(unsigned BitWidth) override {
+ return newSortRef(
+ Z3Sort(Context, Z3_mk_bv_sort(Context.Context, BitWidth)));
+ }
+
+ SMTSortRef getSort(const SMTExprRef &Exp) override {
+ return newSortRef(
+ Z3Sort(Context, Z3_get_sort(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTSortRef getFloat16Sort() override {
+ return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_16(Context.Context)));
+ }
+
+ SMTSortRef getFloat32Sort() override {
+ return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_32(Context.Context)));
+ }
+
+ SMTSortRef getFloat64Sort() override {
+ return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_64(Context.Context)));
+ }
+
+ SMTSortRef getFloat128Sort() override {
+ return newSortRef(Z3Sort(Context, Z3_mk_fpa_sort_128(Context.Context)));
+ }
+
+ SMTExprRef mkBVNeg(const SMTExprRef &Exp) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvneg(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkBVNot(const SMTExprRef &Exp) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvnot(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkNot(const SMTExprRef &Exp) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_not(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkBVAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvadd(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSub(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvsub(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVMul(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvmul(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSRem(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvsrem(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVURem(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvurem(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvsdiv(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVUDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvudiv(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVShl(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvshl(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVAshr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvashr(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVLshr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvlshr(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVXor(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvxor(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVOr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvor(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvand(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVUlt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvult(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSlt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvslt(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVUgt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvugt(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSgt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvsgt(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVUle(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvule(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSle(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvsle(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVUge(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvuge(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVSge(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_bvsge(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkAnd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ Z3_ast Args[2] = {toZ3Expr(*LHS).AST, toZ3Expr(*RHS).AST};
+ return newExprRef(Z3Expr(Context, Z3_mk_and(Context.Context, 2, Args)));
+ }
+
+ SMTExprRef mkOr(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ Z3_ast Args[2] = {toZ3Expr(*LHS).AST, toZ3Expr(*RHS).AST};
+ return newExprRef(Z3Expr(Context, Z3_mk_or(Context.Context, 2, Args)));
+ }
+
+ SMTExprRef mkEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_eq(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPNeg(const SMTExprRef &Exp) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_neg(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkFPIsInfinite(const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_fpa_is_infinite(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkFPIsNaN(const SMTExprRef &Exp) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_is_nan(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkFPIsNormal(const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_fpa_is_normal(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkFPIsZero(const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_fpa_is_zero(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkFPMul(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(
+ Z3Expr(Context,
+ Z3_mk_fpa_mul(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+ }
+
+ SMTExprRef mkFPDiv(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(
+ Z3Expr(Context,
+ Z3_mk_fpa_div(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+ }
+
+ SMTExprRef mkFPRem(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_rem(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPAdd(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(
+ Z3Expr(Context,
+ Z3_mk_fpa_add(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+ }
+
+ SMTExprRef mkFPSub(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(
+ Z3Expr(Context,
+ Z3_mk_fpa_sub(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, toZ3Expr(*RoundingMode).AST)));
+ }
+
+ SMTExprRef mkFPLt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_lt(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPGt(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_gt(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPLe(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_leq(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPGe(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_geq(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPEqual(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_fpa_eq(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkIte(const SMTExprRef &Cond, const SMTExprRef &T,
+ const SMTExprRef &F) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_ite(Context.Context, toZ3Expr(*Cond).AST,
+ toZ3Expr(*T).AST, toZ3Expr(*F).AST)));
+ }
+
+ SMTExprRef mkBVSignExt(unsigned i, const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_sign_ext(Context.Context, i, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkBVZeroExt(unsigned i, const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_zero_ext(Context.Context, i, toZ3Expr(*Exp).AST)));
+ }
+
+ SMTExprRef mkBVExtract(unsigned High, unsigned Low,
+ const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(Context, Z3_mk_extract(Context.Context, High, Low,
+ toZ3Expr(*Exp).AST)));
+ }
+
+ /// Creates a predicate that checks for overflow in a bitvector addition
+ /// operation
+ SMTExprRef mkBVAddNoOverflow(const SMTExprRef &LHS, const SMTExprRef &RHS,
+ bool isSigned) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvadd_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, isSigned)));
+ }
+
+ /// Creates a predicate that checks for underflow in a signed bitvector
+ /// addition operation
+ SMTExprRef mkBVAddNoUnderflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvadd_no_underflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ /// Creates a predicate that checks for overflow in a signed bitvector
+ /// subtraction operation
+ SMTExprRef mkBVSubNoOverflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvsub_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ /// Creates a predicate that checks for underflow in a bitvector subtraction
+ /// operation
+ SMTExprRef mkBVSubNoUnderflow(const SMTExprRef &LHS, const SMTExprRef &RHS,
+ bool isSigned) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvsub_no_underflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, isSigned)));
+ }
+
+ /// Creates a predicate that checks for overflow in a signed bitvector
+ /// division/modulus operation
+ SMTExprRef mkBVSDivNoOverflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvsdiv_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ /// Creates a predicate that checks for overflow in a bitvector negation
+ /// operation
+ SMTExprRef mkBVNegNoOverflow(const SMTExprRef &Exp) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvneg_no_overflow(Context.Context, toZ3Expr(*Exp).AST)));
+ }
+
+ /// Creates a predicate that checks for overflow in a bitvector multiplication
+ /// operation
+ SMTExprRef mkBVMulNoOverflow(const SMTExprRef &LHS, const SMTExprRef &RHS,
+ bool isSigned) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvmul_no_overflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST, isSigned)));
+ }
+
+ /// Creates a predicate that checks for underflow in a signed bitvector
+ /// multiplication operation
+ SMTExprRef mkBVMulNoUnderflow(const SMTExprRef &LHS,
+ const SMTExprRef &RHS) override {
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_bvmul_no_underflow(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkBVConcat(const SMTExprRef &LHS, const SMTExprRef &RHS) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_concat(Context.Context, toZ3Expr(*LHS).AST,
+ toZ3Expr(*RHS).AST)));
+ }
+
+ SMTExprRef mkFPtoFP(const SMTExprRef &From, const SMTSortRef &To) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(Z3Expr(
+ Context,
+ Z3_mk_fpa_to_fp_float(Context.Context, toZ3Expr(*RoundingMode).AST,
+ toZ3Expr(*From).AST, toZ3Sort(*To).Sort)));
+ }
+
+ SMTExprRef mkSBVtoFP(const SMTExprRef &From, const SMTSortRef &To) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(Z3Expr(
+ Context,
+ Z3_mk_fpa_to_fp_signed(Context.Context, toZ3Expr(*RoundingMode).AST,
+ toZ3Expr(*From).AST, toZ3Sort(*To).Sort)));
+ }
+
+ SMTExprRef mkUBVtoFP(const SMTExprRef &From, const SMTSortRef &To) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(Z3Expr(
+ Context,
+ Z3_mk_fpa_to_fp_unsigned(Context.Context, toZ3Expr(*RoundingMode).AST,
+ toZ3Expr(*From).AST, toZ3Sort(*To).Sort)));
+ }
+
+ SMTExprRef mkFPtoSBV(const SMTExprRef &From, unsigned ToWidth) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_fpa_to_sbv(Context.Context, toZ3Expr(*RoundingMode).AST,
+ toZ3Expr(*From).AST, ToWidth)));
+ }
+
+ SMTExprRef mkFPtoUBV(const SMTExprRef &From, unsigned ToWidth) override {
+ SMTExprRef RoundingMode = getFloatRoundingMode();
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_fpa_to_ubv(Context.Context, toZ3Expr(*RoundingMode).AST,
+ toZ3Expr(*From).AST, ToWidth)));
+ }
+
+ SMTExprRef mkBoolean(const bool b) override {
+ return newExprRef(Z3Expr(Context, b ? Z3_mk_true(Context.Context)
+ : Z3_mk_false(Context.Context)));
+ }
+
+ SMTExprRef mkBitvector(const llvm::APSInt Int, unsigned BitWidth) override {
+ const SMTSortRef Sort = getBitvectorSort(BitWidth);
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_numeral(Context.Context, Int.toString(10).c_str(),
+ toZ3Sort(*Sort).Sort)));
+ }
+
+ SMTExprRef mkFloat(const llvm::APFloat Float) override {
+ SMTSortRef Sort =
+ getFloatSort(llvm::APFloat::semanticsSizeInBits(Float.getSemantics()));
+
+ llvm::APSInt Int = llvm::APSInt(Float.bitcastToAPInt(), false);
+ SMTExprRef Z3Int = mkBitvector(Int, Int.getBitWidth());
+ return newExprRef(Z3Expr(
+ Context, Z3_mk_fpa_to_fp_bv(Context.Context, toZ3Expr(*Z3Int).AST,
+ toZ3Sort(*Sort).Sort)));
+ }
+
+ SMTExprRef mkSymbol(const char *Name, SMTSortRef Sort) override {
+ return newExprRef(
+ Z3Expr(Context, Z3_mk_const(Context.Context,
+ Z3_mk_string_symbol(Context.Context, Name),
+ toZ3Sort(*Sort).Sort)));
+ }
+
+ llvm::APSInt getBitvector(const SMTExprRef &Exp, unsigned BitWidth,
+ bool isUnsigned) override {
+ return llvm::APSInt(
+ llvm::APInt(BitWidth,
+ Z3_get_numeral_string(Context.Context, toZ3Expr(*Exp).AST),
+ 10),
+ isUnsigned);
+ }
+
+ bool getBoolean(const SMTExprRef &Exp) override {
+ return Z3_get_bool_value(Context.Context, toZ3Expr(*Exp).AST) == Z3_L_TRUE;
+ }
+
+ SMTExprRef getFloatRoundingMode() override {
+ // TODO: Don't assume nearest ties to even rounding mode
+ return newExprRef(Z3Expr(Context, Z3_mk_fpa_rne(Context.Context)));
+ }
+
+ bool toAPFloat(const SMTSortRef &Sort, const SMTExprRef &AST,
+ llvm::APFloat &Float, bool useSemantics) {
+ assert(Sort->isFloatSort() && "Unsupported sort to floating-point!");
+
+ llvm::APSInt Int(Sort->getFloatSortSize(), true);
+ const llvm::fltSemantics &Semantics =
+ getFloatSemantics(Sort->getFloatSortSize());
+ SMTSortRef BVSort = getBitvectorSort(Sort->getFloatSortSize());
+ if (!toAPSInt(BVSort, AST, Int, true)) {
+ return false;
+ }
+
+ if (useSemantics && !areEquivalent(Float.getSemantics(), Semantics)) {
+ assert(false && "Floating-point types don't match!");
+ return false;
+ }
+
+ Float = llvm::APFloat(Semantics, Int);
+ return true;
+ }
+
+ bool toAPSInt(const SMTSortRef &Sort, const SMTExprRef &AST,
+ llvm::APSInt &Int, bool useSemantics) {
+ if (Sort->isBitvectorSort()) {
+ if (useSemantics && Int.getBitWidth() != Sort->getBitvectorSortSize()) {
+ assert(false && "Bitvector types don't match!");
+ return false;
+ }
+
+ // FIXME: This function is also used to retrieve floating-point values,
+ // which can be 16, 32, 64 or 128 bits long. Bitvectors can be anything
+ // between 1 and 64 bits long, which is the reason we have this weird
+ // guard. In the future, we need proper calls in the backend to retrieve
+ // floating-point numbers and their special values (NaN, +/-infinity, +/-zero),
+ // then we can drop this weird condition.
+ if (Sort->getBitvectorSortSize() <= 64 ||
+ Sort->getBitvectorSortSize() == 128) {
+ Int = getBitvector(AST, Int.getBitWidth(), Int.isUnsigned());
+ return true;
+ }
+
+ assert(false && "Bitwidth not supported!");
+ return false;
+ }
+
+ if (Sort->isBooleanSort()) {
+ if (useSemantics && Int.getBitWidth() < 1) {
+ assert(false && "Boolean type doesn't match!");
+ return false;
+ }
+
+ Int = llvm::APSInt(llvm::APInt(Int.getBitWidth(), getBoolean(AST)),
+ Int.isUnsigned());
+ return true;
+ }
+
+ llvm_unreachable("Unsupported sort to integer!");
+ }
+
+ bool getInterpretation(const SMTExprRef &Exp, llvm::APSInt &Int) override {
+ Z3Model Model(Context, Z3_solver_get_model(Context.Context, Solver));
+ Z3_func_decl Func = Z3_get_app_decl(
+ Context.Context, Z3_to_app(Context.Context, toZ3Expr(*Exp).AST));
+ if (Z3_model_has_interp(Context.Context, Model.Model, Func) != Z3_L_TRUE)
+ return false;
+
+ SMTExprRef Assign = newExprRef(
+ Z3Expr(Context,
+ Z3_model_get_const_interp(Context.Context, Model.Model, Func)));
+ SMTSortRef Sort = getSort(Assign);
+ return toAPSInt(Sort, Assign, Int, true);
+ }
+
+ bool getInterpretation(const SMTExprRef &Exp, llvm::APFloat &Float) override {
+ Z3Model Model(Context, Z3_solver_get_model(Context.Context, Solver));
+ Z3_func_decl Func = Z3_get_app_decl(
+ Context.Context, Z3_to_app(Context.Context, toZ3Expr(*Exp).AST));
+ if (Z3_model_has_interp(Context.Context, Model.Model, Func) != Z3_L_TRUE)
+ return false;
+
+ SMTExprRef Assign = newExprRef(
+ Z3Expr(Context,
+ Z3_model_get_const_interp(Context.Context, Model.Model, Func)));
+ SMTSortRef Sort = getSort(Assign);
+ return toAPFloat(Sort, Assign, Float, true);
+ }
+
+ Optional<bool> check() const override {
+ Z3_lbool res = Z3_solver_check(Context.Context, Solver);
+ if (res == Z3_L_TRUE)
+ return true;
+
+ if (res == Z3_L_FALSE)
+ return false;
+
+ return Optional<bool>();
+ }
+
+ void push() override { return Z3_solver_push(Context.Context, Solver); }
+
+ void pop(unsigned NumStates = 1) override {
+ assert(Z3_solver_get_num_scopes(Context.Context, Solver) >= NumStates);
+ return Z3_solver_pop(Context.Context, Solver, NumStates);
+ }
+
+ bool isFPSupported() override { return true; }
+
+ /// Reset the solver and remove all constraints.
+ void reset() override { Z3_solver_reset(Context.Context, Solver); }
+
+ void print(raw_ostream &OS) const override {
+ OS << Z3_solver_to_string(Context.Context, Solver);
+ }
+}; // end class Z3Solver
+
+} // end anonymous namespace
+
+#endif
+
+llvm::SMTSolverRef llvm::CreateZ3Solver() {
+#if LLVM_WITH_Z3
+ return llvm::make_unique<Z3Solver>();
+#else
+ llvm::report_fatal_error("LLVM was not compiled with Z3 support, rebuild "
+ "with -DLLVM_ENABLE_Z3_SOLVER=ON",
+ false);
+ return nullptr;
+#endif
+}
+
+LLVM_DUMP_METHOD void SMTSort::dump() const { print(llvm::errs()); }
+LLVM_DUMP_METHOD void SMTExpr::dump() const { print(llvm::errs()); }
+LLVM_DUMP_METHOD void SMTSolver::dump() const { print(llvm::errs()); }
diff --git a/lib/Support/circular_raw_ostream.cpp b/lib/Support/circular_raw_ostream.cpp
index e768f17cd00d..acd230704ff8 100644
--- a/lib/Support/circular_raw_ostream.cpp
+++ b/lib/Support/circular_raw_ostream.cpp
@@ -1,9 +1,8 @@
//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/raw_os_ostream.cpp b/lib/Support/raw_os_ostream.cpp
index 44f2325d7f8a..81f0d739696e 100644
--- a/lib/Support/raw_os_ostream.cpp
+++ b/lib/Support/raw_os_ostream.cpp
@@ -1,9 +1,8 @@
//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 21dde7ff914a..2baccaa0cbd7 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -1,9 +1,8 @@
//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -613,7 +612,7 @@ raw_fd_ostream::~raw_fd_ostream() {
// destructing raw_ostream objects which may have errors.
if (has_error())
report_fatal_error("IO failure on output stream: " + error().message(),
- /*GenCrashDiag=*/false);
+ /*gen_crash_diag=*/false);
}
#if defined(_WIN32)
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index e6171c71efc0..7523b32ca0e5 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -1,9 +1,8 @@
//===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/TableGen/JSONBackend.cpp b/lib/TableGen/JSONBackend.cpp
index 36cb2208a294..196644cda667 100644
--- a/lib/TableGen/JSONBackend.cpp
+++ b/lib/TableGen/JSONBackend.cpp
@@ -1,9 +1,8 @@
//===- JSONBackend.cpp - Generate a JSON dump of all records. -*- C++ -*-=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index 02698416609f..bcd39584e450 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -1,9 +1,8 @@
//===- Main.cpp - Top-Level TableGen implementation -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index cf1685a2e8c2..27d1bdc7f4c3 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -1,9 +1,8 @@
//===- Record.cpp - Record implementation ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,9 +15,11 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
@@ -31,14 +32,20 @@
#include <cassert>
#include <cstdint>
#include <memory>
+#include <map>
#include <string>
#include <utility>
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "tblgen-records"
+
static BumpPtrAllocator Allocator;
+STATISTIC(CodeInitsConstructed,
+ "The total number of unique CodeInits constructed");
+
//===----------------------------------------------------------------------===//
// Type implementations
//===----------------------------------------------------------------------===//
@@ -451,7 +458,7 @@ Init *BitsInit::resolveReferences(Resolver &R) const {
}
IntInit *IntInit::get(int64_t V) {
- static DenseMap<int64_t, IntInit*> ThePool;
+ static std::map<int64_t, IntInit*> ThePool;
IntInit *&I = ThePool[V];
if (!I) I = new(Allocator) IntInit(V);
@@ -507,13 +514,20 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
return BitsInit::get(NewBits);
}
-CodeInit *CodeInit::get(StringRef V) {
- static StringMap<CodeInit*, BumpPtrAllocator &> ThePool(Allocator);
+CodeInit *CodeInit::get(StringRef V, const SMLoc &Loc) {
+ static StringSet<BumpPtrAllocator &> ThePool(Allocator);
- auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first;
- if (!Entry.second)
- Entry.second = new(Allocator) CodeInit(Entry.getKey());
- return Entry.second;
+ CodeInitsConstructed++;
+
+ // Unlike StringMap, StringSet doesn't accept empty keys.
+ if (V.empty())
+ return new (Allocator) CodeInit("", Loc);
+
+ // Location tracking prevents us from de-duping CodeInits as we're never
+ // called with the same string and same location twice. However, we can at
+ // least de-dupe the strings for a modest saving.
+ auto &Entry = *ThePool.insert(V).first;
+ return new(Allocator) CodeInit(Entry.getKey(), Loc);
}
StringInit *StringInit::get(StringRef V) {
@@ -529,7 +543,7 @@ Init *StringInit::convertInitializerTo(RecTy *Ty) const {
if (isa<StringRecTy>(Ty))
return const_cast<StringInit *>(this);
if (isa<CodeRecTy>(Ty))
- return CodeInit::get(getValue());
+ return CodeInit::get(getValue(), SMLoc());
return nullptr;
}
@@ -843,6 +857,28 @@ Init *BinOpInit::getStrConcat(Init *I0, Init *I1) {
return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1, StringRecTy::get());
}
+static ListInit *ConcatListInits(const ListInit *LHS,
+ const ListInit *RHS) {
+ SmallVector<Init *, 8> Args;
+ Args.insert(Args.end(), LHS->begin(), LHS->end());
+ Args.insert(Args.end(), RHS->begin(), RHS->end());
+ return ListInit::get(Args, LHS->getElementType());
+}
+
+Init *BinOpInit::getListConcat(TypedInit *LHS, Init *RHS) {
+ assert(isa<ListRecTy>(LHS->getType()) && "First arg must be a list");
+
+ // Shortcut for the common case of concatenating two lists.
+ if (const ListInit *LHSList = dyn_cast<ListInit>(LHS))
+ if (const ListInit *RHSList = dyn_cast<ListInit>(RHS))
+ return ConcatListInits(LHSList, RHSList);
+ return BinOpInit::get(BinOpInit::LISTCONCAT, LHS, RHS, LHS->getType());
+}
+
+Init *BinOpInit::getListSplat(TypedInit *LHS, Init *RHS) {
+ return BinOpInit::get(BinOpInit::LISTSPLAT, LHS, RHS, LHS->getType());
+}
+
Init *BinOpInit::Fold(Record *CurRec) const {
switch (getOpcode()) {
case CONCAT: {
@@ -883,6 +919,15 @@ Init *BinOpInit::Fold(Record *CurRec) const {
}
break;
}
+ case LISTSPLAT: {
+ TypedInit *Value = dyn_cast<TypedInit>(LHS);
+ IntInit *Size = dyn_cast<IntInit>(RHS);
+ if (Value && Size) {
+ SmallVector<Init *, 8> Args(Size->getValue(), Value);
+ return ListInit::get(Args, Value->getType());
+ }
+ break;
+ }
case STRCONCAT: {
StringInit *LHSs = dyn_cast<StringInit>(LHS);
StringInit *RHSs = dyn_cast<StringInit>(RHS);
@@ -931,6 +976,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
break;
}
case ADD:
+ case MUL:
case AND:
case OR:
case SHL:
@@ -946,6 +992,7 @@ Init *BinOpInit::Fold(Record *CurRec) const {
switch (getOpcode()) {
default: llvm_unreachable("Bad opcode!");
case ADD: Result = LHSv + RHSv; break;
+ case MUL: Result = LHSv * RHSv; break;
case AND: Result = LHSv & RHSv; break;
case OR: Result = LHSv | RHSv; break;
case SHL: Result = LHSv << RHSv; break;
@@ -975,6 +1022,7 @@ std::string BinOpInit::getAsString() const {
switch (getOpcode()) {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
+ case MUL: Result = "!mul"; break;
case AND: Result = "!and"; break;
case OR: Result = "!or"; break;
case SHL: Result = "!shl"; break;
@@ -987,6 +1035,7 @@ std::string BinOpInit::getAsString() const {
case GE: Result = "!ge"; break;
case GT: Result = "!gt"; break;
case LISTCONCAT: Result = "!listconcat"; break;
+ case LISTSPLAT: Result = "!listsplat"; break;
case STRCONCAT: Result = "!strconcat"; break;
}
return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
@@ -1694,6 +1743,137 @@ Init *FieldInit::Fold(Record *CurRec) const {
return const_cast<FieldInit *>(this);
}
+static void ProfileCondOpInit(FoldingSetNodeID &ID,
+ ArrayRef<Init *> CondRange,
+ ArrayRef<Init *> ValRange,
+ const RecTy *ValType) {
+ assert(CondRange.size() == ValRange.size() &&
+ "Number of conditions and values must match!");
+ ID.AddPointer(ValType);
+ ArrayRef<Init *>::iterator Case = CondRange.begin();
+ ArrayRef<Init *>::iterator Val = ValRange.begin();
+
+ while (Case != CondRange.end()) {
+ ID.AddPointer(*Case++);
+ ID.AddPointer(*Val++);
+ }
+}
+
+void CondOpInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileCondOpInit(ID,
+ makeArrayRef(getTrailingObjects<Init *>(), NumConds),
+ makeArrayRef(getTrailingObjects<Init *>() + NumConds, NumConds),
+ ValType);
+}
+
+CondOpInit *
+CondOpInit::get(ArrayRef<Init *> CondRange,
+ ArrayRef<Init *> ValRange, RecTy *Ty) {
+ assert(CondRange.size() == ValRange.size() &&
+ "Number of conditions and values must match!");
+
+ static FoldingSet<CondOpInit> ThePool;
+ FoldingSetNodeID ID;
+ ProfileCondOpInit(ID, CondRange, ValRange, Ty);
+
+ void *IP = nullptr;
+ if (CondOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(2*CondRange.size()),
+ alignof(BitsInit));
+ CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty);
+
+ std::uninitialized_copy(CondRange.begin(), CondRange.end(),
+ I->getTrailingObjects<Init *>());
+ std::uninitialized_copy(ValRange.begin(), ValRange.end(),
+ I->getTrailingObjects<Init *>()+CondRange.size());
+ ThePool.InsertNode(I, IP);
+ return I;
+}
+
+Init *CondOpInit::resolveReferences(Resolver &R) const {
+ SmallVector<Init*, 4> NewConds;
+ bool Changed = false;
+ for (const Init *Case : getConds()) {
+ Init *NewCase = Case->resolveReferences(R);
+ NewConds.push_back(NewCase);
+ Changed |= NewCase != Case;
+ }
+
+ SmallVector<Init*, 4> NewVals;
+ for (const Init *Val : getVals()) {
+ Init *NewVal = Val->resolveReferences(R);
+ NewVals.push_back(NewVal);
+ Changed |= NewVal != Val;
+ }
+
+ if (Changed)
+ return (CondOpInit::get(NewConds, NewVals,
+ getValType()))->Fold(R.getCurrentRecord());
+
+ return const_cast<CondOpInit *>(this);
+}
+
+Init *CondOpInit::Fold(Record *CurRec) const {
+ for ( unsigned i = 0; i < NumConds; ++i) {
+ Init *Cond = getCond(i);
+ Init *Val = getVal(i);
+
+ if (IntInit *CondI = dyn_cast_or_null<IntInit>(
+ Cond->convertInitializerTo(IntRecTy::get()))) {
+ if (CondI->getValue())
+ return Val->convertInitializerTo(getValType());
+ } else
+ return const_cast<CondOpInit *>(this);
+ }
+
+ PrintFatalError(CurRec->getLoc(),
+ CurRec->getName() +
+ " does not have any true condition in:" +
+ this->getAsString());
+ return nullptr;
+}
+
+bool CondOpInit::isConcrete() const {
+ for (const Init *Case : getConds())
+ if (!Case->isConcrete())
+ return false;
+
+ for (const Init *Val : getVals())
+ if (!Val->isConcrete())
+ return false;
+
+ return true;
+}
+
+bool CondOpInit::isComplete() const {
+ for (const Init *Case : getConds())
+ if (!Case->isComplete())
+ return false;
+
+ for (const Init *Val : getVals())
+ if (!Val->isConcrete())
+ return false;
+
+ return true;
+}
+
+std::string CondOpInit::getAsString() const {
+ std::string Result = "!cond(";
+ for (unsigned i = 0; i < getNumConds(); i++) {
+ Result += getCond(i)->getAsString() + ": ";
+ Result += getVal(i)->getAsString();
+ if (i != getNumConds()-1)
+ Result += ", ";
+ }
+ return Result + ")";
+}
+
+Init *CondOpInit::getBit(unsigned Bit) const {
+ return VarBitInit::get(const_cast<CondOpInit *>(this), Bit);
+}
+
static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
ArrayRef<Init *> ArgRange,
ArrayRef<StringInit *> NameRange) {
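The folds added above for the new MUL and LISTSPLAT opcodes amount to integer multiplication and list replication. A purely illustrative .td sketch of the resulting behavior (record and field names are invented for the example, not part of this import):

class FoldDemo {
  int Product = !mul(6, 7);               // folds to 42
  list<int> Zeros = !listsplat(0, 4);     // folds to [0, 0, 0, 0]
  list<int> Pair = !listconcat([1], [2]); // folds to [1, 2]
}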
diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp
index 733e0aeef623..a870e41d58f8 100644
--- a/lib/TableGen/SetTheory.cpp
+++ b/lib/TableGen/SetTheory.cpp
@@ -1,9 +1,8 @@
//===- SetTheory.cpp - Generate ordered sets from DAG expressions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp
index 2c4d1f33997d..2fca068893f3 100644
--- a/lib/TableGen/StringMatcher.cpp
+++ b/lib/TableGen/StringMatcher.cpp
@@ -1,9 +1,8 @@
//===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 16aeee561075..d28c62b3133d 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -1,9 +1,8 @@
//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,7 @@ struct {
const char *Word;
} PreprocessorDirs[] = {
{ tgtok::Ifdef, "ifdef" },
+ { tgtok::Ifndef, "ifndef" },
{ tgtok::Else, "else" },
{ tgtok::Endif, "endif" },
{ tgtok::Define, "define" }
@@ -545,6 +545,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("ge", tgtok::XGe)
.Case("gt", tgtok::XGt)
.Case("if", tgtok::XIf)
+ .Case("cond", tgtok::XCond)
.Case("isa", tgtok::XIsA)
.Case("head", tgtok::XHead)
.Case("tail", tgtok::XTail)
@@ -552,6 +553,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("con", tgtok::XConcat)
.Case("dag", tgtok::XDag)
.Case("add", tgtok::XADD)
+ .Case("mul", tgtok::XMUL)
.Case("and", tgtok::XAND)
.Case("or", tgtok::XOR)
.Case("shl", tgtok::XSHL)
@@ -563,6 +565,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("foldl", tgtok::XFoldl)
.Case("foreach", tgtok::XForEach)
.Case("listconcat", tgtok::XListConcat)
+ .Case("listsplat", tgtok::XListSplat)
.Case("strconcat", tgtok::XStrConcat)
.Default(tgtok::Error);
@@ -674,21 +677,28 @@ tgtok::TokKind TGLexer::lexPreprocessor(
PrintFatalError("lexPreprocessor() called for unknown "
"preprocessor directive");
- if (Kind == tgtok::Ifdef) {
+ if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
StringRef MacroName = prepLexMacroName();
+ StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
if (MacroName.empty())
- return ReturnError(TokStart, "Expected macro name after #ifdef");
+ return ReturnError(TokStart, "Expected macro name after " + IfTokName);
bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
+ // Canonicalize ifndef to ifdef equivalent
+ if (Kind == tgtok::Ifndef) {
+ MacroIsDefined = !MacroIsDefined;
+ Kind = tgtok::Ifdef;
+ }
+
// Regardless of whether we are processing tokens or not,
// we put the #ifdef control on stack.
PrepIncludeStack.back()->push_back(
{Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
if (!prepSkipDirectiveEnd())
- return ReturnError(CurPtr,
- "Only comments are supported after #ifdef NAME");
+ return ReturnError(CurPtr, "Only comments are supported after " +
+ IfTokName + " NAME");
// If we were not processing tokens before this #ifdef,
// then just return back to the lines skipping code.
@@ -712,7 +722,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(
// Check if this #else is correct before calling prepSkipDirectiveEnd(),
// which will move CurPtr away from the beginning of #else.
if (PrepIncludeStack.back()->empty())
- return ReturnError(TokStart, "#else without #ifdef");
+ return ReturnError(TokStart, "#else without #ifdef or #ifndef");
PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
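A minimal sketch of the new #ifndef directive handled above, which the lexer canonicalizes to an #ifdef entry with the defined-ness inverted (MY_MACRO is an illustrative name; it would come from -DMY_MACRO on the tblgen command line or an earlier #define):

#ifndef MY_MACRO
def DefaultConfig;
#else
def MacroConfig;
#endif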
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index e9980b36b97b..3085ab2c0478 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -1,9 +1,8 @@
//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,9 +49,9 @@ namespace tgtok {
MultiClass, String, Defset,
// !keywords.
- XConcat, XADD, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
- XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XEq, XIsA, XDag,
- XNe, XLe, XLt, XGe, XGt,
+ XConcat, XADD, XMUL, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XListSplat,
+ XStrConcat, XCast, XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty,
+ XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt,
// Integer value.
IntVal,
@@ -66,7 +65,7 @@ namespace tgtok {
// Preprocessing tokens for internal usage by the lexer.
// They are never returned as a result of Lex().
- Ifdef, Else, Endif, Define
+ Ifdef, Ifndef, Else, Endif, Define
};
}
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 1d1f3603c83c..a9ace152d59e 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -1,9 +1,8 @@
//===- TGParser.cpp - Parser for TableGen Files ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -537,8 +536,14 @@ Record *TGParser::ParseClassID() {
}
Record *Result = Records.getClass(Lex.getCurStrVal());
- if (!Result)
- TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
+ if (!Result) {
+ std::string Msg("Couldn't find class '" + Lex.getCurStrVal() + "'");
+ if (MultiClasses[Lex.getCurStrVal()].get())
+ TokError(Msg + ". Use 'defm' if you meant to use multiclass '" +
+ Lex.getCurStrVal() + "'");
+ else
+ TokError(Msg);
+ }
Lex.Lex();
return Result;
@@ -661,35 +666,47 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
/// RangePiece ::= INTVAL
/// RangePiece ::= INTVAL '-' INTVAL
/// RangePiece ::= INTVAL INTVAL
-bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges) {
- if (Lex.getCode() != tgtok::IntVal) {
- TokError("expected integer or bitrange");
- return true;
- }
- int64_t Start = Lex.getCurIntVal();
+bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
+ TypedInit *FirstItem) {
+ Init *CurVal = FirstItem;
+ if (!CurVal)
+ CurVal = ParseValue(nullptr);
+
+ IntInit *II = dyn_cast_or_null<IntInit>(CurVal);
+ if (!II)
+ return TokError("expected integer or bitrange");
+
+ int64_t Start = II->getValue();
int64_t End;
if (Start < 0)
return TokError("invalid range, cannot be negative");
- switch (Lex.Lex()) { // eat first character.
+ switch (Lex.getCode()) {
default:
Ranges.push_back(Start);
return false;
- case tgtok::minus:
- if (Lex.Lex() != tgtok::IntVal) {
+ case tgtok::minus: {
+ Lex.Lex(); // eat the '-'
+
+ Init *I_End = ParseValue(nullptr);
+ IntInit *II_End = dyn_cast_or_null<IntInit>(I_End);
+ if (!II_End) {
TokError("expected integer value as end of range");
return true;
}
- End = Lex.getCurIntVal();
+
+ End = II_End->getValue();
break;
- case tgtok::IntVal:
+ }
+ case tgtok::IntVal: {
End = -Lex.getCurIntVal();
+ Lex.Lex();
break;
}
+ }
if (End < 0)
return TokError("invalid range, cannot be negative");
- Lex.Lex();
// Add to the range.
if (Start < End)
@@ -1024,6 +1041,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XConcat:
case tgtok::XADD:
+ case tgtok::XMUL:
case tgtok::XAND:
case tgtok::XOR:
case tgtok::XSRA:
@@ -1036,6 +1054,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XGe:
case tgtok::XGt:
case tgtok::XListConcat:
+ case tgtok::XListSplat:
case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')'
tgtok::TokKind OpTok = Lex.getCode();
SMLoc OpLoc = Lex.getLoc();
@@ -1046,6 +1065,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT; break;
case tgtok::XADD: Code = BinOpInit::ADD; break;
+ case tgtok::XMUL: Code = BinOpInit::MUL; break;
case tgtok::XAND: Code = BinOpInit::AND; break;
case tgtok::XOR: Code = BinOpInit::OR; break;
case tgtok::XSRA: Code = BinOpInit::SRA; break;
@@ -1058,6 +1078,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XGe: Code = BinOpInit::GE; break;
case tgtok::XGt: Code = BinOpInit::GT; break;
case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break;
+ case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
}
@@ -1076,6 +1097,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XSRL:
case tgtok::XSHL:
case tgtok::XADD:
+ case tgtok::XMUL:
Type = IntRecTy::get();
ArgType = IntRecTy::get();
break;
@@ -1095,6 +1117,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// We don't know the list type until we parse the first argument
ArgType = ItemType;
break;
+ case tgtok::XListSplat:
+ // Can't do any typechecking until we parse the first argument.
+ break;
case tgtok::XStrConcat:
Type = StringRecTy::get();
ArgType = StringRecTy::get();
@@ -1134,6 +1159,33 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
break;
+ case BinOpInit::LISTSPLAT:
+ if (ItemType && InitList.size() == 1) {
+ if (!isa<ListRecTy>(ItemType)) {
+ Error(OpLoc,
+ Twine("expected output type to be a list, got type '") +
+ ItemType->getAsString() + "'");
+ return nullptr;
+ }
+ if (!ArgType->getListTy()->typeIsConvertibleTo(ItemType)) {
+ Error(OpLoc, Twine("expected first arg type to be '") +
+ ArgType->getAsString() +
+ "', got value of type '" +
+ cast<ListRecTy>(ItemType)
+ ->getElementType()
+ ->getAsString() +
+ "'");
+ return nullptr;
+ }
+ }
+ if (InitList.size() == 2 && !isa<IntRecTy>(ArgType)) {
+ Error(InitLoc, Twine("expected second parameter to be an int, got "
+ "value of type '") +
+ ArgType->getAsString() + "'");
+ return nullptr;
+ }
+ ArgType = nullptr; // Broken invariant: types not identical.
+ break;
case BinOpInit::EQ:
case BinOpInit::NE:
if (!ArgType->typeIsConvertibleTo(IntRecTy::get()) &&
@@ -1155,7 +1207,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
if (Code != BinOpInit::ADD && Code != BinOpInit::AND &&
Code != BinOpInit::OR && Code != BinOpInit::SRA &&
- Code != BinOpInit::SRL && Code != BinOpInit::SHL)
+ Code != BinOpInit::SRL && Code != BinOpInit::SHL &&
+ Code != BinOpInit::MUL)
ArgType = Resolved;
}
@@ -1170,14 +1223,19 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
Lex.Lex(); // eat the ')'
+ // listconcat returns a list with the type of its arguments.
if (Code == BinOpInit::LISTCONCAT)
Type = ArgType;
+ // listsplat returns a list of the type of the *first* argument.
+ if (Code == BinOpInit::LISTSPLAT)
+ Type = cast<TypedInit>(InitList.front())->getType()->getListTy();
// We allow multiple operands to associative operators like !strconcat as
// shorthand for nesting them.
if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT ||
Code == BinOpInit::CONCAT || Code == BinOpInit::ADD ||
- Code == BinOpInit::AND || Code == BinOpInit::OR) {
+ Code == BinOpInit::AND || Code == BinOpInit::OR ||
+ Code == BinOpInit::MUL) {
while (InitList.size() > 2) {
Init *RHS = InitList.pop_back_val();
RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))->Fold(CurRec);
@@ -1445,6 +1503,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
}
+ case tgtok::XCond:
+ return ParseOperationCond(CurRec, ItemType);
+
case tgtok::XFoldl: {
// Value ::= !foldl '(' Id ',' Id ',' Value ',' Value ',' Value ')'
Lex.Lex(); // eat the operation
@@ -1603,6 +1664,91 @@ RecTy *TGParser::ParseOperatorType() {
return Type;
}
+Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
+ Lex.Lex(); // eat the operation 'cond'
+
+ if (Lex.getCode() != tgtok::l_paren) {
+ TokError("expected '(' after !cond operator");
+ return nullptr;
+ }
+ Lex.Lex(); // eat the '('
+
+ // Parse through '[Case: Val,]+'
+ SmallVector<Init *, 4> Case;
+ SmallVector<Init *, 4> Val;
+ while (true) {
+ if (Lex.getCode() == tgtok::r_paren) {
+ Lex.Lex(); // eat the ')'
+ break;
+ }
+
+ Init *V = ParseValue(CurRec);
+ if (!V)
+ return nullptr;
+ Case.push_back(V);
+
+ if (Lex.getCode() != tgtok::colon) {
+ TokError("expected ':' following a condition in !cond operator");
+ return nullptr;
+ }
+ Lex.Lex(); // eat the ':'
+
+ V = ParseValue(CurRec, ItemType);
+ if (!V)
+ return nullptr;
+ Val.push_back(V);
+
+ if (Lex.getCode() == tgtok::r_paren) {
+ Lex.Lex(); // eat the ')'
+ break;
+ }
+
+ if (Lex.getCode() != tgtok::comma) {
+ TokError("expected ',' or ')' following a value in !cond operator");
+ return nullptr;
+ }
+ Lex.Lex(); // eat the ','
+ }
+
+ if (Case.size() < 1) {
+ TokError("there should be at least 1 'condition : value' in the !cond operator");
+ return nullptr;
+ }
+
+ // resolve type
+ RecTy *Type = nullptr;
+ for (Init *V : Val) {
+ RecTy *VTy = nullptr;
+ if (TypedInit *Vt = dyn_cast<TypedInit>(V))
+ VTy = Vt->getType();
+ if (BitsInit *Vbits = dyn_cast<BitsInit>(V))
+ VTy = BitsRecTy::get(Vbits->getNumBits());
+ if (isa<BitInit>(V))
+ VTy = BitRecTy::get();
+
+ if (Type == nullptr) {
+ if (!isa<UnsetInit>(V))
+ Type = VTy;
+ } else {
+ if (!isa<UnsetInit>(V)) {
+ RecTy *RType = resolveTypes(Type, VTy);
+ if (!RType) {
+ TokError(Twine("inconsistent types '") + Type->getAsString() +
+ "' and '" + VTy->getAsString() + "' for !cond");
+ return nullptr;
+ }
+ Type = RType;
+ }
+ }
+ }
+
+ if (!Type) {
+ TokError("could not determine type for !cond from its arguments");
+ return nullptr;
+ }
+ return CondOpInit::get(Case, Val, Type)->Fold(CurRec);
+}
+
/// ParseSimpleValue - Parse a tblgen value. This returns null on error.
///
/// SimpleValue ::= IDValue
@@ -1620,7 +1766,9 @@ RecTy *TGParser::ParseOperatorType() {
/// SimpleValue ::= SRATOK '(' Value ',' Value ')'
/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
/// SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')'
+/// SimpleValue ::= LISTSPLATTOK '(' Value ',' Value ')'
/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
+/// SimpleValue ::= COND '(' [Value ':' Value,]+ ')'
///
Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
IDParseMode Mode) {
@@ -1656,7 +1804,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
break;
}
case tgtok::CodeFragment:
- R = CodeInit::get(Lex.getCurStrVal());
+ R = CodeInit::get(Lex.getCurStrVal(), Lex.getLoc());
Lex.Lex();
break;
case tgtok::question:
@@ -1919,6 +2067,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XConcat:
case tgtok::XDag:
case tgtok::XADD:
+ case tgtok::XMUL:
case tgtok::XAND:
case tgtok::XOR:
case tgtok::XSRA:
@@ -1931,8 +2080,10 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XGe:
case tgtok::XGt:
case tgtok::XListConcat:
+ case tgtok::XListSplat:
case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')'
case tgtok::XIf:
+ case tgtok::XCond:
case tgtok::XFoldl:
case tgtok::XForEach:
case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
@@ -2024,25 +2175,41 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
case tgtok::paste:
SMLoc PasteLoc = Lex.getLoc();
-
- // Create a !strconcat() operation, first casting each operand to
- // a string if necessary.
-
TypedInit *LHS = dyn_cast<TypedInit>(Result);
if (!LHS) {
Error(PasteLoc, "LHS of paste is not typed!");
return nullptr;
}
+ // Check if it's a 'listA # listB'
+ if (isa<ListRecTy>(LHS->getType())) {
+ Lex.Lex(); // Eat the '#'.
+
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ Result = LHS; // trailing paste, ignore.
+ break;
+ default:
+ Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
+ Result = BinOpInit::getListConcat(LHS, RHSResult);
+ }
+ break;
+ }
+
+ // Create a !strconcat() operation, first casting each operand to
+ // a string if necessary.
if (LHS->getType() != StringRecTy::get()) {
- LHS = dyn_cast<TypedInit>(
+ auto CastLHS = dyn_cast<TypedInit>(
UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get())
->Fold(CurRec));
- if (!LHS) {
- Error(PasteLoc, Twine("can't cast '") + LHS->getAsString() +
- "' to string");
+ if (!CastLHS) {
+ Error(PasteLoc,
+ Twine("can't cast '") + LHS->getAsString() + "' to string");
return nullptr;
}
+ LHS = CastLHS;
}
TypedInit *RHS = nullptr;
@@ -2069,14 +2236,15 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
}
if (RHS->getType() != StringRecTy::get()) {
- RHS = dyn_cast<TypedInit>(
+ auto CastRHS = dyn_cast<TypedInit>(
UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get())
->Fold(CurRec));
- if (!RHS) {
- Error(PasteLoc, Twine("can't cast '") + RHS->getAsString() +
- "' to string");
+ if (!CastRHS) {
+ Error(PasteLoc,
+ Twine("can't cast '") + RHS->getAsString() + "' to string");
return nullptr;
}
+ RHS = CastRHS;
}
break;
@@ -2167,6 +2335,10 @@ void TGParser::ParseValueList(SmallVectorImpl<Init*> &Result, Record *CurRec,
while (Lex.getCode() == tgtok::comma) {
Lex.Lex(); // Eat the comma
+ // ignore trailing comma for lists
+ if (Lex.getCode() == tgtok::r_square)
+ return;
+
if (ArgsRec && !EltTy) {
ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
if (ArgN >= TArgs.size()) {
@@ -2279,12 +2451,6 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
SmallVector<unsigned, 16> Ranges;
switch (Lex.getCode()) {
- case tgtok::IntVal: { // RangePiece.
- if (ParseRangePiece(Ranges))
- return nullptr;
- break;
- }
-
case tgtok::l_brace: { // '{' RangeList '}'
Lex.Lex(); // eat the '{'
ParseRangeList(Ranges);
@@ -2299,23 +2465,35 @@ VarInit *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
default: {
SMLoc ValueLoc = Lex.getLoc();
Init *I = ParseValue(nullptr);
- TypedInit *TI = dyn_cast<TypedInit>(I);
- if (!TI || !isa<ListRecTy>(TI->getType())) {
- std::string Type;
- if (TI)
- Type = (Twine("' of type '") + TI->getType()->getAsString()).str();
- Error(ValueLoc, "expected a list, got '" + I->getAsString() + Type + "'");
- if (CurMultiClass)
- PrintNote({}, "references to multiclass template arguments cannot be "
- "resolved at this time");
+ if (!I)
return nullptr;
+
+ TypedInit *TI = dyn_cast<TypedInit>(I);
+ if (TI && isa<ListRecTy>(TI->getType())) {
+ ForeachListValue = I;
+ IterType = cast<ListRecTy>(TI->getType())->getElementType();
+ break;
}
- ForeachListValue = I;
- IterType = cast<ListRecTy>(TI->getType())->getElementType();
- break;
+
+ if (TI) {
+ if (ParseRangePiece(Ranges, TI))
+ return nullptr;
+ break;
+ }
+
+ std::string Type;
+ if (TI)
+ Type = (Twine("' of type '") + TI->getType()->getAsString()).str();
+ Error(ValueLoc, "expected a list, got '" + I->getAsString() + Type + "'");
+ if (CurMultiClass) {
+ PrintNote({}, "references to multiclass template arguments cannot be "
+ "resolved at this time");
+ }
+ return nullptr;
}
}
+
if (!Ranges.empty()) {
assert(!IterType && "Type already initialized?");
IterType = IntRecTy::get();
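Taken together, the parser changes above accept .td input such as the following illustrative fragment: !cond yields the value of the first condition that evaluates to true, '#' between two list values is parsed as a list concatenation, and a trailing comma inside a list literal is ignored (all names below are invented for the example):

class CondDemo<int x> {
  int Grade = !cond(!ge(x, 90): 4,
                    !ge(x, 80): 3,
                    1: 0);              // fallback when no earlier condition holds
  list<int> Joined = [1, 2] # [3, 4];   // [1, 2, 3, 4]
  list<int> Padded = [5, 6, 7,];        // trailing comma accepted
}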
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index e3849043513b..af2b639f8d59 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -1,9 +1,8 @@
//===- TGParser.h - Parser for TableGen Files -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -191,9 +190,11 @@ private: // Parser methods.
bool ParseOptionalRangeList(SmallVectorImpl<unsigned> &Ranges);
bool ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges);
void ParseRangeList(SmallVectorImpl<unsigned> &Result);
- bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges);
+ bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
+ TypedInit *FirstItem = nullptr);
RecTy *ParseType();
Init *ParseOperation(Record *CurRec, RecTy *ItemType);
+ Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
RecTy *ParseOperatorType();
Init *ParseObjectName(MultiClass *CurMultiClass);
Record *ParseClassID();
diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
index 77ed8414b15f..e11b28e8cff9 100644
--- a/lib/TableGen/TableGenBackend.cpp
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -1,9 +1,8 @@
//===- TableGenBackend.cpp - Utilities for TableGen Backends ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index c36d9354f3ba..6965403a25ab 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -1,9 +1,8 @@
//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,6 +56,7 @@ InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
FunctionPass *createAArch64PreLegalizeCombiner();
+FunctionPass *createAArch64StackTaggingPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -79,6 +79,7 @@ void initializeAArch64StorePairSuppressPass(PassRegistry&);
void initializeFalkorHWPFFixPass(PassRegistry&);
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
void initializeLDTLSCleanupPass(PassRegistry&);
+void initializeAArch64StackTaggingPass(PassRegistry&);
} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 8f79140cba64..e39c6995e367 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -1,9 +1,8 @@
//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -104,6 +103,21 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions">;
+def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
+ "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
+
+def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
+ "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
+
+def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
+ "Enable SM4 SVE2 instructions", [FeatureSVE2, FeatureSM4]>;
+
+def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
+ "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
+
+def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
+ "Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
+
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
@@ -127,7 +141,7 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align",
"Disallow all unaligned memory "
"access">;
-foreach i = {1-7,18,20} in
+foreach i = {1-7,9-15,18,20-28} in
def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
"Reserve X"#i#", making it unavailable "
"as a GPR">;
@@ -386,8 +400,28 @@ def AArch64InstrInfo : InstrInfo;
include "AArch64SystemOperands.td"
//===----------------------------------------------------------------------===//
+// Access to privileged registers
+//===----------------------------------------------------------------------===//
+
+foreach i = 1-3 in
+def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP",
+ "true", "Permit use of TPIDR_EL"#i#" for the TLS base">;
+
+//===----------------------------------------------------------------------===//
// AArch64 Processors supported.
//
+
+//===----------------------------------------------------------------------===//
+// Unsupported features to disable for scheduling models
+//===----------------------------------------------------------------------===//
+
+class AArch64Unsupported { list<Predicate> F; }
+
+def SVEUnsupported : AArch64Unsupported {
+ let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
+ HasSVE2BitPerm];
+}
+
include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
@@ -483,6 +517,18 @@ def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
FeaturePerfMon
]>;
+def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
+ "Cortex-A76 ARM processors", [
+ HasV8_2aOps,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureRCPC,
+ FeatureCrypto,
+ FeatureFullFP16,
+ FeatureDotProd,
+ FeatureSSBS
+ ]>;
+
// Note that cyclone does not fuse AES instructions, but newer Apple chips do
// perform the fusion and cyclone is used by default when targeting Apple OSes.
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
@@ -554,7 +600,7 @@ def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
FeatureDotProd,
FeatureExynosCheapAsMoveHandling,
FeatureForce32BitJumpTables,
- FeatureFP16FML,
+ FeatureFullFP16,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
@@ -694,15 +740,17 @@ def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-// FIXME: Cortex-A72, Cortex-A73 and Cortex-A75 are currently modeled as a Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
+def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
+def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
+def : ProcessorModel<"exynos-m5", ExynosM4Model, [ProcExynosM4]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
@@ -716,6 +764,9 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
// FIXME: HiSilicon TSV110 is currently modeled as a Cortex-A57.
def : ProcessorModel<"tsv110", CortexA57Model, [ProcTSV110]>;
+// Alias for the latest Apple processor model supported by LLVM.
+def : ProcessorModel<"apple-latest", CycloneModel, [ProcCyclone]>;
+
//===----------------------------------------------------------------------===//
// Assembly parser
//===----------------------------------------------------------------------===//
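For reference, the foreach/paste pattern whose index set is extended above (foreach i = {1-7,9-15,18,20-28} in def FeatureReserveX#i ...) stamps out one record per index. A reduced, self-contained sketch of the same mechanism (names are illustrative only):

foreach i = {1-3} in
def ReserveDemo#i {
  string FeatureName = "reserve-x"#i;   // "reserve-x1", "reserve-x2", "reserve-x3"
}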
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index 30232afaf024..e80fe2cada09 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -1,9 +1,8 @@
//===-- AArch64A53Fix835769.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass changes code to work around Cortex-A53 erratum 835769.
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 452fbd3488b0..92c8c4955d50 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -1,9 +1,8 @@
//===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// For best-case performance on Cortex-A57, we should try to use a balanced
diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 22b0c1e3b471..89404463e1f0 100644
--- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -1,9 +1,8 @@
//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// When profitable, replace GPR targeting i64 instructions with their
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 0442076992e2..094fbd999523 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1,9 +1,8 @@
//===- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,10 +17,12 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
-#include "InstPrinter/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "TargetInfo/AArch64TargetInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -29,6 +30,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -44,6 +46,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
@@ -96,6 +99,10 @@ public:
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ std::map<std::pair<unsigned, uint32_t>, MCSymbol *> HwasanMemaccessSymbols;
+ void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI);
+ void EmitHwasanMemaccessSymbols(Module &M);
+
void EmitSled(const MachineInstr &MI, SledKind Kind);
/// tblgen'erated driver function for lowering simple MI->MC
@@ -147,11 +154,9 @@ private:
raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
@@ -230,7 +235,204 @@ void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
recordSled(CurSled, MI, Kind);
}
+void AArch64AsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ uint32_t AccessInfo = MI.getOperand(1).getImm();
+ MCSymbol *&Sym = HwasanMemaccessSymbols[{Reg, AccessInfo}];
+ if (!Sym) {
+ // FIXME: Make this work on non-ELF.
+ if (!TM.getTargetTriple().isOSBinFormatELF())
+ report_fatal_error("llvm.hwasan.check.memaccess only supported on ELF");
+
+ std::string SymName = "__hwasan_check_x" + utostr(Reg - AArch64::X0) + "_" +
+ utostr(AccessInfo);
+ Sym = OutContext.getOrCreateSymbol(SymName);
+ }
+
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(AArch64::BL)
+ .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
+}
+
+void AArch64AsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
+ if (HwasanMemaccessSymbols.empty())
+ return;
+
+ const Triple &TT = TM.getTargetTriple();
+ assert(TT.isOSBinFormatELF());
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
+
+ MCSymbol *HwasanTagMismatchSym =
+ OutContext.getOrCreateSymbol("__hwasan_tag_mismatch");
+
+ const MCSymbolRefExpr *HwasanTagMismatchRef =
+ MCSymbolRefExpr::create(HwasanTagMismatchSym, OutContext);
+
+ for (auto &P : HwasanMemaccessSymbols) {
+ unsigned Reg = P.first.first;
+ uint32_t AccessInfo = P.first.second;
+ MCSymbol *Sym = P.second;
+
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".text.hot", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Sym->getName()));
+
+ OutStreamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+ OutStreamer->EmitSymbolAttribute(Sym, MCSA_Weak);
+ OutStreamer->EmitSymbolAttribute(Sym, MCSA_Hidden);
+ OutStreamer->EmitLabel(Sym);
+
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::UBFMXri)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(4)
+ .addImm(55),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBroX)
+ .addReg(AArch64::W16)
+ .addReg(AArch64::X9)
+ .addReg(AArch64::X16)
+ .addImm(0)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::SUBSXrs)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
+ *STI);
+ MCSymbol *HandlePartialSym = OutContext.createTempSymbol();
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::NE)
+ .addExpr(MCSymbolRefExpr::create(HandlePartialSym, OutContext)),
+ *STI);
+ MCSymbol *ReturnSym = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(ReturnSym);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::RET).addReg(AArch64::LR), *STI);
+
+ OutStreamer->EmitLabel(HandlePartialSym);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWri)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::W16)
+ .addImm(15)
+ .addImm(0),
+ *STI);
+ MCSymbol *HandleMismatchSym = OutContext.createTempSymbol();
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::HI)
+ .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
+ *STI);
+
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::ANDXri)
+ .addReg(AArch64::X17)
+ .addReg(Reg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
+ *STI);
+ unsigned Size = 1 << (AccessInfo & 0xf);
+ if (Size != 1)
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ADDXri)
+ .addReg(AArch64::X17)
+ .addReg(AArch64::X17)
+ .addImm(Size - 1)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::SUBSWrs)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::W16)
+ .addReg(AArch64::W17)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::LS)
+ .addExpr(MCSymbolRefExpr::create(HandleMismatchSym, OutContext)),
+ *STI);
+
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::ORRXri)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(0xf, 64)),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::LDRBBui)
+ .addReg(AArch64::W16)
+ .addReg(AArch64::X16)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::SUBSXrs)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X16)
+ .addReg(Reg)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSR, 56)),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::Bcc)
+ .addImm(AArch64CC::EQ)
+ .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext)),
+ *STI);
+
+ OutStreamer->EmitLabel(HandleMismatchSym);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXpre)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::SP)
+ .addImm(-32),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::STPXi)
+ .addReg(AArch64::FP)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(29),
+ *STI);
+
+ if (Reg != AArch64::X0)
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::ORRXrs)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::XZR)
+ .addReg(Reg)
+ .addImm(0),
+ *STI);
+ OutStreamer->EmitInstruction(MCInstBuilder(AArch64::MOVZXi)
+ .addReg(AArch64::X1)
+ .addImm(AccessInfo)
+ .addImm(0),
+ *STI);
+
+ // Intentionally load the GOT entry and branch to it, rather than possibly
+ // late binding the function, which may clobber the registers before we have
+ // a chance to save them.
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::ADRP)
+ .addReg(AArch64::X16)
+ .addExpr(AArch64MCExpr::create(
+ HwasanTagMismatchRef,
+ AArch64MCExpr::VariantKind::VK_GOT_PAGE, OutContext)),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X16)
+ .addReg(AArch64::X16)
+ .addExpr(AArch64MCExpr::create(
+ HwasanTagMismatchRef,
+ AArch64MCExpr::VariantKind::VK_GOT_LO12, OutContext)),
+ *STI);
+ OutStreamer->EmitInstruction(
+ MCInstBuilder(AArch64::BR).addReg(AArch64::X16), *STI);
+ }
+}
+
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ EmitHwasanMemaccessSymbols(M);
+
const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -295,14 +497,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
break;
}
case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
- MCSymbol *Sym = getSymbol(GV);
-
- // FIXME: Can we get anything other than a plain symbol here?
- assert(!MO.getTargetFlags() && "Unknown operand target flag!");
-
- Sym->print(O, MAI);
- printOffset(MO.getOffset(), O);
+ PrintSymbolOperand(MO, O);
break;
}
case MachineOperand::MO_BlockAddress: {
@@ -348,12 +543,11 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
}
bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
// First try the generic code, which knows about modifiers like 'c' and 'n'.
- if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O))
return false;
// Does this asm operand have a single letter operand modifier?
@@ -364,9 +558,6 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
switch (ExtraCode[0]) {
default:
return true; // Unknown modifier.
- case 'a': // Print 'a' modifier
- PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
- return false;
case 'w': // Print W register
case 'x': // Print X register
if (MO.isReg())
@@ -432,7 +623,6 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0] && ExtraCode[0] != 'a')
@@ -471,9 +661,18 @@ void AArch64AsmPrinter::EmitJumpTableInfo() {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
+ const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
- OutStreamer->SwitchSection(ReadOnlySec);
+ bool JTInDiffSection =
+ !STI->isTargetCOFF() ||
+ !TLOF.shouldPutJumpTableInFunctionSection(
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+ F);
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
+ }
auto AFI = MF->getInfo<AArch64FunctionInfo>();
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
@@ -694,6 +893,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
break;
+ case AArch64::MOVMCSym: {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const MachineOperand &MO_Sym = MI->getOperand(1);
+ MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
+ MCOperand Hi_MCSym, Lo_MCSym;
+
+ Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
+ Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
+
+ MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
+ MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
+
+ MCInst MovZ;
+ MovZ.setOpcode(AArch64::MOVZXi);
+ MovZ.addOperand(MCOperand::createReg(DestReg));
+ MovZ.addOperand(Hi_MCSym);
+ MovZ.addOperand(MCOperand::createImm(16));
+ EmitToStreamer(*OutStreamer, MovZ);
+
+ MCInst MovK;
+ MovK.setOpcode(AArch64::MOVKXi);
+ MovK.addOperand(MCOperand::createReg(DestReg));
+ MovK.addOperand(MCOperand::createReg(DestReg));
+ MovK.addOperand(Lo_MCSym);
+ MovK.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, MovK);
+ return;
+ }
case AArch64::MOVIv2d_ns:
// If the target has <rdar://problem/16473581>, lower this
// instruction to movi.16b instead.
@@ -856,6 +1083,10 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
LowerPATCHABLE_TAIL_CALL(*MI);
return;
+ case AArch64::HWASAN_CHECK_MEMACCESS:
+ LowerHWASAN_CHECK_MEMACCESS(*MI);
+ return;
+
case AArch64::SEH_StackAlloc:
TS->EmitARM64WinCFIAllocStack(MI->getOperand(0).getImm());
return;
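
To make the HWASAN lowering above easier to follow, here is a C-level sketch of what the emitted __hwasan_check_x<N>_<info> helper does at run time. It is an illustration only: shadow_base and hwasan_check are invented names, and the __hwasan_tag_mismatch signature is approximated from the call sequence rather than taken from the runtime.

#include <cstdint>

extern uint8_t *shadow_base;                              // held in x9 at the call site
extern void __hwasan_tag_mismatch(uintptr_t, uint32_t);   // reached via the GOT
static void hwasan_check(uintptr_t ptr, uint32_t access_info) {
  uint8_t mem_tag = shadow_base[(ptr >> 4) & ((1ULL << 52) - 1)]; // UBFM + LDRB
  uint8_t ptr_tag = uint8_t(ptr >> 56);                   // tag lives in the top byte
  if (mem_tag == ptr_tag)
    return;                                               // fast path: RET
  bool ok = false;
  if (mem_tag <= 15) {                                    // short-granule tag
    uintptr_t last = (ptr & 0xf) + (1u << (access_info & 0xf)) - 1;
    ok = last < mem_tag &&                                // access within valid bytes
         *(uint8_t *)(ptr | 0xf) == ptr_tag;              // stored tag matches
  }
  if (!ok)
    __hwasan_tag_mismatch(ptr, access_info);              // x0 = ptr, x1 = access info
}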
diff --git a/lib/Target/AArch64/AArch64BranchTargets.cpp b/lib/Target/AArch64/AArch64BranchTargets.cpp
index da70a624c5be..6fa3a462bc71 100644
--- a/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -1,9 +1,8 @@
//===-- AArch64BranchTargets.cpp -- Harden code using v8.5-A BTI extension -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index 5980e5684e89..59757769c89a 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -1,9 +1,8 @@
//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -45,6 +44,8 @@
#include <cstdint>
#include <iterator>
+#define DEBUG_TYPE "aarch64-call-lowering"
+
using namespace llvm;
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
@@ -56,18 +57,18 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
CCAssignFn *AssignFn)
: ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
auto &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
+ Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
MIRBuilder.buildFrameIndex(AddrReg, FI);
StackUsed = std::max(StackUsed, Size + Offset);
return AddrReg;
}
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
switch (VA.getLocInfo()) {
@@ -84,11 +85,12 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
}
}
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
+ // FIXME: Get alignment
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
- 0);
+ 1);
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
@@ -97,6 +99,8 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+ bool isArgumentHandler() const override { return true; }
+
uint64_t StackUsed;
};
@@ -129,31 +133,31 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
LLT p0 = LLT::pointer(0, 64);
LLT s64 = LLT::scalar(64);
- unsigned SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, AArch64::SP);
+ Register SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, Register(AArch64::SP));
- unsigned OffsetReg = MRI.createGenericVirtualRegister(s64);
+ Register OffsetReg = MRI.createGenericVirtualRegister(s64);
MIRBuilder.buildConstant(OffsetReg, Offset);
- unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ Register AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
return AddrReg;
}
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
- unsigned ExtReg = extendRegister(ValVReg, VA);
+ Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
if (VA.getLocInfo() == CCValAssign::LocInfo::AExt) {
Size = VA.getLocVT().getSizeInBits() / 8;
@@ -162,7 +166,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
.getReg();
}
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
- MPO, MachineMemOperand::MOStore, Size, 0);
+ MPO, MachineMemOperand::MOStore, Size, 1);
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
@@ -188,8 +192,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
void AArch64CallLowering::splitToValueTypes(
const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
- const SplitArgTy &PerformArgSplit) const {
+ const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -203,32 +206,31 @@ void AArch64CallLowering::splitToValueTypes(
if (SplitVTs.size() == 1) {
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
- SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
+ SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
OrigArg.Flags, OrigArg.IsFixed);
return;
}
- unsigned FirstRegIdx = SplitArgs.size();
+ // Create one ArgInfo for each virtual register in the original ArgInfo.
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
OrigArg.Ty, CallConv, false);
- for (auto SplitVT : SplitVTs) {
- Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
- SplitArgs.push_back(
- ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
- SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+ for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+ Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+ SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags,
+ OrigArg.IsFixed);
if (NeedsRegBlock)
SplitArgs.back().Flags.setInConsecutiveRegs();
}
SplitArgs.back().Flags.setInConsecutiveRegsLast();
-
- for (unsigned i = 0; i < Offsets.size(); ++i)
- PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
- ArrayRef<unsigned> VRegs) const {
+ ArrayRef<Register> VRegs,
+ Register SwiftErrorVReg) const {
auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
"Return value without a vreg");
@@ -250,34 +252,101 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
"For each split Type there should be exactly one VReg.");
SmallVector<ArgInfo, 8> SplitArgs;
+ CallingConv::ID CC = F.getCallingConv();
+
for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
- // We zero-extend i1s to i8.
- unsigned CurVReg = VRegs[i];
- if (MRI.getType(VRegs[i]).getSizeInBits() == 1) {
- CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg)
- ->getOperand(0)
- .getReg();
+ if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) > 1) {
+ LLVM_DEBUG(dbgs() << "Can't handle extended arg types which need split");
+ return false;
}
+ Register CurVReg = VRegs[i];
ArgInfo CurArgInfo = ArgInfo{CurVReg, SplitEVTs[i].getTypeForEVT(Ctx)};
setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
- splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, F.getCallingConv(),
- [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, CurVReg, Offset);
- });
+
+ // i1 is a special case because SDAG i1 true is naturally zero extended
+ // when widened using ANYEXT. We need to do it explicitly here.
+ if (MRI.getType(CurVReg).getSizeInBits() == 1) {
+ CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
+ } else {
+ // Some types will need extending as specified by the CC.
+ MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
+ if (EVT(NewVT) != SplitEVTs[i]) {
+ unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+ if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+ Attribute::SExt))
+ ExtendOp = TargetOpcode::G_SEXT;
+ else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+ Attribute::ZExt))
+ ExtendOp = TargetOpcode::G_ZEXT;
+
+ LLT NewLLT(NewVT);
+ LLT OldLLT(MVT::getVT(CurArgInfo.Ty));
+ CurArgInfo.Ty = EVT(NewVT).getTypeForEVT(Ctx);
+ // Instead of an extend, we might have a vector type which needs
+ // padding with more elements, e.g. <2 x half> -> <4 x half>.
+ if (NewVT.isVector()) {
+ if (OldLLT.isVector()) {
+ if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
+ // We don't handle VA types which are not exactly twice the
+ // size, but can easily be done in future.
+ if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
+ LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
+ return false;
+ }
+ auto Undef = MIRBuilder.buildUndef({OldLLT});
+ CurVReg =
+ MIRBuilder.buildMerge({NewLLT}, {CurVReg, Undef.getReg(0)})
+ .getReg(0);
+ } else {
+ // Just do a vector extend.
+ CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
+ .getReg(0);
+ }
+ } else if (NewLLT.getNumElements() == 2) {
+ // We need to pad a <1 x S> type to <2 x S>. Since we don't have
+ // <1 x S> vector types in GISel we use a build_vector instead
+ // of a vector merge/concat.
+ auto Undef = MIRBuilder.buildUndef({OldLLT});
+ CurVReg =
+ MIRBuilder
+ .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
+ .getReg(0);
+ } else {
+ LLVM_DEBUG(dbgs() << "Could not handle ret ty");
+ return false;
+ }
+ } else {
+ // A scalar extend.
+ CurVReg =
+ MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg}).getReg(0);
+ }
+ }
+ }
+ if (CurVReg != CurArgInfo.Regs[0]) {
+ CurArgInfo.Regs[0] = CurVReg;
+ // Reset the arg flags after modifying CurVReg.
+ setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
+ }
+ splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI, CC);
}
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
}
+ if (SwiftErrorVReg) {
+ MIB.addUse(AArch64::X21, RegState::Implicit);
+ MIRBuilder.buildCopy(AArch64::X21, SwiftErrorVReg);
+ }
+
MIRBuilder.insertInstr(MIB);
return Success;
}
-bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function &F,
- ArrayRef<unsigned> VRegs) const {
+bool AArch64CallLowering::lowerFormalArguments(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -288,26 +357,11 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
for (auto &Arg : F.args()) {
if (DL.getTypeStoreSize(Arg.getType()) == 0)
continue;
+
ArgInfo OrigArg{VRegs[i], Arg.getType()};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
- bool Split = false;
- LLT Ty = MRI.getType(VRegs[i]);
- unsigned Dst = VRegs[i];
-
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
- [&](unsigned Reg, uint64_t Offset) {
- if (!Split) {
- Split = true;
- Dst = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildUndef(Dst);
- }
- unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
- Dst = Tmp;
- });
-
- if (Dst != VRegs[i])
- MIRBuilder.buildCopy(VRegs[i], Dst);
+
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv());
++i;
}
@@ -351,7 +405,8 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallingConv::ID CallConv,
const MachineOperand &Callee,
const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const {
+ ArrayRef<ArgInfo> OrigArgs,
+ Register SwiftErrorVReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -359,10 +414,10 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
for (auto &OrigArg : OrigArgs) {
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv,
- [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
- });
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv);
+ // AAPCS requires that we zero-extend i1 to 8 bits by the caller.
+ if (OrigArg.Ty->isIntegerTy(1))
+ SplitArgs.back().Flags.setZExt();
}
// Find out which ABI gets to decide where things go.
@@ -412,23 +467,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
- if (OrigRet.Reg) {
+ if (!OrigRet.Ty->isVoidTy()) {
SplitArgs.clear();
- SmallVector<uint64_t, 8> RegOffsets;
- SmallVector<unsigned, 8> SplitRegs;
- splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv(),
- [&](unsigned Reg, uint64_t Offset) {
- RegOffsets.push_back(Offset);
- SplitRegs.push_back(Reg);
- });
+ splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv());
CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
+ }
- if (!RegOffsets.empty())
- MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
+ if (SwiftErrorVReg) {
+ MIB.addDef(AArch64::X21, RegState::Implicit);
+ MIRBuilder.buildCopy(SwiftErrorVReg, Register(AArch64::X21));
}
CallSeqStart.addImm(Handler.StackSize).addImm(0);
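
The return-value widening added to lowerReturn above distinguishes several shapes; the comment table below summarizes them. It is a sketch based on the code paths above, with the AAPCS register type for <2 x half> assumed to be v4f16.

// ret <2 x half>  -> CC register type is a wider vector, so NewLLT has twice
//                    the elements: buildUndef + buildMerge pad the value up,
//                    e.g. <2 x half> -> <4 x half>.
// ret <1 x float> -> NewLLT has two elements: buildUndef + buildBuildVector,
//                    since <1 x s32> merges do not exist in GISel.
// ret i8 signext  -> scalar path: G_SEXT (or G_ZEXT / G_ANYEXT, per the return
//                    attribute) to the CC register type before assignment.
// ret i1          -> zero-extended to s8 explicitly, as the comment explains.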
diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h
index 1c2bd6a4de5d..4f428f254537 100644
--- a/lib/Target/AArch64/AArch64CallLowering.h
+++ b/lib/Target/AArch64/AArch64CallLowering.h
@@ -1,9 +1,8 @@
//===- AArch64CallLowering.h - Call lowering --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -35,14 +34,24 @@ public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<Register> VRegs,
+ Register SwiftErrorVReg) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs) const override;
+
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs,
+ Register SwiftErrorVReg) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
const MachineOperand &Callee, const ArgInfo &OrigRet,
- ArrayRef<ArgInfo> OrigArgs) const override;
+ ArrayRef<ArgInfo> OrigArgs) const override {
+ return lowerCall(MIRBuilder, CallConv, Callee, OrigRet, OrigArgs, 0);
+ }
+
+ bool supportSwiftError() const override { return true; }
private:
using RegHandler = std::function<void(MachineIRBuilder &, Type *, unsigned,
@@ -51,13 +60,10 @@ private:
using MemHandler =
std::function<void(MachineIRBuilder &, int, CCValAssign &)>;
- using SplitArgTy = std::function<void(unsigned, uint64_t)>;
-
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
- CallingConv::ID CallConv,
- const SplitArgTy &SplitArg) const;
+ CallingConv::ID CallConv) const;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64CallingConvention.cpp b/lib/Target/AArch64/AArch64CallingConvention.cpp
new file mode 100644
index 000000000000..02538a187611
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -0,0 +1,134 @@
+//=== AArch64CallingConvention.cpp - AArch64 CC impl ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the table-generated and custom routines for the AArch64
+// Calling Convention.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64CallingConvention.h"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/CallingConv.h"
+using namespace llvm;
+
+static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
+ AArch64::H3, AArch64::H4, AArch64::H5,
+ AArch64::H6, AArch64::H7};
+static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
+
+static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
+ MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
+ CCState &State, unsigned SlotAlign) {
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned StackAlign =
+ State.getMachineFunction().getDataLayout().getStackAlignment();
+ unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
+ State.addLoc(It);
+ SlotAlign = 1;
+ }
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+ return true;
+}
+
+/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
+/// [N x Ty] type must still be contiguous in memory though.
+static bool CC_AArch64_Custom_Stack_Block(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
+}
+
+/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
+/// registers. If no such sequence is available, mark the rest of the registers
+/// of that type as used and place the argument on the stack.
+static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ ArrayRef<MCPhysReg> RegList;
+ if (LocVT.SimpleTy == MVT::i64)
+ RegList = XRegList;
+ else if (LocVT.SimpleTy == MVT::f16)
+ RegList = HRegList;
+ else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
+ RegList = SRegList;
+ else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
+ RegList = DRegList;
+ else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
+ RegList = QRegList;
+ else {
+ // Not an array we want to split up after all.
+ return false;
+ }
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // block.
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (auto &It : PendingMembers) {
+ It.convertToReg(RegResult);
+ State.addLoc(It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Mark all regs in the class as unavailable
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
+
+ return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
+}
+
+// TableGen provides definitions of the calling convention analysis entry
+// points.
+#include "AArch64GenCallingConv.inc"
diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h
index 461c01318d4e..13cc0c583fd2 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/lib/Target/AArch64/AArch64CallingConvention.h
@@ -1,139 +1,45 @@
-//=== AArch64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
+//=== AArch64CallingConvention.h - AArch64 CC entry points ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file contains the custom routines for the AArch64 Calling Convention
-// that aren't done by tablegen.
+// This file declares the entry points for AArch64 calling convention analysis.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLINGCONVENTION_H
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CallingConv.h"
-
-namespace {
-using namespace llvm;
-
-static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
- AArch64::X3, AArch64::X4, AArch64::X5,
- AArch64::X6, AArch64::X7};
-static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
- AArch64::H3, AArch64::H4, AArch64::H5,
- AArch64::H6, AArch64::H7};
-static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
- AArch64::S3, AArch64::S4, AArch64::S5,
- AArch64::S6, AArch64::S7};
-static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
- AArch64::D3, AArch64::D4, AArch64::D5,
- AArch64::D6, AArch64::D7};
-static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
- AArch64::Q3, AArch64::Q4, AArch64::Q5,
- AArch64::Q6, AArch64::Q7};
-
-static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
- MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
- CCState &State, unsigned SlotAlign) {
- unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned StackAlign =
- State.getMachineFunction().getDataLayout().getStackAlignment();
- unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
-
- for (auto &It : PendingMembers) {
- It.convertToMem(State.AllocateStack(Size, std::max(Align, SlotAlign)));
- State.addLoc(It);
- SlotAlign = 1;
- }
-
- // All pending members have now been allocated
- PendingMembers.clear();
- return true;
-}
-
-/// The Darwin variadic PCS places anonymous arguments in 8-byte stack slots. An
-/// [N x Ty] type must still be contiguous in memory though.
-static bool CC_AArch64_Custom_Stack_Block(
- unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
- SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
- // Add the argument to the list to be allocated once we know the size of the
- // block.
- PendingMembers.push_back(
- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-
- if (!ArgFlags.isInConsecutiveRegsLast())
- return true;
-
- return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, 8);
-}
-
-/// Given an [N x Ty] block, it should be passed in a consecutive sequence of
-/// registers. If no such sequence is available, mark the rest of the registers
-/// of that type as used and place the argument on the stack.
-static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
- // Try to allocate a contiguous block of registers, each of the correct
- // size to hold one member.
- ArrayRef<MCPhysReg> RegList;
- if (LocVT.SimpleTy == MVT::i64)
- RegList = XRegList;
- else if (LocVT.SimpleTy == MVT::f16)
- RegList = HRegList;
- else if (LocVT.SimpleTy == MVT::f32 || LocVT.is32BitVector())
- RegList = SRegList;
- else if (LocVT.SimpleTy == MVT::f64 || LocVT.is64BitVector())
- RegList = DRegList;
- else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
- RegList = QRegList;
- else {
- // Not an array we want to split up after all.
- return false;
- }
-
- SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
- // Add the argument to the list to be allocated once we know the size of the
- // block.
- PendingMembers.push_back(
- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-
- if (!ArgFlags.isInConsecutiveRegsLast())
- return true;
-
- unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
- if (RegResult) {
- for (auto &It : PendingMembers) {
- It.convertToReg(RegResult);
- State.addLoc(It);
- ++RegResult;
- }
- PendingMembers.clear();
- return true;
- }
-
- // Mark all regs in the class as unavailable
- for (auto Reg : RegList)
- State.AllocateReg(Reg);
-
- const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
- State.getMachineFunction().getSubtarget());
- unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
-
- return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
-}
-}
+namespace llvm {
+bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+} // namespace llvm
#endif
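
These entry points are consumed through CCState; a minimal usage sketch follows. The names Ins, MF, DAG and CallConv are assumed to come from the surrounding lowering routine, so this is an illustration rather than code from the patch.

SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, /*IsVarArg=*/false, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_AArch64_AAPCS); // or CC_AArch64_DarwinPCS, ...
for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc())
    ; // copy the incoming value from VA.getLocReg()
  else
    ; // load it from the fixed stack slot at VA.getLocMemOffset()
}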
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 5db941e9dac7..d969a9e1ab3a 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -1,9 +1,8 @@
//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@ class CCIfBigEndian<CCAction A> :
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
+let Entry = 1 in
def CC_AArch64_AAPCS : CallingConv<[
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -34,7 +34,23 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
CCBitConvertToType<f128>>>,
- // An SRet is passed in X8, not X0 like a normal pointer parameter.
+ // In AAPCS, an SRet is passed in X8, not X0 like a normal pointer parameter.
+  // However, on Windows, in some circumstances, the SRet is passed in X0 or X1
+ // instead. The presence of the inreg attribute indicates that SRet is
+ // passed in the alternative register (X0 or X1), not X8:
+ // - X0 for non-instance methods.
+ // - X1 for instance methods.
+
+ // The "sret" attribute identifies indirect returns.
+ // The "inreg" attribute identifies non-aggregate types.
+ // The position of the "sret" attribute identifies instance/non-instance
+ // methods.
+ // "sret" on argument 0 means non-instance methods.
+ // "sret" on argument 1 means instance methods.
+
+ CCIfInReg<CCIfType<[i64],
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1], [W0, W1]>>>>>,
+
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
@@ -89,6 +105,7 @@ def CC_AArch64_AAPCS : CallingConv<[
CCAssignToStack<16, 16>>
]>;
+let Entry = 1 in
def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -122,6 +139,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
]>;
// Vararg functions on windows pass floats in integer registers
+let Entry = 1 in
def CC_AArch64_Win64_VarArg : CallingConv<[
CCIfType<[f16, f32], CCPromoteToType<f64>>,
CCIfType<[f64], CCBitConvertToType<i64>>,
@@ -133,6 +151,7 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
// from the standard one at this level:
// + i128s (i.e. split i64s) don't need even registers.
// + Stack slots are sized as needed rather than being at least 64-bit.
+let Entry = 1 in
def CC_AArch64_DarwinPCS : CallingConv<[
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -189,6 +208,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCAssignToStack<16, 16>>
]>;
+let Entry = 1 in
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
@@ -213,6 +233,7 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// in register and the remaining arguments on stack. We allow 32bit stack slots,
// so that WebKit can write partial values in the stack and define the other
// 32bit quantity as undef.
+let Entry = 1 in
def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -224,6 +245,7 @@ def CC_AArch64_WebKit_JS : CallingConv<[
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
+let Entry = 1 in
def RetCC_AArch64_WebKit_JS : CallingConv<[
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
@@ -257,6 +279,7 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
// register mapping".
+let Entry = 1 in
def CC_AArch64_GHC : CallingConv<[
CCIfType<[iPTR], CCBitConvertToType<i64>>,
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index b88fba4452a1..688bd1b28e85 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -1,9 +1,8 @@
//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 720323f81d29..9f324b433209 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -1,9 +1,8 @@
//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 0924a27e2586..48dab79b32d3 100644
--- a/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -1,9 +1,8 @@
//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// This pass looks at the basic blocks each jump-table refers to and works out
// whether they can be emitted in a compressed form (with 8 or 16-bit
@@ -108,6 +107,7 @@ bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
MinBlock = Block;
}
}
+ assert(MinBlock && "Failed to find minimum offset block");
// The ADR instruction needed to calculate the address of the first reachable
// basic block can address +/-1MB.
@@ -141,7 +141,7 @@ bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
const auto &ST = MF->getSubtarget<AArch64Subtarget>();
TII = ST.getInstrInfo();
- if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+ if (ST.force32BitJumpTables() && !MF->getFunction().hasMinSize())
return false;
scanFunction();
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 5ae787409ae8..453132e09669 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -1,9 +1,8 @@
//===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 5064762b9f77..a6efb115ed44 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -1,9 +1,8 @@
//=- AArch64ConditionOptimizer.cpp - Remove useless comparisons for AArch64 -=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 8176b6fb269d..2cfbcc592d6a 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -1,9 +1,8 @@
//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -941,7 +940,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction().optForMinSize();
+ MinSize = MF.getFunction().hasMinSize();
bool Changed = false;
CmpConv.runOnMachineFunction(MF, MBPI);
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 2ba10d25e939..a43077cb88ec 100644
--- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -1,9 +1,8 @@
//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file When allowed by the instruction, replace a dead definition of a GPR
@@ -55,8 +54,6 @@ public:
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
- bool shouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
};
char AArch64DeadRegisterDefinitions::ID = 0;
} // end anonymous namespace
@@ -71,60 +68,48 @@ static bool usesFrameIndex(const MachineInstr &MI) {
return false;
}
-bool
-AArch64DeadRegisterDefinitions::shouldSkip(const MachineInstr &MI,
- const MachineFunction &MF) const {
- if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
- return false;
-
-#define CASE_AARCH64_ATOMIC_(PREFIX) \
- case AArch64::PREFIX##X: \
- case AArch64::PREFIX##W: \
- case AArch64::PREFIX##H: \
- case AArch64::PREFIX##B
-
- for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->isAtomic()) {
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- default:
- return false;
- break;
-
- CASE_AARCH64_ATOMIC_(LDADDA):
- CASE_AARCH64_ATOMIC_(LDADDAL):
-
- CASE_AARCH64_ATOMIC_(LDCLRA):
- CASE_AARCH64_ATOMIC_(LDCLRAL):
-
- CASE_AARCH64_ATOMIC_(LDEORA):
- CASE_AARCH64_ATOMIC_(LDEORAL):
-
- CASE_AARCH64_ATOMIC_(LDSETA):
- CASE_AARCH64_ATOMIC_(LDSETAL):
-
- CASE_AARCH64_ATOMIC_(LDSMAXA):
- CASE_AARCH64_ATOMIC_(LDSMAXAL):
-
- CASE_AARCH64_ATOMIC_(LDSMINA):
- CASE_AARCH64_ATOMIC_(LDSMINAL):
-
- CASE_AARCH64_ATOMIC_(LDUMAXA):
- CASE_AARCH64_ATOMIC_(LDUMAXAL):
-
- CASE_AARCH64_ATOMIC_(LDUMINA):
- CASE_AARCH64_ATOMIC_(LDUMINAL):
-
- CASE_AARCH64_ATOMIC_(SWPA):
- CASE_AARCH64_ATOMIC_(SWPAL):
- return true;
- break;
- }
- }
+// Instructions that lose their 'read' operation for a subsequent fence acquire
+// (DMB LD) once the zero register is used.
+//
+// WARNING: The acquire variants of the instructions are also affected, but they
+// are split out into `atomicBarrierDroppedOnZero()` to support annotations on
+// assembly.
+static bool atomicReadDroppedOnZero(unsigned Opcode) {
+ switch (Opcode) {
+ case AArch64::LDADDB: case AArch64::LDADDH:
+ case AArch64::LDADDW: case AArch64::LDADDX:
+ case AArch64::LDADDLB: case AArch64::LDADDLH:
+ case AArch64::LDADDLW: case AArch64::LDADDLX:
+ case AArch64::LDCLRB: case AArch64::LDCLRH:
+ case AArch64::LDCLRW: case AArch64::LDCLRX:
+ case AArch64::LDCLRLB: case AArch64::LDCLRLH:
+ case AArch64::LDCLRLW: case AArch64::LDCLRLX:
+ case AArch64::LDEORB: case AArch64::LDEORH:
+ case AArch64::LDEORW: case AArch64::LDEORX:
+ case AArch64::LDEORLB: case AArch64::LDEORLH:
+ case AArch64::LDEORLW: case AArch64::LDEORLX:
+ case AArch64::LDSETB: case AArch64::LDSETH:
+ case AArch64::LDSETW: case AArch64::LDSETX:
+ case AArch64::LDSETLB: case AArch64::LDSETLH:
+ case AArch64::LDSETLW: case AArch64::LDSETLX:
+ case AArch64::LDSMAXB: case AArch64::LDSMAXH:
+ case AArch64::LDSMAXW: case AArch64::LDSMAXX:
+ case AArch64::LDSMAXLB: case AArch64::LDSMAXLH:
+ case AArch64::LDSMAXLW: case AArch64::LDSMAXLX:
+ case AArch64::LDSMINB: case AArch64::LDSMINH:
+ case AArch64::LDSMINW: case AArch64::LDSMINX:
+ case AArch64::LDSMINLB: case AArch64::LDSMINLH:
+ case AArch64::LDSMINLW: case AArch64::LDSMINLX:
+ case AArch64::LDUMAXB: case AArch64::LDUMAXH:
+ case AArch64::LDUMAXW: case AArch64::LDUMAXX:
+ case AArch64::LDUMAXLB: case AArch64::LDUMAXLH:
+ case AArch64::LDUMAXLW: case AArch64::LDUMAXLX:
+ case AArch64::LDUMINB: case AArch64::LDUMINH:
+ case AArch64::LDUMINW: case AArch64::LDUMINX:
+ case AArch64::LDUMINLB: case AArch64::LDUMINLH:
+ case AArch64::LDUMINLW: case AArch64::LDUMINLX:
+ return true;
}
-
-#undef CASE_AARCH64_ATOMIC_
-
return false;
}
@@ -148,9 +133,8 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
continue;
}
- if (shouldSkip(MI, MF)) {
- LLVM_DEBUG(dbgs() << " Ignoring, Atomic instruction with acquire "
- "semantics using WZR/XZR\n");
+ if (atomicBarrierDroppedOnZero(MI.getOpcode()) || atomicReadDroppedOnZero(MI.getOpcode())) {
+ LLVM_DEBUG(dbgs() << " Ignoring, semantics change with xzr/wzr.\n");
continue;
}
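
To make the intent of atomicReadDroppedOnZero concrete, an illustrative before/after follows (the mnemonics are standard LSE spellings written out by hand, not output of this pass).

// dead destination:  ldaddl w2, w0, [x1]   ; old value is read into w0, unused
// naive rewrite:     ldaddl w2, wzr, [x1]  ; with wzr the access no longer
//                                          ; counts as a read, so a later
//                                          ; "dmb ld" stops ordering against it
// atomicReadDroppedOnZero() returns true for these opcodes, so the pass keeps
// the original destination and the ordering guarantee intact.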
diff --git a/lib/Target/AArch64/AArch64ExpandImm.cpp b/lib/Target/AArch64/AArch64ExpandImm.cpp
new file mode 100644
index 000000000000..c764af80eb86
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -0,0 +1,411 @@
+//===- AArch64ExpandImm.cpp - AArch64 Immediate Expansion -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64 immediate expansion helpers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64ExpandImm.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+
+namespace llvm {
+
+namespace AArch64_IMM {
+
+/// Helper function which extracts the specified 16-bit chunk from a
+/// 64-bit value.
+static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
+ assert(ChunkIdx < 4 && "Out of range chunk index specified!");
+
+ return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
+}
+
+/// Check whether the given 16-bit chunk replicated to full 64-bit width
+/// can be materialized with an ORR instruction.
+static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
+ Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
+
+ return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
+}
+
+/// Check for identical 16-bit chunks within the constant and if so
+/// materialize them with a single ORR instruction. The remaining one or two
+/// 16-bit chunks will be materialized with MOVK instructions.
+///
+/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
+/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
+/// an ORR instruction.
+static bool tryToreplicateChunks(uint64_t UImm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ using CountMap = DenseMap<uint64_t, unsigned>;
+
+ CountMap Counts;
+
+ // Scan the constant and count how often every chunk occurs.
+ for (unsigned Idx = 0; Idx < 4; ++Idx)
+ ++Counts[getChunk(UImm, Idx)];
+
+ // Traverse the chunks to find one which occurs more than once.
+ for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
+ Chunk != End; ++Chunk) {
+ const uint64_t ChunkVal = Chunk->first;
+ const unsigned Count = Chunk->second;
+
+ uint64_t Encoding = 0;
+
+ // We are looking for chunks which have two or three instances and can be
+ // materialized with an ORR instruction.
+ if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
+ continue;
+
+ const bool CountThree = Count == 3;
+
+ Insn.push_back({ AArch64::ORRXri, 0, Encoding });
+
+ unsigned ShiftAmt = 0;
+ uint64_t Imm16 = 0;
+ // Find the first chunk not materialized with the ORR instruction.
+ for (; ShiftAmt < 64; ShiftAmt += 16) {
+ Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
+
+ if (Imm16 != ChunkVal)
+ break;
+ }
+
+ // Create the first MOVK instruction.
+ Insn.push_back({ AArch64::MOVKXi, Imm16,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
+
+ // In case we have three instances the whole constant is now materialized
+ // and we can exit.
+ if (CountThree)
+ return true;
+
+ // Find the remaining chunk which needs to be materialized.
+ for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
+ Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
+
+ if (Imm16 != ChunkVal)
+ break;
+ }
+ Insn.push_back({ AArch64::MOVKXi, Imm16,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
+ return true;
+ }
+
+ return false;
+}
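
A worked example of the chunk-replication path may help. The standalone sketch below is not LLVM code; it only reproduces the chunk counting, and it assumes (correctly, since the pattern alternates bits) that 0x5555555555555555 encodes as a logical immediate.

#include <cstdint>
#include <cstdio>
#include <map>

static uint64_t chunk(uint64_t Imm, unsigned Idx) { return (Imm >> (Idx * 16)) & 0xFFFF; }

int main() {
  const uint64_t Imm = 0x5555AAAA55555555ULL;
  std::map<uint64_t, unsigned> Counts;
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[chunk(Imm, Idx)];
  // 0x5555 occurs three times, so the constant is materialized as
  //   ORR  xD, xzr, #0x5555555555555555
  //   MOVK xD, #0xaaaa, lsl #32
  // which is the two-instruction sequence tryToreplicateChunks produces here.
  for (const auto &C : Counts)
    std::printf("chunk 0x%04llx occurs %u time(s)\n",
                (unsigned long long)C.first, C.second);
  return 0;
}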
+
+/// Check whether this chunk matches the pattern '1...0...'. This pattern
+/// starts a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+static bool isStartChunk(uint64_t Chunk) {
+ if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+ return false;
+
+ return isMask_64(~Chunk);
+}
+
+/// Check whether this chunk matches the pattern '0...1...' This pattern
+/// ends a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+static bool isEndChunk(uint64_t Chunk) {
+ if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+ return false;
+
+ return isMask_64(Chunk);
+}
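+
+// Editorial note: after sign-extension a chunk such as 0xFF00 becomes
+// 0xFFFFFFFFFFFFFF00, whose complement 0x00FF is a mask, so it is a start
+// chunk ('1...0...'); 0x00FF is itself a mask and is an end chunk
+// ('0...1...'). All-zero and all-ones chunks are deliberately excluded.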
+
+/// Clear or set all bits in the chunk at the given index.
+static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
+ const uint64_t Mask = 0xFFFF;
+
+ if (Clear)
+ // Clear chunk in the immediate.
+ Imm &= ~(Mask << (Idx * 16));
+ else
+ // Set all bits in the immediate for the particular chunk.
+ Imm |= Mask << (Idx * 16);
+
+ return Imm;
+}
+
+/// Check whether the constant contains a sequence of contiguous ones,
+/// which might be interrupted by one or two chunks. If so, materialize the
+/// sequence of contiguous ones with an ORR instruction.
+/// Materialize the chunks which are either interrupting the sequence or outside
+/// of the sequence with a MOVK instruction.
+///
+/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
+/// which ends the sequence (0...1...). Then we are looking for constants which
+/// contain at least one S and E chunk.
+/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
+///
+/// We are also looking for constants like |S|A|B|E| where the contiguous
+/// sequence of ones wraps around the MSB into the LSB.
+static bool trySequenceOfOnes(uint64_t UImm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ const int NotSet = -1;
+ const uint64_t Mask = 0xFFFF;
+
+ int StartIdx = NotSet;
+ int EndIdx = NotSet;
+ // Try to find the chunks which start/end a contiguous sequence of ones.
+ for (int Idx = 0; Idx < 4; ++Idx) {
+ int64_t Chunk = getChunk(UImm, Idx);
+ // Sign extend the 16-bit chunk to 64-bit.
+ Chunk = (Chunk << 48) >> 48;
+
+ if (isStartChunk(Chunk))
+ StartIdx = Idx;
+ else if (isEndChunk(Chunk))
+ EndIdx = Idx;
+ }
+
+ // Early exit in case we can't find a start/end chunk.
+ if (StartIdx == NotSet || EndIdx == NotSet)
+ return false;
+
+ // Outside of the contiguous sequence of ones everything needs to be zero.
+ uint64_t Outside = 0;
+ // Chunks between the start and end chunk need to have all their bits set.
+ uint64_t Inside = Mask;
+
+ // If our contiguous sequence of ones wraps around from the MSB into the LSB,
+ // just swap indices and pretend we are materializing a contiguous sequence
+ // of zeros surrounded by a contiguous sequence of ones.
+ if (StartIdx > EndIdx) {
+ std::swap(StartIdx, EndIdx);
+ std::swap(Outside, Inside);
+ }
+
+ uint64_t OrrImm = UImm;
+ int FirstMovkIdx = NotSet;
+ int SecondMovkIdx = NotSet;
+
+ // Find out which chunks we need to patch up to obtain a contiguous sequence
+ // of ones.
+ for (int Idx = 0; Idx < 4; ++Idx) {
+ const uint64_t Chunk = getChunk(UImm, Idx);
+
+ // Check whether we are looking at a chunk which is not part of the
+ // contiguous sequence of ones.
+ if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
+ OrrImm = updateImm(OrrImm, Idx, Outside == 0);
+
+ // Remember the index we need to patch.
+ if (FirstMovkIdx == NotSet)
+ FirstMovkIdx = Idx;
+ else
+ SecondMovkIdx = Idx;
+
+ // Check whether we are looking at a chunk which is part of the contiguous
+ // sequence of ones.
+ } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
+ OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
+
+ // Remember the index we need to patch.
+ if (FirstMovkIdx == NotSet)
+ FirstMovkIdx = Idx;
+ else
+ SecondMovkIdx = Idx;
+ }
+ }
+ assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
+
+ // Create the ORR-immediate instruction.
+ uint64_t Encoding = 0;
+ AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
+ Insn.push_back({ AArch64::ORRXri, 0, Encoding });
+
+ const bool SingleMovk = SecondMovkIdx == NotSet;
+ Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, FirstMovkIdx),
+ AArch64_AM::getShifterImm(AArch64_AM::LSL,
+ FirstMovkIdx * 16) });
+
+ // Early exit in case we only need to emit a single MOVK instruction.
+ if (SingleMovk)
+ return true;
+
+ // Create the second MOVK instruction.
+ Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, SecondMovkIdx),
+ AArch64_AM::getShifterImm(AArch64_AM::LSL,
+ SecondMovkIdx * 16) });
+
+ return true;
+}
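+
+// Worked example (editorial sketch, not from the imported source): for
+// UImm = 0x00FF1234FFFFFF00 the start chunk is 0xFF00 (index 0) and the end
+// chunk is 0x00FF (index 3); setting the interrupting chunk 0x1234 to all
+// ones yields the contiguous run 0x00FFFFFFFFFFFF00, so this helper produces
+// ORR Xd, XZR, #0x00ffffffffffff00 followed by MOVK Xd, #0x1234, LSL #32.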
+
+/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
+/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
+static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
+ unsigned OneChunks, unsigned ZeroChunks,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ const unsigned Mask = 0xFFFF;
+
+ // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
+ // more MOVK instructions to insert additional 16-bit portions into the
+ // lower bits.
+ bool isNeg = false;
+
+ // Use MOVN to materialize the high bits if we have more all-ones chunks
+ // than all-zeros chunks.
+ if (OneChunks > ZeroChunks) {
+ isNeg = true;
+ Imm = ~Imm;
+ }
+
+ unsigned FirstOpc;
+ if (BitSize == 32) {
+ Imm &= (1LL << 32) - 1;
+ FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
+ } else {
+ FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
+ }
+ unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
+ unsigned LastShift = 0; // LSL amount for last MOVK
+ if (Imm != 0) {
+ unsigned LZ = countLeadingZeros(Imm);
+ unsigned TZ = countTrailingZeros(Imm);
+ Shift = (TZ / 16) * 16;
+ LastShift = ((63 - LZ) / 16) * 16;
+ }
+ unsigned Imm16 = (Imm >> Shift) & Mask;
+
+ Insn.push_back({ FirstOpc, Imm16,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
+
+ if (Shift == LastShift)
+ return;
+
+ // If a MOVN was used for the high bits of a negative value, flip the rest
+ // of the bits back for use with MOVK.
+ if (isNeg)
+ Imm = ~Imm;
+
+ unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
+ while (Shift < LastShift) {
+ Shift += 16;
+ Imm16 = (Imm >> Shift) & Mask;
+ if (Imm16 == (isNeg ? Mask : 0))
+ continue; // This 16-bit portion is already set correctly.
+
+ Insn.push_back({ Opc, Imm16,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
+ }
+}
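+
+// Worked example (editorial, not from the imported source): expanding
+// Imm = 0x0000123400005678 with BitSize = 64, OneChunks = 0 and
+// ZeroChunks = 2 keeps isNeg false, picks Shift = 0 and LastShift = 32, and
+// emits MOVZ Xd, #0x5678 followed by MOVK Xd, #0x1234, LSL #32; the all-zero
+// chunk at LSL #16 is skipped.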
+
+/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
+/// real move-immediate instructions to synthesize the immediate.
+void expandMOVImm(uint64_t Imm, unsigned BitSize,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ const unsigned Mask = 0xFFFF;
+
+ // Scan the immediate and count the number of 16-bit chunks which are either
+ // all ones or all zeros.
+ unsigned OneChunks = 0;
+ unsigned ZeroChunks = 0;
+ for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
+ const unsigned Chunk = (Imm >> Shift) & Mask;
+ if (Chunk == Mask)
+ OneChunks++;
+ else if (Chunk == 0)
+ ZeroChunks++;
+ }
+
+ // Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
+ if ((BitSize / 16) - OneChunks <= 1 || (BitSize / 16) - ZeroChunks <= 1) {
+ expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+ return;
+ }
+
+ // Try a single ORR.
+ uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+ unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
+ Insn.push_back({ Opc, 0, Encoding });
+ return;
+ }
+
+ // Two-instruction sequences.
+ //
+ // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
+ // fastest sequence with fast literal generation.
+ if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) {
+ expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+ return;
+ }
+
+ assert(BitSize == 64 && "All 32-bit immediates can be expanded with a "
+ "MOVZ/MOVK pair");
+
+ // Try other two-instruction sequences.
+
+ // 64-bit ORR followed by MOVK.
+ // We try to construct the ORR immediate in three different ways: either we
+ // zero out the chunk which will be replaced, we fill the chunk which will
+ // be replaced with ones, or we take the bit pattern from the other half of
+ // the 64-bit immediate. This is comprehensive because of the way ORR
+ // immediates are constructed.
+ for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
+ uint64_t ShiftedMask = (0xFFFFULL << Shift);
+ uint64_t ZeroChunk = UImm & ~ShiftedMask;
+ uint64_t OneChunk = UImm | ShiftedMask;
+ uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
+ uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
+ if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
+ AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
+ AArch64_AM::processLogicalImmediate(ReplicateChunk, BitSize,
+ Encoding)) {
+ // Create the ORR-immediate instruction.
+ Insn.push_back({ AArch64::ORRXri, 0, Encoding });
+
+ // Create the MOVK instruction.
+ const unsigned Imm16 = getChunk(UImm, Shift / 16);
+ Insn.push_back({ AArch64::MOVKXi, Imm16,
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
+ return;
+ }
+ }
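+
+ // Editorial example of the search above (not from the imported source):
+ // UImm = 0x5555555555551234 is not itself a logical immediate, but
+ // replacing chunk 0 with the matching chunk from the other 32-bit half
+ // (0x5555) gives 0x5555555555555555, which is; the result is
+ // ORR Xd, XZR, #0x5555555555555555 followed by MOVK Xd, #0x1234, LSL #0.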
+
+ // FIXME: Add more two-instruction sequences.
+
+ // Three instruction sequences.
+ //
+ // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
+ // the fastest sequence with fast literal generation. (If neither MOVK is
+ // part of a fast literal generation pair, it could be slower than the
+ // four-instruction sequence, but we won't worry about that for now.)
+ if (OneChunks || ZeroChunks) {
+ expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+ return;
+ }
+
+ // Check for identical 16-bit chunks within the constant and if so materialize
+ // them with a single ORR instruction. The remaining one or two 16-bit chunks
+ // will be materialized with MOVK instructions.
+ if (BitSize == 64 && tryToreplicateChunks(UImm, Insn))
+ return;
+
+ // Check whether the constant contains a sequence of contiguous ones, which
+ // might be interrupted by one or two chunks. If so, materialize the sequence
+ // of contiguous ones with an ORR instruction. Materialize the chunks which
+ // are either interrupting the sequence or outside of the sequence with a
+ // MOVK instruction.
+ if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
+ return;
+
+ // We found no possible two or three instruction sequence; use the general
+ // four-instruction sequence.
+ expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
+}
+
+} // end namespace AArch64_IMM
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64ExpandImm.h b/lib/Target/AArch64/AArch64ExpandImm.h
new file mode 100644
index 000000000000..42c97d2c3e9b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ExpandImm.h
@@ -0,0 +1,35 @@
+//===- AArch64ExpandImm.h - AArch64 Immediate Expansion ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares helpers for expanding AArch64 MOVi32imm/MOVi64imm
+// immediates into real instruction sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+namespace AArch64_IMM {
+
+struct ImmInsnModel {
+ unsigned Opcode;
+ uint64_t Op1;
+ uint64_t Op2;
+};
+
+void expandMOVImm(uint64_t Imm, unsigned BitSize,
+ SmallVectorImpl<ImmInsnModel> &Insn);
+
+} // end namespace AArch64_IMM
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index f7190d58fbf9..210c10eb1842 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
@@ -66,11 +67,6 @@ private:
MachineBasicBlock::iterator &NextMBBI);
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
- bool expandMOVImmSimple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned BitSize,
- unsigned OneChunks,
- unsigned ZeroChunks);
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
@@ -79,6 +75,9 @@ private:
bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandSetTagLoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
};
} // end anonymous namespace
@@ -104,279 +103,6 @@ static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
}
}
-/// Helper function which extracts the specified 16-bit chunk from a
-/// 64-bit value.
-static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
- assert(ChunkIdx < 4 && "Out of range chunk index specified!");
-
- return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
-}
-
-/// Check whether the given 16-bit chunk replicated to full 64-bit width
-/// can be materialized with an ORR instruction.
-static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
- Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
-
- return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
-}
-
-/// Check for identical 16-bit chunks within the constant and if so
-/// materialize them with a single ORR instruction. The remaining one or two
-/// 16-bit chunks will be materialized with MOVK instructions.
-///
-/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
-/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
-/// an ORR instruction.
-static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const AArch64InstrInfo *TII) {
- using CountMap = DenseMap<uint64_t, unsigned>;
-
- CountMap Counts;
-
- // Scan the constant and count how often every chunk occurs.
- for (unsigned Idx = 0; Idx < 4; ++Idx)
- ++Counts[getChunk(UImm, Idx)];
-
- // Traverse the chunks to find one which occurs more than once.
- for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
- Chunk != End; ++Chunk) {
- const uint64_t ChunkVal = Chunk->first;
- const unsigned Count = Chunk->second;
-
- uint64_t Encoding = 0;
-
- // We are looking for chunks which have two or three instances and can be
- // materialized with an ORR instruction.
- if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
- continue;
-
- const bool CountThree = Count == 3;
- // Create the ORR-immediate instruction.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .add(MI.getOperand(0))
- .addReg(AArch64::XZR)
- .addImm(Encoding);
-
- const unsigned DstReg = MI.getOperand(0).getReg();
- const bool DstIsDead = MI.getOperand(0).isDead();
-
- unsigned ShiftAmt = 0;
- uint64_t Imm16 = 0;
- // Find the first chunk not materialized with the ORR instruction.
- for (; ShiftAmt < 64; ShiftAmt += 16) {
- Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
-
- if (Imm16 != ChunkVal)
- break;
- }
-
- // Create the first MOVK instruction.
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
- .addReg(DstReg,
- RegState::Define | getDeadRegState(DstIsDead && CountThree))
- .addReg(DstReg)
- .addImm(Imm16)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
-
- // In case we have three instances the whole constant is now materialized
- // and we can exit.
- if (CountThree) {
- transferImpOps(MI, MIB, MIB1);
- MI.eraseFromParent();
- return true;
- }
-
- // Find the remaining chunk which needs to be materialized.
- for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
- Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
-
- if (Imm16 != ChunkVal)
- break;
- }
-
- // Create the second MOVK instruction.
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addImm(Imm16)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
-
- transferImpOps(MI, MIB, MIB2);
- MI.eraseFromParent();
- return true;
- }
-
- return false;
-}
-
-/// Check whether this chunk matches the pattern '1...0...'. This pattern
-/// starts a contiguous sequence of ones if we look at the bits from the LSB
-/// towards the MSB.
-static bool isStartChunk(uint64_t Chunk) {
- if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
- return false;
-
- return isMask_64(~Chunk);
-}
-
-/// Check whether this chunk matches the pattern '0...1...' This pattern
-/// ends a contiguous sequence of ones if we look at the bits from the LSB
-/// towards the MSB.
-static bool isEndChunk(uint64_t Chunk) {
- if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
- return false;
-
- return isMask_64(Chunk);
-}
-
-/// Clear or set all bits in the chunk at the given index.
-static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
- const uint64_t Mask = 0xFFFF;
-
- if (Clear)
- // Clear chunk in the immediate.
- Imm &= ~(Mask << (Idx * 16));
- else
- // Set all bits in the immediate for the particular chunk.
- Imm |= Mask << (Idx * 16);
-
- return Imm;
-}
-
-/// Check whether the constant contains a sequence of contiguous ones,
-/// which might be interrupted by one or two chunks. If so, materialize the
-/// sequence of contiguous ones with an ORR instruction.
-/// Materialize the chunks which are either interrupting the sequence or outside
-/// of the sequence with a MOVK instruction.
-///
-/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
-/// which ends the sequence (0...1...). Then we are looking for constants which
-/// contain at least one S and E chunk.
-/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
-///
-/// We are also looking for constants like |S|A|B|E| where the contiguous
-/// sequence of ones wraps around the MSB into the LSB.
-static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const AArch64InstrInfo *TII) {
- const int NotSet = -1;
- const uint64_t Mask = 0xFFFF;
-
- int StartIdx = NotSet;
- int EndIdx = NotSet;
- // Try to find the chunks which start/end a contiguous sequence of ones.
- for (int Idx = 0; Idx < 4; ++Idx) {
- int64_t Chunk = getChunk(UImm, Idx);
- // Sign extend the 16-bit chunk to 64-bit.
- Chunk = (Chunk << 48) >> 48;
-
- if (isStartChunk(Chunk))
- StartIdx = Idx;
- else if (isEndChunk(Chunk))
- EndIdx = Idx;
- }
-
- // Early exit in case we can't find a start/end chunk.
- if (StartIdx == NotSet || EndIdx == NotSet)
- return false;
-
- // Outside of the contiguous sequence of ones everything needs to be zero.
- uint64_t Outside = 0;
- // Chunks between the start and end chunk need to have all their bits set.
- uint64_t Inside = Mask;
-
- // If our contiguous sequence of ones wraps around from the MSB into the LSB,
- // just swap indices and pretend we are materializing a contiguous sequence
- // of zeros surrounded by a contiguous sequence of ones.
- if (StartIdx > EndIdx) {
- std::swap(StartIdx, EndIdx);
- std::swap(Outside, Inside);
- }
-
- uint64_t OrrImm = UImm;
- int FirstMovkIdx = NotSet;
- int SecondMovkIdx = NotSet;
-
- // Find out which chunks we need to patch up to obtain a contiguous sequence
- // of ones.
- for (int Idx = 0; Idx < 4; ++Idx) {
- const uint64_t Chunk = getChunk(UImm, Idx);
-
- // Check whether we are looking at a chunk which is not part of the
- // contiguous sequence of ones.
- if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
- OrrImm = updateImm(OrrImm, Idx, Outside == 0);
-
- // Remember the index we need to patch.
- if (FirstMovkIdx == NotSet)
- FirstMovkIdx = Idx;
- else
- SecondMovkIdx = Idx;
-
- // Check whether we are looking a chunk which is part of the contiguous
- // sequence of ones.
- } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
- OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
-
- // Remember the index we need to patch.
- if (FirstMovkIdx == NotSet)
- FirstMovkIdx = Idx;
- else
- SecondMovkIdx = Idx;
- }
- }
- assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
-
- // Create the ORR-immediate instruction.
- uint64_t Encoding = 0;
- AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .add(MI.getOperand(0))
- .addReg(AArch64::XZR)
- .addImm(Encoding);
-
- const unsigned DstReg = MI.getOperand(0).getReg();
- const bool DstIsDead = MI.getOperand(0).isDead();
-
- const bool SingleMovk = SecondMovkIdx == NotSet;
- // Create the first MOVK instruction.
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
- .addReg(DstReg,
- RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
- .addReg(DstReg)
- .addImm(getChunk(UImm, FirstMovkIdx))
- .addImm(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
-
- // Early exit in case we only need to emit a single MOVK instruction.
- if (SingleMovk) {
- transferImpOps(MI, MIB, MIB1);
- MI.eraseFromParent();
- return true;
- }
-
- // Create the second MOVK instruction.
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addImm(getChunk(UImm, SecondMovkIdx))
- .addImm(
- AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
-
- transferImpOps(MI, MIB, MIB2);
- MI.eraseFromParent();
- return true;
-}
-
/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
@@ -385,7 +111,6 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned DstReg = MI.getOperand(0).getReg();
uint64_t Imm = MI.getOperand(1).getImm();
- const unsigned Mask = 0xFFFF;
if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
// Useless def, and we don't want to risk creating an invalid ORR (which
@@ -394,194 +119,50 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
return true;
}
- // Scan the immediate and count the number of 16-bit chunks which are either
- // all ones or all zeros.
- unsigned OneChunks = 0;
- unsigned ZeroChunks = 0;
- for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
- const unsigned Chunk = (Imm >> Shift) & Mask;
- if (Chunk == Mask)
- OneChunks++;
- else if (Chunk == 0)
- ZeroChunks++;
- }
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
+ assert(Insn.size() != 0);
- // FIXME: Prefer MOVZ/MOVN over ORR because of the rules for the "mov"
- // alias.
+ SmallVector<MachineInstrBuilder, 4> MIBS;
+ for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
+ bool LastItem = std::next(I) == E;
+ switch (I->Opcode)
+ {
+ default: llvm_unreachable("unhandled!"); break;
- // Try a single ORR.
- uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
- uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
- .add(MI.getOperand(0))
- .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
- .addImm(Encoding);
- transferImpOps(MI, MIB, MIB);
- MI.eraseFromParent();
- return true;
- }
-
- // Two instruction sequences.
- //
- // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
- // fastest sequence with fast literal generation.
- if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2)
- return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
-
- assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
- "MOVZ/MOVK pair");
-
- // Try other two-instruction sequences.
-
- // 64-bit ORR followed by MOVK.
- // We try to construct the ORR immediate in three different ways: either we
- // zero out the chunk which will be replaced, we fill the chunk which will
- // be replaced with ones, or we take the bit pattern from the other half of
- // the 64-bit immediate. This is comprehensive because of the way ORR
- // immediates are constructed.
- for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
- uint64_t ShiftedMask = (0xFFFFULL << Shift);
- uint64_t ZeroChunk = UImm & ~ShiftedMask;
- uint64_t OneChunk = UImm | ShiftedMask;
- uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
- uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
- if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
- AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
- AArch64_AM::processLogicalImmediate(ReplicateChunk,
- BitSize, Encoding)) {
- // Create the ORR-immediate instruction.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .add(MI.getOperand(0))
- .addReg(AArch64::XZR)
- .addImm(Encoding);
-
- // Create the MOVK instruction.
- const unsigned Imm16 = getChunk(UImm, Shift / 16);
- const unsigned DstReg = MI.getOperand(0).getReg();
- const bool DstIsDead = MI.getOperand(0).isDead();
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addImm(Imm16)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
-
- transferImpOps(MI, MIB, MIB1);
- MI.eraseFromParent();
- return true;
+ case AArch64::ORRWri:
+ case AArch64::ORRXri:
+ MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .add(MI.getOperand(0))
+ .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+ .addImm(I->Op2));
+ break;
+ case AArch64::MOVNWi:
+ case AArch64::MOVNXi:
+ case AArch64::MOVZWi:
+ case AArch64::MOVZXi: {
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .addReg(DstReg, RegState::Define |
+ getDeadRegState(DstIsDead && LastItem))
+ .addImm(I->Op1)
+ .addImm(I->Op2));
+ } break;
+ case AArch64::MOVKWi:
+ case AArch64::MOVKXi: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(DstIsDead && LastItem))
+ .addReg(DstReg)
+ .addImm(I->Op1)
+ .addImm(I->Op2));
+ } break;
}
}
-
- // FIXME: Add more two-instruction sequences.
-
- // Three instruction sequences.
- //
- // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
- // the fastest sequence with fast literal generation. (If neither MOVK is
- // part of a fast literal generation pair, it could be slower than the
- // four-instruction sequence, but we won't worry about that for now.)
- if (OneChunks || ZeroChunks)
- return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
-
- // Check for identical 16-bit chunks within the constant and if so materialize
- // them with a single ORR instruction. The remaining one or two 16-bit chunks
- // will be materialized with MOVK instructions.
- if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
- return true;
-
- // Check whether the constant contains a sequence of contiguous ones, which
- // might be interrupted by one or two chunks. If so, materialize the sequence
- // of contiguous ones with an ORR instruction. Materialize the chunks which
- // are either interrupting the sequence or outside of the sequence with a
- // MOVK instruction.
- if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
- return true;
-
- // We found no possible two or three instruction sequence; use the general
- // four-instruction sequence.
- return expandMOVImmSimple(MBB, MBBI, BitSize, OneChunks, ZeroChunks);
-}
-
-/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
-/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
-bool AArch64ExpandPseudo::expandMOVImmSimple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned BitSize,
- unsigned OneChunks,
- unsigned ZeroChunks) {
- MachineInstr &MI = *MBBI;
- unsigned DstReg = MI.getOperand(0).getReg();
- uint64_t Imm = MI.getOperand(1).getImm();
- const unsigned Mask = 0xFFFF;
-
- // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
- // more MOVK instructions to insert additional 16-bit portions into the
- // lower bits.
- bool isNeg = false;
-
- // Use MOVN to materialize the high bits if we have more all one chunks
- // than all zero chunks.
- if (OneChunks > ZeroChunks) {
- isNeg = true;
- Imm = ~Imm;
- }
-
- unsigned FirstOpc;
- if (BitSize == 32) {
- Imm &= (1LL << 32) - 1;
- FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
- } else {
- FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
- }
- unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
- unsigned LastShift = 0; // LSL amount for last MOVK
- if (Imm != 0) {
- unsigned LZ = countLeadingZeros(Imm);
- unsigned TZ = countTrailingZeros(Imm);
- Shift = (TZ / 16) * 16;
- LastShift = ((63 - LZ) / 16) * 16;
- }
- unsigned Imm16 = (Imm >> Shift) & Mask;
- bool DstIsDead = MI.getOperand(0).isDead();
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
- .addReg(DstReg, RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
- .addImm(Imm16)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
-
- // If a MOVN was used for the high bits of a negative value, flip the rest
- // of the bits back for use with MOVK.
- if (isNeg)
- Imm = ~Imm;
-
- if (Shift == LastShift) {
- transferImpOps(MI, MIB1, MIB1);
- MI.eraseFromParent();
- return true;
- }
-
- MachineInstrBuilder MIB2;
- unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
- while (Shift < LastShift) {
- Shift += 16;
- Imm16 = (Imm >> Shift) & Mask;
- if (Imm16 == (isNeg ? Mask : 0))
- continue; // This 16-bit portion is already set correctly.
- MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
- .addReg(DstReg)
- .addImm(Imm16)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
- }
-
- transferImpOps(MI, MIB1, MIB2);
+ transferImpOps(MI, MIBS.front(), MIBS.back());
MI.eraseFromParent();
return true;
}
@@ -759,6 +340,64 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
return true;
}
+bool AArch64ExpandPseudo::expandSetTagLoop(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ Register SizeReg = MI.getOperand(2).getReg();
+ Register AddressReg = MI.getOperand(3).getReg();
+
+ MachineFunction *MF = MBB.getParent();
+
+ bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
+ const unsigned OpCode =
+ ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+
+ auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), LoopBB);
+ MF->insert(++LoopBB->getIterator(), DoneBB);
+
+ BuildMI(LoopBB, DL, TII->get(OpCode))
+ .addDef(AddressReg)
+ .addReg(AddressReg)
+ .addReg(AddressReg)
+ .addImm(2)
+ .cloneMemRefs(MI)
+ .setMIFlags(MI.getFlags());
+ BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
+ .addDef(SizeReg)
+ .addReg(SizeReg)
+ .addImm(16 * 2)
+ .addImm(0);
+ BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
+
+ LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(DoneBB);
+
+ DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+ DoneBB->transferSuccessors(&MBB);
+
+ MBB.addSuccessor(LoopBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ // Recompute liveness bottom up.
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *DoneBB);
+ computeAndAddLiveIns(LiveRegs, *LoopBB);
+ // Do an extra pass in the loop to get the loop carried dependencies right.
+ // FIXME: is this necessary?
+ LoopBB->clearLiveIns();
+ computeAndAddLiveIns(LiveRegs, *LoopBB);
+ DoneBB->clearLiveIns();
+ computeAndAddLiveIns(LiveRegs, *DoneBB);
+
+ return true;
+}
+
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -928,6 +567,12 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
MF->getTarget().getCodeModel() == CodeModel::Kernel)
SysReg = AArch64SysReg::TPIDR_EL1;
+ else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
+ SysReg = AArch64SysReg::TPIDR_EL3;
+ else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
+ SysReg = AArch64SysReg::TPIDR_EL2;
+ else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
+ SysReg = AArch64SysReg::TPIDR_EL1;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
.addImm(SysReg);
MI.eraseFromParent();
@@ -986,6 +631,46 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case AArch64::IRGstack: {
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ const AArch64FrameLowering *TFI =
+ MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
+
+ // IRG does not allow immediate offset. getTaggedBasePointerOffset should
+ // almost always point to SP-after-prologue; if not, emit a longer
+ // instruction sequence.
+ int BaseOffset = -AFI->getTaggedBasePointerOffset();
+ unsigned FrameReg;
+ int FrameRegOffset = TFI->resolveFrameOffsetReference(
+ MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
+ /*ForSimm=*/true);
+ Register SrcReg = FrameReg;
+ if (FrameRegOffset != 0) {
+ // Use output register as temporary.
+ SrcReg = MI.getOperand(0).getReg();
+ emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
+ FrameRegOffset, TII);
+ }
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
+ .add(MI.getOperand(0))
+ .addUse(SrcReg)
+ .add(MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+ }
+ case AArch64::TAGPstack: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(4));
+ MI.eraseFromParent();
+ return true;
+ }
+ case AArch64::STGloop:
+ case AArch64::STZGloop:
+ return expandSetTagLoop(MBB, MBBI, NextMBBI);
}
return false;
}
diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
index bc9a5ca97fea..3b3182128c4c 100644
--- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
+++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp
@@ -1,9 +1,8 @@
//===- AArch64FalkorHWPFFix.cpp - Avoid HW prefetcher pitfalls on Falkor --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file For Falkor, we want to avoid HW prefetcher instruction tag collisions
@@ -213,8 +212,8 @@ private:
struct LoadInfo {
LoadInfo() = default;
- unsigned DestReg = 0;
- unsigned BaseReg = 0;
+ Register DestReg;
+ Register BaseReg;
int BaseRegIdx = -1;
const MachineOperand *OffsetOpnd = nullptr;
bool IsPrePost = false;
@@ -648,7 +647,7 @@ static Optional<LoadInfo> getLoadInfo(const MachineInstr &MI) {
return None;
LoadInfo LI;
- LI.DestReg = DestRegIdx == -1 ? 0 : MI.getOperand(DestRegIdx).getReg();
+ LI.DestReg = DestRegIdx == -1 ? Register() : MI.getOperand(DestRegIdx).getReg();
LI.BaseReg = BaseReg;
LI.BaseRegIdx = BaseRegIdx;
LI.OffsetOpnd = OffsetIdx == -1 ? nullptr : &MI.getOperand(OffsetIdx);
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 47550cabb9f0..8dc2768b9597 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1,9 +1,8 @@
//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -305,8 +304,6 @@ public:
} // end anonymous namespace
-#include "AArch64GenCallingConv.inc"
-
/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
@@ -408,10 +405,9 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
bool Is64Bit = (VT == MVT::f64);
// This checks to see if we can use FMOV instructions to materialize
// a constant, otherwise we have to materialize via the constant pool.
- if (TLI.isFPImmLegal(Val, VT)) {
- int Imm =
- Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
- assert((Imm != -1) && "Cannot encode floating-point constant.");
+ int Imm =
+ Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
+ if (Imm != -1) {
unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
@@ -2369,7 +2365,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
AArch64::sub_32);
if ((BW < 32) && !IsBitTest)
- SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
+ SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
// Emit the combined compare and branch instruction.
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
@@ -3608,6 +3604,14 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
+ case Intrinsic::debugtrap: {
+ if (Subtarget->isTargetWindows()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
+ .addImm(0xF000);
+ return true;
+ }
+ break;
+ }
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -4268,7 +4272,7 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
if (NeedTrunc) {
- Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+ Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
Op0IsKill = Op1IsKill = true;
}
@@ -4948,7 +4952,7 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
MVT PtrVT = TLI.getPointerTy(DL);
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
if (IdxVT.bitsLT(PtrVT)) {
- IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+ IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
IdxNIsKill = true;
} else if (IdxVT.bitsGT(PtrVT))
llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
@@ -5172,10 +5176,6 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
}
- // Silence warnings.
- (void)&CC_AArch64_DarwinPCS_VarArg;
- (void)&CC_AArch64_Win64_VarArg;
-
// fall-back to target-independent instruction selection.
return selectOperator(I, I->getOpcode());
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 538a8d7e8fbc..8c6e5cbd5c13 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1,9 +1,8 @@
//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -251,8 +250,7 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (!TFI->hasReservedCallFrame(MF)) {
+ if (!hasReservedCallFrame(MF)) {
unsigned Align = getStackAlignment();
int64_t Amount = I->getOperand(0).getImm();
@@ -588,7 +586,7 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
- bool NeedsWinCFI, bool InProlog = true) {
+ bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
// Ignore instructions that do not operate on SP, i.e. shadow call stack
// instructions and associated CFI instruction.
while (MBBI->getOpcode() == AArch64::STRXpost ||
@@ -674,9 +672,11 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MIB.setMemRefs(MBBI->memoperands());
// Generate a new SEH code that corresponds to the new instruction.
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ *HasWinCFI = true;
InsertSEH(*MIB, *TII,
InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
+ }
return std::prev(MBB.erase(MBBI));
}
@@ -685,7 +685,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
unsigned LocalStackSize,
- bool NeedsWinCFI) {
+ bool NeedsWinCFI,
+ bool *HasWinCFI) {
if (AArch64InstrInfo::isSEHInstruction(MI))
return;
@@ -732,6 +733,7 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
if (NeedsWinCFI) {
+ *HasWinCFI = true;
auto MBBI = std::next(MachineBasicBlock::iterator(MI));
assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
@@ -803,7 +805,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
!MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool HasFP = hasFP(MF);
bool NeedsWinCFI = needsWinCFI(MF);
- MF.setHasWinCFI(NeedsWinCFI);
+ bool HasWinCFI = false;
+ auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
+
bool IsFunclet = MBB.isEHFuncletEntry();
// At this point, we're going to decide whether or not the function uses a
@@ -838,6 +842,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
+ // Set tagged base pointer to the bottom of the stack frame.
+ // Ideally it should match SP value after prologue.
+ AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
@@ -859,7 +867,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++NumRedZoneFunctions;
} else {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI);
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (!NeedsWinCFI) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@@ -872,9 +880,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
+ }
return;
}
@@ -892,11 +902,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI);
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI);
+ MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -908,7 +918,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
- NeedsWinCFI);
+ NeedsWinCFI, &HasWinCFI);
++MBBI;
}
@@ -916,9 +926,24 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// opcodes that we needed to emit. The FP and BP belong to the containing
// function.
if (IsFunclet) {
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // SEH funclets are passed the frame pointer in X1. If the parent
+ // function uses the base register, then the base register is used
+ // directly, and is not retrieved from X1.
+ if (F.hasPersonalityFn()) {
+ EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+ if (isAsynchronousEHPersonality(Per)) {
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
+ .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
+ MBB.addLiveIn(AArch64::X1);
+ }
+ }
+
return;
}
@@ -934,12 +959,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Note: All stores of callee-saved registers are marked as "FrameSetup".
// This code marks the instruction(s) that set the FP also.
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI);
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
}
if (windowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 4;
if (NeedsWinCFI) {
+ HasWinCFI = true;
// alloc_l can hold at most 256MB, so assume that NumBytes doesn't
// exceed this amount. We need to move at most 2^24 - 1 into x15.
// This is at most two instructions, MOVZ followed by MOVK.
@@ -983,9 +1009,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
+ }
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
@@ -993,9 +1021,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
+ }
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
.addReg(AArch64::X16, RegState::Kill)
@@ -1004,9 +1034,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
.setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
+ }
break;
}
@@ -1015,10 +1047,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
- if (NeedsWinCFI)
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
- .addImm(NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
NumBytes = 0;
}
@@ -1038,7 +1072,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup, false, NeedsWinCFI);
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
if (NeedsRealignment) {
const unsigned Alignment = MFI.getMaxAlignment();
@@ -1061,10 +1095,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
AFI->setStackRealigned(true);
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(NumBytes & andMaskEncoded)
.setMIFlag(MachineInstr::FrameSetup);
+ }
}
}
@@ -1078,16 +1114,19 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
+ }
}
// The very last FrameSetup instruction indicates the end of prologue. Emit a
// SEH opcode indicating the prologue end.
- if (NeedsWinCFI)
+ if (NeedsWinCFI && HasWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
+ }
if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
@@ -1231,7 +1270,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc DL;
bool IsTailCallReturn = false;
bool NeedsWinCFI = needsWinCFI(MF);
+ bool HasWinCFI = false;
bool IsFunclet = false;
+ auto WinCFI = make_scope_exit([&]() {
+ if (!MF.hasWinCFI())
+ MF.setHasWinCFI(HasWinCFI);
+ });
if (MBB.end() != MBBI) {
DL = MBBI->getDebugLoc();
@@ -1326,7 +1370,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// If the offset is 0, convert it to a post-index ldp.
if (OffsetOp.getImm() == 0)
convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false);
+ MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
else {
// If not, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
@@ -1348,19 +1392,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
break;
} else if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
- NeedsWinCFI);
+ NeedsWinCFI, &HasWinCFI);
}
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
.setMIFlag(MachineInstr::FrameDestroy);
+ }
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
- false, NeedsWinCFI);
- if (NeedsWinCFI)
+ false, NeedsWinCFI, &HasWinCFI);
+ if (NeedsWinCFI && HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
@@ -1392,12 +1438,14 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackRestoreBytes, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI);
+ NeedsWinCFI, &HasWinCFI);
if (Done) {
- if (NeedsWinCFI)
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBB.getFirstTerminator(), DL,
TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
+ }
return;
}
@@ -1436,11 +1484,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false,
- NeedsWinCFI);
+ NeedsWinCFI, &HasWinCFI);
}
- if (NeedsWinCFI)
+ if (NeedsWinCFI && HasWinCFI)
BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
.setMIFlag(MachineInstr::FrameDestroy);
+
+ MF.setHasWinCFI(HasWinCFI);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1450,25 +1500,66 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
- return resolveFrameIndexReference(MF, FI, FrameReg);
+ return resolveFrameIndexReference(
+ MF, FI, FrameReg,
+ /*PreferFP=*/
+ MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
+ /*ForSimm=*/false);
}
-int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
- bool PreferFP) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+int AArch64FrameLowering::getNonLocalFrameIndexReference(
+ const MachineFunction &MF, int FI) const {
+ return getSEHFrameIndexOffset(MF, FI);
+}
+
+static int getFPOffset(const MachineFunction &MF, int ObjectOffset) {
+ const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
- int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
- int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
+ return ObjectOffset + FixedObject + 16;
+}
+
+static int getStackOffset(const MachineFunction &MF, int ObjectOffset) {
+ const auto &MFI = MF.getFrameInfo();
+ return ObjectOffset + MFI.getStackSize();
+}
+
+int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
+ return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
+ ? getFPOffset(MF, ObjectOffset)
+ : getStackOffset(MF, ObjectOffset);
+}
+
+int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ bool PreferFP,
+ bool ForSimm) const {
+ const auto &MFI = MF.getFrameInfo();
+ int ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
- -((int)AFI->getCalleeSavedStackSize());
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
+ PreferFP, ForSimm);
+}
+
+int AArch64FrameLowering::resolveFrameOffsetReference(
+ const MachineFunction &MF, int ObjectOffset, bool isFixed,
+ unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
+ const auto &MFI = MF.getFrameInfo();
+ const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+ const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
+ int FPOffset = getFPOffset(MF, ObjectOffset);
+ int Offset = getStackOffset(MF, ObjectOffset);
+ bool isCSR =
+ !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -1489,11 +1580,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
UseFP = true;
} else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
- // If the FPOffset is negative, we have to keep in mind that the
- // available offset range for negative offsets is smaller than for
- // positive ones. If an offset is
- // available via the FP and the SP, use whichever is closest.
- bool FPOffsetFits = FPOffset >= -256;
+ // If the FPOffset is negative and we're producing a signed immediate, we
+ // have to keep in mind that the available offset range for negative
+ // offsets is smaller than for positive ones. If an offset is available
+ // via the FP and the SP, use whichever is closest.
+ bool FPOffsetFits = !ForSimm || FPOffset >= -256;
PreferFP |= Offset > -FPOffset;
if (MFI.hasVarSizedObjects()) {
@@ -1517,6 +1608,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Funclets access the locals contained in the parent's stack frame
// via the frame pointer, so we have to use the FP in the parent
// function.
+ (void) Subtarget;
assert(
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
"Funclets should only be present on Win64");
@@ -1759,8 +1851,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
static_cast<char>(-8) & 0x7f, // addend (sleb128)
};
- unsigned CFIIndex =
- MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, CFIInst));
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+ nullptr, StringRef(CFIInst, sizeof(CFIInst))));
BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
@@ -2104,9 +2196,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;
- if (MBBI->isTerminator())
- return;
-
// Create an UnwindHelp object.
int UnwindHelpFI =
MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
@@ -2114,8 +2203,10 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// We need to store -2 into the UnwindHelp object at the start of the
// function.
DebugLoc DL;
- RS->enterBasicBlock(MBB);
- unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
+ RS->enterBasicBlockEnd(MBB);
+ RS->backward(std::prev(MBBI));
+ unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
+ assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
.addReg(DstReg, getKillRegState(true))
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 0d0385acf46e..6dbd34b2189f 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -1,9 +1,8 @@
//==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,8 +40,11 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg,
- bool PreferFP = false) const;
+ unsigned &FrameReg, bool PreferFP,
+ bool ForSimm) const;
+ int resolveFrameOffsetReference(const MachineFunction &MF, int ObjectOffset,
+ bool isFixed, unsigned &FrameReg,
+ bool PreferFP, bool ForSimm) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -79,6 +81,9 @@ public:
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
unsigned &FrameReg,
bool IgnoreSPUpdates) const override;
+ int getNonLocalFrameIndexReference(const MachineFunction &MF,
+ int FI) const override;
+ int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
diff --git a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 37720cbd32bb..528756b34856 100644
--- a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -1,9 +1,8 @@
//===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -111,6 +110,10 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
// 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx.
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 49: Shift scalar with 64 bit shift imm
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
};
bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fc9855f6a0da..cd7e927ac80c 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,7 +52,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- ForCodeSize = MF.getFunction().optForSize();
+ ForCodeSize = MF.getFunction().hasOptSize();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -92,6 +91,12 @@ public:
bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
}
+ bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
+ }
+ bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
+ }
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed(N, 1, Base, OffImm);
}
@@ -152,6 +157,9 @@ public:
bool tryIndexedLoad(SDNode *N);
+ bool trySelectStackSlotTagP(SDNode *N);
+ void SelectTagP(SDNode *N);
+
void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
unsigned SubRegIdx);
void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -180,7 +188,12 @@ private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
- SDValue &OffImm);
+ SDValue &OffImm) {
+ return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
+ }
+ bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
+ unsigned Size, SDValue &Base,
+ SDValue &OffImm);
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
@@ -676,12 +689,13 @@ static bool isWorthFoldingADDlow(SDValue N) {
return true;
}
-/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
-bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
- SDValue &Base,
- SDValue &OffImm) {
+bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
+ unsigned BW, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
SDLoc dl(N);
const DataLayout &DL = CurDAG->getDataLayout();
const TargetLowering *TLI = getTargetLowering();
@@ -692,26 +706,43 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
return true;
}
- // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+ // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
// selected here doesn't support labels/immediates, only base+offset.
-
if (CurDAG->isBaseWithConstantOffset(N)) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int64_t RHSC = RHS->getSExtValue();
- unsigned Scale = Log2_32(Size);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
- RHSC < (0x40 << Scale)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ if (IsSignedImm) {
+ int64_t RHSC = RHS->getSExtValue();
+ unsigned Scale = Log2_32(Size);
+ int64_t Range = 0x1LL << (BW - 1);
+
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
+ RHSC < (Range << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+ return true;
+ }
+ } else {
+ // unsigned Immediate
+ uint64_t RHSC = RHS->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+ uint64_t Range = 0x1ULL << BW;
+
+ if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+ return true;
}
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
- return true;
}
}
}
-
// Base only. The address will be materialized into a register before
// the memory is accessed.
// add x0, Xbase, #offset
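
The generalized SelectAddrModeIndexedBitWidth above keeps the original structure but parameterizes the immediate width and signedness, so one routine now serves the existing 7-bit signed forms as well as the new 9-bit signed and 6-bit unsigned 128-bit forms. A standalone sketch of the range test it performs (assuming the access size is a power of two):

    #include <cassert>
    #include <cstdint>

    // Hedged sketch of the scaled-immediate range check used above.
    // Size is the access size in bytes (power of two); Scale = log2(Size).
    static bool fitsScaledImm(int64_t Off, unsigned BW, unsigned Size,
                              bool IsSigned) {
      unsigned Scale = 0;
      while ((1u << Scale) < Size) ++Scale;     // Log2_32(Size)
      if (Off & (Size - 1))                     // must be Size-aligned
        return false;
      if (IsSigned) {
        int64_t Range = int64_t(1) << (BW - 1); // e.g. BW=9 -> [-256, 256)
        return Off >= -(Range << Scale) && Off < (Range << Scale);
      }
      uint64_t URange = uint64_t(1) << BW;      // e.g. BW=6 -> [0, 64)
      return Off >= 0 && uint64_t(Off) < (URange << Scale);
    }

    int main() {
      // 9-bit signed, 16-byte scale: byte offsets in [-4096, 4096)
      assert(fitsScaledImm(-4096, 9, 16, true));
      assert(!fitsScaledImm(4096, 9, 16, true));
      // 6-bit unsigned, 16-byte scale: byte offsets in [0, 1024)
      assert(fitsScaledImm(1008, 6, 16, false));
      assert(!fitsScaledImm(1024, 6, 16, false));
      return 0;
    }

These bounds correspond to the am_indexeds9s128 / am_indexedu6s128 complex patterns added in AArch64InstrFormats.td further down.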
@@ -2650,6 +2681,14 @@ bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
return true;
}
+ if (RegString->getString() == "pc") {
+ ReplaceNode(N, CurDAG->getMachineNode(
+ AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
+ CurDAG->getTargetConstant(0, DL, MVT::i32),
+ N->getOperand(0)));
+ return true;
+ }
+
return false;
}
@@ -2754,6 +2793,58 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
return true;
}
+bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
+ // tagp(FrameIndex, IRGstack, tag_offset):
+ // since the offset between FrameIndex and IRGstack is a compile-time
+ // constant, this can be lowered to a single ADDG instruction.
+ if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
+ return false;
+ }
+
+ SDValue IRG_SP = N->getOperand(2);
+ if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
+ cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
+ Intrinsic::aarch64_irg_sp) {
+ return false;
+ }
+
+ const TargetLowering *TLI = getTargetLowering();
+ SDLoc DL(N);
+ int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
+ SDValue FiOp = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+
+ SDNode *Out = CurDAG->getMachineNode(
+ AArch64::TAGPstack, DL, MVT::i64,
+ {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
+ CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+ ReplaceNode(N, Out);
+ return true;
+}
+
+void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
+ assert(isa<ConstantSDNode>(N->getOperand(3)) &&
+ "llvm.aarch64.tagp third argument must be an immediate");
+ if (trySelectStackSlotTagP(N))
+ return;
+ // FIXME: above applies in any case when offset between Op1 and Op2 is a
+ // compile-time constant, not just for stack allocations.
+
+ // General case for unrelated pointers in Op1 and Op2.
+ SDLoc DL(N);
+ int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+ SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
+ {N->getOperand(1), N->getOperand(2)});
+ SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
+ {SDValue(N1, 0), N->getOperand(2)});
+ SDNode *N3 = CurDAG->getMachineNode(
+ AArch64::ADDG, DL, MVT::i64,
+ {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
+ CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+ ReplaceNode(N, N3);
+}
+
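
For the general case handled by SelectTagP above, the emitted SUBP/ADD/ADDG sequence can be read as: compute the tag-insensitive distance from Op2 to Op1, re-apply it to the tagged Op2 so the result carries Op1's address bits with Op2's tag, then let ADDG bump the logical tag by the constant offset. A rough model with the logical tag held in bits [59:56] and all MTE checking details ignored (an illustration only, not the architectural definition):

    #include <cassert>
    #include <cstdint>

    // Hedged model of the general-case tagp lowering above. Addresses live
    // in bits [55:0], the logical tag in bits [59:56]; hardware differs in
    // detail (SUBP operand widths, tag exclusion masks, etc.).
    static uint64_t addr(uint64_t P) { return P & ((1ULL << 56) - 1); }
    static uint64_t tag(uint64_t P) { return (P >> 56) & 0xF; }
    static uint64_t withTag(uint64_t P, uint64_t T) {
      return addr(P) | ((T & 0xF) << 56);
    }

    static uint64_t tagpModel(uint64_t Op1, uint64_t Op2, unsigned TagOffset) {
      uint64_t Dist = addr(Op1) - addr(Op2);     // SUBP: tag-insensitive diff
      uint64_t Mid = Op2 + Dist;                 // ADD: Op1 address, Op2 tag
      return withTag(Mid, tag(Mid) + TagOffset); // ADDG #0, #TagOffset
    }

    int main() {
      uint64_t Op1 = withTag(0x1000, 0x7); // pointer being retagged
      uint64_t Op2 = withTag(0x2000, 0x3); // tagged base (e.g. from irg)
      uint64_t R = tagpModel(Op1, Op2, 2);
      assert(addr(R) == 0x1000 && tag(R) == 0x5); // Op1 address, Op2 tag + 2
      return 0;
    }

The stack-slot fast path above avoids all of this because the offset from the IRG'd SP is a compile-time constant, so a single TAGPstack suffices.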
void AArch64DAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -3247,6 +3338,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
switch (IntNo) {
default:
break;
+ case Intrinsic::aarch64_tagp:
+ SelectTagP(Node);
+ return;
case Intrinsic::aarch64_neon_tbl2:
SelectTable(Node, 2,
VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index e01ca14d7f63..7becc99fb5c7 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64ExpandImm.h"
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
@@ -55,9 +55,11 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
@@ -87,6 +89,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
@@ -454,6 +457,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
setOperationAction(ISD::FMAXIMUM, Ty, Legal);
+ setOperationAction(ISD::LROUND, Ty, Legal);
+ setOperationAction(ISD::LLROUND, Ty, Legal);
+ setOperationAction(ISD::LRINT, Ty, Legal);
+ setOperationAction(ISD::LLRINT, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
@@ -544,9 +551,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ if (Subtarget->isTargetWindows())
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
+ // Try to create BICs for vector ANDs.
+ setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV..
@@ -608,9 +619,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setPrefLoopAlignment(STI.getPrefLoopAlignment());
// Only change the limit for entries in a jump table if specified by
- // the subtarget, but not at the command line.
+ // the sub target, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
- if (MaxJT && getMaximumJumpTableSize() == 0)
+ if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
@@ -658,14 +669,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
- // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
- // -> v8f16 conversions.
+ // i8 vector elements also need promotion to i32 for v8i8
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
@@ -676,18 +682,23 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
+ } else {
+ // when AArch64 doesn't have fullfp16 support, promote the input
+ // to i32 first.
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
+ }
+
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
- setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
- setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
- setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
- setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
- setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
- setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
-
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
@@ -696,14 +707,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Vector reductions
- for (MVT VT : MVT::integer_valuetypes()) {
+ for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
+ MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
- for (MVT VT : MVT::fp_valuetypes()) {
+ for (MVT VT : { MVT::v4f16, MVT::v2f32,
+ MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
}
@@ -726,6 +739,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
@@ -745,6 +759,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, Ty, Legal);
}
+ if (Subtarget->hasFullFP16()) {
+ for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
+ setOperationAction(ISD::FFLOOR, Ty, Legal);
+ setOperationAction(ISD::FNEARBYINT, Ty, Legal);
+ setOperationAction(ISD::FCEIL, Ty, Legal);
+ setOperationAction(ISD::FRINT, Ty, Legal);
+ setOperationAction(ISD::FTRUNC, Ty, Legal);
+ setOperationAction(ISD::FROUND, Ty, Legal);
+ }
+ }
+
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
@@ -783,7 +808,6 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
@@ -1052,10 +1076,9 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
return MVT::i64;
}
-bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- unsigned Align,
- bool *Fast) const {
+bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
@@ -1211,6 +1234,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
+ case AArch64ISD::STG: return "AArch64ISD::STG";
+ case AArch64ISD::STZG: return "AArch64ISD::STZG";
+ case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
+ case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
}
return nullptr;
}
@@ -2326,7 +2353,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SDLoc(Op)).first;
}
-static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
+ SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
@@ -2334,8 +2362,9 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
unsigned NumElts = InVT.getVectorNumElements();
- // f16 vectors are promoted to f32 before a conversion.
- if (InVT.getVectorElementType() == MVT::f16) {
+ // f16 conversions are promoted to f32 when full fp16 is not supported.
+ if (InVT.getVectorElementType() == MVT::f16 &&
+ !Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
return DAG.getNode(
@@ -2743,6 +2772,28 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::localaddress: {
+ const auto &MF = DAG.getMachineFunction();
+ const auto *RegInfo = Subtarget->getRegisterInfo();
+ unsigned Reg = RegInfo->getLocalAddressRegister(MF);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
+ Op.getSimpleValueType());
+ }
+
+ case Intrinsic::eh_recoverfp: {
+ // FIXME: This needs to be implemented to correctly handle highly aligned
+ // stack objects. For now we simply return the incoming FP. Refer D53541
+ // for more details.
+ SDValue FnOp = Op.getOperand(1);
+ SDValue IncomingFPOp = Op.getOperand(2);
+ GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+ auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+ if (!Fn)
+ report_fatal_error(
+ "llvm.eh.recoverfp must take a function as the first argument");
+ return IncomingFPOp;
+ }
}
}
@@ -2797,7 +2848,8 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
unsigned AS = StoreNode->getAddressSpace();
unsigned Align = StoreNode->getAlignment();
if (Align < MemVT.getStoreSize() &&
- !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
+ !allowsMisalignedMemoryAccesses(
+ MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
}
@@ -2900,8 +2952,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerCTPOP(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
- case ISD::AND:
- return LowerVectorAND(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
@@ -2945,8 +2995,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-#include "AArch64GenCallingConv.inc"
-
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
@@ -3167,6 +3215,32 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
+
+ // Conservatively forward X8, since it might be used for aggregate return.
+ if (!CCInfo.isAllocated(AArch64::X8)) {
+ unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
+ Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
+ }
+ }
+ }
+
+ // On Windows, InReg pointers must be returned, so record the pointer in a
+ // virtual register at the start of the function so it can be returned in the
+ // epilogue.
+ if (IsWin64) {
+ for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+ if (Ins[I].Flags.isInReg()) {
+ assert(!FuncInfo->getSRetReturnReg());
+
+ MVT PtrTy = getPointerTy(DAG.getDataLayout());
+ unsigned Reg =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
+
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+ break;
+ }
}
}
@@ -3365,10 +3439,20 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF.arg_begin(),
e = CallerF.arg_end();
- i != e; ++i)
+ i != e; ++i) {
if (i->hasByValAttr())
return false;
+ // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+ // In this case, it is necessary to save/restore X0 in the callee. Tail
+ // call opt interferes with this. So we disable tail call opt when the
+ // caller has an argument with "inreg" attribute.
+
+ // FIXME: Check whether the callee also has an "inreg" argument.
+ if (i->hasInRegAttr())
+ return false;
+ }
+
if (getTargetMachine().Options.GuaranteedTailCallOpt)
return canGuaranteeTCO(CalleeCC) && CCMatch;
@@ -3886,6 +3970,9 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
+ auto &MF = DAG.getMachineFunction();
+ auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+
CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
@@ -3924,6 +4011,23 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+
+ // Windows AArch64 ABIs require that for returning structs by value we copy
+ // the sret argument into X0 for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into X0.
+ if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
+ SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
+ getPointerTy(MF.getDataLayout()));
+
+ unsigned RetValReg = AArch64::X0;
+ Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
+ Flag = Chain.getValue(1);
+
+ RetOps.push_back(
+ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+ }
+
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
@@ -5197,50 +5301,20 @@ SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
return DAG.getFrameIndex(FI, VT);
}
+#define GET_REGISTER_MATCHER
+#include "AArch64GenAsmMatcher.inc"
+
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
- unsigned Reg = StringSwitch<unsigned>(RegName)
- .Case("sp", AArch64::SP)
- .Case("x1", AArch64::X1)
- .Case("w1", AArch64::W1)
- .Case("x2", AArch64::X2)
- .Case("w2", AArch64::W2)
- .Case("x3", AArch64::X3)
- .Case("w3", AArch64::W3)
- .Case("x4", AArch64::X4)
- .Case("w4", AArch64::W4)
- .Case("x5", AArch64::X5)
- .Case("w5", AArch64::W5)
- .Case("x6", AArch64::X6)
- .Case("w6", AArch64::W6)
- .Case("x7", AArch64::X7)
- .Case("w7", AArch64::W7)
- .Case("x18", AArch64::X18)
- .Case("w18", AArch64::W18)
- .Case("x20", AArch64::X20)
- .Case("w20", AArch64::W20)
- .Default(0);
- if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
- !Subtarget->isXRegisterReserved(1)) ||
- ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
- !Subtarget->isXRegisterReserved(2)) ||
- ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
- !Subtarget->isXRegisterReserved(3)) ||
- ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
- !Subtarget->isXRegisterReserved(4)) ||
- ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
- !Subtarget->isXRegisterReserved(5)) ||
- ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
- !Subtarget->isXRegisterReserved(6)) ||
- ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
- !Subtarget->isXRegisterReserved(7)) ||
- ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
- !Subtarget->isXRegisterReserved(18)) ||
- ((Reg == AArch64::X20 || Reg == AArch64::W20) &&
- !Subtarget->isXRegisterReserved(20)))
- Reg = 0;
+ unsigned Reg = MatchRegisterName(RegName);
+ if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
+ const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
+ unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
+ if (!Subtarget->isXRegisterReserved(DwarfRegNum))
+ Reg = 0;
+ }
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
@@ -5398,35 +5472,41 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
return false;
}
-bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
- // FIXME: We should be able to handle f128 as well with a clever lowering.
- if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
- (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
- LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
- return true;
- }
-
+bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool OptForSize) const {
bool IsLegal = false;
- SmallString<128> ImmStrVal;
- Imm.toString(ImmStrVal);
-
+ // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
+ // 16-bit case when target has full fp16 support.
+ // FIXME: We should be able to handle f128 as well with a clever lowering.
+ const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
- IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
+ IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
- IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
+ IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
- IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
-
- if (IsLegal) {
- LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
- << " imm value: " << ImmStrVal << "\n");
- return true;
- }
-
- LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
- << " imm value: " << ImmStrVal << "\n");
- return false;
+ IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
+ // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
+ // generate that fmov.
+
+ // If we can't materialize the value in the fmov immediate field, check if the
+ // value can be encoded as the immediate operand of a logical instruction.
+ // The immediate value will be created with either MOVZ, MOVN, or ORR.
+ if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
+ // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
+ // however the mov+fmov sequence is always better because of the reduced
+ // cache pressure. The timings are still the same if you consider
+ // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
+ // movw+movk is fused). So we limit to at most 2 instructions.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
+ Insn);
+ unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
+ IsLegal = Insn.size() <= Limit;
+ }
+
+ LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
+ << " imm value: "; Imm.dump(););
+ return IsLegal;
}
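
The rewritten isFPImmLegal above now accepts a constant either when it fits the 8-bit fmov immediate encoding (or is +0.0, materialized with fmov from WZR/XZR), or, for f32/f64 only, when its bit pattern can be built with a short MOVZ/MOVN/MOVK sequence followed by fmov. A hedged sketch of just the decision logic, with the two encoder queries passed in as plain parameters since the real ones live in AArch64_AM and AArch64_IMM:

    // Hedged sketch of the decision only; FitsFMovImm8 stands in for
    // AArch64_AM::getFP*Imm(Imm) != -1 and MovImmInsnCount for the length
    // of the AArch64_IMM::expandMOVImm expansion of the bit pattern.
    static bool isFPImmLegalSketch(bool FitsFMovImm8, bool IsPosZero,
                                   unsigned MovImmInsnCount, bool OptForSize,
                                   bool HasFuseLiterals) {
      if (FitsFMovImm8 || IsPosZero)
        return true;                 // single fmov (possibly from zero reg)
      // Otherwise allow a short integer materialization + fmov: one mov at
      // minsize, two by default, five when the subtarget fuses literals.
      unsigned Limit = OptForSize ? 1 : (HasFuseLiterals ? 5 : 2);
      return MovImmInsnCount <= Limit;
    }

    int main() {
      // 1.0 encodes in the fmov immediate field, so it stays legal.
      return isFPImmLegalSketch(true, false, 0, false, false) ? 0 : 1;
    }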
//===----------------------------------------------------------------------===//
@@ -6226,6 +6306,8 @@ static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
@@ -6240,6 +6322,8 @@ static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
@@ -6276,6 +6360,8 @@ static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
@@ -6918,46 +7004,6 @@ static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- EVT VT = Op.getValueType();
-
- BuildVectorSDNode *BVN =
- dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
- if (!BVN) {
- // AND commutes, so try swapping the operands.
- LHS = Op.getOperand(1);
- BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
- }
- if (!BVN)
- return Op;
-
- APInt DefBits(VT.getSizeInBits(), 0);
- APInt UndefBits(VT.getSizeInBits(), 0);
- if (resolveBuildVector(BVN, DefBits, UndefBits)) {
- SDValue NewOp;
-
- // We only have BIC vector immediate instruction, which is and-not.
- DefBits = ~DefBits;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
- DefBits, &LHS)) ||
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
- DefBits, &LHS)))
- return NewOp;
-
- UndefBits = ~UndefBits;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, Op, DAG,
- UndefBits, &LHS)) ||
- (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, Op, DAG,
- UndefBits, &LHS)))
- return NewOp;
- }
-
- // We can always fall back to a non-immediate AND.
- return Op;
-}
-
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
@@ -7799,8 +7845,8 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
// Make v4f16 (only) fcmp operations utilise vector instructions
// v8f16 support will be a little more complicated
- if (LHS.getValueType().getVectorElementType() == MVT::f16) {
- if (!FullFP16 && LHS.getValueType().getVectorNumElements() == 4) {
+ if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
+ if (LHS.getValueType().getVectorNumElements() == 4) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
@@ -7810,8 +7856,8 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return SDValue();
}
- assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
- LHS.getValueType().getVectorElementType() == MVT::f64);
+ assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
+ LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
@@ -8255,6 +8301,110 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
return true;
}
+/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
+/// or upper half of the vector elements.
+static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
+ auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
+ auto *FullVT = cast<VectorType>(FullV->getType());
+ auto *HalfVT = cast<VectorType>(HalfV->getType());
+ return FullVT->getBitWidth() == 2 * HalfVT->getBitWidth();
+ };
+
+ auto extractHalf = [](Value *FullV, Value *HalfV) {
+ auto *FullVT = cast<VectorType>(FullV->getType());
+ auto *HalfVT = cast<VectorType>(HalfV->getType());
+ return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
+ };
+
+ Constant *M1, *M2;
+ Value *S1Op1, *S2Op1;
+ if (!match(Op1, m_ShuffleVector(m_Value(S1Op1), m_Undef(), m_Constant(M1))) ||
+ !match(Op2, m_ShuffleVector(m_Value(S2Op1), m_Undef(), m_Constant(M2))))
+ return false;
+
+ // Check that the operands are half as wide as the result and we extract
+ // half of the elements of the input vectors.
+ if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
+ !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
+ return false;
+
+ // Check the mask extracts either the lower or upper half of vector
+ // elements.
+ int M1Start = -1;
+ int M2Start = -1;
+ int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+ if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
+ !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
+ M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
+ return false;
+
+ return true;
+}
+
+/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
+/// of the vector elements.
+static bool areExtractExts(Value *Ext1, Value *Ext2) {
+ auto areExtDoubled = [](Instruction *Ext) {
+ return Ext->getType()->getScalarSizeInBits() ==
+ 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
+ };
+
+ if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
+ !match(Ext2, m_ZExtOrSExt(m_Value())) ||
+ !areExtDoubled(cast<Instruction>(Ext1)) ||
+ !areExtDoubled(cast<Instruction>(Ext2)))
+ return false;
+
+ return true;
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
+bool AArch64TargetLowering::shouldSinkOperands(
+ Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+ if (!I->getType()->isVectorTy())
+ return false;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_umull:
+ if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
+ return false;
+ Ops.push_back(&II->getOperandUse(0));
+ Ops.push_back(&II->getOperandUse(1));
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ switch (I->getOpcode()) {
+ case Instruction::Sub:
+ case Instruction::Add: {
+ if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+ return false;
+
+ // If the exts' operands extract either the lower or upper elements, we
+ // can sink them too.
+ auto Ext1 = cast<Instruction>(I->getOperand(0));
+ auto Ext2 = cast<Instruction>(I->getOperand(1));
+ if (areExtractShuffleVectors(Ext1, Ext2)) {
+ Ops.push_back(&Ext1->getOperandUse(0));
+ Ops.push_back(&Ext2->getOperandUse(0));
+ }
+
+ Ops.push_back(&I->getOperandUse(0));
+ Ops.push_back(&I->getOperandUse(1));
+
+ return true;
+ }
+ default:
+ return false;
+ }
+ return false;
+}
+
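
shouldSinkOperands, added above, lets CodeGenPrepare move the sext/zext (and half-extracting shufflevector) operands of a vector add/sub or aarch64.neon.umull into the user's block, so instruction selection sees the whole widening pattern and can form saddl/uaddl/ssubl/usubl(2)-style instructions. A simplified standalone version of the "extracts exactly the low or high half" mask test it depends on (the real check also tolerates undef lanes via isExtractSubvectorMask):

    #include <cassert>
    #include <vector>

    // Hedged sketch: the shuffle mask must pick a contiguous run of half
    // the input elements, starting at 0 or at NumInputElts / 2.
    static bool extractsHalf(const std::vector<int> &Mask, int NumInputElts) {
      if ((int)Mask.size() * 2 != NumInputElts)
        return false;
      int Start = Mask[0];
      if (Start != 0 && Start != NumInputElts / 2)
        return false;
      for (int i = 0, e = Mask.size(); i != e; ++i)
        if (Mask[i] != Start + i)
          return false;
      return true;
    }

    int main() {
      assert(extractsHalf({4, 5, 6, 7}, 8));  // high half of a v8 input
      assert(!extractsHalf({1, 2, 3, 4}, 8)); // not aligned to a half
      return 0;
    }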
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType.isSimple() ||
@@ -8377,8 +8527,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(
- BaseAddr, VecTy->getVectorNumElements() * Factor);
+ BaseAddr =
+ Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
+ VecTy->getVectorNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
@@ -8540,7 +8691,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+ BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
+ BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
Builder.CreateCall(StNFunc, Ops);
@@ -8554,13 +8706,12 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
(DstAlign == 0 || DstAlign % AlignCheck == 0));
}
-EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
- const Function &F = MF.getFunction();
- bool CanImplicitFloat = !F.hasFnAttribute(Attribute::NoImplicitFloat);
+EVT AArch64TargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
+ bool CanImplicitFloat =
+ !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
@@ -8571,7 +8722,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
if (memOpAlign(SrcAlign, DstAlign, AlignCheck))
return true;
bool Fast;
- return allowsMisalignedMemoryAccesses(VT, 0, 1, &Fast) && Fast;
+ return allowsMisalignedMemoryAccesses(VT, 0, 1, MachineMemOperand::MONone,
+ &Fast) &&
+ Fast;
};
if (CanUseNEON && IsMemset && !IsSmallMemset &&
@@ -9061,6 +9214,9 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasNEON())
return SDValue();
+ if (!N->getValueType(0).isSimple())
+ return SDValue();
+
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
Op.getOpcode() != ISD::FMUL)
@@ -9323,6 +9479,46 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
+static SDValue performANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue LHS = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ BuildVectorSDNode *BVN =
+ dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
+ if (!BVN)
+ return SDValue();
+
+ // AND does not accept an immediate, so check if we can use a BIC immediate
+ // instruction instead. We do this here instead of using a (and x, (mvni imm))
+ // pattern in isel, because some immediates may be lowered to the preferred
+ // (and x, (movi imm)) form, even though an mvni representation also exists.
+ APInt DefBits(VT.getSizeInBits(), 0);
+ APInt UndefBits(VT.getSizeInBits(), 0);
+ if (resolveBuildVector(BVN, DefBits, UndefBits)) {
+ SDValue NewOp;
+
+ DefBits = ~DefBits;
+ if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
+ DefBits, &LHS)) ||
+ (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
+ DefBits, &LHS)))
+ return NewOp;
+
+ UndefBits = ~UndefBits;
+ if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
+ UndefBits, &LHS)) ||
+ (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
+ UndefBits, &LHS)))
+ return NewOp;
+ }
+
+ return SDValue();
+}
+
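
The new performANDCombine rests on the identity (x & C) == bic(x, ~C): AdvSIMD has no vector AND-with-immediate, but BIC does take a modified immediate, so the combine complements the build_vector constant and retries it against the BICi patterns (which is also why the old LowerVectorAND custom lowering could be removed). A trivial check of the identity on 32-bit lanes:

    #include <cassert>
    #include <cstdint>

    // Hedged sketch: AND with constant C is the same as "bit clear" with ~C.
    // The combine above checks whether ~C happens to be encodable as an
    // AdvSIMD modified immediate for BIC.
    static uint32_t andWithImm(uint32_t X, uint32_t C) { return X & C; }
    static uint32_t bicWithImm(uint32_t X, uint32_t NotC) { return X & ~NotC; }

    int main() {
      uint32_t X = 0x12345678u, C = 0xFFFF00FFu;
      assert(andWithImm(X, C) == bicWithImm(X, ~C)); // (x & C) == bic(x, ~C)
      return 0;
    }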
static SDValue performSRLCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -9598,12 +9794,13 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
DAG.getConstant(NumElems, dl, MVT::i64));
}
-static bool isEssentiallyExtractSubvector(SDValue N) {
- if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
- return true;
-
- return N.getOpcode() == ISD::BITCAST &&
- N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
+static bool isEssentiallyExtractHighSubvector(SDValue N) {
+ if (N.getOpcode() == ISD::BITCAST)
+ N = N.getOperand(0);
+ if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+ return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
+ N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
/// Helper structure to keep track of ISD::SET_CC operands.
@@ -9770,13 +9967,13 @@ static SDValue performAddSubLongCombine(SDNode *N,
// It's not worth doing if at least one of the inputs isn't already an
// extract, but we don't know which it'll be so we have to try both.
- if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
+ if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())
return SDValue();
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
- } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
+ } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())
return SDValue();
@@ -9809,11 +10006,11 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
// Either node could be a DUP, but it's not worth doing both of them (you'd
// just as well use the non-high version) so look for a corresponding extract
// operation on the other "wing".
- if (isEssentiallyExtractSubvector(LHS)) {
+ if (isEssentiallyExtractHighSubvector(LHS)) {
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
if (!RHS.getNode())
return SDValue();
- } else if (isEssentiallyExtractSubvector(RHS)) {
+ } else if (isEssentiallyExtractHighSubvector(RHS)) {
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
@@ -10261,7 +10458,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
// Don't split at -Oz.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
@@ -10917,6 +11114,12 @@ static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
+ // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
+ if (Op->getOpcode() == ISD::ANY_EXTEND &&
+ Bit < Op->getOperand(0).getValueSizeInBits()) {
+ return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
+ }
+
if (Op->getNumOperands() != 2)
return Op;
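
The added (tbz (any_ext x), b) -> (tbz x, b) fold is sound because extension only manufactures bits at positions at or above the original width; every bit below that width is unchanged, so the test can look through the extend whenever b < width(x). A quick standalone check of that invariant:

    #include <cassert>
    #include <cstdint>

    // Hedged sketch: testing bit B of an extended value equals testing bit B
    // of the original as long as B lies below the original width.
    static bool testBit(uint64_t V, unsigned B) { return (V >> B) & 1; }

    int main() {
      uint32_t Narrow = 0x80004001u;
      uint64_t ZExt = uint64_t(Narrow);                   // zero-extended
      uint64_t SExt = uint64_t(int64_t(int32_t(Narrow))); // sign-extended
      for (unsigned B = 0; B < 32; ++B) {                 // any B < width
        assert(testBit(Narrow, B) == testBit(ZExt, B));
        assert(testBit(Narrow, B) == testBit(SExt, B));
      }
      return 0;
    }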
@@ -11172,6 +11375,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
+ case ISD::AND:
+ return performANDCombine(N, DCI);
case ISD::SRL:
return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
@@ -11573,6 +11778,9 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ if (AI->isFloatingPointOperation())
+ return AtomicExpansionKind::CmpXChg;
+
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand not supported in LSE.
@@ -11627,9 +11835,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
- return Builder.CreateTruncOrBitCast(
- Builder.CreateCall(Ldxr, Addr),
- cast<PointerType>(Addr->getType())->getElementType());
+ Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
+
+ const DataLayout &DL = M->getDataLayout();
+ IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
+ Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
+
+ return Builder.CreateBitCast(Trunc, EltTy);
}
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
@@ -11664,6 +11876,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
+ const DataLayout &DL = M->getDataLayout();
+ IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
+ Val = Builder.CreateBitCast(Val, IntValTy);
+
return Builder.CreateCall(Stxr,
{Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
@@ -11685,8 +11901,9 @@ static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+ IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
+ Offset),
+ IRB.getInt8PtrTy()->getPointerTo(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
@@ -11712,12 +11929,13 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
- auto *SecurityCheckCookie = cast<Function>(
- M.getOrInsertFunction("__security_check_cookie",
- Type::getVoidTy(M.getContext()),
- Type::getInt8PtrTy(M.getContext())));
- SecurityCheckCookie->setCallingConv(CallingConv::Win64);
- SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+ FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+ "__security_check_cookie", Type::getVoidTy(M.getContext()),
+ Type::getInt8PtrTy(M.getContext()));
+ if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
+ F->setCallingConv(CallingConv::Win64);
+ F->addAttribute(1, Attribute::AttrKind::InReg);
+ }
return;
}
TargetLowering::insertSSPDeclarations(M);
@@ -11730,7 +11948,7 @@ Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
return TargetLowering::getSDagStackGuard(M);
}
-Value *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getFunction("__security_check_cookie");
@@ -11825,6 +12043,11 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
return OptSize && !VT.isVector();
}
+bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+ // We want inc-of-add for scalars and sub-of-not for vectors.
+ return VT.isScalarInteger();
+}
+
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index ffc4cc3ef534..4421c31f65c9 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1,9 +1,8 @@
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -215,7 +214,13 @@ enum NodeType : unsigned {
LD4LANEpost,
ST2LANEpost,
ST3LANEpost,
- ST4LANEpost
+ ST4LANEpost,
+
+ STG,
+ STZG,
+ ST2G,
+ STZ2G
+
};
} // end namespace AArch64ISD
@@ -263,9 +268,10 @@ public:
/// Returns true if the target allows unaligned memory accesses of the
/// specified type.
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
- unsigned Align = 1,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const override;
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -287,7 +293,8 @@ public:
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
/// Return true if the given shuffle mask can be codegen'd directly, or if it
/// should be stack expanded.
@@ -328,6 +335,9 @@ public:
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
+
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
@@ -346,7 +356,7 @@ public:
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
@@ -409,7 +419,7 @@ public:
void insertSSPDeclarations(Module &M) const override;
Value *getSDagStackGuard(const Module &M) const override;
- Value *getSSPStackGuardCheck(const Module &M) const override;
+ Function *getSSPStackGuardCheck(const Module &M) const override;
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
@@ -470,6 +480,12 @@ public:
return VT.getSizeInBits() >= 64; // vector 'bic'
}
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return false;
+ return true;
+ }
+
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override {
// For vectors, we don't have a preference..
@@ -487,6 +503,8 @@ public:
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
+ bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
@@ -648,9 +666,9 @@ private:
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index 35cd7735ceb7..e22cb44d81ae 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -1,9 +1,8 @@
//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 9061ed4f9f54..d619137b55c5 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1,9 +1,8 @@
//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -356,6 +355,9 @@ def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
+def am_indexedu6s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedU6S128", []>;
+def am_indexeds9s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexedS9S128", []>;
+
// uimm5sN predicate - True if the immediate is a multiple of N in the range
// [0 * N, 32 * N].
def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;
@@ -1818,6 +1820,14 @@ multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))),
(!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+
+ def : Pat<(i64 (OpNode GPR64:$Rn, (i64 (sext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Xr") GPR64:$Rn,
+ (SUBREG_TO_REG (i32 0), GPR32:$Rm, sub_32))>;
+
+ def : Pat<(i64 (OpNode GPR64:$Rn, (i64 (zext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Xr") GPR64:$Rn,
+ (SUBREG_TO_REG (i32 0), GPR32:$Rm, sub_32))>;
}
class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
@@ -2332,7 +2342,7 @@ class AddSubG<bit isSub, string asm_inst, SDPatternOperator OpNode>
}
class SUBP<bit setsFlags, string asm_instr, SDPatternOperator OpNode>
- : BaseTwoOperand<0b0000, GPR64, asm_instr, null_frag, GPR64sp, GPR64sp> {
+ : BaseTwoOperand<0b0000, GPR64, asm_instr, OpNode, GPR64sp, GPR64sp> {
let Inst{31} = 1;
let Inst{29} = setsFlags;
}
@@ -4017,7 +4027,7 @@ class BaseMemTag<bits<2> opc1, bits<2> opc2, string asm_insn,
class MemTagVector<bit Load, string asm_insn, string asm_opnds,
dag oops, dag iops>
: BaseMemTag<{0b1, Load}, 0b00, asm_insn, asm_opnds,
- "$Rn = $wback,@earlyclobber $wback", oops, iops> {
+ "", oops, iops> {
bits<5> Rt;
let Inst{20-12} = 0b000000000;
@@ -4027,8 +4037,9 @@ class MemTagVector<bit Load, string asm_insn, string asm_opnds,
}
class MemTagLoad<string asm_insn, string asm_opnds>
- : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "", (outs GPR64:$Rt),
- (ins GPR64sp:$Rn, simm9s16:$offset)> {
+ : BaseMemTag<0b01, 0b00, asm_insn, asm_opnds, "$Rt = $wback",
+ (outs GPR64:$wback),
+ (ins GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)> {
bits<5> Rt;
bits<9> offset;
@@ -4045,29 +4056,28 @@ class BaseMemTagStore<bits<2> opc1, bits<2> opc2, string asm_insn,
bits<9> offset;
let Inst{20-12} = offset;
- let Inst{4-0} = 0b11111;
- let Unpredictable{4-0} = 0b11111;
+ let Inst{4-0} = Rt;
let mayStore = 1;
}
multiclass MemTagStore<bits<2> opc1, string insn> {
def Offset :
- BaseMemTagStore<opc1, 0b10, insn, "\t[$Rn, $offset]", "",
- (outs), (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ BaseMemTagStore<opc1, 0b10, insn, "\t$Rt, [$Rn, $offset]", "",
+ (outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def PreIndex :
- BaseMemTagStore<opc1, 0b11, insn, "\t[$Rn, $offset]!",
- "$Rn = $wback,@earlyclobber $wback",
+ BaseMemTagStore<opc1, 0b11, insn, "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $wback",
(outs GPR64sp:$wback),
- (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def PostIndex :
- BaseMemTagStore<opc1, 0b01, insn, "\t[$Rn], $offset",
- "$Rn = $wback,@earlyclobber $wback",
+ BaseMemTagStore<opc1, 0b01, insn, "\t$Rt, [$Rn], $offset",
+ "$Rn = $wback",
(outs GPR64sp:$wback),
- (ins GPR64sp:$Rn, simm9s16:$offset)>;
+ (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
- def : InstAlias<insn # "\t[$Rn]",
- (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rn, 0)>;
+ def : InstAlias<insn # "\t$Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rt, GPR64sp:$Rn, 0)>;
}
//---
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index ada067888572..215e96a82d0e 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1,9 +1,8 @@
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -77,8 +76,11 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- if (MI.getOpcode() == AArch64::INLINEASM)
- return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ {
+ auto Op = MI.getOpcode();
+ if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
+ return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ }
// FIXME: We currently only handle pseudoinstructions that don't get expanded
// before the assembly printer.
@@ -928,9 +930,9 @@ bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
- MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
+ const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
@@ -1715,6 +1717,69 @@ bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
}
}
+Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
+ switch (Opc) {
+ default: return {};
+ case AArch64::PRFMui: return AArch64::PRFUMi;
+ case AArch64::LDRXui: return AArch64::LDURXi;
+ case AArch64::LDRWui: return AArch64::LDURWi;
+ case AArch64::LDRBui: return AArch64::LDURBi;
+ case AArch64::LDRHui: return AArch64::LDURHi;
+ case AArch64::LDRSui: return AArch64::LDURSi;
+ case AArch64::LDRDui: return AArch64::LDURDi;
+ case AArch64::LDRQui: return AArch64::LDURQi;
+ case AArch64::LDRBBui: return AArch64::LDURBBi;
+ case AArch64::LDRHHui: return AArch64::LDURHHi;
+ case AArch64::LDRSBXui: return AArch64::LDURSBXi;
+ case AArch64::LDRSBWui: return AArch64::LDURSBWi;
+ case AArch64::LDRSHXui: return AArch64::LDURSHXi;
+ case AArch64::LDRSHWui: return AArch64::LDURSHWi;
+ case AArch64::LDRSWui: return AArch64::LDURSWi;
+ case AArch64::STRXui: return AArch64::STURXi;
+ case AArch64::STRWui: return AArch64::STURWi;
+ case AArch64::STRBui: return AArch64::STURBi;
+ case AArch64::STRHui: return AArch64::STURHi;
+ case AArch64::STRSui: return AArch64::STURSi;
+ case AArch64::STRDui: return AArch64::STURDi;
+ case AArch64::STRQui: return AArch64::STURQi;
+ case AArch64::STRBBui: return AArch64::STURBBi;
+ case AArch64::STRHHui: return AArch64::STURHHi;
+ }
+}
+
+unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return 2;
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ case AArch64::STPXi:
+ case AArch64::STPDi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNPDi:
+ case AArch64::STNPXi:
+ case AArch64::STNPDi:
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
+ case AArch64::LDNPQi:
+ case AArch64::STNPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPSi:
+ case AArch64::STPWi:
+ case AArch64::STPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPSi:
+ case AArch64::LDG:
+ case AArch64::STGPi:
+ return 3;
+ case AArch64::ADDG:
+ case AArch64::STGOffset:
+ return 2;
+ }
+}
+
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
@@ -1837,7 +1902,7 @@ unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
-bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
+bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
// If this is a volatile load/store, don't mess with it.
if (MI.hasOrderedMemoryRef())
return false;
@@ -1879,8 +1944,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
return true;
}
-bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
- MachineOperand *&BaseOp,
+bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const {
unsigned Width;
@@ -1888,7 +1953,7 @@ bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
}
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
- MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
@@ -1944,7 +2009,7 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
unsigned &Width, int64_t &MinOffset,
- int64_t &MaxOffset) const {
+ int64_t &MaxOffset) {
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
@@ -1965,6 +2030,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
+ case AArch64::PRFUMi:
case AArch64::LDURXi:
case AArch64::LDURDi:
case AArch64::STURXi:
@@ -2034,6 +2100,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
MinOffset = -64;
MaxOffset = 63;
break;
+ case AArch64::PRFMui:
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
@@ -2066,6 +2133,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
break;
case AArch64::LDRHui:
case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSHXui:
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = Width = 2;
@@ -2074,12 +2143,40 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
break;
case AArch64::LDRBui:
case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSBXui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = Width = 1;
MinOffset = 0;
MaxOffset = 4095;
break;
+ case AArch64::ADDG:
+ case AArch64::TAGPstack:
+ Scale = 16;
+ Width = 0;
+ MinOffset = 0;
+ MaxOffset = 63;
+ break;
+ case AArch64::LDG:
+ case AArch64::STGOffset:
+ case AArch64::STZGOffset:
+ Scale = Width = 16;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::ST2GOffset:
+ case AArch64::STZ2GOffset:
+ Scale = 16;
+ Width = 32;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
+ case AArch64::STGPi:
+ Scale = Width = 16;
+ MinOffset = -64;
+ MaxOffset = 63;
+ break;
}
return true;
@@ -2181,11 +2278,11 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
-bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
- MachineOperand &BaseOp2,
+bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
+ const MachineOperand &BaseOp2,
unsigned NumLoads) const {
- MachineInstr &FirstLdSt = *BaseOp1.getParent();
- MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ const MachineInstr &SecondLdSt = *BaseOp2.getParent();
if (BaseOp1.getType() != BaseOp2.getType())
return false;
@@ -2292,6 +2389,31 @@ void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
}
}
+void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc,
+ unsigned Opcode, unsigned ZeroReg,
+ llvm::ArrayRef<unsigned> Indices) const {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned NumRegs = Indices.size();
+
+#ifndef NDEBUG
+ uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+ uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+ assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
+ "GPR reg sequences should not be able to overlap");
+#endif
+
+ for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+ AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+ MIB.addReg(ZeroReg);
+ AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+ MIB.addImm(0);
+ }
+}
+
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg,
@@ -2431,6 +2553,22 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
+ AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
+ copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
+ AArch64::XZR, Indices);
+ return;
+ }
+
+ if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
+ AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
+ copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
+ AArch64::WZR, Indices);
+ return;
+ }
+
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
if (Subtarget.hasNEON()) {
@@ -2839,7 +2977,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg, int Offset,
const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV,
- bool NeedsWinCFI) {
+ bool NeedsWinCFI, bool *HasWinCFI) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -2884,10 +3022,13 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
.setMIFlag(Flag);
- if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
- .addImm(ThisVal)
- .setMIFlag(Flag);
+ if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) {
+ if (HasWinCFI)
+ *HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(ThisVal)
+ .setMIFlag(Flag);
+ }
SrcReg = DestReg;
Offset -= ThisVal;
@@ -2903,6 +3044,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
if (NeedsWinCFI) {
if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
(SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+ if (HasWinCFI)
+ *HasWinCFI = true;
if (Offset == 0)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
setMIFlag(Flag);
@@ -2910,6 +3053,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
addImm(Offset).setMIFlag(Flag);
} else if (DestReg == AArch64::SP) {
+ if (HasWinCFI)
+ *HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
addImm(Offset).setMIFlag(Flag);
}
@@ -2919,7 +3064,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS, VirtRegMap *VRM) const {
// This is a bit of a hack. Consider this instruction:
//
// %0 = COPY %sp; GPR64all:%0
@@ -3102,11 +3247,6 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
bool *OutUseUnscaledOp,
unsigned *OutUnscaledOp,
int *EmittableOffset) {
- int Scale = 1;
- bool IsSigned = false;
- // The ImmIdx should be changed case by case if it is not 2.
- unsigned ImmIdx = 2;
- unsigned UnscaledOp = 0;
// Set output values in case of early exit.
if (EmittableOffset)
*EmittableOffset = 0;
@@ -3114,10 +3254,12 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
*OutUseUnscaledOp = false;
if (OutUnscaledOp)
*OutUnscaledOp = 0;
+
+ // Exit early for structured vector spills/fills as they can't take an
+ // immediate offset.
switch (MI.getOpcode()) {
default:
- llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
- // Vector spills/fills can't take an immediate offset.
+ break;
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
case AArch64::LD1Fourv2d:
@@ -3130,208 +3272,53 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
case AArch64::ST1Twov1d:
case AArch64::ST1Threev1d:
case AArch64::ST1Fourv1d:
+ case AArch64::IRG:
+ case AArch64::IRGstack:
return AArch64FrameOffsetCannotUpdate;
- case AArch64::PRFMui:
- Scale = 8;
- UnscaledOp = AArch64::PRFUMi;
- break;
- case AArch64::LDRXui:
- Scale = 8;
- UnscaledOp = AArch64::LDURXi;
- break;
- case AArch64::LDRWui:
- Scale = 4;
- UnscaledOp = AArch64::LDURWi;
- break;
- case AArch64::LDRBui:
- Scale = 1;
- UnscaledOp = AArch64::LDURBi;
- break;
- case AArch64::LDRHui:
- Scale = 2;
- UnscaledOp = AArch64::LDURHi;
- break;
- case AArch64::LDRSui:
- Scale = 4;
- UnscaledOp = AArch64::LDURSi;
- break;
- case AArch64::LDRDui:
- Scale = 8;
- UnscaledOp = AArch64::LDURDi;
- break;
- case AArch64::LDRQui:
- Scale = 16;
- UnscaledOp = AArch64::LDURQi;
- break;
- case AArch64::LDRBBui:
- Scale = 1;
- UnscaledOp = AArch64::LDURBBi;
- break;
- case AArch64::LDRHHui:
- Scale = 2;
- UnscaledOp = AArch64::LDURHHi;
- break;
- case AArch64::LDRSBXui:
- Scale = 1;
- UnscaledOp = AArch64::LDURSBXi;
- break;
- case AArch64::LDRSBWui:
- Scale = 1;
- UnscaledOp = AArch64::LDURSBWi;
- break;
- case AArch64::LDRSHXui:
- Scale = 2;
- UnscaledOp = AArch64::LDURSHXi;
- break;
- case AArch64::LDRSHWui:
- Scale = 2;
- UnscaledOp = AArch64::LDURSHWi;
- break;
- case AArch64::LDRSWui:
- Scale = 4;
- UnscaledOp = AArch64::LDURSWi;
- break;
-
- case AArch64::STRXui:
- Scale = 8;
- UnscaledOp = AArch64::STURXi;
- break;
- case AArch64::STRWui:
- Scale = 4;
- UnscaledOp = AArch64::STURWi;
- break;
- case AArch64::STRBui:
- Scale = 1;
- UnscaledOp = AArch64::STURBi;
- break;
- case AArch64::STRHui:
- Scale = 2;
- UnscaledOp = AArch64::STURHi;
- break;
- case AArch64::STRSui:
- Scale = 4;
- UnscaledOp = AArch64::STURSi;
- break;
- case AArch64::STRDui:
- Scale = 8;
- UnscaledOp = AArch64::STURDi;
- break;
- case AArch64::STRQui:
- Scale = 16;
- UnscaledOp = AArch64::STURQi;
- break;
- case AArch64::STRBBui:
- Scale = 1;
- UnscaledOp = AArch64::STURBBi;
- break;
- case AArch64::STRHHui:
- Scale = 2;
- UnscaledOp = AArch64::STURHHi;
- break;
-
- case AArch64::LDPXi:
- case AArch64::LDPDi:
- case AArch64::STPXi:
- case AArch64::STPDi:
- case AArch64::LDNPXi:
- case AArch64::LDNPDi:
- case AArch64::STNPXi:
- case AArch64::STNPDi:
- ImmIdx = 3;
- IsSigned = true;
- Scale = 8;
- break;
- case AArch64::LDPQi:
- case AArch64::STPQi:
- case AArch64::LDNPQi:
- case AArch64::STNPQi:
- ImmIdx = 3;
- IsSigned = true;
- Scale = 16;
- break;
- case AArch64::LDPWi:
- case AArch64::LDPSi:
- case AArch64::STPWi:
- case AArch64::STPSi:
- case AArch64::LDNPWi:
- case AArch64::LDNPSi:
- case AArch64::STNPWi:
- case AArch64::STNPSi:
- ImmIdx = 3;
- IsSigned = true;
- Scale = 4;
- break;
-
- case AArch64::LDURXi:
- case AArch64::LDURWi:
- case AArch64::LDURBi:
- case AArch64::LDURHi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURHHi:
- case AArch64::LDURBBi:
- case AArch64::LDURSBXi:
- case AArch64::LDURSBWi:
- case AArch64::LDURSHXi:
- case AArch64::LDURSHWi:
- case AArch64::LDURSWi:
- case AArch64::STURXi:
- case AArch64::STURWi:
- case AArch64::STURBi:
- case AArch64::STURHi:
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURBBi:
- case AArch64::STURHHi:
- Scale = 1;
- break;
}
- Offset += MI.getOperand(ImmIdx).getImm() * Scale;
+ // Get the min/max offset and the scale.
+ unsigned Scale, Width;
+ int64_t MinOff, MaxOff;
+ if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
+ MaxOff))
+ llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
+
+ // Construct the complete offset.
+ const MachineOperand &ImmOpnd =
+ MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
+ Offset += ImmOpnd.getImm() * Scale;
- bool useUnscaledOp = false;
// If the offset doesn't match the scale, we rewrite the instruction to
// use the unscaled instruction instead. Likewise, if we have a negative
- // offset (and have an unscaled op to use).
- if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
- useUnscaledOp = true;
-
- // Use an unscaled addressing mode if the instruction has a negative offset
- // (or if the instruction is already using an unscaled addressing mode).
- unsigned MaskBits;
- if (IsSigned) {
- // ldp/stp instructions.
- MaskBits = 7;
- Offset /= Scale;
- } else if (UnscaledOp == 0 || useUnscaledOp) {
- MaskBits = 9;
- IsSigned = true;
- Scale = 1;
- } else {
- MaskBits = 12;
- IsSigned = false;
- Offset /= Scale;
+ // offset and there is an unscaled op to use.
+ Optional<unsigned> UnscaledOp =
+ AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
+ bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
+ if (useUnscaledOp &&
+ !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
+ llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
+
+ int64_t Remainder = Offset % Scale;
+ assert(!(Remainder && useUnscaledOp) &&
+ "Cannot have remainder when using unscaled op");
+
+ assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
+ int NewOffset = Offset / Scale;
+ if (MinOff <= NewOffset && NewOffset <= MaxOff)
+ Offset = Remainder;
+ else {
+ NewOffset = NewOffset < 0 ? MinOff : MaxOff;
+ Offset = Offset - NewOffset * Scale + Remainder;
}
- // Attempt to fold address computation.
- int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
- int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
- if (Offset >= MinOff && Offset <= MaxOff) {
- if (EmittableOffset)
- *EmittableOffset = Offset;
- Offset = 0;
- } else {
- int NewOff = Offset < 0 ? MinOff : MaxOff;
- if (EmittableOffset)
- *EmittableOffset = NewOff;
- Offset = (Offset - NewOff) * Scale;
- }
+ if (EmittableOffset)
+ *EmittableOffset = NewOffset;
if (OutUseUnscaledOp)
*OutUseUnscaledOp = useUnscaledOp;
- if (OutUnscaledOp)
- *OutUnscaledOp = UnscaledOp;
+ if (OutUnscaledOp && UnscaledOp)
+ *OutUnscaledOp = *UnscaledOp;
+
return AArch64FrameOffsetCanUpdate |
(Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
}
@@ -4974,8 +4961,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// At this point, we have a stack instruction that we might need to
// fix up. We'll handle it if it's a load or store.
if (MI.mayLoadOrStore()) {
- MachineOperand *Base; // Filled with the base operand of MI.
- int64_t Offset; // Filled with the offset of MI.
+ const MachineOperand *Base; // Filled with the base operand of MI.
+ int64_t Offset; // Filled with the offset of MI.
// Does it allow us to offset the base operand and is the base the
// register SP?
@@ -5331,12 +5318,20 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
return outliner::InstrType::Illegal;
+ // Don't outline BTI instructions, because that will prevent the outlining
+ // site from being indirectly callable.
+ if (MI.getOpcode() == AArch64::HINT) {
+ int64_t Imm = MI.getOperand(0).getImm();
+ if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
+ return outliner::InstrType::Illegal;
+ }
+
return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
- MachineOperand *Base;
+ const MachineOperand *Base;
unsigned Width;
int64_t Offset;
@@ -5534,7 +5529,32 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
- return MF.getFunction().optForMinSize();
+ return MF.getFunction().hasMinSize();
+}
+
+bool AArch64InstrInfo::isCopyInstrImpl(
+ const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const {
+
+ // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR register and a zero
+ // immediate operand are used as aliases for the mov instruction.
+ if (MI.getOpcode() == AArch64::ORRWrs &&
+ MI.getOperand(1).getReg() == AArch64::WZR &&
+ MI.getOperand(3).getImm() == 0x0) {
+ Destination = &MI.getOperand(0);
+ Source = &MI.getOperand(2);
+ return true;
+ }
+
+ if (MI.getOpcode() == AArch64::ORRXrs &&
+ MI.getOperand(1).getReg() == AArch64::XZR &&
+ MI.getOperand(3).getImm() == 0x0) {
+ Destination = &MI.getOperand(0);
+ Source = &MI.getOperand(2);
+ return true;
+ }
+
+ return false;
}
#define GET_INSTRINFO_HELPERS
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 9954669d5675..7be4daba7dc4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -1,9 +1,8 @@
//===- AArch64InstrInfo.h - AArch64 Instruction Information -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -54,7 +54,8 @@ public:
unsigned &DstReg, unsigned &SubIdx) const override;
bool
- areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
@@ -84,6 +85,14 @@ public:
return isUnscaledLdSt(MI.getOpcode());
}
+ /// Returns the unscaled load/store for the scaled load/store opcode,
+ /// if there is a corresponding unscaled variant available.
+ static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
+
+
+ /// Returns the index for the immediate for a given instruction.
+ static unsigned getLoadStoreImmIdx(unsigned Opc);
+
/// Return true if pairing the given load or store may be paired with another.
static bool isPairableLdStInst(const MachineInstr &MI);
@@ -92,16 +101,18 @@ public:
static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit);
/// Return true if this is a load/store that can be potentially paired/merged.
- bool isCandidateToMergeOrPair(MachineInstr &MI) const;
+ bool isCandidateToMergeOrPair(const MachineInstr &MI) const;
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffset(const MachineInstr &MI,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const override;
- bool getMemOperandWithOffsetWidth(MachineInstr &MI, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
+ const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
@@ -112,16 +123,21 @@ public:
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
/// For unscaled instructions, \p Scale is set to 1.
- bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
- int64_t &MinOffset, int64_t &MaxOffset) const;
+ static bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+ int64_t &MinOffset, int64_t &MaxOffset);
- bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
+ bool shouldClusterMemOps(const MachineOperand &BaseOp1,
+ const MachineOperand &BaseOp2,
unsigned NumLoads) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const;
+ void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc, unsigned Opcode, unsigned ZeroReg,
+ llvm::ArrayRef<unsigned> Indices) const;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
@@ -146,7 +162,8 @@ public:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const override;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
/// \returns true if a branch from an instruction with opcode \p BranchOpc
/// bytes is capable of jumping to a position \p BrOffset bytes away.
@@ -251,6 +268,13 @@ public:
#define GET_INSTRINFO_HELPER_DECLS
#include "AArch64GenInstrInfo.inc"
+protected:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another register, return true along with
+ /// @Source machine operand and @Destination machine operand.
+ bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source,
+ const MachineOperand *&Destination) const override;
+
private:
/// Sets the offsets on outlined instructions in \p MBB which use SP
/// so that they will be valid post-outlining.
@@ -277,7 +301,8 @@ void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
int Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetNZCV = false, bool NeedsWinCFI = false);
+ bool SetNZCV = false, bool NeedsWinCFI = false,
+ bool *HasWinCFI = nullptr);
/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
/// FP. Return false if the offset could not be handled directly in MI, and
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index c24b8b36441b..eed53f36d574 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1,9 +1,8 @@
//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -108,6 +107,16 @@ def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<"FeatureSVE", "sve">;
+def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
+ AssemblerPredicate<"FeatureSVE2", "sve2">;
+def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
+ AssemblerPredicate<"FeatureSVE2AES", "sve2-aes">;
+def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
+ AssemblerPredicate<"FeatureSVE2SM4", "sve2-sm4">;
+def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
+ AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
+def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
+ AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -126,6 +135,7 @@ def HasMTE : Predicate<"Subtarget->hasMTE()">,
AssemblerPredicate<"FeatureMTE", "mte">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
+def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
@@ -133,6 +143,10 @@ def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
"NegativeImmediates">;
+def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisInt<1>]>>;
+
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
@@ -395,6 +409,12 @@ def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -404,10 +424,10 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
- def ForCodeSize : Predicate<"MF->getFunction().optForSize()">;
- def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">;
+ def ForCodeSize : Predicate<"MF->getFunction().hasOptSize()">;
+ def NotForCodeSize : Predicate<"!MF->getFunction().hasOptSize()">;
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
- def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">;
+ def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().hasOptSize()">;
def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>;
@@ -703,7 +723,9 @@ let Predicates = [HasPA] in {
// v8.3a floating point conversion for javascript
let Predicates = [HasJS, HasFPARMv8] in
def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
- "fjcvtzs", []> {
+ "fjcvtzs",
+ [(set GPR32:$Rd,
+ (int_aarch64_fjcvtzs FPR64:$Rn))]> {
let Inst{31} = 0;
} // HasJS, HasFPARMv8
@@ -760,6 +782,13 @@ def MSRpstateImm4 : MSRpstateImm0_15;
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
[(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
+let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
+def HWASAN_CHECK_MEMACCESS : Pseudo<
+ (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
+ [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
+ Sched<[]>;
+}
+
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
def : Pat<(readcyclecounter), (MRS 0xdce8)>;
@@ -1223,11 +1252,11 @@ defm : STOPregister<"stumin","LDUMIN">;// STUMINx
// v8.5 Memory Tagging Extension
let Predicates = [HasMTE] in {
-def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", null_frag, GPR64sp, GPR64>,
+def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>,
Sched<[]>{
let Inst{31} = 1;
}
-def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", null_frag, GPR64sp>, Sched<[]>{
+def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]>{
let Inst{31} = 1;
let isNotDuplicable = 1;
}
@@ -1236,7 +1265,7 @@ def SUBG : AddSubG<1, "subg", null_frag>;
def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
-def SUBP : SUBP<0, "subp", null_frag>, Sched<[]>;
+def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
let Defs = [NZCV];
}
@@ -1244,24 +1273,74 @@ def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
+
+def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
+ (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
+def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
+ (LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
+
def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
-def LDGV : MemTagVector<1, "ldgv", "\t$Rt, [$Rn]!",
- (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn)> {
- let DecoderMethod = "DecodeLoadAllocTagArrayInstruction";
+def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
+ (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
+def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
+ (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
+def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
+ (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
+ let Inst{23} = 0;
}
-def STGV : MemTagVector<0, "stgv", "\t$Rt, [$Rn]!",
- (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn)>;
defm STG : MemTagStore<0b00, "stg">;
defm STZG : MemTagStore<0b01, "stzg">;
defm ST2G : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;
+def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+ (STGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+ (STZGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+ (ST2GOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+ (STZ2GOffset $Rn, $Rm, $imm)>;
+
defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
+def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
+ (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
+
+def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
+ (STGPi $Rt, $Rt2, $Rn, $imm)>;
+
+def IRGstack
+ : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
+ Sched<[]>;
+def TAGPstack
+ : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
+ Sched<[]>;
+
+// Explicit SP in the first operand prevents ShrinkWrap optimization
+// from leaving this instruction out of the stack frame. When IRGstack
+// is transformed into IRG, this operand is replaced with the actual
+// register / expression for the tagged base pointer of the current function.
+def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
+
+// Large STG to be expanded into a loop. $Rm is the size, $Rn is the start address.
+// $Rn_wback is one past the end of the range.
+let isCodeGenOnly=1, mayStore=1 in {
+def STGloop
+ : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+ [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+ Sched<[WriteAdr, WriteST]>;
+
+def STZGloop
+ : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+ [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+ Sched<[WriteAdr, WriteST]>;
+}
+
} // Predicates = [HasMTE]
//===----------------------------------------------------------------------===//
@@ -3052,6 +3131,27 @@ defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">;
+let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (lround f16:$Rn)),
+ (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
+ def : Pat<(i64 (lround f16:$Rn)),
+ (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
+ def : Pat<(i64 (llround f16:$Rn)),
+ (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
+}
+def : Pat<(i32 (lround f32:$Rn)),
+ (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
+def : Pat<(i32 (lround f64:$Rn)),
+ (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
+def : Pat<(i64 (lround f32:$Rn)),
+ (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
+def : Pat<(i64 (lround f64:$Rn)),
+ (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
+def : Pat<(i64 (llround f32:$Rn)),
+ (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
+def : Pat<(i64 (llround f64:$Rn)),
+ (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
+
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//
@@ -3116,6 +3216,27 @@ let Predicates = [HasFRInt3264] in {
defm FRINT64X : FRIntNNT<0b11, "frint64x">;
} // HasFRInt3264
+let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (lrint f16:$Rn)),
+ (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+ def : Pat<(i64 (lrint f16:$Rn)),
+ (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+ def : Pat<(i64 (llrint f16:$Rn)),
+ (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+}
+def : Pat<(i32 (lrint f32:$Rn)),
+ (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i32 (lrint f64:$Rn)),
+ (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+def : Pat<(i64 (lrint f32:$Rn)),
+ (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i64 (lrint f64:$Rn)),
+ (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+def : Pat<(i64 (llrint f32:$Rn)),
+ (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+def : Pat<(i64 (llrint f64:$Rn)),
+ (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
@@ -3489,7 +3610,7 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, V
}
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
@@ -5314,6 +5435,8 @@ def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), v
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+ (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
(and FPR32:$Rn, (i32 65535)),
vecshiftR16:$imm)),
@@ -5342,6 +5465,16 @@ def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
(i64 (IMPLICIT_DEF)),
(FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
hsub))>;
+def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
+ (i32 (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)),
+ (FACGE16 FPR16:$Rn, FPR16:$Rm),
+ hsub))>;
+def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
+ (i32 (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)),
+ (FACGT16 FPR16:$Rn, FPR16:$Rm),
+ hsub))>;
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
@@ -6031,6 +6164,7 @@ def : Pat<(i32 (trunc GPR64sp:$src)),
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
+def : Pat<(debugtrap), (BRK 0xF000)>, Requires<[IsWindows]>;
// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high the high
@@ -6147,6 +6281,7 @@ def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
@@ -6801,5 +6936,8 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
+def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
+def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 5eb589bf66d5..4e13fb8e2027 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -19,11 +18,14 @@
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -57,6 +59,15 @@ private:
/// the patterns that don't require complex C++.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+ // A lowering phase that runs before any selection attempts.
+
+ void preISelLower(MachineInstr &I) const;
+
+ // An early selection function that runs before the selectImpl() call.
+ bool earlySelect(MachineInstr &I) const;
+
+ bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
@@ -65,15 +76,84 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
// Helper to generate an equivalent of scalar_to_vector into a new register,
// returned via 'Dst'.
- bool emitScalarToVector(unsigned &Dst, const LLT DstTy,
- const TargetRegisterClass *DstRC, unsigned Scalar,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineRegisterInfo &MRI) const;
+ MachineInstr *emitScalarToVector(unsigned EltSize,
+ const TargetRegisterClass *DstRC,
+ Register Scalar,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Emit a lane insert into \p DstReg, or a new vector register if None is
+ /// provided.
+ ///
+ /// The lane inserted into is defined by \p LaneIdx. The vector source
+ /// register is given by \p SrcReg. The register containing the element is
+ /// given by \p EltReg.
+ MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
+ Register EltReg, unsigned LaneIdx,
+ const RegisterBank &RB,
+ MachineIRBuilder &MIRBuilder) const;
+ bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+ void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
+ SmallVectorImpl<Optional<int>> &Idxs) const;
+ bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectSplitVectorUnmerge(MachineInstr &I,
+ MachineRegisterInfo &MRI) const;
+ bool selectIntrinsicWithSideEffects(MachineInstr &I,
+ MachineRegisterInfo &MRI) const;
+ bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+ unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
+ MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
+ MachineIRBuilder &MIRBuilder) const;
+
+ // Emit a vector concat operation.
+ MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
+ Register Op2,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitTST(const Register &LHS, const Register &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
+ const RegisterBank &DstRB, LLT ScalarTy,
+ Register VecReg, unsigned LaneIdx,
+ MachineIRBuilder &MIRBuilder) const;
+
+ /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
+ /// materialized using a FMOV instruction, then update MI and return it.
+ /// Otherwise, do nothing and return a nullptr.
+ MachineInstr *emitFMovForFConstant(MachineInstr &MI,
+ MachineRegisterInfo &MRI) const;
+
+ /// Emit a CSet for a compare.
+ MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
+ MachineIRBuilder &MIRBuilder) const;
+
+ // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
+ // We use these manually instead of using the importer since it doesn't
+ // support SDNodeXForm.
+ ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
+ ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
+ ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
+ ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
@@ -109,6 +189,14 @@ private:
void materializeLargeCMVal(MachineInstr &I, const Value *V,
unsigned char OpFlags) const;
+ // Optimization methods.
+ bool tryOptVectorShuffle(MachineInstr &I) const;
+ bool tryOptVectorDup(MachineInstr &MI) const;
+ bool tryOptSelect(MachineInstr &MI) const;
+ MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
+ MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const;
+
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
@@ -177,6 +265,70 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
return nullptr;
}
+/// Given a register bank, and size in bits, return the smallest register class
+/// that can represent that combination.
+static const TargetRegisterClass *
+getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
+ bool GetAllRegSet = false) {
+ unsigned RegBankID = RB.getID();
+
+ if (RegBankID == AArch64::GPRRegBankID) {
+ if (SizeInBits <= 32)
+ return GetAllRegSet ? &AArch64::GPR32allRegClass
+ : &AArch64::GPR32RegClass;
+ if (SizeInBits == 64)
+ return GetAllRegSet ? &AArch64::GPR64allRegClass
+ : &AArch64::GPR64RegClass;
+ }
+
+ if (RegBankID == AArch64::FPRRegBankID) {
+ switch (SizeInBits) {
+ default:
+ return nullptr;
+ case 8:
+ return &AArch64::FPR8RegClass;
+ case 16:
+ return &AArch64::FPR16RegClass;
+ case 32:
+ return &AArch64::FPR32RegClass;
+ case 64:
+ return &AArch64::FPR64RegClass;
+ case 128:
+ return &AArch64::FPR128RegClass;
+ }
+ }
+
+ return nullptr;
+}
+
+/// Returns the correct subregister to use for a given register class.
+static bool getSubRegForClass(const TargetRegisterClass *RC,
+ const TargetRegisterInfo &TRI, unsigned &SubReg) {
+ switch (TRI.getRegSizeInBits(*RC)) {
+ case 8:
+ SubReg = AArch64::bsub;
+ break;
+ case 16:
+ SubReg = AArch64::hsub;
+ break;
+ case 32:
+ if (RC == &AArch64::GPR32RegClass)
+ SubReg = AArch64::sub_32;
+ else
+ SubReg = AArch64::ssub;
+ break;
+ case 64:
+ SubReg = AArch64::dsub;
+ break;
+ default:
+ LLVM_DEBUG(
+ dbgs() << "Couldn't find appropriate subregister for register class.");
+ return false;
+ }
+
+ return true;
+}
+
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
@@ -332,107 +484,209 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
return GenericOpc;
}
-static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
- MachineRegisterInfo &MRI, unsigned SrcReg) {
- // Copies from gpr32 to fpr16 need to use a sub-register copy.
- unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY))
- .addDef(CopyReg)
- .addUse(SrcReg);
- unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
- .addDef(SubRegCopy)
- .addUse(CopyReg, 0, AArch64::hsub);
+#ifndef NDEBUG
+/// Helper function that verifies that we have a valid copy at the end of
+/// selectCopy. Verifies that the source and dest have the expected sizes and
+/// then returns true.
+static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ // Make sure the size of the source and dest line up.
+ assert(
+ (DstSize == SrcSize ||
+ // Copies are a means to set up initial types; the number of
+ // bits may not exactly match.
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
+ // Copies are a means to copy bits around; as long as we are
+ // on the same register class, that's fine. Otherwise, that
+ // means we need some SUBREG_TO_REG or AND & co.
+ (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
+ "Copy with different width?!");
+
+ // Check the size of the destination.
+ assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
+ "GPRs cannot get more than 64-bit width values");
+
+ return true;
+}
+#endif
+
+/// Helper function for selectCopy. Inserts a subregister copy from
+/// \p *From to \p *To, linking it up to \p I.
+///
+/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
+///
+/// CopyReg (From class) = COPY SrcReg
+/// SubRegCopy (To class) = COPY CopyReg:SubReg
+/// Dst = COPY SubRegCopy
+static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI, unsigned SrcReg,
+ const TargetRegisterClass *From,
+ const TargetRegisterClass *To,
+ unsigned SubReg) {
+ MachineIRBuilder MIB(I);
+ auto Copy = MIB.buildCopy({From}, {SrcReg});
+ auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
+ .addReg(Copy.getReg(0), 0, SubReg);
MachineOperand &RegOp = I.getOperand(1);
- RegOp.setReg(SubRegCopy);
+ RegOp.setReg(SubRegCopy.getReg(0));
+
+ // It's possible that the destination register won't be constrained. Make
+ // sure that happens.
+ if (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()))
+ RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
+
return true;
}
+/// Helper function to get the source and destination register classes for a
+/// copy. Returns a std::pair containing the source register class for the
+/// copy, and the destination register class for the copy. If a register class
+/// cannot be determined, then it will be nullptr.
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+ // Special casing for cross-bank copies of s1s. We can technically represent
+ // a 1-bit value with any size of register. The minimum size for a GPR is 32
+ // bits. So, we need to put the FPR on 32 bits as well.
+ //
+ // FIXME: I'm not sure if this case holds true outside of copies. If it does,
+ // then we can pull it into the helpers that get the appropriate class for a
+ // register bank. Or make a new helper that carries along some constraint
+ // information.
+ if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+ SrcSize = DstSize = 32;
+
+ return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
+ getMinClassForRegBank(DstRegBank, DstSize, true)};
+}
+
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
unsigned DstReg = I.getOperand(0).getReg();
unsigned SrcReg = I.getOperand(1).getReg();
+ const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
- !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
- MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
- if (SrcRC == &AArch64::GPR32allRegClass)
- return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
- }
- assert(I.isCopy() && "Generic operators do not allow physical registers");
- return true;
- }
-
- const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
- (void)DstSize;
- const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
- (void)SrcSize;
- assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
- "No phys reg on generic operators");
- assert(
- (DstSize == SrcSize ||
- // Copies are a mean to setup initial types, the number of
- // bits may not exactly match.
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI)) ||
- // Copies are a mean to copy bits around, as long as we are
- // on the same register class, that's fine. Otherwise, that
- // means we need some SUBREG_TO_REG or AND & co.
- (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
- "Copy with different width?!");
- assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) &&
- "GPRs cannot get more than 64-bit width values");
+ // Find the correct register classes for the source and destination registers.
+ const TargetRegisterClass *SrcRC;
+ const TargetRegisterClass *DstRC;
+ std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
- const TargetRegisterClass *RC = getRegClassForTypeOnBank(
- MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true);
- if (!RC) {
- LLVM_DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
+ if (!DstRC) {
+ LLVM_DEBUG(dbgs() << "Unexpected dest size "
+ << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
return false;
}
- if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
- const TargetRegisterClass *SrcRC =
- RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
- const RegisterBank *RB = nullptr;
+ // A couple helpers below, for making sure that the copy we produce is valid.
+
+ // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
+ // to verify that the src and dst are the same size, since that's handled by
+ // the SUBREG_TO_REG.
+ bool KnownValid = false;
+
+ // Returns true, or asserts if something we don't expect happens. Instead of
+ // returning true, we return isValidCopy() to ensure that we verify the
+ // result.
+ auto CheckCopy = [&]() {
+ // If we have a bitcast or something, we can't have physical registers.
+ assert(
+ (I.isCopy() ||
+ (!TargetRegisterInfo::isPhysicalRegister(I.getOperand(0).getReg()) &&
+ !TargetRegisterInfo::isPhysicalRegister(I.getOperand(1).getReg()))) &&
+ "No phys reg on generic operator!");
+ assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
+ (void)KnownValid;
+ return true;
+ };
+
+ // Is this a copy? If so, then we may need to insert a subregister copy, or
+ // a SUBREG_TO_REG.
+ if (I.isCopy()) {
+ // Yes. Check if there's anything to fix up.
if (!SrcRC) {
- RB = RegClassOrBank.get<const RegisterBank *>();
- SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true);
- }
- // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG.
- if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) {
- unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
- BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(AArch64::SUBREG_TO_REG))
- .addDef(PromoteReg)
- .addImm(0)
- .addUse(SrcReg)
- .addImm(AArch64::hsub);
- MachineOperand &RegOp = I.getOperand(1);
- RegOp.setReg(PromoteReg);
- } else if (RC == &AArch64::FPR16RegClass &&
- SrcRC == &AArch64::GPR32allRegClass) {
- selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+ LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
+ return false;
}
+
+ // Is this a cross-bank copy?
+ if (DstRegBank.getID() != SrcRegBank.getID()) {
+ // If we're doing a cross-bank copy on different-sized registers, we need
+ // to do a bit more work.
+ unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
+ unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+
+ if (SrcSize > DstSize) {
+ // We're doing a cross-bank copy into a smaller register. We need a
+ // subregister copy. First, get a register class that's on the same bank
+ // as the destination, but the same size as the source.
+ const TargetRegisterClass *SubregRC =
+ getMinClassForRegBank(DstRegBank, SrcSize, true);
+ assert(SubregRC && "Didn't get a register class for subreg?");
+
+ // Get the appropriate subregister for the destination.
+ unsigned SubReg = 0;
+ if (!getSubRegForClass(DstRC, TRI, SubReg)) {
+ LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
+ return false;
+ }
+
+ // Now, insert a subregister copy using the new register class.
+ selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
+ return CheckCopy();
+ }
+
+ else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
+ SrcSize == 16) {
+ // Special case for FPR16 to GPR32.
+ // FIXME: This can probably be generalized like the above case.
+ unsigned PromoteReg =
+ MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(AArch64::hsub);
+ MachineOperand &RegOp = I.getOperand(1);
+ RegOp.setReg(PromoteReg);
+
+ // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
+ KnownValid = true;
+ }
+ }
+
+ // If the destination is a physical register, then there's nothing to
+ // change, so we're done.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return CheckCopy();
}
- // No need to constrain SrcReg. It will get constrained when
- // we hit another of its use or its defs.
- // Copies do not have constraints.
- if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ // No need to constrain SrcReg. It will get constrained when we hit another
+ // of its use or its defs. Copies do not have constraints.
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
return false;
}
I.setDesc(TII.get(AArch64::COPY));
- return true;
+ return CheckCopy();
}
static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
@@ -511,6 +765,46 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
+static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+ AArch64::GPRRegBankID);
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ if (Ty == LLT::scalar(32))
+ return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
+ else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
+ return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
+ return 0;
+}
+
+/// Helper function to select the opcode for a G_FCMP.
+static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
+ // If this is a compare against +0.0, then we don't have to explicitly
+ // materialize a constant.
+ const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
+ bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
+ unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+ if (OpSize != 32 && OpSize != 64)
+ return 0;
+ unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+ {AArch64::FCMPSri, AArch64::FCMPDri}};
+ return CmpOpcTbl[ShouldUseImm][OpSize == 64];
+}
+
+/// Returns true if \p P is an unsigned integer comparison predicate.
+static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
+ switch (P) {
+ default:
+ return false;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ return true;
+ }
+}
+
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
switch (P) {
default:
@@ -595,7 +889,7 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
- const unsigned CondReg = I.getOperand(0).getReg();
+ const Register CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
MachineInstr *CCMI = MRI.getVRegDef(CondReg);
if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
@@ -603,14 +897,25 @@ bool AArch64InstructionSelector::selectCompareBranch(
if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
return false;
- unsigned LHS = CCMI->getOperand(2).getReg();
- unsigned RHS = CCMI->getOperand(3).getReg();
- if (!getConstantVRegVal(RHS, MRI))
+ Register LHS = CCMI->getOperand(2).getReg();
+ Register RHS = CCMI->getOperand(3).getReg();
+ auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!VRegAndVal)
std::swap(RHS, LHS);
- const auto RHSImm = getConstantVRegVal(RHS, MRI);
- if (!RHSImm || *RHSImm != 0)
- return false;
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!VRegAndVal || VRegAndVal->Value != 0) {
+ MachineIRBuilder MIB(I);
+ // If we can't select a CBZ then emit a cmp + Bcc.
+ if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
+ CCMI->getOperand(1), MIB))
+ return false;
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+ (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
+ MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
+ I.eraseFromParent();
+ return true;
+ }
const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID)
@@ -638,6 +943,74 @@ bool AArch64InstructionSelector::selectCompareBranch(
return true;
}
+bool AArch64InstructionSelector::selectVectorSHL(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_SHL);
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(DstReg);
+ Register Src1Reg = I.getOperand(1).getReg();
+ Register Src2Reg = I.getOperand(2).getReg();
+
+ if (!Ty.isVector())
+ return false;
+
+ unsigned Opc = 0;
+ if (Ty == LLT::vector(4, 32)) {
+ Opc = AArch64::USHLv4i32;
+ } else if (Ty == LLT::vector(2, 32)) {
+ Opc = AArch64::USHLv2i32;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
+ return false;
+ }
+
+ MachineIRBuilder MIB(I);
+ auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
+ constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectVectorASHR(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_ASHR);
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(DstReg);
+ Register Src1Reg = I.getOperand(1).getReg();
+ Register Src2Reg = I.getOperand(2).getReg();
+
+ if (!Ty.isVector())
+ return false;
+
+ // There is no vector shift-right-by-register instruction; instead, the
+ // shift-left-by-register instruction takes a signed shift amount, where
+ // negative values specify a right shift.
+
+ unsigned Opc = 0;
+ unsigned NegOpc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (Ty == LLT::vector(4, 32)) {
+ Opc = AArch64::SSHLv4i32;
+ NegOpc = AArch64::NEGv4i32;
+ RC = &AArch64::FPR128RegClass;
+ } else if (Ty == LLT::vector(2, 32)) {
+ Opc = AArch64::SSHLv2i32;
+ NegOpc = AArch64::NEGv2i32;
+ RC = &AArch64::FPR64RegClass;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
+ return false;
+ }
+
+ MachineIRBuilder MIB(I);
+ auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
+ constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
+ auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
+ constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
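
The two vector-shift selectors above lean on the USHL/SSHL semantics: the shift-by-register instructions read each lane of the second operand as a signed shift count, which is why the arithmetic shift right is emitted as a NEG followed by SSHL. A minimal scalar sketch of one SSHL lane (illustrative only, not part of the patch; assumes an in-range count and the usual arithmetic behaviour of >> on signed integers):

#include <cstdint>

// Rough model of one SSHL lane: positive counts shift left, negative
// counts shift right arithmetically.
static int32_t sshlLane(int32_t Val, int8_t Count) {
  if (Count >= 0)
    return static_cast<int32_t>(static_cast<uint32_t>(Val) << Count);
  return Val >> -Count;
}
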
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
return false;
@@ -646,9 +1019,9 @@ bool AArch64InstructionSelector::selectVaStartAAPCS(
bool AArch64InstructionSelector::selectVaStartDarwin(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- unsigned ListReg = I.getOperand(0).getReg();
+ Register ListReg = I.getOperand(0).getReg();
- unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MIB =
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
@@ -684,9 +1057,9 @@ void AArch64InstructionSelector::materializeLargeCMVal(
MovZ->addOperand(MF, MachineOperand::CreateImm(0));
constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
- auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, unsigned Offset,
- unsigned ForceDstReg) {
- unsigned DstReg = ForceDstReg
+ auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
+ Register ForceDstReg) {
+ Register DstReg = ForceDstReg
? ForceDstReg
: MRI.createVirtualRegister(&AArch64::GPR64RegClass);
auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
@@ -702,13 +1075,105 @@ void AArch64InstructionSelector::materializeLargeCMVal(
constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
return DstReg;
};
- unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+ Register DstReg = BuildMovK(MovZ.getReg(0),
AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
return;
}
+void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ switch (I.getOpcode()) {
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ // These shifts are legalized to have 64 bit shift amounts because we want
+ // to take advantage of the existing imported selection patterns that assume
+ // the immediates are s64s. However, if the shifted type is 32 bits and for
+ // some reason we receive input GMIR that has an s64 shift amount that's not
+ // a G_CONSTANT, insert a truncate so that we can still select the s32
+ // register-register variant.
+ unsigned SrcReg = I.getOperand(1).getReg();
+ unsigned ShiftReg = I.getOperand(2).getReg();
+ const LLT ShiftTy = MRI.getType(ShiftReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return;
+ assert(!ShiftTy.isVector() && "unexpected vector shift ty");
+ if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
+ return;
+ auto *AmtMI = MRI.getVRegDef(ShiftReg);
+ assert(AmtMI && "could not find a vreg definition for shift amount");
+ if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
+ // Insert a subregister copy to implement a 64->32 trunc
+ MachineIRBuilder MIB(I);
+ auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
+ .addReg(ShiftReg, 0, AArch64::sub_32);
+ MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(2).setReg(Trunc.getReg(0));
+ }
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+bool AArch64InstructionSelector::earlySelectSHL(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ // We try to match the immediate variant of LSL, which is actually an alias
+ // for a special case of UBFM. Otherwise, we fall back to the imported
+ // selector which will match the register variant.
+ assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
+ const auto &MO = I.getOperand(2);
+ auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
+ if (!VRegAndVal)
+ return false;
+
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ if (DstTy.isVector())
+ return false;
+ bool Is64Bit = DstTy.getSizeInBits() == 64;
+ auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
+ auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
+ MachineIRBuilder MIB(I);
+
+ if (!Imm1Fn || !Imm2Fn)
+ return false;
+
+ auto NewI =
+ MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
+ {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
+
+ for (auto &RenderFn : *Imm1Fn)
+ RenderFn(NewI);
+ for (auto &RenderFn : *Imm2Fn)
+ RenderFn(NewI);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
+}
+
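
The selectShiftA_32/selectShiftB_32 (and _64) renderers used here mirror the i32shift_a/i32shift_b transforms from AArch64InstrInfo.td: LSL-by-immediate is an alias of UBFM with immr = (width - shift) mod width and imms = width - 1 - shift. A small illustrative helper, not part of the patch (the function name is invented for this sketch):

// Immediates UBFMWri receives for a 32-bit "LSL #Shift" (0 <= Shift < 32):
//   immr = (32 - Shift) & 31   (i32shift_a)
//   imms = 31 - Shift          (i32shift_b)
// e.g. LSL #3 becomes UBFMWri ..., 29, 28.
struct UbfmImms { unsigned Immr, Imms; };
static UbfmImms lslToUbfm32(unsigned Shift) {
  return { (32u - Shift) & 31u, 31u - Shift };
}
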
+bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ switch (I.getOpcode()) {
+ case TargetOpcode::G_SHL:
+ return earlySelectSHL(I, MRI);
+ default:
+ return false;
+ }
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
assert(I.getParent() && "Instruction should be in a basic block!");
@@ -727,30 +1192,27 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
- const TargetRegisterClass *DefRC = nullptr;
- if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
- DefRC = TRI.getRegClass(DefReg);
- } else {
- const RegClassOrRegBank &RegClassOrBank =
- MRI.getRegClassOrRegBank(DefReg);
+ const RegClassOrRegBank &RegClassOrBank =
+ MRI.getRegClassOrRegBank(DefReg);
- DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ const TargetRegisterClass *DefRC
+ = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ if (!DefRC) {
+ if (!DefTy.isValid()) {
+ LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+ return false;
+ }
+ const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+ DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
if (!DefRC) {
- if (!DefTy.isValid()) {
- LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
- return false;
- }
- const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
- DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
- if (!DefRC) {
- LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
- return false;
- }
+ LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+ return false;
}
}
+
I.setDesc(TII.get(TargetOpcode::PHI));
return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
@@ -769,12 +1231,27 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
+ // Try to do some lowering before we start instruction selecting. These
+ // lowerings are purely transformations on the input G_MIR and so selection
+ // must continue after any modification of the instruction.
+ preISelLower(I);
+
+ // There may be patterns that the importer can't handle optimally, but that
+ // it still selects to a suboptimal sequence, so our custom C++ selection
+ // code later never has a chance to work on them. Therefore, we have an early
+ // selection attempt here to give priority to certain selection routines
+ // over the imported ones.
+ if (earlySelect(I))
+ return true;
+
if (selectImpl(I, CoverageInfo))
return true;
LLT Ty =
I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
+ MachineIRBuilder MIB(I);
+
switch (Opcode) {
case TargetOpcode::G_BRCOND: {
if (Ty.getSizeInBits() > 32) {
@@ -786,7 +1263,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
- const unsigned CondReg = I.getOperand(0).getReg();
+ const Register CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
@@ -826,15 +1303,57 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_BRJT:
+ return selectBrJT(I, MRI);
+
+ case TargetOpcode::G_BSWAP: {
+ // Handle vector types for G_BSWAP directly.
+ Register DstReg = I.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ // We should only get vector types here; everything else is handled by the
+ // importer right now.
+ if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
+ LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
+ return false;
+ }
+
+ // Only handle 4 and 2 element vectors for now.
+ // TODO: 16-bit elements.
+ unsigned NumElts = DstTy.getNumElements();
+ if (NumElts != 4 && NumElts != 2) {
+ LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
+ return false;
+ }
+
+ // Choose the correct opcode for the supported types. Right now, that's
+ // v2s32, v4s32, and v2s64.
+ unsigned Opc = 0;
+ unsigned EltSize = DstTy.getElementType().getSizeInBits();
+ if (EltSize == 32)
+ Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
+ : AArch64::REV32v16i8;
+ else if (EltSize == 64)
+ Opc = AArch64::REV64v16i8;
+
+ // We should always get something by the time we get here...
+ assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
+
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
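
The opcode choice above works because a per-lane byte swap of 32-bit (or 64-bit) elements is exactly what REV32 (or REV64) computes when the vector is reinterpreted as bytes. For reference, a plain scalar sketch of what G_BSWAP does to each 32-bit lane (illustrative only, not part of the patch):

#include <cstdint>

// Scalar equivalent of one 32-bit G_BSWAP lane.
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0x0000FF00u) |
         ((V << 8) & 0x00FF0000u) | (V << 24);
}
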
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 64);
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
@@ -861,7 +1380,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
} else {
// s32 and s64 are covered by tablegen.
- if (Ty != p0) {
+ if (Ty != p0 && Ty != s8 && Ty != s16) {
LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
<< " constant, expected: " << s32 << ", " << s64
<< ", or " << p0 << '\n');
@@ -876,25 +1395,27 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
}
+ // We allow G_CONSTANT of types < 32b.
const unsigned MovOpc =
- DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
-
- I.setDesc(TII.get(MovOpc));
+ DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
if (isFP) {
+ // Either emit an FMOV, or emit a copy and use a normal mov instead.
const TargetRegisterClass &GPRRC =
DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
const TargetRegisterClass &FPRRC =
DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
- const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
+ // Can we use a FMOV instruction to represent the immediate?
+ if (emitFMovForFConstant(I, MRI))
+ return true;
+
+ // Nope. Emit a copy and use a normal mov instead.
+ const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
-
- BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
- TII.get(AArch64::COPY))
- .addDef(DefReg)
- .addUse(DefGPRReg);
+ MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.buildCopy({DefReg}, {DefGPRReg});
if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
@@ -913,6 +1434,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
I.getOperand(1).ChangeToImmediate(Val);
}
+ I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
}
@@ -936,11 +1458,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
- BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
- TII.get(AArch64::COPY))
- .addDef(I.getOperand(0).getReg())
- .addUse(DstReg, 0, AArch64::sub_32);
+ Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
+ .addReg(DstReg, 0, AArch64::sub_32);
RBI.constrainGenericRegister(I.getOperand(0).getReg(),
AArch64::GPR32RegClass, MRI);
I.getOperand(0).setReg(DstReg);
@@ -969,7 +1490,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
TII.get(AArch64::SUBREG_TO_REG))
.addDef(SrcReg)
@@ -1026,8 +1547,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
+ bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
+ MachineIRBuilder MIB(I);
+
LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
if (PtrTy != LLT::pointer(0, 64)) {
@@ -1043,7 +1568,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
unsigned MemSizeInBits = MemOp.getSize() * 8;
- const unsigned PtrReg = I.getOperand(1).getReg();
+ const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
// Sanity-check the pointer register.
@@ -1053,7 +1578,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
"Load/Store pointer operand isn't a pointer");
#endif
- const unsigned ValReg = I.getOperand(0).getReg();
+ const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
const unsigned NewOpc =
@@ -1098,6 +1623,25 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
}
+ if (IsZExtLoad) {
+ // The zextload from a smaller type to i32 should be handled by the importer.
+ if (MRI.getType(ValReg).getSizeInBits() != 64)
+ return false;
+ // If we have a ZEXTLOAD then change the load's type to be a narrower reg
+ // and zero_extend with SUBREG_TO_REG.
+ Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ Register DstReg = I.getOperand(0).getReg();
+ I.getOperand(0).setReg(LdReg);
+
+ MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
+ .addImm(0)
+ .addUse(LdReg)
+ .addImm(AArch64::sub_32);
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
+ MRI);
+ }
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
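
The SUBREG_TO_REG emitted for G_ZEXTLOAD costs no extra instruction: on AArch64, writing a W register already zeroes the upper 32 bits of the corresponding X register, so the node only records that the 64-bit value is the zero-extended 32-bit load result. The C-level equivalent of the selected pattern (illustrative only, not part of the patch):

#include <cstdint>

// What the LDRWui + SUBREG_TO_REG pair computes: a 32-bit load
// zero-extended to 64 bits, with no separate extend instruction.
static uint64_t zextLoad32(const uint32_t *P) {
  return static_cast<uint64_t>(*P);
}
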
@@ -1107,7 +1651,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
@@ -1134,10 +1678,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
- case TargetOpcode::G_OR:
+ case TargetOpcode::G_ASHR:
+ if (MRI.getType(I.getOperand(0).getReg()).isVector())
+ return selectVectorASHR(I, MRI);
+ LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
+ if (Opcode == TargetOpcode::G_SHL &&
+ MRI.getType(I.getOperand(0).getReg()).isVector())
+ return selectVectorSHL(I, MRI);
+ LLVM_FALLTHROUGH;
+ case TargetOpcode::G_OR:
case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
case TargetOpcode::G_GEP: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -1145,7 +1696,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const unsigned OpSize = Ty.getSizeInBits();
- const unsigned DefReg = I.getOperand(0).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
@@ -1160,6 +1711,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_UADDO: {
+ // TODO: Support other types.
+ unsigned OpSize = Ty.getSizeInBits();
+ if (OpSize != 32 && OpSize != 64) {
+ LLVM_DEBUG(
+ dbgs()
+ << "G_UADDO currently only supported for 32 and 64 b types.\n");
+ return false;
+ }
+
+ // TODO: Support vectors.
+ if (Ty.isVector()) {
+ LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
+ return false;
+ }
+
+ // Add and set the set condition flag.
+ unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
+ MachineIRBuilder MIRBuilder(I);
+ auto AddsMI = MIRBuilder.buildInstr(
+ AddsOpc, {I.getOperand(0).getReg()},
+ {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
+ constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
+
+ // Now, put the overflow result in the register given by the first operand
+ // to the G_UADDO. CSINC increments the result when the predicate is false,
+ // so to get the increment when it's true, we need to use the inverse. In
+ // this case, we want to increment when carry is set.
+ auto CsetMI = MIRBuilder
+ .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+ {Register(AArch64::WZR), Register(AArch64::WZR)})
+ .addImm(getInvertedCondCode(AArch64CC::HS));
+ constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+ }
+
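
The CSINC idiom above works because CSINC Wd, WZR, WZR, cc produces 0 when cc holds and 1 otherwise; passing the inverse of HS therefore yields 1 exactly when the carry flag is set, i.e. when the unsigned addition overflowed. A scalar model of what the ADDS + CSINC pair computes for the 32-bit case (illustrative only, not part of the patch):

#include <cstdint>

// Sum gets the wrapped result, Overflow gets 1 iff the add carried out.
static void uaddo32(uint32_t A, uint32_t B, uint32_t &Sum, uint32_t &Overflow) {
  Sum = A + B;                  // ADDSWrr result
  Overflow = Sum < A ? 1u : 0u; // HS (carry set) <=> unsigned overflow
}
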
case TargetOpcode::G_PTR_MASK: {
uint64_t Align = I.getOperand(2).getImm();
if (Align >= 64 || Align == 0)
@@ -1176,8 +1764,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
@@ -1234,8 +1822,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
}
case TargetOpcode::G_ANYEXT: {
- const unsigned DstReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
if (RBDst.getID() != AArch64::GPRRegBankID) {
@@ -1266,7 +1854,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
// At this point G_ANYEXT is just like a plain COPY, but we need
// to explicitly form the 64-bit value if any.
if (DstSize > 32) {
- unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
+ Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
.addDef(ExtSrc)
.addImm(0)
@@ -1283,8 +1871,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
SrcTy = MRI.getType(I.getOperand(1).getReg());
const bool isSigned = Opcode == TargetOpcode::G_SEXT;
- const unsigned DefReg = I.getOperand(0).getReg();
- const unsigned SrcReg = I.getOperand(1).getReg();
+ const Register DefReg = I.getOperand(0).getReg();
+ const Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
@@ -1302,7 +1890,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
- const unsigned SrcXReg =
+ const Register SrcXReg =
MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
.addDef(SrcXReg)
@@ -1358,11 +1946,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_BITCAST:
// Imported SelectionDAG rules can handle every bitcast except those that
// bitcast from a type to the same type. Ideally, these shouldn't occur
- // but we might not run an optimizer that deletes them.
- if (MRI.getType(I.getOperand(0).getReg()) ==
- MRI.getType(I.getOperand(1).getReg()))
- return selectCopy(I, TII, MRI, TRI, RBI);
- return false;
+ // but we might not run an optimizer that deletes them. The other exception
+ // is bitcasts involving pointer types, as SelectionDAG has no knowledge
+ // of them.
+ return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_SELECT: {
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
@@ -1371,20 +1958,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
- const unsigned CondReg = I.getOperand(1).getReg();
- const unsigned TReg = I.getOperand(2).getReg();
- const unsigned FReg = I.getOperand(3).getReg();
+ const Register CondReg = I.getOperand(1).getReg();
+ const Register TReg = I.getOperand(2).getReg();
+ const Register FReg = I.getOperand(3).getReg();
- unsigned CSelOpc = 0;
-
- if (Ty == LLT::scalar(32)) {
- CSelOpc = AArch64::CSELWr;
- } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
- CSelOpc = AArch64::CSELXr;
- } else {
- return false;
- }
+ if (tryOptSelect(I))
+ return true;
+ Register CSelOpc = selectSelectOpc(I, MRI, RBI);
MachineInstr &TstMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
.addDef(AArch64::WZR)
@@ -1404,48 +1985,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return true;
}
case TargetOpcode::G_ICMP: {
+ if (Ty.isVector())
+ return selectVectorICmp(I, MRI);
+
if (Ty != LLT::scalar(32)) {
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
<< ", expected: " << LLT::scalar(32) << '\n');
return false;
}
- unsigned CmpOpc = 0;
- unsigned ZReg = 0;
-
- LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
- if (CmpTy == LLT::scalar(32)) {
- CmpOpc = AArch64::SUBSWrr;
- ZReg = AArch64::WZR;
- } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
- CmpOpc = AArch64::SUBSXrr;
- ZReg = AArch64::XZR;
- } else {
+ MachineIRBuilder MIRBuilder(I);
+ if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
+ MIRBuilder))
return false;
- }
-
- // CSINC increments the result by one when the condition code is false.
- // Therefore, we have to invert the predicate to get an increment by 1 when
- // the predicate is true.
- const AArch64CC::CondCode invCC =
- changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
- (CmpInst::Predicate)I.getOperand(1).getPredicate()));
-
- MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
- .addDef(ZReg)
- .addUse(I.getOperand(2).getReg())
- .addUse(I.getOperand(3).getReg());
-
- MachineInstr &CSetMI =
- *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
- .addDef(I.getOperand(0).getReg())
- .addUse(AArch64::WZR)
- .addUse(AArch64::WZR)
- .addImm(invCC);
-
- constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
-
+ emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
+ MIRBuilder);
I.eraseFromParent();
return true;
}
@@ -1457,15 +2011,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
- unsigned CmpOpc = 0;
- LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
- if (CmpTy == LLT::scalar(32)) {
- CmpOpc = AArch64::FCMPSrr;
- } else if (CmpTy == LLT::scalar(64)) {
- CmpOpc = AArch64::FCMPDrr;
- } else {
+ unsigned CmpOpc = selectFCMPOpc(I, MRI);
+ if (!CmpOpc)
return false;
- }
// FIXME: regbank
@@ -1473,12 +2021,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
changeFCMPPredToAArch64CC(
(CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
- MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
- .addUse(I.getOperand(2).getReg())
- .addUse(I.getOperand(3).getReg());
+ // Partially build the compare. Decide if we need to add a use for the
+ // third operand based off whether or not we're comparing against 0.0.
+ auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+ .addUse(I.getOperand(2).getReg());
- const unsigned DefReg = I.getOperand(0).getReg();
- unsigned Def1Reg = DefReg;
+ // If we don't have an immediate compare, then we need to add a use of the
+ // register which wasn't used for the immediate.
+ // Note that the immediate will always be the last operand.
+ if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+ CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
+
+ const Register DefReg = I.getOperand(0).getReg();
+ Register Def1Reg = DefReg;
if (CC2 != AArch64CC::AL)
Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
@@ -1490,7 +2045,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
.addImm(getInvertedCondCode(CC1));
if (CC2 != AArch64CC::AL) {
- unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineInstr &CSet2MI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
.addDef(Def2Reg)
@@ -1505,8 +2060,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
}
-
- constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
I.eraseFromParent();
@@ -1515,19 +2069,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_VASTART:
return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
: selectVaStartAAPCS(I, MF, MRI);
+ case TargetOpcode::G_INTRINSIC:
+ return selectIntrinsic(I, MRI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
- if (!I.getOperand(0).isIntrinsicID())
- return false;
- if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
- return false;
- BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::BRK))
- .addImm(1);
- I.eraseFromParent();
- return true;
+ return selectIntrinsicWithSideEffects(I, MRI);
case TargetOpcode::G_IMPLICIT_DEF: {
I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- const unsigned DstReg = I.getOperand(0).getReg();
+ const Register DstReg = I.getOperand(0).getReg();
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
const TargetRegisterClass *DstRC =
getRegClassForTypeOnBank(DstTy, DstRB, RBI);
@@ -1552,44 +2101,374 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}
}
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ return selectIntrinsicTrunc(I, MRI);
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
return selectMergeValues(I, MRI);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return selectUnmergeValues(I, MRI);
+ case TargetOpcode::G_SHUFFLE_VECTOR:
+ return selectShuffleVector(I, MRI);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return selectExtractElt(I, MRI);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return selectInsertElt(I, MRI);
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return selectConcatVectors(I, MRI);
+ case TargetOpcode::G_JUMP_TABLE:
+ return selectJumpTable(I, MRI);
}
return false;
}
-bool AArch64InstructionSelector::emitScalarToVector(
- unsigned &Dst, const LLT DstTy, const TargetRegisterClass *DstRC,
- unsigned Scalar, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, MachineRegisterInfo &MRI) const {
- Dst = MRI.createVirtualRegister(DstRC);
+bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
+ MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
+ Register JTAddr = I.getOperand(0).getReg();
+ unsigned JTI = I.getOperand(1).getIndex();
+ Register Index = I.getOperand(2).getReg();
+ MachineIRBuilder MIB(I);
+
+ Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
+ MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
+ {JTAddr, Index})
+ .addJumpTableIndex(JTI);
+
+ // Build the indirect branch.
+ MIB.buildInstr(AArch64::BR, {}, {TargetReg});
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectJumpTable(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
+ assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
+
+ Register DstReg = I.getOperand(0).getReg();
+ unsigned JTI = I.getOperand(1).getIndex();
+ // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
+ MachineIRBuilder MIB(I);
+ auto MovMI =
+ MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
+ .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
+ .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
+}
- unsigned UndefVec = MRI.createVirtualRegister(DstRC);
- MachineInstr &UndefMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
- TII.get(TargetOpcode::IMPLICIT_DEF))
- .addDef(UndefVec);
+bool AArch64InstructionSelector::selectIntrinsicTrunc(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
+
+ // Select the correct opcode.
+ unsigned Opc = 0;
+ if (!SrcTy.isVector()) {
+ switch (SrcTy.getSizeInBits()) {
+ default:
+ case 16:
+ Opc = AArch64::FRINTZHr;
+ break;
+ case 32:
+ Opc = AArch64::FRINTZSr;
+ break;
+ case 64:
+ Opc = AArch64::FRINTZDr;
+ break;
+ }
+ } else {
+ unsigned NumElts = SrcTy.getNumElements();
+ switch (SrcTy.getElementType().getSizeInBits()) {
+ default:
+ break;
+ case 16:
+ if (NumElts == 4)
+ Opc = AArch64::FRINTZv4f16;
+ else if (NumElts == 8)
+ Opc = AArch64::FRINTZv8f16;
+ break;
+ case 32:
+ if (NumElts == 2)
+ Opc = AArch64::FRINTZv2f32;
+ else if (NumElts == 4)
+ Opc = AArch64::FRINTZv4f32;
+ break;
+ case 64:
+ if (NumElts == 2)
+ Opc = AArch64::FRINTZv2f64;
+ break;
+ }
+ }
+
+ if (!Opc) {
+ // Didn't get an opcode above, bail.
+ LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
+ return false;
+ }
+
+ // Legalization would have set us up perfectly for this; we just need to
+ // set the opcode and move on.
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool AArch64InstructionSelector::selectIntrinsicRound(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
+
+ // Select the correct opcode.
+ unsigned Opc = 0;
+ if (!SrcTy.isVector()) {
+ switch (SrcTy.getSizeInBits()) {
+ default:
+ case 16:
+ Opc = AArch64::FRINTAHr;
+ break;
+ case 32:
+ Opc = AArch64::FRINTASr;
+ break;
+ case 64:
+ Opc = AArch64::FRINTADr;
+ break;
+ }
+ } else {
+ unsigned NumElts = SrcTy.getNumElements();
+ switch (SrcTy.getElementType().getSizeInBits()) {
+ default:
+ break;
+ case 16:
+ if (NumElts == 4)
+ Opc = AArch64::FRINTAv4f16;
+ else if (NumElts == 8)
+ Opc = AArch64::FRINTAv8f16;
+ break;
+ case 32:
+ if (NumElts == 2)
+ Opc = AArch64::FRINTAv2f32;
+ else if (NumElts == 4)
+ Opc = AArch64::FRINTAv4f32;
+ break;
+ case 64:
+ if (NumElts == 2)
+ Opc = AArch64::FRINTAv2f64;
+ break;
+ }
+ }
+
+ if (!Opc) {
+ // Didn't get an opcode above, bail.
+ LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
+ return false;
+ }
+
+ // Legalization would have set us up perfectly for this; we just need to
+ // set the opcode and move on.
+ I.setDesc(TII.get(Opc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
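
For readers mapping the opcodes back to semantics: FRINTZ rounds toward zero and FRINTA rounds to nearest with ties away from zero, which is why they implement G_INTRINSIC_TRUNC and G_INTRINSIC_ROUND respectively. Scalar C++ equivalents (illustrative only, not part of the patch):

#include <cmath>

static double frintz(double X) { return std::trunc(X); } // G_INTRINSIC_TRUNC
static double frinta(double X) { return std::round(X); } // G_INTRINSIC_ROUND
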
+bool AArch64InstructionSelector::selectVectorICmp(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ Register DstReg = I.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register SrcReg = I.getOperand(2).getReg();
+ Register Src2Reg = I.getOperand(3).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+ unsigned NumElts = DstTy.getNumElements();
+
+ // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
+ // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
+ // Third index is cc opcode:
+ // 0 == eq
+ // 1 == ugt
+ // 2 == uge
+ // 3 == ult
+ // 4 == ule
+ // 5 == sgt
+ // 6 == sge
+ // 7 == slt
+ // 8 == sle
+ // ne is done by negating 'eq' result.
+
+ // The table below assumes that for some comparisons the operands will be
+ // commuted.
+ // ult op == commute + ugt op
+ // ule op == commute + uge op
+ // slt op == commute + sgt op
+ // sle op == commute + sge op
+ unsigned PredIdx = 0;
+ bool SwapOperands = false;
+ CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+ switch (Pred) {
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_EQ:
+ PredIdx = 0;
+ break;
+ case CmpInst::ICMP_UGT:
+ PredIdx = 1;
+ break;
+ case CmpInst::ICMP_UGE:
+ PredIdx = 2;
+ break;
+ case CmpInst::ICMP_ULT:
+ PredIdx = 3;
+ SwapOperands = true;
+ break;
+ case CmpInst::ICMP_ULE:
+ PredIdx = 4;
+ SwapOperands = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ PredIdx = 5;
+ break;
+ case CmpInst::ICMP_SGE:
+ PredIdx = 6;
+ break;
+ case CmpInst::ICMP_SLT:
+ PredIdx = 7;
+ SwapOperands = true;
+ break;
+ case CmpInst::ICMP_SLE:
+ PredIdx = 8;
+ SwapOperands = true;
+ break;
+ default:
+ llvm_unreachable("Unhandled icmp predicate");
+ return false;
+ }
+
+ // This table obviously should be tablegen'd when we have our GISel native
+ // tablegen selector.
+
+ static const unsigned OpcTable[4][4][9] = {
+ {
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+ AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+ AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+ {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+ AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+ AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
+ },
+ {
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+ AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+ AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+ {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+ AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+ AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */}
+ },
+ {
+ {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+ AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+ AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+ {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+ AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+ AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */}
+ },
+ {
+ {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+ AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+ AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */}
+ },
+ };
+ unsigned EltIdx = Log2_32(SrcEltSize / 8);
+ unsigned NumEltsIdx = Log2_32(NumElts / 2);
+ unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
+ if (!Opc) {
+ LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
+ return false;
+ }
+
+ const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const TargetRegisterClass *SrcRC =
+ getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+ if (!SrcRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+ return false;
+ }
+
+ unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
+ if (SrcTy.getSizeInBits() == 128)
+ NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
+
+ if (SwapOperands)
+ std::swap(SrcReg, Src2Reg);
+
+ MachineIRBuilder MIB(I);
+ auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
+ constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+
+ // Invert if we had a 'ne' cc.
+ if (NotOpc) {
+ Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
+ constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+ } else {
+ MIB.buildCopy(DstReg, Cmp.getReg(0));
+ }
+ RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
+ I.eraseFromParent();
+ return true;
+}
+
+MachineInstr *AArch64InstructionSelector::emitScalarToVector(
+ unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
+ MachineIRBuilder &MIRBuilder) const {
+ auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
auto BuildFn = [&](unsigned SubregIndex) {
- MachineInstr &InsMI = *BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
- TII.get(TargetOpcode::INSERT_SUBREG))
- .addDef(Dst)
- .addUse(UndefVec)
- .addUse(Scalar)
- .addImm(SubregIndex);
- constrainSelectedInstRegOperands(UndefMI, TII, TRI, RBI);
- return constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+ auto Ins =
+ MIRBuilder
+ .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
+ .addImm(SubregIndex);
+ constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+ return &*Ins;
};
- switch (DstTy.getElementType().getSizeInBits()) {
+ switch (EltSize) {
+ case 16:
+ return BuildFn(AArch64::hsub);
case 32:
return BuildFn(AArch64::ssub);
case 64:
return BuildFn(AArch64::dsub);
default:
- return false;
+ return nullptr;
}
}
@@ -1610,14 +2489,14 @@ bool AArch64InstructionSelector::selectMergeValues(
return false;
auto *DstRC = &AArch64::GPR64RegClass;
- unsigned SubToRegDef = MRI.createVirtualRegister(DstRC);
+ Register SubToRegDef = MRI.createVirtualRegister(DstRC);
MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG))
.addDef(SubToRegDef)
.addImm(0)
.addUse(I.getOperand(1).getReg())
.addImm(AArch64::sub_32);
- unsigned SubToRegDef2 = MRI.createVirtualRegister(DstRC);
+ Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
// Need to anyext the second scalar before we can use bfm
MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(TargetOpcode::SUBREG_TO_REG))
@@ -1639,122 +2518,1362 @@ bool AArch64InstructionSelector::selectMergeValues(
return true;
}
-bool AArch64InstructionSelector::selectBuildVector(
+static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
+ const unsigned EltSize) {
+ // Choose a lane copy opcode and subregister based off of the size of the
+ // vector's elements.
+ switch (EltSize) {
+ case 16:
+ CopyOpc = AArch64::CPYi16;
+ ExtractSubReg = AArch64::hsub;
+ break;
+ case 32:
+ CopyOpc = AArch64::CPYi32;
+ ExtractSubReg = AArch64::ssub;
+ break;
+ case 64:
+ CopyOpc = AArch64::CPYi64;
+ ExtractSubReg = AArch64::dsub;
+ break;
+ default:
+ // Unknown size, bail out.
+ LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
+ return false;
+ }
+ return true;
+}
+
+MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
+ Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
+ Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ unsigned CopyOpc = 0;
+ unsigned ExtractSubReg = 0;
+ if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
+ LLVM_DEBUG(
+ dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
+ return nullptr;
+ }
+
+ const TargetRegisterClass *DstRC =
+ getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
+ if (!DstRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
+ return nullptr;
+ }
+
+ const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
+ const LLT &VecTy = MRI.getType(VecReg);
+ const TargetRegisterClass *VecRC =
+ getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
+ if (!VecRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+ return nullptr;
+ }
+
+ // The vector register we'll copy the lane from; if the source vector is
+ // unpacked, it is first widened to 128 bits below.
+ Register InsertReg = VecReg;
+ if (!DstReg)
+ DstReg = MRI.createVirtualRegister(DstRC);
+ // If the lane index is 0, we just use a subregister COPY.
+ if (LaneIdx == 0) {
+ auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
+ .addReg(VecReg, 0, ExtractSubReg);
+ RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
+ return &*Copy;
+ }
+
+ // Lane copies require 128-bit wide registers. If we're dealing with an
+ // unpacked vector, then we need to move up to that width. Insert an implicit
+ // def and a subregister insert to get us there.
+ if (VecTy.getSizeInBits() != 128) {
+ MachineInstr *ScalarToVector = emitScalarToVector(
+ VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
+ if (!ScalarToVector)
+ return nullptr;
+ InsertReg = ScalarToVector->getOperand(0).getReg();
+ }
+
+ MachineInstr *LaneCopyMI =
+ MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
+ constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
+
+ // Make sure that we actually constrain the initial copy.
+ RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
+ return LaneCopyMI;
+}
+
+bool AArch64InstructionSelector::selectExtractElt(
MachineInstr &I, MachineRegisterInfo &MRI) const {
- assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
- // Until we port more of the optimized selections, for now just use a vector
- // insert sequence.
- const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
- unsigned EltSize = EltTy.getSizeInBits();
- if (EltSize < 32 || EltSize > 64)
- return false; // Don't support all element types yet.
- const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
- unsigned Opc;
- unsigned SubregIdx;
+ assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
+ "unexpected opcode!");
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT NarrowTy = MRI.getType(DstReg);
+ const Register SrcReg = I.getOperand(1).getReg();
+ const LLT WideTy = MRI.getType(SrcReg);
+ (void)WideTy;
+ assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
+ "source register size too small!");
+ assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
+
+ // Need the lane index to determine the correct copy opcode.
+ MachineOperand &LaneIdxOp = I.getOperand(2);
+ assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
+
+ if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
+ LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
+ return false;
+ }
+
+ // Find the index to extract from.
+ auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
+ if (!VRegAndVal)
+ return false;
+ unsigned LaneIdx = VRegAndVal->Value;
+
+ MachineIRBuilder MIRBuilder(I);
+
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
+ LaneIdx, MIRBuilder);
+ if (!Extract)
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectSplitVectorUnmerge(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ unsigned NumElts = I.getNumOperands() - 1;
+ Register SrcReg = I.getOperand(NumElts).getReg();
+ const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(SrcReg);
+
+ assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
+ if (SrcTy.getSizeInBits() > 128) {
+ LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
+ return false;
+ }
+
+ MachineIRBuilder MIB(I);
+
+ // We implement a split vector operation by treating the sub-vectors as
+ // scalars and extracting them.
+ const RegisterBank &DstRB =
+ *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
+ for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
+ Register Dst = I.getOperand(OpIdx).getReg();
+ MachineInstr *Extract =
+ emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
+ if (!Extract)
+ return false;
+ }
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectUnmergeValues(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "unexpected opcode");
+
+ // TODO: Handle unmerging into GPRs and from scalars to scalars.
+ if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+ AArch64::FPRRegBankID ||
+ RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
+ AArch64::FPRRegBankID) {
+ LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
+ "currently unsupported.\n");
+ return false;
+ }
+
+ // The last operand is the vector source register, and every other operand is
+ // a register to unpack into.
+ unsigned NumElts = I.getNumOperands() - 1;
+ Register SrcReg = I.getOperand(NumElts).getReg();
+ const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT WideTy = MRI.getType(SrcReg);
+ (void)WideTy;
+ assert(WideTy.isVector() && "can only unmerge from vector types!");
+ assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
+ "source register size too small!");
+
+ if (!NarrowTy.isScalar())
+ return selectSplitVectorUnmerge(I, MRI);
+
+ MachineIRBuilder MIB(I);
+
+ // Choose a lane copy opcode and subregister based off of the size of the
+ // vector's elements.
+ unsigned CopyOpc = 0;
+ unsigned ExtractSubReg = 0;
+ if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
+ return false;
+
+ // Set up for the lane copies.
+ MachineBasicBlock &MBB = *I.getParent();
+
+ // Stores the registers we'll be copying from.
+ SmallVector<Register, 4> InsertRegs;
+
+ // We'll use the first register twice, so we only need NumElts-1 registers.
+ unsigned NumInsertRegs = NumElts - 1;
+
+ // If our elements fit into exactly 128 bits, then we can copy from the source
+ // directly. Otherwise, we need to do a bit of setup with some subregister
+ // inserts.
+ if (NarrowTy.getSizeInBits() * NumElts == 128) {
+ InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
+ } else {
+ // Otherwise, we have to perform subregister inserts. For each insert, create an
+ // implicit def and a subregister insert, and save the register we create.
+ for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
+ Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ MachineInstr &ImpDefMI =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
+ ImpDefReg);
+
+ // Now, create the subregister insert from SrcReg.
+ Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ MachineInstr &InsMI =
+ *BuildMI(MBB, I, I.getDebugLoc(),
+ TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
+ .addUse(ImpDefReg)
+ .addUse(SrcReg)
+ .addImm(AArch64::dsub);
+
+ constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+
+ // Save the register so that we can copy from it after.
+ InsertRegs.push_back(InsertReg);
+ }
+ }
+
+ // Now that we've created any necessary subregister inserts, we can
+ // create the copies.
+ //
+ // Perform the first copy separately as a subregister copy.
+ Register CopyTo = I.getOperand(0).getReg();
+ auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
+ .addReg(InsertRegs[0], 0, ExtractSubReg);
+ constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
+
+ // Now, perform the remaining copies as vector lane copies.
+ unsigned LaneIdx = 1;
+ for (Register InsReg : InsertRegs) {
+ Register CopyTo = I.getOperand(LaneIdx).getReg();
+ MachineInstr &CopyInst =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
+ .addUse(InsReg)
+ .addImm(LaneIdx);
+ constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
+ ++LaneIdx;
+ }
+
+ // Separately constrain the first copy's destination. Because of the
+ // limitation in constrainOperandRegClass, we can't guarantee that this will
+ // actually be constrained. So, do it ourselves using the second operand.
+ const TargetRegisterClass *RC =
+ MRI.getRegClassOrNull(I.getOperand(1).getReg());
+ if (!RC) {
+ LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
+ return false;
+ }
+
+ RBI.constrainGenericRegister(CopyTo, *RC, MRI);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectConcatVectors(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+ "Unexpected opcode");
+ Register Dst = I.getOperand(0).getReg();
+ Register Op1 = I.getOperand(1).getReg();
+ Register Op2 = I.getOperand(2).getReg();
+ MachineIRBuilder MIRBuilder(I);
+ MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
+ if (!ConcatMI)
+ return false;
+ I.eraseFromParent();
+ return true;
+}
+
+void AArch64InstructionSelector::collectShuffleMaskIndices(
+ MachineInstr &I, MachineRegisterInfo &MRI,
+ SmallVectorImpl<Optional<int>> &Idxs) const {
+ MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
+ assert(
+ MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
+ "G_SHUFFLE_VECTOR should have a constant mask operand as G_BUILD_VECTOR");
+ // Find the constant indices.
+ for (unsigned i = 1, e = MaskDef->getNumOperands(); i < e; ++i) {
+ // Look through copies.
+ MachineInstr *ScalarDef =
+ getDefIgnoringCopies(MaskDef->getOperand(i).getReg(), MRI);
+ assert(ScalarDef && "Could not find vreg def of shufflevec index op");
+ if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
+ // This must be an undef if it's not a constant.
+ assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
+ Idxs.push_back(None);
+ } else {
+ Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
+ }
+ }
+}
+
+unsigned
+AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
+ MachineFunction &MF) const {
+ Type *CPTy = CPVal->getType();
+ unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
+ if (Align == 0)
+ Align = MF.getDataLayout().getTypeAllocSize(CPTy);
+
+ MachineConstantPool *MCP = MF.getConstantPool();
+ return MCP->getConstantPoolIndex(CPVal, Align);
+}
+
+MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
+ Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
+ unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
+
+ auto Adrp =
+ MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
+ .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
+
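+ // The constant's address is formed as ADRP (page) plus a page offset folded
+ // into the load below.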
+ MachineInstr *LoadMI = nullptr;
+ switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
+ case 16:
+ LoadMI =
+ &*MIRBuilder
+ .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
+ .addConstantPoolIndex(CPIdx, 0,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ break;
+ case 8:
+ LoadMI = &*MIRBuilder
+ .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
+ .addConstantPoolIndex(
+ CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ break;
+ default:
+ LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
+ << *CPVal->getType());
+ return nullptr;
+ }
+ constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
+ return LoadMI;
+}
+
+/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
+/// size and RB.
+static std::pair<unsigned, unsigned>
+getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
+ unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
if (EltSize == 32) {
Opc = AArch64::INSvi32gpr;
SubregIdx = AArch64::ssub;
- } else {
+ } else if (EltSize == 64) {
Opc = AArch64::INSvi64gpr;
SubregIdx = AArch64::dsub;
+ } else {
+ llvm_unreachable("invalid elt size!");
}
} else {
- if (EltSize == 32) {
+ if (EltSize == 8) {
+ Opc = AArch64::INSvi8lane;
+ SubregIdx = AArch64::bsub;
+ } else if (EltSize == 16) {
+ Opc = AArch64::INSvi16lane;
+ SubregIdx = AArch64::hsub;
+ } else if (EltSize == 32) {
Opc = AArch64::INSvi32lane;
SubregIdx = AArch64::ssub;
- } else {
+ } else if (EltSize == 64) {
Opc = AArch64::INSvi64lane;
SubregIdx = AArch64::dsub;
+ } else {
+ llvm_unreachable("invalid elt size!");
}
}
+ return std::make_pair(Opc, SubregIdx);
+}
- if (EltSize * DstTy.getNumElements() != 128)
- return false; // Don't handle unpacked vectors yet.
+MachineInstr *
+AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
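+ // CMN is ADDS with the zero register as the destination; the table below picks
+ // the W/X and register/immediate form.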
+ static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
+ {AArch64::ADDSWrr, AArch64::ADDSWri}};
+ bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
+ auto ImmFns = selectArithImmed(RHS);
+ unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
+ Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+
+ auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()});
+
+ // If we matched a valid constant immediate, add those operands.
+ if (ImmFns) {
+ for (auto &RenderFn : *ImmFns)
+ RenderFn(CmpMI);
+ } else {
+ CmpMI.addUse(RHS.getReg());
+ }
- unsigned DstVec = 0;
- const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(
- DstTy, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
- emitScalarToVector(DstVec, DstTy, DstRC, I.getOperand(1).getReg(),
- *I.getParent(), I.getIterator(), MRI);
- for (unsigned i = 2, e = DstTy.getSizeInBits() / EltSize + 1; i < e; ++i) {
- unsigned InsDef;
- // For the last insert re-use the dst reg of the G_BUILD_VECTOR.
- if (i + 1 < e)
- InsDef = MRI.createVirtualRegister(DstRC);
- else
- InsDef = I.getOperand(0).getReg();
- unsigned LaneIdx = i - 1;
- if (RB.getID() == AArch64::FPRRegBankID) {
- unsigned ImpDef = MRI.createVirtualRegister(DstRC);
- MachineInstr &ImpDefMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(TargetOpcode::IMPLICIT_DEF))
- .addDef(ImpDef);
- unsigned InsSubDef = MRI.createVirtualRegister(DstRC);
- MachineInstr &InsSubMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(TargetOpcode::INSERT_SUBREG))
- .addDef(InsSubDef)
- .addUse(ImpDef)
- .addUse(I.getOperand(i).getReg())
- .addImm(SubregIdx);
- MachineInstr &InsEltMI =
- *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
- .addDef(InsDef)
- .addUse(DstVec)
- .addImm(LaneIdx)
- .addUse(InsSubDef)
- .addImm(0);
- constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(InsSubMI, TII, TRI, RBI);
- constrainSelectedInstRegOperands(InsEltMI, TII, TRI, RBI);
- DstVec = InsDef;
- } else {
- MachineInstr &InsMI =
- *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc))
- .addDef(InsDef)
- .addUse(DstVec)
- .addImm(LaneIdx)
- .addUse(I.getOperand(i).getReg());
- constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
- DstVec = InsDef;
+ constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+ return &*CmpMI;
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+ unsigned RegSize = MRI.getType(LHS).getSizeInBits();
+ bool Is32Bit = (RegSize == 32);
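+ // TST is ANDS with the zero register as the destination; the table below picks
+ // the W/X and register/immediate form.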
+ static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
+ {AArch64::ANDSWrr, AArch64::ANDSWri}};
+ Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+
+ // We might be able to fold an immediate into the TST. We need to make sure
+ // it's a logical immediate, though, since ANDS requires that.
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
+ bool IsImmForm = ValAndVReg.hasValue() &&
+ AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
+ unsigned Opc = OpcTable[Is32Bit][IsImmForm];
+ auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
+
+ if (IsImmForm)
+ TstMI.addImm(
+ AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
+ else
+ TstMI.addUse(RHS);
+
+ constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+ return &*TstMI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
+ MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+
+ // Fold the compare if possible.
+ MachineInstr *FoldCmp =
+ tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+ if (FoldCmp)
+ return FoldCmp;
+
+ // Can't fold into a CMN. Just emit a normal compare.
+ unsigned CmpOpc = 0;
+ Register ZReg;
+
+ LLT CmpTy = MRI.getType(LHS.getReg());
+ assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
+ "Expected scalar or pointer");
+ if (CmpTy == LLT::scalar(32)) {
+ CmpOpc = AArch64::SUBSWrr;
+ ZReg = AArch64::WZR;
+ } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
+ CmpOpc = AArch64::SUBSXrr;
+ ZReg = AArch64::XZR;
+ } else {
+ return nullptr;
+ }
+
+ // Try to match immediate forms.
+ auto ImmFns = selectArithImmed(RHS);
+ if (ImmFns)
+ CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
+
+ auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
+ // If we matched a valid constant immediate, add those operands.
+ if (ImmFns) {
+ for (auto &RenderFn : *ImmFns)
+ RenderFn(CmpMI);
+ } else {
+ CmpMI.addUse(RHS.getReg());
+ }
+
+ // Make sure that we can constrain the compare that we emitted.
+ constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+ return &*CmpMI;
+}
+
+MachineInstr *AArch64InstructionSelector::emitVectorConcat(
+ Optional<Register> Dst, Register Op1, Register Op2,
+ MachineIRBuilder &MIRBuilder) const {
+ // We implement a vector concat by:
+ // 1. Use scalar_to_vector to insert the lower vector into the larger dest
+ // 2. Insert the upper vector into the destination's upper element
+ // TODO: some of this code is common with G_BUILD_VECTOR handling.
+ MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+
+ const LLT Op1Ty = MRI.getType(Op1);
+ const LLT Op2Ty = MRI.getType(Op2);
+
+ if (Op1Ty != Op2Ty) {
+ LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
+ return nullptr;
+ }
+ assert(Op1Ty.isVector() && "Expected a vector for vector concat");
+
+ if (Op1Ty.getSizeInBits() >= 128) {
+ LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
+ return nullptr;
+ }
+
+ // At the moment we just support 64 bit vector concats.
+ if (Op1Ty.getSizeInBits() != 64) {
+ LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
+ return nullptr;
+ }
+
+ const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
+ const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
+ const TargetRegisterClass *DstRC =
+ getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
+
+ MachineInstr *WidenedOp1 =
+ emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
+ MachineInstr *WidenedOp2 =
+ emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
+ if (!WidenedOp1 || !WidenedOp2) {
+ LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
+ return nullptr;
+ }
+
+ // Now do the insert of the upper element.
+ unsigned InsertOpc, InsSubRegIdx;
+ std::tie(InsertOpc, InsSubRegIdx) =
+ getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
+
+ if (!Dst)
+ Dst = MRI.createVirtualRegister(DstRC);
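+ // Insert lane 0 of the widened Op2 into lane 1 of the widened Op1 to form the
+ // concatenated result.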
+ auto InsElt =
+ MIRBuilder
+ .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
+ .addImm(1) /* Lane index */
+ .addUse(WidenedOp2->getOperand(0).getReg())
+ .addImm(0);
+ constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
+ return &*InsElt;
+}
+
+MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
+ "Expected a G_FCONSTANT!");
+ MachineOperand &ImmOp = I.getOperand(1);
+ unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+
+ // Only handle 32 and 64 bit defs for now.
+ if (DefSize != 32 && DefSize != 64)
+ return nullptr;
+
+ // Don't handle null values using FMOV.
+ if (ImmOp.getFPImm()->isNullValue())
+ return nullptr;
+
+ // Get the immediate representation for the FMOV.
+ const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
+ int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
+ : AArch64_AM::getFP64Imm(ImmValAPF);
+
+ // If this is -1, it means the immediate can't be represented as the requested
+ // floating point value. Bail.
+ if (Imm == -1)
+ return nullptr;
+
+ // Update MI to represent the new FMOV instruction, constrain it, and return.
+ ImmOp.ChangeToImmediate(Imm);
+ unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
+ I.setDesc(TII.get(MovOpc));
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return &I;
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
+ MachineIRBuilder &MIRBuilder) const {
+ // CSINC increments the result when the predicate is false. Invert it.
+ const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
+ CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
+ auto I =
+ MIRBuilder
+ .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)})
+ .addImm(InvCC);
+ constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
+ return &*I;
+}
+
+bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
+ MachineIRBuilder MIB(I);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+
+ // We want to recognize this pattern:
+ //
+ // $z = G_FCMP pred, $x, $y
+ // ...
+ // $w = G_SELECT $z, $a, $b
+ //
+ // Where the value of $z is *only* ever used by the G_SELECT (possibly with
+ // some copies/truncs in between.)
+ //
+ // If we see this, then we can emit something like this:
+ //
+ // fcmp $x, $y
+ // fcsel $w, $a, $b, pred
+ //
+ // Rather than emitting both of the rather long sequences in the standard
+ // G_FCMP/G_SELECT select methods.
+
+ // First, check if the condition is defined by a compare.
+ MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
+ while (CondDef) {
+ // We can only fold if all of the defs have one use.
+ if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
+ return false;
+
+ // We can skip over G_TRUNC since the condition is 1-bit.
+ // Truncating/extending can have no impact on the value.
+ unsigned Opc = CondDef->getOpcode();
+ if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
+ break;
+
+ // Can't see past copies from physregs.
+ if (Opc == TargetOpcode::COPY &&
+ TargetRegisterInfo::isPhysicalRegister(CondDef->getOperand(1).getReg()))
+ return false;
+
+ CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
+ }
+
+ // Is the condition defined by a compare?
+ if (!CondDef)
+ return false;
+
+ unsigned CondOpc = CondDef->getOpcode();
+ if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
+ return false;
+
+ AArch64CC::CondCode CondCode;
+ if (CondOpc == TargetOpcode::G_ICMP) {
+ CondCode = changeICMPPredToAArch64CC(
+ (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
+ if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
+ CondDef->getOperand(1), MIB)) {
+ LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
+ return false;
}
+ } else {
+ // Get the condition code for the select.
+ AArch64CC::CondCode CondCode2;
+ changeFCMPPredToAArch64CC(
+ (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
+ CondCode2);
+
+ // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
+ // instructions to emit the comparison.
+ // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
+ // unnecessary.
+ if (CondCode2 != AArch64CC::AL)
+ return false;
+
+ // Make sure we'll be able to select the compare.
+ unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
+ if (!CmpOpc)
+ return false;
+
+ // Emit a new compare.
+ auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
+ if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+ Cmp.addUse(CondDef->getOperand(3).getReg());
+ constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
}
+
+ // Emit the select.
+ unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
+ auto CSel =
+ MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
+ {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
+ .addImm(CondCode);
+ constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
I.eraseFromParent();
return true;
}
-/// SelectArithImmed - Select an immediate value that can be represented as
-/// a 12-bit value shifted left by either 0 or 12. If so, return true with
-/// Val set to the 12-bit value and Shift set to the shifter operand.
-InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
- MachineInstr &MI = *Root.getParent();
- MachineBasicBlock &MBB = *MI.getParent();
+MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
+ MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
+ "Unexpected MachineOperand");
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+ // We want to find this sort of thing:
+ // x = G_SUB 0, y
+ // G_ICMP z, x
+ //
+ // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
+ // e.g:
+ //
+ // cmn z, y
+
+ // Helper lambda to detect the subtract followed by the compare.
+ // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
+ auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
+ if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
+ return false;
+
+ // Need to make sure NZCV is the same at the end of the transformation.
+ if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+ return false;
+
+ // We want to match against SUBs.
+ if (DefMI->getOpcode() != TargetOpcode::G_SUB)
+ return false;
+
+ // Make sure that we're getting
+ // x = G_SUB 0, y
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
+ if (!ValAndVReg || ValAndVReg->Value != 0)
+ return false;
+
+ // This can safely be represented as a CMN.
+ return true;
+ };
+
+ // Check if the RHS or LHS of the G_ICMP is defined by a SUB
+ MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
+ MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
+ CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
+
+ // Given this:
+ //
+ // x = G_SUB 0, y
+ // G_ICMP x, z
+ //
+ // Produce this:
+ //
+ // cmn y, z
+ if (IsCMN(LHSDef, CC))
+ return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
+
+ // Same idea here, but with the RHS of the compare instead:
+ //
+ // Given this:
+ //
+ // x = G_SUB 0, y
+ // G_ICMP z, x
+ //
+ // Produce this:
+ //
+ // cmn z, y
+ if (IsCMN(RHSDef, CC))
+ return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
+
+ // Given this:
+ //
+ // z = G_AND x, y
+ // G_ICMP z, 0
+ //
+ // Produce this if the compare is signed:
+ //
+ // tst x, y
+ if (!isUnsignedICMPPred(P) && LHSDef &&
+ LHSDef->getOpcode() == TargetOpcode::G_AND) {
+ // Make sure that the RHS is 0.
+ auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
+ if (!ValAndVReg || ValAndVReg->Value != 0)
+ return nullptr;
+
+ return emitTST(LHSDef->getOperand(1).getReg(),
+ LHSDef->getOperand(2).getReg(), MIRBuilder);
+ }
+
+ return nullptr;
+}
+
+bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
+ // Try to match a vector splat operation into a dup instruction.
+ // We're looking for this pattern:
+ // %scalar:gpr(s64) = COPY $x0
+ // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
+ // %cst0:gpr(s32) = G_CONSTANT i32 0
+ // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
+ // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
+ // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
+ // %zerovec(<2 x s32>)
+ //
+ // ...into:
+ // %splat = DUP %scalar
+ // We use the regbank of the scalar to determine which kind of dup to use.
+ MachineIRBuilder MIB(I);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ using namespace TargetOpcode;
+ using namespace MIPatternMatch;
+
+ // Begin matching the insert.
+ auto *InsMI =
+ getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
+ if (!InsMI)
+ return false;
+ // Match the undef vector operand.
+ auto *UndefMI =
+ getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
+ if (!UndefMI)
+ return false;
+ // Match the scalar being splatted.
+ Register ScalarReg = InsMI->getOperand(2).getReg();
+ const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
+ // Match the index constant 0.
+ int64_t Index = 0;
+ if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
+ return false;
+
+ // The shuffle's second operand doesn't matter if the mask is all zero.
+ auto *ZeroVec = getOpcodeDef(G_BUILD_VECTOR, I.getOperand(3).getReg(), MRI);
+ if (!ZeroVec)
+ return false;
+ int64_t Zero = 0;
+ if (!mi_match(ZeroVec->getOperand(1).getReg(), MRI, m_ICst(Zero)) || Zero)
+ return false;
+ for (unsigned i = 1, e = ZeroVec->getNumOperands() - 1; i < e; ++i) {
+ if (ZeroVec->getOperand(i).getReg() != ZeroVec->getOperand(1).getReg())
+ return false; // This wasn't an all zeros vector.
+ }
+
+ // We're done, now find out what kind of splat we need.
+ LLT VecTy = MRI.getType(I.getOperand(0).getReg());
+ LLT EltTy = VecTy.getElementType();
+ if (VecTy.getSizeInBits() != 128 || EltTy.getSizeInBits() < 32) {
+ LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 128b yet");
+ return false;
+ }
+ bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
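+ // GPR scalars use the general-register DUP forms; FPR scalars use the lane
+ // forms, which is why the scalar is first widened to a vector below.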
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::DUPv4i32gpr, AArch64::DUPv2i64gpr},
+ {AArch64::DUPv4i32lane, AArch64::DUPv2i64lane}};
+ unsigned Opc = OpcTable[IsFP][EltTy.getSizeInBits() == 64];
+
+ // For FP splats, we need to widen the scalar reg via undef too.
+ if (IsFP) {
+ MachineInstr *Widen = emitScalarToVector(
+ EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
+ if (!Widen)
+ return false;
+ ScalarReg = Widen->getOperand(0).getReg();
+ }
+ auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
+ if (IsFP)
+ Dup.addImm(0);
+ constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return false;
+ if (tryOptVectorDup(I))
+ return true;
+ return false;
+}
+
+bool AArch64InstructionSelector::selectShuffleVector(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ if (tryOptVectorShuffle(I))
+ return true;
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ Register Src1Reg = I.getOperand(1).getReg();
+ const LLT Src1Ty = MRI.getType(Src1Reg);
+ Register Src2Reg = I.getOperand(2).getReg();
+ const LLT Src2Ty = MRI.getType(Src2Reg);
+
+ MachineBasicBlock &MBB = *I.getParent();
MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+
+ // G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
+ // operand, it comes in as a normal vector value which we have to analyze to
+ // find the mask indices. If the mask element is undef, then
+ // collectShuffleMaskIndices() will add a None entry for that index into
+ // the list.
+ SmallVector<Optional<int>, 8> Mask;
+ collectShuffleMaskIndices(I, MRI, Mask);
+ assert(!Mask.empty() && "Expected to find mask indices");
+
+ // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
+ // it's originated from a <1 x T> type. Those should have been lowered into
+ // G_BUILD_VECTOR earlier.
+ if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
+ LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
+ return false;
+ }
- // This function is called from the addsub_shifted_imm ComplexPattern,
- // which lists [imm] as the list of opcode it's interested in, however
- // we still need to check whether the operand is actually an immediate
- // here because the ComplexPattern opcode list is only used in
- // root-level opcode matching.
+ unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
+
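+ // TBL operates on byte indices, so expand each mask element into BytesPerElt
+ // consecutive byte offsets.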
+ SmallVector<Constant *, 64> CstIdxs;
+ for (auto &MaybeVal : Mask) {
+ // For now, we'll just assume any undef index is 0. This should be
+ // optimized in the future, e.g. to select DUP etc.
+ int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
+ for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
+ unsigned Offset = Byte + Val * BytesPerElt;
+ CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
+ }
+ }
+
+ MachineIRBuilder MIRBuilder(I);
+
+ // Use a constant pool to load the index vector for TBL.
+ Constant *CPVal = ConstantVector::get(CstIdxs);
+ MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
+ if (!IndexLoad) {
+ LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
+ return false;
+ }
+
+ if (DstTy.getSizeInBits() != 128) {
+ assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
+ // This case can be done with TBL1.
+ MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
+ if (!Concat) {
+ LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
+ return false;
+ }
+
+ // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
+ IndexLoad =
+ emitScalarToVector(64, &AArch64::FPR128RegClass,
+ IndexLoad->getOperand(0).getReg(), MIRBuilder);
+
+ auto TBL1 = MIRBuilder.buildInstr(
+ AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
+ {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
+ constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
+
+ auto Copy =
+ MIRBuilder
+ .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
+ .addReg(TBL1.getReg(0), 0, AArch64::dsub);
+ RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
+ I.eraseFromParent();
+ return true;
+ }
+
+ // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
+ // Q registers for regalloc.
+ auto RegSeq = MIRBuilder
+ .buildInstr(TargetOpcode::REG_SEQUENCE,
+ {&AArch64::QQRegClass}, {Src1Reg})
+ .addImm(AArch64::qsub0)
+ .addUse(Src2Reg)
+ .addImm(AArch64::qsub1);
+
+ auto TBL2 =
+ MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
+ {RegSeq, IndexLoad->getOperand(0).getReg()});
+ constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
+MachineInstr *AArch64InstructionSelector::emitLaneInsert(
+ Optional<Register> DstReg, Register SrcReg, Register EltReg,
+ unsigned LaneIdx, const RegisterBank &RB,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineInstr *InsElt = nullptr;
+ const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+ // Create a register to define with the insert if one wasn't passed in.
+ if (!DstReg)
+ DstReg = MRI.createVirtualRegister(DstRC);
+
+ unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
+ unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
+
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
+ InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
+ .addImm(LaneIdx)
+ .addUse(InsSub->getOperand(0).getReg())
+ .addImm(0);
+ } else {
+ InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
+ .addImm(LaneIdx)
+ .addUse(EltReg);
+ }
+
+ constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
+ return InsElt;
+}
+
+bool AArch64InstructionSelector::selectInsertElt(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+ // Get information on the destination.
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+ unsigned VecSize = DstTy.getSizeInBits();
+
+ // Get information on the element we want to insert into the destination.
+ Register EltReg = I.getOperand(2).getReg();
+ const LLT EltTy = MRI.getType(EltReg);
+ unsigned EltSize = EltTy.getSizeInBits();
+ if (EltSize < 16 || EltSize > 64)
+ return false; // Don't support all element types yet.
+
+ // Find the definition of the index. Bail out if it's not defined by a
+ // G_CONSTANT.
+ Register IdxReg = I.getOperand(3).getReg();
+ auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
+ if (!VRegAndVal)
+ return false;
+ unsigned LaneIdx = VRegAndVal->Value;
+
+ // Perform the lane insert.
+ Register SrcReg = I.getOperand(1).getReg();
+ const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+ MachineIRBuilder MIRBuilder(I);
+
+ if (VecSize < 128) {
+ // If the vector we're inserting into is smaller than 128 bits, widen it
+ // to 128 to do the insert.
+ MachineInstr *ScalarToVec = emitScalarToVector(
+ VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
+ if (!ScalarToVec)
+ return false;
+ SrcReg = ScalarToVec->getOperand(0).getReg();
+ }
+
+ // Create an insert into a new FPR128 register.
+ // Note that if our vector is already 128 bits, we end up emitting an extra
+ // register.
+ MachineInstr *InsMI =
+ emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
+
+ if (VecSize < 128) {
+ // If we had to widen to perform the insert, then we have to demote back to
+ // the original size to get the result we want.
+ Register DemoteVec = InsMI->getOperand(0).getReg();
+ const TargetRegisterClass *RC =
+ getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
+ if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
+ LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+ return false;
+ }
+ unsigned SubReg = 0;
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+ if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
+ LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
+ << "\n");
+ return false;
+ }
+ MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(DemoteVec, 0, SubReg);
+ RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ } else {
+ // No widening needed.
+ InsMI->getOperand(0).setReg(DstReg);
+ constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectBuildVector(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ // Until we port more of the optimized selections, for now just use a vector
+ // insert sequence.
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned EltSize = EltTy.getSizeInBits();
+ if (EltSize < 16 || EltSize > 64)
+ return false; // Don't support all element types yet.
+ const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
+ MachineIRBuilder MIRBuilder(I);
+
+ const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
+ MachineInstr *ScalarToVec =
+ emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
+ I.getOperand(1).getReg(), MIRBuilder);
+ if (!ScalarToVec)
+ return false;
+
+ Register DstVec = ScalarToVec->getOperand(0).getReg();
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ // Keep track of the last MI we inserted. Later on, we might be able to save
+ // a copy using it.
+ MachineInstr *PrevMI = nullptr;
+ for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
+ // Note that if we don't do a subregister copy, we can end up making an
+ // extra register.
+ PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
+ MIRBuilder);
+ DstVec = PrevMI->getOperand(0).getReg();
+ }
+
+ // If DstTy's size in bits is less than 128, then emit a subregister copy
+ // from DstVec to the last register we've defined.
+ if (DstSize < 128) {
+ // Force this to be FPR using the destination vector.
+ const TargetRegisterClass *RC =
+ getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
+ if (!RC)
+ return false;
+ if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
+ LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
+ return false;
+ }
+
+ unsigned SubReg = 0;
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+ if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
+ LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
+ << "\n");
+ return false;
+ }
+
+ Register Reg = MRI.createVirtualRegister(RC);
+ Register DstReg = I.getOperand(0).getReg();
+
+ MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
+ .addReg(DstVec, 0, SubReg);
+ MachineOperand &RegOp = I.getOperand(1);
+ RegOp.setReg(Reg);
+ RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ } else {
+ // We don't need a subregister copy. Save a copy by re-using the
+ // destination register on the final insert.
+ assert(PrevMI && "PrevMI was null?");
+ PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
+ constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
+/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
+/// ID if it exists, and 0 otherwise.
+static unsigned findIntrinsicID(MachineInstr &I) {
+ auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
+ return Op.isIntrinsicID();
+ });
+ if (IntrinOp == I.operands_end())
+ return 0;
+ return IntrinOp->getIntrinsicID();
+}
+
+/// Helper function to emit the correct opcode for a llvm.aarch64.stlxr
+/// intrinsic.
+static unsigned getStlxrOpcode(unsigned NumBytesToStore) {
+ switch (NumBytesToStore) {
+ // TODO: 1, 2, and 4 byte stores.
+ case 8:
+ return AArch64::STLXRX;
+ default:
+ LLVM_DEBUG(dbgs() << "Unexpected number of bytes to store! ("
+ << NumBytesToStore << ")\n");
+ break;
+ }
+ return 0;
+}
+
+bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ // Find the intrinsic ID.
+ unsigned IntrinID = findIntrinsicID(I);
+ if (!IntrinID)
+ return false;
+ MachineIRBuilder MIRBuilder(I);
+
+ // Select the instruction.
+ switch (IntrinID) {
+ default:
+ return false;
+ case Intrinsic::trap:
+ MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
+ break;
+ case Intrinsic::debugtrap:
+ if (!STI.isTargetWindows())
+ return false;
+ MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
+ break;
+ case Intrinsic::aarch64_stlxr:
+ Register StatReg = I.getOperand(0).getReg();
+ assert(RBI.getSizeInBits(StatReg, MRI, TRI) == 32 &&
+ "Status register must be 32 bits!");
+ Register SrcReg = I.getOperand(2).getReg();
+
+ if (RBI.getSizeInBits(SrcReg, MRI, TRI) != 64) {
+ LLVM_DEBUG(dbgs() << "Only support 64-bit sources right now.\n");
+ return false;
+ }
+
+ Register PtrReg = I.getOperand(3).getReg();
+ assert(MRI.getType(PtrReg).isPointer() && "Expected pointer operand");
+
+ // Expect only one memory operand.
+ if (!I.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MemOp = *I.memoperands_begin();
+ unsigned NumBytesToStore = MemOp->getSize();
+ unsigned Opc = getStlxrOpcode(NumBytesToStore);
+ if (!Opc)
+ return false;
+
+ auto StoreMI = MIRBuilder.buildInstr(Opc, {StatReg}, {SrcReg, PtrReg});
+ constrainSelectedInstRegOperands(*StoreMI, TII, TRI, RBI);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectIntrinsic(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ unsigned IntrinID = findIntrinsicID(I);
+ if (!IntrinID)
+ return false;
+ MachineIRBuilder MIRBuilder(I);
+
+ switch (IntrinID) {
+ default:
+ break;
+ case Intrinsic::aarch64_crypto_sha1h:
+ Register DstReg = I.getOperand(0).getReg();
+ Register SrcReg = I.getOperand(2).getReg();
+
+ // FIXME: Should this be an assert?
+ if (MRI.getType(DstReg).getSizeInBits() != 32 ||
+ MRI.getType(SrcReg).getSizeInBits() != 32)
+ return false;
+
+ // The operation has to happen on FPRs. Set up some new FPR registers for
+ // the source and destination if they are on GPRs.
+ if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
+ SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+ MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
+
+ // Make sure the copy ends up getting constrained properly.
+ RBI.constrainGenericRegister(I.getOperand(2).getReg(),
+ AArch64::GPR32RegClass, MRI);
+ }
+
+ if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
+ DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+
+ // Actually insert the instruction.
+ auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
+ constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
+
+ // Did we create a new register for the destination?
+ if (DstReg != I.getOperand(0).getReg()) {
+ // Yep. Copy the result of the instruction back into the original
+ // destination.
+ MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
+ RBI.constrainGenericRegister(I.getOperand(0).getReg(),
+ AArch64::GPR32RegClass, MRI);
+ }
+
+ I.eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
+ auto &MI = *Root.getParent();
+ auto &MBB = *MI.getParent();
+ auto &MF = *MBB.getParent();
+ auto &MRI = MF.getRegInfo();
uint64_t Immed;
if (Root.isImm())
Immed = Root.getImm();
else if (Root.isCImm())
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
- MachineInstr *Def = MRI.getVRegDef(Root.getReg());
- if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
- return None;
- MachineOperand &Op1 = Def->getOperand(1);
- if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
+ auto ValAndVReg =
+ getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
+ if (!ValAndVReg)
return None;
- Immed = Op1.getCImm()->getZExtValue();
+ Immed = ValAndVReg->Value;
} else
return None;
+ return Immed;
+}
+
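+ // Renderers for constant shift amounts. The "A" variants encode
+ // (BitWidth - Amt) masked to the bit width and the "B" variants encode
+ // (BitWidth - 1 - Amt); together they form the immediate pair the selected
+ // shift patterns expect.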
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
+ auto MaybeImmed = getImmedFromMO(Root);
+ if (MaybeImmed == None || *MaybeImmed > 31)
+ return None;
+ uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
+ auto MaybeImmed = getImmedFromMO(Root);
+ if (MaybeImmed == None || *MaybeImmed > 31)
+ return None;
+ uint64_t Enc = 31 - *MaybeImmed;
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
+ auto MaybeImmed = getImmedFromMO(Root);
+ if (MaybeImmed == None || *MaybeImmed > 63)
+ return None;
+ uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
+ auto MaybeImmed = getImmedFromMO(Root);
+ if (MaybeImmed == None || *MaybeImmed > 63)
+ return None;
+ uint64_t Enc = 63 - *MaybeImmed;
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
+}
+/// SelectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12. If so, return true with
+/// Val set to the 12-bit value and Shift set to the shifter operand.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
+ // This function is called from the addsub_shifted_imm ComplexPattern,
+ // which lists [imm] as the list of opcodes it's interested in; however,
+ // we still need to check whether the operand is actually an immediate
+ // here because the ComplexPattern opcode list is only used in
+ // root-level opcode matching.
+ auto MaybeImmed = getImmedFromMO(Root);
+ if (MaybeImmed == None)
+ return None;
+ uint64_t Immed = *MaybeImmed;
unsigned ShiftAmt;
if (Immed >> 12 == 0) {
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 6f7fb7a8bc21..a985b330eafa 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -22,8 +21,11 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
+#define DEBUG_TYPE "aarch64-legalinfo"
+
using namespace llvm;
using namespace LegalizeActions;
+using namespace LegalizeMutations;
using namespace LegalityPredicates;
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
@@ -46,9 +48,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
const LLT v2s32 = LLT::vector(2, 32);
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
+ const LLT v2p0 = LLT::vector(2, p0);
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalFor({p0, s1, s8, s16, s32, s64, v2s64})
+ .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
.clampScalar(0, s1, s64)
.widenScalarToNextPow2(0, 8)
.fewerElementsIf(
@@ -65,33 +68,58 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
});
getActionDefinitionsBuilder(G_PHI)
- .legalFor({p0, s16, s32, s64})
+ .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
.clampScalar(0, s16, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder(G_BSWAP)
- .legalFor({s32, s64})
+ .legalFor({s32, s64, v4s32, v2s32, v2s64})
.clampScalar(0, s16, s64)
.widenScalarToNextPow2(0);
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL})
- .legalFor({s32, s64, v2s32, v4s32, v2s64})
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+ .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
+ getActionDefinitionsBuilder(G_SHL)
+ .legalFor({{s32, s32}, {s64, s64},
+ {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
+ .clampScalar(1, s32, s64)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(0)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0)
+ .minScalarSameAs(1, 0);
+
getActionDefinitionsBuilder(G_GEP)
.legalFor({{p0, s64}})
.clampScalar(1, s64, s64);
getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});
- getActionDefinitionsBuilder({G_LSHR, G_ASHR, G_SDIV, G_UDIV})
+ getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32, s64})
.clampScalar(0, s32, s64)
- .widenScalarToNextPow2(0);
+ .widenScalarToNextPow2(0)
+ .scalarize(0);
+
+ getActionDefinitionsBuilder({G_LSHR, G_ASHR})
+ .customIf([=](const LegalityQuery &Query) {
+ const auto &SrcTy = Query.Types[0];
+ const auto &AmtTy = Query.Types[1];
+ return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+ AmtTy.getSizeInBits() == 32;
+ })
+ .legalFor(
+ {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
+ .clampScalar(1, s32, s64)
+ .clampScalar(0, s32, s64)
+ .minScalarSameAs(1, 0);
getActionDefinitionsBuilder({G_SREM, G_UREM})
.lowerFor({s1, s8, s16, s32, s64});
@@ -101,15 +129,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
- getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO})
+ getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
.legalFor({{s32, s1}, {s64, s1}});
- getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV})
- .legalFor({s32, s64});
+ getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
+ .legalFor({s32, s64, v2s64, v4s32, v2s32});
- getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+ getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
- getActionDefinitionsBuilder(G_FCEIL)
+ getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
+ G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
+ G_FNEARBYINT})
+ // If we don't have full FP16 support, then scalarize the elements of
+ // vectors containing fp16 types.
+ .fewerElementsIf(
+ [=, &ST](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ return Ty.isVector() && Ty.getElementType() == s16 &&
+ !ST.hasFullFP16();
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
// If we don't have full FP16 support, then widen s16 to s32 if we
// encounter it.
.widenScalarIf(
@@ -117,7 +156,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
return Query.Types[0] == s16 && !ST.hasFullFP16();
},
[=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
- .legalFor({s16, s32, s64, v2s32, v4s32, v2s64});
+ .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});
+
+ getActionDefinitionsBuilder(
+ {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
+ // We need a call for these, so we always need to scalarize.
+ .scalarize(0)
+ // Regardless of FP16 support, widen 16-bit elements to 32-bits.
+ .minScalar(0, s32)
+ .libcallFor({s32, s64, v2s32, v4s32, v2s64});
getActionDefinitionsBuilder(G_INSERT)
.unsupportedIf([=](const LegalityQuery &Query) {
@@ -158,12 +205,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
- .legalForTypesWithMemSize({{s32, p0, 8},
- {s32, p0, 16},
- {s32, p0, 32},
- {s64, p0, 64},
- {p0, p0, 64},
- {v2s32, p0, 64}})
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+ {s32, p0, 16, 8},
+ {s32, p0, 32, 8},
+ {s64, p0, 8, 2},
+ {s64, p0, 16, 2},
+ {s64, p0, 32, 4},
+ {s64, p0, 64, 8},
+ {p0, p0, 64, 8},
+ {v2s32, p0, 64, 8}})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
// TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -172,16 +222,30 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// Lower anything left over into G_*EXT and G_LOAD
.lower();
+ auto IsPtrVecPred = [=](const LegalityQuery &Query) {
+ const LLT &ValTy = Query.Types[0];
+ if (!ValTy.isVector())
+ return false;
+ const LLT EltTy = ValTy.getElementType();
+ return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
+ };
+
getActionDefinitionsBuilder(G_LOAD)
- .legalForTypesWithMemSize({{s8, p0, 8},
- {s16, p0, 16},
- {s32, p0, 32},
- {s64, p0, 64},
- {p0, p0, 64},
- {v2s32, p0, 64}})
+ .legalForTypesWithMemDesc({{s8, p0, 8, 8},
+ {s16, p0, 16, 8},
+ {s32, p0, 32, 8},
+ {s64, p0, 64, 8},
+ {p0, p0, 64, 8},
+ {v8s8, p0, 64, 8},
+ {v16s8, p0, 128, 8},
+ {v4s16, p0, 64, 8},
+ {v8s16, p0, 128, 8},
+ {v2s32, p0, 64, 8},
+ {v4s32, p0, 128, 8},
+ {v2s64, p0, 128, 8}})
// These extends are also legal
- .legalForTypesWithMemSize({{s32, p0, 8},
- {s32, p0, 16}})
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+ {s32, p0, 16, 8}})
.clampScalar(0, s8, s64)
.widenScalarToNextPow2(0)
// TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -191,16 +255,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32)
- .clampMaxNumElements(0, s64, 1);
+ .clampMaxNumElements(0, s32, 2)
+ .clampMaxNumElements(0, s64, 1)
+ .customIf(IsPtrVecPred);
getActionDefinitionsBuilder(G_STORE)
- .legalForTypesWithMemSize({{s8, p0, 8},
- {s16, p0, 16},
- {s32, p0, 32},
- {s64, p0, 64},
- {p0, p0, 64},
- {v2s32, p0, 64}})
+ .legalForTypesWithMemDesc({{s8, p0, 8, 8},
+ {s16, p0, 16, 8},
+ {s32, p0, 32, 8},
+ {s64, p0, 64, 8},
+ {p0, p0, 64, 8},
+ {v16s8, p0, 128, 8},
+ {v4s16, p0, 64, 8},
+ {v8s16, p0, 128, 8},
+ {v2s32, p0, 64, 8},
+ {v4s32, p0, 128, 8},
+ {v2s64, p0, 128, 8}})
.clampScalar(0, s8, s64)
.widenScalarToNextPow2(0)
// TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -210,23 +280,48 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
return Query.Types[0].isScalar() &&
Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
})
- .clampNumElements(0, v2s32, v2s32)
- .clampMaxNumElements(0, s64, 1);
+ .clampMaxNumElements(0, s32, 2)
+ .clampMaxNumElements(0, s64, 1)
+ .customIf(IsPtrVecPred);
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({p0, s32, s64})
- .clampScalar(0, s32, s64)
+ .legalFor({p0, s8, s16, s32, s64})
+ .clampScalar(0, s8, s64)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64})
.clampScalar(0, s32, s64);
getActionDefinitionsBuilder(G_ICMP)
- .legalFor({{s32, s32}, {s32, s64}, {s32, p0}})
+ .legalFor({{s32, s32},
+ {s32, s64},
+ {s32, p0},
+ {v4s32, v4s32},
+ {v2s32, v2s32},
+ {v2s64, v2s64},
+ {v2s64, v2p0},
+ {v4s16, v4s16},
+ {v8s16, v8s16},
+ {v8s8, v8s8},
+ {v16s8, v16s8}})
.clampScalar(0, s32, s32)
.clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1);
+ .minScalarEltSameAsIf(
+ [=](const LegalityQuery &Query) {
+ const LLT &Ty = Query.Types[0];
+ const LLT &SrcTy = Query.Types[1];
+ return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
+ Ty.getElementType() != SrcTy.getElementType();
+ },
+ 0, 1)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
+ 1, s32)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
+ s64)
+ .widenScalarOrEltToNextPow2(1);
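The vector part of the G_ICMP rule above mostly enforces that the result element width matches the compared element width, with v2s16 sources forcing s32 source elements and v2p0 sources forcing s64 result elements. A hypothetical predicate capturing the first condition:

    // True when the result element type still has to be widened to match the
    // (non-pointer) source element type, as in the minScalarEltSameAsIf rule.
    bool icmpEltNeedsWidening(bool DstIsVector, bool SrcEltIsPointer,
                              unsigned DstEltBits, unsigned SrcEltBits) {
      return DstIsVector && !SrcEltIsPointer && DstEltBits != SrcEltBits;
    }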
getActionDefinitionsBuilder(G_FCMP)
.legalFor({{s32, s32}, {s32, s64}})
@@ -236,24 +331,48 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// Extensions
getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
- .legalForCartesianProduct({s8, s16, s32, s64}, {s1, s8, s16, s32});
+ .legalIf([=](const LegalityQuery &Query) {
+ unsigned DstSize = Query.Types[0].getSizeInBits();
+
+ // Make sure that we have something that will fit in a register, and
+ // make sure it's a power of 2.
+ if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+ return false;
+
+ const LLT &SrcTy = Query.Types[1];
+
+ // Special case for s1.
+ if (SrcTy == s1)
+ return true;
+
+ // Make sure we fit in a register otherwise. Don't bother checking that
+ // the source type is below 128 bits. We shouldn't be allowing anything
+ // through which is wider than the destination in the first place.
+ unsigned SrcSize = SrcTy.getSizeInBits();
+ if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
+ return false;
+
+ return true;
+ });
+
+ getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
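The extension lambda above boils down to a size check on both types. A self-contained sketch of the same predicate (hypothetical names, not LLVM API):

    // Destination must be a register-sized power of two (8..128 bits); sources
    // must be s1, or a power of two of at least 8 bits.
    static bool isPow2(unsigned Bits) { return Bits && (Bits & (Bits - 1)) == 0; }

    bool isLegalExtension(unsigned DstBits, unsigned SrcBits) {
      if (DstBits < 8 || DstBits > 128 || !isPow2(DstBits))
        return false;
      if (SrcBits == 1) // Special case for s1 sources.
        return true;
      return SrcBits >= 8 && isPow2(SrcBits);
    }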
// FP conversions
getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
- {{s16, s32}, {s16, s64}, {s32, s64}});
+ {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
getActionDefinitionsBuilder(G_FPEXT).legalFor(
- {{s32, s16}, {s64, s16}, {s64, s32}});
+ {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
- .legalForCartesianProduct({s32, s64})
+ .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.clampScalar(1, s32, s64)
.widenScalarToNextPow2(1);
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
- .legalForCartesianProduct({s32, s64})
+ .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
.clampScalar(1, s32, s64)
.widenScalarToNextPow2(1)
.clampScalar(0, s32, s64)
@@ -264,10 +383,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
// Select
+ // FIXME: We can probably do a bit better than just scalarizing vector
+ // selects.
getActionDefinitionsBuilder(G_SELECT)
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
.clampScalar(0, s32, s64)
- .widenScalarToNextPow2(0);
+ .widenScalarToNextPow2(0)
+ .scalarize(0);
// Pointer-handling
getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
@@ -291,7 +413,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// number of bits but it's what the previous code described and fixing
// it breaks tests.
.legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
- v8s16, v4s16, v2s16, v4s32, v2s32, v2s64});
+ v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
+ v2p0});
getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
@@ -335,11 +458,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
}
return false;
};
- auto scalarize =
- [](const LegalityQuery &Query, unsigned TypeIdx) {
- const LLT &Ty = Query.Types[TypeIdx];
- return std::make_pair(TypeIdx, Ty.getElementType());
- };
// FIXME: This rule is horrible, but specifies the same as what we had
// before with the particularly strange definitions removed (e.g.
@@ -353,10 +471,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
// Break up vectors with weird elements into scalars
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
- [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+ scalarize(0))
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
- [=](const LegalityQuery &Query) { return scalarize(Query, 1); })
+ scalarize(1))
// Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
// or 384.
.clampScalar(BigTyIdx, s8, s512)
@@ -397,16 +515,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
})
// Any vectors left are the wrong size. Scalarize them.
- .fewerElementsIf([](const LegalityQuery &Query) { return true; },
- [](const LegalityQuery &Query) {
- return std::make_pair(
- 0, Query.Types[0].getElementType());
- })
- .fewerElementsIf([](const LegalityQuery &Query) { return true; },
- [](const LegalityQuery &Query) {
- return std::make_pair(
- 1, Query.Types[1].getElementType());
- });
+ .scalarize(0)
+ .scalarize(1);
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -417,11 +527,24 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
.minScalar(2, s64)
.legalIf([=](const LegalityQuery &Query) {
const LLT &VecTy = Query.Types[1];
- return VecTy == v4s32 || VecTy == v2s64;
+ return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
+ VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
+ });
+
+ getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT &VecTy = Query.Types[0];
+ // TODO: Support s8 and s16
+ return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
});
getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalFor({{v4s32, s32}, {v2s64, s64}})
+ .legalFor({{v4s16, s16},
+ {v8s16, s16},
+ {v2s32, s32},
+ {v4s32, s32},
+ {v2p0, p0},
+ {v2s64, s64}})
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
@@ -432,6 +555,42 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
})
.minScalarSameAs(1, 0);
+ getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
+ {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
+ .scalarize(1);
+
+ getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT &DstTy = Query.Types[0];
+ const LLT &SrcTy = Query.Types[1];
+ // For now just support the TBL2 variant which needs the source vectors
+ // to be the same size as the dest.
+ if (DstTy != SrcTy)
+ return false;
+ for (auto &Ty : {v2s32, v4s32, v2s64}) {
+ if (DstTy == Ty)
+ return true;
+ }
+ return false;
+ })
+      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors); we
+      // just want those lowered into G_BUILD_VECTOR.
+ .lowerIf([=](const LegalityQuery &Query) {
+ return !Query.Types[1].isVector();
+ })
+ .clampNumElements(0, v4s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64);
+
+ getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+ .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
+
+ getActionDefinitionsBuilder(G_JUMP_TABLE)
+ .legalFor({{p0}, {s64}});
+
+ getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] == p0 && Query.Types[1] == s64;
+ });
+
computeTables();
verify(*ST.getInstrInfo());
}
@@ -446,37 +605,106 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
return false;
case TargetOpcode::G_VAARG:
return legalizeVaArg(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
}
llvm_unreachable("expected switch to return");
}
+bool AArch64LegalizerInfo::legalizeShlAshrLshr(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
+ MI.getOpcode() == TargetOpcode::G_LSHR ||
+ MI.getOpcode() == TargetOpcode::G_SHL);
+ // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
+ // imported patterns can select it later. Either way, it will be legal.
+ Register AmtReg = MI.getOperand(2).getReg();
+ auto *CstMI = MRI.getVRegDef(AmtReg);
+ assert(CstMI && "expected to find a vreg def");
+ if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return true;
+ // Check the shift amount is in range for an immediate form.
+ unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
+ if (Amount > 31)
+ return true; // This will have to remain a register variant.
+ assert(MRI.getType(AmtReg).getSizeInBits() == 32);
+ MIRBuilder.setInstr(MI);
+ auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
+ MI.getOperand(2).setReg(ExtCst.getReg(0));
+ return true;
+}
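legalizeShlAshrLshr above only rewrites the shift when the amount is a known constant that fits the immediate form; everything else is left for the register-variant patterns. A hypothetical restatement of that decision:

    #include <cstdint>
    #include <optional>

    // Returns true when a 32-bit constant shift amount should be zero-extended
    // to 64 bits so the imported immediate-form patterns can select it.
    bool shouldZExtShiftAmount(std::optional<uint64_t> ConstAmt) {
      if (!ConstAmt)
        return false;          // Not a G_CONSTANT: keep the register variant.
      return *ConstAmt <= 31;  // In range for the immediate form.
    }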
+
+bool AArch64LegalizerInfo::legalizeLoadStore(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ assert(MI.getOpcode() == TargetOpcode::G_STORE ||
+ MI.getOpcode() == TargetOpcode::G_LOAD);
+ // Here we just try to handle vector loads/stores where our value type might
+ // have pointer elements, which the SelectionDAG importer can't handle. To
+ // allow the existing patterns for s64 to fire for p0, we just try to bitcast
+ // the value to use s64 types.
+
+  // Custom legalization requires that the instruction, if not deleted, be
+  // fully legalized. To allow further legalization of the instruction, we
+  // create a new instruction and erase the existing one.
+
+ unsigned ValReg = MI.getOperand(0).getReg();
+ const LLT ValTy = MRI.getType(ValReg);
+
+ if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
+ ValTy.getElementType().getAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
+ return false;
+ }
+
+ MIRBuilder.setInstr(MI);
+ unsigned PtrSize = ValTy.getElementType().getSizeInBits();
+ const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
+ auto &MMO = **MI.memoperands_begin();
+ if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
+ MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
+ } else {
+ unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
+ auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
+ MIRBuilder.buildBitcast({ValReg}, {NewLoad});
+ }
+ MI.eraseFromParent();
+ return true;
+}
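The bitcast trick in legalizeLoadStore has a simple scalar-code analogy: a <2 x p0> value is moved through memory as a <2 x s64> carrying the same bits, so the existing 64-bit vector patterns apply. A rough, non-LLVM sketch, assuming 64-bit pointers:

    #include <cstdint>
    #include <cstring>

    // Store two pointers by reinterpreting them as two 64-bit integers first.
    void storePointerPair(void *Mem, void *const Ptrs[2]) {
      uint64_t Bits[2];
      std::memcpy(Bits, Ptrs, sizeof(Bits)); // the "bitcast" of the value
      std::memcpy(Mem, Bits, sizeof(Bits));  // the ordinary 128-bit store
    }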
+
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
MIRBuilder.setInstr(MI);
MachineFunction &MF = MIRBuilder.getMF();
unsigned Align = MI.getOperand(2).getImm();
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned ListPtr = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register ListPtr = MI.getOperand(1).getReg();
LLT PtrTy = MRI.getType(ListPtr);
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
- unsigned List = MRI.createGenericVirtualRegister(PtrTy);
+ Register List = MRI.createGenericVirtualRegister(PtrTy);
MIRBuilder.buildLoad(
List, ListPtr,
*MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
PtrSize, /* Align = */ PtrSize));
- unsigned DstPtr;
+ Register DstPtr;
if (Align > PtrSize) {
// Realign the list to the actual required alignment.
auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
- unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());
+ auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));
DstPtr = MRI.createGenericVirtualRegister(PtrTy);
MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
@@ -489,11 +717,9 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
*MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
ValSize, std::max(Align, PtrSize)));
- unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
- MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
+ auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));
- unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
+ auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));
MIRBuilder.buildStore(
NewList, ListPtr,
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h
index 77e8bdc7623c..f3362a18620f 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -1,9 +1,8 @@
//===- AArch64LegalizerInfo --------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -35,6 +34,12 @@ public:
private:
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
+ bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
+ bool legalizeShlAshrLshr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const;
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index aa732a99469c..65b5f906e3f6 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
//===- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -934,8 +933,6 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
? getLdStOffsetOp(*StoreI).getImm()
: getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
- int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
- int Imms = Immr + Width - 1;
unsigned DestReg = IsStoreXReg
? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
&AArch64::GPR64RegClass)
@@ -945,8 +942,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
(UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
"Invalid offset");
- Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
- Imms = Immr + Width - 1;
+ int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
+ int Imms = Immr + Width - 1;
if (UnscaledLdOffset == UnscaledStOffset) {
uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
| ((Immr) << 6) // immr
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index d71359223b1b..e7d4a2789a28 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -1,9 +1,8 @@
//==-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index aa30fe1fa707..8f3148a98410 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -1,9 +1,8 @@
//===-- AArch64MCInstLower.h - Lower MachineInstr to MCInst ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 5183e7d3c0d0..0efeeb272ec1 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -1,9 +1,8 @@
//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -92,6 +91,11 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// other stack allocations.
bool CalleeSaveStackHasFreeSpace = false;
+ /// SRetReturnReg - sret lowering includes returning the value of the
+ /// returned struct in a register. This field holds the virtual register into
+ /// which the sret argument is passed.
+ unsigned SRetReturnReg = 0;
+
/// Has a value when it is known whether or not the function uses a
/// redzone, and no value otherwise.
/// Initialized during frame lowering, unless the function has the noredzone
@@ -101,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
+
+ // Offset from SP-at-entry to the tagged base pointer.
+ // Tagged base pointer is set up to point to the first (lowest address) tagged
+ // stack slot.
+ unsigned TaggedBasePointerOffset;
+
public:
AArch64FunctionInfo() = default;
@@ -166,6 +176,9 @@ public:
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
unsigned getJumpTableEntrySize(int Idx) const {
auto It = JumpTableEntryInfo.find(Idx);
if (It != JumpTableEntryInfo.end())
@@ -217,6 +230,13 @@ public:
return ForwardedMustTailRegParms;
}
+ unsigned getTaggedBasePointerOffset() const {
+ return TaggedBasePointerOffset;
+ }
+ void setTaggedBasePointerOffset(unsigned Offset) {
+ TaggedBasePointerOffset = Offset;
+ }
+
private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index bc596dd38b6e..9a2103579a6a 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -1,9 +1,8 @@
//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64MacroFusion.h b/lib/Target/AArch64/AArch64MacroFusion.h
index 32d90d4c40d6..4e7ccbe4baab 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.h
+++ b/lib/Target/AArch64/AArch64MacroFusion.h
@@ -1,9 +1,8 @@
//===- AArch64MacroFusion.h - AArch64 Macro Fusion ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index ccf646575296..aff861aae6be 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -1,9 +1,8 @@
//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains the AArch64 / Cortex-A57 specific register allocation
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index b99c1d1d6b3e..5ea91b4a1967 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -1,9 +1,8 @@
//==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h
index 9e9eec48c555..f443cd03935c 100644
--- a/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -1,9 +1,8 @@
//===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64PfmCounters.td b/lib/Target/AArch64/AArch64PfmCounters.td
index 16ba3e4282a0..b1d1664e3f1b 100644
--- a/lib/Target/AArch64/AArch64PfmCounters.td
+++ b/lib/Target/AArch64/AArch64PfmCounters.td
@@ -1,9 +1,8 @@
//===-- AArch64PfmCounters.td - AArch64 Hardware Counters --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
index 3da9306e6460..5f7245bfbd74 100644
--- a/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -1,9 +1,8 @@
//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,6 +43,10 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
switch (MI.getOpcode()) {
default:
return false;
+ case TargetOpcode::COPY:
+ return Helper.tryCombineCopy(MI);
+ case TargetOpcode::G_BR:
+ return Helper.tryCombineBr(MI);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 01d8a35bbc23..a594ecb71fc9 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -1,9 +1,8 @@
//==- AArch64PromoteConstant.cpp - Promote constant to global for AArch64 --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -494,7 +493,8 @@ void AArch64PromoteConstant::insertDefinitions(Function &F,
for (const auto &IPI : InsertPts) {
// Create the load of the global variable.
IRBuilder<> Builder(IPI.first);
- LoadInst *LoadedCst = Builder.CreateLoad(&PromotedGV);
+ LoadInst *LoadedCst =
+ Builder.CreateLoad(PromotedGV.getValueType(), &PromotedGV);
LLVM_DEBUG(dbgs() << "**********\n");
LLVM_DEBUG(dbgs() << "New def: ");
LLVM_DEBUG(LoadedCst->print(dbgs()));
diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index fcb0b36a9f6d..0d75ab7ac8a9 100644
--- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -1,9 +1,8 @@
//=- AArch64RedundantCopyElimination.cpp - Remove useless copy for AArch64 -=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// This pass removes unnecessary copies/moves in BBs based on a dominating
// condition.
@@ -380,8 +379,8 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) {
bool IsCopy = MI->isCopy();
bool IsMoveImm = MI->isMoveImmediate();
if (IsCopy || IsMoveImm) {
- MCPhysReg DefReg = MI->getOperand(0).getReg();
- MCPhysReg SrcReg = IsCopy ? MI->getOperand(1).getReg() : 0;
+ Register DefReg = MI->getOperand(0).getReg();
+ Register SrcReg = IsCopy ? MI->getOperand(1).getReg() : Register();
int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
if (!MRI->isReserved(DefReg) &&
((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 68c48a5ec216..b52259cc9acd 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -243,12 +242,17 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass(
case AArch64::GPR32RegClassID:
case AArch64::GPR32spRegClassID:
case AArch64::GPR32sponlyRegClassID:
+ case AArch64::GPR32argRegClassID:
case AArch64::GPR32allRegClassID:
case AArch64::GPR64commonRegClassID:
case AArch64::GPR64RegClassID:
case AArch64::GPR64spRegClassID:
case AArch64::GPR64sponlyRegClassID:
+ case AArch64::GPR64argRegClassID:
case AArch64::GPR64allRegClassID:
+ case AArch64::GPR64noipRegClassID:
+ case AArch64::GPR64common_and_GPR64noipRegClassID:
+ case AArch64::GPR64noip_and_tcGPR64RegClassID:
case AArch64::tcGPR64RegClassID:
case AArch64::WSeqPairsClassRegClassID:
case AArch64::XSeqPairsClassRegClassID:
@@ -385,11 +389,26 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FMA:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
return true;
}
return false;
@@ -438,6 +457,54 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
+bool AArch64RegisterBankInfo::hasFPConstraints(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Op = MI.getOpcode();
+
+ // Do we have an explicit floating point instruction?
+ if (isPreISelGenericFloatingPointOpcode(Op))
+ return true;
+
+ // No. Check if we have a copy-like instruction. If we do, then we could
+ // still be fed by floating point instructions.
+ if (Op != TargetOpcode::COPY && !MI.isPHI())
+ return false;
+
+ // MI is copy-like. Return true if it outputs an FPR.
+ return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+ &AArch64::FPRRegBank;
+}
+
+bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI);
+}
+
+bool AArch64RegisterBankInfo::onlyDefinesFP(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return true;
+ default:
+ break;
+ }
+ return hasFPConstraints(MI, MRI, TRI);
+}
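The three helpers added above share one underlying rule. A toy restatement (hypothetical, with booleans standing in for the MachineInstr queries):

    // Explicit FP opcodes always constrain; copy-like instructions (COPY/PHI)
    // constrain only when their result already lives on the FPR bank.
    bool hasFPConstraintsToy(bool IsFPOpcode, bool IsCopyLike, bool ResultOnFPR) {
      if (IsFPOpcode)
        return true;
      return IsCopyLike && ResultOnFPR;
    }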
+
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const unsigned Opc = MI.getOpcode();
@@ -470,10 +537,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
- // Shifts.
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
// Floating point ops.
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
@@ -488,6 +551,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
/*NumOperands*/ 2);
}
+ // Shifts.
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR: {
+ LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
+ return getInstructionMapping(DefaultMappingID, 1,
+ &ValMappings[Shift64Imm], 3);
+ return getSameKindOfOperandsMapping(MI);
+ }
case TargetOpcode::COPY: {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
@@ -563,10 +637,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP:
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ break;
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ break;
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
case TargetOpcode::G_FCMP:
@@ -600,15 +678,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// assume this was a floating point load in the IR.
// If it was not, we would have had a bitcast before
// reaching that instruction.
- unsigned UseOpc = UseMI.getOpcode();
- if (isPreISelGenericFloatingPointOpcode(UseOpc) ||
- // Check if we feed a copy-like instruction with
- // floating point constraints. In that case, we are still
- // feeding fp instructions, but indirectly
- // (e.g., through ABI copies).
- ((UseOpc == TargetOpcode::COPY || UseMI.isPHI()) &&
- getRegBank(UseMI.getOperand(0).getReg(), MRI, TRI) ==
- &AArch64::FPRRegBank)) {
+ if (onlyUsesFP(UseMI, MRI, TRI)) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
@@ -621,18 +691,134 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
if (!VReg)
break;
MachineInstr *DefMI = MRI.getVRegDef(VReg);
- unsigned DefOpc = DefMI->getOpcode();
- if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
- // Check if we come from a copy-like instruction with
- // floating point constraints. In that case, we are still
- // fed by fp instructions, but indirectly
- // (e.g., through ABI copies).
- ((DefOpc == TargetOpcode::COPY || DefMI->isPHI()) &&
- getRegBank(DefMI->getOperand(0).getReg(), MRI, TRI) ==
- &AArch64::FPRRegBank))
+ if (onlyDefinesFP(*DefMI, MRI, TRI))
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
+ break;
+ case TargetOpcode::G_SELECT: {
+ // If the destination is FPR, preserve that.
+ if (OpRegBankIdx[0] != PMI_FirstGPR)
+ break;
+
+ // If we're taking in vectors, we have no choice but to put everything on
+ // FPRs.
+ LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ if (SrcTy.isVector()) {
+ for (unsigned Idx = 0; Idx < 4; ++Idx)
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ break;
+ }
+
+ // Try to minimize the number of copies. If we have more floating point
+ // constrained values than not, then we'll put everything on FPR. Otherwise,
+ // everything has to be on GPR.
+ unsigned NumFP = 0;
+
+ // Check if the uses of the result always produce floating point values.
+ //
+ // For example:
+ //
+ // %z = G_SELECT %cond %x %y
+ // fpr = G_FOO %z ...
+ if (any_of(
+ MRI.use_instructions(MI.getOperand(0).getReg()),
+ [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
+ ++NumFP;
+
+ // Check if the defs of the source values always produce floating point
+ // values.
+ //
+ // For example:
+ //
+ // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
+ // %z = G_SELECT %cond %x %y
+ //
+ // Also check whether or not the sources have already been decided to be
+ // FPR. Keep track of this.
+ //
+ // This doesn't check the condition, since it's just whatever is in NZCV.
+ // This isn't passed explicitly in a register to fcsel/csel.
+ for (unsigned Idx = 2; Idx < 4; ++Idx) {
+ unsigned VReg = MI.getOperand(Idx).getReg();
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
+ onlyDefinesFP(*DefMI, MRI, TRI))
+ ++NumFP;
+ }
+
+ // If we have more FP constraints than not, then move everything over to
+ // FPR.
+ if (NumFP >= 2)
+ for (unsigned Idx = 0; Idx < 4; ++Idx)
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+
+ break;
+ }
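The copy-minimising heuristic above just counts FP-constrained neighbours of the select. A hypothetical condensed form:

    // Count the result use plus the two value defs; if FP constraints dominate
    // (two or more of the three), put every operand of the G_SELECT on FPR.
    bool selectGoesOnFPR(bool UseIsFP, bool TrueDefIsFP, bool FalseDefIsFP) {
      unsigned NumFP = UseIsFP + TrueDefIsFP + FalseDefIsFP;
      return NumFP >= 2;
    }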
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ // If the first operand belongs to a FPR register bank, then make sure that
+ // we preserve that.
+ if (OpRegBankIdx[0] != PMI_FirstGPR)
+ break;
+
+ LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
+ // UNMERGE into scalars from a vector should always use FPR.
+ // Likewise if any of the uses are FP instructions.
+ if (SrcTy.isVector() ||
+ any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+ [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
+ // Set the register bank of every operand to FPR.
+ for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
+ Idx < NumOperands; ++Idx)
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ }
+ break;
+ }
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ // Destination and source need to be FPRs.
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[1] = PMI_FirstFPR;
+
+ // Index needs to be a GPR.
+ OpRegBankIdx[2] = PMI_FirstGPR;
+ break;
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[1] = PMI_FirstFPR;
+
+ // The element may be either a GPR or FPR. Preserve that behaviour.
+ if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
+ OpRegBankIdx[2] = PMI_FirstFPR;
+ else
+ OpRegBankIdx[2] = PMI_FirstGPR;
+
+ // Index needs to be a GPR.
+ OpRegBankIdx[3] = PMI_FirstGPR;
+ break;
+ case TargetOpcode::G_BUILD_VECTOR:
+ // If the first source operand belongs to a FPR register bank, then make
+ // sure that we preserve that.
+ if (OpRegBankIdx[1] != PMI_FirstGPR)
+ break;
+ unsigned VReg = MI.getOperand(1).getReg();
+ if (!VReg)
+ break;
+
+    // Get the instruction that defined the source operand reg, and check if
+    // it's a floating point operation, or if the type is one like s16 which
+    // doesn't have an exact-size GPR register class.
+ MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ unsigned DefOpc = DefMI->getOpcode();
+ const LLT SrcTy = MRI.getType(VReg);
+ if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
+ SrcTy.getSizeInBits() < 32) {
+ // Have a floating point op.
+ // Make sure every operand gets mapped to a FPR register class.
+ unsigned NumOperands = MI.getNumOperands();
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ }
+ break;
}
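For G_BUILD_VECTOR, the decision above can be summarised as: keep an existing FPR assignment on the first source, and otherwise move everything to FPR when the source is defined by an FP operation or is too narrow for an exact-size GPR class. A hypothetical condensed predicate:

    // True when every operand of the G_BUILD_VECTOR should be mapped to FPR.
    bool buildVectorOnFPR(bool FirstSrcAlreadyFPR, bool DefIsFPOp, unsigned SrcBits) {
      if (FirstSrcAlreadyFPR)
        return true;                      // preserve the existing FPR assignment
      return DefIsFPOp || SrcBits < 32;   // FP producer, or no exact-size GPR class
    }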
// Finally construct the computed mapping.
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 008221dbef58..016fed65eb2a 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -1,9 +1,8 @@
//===- AArch64RegisterBankInfo -----------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -58,6 +57,7 @@ protected:
FPExt16To64Idx = 43,
FPExt32To64Idx = 45,
FPExt64To128Idx = 47,
+ Shift64Imm = 49
};
static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
@@ -114,6 +114,18 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
const InstructionMapping &
getSameKindOfOperandsMapping(const MachineInstr &MI) const;
+ /// Returns true if the output of \p MI must be stored on a FPR register.
+ bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// Returns true if the source registers of \p MI must all be FPRs.
+ bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ /// Returns true if the destination register of \p MI must be a FPR.
+ bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/lib/Target/AArch64/AArch64RegisterBanks.td b/lib/Target/AArch64/AArch64RegisterBanks.td
index eee584708f69..7bbd992890d1 100644
--- a/lib/Target/AArch64/AArch64RegisterBanks.td
+++ b/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -1,9 +1,8 @@
//=- AArch64RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 96ae45ae3d0d..6d5a4e3d2f76 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -217,11 +216,8 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
}
bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
- // FIXME: Get the list of argument registers from TableGen.
- static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
- AArch64::X3, AArch64::X4, AArch64::X5,
- AArch64::X6, AArch64::X7 };
- return std::any_of(std::begin(GPRArgRegs), std::end(GPRArgRegs),
+ return std::any_of(std::begin(*AArch64::GPR64argRegClass.MC),
+ std::end(*AArch64::GPR64argRegClass.MC),
[this, &MF](MCPhysReg r){return isReservedReg(MF, r);});
}
@@ -283,7 +279,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
-unsigned
+Register
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
@@ -457,15 +453,34 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
MI.getOpcode() == TargetOpcode::PATCHPOINT) {
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
- /*PreferFP=*/true);
+ /*PreferFP=*/true,
+ /*ForSimm=*/false);
Offset += MI.getOperand(FIOperandNum + 1).getImm();
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
}
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
+ MachineOperand &FI = MI.getOperand(FIOperandNum);
+ Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
+ FI.ChangeToImmediate(Offset);
+ return;
+ }
+
+ if (MI.getOpcode() == AArch64::TAGPstack) {
+ // TAGPstack must use the virtual frame register in its 3rd operand.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ FrameReg = MI.getOperand(3).getReg();
+ Offset =
+ MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
+ } else {
+ Offset = TFI->resolveFrameIndexReference(
+ MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+ }
+
// Modify MI as necessary to handle as much of 'Offset' as possible
- Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;
@@ -519,3 +534,13 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return 16;
}
}
+
+unsigned AArch64RegisterInfo::getLocalAddressRegister(
+ const MachineFunction &MF) const {
+ const auto &MFI = MF.getFrameInfo();
+ if (!MF.hasEHFunclets() && !MFI.hasVarSizedObjects())
+ return AArch64::SP;
+ else if (needsStackRealignment(MF))
+ return getBaseRegister();
+ return getFrameRegister(MF);
+}
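getLocalAddressRegister above picks the cheapest stable register for addressing locals. A hypothetical restatement of the priority order:

    enum class LocalAddrReg { SP, BasePointer, FramePointer };

    // SP when the frame layout is static, the base pointer under stack
    // realignment, and the frame register otherwise.
    LocalAddrReg pickLocalAddressReg(bool HasEHFunclets, bool HasVarSizedObjects,
                                     bool NeedsRealignment) {
      if (!HasEHFunclets && !HasVarSizedObjects)
        return LocalAddrReg::SP;
      if (NeedsRealignment)
        return LocalAddrReg::BasePointer;
      return LocalAddrReg::FramePointer;
    }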
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index c4153228a7c0..2c3f82c530d8 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -1,9 +1,8 @@
//==- AArch64RegisterInfo.h - AArch64 Register Information Impl --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,7 +113,7 @@ public:
unsigned getBaseRegister() const;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -122,6 +121,8 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction&) const override {
return true;
}
+
+ unsigned getLocalAddressRegister(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index d3710cea0687..61fc0795c242 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1,9 +1,8 @@
//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -188,6 +187,10 @@ def GPR64z : RegisterOperand<GPR64> {
let GIZeroRegister = XZR;
}
+// GPR argument registers.
+def GPR32arg : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 7)>;
+def GPR64arg : RegisterClass<"AArch64", [i64], 64, (sequence "X%u", 0, 7)>;
+
// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
// constraint used by any instructions, it is used as a common super-class.
def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
@@ -206,6 +209,11 @@ def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X2
// BTI-protected function.
def rtcGPR64 : RegisterClass<"AArch64", [i64], 64, (add X16, X17)>;
+// Register set that excludes registers that are reserved for procedure calls.
+// This is used for pseudo-instructions that are actually implemented using a
+// procedure call.
+def GPR64noip : RegisterClass<"AArch64", [i64], 64, (sub GPR64, X16, X17, LR)>;
+
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
// equal to the total number of bytes transferred.
@@ -649,10 +657,12 @@ def FPR128Op : RegisterOperand<FPR128, "printOperand"> {
// ARMv8.1a atomic CASP register operands
-def WSeqPairs : RegisterTuples<[sube32, subo32],
- [(rotl GPR32, 0), (rotl GPR32, 1)]>;
-def XSeqPairs : RegisterTuples<[sube64, subo64],
- [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+ [(decimate (rotl GPR32, 0), 2),
+ (decimate (rotl GPR32, 1), 2)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+ [(decimate (rotl GPR64, 0), 2),
+ (decimate (rotl GPR64, 1), 2)]>;
def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
(add WSeqPairs)>{
diff --git a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index af555f6d2266..854670079e40 100644
--- a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -1,8 +1,7 @@
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0fde68011e86..79ab42f4c080 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1,9 +1,8 @@
//=- AArch64SVEInstrInfo.td - AArch64 SVE Instructions -*- tablegen -*-----=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,10 +25,10 @@ let Predicates = [HasSVE] in {
defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">;
defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">;
- def AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
- def ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
- def EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
- def BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
+ defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
+ defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
+ defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
+ defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">;
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">;
@@ -876,10 +875,10 @@ let Predicates = [HasSVE] in {
defm LSL_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b11, "lsl">;
// Predicated shifts
- defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b000, "asr">;
- defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b001, "lsr">;
- defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b011, "lsl">;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b100, "asrd">;
+ defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
+ defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
+ defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
@@ -1022,3 +1021,406 @@ let Predicates = [HasSVE] in {
def : InstAlias<"fcmlt $Zd, $Pg/z, $Zm, $Zn",
(FCMGT_PPzZZ_D PPR64:$Zd, PPR3bAny:$Pg, ZPR64:$Zn, ZPR64:$Zm), 0>;
}
+
+let Predicates = [HasSVE2] in {
+ // SVE2 integer multiply-add (indexed)
+ defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla">;
+ defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls">;
+
+ // SVE2 saturating multiply-add high (indexed)
+ defm SQRDMLAH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b0, "sqrdmlah">;
+ defm SQRDMLSH_ZZZI : sve2_int_mla_by_indexed_elem<0b10, 0b1, "sqrdmlsh">;
+
+ // SVE2 saturating multiply-add high (vectors, unpredicated)
+ defm SQRDMLAH_ZZZ : sve2_int_mla<0b0, "sqrdmlah">;
+ defm SQRDMLSH_ZZZ : sve2_int_mla<0b1, "sqrdmlsh">;
+
+ // SVE2 integer multiply (indexed)
+ defm MUL_ZZZI : sve2_int_mul_by_indexed_elem<0b1110, "mul">;
+
+ // SVE2 saturating multiply high (indexed)
+ defm SQDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1100, "sqdmulh">;
+ defm SQRDMULH_ZZZI : sve2_int_mul_by_indexed_elem<0b1101, "sqrdmulh">;
+
+ // SVE2 signed saturating doubling multiply high (unpredicated)
+ defm SQDMULH_ZZZ : sve2_int_mul<0b100, "sqdmulh">;
+ defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh">;
+
+ // SVE2 integer multiply vectors (unpredicated)
+ defm MUL_ZZZ : sve2_int_mul<0b000, "mul">;
+ defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh">;
+ defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh">;
+ def PMUL_ZZZ_B : sve2_int_mul<0b00, 0b001, "pmul", ZPR8>;
+
+ // SVE2 complex integer dot product (indexed)
+ defm CDOT_ZZZI : sve2_cintx_dot_by_indexed_elem<"cdot">;
+
+ // SVE2 complex integer dot product
+ defm CDOT_ZZZ : sve2_cintx_dot<"cdot">;
+
+ // SVE2 complex integer multiply-add (indexed)
+ defm CMLA_ZZZI : sve2_cmla_by_indexed_elem<0b0, "cmla">;
+ // SVE2 complex saturating multiply-add (indexed)
+ defm SQRDCMLAH_ZZZI : sve2_cmla_by_indexed_elem<0b1, "sqrdcmlah">;
+
+ // SVE2 complex integer multiply-add
+ defm CMLA_ZZZ : sve2_int_cmla<0b0, "cmla">;
+ defm SQRDCMLAH_ZZZ : sve2_int_cmla<0b1, "sqrdcmlah">;
+
+ // SVE2 integer multiply long (indexed)
+ defm SMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b000, "smullb">;
+ defm SMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b001, "smullt">;
+ defm UMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b010, "umullb">;
+ defm UMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b011, "umullt">;
+
+ // SVE2 saturating multiply (indexed)
+ defm SQDMULLB_ZZZI : sve2_int_mul_long_by_indexed_elem<0b100, "sqdmullb">;
+ defm SQDMULLT_ZZZI : sve2_int_mul_long_by_indexed_elem<0b101, "sqdmullt">;
+
+ // SVE2 integer multiply-add long (indexed)
+ defm SMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1000, "smlalb">;
+ defm SMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1001, "smlalt">;
+ defm UMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1010, "umlalb">;
+ defm UMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1011, "umlalt">;
+ defm SMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1100, "smlslb">;
+ defm SMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1101, "smlslt">;
+ defm UMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1110, "umlslb">;
+ defm UMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b1111, "umlslt">;
+
+ // SVE2 integer multiply-add long (vectors, unpredicated)
+ defm SMLALB_ZZZ : sve2_int_mla_long<0b10000, "smlalb">;
+ defm SMLALT_ZZZ : sve2_int_mla_long<0b10001, "smlalt">;
+ defm UMLALB_ZZZ : sve2_int_mla_long<0b10010, "umlalb">;
+ defm UMLALT_ZZZ : sve2_int_mla_long<0b10011, "umlalt">;
+ defm SMLSLB_ZZZ : sve2_int_mla_long<0b10100, "smlslb">;
+ defm SMLSLT_ZZZ : sve2_int_mla_long<0b10101, "smlslt">;
+ defm UMLSLB_ZZZ : sve2_int_mla_long<0b10110, "umlslb">;
+ defm UMLSLT_ZZZ : sve2_int_mla_long<0b10111, "umlslt">;
+
+ // SVE2 saturating multiply-add long (indexed)
+ defm SQDMLALB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0100, "sqdmlalb">;
+ defm SQDMLALT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0101, "sqdmlalt">;
+ defm SQDMLSLB_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0110, "sqdmlslb">;
+ defm SQDMLSLT_ZZZI : sve2_int_mla_long_by_indexed_elem<0b0111, "sqdmlslt">;
+
+ // SVE2 saturating multiply-add long (vectors, unpredicated)
+ defm SQDMLALB_ZZZ : sve2_int_mla_long<0b11000, "sqdmlalb">;
+ defm SQDMLALT_ZZZ : sve2_int_mla_long<0b11001, "sqdmlalt">;
+ defm SQDMLSLB_ZZZ : sve2_int_mla_long<0b11010, "sqdmlslb">;
+ defm SQDMLSLT_ZZZ : sve2_int_mla_long<0b11011, "sqdmlslt">;
+
+ // SVE2 saturating multiply-add interleaved long
+ defm SQDMLALBT_ZZZ : sve2_int_mla_long<0b00010, "sqdmlalbt">;
+ defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
+
+ // SVE2 integer halving add/subtract (predicated)
+ defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd">;
+ defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd">;
+ defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub">;
+ defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub">;
+ defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">;
+ defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">;
+ defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">;
+ defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">;
+
+ // SVE2 integer pairwise add and accumulate long
+ defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">;
+ defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">;
+
+ // SVE2 integer pairwise arithmetic
+ defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp">;
+ defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">;
+ defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">;
+ defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">;
+ defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">;
+
+ // SVE2 integer unary operations (predicated)
+ defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
+ defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte">;
+ defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs">;
+ defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
+
+ // SVE2 saturating add/subtract
+ defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd">;
+ defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd">;
+ defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub">;
+ defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub">;
+ defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
+ defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
+ defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
+ defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
+
+ // SVE2 saturating/rounding bitwise shift left (predicated)
+ defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl">;
+ defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl">;
+ defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr">;
+ defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr">;
+ defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl">;
+ defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl">;
+ defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl">;
+ defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl">;
+ defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr">;
+ defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr">;
+ defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
+ defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+
+ // SVE2 integer add/subtract long
+ defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
+ defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
+ defm UADDLB_ZZZ : sve2_wide_int_arith_long<0b00010, "uaddlb">;
+ defm UADDLT_ZZZ : sve2_wide_int_arith_long<0b00011, "uaddlt">;
+ defm SSUBLB_ZZZ : sve2_wide_int_arith_long<0b00100, "ssublb">;
+ defm SSUBLT_ZZZ : sve2_wide_int_arith_long<0b00101, "ssublt">;
+ defm USUBLB_ZZZ : sve2_wide_int_arith_long<0b00110, "usublb">;
+ defm USUBLT_ZZZ : sve2_wide_int_arith_long<0b00111, "usublt">;
+ defm SABDLB_ZZZ : sve2_wide_int_arith_long<0b01100, "sabdlb">;
+ defm SABDLT_ZZZ : sve2_wide_int_arith_long<0b01101, "sabdlt">;
+ defm UABDLB_ZZZ : sve2_wide_int_arith_long<0b01110, "uabdlb">;
+ defm UABDLT_ZZZ : sve2_wide_int_arith_long<0b01111, "uabdlt">;
+
+ // SVE2 integer add/subtract wide
+ defm SADDWB_ZZZ : sve2_wide_int_arith_wide<0b000, "saddwb">;
+ defm SADDWT_ZZZ : sve2_wide_int_arith_wide<0b001, "saddwt">;
+ defm UADDWB_ZZZ : sve2_wide_int_arith_wide<0b010, "uaddwb">;
+ defm UADDWT_ZZZ : sve2_wide_int_arith_wide<0b011, "uaddwt">;
+ defm SSUBWB_ZZZ : sve2_wide_int_arith_wide<0b100, "ssubwb">;
+ defm SSUBWT_ZZZ : sve2_wide_int_arith_wide<0b101, "ssubwt">;
+ defm USUBWB_ZZZ : sve2_wide_int_arith_wide<0b110, "usubwb">;
+ defm USUBWT_ZZZ : sve2_wide_int_arith_wide<0b111, "usubwt">;
+
+ // SVE2 integer multiply long
+ defm SQDMULLB_ZZZ : sve2_wide_int_arith_long<0b11000, "sqdmullb">;
+ defm SQDMULLT_ZZZ : sve2_wide_int_arith_long<0b11001, "sqdmullt">;
+ defm SMULLB_ZZZ : sve2_wide_int_arith_long<0b11100, "smullb">;
+ defm SMULLT_ZZZ : sve2_wide_int_arith_long<0b11101, "smullt">;
+ defm UMULLB_ZZZ : sve2_wide_int_arith_long<0b11110, "umullb">;
+ defm UMULLT_ZZZ : sve2_wide_int_arith_long<0b11111, "umullt">;
+ defm PMULLB_ZZZ : sve2_pmul_long<0b0, "pmullb">;
+ defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
+
+ // SVE2 bitwise shift and insert
+ defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
+ defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
+
+ // SVE2 bitwise shift right and accumulate
+ defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
+ defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
+ defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
+ defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
+
+ // SVE2 complex integer add
+ defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
+ defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
+
+ // SVE2 integer absolute difference and accumulate
+ defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
+ defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;
+
+ // SVE2 integer absolute difference and accumulate long
+ defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
+ defm SABALT_ZZZ : sve2_int_absdiff_accum_long<0b01, "sabalt">;
+ defm UABALB_ZZZ : sve2_int_absdiff_accum_long<0b10, "uabalb">;
+ defm UABALT_ZZZ : sve2_int_absdiff_accum_long<0b11, "uabalt">;
+
+ // SVE2 integer add/subtract long with carry
+ defm ADCLB_ZZZ : sve2_int_addsub_long_carry<0b00, "adclb">;
+ defm ADCLT_ZZZ : sve2_int_addsub_long_carry<0b01, "adclt">;
+ defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
+ defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
+
+ // SVE2 bitwise shift right narrow
+ defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
+ defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
+ defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
+ defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
+ defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
+ defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
+ defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
+ defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
+ defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
+ defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
+ defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
+ defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
+ defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
+ defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
+ defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
+ defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
+
+ // SVE2 integer add/subtract narrow high part
+ defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">;
+ defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">;
+ defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
+ defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
+ defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">;
+ defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">;
+ defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
+ defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
+
+ // SVE2 saturating extract narrow
+ defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
+ defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
+ defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
+ defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
+ defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
+ defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
+
+ // SVE2 character match
+ defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
+ defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch">;
+
+ // SVE2 bitwise exclusive-or interleaved
+ defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt">;
+ defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb">;
+
+ // SVE2 bitwise shift left long
+ defm SSHLLB_ZZI : sve2_bitwise_shift_left_long<0b00, "sshllb">;
+ defm SSHLLT_ZZI : sve2_bitwise_shift_left_long<0b01, "sshllt">;
+ defm USHLLB_ZZI : sve2_bitwise_shift_left_long<0b10, "ushllb">;
+ defm USHLLT_ZZI : sve2_bitwise_shift_left_long<0b11, "ushllt">;
+
+ // SVE2 integer add/subtract interleaved long
+ defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt">;
+ defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt">;
+ defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb">;
+
+ // SVE2 histogram generation (segment)
+ def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg">;
+
+ // SVE2 histogram generation (vector)
+ defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+
+ // SVE2 floating-point convert precision
+ defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
+ defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
+ defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
+
+ // SVE2 floating-point pairwise operations
+ defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
+ defm FMAXNMP_ZPmZZ : sve2_fp_pairwise_pred<0b100, "fmaxnmp">;
+ defm FMINNMP_ZPmZZ : sve2_fp_pairwise_pred<0b101, "fminnmp">;
+ defm FMAXP_ZPmZZ : sve2_fp_pairwise_pred<0b110, "fmaxp">;
+ defm FMINP_ZPmZZ : sve2_fp_pairwise_pred<0b111, "fminp">;
+
+ // SVE2 floating-point multiply-add long (indexed)
+ def FMLALB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b00, "fmlalb">;
+ def FMLALT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b01, "fmlalt">;
+ def FMLSLB_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b10, "fmlslb">;
+ def FMLSLT_ZZZI_SHH : sve2_fp_mla_long_by_indexed_elem<0b11, "fmlslt">;
+
+ // SVE2 floating-point multiply-add long
+ def FMLALB_ZZZ_SHH : sve2_fp_mla_long<0b00, "fmlalb">;
+ def FMLALT_ZZZ_SHH : sve2_fp_mla_long<0b01, "fmlalt">;
+ def FMLSLB_ZZZ_SHH : sve2_fp_mla_long<0b10, "fmlslb">;
+ def FMLSLT_ZZZ_SHH : sve2_fp_mla_long<0b11, "fmlslt">;
+
+ // SVE2 bitwise ternary operations
+ defm EOR3_ZZZZ_D : sve2_int_bitwise_ternary_op<0b000, "eor3">;
+ defm BCAX_ZZZZ_D : sve2_int_bitwise_ternary_op<0b010, "bcax">;
+ def BSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b001, "bsl">;
+ def BSL1N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b011, "bsl1n">;
+ def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
+ def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
+
+ // sve_int_rotate_imm
+ defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
+
+ // SVE2 extract vector (immediate offset, constructive)
+ def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+
+ // SVE floating-point convert precision
+ def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
+
+ // SVE floating-point convert to integer
+ defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
+
+ // Non-temporal contiguous loads (vector + register)
+ defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
+ defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
+ defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
+ defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
+ defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
+
+ defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
+ defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
+ defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
+ defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
+ defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
+ defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
+ defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
+
+ // SVE2 vector splice (constructive)
+ defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+
+ // Predicated shifts
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
+ defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+
+ // Non-temporal contiguous stores (vector + register)
+ defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
+ defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
+ defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
+
+ defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
+ defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
+ defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
+ defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
+
+ // SVE table lookup (three sources)
+ defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
+ defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
+
+ // SVE integer compare scalar count and limit
+ defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
+ defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
+ defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
+ defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">;
+
+ defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">;
+ defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">;
+ defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
+ defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
+
+ // SVE pointer conflict compare
+ defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
+ defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
+}
+
+let Predicates = [HasSVE2AES] in {
+ // SVE2 crypto destructive binary operations
+ def AESE_ZZZ_B : sve2_crypto_des_bin_op<0b00, "aese", ZPR8>;
+ def AESD_ZZZ_B : sve2_crypto_des_bin_op<0b01, "aesd", ZPR8>;
+
+ // SVE2 crypto unary operations
+ def AESMC_ZZ_B : sve2_crypto_unary_op<0b0, "aesmc">;
+ def AESIMC_ZZ_B : sve2_crypto_unary_op<0b1, "aesimc">;
+
+  // The PMULLB and PMULLT instructions that operate on 64-bit source and
+  // 128-bit destination elements are enabled by the crypto extensions, similar
+  // to the NEON PMULL2 instruction.
+ def PMULLB_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11010, "pmullb",
+ ZPR128, ZPR64, ZPR64>;
+ def PMULLT_ZZZ_Q : sve2_wide_int_arith<0b00, 0b11011, "pmullt",
+ ZPR128, ZPR64, ZPR64>;
+}
+
+let Predicates = [HasSVE2SM4] in {
+ // SVE2 crypto constructive binary operations
+ def SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32>;
+ // SVE2 crypto destructive binary operations
+ def SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32>;
+}
+
+let Predicates = [HasSVE2SHA3] in {
+ // SVE2 crypto constructive binary operations
+ def RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64>;
+}
+
+let Predicates = [HasSVE2BitPerm] in {
+ // SVE2 bitwise permute
+ defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext">;
+ defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep">;
+ defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp">;
+}
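The HasSVE2AES comment above notes that pmullb/pmullt gain a form with 64-bit source elements and a 128-bit result element, analogous to NEON's PMULL/PMULL2. For readers who want the arithmetic spelled out, here is a minimal scalar sketch of that carry-less (polynomial) 64x64-to-128 multiply; the helper name clmul64 is illustrative only and does not appear in this patch.

#include <cstdint>
#include <utility>

// Carry-less (polynomial) 64x64 -> 128-bit multiply; result returned as
// {high, low} 64-bit halves.
static std::pair<uint64_t, uint64_t> clmul64(uint64_t a, uint64_t b) {
  uint64_t lo = 0, hi = 0;
  for (unsigned i = 0; i < 64; ++i) {
    if ((b >> i) & 1) {
      lo ^= a << i;          // low half of a * x^i
      if (i != 0)
        hi ^= a >> (64 - i); // bits that spill into the high half
    }
  }
  return {hi, lo};
}

The distinguishing point is that partial products are combined with XOR rather than addition, which is why this product is the one wanted for GCM-style polynomial hashing.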
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index f253a4f3e25a..a6df0f3f083c 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -1,9 +1,8 @@
//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@ def CortexA53Model : SchedMachineModel {
// v 1.0 Spreadsheet
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index ade03f23f8c7..9f566d1c7079 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -1,9 +1,8 @@
//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,7 @@ def CortexA57Model : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index 55005e1d9ed1..987ed3c4ebfb 100644
--- a/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -1,9 +1,8 @@
//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td
index 7a474ba8ef9b..798ecb7508c0 100644
--- a/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -1,9 +1,8 @@
//=- AArch64SchedCyclone.td - Cyclone Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,7 @@ def CycloneModel : SchedMachineModel {
let MispredictPenalty = 16; // 14-19 cycles are typical.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedExynosM1.td b/lib/Target/AArch64/AArch64SchedExynosM1.td
index f757d53b6c1c..f1e76e2c20d3 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM1.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM1.td
@@ -1,9 +1,8 @@
//=- AArch64SchedExynosM1.td - Samsung Exynos M1 Sched Defs --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,7 +24,7 @@ def ExynosM1Model : SchedMachineModel {
let MispredictPenalty = 14; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedExynosM3.td b/lib/Target/AArch64/AArch64SchedExynosM3.td
index 15935088a17e..c9d29d75d9db 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -1,9 +1,8 @@
//=- AArch64SchedExynosM3.td - Samsung Exynos M3 Sched Defs --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,7 +24,7 @@ def ExynosM3Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td
index 4d892465b3f2..c8bf05f16131 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -1,9 +1,8 @@
//=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,7 +24,7 @@ def ExynosM4Model : SchedMachineModel {
let MispredictPenalty = 16; // Minimum branch misprediction penalty.
let CompleteModel = 1; // Use the default model otherwise.
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
}
//===----------------------------------------------------------------------===//
@@ -239,7 +238,6 @@ def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF,
M4UnitS0]> { let Latency = 5;
let NumMicroOps = 2; }
def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
-def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
M4UnitNMSC]> { let Latency = 5;
let NumMicroOps = 2; }
@@ -480,8 +478,6 @@ def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
SchedVar<NoSchedPred, [M4WriteZ0]>]>;
def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
SchedVar<NoSchedPred, [M4WriteNALU1]>]>;
-def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
- SchedVar<NoSchedPred, [M4WriteNMUL3]>]>;
// Fast forwarding.
def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
@@ -489,7 +485,8 @@ def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4,
M4WriteFMAC4H,
M4WriteFMAC5]>;
def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
-def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
+def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>;
+
//===----------------------------------------------------------------------===//
// Coarse scheduling model.
@@ -662,10 +659,8 @@ def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
-def : InstRW<[M4WriteFMAC4H,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
-def : InstRW<[M4WriteFMAC4,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>;
// FP load instructions.
def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
@@ -736,14 +731,20 @@ def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
def : InstRW<[M4WriteNMUL3,
M4ReadNMULM1], (instregex "^ML[AS]v")>;
-def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
-def : InstRW<[M4WriteMULL,
- M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
-def : InstRW<[M4WriteMULL,
- M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
@@ -808,10 +809,8 @@ def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
-def : InstRW<[M4WriteFMAC4H,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
-def : InstRW<[M4WriteFMAC4,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index 84825458e47c..92d03963de57 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -1,9 +1,8 @@
//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,7 +23,7 @@ def FalkorModel : SchedMachineModel {
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index ff14e639d1a5..697a0f69c58c 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -1,9 +1,8 @@
//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64SchedKryo.td b/lib/Target/AArch64/AArch64SchedKryo.td
index 68de3e077c96..0e1a24103121 100644
--- a/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/lib/Target/AArch64/AArch64SchedKryo.td
@@ -1,9 +1,8 @@
//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,7 @@ def KryoModel : SchedMachineModel {
let LoopMicroOpBufferSize = 16;
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64SchedKryoDetails.td b/lib/Target/AArch64/AArch64SchedKryoDetails.td
index cf4cdabb8cbf..4c60992e6351 100644
--- a/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -1,9 +1,8 @@
//=- AArch64SchedKryoDetails.td - QC Kryo Scheduling Defs ----*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64SchedPredExynos.td b/lib/Target/AArch64/AArch64SchedPredExynos.td
index 48c54230e9d8..0c1d82d354c0 100644
--- a/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -1,9 +1,8 @@
//===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -103,17 +102,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
// Identify FP instructions.
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def ExynosLongVectorUpperFn : TIIPredicate<
- "isExynosLongVectorUpper",
- MCOpcodeSwitchStatement<
- [MCOpcodeSwitchCase<
- IsLongVectorUpperOp.ValidOpcodes,
- MCReturnStatement<TruePred>>],
- MCReturnStatement<FalsePred>>>;
-def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
-
// Identify 128-bit NEON instructions.
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
diff --git a/lib/Target/AArch64/AArch64SchedPredicates.td b/lib/Target/AArch64/AArch64SchedPredicates.td
index dbaf11fc95dd..0ef0f3f8675a 100644
--- a/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -1,9 +1,8 @@
//===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -268,59 +267,6 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
IsStoreRegOffsetOp.ValidOpcodes)>;
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
- FCVTNv8i16, FCVTNv4i32,
- FCVTXNv4f32,
- PMULLv16i8, PMULLv2i64,
- RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
- RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
- RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
- SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
- SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
- SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
- SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
- SHLLv16i8, SHLLv8i16, SHLLv4i32,
- SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
- SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
- SMLALv8i16_indexed, SMLALv4i32_indexed,
- SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
- SMLSLv8i16_indexed, SMLSLv4i32_indexed,
- SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
- SMULLv8i16_indexed, SMULLv4i32_indexed,
- SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
- SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
- SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
- SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
- SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
- SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
- SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
- SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
- SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
- SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
- SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
- SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
- SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
- SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
- SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
- UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
- UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
- UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
- UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
- UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
- UMLALv8i16_indexed, UMLALv4i32_indexed,
- UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
- UMLSLv8i16_indexed, UMLSLv4i32_indexed,
- UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
- UMULLv8i16_indexed, UMULLv4i32_indexed,
- UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
- UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
- USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
- USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
- USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
- XTNv16i8, XTNv8i16, XTNv4i32]>;
-
// Target predicates.
// Identify an instruction that effectively transfers a register to another.
diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td
index fbbd3850d0fd..3b6aecf5c035 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -1,9 +1,8 @@
//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,7 +25,7 @@ def ThunderXT8XModel : SchedMachineModel {
let PostRAScheduler = 1; // Use PostRA scheduler.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index bee3392b6d3b..674ea19b082f 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,9 +1,8 @@
//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,7 +25,7 @@ def ThunderX2T99Model : SchedMachineModel {
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
- list<Predicate> UnsupportedFeatures = [HasSVE];
+ list<Predicate> UnsupportedFeatures = SVEUnsupported.F;
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
index f55ba4d42fce..49c0c1782236 100644
--- a/lib/Target/AArch64/AArch64Schedule.td
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -1,9 +1,8 @@
//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index a719d47618e5..60dbace03ca6 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- AArch64SelectionDAGInfo.cpp - AArch64 SelectionDAG Info -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
CodeGenOpt::Level OptLevel) const {
return OptLevel >= CodeGenOpt::Aggressive;
}
+
+static const int kSetTagLoopThreshold = 176;
+
+static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, uint64_t ObjSize,
+ const MachineMemOperand *BaseMemOperand,
+ bool ZeroData) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned ObjSizeScaled = ObjSize / 16;
+
+ SDValue TagSrc = Ptr;
+ if (Ptr.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
+ Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
+    // A frame index operand may end up as [SP + offset] => it is fine to use the
+    // SP register as the tag source.
+ TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
+ }
+
+ const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
+ const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
+
+ SmallVector<SDValue, 8> OutChains;
+ unsigned OffsetScaled = 0;
+ while (OffsetScaled < ObjSizeScaled) {
+ if (ObjSizeScaled - OffsetScaled >= 2) {
+ SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+ SDValue St = DAG.getMemIntrinsicNode(
+ OpCode2, dl, DAG.getVTList(MVT::Other),
+ {Chain, TagSrc, AddrNode},
+ MVT::v4i64,
+ MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
+ OffsetScaled += 2;
+ OutChains.push_back(St);
+ continue;
+ }
+
+ if (ObjSizeScaled - OffsetScaled > 0) {
+ SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+ SDValue St = DAG.getMemIntrinsicNode(
+ OpCode1, dl, DAG.getVTList(MVT::Other),
+ {Chain, TagSrc, AddrNode},
+ MVT::v2i64,
+ MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
+ OffsetScaled += 1;
+ OutChains.push_back(St);
+ }
+ }
+
+ SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+ return Res;
+}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
+ SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
+ uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
+ assert(ObjSize % 16 == 0);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+ DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);
+
+ bool UseSetTagRangeLoop =
+ kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
+ if (!UseSetTagRangeLoop)
+ return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
+ ZeroData);
+
+ if (ObjSize % 32 != 0) {
+ SDNode *St1 = DAG.getMachineNode(
+ ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+ {MVT::i64, MVT::Other},
+ {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+ DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+ ObjSize -= 16;
+ Addr = SDValue(St1, 0);
+ Chain = SDValue(St1, 1);
+ }
+
+ const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+ SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+ SDNode *St = DAG.getMachineNode(
+ ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
+
+ DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
+ return SDValue(St, 2);
+}
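To make the control flow of the new EmitUnrolledSetTag above easier to follow: the object size is asserted to be a multiple of 16 bytes, and the loop then covers the object in 16-byte tag granules, emitting the two-granule STZ2G/ST2G form while at least two granules remain and a single STZG/STG for a trailing granule. Below is a small stand-alone sketch of that covering plan; TagStore and planSetTag are hypothetical names used only for illustration.

#include <cstdint>
#include <vector>

struct TagStore {
  uint64_t Offset;   // byte offset from the start of the object
  unsigned Granules; // 2 => STZ2G/ST2G, 1 => STZG/STG
};

// Mirror of the EmitUnrolledSetTag loop: consume 16-byte granules in pairs,
// then a possible single trailing granule. ObjSize must be a multiple of 16.
static std::vector<TagStore> planSetTag(uint64_t ObjSize) {
  std::vector<TagStore> Plan;
  uint64_t Scaled = ObjSize / 16, Offset = 0;
  while (Offset < Scaled) {
    unsigned N = (Scaled - Offset >= 2) ? 2 : 1;
    Plan.push_back({Offset * 16, N});
    Offset += N;
  }
  return Plan;
}

For example, planSetTag(48) yields {0, 2} followed by {32, 1}, i.e. one paired store and one single store, which is the same shape the DAG code produces before it switches to the STGloop pseudo at or above kSetTagLoopThreshold bytes.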
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 7e4f11091226..d0967fb973cc 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- AArch64SelectionDAGInfo.h - AArch64 SelectionDAG Info ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,10 @@ public:
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const override;
+ SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Op1, SDValue Op2,
+ MachinePointerInfo DstPtrInfo,
+ bool ZeroData) const override;
bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
};
}
diff --git a/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index e9699b0367d3..3087e6ce441d 100644
--- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -1,9 +1,8 @@
//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -103,6 +102,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
@@ -146,25 +146,31 @@ private:
BitVector RegsAlreadyMasked;
bool functionUsesHardeningRegister(MachineFunction &MF) const;
- bool instrumentControlFlow(MachineBasicBlock &MBB);
+ bool instrumentControlFlow(MachineBasicBlock &MBB,
+ bool &UsesFullSpeculationBarrier);
bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
AArch64CC::CondCode &CondCode) const;
void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
AArch64CC::CondCode &CondCode, DebugLoc DL) const;
- void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+ void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
- void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+ void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned TmpReg) const;
+ void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const;
bool slhLoads(MachineBasicBlock &MBB);
bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineInstr &MI, unsigned Reg);
- bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+ bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
+ bool UsesFullSpeculationBarrier);
bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI);
+ MachineBasicBlock::iterator MBBI,
+ bool UsesFullSpeculationBarrier);
bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL);
};
@@ -207,15 +213,19 @@ bool AArch64SpeculationHardening::endsWithCondControlFlow(
return true;
}
+void AArch64SpeculationHardening::insertFullSpeculationBarrier(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const {
+ // A full control flow speculation barrier consists of (DSB SYS + ISB)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
+}
+
void AArch64SpeculationHardening::insertTrackingCode(
MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
DebugLoc DL) const {
if (UseControlFlowSpeculationBarrier) {
- // insert full control flow speculation barrier (DSB SYS + ISB)
- BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
- .addImm(0xf);
- BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
- .addImm(0xf);
+ insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
} else {
BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
.addDef(MisspeculatingTaintReg)
@@ -227,7 +237,7 @@ void AArch64SpeculationHardening::insertTrackingCode(
}
bool AArch64SpeculationHardening::instrumentControlFlow(
- MachineBasicBlock &MBB) {
+ MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
bool Modified = false;
@@ -263,55 +273,105 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
}
// Perform correct code generation around function calls and before returns.
- {
- SmallVector<MachineInstr *, 4> ReturnInstructions;
- SmallVector<MachineInstr *, 4> CallInstructions;
+  // The below variables record the return/terminator instructions and the call
+  // instructions, respectively, along with which register is available as a
+  // temporary register just before each recorded instruction.
+ SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
+ SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
+  // If a temporary register is not available for at least one of the
+ // instructions for which we need to transfer taint to the stack pointer, we
+ // need to insert a full speculation barrier.
+ // TmpRegisterNotAvailableEverywhere tracks that condition.
+ bool TmpRegisterNotAvailableEverywhere = false;
+
+ RegScavenger RS;
+ RS.enterBasicBlock(MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+ MachineInstr &MI = *I;
+ if (!MI.isReturn() && !MI.isCall())
+ continue;
- for (MachineInstr &MI : MBB) {
- if (MI.isReturn())
- ReturnInstructions.push_back(&MI);
- else if (MI.isCall())
- CallInstructions.push_back(&MI);
- }
+ // The RegScavenger represents registers available *after* the MI
+ // instruction pointed to by RS.getCurrentPosition().
+ // We need to have a register that is available *before* the MI is executed.
+ if (I != MBB.begin())
+ RS.forward(std::prev(I));
+    // FIXME: The below just finds *an* unused register. Maybe the code could be
+ // optimized more if this looks for the register that isn't used for the
+ // longest time around this place, to enable more scheduling freedom. Not
+ // sure if that would actually result in a big performance difference
+ // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
+ // already to do this - but it's unclear if that could easily be used here.
+ unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+ LLVM_DEBUG(dbgs() << "RS finds "
+ << ((TmpReg == 0) ? "no register " : "register ");
+ if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
+ dbgs() << "to be available at MI " << MI);
+ if (TmpReg == 0)
+ TmpRegisterNotAvailableEverywhere = true;
+ if (MI.isReturn())
+ ReturnInstructions.push_back({&MI, TmpReg});
+ else if (MI.isCall())
+ CallInstructions.push_back({&MI, TmpReg});
+ }
- Modified |=
- (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+ if (TmpRegisterNotAvailableEverywhere) {
+    // When a temporary register is not available everywhere in this basic
+    // block where a propagate-taint-to-sp operation is needed, just
+ // emit a full speculation barrier at the start of this basic block, which
+ // renders the taint/speculation tracking in this basic block unnecessary.
+ insertFullSpeculationBarrier(MBB, MBB.begin(),
+ (MBB.begin())->getDebugLoc());
+ UsesFullSpeculationBarrier = true;
+ Modified = true;
+ } else {
+ for (auto MI_Reg : ReturnInstructions) {
+ assert(MI_Reg.second != 0);
+ LLVM_DEBUG(
+ dbgs()
+ << " About to insert Reg to SP taint propagation with temp register "
+ << printReg(MI_Reg.second, TRI)
+ << " on instruction: " << *MI_Reg.first);
+ insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+ Modified = true;
+ }
- for (MachineInstr *Return : ReturnInstructions)
- insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
- for (MachineInstr *Call : CallInstructions) {
+ for (auto MI_Reg : CallInstructions) {
+ assert(MI_Reg.second != 0);
+ LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
+ "propagation with temp register "
+ << printReg(MI_Reg.second, TRI)
+ << " around instruction: " << *MI_Reg.first);
// Just after the call:
- MachineBasicBlock::iterator i = Call;
- i++;
- insertSPToRegTaintPropagation(Call->getParent(), i);
+ insertSPToRegTaintPropagation(
+ MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
// Just before the call:
- insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+ insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+ Modified = true;
}
}
-
return Modified;
}
void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
- MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
// If full control flow speculation barriers are used, emit a control flow
// barrier to block potential miss-speculation in flight coming in to this
// function.
if (UseControlFlowSpeculationBarrier) {
- // insert full control flow speculation barrier (DSB SYS + ISB)
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+ insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
return;
}
// CMP SP, #0 === SUBS xzr, SP, #0
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
.addDef(AArch64::XZR)
.addUse(AArch64::SP)
.addImm(0)
.addImm(0); // no shift
// CSETM x16, NE === CSINV x16, xzr, xzr, EQ
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
.addDef(MisspeculatingTaintReg)
.addUse(AArch64::XZR)
.addUse(AArch64::XZR)
@@ -319,7 +379,7 @@ void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
}
void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
- MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned TmpReg) const {
// If full control flow speculation barriers are used, there will not be
// miss-speculation when returning from this function, and therefore, also
@@ -328,19 +388,19 @@ void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
return;
// mov Xtmp, SP === ADD Xtmp, SP, #0
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
.addDef(TmpReg)
.addUse(AArch64::SP)
.addImm(0)
.addImm(0); // no shift
// and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
.addDef(TmpReg, RegState::Renamable)
.addUse(TmpReg, RegState::Kill | RegState::Renamable)
.addUse(MisspeculatingTaintReg, RegState::Kill)
.addImm(0);
// mov SP, Xtmp === ADD SP, Xtmp, #0
- BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
.addDef(AArch64::SP)
.addUse(TmpReg, RegState::Kill)
.addImm(0)
@@ -484,7 +544,8 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
/// \brief If MBBI references a pseudo instruction that should be expanded
/// here, do the expansion and return true. Otherwise return false.
bool AArch64SpeculationHardening::expandSpeculationSafeValue(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ bool UsesFullSpeculationBarrier) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
bool Is64Bit = true;
@@ -499,7 +560,7 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue(
// Just remove the SpeculationSafe pseudos if control flow
// miss-speculation isn't happening because we're already inserting barriers
// to guarantee that.
- if (!UseControlFlowSpeculationBarrier) {
+ if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
// Mark this register and all its aliasing registers as needing to be
@@ -537,7 +598,7 @@ bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
}
bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
- MachineBasicBlock &MBB) {
+ MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
bool Modified = false;
RegsNeedingCSDBBeforeUse.reset();
@@ -572,15 +633,16 @@ bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
break;
}
- if (NeedToEmitBarrier)
+ if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
Modified |= insertCSDB(MBB, MBBI, DL);
- Modified |= expandSpeculationSafeValue(MBB, MBBI);
+ Modified |=
+ expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
MBBI = NMBBI;
}
- if (RegsNeedingCSDBBeforeUse.any())
+ if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
Modified |= insertCSDB(MBB, MBBI, DL);
return Modified;
@@ -609,7 +671,7 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
Modified |= slhLoads(MBB);
}
- // 2.a Add instrumentation code to function entry and exits.
+ // 2. Add instrumentation code to function entry and exits.
LLVM_DEBUG(
dbgs()
<< "***** AArch64SpeculationHardening - track control flow *****\n");
@@ -620,17 +682,15 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
EntryBlocks.push_back(LPI.LandingPadBlock);
for (auto Entry : EntryBlocks)
insertSPToRegTaintPropagation(
- Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
-
- // 2.b Add instrumentation code to every basic block.
- for (auto &MBB : MF)
- Modified |= instrumentControlFlow(MBB);
+ *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
- LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
- "SpeculationSafeValue Pseudos *****\n");
- // Step 3: Lower SpeculationSafeValue pseudo instructions.
- for (auto &MBB : MF)
- Modified |= lowerSpeculationSafeValuePseudos(MBB);
+ // 3. Add instrumentation code to every basic block.
+ for (auto &MBB : MF) {
+ bool UsesFullSpeculationBarrier = false;
+ Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
+ Modified |=
+ lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
+ }
return Modified;
}
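// Illustrative standalone sketch (plain C++, independent of the LLVM sources
// above; names like TaintOk are invented for the example): the hardening pass
// keeps a taint register that holds all-ones while no miss-speculation has
// been detected and all-zeros once it has, so AND-ing the stack pointer (or a
// loaded value) with it leaves the value intact on the architecturally
// correct path and forces it to zero on a miss-speculated path.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t SP = 0x0000ffffcafe0000ULL;
  uint64_t TaintOk = ~0ULL; // all-ones: no miss-speculation detected
  uint64_t TaintBad = 0ULL; // all-zeros: miss-speculation detected
  assert((SP & TaintOk) == SP); // correct path: SP is unchanged
  assert((SP & TaintBad) == 0); // miss-speculated path: SP is zeroed
  return 0;
}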
diff --git a/lib/Target/AArch64/AArch64StackTagging.cpp b/lib/Target/AArch64/AArch64StackTagging.cpp
new file mode 100644
index 000000000000..6e99c48bf1d7
--- /dev/null
+++ b/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -0,0 +1,345 @@
+//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-tagging"
+
+static constexpr unsigned kTagGranuleSize = 16;
+
+namespace {
+class AArch64StackTagging : public FunctionPass {
+ struct AllocaInfo {
+ AllocaInst *AI;
+ SmallVector<IntrinsicInst *, 2> LifetimeStart;
+ SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+ SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
+ int Tag; // -1 for non-tagged allocations
+ };
+
+public:
+ static char ID; // Pass ID, replacement for typeid
+
+ AArch64StackTagging() : FunctionPass(ID) {
+ initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool isInterestingAlloca(const AllocaInst &AI);
+ void alignAndPadAlloca(AllocaInfo &Info);
+
+ void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
+ uint64_t Size);
+ void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
+
+ Instruction *
+ insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+ const DominatorTree *DT);
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
+
+private:
+ Function *F;
+ Function *SetTagFunc;
+ const DataLayout *DL;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+};
+
+} // end anonymous namespace
+
+char AArch64StackTagging::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
+ false, false)
+INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
+ false, false)
+
+FunctionPass *llvm::createAArch64StackTaggingPass() {
+ return new AArch64StackTagging();
+}
+
+bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
+ // FIXME: support dynamic allocas
+ bool IsInteresting =
+ AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
+ // alloca() may be called with 0 size, ignore it.
+ AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
+ // inalloca allocas are not treated as static, and we don't want
+ // dynamic alloca instrumentation for them either.
+ !AI.isUsedWithInAlloca() &&
+ // swifterror allocas are register promoted by ISel
+ !AI.isSwiftError();
+ return IsInteresting;
+}
+
+void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
+ Value *Ptr, uint64_t Size) {
+ IRBuilder<> IRB(InsertBefore);
+ IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+}
+
+void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
+ uint64_t Size) {
+ IRBuilder<> IRB(InsertBefore);
+ IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
+ ConstantInt::get(IRB.getInt64Ty(), Size)});
+}
+
+Instruction *AArch64StackTagging::insertBaseTaggedPointer(
+ const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+ const DominatorTree *DT) {
+ BasicBlock *PrologueBB = nullptr;
+ // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
+ for (auto &I : Allocas) {
+ const AllocaInfo &Info = I.second;
+ AllocaInst *AI = Info.AI;
+ if (Info.Tag < 0)
+ continue;
+ if (!PrologueBB) {
+ PrologueBB = AI->getParent();
+ continue;
+ }
+ PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
+ }
+ assert(PrologueBB);
+
+ IRBuilder<> IRB(&PrologueBB->front());
+ Function *IRG_SP =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
+ Instruction *Base =
+ IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
+ Base->setName("basetag");
+ return Base;
+}
+
+void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
+ unsigned NewAlignment = std::max(Info.AI->getAlignment(), kTagGranuleSize);
+ Info.AI->setAlignment(NewAlignment);
+
+ uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+ uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
+ if (Size == AlignedSize)
+ return;
+
+ // Add padding to the alloca.
+ Type *AllocatedType =
+ Info.AI->isArrayAllocation()
+ ? ArrayType::get(
+ Info.AI->getAllocatedType(),
+ dyn_cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+ : Info.AI->getAllocatedType();
+ Type *PaddingType =
+ ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
+ Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
+ auto *NewAI = new AllocaInst(
+ TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
+ NewAI->takeName(Info.AI);
+ NewAI->setAlignment(Info.AI->getAlignment());
+ NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(Info.AI->isSwiftError());
+ NewAI->copyMetadata(*Info.AI);
+
+ auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
+ Info.AI->replaceAllUsesWith(NewPtr);
+ Info.AI->eraseFromParent();
+ Info.AI = NewAI;
+}
+
+// FIXME: check for MTE extension
+bool AArch64StackTagging::runOnFunction(Function &Fn) {
+ if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
+ return false;
+
+ F = &Fn;
+ DL = &Fn.getParent()->getDataLayout();
+
+ MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
+ SmallVector<Instruction *, 8> RetVec;
+ DenseMap<Value *, AllocaInst *> AllocaForValue;
+ SmallVector<Instruction *, 4> UnrecognizedLifetimes;
+
+ for (auto &BB : *F) {
+ for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
+ Instruction *I = &*IT;
+ if (auto *AI = dyn_cast<AllocaInst>(I)) {
+ Allocas[AI].AI = AI;
+ continue;
+ }
+
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
+ Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
+ }
+ continue;
+ }
+
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+ AllocaInst *AI =
+ llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
+ if (!AI) {
+ UnrecognizedLifetimes.push_back(I);
+ continue;
+ }
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ Allocas[AI].LifetimeStart.push_back(II);
+ else
+ Allocas[AI].LifetimeEnd.push_back(II);
+ }
+
+ if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
+ RetVec.push_back(I);
+ }
+ }
+
+ if (Allocas.empty())
+ return false;
+
+ int NextTag = 0;
+ int NumInterestingAllocas = 0;
+ for (auto &I : Allocas) {
+ AllocaInfo &Info = I.second;
+ assert(Info.AI);
+
+ if (!isInterestingAlloca(*Info.AI)) {
+ Info.Tag = -1;
+ continue;
+ }
+
+ alignAndPadAlloca(Info);
+ NumInterestingAllocas++;
+ Info.Tag = NextTag;
+ NextTag = (NextTag + 1) % 16;
+ }
+
+ if (NumInterestingAllocas == 0)
+ return true;
+
+ SetTagFunc =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
+
+ // Compute the DominatorTree only if the function has the attribute, there
+ // is more than one interesting alloca, and it is not already available
+ // for free.
+ Instruction *Base;
+ if (NumInterestingAllocas > 1) {
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ if (DTWP) {
+ Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
+ } else {
+ DominatorTree DT(*F);
+ Base = insertBaseTaggedPointer(Allocas, &DT);
+ }
+ } else {
+ Base = insertBaseTaggedPointer(Allocas, nullptr);
+ }
+
+ for (auto &I : Allocas) {
+ const AllocaInfo &Info = I.second;
+ AllocaInst *AI = Info.AI;
+ if (Info.Tag < 0)
+ continue;
+
+ // Replace alloca with tagp(alloca).
+ IRBuilder<> IRB(Info.AI->getNextNode());
+ Function *TagP = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
+ Instruction *TagPCall =
+ IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
+ ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
+ if (Info.AI->hasName())
+ TagPCall->setName(Info.AI->getName() + ".tag");
+ Info.AI->replaceAllUsesWith(TagPCall);
+ TagPCall->setOperand(0, Info.AI);
+
+ if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
+ Info.LifetimeEnd.size() == 1) {
+ IntrinsicInst *Start = Info.LifetimeStart[0];
+ uint64_t Size =
+ dyn_cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+ Size = alignTo(Size, kTagGranuleSize);
+ tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
+ untagAlloca(AI, Info.LifetimeEnd[0], Size);
+ } else {
+ uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+ Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
+ tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
+ for (auto &RI : RetVec) {
+ untagAlloca(AI, RI, Size);
+ }
+ // We may have inserted tag/untag outside of any lifetime interval.
+ // Remove all lifetime intrinsics for this alloca.
+ for (auto &II : Info.LifetimeStart)
+ II->eraseFromParent();
+ for (auto &II : Info.LifetimeEnd)
+ II->eraseFromParent();
+ }
+
+ // Fixup debug intrinsics to point to the new alloca.
+ for (auto DVI : Info.DbgVariableIntrinsics)
+ DVI->setArgOperand(
+ 0,
+ MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
+ }
+
+ // If we have instrumented at least one alloca, all unrecognized lifetime
+ // intrinsics have to go.
+ for (auto &I : UnrecognizedLifetimes)
+ I->eraseFromParent();
+
+ return true;
+}
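// Illustrative standalone sketch (plain C++, independent of the LLVM sources
// above; alignToGranule is an invented helper): two small computations the
// stack-tagging pass relies on, rounding an alloca's size up to the 16-byte
// tag granule (as in alignAndPadAlloca) and cycling through the 16 possible
// tags (as in the NextTag bookkeeping).
#include <cassert>
#include <cstdint>

constexpr uint64_t kTagGranuleSize = 16;

uint64_t alignToGranule(uint64_t Size) {
  return (Size + kTagGranuleSize - 1) / kTagGranuleSize * kTagGranuleSize;
}

int main() {
  assert(alignToGranule(20) == 32); // a 20-byte alloca gets 12 bytes of padding
  assert(alignToGranule(32) == 32); // already granule-sized: no padding added
  int NextTag = 0;
  for (int i = 0; i < 20; ++i)
    NextTag = (NextTag + 1) % 16;   // tags wrap around after 16 allocas
  assert(NextTag == 4);
  return 0;
}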
diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index d5643d384283..0e84a00df006 100644
--- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -1,9 +1,8 @@
//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,7 +147,7 @@ bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : MBB) {
if (!isNarrowFPStore(MI))
continue;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) &&
BaseOp->isReg()) {
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index dd30d25b2b50..3bc89b91c3f7 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -1,9 +1,8 @@
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,7 @@ void AArch64Subtarget::initializeProperties() {
case CortexA72:
case CortexA73:
case CortexA75:
+ case CortexA76:
PrefFunctionAlignment = 4;
break;
case Cyclone:
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 82f7bb755951..0c84cfb8329a 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -1,9 +1,8 @@
//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,7 @@ public:
CortexA72,
CortexA73,
CortexA75,
+ CortexA76,
Cyclone,
ExynosM1,
ExynosM3,
@@ -93,6 +93,12 @@ protected:
bool HasPAN_RWV = false;
bool HasCCPP = false;
+ // Armv8.2 Crypto extensions
+ bool HasSM4 = false;
+ bool HasSHA3 = false;
+ bool HasSHA2 = false;
+ bool HasAES = false;
+
// ARMv8.3 extensions
bool HasPA = false;
bool HasJS = false;
@@ -110,15 +116,10 @@ protected:
bool HasTLB_RMI = false;
bool HasFMI = false;
bool HasRCPC_IMMO = false;
- // ARMv8.4 Crypto extensions
- bool HasSM4 = true;
- bool HasSHA3 = true;
-
- bool HasSHA2 = true;
- bool HasAES = true;
bool HasLSLFast = false;
bool HasSVE = false;
+ bool HasSVE2 = false;
bool HasRCPC = false;
bool HasAggressiveFMA = false;
@@ -134,6 +135,12 @@ protected:
bool HasRandGen = false;
bool HasMTE = false;
+ // Arm SVE2 extensions
+ bool HasSVE2AES = false;
+ bool HasSVE2SM4 = false;
+ bool HasSVE2SHA3 = false;
+ bool HasSVE2BitPerm = false;
+
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -173,6 +180,9 @@ protected:
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
bool Force32BitJumpTables = false;
+ bool UseEL1ForTP = false;
+ bool UseEL2ForTP = false;
+ bool UseEL3ForTP = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -324,6 +334,10 @@ public:
hasFuseCCSelect() || hasFuseLiterals();
}
+ bool useEL1ForTP() const { return UseEL1ForTP; }
+ bool useEL2ForTP() const { return UseEL2ForTP; }
+ bool useEL3ForTP() const { return UseEL3ForTP; }
+
bool useRSqrt() const { return UseRSqrt; }
bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
@@ -353,6 +367,7 @@ public:
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
+ bool hasSVE2() const { return HasSVE2; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
@@ -365,6 +380,11 @@ public:
bool hasBTI() const { return HasBTI; }
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
+ // Arm SVE2 extensions
+ bool hasSVE2AES() const { return HasSVE2AES; }
+ bool hasSVE2SM4() const { return HasSVE2SM4; }
+ bool hasSVE2SHA3() const { return HasSVE2SHA3; }
+ bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td
index a804fb11175b..536a6591478b 100644
--- a/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/lib/Target/AArch64/AArch64SystemOperands.td
@@ -1,9 +1,8 @@
//===- AArch64SystemOperands.td ----------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1458,6 +1457,7 @@ def : RWSysReg<"TFSR_EL2", 0b11, 0b100, 0b0110, 0b0101, 0b000>;
def : RWSysReg<"TFSR_EL3", 0b11, 0b110, 0b0110, 0b0110, 0b000>;
def : RWSysReg<"TFSR_EL12", 0b11, 0b101, 0b0110, 0b0110, 0b000>;
def : RWSysReg<"TFSRE0_EL1", 0b11, 0b000, 0b0110, 0b0110, 0b001>;
+def : ROSysReg<"GMID_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b100>;
} // HasMTE
// Cyclone specific system registers
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4e016525f7e4..865461480499 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,9 +16,11 @@
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -178,6 +179,7 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
initializeAArch64SpeculationHardeningPass(*PR);
+ initializeAArch64StackTaggingPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -209,8 +211,8 @@ static std::string computeDataLayout(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
- // AArch64 Darwin is always PIC.
- if (TT.isOSDarwin())
+ // AArch64 Darwin and Windows are always PIC.
+ if (TT.isOSDarwin() || TT.isOSWindows())
return Reloc::PIC_;
// On ELF platforms the default static relocation model has a smart enough
// linker to cope with referencing external symbols defined in a shared
@@ -384,6 +386,8 @@ public:
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
} // end anonymous namespace
@@ -397,6 +401,10 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new AArch64PassConfig(*this, PM);
}
+std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
+ return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
void AArch64PassConfig::addIRPasses() {
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.
@@ -439,6 +447,8 @@ void AArch64PassConfig::addIRPasses() {
// invariant.
addPass(createLICMPass());
}
+
+ addPass(createAArch64StackTaggingPass());
}
// Pass Pipeline Configuration
@@ -455,7 +465,20 @@ bool AArch64PassConfig::addPreISel() {
EnableGlobalMerge == cl::BOU_TRUE) {
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
(EnableGlobalMerge == cl::BOU_UNSET);
- addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
+
+ // Merging of extern globals is enabled by default on non-Mach-O as we
+ // expect it to be generally either beneficial or harmless. On Mach-O it
+ // is disabled as we emit the .subsections_via_symbols directive which
+ // means that merging extern globals is not safe.
+ bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
+
+ // FIXME: extern global merging is only enabled when we optimise for size
+ // because enabling it for performance as well causes some regressions.
+ if (!OnlyOptimizeForSize)
+ MergeExternalByDefault = false;
+
+ addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
+ MergeExternalByDefault));
}
return false;
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 8d28a5e30ebf..5264efb89b9c 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -1,9 +1,8 @@
//==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 8ae72a7ddb57..1c3d5d0743ad 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- AArch64TargetObjectFile.cpp - AArch64 Object Info -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 9077eb7902fd..7ead363d42fe 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- AArch64TargetObjectFile.h - AArch64 Object Info -*- C++ ---------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a256cb7c9215..a4b78f2a7d6b 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1,12 +1,12 @@
//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "AArch64ExpandImm.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -50,8 +50,9 @@ int AArch64TTIImpl::getIntImmCost(int64_t Val) {
Val = ~Val;
// Calculate how many moves we will need to materialize this constant.
- unsigned LZ = countLeadingZeros((uint64_t)Val);
- return (64 - LZ + 15) / 16;
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(Val, 64, Insn);
+ return Insn.size();
}
/// Calculate the cost of materializing the given constant.
@@ -665,7 +666,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
- if (!UseMaskForCond && !UseMaskForGaps &&
+ if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
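// Illustrative standalone sketch (plain C++, independent of the LLVM sources
// above; oldHeuristicCost is an invented name): the removed getIntImmCost
// heuristic estimated the number of MOVZ/MOVK instructions for a 64-bit
// immediate from its leading zeros, one instruction per 16-bit chunk; the
// patch replaces the estimate with the exact count produced by
// AArch64_IMM::expandMOVImm.
#include <cassert>
#include <cstdint>

static int countLeadingZeros64(uint64_t V) {
  int N = 0;
  for (uint64_t Mask = 1ULL << 63; Mask != 0 && (V & Mask) == 0; Mask >>= 1)
    ++N;
  return N;
}

static int oldHeuristicCost(uint64_t Val) {
  return (64 - countLeadingZeros64(Val) + 15) / 16;
}

int main() {
  assert(oldHeuristicCost(0x1) == 1);               // a single MOVZ
  assert(oldHeuristicCost(0x12345678ULL) == 2);     // MOVZ + MOVK
  assert(oldHeuristicCost(0x123456789abcULL) == 3); // MOVZ + two MOVKs
  return 0;
}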
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 08c1a8924220..10c15a139b4c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -1,9 +1,8 @@
//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -166,6 +165,10 @@ public:
return false;
}
+ unsigned getGISelRematGlobalCost() const {
+ return 2;
+ }
+
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const;
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 6cc9b67e4d27..f4c55d48d215 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1,9 +1,8 @@
//==- AArch64AsmParser.cpp - Parse AArch64 assembly to MCInst instructions -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
+#include "TargetInfo/AArch64TargetInfo.h"
#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
@@ -242,11 +242,13 @@ public:
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
- // Alias .hword/.word/xword to the target-independent .2byte/.4byte/.8byte
- // directives as they have the same form and semantics:
- /// ::= (.hword | .word | .xword ) [ expression (, expression)* ]
+ // Alias .hword/.word/.[dx]word to the target-independent
+ // .2byte/.4byte/.8byte directives as they have the same form and
+ // semantics:
+ /// ::= (.hword | .word | .dword | .xword ) [ expression (, expression)* ]
Parser.addAliasForDirective(".hword", ".2byte");
Parser.addAliasForDirective(".word", ".4byte");
+ Parser.addAliasForDirective(".dword", ".8byte");
Parser.addAliasForDirective(".xword", ".8byte");
// Initialize the set of available features.
@@ -1079,8 +1081,7 @@ public:
if (Kind != k_Register || Reg.Kind != RegKind::SVEPredicateVector)
return DiagnosticPredicateTy::NoMatch;
- if (isSVEVectorReg<Class>() &&
- (ElementWidth == 0 || Reg.ElementWidth == ElementWidth))
+ if (isSVEVectorReg<Class>() && (Reg.ElementWidth == ElementWidth))
return DiagnosticPredicateTy::Match;
return DiagnosticPredicateTy::NearMatch;
@@ -1091,8 +1092,7 @@ public:
if (Kind != k_Register || Reg.Kind != RegKind::SVEDataVector)
return DiagnosticPredicateTy::NoMatch;
- if (isSVEVectorReg<Class>() &&
- (ElementWidth == 0 || Reg.ElementWidth == ElementWidth))
+ if (isSVEVectorReg<Class>() && Reg.ElementWidth == ElementWidth)
return DiagnosticPredicateTy::Match;
return DiagnosticPredicateTy::NearMatch;
@@ -1272,9 +1272,11 @@ public:
bool isExtend64() const {
if (!isExtend())
return false;
- // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class).
+ // Make sure the extend expects a 32-bit source register.
AArch64_AM::ShiftExtendType ET = getShiftExtendType();
- return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
+ return ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB ||
+ ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH ||
+ ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW;
}
bool isExtendLSL64() const {
@@ -2473,7 +2475,7 @@ OperandMatchResultTy
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const MCExpr *Expr;
+ const MCExpr *Expr = nullptr;
if (Parser.getTok().is(AsmToken::Hash)) {
Parser.Lex(); // Eat hash token.
@@ -2500,6 +2502,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
} else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
+ ELFRefKind != AArch64MCExpr::VK_ABS_PAGE_NC &&
ELFRefKind != AArch64MCExpr::VK_GOT_PAGE &&
ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE &&
ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) {
@@ -2523,7 +2526,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
OperandMatchResultTy
AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
SMLoc S = getLoc();
- const MCExpr *Expr;
+ const MCExpr *Expr = nullptr;
// Leave anything with a bracket to the default for SVE
if (getParser().getTok().is(AsmToken::LBrac))
@@ -2621,7 +2624,7 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
// Operand should start from # or should be integer, emit error otherwise.
return MatchOperand_NoMatch;
- const MCExpr *Imm;
+ const MCExpr *Imm = nullptr;
if (parseSymbolicImmVal(Imm))
return MatchOperand_ParseFail;
else if (Parser.getTok().isNot(AsmToken::Comma)) {
@@ -2660,7 +2663,7 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
Parser.Lex(); // Eat the number
// Just in case the optional lsl #0 is used for immediates other than zero.
- if (ShiftAmount == 0 && Imm != 0) {
+ if (ShiftAmount == 0 && Imm != nullptr) {
SMLoc E = Parser.getTok().getLoc();
Operands.push_back(AArch64Operand::CreateImm(Imm, S, E, getContext()));
return MatchOperand_Success;
@@ -2833,6 +2836,11 @@ static const struct Extension {
{"pan-rwv", {AArch64::FeaturePAN_RWV}},
{"ccpp", {AArch64::FeatureCCPP}},
{"sve", {AArch64::FeatureSVE}},
+ {"sve2", {AArch64::FeatureSVE2}},
+ {"sve2-aes", {AArch64::FeatureSVE2AES}},
+ {"sve2-sm4", {AArch64::FeatureSVE2SM4}},
+ {"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
+ {"bitperm", {AArch64::FeatureSVE2BitPerm}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},
@@ -3260,6 +3268,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
.Case("dtprel_hi12", AArch64MCExpr::VK_DTPREL_HI12)
.Case("dtprel_lo12", AArch64MCExpr::VK_DTPREL_LO12)
.Case("dtprel_lo12_nc", AArch64MCExpr::VK_DTPREL_LO12_NC)
+ .Case("pg_hi21_nc", AArch64MCExpr::VK_ABS_PAGE_NC)
.Case("tprel_g2", AArch64MCExpr::VK_TPREL_G2)
.Case("tprel_g1", AArch64MCExpr::VK_TPREL_G1)
.Case("tprel_g1_nc", AArch64MCExpr::VK_TPREL_G1_NC)
@@ -4098,15 +4107,6 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
"unpredictable STXP instruction, status is also a source");
break;
}
- case AArch64::LDGV: {
- unsigned Rt = Inst.getOperand(0).getReg();
- unsigned Rn = Inst.getOperand(1).getReg();
- if (RI->isSubRegisterEq(Rt, Rn)) {
- return Error(Loc[0],
- "unpredictable LDGV instruction, writeback register is also "
- "the target register");
- }
- }
}
@@ -4167,7 +4167,8 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
}
}
-static std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string AArch64MnemonicSpellCheck(StringRef S,
+ const FeatureBitset &FBS,
unsigned VariantID = 0);
bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
@@ -4199,7 +4200,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "expected AArch64 condition code");
case Match_AddSubRegExtendSmall:
return Error(Loc,
- "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+ "expected '[su]xt[bhw]' with optional integer in range [0, 4]");
case Match_AddSubRegExtendLarge:
return Error(Loc,
"expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
@@ -4442,7 +4443,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidZPR64LSL64:
return Error(Loc, "invalid shift/extend specified, expected 'z[0..31].d, lsl #3'");
case Match_InvalidZPR0:
- return Error(Loc, "expected register without element width sufix");
+ return Error(Loc, "expected register without element width suffix");
case Match_InvalidZPR8:
case Match_InvalidZPR16:
case Match_InvalidZPR32:
@@ -4470,11 +4471,15 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidSVEPredicateDReg:
return Error(Loc, "invalid predicate register.");
case Match_InvalidSVEPredicate3bAnyReg:
+ return Error(Loc, "invalid restricted predicate register, expected p0..p7 (without element suffix)");
case Match_InvalidSVEPredicate3bBReg:
+ return Error(Loc, "invalid restricted predicate register, expected p0.b..p7.b");
case Match_InvalidSVEPredicate3bHReg:
+ return Error(Loc, "invalid restricted predicate register, expected p0.h..p7.h");
case Match_InvalidSVEPredicate3bSReg:
+ return Error(Loc, "invalid restricted predicate register, expected p0.s..p7.s");
case Match_InvalidSVEPredicate3bDReg:
- return Error(Loc, "restricted predicate has range [0, 7].");
+ return Error(Loc, "invalid restricted predicate register, expected p0.d..p7.d");
case Match_InvalidSVEExactFPImmOperandHalfOne:
return Error(Loc, "Invalid floating point constant, expected 0.5 or 1.0.");
case Match_InvalidSVEExactFPImmOperandHalfTwo:
@@ -4777,10 +4782,12 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
MCInst Inst;
+ FeatureBitset MissingFeatures;
// First try to match against the secondary set of tables containing the
// short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
unsigned MatchResult =
- MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1);
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
+ MatchingInlineAsm, 1);
// If that fails, try against the alternate table containing long-form NEON:
// "fadd v0.2s, v1.2s, v2.2s"
@@ -4789,9 +4796,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// long-form match also fails.
auto ShortFormNEONErrorInfo = ErrorInfo;
auto ShortFormNEONMatchResult = MatchResult;
+ auto ShortFormNEONMissingFeatures = MissingFeatures;
MatchResult =
- MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
+ MatchingInlineAsm, 0);
// Now, both matches failed, and the long-form match failed on the mnemonic
// suffix token operand. The short-form match failure is probably more
@@ -4801,6 +4810,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
((AArch64Operand &)*Operands[1]).isTokenSuffix()) {
MatchResult = ShortFormNEONMatchResult;
ErrorInfo = ShortFormNEONErrorInfo;
+ MissingFeatures = ShortFormNEONMissingFeatures;
}
}
@@ -4819,17 +4829,15 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
}
case Match_MissingFeature: {
- assert(ErrorInfo && "Unknown missing feature!");
+ assert(MissingFeatures.any() && "Unknown missing feature!");
// Special case the error message for the very common case where only
// a single subtarget feature is missing (e.g., neon).
std::string Msg = "instruction requires:";
- uint64_t Mask = 1;
- for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
- if (ErrorInfo & Mask) {
+ for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
+ if (MissingFeatures[i]) {
Msg += " ";
- Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ Msg += getSubtargetFeatureName(i);
}
- Mask <<= 1;
}
return Error(IDLoc, Msg);
}
@@ -5148,7 +5156,7 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
FeatureBitset ToggleFeatures = EnableFeature
? (~Features & Extension.Features)
: ( Features & Extension.Features);
- uint64_t Features =
+ FeatureBitset Features =
ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
setAvailableFeatures(Features);
break;
@@ -5160,15 +5168,9 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
/// parseDirectiveArchExtension
/// ::= .arch_extension [no]feature
bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
- MCAsmParser &Parser = getParser();
-
- if (getLexer().isNot(AsmToken::Identifier))
- return Error(getLexer().getLoc(), "expected architecture extension name");
+ SMLoc ExtLoc = getLoc();
- const AsmToken &Tok = Parser.getTok();
- StringRef Name = Tok.getString();
- SMLoc ExtLoc = Tok.getLoc();
- Lex();
+ StringRef Name = getParser().parseStringToEndOfStatement().trim();
if (parseToken(AsmToken::EndOfStatement,
"unexpected token in '.arch_extension' directive"))
@@ -5192,7 +5194,7 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
FeatureBitset ToggleFeatures = EnableFeature
? (~Features & Extension.Features)
: (Features & Extension.Features);
- uint64_t Features =
+ FeatureBitset Features =
ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
setAvailableFeatures(Features);
return false;
@@ -5257,7 +5259,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
FeatureBitset ToggleFeatures = EnableFeature
? (~Features & Extension.Features)
: ( Features & Extension.Features);
- uint64_t Features =
+ FeatureBitset Features =
ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
setAvailableFeatures(Features);
FoundExtension = true;
@@ -5518,6 +5520,8 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
RegisterMCAsmParser<AArch64AsmParser> X(getTheAArch64leTarget());
RegisterMCAsmParser<AArch64AsmParser> Y(getTheAArch64beTarget());
RegisterMCAsmParser<AArch64AsmParser> Z(getTheARM64Target());
+ RegisterMCAsmParser<AArch64AsmParser> W(getTheARM64_32Target());
+ RegisterMCAsmParser<AArch64AsmParser> V(getTheAArch64_32Target());
}
#define GET_REGISTER_MATCHER
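// Illustrative standalone sketch (plain C++, independent of the LLVM sources
// above; std::bitset stands in for FeatureBitset): the old diagnostic loop
// walked a uint64_t mask, which can only describe 64 subtarget features, so
// the matcher now reports missing features through a bitset that is indexed
// directly, as in the MissingFeatures loop above.
#include <bitset>
#include <cassert>
#include <cstddef>
#include <string>

int main() {
  std::bitset<128> MissingFeatures;
  MissingFeatures.set(3);
  MissingFeatures.set(70); // an index that would not fit in a 64-bit mask
  std::string Msg = "instruction requires:";
  for (std::size_t i = 0, e = MissingFeatures.size(); i != e; ++i)
    if (MissingFeatures[i])
      Msg += " feature#" + std::to_string(i);
  assert(Msg == "instruction requires: feature#3 feature#70");
  return 0;
}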
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 4102f1eb5cc1..145ffef6f6f9 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1,9 +1,8 @@
//===- AArch64Disassembler.cpp - Disassembler for AArch64 -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "AArch64ExternalSymbolizer.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "TargetInfo/AArch64TargetInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
@@ -220,11 +220,6 @@ static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
- uint32_t insn,
- uint64_t address,
- const void* Decoder);
-
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
case MCDisassembler::Success:
@@ -292,11 +287,19 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
createAArch64ExternalSymbolizer);
TargetRegistry::RegisterMCSymbolizer(getTheAArch64beTarget(),
createAArch64ExternalSymbolizer);
+ TargetRegistry::RegisterMCDisassembler(getTheAArch64_32Target(),
+ createAArch64Disassembler);
+ TargetRegistry::RegisterMCSymbolizer(getTheAArch64_32Target(),
+ createAArch64ExternalSymbolizer);
TargetRegistry::RegisterMCDisassembler(getTheARM64Target(),
createAArch64Disassembler);
TargetRegistry::RegisterMCSymbolizer(getTheARM64Target(),
createAArch64ExternalSymbolizer);
+ TargetRegistry::RegisterMCDisassembler(getTheARM64_32Target(),
+ createAArch64Disassembler);
+ TargetRegistry::RegisterMCSymbolizer(getTheARM64_32Target(),
+ createAArch64ExternalSymbolizer);
}
static const unsigned FPR128DecoderTable[] = {
@@ -1619,7 +1622,7 @@ static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
case AArch64::MOVIv4s_msl:
case AArch64::MVNIv2s_msl:
case AArch64::MVNIv4s_msl:
- Inst.addOperand(MCOperand::createImm(cmode & 1 ? 0x110 : 0x108));
+ Inst.addOperand(MCOperand::createImm((cmode & 1) ? 0x110 : 0x108));
break;
}
@@ -1779,8 +1782,8 @@ static DecodeStatus DecodeGPRSeqPairsClassRegisterClass(MCInst &Inst,
if (RegNo & 0x1)
return Fail;
- unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
- Inst.addOperand(MCOperand::createReg(Register));
+ unsigned Reg = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo / 2);
+ Inst.addOperand(MCOperand::createReg(Reg));
return Success;
}
@@ -1852,25 +1855,3 @@ static DecodeStatus DecodeSVEIncDecImm(MCInst &Inst, unsigned Imm,
Inst.addOperand(MCOperand::createImm(Imm + 1));
return Success;
}
-
-static DecodeStatus DecodeLoadAllocTagArrayInstruction(MCInst &Inst,
- uint32_t insn,
- uint64_t address,
- const void* Decoder) {
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Rt = fieldFromInstruction(insn, 0, 5);
-
- // Outputs
- DecodeGPR64spRegisterClass(Inst, Rn, address, Decoder);
- DecodeGPR64RegisterClass(Inst, Rt, address, Decoder);
-
- // Input (Rn again)
- Inst.addOperand(Inst.getOperand(0));
-
- //Do this post decode since the raw number for xzr and sp is the same
- if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) {
- return SoftFail;
- } else {
- return Success;
- }
-}
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index bc2f7f181699..2ba5a695701f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -1,9 +1,8 @@
//===- AArch64Disassembler.h - Disassembler for AArch64 ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 342655a29b1d..3f815ac8c3d0 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -1,9 +1,8 @@
//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index 49e844963797..dc72331660cc 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -1,9 +1,8 @@
//===- AArch64ExternalSymbolizer.h - Symbolizer for AArch64 -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 688ca755d0b5..05a909f1780a 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -1,9 +1,8 @@
//===- AArch64AddressingModes.h - AArch64 Addressing Modes ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index ed89d991d9fb..6418211a4f55 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -1,15 +1,15 @@
//===-- AArch64AsmBackend.cpp - AArch64 Assembler Backend -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "AArch64.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -22,8 +22,10 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -42,6 +44,8 @@ public:
return AArch64::NumTargetFixupKinds;
}
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined
@@ -104,6 +108,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
case AArch64::fixup_aarch64_tlsdesc_call:
return 0;
@@ -274,7 +279,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (RefKind & AArch64MCExpr::VK_NC) {
Value &= 0xFFFF;
}
- else if (RefKind & AArch64MCExpr::VK_SABS) {
+ else if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
if (SignedValue > 0xFFFF || SignedValue < -0xFFFF)
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
@@ -305,6 +310,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
if (Value & 0x3)
Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3ffffff;
+ case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -315,6 +321,12 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
}
}
+Optional<MCFixupKind> AArch64AsmBackend::getFixupKind(StringRef Name) const {
+ if (TheTriple.isOSBinFormatELF() && Name == "R_AARCH64_NONE")
+ return FK_NONE;
+ return MCAsmBackend::getFixupKind(Name);
+}
+
/// getFixupKindContainereSizeInBytes - The number of bytes of the
/// container involved in big endian or 0 if the item is little endian
unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) const {
@@ -398,7 +410,7 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
// handle this more cleanly. This may affect the output of -show-mc-encoding.
AArch64MCExpr::VariantKind RefKind =
static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
- if (RefKind & AArch64MCExpr::VK_SABS) {
+ if (AArch64MCExpr::getSymbolLoc(RefKind) == AArch64MCExpr::VK_SABS) {
// If the immediate is negative, generate MOVN else MOVZ.
// (Bit 30 = 0) ==> MOVN, (Bit 30 = 1) ==> MOVZ.
if (SignedValue < 0)
@@ -446,6 +458,10 @@ bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target) {
+ unsigned Kind = Fixup.getKind();
+ if (Kind == FK_NONE)
+ return true;
+
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
// ~0xfff. This means that the required offset to reach a symbol can vary by
// up to one step depending on where the ADRP is in memory. For example:
@@ -458,14 +474,14 @@ bool AArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
// same page as the ADRP and the instruction should encode 0x0. Assuming the
// section isn't 0x1000-aligned, we therefore need to delegate this decision
// to the linker -- a relocation!
- if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
+ if (Kind == AArch64::fixup_aarch64_pcrel_adrp_imm21)
return true;
AArch64MCExpr::VariantKind RefKind =
static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
// LDR GOT relocations need a relocation
- if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
+ if (Kind == AArch64::fixup_aarch64_ldr_pcrel_imm19 &&
SymLoc == AArch64MCExpr::VK_GOT)
return true;
return false;
@@ -513,6 +529,7 @@ enum CompactUnwindEncodings {
// FIXME: This should be in a separate file.
class DarwinAArch64AsmBackend : public AArch64AsmBackend {
const MCRegisterInfo &MRI;
+ bool IsILP32;
/// Encode compact unwind stack adjustment for frameless functions.
/// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
@@ -523,13 +540,18 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend {
public:
DarwinAArch64AsmBackend(const Target &T, const Triple &TT,
- const MCRegisterInfo &MRI)
- : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI) {}
+ const MCRegisterInfo &MRI, bool IsILP32)
+ : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI),
+ IsILP32(IsILP32) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
- return createAArch64MachObjectWriter(MachO::CPU_TYPE_ARM64,
- MachO::CPU_SUBTYPE_ARM64_ALL);
+ if (IsILP32)
+ return createAArch64MachObjectWriter(
+ MachO::CPU_TYPE_ARM64_32, MachO::CPU_SUBTYPE_ARM64_32_V8, true);
+ else
+ return createAArch64MachObjectWriter(MachO::CPU_TYPE_ARM64,
+ MachO::CPU_SUBTYPE_ARM64_ALL, false);
}
/// Generate the compact unwind encoding from the CFI directives.
@@ -711,8 +733,10 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
- if (TheTriple.isOSBinFormatMachO())
- return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
+ if (TheTriple.isOSBinFormatMachO()) {
+ const bool IsILP32 = TheTriple.isArch32Bit();
+ return new DarwinAArch64AsmBackend(T, TheTriple, MRI, IsILP32);
+ }
if (TheTriple.isOSBinFormatCOFF())
return new COFFAArch64AsmBackend(T, TheTriple);
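Aside, not part of the imported diff: the IsILP32 flag added above keys off a triple property; a hedged check of that assumption (the triple spellings are illustrative):

  #include "llvm/ADT/Triple.h"
  #include <cassert>

  int main() {
    // arm64_32 is the ILP32 Darwin flavour (32-bit pointers on an A64 core),
    // so isArch32Bit() selects the CPU_TYPE_ARM64_32 Mach-O writer.
    assert(llvm::Triple("arm64_32-apple-watchos").isArch32Bit());
    assert(!llvm::Triple("arm64-apple-ios").isArch32Bit());
    return 0;
  }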
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 2ccd7cef8bef..c871e2c62eac 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- AArch64ELFObjectWriter.cpp - AArch64 ELF Writer -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -186,6 +185,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
return ELF::R_AARCH64_NONE;
switch ((unsigned)Fixup.getKind()) {
+ case FK_NONE:
+ return ELF::R_AARCH64_NONE;
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
return ELF::R_AARCH64_NONE;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 9a7e34b0aeb1..c33f7e957b54 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/AArch64ELFStreamer.cpp - ELF Object Output for AArch64 ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -103,8 +102,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
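Aside, not part of the imported diff: the mapping symbol mentioned in the comment above is the AArch64 ELF "$x" marker (with "$d" for data), which tells disassemblers where A64 code begins. A sketch of the bookkeeping, assuming the usual emit-only-on-transition behaviour of EmitA64MappingSymbol():

  #include <string>
  #include <vector>

  enum class Region { None, Code, Data };

  struct SectionState {
    Region Last = Region::None;
    std::vector<std::string> MappingSymbols;
    void mark(Region R) {
      if (R == Last)
        return; // same region, no new mapping symbol
      MappingSymbols.push_back(R == Region::Code ? "$x" : "$d");
      Last = R;
    }
  };

  int main() {
    SectionState S;
    S.mark(Region::Code); // before the first instruction -> "$x"
    S.mark(Region::Code); // still code, nothing emitted
    S.mark(Region::Data); // before a literal pool -> "$d"
    return (int)S.MappingSymbols.size(); // 2
  }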
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index d5b009ec30d1..25c609ee1496 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -1,9 +1,8 @@
//===-- AArch64ELFStreamer.h - ELF Streamer for AArch64 ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index 4293dcba955e..fe8043fe5ec0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -1,9 +1,8 @@
//===-- AArch64FixupKinds.h - AArch64 Specific Fixup Entries ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index dcf2dd251149..d0a544273b8b 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1,9 +1,8 @@
//==-- AArch64InstPrinter.cpp - Convert AArch64 MCInst to assembly syntax --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -293,6 +292,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printInstruction(MI, STI, O);
printAnnotation(O, Annot);
+
+ if (atomicBarrierDroppedOnZero(Opcode) &&
+ (MI->getOperand(0).getReg() == AArch64::XZR ||
+ MI->getOperand(0).getReg() == AArch64::WZR)) {
+ printAnnotation(O, "acquire semantics dropped since destination is zero");
+ }
}
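Aside, not part of the imported diff: for the atomics covered by atomicBarrierDroppedOnZero(), the architecture drops the acquire barrier when the destination is the zero register (the loaded value is discarded), which is exactly what the new annotation points out. A trivial restatement of the check with stand-in register ids:

  // Stand-ins for illustration; the real code inspects MCInst operand 0.
  enum Reg { W2, WZR, XZR, X0 };

  // True for e.g.  ldaddal w2, wzr, [x0]  where the acquire half of the
  // "-al" suffix has no effect because the result goes to wzr.
  static bool acquireDropped(Reg Dest) { return Dest == WZR || Dest == XZR; }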
static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout,
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 4e9982f5b7be..5311f73ca21c 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -1,9 +1,8 @@
//===-- AArch64InstPrinter.h - Convert AArch64 MCInst to assembly syntax --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
-#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64INSTPRINTER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
@@ -220,4 +219,4 @@ public:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
+#endif // LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64INSTPRINTER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 58e4a9c9a9e9..ecff1ab0a8b3 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- AArch64MCAsmInfo.cpp - AArch64 asm properties ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -131,8 +130,6 @@ AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() {
CodePointerSize = 8;
CommentString = "//";
- ExceptionsType = ExceptionHandling::DwarfCFI;
- // The default is dwarf, but WinEH can be enabled optionally, which requires
- // WinEHEncodingType to be set.
+ ExceptionsType = ExceptionHandling::WinEH;
WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index e8570b1c2887..36ae92afc8c1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -1,9 +1,8 @@
//=====-- AArch64MCAsmInfo.h - AArch64 asm properties ---------*- C++ -*--====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 41cad48f7aea..8cb7a1672983 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -188,9 +187,10 @@ public:
const MCSubtargetInfo &STI) const;
private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 729486b1020c..0a529321edc8 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -1,9 +1,8 @@
//===-- AArch64MCExpr.cpp - AArch64 specific MC expression classes --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -80,8 +79,7 @@ StringRef AArch64MCExpr::getVariantKindName() const {
}
void AArch64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
- if (getKind() != VK_NONE)
- OS << getVariantKindName();
+ OS << getVariantKindName();
Expr->print(OS, MAI);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index b6bf254d3835..ec9c95911628 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -1,9 +1,8 @@
//=--- AArch64MCExpr.h - AArch64 specific MC expression classes ---*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,8 +22,6 @@ namespace llvm {
class AArch64MCExpr : public MCTargetExpr {
public:
enum VariantKind {
- VK_NONE = 0x000,
-
// Symbol locations specifying (roughly speaking) what calculation should be
// performed to construct the final address for the relocated
// symbol. E.g. direct, via the GOT, ...
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f8198ba4e9b..df12274d9470 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,8 +14,10 @@
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
#include "AArch64WinCOFFStreamer.h"
-#include "InstPrinter/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64InstPrinter.h"
+#include "TargetInfo/AArch64TargetInfo.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -56,11 +57,177 @@ createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
}
void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
- for (unsigned Reg = AArch64::NoRegister + 1;
- Reg < AArch64::NUM_TARGET_REGS; ++Reg) {
- unsigned CV = MRI->getEncodingValue(Reg);
- MRI->mapLLVMRegToCVReg(Reg, CV);
- }
+ // Mapping from CodeView to MC register id.
+ static const struct {
+ codeview::RegisterId CVReg;
+ MCPhysReg Reg;
+ } RegMap[] = {
+ {codeview::RegisterId::ARM64_W0, AArch64::W0},
+ {codeview::RegisterId::ARM64_W1, AArch64::W1},
+ {codeview::RegisterId::ARM64_W2, AArch64::W2},
+ {codeview::RegisterId::ARM64_W3, AArch64::W3},
+ {codeview::RegisterId::ARM64_W4, AArch64::W4},
+ {codeview::RegisterId::ARM64_W5, AArch64::W5},
+ {codeview::RegisterId::ARM64_W6, AArch64::W6},
+ {codeview::RegisterId::ARM64_W7, AArch64::W7},
+ {codeview::RegisterId::ARM64_W8, AArch64::W8},
+ {codeview::RegisterId::ARM64_W9, AArch64::W9},
+ {codeview::RegisterId::ARM64_W10, AArch64::W10},
+ {codeview::RegisterId::ARM64_W11, AArch64::W11},
+ {codeview::RegisterId::ARM64_W12, AArch64::W12},
+ {codeview::RegisterId::ARM64_W13, AArch64::W13},
+ {codeview::RegisterId::ARM64_W14, AArch64::W14},
+ {codeview::RegisterId::ARM64_W15, AArch64::W15},
+ {codeview::RegisterId::ARM64_W16, AArch64::W16},
+ {codeview::RegisterId::ARM64_W17, AArch64::W17},
+ {codeview::RegisterId::ARM64_W18, AArch64::W18},
+ {codeview::RegisterId::ARM64_W19, AArch64::W19},
+ {codeview::RegisterId::ARM64_W20, AArch64::W20},
+ {codeview::RegisterId::ARM64_W21, AArch64::W21},
+ {codeview::RegisterId::ARM64_W22, AArch64::W22},
+ {codeview::RegisterId::ARM64_W23, AArch64::W23},
+ {codeview::RegisterId::ARM64_W24, AArch64::W24},
+ {codeview::RegisterId::ARM64_W25, AArch64::W25},
+ {codeview::RegisterId::ARM64_W26, AArch64::W26},
+ {codeview::RegisterId::ARM64_W27, AArch64::W27},
+ {codeview::RegisterId::ARM64_W28, AArch64::W28},
+ {codeview::RegisterId::ARM64_W29, AArch64::W29},
+ {codeview::RegisterId::ARM64_W30, AArch64::W30},
+ {codeview::RegisterId::ARM64_WZR, AArch64::WZR},
+ {codeview::RegisterId::ARM64_X0, AArch64::X0},
+ {codeview::RegisterId::ARM64_X1, AArch64::X1},
+ {codeview::RegisterId::ARM64_X2, AArch64::X2},
+ {codeview::RegisterId::ARM64_X3, AArch64::X3},
+ {codeview::RegisterId::ARM64_X4, AArch64::X4},
+ {codeview::RegisterId::ARM64_X5, AArch64::X5},
+ {codeview::RegisterId::ARM64_X6, AArch64::X6},
+ {codeview::RegisterId::ARM64_X7, AArch64::X7},
+ {codeview::RegisterId::ARM64_X8, AArch64::X8},
+ {codeview::RegisterId::ARM64_X9, AArch64::X9},
+ {codeview::RegisterId::ARM64_X10, AArch64::X10},
+ {codeview::RegisterId::ARM64_X11, AArch64::X11},
+ {codeview::RegisterId::ARM64_X12, AArch64::X12},
+ {codeview::RegisterId::ARM64_X13, AArch64::X13},
+ {codeview::RegisterId::ARM64_X14, AArch64::X14},
+ {codeview::RegisterId::ARM64_X15, AArch64::X15},
+ {codeview::RegisterId::ARM64_X16, AArch64::X16},
+ {codeview::RegisterId::ARM64_X17, AArch64::X17},
+ {codeview::RegisterId::ARM64_X18, AArch64::X18},
+ {codeview::RegisterId::ARM64_X19, AArch64::X19},
+ {codeview::RegisterId::ARM64_X20, AArch64::X20},
+ {codeview::RegisterId::ARM64_X21, AArch64::X21},
+ {codeview::RegisterId::ARM64_X22, AArch64::X22},
+ {codeview::RegisterId::ARM64_X23, AArch64::X23},
+ {codeview::RegisterId::ARM64_X24, AArch64::X24},
+ {codeview::RegisterId::ARM64_X25, AArch64::X25},
+ {codeview::RegisterId::ARM64_X26, AArch64::X26},
+ {codeview::RegisterId::ARM64_X27, AArch64::X27},
+ {codeview::RegisterId::ARM64_X28, AArch64::X28},
+ {codeview::RegisterId::ARM64_FP, AArch64::FP},
+ {codeview::RegisterId::ARM64_LR, AArch64::LR},
+ {codeview::RegisterId::ARM64_SP, AArch64::SP},
+ {codeview::RegisterId::ARM64_ZR, AArch64::XZR},
+ {codeview::RegisterId::ARM64_NZCV, AArch64::NZCV},
+ {codeview::RegisterId::ARM64_S0, AArch64::S0},
+ {codeview::RegisterId::ARM64_S1, AArch64::S1},
+ {codeview::RegisterId::ARM64_S2, AArch64::S2},
+ {codeview::RegisterId::ARM64_S3, AArch64::S3},
+ {codeview::RegisterId::ARM64_S4, AArch64::S4},
+ {codeview::RegisterId::ARM64_S5, AArch64::S5},
+ {codeview::RegisterId::ARM64_S6, AArch64::S6},
+ {codeview::RegisterId::ARM64_S7, AArch64::S7},
+ {codeview::RegisterId::ARM64_S8, AArch64::S8},
+ {codeview::RegisterId::ARM64_S9, AArch64::S9},
+ {codeview::RegisterId::ARM64_S10, AArch64::S10},
+ {codeview::RegisterId::ARM64_S11, AArch64::S11},
+ {codeview::RegisterId::ARM64_S12, AArch64::S12},
+ {codeview::RegisterId::ARM64_S13, AArch64::S13},
+ {codeview::RegisterId::ARM64_S14, AArch64::S14},
+ {codeview::RegisterId::ARM64_S15, AArch64::S15},
+ {codeview::RegisterId::ARM64_S16, AArch64::S16},
+ {codeview::RegisterId::ARM64_S17, AArch64::S17},
+ {codeview::RegisterId::ARM64_S18, AArch64::S18},
+ {codeview::RegisterId::ARM64_S19, AArch64::S19},
+ {codeview::RegisterId::ARM64_S20, AArch64::S20},
+ {codeview::RegisterId::ARM64_S21, AArch64::S21},
+ {codeview::RegisterId::ARM64_S22, AArch64::S22},
+ {codeview::RegisterId::ARM64_S23, AArch64::S23},
+ {codeview::RegisterId::ARM64_S24, AArch64::S24},
+ {codeview::RegisterId::ARM64_S25, AArch64::S25},
+ {codeview::RegisterId::ARM64_S26, AArch64::S26},
+ {codeview::RegisterId::ARM64_S27, AArch64::S27},
+ {codeview::RegisterId::ARM64_S28, AArch64::S28},
+ {codeview::RegisterId::ARM64_S29, AArch64::S29},
+ {codeview::RegisterId::ARM64_S30, AArch64::S30},
+ {codeview::RegisterId::ARM64_S31, AArch64::S31},
+ {codeview::RegisterId::ARM64_D0, AArch64::D0},
+ {codeview::RegisterId::ARM64_D1, AArch64::D1},
+ {codeview::RegisterId::ARM64_D2, AArch64::D2},
+ {codeview::RegisterId::ARM64_D3, AArch64::D3},
+ {codeview::RegisterId::ARM64_D4, AArch64::D4},
+ {codeview::RegisterId::ARM64_D5, AArch64::D5},
+ {codeview::RegisterId::ARM64_D6, AArch64::D6},
+ {codeview::RegisterId::ARM64_D7, AArch64::D7},
+ {codeview::RegisterId::ARM64_D8, AArch64::D8},
+ {codeview::RegisterId::ARM64_D9, AArch64::D9},
+ {codeview::RegisterId::ARM64_D10, AArch64::D10},
+ {codeview::RegisterId::ARM64_D11, AArch64::D11},
+ {codeview::RegisterId::ARM64_D12, AArch64::D12},
+ {codeview::RegisterId::ARM64_D13, AArch64::D13},
+ {codeview::RegisterId::ARM64_D14, AArch64::D14},
+ {codeview::RegisterId::ARM64_D15, AArch64::D15},
+ {codeview::RegisterId::ARM64_D16, AArch64::D16},
+ {codeview::RegisterId::ARM64_D17, AArch64::D17},
+ {codeview::RegisterId::ARM64_D18, AArch64::D18},
+ {codeview::RegisterId::ARM64_D19, AArch64::D19},
+ {codeview::RegisterId::ARM64_D20, AArch64::D20},
+ {codeview::RegisterId::ARM64_D21, AArch64::D21},
+ {codeview::RegisterId::ARM64_D22, AArch64::D22},
+ {codeview::RegisterId::ARM64_D23, AArch64::D23},
+ {codeview::RegisterId::ARM64_D24, AArch64::D24},
+ {codeview::RegisterId::ARM64_D25, AArch64::D25},
+ {codeview::RegisterId::ARM64_D26, AArch64::D26},
+ {codeview::RegisterId::ARM64_D27, AArch64::D27},
+ {codeview::RegisterId::ARM64_D28, AArch64::D28},
+ {codeview::RegisterId::ARM64_D29, AArch64::D29},
+ {codeview::RegisterId::ARM64_D30, AArch64::D30},
+ {codeview::RegisterId::ARM64_D31, AArch64::D31},
+ {codeview::RegisterId::ARM64_Q0, AArch64::Q0},
+ {codeview::RegisterId::ARM64_Q1, AArch64::Q1},
+ {codeview::RegisterId::ARM64_Q2, AArch64::Q2},
+ {codeview::RegisterId::ARM64_Q3, AArch64::Q3},
+ {codeview::RegisterId::ARM64_Q4, AArch64::Q4},
+ {codeview::RegisterId::ARM64_Q5, AArch64::Q5},
+ {codeview::RegisterId::ARM64_Q6, AArch64::Q6},
+ {codeview::RegisterId::ARM64_Q7, AArch64::Q7},
+ {codeview::RegisterId::ARM64_Q8, AArch64::Q8},
+ {codeview::RegisterId::ARM64_Q9, AArch64::Q9},
+ {codeview::RegisterId::ARM64_Q10, AArch64::Q10},
+ {codeview::RegisterId::ARM64_Q11, AArch64::Q11},
+ {codeview::RegisterId::ARM64_Q12, AArch64::Q12},
+ {codeview::RegisterId::ARM64_Q13, AArch64::Q13},
+ {codeview::RegisterId::ARM64_Q14, AArch64::Q14},
+ {codeview::RegisterId::ARM64_Q15, AArch64::Q15},
+ {codeview::RegisterId::ARM64_Q16, AArch64::Q16},
+ {codeview::RegisterId::ARM64_Q17, AArch64::Q17},
+ {codeview::RegisterId::ARM64_Q18, AArch64::Q18},
+ {codeview::RegisterId::ARM64_Q19, AArch64::Q19},
+ {codeview::RegisterId::ARM64_Q20, AArch64::Q20},
+ {codeview::RegisterId::ARM64_Q21, AArch64::Q21},
+ {codeview::RegisterId::ARM64_Q22, AArch64::Q22},
+ {codeview::RegisterId::ARM64_Q23, AArch64::Q23},
+ {codeview::RegisterId::ARM64_Q24, AArch64::Q24},
+ {codeview::RegisterId::ARM64_Q25, AArch64::Q25},
+ {codeview::RegisterId::ARM64_Q26, AArch64::Q26},
+ {codeview::RegisterId::ARM64_Q27, AArch64::Q27},
+ {codeview::RegisterId::ARM64_Q28, AArch64::Q28},
+ {codeview::RegisterId::ARM64_Q29, AArch64::Q29},
+ {codeview::RegisterId::ARM64_Q30, AArch64::Q30},
+ {codeview::RegisterId::ARM64_Q31, AArch64::Q31},
+
+ };
+ for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
+ MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
}
static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) {
@@ -166,12 +333,20 @@ public:
for (uint64_t Byte = 0, End = PltContents.size(); Byte + 7 < End;
Byte += 4) {
uint32_t Insn = support::endian::read32le(PltContents.data() + Byte);
+ uint64_t Off = 0;
+ // Check for the optional "bti c" that prefixes adrp in BTI-enabled entries.
+ if (Insn == 0xd503245f) {
+ Off = 4;
+ Insn = support::endian::read32le(PltContents.data() + Byte + Off);
+ }
// Check for adrp.
if ((Insn & 0x9f000000) != 0x90000000)
continue;
+ Off += 4;
uint64_t Imm = (((PltSectionVA + Byte) >> 12) << 12) +
(((Insn >> 29) & 3) << 12) + (((Insn >> 5) & 0x3ffff) << 14);
- uint32_t Insn2 = support::endian::read32le(PltContents.data() + Byte + 4);
+ uint32_t Insn2 =
+ support::endian::read32le(PltContents.data() + Byte + Off);
// Check for: ldr Xt, [Xn, #pimm].
if (Insn2 >> 22 == 0x3e5) {
Imm += ((Insn2 >> 10) & 0xfff) << 3;
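Aside, not part of the imported diff: the entry shape the PLT scan above assumes, with the bit patterns taken from the hunk (the exact stub layout is the linker's, so treat the example encoding as illustrative):

  #include <cstdint>

  // A BTI-enabled PLT entry starts with a "bti c" landing pad (0xd503245f)
  // before the usual adrp/ldr pair, so the scan skips one word when present.
  static bool isBtiC(uint32_t Insn) { return Insn == 0xd503245f; }
  static bool isAdrp(uint32_t Insn) { return (Insn & 0x9f000000) == 0x90000000; }

  // Byte offset of the adrp within an entry of 32-bit words, or -1 if the
  // entry does not look like [bti c,] adrp, ...
  static int adrpOffsetInEntry(const uint32_t *Words) {
    unsigned Idx = isBtiC(Words[0]) ? 1 : 0;
    return isAdrp(Words[Idx]) ? int(Idx * 4) : -1;
  }

  int main() {
    const uint32_t Entry[] = {0xd503245f, 0x90000010 /* adrp x16, #0 */};
    return adrpOffsetInEntry(Entry); // 4
  }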
@@ -192,7 +367,8 @@ static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
for (Target *T : {&getTheAArch64leTarget(), &getTheAArch64beTarget(),
- &getTheARM64Target()}) {
+ &getTheAArch64_32Target(), &getTheARM64Target(),
+ &getTheARM64_32Target()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
@@ -228,7 +404,8 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
}
// Register the asm backend.
- for (Target *T : {&getTheAArch64leTarget(), &getTheARM64Target()})
+ for (Target *T : {&getTheAArch64leTarget(), &getTheAArch64_32Target(),
+ &getTheARM64Target(), &getTheARM64_32Target()})
TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(getTheAArch64beTarget(),
createAArch64beAsmBackend);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 0f22f69bd5b0..c84c313c1db0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- AArch64MCTargetDesc.h - AArch64 Target Descriptions -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheAArch64leTarget();
-Target &getTheAArch64beTarget();
-Target &getTheARM64Target();
-
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
@@ -57,7 +52,8 @@ std::unique_ptr<MCObjectTargetWriter>
createAArch64ELFObjectWriter(uint8_t OSABI, bool IsILP32);
std::unique_ptr<MCObjectTargetWriter>
-createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype);
+createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype,
+ bool IsILP32);
std::unique_ptr<MCObjectTargetWriter> createAArch64WinCOFFObjectWriter();
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 1021cdeeb3be..b3ce5ef22eef 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- AArch64MachObjectWriter.cpp - ARM Mach Object Writer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,8 +37,8 @@ class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
unsigned &Log2Size, const MCAssembler &Asm);
public:
- AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype) {}
+ AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype, bool IsILP32)
+ : MCMachObjectTargetWriter(!IsILP32 /* is64Bit */, CPUType, CPUSubtype) {}
void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
@@ -405,6 +404,8 @@ void AArch64MachObjectWriter::recordRelocation(
}
std::unique_ptr<MCObjectTargetWriter>
-llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) {
- return llvm::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype);
+llvm::createAArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype,
+ bool IsILP32) {
+ return llvm::make_unique<AArch64MachObjectWriter>(CPUType, CPUSubtype,
+ IsILP32);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index a6b8d963bef9..f70752f5303f 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -1,9 +1,8 @@
//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "AArch64TargetStreamer.h"
#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 73fb9baea3e3..3a0c5d8318dd 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -1,9 +1,8 @@
//===-- AArch64TargetStreamer.h - AArch64 Target Streamer ------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index 7ea7d5f2a20e..a45880a07427 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
index b828ab832e9d..37c6fbb03908 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
//===-- AArch64WinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
index ed265a876ab3..8c0656652eed 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -1,9 +1,8 @@
//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index 23a65b345bad..808e59467081 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -1,9 +1,8 @@
//=-- SVEInstrFormats.td - AArch64 SVE Instruction classes -*- tablegen -*--=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -701,8 +700,8 @@ multiclass sve_int_perm_dup_i<string asm> {
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
}
-class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
- RegisterOperand VecList>
+class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty, RegisterOperand VecList>
: I<(outs zprty:$Zd), (ins VecList:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
@@ -714,16 +713,18 @@ class sve_int_perm_tbl<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{23-22} = sz8_64;
let Inst{21} = 0b1;
let Inst{20-16} = Zm;
- let Inst{15-10} = 0b001100;
+ let Inst{15-13} = 0b001;
+ let Inst{12-11} = opc;
+ let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
multiclass sve_int_perm_tbl<string asm> {
- def _B : sve_int_perm_tbl<0b00, asm, ZPR8, Z_b>;
- def _H : sve_int_perm_tbl<0b01, asm, ZPR16, Z_h>;
- def _S : sve_int_perm_tbl<0b10, asm, ZPR32, Z_s>;
- def _D : sve_int_perm_tbl<0b11, asm, ZPR64, Z_d>;
+ def _B : sve_int_perm_tbl<0b00, 0b10, asm, ZPR8, Z_b>;
+ def _H : sve_int_perm_tbl<0b01, 0b10, asm, ZPR16, Z_h>;
+ def _S : sve_int_perm_tbl<0b10, 0b10, asm, ZPR32, Z_s>;
+ def _D : sve_int_perm_tbl<0b11, 0b10, asm, ZPR64, Z_d>;
def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
(!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 0>;
@@ -735,6 +736,37 @@ multiclass sve_int_perm_tbl<string asm> {
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>;
}
+multiclass sve2_int_perm_tbl<string asm> {
+ def _B : sve_int_perm_tbl<0b00, 0b01, asm, ZPR8, ZZ_b>;
+ def _H : sve_int_perm_tbl<0b01, 0b01, asm, ZPR16, ZZ_h>;
+ def _S : sve_int_perm_tbl<0b10, 0b01, asm, ZPR32, ZZ_s>;
+ def _D : sve_int_perm_tbl<0b11, 0b01, asm, ZPR64, ZZ_d>;
+}
+
+class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b001011;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_perm_tbx<string asm> {
+ def _B : sve2_int_perm_tbx<0b00, asm, ZPR8>;
+ def _H : sve2_int_perm_tbx<0b01, asm, ZPR16>;
+ def _S : sve2_int_perm_tbx<0b10, asm, ZPR32>;
+ def _D : sve2_int_perm_tbx<0b11, asm, ZPR64>;
+}
+
class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zd), (ins zprty:$Zn),
asm, "\t$Zd, $Zn",
@@ -875,6 +907,21 @@ class sve_int_perm_extract_i<string asm>
let ElementSize = ElementSizeNone;
}
+class sve2_int_perm_extract_i_cons<string asm>
+: I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, imm0_255:$imm8),
+ asm, "\t$Zd, $Zn, $imm8",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<8> imm8;
+ let Inst{31-21} = 0b00000101011;
+ let Inst{20-16} = imm8{7-3};
+ let Inst{15-13} = 0b000;
+ let Inst{12-10} = imm8{2-0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
//===----------------------------------------------------------------------===//
// SVE Vector Select Group
//===----------------------------------------------------------------------===//
@@ -1437,6 +1484,132 @@ multiclass sve_fp_fcadd<string asm> {
}
//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Convert Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_convert_precision<bits<4> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<3> Pg;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = opc{3-2};
+ let Inst{21-18} = 0b0010;
+ let Inst{17-16} = opc{1-0};
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp_convert_down_narrow<string asm> {
+ def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+}
+
+multiclass sve2_fp_convert_up_long<string asm> {
+ def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+}
+
+multiclass sve2_fp_convert_down_odd_rounding<string asm> {
+ def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Pairwise Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zm;
+ bits<5> Zdn;
+ let Inst{31-24} = 0b01100100;
+ let Inst{23-22} = sz;
+ let Inst{21-19} = 0b010;
+ let Inst{18-16} = opc;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm> {
+ def _H : sve2_fp_pairwise_pred<0b01, opc, asm, ZPR16>;
+ def _S : sve2_fp_pairwise_pred<0b10, opc, asm, ZPR32>;
+ def _D : sve2_fp_pairwise_pred<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Widening Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_mla_long_by_indexed_elem<bits<2> opc, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm,
+ VectorIndexH:$iop),
+ asm, "\t$Zda, $Zn, $Zm$iop",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{31-21} = 0b01100100101;
+ let Inst{20-19} = iop{2-1};
+ let Inst{18-16} = Zm;
+ let Inst{15-14} = 0b01;
+ let Inst{13} = opc{1};
+ let Inst{12} = 0b0;
+ let Inst{11} = iop{0};
+ let Inst{10} = opc{0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Floating Point Widening Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+class sve2_fp_mla_long<bits<2> opc, string asm>
+: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
+ asm, "\t$Zda, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01100100101;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b10;
+ let Inst{13} = opc{1};
+ let Inst{12-11} = 0b00;
+ let Inst{10} = opc{0};
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+//===----------------------------------------------------------------------===//
// SVE Stack Allocation Group
//===----------------------------------------------------------------------===//
@@ -1536,6 +1709,12 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
}
+multiclass sve2_fp_flogb<string asm> {
+ def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
+ def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
+ def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Floating Point Unary Operations - Unpredicated Group
//===----------------------------------------------------------------------===//
@@ -1692,6 +1871,112 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm> {
}
//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply-Add - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mla<bits<2> sz, bits<5> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b0;
+ let Inst{14-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_mla<bit S, string asm> {
+ def _B : sve2_int_mla<0b00, { 0b1110, S }, asm, ZPR8, ZPR8>;
+ def _H : sve2_int_mla<0b01, { 0b1110, S }, asm, ZPR16, ZPR16>;
+ def _S : sve2_int_mla<0b10, { 0b1110, S }, asm, ZPR32, ZPR32>;
+ def _D : sve2_int_mla<0b11, { 0b1110, S }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_int_mla_long<bits<5> opc, string asm> {
+ def _H : sve2_int_mla<0b01, opc, asm, ZPR16, ZPR8>;
+ def _S : sve2_int_mla<0b10, opc, asm, ZPR32, ZPR16>;
+ def _D : sve2_int_mla<0b11, opc, asm, ZPR64, ZPR32>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mla_by_indexed_elem<bits<2> sz, bits<6> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2,
+ ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop),
+ asm, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm> {
+ def _H : sve2_int_mla_by_indexed_elem<{0, ?}, { 0b000, opc, S }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{22} = iop{2};
+ let Inst{20-19} = iop{1-0};
+ let Inst{18-16} = Zm;
+ }
+ def _S : sve2_int_mla_by_indexed_elem<0b10, { 0b000, opc, S }, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+ bits<3> Zm;
+ bits<2> iop;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _D : sve2_int_mla_by_indexed_elem<0b11, { 0b000, opc, S }, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+ bits<4> Zm;
+ bit iop;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply-Add Long - Indexed Group
+//===----------------------------------------------------------------------===//
+
+multiclass sve2_int_mla_long_by_indexed_elem<bits<4> opc, string asm> {
+ def _S : sve2_int_mla_by_indexed_elem<0b10, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
+ asm, ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{20-19} = iop{2-1};
+ let Inst{18-16} = Zm;
+ let Inst{11} = iop{0};
+ }
+ def _D : sve2_int_mla_by_indexed_elem<0b11, { opc{3}, 0b0, opc{2-1}, ?, opc{0} },
+ asm, ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+ bits<4> Zm;
+ bits<2> iop;
+ let Inst{20} = iop{1};
+ let Inst{19-16} = Zm;
+ let Inst{11} = iop{0};
+ }
+}
+
+//===----------------------------------------------------------------------===//
// SVE Integer Dot Product Group
//===----------------------------------------------------------------------===//
@@ -1762,6 +2047,645 @@ multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm> {
}
//===----------------------------------------------------------------------===//
+// SVE2 Complex Integer Dot Product Group
+//===----------------------------------------------------------------------===//
+
+class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm,
+ complexrotateop:$rot),
+ asm, "\t$Zda, $Zn, $Zm, $rot", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ bits<2> rot;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = rot;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_cintx_dot<string asm> {
+ def _S : sve2_complex_int_arith<0b10, 0b0001, asm, ZPR32, ZPR8>;
+ def _D : sve2_complex_int_arith<0b11, 0b0001, asm, ZPR64, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Multiply-Add Group
+//===----------------------------------------------------------------------===//
+
+multiclass sve2_int_cmla<bit opc, string asm> {
+ def _B : sve2_complex_int_arith<0b00, { 0b001, opc }, asm, ZPR8, ZPR8>;
+ def _H : sve2_complex_int_arith<0b01, { 0b001, opc }, asm, ZPR16, ZPR16>;
+ def _S : sve2_complex_int_arith<0b10, { 0b001, opc }, asm, ZPR32, ZPR32>;
+ def _D : sve2_complex_int_arith<0b11, { 0b001, opc }, asm, ZPR64, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Integer Dot Product - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2,
+ ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty3:$Zm, itype:$iop,
+ complexrotateop:$rot),
+ asm, "\t$Zda, $Zn, $Zm$iop, $rot", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<2> rot;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = rot;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_cintx_dot_by_indexed_elem<string asm> {
+ def _S : sve2_complex_int_arith_indexed<0b10, 0b0100, asm, ZPR32, ZPR8, ZPR3b8, VectorIndexS> {
+ bits<2> iop;
+ bits<3> Zm;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _D : sve2_complex_int_arith_indexed<0b11, 0b0100, asm, ZPR64, ZPR16, ZPR4b16, VectorIndexD> {
+ bit iop;
+ bits<4> Zm;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Complex Multiply-Add - Indexed Group
+//===----------------------------------------------------------------------===//
+
+multiclass sve2_cmla_by_indexed_elem<bit opc, string asm> {
+ def _H : sve2_complex_int_arith_indexed<0b10, { 0b011, opc }, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexS> {
+ bits<2> iop;
+ bits<3> Zm;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _S : sve2_complex_int_arith_indexed<0b11, { 0b011, opc }, asm, ZPR32, ZPR32, ZPR4b32, VectorIndexD> {
+ bit iop;
+ bits<4> Zm;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply - Unpredicated Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_mul<bits<3> opc, string asm> {
+ def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
+ def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
+ def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
+ def _D : sve2_int_mul<0b11, opc, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer Multiply - Indexed Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_mul_by_indexed_elem<bits<2> sz, bits<4> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2,
+ ZPRRegOp zprty3, Operand itype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty3:$Zm, itype:$iop),
+ asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm> {
+ def _H : sve2_int_mul_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR16, ZPR3b16, VectorIndexH> {
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{22} = iop{2};
+ let Inst{20-19} = iop{1-0};
+ let Inst{18-16} = Zm;
+ }
+ def _S : sve2_int_mul_by_indexed_elem<0b10, opc, asm, ZPR32, ZPR32, ZPR3b32, VectorIndexS> {
+ bits<3> Zm;
+ bits<2> iop;
+ let Inst{20-19} = iop;
+ let Inst{18-16} = Zm;
+ }
+ def _D : sve2_int_mul_by_indexed_elem<0b11, opc, asm, ZPR64, ZPR64, ZPR4b64, VectorIndexD> {
+ bits<4> Zm;
+ bit iop;
+ let Inst{20} = iop;
+ let Inst{19-16} = Zm;
+ }
+}
+
+multiclass sve2_int_mul_long_by_indexed_elem<bits<3> opc, string asm> {
+ def _S : sve2_int_mul_by_indexed_elem<0b10, { opc{2-1}, ?, opc{0} }, asm,
+ ZPR32, ZPR16, ZPR3b16, VectorIndexH> {
+ bits<3> Zm;
+ bits<3> iop;
+ let Inst{20-19} = iop{2-1};
+ let Inst{18-16} = Zm;
+ let Inst{11} = iop{0};
+ }
+ def _D : sve2_int_mul_by_indexed_elem<0b11, { opc{2-1}, ?, opc{0} }, asm,
+ ZPR64, ZPR32, ZPR4b32, VectorIndexS> {
+ bits<4> Zm;
+ bits<2> iop;
+ let Inst{20} = iop{1};
+ let Inst{19-16} = Zm;
+ let Inst{11} = iop{0};
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Integer - Predicated Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
+ ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zm;
+ bits<5> Zdn;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = opc{5-1};
+ let Inst{15-14} = 0b10;
+ let Inst{13} = opc{0};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_int_arith_pred<bits<6> opc, string asm> {
+ def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
+ def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
+ def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
+ def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
+}
+
+class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins PPR3bAny:$Pg, zprty1:$_Zda, zprty2:$Zn),
+ asm, "\t$Zda, $Pg/m, $Zn", "", []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> Zda;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21-17} = 0b00010;
+ let Inst{16} = U;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty1.ElementSize;
+}
+
+multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm> {
+ def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>;
+ def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>;
+ def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>;
+}
+
+class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
+ string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
+ asm, "\t$Zd, $Pg/m, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-24} = 0b01000100;
+ let Inst{23-22} = sz;
+ let Inst{21-20} = 0b00;
+ let Inst{19} = Q;
+ let Inst{18} = 0b0;
+ let Inst{17-16} = opc;
+ let Inst{15-13} = 0b101;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = zprty.ElementSize;
+}
+
+multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm> {
+ def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+}
+
+multiclass sve2_int_un_pred_arit<bits<3> opc, string asm> {
+ def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
+ def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
+ def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+ def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Widening Integer Arithmetic Group
+//===----------------------------------------------------------------------===//
+
+class sve2_wide_int_arith<bits<2> sz, bits<5> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2, ZPRRegOp zprty3>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty3:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15} = 0b0;
+ let Inst{14-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_wide_int_arith_long<bits<5> opc, string asm> {
+ def _H : sve2_wide_int_arith<0b01, opc, asm, ZPR16, ZPR8, ZPR8>;
+ def _S : sve2_wide_int_arith<0b10, opc, asm, ZPR32, ZPR16, ZPR16>;
+ def _D : sve2_wide_int_arith<0b11, opc, asm, ZPR64, ZPR32, ZPR32>;
+}
+
+multiclass sve2_wide_int_arith_wide<bits<3> opc, string asm> {
+ def _H : sve2_wide_int_arith<0b01, { 0b10, opc }, asm, ZPR16, ZPR16, ZPR8>;
+ def _S : sve2_wide_int_arith<0b10, { 0b10, opc }, asm, ZPR32, ZPR32, ZPR16>;
+ def _D : sve2_wide_int_arith<0b11, { 0b10, opc }, asm, ZPR64, ZPR64, ZPR32>;
+}
+
+multiclass sve2_pmul_long<bits<1> opc, string asm> {
+ def _H : sve2_wide_int_arith<0b01, {0b1101, opc}, asm, ZPR16, ZPR8, ZPR8>;
+ def _D : sve2_wide_int_arith<0b11, {0b1101, opc}, asm, ZPR64, ZPR32, ZPR32>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Misc Group
+//===----------------------------------------------------------------------===//
+
+class sve2_misc<bits<2> sz, bits<4> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b10;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
+ def _B : sve2_misc<0b00, opc, asm, ZPR8, ZPR8>;
+ def _H : sve2_misc<0b01, opc, asm, ZPR16, ZPR16>;
+ def _S : sve2_misc<0b10, opc, asm, ZPR32, ZPR32>;
+ def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
+ let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
+ def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>;
+ def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
+ def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
+ def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
+ }
+}
+
+multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
+ def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
+ def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
+ def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+}
+
+class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2,
+ Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
+ asm, "\t$Zd, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> imm;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b0;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-12} = 0b1010;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
+ def _H : sve2_bitwise_shift_left_long<{0,0,1}, opc, asm,
+ ZPR16, ZPR8, vecshiftL8>;
+ def _S : sve2_bitwise_shift_left_long<{0,1,?}, opc, asm,
+ ZPR32, ZPR16, vecshiftL16> {
+ let Inst{19} = imm{3};
+ }
+ def _D : sve2_bitwise_shift_left_long<{1,?,?}, opc, asm,
+ ZPR64, ZPR32, vecshiftL32> {
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Accumulate Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
+ asm, "\t$Zd, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<6> imm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = tsz8_64{3-2};
+ let Inst{21} = 0b0;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-11} = 0b11110;
+ let Inst{10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
+ def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ let Inst{20-19} = imm{4-3};
+ }
+ def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ let Inst{22} = imm{5};
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
+ def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+ def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ let Inst{22} = imm{5};
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
+ asm, "\t$Zda, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<6> imm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = tsz8_64{3-2};
+ let Inst{21} = 0b0;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-12} = 0b1110;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
+ def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+ def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ let Inst{22} = imm{5};
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, complexrotateopodd:$rot),
+ asm, "\t$Zdn, $_Zdn, $Zm, $rot", "", []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ bit rot;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21-17} = 0b00000;
+ let Inst{16} = opc;
+ let Inst{15-11} = 0b11011;
+ let Inst{10} = rot;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_cadd<bit opc, string asm> {
+ def _B : sve2_int_cadd<0b00, opc, asm, ZPR8>;
+ def _H : sve2_int_cadd<0b01, opc, asm, ZPR16>;
+ def _S : sve2_int_cadd<0b10, opc, asm, ZPR32>;
+ def _D : sve2_int_cadd<0b11, opc, asm, ZPR64>;
+}
+
+class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zda;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-14} = 0b11;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zda;
+
+ let Constraints = "$Zda = $_Zda";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_absdiff_accum<bit opc, string asm> {
+ def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
+ def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
+ def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
+ def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> {
+ def _H : sve2_int_absdiff_accum<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
+ def _S : sve2_int_absdiff_accum<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
+ def _D : sve2_int_absdiff_accum<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
+}
+
+multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
+ def _S : sve2_int_absdiff_accum<{ opc{1}, 0b0 }, { 0b010, opc{0} }, asm,
+ ZPR32, ZPR32>;
+ def _D : sve2_int_absdiff_accum<{ opc{1}, 0b1 }, { 0b010, opc{0} }, asm,
+ ZPR64, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Narrowing Group
+//===----------------------------------------------------------------------===//
+
+class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
+ string asm, ZPRRegOp zprty1,
+ ZPRRegOp zprty2, Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
+ asm, "\t$Zd, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> imm;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
+ def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
+ vecshiftR8>;
+ def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
+ vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
+ vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-10} = opc; // S, R, T
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
+ def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-13} = 0b000010;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
+ def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
// SVE Integer Arithmetic - Unary Predicated Group
//===----------------------------------------------------------------------===//
@@ -1983,6 +2907,86 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
let Inst{4-0} = Zd;
}
+multiclass sve_int_bin_cons_log<bits<2> opc, string asm> {
+ def NAME : sve_int_bin_cons_log<opc, asm>;
+
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 1>;
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME) ZPR16:$Zd, ZPR16:$Zn, ZPR16:$Zm), 1>;
+ def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
+ (!cast<Instruction>(NAME) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 1>;
+}
+
+class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
+: I<(outs ZPR64:$Zdn), (ins ZPR64:$_Zdn, ZPR64:$Zm, ZPR64:$Zk),
+ asm, "\t$Zdn, $_Zdn, $Zm, $Zk",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zk;
+ bits<5> Zm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = opc{2-1};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-11} = 0b00111;
+ let Inst{10} = opc{0};
+ let Inst{9-5} = Zk;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm> {
+ def NAME : sve2_int_bitwise_ternary_op_d<opc, asm>;
+
+ def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
+ (!cast<Instruction>(NAME) ZPR8:$Zdn, ZPR8:$Zm, ZPR8:$Zk), 1>;
+ def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
+ (!cast<Instruction>(NAME) ZPR16:$Zdn, ZPR16:$Zm, ZPR16:$Zk), 1>;
+ def : InstAlias<asm # "\t$Zdn, $Zdn, $Zm, $Zk",
+ (!cast<Instruction>(NAME) ZPR32:$Zdn, ZPR32:$Zm, ZPR32:$Zk), 1>;
+}
+
+class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, immtype:$imm),
+ asm, "\t$Zdn, $_Zdn, $Zm, $imm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ bits<6> imm;
+ let Inst{31-24} = 0b00000100;
+ let Inst{23-22} = tsz8_64{3-2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-10} = 0b001101;
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_int_rotate_right_imm<string asm> {
+ def _B : sve2_int_rotate_right_imm<{0,0,0,1}, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_rotate_right_imm<{0,0,1,?}, asm, ZPR16, vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_rotate_right_imm<{0,1,?,?}, asm, ZPR32, vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+ def _D : sve2_int_rotate_right_imm<{1,?,?,?}, asm, ZPR64, vecshiftR64> {
+ let Inst{22} = imm{5};
+ let Inst{20-19} = imm{4-3};
+ }
+}
//===----------------------------------------------------------------------===//
// SVE Integer Wide Immediate - Predicated Group
@@ -2266,6 +3270,32 @@ multiclass sve_int_while8_rr<bits<3> opc, string asm> {
def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
}
+class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
+ PPRRegOp pprty>
+: I<(outs pprty:$Pd), (ins GPR64:$Rn, GPR64:$Rm),
+ asm, "\t$Pd, $Rn, $Rm",
+ "", []>, Sched<[]> {
+ bits<4> Pd;
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00100101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001100;
+ let Inst{9-5} = Rn;
+ let Inst{4} = rw;
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve2_int_while_rr<bits<1> rw, string asm> {
+ def _B : sve2_int_while_rr<0b00, rw, asm, PPR8>;
+ def _H : sve2_int_while_rr<0b01, rw, asm, PPR16>;
+ def _S : sve2_int_while_rr<0b10, rw, asm, PPR32>;
+ def _D : sve2_int_while_rr<0b11, rw, asm, PPR64>;
+}
//===----------------------------------------------------------------------===//
// SVE Floating Point Fast Reduction Group
@@ -2497,9 +3527,9 @@ multiclass sve_int_index_rr<string asm> {
//===----------------------------------------------------------------------===//
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
-class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
- ZPRRegOp zprty, Operand immtype,
- ElementSizeEnum size>
+class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
+ ZPRRegOp zprty, Operand immtype,
+ ElementSizeEnum size>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
"",
@@ -2509,8 +3539,8 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
bits<6> imm;
let Inst{31-24} = 0b00000100;
let Inst{23-22} = tsz8_64{3-2};
- let Inst{21-19} = 0b000;
- let Inst{18-16} = opc;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = opc;
let Inst{15-13} = 0b100;
let Inst{12-10} = Pg;
let Inst{9-8} = tsz8_64{1-0};
@@ -2522,7 +3552,7 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<3> opc, string asm,
let ElementSize = size;
}
-multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
ElementSizeB>;
def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
@@ -2540,7 +3570,7 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<3> opc, string asm> {
}
}
-multiclass sve_int_bin_pred_shift_imm_right<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
ElementSizeB>;
def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -2856,6 +3886,43 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
}
+class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
+ RegisterOperand VecList>
+: I<(outs VecList:$Zt), iops,
+ asm, "\t$Zt, $Pg, [$Zn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Zn;
+ bits<5> Zt;
+ let Inst{31-25} = 0b1110010;
+ let Inst{24-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = 0b001;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zt;
+
+ let mayStore = 1;
+}
+
+multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
+ RegisterOperand listty, ZPRRegOp zprty> {
+ def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+ asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+}
+
class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
RegisterOperand VecList, RegisterOperand zprext>
: I<(outs), (ins VecList:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, zprext:$Zm),
@@ -3304,6 +4371,30 @@ multiclass sve_int_perm_splice<string asm> {
def _D : sve_int_perm_splice<0b11, asm, ZPR64>;
}
+class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
+ ZPRRegOp zprty, RegisterOperand VecList>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, VecList:$Zn),
+ asm, "\t$Zd, $Pg, $Zn",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Zn;
+ bits<5> Zd;
+ let Inst{31-24} = 0b00000101;
+ let Inst{23-22} = sz8_64;
+ let Inst{21-13} = 0b101101100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_perm_splice_cons<string asm> {
+ def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8, ZZ_b>;
+ def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>;
+ def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>;
+ def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
+}
+
class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
ZPRRegOp zprty>
: I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
@@ -4003,6 +5094,46 @@ multiclass sve_mem_p_fill<string asm> {
(!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
}
+class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
+ RegisterOperand VecList>
+: I<(outs VecList:$Zt), iops,
+ asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
+ "",
+ []>, Sched<[]> {
+ bits<3> Pg;
+ bits<5> Rm;
+ bits<5> Zn;
+ bits<5> Zt;
+ let Inst{31} = 0b1;
+ let Inst{30} = opc{4};
+ let Inst{29-25} = 0b00010;
+ let Inst{24-23} = opc{3-2};
+ let Inst{22-21} = 0b00;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0b1;
+ let Inst{14-13} = opc{1-0};
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zt;
+
+ let mayLoad = 1;
+}
+
+multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
+ RegisterOperand listty, ZPRRegOp zprty> {
+ def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+ asm, listty>;
+
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
+ def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
+ (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Memory - 64-bit Gather Group
//===----------------------------------------------------------------------===//
@@ -4454,3 +5585,132 @@ multiclass sve_int_break_z<bits<3> opc, string asm> {
def NAME : sve_int_break<opc, asm, "/z", (ins PPRAny:$Pg, PPR8:$Pn)>;
}
+//===----------------------------------------------------------------------===//
+// SVE2 String Processing Group
+//===----------------------------------------------------------------------===//
+
+class sve2_char_match<bit sz, bit opc, string asm,
+ PPRRegOp pprty, ZPRRegOp zprty>
+: I<(outs pprty:$Pd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Pd, $Pg/z, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<4> Pd;
+ bits<3> Pg;
+ bits<5> Zm;
+ bits<5> Zn;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b100;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4} = opc;
+ let Inst{3-0} = Pd;
+
+ let Defs = [NZCV];
+}
+
+multiclass sve2_char_match<bit opc, string asm> {
+ def _B : sve2_char_match<0b0, opc, asm, PPR8, ZPR8>;
+ def _H : sve2_char_match<0b1, opc, asm, PPR16, ZPR16>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Histogram Computation - Segment Group
+//===----------------------------------------------------------------------===//
+
+class sve2_hist_gen_segment<string asm>
+: I<(outs ZPR8:$Zd), (ins ZPR8:$Zn, ZPR8:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01000101001;
+ let Inst{20-16} = Zm;
+ let Inst{15-10} = 0b101000;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Histogram Computation - Vector Group
+//===----------------------------------------------------------------------===//
+
+class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Pg/z, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<3> Pg;
+ bits<5> Zm;
+ let Inst{31-23} = 0b010001011;
+ let Inst{22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b110;
+ let Inst{12-10} = Pg;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_hist_gen_vector<string asm> {
+ def _S : sve2_hist_gen_vector<0b0, asm, ZPR32>;
+ def _D : sve2_hist_gen_vector<0b1, asm, ZPR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// SVE2 Crypto Extensions Group
+//===----------------------------------------------------------------------===//
+
+class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+ asm, "\t$Zd, $Zn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-21} = 0b01000101001;
+ let Inst{20-16} = Zm;
+ let Inst{15-11} = 0b11110;
+ let Inst{10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm),
+ asm, "\t$Zdn, $_Zdn, $Zm",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ bits<5> Zm;
+ let Inst{31-17} = 0b010001010010001;
+ let Inst{16} = opc{1};
+ let Inst{15-11} = 0b11100;
+ let Inst{10} = opc{0};
+ let Inst{9-5} = Zm;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+}
+
+class sve2_crypto_unary_op<bit opc, string asm>
+: I<(outs ZPR8:$Zdn), (ins ZPR8:$_Zdn),
+ asm, "\t$Zdn, $_Zdn",
+ "",
+ []>, Sched<[]> {
+ bits<5> Zdn;
+ let Inst{31-11} = 0b010001010010000011100;
+ let Inst{10} = opc;
+ let Inst{9-5} = 0b00000;
+ let Inst{4-0} = Zdn;
+
+ let Constraints = "$Zdn = $_Zdn";
+}
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 8fb161574c5b..7f02da6a9516 100644
--- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -1,39 +1,50 @@
//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Triple.h"
+#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
-namespace llvm {
-Target &getTheAArch64leTarget() {
+Target &llvm::getTheAArch64leTarget() {
static Target TheAArch64leTarget;
return TheAArch64leTarget;
}
-Target &getTheAArch64beTarget() {
+Target &llvm::getTheAArch64beTarget() {
static Target TheAArch64beTarget;
return TheAArch64beTarget;
}
-Target &getTheARM64Target() {
+Target &llvm::getTheAArch64_32Target() {
+ static Target TheAArch64leTarget;
+ return TheAArch64leTarget;
+}
+Target &llvm::getTheARM64Target() {
static Target TheARM64Target;
return TheARM64Target;
}
-} // namespace llvm
+Target &llvm::getTheARM64_32Target() {
+ static Target TheARM64_32Target;
+ return TheARM64_32Target;
+}
extern "C" void LLVMInitializeAArch64TargetInfo() {
// Now register the "arm64" name for use with "-march". We don't want it to
- // take possession of the Triple::aarch64 tag though.
+ // take possession of the Triple::aarch64 tags though.
TargetRegistry::RegisterTarget(getTheARM64Target(), "arm64",
"ARM64 (little endian)", "AArch64",
[](Triple::ArchType) { return false; }, true);
+ TargetRegistry::RegisterTarget(getTheARM64_32Target(), "arm64_32",
+ "ARM64 (little endian ILP32)", "AArch64",
+ [](Triple::ArchType) { return false; }, true);
RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
getTheAArch64leTarget(), "aarch64", "AArch64 (little endian)", "AArch64");
RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
getTheAArch64beTarget(), "aarch64_be", "AArch64 (big endian)", "AArch64");
+ RegisterTarget<Triple::aarch64_32, /*HasJIT=*/true> X(
+ getTheAArch64_32Target(), "aarch64_32", "AArch64 (little endian ILP32)", "AArch64");
}
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h
new file mode 100644
index 000000000000..b3728a11bb5d
--- /dev/null
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h
@@ -0,0 +1,24 @@
+//===-- AArch64TargetInfo.h - AArch64 Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_TARGETINFO_AARCH64TARGETINFO_H
+#define LLVM_LIB_TARGET_AARCH64_TARGETINFO_AARCH64TARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheAArch64leTarget();
+Target &getTheAArch64beTarget();
+Target &getTheAArch64_32Target();
+Target &getTheARM64Target();
+Target &getTheARM64_32Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_TARGETINFO_AARCH64TARGETINFO_H
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index c88155db7037..7bb075c36e79 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -1,9 +1,8 @@
//===-- AArch64BaseInfo.cpp - AArch64 Base encoding information------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 44c6a6b44895..e5e2fc2cb0df 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1,9 +1,8 @@
//===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -186,6 +185,49 @@ static inline unsigned getDRegFromBReg(unsigned Reg) {
return Reg;
}
+static inline bool atomicBarrierDroppedOnZero(unsigned Opcode) {
+ switch (Opcode) {
+ case AArch64::LDADDAB: case AArch64::LDADDAH:
+ case AArch64::LDADDAW: case AArch64::LDADDAX:
+ case AArch64::LDADDALB: case AArch64::LDADDALH:
+ case AArch64::LDADDALW: case AArch64::LDADDALX:
+ case AArch64::LDCLRAB: case AArch64::LDCLRAH:
+ case AArch64::LDCLRAW: case AArch64::LDCLRAX:
+ case AArch64::LDCLRALB: case AArch64::LDCLRALH:
+ case AArch64::LDCLRALW: case AArch64::LDCLRALX:
+ case AArch64::LDEORAB: case AArch64::LDEORAH:
+ case AArch64::LDEORAW: case AArch64::LDEORAX:
+ case AArch64::LDEORALB: case AArch64::LDEORALH:
+ case AArch64::LDEORALW: case AArch64::LDEORALX:
+ case AArch64::LDSETAB: case AArch64::LDSETAH:
+ case AArch64::LDSETAW: case AArch64::LDSETAX:
+ case AArch64::LDSETALB: case AArch64::LDSETALH:
+ case AArch64::LDSETALW: case AArch64::LDSETALX:
+ case AArch64::LDSMAXAB: case AArch64::LDSMAXAH:
+ case AArch64::LDSMAXAW: case AArch64::LDSMAXAX:
+ case AArch64::LDSMAXALB: case AArch64::LDSMAXALH:
+ case AArch64::LDSMAXALW: case AArch64::LDSMAXALX:
+ case AArch64::LDSMINAB: case AArch64::LDSMINAH:
+ case AArch64::LDSMINAW: case AArch64::LDSMINAX:
+ case AArch64::LDSMINALB: case AArch64::LDSMINALH:
+ case AArch64::LDSMINALW: case AArch64::LDSMINALX:
+ case AArch64::LDUMAXAB: case AArch64::LDUMAXAH:
+ case AArch64::LDUMAXAW: case AArch64::LDUMAXAX:
+ case AArch64::LDUMAXALB: case AArch64::LDUMAXALH:
+ case AArch64::LDUMAXALW: case AArch64::LDUMAXALX:
+ case AArch64::LDUMINAB: case AArch64::LDUMINAH:
+ case AArch64::LDUMINAW: case AArch64::LDUMINAX:
+ case AArch64::LDUMINALB: case AArch64::LDUMINALH:
+ case AArch64::LDUMINALW: case AArch64::LDUMINALX:
+ case AArch64::SWPAB: case AArch64::SWPAH:
+ case AArch64::SWPAW: case AArch64::SWPAX:
+ case AArch64::SWPALB: case AArch64::SWPALH:
+ case AArch64::SWPALW: case AArch64::SWPALX:
+ return true;
+ }
+ return false;
+}
+
namespace AArch64CC {
// The CondCodes constants map directly to the 4-bit encoding of the condition
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index bb7801c172f6..19a8bd901629 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -1,9 +1,8 @@
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
@@ -51,14 +50,16 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
-FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitcntsPass();
-FunctionPass *createSIFixWWMLivenessPass();
+FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();
-FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
+FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &,
+ const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
+FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
+ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
FunctionPass *createAMDGPURewriteOutArgumentsPass();
FunctionPass *createSIModeRegisterPass();
@@ -93,6 +94,12 @@ ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;
+void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
+extern char &AMDGPUPropagateAttributesEarlyID;
+
+void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
+extern char &AMDGPUPropagateAttributesLateID;
+
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
@@ -135,6 +142,9 @@ extern char &SIFixupVectorISelID;
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
+void initializeSILowerSGPRSpillsPass(PassRegistry &);
+extern char &SILowerSGPRSpillsID;
+
void initializeSILoadStoreOptimizerPass(PassRegistry &);
extern char &SILoadStoreOptimizerID;
@@ -150,8 +160,8 @@ extern char &SIInsertSkipsPassID;
void initializeSIOptimizeExecMaskingPass(PassRegistry &);
extern char &SIOptimizeExecMaskingID;
-void initializeSIFixWWMLivenessPass(PassRegistry &);
-extern char &SIFixWWMLivenessID;
+void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
+extern char &SIPreAllocateWWMRegsID;
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
extern char &AMDGPUSimplifyLibCallsID;
@@ -197,9 +207,6 @@ extern char &SIAnnotateControlFlowPassID;
void initializeSIMemoryLegalizerPass(PassRegistry&);
extern char &SIMemoryLegalizerID;
-void initializeSIDebuggerInsertNopsPass(PassRegistry&);
-extern char &SIDebuggerInsertNopsID;
-
void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;
@@ -226,8 +233,11 @@ ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
-Target &getTheAMDGPUTarget();
-Target &getTheGCNTarget();
+void initializeGCNRegBankReassignPass(PassRegistry &);
+extern char &GCNRegBankReassignID;
+
+void initializeGCNNSAReassignPass(PassRegistry &);
+extern char &GCNNSAReassignID;
namespace AMDGPU {
enum TargetIndex {
@@ -250,21 +260,23 @@ enum TargetIndex {
namespace AMDGPUAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
- MAX_AMDGPU_ADDRESS = 6,
+ MAX_AMDGPU_ADDRESS = 7,
FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- REGION_ADDRESS = 2, ///< Address space for region memory.
+ REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
- CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
+ CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
LOCAL_ADDRESS = 3, ///< Address space for local memory.
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
- CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory
+ CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.
+
+ BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
- /// Address space for direct addressible parameter memory (CONST0)
+ /// Address space for direct addressable parameter memory (CONST0).
PARAM_D_ADDRESS = 6,
- /// Address space for indirect addressible parameter memory (VTX1)
+ /// Address space for indirect addressable parameter memory (VTX1).
PARAM_I_ADDRESS = 7,
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 6a4cfe08e491..baeba534012c 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -1,9 +1,8 @@
//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------===//
@@ -61,6 +60,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
"Have scratch_* flat memory instructions"
>;
+def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
+ "ScalarFlatScratchInsts",
+ "true",
+ "Have s_scratch_* flat memory instructions"
+>;
+
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
"AddNoCarryInsts",
"true",
@@ -103,6 +108,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;
+def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support",
+ "DoesNotSupportXNACK",
+ "true",
+ "Hardware does not support XNACK"
+>;
+
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
@@ -116,12 +127,78 @@ def FeatureXNACK : SubtargetFeature<"xnack",
"Enable XNACK support"
>;
+def FeatureCuMode : SubtargetFeature<"cumode",
+ "EnableCuMode",
+ "true",
+ "Enable CU wavefront execution mode"
+>;
+
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"SGPRInitBug",
"true",
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
+def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
+ "LDSMisalignedBug",
+ "true",
+ "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
+>;
+
+def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
+ "HasVcmpxPermlaneHazard",
+ "true",
+ "TODO: describe me"
+>;
+
+def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
+ "HasVMEMtoScalarWriteHazard",
+ "true",
+ "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
+>;
+
+def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
+ "HasSMEMtoVectorWriteHazard",
+ "true",
+ "s_load_dword followed by v_cmp page faults"
+>;
+
+def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
+ "HasInstFwdPrefetchBug",
+ "true",
+ "S_INST_PREFETCH instruction causes shader to hang"
+>;
+
+def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
+ "HasVcmpxExecWARHazard",
+ "true",
+ "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
+>;
+
+def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
+ "HasLdsBranchVmemWARHazard",
+ "true",
+ "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
+>;
+
+def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
+ "HasNSAtoVMEMBug",
+ "true",
+ "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
+>;
+
+def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
+ "HasFlatSegmentOffsetBug",
+ "true",
+ "GFX10 bug, inst_offset ignored in flat segment"
+>;
+
+def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
+ "HasOffset3fBug",
+ "true",
+ "Branch offset of 3f hardware bug"
+>;
+
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@@ -144,10 +221,10 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
"Additional instructions for CI+"
>;
-def FeatureVIInsts : SubtargetFeature<"vi-insts",
- "VIInsts",
+def FeatureGFX8Insts : SubtargetFeature<"gfx8-insts",
+ "GFX8Insts",
"true",
- "Additional instructions for VI+"
+ "Additional instructions for GFX8+"
>;
def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
@@ -156,6 +233,18 @@ def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
"Additional instructions for GFX9+"
>;
+def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
+ "GFX10Insts",
+ "true",
+ "Additional instructions for GFX10+"
+>;
+
+def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
+ "GFX7GFX8GFX9Insts",
+ "true",
+ "Instructions shared in GFX7, GFX8, GFX9"
+>;
+
def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
"HasSMemRealTime",
"true",
@@ -246,12 +335,25 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
+def FeatureDPP8 : SubtargetFeature<"dpp8",
+ "HasDPP8",
+ "true",
+ "Support DPP8 (Data Parallel Primitives) extension"
+>;
+
def FeatureR128A16 : SubtargetFeature<"r128-a16",
"HasR128A16",
"true",
"Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
>;
+def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
+ "HasNSAEncoding",
+ "true",
+ "Support NSA encoding for image instructions"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -270,10 +372,65 @@ def FeatureDLInsts : SubtargetFeature<"dl-insts",
"Has v_fmac_f32 and v_xnor_b32 instructions"
>;
-def FeatureDotInsts : SubtargetFeature<"dot-insts",
- "HasDotInsts",
+def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
+ "HasDot1Insts",
+ "true",
+ "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions"
+>;
+
+def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
+ "HasDot2Insts",
+ "true",
+ "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
+>;
+
+def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
+ "HasDot3Insts",
+ "true",
+ "Has v_dot8c_i32_i4 instruction"
+>;
+
+def FeatureDot4Insts : SubtargetFeature<"dot4-insts",
+ "HasDot4Insts",
+ "true",
+ "Has v_dot2c_i32_i16 instruction"
+>;
+
+def FeatureDot5Insts : SubtargetFeature<"dot5-insts",
+ "HasDot5Insts",
"true",
- "Has v_dot* instructions"
+ "Has v_dot2c_f32_f16 instruction"
+>;
+
+def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
+ "HasDot6Insts",
+ "true",
+ "Has v_dot4c_i32_i8 instruction"
+>;
+
+def FeatureMAIInsts : SubtargetFeature<"mai-insts",
+ "HasMAIInsts",
+ "true",
+ "Has mAI instructions"
+>;
+
+def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
+ "HasPkFmacF16Inst",
+ "true",
+ "Has v_pk_fmac_f16 instruction"
+>;
+
+def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
+ "HasAtomicFaddInsts",
+ "true",
+ "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
+ "global_atomic_pk_add_f16 instructions"
+>;
+
+def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
+ "DoesNotSupportSRAMECC",
+ "true",
+ "Hardware does not support SRAM ECC"
>;
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
@@ -282,6 +439,36 @@ def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
"Enable SRAM ECC"
>;
+def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
+ "HasNoSdstCMPX",
+ "true",
+ "V_CMPX does not write VCC/SGPR in addition to EXEC"
+>;
+
+def FeatureVscnt : SubtargetFeature<"vscnt",
+ "HasVscnt",
+ "true",
+ "Has separate store vscnt counter"
+>;
+
+def FeatureRegisterBanking : SubtargetFeature<"register-banking",
+ "HasRegisterBanking",
+ "true",
+ "Has register banking"
+>;
+
+def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
+ "HasVOP3Literal",
+ "true",
+ "Can use one literal in VOP3"
+>;
+
+def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
+ "HasNoDataDepHazard",
+ "true",
+ "Does not need SW waitstates"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -327,13 +514,6 @@ def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
-def FeatureEnableHugePrivateBuffer : SubtargetFeature<
- "huge-private-buffer",
- "EnableHugePrivateBuffer",
- "true",
- "Enable private/scratch buffer sizes greater than 128 GB"
->;
-
def FeatureDumpCode : SubtargetFeature <"DumpCode",
"DumpCode",
"true",
@@ -425,103 +605,123 @@ def FeatureDisable : SubtargetFeature<"",
"Dummy feature to disable assembler instructions"
>;
-def FeatureGCN : SubtargetFeature<"gcn",
- "IsGCN",
- "true",
- "GCN or newer GPU"
->;
-
class GCNSubtargetFeatureGeneration <string Value,
- list<SubtargetFeature> Implies> :
- SubtargetFeatureGeneration <Value, "GCNSubtarget", Implies>;
+ string FeatureName,
+ list<SubtargetFeature> Implies> :
+ SubtargetFeatureGeneration <Value, FeatureName, "GCNSubtarget", Implies>;
def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+ "southern-islands",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
- FeatureWavefrontSize64, FeatureGCN,
- FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
+ FeatureWavefrontSize64,
+ FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
+ FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK]
>;
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
+ "sea-islands",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
- FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
+ FeatureWavefrontSize64, FeatureFlatAddressSpace,
+ FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
+ FeatureGFX7GFX8GFX9Insts, FeatureDoesNotSupportSRAMECC]
>;
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+ "volcanic-islands",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
- FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace,
+ FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
- FeatureIntClamp, FeatureTrigReducedRange
+ FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
+ FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
]
>;
def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
+ "gfx9",
[FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, FeatureVIInsts, Feature16BitInsts,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace,
+ FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
- FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
+ FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
+ FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
-class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
- list<SubtargetFeature> Implies>
- : SubtargetFeature <
- "isaver"#Major#"."#Minor#"."#Stepping,
- "IsaVersion",
- "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
- "Instruction set version number",
- Implies
+def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
+ "gfx10",
+ [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
+ FeatureFlatAddressSpace,
+ FeatureCIInsts, Feature16BitInsts,
+ FeatureSMemRealTime, FeatureInv2PiInlineImm,
+ FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
+ FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+ FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
+ FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
+ FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
+ FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
+ FeatureVOP3Literal, FeatureDPP8,
+ FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+ ]
>;
-def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0,
- [FeatureSouthernIslands,
+class FeatureSet<list<SubtargetFeature> Features_> {
+ list<SubtargetFeature> Features = Features_;
+}
+
+def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands,
FeatureFastFMAF32,
HalfRate64Ops,
FeatureLDSBankCount32,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1,
+def FeatureISAVersion6_0_1 : FeatureSet<
[FeatureSouthernIslands,
FeatureLDSBankCount32,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
+def FeatureISAVersion7_0_0 : FeatureSet<
[FeatureSeaIslands,
FeatureLDSBankCount32,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
+def FeatureISAVersion7_0_1 : FeatureSet<
[FeatureSeaIslands,
HalfRate64Ops,
FeatureLDSBankCount32,
FeatureFastFMAF32,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
+def FeatureISAVersion7_0_2 : FeatureSet<
[FeatureSeaIslands,
FeatureLDSBankCount16,
FeatureFastFMAF32,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3,
+def FeatureISAVersion7_0_3 : FeatureSet<
[FeatureSeaIslands,
FeatureLDSBankCount16,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4,
+def FeatureISAVersion7_0_4 : FeatureSet<
[FeatureSeaIslands,
FeatureLDSBankCount32,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
+def FeatureISAVersion8_0_1 : FeatureSet<
[FeatureVolcanicIslands,
FeatureFastFMAF32,
HalfRate64Ops,
@@ -530,78 +730,151 @@ def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
FeatureUnpackedD16VMem,
FeatureCodeObjectV3]>;
-def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
+def FeatureISAVersion8_0_2 : FeatureSet<
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
FeatureSGPRInitBug,
FeatureUnpackedD16VMem,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
+def FeatureISAVersion8_0_3 : FeatureSet<
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
FeatureUnpackedD16VMem,
+ FeatureDoesNotSupportXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
+def FeatureISAVersion8_1_0 : FeatureSet<
[FeatureVolcanicIslands,
FeatureLDSBankCount16,
FeatureXNACK,
FeatureCodeObjectV3]>;
-def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
+def FeatureISAVersion9_0_0 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureCodeObjectV3]>;
+ FeatureCodeObjectV3,
+ FeatureDoesNotSupportXNACK,
+ FeatureDoesNotSupportSRAMECC]>;
-def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
+def FeatureISAVersion9_0_2 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
+ FeatureDoesNotSupportSRAMECC,
FeatureCodeObjectV3]>;
-def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
+def FeatureISAVersion9_0_4 : FeatureSet<
[FeatureGFX9,
FeatureLDSBankCount32,
FeatureFmaMixInsts,
+ FeatureDoesNotSupportXNACK,
+ FeatureDoesNotSupportSRAMECC,
FeatureCodeObjectV3]>;
-def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
+def FeatureISAVersion9_0_6 : FeatureSet<
[FeatureGFX9,
HalfRate64Ops,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts,
- FeatureDotInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3]>;
+
+def FeatureISAVersion9_0_8 : FeatureSet<
+ [FeatureGFX9,
+ HalfRate64Ops,
+ FeatureFmaMixInsts,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot3Insts,
+ FeatureDot4Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureMAIInsts,
+ FeaturePkFmacF16Inst,
+ FeatureAtomicFaddInsts,
FeatureSRAMECC,
FeatureCodeObjectV3]>;
-def FeatureISAVersion9_0_9 : SubtargetFeatureISAVersion <9,0,9,
+def FeatureISAVersion9_0_9 : FeatureSet<
[FeatureGFX9,
FeatureMadMixInsts,
FeatureLDSBankCount32,
FeatureXNACK,
FeatureCodeObjectV3]>;
-//===----------------------------------------------------------------------===//
-// Debugger related subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureDebuggerInsertNops : SubtargetFeature<
- "amdgpu-debugger-insert-nops",
- "DebuggerInsertNops",
- "true",
- "Insert one nop instruction for each high level source statement"
->;
+// TODO: Organize more features into groups.
+def FeatureGroup {
+ // Bugs present on gfx10.1.
+ list<SubtargetFeature> GFX10_1_Bugs = [
+ FeatureVcmpxPermlaneHazard,
+ FeatureVMEMtoScalarWriteHazard,
+ FeatureSMEMtoVectorWriteHazard,
+ FeatureInstFwdPrefetchBug,
+ FeatureVcmpxExecWARHazard,
+ FeatureLdsBranchVmemWARHazard,
+ FeatureNSAtoVMEMBug,
+ FeatureOffset3fBug,
+ FeatureFlatSegmentOffsetBug
+ ];
+}
-def FeatureDebuggerEmitPrologue : SubtargetFeature<
- "amdgpu-debugger-emit-prologue",
- "DebuggerEmitPrologue",
- "true",
- "Emit debugger prologue"
->;
+def FeatureISAVersion10_1_0 : FeatureSet<
+ !listconcat(FeatureGroup.GFX10_1_Bugs,
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize32,
+ FeatureScalarStores,
+ FeatureScalarAtomics,
+ FeatureScalarFlatScratchInsts,
+ FeatureLdsMisalignedBug,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3])>;
+
+def FeatureISAVersion10_1_1 : FeatureSet<
+ !listconcat(FeatureGroup.GFX10_1_Bugs,
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize32,
+ FeatureScalarStores,
+ FeatureScalarAtomics,
+ FeatureScalarFlatScratchInsts,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3])>;
+
+def FeatureISAVersion10_1_2 : FeatureSet<
+ !listconcat(FeatureGroup.GFX10_1_Bugs,
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize32,
+ FeatureScalarStores,
+ FeatureScalarAtomics,
+ FeatureScalarFlatScratchInsts,
+ FeatureLdsMisalignedBug,
+ FeatureDoesNotSupportXNACK,
+ FeatureCodeObjectV3])>;
//===----------------------------------------------------------------------===//
@@ -682,23 +955,71 @@ def NullALU : InstrItinClass;
// Predicate helper class
//===----------------------------------------------------------------------===//
-def isSICI : Predicate<
- "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->, AssemblerPredicate<"!FeatureGCN3Encoding">;
+def isGFX6 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureSouthernIslands">;
+
+def isGFX6GFX7 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
+
+def isGFX6GFX7GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<"!FeatureGCN3Encoding">;
+
+def isGFX7Only :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
+
+def isGFX7GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
+
+def isGFX7GFX8GFX9 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
+
+def isGFX6GFX7GFX8GFX9 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"!FeatureGFX10Insts">;
+
+def isGFX7Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"FeatureCIInsts">;
+
+def isGFX8Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGFX8Insts">;
-def isVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate<"FeatureGCN3Encoding">;
+def isGFX8Only : Predicate<"Subtarget->getGeneration() =="
+ "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate <"FeatureVolcanicIslands">;
-def isGFX9 : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
+def isGFX9Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX9Insts">;
-// TODO: Either the name to be changed or we simply use IsCI!
-def isCIVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
- AssemblerPredicate<"FeatureCIInsts">;
+def isGFX9Only : Predicate <
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts">;
+
+def isGFX8GFX9 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
+
+def isGFX10Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<"FeatureGFX10Insts">;
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<"FeatureFlatAddressSpace">;
@@ -707,6 +1028,8 @@ def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
AssemblerPredicate<"FeatureFlatGlobalInsts">;
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
AssemblerPredicate<"FeatureFlatScratchInsts">;
+def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
+ AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<"FeatureGFX9Insts">;
@@ -716,7 +1039,7 @@ def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
AssemblerPredicate<"!FeatureUnpackedD16VMem">;
def D16PreservesUnusedBits :
- Predicate<"Subtarget->hasD16LoadStore() && !Subtarget->isSRAMECCEnabled()">,
+ Predicate<"Subtarget->d16PreservesUnusedBits()">,
AssemblerPredicate<"FeatureGFX9Insts,!FeatureSRAMECC">;
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
@@ -728,38 +1051,54 @@ def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
AssemblerPredicate<"FeatureAddNoCarryInsts">;
-def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">,
- AssemblerPredicate<"!FeatureAddNoCarryInsts">;
+def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
AssemblerPredicate<"Feature16BitInsts">;
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<"FeatureVOP3P">;
-def NotHasVOP3PInsts : Predicate<"!Subtarget->hasVOP3PInsts()">,
- AssemblerPredicate<"!FeatureVOP3P">;
-
def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">;
-def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">,
- AssemblerPredicate<"FeatureSDWA,FeatureGFX9">;
+def HasSDWA9 :
+ Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
+
+def HasSDWA10 :
+ Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
def HasDPP : Predicate<"Subtarget->hasDPP()">,
- AssemblerPredicate<"FeatureDPP">;
+ AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
+
+def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP8">;
def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
AssemblerPredicate<"FeatureR128A16">;
+def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
AssemblerPredicate<"FeatureMadMixInsts">;
+def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
+ AssemblerPredicate<"FeatureScalarStores">;
+
def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
AssemblerPredicate<"FeatureScalarAtomics">;
+def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
+ AssemblerPredicate<"FeatureNoSdstCMPX">;
+
+def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
+ AssemblerPredicate<"!FeatureNoSdstCMPX">;
+
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
@@ -773,9 +1112,35 @@ def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
AssemblerPredicate<"FeatureDLInsts">;
-def HasDotInsts : Predicate<"Subtarget->hasDotInsts()">,
- AssemblerPredicate<"FeatureDotInsts">;
+def HasDot1Insts : Predicate<"Subtarget->hasDot1Insts()">,
+ AssemblerPredicate<"FeatureDot1Insts">;
+
+def HasDot2Insts : Predicate<"Subtarget->hasDot2Insts()">,
+ AssemblerPredicate<"FeatureDot2Insts">;
+
+def HasDot3Insts : Predicate<"Subtarget->hasDot3Insts()">,
+ AssemblerPredicate<"FeatureDot3Insts">;
+
+def HasDot4Insts : Predicate<"Subtarget->hasDot4Insts()">,
+ AssemblerPredicate<"FeatureDot4Insts">;
+
+def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
+ AssemblerPredicate<"FeatureDot5Insts">;
+
+def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
+ AssemblerPredicate<"FeatureDot6Insts">;
+
+def HasMAIInsts : Predicate<"Subtarget->hasMAIInsts()">,
+ AssemblerPredicate<"FeatureMAIInsts">;
+
+def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">,
+ AssemblerPredicate<"FeaturePkFmacF16Inst">;
+
+def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
+ AssemblerPredicate<"FeatureAtomicFaddInsts">;
+def HasOffset3fBug : Predicate<"Subtarget->hasOffset3fBug()">,
+ AssemblerPredicate<"FeatureOffset3fBug">;
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
@@ -784,7 +1149,6 @@ def EnableLateCFGStructurize : Predicate<
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
-include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 73709ba13643..bba132c3bc46 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUAliasAnalysis ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -54,20 +53,21 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-// These arrays are indexed by address space value enum elements 0 ... to 6
-static const AliasResult ASAliasRules[7][7] = {
- /* Flat Global Region Group Constant Private Constant 32-bit */
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
- /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
- /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias , MayAlias},
- /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
- /* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
- /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
+// These arrays are indexed by the address space value enum, elements 0 through 7
+static const AliasResult ASAliasRules[8][8] = {
+ /* Flat Global Region Group Constant Private Constant 32-bit Buffer Fat Ptr */
+ /* Flat */ {MayAlias, MayAlias, NoAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias},
+ /* Region */ {NoAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias , NoAlias, NoAlias},
+ /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
+ /* Constant */ {MayAlias, MayAlias, NoAlias, NoAlias , NoAlias , NoAlias , MayAlias, MayAlias},
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
+ /* Constant 32-bit */ {MayAlias, MayAlias, NoAlias, NoAlias , MayAlias, NoAlias , NoAlias , MayAlias},
+ /* Buffer Fat Ptr */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias}
};
static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
- static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
+ static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 7, "Addr space out of range");
if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
return MayAlias;
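
The widened 8x8 table above keeps the alias relation symmetric and falls back to MayAlias for address spaces outside the known range. A small standalone sketch of how such a table is consumed (toy address-space numbering and a three-entry table invented for the example, not the AMDGPU values):

    #include <cassert>
    #include <cstdio>

    // Toy model with three address spaces; names and values are made up for
    // this sketch and do not match the AMDGPU enum.
    enum AddrSpace { Flat = 0, Global = 1, Local = 2 };
    enum Result { NoAlias, MayAlias };

    static const Result Rules[3][3] = {
      /*            Flat      Global    Local   */
      /* Flat   */ {MayAlias, MayAlias, MayAlias},
      /* Global */ {MayAlias, MayAlias, NoAlias },
      /* Local  */ {MayAlias, NoAlias,  MayAlias},
    };

    static Result getAliasResult(unsigned AS1, unsigned AS2) {
      if (AS1 > Local || AS2 > Local)  // unknown spaces get the conservative answer
        return MayAlias;
      return Rules[AS1][AS2];
    }

    int main() {
      // Aliasing is a symmetric relation, so the table must be symmetric too.
      for (unsigned A = 0; A <= Local; ++A)
        for (unsigned B = 0; B <= Local; ++B)
          assert(Rules[A][B] == Rules[B][A]);
      std::printf("global vs local: %s\n",
                  getAliasResult(Global, Local) == NoAlias ? "NoAlias" : "MayAlias");
    }
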
@@ -76,7 +76,8 @@ static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
}
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) {
+ const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
@@ -85,11 +86,11 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
return Result;
// Forward the query to the next alias analysis.
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
}
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) {
+ AAQueryInfo &AAQI, bool OrLocal) {
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
unsigned AS = Base->getType()->getPointerAddressSpace();
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
@@ -106,7 +107,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// Only assume constant memory for arguments on kernels.
switch (F->getCallingConv()) {
default:
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
case CallingConv::AMDGPU_LS:
case CallingConv::AMDGPU_HS:
case CallingConv::AMDGPU_ES:
@@ -133,5 +134,5 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
return true;
}
}
- return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
}
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index d76c9fc48199..fb722920900f 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -1,9 +1,8 @@
//===- AMDGPUAliasAnalysis --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -45,8 +44,10 @@ public:
/// By definition, this result is stateless and so remains valid.
bool invalidate(Function &, const PreservedAnalyses &) { return false; }
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index fc65430b745f..4c1dbd4c5304 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 896ac9c87779..419ebb2240ad 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,8 +45,11 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
+ SmallVector<CallGraphNode*, 8> NodeList;
bool addFeatureAttributes(Function &F);
+ bool processUniformWorkGroupAttribute();
+ bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
public:
static char ID;
@@ -186,7 +188,6 @@ static bool handleAttr(Function &Parent, const Function &Callee,
Parent.addFnAttr(Name);
return true;
}
-
return false;
}
@@ -213,6 +214,56 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
handleAttr(Parent, Callee, AttrName);
}
+bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
+ bool Changed = false;
+
+ for (auto *Node : reverse(NodeList)) {
+ Function *Caller = Node->getFunction();
+
+ for (auto I : *Node) {
+ Function *Callee = std::get<1>(I)->getFunction();
+ if (Callee)
+ Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
+ }
+ }
+
+ return Changed;
+}
+
+bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
+ Function &Caller, Function &Callee) {
+
+ // Check for externally defined function
+ if (!Callee.hasExactDefinition()) {
+ Callee.addFnAttr("uniform-work-group-size", "false");
+ if (!Caller.hasFnAttribute("uniform-work-group-size"))
+ Caller.addFnAttr("uniform-work-group-size", "false");
+
+ return true;
+ }
+ // Check if the Caller has the attribute
+ if (Caller.hasFnAttribute("uniform-work-group-size")) {
+ // Check if the value of the attribute is true
+ if (Caller.getFnAttribute("uniform-work-group-size")
+ .getValueAsString().equals("true")) {
+ // Propagate the attribute to the Callee, if it does not have it
+ if (!Callee.hasFnAttribute("uniform-work-group-size")) {
+ Callee.addFnAttr("uniform-work-group-size", "true");
+ return true;
+ }
+ } else {
+ Callee.addFnAttr("uniform-work-group-size", "false");
+ return true;
+ }
+ } else {
+ // If the attribute is absent, set it to false
+ Caller.addFnAttr("uniform-work-group-size", "false");
+ Callee.addFnAttr("uniform-work-group-size", "false");
+ return true;
+ }
+ return false;
+}
+
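
The propagation rules above are easier to see on a toy representation. A minimal standalone sketch of the same branch structure over an invented Fn struct and string-keyed attribute map (the real pass operates on llvm::Function attributes):

    #include <iostream>
    #include <map>
    #include <string>

    // Toy stand-in for a function: a name, a definition flag and string attrs.
    struct Fn {
      std::string Name;
      bool HasExactDefinition = true;
      std::map<std::string, std::string> Attrs;
    };

    static const char *Key = "uniform-work-group-size";

    // Mirrors the branch structure of propagateUniformWorkGroupAttribute.
    bool propagate(Fn &Caller, Fn &Callee) {
      if (!Callee.HasExactDefinition) {        // externally defined callee
        Callee.Attrs[Key] = "false";
        if (!Caller.Attrs.count(Key))
          Caller.Attrs[Key] = "false";
        return true;
      }
      if (Caller.Attrs.count(Key)) {
        if (Caller.Attrs[Key] == "true") {     // propagate "true" downwards
          if (!Callee.Attrs.count(Key)) {
            Callee.Attrs[Key] = "true";
            return true;
          }
        } else {
          Callee.Attrs[Key] = "false";
          return true;
        }
      } else {                                 // absent attribute defaults to "false"
        Caller.Attrs[Key] = "false";
        Callee.Attrs[Key] = "false";
        return true;
      }
      return false;
    }

    int main() {
      Fn Kernel, Helper;
      Kernel.Name = "kernel";
      Helper.Name = "helper";
      Kernel.Attrs[Key] = "true";
      propagate(Kernel, Helper);
      std::cout << Helper.Name << ": " << Helper.Attrs[Key] << "\n"; // "helper: true"
    }
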
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
bool HasFlat = ST.hasFlatAddressSpace();
@@ -293,15 +344,21 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
}
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
- Module &M = SCC.getCallGraph().getModule();
- Triple TT(M.getTargetTriple());
-
bool Changed = false;
+
for (CallGraphNode *I : SCC) {
+ // Build a list of CallGraphNodes, ordered from most uses to least
+ if (I->getNumReferences())
+ NodeList.push_back(I);
+ else {
+ processUniformWorkGroupAttribute();
+ NodeList.clear();
+ }
+
Function *F = I->getFunction();
+ // Add feature attributes
if (!F || F->isDeclaration())
continue;
-
Changed |= addFeatureAttributes(*F);
}
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index f88e3b0dac86..71121ade0a49 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index 7465cf22b5a4..99a01ca3a2fd 100644
--- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -1,15 +1,15 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,9 +27,16 @@ void ArgDescriptor::print(raw_ostream &OS,
}
if (isRegister())
- OS << "Reg " << printReg(getRegister(), TRI) << '\n';
+ OS << "Reg " << printReg(getRegister(), TRI);
else
- OS << "Stack offset " << getStackOffset() << '\n';
+ OS << "Stack offset " << getStackOffset();
+
+ if (isMasked()) {
+ OS << " & ";
+ llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
+ }
+
+ OS << '\n';
}
char AMDGPUArgumentUsageInfo::ID = 0;
diff --git a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index f0e6d1b83f15..097730441ed8 100644
--- a/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -1,9 +1,8 @@
//==- AMDGPUArgumentUsageInfo.h - Function Arg Usage Info --------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
@@ -29,22 +29,31 @@ private:
friend class AMDGPUArgumentUsageInfo;
union {
- unsigned Register;
+ Register Reg;
unsigned StackOffset;
};
+ // Bitmask to locate argument within the register.
+ unsigned Mask;
+
bool IsStack : 1;
bool IsSet : 1;
- ArgDescriptor(unsigned Val = 0, bool IsStack = false, bool IsSet = false)
- : Register(Val), IsStack(IsStack), IsSet(IsSet) {}
public:
- static ArgDescriptor createRegister(unsigned Reg) {
- return ArgDescriptor(Reg, false, true);
+ ArgDescriptor(unsigned Val = 0, unsigned Mask = ~0u,
+ bool IsStack = false, bool IsSet = false)
+ : Reg(Val), Mask(Mask), IsStack(IsStack), IsSet(IsSet) {}
+
+ static ArgDescriptor createRegister(Register Reg, unsigned Mask = ~0u) {
+ return ArgDescriptor(Reg, Mask, false, true);
+ }
+
+ static ArgDescriptor createStack(Register Reg, unsigned Mask = ~0u) {
+ return ArgDescriptor(Reg, Mask, true, true);
}
- static ArgDescriptor createStack(unsigned Reg) {
- return ArgDescriptor(Reg, true, true);
+ static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) {
+ return ArgDescriptor(Arg.Reg, Mask, Arg.IsStack, Arg.IsSet);
}
bool isSet() const {
@@ -59,9 +68,9 @@ public:
return !IsStack;
}
- unsigned getRegister() const {
+ Register getRegister() const {
assert(!IsStack);
- return Register;
+ return Reg;
}
unsigned getStackOffset() const {
@@ -69,6 +78,14 @@ public:
return StackOffset;
}
+ unsigned getMask() const {
+ return Mask;
+ }
+
+ bool isMasked() const {
+ return Mask != ~0u;
+ }
+
void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
};
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 2ded7cdb6489..743ac64b8f10 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
@@ -31,10 +30,12 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -100,7 +101,7 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
- if (IsaInfo::hasCodeObjectV3(getSTI()))
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
HSAMetadataStream.reset(new MetadataStreamerV3());
else
HSAMetadataStream.reset(new MetadataStreamerV2());
@@ -110,7 +111,7 @@ StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
}
-const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
+const MCSubtargetInfo *AMDGPUAsmPrinter::getGlobalSTI() const {
return TM.getMCSubtargetInfo();
}
@@ -121,10 +122,10 @@ AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const {
}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
- if (IsaInfo::hasCodeObjectV3(getSTI())) {
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
std::string ExpectedTarget;
raw_string_ostream ExpectedTargetOS(ExpectedTarget);
- IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
+ IsaInfo::streamIsaVersion(getGlobalSTI(), ExpectedTargetOS);
getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
}
@@ -137,9 +138,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
HSAMetadataStream->begin(M);
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
- readPALMetadata(M);
+ getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (IsaInfo::hasCodeObjectV3(getSTI()))
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
return;
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -147,7 +148,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaVersion Version = getIsaVersion(getSTI()->getCPU());
+ IsaVersion Version = getIsaVersion(getGlobalSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
@@ -157,11 +158,11 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (!getTargetStreamer())
return;
- if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ if (!IsaInfo::hasCodeObjectV3(getGlobalSTI())) {
// Emit ISA Version (NT_AMD_AMDGPU_ISA).
std::string ISAVersionString;
raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+ IsaInfo::streamIsaVersion(getGlobalSTI(), ISAVersionStream);
getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
}
@@ -172,20 +173,6 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
(void)Success;
assert(Success && "Malformed HSA Metadata");
}
-
- if (!IsaInfo::hasCodeObjectV3(getSTI())) {
- // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
- // Copy the PAL metadata from the map where we collected it into a vector,
- // then write it as a .note.
- PALMD::Metadata PALMetadataVector;
- for (auto i : PALMetadataMap) {
- PALMetadataVector.push_back(i.first);
- PALMetadataVector.push_back(i.second);
- }
- getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
- }
- }
}
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -225,7 +212,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
- if (!IsaInfo::hasCodeObjectV3(getSTI()) ||
+
+ if (!IsaInfo::hasCodeObjectV3(getGlobalSTI()) ||
TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
@@ -243,23 +231,25 @@ void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
if (ReadOnlySection.getAlignment() < 64)
ReadOnlySection.setAlignment(64);
+ const MCSubtargetInfo &STI = MF->getSubtarget();
+
SmallString<128> KernelName;
getNameWithPrefix(KernelName, &MF->getFunction());
getTargetStreamer()->EmitAmdhsaKernelDescriptor(
- *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
+ STI, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI(),
+ IsaInfo::getNumExtraSGPRs(&STI,
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
- hasXNACK(*getSTI()));
+ hasXNACK(STI));
Streamer.PopSection();
}
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
+ if (IsaInfo::hasCodeObjectV3(getGlobalSTI()) &&
TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::EmitFunctionEntryLabel();
return;
@@ -273,8 +263,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
getTargetStreamer()->EmitAMDGPUSymbolType(
SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
}
- const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
- if (STI.dumpCode()) {
+ if (DumpCodeInstEmitter) {
// Disassemble function name label to text.
DisasmLines.push_back(MF->getName().str() + ":");
DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
@@ -285,8 +274,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
}
void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
- const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
- if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
+ if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
// Write a line for the basic block label if it is not only fallthrough.
DisasmLines.push_back(
(Twine("BB") + Twine(getFunctionNumber())
@@ -298,38 +286,57 @@ void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
}
void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
+ OutContext.reportError({},
+ Twine(GV->getName()) +
+ ": unsupported initializer for address space");
+ return;
+ }
+
+ // LDS variables aren't emitted in HSA or PAL yet.
+ const Triple::OSType OS = TM.getTargetTriple().getOS();
+ if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+ return;
- // Group segment variables aren't emitted in HSA.
- if (AMDGPU::isGroupSegment(GV))
+ MCSymbol *GVSym = getSymbol(GV);
+
+ GVSym->redefineIfPossible();
+ if (GVSym->isDefined() || GVSym->isVariable())
+ report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
+ unsigned Align = GV->getAlignment();
+ if (!Align)
+ Align = 4;
+
+ EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
+ EmitLinkage(GV, GVSym);
+ if (auto TS = getTargetStreamer())
+ TS->emitAMDGPULDS(GVSym, Size, Align);
return;
+ }
AsmPrinter::EmitGlobalVariable(GV);
}
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
CallGraphResourceInfo.clear();
- return AsmPrinter::doFinalization(M);
-}
-// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the
-// frontend into our PALMetadataMap, ready for per-function modification. It
-// is a NamedMD containing an MDTuple containing a number of MDNodes each of
-// which is an integer value, and each two integer values forms a key=value
-// pair that we store as PALMetadataMap[key]=value in the map.
-void AMDGPUAsmPrinter::readPALMetadata(Module &M) {
- auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
- if (!NamedMD || !NamedMD->getNumOperands())
- return;
- auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
- if (!Tuple)
- return;
- for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
- auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
- auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
- if (!Key || !Val)
- continue;
- PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue();
+ // Pad with s_code_end to help tools and guard against instruction prefetch
+ // causing stale data in caches. Arguably this should be done by the linker,
+ // which is why this isn't done for Mesa.
+ const MCSubtargetInfo &STI = *getGlobalSTI();
+ if (AMDGPU::isGFX10(STI) &&
+ (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
+ STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ getTargetStreamer()->EmitCodeEnd();
}
+
+ return AsmPrinter::doFinalization(M);
}
// Print comments that apply to both callable functions and entry points.
@@ -376,6 +383,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
+ if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
return KernelCodeProperties;
}
@@ -435,6 +446,18 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
}
+ DumpCodeInstEmitter = nullptr;
+ if (STM.dumpCode()) {
+ // For -dumpcode, get the assembler out of the streamer, even if it does
+ // not really want to let us have it. This only works with -filetype=obj.
+ bool SaveFlag = OutStreamer->getUseAssemblerInfoForParsing();
+ OutStreamer->setUseAssemblerInfoForParsing(true);
+ MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
+ OutStreamer->setUseAssemblerInfoForParsing(SaveFlag);
+ if (Assembler)
+ DumpCodeInstEmitter = Assembler->getEmitterPtr();
+ }
+
DisasmLines.clear();
HexLines.clear();
DisasmLineMaxLen = 0;
@@ -486,15 +509,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
- if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
- OutStreamer->emitRawComment(
- " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
- Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
- OutStreamer->emitRawComment(
- " DebuggerPrivateSegmentBufferSGPR: s" +
- Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false);
- }
-
OutStreamer->emitRawComment(
" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
@@ -516,7 +530,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
}
- if (STM.dumpCode()) {
+ if (DumpCodeInstEmitter) {
OutStreamer->SwitchSection(
Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0));
@@ -620,6 +634,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
HighestVGPRReg = Reg;
break;
}
+ MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg);
+ if (MRI.isPhysRegUsed(AReg)) {
+ HighestVGPRReg = AReg;
+ break;
+ }
}
MCPhysReg HighestSGPRReg = AMDGPU::NoRegister;
@@ -665,8 +684,12 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::SRC_SHARED_LIMIT:
case AMDGPU::SRC_PRIVATE_BASE:
case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SGPR_NULL:
continue;
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ llvm_unreachable("src_pops_exiting_wave_id should not be used");
+
case AMDGPU::NoRegister:
assert(MI.isDebugInstr());
continue;
@@ -687,6 +710,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::XNACK_MASK_HI:
llvm_unreachable("xnack_mask registers should not be used");
+ case AMDGPU::LDS_DIRECT:
+ llvm_unreachable("lds_direct register should not be used");
+
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
@@ -695,6 +721,15 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
case AMDGPU::TMA_HI:
llvm_unreachable("trap handler registers should not be used");
+ case AMDGPU::SRC_VCCZ:
+ llvm_unreachable("src_vccz register should not be used");
+
+ case AMDGPU::SRC_EXECZ:
+ llvm_unreachable("src_execz register should not be used");
+
+ case AMDGPU::SRC_SCC:
+ llvm_unreachable("src_scc register should not be used");
+
default:
break;
}
@@ -707,6 +742,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VGPR_32RegClass.contains(Reg)) {
IsSGPR = false;
Width = 1;
+ } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -715,9 +753,14 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
IsSGPR = false;
Width = 2;
+ } else if (AMDGPU::AReg_64RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 2;
} else if (AMDGPU::VReg_96RegClass.contains(Reg)) {
IsSGPR = false;
Width = 3;
+ } else if (AMDGPU::SReg_96RegClass.contains(Reg)) {
+ Width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -726,6 +769,9 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
IsSGPR = false;
Width = 4;
+ } else if (AMDGPU::AReg_128RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 4;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
"trap handler registers should not be used");
@@ -742,6 +788,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
IsSGPR = false;
Width = 16;
+ } else if (AMDGPU::AReg_512RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 16;
+ } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) {
+ IsSGPR = true;
+ Width = 32;
+ } else if (AMDGPU::VReg_1024RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 32;
+ } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) {
+ IsSGPR = false;
+ Width = 32;
} else {
llvm_unreachable("Unknown register class");
}
@@ -767,8 +825,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
- ST.hasFlatAddressSpace());
+ 47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -779,9 +836,19 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
} else {
// We force CodeGen to run in SCC order, so the callee's register
// usage etc. should be the cumulative usage of all callees.
+
auto I = CallGraphResourceInfo.find(Callee);
- assert(I != CallGraphResourceInfo.end() &&
- "callee should have been handled before caller");
+ if (I == CallGraphResourceInfo.end()) {
+ // Avoid crashing on undefined behavior with an illegal call to a
+ // kernel. If a callsite's calling convention doesn't match the
+ // function's, it's undefined behavior. If the callsite calling
+ // convention does match, that would have errored earlier.
+ // FIXME: The verifier shouldn't allow this.
+ if (AMDGPU::isEntryFunctionCC(Callee->getCallingConv()))
+ report_fatal_error("invalid call to entry function");
+
+ llvm_unreachable("callee should have been handled before caller");
+ }
MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR);
MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR);
@@ -825,14 +892,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII = STM.getInstrInfo();
- const SIRegisterInfo *RI = &TII->getRegisterInfo();
// TODO(scott.linder): The calculations related to SGPR/VGPR blocks are
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -918,24 +983,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
&STM, ProgInfo.NumVGPRsForWavesPerEU);
- // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
- // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
- // attribute was requested.
- if (STM.debuggerEmitPrologue()) {
- ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
- RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
- ProgInfo.DebuggerPrivateSegmentBufferSGPR =
- RI->getHWRegIndex(MFI->getScratchRSrcReg());
- }
-
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
ProgInfo.FloatMode = getFPMode(MF);
- ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
+ const SIModeRegisterDefaults Mode = MFI->getMode();
+ ProgInfo.IEEEMode = Mode.IEEE;
// Make clamp modifier on NaN input returns 0.
- ProgInfo.DX10Clamp = STM.enableDX10Clamp();
+ ProgInfo.DX10Clamp = Mode.DX10Clamp;
unsigned LDSAlignShift;
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
@@ -963,6 +1019,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1ULL << ScratchAlignShift) >>
ScratchAlignShift;
+ if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
+ ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
+ ProgInfo.MemOrdered = 1;
+ }
+
ProgInfo.ComputePGMRSrc1 =
S_00B848_VGPRS(ProgInfo.VGPRBlocks) |
S_00B848_SGPRS(ProgInfo.SGPRBlocks) |
@@ -971,7 +1032,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
S_00B848_PRIV(ProgInfo.Priv) |
S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
- S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+ S_00B848_IEEE_MODE(ProgInfo.IEEEMode) |
+ S_00B848_WGP_MODE(ProgInfo.WgpMode) |
+ S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
// 0 = X, 1 = XY, 2 = XYZ
unsigned TIDIGCompCnt = 0;
@@ -1053,71 +1116,38 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
-// metadata items into the PALMetadataMap, combining with any provided by the
-// frontend as LLVM metadata. Once all functions are written, PALMetadataMap is
-// then written as a single block in the .note section.
+// metadata items into the PALMD::Metadata, combining with any provided by the
+// frontend as LLVM metadata. Once all functions are written, the PAL metadata
+// is then written as a single block in the .note section.
void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- // Given the calling convention, calculate the register number for rsrc1. In
- // principle the register number could change in future hardware, but we know
- // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
- // we can use the same fixed value that .AMDGPU.config has for Mesa. Note
- // that we use a register number rather than a byte offset, so we need to
- // divide by 4.
- unsigned Rsrc1Reg = getRsrcReg(MF.getFunction().getCallingConv()) / 4;
- unsigned Rsrc2Reg = Rsrc1Reg + 1;
- // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
- // with a constant offset to access any non-register shader-specific PAL
- // metadata key.
- unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE;
- switch (MF.getFunction().getCallingConv()) {
- case CallingConv::AMDGPU_PS:
- ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_VS:
- ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_GS:
- ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_ES:
- ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_HS:
- ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE;
- break;
- case CallingConv::AMDGPU_LS:
- ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE;
- break;
- }
- unsigned NumUsedVgprsKey = ScratchSizeKey +
- PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE;
- unsigned NumUsedSgprsKey = ScratchSizeKey +
- PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE;
- PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
- PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
+ auto CC = MF.getFunction().getCallingConv();
+ auto MD = getTargetStreamer()->getPALMetadata();
+
+ MD->setEntryPoint(CC, MF.getFunction().getName());
+ MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
- PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
- PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
- // ScratchSize is in bytes, 16 aligned.
- PALMetadataMap[ScratchSizeKey] |=
- alignTo(CurrentProgramInfo.ScratchSize, 16);
+ MD->setRsrc1(CC, CurrentProgramInfo.ComputePGMRSrc1);
+ MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
} else {
- PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
- S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
+ MD->setRsrc1(CC, S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
+ S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks));
if (CurrentProgramInfo.ScratchBlocks > 0)
- PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
- // ScratchSize is in bytes, 16 aligned.
- PALMetadataMap[ScratchSizeKey] |=
- alignTo(CurrentProgramInfo.ScratchSize, 16);
+ MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
}
+ // ScratchSize is in bytes, 16 aligned.
+ MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
- PALMetadataMap[Rsrc2Reg] |=
- S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
- PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
- PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
+ MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks));
+ MD->setSpiPsInputEna(MFI->getPSInputEnable());
+ MD->setSpiPsInputAddr(MFI->getPSInputAddr());
}
+
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ if (STM.isWave32())
+ MD->setWave32(MF.getFunction().getCallingConv());
}
// This is supposed to be log2(Size)
@@ -1144,12 +1174,12 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
(CurrentProgramInfo.ComputePGMRSrc2 << 32);
- Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
if (CurrentProgramInfo.DynamicCallStack)
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
@@ -1181,9 +1211,6 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (MFI->hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (STM.debuggerSupported())
- Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
-
if (STM.isXNACKEnabled())
Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
@@ -1196,22 +1223,14 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
- Out.kernarg_segment_alignment = std::max((size_t)4,
+ Out.kernarg_segment_alignment = std::max<size_t>(4,
countTrailingZeros(MaxKernArgAlign));
-
- if (STM.debuggerEmitPrologue()) {
- Out.debug_wavefront_private_segment_offset_sgpr =
- CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
- Out.debug_private_segment_buffer_sgpr =
- CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR;
- }
}
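
kernarg_segment_alignment stores the alignment as an exponent n with alignment = 2^n, clamped to a minimum of 2^4 = 16 bytes, which is what the std::max<size_t>(4, countTrailingZeros(...)) expression above computes. A small standalone sketch of the same encoding (hypothetical helper name, using the compiler builtin directly):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Encode a power-of-two byte alignment as the log2 exponent used by
    // amd_kernel_code_t, with a floor of 2^4 = 16 bytes.
    uint8_t encodeKernargAlignment(uint32_t AlignBytes) {
      assert(AlignBytes && (AlignBytes & (AlignBytes - 1)) == 0 &&
             "alignment must be a power of two");
      unsigned Log2 = static_cast<unsigned>(__builtin_ctz(AlignBytes));
      return static_cast<uint8_t>(std::max(4u, Log2));
    }

    int main() {
      assert(encodeKernargAlignment(8)   == 4);  // clamped up to 16 bytes
      assert(encodeKernargAlignment(16)  == 4);
      assert(encodeKernargAlignment(64)  == 6);
      assert(encodeKernargAlignment(256) == 8);
    }
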
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// First try the generic code, which knows about modifiers like 'c' and 'n'.
- if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O))
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
return false;
if (ExtraCode && ExtraCode[0]) {
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 167ac4b21e1e..cf77034329ef 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -1,9 +1,8 @@
//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,6 +32,7 @@ namespace llvm {
class AMDGPUMachineFunction;
class AMDGPUTargetStreamer;
+class MCCodeEmitter;
class MCOperand;
class GCNSubtarget;
@@ -57,12 +57,12 @@ private:
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
- std::map<uint32_t, uint32_t> PALMetadataMap;
+
+ MCCodeEmitter *DumpCodeInstEmitter = nullptr;
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;
- void readPALMetadata(Module &M);
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
@@ -95,7 +95,7 @@ public:
StringRef getPassName() const override;
- const MCSubtargetInfo* getSTI() const;
+ const MCSubtargetInfo* getGlobalSTI() const;
AMDGPUTargetStreamer* getTargetStreamer() const;
@@ -137,8 +137,7 @@ public:
const MachineBasicBlock *MBB) const override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
protected:
mutable std::vector<std::string> DisasmLines, HexLines;
diff --git a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 644e4fd558ba..8a92e7d923fb 100644
--- a/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAtomicOptimizer.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,6 +30,7 @@ namespace {
enum DPP_CTRL {
DPP_ROW_SR1 = 0x111,
DPP_ROW_SR2 = 0x112,
+ DPP_ROW_SR3 = 0x113,
DPP_ROW_SR4 = 0x114,
DPP_ROW_SR8 = 0x118,
DPP_WF_SR1 = 0x138,
@@ -40,7 +40,7 @@ enum DPP_CTRL {
struct ReplacementInfo {
Instruction *I;
- Instruction::BinaryOps Op;
+ AtomicRMWInst::BinOp Op;
unsigned ValIdx;
bool ValDivergent;
};
@@ -55,10 +55,8 @@ private:
bool HasDPP;
bool IsPixelShader;
- void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
- unsigned ValIdx, bool ValDivergent) const;
-
- void setConvergent(CallInst *const CI) const;
+ void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx,
+ bool ValDivergent) const;
public:
static char ID;
@@ -122,16 +120,20 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
break;
}
- Instruction::BinaryOps Op;
+ AtomicRMWInst::BinOp Op = I.getOperation();
- switch (I.getOperation()) {
+ switch (Op) {
default:
return;
case AtomicRMWInst::Add:
- Op = Instruction::Add;
- break;
case AtomicRMWInst::Sub:
- Op = Instruction::Sub;
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
break;
}
@@ -163,7 +165,7 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
}
void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
- Instruction::BinaryOps Op;
+ AtomicRMWInst::BinOp Op;
switch (I.getIntrinsicID()) {
default:
@@ -171,12 +173,47 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_struct_buffer_atomic_add:
case Intrinsic::amdgcn_raw_buffer_atomic_add:
- Op = Instruction::Add;
+ Op = AtomicRMWInst::Add;
break;
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
- Op = Instruction::Sub;
+ Op = AtomicRMWInst::Sub;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ Op = AtomicRMWInst::And;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ Op = AtomicRMWInst::Or;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ Op = AtomicRMWInst::Xor;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ Op = AtomicRMWInst::Min;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ Op = AtomicRMWInst::UMin;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ Op = AtomicRMWInst::Max;
+ break;
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ Op = AtomicRMWInst::UMax;
break;
}
@@ -208,12 +245,68 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
ToReplace.push_back(Info);
}
+// Use the builder to create the non-atomic counterpart of the specified
+// atomicrmw binary op.
+static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
+ Value *LHS, Value *RHS) {
+ CmpInst::Predicate Pred;
+
+ switch (Op) {
+ default:
+ llvm_unreachable("Unhandled atomic op");
+ case AtomicRMWInst::Add:
+ return B.CreateBinOp(Instruction::Add, LHS, RHS);
+ case AtomicRMWInst::Sub:
+ return B.CreateBinOp(Instruction::Sub, LHS, RHS);
+ case AtomicRMWInst::And:
+ return B.CreateBinOp(Instruction::And, LHS, RHS);
+ case AtomicRMWInst::Or:
+ return B.CreateBinOp(Instruction::Or, LHS, RHS);
+ case AtomicRMWInst::Xor:
+ return B.CreateBinOp(Instruction::Xor, LHS, RHS);
+
+ case AtomicRMWInst::Max:
+ Pred = CmpInst::ICMP_SGT;
+ break;
+ case AtomicRMWInst::Min:
+ Pred = CmpInst::ICMP_SLT;
+ break;
+ case AtomicRMWInst::UMax:
+ Pred = CmpInst::ICMP_UGT;
+ break;
+ case AtomicRMWInst::UMin:
+ Pred = CmpInst::ICMP_ULT;
+ break;
+ }
+ Value *Cond = B.CreateICmp(Pred, LHS, RHS);
+ return B.CreateSelect(Cond, LHS, RHS);
+}
+
+static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
+ unsigned BitWidth) {
+ switch (Op) {
+ default:
+ llvm_unreachable("Unhandled atomic op");
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ case AtomicRMWInst::UMax:
+ return APInt::getMinValue(BitWidth);
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::UMin:
+ return APInt::getMaxValue(BitWidth);
+ case AtomicRMWInst::Max:
+ return APInt::getSignedMinValue(BitWidth);
+ case AtomicRMWInst::Min:
+ return APInt::getSignedMaxValue(BitWidth);
+ }
+}
+
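
getIdentityValueForAtomicOp above picks, for each operation, the value that leaves any operand unchanged, so that lanes disabled by set_inactive contribute nothing to the reduction. A quick standalone check of that property (plain C++, independent of the pass):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main() {
      uint32_t X = 0xDEADBEEFu;
      int32_t  S = -42;

      assert((X + 0u) == X);                                           // add/sub: 0
      assert((X | 0u) == X && (X ^ 0u) == X);                          // or/xor: 0
      assert((X & std::numeric_limits<uint32_t>::max()) == X);         // and: all ones
      assert(std::max(X, std::numeric_limits<uint32_t>::min()) == X);  // umax: 0
      assert(std::min(X, std::numeric_limits<uint32_t>::max()) == X);  // umin: UINT_MAX
      assert(std::max(S, std::numeric_limits<int32_t>::min()) == S);   // smax: INT_MIN
      assert(std::min(S, std::numeric_limits<int32_t>::max()) == S);   // smin: INT_MAX
    }
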
void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
- Instruction::BinaryOps Op,
+ AtomicRMWInst::BinOp Op,
unsigned ValIdx,
bool ValDivergent) const {
- LLVMContext &Context = I.getContext();
-
// Start building just before the instruction.
IRBuilder<> B(&I);
@@ -251,115 +344,130 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
Value *const V = I.getOperand(ValIdx);
// We need to know how many lanes are active within the wavefront, and we do
- // this by getting the exec register, which tells us all the lanes that are
- // active.
- MDNode *const RegName =
- llvm::MDNode::get(Context, llvm::MDString::get(Context, "exec"));
- Value *const Metadata = llvm::MetadataAsValue::get(Context, RegName);
- CallInst *const Exec =
- B.CreateIntrinsic(Intrinsic::read_register, {B.getInt64Ty()}, {Metadata});
- setConvergent(Exec);
+ // this by doing a ballot of active lanes.
+ CallInst *const Ballot = B.CreateIntrinsic(
+ Intrinsic::amdgcn_icmp, {B.getInt64Ty(), B.getInt32Ty()},
+ {B.getInt32(1), B.getInt32(0), B.getInt32(CmpInst::ICMP_NE)});
// We need to know how many lanes are active within the wavefront that are
// below us. If we counted each lane linearly starting from 0, a lane is
// below us only if its associated index was less than ours. We do this by
// using the mbcnt intrinsic.
- Value *const BitCast = B.CreateBitCast(Exec, VecTy);
+ Value *const BitCast = B.CreateBitCast(Ballot, VecTy);
Value *const ExtractLo = B.CreateExtractElement(BitCast, B.getInt32(0));
Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
CallInst *const PartialMbcnt = B.CreateIntrinsic(
Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
- CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
- {ExtractHi, PartialMbcnt});
+ Value *const Mbcnt =
+ B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
+ {ExtractHi, PartialMbcnt}),
+ Ty, false);
- Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
+ Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
- Value *LaneOffset = nullptr;
+ Value *ExclScan = nullptr;
Value *NewV = nullptr;
// If we have a divergent value in each lane, we need to combine the value
// using DPP.
if (ValDivergent) {
- // First we need to set all inactive invocations to 0, so that they can
- // correctly contribute to the final result.
- CallInst *const SetInactive = B.CreateIntrinsic(
- Intrinsic::amdgcn_set_inactive, Ty, {V, B.getIntN(TyBitWidth, 0)});
- setConvergent(SetInactive);
- NewV = SetInactive;
-
- const unsigned Iters = 6;
- const unsigned DPPCtrl[Iters] = {DPP_ROW_SR1, DPP_ROW_SR2,
- DPP_ROW_SR4, DPP_ROW_SR8,
- DPP_ROW_BCAST15, DPP_ROW_BCAST31};
- const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
-
- // This loop performs an inclusive scan across the wavefront, with all lanes
+ // First we need to set all inactive invocations to the identity value, so
+ // that they can correctly contribute to the final result.
+ CallInst *const SetInactive =
+ B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
+
+ CallInst *const FirstDPP =
+ B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
+ {Identity, SetInactive, B.getInt32(DPP_WF_SR1),
+ B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
+ ExclScan = FirstDPP;
+
+ const unsigned Iters = 7;
+ const unsigned DPPCtrl[Iters] = {
+ DPP_ROW_SR1, DPP_ROW_SR2, DPP_ROW_SR3, DPP_ROW_SR4,
+ DPP_ROW_SR8, DPP_ROW_BCAST15, DPP_ROW_BCAST31};
+ const unsigned RowMask[Iters] = {0xf, 0xf, 0xf, 0xf, 0xf, 0xa, 0xc};
+ const unsigned BankMask[Iters] = {0xf, 0xf, 0xf, 0xe, 0xc, 0xf, 0xf};
+
+ // This loop performs an exclusive scan across the wavefront, with all lanes
// active (by using the WWM intrinsic).
for (unsigned Idx = 0; Idx < Iters; Idx++) {
- CallInst *const DPP = B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, Ty,
- {NewV, B.getInt32(DPPCtrl[Idx]),
- B.getInt32(RowMask[Idx]),
- B.getInt32(0xf), B.getFalse()});
- setConvergent(DPP);
- Value *const WWM = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
-
- NewV = B.CreateBinOp(Op, NewV, WWM);
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
+ Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan;
+ CallInst *const DPP = B.CreateIntrinsic(
+ Intrinsic::amdgcn_update_dpp, Ty,
+ {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
+ B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
+
+ ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP);
}
- // NewV has returned the inclusive scan of V, but for the lane offset we
- // require an exclusive scan. We do this by shifting the values from the
- // entire wavefront right by 1, and by setting the bound_ctrl (last argument
- // to the intrinsic below) to true, we can guarantee that 0 will be shifted
- // into the 0'th invocation.
- CallInst *const DPP =
- B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp, {Ty},
- {NewV, B.getInt32(DPP_WF_SR1), B.getInt32(0xf),
- B.getInt32(0xf), B.getTrue()});
- setConvergent(DPP);
- LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, DPP);
+ NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan);
// Read the value from the last lane, which has accumulated the values of
- // each active lane in the wavefront. This will be our new value with which
- // we will provide to the atomic operation.
+ // each active lane in the wavefront. This will be our new value which we
+ // will provide to the atomic operation.
if (TyBitWidth == 64) {
Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
Value *const ExtractHi =
B.CreateTrunc(B.CreateLShr(NewV, B.getInt64(32)), B.getInt32Ty());
CallInst *const ReadLaneLo = B.CreateIntrinsic(
Intrinsic::amdgcn_readlane, {}, {ExtractLo, B.getInt32(63)});
- setConvergent(ReadLaneLo);
CallInst *const ReadLaneHi = B.CreateIntrinsic(
Intrinsic::amdgcn_readlane, {}, {ExtractHi, B.getInt32(63)});
- setConvergent(ReadLaneHi);
Value *const PartialInsert = B.CreateInsertElement(
UndefValue::get(VecTy), ReadLaneLo, B.getInt32(0));
Value *const Insert =
B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
NewV = B.CreateBitCast(Insert, Ty);
} else if (TyBitWidth == 32) {
- CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
- {}, {NewV, B.getInt32(63)});
- setConvergent(ReadLane);
- NewV = ReadLane;
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+ {NewV, B.getInt32(63)});
} else {
llvm_unreachable("Unhandled atomic bit width");
}
+
+ // Finally mark the readlanes in the WWM section.
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
} else {
- // Get the total number of active lanes we have by using popcount.
- Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Exec);
- Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
-
- // Calculate the new value we will be contributing to the atomic operation
- // for the entire wavefront.
- NewV = B.CreateMul(V, CtpopCast);
- LaneOffset = B.CreateMul(V, MbcntCast);
+ switch (Op) {
+ default:
+ llvm_unreachable("Unhandled atomic op");
+
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub: {
+ // The new value we will be contributing to the atomic operation is the
+ // old value times the number of active lanes.
+ Value *const Ctpop = B.CreateIntCast(
+ B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
+ NewV = B.CreateMul(V, Ctpop);
+ break;
+ }
+
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // These operations with a uniform value are idempotent: doing the atomic
+ // operation multiple times has the same effect as doing it once.
+ NewV = V;
+ break;
+
+ case AtomicRMWInst::Xor:
+ // The new value we will be contributing to the atomic operation is the
+ // old value times the parity of the number of active lanes.
+ Value *const Ctpop = B.CreateIntCast(
+ B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
+ NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
+ break;
+ }
}
// We only want a single lane to enter our new control flow, and we do this
// by checking if there are any active lanes below us. Only one lane will
// have 0 active lanes below us, so that will be the only one to progress.
- Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0));
+ Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0));
// Store I's original basic block before we split the block.
BasicBlock *const EntryBB = I.getParent();
@@ -401,20 +509,16 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
B.CreateTrunc(B.CreateLShr(PHI, B.getInt64(32)), B.getInt32Ty());
CallInst *const ReadFirstLaneLo =
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractLo);
- setConvergent(ReadFirstLaneLo);
CallInst *const ReadFirstLaneHi =
B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, ExtractHi);
- setConvergent(ReadFirstLaneHi);
Value *const PartialInsert = B.CreateInsertElement(
UndefValue::get(VecTy), ReadFirstLaneLo, B.getInt32(0));
Value *const Insert =
B.CreateInsertElement(PartialInsert, ReadFirstLaneHi, B.getInt32(1));
BroadcastI = B.CreateBitCast(Insert, Ty);
} else if (TyBitWidth == 32) {
- CallInst *const ReadFirstLane =
- B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
- setConvergent(ReadFirstLane);
- BroadcastI = ReadFirstLane;
+
+ BroadcastI = B.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, PHI);
} else {
llvm_unreachable("Unhandled atomic bit width");
}
@@ -423,7 +527,31 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// get our individual lane's slice into the result. We use the lane offset we
// previously calculated combined with the atomic result value we got from the
// first lane, to get our lane's index into the atomic result.
- Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset);
+ Value *LaneOffset = nullptr;
+ if (ValDivergent) {
+ LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
+ } else {
+ switch (Op) {
+ default:
+ llvm_unreachable("Unhandled atomic op");
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ LaneOffset = B.CreateMul(V, Mbcnt);
+ break;
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ LaneOffset = B.CreateSelect(Cond, Identity, V);
+ break;
+ case AtomicRMWInst::Xor:
+ LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+ break;
+ }
+ }
+ Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
if (IsPixelShader) {
// Need a final PHI to reconverge to above the helper lane branch mask.
@@ -442,10 +570,6 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
I.eraseFromParent();
}
-void AMDGPUAtomicOptimizer::setConvergent(CallInst *const CI) const {
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
-}
-
INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
"AMDGPU atomic optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
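For reference, a minimal host-side sketch (not part of the patch) of the reduction strategy the pass builds above: each lane's value feeds an exclusive prefix scan seeded with the operation's identity value so inactive lanes contribute nothing, the last lane's inclusive result is what the single real atomic receives, and each lane's exclusive value is its offset into the returned result (in the uniform case, add/sub instead scale by the live-lane count and xor by its parity). The enum, identity table and lane values below are illustrative assumptions, not the pass's actual types.

// Sketch only: scalar model of the identity-seeded exclusive scan.
#include <cstdint>
#include <cstdio>
#include <vector>

enum class BinOp { Add, UMax, UMin };

static uint32_t identity(BinOp Op) {
  switch (Op) {
  case BinOp::Add:  return 0;           // x + 0 == x
  case BinOp::UMax: return 0;           // max(x, 0) == x
  case BinOp::UMin: return UINT32_MAX;  // min(x, ~0u) == x
  }
  return 0;
}

static uint32_t apply(BinOp Op, uint32_t A, uint32_t B) {
  switch (Op) {
  case BinOp::Add:  return A + B;
  case BinOp::UMax: return A > B ? A : B;
  case BinOp::UMin: return A < B ? A : B;
  }
  return A;
}

int main() {
  BinOp Op = BinOp::Add;
  std::vector<uint32_t> Lanes = {3, 7, 0, 5};   // one value per active lane
  uint32_t Running = identity(Op);
  for (uint32_t V : Lanes) {
    uint32_t Incl = apply(Op, Running, V);
    // Exclusive value (printed before combining) is the lane's offset;
    // the final inclusive value (15 for Add) goes to the one real atomic.
    printf("excl=%u incl=%u\n", (unsigned)Running, (unsigned)Incl);
    Running = Incl;
  }
}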
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index daef37f9c21f..b107c357196d 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1,9 +1,8 @@
//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -21,28 +20,98 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;
+namespace {
+
+struct OutgoingArgHandler : public CallLowering::ValueHandler {
+ OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ MachineInstrBuilder MIB;
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ llvm_unreachable("not implemented");
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("not implemented");
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign &VA) override {
+ MIB.addUse(PhysReg);
+ MIRBuilder.buildCopy(PhysReg, ValVReg);
+ }
+
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info,
+ CCState &State) override {
+ return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ }
+};
+
+}
+
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
- ArrayRef<unsigned> VRegs) const {
- // FIXME: Add support for non-void returns.
- if (Val)
+ ArrayRef<Register> VRegs) const {
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MFI->setIfReturnsVoid(!Val);
+
+ if (!Val) {
+ MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+ return true;
+ }
+
+ Register VReg = VRegs[0];
+
+ const Function &F = MF.getFunction();
+ auto &DL = F.getParent()->getDataLayout();
+ if (!AMDGPU::isShader(F.getCallingConv()))
return false;
- MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
+
+ const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ArgInfo OrigArg{VReg, Val->getType()};
+ setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
+ for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+ Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
+ SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+ }
+ auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
+ OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ return false;
+ MIRBuilder.insertInstr(RetInstr);
+
return true;
}
-unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
+Register AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
Type *ParamTy,
uint64_t Offset) const {
@@ -53,12 +122,12 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
LLT PtrType = getLLTForType(*PtrTy, DL);
- unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
- unsigned KernArgSegmentPtr =
+ Register DstReg = MRI.createGenericVirtualRegister(PtrType);
+ Register KernArgSegmentPtr =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
- unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
+ Register KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
- unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ Register OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
MIRBuilder.buildConstant(OffsetReg, Offset);
MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
@@ -69,14 +138,14 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
Type *ParamTy, uint64_t Offset,
unsigned Align,
- unsigned DstReg) const {
+ Register DstReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
- unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
+ Register PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
@@ -87,93 +156,233 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
-bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function &F,
- ArrayRef<unsigned> VRegs) const {
- // AMDGPU_GS and AMDGP_HS are not supported yet.
- if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
- F.getCallingConv() == CallingConv::AMDGPU_HS)
- return false;
+static Register findFirstFreeSGPR(CCState &CCInfo) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
+ if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
+ return AMDGPU::SGPR0 + Reg;
+ }
+ }
+ llvm_unreachable("Cannot allocate sgpr");
+}
- MachineFunction &MF = MIRBuilder.getMF();
- const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ const LLT S32 = LLT::scalar(32);
MachineRegisterInfo &MRI = MF.getRegInfo();
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
- const DataLayout &DL = F.getParent()->getDataLayout();
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ if (Info.hasWorkItemIDX()) {
+ Register Reg = AMDGPU::VGPR0;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+ CCInfo.AllocateReg(Reg);
+ Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
+ }
+
+ if (Info.hasWorkItemIDY()) {
+ Register Reg = AMDGPU::VGPR1;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+ CCInfo.AllocateReg(Reg);
+ Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
+ }
+
+ if (Info.hasWorkItemIDZ()) {
+ Register Reg = AMDGPU::VGPR2;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+ CCInfo.AllocateReg(Reg);
+ Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
+ }
+}
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineIRBuilder &MIRBuilder,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info->hasPrivateSegmentBuffer()) {
- unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
- MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ if (Info.hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info->hasDispatchPtr()) {
- unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
- // FIXME: Need to add reg as live-in
+ if (Info.hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
+ MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info->hasQueuePtr()) {
- unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
- // FIXME: Need to add reg as live-in
+ if (Info.hasQueuePtr()) {
+ unsigned QueuePtrReg = Info.addQueuePtr(TRI);
+ MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info->hasKernargSegmentPtr()) {
- unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
- const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
- unsigned VReg = MRI.createGenericVirtualRegister(P2);
+ if (Info.hasKernargSegmentPtr()) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
+ const LLT P4 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register VReg = MRI.createGenericVirtualRegister(P4);
MRI.addLiveIn(InputPtrReg, VReg);
MIRBuilder.getMBB().addLiveIn(InputPtrReg);
MIRBuilder.buildCopy(VReg, InputPtrReg);
CCInfo.AllocateReg(InputPtrReg);
}
- if (Info->hasDispatchID()) {
- unsigned DispatchIDReg = Info->addDispatchID(*TRI);
- // FIXME: Need to add reg as live-in
+ if (Info.hasDispatchID()) {
+ unsigned DispatchIDReg = Info.addDispatchID(TRI);
+ MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info->hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
- // FIXME: Need to add reg as live-in
+ if (Info.hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
+ MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+ // these from the dispatch pointer.
+}
+
+static void allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ CallingConv::ID CallConv,
+ bool IsShader) {
+ const LLT S32 = LLT::scalar(32);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (Info.hasWorkGroupIDX()) {
+ Register Reg = Info.addWorkGroupIDX();
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info.hasWorkGroupIDY()) {
+ Register Reg = Info.addWorkGroupIDY();
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info.hasWorkGroupIDZ()) {
+ unsigned Reg = Info.addWorkGroupIDZ();
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info.hasWorkGroupInfo()) {
+ unsigned Reg = Info.addWorkGroupInfo();
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info.hasPrivateSegmentWaveByteOffset()) {
+ // Scratch wave offset passed in system SGPR.
+ unsigned PrivateSegmentWaveByteOffsetReg;
+
+ if (IsShader) {
+ PrivateSegmentWaveByteOffsetReg =
+ Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
+
+ // This is true if the scratch wave byte offset doesn't have a fixed
+ // location.
+ if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
+ PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+ Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ }
+ } else
+ PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
+
+ MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+ }
+}
+
+bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+ allocateHSAUserSGPRs(CCInfo, MIRBuilder, MF, *TRI, *Info);
+
+ unsigned i = 0;
+ const unsigned KernArgBaseAlign = 16;
+ const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+ uint64_t ExplicitArgOffset = 0;
+
+ // TODO: Align down to dword alignment and extract bits for extending loads.
+ for (auto &Arg : F.args()) {
+ Type *ArgTy = Arg.getType();
+ unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
+ if (AllocSize == 0)
+ continue;
+
+ unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+
+ uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
+ ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+
+ ArrayRef<Register> OrigArgRegs = VRegs[i];
+ Register ArgReg =
+ OrigArgRegs.size() == 1
+ ? OrigArgRegs[0]
+ : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
+ unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
+ ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
+ lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
+ if (OrigArgRegs.size() > 1)
+ unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
+ ++i;
+ }
+
+ allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+ allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+ return true;
+}
+
+bool AMDGPUCallLowering::lowerFormalArguments(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
// The infrastructure for normal calling convention lowering is essentially
// useless for kernels. We want to avoid any kind of legalization or argument
// splitting.
- if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
- unsigned i = 0;
- const unsigned KernArgBaseAlign = 16;
- const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
- uint64_t ExplicitArgOffset = 0;
-
- // TODO: Align down to dword alignment and extract bits for extending loads.
- for (auto &Arg : F.args()) {
- Type *ArgTy = Arg.getType();
- unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
- if (AllocSize == 0)
- continue;
+ if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+ return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
- unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
+ // AMDGPU_GS and AMDGP_HS are not supported yet.
+ if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
+ F.getCallingConv() == CallingConv::AMDGPU_HS)
+ return false;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const DataLayout &DL = F.getParent()->getDataLayout();
- uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
- ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+ bool IsShader = AMDGPU::isShader(F.getCallingConv());
- unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
- ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
- lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
- ++i;
- }
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
- return true;
+ if (Info->hasImplicitBufferPtr()) {
+ unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+ MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
unsigned NumArgs = F.arg_size();
@@ -186,7 +395,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
// We can only handle simple value types at the moment.
ISD::ArgFlagsTy Flags;
- ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
+ assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
+ ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
setArgFlags(OrigArg, i + 1, DL, F);
Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
@@ -239,11 +449,15 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
if (Skipped.test(OrigArgIdx))
continue;
- CCValAssign &VA = ArgLocs[i++];
- MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
- MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
- MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
+ assert(VRegs[OrigArgIdx].size() == 1 &&
+ "Can't lower into more than 1 reg");
+ CCValAssign &VA = ArgLocs[i++];
+ MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
+ MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
+ MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
}
+
+ allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
return true;
}
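For reference, a stand-alone sketch (not from the patch) of the kernel-argument offset bookkeeping used in lowerFormalArgumentsKernel above: each argument is placed at its ABI alignment past a base offset, and the load alignment is the minimum power-of-two alignment of the 16-byte kernarg base and the argument's offset. The sizes and alignments below are illustrative, and alignTo/minAlign are local stand-ins for the LLVM helpers.

// Sketch only: argument offset and load-alignment computation.
#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }
static uint64_t minAlign(uint64_t A, uint64_t B) {
  // Largest power of two dividing both, mirroring llvm::MinAlign.
  uint64_t X = A | B;
  return X & (~X + 1);
}

int main() {
  struct { uint64_t Size, Align; } Args[] = {{4, 4}, {8, 8}, {2, 2}};
  const uint64_t KernArgBaseAlign = 16, BaseOffset = 0;
  uint64_t Explicit = 0;
  for (auto &A : Args) {
    uint64_t Offset = alignTo(Explicit, A.Align) + BaseOffset;
    Explicit = alignTo(Explicit, A.Align) + A.Size;
    printf("offset=%llu loadAlign=%llu\n",
           (unsigned long long)Offset,
           (unsigned long long)minAlign(KernArgBaseAlign, Offset));
  }
}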
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index ed859716218e..3599659cac6a 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -1,9 +1,8 @@
//===- lib/Target/AMDGPU/AMDGPUCallLowering.h - Call lowering -*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -23,20 +22,25 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
- unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+ Register lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
uint64_t Offset) const;
void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
uint64_t Offset, unsigned Align,
- unsigned DstReg) const;
+ Register DstReg) const;
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<Register> VRegs) const override;
+
+ bool lowerFormalArgumentsKernel(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const;
+
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
};
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 367f120b5fa6..3688cd77542e 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -1,9 +1,8 @@
//===---- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,7 +23,16 @@ def CC_SI : CallingConv<[
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
- SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+ SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
+ SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
+ SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
+ SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
+ SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
+ SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
+ SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
+ SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
+ SGPR104, SGPR105
]>>>,
// We have no way of referring to the generated register tuples
@@ -60,7 +68,16 @@ def RetCC_SI_Shader : CallingConv<[
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
- SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
+ SGPR40, SGPR41, SGPR42, SGPR43, SGPR44, SGPR45, SGPR46, SGPR47,
+ SGPR48, SGPR49, SGPR50, SGPR51, SGPR52, SGPR53, SGPR54, SGPR55,
+ SGPR56, SGPR57, SGPR58, SGPR59, SGPR60, SGPR61, SGPR62, SGPR63,
+ SGPR64, SGPR65, SGPR66, SGPR67, SGPR68, SGPR69, SGPR70, SGPR71,
+ SGPR72, SGPR73, SGPR74, SGPR75, SGPR76, SGPR77, SGPR78, SGPR79,
+ SGPR80, SGPR81, SGPR82, SGPR83, SGPR84, SGPR85, SGPR86, SGPR87,
+ SGPR88, SGPR89, SGPR90, SGPR91, SGPR92, SGPR93, SGPR94, SGPR95,
+ SGPR96, SGPR97, SGPR98, SGPR99, SGPR100, SGPR101, SGPR102, SGPR103,
+ SGPR104, SGPR105
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
@@ -93,12 +110,22 @@ def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
(sequence "VGPR%u", 32, 255)
>;
-def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<
- (sequence "SGPR%u", 32, 103)
+def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
+ (sequence "SGPR%u", 32, 105)
+>;
+
+// Just to get the regmask, not for calling convention purposes.
+def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs<
+ (sequence "VGPR%u", 0, 255)
+>;
+
+// Just to get the regmask, not for calling convention purposes.
+def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs<
+ (add (sequence "SGPR%u", 0, 105), VCC_LO, VCC_HI)
>;
def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
- (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)
+ (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_105)
>;
// Calling convention for leaf functions
@@ -111,10 +138,12 @@ def CC_AMDGPU_Func : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
+ CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
+ CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+ CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
]>;
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 4dc1e67c573d..b750c6b5f6d2 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUCodeGenPrepare.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,6 +61,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
AssumptionCache *AC = nullptr;
LegacyDivergenceAnalysis *DA = nullptr;
Module *Mod = nullptr;
+ const DataLayout *DL = nullptr;
bool HasUnsafeFPMath = false;
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -134,6 +134,16 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// \returns True.
bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
+
+ unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+ unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
+ bool isI24(Value *V, unsigned ScalarSize) const;
+ bool isU24(Value *V, unsigned ScalarSize) const;
+
+ /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
+ /// SelectionDAG has an issue where an and asserting the bits are known
+ bool replaceMulWithMul24(BinaryOperator &I) const;
+
/// Expands 24 bit div or rem.
Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
Value *Num, Value *Den,
@@ -393,6 +403,118 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
return true;
}
+unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
+ unsigned ScalarSize) const {
+ KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
+ return ScalarSize - Known.countMinLeadingZeros();
+}
+
+unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
+ unsigned ScalarSize) const {
+ // In order for this to be a signed 24-bit value, bit 23 must
+ // be a sign bit.
+ return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
+}
+
+bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
+ return ScalarSize >= 24 && // Types less than 24-bit should be treated
+ // as unsigned 24-bit values.
+ numBitsSigned(V, ScalarSize) < 24;
+}
+
+bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
+ return numBitsUnsigned(V, ScalarSize) <= 24;
+}
+
+static void extractValues(IRBuilder<> &Builder,
+ SmallVectorImpl<Value *> &Values, Value *V) {
+ VectorType *VT = dyn_cast<VectorType>(V->getType());
+ if (!VT) {
+ Values.push_back(V);
+ return;
+ }
+
+ for (int I = 0, E = VT->getNumElements(); I != E; ++I)
+ Values.push_back(Builder.CreateExtractElement(V, I));
+}
+
+static Value *insertValues(IRBuilder<> &Builder,
+ Type *Ty,
+ SmallVectorImpl<Value *> &Values) {
+ if (Values.size() == 1)
+ return Values[0];
+
+ Value *NewVal = UndefValue::get(Ty);
+ for (int I = 0, E = Values.size(); I != E; ++I)
+ NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
+
+ return NewVal;
+}
+
+bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
+ if (I.getOpcode() != Instruction::Mul)
+ return false;
+
+ Type *Ty = I.getType();
+ unsigned Size = Ty->getScalarSizeInBits();
+ if (Size <= 16 && ST->has16BitInsts())
+ return false;
+
+ // Prefer scalar if this could be s_mul_i32
+ if (DA->isUniform(&I))
+ return false;
+
+ Value *LHS = I.getOperand(0);
+ Value *RHS = I.getOperand(1);
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+
+ // TODO: Should this try to match mulhi24?
+ if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
+ IntrID = Intrinsic::amdgcn_mul_u24;
+ } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
+ IntrID = Intrinsic::amdgcn_mul_i24;
+ } else
+ return false;
+
+ SmallVector<Value *, 4> LHSVals;
+ SmallVector<Value *, 4> RHSVals;
+ SmallVector<Value *, 4> ResultVals;
+ extractValues(Builder, LHSVals, LHS);
+ extractValues(Builder, RHSVals, RHS);
+
+
+ IntegerType *I32Ty = Builder.getInt32Ty();
+ FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
+ for (int I = 0, E = LHSVals.size(); I != E; ++I) {
+ Value *LHS, *RHS;
+ if (IntrID == Intrinsic::amdgcn_mul_u24) {
+ LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+ RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
+ } else {
+ LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
+ RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
+ }
+
+ Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
+
+ if (IntrID == Intrinsic::amdgcn_mul_u24) {
+ ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
+ LHSVals[I]->getType()));
+ } else {
+ ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
+ LHSVals[I]->getType()));
+ }
+ }
+
+ I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
+ I.eraseFromParent();
+
+ return true;
+}
+
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
@@ -757,6 +879,9 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
DA->isUniform(&I) && promoteUniformOpToI32(I))
return true;
+ if (replaceMulWithMul24(I))
+ return true;
+
bool Changed = false;
Instruction::BinaryOps Opc = I.getOpcode();
Type *Ty = I.getType();
@@ -807,7 +932,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
Type *I32Ty = Builder.getInt32Ty();
Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT);
- LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
+ LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, BitCast);
WidenLoad->copyMetadata(I);
// If we have range metadata, we need to convert the type, and not make
@@ -883,6 +1008,7 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
+ DL = &Mod->getDataLayout();
return false;
}
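For reference, a stand-alone sketch (not from the patch) of the 24-bit multiply eligibility test that replaceMulWithMul24 performs above: an i32 multiply can be rewritten when both operands are known to need at most 24 bits, because the original mul and the 24-bit multiply then agree on the low 32 bits. Here the "known bits" come from constants rather than ValueTracking, purely to show the arithmetic of the test; the builtin assumes a GCC/Clang-style compiler.

// Sketch only: the <= 24-bit operand test behind the mul -> mul_u24 rewrite.
#include <cstdint>
#include <cstdio>

static unsigned numBitsUnsigned(uint32_t V) {
  return V ? 32u - (unsigned)__builtin_clz(V) : 0u;  // bits needed to hold V
}
static bool isU24(uint32_t V) { return numBitsUnsigned(V) <= 24; }

int main() {
  uint32_t A = 0x00ABCDEF, B = 0x00000123;     // both fit in 24 bits
  if (isU24(A) && isU24(B))
    printf("mul -> mul_u24: %u\n", A * B);     // same low 32 bits either way
  uint32_t C = 0x01000000;                     // needs 25 bits, not eligible
  printf("isU24(C) = %d\n", isU24(C));
}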
diff --git a/lib/Target/AMDGPU/AMDGPUFeatures.td b/lib/Target/AMDGPU/AMDGPUFeatures.td
index 3c7d8a8fc550..ea3952c316e4 100644
--- a/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -1,9 +1,8 @@
//===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -50,17 +49,12 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
-class SubtargetFeatureGeneration <string Value, string Subtarget,
+class SubtargetFeatureGeneration <string Value, string FeatureName,
+ string Subtarget,
list<SubtargetFeature> Implies> :
- SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
+ SubtargetFeature <FeatureName, "Gen", Subtarget#"::"#Value,
Value#" GPU generation", Implies>;
-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
- "DX10Clamp",
- "true",
- "clamp modifier clamps NaNs to 0.0"
->;
-
def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
"EnablePromoteAlloca",
"true",
diff --git a/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp b/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
index 6e2a981d3396..9ba04d113c70 100644
--- a/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUFixFunctionBitcasts.cpp - Fix function bitcasts -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index e32ca9653b3a..e80797736363 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -1,9 +1,8 @@
//===----------------------- AMDGPUFrameLowering.cpp ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index ee836bf8a631..48b64488303e 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -1,9 +1,8 @@
//===--------------------- AMDGPUFrameLowering.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td
index 59bb2a16e0f3..cad4c2ef404c 100644
--- a/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -1,9 +1,8 @@
//===-- AMDGPUGIsel.td - AMDGPU GlobalISel Patterns---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains patterns that should only be used by GlobalISel. For
@@ -13,6 +12,10 @@
include "AMDGPU.td"
+def p0 : PtrValueType<i64, 0>;
+def p1 : PtrValueType<i64, 1>;
+def p4 : PtrValueType<i64, 4>;
+
def sd_vsrc0 : ComplexPattern<i32, 1, "">;
def gi_vsrc0 :
GIComplexOperandMatcher<s32, "selectVSRC0">,
@@ -35,6 +38,33 @@ def gi_vop3omods :
GIComplexOperandMatcher<s32, "selectVOP3OMods">,
GIComplexPatternEquiv<VOP3OMods>;
+def gi_smrd_imm :
+ GIComplexOperandMatcher<s64, "selectSmrdImm">,
+ GIComplexPatternEquiv<SMRDImm>;
+
+def gi_smrd_imm32 :
+ GIComplexOperandMatcher<s64, "selectSmrdImm32">,
+ GIComplexPatternEquiv<SMRDImm32>;
+
+def gi_smrd_sgpr :
+ GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
+ GIComplexPatternEquiv<SMRDSgpr>;
+
+def gi_flat_offset :
+ GIComplexOperandMatcher<s64, "selectFlatOffset">,
+ GIComplexPatternEquiv<FLATOffset>;
+def gi_flat_offset_signed :
+ GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
+ GIComplexPatternEquiv<FLATOffsetSigned>;
+
+def gi_mubuf_scratch_offset :
+ GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
+ GIComplexPatternEquiv<MUBUFScratchOffset>;
+def gi_mubuf_scratch_offen :
+ GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
+ GIComplexPatternEquiv<MUBUFScratchOffen>;
+
+
class GISelSop2Pat <
SDPatternOperator node,
Instruction inst,
@@ -113,15 +143,6 @@ multiclass GISelVop2IntrPat <
def : GISelSop2Pat <or, S_OR_B32, i32>;
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
-def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
-let AddedComplexity = 100 in {
-let SubtargetPredicate = isSICI in {
-def : GISelVop2Pat <sra, V_ASHR_I32_e32, i32>;
-}
-def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
-}
-def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-
// FIXME: We can't re-use SelectionDAG patterns here because they match
// against a custom SDNode and we would need to create a generic machine
// instruction that is equivalent to the custom SDNode. This would also require
@@ -135,3 +156,11 @@ defm : GISelVop2IntrPat <int_maxnum, V_MAX_F32_e32, f32>;
def : GISelVop3Pat2ModsPat <int_maxnum, V_MAX_F64, f64>;
defm : GISelVop2IntrPat <int_minnum, V_MIN_F32_e32, f32>;
def : GISelVop3Pat2ModsPat <int_minnum, V_MIN_F64, f64>;
+
+// Since GlobalISel is more flexible than SelectionDAG, I think we can get
+// away with adding patterns for integer types and not legalizing all
+// loads and stores to vector types. This should help simplify the load/store
+// legalization.
+foreach Ty = [i64, p0, p1, p4] in {
+ defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 6eab59ab4e09..0a1f48231b18 100644
--- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -1,9 +1,8 @@
//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -92,6 +91,28 @@ const RegisterBankInfo::ValueMapping ValMappings[] {
{&PartMappings[17], 1}
};
+const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] {
+ /*32-bit op*/ {0, 32, SGPRRegBank},
+ /*2x32-bit op*/ {0, 32, SGPRRegBank},
+ {32, 32, SGPRRegBank},
+/*<2x32-bit> op*/ {0, 64, SGPRRegBank},
+
+ /*32-bit op*/ {0, 32, VGPRRegBank},
+ /*2x32-bit op*/ {0, 32, VGPRRegBank},
+ {32, 32, VGPRRegBank},
+};
+
+
+// For instructions where only the scalar version can operate on 64 bits.
+const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] {
+ /*32-bit sgpr*/ {&SGPROnly64BreakDown[0], 1},
+ /*2 x 32-bit sgpr*/ {&SGPROnly64BreakDown[1], 2},
+ /*64-bit sgpr */ {&SGPROnly64BreakDown[3], 1},
+
+ /*32-bit vgpr*/ {&SGPROnly64BreakDown[4], 1},
+ /*2 x 32-bit vgpr*/ {&SGPROnly64BreakDown[5], 2}
+};
+
enum ValueMappingIdx {
SCCStartIdx = 0,
SGPRStartIdx = 2,
@@ -128,5 +149,89 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
return &ValMappings[Idx];
}
+const RegisterBankInfo::ValueMapping *getValueMappingSGPR64Only(unsigned BankID,
+ unsigned Size) {
+ if (Size != 64)
+ return getValueMapping(BankID, Size);
+
+ if (BankID == AMDGPU::VGPRRegBankID)
+ return &ValMappingsSGPR64OnlyVGPR32[4];
+
+ assert(BankID == AMDGPU::SGPRRegBankID);
+ return &ValMappingsSGPR64OnlyVGPR32[2];
+}
+
+const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] {
+ /* 256-bit load */ {0, 256, SGPRRegBank},
+ /* 512-bit load */ {0, 512, SGPRRegBank},
+ /* 8 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
+ {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
+ {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
+ {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
+ /* 16 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
+ {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
+ {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
+ {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
+ {256, 32, VGPRRegBank}, {288, 32, VGPRRegBank},
+ {320, 32, VGPRRegBank}, {352, 32, VGPRRegBank},
+ {384, 32, VGPRRegBank}, {416, 32, VGPRRegBank},
+ {448, 32, VGPRRegBank}, {480, 32, VGPRRegBank},
+ /* 4 64-bit loads */ {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
+ {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
+ /* 8 64-bit loads */ {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
+ {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
+ {256, 64, VGPRRegBank}, {320, 64, VGPRRegBank},
+ {384, 64, VGPRRegBank}, {448, 64, VGPRRegBank},
+
+ /* FIXME: The generic register bank select does not support complex
+ * break downs where the number of vector elements does not equal the
+ * number of breakdowns.
+ * FIXME: register bank select now tries to handle complex break downs,
+ * but it emits an illegal instruction:
+ * %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS %2:vgpr(s128), %3:vgpr(s128)
+ */
+ /* 2 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
+ /* 4 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
+ {256, 128, VGPRRegBank}, {384, 128, VGPRRegBank}
+};
+
+const RegisterBankInfo::ValueMapping ValMappingsLoadSGPROnly[] {
+ /* 256-bit load */ {&LoadSGPROnlyBreakDown[0], 1},
+ /* 512-bit load */ {&LoadSGPROnlyBreakDown[1], 1},
+ /* <8 x i32> load */ {&LoadSGPROnlyBreakDown[2], 8},
+ /* <16 x i32> load */ {&LoadSGPROnlyBreakDown[10], 16},
+ /* <4 x i64> load */ {&LoadSGPROnlyBreakDown[26], 4},
+ /* <8 x i64> load */ {&LoadSGPROnlyBreakDown[30], 8}
+};
+
+const RegisterBankInfo::ValueMapping *
+getValueMappingLoadSGPROnly(unsigned BankID, LLT SizeTy) {
+ unsigned Size = SizeTy.getSizeInBits();
+ if (Size < 256 || BankID == AMDGPU::SGPRRegBankID)
+ return getValueMapping(BankID, Size);
+
+ assert((Size == 256 || Size == 512) && BankID == AMDGPU::VGPRRegBankID);
+
+ // Default to using the non-split ValueMappings; we will use these if
+ // the register bank is SGPR or if we don't know how to handle the vector
+ // type.
+ unsigned Idx = Size == 256 ? 0 : 1;
+
+ // We need to split this load if it has a vgpr pointer.
+ if (BankID == AMDGPU::VGPRRegBankID) {
+ if (SizeTy == LLT::vector(8, 32))
+ Idx = 2;
+ else if (SizeTy == LLT::vector(16, 32))
+ Idx = 3;
+ else if (SizeTy == LLT::vector(4, 64))
+ Idx = 4;
+ else if (SizeTy == LLT::vector(8, 64))
+ Idx = 5;
+ }
+
+ return &ValMappingsLoadSGPROnly[Idx];
+}
+
+
} // End AMDGPU namespace.
} // End llvm namespace.
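For reference, a small sketch (not from the patch) of how the wide-load breakdown tables above are laid out: a VGPR-bank load of N bits is described as a list of {StartIdx, Length} pieces covering the value, e.g. eight 32-bit pieces for a 256-bit <8 x i32> load. The struct shape below is an illustrative assumption, not the actual RegisterBankInfo type.

// Sketch only: generating the {StartIdx, Length} pieces for one breakdown row.
#include <cstdio>

struct PartialMapping { unsigned StartIdx, Length; };

int main() {
  const unsigned TotalBits = 256, PieceBits = 32;   // e.g. <8 x i32>
  PartialMapping Parts[TotalBits / PieceBits];
  for (unsigned I = 0; I != TotalBits / PieceBits; ++I)
    Parts[I] = {I * PieceBits, PieceBits};          // {0,32}, {32,32}, ... {224,32}
  for (auto &P : Parts)
    printf("{%u, %u}\n", P.StartIdx, P.Length);
}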
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index c38b0e61558b..b31de0af5018 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -1,9 +1,8 @@
//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -240,23 +239,7 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
Kernel::DebugProps::Metadata
MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const {
- const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
-
- if (!STM.debuggerSupported())
- return HSADebugProps;
-
- HSADebugProps.mDebuggerABIVersion.push_back(1);
- HSADebugProps.mDebuggerABIVersion.push_back(0);
-
- if (STM.debuggerEmitPrologue()) {
- HSADebugProps.mPrivateSegmentBufferSGPR =
- ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
- HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
- ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
- }
-
- return HSADebugProps;
+ return HSAMD::Kernel::DebugProps::Metadata();
}
void MetadataStreamerV2::emitVersion() {
@@ -452,6 +435,10 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
}
}
+
+ // Emit the pointer argument for multi-grid object.
+ if (HiddenArgNumBytes >= 56)
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg);
}
bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
@@ -506,20 +493,16 @@ void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
- std::shared_ptr<msgpack::Node> FromHSAMetadataString =
- std::make_shared<msgpack::MapNode>();
+ msgpack::Document FromHSAMetadataString;
- yaml::Input YIn(HSAMetadataString);
- YIn >> FromHSAMetadataString;
- if (YIn.error()) {
+ if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
errs() << "FAIL\n";
return;
}
std::string ToHSAMetadataString;
raw_string_ostream StrOS(ToHSAMetadataString);
- yaml::Output YOut(StrOS);
- YOut << FromHSAMetadataString;
+ FromHSAMetadataString.toYAML(StrOS);
errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
if (HSAMetadataString != ToHSAMetadataString) {
@@ -653,23 +636,23 @@ std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
}
}
-std::shared_ptr<msgpack::ArrayNode>
+msgpack::ArrayDocNode
MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
- auto Dims = std::make_shared<msgpack::ArrayNode>();
+ auto Dims = HSAMetadataDoc->getArrayNode();
if (Node->getNumOperands() != 3)
return Dims;
for (auto &Op : Node->operands())
- Dims->push_back(std::make_shared<msgpack::ScalarNode>(
- mdconst::extract<ConstantInt>(Op)->getZExtValue()));
+ Dims.push_back(Dims.getDocument()->getNode(
+ uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
return Dims;
}
void MetadataStreamerV3::emitVersion() {
- auto Version = std::make_shared<msgpack::ArrayNode>();
- Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
- Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
- getRootMetadata("amdhsa.version") = std::move(Version);
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajor));
+ Version.push_back(Version.getDocument()->getNode(VersionMinor));
+ getRootMetadata("amdhsa.version") = Version;
}
void MetadataStreamerV3::emitPrintf(const Module &Mod) {
@@ -677,16 +660,16 @@ void MetadataStreamerV3::emitPrintf(const Module &Mod) {
if (!Node)
return;
- auto Printf = std::make_shared<msgpack::ArrayNode>();
+ auto Printf = HSAMetadataDoc->getArrayNode();
for (auto Op : Node->operands())
if (Op->getNumOperands())
- Printf->push_back(std::make_shared<msgpack::ScalarNode>(
- cast<MDString>(Op->getOperand(0))->getString()));
- getRootMetadata("amdhsa.printf") = std::move(Printf);
+ Printf.push_back(Printf.getDocument()->getNode(
+ cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
+ getRootMetadata("amdhsa.printf") = Printf;
}
void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
- msgpack::MapNode &Kern) {
+ msgpack::MapDocNode Kern) {
// TODO: What about other languages?
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
if (!Node || !Node->getNumOperands())
@@ -695,77 +678,50 @@ void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
if (Op0->getNumOperands() <= 1)
return;
- Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
- auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
- LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
+ auto LanguageVersion = Kern.getDocument()->getArrayNode();
+ LanguageVersion.push_back(Kern.getDocument()->getNode(
mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
- LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ LanguageVersion.push_back(Kern.getDocument()->getNode(
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
- Kern[".language_version"] = std::move(LanguageVersion);
+ Kern[".language_version"] = LanguageVersion;
}
void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
- msgpack::MapNode &Kern) {
+ msgpack::MapDocNode Kern) {
if (auto Node = Func.getMetadata("reqd_work_group_size"))
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
if (auto Node = Func.getMetadata("work_group_size_hint"))
Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
if (auto Node = Func.getMetadata("vec_type_hint")) {
- Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
- cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
- mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
+ Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
+ getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
+ /*Copy=*/true);
}
if (Func.hasFnAttribute("runtime-handle")) {
- Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
- Func.getFnAttribute("runtime-handle").getValueAsString().str());
+ Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
+ Func.getFnAttribute("runtime-handle").getValueAsString().str(),
+ /*Copy=*/true);
}
}
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
- msgpack::MapNode &Kern) {
+ msgpack::MapDocNode Kern) {
unsigned Offset = 0;
- auto Args = std::make_shared<msgpack::ArrayNode>();
+ auto Args = HSAMetadataDoc->getArrayNode();
for (auto &Arg : Func.args())
- emitKernelArg(Arg, Offset, *Args);
-
- emitHiddenKernelArgs(Func, Offset, *Args);
-
- // TODO: What about other languages?
- if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
- auto &DL = Func.getParent()->getDataLayout();
- auto Int64Ty = Type::getInt64Ty(Func.getContext());
-
- emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
- emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
- emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
-
- auto Int8PtrTy =
- Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+ emitKernelArg(Arg, Offset, Args);
- // Emit "printf buffer" argument if printf is used, otherwise emit dummy
- // "none" argument.
- if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
- else
- emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ emitHiddenKernelArgs(Func, Offset, Args);
- // Emit "default queue" and "completion action" arguments if enqueue kernel
- // is used, otherwise emit dummy "none" arguments.
- if (Func.hasFnAttribute("calls-enqueue-kernel")) {
- emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
- emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
- } else {
- emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
- emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
- }
- }
-
- Kern[".args"] = std::move(Args);
+ Kern[".args"] = Args;
}
void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
- msgpack::ArrayNode &Args) {
+ msgpack::ArrayDocNode Args) {
auto Func = Arg.getParent();
auto ArgNo = Arg.getArgNo();
const MDNode *Node;
@@ -822,36 +778,35 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
StringRef ValueKind, unsigned &Offset,
- msgpack::ArrayNode &Args,
+ msgpack::ArrayDocNode Args,
unsigned PointeeAlign, StringRef Name,
StringRef TypeName,
StringRef BaseTypeName,
StringRef AccQual, StringRef TypeQual) {
- auto ArgPtr = std::make_shared<msgpack::MapNode>();
- auto &Arg = *ArgPtr;
+ auto Arg = Args.getDocument()->getMapNode();
if (!Name.empty())
- Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
+ Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
if (!TypeName.empty())
- Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
+ Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
auto Size = DL.getTypeAllocSize(Ty);
auto Align = DL.getABITypeAlignment(Ty);
- Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
+ Arg[".size"] = Arg.getDocument()->getNode(Size);
Offset = alignTo(Offset, Align);
- Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
+ Arg[".offset"] = Arg.getDocument()->getNode(Offset);
Offset += Size;
- Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
+ Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
Arg[".value_type"] =
- std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
+ Arg.getDocument()->getNode(getValueType(Ty, BaseTypeName), /*Copy=*/true);
if (PointeeAlign)
- Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
+ Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign);
if (auto PtrTy = dyn_cast<PointerType>(Ty))
if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
- Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
+ Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
if (auto AQ = getAccessQualifier(AccQual))
- Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
+ Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
// TODO: Emit Arg[".actual_access"].
@@ -859,21 +814,21 @@ void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
TypeQual.split(SplitTypeQuals, " ", -1, false);
for (StringRef Key : SplitTypeQuals) {
if (Key == "const")
- Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
+ Arg[".is_const"] = Arg.getDocument()->getNode(true);
else if (Key == "restrict")
- Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
+ Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
else if (Key == "volatile")
- Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
+ Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
else if (Key == "pipe")
- Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
+ Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
}
- Args.push_back(std::move(ArgPtr));
+ Args.push_back(Arg);
}
void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
unsigned &Offset,
- msgpack::ArrayNode &Args) {
+ msgpack::ArrayDocNode Args) {
int HiddenArgNumBytes =
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
@@ -913,56 +868,58 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
}
}
+
+ // Emit the pointer argument for multi-grid object.
+ if (HiddenArgNumBytes >= 56)
+ emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args);
}
-std::shared_ptr<msgpack::MapNode>
+msgpack::MapDocNode
MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
const Function &F = MF.getFunction();
- auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
- auto &Kern = *HSAKernelProps;
+ auto Kern = HSAMetadataDoc->getMapNode();
unsigned MaxKernArgAlign;
- Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
+ Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
STM.getKernArgSegmentSize(F, MaxKernArgAlign));
Kern[".group_segment_fixed_size"] =
- std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
+ Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
- std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
+ Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
Kern[".kernarg_segment_align"] =
- std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign));
Kern[".wavefront_size"] =
- std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
- Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
- Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
+ Kern.getDocument()->getNode(STM.getWavefrontSize());
+ Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
+ Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
Kern[".max_flat_workgroup_size"] =
- std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
+ Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
Kern[".sgpr_spill_count"] =
- std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
+ Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
Kern[".vgpr_spill_count"] =
- std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
+ Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
- return HSAKernelProps;
+ return Kern;
}
bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
- return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
+ return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
}
void MetadataStreamerV3::begin(const Module &Mod) {
emitVersion();
emitPrintf(Mod);
- getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
+ getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
void MetadataStreamerV3::end() {
std::string HSAMetadataString;
raw_string_ostream StrOS(HSAMetadataString);
- yaml::Output YOut(StrOS);
- YOut << HSAMetadataRoot;
+ HSAMetadataDoc->toYAML(StrOS);
if (DumpHSAMetadata)
dump(StrOS.str());
@@ -973,25 +930,24 @@ void MetadataStreamerV3::end() {
void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
- auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
+ auto Kern = getHSAKernelProps(MF, ProgramInfo);
assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
Func.getCallingConv() == CallingConv::SPIR_KERNEL);
- auto &KernelsNode = getRootMetadata("amdhsa.kernels");
- auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
+ auto Kernels =
+ getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
{
- auto &Kern = *KernelProps;
- Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
- Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
- (Twine(Func.getName()) + Twine(".kd")).str());
+ Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
+ Kern[".symbol"] = Kern.getDocument()->getNode(
+ (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
emitKernelAttrs(Func, Kern);
emitKernelArgs(Func, Kern);
}
- Kernels->push_back(std::move(KernelProps));
+ Kernels.push_back(Kern);
}
} // end namespace HSAMD
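
The hunks above replace the shared_ptr<msgpack::Node> tree with document-owned DocNode values. A minimal sketch of that usage pattern, using only the msgpack::Document calls that appear in this patch (getArrayNode, getMapNode, getNode, getRoot, toYAML); the function name and metadata values are illustrative:

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Build a tiny metadata tree the way MetadataStreamerV3 does after this
// change: nodes live inside the Document, so plain DocNode values are
// copied around instead of std::shared_ptr<msgpack::Node>.
static void buildExampleMetadata(raw_ostream &OS) {
  msgpack::Document Doc;

  auto Version = Doc.getArrayNode();
  Version.push_back(Doc.getNode(uint64_t(1)));
  Version.push_back(Doc.getNode(uint64_t(0)));
  Doc.getRoot().getMap(/*Convert=*/true)["amdhsa.version"] = Version;

  auto Kern = Doc.getMapNode();
  Kern[".name"] = Doc.getNode("example_kernel", /*Copy=*/true);

  auto Kernels = Doc.getArrayNode();
  Kernels.push_back(Kern);
  Doc.getRoot().getMap(/*Convert=*/true)["amdhsa.kernels"] = Kernels;

  Doc.toYAML(OS); // Serialize the whole document as YAML text.
}
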
diff --git a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index afc09baf952d..2eecddbd7b01 100644
--- a/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -1,9 +1,8 @@
//===--- AMDGPUHSAMetadataStreamer.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,7 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
namespace llvm {
@@ -52,8 +51,8 @@ public:
class MetadataStreamerV3 final : public MetadataStreamer {
private:
- std::shared_ptr<msgpack::Node> HSAMetadataRoot =
- std::make_shared<msgpack::MapNode>();
+ std::unique_ptr<msgpack::Document> HSAMetadataDoc =
+ llvm::make_unique<msgpack::Document>();
void dump(StringRef HSAMetadataString) const;
@@ -70,41 +69,39 @@ private:
std::string getTypeName(Type *Ty, bool Signed) const;
- std::shared_ptr<msgpack::ArrayNode>
- getWorkGroupDimensions(MDNode *Node) const;
+ msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const;
- std::shared_ptr<msgpack::MapNode>
- getHSAKernelProps(const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const;
+ msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const;
void emitVersion();
void emitPrintf(const Module &Mod);
- void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
+ void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
- void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
+ void emitKernelAttrs(const Function &Func, msgpack::MapDocNode Kern);
- void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
+ void emitKernelArgs(const Function &Func, msgpack::MapDocNode Kern);
void emitKernelArg(const Argument &Arg, unsigned &Offset,
- msgpack::ArrayNode &Args);
+ msgpack::ArrayDocNode Args);
void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
- unsigned &Offset, msgpack::ArrayNode &Args,
+ unsigned &Offset, msgpack::ArrayDocNode Args,
unsigned PointeeAlign = 0, StringRef Name = "",
StringRef TypeName = "", StringRef BaseTypeName = "",
StringRef AccQual = "", StringRef TypeQual = "");
void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
- msgpack::ArrayNode &Args);
+ msgpack::ArrayDocNode Args);
- std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
- return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
+ msgpack::DocNode &getRootMetadata(StringRef Key) {
+ return HSAMetadataDoc->getRoot().getMap(/*Convert=*/true)[Key];
}
- std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
- return HSAMetadataRoot;
+ msgpack::DocNode &getHSAMetadataRoot() {
+ return HSAMetadataDoc->getRoot();
}
public:
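
For the verify() path above, the Document now handles the YAML round trip directly. A minimal sketch of that check, assuming only the fromYAML/toYAML calls shown in the patch; the helper name is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

using namespace llvm;

// Returns true when the metadata string parses and re-serializes to the
// identical YAML text, mirroring what MetadataStreamerV3::verify prints
// as PASS/FAIL.
static bool roundTripsYAML(StringRef Input) {
  msgpack::Document Doc;
  if (!Doc.fromYAML(Input)) // Parse failure.
    return false;

  std::string Out;
  raw_string_ostream OS(Out);
  Doc.toYAML(OS);
  return Input == OS.str();
}
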
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a0a045e72a58..ea730539f834 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
@@ -40,6 +39,9 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
+#ifdef EXPENSIVE_CHECKS
+#include "llvm/IR/Dominators.h"
+#endif
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
@@ -52,6 +54,8 @@
#include <new>
#include <vector>
+#define DEBUG_TYPE "isel"
+
using namespace llvm;
namespace llvm {
@@ -66,6 +70,57 @@ class R600InstrInfo;
namespace {
+static bool isNullConstantOrUndef(SDValue V) {
+ if (V.isUndef())
+ return true;
+
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+ // This is only used for packed vectors, where using 0 for undef should
+ // always be good.
+ if (N.isUndef()) {
+ Out = 0;
+ return true;
+ }
+
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getSExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: Handle undef as zero
+static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+ bool Negate = false) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ SDLoc SL(N);
+ uint32_t K = Negate ?
+ (-LHSVal & 0xffff) | (-RHSVal << 16) :
+ (LHSVal & 0xffff) | (RHSVal << 16);
+ return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+ DAG.getTargetConstant(K, SL, MVT::i32));
+ }
+
+ return nullptr;
+}
+
+static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+ return packConstantV2I16(N, DAG, true);
+}
+
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -84,12 +139,18 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<AMDGPUPerfHintAnalysis>();
AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+#endif
SelectionDAGISel::getAnalysisUsage(AU);
}
+ bool matchLoadD16FromBuildVector(SDNode *N) const;
+
bool runOnMachineFunction(MachineFunction &MF) override;
+ void PreprocessISelDAG() override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
void PostprocessISelDAG() override;
@@ -100,19 +161,24 @@ protected:
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
bool isNoNanSrc(SDValue N) const;
- bool isInlineImmediate(const SDNode *N) const;
+ bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+ bool isNegInlineImmediate(const SDNode *N) const {
+ return isInlineImmediate(N, true);
+ }
+
bool isVGPRImm(const SDNode *N) const;
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
- SDNode *glueCopyToM0(SDNode *N) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+ SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset,
unsigned OffsetBits) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
@@ -120,10 +186,10 @@ private:
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const;
+ SDValue &TFE, SDValue &DLC) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const;
+ SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -136,19 +202,19 @@ private:
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const;
+ SDValue &TFE, SDValue &DLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &SLC) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
- bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+ bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
- bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
+ bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
template <bool IsSigned>
- bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
SDValue &Offset, SDValue &SLC) const;
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
@@ -164,6 +230,7 @@ private:
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -193,11 +260,13 @@ private:
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectHi16Elt(SDValue In, SDValue &Src) const;
+ SDValue getHi16Elt(SDValue In) const;
void SelectADD_SUB_I64(SDNode *N);
+ void SelectAddcSubb(SDNode *N);
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
+ void SelectDIV_FMAS(SDNode *N);
void SelectMAD_64_32(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -210,6 +279,10 @@ private:
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
+ void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+ void SelectDS_GWS(SDNode *N, unsigned IntrID);
+ void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
@@ -235,11 +308,49 @@ public:
SDValue &Offset) override;
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void PreprocessISelDAG() override {}
+
protected:
// Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};
+static SDValue stripBitcast(SDValue Val) {
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+}
+
+// Figure out if this is really an extract of the high 16-bits of a dword.
+static bool isExtractHiElt(SDValue In, SDValue &Out) {
+ In = stripBitcast(In);
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ SDValue Srl = In.getOperand(0);
+ if (Srl.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ if (ShiftAmt->getZExtValue() == 16) {
+ Out = stripBitcast(Srl.getOperand(0));
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// Look through operations that obscure just looking at the low 16-bits of the
+// same register.
+static SDValue stripExtractLoElt(SDValue In) {
+ if (In.getOpcode() == ISD::TRUNCATE) {
+ SDValue Src = In.getOperand(0);
+ if (Src.getValueType().getSizeInBits() == 32)
+ return stripBitcast(Src);
+ }
+
+ return In;
+}
+
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
@@ -247,6 +358,10 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+#ifdef EXPENSIVE_CHECKS
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
@@ -265,10 +380,125 @@ FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+#ifdef EXPENSIVE_CHECKS
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ for (auto &L : LI->getLoopsInPreorder()) {
+ assert(L->isLCSSAForm(DT));
+ }
+#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
+bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
+ assert(Subtarget->d16PreservesUnusedBits());
+ MVT VT = N->getValueType(0).getSimpleVT();
+ if (VT != MVT::v2i16 && VT != MVT::v2f16)
+ return false;
+
+ SDValue Lo = N->getOperand(0);
+ SDValue Hi = N->getOperand(1);
+
+ LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
+
+ // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
+ // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
+ // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
+
+ // Need to check for possible indirect dependencies on the other half of the
+ // vector to avoid introducing a cycle.
+ if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
+ SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
+
+ SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
+ SDValue Ops[] = {
+ LdHi->getChain(), LdHi->getBasePtr(), TiedIn
+ };
+
+ unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
+ if (LdHi->getMemoryVT() == MVT::i8) {
+ LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
+ AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
+ } else {
+ assert(LdHi->getMemoryVT() == MVT::i16);
+ }
+
+ SDValue NewLoadHi =
+ CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
+ Ops, LdHi->getMemoryVT(),
+ LdHi->getMemOperand());
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
+ return true;
+ }
+
+ // build_vector (load ptr), hi -> load_d16_lo ptr, hi
+ // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
+ // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
+ LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
+ if (LdLo && Lo.hasOneUse()) {
+ SDValue TiedIn = getHi16Elt(Hi);
+ if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
+ return false;
+
+ SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
+ unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
+ if (LdLo->getMemoryVT() == MVT::i8) {
+ LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
+ AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
+ } else {
+ assert(LdLo->getMemoryVT() == MVT::i16);
+ }
+
+ TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
+
+ SDValue Ops[] = {
+ LdLo->getChain(), LdLo->getBasePtr(), TiedIn
+ };
+
+ SDValue NewLoadLo =
+ CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
+ Ops, LdLo->getMemoryVT(),
+ LdLo->getMemOperand());
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->d16PreservesUnusedBits())
+ return;
+
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty())
+ continue;
+
+ switch (N->getOpcode()) {
+ case ISD::BUILD_VECTOR:
+ MadeChange |= matchLoadD16FromBuildVector(N);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (MadeChange) {
+ CurDAG->RemoveDeadNodes();
+ LLVM_DEBUG(dbgs() << "After PreProcess:\n";
+ CurDAG->dump(););
+ }
+}
+
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
if (TM.Options.NoNaNsFPMath)
return true;
@@ -280,14 +510,26 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
return CurDAG->isKnownNeverNaN(N);
}
-bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
+ bool Negated) const {
+ if (N->isUndef())
+ return true;
+
const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ if (Negated) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+ return TII->isInlineConstant(-C->getAPIntValue());
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+ return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
- return TII->isInlineConstant(C->getAPIntValue());
+ } else {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+ return TII->isInlineConstant(C->getAPIntValue());
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
- return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+ return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+ }
return false;
}
@@ -340,37 +582,48 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
}
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
- !Subtarget->ldsRequiresM0Init())
- return N;
-
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
+ *static_cast<const SITargetLowering*>(getTargetLowering());
- // Write max value to m0 before each load operation
+ assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
- SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
- CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N),
+ Val);
SDValue Glue = M0.getValue(1);
SmallVector <SDValue, 8> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Ops.push_back(M0); // Replace the chain.
+ for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
Ops.push_back(Glue);
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
+ unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ if (Subtarget->ldsRequiresM0Init())
+ return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ } else if (AS == AMDGPUAS::REGION_ADDRESS) {
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
+ return
+ glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
+ }
+ return N;
+}
+
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
EVT VT) const {
SDNode *Lo = CurDAG->getMachineNode(
AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
+ CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
SDNode *Hi =
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
- CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+ CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
const SDValue Ops[] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
@@ -385,31 +638,23 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
return AMDGPU::SReg_32_XM0RegClassID;
case 2:
return AMDGPU::SReg_64RegClassID;
+ case 3:
+ return AMDGPU::SGPR_96RegClassID;
case 4:
return AMDGPU::SReg_128RegClassID;
+ case 5:
+ return AMDGPU::SGPR_160RegClassID;
case 8:
return AMDGPU::SReg_256RegClassID;
case 16:
return AMDGPU::SReg_512RegClassID;
+ case 32:
+ return AMDGPU::SReg_1024RegClassID;
}
llvm_unreachable("invalid vector size");
}
-static bool getConstantValue(SDValue N, uint32_t &Out) {
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- Out = C->getAPIntValue().getZExtValue();
- return true;
- }
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
- Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
- return true;
- }
-
- return false;
-}
-
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
@@ -423,12 +668,12 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
return;
}
- assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+ assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
"supported yet");
- // 16 = Max Num Vector Elements
+ // 32 = Max Num Vector Elements
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
// 1 = Vector Register Class
- SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
+ SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
@@ -470,10 +715,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (isa<AtomicSDNode>(N) ||
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
+ Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
- N = glueCopyToM0(N);
+ N = glueCopyToM0LDSInit(N);
switch (Opc) {
default:
@@ -491,6 +736,13 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY:
+ if (N->getValueType(0) != MVT::i32)
+ break;
+
+ SelectAddcSubb(N);
+ return;
case ISD::UADDO:
case ISD::USUBO: {
SelectUADDO_USUBO(N);
@@ -511,12 +763,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned NumVectorElts = VT.getVectorNumElements();
if (VT.getScalarSizeInBits() == 16) {
if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- uint32_t K = LHSVal | (RHSVal << 16);
- CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
- CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+ if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
+ ReplaceNode(N, Packed);
return;
}
}
@@ -571,7 +819,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: {
- N = glueCopyToM0(N);
+ N = glueCopyToM0LDSInit(N);
break;
}
@@ -606,6 +854,10 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectDIV_SCALE(N);
return;
}
+ case AMDGPUISD::DIV_FMAS: {
+ SelectDIV_FMAS(N);
+ return;
+ }
case AMDGPUISD::MAD_I64_I32:
case AMDGPUISD::MAD_U64_U32: {
SelectMAD_64_32(N);
@@ -649,6 +901,16 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
return;
}
+
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ SelectINTRINSIC_W_CHAIN(N);
+ return;
+ }
+ case ISD::INTRINSIC_VOID: {
+ SelectINTRINSIC_VOID(N);
+ return;
}
}
@@ -763,6 +1025,19 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
ReplaceNode(N, RegSequence);
}
+void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue CI = N->getOperand(2);
+
+ unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
+ : AMDGPU::V_SUBB_U32_e64;
+ CurDAG->SelectNodeTo(
+ N, Opc, N->getVTList(),
+ {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
+}
+
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
// The names of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
// carry out despite the _i32 name. These were renamed in VI to _U32.
@@ -770,8 +1045,10 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
unsigned Opc = N->getOpcode() == ISD::UADDO ?
AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
- CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
- { N->getOperand(0), N->getOperand(1) });
+ CurDAG->SelectNodeTo(
+ N, Opc, N->getVTList(),
+ {N->getOperand(0), N->getOperand(1),
+ CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
}
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
@@ -816,6 +1093,35 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
+void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) {
+ const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(VT == MVT::f32 || VT == MVT::f64);
+
+ unsigned Opc
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F64 : AMDGPU::V_DIV_FMAS_F32;
+
+ SDValue CarryIn = N->getOperand(3);
+ // V_DIV_FMAS implicitly reads VCC.
+ SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL,
+ TRI->getVCC(), CarryIn, SDValue());
+
+ SDValue Ops[10];
+
+ SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
+
+ Ops[8] = VCC;
+ Ops[9] = VCC.getValue(1);
+
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+}
+
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
@@ -829,13 +1135,13 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
(OffsetBits == 8 && !isUInt<8>(Offset)))
return false;
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
+ if (Subtarget->hasUsableDSOffset() ||
Subtarget->unsafeDSOffsetFoldingEnabled())
return true;
@@ -871,13 +1177,20 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
Zero, Addr.getOperand(1));
if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+ SmallVector<SDValue, 3> Opnds;
+ Opnds.push_back(Zero);
+ Opnds.push_back(Addr.getOperand(1));
+
// FIXME: Select to VOP3 version for with-carry.
- unsigned SubOp = Subtarget->hasAddNoCarry() ?
- AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
+ unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+ if (Subtarget->hasAddNoCarry()) {
+ SubOp = AMDGPU::V_SUB_U32_e64;
+ Opnds.push_back(
+ CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
+ }
- MachineSDNode *MachineSub
- = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
- Zero, Addr.getOperand(1));
+ MachineSDNode *MachineSub =
+ CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
Base = SDValue(MachineSub, 0);
Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
@@ -945,12 +1258,18 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
Zero, Addr.getOperand(1));
if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
- unsigned SubOp = Subtarget->hasAddNoCarry() ?
- AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;
+ SmallVector<SDValue, 3> Opnds;
+ Opnds.push_back(Zero);
+ Opnds.push_back(Addr.getOperand(1));
+ unsigned SubOp = AMDGPU::V_SUB_I32_e32;
+ if (Subtarget->hasAddNoCarry()) {
+ SubOp = AMDGPU::V_SUB_U32_e64;
+ Opnds.push_back(
+ CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
+ }
MachineSDNode *MachineSub
- = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
- Zero, Addr.getOperand(1));
+ = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
Base = SDValue(MachineSub, 0);
Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
@@ -989,7 +1308,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const {
+ SDValue &TFE, SDValue &DLC) const {
// Subtarget prefers to use flat instruction
if (Subtarget->useFlatForGlobal())
return false;
@@ -1001,6 +1320,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
if (!SLC.getNode())
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1079,15 +1399,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &GLC,
- SDValue &SLC, SDValue &TFE) const {
+ SDValue &SLC, SDValue &TFE,
+ SDValue &DLC) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (!Subtarget->hasAddr64())
return false;
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE))
+ GLC, SLC, TFE, DLC))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1109,9 +1430,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
- SDValue GLC, TFE;
+ SDValue GLC, TFE, DLC;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1127,10 +1448,10 @@ std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const
SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
FI->getValueType(0));
- // If we can resolve this to a frame index access, this is relative to the
- // frame pointer SGPR.
- return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
- MVT::i32));
+ // If we can resolve this to a frame index access, this will be relative to
+ // either the stack or frame pointer SGPR.
+ return std::make_pair(
+ TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
}
// If we don't know this private access is a local stack object, it needs to
@@ -1236,13 +1557,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
- SDValue &TFE) const {
+ SDValue &TFE, SDValue &DLC) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE))
+ GLC, SLC, TFE, DLC))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1264,57 +1585,42 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
- SDValue GLC, SLC, TFE;
+ SDValue GLC, SLC, TFE, DLC;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &SLC) const {
- SDValue GLC, TFE;
+ SDValue GLC, TFE, DLC;
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+ return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
}
template <bool IsSigned>
-bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
+ SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- int64_t OffsetVal = 0;
-
- if (Subtarget->hasFlatInstOffsets() &&
- CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
- if ((IsSigned && isInt<13>(COffsetVal)) ||
- (!IsSigned && isUInt<12>(COffsetVal))) {
- Addr = N0;
- OffsetVal = COffsetVal;
- }
- }
-
- VAddr = Addr;
- Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
- SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
-
- return true;
+ return static_cast<const SITargetLowering*>(getTargetLowering())->
+ SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
+ SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
+ return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
}
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
+ SDValue Addr,
SDValue &VAddr,
SDValue &Offset,
SDValue &SLC) const {
- return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
+ return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
}
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
@@ -1619,9 +1925,12 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
return;
}
+ const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
+ const SIRegisterInfo *TRI = ST->getRegisterInfo();
+
bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
- unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
+ unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC : TRI->getVCC();
SDLoc SL(N);
if (!UseSCCBr) {
@@ -1638,9 +1947,13 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
// the S_AND when it is unnecessary. But it would be better to add a separate
// pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
// catches both cases.
- Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
- CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
- Cond),
+ Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
+ : AMDGPU::S_AND_B64,
+ SL, MVT::i1,
+ CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
+ : AMDGPU::EXEC,
+ MVT::i1),
+ Cond),
0);
}
@@ -1761,6 +2074,183 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
+void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
+ // The address is assumed to be uniform, so if it ends up in a VGPR, it will
+ // be copied to an SGPR with readfirstlane.
+ unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
+ AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(2);
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ MachineMemOperand *MMO = M->getMemOperand();
+ bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+ SDValue Offset;
+ if (CurDAG->isBaseWithConstantOffset(Ptr)) {
+ SDValue PtrBase = Ptr.getOperand(0);
+ SDValue PtrOffset = Ptr.getOperand(1);
+
+ const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+ if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
+ N = glueCopyToM0(N, PtrBase);
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
+ }
+ }
+
+ if (!Offset) {
+ N = glueCopyToM0(N, Ptr);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
+ }
+
+ SDValue Ops[] = {
+ Offset,
+ CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
+ Chain,
+ N->getOperand(N->getNumOperands() - 1) // New glue
+ };
+
+ SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
+}
+
+static unsigned gwsIntrinToOpcode(unsigned IntrID) {
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_gws_init:
+ return AMDGPU::DS_GWS_INIT;
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ return AMDGPU::DS_GWS_BARRIER;
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ return AMDGPU::DS_GWS_SEMA_V;
+ case Intrinsic::amdgcn_ds_gws_sema_br:
+ return AMDGPU::DS_GWS_SEMA_BR;
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ return AMDGPU::DS_GWS_SEMA_P;
+ case Intrinsic::amdgcn_ds_gws_sema_release_all:
+ return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
+ default:
+ llvm_unreachable("not a gws intrinsic");
+ }
+}
+
+void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
+ if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+ !Subtarget->hasGWSSemaReleaseAll()) {
+ // Let this error.
+ SelectCode(N);
+ return;
+ }
+
+ // Chain, intrinsic ID, vsrc, offset
+ const bool HasVSrc = N->getNumOperands() == 4;
+ assert(HasVSrc || N->getNumOperands() == 3);
+
+ SDLoc SL(N);
+ SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
+ int ImmOffset = 0;
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ MachineMemOperand *MMO = M->getMemOperand();
+
+ // Don't worry if the offset ends up in a VGPR. Only one lane will have an
+ // effect, so SIFixSGPRCopies will validly insert readfirstlane.
+
+ // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+ // offset field) % 64. Some versions of the programming guide omit the m0
+ // part, or claim it's from offset 0.
+ if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
+ // If we have a constant offset, try to use the default value for m0 as a
+ // base to possibly avoid setting it up.
+ glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
+ ImmOffset = ConstOffset->getZExtValue() + 1;
+ } else {
+ if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
+ ImmOffset = BaseOffset.getConstantOperandVal(1);
+ BaseOffset = BaseOffset.getOperand(0);
+ }
+
+ // Prefer to do the shift in an SGPR since it should be possible to use m0
+ // as the result directly. If it's already an SGPR, it will be eliminated
+ // later.
+ SDNode *SGPROffset
+ = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
+ BaseOffset);
+ // Shift to offset in m0
+ SDNode *M0Base
+ = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
+ SDValue(SGPROffset, 0),
+ CurDAG->getTargetConstant(16, SL, MVT::i32));
+ glueCopyToM0(N, SDValue(M0Base, 0));
+ }
+
+ SDValue V0;
+ SDValue Chain = N->getOperand(0);
+ SDValue Glue;
+ if (HasVSrc) {
+ SDValue VSrc0 = N->getOperand(2);
+
+ // The manual doesn't mention this, but it seems only v0 works.
+ V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
+
+ SDValue CopyToV0 = CurDAG->getCopyToReg(
+ N->getOperand(0), SL, V0, VSrc0,
+ N->getOperand(N->getNumOperands() - 1));
+ Chain = CopyToV0;
+ Glue = CopyToV0.getValue(1);
+ }
+
+ SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
+
+ // TODO: Can this just be removed from the instruction?
+ SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
+
+ const unsigned Opc = gwsIntrinToOpcode(IntrID);
+ SmallVector<SDValue, 5> Ops;
+ if (HasVSrc)
+ Ops.push_back(V0);
+ Ops.push_back(OffsetField);
+ Ops.push_back(GDS);
+ Ops.push_back(Chain);
+
+ if (HasVSrc)
+ Ops.push_back(Glue);
+
+ SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ if (N->getValueType(0) != MVT::i32)
+ break;
+ SelectDSAppendConsume(N, IntrID);
+ return;
+ }
+ }
+
+ SelectCode(N);
+}
+
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ case Intrinsic::amdgcn_ds_gws_sema_br:
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ case Intrinsic::amdgcn_ds_gws_sema_release_all:
+ SelectDS_GWS(N, IntrID);
+ return;
+ default:
+ break;
+ }
+
+ SelectCode(N);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
unsigned &Mods) const {
Mods = 0;
@@ -1796,6 +2286,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
return isNoNanSrc(Src);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ if (In.getValueType() == MVT::f32)
+ return SelectVOP3Mods(In, Src, SrcMods);
+ Src = In;
+ SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
return false;
@@ -1833,41 +2332,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
return true;
}
-static SDValue stripBitcast(SDValue Val) {
- return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
-}
-
-// Figure out if this is really an extract of the high 16-bits of a dword.
-static bool isExtractHiElt(SDValue In, SDValue &Out) {
- In = stripBitcast(In);
- if (In.getOpcode() != ISD::TRUNCATE)
- return false;
-
- SDValue Srl = In.getOperand(0);
- if (Srl.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
- if (ShiftAmt->getZExtValue() == 16) {
- Out = stripBitcast(Srl.getOperand(0));
- return true;
- }
- }
- }
-
- return false;
-}
-
-// Look through operations that obscure just looking at the low 16-bits of the
-// same register.
-static SDValue stripExtractLoElt(SDValue In) {
- if (In.getOpcode() == ISD::TRUNCATE) {
- SDValue Src = In.getOperand(0);
- if (Src.getValueType().getSizeInBits() == 32)
- return stripBitcast(Src);
- }
-
- return In;
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
@@ -2020,39 +2484,31 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
return true;
}
-// TODO: Can we identify things like v_mad_mixhi_f16?
-bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
- if (In.isUndef()) {
- Src = In;
- return true;
- }
+SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
+ if (In.isUndef())
+ return CurDAG->getUNDEF(MVT::i32);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
SDLoc SL(In);
- SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
- MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SL, MVT::i32, K);
- Src = SDValue(MovK, 0);
- return true;
+ return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
}
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
SDLoc SL(In);
- SDValue K = CurDAG->getTargetConstant(
+ return CurDAG->getConstant(
C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
- MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SL, MVT::i32, K);
- Src = SDValue(MovK, 0);
- return true;
}
- return isExtractHiElt(In, Src);
+ SDValue Src;
+ if (isExtractHiElt(In, Src))
+ return Src;
+
+ return SDValue();
}
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return false;
- }
+ assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
+
const SIRegisterInfo *SIRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
const SIInstrInfo * SII =
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 6951c915b177..39016ed37193 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,7 +20,6 @@
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
@@ -65,9 +63,9 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
case MVT::v2f32:
case MVT::v4i16:
case MVT::v4f16: {
- // Up to SGPR0-SGPR39
+ // Up to SGPR0-SGPR105
return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::SGPR_64RegClass, 20);
+ &AMDGPU::SGPR_64RegClass, 53);
}
default:
return false;
@@ -152,15 +150,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);
+
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
+
setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
+ setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
+
setOperationAction(ISD::LOAD, MVT::i64, Promote);
AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
@@ -237,15 +244,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::STORE, MVT::v3f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);
+
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::STORE, MVT::v5f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
+
setOperationAction(ISD::STORE, MVT::v8f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v16f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
+ setOperationAction(ISD::STORE, MVT::v32f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
+
setOperationAction(ISD::STORE, MVT::i64, Promote);
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
@@ -327,16 +343,28 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v3i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
@@ -394,7 +422,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
static const MVT::SimpleValueType VectorIntTypes[] = {
- MVT::v2i32, MVT::v4i32
+ MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32
};
for (MVT VT : VectorIntTypes) {
@@ -436,7 +464,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
}
static const MVT::SimpleValueType FloatVectorTypes[] = {
- MVT::v2f32, MVT::v4f32
+ MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32
};
for (MVT VT : FloatVectorTypes) {
@@ -478,9 +506,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);
+
setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
+
// There are no libcalls of any kind.
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
@@ -499,6 +533,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// vector compares until that is fixed.
setHasMultipleConditionRegisters(true);
+ setMinCmpXchgSizeInBits(32);
+ setSupportsUnalignedAtomics(false);
+
PredictableSelectIsExpensive = false;
// We want to find all load dependencies for long chains of stores to enable
@@ -592,6 +629,7 @@ static bool hasSourceMods(const SDNode *N) {
case ISD::FDIV:
case ISD::FREM:
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
case AMDGPUISD::INTERP_P1:
case AMDGPUISD::INTERP_P2:
case AMDGPUISD::DIV_SCALE:
@@ -640,7 +678,8 @@ bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
// The backend supports 32 and 64 bit floating point immediates.
// FIXME: Why are we reporting vectors of FP immediates as legal?
-bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
EVT ScalarVT = VT.getScalarType();
return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
(ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
@@ -690,8 +729,9 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
return (OldSize < 32);
}
-bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
- EVT CastTy) const {
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
@@ -701,8 +741,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
unsigned LScalarSize = LoadTy.getScalarSizeInBits();
unsigned CastScalarSize = CastTy.getScalarSizeInBits();
- return (LScalarSize < CastScalarSize) ||
- (CastScalarSize >= 32);
+ if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
+ return false;
+
+ bool Fast = false;
+ return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy,
+ MMO, &Fast) && Fast;
}
// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
@@ -849,9 +893,6 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) {
switch (CC) {
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- llvm_unreachable("kernels should not be handled here");
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_GS:
case CallingConv::AMDGPU_PS:
@@ -864,8 +905,10 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::Fast:
case CallingConv::Cold:
return CC_AMDGPU_Func;
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
default:
- report_fatal_error("Unsupported calling convention.");
+ report_fatal_error("Unsupported calling convention for call");
}
}
@@ -1010,9 +1053,10 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
MemVT = MemVT.getScalarType();
- if (MemVT.isExtended()) {
- // This should really only happen if we have vec3 arguments
- assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
+ // Round up vec3/vec5 argument.
+ if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
+ assert(MemVT.getVectorNumElements() == 3 ||
+ MemVT.getVectorNumElements() == 5);
MemVT = MemVT.getPow2VectorType(State.getContext());
}
@@ -1372,6 +1416,41 @@ SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
}
+// Split a vector type into two parts. The first part is a power of two vector.
+// The second part is whatever is left over, and is a scalar if it would
+// otherwise be a 1-vector.
+std::pair<EVT, EVT>
+AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const {
+ EVT LoVT, HiVT;
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
+ LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts);
+ HiVT = NumElts - LoNumElts == 1
+ ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts);
+ return std::make_pair(LoVT, HiVT);
+}
+
+// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
+// scalar.
+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
+ const EVT &LoVT, const EVT &HiVT,
+ SelectionDAG &DAG) const {
+ assert(LoVT.getVectorNumElements() +
+ (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
+ N.getValueType().getVectorNumElements() &&
+ "More vector elements requested than available!");
+ auto IdxTy = getVectorIdxTy(DAG.getDataLayout());
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
+ DAG.getConstant(0, DL, IdxTy));
+ SDValue Hi = DAG.getNode(
+ HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
+ HiVT, N, DAG.getConstant(LoVT.getVectorNumElements(), DL, IdxTy));
+ return std::make_pair(Lo, Hi);
+}
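A minimal standalone sketch of the element-count arithmetic used by getSplitDestVTs above; powerOf2Ceil here is a local stand-in for llvm::PowerOf2Ceil, and the element counts are just examples. It shows that v3 splits into v2 plus a scalar, v5 into v4 plus a scalar, and power-of-two widths split evenly.

#include <cstdio>
#include <initializer_list>

// Stand-in for llvm::PowerOf2Ceil: smallest power of two >= N (for N >= 1).
static unsigned powerOf2Ceil(unsigned N) {
  unsigned P = 1;
  while (P < N)
    P <<= 1;
  return P;
}

int main() {
  // Reproduces the LoVT/HiVT element-count split computed above.
  for (unsigned NumElts : {3u, 4u, 5u, 8u, 16u}) {
    unsigned LoNumElts = powerOf2Ceil((NumElts + 1) / 2);
    unsigned HiNumElts = NumElts - LoNumElts;
    std::printf("v%u -> lo v%u, hi %u element%s%s\n", NumElts, LoNumElts,
                HiNumElts, HiNumElts == 1 ? "" : "s",
                HiNumElts == 1 ? " (scalar)" : "");
  }
  return 0;
}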
+
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1393,9 +1472,9 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
EVT LoMemVT, HiMemVT;
SDValue Lo, Hi;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
- std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+ std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
+ std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
+ std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG);
unsigned Size = LoMemVT.getStoreSize();
unsigned BaseAlign = Load->getAlignment();
@@ -1410,15 +1489,52 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
- SDValue Ops[] = {
- DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
- DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
- LoLoad.getValue(1), HiLoad.getValue(1))
- };
+ auto IdxTy = getVectorIdxTy(DAG.getDataLayout());
+ SDValue Join;
+ if (LoVT == HiVT) {
+ // This is the case that the vector is power of two so was evenly split.
+ Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad);
+ } else {
+ Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad,
+ DAG.getConstant(0, SL, IdxTy));
+ Join = DAG.getNode(HiVT.isVector() ? ISD::INSERT_SUBVECTOR
+ : ISD::INSERT_VECTOR_ELT,
+ SL, VT, Join, HiLoad,
+ DAG.getConstant(LoVT.getVectorNumElements(), SL, IdxTy));
+ }
+
+ SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+ LoLoad.getValue(1), HiLoad.getValue(1))};
return DAG.getMergeValues(Ops, SL);
}
+// Widen a vector load from vec3 to vec4.
+SDValue AMDGPUTargetLowering::WidenVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ EVT VT = Op.getValueType();
+ assert(VT.getVectorNumElements() == 3);
+ SDValue BasePtr = Load->getBasePtr();
+ EVT MemVT = Load->getMemoryVT();
+ SDLoc SL(Op);
+ const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
+ unsigned BaseAlign = Load->getAlignment();
+
+ EVT WideVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
+ EVT WideMemVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4);
+ SDValue WideLoad = DAG.getExtLoad(
+ Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
+ WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
+ return DAG.getMergeValues(
+ {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
+ DAG.getConstant(0, SL, getVectorIdxTy(DAG.getDataLayout()))),
+ WideLoad.getValue(1)},
+ SL);
+}
+
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1439,9 +1555,9 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
EVT LoMemVT, HiMemVT;
SDValue Lo, Hi;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
- std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT);
+ std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
+ std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
+ std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);
SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
@@ -2788,6 +2904,54 @@ bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
return true;
}
+// Find a load or store from the corresponding pattern root.
+// Roots may be build_vector, bitconvert, or combinations of them.
+static MemSDNode* findMemSDNode(SDNode *N) {
+ N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+ if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+ return MN;
+ assert(isa<BuildVectorSDNode>(N));
+ for (SDValue V : N->op_values())
+ if (MemSDNode *MN =
+ dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+ return MN;
+ llvm_unreachable("cannot find MemSDNode in the pattern!");
+}
+
+bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
+ SelectionDAG &DAG,
+ SDNode *N,
+ SDValue Addr,
+ SDValue &VAddr,
+ SDValue &Offset,
+ SDValue &SLC) const {
+ const GCNSubtarget &ST =
+ DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
+ int64_t OffsetVal = 0;
+
+ if (ST.hasFlatInstOffsets() &&
+ (!ST.hasFlatSegmentOffsetBug() ||
+ findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+ DAG.isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
+ IsSigned)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ }
+ }
+
+ VAddr = Addr;
+ Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+ SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
+
+ return true;
+}
+
// Replace load of an illegal type with a store of a bitcast to a friendlier
// type.
SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
@@ -2812,7 +2976,8 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
// Expand unaligned loads earlier than legalization. Due to visitation order
// problems during legalization, the emitted instructions to pack and unpack
// the bytes again are not eliminated in the case of an unaligned copy.
- if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ if (!allowsMisalignedMemoryAccesses(
+ VT, AS, Align, LN->getMemOperand()->getFlags(), &IsFast)) {
if (VT.isVector())
return scalarizeVectorLoad(LN, DAG);
@@ -2864,7 +3029,8 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
// order problems during legalization, the emitted instructions to pack and
// unpack the bytes again are not eliminated in the case of an unaligned
// copy.
- if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ if (!allowsMisalignedMemoryAccesses(
+ VT, AS, Align, SN->getMemOperand()->getFlags(), &IsFast)) {
if (VT.isVector())
return scalarizeVectorStore(SN, DAG);
@@ -3049,30 +3215,44 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- if (N->getValueType(0) != MVT::i64)
- return SDValue();
-
- const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS)
return SDValue();
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
unsigned ShiftAmt = RHS->getZExtValue();
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ // fold (srl (and x, c1 << c2), c2) -> (and (srl x, c2), c1)
+ // This improves the ability to match BFE patterns in isel.
+ if (LHS.getOpcode() == ISD::AND) {
+ if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
+ if (Mask->getAPIntValue().isShiftedMask() &&
+ Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) {
+ return DAG.getNode(
+ ISD::AND, SL, VT,
+ DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
+ DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
+ }
+ }
+ }
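(Worked instance of the fold above, constants invented for illustration: with c1 = 0xFF and c2 = 8, (srl (and x, 0xFF00), 8) becomes (and (srl x, 8), 0xFF); 0xFF00 is a shifted mask whose trailing-zero count equals the shift amount, and 0xFF00 >> 8 == 0xFF, so the two forms compute the same value.)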
+
+ if (VT != MVT::i64)
+ return SDValue();
+
if (ShiftAmt < 32)
return SDValue();
// srl i64:x, C for C >= 32
// =>
// build_pair (srl hi_32(x), C - 32), 0
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc SL(N);
-
SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
- VecOp, One);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
@@ -3090,7 +3270,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
SDValue Src = N->getOperand(0);
// vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
- if (Src.getOpcode() == ISD::BITCAST) {
+ if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue Vec = Src.getOperand(0);
if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
SDValue Elt0 = Vec.getOperand(0);
@@ -3478,13 +3658,11 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
SelectionDAG &DAG = DCI.DAG;
- if ((DAG.isConstantValueOfAnyType(True) ||
- DAG.isConstantValueOfAnyType(True)) &&
- (!DAG.isConstantValueOfAnyType(False) &&
- !DAG.isConstantValueOfAnyType(False))) {
+ if (DAG.isConstantValueOfAnyType(True) &&
+ !DAG.isConstantValueOfAnyType(False)) {
// Swap cmp + select pair to move constant to false input.
// This will allow using VOPC cndmasks more often.
- // select (setcc x, y), k, x -> select (setcc y, x) x, x
+ // select (setcc x, y), k, x -> select (setccinv x, y), x, k
SDLoc SL(N);
ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
@@ -3594,6 +3772,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
RHS = RHS.getOperand(0);
SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
+ if (Res.getOpcode() != ISD::FADD)
+ return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -3613,6 +3793,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
+ if (Res.getOpcode() != Opc)
+ return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -3640,6 +3822,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
RHS = RHS.getOperand(0);
SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
+ if (Res.getOpcode() != Opc)
+ return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -3668,6 +3852,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
unsigned Opposite = inverseMinMax(Opc);
SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
+ if (Res.getOpcode() != Opposite)
+ return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -3678,6 +3864,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
+ if (Res.getOpcode() != AMDGPUISD::FMED3)
+ return SDValue(); // Op got folded away.
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -4051,9 +4239,19 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
const ArgDescriptor &Arg) const {
assert(Arg && "Attempting to load missing argument");
- if (Arg.isRegister())
- return CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL);
- return loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
+ SDValue V = Arg.isRegister() ?
+ CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) :
+ loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
+
+ if (!Arg.isMasked())
+ return V;
+
+ unsigned Mask = Arg.getMask();
+ unsigned Shift = countTrailingZeros<unsigned>(Mask);
+ V = DAG.getNode(ISD::SRL, SL, VT, V,
+ DAG.getShiftAmountConstant(Shift, VT, SL));
+ return DAG.getNode(ISD::AND, SL, VT, V,
+ DAG.getConstant(Mask >> Shift, SL, VT));
}
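(Worked example for the masked-argument path above, mask chosen only for illustration: with Mask = 0x3FF0, countTrailingZeros gives Shift = 4, so the loaded value is shifted right by 4 and then ANDed with 0x3FF0 >> 4 = 0x3FF, extracting bits [13:4] as a 10-bit field.)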
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
@@ -4175,6 +4373,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
+ NODE_NAME_CASE(LDS)
NODE_NAME_CASE(KILL)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
@@ -4185,24 +4384,38 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)
+ NODE_NAME_CASE(INTERP_P1LL_F16)
+ NODE_NAME_CASE(INTERP_P1LV_F16)
+ NODE_NAME_CASE(INTERP_P2_F16)
+ NODE_NAME_CASE(LOAD_D16_HI)
+ NODE_NAME_CASE(LOAD_D16_LO)
+ NODE_NAME_CASE(LOAD_D16_HI_I8)
+ NODE_NAME_CASE(LOAD_D16_HI_U8)
+ NODE_NAME_CASE(LOAD_D16_LO_I8)
+ NODE_NAME_CASE(LOAD_D16_LO_U8)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
- NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
+ NODE_NAME_CASE(DS_ORDERED_COUNT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)
- NODE_NAME_CASE(ATOMIC_LOAD_FADD)
NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
NODE_NAME_CASE(BUFFER_LOAD)
+ NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
+ NODE_NAME_CASE(BUFFER_LOAD_USHORT)
+ NODE_NAME_CASE(BUFFER_LOAD_BYTE)
+ NODE_NAME_CASE(BUFFER_LOAD_SHORT)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(SBUFFER_LOAD)
NODE_NAME_CASE(BUFFER_STORE)
+ NODE_NAME_CASE(BUFFER_STORE_BYTE)
+ NODE_NAME_CASE(BUFFER_STORE_SHORT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
@@ -4216,6 +4429,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUFFER_ATOMIC_OR)
NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
+ NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
+ NODE_NAME_CASE(BUFFER_ATOMIC_PK_FADD)
+ NODE_NAME_CASE(ATOMIC_FADD)
+ NODE_NAME_CASE(ATOMIC_PK_FADD)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
@@ -4367,6 +4584,23 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
}
break;
}
+ case AMDGPUISD::BUFFER_LOAD_UBYTE: {
+ Known.Zero.setHighBits(24);
+ break;
+ }
+ case AMDGPUISD::BUFFER_LOAD_USHORT: {
+ Known.Zero.setHighBits(16);
+ break;
+ }
+ case AMDGPUISD::LDS: {
+ auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
+ unsigned Align = GA->getGlobal()->getAlignment();
+
+ Known.Zero.setHighBits(16);
+ if (Align)
+ Known.Zero.setLowBits(Log2_32(Align));
+ break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IID) {
@@ -4412,6 +4646,14 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
case AMDGPUISD::CARRY:
case AMDGPUISD::BORROW:
return 31;
+ case AMDGPUISD::BUFFER_LOAD_BYTE:
+ return 25;
+ case AMDGPUISD::BUFFER_LOAD_SHORT:
+ return 17;
+ case AMDGPUISD::BUFFER_LOAD_UBYTE:
+ return 24;
+ case AMDGPUISD::BUFFER_LOAD_USHORT:
+ return 16;
case AMDGPUISD::FP_TO_FP16:
case AMDGPUISD::FP16_ZEXT:
return 16;
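(Sanity check on the counts added above: a buffer load that sign-extends an i8 into 32 bits leaves the top 25 bits equal to the sign bit (32 - 8 + 1), and an i16 leaves 17; the zero-extending byte and short forms guarantee only the 24 and 16 bits above the loaded width, which are known zero.)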
@@ -4519,7 +4761,12 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
TargetLowering::AtomicExpansionKind
AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
- if (RMW->getOperation() == AtomicRMWInst::Nand)
+ switch (RMW->getOperation()) {
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
return AtomicExpansionKind::CmpXChg;
- return AtomicExpansionKind::None;
+ default:
+ return AtomicExpansionKind::None;
+ }
}
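(Background on the expansion kind used above: AtomicExpansionKind::CmpXChg requests that AtomicExpandPass rewrite the atomicrmw into a compare-exchange retry loop, so nand, fadd, and fsub RMW operations are lowered through cmpxchg rather than selected directly.)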
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 0d22cb2e3e20..fe7ad694943d 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -1,9 +1,8 @@
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,9 +110,23 @@ protected:
SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
+ /// Split a vector type into two parts. The first part is a power of two
+ /// vector. The second part is whatever is left over, and is a scalar if it
+ /// would otherwise be a 1-vector.
+ std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;
+
+ /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
+ /// scalar.
+ std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
+ const EVT &LoVT, const EVT &HighVT,
+ SelectionDAG &DAG) const;
+
/// Split a vector load into 2 loads of half the vector.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ /// Widen a vector load from vec3 to vec4.
+ SDValue WidenVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+
/// Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -162,13 +175,15 @@ public:
MVT getVectorIdxTy(const DataLayout &) const override;
bool isSelectSupported(SelectSupportKind) const override;
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
bool ShouldShrinkFPConstant(EVT VT) const override;
bool shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtType,
EVT ExtVT) const override;
- bool isLoadBitCastBeneficial(EVT, EVT) const final;
+ bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const final;
bool storeOfVectorConstantIsCheap(EVT MemVT,
unsigned NumElem,
@@ -212,15 +227,15 @@ public:
const char* getTargetNodeName(unsigned Opcode) const override;
- // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection
- // for AMDGPU.
- // A commit ( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319036
- // 91177308-0d34-0410-b5e6-96231b3b80d8 ) turned on
- // MergeConsecutiveStores() before Instruction Selection for all targets.
- // Enough AMDGPU compiles go into an infinite loop ( MergeConsecutiveStores()
- // merges two stores; LegalizeStoreOps() un-merges; MergeConsecutiveStores()
- // re-merges, etc. ) to warrant turning it off for now.
- bool mergeStoresAfterLegalization() const override { return false; }
+ // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
+ // AMDGPU. Commit r319036,
+ // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
+ // turned on MergeConsecutiveStores() before Instruction Selection for all
+ // targets. Enough AMDGPU compiles go into an infinite loop (
+ // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
+ // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
+ // now.
+ bool mergeStoresAfterLegalization(EVT) const override { return false; }
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
@@ -309,6 +324,10 @@ public:
}
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+ bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
+ SDValue Addr, SDValue &VAddr, SDValue &Offset,
+ SDValue &SLC) const;
};
namespace AMDGPUISD {
@@ -463,28 +482,44 @@ enum NodeType : unsigned {
INTERP_MOV,
INTERP_P1,
INTERP_P2,
+ INTERP_P1LL_F16,
+ INTERP_P1LV_F16,
+ INTERP_P2_F16,
PC_ADD_REL_OFFSET,
+ LDS,
KILL,
DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LOAD_D16_HI,
+ LOAD_D16_LO,
+ LOAD_D16_HI_I8,
+ LOAD_D16_HI_U8,
+ LOAD_D16_LO_I8,
+ LOAD_D16_LO_U8,
+
STORE_MSKOR,
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
- TBUFFER_STORE_FORMAT_X3,
TBUFFER_STORE_FORMAT_D16,
TBUFFER_LOAD_FORMAT,
TBUFFER_LOAD_FORMAT_D16,
+ DS_ORDERED_COUNT,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
- ATOMIC_LOAD_FADD,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_FMAX,
BUFFER_LOAD,
+ BUFFER_LOAD_UBYTE,
+ BUFFER_LOAD_USHORT,
+ BUFFER_LOAD_BYTE,
+ BUFFER_LOAD_SHORT,
BUFFER_LOAD_FORMAT,
BUFFER_LOAD_FORMAT_D16,
SBUFFER_LOAD,
BUFFER_STORE,
+ BUFFER_STORE_BYTE,
+ BUFFER_STORE_SHORT,
BUFFER_STORE_FORMAT,
BUFFER_STORE_FORMAT_D16,
BUFFER_ATOMIC_SWAP,
@@ -498,6 +533,10 @@ enum NodeType : unsigned {
BUFFER_ATOMIC_OR,
BUFFER_ATOMIC_XOR,
BUFFER_ATOMIC_CMPSWAP,
+ BUFFER_ATOMIC_FADD,
+ BUFFER_ATOMIC_PK_FADD,
+ ATOMIC_FADD,
+ ATOMIC_PK_FADD,
LAST_AMDGPU_ISD_NUMBER
};
diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp
index 945c9acd379a..f4df20b8f03e 100644
--- a/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,7 +39,7 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
cl::desc("Cost of alloca argument"));
// If the amount of scratch memory to eliminate exceeds our ability to allocate
@@ -50,6 +49,12 @@ static cl::opt<unsigned>
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
cl::desc("Maximum alloca size to use for inline cost"));
+// Inliner constraint to keep compilation time reasonable
+static cl::opt<size_t>
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+ cl::desc("Maximum BB number allowed in a function after inlining"
+ " (compile time constraint)"));
+
namespace {
class AMDGPUInliner : public LegacyInlinerBase {
@@ -112,7 +117,8 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
Callee->hasFnAttribute(Attribute::InlineHint);
if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
&& !Caller->hasFnAttribute(Attribute::MinSize))
- Thres = Params.HintThreshold.getValue();
+ Thres = Params.HintThreshold.getValue() *
+ TTIWP->getTTI(*Callee).getInliningThresholdMultiplier();
const DataLayout &DL = Caller->getParent()->getDataLayout();
if (!Callee)
@@ -124,10 +130,11 @@ unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
uint64_t AllocaSize = 0;
SmallPtrSet<const AllocaInst *, 8> AIVisited;
for (Value *PtrArg : CS.args()) {
- Type *Ty = PtrArg->getType();
- if (!Ty->isPointerTy() ||
- Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
+ if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
+ Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
continue;
+
PtrArg = GetUnderlyingObject(PtrArg, DL);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
@@ -170,7 +177,6 @@ static bool isWrapperOnlyCall(CallSite CS) {
InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
- TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
if (!Callee || Callee->isDeclaration())
return llvm::InlineCost::getNever("undefined callee");
@@ -178,13 +184,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
if (CS.isNoInline())
return llvm::InlineCost::getNever("noinline");
+ TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
if (!TTI.areInlineCompatible(Caller, Callee))
return llvm::InlineCost::getNever("incompatible");
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
- if (isInlineViable(*Callee))
+ auto IsViable = isInlineViable(*Callee);
+ if (IsViable)
return llvm::InlineCost::getAlways("alwaysinline viable");
- return llvm::InlineCost::getNever("alwaysinline unviable");
+ return llvm::InlineCost::getNever(IsViable.message);
}
if (isWrapperOnlyCall(CS))
@@ -206,6 +214,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Callee, LocalParams, TTI, GetAssumptionCache,
- None, PSI, RemarksEnabled ? &ORE : nullptr);
+ auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
+ LocalParams, TTI, GetAssumptionCache, None, PSI,
+ RemarksEnabled ? &ORE : nullptr);
+
+ if (IC && !IC.isAlways() && !Callee->hasFnAttribute(Attribute::InlineHint)) {
+ // A single BB does not increase the total BB count, so subtract 1
+ size_t Size = Caller->size() + Callee->size() - 1;
+ if (MaxBB && Size > MaxBB)
+ return llvm::InlineCost::getNever("max number of bb exceeded");
+ }
+ return IC;
}
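(Worked example of the basic-block cap above, block counts invented for illustration: with the default amdgpu-inline-max-bb of 300, inlining a 150-block callee into a 200-block caller would give 200 + 150 - 1 = 349 blocks, exceeding the cap, so getNever is returned; the cap is skipped when the cost is already always-inline or the callee carries inlinehint.)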
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 07aa7c2cc8ad..9951cbf2326e 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 2f8166da0d33..698189e14c21 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -1,9 +1,8 @@
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 82644be26563..4a8446955496 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -1,9 +1,8 @@
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,27 +50,21 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUIfOp : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
+ [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;
def AMDGPUElseOp : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
+ [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;
def AMDGPULoopOp : SDTypeProfile<0, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
+ [SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>]
>;
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
->;
-
-def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
- [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
+ [SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>]
>;
-def SDT_AMDGPUTCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
-
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -96,7 +89,8 @@ def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
SDNPVariadic]
>;
-def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN", SDT_AMDGPUTCRET,
+def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
@@ -205,14 +199,8 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
-// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own
-// nodes in TargetSelectionDAG.td.
-def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>;
-
-def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>;
-
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
- SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
+ SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
]>;
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
@@ -251,7 +239,8 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
-def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
+ [SDNPOptInGlue]>;
// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
@@ -370,6 +359,17 @@ def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
SDTypeProfile<1, 4, [SDTCisFP<0>]>,
[SDNPInGlue]>;
+def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
+ SDTypeProfile<1, 7, [SDTCisFP<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p1lv_f16 : SDNode<"AMDGPUISD::INTERP_P1LV_F16",
+ SDTypeProfile<1, 9, [SDTCisFP<0>]>,
+ [SDNPInGlue, SDNPOutGlue]>;
+
+def AMDGPUinterp_p2_f16 : SDNode<"AMDGPUISD::INTERP_P2_F16",
+ SDTypeProfile<1, 8, [SDTCisFP<0>]>,
+ [SDNPInGlue]>;
def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 8eb49d49b2e0..901a2eaa8829 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -18,10 +17,11 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -35,6 +35,7 @@
#define DEBUG_TYPE "amdgpu-isel"
using namespace llvm;
+using namespace MIPatternMatch;
#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
@@ -60,11 +61,101 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
+static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg == AMDGPU::SCC;
+
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ const TargetRegisterClass *RC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
+ if (RC) {
+ // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
+ // context of the register bank has been lost.
+ if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
+ return false;
+ const LLT Ty = MRI.getType(Reg);
+ return Ty.isValid() && Ty.getSizeInBits() == 1;
+ }
+
+ const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ return RB->getID() == AMDGPU::SCCRegBankID;
+}
+
+bool AMDGPUInstructionSelector::isVCC(Register Reg,
+ const MachineRegisterInfo &MRI) const {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg == TRI.getVCC();
+
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ const TargetRegisterClass *RC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
+ if (RC) {
+ const LLT Ty = MRI.getType(Reg);
+ return RC->hasSuperClassEq(TRI.getBoolRC()) &&
+ Ty.isValid() && Ty.getSizeInBits() == 1;
+ }
+
+ const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ return RB->getID() == AMDGPU::VCCRegBankID;
+}
+
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
+ const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
I.setDesc(TII.get(TargetOpcode::COPY));
+
+ const MachineOperand &Src = I.getOperand(1);
+ MachineOperand &Dst = I.getOperand(0);
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src.getReg();
+
+ if (isVCC(DstReg, MRI)) {
+ if (SrcReg == AMDGPU::SCC) {
+ const TargetRegisterClass *RC
+ = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+ if (!RC)
+ return true;
+ return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ }
+
+ if (!isVCC(SrcReg, MRI)) {
+ // TODO: Should probably leave the copy and let copyPhysReg expand it.
+ if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
+ return false;
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+ .addImm(0)
+ .addReg(SrcReg);
+
+ if (!MRI.getRegClassOrNull(SrcReg))
+ MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+ I.eraseFromParent();
+ return true;
+ }
+
+ const TargetRegisterClass *RC =
+ TRI.getConstrainedRegClassForOperand(Dst, MRI);
+ if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
+ return false;
+
+ // Don't constrain the source register to a class so the def instruction
+ // handles it (unless it's undef).
+ //
+ // FIXME: This is a hack. When selecting the def, we need to know
+ // specifically that the result is VCCRegBank, and not just an SGPR
+ // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
+ if (Src.isUndef()) {
+ const TargetRegisterClass *SrcRC =
+ TRI.getConstrainedRegClassForOperand(Src, MRI);
+ if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+ return false;
+ }
+
+ return true;
+ }
+
for (const MachineOperand &MO : I.operands()) {
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
continue;
@@ -78,15 +169,54 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
return true;
}
+bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const Register DefReg = I.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+
+ // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)
+
+ const RegClassOrRegBank &RegClassOrBank =
+ MRI.getRegClassOrRegBank(DefReg);
+
+ const TargetRegisterClass *DefRC
+ = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ if (!DefRC) {
+ if (!DefTy.isValid()) {
+ LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+ return false;
+ }
+
+ const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+ if (RB.getID() == AMDGPU::SCCRegBankID) {
+ LLVM_DEBUG(dbgs() << "illegal scc phi\n");
+ return false;
+ }
+
+ DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
+ if (!DefRC) {
+ LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+ return false;
+ }
+ }
+
+ I.setDesc(TII.get(TargetOpcode::PHI));
+ return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
+}
+
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+ const TargetRegisterClass &SubRC,
unsigned SubIdx) const {
MachineInstr *MI = MO.getParent();
MachineBasicBlock *BB = MO.getParent()->getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register DstReg = MRI.createVirtualRegister(&SubRC);
if (MO.isReg()) {
unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
@@ -118,51 +248,273 @@ static int64_t getConstant(const MachineInstr *MI) {
return MI->getOperand(1).getCImm()->getSExtValue();
}
-bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
+static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
+ switch (Opc) {
+ case AMDGPU::G_AND:
+ return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+ case AMDGPU::G_OR:
+ return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
+ case AMDGPU::G_XOR:
+ return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
+ default:
+ llvm_unreachable("not a bit op");
+ }
+}
+
+bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
- unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ MachineOperand &Dst = I.getOperand(0);
+ MachineOperand &Src0 = I.getOperand(1);
+ MachineOperand &Src1 = I.getOperand(2);
+ Register DstReg = Dst.getReg();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ if (DstRB->getID() == AMDGPU::VCCRegBankID) {
+ const TargetRegisterClass *RC = TRI.getBoolRC();
+ unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
+ RC == &AMDGPU::SReg_64RegClass);
+ I.setDesc(TII.get(InstOpc));
+
+ // FIXME: Hack to avoid turning the register bank into a register class.
+ // The selector for G_ICMP relies on seeing that the register bank for the result
+ // is VCC. In wave32, if we constrain the registers to SReg_32 here, it will
+ // be ambiguous whether it's a scalar or vector bool.
+ if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
+ MRI.setRegClass(Src0.getReg(), RC);
+ if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
+ MRI.setRegClass(Src1.getReg(), RC);
+
+ return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+ }
- if (Size != 64)
- return false;
+ // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
+ // the result?
+ if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
+ unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
+ I.setDesc(TII.get(InstOpc));
- DebugLoc DL = I.getDebugLoc();
+ const TargetRegisterClass *RC
+ = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+ if (!RC)
+ return false;
+ return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
+ RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
+ RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
+ }
- MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
- MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+ return false;
+}
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
- .add(Lo1)
- .add(Lo2);
+bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ Register DstReg = I.getOperand(0).getReg();
+ const DebugLoc &DL = I.getDebugLoc();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
+ const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
- MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
- MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
+ if (Size == 32) {
+ if (IsSALU) {
+ const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
+ MachineInstr *Add =
+ BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
+ }
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
- .add(Hi1)
- .add(Hi2);
+ if (STI.hasAddNoCarry()) {
+ const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
+ I.setDesc(TII.get(Opc));
+ I.addOperand(*MF, MachineOperand::CreateImm(0));
+ I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
- .addReg(DstLo)
- .addImm(AMDGPU::sub0)
- .addReg(DstHi)
- .addImm(AMDGPU::sub1);
+ const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;
- for (MachineOperand &MO : I.explicit_operands()) {
- if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- continue;
- RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
+ Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
+ MachineInstr *Add
+ = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
+ .addDef(UnusedCarry, RegState::Dead)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2))
+ .addImm(0);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
}
+ assert(!Sub && "illegal sub should not reach here");
+
+ const TargetRegisterClass &RC
+ = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass &HalfRC
+ = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
+
+ MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
+ MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
+ MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
+ MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
+
+ Register DstLo = MRI.createVirtualRegister(&HalfRC);
+ Register DstHi = MRI.createVirtualRegister(&HalfRC);
+
+ if (IsSALU) {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+ .add(Lo1)
+ .add(Lo2);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+ .add(Hi1)
+ .add(Hi2);
+ } else {
+ const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
+ Register CarryReg = MRI.createVirtualRegister(CarryRC);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
+ .addDef(CarryReg)
+ .add(Lo1)
+ .add(Lo2)
+ .addImm(0);
+ MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
+ .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
+ .add(Hi1)
+ .add(Hi2)
+ .addReg(CarryReg, RegState::Kill)
+ .addImm(0);
+
+ if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
+ return false;
+ }
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(DstLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(DstHi)
+ .addImm(AMDGPU::sub1);
+
+
+ if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ assert(I.getOperand(2).getImm() % 32 == 0);
+ unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
+ const DebugLoc &DL = I.getDebugLoc();
+ MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
+ I.getOperand(0).getReg())
+ .addReg(I.getOperand(1).getReg(), 0, SubReg);
+
+ for (const MachineOperand &MO : Copy->operands()) {
+ const TargetRegisterClass *RC =
+ TRI.getConstrainedRegClassForOperand(MO, MRI);
+ if (!RC)
+ continue;
+ RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
+ }
I.eraseFromParent();
return true;
}
+bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
+ MachineBasicBlock *BB = MI.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ if (SrcSize < 32)
+ return false;
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const TargetRegisterClass *DstRC =
+ TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
+ if (!DstRC)
+ return false;
+
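+ // Lower the merge into a REG_SEQUENCE, placing each source value into
+ // consecutive SrcSize-wide subregisters of the destination register.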
+ ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
+ for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
+ MachineOperand &Src = MI.getOperand(I + 1);
+ MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
+ MIB.addImm(SubRegs[I]);
+
+ const TargetRegisterClass *SrcRC
+ = TRI.getConstrainedRegClassForOperand(Src, MRI);
+ if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
+ return false;
+ }
+
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
+ MachineBasicBlock *BB = MI.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const int NumDst = MI.getNumOperands() - 1;
+
+ MachineOperand &Src = MI.getOperand(NumDst);
+
+ Register SrcReg = Src.getReg();
+ Register DstReg0 = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg0);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+
+ const TargetRegisterClass *SrcRC =
+ TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
+ if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+ return false;
+
+ const unsigned SrcFlags = getUndefRegState(Src.isUndef());
+
+ // Note we could have mixed SGPR and VGPR destination banks for an SGPR
+ // source, and this relies on the fact that the same subregister indices are
+ // used for both.
+ ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
+ for (int I = 0, E = NumDst; I != E; ++I) {
+ MachineOperand &Dst = MI.getOperand(I);
+ BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
+ .addReg(SrcReg, SrcFlags, SubRegs[I]);
+
+ const TargetRegisterClass *DstRC =
+ TRI.getConstrainedRegClassForOperand(Dst, MRI);
+ if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
+ return false;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
- return selectG_ADD(I);
+ return selectG_ADD_SUB(I);
}
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
@@ -170,47 +522,200 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
const MachineOperand &MO = I.getOperand(0);
- const TargetRegisterClass *RC =
- TRI.getConstrainedRegClassForOperand(MO, MRI);
- if (RC)
+
+ // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
+ // regbank check here is to know why getConstrainedRegClassForOperand failed.
+ const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
+ if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
+ (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
+ I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
+ DebugLoc DL = I.getDebugLoc();
+ MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
+ .addDef(I.getOperand(0).getReg())
+ .addReg(I.getOperand(1).getReg())
+ .addReg(I.getOperand(2).getReg())
+ .addImm(SubReg);
+
+ for (const MachineOperand &MO : Ins->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+
+ const TargetRegisterClass *RC =
+ TRI.getConstrainedRegClassForOperand(MO, MRI);
+ if (!RC)
+ continue;
RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
- I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
+ }
+ I.eraseFromParent();
return true;
}
-bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
- unsigned IntrinsicID = I.getOperand(1).getIntrinsicID();
-
+bool AMDGPUInstructionSelector::selectG_INTRINSIC(
+ MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
+ unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
switch (IntrinsicID) {
- default:
- break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
case Intrinsic::amdgcn_cvt_pkrtz:
return selectImpl(I, CoverageInfo);
-
- case Intrinsic::amdgcn_kernarg_segment_ptr: {
- MachineFunction *MF = I.getParent()->getParent();
+ case Intrinsic::amdgcn_if_break: {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const ArgDescriptor *InputPtrReg;
- const TargetRegisterClass *RC;
- const DebugLoc &DL = I.getDebugLoc();
-
- std::tie(InputPtrReg, RC)
- = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
- if (!InputPtrReg)
- report_fatal_error("missing kernarg segment ptr");
- BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
+ // SelectionDAG uses for wave32 vs wave64.
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
.add(I.getOperand(0))
- .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+
+ Register DstReg = I.getOperand(0).getReg();
+ Register Src0Reg = I.getOperand(2).getReg();
+ Register Src1Reg = I.getOperand(3).getReg();
+
I.eraseFromParent();
+
+ for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
+ if (!MRI.getRegClassOrNull(Reg))
+ MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+ }
+
return true;
}
+ default:
+ return selectImpl(I, CoverageInfo);
+ }
+}
+
+static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
+ if (Size != 32 && Size != 64)
+ return -1;
+ switch (P) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case CmpInst::ICMP_NE:
+ return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
+ case CmpInst::ICMP_EQ:
+ return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
+ case CmpInst::ICMP_SGT:
+ return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
+ case CmpInst::ICMP_SGE:
+ return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
+ case CmpInst::ICMP_SLT:
+ return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
+ case CmpInst::ICMP_SLE:
+ return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
+ case CmpInst::ICMP_UGT:
+ return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
+ case CmpInst::ICMP_UGE:
+ return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
+ case CmpInst::ICMP_ULT:
+ return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
+ case CmpInst::ICMP_ULE:
+ return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
}
- return false;
+}
+
+int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
+ unsigned Size) const {
+ if (Size == 64) {
+ if (!STI.hasScalarCompareEq64())
+ return -1;
+
+ switch (P) {
+ case CmpInst::ICMP_NE:
+ return AMDGPU::S_CMP_LG_U64;
+ case CmpInst::ICMP_EQ:
+ return AMDGPU::S_CMP_EQ_U64;
+ default:
+ return -1;
+ }
+ }
+
+ if (Size != 32)
+ return -1;
+
+ switch (P) {
+ case CmpInst::ICMP_NE:
+ return AMDGPU::S_CMP_LG_U32;
+ case CmpInst::ICMP_EQ:
+ return AMDGPU::S_CMP_EQ_U32;
+ case CmpInst::ICMP_SGT:
+ return AMDGPU::S_CMP_GT_I32;
+ case CmpInst::ICMP_SGE:
+ return AMDGPU::S_CMP_GE_I32;
+ case CmpInst::ICMP_SLT:
+ return AMDGPU::S_CMP_LT_I32;
+ case CmpInst::ICMP_SLE:
+ return AMDGPU::S_CMP_LE_I32;
+ case CmpInst::ICMP_UGT:
+ return AMDGPU::S_CMP_GT_U32;
+ case CmpInst::ICMP_UGE:
+ return AMDGPU::S_CMP_GE_U32;
+ case CmpInst::ICMP_ULT:
+ return AMDGPU::S_CMP_LT_U32;
+ case CmpInst::ICMP_ULE:
+ return AMDGPU::S_CMP_LE_U32;
+ default:
+ llvm_unreachable("Unknown condition code!");
+ }
+}
+
+bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ unsigned SrcReg = I.getOperand(2).getReg();
+ unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+ auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+
+ unsigned CCReg = I.getOperand(0).getReg();
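+ // If the result is on the SCC bank, emit a scalar compare and copy SCC into
+ // the destination; otherwise emit a VALU compare producing a per-lane
+ // condition mask.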
+ if (isSCC(CCReg, MRI)) {
+ int Opcode = getS_CMPOpcode(Pred, Size);
+ if (Opcode == -1)
+ return false;
+ MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
+ .addReg(AMDGPU::SCC);
+ bool Ret =
+ constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
+ RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
+ I.eraseFromParent();
+ return Ret;
+ }
+
+ int Opcode = getV_CMPOpcode(Pred, Size);
+ if (Opcode == -1)
+ return false;
+
+ MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
+ I.getOperand(0).getReg())
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+ RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+ *TRI.getBoolRC(), MRI);
+ bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
}
static MachineInstr *
@@ -232,8 +737,7 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
- MachineInstr &I,
- CodeGenCoverage &CoverageInfo) const {
+ MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -272,8 +776,72 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
}
+ case Intrinsic::amdgcn_end_cf: {
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
+ // SelectionDAG uses for wave32 vs wave64.
+ BuildMI(*BB, &I, I.getDebugLoc(),
+ TII.get(AMDGPU::SI_END_CF))
+ .add(I.getOperand(1));
+
+ Register Reg = I.getOperand(1).getReg();
+ I.eraseFromParent();
+
+ if (!MRI.getRegClassOrNull(Reg))
+ MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+ return true;
}
- return false;
+ default:
+ return selectImpl(I, CoverageInfo);
+ }
+}
+
+bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const DebugLoc &DL = I.getDebugLoc();
+
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+ assert(Size <= 32 || Size == 64);
+ const MachineOperand &CCOp = I.getOperand(1);
+ unsigned CCReg = CCOp.getReg();
+ if (isSCC(CCReg, MRI)) {
+ unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
+ AMDGPU::S_CSELECT_B32;
+ MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(CCReg);
+
+ // The generic constrainSelectedInstRegOperands doesn't work for the scc
+ // register bank, because it does not cover the register class we use to
+ // represent it, so manually set the register class here.
+ if (!MRI.getRegClassOrNull(CCReg))
+ MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
+ MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
+ .add(I.getOperand(2))
+ .add(I.getOperand(3));
+
+ bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
+ constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+ }
+
+ // Wide VGPR select should have been split in RegBankSelect.
+ if (Size > 32)
+ return false;
+
+ MachineInstr *Select =
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
+ .add(I.getOperand(3))
+ .addImm(0)
+ .add(I.getOperand(2))
+ .add(I.getOperand(1));
+
+ bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
}
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
@@ -281,10 +849,16 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
DebugLoc DL = I.getDebugLoc();
+ unsigned PtrSize = RBI.getSizeInBits(I.getOperand(1).getReg(), MRI, TRI);
+ if (PtrSize != 64) {
+ LLVM_DEBUG(dbgs() << "Unhandled address space\n");
+ return false;
+ }
+
unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
unsigned Opcode;
- // FIXME: Select store instruction based on address space
+ // FIXME: Remove this when integers > s32 are naturally selected.
switch (StoreSize) {
default:
return false;
@@ -307,7 +881,8 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
.add(I.getOperand(0))
.addImm(0) // offset
.addImm(0) // glc
- .addImm(0); // slc
+ .addImm(0) // slc
+ .addImm(0); // dlc
// Now that we selected an opcode, we need to constrain the register
@@ -318,6 +893,218 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
return Ret;
}
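+// Map a value size in bits to the widest sub0..subN subregister index covering
+// it; sizes that aren't a supported multiple of 32 round up, e.g. 48 bits uses
+// sub0_sub1, and anything over 256 bits is rejected.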
+static int sizeToSubRegIndex(unsigned Size) {
+ switch (Size) {
+ case 32:
+ return AMDGPU::sub0;
+ case 64:
+ return AMDGPU::sub0_sub1;
+ case 96:
+ return AMDGPU::sub0_sub1_sub2;
+ case 128:
+ return AMDGPU::sub0_sub1_sub2_sub3;
+ case 256:
+ return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
+ default:
+ if (Size < 32)
+ return AMDGPU::sub0;
+ if (Size > 256)
+ return -1;
+ return sizeToSubRegIndex(PowerOf2Ceil(Size));
+ }
+}
+
+bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ if (!DstTy.isScalar())
+ return false;
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
+ if (SrcRB != DstRB)
+ return false;
+
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned SrcSize = SrcTy.getSizeInBits();
+
+ const TargetRegisterClass *SrcRC
+ = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
+ const TargetRegisterClass *DstRC
+ = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);
+
+ if (SrcSize > 32) {
+ int SubRegIdx = sizeToSubRegIndex(DstSize);
+ if (SubRegIdx == -1)
+ return false;
+
+ // Deal with weird cases where the class only partially supports the subreg
+ // index.
+ SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
+ if (!SrcRC)
+ return false;
+
+ I.getOperand(1).setSubReg(SubRegIdx);
+ }
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ return false;
+ }
+
+ I.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+}
+
+/// \returns true if a bitmask for \p Size bits will be an inline immediate.
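+/// For example, a 6-bit mask (0x3f = 63) is within the inline constant range
+/// [-16, 64], while a 16-bit mask (0xffff) is not and requires a BFE instead.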
+static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
+ Mask = maskTrailingOnes<unsigned>(Size);
+ int SignedMask = static_cast<int>(Mask);
+ return SignedMask >= -16 && SignedMask <= 64;
+}
+
+bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
+ bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
+ const DebugLoc &DL = I.getDebugLoc();
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+
+ const LLT DstTy = MRI.getType(DstReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ const LLT S1 = LLT::scalar(1);
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+ const unsigned DstSize = DstTy.getSizeInBits();
+ if (!DstTy.isScalar())
+ return false;
+
+ const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);
+
+ if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
+ if (SrcTy != S1 || DstSize > 64) // Invalid
+ return false;
+
+ unsigned Opcode =
+ DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+ const TargetRegisterClass *DstRC =
+ DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
+
+ // FIXME: Create an extra copy to avoid incorrectly constraining the result
+ // of the scc producer.
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
+ .addReg(SrcReg);
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(TmpReg);
+
+ // The instruction operands are backwards from what you would expect.
+ BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
+ .addImm(0)
+ .addImm(Signed ? -1 : 1);
+ return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
+ }
+
+ if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
+ if (SrcTy != S1) // Invalid
+ return false;
+
+ MachineInstr *ExtI =
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0) // src0_modifiers
+ .addImm(0) // src0
+ .addImm(0) // src1_modifiers
+ .addImm(Signed ? -1 : 1) // src1
+ .addUse(SrcReg);
+ return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ }
+
+ if (I.getOpcode() == AMDGPU::G_ANYEXT)
+ return selectCOPY(I);
+
+ if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
+ // 64-bit should have been split up in RegBankSelect
+
+ // Try to use an and with a mask if it will save code size.
+ unsigned Mask;
+ if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
+ MachineInstr *ExtI =
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
+ .addImm(Mask)
+ .addReg(SrcReg);
+ return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ }
+
+ const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
+ MachineInstr *ExtI =
+ BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
+ .addReg(SrcReg)
+ .addImm(0) // Offset
+ .addImm(SrcSize); // Width
+ return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ }
+
+ if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
+ if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
+ return false;
+
+ if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
+ const unsigned SextOpc = SrcSize == 8 ?
+ AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
+ BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
+ .addReg(SrcReg);
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+ }
+
+ const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
+ const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
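+ // e.g. a sign extend from i8 to i64 emits S_BFE_I64 with immediate (8 << 16),
+ // i.e. width = 8 and offset = 0.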
+ if (DstSize > 32 && SrcSize <= 32) {
+ // We need a 64-bit register source, but the high bits don't matter.
+ unsigned ExtReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned UndefReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
+ .addReg(SrcReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(UndefReg)
+ .addImm(AMDGPU::sub1);
+
+ BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
+ .addReg(ExtReg)
+ .addImm(SrcSize << 16);
+
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+ }
+
+ unsigned Mask;
+ if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
+ BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
+ .addReg(SrcReg)
+ .addImm(Mask);
+ } else {
+ BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
+ .addReg(SrcReg)
+ .addImm(SrcSize << 16);
+ }
+
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
+ }
+
+ return false;
+}
+
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
@@ -423,7 +1210,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
-static bool isInstrUniform(const MachineInstr &MI) {
+bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
if (!MI.hasOneMemOperand())
return false;
@@ -445,52 +1232,6 @@ static bool isInstrUniform(const MachineInstr &MI) {
return I && I->getMetadata("amdgpu.uniform");
}
-static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
-
- if (LoadSize == 32)
- return BaseOpcode;
-
- switch (BaseOpcode) {
- case AMDGPU::S_LOAD_DWORD_IMM:
- switch (LoadSize) {
- case 64:
- return AMDGPU::S_LOAD_DWORDX2_IMM;
- case 128:
- return AMDGPU::S_LOAD_DWORDX4_IMM;
- case 256:
- return AMDGPU::S_LOAD_DWORDX8_IMM;
- case 512:
- return AMDGPU::S_LOAD_DWORDX16_IMM;
- }
- break;
- case AMDGPU::S_LOAD_DWORD_IMM_ci:
- switch (LoadSize) {
- case 64:
- return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
- case 128:
- return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
- case 256:
- return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
- case 512:
- return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
- }
- break;
- case AMDGPU::S_LOAD_DWORD_SGPR:
- switch (LoadSize) {
- case 64:
- return AMDGPU::S_LOAD_DWORDX2_SGPR;
- case 128:
- return AMDGPU::S_LOAD_DWORDX4_SGPR;
- case 256:
- return AMDGPU::S_LOAD_DWORDX8_SGPR;
- case 512:
- return AMDGPU::S_LOAD_DWORDX16_SGPR;
- }
- break;
- }
- llvm_unreachable("Invalid base smrd opcode or size");
-}
-
bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
for (const GEPInfo &GEPInfo : AddrInfo) {
if (!GEPInfo.VgprParts.empty())
@@ -499,125 +1240,77 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
return false;
}
-bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
- ArrayRef<GEPInfo> AddrInfo) const {
-
- if (!I.hasOneMemOperand())
- return false;
-
- if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
- return false;
-
- if (!isInstrUniform(I))
- return false;
-
- if (hasVgprParts(AddrInfo))
- return false;
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+ // TODO: Can/should we insert m0 initialization here for DS instructions and
+ // call the normal selector?
+ return false;
+}
+bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
- const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned DstReg = I.getOperand(0).getReg();
+ MachineOperand &CondOp = I.getOperand(0);
+ Register CondReg = CondOp.getReg();
const DebugLoc &DL = I.getDebugLoc();
- unsigned Opcode;
- unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-
- if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
-
- const GEPInfo &GEPInfo = AddrInfo[0];
-
- unsigned PtrReg = GEPInfo.SgprParts[0];
- int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
- if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
- Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
- MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
- .addReg(PtrReg)
- .addImm(EncodedImm)
- .addImm(0); // glc
- return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
- }
+ unsigned BrOpcode;
+ Register CondPhysReg;
+ const TargetRegisterClass *ConstrainRC;
+
+ // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
+ // whether the branch is uniform when selecting the instruction. In
+ // GlobalISel, we should push that decision into RegBankSelect. Assume for now
+ // RegBankSelect knows what it's doing if the branch condition is scc, even
+ // though it currently does not.
+ if (isSCC(CondReg, MRI)) {
+ CondPhysReg = AMDGPU::SCC;
+ BrOpcode = AMDGPU::S_CBRANCH_SCC1;
+ ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
+ } else if (isVCC(CondReg, MRI)) {
+ // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
+ // Based on the register bank, we sort of know that a VCC producer ands
+ // inactive lanes with 0. What if there were a logical operation with vcc
+ // producers in different blocks/with different exec masks?
+ // FIXME: Should scc->vcc copies and with exec?
+ CondPhysReg = TRI.getVCC();
+ BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
+ ConstrainRC = TRI.getBoolRC();
+ } else
+ return false;
- if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
- isUInt<32>(EncodedImm)) {
- Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
- MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
- .addReg(PtrReg)
- .addImm(EncodedImm)
- .addImm(0); // glc
- return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
- }
+ if (!MRI.getRegClassOrNull(CondReg))
+ MRI.setRegClass(CondReg, ConstrainRC);
- if (isUInt<32>(GEPInfo.Imm)) {
- Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
- unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(GEPInfo.Imm);
-
- MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
- .addReg(PtrReg)
- .addReg(OffsetReg)
- .addImm(0); // glc
- return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
- }
- }
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
+ .addReg(CondReg);
+ BuildMI(*BB, &I, DL, TII.get(BrOpcode))
+ .addMBB(I.getOperand(1).getMBB());
- unsigned PtrReg = I.getOperand(1).getReg();
- Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
- MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
- .addReg(PtrReg)
- .addImm(0)
- .addImm(0); // glc
- return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
}
-
-bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- DebugLoc DL = I.getDebugLoc();
- unsigned DstReg = I.getOperand(0).getReg();
- unsigned PtrReg = I.getOperand(1).getReg();
- unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
- unsigned Opcode;
-
- SmallVector<GEPInfo, 4> AddrInfo;
-
- getAddrModeInfo(I, MRI, AddrInfo);
-
- if (selectSMRD(I, AddrInfo)) {
- I.eraseFromParent();
- return true;
- }
- switch (LoadSize) {
- default:
- llvm_unreachable("Load size not supported\n");
- case 32:
- Opcode = AMDGPU::FLAT_LOAD_DWORD;
- break;
- case 64:
- Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
- break;
- }
+ Register DstReg = I.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
+ I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
+ if (IsVGPR)
+ I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
- MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
- .add(I.getOperand(0))
- .addReg(PtrReg)
- .addImm(0) // offset
- .addImm(0) // glc
- .addImm(0); // slc
-
- bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
- I.eraseFromParent();
- return Ret;
+ return RBI.constrainGenericRegister(
+ DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}
bool AMDGPUInstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
+ if (I.isPHI())
+ return selectPHI(I);
if (!isPreISelGenericOpcode(I.getOpcode())) {
if (I.isCopy())
@@ -626,28 +1319,75 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
}
switch (I.getOpcode()) {
- default:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ if (selectG_AND_OR_XOR(I))
+ return true;
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_ADD:
- return selectG_ADD(I);
+ case TargetOpcode::G_SUB:
+ if (selectG_ADD_SUB(I))
+ return true;
+ LLVM_FALLTHROUGH;
+ default:
+ return selectImpl(I, CoverageInfo);
case TargetOpcode::G_INTTOPTR:
case TargetOpcode::G_BITCAST:
return selectCOPY(I);
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
return selectG_CONSTANT(I);
+ case TargetOpcode::G_EXTRACT:
+ return selectG_EXTRACT(I);
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ return selectG_MERGE_VALUES(I);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return selectG_UNMERGE_VALUES(I);
case TargetOpcode::G_GEP:
return selectG_GEP(I);
case TargetOpcode::G_IMPLICIT_DEF:
return selectG_IMPLICIT_DEF(I);
+ case TargetOpcode::G_INSERT:
+ return selectG_INSERT(I);
case TargetOpcode::G_INTRINSIC:
return selectG_INTRINSIC(I, CoverageInfo);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
+ case TargetOpcode::G_ICMP:
+ if (selectG_ICMP(I))
+ return true;
+ return selectImpl(I, CoverageInfo);
case TargetOpcode::G_LOAD:
- return selectG_LOAD(I);
+ return selectImpl(I, CoverageInfo);
+ case TargetOpcode::G_SELECT:
+ return selectG_SELECT(I);
case TargetOpcode::G_STORE:
+ if (selectImpl(I, CoverageInfo))
+ return true;
return selectG_STORE(I);
+ case TargetOpcode::G_TRUNC:
+ return selectG_TRUNC(I);
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ if (selectG_SZA_EXT(I)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ return false;
+ case TargetOpcode::G_BRCOND:
+ return selectG_BRCOND(I);
+ case TargetOpcode::G_FRAME_INDEX:
+ return selectG_FRAME_INDEX(I);
+ case TargetOpcode::G_FENCE:
+ // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
+ // is checking for G_CONSTANT
+ I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
+ return true;
}
return false;
}
@@ -660,6 +1400,26 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
}
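+// Peel G_FNEG and then G_FABS feeding a VOP3 source operand and fold them into
+// the NEG/ABS source modifier bits, returning the stripped source register and
+// the modifier mask.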
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3ModsImpl(
+ Register Src, const MachineRegisterInfo &MRI) const {
+ unsigned Mods = 0;
+ MachineInstr *MI = MRI.getVRegDef(Src);
+
+ if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
+ Src = MI->getOperand(1).getReg();
+ Mods |= SISrcMods::NEG;
+ MI = MRI.getVRegDef(Src);
+ }
+
+ if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
+ Src = MI->getOperand(1).getReg();
+ Mods |= SISrcMods::ABS;
+ }
+
+ return std::make_pair(Src, Mods);
+}
+
///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
@@ -672,11 +1432,18 @@ AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
+ MachineRegisterInfo &MRI
+ = Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
+
return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod
}};
}
InstructionSelector::ComplexRendererFns
@@ -690,8 +1457,274 @@ AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
+ MachineRegisterInfo &MRI
+ = Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);
+
return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ SmallVector<GEPInfo, 4> AddrInfo;
+ getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+ if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+ return None;
+
+ const GEPInfo &GEPInfo = AddrInfo[0];
+
+ if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
+ return None;
+
+ unsigned PtrReg = GEPInfo.SgprParts[0];
+ int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ SmallVector<GEPInfo, 4> AddrInfo;
+ getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
+
+ if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+ return None;
+
+ const GEPInfo &GEPInfo = AddrInfo[0];
+ unsigned PtrReg = GEPInfo.SgprParts[0];
+ int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
+ if (!isUInt<32>(EncodedImm))
+ return None;
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ SmallVector<GEPInfo, 4> AddrInfo;
+ getAddrModeInfo(*MI, MRI, AddrInfo);
+
+ // FIXME: We should shrink the GEP if the offset is known to fit in 32 bits,
+ // then we can select all ptr + 32-bit offsets, not just immediate offsets.
+ if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
+ return None;
+
+ const GEPInfo &GEPInfo = AddrInfo[0];
+ if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
+ return None;
+
+ // If we make it this far we have a load with a 32-bit immediate offset.
+ // It is OK to select this using an SGPR offset, because we have already
+ // failed trying to select this load into one of the _IMM variants since
+ // the _IMM patterns are considered before the _SGPR patterns.
+ unsigned PtrReg = GEPInfo.SgprParts[0];
+ unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(GEPInfo.Imm);
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
+ }};
+}
+
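+// If the flat address comes from a G_GEP with a constant offset that fits the
+// target's flat instruction offset field, fold the offset and use the GEP's
+// base pointer directly; otherwise fall back to offset 0.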
+template <bool Signed>
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ InstructionSelector::ComplexRendererFns Default = {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
+ }};
+
+ if (!STI.hasFlatInstOffsets())
+ return Default;
+
+ const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
+ if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
+ return Default;
+
+ Optional<int64_t> Offset =
+ getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
+ if (!Offset.hasValue())
+ return Default;
+
+ unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
+ if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
+ return Default;
+
+ Register BasePtr = OpDef->getOperand(1).getReg();
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
+ return selectFlatOffsetImpl<false>(Root);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
+ return selectFlatOffsetImpl<true>(Root);
+}
+
+// FIXME: Implement
+static bool signBitIsZero(const MachineOperand &Op,
+ const MachineRegisterInfo &MRI) {
+ return false;
+}
+
+static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
+ auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
+ return PSV && PSV->isStack();
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+
+ int64_t Offset = 0;
+ if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
+ Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // TODO: Should this be inside the render function? The iterator seems to
+ // move.
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
+ HighBits)
+ .addImm(Offset & ~4095);
+
+ return {{[=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(Info->getScratchRSrcReg());
+ },
+ [=](MachineInstrBuilder &MIB) { // vaddr
+ MIB.addReg(HighBits);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+ const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+ Register SOffsetReg = isStackPtrRelative(PtrInfo)
+ ? Info->getStackPtrOffsetReg()
+ : Info->getScratchWaveOffsetReg();
+ MIB.addReg(SOffsetReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // offset
+ MIB.addImm(Offset & 4095);
+ }}};
+ }
+
+ assert(Offset == 0);
+
+ // Try to fold a frame index directly into the MUBUF vaddr field, and any
+ // offsets.
+ Optional<int> FI;
+ Register VAddr = Root.getReg();
+ if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
+ if (isBaseWithConstantOffset(Root, MRI)) {
+ const MachineOperand &LHS = RootDef->getOperand(1);
+ const MachineOperand &RHS = RootDef->getOperand(2);
+ const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+ const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+ if (LHSDef && RHSDef) {
+ int64_t PossibleOffset =
+ RHSDef->getOperand(1).getCImm()->getSExtValue();
+ if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
+ (!STI.privateMemoryResourceIsRangeChecked() ||
+ signBitIsZero(LHS, MRI))) {
+ if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
+ FI = LHSDef->getOperand(1).getIndex();
+ else
+ VAddr = LHS.getReg();
+ Offset = PossibleOffset;
+ }
+ }
+ } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+ FI = RootDef->getOperand(1).getIndex();
+ }
+ }
+
+ // If we don't know this private access is a local stack object, it needs to
+ // be relative to the entry point's scratch wave offset register.
+ // TODO: Should split large offsets that don't fit like above.
+ // TODO: Don't use scratch wave offset just because the offset didn't fit.
+ Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
+ : Info->getScratchWaveOffsetReg();
+
+ return {{[=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(Info->getScratchRSrcReg());
+ },
+ [=](MachineInstrBuilder &MIB) { // vaddr
+ if (FI.hasValue())
+ MIB.addFrameIndex(FI.getValue());
+ else
+ MIB.addReg(VAddr);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ MIB.addReg(SOffset);
+ },
+ [=](MachineInstrBuilder &MIB) { // offset
+ MIB.addImm(Offset);
+ }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFScratchOffset(
+ MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ int64_t Offset = 0;
+ if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
+ !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+ return {};
+
+ const MachineFunction *MF = MBB->getParent();
+ const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+ const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+ Register SOffsetReg = isStackPtrRelative(PtrInfo)
+ ? Info->getStackPtrOffsetReg()
+ : Info->getScratchWaveOffsetReg();
+ return {{
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(Info->getScratchRSrcReg());
+ }, // rsrc
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
}};
}
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 449431adc561..4f489ddfb23d 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -1,9 +1,8 @@
//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -18,7 +17,9 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/IR/InstrTypes.h"
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
@@ -58,24 +59,45 @@ private:
GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
};
+ bool isInstrUniform(const MachineInstr &MI) const;
+ bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
+
/// tblgen-erated 'select' implementation.
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
- MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
+ MachineOperand getSubOperand64(MachineOperand &MO,
+ const TargetRegisterClass &SubRC,
+ unsigned SubIdx) const;
bool selectCOPY(MachineInstr &I) const;
+ bool selectPHI(MachineInstr &I) const;
+ bool selectG_TRUNC(MachineInstr &I) const;
+ bool selectG_SZA_EXT(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
- bool selectG_ADD(MachineInstr &I) const;
+ bool selectG_AND_OR_XOR(MachineInstr &I) const;
+ bool selectG_ADD_SUB(MachineInstr &I) const;
+ bool selectG_EXTRACT(MachineInstr &I) const;
+ bool selectG_MERGE_VALUES(MachineInstr &I) const;
+ bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
bool selectG_GEP(MachineInstr &I) const;
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
+ bool selectG_INSERT(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const;
+ int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
+ bool selectG_ICMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
bool selectG_LOAD(MachineInstr &I) const;
+ bool selectG_SELECT(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
+ bool selectG_BRCOND(MachineInstr &I) const;
+ bool selectG_FRAME_INDEX(MachineInstr &I) const;
+
+ std::pair<Register, unsigned>
+ selectVOP3ModsImpl(Register Src, const MachineRegisterInfo &MRI) const;
InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand &Root) const;
@@ -90,6 +112,27 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSmrdImm(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSmrdImm32(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectSmrdSgpr(MachineOperand &Root) const;
+
+ template <bool Signed>
+ InstructionSelector::ComplexRendererFns
+ selectFlatOffsetImpl(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectFlatOffset(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectFlatOffsetSigned(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFScratchOffen(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFScratchOffset(MachineOperand &Root) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index eb8f2002ff2d..61bc415c839d 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -1,9 +1,8 @@
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,18 @@
//
//===----------------------------------------------------------------------===//
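+// Numeric address space values matching the AMDGPUAS enum (Flat = 0,
+// Global = 1, Region = 2, Local = 3, Constant = 4, Private = 5), exposed to
+// patterns through the AddrSpaces def below.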
+class AddressSpacesImpl {
+ int Flat = 0;
+ int Global = 1;
+ int Region = 2;
+ int Local = 3;
+ int Constant = 4;
+ int Private = 5;
+}
+
+def AddrSpaces : AddressSpacesImpl;
+
+
class AMDGPUInst <dag outs, dag ins, string asm = "",
list<dag> pattern = []> : Instruction {
field bit isRegisterLoad = 0;
@@ -66,17 +77,15 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
def TruePredicate : Predicate<"true">;
-// Exists to help track down where SubtargetPredicate isn't set rather
-// than letting tablegen crash with an unhelpful error.
-def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
-
class PredicateControl {
- Predicate SubtargetPredicate = InvalidPred;
+ Predicate SubtargetPredicate = TruePredicate;
list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
+ Predicate WaveSizePredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
list<Predicate> Predicates = !listconcat([SubtargetPredicate,
- AssemblerPredicate],
+ AssemblerPredicate,
+ WaveSizePredicate],
AssemblerPredicates,
OtherPredicates);
}
@@ -326,6 +335,10 @@ def TEX_SHADOW_ARRAY : PatLeaf<
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
+class AddressSpaceList<list<int> AS> {
+ list<int> AddrSpaces = AS;
+}
+
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;
@@ -344,21 +357,25 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
(ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
>;
-class PrivateAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
+def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
+def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
+def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;
-class ConstantAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
+def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
+ AddrSpaces.Global,
+ AddrSpaces.Constant ]>;
+def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;
+
+def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
+def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
+
+def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
+def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
+
+def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
+def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
-class LocalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-class GlobalAddress : CodePatPred<[{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
class GlobalLoadAddress : CodePatPred<[{
auto AS = cast<MemSDNode>(N)->getAddressSpace();
@@ -372,86 +389,126 @@ class FlatLoadAddress : CodePatPred<[{
AS == AMDGPUAS::CONSTANT_ADDRESS;
}]>;
-class FlatStoreAddress : CodePatPred<[{
- const auto AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS;
+class GlobalAddress : CodePatPred<[{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
-class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
- (ld_node node:$ptr), [{
- LoadSDNode *L = cast<LoadSDNode>(N);
- return L->getExtensionType() == ISD::ZEXTLOAD ||
- L->getExtensionType() == ISD::EXTLOAD;
+class PrivateAddress : CodePatPred<[{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;
-def az_extload : AZExtLoadBase <unindexedload>;
-
-def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+class LocalAddress : CodePatPred<[{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
-def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+class RegionAddress : CodePatPred<[{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
}]>;
-def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+class FlatStoreAddress : CodePatPred<[{
+ const auto AS = cast<MemSDNode>(N)->getAddressSpace();
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
-class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
+// TODO: Remove these when stores to new PatFrag format.
class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
-
-class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
-
-class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
+class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
-
-class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
-class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
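+// Generate load/store PatFrags for each address space, e.g. load_global,
+// sextloadi8_private and truncstorei16_flat, each constrained to the matching
+// address spaces via the AddressSpaces field.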
+foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
-def load_private : PrivateLoad <load>;
-def az_extloadi8_private : PrivateLoad <az_extloadi8>;
-def sextloadi8_private : PrivateLoad <sextloadi8>;
-def az_extloadi16_private : PrivateLoad <az_extloadi16>;
-def sextloadi16_private : PrivateLoad <sextloadi16>;
+def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+}
-def store_private : PrivateStore <store>;
-def truncstorei8_private : PrivateStore<truncstorei8>;
-def truncstorei16_private : PrivateStore <truncstorei16>;
-def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
-def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
+def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i8;
+}
+
+def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i16;
+}
+def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i8;
+}
+
+def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i16;
+}
+
+def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i8;
+}
+
+def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i16;
+}
+
+def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i32;
+}
-def load_global : GlobalLoad <load>;
-def sextloadi8_global : GlobalLoad <sextloadi8>;
-def az_extloadi8_global : GlobalLoad <az_extloadi8>;
-def sextloadi16_global : GlobalLoad <sextloadi16>;
-def az_extloadi16_global : GlobalLoad <az_extloadi16>;
-def atomic_load_global : GlobalLoad<atomic_load>;
+def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i64;
+}
+
+def store_#as : PatFrag<(ops node:$val, node:$ptr),
+ (unindexedstore node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
+
+// truncstore fragments.
+def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
+ (unindexedstore node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 1;
+}
+
+// TODO: We don't really need the truncstore here. We can use
+// unindexedstore with MemoryVT directly, which will save an
+// unnecessary check that the memory size is less than the value type
+// in the generated matcher table.
+def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i8;
+}
+
+def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr)> {
+ let IsStore = 1;
+ let MemoryVT = i16;
+}
+
+defm atomic_store_#as : binary_atomic_op<atomic_store>;
+
+} // End let AddressSpaces = ...
+} // End foreach AddrSpace
+
+
+def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
+def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
-def store_global : GlobalStore <store>;
-def truncstorei8_global : GlobalStore <truncstorei8>;
-def truncstorei16_global : GlobalStore <truncstorei16>;
def store_atomic_global : GlobalStore<atomic_store>;
def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
-def load_local : LocalLoad <load>;
-def az_extloadi8_local : LocalLoad <az_extloadi8>;
-def sextloadi8_local : LocalLoad <sextloadi8>;
-def az_extloadi16_local : LocalLoad <az_extloadi16>;
-def sextloadi16_local : LocalLoad <sextloadi16>;
-def atomic_load_32_local : LocalLoad<atomic_load_32>;
-def atomic_load_64_local : LocalLoad<atomic_load_64>;
-
-def store_local : LocalStore <store>;
-def truncstorei8_local : LocalStore <truncstorei8>;
-def truncstorei16_local : LocalStore <truncstorei16>;
def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
def atomic_store_local : LocalStore <atomic_store>;
@@ -472,34 +529,24 @@ def store_align16_local : Aligned16Bytes <
(ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
>;
-def load_flat : FlatLoad <load>;
-def az_extloadi8_flat : FlatLoad <az_extloadi8>;
-def sextloadi8_flat : FlatLoad <sextloadi8>;
-def az_extloadi16_flat : FlatLoad <az_extloadi16>;
-def sextloadi16_flat : FlatLoad <sextloadi16>;
-def atomic_load_flat : FlatLoad<atomic_load>;
-
-def store_flat : FlatStore <store>;
-def truncstorei8_flat : FlatStore <truncstorei8>;
-def truncstorei16_flat : FlatStore <truncstorei16>;
def atomic_store_flat : FlatStore <atomic_store>;
def truncstorei8_hi16_flat : StoreHi16<truncstorei8>, FlatStoreAddress;
def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
-def constant_load : ConstantLoad<load>;
-def sextloadi8_constant : ConstantLoad <sextloadi8>;
-def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
-def sextloadi16_constant : ConstantLoad <sextloadi16>;
-def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
-
-
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+class region_binary_atomic_op<SDNode atomic_op> :
+ PatFrag<(ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
+
def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
@@ -524,13 +571,22 @@ class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
return AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+class AtomicCmpSwapRegion <SDNode cmp_swap_node> : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$swap),
+ (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
+class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+
multiclass global_binary_atomic_op<SDNode atomic_op> {
- def "" : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ def "" : global_binary_atomic_op_frag<atomic_op>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
@@ -585,7 +641,6 @@ int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
-int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
@@ -626,9 +681,7 @@ class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
: AMDGPUPat<
(sub_type (extractelt vec_type:$src, sub_idx)),
(EXTRACT_SUBREG $src, sub_reg)
-> {
- let SubtargetPredicate = TruePredicate;
-}
+>;
/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
@@ -636,9 +689,7 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
: AMDGPUPat <
(insertelt vec_type:$vec, elem_type:$elem, sub_idx),
(INSERT_SUBREG $vec, $elem, sub_reg)
-> {
- let SubtargetPredicate = TruePredicate;
-}
+>;
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
@@ -811,7 +862,7 @@ multiclass IntMed3Pat<Instruction med3Inst,
SDPatternOperator max_oneuse,
ValueType vt = i32> {
- // This matches 16 permutations of
+ // This matches 16 permutations of
// min(max(a, b), max(min(a, b), c))
def : AMDGPUPat <
(min (max_oneuse vt:$src0, vt:$src1),
@@ -819,7 +870,7 @@ multiclass IntMed3Pat<Instruction med3Inst,
(med3Inst vt:$src0, vt:$src1, vt:$src2)
>;
- // This matches 16 permutations of
+ // This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
def : AMDGPUPat <
(max (min_oneuse vt:$src0, vt:$src1),
@@ -827,7 +878,7 @@ multiclass IntMed3Pat<Instruction med3Inst,
(med3Inst $src0, $src1, $src2)
>;
}
-
+
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
deleted file mode 100644
index 02108ca3ddd7..000000000000
--- a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-//===- AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// AMDGPU Implementation of the IntrinsicInfo class.
-//
-//===-----------------------------------------------------------------------===//
-
-#include "AMDGPUIntrinsicInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-
-using namespace llvm;
-
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo()
- : TargetIntrinsicInfo() {}
-
-static const char *const IntrinsicNameTable[] = {
-#define GET_INTRINSIC_NAME_TABLE
-#include "AMDGPUGenIntrinsicImpl.inc"
-#undef GET_INTRINSIC_NAME_TABLE
-};
-
-namespace {
-#define GET_INTRINSIC_ATTRIBUTES
-#include "AMDGPUGenIntrinsicImpl.inc"
-#undef GET_INTRINSIC_ATTRIBUTES
-}
-
-StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
- ArrayRef<Type *> Tys) const {
- if (IntrID < Intrinsic::num_intrinsics)
- return StringRef();
-
- assert(IntrID < SIIntrinsic::num_AMDGPU_intrinsics &&
- "Invalid intrinsic ID");
-
- return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
-}
-
-std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
- unsigned NumTys) const {
- return getName(IntrID, makeArrayRef(Tys, NumTys)).str();
-}
-
-FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
- ArrayRef<Type*> Tys) const {
- // FIXME: Re-use Intrinsic::getType machinery
- llvm_unreachable("unhandled intrinsic");
-}
-
-unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
- unsigned Len) const {
- StringRef Name(NameData, Len);
- if (!Name.startswith("llvm."))
- return 0; // All intrinsics start with 'llvm.'
-
- // Look for a name match in our table. If the intrinsic is not overloaded,
- // require an exact match. If it is overloaded, require a prefix match. The
- // AMDGPU enum enum starts at Intrinsic::num_intrinsics.
- int Idx = Intrinsic::lookupLLVMIntrinsicByName(IntrinsicNameTable, Name);
- if (Idx >= 0) {
- bool IsPrefixMatch = Name.size() > strlen(IntrinsicNameTable[Idx]);
- return IsPrefixMatch == isOverloaded(Idx + 1)
- ? Intrinsic::num_intrinsics + Idx
- : 0;
- }
-
- return 0;
-}
-
-bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
-// Overload Table
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "AMDGPUGenIntrinsicImpl.inc"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
-}
-
-Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- ArrayRef<Type *> Tys) const {
- FunctionType *FTy = getType(M->getContext(), IntrID, Tys);
- Function *F
- = cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
-
- AttributeList AS =
- getAttributes(M->getContext(), static_cast<SIIntrinsic::ID>(IntrID));
- F->setAttributes(AS);
- return F;
-}
-
-Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned NumTys) const {
- return getDeclaration(M, IntrID, makeArrayRef(Tys, NumTys));
-}
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
deleted file mode 100644
index a1a094dded23..000000000000
--- a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// Interface for the AMDGPU Implementation of the Intrinsic Info class.
-//
-//===-----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINTRINSICINFO_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINTRINSICINFO_H
-
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-
-namespace llvm {
-class TargetMachine;
-
-namespace SIIntrinsic {
-enum ID {
- last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
-#define GET_INTRINSIC_ENUM_VALUES
-#include "AMDGPUGenIntrinsicEnums.inc"
-#undef GET_INTRINSIC_ENUM_VALUES
- , num_AMDGPU_intrinsics
-};
-
-} // end namespace AMDGPUIntrinsic
-
-class AMDGPUIntrinsicInfo final : public TargetIntrinsicInfo {
-public:
- AMDGPUIntrinsicInfo();
-
- StringRef getName(unsigned IntrId, ArrayRef<Type *> Tys = None) const;
-
- std::string getName(unsigned IntrId, Type **Tys = nullptr,
- unsigned NumTys = 0) const override;
-
- unsigned lookupName(const char *Name, unsigned Len) const override;
- bool isOverloaded(unsigned IID) const override;
- Function *getDeclaration(Module *M, unsigned ID,
- Type **Tys = nullptr,
- unsigned NumTys = 0) const override;
-
- Function *getDeclaration(Module *M, unsigned ID,
- ArrayRef<Type *> = None) const;
-
- FunctionType *getType(LLVMContext &Context, unsigned ID,
- ArrayRef<Type*> Tys = None) const;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ef85c1040545..670f6225fbf7 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -15,17 +14,93 @@
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#define DEBUG_TYPE "amdgpu-legalinfo"
+
using namespace llvm;
using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
+
+static LegalityPredicate isMultiple32(unsigned TypeIdx,
+ unsigned MaxSize = 512) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const LLT EltTy = Ty.getScalarType();
+ return Ty.getSizeInBits() <= MaxSize && EltTy.getSizeInBits() % 32 == 0;
+ };
+}
+
+static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ return Ty.isVector() &&
+ Ty.getNumElements() % 2 != 0 &&
+ Ty.getElementType().getSizeInBits() < 32;
+ };
+}
-AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
- const GCNTargetMachine &TM) {
+static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const LLT EltTy = Ty.getElementType();
+ return std::make_pair(TypeIdx, LLT::vector(Ty.getNumElements() + 1, EltTy));
+ };
+}
+
+static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const LLT EltTy = Ty.getElementType();
+ unsigned Size = Ty.getSizeInBits();
+ unsigned Pieces = (Size + 63) / 64;
+ unsigned NewNumElts = (Ty.getNumElements() + 1) / Pieces;
+ return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
+ };
+}
+
+static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && QueryTy.getSizeInBits() > Size;
+ };
+}
+
+static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && QueryTy.getNumElements() % 2 != 0;
+ };
+}
+
+// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
+// v2s16.
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ if (Ty.isVector()) {
+ const int EltSize = Ty.getElementType().getSizeInBits();
+ return EltSize == 32 || EltSize == 64 ||
+ (EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
+ EltSize == 128 || EltSize == 256;
+ }
+
+ return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
+ };
+}
+
+AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
+ const GCNTargetMachine &TM)
+ : ST(ST_) {
using namespace TargetOpcode;
auto GetAddrSpacePtr = [&TM](unsigned AS) {
@@ -33,13 +108,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
};
const LLT S1 = LLT::scalar(1);
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT S128 = LLT::scalar(128);
+ const LLT S256 = LLT::scalar(256);
const LLT S512 = LLT::scalar(512);
const LLT V2S16 = LLT::vector(2, 16);
const LLT V4S16 = LLT::vector(4, 16);
- const LLT V8S16 = LLT::vector(8, 16);
const LLT V2S32 = LLT::vector(2, 32);
const LLT V3S32 = LLT::vector(3, 32);
@@ -79,156 +157,428 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
const LLT CodePtr = FlatPtr;
- const LLT AddrSpaces[] = {
- GlobalPtr,
- ConstantPtr,
- LocalPtr,
- FlatPtr,
- PrivatePtr
+ const std::initializer_list<LLT> AddrSpaces64 = {
+ GlobalPtr, ConstantPtr, FlatPtr
+ };
+
+ const std::initializer_list<LLT> AddrSpaces32 = {
+ LocalPtr, PrivatePtr
+ };
+
+ const std::initializer_list<LLT> FPTypesBase = {
+ S32, S64
+ };
+
+ const std::initializer_list<LLT> FPTypes16 = {
+ S32, S64, S16
+ };
+
+ const std::initializer_list<LLT> FPTypesPK16 = {
+ S32, S64, S16, V2S16
};
setAction({G_BRCOND, S1}, Legal);
- setAction({G_ADD, S32}, Legal);
- setAction({G_ASHR, S32}, Legal);
- setAction({G_SUB, S32}, Legal);
- setAction({G_MUL, S32}, Legal);
+ // TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
+ // elements for v3s16
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
+ .legalFor(AllS32Vectors)
+ .legalFor(AllS64Vectors)
+ .legalFor(AddrSpaces64)
+ .legalFor(AddrSpaces32)
+ .clampScalar(0, S32, S256)
+ .widenScalarToNextPow2(0, 32)
+ .clampMaxNumElements(0, S32, 16)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .legalIf(isPointer(0));
- // FIXME: 64-bit ones only legal for scalar
+ if (ST.has16BitInsts()) {
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ .legalFor({S32, S16})
+ .clampScalar(0, S16, S32)
+ .scalarize(0);
+ } else {
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
+ .legalFor({S32})
+ .clampScalar(0, S32, S32)
+ .scalarize(0);
+ }
+
+ getActionDefinitionsBuilder({G_UMULH, G_SMULH})
+ .legalFor({S32})
+ .clampScalar(0, S32, S32)
+ .scalarize(0);
+
+ // Report legal for any types we can handle anywhere. For the cases only legal
+ // on the SALU, RegBankSelect will be able to re-legalize.
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
- .legalFor({S32, S1, S64, V2S32});
+ .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
+ .clampScalar(0, S32, S64)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
+ .widenScalarToNextPow2(0)
+ .scalarize(0);
getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
- .legalFor({{S32, S1}});
+ .legalFor({{S32, S1}})
+ .clampScalar(0, S32, S32);
- setAction({G_BITCAST, V2S16}, Legal);
- setAction({G_BITCAST, 1, S32}, Legal);
+ getActionDefinitionsBuilder(G_BITCAST)
+ .legalForCartesianProduct({S32, V2S16})
+ .legalForCartesianProduct({S64, V2S32, V4S16})
+ .legalForCartesianProduct({V2S64, V4S32})
+ // Don't worry about the size constraint.
+ .legalIf(all(isPointer(0), isPointer(1)));
- setAction({G_BITCAST, S32}, Legal);
- setAction({G_BITCAST, 1, V2S16}, Legal);
-
- getActionDefinitionsBuilder(G_FCONSTANT)
- .legalFor({S32, S64});
+ if (ST.has16BitInsts()) {
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalFor({S32, S64, S16})
+ .clampScalar(0, S16, S64);
+ } else {
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalFor({S32, S64})
+ .clampScalar(0, S32, S64);
+ }
- // G_IMPLICIT_DEF is a no-op so we can make it legal for any value type that
- // can fit in a register.
- // FIXME: We need to legalize several more operations before we can add
- // a test case for size > 512.
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
- .legalIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() <= 512;
- })
- .clampScalar(0, S1, S512);
+ .legalFor({S1, S32, S64, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
+ ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .clampScalarOrElt(0, S32, S512)
+ .legalIf(isMultiple32(0))
+ .widenScalarToNextPow2(0, 32)
+ .clampMaxNumElements(0, S32, 16);
- getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({S1, S32, S64});
// FIXME: i1 operands to intrinsics should always be legal, but other i1
// values may not be legal. We need to figure out how to distinguish
// between these two scenarios.
- setAction({G_CONSTANT, S1}, Legal);
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({S1, S32, S64, GlobalPtr,
+ LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
+ .clampScalar(0, S32, S64)
+ .widenScalarToNextPow2(0)
+ .legalIf(isPointer(0));
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
- getActionDefinitionsBuilder(
- { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
+ auto &FPOpActions = getActionDefinitionsBuilder(
+ { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
.legalFor({S32, S64});
- getActionDefinitionsBuilder(G_FPTRUNC)
- .legalFor({{S32, S64}});
+ if (ST.has16BitInsts()) {
+ if (ST.hasVOP3PInsts())
+ FPOpActions.legalFor({S16, V2S16});
+ else
+ FPOpActions.legalFor({S16});
+ }
- // Use actual fsub instruction
- setAction({G_FSUB, S32}, Legal);
+ auto &MinNumMaxNum = getActionDefinitionsBuilder({
+ G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
+
+ if (ST.hasVOP3PInsts()) {
+ MinNumMaxNum.customFor(FPTypesPK16)
+ .clampMaxNumElements(0, S16, 2)
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else if (ST.has16BitInsts()) {
+ MinNumMaxNum.customFor(FPTypes16)
+ .clampScalar(0, S16, S64)
+ .scalarize(0);
+ } else {
+ MinNumMaxNum.customFor(FPTypesBase)
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
+ }
- // Must use fadd + fneg
- setAction({G_FSUB, S64}, Lower);
+ // TODO: Implement
+ getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
- setAction({G_FCMP, S1}, Legal);
- setAction({G_FCMP, 1, S32}, Legal);
- setAction({G_FCMP, 1, S64}, Legal);
+ if (ST.hasVOP3PInsts())
+ FPOpActions.clampMaxNumElements(0, S16, 2);
+ FPOpActions
+ .scalarize(0)
+ .clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
- setAction({G_ZEXT, S64}, Legal);
- setAction({G_ZEXT, 1, S32}, Legal);
+ if (ST.has16BitInsts()) {
+ getActionDefinitionsBuilder(G_FSQRT)
+ .legalFor({S32, S64, S16})
+ .scalarize(0)
+ .clampScalar(0, S16, S64);
+ } else {
+ getActionDefinitionsBuilder(G_FSQRT)
+ .legalFor({S32, S64})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
+ }
- setAction({G_SEXT, S64}, Legal);
- setAction({G_SEXT, 1, S32}, Legal);
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor({{S32, S64}, {S16, S32}})
+ .scalarize(0);
- setAction({G_ANYEXT, S64}, Legal);
- setAction({G_ANYEXT, 1, S32}, Legal);
+ getActionDefinitionsBuilder(G_FPEXT)
+ .legalFor({{S64, S32}, {S32, S16}})
+ .lowerFor({{S64, S16}}) // FIXME: Implement
+ .scalarize(0);
- setAction({G_FPTOSI, S32}, Legal);
- setAction({G_FPTOSI, 1, S32}, Legal);
+ // TODO: Verify V_BFI_B32 is generated from expanded bit ops.
+ getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
- setAction({G_SITOFP, S32}, Legal);
- setAction({G_SITOFP, 1, S32}, Legal);
+ getActionDefinitionsBuilder(G_FSUB)
+ // Use actual fsub instruction
+ .legalFor({S32})
+ // Must use fadd + fneg
+ .lowerFor({S64, S16, V2S16})
+ .scalarize(0)
+ .clampScalar(0, S32, S64);
- setAction({G_UITOFP, S32}, Legal);
- setAction({G_UITOFP, 1, S32}, Legal);
+ getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+ .legalFor({{S64, S32}, {S32, S16}, {S64, S16},
+ {S32, S1}, {S64, S1}, {S16, S1},
+ // FIXME: Hack
+ {S64, LLT::scalar(33)},
+ {S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
+ .scalarize(0);
- setAction({G_FPTOUI, S32}, Legal);
- setAction({G_FPTOUI, 1, S32}, Legal);
+ getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+ .legalFor({{S32, S32}, {S64, S32}})
+ .lowerFor({{S32, S64}})
+ .customFor({{S64, S64}})
+ .scalarize(0);
- setAction({G_FPOW, S32}, Legal);
- setAction({G_FEXP2, S32}, Legal);
- setAction({G_FLOG2, S32}, Legal);
+ getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+ .legalFor({{S32, S32}, {S32, S64}})
+ .scalarize(0);
- getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
- .legalFor({S32, S64});
+ getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
+ .legalFor({S32, S64})
+ .scalarize(0);
- for (LLT PtrTy : AddrSpaces) {
- LLT IdxTy = LLT::scalar(PtrTy.getSizeInBits());
- setAction({G_GEP, PtrTy}, Legal);
- setAction({G_GEP, 1, IdxTy}, Legal);
+ if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
+ .legalFor({S32, S64})
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
+ } else {
+ getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
+ .legalFor({S32})
+ .customFor({S64})
+ .clampScalar(0, S32, S64)
+ .scalarize(0);
}
+ getActionDefinitionsBuilder(G_GEP)
+ .legalForCartesianProduct(AddrSpaces64, {S64})
+ .legalForCartesianProduct(AddrSpaces32, {S32})
+ .scalarize(0);
+
setAction({G_BLOCK_ADDR, CodePtr}, Legal);
- setAction({G_ICMP, S1}, Legal);
- setAction({G_ICMP, 1, S32}, Legal);
+ auto &CmpBuilder =
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalForCartesianProduct(
+ {S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
+ .legalFor({{S1, S32}, {S1, S64}});
+ if (ST.has16BitInsts()) {
+ CmpBuilder.legalFor({{S1, S16}});
+ }
+
+ CmpBuilder
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, S32, S64)
+ .scalarize(0)
+ .legalIf(all(typeIs(0, S1), isPointer(1)));
+
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, S32, S64)
+ .scalarize(0);
+
+ // FIXME: fexp, flog2, flog10 needs to be custom lowered.
+ getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
+ G_FLOG, G_FLOG2, G_FLOG10})
+ .legalFor({S32})
+ .scalarize(0);
+
+ // The 64-bit versions produce 32-bit results, but only on the SALU.
+ getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
+ G_CTTZ, G_CTTZ_ZERO_UNDEF,
+ G_CTPOP})
+ .legalFor({{S32, S32}, {S32, S64}})
+ .clampScalar(0, S32, S32)
+ .clampScalar(1, S32, S64)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32)
+ .widenScalarToNextPow2(1, 32);
+
+ // TODO: Expand for > s32
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalFor({S32})
+ .clampScalar(0, S32, S32)
+ .scalarize(0);
+
+ if (ST.has16BitInsts()) {
+ if (ST.hasVOP3PInsts()) {
+ getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+ .legalFor({S32, S16, V2S16})
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .clampMaxNumElements(0, S16, 2)
+ .clampScalar(0, S16, S32)
+ .widenScalarToNextPow2(0)
+ .scalarize(0);
+ } else {
+ getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+ .legalFor({S32, S16})
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, S16, S32)
+ .scalarize(0);
+ }
+ } else {
+ getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+ .legalFor({S32})
+ .clampScalar(0, S32, S32)
+ .widenScalarToNextPow2(0)
+ .scalarize(0);
+ }
- setAction({G_CTLZ, S32}, Legal);
- setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal);
- setAction({G_CTTZ, S32}, Legal);
- setAction({G_CTTZ_ZERO_UNDEF, S32}, Legal);
- setAction({G_BSWAP, S32}, Legal);
- setAction({G_CTPOP, S32}, Legal);
+ auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() <
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+ };
+
+ auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() >
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+ };
getActionDefinitionsBuilder(G_INTTOPTR)
- .legalIf([](const LegalityQuery &Query) {
- return true;
- });
+ // List the common cases
+ .legalForCartesianProduct(AddrSpaces64, {S64})
+ .legalForCartesianProduct(AddrSpaces32, {S32})
+ .scalarize(0)
+ // Accept any address space as long as the size matches
+ .legalIf(sameSize(0, 1))
+ .widenScalarIf(smallerThan(1, 0),
+ [](const LegalityQuery &Query) {
+ return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
+ })
+ .narrowScalarIf(greaterThan(1, 0),
+ [](const LegalityQuery &Query) {
+ return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
+ });
getActionDefinitionsBuilder(G_PTRTOINT)
- .legalIf([](const LegalityQuery &Query) {
- return true;
- });
+ // List the common cases
+ .legalForCartesianProduct(AddrSpaces64, {S64})
+ .legalForCartesianProduct(AddrSpaces32, {S32})
+ .scalarize(0)
+ // Accept any address space as long as the size matches
+ .legalIf(sameSize(0, 1))
+ .widenScalarIf(smallerThan(0, 1),
+ [](const LegalityQuery &Query) {
+ return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
+ })
+ .narrowScalarIf(
+ greaterThan(0, 1),
+ [](const LegalityQuery &Query) {
+ return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
+ });
+
+ if (ST.hasFlatAddressSpace()) {
+ getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+ .scalarize(0)
+ .custom();
+ }
+ // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
+ // handle some operations by just promoting the register during
+ // selection. There are also d16 loads on GFX9+ which preserve the high bits.
getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalIf([=, &ST](const LegalityQuery &Query) {
+ .narrowScalarIf([](const LegalityQuery &Query) {
+ unsigned Size = Query.Types[0].getSizeInBits();
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ return (Size > 32 && MemSize < Size);
+ },
+ [](const LegalityQuery &Query) {
+ return std::make_pair(0, LLT::scalar(32));
+ })
+ .fewerElementsIf([=](const LegalityQuery &Query) {
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ return (MemSize == 96) &&
+ Query.Types[0].isVector() &&
+ !ST.hasDwordx3LoadStores();
+ },
+ [=](const LegalityQuery &Query) {
+ return std::make_pair(0, V2S32);
+ })
+ .legalIf([=](const LegalityQuery &Query) {
const LLT &Ty0 = Query.Types[0];
+ unsigned Size = Ty0.getSizeInBits();
+ unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+ if (Size < 32 || (Size > 32 && MemSize < Size))
+ return false;
+
+ if (Ty0.isVector() && Size != MemSize)
+ return false;
+
// TODO: Decompose private loads into 4-byte components.
// TODO: Illegal flat loads on SI
- switch (Ty0.getSizeInBits()) {
+ switch (MemSize) {
+ case 8:
+ case 16:
+ return Size == 32;
case 32:
case 64:
case 128:
return true;
case 96:
- // XXX hasLoadX3
- return (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS);
+ return ST.hasDwordx3LoadStores();
case 256:
case 512:
- // TODO: constant loads
+ // TODO: Possibly support loads of i256 and i512. This will require
+ // adding i256 and i512 types to MVT in order to be able to use
+ // TableGen.
+ // TODO: Add support for other vector types, this will require
+ // defining more value mappings for the new types.
+ return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
+ Ty0.getScalarType().getSizeInBits() == 64);
+
default:
return false;
}
- });
+ })
+ .clampScalar(0, S32, S64);
+ // FIXME: Handle alignment requirements.
+ auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
+ .legalForTypesWithMemDesc({
+ {S32, GlobalPtr, 8, 8},
+ {S32, GlobalPtr, 16, 8},
+ {S32, LocalPtr, 8, 8},
+ {S32, LocalPtr, 16, 8},
+ {S32, PrivatePtr, 8, 8},
+ {S32, PrivatePtr, 16, 8}});
+ if (ST.hasFlatAddressSpace()) {
+ ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
+ {S32, FlatPtr, 16, 8}});
+ }
+
+ ExtLoads.clampScalar(0, S32, S32)
+ .widenScalarToNextPow2(0)
+ .unsupportedIfMemSizeNotPow2()
+ .lower();
+
auto &Atomics = getActionDefinitionsBuilder(
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
@@ -240,84 +590,805 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
}
- setAction({G_SELECT, S32}, Legal);
- setAction({G_SELECT, 1, S1}, Legal);
+ // TODO: Pointer types, any 32-bit or 64-bit vector
+ getActionDefinitionsBuilder(G_SELECT)
+ .legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
+ GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
+ LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
+ .clampScalar(0, S16, S64)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .fewerElementsIf(numElementsNotEven(0), scalarize(0))
+ .scalarize(1)
+ .clampMaxNumElements(0, S32, 2)
+ .clampMaxNumElements(0, LocalPtr, 2)
+ .clampMaxNumElements(0, PrivatePtr, 2)
+ .scalarize(0)
+ .widenScalarToNextPow2(0)
+ .legalIf(all(isPointer(0), typeIs(1, S1)));
- setAction({G_SHL, S32}, Legal);
+ // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
+ // be more flexible with the shift amount type.
+ auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
+ .legalFor({{S32, S32}, {S64, S32}});
+ if (ST.has16BitInsts()) {
+ if (ST.hasVOP3PInsts()) {
+ Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
+ .clampMaxNumElements(0, S16, 2);
+ } else
+ Shifts.legalFor({{S16, S32}, {S16, S16}});
-
- // FIXME: When RegBankSelect inserts copies, it will only create new
- // registers with scalar types. This means we can end up with
- // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer
- // operands. In assert builds, the instruction selector will assert
- // if it sees a generic instruction which isn't legal, so we need to
- // tell it that scalar types are legal for pointer operands
- setAction({G_GEP, S64}, Legal);
+ Shifts.clampScalar(1, S16, S32);
+ Shifts.clampScalar(0, S16, S64);
+ Shifts.widenScalarToNextPow2(0, 16);
+ } else {
+ // Make sure we legalize the shift amount type first, as the general
+ // expansion for the shifted type will produce much worse code if it hasn't
+ // been truncated already.
+ Shifts.clampScalar(1, S32, S32);
+ Shifts.clampScalar(0, S32, S64);
+ Shifts.widenScalarToNextPow2(0, 32);
+ }
+ Shifts.scalarize(0);
for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
+ unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
+ unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
+ unsigned IdxTypeIdx = 2;
+
getActionDefinitionsBuilder(Op)
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &VecTy = Query.Types[1];
- const LLT &IdxTy = Query.Types[2];
- return VecTy.getSizeInBits() % 32 == 0 &&
- VecTy.getSizeInBits() <= 512 &&
- IdxTy.getSizeInBits() == 32;
- });
+ .customIf([=](const LegalityQuery &Query) {
+ const LLT EltTy = Query.Types[EltTypeIdx];
+ const LLT VecTy = Query.Types[VecTypeIdx];
+ const LLT IdxTy = Query.Types[IdxTypeIdx];
+ return (EltTy.getSizeInBits() == 16 ||
+ EltTy.getSizeInBits() % 32 == 0) &&
+ VecTy.getSizeInBits() % 32 == 0 &&
+ VecTy.getSizeInBits() <= 512 &&
+ IdxTy.getSizeInBits() == 32;
+ })
+ .clampScalar(EltTypeIdx, S32, S64)
+ .clampScalar(VecTypeIdx, S32, S64)
+ .clampScalar(IdxTypeIdx, S32, S32);
}
- // FIXME: Doesn't handle extract of illegal sizes.
- getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &Ty0 = Query.Types[0];
- const LLT &Ty1 = Query.Types[1];
- return (Ty0.getSizeInBits() % 32 == 0) &&
- (Ty1.getSizeInBits() % 32 == 0);
+ getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+ .unsupportedIf([=](const LegalityQuery &Query) {
+ const LLT &EltTy = Query.Types[1].getElementType();
+ return Query.Types[0] != EltTy;
});
+ for (unsigned Op : {G_EXTRACT, G_INSERT}) {
+ unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
+ unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
+
+ // FIXME: Doesn't handle extract of illegal sizes.
+ getActionDefinitionsBuilder(Op)
+ .legalIf([=](const LegalityQuery &Query) {
+ const LLT BigTy = Query.Types[BigTyIdx];
+ const LLT LitTy = Query.Types[LitTyIdx];
+ return (BigTy.getSizeInBits() % 32 == 0) &&
+ (LitTy.getSizeInBits() % 16 == 0);
+ })
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return (BigTy.getScalarSizeInBits() < 16);
+ },
+ LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT LitTy = Query.Types[LitTyIdx];
+ return (LitTy.getScalarSizeInBits() < 16);
+ },
+ LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
+ .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
+ .widenScalarToNextPow2(BigTyIdx, 32);
+
+ }
+
getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalForCartesianProduct(AllS32Vectors, {S32})
- .legalForCartesianProduct(AllS64Vectors, {S64})
- .clampNumElements(0, V16S32, V16S32)
- .clampNumElements(0, V2S64, V8S64)
- .minScalarSameAs(1, 0);
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V16S32)
+ .clampNumElements(0, V2S64, V8S64)
+ .minScalarSameAs(1, 0)
+ .legalIf(isRegisterType(0))
+ .minScalarOrElt(0, S32);
- // TODO: Support any combination of v2s32
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
- .legalFor({{V4S32, V2S32},
- {V8S32, V2S32},
- {V8S32, V4S32},
- {V4S64, V2S64},
- {V4S16, V2S16},
- {V8S16, V2S16},
- {V8S16, V4S16}});
+ .legalIf(isRegisterType(0));
// Merge/Unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
+ auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
+ const LLT &Ty = Query.Types[TypeIdx];
+ if (Ty.isVector()) {
+ const LLT &EltTy = Ty.getElementType();
+ if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
+ return true;
+ if (!isPowerOf2_32(EltTy.getSizeInBits()))
+ return true;
+ }
+ return false;
+ };
+
getActionDefinitionsBuilder(Op)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+ // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
+ // worth considering the multiples of 64 since 2*192 and 2*384 are not
+ // valid.
+ .clampScalar(LitTyIdx, S16, S256)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
+
+ // Break up vectors with weird elements into scalars
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
+ scalarize(0))
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
+ scalarize(1))
+ .clampScalar(BigTyIdx, S32, S512)
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT &Ty = Query.Types[BigTyIdx];
+ return !isPowerOf2_32(Ty.getSizeInBits()) &&
+ Ty.getSizeInBits() % 16 != 0;
+ },
+ [=](const LegalityQuery &Query) {
+ // Pick the next power of 2, or a multiple of 64 over 128,
+ // whichever is smaller.
+ const LLT &Ty = Query.Types[BigTyIdx];
+ unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
+ if (NewSizeInBits >= 256) {
+ unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
+ if (RoundedTo < NewSizeInBits)
+ NewSizeInBits = RoundedTo;
+ }
+ return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
+ })
.legalIf([=](const LegalityQuery &Query) {
const LLT &BigTy = Query.Types[BigTyIdx];
const LLT &LitTy = Query.Types[LitTyIdx];
- return BigTy.getSizeInBits() % 32 == 0 &&
- LitTy.getSizeInBits() % 32 == 0 &&
+
+ if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+ return false;
+ if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+ return false;
+
+ return BigTy.getSizeInBits() % 16 == 0 &&
+ LitTy.getSizeInBits() % 16 == 0 &&
BigTy.getSizeInBits() <= 512;
})
// Any vectors left are the wrong size. Scalarize them.
- .fewerElementsIf([](const LegalityQuery &Query) { return true; },
- [](const LegalityQuery &Query) {
- return std::make_pair(
- 0, Query.Types[0].getElementType());
- })
- .fewerElementsIf([](const LegalityQuery &Query) { return true; },
- [](const LegalityQuery &Query) {
- return std::make_pair(
- 1, Query.Types[1].getElementType());
- });
-
+ .scalarize(0)
+ .scalarize(1);
}
computeTables();
verify(*ST.getInstrInfo());
}
+
+bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_ADDRSPACE_CAST:
+ return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_FRINT:
+ return legalizeFrint(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_FCEIL:
+ return legalizeFceil(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_SITOFP:
+ return legalizeITOFP(MI, MRI, MIRBuilder, true);
+ case TargetOpcode::G_UITOFP:
+ return legalizeITOFP(MI, MRI, MIRBuilder, false);
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
+ default:
+ return false;
+ }
+
+ llvm_unreachable("expected switch to return");
+}
+
+Register AMDGPULegalizerInfo::getSegmentAperture(
+ unsigned AS,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const LLT S32 = LLT::scalar(32);
+
+ if (ST.hasApertureRegs()) {
+ // FIXME: Use inline constants (src_{shared, private}_base) instead of
+ // getreg.
+ unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+ unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+ unsigned Encoding =
+ AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+ Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+ WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+
+ Register ApertureReg = MRI.createGenericVirtualRegister(S32);
+ Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
+ .addDef(GetReg)
+ .addImm(Encoding);
+ MRI.setType(GetReg, S32);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
+ MIRBuilder.buildInstr(TargetOpcode::G_SHL)
+ .addDef(ApertureReg)
+ .addUse(GetReg)
+ .addUse(ShiftAmt.getReg(0));
+
+ return ApertureReg;
+ }
+
+ Register QueuePtr = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+ // FIXME: Placeholder until we can track the input registers.
+ MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
+
+ // Offset into amd_queue_t for group_segment_aperture_base_hi /
+ // private_segment_aperture_base_hi.
+ uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+
+ // FIXME: Don't use undef
+ Value *V = UndefValue::get(PointerType::get(
+ Type::getInt8Ty(MF.getFunction().getContext()),
+ AMDGPUAS::CONSTANT_ADDRESS));
+
+ MachinePointerInfo PtrInfo(V, StructOffset);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ 4,
+ MinAlign(64, StructOffset));
+
+ Register LoadResult = MRI.createGenericVirtualRegister(S32);
+ Register LoadAddr;
+
+ MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+ MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
+ return LoadResult;
+}
+
+bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ MIRBuilder.setInstr(MI);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ unsigned DestAS = DstTy.getAddressSpace();
+ unsigned SrcAS = SrcTy.getAddressSpace();
+
+ // TODO: Avoid reloading from the queue ptr for each cast, or at least each
+ // vector element.
+ assert(!DstTy.isVector());
+
+ const AMDGPUTargetMachine &TM
+ = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
+ return true;
+ }
+
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
+ assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+ DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+ unsigned NullVal = TM.getNullPointerValue(DestAS);
+
+ auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
+ auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);
+
+ Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
+
+ // Extract low 32-bits of the pointer.
+ MIRBuilder.buildExtract(PtrLo32, Src, 0);
+
+ Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
+ MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+ SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+ auto SegmentNull =
+ MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+ auto FlatNull =
+ MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+
+ Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);
+
+ Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
+
+ Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
+
+ // Coerce the type of the low half of the result so we can use merge_values.
+ Register SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+ .addDef(SrcAsInt)
+ .addUse(Src);
+
+ // TODO: Should we allow mismatched types but matching sizes in merges to
+ // avoid the ptrtoint?
+ MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
+ MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
+
+ MI.eraseFromParent();
+ return true;
+}
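A minimal standalone sketch of the segment-to-flat half of this expansion, assuming ordinary 32-bit segment pointers; the helper name and parameters (ApertureHi, SegmentNull, FlatNull) are illustrative stand-ins for the values the legalizer materializes via getSegmentAperture and TM.getNullPointerValue:

#include <cstdint>

// Illustrative only: the flat pointer is {aperture high half, 32-bit offset},
// with the segment null pointer mapped to the flat null pointer.
static uint64_t segmentToFlat(uint32_t SrcPtr, uint32_t ApertureHi,
                              uint32_t SegmentNull, uint64_t FlatNull) {
  if (SrcPtr == SegmentNull)
    return FlatNull;
  return (static_cast<uint64_t>(ApertureHi) << 32) | SrcPtr;  // merge_values
}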
+
+bool AMDGPULegalizerInfo::legalizeFrint(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MIRBuilder.setInstr(MI);
+
+ Register Src = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(Src);
+ assert(Ty.isScalar() && Ty.getSizeInBits() == 64);
+
+ APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
+ APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
+
+ auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
+ auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);
+
+ // TODO: Should this propagate fast-math-flags?
+ auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
+ auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);
+
+ auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
+ auto Fabs = MIRBuilder.buildFAbs(Ty, Src);
+
+ auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
+ return true;
+}
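As a plain C++ illustration of the magic-number rounding used above (a sketch only, assuming the default round-to-nearest FP environment): adding and then subtracting copysign(2^52, x) forces the intermediate sum into the range where doubles are spaced one unit apart, so the result is x rounded to an integer; inputs already at or beyond that range are returned unchanged.

#include <cmath>

static double frintViaMagicNumber(double Src) {
  const double C1 = std::ldexp(1.0, 52);         // 2^52
  const double C2 = std::nextafter(C1, 0.0);     // largest double below 2^52
  double CopySign = std::copysign(C1, Src);
  double Rounded = (Src + CopySign) - CopySign;  // Src rounded to an integer
  return std::fabs(Src) > C2 ? Src : Rounded;
}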
+
+bool AMDGPULegalizerInfo::legalizeFceil(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+
+ const LLT S1 = LLT::scalar(1);
+ const LLT S64 = LLT::scalar(64);
+
+ Register Src = MI.getOperand(1).getReg();
+ assert(MRI.getType(Src) == S64);
+
+ // result = trunc(src)
+ // if (src > 0.0 && src != result)
+ // result += 1.0
+
+ auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});
+
+ const auto Zero = B.buildFConstant(S64, 0.0);
+ const auto One = B.buildFConstant(S64, 1.0);
+ auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
+ auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
+ auto And = B.buildAnd(S1, Lt0, NeTrunc);
+ auto Add = B.buildSelect(S64, And, One, Zero);
+
+ // TODO: Should this propagate fast-math-flags?
+ B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
+ return true;
+}
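The expansion above follows the identity spelled out in its comment; a tiny self-contained check of that identity (illustrative, not the in-tree code):

#include <cassert>
#include <cmath>

// ceil(x) == trunc(x), plus one when x is positive and not already integral.
static double ceilViaTrunc(double Src) {
  double Result = std::trunc(Src);
  if (Src > 0.0 && Src != Result)
    Result += 1.0;
  return Result;
}

int main() {
  assert(ceilViaTrunc(2.25) == 3.0);
  assert(ceilViaTrunc(-2.25) == -2.0);
  assert(ceilViaTrunc(5.0) == 5.0);
}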
+
+static MachineInstrBuilder extractF64Exponent(unsigned Hi,
+ MachineIRBuilder &B) {
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+ LLT S32 = LLT::scalar(32);
+
+ auto Const0 = B.buildConstant(S32, FractBits - 32);
+ auto Const1 = B.buildConstant(S32, ExpBits);
+
+ auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
+ .addUse(Const0.getReg(0))
+ .addUse(Const1.getReg(0));
+
+ return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ B.setInstr(MI);
+
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+
+ Register Src = MI.getOperand(1).getReg();
+ assert(MRI.getType(Src) == S64);
+
+ // TODO: Should this use extract since the low half is unused?
+ auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+ Register Hi = Unmerge.getReg(1);
+
+ // Extract the upper half, since this is where we will find the sign and
+ // exponent.
+ auto Exp = extractF64Exponent(Hi, B);
+
+ const unsigned FractBits = 52;
+
+ // Extract the sign bit.
+ const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
+ auto SignBit = B.buildAnd(S32, Hi, SignBitMask);
+
+ const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);
+
+ const auto Zero32 = B.buildConstant(S32, 0);
+
+ // Extend back to 64-bits.
+ auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});
+
+ auto Shr = B.buildAShr(S64, FractMask, Exp);
+ auto Not = B.buildNot(S64, Shr);
+ auto Tmp0 = B.buildAnd(S64, Src, Not);
+ auto FiftyOne = B.buildConstant(S32, FractBits - 1);
+
+ auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
+ auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);
+
+ auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
+ B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
+ return true;
+}
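A rough scalar model of the bit-level trunc expansion above, assuming ordinary IEEE-754 doubles (a sketch of the idea, not the MIR the legalizer emits): the unbiased exponent decides how many fraction bits survive, with the |x| < 1 and already-integral cases handled by the two selects.

#include <cstdint>
#include <cstring>

static double truncViaBits(double Src) {
  uint64_t Bits;
  std::memcpy(&Bits, &Src, sizeof(Bits));

  const uint64_t FractMask = (UINT64_C(1) << 52) - 1;
  int64_t Exp = static_cast<int64_t>((Bits >> 52) & 0x7FF) - 1023;
  uint64_t SignBit = Bits & (UINT64_C(1) << 63);

  uint64_t Result;
  if (Exp < 0)
    Result = SignBit;                     // |Src| < 1.0 -> +/-0.0
  else if (Exp > 51)
    Result = Bits;                        // already integral (or inf/nan)
  else
    Result = Bits & ~(FractMask >> Exp);  // drop the sub-integer fraction bits

  double Out;
  std::memcpy(&Out, &Result, sizeof(Out));
  return Out;
}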
+
+bool AMDGPULegalizerInfo::legalizeITOFP(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool Signed) const {
+ B.setInstr(MI);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+
+ auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+
+ auto CvtHi = Signed ?
+ B.buildSITOFP(S64, Unmerge.getReg(1)) :
+ B.buildUITOFP(S64, Unmerge.getReg(1));
+
+ auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
+
+ auto ThirtyTwo = B.buildConstant(S32, 32);
+ auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
+ .addUse(CvtHi.getReg(0))
+ .addUse(ThirtyTwo.getReg(0));
+
+ // TODO: Should this propagate fast-math-flags?
+ B.buildFAdd(Dst, LdExp, CvtLo);
+ MI.eraseFromParent();
+ return true;
+}
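The G_SITOFP/G_UITOFP custom lowering above converts the two 32-bit halves separately and recombines them; a small standalone version of the same identity (illustrative only, with std::ldexp standing in for the amdgcn_ldexp intrinsic):

#include <cmath>
#include <cstdint>

static double sitofp64ViaHalves(int64_t Src) {
  uint32_t Lo = static_cast<uint32_t>(Src);      // low half, always unsigned
  int32_t Hi = static_cast<int32_t>(Src >> 32);  // high half, signed for SITOFP
  // For the unsigned case, Hi would be converted as uint32_t instead.
  double CvtHi = std::ldexp(static_cast<double>(Hi), 32);  // Hi * 2^32
  return CvtHi + static_cast<double>(Lo);
}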
+
+bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ MachineFunction &MF = B.getMF();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
+ MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
+
+ // With ieee_mode disabled, the instructions have the correct behavior
+ // already for G_FMINNUM/G_FMAXNUM
+ if (!MFI->getMode().IEEE)
+ return !IsIEEEOp;
+
+ if (IsIEEEOp)
+ return true;
+
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(MF, DummyObserver, HelperBuilder);
+ HelperBuilder.setMBB(*MI.getParent());
+ return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
+}
+
+bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // TODO: Should move some of this into LegalizerHelper.
+
+ // TODO: Promote dynamic indexing of s16 to s32
+ // TODO: Dynamic s64 indexing is only legal for SGPR.
+ Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+ if (!IdxVal) // Dynamic case will be selected to register indexing.
+ return true;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Vec = MI.getOperand(1).getReg();
+
+ LLT VecTy = MRI.getType(Vec);
+ LLT EltTy = VecTy.getElementType();
+ assert(EltTy == MRI.getType(Dst));
+
+ B.setInstr(MI);
+
+ if (IdxVal.getValue() < VecTy.getNumElements())
+ B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
+ else
+ B.buildUndef(Dst);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // TODO: Should move some of this into LegalizerHelper.
+
+ // TODO: Promote dynamic indexing of s16 to s32
+ // TODO: Dynamic s64 indexing is only legal for SGPR.
+ Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+ if (!IdxVal) // Dynamic case will be selected to register indexing.
+ return true;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Vec = MI.getOperand(1).getReg();
+ Register Ins = MI.getOperand(2).getReg();
+
+ LLT VecTy = MRI.getType(Vec);
+ LLT EltTy = VecTy.getElementType();
+ assert(EltTy == MRI.getType(Ins));
+
+ B.setInstr(MI);
+
+ if (IdxVal.getValue() < VecTy.getNumElements())
+ B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
+ else
+ B.buildUndef(Dst);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+// Return the use branch instruction, or null if the usage is invalid.
+static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI) {
+ Register CondDef = MI.getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(CondDef))
+ return nullptr;
+
+ MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
+ return UseMI.getParent() == MI.getParent() &&
+ UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
+}
+
+Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
+ Register Reg, LLT Ty) const {
+ Register LiveIn = MRI.getLiveInVirtReg(Reg);
+ if (LiveIn)
+ return LiveIn;
+
+ Register NewReg = MRI.createGenericVirtualRegister(Ty);
+ MRI.addLiveIn(Reg, NewReg);
+ return NewReg;
+}
+
+bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
+ const ArgDescriptor *Arg) const {
+ if (!Arg->isRegister())
+ return false; // TODO: Handle these
+
+ assert(Arg->getRegister() != 0);
+ assert(Arg->getRegister().isPhysical());
+
+ MachineRegisterInfo &MRI = *B.getMRI();
+
+ LLT Ty = MRI.getType(DstReg);
+ Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
+
+ if (Arg->isMasked()) {
+ // TODO: Should we try to emit this once in the entry block?
+ const LLT S32 = LLT::scalar(32);
+ const unsigned Mask = Arg->getMask();
+ const unsigned Shift = countTrailingZeros<unsigned>(Mask);
+
+ auto ShiftAmt = B.buildConstant(S32, Shift);
+ auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
+ B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
+ } else
+ B.buildCopy(DstReg, LiveIn);
+
+ // Insert the argument copy if it doesn't already exist.
+ // FIXME: It seems EmitLiveInCopies isn't called anywhere?
+ if (!MRI.getVRegDef(LiveIn)) {
+ MachineBasicBlock &EntryMBB = B.getMF().front();
+ EntryMBB.addLiveIn(Arg->getRegister());
+ B.setInsertPt(EntryMBB, EntryMBB.begin());
+ B.buildCopy(LiveIn, Arg->getRegister());
+ }
+
+ return true;
+}
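For the masked case above, several IDs are packed into one input register and the decode is just a shift by the mask's trailing-zero count followed by an AND with the shifted mask; a small illustration with a made-up bit layout (not the real packing):

#include <cstdint>

static uint32_t decodeMaskedArg(uint32_t LiveIn, uint32_t Mask) {
  unsigned Shift = __builtin_ctz(Mask);  // countTrailingZeros; Mask must be nonzero
  return (LiveIn >> Shift) & (Mask >> Shift);
}

// e.g. a field occupying bits [11:6]:
//   decodeMaskedArg(Packed, 0x00000FC0u) == (Packed >> 6) & 0x3F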
+
+bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
+ MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+ B.setInstr(MI);
+
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+
+ const ArgDescriptor *Arg;
+ const TargetRegisterClass *RC;
+ std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
+ if (!Arg) {
+ LLVM_DEBUG(dbgs() << "Required arg register missing\n");
+ return false;
+ }
+
+ if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (!MFI->isEntryFunction()) {
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+ }
+
+ B.setInstr(MI);
+
+ uint64_t Offset =
+ ST.getTargetLowering()->getImplicitParameterOffset(
+ B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
+
+ const ArgDescriptor *Arg;
+ const TargetRegisterClass *RC;
+ std::tie(Arg, RC)
+ = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ if (!Arg)
+ return false;
+
+ Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
+ if (!loadInputValue(KernargPtrReg, B, Arg))
+ return false;
+
+ B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // Replace the use G_BRCOND with the exec manipulation and branch pseudos.
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ case Intrinsic::amdgcn_if: {
+ if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
+ const SIRegisterInfo *TRI
+ = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+
+ B.setInstr(*BrCond);
+ Register Def = MI.getOperand(1).getReg();
+ Register Use = MI.getOperand(3).getReg();
+ B.buildInstr(AMDGPU::SI_IF)
+ .addDef(Def)
+ .addUse(Use)
+ .addMBB(BrCond->getOperand(1).getMBB());
+
+ MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
+ MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
+ MI.eraseFromParent();
+ BrCond->eraseFromParent();
+ return true;
+ }
+
+ return false;
+ }
+ case Intrinsic::amdgcn_loop: {
+ if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
+ const SIRegisterInfo *TRI
+ = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+
+ B.setInstr(*BrCond);
+ Register Reg = MI.getOperand(2).getReg();
+ B.buildInstr(AMDGPU::SI_LOOP)
+ .addUse(Reg)
+ .addMBB(BrCond->getOperand(1).getMBB());
+ MI.eraseFromParent();
+ BrCond->eraseFromParent();
+ MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
+ return true;
+ }
+
+ return false;
+ }
+ case Intrinsic::amdgcn_kernarg_segment_ptr:
+ return legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return legalizeImplicitArgPtr(MI, MRI, B);
+ case Intrinsic::amdgcn_workitem_id_x:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+ case Intrinsic::amdgcn_workitem_id_y:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+ case Intrinsic::amdgcn_workitem_id_z:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+ case Intrinsic::amdgcn_workgroup_id_x:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
+ case Intrinsic::amdgcn_workgroup_id_y:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
+ case Intrinsic::amdgcn_workgroup_id_z:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_dispatch_ptr:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::DISPATCH_PTR);
+ case Intrinsic::amdgcn_queue_ptr:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::QUEUE_PTR);
+ case Intrinsic::amdgcn_implicit_buffer_ptr:
+ return legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
+ case Intrinsic::amdgcn_dispatch_id:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::DISPATCH_ID);
+ default:
+ return true;
+ }
+
+ return true;
+}
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 1cbd37c42c4b..3f1cc1d265dd 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -1,9 +1,8 @@
//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -16,6 +15,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "AMDGPUArgumentUsageInfo.h"
namespace llvm {
@@ -25,9 +25,51 @@ class GCNSubtarget;
/// This class provides the information for the target register banks.
class AMDGPULegalizerInfo : public LegalizerInfo {
+ const GCNSubtarget &ST;
+
public:
AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder,
+ GISelChangeObserver &Observer) const override;
+
+ Register getSegmentAperture(unsigned AddrSpace,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+
+ bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+ bool legalizeFrint(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+ bool legalizeFceil(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+ bool legalizeIntrinsicTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+ bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder, bool Signed) const;
+ bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+ bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+ bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
+
+ Register getLiveInRegister(MachineRegisterInfo &MRI,
+ Register Reg, LLT Ty) const;
+
+ bool loadInputValue(Register DstReg, MachineIRBuilder &B,
+ const ArgDescriptor *Arg) const;
+ bool legalizePreloadedArgIntrin(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+ AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+
+ bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+ bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 14e880042691..ce0a9db7c7f4 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1,9 +1,8 @@
//===- AMDGPULibCalls.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/ADT/StringSet.h"
@@ -23,6 +23,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <vector>
#include <cmath>
@@ -66,6 +68,8 @@ private:
typedef llvm::AMDGPULibFunc FuncInfo;
+ const TargetMachine *TM;
+
// -fuse-native.
bool AllNative = false;
@@ -73,7 +77,7 @@ private:
// Return a pointer (pointer expr) to the function if function defintion with
// "FuncName" exists. It may create a new function prototype in pre-link mode.
- Constant *getFunction(Module *M, const FuncInfo& fInfo);
+ FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
// Replace a normal function with its native version.
bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
@@ -135,12 +139,15 @@ private:
// __read_pipe/__write_pipe
bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
+ // llvm.amdgcn.wavefrontsize
+ bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
+
// Get insertion point at entry.
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
// Get a scalar native builtin signle argument FP function
- Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
+ FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
protected:
CallInst *CI;
@@ -153,6 +160,8 @@ protected:
}
public:
+ AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
+
bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
void initNativeFuncs();
@@ -167,15 +176,16 @@ namespace {
class AMDGPUSimplifyLibCalls : public FunctionPass {
- AMDGPULibCalls Simplifier;
-
const TargetOptions Options;
+ AMDGPULibCalls Simplifier;
+
public:
static char ID; // Pass identification
- AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
- : FunctionPass(ID), Options(Opt) {
+ AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
+ const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), Options(Opt), Simplifier(TM) {
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -217,19 +227,19 @@ INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
false, false)
template <typename IRB>
-static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg,
+static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, Arg, Name);
- if (Function* F = dyn_cast<Function>(Callee))
+ if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
template <typename IRB>
-static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
- const Twine &Name = "") {
+static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
+ Value *Arg2, const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
- if (Function* F = dyn_cast<Function>(Callee))
+ if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
@@ -472,7 +482,7 @@ static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
}
-Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
+FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
// If we are doing PreLinkOpt, the function is external. So it is safe to
// use getOrInsertFunction() at this stage.
@@ -519,11 +529,11 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
nf.setPrefix(AMDGPULibFunc::NATIVE);
nf.setId(AMDGPULibFunc::EI_SIN);
- Constant *sinExpr = getFunction(M, nf);
+ FunctionCallee sinExpr = getFunction(M, nf);
nf.setPrefix(AMDGPULibFunc::NATIVE);
nf.setId(AMDGPULibFunc::EI_COS);
- Constant *cosExpr = getFunction(M, nf);
+ FunctionCallee cosExpr = getFunction(M, nf);
if (sinExpr && cosExpr) {
Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
@@ -555,7 +565,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
return sincosUseNative(aCI, FInfo);
FInfo.setPrefix(AMDGPULibFunc::NATIVE);
- Constant *F = getFunction(aCI->getModule(), FInfo);
+ FunctionCallee F = getFunction(aCI->getModule(), FInfo);
if (!F)
return false;
@@ -613,7 +623,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
auto *FTy = FunctionType::get(Callee->getReturnType(),
ArrayRef<Type *>(ArgTys), false);
AMDGPULibFunc NewLibFunc(Name, FTy);
- auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
+ FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
if (!F)
return false;
@@ -640,14 +650,6 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
// Ignore indirect calls.
if (Callee == 0) return false;
- FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo))
- return false;
-
- // Further check the number of arguments to see if they match.
- if (CI->getNumArgOperands() != FInfo.getNumArgs())
- return false;
-
BasicBlock *BB = CI->getParent();
LLVMContext &Context = CI->getParent()->getContext();
IRBuilder<> B(Context);
@@ -659,6 +661,21 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
B.setFastMathFlags(FPOp->getFastMathFlags());
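+  // Handle target intrinsics (currently only llvm.amdgcn.wavefrontsize) before
+  // trying to parse the callee as a mangled library function.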
+ switch (Callee->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::amdgcn_wavefrontsize:
+ return !EnablePreLink && fold_wavefrontsize(CI, B);
+ }
+
+ FuncInfo FInfo;
+ if (!parseFunctionName(Callee->getName(), &FInfo))
+ return false;
+
+ // Further check the number of arguments to see if they match.
+ if (CI->getNumArgOperands() != FInfo.getNumArgs())
+ return false;
+
if (TDOFold(CI, FInfo))
return true;
@@ -795,7 +812,7 @@ bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
AMDGPULibFunc nf = FInfo;
nf.setPrefix(AMDGPULibFunc::NATIVE);
- if (Constant *FPExpr = getFunction(M, nf)) {
+ if (FunctionCallee FPExpr = getFunction(M, nf)) {
LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
CI->setCalledFunction(FPExpr);
@@ -848,7 +865,7 @@ bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
namespace llvm {
static double log2(double V) {
-#if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
+#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
return ::log2(V);
#else
return log(V) / 0.693147180559945309417;
@@ -934,9 +951,10 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
// pow[r](x, [-]0.5) = sqrt(x)
bool issqrt = CF->isExactlyValue(0.5);
- if (Constant *FPExpr = getFunction(M,
- AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
- : AMDGPULibFunc::EI_RSQRT, FInfo))) {
+ if (FunctionCallee FPExpr =
+ getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
+ : AMDGPULibFunc::EI_RSQRT,
+ FInfo))) {
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
<< FInfo.getName().c_str() << "(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
@@ -1003,8 +1021,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
// powr ---> exp2(y * log2(x))
// pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
- Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
- FInfo));
+ FunctionCallee ExpExpr =
+ getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
if (!ExpExpr)
return false;
@@ -1090,8 +1108,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
Value *nval;
if (needabs) {
- Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
- FInfo));
+ FunctionCallee AbsExpr =
+ getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
if (!AbsExpr)
return false;
nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
@@ -1099,8 +1117,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
nval = cnval ? cnval : opr0;
}
if (needlog) {
- Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
- FInfo));
+ FunctionCallee LogExpr =
+ getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
if (!LogExpr)
return false;
nval = CreateCallEx(B,LogExpr, nval, "__log2");
@@ -1159,8 +1177,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
std::vector<const Type*> ParamsTys;
ParamsTys.push_back(opr0->getType());
Module *M = CI->getModule();
- if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
- FInfo))) {
+ if (FunctionCallee FPExpr =
+ getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
replaceCall(nval);
@@ -1168,8 +1186,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
}
} else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
Module *M = CI->getModule();
- if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
- FInfo))) {
+ if (FunctionCallee FPExpr =
+ getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
replaceCall(nval);
@@ -1186,8 +1204,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
std::vector<const Type*> ParamsTys;
ParamsTys.push_back(opr0->getType());
Module *M = CI->getModule();
- if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
- FInfo))) {
+ if (FunctionCallee FPExpr =
+ getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
<< ")\n");
Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
@@ -1243,7 +1261,8 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
}
// Get a scalar native builtin signle argument FP function
-Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
+FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
+ const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
return nullptr;
FuncInfo nf = FInfo;
@@ -1256,8 +1275,8 @@ bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
(FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
- if (Constant *FPExpr = getNativeFunction(
- CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
+ if (FunctionCallee FPExpr = getNativeFunction(
+ CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
Value *opr0 = CI->getArgOperand(0);
LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
<< "sqrt(" << *opr0 << ")\n");
@@ -1334,7 +1353,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
// function.
AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
- Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
+ FunctionCallee Fsincos = getFunction(M, nf);
if (!Fsincos) return false;
BasicBlock::iterator ItOld = B.GetInsertPoint();
@@ -1342,7 +1361,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
B.SetInsertPoint(UI);
Value *P = Alloc;
- Type *PTy = Fsincos->getFunctionType()->getParamType(1);
+ Type *PTy = Fsincos.getFunctionType()->getParamType(1);
// The allocaInst allocates the memory in private address space. This need
// to be bitcasted to point to the address space of cos pointer type.
// In OpenCL 2.0 this is generic, while in 1.2 that is private.
@@ -1356,12 +1375,12 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
if (!isSin) { // CI->cos, UI->sin
B.SetInsertPoint(&*ItOld);
UI->replaceAllUsesWith(&*Call);
- Instruction *Reload = B.CreateLoad(Alloc);
+ Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
CI->replaceAllUsesWith(Reload);
UI->eraseFromParent();
CI->eraseFromParent();
} else { // CI->sin, UI->cos
- Instruction *Reload = B.CreateLoad(Alloc);
+ Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
UI->replaceAllUsesWith(Reload);
CI->replaceAllUsesWith(Call);
UI->eraseFromParent();
@@ -1370,6 +1389,29 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
return true;
}
+bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
+ if (!TM)
+ return false;
+
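+  // Only fold when the wavefront size is actually known, i.e. a specific CPU
+  // is named or a wavefrontsize feature is set explicitly; otherwise leave
+  // the intrinsic in place.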
+ StringRef CPU = TM->getTargetCPU();
+ StringRef Features = TM->getTargetFeatureString();
+ if ((CPU.empty() || CPU.equals_lower("generic")) &&
+ (Features.empty() ||
+ Features.find_lower("wavefrontsize") == StringRef::npos))
+ return false;
+
+ Function *F = CI->getParent()->getParent();
+ const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
+ unsigned N = ST.getWavefrontSize();
+
+ LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
+ << N << "\n");
+
+ CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
+ CI->eraseFromParent();
+ return true;
+}
+
// Get insertion point at entry.
BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
Function * Func = UI->getParent()->getParent();
@@ -1679,8 +1721,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
}
// Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
- return new AMDGPUSimplifyLibCalls(Opt);
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
+ const TargetMachine *TM) {
+ return new AMDGPUSimplifyLibCalls(Opt, TM);
}
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 4fc3fe0f105b..a5bac25701a0 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPULibFunc.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,6 +63,8 @@ struct ManglingRule {
int getNumLeads() const { return (Lead[0] ? 1 : 0) + (Lead[1] ? 1 : 0); }
unsigned getNumArgs() const;
+
+ static StringMap<int> buildManglingRulesMap();
};
// Information about library functions with unmangled names.
@@ -77,16 +78,7 @@ class UnmangledFuncInfo {
// Number of entries in Table.
static const unsigned TableSize;
- // Map function name to index.
- class NameMap : public StringMap<unsigned> {
- public:
- NameMap() {
- for (unsigned I = 0; I != TableSize; ++I)
- (*this)[Table[I].Name] = I;
- }
- };
- friend class NameMap;
- static NameMap Map;
+ static StringMap<unsigned> buildNameMap();
public:
using ID = AMDGPULibFunc::EFuncId;
@@ -102,7 +94,8 @@ public:
static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED);
}
static ID toFuncId(unsigned Index) {
- assert(Index < TableSize && "Invalid unmangled library function");
+ assert(Index < TableSize &&
+ "Invalid unmangled library function");
return static_cast<ID>(
Index + 1 + static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED));
}
@@ -350,18 +343,7 @@ const UnmangledFuncInfo UnmangledFuncInfo::Table[] = {
};
const unsigned UnmangledFuncInfo::TableSize =
- sizeof(UnmangledFuncInfo::Table) / sizeof(UnmangledFuncInfo::Table[0]);
-
-UnmangledFuncInfo::NameMap UnmangledFuncInfo::Map;
-
-static const struct ManglingRulesMap : public StringMap<int> {
- ManglingRulesMap()
- : StringMap<int>(sizeof(manglingRules)/sizeof(manglingRules[0])) {
- int Id = 0;
- for (auto Rule : manglingRules)
- insert({ Rule.Name, Id++ });
- }
-} manglingRulesMap;
+ array_lengthof(UnmangledFuncInfo::Table);
static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
const AMDGPULibFunc::Param (&Leads)[2]) {
@@ -569,7 +551,17 @@ static AMDGPULibFunc::ENamePrefix parseNamePrefix(StringRef& mangledName) {
return Pfx;
}
+StringMap<int> ManglingRule::buildManglingRulesMap() {
+ StringMap<int> Map(array_lengthof(manglingRules));
+ int Id = 0;
+ for (auto Rule : manglingRules)
+ Map.insert({Rule.Name, Id++});
+ return Map;
+}
+
bool AMDGPUMangledLibFunc::parseUnmangledName(StringRef FullName) {
+ static const StringMap<int> manglingRulesMap =
+ ManglingRule::buildManglingRulesMap();
FuncId = static_cast<EFuncId>(manglingRulesMap.lookup(FullName));
return FuncId != EI_NONE;
}
@@ -961,8 +953,8 @@ Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) {
return nullptr;
}
-Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
- const AMDGPULibFunc &fInfo) {
+FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M,
+ const AMDGPULibFunc &fInfo) {
std::string const FuncName = fInfo.mangle();
Function *F = dyn_cast_or_null<Function>(
M->getValueSymbolTable().lookup(FuncName));
@@ -988,7 +980,7 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
}
}
- Constant *C = nullptr;
+ FunctionCallee C;
if (hasPtr) {
// Do not set extra attributes for functions with pointer arguments.
C = M->getOrInsertFunction(FuncName, FuncTy);
@@ -1002,10 +994,18 @@ Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
}
- return cast<Function>(C);
+ return C;
+}
+
+StringMap<unsigned> UnmangledFuncInfo::buildNameMap() {
+ StringMap<unsigned> Map;
+ for (unsigned I = 0; I != TableSize; ++I)
+ Map[Table[I].Name] = I;
+ return Map;
}
bool UnmangledFuncInfo::lookup(StringRef Name, ID &Id) {
+ static const StringMap<unsigned> Map = buildNameMap();
auto Loc = Map.find(Name);
if (Loc != Map.end()) {
Id = toFuncId(Loc->second);
diff --git a/lib/Target/AMDGPU/AMDGPULibFunc.h b/lib/Target/AMDGPU/AMDGPULibFunc.h
index fe062384800a..2354ed7df205 100644
--- a/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -1,9 +1,8 @@
//===-- AMDGPULibFunc.h ----------------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -394,8 +393,8 @@ public:
}
static Function *getFunction(llvm::Module *M, const AMDGPULibFunc &fInfo);
- static Function *getOrInsertFunction(llvm::Module *M,
- const AMDGPULibFunc &fInfo);
+ static FunctionCallee getOrInsertFunction(llvm::Module *M,
+ const AMDGPULibFunc &fInfo);
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr);
private:
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 2cec8fe53283..15032969890e 100644
--- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 743dc7a0d00b..5dd5b3691e0a 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPULowerKernelArguments.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,8 +109,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// modes on SI to know the high bits are 0 so pointer adds don't wrap. We
// can't represent this with range metadata because it's only allowed for
// integer types.
- if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
- ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
+ !ST.hasUsableDSOffset())
continue;
// FIXME: We can replace this with equivalent alias.scope/noalias
@@ -132,6 +132,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
KernArgBaseAlign);
Value *ArgPtr;
+ Type *AdjustedArgTy;
if (DoShiftOpt) { // FIXME: Handle aggregate types
// Since we don't have sub-dword scalar loads, avoid doing an extload by
// loading earlier than the argument address, and extracting the relevant
@@ -139,30 +140,27 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
//
// Additionally widen any sub-dword load to i32 even if suitably aligned,
// so that CSE between different argument loads works easily.
-
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
- KernArgSegment,
- AlignDownOffset,
- Arg.getName() + ".kernarg.offset.align.down");
- ArgPtr = Builder.CreateBitCast(ArgPtr,
- Builder.getInt32Ty()->getPointerTo(AS),
- ArgPtr->getName() + ".cast");
+ Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
+ Arg.getName() + ".kernarg.offset.align.down");
+ AdjustedArgTy = Builder.getInt32Ty();
} else {
ArgPtr = Builder.CreateConstInBoundsGEP1_64(
- KernArgSegment,
- EltOffset,
- Arg.getName() + ".kernarg.offset");
- ArgPtr = Builder.CreateBitCast(ArgPtr, ArgTy->getPointerTo(AS),
- ArgPtr->getName() + ".cast");
+ Builder.getInt8Ty(), KernArgSegment, EltOffset,
+ Arg.getName() + ".kernarg.offset");
+ AdjustedArgTy = ArgTy;
}
if (IsV3 && Size >= 32) {
V4Ty = VectorType::get(VT->getVectorElementType(), 4);
// Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
- ArgPtr = Builder.CreateBitCast(ArgPtr, V4Ty->getPointerTo(AS));
+ AdjustedArgTy = V4Ty;
}
- LoadInst *Load = Builder.CreateAlignedLoad(ArgPtr, AdjustedAlign);
+ ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
+ ArgPtr->getName() + ".cast");
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
diff --git a/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index a43dcef4cf0b..00e12f808783 100644
--- a/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPULowerKernelAttributes.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index f6bdbf5e9be2..ae4c32c258a7 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,7 @@
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600AsmPrinter.h"
#include "SIInstrInfo.h"
@@ -91,6 +90,10 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
return MCSymbolRefExpr::VK_AMDGPU_REL32_LO;
case SIInstrInfo::MO_REL32_HI:
return MCSymbolRefExpr::VK_AMDGPU_REL32_HI;
+ case SIInstrInfo::MO_ABS32_LO:
+ return MCSymbolRefExpr::VK_AMDGPU_ABS32_LO;
+ case SIInstrInfo::MO_ABS32_HI:
+ return MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
}
}
@@ -101,17 +104,22 @@ const MCExpr *AMDGPUMCInstLower::getLongBranchBlockExpr(
= MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx);
const MCExpr *SrcBBSym = MCSymbolRefExpr::create(SrcBB.getSymbol(), Ctx);
- assert(SrcBB.front().getOpcode() == AMDGPU::S_GETPC_B64 &&
- ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
+ // FIXME: The first half of this assert should be removed. This should
+ // probably be PC relative instead of using the source block symbol, and
+ // therefore the indirect branch expansion should use a bundle.
+ assert(
+ skipDebugInstructionsForward(SrcBB.begin(), SrcBB.end())->getOpcode() ==
+ AMDGPU::S_GETPC_B64 &&
+ ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
// s_getpc_b64 returns the address of next instruction.
const MCConstantExpr *One = MCConstantExpr::create(4, Ctx);
SrcBBSym = MCBinaryExpr::createAdd(SrcBBSym, One, Ctx);
- if (MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_FORWARD)
+ if (MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_FORWARD)
return MCBinaryExpr::createSub(DestBBSym, SrcBBSym, Ctx);
- assert(MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_BACKWARD);
+ assert(MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_BACKWARD);
return MCBinaryExpr::createSub(SrcBBSym, DestBBSym, Ctx);
}
@@ -142,10 +150,13 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
SmallString<128> SymbolName;
AP.getNameWithPrefix(SymbolName, GV);
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
- const MCExpr *SymExpr =
+ const MCExpr *Expr =
MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
- const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
- MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ int64_t Offset = MO.getOffset();
+ if (Offset != 0) {
+ Expr = MCBinaryExpr::createAdd(Expr,
+ MCConstantExpr::create(Offset, Ctx), Ctx);
+ }
MCOp = MCOperand::createExpr(Expr);
return true;
}
@@ -321,14 +332,13 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
#endif
- if (STI.dumpCode()) {
- // Disassemble instruction/operands to text.
+ if (DumpCodeInstEmitter) {
+ // Disassemble instruction/operands to text
DisasmLines.resize(DisasmLines.size() + 1);
std::string &DisasmLine = DisasmLines.back();
raw_string_ostream DisasmStream(DisasmLine);
- AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
- *STI.getInstrInfo(),
+ AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
*STI.getRegisterInfo());
InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), STI);
@@ -337,10 +347,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<char, 16> CodeBytes;
raw_svector_ostream CodeStream(CodeBytes);
- auto &ObjStreamer = static_cast<MCObjectStreamer&>(*OutStreamer);
- MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
- InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
- MF->getSubtarget<MCSubtargetInfo>());
+ DumpCodeInstEmitter->encodeInstruction(
+ TmpInst, CodeStream, Fixups, MF->getSubtarget<MCSubtargetInfo>());
HexLines.resize(HexLines.size() + 1);
std::string &HexLine = HexLines.back();
raw_string_ostream HexStream(HexLine);
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 6f44e2dbb2d5..237490957058 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 13b4b50149ce..0d3a1f1a769f 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,13 +29,13 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
// except reserved size is not correctly aligned.
const Function &F = MF.getFunction();
- if (auto *Resolver = MF.getMMI().getResolver()) {
- if (AMDGPUPerfHintAnalysis *PHA = static_cast<AMDGPUPerfHintAnalysis*>(
- Resolver->getAnalysisIfAvailable(&AMDGPUPerfHintAnalysisID, true))) {
- MemoryBound = PHA->isMemoryBound(&F);
- WaveLimiter = PHA->needsWaveLimiter(&F);
- }
- }
+ Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
+ MemoryBound = MemBoundAttr.isStringAttribute() &&
+ MemBoundAttr.getValueAsString() == "true";
+
+ Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
+ WaveLimiter = WaveLimitAttr.isStringAttribute() &&
+ WaveLimitAttr.getValueAsString() == "true";
CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 8d6b871bc03e..52987e2fa411 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -1,9 +1,8 @@
//===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
index 7b9f673c418c..4d9f08b3af01 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -1,9 +1,8 @@
//===--- AMDGPUMachineModuleInfo.cpp ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
AgentSSID = CTX.getOrInsertSyncScopeID("agent");
WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
+ SystemOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("one-as");
+ AgentOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("agent-one-as");
+ WorkgroupOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("workgroup-one-as");
+ WavefrontOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("wavefront-one-as");
+ SingleThreadOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("singlethread-one-as");
}
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
index 1219ab26fb69..2b0b8b42acfe 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -1,9 +1,8 @@
//===--- AMDGPUMachineModuleInfo.h ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,12 +29,22 @@ private:
// All supported memory/synchronization scopes can be found here:
// http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
- /// Agent synchronization scope ID.
+ /// Agent synchronization scope ID (cross address space).
SyncScope::ID AgentSSID;
- /// Workgroup synchronization scope ID.
+ /// Workgroup synchronization scope ID (cross address space).
SyncScope::ID WorkgroupSSID;
- /// Wavefront synchronization scope ID.
+ /// Wavefront synchronization scope ID (cross address space).
SyncScope::ID WavefrontSSID;
+ /// System synchronization scope ID (single address space).
+ SyncScope::ID SystemOneAddressSpaceSSID;
+ /// Agent synchronization scope ID (single address space).
+ SyncScope::ID AgentOneAddressSpaceSSID;
+ /// Workgroup synchronization scope ID (single address space).
+ SyncScope::ID WorkgroupOneAddressSpaceSSID;
+ /// Wavefront synchronization scope ID (single address space).
+ SyncScope::ID WavefrontOneAddressSpaceSSID;
+ /// Single thread synchronization scope ID (single address space).
+ SyncScope::ID SingleThreadOneAddressSpaceSSID;
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@@ -44,35 +53,70 @@ private:
/// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
/// supported by the AMDGPU target.
Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
- if (SSID == SyncScope::SingleThread)
+ if (SSID == SyncScope::SingleThread ||
+ SSID == getSingleThreadOneAddressSpaceSSID())
return 0;
- else if (SSID == getWavefrontSSID())
+ else if (SSID == getWavefrontSSID() ||
+ SSID == getWavefrontOneAddressSpaceSSID())
return 1;
- else if (SSID == getWorkgroupSSID())
+ else if (SSID == getWorkgroupSSID() ||
+ SSID == getWorkgroupOneAddressSpaceSSID())
return 2;
- else if (SSID == getAgentSSID())
+ else if (SSID == getAgentSSID() ||
+ SSID == getAgentOneAddressSpaceSSID())
return 3;
- else if (SSID == SyncScope::System)
+ else if (SSID == SyncScope::System ||
+ SSID == getSystemOneAddressSpaceSSID())
return 4;
return None;
}
+  /// \returns True if \p SSID is restricted to a single address space, false
+  /// otherwise.
+ bool isOneAddressSpace(SyncScope::ID SSID) const {
+ return SSID == getSingleThreadOneAddressSpaceSSID() ||
+ SSID == getWavefrontOneAddressSpaceSSID() ||
+ SSID == getWorkgroupOneAddressSpaceSSID() ||
+ SSID == getAgentOneAddressSpaceSSID() ||
+ SSID == getSystemOneAddressSpaceSSID();
+ }
+
public:
AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
- /// \returns Agent synchronization scope ID.
+ /// \returns Agent synchronization scope ID (cross address space).
SyncScope::ID getAgentSSID() const {
return AgentSSID;
}
- /// \returns Workgroup synchronization scope ID.
+ /// \returns Workgroup synchronization scope ID (cross address space).
SyncScope::ID getWorkgroupSSID() const {
return WorkgroupSSID;
}
- /// \returns Wavefront synchronization scope ID.
+ /// \returns Wavefront synchronization scope ID (cross address space).
SyncScope::ID getWavefrontSSID() const {
return WavefrontSSID;
}
+ /// \returns System synchronization scope ID (single address space).
+ SyncScope::ID getSystemOneAddressSpaceSSID() const {
+ return SystemOneAddressSpaceSSID;
+ }
+ /// \returns Agent synchronization scope ID (single address space).
+ SyncScope::ID getAgentOneAddressSpaceSSID() const {
+ return AgentOneAddressSpaceSSID;
+ }
+ /// \returns Workgroup synchronization scope ID (single address space).
+ SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
+ return WorkgroupOneAddressSpaceSSID;
+ }
+ /// \returns Wavefront synchronization scope ID (single address space).
+ SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
+ return WavefrontOneAddressSpaceSSID;
+ }
+ /// \returns Single thread synchronization scope ID (single address space).
+ SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
+ return SingleThreadOneAddressSpaceSSID;
+ }
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@@ -88,7 +132,11 @@ public:
if (!AIO || !BIO)
return None;
- return AIO.getValue() > BIO.getValue();
+ bool IsAOneAddressSpace = isOneAddressSpace(A);
+ bool IsBOneAddressSpace = isOneAddressSpace(B);
+
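+    // A cross address space scope is at least as strong as a single address
+    // space scope of the same or lower ordering; the reverse never holds.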
+ return AIO.getValue() >= BIO.getValue() &&
+ (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
}
};
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
index 5e0b7d429022..8c11230f411a 100644
--- a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -1,9 +1,8 @@
//===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
index 844958580a65..da4b3cf8bc24 100644
--- a/lib/Target/AMDGPU/AMDGPUMacroFusion.h
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -1,9 +1,8 @@
//===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 7bd8533a0ccf..f7231471c107 100644
--- a/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUOpenCLEnqueuedBlockLowering.cpp - Lower enqueued block -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -120,11 +119,11 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
auto T = ArrayType::get(Type::getInt64Ty(C), 2);
auto *GV = new GlobalVariable(
M, T,
- /*IsConstant=*/false, GlobalValue::ExternalLinkage,
+ /*isConstant=*/false, GlobalValue::ExternalLinkage,
/*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
/*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
AMDGPUAS::GLOBAL_ADDRESS,
- /*IsExternallyInitialized=*/false);
+ /*isExternallyInitialized=*/false);
LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
for (auto U : F.users()) {
diff --git a/lib/Target/AMDGPU/AMDGPUPTNote.h b/lib/Target/AMDGPU/AMDGPUPTNote.h
index 2feff14d34a1..8b69f51c1a0d 100644
--- a/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -1,9 +1,8 @@
//===-- AMDGPUNoteType.h - AMDGPU ELF PT_NOTE section info-------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index e53a8fe7c074..9613d5a843b3 100644
--- a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUPerfHintAnalysis.cpp - analysis of functions memory traffic --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -72,7 +72,7 @@ public:
const TargetLowering *TLI_)
: FIM(FIM_), DL(nullptr), TLI(TLI_) {}
- void runOnFunction(Function &F);
+ bool runOnFunction(Function &F);
private:
struct MemAccessInfo {
@@ -101,7 +101,7 @@ private:
const TargetLowering *TLI;
- void visit(const Function &F);
+ AMDGPUPerfHintAnalysis::FuncInfo *visit(const Function &F);
static bool isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &F);
static bool needLimitWave(const AMDGPUPerfHintAnalysis::FuncInfo &F);
@@ -203,12 +203,8 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const {
return false;
}
-void AMDGPUPerfHint::visit(const Function &F) {
- auto FIP = FIM.insert(std::make_pair(&F, AMDGPUPerfHintAnalysis::FuncInfo()));
- if (!FIP.second)
- return;
-
- AMDGPUPerfHintAnalysis::FuncInfo &FI = FIP.first->second;
+AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) {
+ AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F];
LLVM_DEBUG(dbgs() << "[AMDGPUPerfHint] process " << F.getName() << '\n');
@@ -234,10 +230,10 @@ void AMDGPUPerfHint::visit(const Function &F) {
if (&F == Callee) // Handle immediate recursion
continue;
- visit(*Callee);
auto Loc = FIM.find(Callee);
+ if (Loc == FIM.end())
+ continue;
- assert(Loc != FIM.end() && "No func info");
FI.MemInstCount += Loc->second.MemInstCount;
FI.InstCount += Loc->second.InstCount;
FI.IAMInstCount += Loc->second.IAMInstCount;
@@ -257,36 +253,39 @@ void AMDGPUPerfHint::visit(const Function &F) {
}
}
}
-}
-void AMDGPUPerfHint::runOnFunction(Function &F) {
- if (FIM.find(&F) != FIM.end())
- return;
+ return &FI;
+}
+bool AMDGPUPerfHint::runOnFunction(Function &F) {
const Module &M = *F.getParent();
DL = &M.getDataLayout();
- visit(F);
- auto Loc = FIM.find(&F);
+ if (F.hasFnAttribute("amdgpu-wave-limiter") &&
+ F.hasFnAttribute("amdgpu-memory-bound"))
+ return false;
+
+ const AMDGPUPerfHintAnalysis::FuncInfo *Info = visit(F);
- assert(Loc != FIM.end() && "No func info");
- LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Loc->second.MemInstCount
+ LLVM_DEBUG(dbgs() << F.getName() << " MemInst: " << Info->MemInstCount
<< '\n'
- << " IAMInst: " << Loc->second.IAMInstCount << '\n'
- << " LSMInst: " << Loc->second.LSMInstCount << '\n'
- << " TotalInst: " << Loc->second.InstCount << '\n');
-
- auto &FI = Loc->second;
+ << " IAMInst: " << Info->IAMInstCount << '\n'
+ << " LSMInst: " << Info->LSMInstCount << '\n'
+ << " TotalInst: " << Info->InstCount << '\n');
- if (isMemBound(FI)) {
+ if (isMemBound(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " is memory bound\n");
NumMemBound++;
+ F.addFnAttr("amdgpu-memory-bound", "true");
}
- if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(FI)) {
+ if (AMDGPU::isEntryFunctionCC(F.getCallingConv()) && needLimitWave(*Info)) {
LLVM_DEBUG(dbgs() << F.getName() << " needs limit wave\n");
NumLimitWave++;
+ F.addFnAttr("amdgpu-wave-limiter", "true");
}
+
+ return true;
}
bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) {
@@ -365,17 +364,27 @@ bool AMDGPUPerfHint::MemAccessInfo::isLargeStride(
}
} // namespace
-bool AMDGPUPerfHintAnalysis::runOnFunction(Function &F) {
+bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
const TargetMachine &TM = TPC->getTM<TargetMachine>();
- const TargetSubtargetInfo *ST = TM.getSubtargetImpl(F);
- AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
- Analyzer.runOnFunction(F);
- return false;
+ bool Changed = false;
+ for (CallGraphNode *I : SCC) {
+ Function *F = I->getFunction();
+ if (!F || F->isDeclaration())
+ continue;
+
+ const TargetSubtargetInfo *ST = TM.getSubtargetImpl(*F);
+ AMDGPUPerfHint Analyzer(FIM, ST->getTargetLowering());
+
+ if (Analyzer.runOnFunction(*F))
+ Changed = true;
+ }
+
+ return Changed;
}
bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
diff --git a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index be7f37cb6815..9599e09fbd96 100644
--- a/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -1,9 +1,8 @@
-//===- AMDGPUPerfHintAnalysis.h - analysis of functions memory traffic ----===//
+//===- AMDGPUPerfHintAnalysis.h ---- analysis of memory traffic -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,18 +14,20 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_MDGPUPERFHINTANALYSIS_H
+
+#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
namespace llvm {
-struct AMDGPUPerfHintAnalysis : public FunctionPass {
+struct AMDGPUPerfHintAnalysis : public CallGraphSCCPass {
static char ID;
public:
- AMDGPUPerfHintAnalysis() : FunctionPass(ID) {}
+ AMDGPUPerfHintAnalysis() : CallGraphSCCPass(ID) {}
- bool runOnFunction(Function &F) override;
+ bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 5d087c099184..e4c9d6685d4a 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -163,12 +162,16 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
bool SufficientLDS = hasSufficientLocalMem(F);
bool Changed = false;
BasicBlock &EntryBB = *F.begin();
- for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
- AllocaInst *AI = dyn_cast<AllocaInst>(I);
- ++I;
- if (AI)
- Changed |= handleAlloca(*AI, SufficientLDS);
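+  // Collecting the allocas first avoids iterating over the entry block while
+  // handleAlloca may be inserting or erasing instructions in it.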
+ SmallVector<AllocaInst *, 16> Allocas;
+ for (Instruction &I : EntryBB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ Allocas.push_back(AI);
+ }
+
+ for (AllocaInst *AI : Allocas) {
+ if (handleAlloca(*AI, SufficientLDS))
+ Changed = true;
}
return Changed;
@@ -245,11 +248,11 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
// to CSE this. The loads should be mergable later anyway.
- Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
- LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);
+ Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
+ LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4);
- Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
- LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
+ Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
+ LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4);
MDNode *MD = MDNode::get(Mod->getContext(), None);
LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
@@ -427,7 +430,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
Inst->replaceAllUsesWith(ExtractElement);
Inst->eraseFromParent();
@@ -442,7 +445,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
Value *Ptr = SI->getPointerOperand();
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
SI->getValueOperand(),
Index);
@@ -919,7 +922,8 @@ bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
);
CallInst *NewCall = Builder.CreateCall(
- ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
+ ObjectSize,
+ {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
Intr->replaceAllUsesWith(NewCall);
Intr->eraseFromParent();
continue;
diff --git a/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
new file mode 100644
index 000000000000..7a7addd0f5cf
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -0,0 +1,336 @@
+//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass propagates attributes from kernels to the non-entry
+/// functions. Most of the library functions were not compiled for a specific
+/// ABI, yet they will be compiled correctly if the proper attributes are
+/// propagated from the caller.
+///
+/// The pass analyzes the call graph and propagates ABI target features
+/// through it.
+///
+/// It can run in two modes: as a function pass or as a module pass. A function
+/// pass simply propagates attributes. A module pass clones functions if there
+/// are callers with different ABIs. If a function is cloned, all call sites
+/// are updated to use the correct clone.
+///
+/// A function pass is limited in functionality but can run early in the
+/// pipeline. A module pass is more powerful but has to run late, so misses
+/// library folding opportunities.
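+///
+/// Illustrative example (not from the source): a kernel compiled with
+/// "target-features"="+wavefrontsize64" that calls a library function with no
+/// wavefront-size feature set will have its wavefront-size bits copied into
+/// the callee's "target-features" attribute; in the module-pass mode the
+/// callee is cloned first if other callers require different features.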
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <string>
+
+#define DEBUG_TYPE "amdgpu-propagate-attributes"
+
+using namespace llvm;
+
+namespace llvm {
+extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
+}
+
+namespace {
+
+class AMDGPUPropagateAttributes {
+ const FeatureBitset TargetFeatures = {
+ AMDGPU::FeatureWavefrontSize16,
+ AMDGPU::FeatureWavefrontSize32,
+ AMDGPU::FeatureWavefrontSize64
+ };
+
+ class Clone{
+ public:
+ Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
+ FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
+
+ FeatureBitset FeatureMask;
+ Function *OrigF;
+ Function *NewF;
+ };
+
+ const TargetMachine *TM;
+
+ // Clone functions as needed or just set attributes.
+ bool AllowClone;
+
+ // Option propagation roots.
+ SmallSet<Function *, 32> Roots;
+
+ // Clones of functions with their attributes.
+ SmallVector<Clone, 32> Clones;
+
+ // Find a clone with required features.
+ Function *findFunction(const FeatureBitset &FeaturesNeeded,
+ Function *OrigF);
+
+  // Clone function F and set NewFeatures on the clone.
+  // The clone takes the name of the original function.
+ Function *cloneWithFeatures(Function &F,
+ const FeatureBitset &NewFeatures);
+
+ // Set new function's features in place.
+ void setFeatures(Function &F, const FeatureBitset &NewFeatures);
+
+ std::string getFeatureString(const FeatureBitset &Features) const;
+
+ // Propagate attributes from Roots.
+ bool process();
+
+public:
+ AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
+ TM(TM), AllowClone(AllowClone) {}
+
+ // Use F as a root and propagate its attributes.
+ bool process(Function &F);
+
+ // Propagate attributes starting from kernel functions.
+ bool process(Module &M);
+};
+
+// Allows attributes to be propagated early, but no cloning is allowed, as this
+// must be a function pass in order to run before any optimizations.
+// TODO: We should only need one instance of the module pass, but that needs to
+// be in the linker pipeline, which is currently not possible.
+class AMDGPUPropagateAttributesEarly : public FunctionPass {
+ const TargetMachine *TM;
+
+public:
+ static char ID; // Pass identification
+
+ AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
+ FunctionPass(ID), TM(TM) {
+ initializeAMDGPUPropagateAttributesEarlyPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+
+// Allows attributes to be propagated with cloning, but does so late in the
+// pipeline.
+class AMDGPUPropagateAttributesLate : public ModulePass {
+ const TargetMachine *TM;
+
+public:
+ static char ID; // Pass identification
+
+ AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
+ ModulePass(ID), TM(TM) {
+ initializeAMDGPUPropagateAttributesLatePass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+} // end anonymous namespace.
+
+char AMDGPUPropagateAttributesEarly::ID = 0;
+char AMDGPUPropagateAttributesLate::ID = 0;
+
+INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
+ "amdgpu-propagate-attributes-early",
+ "Early propagate attributes from kernels to functions",
+ false, false)
+INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
+ "amdgpu-propagate-attributes-late",
+ "Late propagate attributes from kernels to functions",
+ false, false)
+
+Function *
+AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
+ Function *OrigF) {
+ // TODO: search for clone's clones.
+ for (Clone &C : Clones)
+ if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
+ return C.NewF;
+
+ return nullptr;
+}
+
+bool AMDGPUPropagateAttributes::process(Module &M) {
+ for (auto &F : M.functions())
+ if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ Roots.insert(&F);
+
+ return process();
+}
+
+bool AMDGPUPropagateAttributes::process(Function &F) {
+ Roots.insert(&F);
+ return process();
+}
+
+bool AMDGPUPropagateAttributes::process() {
+ bool Changed = false;
+ SmallSet<Function *, 32> NewRoots;
+ SmallSet<Function *, 32> Replaced;
+
+ if (Roots.empty())
+ return false;
+ Module &M = *(*Roots.begin())->getParent();
+
+ do {
+ Roots.insert(NewRoots.begin(), NewRoots.end());
+ NewRoots.clear();
+
+ for (auto &F : M.functions()) {
+      if (F.isDeclaration() || Roots.count(&F))
+ continue;
+
+ const FeatureBitset &CalleeBits =
+ TM->getSubtargetImpl(F)->getFeatureBits();
+ SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
+
+ for (User *U : F.users()) {
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I)
+ continue;
+ CallBase *CI = dyn_cast<CallBase>(I);
+ if (!CI)
+ continue;
+ Function *Caller = CI->getCaller();
+ if (!Caller)
+ continue;
+ if (!Roots.count(Caller))
+ continue;
+
+ const FeatureBitset &CallerBits =
+ TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
+
+ if (CallerBits == (CalleeBits & TargetFeatures)) {
+ NewRoots.insert(&F);
+ continue;
+ }
+
+ Function *NewF = findFunction(CallerBits, &F);
+ if (!NewF) {
+ FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
+ CallerBits);
+ if (!AllowClone) {
+            // This may set different features on different iterations if
+ // there is a contradiction in callers' attributes. In this case
+ // we rely on a second pass running on Module, which is allowed
+ // to clone.
+ setFeatures(F, NewFeatures);
+ NewRoots.insert(&F);
+ Changed = true;
+ break;
+ }
+
+ NewF = cloneWithFeatures(F, NewFeatures);
+ Clones.push_back(Clone(CallerBits, &F, NewF));
+ NewRoots.insert(NewF);
+ }
+
+ ToReplace.push_back(std::make_pair(CI, NewF));
+ Replaced.insert(&F);
+
+ Changed = true;
+ }
+
+ while (!ToReplace.empty()) {
+ auto R = ToReplace.pop_back_val();
+ R.first->setCalledFunction(R.second);
+ }
+ }
+ } while (!NewRoots.empty());
+
+ for (Function *F : Replaced) {
+ if (F->use_empty())
+ F->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+Function *
+AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
+ const FeatureBitset &NewFeatures) {
+ LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
+
+ ValueToValueMapTy dummy;
+ Function *NewF = CloneFunction(&F, dummy);
+ setFeatures(*NewF, NewFeatures);
+
+  // Swap names. If this is the only clone, it will retain the name of the
+  // now-dead value.
+ if (F.hasName()) {
+ std::string NewName = NewF->getName();
+ NewF->takeName(&F);
+ F.setName(NewName);
+
+    // The name has changed, so it no longer needs to be an external symbol.
+ F.setVisibility(GlobalValue::DefaultVisibility);
+ F.setLinkage(GlobalValue::InternalLinkage);
+ }
+
+ return NewF;
+}
+
+void AMDGPUPropagateAttributes::setFeatures(Function &F,
+ const FeatureBitset &NewFeatures) {
+ std::string NewFeatureStr = getFeatureString(NewFeatures);
+
+ LLVM_DEBUG(dbgs() << "Set features "
+ << getFeatureString(NewFeatures & TargetFeatures)
+ << " on " << F.getName() << '\n');
+
+ F.removeFnAttr("target-features");
+ F.addFnAttr("target-features", NewFeatureStr);
+}
+
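+// The string produced below lists every set feature as "+feature" and each
+// tracked-but-unset wavefront-size feature as "-feature", e.g. (illustrative)
+// "+wavefrontsize64,-wavefrontsize16,-wavefrontsize32".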
+std::string
+AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
+{
+ std::string Ret;
+ for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
+ if (Features[KV.Value])
+ Ret += (StringRef("+") + KV.Key + ",").str();
+ else if (TargetFeatures[KV.Value])
+ Ret += (StringRef("-") + KV.Key + ",").str();
+ }
+ Ret.pop_back(); // Remove last comma.
+ return Ret;
+}
+
+bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
+ if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ return false;
+
+ return AMDGPUPropagateAttributes(TM, false).process(F);
+}
+
+bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
+ if (!TM)
+ return false;
+
+ return AMDGPUPropagateAttributes(TM, true).process(M);
+}
+
+FunctionPass
+*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
+ return new AMDGPUPropagateAttributesEarly(TM);
+}
+
+ModulePass
+*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
+ return new AMDGPUPropagateAttributesLate(TM);
+}
diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
deleted file mode 100644
index 36d88f52910d..000000000000
--- a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
+++ /dev/null
@@ -1,353 +0,0 @@
-//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
-
-#ifdef AMDGPU_REG_ASM_NAMES
-
-static const char *const VGPR32RegNames[] = {
- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
- "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
- "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
- "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
- "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
- "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
- "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
- "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
- "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
- "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
- "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
- "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
- "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
- "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
- "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
- "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
- "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
- "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
- "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
- "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
- "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
- "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
- "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
- "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
- "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
- "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
- "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
- "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
- "v252", "v253", "v254", "v255"
-};
-
-static const char *const SGPR32RegNames[] = {
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9",
- "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19",
- "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
- "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39",
- "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
- "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59",
- "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69",
- "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79",
- "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89",
- "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99",
- "s100", "s101", "s102", "s103"
-};
-
-static const char *const VGPR64RegNames[] = {
- "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]",
- "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]",
- "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]",
- "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]",
- "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]",
- "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]",
- "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]",
- "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]",
- "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]",
- "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]",
- "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]",
- "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]",
- "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]",
- "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]",
- "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]",
- "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]",
- "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]",
- "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]",
- "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]",
- "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]",
- "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
- "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
- "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
- "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
- "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
- "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
- "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
- "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
- "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
- "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
- "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
- "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
- "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
- "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
- "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
- "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
- "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
- "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
- "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
- "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
- "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
- "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
- "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
- "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
- "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
- "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
- "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
- "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
- "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
- "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
- "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
-};
-
-static const char *const VGPR96RegNames[] = {
- "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]",
- "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]",
- "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]",
- "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]",
- "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]",
- "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]",
- "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]",
- "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]",
- "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]",
- "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]",
- "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]",
- "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]",
- "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]",
- "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]",
- "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]",
- "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]",
- "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]",
- "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]",
- "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]",
- "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]",
- "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
- "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
- "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
- "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
- "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
- "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
- "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
- "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
- "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
- "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
- "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
- "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
- "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
- "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
- "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
- "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
- "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
- "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
- "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
- "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
- "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
- "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
- "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
- "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
- "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
- "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
- "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
- "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
- "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
- "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
- "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
-};
-
-static const char *const VGPR128RegNames[] = {
- "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]",
- "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]",
- "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]",
- "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]",
- "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]",
- "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]",
- "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]",
- "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]",
- "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]",
- "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]",
- "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]",
- "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]",
- "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]",
- "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]",
- "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]",
- "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]",
- "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]",
- "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]",
- "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]",
- "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]",
- "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
- "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
- "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
- "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
- "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
- "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
- "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
- "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
- "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
- "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
- "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
- "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
- "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
- "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
- "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
- "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
- "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
- "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
- "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
- "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
- "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
- "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
- "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
- "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
- "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
- "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
- "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
- "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
- "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
- "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
- "v[250:253]", "v[251:254]", "v[252:255]"
-};
-
-static const char *const VGPR256RegNames[] = {
- "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]",
- "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]",
- "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]",
- "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]",
- "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]",
- "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]",
- "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]",
- "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]",
- "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]",
- "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]",
- "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]",
- "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]",
- "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]",
- "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]",
- "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]",
- "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]",
- "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]",
- "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]",
- "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]",
- "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]",
- "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
- "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
- "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
- "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
- "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
- "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
- "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
- "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
- "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
- "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
- "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
- "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
- "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
- "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
- "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
- "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
- "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
- "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
- "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
- "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
- "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
- "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
- "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
- "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
- "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
- "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
- "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
- "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
- "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
- "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
-};
-
-static const char *const VGPR512RegNames[] = {
- "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]",
- "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]",
- "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]",
- "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]",
- "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]",
- "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]",
- "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]",
- "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]",
- "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]",
- "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]",
- "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]",
- "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]",
- "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]",
- "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]",
- "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]",
- "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]",
- "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]",
- "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]",
- "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]",
- "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]",
- "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
- "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
- "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
- "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
- "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
- "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
- "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
- "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
- "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
- "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
- "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
- "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
- "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
- "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
- "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
- "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
- "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
- "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
- "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
- "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
- "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
- "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
- "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
- "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
- "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
- "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
- "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
- "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
- "v[240:255]"
-};
-
-static const char *const SGPR64RegNames[] = {
- "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]",
- "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]",
- "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]",
- "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]",
- "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]",
- "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]",
- "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]",
- "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]",
- "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]"
-};
-
-static const char *const SGPR128RegNames[] = {
- "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]",
- "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
- "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
- "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
- "s[96:99]", "s[100:103]"
-};
-
-static const char *const SGPR256RegNames[] = {
- "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]",
- "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
- "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
- "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
- "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
-};
-
-static const char *const SGPR512RegNames[] = {
- "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]",
- "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]",
- "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]",
- "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
-};
-
-#endif
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 7a760dcf7a90..815cbc5e26ee 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -14,9 +13,13 @@
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -31,6 +34,56 @@
using namespace llvm;
+namespace {
+
+// Observer to apply a register bank to new registers created by LegalizerHelper.
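+// Newly created instructions have no operands yet when they are observed (see
+// createdInstr below), so they are only recorded here; the bank is applied
+// when the observer is destroyed, after the operands have been filled in.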
+class ApplyRegBankMapping final : public GISelChangeObserver {
+private:
+ MachineRegisterInfo &MRI;
+ const RegisterBank *NewBank;
+ SmallVector<MachineInstr *, 4> NewInsts;
+
+public:
+ ApplyRegBankMapping(MachineRegisterInfo &MRI_, const RegisterBank *RB)
+ : MRI(MRI_), NewBank(RB) {}
+
+ ~ApplyRegBankMapping() {
+ for (MachineInstr *MI : NewInsts)
+ applyBank(*MI);
+ }
+
+ /// Set any registers that don't have a set register class or bank to SALU.
+ void applyBank(MachineInstr &MI) {
+ for (MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+
+ Register Reg = Op.getReg();
+ if (MRI.getRegClassOrRegBank(Reg))
+ continue;
+
+ const RegisterBank *RB = NewBank;
+ // FIXME: This might not be enough to detect when SCC should be used.
+ if (MRI.getType(Reg) == LLT::scalar(1))
+ RB = (NewBank == &AMDGPU::SGPRRegBank ?
+ &AMDGPU::SCCRegBank : &AMDGPU::VCCRegBank);
+
+ MRI.setRegBank(Reg, *RB);
+ }
+ }
+
+ void erasingInstr(MachineInstr &MI) override {}
+
+ void createdInstr(MachineInstr &MI) override {
+ // At this point, the instruction was just inserted and has no operands.
+ NewInsts.push_back(&MI);
+ }
+
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+};
+
+}
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
: AMDGPUGenRegisterBankInfo(),
TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
@@ -52,43 +105,62 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
}
-static bool isConstant(const MachineOperand &MO, int64_t &C) {
- const MachineFunction *MF = MO.getParent()->getParent()->getParent();
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
- if (!Def)
- return false;
-
- if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
- C = Def->getOperand(1).getCImm()->getSExtValue();
- return true;
- }
-
- if (Def->getOpcode() == AMDGPU::COPY)
- return isConstant(Def->getOperand(1), C);
-
- return false;
-}
-
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
+ // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
if (Dst.getID() == AMDGPU::SGPRRegBankID &&
Src.getID() == AMDGPU::VGPRRegBankID) {
return std::numeric_limits<unsigned>::max();
}
- // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
- // the valu.
- if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
+ // Bool values are tricky, because the meaning is based on context. The SCC
+ // and VCC banks are for the natural scalar and vector conditions produced by
+ // a compare.
+ //
+ // Legalization doesn't know about the necessary context, so an s1 use may
+ // have been a truncate from an arbitrary value, in which case a copy (lowered
+ // as a compare with 0) needs to be inserted.
+ if (Size == 1 &&
+ (Dst.getID() == AMDGPU::SCCRegBankID ||
+ Dst.getID() == AMDGPU::SGPRRegBankID) &&
(Src.getID() == AMDGPU::SGPRRegBankID ||
Src.getID() == AMDGPU::VGPRRegBankID ||
Src.getID() == AMDGPU::VCCRegBankID))
return std::numeric_limits<unsigned>::max();
+ if (Dst.getID() == AMDGPU::SCCRegBankID &&
+ Src.getID() == AMDGPU::VCCRegBankID)
+ return std::numeric_limits<unsigned>::max();
+
return RegisterBankInfo::copyCost(Dst, Src, Size);
}
+unsigned AMDGPURegisterBankInfo::getBreakDownCost(
+ const ValueMapping &ValMapping,
+ const RegisterBank *CurBank) const {
+ // Check if this is a breakdown for G_LOAD to move the pointer from SGPR to
+ // VGPR.
+ // FIXME: Is there a better way to do this?
+ if (ValMapping.NumBreakDowns >= 2 || ValMapping.BreakDown[0].Length >= 64)
+ return 10; // This is expensive.
+
+ assert(ValMapping.NumBreakDowns == 2 &&
+ ValMapping.BreakDown[0].Length == 32 &&
+ ValMapping.BreakDown[0].StartIdx == 0 &&
+ ValMapping.BreakDown[1].Length == 32 &&
+ ValMapping.BreakDown[1].StartIdx == 32 &&
+ ValMapping.BreakDown[0].RegBank == ValMapping.BreakDown[1].RegBank);
+
+ // 32-bit extract of a 64-bit value is just access of a subregister, so free.
+  // TODO: A cost of 0 hits an assert, though it's not clear that is what we
+  // really want.
+
+ // TODO: 32-bit insert to a 64-bit SGPR may incur a non-free copy due to SGPR
+ // alignment restrictions, but this probably isn't important.
+ return 1;
+}
+
const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
const TargetRegisterClass &RC) const {
@@ -98,6 +170,163 @@ const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
return getRegBank(AMDGPU::VGPRRegBankID);
}
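+
+// Builds a list of alternative mappings from a static table. RegSrcOpIdx gives
+// the MI operand index each table column applies to; each table row becomes
+// one candidate mapping with that row's cost. Explicit defs default to the
+// VGPR bank unless a table column covers them.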
+template <unsigned NumOps>
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::addMappingFromTable(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const std::array<unsigned, NumOps> RegSrcOpIdx,
+ ArrayRef<OpRegBankEntry<NumOps>> Table) const {
+
+ InstructionMappings AltMappings;
+
+ SmallVector<const ValueMapping *, 10> Operands(MI.getNumOperands());
+
+ unsigned Sizes[NumOps];
+ for (unsigned I = 0; I < NumOps; ++I) {
+ Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
+ Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
+ }
+
+ for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
+ unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+ Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
+ }
+
+ unsigned MappingID = 0;
+ for (const auto &Entry : Table) {
+ for (unsigned I = 0; I < NumOps; ++I) {
+ int OpIdx = RegSrcOpIdx[I];
+ Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
+ }
+
+ AltMappings.push_back(&getInstructionMapping(MappingID++, Entry.Cost,
+ getOperandsMapping(Operands),
+ Operands.size()));
+ }
+
+ return AltMappings;
+}
+
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ case Intrinsic::amdgcn_readlane: {
+ static const OpRegBankEntry<3> Table[2] = {
+ // Perfectly legal.
+ { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+
+ // Need a readfirstlane for the index.
+ { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
+ };
+
+ const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
+ return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
+ case Intrinsic::amdgcn_writelane: {
+ static const OpRegBankEntry<4> Table[4] = {
+ // Perfectly legal.
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+
+ // Need readfirstlane of first op
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
+
+ // Need readfirstlane of second op
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
+
+ // Need readfirstlane of both ops
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
+ };
+
+    // dst, value, lane select, original value
+ const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
+ return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
+ default:
+ return RegisterBankInfo::getInstrAlternativeMappings(MI);
+ }
+}
+
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
+
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ case Intrinsic::amdgcn_buffer_load: {
+ static const OpRegBankEntry<3> Table[4] = {
+ // Perfectly legal.
+ { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+ { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+
+ // Waterfall loop needed for rsrc. In the worst case this will execute
+ // approximately an extra 10 * wavesize + 2 instructions.
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1000 }
+ };
+
+ // rsrc, voffset, offset
+ const std::array<unsigned, 3> RegSrcOpIdx = { { 2, 3, 4 } };
+ return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
+ case Intrinsic::amdgcn_s_buffer_load: {
+ static const OpRegBankEntry<2> Table[4] = {
+ // Perfectly legal.
+ { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+
+ // Only need 1 register in loop
+ { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
+
+ // Have to waterfall the resource.
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
+
+ // Have to waterfall the resource, and the offset.
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
+ };
+
+ // rsrc, offset
+ const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
+ return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap: {
+ // VGPR = M0, VGPR
+ static const OpRegBankEntry<3> Table[2] = {
+ // Perfectly legal.
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+
+ // Need a readfirstlane for m0
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
+ };
+
+ const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
+ return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
+ case Intrinsic::amdgcn_s_sendmsg:
+ case Intrinsic::amdgcn_s_sendmsghalt: {
+ static const OpRegBankEntry<1> Table[2] = {
+ // Perfectly legal.
+ { { AMDGPU::SGPRRegBankID }, 1 },
+
+ // Need readlane
+ { { AMDGPU::VGPRRegBankID }, 3 }
+ };
+
+ const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
+ return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
+ default:
+ return RegisterBankInfo::getInstrAlternativeMappings(MI);
+ }
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return AMDGPUInstrInfo::isUniformMMO(MMO);
+}
+
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::getInstrAlternativeMappings(
const MachineInstr &MI) const {
@@ -108,31 +337,102 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
switch (MI.getOpcode()) {
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR: {
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
- // FIXME: Should we be hard coding the size for these mappings?
- const InstructionMapping &SSMapping = getInstructionMapping(
+
+ if (Size == 1) {
+ // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
+ const InstructionMapping &SCCMapping = getInstructionMapping(
1, 1, getOperandsMapping(
- {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
- 2); // Num Operands
+ {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&SCCMapping);
+
+ const InstructionMapping &SGPRMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&SGPRMapping);
+
+ const InstructionMapping &VCCMapping0 = getInstructionMapping(
+ 2, 10, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&VCCMapping0);
+ return AltMappings;
+ }
+
+ if (Size != 64)
+ break;
+
+ const InstructionMapping &SSMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+ 3); // Num Operands
AltMappings.push_back(&SSMapping);
const InstructionMapping &VVMapping = getInstructionMapping(
+ 2, 2, getOperandsMapping(
+ {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&VVMapping);
+
+ const InstructionMapping &SVMapping = getInstructionMapping(
+ 3, 3, getOperandsMapping(
+ {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&SVMapping);
+
+    // An SGPR in the LHS is slightly preferable, so make VS more expensive
+    // than SV.
+ const InstructionMapping &VSMapping = getInstructionMapping(
+ 3, 4, getOperandsMapping(
+ {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size)}),
+ 3); // Num Operands
+ AltMappings.push_back(&VSMapping);
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
+ // FIXME: Should we be hard coding the size for these mappings?
+ if (isInstrUniform(MI)) {
+ const InstructionMapping &SSMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.push_back(&SSMapping);
+ }
+
+ const InstructionMapping &VVMapping = getInstructionMapping(
2, 1, getOperandsMapping(
- {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
2); // Num Operands
AltMappings.push_back(&VVMapping);
- // FIXME: Should this be the pointer-size (64-bits) or the size of the
- // register that will hold the bufffer resourc (128-bits).
- const InstructionMapping &VSMapping = getInstructionMapping(
- 3, 1, getOperandsMapping(
- {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
- 2); // Num Operands
- AltMappings.push_back(&VSMapping);
+ // It may be possible to have a vgpr = load sgpr mapping here, because
+    // the mubuf instructions support this kind of load, but probably only for
+ // gfx7 and older. However, the addressing mode matching in the instruction
+ // selector should be able to do a better job of detecting and selecting
+ // these kinds of loads from the vgpr = load vgpr mapping.
return AltMappings;
@@ -184,15 +484,32 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&SSMapping);
const InstructionMapping &VVMapping = getInstructionMapping(2, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ getOperandsMapping({AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
4); // Num Operands
AltMappings.push_back(&VVMapping);
return AltMappings;
}
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
+ static const OpRegBankEntry<3> Table[4] = {
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+ { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
+ { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
+
+ // Scalar requires cmp+select, and extends if 16-bit.
+ // FIXME: Should there be separate costs for 32 and 16-bit
+ { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 3 }
+ };
+
+ const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 1, 2 } };
+ return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
+ }
case TargetOpcode::G_UADDE:
case TargetOpcode::G_USUBE:
case TargetOpcode::G_SADDE:
@@ -234,23 +551,816 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
AltMappings.push_back(&VMapping);
return AltMappings;
}
+ case AMDGPU::G_INTRINSIC:
+ return getInstrAlternativeMappingsIntrinsic(MI, MRI);
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
default:
break;
}
return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
-void AMDGPURegisterBankInfo::applyMappingImpl(
- const OperandsMapper &OpdMapper) const {
- return applyDefaultMapping(OpdMapper);
+void AMDGPURegisterBankInfo::split64BitValueForMapping(
+ MachineIRBuilder &B,
+ SmallVector<Register, 2> &Regs,
+ LLT HalfTy,
+ Register Reg) const {
+ assert(HalfTy.getSizeInBits() == 32);
+ MachineRegisterInfo *MRI = B.getMRI();
+ Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
+ Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
+ const RegisterBank *Bank = getRegBank(Reg, *MRI, *TRI);
+ MRI->setRegBank(LoLHS, *Bank);
+ MRI->setRegBank(HiLHS, *Bank);
+
+ Regs.push_back(LoLHS);
+ Regs.push_back(HiLHS);
+
+ B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
+ .addDef(LoLHS)
+ .addDef(HiLHS)
+ .addUse(Reg);
}
-static bool isInstrUniform(const MachineInstr &MI) {
- if (!MI.hasOneMemOperand())
+/// Replace the current type each register in \p Regs has with \p NewTy
+static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef<Register> Regs,
+ LLT NewTy) {
+ for (Register Reg : Regs) {
+ assert(MRI.getType(Reg).getSizeInBits() == NewTy.getSizeInBits());
+ MRI.setType(Reg, NewTy);
+ }
+}
+
+static LLT getHalfSizedType(LLT Ty) {
+ if (Ty.isVector()) {
+ assert(Ty.getNumElements() % 2 == 0);
+ return LLT::scalarOrVector(Ty.getNumElements() / 2, Ty.getElementType());
+ }
+
+ assert(Ty.getSizeInBits() % 2 == 0);
+ return LLT::scalar(Ty.getSizeInBits() / 2);
+}
+
+/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
+/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
+/// execute the instruction for each unique combination of values in all lanes
+/// in the wave. The block will be split such that the rest of the instructions
+/// are moved to a new block.
+///
+/// Essentially performs this loop:
+///
+/// Save Execution Mask
+/// For (Lane : Wavefront) {
+/// Enable Lane, Disable all other lanes
+/// SGPR = read SGPR value for current lane from VGPR
+/// VGPRResult[Lane] = use_op SGPR
+/// }
+/// Restore Execution Mask
+///
+/// There is additional complexity in comparing the operand values in order to
+/// identify the unique values actually used.
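+///
+/// A rough sketch of the control flow built below (block names illustrative):
+/// the original block falls through to a loop block that readfirstlanes each
+/// required operand, compares it with the VGPR value, ANDs the conditions and
+/// executes \p MI; the loop repeats while unhandled lanes remain, then control
+/// passes to a block restoring the exec mask and on to a remainder block
+/// holding the rest of the original block.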
+void AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const {
+ MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ MachineBasicBlock::iterator I(MI);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ // Use a set to avoid extra readfirstlanes in the case where multiple operands
+ // are the same register.
+ SmallSet<Register, 4> SGPROperandRegs;
+ for (unsigned Op : OpIndices) {
+ assert(MI.getOperand(Op).isUse());
+ Register Reg = MI.getOperand(Op).getReg();
+ const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
+ if (OpBank->getID() == AMDGPU::VGPRRegBankID)
+ SGPROperandRegs.insert(Reg);
+ }
+
+ // No operands need to be replaced, so no need to loop.
+ if (SGPROperandRegs.empty())
+ return;
+
+ MachineIRBuilder B(MI);
+ SmallVector<Register, 4> ResultRegs;
+ SmallVector<Register, 4> InitResultRegs;
+ SmallVector<Register, 4> PhiRegs;
+ for (MachineOperand &Def : MI.defs()) {
+ LLT ResTy = MRI.getType(Def.getReg());
+ const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
+ ResultRegs.push_back(Def.getReg());
+ Register InitReg = B.buildUndef(ResTy).getReg(0);
+ Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
+ InitResultRegs.push_back(InitReg);
+ PhiRegs.push_back(PhiReg);
+ MRI.setRegBank(PhiReg, *DefBank);
+ MRI.setRegBank(InitReg, *DefBank);
+ }
+
+ Register SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ Register InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ // Don't bother using generic instructions/registers for the exec mask.
+ B.buildInstr(TargetOpcode::IMPLICIT_DEF)
+ .addDef(InitSaveExecReg);
+
+ Register PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+  // To insert the loop we need to split the block. Move everything from this
+  // point onward into a new block, and insert a new, empty block before this
+  // instruction.
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+ MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, RestoreExecBB);
+ MF->insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(RestoreExecBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+ RestoreExecBB->addSuccessor(RemainderBB);
+
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
+ B.buildInstr(TargetOpcode::PHI)
+ .addDef(PhiExec)
+ .addReg(InitSaveExecReg)
+ .addMBB(&MBB)
+ .addReg(NewExec)
+ .addMBB(LoopBB);
+
+ for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
+ B.buildInstr(TargetOpcode::G_PHI)
+ .addDef(std::get<2>(Result))
+ .addReg(std::get<0>(Result)) // Initial value / implicit_def
+ .addMBB(&MBB)
+ .addReg(std::get<1>(Result)) // Mid-loop value.
+ .addMBB(LoopBB);
+ }
+
+ // Move the instruction into the loop.
+ LoopBB->splice(LoopBB->end(), &MBB, I);
+ I = std::prev(LoopBB->end());
+
+ B.setInstr(*I);
+
+ Register CondReg;
+
+ for (MachineOperand &Op : MI.uses()) {
+ if (!Op.isReg())
+ continue;
+
+ assert(!Op.isDef());
+ if (SGPROperandRegs.count(Op.getReg())) {
+ LLT OpTy = MRI.getType(Op.getReg());
+ unsigned OpSize = OpTy.getSizeInBits();
+
+ // Can only do a readlane of 32-bit pieces.
+ if (OpSize == 32) {
+ // Avoid extra copies in the simple case of one 32-bit register.
+ Register CurrentLaneOpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ MRI.setType(CurrentLaneOpReg, OpTy);
+
+ constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentLaneOpReg)
+ .addReg(Op.getReg());
+
+ Register NewCondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ bool First = CondReg == AMDGPU::NoRegister;
+ if (First)
+ CondReg = NewCondReg;
+
+ // Compare the just read M0 value to all possible Idx values.
+ B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
+ .addDef(NewCondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(Op.getReg());
+ Op.setReg(CurrentLaneOpReg);
+
+ if (!First) {
+ Register AndReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+          // If there are multiple operands to consider, AND the conditions together.
+ B.buildInstr(AMDGPU::S_AND_B64)
+ .addDef(AndReg)
+ .addReg(NewCondReg)
+ .addReg(CondReg);
+ CondReg = AndReg;
+ }
+ } else {
+ LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 8> ReadlanePieces;
+
+ // The compares can be done as 64-bit, but the extract needs to be done
+ // in 32-bit pieces.
+
+ bool Is64 = OpSize % 64 == 0;
+
+ LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
+ unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
+ : AMDGPU::V_CMP_EQ_U32_e64;
+
+ // Insert the unmerge before the loop.
+
+ B.setMBB(MBB);
+ auto Unmerge = B.buildUnmerge(UnmergeTy, Op.getReg());
+ B.setInstr(*I);
+
+ unsigned NumPieces = Unmerge->getNumOperands() - 1;
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
+ unsigned UnmergePiece = Unmerge.getReg(PieceIdx);
+
+ Register CurrentLaneOpReg;
+ if (Is64) {
+ Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
+ Register CurrentLaneOpRegHi = MRI.createGenericVirtualRegister(S32);
+
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VReg_64RegClass);
+ MRI.setRegClass(CurrentLaneOpRegLo, &AMDGPU::SReg_32_XM0RegClass);
+ MRI.setRegClass(CurrentLaneOpRegHi, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegLo)
+ .addReg(UnmergePiece, 0, AMDGPU::sub0);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpRegHi)
+ .addReg(UnmergePiece, 0, AMDGPU::sub1);
+
+ CurrentLaneOpReg =
+ B.buildMerge(LLT::scalar(64),
+ {CurrentLaneOpRegLo, CurrentLaneOpRegHi})
+ .getReg(0);
+
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_64_XEXECRegClass);
+
+ if (OpTy.getScalarSizeInBits() == 64) {
+            // If we need to produce a 64-bit element vector, use the
+            // merged pieces.
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ } else {
+ // 32-bit element type.
+ ReadlanePieces.push_back(CurrentLaneOpRegLo);
+ ReadlanePieces.push_back(CurrentLaneOpRegHi);
+ }
+ } else {
+ CurrentLaneOpReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MRI.setRegClass(UnmergePiece, &AMDGPU::VGPR_32RegClass);
+ MRI.setRegClass(CurrentLaneOpReg, &AMDGPU::SReg_32_XM0RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ CurrentLaneOpReg)
+ .addReg(UnmergePiece);
+ ReadlanePieces.push_back(CurrentLaneOpReg);
+ }
+
+ Register NewCondReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ bool First = CondReg == AMDGPU::NoRegister;
+ if (First)
+ CondReg = NewCondReg;
+
+ B.buildInstr(CmpOp)
+ .addDef(NewCondReg)
+ .addReg(CurrentLaneOpReg)
+ .addReg(UnmergePiece);
+
+ if (!First) {
+ Register AndReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+            // If there are multiple operands to consider, AND the conditions together.
+ B.buildInstr(AMDGPU::S_AND_B64)
+ .addDef(AndReg)
+ .addReg(NewCondReg)
+ .addReg(CondReg);
+ CondReg = AndReg;
+ }
+ }
+
+ // FIXME: Build merge seems to switch to CONCAT_VECTORS but not
+ // BUILD_VECTOR
+ if (OpTy.isVector()) {
+ auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
+ } else {
+ auto Merge = B.buildMerge(OpTy, ReadlanePieces);
+ Op.setReg(Merge.getReg(0));
+ }
+
+ MRI.setRegBank(Op.getReg(), getRegBank(AMDGPU::SGPRRegBankID));
+ }
+ }
+ }
+
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
+ // Update EXEC, save the original EXEC value to VCC.
+ B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
+ .addDef(NewExec)
+ .addReg(CondReg, RegState::Kill);
+
+ MRI.setSimpleHint(NewExec, CondReg);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ B.buildInstr(AMDGPU::S_XOR_B64_term)
+ .addDef(AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(NewExec);
+
+ // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
+ // s_cbranch_scc0?
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
+ B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
+ .addMBB(LoopBB);
+
+ // Save the EXEC mask before the loop.
+ BuildMI(MBB, MBB.end(), DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore the EXEC mask after the loop.
+ B.setMBB(*RestoreExecBB);
+ B.buildInstr(AMDGPU::S_MOV_B64_term)
+ .addDef(AMDGPU::EXEC)
+ .addReg(SaveExecReg);
+}
+
+// Legalize an operand that must be an SGPR by inserting a readfirstlane.
+void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
+ MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const {
+ Register Reg = MI.getOperand(OpIdx).getReg();
+ const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
+ if (Bank != &AMDGPU::VGPRRegBank)
+ return;
+
+ MachineIRBuilder B(MI);
+ Register SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ B.buildInstr(AMDGPU::V_READFIRSTLANE_B32)
+ .addDef(SGPR)
+ .addReg(Reg);
+
+ const TargetRegisterClass *Constrained =
+ constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI);
+ (void)Constrained;
+ assert(Constrained && "Failed to constrain readfirstlane src reg");
+
+ MI.getOperand(OpIdx).setReg(SGPR);
+}
+
+// When regbankselect repairs registers, it will insert a repair instruction
+// which defines the repaired register. Then it calls applyMapping and expects
+// that the targets will either delete or rewrite the instruction that
+// originally wrote to the repaired registers. Because of this, we end up in a
+// situation where we have two instructions defining the same registers.
+static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
+ Register Reg,
+ const MachineInstr &MI) {
+ // Is there some way we can assert that there are exactly 2 def instructions?
+ for (MachineInstr &Other : MRI.def_instructions(Reg)) {
+ if (&Other != &MI)
+ return &Other;
+ }
+
+ return nullptr;
+}
+
+bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ const LLT LoadTy = MRI.getType(DstReg);
+ unsigned LoadSize = LoadTy.getSizeInBits();
+ const unsigned MaxNonSmrdLoadSize = 128;
+ // 128-bit loads are supported for all instruction types.
+ if (LoadSize <= MaxNonSmrdLoadSize)
return false;
- const MachineMemOperand *MMO = *MI.memoperands_begin();
- return AMDGPUInstrInfo::isUniformMMO(MMO);
+ SmallVector<unsigned, 16> DefRegs(OpdMapper.getVRegs(0));
+ SmallVector<unsigned, 1> SrcRegs(OpdMapper.getVRegs(1));
+
+ // If the pointer is an SGPR, we have nothing to do.
+ if (SrcRegs.empty())
+ return false;
+
+ assert(LoadSize % MaxNonSmrdLoadSize == 0);
+
+ // We want to get the repair instruction now, because it will help us
+ // determine which instruction the legalizer inserts that will also
+ // write to DstReg.
+ MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);
+
+ // RegBankSelect only emits scalar types, so we need to reset the pointer
+ // operand to a pointer type.
+ Register BasePtrReg = SrcRegs[0];
+ LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
+ MRI.setType(BasePtrReg, PtrTy);
+
+ MachineIRBuilder B(MI);
+
+ unsigned SplitElts =
+ MaxNonSmrdLoadSize / LoadTy.getScalarType().getSizeInBits();
+ const LLT LoadSplitTy = LLT::vector(SplitElts, LoadTy.getScalarType());
+ ApplyRegBankMapping O(MRI, &AMDGPU::VGPRRegBank);
+ GISelObserverWrapper Observer(&O);
+ B.setChangeObserver(Observer);
+ LegalizerHelper Helper(B.getMF(), Observer, B);
+ if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
+ return false;
+
+ // At this point, the legalizer has split the original load into smaller
+ // loads. At the end of lowering, it inserts an instruction (LegalizedInst)
+  // that combines the outputs of the lowered loads and writes the result to
+  // DstReg. The register bank selector has also added the RepairInst, which
+  // writes to DstReg as well.
+
+ MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);
+
+ // Replace the output of the LegalizedInst with a temporary register, since
+ // RepairInst already defines DstReg.
+ Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
+ LegalizedInst->getOperand(0).setReg(TmpReg);
+ B.setInsertPt(*RepairInst->getParent(), RepairInst);
+
+ for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
+ Register IdxReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ B.buildConstant(IdxReg, DefIdx);
+ MRI.setRegBank(IdxReg, getRegBank(AMDGPU::VGPRRegBankID));
+ B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
+ }
+
+ MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ return true;
+}
+
+// For cases where only a single copy is inserted for matching register banks,
+// replace the register in the instruction operand.
+static void substituteSimpleCopyRegs(
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
+ SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
+ if (!SrcReg.empty()) {
+ assert(SrcReg.size() == 1);
+ OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
+ }
+}
+
+void AMDGPURegisterBankInfo::applyMappingImpl(
+ const OperandsMapper &OpdMapper) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ unsigned Opc = MI.getOpcode();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+ switch (Opc) {
+ case AMDGPU::G_SELECT: {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.getSizeInBits() != 64)
+ break;
+
+ LLT HalfTy = getHalfSizedType(DstTy);
+
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+ SmallVector<Register, 1> Src0Regs(OpdMapper.getVRegs(1));
+ SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
+ SmallVector<Register, 2> Src2Regs(OpdMapper.getVRegs(3));
+
+ // All inputs are SGPRs, nothing special to do.
+ if (DefRegs.empty()) {
+ assert(Src1Regs.empty() && Src2Regs.empty());
+ break;
+ }
+
+ MachineIRBuilder B(MI);
+ if (Src0Regs.empty())
+ Src0Regs.push_back(MI.getOperand(1).getReg());
+ else {
+ assert(Src0Regs.size() == 1);
+ }
+
+ if (Src1Regs.empty())
+ split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
+ else {
+ setRegsToType(MRI, Src1Regs, HalfTy);
+ }
+
+ if (Src2Regs.empty())
+ split64BitValueForMapping(B, Src2Regs, HalfTy, MI.getOperand(3).getReg());
+ else
+ setRegsToType(MRI, Src2Regs, HalfTy);
+
+ setRegsToType(MRI, DefRegs, HalfTy);
+
+ B.buildSelect(DefRegs[0], Src0Regs[0], Src1Regs[0], Src2Regs[0]);
+ B.buildSelect(DefRegs[1], Src0Regs[0], Src1Regs[1], Src2Regs[1]);
+
+ MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MI.eraseFromParent();
+ return;
+ }
+ case AMDGPU::G_AND:
+ case AMDGPU::G_OR:
+ case AMDGPU::G_XOR: {
+    // 64-bit AND is only available on the SALU, so split into 2 32-bit ops if
+ // there is a VGPR input.
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.getSizeInBits() != 64)
+ break;
+
+ LLT HalfTy = getHalfSizedType(DstTy);
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+ SmallVector<Register, 2> Src0Regs(OpdMapper.getVRegs(1));
+ SmallVector<Register, 2> Src1Regs(OpdMapper.getVRegs(2));
+
+ // All inputs are SGPRs, nothing special to do.
+ if (DefRegs.empty()) {
+ assert(Src0Regs.empty() && Src1Regs.empty());
+ break;
+ }
+
+ assert(DefRegs.size() == 2);
+ assert(Src0Regs.size() == Src1Regs.size() &&
+ (Src0Regs.empty() || Src0Regs.size() == 2));
+
+ // Depending on where the source registers came from, the generic code may
+ // have decided to split the inputs already or not. If not, we still need to
+ // extract the values.
+ MachineIRBuilder B(MI);
+
+ if (Src0Regs.empty())
+ split64BitValueForMapping(B, Src0Regs, HalfTy, MI.getOperand(1).getReg());
+ else
+ setRegsToType(MRI, Src0Regs, HalfTy);
+
+ if (Src1Regs.empty())
+ split64BitValueForMapping(B, Src1Regs, HalfTy, MI.getOperand(2).getReg());
+ else
+ setRegsToType(MRI, Src1Regs, HalfTy);
+
+ setRegsToType(MRI, DefRegs, HalfTy);
+
+ B.buildInstr(Opc)
+ .addDef(DefRegs[0])
+ .addUse(Src0Regs[0])
+ .addUse(Src1Regs[0]);
+
+ B.buildInstr(Opc)
+ .addDef(DefRegs[1])
+ .addUse(Src0Regs[1])
+ .addUse(Src1Regs[1]);
+
+ MRI.setRegBank(DstReg, getRegBank(AMDGPU::VGPRRegBankID));
+ MI.eraseFromParent();
+ return;
+ }
+ case AMDGPU::G_ADD:
+ case AMDGPU::G_SUB:
+ case AMDGPU::G_MUL: {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy != LLT::scalar(16))
+ break;
+
+ const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ if (DstBank == &AMDGPU::VGPRRegBank)
+ break;
+
+ // 16-bit operations are VALU only, but can be promoted to 32-bit SALU.
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineIRBuilder B(MI);
+ ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplySALU);
+ LegalizerHelper Helper(*MF, Observer, B);
+
+ if (Helper.widenScalar(MI, 0, LLT::scalar(32)) !=
+ LegalizerHelper::Legalized)
+ llvm_unreachable("widen scalar should have succeeded");
+ return;
+ }
+ case AMDGPU::G_SMIN:
+ case AMDGPU::G_SMAX:
+ case AMDGPU::G_UMIN:
+ case AMDGPU::G_UMAX: {
+ Register DstReg = MI.getOperand(0).getReg();
+ const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
+ if (DstBank == &AMDGPU::VGPRRegBank)
+ break;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ MachineIRBuilder B(MI);
+ ApplyRegBankMapping ApplySALU(MRI, &AMDGPU::SGPRRegBank);
+ GISelObserverWrapper Observer(&ApplySALU);
+ LegalizerHelper Helper(*MF, Observer, B);
+
+ // Turn scalar min/max into a compare and select.
+ LLT Ty = MRI.getType(DstReg);
+ LLT S32 = LLT::scalar(32);
+ LLT S16 = LLT::scalar(16);
+
+ if (Ty == S16) {
+ // Need to widen to s32, and expand as cmp + select.
+ if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
+ llvm_unreachable("widenScalar should have succeeded");
+
+ // FIXME: This is relying on widenScalar leaving MI in place.
+ if (Helper.lower(MI, 0, S32) != LegalizerHelper::Legalized)
+ llvm_unreachable("lower should have succeeded");
+ } else {
+ if (Helper.lower(MI, 0, Ty) != LegalizerHelper::Legalized)
+ llvm_unreachable("lower should have succeeded");
+ }
+
+ return;
+ }
+ case AMDGPU::G_SEXT:
+ case AMDGPU::G_ZEXT: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ bool Signed = Opc == AMDGPU::G_SEXT;
+
+ MachineIRBuilder B(MI);
+ const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isScalar() &&
+ SrcBank != &AMDGPU::SGPRRegBank &&
+ SrcBank != &AMDGPU::SCCRegBank &&
+ SrcBank != &AMDGPU::VCCRegBank &&
+        // FIXME: Should handle any type that rounds to s64 once irregular
+        // breakdowns are supported.
+ DstTy.getSizeInBits() == 64 &&
+ SrcTy.getSizeInBits() <= 32) {
+ const LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+ // Extend to 32-bit, and then extend the low half.
+ if (Signed) {
+ // TODO: Should really be buildSExtOrCopy
+ B.buildSExtOrTrunc(DefRegs[0], SrcReg);
+
+ // Replicate sign bit from 32-bit extended part.
+ auto ShiftAmt = B.buildConstant(S32, 31);
+ MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+ B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
+ } else {
+ B.buildZExtOrTrunc(DefRegs[0], SrcReg);
+ B.buildConstant(DefRegs[1], 0);
+ }
+
+ MRI.setRegBank(DstReg, *SrcBank);
+ MI.eraseFromParent();
+ return;
+ }
+
+ if (SrcTy != LLT::scalar(1))
+ return;
+
+ if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+ const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
+ &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+
+ unsigned DstSize = DstTy.getSizeInBits();
+ // 64-bit select is SGPR only
+ const bool UseSel64 = DstSize > 32 &&
+ SrcBank->getID() == AMDGPU::SCCRegBankID;
+
+ // TODO: Should s16 select be legal?
+ LLT SelType = UseSel64 ? LLT::scalar(64) : LLT::scalar(32);
+ auto True = B.buildConstant(SelType, Signed ? -1 : 1);
+ auto False = B.buildConstant(SelType, 0);
+
+ MRI.setRegBank(True.getReg(0), *DstBank);
+ MRI.setRegBank(False.getReg(0), *DstBank);
+ MRI.setRegBank(DstReg, *DstBank);
+
+ if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
+ B.buildSelect(DefRegs[0], SrcReg, True, False);
+ B.buildCopy(DefRegs[1], DefRegs[0]);
+ } else if (DstSize < 32) {
+ auto Sel = B.buildSelect(SelType, SrcReg, True, False);
+ MRI.setRegBank(Sel.getReg(0), *DstBank);
+ B.buildTrunc(DstReg, Sel);
+ } else {
+ B.buildSelect(DstReg, SrcReg, True, False);
+ }
+
+ MI.eraseFromParent();
+ return;
+ }
+
+    // Fix up the case with an s1 src that isn't a condition register. Use shifts
+    // instead of introducing a compare to avoid an unnecessary condition
+    // register (and since there are no scalar 16-bit compares).
+ auto Ext = B.buildAnyExt(DstTy, SrcReg);
+ auto ShiftAmt = B.buildConstant(LLT::scalar(32), DstTy.getSizeInBits() - 1);
+ auto Shl = B.buildShl(DstTy, Ext, ShiftAmt);
+
+ if (MI.getOpcode() == AMDGPU::G_SEXT)
+ B.buildAShr(DstReg, Shl, ShiftAmt);
+ else
+ B.buildLShr(DstReg, Shl, ShiftAmt);
+
+ MRI.setRegBank(DstReg, *SrcBank);
+ MRI.setRegBank(Ext.getReg(0), *SrcBank);
+ MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+ MRI.setRegBank(Shl.getReg(0), *SrcBank);
+ MI.eraseFromParent();
+ return;
+ }
+ case AMDGPU::G_EXTRACT_VECTOR_ELT:
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, { 2 });
+ return;
+ case AMDGPU::G_INTRINSIC: {
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ case Intrinsic::amdgcn_s_buffer_load: {
+ // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS
+ executeInWaterfallLoop(MI, MRI, { 2, 3 });
+ return;
+ }
+ case Intrinsic::amdgcn_readlane: {
+ substituteSimpleCopyRegs(OpdMapper, 2);
+
+ assert(empty(OpdMapper.getVRegs(0)));
+ assert(empty(OpdMapper.getVRegs(3)));
+
+ // Make sure the index is an SGPR. It doesn't make sense to run this in a
+ // waterfall loop, so assume it's a uniform value.
+ constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+ return;
+ }
+ case Intrinsic::amdgcn_writelane: {
+ assert(empty(OpdMapper.getVRegs(0)));
+ assert(empty(OpdMapper.getVRegs(2)));
+ assert(empty(OpdMapper.getVRegs(3)));
+
+ substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
+ constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
+ constrainOpWithReadfirstlane(MI, MRI, 3); // Index
+ return;
+ }
+ default:
+ break;
+ }
+ break;
+ }
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ case Intrinsic::amdgcn_buffer_load: {
+ executeInWaterfallLoop(MI, MRI, { 2 });
+ return;
+ }
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap: {
+ // This is only allowed to execute with 1 lane, so readfirstlane is safe.
+ assert(empty(OpdMapper.getVRegs(0)));
+ substituteSimpleCopyRegs(OpdMapper, 3);
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ return;
+ }
+ case Intrinsic::amdgcn_s_sendmsg:
+ case Intrinsic::amdgcn_s_sendmsghalt: {
+ // FIXME: Should this use a waterfall loop?
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ return;
+ }
+ default:
+ break;
+ }
+ break;
+ }
+ case AMDGPU::G_LOAD: {
+ if (applyMappingWideLoad(MI, OpdMapper, MRI))
+ return;
+ break;
+ }
+ default:
+ break;
+ }
+
+ return applyDefaultMapping(OpdMapper);
}
bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
@@ -259,7 +1369,7 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
if (!MI.getOperand(i).isReg())
continue;
- unsigned Reg = MI.getOperand(i).getReg();
+ Register Reg = MI.getOperand(i).getReg();
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
if (Bank->getID() == AMDGPU::VGPRRegBankID)
return false;
@@ -299,7 +1409,7 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
if (MI.getOperand(OpdIdx).isIntrinsicID())
OpdsMapping[OpdIdx++] = nullptr;
- unsigned Reg1 = MI.getOperand(OpdIdx).getReg();
+ Register Reg1 = MI.getOperand(OpdIdx).getReg();
unsigned Size1 = getSizeInBits(Reg1, MRI, *TRI);
unsigned DefaultBankID = Size1 == 1 ?
@@ -309,7 +1419,11 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const {
OpdsMapping[OpdIdx++] = AMDGPU::getValueMapping(Bank1, Size1);
for (unsigned e = MI.getNumOperands(); OpdIdx != e; ++OpdIdx) {
- unsigned Size = getSizeInBits(MI.getOperand(OpdIdx).getReg(), MRI, *TRI);
+ const MachineOperand &MO = MI.getOperand(OpdIdx);
+ if (!MO.isReg())
+ continue;
+
+ unsigned Size = getSizeInBits(MO.getReg(), MRI, *TRI);
unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
OpdsMapping[OpdIdx] = AMDGPU::getValueMapping(BankID, Size);
}
@@ -325,7 +1439,11 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
+ const MachineOperand &Op = MI.getOperand(I);
+ if (!Op.isReg())
+ continue;
+
+ unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI);
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
@@ -340,6 +1458,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
const ValueMapping *ValMapping;
@@ -350,7 +1469,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
} else {
- ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
// FIXME: What would happen if we used SGPRRegBankID here?
PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
}
@@ -366,7 +1485,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
}
unsigned
-AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
+AMDGPURegisterBankInfo::getRegBankID(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Default) const {
@@ -383,13 +1502,81 @@ AMDGPURegisterBankInfo::getRegBankID(unsigned Reg,
///
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
- const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MI.isRegSequence()) {
+ // If any input is a VGPR, the result must be a VGPR. The default handling
+ // assumes any copy between banks is legal.
+ unsigned BankID = AMDGPU::SGPRRegBankID;
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ auto OpBank = getRegBankID(MI.getOperand(I).getReg(), MRI, *TRI);
+ // It doesn't make sense to use vcc or scc banks here, so just ignore
+ // them.
+ if (OpBank != AMDGPU::SGPRRegBankID) {
+ BankID = AMDGPU::VGPRRegBankID;
+ break;
+ }
+ }
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ const ValueMapping &ValMap = getValueMapping(0, Size, getRegBank(BankID));
+ return getInstructionMapping(
+ 1, /*Cost*/ 1,
+ /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
+ }
+
+ // The default handling is broken and doesn't handle illegal SGPR->VGPR copies
+ // properly.
+ //
+ // TODO: There are additional exec masking dependencies to analyze.
+ if (MI.getOpcode() == TargetOpcode::G_PHI) {
+ // TODO: Generate proper invalid bank enum.
+ int ResultBank = -1;
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI.getOperand(I).getReg();
+ const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI);
+
+ // FIXME: Assuming VGPR for any undetermined inputs.
+ if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
+ ResultBank = AMDGPU::VGPRRegBankID;
+ break;
+ }
+
+ unsigned OpBank = Bank->getID();
+ // scc, scc -> sgpr
+ if (OpBank == AMDGPU::SCCRegBankID) {
+ // There's only one SCC register, so a phi requires copying to SGPR.
+ OpBank = AMDGPU::SGPRRegBankID;
+ } else if (OpBank == AMDGPU::VCCRegBankID) {
+ // vcc, vcc -> vcc
+ // vcc, sgpr -> vgpr
+ if (ResultBank != -1 && ResultBank != AMDGPU::VCCRegBankID) {
+ ResultBank = AMDGPU::VGPRRegBankID;
+ break;
+ }
+ }
+
+ ResultBank = OpBank;
+ }
+
+ assert(ResultBank != -1);
+
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+
+ const ValueMapping &ValMap =
+ getValueMapping(0, Size, getRegBank(ResultBank));
+ return getInstructionMapping(
+ 1, /*Cost*/ 1,
+ /*OperandsMapping*/ getOperandsMapping({&ValMap}), 1);
+ }
+
+ const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
- const MachineFunction &MF = *MI.getParent()->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
switch (MI.getOpcode()) {
@@ -401,18 +1588,86 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_XOR: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
if (Size == 1) {
- OpdsMapping[0] = OpdsMapping[1] =
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ const RegisterBank *DstBank
+ = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ unsigned TargetBankID = -1;
+ unsigned BankLHS = -1;
+ unsigned BankRHS = -1;
+ if (DstBank) {
+ TargetBankID = DstBank->getID();
+ if (DstBank == &AMDGPU::VCCRegBank) {
+ TargetBankID = AMDGPU::VCCRegBankID;
+ BankLHS = AMDGPU::VCCRegBankID;
+ BankRHS = AMDGPU::VCCRegBankID;
+ } else if (DstBank == &AMDGPU::SCCRegBank) {
+ TargetBankID = AMDGPU::SCCRegBankID;
+ BankLHS = AMDGPU::SGPRRegBankID;
+ BankRHS = AMDGPU::SGPRRegBankID;
+ } else {
+ BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ }
+ } else {
+ BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ AMDGPU::VCCRegBankID);
+ BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::VCCRegBankID);
+
+ // Both inputs should be true booleans to produce a boolean result.
+ if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
+ TargetBankID = AMDGPU::VGPRRegBankID;
+ } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
+ TargetBankID = AMDGPU::VCCRegBankID;
+ BankLHS = AMDGPU::VCCRegBankID;
+ BankRHS = AMDGPU::VCCRegBankID;
+ } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
+ TargetBankID = AMDGPU::SGPRRegBankID;
+ } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
+ // The operation must be done on a 32-bit register, but it will set
+        // scc. The result type could interchangeably be SCC or SGPR, since
+ // both values will be produced.
+ TargetBankID = AMDGPU::SCCRegBankID;
+ BankLHS = AMDGPU::SGPRRegBankID;
+ BankRHS = AMDGPU::SGPRRegBankID;
+ }
+ }
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
+ break;
+ }
+
+ if (Size == 64) {
+
+ if (isSALUMapping(MI)) {
+ OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
+ OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
+ } else {
+ OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
+ unsigned Bank1 = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI/*, DefaultBankID*/);
+ OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
+
+ unsigned Bank2 = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI/*, DefaultBankID*/);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
+ }
+
break;
}
LLVM_FALLTHROUGH;
}
+ case AMDGPU::G_GEP:
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
case AMDGPU::G_MUL:
case AMDGPU::G_SHL:
+ case AMDGPU::G_LSHR:
+ case AMDGPU::G_ASHR:
case AMDGPU::G_UADDO:
case AMDGPU::G_SADDO:
case AMDGPU::G_USUBO:
@@ -421,6 +1676,12 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE:
+ case AMDGPU::G_UMULH:
+ case AMDGPU::G_SMULH:
+ case AMDGPU::G_SMIN:
+ case AMDGPU::G_SMAX:
+ case AMDGPU::G_UMIN:
+ case AMDGPU::G_UMAX:
if (isSALUMapping(MI))
return getDefaultMappingSOP(MI);
LLVM_FALLTHROUGH;
@@ -431,11 +1692,14 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
case AMDGPU::G_FMA:
+ case AMDGPU::G_FSQRT:
case AMDGPU::G_SITOFP:
case AMDGPU::G_UITOFP:
case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FPEXT:
case AMDGPU::G_FEXP2:
case AMDGPU::G_FLOG2:
+ case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI);
@@ -473,7 +1737,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = nullptr;
break;
}
- case AMDGPU::G_MERGE_VALUES: {
+ case AMDGPU::G_MERGE_VALUES:
+ case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_CONCAT_VECTORS: {
unsigned Bank = isSALUMapping(MI) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -502,8 +1768,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_TRUNC: {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
unsigned Bank = getRegBankID(Src, MRI, *TRI);
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
@@ -514,23 +1780,35 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ZEXT:
case AMDGPU::G_SEXT:
case AMDGPU::G_ANYEXT: {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
- unsigned SrcBank = getRegBankID(Src, MRI, *TRI,
- SrcSize == 1 ? AMDGPU::SGPRRegBankID :
- AMDGPU::VGPRRegBankID);
- unsigned DstBank = SrcBank;
- if (SrcSize == 1) {
- if (SrcBank == AMDGPU::SGPRRegBankID)
- DstBank = AMDGPU::VGPRRegBankID;
- else
- DstBank = AMDGPU::SGPRRegBankID;
- }
-
- OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank, SrcSize);
+
+ unsigned DstBank;
+ const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
+ assert(SrcBank);
+ switch (SrcBank->getID()) {
+ case AMDGPU::SCCRegBankID:
+ case AMDGPU::SGPRRegBankID:
+ DstBank = AMDGPU::SGPRRegBankID;
+ break;
+ default:
+ DstBank = AMDGPU::VGPRRegBankID;
+ break;
+ }
+
+    // TODO: Should anyext be split into 32-bit pieces as well?
+ if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+ } else {
+ // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+ // 32-bits, and then to 64.
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+ SrcSize);
+ }
break;
}
case AMDGPU::G_FCMP: {
@@ -542,16 +1820,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
break;
}
- case AMDGPU::G_GEP: {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg())
- continue;
-
- unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
- OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
- }
- break;
- }
case AMDGPU::G_STORE: {
assert(MI.getOperand(0).isReg());
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -571,57 +1839,55 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
case AMDGPU::G_ICMP: {
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
- unsigned Op0Bank = Op2Bank == AMDGPU::SGPRRegBankID &&
- Op3Bank == AMDGPU::SGPRRegBankID ?
- AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+
+ bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
+ Op3Bank == AMDGPU::SGPRRegBankID &&
+ (Size == 32 || (Size == 64 &&
+ (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
+ MF.getSubtarget<GCNSubtarget>().hasScalarCompareEq64()));
+
+ unsigned Op0Bank = CanUseSCC ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+
OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, 1);
OpdsMapping[1] = nullptr; // Predicate Operand.
OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
break;
}
-
-
case AMDGPU::G_EXTRACT_VECTOR_ELT: {
- unsigned IdxOp = 2;
- int64_t Imm;
- // XXX - Do we really need to fully handle these? The constant case should
- // be legalized away before RegBankSelect?
-
- unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
+ unsigned OutputBankID = isSALUMapping(MI) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
+ unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
- OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits());
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize);
// The index can be either if the source vector is VGPR.
- OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
+ OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
break;
}
case AMDGPU::G_INSERT_VECTOR_ELT: {
- // XXX - Do we really need to fully handle these? The constant case should
- // be legalized away before RegBankSelect?
-
- int64_t Imm;
-
- unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3;
- unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ?
- AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
-
-
+ unsigned OutputBankID = isSALUMapping(MI) ?
+ AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
- // TODO: Can do SGPR indexing, which would obviate the need for the
- // isConstant check.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI);
- OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
- }
+ unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+ unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize);
+ // The index can be either if the source vector is VGPR.
+ OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
break;
}
case AMDGPU::G_UNMERGE_VALUES: {
@@ -637,14 +1903,70 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_INTRINSIC: {
- switch (MI.getOperand(1).getIntrinsicID()) {
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
case Intrinsic::maxnum:
case Intrinsic::minnum:
+ case Intrinsic::amdgcn_div_fmas:
+ case Intrinsic::amdgcn_trig_preop:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_cos:
+ case Intrinsic::amdgcn_log_clamp:
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_ldexp:
+ case Intrinsic::amdgcn_frexp_mant:
+ case Intrinsic::amdgcn_frexp_exp:
+ case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_cvt_pkrtz:
+ case Intrinsic::amdgcn_cvt_pknorm_i16:
+ case Intrinsic::amdgcn_cvt_pknorm_u16:
+ case Intrinsic::amdgcn_cvt_pk_i16:
+ case Intrinsic::amdgcn_cvt_pk_u16:
+ case Intrinsic::amdgcn_fmed3:
+ case Intrinsic::amdgcn_cubeid:
+ case Intrinsic::amdgcn_cubema:
+ case Intrinsic::amdgcn_cubesc:
+ case Intrinsic::amdgcn_cubetc:
+ case Intrinsic::amdgcn_sffbh:
+ case Intrinsic::amdgcn_fmad_ftz:
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::amdgcn_mbcnt_hi:
+ case Intrinsic::amdgcn_ubfe:
+ case Intrinsic::amdgcn_sbfe:
+ case Intrinsic::amdgcn_lerp:
+ case Intrinsic::amdgcn_sad_u8:
+ case Intrinsic::amdgcn_msad_u8:
+ case Intrinsic::amdgcn_sad_hi_u8:
+ case Intrinsic::amdgcn_sad_u16:
+ case Intrinsic::amdgcn_qsad_pk_u16_u8:
+ case Intrinsic::amdgcn_mqsad_pk_u16_u8:
+ case Intrinsic::amdgcn_mqsad_u32_u8:
+ case Intrinsic::amdgcn_cvt_pk_u8_f32:
+ case Intrinsic::amdgcn_alignbit:
+ case Intrinsic::amdgcn_alignbyte:
+ case Intrinsic::amdgcn_fdot2:
+ case Intrinsic::amdgcn_sdot2:
+ case Intrinsic::amdgcn_udot2:
+ case Intrinsic::amdgcn_sdot4:
+ case Intrinsic::amdgcn_udot4:
+ case Intrinsic::amdgcn_sdot8:
+ case Intrinsic::amdgcn_udot8:
+ case Intrinsic::amdgcn_fdiv_fast:
+ case Intrinsic::amdgcn_wwm:
+ case Intrinsic::amdgcn_wqm:
return getDefaultMappingVOP(MI);
- case Intrinsic::amdgcn_kernarg_segment_ptr: {
+ case Intrinsic::amdgcn_ds_permute:
+ case Intrinsic::amdgcn_ds_bpermute:
+ case Intrinsic::amdgcn_update_dpp:
+ return getDefaultMappingAllVGPR(MI);
+ case Intrinsic::amdgcn_kernarg_segment_ptr:
+ case Intrinsic::amdgcn_s_getpc:
+ case Intrinsic::amdgcn_groupstaticsize: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
@@ -652,16 +1974,142 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wqm_vote: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = OpdsMapping[2]
- = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
+ break;
+ }
+ case Intrinsic::amdgcn_s_buffer_load: {
+ // FIXME: This should be moved to G_INTRINSIC_W_SIDE_EFFECTS
+ Register RSrc = MI.getOperand(2).getReg(); // SGPR
+ Register Offset = MI.getOperand(3).getReg(); // SGPR/imm
+
+ unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
+ unsigned Size3 = MRI.getType(Offset).getSizeInBits();
+
+ unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
+ unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size0);
+ OpdsMapping[1] = nullptr; // intrinsic id
+
+ // Lie and claim everything is legal, even though some need to be
+ // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+ OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
+ OpdsMapping[3] = AMDGPU::getValueMapping(OffsetBank, Size3);
+ OpdsMapping[4] = nullptr;
+ break;
+ }
+ case Intrinsic::amdgcn_div_scale: {
+ unsigned Dst0Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Dst1Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
+
+ unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
+ OpdsMapping[3] = AMDGPU::getValueMapping(
+ getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI), SrcSize);
+ OpdsMapping[4] = AMDGPU::getValueMapping(
+ getRegBankID(MI.getOperand(4).getReg(), MRI, *TRI), SrcSize);
+
+ break;
+ }
+ case Intrinsic::amdgcn_class: {
+ Register Src0Reg = MI.getOperand(2).getReg();
+ Register Src1Reg = MI.getOperand(3).getReg();
+ unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
+ unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(getRegBankID(Src0Reg, MRI, *TRI),
+ Src0Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(getRegBankID(Src1Reg, MRI, *TRI),
+ Src1Size);
+ break;
+ }
+ case Intrinsic::amdgcn_icmp:
+ case Intrinsic::amdgcn_fcmp: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ // This is not VCCRegBank because this is not used in boolean contexts.
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+ unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ unsigned Op1Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
+ unsigned Op2Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Op1Bank, OpSize);
+ OpdsMapping[3] = AMDGPU::getValueMapping(Op2Bank, OpSize);
+ break;
+ }
+ case Intrinsic::amdgcn_readlane: {
+ // This must be an SGPR, but accept a VGPR.
+ unsigned IdxReg = MI.getOperand(3).getReg();
+ unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
+ unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::amdgcn_readfirstlane: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
+ break;
+ }
+ case Intrinsic::amdgcn_writelane: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
+ unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ unsigned IdxReg = MI.getOperand(3).getReg();
+ unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
+ unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+
+ // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
+ // to legalize.
+ OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
+ OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
+ OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
+ break;
+ }
+ case Intrinsic::amdgcn_if_break: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
}
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- switch (MI.getOperand(0).getIntrinsicID()) {
+ switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
+ case Intrinsic::amdgcn_s_getreg:
+ case Intrinsic::amdgcn_s_memtime:
+ case Intrinsic::amdgcn_s_memrealtime:
+ case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume:
+ case Intrinsic::amdgcn_ds_fadd:
+ case Intrinsic::amdgcn_ds_fmin:
+ case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ return getDefaultMappingAllVGPR(MI);
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ break;
+ }
case Intrinsic::amdgcn_exp_compr:
OpdsMapping[0] = nullptr; // IntrinsicID
// FIXME: These are immediate values which can't be read from registers.
@@ -688,24 +2136,82 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
+ case Intrinsic::amdgcn_buffer_load: {
+ Register RSrc = MI.getOperand(2).getReg(); // SGPR
+ Register VIndex = MI.getOperand(3).getReg(); // VGPR
+ Register Offset = MI.getOperand(4).getReg(); // SGPR/VGPR/imm
+
+ unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned Size2 = MRI.getType(RSrc).getSizeInBits();
+ unsigned Size3 = MRI.getType(VIndex).getSizeInBits();
+ unsigned Size4 = MRI.getType(Offset).getSizeInBits();
+
+ unsigned RSrcBank = getRegBankID(RSrc, MRI, *TRI);
+ unsigned OffsetBank = getRegBankID(Offset, MRI, *TRI);
+
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size0);
+ OpdsMapping[1] = nullptr; // intrinsic id
+
+ // Lie and claim everything is legal, even though some need to be
+ // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+ OpdsMapping[2] = AMDGPU::getValueMapping(RSrcBank, Size2); // rsrc
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size3);
+ OpdsMapping[4] = AMDGPU::getValueMapping(OffsetBank, Size4);
+ OpdsMapping[5] = nullptr;
+ OpdsMapping[6] = nullptr;
+ break;
+ }
+ case Intrinsic::amdgcn_s_sendmsg:
+ case Intrinsic::amdgcn_s_sendmsghalt: {
+ // This must be an SGPR, but accept a VGPR.
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
+ break;
+ }
+ case Intrinsic::amdgcn_end_cf: {
+ unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
}
break;
}
case AMDGPU::G_SELECT: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- unsigned Op1Bank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
AMDGPU::SGPRRegBankID);
- unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
- unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
- bool SGPRSrcs = Op1Bank == AMDGPU::SCCRegBankID &&
- Op2Bank == AMDGPU::SGPRRegBankID &&
+ unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
Op3Bank == AMDGPU::SGPRRegBankID;
- unsigned Bank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
- Op1Bank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
- OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
- OpdsMapping[1] = AMDGPU::getValueMapping(Op1Bank, 1);
- OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
- OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+
+ unsigned CondBankDefault = SGPRSrcs ?
+ AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ unsigned CondBank = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+ CondBankDefault);
+ if (CondBank == AMDGPU::SGPRRegBankID)
+ CondBank = SGPRSrcs ? AMDGPU::SCCRegBankID : AMDGPU::VCCRegBankID;
+ else if (CondBank == AMDGPU::VGPRRegBankID)
+ CondBank = AMDGPU::VCCRegBankID;
+
+ unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SCCRegBankID ?
+ AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+
+ assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SCCRegBankID);
+
+ if (Size == 64) {
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
+ OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
+ OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank, Size);
+ } else {
+ OpdsMapping[0] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(Bank, Size);
+ }
+
break;
}
@@ -737,6 +2243,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
}
- return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+ return getInstructionMapping(/*ID*/1, /*Cost*/1,
+ getOperandsMapping(OpdsMapping),
MI.getNumOperands());
}
+
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index d29f4bc79a51..f3a96e2a6128 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -1,9 +1,8 @@
//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -14,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#define GET_REGBANK_DECLARATIONS
@@ -22,6 +22,8 @@
namespace llvm {
+class LLT;
+class MachineIRBuilder;
class SIRegisterInfo;
class TargetRegisterInfo;
@@ -36,16 +38,53 @@ protected:
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const SIRegisterInfo *TRI;
+ void executeInWaterfallLoop(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const;
+
+ void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI,
+ unsigned OpIdx) const;
+ bool applyMappingWideLoad(MachineInstr &MI,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI) const;
+
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
const RegisterBankInfo::InstructionMapping &
getInstrMappingForLoad(const MachineInstr &MI) const;
- unsigned getRegBankID(unsigned Reg, const MachineRegisterInfo &MRI,
+ unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const;
+ /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
+ /// Regs. This appropriately sets the regbank of the new registers.
+ void split64BitValueForMapping(MachineIRBuilder &B,
+ SmallVector<Register, 2> &Regs,
+ LLT HalfTy,
+ Register Reg) const;
+
+ template <unsigned NumOps>
+ struct OpRegBankEntry {
+ int8_t RegBanks[NumOps];
+ int16_t Cost;
+ };
+
+ template <unsigned NumOps>
+ InstructionMappings
+ addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const std::array<unsigned, NumOps> RegSrcOpIdx,
+ ArrayRef<OpRegBankEntry<NumOps>> Table) const;
+
+ RegisterBankInfo::InstructionMappings
+ getInstrAlternativeMappingsIntrinsic(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+
+ RegisterBankInfo::InstructionMappings
+ getInstrAlternativeMappingsIntrinsicWSideEffects(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+
bool isSALUMapping(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
@@ -57,6 +96,9 @@ public:
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
+ unsigned getBreakDownCost(const ValueMapping &ValMapping,
+ const RegisterBank *CurBank = nullptr) const override;
+
const RegisterBank &
getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 570379a820e1..9555694fb106 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -1,9 +1,8 @@
//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,7 +14,7 @@ def VGPRRegBank : RegisterBank<"VGPR",
[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
>;
-def SCCRegBank : RegisterBank <"SCC", [SCC_CLASS]>;
+def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>;
// It is helpful to distinguish conditions from ordinary SGPRs.
def VCCRegBank : RegisterBank <"VCC", [SReg_64]>;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 50f859addc2b..7cffdf1a4dcf 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,10 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
- AMDGPU::sub15
+ AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+ AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24,
+ AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29,
+ AMDGPU::sub30, AMDGPU::sub31
};
assert(Channel < array_lengthof(SubRegs));
@@ -83,7 +85,18 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
}
-unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const SIFrameLowering *TFI =
+ MF.getSubtarget<GCNSubtarget>().getFrameLowering();
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- return FuncInfo->getFrameOffsetReg();
+ return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
+ : FuncInfo->getStackPtrOffsetReg();
+}
+
+const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
+ return CSR_AMDGPU_AllVGPRs_RegMask;
+}
+
+const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
+ return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
}
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 922d974f2ebd..3453a8c1b0b3 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -1,9 +1,8 @@
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
index ceabae524414..ab71b7aa8a57 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.td
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td
@@ -1,9 +1,8 @@
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,7 @@
let Namespace = "AMDGPU" in {
-foreach Index = 0-15 in {
+foreach Index = 0-31 in {
def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
}
diff --git a/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index efe501cb73c2..4f095087a57f 100644
--- a/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -1,9 +1,8 @@
//===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index 9dbd7751b4d8..f8703c36127a 100644
--- a/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -1,9 +1,8 @@
//===-- AMDGPUSearchableTables.td - ------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -49,6 +48,8 @@ def : SourceOfDivergence<int_amdgcn_workitem_id_z>;
def : SourceOfDivergence<int_amdgcn_interp_mov>;
def : SourceOfDivergence<int_amdgcn_interp_p1>;
def : SourceOfDivergence<int_amdgcn_interp_p2>;
+def : SourceOfDivergence<int_amdgcn_interp_p1_f16>;
+def : SourceOfDivergence<int_amdgcn_interp_p2_f16>;
def : SourceOfDivergence<int_amdgcn_mbcnt_hi>;
def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
def : SourceOfDivergence<int_r600_read_tidig_x>;
@@ -70,8 +71,59 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_and>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_or>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
+def : SourceOfDivergence<int_amdgcn_permlane16>;
+def : SourceOfDivergence<int_amdgcn_permlanex16>;
+def : SourceOfDivergence<int_amdgcn_mov_dpp>;
+def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
+def : SourceOfDivergence<int_amdgcn_update_dpp>;
+
+def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x1f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x4f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_4x4x4i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_4x4x2bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x1f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x4f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x16f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x4i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x16i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x2bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x1f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2f32>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x8f16>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x4i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x8i8>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x2bf16>;
+def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4bf16>;
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index ed0cc70c3d9a..1eb9b83456c5 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,12 +40,17 @@ using namespace llvm;
#undef AMDGPUSubtarget
#include "R600GenSubtargetInfo.inc"
+static cl::opt<bool> DisablePowerSched(
+ "amdgpu-disable-power-sched",
+ cl::desc("Disable scheduling to minimize mAI power bursts"),
+ cl::init(false));
+
GCNSubtarget::~GCNSubtarget() = default;
R600Subtarget &
R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
- SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
+ SmallString<256> FullFS("+promote-alloca,");
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -65,7 +69,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
- StringRef GPU, StringRef FS) {
+ StringRef GPU, StringRef FS) {
// Determine default and user-specified characteristics
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
// enabled, but some instructions do not respect them and they run at the
@@ -78,10 +82,11 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// Similarly we want enable-prt-strict-null to be on by default and not to
// unset everything else if it is disabled
- SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,");
+ // Assuming ECC is enabled is the conservative default.
+ SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+ FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
 // FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
@@ -94,6 +99,16 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
+ // Disable mutually exclusive bits.
+ if (FS.find_lower("+wavefrontsize") != StringRef::npos) {
+ if (FS.find_lower("wavefrontsize16") == StringRef::npos)
+ FullFS += "-wavefrontsize16,";
+ if (FS.find_lower("wavefrontsize32") == StringRef::npos)
+ FullFS += "-wavefrontsize32,";
+ if (FS.find_lower("wavefrontsize64") == StringRef::npos)
+ FullFS += "-wavefrontsize64,";
+ }
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -124,8 +139,25 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
HasMovrel = true;
}
+ // Don't crash on invalid devices.
+ if (WavefrontSize == 0)
+ WavefrontSize = 64;
+
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ if (DoesNotSupportXNACK && EnableXNACK) {
+ ToggleFeature(AMDGPU::FeatureXNACK);
+ EnableXNACK = false;
+ }
+
+ // ECC is on by default, but turn it off if the hardware doesn't support it
+  // anyway. This matters for the gfx9 targets that have d16 loads but don't
+  // support ECC.
+ if (DoesNotSupportSRAMECC && EnableSRAMECC) {
+ ToggleFeature(AMDGPU::FeatureSRAMECC);
+ EnableSRAMECC = false;
+ }
+
return *this;
}
@@ -152,8 +184,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AMDGPUGenSubtargetInfo(TT, GPU, FS),
AMDGPUSubtarget(TT),
TargetTriple(TT),
- Gen(SOUTHERN_ISLANDS),
- IsaVersion(ISAVersion0_0_0),
+ Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS),
InstrItins(getInstrItineraryForCPU(GPU)),
LDSBankCount(0),
MaxPrivateElementSize(0),
@@ -162,7 +193,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HalfRate64Ops(false),
FP64FP16Denormals(false),
- DX10Clamp(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
CodeObjectV3(false),
@@ -171,11 +201,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasApertureRegs(false),
EnableXNACK(false),
+ DoesNotSupportXNACK(false),
+ EnableCuMode(false),
TrapHandler(false),
- DebuggerInsertNops(false),
- DebuggerEmitPrologue(false),
- EnableHugePrivateBuffer(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
@@ -186,8 +215,10 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FP64(false),
GCN3Encoding(false),
CIInsts(false),
- VIInsts(false),
+ GFX8Insts(false),
GFX9Insts(false),
+ GFX10Insts(false),
+ GFX7GFX8GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
HasIntClamp(false),
@@ -202,19 +233,47 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasDPP8(false),
HasR128A16(false),
+ HasNSAEncoding(false),
HasDLInsts(false),
- HasDotInsts(false),
+ HasDot1Insts(false),
+ HasDot2Insts(false),
+ HasDot3Insts(false),
+ HasDot4Insts(false),
+ HasDot5Insts(false),
+ HasDot6Insts(false),
+ HasMAIInsts(false),
+ HasPkFmacF16Inst(false),
+ HasAtomicFaddInsts(false),
EnableSRAMECC(false),
+ DoesNotSupportSRAMECC(false),
+ HasNoSdstCMPX(false),
+ HasVscnt(false),
+ HasRegisterBanking(false),
+ HasVOP3Literal(false),
+ HasNoDataDepHazard(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
FlatScratchInsts(false),
+ ScalarFlatScratchInsts(false),
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
+ LDSMisalignedBug(false),
ScalarizeGlobal(false),
+ HasVcmpxPermlaneHazard(false),
+ HasVMEMtoScalarWriteHazard(false),
+ HasSMEMtoVectorWriteHazard(false),
+ HasInstFwdPrefetchBug(false),
+ HasVcmpxExecWARHazard(false),
+ HasLdsBranchVmemWARHazard(false),
+ HasNSAtoVMEMBug(false),
+ HasOffset3fBug(false),
+ HasFlatSegmentOffsetBug(false),
+
FeatureDisable(false),
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
@@ -226,12 +285,34 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
+unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
+ if (getGeneration() < GFX10)
+ return 1;
+
+ switch (Opcode) {
+ case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_gfx10:
+ case AMDGPU::V_LSHL_B64:
+ case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHRREV_B64_gfx10:
+ case AMDGPU::V_LSHR_B64:
+ case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_ASHRREV_I64_gfx10:
+ case AMDGPU::V_ASHR_I64:
+ return 1;
+ }
+
+ return 2;
+}
+
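A minimal sketch (not part of this change) of how a caller might consult the new per-opcode limit; the helper name and the precomputed ConstantBusUses count are assumptions for illustration only:

    // Hypothetical helper: compare an already-computed count of constant-bus
    // reads (SGPRs and literals) for MI against the subtarget's limit.
    static bool fitsConstantBusLimit(const GCNSubtarget &ST,
                                     const MachineInstr &MI,
                                     unsigned ConstantBusUses) {
      // Pre-GFX10 VALU instructions may use the constant bus once per
      // instruction; GFX10 allows two uses, except for the 64-bit shifts
      // special-cased in getConstantBusLimit() above.
      return ConstantBusUses <= ST.getConstantBusLimit(MI.getOpcode());
    }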
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)
return getLocalMemorySize();
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ if (!WorkGroupsPerCu)
+ return 0;
unsigned MaxWaves = getMaxWavesPerEU();
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
@@ -240,6 +321,8 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
const Function &F) const {
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ if (!WorkGroupsPerCu)
+ return 0;
unsigned MaxWaves = getMaxWavesPerEU();
unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
@@ -260,7 +343,8 @@ AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
- return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
+ return std::make_pair(getWavefrontSize() * 2,
+ std::max(getWavefrontSize() * 4, 256u));
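    // Illustrative values (editorial note, not part of the patch): with a
    // 64-wide wavefront this still returns (128, 256); with a 32-wide
    // wavefront it now returns (64, 256), since std::max(32 * 4, 256u) == 256
    // raises the old wave32 upper bound of 128 to 256.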
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_LS:
case CallingConv::AMDGPU_HS:
@@ -280,12 +364,6 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
std::pair<unsigned, unsigned> Default =
getDefaultFlatWorkGroupSize(F.getCallingConv());
- // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
- // starts using "amdgpu-flat-work-group-size" attribute.
- Default.second = AMDGPU::getIntegerAttribute(
- F, "amdgpu-max-work-group-size", Default.second);
- Default.first = std::min(Default.first, Default.second);
-
// Requested minimum/maximum flat work group sizes.
std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
F, "amdgpu-flat-work-group-size", Default);
@@ -319,10 +397,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
getMaxWavesPerEU(FlatWorkGroupSizes.second);
bool RequestedFlatWorkGroupSize = false;
- // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
- // starts using "amdgpu-flat-work-group-size" attribute.
- if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
- F.hasFnAttribute("amdgpu-flat-work-group-size")) {
+ if (F.hasFnAttribute("amdgpu-flat-work-group-size")) {
Default.first = MinImpliedByFlatWorkGroupSize;
RequestedFlatWorkGroupSize = true;
}
@@ -460,7 +535,6 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
FMA(false),
CaymanISA(false),
CFALUBug(false),
- DX10Clamp(false),
HasVertexCache(false),
R600ALUInst(false),
FP64(false),
@@ -486,7 +560,14 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Policy.ShouldTrackLaneMasks = true;
}
+bool GCNSubtarget::hasMadF16() const {
+ return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
+}
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ return 10;
+
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
return 10;
@@ -533,6 +614,9 @@ unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
+
if (MFI.hasFlatScratchInit()) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
@@ -631,9 +715,7 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {}
- void apply(ScheduleDAGInstrs *DAGInstrs) override {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+ void apply(ScheduleDAGInstrs *DAG) override {
SUnit *SUa = nullptr;
 // Search for two consecutive memory operations and link them
// to prevent scheduler from moving them apart.
@@ -674,11 +756,130 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
}
}
};
+
+struct FillMFMAShadowMutation : ScheduleDAGMutation {
+ const SIInstrInfo *TII;
+
+ ScheduleDAGMI *DAG;
+
+ FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}
+
+ bool isSALU(const SUnit *SU) const {
+ const MachineInstr *MI = SU->getInstr();
+ return MI && TII->isSALU(*MI) && !MI->isTerminator();
+ }
+
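  // Editorial summary of the helper below (derived from the code, not part of
  // the patch): canAddEdge(Succ, Pred) answers whether an artificial
  // Pred -> Succ dependency can be added without creating a cycle. After a
  // quick node-order shortcut it gathers the transitive successors of Succ and
  // walks the transitive predecessors of Pred (including Pred itself); if the
  // two sets intersect, the new edge would close a cycle and false is returned.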
+ bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
+ if (Pred->NodeNum < Succ->NodeNum)
+ return true;
+
+ SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred});
+
+ for (unsigned I = 0; I < Succs.size(); ++I) {
+ for (const SDep &SI : Succs[I]->Succs) {
+ const SUnit *SU = SI.getSUnit();
+ if (SU != Succs[I] && llvm::find(Succs, SU) == Succs.end())
+ Succs.push_back(SU);
+ }
+ }
+
+ SmallPtrSet<const SUnit*, 32> Visited;
+ while (!Preds.empty()) {
+ const SUnit *SU = Preds.pop_back_val();
+ if (llvm::find(Succs, SU) != Succs.end())
+ return false;
+ Visited.insert(SU);
+ for (const SDep &SI : SU->Preds)
+ if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
+ Preds.push_back(SI.getSUnit());
+ }
+
+ return true;
+ }
+
+  // Link as many SALU instructions in a chain as possible. Return the size
+  // of the chain. Links up to MaxChain instructions.
+ unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
+ SmallPtrSetImpl<SUnit *> &Visited) const {
+ SmallVector<SUnit *, 8> Worklist({To});
+ unsigned Linked = 0;
+
+ while (!Worklist.empty() && MaxChain-- > 0) {
+ SUnit *SU = Worklist.pop_back_val();
+ if (!Visited.insert(SU).second)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From);
+ dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');
+
+ if (SU->addPred(SDep(From, SDep::Artificial), false))
+ ++Linked;
+
+ for (SDep &SI : From->Succs) {
+ SUnit *SUv = SI.getSUnit();
+ if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
+ SUv->addPred(SDep(SU, SDep::Artificial), false);
+ }
+
+ for (SDep &SI : SU->Succs) {
+ SUnit *Succ = SI.getSUnit();
+ if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
+ Worklist.push_back(Succ);
+ }
+ }
+
+ return Linked;
+ }
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override {
+ const GCNSubtarget &ST = DAGInstrs->MF.getSubtarget<GCNSubtarget>();
+ if (!ST.hasMAIInsts() || DisablePowerSched)
+ return;
+ DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+ const TargetSchedModel *TSchedModel = DAGInstrs->getSchedModel();
+ if (!TSchedModel || DAG->SUnits.empty())
+ return;
+
+    // Scan for long-latency MFMA instructions and try to add dependencies on
+    // available SALU instructions so that they can fill the MFMA shadow. It is
+    // preferable to fill the shadow with SALU rather than VALU instructions to
+    // avoid power consumption bursts and throttling.
+ auto LastSALU = DAG->SUnits.begin();
+ auto E = DAG->SUnits.end();
+ SmallPtrSet<SUnit*, 32> Visited;
+ for (SUnit &SU : DAG->SUnits) {
+ MachineInstr &MAI = *SU.getInstr();
+ if (!TII->isMAI(MAI) ||
+ MAI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32 ||
+ MAI.getOpcode() == AMDGPU::V_ACCVGPR_READ_B32)
+ continue;
+
+ unsigned Lat = TSchedModel->computeInstrLatency(&MAI) - 1;
+
+ LLVM_DEBUG(dbgs() << "Found MFMA: "; DAG->dumpNode(SU);
+ dbgs() << "Need " << Lat
+ << " instructions to cover latency.\n");
+
+ // Find up to Lat independent scalar instructions as early as
+ // possible such that they can be scheduled after this MFMA.
+ for ( ; Lat && LastSALU != E; ++LastSALU) {
+ if (Visited.count(&*LastSALU))
+ continue;
+
+ if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
+ continue;
+
+ Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);
+ }
+ }
+ }
+};
} // namespace
void GCNSubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
+ Mutations.push_back(llvm::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 5584759e5580..78c3b823946d 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1,9 +1,8 @@
//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
@@ -56,7 +55,8 @@ public:
SOUTHERN_ISLANDS = 4,
SEA_ISLANDS = 5,
VOLCANIC_ISLANDS = 6,
- GFX9 = 7
+ GFX9 = 7,
+ GFX10 = 8
};
private:
@@ -246,26 +246,6 @@ public:
class GCNSubtarget : public AMDGPUGenSubtargetInfo,
public AMDGPUSubtarget {
public:
- enum {
- ISAVersion0_0_0,
- ISAVersion6_0_0,
- ISAVersion6_0_1,
- ISAVersion7_0_0,
- ISAVersion7_0_1,
- ISAVersion7_0_2,
- ISAVersion7_0_3,
- ISAVersion7_0_4,
- ISAVersion8_0_1,
- ISAVersion8_0_2,
- ISAVersion8_0_3,
- ISAVersion8_1_0,
- ISAVersion9_0_0,
- ISAVersion9_0_2,
- ISAVersion9_0_4,
- ISAVersion9_0_6,
- ISAVersion9_0_9,
- };
-
enum TrapHandlerAbi {
TrapHandlerAbiNone = 0,
TrapHandlerAbiHsa = 1
@@ -297,7 +277,6 @@ protected:
// Basic subtarget description.
Triple TargetTriple;
unsigned Gen;
- unsigned IsaVersion;
InstrItineraryData InstrItins;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -308,7 +287,6 @@ protected:
// Dynamially set bits that enable features.
bool FP64FP16Denormals;
- bool DX10Clamp;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool CodeObjectV3;
@@ -316,12 +294,11 @@ protected:
bool UnalignedBufferAccess;
bool HasApertureRegs;
bool EnableXNACK;
+ bool DoesNotSupportXNACK;
+ bool EnableCuMode;
bool TrapHandler;
- bool DebuggerInsertNops;
- bool DebuggerEmitPrologue;
// Used as options.
- bool EnableHugePrivateBuffer;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
@@ -336,8 +313,10 @@ protected:
bool IsGCN;
bool GCN3Encoding;
bool CIInsts;
- bool VIInsts;
+ bool GFX8Insts;
bool GFX9Insts;
+ bool GFX10Insts;
+ bool GFX7GFX8GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
bool HasIntClamp;
@@ -352,23 +331,51 @@ protected:
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasDPP8;
bool HasR128A16;
+ bool HasNSAEncoding;
bool HasDLInsts;
- bool HasDotInsts;
+ bool HasDot1Insts;
+ bool HasDot2Insts;
+ bool HasDot3Insts;
+ bool HasDot4Insts;
+ bool HasDot5Insts;
+ bool HasDot6Insts;
+ bool HasMAIInsts;
+ bool HasPkFmacF16Inst;
+ bool HasAtomicFaddInsts;
bool EnableSRAMECC;
+ bool DoesNotSupportSRAMECC;
+ bool HasNoSdstCMPX;
+ bool HasVscnt;
+ bool HasRegisterBanking;
+ bool HasVOP3Literal;
+ bool HasNoDataDepHazard;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
bool FlatScratchInsts;
+ bool ScalarFlatScratchInsts;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
+ bool LDSMisalignedBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool ScalarizeGlobal;
+ bool HasVcmpxPermlaneHazard;
+ bool HasVMEMtoScalarWriteHazard;
+ bool HasSMEMtoVectorWriteHazard;
+ bool HasInstFwdPrefetchBug;
+ bool HasVcmpxExecWARHazard;
+ bool HasLdsBranchVmemWARHazard;
+ bool HasNSAtoVMEMBug;
+ bool HasOffset3fBug;
+ bool HasFlatSegmentOffsetBug;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
@@ -378,6 +385,9 @@ private:
SITargetLowering TLInfo;
SIFrameLowering FrameLowering;
+ // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
+ static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
+
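A quick check of the arithmetic above (illustrative only, not part of the change): the 13-bit WAVESIZE field counts 256-dword units, i.e. 1024 bytes each, so the ceiling works out to just under 8 MiB of scratch per wave.

    // 1024 bytes per unit * (2^13 - 1) encodable units = 8,387,584 bytes.
    static_assert((256 * 4) * ((1u << 13) - 1) == 8387584u,
                  "per-wave scratch ceiling implied by the 13-bit WAVESIZE field");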
public:
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM);
@@ -437,6 +447,11 @@ public:
return Log2_32(WavefrontSize);
}
+  /// Return the number of high bits known to be zero for a frame index.
+ unsigned getKnownHighZeroBitsForFrameIndex() const {
+ return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
+ }
+
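  // Worked example (editorial, assuming a wave64 subtarget): MaxWaveScratchSize
  // = 8,387,584 occupies 23 bits, so a 32-bit countLeadingZeros() returns 9;
  // adding log2(64) = 6 gives 15. Per-lane scratch offsets therefore need at
  // most 32 - 15 = 17 bits (8,387,584 / 64 = 131,056 bytes of scratch per lane).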
int getLDSBankCount() const {
return LDSBankCount;
}
@@ -445,6 +460,8 @@ public:
return MaxPrivateElementSize;
}
+ unsigned getConstantBusLimit(unsigned Opcode) const;
+
bool hasIntClamp() const {
return HasIntClamp;
}
@@ -473,6 +490,12 @@ public:
return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
}
+ // Return true if the target only has the reverse operand versions of VALU
+ // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
+ bool hasOnlyRevVALUShifts() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
bool hasBFE() const {
return true;
}
@@ -525,14 +548,48 @@ public:
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
- bool enableHugePrivateBuffer() const {
- return EnableHugePrivateBuffer;
+ /// True if the offset field of DS instructions works as expected. On SI, the
+ /// offset uses a 16-bit adder and does not always wrap properly.
+ bool hasUsableDSOffset() const {
+ return getGeneration() >= SEA_ISLANDS;
}
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
+ /// Condition output from div_scale is usable.
+ bool hasUsableDivScaleConditionOutput() const {
+ return getGeneration() != SOUTHERN_ISLANDS;
+ }
+
+ /// Extra wait hazard is needed in some cases before
+ /// s_cbranch_vccnz/s_cbranch_vccz.
+ bool hasReadVCCZBug() const {
+ return getGeneration() <= SEA_ISLANDS;
+ }
+
+  /// A read of an SGPR by an SMRD instruction requires 4 wait states when the SGPR
+ /// was written by a VALU instruction.
+ bool hasSMRDReadVALUDefHazard() const {
+ return getGeneration() == SOUTHERN_ISLANDS;
+ }
+
+ /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
+ /// SGPR was written by a VALU Instruction.
+ bool hasVMEMReadSGPRVALUDefHazard() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ bool hasRFEHazards() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
+ unsigned getSetRegWaitStates() const {
+ return getGeneration() <= SEA_ISLANDS ? 1 : 2;
+ }
+
bool dumpCode() const {
return DumpCode;
}
@@ -554,14 +611,6 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- bool enableDX10Clamp() const {
- return DX10Clamp;
- }
-
- bool enableIEEEBit(const MachineFunction &MF) const {
- return AMDGPU::isCompute(MF.getFunction().getCallingConv());
- }
-
bool useFlatForGlobal() const {
return FlatForGlobal;
}
@@ -572,6 +621,11 @@ public:
return CIInsts && EnableDS128;
}
+ /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
+ bool haveRoundOpsF64() const {
+ return CIInsts;
+ }
+
/// \returns If MUBUF instructions always perform range checking, even for
/// buffer resources used for private memory access.
bool privateMemoryResourceIsRangeChecked() const {
@@ -613,10 +667,18 @@ public:
return EnableXNACK;
}
+ bool isCuModeEnabled() const {
+ return EnableCuMode;
+ }
+
bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
+ bool hasFlatScrRegister() const {
+ return hasFlatAddressSpace();
+ }
+
bool hasFlatInstOffsets() const {
return FlatInstOffsets;
}
@@ -629,6 +691,14 @@ public:
return FlatScratchInsts;
}
+ bool hasScalarFlatScratchInsts() const {
+ return ScalarFlatScratchInsts;
+ }
+
+ bool hasFlatSegmentOffsetBug() const {
+ return HasFlatSegmentOffsetBug;
+ }
+
bool hasFlatLgkmVMemCountInOrder() const {
return getGeneration() > GFX9;
}
@@ -637,12 +707,34 @@ public:
return getGeneration() >= GFX9;
}
+ bool d16PreservesUnusedBits() const {
+ return hasD16LoadStore() && !isSRAMECCEnabled();
+ }
+
+ bool hasD16Images() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
 /// Return if most LDS instructions have an m0 use that requires m0 to be
 /// initialized.
bool ldsRequiresM0Init() const {
return getGeneration() < GFX9;
}
+ // True if the hardware rewinds and replays GWS operations if a wave is
+ // preempted.
+ //
+ // If this is false, a GWS operation requires testing if a nack set the
+ // MEM_VIOL bit, and repeating if so.
+ bool hasGWSAutoReplay() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// \returns if target has ds_gws_sema_release_all instruction.
+ bool hasGWSSemaReleaseAll() const {
+ return CIInsts;
+ }
+
bool hasAddNoCarry() const {
return AddNoCarryInsts;
}
@@ -680,22 +772,74 @@ public:
return HasSDWAOutModsVOPC;
}
- bool vmemWriteNeedsExpWaitcnt() const {
- return getGeneration() < SEA_ISLANDS;
- }
-
bool hasDLInsts() const {
return HasDLInsts;
}
- bool hasDotInsts() const {
- return HasDotInsts;
+ bool hasDot1Insts() const {
+ return HasDot1Insts;
+ }
+
+ bool hasDot2Insts() const {
+ return HasDot2Insts;
+ }
+
+ bool hasDot3Insts() const {
+ return HasDot3Insts;
+ }
+
+ bool hasDot4Insts() const {
+ return HasDot4Insts;
+ }
+
+ bool hasDot5Insts() const {
+ return HasDot5Insts;
+ }
+
+ bool hasDot6Insts() const {
+ return HasDot6Insts;
+ }
+
+ bool hasMAIInsts() const {
+ return HasMAIInsts;
+ }
+
+ bool hasPkFmacF16Inst() const {
+ return HasPkFmacF16Inst;
+ }
+
+ bool hasAtomicFaddInsts() const {
+ return HasAtomicFaddInsts;
}
bool isSRAMECCEnabled() const {
return EnableSRAMECC;
}
+ bool hasNoSdstCMPX() const {
+ return HasNoSdstCMPX;
+ }
+
+ bool hasVscnt() const {
+ return HasVscnt;
+ }
+
+ bool hasRegisterBanking() const {
+ return HasRegisterBanking;
+ }
+
+ bool hasVOP3Literal() const {
+ return HasVOP3Literal;
+ }
+
+ bool hasNoDataDepHazard() const {
+ return HasNoDataDepHazard;
+ }
+
+ bool vmemWriteNeedsExpWaitcnt() const {
+ return getGeneration() < SEA_ISLANDS;
+ }
+
// Scratch is allocated in 256 dword per wave blocks for the entire
 // wavefront. When viewed from the perspective of an arbitrary workitem, this
// is 4-byte aligned.
@@ -792,29 +936,34 @@ public:
return HasScalarAtomics;
}
+ bool hasLDSFPAtomics() const {
+ return GFX8Insts;
+ }
bool hasDPP() const {
return HasDPP;
}
+ bool hasDPP8() const {
+ return HasDPP8;
+ }
+
bool hasR128A16() const {
return HasR128A16;
}
- bool enableSIScheduler() const {
- return EnableSIScheduler;
+ bool hasOffset3fBug() const {
+ return HasOffset3fBug;
}
- bool debuggerSupported() const {
- return debuggerInsertNops() && debuggerEmitPrologue();
+ bool hasNSAEncoding() const {
+ return HasNSAEncoding;
}
- bool debuggerInsertNops() const {
- return DebuggerInsertNops;
- }
+ bool hasMadF16() const;
- bool debuggerEmitPrologue() const {
- return DebuggerEmitPrologue;
+ bool enableSIScheduler() const {
+ return EnableSIScheduler;
}
bool loadStoreOptEnabled() const {
@@ -835,15 +984,48 @@ public:
}
bool hasSMovFedHazard() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
+ return getGeneration() == AMDGPUSubtarget::GFX9;
}
bool hasReadM0MovRelInterpHazard() const {
- return getGeneration() >= AMDGPUSubtarget::GFX9;
+ return getGeneration() == AMDGPUSubtarget::GFX9;
}
bool hasReadM0SendMsgHazard() const {
- return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ getGeneration() <= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasVcmpxPermlaneHazard() const {
+ return HasVcmpxPermlaneHazard;
+ }
+
+ bool hasVMEMtoScalarWriteHazard() const {
+ return HasVMEMtoScalarWriteHazard;
+ }
+
+ bool hasSMEMtoVectorWriteHazard() const {
+ return HasSMEMtoVectorWriteHazard;
+ }
+
+ bool hasLDSMisalignedBug() const {
+ return LDSMisalignedBug && !EnableCuMode;
+ }
+
+ bool hasInstFwdPrefetchBug() const {
+ return HasInstFwdPrefetchBug;
+ }
+
+ bool hasVcmpxExecWARHazard() const {
+ return HasVcmpxExecWARHazard;
+ }
+
+ bool hasLdsBranchVmemWARHazard() const {
+ return HasLdsBranchVmemWARHazard;
+ }
+
+ bool hasNSAtoVMEMBug() const {
+ return HasNSAtoVMEMBug;
}
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
@@ -957,6 +1139,14 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+ bool isWave32() const {
+ return WavefrontSize == 32;
+ }
+
+ const TargetRegisterClass *getBoolRC() const {
+ return getRegisterInfo()->getBoolRC();
+ }
+
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
@@ -994,7 +1184,6 @@ private:
bool FMA;
bool CaymanISA;
bool CFALUBug;
- bool DX10Clamp;
bool HasVertexCache;
bool R600ALUInst;
bool FP64;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8cefdbf74b9..0ea8db04c298 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,11 +24,14 @@
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
+#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
@@ -67,6 +69,11 @@ EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
cl::desc("Run early if-conversion"),
cl::init(false));
+static cl::opt<bool>
+OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
+ cl::desc("Run pre-RA exec mask optimizations"),
+ cl::init(true));
+
static cl::opt<bool> EnableR600IfConvert(
"r600-if-convert",
cl::desc("Use if conversion pass"),
@@ -109,7 +116,7 @@ static cl::opt<bool> EnableSDWAPeephole(
static cl::opt<bool> EnableDPPCombine(
"amdgpu-dpp-combine",
cl::desc("Enable DPP combiner"),
- cl::init(false));
+ cl::init(true));
// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
@@ -123,11 +130,11 @@ static cl::opt<bool, true> LateCFGStructurize(
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
cl::Hidden);
-static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
+static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
"amdgpu-function-calls",
cl::desc("Enable AMDGPU function call support"),
cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
- cl::init(false),
+ cl::init(true),
cl::Hidden);
// Enable lib calls simplifications
@@ -143,6 +150,12 @@ static cl::opt<bool> EnableLowerKernelArguments(
cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnableRegReassign(
+ "amdgpu-reassign-regs",
+ cl::desc("Enable register reassign optimizations on gfx10+"),
+ cl::init(true),
+ cl::Hidden);
+
// Enable atomic optimization
static cl::opt<bool> EnableAtomicOptimizations(
"amdgpu-atomic-optimizations",
@@ -157,6 +170,18 @@ static cl::opt<bool> EnableSIModeRegisterPass(
cl::init(true),
cl::Hidden);
+// This option is used in lit tests to prevent dead-code elimination of the
+// patterns being inspected.
+static cl::opt<bool>
+EnableDCEInRA("amdgpu-dce-in-ra",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable machine DCE inside regalloc"));
+
+static cl::opt<bool> EnableScalarIRPasses(
+ "amdgpu-scalar-ir-passes",
+ cl::desc("Enable scalar IR passes"),
+ cl::init(true),
+ cl::Hidden);
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -172,6 +197,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUDAGToDAGISelPass(*PR);
initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
+ initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
initializeSIFixupVectorISelPass(*PR);
@@ -192,6 +218,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
+ initializeAMDGPUPropagateAttributesEarlyPass(*PR);
+ initializeAMDGPUPropagateAttributesLatePass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
@@ -201,9 +229,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
- initializeSIDebuggerInsertNopsPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
- initializeSIFixWWMLivenessPass(*PR);
+ initializeSIPreAllocateWWMRegsPass(*PR);
initializeSIFormMemoryClausesPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
@@ -211,6 +238,8 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUseNativeCallsPass(*PR);
initializeAMDGPUSimplifyLibCallsPass(*PR);
initializeAMDGPUInlinerPass(*PR);
+ initializeGCNRegBankReassignPass(*PR);
+ initializeGCNNSAReassignPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -295,10 +324,11 @@ static StringRef computeDataLayout(const Triple &TT) {
}
// 32-bit private, local, and region pointers. 64-bit global, constant and
- // flat.
+ // flat, non-integral buffer fat pointers.
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+ "-ni:7";
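  // Editorial note on the string above: "-ni:7" is the standard data layout
  // syntax for declaring a non-integral pointer address space; here it flags
  // address space 7, the buffer fat pointer space mentioned in the updated
  // comment, so optimizations must not reason about the integer representation
  // of those pointers.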
}
LLVM_READNONE
@@ -306,8 +336,9 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
if (!GPU.empty())
return GPU;
+ // Need to default to a target with flat support for HSA.
if (TT.getArch() == Triple::amdgcn)
- return "generic";
+ return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
return "r600";
}
@@ -363,24 +394,25 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
bool EnableOpt = getOptLevel() > CodeGenOpt::None;
bool Internalize = InternalizeSymbols;
- bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
+ bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
- if (EnableAMDGPUFunctionCalls) {
+ if (EnableFunctionCalls) {
delete Builder.Inliner;
Builder.Inliner = createAMDGPUFunctionInliningPass();
}
Builder.addExtension(
PassManagerBuilder::EP_ModuleOptimizerEarly,
- [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
- legacy::PassManagerBase &PM) {
+ [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
if (AMDGPUAA) {
PM.add(createAMDGPUAAWrapperPass());
PM.add(createAMDGPUExternalAAWrapperPass());
}
PM.add(createAMDGPUUnifyMetadataPass());
+ PM.add(createAMDGPUPropagateAttributesLatePass(this));
if (Internalize) {
PM.add(createInternalizePass(mustPreserveGV));
PM.add(createGlobalDCEPass());
@@ -392,15 +424,16 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
const auto &Opt = Options;
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
- [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
- legacy::PassManagerBase &PM) {
+ [AMDGPUAA, LibCallSimplify, &Opt, this](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
if (AMDGPUAA) {
PM.add(createAMDGPUAAWrapperPass());
PM.add(createAMDGPUExternalAAWrapperPass());
}
+ PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
PM.add(llvm::createAMDGPUUseNativeCallsPass());
if (LibCallSimplify)
- PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
+ PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt, this));
});
Builder.addExtension(
@@ -428,6 +461,11 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
setRequiresStructuredCFG(true);
+
+ // Override the default since calls aren't supported for r600.
+ if (EnableFunctionCalls &&
+ EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+ EnableFunctionCalls = false;
}
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
@@ -528,8 +566,14 @@ public:
bool addPreISel() override;
bool addInstSelector() override;
bool addGCPasses() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
+std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
+ return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
class R600PassConfig final : public AMDGPUPassConfig {
public:
R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
@@ -572,9 +616,10 @@ public:
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
- void addFastRegAlloc(FunctionPass *RegAllocPass) override;
- void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+ void addFastRegAlloc() override;
+ void addOptimizedRegAlloc() override;
void addPreRegAlloc() override;
+ bool addPreRewrite() override;
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -614,12 +659,16 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
- addPass(createAtomicExpandPass());
-
// This must occur before inlining, as the inliner will not look through
// bitcast calls.
addPass(createAMDGPUFixFunctionBitcastsPass());
+  // Run the attribute propagation pass in the backend in case opt was not run.
+ addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
+
+ addPass(createAtomicExpandPass());
+
addPass(createAMDGPULowerIntrinsicsPass());
// Function calls are not supported, so make sure we inline everything.
@@ -652,7 +701,8 @@ void AMDGPUPassConfig::addIRPasses() {
if (EnableSROA)
addPass(createSROAPass());
- addStraightLineScalarOptimizationPasses();
+ if (EnableScalarIRPasses)
+ addStraightLineScalarOptimizationPasses();
if (EnableAMDGPUAliasAnalysis) {
addPass(createAMDGPUAAWrapperPass());
@@ -678,15 +728,20 @@ void AMDGPUPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None && EnableScalarIRPasses)
addEarlyCSEOrGVNPass();
}
void AMDGPUPassConfig::addCodeGenPrepare() {
+ if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+ addPass(createAMDGPUAnnotateKernelFeaturesPass());
+
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
EnableLowerKernelArguments)
addPass(createAMDGPULowerKernelArgumentsPass());
+ addPass(&AMDGPUPerfHintAnalysisID);
+
TargetPassConfig::addCodeGenPrepare();
if (EnableLoadStoreVectorizer)
@@ -700,7 +755,8 @@ bool AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
- addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+ // Defer the verifier until FinalizeISel.
+ addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
return false;
}
@@ -770,7 +826,6 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
- addPass(createAMDGPUAnnotateKernelFeaturesPass());
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
@@ -783,6 +838,7 @@ bool GCNPassConfig::addPreISel() {
if (!LateCFGStructurize) {
addPass(createSIAnnotateControlFlowPass());
}
+ addPass(createLCSSAPass());
return false;
}
@@ -856,7 +912,7 @@ void GCNPassConfig::addPreRegAlloc() {
addPass(createSIWholeQuadModePass());
}
-void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+void GCNPassConfig::addFastRegAlloc() {
// FIXME: We have to disable the verifier here because of PHIElimination +
// TwoAddressInstructions disabling it.
@@ -865,28 +921,40 @@ void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
// SI_ELSE will introduce a copy of the tied operand source after the else.
insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
- // This must be run after SILowerControlFlow, since it needs to use the
- // machine-level CFG, but before register allocation.
- insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
+ // This must be run just after RegisterCoalescing.
+ insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
- TargetPassConfig::addFastRegAlloc(RegAllocPass);
+ TargetPassConfig::addFastRegAlloc();
}
-void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
- insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
-
- insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
+void GCNPassConfig::addOptimizedRegAlloc() {
+ if (OptExecMaskPreRA) {
+ insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
+ insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);
+ } else {
+ insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
+ }
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
- // This must be run after SILowerControlFlow, since it needs to use the
- // machine-level CFG, but before register allocation.
- insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);
+ // This must be run just after RegisterCoalescing.
+ insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
+
+ if (EnableDCEInRA)
+ insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
- TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
+ TargetPassConfig::addOptimizedRegAlloc();
+}
+
+bool GCNPassConfig::addPreRewrite() {
+ if (EnableRegReassign) {
+ addPass(&GCNNSAReassignID);
+ addPass(&GCNRegBankReassignID);
+ }
+ return true;
}
void GCNPassConfig::addPostRegAlloc() {
@@ -894,6 +962,9 @@ void GCNPassConfig::addPostRegAlloc() {
if (getOptLevel() > CodeGenOpt::None)
addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
+
+ // Equivalent of PEI for SGPRs.
+ addPass(&SILowerSGPRSpillsID);
}
void GCNPassConfig::addPreSched2() {
@@ -919,10 +990,164 @@ void GCNPassConfig::addPreEmitPass() {
addPass(&PostRAHazardRecognizerID);
addPass(&SIInsertSkipsPassID);
- addPass(createSIDebuggerInsertNopsPass());
addPass(&BranchRelaxationPassID);
}
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(*this, PM);
}
+
+yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
+ return new yaml::SIMachineFunctionInfo();
+}
+
+yaml::MachineFunctionInfo *
+GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return new yaml::SIMachineFunctionInfo(*MFI,
+ *MF.getSubtarget().getRegisterInfo());
+}
+
+bool GCNTargetMachine::parseMachineFunctionInfo(
+ const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error, SMRange &SourceRange) const {
+ const yaml::SIMachineFunctionInfo &YamlMFI =
+ reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
+ MachineFunction &MF = PFS.MF;
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ MFI->initializeBaseYamlFields(YamlMFI);
+
+ auto parseRegister = [&](const yaml::StringValue &RegName, unsigned &RegVal) {
+ if (parseNamedRegisterReference(PFS, RegVal, RegName.Value, Error)) {
+ SourceRange = RegName.SourceRange;
+ return true;
+ }
+
+ return false;
+ };
+
+ auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
+    // Create a diagnostic for the register string literal.
+ const MemoryBuffer &Buffer =
+ *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
+ Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
+ RegName.Value.size(), SourceMgr::DK_Error,
+ "incorrect register class for field", RegName.Value,
+ None, None);
+ SourceRange = RegName.SourceRange;
+ return true;
+ };
+
+ if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
+ parseRegister(YamlMFI.ScratchWaveOffsetReg, MFI->ScratchWaveOffsetReg) ||
+ parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
+ parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
+ return true;
+
+ if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
+ !AMDGPU::SReg_128RegClass.contains(MFI->ScratchRSrcReg)) {
+ return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
+ }
+
+ if (MFI->ScratchWaveOffsetReg != AMDGPU::SCRATCH_WAVE_OFFSET_REG &&
+ !AMDGPU::SGPR_32RegClass.contains(MFI->ScratchWaveOffsetReg)) {
+ return diagnoseRegisterClass(YamlMFI.ScratchWaveOffsetReg);
+ }
+
+ if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
+ !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
+ return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
+ }
+
+ if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
+ !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
+ return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
+ }
+
+ auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
+ const TargetRegisterClass &RC,
+ ArgDescriptor &Arg, unsigned UserSGPRs,
+ unsigned SystemSGPRs) {
+ // Skip parsing if it's not present.
+ if (!A)
+ return false;
+
+ if (A->IsRegister) {
+ unsigned Reg;
+ if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
+ SourceRange = A->RegisterName.SourceRange;
+ return true;
+ }
+ if (!RC.contains(Reg))
+ return diagnoseRegisterClass(A->RegisterName);
+ Arg = ArgDescriptor::createRegister(Reg);
+ } else
+ Arg = ArgDescriptor::createStack(A->StackOffset);
+ // Check and apply the optional mask.
+ if (A->Mask)
+ Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
+
+ MFI->NumUserSGPRs += UserSGPRs;
+ MFI->NumSystemSGPRs += SystemSGPRs;
+ return false;
+ };
+
+ if (YamlMFI.ArgInfo &&
+ (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
+ AMDGPU::SReg_128RegClass,
+ MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
+ AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
+ 2, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
+ MFI->ArgInfo.QueuePtr, 2, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
+ AMDGPU::SReg_64RegClass,
+ MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
+ AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
+ 2, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
+ AMDGPU::SReg_64RegClass,
+ MFI->ArgInfo.FlatScratchInit, 2, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
+ AMDGPU::SGPR_32RegClass,
+ MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
+ AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
+ 0, 1) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
+ AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
+ 0, 1) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
+ AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
+ 0, 1) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
+ AMDGPU::SGPR_32RegClass,
+ MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
+ AMDGPU::SGPR_32RegClass,
+ MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
+ AMDGPU::SReg_64RegClass,
+ MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
+ AMDGPU::SReg_64RegClass,
+ MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
+ AMDGPU::VGPR_32RegClass,
+ MFI->ArgInfo.WorkItemIDX, 0, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
+ AMDGPU::VGPR_32RegClass,
+ MFI->ArgInfo.WorkItemIDY, 0, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
+ AMDGPU::VGPR_32RegClass,
+ MFI->ArgInfo.WorkItemIDZ, 0, 0)))
+ return true;
+
+ MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
+ MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+
+ return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 62fbe71d1902..70fa3961236f 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -1,9 +1,8 @@
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
@@ -95,7 +93,6 @@ public:
class GCNTargetMachine final : public AMDGPUTargetMachine {
private:
- AMDGPUIntrinsicInfo IntrinsicInfo;
mutable StringMap<std::unique_ptr<GCNSubtarget>> SubtargetMap;
public:
@@ -110,13 +107,17 @@ public:
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
- const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
- return &IntrinsicInfo;
- }
-
bool useIPRA() const override {
return true;
}
+
+ yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+ yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const override;
+ bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index c4e1efde130b..6569980d2c75 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUHSATargetObjectFile.cpp - AMDGPU Object Files ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index a4ae1a2c18c2..819bebb7932d 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 11e4ba4b5010..aaed280a1270 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -118,8 +117,10 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Add a small bonus for each of such "if" statements.
if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
if (UP.Threshold < MaxBoost && Br->isConditional()) {
- if (L->isLoopExiting(Br->getSuccessor(0)) ||
- L->isLoopExiting(Br->getSuccessor(1)))
+ BasicBlock *Succ0 = Br->getSuccessor(0);
+ BasicBlock *Succ1 = Br->getSuccessor(1);
+ if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
+ (L->contains(Succ1) && L->isLoopExiting(Succ1)))
continue;
if (dependsOnLocalPhi(L, Br->getCondition())) {
UP.Threshold += UnrollThresholdIf;
@@ -141,7 +142,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned Threshold = 0;
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
Threshold = ThresholdPrivate;
- else if (AS == AMDGPUAS::LOCAL_ADDRESS)
+ else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS)
Threshold = ThresholdLocal;
else
continue;
@@ -159,7 +160,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
if (AllocaSize > MaxAlloca)
continue;
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
+ AS == AMDGPUAS::REGION_ADDRESS) {
LocalGEPsSeen++;
// Inhibit unroll for local memory if we have seen addressing not to
// a variable, most likely we will be unable to combine it.
@@ -254,7 +256,8 @@ unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
- AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
return 512;
}
@@ -308,6 +311,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
switch (Inst->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@@ -399,7 +404,7 @@ int GCNTTIImpl::getArithmeticInstrCost(
if (SLT == MVT::f64) {
int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
// Add cost of workaround.
- if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (!ST->hasUsableDivScaleConditionOutput())
Cost += 3 * getFullRateInstrCost();
return LT.first * Cost * NElts;
@@ -577,6 +582,8 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
return false;
case Intrinsic::amdgcn_readfirstlane:
case Intrinsic::amdgcn_readlane:
+ case Intrinsic::amdgcn_icmp:
+ case Intrinsic::amdgcn_fcmp:
return true;
}
}
@@ -607,7 +614,7 @@ unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
- const Function *Callee) const {
+ const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
TM.getSubtargetImpl(*Caller)->getFeatureBits();
@@ -616,7 +623,14 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
- return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
+ if ((RealCallerBits & RealCalleeBits) != RealCalleeBits)
+ return false;
+
+ // FIXME: dx10_clamp can just take the caller setting, but there seems to be
+ // no way to support merge for backend defined attributes.
+ AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
+ AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+ return CallerMode.isInlineCompatible(CalleeMode);
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 397c5c6fa6fb..6f1bf5a26f0d 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,13 +77,16 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
AMDGPU::FeatureUnalignedScratchAccess,
AMDGPU::FeatureAutoWaitcntBeforeBarrier,
- AMDGPU::FeatureDebuggerEmitPrologue,
- AMDGPU::FeatureDebuggerInsertNops,
// Property of the kernel/environment which can't actually differ.
AMDGPU::FeatureSGPRInitBug,
AMDGPU::FeatureXNACK,
AMDGPU::FeatureTrapHandler,
+ AMDGPU::FeatureCodeObjectV3,
+
+ // The default assumption needs to be that ecc is enabled, but no directly
+ // exposed operations depend on it, so it can be safely inlined.
+ AMDGPU::FeatureSRAMECC,
// Perf-tuning features
AMDGPU::FeatureFastFMAF32,
@@ -178,8 +180,7 @@ public:
// don't use flat addressing.
if (IsGraphicsShader)
return -1;
- return ST->hasFlatAddressSpace() ?
- AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+ return AMDGPUAS::FLAT_ADDRESS;
}
unsigned getVectorSplitCost() { return 0; }
@@ -190,7 +191,9 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 9; }
+ unsigned getInliningThresholdMultiplier() { return 7; }
+
+ int getInlinerVectorBonusPercent() { return 0; }
int getArithmeticReductionCost(unsigned Opcode,
Type *Ty,
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index ced3f6f567e2..396e0ed2e76c 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -199,14 +198,11 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
} else { // Conditional branch.
// Create a new transition block to hold the conditional branch.
- BasicBlock *TransitionBB = BasicBlock::Create(F.getContext(),
- "TransitionBlock", &F);
-
- // Move BI from BB to the new transition block.
- BI->removeFromParent();
- TransitionBB->getInstList().push_back(BI);
+ BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
- // Create a branch that will always branch to the transition block.
+ // Create a branch that will always branch to the transition block and
+ // references DummyReturnBB.
+ BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
}
}
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
index 1f6d9234c1ed..d4401a22a1ad 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUUnifyMetadata.cpp - Unify OpenCL metadata --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 11cd49e5b3dc..12f2e9519c9e 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -1,9 +1,8 @@
//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDKernelCodeT.h b/lib/Target/AMDGPU/AMDKernelCodeT.h
index 289642aaa2d0..3e658a144c1f 100644
--- a/lib/Target/AMDGPU/AMDKernelCodeT.h
+++ b/lib/Target/AMDGPU/AMDKernelCodeT.h
@@ -1,9 +1,8 @@
//===-- AMDGPUKernelCodeT.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file AMDKernelCodeT.h
@@ -127,8 +126,12 @@ enum amd_code_property_mask_t {
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
- AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
- AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+
+ AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11,
+ AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5,
AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
/// Control wave ID base counter for GDS ordered-append. Used to set
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3f9af27a2e5e..6d678966c98e 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,6 +12,7 @@
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
@@ -69,7 +69,7 @@ namespace {
class AMDGPUAsmParser;
-enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
+enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
//===----------------------------------------------------------------------===//
// Operand
@@ -103,14 +103,14 @@ public:
int64_t getFPModifiersOperand() const {
int64_t Operand = 0;
- Operand |= Abs ? SISrcMods::ABS : 0;
- Operand |= Neg ? SISrcMods::NEG : 0;
+ Operand |= Abs ? SISrcMods::ABS : 0u;
+ Operand |= Neg ? SISrcMods::NEG : 0u;
return Operand;
}
int64_t getIntModifiersOperand() const {
int64_t Operand = 0;
- Operand |= Sext ? SISrcMods::SEXT : 0;
+ Operand |= Sext ? SISrcMods::SEXT : 0u;
return Operand;
}
@@ -140,21 +140,25 @@ public:
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
+ ImmTyDLC,
ImmTyGLC,
ImmTySLC,
ImmTyTFE,
ImmTyD16,
ImmTyClampSI,
ImmTyOModSI,
+ ImmTyDPP8,
ImmTyDppCtrl,
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
+ ImmTyDppFi,
ImmTySdwaDstSel,
ImmTySdwaSrc0Sel,
ImmTySdwaSrc1Sel,
ImmTySdwaDstUnused,
ImmTyDMask,
+ ImmTyDim,
ImmTyUNorm,
ImmTyDA,
ImmTyR128A16,
@@ -174,9 +178,15 @@ public:
ImmTyNegLo,
ImmTyNegHi,
ImmTySwizzle,
- ImmTyHigh
+ ImmTyGprIdxMode,
+ ImmTyHigh,
+ ImmTyBLGP,
+ ImmTyCBSZ,
+ ImmTyABID,
+ ImmTyEndpgm,
};
+private:
struct TokOp {
const char *Data;
unsigned Length;
@@ -191,7 +201,6 @@ public:
struct RegOp {
unsigned RegNo;
- bool IsForcedVOP3;
Modifiers Mods;
};
@@ -202,6 +211,7 @@ public:
const MCExpr *Expr;
};
+public:
bool isToken() const override {
if (Kind == Token)
return true;
@@ -231,32 +241,32 @@ public:
return isRegKind() && !hasModifiers();
}
- bool isRegOrImmWithInputMods(MVT type) const {
- return isRegKind() || isInlinableImm(type);
+ bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
+ return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
}
bool isRegOrImmWithInt16InputMods() const {
- return isRegOrImmWithInputMods(MVT::i16);
+ return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
bool isRegOrImmWithInt32InputMods() const {
- return isRegOrImmWithInputMods(MVT::i32);
+ return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
bool isRegOrImmWithInt64InputMods() const {
- return isRegOrImmWithInputMods(MVT::i64);
+ return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
}
bool isRegOrImmWithFP16InputMods() const {
- return isRegOrImmWithInputMods(MVT::f16);
+ return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
bool isRegOrImmWithFP32InputMods() const {
- return isRegOrImmWithInputMods(MVT::f32);
+ return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
bool isRegOrImmWithFP64InputMods() const {
- return isRegOrImmWithInputMods(MVT::f64);
+ return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
}
bool isVReg() const {
@@ -268,8 +278,12 @@ public:
isRegClass(AMDGPU::VReg_512RegClassID);
}
+ bool isVReg32() const {
+ return isRegClass(AMDGPU::VGPR_32RegClassID);
+ }
+
bool isVReg32OrOff() const {
- return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
+ return isOff() || isVReg32();
}
bool isSDWAOperand(MVT type) const;
@@ -289,6 +303,7 @@ public:
bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
+ bool isDim() const { return isImmTy(ImmTyDim); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
@@ -301,13 +316,13 @@ public:
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
- bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
+ bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
- bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
- bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
+ bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
+ bool isDLC() const { return isImmTy(ImmTyDLC); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
@@ -316,6 +331,7 @@ public:
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
+ bool isFI() const { return isImmTy(ImmTyDppFi); }
bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
@@ -339,6 +355,8 @@ public:
bool isRegClass(unsigned RCID) const;
+ bool isInlineValue() const;
+
bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
}
@@ -359,6 +377,8 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
}
+ bool isBoolReg() const;
+
bool isSCSrcF16() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}
@@ -411,6 +431,11 @@ public:
return isSSrcF16();
}
+ bool isSSrcOrLdsB32() const {
+ return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
+ isLiteralImm(MVT::i32) || isExpr();
+ }
+
bool isVCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -456,8 +481,7 @@ public:
}
bool isVSrcV2B16() const {
- llvm_unreachable("cannot happen");
- return isVSrcB16();
+ return isVSrcB16() || isLiteralImm(MVT::v2i16);
}
bool isVSrcF32() const {
@@ -473,8 +497,127 @@ public:
}
bool isVSrcV2F16() const {
- llvm_unreachable("cannot happen");
- return isVSrcF16();
+ return isVSrcF16() || isLiteralImm(MVT::v2f16);
+ }
+
+ bool isVISrcB32() const {
+ return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
+ }
+
+ bool isVISrcB16() const {
+ return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
+ }
+
+ bool isVISrcV2B16() const {
+ return isVISrcB16();
+ }
+
+ bool isVISrcF32() const {
+ return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
+ }
+
+ bool isVISrcF16() const {
+ return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
+ }
+
+ bool isVISrcV2F16() const {
+ return isVISrcF16() || isVISrcB32();
+ }
+
+ bool isAISrcB32() const {
+ return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
+ }
+
+ bool isAISrcB16() const {
+ return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
+ }
+
+ bool isAISrcV2B16() const {
+ return isAISrcB16();
+ }
+
+ bool isAISrcF32() const {
+ return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
+ }
+
+ bool isAISrcF16() const {
+ return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
+ }
+
+ bool isAISrcV2F16() const {
+ return isAISrcF16() || isAISrcB32();
+ }
+
+ bool isAISrc_128B32() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
+ }
+
+ bool isAISrc_128B16() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
+ }
+
+ bool isAISrc_128V2B16() const {
+ return isAISrc_128B16();
+ }
+
+ bool isAISrc_128F32() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
+ }
+
+ bool isAISrc_128F16() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
+ }
+
+ bool isAISrc_128V2F16() const {
+ return isAISrc_128F16() || isAISrc_128B32();
+ }
+
+ bool isAISrc_512B32() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
+ }
+
+ bool isAISrc_512B16() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
+ }
+
+ bool isAISrc_512V2B16() const {
+ return isAISrc_512B16();
+ }
+
+ bool isAISrc_512F32() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
+ }
+
+ bool isAISrc_512F16() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
+ }
+
+ bool isAISrc_512V2F16() const {
+ return isAISrc_512F16() || isAISrc_512B32();
+ }
+
+ bool isAISrc_1024B32() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
+ }
+
+ bool isAISrc_1024B16() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
+ }
+
+ bool isAISrc_1024V2B16() const {
+ return isAISrc_1024B16();
+ }
+
+ bool isAISrc_1024F32() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
+ }
+
+ bool isAISrc_1024F16() const {
+ return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
+ }
+
+ bool isAISrc_1024V2F16() const {
+ return isAISrc_1024F16() || isAISrc_1024B32();
}
bool isKImmFP32() const {
@@ -504,10 +647,15 @@ public:
bool isSMRDOffset8() const;
bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
+ bool isDPP8() const;
bool isDPPCtrl() const;
+ bool isBLGP() const;
+ bool isCBSZ() const;
+ bool isABID() const;
bool isGPRIdxMode() const;
bool isS16Imm() const;
bool isU16Imm() const;
+ bool isEndpgm() const;
StringRef getExpressionAsToken() const {
assert(isExpr());
@@ -535,6 +683,7 @@ public:
}
unsigned getReg() const override {
+ assert(isRegKind());
return Reg.RegNo;
}
@@ -594,6 +743,10 @@ public:
void addRegOperands(MCInst &Inst, unsigned N) const;
+ void addBoolRegOperands(MCInst &Inst, unsigned N) const {
+ addRegOperands(Inst, N);
+ }
+
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
if (isRegKind())
addRegOperands(Inst, N);
@@ -661,6 +814,7 @@ public:
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
+ case ImmTyDLC: OS << "DLC"; break;
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
case ImmTyTFE: OS << "TFE"; break;
@@ -668,15 +822,18 @@ public:
case ImmTyFORMAT: OS << "FORMAT"; break;
case ImmTyClampSI: OS << "ClampSI"; break;
case ImmTyOModSI: OS << "OModSI"; break;
+ case ImmTyDPP8: OS << "DPP8"; break;
case ImmTyDppCtrl: OS << "DppCtrl"; break;
case ImmTyDppRowMask: OS << "DppRowMask"; break;
case ImmTyDppBankMask: OS << "DppBankMask"; break;
case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
+ case ImmTyDppFi: OS << "FI"; break;
case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
case ImmTyDMask: OS << "DMask"; break;
+ case ImmTyDim: OS << "Dim"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
case ImmTyR128A16: OS << "R128A16"; break;
@@ -695,7 +852,12 @@ public:
case ImmTyNegLo: OS << "NegLo"; break;
case ImmTyNegHi: OS << "NegHi"; break;
case ImmTySwizzle: OS << "Swizzle"; break;
+ case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
case ImmTyHigh: OS << "High"; break;
+ case ImmTyBLGP: OS << "BLGP"; break;
+ case ImmTyCBSZ: OS << "CBSZ"; break;
+ case ImmTyABID: OS << "ABID"; break;
+ case ImmTyEndpgm: OS << "Endpgm"; break;
}
}
@@ -747,12 +909,10 @@ public:
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
unsigned RegNo, SMLoc S,
- SMLoc E,
- bool ForceVOP3) {
+ SMLoc E) {
auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
Op->Reg.RegNo = RegNo;
Op->Reg.Mods = Modifiers();
- Op->Reg.IsForcedVOP3 = ForceVOP3;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -817,6 +977,7 @@ public:
void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
switch (RegKind) {
case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
+ case IS_AGPR: // fall through
case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
default: break;
}
@@ -853,6 +1014,8 @@ private:
/// \param VCCUsed [in] Whether VCC special SGPR is reserved.
/// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
/// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
+ /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
+ /// descriptor field, if valid.
/// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
/// \param VGPRRange [in] Token range, used for VGPR diagnostics.
/// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
@@ -861,9 +1024,10 @@ private:
/// \param SGPRBlocks [out] Result SGPR block count.
bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed,
- unsigned NextFreeVGPR, SMRange VGPRRange,
- unsigned NextFreeSGPR, SMRange SGPRRange,
- unsigned &VGPRBlocks, unsigned &SGPRBlocks);
+ Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
+ SMRange VGPRRange, unsigned NextFreeSGPR,
+ SMRange SGPRRange, unsigned &VGPRBlocks,
+ unsigned &SGPRBlocks);
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSAKernel();
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
@@ -876,7 +1040,15 @@ private:
bool ParseDirectiveISAVersion();
bool ParseDirectiveHSAMetadata();
+ bool ParseDirectivePALMetadataBegin();
bool ParseDirectivePALMetadata();
+ bool ParseDirectiveAMDGPULDS();
+
+ /// Common code to parse out a block of text (typically YAML) between start and
+ /// end directives.
+ bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
+ const char *AssemblerDirectiveEnd,
+ std::string &CollectString);
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1,
@@ -884,6 +1056,8 @@ private:
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
unsigned& RegNum, unsigned& RegWidth,
unsigned *DwordRegIndex);
+ bool isRegister();
+ bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
void initializeGprCountSymbol(RegisterKind RegKind);
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
@@ -897,6 +1071,10 @@ public:
enum AMDGPUMatchResultTy {
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
+ enum OperandMode {
+ OperandMode_Default,
+ OperandMode_NSA,
+ };
using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
@@ -908,7 +1086,7 @@ public:
if (getFeatureBits().none()) {
// Set default features.
- copySTI().ToggleFeature("SOUTHERN_ISLANDS");
+ copySTI().ToggleFeature("southern-islands");
}
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
@@ -924,6 +1102,10 @@ public:
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
+ Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
+ Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
} else {
MCSymbol *Sym =
Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
@@ -969,6 +1151,10 @@ public:
return AMDGPU::isGFX9(getSTI());
}
+ bool isGFX10() const {
+ return AMDGPU::isGFX10(getSTI());
+ }
+
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -978,7 +1164,11 @@ public:
}
bool hasSGPR102_SGPR103() const {
- return !isVI();
+ return !isVI() && !isGFX9();
+ }
+
+ bool hasSGPR104_SGPR105() const {
+ return isGFX10();
}
bool hasIntClamp() const {
@@ -1024,7 +1214,8 @@ public:
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
+ OperandMode Mode = OperandMode_Default);
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -1037,11 +1228,11 @@ public:
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t &) = nullptr);
- OperandMatchResultTy parseOperandArrayWithPrefix(
- const char *Prefix,
- OperandVector &Operands,
- AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
- bool (*ConvertResult)(int64_t&) = nullptr);
+ OperandMatchResultTy
+ parseOperandArrayWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t&) = nullptr);
OperandMatchResultTy
parseNamedBit(const char *Name, OperandVector &Operands,
@@ -1049,10 +1240,15 @@ public:
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value);
- bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
- OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
+ bool isModifier();
+ bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
+ bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
+ bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
+ bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
+ bool parseSP3NegModifier();
+ OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
OperandMatchResultTy parseReg(OperandVector &Operands);
- OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
+ OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
@@ -1073,33 +1269,63 @@ private:
struct OperandInfoTy {
int64_t Id;
bool IsSymbolic = false;
+ bool IsDefined = false;
OperandInfoTy(int64_t Id_) : Id(Id_) {}
};
- bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
- bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+ bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
+ bool validateSendMsg(const OperandInfoTy &Msg,
+ const OperandInfoTy &Op,
+ const OperandInfoTy &Stream,
+ const SMLoc Loc);
+
+ bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+ bool validateHwreg(const OperandInfoTy &HwReg,
+ const int64_t Offset,
+ const int64_t Width,
+ const SMLoc Loc);
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
+ SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
- bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
+ bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
+ bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
+ bool validateSOPLiteral(const MCInst &Inst) const;
bool validateConstantBusLimitations(const MCInst &Inst);
bool validateEarlyClobberLimitations(const MCInst &Inst);
bool validateIntClampSupported(const MCInst &Inst);
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
+ bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
+ bool validateMIMGDim(const MCInst &Inst);
+ bool validateLdsDirect(const MCInst &Inst);
+ bool validateOpSel(const MCInst &Inst);
+ bool validateVccOperand(unsigned Reg) const;
+ bool validateVOP3Literal(const MCInst &Inst) const;
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
+ bool isId(const StringRef Id) const;
+ bool isId(const AsmToken &Token, const StringRef Id) const;
+ bool isToken(const AsmToken::TokenKind Kind) const;
bool trySkipId(const StringRef Id);
+ bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
bool trySkipToken(const AsmToken::TokenKind Kind);
bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
+ void peekTokens(MutableArrayRef<AsmToken> Tokens);
+ AsmToken::TokenKind getTokenKind() const;
bool parseExpr(int64_t &Imm);
+ StringRef getTokenStr() const;
+ AsmToken peekToken();
+ AsmToken getToken() const;
+ SMLoc getLoc() const;
+ void lex();
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -1110,6 +1336,7 @@ public:
OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+ OperandMatchResultTy parseBoolReg(OperandVector &Operands);
bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
const unsigned MinVal,
@@ -1124,20 +1351,23 @@ public:
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);
+ OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
+ int64_t parseGPRIdxMacro();
+
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
+ AMDGPUOperand::Ptr defaultDLC() const;
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
- AMDGPUOperand::Ptr defaultOffsetU12() const;
- AMDGPUOperand::Ptr defaultOffsetS13() const;
+ AMDGPUOperand::Ptr defaultFlatOffset() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
@@ -1153,11 +1383,15 @@ public:
bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDim(OperandVector &Operands);
+ OperandMatchResultTy parseDPP8(OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
AMDGPUOperand::Ptr defaultRowMask() const;
AMDGPUOperand::Ptr defaultBankMask() const;
AMDGPUOperand::Ptr defaultBoundCtrl() const;
- void cvtDPP(MCInst &Inst, const OperandVector &Operands);
+ AMDGPUOperand::Ptr defaultFI() const;
+ void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
+ void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type);
@@ -1168,6 +1402,13 @@ public:
void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
uint64_t BasicInstType, bool skipVcc = false);
+
+ AMDGPUOperand::Ptr defaultBLGP() const;
+ AMDGPUOperand::Ptr defaultCBSZ() const;
+ AMDGPUOperand::Ptr defaultABID() const;
+
+ OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
+ AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};
struct OptionalOperand {
@@ -1203,6 +1444,8 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1215,6 +1458,12 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
@@ -1243,7 +1492,20 @@ static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
return true;
}
+static bool isSafeTruncation(int64_t Val, unsigned Size) {
+ return isUIntN(Size, Val) || isIntN(Size, Val);
+}
+
bool AMDGPUOperand::isInlinableImm(MVT type) const {
+
+ // This is a hack to enable named inline values like
+ // shared_base with both 32-bit and 64-bit operands.
+ // Note that these values are defined as
+ // 32-bit operands only.
+ if (isInlineValue()) {
+ return true;
+ }
+
if (!isImmTy(ImmTyNone)) {
// Only plain immediates are inlinable (e.g. "clamp" attribute is not)
return false;
@@ -1282,6 +1544,10 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
AsmParser->hasInv2PiInlineImm());
}
+ if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
+ return false;
+ }
+
if (type.getScalarSizeInBits() == 16) {
return AMDGPU::isInlinableLiteral16(
static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
@@ -1315,7 +1581,7 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
// FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
// types.
- return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
+ return isSafeTruncation(Imm.Val, Size);
}
// We got fp literal token
@@ -1330,8 +1596,14 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
return false;
}
+ // We allow fp literals with f16x2 operands assuming that the specified
+ // literal goes into the lower half and the upper half is zero. We also
+ // require that the literal may be losslessly converted to f16.
+ MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
+ (type == MVT::v2i16)? MVT::i16 : type;
+
APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
- return canLosslesslyConvertToFPType(FPLiteral, type);
+ return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
@@ -1340,9 +1612,9 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
- return isVReg();
- else if (AsmParser->isGFX9())
- return isRegKind() || isInlinableImm(type);
+ return isVReg32();
+ else if (AsmParser->isGFX9() || AsmParser->isGFX10())
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
else
return false;
}
@@ -1363,6 +1635,11 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
return isSDWAOperand(MVT::i32);
}
+bool AMDGPUOperand::isBoolReg() const {
+ return AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
+ isSCSrcB64() : isSCSrcB32();
+}
+
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
@@ -1441,12 +1718,20 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -1456,11 +1741,6 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
// checked earlier in isLiteralImm()
uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
- if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
- OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
- ImmVal |= (ImmVal << 16);
- }
-
Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
@@ -1471,15 +1751,18 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
}
- // We got int literal token.
+ // We got int literal token.
// Only sign extend inline immediates.
- // FIXME: No errors on truncation
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
- if (isInt<32>(Val) &&
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ if (isSafeTruncation(Val, 32) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
@@ -1505,7 +1788,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- if (isInt<16>(Val) &&
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ if (isSafeTruncation(Val, 16) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
@@ -1516,14 +1801,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
- auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
- assert(AMDGPU::isInlinableLiteral16(LiteralVal,
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
+ assert(isSafeTruncation(Val, 16));
+ assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm()));
- uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
- static_cast<uint32_t>(LiteralVal);
- Inst.addOperand(MCOperand::createImm(ImmVal));
+ Inst.addOperand(MCOperand::createImm(Val));
return;
}
default:
@@ -1552,6 +1837,27 @@ void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
+static bool isInlineValue(unsigned Reg) {
+ switch (Reg) {
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ return true;
+ case AMDGPU::SRC_VCCZ:
+ case AMDGPU::SRC_EXECZ:
+ case AMDGPU::SRC_SCC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool AMDGPUOperand::isInlineValue() const {
+ return isRegKind() && ::isInlineValue(getReg());
+}
+
//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//
@@ -1585,6 +1891,15 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
case 8: return AMDGPU::SGPR_256RegClassID;
case 16: return AMDGPU::SGPR_512RegClassID;
}
+ } else if (Is == IS_AGPR) {
+ switch (RegWidth) {
+ default: return -1;
+ case 1: return AMDGPU::AGPR_32RegClassID;
+ case 2: return AMDGPU::AReg_64RegClassID;
+ case 4: return AMDGPU::AReg_128RegClassID;
+ case 16: return AMDGPU::AReg_512RegClassID;
+ case 32: return AMDGPU::AReg_1024RegClassID;
+ }
}
return -1;
}
@@ -1595,8 +1910,25 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Case("vcc", AMDGPU::VCC)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("xnack_mask", AMDGPU::XNACK_MASK)
+ .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
+ .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
+ .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+ .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
+ .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
+ .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
+ .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+ .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
+ .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+ .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
+ .Case("lds_direct", AMDGPU::LDS_DIRECT)
+ .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
.Case("m0", AMDGPU::M0)
- .Case("scc", AMDGPU::SCC)
+ .Case("vccz", AMDGPU::SRC_VCCZ)
+ .Case("src_vccz", AMDGPU::SRC_VCCZ)
+ .Case("execz", AMDGPU::SRC_EXECZ)
+ .Case("src_execz", AMDGPU::SRC_EXECZ)
+ .Case("scc", AMDGPU::SRC_SCC)
+ .Case("src_scc", AMDGPU::SRC_SCC)
.Case("tba", AMDGPU::TBA)
.Case("tma", AMDGPU::TMA)
.Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
@@ -1611,6 +1943,7 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Case("tma_hi", AMDGPU::TMA_HI)
.Case("tba_lo", AMDGPU::TBA_LO)
.Case("tba_hi", AMDGPU::TBA_HI)
+ .Case("null", AMDGPU::SGPR_NULL)
.Default(0);
}
@@ -1663,6 +1996,7 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
return false;
case IS_VGPR:
case IS_SGPR:
+ case IS_AGPR:
case IS_TTMP:
if (Reg1 != Reg + RegWidth) {
return false;
@@ -1674,6 +2008,53 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
}
}
+static const StringRef Registers[] = {
+ { "v" },
+ { "s" },
+ { "ttmp" },
+ { "acc" },
+ { "a" },
+};
+
+bool
+AMDGPUAsmParser::isRegister(const AsmToken &Token,
+ const AsmToken &NextToken) const {
+
+ // A list of consecutive registers: [s0,s1,s2,s3]
+ if (Token.is(AsmToken::LBrac))
+ return true;
+
+ if (!Token.is(AsmToken::Identifier))
+ return false;
+
+ // A single register like s0 or a range of registers like s[0:1]
+
+ StringRef RegName = Token.getString();
+
+ for (StringRef Reg : Registers) {
+ if (RegName.startswith(Reg)) {
+ if (Reg.size() < RegName.size()) {
+ unsigned RegNum;
+ // A single register with an index: rXX
+ if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
+ return true;
+ } else {
+ // A range of registers: r[XX:YY].
+ if (NextToken.is(AsmToken::LBrac))
+ return true;
+ }
+ }
+ }
+
+ return getSpecialRegForName(RegName);
+}
+
+bool
+AMDGPUAsmParser::isRegister()
+{
+ return isRegister(getToken(), peekToken());
+}
+
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
unsigned &RegNum, unsigned &RegWidth,
unsigned *DwordRegIndex) {
@@ -1692,6 +2073,9 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
} else if (RegName[0] == 's') {
RegNumIndex = 1;
RegKind = IS_SGPR;
+ } else if (RegName[0] == 'a') {
+ RegNumIndex = RegName.startswith("acc") ? 3 : 1;
+ RegKind = IS_AGPR;
} else if (RegName.startswith("ttmp")) {
RegNumIndex = strlen("ttmp");
RegKind = IS_TTMP;
@@ -1773,6 +2157,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
break;
case IS_VGPR:
case IS_SGPR:
+ case IS_AGPR:
case IS_TTMP:
{
unsigned Size = 1;
@@ -1859,6 +2244,8 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
unsigned Reg, RegNum, RegWidth, DwordRegIndex;
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
+ //FIXME: improve error messages (bug 41303).
+ Error(StartLoc, "not a valid operand.");
return nullptr;
}
if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
@@ -1866,201 +2253,260 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
return nullptr;
} else
KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
- return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
+ return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
-bool
-AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
- if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
- (getLexer().getKind() == AsmToken::Integer ||
- getLexer().getKind() == AsmToken::Real)) {
- // This is a workaround for handling operands like these:
- // |1.0|
- // |-1|
- // This syntax is not compatible with syntax of standard
- // MC expressions (due to the trailing '|').
-
- SMLoc EndLoc;
- const MCExpr *Expr;
+OperandMatchResultTy
+AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
+ // TODO: add syntactic sugar for 1/(2*PI)
- if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
- return true;
- }
+ assert(!isRegister());
+ assert(!isModifier());
- return !Expr->evaluateAsAbsolute(Val);
+ const auto& Tok = getToken();
+ const auto& NextTok = peekToken();
+ bool IsReal = Tok.is(AsmToken::Real);
+ SMLoc S = getLoc();
+ bool Negate = false;
+
+ if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
+ lex();
+ IsReal = true;
+ Negate = true;
}
- return getParser().parseAbsoluteExpression(Val);
-}
+ if (IsReal) {
+ // Floating-point expressions are not supported.
+ // Can only allow floating-point literals with an
+ // optional sign.
-OperandMatchResultTy
-AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
- // TODO: add syntactic sugar for 1/(2*PI)
- bool Minus = false;
- if (getLexer().getKind() == AsmToken::Minus) {
- const AsmToken NextToken = getLexer().peekTok();
- if (!NextToken.is(AsmToken::Integer) &&
- !NextToken.is(AsmToken::Real)) {
- return MatchOperand_NoMatch;
- }
- Minus = true;
- Parser.Lex();
- }
+ StringRef Num = getTokenStr();
+ lex();
- SMLoc S = Parser.getTok().getLoc();
- switch(getLexer().getKind()) {
- case AsmToken::Integer: {
- int64_t IntVal;
- if (parseAbsoluteExpr(IntVal, AbsMod))
+ APFloat RealVal(APFloat::IEEEdouble());
+ auto roundMode = APFloat::rmNearestTiesToEven;
+ if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
return MatchOperand_ParseFail;
- if (Minus)
- IntVal *= -1;
- Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+ }
+ if (Negate)
+ RealVal.changeSign();
+
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
+ AMDGPUOperand::ImmTyNone, true));
+
return MatchOperand_Success;
- }
- case AsmToken::Real: {
+
+ } else {
int64_t IntVal;
- if (parseAbsoluteExpr(IntVal, AbsMod))
- return MatchOperand_ParseFail;
+ const MCExpr *Expr;
+ SMLoc S = getLoc();
+
+ if (HasSP3AbsModifier) {
+ // This is a workaround for handling expressions
+ // as arguments of SP3 'abs' modifier, for example:
+ // |1.0|
+ // |-1|
+ // |1+x|
+ // This syntax is not compatible with syntax of standard
+ // MC expressions (due to the trailing '|').
+ SMLoc EndLoc;
+ if (getParser().parsePrimaryExpr(Expr, EndLoc))
+ return MatchOperand_ParseFail;
+ } else {
+ if (Parser.parseExpression(Expr))
+ return MatchOperand_ParseFail;
+ }
+
+ if (Expr->evaluateAsAbsolute(IntVal)) {
+ Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
+ }
- APFloat F(BitsToDouble(IntVal));
- if (Minus)
- F.changeSign();
- Operands.push_back(
- AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
- AMDGPUOperand::ImmTyNone, true));
return MatchOperand_Success;
}
- default:
- return MatchOperand_NoMatch;
- }
+
+ return MatchOperand_NoMatch;
}
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
+ if (!isRegister())
+ return MatchOperand_NoMatch;
+
if (auto R = parseRegister()) {
assert(R->isReg());
- R->Reg.IsForcedVOP3 = isForcedVOP3();
Operands.push_back(std::move(R));
return MatchOperand_Success;
}
- return MatchOperand_NoMatch;
+ return MatchOperand_ParseFail;
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
- auto res = parseImm(Operands, AbsMod);
+AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
+ auto res = parseReg(Operands);
if (res != MatchOperand_NoMatch) {
return res;
+ } else if (isModifier()) {
+ return MatchOperand_NoMatch;
+ } else {
+ return parseImm(Operands, HasSP3AbsMod);
}
+}
- return parseReg(Operands);
+bool
+AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
+ if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
+ const auto &str = Token.getString();
+ return str == "abs" || str == "neg" || str == "sext";
+ }
+ return false;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
- bool AllowImm) {
- bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
+bool
+AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
+ return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
+}
- if (getLexer().getKind()== AsmToken::Minus) {
- const AsmToken NextToken = getLexer().peekTok();
+bool
+AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
+ return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
+}
- // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
- if (NextToken.is(AsmToken::Minus)) {
- Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
- return MatchOperand_ParseFail;
- }
+bool
+AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
+ return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
+}
+
+// Check if this is an operand modifier or an opcode modifier
+// which may look like an expression but it is not. We should
+// avoid parsing these modifiers as expressions. Currently
+// recognized sequences are:
+// |...|
+// abs(...)
+// neg(...)
+// sext(...)
+// -reg
+// -|...|
+// -abs(...)
+// name:...
+// Note that simple opcode modifiers like 'gds' may be parsed as
+// expressions; this is a special case. See getExpressionAsToken.
+//
+bool
+AMDGPUAsmParser::isModifier() {
- // '-' followed by an integer literal N should be interpreted as integer
- // negation rather than a floating-point NEG modifier applied to N.
- // Beside being contr-intuitive, such use of floating-point NEG modifier
- // results in different meaning of integer literals used with VOP1/2/C
- // and VOP3, for example:
- // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
- // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
- // Negative fp literals should be handled likewise for unifomtity
- if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
- Parser.Lex();
- Negate = true;
- }
+ AsmToken Tok = getToken();
+ AsmToken NextToken[2];
+ peekTokens(NextToken);
+
+ return isOperandModifier(Tok, NextToken[0]) ||
+ (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
+ isOpcodeModifierWithVal(Tok, NextToken[0]);
+}
+
+// Check if the current token is an SP3 'neg' modifier.
+// Currently this modifier is allowed in the following context:
+//
+// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
+// 2. Before an 'abs' modifier: -abs(...)
+// 3. Before an SP3 'abs' modifier: -|...|
+//
+// In all other cases "-" is handled as a part
+// of an expression that follows the sign.
+//
+// Note: When "-" is followed by an integer literal N,
+// this is interpreted as integer negation rather
+// than a floating-point NEG modifier applied to N.
+// Besides being counter-intuitive, such use of the floating-point
+// NEG modifier would have resulted in a different meaning
+// of integer literals used with VOP1/2/C and VOP3,
+// for example:
+// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
+// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
+// Negative fp literals with a preceding "-" are
+// handled likewise for uniformity.
+//
+bool
+AMDGPUAsmParser::parseSP3NegModifier() {
+
+ AsmToken NextToken[2];
+ peekTokens(NextToken);
+
+ if (isToken(AsmToken::Minus) &&
+ (isRegister(NextToken[0], NextToken[1]) ||
+ NextToken[0].is(AsmToken::Pipe) ||
+ isId(NextToken[0], "abs"))) {
+ lex();
+ return true;
}
- if (getLexer().getKind() == AsmToken::Identifier &&
- Parser.getTok().getString() == "neg") {
- if (Negate) {
- Error(Parser.getTok().getLoc(), "expected register or immediate");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- Negate2 = true;
- if (getLexer().isNot(AsmToken::LParen)) {
- Error(Parser.getTok().getLoc(), "expected left paren after neg");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
+ return false;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+ bool AllowImm) {
+ bool Neg, SP3Neg;
+ bool Abs, SP3Abs;
+ SMLoc Loc;
+
+ // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
+ if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
+ Error(getLoc(), "invalid syntax, expected 'neg' modifier");
+ return MatchOperand_ParseFail;
}
- if (getLexer().getKind() == AsmToken::Identifier &&
- Parser.getTok().getString() == "abs") {
- Parser.Lex();
- Abs2 = true;
- if (getLexer().isNot(AsmToken::LParen)) {
- Error(Parser.getTok().getLoc(), "expected left paren after abs");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
+ SP3Neg = parseSP3NegModifier();
+
+ Loc = getLoc();
+ Neg = trySkipId("neg");
+ if (Neg && SP3Neg) {
+ Error(Loc, "expected register or immediate");
+ return MatchOperand_ParseFail;
}
+ if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
+ return MatchOperand_ParseFail;
- if (getLexer().getKind() == AsmToken::Pipe) {
- if (Abs2) {
- Error(Parser.getTok().getLoc(), "expected register or immediate");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- Abs = true;
+ Abs = trySkipId("abs");
+ if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
+ return MatchOperand_ParseFail;
+
+ Loc = getLoc();
+ SP3Abs = trySkipToken(AsmToken::Pipe);
+ if (Abs && SP3Abs) {
+ Error(Loc, "expected register or immediate");
+ return MatchOperand_ParseFail;
}
OperandMatchResultTy Res;
if (AllowImm) {
- Res = parseRegOrImm(Operands, Abs);
+ Res = parseRegOrImm(Operands, SP3Abs);
} else {
Res = parseReg(Operands);
}
if (Res != MatchOperand_Success) {
- return Res;
+ return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
}
- AMDGPUOperand::Modifiers Mods;
- if (Abs) {
- if (getLexer().getKind() != AsmToken::Pipe) {
- Error(Parser.getTok().getLoc(), "expected vertical bar");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- Mods.Abs = true;
- }
- if (Abs2) {
- if (getLexer().isNot(AsmToken::RParen)) {
- Error(Parser.getTok().getLoc(), "expected closing parentheses");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- Mods.Abs = true;
- }
+ if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
+ return MatchOperand_ParseFail;
+ if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return MatchOperand_ParseFail;
+ if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return MatchOperand_ParseFail;
- if (Negate) {
- Mods.Neg = true;
- } else if (Negate2) {
- if (getLexer().isNot(AsmToken::RParen)) {
- Error(Parser.getTok().getLoc(), "expected closing parentheses");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- Mods.Neg = true;
- }
+ AMDGPUOperand::Modifiers Mods;
+ Mods.Abs = Abs || SP3Abs;
+ Mods.Neg = Neg || SP3Neg;
if (Mods.hasFPModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+ if (Op.isExpr()) {
+ Error(Op.getStartLoc(), "expected an absolute expression");
+ return MatchOperand_ParseFail;
+ }
Op.setModifiers(Mods);
}
return MatchOperand_Success;
@@ -2069,18 +2515,9 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
bool AllowImm) {
- bool Sext = false;
-
- if (getLexer().getKind() == AsmToken::Identifier &&
- Parser.getTok().getString() == "sext") {
- Parser.Lex();
- Sext = true;
- if (getLexer().isNot(AsmToken::LParen)) {
- Error(Parser.getTok().getLoc(), "expected left paren after sext");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- }
+ bool Sext = trySkipId("sext");
+ if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
+ return MatchOperand_ParseFail;
OperandMatchResultTy Res;
if (AllowImm) {
@@ -2089,21 +2526,21 @@ AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
Res = parseReg(Operands);
}
if (Res != MatchOperand_Success) {
- return Res;
+ return Sext? MatchOperand_ParseFail : Res;
}
+ if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
+ return MatchOperand_ParseFail;
+
AMDGPUOperand::Modifiers Mods;
- if (Sext) {
- if (getLexer().isNot(AsmToken::RParen)) {
- Error(Parser.getTok().getLoc(), "expected closing parentheses");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
- Mods.Sext = true;
- }
+ Mods.Sext = Sext;
if (Mods.hasIntModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+ if (Op.isExpr()) {
+ Error(Op.getStartLoc(), "expected an absolute expression");
+ return MatchOperand_ParseFail;
+ }
Op.setModifiers(Mods);
}
@@ -2121,21 +2558,24 @@ AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
}
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
+ auto Loc = getLoc();
+ if (trySkipId("off")) {
+ Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
+ AMDGPUOperand::ImmTyOff, false));
+ return MatchOperand_Success;
+ }
+
+ if (!isRegister())
+ return MatchOperand_NoMatch;
+
std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
if (Reg) {
Operands.push_back(std::move(Reg));
return MatchOperand_Success;
}
- const AsmToken &Tok = Parser.getTok();
- if (Tok.getString() == "off") {
- Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
- AMDGPUOperand::ImmTyOff, false));
- Parser.Lex();
- return MatchOperand_Success;
- }
+ return MatchOperand_ParseFail;
- return MatchOperand_NoMatch;
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
@@ -2163,15 +2603,6 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
}
- if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
- // FIXME: Produces error without correct column reported.
- auto OpNum =
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
- const auto &Op = Inst.getOperand(OpNum);
- if (Op.getImm() != 0)
- return Match_InvalidOperand;
- }
-
return Match_Success;
}
@@ -2214,7 +2645,10 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
switch (Reg) {
case AMDGPU::FLAT_SCR:
case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
case AMDGPU::M0:
+ case AMDGPU::SGPR_NULL:
return Reg;
default:
break;
@@ -2248,7 +2682,11 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
case 2: {
const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
- OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
} else {
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
@@ -2272,6 +2710,8 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned ConstantBusUseCount = 0;
+ unsigned NumLiterals = 0;
+ unsigned LiteralSize;
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
@@ -2283,8 +2723,10 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
++ConstantBusUseCount;
}
+ SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
if (SGPRUsed != AMDGPU::NoRegister) {
+ SGPRsUsed.insert(SGPRUsed);
++ConstantBusUseCount;
}
@@ -2307,16 +2749,41 @@ bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
- if (Reg != SGPRUsed) {
+ if (!SGPRsUsed.count(Reg)) {
+ SGPRsUsed.insert(Reg);
++ConstantBusUseCount;
}
- SGPRUsed = Reg;
} else { // Expression or a literal
- ++ConstantBusUseCount;
+
+ if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
+ continue; // special operand like VINTERP attr_chan
+
+ // An instruction may use only one literal.
+ // This has been validated in a previous step.
+ // See validateVOP3Literal.
+ // This literal may be used as more than one operand.
+ // If all these operands are of the same size,
+ // this literal counts as one scalar value.
+ // Otherwise it counts as 2 scalar values.
+ // See "GFX10 Shader Programming", section 3.6.2.3.
+
+ unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
+ if (Size < 4) Size = 4;
+
+ if (NumLiterals == 0) {
+ NumLiterals = 1;
+ LiteralSize = Size;
+ } else if (LiteralSize != Size) {
+ NumLiterals = 2;
+ }
}
}
}
}
+ ConstantBusUseCount += NumLiterals;
+
+ if (isGFX10())
+ return ConstantBusUseCount <= 2;
return ConstantBusUseCount <= 1;
}
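// Illustrative note (not part of this patch): with the GFX10 limit of 2 above,
// a VOP3 such as "v_add3_u32 v0, s0, s1, 0x12345" reads two distinct SGPRs
// plus one literal (3 scalar values) and is rejected, while
// "v_add3_u32 v0, s0, 0x12345, 0x12345" reuses one 32-bit literal and passes.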
@@ -2405,6 +2872,46 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
return (VDataSize / 4) == DataSize + TFESize;
}
+bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
+ return true;
+
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+ int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+
+ assert(VAddr0Idx != -1);
+ assert(SrsrcIdx != -1);
+ assert(DimIdx != -1);
+ assert(SrsrcIdx > VAddr0Idx);
+
+ unsigned Dim = Inst.getOperand(DimIdx).getImm();
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+ bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
+ unsigned VAddrSize =
+ IsNSA ? SrsrcIdx - VAddr0Idx
+ : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
+
+ unsigned AddrSize = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ if (!IsNSA) {
+ if (AddrSize > 8)
+ AddrSize = 16;
+ else if (AddrSize > 4)
+ AddrSize = 8;
+ }
+
+ return VAddrSize == AddrSize;
+}
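// Illustrative note (not part of this patch): in the non-NSA case above a
// computed address size of 5..8 dwords is rounded up to 8 and 9..13 dwords
// to 16, so e.g. AddrSize = 5 matches a 256-bit (8-dword) vaddr register.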
+
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2461,8 +2968,346 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
return true;
}
+bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ return true;
+
+ int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+ if (DimIdx < 0)
+ return true;
+
+ long Imm = Inst.getOperand(DimIdx).getImm();
+ if (Imm < 0 || Imm >= 8)
+ return false;
+
+ return true;
+}
+
+static bool IsRevOpcode(const unsigned Opcode)
+{
+ switch (Opcode) {
+ case AMDGPU::V_SUBREV_F32_e32:
+ case AMDGPU::V_SUBREV_F32_e64:
+ case AMDGPU::V_SUBREV_F32_e32_gfx10:
+ case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
+ case AMDGPU::V_SUBREV_F32_e32_vi:
+ case AMDGPU::V_SUBREV_F32_e64_gfx10:
+ case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
+ case AMDGPU::V_SUBREV_F32_e64_vi:
+
+ case AMDGPU::V_SUBREV_I32_e32:
+ case AMDGPU::V_SUBREV_I32_e64:
+ case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
+ case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
+
+ case AMDGPU::V_SUBBREV_U32_e32:
+ case AMDGPU::V_SUBBREV_U32_e64:
+ case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
+ case AMDGPU::V_SUBBREV_U32_e32_vi:
+ case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
+ case AMDGPU::V_SUBBREV_U32_e64_vi:
+
+ case AMDGPU::V_SUBREV_U32_e32:
+ case AMDGPU::V_SUBREV_U32_e64:
+ case AMDGPU::V_SUBREV_U32_e32_gfx9:
+ case AMDGPU::V_SUBREV_U32_e32_vi:
+ case AMDGPU::V_SUBREV_U32_e64_gfx9:
+ case AMDGPU::V_SUBREV_U32_e64_vi:
+
+ case AMDGPU::V_SUBREV_F16_e32:
+ case AMDGPU::V_SUBREV_F16_e64:
+ case AMDGPU::V_SUBREV_F16_e32_gfx10:
+ case AMDGPU::V_SUBREV_F16_e32_vi:
+ case AMDGPU::V_SUBREV_F16_e64_gfx10:
+ case AMDGPU::V_SUBREV_F16_e64_vi:
+
+ case AMDGPU::V_SUBREV_U16_e32:
+ case AMDGPU::V_SUBREV_U16_e64:
+ case AMDGPU::V_SUBREV_U16_e32_vi:
+ case AMDGPU::V_SUBREV_U16_e64_vi:
+
+ case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
+ case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
+ case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
+
+ case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
+ case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
+
+ case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
+ case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
+
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
+
+ case AMDGPU::V_LSHRREV_B32_e32:
+ case AMDGPU::V_LSHRREV_B32_e64:
+ case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
+ case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
+ case AMDGPU::V_LSHRREV_B32_e32_vi:
+ case AMDGPU::V_LSHRREV_B32_e64_vi:
+ case AMDGPU::V_LSHRREV_B32_e32_gfx10:
+ case AMDGPU::V_LSHRREV_B32_e64_gfx10:
+
+ case AMDGPU::V_ASHRREV_I32_e32:
+ case AMDGPU::V_ASHRREV_I32_e64:
+ case AMDGPU::V_ASHRREV_I32_e32_gfx10:
+ case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
+ case AMDGPU::V_ASHRREV_I32_e32_vi:
+ case AMDGPU::V_ASHRREV_I32_e64_gfx10:
+ case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
+ case AMDGPU::V_ASHRREV_I32_e64_vi:
+
+ case AMDGPU::V_LSHLREV_B32_e32:
+ case AMDGPU::V_LSHLREV_B32_e64:
+ case AMDGPU::V_LSHLREV_B32_e32_gfx10:
+ case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
+ case AMDGPU::V_LSHLREV_B32_e32_vi:
+ case AMDGPU::V_LSHLREV_B32_e64_gfx10:
+ case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
+ case AMDGPU::V_LSHLREV_B32_e64_vi:
+
+ case AMDGPU::V_LSHLREV_B16_e32:
+ case AMDGPU::V_LSHLREV_B16_e64:
+ case AMDGPU::V_LSHLREV_B16_e32_vi:
+ case AMDGPU::V_LSHLREV_B16_e64_vi:
+ case AMDGPU::V_LSHLREV_B16_gfx10:
+
+ case AMDGPU::V_LSHRREV_B16_e32:
+ case AMDGPU::V_LSHRREV_B16_e64:
+ case AMDGPU::V_LSHRREV_B16_e32_vi:
+ case AMDGPU::V_LSHRREV_B16_e64_vi:
+ case AMDGPU::V_LSHRREV_B16_gfx10:
+
+ case AMDGPU::V_ASHRREV_I16_e32:
+ case AMDGPU::V_ASHRREV_I16_e64:
+ case AMDGPU::V_ASHRREV_I16_e32_vi:
+ case AMDGPU::V_ASHRREV_I16_e64_vi:
+ case AMDGPU::V_ASHRREV_I16_gfx10:
+
+ case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_gfx10:
+ case AMDGPU::V_LSHLREV_B64_vi:
+
+ case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHRREV_B64_gfx10:
+ case AMDGPU::V_LSHRREV_B64_vi:
+
+ case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_ASHRREV_I64_gfx10:
+ case AMDGPU::V_ASHRREV_I64_vi:
+
+ case AMDGPU::V_PK_LSHLREV_B16:
+ case AMDGPU::V_PK_LSHLREV_B16_gfx10:
+ case AMDGPU::V_PK_LSHLREV_B16_vi:
+
+ case AMDGPU::V_PK_LSHRREV_B16:
+ case AMDGPU::V_PK_LSHRREV_B16_gfx10:
+ case AMDGPU::V_PK_LSHRREV_B16_vi:
+ case AMDGPU::V_PK_ASHRREV_I16:
+ case AMDGPU::V_PK_ASHRREV_I16_gfx10:
+ case AMDGPU::V_PK_ASHRREV_I16_vi:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
+
+ using namespace SIInstrFlags;
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+
+ // lds_direct register is defined so that it can be used
+ // with 9-bit operands only. Ignore encodings which do not accept these.
+ if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int SrcIndices[] = { Src1Idx, Src2Idx };
+
+ // lds_direct cannot be specified as either src1 or src2.
+ for (int SrcIdx : SrcIndices) {
+ if (SrcIdx == -1) break;
+ const MCOperand &Src = Inst.getOperand(SrcIdx);
+ if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
+ return false;
+ }
+ }
+
+ if (Src0Idx == -1)
+ return true;
+
+ const MCOperand &Src = Inst.getOperand(Src0Idx);
+ if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
+ return true;
+
+ // lds_direct is specified as src0. Check additional limitations.
+ return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
+}
+
+SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+ if (Op.isFlatOffset())
+ return Op.getStartLoc();
+ }
+ return getLoc();
+}
+
+bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
+ const OperandVector &Operands) {
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::FLAT) == 0)
+ return true;
+
+ auto Opcode = Inst.getOpcode();
+ auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+ assert(OpNum != -1);
+
+ const auto &Op = Inst.getOperand(OpNum);
+ if (!hasFlatOffsets() && Op.getImm() != 0) {
+ Error(getFlatOffsetLoc(Operands),
+ "flat offset modifier is not supported on this GPU");
+ return false;
+ }
+
+ // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
+ // For FLAT segment the offset must be positive;
+ // MSB is ignored and forced to zero.
+ unsigned OffsetSize = isGFX9() ? 13 : 12;
+ if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
+ if (!isIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ isGFX9() ? "expected a 13-bit signed offset" :
+ "expected a 12-bit signed offset");
+ return false;
+ }
+ } else {
+ if (!isUIntN(OffsetSize - 1, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ isGFX9() ? "expected a 12-bit unsigned offset" :
+ "expected an 11-bit unsigned offset");
+ return false;
+ }
+ }
+
+ return true;
+}
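// Illustrative note (not part of this patch): the checks above accept these
// offset ranges:
//   GFX9  global/scratch (signed 13-bit):  -4096 .. 4095
//   GFX9  flat           (unsigned 12-bit):    0 .. 4095
//   GFX10 global/scratch (signed 12-bit):  -2048 .. 2047
//   GFX10 flat           (unsigned 11-bit):    0 .. 2047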
+
+bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
+ unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+ if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+
+ const int OpIndices[] = { Src0Idx, Src1Idx };
+
+ unsigned NumLiterals = 0;
+ uint32_t LiteralValue;
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1) break;
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (MO.isImm() &&
+ // Exclude special imm operands (like that used by s_set_gpr_idx_on)
+ AMDGPU::isSISrcOperand(Desc, OpIdx) &&
+ !isInlineConstant(Inst, OpIdx)) {
+ uint32_t Value = static_cast<uint32_t>(MO.getImm());
+ if (NumLiterals == 0 || LiteralValue != Value) {
+ LiteralValue = Value;
+ ++NumLiterals;
+ }
+ }
+ }
+
+ return NumLiterals <= 1;
+}
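// Illustrative note (not part of this patch): "s_add_u32 s0, 0x12345, 0x12345"
// reuses one literal value (NumLiterals == 1) and is accepted, whereas
// "s_add_u32 s0, 0x12345, 0x54321" uses two distinct literals and is rejected.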
+
+bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
+ const unsigned Opc = Inst.getOpcode();
+ if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
+ Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ if (OpSel & ~3)
+ return false;
+ }
+ return true;
+}
+
+// Check if VCC register matches wavefront size
+bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
+ auto FB = getFeatureBits();
+ return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
+ (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
+}
+
+// VOP3 literal is only allowed in GFX10+ and only one can be used
+bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
+ unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+ if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ unsigned NumLiterals = 0;
+ uint32_t LiteralValue;
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1) break;
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx))
+ continue;
+
+ if (!isInlineConstant(Inst, OpIdx)) {
+ uint32_t Value = static_cast<uint32_t>(MO.getImm());
+ if (NumLiterals == 0 || LiteralValue != Value) {
+ LiteralValue = Value;
+ ++NumLiterals;
+ }
+ }
+ }
+
+ return !NumLiterals ||
+ (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
+}
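// Illustrative note (not part of this patch): "v_fma_f32 v0, v1, 0x40490fdb,
// 0x40490fdb" carries a single distinct literal and is accepted on GFX10
// (FeatureVOP3Literal); the same instruction with two different non-inline
// literals, or any VOP3 literal on earlier targets, is rejected.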
+
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
- const SMLoc &IDLoc) {
+ const SMLoc &IDLoc,
+ const OperandVector &Operands) {
+ if (!validateLdsDirect(Inst)) {
+ Error(IDLoc,
+ "invalid use of lds_direct");
+ return false;
+ }
+ if (!validateSOPLiteral(Inst)) {
+ Error(IDLoc,
+ "only one literal operand is allowed");
+ return false;
+ }
+ if (!validateVOP3Literal(Inst)) {
+ Error(IDLoc,
+ "invalid literal operand");
+ return false;
+ }
if (!validateConstantBusLimitations(Inst)) {
Error(IDLoc,
"invalid operand (violates constant bus restrictions)");
@@ -2478,17 +3323,31 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"integer clamping is not supported on this GPU");
return false;
}
+ if (!validateOpSel(Inst)) {
+ Error(IDLoc,
+ "invalid op_sel operand");
+ return false;
+ }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
"d16 modifier is not supported on this GPU");
return false;
}
+ if (!validateMIMGDim(Inst)) {
+ Error(IDLoc, "dim modifier is required on this GPU");
+ return false;
+ }
if (!validateMIMGDataSize(Inst)) {
Error(IDLoc,
"image data size does not match dmask and tfe");
return false;
}
+ if (!validateMIMGAddrSize(Inst)) {
+ Error(IDLoc,
+ "image address size does not match dim and a16");
+ return false;
+ }
if (!validateMIMGAtomicDMask(Inst)) {
Error(IDLoc,
"invalid atomic image dmask");
@@ -2499,11 +3358,15 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid image_gather dmask: only one bit must be set");
return false;
}
+ if (!validateFlatOffset(Inst, Operands)) {
+ return false;
+ }
return true;
}
-static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string AMDGPUMnemonicSpellCheck(StringRef S,
+ const FeatureBitset &FBS,
unsigned VariantID = 0);
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -2538,7 +3401,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default: break;
case Match_Success:
- if (!validateInstruction(Inst, IDLoc)) {
+ if (!validateInstruction(Inst, IDLoc, Operands)) {
return true;
}
Inst.setLoc(IDLoc);
@@ -2549,7 +3412,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction not supported on this GPU");
case Match_MnemonicFail: {
- uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = AMDGPUMnemonicSpellCheck(
((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
return Error(IDLoc, "invalid instruction" + Suggestion,
@@ -2632,32 +3495,39 @@ bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
bool AMDGPUAsmParser::calculateGPRBlocks(
const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
- bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
- unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
- unsigned &SGPRBlocks) {
+ bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
+ SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
+ unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
IsaVersion Version = getIsaVersion(getSTI().getCPU());
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
- unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
- if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
- NumSGPRs > MaxAddressableNumSGPRs)
- return OutOfRangeError(SGPRRange);
+ if (Version.Major >= 10)
+ NumSGPRs = 0;
+ else {
+ unsigned MaxAddressableNumSGPRs =
+ IsaInfo::getAddressableNumSGPRs(&getSTI());
- NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
+ if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
+ NumSGPRs > MaxAddressableNumSGPRs)
+ return OutOfRangeError(SGPRRange);
- if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
- NumSGPRs > MaxAddressableNumSGPRs)
- return OutOfRangeError(SGPRRange);
+ NumSGPRs +=
+ IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
- if (Features.test(FeatureSGPRInitBug))
- NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+ if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
+ NumSGPRs > MaxAddressableNumSGPRs)
+ return OutOfRangeError(SGPRRange);
- VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
+ if (Features.test(FeatureSGPRInitBug))
+ NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+ }
+
+ VGPRBlocks =
+ IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
@@ -2674,7 +3544,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
+ kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
StringSet<> Seen;
@@ -2688,6 +3558,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
bool ReserveVCC = true;
bool ReserveFlatScr = true;
bool ReserveXNACK = hasXNACK();
+ Optional<bool> EnableWavefrontSize32;
while (true) {
while (getLexer().is(AsmToken::EndOfStatement))
@@ -2736,37 +3607,45 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
Val, ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
- UserSGPRCount++;
+ UserSGPRCount += 1;
+ } else if (ID == ".amdhsa_wavefront_size32") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ EnableWavefrontSize32 = Val;
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ Val, ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -2841,6 +3720,24 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
ValRange);
+ } else if (ID == ".amdhsa_workgroup_processor_mode") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_memory_ordered") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_forward_progress") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+ ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -2888,8 +3785,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
unsigned VGPRBlocks;
unsigned SGPRBlocks;
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
- ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
- SGPRRange, VGPRBlocks, SGPRBlocks))
+ ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
+ VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
+ SGPRBlocks))
return true;
if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
@@ -2994,6 +3892,46 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
return TokError(Err.str());
}
Lex();
+
+ if (ID == "enable_wavefront_size32") {
+ if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (!isGFX10())
+ return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
+ } else {
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
+ }
+ }
+
+ if (ID == "wavefront_size") {
+ if (Header.wavefront_size == 5) {
+ if (!isGFX10())
+ return TokError("wavefront_size=5 is only allowed on GFX10+");
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ return TokError("wavefront_size=5 requires +WavefrontSize32");
+ } else if (Header.wavefront_size == 6) {
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ return TokError("wavefront_size=6 requires +WavefrontSize64");
+ }
+ }
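// Illustrative note (not part of this patch): amd_kernel_code_t stores
// wavefront_size as log2 of the lane count, so the values checked above mean
// wavefront_size=5 -> 32 lanes (wave32, GFX10+) and wavefront_size=6 -> 64 lanes.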
+
+ if (ID == "enable_wgp_mode") {
+ if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
+ return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
+ }
+
+ if (ID == "enable_mem_ordered") {
+ if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
+ return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
+ }
+
+ if (ID == "enable_fwd_progress") {
+ if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
+ return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
+ }
+
return false;
}
@@ -3081,14 +4019,35 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
}
std::string HSAMetadataString;
- raw_string_ostream YamlStream(HSAMetadataString);
+ if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
+ HSAMetadataString))
+ return true;
+
+ if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ } else {
+ if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ }
+
+ return false;
+}
+
+/// Common code to parse out a block of text (typically YAML) between start and
+/// end directives.
+bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
+ const char *AssemblerDirectiveEnd,
+ std::string &CollectString) {
+
+ raw_string_ostream CollectStream(CollectString);
getLexer().setSkipSpace(false);
bool FoundEnd = false;
while (!getLexer().is(AsmToken::Eof)) {
while (getLexer().is(AsmToken::Space)) {
- YamlStream << getLexer().getTok().getString();
+ CollectStream << getLexer().getTok().getString();
Lex();
}
@@ -3101,8 +4060,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
}
}
- YamlStream << Parser.parseStringToEndOfStatement()
- << getContext().getAsmInfo()->getSeparatorString();
+ CollectStream << Parser.parseStringToEndOfStatement()
+ << getContext().getAsmInfo()->getSeparatorString();
Parser.eatToEndOfStatement();
}
@@ -3111,22 +4070,27 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
return TokError(Twine("expected directive ") +
- Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
+ Twine(AssemblerDirectiveEnd) + Twine(" not found"));
}
- YamlStream.flush();
+ CollectStream.flush();
+ return false;
+}
- if (IsaInfo::hasCodeObjectV3(&getSTI())) {
- if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
- } else {
- if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
- }
+/// Parse the assembler directive for new MsgPack-format PAL metadata.
+bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
+ std::string String;
+ if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
+ AMDGPU::PALMD::AssemblerDirectiveEnd, String))
+ return true;
+ auto PALMetadata = getTargetStreamer().getPALMetadata();
+ if (!PALMetadata->setFromString(String))
+ return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
return false;
}
+/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
return Error(getParser().getTok().getLoc(),
@@ -3134,19 +4098,82 @@ bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
"not available on non-amdpal OSes")).str());
}
- PALMD::Metadata PALMetadata;
+ auto PALMetadata = getTargetStreamer().getPALMetadata();
+ PALMetadata->setLegacy();
for (;;) {
- uint32_t Value;
+ uint32_t Key, Value;
+ if (ParseAsAbsoluteExpression(Key)) {
+ return TokError(Twine("invalid value in ") +
+ Twine(PALMD::AssemblerDirective));
+ }
+ if (getLexer().isNot(AsmToken::Comma)) {
+ return TokError(Twine("expected an even number of values in ") +
+ Twine(PALMD::AssemblerDirective));
+ }
+ Lex();
if (ParseAsAbsoluteExpression(Value)) {
return TokError(Twine("invalid value in ") +
Twine(PALMD::AssemblerDirective));
}
- PALMetadata.push_back(Value);
+ PALMetadata->setRegister(Key, Value);
if (getLexer().isNot(AsmToken::Comma))
break;
Lex();
}
- getTargetStreamer().EmitPALMetadata(PALMetadata);
+ return false;
+}
+
+/// ParseDirectiveAMDGPULDS
+/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
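+/// e.g. ".amdgpu_lds lds_buffer, 4096, 16" (symbol name illustrative) reserves
+/// 4096 bytes of LDS with 16-byte alignment; the alignment defaults to 4 when
+/// the third argument is omitted.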
+bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
+ if (getParser().checkForValidSection())
+ return true;
+
+ StringRef Name;
+ SMLoc NameLoc = getLexer().getLoc();
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
+ if (parseToken(AsmToken::Comma, "expected ','"))
+ return true;
+
+ unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+ if (Size < 0)
+ return Error(SizeLoc, "size must be non-negative");
+ if (Size > LocalMemorySize)
+ return Error(SizeLoc, "size is too large");
+
+ int64_t Align = 4;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ SMLoc AlignLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Align))
+ return true;
+ if (Align < 0 || !isPowerOf2_64(Align))
+ return Error(AlignLoc, "alignment must be a power of two");
+
+ // Alignment larger than the size of LDS is possible in theory, as long
+ // as the linker manages to place the symbol at address 0, but we do want
+ // to make sure the alignment fits nicely into a 32-bit integer.
+ if (Align >= 1u << 31)
+ return Error(AlignLoc, "alignment is too large");
+ }
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.amdgpu_lds' directive"))
+ return true;
+
+ Symbol->redefineIfPossible();
+ if (!Symbol->isUndefined())
+ return Error(NameLoc, "invalid symbol redefinition");
+
+ getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
return false;
}
@@ -3183,6 +4210,12 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return ParseDirectiveHSAMetadata();
}
+ if (IDVal == ".amdgpu_lds")
+ return ParseDirectiveAMDGPULDS();
+
+ if (IDVal == PALMD::AssemblerDirectiveBegin)
+ return ParseDirectivePALMetadataBegin();
+
if (IDVal == PALMD::AssemblerDirective)
return ParseDirectivePALMetadata();
@@ -3195,21 +4228,36 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
R.isValid(); ++R) {
if (*R == RegNo)
- return isGFX9();
+ return isGFX9() || isGFX10();
+ }
+
+ // GFX10 has 2 more SGPRs 104 and 105.
+ for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
+ R.isValid(); ++R) {
+ if (*R == RegNo)
+ return hasSGPR104_SGPR105();
}
switch (RegNo) {
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ return !isCI() && !isSI() && !isVI();
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
case AMDGPU::TMA:
case AMDGPU::TMA_LO:
case AMDGPU::TMA_HI:
- return !isGFX9();
+ return !isGFX9() && !isGFX10();
case AMDGPU::XNACK_MASK:
case AMDGPU::XNACK_MASK_LO:
case AMDGPU::XNACK_MASK_HI:
- return !isCI() && !isSI() && hasXNACK();
+ return !isCI() && !isSI() && !isGFX10() && hasXNACK();
+ case AMDGPU::SGPR_NULL:
+ return isGFX10();
default:
break;
}
@@ -3217,8 +4265,10 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
if (isCI())
return true;
- if (isSI()) {
- // No flat_scr
+ if (isSI() || isGFX10()) {
+ // No flat_scr on SI.
+ // On GFX10 flat scratch is not a valid register operand and can only be
+ // accessed with s_setreg/s_getreg.
switch (RegNo) {
case AMDGPU::FLAT_SCR:
case AMDGPU::FLAT_SCR_LO:
@@ -3234,14 +4284,15 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
R.isValid(); ++R) {
if (*R == RegNo)
- return false;
+ return hasSGPR102_SGPR103();
}
return true;
}
OperandMatchResultTy
-AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
+ OperandMode Mode) {
// Try to parse with a custom parser
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -3255,28 +4306,36 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
getLexer().is(AsmToken::EndOfStatement))
return ResTy;
- ResTy = parseRegOrImm(Operands);
+ if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
+ unsigned Prefix = Operands.size();
+ SMLoc LBraceLoc = getTok().getLoc();
+ Parser.Lex(); // eat the '['
- if (ResTy == MatchOperand_Success)
- return ResTy;
+ for (;;) {
+ ResTy = parseReg(Operands);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
- const auto &Tok = Parser.getTok();
- SMLoc S = Tok.getLoc();
+ if (getLexer().is(AsmToken::RBrac))
+ break;
- const MCExpr *Expr = nullptr;
- if (!Parser.parseExpression(Expr)) {
- Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
- return MatchOperand_Success;
- }
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ }
- // Possibly this is an instruction flag like 'gds'.
- if (Tok.getKind() == AsmToken::Identifier) {
- Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
- Parser.Lex();
+ if (Operands.size() - Prefix > 1) {
+ Operands.insert(Operands.begin() + Prefix,
+ AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
+ getTok().getLoc()));
+ }
+
+ Parser.Lex(); // eat the ']'
return MatchOperand_Success;
}
- return MatchOperand_NoMatch;
+ return parseRegOrImm(Operands);
}
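// Illustrative note (not part of this patch): the bracketed list parsed above
// is the GFX10 NSA (non-sequential address) MIMG form, e.g.
//   image_sample v[0:3], [v4, v7, v1], s[0:7], s[12:15] dmask:0xf dim:SQ_RSRC_IMG_3D
// where the address registers need not be consecutive (operands illustrative).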
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
@@ -3308,8 +4367,13 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
Name = parseMnemonicSuffix(Name);
Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
+ bool IsMIMG = Name.startswith("image_");
+
while (!getLexer().is(AsmToken::EndOfStatement)) {
- OperandMatchResultTy Res = parseOperand(Operands, Name);
+ OperandMode Mode = OperandMode_Default;
+ if (IsMIMG && isGFX10() && Operands.size() == 2)
+ Mode = OperandMode_NSA;
+ OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
// Eat the comma or space if there is one.
if (getLexer().is(AsmToken::Comma))
@@ -3318,12 +4382,14 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
switch (Res) {
case MatchOperand_Success: break;
case MatchOperand_ParseFail:
+ // FIXME: use real operand location rather than the current location.
Error(getLexer().getLoc(), "failed parsing operand.");
while (!getLexer().is(AsmToken::EndOfStatement)) {
Parser.Lex();
}
return true;
case MatchOperand_NoMatch:
+ // FIXME: use real operand location rather than the current location.
Error(getLexer().getLoc(), "not a valid operand.");
while (!getLexer().is(AsmToken::EndOfStatement)) {
Parser.Lex();
@@ -3340,46 +4406,19 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
//===----------------------------------------------------------------------===//
OperandMatchResultTy
-AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
- switch(getLexer().getKind()) {
- default: return MatchOperand_NoMatch;
- case AsmToken::Identifier: {
- StringRef Name = Parser.getTok().getString();
- if (!Name.equals(Prefix)) {
- return MatchOperand_NoMatch;
- }
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon))
- return MatchOperand_ParseFail;
-
- Parser.Lex();
-
- bool IsMinus = false;
- if (getLexer().getKind() == AsmToken::Minus) {
- Parser.Lex();
- IsMinus = true;
- }
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
-
- if (getParser().parseAbsoluteExpression(Int))
- return MatchOperand_ParseFail;
+ if (!trySkipId(Prefix, AsmToken::Colon))
+ return MatchOperand_NoMatch;
- if (IsMinus)
- Int = -Int;
- break;
- }
- }
- return MatchOperand_Success;
+ return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}
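// Illustrative note (not part of this patch): this accepts "prefix:value"
// modifiers such as "offset:4095" or "offset:-16", with the value now parsed
// as a full expression rather than a bare integer.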
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy,
bool (*ConvertResult)(int64_t&)) {
- SMLoc S = Parser.getTok().getLoc();
+ SMLoc S = getLoc();
int64_t Value = 0;
OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
@@ -3387,59 +4426,55 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
return Res;
if (ConvertResult && !ConvertResult(Value)) {
- return MatchOperand_ParseFail;
+ Error(S, "invalid " + StringRef(Prefix) + " value.");
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
return MatchOperand_Success;
}
-OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
- const char *Prefix,
- OperandVector &Operands,
- AMDGPUOperand::ImmTy ImmTy,
- bool (*ConvertResult)(int64_t&)) {
- StringRef Name = Parser.getTok().getString();
- if (!Name.equals(Prefix))
+OperandMatchResultTy
+AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy,
+ bool (*ConvertResult)(int64_t&)) {
+ SMLoc S = getLoc();
+ if (!trySkipId(Prefix, AsmToken::Colon))
return MatchOperand_NoMatch;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Colon))
- return MatchOperand_ParseFail;
-
- Parser.Lex();
- if (getLexer().isNot(AsmToken::LBrac))
+ if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
return MatchOperand_ParseFail;
- Parser.Lex();
unsigned Val = 0;
- SMLoc S = Parser.getTok().getLoc();
+ const unsigned MaxSize = 4;
// FIXME: How to verify the number of elements matches the number of src
// operands?
- for (int I = 0; I < 4; ++I) {
- if (I != 0) {
- if (getLexer().is(AsmToken::RBrac))
- break;
+ for (int I = 0; ; ++I) {
+ int64_t Op;
+ SMLoc Loc = getLoc();
+ if (!parseExpr(Op))
+ return MatchOperand_ParseFail;
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
+ if (Op != 0 && Op != 1) {
+ Error(Loc, "invalid " + StringRef(Prefix) + " value.");
+ return MatchOperand_ParseFail;
}
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
+ Val |= (Op << I);
- int64_t Op;
- if (getParser().parseAbsoluteExpression(Op))
+ if (trySkipToken(AsmToken::RBrac))
+ break;
+
+ if (I + 1 == MaxSize) {
+ Error(getLoc(), "expected a closing square bracket");
return MatchOperand_ParseFail;
+ }
- if (Op != 0 && Op != 1)
+ if (!skipToken(AsmToken::Comma, "expected a comma"))
return MatchOperand_ParseFail;
- Val |= (Op << I);
}
- Parser.Lex();
Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
return MatchOperand_Success;
}
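// Illustrative note (not part of this patch): this parses bit-array modifiers
// like "op_sel:[0,1]" (Val = 0b10) or "neg_lo:[1,0,1]" (Val = 0b101); each
// element I must be 0 or 1 and sets bit I of the immediate, up to 4 elements.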
@@ -3459,7 +4494,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
if (Tok == Name) {
if (Tok == "r128" && isGFX9())
Error(S, "r128 modifier is not supported on this GPU");
- if (Tok == "a16" && !isGFX9())
+ if (Tok == "a16" && !isGFX9() && !isGFX10())
Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
@@ -3476,6 +4511,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
}
}
+ if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
+ return MatchOperand_ParseFail;
+
Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -3616,7 +4654,8 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
}
AMDGPUOperand::ImmTy OffsetType =
- (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
+ (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
+ Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
AMDGPUOperand::ImmTyOffset;
@@ -3716,20 +4755,18 @@ encodeCnt(
}
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
- StringRef CntName = Parser.getTok().getString();
- int64_t CntVal;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::LParen))
- return true;
+ SMLoc CntLoc = getLoc();
+ StringRef CntName = getTokenStr();
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return true;
+ if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
+ !skipToken(AsmToken::LParen, "expected a left parenthesis"))
+ return false;
- SMLoc ValLoc = Parser.getTok().getLoc();
- if (getParser().parseAbsoluteExpression(CntVal))
- return true;
+ int64_t CntVal;
+ SMLoc ValLoc = getLoc();
+ if (!parseExpr(CntVal))
+ return false;
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
@@ -3742,265 +4779,240 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
} else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
+ } else {
+ Error(CntLoc, "invalid counter name " + CntName);
+ return false;
}
if (Failed) {
Error(ValLoc, "too large value for " + CntName);
- return true;
+ return false;
}
- if (getLexer().isNot(AsmToken::RParen)) {
- return true;
- }
+ if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
+ return false;
- Parser.Lex();
- if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
- const AsmToken NextToken = getLexer().peekTok();
- if (NextToken.is(AsmToken::Identifier)) {
- Parser.Lex();
+ if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
+ if (isToken(AsmToken::EndOfStatement)) {
+ Error(getLoc(), "expected a counter name");
+ return false;
}
}
- return false;
+ return true;
}
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
int64_t Waitcnt = getWaitcntBitMask(ISA);
- SMLoc S = Parser.getTok().getLoc();
-
- switch(getLexer().getKind()) {
- default: return MatchOperand_ParseFail;
- case AsmToken::Integer:
- // The operand can be an integer value.
- if (getParser().parseAbsoluteExpression(Waitcnt))
- return MatchOperand_ParseFail;
- break;
+ SMLoc S = getLoc();
- case AsmToken::Identifier:
- do {
- if (parseCnt(Waitcnt))
- return MatchOperand_ParseFail;
- } while(getLexer().isNot(AsmToken::EndOfStatement));
- break;
+ // If parsing failed, do not return an error code
+ // to avoid excessive error messages.
+ if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
+ while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
+ } else {
+ parseExpr(Waitcnt);
}
+
Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
return MatchOperand_Success;
}
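// Illustrative note (not part of this patch): typical accepted forms are
// "s_waitcnt vmcnt(0) lgkmcnt(0)" (named counters via parseCnt) and
// "s_waitcnt 0" (a plain expression); counters left unmentioned keep the
// all-ones "don't wait" default from getWaitcntBitMask().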
-bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
- int64_t &Width) {
- using namespace llvm::AMDGPU::Hwreg;
+bool
+AMDGPUOperand::isSWaitCnt() const {
+ return isImm();
+}
- if (Parser.getTok().getString() != "hwreg")
- return true;
- Parser.Lex();
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
- if (getLexer().isNot(AsmToken::LParen))
- return true;
- Parser.Lex();
+bool
+AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
+ int64_t &Offset,
+ int64_t &Width) {
+ using namespace llvm::AMDGPU::Hwreg;
- if (getLexer().is(AsmToken::Identifier)) {
+ // The register may be specified by name or using a numeric code
+ if (isToken(AsmToken::Identifier) &&
+ (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
HwReg.IsSymbolic = true;
- HwReg.Id = ID_UNKNOWN_;
- const StringRef tok = Parser.getTok().getString();
- int Last = ID_SYMBOLIC_LAST_;
- if (isSI() || isCI() || isVI())
- Last = ID_SYMBOLIC_FIRST_GFX9_;
- for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
- if (tok == IdSymbolic[i]) {
- HwReg.Id = i;
- break;
- }
- }
- Parser.Lex();
- } else {
- HwReg.IsSymbolic = false;
- if (getLexer().isNot(AsmToken::Integer))
- return true;
- if (getParser().parseAbsoluteExpression(HwReg.Id))
- return true;
- }
-
- if (getLexer().is(AsmToken::RParen)) {
- Parser.Lex();
+ lex(); // skip message name
+ } else if (!parseExpr(HwReg.Id)) {
return false;
}
- // optional params
- if (getLexer().isNot(AsmToken::Comma))
- return true;
- Parser.Lex();
-
- if (getLexer().isNot(AsmToken::Integer))
- return true;
- if (getParser().parseAbsoluteExpression(Offset))
+ if (trySkipToken(AsmToken::RParen))
return true;
- if (getLexer().isNot(AsmToken::Comma))
- return true;
- Parser.Lex();
+ // parse optional params
+ return
+ skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
+ parseExpr(Offset) &&
+ skipToken(AsmToken::Comma, "expected a comma") &&
+ parseExpr(Width) &&
+ skipToken(AsmToken::RParen, "expected a closing parenthesis");
+}
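// Illustrative note (not part of this patch): the body above corresponds to
// operands such as "hwreg(HW_REG_TRAPSTS, 8, 4)" (named register, bit offset 8,
// width 4) or "hwreg(6)" (numeric id with default offset/width); validateHwreg()
// below then requires id < 64, offset < 32 and a width of 1..32.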
- if (getLexer().isNot(AsmToken::Integer))
- return true;
- if (getParser().parseAbsoluteExpression(Width))
- return true;
+bool
+AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
+ const int64_t Offset,
+ const int64_t Width,
+ const SMLoc Loc) {
- if (getLexer().isNot(AsmToken::RParen))
- return true;
- Parser.Lex();
+ using namespace llvm::AMDGPU::Hwreg;
- return false;
+ if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
+ Error(Loc, "specified hardware register is not supported on this GPU");
+ return false;
+ } else if (!isValidHwreg(HwReg.Id)) {
+ Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
+ return false;
+ } else if (!isValidHwregOffset(Offset)) {
+ Error(Loc, "invalid bit offset: only 5-bit values are legal");
+ return false;
+ } else if (!isValidHwregWidth(Width)) {
+ Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
+ return false;
+ }
+ return true;
}
-OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
+OperandMatchResultTy
+AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
using namespace llvm::AMDGPU::Hwreg;
- int64_t Imm16Val = 0;
- SMLoc S = Parser.getTok().getLoc();
-
- switch(getLexer().getKind()) {
- default: return MatchOperand_NoMatch;
- case AsmToken::Integer:
- // The operand can be an integer value.
- if (getParser().parseAbsoluteExpression(Imm16Val))
- return MatchOperand_NoMatch;
- if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
- Error(S, "invalid immediate: only 16-bit values are legal");
- // Do not return error code, but create an imm operand anyway and proceed
- // to the next operand, if any. That avoids unneccessary error messages.
- }
- break;
-
- case AsmToken::Identifier: {
- OperandInfoTy HwReg(ID_UNKNOWN_);
- int64_t Offset = OFFSET_DEFAULT_;
- int64_t Width = WIDTH_M1_DEFAULT_ + 1;
- if (parseHwregConstruct(HwReg, Offset, Width))
- return MatchOperand_ParseFail;
- if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
- if (HwReg.IsSymbolic)
- Error(S, "invalid symbolic name of hardware register");
- else
- Error(S, "invalid code of hardware register: only 6-bit values are legal");
- }
- if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
- Error(S, "invalid bit offset: only 5-bit values are legal");
- if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
- Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
- Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
- }
- break;
+ int64_t ImmVal = 0;
+ SMLoc Loc = getLoc();
+
+ // If parsing failed, do not return an error code
+ // to avoid excessive error messages.
+ if (trySkipId("hwreg", AsmToken::LParen)) {
+ OperandInfoTy HwReg(ID_UNKNOWN_);
+ int64_t Offset = OFFSET_DEFAULT_;
+ int64_t Width = WIDTH_DEFAULT_;
+ if (parseHwregBody(HwReg, Offset, Width) &&
+ validateHwreg(HwReg, Offset, Width, Loc)) {
+ ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
+ }
+ } else if (parseExpr(ImmVal)) {
+ if (ImmVal < 0 || !isUInt<16>(ImmVal))
+ Error(Loc, "invalid immediate: only 16-bit values are legal");
}
- Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
- return MatchOperand_Success;
-}
-bool AMDGPUOperand::isSWaitCnt() const {
- return isImm();
+ Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
+ return MatchOperand_Success;
}
bool AMDGPUOperand::isHwreg() const {
return isImmTy(ImmTyHwreg);
}
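A minimal sketch of the packing that encodeHwreg() presumably performs on the validated fields, assuming shift values of 0, 6 and 11 inferred from the field widths implied by the error messages above (6-bit register id, 5-bit bit offset, widths 1 to 32); the real helper is not shown in this hunk and the function name below is hypothetical.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for llvm::AMDGPU::Hwreg::encodeHwreg():
// bits [5:0] hold the register id, bits [10:6] the bit offset,
// bits [15:11] hold (width - 1).
static uint16_t packHwreg(unsigned Id, unsigned Offset, unsigned Width) {
  assert(Id < 64 && Offset < 32 && Width >= 1 && Width <= 32);
  return uint16_t(Id | (Offset << 6) | ((Width - 1) << 11));
}

int main() {
  // hwreg(5, 1, 6): id = 5, offset = 1, width = 6.
  return packHwreg(5, 1, 6) == (5u | (1u << 6) | (5u << 11)) ? 0 : 1;
}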
-bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
+//===----------------------------------------------------------------------===//
+// sendmsg
+//===----------------------------------------------------------------------===//
+
+bool
+AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
+ OperandInfoTy &Op,
+ OperandInfoTy &Stream) {
using namespace llvm::AMDGPU::SendMsg;
- if (Parser.getTok().getString() != "sendmsg")
- return true;
- Parser.Lex();
+ if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
+ Msg.IsSymbolic = true;
+ lex(); // skip message name
+ } else if (!parseExpr(Msg.Id)) {
+ return false;
+ }
- if (getLexer().isNot(AsmToken::LParen))
- return true;
- Parser.Lex();
+ if (trySkipToken(AsmToken::Comma)) {
+ Op.IsDefined = true;
+ if (isToken(AsmToken::Identifier) &&
+ (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
+ lex(); // skip operation name
+ } else if (!parseExpr(Op.Id)) {
+ return false;
+ }
- if (getLexer().is(AsmToken::Identifier)) {
- Msg.IsSymbolic = true;
- Msg.Id = ID_UNKNOWN_;
- const std::string tok = Parser.getTok().getString();
- for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
- switch(i) {
- default: continue; // Omit gaps.
- case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
- }
- if (tok == IdSymbolic[i]) {
- Msg.Id = i;
- break;
- }
+ if (trySkipToken(AsmToken::Comma)) {
+ Stream.IsDefined = true;
+ if (!parseExpr(Stream.Id))
+ return false;
}
- Parser.Lex();
- } else {
- Msg.IsSymbolic = false;
- if (getLexer().isNot(AsmToken::Integer))
- return true;
- if (getParser().parseAbsoluteExpression(Msg.Id))
- return true;
- if (getLexer().is(AsmToken::Integer))
- if (getParser().parseAbsoluteExpression(Msg.Id))
- Msg.Id = ID_UNKNOWN_;
}
- if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
- return false;
- if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
- if (getLexer().isNot(AsmToken::RParen))
- return true;
- Parser.Lex();
+ return skipToken(AsmToken::RParen, "expected a closing parenthesis");
+}
+
+bool
+AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
+ const OperandInfoTy &Op,
+ const OperandInfoTy &Stream,
+ const SMLoc S) {
+ using namespace llvm::AMDGPU::SendMsg;
+
+ // Validation strictness depends on whether the message is specified
+ // in a symbolic or in a numeric form. In the latter case
+ // only whether the value can be encoded is checked.
+ bool Strict = Msg.IsSymbolic;
+
+ if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
+ Error(S, "invalid message id");
+ return false;
+ } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
+ Error(S, Op.IsDefined ?
+ "message does not support operations" :
+ "missing message operation");
+ return false;
+ } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
+ Error(S, "invalid operation id");
+ return false;
+ } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
+ Error(S, "message operation does not support streams");
+ return false;
+ } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
+ Error(S, "invalid message stream id");
return false;
}
+ return true;
+}
- if (getLexer().isNot(AsmToken::Comma))
- return true;
- Parser.Lex();
+OperandMatchResultTy
+AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
+ using namespace llvm::AMDGPU::SendMsg;
- assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
- Operation.Id = ID_UNKNOWN_;
- if (getLexer().is(AsmToken::Identifier)) {
- Operation.IsSymbolic = true;
- const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
- const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
- const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
- const StringRef Tok = Parser.getTok().getString();
- for (int i = F; i < L; ++i) {
- if (Tok == S[i]) {
- Operation.Id = i;
- break;
- }
+ int64_t ImmVal = 0;
+ SMLoc Loc = getLoc();
+
+ // If parse failed, do not return error code
+ // to avoid excessive error messages.
+ if (trySkipId("sendmsg", AsmToken::LParen)) {
+ OperandInfoTy Msg(ID_UNKNOWN_);
+ OperandInfoTy Op(OP_NONE_);
+ OperandInfoTy Stream(STREAM_ID_NONE_);
+ if (parseSendMsgBody(Msg, Op, Stream) &&
+ validateSendMsg(Msg, Op, Stream, Loc)) {
+ ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
}
- Parser.Lex();
- } else {
- Operation.IsSymbolic = false;
- if (getLexer().isNot(AsmToken::Integer))
- return true;
- if (getParser().parseAbsoluteExpression(Operation.Id))
- return true;
+ } else if (parseExpr(ImmVal)) {
+ if (ImmVal < 0 || !isUInt<16>(ImmVal))
+ Error(Loc, "invalid immediate: only 16-bit values are legal");
}
- if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
- // Stream id is optional.
- if (getLexer().is(AsmToken::RParen)) {
- Parser.Lex();
- return false;
- }
-
- if (getLexer().isNot(AsmToken::Comma))
- return true;
- Parser.Lex();
-
- if (getLexer().isNot(AsmToken::Integer))
- return true;
- if (getParser().parseAbsoluteExpression(StreamId))
- return true;
- }
+ Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
+ return MatchOperand_Success;
+}
- if (getLexer().isNot(AsmToken::RParen))
- return true;
- Parser.Lex();
- return false;
+bool AMDGPUOperand::isSendMsg() const {
+ return isImmTy(ImmTySendMsg);
}
+//===----------------------------------------------------------------------===//
+// v_interp
+//===----------------------------------------------------------------------===//
+
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
if (getLexer().getKind() != AsmToken::Identifier)
return MatchOperand_NoMatch;
@@ -4062,6 +5074,10 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
return MatchOperand_Success;
}
+//===----------------------------------------------------------------------===//
+// exp
+//===----------------------------------------------------------------------===//
+
void AMDGPUAsmParser::errorExpTgt() {
Error(Parser.getTok().getLoc(), "invalid exp target");
}
@@ -4094,13 +5110,18 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
if (Str.getAsInteger(10, Val))
return MatchOperand_ParseFail;
- if (Val > 3)
+ if (Val > 4 || (Val == 4 && !isGFX10()))
errorExpTgt();
Val += 12;
return MatchOperand_Success;
}
+ if (isGFX10() && Str == "prim") {
+ Val = 20;
+ return MatchOperand_Success;
+ }
+
if (Str.startswith("param")) {
Str = Str.drop_front(5);
if (Str.getAsInteger(10, Val))
@@ -4141,98 +5162,39 @@ OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
return MatchOperand_Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
- using namespace llvm::AMDGPU::SendMsg;
-
- int64_t Imm16Val = 0;
- SMLoc S = Parser.getTok().getLoc();
+//===----------------------------------------------------------------------===//
+// parser helpers
+//===----------------------------------------------------------------------===//
- switch(getLexer().getKind()) {
- default:
- return MatchOperand_NoMatch;
- case AsmToken::Integer:
- // The operand can be an integer value.
- if (getParser().parseAbsoluteExpression(Imm16Val))
- return MatchOperand_NoMatch;
- if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
- Error(S, "invalid immediate: only 16-bit values are legal");
- // Do not return error code, but create an imm operand anyway and proceed
- // to the next operand, if any. That avoids unneccessary error messages.
- }
- break;
- case AsmToken::Identifier: {
- OperandInfoTy Msg(ID_UNKNOWN_);
- OperandInfoTy Operation(OP_UNKNOWN_);
- int64_t StreamId = STREAM_ID_DEFAULT_;
- if (parseSendMsgConstruct(Msg, Operation, StreamId))
- return MatchOperand_ParseFail;
- do {
- // Validate and encode message ID.
- if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
- || Msg.Id == ID_SYSMSG)) {
- if (Msg.IsSymbolic)
- Error(S, "invalid/unsupported symbolic name of message");
- else
- Error(S, "invalid/unsupported code of message");
- break;
- }
- Imm16Val = (Msg.Id << ID_SHIFT_);
- // Validate and encode operation ID.
- if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
- if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
- if (Operation.IsSymbolic)
- Error(S, "invalid symbolic name of GS_OP");
- else
- Error(S, "invalid code of GS_OP: only 2-bit values are legal");
- break;
- }
- if (Operation.Id == OP_GS_NOP
- && Msg.Id != ID_GS_DONE) {
- Error(S, "invalid GS_OP: NOP is for GS_DONE only");
- break;
- }
- Imm16Val |= (Operation.Id << OP_SHIFT_);
- }
- if (Msg.Id == ID_SYSMSG) {
- if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
- if (Operation.IsSymbolic)
- Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
- else
- Error(S, "invalid/unsupported code of SYSMSG_OP");
- break;
- }
- Imm16Val |= (Operation.Id << OP_SHIFT_);
- }
- // Validate and encode stream ID.
- if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
- if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
- Error(S, "invalid stream id: only 2-bit values are legal");
- break;
- }
- Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
- }
- } while (false);
- }
- break;
- }
- Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
- return MatchOperand_Success;
+bool
+AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
+ return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}
-bool AMDGPUOperand::isSendMsg() const {
- return isImmTy(ImmTySendMsg);
+bool
+AMDGPUAsmParser::isId(const StringRef Id) const {
+ return isId(getToken(), Id);
}
-//===----------------------------------------------------------------------===//
-// parser helpers
-//===----------------------------------------------------------------------===//
+bool
+AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
+ return getTokenKind() == Kind;
+}
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
- if (getLexer().getKind() == AsmToken::Identifier &&
- Parser.getTok().getString() == Id) {
- Parser.Lex();
+ if (isId(Id)) {
+ lex();
+ return true;
+ }
+ return false;
+}
+
+bool
+AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
+ if (isId(Id) && peekToken().is(Kind)) {
+ lex();
+ lex();
return true;
}
return false;
@@ -4240,8 +5202,8 @@ AMDGPUAsmParser::trySkipId(const StringRef Id) {
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
- if (getLexer().getKind() == Kind) {
- Parser.Lex();
+ if (isToken(Kind)) {
+ lex();
return true;
}
return false;
@@ -4251,7 +5213,7 @@ bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
const StringRef ErrMsg) {
if (!trySkipToken(Kind)) {
- Error(Parser.getTok().getLoc(), ErrMsg);
+ Error(getLoc(), ErrMsg);
return false;
}
return true;
@@ -4264,17 +5226,54 @@ AMDGPUAsmParser::parseExpr(int64_t &Imm) {
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
- SMLoc S = Parser.getTok().getLoc();
- if (getLexer().getKind() == AsmToken::String) {
- Val = Parser.getTok().getStringContents();
- Parser.Lex();
+ if (isToken(AsmToken::String)) {
+ Val = getToken().getStringContents();
+ lex();
return true;
} else {
- Error(S, ErrMsg);
+ Error(getLoc(), ErrMsg);
return false;
}
}
+AsmToken
+AMDGPUAsmParser::getToken() const {
+ return Parser.getTok();
+}
+
+AsmToken
+AMDGPUAsmParser::peekToken() {
+ return getLexer().peekTok();
+}
+
+void
+AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
+ auto TokCount = getLexer().peekTokens(Tokens);
+
+ for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
+ Tokens[Idx] = AsmToken(AsmToken::Error, "");
+}
+
+AsmToken::TokenKind
+AMDGPUAsmParser::getTokenKind() const {
+ return getLexer().getKind();
+}
+
+SMLoc
+AMDGPUAsmParser::getLoc() const {
+ return getToken().getLoc();
+}
+
+StringRef
+AMDGPUAsmParser::getTokenStr() const {
+ return getToken().getString();
+}
+
+void
+AMDGPUAsmParser::lex() {
+ Parser.Lex();
+}
+
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
@@ -4322,8 +5321,8 @@ AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
"expected a 2-bit lane id")) {
Imm = QUAD_PERM_ENC;
- for (auto i = 0; i < LANE_NUM; ++i) {
- Imm |= Lane[i] << (LANE_SHIFT * i);
+ for (unsigned I = 0; I < LANE_NUM; ++I) {
+ Imm |= Lane[I] << (LANE_SHIFT * I);
}
return true;
}
@@ -4519,6 +5518,88 @@ AMDGPUOperand::isSwizzle() const {
}
//===----------------------------------------------------------------------===//
+// VGPR Index Mode
+//===----------------------------------------------------------------------===//
+
+int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
+
+ using namespace llvm::AMDGPU::VGPRIndexMode;
+
+ if (trySkipToken(AsmToken::RParen)) {
+ return OFF;
+ }
+
+ int64_t Imm = 0;
+
+ while (true) {
+ unsigned Mode = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
+ if (trySkipId(IdSymbolic[ModeId])) {
+ Mode = 1 << ModeId;
+ break;
+ }
+ }
+
+ if (Mode == 0) {
+ Error(S, (Imm == 0)?
+ "expected a VGPR index mode or a closing parenthesis" :
+ "expected a VGPR index mode");
+ break;
+ }
+
+ if (Imm & Mode) {
+ Error(S, "duplicate VGPR index mode");
+ break;
+ }
+ Imm |= Mode;
+
+ if (trySkipToken(AsmToken::RParen))
+ break;
+ if (!skipToken(AsmToken::Comma,
+ "expected a comma or a closing parenthesis"))
+ break;
+ }
+
+ return Imm;
+}
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
+
+ int64_t Imm = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "gpr_idx" &&
+ getLexer().peekTok().is(AsmToken::LParen)) {
+
+ Parser.Lex();
+ Parser.Lex();
+
+ // If parse failed, trigger an error but do not return error code
+ // to avoid excessive error messages.
+ Imm = parseGPRIdxMacro();
+
+ } else {
+ if (getParser().parseAbsoluteExpression(Imm))
+ return MatchOperand_NoMatch;
+ if (Imm < 0 || !isUInt<4>(Imm)) {
+ Error(S, "invalid immediate: only 4-bit values are legal");
+ }
+ }
+
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
+ return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isGPRIdxMode() const {
+ return isImmTy(ImmTyGprIdxMode);
+}
+
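A short sketch of the mode accumulation done by parseGPRIdxMacro() above, assuming mode ids 0..3 so that the result fits the 4-bit gpr_idx immediate; the helper name is hypothetical and the actual mode names come from VGPRIndexMode::IdSymbolic, which this hunk does not show.

#include <cstdint>

// Hypothetical helper mirroring the loop in parseGPRIdxMacro(): each named
// mode contributes one bit (1 << ModeId) and duplicates are rejected.
static bool addGPRIdxMode(int64_t &Imm, unsigned ModeId) {
  int64_t Mode = int64_t(1) << ModeId;
  if (Imm & Mode)
    return false; // duplicate VGPR index mode
  Imm |= Mode;
  return true;
}

int main() {
  int64_t Imm = 0;
  bool Ok = addGPRIdxMode(Imm, 0) && addGPRIdxMode(Imm, 3) &&
            !addGPRIdxMode(Imm, 0); // repeating mode 0 is a duplicate
  return (Ok && Imm == 0x9) ? 0 : 1;
}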
+//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//
@@ -4546,9 +5627,22 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
}
//===----------------------------------------------------------------------===//
+// Boolean holding registers
+//===----------------------------------------------------------------------===//
+
+OperandMatchResultTy
+AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
+ return parseReg(Operands);
+}
+
+//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+}
+
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
@@ -4566,13 +5660,19 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
bool HasLdsModifier = false;
OptionalImmIndexMap OptionalIdx;
assert(IsAtomicReturn ? IsAtomic : true);
+ unsigned FirstOperandIdx = 1;
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
// Add the register arguments
if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
+ // Insert a tied src for atomic return dst.
+ // This cannot be postponed as subsequent calls to
+ // addImmOperands rely on correct number of MC operands.
+ if (IsAtomicReturn && i == FirstOperandIdx)
+ Op.addRegOperands(Inst, 1);
continue;
}
@@ -4582,7 +5682,7 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
continue;
}
- HasLdsModifier = Op.isLDS();
+ HasLdsModifier |= Op.isLDS();
// Handle tokens like 'offen' which are sometimes hard-coded into the
// asm string. There are no MCInst operands for these.
@@ -4610,12 +5710,6 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
}
- // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
- if (IsAtomicReturn) {
- MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
- Inst.insert(I, *I);
- }
-
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
if (!IsAtomic) { // glc is hard-coded.
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
@@ -4625,6 +5719,9 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
+
+ if (isGFX10())
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -4662,6 +5759,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+
+ if (isGFX10())
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
}
//===----------------------------------------------------------------------===//
@@ -4692,19 +5792,26 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
Op.addRegOperands(Inst, 1);
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
- } else {
+ } else if (!Op.isToken()) {
llvm_unreachable("unexpected operand type");
}
}
+ bool IsGFX10 = isGFX10();
+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
+ if (IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
+ if (IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+ if (!IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
@@ -4742,11 +5849,7 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
@@ -4801,7 +5904,8 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
- {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
+ {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
+ {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4816,9 +5920,11 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
+ {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
{"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
+ {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
{"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
@@ -4828,7 +5934,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
{"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
{"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
- {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
+ {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
+ {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
+ {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
+ {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -4884,7 +5993,9 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.Type == AMDGPUOperand::ImmTyNegHi) {
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
Op.ConvertResult);
- } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
+ } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
+ res = parseDim(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
res = parseDfmtNfmt(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
@@ -4964,7 +6075,7 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
} else if (Op.isInterpSlot() ||
Op.isInterpAttr() ||
Op.isAttrChan()) {
- Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
+ Inst.addOperand(MCOperand::createImm(Op.getImm()));
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
@@ -5029,14 +6140,17 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
}
- // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
+ // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
// it has src2 register operand that is tied to dst operand
// we don't allow modifiers for this operand in assembler so src2_modifiers
// should be 0.
- if (Opc == AMDGPU::V_MAC_F32_e64_si ||
+ if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
+ Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
Opc == AMDGPU::V_MAC_F32_e64_vi ||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
- Opc == AMDGPU::V_FMAC_F32_e64_vi) {
+ Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+ Opc == AMDGPU::V_FMAC_F32_e64_vi ||
+ Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
auto it = Inst.begin();
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
@@ -5137,6 +6251,10 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
// dpp
//===----------------------------------------------------------------------===//
+bool AMDGPUOperand::isDPP8() const {
+ return isImmTy(ImmTyDPP8);
+}
+
bool AMDGPUOperand::isDPPCtrl() const {
using namespace AMDGPU::DPP;
@@ -5154,13 +6272,27 @@ bool AMDGPUOperand::isDPPCtrl() const {
(Imm == DppCtrl::ROW_MIRROR) ||
(Imm == DppCtrl::ROW_HALF_MIRROR) ||
(Imm == DppCtrl::BCAST15) ||
- (Imm == DppCtrl::BCAST31);
+ (Imm == DppCtrl::BCAST31) ||
+ (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
+ (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
}
return false;
}
-bool AMDGPUOperand::isGPRIdxMode() const {
- return isImm() && isUInt<4>(getImm());
+//===----------------------------------------------------------------------===//
+// mAI
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isBLGP() const {
+ return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
+}
+
+bool AMDGPUOperand::isCBSZ() const {
+ return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
+}
+
+bool AMDGPUOperand::isABID() const {
+ return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}
bool AMDGPUOperand::isS16Imm() const {
@@ -5171,6 +6303,108 @@ bool AMDGPUOperand::isU16Imm() const {
return isImm() && isUInt<16>(getImm());
}
+OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+ if (!isGFX10())
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ if (getLexer().getTok().getString() != "dim")
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+
+ // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
+ // integer.
+ std::string Token;
+ if (getLexer().is(AsmToken::Integer)) {
+ SMLoc Loc = getLexer().getTok().getEndLoc();
+ Token = getLexer().getTok().getString();
+ Parser.Lex();
+ if (getLexer().getTok().getLoc() != Loc)
+ return MatchOperand_ParseFail;
+ }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return MatchOperand_ParseFail;
+ Token += getLexer().getTok().getString();
+
+ StringRef DimId = Token;
+ if (DimId.startswith("SQ_RSRC_IMG_"))
+ DimId = DimId.substr(12);
+
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
+ if (!DimInfo)
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
+ AMDGPUOperand::ImmTyDim));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ StringRef Prefix;
+
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ Prefix = Parser.getTok().getString();
+ } else {
+ return MatchOperand_NoMatch;
+ }
+
+ if (Prefix != "dpp8")
+ return parseDPPCtrl(Operands);
+ if (!isGFX10())
+ return MatchOperand_NoMatch;
+
+ // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
+
+ int64_t Sels[8];
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(Sels[0]))
+ return MatchOperand_ParseFail;
+ if (0 > Sels[0] || 7 < Sels[0])
+ return MatchOperand_ParseFail;
+
+ for (size_t i = 1; i < 8; ++i) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getParser().parseAbsoluteExpression(Sels[i]))
+ return MatchOperand_ParseFail;
+ if (0 > Sels[i] || 7 < Sels[i])
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+
+ unsigned DPP8 = 0;
+ for (size_t i = 0; i < 8; ++i)
+ DPP8 |= (Sels[i] << (i * 3));
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
+ return MatchOperand_Success;
+}
+
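A small sketch of the selector packing done by the loop above: each of the eight lane selects is a 3-bit value, and select i lands at bits [3*i+2:3*i] of the 24-bit dpp8 field. The helper name below is hypothetical.

#include <array>
#include <cstdint>

// Hypothetical helper mirroring the packing loop in parseDPP8().
static unsigned packDPP8(const std::array<int64_t, 8> &Sels) {
  unsigned DPP8 = 0;
  for (unsigned i = 0; i < 8; ++i)
    DPP8 |= unsigned(Sels[i] & 7) << (i * 3);
  return DPP8;
}

int main() {
  // dpp8:[7,6,5,4,3,2,1,0] reverses the lanes within each group of eight.
  // Written in octal, the packed value reads one digit per select,
  // the most-significant digit being select 7.
  const std::array<int64_t, 8> Sels = {7, 6, 5, 4, 3, 2, 1, 0};
  return packDPP8(Sels) == 01234567 ? 0 : 1;
}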
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
using namespace AMDGPU::DPP;
@@ -5201,10 +6435,21 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
&& Prefix != "wave_rol"
&& Prefix != "wave_shr"
&& Prefix != "wave_ror"
- && Prefix != "row_bcast") {
+ && Prefix != "row_bcast"
+ && Prefix != "row_share"
+ && Prefix != "row_xmask") {
return MatchOperand_NoMatch;
}
+ if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
+ return MatchOperand_NoMatch;
+
+ if (!isVI() && !isGFX9() &&
+ (Prefix == "wave_shl" || Prefix == "wave_shr" ||
+ Prefix == "wave_rol" || Prefix == "wave_ror" ||
+ Prefix == "row_bcast"))
+ return MatchOperand_NoMatch;
+
Parser.Lex();
if (getLexer().isNot(AsmToken::Colon))
return MatchOperand_ParseFail;
@@ -5262,6 +6507,10 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
} else {
return MatchOperand_ParseFail;
}
+ } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
+ Int |= DppCtrl::ROW_SHARE_FIRST;
+ } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
+ Int |= DppCtrl::ROW_XMASK_FIRST;
} else {
return MatchOperand_ParseFail;
}
@@ -5276,6 +6525,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
+}
+
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
@@ -5284,7 +6537,11 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
-void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
+}
+
+void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
@@ -5293,6 +6550,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
+ int Fi = 0;
for (unsigned E = Operands.size(); I != E; ++I) {
auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
MCOI::TIED_TO);
@@ -5303,25 +6561,49 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
}
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
- if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+ if (Op.isReg() && validateVccOperand(Op.getReg())) {
// VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
// Skip it.
continue;
- } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
- Op.addRegWithFPInputModsOperands(Inst, 2);
- } else if (Op.isDPPCtrl()) {
- Op.addImmOperands(Inst, 1);
- } else if (Op.isImm()) {
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = I;
+ }
+
+ if (IsDPP8) {
+ if (Op.isDPP8()) {
+ Op.addImmOperands(Inst, 1);
+ } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isFI()) {
+ Fi = Op.getImm();
+ } else if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ } else {
+ llvm_unreachable("Invalid operand type");
+ }
} else {
- llvm_unreachable("Invalid operand type");
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegWithFPInputModsOperands(Inst, 2);
+ } else if (Op.isDPPCtrl()) {
+ Op.addImmOperands(Inst, 1);
+ } else if (Op.isImm()) {
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ llvm_unreachable("Invalid operand type");
+ }
}
}
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+ if (IsDPP8) {
+ using namespace llvm::AMDGPU::DPP;
+ Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
+ } else {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
+ }
+ }
}
//===----------------------------------------------------------------------===//
@@ -5422,7 +6704,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+ if (skipVcc && !skippedVcc && Op.isReg() &&
+ (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
// VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
// Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
// or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
@@ -5448,7 +6731,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
skippedVcc = false;
}
- if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
+ if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
+ Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
// v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
switch (BasicInstType) {
@@ -5474,7 +6758,8 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
break;
case SIInstrFlags::VOPC:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
@@ -5495,6 +6780,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
}
}
+//===----------------------------------------------------------------------===//
+// mAI
+//===----------------------------------------------------------------------===//
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
+}
+
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
@@ -5552,3 +6853,28 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Match_InvalidOperand;
}
}
+
+//===----------------------------------------------------------------------===//
+// endpgm
+//===----------------------------------------------------------------------===//
+
+OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t Imm = 0;
+
+ if (!parseExpr(Imm)) {
+ // The operand is optional, if not present default to 0
+ Imm = 0;
+ }
+
+ if (!isUInt<16>(Imm)) {
+ Error(S, "expected a 16-bit value");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
+ return MatchOperand_Success;
+}
+
+bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 51c2abeac2ff..62a19d848af2 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -1,37 +1,22 @@
//===-- BUFInstructions.td - Buffer Instruction Definitions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
-class MubufLoad <SDPatternOperator op> : PatFrag <
- (ops node:$ptr), (op node:$ptr), [{
- auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-def mubuf_load : MubufLoad <load>;
-def mubuf_az_extloadi8 : MubufLoad <az_extloadi8>;
-def mubuf_sextloadi8 : MubufLoad <sextloadi8>;
-def mubuf_az_extloadi16 : MubufLoad <az_extloadi16>;
-def mubuf_sextloadi16 : MubufLoad <sextloadi16>;
-def mubuf_load_atomic : MubufLoad <atomic_load>;
-
def BUFAddrKind {
int Offset = 0;
int OffEn = 1;
@@ -97,7 +82,9 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_vdata = 1;
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
+ bits<1> has_dlc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<1> dlc_value = 0; // the value for dlc if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
@@ -120,6 +107,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
bits<12> offset;
bits<1> glc;
+ bits<1> dlc;
bits<7> format;
bits<8> vaddr;
bits<8> vdata;
@@ -138,17 +126,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe),
+ SLC:$slc, TFE:$tfe, DLC:$dlc),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
- SLC:$slc, TFE:$tfe)
+ SLC:$slc, TFE:$tfe, DLC:$dlc)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
@@ -199,7 +187,7 @@ class MTBUF_Load_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs vdataClass:$vdata),
getMTBUFIns<addrKindCopy>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -214,13 +202,13 @@ multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
- i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -245,7 +233,7 @@ class MTBUF_Store_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs),
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -260,13 +248,13 @@ multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i8:$format, i1:$glc,
- i1:$slc, i1:$tfe))]>,
+ i1:$slc, i1:$tfe, i1:$dlc))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i8:$format, i1:$glc,
- i1:$slc, i1:$tfe))]>,
+ i1:$slc, i1:$tfe, i1:$dlc))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -324,7 +312,9 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<1> has_vdata = 1;
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
+ bits<1> has_dlc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<1> dlc_value = 0; // the value for dlc if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
@@ -333,7 +323,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins,
bits<4> dwords = 0;
}
-class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
+class MUBUF_Real <MUBUF_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
let isPseudo = 0;
@@ -348,6 +338,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
bits<12> offset;
bits<1> glc;
+ bits<1> dlc;
bits<8> vaddr;
bits<8> vdata;
bits<7> srsrc;
@@ -358,7 +349,7 @@ class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
// For cache invalidation instructions.
-class MUBUF_Invalidate <string opName, SDPatternOperator node> :
+class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {
let AsmMatchConverter = "";
@@ -373,7 +364,9 @@ class MUBUF_Invalidate <string opName, SDPatternOperator node> :
let has_vdata = 0;
let has_vaddr = 0;
let has_glc = 0;
+ let has_dlc = 0;
let glc_value = 0;
+ let dlc_value = 0;
let has_srsrc = 0;
let has_soffset = 0;
let has_offset = 0;
@@ -400,7 +393,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
);
dag ret = !con(
!if(!empty(vdataList), InsNoData, InsData),
- !if(isLds, (ins), (ins TFE:$tfe))
+ !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
);
}
@@ -460,7 +453,7 @@ class MUBUF_Load_Pseudo <string opName,
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
!if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
- !if(isLds, " lds", "$tfe"),
+ !if(isLds, " lds", "$tfe") # "$dlc",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -477,6 +470,24 @@ class MUBUF_Load_Pseudo <string opName,
let dwords = getMUBUFDwords<vdataClass>.ret;
}
+class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
+ (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+ (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+>;
+
+class MUBUF_Addr64_Load_Pat <Instruction inst,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> : Pat <
+ (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+ (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+>;
+
+multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
+ def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
+ def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
+}
+
+
// FIXME: tfe can't be an operand because it requires a separate
// opcode because it needs an N+1 register class dest register.
multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
@@ -485,20 +496,10 @@ multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
bit TiedDest = 0,
bit isLds = 0> {
- def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
- TiedDest, isLds,
- !if(isLds,
- [],
- [(set load_vt:$vdata,
- (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+ def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, TiedDest, isLds>,
MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
- def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
- TiedDest, isLds,
- !if(isLds,
- [],
- [(set load_vt:$vdata,
- (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+ def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, TiedDest, isLds>,
MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
@@ -531,7 +532,7 @@ class MUBUF_Store_Pseudo <string opName,
: MUBUF_Pseudo<opName,
(outs),
getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
- " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -547,12 +548,12 @@ multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
MUBUFAddr64Table<0, NAME>;
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
MUBUFAddr64Table<1, NAME>;
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -638,6 +639,7 @@ class MUBUF_Atomic_Pseudo<string opName,
let hasSideEffects = 1;
let DisableWQM = 1;
let has_glc = 0;
+ let has_dlc = 0;
let has_tfe = 0;
let maybeAtomic = 1;
}
@@ -656,6 +658,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
let glc_value = 0;
+ let dlc_value = 0;
let AsmMatchConverter = "cvtMubufAtomic";
}
@@ -673,6 +676,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
let glc_value = 1;
+ let dlc_value = 0;
let Constraints = "$vdata = $vdata_in";
let DisableEncoding = "$vdata_in";
let AsmMatchConverter = "cvtMubufAtomicReturn";
@@ -681,34 +685,53 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic> {
+ SDPatternOperator atomic,
+ bit isFP = getIsFP<vdataType>.ret> {
+ let FPAtomic = isFP in
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
MUBUFAddr64Table <0, NAME>;
+
+ let FPAtomic = isFP in
def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
MUBUFAddr64Table <1, NAME>;
+
+ let FPAtomic = isFP in
def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+
+ let FPAtomic = isFP in
+
def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+
+ let FPAtomic = isFP in
def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic> {
+ SDPatternOperator atomic,
+ bit isFP = getIsFP<vdataType>.ret> {
+ let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
+ let FPAtomic = isFP in
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
+ let FPAtomic = isFP in
def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+
+ let FPAtomic = isFP in
def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+
+ let FPAtomic = isFP in
def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
}
@@ -804,34 +827,45 @@ let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
} // End HasPackedD16VMem.
defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8
+ "buffer_load_ubyte", VGPR_32, i32
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8
+ "buffer_load_sbyte", VGPR_32, i32
>;
defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16
+ "buffer_load_ushort", VGPR_32, i32
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16
+ "buffer_load_sshort", VGPR_32, i32
>;
defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds <
- "buffer_load_dword", VGPR_32, i32, mubuf_load
+ "buffer_load_dword", VGPR_32, i32
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load
+ "buffer_load_dwordx2", VReg_64, v2i32
>;
defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx3", VReg_96, untyped, mubuf_load
+ "buffer_load_dwordx3", VReg_96, v3i32
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load
+ "buffer_load_dwordx4", VReg_128, v4i32
>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
+
// This is not described in AMD documentation,
// but 'lds' versions of these opcodes are available
// in at least GFX8+ chips. See Bug 37653.
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
"buffer_load_dwordx2", VReg_64, v2i32, null_frag, 0, 1
>;
@@ -856,7 +890,7 @@ defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
"buffer_store_dwordx2", VReg_64, v2i32, store_global
>;
defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
- "buffer_store_dwordx3", VReg_96, untyped, store_global
+ "buffer_store_dwordx3", VReg_96, v3i32, store_global
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
"buffer_store_dwordx4", VReg_128, v4i32, store_global
@@ -940,11 +974,11 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
>;
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
}
-let SubtargetPredicate = isSI in { // isn't on CI & VI
+let SubtargetPredicate = isGFX6 in { // isn't on CI & VI
/*
defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
@@ -1006,17 +1040,28 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
int_amdgcn_buffer_wbinvl1>;
+let SubtargetPredicate = HasAtomicFaddInsts in {
+
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
+ "buffer_atomic_add_f32", VGPR_32, f32, atomic_add_global
+>;
+defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
+ "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
+
//===----------------------------------------------------------------------===//
// MTBUF Instructions
//===----------------------------------------------------------------------===//
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
-defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_128>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>;
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>;
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
@@ -1041,19 +1086,21 @@ let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>;
} // End HasPackedD16VMem.
-let SubtargetPredicate = isCIVI in {
+let SubtargetPredicate = isGFX7Plus in {
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
-// Remaining instructions:
-// BUFFER_LOAD_DWORDX3
-// BUFFER_STORE_DWORDX3
def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
int_amdgcn_buffer_wbinvl1_vol>;
-} // End let SubtargetPredicate = isCIVI
+} // End let SubtargetPredicate = isGFX7Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+ def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
+ def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
+} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
// MUBUF Patterns
@@ -1067,6 +1114,10 @@ def extract_slc : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
}]>;
+def extract_dlc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+}]>;
+
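For reference, a small sketch of how the buffer-intrinsic cachepolicy immediate decomposes into the per-instruction cache bits consumed by the patterns below: bit 1 (slc) and bit 2 (dlc) follow the SDNodeXForms above, while bit 0 for glc is an assumption since extract_glc is not shown in this hunk.

#include <cstdint>

// Sketch of the cachepolicy bit layout assumed above.
struct CachePolicyBits {
  bool GLC; // bit 0 (assumed)
  bool SLC; // bit 1, matches extract_slc
  bool DLC; // bit 2, matches extract_dlc
};

static CachePolicyBits decodeCachePolicy(uint64_t CachePolicy) {
  return {bool(CachePolicy & 1), bool((CachePolicy >> 1) & 1),
          bool((CachePolicy >> 2) & 1)};
}

int main() {
  // cachepolicy = 5 -> glc = 1, slc = 0, dlc = 1.
  CachePolicyBits B = decodeCachePolicy(5);
  return (B.GLC && !B.SLC && B.DLC) ? 0 : 1;
}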
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1077,21 +1128,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1100,7 +1151,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
@@ -1108,6 +1159,8 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, i32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2i32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3f32, "BUFFER_LOAD_FORMAT_XYZ">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v3i32, "BUFFER_LOAD_FORMAT_XYZ">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4i32, "BUFFER_LOAD_FORMAT_XYZW">;
@@ -1131,8 +1184,14 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, i32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2i32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3f32, "BUFFER_LOAD_DWORDX3">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v3i32, "BUFFER_LOAD_DWORDX3">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4i32, "BUFFER_LOAD_DWORDX4">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_byte, i32, "BUFFER_LOAD_SBYTE">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_short, i32, "BUFFER_LOAD_SSHORT">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ubyte, i32, "BUFFER_LOAD_UBYTE">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
@@ -1140,21 +1199,23 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (as_i16imm $offset), (extract_glc $cachepolicy),
+ (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (as_i16imm $offset), (extract_glc $cachepolicy),
+ (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1163,8 +1224,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
+ (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
@@ -1172,6 +1233,8 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, i32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2i32, "BUFFER_STORE_FORMAT_XY">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3f32, "BUFFER_STORE_FORMAT_XYZ">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v3i32, "BUFFER_STORE_FORMAT_XYZ">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4i32, "BUFFER_STORE_FORMAT_XYZW">;
@@ -1195,42 +1258,47 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, i32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2i32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3f32, "BUFFER_STORE_DWORDX3">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v3i32, "BUFFER_STORE_DWORDX3">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4i32, "BUFFER_STORE_DWORDX4">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_byte, i32, "BUFFER_STORE_BYTE">;
+defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">;
//===----------------------------------------------------------------------===//
// buffer_atomic patterns
//===----------------------------------------------------------------------===//
-multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
+multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
+ string opcode> {
def : GCNPat<
- (name i32:$vdata_in, v4i32:$rsrc, 0,
+ (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (name i32:$vdata_in, v4i32:$rsrc, 0,
+ (vt (name vt:$vdata_in, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ imm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
- (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ imm:$cachepolicy, imm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1238,16 +1306,66 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
>;
}
-defm : BufferAtomicPatterns<SIbuffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_add, "BUFFER_ATOMIC_ADD">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_and, "BUFFER_ATOMIC_AND">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_or, "BUFFER_ATOMIC_OR">;
-defm : BufferAtomicPatterns<SIbuffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">;
+defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
+
+multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : GCNPat<
+ (name vt:$vdata_in, v4i32:$rsrc, 0,
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_slc $cachepolicy))
+ >;
+
+ def : GCNPat<
+ (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ 0, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_slc $cachepolicy))
+ >;
+
+ def : GCNPat<
+ (name vt:$vdata_in, v4i32:$rsrc, 0,
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, 0),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (extract_slc $cachepolicy))
+ >;
+
+ def : GCNPat<
+ (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ i32:$voffset, i32:$soffset, imm:$offset,
+ imm:$cachepolicy, imm),
+ (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
+ $vdata_in,
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy))
+ >;
+}
+
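+// The fadd buffer atomics are defined as no-return pseudos, so these patterns
+// select the plain (non-_RTN) forms and do not produce the old value.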
+defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
+defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
def : GCNPat<
(SIbuffer_atomic_cmpswap
@@ -1298,12 +1416,11 @@ def : GCNPat<
sub0)
>;
-
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
>;
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
@@ -1311,43 +1428,47 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
def : GCNPat <
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
>;
def : GCNPat <
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
- (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
>;
}
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isGFX6GFX7 in {
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, zextloadi8_constant>;
def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, extloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, zextloadi16_constant>;
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
-} // End SubtargetPredicate = isSICI
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, atomic_load_32_global>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_64_global>;
+} // End SubtargetPredicate = isGFX6GFX7
multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag ld> {
def : GCNPat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
- (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+ (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
>;
}
let OtherPredicates = [Has16BitInsts] in {
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, mubuf_sextloadi8>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_global>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_global>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, mubuf_load>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, load_global>;
} // End OtherPredicates = [Has16BitInsts]
@@ -1357,111 +1478,79 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
def : GCNPat <
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
}
// XXX - Is it possible to have a complex pattern in a PatFrag?
-multiclass MUBUFScratchLoadPat_Hi16 <MUBUF_Pseudo InstrOffen,
+multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
MUBUF_Pseudo InstrOffset,
- ValueType vt, PatFrag ld> {
- def : GCNPat <
- (build_vector vt:$lo, (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset)))),
- (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo))
- >;
-
- def : GCNPat <
- (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset)))))),
- (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo))
- >;
-
-
- def : GCNPat <
- (build_vector vt:$lo, (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))),
- (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo))
- >;
-
- def : GCNPat <
- (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))))),
- (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo))
- >;
-}
-
-multiclass MUBUFScratchLoadPat_Lo16 <MUBUF_Pseudo InstrOffen,
- MUBUF_Pseudo InstrOffset,
- ValueType vt, PatFrag ld> {
- def : GCNPat <
- (build_vector (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))),
- (vt (Hi16Elt vt:$hi))),
- (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $hi))
- >;
-
+ ValueType vt, PatFrag ld_frag> {
def : GCNPat <
- (build_vector (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))))),
- (f16 (Hi16Elt f16:$hi))),
- (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $hi))
+ (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
>;
def : GCNPat <
- (build_vector (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
- (vt (Hi16Elt vt:$hi))),
- (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $hi))
- >;
-
- def : GCNPat <
- (build_vector (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))))),
- (f16 (Hi16Elt f16:$hi))),
- (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $hi))
+ (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
>;
}
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, az_extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, zextloadi8_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, az_extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX3_OFFEN, BUFFER_LOAD_DWORDX3_OFFSET, v3i32, load_private>;
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
let OtherPredicates = [D16PreservesUnusedBits] in {
-defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, i16, load_private>;
-defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, i16, az_extloadi8_private>;
-defm : MUBUFScratchLoadPat_Hi16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2i16, load_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2i16, az_extloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2i16, sextloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_HI_OFFEN, BUFFER_LOAD_SHORT_D16_HI_OFFSET, v2f16, load_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_HI_OFFEN, BUFFER_LOAD_UBYTE_D16_HI_OFFSET, v2f16, az_extloadi8_d16_hi_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_HI_OFFEN, BUFFER_LOAD_SBYTE_D16_HI_OFFSET, v2f16, sextloadi8_d16_hi_private>;
-defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, i16, load_private>;
-defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>;
-defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, v2i16, load_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, v2i16, az_extloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2i16, sextloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D16_OFFSET, v2f16, load_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, v2f16, az_extloadi8_d16_lo_private>;
+defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, v2f16, sextloadi8_d16_lo_private>;
}
+
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
  // Store follows atomic op convention so address is first
def : GCNPat <
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc), vt:$val),
- (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
>;
def : GCNPat <
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
- (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
>;
}
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isGFX6GFX7 in {
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, store_atomic_global>;
defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, store_atomic_global>;
-} // End Predicates = isSICI
+} // End Predicates = isGFX6GFX7
multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
@@ -1469,8 +1558,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
- (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
+ (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
>;
}
@@ -1479,17 +1568,18 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;
multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
MUBUF_Pseudo InstrOffset,
- ValueType vt, PatFrag st> {
+ ValueType vt, PatFrag st,
+ RegisterClass rc = VGPR_32> {
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset)),
- (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
u16imm:$offset)),
- (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+ (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
}
@@ -1498,8 +1588,9 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
let OtherPredicates = [D16PreservesUnusedBits] in {
@@ -1526,7 +1617,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1534,7 +1625,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1542,7 +1633,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1552,15 +1643,17 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, i32, "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2i32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3i32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4i32, "TBUFFER_LOAD_FORMAT_XYZW">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f32, "TBUFFER_LOAD_FORMAT_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
+defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
@@ -1582,7 +1675,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1590,7 +1683,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1598,7 +1691,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
@@ -1608,17 +1701,17 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
- (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
}
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, i32, "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2i32, "TBUFFER_STORE_FORMAT_XY">;
-defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4i32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3i32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4i32, "TBUFFER_STORE_FORMAT_XYZW">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, f32, "TBUFFER_STORE_FORMAT_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY">;
-defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">;
+defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
let SubtargetPredicate = HasUnpackedD16VMem in {
@@ -1634,28 +1727,22 @@ let SubtargetPredicate = HasPackedD16VMem in {
} // End HasPackedD16VMem.
//===----------------------------------------------------------------------===//
-// Target instructions, move to the appropriate target TD file
+// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// SI
+// Base ENC_MUBUF for GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real<op, ps>,
- Enc64,
- SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
- let AssemblerPredicate=isSICI;
- let DecoderNamespace="SICI";
-
+class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
+ MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{15} = ps.addr64;
let Inst{16} = !if(ps.lds, 1, 0);
let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
+ let Inst{31-26} = 0x38;
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
@@ -1664,125 +1751,250 @@ class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-multiclass MUBUF_Real_AllAddr_si<bits<7> op> {
- def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
- def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
- def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
- def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
- def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
-
-multiclass MUBUF_Real_AllAddr_Lds_si<bits<7> op> {
-
- def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
- MUBUFLdsTable<0, NAME # "_OFFSET_si">;
- def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
- MUBUFLdsTable<0, NAME # "_ADDR64_si">;
- def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
- MUBUFLdsTable<0, NAME # "_OFFEN_si">;
- def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
- MUBUFLdsTable<0, NAME # "_IDXEN_si">;
- def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
- MUBUFLdsTable<0, NAME # "_BOTHEN_si">;
-
- def _LDS_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
- MUBUFLdsTable<1, NAME # "_OFFSET_si">;
- def _LDS_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
- MUBUFLdsTable<1, NAME # "_ADDR64_si">;
- def _LDS_OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
- MUBUFLdsTable<1, NAME # "_OFFEN_si">;
- def _LDS_IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
- MUBUFLdsTable<1, NAME # "_IDXEN_si">;
- def _LDS_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
- MUBUFLdsTable<1, NAME # "_BOTHEN_si">;
-}
-
-multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> {
- def _OFFSET_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
- def _ADDR64_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
- def _OFFEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
- def _IDXEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
- def _BOTHEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
-}
-
-defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_si <0x00>;
-defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>;
-defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>;
-defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>;
-defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>;
-defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>;
-defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>;
-defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_si <0x08>;
-defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_si <0x09>;
-defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_si <0x0a>;
-defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_si <0x0b>;
-defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_si <0x0c>;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>;
-defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_si <0x0f>;
-defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_si <0x18>;
-defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_si <0x1a>;
-defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_si <0x1c>;
-defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_si <0x1d>;
-defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_si <0x1e>;
-defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_si <0x1f>;
-
-defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_si <0x30>;
-defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_si <0x31>;
-defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_si <0x32>;
-defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_si <0x33>;
-//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_si <0x35>;
-defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_si <0x36>;
-defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_si <0x37>;
-defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_si <0x38>;
-defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_si <0x39>;
-defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_si <0x3a>;
-defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_si <0x3b>;
-defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_si <0x3c>;
-defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_si <0x3d>;
-
-//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI
-//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI
-//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_si <0x40>; // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_si <0x50>;
-defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_si <0x51>;
-defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_si <0x52>;
-defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_si <0x53>;
-//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_si <0x55>;
-defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_si <0x56>;
-defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_si <0x57>;
-defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_si <0x58>;
-defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_si <0x59>;
-defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>;
-defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>;
-defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>;
-defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>;
-// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
-//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e">; // isn't on VI
-//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI
-//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI
-
-def BUFFER_WBINVL1_SC_si : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
-def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
-
-class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
- MTBUF_Real<ps>,
- Enc64,
- SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
- let AssemblerPredicate=isSICI;
- let DecoderNamespace="SICI";
+class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
+ let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+ let Inst{25} = op{7};
+}
+
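+// GFX6/GFX7 use bit 15 for addr64; GFX10 repurposes it for dlc and gains an
+// eighth opcode bit, op{7}, in bit 25.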
+class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> :
+ Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+ let Inst{15} = ps.addr64;
+}
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName,
+ string asmName> {
+ def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> {
+ MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName);
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
+ def _BOTHEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+ multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
+ def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+ MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
+ def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+ MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
+ def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+ MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
+ def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+ MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
+
+ def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+ MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
+ def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+ MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
+ def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+ MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
+ def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+ MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
+ }
+ multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx10<op> {
+ def _BOTHEN_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+ def _IDXEN_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+ def _OFFEN_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+ def _OFFSET_RTN_gfx10 :
+ MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
+defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>;
+defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x020>;
+defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x021>;
+defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx10<0x022>;
+defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x023>;
+defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx10<0x024>;
+defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x025>;
+// FIXME-GFX10: Add following instructions:
+//defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x026>;
+//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
+defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x080>;
+defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x081>;
+defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x082>;
+defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x083>;
+defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx10<0x084>;
+defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx10<0x085>;
+defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx10<0x086>;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
+
+def BUFFER_GL0_INV_gfx10 :
+ MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
+def BUFFER_GL1_INV_gfx10 :
+ MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in {
+ multiclass MUBUF_Real_gfx6<bits<8> op> {
+ def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+ }
+} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+ multiclass MUBUF_Real_gfx7<bits<8> op> {
+ def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+ }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> {
+ def _ADDR64_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _BOTHEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+ multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
+ def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+ MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
+ def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
+ MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
+ def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+ MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
+ def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+ MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
+ def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+ MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
+
+ def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+ MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
+ def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
+ MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
+ def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+ MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
+ def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+ MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
+ def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+ MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
+ }
+ multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx6_gfx7<op> {
+ def _ADDR64_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
+ def _BOTHEN_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+ def _IDXEN_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+ def _OFFEN_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+ def _OFFSET_RTN_gfx6_gfx7 :
+ MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>;
+
+multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> :
+ MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;
+
+multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
+ MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;
+
+// FIXME-GFX6: Following instructions are available only on GFX6.
+//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomics_gfx6 <0x034>;
+//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomics_gfx6 <0x054>;
+
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
+defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
+defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
+defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
+defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;
+
+defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
+defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
+defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
+defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
+defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
+// FIXME-GFX6-GFX7-GFX10: Add following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
+//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
+//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
+// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
+// FIXME-GFX6-GFX7-GFX10: Add following instructions:
+//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
+//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
+//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
+
+defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
+defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
+def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;
+
+//===----------------------------------------------------------------------===//
+// Base ENC_MTBUF for GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
+ MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
- let Inst{15} = ps.addr64;
let Inst{18-16} = op;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1792,47 +2004,87 @@ class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
-multiclass MTBUF_Real_AllAddr_si<bits<3> op> {
- def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
- def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
- def _OFFEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
- def _IDXEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
- def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
+//===----------------------------------------------------------------------===//
+// MTBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
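+// Relative to GFX6/GFX7 (dfmt/nfmt fields, addr64 in bit 15), the GFX10 MTBUF
+// encoding uses a single 7-bit format field, dlc in bit 15 and op{3} in bit 53.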
+class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
+ Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
+ let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+ let Inst{25-19} = format;
+ let Inst{53} = op{3};
+}
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> {
+ def _BOTHEN_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx10 :
+ MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
-defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_si <0>;
-defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_si <1>;
-defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_si <2>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_si <3>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_si <4>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_si <5>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_si <6>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
+defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>;
+defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00a>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00b>;
+defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x00c>;
+defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x00d>;
+defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx10<0x00e>;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;
//===----------------------------------------------------------------------===//
-// CI
-// MTBUF - GFX6, GFX7.
+// MTBUF - GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real_si<op, ps> {
- let AssemblerPredicate=isCIOnly;
- let DecoderNamespace="CI";
+class MTBUF_Real_gfx6_gfx7<bits<4> op, MTBUF_Pseudo ps> :
+ Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> {
+ let Inst{15} = ps.addr64;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
}
-def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> {
+ def _ADDR64_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _BOTHEN_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+ def _IDXEN_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _OFFEN_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _OFFSET_gfx6_gfx7 :
+ MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> :
+ MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>;
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
//===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real<op, ps>,
+ MUBUF_Real<ps>,
Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
- let AssemblerPredicate=isVI;
- let DecoderNamespace="VI";
+ let AssemblerPredicate = isGFX8GFX9;
+ let DecoderNamespace = "GFX8";
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
@@ -1878,7 +2130,7 @@ multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> {
}
class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real<op, ps>,
+ MUBUF_Real<ps>,
Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
let AssemblerPredicate=HasUnpackedD16VMem;
@@ -2002,12 +2254,19 @@ def BUFFER_STORE_LDS_DWORD_vi : MUBUF_Real_vi <0x3d, BUFFER_STORE_LDS_DWORD>;
def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
+let SubtargetPredicate = HasAtomicFaddInsts in {
+
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_AllAddr_vi <0x4d>;
+defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_AllAddr_vi <0x4e>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
+
class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
MTBUF_Real<ps>,
Enc64,
SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
- let AssemblerPredicate=isVI;
- let DecoderNamespace="VI";
+ let AssemblerPredicate = isGFX8GFX9;
+ let DecoderNamespace = "GFX8";
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td
index ae40c6387982..1a526675164a 100644
--- a/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/lib/Target/AMDGPU/CaymanInstructions.td
@@ -1,9 +1,8 @@
//===-- CaymanInstructions.td - CM Instruction defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index 31d2ebef481d..c52eaaa3fdc5 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -1,9 +1,8 @@
//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,8 +10,6 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
InstSI <outs, ins, "", pattern>,
SIMCInstr <opName, SIEncodingFamily.NONE> {
- let SubtargetPredicate = isGCN;
-
let LGKM_CNT = 1;
let DS = 1;
let Size = 8;
@@ -21,6 +18,7 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
// Most instruction load and store data, so set this as the default.
let mayLoad = 1;
let mayStore = 1;
+ let maybeAtomic = 1;
let hasSideEffects = 0;
let SchedRW = [WriteLDS];
@@ -40,6 +38,8 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
bits<1> has_data0 = 1;
bits<1> has_data1 = 1;
+ bits<1> has_gws_data0 = 0; // data0 is encoded as addr
+
bits<1> has_offset = 1; // has "offset" that should be split to offset0,1
bits<1> has_offset0 = 1;
bits<1> has_offset1 = 1;
@@ -61,6 +61,7 @@ class DS_Real <DS_Pseudo ds> :
// copy relevant pseudo op flags
let SubtargetPredicate = ds.SubtargetPredicate;
+ let OtherPredicates = ds.OtherPredicates;
let AsmMatchConverter = ds.AsmMatchConverter;
// encoding fields
@@ -322,7 +323,7 @@ class DS_GWS_1D <string opName>
: DS_GWS<opName,
(ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
- let has_data0 = 1;
+ let has_gws_data0 = 1;
}
class DS_VOID <string opName> : DS_Pseudo<opName,
@@ -469,11 +470,15 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
-def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
+let isConvergent = 1, usesCustomInserter = 1 in {
+def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
+ let mayLoad = 0;
+}
def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
+}
def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
@@ -550,12 +555,14 @@ def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isCIVI in {
+let SubtargetPredicate = isGFX7Plus in {
defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;
+let isConvergent = 1, usesCustomInserter = 1 in {
def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
+}
let mayStore = 0 in {
defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
@@ -569,13 +576,13 @@ defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
def DS_NOP : DS_VOID<"ds_nop">;
-} // let SubtargetPredicate = isCIVI
+} // let SubtargetPredicate = isGFX7Plus
//===----------------------------------------------------------------------===//
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8Plus in {
let Uses = [EXEC] in {
def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32",
@@ -586,7 +593,7 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
-} // let SubtargetPredicate = isVI
+} // let SubtargetPredicate = isGFX8Plus
//===----------------------------------------------------------------------===//
// DS Patterns
@@ -597,9 +604,9 @@ def : GCNPat <
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
>;
-class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst $ptr, (as_i16imm $offset), (i1 0))
+ (inst $ptr, (as_i16imm $offset), (i1 gds))
>;
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
@@ -613,38 +620,21 @@ multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
}
}
-
-multiclass DSReadPat_Hi16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> {
- def : GCNPat <
- (build_vector vt:$lo, (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))),
- (v2i16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo))
- >;
-
- def : GCNPat <
- (build_vector f16:$lo, (f16 (bitconvert (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))))),
- (v2f16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo))
- >;
-}
-
-multiclass DSReadPat_Lo16 <DS_Pseudo inst, PatFrag frag, ValueType vt = i16> {
- def : GCNPat <
- (build_vector (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), (vt (Hi16Elt vt:$hi))),
- (v2i16 (inst $ptr, (as_i16imm $offset), 0, $hi))
- >;
-
- def : GCNPat <
- (build_vector (f16 (bitconvert (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))))), (f16 (Hi16Elt f16:$hi))),
- (v2f16 (inst $ptr, (as_i16imm $offset), 0, $hi))
- >;
-}
+class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
+ (inst $ptr, (as_i16imm $offset), (i1 0), $in)
+>;
defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
-defm : DSReadPat_mc <DS_READ_U8, i32, "az_extloadi8_local">;
defm : DSReadPat_mc <DS_READ_I8, i16, "sextloadi8_local">;
-defm : DSReadPat_mc <DS_READ_U8, i16, "az_extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i32, "extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i32, "zextloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i16, "extloadi8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i16, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
-defm : DSReadPat_mc <DS_READ_U16, i32, "az_extloadi16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
@@ -658,21 +648,24 @@ defm : DSReadPat_mc <DS_READ_B128, v4i32, "load_align16_local">;
} // End AddedComplexity = 100
let OtherPredicates = [D16PreservesUnusedBits] in {
-let AddedComplexity = 100 in {
-defm : DSReadPat_Hi16<DS_READ_U16_D16_HI, load_local>;
-defm : DSReadPat_Hi16<DS_READ_U8_D16_HI, az_extloadi8_local>;
-defm : DSReadPat_Hi16<DS_READ_I8_D16_HI, sextloadi8_local>;
-
-defm : DSReadPat_Lo16<DS_READ_U16_D16, load_local>;
-defm : DSReadPat_Lo16<DS_READ_U8_D16, az_extloadi8_local>;
-defm : DSReadPat_Lo16<DS_READ_I8_D16, sextloadi8_local>;
-
-}
+def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
+def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
+def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
+def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
+def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
+def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;
+
+def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
+def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
+def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
+def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
+def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
+def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
}
-class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 gds))
>;
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
@@ -730,7 +723,7 @@ class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
// related to bounds checking.
-let OtherPredicates = [LDSRequiresM0Init, isCIVI] in {
+let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
}
@@ -747,260 +740,313 @@ defm : DSWritePat_mc <DS_WRITE_B64, v2i32, "store_align8_local">;
defm : DSWritePat_mc <DS_WRITE_B128, v4i32, "store_align16_local">;
} // End AddedComplexity = 100
-class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 gds))
>;
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0")>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag)>;
+ !cast<PatFrag>(frag#"_local")>;
}
+
+ def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>;
}
-class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag, bit gds=0> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 gds))
>;
multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, ValueType vt, string frag> {
let OtherPredicates = [LDSRequiresM0Init] in {
- def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_local_m0")>;
}
let OtherPredicates = [NotLDSRequiresM0Init] in {
def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
- !cast<PatFrag>(frag)>;
+ !cast<PatFrag>(frag#"_local")>;
}
+
+ def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0"), 1>;
}
// 32-bit atomics.
-defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add_local">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub_local">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc_local">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec_local">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and_local">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or_local">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax_local">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd_local">;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U32, i32, "atomic_load_add">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U32, i32, "atomic_load_sub">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U32, i32, "atomic_inc">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U32, i32, "atomic_dec">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B32, i32, "atomic_load_and">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B32, i32, "atomic_load_or">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B32, i32, "atomic_load_xor">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
// 64-bit atomics.
-defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap_local">;
-defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add_local">;
-defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub_local">;
-defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc_local">;
-defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec_local">;
-defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and_local">;
-defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or_local">;
-defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max_local">;
-defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin_local">;
-defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax_local">;
-
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
+defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
+defm : DSAtomicRetPat_mc<DS_ADD_RTN_U64, i64, "atomic_load_add">;
+defm : DSAtomicRetPat_mc<DS_SUB_RTN_U64, i64, "atomic_load_sub">;
+defm : DSAtomicRetPat_mc<DS_INC_RTN_U64, i64, "atomic_inc">;
+defm : DSAtomicRetPat_mc<DS_DEC_RTN_U64, i64, "atomic_dec">;
+defm : DSAtomicRetPat_mc<DS_AND_RTN_B64, i64, "atomic_load_and">;
+defm : DSAtomicRetPat_mc<DS_OR_RTN_B64, i64, "atomic_load_or">;
+defm : DSAtomicRetPat_mc<DS_XOR_RTN_B64, i64, "atomic_load_xor">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
+
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
+
+def : Pat <
+ (SIds_ordered_count i32:$value, i16:$offset),
+ (DS_ORDERED_COUNT $value, (as_i16imm $offset))
+>;
//===----------------------------------------------------------------------===//
-// Real instructions
+// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// SIInstructions.td
+// Base ENC_DS for GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-class DS_Real_si <bits<8> op, DS_Pseudo ds> :
- DS_Real <ds>,
- SIMCInstr <ds.Mnemonic, SIEncodingFamily.SI> {
- let AssemblerPredicates=[isSICI];
- let DecoderNamespace="SICI";
+class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> :
+ DS_Real<ps>, SIMCInstr <ps.Mnemonic, ef> {
- // encoding
- let Inst{7-0} = !if(ds.has_offset0, offset0, 0);
- let Inst{15-8} = !if(ds.has_offset1, offset1, 0);
- let Inst{17} = !if(ds.has_gds, gds, ds.gdsValue);
+ let Inst{7-0} = !if(ps.has_offset0, offset0, 0);
+ let Inst{15-8} = !if(ps.has_offset1, offset1, 0);
+ let Inst{17} = !if(ps.has_gds, gds, ps.gdsValue);
let Inst{25-18} = op;
- let Inst{31-26} = 0x36; // ds prefix
- let Inst{39-32} = !if(ds.has_addr, addr, 0);
- let Inst{47-40} = !if(ds.has_data0, data0, 0);
- let Inst{55-48} = !if(ds.has_data1, data1, 0);
- let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
+ let Inst{31-26} = 0x36;
+ let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0, 0));
+ let Inst{47-40} = !if(ps.has_data0, data0, 0);
+ let Inst{55-48} = !if(ps.has_data1, data1, 0);
+ let Inst{63-56} = !if(ps.has_vdst, vdst, 0);
}
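
For readers less used to the Inst{...} notation, the field assignments above describe a single 64-bit ENC_DS word. A standalone C++ sketch of the same packing (illustrative only; the real encoder is generated by TableGen), including the GWS special case where data0 is emitted in the addr field:

    #include <cstdint>

    // Illustrative packing of the gfx6/gfx7/gfx10 ENC_DS layout; not the
    // generated encoder, just the bit positions from the assignments above.
    static uint64_t encodeDS(uint8_t Op, uint8_t Offset0, uint8_t Offset1,
                             bool Gds, uint8_t Addr, uint8_t Data0,
                             uint8_t Data1, uint8_t Vdst) {
      uint64_t Inst = 0;
      Inst |= uint64_t(Offset0);          // Inst{7-0}
      Inst |= uint64_t(Offset1) << 8;     // Inst{15-8}
      Inst |= uint64_t(Gds)     << 17;    // Inst{17}
      Inst |= uint64_t(Op)      << 18;    // Inst{25-18}
      Inst |= uint64_t(0x36)    << 26;    // Inst{31-26}: DS encoding prefix
      Inst |= uint64_t(Addr)    << 32;    // Inst{39-32} (data0 here for GWS ops)
      Inst |= uint64_t(Data0)   << 40;    // Inst{47-40}
      Inst |= uint64_t(Data1)   << 48;    // Inst{55-48}
      Inst |= uint64_t(Vdst)    << 56;    // Inst{63-56}
      return Inst;
    }
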
-def DS_ADD_U32_si : DS_Real_si<0x0, DS_ADD_U32>;
-def DS_SUB_U32_si : DS_Real_si<0x1, DS_SUB_U32>;
-def DS_RSUB_U32_si : DS_Real_si<0x2, DS_RSUB_U32>;
-def DS_INC_U32_si : DS_Real_si<0x3, DS_INC_U32>;
-def DS_DEC_U32_si : DS_Real_si<0x4, DS_DEC_U32>;
-def DS_MIN_I32_si : DS_Real_si<0x5, DS_MIN_I32>;
-def DS_MAX_I32_si : DS_Real_si<0x6, DS_MAX_I32>;
-def DS_MIN_U32_si : DS_Real_si<0x7, DS_MIN_U32>;
-def DS_MAX_U32_si : DS_Real_si<0x8, DS_MAX_U32>;
-def DS_AND_B32_si : DS_Real_si<0x9, DS_AND_B32>;
-def DS_OR_B32_si : DS_Real_si<0xa, DS_OR_B32>;
-def DS_XOR_B32_si : DS_Real_si<0xb, DS_XOR_B32>;
-def DS_MSKOR_B32_si : DS_Real_si<0xc, DS_MSKOR_B32>;
-def DS_WRITE_B32_si : DS_Real_si<0xd, DS_WRITE_B32>;
-def DS_WRITE2_B32_si : DS_Real_si<0xe, DS_WRITE2_B32>;
-def DS_WRITE2ST64_B32_si : DS_Real_si<0xf, DS_WRITE2ST64_B32>;
-def DS_CMPST_B32_si : DS_Real_si<0x10, DS_CMPST_B32>;
-def DS_CMPST_F32_si : DS_Real_si<0x11, DS_CMPST_F32>;
-def DS_MIN_F32_si : DS_Real_si<0x12, DS_MIN_F32>;
-def DS_MAX_F32_si : DS_Real_si<0x13, DS_MAX_F32>;
-def DS_NOP_si : DS_Real_si<0x14, DS_NOP>;
-def DS_GWS_INIT_si : DS_Real_si<0x19, DS_GWS_INIT>;
-def DS_GWS_SEMA_V_si : DS_Real_si<0x1a, DS_GWS_SEMA_V>;
-def DS_GWS_SEMA_BR_si : DS_Real_si<0x1b, DS_GWS_SEMA_BR>;
-def DS_GWS_SEMA_P_si : DS_Real_si<0x1c, DS_GWS_SEMA_P>;
-def DS_GWS_BARRIER_si : DS_Real_si<0x1d, DS_GWS_BARRIER>;
-def DS_WRITE_B8_si : DS_Real_si<0x1e, DS_WRITE_B8>;
-def DS_WRITE_B16_si : DS_Real_si<0x1f, DS_WRITE_B16>;
-def DS_ADD_RTN_U32_si : DS_Real_si<0x20, DS_ADD_RTN_U32>;
-def DS_SUB_RTN_U32_si : DS_Real_si<0x21, DS_SUB_RTN_U32>;
-def DS_RSUB_RTN_U32_si : DS_Real_si<0x22, DS_RSUB_RTN_U32>;
-def DS_INC_RTN_U32_si : DS_Real_si<0x23, DS_INC_RTN_U32>;
-def DS_DEC_RTN_U32_si : DS_Real_si<0x24, DS_DEC_RTN_U32>;
-def DS_MIN_RTN_I32_si : DS_Real_si<0x25, DS_MIN_RTN_I32>;
-def DS_MAX_RTN_I32_si : DS_Real_si<0x26, DS_MAX_RTN_I32>;
-def DS_MIN_RTN_U32_si : DS_Real_si<0x27, DS_MIN_RTN_U32>;
-def DS_MAX_RTN_U32_si : DS_Real_si<0x28, DS_MAX_RTN_U32>;
-def DS_AND_RTN_B32_si : DS_Real_si<0x29, DS_AND_RTN_B32>;
-def DS_OR_RTN_B32_si : DS_Real_si<0x2a, DS_OR_RTN_B32>;
-def DS_XOR_RTN_B32_si : DS_Real_si<0x2b, DS_XOR_RTN_B32>;
-def DS_MSKOR_RTN_B32_si : DS_Real_si<0x2c, DS_MSKOR_RTN_B32>;
-def DS_WRXCHG_RTN_B32_si : DS_Real_si<0x2d, DS_WRXCHG_RTN_B32>;
-def DS_WRXCHG2_RTN_B32_si : DS_Real_si<0x2e, DS_WRXCHG2_RTN_B32>;
-def DS_WRXCHG2ST64_RTN_B32_si : DS_Real_si<0x2f, DS_WRXCHG2ST64_RTN_B32>;
-def DS_CMPST_RTN_B32_si : DS_Real_si<0x30, DS_CMPST_RTN_B32>;
-def DS_CMPST_RTN_F32_si : DS_Real_si<0x31, DS_CMPST_RTN_F32>;
-def DS_MIN_RTN_F32_si : DS_Real_si<0x32, DS_MIN_RTN_F32>;
-def DS_MAX_RTN_F32_si : DS_Real_si<0x33, DS_MAX_RTN_F32>;
-
-// These instruction are CI/VI only
-def DS_WRAP_RTN_B32_si : DS_Real_si<0x34, DS_WRAP_RTN_B32>;
-def DS_CONDXCHG32_RTN_B64_si : DS_Real_si<0x7e, DS_CONDXCHG32_RTN_B64>;
-def DS_GWS_SEMA_RELEASE_ALL_si : DS_Real_si<0x18, DS_GWS_SEMA_RELEASE_ALL>;
-
-def DS_SWIZZLE_B32_si : DS_Real_si<0x35, DS_SWIZZLE_B32>;
-def DS_READ_B32_si : DS_Real_si<0x36, DS_READ_B32>;
-def DS_READ2_B32_si : DS_Real_si<0x37, DS_READ2_B32>;
-def DS_READ2ST64_B32_si : DS_Real_si<0x38, DS_READ2ST64_B32>;
-def DS_READ_I8_si : DS_Real_si<0x39, DS_READ_I8>;
-def DS_READ_U8_si : DS_Real_si<0x3a, DS_READ_U8>;
-def DS_READ_I16_si : DS_Real_si<0x3b, DS_READ_I16>;
-def DS_READ_U16_si : DS_Real_si<0x3c, DS_READ_U16>;
-def DS_CONSUME_si : DS_Real_si<0x3d, DS_CONSUME>;
-def DS_APPEND_si : DS_Real_si<0x3e, DS_APPEND>;
-def DS_ORDERED_COUNT_si : DS_Real_si<0x3f, DS_ORDERED_COUNT>;
-def DS_ADD_U64_si : DS_Real_si<0x40, DS_ADD_U64>;
-def DS_SUB_U64_si : DS_Real_si<0x41, DS_SUB_U64>;
-def DS_RSUB_U64_si : DS_Real_si<0x42, DS_RSUB_U64>;
-def DS_INC_U64_si : DS_Real_si<0x43, DS_INC_U64>;
-def DS_DEC_U64_si : DS_Real_si<0x44, DS_DEC_U64>;
-def DS_MIN_I64_si : DS_Real_si<0x45, DS_MIN_I64>;
-def DS_MAX_I64_si : DS_Real_si<0x46, DS_MAX_I64>;
-def DS_MIN_U64_si : DS_Real_si<0x47, DS_MIN_U64>;
-def DS_MAX_U64_si : DS_Real_si<0x48, DS_MAX_U64>;
-def DS_AND_B64_si : DS_Real_si<0x49, DS_AND_B64>;
-def DS_OR_B64_si : DS_Real_si<0x4a, DS_OR_B64>;
-def DS_XOR_B64_si : DS_Real_si<0x4b, DS_XOR_B64>;
-def DS_MSKOR_B64_si : DS_Real_si<0x4c, DS_MSKOR_B64>;
-def DS_WRITE_B64_si : DS_Real_si<0x4d, DS_WRITE_B64>;
-def DS_WRITE2_B64_si : DS_Real_si<0x4E, DS_WRITE2_B64>;
-def DS_WRITE2ST64_B64_si : DS_Real_si<0x4f, DS_WRITE2ST64_B64>;
-def DS_CMPST_B64_si : DS_Real_si<0x50, DS_CMPST_B64>;
-def DS_CMPST_F64_si : DS_Real_si<0x51, DS_CMPST_F64>;
-def DS_MIN_F64_si : DS_Real_si<0x52, DS_MIN_F64>;
-def DS_MAX_F64_si : DS_Real_si<0x53, DS_MAX_F64>;
-
-def DS_ADD_RTN_U64_si : DS_Real_si<0x60, DS_ADD_RTN_U64>;
-def DS_SUB_RTN_U64_si : DS_Real_si<0x61, DS_SUB_RTN_U64>;
-def DS_RSUB_RTN_U64_si : DS_Real_si<0x62, DS_RSUB_RTN_U64>;
-def DS_INC_RTN_U64_si : DS_Real_si<0x63, DS_INC_RTN_U64>;
-def DS_DEC_RTN_U64_si : DS_Real_si<0x64, DS_DEC_RTN_U64>;
-def DS_MIN_RTN_I64_si : DS_Real_si<0x65, DS_MIN_RTN_I64>;
-def DS_MAX_RTN_I64_si : DS_Real_si<0x66, DS_MAX_RTN_I64>;
-def DS_MIN_RTN_U64_si : DS_Real_si<0x67, DS_MIN_RTN_U64>;
-def DS_MAX_RTN_U64_si : DS_Real_si<0x68, DS_MAX_RTN_U64>;
-def DS_AND_RTN_B64_si : DS_Real_si<0x69, DS_AND_RTN_B64>;
-def DS_OR_RTN_B64_si : DS_Real_si<0x6a, DS_OR_RTN_B64>;
-def DS_XOR_RTN_B64_si : DS_Real_si<0x6b, DS_XOR_RTN_B64>;
-def DS_MSKOR_RTN_B64_si : DS_Real_si<0x6c, DS_MSKOR_RTN_B64>;
-def DS_WRXCHG_RTN_B64_si : DS_Real_si<0x6d, DS_WRXCHG_RTN_B64>;
-def DS_WRXCHG2_RTN_B64_si : DS_Real_si<0x6e, DS_WRXCHG2_RTN_B64>;
-def DS_WRXCHG2ST64_RTN_B64_si : DS_Real_si<0x6f, DS_WRXCHG2ST64_RTN_B64>;
-def DS_CMPST_RTN_B64_si : DS_Real_si<0x70, DS_CMPST_RTN_B64>;
-def DS_CMPST_RTN_F64_si : DS_Real_si<0x71, DS_CMPST_RTN_F64>;
-def DS_MIN_RTN_F64_si : DS_Real_si<0x72, DS_MIN_RTN_F64>;
-def DS_MAX_RTN_F64_si : DS_Real_si<0x73, DS_MAX_RTN_F64>;
-
-def DS_READ_B64_si : DS_Real_si<0x76, DS_READ_B64>;
-def DS_READ2_B64_si : DS_Real_si<0x77, DS_READ2_B64>;
-def DS_READ2ST64_B64_si : DS_Real_si<0x78, DS_READ2ST64_B64>;
-
-def DS_ADD_SRC2_U32_si : DS_Real_si<0x80, DS_ADD_SRC2_U32>;
-def DS_SUB_SRC2_U32_si : DS_Real_si<0x81, DS_SUB_SRC2_U32>;
-def DS_RSUB_SRC2_U32_si : DS_Real_si<0x82, DS_RSUB_SRC2_U32>;
-def DS_INC_SRC2_U32_si : DS_Real_si<0x83, DS_INC_SRC2_U32>;
-def DS_DEC_SRC2_U32_si : DS_Real_si<0x84, DS_DEC_SRC2_U32>;
-def DS_MIN_SRC2_I32_si : DS_Real_si<0x85, DS_MIN_SRC2_I32>;
-def DS_MAX_SRC2_I32_si : DS_Real_si<0x86, DS_MAX_SRC2_I32>;
-def DS_MIN_SRC2_U32_si : DS_Real_si<0x87, DS_MIN_SRC2_U32>;
-def DS_MAX_SRC2_U32_si : DS_Real_si<0x88, DS_MAX_SRC2_U32>;
-def DS_AND_SRC2_B32_si : DS_Real_si<0x89, DS_AND_SRC2_B32>;
-def DS_OR_SRC2_B32_si : DS_Real_si<0x8a, DS_OR_SRC2_B32>;
-def DS_XOR_SRC2_B32_si : DS_Real_si<0x8b, DS_XOR_SRC2_B32>;
-def DS_WRITE_SRC2_B32_si : DS_Real_si<0x8d, DS_WRITE_SRC2_B32>;
-
-def DS_MIN_SRC2_F32_si : DS_Real_si<0x92, DS_MIN_SRC2_F32>;
-def DS_MAX_SRC2_F32_si : DS_Real_si<0x93, DS_MAX_SRC2_F32>;
-
-def DS_ADD_SRC2_U64_si : DS_Real_si<0xc0, DS_ADD_SRC2_U64>;
-def DS_SUB_SRC2_U64_si : DS_Real_si<0xc1, DS_SUB_SRC2_U64>;
-def DS_RSUB_SRC2_U64_si : DS_Real_si<0xc2, DS_RSUB_SRC2_U64>;
-def DS_INC_SRC2_U64_si : DS_Real_si<0xc3, DS_INC_SRC2_U64>;
-def DS_DEC_SRC2_U64_si : DS_Real_si<0xc4, DS_DEC_SRC2_U64>;
-def DS_MIN_SRC2_I64_si : DS_Real_si<0xc5, DS_MIN_SRC2_I64>;
-def DS_MAX_SRC2_I64_si : DS_Real_si<0xc6, DS_MAX_SRC2_I64>;
-def DS_MIN_SRC2_U64_si : DS_Real_si<0xc7, DS_MIN_SRC2_U64>;
-def DS_MAX_SRC2_U64_si : DS_Real_si<0xc8, DS_MAX_SRC2_U64>;
-def DS_AND_SRC2_B64_si : DS_Real_si<0xc9, DS_AND_SRC2_B64>;
-def DS_OR_SRC2_B64_si : DS_Real_si<0xca, DS_OR_SRC2_B64>;
-def DS_XOR_SRC2_B64_si : DS_Real_si<0xcb, DS_XOR_SRC2_B64>;
-def DS_WRITE_SRC2_B64_si : DS_Real_si<0xcd, DS_WRITE_SRC2_B64>;
-
-def DS_MIN_SRC2_F64_si : DS_Real_si<0xd2, DS_MIN_SRC2_F64>;
-def DS_MAX_SRC2_F64_si : DS_Real_si<0xd3, DS_MAX_SRC2_F64>;
-def DS_WRITE_B96_si : DS_Real_si<0xde, DS_WRITE_B96>;
-def DS_WRITE_B128_si : DS_Real_si<0xdf, DS_WRITE_B128>;
-def DS_READ_B96_si : DS_Real_si<0xfe, DS_READ_B96>;
-def DS_READ_B128_si : DS_Real_si<0xff, DS_READ_B128>;
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass DS_Real_gfx10<bits<8> op> {
+ def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+ SIEncodingFamily.GFX10>;
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm DS_ADD_F32 : DS_Real_gfx10<0x015>;
+defm DS_ADD_RTN_F32 : DS_Real_gfx10<0x055>;
+defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
+defm DS_WRITE_B8_D16_HI : DS_Real_gfx10<0x0a0>;
+defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
+defm DS_READ_U8_D16 : DS_Real_gfx10<0x0a2>;
+defm DS_READ_U8_D16_HI : DS_Real_gfx10<0x0a3>;
+defm DS_READ_I8_D16 : DS_Real_gfx10<0x0a4>;
+defm DS_READ_I8_D16_HI : DS_Real_gfx10<0x0a5>;
+defm DS_READ_U16_D16 : DS_Real_gfx10<0x0a6>;
+defm DS_READ_U16_D16_HI : DS_Real_gfx10<0x0a7>;
+defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
+defm DS_READ_ADDTID_B32 : DS_Real_gfx10<0x0b1>;
+defm DS_PERMUTE_B32 : DS_Real_gfx10<0x0b2>;
+defm DS_BPERMUTE_B32 : DS_Real_gfx10<0x0b3>;
+
+//===----------------------------------------------------------------------===//
+// GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+ multiclass DS_Real_gfx7<bits<8> op> {
+ def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+ SIEncodingFamily.SI>;
+ }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+multiclass DS_Real_gfx7_gfx10<bits<8> op> :
+ DS_Real_gfx7<op>, DS_Real_gfx10<op>;
+
+// FIXME-GFX7: Add tests when upstreaming this part.
+defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10<0x018>;
+defm DS_WRAP_RTN_B32 : DS_Real_gfx7_gfx10<0x034>;
+defm DS_CONDXCHG32_RTN_B64 : DS_Real_gfx7_gfx10<0x07e>;
+defm DS_WRITE_B96 : DS_Real_gfx7_gfx10<0x0de>;
+defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>;
+defm DS_READ_B96 : DS_Real_gfx7_gfx10<0x0fe>;
+defm DS_READ_B128 : DS_Real_gfx7_gfx10<0x0ff>;
+
+//===----------------------------------------------------------------------===//
+// GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass DS_Real_gfx6_gfx7<bits<8> op> {
+ def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10<op, !cast<DS_Pseudo>(NAME),
+ SIEncodingFamily.SI>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
+ DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
+
+defm DS_ADD_U32 : DS_Real_gfx6_gfx7_gfx10<0x000>;
+defm DS_SUB_U32 : DS_Real_gfx6_gfx7_gfx10<0x001>;
+defm DS_RSUB_U32 : DS_Real_gfx6_gfx7_gfx10<0x002>;
+defm DS_INC_U32 : DS_Real_gfx6_gfx7_gfx10<0x003>;
+defm DS_DEC_U32 : DS_Real_gfx6_gfx7_gfx10<0x004>;
+defm DS_MIN_I32 : DS_Real_gfx6_gfx7_gfx10<0x005>;
+defm DS_MAX_I32 : DS_Real_gfx6_gfx7_gfx10<0x006>;
+defm DS_MIN_U32 : DS_Real_gfx6_gfx7_gfx10<0x007>;
+defm DS_MAX_U32 : DS_Real_gfx6_gfx7_gfx10<0x008>;
+defm DS_AND_B32 : DS_Real_gfx6_gfx7_gfx10<0x009>;
+defm DS_OR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00a>;
+defm DS_XOR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00b>;
+defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10<0x00c>;
+defm DS_WRITE_B32 : DS_Real_gfx6_gfx7_gfx10<0x00d>;
+defm DS_WRITE2_B32 : DS_Real_gfx6_gfx7_gfx10<0x00e>;
+defm DS_WRITE2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x00f>;
+defm DS_CMPST_B32 : DS_Real_gfx6_gfx7_gfx10<0x010>;
+defm DS_CMPST_F32 : DS_Real_gfx6_gfx7_gfx10<0x011>;
+defm DS_MIN_F32 : DS_Real_gfx6_gfx7_gfx10<0x012>;
+defm DS_MAX_F32 : DS_Real_gfx6_gfx7_gfx10<0x013>;
+defm DS_NOP : DS_Real_gfx6_gfx7_gfx10<0x014>;
+defm DS_GWS_INIT : DS_Real_gfx6_gfx7_gfx10<0x019>;
+defm DS_GWS_SEMA_V : DS_Real_gfx6_gfx7_gfx10<0x01a>;
+defm DS_GWS_SEMA_BR : DS_Real_gfx6_gfx7_gfx10<0x01b>;
+defm DS_GWS_SEMA_P : DS_Real_gfx6_gfx7_gfx10<0x01c>;
+defm DS_GWS_BARRIER : DS_Real_gfx6_gfx7_gfx10<0x01d>;
+defm DS_WRITE_B8 : DS_Real_gfx6_gfx7_gfx10<0x01e>;
+defm DS_WRITE_B16 : DS_Real_gfx6_gfx7_gfx10<0x01f>;
+defm DS_ADD_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x020>;
+defm DS_SUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x021>;
+defm DS_RSUB_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x022>;
+defm DS_INC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x023>;
+defm DS_DEC_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x024>;
+defm DS_MIN_RTN_I32 : DS_Real_gfx6_gfx7_gfx10<0x025>;
+defm DS_MAX_RTN_I32 : DS_Real_gfx6_gfx7_gfx10<0x026>;
+defm DS_MIN_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x027>;
+defm DS_MAX_RTN_U32 : DS_Real_gfx6_gfx7_gfx10<0x028>;
+defm DS_AND_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x029>;
+defm DS_OR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02a>;
+defm DS_XOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02b>;
+defm DS_MSKOR_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02c>;
+defm DS_WRXCHG_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02d>;
+defm DS_WRXCHG2_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02e>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
+defm DS_CMPST_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x030>;
+defm DS_CMPST_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x031>;
+defm DS_MIN_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x032>;
+defm DS_MAX_RTN_F32 : DS_Real_gfx6_gfx7_gfx10<0x033>;
+defm DS_SWIZZLE_B32 : DS_Real_gfx6_gfx7_gfx10<0x035>;
+defm DS_READ_B32 : DS_Real_gfx6_gfx7_gfx10<0x036>;
+defm DS_READ2_B32 : DS_Real_gfx6_gfx7_gfx10<0x037>;
+defm DS_READ2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x038>;
+defm DS_READ_I8 : DS_Real_gfx6_gfx7_gfx10<0x039>;
+defm DS_READ_U8 : DS_Real_gfx6_gfx7_gfx10<0x03a>;
+defm DS_READ_I16 : DS_Real_gfx6_gfx7_gfx10<0x03b>;
+defm DS_READ_U16 : DS_Real_gfx6_gfx7_gfx10<0x03c>;
+defm DS_CONSUME : DS_Real_gfx6_gfx7_gfx10<0x03d>;
+defm DS_APPEND : DS_Real_gfx6_gfx7_gfx10<0x03e>;
+defm DS_ORDERED_COUNT : DS_Real_gfx6_gfx7_gfx10<0x03f>;
+defm DS_ADD_U64 : DS_Real_gfx6_gfx7_gfx10<0x040>;
+defm DS_SUB_U64 : DS_Real_gfx6_gfx7_gfx10<0x041>;
+defm DS_RSUB_U64 : DS_Real_gfx6_gfx7_gfx10<0x042>;
+defm DS_INC_U64 : DS_Real_gfx6_gfx7_gfx10<0x043>;
+defm DS_DEC_U64 : DS_Real_gfx6_gfx7_gfx10<0x044>;
+defm DS_MIN_I64 : DS_Real_gfx6_gfx7_gfx10<0x045>;
+defm DS_MAX_I64 : DS_Real_gfx6_gfx7_gfx10<0x046>;
+defm DS_MIN_U64 : DS_Real_gfx6_gfx7_gfx10<0x047>;
+defm DS_MAX_U64 : DS_Real_gfx6_gfx7_gfx10<0x048>;
+defm DS_AND_B64 : DS_Real_gfx6_gfx7_gfx10<0x049>;
+defm DS_OR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04a>;
+defm DS_XOR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04b>;
+defm DS_MSKOR_B64 : DS_Real_gfx6_gfx7_gfx10<0x04c>;
+defm DS_WRITE_B64 : DS_Real_gfx6_gfx7_gfx10<0x04d>;
+defm DS_WRITE2_B64 : DS_Real_gfx6_gfx7_gfx10<0x04e>;
+defm DS_WRITE2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x04f>;
+defm DS_CMPST_B64 : DS_Real_gfx6_gfx7_gfx10<0x050>;
+defm DS_CMPST_F64 : DS_Real_gfx6_gfx7_gfx10<0x051>;
+defm DS_MIN_F64 : DS_Real_gfx6_gfx7_gfx10<0x052>;
+defm DS_MAX_F64 : DS_Real_gfx6_gfx7_gfx10<0x053>;
+defm DS_ADD_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x060>;
+defm DS_SUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x061>;
+defm DS_RSUB_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x062>;
+defm DS_INC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x063>;
+defm DS_DEC_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x064>;
+defm DS_MIN_RTN_I64 : DS_Real_gfx6_gfx7_gfx10<0x065>;
+defm DS_MAX_RTN_I64 : DS_Real_gfx6_gfx7_gfx10<0x066>;
+defm DS_MIN_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x067>;
+defm DS_MAX_RTN_U64 : DS_Real_gfx6_gfx7_gfx10<0x068>;
+defm DS_AND_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x069>;
+defm DS_OR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06a>;
+defm DS_XOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06b>;
+defm DS_MSKOR_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06c>;
+defm DS_WRXCHG_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06d>;
+defm DS_WRXCHG2_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06e>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
+defm DS_CMPST_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x070>;
+defm DS_CMPST_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x071>;
+defm DS_MIN_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x072>;
+defm DS_MAX_RTN_F64 : DS_Real_gfx6_gfx7_gfx10<0x073>;
+defm DS_READ_B64 : DS_Real_gfx6_gfx7_gfx10<0x076>;
+defm DS_READ2_B64 : DS_Real_gfx6_gfx7_gfx10<0x077>;
+defm DS_READ2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x078>;
+defm DS_ADD_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x080>;
+defm DS_SUB_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x081>;
+defm DS_RSUB_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x082>;
+defm DS_INC_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x083>;
+defm DS_DEC_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x084>;
+defm DS_MIN_SRC2_I32 : DS_Real_gfx6_gfx7_gfx10<0x085>;
+defm DS_MAX_SRC2_I32 : DS_Real_gfx6_gfx7_gfx10<0x086>;
+defm DS_MIN_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x087>;
+defm DS_MAX_SRC2_U32 : DS_Real_gfx6_gfx7_gfx10<0x088>;
+defm DS_AND_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x089>;
+defm DS_OR_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08a>;
+defm DS_XOR_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08b>;
+defm DS_WRITE_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08d>;
+defm DS_MIN_SRC2_F32 : DS_Real_gfx6_gfx7_gfx10<0x092>;
+defm DS_MAX_SRC2_F32 : DS_Real_gfx6_gfx7_gfx10<0x093>;
+defm DS_ADD_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
+defm DS_SUB_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
+defm DS_RSUB_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
+defm DS_INC_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
+defm DS_DEC_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
+defm DS_MIN_SRC2_I64 : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
+defm DS_MAX_SRC2_I64 : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
+defm DS_MIN_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
+defm DS_MAX_SRC2_U64 : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
+defm DS_AND_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
+defm DS_OR_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
+defm DS_XOR_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
+defm DS_WRITE_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
+defm DS_MIN_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
+defm DS_MAX_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
//===----------------------------------------------------------------------===//
-// VIInstructions.td
+// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
DS_Real <ds>,
SIMCInstr <ds.Mnemonic, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace="VI";
+ let AssemblerPredicates = [isGFX8GFX9];
+ let DecoderNamespace = "GFX8";
// encoding
let Inst{7-0} = !if(ds.has_offset0, offset0, 0);
@@ -1008,7 +1054,7 @@ class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
let Inst{16} = !if(ds.has_gds, gds, ds.gdsValue);
let Inst{24-17} = op;
let Inst{31-26} = 0x36; // ds prefix
- let Inst{39-32} = !if(ds.has_addr, addr, 0);
+ let Inst{39-32} = !if(ds.has_addr, addr, !if(ds.has_gws_data0, data0, 0));
let Inst{47-40} = !if(ds.has_data0, data0, 0);
let Inst{55-48} = !if(ds.has_data1, data1, 0);
let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index f3de903f21b2..4ec4be9bc485 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,13 +21,14 @@
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
@@ -52,8 +52,22 @@ using namespace llvm;
#define DEBUG_TYPE "amdgpu-disassembler"
+#define SGPR_MAX (isGFX10() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
+ : AMDGPU::EncValues::SGPR_MAX_SI)
+
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
+AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
+ MCContext &Ctx,
+ MCInstrInfo const *MCII) :
+ MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+ TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
+
+ // ToDo: AMDGPUDisassembler supports only VI ISA.
+ if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10())
+ report_fatal_error("Disassembly not yet supported for subtarget");
+}
+
inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
Inst.addOperand(Opnd);
@@ -77,6 +91,8 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ // Our branches take a simm16, but we need two extra bits to account for the
+ // factor of 4.
APInt SignedOffset(18, Imm * 4, true);
int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
@@ -85,6 +101,12 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
return addOperand(Inst, MCOperand::createImm(Imm));
}
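
The offset arithmetic above boils down to: sign-extend the 16-bit branch field, scale it from dwords to bytes, and add it to the address of the instruction that follows the branch (Addr + 4). A standalone C++ sketch of that computation, with illustrative names rather than the in-tree helpers:

    #include <cstdint>

    // Assumed semantics, mirroring the APInt arithmetic above: the branch field
    // is a signed dword count relative to the next instruction.
    static uint64_t soppBranchTarget(uint16_t Imm, uint64_t Addr) {
      int64_t WordOffset = static_cast<int16_t>(Imm); // sign-extend simm16
      return Addr + 4 + WordOffset * 4;
    }
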
+static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
+ uint64_t Addr, const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeBoolReg(Val));
+}
+
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
unsigned Imm, \
@@ -98,6 +120,7 @@ static DecodeStatus StaticDecoderName(MCInst &Inst, \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
DECODE_OPERAND_REG(VGPR_32)
+DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)
@@ -109,12 +132,20 @@ DECODE_OPERAND_REG(VReg_128)
DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
+DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)
+DECODE_OPERAND_REG(AGPR_32)
+DECODE_OPERAND_REG(AReg_128)
+DECODE_OPERAND_REG(AReg_512)
+DECODE_OPERAND_REG(AReg_1024)
+DECODE_OPERAND_REG(AV_32)
+DECODE_OPERAND_REG(AV_64)
+
static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
unsigned Imm,
uint64_t Addr,
@@ -131,6 +162,62 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}
+static DecodeStatus decodeOperand_VS_16(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_32(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
+}
+
+static DecodeStatus decodeOperand_AReg_128(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
+}
+
+static DecodeStatus decodeOperand_AReg_512(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
+}
+
+static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
+}
+
+static DecodeStatus decodeOperand_SReg_32(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
+}
+
+static DecodeStatus decodeOperand_VGPR_32(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm));
+}
+
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
@@ -168,6 +255,16 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
return MCDisassembler::Fail;
}
+static bool isValidDPP8(const MCInst &MI) {
+ using namespace llvm::AMDGPU::DPP;
+ int FiIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::fi);
+ assert(FiIdx != -1);
+ if ((unsigned)FiIdx >= MI.getNumOperands())
+ return false;
+ unsigned Fi = MI.getOperand(FiIdx).getImm();
+ return Fi == DPP8_FI_0 || Fi == DPP8_FI_1;
+}
+
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
@@ -176,11 +273,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
CommentStream = &CS;
bool IsSDWA = false;
- // ToDo: AMDGPUDisassembler supports only VI ISA.
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
- report_fatal_error("Disassembly not yet supported for subtarget");
-
- const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
+ unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
DecodeStatus Res = MCDisassembler::Fail;
@@ -192,6 +285,13 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
+
+ Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
+ if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
+ break;
+
+ MI = MCInst(); // clear
+
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
if (Res) break;
@@ -201,6 +301,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
if (Res) { IsSDWA = true; break; }
+ Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
+ if (Res) { IsSDWA = true; break; }
+
+ // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
+ // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
+ // table first so we print the correct name.
+
+ if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
+ Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
+ if (Res) break;
+ }
+
if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
if (Res)
@@ -223,7 +335,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Try decode 32-bit instruction
if (Bytes.size() < 4) break;
const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
if (Res) break;
Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
@@ -232,33 +344,84 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
if (Res) break;
+ Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
+ if (Res) break;
+
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
- Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
if (Res) break;
Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
if (Res) break;
Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
+ if (Res) break;
+
+ Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
} while (false);
+ if (Res && (MaxInstBytesNum - Bytes.size()) == 12 && (!HasLiteral ||
+ !(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3))) {
+ MaxInstBytesNum = 8;
+ Bytes = Bytes_.slice(0, MaxInstBytesNum);
+ eatBytes<uint64_t>(Bytes);
+ }
+
if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
- MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_e64_gfx10 ||
MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
- MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_gfx10 ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F16_e64_gfx10)) {
// Insert dummy unused src2_modifiers.
insertNamedMCOperand(MI, MCOperand::createImm(0),
AMDGPU::OpName::src2_modifiers);
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
- Res = convertMIMGInst(MI);
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ int RsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
+ unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
+ if (VAddr0Idx >= 0 && NSAArgs > 0) {
+ unsigned NSAWords = (NSAArgs + 3) / 4;
+ if (Bytes.size() < 4 * NSAWords) {
+ Res = MCDisassembler::Fail;
+ } else {
+ for (unsigned i = 0; i < NSAArgs; ++i) {
+ MI.insert(MI.begin() + VAddr0Idx + 1 + i,
+ decodeOperand_VGPR_32(Bytes[i]));
+ }
+ Bytes = Bytes.slice(4 * NSAWords);
+ }
+ }
+
+ if (Res)
+ Res = convertMIMGInst(MI);
}
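
The NSA path above consumes one byte per extra VGPR address after vaddr0 and then advances the byte stream by whole dwords. A small standalone sketch of that bookkeeping, assuming (as the code implies) that the extra addresses are padded out to a 32-bit boundary:

    #include <cstddef>

    // Trailing instruction dwords occupied by the extra NSA addresses: one byte
    // per address after vaddr0, rounded up to a 32-bit boundary.
    static size_t nsaExtraDwords(size_t NSAArgs) {
      return (NSAArgs + 3) / 4;
    }
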
if (Res && IsSDWA)
Res = convertSDWAInst(MI);
+ int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vdst_in);
+ if (VDstIn_Idx != -1) {
+ int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
+ MCOI::OperandConstraint::TIED_TO);
+ if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
+ !MI.getOperand(VDstIn_Idx).isReg() ||
+ MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
+ if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
+ MI.erase(&MI.getOperand(VDstIn_Idx));
+ insertNamedMCOperand(MI,
+ MCOperand::createReg(MI.getOperand(Tied).getReg()),
+ AMDGPU::OpName::vdst_in);
+ }
+ }
+
// if the opcode was not recognized we'll assume a Size of 4 bytes
// (unless there are fewer bytes left)
Size = Res ? (MaxInstBytesNum - Bytes.size())
@@ -267,7 +430,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
+ STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
// VOPC - insert clamp
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
@@ -285,9 +449,27 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
return MCDisassembler::Success;
}
-// Note that MIMG format provides no information about VADDR size.
-// Consequently, decoded instructions always show address
-// as if it has 1 dword, which could be not really so.
+DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
+ unsigned Opc = MI.getOpcode();
+ unsigned DescNumOps = MCII->get(Opc).getNumOperands();
+
+ // Insert dummy unused src modifiers.
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src0_modifiers);
+
+ if (MI.getNumOperands() < DescNumOps &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1)
+ insertNamedMCOperand(MI, MCOperand::createImm(0),
+ AMDGPU::OpName::src1_modifiers);
+
+ return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail;
+}
+
+// Note that before gfx10, the MIMG encoding provided no information about
+// VADDR size. Consequently, decoded instructions always show the address as
+// if it were a single dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
@@ -295,7 +477,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdata);
-
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -308,16 +491,42 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
assert(DMaskIdx != -1);
assert(TFEIdx != -1);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
- unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
- if (DMask == 0)
- return MCDisassembler::Success;
+ bool IsNSA = false;
+ unsigned AddrSize = Info->VAddrDwords;
+
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ unsigned DimIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ const AMDGPU::MIMGDimInfo *Dim =
+ AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
+
+ AddrSize = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
+ if (!IsNSA) {
+ if (AddrSize > 8)
+ AddrSize = 16;
+ else if (AddrSize > 4)
+ AddrSize = 8;
+ } else {
+ if (AddrSize > Info->VAddrDwords) {
+ // The NSA encoding does not contain enough operands for the combination
+ // of base opcode / dimension. Should this be an error?
+ return MCDisassembler::Success;
+ }
+ }
+ }
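
In the gfx10 branch, the expected address size is the sum of the per-dimension operand counts; the contiguous (non-NSA) encoding then rounds that up to the next vaddr width it can represent. A standalone sketch of the computation (illustrative names, mirroring the BaseOpcode/Dim fields used above):

    // Illustrative only; the inputs correspond to the BaseOpcode/Dim fields
    // looked up above.
    static unsigned mimgAddrDwords(unsigned NumExtraArgs, unsigned NumGradients,
                                   unsigned NumCoords, bool LodOrClampOrMip,
                                   bool IsNSA) {
      unsigned Size = NumExtraArgs + NumGradients + NumCoords +
                      (LodOrClampOrMip ? 1 : 0);
      if (!IsNSA) {            // contiguous vaddr: round up to 8 or 16 dwords
        if (Size > 8)
          Size = 16;
        else if (Size > 4)
          Size = 8;
      }
      return Size;
    }
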
- unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
- if (DstSize == 1)
- return MCDisassembler::Success;
+ unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
+ unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);
bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
if (D16 && AMDGPU::hasPackedD16(STI)) {
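
The DstSize computed just above follows the dmask: each set bit enables one result channel, gather4 always returns four dwords, and an all-zero mask still occupies one dword. A minimal sketch, assuming the four-bit mask shown in the code:

    #include <bitset>

    // Sketch: gather4 always writes four dwords; otherwise the count of enabled
    // dmask channels decides the width, with a minimum of one dword.
    static unsigned mimgDstDwords(unsigned DMask, bool IsGather4) {
      if (IsGather4)
        return 4;
      unsigned N = static_cast<unsigned>(std::bitset<4>(DMask & 0xfu).count());
      return N ? N : 1;
    }
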
@@ -328,44 +537,64 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
if (MI.getOperand(TFEIdx).getImm())
return MCDisassembler::Success;
- int NewOpcode = -1;
+ if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
+ return MCDisassembler::Success;
+
+ int NewOpcode =
+ AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
+ if (NewOpcode == -1)
+ return MCDisassembler::Success;
- if (IsGather4) {
- if (D16 && AMDGPU::hasPackedD16(STI))
- NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
- else
+ // Widen the register to the correct number of enabled channels.
+ unsigned NewVdata = AMDGPU::NoRegister;
+ if (DstSize != Info->VDataDwords) {
+ auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+
+ // Get first subregister of VData
+ unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
+ unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
+ Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+
+ NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
+ &MRI.getRegClass(DataRCID));
+ if (NewVdata == AMDGPU::NoRegister) {
+ // It's possible to encode this such that the low register + enabled
+ // components exceeds the register count.
return MCDisassembler::Success;
- } else {
- NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
- if (NewOpcode == -1)
+ }
+ }
+
+ unsigned NewVAddr0 = AMDGPU::NoRegister;
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
+ AddrSize != Info->VAddrDwords) {
+ unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
+ unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
+ VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;
+
+ auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
+ NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
+ &MRI.getRegClass(AddrRCID));
+ if (NewVAddr0 == AMDGPU::NoRegister)
return MCDisassembler::Success;
}
- auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+ MI.setOpcode(NewOpcode);
- // Get first subregister of VData
- unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
- unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
- Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+ if (NewVdata != AMDGPU::NoRegister) {
+ MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
- // Widen the register to the correct number of enabled channels.
- auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
- &MRI.getRegClass(RCID));
- if (NewVdata == AMDGPU::NoRegister) {
- // It's possible to encode this such that the low register + enabled
- // components exceeds the register count.
- return MCDisassembler::Success;
+ if (IsAtomic) {
+ // Atomic operations have an additional operand (a copy of data)
+ MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+ }
}
- MI.setOpcode(NewOpcode);
- // vaddr will be always appear as a single VGPR. This will look different than
- // how it is usually emitted because the number of register components is not
- // in the instruction encoding.
- MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
-
- if (IsAtomic) {
- // Atomic operations have an additional operand (a copy of data)
- MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+ if (NewVAddr0 != AMDGPU::NoRegister) {
+ MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
+ } else if (IsNSA) {
+ assert(AddrSize <= Info->VAddrDwords);
+ MI.erase(MI.begin() + VAddr0Idx + AddrSize,
+ MI.begin() + VAddr0Idx + Info->VAddrDwords);
}
return MCDisassembler::Success;
@@ -470,6 +699,34 @@ MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
+ return decodeSrcOp(OPW32, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
+ return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
+ return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
+ return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
+ return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
+ return decodeSrcOp(OPW32, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
+ return decodeSrcOp(OPW64, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}
@@ -482,6 +739,14 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
+ return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
+ return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
// table-gen generated disassembler doesn't care about operand types
// leaving only registry class so SSrc_32 operand turns into SReg_32
@@ -501,6 +766,13 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
return decodeOperand_SReg_32(Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
+  // The TableGen-generated disassembler does not track operand types, only
+  // the register class, so an SSrc_32 operand turns into SReg_32 and
+  // immediates and literals are therefore accepted here as well.
+ return decodeSrcOp(OPW32, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
@@ -628,6 +900,9 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
switch (Width) {
case OPW32:
+ case OPW128: // splat constants
+ case OPW512:
+ case OPW1024:
return MCOperand::createImm(getInlineImmVal32(Imm));
case OPW64:
return MCOperand::createImm(getInlineImmVal64(Imm));
@@ -654,6 +929,24 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
}
}
+unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
+ using namespace AMDGPU;
+
+ assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
+ switch (Width) {
+  default: // fall through
+ case OPW32:
+ case OPW16:
+ case OPWV216:
+ return AGPR_32RegClassID;
+ case OPW64: return AReg_64RegClassID;
+ case OPW128: return AReg_128RegClassID;
+ case OPW512: return AReg_512RegClassID;
+ case OPW1024: return AReg_1024RegClassID;
+ }
+}
+
+
unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
using namespace AMDGPU;
@@ -691,8 +984,10 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
using namespace AMDGPU::EncValues;
- unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
- unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;
+ unsigned TTmpMin =
+ (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MIN : TTMP_VI_MIN;
+ unsigned TTmpMax =
+ (isGFX9() || isGFX10()) ? TTMP_GFX9_GFX10_MAX : TTMP_VI_MAX;
return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
@@ -700,10 +995,14 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
using namespace AMDGPU::EncValues;
- assert(Val < 512); // enum9
+ assert(Val < 1024); // enum10
+
+ bool IsAGPR = Val & 512;
+ Val &= 511;
if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
- return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
+ return createRegOperand(IsAGPR ? getAgprClassId(Width)
+ : getVgprClassId(Width), Val - VGPR_MIN);
}
if (Val <= SGPR_MAX) {
assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
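
decodeSrcOp now takes a 10-bit source encoding: bit 9 selects the accumulator (AGPR) register file and the low nine bits are decoded exactly as before. A small self-contained sketch of that split follows; the 256..511 VGPR window is the usual AMDGPU operand encoding, and the SGPR_MAX default is an assumption for illustration, not a value taken from this diff.

// Sketch only: classifies a raw 10-bit source-operand encoding.
#include <cstdio>

enum class RegFile { SGPR, VGPR, AGPR, Other };

static RegFile classifySrcOp(unsigned Val, unsigned SGPRMax = 105 /* assumed */) {
  // enum10: 0..1023; bit 9 flags the accumulator file, the rest is enum9.
  bool IsAGPR = Val & 512;
  Val &= 511;
  if (Val >= 256)     // VGPR_MIN (256) .. VGPR_MAX (511)
    return IsAGPR ? RegFile::AGPR : RegFile::VGPR;
  if (Val <= SGPRMax) // SGPR_MIN is 0
    return RegFile::SGPR;
  return RegFile::Other; // ttmp, inline constants, special registers, ...
}

int main() {
  std::printf("%d\n", static_cast<int>(classifySrcOp(256)));       // 1 (VGPR)
  std::printf("%d\n", static_cast<int>(classifySrcOp(512 + 256))); // 2 (AGPR)
  std::printf("%d\n", static_cast<int>(classifySrcOp(4)));         // 0 (SGPR)
  return 0;
}
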
@@ -765,23 +1064,23 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
case 105: return createRegOperand(XNACK_MASK_HI);
case 106: return createRegOperand(VCC_LO);
case 107: return createRegOperand(VCC_HI);
- case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
- case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
- case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
- case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
+ case 108: return createRegOperand(TBA_LO);
+ case 109: return createRegOperand(TBA_HI);
+ case 110: return createRegOperand(TMA_LO);
+ case 111: return createRegOperand(TMA_HI);
case 124: return createRegOperand(M0);
+ case 125: return createRegOperand(SGPR_NULL);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
case 235: return createRegOperand(SRC_SHARED_BASE);
case 236: return createRegOperand(SRC_SHARED_LIMIT);
case 237: return createRegOperand(SRC_PRIVATE_BASE);
case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
- // TODO: SRC_POPS_EXITING_WAVE_ID
- // ToDo: no support for vccz register
- case 251: break;
- // ToDo: no support for execz register
- case 252: break;
- case 253: return createRegOperand(SCC);
+ case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
+ case 251: return createRegOperand(SRC_VCCZ);
+ case 252: return createRegOperand(SRC_EXECZ);
+ case 253: return createRegOperand(SRC_SCC);
+ case 254: return createRegOperand(LDS_DIRECT);
default: break;
}
return errOperand(Val, "unknown operand encoding " + Twine(Val));
@@ -794,9 +1093,17 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
case 102: return createRegOperand(FLAT_SCR);
case 104: return createRegOperand(XNACK_MASK);
case 106: return createRegOperand(VCC);
- case 108: assert(!isGFX9()); return createRegOperand(TBA);
- case 110: assert(!isGFX9()); return createRegOperand(TMA);
+ case 108: return createRegOperand(TBA);
+ case 110: return createRegOperand(TMA);
case 126: return createRegOperand(EXEC);
+ case 235: return createRegOperand(SRC_SHARED_BASE);
+ case 236: return createRegOperand(SRC_SHARED_LIMIT);
+ case 237: return createRegOperand(SRC_PRIVATE_BASE);
+ case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
+ case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
+ case 251: return createRegOperand(SRC_VCCZ);
+ case 252: return createRegOperand(SRC_EXECZ);
+ case 253: return createRegOperand(SRC_SCC);
default: break;
}
return errOperand(Val, "unknown operand encoding " + Twine(Val));
@@ -807,16 +1114,18 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
using namespace AMDGPU::SDWA;
using namespace AMDGPU::EncValues;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
- // XXX: static_cast<int> is needed to avoid stupid warning:
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
+ STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+    // XXX: cast to int is needed to avoid a spurious warning:
// compare with unsigned is always true
- if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
+ if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
Val <= SDWA9EncValues::SRC_VGPR_MAX) {
return createRegOperand(getVgprClassId(Width),
Val - SDWA9EncValues::SRC_VGPR_MIN);
}
if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
- Val <= SDWA9EncValues::SRC_SGPR_MAX) {
+ Val <= (isGFX10() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
+ : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
return createSRegOperand(getSgprClassId(Width),
Val - SDWA9EncValues::SRC_SGPR_MIN);
}
@@ -852,24 +1161,34 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
using namespace AMDGPU::SDWA;
- assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
- "SDWAVopcDst should be present only on GFX9");
+ assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
+ STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
+ "SDWAVopcDst should be present only on GFX9+");
+
+ bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];
+
if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
int TTmpIdx = getTTmpIdx(Val);
if (TTmpIdx >= 0) {
return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
- } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
- return decodeSpecialReg64(Val);
+ } else if (Val > SGPR_MAX) {
+ return IsWave64 ? decodeSpecialReg64(Val)
+ : decodeSpecialReg32(Val);
} else {
- return createSRegOperand(getSgprClassId(OPW64), Val);
+ return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
}
} else {
- return createRegOperand(AMDGPU::VCC);
+ return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
}
}
+MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
+ return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
+ decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
+}
+
bool AMDGPUDisassembler::isVI() const {
return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}
@@ -878,6 +1197,10 @@ bool AMDGPUDisassembler::isGFX9() const {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
+bool AMDGPUDisassembler::isGFX10() const {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 75cfc5e11282..c5eaba615c2a 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -1,9 +1,8 @@
//===- AMDGPUDisassembler.hpp - Disassembler for AMDGPU ISA -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,15 +41,14 @@ class AMDGPUDisassembler : public MCDisassembler {
private:
std::unique_ptr<MCInstrInfo const> const MCII;
const MCRegisterInfo &MRI;
+ const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
mutable uint32_t Literal;
mutable bool HasLiteral;
public:
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
- MCInstrInfo const *MCII) :
- MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {}
-
+ MCInstrInfo const *MCII);
~AMDGPUDisassembler() override = default;
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
@@ -69,9 +67,12 @@ public:
uint64_t Address) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
+ DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
+ MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const;
+
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
MCOperand decodeOperand_VS_128(unsigned Val) const;
@@ -81,22 +82,33 @@ public:
MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;
MCOperand decodeOperand_VReg_128(unsigned Val) const;
+ MCOperand decodeOperand_VReg_256(unsigned Val) const;
+ MCOperand decodeOperand_VReg_512(unsigned Val) const;
MCOperand decodeOperand_SReg_32(unsigned Val) const;
MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
+ MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const;
MCOperand decodeOperand_SReg_64(unsigned Val) const;
MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
MCOperand decodeOperand_SReg_128(unsigned Val) const;
MCOperand decodeOperand_SReg_256(unsigned Val) const;
MCOperand decodeOperand_SReg_512(unsigned Val) const;
+ MCOperand decodeOperand_AGPR_32(unsigned Val) const;
+ MCOperand decodeOperand_AReg_128(unsigned Val) const;
+ MCOperand decodeOperand_AReg_512(unsigned Val) const;
+ MCOperand decodeOperand_AReg_1024(unsigned Val) const;
+ MCOperand decodeOperand_AV_32(unsigned Val) const;
+ MCOperand decodeOperand_AV_64(unsigned Val) const;
+
enum OpWidthTy {
OPW32,
OPW64,
OPW128,
OPW256,
OPW512,
+ OPW1024,
OPW16,
OPWV216,
OPW_LAST_,
@@ -104,6 +116,7 @@ public:
};
unsigned getVgprClassId(const OpWidthTy Width) const;
+ unsigned getAgprClassId(const OpWidthTy Width) const;
unsigned getSgprClassId(const OpWidthTy Width) const;
unsigned getTtmpClassId(const OpWidthTy Width) const;
@@ -121,11 +134,14 @@ public:
MCOperand decodeSDWASrc32(unsigned Val) const;
MCOperand decodeSDWAVopcDst(unsigned Val) const;
+ MCOperand decodeBoolReg(unsigned Val) const;
+
int getTTmpIdx(unsigned Val) const;
bool isVI() const;
bool isGFX9() const;
- };
+ bool isGFX10() const;
+};
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 944f4ffe598d..0550092ce1d6 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -1,9 +1,8 @@
//===-- EvergreenInstructions.td - EG Instruction defs ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 44040d352e6a..889f60dae920 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -1,17 +1,16 @@
//===-- FLATInstructions.td - FLAT Instruction Defintions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
-def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
-def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
+def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
//===----------------------------------------------------------------------===//
// FLAT classes
@@ -52,6 +51,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
bits<1> has_data = 1;
bits<1> has_glc = 1;
bits<1> glcValue = 0;
+ bits<1> has_dlc = 1;
+ bits<1> dlcValue = 0;
let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
!if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -64,6 +65,8 @@ class FLAT_Pseudo<string opName, dag outs, dag ins,
// and are not considered done until both have been decremented.
let VM_CNT = 1;
let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
+
+ let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
}
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
@@ -87,6 +90,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<1> slc;
bits<1> glc;
+ bits<1> dlc;
// Only valid on gfx9
bits<1> lds = 0; // XXX - What does this actually do?
@@ -131,18 +135,16 @@ class GlobalSaddrTable <bit is_saddr, string Name = ""> {
// saddr is 32-bit (which isn't handled here yet).
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0,
- bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+ bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
!con(
!con(
- !con(
- !con((ins VReg_64:$vaddr),
- !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
- (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
- (ins GLC:$glc, SLC:$slc)),
- !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
- " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+ !con((ins VReg_64:$vaddr),
+ !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
+ (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
+ " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = HasSaddr;
@@ -155,16 +157,14 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
}
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
- bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+ bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs),
!con(
- !con(
- !con((ins VReg_64:$vaddr, vdataClass:$vdata),
- !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
- (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
- (ins GLC:$glc, SLC:$slc)),
- " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+ !con((ins VReg_64:$vaddr, vdataClass:$vdata),
+ !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
+ (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -176,18 +176,18 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
- def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
+ def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1, 1>,
+ def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
GlobalSaddrTable<1, opName>;
}
}
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
let is_flat_global = 1 in {
- def "" : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
+ def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>,
+ def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
GlobalSaddrTable<1, opName>;
}
}
@@ -197,9 +197,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
opName,
(outs regClass:$vdst),
!if(EnableSaddr,
- (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
- (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
- " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+ (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = 1;
@@ -213,9 +213,9 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
opName,
(outs),
!if(EnableSaddr,
- (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
- " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+ (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+ " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@@ -247,6 +247,8 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
let mayStore = 1;
let has_glc = 0;
let glcValue = 0;
+ let has_dlc = 0;
+ let dlcValue = 0;
let has_vdst = 0;
let maybeAtomic = 1;
}
@@ -257,6 +259,7 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
let hasPostISelHook = 1;
let has_vdst = 1;
let glcValue = 1;
+ let dlcValue = 0;
let PseudoInstr = NAME # "_RTN";
}
@@ -266,24 +269,28 @@ multiclass FLAT_Atomic_Pseudo<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = getIsFP<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
" $vaddr, $vdata$offset$slc">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
+ let FPAtomic = isFP;
}
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
" $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
- AtomicNoRet <opName, 1>;
+ AtomicNoRet <opName, 1>{
+ let FPAtomic = isFP;
+ }
}
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
@@ -292,27 +299,30 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = getIsFP<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
" $vaddr, $vdata, off$offset$slc">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;
let PseudoInstr = NAME;
+ let FPAtomic = isFP;
}
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
- (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR";
+ let FPAtomic = isFP;
}
}
@@ -322,28 +332,31 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
ValueType vt,
SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc> {
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = getIsFP<data_vt>.ret> {
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
" $vdst, $vaddr, $vdata, off$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1> {
let has_saddr = 1;
+ let FPAtomic = isFP;
}
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
- (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, SLC:$slc),
+ (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
let enabled_saddr = 1;
let PseudoInstr = NAME#"_SADDR_RTN";
+ let FPAtomic = isFP;
}
}
@@ -491,7 +504,8 @@ defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
VReg_64, i64, atomic_dec_flat>;
-let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?
+// Flat instructions available only on GFX7 and GFX10.
+let SubtargetPredicate = isGFX7GFX10 in {
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
VGPR_32, f32, null_frag, v2f32, VReg_64>;
@@ -511,7 +525,7 @@ defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
VReg_64, f64>;
-} // End SubtargetPredicate = isCI
+} // End SubtargetPredicate = isGFX7GFX10
let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
@@ -654,6 +668,32 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
} // End SubtargetPredicate = HasFlatScratchInsts
+let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
+ defm GLOBAL_ATOMIC_FCMPSWAP :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+ defm GLOBAL_ATOMIC_FMIN :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+ defm GLOBAL_ATOMIC_FMAX :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+ defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_FMIN_X2 :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+ defm GLOBAL_ATOMIC_FMAX_X2 :
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
+
+let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
+
+defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
+ "global_atomic_add_f32", VGPR_32, f32, atomic_add_global
+>;
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
+ "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
+>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -661,89 +701,51 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, $slc)
+ (inst $vaddr, $offset, 0, 0, $slc)
>;
-multiclass FlatLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
- def : GCNPat <
- (build_vector vt:$elt0, (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))),
- (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0))
- >;
-
- def : GCNPat <
- (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))))),
- (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0))
- >;
-}
-
-multiclass FlatSignedLoadPat_Hi16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
- def : GCNPat <
- (build_vector vt:$elt0, (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))),
- (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0))
- >;
-
- def : GCNPat <
- (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))))),
- (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0))
- >;
-}
-
-multiclass FlatLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
- def : GCNPat <
- (build_vector (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))),
- (v2i16 (inst $vaddr, $offset, 0, $slc, $hi))
- >;
-
- def : GCNPat <
- (build_vector (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))),
- (v2f16 (inst $vaddr, $offset, 0, $slc, $hi))
- >;
-}
-
-multiclass FlatSignedLoadPat_Lo16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt = i16> {
- def : GCNPat <
- (build_vector (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), (vt (Hi16Elt vt:$hi))),
- (v2i16 (inst $vaddr, $offset, 0, $slc, $hi))
- >;
+class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
+ (inst $vaddr, $offset, 0, 0, $slc, $in)
+>;
- def : GCNPat <
- (build_vector (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))))), (f16 (Hi16Elt f16:$hi))),
- (v2f16 (inst $vaddr, $offset, 0, $slc, $hi))
- >;
-}
+class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
+ (inst $vaddr, $offset, 0, 0, $slc, $in)
+>;
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, $slc)
+ (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 0, 0, $slc)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
- (inst $vaddr, $offset, 0, $slc)
+ (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
+ (inst $vaddr, $offset, 0, 0, $slc)
>;
-class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
(node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;
-class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
(node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;
-class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;
-class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
(node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
- (inst $vaddr, $data, $offset, 0, $slc)
+ (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
@@ -752,6 +754,11 @@ class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
(inst $vaddr, $data, $offset, $slc)
>;
+class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
+ (inst $vaddr, $data, $offset, $slc)
+>;
+
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : GCNPat <
(vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
@@ -760,28 +767,33 @@ class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
let OtherPredicates = [HasFlatAddressSpace] in {
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_flat, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_flat, i64>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
+def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
@@ -818,62 +830,77 @@ let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
-let AddedComplexity = 3 in {
-defm : FlatLoadPat_Hi16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_flat>;
-defm : FlatLoadPat_Hi16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_flat>;
-defm : FlatLoadPat_Hi16 <FLAT_LOAD_SHORT_D16_HI, load_flat>;
-}
-
-let AddedComplexity = 9 in {
-defm : FlatLoadPat_Lo16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_flat>;
-defm : FlatLoadPat_Lo16 <FLAT_LOAD_SBYTE_D16, sextloadi8_flat>;
-defm : FlatLoadPat_Lo16 <FLAT_LOAD_SHORT_D16, load_flat>;
-}
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
+
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
+def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
}
} // End OtherPredicates = [HasFlatAddressSpace]
+def atomic_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_fadd>;
+def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>;
+
let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_global, i32>;
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_global, i64>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
-defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_global>;
-defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_global>;
-defm : FlatSignedLoadPat_Hi16 <GLOBAL_LOAD_SHORT_D16_HI, load_global>;
-
-defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_global>;
-defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_global>;
-defm : FlatSignedLoadPat_Lo16 <GLOBAL_LOAD_SHORT_D16, load_global>;
-
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
+
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
+def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
@@ -903,7 +930,10 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
-} // End OtherPredicates = [HasFlatGlobalInsts]
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global, f32>;
+def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, v2f16>;
+
+} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
//===----------------------------------------------------------------------===//
@@ -917,8 +947,8 @@ def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
- let AssemblerPredicate = isCIOnly;
- let DecoderNamespace="CI";
+ let AssemblerPredicate = isGFX7Only;
+ let DecoderNamespace="GFX7";
}
def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
@@ -985,8 +1015,8 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
- let AssemblerPredicate = isVI;
- let DecoderNamespace="VI";
+ let AssemblerPredicate = isGFX8GFX9;
+ let DecoderNamespace = "GFX8";
}
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
@@ -1133,3 +1163,200 @@ defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
+ FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
+ let AssemblerPredicate = isGFX10Plus;
+ let DecoderNamespace = "GFX10";
+
+ let Inst{11-0} = {offset{12}, offset{10-0}};
+ let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
+ let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
+ let Inst{55} = 0;
+}
+
+
+multiclass FLAT_Real_Base_gfx10<bits<7> op> {
+ def _gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
+}
+
+multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
+ def _RTN_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+}
+
+multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
+ def _SADDR_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+}
+
+multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
+ def _SADDR_RTN_gfx10 :
+ FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+}
+
+
+multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
+ FLAT_Real_Base_gfx10<op>,
+ FLAT_Real_SADDR_gfx10<op>;
+
+multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
+ FLAT_Real_Base_gfx10<op>,
+ FLAT_Real_RTN_gfx10<op>;
+
+multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
+ FLAT_Real_AllAddr_gfx10<op>,
+ FLAT_Real_RTN_gfx10<op>,
+ FLAT_Real_SADDR_RTN_gfx10<op>;
+
+
+// ENC_FLAT.
+defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>;
+defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>;
+defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>;
+defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>;
+defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>;
+defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>;
+defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>;
+defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>;
+defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>;
+defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>;
+defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>;
+defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
+defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>;
+defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>;
+defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>;
+defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>;
+defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>;
+defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>;
+defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>;
+defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>;
+defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>;
+defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>;
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>;
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>;
+defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>;
+defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>;
+
+
+// ENC_FLAT_GLBL.
+defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
+defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
+defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
+defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
+defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
+defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
+defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
+defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
+defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
+defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
+defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
+defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
+defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
+defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
+defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
+defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
+defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
+defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>;
+defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>;
+defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>;
+defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>;
+defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>;
+defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>;
+defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>;
+defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>;
+defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>;
+defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>;
+defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>;
+defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>;
+defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>;
+defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>;
+defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>;
+defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>;
+defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>;
+defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>;
+defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>;
+defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>;
+defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>;
+defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
+defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
+defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
+defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
+
+
+// ENC_FLAT_SCRATCH.
+defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>;
+defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>;
+defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>;
+defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>;
+defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>;
+defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>;
+defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>;
+defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>;
+defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>;
+defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
+defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>;
+defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>;
+defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>;
+defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>;
+defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>;
+defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>;
+defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>;
+
+let SubtargetPredicate = HasAtomicFaddInsts in {
+
+defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_AllAddr_vi <0x04d>;
+defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
+
+} // End SubtargetPredicate = HasAtomicFaddInsts
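
The FLAT_Real_gfx10 class above encodes the offset as {offset{12}, offset{10-0}}: the operand's bit 12 lands in instruction bit 11, while offset bit 11 is not encoded at all. A standalone sketch of that bit packing, assuming nothing beyond the field layout visible in the TableGen class; it is illustrative, not part of the emitter.

// Sketch only: packs an offset operand into the 12-bit Inst{11-0} field.
#include <cstdint>
#include <cstdio>

static uint32_t packGfx10FlatOffsetField(int32_t Offset) {
  uint32_t U = static_cast<uint32_t>(Offset);
  uint32_t SignBit = (U >> 12) & 1; // offset{12}
  uint32_t Low11 = U & 0x7ff;       // offset{10-0}; offset{11} is dropped
  return (SignBit << 11) | Low11;   // Inst{11-0}
}

int main() {
  std::printf("0x%03x\n", packGfx10FlatOffsetField(5));  // 0x005
  std::printf("0x%03x\n", packGfx10FlatOffsetField(-1)); // 0xfff
  return 0;
}
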
diff --git a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 56071d0d2374..e1845e2e8e87 100644
--- a/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -1,37 +1,40 @@
//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
-// operand.If any of the use instruction cannot be combined with the mov the
+// operand. If any of the use instructions cannot be combined with the mov, the
// whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
-// dpp_controls..., $bound_ctrl
-// $res = VALU $dpp_value, ...
+// dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
+// $res = VALU $dpp_value [, src1]
//
// to
//
-// $res = VALU_DPP $folded_old, $vgpr_to_be_read_from_other_lane, ...,
-// dpp_controls..., $folded_bound_ctrl
+// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
+// dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules :
//
-// $bound_ctrl is DPP_BOUND_ZERO, $old is any
-// $bound_ctrl is DPP_BOUND_OFF, $old is 0
+// if $row_mask and $bank_mask are fully enabled (0xF) and
+// $bound_ctrl==DPP_BOUND_ZERO or $old==0
+// -> $combined_old = undef,
+// $combined_bound_ctrl = DPP_BOUND_ZERO
//
-// ->$folded_old = undef, $folded_bound_ctrl = DPP_BOUND_ZERO
-// $bound_ctrl is DPP_BOUND_OFF, $old is undef
+// if the VALU op is binary and
+// $bound_ctrl==DPP_BOUND_OFF and
+// $old==identity value (immediate) for the VALU op
+// -> $combined_old = src1,
+// $combined_bound_ctrl = DPP_BOUND_OFF
//
-// ->$folded_old = undef, $folded_bound_ctrl = DPP_BOUND_OFF
-// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
+// Otherwise cancel.
//
-// ->$folded_old = folded value, $folded_bound_ctrl = DPP_BOUND_OFF
+// The mov_dpp instruction should reside in the same BB as all its uses.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
@@ -67,20 +70,16 @@ class GCNDPPCombine : public MachineFunctionPass {
MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
- RegSubRegPair foldOldOpnd(MachineInstr &OrigMI,
- RegSubRegPair OldOpndVGPR,
- MachineOperand &OldOpndValue) const;
-
MachineInstr *createDPPInst(MachineInstr &OrigMI,
MachineInstr &MovMI,
- RegSubRegPair OldOpndVGPR,
+ RegSubRegPair CombOldVGPR,
MachineOperand *OldOpnd,
- bool BoundCtrlZero) const;
+ bool CombBCZ) const;
MachineInstr *createDPPInst(MachineInstr &OrigMI,
MachineInstr &MovMI,
- RegSubRegPair OldOpndVGPR,
- bool BoundCtrlZero) const;
+ RegSubRegPair CombOldVGPR,
+ bool CombBCZ) const;
bool hasNoImmOrEqual(MachineInstr &MI,
unsigned OpndName,
@@ -153,8 +152,8 @@ MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
MachineInstr &MovMI,
- RegSubRegPair OldOpndVGPR,
- bool BoundCtrlZero) const {
+ RegSubRegPair CombOldVGPR,
+ bool CombBCZ) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
@@ -178,9 +177,15 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
if (OldIdx != -1) {
assert(OldIdx == NumOperands);
- assert(isOfRegClass(OldOpndVGPR, AMDGPU::VGPR_32RegClass, *MRI));
- DPPInst.addReg(OldOpndVGPR.Reg, 0, OldOpndVGPR.SubReg);
+ assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
+ DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg);
++NumOperands;
+ } else {
+ // TODO: this discards MAC/FMA instructions for now, let's add it later
+ LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
+ " TBD\n");
+ Fail = true;
+ break;
}
if (auto *Mod0 = TII->getNamedOperand(OrigMI,
@@ -199,6 +204,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
break;
}
DPPInst.add(*Src0);
+ DPPInst->getOperand(NumOperands).setIsKill(false);
++NumOperands;
if (auto *Mod1 = TII->getNamedOperand(OrigMI,
@@ -231,7 +237,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
- DPPInst.addImm(BoundCtrlZero ? 1 : 0);
+ DPPInst.addImm(CombBCZ ? 1 : 0);
} while (false);
if (Fail) {
@@ -242,64 +248,81 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
return DPPInst.getInstr();
}
-GCNDPPCombine::RegSubRegPair
-GCNDPPCombine::foldOldOpnd(MachineInstr &OrigMI,
- RegSubRegPair OldOpndVGPR,
- MachineOperand &OldOpndValue) const {
- assert(OldOpndValue.isImm());
- switch (OrigMI.getOpcode()) {
+static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
+ assert(OldOpnd->isImm());
+ switch (OrigMIOp) {
default: break;
+ case AMDGPU::V_ADD_U32_e32:
+ case AMDGPU::V_ADD_U32_e64:
+ case AMDGPU::V_ADD_I32_e32:
+ case AMDGPU::V_ADD_I32_e64:
+ case AMDGPU::V_OR_B32_e32:
+ case AMDGPU::V_OR_B32_e64:
+ case AMDGPU::V_SUBREV_U32_e32:
+ case AMDGPU::V_SUBREV_U32_e64:
+ case AMDGPU::V_SUBREV_I32_e32:
+ case AMDGPU::V_SUBREV_I32_e64:
case AMDGPU::V_MAX_U32_e32:
- if (OldOpndValue.getImm() == std::numeric_limits<uint32_t>::max())
- return OldOpndVGPR;
+ case AMDGPU::V_MAX_U32_e64:
+ case AMDGPU::V_XOR_B32_e32:
+ case AMDGPU::V_XOR_B32_e64:
+ if (OldOpnd->getImm() == 0)
+ return true;
break;
- case AMDGPU::V_MAX_I32_e32:
- if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::max())
- return OldOpndVGPR;
+ case AMDGPU::V_AND_B32_e32:
+ case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::V_MIN_U32_e32:
+ case AMDGPU::V_MIN_U32_e64:
+ if (static_cast<uint32_t>(OldOpnd->getImm()) ==
+ std::numeric_limits<uint32_t>::max())
+ return true;
break;
case AMDGPU::V_MIN_I32_e32:
- if (OldOpndValue.getImm() == std::numeric_limits<int32_t>::min())
- return OldOpndVGPR;
+ case AMDGPU::V_MIN_I32_e64:
+ if (static_cast<int32_t>(OldOpnd->getImm()) ==
+ std::numeric_limits<int32_t>::max())
+ return true;
+ break;
+ case AMDGPU::V_MAX_I32_e32:
+ case AMDGPU::V_MAX_I32_e64:
+ if (static_cast<int32_t>(OldOpnd->getImm()) ==
+ std::numeric_limits<int32_t>::min())
+ return true;
break;
-
case AMDGPU::V_MUL_I32_I24_e32:
+ case AMDGPU::V_MUL_I32_I24_e64:
case AMDGPU::V_MUL_U32_U24_e32:
- if (OldOpndValue.getImm() == 1) {
- auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
- assert(Src1 && Src1->isReg());
- return getRegSubRegPair(*Src1);
- }
+ case AMDGPU::V_MUL_U32_U24_e64:
+ if (OldOpnd->getImm() == 1)
+ return true;
break;
}
- return RegSubRegPair();
+ return false;
}
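// For reference, a few of the identity relations the fold relies on, expressed
// as plain C++ checks (illustrative; 42u is an arbitrary sample value):
static_assert(0u + 42u == 42u, "0 is the identity of unsigned add");
static_assert((0u | 42u) == 42u && (0u ^ 42u) == 42u, "0 is the identity of or/xor");
static_assert((~0u & 42u) == 42u, "~0 is the identity of bitwise and");
static_assert(1u * 42u == 42u, "1 is the identity of 24-bit mul");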
-// Cases to combine:
-// $bound_ctrl is DPP_BOUND_ZERO, $old is any
-// $bound_ctrl is DPP_BOUND_OFF, $old is 0
-// -> $old = undef, $bound_ctrl = DPP_BOUND_ZERO
-
-// $bound_ctrl is DPP_BOUND_OFF, $old is undef
-// -> $old = undef, $bound_ctrl = DPP_BOUND_OFF
-
-// $bound_ctrl is DPP_BOUND_OFF, $old is foldable
-// -> $old = folded value, $bound_ctrl = DPP_BOUND_OFF
-
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
MachineInstr &MovMI,
- RegSubRegPair OldOpndVGPR,
+ RegSubRegPair CombOldVGPR,
MachineOperand *OldOpndValue,
- bool BoundCtrlZero) const {
- assert(OldOpndVGPR.Reg);
- if (!BoundCtrlZero && OldOpndValue) {
- assert(OldOpndValue->isImm());
- OldOpndVGPR = foldOldOpnd(OrigMI, OldOpndVGPR, *OldOpndValue);
- if (!OldOpndVGPR.Reg) {
- LLVM_DEBUG(dbgs() << " failed: old immediate cannot be folded\n");
+ bool CombBCZ) const {
+ assert(CombOldVGPR.Reg);
+ if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
+ auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
+ if (!Src1 || !Src1->isReg()) {
+ LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
+ return nullptr;
+ }
+ if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
+ LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
+ return nullptr;
+ }
+ CombOldVGPR = getRegSubRegPair(*Src1);
+ if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
+ LLVM_DEBUG(dbgs() << " failed: src1 isn't a VGPR32 register\n");
return nullptr;
}
}
- return createDPPInst(OrigMI, MovMI, OldOpndVGPR, BoundCtrlZero);
+ return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
}
// returns true if MI doesn't have OpndName immediate operand or the
@@ -316,31 +339,64 @@ bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
+ LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+
+ auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
+ assert(DstOpnd && DstOpnd->isReg());
+ auto DPPMovReg = DstOpnd->getReg();
+ if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
+ LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
+ " for all uses\n");
+ return false;
+ }
+
+ auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
+ assert(RowMaskOpnd && RowMaskOpnd->isImm());
+ auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
+ assert(BankMaskOpnd && BankMaskOpnd->isImm());
+ const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
+ BankMaskOpnd->getImm() == 0xF;
+
auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
assert(BCZOpnd && BCZOpnd->isImm());
- bool BoundCtrlZero = 0 != BCZOpnd->getImm();
-
- LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
+ bool BoundCtrlZero = BCZOpnd->getImm();
auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
assert(OldOpnd && OldOpnd->isReg());
- auto OldOpndVGPR = getRegSubRegPair(*OldOpnd);
- auto *OldOpndValue = getOldOpndValue(*OldOpnd);
+
+ auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
+ // OldOpndValue is either undef (IMPLICIT_DEF), an immediate, or something
+ // else. We could use assert(!OldOpndValue || OldOpndValue->isImm()), but the
+ // third option is kept to distinguish undef from a non-immediate value so the
+ // IMPLICIT_DEF instruction can be reused later.
assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
- if (OldOpndValue) {
- if (BoundCtrlZero) {
- OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef, ignore old opnd
- OldOpndValue = nullptr;
- } else {
- if (!OldOpndValue->isImm()) {
- LLVM_DEBUG(dbgs() << " failed: old operand isn't an imm or undef\n");
- return false;
- }
- if (OldOpndValue->getImm() == 0) {
- OldOpndVGPR.Reg = AMDGPU::NoRegister; // should be undef
- OldOpndValue = nullptr;
- BoundCtrlZero = true;
+
+ bool CombBCZ = false;
+
+ if (MaskAllLanes && BoundCtrlZero) { // [1]
+ CombBCZ = true;
+ } else {
+ if (!OldOpndValue || !OldOpndValue->isImm()) {
+ LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
+ return false;
+ }
+
+ if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
+ LLVM_DEBUG(dbgs() <<
+ " failed: old reg def and mov should be in the same BB\n");
+ return false;
+ }
+
+ if (OldOpndValue->getImm() == 0) {
+ if (MaskAllLanes) {
+ assert(!BoundCtrlZero); // by check [1]
+ CombBCZ = true;
}
+ } else if (BoundCtrlZero) {
+ assert(!MaskAllLanes); // by check [1]
+ LLVM_DEBUG(dbgs() <<
+ " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
+ return false;
}
}
@@ -348,25 +404,28 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
if (!OldOpndValue)
dbgs() << "undef";
else
- dbgs() << OldOpndValue->getImm();
- dbgs() << ", bound_ctrl=" << BoundCtrlZero << '\n');
-
- std::vector<MachineInstr*> OrigMIs, DPPMIs;
- if (!OldOpndVGPR.Reg) { // OldOpndVGPR = undef
- OldOpndVGPR = RegSubRegPair(
+ dbgs() << *OldOpndValue;
+ dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
+
+ SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
+ auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
+ // try to reuse the previous old reg if it's undefined (IMPLICIT_DEF)
+ if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
+ CombOldVGPR = RegSubRegPair(
MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
- TII->get(AMDGPU::IMPLICIT_DEF), OldOpndVGPR.Reg);
+ TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
DPPMIs.push_back(UndefInst.getInstr());
}
OrigMIs.push_back(&MovMI);
bool Rollback = true;
- for (auto &Use : MRI->use_nodbg_operands(
- TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg())) {
+ for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
Rollback = true;
auto &OrigMI = *Use.getParent();
+ LLVM_DEBUG(dbgs() << " try: " << OrigMI);
+
auto OrigOp = OrigMI.getOpcode();
if (TII->isVOP3(OrigOp)) {
if (!TII->hasVALU32BitEncoding(OrigOp)) {
@@ -389,8 +448,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
- if (auto *DPPInst = createDPPInst(OrigMI, MovMI, OldOpndVGPR,
- OldOpndValue, BoundCtrlZero)) {
+ if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
+ OldOpndValue, CombBCZ)) {
DPPMIs.push_back(DPPInst);
Rollback = false;
}
@@ -401,8 +460,8 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
BB->insert(OrigMI, NewMI);
if (TII->commuteInstruction(*NewMI)) {
LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
- if (auto *DPPInst = createDPPInst(*NewMI, MovMI, OldOpndVGPR,
- OldOpndValue, BoundCtrlZero)) {
+ if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
+ OldOpndValue, CombBCZ)) {
DPPMIs.push_back(DPPInst);
Rollback = false;
}
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c6396de89c4f..885239e2faed 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===-- GCNHazardRecognizers.cpp - GCN Hazard Recognizer Impls ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -38,6 +38,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
+ IsHazardRecognizerMode(false),
CurrCycleInstr(nullptr),
MF(MF),
ST(MF.getSubtarget<GCNSubtarget>()),
@@ -45,7 +46,8 @@ GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
TRI(TII.getRegisterInfo()),
ClauseUses(TRI.getNumRegUnits()),
ClauseDefs(TRI.getNumRegUnits()) {
- MaxLookAhead = 5;
+ MaxLookAhead = MF.getRegInfo().isPhysRegUsed(AMDGPU::AGPR0) ? 18 : 5;
+ TSchedModel.init(&ST);
}
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
@@ -88,18 +90,38 @@ static bool isSMovRel(unsigned Opcode) {
}
}
-static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
+ const MachineInstr &MI) {
+ if (TII.isAlwaysGDS(MI.getOpcode()))
+ return true;
+
switch (MI.getOpcode()) {
case AMDGPU::S_SENDMSG:
case AMDGPU::S_SENDMSGHALT:
case AMDGPU::S_TTRACEDATA:
return true;
+ // These DS opcodes don't support GDS.
+ case AMDGPU::DS_NOP:
+ case AMDGPU::DS_PERMUTE_B32:
+ case AMDGPU::DS_BPERMUTE_B32:
+ return false;
default:
- // TODO: GDS
+ if (TII.isDS(MI.getOpcode())) {
+ int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::gds);
+ if (MI.getOperand(GDS).getImm())
+ return true;
+ }
return false;
}
}
+static bool isPermlane(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ return Opcode == AMDGPU::V_PERMLANE16_B32 ||
+ Opcode == AMDGPU::V_PERMLANEX16_B32;
+}
+
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
@@ -109,6 +131,8 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
+ if (MI->isBundle())
+ return NoHazard;
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return NoopHazard;
@@ -119,6 +143,15 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
&& checkVMEMHazards(MI) > 0)
return NoopHazard;
+ if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
+ return NoopHazard;
+
+ if (checkFPAtomicToDenormModeHazard(MI) > 0)
+ return NoopHazard;
+
+ if (ST.hasNoDataDepHazard())
+ return NoHazard;
+
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
return NoopHazard;
@@ -145,10 +178,16 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
checkReadM0Hazards(MI) > 0)
return NoopHazard;
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
+ if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
+ if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
+ return NoopHazard;
+
+ if ((MI->mayLoad() || MI->mayStore()) && checkMAILdStHazards(MI) > 0)
+ return NoopHazard;
+
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
return NoopHazard;
@@ -158,22 +197,74 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return NoHazard;
}
+static void insertNoopInBundle(MachineInstr *MI, const SIInstrInfo &TII) {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
+ .addImm(0);
+}
+
+void GCNHazardRecognizer::processBundle() {
+ MachineBasicBlock::instr_iterator MI = std::next(CurrCycleInstr->getIterator());
+ MachineBasicBlock::instr_iterator E = CurrCycleInstr->getParent()->instr_end();
+ // Check bundled MachineInstr's for hazards.
+ for (; MI != E && MI->isInsideBundle(); ++MI) {
+ CurrCycleInstr = &*MI;
+ unsigned WaitStates = PreEmitNoopsCommon(CurrCycleInstr);
+
+ if (IsHazardRecognizerMode)
+ fixHazards(CurrCycleInstr);
+
+ for (unsigned i = 0; i < WaitStates; ++i)
+ insertNoopInBundle(CurrCycleInstr, TII);
+
+ // It’s unnecessary to track more than MaxLookAhead instructions. Since we
+ // include the bundled MI directly after, only add a maximum of
+ // (MaxLookAhead - 1) noops to EmittedInstrs.
+ for (unsigned i = 0, e = std::min(WaitStates, MaxLookAhead - 1); i < e; ++i)
+ EmittedInstrs.push_front(nullptr);
+
+ EmittedInstrs.push_front(CurrCycleInstr);
+ EmittedInstrs.resize(MaxLookAhead);
+ }
+ CurrCycleInstr = nullptr;
+}
+
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
- return PreEmitNoops(SU->getInstr());
+ IsHazardRecognizerMode = false;
+ return PreEmitNoopsCommon(SU->getInstr());
}
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ IsHazardRecognizerMode = true;
+ CurrCycleInstr = MI;
+ unsigned W = PreEmitNoopsCommon(MI);
+ fixHazards(MI);
+ CurrCycleInstr = nullptr;
+ return W;
+}
+
+unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
+ if (MI->isBundle())
+ return 0;
+
int WaitStates = std::max(0, checkAnyInstHazards(MI));
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
- if (SIInstrInfo::isVALU(*MI))
- WaitStates = std::max(WaitStates, checkVALUHazards(MI));
-
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+ if (ST.hasNSAtoVMEMBug())
+ WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
+
+ WaitStates = std::max(WaitStates, checkFPAtomicToDenormModeHazard(MI));
+
+ if (ST.hasNoDataDepHazard())
+ return WaitStates;
+
+ if (SIInstrInfo::isVALU(*MI))
+ WaitStates = std::max(WaitStates, checkVALUHazards(MI));
+
if (SIInstrInfo::isDPP(*MI))
WaitStates = std::max(WaitStates, checkDPPHazards(MI));
@@ -199,9 +290,15 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
isSMovRel(MI->getOpcode())))
return std::max(WaitStates, checkReadM0Hazards(MI));
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
+ if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
+ if (SIInstrInfo::isMAI(*MI))
+ return std::max(WaitStates, checkMAIHazards(MI));
+
+ if (MI->mayLoad() || MI->mayStore())
+ return std::max(WaitStates, checkMAILdStHazards(MI));
+
return WaitStates;
}
@@ -218,10 +315,14 @@ void GCNHazardRecognizer::AdvanceCycle() {
// Do not track non-instructions which do not affect the wait states.
// If included, these instructions can lead to buffer overflow such that
// detectable hazards are missed.
- if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF)
+ if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
+ CurrCycleInstr->isKill())
return;
- else if (CurrCycleInstr->isDebugInstr())
+
+ if (CurrCycleInstr->isBundle()) {
+ processBundle();
return;
+ }
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
@@ -252,41 +353,112 @@ void GCNHazardRecognizer::RecedeCycle() {
// Helper Functions
//===----------------------------------------------------------------------===//
-int GCNHazardRecognizer::getWaitStatesSince(
- function_ref<bool(MachineInstr *)> IsHazard) {
+typedef function_ref<bool(MachineInstr *, int WaitStates)> IsExpiredFn;
+
+// Returns the minimum number of wait states since \p I, walking all
+// predecessors. Stops scanning once \p IsExpired returns true.
+// Can only be run in hazard recognizer mode.
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::reverse_instr_iterator I,
+ int WaitStates,
+ IsExpiredFn IsExpired,
+ DenseSet<const MachineBasicBlock *> &Visited) {
+ for (auto E = MBB->instr_rend(); I != E; ++I) {
+ // Don't add WaitStates for parent BUNDLE instructions.
+ if (I->isBundle())
+ continue;
+
+ if (IsHazard(&*I))
+ return WaitStates;
+
+ if (I->isInlineAsm() || I->isImplicitDef() || I->isDebugInstr())
+ continue;
+
+ WaitStates += SIInstrInfo::getNumWaitStates(*I);
+
+ if (IsExpired(&*I, WaitStates))
+ return std::numeric_limits<int>::max();
+ }
+
+ int MinWaitStates = WaitStates;
+ bool Found = false;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+
+ int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
+ WaitStates, IsExpired, Visited);
+
+ if (W == std::numeric_limits<int>::max())
+ continue;
+
+ MinWaitStates = Found ? std::min(MinWaitStates, W) : W;
+ if (IsExpired(nullptr, MinWaitStates))
+ return MinWaitStates;
+
+ Found = true;
+ }
+
+ if (Found)
+ return MinWaitStates;
+
+ return std::numeric_limits<int>::max();
+}
+
+static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
+ MachineInstr *MI,
+ IsExpiredFn IsExpired) {
+ DenseSet<const MachineBasicBlock *> Visited;
+ return getWaitStatesSince(IsHazard, MI->getParent(),
+ std::next(MI->getReverseIterator()),
+ 0, IsExpired, Visited);
+}
+
+int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
+ if (IsHazardRecognizerMode) {
+ auto IsExpiredFn = [Limit] (MachineInstr *, int WaitStates) {
+ return WaitStates >= Limit;
+ };
+ return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
+ }
+
int WaitStates = 0;
for (MachineInstr *MI : EmittedInstrs) {
if (MI) {
if (IsHazard(MI))
return WaitStates;
- unsigned Opcode = MI->getOpcode();
- if (Opcode == AMDGPU::INLINEASM)
+ if (MI->isInlineAsm())
continue;
}
++WaitStates;
+
+ if (WaitStates >= Limit)
+ break;
}
return std::numeric_limits<int>::max();
}
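// A condensed, illustrative sketch of the recurring pattern in the check*
// helpers below (the names here are placeholders, not from this file): each
// hazard contributes (Required - Elapsed) wait states, and negative
// contributions drop out, since a missing hazard yields Elapsed == INT_MAX.
static int accumulateWaitStates(int WaitStatesNeeded, int RequiredWaitStates,
                                int WaitStatesSinceHazard) {
  return std::max(WaitStatesNeeded,
                  RequiredWaitStates - WaitStatesSinceHazard);
}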
-int GCNHazardRecognizer::getWaitStatesSinceDef(
- unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
+int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
+ IsHazardFn IsHazardDef,
+ int Limit) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
};
- return getWaitStatesSince(IsHazardFn);
+ return getWaitStatesSince(IsHazardFn, Limit);
}
-int GCNHazardRecognizer::getWaitStatesSinceSetReg(
- function_ref<bool(MachineInstr *)> IsHazard) {
+int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
+ int Limit) {
auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
return isSSetReg(MI->getOpcode()) && IsHazard(MI);
};
- return getWaitStatesSince(IsHazardFn);
+ return getWaitStatesSince(IsHazardFn, Limit);
}
//===----------------------------------------------------------------------===//
@@ -328,9 +500,9 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
// instructions in this group may return out of order and/or may be
// replayed (i.e. the same instruction issued more than once).
//
- // In order to handle these situations correctly we need to make sure
- // that when a clause has more than one instruction, no instruction in the
- // clause writes to a register that is read another instruction in the clause
+ // In order to handle these situations correctly we need to make sure that
+ // when a clause has more than one instruction, no instruction in the clause
+ // writes to a register that is read by another instruction in the clause
// (including itself). If we encounter this situaion, we need to break the
// clause by inserting a non SMEM instruction.
@@ -363,13 +535,12 @@ int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
}
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
int WaitStatesNeeded = 0;
WaitStatesNeeded = checkSoftClauseHazards(SMRD);
// This SMRD hazard only affects SI.
- if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ if (!ST.hasSMRDReadVALUDefHazard())
return WaitStatesNeeded;
// A read of an SGPR by SMRD instruction requires 4 wait states when the
@@ -384,7 +555,8 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
if (!Use.isReg())
continue;
int WaitStatesNeededForUse =
- SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+ SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+ SmrdSgprWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
// This fixes what appears to be undocumented hardware behavior in SI where
@@ -397,7 +569,8 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
if (IsBufferSMRD) {
int WaitStatesNeededForUse =
SmrdSgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
- IsBufferHazardDefFn);
+ IsBufferHazardDefFn,
+ SmrdSgprWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
}
@@ -406,7 +579,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
}
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
- if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (!ST.hasVMEMReadSGPRVALUDefHazard())
return 0;
int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
@@ -415,13 +588,13 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
// SGPR was written by a VALU Instruction.
const int VmemSgprWaitStates = 5;
auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
-
for (const MachineOperand &Use : VMEM->uses()) {
if (!Use.isReg() || TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
continue;
int WaitStatesNeededForUse =
- VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn);
+ VmemSgprWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardDefFn,
+ VmemSgprWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
return WaitStatesNeeded;
@@ -441,13 +614,16 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
continue;
int WaitStatesNeededForUse =
- DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
+ DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg(),
+ [](MachineInstr *) { return true; },
+ DppVgprWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
WaitStatesNeeded = std::max(
WaitStatesNeeded,
- DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn));
+ DppExecWaitStates - getWaitStatesSinceDef(AMDGPU::EXEC, IsHazardDefFn,
+ DppExecWaitStates));
return WaitStatesNeeded;
}
@@ -459,7 +635,8 @@ int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
// instruction.
const int DivFMasWaitStates = 4;
auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
- int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+ int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn,
+ DivFMasWaitStates);
return DivFMasWaitStates - WaitStatesNeeded;
}
@@ -472,7 +649,7 @@ int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
return GetRegHWReg == getHWReg(TII, *MI);
};
- int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+ int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, GetRegWaitStates);
return GetRegWaitStates - WaitStatesNeeded;
}
@@ -481,12 +658,11 @@ int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
const SIInstrInfo *TII = ST.getInstrInfo();
unsigned HWReg = getHWReg(TII, *SetRegInstr);
- const int SetRegWaitStates =
- ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
+ const int SetRegWaitStates = ST.getSetRegWaitStates();
auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
return HWReg == getHWReg(TII, *MI);
};
- int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+ int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, SetRegWaitStates);
return SetRegWaitStates - WaitStatesNeeded;
}
@@ -557,7 +733,7 @@ int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
};
int WaitStatesNeededForDef =
- VALUWaitStates - getWaitStatesSince(IsHazardFn);
+ VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
return WaitStatesNeeded;
@@ -622,12 +798,13 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
};
const int RWLaneWaitStates = 4;
- int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
+ int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn,
+ RWLaneWaitStates);
return RWLaneWaitStates - WaitStatesSince;
}
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
- if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (!ST.hasRFEHazards())
return 0;
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -637,7 +814,7 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
auto IsHazardFn = [TII] (MachineInstr *MI) {
return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
};
- int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+ int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn, RFEWaitStates);
return RFEWaitStates - WaitStatesNeeded;
}
@@ -661,7 +838,8 @@ int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
};
int WaitStatesNeededForUse =
- MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+ MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn,
+ MovFedWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
}
@@ -674,5 +852,557 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
auto IsHazardFn = [TII] (MachineInstr *MI) {
return TII->isSALU(*MI);
};
- return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+ return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn,
+ SMovRelWaitStates);
+}
+
+void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
+ fixVMEMtoScalarWriteHazards(MI);
+ fixVcmpxPermlaneHazards(MI);
+ fixSMEMtoVectorWriteHazards(MI);
+ fixVcmpxExecWARHazard(MI);
+ fixLdsBranchVmemWARHazard(MI);
+}
+
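// The fix* routines below all follow one shape; a simplified, illustrative
// sketch (the lambda bodies are placeholders, not real hazard conditions):
static bool fixSomeHazardSketch(MachineInstr *MI, const SIInstrInfo &TII) {
  auto IsHazardFn = [](MachineInstr *) { return false; };       // creates the hazard?
  auto IsExpiredFn = [](MachineInstr *, int) { return false; }; // already clears it?
  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false; // no live hazard reaches this point
  // Otherwise build the mitigating instruction (s_waitcnt, v_nop, ...) just
  // before MI; the real routines pick whatever the hardware requires.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP))
      .addImm(0);
  return true;
}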
+bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
+ if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI))
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ auto IsHazardFn = [TII] (MachineInstr *MI) {
+ return TII->isVOPC(*MI);
+ };
+
+ auto IsExpiredFn = [] (MachineInstr *MI, int) {
+ if (!MI)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ return SIInstrInfo::isVALU(*MI) &&
+ Opc != AMDGPU::V_NOP_e32 &&
+ Opc != AMDGPU::V_NOP_e64 &&
+ Opc != AMDGPU::V_NOP_sdwa;
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ // V_NOP will be discarded by SQ.
+ // Use V_MOV_B32 v?, v?. The register must be alive, so use src0 of V_PERMLANE*
+ // which is always a VGPR and available.
+ auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+ unsigned Reg = Src0->getReg();
+ bool IsUndef = Src0->isUndef();
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::V_MOV_B32_e32))
+ .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0))
+ .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill);
+
+ return true;
+}
+
+bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
+ if (!ST.hasVMEMtoScalarWriteHazard())
+ return false;
+
+ if (!SIInstrInfo::isSALU(*MI) && !SIInstrInfo::isSMRD(*MI))
+ return false;
+
+ if (MI->getNumDefs() == 0)
+ return false;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ auto IsHazardFn = [TRI, MI] (MachineInstr *I) {
+ if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isDS(*I) &&
+ !SIInstrInfo::isFLAT(*I))
+ return false;
+
+ for (const MachineOperand &Def : MI->defs()) {
+ MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
+ if (!Op)
+ continue;
+ return true;
+ }
+ return false;
+ };
+
+ auto IsExpiredFn = [] (MachineInstr *MI, int) {
+ return MI && (SIInstrInfo::isVALU(*MI) ||
+ (MI->getOpcode() == AMDGPU::S_WAITCNT &&
+ !MI->getOperand(0).getImm()));
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::V_NOP_e32));
+ return true;
+}
+
+bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
+ if (!ST.hasSMEMtoVectorWriteHazard())
+ return false;
+
+ if (!SIInstrInfo::isVALU(*MI))
+ return false;
+
+ unsigned SDSTName;
+ switch (MI->getOpcode()) {
+ case AMDGPU::V_READLANE_B32:
+ case AMDGPU::V_READFIRSTLANE_B32:
+ SDSTName = AMDGPU::OpName::vdst;
+ break;
+ default:
+ SDSTName = AMDGPU::OpName::sdst;
+ break;
+ }
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST.getCPU());
+ const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
+ if (!SDST) {
+ for (const auto &MO : MI->implicit_operands()) {
+ if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+ SDST = &MO;
+ break;
+ }
+ }
+ }
+
+ if (!SDST)
+ return false;
+
+ const unsigned SDSTReg = SDST->getReg();
+ auto IsHazardFn = [SDSTReg, TRI] (MachineInstr *I) {
+ return SIInstrInfo::isSMRD(*I) && I->readsRegister(SDSTReg, TRI);
+ };
+
+ auto IsExpiredFn = [TII, IV] (MachineInstr *MI, int) {
+ if (MI) {
+ if (TII->isSALU(*MI)) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_SETVSKIP:
+ case AMDGPU::S_VERSION:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ // These instructions cannot mitigate the hazard.
+ return false;
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ // Reducing lgkmcnt count to 0 always mitigates the hazard.
+ return (MI->getOperand(1).getImm() == 0) &&
+ (MI->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+ case AMDGPU::S_WAITCNT: {
+ const int64_t Imm = MI->getOperand(0).getImm();
+ AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
+ return (Decoded.LgkmCnt == 0);
+ }
+ default:
+ // SOPP instructions cannot mitigate the hazard.
+ if (TII->isSOPP(*MI))
+ return false;
+ // At this point the SALU can be assumed to mitigate the hazard
+ // because either:
+ // (a) it is independent of the at-risk SMEM (breaking the dependency
+ //     chain), or
+ // (b) it is dependent on the SMEM, in which case an appropriate
+ //     s_waitcnt lgkmcnt _must_ exist between it and the at-risk
+ //     SMEM instruction.
+ return true;
+ }
+ }
+ }
+ return false;
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_MOV_B32), AMDGPU::SGPR_NULL)
+ .addImm(0);
+ return true;
+}
+
+bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
+ if (!ST.hasVcmpxExecWARHazard() || !SIInstrInfo::isVALU(*MI))
+ return false;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (!MI->modifiesRegister(AMDGPU::EXEC, TRI))
+ return false;
+
+ auto IsHazardFn = [TRI] (MachineInstr *I) {
+ if (SIInstrInfo::isVALU(*I))
+ return false;
+ return I->readsRegister(AMDGPU::EXEC, TRI);
+ };
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ auto IsExpiredFn = [TII, TRI] (MachineInstr *MI, int) {
+ if (!MI)
+ return false;
+ if (SIInstrInfo::isVALU(*MI)) {
+ if (TII->getNamedOperand(*MI, AMDGPU::OpName::sdst))
+ return true;
+ for (auto MO : MI->implicit_operands())
+ if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+ return true;
+ }
+ if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
+ (MI->getOperand(0).getImm() & 0xfffe) == 0xfffe)
+ return true;
+ return false;
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(0xfffe);
+ return true;
+}
+
+bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
+ if (!ST.hasLdsBranchVmemWARHazard())
+ return false;
+
+ auto IsHazardInst = [] (const MachineInstr *MI) {
+ if (SIInstrInfo::isDS(*MI))
+ return 1;
+ if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSegmentSpecificFLAT(*MI))
+ return 2;
+ return 0;
+ };
+
+ auto InstType = IsHazardInst(MI);
+ if (!InstType)
+ return false;
+
+ auto IsExpiredFn = [&IsHazardInst] (MachineInstr *I, int) {
+ return I && (IsHazardInst(I) ||
+ (I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+ I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+ !I->getOperand(1).getImm()));
+ };
+
+ auto IsHazardFn = [InstType, &IsHazardInst] (MachineInstr *I) {
+ if (!I->isBranch())
+ return false;
+
+ auto IsHazardFn = [InstType, IsHazardInst] (MachineInstr *I) {
+ auto InstType2 = IsHazardInst(I);
+ return InstType2 && InstType != InstType2;
+ };
+
+ auto IsExpiredFn = [InstType, &IsHazardInst] (MachineInstr *I, int) {
+ if (!I)
+ return false;
+
+ auto InstType2 = IsHazardInst(I);
+ if (InstType == InstType2)
+ return true;
+
+ return I->getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+ I->getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+ !I->getOperand(1).getImm();
+ };
+
+ return ::getWaitStatesSince(IsHazardFn, I, IsExpiredFn) !=
+ std::numeric_limits<int>::max();
+ };
+
+ if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
+ std::numeric_limits<int>::max())
+ return false;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
+
+ return true;
+}
+
+int GCNHazardRecognizer::checkNSAtoVMEMHazard(MachineInstr *MI) {
+ int NSAtoVMEMWaitStates = 1;
+
+ if (!ST.hasNSAtoVMEMBug())
+ return 0;
+
+ if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isMTBUF(*MI))
+ return 0;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const auto *Offset = TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
+ if (!Offset || (Offset->getImm() & 6) == 0)
+ return 0;
+
+ auto IsHazardFn = [TII] (MachineInstr *I) {
+ if (!SIInstrInfo::isMIMG(*I))
+ return false;
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(I->getOpcode());
+ return Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA &&
+ TII->getInstSizeInBytes(*I) >= 16;
+ };
+
+ return NSAtoVMEMWaitStates - getWaitStatesSince(IsHazardFn, 1);
+}
+
+int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
+ int FPAtomicToDenormModeWaitStates = 3;
+
+ if (MI->getOpcode() != AMDGPU::S_DENORM_MODE)
+ return 0;
+
+ auto IsHazardFn = [] (MachineInstr *I) {
+ if (!SIInstrInfo::isVMEM(*I) && !SIInstrInfo::isFLAT(*I))
+ return false;
+ return SIInstrInfo::isFPAtomic(*I);
+ };
+
+ auto IsExpiredFn = [] (MachineInstr *MI, int WaitStates) {
+ if (WaitStates >= 3 || SIInstrInfo::isVALU(*MI))
+ return true;
+
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_WAITCNT:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAITCNT_IDLE:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+ };
+
+
+ return FPAtomicToDenormModeWaitStates -
+ ::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn);
+}
+
+int GCNHazardRecognizer::checkMAIHazards(MachineInstr *MI) {
+ assert(SIInstrInfo::isMAI(*MI));
+
+ int WaitStatesNeeded = 0;
+ unsigned Opc = MI->getOpcode();
+
+ auto IsVALUFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isVALU(*MI);
+ };
+
+ if (Opc != AMDGPU::V_ACCVGPR_READ_B32) { // MFMA or v_accvgpr_write
+ const int LegacyVALUWritesVGPRWaitStates = 2;
+ const int VALUWritesExecWaitStates = 4;
+ const int MaxWaitStates = 4;
+
+ int WaitStatesNeededForUse = VALUWritesExecWaitStates -
+ getWaitStatesSinceDef(AMDGPU::EXEC, IsVALUFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded < MaxWaitStates) {
+ for (const MachineOperand &Use : MI->explicit_uses()) {
+ const int MaxWaitStates = 2;
+
+ if (!Use.isReg() || !TRI.isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+
+ int WaitStatesNeededForUse = LegacyVALUWritesVGPRWaitStates -
+ getWaitStatesSinceDef(Use.getReg(), IsVALUFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ break;
+ }
+ }
+ }
+
+ auto IsMFMAFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isMAI(*MI) &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32 &&
+ MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32;
+ };
+
+ for (const MachineOperand &Op : MI->explicit_operands()) {
+ if (!Op.isReg() || !TRI.isAGPR(MF.getRegInfo(), Op.getReg()))
+ continue;
+
+ if (Op.isDef() && Opc != AMDGPU::V_ACCVGPR_WRITE_B32)
+ continue;
+
+ const int MFMAWritesAGPROverlappedSrcABWaitStates = 4;
+ const int MFMAWritesAGPROverlappedSrcCWaitStates = 2;
+ const int MFMA4x4WritesAGPRAccVgprReadWaitStates = 4;
+ const int MFMA16x16WritesAGPRAccVgprReadWaitStates = 10;
+ const int MFMA32x32WritesAGPRAccVgprReadWaitStates = 18;
+ const int MFMA4x4WritesAGPRAccVgprWriteWaitStates = 1;
+ const int MFMA16x16WritesAGPRAccVgprWriteWaitStates = 7;
+ const int MFMA32x32WritesAGPRAccVgprWriteWaitStates = 15;
+ const int MaxWaitStates = 18;
+ unsigned Reg = Op.getReg();
+ unsigned HazardDefLatency = 0;
+
+ auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &HazardDefLatency, this]
+ (MachineInstr *MI) {
+ if (!IsMFMAFn(MI))
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (DstReg == Reg)
+ return false;
+ HazardDefLatency = std::max(HazardDefLatency,
+ TSchedModel.computeInstrLatency(MI));
+ return TRI.regsOverlap(DstReg, Reg);
+ };
+
+ int WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn,
+ MaxWaitStates);
+ int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
+ int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ int OpNo = MI->getOperandNo(&Op);
+ if (OpNo == SrcCIdx) {
+ NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
+ } else if (Opc == AMDGPU::V_ACCVGPR_READ_B32) {
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprReadWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprReadWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprReadWaitStates;
+ break;
+ }
+ } else if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4WritesAGPRAccVgprWriteWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16WritesAGPRAccVgprWriteWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32WritesAGPRAccVgprWriteWaitStates;
+ break;
+ }
+ }
+
+ int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceDef;
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+
+ auto IsAccVgprWriteFn = [Reg, this] (MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+ return false;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ return TRI.regsOverlap(Reg, DstReg);
+ };
+
+ const int AccVGPRWriteMFMAReadSrcCWaitStates = 1;
+ const int AccVGPRWriteMFMAReadSrcABWaitStates = 3;
+ const int AccVGPRWriteAccVgprReadWaitStates = 3;
+ NeedWaitStates = AccVGPRWriteMFMAReadSrcABWaitStates;
+ if (OpNo == SrcCIdx)
+ NeedWaitStates = AccVGPRWriteMFMAReadSrcCWaitStates;
+ else if (Opc == AMDGPU::V_ACCVGPR_READ_B32)
+ NeedWaitStates = AccVGPRWriteAccVgprReadWaitStates;
+
+ WaitStatesNeededForUse = NeedWaitStates -
+ getWaitStatesSinceDef(Reg, IsAccVgprWriteFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+ }
+
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32) {
+ const int MFMA4x4ReadSrcCAccVgprWriteWaitStates = 0;
+ const int MFMA16x16ReadSrcCAccVgprWriteWaitStates = 5;
+ const int MFMA32x32ReadSrcCAccVgprWriteWaitStates = 13;
+ const int MaxWaitStates = 13;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned HazardDefLatency = 0;
+
+ auto IsSrcCMFMAFn = [DstReg, &IsMFMAFn, &HazardDefLatency, this]
+ (MachineInstr *MI) {
+ if (!IsMFMAFn(MI))
+ return false;
+ unsigned Reg = TII.getNamedOperand(*MI, AMDGPU::OpName::src2)->getReg();
+ HazardDefLatency = std::max(HazardDefLatency,
+ TSchedModel.computeInstrLatency(MI));
+ return TRI.regsOverlap(Reg, DstReg);
+ };
+
+ int WaitStatesSince = getWaitStatesSince(IsSrcCMFMAFn, MaxWaitStates);
+ int NeedWaitStates;
+ switch (HazardDefLatency) {
+ case 2: NeedWaitStates = MFMA4x4ReadSrcCAccVgprWriteWaitStates;
+ break;
+ case 8: NeedWaitStates = MFMA16x16ReadSrcCAccVgprWriteWaitStates;
+ break;
+ case 16: LLVM_FALLTHROUGH;
+ default: NeedWaitStates = MFMA32x32ReadSrcCAccVgprWriteWaitStates;
+ break;
+ }
+
+ int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSince;
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
+
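// For orientation, the latency buckets the switches above key off correspond
// to MFMA shapes roughly as follows (an illustrative summary of the constants
// used in checkMAIHazards, not an extra table from the hardware docs):
//   computeInstrLatency == 2  -> 4x4 MFMA
//   computeInstrLatency == 8  -> 16x16 MFMA
//   computeInstrLatency == 16 -> 32x32 MFMA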
+int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
+ if (!ST.hasMAIInsts())
+ return 0;
+
+ int WaitStatesNeeded = 0;
+
+ auto IsAccVgprReadFn = [] (MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::V_ACCVGPR_READ_B32;
+ };
+
+ for (const MachineOperand &Op : MI->explicit_uses()) {
+ if (!Op.isReg() || !TRI.isVGPR(MF.getRegInfo(), Op.getReg()))
+ continue;
+
+ unsigned Reg = Op.getReg();
+
+ const int AccVgprReadLdStWaitStates = 2;
+ const int VALUWriteAccVgprReadLdStDepVALUWaitStates = 1;
+ const int MaxWaitStates = 2;
+
+ int WaitStatesNeededForUse = AccVgprReadLdStWaitStates -
+ getWaitStatesSinceDef(Reg, IsAccVgprReadFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+
+ if (WaitStatesNeeded == MaxWaitStates)
+ return WaitStatesNeeded; // Early exit.
+
+ auto IsVALUAccVgprReadCheckFn = [Reg, this] (MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::V_ACCVGPR_READ_B32)
+ return false;
+ auto IsVALUFn = [] (MachineInstr *MI) {
+ return SIInstrInfo::isVALU(*MI) && !SIInstrInfo::isMAI(*MI);
+ };
+ return getWaitStatesSinceDef(Reg, IsVALUFn, 2 /*MaxWaitStates*/) <
+ std::numeric_limits<int>::max();
+ };
+
+ WaitStatesNeededForUse = VALUWriteAccVgprReadLdStDepVALUWaitStates -
+ getWaitStatesSince(IsVALUAccVgprReadCheckFn, MaxWaitStates);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
}
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index ca17e7cb6018..6aa2e70dfbfb 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -1,9 +1,8 @@
//===-- GCNHazardRecognizers.h - GCN Hazard Recognizers ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include <list>
namespace llvm {
@@ -31,6 +31,13 @@ class SIRegisterInfo;
class GCNSubtarget;
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
+public:
+ typedef function_ref<bool(MachineInstr *)> IsHazardFn;
+
+private:
+ // Distinguish if we are called from scheduler or hazard recognizer
+ bool IsHazardRecognizerMode;
+
// This variable stores the instruction that has been emitted this cycle. It
// will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
// called.
@@ -40,6 +47,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
const GCNSubtarget &ST;
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
+ TargetSchedModel TSchedModel;
/// RegUnits of uses in the current soft memory clause.
BitVector ClauseUses;
@@ -54,11 +62,13 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
void addClauseInst(const MachineInstr &MI);
- int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard);
- int getWaitStatesSinceDef(unsigned Reg,
- function_ref<bool(MachineInstr *)> IsHazardDef =
- [](MachineInstr *) { return true; });
- int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
+ // Advance over a MachineInstr bundle. Look for hazards in the bundled
+ // instructions.
+ void processBundle();
+
+ int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
+ int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
+ int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);
int checkSoftClauseHazards(MachineInstr *SMEM);
int checkSMRDHazards(MachineInstr *SMRD);
@@ -75,6 +85,18 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkInlineAsmHazards(MachineInstr *IA);
int checkAnyInstHazards(MachineInstr *MI);
int checkReadM0Hazards(MachineInstr *SMovRel);
+ int checkNSAtoVMEMHazard(MachineInstr *MI);
+ int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
+ void fixHazards(MachineInstr *MI);
+ bool fixVcmpxPermlaneHazards(MachineInstr *MI);
+ bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
+ bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
+ bool fixVcmpxExecWARHazard(MachineInstr *MI);
+ bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
+
+ int checkMAIHazards(MachineInstr *MI);
+ int checkMAILdStHazards(MachineInstr *MI);
+
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
@@ -85,6 +107,7 @@ public:
void EmitNoop() override;
unsigned PreEmitNoops(SUnit *SU) override;
unsigned PreEmitNoops(MachineInstr *) override;
+ unsigned PreEmitNoopsCommon(MachineInstr *);
void AdvanceCycle() override;
void RecedeCycle() override;
};
diff --git a/lib/Target/AMDGPU/GCNILPSched.cpp b/lib/Target/AMDGPU/GCNILPSched.cpp
index d62dc8d86781..1eb617640c32 100644
--- a/lib/Target/AMDGPU/GCNILPSched.cpp
+++ b/lib/Target/AMDGPU/GCNILPSched.cpp
@@ -1,9 +1,8 @@
//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 8e4cc391dc21..3525174223bd 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -1,9 +1,8 @@
//===- GCNIterativeScheduler.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.h b/lib/Target/AMDGPU/GCNIterativeScheduler.h
index 14ef5147f32a..e6f83914af5b 100644
--- a/lib/Target/AMDGPU/GCNIterativeScheduler.h
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.h
@@ -1,9 +1,8 @@
//===- GCNIterativeScheduler.h - GCN Scheduler ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index ec6bcae33555..c469cf290e26 100644
--- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -1,9 +1,8 @@
//===- GCNMinRegStrategy.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/GCNNSAReassign.cpp b/lib/Target/AMDGPU/GCNNSAReassign.cpp
new file mode 100644
index 000000000000..51c4c99cfb18
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -0,0 +1,343 @@
+//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
+/// in NSA image instructions. The later SIShrinkInstructions pass will replace
+/// NSA forms with sequential versions where possible.
+///
+//===----------------------------------------------------------------------===//
+
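// A minimal sketch of the property the pass tries to establish (illustrative
// helper, not part of the pass): the physical registers backing vaddr0..vaddrN
// must form one ascending run before the non-NSA encoding becomes usable.
static bool isContiguousRunSketch(const unsigned *PhysRegs, unsigned N) {
  for (unsigned I = 1; I < N; ++I)
    if (PhysRegs[I] != PhysRegs[0] + I)
      return false;
  return true;
}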
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-nsa-reassign"
+
+STATISTIC(NumNSAInstructions,
+ "Number of NSA instructions with non-sequential address found");
+STATISTIC(NumNSAConverted,
+ "Number of NSA instructions changed to sequential");
+
+namespace {
+
+class GCNNSAReassign : public MachineFunctionPass {
+public:
+ static char ID;
+
+ GCNNSAReassign() : MachineFunctionPass(ID) {
+ initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "GCN NSA Reassign"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ typedef enum {
+ NOT_NSA, // Not an NSA instruction
+ FIXED, // NSA which we cannot modify
+ NON_CONTIGUOUS, // NSA with non-sequential address which we can try
+ // to optimize.
+ CONTIGUOUS // NSA with all sequential address registers
+ } NSA_Status;
+
+ const GCNSubtarget *ST;
+
+ const MachineRegisterInfo *MRI;
+
+ const SIRegisterInfo *TRI;
+
+ VirtRegMap *VRM;
+
+ LiveRegMatrix *LRM;
+
+ LiveIntervals *LIS;
+
+ unsigned MaxNumVGPRs;
+
+ const MCPhysReg *CSRegs;
+
+ NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;
+
+ bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
+ unsigned StartReg) const;
+
+ bool canAssign(unsigned StartReg, unsigned NumRegs) const;
+
+ bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
+ false, false)
+
+
+char GCNNSAReassign::ID = 0;
+
+char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;
+
+bool
+GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
+ unsigned StartReg) const {
+ unsigned NumRegs = Intervals.size();
+
+ for (unsigned N = 0; N < NumRegs; ++N)
+ if (VRM->hasPhys(Intervals[N]->reg))
+ LRM->unassign(*Intervals[N]);
+
+ for (unsigned N = 0; N < NumRegs; ++N)
+ if (LRM->checkInterference(*Intervals[N], StartReg + N))
+ return false;
+
+ for (unsigned N = 0; N < NumRegs; ++N)
+ LRM->assign(*Intervals[N], StartReg + N);
+
+ return true;
+}
+
+bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
+ for (unsigned N = 0; N < NumRegs; ++N) {
+ unsigned Reg = StartReg + N;
+ if (!MRI->isAllocatable(Reg))
+ return false;
+
+ for (unsigned I = 0; CSRegs[I]; ++I)
+ if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
+ !LRM->isPhysRegUsed(CSRegs[I]))
+ return false;
+ }
+
+ return true;
+}
+
+bool
+GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
+ unsigned NumRegs = Intervals.size();
+
+ if (NumRegs > MaxNumVGPRs)
+ return false;
+ unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;
+
+ for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
+ if (!canAssign(Reg, NumRegs))
+ continue;
+
+ if (tryAssignRegisters(Intervals, Reg))
+ return true;
+ }
+
+ return false;
+}
+
+GCNNSAReassign::NSA_Status
+GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ return NSA_Status::NOT_NSA;
+
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+
+ unsigned VgprBase = 0;
+ bool NSA = false;
+ for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+ const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
+ unsigned Reg = Op.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+ return NSA_Status::FIXED;
+
+ unsigned PhysReg = VRM->getPhys(Reg);
+
+ if (!Fast) {
+ if (!PhysReg)
+ return NSA_Status::FIXED;
+
+ // Bail if the address is not a VGPR32. It should be possible to extend the
+ // optimization to work with subregs of wider register tuples, but the logic
+ // to find free registers would be much more complicated, with much lower
+ // chances of success. It seems reasonable to assume that in most cases a
+ // tuple is used because a vector variable contains different parts of an
+ // address, which is either already consecutive or cannot be reassigned if
+ // not. If needed, it is better to rely on the register coalescer to process
+ // such address tuples.
+ if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
+ return NSA_Status::FIXED;
+
+ const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
+
+ if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
+ return NSA_Status::FIXED;
+
+ for (auto U : MRI->use_nodbg_operands(Reg)) {
+ if (U.isImplicit())
+ return NSA_Status::FIXED;
+ const MachineInstr *UseInst = U.getParent();
+ if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
+ return NSA_Status::FIXED;
+ }
+
+ if (!LIS->hasInterval(Reg))
+ return NSA_Status::FIXED;
+ }
+
+ if (I == 0)
+ VgprBase = PhysReg;
+ else if (VgprBase + I != PhysReg)
+ NSA = true;
+ }
+
+ return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
+}
+
+bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ if (ST->getGeneration() < GCNSubtarget::GFX10)
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TRI = ST->getRegisterInfo();
+ VRM = &getAnalysis<VirtRegMap>();
+ LRM = &getAnalysis<LiveRegMatrix>();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
+ MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
+ CSRegs = MRI->getCalleeSavedRegs();
+
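+ // Collect all NSA-encoded MIMG instructions up front. The flag records
+ // whether the address registers already happen to be contiguous.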
+ using Candidate = std::pair<const MachineInstr*, bool>;
+ SmallVector<Candidate, 32> Candidates;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ switch (CheckNSA(MI)) {
+ default:
+ continue;
+ case NSA_Status::CONTIGUOUS:
+ Candidates.push_back(std::make_pair(&MI, true));
+ break;
+ case NSA_Status::NON_CONTIGUOUS:
+ Candidates.push_back(std::make_pair(&MI, false));
+ ++NumNSAInstructions;
+ break;
+ }
+ }
+ }
+
+ bool Changed = false;
+ for (auto &C : Candidates) {
+ if (C.second)
+ continue;
+
+ const MachineInstr *MI = C.first;
+ if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
+ // The registers already happen to be contiguous.
+ C.second = true;
+ ++NumNSAConverted;
+ continue;
+ }
+
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);
+
+ SmallVector<LiveInterval *, 16> Intervals;
+ SmallVector<unsigned, 16> OrigRegs;
+ SlotIndex MinInd, MaxInd;
+ for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
+ const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
+ unsigned Reg = Op.getReg();
+ LiveInterval *LI = &LIS->getInterval(Reg);
+ if (llvm::find(Intervals, LI) != Intervals.end()) {
+ // Same register used twice; unable to make the addresses sequential.
+ Intervals.clear();
+ break;
+ }
+ Intervals.push_back(LI);
+ OrigRegs.push_back(VRM->getPhys(Reg));
+ MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
+ MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
+ }
+
+ if (Intervals.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
+ << "\tOriginal allocation:\t";
+ for(auto *LI : Intervals)
+ dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI);
+ dbgs() << '\n');
+
+ bool Success = scavengeRegs(Intervals);
+ if (!Success) {
+ LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
+ if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation.
+ continue;
+ } else {
+ // Check we did not make it worse for other instructions.
+ auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
+ [this](const Candidate &C, SlotIndex I) {
+ return LIS->getInstructionIndex(*C.first) < I;
+ });
+ for (auto E = Candidates.end(); Success && I != E &&
+ LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
+ if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
+ Success = false;
+ LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
+ }
+ }
+ }
+
+ if (!Success) {
+ for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+ if (VRM->hasPhys(Intervals[I]->reg))
+ LRM->unassign(*Intervals[I]);
+
+ for (unsigned I = 0; I < Info->VAddrDwords; ++I)
+ LRM->assign(*Intervals[I], OrigRegs[I]);
+
+ continue;
+ }
+
+ C.second = true;
+ ++NumNSAConverted;
+ LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t ["
+ << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI)
+ << " : "
+ << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI)
+ << "]\n");
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/lib/Target/AMDGPU/GCNProcessors.td b/lib/Target/AMDGPU/GCNProcessors.td
index b8142a4e4ff8..b926041afb2f 100644
--- a/lib/Target/AMDGPU/GCNProcessors.td
+++ b/lib/Target/AMDGPU/GCNProcessors.td
@@ -1,163 +1,185 @@
//===-- GCNProcessors.td - GCN Processor definitions ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The code produced for "generic" is only useful for tests and cannot
// reasonably be expected to execute on any particular target.
def : ProcessorModel<"generic", NoSchedModel,
- [FeatureGCN, FeatureWavefrontSize64]
+ [FeatureWavefrontSize64]
>;
-//===----------------------------------------------------------------------===//
+def : ProcessorModel<"generic-hsa", NoSchedModel,
+ [FeatureWavefrontSize64, FeatureFlatAddressSpace]
+>;
+
+//===------------------------------------------------------------===//
// GCN GFX6 (Southern Islands (SI)).
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
def : ProcessorModel<"gfx600", SIFullSpeedModel,
- [FeatureISAVersion6_0_0]
+ FeatureISAVersion6_0_0.Features
>;
def : ProcessorModel<"tahiti", SIFullSpeedModel,
- [FeatureISAVersion6_0_0]
+ FeatureISAVersion6_0_0.Features
>;
def : ProcessorModel<"gfx601", SIQuarterSpeedModel,
- [FeatureISAVersion6_0_1]
+ FeatureISAVersion6_0_1.Features
>;
def : ProcessorModel<"hainan", SIQuarterSpeedModel,
- [FeatureISAVersion6_0_1]
+ FeatureISAVersion6_0_1.Features
>;
def : ProcessorModel<"oland", SIQuarterSpeedModel,
- [FeatureISAVersion6_0_1]
+ FeatureISAVersion6_0_1.Features
>;
def : ProcessorModel<"pitcairn", SIQuarterSpeedModel,
- [FeatureISAVersion6_0_1]
+ FeatureISAVersion6_0_1.Features
>;
def : ProcessorModel<"verde", SIQuarterSpeedModel,
- [FeatureISAVersion6_0_1]
+ FeatureISAVersion6_0_1.Features
>;
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
// GCN GFX7 (Sea Islands (CI)).
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_0]
+ FeatureISAVersion7_0_0.Features
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_0]
+ FeatureISAVersion7_0_0.Features
>;
def : ProcessorModel<"gfx701", SIFullSpeedModel,
- [FeatureISAVersion7_0_1]
+ FeatureISAVersion7_0_1.Features
>;
def : ProcessorModel<"hawaii", SIFullSpeedModel,
- [FeatureISAVersion7_0_1]
+ FeatureISAVersion7_0_1.Features
>;
def : ProcessorModel<"gfx702", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_2]
+ FeatureISAVersion7_0_2.Features
>;
def : ProcessorModel<"gfx703", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_3]
+ FeatureISAVersion7_0_3.Features
>;
def : ProcessorModel<"kabini", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_3]
+ FeatureISAVersion7_0_3.Features
>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_3]
+ FeatureISAVersion7_0_3.Features
>;
def : ProcessorModel<"gfx704", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_4]
+ FeatureISAVersion7_0_4.Features
>;
def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
- [FeatureISAVersion7_0_4]
+ FeatureISAVersion7_0_4.Features
>;
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
// GCN GFX8 (Volcanic Islands (VI)).
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
def : ProcessorModel<"gfx801", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_1]
+ FeatureISAVersion8_0_1.Features
>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_1]
+ FeatureISAVersion8_0_1.Features
>;
def : ProcessorModel<"gfx802", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_2]
+ FeatureISAVersion8_0_2.Features
>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_2]
+ FeatureISAVersion8_0_2.Features
>;
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_2]
+ FeatureISAVersion8_0_2.Features
>;
def : ProcessorModel<"gfx803", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_3]
+ FeatureISAVersion8_0_3.Features
>;
def : ProcessorModel<"fiji", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_3]
+ FeatureISAVersion8_0_3.Features
>;
def : ProcessorModel<"polaris10", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_3]
+ FeatureISAVersion8_0_3.Features
>;
def : ProcessorModel<"polaris11", SIQuarterSpeedModel,
- [FeatureISAVersion8_0_3]
+ FeatureISAVersion8_0_3.Features
>;
def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
- [FeatureISAVersion8_1_0]
+ FeatureISAVersion8_1_0.Features
>;
def : ProcessorModel<"stoney", SIQuarterSpeedModel,
- [FeatureISAVersion8_1_0]
+ FeatureISAVersion8_1_0.Features
>;
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
// GCN GFX9.
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
- [FeatureISAVersion9_0_0]
+ FeatureISAVersion9_0_0.Features
>;
def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
- [FeatureISAVersion9_0_2]
+ FeatureISAVersion9_0_2.Features
>;
def : ProcessorModel<"gfx904", SIQuarterSpeedModel,
- [FeatureISAVersion9_0_4]
+ FeatureISAVersion9_0_4.Features
>;
def : ProcessorModel<"gfx906", SIQuarterSpeedModel,
- [FeatureISAVersion9_0_6]
+ FeatureISAVersion9_0_6.Features
+>;
+
+def : ProcessorModel<"gfx908", SIQuarterSpeedModel,
+ FeatureISAVersion9_0_8.Features
>;
def : ProcessorModel<"gfx909", SIQuarterSpeedModel,
- [FeatureISAVersion9_0_9]
+ FeatureISAVersion9_0_9.Features
+>;
+
+//===----------------------------------------------------------------------===//
+// GCN GFX10.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx1010", GFX10SpeedModel,
+ FeatureISAVersion10_1_0.Features
>;
+def : ProcessorModel<"gfx1011", GFX10SpeedModel,
+ FeatureISAVersion10_1_1.Features
+>;
+
+def : ProcessorModel<"gfx1012", GFX10SpeedModel,
+ FeatureISAVersion10_1_2.Features
+>;
diff --git a/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
new file mode 100644
index 000000000000..f0d47eaa4ed1
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -0,0 +1,800 @@
+//===-- GCNRegBankReassign.cpp - Reassign registers after regalloc --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Try to reassign registers on GFX10+ to reduce register bank
+/// conflicts.
+///
+/// On GFX10 registers are organized in banks. VGPRs have 4 banks assigned in
+/// a round-robin fashion: v0, v4, v8... belong to bank 0. v1, v5, v9... to
+/// bank 1, etc. SGPRs have 8 banks and are allocated in pairs, so that s0:s1,
+/// s16:s17, s32:s33 are at bank 0, while s2:s3, s18:s19, s34:s35 are at bank 1, etc.
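+/// (In the terms used below, the bank of a VGPR is "VGPR index % 4" and the
+/// bank of an SGPR pair is "(encoded SGPR index / 2) % 8"; see
+/// getPhysRegBank() for the exact computation.)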
+///
+/// The shader can read one dword from each of these banks once per cycle.
+/// If an instruction has to read more register operands from the same bank,
+/// an additional cycle is needed. HW attempts to pre-load registers through
+/// input operand gathering, but a stall cycle may occur if that fails. For
+/// example, V_FMA_F32 V111 = V0 + V4 * V8 will need 3 cycles to read its
+/// operands, potentially incurring 2 stall cycles.
+///
+/// The pass tries to reassign registers to reduce bank conflicts.
+///
+/// In this pass bank numbers 0-3 are VGPR banks and 4-11 are SGPR banks, so
+/// that 4 has to be subtracted from an SGPR bank number to get the real value.
+/// This also corresponds to bit numbers in bank masks used in the pass.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> VerifyStallCycles("amdgpu-verify-regbanks-reassign",
+ cl::desc("Verify stall cycles in the regbanks reassign pass"),
+ cl::value_desc("0|1|2"),
+ cl::init(0), cl::Hidden);
+
+#define DEBUG_TYPE "amdgpu-regbanks-reassign"
+
+#define NUM_VGPR_BANKS 4
+#define NUM_SGPR_BANKS 8
+#define NUM_BANKS (NUM_VGPR_BANKS + NUM_SGPR_BANKS)
+#define SGPR_BANK_OFFSET NUM_VGPR_BANKS
+#define VGPR_BANK_MASK 0xf
+#define SGPR_BANK_MASK 0xff0
+#define SGPR_BANK_SHIFTED_MASK (SGPR_BANK_MASK >> SGPR_BANK_OFFSET)
+
+STATISTIC(NumStallsDetected,
+ "Number of operand read stalls detected");
+STATISTIC(NumStallsRecovered,
+ "Number of operand read stalls recovered");
+
+namespace {
+
+class GCNRegBankReassign : public MachineFunctionPass {
+
+ class OperandMask {
+ public:
+ OperandMask(unsigned r, unsigned s, unsigned m)
+ : Reg(r), SubReg(s), Mask(m) {}
+ unsigned Reg;
+ unsigned SubReg;
+ unsigned Mask;
+ };
+
+ class Candidate {
+ public:
+ Candidate(MachineInstr *mi, unsigned reg, unsigned freebanks,
+ unsigned weight)
+ : MI(mi), Reg(reg), FreeBanks(freebanks), Weight(weight) {}
+
+ bool operator< (const Candidate& RHS) const { return Weight < RHS.Weight; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(const GCNRegBankReassign *P) const {
+ MI->dump();
+ dbgs() << P->printReg(Reg) << " to banks ";
+ dumpFreeBanks(FreeBanks);
+ dbgs() << " weight " << Weight << '\n';
+ }
+#endif
+
+ MachineInstr *MI;
+ unsigned Reg;
+ unsigned FreeBanks;
+ unsigned Weight;
+ };
+
+ class CandidateList : public std::list<Candidate> {
+ public:
+ // Speed up the subsequent sort.
+ void push(const Candidate&& C) {
+ if (C.Weight) push_back(C);
+ else push_front(C);
+ }
+ };
+
+public:
+ static char ID;
+
+public:
+ GCNRegBankReassign() : MachineFunctionPass(ID) {
+ initializeGCNRegBankReassignPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "GCN RegBank Reassign"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ const GCNSubtarget *ST;
+
+ const MachineRegisterInfo *MRI;
+
+ const SIRegisterInfo *TRI;
+
+ MachineLoopInfo *MLI;
+
+ VirtRegMap *VRM;
+
+ LiveRegMatrix *LRM;
+
+ LiveIntervals *LIS;
+
+ unsigned MaxNumVGPRs;
+
+ unsigned MaxNumSGPRs;
+
+ BitVector RegsUsed;
+
+ SmallVector<OperandMask, 8> OperandMasks;
+
+ CandidateList Candidates;
+
+ const MCPhysReg *CSRegs;
+
+ // Returns bank for a phys reg.
+ unsigned getPhysRegBank(unsigned Reg) const;
+
+ // Return a bit set for each register bank used. 4 banks for VGPRs and
+ // 8 banks for SGPRs.
+ // Registers already processed and recorded in RegsUsed are excluded.
+ // If Bank is not -1 assume Reg:SubReg to belong to that Bank.
+ unsigned getRegBankMask(unsigned Reg, unsigned SubReg, int Bank);
+
+ // Return the number of stalls in the instruction.
+ // UsedBanks has bits set for the banks used by all operands.
+ // If Reg and Bank are provided, compute the masks as if Reg were assigned
+ // to that Bank.
+ unsigned analyzeInst(const MachineInstr& MI, unsigned& UsedBanks,
+ unsigned Reg = AMDGPU::NoRegister, int Bank = -1);
+
+ // Return true if the register is a regular VGPR or SGPR, or a tuple thereof.
+ // Returns false for special registers like m0, vcc, etc.
+ bool isReassignable(unsigned Reg) const;
+
+ // Check if registers' defs are old and may be pre-loaded.
+ // Returns 0 if both registers are old enough, 1 or 2 if one or both
+ // registers are unlikely to be pre-loaded.
+ unsigned getOperandGatherWeight(const MachineInstr& MI,
+ unsigned Reg1,
+ unsigned Reg2,
+ unsigned StallCycles) const;
+
+
+ // Find all bank bits in UsedBanks where Mask can be relocated to.
+ unsigned getFreeBanks(unsigned Mask, unsigned UsedBanks) const;
+
+ // Find all bank bits in UsedBanks where Mask can be relocated to.
+ // Bank is relative to the register and not its subregister component.
+ // Returns 0 if the register is not reassignable.
+ unsigned getFreeBanks(unsigned Reg, unsigned SubReg, unsigned Mask,
+ unsigned UsedBanks) const;
+
+ // Add a candidate instruction to the work list.
+ void collectCandidates(MachineInstr& MI, unsigned UsedBanks,
+ unsigned StallCycles);
+
+ // Collect candidate instructions across the function. Returns the number of
+ // stall cycles detected. Only counts stalls if Collect is false.
+ unsigned collectCandidates(MachineFunction &MF, bool Collect = true);
+
+ // Remove all candidates that read specified register.
+ void removeCandidates(unsigned Reg);
+
+ // Compute stalls within the uses of SrcReg replaced by a register from
+ // Bank. If Bank is -1, no substitution is performed. If Collect is set,
+ // candidates are collected and added to the work list.
+ unsigned computeStallCycles(unsigned SrcReg,
+ unsigned Reg = AMDGPU::NoRegister,
+ int Bank = -1, bool Collect = false);
+
+ // Search for a register in Bank unused within LI.
+ // Returns phys reg or NoRegister.
+ unsigned scavengeReg(LiveInterval& LI, unsigned Bank) const;
+
+ // Try to reassign a candidate. Returns the number of stall cycles saved.
+ unsigned tryReassign(Candidate &C);
+
+ bool verifyCycles(MachineFunction &MF,
+ unsigned OriginalCycles, unsigned CyclesSaved);
+
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+public:
+ Printable printReg(unsigned Reg, unsigned SubReg = 0) const {
+ return Printable([Reg, SubReg, this](raw_ostream &OS) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ OS << llvm::printReg(Reg, TRI);
+ return;
+ }
+ if (!VRM->isAssignedReg(Reg))
+ OS << "<unassigned> " << llvm::printReg(Reg, TRI);
+ else
+ OS << llvm::printReg(Reg, TRI) << '('
+ << llvm::printReg(VRM->getPhys(Reg), TRI) << ')';
+ if (SubReg)
+ OS << ':' << TRI->getSubRegIndexName(SubReg);
+ });
+ }
+
+ static Printable printBank(unsigned Bank) {
+ return Printable([Bank](raw_ostream &OS) {
+ OS << ((Bank >= SGPR_BANK_OFFSET) ? Bank - SGPR_BANK_OFFSET : Bank);
+ });
+ }
+
+ static void dumpFreeBanks(unsigned FreeBanks) {
+ for (unsigned L = 0; L < NUM_BANKS; ++L)
+ if (FreeBanks & (1 << L))
+ dbgs() << printBank(L) << ' ';
+ }
+#endif
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(GCNRegBankReassign, DEBUG_TYPE, "GCN RegBank Reassign",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(GCNRegBankReassign, DEBUG_TYPE, "GCN RegBank Reassign",
+ false, false)
+
+
+char GCNRegBankReassign::ID = 0;
+
+char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID;
+
+unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
+ assert (TargetRegisterInfo::isPhysicalRegister(Reg));
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+ if (Size > 32)
+ Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+
+ if (TRI->hasVGPRs(RC)) {
+ Reg -= AMDGPU::VGPR0;
+ return Reg % NUM_VGPR_BANKS;
+ }
+
+ Reg = TRI->getEncodingValue(Reg) / 2;
+ return Reg % NUM_SGPR_BANKS + SGPR_BANK_OFFSET;
+}
+
+unsigned GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
+ int Bank) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!VRM->isAssignedReg(Reg))
+ return 0;
+
+ Reg = VRM->getPhys(Reg);
+ if (!Reg)
+ return 0;
+ if (SubReg)
+ Reg = TRI->getSubReg(Reg, SubReg);
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ unsigned Size = TRI->getRegSizeInBits(*RC) / 32;
+ if (Size > 1)
+ Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+
+ if (TRI->hasVGPRs(RC)) {
+ // VGPRs have 4 banks assigned in a round-robin fashion.
+ Reg -= AMDGPU::VGPR0;
+ unsigned Mask = (1 << Size) - 1;
+ unsigned Used = 0;
+ // BitVector lacks an extract method.
+ for (unsigned I = 0; I < Size; ++I)
+ if (RegsUsed.test(Reg + I))
+ Used |= 1 << I;
+ RegsUsed.set(Reg, Reg + Size);
+ Mask &= ~Used;
+ Mask <<= (Bank == -1) ? Reg % NUM_VGPR_BANKS : unsigned(Bank);
+ return (Mask | (Mask >> NUM_VGPR_BANKS)) & VGPR_BANK_MASK;
+ }
+
+ // SGPRs have 8 banks holding 2 consecutive registers each.
+ Reg = TRI->getEncodingValue(Reg) / 2;
+ unsigned StartBit = AMDGPU::VGPR_32RegClass.getNumRegs();
+ if (Reg + StartBit >= RegsUsed.size())
+ return 0;
+
+ if (Size > 1)
+ Size /= 2;
+ unsigned Mask = (1 << Size) - 1;
+ unsigned Used = 0;
+ for (unsigned I = 0; I < Size; ++I)
+ if (RegsUsed.test(StartBit + Reg + I))
+ Used |= 1 << I;
+ RegsUsed.set(StartBit + Reg, StartBit + Reg + Size);
+ Mask &= ~Used;
+ Mask <<= (Bank == -1) ? Reg % NUM_SGPR_BANKS
+ : unsigned(Bank - SGPR_BANK_OFFSET);
+ Mask = (Mask | (Mask >> NUM_SGPR_BANKS)) & SGPR_BANK_SHIFTED_MASK;
+ // Reserve 4 bank ids for VGPRs.
+ return Mask << SGPR_BANK_OFFSET;
+}
+
+unsigned GCNRegBankReassign::analyzeInst(const MachineInstr& MI,
+ unsigned& UsedBanks,
+ unsigned Reg,
+ int Bank) {
+ unsigned StallCycles = 0;
+ UsedBanks = 0;
+
+ if (MI.isDebugValue())
+ return 0;
+
+ RegsUsed.reset();
+ OperandMasks.clear();
+ for (const auto& Op : MI.explicit_uses()) {
+ // Undef can be assigned to any register, so two vregs can be assigned
+ // the same phys reg within the same instruction.
+ if (!Op.isReg() || Op.isUndef())
+ continue;
+
+ unsigned R = Op.getReg();
+ if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R)))
+ continue;
+
+ unsigned ShiftedBank = Bank;
+
+ if (Bank != -1 && R == Reg && Op.getSubReg()) {
+ unsigned LM = TRI->getSubRegIndexLaneMask(Op.getSubReg()).getAsInteger();
+ if (!(LM & 1) && (Bank < NUM_VGPR_BANKS)) {
+ // If a register spans all banks we cannot shift it to avoid conflict.
+ if (countPopulation(LM) >= NUM_VGPR_BANKS)
+ continue;
+ ShiftedBank = (Bank + countTrailingZeros(LM)) % NUM_VGPR_BANKS;
+ } else if (!(LM & 3) && (Bank >= SGPR_BANK_OFFSET)) {
+ // If a register spans all banks we cannot shift it to avoid conflict.
+ if (countPopulation(LM) / 2 >= NUM_SGPR_BANKS)
+ continue;
+ ShiftedBank = SGPR_BANK_OFFSET + (Bank - SGPR_BANK_OFFSET +
+ (countTrailingZeros(LM) >> 1)) %
+ NUM_SGPR_BANKS;
+ }
+ }
+
+ unsigned Mask = getRegBankMask(R, Op.getSubReg(),
+ (Reg == R) ? ShiftedBank : -1);
+ StallCycles += countPopulation(UsedBanks & Mask);
+ UsedBanks |= Mask;
+ OperandMasks.push_back(OperandMask(Op.getReg(), Op.getSubReg(), Mask));
+ }
+
+ return StallCycles;
+}
+
+unsigned GCNRegBankReassign::getOperandGatherWeight(const MachineInstr& MI,
+ unsigned Reg1,
+ unsigned Reg2,
+ unsigned StallCycles) const
+{
+ unsigned Defs = 0;
+ MachineBasicBlock::const_instr_iterator Def(MI.getIterator());
+ MachineBasicBlock::const_instr_iterator B(MI.getParent()->instr_begin());
+ for (unsigned S = StallCycles; S && Def != B && Defs != 3; --S) {
+ if (MI.isDebugInstr())
+ continue;
+ --Def;
+ if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (Def->modifiesRegister(Reg1, TRI))
+ Defs |= 1;
+ if (Def->modifiesRegister(Reg2, TRI))
+ Defs |= 2;
+ }
+ return countPopulation(Defs);
+}
+
+bool GCNRegBankReassign::isReassignable(unsigned Reg) const {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg))
+ return false;
+
+ const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
+
+ unsigned PhysReg = VRM->getPhys(Reg);
+
+ if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
+ return false;
+
+ for (auto U : MRI->use_nodbg_operands(Reg)) {
+ if (U.isImplicit())
+ return false;
+ const MachineInstr *UseInst = U.getParent();
+ if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
+ return false;
+ }
+
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+ if (TRI->hasVGPRs(RC))
+ return true;
+
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+ if (Size > 32)
+ PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0);
+
+ return AMDGPU::SGPR_32RegClass.contains(PhysReg);
+}
+
+unsigned GCNRegBankReassign::getFreeBanks(unsigned Mask,
+ unsigned UsedBanks) const {
+ unsigned Size = countPopulation(Mask);
+ unsigned FreeBanks = 0;
+ unsigned Bank = findFirstSet(Mask);
+
+ UsedBanks &= ~Mask;
+
+ // Find free VGPR banks
+ if ((Mask & VGPR_BANK_MASK) && (Size < NUM_VGPR_BANKS)) {
+ for (unsigned I = 0; I < NUM_VGPR_BANKS; ++I) {
+ if (Bank == I)
+ continue;
+ unsigned NewMask = ((1 << Size) - 1) << I;
+ NewMask = (NewMask | (NewMask >> NUM_VGPR_BANKS)) & VGPR_BANK_MASK;
+ if (!(UsedBanks & NewMask))
+ FreeBanks |= 1 << I;
+ }
+ return FreeBanks;
+ }
+
+ // Find free SGPR banks
+ // SGPR tuples must be aligned, so the step is the tuple size in banks.
+ Bank -= SGPR_BANK_OFFSET;
+ for (unsigned I = 0; I < NUM_SGPR_BANKS; I += Size) {
+ if (Bank == I)
+ continue;
+ unsigned NewMask = ((1 << Size) - 1) << I;
+ NewMask = (NewMask | (NewMask >> NUM_SGPR_BANKS)) & SGPR_BANK_SHIFTED_MASK;
+ if (!(UsedBanks & (NewMask << SGPR_BANK_OFFSET)))
+ FreeBanks |= (1 << SGPR_BANK_OFFSET) << I;
+ }
+
+ return FreeBanks;
+}
+
+unsigned GCNRegBankReassign::getFreeBanks(unsigned Reg,
+ unsigned SubReg,
+ unsigned Mask,
+ unsigned UsedBanks) const {
+ if (!isReassignable(Reg))
+ return 0;
+
+ unsigned FreeBanks = getFreeBanks(Mask, UsedBanks);
+
+ unsigned LM = TRI->getSubRegIndexLaneMask(SubReg).getAsInteger();
+ if (!(LM & 1) && (Mask & VGPR_BANK_MASK)) {
+ unsigned Shift = countTrailingZeros(LM);
+ if (Shift >= NUM_VGPR_BANKS)
+ return 0;
+ unsigned VB = FreeBanks & VGPR_BANK_MASK;
+ FreeBanks = ((VB >> Shift) | (VB << (NUM_VGPR_BANKS - Shift))) &
+ VGPR_BANK_MASK;
+ } else if (!(LM & 3) && (Mask & SGPR_BANK_MASK)) {
+ unsigned Shift = countTrailingZeros(LM) >> 1;
+ if (Shift >= NUM_SGPR_BANKS)
+ return 0;
+ unsigned SB = FreeBanks >> SGPR_BANK_OFFSET;
+ FreeBanks = ((SB >> Shift) | (SB << (NUM_SGPR_BANKS - Shift))) &
+ SGPR_BANK_SHIFTED_MASK;
+ FreeBanks <<= SGPR_BANK_OFFSET;
+ }
+
+ LLVM_DEBUG(if (FreeBanks) {
+ dbgs() << "Potential reassignments of " << printReg(Reg, SubReg)
+ << " to banks: "; dumpFreeBanks(FreeBanks);
+ dbgs() << '\n'; });
+
+ return FreeBanks;
+}
+
+void GCNRegBankReassign::collectCandidates(MachineInstr& MI,
+ unsigned UsedBanks,
+ unsigned StallCycles) {
+ LLVM_DEBUG(MI.dump());
+
+ if (!StallCycles)
+ return;
+
+ LLVM_DEBUG(dbgs() << "Stall cycles = " << StallCycles << '\n');
+
+ for (unsigned I = 0, E = OperandMasks.size(); I + 1 < E; ++I) {
+ for (unsigned J = I + 1; J != E; ++J) {
+ if (!(OperandMasks[I].Mask & OperandMasks[J].Mask))
+ continue;
+
+ unsigned Reg1 = OperandMasks[I].Reg;
+ unsigned Reg2 = OperandMasks[J].Reg;
+ unsigned SubReg1 = OperandMasks[I].SubReg;
+ unsigned SubReg2 = OperandMasks[J].SubReg;
+ unsigned Mask1 = OperandMasks[I].Mask;
+ unsigned Mask2 = OperandMasks[J].Mask;
+ unsigned Size1 = countPopulation(Mask1);
+ unsigned Size2 = countPopulation(Mask2);
+
+ LLVM_DEBUG(dbgs() << "Conflicting operands: " << printReg(Reg1, SubReg1) <<
+ " and " << printReg(Reg2, SubReg2) << '\n');
+
+ unsigned Weight = getOperandGatherWeight(MI, Reg1, Reg2, StallCycles);
+ Weight += MLI->getLoopDepth(MI.getParent()) * 10;
+
+ LLVM_DEBUG(dbgs() << "Stall weight = " << Weight << '\n');
+
+ unsigned FreeBanks1 = getFreeBanks(Reg1, SubReg1, Mask1, UsedBanks);
+ unsigned FreeBanks2 = getFreeBanks(Reg2, SubReg2, Mask2, UsedBanks);
+ if (FreeBanks1)
+ Candidates.push(Candidate(&MI, Reg1, FreeBanks1, Weight
+ + ((Size2 > Size1) ? 1 : 0)));
+ if (FreeBanks2)
+ Candidates.push(Candidate(&MI, Reg2, FreeBanks2, Weight
+ + ((Size1 > Size2) ? 1 : 0)));
+ }
+ }
+}
+
+unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg,
+ unsigned Reg, int Bank,
+ bool Collect) {
+ unsigned TotalStallCycles = 0;
+ unsigned UsedBanks = 0;
+ SmallSet<const MachineInstr *, 16> Visited;
+
+ for (auto &MI : MRI->use_nodbg_instructions(SrcReg)) {
+ if (MI.isBundle())
+ continue;
+ if (!Visited.insert(&MI).second)
+ continue;
+ unsigned StallCycles = analyzeInst(MI, UsedBanks, Reg, Bank);
+ TotalStallCycles += StallCycles;
+ if (Collect)
+ collectCandidates(MI, UsedBanks, StallCycles);
+ }
+
+ return TotalStallCycles;
+}
+
+unsigned GCNRegBankReassign::scavengeReg(LiveInterval& LI,
+ unsigned Bank) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(LI.reg);
+ unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? MaxNumVGPRs
+ : MaxNumSGPRs;
+ unsigned MaxReg = MaxNumRegs + (Bank < NUM_VGPR_BANKS ? AMDGPU::VGPR0
+ : AMDGPU::SGPR0);
+
+ for (unsigned Reg : RC->getRegisters()) {
+ // Check occupancy limit.
+ if (TRI->isSubRegisterEq(Reg, MaxReg))
+ break;
+
+ if (!MRI->isAllocatable(Reg) || getPhysRegBank(Reg) != Bank)
+ continue;
+
+ for (unsigned I = 0; CSRegs[I]; ++I)
+ if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
+ !LRM->isPhysRegUsed(CSRegs[I]))
+ return AMDGPU::NoRegister;
+
+ LLVM_DEBUG(dbgs() << "Trying register " << printReg(Reg) << '\n');
+
+ if (!LRM->checkInterference(LI, Reg))
+ return Reg;
+ }
+
+ return AMDGPU::NoRegister;
+}
+
+unsigned GCNRegBankReassign::tryReassign(Candidate &C) {
+ if (!LIS->hasInterval(C.Reg))
+ return 0;
+
+ LiveInterval &LI = LIS->getInterval(C.Reg);
+ LLVM_DEBUG(dbgs() << "Try reassign " << printReg(C.Reg) << " in "; C.MI->dump();
+ LI.dump());
+
+ // For each candidate bank, walk all instructions in the range of the live
+ // interval and check if replacing the register with one belonging to
+ // the candidate bank reduces conflicts.
+
+ unsigned OrigStalls = computeStallCycles(C.Reg);
+ LLVM_DEBUG(dbgs() << "--- Stall cycles in range = " << OrigStalls << '\n');
+ if (!OrigStalls)
+ return 0;
+
+ struct BankStall {
+ BankStall(unsigned b, unsigned s) : Bank(b), Stalls(s) {};
+ bool operator< (const BankStall &RHS) const { return Stalls > RHS.Stalls; }
+ unsigned Bank;
+ unsigned Stalls;
+ };
+ SmallVector<BankStall, 8> BankStalls;
+
+ for (int Bank = 0; Bank < NUM_BANKS; ++Bank) {
+ if (C.FreeBanks & (1 << Bank)) {
+ LLVM_DEBUG(dbgs() << "Trying bank " << printBank(Bank) << '\n');
+ unsigned Stalls = computeStallCycles(C.Reg, C.Reg, Bank);
+ if (Stalls < OrigStalls) {
+ LLVM_DEBUG(dbgs() << "With bank " << printBank(Bank) << " -> "
+ << Stalls << '\n');
+ BankStalls.push_back(BankStall((unsigned)Bank, Stalls));
+ }
+ }
+ }
+ std::sort(BankStalls.begin(), BankStalls.end());
+
+ unsigned OrigReg = VRM->getPhys(C.Reg);
+ LRM->unassign(LI);
+ while (!BankStalls.empty()) {
+ BankStall BS = BankStalls.pop_back_val();
+ unsigned Reg = scavengeReg(LI, BS.Bank);
+ if (Reg == AMDGPU::NoRegister) {
+ LLVM_DEBUG(dbgs() << "No free registers in bank " << printBank(BS.Bank)
+ << '\n');
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "Found free register " << printReg(Reg)
+ << (LRM->isPhysRegUsed(Reg) ? "" : " (new)")
+ << " in bank " << printBank(BS.Bank) << '\n');
+
+ LRM->assign(LI, Reg);
+
+ LLVM_DEBUG(dbgs() << "--- Cycles saved: " << OrigStalls - BS.Stalls << '\n');
+
+ return OrigStalls - BS.Stalls;
+ }
+ LRM->assign(LI, OrigReg);
+
+ return 0;
+}
+
+unsigned GCNRegBankReassign::collectCandidates(MachineFunction &MF,
+ bool Collect) {
+ unsigned TotalStallCycles = 0;
+
+ for (MachineBasicBlock &MBB : MF) {
+
+ LLVM_DEBUG(if (Collect) {
+ if (MBB.getName().empty()) dbgs() << "bb." << MBB.getNumber();
+ else dbgs() << MBB.getName(); dbgs() << ":\n";
+ });
+
+ for (MachineInstr &MI : MBB.instrs()) {
+ if (MI.isBundle())
+ continue; // We analyze the instructions inside the bundle individually.
+
+ unsigned UsedBanks = 0;
+ unsigned StallCycles = analyzeInst(MI, UsedBanks);
+
+ if (Collect)
+ collectCandidates(MI, UsedBanks, StallCycles);
+
+ TotalStallCycles += StallCycles;
+ }
+
+ LLVM_DEBUG(if (Collect) { dbgs() << '\n'; });
+ }
+
+ return TotalStallCycles;
+}
+
+void GCNRegBankReassign::removeCandidates(unsigned Reg) {
+ Candidates.remove_if([Reg, this](const Candidate& C) {
+ return C.MI->readsRegister(Reg, TRI);
+ });
+}
+
+bool GCNRegBankReassign::verifyCycles(MachineFunction &MF,
+ unsigned OriginalCycles,
+ unsigned CyclesSaved) {
+ unsigned StallCycles = collectCandidates(MF, false);
+ LLVM_DEBUG(dbgs() << "=== After the pass " << StallCycles
+ << " stall cycles left\n");
+ return StallCycles + CyclesSaved == OriginalCycles;
+}
+
+bool GCNRegBankReassign::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ if (!ST->hasRegisterBanking() || skipFunction(MF.getFunction()))
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TRI = ST->getRegisterInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ VRM = &getAnalysis<VirtRegMap>();
+ LRM = &getAnalysis<LiveRegMatrix>();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned Occupancy = MFI->getOccupancy();
+ MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
+ MaxNumSGPRs = ST->getMaxNumSGPRs(MF);
+ MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(Occupancy), MaxNumVGPRs);
+ MaxNumSGPRs = std::min(ST->getMaxNumSGPRs(Occupancy, true), MaxNumSGPRs);
+
+ CSRegs = MRI->getCalleeSavedRegs();
+
+ RegsUsed.resize(AMDGPU::VGPR_32RegClass.getNumRegs() +
+ TRI->getEncodingValue(AMDGPU::SGPR_NULL) / 2 + 1);
+
+ LLVM_DEBUG(dbgs() << "=== RegBanks reassign analysis on function " << MF.getName()
+ << '\n');
+
+ unsigned StallCycles = collectCandidates(MF);
+ NumStallsDetected += StallCycles;
+
+ LLVM_DEBUG(dbgs() << "=== " << StallCycles << " stall cycles detected in "
+ "function " << MF.getName() << '\n');
+
+ Candidates.sort();
+
+ LLVM_DEBUG(dbgs() << "\nCandidates:\n\n";
+ for (auto C : Candidates) C.dump(this);
+ dbgs() << "\n\n");
+
+ unsigned CyclesSaved = 0;
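+ // Process candidates from the highest weight down. After a successful
+ // reassignment, candidates that read the moved register are dropped and
+ // recomputed so later decisions see the new allocation.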
+ while (!Candidates.empty()) {
+ Candidate C = Candidates.back();
+ unsigned LocalCyclesSaved = tryReassign(C);
+ CyclesSaved += LocalCyclesSaved;
+
+ if (VerifyStallCycles > 1 && !verifyCycles(MF, StallCycles, CyclesSaved))
+ report_fatal_error("RegBank reassign stall cycles verification failed.");
+
+ Candidates.pop_back();
+ if (LocalCyclesSaved) {
+ removeCandidates(C.Reg);
+ computeStallCycles(C.Reg, AMDGPU::NoRegister, -1, true);
+ Candidates.sort();
+
+ LLVM_DEBUG(dbgs() << "\nCandidates:\n\n";
+ for (auto C : Candidates)
+ C.dump(this);
+ dbgs() << "\n\n");
+ }
+ }
+ NumStallsRecovered += CyclesSaved;
+
+ LLVM_DEBUG(dbgs() << "=== After the pass " << CyclesSaved
+ << " cycles saved in function " << MF.getName() << '\n');
+
+ Candidates.clear();
+
+ if (VerifyStallCycles == 1 && !verifyCycles(MF, StallCycles, CyclesSaved))
+ report_fatal_error("RegBank reassign stall cycles verification failed.");
+
+ RegsUsed.clear();
+
+ return CyclesSaved > 0;
+}
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3d8cacc4f02c..39460fbd8a84 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -1,9 +1,8 @@
//===- GCNRegPressure.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -64,9 +63,10 @@ void llvm::printLivesAt(SlotIndex SI,
}
if (!Num) dbgs() << " <none>\n";
}
+#endif
-static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
- const GCNRPTracker::LiveRegSet &S2) {
+bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
+ const GCNRPTracker::LiveRegSet &S2) {
if (S1.size() != S2.size())
return false;
@@ -77,7 +77,7 @@ static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
}
return true;
}
-#endif
+
///////////////////////////////////////////////////////////////////////////////
// GCNRegPressure
@@ -89,7 +89,9 @@ unsigned GCNRegPressure::getRegKind(unsigned Reg,
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
return STI->isSGPRClass(RC) ?
(STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
- (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+ STI->hasAGPRs(RC) ?
+ (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
+ (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
void GCNRegPressure::inc(unsigned Reg,
@@ -110,16 +112,18 @@ void GCNRegPressure::inc(unsigned Reg,
switch (auto Kind = getRegKind(Reg, MRI)) {
case SGPR32:
case VGPR32:
+ case AGPR32:
assert(PrevMask.none() && NewMask == MaxMask);
Value[Kind] += Sign;
break;
case SGPR_TUPLE:
case VGPR_TUPLE:
+ case AGPR_TUPLE:
assert(NewMask < MaxMask || NewMask == MaxMask);
assert(PrevMask < NewMask);
- Value[Kind == SGPR_TUPLE ? SGPR32 : VGPR32] +=
+ Value[Kind == SGPR_TUPLE ? SGPR32 : Kind == AGPR_TUPLE ? AGPR32 : VGPR32] +=
Sign * (~PrevMask & NewMask).getNumLanes();
if (PrevMask.none()) {
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
index 357d3b7b2334..e4894418b943 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -1,9 +1,8 @@
//===- GCNRegPressure.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -32,6 +31,8 @@ struct GCNRegPressure {
SGPR_TUPLE,
VGPR32,
VGPR_TUPLE,
+ AGPR32,
+ AGPR_TUPLE,
TOTAL_KINDS
};
@@ -44,9 +45,10 @@ struct GCNRegPressure {
void clear() { std::fill(&Value[0], &Value[TOTAL_KINDS], 0); }
unsigned getSGPRNum() const { return Value[SGPR32]; }
- unsigned getVGPRNum() const { return Value[VGPR32]; }
+ unsigned getVGPRNum() const { return std::max(Value[VGPR32], Value[AGPR32]); }
- unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
+ unsigned getVGPRTuplesWeight() const { return std::max(Value[VGPR_TUPLE],
+ Value[AGPR_TUPLE]); }
unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
unsigned getOccupancy(const GCNSubtarget &ST) const {
@@ -191,6 +193,50 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
+/// Creates a map MachineInstr -> LiveRegSet.
+/// R - range of iterators on instructions.
+/// After - take the live set after (true) or before (false) each instruction.
+/// Note: there is no entry in the map for instructions with an empty live reg set.
+/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
+template <typename Range>
+DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
+getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
+ std::vector<SlotIndex> Indexes;
+ Indexes.reserve(std::distance(R.begin(), R.end()));
+ auto &SII = *LIS.getSlotIndexes();
+ for (MachineInstr *I : R) {
+ auto SI = SII.getInstructionIndex(*I);
+ Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex());
+ }
+ std::sort(Indexes.begin(), Indexes.end());
+
+ auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo();
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> LiveRegMap;
+ SmallVector<SlotIndex, 32> LiveIdxs, SRLiveIdxs;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ auto Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (!LIS.hasInterval(Reg))
+ continue;
+ auto &LI = LIS.getInterval(Reg);
+ LiveIdxs.clear();
+ if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs)))
+ continue;
+ if (!LI.hasSubRanges()) {
+ for (auto SI : LiveIdxs)
+ LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] =
+ MRI.getMaxLaneMaskForVReg(Reg);
+ } else
+ for (const auto &S : LI.subranges()) {
+ // Constrain the search for subranges to indexes live at the main range.
+ SRLiveIdxs.clear();
+ S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs));
+ for (auto SI : SRLiveIdxs)
+ LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask;
+ }
+ }
+ return LiveRegMap;
+}
+
inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
const LiveIntervals &LIS) {
return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
@@ -212,6 +258,9 @@ GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
return Res;
}
+bool isEqual(const GCNRPTracker::LiveRegSet &S1,
+ const GCNRPTracker::LiveRegSet &S2);
+
void printLivesAt(SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI);
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f09b7f6cff22..4ea990ae490e 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1,9 +1,8 @@
//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -446,8 +445,12 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
RPTracker.reset(*MBB->begin(), &LiveIn);
MBBLiveIns.erase(LiveInIt);
} else {
- I = Regions[CurRegion].first;
- RPTracker.reset(*I);
+ auto &Rgn = Regions[CurRegion];
+ I = Rgn.first;
+ auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
+ auto LRS = BBLiveInMap.lookup(NonDbgMI);
+ assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
+ RPTracker.reset(*I, &LRS);
}
for ( ; ; ) {
@@ -478,6 +481,23 @@ void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) {
}
}
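+// Batch-compute the live register sets at the region starters via
+// getLiveRegMap(); computeBlockPressure() looks these up instead of
+// recomputing them per region.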
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+GCNScheduleDAGMILive::getBBLiveInMap() const {
+ assert(!Regions.empty());
+ std::vector<MachineInstr *> BBStarters;
+ BBStarters.reserve(Regions.size());
+ auto I = Regions.rbegin(), E = Regions.rend();
+ auto *BB = I->first->getParent();
+ do {
+ auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
+ BBStarters.push_back(MI);
+ do {
+ ++I;
+ } while (I != E && I->first->getParent() == BB);
+ } while (I != E);
+ return getLiveRegMap(BBStarters, false /*After*/, *LIS);
+}
+
void GCNScheduleDAGMILive::finalizeSchedule() {
GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
@@ -485,6 +505,9 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
LiveIns.resize(Regions.size());
Pressure.resize(Regions.size());
+ if (!Regions.empty())
+ BBLiveInMap = getBBLiveInMap();
+
do {
Stage++;
RegionIdx = 0;
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index 3ac6af89cb9b..eaf3dee9ba5d 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -1,9 +1,8 @@
//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@ class GCNSubtarget;
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets. Its goal is to
/// maximize kernel occupancy (i.e. maximum number of waves per simd).
-class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
friend class GCNScheduleDAGMILive;
SUnit *pickNodeBidirectional(bool &IsTopNode);
@@ -60,7 +59,7 @@ public:
void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
};
-class GCNScheduleDAGMILive : public ScheduleDAGMILive {
+class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
const GCNSubtarget &ST;
@@ -78,7 +77,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
// Current region index.
size_t RegionIdx;
- // Vecor of regions recorder for later rescheduling
+ // Vector of regions recorded for later rescheduling.
SmallVector<std::pair<MachineBasicBlock::iterator,
MachineBasicBlock::iterator>, 32> Regions;
@@ -91,6 +90,9 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive {
// Temporary basic block live-in cache.
DenseMap<const MachineBasicBlock*, GCNRPTracker::LiveRegSet> MBBLiveIns;
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
+ DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
+
// Return current region pressure.
GCNRegPressure getRealRegPressure() const;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index abc88c02adca..57c0ba26cc3a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
@@ -19,8 +18,10 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"
+#include "Utils/AMDGPUBaseInfo.h"
using namespace llvm;
+using namespace llvm::AMDGPU;
namespace {
@@ -36,17 +37,13 @@ public:
const MCSubtargetInfo *STI) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
- return false;
- }
+ const MCAsmLayout &Layout) const override;
+
void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- MCInst &Res) const override {
- llvm_unreachable("Not implemented");
- }
+ MCInst &Res) const override;
+
bool mayNeedRelaxation(const MCInst &Inst,
- const MCSubtargetInfo &STI) const override {
- return false;
- }
+ const MCSubtargetInfo &STI) const override;
unsigned getMinimumNopSize() const override;
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
@@ -56,6 +53,36 @@ public:
} //End anonymous namespace
+void AMDGPUAsmBackend::relaxInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI,
+ MCInst &Res) const {
+ unsigned RelaxedOpcode = AMDGPU::getSOPPWithRelaxation(Inst.getOpcode());
+ Res.setOpcode(RelaxedOpcode);
+ Res.addOperand(Inst.getOperand(0));
+ return;
+}
+
+bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // If the branch target has an offset of 0x3f, this needs to be relaxed to
+ // add an s_nop 0 immediately after the branch to effectively increment the
+ // offset, as a hardware workaround for gfx1010.
+ return (((int64_t(Value)/4)-1) == 0x3f);
+}
+
+bool AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst,
+ const MCSubtargetInfo &STI) const {
+ if (!STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug])
+ return false;
+
+ if (AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()) >= 0)
+ return true;
+
+ return false;
+}
+
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
case AMDGPU::fixup_si_sopp_br:
@@ -173,11 +200,13 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
bool Is64Bit;
bool HasRelocationAddend;
uint8_t OSABI = ELF::ELFOSABI_NONE;
+ uint8_t ABIVersion = 0;
public:
- ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) :
+ ELFAMDGPUAsmBackend(const Target &T, const Triple &TT, uint8_t ABIVersion) :
AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn),
- HasRelocationAddend(TT.getOS() == Triple::AMDHSA) {
+ HasRelocationAddend(TT.getOS() == Triple::AMDHSA),
+ ABIVersion(ABIVersion) {
switch (TT.getOS()) {
case Triple::AMDHSA:
OSABI = ELF::ELFOSABI_AMDGPU_HSA;
@@ -195,7 +224,8 @@ public:
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
- return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend);
+ return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend,
+ ABIVersion);
}
};
@@ -206,5 +236,6 @@ MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
// Use 64-bit ELF for amdgcn
- return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
+ return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple(),
+ IsaInfo::hasCodeObjectV3(&STI) ? 1 : 0);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index c85a1ea5b054..6549a8d7d592 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,7 +22,8 @@ namespace {
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
public:
- AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend);
+ AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend,
+ uint8_t ABIVersion);
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
@@ -35,9 +35,10 @@ protected:
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
uint8_t OSABI,
- bool HasRelocationAddend)
+ bool HasRelocationAddend,
+ uint8_t ABIVersion)
: MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU,
- HasRelocationAddend) {}
+ HasRelocationAddend, ABIVersion) {}
unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
@@ -84,7 +85,9 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
std::unique_ptr<MCObjectTargetWriter>
llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
- bool HasRelocationAddend) {
+ bool HasRelocationAddend,
+ uint8_t ABIVersion) {
return llvm::make_unique<AMDGPUELFObjectWriter>(Is64Bit, OSABI,
- HasRelocationAddend);
+ HasRelocationAddend,
+ ABIVersion);
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
index c627a08e7463..40437d8fa1a4 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-------- AMDGPUELFStreamer.cpp - ELF Object Output -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
index 41e9063a759e..9fbf53c944ef 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -1,9 +1,8 @@
//===-------- AMDGPUELFStreamer.h - ELF Object Output -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index 20c1adfbc6b9..d49bb196ab3a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -1,9 +1,8 @@
//===-- AMDGPUFixupKinds.h - AMDGPU Specific Fixup Entries ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index fab0f87dfcbe..01b53432cbb7 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// \file
//===----------------------------------------------------------------------===//
@@ -72,11 +71,6 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
}
-void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
-}
-
void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -123,13 +117,25 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
}
}
-void AMDGPUInstPrinter::printOffsetS13(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << ((OpNo == 0)? "offset:" : " offset:");
- printS13ImmDecOperand(MI, OpNo, O);
+
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ bool IsFlatSeg = !(Desc.TSFlags & SIInstrFlags::IsNonFlatSeg);
+
+ if (IsFlatSeg) { // Unsigned offset
+ printU16ImmDecOperand(MI, OpNo, O);
+ } else { // Signed offset
+ if (AMDGPU::isGFX10(STI)) {
+ O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
+ } else {
+ O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
+ }
+ }
}
}
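
Illustrative aside (not part of the imported patch): printFlatOffset above prints the offset as a signed 12-bit field on GFX10 and a signed 13-bit field on earlier subtargets. A minimal standalone sketch of that SignExtend32<N> arithmetic, using a local signExtend32 helper that mirrors llvm::SignExtend32; the width values come straight from the hunk.

    #include <cassert>
    #include <cstdint>

    // Same behaviour as llvm::SignExtend32<N>: move the low N bits up to the
    // sign position, then shift back arithmetically.
    template <unsigned N> int32_t signExtend32(uint32_t X) {
      return static_cast<int32_t>(X << (32 - N)) >> (32 - N);
    }

    int main() {
      // 0xFFF reads as -1 through the 12-bit GFX10 field, but as +4095
      // through the 13-bit field used by the older subtargets.
      assert(signExtend32<12>(0xFFF) == -1);
      assert(signExtend32<13>(0xFFF) == 4095);
      return 0;
    }
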
@@ -174,6 +180,12 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "gds");
}
+void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ if (AMDGPU::isGFX10(STI))
+ printNamedBit(MI, OpNo, O, "dlc");
+}
+
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "glc");
@@ -197,6 +209,18 @@ void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ unsigned Dim = MI->getOperand(OpNo).getImm();
+ O << " dim:SQ_RSRC_IMG_";
+
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+ if (DimInfo)
+ O << DimInfo->AsmSuffix;
+ else
+ O << Dim;
+}
+
void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "unorm");
@@ -243,140 +267,96 @@ void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (unsigned Val = MI->getOperand(OpNo).getImm()) {
- O << " dfmt:" << (Val & 15);
- O << ", nfmt:" << (Val >> 4);
+ if (AMDGPU::isGFX10(STI))
+ O << " format:" << Val;
+ else {
+ O << " dfmt:" << (Val & 15);
+ O << ", nfmt:" << (Val >> 4);
+ }
}
}
void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
const MCRegisterInfo &MRI) {
+#if !defined(NDEBUG)
switch (RegNo) {
- case AMDGPU::VCC:
- O << "vcc";
- return;
- case AMDGPU::SCC:
- O << "scc";
- return;
- case AMDGPU::EXEC:
- O << "exec";
- return;
- case AMDGPU::M0:
- O << "m0";
- return;
- case AMDGPU::FLAT_SCR:
- O << "flat_scratch";
- return;
- case AMDGPU::XNACK_MASK:
- O << "xnack_mask";
- return;
- case AMDGPU::VCC_LO:
- O << "vcc_lo";
- return;
- case AMDGPU::VCC_HI:
- O << "vcc_hi";
- return;
- case AMDGPU::TBA_LO:
- O << "tba_lo";
- return;
- case AMDGPU::TBA_HI:
- O << "tba_hi";
- return;
- case AMDGPU::TMA_LO:
- O << "tma_lo";
- return;
- case AMDGPU::TMA_HI:
- O << "tma_hi";
- return;
- case AMDGPU::EXEC_LO:
- O << "exec_lo";
- return;
- case AMDGPU::EXEC_HI:
- O << "exec_hi";
- return;
- case AMDGPU::FLAT_SCR_LO:
- O << "flat_scratch_lo";
- return;
- case AMDGPU::FLAT_SCR_HI:
- O << "flat_scratch_hi";
- return;
- case AMDGPU::XNACK_MASK_LO:
- O << "xnack_mask_lo";
- return;
- case AMDGPU::XNACK_MASK_HI:
- O << "xnack_mask_hi";
- return;
case AMDGPU::FP_REG:
case AMDGPU::SP_REG:
case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
case AMDGPU::PRIVATE_RSRC_REG:
llvm_unreachable("pseudo-register should not ever be emitted");
+ case AMDGPU::SCC:
+ llvm_unreachable("pseudo scc should not ever be emitted");
default:
break;
}
-
- // The low 8 bits of the encoding value is the register index, for both VGPRs
- // and SGPRs.
- unsigned RegIdx = MRI.getEncodingValue(RegNo) & ((1 << 8) - 1);
-
- unsigned NumRegs;
- if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(RegNo)) {
- O << 'v';
- NumRegs = 1;
- } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(RegNo)) {
- O << 's';
- NumRegs = 1;
- } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo)) {
- O <<'v';
- NumRegs = 2;
- } else if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo)) {
- O << 's';
- NumRegs = 2;
- } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo)) {
- O << 'v';
- NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo)) {
- O << 's';
- NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
- O << 'v';
- NumRegs = 3;
- } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
- O << 'v';
- NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) {
- O << 's';
- NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
- O << 'v';
- NumRegs = 16;
- } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) {
- O << 's';
- NumRegs = 16;
- } else {
- O << getRegisterName(RegNo);
- return;
- }
-
- if (NumRegs == 1) {
- O << RegIdx;
- return;
- }
-
- O << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
+#endif
+
+ unsigned AltName = AMDGPU::Reg32;
+
+ if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg64;
+ else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg128;
+ else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg96;
+ else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg160;
+ else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg256;
+ else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg512;
+ else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) ||
+ MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo))
+ AltName = AMDGPU::Reg1024;
+
+ O << getRegisterName(RegNo, AltName);
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
- O << "_e64 ";
- else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
- O << "_dpp ";
- else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
- O << "_sdwa ";
- else
- O << "_e32 ";
+ if (OpNo == 0) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
+ O << "_dpp ";
+ else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SDWA)
+ O << "_sdwa ";
+ else
+ O << "_e32 ";
+ }
printOperand(MI, OpNo, STI, O);
+
+ // Print default vcc/vcc_lo operand.
+ switch (MI->getOpcode()) {
+ default: break;
+
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
+ printDefaultVccOperand(1, STI, O);
+ break;
+ }
}
void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
@@ -491,7 +471,7 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
O << "-4.0";
else if (Imm == 0x3fc45f306dc9c882 &&
STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
- O << "0.15915494";
+ O << "0.15915494309189532";
else {
assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
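
Illustrative aside (not part of the imported patch): the longer literal above exists so the printed value round-trips; 0x3fc45f306dc9c882 is the inline 1/(2*pi) constant guarded by FeatureInv2PiInlineImm. A tiny standalone check, writing out the pi literal rather than relying on M_PI.

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    int main() {
      const uint64_t Bits = 0x3fc45f306dc9c882ULL; // inline 1/(2*pi) constant
      double D;
      std::memcpy(&D, &Bits, sizeof(D));
      const double InvTwoPi = 0.5 / 3.141592653589793;
      // Agreement is far tighter than the eight digits printed previously.
      assert(std::fabs(D - InvTwoPi) < 1e-15);
      return 0;
    }
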
@@ -501,9 +481,57 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
}
}
+void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ if (!Imm)
+ return;
+
+ O << " blgp:" << Imm;
+}
+
+void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ if (!Imm)
+ return;
+
+ O << " cbsz:" << Imm;
+}
+
+void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ if (!Imm)
+ return;
+
+ O << " abid:" << Imm;
+}
+
+void AMDGPUInstPrinter::printDefaultVccOperand(unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (OpNo > 0)
+ O << ", ";
+ printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
+ AMDGPU::VCC : AMDGPU::VCC_LO, O, MRI);
+ if (OpNo == 0)
+ O << ", ";
+}
+
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ // Print default vcc/vcc_lo operand of VOPC.
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ if (OpNo == 0 && (Desc.TSFlags & SIInstrFlags::VOPC) &&
+ (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
+ Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
+ printDefaultVccOperand(OpNo, STI, O);
+
if (OpNo >= MI->getNumOperands()) {
O << "/*Missing OP" << OpNo << "*/";
return;
@@ -513,12 +541,13 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
printRegOperand(Op.getReg(), O, MRI);
} else if (Op.isImm()) {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
switch (Desc.OpInfo[OpNo].OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case MCOI::OPERAND_IMMEDIATE:
printImmediate32(Op.getImm(), STI, O);
break;
@@ -530,12 +559,24 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ if (!isUInt<16>(Op.getImm()) &&
+ STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+ printImmediate32(Op.getImm(), STI, O);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
printImmediateV216(Op.getImm(), STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
@@ -573,6 +614,29 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
O << "/*INV_OP*/";
}
+
+ // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
+ switch (MI->getOpcode()) {
+ default: break;
+
+ case AMDGPU::V_CNDMASK_B32_e32_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
+ case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
+
+ case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
+ case AMDGPU::V_CNDMASK_B32_e32_vi:
+ if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1))
+ printDefaultVccOperand(OpNo, STI, O);
+ break;
+ }
}
void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
@@ -620,6 +684,33 @@ void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::SEXT)
O << ')';
+
+ // Print default vcc/vcc_lo operand of VOP2b.
+ switch (MI->getOpcode()) {
+ default: break;
+
+ case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
+ case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
+ case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
+ if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1))
+ printDefaultVccOperand(OpNo, STI, O);
+ break;
+ }
+}
+
+void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (!AMDGPU::isGFX10(STI))
+ llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
+
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ O << " dpp8:[" << formatDec(Imm & 0x7);
+ for (size_t i = 1; i < 8; ++i) {
+ O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
+ }
+ O << ']';
}
void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
@@ -647,21 +738,61 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
O << " row_ror:";
printU4ImmDecOperand(MI, OpNo, O);
} else if (Imm == DppCtrl::WAVE_SHL1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_shl is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_shl:1";
} else if (Imm == DppCtrl::WAVE_ROL1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_rol is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_rol:1";
} else if (Imm == DppCtrl::WAVE_SHR1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_shr is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_shr:1";
} else if (Imm == DppCtrl::WAVE_ROR1) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* wave_ror is not supported starting from GFX10 */";
+ return;
+ }
O << " wave_ror:1";
} else if (Imm == DppCtrl::ROW_MIRROR) {
O << " row_mirror";
} else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
O << " row_half_mirror";
} else if (Imm == DppCtrl::BCAST15) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* row_bcast is not supported starting from GFX10 */";
+ return;
+ }
O << " row_bcast:15";
} else if (Imm == DppCtrl::BCAST31) {
+ if (!AMDGPU::isVI(STI) && !AMDGPU::isGFX9(STI)) {
+ O << " /* row_bcast is not supported starting from GFX10 */";
+ return;
+ }
O << " row_bcast:31";
+ } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
+ (Imm <= DppCtrl::ROW_SHARE_LAST)) {
+ if (!AMDGPU::isGFX10(STI)) {
+ O << " /* row_share is not supported on ASICs earlier than GFX10 */";
+ return;
+ }
+ O << " row_share:";
+ printU4ImmDecOperand(MI, OpNo, O);
+ } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
+ (Imm <= DppCtrl::ROW_XMASK_LAST)) {
+ if (!AMDGPU::isGFX10(STI)) {
+ O << " /* row_xmask is not supported on ASICs earlier than GFX10 */";
+ return;
+ }
+      O << " row_xmask:";
+ printU4ImmDecOperand(MI, OpNo, O);
} else {
O << " /* Invalid dpp_ctrl value */";
}
@@ -690,6 +821,16 @@ void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ using namespace llvm::AMDGPU::DPP;
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
+ O << " fi:1";
+ }
+}
+
void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
using namespace llvm::AMDGPU::SDWA;
@@ -803,8 +944,10 @@ void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
O << " mrtz";
else if (Tgt == 9)
O << " null";
- else if (Tgt >= 12 && Tgt <= 15)
+ else if ((Tgt >= 12 && Tgt <= 15) || (Tgt == 16 && AMDGPU::isGFX10(STI)))
O << " pos" << Tgt - 12;
+ else if (AMDGPU::isGFX10(STI) && Tgt == 20)
+ O << " prim";
else if (Tgt >= 32 && Tgt <= 63)
O << " param" << Tgt - 32;
else {
@@ -875,6 +1018,18 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
+ Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
+ auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+ unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
+ unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
+ if (FI || BC)
+ O << " op_sel:[" << FI << ',' << BC << ']';
+ return;
+ }
+
printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
}
@@ -932,23 +1087,24 @@ void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ using namespace llvm::AMDGPU::VGPRIndexMode;
unsigned Val = MI->getOperand(OpNo).getImm();
- if (Val == 0) {
- O << " 0";
- return;
- }
-
- if (Val & VGPRIndexMode::DST_ENABLE)
- O << " dst";
-
- if (Val & VGPRIndexMode::SRC0_ENABLE)
- O << " src0";
- if (Val & VGPRIndexMode::SRC1_ENABLE)
- O << " src1";
-
- if (Val & VGPRIndexMode::SRC2_ENABLE)
- O << " src2";
+ if ((Val & ~ENABLE_MASK) != 0) {
+ O << " " << formatHex(static_cast<uint64_t>(Val));
+ } else {
+ O << " gpr_idx(";
+ bool NeedComma = false;
+ for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
+ if (Val & (1 << ModeId)) {
+ if (NeedComma)
+ O << ',';
+ O << IdSymbolic[ModeId];
+ NeedComma = true;
+ }
+ }
+ O << ')';
+ }
}
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
@@ -1010,40 +1166,29 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
using namespace llvm::AMDGPU::SendMsg;
- const unsigned SImm16 = MI->getOperand(OpNo).getImm();
- const unsigned Id = SImm16 & ID_MASK_;
- do {
- if (Id == ID_INTERRUPT) {
- if ((SImm16 & ~ID_MASK_) != 0) // Unused/unknown bits must be 0.
- break;
- O << "sendmsg(" << IdSymbolic[Id] << ')';
- return;
- }
- if (Id == ID_GS || Id == ID_GS_DONE) {
- if ((SImm16 & ~(ID_MASK_|OP_GS_MASK_|STREAM_ID_MASK_)) != 0) // Unused/unknown bits must be 0.
- break;
- const unsigned OpGs = (SImm16 & OP_GS_MASK_) >> OP_SHIFT_;
- const unsigned StreamId = (SImm16 & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
- if (OpGs == OP_GS_NOP && Id != ID_GS_DONE) // NOP to be used for GS_DONE only.
- break;
- if (OpGs == OP_GS_NOP && StreamId != 0) // NOP does not use/define stream id bits.
- break;
- O << "sendmsg(" << IdSymbolic[Id] << ", " << OpGsSymbolic[OpGs];
- if (OpGs != OP_GS_NOP) { O << ", " << StreamId; }
- O << ')';
- return;
- }
- if (Id == ID_SYSMSG) {
- if ((SImm16 & ~(ID_MASK_|OP_SYS_MASK_)) != 0) // Unused/unknown bits must be 0.
- break;
- const unsigned OpSys = (SImm16 & OP_SYS_MASK_) >> OP_SHIFT_;
- if (! (OP_SYS_FIRST_ <= OpSys && OpSys < OP_SYS_LAST_)) // Unused/unknown.
- break;
- O << "sendmsg(" << IdSymbolic[Id] << ", " << OpSysSymbolic[OpSys] << ')';
- return;
+ const unsigned Imm16 = MI->getOperand(OpNo).getImm();
+
+ uint16_t MsgId;
+ uint16_t OpId;
+ uint16_t StreamId;
+ decodeMsg(Imm16, MsgId, OpId, StreamId);
+
+ if (isValidMsgId(MsgId, STI) &&
+ isValidMsgOp(MsgId, OpId) &&
+ isValidMsgStream(MsgId, OpId, StreamId)) {
+ O << "sendmsg(" << getMsgName(MsgId);
+ if (msgRequiresOp(MsgId)) {
+ O << ", " << getMsgOpName(MsgId, OpId);
+ if (msgSupportsStream(MsgId, OpId)) {
+ O << ", " << StreamId;
+ }
}
- } while (false);
- O << SImm16; // Unknown simm16 code.
+ O << ')';
+ } else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
+ O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
+ } else {
+ O << Imm16; // Unknown imm16 code.
+ }
}
static void printSwizzleBitmask(const uint16_t AndMask,
@@ -1094,7 +1239,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
- for (auto i = 0; i < LANE_NUM; ++i) {
+ for (unsigned I = 0; I < LANE_NUM; ++I) {
O << ",";
O << formatDec(Imm & LANE_MASK);
Imm >>= LANE_SHIFT;
@@ -1184,32 +1329,42 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- using namespace llvm::AMDGPU::Hwreg;
+ unsigned Id;
+ unsigned Offset;
+ unsigned Width;
- unsigned SImm16 = MI->getOperand(OpNo).getImm();
- const unsigned Id = (SImm16 & ID_MASK_) >> ID_SHIFT_;
- const unsigned Offset = (SImm16 & OFFSET_MASK_) >> OFFSET_SHIFT_;
- const unsigned Width = ((SImm16 & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
+ using namespace llvm::AMDGPU::Hwreg;
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ decodeHwreg(Val, Id, Offset, Width);
+ StringRef HwRegName = getHwreg(Id, STI);
O << "hwreg(";
- unsigned Last = ID_SYMBOLIC_LAST_;
- if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI) || AMDGPU::isVI(STI))
- Last = ID_SYMBOLIC_FIRST_GFX9_;
- if (ID_SYMBOLIC_FIRST_ <= Id && Id < Last && IdSymbolic[Id]) {
- O << IdSymbolic[Id];
+ if (!HwRegName.empty()) {
+ O << HwRegName;
} else {
O << Id;
}
- if (Width != WIDTH_M1_DEFAULT_ + 1 || Offset != OFFSET_DEFAULT_) {
+ if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
O << ", " << Offset << ", " << Width;
}
O << ')';
}
+void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint16_t Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == 0) {
+ return;
+ }
+
+ O << ' ' << formatDec(Imm);
+}
+
#include "AMDGPUGenAsmWriter.inc"
void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot, const MCSubtargetInfo &STI) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
O.flush();
printInstruction(MI, O);
printAnnotation(O, Annot);
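
Illustrative aside (not part of the imported patch): the new printDPP8 above unpacks eight 3-bit lane selectors from a single immediate, least-significant lane first. A small standalone sketch of that packing using the same shift and mask; encodeDpp8 and decodeDpp8 are hypothetical helper names introduced only for this example.

    #include <array>
    #include <cassert>
    #include <cstdint>

    uint32_t encodeDpp8(const std::array<unsigned, 8> &Sel) {
      uint32_t Imm = 0;
      for (size_t I = 0; I < 8; ++I) {
        assert(Sel[I] < 8 && "each selector is 3 bits");
        Imm |= Sel[I] << (3 * I);
      }
      return Imm;
    }

    std::array<unsigned, 8> decodeDpp8(uint32_t Imm) {
      std::array<unsigned, 8> Sel;
      for (size_t I = 0; I < 8; ++I)
        Sel[I] = (Imm >> (3 * I)) & 0x7; // same shift/mask as printDPP8
      return Sel;
    }

    int main() {
      const std::array<unsigned, 8> Sel = {1, 0, 3, 2, 5, 4, 7, 6};
      assert(decodeDpp8(encodeDpp8(Sel)) == Sel);
      return 0;
    }
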
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 0ba74ca0f3e1..b544d1ef3605 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -1,18 +1,18 @@
//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AMDGPU_INSTPRINTER_AMDGPUINSTPRINTER_H
-#define LLVM_LIB_TARGET_AMDGPU_INSTPRINTER_AMDGPUINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
+#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -26,7 +26,8 @@ public:
//Autogenerated by tblgen
void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
+ static const char *getRegisterName(unsigned RegNo,
+ unsigned AltIdx = AMDGPU::NoRegAltName);
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
@@ -42,7 +43,6 @@ private:
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
@@ -53,8 +53,8 @@ private:
void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printOffsetS13(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printFlatOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
@@ -68,6 +68,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -76,6 +78,8 @@ private:
raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -112,6 +116,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDPP8(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printDPPCtrl(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printRowMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -120,6 +126,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printBoundCtrl(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFI(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWADstSel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -150,6 +158,14 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBLGP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printCBSZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printABID(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printDefaultVccOperand(unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
template <unsigned N>
@@ -214,6 +230,8 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printEndpgm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
};
class R600InstPrinter : public MCInstPrinter {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 2364e7b7b5fb..9e04ab9bae93 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -1,15 +1,16 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
using namespace llvm;
@@ -19,7 +20,10 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
MinInstAlignment = 4;
- MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 8 : 16;
+
+ // This is the maximum instruction encoded size for gfx10. With a known
+ // subtarget, it can be reduced to 8 bytes.
+ MaxInstLength = (TT.getArch() == Triple::amdgcn) ? 20 : 16;
SeparatorString = "\n";
CommentString = ";";
PrivateLabelPrefix = "";
@@ -45,3 +49,18 @@ bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
SectionName == ".hsarodata_readonly_agent" ||
MCAsmInfo::shouldOmitSectionDirective(SectionName);
}
+
+unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
+ if (!STI || STI->getTargetTriple().getArch() == Triple::r600)
+ return MaxInstLength;
+
+ // Maximum for NSA encoded images
+ if (STI->getFeatureBits()[AMDGPU::FeatureNSAEncoding])
+ return 20;
+
+ // 64-bit instruction with 32-bit literal.
+ if (STI->getFeatureBits()[AMDGPU::FeatureVOP3Literal])
+ return 12;
+
+ return 8;
+}
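
Illustrative aside (not part of the imported patch): the three ceilings returned by getMaxInstLength above, written out as dword arithmetic. The dword counts are assumptions inferred from the comments in the hunk (NSA image encodings of up to five dwords; a 64-bit VOP3 encoding plus one 32-bit literal; otherwise at most a 64-bit encoding).

    constexpr unsigned DwordBytes = 4;

    // NSA-encoded MIMG: up to five encoding dwords.
    constexpr unsigned MaxNSABytes = 5 * DwordBytes;               // 20
    // 64-bit VOP3 encoding followed by one 32-bit literal operand.
    constexpr unsigned MaxVOP3LiteralBytes = (2 + 1) * DwordBytes; // 12
    // Otherwise at most a 64-bit encoding.
    constexpr unsigned MaxBaseBytes = 2 * DwordBytes;              // 8

    static_assert(MaxNSABytes == 20 && MaxVOP3LiteralBytes == 12 &&
                      MaxBaseBytes == 8,
                  "matches the values returned by getMaxInstLength");
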
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
index 8cb33a3179cd..71e63ec27a8f 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@ class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
explicit AMDGPUMCAsmInfo(const Triple &TT);
bool shouldOmitSectionDirective(StringRef SectionName) const override;
+ unsigned getMaxInstLength(const MCSubtargetInfo *STI) const override;
};
} // namespace llvm
#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index cae7a7a6c7e7..f3d945cc0764 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUCodeEmitter.cpp - AMDGPU Code Emitter interface -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index dcc10a032afe..62757a707890 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -1,9 +1,8 @@
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,10 +63,17 @@ public:
return 0;
}
+ virtual unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return 0;
+ }
+
protected:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index c579c7d60e16..88df64d18cc5 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,13 +13,15 @@
#include "AMDGPUMCTargetDesc.h"
#include "AMDGPUELFStreamer.h"
+#include "AMDGPUInstPrinter.h"
#include "AMDGPUMCAsmInfo.h"
#include "AMDGPUTargetStreamer.h"
-#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -104,6 +105,35 @@ static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
std::move(Emitter), RelaxAll);
}
+namespace {
+
+class AMDGPUMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ explicit AMDGPUMCInstrAnalysis(const MCInstrInfo *Info)
+ : MCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ if (Inst.getNumOperands() == 0 || !Inst.getOperand(0).isImm() ||
+ Info->get(Inst.getOpcode()).OpInfo[0].OperandType !=
+ MCOI::OPERAND_PCREL)
+ return false;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ // Our branches take a simm16, but we need two extra bits to account for
+ // the factor of 4.
+ APInt SignedOffset(18, Imm * 4, true);
+ Target = (SignedOffset.sext(64) + Addr + Size).getZExtValue();
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+static MCInstrAnalysis *createAMDGPUMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new AMDGPUMCInstrAnalysis(Info);
+}
+
extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
@@ -114,6 +144,7 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createAMDGPUMCInstrAnalysis);
TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
}
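
Illustrative aside (not part of the imported patch): the new AMDGPUMCInstrAnalysis above resolves a branch's simm16 operand, which is counted in dwords and is relative to the end of the branch instruction (Addr + Size). A standalone sketch of that arithmetic for an in-range simm16; branchTarget is a hypothetical name introduced only for this example.

    #include <cassert>
    #include <cstdint>

    // Scale the dword-counted simm16 by 4 and add it to the address of the
    // following instruction, matching what evaluateBranch computes.
    uint64_t branchTarget(uint64_t Addr, uint64_t Size, int16_t Simm16) {
      return Addr + Size + static_cast<int64_t>(Simm16) * 4;
    }

    int main() {
      // A 4-byte branch at 0x1000 with simm16 = -2 lands 8 bytes before the
      // next instruction.
      assert(branchTarget(0x1000, 4, -2) == 0xFFC);
      // simm16 = 0 falls through to the instruction that follows.
      assert(branchTarget(0x1000, 4, 0) == 0x1004);
      return 0;
    }
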
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index f3628d96d6e9..9754d31fee60 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,9 +33,6 @@ class Target;
class Triple;
class raw_pwrite_stream;
-Target &getTheAMDGPUTarget();
-Target &getTheGCNTarget();
-
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
@@ -53,7 +49,7 @@ MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
std::unique_ptr<MCObjectTargetWriter>
createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
- bool HasRelocationAddend);
+ bool HasRelocationAddend, uint8_t ABIVersion);
} // End llvm namespace
#define GET_REGINFO_ENUM
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index c17fe126546c..8f11433476f4 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUTargetStreamer.cpp - Mips Target Streamer Methods -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -52,51 +50,53 @@ bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
}
bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
- std::shared_ptr<msgpack::Node> HSAMetadataRoot;
- yaml::Input YIn(HSAMetadataString);
- YIn >> HSAMetadataRoot;
- if (YIn.error())
+ msgpack::Document HSAMetadataDoc;
+ if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
return false;
- return EmitHSAMetadata(HSAMetadataRoot, false);
+ return EmitHSAMetadata(HSAMetadataDoc, false);
}
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
AMDGPU::GPUKind AK;
switch (ElfMach) {
- case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
- case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
- case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
- case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
- case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
- case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
- case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
- case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
- case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
- case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
- case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
- case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
- case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
- case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
- case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
- case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
- case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
+ case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
+ case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
+ case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
+ case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
+ case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
+ case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
StringRef GPUName = getArchNameAMDGCN(AK);
@@ -142,7 +142,11 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
+ case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+ case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
+ case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
+ case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -157,6 +161,14 @@ AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: AMDGPUTargetStreamer(S), OS(OS) { }
+// A hook for emitting stuff at the end.
+// We use it for emitting the accumulated PAL metadata as directives.
+void AMDGPUTargetAsmStreamer::finish() {
+ std::string S;
+ getPALMetadata()->toString(S);
+ OS << S;
+}
+
void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
OS << "\t.amdgcn_target \"" << Target << "\"\n";
}
@@ -196,6 +208,12 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
}
}
+void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
+ unsigned Align) {
+ OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", " << Align
+ << '\n';
+}
+
bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
return true;
@@ -214,15 +232,14 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
}
bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
- std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ msgpack::Document &HSAMetadataDoc, bool Strict) {
V3::MetadataVerifier Verifier(Strict);
- if (!Verifier.verify(*HSAMetadataRoot))
+ if (!Verifier.verify(HSAMetadataDoc.getRoot()))
return false;
std::string HSAMetadataString;
raw_string_ostream StrOS(HSAMetadataString);
- yaml::Output YOut(StrOS);
- YOut << HSAMetadataRoot;
+ HSAMetadataDoc.toYAML(StrOS);
OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
OS << StrOS.str() << '\n';
@@ -230,13 +247,10 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
return true;
}
-bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
- const PALMD::Metadata &PALMetadata) {
- std::string PALMetadataString;
- if (PALMD::toString(PALMetadata, PALMetadataString))
- return false;
-
- OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
+bool AMDGPUTargetAsmStreamer::EmitCodeEnd() {
+ const uint32_t Encoded_s_code_end = 0xbf9f0000;
+ OS << "\t.p2alignl 6, " << Encoded_s_code_end << '\n';
+ OS << "\t.fill 32, 4, " << Encoded_s_code_end << '\n';
return true;
}
@@ -278,6 +292,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ if (IVersion.Major >= 10)
+ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
PRINT_FIELD(
OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
@@ -331,6 +349,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ if (IVersion.Major >= 10) {
+ PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+ PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ }
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
@@ -387,6 +416,19 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+// A hook for emitting stuff at the end.
+// We use it for emitting the accumulated PAL metadata as a .note record.
+void AMDGPUTargetELFStreamer::finish() {
+ std::string Blob;
+ const char *Vendor = getPALMetadata()->getVendor();
+ unsigned Type = getPALMetadata()->getType();
+ getPALMetadata()->toBlob(Type, Blob);
+ if (Blob.empty())
+ return;
+ EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
+ [&](MCELFStreamer &OS) { OS.EmitBytes(Blob); });
+}
+
void AMDGPUTargetELFStreamer::EmitNote(
StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc) {
@@ -463,6 +505,27 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
Symbol->setType(Type);
}
+void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
+ unsigned Align) {
+ assert(isPowerOf2_32(Align));
+
+ MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
+ SymbolELF->setType(ELF::STT_OBJECT);
+
+ if (!SymbolELF->isBindingSet()) {
+ SymbolELF->setBinding(ELF::STB_GLOBAL);
+ SymbolELF->setExternal(true);
+ }
+
+ if (SymbolELF->declareCommon(Size, Align, true)) {
+ report_fatal_error("Symbol: " + Symbol->getName() +
+ " redeclared as different type");
+ }
+
+ SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
+ SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
+}
+
bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
@@ -482,16 +545,14 @@ bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
return true;
}
-bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
- std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
+ bool Strict) {
V3::MetadataVerifier Verifier(Strict);
- if (!Verifier.verify(*HSAMetadataRoot))
+ if (!Verifier.verify(HSAMetadataDoc.getRoot()))
return false;
std::string HSAMetadataString;
- raw_string_ostream StrOS(HSAMetadataString);
- msgpack::Writer MPWriter(StrOS);
- HSAMetadataRoot->write(MPWriter);
+ HSAMetadataDoc.writeToBlob(HSAMetadataString);
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
@@ -505,7 +566,7 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
[&](MCELFStreamer &OS) {
OS.EmitLabel(DescBegin);
- OS.EmitBytes(StrOS.str());
+ OS.EmitBytes(HSAMetadataString);
OS.EmitLabel(DescEnd);
});
return true;
@@ -535,15 +596,15 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
return true;
}
-bool AMDGPUTargetELFStreamer::EmitPALMetadata(
- const PALMD::Metadata &PALMetadata) {
- EmitNote(ElfNote::NoteNameV2,
- MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
- getContext()),
- ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
- for (auto I : PALMetadata)
- OS.EmitIntValue(I, sizeof(uint32_t));
- });
+bool AMDGPUTargetELFStreamer::EmitCodeEnd() {
+ const uint32_t Encoded_s_code_end = 0xbf9f0000;
+
+ MCStreamer &OS = getStreamer();
+ OS.PushSection();
+ OS.EmitValueToAlignment(64, Encoded_s_code_end, 4);
+ for (unsigned I = 0; I < 32; ++I)
+ OS.EmitIntValue(Encoded_s_code_end, 4);
+ OS.PopSection();
return true;
}
@@ -555,16 +616,25 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
+ MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
+ Context.getOrCreateSymbol(Twine(KernelName)));
MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
- KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
+
+ // Copy kernel descriptor symbol's binding, other and visibility from the
+ // kernel code symbol.
+ KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
+ KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
+ KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
+ // Kernel descriptor symbol's type and size are fixed.
KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
KernelDescriptorSymbol->setSize(
MCConstantExpr::create(sizeof(KernelDescriptor), Context));
- MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
- Context.getOrCreateSymbol(Twine(KernelName)));
- KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
+ // The visibility of the kernel code symbol must be protected or less to allow
+ // static relocations from the kernel descriptor to be used.
+ if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
+ KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);
Streamer.EmitLabel(KernelDescriptorSymbol);
Streamer.EmitBytes(StringRef(
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 9a807c804f9f..683b3e363b9a 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- AMDGPUTargetStreamer.h - AMDGPU Target Streamer --------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,7 +10,8 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#include "AMDKernelCodeT.h"
-#include "llvm/BinaryFormat/MsgPackTypes.h"
+#include "Utils/AMDGPUPALMetadata.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -29,12 +29,16 @@ class Module;
class Type;
class AMDGPUTargetStreamer : public MCTargetStreamer {
+ AMDGPUPALMetadata PALMetadata;
+
protected:
MCContext &getContext() const { return Streamer.getContext(); }
public:
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+ AMDGPUPALMetadata *getPALMetadata() { return &PALMetadata; }
+
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -49,6 +53,9 @@ public:
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
+ virtual void emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
+ unsigned Align) = 0;
+
/// \returns True on success, false on failure.
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
@@ -65,14 +72,13 @@ public:
/// the \p HSAMetadata structure is updated with the correct types.
///
/// \returns True on success, false on failure.
- virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
- bool Strict) = 0;
+ virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
/// \returns True on success, false on failure.
- virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0;
+ virtual bool EmitCodeEnd() = 0;
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
@@ -89,6 +95,8 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
public:
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+ void finish() override;
+
void EmitDirectiveAMDGCNTarget(StringRef Target) override;
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -102,18 +110,19 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
- bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
- bool Strict) override;
+ bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
/// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
- bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
+ bool EmitCodeEnd() override;
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
@@ -133,6 +142,8 @@ public:
MCELFStreamer &getStreamer();
+ void finish() override;
+
void EmitDirectiveAMDGCNTarget(StringRef Target) override;
void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -146,18 +157,19 @@ public:
void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+ void emitAMDGPULDS(MCSymbol *Sym, unsigned Size, unsigned Align) override;
+
/// \returns True on success, false on failure.
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
- bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
- bool Strict) override;
+ bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict) override;
/// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
- bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override;
+ bool EmitCodeEnd() override;
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 28d4bc1829e2..2f1f4e7a0392 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -65,9 +64,10 @@ private:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
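The signature change above widens the available-features value from a 64-bit mask to a FeatureBitset, which keeps working once a target declares more than 64 subtarget features. A minimal standalone sketch of the difference, with the feature indices and count purely hypothetical:

```cpp
#include <bitset>
#include <cstdint>

// Hypothetical feature indices; real targets generate these from TableGen.
enum Feature : unsigned { FeatureA = 3, FeatureB = 70, NumFeatures = 128 };

using FeatureBits = std::bitset<NumFeatures>;

// With a plain 64-bit mask, an index of 70 simply cannot be represented.
static bool hasFeatureMask(uint64_t Mask, unsigned Idx) {
  return Idx < 64 && (Mask & (uint64_t(1) << Idx)) != 0;
}

// A bitset scales with however many features the target declares.
static bool hasFeatureBitset(const FeatureBits &Bits, unsigned Idx) {
  return Bits.test(Idx);
}

int main() {
  FeatureBits Bits;
  Bits.set(FeatureB);
  return (hasFeatureBitset(Bits, FeatureB) && !hasFeatureMask(0, FeatureB)) ? 0 : 1;
}
```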
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
index 1c99a708e5ac..a4809af29daa 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 36913bd04274..f8ec3c36f019 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,9 +13,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -77,6 +78,10 @@ public:
unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+ unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
};
} // end anonymous namespace
@@ -233,6 +238,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -245,12 +252,21 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
+ return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ LLVM_FALLTHROUGH;
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint16_t Lo16 = static_cast<uint16_t>(Imm);
uint32_t Encoding = getLit16Encoding(Lo16, STI);
return Encoding;
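The packed v2i16/v2f16 cases above either emit a full 32-bit literal (only when FeatureVOP3Literal is present and the value does not fit in 16 bits) or fall through and feed just the low 16 bits to the inline-constant lookup. A standalone sketch of that decision; the helper name and enum are illustrative, not LLVM API:

```cpp
#include <cassert>
#include <cstdint>

enum class PackedImmKind { Literal32, Inline16 };

// A packed 16-bit immediate that does not fit in 16 bits can only become a
// full 32-bit literal when the subtarget supports VOP3 literals; otherwise
// only the low 16 bits take part in the inline-constant lookup.
static PackedImmKind classifyPackedImm(uint32_t Imm, bool HasVOP3Literal) {
  if (Imm > 0xFFFFu && HasVOP3Literal)
    return PackedImmKind::Literal32;
  return PackedImmKind::Inline16;
}

int main() {
  assert(classifyPackedImm(0x3C003C00u, true) == PackedImmKind::Literal32);
  assert(classifyPackedImm(0x3C003C00u, false) == PackedImmKind::Inline16);
  assert(classifyPackedImm(0x00003C00u, true) == PackedImmKind::Inline16);
  return 0;
}
```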
@@ -274,7 +290,25 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
}
- if (bytes > 4)
+ // NSA encoding.
+ if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
+ int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vaddr0);
+ int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::srsrc);
+ assert(vaddr0 >= 0 && srsrc > vaddr0);
+ unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
+ unsigned NumPadding = (-NumExtraAddrs) & 3;
+
+ for (unsigned i = 0; i < NumExtraAddrs; ++i)
+ OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
+ Fixups, STI));
+ for (unsigned i = 0; i < NumPadding; ++i)
+ OS.write(0);
+ }
+
+ if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
+ (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
return;
// Check for additional literals in SRC0/1/2 (Op 1/2/3)
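In the NSA block above, the extra address registers are written one byte each after the base encoding and then padded so the total stays dword-aligned. A standalone check that the `(-NumExtraAddrs) & 3` form used there is the usual round-up-to-a-multiple-of-four padding:

```cpp
#include <cassert>

int main() {
  for (unsigned NumExtraAddrs = 0; NumExtraAddrs <= 12; ++NumExtraAddrs) {
    unsigned NumPadding = (-NumExtraAddrs) & 3;          // form used in the patch
    unsigned RoundUp = (4 - NumExtraAddrs % 4) % 4;      // conventional spelling
    assert(NumPadding == RoundUp);
    assert((NumExtraAddrs + NumPadding) % 4 == 0);       // dword-aligned total
  }
  return 0;
}
```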
@@ -366,7 +400,7 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
unsigned Reg = MO.getReg();
- if (Reg != AMDGPU::VCC) {
+ if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
RegEnc |= MRI.getEncodingValue(Reg);
RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
@@ -374,10 +408,31 @@ SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
return RegEnc;
}
+unsigned
+SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned Reg = MI.getOperand(OpNo).getReg();
+ uint64_t Enc = MRI.getEncodingValue(Reg);
+
+ // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
+ // instructions use acc[0:1] modifier bits to distinguish. These bits are
+ // encoded as a virtual 9th bit of the register for these operands.
+ if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg))
+ Enc |= 512;
+
+ return Enc;
+}
+
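getAVOperandEncoding above reuses the 8-bit VGPR encoding space for AGPRs and marks accumulator registers by setting a ninth bit (value 512). A small standalone sketch of that packing; the boolean parameter stands in for the register-class query:

```cpp
#include <cassert>
#include <cstdint>

// Pack an 8-bit hardware register encoding plus an "is AGPR" flag into 9 bits,
// mirroring how the acc[0:1] modifier bits are modelled for mfma sources.
static uint64_t encodeAVOperand(uint64_t HwEnc, bool IsAGPR) {
  assert(HwEnc < 256 && "hardware encoding is 8 bits");
  return HwEnc | (IsAGPR ? 512u : 0u);
}

int main() {
  assert(encodeAVOperand(3, false) == 3);   // e.g. v3
  assert(encodeAVOperand(3, true) == 515);  // e.g. a3: same low bits, bit 9 set
  return 0;
}
```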
static bool needsPCRel(const MCExpr *Expr) {
switch (Expr->getKind()) {
- case MCExpr::SymbolRef:
- return true;
+ case MCExpr::SymbolRef: {
+ auto *SE = cast<MCSymbolRefExpr>(Expr);
+ MCSymbolRefExpr::VariantKind Kind = SE->getKind();
+ return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
+ Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
+ }
case MCExpr::Binary: {
auto *BE = cast<MCBinaryExpr>(Expr);
if (BE->getOpcode() == MCBinaryExpr::Sub)
@@ -416,7 +471,13 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
Kind = FK_PCRel_4;
else
Kind = FK_Data_4;
- Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc()));
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint32_t Offset = Desc.getSize();
+ assert(Offset == 4 || Offset == 8);
+
+ Fixups.push_back(
+ MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
}
// Figure out the operand number, needed for isSrcOperand check
@@ -429,7 +490,8 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
- if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ if (Enc != ~0U &&
+ (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
return Enc;
} else if (MO.isImm())
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 1c68dbd78e75..4735e6cb2446 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1,9 +1,8 @@
//===-- MIMGInstructions.td - MIMG Instruction Definitions ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,10 +11,14 @@
//
// - MIMGEncGfx6: encoding introduced with gfx6 (obsoleted for atomics in gfx8)
// - MIMGEncGfx8: encoding introduced with gfx8 for atomics
+// - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding
+// - MIMGEncGfx10NSA: gfx10 NSA encoding
class MIMGEncoding;
def MIMGEncGfx6 : MIMGEncoding;
def MIMGEncGfx8 : MIMGEncoding;
+def MIMGEncGfx10Default : MIMGEncoding;
+def MIMGEncGfx10NSA : MIMGEncoding;
def MIMGEncoding : GenericEnum {
let FilterClass = "MIMGEncoding";
@@ -60,13 +63,28 @@ def MIMGDim : GenericEnum {
def MIMGDimInfoTable : GenericTable {
let FilterClass = "AMDGPUDimProps";
let CppTypeName = "MIMGDimInfo";
- let Fields = ["Dim", "NumCoords", "NumGradients", "DA"];
+ let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"];
GenericEnum TypeOf_Dim = MIMGDim;
let PrimaryKey = ["Dim"];
let PrimaryKeyName = "getMIMGDimInfo";
}
+def getMIMGDimInfoByEncoding : SearchIndex {
+ let Table = MIMGDimInfoTable;
+ let Key = ["Encoding"];
+}
+
+def getMIMGDimInfoByAsmSuffix : SearchIndex {
+ let Table = MIMGDimInfoTable;
+ let Key = ["AsmSuffix"];
+}
+
+class mimg <bits<8> si_gfx10, bits<8> vi = si_gfx10> {
+ field bits<8> SI_GFX10 = si_gfx10;
+ field bits<8> VI = vi;
+}
+
class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
MIMGBaseOpcode L = l;
MIMGBaseOpcode LZ = lz;
@@ -83,12 +101,23 @@ def MIMGLZMappingTable : GenericTable {
let PrimaryKeyName = "getMIMGLZMappingInfo";
}
-class mimg <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
+class MIMGMIPMapping<MIMGBaseOpcode mip, MIMGBaseOpcode nonmip> {
+ MIMGBaseOpcode MIP = mip;
+ MIMGBaseOpcode NONMIP = nonmip;
}
-class MIMG <dag outs, string dns = "">
+def MIMGMIPMappingTable : GenericTable {
+ let FilterClass = "MIMGMIPMapping";
+ let CppTypeName = "MIMGMIPMappingInfo";
+ let Fields = ["MIP", "NONMIP"];
+ GenericEnum TypeOf_MIP = MIMGBaseOpcode;
+ GenericEnum TypeOf_NONMIP = MIMGBaseOpcode;
+
+ let PrimaryKey = ["MIP"];
+ let PrimaryKeyName = "getMIMGMIPMappingInfo";
+}
+
+class MIMG_Base <dag outs, string dns = "">
: InstSI <outs, (ins), "", []> {
let VM_CNT = 1;
@@ -97,20 +126,24 @@ class MIMG <dag outs, string dns = "">
let Uses = [EXEC];
let mayLoad = 1;
let mayStore = 0;
- let hasPostISelHook = 1;
let SchedRW = [WriteVMEM];
let UseNamedOperandTable = 1;
let hasSideEffects = 0; // XXX ????
- let SubtargetPredicate = isGCN;
let DecoderNamespace = dns;
let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
- let AsmMatchConverter = "cvtMIMG";
let usesCustomInserter = 1;
+}
+
+class MIMG <dag outs, string dns = "">
+ : MIMG_Base <outs, dns> {
+
+ let hasPostISelHook = 1;
+ let AsmMatchConverter = "cvtMIMG";
Instruction Opcode = !cast<Instruction>(NAME);
MIMGBaseOpcode BaseOpcode;
- MIMGEncoding MIMGEncoding = MIMGEncGfx6;
+ MIMGEncoding MIMGEncoding;
bits<8> VDataDwords;
bits<8> VAddrDwords;
}
@@ -131,15 +164,66 @@ def getMIMGInfo : SearchIndex {
let Key = ["Opcode"];
}
-class MIMG_NoSampler_Helper <bits<7> op, string asm,
+// This is a separate class so that TableGen memoizes the computations.
+class MIMGNSAHelper<int num_addrs> {
+ list<string> AddrAsmNames =
+ !foldl([]<string>, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], lhs, i,
+ !if(!lt(i, num_addrs), !listconcat(lhs, ["vaddr"#!size(lhs)]), lhs));
+ dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames);
+ string AddrAsm = "[" # !foldl("$" # !head(AddrAsmNames), !tail(AddrAsmNames), lhs, rhs,
+ lhs # ", $" # rhs) # "]";
+
+ int NSA = !if(!le(num_addrs, 1), ?,
+ !if(!le(num_addrs, 5), 1,
+ !if(!le(num_addrs, 9), 2,
+ !if(!le(num_addrs, 13), 3, ?))));
+}
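The NSA field computed above is the number of extra NSA dwords the encoding needs: the first address stays in the base encoding and each NSA dword carries up to four further single-dword addresses. A standalone check of that grouping against the nested !if chain, written in plain C++ rather than TableGen:

```cpp
#include <cassert>

// Extra NSA dwords for num_addrs single-dword addresses (valid for 1..13,
// matching the !if chain above; address 0 lives in the base encoding).
static int nsaDwords(int NumAddrs) {
  if (NumAddrs <= 1)
    return 0;                     // the '?' case: no NSA encoding needed
  return (NumAddrs - 2) / 4 + 1;  // 2..5 -> 1, 6..9 -> 2, 10..13 -> 3
}

int main() {
  assert(nsaDwords(1) == 0);
  assert(nsaDwords(2) == 1 && nsaDwords(5) == 1);
  assert(nsaDwords(6) == 2 && nsaDwords(9) == 2);
  assert(nsaDwords(10) == 3 && nsaDwords(13) == 3);
  return 0;
}
```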
+
+// Base class of all pre-gfx10 MIMG instructions.
+class MIMG_gfx6789<bits<8> op, dag outs, string dns = "">
+ : MIMG<outs, dns>, MIMGe_gfx6789<op> {
+ let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
+ let AssemblerPredicates = [isGFX6GFX7GFX8GFX9];
+
+ let MIMGEncoding = MIMGEncGfx6;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+}
+
+// Base class of all non-NSA gfx10 MIMG instructions.
+class MIMG_gfx10<int op, dag outs, string dns = "">
+ : MIMG<outs, dns>, MIMGe_gfx10<op> {
+ let SubtargetPredicate = isGFX10Plus;
+ let AssemblerPredicates = [isGFX10Plus];
+
+ let MIMGEncoding = MIMGEncGfx10Default;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let nsa = 0;
+}
+
+// Base class for all NSA MIMG instructions. Note that 1-dword addresses always
+// use non-NSA variants.
+class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
+ : MIMG<outs, dns>, MIMGe_gfx10<op> {
+ let SubtargetPredicate = isGFX10Plus;
+ let AssemblerPredicates = [isGFX10Plus];
+
+ let MIMGEncoding = MIMGEncGfx10NSA;
+
+ MIMGNSAHelper nsah = MIMGNSAHelper<num_addrs>;
+ dag AddrIns = nsah.AddrIns;
+ string AddrAsm = nsah.AddrAsm;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let nsa = nsah.NSA;
+}
+
+class MIMG_NoSampler_Helper <bits<8> op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
string dns="">
- : MIMG <(outs dst_rc:$vdata), dns>,
- MIMGe<op> {
- let ssamp = 0;
- let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
+ : MIMG_gfx6789 <op, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -148,23 +232,66 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm,
#!if(BaseOpcode.HasD16, "$d16", "");
}
-multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
+class MIMG_NoSampler_gfx10<int op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_NoSampler_nsa_gfx10<int op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx10<op, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+multiclass MIMG_NoSampler_Src_Helper <bits<8> op, string asm,
RegisterClass dst_rc,
bit enableDisasm> {
- let VAddrDwords = 1 in
- def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
- let VAddrDwords = 2 in
- def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
- let VAddrDwords = 3 in
- def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
- let VAddrDwords = 4 in
- def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
+ let ssamp = 0 in {
+ let VAddrDwords = 1 in {
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ def _V1_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+
+ let VAddrDwords = 2 in {
+ def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
+ def _V2_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_64>;
+ def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 2>;
+ }
+
+ let VAddrDwords = 3 in {
+ def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
+ def _V3_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_96>;
+ def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 3>;
+ }
+
+ let VAddrDwords = 4 in {
+ def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
+ def _V4_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_128>;
+ def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ }
+}
+
+multiclass MIMG_NoSampler <bits<8> op, string asm, bit has_d16, bit mip = 0,
bit isResInfo = 0> {
- def "" : MIMGBaseOpcode {
+ def "" : MIMGBaseOpcode, PredicateControl {
let Coordinates = !if(isResInfo, 0, 1);
let LodOrClampOrMip = mip;
let HasD16 = has_d16;
@@ -180,26 +307,16 @@ multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 0>;
let VDataDwords = 4 in
defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 0>;
- let VDataDwords = 8 in
- defm _V8 : MIMG_NoSampler_Src_Helper <op, asm, VReg_256, 0>;
+ let VDataDwords = 5 in
+ defm _V5 : MIMG_NoSampler_Src_Helper <op, asm, VReg_160, 0>;
}
}
-class MIMG_Store_Helper <bits<7> op, string asm,
+class MIMG_Store_Helper <bits<8> op, string asm,
RegisterClass data_rc,
RegisterClass addr_rc,
string dns = "">
- : MIMG <(outs), dns>,
- MIMGe<op> {
- let ssamp = 0;
- let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
- let mayLoad = 0;
- let mayStore = 1;
- let hasSideEffects = 0;
- let hasPostISelHook = 0;
- let DisableWQM = 1;
-
+ : MIMG_gfx6789<op, (outs), dns> {
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -208,21 +325,63 @@ class MIMG_Store_Helper <bits<7> op, string asm,
#!if(BaseOpcode.HasD16, "$d16", "");
}
-multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
+class MIMG_Store_gfx10<int op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx10<op, (outs), dns> {
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+ GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Store_nsa_gfx10<int op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx10<op, (outs), num_addrs, dns> {
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+multiclass MIMG_Store_Addr_Helper <int op, string asm,
RegisterClass data_rc,
bit enableDisasm> {
- let VAddrDwords = 1 in
- def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
- let VAddrDwords = 2 in
- def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
- let VAddrDwords = 3 in
- def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
- let VAddrDwords = 4 in
- def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
-}
-
-multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0, hasPostISelHook = 0,
+ DisableWQM = 1, ssamp = 0 in {
+ let VAddrDwords = 1 in {
+ def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ def _V1_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ let VAddrDwords = 2 in {
+ def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
+ def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
+ def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
+ }
+ let VAddrDwords = 3 in {
+ def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
+ def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
+ def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
+ }
+ let VAddrDwords = 4 in {
+ def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
+ def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
+ def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ }
+}
+
+multiclass MIMG_Store <bits<8> op, string asm, bit has_d16, bit mip = 0> {
def "" : MIMGBaseOpcode {
let Store = 1;
let LodOrClampOrMip = mip;
@@ -241,15 +400,9 @@ multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
}
}
-class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
- RegisterClass addr_rc, string dns="",
- bit enableDasm = 0>
- : MIMG <(outs data_rc:$vdst), !if(enableDasm, dns, "")> {
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1; // FIXME: Remove this
- let hasPostISelHook = 0;
- let DisableWQM = 1;
+class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
+ RegisterClass addr_rc, string dns="">
+ : MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";
@@ -259,39 +412,80 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
-multiclass MIMG_Atomic_Helper_m <mimg op, string asm, RegisterClass data_rc,
- RegisterClass addr_rc, bit enableDasm = 0> {
- let ssamp = 0, d16 = 0 in {
- def _si : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
- SIMCInstr<NAME, SIEncodingFamily.SI>,
- MIMGe<op.SI> {
- let AssemblerPredicates = [isSICI];
- let DisableDecoder = DisableSIDecoder;
- }
+class MIMG_Atomic_si<mimg op, string asm, RegisterClass data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_gfx6789_base<op.SI_GFX10, asm, data_rc, addr_rc,
+ !if(enableDasm, "GFX6GFX7", "")> {
+ let AssemblerPredicates = [isGFX6GFX7];
+}
- def _vi : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "VI", enableDasm>,
- SIMCInstr<NAME, SIEncodingFamily.VI>,
- MIMGe<op.VI> {
- let AssemblerPredicates = [isVI];
- let DisableDecoder = DisableVIDecoder;
- let MIMGEncoding = MIMGEncGfx8;
- }
- }
+class MIMG_Atomic_vi<mimg op, string asm, RegisterClass data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
+ let AssemblerPredicates = [isGFX8GFX9];
+ let MIMGEncoding = MIMGEncGfx8;
+}
+
+class MIMG_Atomic_gfx10<mimg op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ bit enableDisasm = 0>
+ : MIMG_gfx10<!cast<int>(op.SI_GFX10), (outs DataRC:$vdst),
+ !if(enableDisasm, "AMDGPU", "")> {
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+ GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+}
+
+class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ bit enableDisasm = 0>
+ : MIMG_nsa_gfx10<!cast<int>(op.SI_GFX10), (outs DataRC:$vdst), num_addrs,
+ !if(enableDisasm, "AMDGPU", "")> {
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
}
multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
RegisterClass data_rc,
bit enableDasm = 0> {
- // _V* variants have different address size, but the size is not encoded.
- // So only one variant can be disassembled. V1 looks the safest to decode.
- let VAddrDwords = 1 in
- defm _V1 : MIMG_Atomic_Helper_m <op, asm, data_rc, VGPR_32, enableDasm>;
- let VAddrDwords = 2 in
- defm _V2 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_64>;
- let VAddrDwords = 3 in
- defm _V3 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_96>;
- let VAddrDwords = 4 in
- defm _V4 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_128>;
+ let hasSideEffects = 1, // FIXME: remove this
+ mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
+ ssamp = 0 in {
+ let VAddrDwords = 1 in {
+ def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ let VAddrDwords = 2 in {
+ def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+ def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
+ def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
+ }
+ let VAddrDwords = 3 in {
+ def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
+ def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
+ def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+ }
+ let VAddrDwords = 4 in {
+ def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
+ def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
+ def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
+ }
+ }
}
multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atomics
@@ -311,12 +505,9 @@ multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atom
}
}
-class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
+class MIMG_Sampler_Helper <bits<8> op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
- : MIMG <(outs dst_rc:$vdata), dns>,
- MIMGe<op> {
- let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
+ : MIMG_gfx6789 <op, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -325,6 +516,33 @@ class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
#!if(BaseOpcode.HasD16, "$d16", "");
}
+class MIMG_Sampler_gfx10<int op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+ GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
+ #"$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Sampler_nsa_gfx10<int op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx10<op, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
+ #"$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
class MIMGAddrSize<int dw, bit enable_disasm> {
int NumWords = dw;
@@ -341,6 +559,11 @@ class MIMGAddrSize<int dw, bit enable_disasm> {
bit Disassemble = enable_disasm;
}
+// Return whether x is in lst.
+class isIntInList<int x, list<int> lst> {
+ bit ret = !foldl(0, lst, lhs, y, !or(lhs, !eq(x, y)));
+}
+
// Return whether a value inside the range [min, max] (endpoints inclusive)
// is in the given list.
class isRangeInList<int min, int max, list<int> lst> {
@@ -376,16 +599,41 @@ class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample> {
!listconcat(lhs.List, [MIMGAddrSize<dw, !empty(lhs.List)>]),
!if(!eq(dw, 3), 3, !add(dw, 1))>, // we still need _V4 for codegen w/ 3 dwords
lhs)).List;
-}
-multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
+ // For NSA, generate machine instructions for all possible numbers of words
+ // except 1 (which is already covered by the non-NSA case).
+ // The disassembler defaults to the largest number of arguments among the
+ // variants with the same number of NSA words, and custom code then derives
+ // the exact variant based on the sample variant and the image dimension.
+ list<MIMGAddrSize> NSAInstrs =
+ !foldl([]<MIMGAddrSize>, [[12, 11, 10], [9, 8, 7, 6], [5, 4, 3, 2]], prev, nsa_group,
+ !listconcat(prev,
+ !foldl([]<MIMGAddrSize>, nsa_group, lhs, dw,
+ !if(isIntInList<dw, AllNumAddrWords>.ret,
+ !listconcat(lhs, [MIMGAddrSize<dw, !empty(lhs)>]),
+ lhs))));
+}
+
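The NSAInstrs fold above walks the address-size groups {12..10}, {9..6}, {5..2}, keeps only the sizes that actually occur for this sample variant, and marks the first kept size in each group as the disassembler default, as described in the comment. A plain C++ sketch of the same selection; the `Supported` predicate stands in for membership in AllNumAddrWords:

```cpp
#include <vector>

struct AddrVariant {
  int NumWords;
  bool Disassemble; // true only for the largest kept size in each NSA group
};

static std::vector<AddrVariant> buildNSAVariants(bool (*Supported)(int)) {
  const std::vector<std::vector<int>> Groups = {
      {12, 11, 10}, {9, 8, 7, 6}, {5, 4, 3, 2}};
  std::vector<AddrVariant> Out;
  for (const auto &Group : Groups) {
    bool FirstInGroup = true;
    for (int Words : Group) {
      if (!Supported(Words))
        continue;
      Out.push_back({Words, FirstInGroup});
      FirstInGroup = false;
    }
  }
  return Out;
}

static bool supported3to8(int W) { return W >= 3 && W <= 8; }

int main() {
  auto Variants = buildNSAVariants(supported3to8);
  // First kept size in the {9,8,7,6} group is 8, so it is the disasm default.
  return (!Variants.empty() && Variants.front().NumWords == 8 &&
          Variants.front().Disassemble) ? 0 : 1;
}
```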
+multiclass MIMG_Sampler_Src_Helper <bits<8> op, string asm,
AMDGPUSampleVariant sample, RegisterClass dst_rc,
bit enableDisasm = 0> {
foreach addr = MIMG_Sampler_AddrSizes<sample>.MachineInstrs in {
- let VAddrDwords = addr.NumWords in
- def _V # addr.NumWords
- : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
- !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ let VAddrDwords = addr.NumWords in {
+ def _V # addr.NumWords
+ : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ def _V # addr.NumWords # _gfx10
+ : MIMG_Sampler_gfx10 <op, asm, dst_rc, addr.RegClass,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ }
+ }
+
+ foreach addr = MIMG_Sampler_AddrSizes<sample>.NSAInstrs in {
+ let VAddrDwords = addr.NumWords in {
+ def _V # addr.NumWords # _nsa_gfx10
+ : MIMG_Sampler_nsa_gfx10<op, asm, dst_rc, addr.NumWords,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ }
}
}
@@ -397,7 +645,7 @@ class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
let LodOrClampOrMip = !ne(sample.LodOrClamp, "");
}
-multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
+multiclass MIMG_Sampler <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
bit isGetLod = 0,
string asm = "image_sample"#sample.LowerCaseMod> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
@@ -414,15 +662,15 @@ multiclass MIMG_Sampler <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V3 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_96>;
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128>;
- let VDataDwords = 8 in
- defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
+ let VDataDwords = 5 in
+ defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
}
}
-multiclass MIMG_Sampler_WQM <bits<7> op, AMDGPUSampleVariant sample>
+multiclass MIMG_Sampler_WQM <bits<8> op, AMDGPUSampleVariant sample>
: MIMG_Sampler<op, sample, 1>;
-multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
+multiclass MIMG_Gather <bits<8> op, AMDGPUSampleVariant sample, bit wqm = 0,
string asm = "image_gather4"#sample.LowerCaseMod> {
def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = 1;
@@ -435,12 +683,12 @@ multiclass MIMG_Gather <bits<7> op, AMDGPUSampleVariant sample, bit wqm = 0,
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
- let VDataDwords = 8 in
- defm _V8 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_256>;
+ let VDataDwords = 5 in
+ defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
}
}
-multiclass MIMG_Gather_WQM <bits<7> op, AMDGPUSampleVariant sample>
+multiclass MIMG_Gather_WQM <bits<8> op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
//===----------------------------------------------------------------------===//
@@ -473,9 +721,11 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">;
defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;
defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;
defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
+//let FPAtomic = 1 in {
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d, 1>; -- not on VI
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
+//} // End let FPAtomic = 1
defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, AMDGPUSample>;
defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, AMDGPUSample_cl>;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, AMDGPUSample_d>;
@@ -581,3 +831,7 @@ def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
+
+// MIP to NONMIP Optimization Mapping
+def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
+def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
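The two MIMGMIPMapping entries above populate the generated MIMGMIPMappingInfo table, pairing each *_MIP opcode with its non-mip counterpart, presumably so a load/store whose mip level is known to be zero can be rewritten to the plain opcode. A tiny sketch of how such a primary-key table is typically consumed; the opcode values and lookup helper here are hypothetical, not the generated API:

```cpp
#include <cassert>

// Hypothetical stand-ins for the generated MIMGMIPMappingInfo rows.
struct MIPMappingInfo { unsigned MIP; unsigned NONMIP; };

static constexpr unsigned IMAGE_LOAD = 0, IMAGE_LOAD_MIP = 1,
                          IMAGE_STORE = 2, IMAGE_STORE_MIP = 3;

static constexpr MIPMappingInfo MIPMappingTable[] = {
    {IMAGE_LOAD_MIP, IMAGE_LOAD},
    {IMAGE_STORE_MIP, IMAGE_STORE},
};

// Linear primary-key lookup in the spirit of the generated accessor.
static const MIPMappingInfo *getMIPMappingInfo(unsigned MipOpcode) {
  for (const MIPMappingInfo &Row : MIPMappingTable)
    if (Row.MIP == MipOpcode)
      return &Row;
  return nullptr;
}

int main() {
  const MIPMappingInfo *Info = getMIPMappingInfo(IMAGE_LOAD_MIP);
  assert(Info && Info->NONMIP == IMAGE_LOAD);
  assert(getMIPMappingInfo(IMAGE_LOAD) == nullptr); // only *_MIP opcodes map
  return 0;
}
```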
diff --git a/lib/Target/AMDGPU/R600.td b/lib/Target/AMDGPU/R600.td
index 5c9c1c1ed504..1d11da969474 100644
--- a/lib/Target/AMDGPU/R600.td
+++ b/lib/Target/AMDGPU/R600.td
@@ -1,9 +1,8 @@
//===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600AsmPrinter.cpp b/lib/Target/AMDGPU/R600AsmPrinter.cpp
index 68f8c30775b8..3fb18862fca8 100644
--- a/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- R600AsmPrinter.cpp - R600 Assembly printer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600AsmPrinter.h b/lib/Target/AMDGPU/R600AsmPrinter.h
index 079fc707b03c..0da9526d716e 100644
--- a/lib/Target/AMDGPU/R600AsmPrinter.h
+++ b/lib/Target/AMDGPU/R600AsmPrinter.h
@@ -1,9 +1,8 @@
//===-- R600AsmPrinter.h - Print R600 assembly code -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 0c62d6a4b3d9..290a960ae901 100644
--- a/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -1,9 +1,8 @@
//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index a19020276f35..8098b81d1ea2 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -1,9 +1,8 @@
//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600Defines.h b/lib/Target/AMDGPU/R600Defines.h
index 0d33d82e8e0f..d72534908dcf 100644
--- a/lib/Target/AMDGPU/R600Defines.h
+++ b/lib/Target/AMDGPU/R600Defines.h
@@ -1,9 +1,8 @@
//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 679cf18d2c20..b97e3c8b8dd7 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -1,9 +1,8 @@
//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index b924ff019dd1..c6e8a060d8a0 100644
--- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -1,9 +1,8 @@
//===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 37787b3c5f72..d9aa9ebe878d 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -1,9 +1,8 @@
//===----------------------- R600FrameLowering.cpp ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h
index fe367d73682f..950e238f4979 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/lib/Target/AMDGPU/R600FrameLowering.h
@@ -1,9 +1,8 @@
//===--------------------- R600FrameLowering.h ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index e2a0f05d2b34..f80a53ba1dc6 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1240,11 +1239,13 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
+ const bool TruncatingStore = StoreNode->isTruncatingStore();
+
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
+ TruncatingStore) &&
VT.isVector()) {
- if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
- StoreNode->isTruncatingStore()) {
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
// TODO: can the chain be replaced without creating a new store?
@@ -1260,7 +1261,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
unsigned Align = StoreNode->getAlignment();
if (Align < MemVT.getStoreSize() &&
- !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
+ !allowsMisalignedMemoryAccesses(
+ MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
return expandUnalignedStore(StoreNode, DAG);
}
@@ -1270,7 +1272,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
- if (StoreNode->isTruncatingStore()) {
+ if (TruncatingStore) {
assert(VT.bitsLE(MVT::i32));
SDValue MaskConstant;
if (MemVT == MVT::i8) {
@@ -1310,8 +1312,8 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// Convert pointer from byte address to dword address.
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
- if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
- llvm_unreachable("Truncated and indexed stores not supported yet");
+ if (StoreNode->isIndexed()) {
+ llvm_unreachable("Indexed stores not supported yet");
} else {
Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
}
@@ -1662,10 +1664,9 @@ bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
return true;
}
-bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- unsigned Align,
- bool *IsFast) const {
+bool R600TargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *IsFast) const {
if (IsFast)
*IsFast = false;
@@ -1713,6 +1714,12 @@ static SDValue CompactSwizzlableVector(
if (NewBldVec[i].isUndef())
continue;
+ // Fix spurious warning with gcc 7.3 -O3
+ // warning: array subscript is above array bounds [-Warray-bounds]
+ // if (NewBldVec[i] == NewBldVec[j]) {
+ // ~~~~~~~~~~~^
+ if (i >= 4)
+ continue;
for (unsigned j = 0; j < i; j++) {
if (NewBldVec[i] == NewBldVec[j]) {
NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
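The early `continue` added above exists only to quiet a spurious gcc 7.3 -O3 -Warray-bounds warning on the fixed-size vector; the loop bound is at most four by construction, but the optimizer cannot see that. A minimal standalone illustration of the shape of the workaround (not a reduced reproducer of the gcc issue):

```cpp
#include <cstddef>

// Count is <= 4 in every caller, but if the optimizer cannot prove it, an
// explicit early-continue keeps the subscript visibly in range and silences
// the diagnostic without changing behaviour for well-formed calls.
static int sumUpTo(const int (&Vals)[4], std::size_t Count) {
  int Sum = 0;
  for (std::size_t i = 0; i < Count; ++i) {
    if (i >= 4)
      continue; // redundant guard, mirrors the pattern in the patch
    Sum += Vals[i];
  }
  return Sum;
}

int main() {
  int Vals[4] = {1, 2, 3, 4};
  return sumUpTo(Vals, 4) == 10 ? 0 : 1;
}
```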
diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h
index 767c3c7bd5bf..b560da8e91d9 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/lib/Target/AMDGPU/R600ISelLowering.h
@@ -1,9 +1,8 @@
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,9 +49,10 @@ public:
bool canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const override;
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
- unsigned Align,
- bool *IsFast) const override;
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AS, unsigned Align,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *IsFast = nullptr) const override;
private:
unsigned Gen;
diff --git a/lib/Target/AMDGPU/R600InstrFormats.td b/lib/Target/AMDGPU/R600InstrFormats.td
index 687a9affa138..f62e6313b148 100644
--- a/lib/Target/AMDGPU/R600InstrFormats.td
+++ b/lib/Target/AMDGPU/R600InstrFormats.td
@@ -1,9 +1,8 @@
//===-- R600InstrFormats.td - R600 Instruction Encodings ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index 9cc3e5f3c314..d9e839fe2035 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -402,6 +401,7 @@ Swizzle(std::vector<std::pair<int, unsigned>> Src,
}
static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+ assert(Op < 3 && "Out of range swizzle index");
switch (Swz) {
case R600InstrInfo::ALU_VEC_012_SCL_210: {
unsigned Cycles[3] = { 2, 1, 0};
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index e6e34dc125f4..00d96c9676aa 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -1,9 +1,8 @@
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 10e873755222..f40eece859ee 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -1,9 +1,8 @@
//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -296,6 +295,34 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
let VTXInst = 1;
}
+// FIXME: Deprecated.
+class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
+
+class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
+ (ld_node node:$ptr), [{
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ return L->getExtensionType() == ISD::ZEXTLOAD ||
+ L->getExtensionType() == ISD::EXTLOAD;
+}]>;
+
+def az_extload : AZExtLoadBase <unindexedload>;
+
+def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// FIXME: These are deprecated
+def az_extloadi8_local : LocalLoad <az_extloadi8>;
+def az_extloadi16_local : LocalLoad <az_extloadi16>;
+
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index 3ca319c6c6c2..65011a9eadf8 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 29ac0920f997..6a5ac9023329 100644
--- a/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 7769a35aadce..34267a909b5e 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -1,9 +1,8 @@
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.h b/lib/Target/AMDGPU/R600MachineScheduler.h
index 8a9a8d3d1e23..bc66f2ef5907 100644
--- a/lib/Target/AMDGPU/R600MachineScheduler.h
+++ b/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -1,9 +1,8 @@
//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index 7de5e2c9577d..1fe92d2269d3 100644
--- a/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -1,9 +1,8 @@
//===- R600OpenCLImageTypeLoweringPass.cpp ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 692451cb8fe0..9f1cb6582b5c 100644
--- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -1,9 +1,8 @@
//===- R600MergeVectorRegisters.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,17 +56,12 @@ using namespace llvm;
#define DEBUG_TYPE "vec-merger"
-static bool
-isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
- for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
- E = MRI.def_instr_end(); It != E; ++It) {
- return (*It).isImplicitDef();
- }
- if (MRI.isReserved(Reg)) {
+static bool isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
+ assert(MRI.isSSA());
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false;
- }
- llvm_unreachable("Reg without a def");
- return false;
+ const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ return MI && MI->isImplicitDef();
}
namespace {
diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp
index 612c62b514fd..df200baf11c1 100644
--- a/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -1,9 +1,8 @@
//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -187,8 +186,8 @@ public:
    // Do MII and MIJ share the same pred_sel?
int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
- unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
- PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+ Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(),
+ PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register();
if (PredI != PredJ)
return false;
if (SUJ->isSucc(SUI)) {
diff --git a/lib/Target/AMDGPU/R600Processors.td b/lib/Target/AMDGPU/R600Processors.td
index f39b3dc1bfd4..fff884e4848e 100644
--- a/lib/Target/AMDGPU/R600Processors.td
+++ b/lib/Target/AMDGPU/R600Processors.td
@@ -1,9 +1,8 @@
//===-- R600Processors.td - R600 Processor definitions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -41,23 +40,24 @@ def FeatureCFALUBug : SubtargetFeature<"cfalubug",
"GPU has CF_ALU bug"
>;
-class R600SubtargetFeatureGeneration <string Value,
+class R600SubtargetFeatureGeneration <string Value, string FeatureName,
list<SubtargetFeature> Implies> :
- SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>;
+ SubtargetFeatureGeneration <Value, FeatureName, "R600Subtarget", Implies>;
-def FeatureR600 : R600SubtargetFeatureGeneration<"R600",
+def FeatureR600 : R600SubtargetFeatureGeneration<"R600", "r600",
[FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
>;
-def FeatureR700 : R600SubtargetFeatureGeneration<"R700",
+def FeatureR700 : R600SubtargetFeatureGeneration<"R700", "r700",
[FeatureFetchLimit16, FeatureLocalMemorySize0]
>;
-def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN",
+def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", "evergreen",
[FeatureFetchLimit16, FeatureLocalMemorySize32768]
>;
def FeatureNorthernIslands : R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
+ "northern-islands",
[FeatureFetchLimit16, FeatureWavefrontSize64,
FeatureLocalMemorySize32768]
>;
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp
index 38933e7616a0..685df74490fe 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,7 +67,7 @@ const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
return &CalleeSavedReg;
}
-unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return R600::NoRegister;
}
diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h
index c4c77172b299..9378b70ca580 100644
--- a/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -1,9 +1,8 @@
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@ struct R600RegisterInfo final : public R600GenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const override;
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
/// get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
diff --git a/lib/Target/AMDGPU/R600Schedule.td b/lib/Target/AMDGPU/R600Schedule.td
index 70fb46c1a7d6..c998fe848193 100644
--- a/lib/Target/AMDGPU/R600Schedule.td
+++ b/lib/Target/AMDGPU/R600Schedule.td
@@ -1,9 +1,8 @@
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/R700Instructions.td b/lib/Target/AMDGPU/R700Instructions.td
index 613a0d729bb3..9c9a03209ec2 100644
--- a/lib/Target/AMDGPU/R700Instructions.td
+++ b/lib/Target/AMDGPU/R700Instructions.td
@@ -1,9 +1,8 @@
//===-- R700Instructions.td - R700 Instruction defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/SIAddIMGInit.cpp b/lib/Target/AMDGPU/SIAddIMGInit.cpp
index 69cafef4a351..f8094e35816c 100644
--- a/lib/Target/AMDGPU/SIAddIMGInit.cpp
+++ b/lib/Target/AMDGPU/SIAddIMGInit.cpp
@@ -1,9 +1,8 @@
//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 98e9ea662324..b764ca7d7061 100644
--- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -1,9 +1,8 @@
//===- SIAnnotateControlFlow.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,12 +12,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
@@ -38,6 +38,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
@@ -56,13 +57,13 @@ class SIAnnotateControlFlow : public FunctionPass {
Type *Boolean;
Type *Void;
- Type *Int64;
+ Type *IntMask;
Type *ReturnStruct;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
UndefValue *BoolUndef;
- Constant *Int64Zero;
+ Constant *IntMaskZero;
Function *If;
Function *Else;
@@ -75,6 +76,8 @@ class SIAnnotateControlFlow : public FunctionPass {
LoopInfo *LI;
+ void initialize(Module &M, const GCNSubtarget &ST);
+
bool isUniform(BranchInst *T);
bool isTopOfStack(BasicBlock *BB);
@@ -104,8 +107,6 @@ public:
SIAnnotateControlFlow() : FunctionPass(ID) {}
- bool doInitialization(Module &M) override;
-
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "SI annotate control flow"; }
@@ -115,6 +116,7 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LegacyDivergenceAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
FunctionPass::getAnalysisUsage(AU);
}
};
@@ -125,31 +127,34 @@ INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
char SIAnnotateControlFlow::ID = 0;
/// Initialize all the types and constants used in the pass
-bool SIAnnotateControlFlow::doInitialization(Module &M) {
+void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
LLVMContext &Context = M.getContext();
Void = Type::getVoidTy(Context);
Boolean = Type::getInt1Ty(Context);
- Int64 = Type::getInt64Ty(Context);
- ReturnStruct = StructType::get(Boolean, Int64);
+ IntMask = ST.isWave32() ? Type::getInt32Ty(Context)
+ : Type::getInt64Ty(Context);
+ ReturnStruct = StructType::get(Boolean, IntMask);
BoolTrue = ConstantInt::getTrue(Context);
BoolFalse = ConstantInt::getFalse(Context);
BoolUndef = UndefValue::get(Boolean);
- Int64Zero = ConstantInt::get(Int64, 0);
-
- If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
- Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
- IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
- Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
- EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
- return false;
+ IntMaskZero = ConstantInt::get(IntMask, 0);
+
+ If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
+ Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
+ { IntMask, IntMask });
+ IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
+ { IntMask, IntMask });
+ Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
+ EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
}
/// Is the branch condition uniform or did the StructurizeCFG pass
@@ -259,14 +264,23 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
return;
BasicBlock *Target = Term->getSuccessor(1);
- PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
+ PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
- for (BasicBlock *Pred : predecessors(Target))
- Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
+ for (BasicBlock *Pred : predecessors(Target)) {
+ Value *PHIValue = IntMaskZero;
+ if (Pred == BB) // Remember the value of the previous iteration.
+ PHIValue = Arg;
+ // If the backedge from Pred to Target could be executed before the exit
+ // of the loop at BB, it should not reset or change "Broken", which keeps
+      // track of the number of threads that exited the loop at BB.
+ else if (L->contains(Pred) && DT->dominates(Pred, BB))
+ PHIValue = Broken;
+ Broken->addIncoming(PHIValue, Pred);
+ }
Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
@@ -308,6 +322,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const TargetMachine &TM = TPC.getTM<TargetMachine>();
+
+ initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
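
The SIAnnotateControlFlow change above stops hard-coding an i64 control-flow mask: initialize() now asks the GCNSubtarget whether the function runs in wave32 and picks an i32 or i64 mask type accordingly, then declares the amdgcn_if/else/if_break/loop/end_cf intrinsics with that type as their overload. A minimal standalone C++ sketch of the selection idea follows; the bool parameter stands in for the subtarget query and MaskInfo is an illustrative struct, not LLVM API.

#include <cstdint>
#include <iostream>

// Illustrative stand-in for "pick the exec-mask width from the subtarget".
struct MaskInfo {
  unsigned Bits;     // 32 for wave32, 64 for wave64
  uint64_t AllLanes; // one bit per lane, analogous to a full exec mask
};

static MaskInfo selectMask(bool IsWave32) {
  unsigned Bits = IsWave32 ? 32 : 64;
  uint64_t AllLanes = (Bits == 64) ? ~uint64_t(0) : ((uint64_t(1) << Bits) - 1);
  return {Bits, AllLanes};
}

int main() {
  for (bool Wave32 : {true, false}) {
    MaskInfo M = selectMask(Wave32);
    std::cout << (Wave32 ? "wave32" : "wave64") << ": " << M.Bits
              << "-bit mask, all lanes = 0x" << std::hex << M.AllLanes
              << std::dec << '\n';
  }
}

The same width decision drives the new PHINode::Create(IntMask, ...) call in handleLoop, so the loop's "Broken" mask matches the wavefront size.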
diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
deleted file mode 100644
index 7e884ad93a23..000000000000
--- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===--- SIDebuggerInsertNops.cpp - Inserts nops for debugger usage -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Inserts one nop instruction for each high level source statement for
-/// debugger usage.
-///
-/// Tools, such as a debugger, need to pause execution based on user input (i.e.
-/// breakpoint). In order to do this, one nop instruction is inserted before the
-/// first isa instruction of each high level source statement. Further, the
-/// debugger may replace nop instructions with trap instructions based on user
-/// input.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "si-debugger-insert-nops"
-#define PASS_NAME "SI Debugger Insert Nops"
-
-namespace {
-
-class SIDebuggerInsertNops : public MachineFunctionPass {
-public:
- static char ID;
-
- SIDebuggerInsertNops() : MachineFunctionPass(ID) { }
- StringRef getPassName() const override { return PASS_NAME; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // anonymous namespace
-
-INITIALIZE_PASS(SIDebuggerInsertNops, DEBUG_TYPE, PASS_NAME, false, false)
-
-char SIDebuggerInsertNops::ID = 0;
-char &llvm::SIDebuggerInsertNopsID = SIDebuggerInsertNops::ID;
-
-FunctionPass *llvm::createSIDebuggerInsertNopsPass() {
- return new SIDebuggerInsertNops();
-}
-
-bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
- // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
- // specified.
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (!ST.debuggerInsertNops())
- return false;
-
- // Skip machine functions without debug info.
- if (!MF.getMMI().hasDebugInfo())
- return false;
-
- // Target instruction info.
- const SIInstrInfo *TII = ST.getInstrInfo();
-
- // Set containing line numbers that have nop inserted.
- DenseSet<unsigned> NopInserted;
-
- for (auto &MBB : MF) {
- for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
- // Skip debug instructions and instructions without location.
- if (MI->isDebugInstr() || !MI->getDebugLoc())
- continue;
-
- // Insert nop instruction if line number does not have nop inserted.
- auto DL = MI->getDebugLoc();
- if (NopInserted.find(DL.getLine()) == NopInserted.end()) {
- BuildMI(MBB, *MI, DL, TII->get(AMDGPU::S_NOP))
- .addImm(0);
- NopInserted.insert(DL.getLine());
- }
- }
- }
-
- return true;
-}
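
The deleted SIDebuggerInsertNops.cpp above implemented straightforward per-line bookkeeping: walk every block, skip debug instructions and instructions without a debug location, and emit one S_NOP before the first instruction of each distinct source line so a debugger has a safe spot to patch in a trap. A self-contained sketch of that bookkeeping, with a plain struct standing in for a machine instruction and made-up mnemonics:

#include <iostream>
#include <set>
#include <string>
#include <vector>

// One entry per instruction; Line == 0 models a missing debug location.
struct Inst {
  unsigned Line;
  std::string Text;
};

int main() {
  std::vector<Inst> Block = {{10, "v_mov"}, {10, "v_add"}, {11, "s_load"},
                             {0, "dbg"},    {11, "v_mul"}, {12, "s_store"}};
  std::vector<Inst> Out;
  std::set<unsigned> NopInserted; // source lines that already got a nop

  for (const Inst &I : Block) {
    // Insert a nop only before the first instruction of a new source line.
    if (I.Line != 0 && NopInserted.insert(I.Line).second)
      Out.push_back({I.Line, "s_nop 0"});
    Out.push_back(I);
  }

  for (const Inst &I : Out)
    std::cout << I.Line << ": " << I.Text << '\n';
}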
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 7f6abc34cff3..a0e1ec6ac235 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -1,9 +1,8 @@
//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
@@ -90,13 +89,22 @@ enum : uint64_t {
// Is a D16 buffer instruction.
D16Buf = UINT64_C(1) << 50,
+ // FLAT instruction accesses FLAT_GLBL or FLAT_SCRATCH segment.
+ IsNonFlatSeg = UINT64_C(1) << 51,
+
// Uses floating point double precision rounding mode
- FPDPRounding = UINT64_C(1) << 51
+ FPDPRounding = UINT64_C(1) << 52,
+
+ // Instruction is FP atomic.
+ FPAtomic = UINT64_C(1) << 53,
+
+ // Is a MFMA instruction.
+ IsMAI = UINT64_C(1) << 54
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
// The result is true if any of these tests are true.
-enum ClassFlags {
+enum ClassFlags : unsigned {
S_NAN = 1 << 0, // Signaling NaN
Q_NAN = 1 << 1, // Quiet NaN
N_INFINITY = 1 << 2, // Negative infinity
@@ -111,7 +119,7 @@ enum ClassFlags {
}
namespace AMDGPU {
- enum OperandType {
+ enum OperandType : unsigned {
/// Operands with register or 32-bit immediate
OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
OPERAND_REG_IMM_INT64,
@@ -119,6 +127,8 @@ namespace AMDGPU {
OPERAND_REG_IMM_FP32,
OPERAND_REG_IMM_FP64,
OPERAND_REG_IMM_FP16,
+ OPERAND_REG_IMM_V2FP16,
+ OPERAND_REG_IMM_V2INT16,
/// Operands with register or inline constant
OPERAND_REG_INLINE_C_INT16,
@@ -130,11 +140,22 @@ namespace AMDGPU {
OPERAND_REG_INLINE_C_V2FP16,
OPERAND_REG_INLINE_C_V2INT16,
+ /// Operands with an AccVGPR register or inline constant
+ OPERAND_REG_INLINE_AC_INT16,
+ OPERAND_REG_INLINE_AC_INT32,
+ OPERAND_REG_INLINE_AC_FP16,
+ OPERAND_REG_INLINE_AC_FP32,
+ OPERAND_REG_INLINE_AC_V2FP16,
+ OPERAND_REG_INLINE_AC_V2INT16,
+
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
- OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
+ OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2INT16,
OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2INT16,
+
+ OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+ OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2INT16,
OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@@ -151,17 +172,10 @@ namespace AMDGPU {
};
}
-namespace SIStackID {
-enum StackTypes : uint8_t {
- SCRATCH = 0,
- SGPR_SPILL = 1
-};
-}
-
// Input operand modifiers bit-masks
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
- enum {
+ enum : unsigned {
NEG = 1 << 0, // Floating-point negate modifier
ABS = 1 << 1, // Floating-point absolute modifier
SEXT = 1 << 0, // Integer sign-extend modifier
@@ -173,7 +187,7 @@ namespace SISrcMods {
}
namespace SIOutMods {
- enum {
+ enum : unsigned {
NONE = 0,
MUL2 = 1,
MUL4 = 2,
@@ -181,17 +195,33 @@ namespace SIOutMods {
};
}
+namespace AMDGPU {
namespace VGPRIndexMode {
- enum {
- SRC0_ENABLE = 1 << 0,
- SRC1_ENABLE = 1 << 1,
- SRC2_ENABLE = 1 << 2,
- DST_ENABLE = 1 << 3
- };
-}
+
+enum Id : unsigned { // id of symbolic names
+ ID_SRC0 = 0,
+ ID_SRC1,
+ ID_SRC2,
+ ID_DST,
+
+ ID_MIN = ID_SRC0,
+ ID_MAX = ID_DST
+};
+
+enum EncBits : unsigned {
+ OFF = 0,
+ SRC0_ENABLE = 1 << ID_SRC0,
+ SRC1_ENABLE = 1 << ID_SRC1,
+ SRC2_ENABLE = 1 << ID_SRC2,
+ DST_ENABLE = 1 << ID_DST,
+ ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE
+};
+
+} // namespace VGPRIndexMode
+} // namespace AMDGPU
namespace AMDGPUAsmVariants {
- enum {
+ enum : unsigned {
DEFAULT = 0,
VOP3 = 1,
SDWA = 2,
@@ -203,13 +233,14 @@ namespace AMDGPUAsmVariants {
namespace AMDGPU {
namespace EncValues { // Encoding values of enum9/8/7 operands
-enum {
+enum : unsigned {
SGPR_MIN = 0,
- SGPR_MAX = 101,
+ SGPR_MAX_SI = 101,
+ SGPR_MAX_GFX10 = 105,
TTMP_VI_MIN = 112,
TTMP_VI_MAX = 123,
- TTMP_GFX9_MIN = 108,
- TTMP_GFX9_MAX = 123,
+ TTMP_GFX9_GFX10_MIN = 108,
+ TTMP_GFX9_GFX10_MAX = 123,
INLINE_INTEGER_C_MIN = 128,
INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
INLINE_INTEGER_C_MAX = 208,
@@ -231,6 +262,8 @@ enum Id { // Message ID, width(4) [3:0].
ID_INTERRUPT = 1,
ID_GS,
ID_GS_DONE,
+ ID_GS_ALLOC_REQ = 9,
+ ID_GET_DOORBELL = 10,
ID_SYSMSG = 15,
ID_GAPS_LAST_, // Indicate that sequence has gaps.
ID_GAPS_FIRST_ = ID_INTERRUPT,
@@ -242,27 +275,28 @@ enum Id { // Message ID, width(4) [3:0].
enum Op { // Both GS and SYS operation IDs.
OP_UNKNOWN_ = -1,
OP_SHIFT_ = 4,
- // width(2) [5:4]
+ OP_NONE_ = 0,
+ // Bits used for operation encoding
+ OP_WIDTH_ = 3,
+ OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_),
+ // GS operations are encoded in bits 5:4
OP_GS_NOP = 0,
OP_GS_CUT,
OP_GS_EMIT,
OP_GS_EMIT_CUT,
OP_GS_LAST_,
OP_GS_FIRST_ = OP_GS_NOP,
- OP_GS_WIDTH_ = 2,
- OP_GS_MASK_ = (((1 << OP_GS_WIDTH_) - 1) << OP_SHIFT_),
- // width(3) [6:4]
+ // SYS operations are encoded in bits 6:4
OP_SYS_ECC_ERR_INTERRUPT = 1,
OP_SYS_REG_RD,
OP_SYS_HOST_TRAP_ACK,
OP_SYS_TTRACE_PC,
OP_SYS_LAST_,
OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
- OP_SYS_WIDTH_ = 3,
- OP_SYS_MASK_ = (((1 << OP_SYS_WIDTH_) - 1) << OP_SHIFT_)
};
-enum StreamId { // Stream ID, (2) [9:8].
+enum StreamId : unsigned { // Stream ID, (2) [9:8].
+ STREAM_ID_NONE_ = 0,
STREAM_ID_DEFAULT_ = 0,
STREAM_ID_LAST_ = 4,
STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
@@ -287,23 +321,34 @@ enum Id { // HwRegCode, (6) [5:0]
ID_IB_STS = 7,
ID_MEM_BASES = 15,
ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES,
- ID_SYMBOLIC_LAST_ = 16,
+ ID_TBA_LO = 16,
+ ID_SYMBOLIC_FIRST_GFX10_ = ID_TBA_LO,
+ ID_TBA_HI = 17,
+ ID_TMA_LO = 18,
+ ID_TMA_HI = 19,
+ ID_FLAT_SCR_LO = 20,
+ ID_FLAT_SCR_HI = 21,
+ ID_XNACK_MASK = 22,
+ ID_POPS_PACKER = 25,
+ ID_SYMBOLIC_LAST_ = 26,
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
};
-enum Offset { // Offset, (5) [10:6]
+enum Offset : unsigned { // Offset, (5) [10:6]
OFFSET_DEFAULT_ = 0,
OFFSET_SHIFT_ = 6,
OFFSET_WIDTH_ = 5,
OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
+ OFFSET_MEM_VIOL = 8,
+
OFFSET_SRC_SHARED_BASE = 16,
OFFSET_SRC_PRIVATE_BASE = 0
};
-enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
+enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
WIDTH_M1_DEFAULT_ = 31,
WIDTH_M1_SHIFT_ = 11,
WIDTH_M1_WIDTH_ = 5,
@@ -313,11 +358,16 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
WIDTH_M1_SRC_PRIVATE_BASE = 15
};
+// Some values from WidthMinusOne mapped into Width domain.
+enum Width : unsigned {
+ WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
+};
+
} // namespace Hwreg
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
-enum Id { // id of symbolic names
+enum Id : unsigned { // id of symbolic names
ID_QUAD_PERM = 0,
ID_BITMASK_PERM,
ID_SWAP,
@@ -325,7 +375,7 @@ enum Id { // id of symbolic names
ID_BROADCAST
};
-enum EncBits {
+enum EncBits : unsigned {
// swizzle mode encodings
@@ -357,7 +407,7 @@ enum EncBits {
namespace SDWA {
-enum SdwaSel {
+enum SdwaSel : unsigned {
BYTE_0 = 0,
BYTE_1 = 1,
BYTE_2 = 2,
@@ -367,13 +417,13 @@ enum SdwaSel {
DWORD = 6,
};
-enum DstUnused {
+enum DstUnused : unsigned {
UNUSED_PAD = 0,
UNUSED_SEXT = 1,
UNUSED_PRESERVE = 2,
};
-enum SDWA9EncValues{
+enum SDWA9EncValues : unsigned {
SRC_SGPR_MASK = 0x100,
SRC_VGPR_MASK = 0xFF,
VOPC_DST_VCC_MASK = 0x80,
@@ -382,7 +432,8 @@ enum SDWA9EncValues{
SRC_VGPR_MIN = 0,
SRC_VGPR_MAX = 255,
SRC_SGPR_MIN = 256,
- SRC_SGPR_MAX = 357,
+ SRC_SGPR_MAX_SI = 357,
+ SRC_SGPR_MAX_GFX10 = 361,
SRC_TTMP_MIN = 364,
SRC_TTMP_MAX = 379,
};
@@ -391,7 +442,7 @@ enum SDWA9EncValues{
namespace DPP {
-enum DppCtrl {
+enum DppCtrl : unsigned {
QUAD_PERM_FIRST = 0,
QUAD_PERM_LAST = 0xFF,
DPP_UNUSED1 = 0x100,
@@ -422,7 +473,20 @@ enum DppCtrl {
ROW_HALF_MIRROR = 0x141,
BCAST15 = 0x142,
BCAST31 = 0x143,
- DPP_LAST = BCAST31
+ DPP_UNUSED8_FIRST = 0x144,
+ DPP_UNUSED8_LAST = 0x14F,
+ ROW_SHARE_FIRST = 0x150,
+ ROW_SHARE_LAST = 0x15F,
+ ROW_XMASK_FIRST = 0x160,
+ ROW_XMASK_LAST = 0x16F,
+ DPP_LAST = ROW_XMASK_LAST
+};
+
+enum DppFiMode {
+ DPP_FI_0 = 0,
+ DPP_FI_1 = 1,
+ DPP8_FI_0 = 0xE9,
+ DPP8_FI_1 = 0xEA,
};
} // namespace DPP
@@ -505,6 +569,15 @@ enum DppCtrl {
#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
#define C_00B848_IEEE_MODE 0xFF7FFFFF
+#define S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
+#define G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
+#define C_00B848_WGP_MODE 0xDFFFFFFF
+#define S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
+#define G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
+#define C_00B848_MEM_ORDERED 0xBFFFFFFF
+#define S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
+#define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
+#define C_00B848_FWD_PROGRESS 0x7FFFFFFF
// Helpers for setting FLOAT_MODE
@@ -535,6 +608,15 @@ enum DppCtrl {
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
+#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
+#define S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)
+#define S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)
+#define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
+#define S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)
+#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
+#define S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)
+
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
} // End namespace llvm
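
The register-field helpers appended to SIDefines.h above follow the file's existing convention: for a one-bit field at bit N, S_*(x) shifts a value into position, G_*(x) extracts it, and C_* is the AND-mask that clears it. A small self-contained sketch of that convention for bit 29 (mirroring WGP_MODE; the starting register value is made up for illustration):

#include <cassert>
#include <cstdint>

#define S_WGP_MODE(x) (((x) & 0x1) << 29)
#define G_WGP_MODE(x) (((x) >> 29) & 0x1)
#define C_WGP_MODE 0xDFFFFFFF // all bits set except bit 29

int main() {
  uint32_t Reg = 0x12345678;                // arbitrary register contents
  Reg = (Reg & C_WGP_MODE) | S_WGP_MODE(1); // set the field
  assert(G_WGP_MODE(Reg) == 1);             // read it back
  Reg &= C_WGP_MODE;                        // clear it again
  assert(G_WGP_MODE(Reg) == 0);
  return 0;
}

MEM_ORDERED (bit 30) and FWD_PROGRESS (bit 31) use the same pattern at their respective bit positions.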
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 809f5bab4693..624953963cf4 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1,9 +1,8 @@
//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -104,7 +103,7 @@ using namespace llvm;
static cl::opt<bool> EnableM0Merge(
"amdgpu-enable-merge-m0",
cl::desc("Merge and hoist M0 initializations"),
- cl::init(false));
+ cl::init(true));
namespace {
@@ -144,14 +143,15 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
return new SIFixSGPRCopies();
}
-static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+static bool hasVectorOperands(const MachineInstr &MI,
+ const SIRegisterInfo *TRI) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (!MI.getOperand(i).isReg() ||
!TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
continue;
- if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+ if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
return true;
}
return false;
@@ -184,14 +184,14 @@ static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
- TRI.hasVGPRs(SrcRC);
+ TRI.hasVectorRegisters(SrcRC);
}
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const SIRegisterInfo &TRI) {
return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
- TRI.hasVGPRs(DstRC);
+ TRI.hasVectorRegisters(DstRC);
}
static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
@@ -278,6 +278,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
// VGPRz = REG_SEQUENCE VGPRx, sub0
MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+ bool IsAGPR = TRI->hasAGPRs(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
unsigned SrcReg = MI.getOperand(I).getReg();
@@ -296,6 +297,17 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
TmpReg)
.add(MI.getOperand(I));
+ if (IsAGPR) {
+ const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
+ unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC);
+ unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
+ AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
+ TmpAReg)
+ .addReg(TmpReg, RegState::Kill);
+ TmpReg = TmpAReg;
+ }
+
MI.getOperand(I).setReg(TmpReg);
}
@@ -440,18 +452,32 @@ static bool isReachable(const MachineInstr *From,
(const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
}
+// Return the first non-prologue instruction in the block.
+static MachineBasicBlock::iterator
+getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
+ MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+ while (I != MBB->end() && TII->isBasicBlockPrologue(*I))
+ ++I;
+
+ return I;
+}
+
// Hoist and merge identical SGPR initializations into a common predecessor.
// This is intended to combine M0 initializations, but can work with any
// SGPR. A VGPR cannot be processed since we cannot guarantee vector
// execution.
static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT) {
+ MachineDominatorTree &MDT,
+ const TargetInstrInfo *TII) {
// List of inits by immediate value.
using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
InitListMap Inits;
// List of clobbering instructions.
SmallVector<MachineInstr*, 8> Clobbers;
+ // List of instructions marked for deletion.
+ SmallSet<MachineInstr*, 8> MergedInstrs;
+
bool Changed = false;
for (auto &MI : MRI.def_instructions(Reg)) {
@@ -480,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
MachineInstr *MI2 = *I2;
// Check any possible interference
- auto intereferes = [&](MachineBasicBlock::iterator From,
- MachineBasicBlock::iterator To) -> bool {
+ auto interferes = [&](MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To) -> bool {
assert(MDT.dominates(&*To, &*From));
@@ -513,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
};
if (MDT.dominates(MI1, MI2)) {
- if (!intereferes(MI2, MI1)) {
+ if (!interferes(MI2, MI1)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI2->getParent()) << " " << *MI2);
- MI2->eraseFromParent();
- Defs.erase(I2++);
+ MergedInstrs.insert(MI2);
Changed = true;
+ ++I2;
continue;
}
} else if (MDT.dominates(MI2, MI1)) {
- if (!intereferes(MI1, MI2)) {
+ if (!interferes(MI1, MI2)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI1->getParent()) << " " << *MI1);
- MI1->eraseFromParent();
- Defs.erase(I1++);
+ MergedInstrs.insert(MI1);
Changed = true;
+ ++I1;
break;
}
} else {
@@ -540,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
continue;
}
- MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
- if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+ MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
+ if (!interferes(MI1, I) && !interferes(MI2, I)) {
LLVM_DEBUG(dbgs()
<< "Erasing from "
<< printMBBReference(*MI1->getParent()) << " " << *MI1
@@ -549,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
<< printMBBReference(*MI2->getParent()) << " to "
<< printMBBReference(*I->getParent()) << " " << *MI2);
I->getParent()->splice(I, MI2->getParent(), MI2);
- MI1->eraseFromParent();
- Defs.erase(I1++);
+ MergedInstrs.insert(MI1);
Changed = true;
+ ++I1;
break;
}
}
@@ -561,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
}
+ for (auto MI : MergedInstrs)
+ MI->removeFromParent();
+
if (Changed)
MRI.clearKillFlags(Reg);
@@ -679,11 +708,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI, MDT);
}
+
break;
}
case AMDGPU::REG_SEQUENCE:
- if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
- !hasVGPROperands(MI, TRI)) {
+ if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
+ !hasVectorOperands(MI, TRI)) {
foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
}
@@ -698,7 +728,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
if (TRI->isSGPRClass(DstRC) &&
- (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+ (TRI->hasVectorRegisters(Src0RC) ||
+ TRI->hasVectorRegisters(Src1RC))) {
LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
TII->moveToVALU(MI, MDT);
}
@@ -709,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
- hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+ hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
return true;
}
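
Besides renaming the misspelled intereferes lambda, the SIFixSGPRCopies hunks above change hoistAndMergeSGPRInits so that merged initializations are collected in a MergedInstrs set and only detached after the nested walk over the def lists has finished, rather than being erased while those lists are still being iterated. A standalone sketch of that defer-then-erase pattern in plain C++, with std::list and std::set standing in for the LLVM containers and a trivial "duplicate value" rule standing in for the real merge criterion:

#include <iostream>
#include <iterator>
#include <list>
#include <set>

int main() {
  std::list<int> Insts = {1, 2, 2, 3, 3, 3};
  std::set<const int *> Merged; // analogous to MergedInstrs

  // Phase 1: walk and mark redundant entries, but do not erase yet, so the
  // iterators used by the nested traversal stay valid.
  for (auto I = Insts.begin(); I != Insts.end(); ++I)
    for (auto J = std::next(I); J != Insts.end(); ++J)
      if (*I == *J)
        Merged.insert(&*J);

  // Phase 2: erase everything that was marked, now that no walk is in flight.
  for (auto I = Insts.begin(); I != Insts.end();)
    I = Merged.count(&*I) ? Insts.erase(I) : std::next(I);

  for (int V : Insts)
    std::cout << V << ' ';
  std::cout << '\n'; // prints: 1 2 3
}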
diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
index 15ba78edf919..29484668a01d 100644
--- a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -1,9 +1,8 @@
//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
deleted file mode 100644
index 7761418c5336..000000000000
--- a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-//===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Computations in WWM can overwrite values in inactive channels for
-/// variables that the register allocator thinks are dead. This pass adds fake
-/// uses of those variables to their def(s) to make sure that they aren't
-/// overwritten.
-///
-/// As an example, consider this snippet:
-/// %vgpr0 = V_MOV_B32_e32 0.0
-/// if (...) {
-/// %vgpr1 = ...
-/// %vgpr2 = WWM killed %vgpr1
-/// ... = killed %vgpr2
-/// %vgpr0 = V_MOV_B32_e32 1.0
-/// }
-/// ... = %vgpr0
-///
-/// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally,
-/// we can safely allocate %vgpr0 and %vgpr1 in the same register, since
-/// writing %vgpr1 would only write to channels that would be clobbered by the
-/// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled,
-/// it would clobber even the inactive channels for which the if-condition is
-/// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use
-/// of %vgpr0 to its def to make sure they aren't allocated to the
-/// same register.
-///
-/// In general, we need to figure out what registers might have their inactive
-/// channels which are eventually used accidentally clobbered by a WWM
-/// instruction. We do that by spotting three separate cases of registers:
-///
-/// 1. A "then phi": the value resulting from phi elimination of a phi node at
-/// the end of an if..endif. If there is WWM code in the "then", then we
-/// make the def at the end of the "then" branch a partial def by adding an
-/// implicit use of the register.
-///
-/// 2. A "loop exit register": a value written inside a loop but used outside the
-/// loop, where there is WWM code inside the loop (the case in the example
-/// above). We add an implicit_def of the register in the loop pre-header,
-/// and make the original def a partial def by adding an implicit use of the
-/// register.
-///
-/// 3. A "loop exit phi": the value resulting from phi elimination of a phi node
-/// in a loop header. If there is WWM code inside the loop, then we make all
-/// defs inside the loop partial defs by adding an implicit use of the
-/// register on each one.
-///
-/// Note that we do not need to consider an if..else..endif phi. We only need to
-/// consider non-uniform control flow, and control flow structurization would
-/// have transformed a non-uniform if..else..endif into two if..endifs.
-///
-/// The analysis to detect these cases relies on a property of the MIR
-/// arising from this pass running straight after PHIElimination and before any
-/// coalescing: that any virtual register with more than one definition must be
-/// the new register added to lower a phi node by PHIElimination.
-///
-/// FIXME: We should detect whether a register in one of the above categories is
-/// already live at the WWM code before deciding to add the implicit uses to
-/// synthesize its liveness.
-///
-/// FIXME: I believe this whole scheme may be flawed due to the possibility of
-/// the register allocator doing live interval splitting.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-fix-wwm-liveness"
-
-namespace {
-
-class SIFixWWMLiveness : public MachineFunctionPass {
-private:
- MachineDominatorTree *DomTree;
- MachineLoopInfo *LoopInfo;
- LiveIntervals *LIS = nullptr;
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
-
- std::vector<MachineInstr *> WWMs;
- std::vector<MachineOperand *> ThenDefs;
- std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs;
- std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs;
-
-public:
- static char ID;
-
- SIFixWWMLiveness() : MachineFunctionPass(ID) {
- initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- StringRef getPassName() const override { return "SI Fix WWM Liveness"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(MachineDominatorsID);
- AU.addRequiredID(MachineLoopInfoID);
- // Should preserve the same set that TwoAddressInstructions does.
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreservedID(LiveVariablesID);
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- void processDef(MachineOperand &DefOpnd);
- bool processThenDef(MachineOperand *DefOpnd);
- bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop);
- bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop);
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE,
- "SI fix WWM liveness", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE,
- "SI fix WWM liveness", false, false)
-
-char SIFixWWMLiveness::ID = 0;
-
-char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID;
-
-FunctionPass *llvm::createSIFixWWMLivenessPass() {
- return new SIFixWWMLiveness();
-}
-
-bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n");
- bool Modified = false;
-
- // This doesn't actually need LiveIntervals, but we can preserve them.
- LIS = getAnalysisIfAvailable<LiveIntervals>();
-
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-
- TII = ST.getInstrInfo();
- TRI = &TII->getRegisterInfo();
- MRI = &MF.getRegInfo();
-
- DomTree = &getAnalysis<MachineDominatorTree>();
- LoopInfo = &getAnalysis<MachineLoopInfo>();
-
- // Scan the function to find the WWM sections and the candidate registers for
- // having liveness modified.
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::EXIT_WWM)
- WWMs.push_back(&MI);
- else {
- for (MachineOperand &DefOpnd : MI.defs()) {
- if (DefOpnd.isReg()) {
- unsigned Reg = DefOpnd.getReg();
- if (TRI->isVGPR(*MRI, Reg))
- processDef(DefOpnd);
- }
- }
- }
- }
- }
- if (!WWMs.empty()) {
- // Synthesize liveness over WWM sections as required.
- for (auto ThenDef : ThenDefs)
- Modified |= processThenDef(ThenDef);
- for (auto LoopExitDef : LoopExitDefs)
- Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second);
- for (auto LoopPhiDef : LoopPhiDefs)
- Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second);
- }
-
- WWMs.clear();
- ThenDefs.clear();
- LoopExitDefs.clear();
- LoopPhiDefs.clear();
-
- return Modified;
-}
-
-// During the function scan, process an operand that defines a VGPR.
-// This categorizes the register and puts it in the appropriate list for later
-// use when processing a WWM section.
-void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) {
- unsigned Reg = DefOpnd.getReg();
- // Get all the defining instructions. For convenience, make Defs[0] the def
- // we are on now.
- SmallVector<const MachineInstr *, 4> Defs;
- Defs.push_back(DefOpnd.getParent());
- for (auto &MI : MRI->def_instructions(Reg)) {
- if (&MI != DefOpnd.getParent())
- Defs.push_back(&MI);
- }
- // Check whether this def dominates all the others. If not, ignore this def.
- // Either it is going to be processed when the scan encounters its other def
- // that dominates all defs, or there is no def that dominates all others.
- // The latter case is an eliminated phi from an if..else..endif or similar,
- // which must be for uniform control flow so can be ignored.
- // Because this pass runs shortly after PHIElimination, we assume that any
- // multi-def register is a lowered phi, and thus has each def in a separate
- // basic block.
- for (unsigned I = 1; I != Defs.size(); ++I) {
- if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent()))
- return;
- }
- // Check for the case of an if..endif lowered phi: It has two defs, one
- // dominates the other, and there is a single use in a successor of the
- // dominant def.
- // Later we will spot any WWM code inside
- // the "then" clause and turn the second def into a partial def so its
- // liveness goes through the WWM code in the "then" clause.
- if (Defs.size() == 2) {
- auto DomDefBlock = Defs[0]->getParent();
- if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) {
- auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
- for (auto Succ : DomDefBlock->successors()) {
- if (Succ == UseBlock) {
- LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n");
- ThenDefs.push_back(&DefOpnd);
- return;
- }
- }
- }
- }
- // Check for the case of a non-lowered-phi register (single def) that exits
- // a loop, that is, it has a use that is outside a loop that the def is
- // inside. We find the outermost loop that the def is inside but a use is
- // outside. Later we will spot any WWM code inside that loop and then make
- // the def a partial def so its liveness goes round the loop and through the
- // WWM code.
- if (Defs.size() == 1) {
- auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent());
- if (!Loop)
- return;
- bool IsLoopExit = false;
- for (auto &Use : MRI->use_instructions(Reg)) {
- auto UseBlock = Use.getParent();
- if (Loop->contains(UseBlock))
- continue;
- IsLoopExit = true;
- while (auto Parent = Loop->getParentLoop()) {
- if (Parent->contains(UseBlock))
- break;
- Loop = Parent;
- }
- }
- if (!IsLoopExit)
- return;
- LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
- << " is a loop exit reg with loop header at "
- << "bb." << Loop->getHeader()->getNumber() << "\n");
- LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>(
- &DefOpnd, Loop));
- return;
- }
- // Check for the case of a lowered single-preheader-loop phi, that is, a
- // multi-def register where the dominating def is in the loop pre-header and
- // all other defs are in backedges. Later we will spot any WWM code inside
- // that loop and then make the backedge defs partial defs so the liveness
- // goes through the WWM code.
- // Note that we are ignoring multi-preheader loops on the basis that the
- // structurizer does not allow that for non-uniform loops.
- // There must be a single use in the loop header.
- if (!MRI->hasOneUse(Reg))
- return;
- auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent();
- auto Loop = LoopInfo->getLoopFor(UseBlock);
- if (!Loop || Loop->getHeader() != UseBlock
- || Loop->contains(Defs[0]->getParent())) {
- LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
- << " is multi-def but single use not in loop header\n");
- return;
- }
- for (unsigned I = 1; I != Defs.size(); ++I) {
- if (!Loop->contains(Defs[I]->getParent()))
- return;
- }
- LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
- << " is a loop phi reg with loop header at "
- << "bb." << Loop->getHeader()->getNumber() << "\n");
- LoopPhiDefs.push_back(
- std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop));
-}
-
-// Process a then phi def: It has two defs, one dominates the other, and there
-// is a single use in a successor of the dominant def. Here we spot any WWM
-// code inside the "then" clause and turn the second def into a partial def so
-// its liveness goes through the WWM code in the "then" clause.
-bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) {
- LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent());
- if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
- // Ignore if dominating def is undef.
- LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n");
- return false;
- }
- unsigned Reg = DefOpnd->getReg();
- // Get the use block, which is the endif block.
- auto UseBlock = MRI->use_instr_begin(Reg)->getParent();
- // Check whether there is WWM code inside the then branch. The WWM code must
- // be dominated by the if but not dominated by the endif.
- bool ContainsWWM = false;
- for (auto WWM : WWMs) {
- if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent())
- && !DomTree->dominates(UseBlock, WWM->getParent())) {
- LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
- ContainsWWM = true;
- break;
- }
- }
- if (!ContainsWWM)
- return false;
- // Get the other def.
- MachineInstr *OtherDef = nullptr;
- for (auto &MI : MRI->def_instructions(Reg)) {
- if (&MI != DefOpnd->getParent())
- OtherDef = &MI;
- }
- // Make it a partial def.
- OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
- LLVM_DEBUG(dbgs() << *OtherDef);
- return true;
-}
-
-// Process a loop exit def, that is, a register with a single use in a loop
-// that has a use outside the loop. Here we spot any WWM code inside that loop
-// and then make the def a partial def so its liveness goes round the loop and
-// through the WWM code.
-bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd,
- MachineLoop *Loop) {
- LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent());
- // Check whether there is WWM code inside the loop.
- bool ContainsWWM = false;
- for (auto WWM : WWMs) {
- if (Loop->contains(WWM->getParent())) {
- LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
- ContainsWWM = true;
- break;
- }
- }
- if (!ContainsWWM)
- return false;
- unsigned Reg = DefOpnd->getReg();
- // Add a new implicit_def in loop preheader(s).
- for (auto Pred : Loop->getHeader()->predecessors()) {
- if (!Loop->contains(Pred)) {
- auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
- LLVM_DEBUG(dbgs() << *ImplicitDef);
- (void)ImplicitDef;
- }
- }
- // Make the original def partial.
- DefOpnd->getParent()->addOperand(MachineOperand::CreateReg(
- Reg, false, /*isImp=*/true));
- LLVM_DEBUG(dbgs() << *DefOpnd->getParent());
- return true;
-}
-
-// Process a loop phi def, that is, a multi-def register where the dominating
-// def is in the loop pre-header and all other defs are in backedges. Here we
-// spot any WWM code inside that loop and then make the backedge defs partial
-// defs so the liveness goes through the WWM code.
-bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd,
- MachineLoop *Loop) {
- LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent());
- // Check whether there is WWM code inside the loop.
- bool ContainsWWM = false;
- for (auto WWM : WWMs) {
- if (Loop->contains(WWM->getParent())) {
- LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM);
- ContainsWWM = true;
- break;
- }
- }
- if (!ContainsWWM)
- return false;
- unsigned Reg = DefOpnd->getReg();
- // Remove kill mark from uses.
- for (auto &Use : MRI->use_operands(Reg))
- Use.setIsKill(false);
- // Make all defs except the dominating one partial defs.
- SmallVector<MachineInstr *, 4> Defs;
- for (auto &Def : MRI->def_instructions(Reg))
- Defs.push_back(&Def);
- for (auto Def : Defs) {
- if (DefOpnd->getParent() == Def)
- continue;
- Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true));
- LLVM_DEBUG(dbgs() << *Def);
- }
- return true;
-}
-
diff --git a/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/lib/Target/AMDGPU/SIFixupVectorISel.cpp
index ee39eb04d831..5b834c8de13a 100644
--- a/lib/Target/AMDGPU/SIFixupVectorISel.cpp
+++ b/lib/Target/AMDGPU/SIFixupVectorISel.cpp
@@ -1,9 +1,8 @@
//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// SIFixupVectorISel pass cleans up post ISEL Vector issues.
@@ -198,6 +197,11 @@ static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
  // Atomics don't have a GLC, so omit the field if not there.
if (Glc)
NewGlob->addOperand(MF, *Glc);
+
+ MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
+ if (DLC)
+ NewGlob->addOperand(MF, *DLC);
+
NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
  // _D16 instructions have a vdst_in operand, copy it in.
MachineOperand *VDstInOp = TII->getNamedOperand(MI,
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f4e866958369..74d77d328019 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1,9 +1,8 @@
//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
//===----------------------------------------------------------------------===//
@@ -51,7 +50,7 @@ struct FoldCandidate {
} else if (FoldOp->isFI()) {
FrameIndexToFold = FoldOp->getIndex();
} else {
- assert(FoldOp->isReg());
+ assert(FoldOp->isReg() || FoldOp->isGlobal());
OpToFold = FoldOp;
}
}
@@ -68,6 +67,8 @@ struct FoldCandidate {
return Kind == MachineOperand::MO_Register;
}
+ bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+
bool isCommuted() const {
return Commuted;
}
@@ -88,10 +89,11 @@ public:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const GCNSubtarget *ST;
+ const SIMachineFunctionInfo *MFI;
void foldOperand(MachineOperand &OpToFold,
MachineInstr *UseMI,
- unsigned UseOpIdx,
+ int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
@@ -160,19 +162,34 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
}
}
+// TODO: Add heuristic that the frame index might not fit in the addressing mode
+// immediate offset to avoid materializing in loops.
+static bool frameIndexMayFold(const SIInstrInfo *TII,
+ const MachineInstr &UseMI,
+ int OpNo,
+ const MachineOperand &OpToFold) {
+ return OpToFold.isFI() &&
+ (TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) &&
+ OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
+}
+
FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
}
static bool updateOperand(FoldCandidate &Fold,
const SIInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const GCNSubtarget &ST) {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
assert(Old.isReg());
if (Fold.isImm()) {
- if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) {
+ if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
+ !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
+ AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
+ ST.hasInv2PiInlineImm())) {
// Set op_sel/op_sel_hi on this operand or bail out if op_sel is
// already set.
unsigned Opcode = MI->getOpcode();
@@ -190,77 +207,94 @@ static bool updateOperand(FoldCandidate &Fold,
unsigned Val = Mod.getImm();
if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
return false;
- // If upper part is all zero we do not need op_sel_hi.
- if (!isUInt<16>(Fold.ImmToFold)) {
- if (!(Fold.ImmToFold & 0xffff)) {
- Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+      // Only apply the following transformation if that operand requires
+ // a packed immediate.
+ switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ // If upper part is all zero we do not need op_sel_hi.
+ if (!isUInt<16>(Fold.ImmToFold)) {
+ if (!(Fold.ImmToFold & 0xffff)) {
+ Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
+ Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+ return true;
+ }
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
- Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+ Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
return true;
}
- Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
+ break;
+ default:
+ break;
}
}
+ }
- if (Fold.needsShrink()) {
- MachineBasicBlock *MBB = MI->getParent();
- auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
- if (Liveness != MachineBasicBlock::LQR_Dead)
- return false;
-
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- int Op32 = Fold.getShrinkOpcode();
- MachineOperand &Dst0 = MI->getOperand(0);
- MachineOperand &Dst1 = MI->getOperand(1);
- assert(Dst0.isDef() && Dst1.isDef());
-
- bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+ if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
- const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
- unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
- unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
- MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
- if (HaveNonDbgCarryUse) {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
- .addReg(AMDGPU::VCC, RegState::Kill);
- }
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- // Keep the old instruction around to avoid breaking iterators, but
- // replace the outputs with dummy registers.
- Dst0.setReg(NewReg0);
- Dst1.setReg(NewReg1);
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
- if (Fold.isCommuted())
- TII.commuteInstruction(*Inst32, false);
- return true;
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
}
- Old.ChangeToImmediate(Fold.ImmToFold);
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace it with a dummy instruction to remove uses.
+ //
+ // FIXME: We should not invert how this pass looks at operands to avoid
+ // this. Should track set of foldable movs instead of looking for uses
+ // when looking at a use.
+ Dst0.setReg(NewReg0);
+ for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+ MI->RemoveOperand(I);
+ MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
return true;
}
assert(!Fold.needsShrink() && "not handled");
- if (Fold.isFI()) {
- Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ if (Fold.isImm()) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
return true;
}
- MachineOperand *New = Fold.OpToFold;
- if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
- TargetRegisterInfo::isVirtualRegister(New->getReg())) {
- Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
-
- Old.setIsUndef(New->isUndef());
+ if (Fold.isGlobal()) {
+ Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
+ Fold.OpToFold->getTargetFlags());
return true;
}
- // FIXME: Handle physical registers.
+ if (Fold.isFI()) {
+ Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ return true;
+ }
- return false;
+ MachineOperand *New = Fold.OpToFold;
+ Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
+ Old.setIsUndef(New->isUndef());
+ return true;
}
static bool isUseMIInFoldList(ArrayRef<FoldCandidate> FoldList,
@@ -277,7 +311,6 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineOperand *OpToFold,
const SIInstrInfo *TII) {
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
-
// Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
unsigned Opc = MI->getOpcode();
if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
@@ -344,7 +377,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
Opc == AMDGPU::V_SUB_I32_e64 ||
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
- OpToFold->isImm()) {
+ (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
// Verify the other operand is a VGPR, otherwise we would violate the
@@ -357,7 +390,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
assert(MI->getOperand(1).isDef());
- int Op32 = AMDGPU::getVOPe32(Opc);
+ // Make sure to get the 32-bit version of the commuted opcode.
+ unsigned MaybeCommutedOpc = MI->getOpcode();
+ int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
Op32));
return true;
@@ -384,10 +420,75 @@ static bool isUseSafeToFold(const SIInstrInfo *TII,
//return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
}
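+// Try to fold an inline-constant immediate, or a REG_SEQUENCE splat of one,
+// into an operand whose type is in the OPERAND_REG_INLINE_AC_* range.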
+static bool tryToFoldACImm(const SIInstrInfo *TII,
+ const MachineOperand &OpToFold,
+ MachineInstr *UseMI,
+ unsigned UseOpIdx,
+ SmallVectorImpl<FoldCandidate> &FoldList) {
+ const MCInstrDesc &Desc = UseMI->getDesc();
+ const MCOperandInfo *OpInfo = Desc.OpInfo;
+ if (!OpInfo || UseOpIdx >= Desc.getNumOperands())
+ return false;
+
+ uint8_t OpTy = OpInfo[UseOpIdx].OperandType;
+ if (OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
+ OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST)
+ return false;
+
+ if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy)) {
+ UseMI->getOperand(UseOpIdx).ChangeToImmediate(OpToFold.getImm());
+ return true;
+ }
+
+ if (!OpToFold.isReg())
+ return false;
+
+ unsigned UseReg = OpToFold.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(UseReg))
+ return false;
+
+ if (llvm::find_if(FoldList, [UseMI](const FoldCandidate &FC) {
+ return FC.UseMI == UseMI; }) != FoldList.end())
+ return false;
+
+ MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
+ const MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
+ if (!Def || !Def->isRegSequence())
+ return false;
+
+ int64_t Imm;
+ MachineOperand *Op;
+ for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
+ const MachineOperand &Sub = Def->getOperand(I);
+ if (!Sub.isReg() || Sub.getSubReg())
+ return false;
+ MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub.getReg());
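+ // Look through chains of foldable copies to reach the defining move-immediate.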
+ while (SubDef && !SubDef->isMoveImmediate() &&
+ !SubDef->getOperand(1).isImm() && TII->isFoldableCopy(*SubDef))
+ SubDef = MRI.getUniqueVRegDef(SubDef->getOperand(1).getReg());
+ if (!SubDef || !SubDef->isMoveImmediate() || !SubDef->getOperand(1).isImm())
+ return false;
+ Op = &SubDef->getOperand(1);
+ auto SubImm = Op->getImm();
+ if (I == 1) {
+ if (!TII->isInlineConstant(SubDef->getOperand(1), OpTy))
+ return false;
+
+ Imm = SubImm;
+ continue;
+ }
+ if (Imm != SubImm)
+ return false; // Can only fold splat constants
+ }
+
+ FoldList.push_back(FoldCandidate(UseMI, UseOpIdx, Op));
+ return true;
+}
+
void SIFoldOperands::foldOperand(
MachineOperand &OpToFold,
MachineInstr *UseMI,
- unsigned UseOpIdx,
+ int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
@@ -420,11 +521,18 @@ void SIFoldOperands::foldOperand(
unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+ MachineRegisterInfo::use_iterator Next;
for (MachineRegisterInfo::use_iterator
RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end();
- RSUse != RSE; ++RSUse) {
+ RSUse != RSE; RSUse = Next) {
+ Next = std::next(RSUse);
MachineInstr *RSUseMI = RSUse->getParent();
+
+ if (tryToFoldACImm(TII, UseMI->getOperand(0), RSUseMI,
+ RSUse.getOperandNo(), FoldList))
+ continue;
+
if (RSUse->getSubReg() != RegSeqDstSubReg)
continue;
@@ -435,10 +543,32 @@ void SIFoldOperands::foldOperand(
return;
}
+ if (tryToFoldACImm(TII, OpToFold, UseMI, UseOpIdx, FoldList))
+ return;
- bool FoldingImm = OpToFold.isImm();
+ if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
+ // Sanity check that this is a stack access.
+ // FIXME: Should probably use stack pseudos before frame lowering.
+ MachineOperand *SOff = TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
+ if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
+ SOff->getReg() != MFI->getStackPtrOffsetReg()))
+ return;
+
+ if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
+ MFI->getScratchRSrcReg())
+ return;
- if (FoldingImm && UseMI->isCopy()) {
+ // A frame index will resolve to a positive constant, so it should always be
+ // safe to fold the addressing mode, even pre-GFX9.
+ UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
+ SOff->setReg(MFI->getStackPtrOffsetReg());
+ return;
+ }
+
+ bool FoldingImmLike =
+ OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
+
+ if (FoldingImmLike && UseMI->isCopy()) {
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
@@ -449,7 +579,7 @@ void SIFoldOperands::foldOperand(
if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
TargetRegisterInfo::isVirtualRegister(SrcReg)) {
const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg);
- if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) {
+ if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
MachineRegisterInfo::use_iterator NextUse;
SmallVector<FoldCandidate, 4> CopyUses;
for (MachineRegisterInfo::use_iterator
@@ -467,6 +597,14 @@ void SIFoldOperands::foldOperand(
}
}
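+ // An inline constant copied into an AGPR can be materialized directly with
+ // V_ACCVGPR_WRITE_B32 instead of keeping the copy.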
+ if (DestRC == &AMDGPU::AGPR_32RegClass &&
+ TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+ UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+ CopiesToReplace.push_back(UseMI);
+ return;
+ }
+
// In order to fold immediates into copies, we need to change the
// copy to a MOV.
@@ -479,18 +617,71 @@ void SIFoldOperands::foldOperand(
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) &&
- TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) &&
- TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
- TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) &&
+ TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
+ TRI->isVectorRegister(*MRI, UseMI->getOperand(1).getReg()) &&
!UseMI->getOperand(1).getSubReg()) {
+ unsigned Size = TII->getOpSize(*UseMI, 1);
UseMI->getOperand(1).setReg(OpToFold.getReg());
UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
UseMI->getOperand(1).setIsKill(false);
CopiesToReplace.push_back(UseMI);
OpToFold.setIsKill(false);
+ if (Size != 4)
+ return;
+ if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+ TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()))
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32));
+ else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+ TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg()))
+ UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32));
return;
}
+ unsigned UseOpc = UseMI->getOpcode();
+ if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
+ (UseOpc == AMDGPU::V_READLANE_B32 &&
+ (int)UseOpIdx ==
+ AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
+ // %vgpr = V_MOV_B32 imm
+ // %sgpr = V_READFIRSTLANE_B32 %vgpr
+ // =>
+ // %sgpr = S_MOV_B32 imm
+ if (FoldingImmLike) {
+ if (execMayBeModifiedBeforeUse(*MRI,
+ UseMI->getOperand(UseOpIdx).getReg(),
+ *OpToFold.getParent(),
+ *UseMI))
+ return;
+
+ UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
+
+ // FIXME: ChangeToImmediate should clear subreg
+ UseMI->getOperand(1).setSubReg(0);
+ if (OpToFold.isImm())
+ UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+ else
+ UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
+ UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ return;
+ }
+
+ if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
+ if (execMayBeModifiedBeforeUse(*MRI,
+ UseMI->getOperand(UseOpIdx).getReg(),
+ *OpToFold.getParent(),
+ *UseMI))
+ return;
+
+ // %vgpr = COPY %sgpr0
+ // %sgpr1 = V_READFIRSTLANE_B32 %vgpr
+ // =>
+ // %sgpr1 = COPY %sgpr0
+ UseMI->setDesc(TII->get(AMDGPU::COPY));
+ UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
+ return;
+ }
+ }
+
const MCInstrDesc &UseDesc = UseMI->getDesc();
// Don't fold into target independent nodes. Target independent opcodes
@@ -501,7 +692,7 @@ void SIFoldOperands::foldOperand(
return;
}
- if (!FoldingImm) {
+ if (!FoldingImmLike) {
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
// FIXME: We could try to change the instruction from 64-bit to 32-bit
@@ -515,14 +706,10 @@ void SIFoldOperands::foldOperand(
const TargetRegisterClass *FoldRC =
TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
-
// Split 64-bit constants into 32-bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
unsigned UseReg = UseOp.getReg();
- const TargetRegisterClass *UseRC
- = TargetRegisterInfo::isVirtualRegister(UseReg) ?
- MRI->getRegClass(UseReg) :
- TRI->getPhysRegClass(UseReg);
+ const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
return;
@@ -763,14 +950,23 @@ static bool tryFoldInst(const SIInstrInfo *TII,
Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
- if (Src1->isIdenticalTo(*Src0)) {
+ int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+ int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
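+ // Only fold when neither source has modifiers set; with modifiers the two
+ // operands are not actually identical.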
+ if (Src1->isIdenticalTo(*Src0) &&
+ (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
+ (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
+ auto &NewDesc =
+ TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx != -1)
MI->RemoveOperand(Src2Idx);
MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
- mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
- : getMovOpc(false)));
+ if (Src1ModIdx != -1)
+ MI->RemoveOperand(Src1ModIdx);
+ if (Src0ModIdx != -1)
+ MI->RemoveOperand(Src0ModIdx);
+ mutateCopyOp(*MI, NewDesc);
LLVM_DEBUG(dbgs() << *MI << '\n');
return true;
}
@@ -788,7 +984,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
SmallVector<FoldCandidate, 4> FoldList;
MachineOperand &Dst = MI.getOperand(0);
- bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+ bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
if (FoldingImm) {
unsigned NumLiteralUses = 0;
MachineOperand *NonInlineUse = nullptr;
@@ -840,6 +1036,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
// in some cases. A better heuristic is needed.
if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
+ } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
+ foldOperand(OpToFold, UseMI, OpNo, FoldList,
+ CopiesToReplace);
} else {
if (++NumLiteralUses == 1) {
NonInlineUse = &*Use;
@@ -874,7 +1073,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
- if (updateOperand(Fold, *TII, *TRI)) {
+ if (updateOperand(Fold, *TII, *TRI, *ST)) {
// Clear kill flags.
if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
@@ -926,7 +1125,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
// Having a 0 op_sel_hi would require swizzling the output in the source
// instruction, which we can't do.
- unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0;
+ unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1
+ : 0u;
if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
return nullptr;
return Src0;
@@ -1105,13 +1305,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
-
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MFI = MF.getInfo<SIMachineFunctionInfo>();
// omod is ignored by hardware if IEEE bit is enabled. omod also does not
// correctly handle signed zeros.
//
- bool IsIEEEMode = ST->enableIEEEBit(MF);
+ // FIXME: Also need to check strictfp
+ bool IsIEEEMode = MFI->getMode().IEEE;
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
for (MachineBasicBlock *MBB : depth_first(&MF)) {
@@ -1132,7 +1332,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
}
MachineOperand &OpToFold = MI.getOperand(1);
- bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
+ bool FoldingImm =
+ OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
// FIXME: We could also be folding things like TargetIndexes.
if (!FoldingImm && !OpToFold.isReg())
diff --git a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index aa976d5141f8..f3c9ad63a80a 100644
--- a/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -1,9 +1,8 @@
//===-- SIFormMemoryClauses.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -119,6 +118,17 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
return false;
if (!IsVMEMClause && !isSMEMClauseInst(MI))
return false;
+ // If this is a load instruction where the result has been coalesced with an
+ // operand, then we cannot clause it.
+ for (const MachineOperand &ResMO : MI.defs()) {
+ unsigned ResReg = ResMO.getReg();
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ if (MO.getReg() == ResReg)
+ return false;
+ }
+ break; // Only check the first def.
+ }
return true;
}
@@ -309,6 +319,8 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
+ unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
+ MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::instr_iterator Next;
@@ -329,7 +341,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
continue;
unsigned Length = 1;
- for ( ; Next != E && Length < MaxClause; ++Next) {
+ for ( ; Next != E && Length < FuncMaxClause; ++Next) {
if (!isValidClauseInst(*Next, IsVMEM))
break;
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index e4633c88e18f..feab6bed2603 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1,9 +1,8 @@
//===----------------------- SIFrameLowering.cpp --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
@@ -22,6 +21,8 @@
using namespace llvm;
+#define DEBUG_TYPE "frame-info"
+
static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
const MachineFunction &MF) {
@@ -35,6 +36,150 @@ static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
ST.getMaxNumSGPRs(MF));
}
+// Find a scratch register that we can use at the start of the prologue to
+// re-align the stack pointer. We avoid using callee-save registers since they
+// may appear to be free when this is called from canUseAsPrologue (during
+// shrink wrapping), but then no longer be free when this is called from
+// emitPrologue.
+//
+// FIXME: This is a bit conservative, since in the above case we could use one
+// of the callee-save registers as a scratch temp to re-align the stack pointer,
+// but we would then have to make sure that we were in fact saving at least one
+// callee-save register in the prologue, which is additional complexity that
+// doesn't seem worth the benefit.
+static unsigned findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
+ LivePhysRegs &LiveRegs,
+ const TargetRegisterClass &RC,
+ bool Unused = false) {
+ // Mark callee saved registers as used so we will not choose them.
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ LiveRegs.addReg(CSRegs[i]);
+
+ if (Unused) {
+ // We are looking for a register that can be used throughout the entire
+ // function, so any use is unacceptable.
+ for (unsigned Reg : RC) {
+ if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
+ return Reg;
+ }
+ } else {
+ for (unsigned Reg : RC) {
+ if (LiveRegs.available(MRI, Reg))
+ return Reg;
+ }
+ }
+
+ // If an unused register is required, the caller is in a context where failure
+ // is an option and has an alternative plan. In other contexts, this must
+ // succeed.
+ if (!Unused)
+ report_fatal_error("failed to find free scratch register");
+
+ return AMDGPU::NoRegister;
+}
+
+static MCPhysReg findUnusedSGPRNonCalleeSaved(MachineRegisterInfo &MRI) {
+ LivePhysRegs LiveRegs;
+ LiveRegs.init(*MRI.getTargetRegisterInfo());
+ return findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);
+}
+
+// Stack operations are emitted specially here because they use a different
+// frame register than the rest of the function (the one getFrameRegister
+// would return).
+static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const SIInstrInfo *TII, unsigned SpillReg,
+ unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
+ MachineFunction *MF = MBB.getParent();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ int64_t Offset = MFI.getObjectOffset(FI);
+
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
+ MFI.getObjectAlignment(FI));
+
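+ // A 12-bit unsigned immediate offset can address the slot directly; otherwise
+ // materialize the offset in a scratch VGPR and use the OFFEN form.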
+ if (isUInt<12>(Offset)) {
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
+ .addReg(SpillReg, RegState::Kill)
+ .addReg(ScratchRsrcReg)
+ .addReg(SPReg)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+ return;
+ }
+
+ MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+ MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
+ .addImm(Offset);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
+ .addReg(SpillReg, RegState::Kill)
+ .addReg(OffsetReg, RegState::Kill)
+ .addReg(ScratchRsrcReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+}
+
+static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const SIInstrInfo *TII, unsigned SpillReg,
+ unsigned ScratchRsrcReg, unsigned SPReg, int FI) {
+ MachineFunction *MF = MBB.getParent();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ int64_t Offset = MFI.getObjectOffset(FI);
+
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
+ MFI.getObjectAlignment(FI));
+
+ if (isUInt<12>(Offset)) {
+ BuildMI(MBB, I, DebugLoc(),
+ TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
+ .addReg(ScratchRsrcReg)
+ .addReg(SPReg)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+ return;
+ }
+
+ MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
+ MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
+ .addImm(Offset);
+
+ BuildMI(MBB, I, DebugLoc(),
+ TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
+ .addReg(OffsetReg, RegState::Kill)
+ .addReg(ScratchRsrcReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addMemOperand(MMO);
+}
+
void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const {
@@ -71,6 +216,24 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
// Do a 64-bit pointer add.
if (ST.flatScratchIsPointer()) {
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
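+ // On GFX10+ the flat scratch base is programmed through s_setreg on the
+ // FLAT_SCR_LO/HI hardware registers rather than by writing FLAT_SCR directly.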
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+ addReg(FlatScrInitLo).
+ addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
+ (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
+ addReg(FlatScrInitHi).
+ addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
+ (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
+ return;
+ }
+
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
@@ -81,6 +244,8 @@ void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
return;
}
+ assert(ST.getGeneration() < AMDGPUSubtarget::GFX10);
+
// Copy the size in bytes.
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitHi, RegState::Kill);
@@ -145,34 +310,30 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
return ScratchRsrcReg;
}
-// Shift down registers reserved for the scratch wave offset and stack pointer
-// SGPRs.
-std::pair<unsigned, unsigned>
+// Shift down registers reserved for the scratch wave offset.
+std::pair<unsigned, bool>
SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
- const GCNSubtarget &ST,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI,
- MachineFunction &MF) const {
+ const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ assert(MFI->isEntryFunction());
+
// No replacement necessary.
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
- !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
- assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
- return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+ (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
+ return std::make_pair(AMDGPU::NoRegister, false);
}
- unsigned SPReg = MFI->getStackPtrOffsetReg();
if (ST.hasSGPRInitBug())
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return std::make_pair(ScratchWaveOffsetReg, false);
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return std::make_pair(ScratchWaveOffsetReg, false);
AllSGPRs = AllSGPRs.slice(NumPreloaded);
@@ -193,10 +354,11 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
unsigned ReservedRegCount = 13;
if (AllSGPRs.size() < ReservedRegCount)
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return std::make_pair(ScratchWaveOffsetReg, false);
bool HandledScratchWaveOffsetReg =
ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ bool FPAdjusted = false;
for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
@@ -206,24 +368,25 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
HandledScratchWaveOffsetReg = true;
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
+ assert(!hasFP(MF));
+ MFI->setStackPtrOffsetReg(Reg);
+ }
+
MFI->setScratchWaveOffsetReg(Reg);
+ MFI->setFrameOffsetReg(Reg);
ScratchWaveOffsetReg = Reg;
+ FPAdjusted = true;
break;
}
}
}
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
}
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
- // specified.
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.debuggerEmitPrologue())
- emitDebuggerPrologue(MF, MBB);
-
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -234,6 +397,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// FIXME: We should be cleaning up these unused SGPR spill frame indices
// somewhere.
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -251,38 +415,13 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
if (MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
- unsigned SPReg = MFI->getStackPtrOffsetReg();
- if (SPReg != AMDGPU::SP_REG) {
- assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
-
- DebugLoc DL;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- int64_t StackSize = FrameInfo.getStackSize();
-
- if (StackSize == 0) {
- BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
- .addReg(MFI->getScratchWaveOffsetReg());
- } else {
- BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
- .addReg(MFI->getScratchWaveOffsetReg())
- .addImm(StackSize * ST.getWavefrontSize());
- }
- }
-
unsigned ScratchRsrcReg
= getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
unsigned ScratchWaveOffsetReg;
- std::tie(ScratchWaveOffsetReg, SPReg)
- = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
- // It's possible to have uses of only ScratchWaveOffsetReg without
- // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
- // but the inverse is not true.
- if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
- assert(ScratchRsrcReg == AMDGPU::NoRegister);
- return;
- }
+ bool FPAdjusted;
+ std::tie(ScratchWaveOffsetReg, FPAdjusted) =
+ getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
@@ -294,18 +433,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
}
- bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+ bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
+ MRI.isPhysRegUsed(ScratchWaveOffsetReg);
bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
MRI.isPhysRegUsed(ScratchRsrcReg);
+ // FIXME: Hack to not crash in situations which emitted an error.
+ if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
+ return;
+
// We added live-ins during argument lowering, but since they were not used
// they were deleted. We're adding the uses now, so add them back.
- if (OffsetRegUsed) {
- assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
- "scratch wave offset input is required");
- MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
- MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
- }
+ MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+ MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
@@ -318,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
if (&OtherBB == &MBB)
continue;
- if (OffsetRegUsed)
+ if (OffsetRegUsed || FPAdjusted)
OtherBB.addLiveIn(ScratchWaveOffsetReg);
if (ResourceRegUsed)
@@ -346,11 +486,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (OffsetRegUsed &&
- PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ assert(SPReg != AMDGPU::SP_REG);
+
+ // FIXME: Remove the isPhysRegUsed checks
+ const bool HasFP = hasFP(MF);
+
+ if (HasFP || OffsetRegUsed) {
+ assert(ScratchWaveOffsetReg);
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
- .addReg(PreloadedScratchWaveOffsetReg,
- MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
+ .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
}
if (CopyBuffer && !CopyBufferFirst) {
@@ -358,9 +503,26 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (ResourceRegUsed)
+ if (ResourceRegUsed) {
emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
PreloadedPrivateBufferReg, ScratchRsrcReg);
+ }
+
+ if (HasFP) {
+ DebugLoc DL;
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ int64_t StackSize = FrameInfo.getStackSize();
+
+ // On kernel entry, the private scratch wave offset is the SP value.
+ if (StackSize == 0) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(StackSize * ST.getWavefrontSize());
+ }
+ }
}
// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
@@ -405,7 +567,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
}
}
MF.getRegInfo().addLiveIn(GitPtrLo);
- MF.front().addLiveIn(GitPtrLo);
+ MBB.addLiveIn(GitPtrLo);
BuildMI(MBB, I, DL, SMovB32, RsrcLo)
.addReg(GitPtrLo)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
@@ -421,12 +583,15 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
MachineMemOperand::MOLoad |
MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable,
- 0, 0);
+ 16, 4);
unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
+ const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+ unsigned EncodedOffset = AMDGPU::getSMRDEncodedOffset(Subtarget, Offset);
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
.addReg(Rsrc01)
- .addImm(Offset) // offset
+ .addImm(EncodedOffset) // offset
.addImm(0) // glc
+ .addImm(0) // dlc
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
return;
@@ -462,13 +627,17 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
MachineMemOperand::MOLoad |
MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable,
- 0, 0);
+ 8, 4);
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
.addImm(0) // glc
+ .addImm(0) // dlc
.addMemOperand(MMO)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
+ MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
}
} else {
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
@@ -494,38 +663,14 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
}
}
-// Find a scratch register that we can use at the start of the prologue to
-// re-align the stack pointer. We avoid using callee-save registers since they
-// may appear to be free when this is called from canUseAsPrologue (during
-// shrink wrapping), but then no longer be free when this is called from
-// emitPrologue.
-//
-// FIXME: This is a bit conservative, since in the above case we could use one
-// of the callee-save registers as a scratch temp to re-align the stack pointer,
-// but we would then have to make sure that we were in fact saving at least one
-// callee-save register in the prologue, which is additional complexity that
-// doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
- MachineFunction *MF = MBB.getParent();
-
- const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
- const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
- LivePhysRegs LiveRegs(TRI);
- LiveRegs.addLiveIns(MBB);
-
- // Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
- for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
-
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- for (unsigned Reg : AMDGPU::SReg_32_XM0RegClass) {
- if (LiveRegs.available(MRI, Reg))
- return Reg;
+bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
+ switch (ID) {
+ case TargetStackID::Default:
+ case TargetStackID::NoAlloc:
+ case TargetStackID::SGPRSpill:
+ return true;
}
-
- return AMDGPU::NoRegister;
+ llvm_unreachable("Invalid TargetStackID::Value");
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
@@ -537,31 +682,105 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
unsigned FramePtrReg = FuncInfo->getFrameOffsetReg();
+ LivePhysRegs LiveRegs;
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL;
- // XXX - Is this the right predicate?
-
- bool NeedFP = hasFP(MF);
+ bool HasFP = false;
uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = NumBytes;
- const bool NeedsRealignment = TRI.needsStackRealignment(MF);
+ // To avoid clobbering VGPRs in lanes that weren't active on function entry,
+ // turn on all lanes before doing the spill to memory.
+ unsigned ScratchExecCopy = AMDGPU::NoRegister;
+
+ // Emit the copy if we need an FP, and are using a free SGPR to save it.
+ if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->SGPRForFPSaveRestoreCopy)
+ .addReg(FramePtrReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
+ : FuncInfo->getSGPRSpillVGPRs()) {
+ if (!Reg.FI.hasValue())
+ continue;
+
+ if (ScratchExecCopy == AMDGPU::NoRegister) {
+ if (LiveRegs.empty()) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveIns(MBB);
+ if (FuncInfo->SGPRForFPSaveRestoreCopy)
+ LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ }
+
+ ScratchExecCopy
+ = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
+ *TRI.getWaveMaskRegClass());
+ assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
+
+ const unsigned OrSaveExec = ST.isWave32() ?
+ AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+ BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
+ ScratchExecCopy)
+ .addImm(-1);
+ }
- if (NeedsRealignment) {
- assert(NeedFP);
+ buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
+ FuncInfo->getScratchRSrcReg(),
+ StackPtrReg,
+ Reg.FI.getValue());
+ }
+
+ if (ScratchExecCopy != AMDGPU::NoRegister) {
+ // FIXME: Split block and make terminator.
+ unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
+ .addReg(ScratchExecCopy, RegState::Kill);
+ LiveRegs.addReg(ScratchExecCopy);
+ }
+
+
+ if (FuncInfo->FramePointerSaveIndex) {
+ const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+ assert(!MFI.isDeadObjectIndex(FI) &&
+ MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
+ = FuncInfo->getSGPRToVGPRSpills(FI);
+ assert(Spill.size() == 1);
+
+ // Save FP before setting it up.
+ // FIXME: This should respect spillSGPRToVGPR.
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill[0].VGPR)
+ .addReg(FramePtrReg)
+ .addImm(Spill[0].Lane)
+ .addReg(Spill[0].VGPR, RegState::Undef);
+ }
+
+ if (TRI.needsStackRealignment(MF)) {
+ HasFP = true;
const unsigned Alignment = MFI.getMaxAlignment();
RoundedSize += Alignment;
+ if (LiveRegs.empty()) {
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveIns(MBB);
+ LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ }
- unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(MBB);
- assert(ScratchSPReg != AMDGPU::NoRegister);
+ unsigned ScratchSPReg = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, AMDGPU::SReg_32_XM0RegClass);
+ assert(ScratchSPReg != AMDGPU::NoRegister &&
+ ScratchSPReg != FuncInfo->SGPRForFPSaveRestoreCopy);
// s_add_u32 tmp_reg, s32, NumBytes
// s_and_b32 s32, tmp_reg, 0b111...0000
@@ -574,7 +793,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(-Alignment * ST.getWavefrontSize())
.setMIFlag(MachineInstr::FrameSetup);
FuncInfo->setIsStackRealigned(true);
- } else if (NeedFP) {
+ } else if ((HasFP = hasFP(MF))) {
// If we need a base pointer, set it up here. It's whatever the value of
// the stack pointer is at this point. Any variable size objects will be
// allocated after this, so we can still use the base pointer to reference
@@ -584,21 +803,20 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
- if (RoundedSize != 0 && hasSP(MF)) {
+ if (HasFP && RoundedSize != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * ST.getWavefrontSize())
.setMIFlag(MachineInstr::FrameSetup);
}
- for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
- : FuncInfo->getSGPRSpillVGPRs()) {
- if (!Reg.FI.hasValue())
- continue;
- TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true,
- Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
- &TII->getRegisterInfo());
- }
+ assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister ||
+ FuncInfo->FramePointerSaveIndex)) &&
+ "Needed to save FP but didn't save it anywhere");
+
+ assert((HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy == AMDGPU::NoRegister &&
+ !FuncInfo->FramePointerSaveIndex)) &&
+ "Saved FP but didn't need it");
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -609,39 +827,87 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ LivePhysRegs LiveRegs;
+ DebugLoc DL;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint32_t NumBytes = MFI.getStackSize();
+ uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
+ NumBytes + MFI.getMaxAlignment() : NumBytes;
+
+ if (RoundedSize != 0 && hasFP(MF)) {
+ const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
+ .addReg(StackPtrReg)
+ .addImm(RoundedSize * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+
+ if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister) {
+ BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
+ .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ if (FuncInfo->FramePointerSaveIndex) {
+ const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+
+ assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
+ MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
+ = FuncInfo->getSGPRToVGPRSpills(FI);
+ assert(Spill.size() == 1);
+ BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ FuncInfo->getFrameOffsetReg())
+ .addReg(Spill[0].VGPR)
+ .addImm(Spill[0].Lane);
+ }
+ unsigned ScratchExecCopy = AMDGPU::NoRegister;
for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
: FuncInfo->getSGPRSpillVGPRs()) {
if (!Reg.FI.hasValue())
continue;
- TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR,
- Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass,
- &TII->getRegisterInfo());
- }
- unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
- if (StackPtrReg == AMDGPU::NoRegister)
- return;
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ if (ScratchExecCopy == AMDGPU::NoRegister) {
+ // See emitPrologue
+ if (LiveRegs.empty()) {
+ LiveRegs.init(*ST.getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ LiveRegs.stepBackward(*MBBI);
+ }
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- uint32_t NumBytes = MFI.getStackSize();
+ ScratchExecCopy = findScratchNonCalleeSaveRegister(
+ MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+ LiveRegs.removeReg(ScratchExecCopy);
- DebugLoc DL;
+ const unsigned OrSaveExec =
+ ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
- // FIXME: Clarify distinction between no set SP and SP. For callee functions,
- // it's really whether we need SP to be accurate or not.
+ BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
+ .addImm(-1);
+ }
- if (NumBytes != 0 && hasSP(MF)) {
- uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
- NumBytes + MFI.getMaxAlignment() : NumBytes;
+ buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
+ FuncInfo->getScratchRSrcReg(),
+ FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
+ }
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
- .addReg(StackPtrReg)
- .addImm(RoundedSize * ST.getWavefrontSize());
+ if (ScratchExecCopy != AMDGPU::NoRegister) {
+ // FIXME: Split block and make terminator.
+ unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
+ .addReg(ScratchExecCopy, RegState::Kill);
}
}
+// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
+// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
@@ -652,6 +918,22 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
return true;
}
+#ifndef NDEBUG
+static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
+ Optional<int> FramePointerSaveIndex) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+ I != E; ++I) {
+ if (!MFI.isDeadObjectIndex(I) &&
+ MFI.getStackID(I) == TargetStackID::SGPRSpill &&
+ FramePointerSaveIndex && I != FramePointerSaveIndex) {
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
@@ -665,81 +947,145 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (!MFI.hasStackObjects())
- return;
-
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- bool AllSGPRSpilledToVGPRs = false;
-
- if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
- AllSGPRSpilledToVGPRs = true;
-
- // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
- // are spilled to VGPRs, in which case we can eliminate the stack usage.
- //
- // XXX - This operates under the assumption that only other SGPR spills are
- // users of the frame index. I'm not 100% sure this is correct. The
- // StackColoring pass has a comment saying a future improvement would be to
- // merging of allocas with spill slots, but for now according to
- // MachineFrameInfo isSpillSlot can't alias any other object.
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
- if (TII->isSGPRSpill(MI)) {
- int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
- assert(MFI.getStackID(FI) == SIStackID::SGPR_SPILL);
- if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
- bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
- (void)Spilled;
- assert(Spilled && "failed to spill SGPR to VGPR when allocated");
- } else
- AllSGPRSpilledToVGPRs = false;
- }
- }
- }
- FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
- }
+ FuncInfo->removeDeadFrameIndices(MFI);
+ assert(allSGPRSpillsAreDead(MFI, None) &&
+ "SGPR spill should have been removed in SILowerSGPRSpills");
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
// but currently hasNonSpillStackObjects is set only from source
// allocas. Stack temps produced from legalization are not counted currently.
- if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
- !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+ if (!allStackObjectsAreDead(MFI)) {
assert(RS && "RegScavenger required if spilling");
- // We force this to be at offset 0 so no user object ever has 0 as an
- // address, so we may use 0 as an invalid pointer value. This is because
- // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
- // is required to be address space 0, we are forced to accept this for
- // now. Ideally we could have the stack in another address space with 0 as a
- // valid pointer, and -1 as the null value.
- //
- // This will also waste additional space when user stack objects require > 4
- // byte alignment.
- //
- // The main cost here is losing the offset for addressing modes. However
- // this also ensures we shouldn't need a register for the offset when
- // emergency scavenging.
- int ScavengeFI = MFI.CreateFixedObject(
- TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
- RS->addScavengingFrameIndex(ScavengeFI);
+ if (FuncInfo->isEntryFunction()) {
+ int ScavengeFI = MFI.CreateFixedObject(
+ TRI->getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
+ RS->addScavengingFrameIndex(ScavengeFI);
+ } else {
+ int ScavengeFI = MFI.CreateStackObject(
+ TRI->getSpillSize(AMDGPU::SGPR_32RegClass),
+ TRI->getSpillAlignment(AMDGPU::SGPR_32RegClass),
+ false);
+ RS->addScavengingFrameIndex(ScavengeFI);
+ }
}
}
-void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+// Only report VGPRs to generic code.
+void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedVGPRs,
RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI->isEntryFunction())
+ return;
+
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ // Ignore the SGPRs the default implementation found.
+ SavedVGPRs.clearBitsNotInMask(TRI->getAllVGPRRegMask());
+
+ // hasFP only knows about stack objects that already exist. We're now
+ // determining the stack slots that will be created, so we have to predict
+ // them. Stack objects force FP usage with calls.
+ //
+ // Note a new VGPR CSR may be introduced if one is used for the spill, but we
+ // don't want to report it here.
+ //
+ // FIXME: Is this really hasReservedCallFrame?
+ const bool WillHaveFP =
+ FrameInfo.hasCalls() &&
+ (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
+
+ // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
+ // so don't allow the default insertion to handle them.
+ for (auto SSpill : MFI->getSGPRSpillVGPRs())
+ SavedVGPRs.reset(SSpill.VGPR);
+
+ const bool HasFP = WillHaveFP || hasFP(MF);
+ if (!HasFP)
+ return;
+
+ if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
+ int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
+ TargetStackID::SGPRSpill);
+
+ // If there is already a VGPR with free lanes, use it. We may already have
+ // to pay the penalty for spilling a CSR VGPR.
+ if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
+ llvm_unreachable("allocate SGPR spill should have worked");
+
+ MFI->FramePointerSaveIndex = NewFI;
+
+ LLVM_DEBUG(
+ auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << "Spilling FP to " << printReg(Spill.VGPR, TRI)
+ << ':' << Spill.Lane << '\n');
+ return;
+ }
+
+ MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
+
+ if (!MFI->SGPRForFPSaveRestoreCopy) {
+ // There's no free lane to spill, and no free register to save FP, so we're
+ // forced to spill another VGPR to use for the spill.
+ int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
+ TargetStackID::SGPRSpill);
+ if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
+ llvm_unreachable("allocate SGPR spill should have worked");
+ MFI->FramePointerSaveIndex = NewFI;
+
+ LLVM_DEBUG(
+ auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
+ dbgs() << "FP requires fallback spill to " << printReg(Spill.VGPR, TRI)
+ << ':' << Spill.Lane << '\n';);
+ } else {
+ LLVM_DEBUG(dbgs() << "Saving FP with copy to " <<
+ printReg(MFI->SGPRForFPSaveRestoreCopy, TRI) << '\n');
+ }
+}
+
+void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI->isEntryFunction())
+ return;
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
// The SP is specifically managed and we don't want extra spills of it.
SavedRegs.reset(MFI->getStackPtrOffsetReg());
+ SavedRegs.clearBitsInMask(TRI->getAllVGPRRegMask());
+}
+
+bool SIFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ if (CSI.empty())
+ return true; // Early exit if no callee saved registers are modified!
+
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ if (!FuncInfo->SGPRForFPSaveRestoreCopy)
+ return false;
+
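+ // Redirect the frame pointer's save into the chosen SGPR copy; returning
+ // false afterwards lets the default logic assign spill slots for the rest.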
+ for (auto &CS : CSI) {
+ if (CS.getReg() == FuncInfo->getFrameOffsetReg()) {
+ if (FuncInfo->SGPRForFPSaveRestoreCopy != AMDGPU::NoRegister)
+ CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+ break;
+ }
+ }
+
+ return false;
}
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
@@ -757,8 +1103,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (!TFI->hasReservedCallFrame(MF)) {
+ if (!hasReservedCallFrame(MF)) {
unsigned Align = getStackAlignment();
Amount = alignTo(Amount, Align);
@@ -777,60 +1122,25 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
-void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
- const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- MachineBasicBlock::iterator I = MBB.begin();
- DebugLoc DL;
-
- // For each dimension:
- for (unsigned i = 0; i < 3; ++i) {
- // Get work group ID SGPR, and make it live-in again.
- unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
- MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
- MBB.addLiveIn(WorkGroupIDSGPR);
-
- // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
- // order to spill it to scratch.
- unsigned WorkGroupIDVGPR =
- MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
- .addReg(WorkGroupIDSGPR);
-
- // Spill work group ID.
- int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
- TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
- WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
-
- // Get work item ID VGPR, and make it live-in again.
- unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
- MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
- MBB.addLiveIn(WorkItemIDVGPR);
-
- // Spill work item ID.
- int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
- TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
- WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
- }
-}
-
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
- // All stack operations are relative to the frame offset SGPR.
- // TODO: Still want to eliminate sometimes.
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.hasCalls()) {
+ // All offsets are unsigned, so they must be addressed in the same
+ // direction as stack growth.
+
+ // FIXME: This function is pretty broken, since it can be called before the
+ // frame layout is determined or CSR spills are inserted.
+ if (MFI.getStackSize() != 0)
+ return true;
+
+ // For the entry point, the input wave scratch offset must be copied to the
+ // API SP if there are calls.
+ if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
+ return true;
+ }
- // XXX - Is this only called after frame is finalized? Should be able to check
- // frame size.
- return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
-}
-
-bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
- // All stack operations are relative to the frame offset SGPR.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
+ return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+ MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
+ MF.getTarget().Options.DisableFramePointerElim(MF);
}
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index 2f35b3631cdc..c644f4726e2c 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -1,9 +1,8 @@
//===--------------------- SIFrameLowering.h --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -37,6 +36,14 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
+ void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
+ bool isSupportedStackID(TargetStackID::Value ID) const override;
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
@@ -59,15 +66,9 @@ private:
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const;
- std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg(
- const GCNSubtarget &ST,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI,
- MachineFunction &MF) const;
-
- /// Emits debugger prologue.
- void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
+ const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
@@ -77,7 +78,6 @@ private:
public:
bool hasFP(const MachineFunction &MF) const override;
- bool hasSP(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 0ba921647097..db0782e2bf3e 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,6 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
@@ -95,11 +93,10 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
-static cl::opt<unsigned> AssumeFrameIndexHighZeroBits(
- "amdgpu-frame-index-zero-bits",
- cl::desc("High bits of frame index assumed to be zero"),
- cl::init(5),
- cl::ReallyHidden);
+static cl::opt<bool> DisableLoopAlignment(
+ "amdgpu-disable-loop-alignment",
+ cl::desc("Do not align and prefetch loops"),
+ cl::init(false));
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
@@ -125,12 +122,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
+ addRegisterClass(MVT::v3f32, &AMDGPU::VReg_96RegClass);
+
addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
+ addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
+
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -148,18 +151,27 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
}
+ if (Subtarget->hasMAIInsts()) {
+ addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
+ addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
+ }
+
computeRegisterProperties(Subtarget->getRegisterInfo());
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v5i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
setOperationAction(ISD::LOAD, MVT::i1, Custom);
setOperationAction(ISD::LOAD, MVT::v32i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v3i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v5i32, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -218,11 +230,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v8f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -248,8 +264,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
- MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
+ for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+ MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
+ MVT::v32i32, MVT::v32f32 }) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -323,6 +340,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f16, Custom);
+ // Deal with vec3 vector operations when widened to vec4.
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3i32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v3f32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Custom);
+
+ // Deal with vec5 vector operations when widened to vec8.
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Custom);
+
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -400,7 +429,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (Subtarget->haveRoundOpsF64()) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -492,7 +521,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
- if (!Subtarget->hasFP16Denormals())
+ if (!Subtarget->hasFP16Denormals() && STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
@@ -607,6 +636,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+
setOperationAction(ISD::SHL, MVT::v4i16, Custom);
setOperationAction(ISD::SRA, MVT::v4i16, Custom);
setOperationAction(ISD::SRL, MVT::v4i16, Custom);
@@ -679,6 +711,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FCANONICALIZE);
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
@@ -701,13 +734,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ATOMIC_LOAD_MAX);
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMIN);
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
+ setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
setSchedulingPreference(Sched::RegPressure);
-
- // SI at least has hardware support for floating point exceptions, but no way
- // of using or handling them is implemented. They are also optional in OpenCL
- // (Section 7.3)
- setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
}
const GCNSubtarget *SITargetLowering::getSubtarget() const {
@@ -910,6 +939,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@@ -919,13 +950,75 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.align = 0;
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
+ if (!Vol->isZero())
+ Info.flags |= MachineMemOperand::MOVolatile;
+
+ return true;
+ }
+ case Intrinsic::amdgcn_buffer_atomic_fadd: {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
+ Info.ptrVal = MFI->getBufferPSV(
+ *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
+ CI.getArgOperand(1));
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::amdgcn_global_atomic_fadd: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::getVT(CI.getOperand(0)->getType()
+ ->getPointerElementType());
+ Info.ptrVal = CI.getOperand(0);
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ return true;
+ }
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = CI.getOperand(0);
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+ const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
+ if (!Vol->isZero())
+ Info.flags |= MachineMemOperand::MOVolatile;
+
+ return true;
+ }
+ case Intrinsic::amdgcn_ds_gws_init:
+ case Intrinsic::amdgcn_ds_gws_barrier:
+ case Intrinsic::amdgcn_ds_gws_sema_v:
+ case Intrinsic::amdgcn_ds_gws_sema_br:
+ case Intrinsic::amdgcn_ds_gws_sema_p:
+ case Intrinsic::amdgcn_ds_gws_sema_release_all: {
+ Info.opc = ISD::INTRINSIC_VOID;
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ Info.ptrVal =
+ MFI->getGWSPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
+ // This is an abstract access, but we need to specify a type and size.
+ Info.memVT = MVT::i32;
+ Info.size = 4;
+ Info.align = 4;
+
+ Info.flags = MachineMemOperand::MOStore;
+ if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
default:
return false;
}
@@ -937,6 +1030,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@@ -960,6 +1055,13 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
// GFX9 added a 13-bit signed offset. When using regular flat instructions,
// the sign bit is ignored and is treated as a 12-bit unsigned offset.
+ // GFX10 shrank the signed offset to 12 bits. When using regular flat
+ // instructions, the sign bit is also ignored and the offset is treated as
+ // an 11-bit unsigned offset.
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+ return isUInt<11>(AM.BaseOffs) && AM.Scale == 0;
+
// Just r + i
return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
}
@@ -1030,7 +1132,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return isLegalGlobalAddressingMode(AM);
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUAS::BUFFER_FAT_POINTER) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -1106,16 +1209,15 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
}
-bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- unsigned Align,
- bool *IsFast) const {
+bool SITargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *IsFast) const {
if (IsFast)
*IsFast = false;
@@ -1178,11 +1280,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
-EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
+EVT SITargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
@@ -1201,7 +1302,8 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
static bool isFlatGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -1216,8 +1318,8 @@ bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
return I && I->getMetadata("amdgpu.noclobber");
}
-bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
- unsigned DestAS) const {
+bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
@@ -1305,6 +1407,17 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
const ISD::InputArg *Arg) const {
+ // First, if it is a widened vector, narrow it.
+ if (VT.isVector() &&
+ VT.getVectorNumElements() != MemVT.getVectorNumElements()) {
+ EVT NarrowedVT =
+ EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(),
+ VT.getVectorNumElements());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, NarrowedVT, Val,
+ DAG.getConstant(0, SL, MVT::i32));
+ }
+
+ // Then convert the vector elements or scalar value.
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
@@ -1441,8 +1554,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
- !Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) {
-
+ !Arg->Flags.isInReg() && PSInputNum <= 15) {
bool SkipArg = !Arg->Used && !Info->isPSInputAllocated(PSInputNum);
// Inconveniently only the first part of the split is marked as isSplit,
@@ -1508,7 +1620,13 @@ static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
// Try to allocate a VGPR at the end of the argument list, or if no argument
// VGPRs are left allocating a stack slot.
-static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
+// If \p Mask is given it indicates the bitfield position in the register.
+// If \p Arg is given use it with the new \p Mask instead of allocating a new one.
+static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u,
+ ArgDescriptor Arg = ArgDescriptor()) {
+ if (Arg.isSet())
+ return ArgDescriptor::createArg(Arg, Mask);
+
ArrayRef<MCPhysReg> ArgVGPRs
= makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), 32);
unsigned RegIdx = CCInfo.getFirstUnallocated(ArgVGPRs);
@@ -1516,7 +1634,7 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
// Spill to stack required.
int64_t Offset = CCInfo.AllocateStack(4, 4);
- return ArgDescriptor::createStack(Offset);
+ return ArgDescriptor::createStack(Offset, Mask);
}
unsigned Reg = ArgVGPRs[RegIdx];
@@ -1525,7 +1643,7 @@ static ArgDescriptor allocateVGPR32Input(CCState &CCInfo) {
MachineFunction &MF = CCInfo.getMachineFunction();
MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- return ArgDescriptor::createRegister(Reg);
+ return ArgDescriptor::createRegister(Reg, Mask);
}
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo,
@@ -1557,14 +1675,21 @@ static void allocateSpecialInputVGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
- if (Info.hasWorkItemIDX())
- Info.setWorkItemIDX(allocateVGPR32Input(CCInfo));
+ const unsigned Mask = 0x3ff;
+ ArgDescriptor Arg;
+
+ if (Info.hasWorkItemIDX()) {
+ Arg = allocateVGPR32Input(CCInfo, Mask);
+ Info.setWorkItemIDX(Arg);
+ }
- if (Info.hasWorkItemIDY())
- Info.setWorkItemIDY(allocateVGPR32Input(CCInfo));
+ if (Info.hasWorkItemIDY()) {
+ Arg = allocateVGPR32Input(CCInfo, Mask << 10, Arg);
+ Info.setWorkItemIDY(Arg);
+ }
if (Info.hasWorkItemIDZ())
- Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo));
+ Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
}
static void allocateSpecialInputSGPRs(CCState &CCInfo,
@@ -1714,6 +1839,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// should reserve the arguments and use them directly.
MachineFrameInfo &MFI = MF.getFrameInfo();
bool HasStackObjects = MFI.hasStackObjects();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
@@ -1729,65 +1855,89 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// the scratch registers to pass in.
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.isAmdHsaOrMesa(MF.getFunction())) {
- if (RequiresStackAccess) {
- // If we have stack objects, we unquestionably need the private buffer
- // resource. For the Code Object V2 ABI, this will be the first 4 user
- // SGPR inputs. We can reserve those and use them directly.
-
- unsigned PrivateSegmentBufferReg = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
- Info.setScratchRSrcReg(PrivateSegmentBufferReg);
-
- if (MFI.hasCalls()) {
- // If we have calls, we need to keep the frame register in a register
- // that won't be clobbered by a call, so ensure it is copied somewhere.
-
- // This is not a problem for the scratch wave offset, because the same
- // registers are reserved in all functions.
-
- // FIXME: Nothing is really ensuring this is a call preserved register,
- // it's just selected from the end so it happens to be.
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- } else {
- unsigned PrivateSegmentWaveByteOffsetReg = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
- }
- } else {
- unsigned ReservedBufferReg
- = TRI.reservedPrivateSegmentBufferReg(MF);
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-
- // We tentatively reserve the last registers (skipping the last two
- // which may contain VCC). After register allocation, we'll replace
- // these with the ones immediately after those which were really
- // allocated. In the prologue copies will be inserted from the argument
- // to these reserved registers.
- Info.setScratchRSrcReg(ReservedBufferReg);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- }
+ if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
+ // If we have stack objects, we unquestionably need the private buffer
+ // resource. For the Code Object V2 ABI, this will be the first 4 user
+ // SGPR inputs. We can reserve those and use them directly.
+
+ unsigned PrivateSegmentBufferReg =
+ Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
+ Info.setScratchRSrcReg(PrivateSegmentBufferReg);
} else {
unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
+ // We tentatively reserve the last registers (skipping those which may
+ // contain VCC, FLAT_SCR, and XNACK). After register allocation, we'll
+ // replace these with the ones immediately after those which were really
+ // allocated. In the prologue, copies will be inserted from the argument
+ // to these reserved registers.
// Without HSA, relocations are used for the scratch pointer and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
Info.setScratchRSrcReg(ReservedBufferReg);
+ }
- if (HasStackObjects && !MFI.hasCalls()) {
- unsigned ScratchWaveOffsetReg = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ // hasFP should be accurate for kernels even before the frame is finalized.
+ if (ST.getFrameLowering()->hasFP(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Try to use s32 as the SP, but move it if it would interfere with input
+ // arguments. This won't work with calls though.
+ //
+ // FIXME: Move SP to avoid any possible inputs, or find a way to spill input
+ // registers.
+ if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
+ Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
} else {
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
+
+ if (MFI.hasCalls())
+ report_fatal_error("call in graphics shader with too many input SGPRs");
+
+ for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
+ if (!MRI.isLiveIn(Reg)) {
+ Info.setStackPtrOffsetReg(Reg);
+ break;
+ }
+ }
+
+ if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
+ report_fatal_error("failed to find register for SP");
+ }
+
+ if (MFI.hasCalls()) {
+ Info.setScratchWaveOffsetReg(AMDGPU::SGPR33);
+ Info.setFrameOffsetReg(AMDGPU::SGPR33);
+ } else {
+ unsigned ReservedOffsetReg =
+ TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setFrameOffsetReg(ReservedOffsetReg);
}
+ } else if (RequiresStackAccess) {
+ assert(!MFI.hasCalls());
+ // We know there are accesses and they will be done relative to SP, so just
+ // pin it to the input.
+ //
+ // FIXME: Should not do this if inline asm is reading/writing these
+ // registers.
+ unsigned PreloadedSP = Info.getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+
+ Info.setStackPtrOffsetReg(PreloadedSP);
+ Info.setScratchWaveOffsetReg(PreloadedSP);
+ Info.setFrameOffsetReg(PreloadedSP);
+ } else {
+ assert(!MFI.hasCalls());
+
+ // There may not be stack access at all. There may still be spills, or
+ // access of a constant pointer (in which case an extra copy will be
+ // emitted in the prolog).
+ unsigned ReservedOffsetReg
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info.setStackPtrOffsetReg(ReservedOffsetReg);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setFrameOffsetReg(ReservedOffsetReg);
}
}
@@ -1845,7 +1995,6 @@ SDValue SITargetLowering::LowerFormalArguments(
const Function &Fn = MF.getFunction();
FunctionType *FType = MF.getFunction().getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
DiagnosticInfoUnsupported NoGraphicsHSA(
@@ -1854,11 +2003,6 @@ SDValue SITargetLowering::LowerFormalArguments(
return DAG.getEntryNode();
}
- // Create stack objects that are used for emitting debugger prologue if
- // "amdgpu-debugger-emit-prologue" attribute was specified.
- if (ST.debuggerEmitPrologue())
- createDebuggerPrologueStackObjects(MF);
-
SmallVector<ISD::InputArg, 16> Splits;
SmallVector<CCValAssign, 16> ArgLocs;
BitVector Skipped(Ins.size());
@@ -1869,12 +2013,6 @@ SDValue SITargetLowering::LowerFormalArguments(
bool IsKernel = AMDGPU::isKernel(CallConv);
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
- if (!IsEntryFunc) {
- // 4 bytes are reserved at offset 0 for the emergency stack slot. Skip over
- // this when allocating argument fixed offsets.
- CCInfo.AllocateStack(4, 4);
- }
-
if (IsShader) {
processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
@@ -1975,7 +2113,8 @@ SDValue SITargetLowering::LowerFormalArguments(
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ ParamTy && (ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ ParamTy->getAddressSpace() == AMDGPUAS::REGION_ADDRESS)) {
// On SI local pointers are just offsets into LDS, so they are always
// less than 16-bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
@@ -2002,13 +2141,14 @@ SDValue SITargetLowering::LowerFormalArguments(
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- if (Arg.Flags.isSRet() && !getSubtarget()->enableHugePrivateBuffer()) {
+ if (Arg.Flags.isSRet()) {
// The return object should be reasonably addressable.
// FIXME: This helps when the return is a real sret. If it is a
// automatically inserted sret (i.e. CanLowerReturn returns false), an
// extra copy is inserted in SelectionDAGBuilder which obscures this.
- unsigned NumBits = 32 - AssumeFrameIndexHighZeroBits;
+ unsigned NumBits
+ = 32 - getSubtarget()->getKnownHighZeroBitsForFrameIndex();
Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
}
@@ -2126,16 +2266,13 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue ReturnAddrReg = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
- // FIXME: Should be able to use a vreg here, but need a way to prevent it
- // from being allcoated to a CSR.
-
- SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
- MVT::i64);
-
- Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
+ SDValue ReturnAddrVirtualReg = DAG.getRegister(
+ MF.getRegInfo().createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass),
+ MVT::i64);
+ Chain =
+ DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
Flag = Chain.getValue(1);
-
- RetOps.push_back(PhysReturnAddrReg);
+ RetOps.push_back(ReturnAddrVirtualReg);
}
// Copy the result values into the output registers.
@@ -2295,9 +2432,6 @@ void SITargetLowering::passSpecialInputs(
AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
- AMDGPUFunctionArgInfo::WORKITEM_ID_X,
- AMDGPUFunctionArgInfo::WORKITEM_ID_Y,
- AMDGPUFunctionArgInfo::WORKITEM_ID_Z,
AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR
};
@@ -2337,6 +2471,71 @@ void SITargetLowering::passSpecialInputs(
MemOpChains.push_back(ArgStore);
}
}
+
+ // Pack workitem IDs into a single register, or pass them as-is if they
+ // are already packed.
+ const ArgDescriptor *OutgoingArg;
+ const TargetRegisterClass *ArgRC;
+
+ std::tie(OutgoingArg, ArgRC) =
+ CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+ if (!OutgoingArg)
+ std::tie(OutgoingArg, ArgRC) =
+ CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+ if (!OutgoingArg)
+ std::tie(OutgoingArg, ArgRC) =
+ CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
+ if (!OutgoingArg)
+ return;
+
+ const ArgDescriptor *IncomingArgX
+ = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X).first;
+ const ArgDescriptor *IncomingArgY
+ = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y).first;
+ const ArgDescriptor *IncomingArgZ
+ = CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z).first;
+
+ SDValue InputReg;
+ SDLoc SL;
+
+ // If the incoming IDs are not packed, we need to pack them.
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX)
+ InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
+
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY) {
+ SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
+ Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
+ DAG.getShiftAmountConstant(10, MVT::i32, SL));
+ InputReg = InputReg.getNode() ?
+ DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
+ }
+
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ) {
+ SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
+ Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
+ DAG.getShiftAmountConstant(20, MVT::i32, SL));
+ InputReg = InputReg.getNode() ?
+ DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
+ }
+
+ if (!InputReg.getNode()) {
+ // Workitem IDs are already packed; any of the present incoming arguments
+ // will carry all required fields.
+ ArgDescriptor IncomingArg = ArgDescriptor::createArg(
+ IncomingArgX ? *IncomingArgX :
+ IncomingArgY ? *IncomingArgY :
+ *IncomingArgZ, ~0u);
+ InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, IncomingArg);
+ }
+
+ if (OutgoingArg->isRegister()) {
+ RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ } else {
+ unsigned SpecialArgOffset = CCInfo.AllocateStack(4, 4);
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
+ MemOpChains.push_back(ArgStore);
+ }
}
static bool canGuaranteeTCO(CallingConv::ID CC) {
@@ -2478,7 +2677,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported call from graphics shader of function ");
}
- // The first 4 bytes are reserved for the callee's emergency stack slot.
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -2505,9 +2703,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
- // The first 4 bytes are reserved for the callee's emergency stack slot.
- CCInfo.AllocateStack(4, 4);
-
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2528,31 +2723,19 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- SDValue CallerSavedFP;
-
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall) {
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
- unsigned OffsetReg = Info->getScratchWaveOffsetReg();
+ SmallVector<SDValue, 4> CopyFromChains;
// In the HSA case, this should be an identity copy.
SDValue ScratchRSrcReg
= DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
-
- // TODO: Don't hardcode these registers and get from the callee function.
- SDValue ScratchWaveOffsetReg
- = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
- RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
-
- if (!Info->isEntryFunction()) {
- // Avoid clobbering this function's FP value. In the current convention
- // callee will overwrite this, so do save/restore around the call site.
- CallerSavedFP = DAG.getCopyFromReg(Chain, DL,
- Info->getFrameOffsetReg(), MVT::i32);
- }
+ CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
+ Chain = DAG.getTokenFactor(DL, CopyFromChains);
}
SmallVector<SDValue, 8> MemOpChains;
@@ -2694,6 +2877,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
+ // Add a redundant copy of the callee global which will not be legalized, as
+ // we need direct access to the callee later.
+ GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = GSD->getGlobal();
+ Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
@@ -2735,12 +2923,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = Call.getValue(0);
InFlag = Call.getValue(1);
- if (CallerSavedFP) {
- SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32);
- Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
- InFlag = Chain.getValue(1);
- }
-
uint64_t CalleePopBytes = NumBytes;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
@@ -2773,8 +2955,8 @@ unsigned SITargetLowering::getRegisterByName(const char* RegName, EVT VT,
}
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
- Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
+ if (!Subtarget->hasFlatScrRegister() &&
+ Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
}
@@ -2830,6 +3012,107 @@ MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI,
return SplitBB;
}
+// Split block \p MBB at \p MI, so as to insert a loop. If \p InstInLoop is true,
+// \p MI will be the only instruction in the loop body block. Otherwise, it will
+// be the first instruction in the remainder block.
+//
+/// \returns { LoopBody, Remainder }
+static std::pair<MachineBasicBlock *, MachineBasicBlock *>
+splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) {
+ MachineFunction *MF = MBB.getParent();
+ MachineBasicBlock::iterator I(&MI);
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(RemainderBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+
+ if (InstInLoop) {
+ auto Next = std::next(I);
+
+ // Move instruction to loop body.
+ LoopBB->splice(LoopBB->begin(), &MBB, I, Next);
+
+ // Move the rest of the block.
+ RemainderBB->splice(RemainderBB->begin(), &MBB, Next, MBB.end());
+ } else {
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+ }
+
+ MBB.addSuccessor(LoopBB);
+
+ return std::make_pair(LoopBB, RemainderBB);
+}
+
+MachineBasicBlock *
+SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ MachineBasicBlock *LoopBB;
+ MachineBasicBlock *RemainderBB;
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ MachineBasicBlock::iterator Prev = std::prev(MI.getIterator());
+
+ std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
+
+ MachineBasicBlock::iterator I = LoopBB->end();
+ MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
+
+ const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
+ AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
+
+ // Clear TRAP_STS.MEM_VIOL
+ BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
+ .addImm(0)
+ .addImm(EncodedReg);
+
+ // This is a pain, but we're not allowed to have physical register live-ins
+ // yet. Insert a pair of copies if the VGPR0 hack is necessary.
+ if (Src && TargetRegisterInfo::isPhysicalRegister(Src->getReg())) {
+ unsigned Data0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*BB, std::next(Prev), DL, TII->get(AMDGPU::COPY), Data0)
+ .add(*Src);
+
+ BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::COPY), Src->getReg())
+ .addReg(Data0);
+
+ MRI.setSimpleHint(Data0, Src->getReg());
+ }
+
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // Load and check TRAP_STS.MEM_VIOL
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg)
+ .addImm(EncodedReg);
+
+ // FIXME: Do we need to use an isel pseudo that may clobber scc?
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32))
+ .addReg(Reg, RegState::Kill)
+ .addImm(0);
+ BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
+ .addMBB(LoopBB);
+
+ return RemainderBB;
+}
+
// Do a v_movrels_b32 or v_movreld_b32 for each unique value of \p IdxReg in the
// wavefront. If the value is uniform and just happens to be in a VGPR, this
// will only do one iteration. In the worst case, this will loop 64 times.
@@ -2849,12 +3132,16 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
int Offset,
bool UseGPRIdxMode,
bool IsIndirectSrc) {
+ MachineFunction *MF = OrigBB.getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock::iterator I = LoopBB.begin();
- unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ const TargetRegisterClass *BoolRC = TRI->getBoolRC();
+ unsigned PhiExec = MRI.createVirtualRegister(BoolRC);
+ unsigned NewExec = MRI.createVirtualRegister(BoolRC);
unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CondReg = MRI.createVirtualRegister(BoolRC);
BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
.addReg(InitReg)
@@ -2878,7 +3165,9 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
.addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
// Update EXEC, save the original EXEC value to VCC.
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), NewExec)
+ BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
+ : AMDGPU::S_AND_SAVEEXEC_B64),
+ NewExec)
.addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
@@ -2894,7 +3183,7 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
.addImm(Offset);
}
unsigned IdxMode = IsIndirectSrc ?
- VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
+ AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
MachineInstr *SetOn =
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
.addReg(IdxReg, RegState::Kill)
@@ -2913,10 +3202,12 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
}
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
MachineInstr *InsertPt =
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(NewExec);
+ BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
+ : AMDGPU::S_XOR_B64_term), Exec)
+ .addReg(Exec)
+ .addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0?
@@ -2942,38 +3233,28 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
bool UseGPRIdxMode,
bool IsIndirectSrc) {
MachineFunction *MF = MBB.getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
+ const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- unsigned TmpExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned TmpExec = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
// Save the EXEC mask
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), SaveExec)
- .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, I, DL, TII->get(MovExecOpc), SaveExec)
+ .addReg(Exec);
- // To insert the loop we need to split the block. Move everything after this
- // point to a new block, and insert a new empty block between the two.
- MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
- MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
-
- MF->insert(MBBI, LoopBB);
- MF->insert(MBBI, RemainderBB);
-
- LoopBB->addSuccessor(LoopBB);
- LoopBB->addSuccessor(RemainderBB);
-
- // Move the rest of the block into a new block.
- RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
- RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
-
- MBB.addSuccessor(LoopBB);
+ MachineBasicBlock *LoopBB;
+ MachineBasicBlock *RemainderBB;
+ std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, MBB, false);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
@@ -2982,7 +3263,7 @@ static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
Offset, UseGPRIdxMode, IsIndirectSrc);
MachineBasicBlock::iterator First = RemainderBB->begin();
- BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ BuildMI(*RemainderBB, First, DL, TII->get(MovExecOpc), Exec)
.addReg(SaveExec);
return InsPt;
@@ -3025,7 +3306,7 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
if (UseGPRIdxMode) {
unsigned IdxMode = IsIndirectSrc ?
- VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
+ AMDGPU::VGPRIndexMode::SRC0_ENABLE : AMDGPU::VGPRIndexMode::DST_ENABLE;
if (Offset == 0) {
MachineInstr *SetOn =
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
@@ -3274,6 +3555,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetRegisterClass *BoolRC = TRI->getBoolRC();
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
@@ -3284,17 +3568,17 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
+ Src0, BoolRC, AMDGPU::sub0,
&AMDGPU::SReg_32_XM0RegClass);
MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src0, &AMDGPU::SReg_64RegClass, AMDGPU::sub1,
+ Src0, BoolRC, AMDGPU::sub1,
&AMDGPU::SReg_32_XM0RegClass);
MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub0,
+ Src1, BoolRC, AMDGPU::sub0,
&AMDGPU::SReg_32_XM0RegClass);
MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(MI, MRI,
- Src1, &AMDGPU::SReg_64RegClass, AMDGPU::sub1,
+ Src1, BoolRC, AMDGPU::sub1,
&AMDGPU::SReg_32_XM0RegClass);
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
@@ -3330,6 +3614,14 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
+ case AMDGPU::SI_INIT_EXEC_LO:
+ // This should be before all vector instructions.
+ BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+ AMDGPU::EXEC_LO)
+ .addImm(MI.getOperand(0).getImm());
+ MI.eraseFromParent();
+ return BB;
+
case AMDGPU::SI_INIT_EXEC_FROM_INPUT: {
// Extract the thread count from an SGPR input and set EXEC accordingly.
// Since BFM can't shift by 64, handle that case with CMP + CMOV.
@@ -3363,24 +3655,31 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
(void)Found;
// This should be before all vector instructions.
+ unsigned Mask = (getSubtarget()->getWavefrontSize() << 1) - 1;
+ bool isWave32 = getSubtarget()->isWave32();
+ unsigned Exec = isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg)
.addReg(InputReg)
- .addImm((MI.getOperand(1).getImm() & 0x7f) | 0x70000);
- BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFM_B64),
- AMDGPU::EXEC)
+ .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+ BuildMI(*BB, FirstMI, DebugLoc(),
+ TII->get(isWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64),
+ Exec)
.addReg(CountReg)
.addImm(0);
BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMP_EQ_U32))
.addReg(CountReg, RegState::Kill)
- .addImm(64);
- BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMOV_B64),
- AMDGPU::EXEC)
+ .addImm(getSubtarget()->getWavefrontSize());
+ BuildMI(*BB, FirstMI, DebugLoc(),
+ TII->get(isWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
+ Exec)
.addImm(-1);
MI.eraseFromParent();
return BB;
}
case AMDGPU::GET_GROUPSTATICSIZE: {
+ assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+ getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.add(MI.getOperand(0))
@@ -3405,6 +3704,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return splitKillBlock(MI, BB);
case AMDGPU::V_CNDMASK_B64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Src0 = MI.getOperand(1).getReg();
@@ -3414,16 +3715,21 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ unsigned SrcCondCopy = MRI.createVirtualRegister(CondRC);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addImm(0)
.addReg(Src0, 0, AMDGPU::sub0)
+ .addImm(0)
.addReg(Src1, 0, AMDGPU::sub0)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addImm(0)
.addReg(Src0, 0, AMDGPU::sub1)
+ .addImm(0)
.addReg(Src1, 0, AMDGPU::sub1)
.addReg(SrcCondCopy);
@@ -3457,40 +3763,60 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
.addReg(Info->getFrameOffsetReg(), RegState::Implicit);
return BB;
}
- case AMDGPU::SI_CALL_ISEL:
- case AMDGPU::SI_TCRETURN_ISEL: {
+ case AMDGPU::SI_CALL_ISEL: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
+
unsigned ReturnAddrReg = TII->getRegisterInfo().getReturnAddressReg(*MF);
- MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned GlobalAddrReg = MI.getOperand(0).getReg();
- MachineInstr *PCRel = MRI.getVRegDef(GlobalAddrReg);
- assert(PCRel->getOpcode() == AMDGPU::SI_PC_ADD_REL_OFFSET);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
- const GlobalValue *G = PCRel->getOperand(1).getGlobal();
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+ MIB.add(MI.getOperand(I));
- MachineInstrBuilder MIB;
- if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
- MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg)
- .add(MI.getOperand(0))
- .addGlobalAddress(G);
- } else {
- MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_TCRETURN))
- .add(MI.getOperand(0))
- .addGlobalAddress(G);
+ MIB.cloneMemRefs(MI);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::V_ADD_I32_e32:
+ case AMDGPU::V_SUB_I32_e32:
+ case AMDGPU::V_SUBREV_I32_e32: {
+ // TODO: Define distinct V_*_I32_Pseudo instructions instead.
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
- // There is an additional imm operand for tcreturn, but it should be in the
- // right place already.
+ bool NeedClampOperand = false;
+ if (TII->pseudoToMCOpcode(Opc) == -1) {
+ Opc = AMDGPU::getVOPe64(Opc);
+ NeedClampOperand = true;
}
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
+ auto I = BuildMI(*BB, MI, DL, TII->get(Opc), MI.getOperand(0).getReg());
+ if (TII->isVOP3(*I)) {
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ I.addReg(TRI->getVCC(), RegState::Define);
+ }
+ I.add(MI.getOperand(1))
+ .add(MI.getOperand(2));
+ if (NeedClampOperand)
+ I.addImm(0); // clamp bit for e64 encoding
+
+ TII->legalizeOperands(*I);
- MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::DS_GWS_INIT:
+ case AMDGPU::DS_GWS_SEMA_V:
+ case AMDGPU::DS_GWS_SEMA_BR:
+ case AMDGPU::DS_GWS_SEMA_P:
+ case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
+ case AMDGPU::DS_GWS_BARRIER:
+ if (getSubtarget()->hasGWSAutoReplay())
+ return BB;
+ return emitGWSMemViolTestLoop(MI, BB);
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
@@ -3617,6 +3943,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::LOAD: {
SDValue Result = LowerLOAD(Op, DAG);
assert((!Result.getNode() ||
@@ -3641,10 +3968,14 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
+ case ISD::INSERT_SUBVECTOR:
+ return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
@@ -3742,10 +4073,7 @@ SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
-
+ const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
int CondCode = CD->getSExtValue();
if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
@@ -3753,7 +4081,6 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
-
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -3769,16 +4096,20 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
- DAG.getCondCode(CCOpcode));
+ unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+ EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+
+ SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
+ DAG.getCondCode(CCOpcode));
+ if (VT.bitsEq(CCVT))
+ return SetCC;
+ return DAG.getZExtOrTrunc(SetCC, DL, VT);
}
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- const auto *CD = dyn_cast<ConstantSDNode>(N->getOperand(3));
- if (!CD)
- return DAG.getUNDEF(VT);
+ const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
int CondCode = CD->getSExtValue();
if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
@@ -3798,8 +4129,13 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
- Src1, DAG.getCondCode(CCOpcode));
+ unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+ EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+ SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
+ Src1, DAG.getCondCode(CCOpcode));
+ if (VT.bitsEq(CCVT))
+ return SetCC;
+ return DAG.getZExtOrTrunc(SetCC, SL, VT);
}
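Both comparison lowerings above now compute the lane mask in a wavefront-sized integer type (i32 for wave32, i64 for wave64) and only afterwards zero-extend or truncate it to the type the intrinsic asked for. A minimal standalone sketch of that final conversion step, with plain integers standing in for the SDValues (the function name and signature are illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Sketch: adjust a lane mask computed at wavefront width (32 or 64 bits)
// to the width requested by the intrinsic result type.
uint64_t adjustMaskWidth(uint64_t Mask, unsigned WavefrontSize,
                         unsigned ResultBits) {
  assert(WavefrontSize == 32 || WavefrontSize == 64);
  if (ResultBits >= WavefrontSize)
    return Mask;                              // zext: upper bits stay zero
  return Mask & ((1ull << ResultBits) - 1);   // trunc: drop the upper bits
}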
void SITargetLowering::ReplaceNodeResults(SDNode *N,
@@ -3957,32 +4293,6 @@ unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
return 0;
}
-void SITargetLowering::createDebuggerPrologueStackObjects(
- MachineFunction &MF) const {
- // Create stack objects that are used for emitting debugger prologue.
- //
- // Debugger prologue writes work group IDs and work item IDs to scratch memory
- // at fixed location in the following format:
- // offset 0: work group ID x
- // offset 4: work group ID y
- // offset 8: work group ID z
- // offset 16: work item ID x
- // offset 20: work item ID y
- // offset 24: work item ID z
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- int ObjectIdx = 0;
-
- // For each dimension:
- for (unsigned i = 0; i < 3; ++i) {
- // Create fixed stack object for work group ID.
- ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4, true);
- Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx);
- // Create fixed stack object for work item ID.
- ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4 + 16, true);
- Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx);
- }
-}
-
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
@@ -3991,7 +4301,10 @@ bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ // FIXME: Either avoid relying on address space here or change the default
+ // address space for functions to avoid the explicit check.
+ return (GV->getValueType()->isFunctionTy() ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitFixup(GV) &&
@@ -4103,6 +4416,31 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
+SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ // Only a depth of zero (the current function's return address) is supported.
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
+ return DAG.getConstant(0, DL, VT);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ // Check for kernel and shader functions
+ if (Info->isEntryFunction())
+ return DAG.getConstant(0, DL, VT);
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ // There is a call to @llvm.returnaddress in this function
+ MFI.setReturnAddressIsTaken(true);
+
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+ // Get the return address reg and mark it as an implicit live-in
+ unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF),
+ getRegClassFor(VT, Op.getNode()->isDivergent()));
+
+ return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+}
+
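As a sketch of the decision tree in LowerRETURNADDR (illustrative only, not the actual implementation): a depth other than zero and entry functions (kernels and shaders, which are never called and so have no return address) both produce a constant zero; otherwise the return address register is marked live-in and copied out.

// Illustrative helper mirroring the three cases handled above.
enum class RetAddrKind { ConstantZero, CopyFromReturnAddressReg };

RetAddrKind classifyReturnAddress(unsigned Depth, bool IsEntryFunction) {
  if (Depth != 0)       // only the current frame's return address is known
    return RetAddrKind::ConstantZero;
  if (IsEntryFunction)  // kernels/shaders are not called, so no return address
    return RetAddrKind::ConstantZero;
  return RetAddrKind::CopyFromReturnAddressReg;
}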
SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
@@ -4131,7 +4469,9 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ bool IsIEEEMode = Info->getMode().IEEE;
// FIXME: Assert during selection that this is only selected for
// ieee_mode. Currently a combine can produce the ieee version for non-ieee
@@ -4302,6 +4642,32 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}
+// This lowers an INSERT_SUBVECTOR by extracting the individual elements from
+// the small vector and inserting them into the big vector. That is better than
+// the default expansion of doing it via a stack slot. Even though the use of
+// the stack slot would be optimized away afterwards, the stack slot itself
+// remains.
+SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Ins = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT InsVT = Ins.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned InsNumElts = InsVT.getVectorNumElements();
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDLoc SL(Op);
+
+ for (unsigned I = 0; I != InsNumElts; ++I) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Ins,
+ DAG.getConstant(I, SL, MVT::i32));
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, VecVT, Vec, Elt,
+ DAG.getConstant(IdxVal + I, SL, MVT::i32));
+ }
+ return Vec;
+}
+
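The same element-by-element strategy, sketched on plain arrays (purely illustrative; the real code builds EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT nodes rather than touching memory, which is exactly what lets it avoid the stack slot):

#include <array>

// Insert a 2-element subvector into a 4-element vector at index Idx,
// one element at a time -- the scalar analogue of the lowering above.
std::array<float, 4> insertSubvector(std::array<float, 4> Vec,
                                     std::array<float, 2> Ins, unsigned Idx) {
  for (unsigned I = 0; I != Ins.size(); ++I)
    Vec[Idx + I] = Ins[I];   // extract element I of Ins, insert at Idx + I
  return Vec;
}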
SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
@@ -4352,12 +4718,12 @@ SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
MVT IntVT = MVT::getIntegerVT(VecSize);
// Avoid stack access for dynamic indexing.
- SDValue Val = InsVal;
- if (InsVal.getValueType() == MVT::f16)
- Val = DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsVal);
-
// v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
- SDValue ExtVal = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Val);
+
+ // Create a congruent vector with the target value in each element so that
+ // the required element can be masked and ORed into the target vector.
+ SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
+ DAG.getSplatBuildVector(VecVT, SL, InsVal));
assert(isPowerOf2_32(EltSize));
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
@@ -4419,6 +4785,63 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
}
+static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) {
+ assert(Elt % 2 == 0);
+ return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0);
+}
+
+SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ EVT ResultVT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+
+ EVT PackVT = ResultVT.isInteger() ? MVT::v2i16 : MVT::v2f16;
+ EVT EltVT = PackVT.getVectorElementType();
+ int SrcNumElts = Op.getOperand(0).getValueType().getVectorNumElements();
+
+ // vector_shuffle <0,1,6,7> lhs, rhs
+ // -> concat_vectors (extract_subvector lhs, 0), (extract_subvector rhs, 2)
+ //
+ // vector_shuffle <6,7,2,3> lhs, rhs
+ // -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 2)
+ //
+ // vector_shuffle <6,7,0,1> lhs, rhs
+ // -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 0)
+
+ // Avoid scalarizing when both halves are reading from consecutive elements.
+ SmallVector<SDValue, 4> Pieces;
+ for (int I = 0, N = ResultVT.getVectorNumElements(); I != N; I += 2) {
+ if (elementPairIsContiguous(SVN->getMask(), I)) {
+ const int Idx = SVN->getMaskElt(I);
+ int VecIdx = Idx < SrcNumElts ? 0 : 1;
+ int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts;
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL,
+ PackVT, SVN->getOperand(VecIdx),
+ DAG.getConstant(EltIdx, SL, MVT::i32));
+ Pieces.push_back(SubVec);
+ } else {
+ const int Idx0 = SVN->getMaskElt(I);
+ const int Idx1 = SVN->getMaskElt(I + 1);
+ int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
+ int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
+ int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts;
+ int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
+
+ SDValue Vec0 = SVN->getOperand(VecIdx0);
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec0, DAG.getConstant(EltIdx0, SL, MVT::i32));
+
+ SDValue Vec1 = SVN->getOperand(VecIdx1);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Vec1, DAG.getConstant(EltIdx1, SL, MVT::i32));
+ Pieces.push_back(DAG.getBuildVector(PackVT, SL, { Elt0, Elt1 }));
+ }
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces);
+}
+
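A standalone sketch of the pair classification driving this lowering: each pair of output elements either maps to two consecutive, even-aligned elements of a single source (so a packed 2-element subvector can be extracted directly) or has to be rebuilt from two scalar extracts. The names below are illustrative, not from the patch:

#include <cstdio>
#include <vector>

// Returns true if output elements Elt and Elt+1 read consecutive,
// even-aligned elements of a single source vector.
static bool pairIsContiguous(const std::vector<int> &Mask, int Elt) {
  return Mask[Elt + 1] == Mask[Elt] + 1 && Mask[Elt] % 2 == 0;
}

int main() {
  std::vector<int> Mask = {0, 1, 6, 7};   // vector_shuffle <0,1,6,7> lhs, rhs
  for (int I = 0, N = (int)Mask.size(); I != N; I += 2)
    std::printf("pair %d: %s\n", I / 2,
                pairIsContiguous(Mask, I) ? "extract_subvector"
                                          : "two scalar extracts");
  return 0;
}

For the mask <0,1,6,7> both pairs are contiguous, so the shuffle becomes a concat of two extract_subvector nodes, matching the first example in the comment above.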
SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -4512,11 +4935,18 @@ buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address.
- SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
- GAFlags);
- SDValue PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
- GAFlags == SIInstrInfo::MO_NONE ?
- GAFlags : GAFlags + 1);
+ unsigned LoFlags = GAFlags;
+ if (LoFlags == SIInstrInfo::MO_NONE)
+ LoFlags = SIInstrInfo::MO_REL32;
+ SDValue PtrLo =
+ DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, LoFlags);
+ SDValue PtrHi;
+ if (GAFlags == SIInstrInfo::MO_NONE) {
+ PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
+ } else {
+ PtrHi =
+ DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags + 1);
+ }
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
}
@@ -4525,7 +4955,10 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSD->getGlobal();
- if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ (!GV->hasExternalLinkage() ||
+ getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
+ getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL)) ||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
@@ -4533,7 +4966,12 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
- // FIXME: Should not make address space based decisions here.
+ if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
+ SIInstrInfo::MO_ABS32_LO);
+ return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
+ }
+
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
@@ -4641,10 +5079,8 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
}
static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
- SDValue *GLC, SDValue *SLC) {
- auto CachePolicyConst = dyn_cast<ConstantSDNode>(CachePolicy.getNode());
- if (!CachePolicyConst)
- return false;
+ SDValue *GLC, SDValue *SLC, SDValue *DLC) {
+ auto CachePolicyConst = cast<ConstantSDNode>(CachePolicy.getNode());
uint64_t Value = CachePolicyConst->getZExtValue();
SDLoc DL(CachePolicy);
@@ -4656,6 +5092,10 @@ static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
*SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x2;
}
+ if (DLC) {
+ *DLC = DAG.getTargetConstant((Value & 0x4) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x4;
+ }
return Value == 0;
}
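The cachepolicy operand is a small bitfield: bit 0 is glc, bit 1 is slc, and (new with this change) bit 2 is dlc; any remaining set bit makes the policy invalid and lowering bails out. A self-contained sketch of that decoding, with the nullable out-pointers folded into a plain struct (the struct and helper name are illustrative):

#include <cstdint>

struct CachePolicyBits {
  bool GLC = false;  // bit 0
  bool SLC = false;  // bit 1
  bool DLC = false;  // bit 2 (GFX10 only)
  bool Valid = false;
};

// Mirrors parseCachePolicy: peel off the known bits and require that
// nothing else is set.
CachePolicyBits decodeCachePolicy(uint64_t Value, bool HasDLC) {
  CachePolicyBits B;
  B.GLC = Value & 0x1; Value &= ~UINT64_C(0x1);
  B.SLC = Value & 0x2; Value &= ~UINT64_C(0x2);
  if (HasDLC) {
    B.DLC = Value & 0x4; Value &= ~UINT64_C(0x4);
  }
  B.Valid = (Value == 0);
  return B;
}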
@@ -4689,14 +5129,14 @@ static SDValue constructRetValue(SelectionDAG &DAG,
EVT CastVT = NumElts > 1 ? EVT::getVectorVT(Context, AdjEltVT, NumElts)
: AdjEltVT;
- // Special case for v8f16. Rather than add support for this, use v4i32 to
+ // Special case for v6f16. Rather than add support for this, use v3i32 to
// extract the data elements
- bool V8F16Special = false;
- if (CastVT == MVT::v8f16) {
- CastVT = MVT::v4i32;
+ bool V6F16Special = false;
+ if (NumElts == 6) {
+ CastVT = EVT::getVectorVT(Context, MVT::i32, NumElts / 2);
DMaskPop >>= 1;
ReqRetNumElts >>= 1;
- V8F16Special = true;
+ V6F16Special = true;
AdjVT = MVT::v2i32;
}
@@ -4726,7 +5166,7 @@ static SDValue constructRetValue(SelectionDAG &DAG,
PreTFCRes = BVElts[0];
}
- if (V8F16Special)
+ if (V6F16Special)
PreTFCRes = DAG.getNode(ISD::BITCAST, DL, MVT::v4f16, PreTFCRes);
if (!IsTexFail) {
@@ -4745,9 +5185,7 @@ static SDValue constructRetValue(SelectionDAG &DAG,
static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
SDValue *LWE, bool &IsTexFail) {
- auto TexFailCtrlConst = dyn_cast<ConstantSDNode>(TexFailCtrl.getNode());
- if (!TexFailCtrlConst)
- return false;
+ auto TexFailCtrlConst = cast<ConstantSDNode>(TexFailCtrl.getNode());
uint64_t Value = TexFailCtrlConst->getZExtValue();
if (Value) {
@@ -4774,7 +5212,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+ const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
+ AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
+ bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
@@ -4810,9 +5251,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
} else {
unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa<MemSDNode>(Op) ? 2 : 1;
- auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
- if (!DMaskConst)
- return Op;
+ auto DMaskConst = cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
DMask = DMaskConst->getZExtValue();
DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
@@ -4821,8 +5260,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MVT StoreVT = VData.getSimpleValueType();
if (StoreVT.getScalarType() == MVT::f16) {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
- !BaseOpcode->HasD16)
+ if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
@@ -4835,8 +5273,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
// and whether packing is supported.
MVT LoadVT = ResultTypes[0].getSimpleVT();
if (LoadVT.getScalarType() == MVT::f16) {
- if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
- !BaseOpcode->HasD16)
+ if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
@@ -4878,6 +5315,17 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
}
+ // Optimize _mip away when 'lod' is zero.
+ if (MIPMappingInfo) {
+ if (auto ConstantLod =
+ dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
+ if (ConstantLod->isNullValue()) {
+ IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
+ NumMIVAddrs--; // remove 'lod'
+ }
+ }
+ }
+
// Check for 16 bit addresses and pack if true.
unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
@@ -4915,7 +5363,22 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
VAddrs.push_back(Op.getOperand(AddrIdx + i));
}
- SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
+ // If the register allocator cannot place the address registers contiguously
+ // without introducing moves, then using the non-sequential address encoding
+ // is always preferable, since it saves VALU instructions and is usually a
+ // wash in terms of code size or even better.
+ //
+ // However, we currently have no way of hinting to the register allocator that
+ // MIMG addresses should be placed contiguously when it is possible to do so,
+ // so force non-NSA for the common 2-address case as a heuristic.
+ //
+ // SIShrinkInstructions will convert NSA encodings to non-NSA after register
+ // allocation when possible.
+ bool UseNSA =
+ ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3;
+ SDValue VAddr;
+ if (!UseNSA)
+ VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
@@ -4926,9 +5389,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
CtrlIdx = AddrIdx + NumVAddrs + 1;
} else {
auto UnormConst =
- dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
- if (!UnormConst)
- return Op;
+ cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
Unorm = UnormConst->getZExtValue() ? True : False;
CtrlIdx = AddrIdx + NumVAddrs + 3;
@@ -4965,9 +5426,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
return Undef;
}
- // Have to use a power of 2 number of dwords
- NumVDataDwords = 1 << Log2_32_Ceil(NumVDataDwords);
-
EVT NewVT = NumVDataDwords > 1 ?
EVT::getVectorVT(*DAG.getContext(), MVT::f32, NumVDataDwords)
: MVT::f32;
@@ -4983,45 +5441,66 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SDValue GLC;
SDValue SLC;
+ SDValue DLC;
if (BaseOpcode->Atomic) {
GLC = True; // TODO no-return optimization
- if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC))
+ if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC,
+ IsGFX10 ? &DLC : nullptr))
return Op;
} else {
- if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC))
+ if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC,
+ IsGFX10 ? &DLC : nullptr))
return Op;
}
- SmallVector<SDValue, 14> Ops;
+ SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
- Ops.push_back(VAddr);
+ if (UseNSA) {
+ for (const SDValue &Addr : VAddrs)
+ Ops.push_back(Addr);
+ } else {
+ Ops.push_back(VAddr);
+ }
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
if (BaseOpcode->Sampler)
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
+ if (IsGFX10)
+ Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
+ if (IsGFX10)
+ Ops.push_back(DLC);
Ops.push_back(GLC);
Ops.push_back(SLC);
Ops.push_back(IsA16 && // a16 or r128
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
Ops.push_back(TFE); // tfe
Ops.push_back(LWE); // lwe
- Ops.push_back(DimInfo->DA ? True : False);
+ if (!IsGFX10)
+ Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
if (isa<MemSDNode>(Op))
Ops.push_back(Op.getOperand(0)); // chain
- int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
+ int NumVAddrDwords =
+ UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
- NumVDataDwords, NumVAddrDwords);
- if (Opcode == -1)
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+ if (IsGFX10) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
+ UseNSA ? AMDGPU::MIMGEncGfx10NSA
+ : AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, NumVAddrDwords);
+ } else {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
+ NumVDataDwords, NumVAddrDwords);
+ if (Opcode == -1)
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+ NumVDataDwords, NumVAddrDwords);
+ }
assert(Opcode != -1);
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
@@ -5046,7 +5525,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
- SDValue Offset, SDValue GLC,
+ SDValue Offset, SDValue GLC, SDValue DLC,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5059,7 +5538,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDValue Ops[] = {
Rsrc,
Offset, // Offset
- GLC // glc
+ GLC,
+ DLC,
};
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
DAG.getVTList(VT), Ops, VT, MMO);
@@ -5263,16 +5743,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);
- case SIIntrinsic::SI_load_const: {
- SDValue Load =
- lowerSBuffer(MVT::i32, DL, Op.getOperand(1), Op.getOperand(2),
- DAG.getTargetConstant(0, DL, MVT::i1), DAG);
- return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Load);
- }
+ case Intrinsic::amdgcn_wavefrontsize:
+ return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
+ SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
- unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
- DAG.getTargetConstant(Cache & 1, DL, MVT::i1), DAG);
+ bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
+ SDValue GLC;
+ SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
+ if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
+ IsGFX10 ? &DLC : nullptr))
+ return Op;
+ return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), GLC, DLC,
+ DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
@@ -5295,12 +5777,70 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
Glue);
}
+ case Intrinsic::amdgcn_interp_p1_f16: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+ SDValue Glue = M0.getValue(1);
+ if (getSubtarget()->getLDSBankCount() == 16) {
+ // 16 bank LDS
+ SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
+ DAG.getConstant(2, DL, MVT::i32), // P0
+ Op.getOperand(2), // Attrchan
+ Op.getOperand(3), // Attr
+ Glue);
+ SDValue Ops[] = {
+ Op.getOperand(1), // Src0
+ Op.getOperand(2), // Attrchan
+ Op.getOperand(3), // Attr
+ DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ S, // Src2 - holds two f16 values selected by high
+ DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+ Op.getOperand(4), // high
+ DAG.getConstant(0, DL, MVT::i1), // $clamp
+ DAG.getConstant(0, DL, MVT::i32) // $omod
+ };
+ return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
+ } else {
+ // 32 bank LDS
+ SDValue Ops[] = {
+ Op.getOperand(1), // Src0
+ Op.getOperand(2), // Attrchan
+ Op.getOperand(3), // Attr
+ DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ Op.getOperand(4), // high
+ DAG.getConstant(0, DL, MVT::i1), // $clamp
+ DAG.getConstant(0, DL, MVT::i32), // $omod
+ Glue
+ };
+ return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
+ }
+ }
+ case Intrinsic::amdgcn_interp_p2_f16: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
+ SDValue Glue = SDValue(M0.getNode(), 1);
+ SDValue Ops[] = {
+ Op.getOperand(2), // Src0
+ Op.getOperand(3), // Attrchan
+ Op.getOperand(4), // Attr
+ DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ Op.getOperand(1), // Src2
+ DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+ Op.getOperand(5), // high
+ DAG.getConstant(0, DL, MVT::i1), // $clamp
+ Glue
+ };
+ return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
+ }
case Intrinsic::amdgcn_sin:
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_cos:
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
+ case Intrinsic::amdgcn_mul_u24:
+ return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::amdgcn_mul_i24:
+ return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::amdgcn_log_clamp: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return SDValue();
@@ -5334,10 +5874,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_div_scale: {
- // 3rd parameter required to be a constant.
- const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- if (!Param)
- return DAG.getMergeValues({ DAG.getUNDEF(VT), DAG.getUNDEF(MVT::i1) }, DL);
+ const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
// Translate to the operands expected by the machine instruction. The
// first parameter must be the same as the first instruction.
@@ -5423,6 +5960,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_fmad_ftz:
return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::amdgcn_if_break:
+ return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
+ Op->getOperand(1), Op->getOperand(2)), 0);
+
+ case Intrinsic::amdgcn_groupstaticsize: {
+ Triple::OSType OS = getTargetMachine().getTargetTriple().getOS();
+ if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
+ return Op;
+
+ const Module *M = MF.getFunction().getParent();
+ const GlobalValue *GV =
+ M->getNamedValue(Intrinsic::getName(Intrinsic::amdgcn_groupstaticsize));
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
+ SIInstrInfo::MO_ABS32_LO);
+ return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -5438,9 +5992,99 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDLoc DL(Op);
switch (IntrID) {
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ SDValue Chain = M->getOperand(0);
+ SDValue M0 = M->getOperand(2);
+ SDValue Value = M->getOperand(3);
+ unsigned IndexOperand = M->getConstantOperandVal(7);
+ unsigned WaveRelease = M->getConstantOperandVal(8);
+ unsigned WaveDone = M->getConstantOperandVal(9);
+ unsigned ShaderType;
+ unsigned Instruction;
+
+ unsigned OrderedCountIndex = IndexOperand & 0x3f;
+ IndexOperand &= ~0x3f;
+ unsigned CountDw = 0;
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) {
+ CountDw = (IndexOperand >> 24) & 0xf;
+ IndexOperand &= ~(0xf << 24);
+
+ if (CountDw < 1 || CountDw > 4) {
+ report_fatal_error(
+ "ds_ordered_count: dword count must be between 1 and 4");
+ }
+ }
+
+ if (IndexOperand)
+ report_fatal_error("ds_ordered_count: bad index operand");
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_ordered_add:
+ Instruction = 0;
+ break;
+ case Intrinsic::amdgcn_ds_ordered_swap:
+ Instruction = 1;
+ break;
+ }
+
+ if (WaveDone && !WaveRelease)
+ report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+
+ switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ ShaderType = 0;
+ break;
+ case CallingConv::AMDGPU_PS:
+ ShaderType = 1;
+ break;
+ case CallingConv::AMDGPU_VS:
+ ShaderType = 2;
+ break;
+ case CallingConv::AMDGPU_GS:
+ ShaderType = 3;
+ break;
+ default:
+ report_fatal_error("ds_ordered_count unsupported for this calling conv");
+ }
+
+ unsigned Offset0 = OrderedCountIndex << 2;
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+ (Instruction << 4);
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+ Offset1 |= (CountDw - 1) << 6;
+
+ unsigned Offset = Offset0 | (Offset1 << 8);
+
+ SDValue Ops[] = {
+ Chain,
+ Value,
+ DAG.getTargetConstant(Offset, DL, MVT::i16),
+ copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
+ };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
+ M->getVTList(), Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ }
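The offset immediate built for DS_ORDERED_COUNT packs several fields into two bytes; a standalone sketch of the encoding, with the field layout taken directly from the code above (the helper name is illustrative):

// offset0 (bits 0-7):  ordered-count index * 4
// offset1 (bits 8-15): wave_release, wave_done, shader type, instruction,
//                      plus dword count - 1 on GFX10.
unsigned encodeDsOrderedOffset(unsigned OrderedCountIndex, bool WaveRelease,
                               bool WaveDone, unsigned ShaderType,
                               unsigned Instruction, bool IsGFX10,
                               unsigned CountDw) {
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                     (Instruction << 4);
  if (IsGFX10)
    Offset1 |= (CountDw - 1) << 6;
  return Offset0 | (Offset1 << 8);
}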
+ case Intrinsic::amdgcn_ds_fadd: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ unsigned Opc;
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_fadd:
+ Opc = ISD::ATOMIC_LOAD_FADD;
+ break;
+ }
+
+ return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
+ M->getOperand(0), M->getOperand(2), M->getOperand(3),
+ M->getMemOperand());
+ }
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
- case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
@@ -5452,9 +6096,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::amdgcn_atomic_dec:
Opc = AMDGPUISD::ATOMIC_DEC;
break;
- case Intrinsic::amdgcn_ds_fadd:
- Opc = AMDGPUISD::ATOMIC_LOAD_FADD;
- break;
case Intrinsic::amdgcn_ds_fmin:
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
@@ -5503,8 +6144,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
- return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
- M->getMemOperand());
+
+ // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+ if (LoadVT.getScalarType() == MVT::i8 ||
+ LoadVT.getScalarType() == MVT::i16)
+ return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+ return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format: {
@@ -5531,8 +6178,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
- return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
- M->getMemOperand());
+
+ // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+ if (LoadVT.getScalarType() == MVT::i8 ||
+ LoadVT.getScalarType() == MVT::i16)
+ return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+ return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format: {
@@ -5559,8 +6212,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
- return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
- M->getMemOperand());
+
+ // Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
+ if (LoadVT.getScalarType() == MVT::i8 ||
+ LoadVT.getScalarType() == MVT::i16)
+ return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
+
+ return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
+ M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
@@ -5588,9 +6247,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
- Op->getVTList(), Ops, LoadVT,
- M->getMemOperand());
+ return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
+ DAG);
}
case Intrinsic::amdgcn_raw_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
@@ -5612,9 +6271,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
- Op->getVTList(), Ops, LoadVT,
- M->getMemOperand());
+ return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
+ DAG);
}
case Intrinsic::amdgcn_struct_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
@@ -5636,9 +6295,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
- Op->getVTList(), Ops, LoadVT,
- M->getMemOperand());
+ return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
+ DAG);
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
@@ -5913,6 +6572,39 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
}
+// Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
+// dwordx4 if on SI.
+SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO,
+ SelectionDAG &DAG) const {
+ EVT VT = VTList.VTs[0];
+ EVT WidenedVT = VT;
+ EVT WidenedMemVT = MemVT;
+ if (!Subtarget->hasDwordx3LoadStores() &&
+ (WidenedVT == MVT::v3i32 || WidenedVT == MVT::v3f32)) {
+ WidenedVT = EVT::getVectorVT(*DAG.getContext(),
+ WidenedVT.getVectorElementType(), 4);
+ WidenedMemVT = EVT::getVectorVT(*DAG.getContext(),
+ WidenedMemVT.getVectorElementType(), 4);
+ MMO = DAG.getMachineFunction().getMachineMemOperand(MMO, 0, 16);
+ }
+
+ assert(VTList.NumVTs == 2);
+ SDVTList WidenedVTList = DAG.getVTList(WidenedVT, VTList.VTs[1]);
+
+ auto NewOp = DAG.getMemIntrinsicNode(Opcode, DL, WidenedVTList, Ops,
+ WidenedMemVT, MMO);
+ if (WidenedVT != VT) {
+ auto Extract = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, NewOp,
+ DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ NewOp = DAG.getMergeValues({ Extract, SDValue(NewOp.getNode(), 1) }, DL);
+ }
+ return NewOp;
+}
+
SDValue SITargetLowering::handleD16VData(SDValue VData,
SelectionDAG &DAG) const {
EVT StoreVT = VData.getValueType();
@@ -6129,6 +6821,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+
+ // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
+ EVT VDataType = VData.getValueType().getScalarType();
+ if (VDataType == MVT::i8 || VDataType == MVT::i16)
+ return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
@@ -6155,6 +6853,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+
+ // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
+ EVT VDataType = VData.getValueType().getScalarType();
+ if (VDataType == MVT::i8 || VDataType == MVT::i16)
+ return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
@@ -6181,10 +6885,63 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+
+ // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
+ EVT VDataType = VData.getValueType().getScalarType();
+ if (VDataType == MVT::i8 || VDataType == MVT::i16)
+ return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
+
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
+ case Intrinsic::amdgcn_buffer_atomic_fadd: {
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2), // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ SDValue(), // voffset -- will be set by setBufferOffsets
+ SDValue(), // soffset -- will be set by setBufferOffsets
+ SDValue(), // offset -- will be set by setBufferOffsets
+ DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+ setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ EVT VT = Op.getOperand(2).getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = VT.isVector() ? AMDGPUISD::BUFFER_ATOMIC_PK_FADD
+ : AMDGPUISD::BUFFER_ATOMIC_FADD;
+
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_global_atomic_fadd: {
+ SDValue Ops[] = {
+ Chain,
+ Op.getOperand(2), // ptr
+ Op.getOperand(3) // vdata
+ };
+ EVT VT = Op.getOperand(3).getValueType();
+
+ auto *M = cast<MemSDNode>(Op);
+ unsigned Opcode = VT.isVector() ? AMDGPUISD::ATOMIC_PK_FADD
+ : AMDGPUISD::ATOMIC_FADD;
+
+ return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
+ M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_end_cf:
+ return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
+ Op->getOperand(2), Chain), 0);
+
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -6283,6 +7040,38 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
}
+// Handle 8-bit and 16-bit buffer loads
+SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
+ EVT LoadVT, SDLoc DL,
+ ArrayRef<SDValue> Ops,
+ MemSDNode *M) const {
+ EVT IntVT = LoadVT.changeTypeToInteger();
+ unsigned Opc = (LoadVT.getScalarType() == MVT::i8) ?
+ AMDGPUISD::BUFFER_LOAD_UBYTE : AMDGPUISD::BUFFER_LOAD_USHORT;
+
+ SDVTList ResList = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList,
+ Ops, IntVT,
+ M->getMemOperand());
+ SDValue BufferLoadTrunc = DAG.getNode(ISD::TRUNCATE, DL,
+ LoadVT.getScalarType(), BufferLoad);
+ return DAG.getMergeValues({BufferLoadTrunc, BufferLoad.getValue(1)}, DL);
+}
+
+// Handle 8-bit and 16-bit buffer stores
+SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
+ EVT VDataType, SDLoc DL,
+ SDValue Ops[],
+ MemSDNode *M) const {
+ SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
+ Ops[1] = BufferStoreExt;
+ unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE :
+ AMDGPUISD::BUFFER_STORE_SHORT;
+ ArrayRef<SDValue> OpsRef = makeArrayRef(&Ops[0], 9);
+ return DAG.getMemIntrinsicNode(Opc, DL, M->getVTList(), OpsRef, VDataType,
+ M->getMemOperand());
+}
+
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
@@ -6395,8 +7184,25 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, RealMemVT, MMO);
+ if (!MemVT.isVector()) {
+ SDValue Ops[] = {
+ DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
+ NewLD.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ SmallVector<SDValue, 3> Elts;
+ for (unsigned I = 0, N = MemVT.getVectorNumElements(); I != N; ++I) {
+ SDValue Elt = DAG.getNode(ISD::SRL, DL, MVT::i32, NewLD,
+ DAG.getConstant(I, DL, MVT::i32));
+
+ Elts.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Elt));
+ }
+
SDValue Ops[] = {
- DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
+ DAG.getBuildVector(MemVT, DL, Elts),
NewLD.getValue(1)
};
@@ -6409,15 +7215,21 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
- unsigned Alignment = Load->getAlignment();
- unsigned AS = Load->getAddressSpace();
if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- AS, Alignment)) {
+ *Load->getMemOperand())) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
return DAG.getMergeValues(Ops, DL);
}
+ unsigned Alignment = Load->getAlignment();
+ unsigned AS = Load->getAddressSpace();
+ if (Subtarget->hasLDSMisalignedBug() &&
+ AS == AMDGPUAS::FLAT_ADDRESS &&
+ Alignment < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
+ return SplitVectorLoad(Op, DAG);
+ }
+
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibilty that flat instruction access scratch memory
@@ -6430,8 +7242,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
- if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32)
- return SDValue();
+ if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32) {
+ if (MemVT.isPow2VectorType())
+ return SDValue();
+ if (NumElements == 3)
+ return WidenVectorLoad(Op, DAG);
+ return SplitVectorLoad(Op, DAG);
+ }
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
@@ -6443,8 +7260,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load) &&
- Alignment >= 4 && NumElements < 32)
- return SDValue();
+ Alignment >= 4 && NumElements < 32) {
+ if (MemVT.isPow2VectorType())
+ return SDValue();
+ if (NumElements == 3)
+ return WidenVectorLoad(Op, DAG);
+ return SplitVectorLoad(Op, DAG);
+ }
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
@@ -6456,7 +7278,10 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
- // v4 loads are supported for private and global memory.
+ // v3 loads not supported on SI.
+ if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+ return WidenVectorLoad(Op, DAG);
+ // v3 and v4 loads are supported for private and global memory.
return SDValue();
}
if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
@@ -6474,11 +7299,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// Same as global/flat
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
+ // v3 loads not supported on SI.
+ if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+ return WidenVectorLoad(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
// Use ds_read_b128 if possible.
if (Subtarget->useDS128() && Load->getAlignment() >= 16 &&
MemVT.getStoreSize() == 16)
@@ -6794,7 +7622,7 @@ SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
SDValue Scale;
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ if (!Subtarget->hasUsableDivScaleConditionOutput()) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
@@ -6856,12 +7684,18 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.isVector() &&
Store->getValue().getValueType().getScalarType() == MVT::i32);
- unsigned AS = Store->getAddressSpace();
if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- AS, Store->getAlignment())) {
+ *Store->getMemOperand())) {
return expandUnalignedStore(Store, DAG);
}
+ unsigned AS = Store->getAddressSpace();
+ if (Subtarget->hasLDSMisalignedBug() &&
+ AS == AMDGPUAS::FLAT_ADDRESS &&
+ Store->getAlignment() < VT.getStoreSize() && VT.getSizeInBits() > 32) {
+ return SplitVectorStore(Op, DAG);
+ }
+
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibilty that flat instruction access scratch memory
@@ -6875,6 +7709,9 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
+ // v3 stores not supported on SI.
+ if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+ return SplitVectorStore(Op, DAG);
return SDValue();
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
@@ -6885,16 +7722,16 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SplitVectorStore(Op, DAG);
return SDValue();
case 16:
- if (NumElements > 4)
+ if (NumElements > 4 || NumElements == 3)
return SplitVectorStore(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
// Use ds_write_b128 if possible.
if (Subtarget->useDS128() && Store->getAlignment() >= 16 &&
- VT.getStoreSize() == 16)
+ VT.getStoreSize() == 16 && NumElements != 3)
return SDValue();
if (NumElements > 2)
@@ -6905,7 +7742,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// out-of-bounds even if base + offsets is in bounds. Split vectorized
// stores here to avoid emitting ds_write2_b32. We may re-combine the
// store later in the SILoadStoreOptimizer.
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
+ if (!Subtarget->hasUsableDSOffset() &&
NumElements == 2 && VT.getStoreSize() == 8 &&
Store->getAlignment() < 8) {
return SplitVectorStore(Op, DAG);
@@ -7614,6 +8451,43 @@ SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
return SDValue();
}
+SDValue SITargetLowering::performSignExtendInRegCombine(SDNode *N,
+ DAGCombinerInfo &DCI)
+ const {
+ SDValue Src = N->getOperand(0);
+ auto *VTSign = cast<VTSDNode>(N->getOperand(1));
+
+ if (((Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE &&
+ VTSign->getVT() == MVT::i8) ||
+ (Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_USHORT &&
+ VTSign->getVT() == MVT::i16)) &&
+ Src.hasOneUse()) {
+ auto *M = cast<MemSDNode>(Src);
+ SDValue Ops[] = {
+ Src.getOperand(0), // Chain
+ Src.getOperand(1), // rsrc
+ Src.getOperand(2), // vindex
+ Src.getOperand(3), // voffset
+ Src.getOperand(4), // soffset
+ Src.getOperand(5), // offset
+ Src.getOperand(6),
+ Src.getOperand(7)
+ };
+ // Replace with BUFFER_LOAD_BYTE/SHORT.
+ SDVTList ResList = DCI.DAG.getVTList(MVT::i32,
+ Src.getOperand(0).getValueType());
+ unsigned Opc = (Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE) ?
+ AMDGPUISD::BUFFER_LOAD_BYTE : AMDGPUISD::BUFFER_LOAD_SHORT;
+ SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc, SDLoc(N),
+ ResList,
+ Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ return DCI.DAG.getMergeValues({BufferLoadSignExt,
+ BufferLoadSignExt.getValue(1)}, SDLoc(N));
+ }
+ return SDValue();
+}
+
SDValue SITargetLowering::performClassCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -8013,9 +8887,12 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
if (Cmp == APFloat::cmpGreaterThan)
return SDValue();
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
// TODO: Check IEEE bit enabled?
EVT VT = Op0.getValueType();
- if (Subtarget->enableDX10Clamp()) {
+ if (Info->getMode().DX10Clamp) {
// If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
// hardware fmed3 behavior converting to a min.
// FIXME: Should this be allowing -0.0?
@@ -8059,10 +8936,10 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// Only do this if the inner op has one use since this will just increase
// register pressure for no benefit.
-
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
- !VT.isVector() && VT != MVT::f64 &&
- ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) {
+ !VT.isVector() &&
+ (VT == MVT::i32 || VT == MVT::f32 ||
+ ((VT == MVT::f16 || VT == MVT::i16) && Subtarget->hasMin3Max3_16()))) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
@@ -8149,9 +9026,12 @@ SDValue SITargetLowering::performFMed3Combine(SDNode *N,
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
}
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
// FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
// handling no dx10-clamp?
- if (Subtarget->enableDX10Clamp()) {
+ if (Info->getMode().DX10Clamp) {
// If NaNs is clamped to 0, we are free to reorder the inputs.
if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
@@ -8342,8 +9222,10 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
- if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && !Subtarget->hasFP16Denormals()))
+ if (((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+ (VT == MVT::f16 && !Subtarget->hasFP16Denormals() &&
+ getSubtarget()->hasMadF16())) &&
+ isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
const TargetOptions &Options = DAG.getTarget().Options;
@@ -8357,6 +9239,46 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
return 0;
}
+// For a reassociatable opcode perform:
+// op x, (op y, z) -> op (op x, z), y, if x and z are uniform
+SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
+ SelectionDAG &DAG) const {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ unsigned Opc = N->getOpcode();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ if (!(Op0->isDivergent() ^ Op1->isDivergent()))
+ return SDValue();
+
+ if (Op0->isDivergent())
+ std::swap(Op0, Op1);
+
+ if (Op1.getOpcode() != Opc || !Op1.hasOneUse())
+ return SDValue();
+
+ SDValue Op2 = Op1.getOperand(1);
+ Op1 = Op1.getOperand(0);
+ if (!(Op1->isDivergent() ^ Op2->isDivergent()))
+ return SDValue();
+
+ if (Op1->isDivergent())
+ std::swap(Op1, Op2);
+
+ // If either operand is constant this will conflict with
+ // DAGCombiner::ReassociateOps().
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
+ DAG.isConstantIntBuildVectorOrConstantInt(Op1))
+ return SDValue();
+
+ SDLoc SL(N);
+ SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1);
+ return DAG.getNode(Opc, SL, VT, Add1, Op2);
+}
+
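In effect the combine rewrites op x, (op y, z) into op (op x, z), y so that the two uniform values are combined first and that part can be selected as a scalar (SALU) instruction, leaving a single vector (VALU) operation for the divergent value. A toy sketch of the operand shuffling for an integer add, with divergence tracked as a flag (illustrative only, no DAG involved):

#include <cassert>
#include <utility>

struct Val { long V; bool Divergent; };

// Toy model of the combine: given x + (y + z) where exactly one of the inner
// operands is divergent and the outer operand is uniform, add the two uniform
// values first (scalar add), then the divergent one (vector add).
Val reassociateAdd(Val Outer, Val InnerA, Val InnerB) {
  assert(!Outer.Divergent && InnerA.Divergent != InnerB.Divergent);
  if (InnerA.Divergent)
    std::swap(InnerA, InnerB);                 // InnerA: the uniform value
  long UniformSum = Outer.V + InnerA.V;        // one scalar (SALU) add
  return {UniformSum + InnerB.V, true};        // one vector (VALU) add
}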
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
EVT VT,
SDValue N0, SDValue N1, SDValue N2,
@@ -8405,6 +9327,10 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
return SDValue();
}
+ if (SDValue V = reassociateScalarOps(N, DAG)) {
+ return V;
+ }
+
if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
return SDValue();
@@ -8452,14 +9378,10 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- unsigned Opc = LHS.getOpcode();
- if (Opc != ISD::SUBCARRY)
- std::swap(RHS, LHS);
-
if (LHS.getOpcode() == ISD::SUBCARRY) {
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- if (!C || C->getZExtValue() != 0)
+ if (!C || !C->isNullValue())
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
@@ -8587,7 +9509,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc SL(N);
- if (!Subtarget->hasDotInsts() || VT != MVT::f32)
+ if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
@@ -8801,11 +9723,13 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
if (!CSrc)
return SDValue();
+ const MachineFunction &MF = DCI.DAG.getMachineFunction();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
APFloat::cmpResult Cmp0 = F.compare(Zero);
if (Cmp0 == APFloat::cmpLessThan ||
- (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+ (Cmp0 == APFloat::cmpUnordered &&
+ MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
@@ -8822,7 +9746,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return SDValue();
-
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -8873,11 +9796,11 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_FADD:
case AMDGPUISD::ATOMIC_INC:
case AMDGPUISD::ATOMIC_DEC:
- case AMDGPUISD::ATOMIC_LOAD_FADD:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
- case AMDGPUISD::ATOMIC_LOAD_FMAX: // TODO: Target mem intrinsics.
+ case AMDGPUISD::ATOMIC_LOAD_FMAX: // TODO: Target mem intrinsics.
if (DCI.isBeforeLegalize())
break;
return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
@@ -8889,6 +9812,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performXorCombine(N, DCI);
case ISD::ZERO_EXTEND:
return performZeroExtendCombine(N, DCI);
+ case ISD::SIGN_EXTEND_INREG:
+ return performSignExtendInRegCombine(N , DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
@@ -9034,6 +9959,10 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Don't allow 0 dmask, as hardware assumes one channel enabled.
bool NoChannels = !NewDmask;
if (NoChannels) {
+ if (!UsesTFC) {
+ // The result has no uses and TFC is not enabled, so there is nothing to do.
+ return Node;
+ }
// If the original dmask has one channel - then nothing to do
if (OldBitsSet == 1)
return Node;
@@ -9205,7 +10134,8 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
break;
MVT VT = Src0.getValueType().getSimpleVT();
- const TargetRegisterClass *RC = getRegClassFor(VT);
+ const TargetRegisterClass *RC =
+ getRegClassFor(VT, Src0.getNode()->isDivergent());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
@@ -9238,6 +10168,24 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
+ case AMDGPU::V_PERMLANE16_B32:
+ case AMDGPU::V_PERMLANEX16_B32: {
+ ConstantSDNode *FI = cast<ConstantSDNode>(Node->getOperand(0));
+ ConstantSDNode *BC = cast<ConstantSDNode>(Node->getOperand(2));
+ if (!FI->getZExtValue() && !BC->getZExtValue())
+ break;
+ SDValue VDstIn = Node->getOperand(6);
+ if (VDstIn.isMachineOpcode()
+ && VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF)
+ break;
+ MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ SDLoc(Node), MVT::i32);
+ SmallVector<SDValue, 8> Ops = { SDValue(FI, 0), Node->getOperand(1),
+ SDValue(BC, 0), Node->getOperand(3),
+ Node->getOperand(4), Node->getOperand(5),
+ SDValue(ImpDef, 0), Node->getOperand(7) };
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
default:
break;
}
@@ -9256,6 +10204,36 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (TII->isVOP3(MI.getOpcode())) {
// Make sure constant bus requirements are respected.
TII->legalizeOperandsVOP3(MRI, MI);
+
+ // Prefer VGPRs over AGPRs in mAI instructions where possible.
+ // This saves a chain-copy of registers and better balances register
+ // use between vgpr and agpr, as agpr tuples tend to be big.
+ if (const MCOperandInfo *OpInfo = MI.getDesc().OpInfo) {
+ unsigned Opc = MI.getOpcode();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) }) {
+ if (I == -1)
+ break;
+ MachineOperand &Op = MI.getOperand(I);
+ if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
+ OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
+ !TargetRegisterInfo::isVirtualRegister(Op.getReg()) ||
+ !TRI->isAGPR(MRI, Op.getReg()))
+ continue;
+ auto *Src = MRI.getUniqueVRegDef(Op.getReg());
+ if (!Src || !Src->isCopy() ||
+ !TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
+ continue;
+ auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+ auto *NewRC = TRI->getEquivalentVGPRClass(RC);
+ // All uses of agpr64 and agpr32 can also accept vgpr except for
+ // v_accvgpr_read, but we do not produce agpr reads during selection,
+ // so no use checks are needed.
+ MRI.setRegClass(Op.getReg(), NewRC);
+ }
+ }
+
return;
}
@@ -9391,9 +10369,15 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 64:
RC = &AMDGPU::SGPR_64RegClass;
break;
+ case 96:
+ RC = &AMDGPU::SReg_96RegClass;
+ break;
case 128:
RC = &AMDGPU::SReg_128RegClass;
break;
+ case 160:
+ RC = &AMDGPU::SReg_160RegClass;
+ break;
case 256:
RC = &AMDGPU::SReg_256RegClass;
break;
@@ -9419,6 +10403,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 128:
RC = &AMDGPU::VReg_128RegClass;
break;
+ case 160:
+ RC = &AMDGPU::VReg_160RegClass;
+ break;
case 256:
RC = &AMDGPU::VReg_256RegClass;
break;
@@ -9427,6 +10414,29 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
break;
}
break;
+ case 'a':
+ switch (VT.getSizeInBits()) {
+ default:
+ return std::make_pair(0U, nullptr);
+ case 32:
+ case 16:
+ RC = &AMDGPU::AGPR_32RegClass;
+ break;
+ case 64:
+ RC = &AMDGPU::AReg_64RegClass;
+ break;
+ case 128:
+ RC = &AMDGPU::AReg_128RegClass;
+ break;
+ case 512:
+ RC = &AMDGPU::AReg_512RegClass;
+ break;
+ case 1024:
+ RC = &AMDGPU::AReg_1024RegClass;
+ // v32 types are not legal but we support them here.
+ return std::make_pair(0U, RC);
+ }
+ break;
}
// We actually support i128, i16 and f16 as inline parameters
// even if they are not reported as legal
@@ -9440,6 +10450,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
RC = &AMDGPU::VGPR_32RegClass;
} else if (Constraint[1] == 's') {
RC = &AMDGPU::SGPR_32RegClass;
+ } else if (Constraint[1] == 'a') {
+ RC = &AMDGPU::AGPR_32RegClass;
}
if (RC) {
@@ -9459,6 +10471,7 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
default: break;
case 's':
case 'v':
+ case 'a':
return C_RegisterClass;
}
}
@@ -9471,7 +10484,7 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (Info->isEntryFunction()) {
@@ -9479,31 +10492,45 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
}
- // We have to assume the SP is needed in case there are calls in the function
- // during lowering. Calls are only detected after the function is
- // lowered. We're about to reserve registers, so don't bother using it if we
- // aren't really going to use it.
- bool NeedSP = !Info->isEntryFunction() ||
- MFI.hasVarSizedObjects() ||
- MFI.hasCalls();
+ assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
+ Info->getStackPtrOffsetReg()));
+ if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
+ MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
- if (NeedSP) {
- unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF);
- Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
+ // We need to worry about replacing the default register with itself in case
+ // of MIR testcases missing the MFI.
+ if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
+ MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
- assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg());
- assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
- Info->getStackPtrOffsetReg()));
- MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
- }
+ if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
+ MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
- MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
- MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
- MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
- Info->getScratchWaveOffsetReg());
+ if (Info->getScratchWaveOffsetReg() != AMDGPU::SCRATCH_WAVE_OFFSET_REG) {
+ MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
+ Info->getScratchWaveOffsetReg());
+ }
Info->limitOccupancy(MF);
+ if (ST.isWave32() && !MF.empty()) {
+    // Add a VCC_HI def because many instructions are marked as implicitly
+    // using VCC while we may only define VCC_LO. If nothing defines VCC_HI
+    // we may end up with a use of undef.
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ DebugLoc DL;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.getFirstNonDebugInstr();
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), AMDGPU::VCC_HI);
+
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ TII->fixImplicitOperands(MI);
+ }
+ }
+ }
+
TargetLoweringBase::finalizeLowering(MF);
}
@@ -9515,14 +10542,81 @@ void SITargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
TargetLowering::computeKnownBitsForFrameIndex(Op, Known, DemandedElts,
DAG, Depth);
- if (getSubtarget()->enableHugePrivateBuffer())
- return;
-
- // Technically it may be possible to have a dispatch with a single workitem
- // that uses the full private memory size, but that's not really useful. We
- // can't use vaddr in MUBUF instructions if we don't know the address
+ // Set the high bits to zero based on the maximum allowed scratch size per
+ // wave. We can't use vaddr in MUBUF instructions if we don't know the address
// calculation won't overflow, so assume the sign bit is never set.
- Known.Zero.setHighBits(AssumeFrameIndexHighZeroBits);
+ Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
+}
+
+unsigned SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+ const unsigned PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
+ const unsigned CacheLineAlign = 6; // log2(64)
+
+  // Pre-GFX10 targets did not benefit from loop alignment.
+ if (!ML || DisableLoopAlignment ||
+ (getSubtarget()->getGeneration() < AMDGPUSubtarget::GFX10) ||
+ getSubtarget()->hasInstFwdPrefetchBug())
+ return PrefAlign;
+
+  // On GFX10 the I$ consists of 4 x 64-byte cache lines.
+  // By default the prefetcher keeps one cache line behind and reads two
+  // ahead. We can modify it with S_INST_PREFETCH so that larger loops have
+  // two lines behind and one ahead.
+  // Therefore aligning loop headers helps if the loop fits in 192 bytes.
+  // If the loop fits in 64 bytes it always spans no more than two cache
+  // lines and does not need alignment.
+  // Otherwise, if the loop is at most 128 bytes we do not need to modify the
+  // prefetch; if it is at most 192 bytes we need two lines behind.
+
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ const MachineBasicBlock *Header = ML->getHeader();
+ if (Header->getAlignment() != PrefAlign)
+ return Header->getAlignment(); // Already processed.
+
+ unsigned LoopSize = 0;
+ for (const MachineBasicBlock *MBB : ML->blocks()) {
+    // If an inner loop block is aligned, assume on average half of the
+    // alignment size is added as nops.
+ if (MBB != Header)
+ LoopSize += (1 << MBB->getAlignment()) / 2;
+
+ for (const MachineInstr &MI : *MBB) {
+ LoopSize += TII->getInstSizeInBytes(MI);
+ if (LoopSize > 192)
+ return PrefAlign;
+ }
+ }
+
+ if (LoopSize <= 64)
+ return PrefAlign;
+
+ if (LoopSize <= 128)
+ return CacheLineAlign;
+
+  // If any of the parent loops is surrounded by prefetch instructions, do not
+  // insert new ones for the inner loop, which would reset the parent's settings.
+ for (MachineLoop *P = ML->getParentLoop(); P; P = P->getParentLoop()) {
+ if (MachineBasicBlock *Exit = P->getExitBlock()) {
+ auto I = Exit->getFirstNonDebugInstr();
+ if (I != Exit->end() && I->getOpcode() == AMDGPU::S_INST_PREFETCH)
+ return CacheLineAlign;
+ }
+ }
+
+ MachineBasicBlock *Pre = ML->getLoopPreheader();
+ MachineBasicBlock *Exit = ML->getExitBlock();
+
+ if (Pre && Exit) {
+ BuildMI(*Pre, Pre->getFirstTerminator(), DebugLoc(),
+ TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(1); // prefetch 2 lines behind PC
+
+ BuildMI(*Exit, Exit->getFirstNonDebugInstr(), DebugLoc(),
+ TII->get(AMDGPU::S_INST_PREFETCH))
+ .addImm(2); // prefetch 1 line behind PC
+ }
+
+ return CacheLineAlign;
}
LLVM_ATTRIBUTE_UNUSED
@@ -9531,7 +10625,8 @@ static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
do {
// Follow the chain until we find an INLINEASM node.
N = N->getOperand(0).getNode();
- if (N->getOpcode() == ISD::INLINEASM)
+ if (N->getOpcode() == ISD::INLINEASM ||
+ N->getOpcode() == ISD::INLINEASM_BR)
return true;
} while (N->getOpcode() == ISD::CopyFromReg);
return false;
@@ -9616,7 +10711,10 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
bool SNaN,
unsigned Depth) const {
if (Op.getOpcode() == AMDGPUISD::CLAMP) {
- if (Subtarget->enableDX10Clamp())
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (Info->getMode().DX10Clamp)
return true; // Clamped to 0.
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
@@ -9624,3 +10722,29 @@ bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
SNaN, Depth);
}
+
+TargetLowering::AtomicExpansionKind
+SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ switch (RMW->getOperation()) {
+ case AtomicRMWInst::FAdd: {
+ Type *Ty = RMW->getType();
+
+ // We don't have a way to support 16-bit atomics now, so just leave them
+ // as-is.
+ if (Ty->isHalfTy())
+ return AtomicExpansionKind::None;
+
+ if (!Ty->isFloatTy())
+ return AtomicExpansionKind::CmpXChg;
+
+    // TODO: We do have these for flat; older targets also had them for buffers.
+ unsigned AS = RMW->getPointerAddressSpace();
+ return (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) ?
+ AtomicExpansionKind::None : AtomicExpansionKind::CmpXChg;
+ }
+ default:
+ break;
+ }
+
+ return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
+}
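
The shouldExpandAtomicRMWInIR hook added above decides, per atomicrmw fadd, whether the IR is left alone or expanded to a compare-exchange loop. The following is a minimal standalone restatement of that decision using plain enums in place of the LLVM types; expandAtomicFAdd, AddrSpace and HasLDSFPAtomics are illustrative names, not part of the patch.

#include <cstdio>

enum class AtomicExpansionKind { None, CmpXChg };
enum class AddrSpace { Global, Local, Flat };

// Mirrors the FAdd case above: 16-bit stays as-is, non-float scalars expand
// to a cmpxchg loop, and 32-bit float is kept only for LDS when the target
// has native LDS FP atomics.
AtomicExpansionKind expandAtomicFAdd(bool IsHalf, bool IsFloat, AddrSpace AS,
                                     bool HasLDSFPAtomics) {
  if (IsHalf)
    return AtomicExpansionKind::None;
  if (!IsFloat)
    return AtomicExpansionKind::CmpXChg;
  return (AS == AddrSpace::Local && HasLDSFPAtomics)
             ? AtomicExpansionKind::None
             : AtomicExpansionKind::CmpXChg;
}

int main() {
  bool Kept = expandAtomicFAdd(false, true, AddrSpace::Local, true) ==
              AtomicExpansionKind::None;
  std::printf("local float fadd kept as atomicrmw: %d\n", Kept);
}
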
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index bcef519ee663..21a215e16ce7 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -1,9 +1,8 @@
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,7 +60,7 @@ private:
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const;
SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
- SDValue GLC, SelectionDAG &DAG) const;
+ SDValue GLC, SDValue DLC, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
@@ -90,11 +89,17 @@ private:
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
SelectionDAG &DAG, ArrayRef<SDValue> Ops,
bool IsIntrinsic = false) const;
+ // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
+ // dwordx4 if on SI.
+ SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO, SelectionDAG &DAG) const;
+
SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
/// Converts \p Op, which must be of floating point type, to the
@@ -116,8 +121,10 @@ private:
SelectionDAG &DAG) const;
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
@@ -141,6 +148,7 @@ private:
SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
const APFloat &C) const;
@@ -156,6 +164,7 @@ private:
SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -174,8 +183,6 @@ private:
unsigned isCFIntrinsic(const SDNode *Intr) const;
- void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
-
/// \returns True if fixup needs to be emitted for given global value \p GV,
/// false otherwise.
bool shouldEmitFixup(const GlobalValue *GV) const;
@@ -194,6 +201,15 @@ private:
void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
SDValue *Offsets, unsigned Align = 4) const;
+ // Handle 8 bit and 16 bit buffer loads
+ SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
+ ArrayRef<SDValue> Ops, MemSDNode *M) const;
+
+ // Handle 8 bit and 16 bit buffer stores
+ SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,
+ SDLoc DL, SDValue Ops[],
+ MemSDNode *M) const;
+
public:
SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
@@ -219,20 +235,21 @@ public:
bool canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const override;
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
- unsigned Align,
- bool *IsFast) const override;
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AS, unsigned Align,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *IsFast = nullptr) const override;
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
unsigned SrcAlign, bool IsMemset,
bool ZeroMemset,
bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
bool isMemOpUniform(const SDNode *N) const;
bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
- bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+ bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
@@ -298,6 +315,9 @@ public:
MachineBasicBlock *splitKillBlock(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
@@ -352,6 +372,9 @@ public:
const SelectionDAG &DAG,
bool SNaN = false,
unsigned Depth = 0) const override;
+ AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+ unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
};
} // End namespace llvm
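
The getPrefLoopAlignment override declared above (and implemented in the SIISelLowering.cpp hunk) keys off the loop-size thresholds spelled out in its comments: 64/128/192 bytes against 64-byte GFX10 I$ lines. Below is a standalone sketch of just that size-to-alignment decision, with plain integers standing in for the machine-IR types; pickLoopAlignment and its parameters are illustrative names.

#include <cstdio>
#include <initializer_list>
#include <utility>

// Plain-integer model of the size thresholds used by getPrefLoopAlignment:
// returns the log2 alignment to apply to the loop header and whether
// S_INST_PREFETCH should switch to "two lines behind, one ahead" mode.
std::pair<unsigned, bool> pickLoopAlignment(unsigned LoopSizeBytes,
                                            unsigned DefaultLog2Align) {
  const unsigned CacheLineLog2Align = 6; // log2(64-byte GFX10 I$ line)
  if (LoopSizeBytes > 192 || LoopSizeBytes <= 64)
    return {DefaultLog2Align, false}; // too large to help, or already fits
  if (LoopSizeBytes <= 128)
    return {CacheLineLog2Align, false}; // align; default prefetch is fine
  return {CacheLineLog2Align, true};    // 129..192 bytes: align and prefetch
}

int main() {
  for (unsigned Size : {48u, 100u, 160u, 300u}) {
    auto AP = pickLoopAlignment(Size, 2);
    std::printf("size=%u -> align=2^%u prefetch=%d\n", Size, AP.first,
                (int)AP.second);
  }
}
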
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index ba21a5ce1293..87e63fcc4a04 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,9 +1,8 @@
//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,15 +92,13 @@ INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
-static bool opcodeEmitsNoInsts(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::BUNDLE:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::DBG_VALUE:
+static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
+ if (MI.isMetaInstruction())
+ return true;
+
+ // Handle target specific opcodes.
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_MASK_BRANCH:
return true;
default:
return false;
@@ -110,9 +107,6 @@ static bool opcodeEmitsNoInsts(unsigned Opc) {
bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
- if (From.succ_empty())
- return false;
-
unsigned NumInstr = 0;
const MachineFunction *MF = From.getParent();
@@ -122,7 +116,7 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
NumInstr < SkipThreshold && I != E; ++I) {
- if (opcodeEmitsNoInsts(I->getOpcode()))
+ if (opcodeEmitsNoInsts(*I))
continue;
// FIXME: Since this is required for correctness, this should be inserted
@@ -138,6 +132,11 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;
+ // These instructions are potentially expensive even if EXEC = 0.
+ if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+ I->getOpcode() == AMDGPU::S_WAITCNT)
+ return true;
+
++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
@@ -177,7 +176,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
.addImm(0); // en
// ... and terminate wavefront.
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
return true;
}
@@ -245,6 +244,10 @@ void SIInsertSkips::kill(MachineInstr &MI) {
llvm_unreachable("invalid ISD:SET cond code");
}
+ const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+ if (ST.hasNoSdstCMPX())
+ Opcode = AMDGPU::getVCMPXNoSDstOp(Opcode);
+
assert(MI.getOperand(0).isReg());
if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
@@ -254,17 +257,23 @@ void SIInsertSkips::kill(MachineInstr &MI) {
.add(MI.getOperand(1))
.add(MI.getOperand(0));
} else {
- BuildMI(MBB, &MI, DL, TII->get(Opcode))
- .addReg(AMDGPU::VCC, RegState::Define)
- .addImm(0) // src0 modifiers
- .add(MI.getOperand(1))
- .addImm(0) // src1 modifiers
- .add(MI.getOperand(0))
- .addImm(0); // omod
+ auto I = BuildMI(MBB, &MI, DL, TII->get(Opcode));
+ if (!ST.hasNoSdstCMPX())
+ I.addReg(AMDGPU::VCC, RegState::Define);
+
+ I.addImm(0) // src0 modifiers
+ .add(MI.getOperand(1))
+ .addImm(0) // src1 modifiers
+ .add(MI.getOperand(0));
+
+ I.addImm(0); // omod
}
break;
}
case AMDGPU::SI_KILL_I1_TERMINATOR: {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
const MachineOperand &Op = MI.getOperand(0);
int64_t KillVal = MI.getOperand(1).getImm();
assert(KillVal == 0 || KillVal == -1);
@@ -275,14 +284,17 @@ void SIInsertSkips::kill(MachineInstr &MI) {
assert(Imm == 0 || Imm == -1);
if (Imm == KillVal)
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32
+ : AMDGPU::S_MOV_B64), Exec)
.addImm(0);
break;
}
unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
- BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ if (ST.isWave32())
+ Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32;
+ BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec)
+ .addReg(Exec)
.add(Op);
break;
}
@@ -331,9 +343,11 @@ bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
// S_CBRANCH_EXEC[N]Z
bool Changed = false;
MachineBasicBlock &MBB = *MI.getParent();
- const unsigned CondReg = AMDGPU::VCC;
- const unsigned ExecReg = AMDGPU::EXEC;
- const unsigned And = AMDGPU::S_AND_B64;
+ const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
+ const bool IsWave32 = ST.isWave32();
+ const unsigned CondReg = TRI->getVCC();
+ const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
E = MBB.rend();
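
The reworked shouldSkip above now skips anything MachineInstr::isMetaInstruction reports as emitting no code, and additionally treats SMRD/VMEM/FLAT and S_WAITCNT as worth branching around even when EXEC is zero. A minimal model of that scan over a block, where InstModel and shouldSkipRegion are stand-in names rather than LLVM API:

#include <cstdio>
#include <vector>

struct InstModel {
  bool IsMeta;                // emits no machine code (DBG_VALUE, KILL, ...)
  bool ExpensiveWhenExecZero; // SMRD/VMEM/FLAT/S_WAITCNT in the real pass
};

// Returns true if branching over the region is worthwhile: it contains an
// instruction that is costly even with EXEC == 0, or it reaches the
// instruction-count threshold.
bool shouldSkipRegion(const std::vector<InstModel> &Insts,
                      unsigned SkipThreshold) {
  unsigned NumInstr = 0;
  for (const InstModel &I : Insts) {
    if (I.IsMeta)
      continue;
    if (I.ExpensiveWhenExecZero)
      return true;
    if (++NumInstr >= SkipThreshold)
      return true;
  }
  return false;
}

int main() {
  std::vector<InstModel> Block = {{true, false}, {false, false}, {false, true}};
  std::printf("skip: %d\n", shouldSkipRegion(Block, 12)); // 1: has a VMEM-like op
}
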
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index afc0b4467610..c89d5b71ec5c 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1,9 +1,8 @@
//===- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -69,10 +68,10 @@ DEBUG_COUNTER(ForceLgkmCounter, DEBUG_TYPE"-forcelgkm",
DEBUG_COUNTER(ForceVMCounter, DEBUG_TYPE"-forcevm",
"Force emit s_waitcnt vmcnt(0) instrs");
-static cl::opt<unsigned> ForceEmitZeroFlag(
+static cl::opt<bool> ForceEmitZeroFlag(
"amdgpu-waitcnt-forcezero",
cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"),
- cl::init(0), cl::Hidden);
+ cl::init(false), cl::Hidden);
namespace {
@@ -101,7 +100,7 @@ public:
#define CNT_MASK(t) (1u << (t))
-enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, VS_CNT, NUM_INST_CNTS };
iterator_range<enum_iterator<InstCounterType>> inst_counter_types() {
return make_range(enum_iterator<InstCounterType>(VM_CNT),
@@ -114,6 +113,7 @@ struct {
uint32_t VmcntMax;
uint32_t ExpcntMax;
uint32_t LgkmcntMax;
+ uint32_t VscntMax;
int32_t NumVGPRsMax;
int32_t NumSGPRsMax;
} HardwareLimits;
@@ -127,6 +127,8 @@ struct {
enum WaitEventType {
VMEM_ACCESS, // vector-memory read & write
+ VMEM_READ_ACCESS, // vector-memory read
+  VMEM_WRITE_ACCESS, // vector-memory write
LDS_ACCESS, // lds read & write
GDS_ACCESS, // gds read & write
SQ_MESSAGE, // send message
@@ -140,11 +142,12 @@ enum WaitEventType {
};
static const uint32_t WaitEventMaskForInst[NUM_INST_CNTS] = {
- (1 << VMEM_ACCESS),
+ (1 << VMEM_ACCESS) | (1 << VMEM_READ_ACCESS),
(1 << SMEM_ACCESS) | (1 << LDS_ACCESS) | (1 << GDS_ACCESS) |
(1 << SQ_MESSAGE),
(1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
(1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS),
+ (1 << VMEM_WRITE_ACCESS)
};
// The mapping is:
@@ -172,6 +175,9 @@ void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
case LGKM_CNT:
Wait.LgkmCnt = std::min(Wait.LgkmCnt, Count);
break;
+ case VS_CNT:
+ Wait.VsCnt = std::min(Wait.VsCnt, Count);
+ break;
default:
llvm_unreachable("bad InstCounterType");
}
@@ -200,6 +206,8 @@ public:
return HardwareLimits.LgkmcntMax;
case EXP_CNT:
return HardwareLimits.ExpcntMax;
+ case VS_CNT:
+ return HardwareLimits.VscntMax;
default:
break;
}
@@ -222,10 +230,12 @@ public:
// Mapping from event to counter.
InstCounterType eventCounter(WaitEventType E) {
- if (E == VMEM_ACCESS)
+ if (WaitEventMaskForInst[VM_CNT] & (1 << E))
return VM_CNT;
if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
return LGKM_CNT;
+ if (WaitEventMaskForInst[VS_CNT] & (1 << E))
+ return VS_CNT;
assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
return EXP_CNT;
}
@@ -453,7 +463,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
unsigned OpNo, bool Def) const {
const MachineOperand &Op = MI->getOperand(OpNo);
if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
- (Def && !Op.isDef()))
+ (Def && !Op.isDef()) || TRI->isAGPR(*MRI, Op.getReg()))
return {-1, -1};
// A use via a PW operand does not need a waitcnt.
@@ -526,20 +536,22 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+ int AddrOpIdx =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
// All GDS operations must protect their address register (same as
// export.)
- if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
- Inst.getOpcode() != AMDGPU::DS_CONSUME) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
- CurrScore);
+ if (AddrOpIdx != -1) {
+ setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
}
+
if (Inst.mayStore()) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
- CurrScore);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::data0) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
+ CurrScore);
+ }
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data1) != -1) {
setExpScore(&Inst, TII, TRI, MRI,
@@ -663,6 +675,9 @@ void WaitcntBrackets::print(raw_ostream &OS) {
case EXP_CNT:
OS << " EXP_CNT(" << UB - LB << "): ";
break;
+ case VS_CNT:
+ OS << " VS_CNT(" << UB - LB << "): ";
+ break;
default:
OS << " UNKNOWN(" << UB - LB << "): ";
break;
@@ -702,7 +717,8 @@ void WaitcntBrackets::print(raw_ostream &OS) {
bool WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
return simplifyWaitcnt(VM_CNT, Wait.VmCnt) |
simplifyWaitcnt(EXP_CNT, Wait.ExpCnt) |
- simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+ simplifyWaitcnt(LGKM_CNT, Wait.LgkmCnt) |
+ simplifyWaitcnt(VS_CNT, Wait.VsCnt);
}
bool WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
@@ -745,6 +761,7 @@ void WaitcntBrackets::applyWaitcnt(const AMDGPU::Waitcnt &Wait) {
applyWaitcnt(VM_CNT, Wait.VmCnt);
applyWaitcnt(EXP_CNT, Wait.ExpCnt);
applyWaitcnt(LGKM_CNT, Wait.LgkmCnt);
+ applyWaitcnt(VS_CNT, Wait.VsCnt);
}
void WaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
@@ -790,6 +807,21 @@ static bool readsVCCZ(const MachineInstr &MI) {
!MI.getOperand(1).isUndef();
}
+/// \returns true if the callee inserts an s_waitcnt 0 on function entry.
+static bool callWaitsOnFunctionEntry(const MachineInstr &MI) {
+ // Currently all conventions wait, but this may not always be the case.
+ //
+ // TODO: If IPRA is enabled, and the callee is isSafeForNoCSROpt, it may make
+// sense to omit the wait and do it in the caller.
+ return true;
+}
+
+/// \returns true if the callee is expected to wait on any outstanding
+/// counters before returning.
+static bool callWaitsOnFunctionReturn(const MachineInstr &MI) {
+ return true;
+}
+
/// Generate s_waitcnt instruction to be placed before cur_Inst.
/// Instructions of a given type are returned in order,
/// but instructions of different types can complete out of order.
@@ -815,7 +847,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// TODO: Handle other cases of NeedsWaitcntVmBefore()
if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
- MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL ||
+ MI.getOpcode() == AMDGPU::BUFFER_GL0_INV ||
+ MI.getOpcode() == AMDGPU::BUFFER_GL1_INV) {
Wait.VmCnt = 0;
}
@@ -823,8 +857,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
- MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
- Wait = AMDGPU::Waitcnt::allZero();
+ MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+ (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZero(IV));
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
@@ -903,91 +938,91 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
}
}
-#if 0 // TODO: the following code to handle CALL.
- // The argument passing for CALLs should suffice for VM_CNT and LGKM_CNT.
- // However, there is a problem with EXP_CNT, because the call cannot
- // easily tell if a register is used in the function, and if it did, then
- // the referring instruction would have to have an S_WAITCNT, which is
- // dependent on all call sites. So Instead, force S_WAITCNT for EXP_CNTs
- // before the call.
- if (MI.getOpcode() == SC_CALL) {
- if (ScoreBrackets->getScoreUB(EXP_CNT) >
- ScoreBrackets->getScoreLB(EXP_CNT)) {
- ScoreBrackets->setScoreLB(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
- EmitWaitcnt |= CNT_MASK(EXP_CNT);
- }
- }
-#endif
-
- // FIXME: Should not be relying on memoperands.
- // Look at the source operands of every instruction to see if
- // any of them results from a previous memory operation that affects
- // its current usage. If so, an s_waitcnt instruction needs to be
- // emitted.
- // If the source operand was defined by a load, add the s_waitcnt
- // instruction.
- for (const MachineMemOperand *Memop : MI.memoperands()) {
- unsigned AS = Memop->getAddrSpace();
- if (AS != AMDGPUAS::LOCAL_ADDRESS)
- continue;
- unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
- // VM_CNT is only relevant to vgpr or LDS.
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
- }
+ if (MI.isCall() && callWaitsOnFunctionEntry(MI)) {
+ // Don't bother waiting on anything except the call address. The function
+ // is going to insert a wait on everything in its prolog. This still needs
+ // to be careful if the call target is a load (e.g. a GOT load).
+ Wait = AMDGPU::Waitcnt();
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &Op = MI.getOperand(I);
- const MachineRegisterInfo &MRIA = *MRI;
- RegInterval Interval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
+ int CallAddrOpIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ RegInterval Interval = ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI,
+ CallAddrOpIdx, false);
for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- if (TRI->isVGPR(MRIA, Op.getReg())) {
- // VM_CNT is only relevant to vgpr or LDS.
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
- }
ScoreBrackets.determineWait(
LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
- }
- // End of for loop that looks at all source operands to decide vm_wait_cnt
- // and lgk_wait_cnt.
-
- // Two cases are handled for destination operands:
- // 1) If the destination operand was defined by a load, add the s_waitcnt
- // instruction to guarantee the right WAW order.
- // 2) If a destination operand that was used by a recent export/store ins,
- // add s_waitcnt on exp_cnt to guarantee the WAR order.
- if (MI.mayStore()) {
+ } else {
// FIXME: Should not be relying on memoperands.
+ // Look at the source operands of every instruction to see if
+ // any of them results from a previous memory operation that affects
+ // its current usage. If so, an s_waitcnt instruction needs to be
+ // emitted.
+ // If the source operand was defined by a load, add the s_waitcnt
+ // instruction.
for (const MachineMemOperand *Memop : MI.memoperands()) {
unsigned AS = Memop->getAddrSpace();
if (AS != AMDGPUAS::LOCAL_ADDRESS)
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
+ // VM_CNT is only relevant to vgpr or LDS.
ScoreBrackets.determineWait(
VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
- ScoreBrackets.determineWait(
- EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
- }
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Def = MI.getOperand(I);
- const MachineRegisterInfo &MRIA = *MRI;
- RegInterval Interval =
- ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
- for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
- if (TRI->isVGPR(MRIA, Def.getReg())) {
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &Op = MI.getOperand(I);
+ const MachineRegisterInfo &MRIA = *MRI;
+ RegInterval Interval =
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, false);
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(MRIA, Op.getReg())) {
+ // VM_CNT is only relevant to vgpr or LDS.
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ }
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+ }
+ }
+ // End of for loop that looks at all source operands to decide vm_wait_cnt
+ // and lgk_wait_cnt.
+
+ // Two cases are handled for destination operands:
+ // 1) If the destination operand was defined by a load, add the s_waitcnt
+ // instruction to guarantee the right WAW order.
+ // 2) If a destination operand that was used by a recent export/store ins,
+ // add s_waitcnt on exp_cnt to guarantee the WAR order.
+ if (MI.mayStore()) {
+ // FIXME: Should not be relying on memoperands.
+ for (const MachineMemOperand *Memop : MI.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS != AMDGPUAS::LOCAL_ADDRESS)
+ continue;
+ unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
ScoreBrackets.determineWait(
VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
ScoreBrackets.determineWait(
EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
}
- ScoreBrackets.determineWait(
- LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
}
- } // End of for loop that looks at all dest operands.
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ MachineOperand &Def = MI.getOperand(I);
+ const MachineRegisterInfo &MRIA = *MRI;
+ RegInterval Interval =
+ ScoreBrackets.getRegInterval(&MI, TII, MRI, TRI, I, true);
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(MRIA, Def.getReg())) {
+ ScoreBrackets.determineWait(
+ VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(
+ EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+ }
+ ScoreBrackets.determineWait(
+ LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+ }
+ } // End of for loop that looks at all dest operands.
+ }
}
// Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
@@ -996,13 +1031,13 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// requiring a WAITCNT beforehand.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier()) {
- Wait = AMDGPU::Waitcnt::allZero();
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZero(IV));
}
// TODO: Remove this work-around, enable the assert for Bug 457939
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
- if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
+ if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
if (ScoreBrackets.getScoreLB(LGKM_CNT) <
ScoreBrackets.getScoreUB(LGKM_CNT) &&
ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
@@ -1014,21 +1049,31 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
if (!ScoreBrackets.simplifyWaitcnt(Wait) && !IsForceEmitWaitcnt) {
bool Modified = false;
if (OldWaitcntInstr) {
- if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
- TrackedWaitcntSet.erase(OldWaitcntInstr);
- OldWaitcntInstr->eraseFromParent();
- Modified = true;
- } else {
- int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
- ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+ for (auto II = OldWaitcntInstr->getIterator(), NextI = std::next(II);
+ &*II != &MI; II = NextI, ++NextI) {
+ if (II->isDebugInstr())
+ continue;
+
+ if (TrackedWaitcntSet.count(&*II)) {
+ TrackedWaitcntSet.erase(&*II);
+ II->eraseFromParent();
+ Modified = true;
+ } else if (II->getOpcode() == AMDGPU::S_WAITCNT) {
+ int64_t Imm = II->getOperand(0).getImm();
+ ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
+ } else {
+ assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
+ assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+ ScoreBrackets.applyWaitcnt(
+ AMDGPU::Waitcnt(0, 0, 0, II->getOperand(1).getImm()));
+ }
}
- Modified = true;
}
return Modified;
}
if (ForceEmitZeroWaitcnts)
- Wait = AMDGPU::Waitcnt::allZero();
+ Wait = AMDGPU::Waitcnt::allZero(IV);
if (ForceEmitWaitcnt[VM_CNT])
Wait.VmCnt = 0;
@@ -1036,39 +1081,88 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
Wait.ExpCnt = 0;
if (ForceEmitWaitcnt[LGKM_CNT])
Wait.LgkmCnt = 0;
+ if (ForceEmitWaitcnt[VS_CNT])
+ Wait.VsCnt = 0;
ScoreBrackets.applyWaitcnt(Wait);
AMDGPU::Waitcnt OldWait;
+ bool Modified = false;
+
if (OldWaitcntInstr) {
- OldWait =
- AMDGPU::decodeWaitcnt(IV, OldWaitcntInstr->getOperand(0).getImm());
- }
- if (OldWait.dominates(Wait))
- return false;
+ for (auto II = OldWaitcntInstr->getIterator(), NextI = std::next(II);
+ &*II != &MI; II = NextI, NextI++) {
+ if (II->isDebugInstr())
+ continue;
- if (OldWaitcntInstr && !TrackedWaitcntSet.count(OldWaitcntInstr))
- Wait = Wait.combined(OldWait);
+ if (II->getOpcode() == AMDGPU::S_WAITCNT) {
+ unsigned IEnc = II->getOperand(0).getImm();
+ AMDGPU::Waitcnt IWait = AMDGPU::decodeWaitcnt(IV, IEnc);
+ OldWait = OldWait.combined(IWait);
+ if (!TrackedWaitcntSet.count(&*II))
+ Wait = Wait.combined(IWait);
+ unsigned NewEnc = AMDGPU::encodeWaitcnt(IV, Wait);
+ if (IEnc != NewEnc) {
+ II->getOperand(0).setImm(NewEnc);
+ Modified = true;
+ }
+ Wait.VmCnt = ~0u;
+ Wait.LgkmCnt = ~0u;
+ Wait.ExpCnt = ~0u;
+ } else {
+ assert(II->getOpcode() == AMDGPU::S_WAITCNT_VSCNT);
+ assert(II->getOperand(0).getReg() == AMDGPU::SGPR_NULL);
+
+ unsigned ICnt = II->getOperand(1).getImm();
+ OldWait.VsCnt = std::min(OldWait.VsCnt, ICnt);
+ if (!TrackedWaitcntSet.count(&*II))
+ Wait.VsCnt = std::min(Wait.VsCnt, ICnt);
+ if (Wait.VsCnt != ICnt) {
+ II->getOperand(1).setImm(Wait.VsCnt);
+ Modified = true;
+ }
+ Wait.VsCnt = ~0u;
+ }
- unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
- if (OldWaitcntInstr) {
- OldWaitcntInstr->getOperand(0).setImm(Enc);
+ LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *II << '\n');
- LLVM_DEBUG(dbgs() << "updateWaitcntInBlock\n"
- << "Old Instr: " << MI << '\n'
- << "New Instr: " << *OldWaitcntInstr << '\n');
- } else {
+ if (!Wait.hasWait())
+ return Modified;
+ }
+ }
+
+ if (Wait.VmCnt != ~0u || Wait.LgkmCnt != ~0u || Wait.ExpCnt != ~0u) {
+ unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
auto SWaitInst = BuildMI(*MI.getParent(), MI.getIterator(),
MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(Enc);
TrackedWaitcntSet.insert(SWaitInst);
+ Modified = true;
LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
<< "Old Instr: " << MI << '\n'
<< "New Instr: " << *SWaitInst << '\n');
}
- return true;
+ if (Wait.VsCnt != ~0u) {
+ assert(ST->hasVscnt());
+
+ auto SWaitInst =
+ BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(Wait.VsCnt);
+ TrackedWaitcntSet.insert(SWaitInst);
+ Modified = true;
+
+ LLVM_DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << MI << '\n'
+ << "New Instr: " << *SWaitInst << '\n');
+ }
+
+ return Modified;
}
// This is a flat memory operation. Check to see if it has memory
@@ -1093,7 +1187,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
// bracket and the destination operand scores.
// TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
- if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
+ if (TII->isAlwaysGDS(Inst.getOpcode()) ||
+ TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
} else {
@@ -1102,8 +1197,15 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
} else if (TII->isFLAT(Inst)) {
assert(Inst.mayLoad() || Inst.mayStore());
- if (TII->usesVM_CNT(Inst))
- ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ if (TII->usesVM_CNT(Inst)) {
+ if (!ST->hasVscnt())
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ else if (Inst.mayLoad() &&
+ AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
+ else
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
+ }
if (TII->usesLGKM_CNT(Inst)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
@@ -1118,14 +1220,33 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
// TODO: get a better carve out.
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 &&
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
- Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
- ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL &&
+ Inst.getOpcode() != AMDGPU::BUFFER_GL0_INV &&
+ Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) {
+ if (!ST->hasVscnt())
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ else if ((Inst.mayLoad() &&
+ AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) ||
+ /* IMAGE_GET_RESINFO / IMAGE_GET_LOD */
+ (TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore()))
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst);
+ else if (Inst.mayStore())
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst);
+
if (ST->vmemWriteNeedsExpWaitcnt() &&
(Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
}
} else if (TII->isSMRD(Inst)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+ } else if (Inst.isCall()) {
+ if (callWaitsOnFunctionReturn(Inst)) {
+ // Act as a wait on everything
+ ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZero(IV));
+ } else {
+      // May need to wait for anything.
+ ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
+ }
} else {
switch (Inst.getOpcode()) {
case AMDGPU::S_SENDMSG:
@@ -1236,31 +1357,18 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// Walk over the instructions.
MachineInstr *OldWaitcntInstr = nullptr;
- for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
+ for (MachineBasicBlock::instr_iterator Iter = Block.instr_begin(),
+ E = Block.instr_end();
Iter != E;) {
MachineInstr &Inst = *Iter;
- // Remove any previously existing waitcnts.
- if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
- if (OldWaitcntInstr) {
- if (TrackedWaitcntSet.count(OldWaitcntInstr)) {
- TrackedWaitcntSet.erase(OldWaitcntInstr);
- OldWaitcntInstr->eraseFromParent();
- OldWaitcntInstr = nullptr;
- } else if (!TrackedWaitcntSet.count(&Inst)) {
- // Two successive s_waitcnt's, both of which are pre-existing and
- // are therefore preserved.
- int64_t Imm = OldWaitcntInstr->getOperand(0).getImm();
- ScoreBrackets.applyWaitcnt(AMDGPU::decodeWaitcnt(IV, Imm));
- } else {
- ++Iter;
- Inst.eraseFromParent();
- Modified = true;
- continue;
- }
- }
-
- OldWaitcntInstr = &Inst;
+ // Track pre-existing waitcnts from earlier iterations.
+ if (Inst.getOpcode() == AMDGPU::S_WAITCNT ||
+ (Inst.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+ Inst.getOperand(0).isReg() &&
+ Inst.getOperand(0).getReg() == AMDGPU::SGPR_NULL)) {
+ if (!OldWaitcntInstr)
+ OldWaitcntInstr = &Inst;
++Iter;
continue;
}
@@ -1299,27 +1407,16 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
ScoreBrackets.dump();
});
- // Check to see if this is a GWS instruction. If so, and if this is CI or
- // VI, then the generated code sequence will include an S_WAITCNT 0.
- // TODO: Are these the only GWS instructions?
- if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
- Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
- Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
- // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
- }
-
// TODO: Remove this work-around after fixing the scheduler and enable the
// assert above.
if (VCCZBugWorkAround) {
// Restore the vccz bit. Any time a value is written to vcc, the vcc
// bit is updated, so we can restore the bit by reading the value of
// vcc and then writing it back to the register.
- BuildMI(Block, Inst, Inst.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
- AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
+ BuildMI(Block, Inst, Inst.getDebugLoc(),
+ TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
+ TRI->getVCC())
+ .addReg(TRI->getVCC());
VCCZBugHandledSet.insert(&Inst);
Modified = true;
}
@@ -1345,6 +1442,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
+ HardwareLimits.VscntMax = ST->hasVscnt() ? 63 : 0;
HardwareLimits.NumVGPRsMax = ST->getAddressableNumVGPRs();
HardwareLimits.NumSGPRsMax = ST->getAddressableNumSGPRs();
@@ -1480,6 +1578,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
+ if (ST->hasVscnt())
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(),
+ TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
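
The waitcnt changes above add a fourth counter (VS_CNT, flushed with S_WAITCNT_VSCNT on targets that have it) and repeatedly combine and simplify AMDGPU::Waitcnt values, where ~0u means "no wait required" and combining takes the per-counter minimum. A simplified standalone model of that bookkeeping; WaitcntModel is an illustrative stand-in, not the LLVM class.

#include <algorithm>
#include <cstdio>

// ~0u means "no wait required"; smaller values are stricter. Combining two
// requirements takes the per-counter minimum, as Waitcnt::combined does.
struct WaitcntModel {
  unsigned VmCnt = ~0u, ExpCnt = ~0u, LgkmCnt = ~0u, VsCnt = ~0u;

  WaitcntModel combined(const WaitcntModel &O) const {
    return {std::min(VmCnt, O.VmCnt), std::min(ExpCnt, O.ExpCnt),
            std::min(LgkmCnt, O.LgkmCnt), std::min(VsCnt, O.VsCnt)};
  }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }
};

int main() {
  WaitcntModel NoWait;                         // nothing outstanding
  WaitcntModel StoreWait; StoreWait.VsCnt = 0; // wait for all vector stores
  WaitcntModel Merged = NoWait.combined(StoreWait);
  std::printf("hasWait=%d VsCnt=%u\n", Merged.hasWait(), Merged.VsCnt);
}
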
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index 65ffc27b8b60..561a16c3e351 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -1,9 +1,8 @@
//===-- SIInstrFormats.td - SI Instruction Encodings ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,19 +10,9 @@
//
//===----------------------------------------------------------------------===//
-def isGCN : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
- AssemblerPredicate<"FeatureGCN">;
-def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
- AssemblerPredicate<"FeatureSouthernIslands">;
-
-
class InstSI <dag outs, dag ins, string asm = "",
list<dag> pattern = []> :
AMDGPUInst<outs, ins, asm, pattern>, GCNPredicateControl {
- let SubtargetPredicate = isGCN;
-
// Low bits - basic encoding information.
field bit SALU = 0;
field bit VALU = 0;
@@ -121,10 +110,20 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that this is a D16 buffer instruction.
field bit D16Buf = 0;
+  // This field indicates that a FLAT instruction accesses the FLAT_GLBL or
+  // FLAT_SCRATCH segment. Must be 0 for non-FLAT instructions.
+ field bit IsNonFlatSeg = 0;
+
// This bit indicates that this uses the floating point double precision
// rounding mode flags
field bit FPDPRounding = 0;
+ // Instruction is FP atomic.
+ field bit FPAtomic = 0;
+
+  // This bit indicates that this is one of the MFMA instructions.
+ field bit IsMAI = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -182,7 +181,13 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{50} = D16Buf;
- let TSFlags{51} = FPDPRounding;
+ let TSFlags{51} = IsNonFlatSeg;
+
+ let TSFlags{52} = FPDPRounding;
+
+ let TSFlags{53} = FPAtomic;
+
+ let TSFlags{54} = IsMAI;
let SchedRW = [Write32Bit];
@@ -251,38 +256,59 @@ class VINTRPe <bits<2> op> : Enc32 {
let Inst{31-26} = 0x32; // encoding
}
-class MIMGe <bits<7> op> : Enc64 {
+class MIMGe : Enc64 {
bits<8> vdata;
bits<4> dmask;
bits<1> unorm;
bits<1> glc;
- bits<1> da;
bits<1> r128;
bits<1> tfe;
bits<1> lwe;
bits<1> slc;
bit d16;
- bits<8> vaddr;
bits<7> srsrc;
bits<7> ssamp;
let Inst{11-8} = dmask;
let Inst{12} = unorm;
let Inst{13} = glc;
- let Inst{14} = da;
let Inst{15} = r128;
let Inst{16} = tfe;
let Inst{17} = lwe;
- let Inst{24-18} = op;
let Inst{25} = slc;
let Inst{31-26} = 0x3c;
- let Inst{39-32} = vaddr;
let Inst{47-40} = vdata;
let Inst{52-48} = srsrc{6-2};
let Inst{57-53} = ssamp{6-2};
let Inst{63} = d16;
}
+class MIMGe_gfx6789 <bits<8> op> : MIMGe {
+ bits<8> vaddr;
+ bits<1> da;
+
+ let Inst{0} = op{7};
+ let Inst{14} = da;
+ let Inst{24-18} = op{6-0};
+ let Inst{39-32} = vaddr;
+}
+
+class MIMGe_gfx10 <bits<8> op> : MIMGe {
+ bits<8> vaddr0;
+ bits<3> dim;
+ bits<2> nsa;
+ bits<1> dlc;
+ bits<1> a16 = 0; // TODO: this should be an operand
+
+ let Inst{0} = op{7};
+ let Inst{2-1} = nsa;
+ let Inst{5-3} = dim;
+ let Inst{7} = dlc;
+ let Inst{24-18} = op{6-0};
+ let Inst{39-32} = vaddr0;
+ let Inst{62} = a16;
+}
+
class EXPe : Enc64 {
bits<4> en;
bits<6> tgt;
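
MIMGe_gfx10 above splits the 8-bit opcode across Inst{0} and Inst{24-18} and adds the nsa, dim and dlc fields to the base MIMG layout. The sketch below packs a subset of those fields into a 64-bit word to make the bit positions concrete; encodeMIMGGfx10 is an illustrative helper, not part of the backend, and unorm/r128/tfe/lwe/a16 are omitted for brevity.

#include <cstdint>
#include <cstdio>

// Illustrative bit packing for the MIMGe_gfx10 layout; field widths and bit
// positions are taken from the TableGen classes above.
uint64_t encodeMIMGGfx10(unsigned Op8, unsigned Nsa2, unsigned Dim3,
                         unsigned Dlc1, unsigned Dmask4, unsigned Glc1,
                         unsigned Slc1, unsigned VAddr0, unsigned VData,
                         unsigned SRsrc7, unsigned SSamp7, unsigned D16) {
  uint64_t Inst = 0;
  Inst |= (uint64_t)((Op8 >> 7) & 0x1);           // Inst{0}     = op{7}
  Inst |= (uint64_t)(Nsa2 & 0x3) << 1;            // Inst{2-1}   = nsa
  Inst |= (uint64_t)(Dim3 & 0x7) << 3;            // Inst{5-3}   = dim
  Inst |= (uint64_t)(Dlc1 & 0x1) << 7;            // Inst{7}     = dlc
  Inst |= (uint64_t)(Dmask4 & 0xF) << 8;          // Inst{11-8}  = dmask
  Inst |= (uint64_t)(Glc1 & 0x1) << 13;           // Inst{13}    = glc
  Inst |= (uint64_t)(Op8 & 0x7F) << 18;           // Inst{24-18} = op{6-0}
  Inst |= (uint64_t)(Slc1 & 0x1) << 25;           // Inst{25}    = slc
  Inst |= (uint64_t)0x3C << 26;                   // Inst{31-26} = encoding
  Inst |= (uint64_t)(VAddr0 & 0xFF) << 32;        // Inst{39-32} = vaddr0
  Inst |= (uint64_t)(VData & 0xFF) << 40;         // Inst{47-40} = vdata
  Inst |= (uint64_t)((SRsrc7 >> 2) & 0x1F) << 48; // Inst{52-48} = srsrc{6-2}
  Inst |= (uint64_t)((SSamp7 >> 2) & 0x1F) << 53; // Inst{57-53} = ssamp{6-2}
  Inst |= (uint64_t)(D16 & 0x1) << 63;            // Inst{63}    = d16
  return Inst;
}

int main() {
  std::printf("0x%016llx\n",
              (unsigned long long)encodeMIMGGfx10(0x12, 0, 2, 0, 0xF, 1, 0,
                                                  4, 8, 0x20, 0x40, 0));
}
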
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2370d5fa7b27..ba8ed6993a56 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1,9 +1,8 @@
//===- SIInstrInfo.cpp - SI Instruction Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,6 @@
#include "SIInstrInfo.h"
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
@@ -100,12 +98,6 @@ static unsigned getNumOperandsNoGlue(SDNode *Node) {
return N;
}
-static SDValue findChainOperand(SDNode *Load) {
- SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1);
- assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node");
- return LastOp;
-}
-
/// Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
@@ -142,7 +134,8 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
- return true;
+ // No implicit operands.
+ return MI.getNumOperands() == MI.getDesc().getNumOperands();
default:
return false;
}
@@ -168,22 +161,25 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
return false;
// Check base reg.
- if (Load0->getOperand(1) != Load1->getOperand(1))
- return false;
-
- // Check chain.
- if (findChainOperand(Load0) != findChainOperand(Load1))
+ if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
// Skip read2 / write2 variants for simplicity.
// TODO: We should report true if the used offsets are adjacent (excluded
// st64 versions).
- if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
- AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
+ int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
+ int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
+ if (Offset0Idx == -1 || Offset1Idx == -1)
return false;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
+  // XXX - be careful of dataless loads
+ // getNamedOperandIdx returns the index for MachineInstrs. Since they
+ // include the output in the operand list, but SDNodes don't, we need to
+ // subtract the index by one.
+ Offset0Idx -= get(Opc0).NumDefs;
+ Offset1Idx -= get(Opc1).NumDefs;
+ Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
return true;
}
@@ -207,10 +203,6 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (!Load0Offset || !Load1Offset)
return false;
- // Check chain.
- if (findChainOperand(Load0) != findChainOperand(Load1))
- return false;
-
Offset0 = Load0Offset->getZExtValue();
Offset1 = Load1Offset->getZExtValue();
return true;
@@ -221,7 +213,6 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
// MUBUF and MTBUF have vaddr at different indices.
if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) ||
- findChainOperand(Load0) != findChainOperand(Load1) ||
!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) ||
!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc))
return false;
@@ -233,10 +224,10 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
return false;
// getNamedOperandIdx returns the index for MachineInstrs. Since they
- // inlcude the output in the operand list, but SDNodes don't, we need to
+ // include the output in the operand list, but SDNodes don't, we need to
// subtract the index by one.
- --OffIdx0;
- --OffIdx1;
+ OffIdx0 -= get(Opc0).NumDefs;
+ OffIdx1 -= get(Opc1).NumDefs;
SDValue Off0 = Load0->getOperand(OffIdx0);
SDValue Off1 = Load1->getOperand(OffIdx1);
@@ -265,8 +256,8 @@ static bool isStride64(unsigned Opc) {
}
}
-bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
- MachineOperand *&BaseOp,
+bool SIInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const {
unsigned Opc = LdSt.getOpcode();
@@ -277,6 +268,11 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
if (OffsetImm) {
// Normal, single offset LDS instruction.
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+ // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
+ // report that here?
+ if (!BaseOp)
+ return false;
+
Offset = OffsetImm->getImm();
assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
"operands of type register.");
@@ -325,7 +321,7 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
if (SOffset && SOffset->isReg())
return false;
- MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (!AddrReg)
return false;
@@ -348,7 +344,7 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
if (!OffsetImm)
return false;
- MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
+ const MachineOperand *SBaseReg = getNamedOperand(LdSt, AMDGPU::OpName::sbase);
BaseOp = SBaseReg;
Offset = OffsetImm->getImm();
assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
@@ -357,7 +353,7 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
}
if (isFLAT(LdSt)) {
- MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+ const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (VAddr) {
// Can't analyze 2 offsets.
if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
@@ -413,11 +409,11 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
return Base1 == Base2;
}
-bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
- MachineOperand &BaseOp2,
+bool SIInstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
+ const MachineOperand &BaseOp2,
unsigned NumLoads) const {
- MachineInstr &FirstLdSt = *BaseOp1.getParent();
- MachineInstr &SecondLdSt = *BaseOp2.getParent();
+ const MachineInstr &FirstLdSt = *BaseOp1.getParent();
+ const MachineInstr &SecondLdSt = *BaseOp2.getParent();
if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOp1, SecondLdSt, BaseOp2))
return false;
@@ -461,7 +457,12 @@ bool SIInstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
const MachineRegisterInfo &MRI =
FirstLdSt.getParent()->getParent()->getRegInfo();
- const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
+
+ const unsigned Reg = FirstDst->getReg();
+
+ const TargetRegisterClass *DstRC = TargetRegisterInfo::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : RI.getPhysRegClass(Reg);
return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}
@@ -511,8 +512,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (RC == &AMDGPU::VGPR_32RegClass) {
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_32RegClass.contains(SrcReg));
+ unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
+ AMDGPU::V_ACCVGPR_READ_B32 : AMDGPU::V_MOV_B32_e32;
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -526,6 +530,21 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (DestReg == AMDGPU::VCC_LO) {
+ if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ // FIXME: Hack until VReg_1 removed.
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+
+ return;
+ }
+
if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
@@ -570,10 +589,83 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (RC == &AMDGPU::AGPR_32RegClass) {
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_32RegClass.contains(SrcReg));
+ if (!AMDGPU::VGPR_32RegClass.contains(SrcReg)) {
+ // First try to find defining accvgpr_write to avoid temporary registers.
+ for (auto Def = MI, E = MBB.begin(); Def != E; ) {
+ --Def;
+ if (!Def->definesRegister(SrcReg, &RI))
+ continue;
+ if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32)
+ break;
+
+ MachineOperand &DefOp = Def->getOperand(1);
+ assert(DefOp.isReg() || DefOp.isImm());
+
+ if (DefOp.isReg()) {
+ // Check that the register source operand is not clobbered before MI.
+ // Immediate operands are always safe to propagate.
+ bool SafeToPropagate = true;
+ for (auto I = Def; I != MI && SafeToPropagate; ++I)
+ if (I->modifiesRegister(DefOp.getReg(), &RI))
+ SafeToPropagate = false;
+
+ if (!SafeToPropagate)
+ break;
+
+ DefOp.setIsKill(false);
+ }
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+ .add(DefOp);
+ return;
+ }
+
+ RegScavenger RS;
+ RS.enterBasicBlock(MBB);
+ RS.forward(MI);
+
+ // Ideally we want to have three registers for a long reg_sequence copy
+ // to hide 2 waitstates between v_mov_b32 and accvgpr_write.
+ unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
+ *MBB.getParent());
+
+ // Registers in the sequence are allocated contiguously so we can just
+ // use register number to pick one of three round-robin temps.
+ unsigned RegNo = DestReg % 3;
+ unsigned Tmp = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ if (!Tmp)
+ report_fatal_error("Cannot scavenge VGPR to copy to AGPR");
+ RS.setRegUsed(Tmp);
+ // Only loop through if there are any free registers left, otherwise
+ // the scavenger may report a fatal error without an emergency spill slot
+ // or spill using the slot.
+ while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
+ unsigned Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
+ break;
+ Tmp = Tmp2;
+ RS.setRegUsed(Tmp);
+ }
+ copyPhysReg(MBB, MI, DL, Tmp, SrcReg, KillSrc);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+ .addReg(Tmp, RegState::Kill);
+ return;
+ }
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isSGPRClass(RC)) {
- if (RI.getRegSizeInBits(*RC) > 32) {
+ // TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
+ if (!(RI.getRegSizeInBits(*RC) % 64)) {
Opcode = AMDGPU::S_MOV_B64;
EltSize = 8;
} else {
@@ -585,6 +677,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
+ } else if (RI.hasAGPRs(RC)) {
+ Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ?
+ AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
+ } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+ Opcode = AMDGPU::V_ACCVGPR_READ_B32;
}
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
@@ -597,6 +694,12 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
+ if (Opcode == TargetOpcode::COPY) {
+ copyPhysReg(MBB, MI, DL, RI.getSubReg(DestReg, SubIdx),
+ RI.getSubReg(SrcReg, SubIdx), KillSrc);
+ continue;
+ }
+
MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
get(Opcode), RI.getSubReg(DestReg, SubIdx));
@@ -696,38 +799,50 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
unsigned TrueReg,
unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineFunction *MF = MBB.getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const TargetRegisterClass *BoolXExecRC =
+ RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
"Not a VGPR32 reg");
if (Cond.size() == 1) {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
} else if (Cond.size() == 2) {
assert(Cond[0].isImm() && "Cond[0] is not an immediate");
switch (Cond[0].getImm()) {
case SIInstrInfo::SCC_TRUE: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
}
case SIInstrInfo::SCC_FALSE: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
.addImm(-1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
@@ -735,11 +850,13 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCNZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
@@ -747,39 +864,49 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::VCCZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(TrueReg)
+ .addImm(0)
.addReg(FalseReg)
.addReg(SReg);
break;
}
case SIInstrInfo::EXECNZ: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+ BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+ : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
- BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64), SReg)
.addImm(-1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
}
case SIInstrInfo::EXECZ: {
- unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
+ unsigned SReg = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
+ BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+ : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
.addImm(0);
- BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg)
+ BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64), SReg)
.addImm(0)
.addImm(-1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
llvm_unreachable("Unhandled branch predicate EXECZ");
@@ -798,7 +925,7 @@ unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
const DebugLoc &DL,
unsigned SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
.addImm(Value)
.addReg(SrcReg);
@@ -811,7 +938,7 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
const DebugLoc &DL,
unsigned SrcReg, int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
.addImm(Value)
.addReg(SrcReg);
@@ -821,6 +948,8 @@ unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB,
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
+ if (RI.hasAGPRs(DstRC))
+ return AMDGPU::COPY;
if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
} else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
@@ -837,12 +966,18 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S32_SAVE;
case 8:
return AMDGPU::SI_SPILL_S64_SAVE;
+ case 12:
+ return AMDGPU::SI_SPILL_S96_SAVE;
case 16:
return AMDGPU::SI_SPILL_S128_SAVE;
+ case 20:
+ return AMDGPU::SI_SPILL_S160_SAVE;
case 32:
return AMDGPU::SI_SPILL_S256_SAVE;
case 64:
return AMDGPU::SI_SPILL_S512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_S1024_SAVE;
default:
llvm_unreachable("unknown register size");
}
@@ -858,10 +993,31 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V96_SAVE;
case 16:
return AMDGPU::SI_SPILL_V128_SAVE;
+ case 20:
+ return AMDGPU::SI_SPILL_V160_SAVE;
case 32:
return AMDGPU::SI_SPILL_V256_SAVE;
case 64:
return AMDGPU::SI_SPILL_V512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_V1024_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_A32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_A64_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_A128_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_A512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_A1024_SAVE;
default:
llvm_unreachable("unknown register size");
}
@@ -906,12 +1062,12 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
- .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
+ .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
// Add the scratch resource registers as implicit uses because we may end up
// needing them, and need to ensure that the reserved registers are
// correctly handled.
-
- FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
+ if (RI.spillSGPRToVGPR())
+ FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
@@ -920,17 +1076,22 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
- unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+ : getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
- BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg, getKillRegState(isKill)) // data
- .addFrameIndex(FrameIndex) // addr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getFrameOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+
+ auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
+ if (RI.hasAGPRs(RC)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MIB.addReg(Tmp, RegState::Define);
+ }
+ MIB.addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -939,12 +1100,18 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S32_RESTORE;
case 8:
return AMDGPU::SI_SPILL_S64_RESTORE;
+ case 12:
+ return AMDGPU::SI_SPILL_S96_RESTORE;
case 16:
return AMDGPU::SI_SPILL_S128_RESTORE;
+ case 20:
+ return AMDGPU::SI_SPILL_S160_RESTORE;
case 32:
return AMDGPU::SI_SPILL_S256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_S512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_S1024_RESTORE;
default:
llvm_unreachable("unknown register size");
}
@@ -960,10 +1127,31 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V96_RESTORE;
case 16:
return AMDGPU::SI_SPILL_V128_RESTORE;
+ case 20:
+ return AMDGPU::SI_SPILL_V160_RESTORE;
case 32:
return AMDGPU::SI_SPILL_V256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_V512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_V1024_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_A32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_A64_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_A128_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_A512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_A1024_RESTORE;
default:
llvm_unreachable("unknown register size");
}
@@ -999,12 +1187,13 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
- FrameInfo.setStackID(FrameIndex, SIStackID::SGPR_SPILL);
+ if (RI.spillSGPRToVGPR())
+ FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
.addFrameIndex(FrameIndex) // addr
.addMemOperand(MMO)
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
- .addReg(MFI->getFrameOffsetReg(), RegState::Implicit);
+ .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
if (ST.hasScalarStores()) {
// m0 is used for offset to scalar stores if used to spill.
@@ -1014,15 +1203,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
- unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
- BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getFrameOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+ unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+ : getVGPRSpillRestoreOpcode(SpillSize);
+ auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
+ if (RI.hasAGPRs(RC)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MIB.addReg(Tmp, RegState::Define);
+ }
+ MIB.addFrameIndex(FrameIndex) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
/// \param @Offset Offset in bytes of the FrameIndex being spilled
@@ -1089,7 +1282,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
// (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROUPS.Y * TIDIG.X)) + TIDIG.Z
getAddNoCarry(Entry, Insert, DL, TIDReg)
.addReg(TIDReg)
- .addReg(TIDIGZReg);
+ .addReg(TIDIGZReg)
+ .addImm(0); // clamp bit
} else {
// Get the wave id
BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64),
@@ -1114,7 +1308,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
getAddNoCarry(MBB, MI, DL, TmpReg)
.addImm(LDSOffset)
- .addReg(TIDReg);
+ .addReg(TIDReg)
+ .addImm(0); // clamp bit
return TmpReg;
}
@@ -1148,13 +1343,17 @@ void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
if (MBB.succ_empty()) {
bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
- if (HasNoTerminator)
- BuildMI(MBB, MBB.end(), DebugLoc(),
- get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG));
+ if (HasNoTerminator) {
+ if (Info->returnsVoid()) {
+ BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::S_ENDPGM)).addImm(0);
+ } else {
+ BuildMI(MBB, MBB.end(), DebugLoc(), get(AMDGPU::SI_RETURN_TO_EPILOG));
+ }
+ }
}
}
-unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
+unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return 1; // FIXME: Do wait states equal cycles?
@@ -1174,18 +1373,42 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_MOV_B64));
break;
+ case AMDGPU::S_MOV_B32_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_MOV_B32));
+ break;
+
case AMDGPU::S_XOR_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
MI.setDesc(get(AMDGPU::S_XOR_B64));
break;
+ case AMDGPU::S_XOR_B32_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_XOR_B32));
+ break;
+
+ case AMDGPU::S_OR_B32_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_OR_B32));
+ break;
+
case AMDGPU::S_ANDN2_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
MI.setDesc(get(AMDGPU::S_ANDN2_B64));
break;
+ case AMDGPU::S_ANDN2_B32_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_ANDN2_B32));
+ break;
+
case AMDGPU::V_MOV_B64_PSEUDO: {
unsigned Dst = MI.getOperand(0).getReg();
unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -1215,24 +1438,28 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
break;
}
case AMDGPU::V_SET_INACTIVE_B32: {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+ .addReg(Exec);
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
.add(MI.getOperand(2));
- BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+ .addReg(Exec);
MI.eraseFromParent();
break;
}
case AMDGPU::V_SET_INACTIVE_B64: {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+ .addReg(Exec);
MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
MI.getOperand(0).getReg())
.add(MI.getOperand(2));
expandPostRAPseudo(*Copy);
- BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, MI, DL, get(NotOpc), Exec)
+ .addReg(Exec);
MI.eraseFromParent();
break;
}
@@ -1282,10 +1509,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
.addReg(RegHi);
- if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
- MIB.addImm(0);
- else
- MIB.add(MI.getOperand(2));
+ MIB.add(MI.getOperand(2));
Bundler.append(MIB);
finalizeBundle(MBB, Bundler.begin());
@@ -1293,10 +1517,17 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::ENTER_WWM: {
+ // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
+ // WWM is entered.
+ MI.setDesc(get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+ : AMDGPU::S_OR_SAVEEXEC_B64));
+ break;
+ }
case AMDGPU::EXIT_WWM: {
- // This only gets its own opcode so that SIFixWWMLiveness can tell when WWM
- // is exited.
- MI.setDesc(get(AMDGPU::S_MOV_B64));
+ // This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
+ // WWM is exited.
+ MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
break;
}
case TargetOpcode::BUNDLE: {
@@ -1492,7 +1723,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
.addReg(PCReg, RegState::Define, AMDGPU::sub0)
.addReg(PCReg, 0, AMDGPU::sub0)
- .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
+ .addMBB(&DestBB, MO_LONG_BRANCH_FORWARD);
BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
.addReg(PCReg, RegState::Define, AMDGPU::sub1)
.addReg(PCReg, 0, AMDGPU::sub1)
@@ -1502,7 +1733,7 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
.addReg(PCReg, RegState::Define, AMDGPU::sub0)
.addReg(PCReg, 0, AMDGPU::sub0)
- .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
+ .addMBB(&DestBB, MO_LONG_BRANCH_BACKWARD);
BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
.addReg(PCReg, RegState::Define, AMDGPU::sub1)
.addReg(PCReg, 0, AMDGPU::sub1)
@@ -1659,6 +1890,10 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
+ case AMDGPU::S_MOV_B32_term:
+ case AMDGPU::S_XOR_B32_term:
+ case AMDGPU::S_OR_B32_term:
+ case AMDGPU::S_ANDN2_B32_term:
break;
case AMDGPU::SI_IF:
case AMDGPU::SI_ELSE:
@@ -1826,7 +2061,7 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
// Limit to equal cost for branch vs. N v_cndmask_b32s.
- return !RI.isSGPRClass(RC) && NumInsts <= 6;
+ return RI.hasVGPRs(RC) && NumInsts <= 6;
}
case SCC_TRUE:
case SCC_FALSE: {
@@ -1907,14 +2142,18 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
const int16_t *SubIndices = Sub0_15;
int NElts = DstSize / 32;
- // 64-bit select is only avaialble for SALU.
+ // 64-bit select is only available for SALU.
+ // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit.
if (Pred == SCC_TRUE) {
- SelOp = AMDGPU::S_CSELECT_B64;
- EltRC = &AMDGPU::SGPR_64RegClass;
- SubIndices = Sub0_15_64;
-
- assert(NElts % 2 == 0);
- NElts /= 2;
+ if (NElts % 2) {
+ SelOp = AMDGPU::S_CSELECT_B32;
+ EltRC = &AMDGPU::SGPR_32RegClass;
+ } else {
+ SelOp = AMDGPU::S_CSELECT_B64;
+ EltRC = &AMDGPU::SGPR_64RegClass;
+ SubIndices = Sub0_15_64;
+ NElts /= 2;
+ }
}
MachineInstrBuilder MIB = BuildMI(
@@ -1934,6 +2173,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addReg(FalseReg, 0, SubIdx)
.addReg(TrueReg, 0, SubIdx);
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+ fixImplicitOperands(*Select);
MIB.addReg(DstElt)
.addImm(SubIdx);
@@ -1955,6 +2195,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
+ case AMDGPU::V_ACCVGPR_WRITE_B32:
+ case AMDGPU::V_ACCVGPR_READ_B32:
return true;
default:
return false;
@@ -2007,6 +2249,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::S_MOV_B32:
+ case AMDGPU::V_ACCVGPR_WRITE_B32:
break;
}
@@ -2020,6 +2263,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::COPY) {
bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+ if (RI.isAGPR(*MRI, UseMI.getOperand(0).getReg())) {
+ if (!isInlineConstant(*ImmOp, AMDGPU::OPERAND_REG_INLINE_AC_INT32))
+ return false;
+ NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32;
+ }
UseMI.setDesc(get(NewOpc));
UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
@@ -2027,7 +2275,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
+ Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64 ||
+ Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMA_F16 || Opc == AMDGPU::V_FMAC_F16_e64) {
// Don't fold if we are using source or output modifiers. The new VOP2
// instructions don't have them.
if (hasAnyModifiersSet(UseMI))
@@ -2042,7 +2292,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (isInlineConstant(UseMI, *Src0, *ImmOp))
return false;
- bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
+ bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMA_F32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMA_F16 || Opc == AMDGPU::V_FMAC_F16_e64;
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -2055,6 +2308,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
return false;
+ unsigned NewOpc =
+ IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16)
+ : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
+ if (pseudoToMCOpcode(NewOpc) == -1)
+ return false;
+
// We need to swap operands 0 and 1 since madmk constant is at operand 1.
const int64_t Imm = ImmOp->getImm();
@@ -2075,14 +2334,16 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0->setIsKill(Src1->isKill());
if (Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_MAC_F16_e64)
+ Opc == AMDGPU::V_MAC_F16_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
Src1->ChangeToImmediate(Imm);
removeModOperands(UseMI);
- UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
+ UseMI.setDesc(get(NewOpc));
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
if (DeleteDef)
@@ -2107,9 +2368,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
} else if ((RI.isPhysicalRegister(Src0->getReg()) &&
- RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
(RI.isVirtualRegister(Src0->getReg()) &&
- RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
return false;
// VGPR is okay as Src0 - fallthrough
}
@@ -2130,6 +2393,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// VGPR is okay as Src1 - fallthrough
}
+ unsigned NewOpc =
+ IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16)
+ : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
+ if (pseudoToMCOpcode(NewOpc) == -1)
+ return false;
+
const int64_t Imm = ImmOp->getImm();
// FIXME: This would be a lot easier if we could return a new instruction
@@ -2142,7 +2411,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
if (Opc == AMDGPU::V_MAC_F32_e64 ||
- Opc == AMDGPU::V_MAC_F16_e64)
+ Opc == AMDGPU::V_MAC_F16_e64 ||
+ Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -2151,7 +2422,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// These come before src2.
removeModOperands(UseMI);
- UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
+ UseMI.setDesc(get(NewOpc));
+ // It might happen that UseMI was commuted
+ // and we now have SGPR as SRC1. If so, the inlined
+ // constant and the SGPR together are illegal.
+ legalizeOperands(UseMI);
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
if (DeleteDef)
@@ -2172,9 +2447,9 @@ static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
return LowOffset + LowWidth <= HighOffset;
}
-bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
- MachineInstr &MIb) const {
- MachineOperand *BaseOp0, *BaseOp1;
+bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
+ const MachineInstr &MIb) const {
+ const MachineOperand *BaseOp0, *BaseOp1;
int64_t Offset0, Offset1;
if (getMemOperandWithOffset(MIa, BaseOp0, Offset0, &RI) &&
@@ -2196,8 +2471,8 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa,
return false;
}
-bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
- MachineInstr &MIb,
+bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA) const {
assert((MIa.mayLoad() || MIa.mayStore()) &&
"MIa must load from or modify a memory location");
@@ -2211,17 +2486,6 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
- if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
- const MachineMemOperand *MMOa = *MIa.memoperands_begin();
- const MachineMemOperand *MMOb = *MIb.memoperands_begin();
- if (MMOa->getValue() && MMOb->getValue()) {
- MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
- MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
- if (!AA->alias(LocA, LocB))
- return true;
- }
- }
-
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
// underlying address space, even if it was lowered to a different one,
@@ -2275,18 +2539,21 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
LiveVariables *LV) const {
unsigned Opc = MI.getOpcode();
bool IsF16 = false;
- bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
+ bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
+ Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
switch (Opc) {
default:
return nullptr;
case AMDGPU::V_MAC_F16_e64:
+ case AMDGPU::V_FMAC_F16_e64:
IsF16 = true;
LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_FMAC_F32_e64:
break;
case AMDGPU::V_MAC_F16_e32:
+ case AMDGPU::V_FMAC_F16_e32:
IsF16 = true;
LLVM_FALLTHROUGH;
case AMDGPU::V_MAC_F32_e32:
@@ -2315,30 +2582,38 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
- if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
+ if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
- (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
+ (ST.getConstantBusLimit(Opc) > 1 ||
+ !Src0->isReg() ||
+ !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
if (auto Imm = getFoldableImm(Src2)) {
- return BuildMI(*MBB, MI, MI.getDebugLoc(),
- get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
- .add(*Dst)
- .add(*Src0)
- .add(*Src1)
- .addImm(Imm);
+ unsigned NewOpc =
+ IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
+ : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
+ if (pseudoToMCOpcode(NewOpc) != -1)
+ return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(*Dst)
+ .add(*Src0)
+ .add(*Src1)
+ .addImm(Imm);
}
+ unsigned NewOpc =
+ IsFMA ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
+ : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
if (auto Imm = getFoldableImm(Src1)) {
- return BuildMI(*MBB, MI, MI.getDebugLoc(),
- get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
- .add(*Dst)
- .add(*Src0)
- .addImm(Imm)
- .add(*Src2);
+ if (pseudoToMCOpcode(NewOpc) != -1)
+ return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(*Dst)
+ .add(*Src0)
+ .addImm(Imm)
+ .add(*Src2);
}
if (auto Imm = getFoldableImm(Src0)) {
- if (isOperandLegal(MI, AMDGPU::getNamedOperandIdx(AMDGPU::V_MADMK_F32,
+ if (pseudoToMCOpcode(NewOpc) != -1 &&
+ isOperandLegal(MI, AMDGPU::getNamedOperandIdx(NewOpc,
AMDGPU::OpName::src0), Src1))
- return BuildMI(*MBB, MI, MI.getDebugLoc(),
- get(IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32))
+ return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src1)
.addImm(Imm)
@@ -2346,9 +2621,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
}
}
- assert((!IsFMA || !IsF16) && "fmac only expected with f32");
- unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
- (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+ unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16 : AMDGPU::V_FMA_F32)
+ : (IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
+ if (pseudoToMCOpcode(NewOpc) == -1)
+ return nullptr;
+
return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.addImm(Src0Mods ? Src0Mods->getImm() : 0)
@@ -2390,12 +2667,26 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
changesVGPRIndexingMode(MI);
}
+bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
+ return Opcode == AMDGPU::DS_ORDERED_COUNT ||
+ Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_SEMA_V ||
+ Opcode == AMDGPU::DS_GWS_SEMA_BR ||
+ Opcode == AMDGPU::DS_GWS_SEMA_P ||
+ Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
+}
+
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (MI.mayStore() && isSMRD(MI))
return true; // scalar store or atomic
+ // This will terminate the function when other lanes may need to continue.
+ if (MI.isReturn())
+ return true;
+
// These instructions cause shader I/O that may cause hardware lockups
// when executed with an empty EXEC mask.
//
@@ -2403,10 +2694,12 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// EXEC = 0, but checking for that case here seems not worth it
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
- Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+ Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
+ Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
+ Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
return true;
- if (MI.isInlineAsm())
+ if (MI.isCall() || MI.isInlineAsm())
return true; // conservative assumption
// These are like SALU instructions in terms of effects, so it's questionable
@@ -2420,8 +2713,36 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
return false;
}
+bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) const {
+ if (MI.isMetaInstruction())
+ return false;
+
+ // This won't read exec if this is an SGPR->SGPR copy.
+ if (MI.isCopyLike()) {
+ if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
+ return true;
+
+ // Make sure this isn't copying exec as a normal operand
+ return MI.readsRegister(AMDGPU::EXEC, &RI);
+ }
+
+ // Make a conservative assumption about the callee.
+ if (MI.isCall())
+ return true;
+
+ // Be conservative with any unhandled generic opcodes.
+ if (!isTargetSpecificOpcode(MI.getOpcode()))
+ return true;
+
+ return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
+}
+
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
switch (Imm.getBitWidth()) {
+ case 1: // This likely will be a condition code mask.
+ return true;
+
case 32:
return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
@@ -2454,7 +2775,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
- case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
@@ -2467,7 +2790,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
// A few special case instructions have 16-bit operands on subtargets
// where 16-bit instructions are not legal.
@@ -2480,19 +2805,14 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
return false;
}
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
- if (isUInt<16>(Imm)) {
- int16_t Trunc = static_cast<int16_t>(Imm);
- return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
- }
- if (!(Imm & 0xffff)) {
- return ST.has16BitInsts() &&
- AMDGPU::isInlinableLiteral16(Imm >> 16, ST.hasInv2PiInlineImm());
- }
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
uint32_t Trunc = static_cast<uint32_t>(Imm);
- return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
}
default:
llvm_unreachable("invalid bitwidth");
@@ -2534,9 +2854,10 @@ static bool compareMachineOp(const MachineOperand &Op0,
bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
const MachineOperand &MO) const {
- const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo];
+ const MCInstrDesc &InstDesc = MI.getDesc();
+ const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpNo];
- assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
return true;
@@ -2547,7 +2868,15 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (MO.isImm() && isInlineConstant(MO, OpInfo))
return RI.opCanUseInlineConstant(OpInfo.OperandType);
- return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+ if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
+ return false;
+
+ if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
+ return true;
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ return ST.hasVOP3Literal();
}
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
@@ -2586,7 +2915,8 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
// Can't shrink instruction with three operands.
// FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
// a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by addding
+ // is vcc, and src0_modifiers and src1_modifiers are not set.
+ // We should handle this the same way we handle vopc, by adding
// a register allocation hint pre-regalloc and then do the shrinking
// post-regalloc.
if (Src2) {
@@ -2606,6 +2936,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_FMAC_F16_e64:
if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
return false;
@@ -2662,7 +2993,8 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
// dst
Inst32.add(MI.getOperand(0));
} else {
- assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
+ (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&
"Unexpected case");
}
@@ -2707,19 +3039,19 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
- // FLAT_SCR is just an SGPR pair.
- if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR))
- return true;
-
- // EXEC register uses the constant bus.
- if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
- return true;
+ // Null is free
+ if (MO.getReg() == AMDGPU::SGPR_NULL)
+ return false;
// SGPRs use the constant bus
- return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 ||
- (!MO.isImplicit() &&
- (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
- AMDGPU::SGPR_64RegClass.contains(MO.getReg()))));
+ if (MO.isImplicit()) {
+ return MO.getReg() == AMDGPU::M0 ||
+ MO.getReg() == AMDGPU::VCC ||
+ MO.getReg() == AMDGPU::VCC_LO;
+ } else {
+ return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SReg_64RegClass.contains(MO.getReg());
+ }
}
static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
@@ -2730,6 +3062,8 @@ static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
switch (MO.getReg()) {
case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
case AMDGPU::M0:
case AMDGPU::FLAT_SCR:
return MO.getReg();
@@ -2746,10 +3080,12 @@ static bool shouldReadExec(const MachineInstr &MI) {
if (SIInstrInfo::isVALU(MI)) {
switch (MI.getOpcode()) {
case AMDGPU::V_READLANE_B32:
- case AMDGPU::V_READLANE_B32_si:
+ case AMDGPU::V_READLANE_B32_gfx6_gfx7:
+ case AMDGPU::V_READLANE_B32_gfx10:
case AMDGPU::V_READLANE_B32_vi:
case AMDGPU::V_WRITELANE_B32:
- case AMDGPU::V_WRITELANE_B32_si:
+ case AMDGPU::V_WRITELANE_B32_gfx6_gfx7:
+ case AMDGPU::V_WRITELANE_B32_gfx10:
case AMDGPU::V_WRITELANE_B32_vi:
return false;
}
@@ -2830,7 +3166,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
- if (MI.getOperand(i).isImm()) {
+ if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@@ -2843,7 +3179,11 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
ErrInfo = "Illegal immediate value for operand.";
@@ -3022,9 +3362,12 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
++ConstantBusCount;
+ SmallVector<unsigned, 2> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRRead(MI);
- if (SGPRUsed != AMDGPU::NoRegister)
+ if (SGPRUsed != AMDGPU::NoRegister) {
++ConstantBusCount;
+ SGPRsUsed.push_back(SGPRUsed);
+ }
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
@@ -3032,23 +3375,37 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(OpIdx);
if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
- if (MO.getReg() != SGPRUsed)
- ++ConstantBusCount;
SGPRUsed = MO.getReg();
+ if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
+ return !RI.regsOverlap(SGPRUsed, SGPR);
+ })) {
+ ++ConstantBusCount;
+ SGPRsUsed.push_back(SGPRUsed);
+ }
} else {
++ConstantBusCount;
++LiteralCount;
}
}
}
- if (ConstantBusCount > 1) {
- ErrInfo = "VOP* instruction uses the constant bus more than once";
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ // v_writelane_b32 is an exception from constant bus restriction:
+ // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const
+ if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
+ Opcode != AMDGPU::V_WRITELANE_B32) {
+ ErrInfo = "VOP* instruction violates constant bus restriction";
return false;
}
if (isVOP3(MI) && LiteralCount) {
- ErrInfo = "VOP3 instruction uses literal";
- return false;
+ if (LiteralCount && !ST.hasVOP3Literal()) {
+ ErrInfo = "VOP3 instruction uses literal";
+ return false;
+ }
+ if (LiteralCount > 1) {
+ ErrInfo = "VOP3 instruction uses more than one literal";
+ return false;
+ }
}
}
@@ -3067,17 +3424,43 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isSOP2(MI) || isSOPC(MI)) {
+ const MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ const MachineOperand &Src1 = MI.getOperand(Src1Idx);
+ unsigned Immediates = 0;
+
+ if (!Src0.isReg() &&
+ !isInlineConstant(Src0, Desc.OpInfo[Src0Idx].OperandType))
+ Immediates++;
+ if (!Src1.isReg() &&
+ !isInlineConstant(Src1, Desc.OpInfo[Src1Idx].OperandType))
+ Immediates++;
+
+ if (Immediates > 1) {
+ ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
+ return false;
+ }
+ }
+
if (isSOPK(MI)) {
- int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
- if (sopkIsZext(MI)) {
- if (!isUInt<16>(Imm)) {
- ErrInfo = "invalid immediate for SOPK instruction";
+ auto Op = getNamedOperand(MI, AMDGPU::OpName::simm16);
+ if (Desc.isBranch()) {
+ if (!Op->isMBB()) {
+ ErrInfo = "invalid branch target for SOPK instruction";
return false;
}
} else {
- if (!isInt<16>(Imm)) {
- ErrInfo = "invalid immediate for SOPK instruction";
- return false;
+ uint64_t Imm = Op->getImm();
+ if (sopkIsZext(MI)) {
+ if (!isUInt<16>(Imm)) {
+ ErrInfo = "invalid immediate for SOPK instruction";
+ return false;
+ }
+ } else {
+ if (!isInt<16>(Imm)) {
+ ErrInfo = "invalid immediate for SOPK instruction";
+ return false;
+ }
}
}
}
@@ -3155,6 +3538,53 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isMIMG(MI)) {
+ const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
+ if (DimOp) {
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
+ AMDGPU::OpName::vaddr0);
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ const AMDGPU::MIMGDimInfo *Dim =
+ AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm());
+
+ if (!Dim) {
+ ErrInfo = "dim is out of range";
+ return false;
+ }
+
+ bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
+ unsigned AddrWords = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+ unsigned VAddrWords;
+ if (IsNSA) {
+ VAddrWords = SRsrcIdx - VAddr0Idx;
+ } else {
+ const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
+ VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
+ if (AddrWords > 8)
+ AddrWords = 16;
+ else if (AddrWords > 4)
+ AddrWords = 8;
+ else if (AddrWords == 3 && VAddrWords == 4) {
+ // CodeGen uses the V4 variant of instructions for three addresses,
+ // because the selection DAG does not support non-power-of-two types.
+ AddrWords = 4;
+ }
+ }
+
+ if (VAddrWords != AddrWords) {
+ ErrInfo = "bad vaddr size";
+ return false;
+ }
+ }
+ }
+
const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
if (DppCt) {
using namespace AMDGPU::DPP;
@@ -3165,10 +3595,29 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
(DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
(DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
(DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
- (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST)) {
+ (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
+ (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
ErrInfo = "Invalid dpp_ctrl value";
return false;
}
+ if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ ErrInfo = "Invalid dpp_ctrl value: "
+ "wavefront shifts are not supported on GFX10+";
+ return false;
+ }
+ if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ ErrInfo = "Invalid dpp_ctrl value: "
+ "broadcats are not supported on GFX10+";
+ return false;
+ }
+ if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
+ ST.getGeneration() < AMDGPUSubtarget::GFX10) {
+ ErrInfo = "Invalid dpp_ctrl value: "
+ "row_share and row_xmask are not supported before GFX10";
+ return false;
+ }
}
return true;
@@ -3183,9 +3632,12 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
case AMDGPU::WQM: return AMDGPU::WQM;
case AMDGPU::WWM: return AMDGPU::WWM;
- case AMDGPU::S_MOV_B32:
- return MI.getOperand(1).isReg() ?
+ case AMDGPU::S_MOV_B32: {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ return MI.getOperand(1).isReg() ||
+ RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
+ }
case AMDGPU::S_ADD_I32:
return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32;
case AMDGPU::S_ADDC_U32:
@@ -3199,7 +3651,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_SUB_U32:
return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
- case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
+ case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32;
+ case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32;
+ case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32;
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
@@ -3244,6 +3698,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
}
+ llvm_unreachable(
+ "Unexpected scalar opcode without corresponding vector one!");
}
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
@@ -3263,30 +3719,21 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
return RI.getRegClass(RCID);
}
-bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
- switch (MI.getOpcode()) {
- case AMDGPU::COPY:
- case AMDGPU::REG_SEQUENCE:
- case AMDGPU::PHI:
- case AMDGPU::INSERT_SUBREG:
- return RI.hasVGPRs(getOpRegClass(MI, 0));
- default:
- return RI.hasVGPRs(getOpRegClass(MI, OpNo));
- }
-}
-
void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
MachineBasicBlock::iterator I = MI;
MachineBasicBlock *MBB = MI.getParent();
MachineOperand &MO = MI.getOperand(OpIdx);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;
const TargetRegisterClass *RC = RI.getRegClass(RCID);
- unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+ unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
if (MO.isReg())
Opcode = AMDGPU::COPY;
else if (RI.isSGPRClass(RC))
- Opcode = AMDGPU::S_MOV_B32;
+ Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC))
@@ -3396,37 +3843,53 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
return isLegalRegOperand(MRI, OpInfo, MO);
// Handle non-register types that are treated like immediates.
- assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
return true;
}
bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand *MO) const {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const TargetRegisterClass *DefinedRC =
OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
if (!MO)
MO = &MI.getOperand(OpIdx);
+ int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
+ int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
+ if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--)
+ return false;
- RegSubRegPair SGPRUsed;
+ SmallDenseSet<RegSubRegPair> SGPRsUsed;
if (MO->isReg())
- SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
+ SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg()));
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (i == OpIdx)
continue;
const MachineOperand &Op = MI.getOperand(i);
if (Op.isReg()) {
- if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
+ RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
+ if (!SGPRsUsed.count(SGPR) &&
usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
- return false;
+ if (--ConstantBusLimit <= 0)
+ return false;
+ SGPRsUsed.insert(SGPR);
}
} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
- return false;
+ if (--ConstantBusLimit <= 0)
+ return false;
+ } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) &&
+ isLiteralConstantLike(Op, InstDesc.OpInfo[i])) {
+ if (!VOP3LiteralLimit--)
+ return false;
+ if (--ConstantBusLimit <= 0)
+ return false;
}
}
}
@@ -3437,7 +3900,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
}
// Handle non-register types that are treated like immediates.
- assert(MO->isImm() || MO->isTargetIndex() || MO->isFI());
+ assert(MO->isImm() || MO->isTargetIndex() || MO->isFI() || MO->isGlobal());
if (!DefinedRC) {
// This operand expects an immediate.
@@ -3452,30 +3915,24 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
unsigned Opc = MI.getOpcode();
const MCInstrDesc &InstrDesc = get(Opc);
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
MachineOperand &Src1 = MI.getOperand(Src1Idx);
// If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32
- // we need to only have one constant bus use.
- //
- // Note we do not need to worry about literal constants here. They are
- // disabled for the operand type for instructions because they will always
- // violate the one constant bus use rule.
+ // we need to only have one constant bus use before GFX10.
bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
- if (HasImplicitSGPR) {
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
-
- if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
- legalizeOpWithMove(MI, Src0Idx);
- }
+ if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 &&
+ Src0.isReg() && (RI.isSGPRReg(MRI, Src0.getReg()) ||
+ isLiteralConstantLike(Src0, InstrDesc.OpInfo[Src0Idx])))
+ legalizeOpWithMove(MI, Src0Idx);
// Special case: V_WRITELANE_B32 accepts only immediate or SGPR operands for
// both the value to write (src0) and lane select (src1). Fix up non-SGPR
// src0/src1 with V_READFIRSTLANE.
if (Opc == AMDGPU::V_WRITELANE_B32) {
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
const DebugLoc &DL = MI.getDebugLoc();
if (Src0.isReg() && RI.isVGPR(MRI, Src0.getReg())) {
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -3493,6 +3950,13 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
return;
}
+ // No VOP2 instructions support AGPRs.
+ if (Src0.isReg() && RI.isAGPR(MRI, Src0.getReg()))
+ legalizeOpWithMove(MI, Src0Idx);
+
+ if (Src1.isReg() && RI.isAGPR(MRI, Src1.getReg()))
+ legalizeOpWithMove(MI, Src1Idx);
+
// VOP2 src0 instructions support all operand types, so we don't need to check
// their legality. If src1 is already legal, we don't need to do anything.
if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
@@ -3520,9 +3984,6 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
return;
}
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
-
// If src0 can be used as src1, commuting will make the operands legal.
// Otherwise we have to give up and insert a move.
//
@@ -3556,12 +4017,11 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
Src1.setSubReg(Src0SubReg);
+ fixImplicitOperands(MI);
}
-// Legalize VOP3 operands. Because all operand types are supported for any
-// operand, and since literal constants are not allowed and should never be
-// seen, we only need to worry about inserting copies if we use multiple SGPR
-// operands.
+// Legalize VOP3 operands. All operand types are supported for any operand
+// but only one literal constant and only starting from GFX10.
void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
@@ -3572,8 +4032,35 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
};
+ if (Opc == AMDGPU::V_PERMLANE16_B32 ||
+ Opc == AMDGPU::V_PERMLANEX16_B32) {
+ // src1 and src2 must be scalar
+ MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]);
+ MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]);
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src1);
+ Src1.ChangeToRegister(Reg, false);
+ }
+ if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src2);
+ Src2.ChangeToRegister(Reg, false);
+ }
+ }
+
// Find the one SGPR operand we are allowed to use.
+ int ConstantBusLimit = ST.getConstantBusLimit(Opc);
+ int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
+ SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+ if (SGPRReg != AMDGPU::NoRegister) {
+ SGPRsUsed.insert(SGPRReg);
+ --ConstantBusLimit;
+ }
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
@@ -3581,16 +4068,38 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
break;
MachineOperand &MO = MI.getOperand(Idx);
- // We should never see a VOP3 instruction with an illegal immediate operand.
- if (!MO.isReg())
+ if (!MO.isReg()) {
+ if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx]))
+ continue;
+
+ if (LiteralLimit > 0 && ConstantBusLimit > 0) {
+ --LiteralLimit;
+ --ConstantBusLimit;
+ continue;
+ }
+
+ --LiteralLimit;
+ --ConstantBusLimit;
+ legalizeOpWithMove(MI, Idx);
continue;
+ }
+
+ if (RI.hasAGPRs(MRI.getRegClass(MO.getReg())) &&
+ !isOperandLegal(MI, Idx, &MO)) {
+ legalizeOpWithMove(MI, Idx);
+ continue;
+ }
if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
continue; // VGPRs are legal
- if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
- SGPRReg = MO.getReg();
- // We can use one SGPR in each VOP3 instruction.
+ // We can use one SGPR in each VOP3 instruction prior to GFX10
+ // and two starting from GFX10.
+ if (SGPRsUsed.count(MO.getReg()))
+ continue;
+ if (ConstantBusLimit > 0) {
+ SGPRsUsed.insert(MO.getReg());
+ --ConstantBusLimit;
continue;
}
@@ -3607,6 +4116,15 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
unsigned DstReg = MRI.createVirtualRegister(SRC);
unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
+ if (RI.hasAGPRs(VRC)) {
+ VRC = RI.getEquivalentVGPRClass(VRC);
+ unsigned NewSrcReg = MRI.createVirtualRegister(VRC);
+ BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
+ get(TargetOpcode::COPY), NewSrcReg)
+ .addReg(SrcReg);
+ SrcReg = NewSrcReg;
+ }
+
if (SubRegs == 1) {
BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
@@ -3691,15 +4209,27 @@ static void
emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
const DebugLoc &DL, MachineOperand &Rsrc) {
+ MachineFunction &MF = *OrigBB.getParent();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ unsigned SaveExecOpc =
+ ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
+ unsigned XorTermOpc =
+ ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
+ unsigned AndOpc =
+ ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
MachineBasicBlock::iterator I = LoopBB.begin();
unsigned VRsrc = Rsrc.getReg();
unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
- unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned CondReg0 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned CondReg1 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned AndCond = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned AndCond = MRI.createVirtualRegister(BoolXExecRC);
unsigned SRsrcSub0 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned SRsrcSub1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
unsigned SRsrcSub2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -3737,22 +4267,22 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), CondReg1)
.addReg(SRsrc, 0, AMDGPU::sub2_sub3)
.addReg(VRsrc, 0, AMDGPU::sub2_sub3);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_B64), AndCond)
+ BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndCond)
.addReg(CondReg0)
.addReg(CondReg1);
MRI.setSimpleHint(SaveExec, AndCond);
// Update EXEC to matching lanes, saving original to SaveExec.
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_AND_SAVEEXEC_B64), SaveExec)
+ BuildMI(LoopBB, I, DL, TII.get(SaveExecOpc), SaveExec)
.addReg(AndCond, RegState::Kill);
// The original instruction is here; we insert the terminators after it.
I = LoopBB.end();
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ BuildMI(LoopBB, I, DL, TII.get(XorTermOpc), Exec)
+ .addReg(Exec)
.addReg(SaveExec);
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
}
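
This is the body of the waterfall loop: V_READFIRSTLANE pulls the resource descriptor held by the first active lane into SGPRs, V_CMP_EQ_U64 plus S_AND_SAVEEXEC restricts EXEC to the lanes holding the same descriptor, the guarded instruction executes for those lanes, and the XOR terminator removes them from EXEC before branching back while any lanes remain. A scalar model of the same idea over a per-lane array (the wave size and the guarded operation are stand-ins):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static void waterfall(const std::vector<uint64_t> &PerLaneRsrc,
                          std::vector<bool> &Done) {
      std::vector<bool> Exec(PerLaneRsrc.size(), true);    // all lanes active
      for (;;) {
        int First = -1;                                    // first active lane
        for (size_t L = 0; L < Exec.size(); ++L)
          if (Exec[L]) { First = (int)L; break; }
        if (First < 0)
          return;                                          // every lane handled
        uint64_t SRsrc = PerLaneRsrc[First];               // V_READFIRSTLANE
        for (size_t L = 0; L < Exec.size(); ++L)
          if (Exec[L] && PerLaneRsrc[L] == SRsrc) {        // V_CMP_EQ + SAVEEXEC
            Done[L] = true;                                // the guarded op runs
            Exec[L] = false;                               // S_XOR_*_term
          }
      }
    }

    int main() {
      std::vector<uint64_t> Rsrc = {7, 7, 9, 7};
      std::vector<bool> Done(4, false);
      waterfall(Rsrc, Done);
      for (bool D : Done)
        assert(D);                                         // two iterations suffice
    }
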
@@ -3763,15 +4293,19 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
MachineOperand &Rsrc, MachineDominatorTree *MDT) {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
- unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned SaveExec = MRI.createVirtualRegister(BoolXExecRC);
// Save the EXEC mask
- BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B64), SaveExec)
- .addReg(AMDGPU::EXEC);
+ BuildMI(MBB, I, DL, TII.get(MovExecOpc), SaveExec).addReg(Exec);
// Killed uses in the instruction we are waterfalling around will be
// incorrect due to the added control-flow.
@@ -3820,8 +4354,7 @@ static void loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
// Restore the EXEC mask
MachineBasicBlock::iterator First = RemainderBB->begin();
- BuildMI(*RemainderBB, First, DL, TII.get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addReg(SaveExec);
+ BuildMI(*RemainderBB, First, DL, TII.get(MovExecOpc), Exec).addReg(SaveExec);
}
// Extract pointer from Rsrc and return a zero-value Rsrc replacement.
@@ -3901,7 +4434,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
continue;
const TargetRegisterClass *OpRC =
MRI.getRegClass(MI.getOperand(i).getReg());
- if (RI.hasVGPRs(OpRC)) {
+ if (RI.hasVectorRegisters(OpRC)) {
VRC = OpRC;
} else {
SRC = OpRC;
@@ -3914,7 +4447,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
- VRC = RI.getEquivalentVGPRClass(SRC);
+ VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC)
+ : RI.getEquivalentVGPRClass(SRC);
}
RC = VRC;
} else {
@@ -3983,7 +4517,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Legalize SI_INIT_M0
if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
MachineOperand &Src = MI.getOperand(0);
- if (Src.isReg() && RI.hasVGPRs(MRI.getRegClass(Src.getReg())))
+ if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
return;
}
@@ -4047,19 +4581,28 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
+ unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
+
unsigned RsrcPtr, NewSRsrc;
std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
// NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
- DebugLoc DL = MI.getDebugLoc();
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
- .addReg(RsrcPtr, 0, AMDGPU::sub0)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e64), NewVAddrLo)
+ .addDef(CondReg0)
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+ .addImm(0);
// NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
- BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
- .addReg(RsrcPtr, 0, AMDGPU::sub1)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi)
+ .addDef(CondReg1, RegState::Dead)
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
+ .addReg(CondReg0, RegState::Kill)
+ .addImm(0);
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
@@ -4106,6 +4649,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
getNamedOperand(MI, AMDGPU::OpName::glc)) {
MIB.addImm(GLC->getImm());
}
+ if (const MachineOperand *DLC =
+ getNamedOperand(MI, AMDGPU::OpName::dlc)) {
+ MIB.addImm(DLC->getImm());
+ }
MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
@@ -4235,37 +4782,37 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
case AMDGPU::S_LSHL_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B32:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHL_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_LSHLREV_B64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_ASHRREV_I64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B64:
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (ST.hasOnlyRevVALUShifts()) {
NewOpcode = AMDGPU::V_LSHRREV_B64;
swapOperands(Inst);
}
@@ -4279,10 +4826,16 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
case AMDGPU::S_CBRANCH_SCC0:
case AMDGPU::S_CBRANCH_SCC1:
// Clear unused bits of vcc
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
- AMDGPU::VCC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ if (ST.isWave32())
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B32),
+ AMDGPU::VCC_LO)
+ .addReg(AMDGPU::EXEC_LO)
+ .addReg(AMDGPU::VCC_LO);
+ else
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
+ AMDGPU::VCC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
break;
case AMDGPU::S_BFE_U64:
@@ -4339,8 +4892,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) {
MachineOperand &Op = Inst.getOperand(i);
if (Op.isReg() && Op.getReg() == AMDGPU::SCC) {
+ // Only propagate through live-def of SCC.
+ if (Op.isDef() && !Op.isDead())
+ addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
Inst.RemoveOperand(i);
- addSCCDefUsersToVALUWorklist(Inst, Worklist);
}
}
@@ -4358,6 +4913,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst,
}
Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent());
+ fixImplicitOperands(Inst);
if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
const MachineOperand &OffsetWidthOp = Inst.getOperand(2);
@@ -4445,6 +5001,7 @@ bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
Inst.RemoveOperand(3);
Inst.setDesc(get(NewOpc));
+ Inst.addOperand(MachineOperand::CreateImm(0)); // clamp bit
Inst.addImplicitDefUseOperands(*MBB.getParent());
MRI.replaceRegWith(OldDstReg, ResultReg);
legalizeOperands(Inst, MDT);
@@ -4514,8 +5071,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
bool Src1IsSGPR = Src1.isReg() &&
RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
- MachineInstr *Not = nullptr;
- MachineInstr *Xor = nullptr;
+ MachineInstr *Xor;
unsigned Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -4523,14 +5079,12 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
// The next iteration over the work list will lower these to the vector
// unit as necessary.
if (Src0IsSGPR) {
- Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
- .add(Src0);
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0);
Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
.addReg(Temp)
.add(Src1);
} else if (Src1IsSGPR) {
- Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp)
- .add(Src1);
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1);
Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
.add(Src0)
.addReg(Temp);
@@ -4538,8 +5092,8 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
.add(Src0)
.add(Src1);
- Not = BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
- .addReg(Temp);
+ MachineInstr *Not =
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp);
Worklist.insert(Not);
}
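
lowerScalarXnor relies on the identity ~(a ^ b) == (~a) ^ b to place the NOT on whichever operand is already scalar, so only the XOR has to migrate to the VALU on the next worklist pass. A quick check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 0x12345678u, B = 0x0F0F0F0Fu;
      uint32_t Xnor = ~(A ^ B);
      assert(Xnor == ((~A) ^ B));  // NOT folded into the first operand
      assert(Xnor == (A ^ ~B));    // or into the second
    }
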
@@ -4670,13 +5224,14 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
unsigned FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
unsigned DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned CarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- unsigned DeadCarryReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+ unsigned CarryReg = MRI.createVirtualRegister(CarryRC);
+ unsigned DeadCarryReg = MRI.createVirtualRegister(CarryRC);
MachineOperand &Dest = Inst.getOperand(0);
MachineOperand &Src0 = Inst.getOperand(1);
@@ -4705,7 +5260,8 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
BuildMI(MBB, MII, DL, get(LoOpc), DestSub0)
.addReg(CarryReg, RegState::Define)
.add(SrcReg0Sub0)
- .add(SrcReg1Sub0);
+ .add(SrcReg1Sub0)
+ .addImm(0); // clamp bit
unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
MachineInstr *HiHalf =
@@ -4713,7 +5269,8 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
.addReg(DeadCarryReg, RegState::Define | RegState::Dead)
.add(SrcReg0Sub1)
.add(SrcReg1Sub1)
- .addReg(CarryReg, RegState::Kill);
+ .addReg(CarryReg, RegState::Kill)
+ .addImm(0); // clamp bit
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
@@ -4943,7 +5500,23 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
E = MRI.use_end(); I != E;) {
MachineInstr &UseMI = *I->getParent();
- if (!canReadVGPR(UseMI, I.getOperandNo())) {
+
+ unsigned OpNo = 0;
+
+ switch (UseMI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::WQM:
+ case AMDGPU::WWM:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::PHI:
+ case AMDGPU::INSERT_SUBREG:
+ break;
+ default:
+ OpNo = I.getOperandNo();
+ break;
+ }
+
+ if (!RI.hasVectorRegisters(getOpRegClass(UseMI, OpNo))) {
Worklist.insert(&UseMI);
do {
@@ -5017,19 +5590,23 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addSCCDefUsersToVALUWorklist(
- MachineInstr &SCCDefInst, SetVectorType &Worklist) const {
+void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+ MachineInstr &SCCDefInst,
+ SetVectorType &Worklist) const {
+ // Ensure that def inst defines SCC, which is still live.
+ assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
+ !Op.isDead() && Op.getParent() == &SCCDefInst);
// This assumes that all the users of SCC are in the same block
// as the SCC def.
- for (MachineInstr &MI :
- make_range(MachineBasicBlock::iterator(SCCDefInst),
- SCCDefInst.getParent()->end())) {
+ for (MachineInstr &MI : // Skip the def inst itself.
+ make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
+ SCCDefInst.getParent()->end())) {
+ // Check if SCC is used first.
+ if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
+ Worklist.insert(&MI);
// Exit if we find another SCC def.
if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) != -1)
return;
-
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1)
- Worklist.insert(&MI);
}
}
@@ -5046,14 +5623,26 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::REG_SEQUENCE:
case AMDGPU::INSERT_SUBREG:
case AMDGPU::WQM:
- case AMDGPU::WWM:
- if (RI.hasVGPRs(NewDstRC))
- return nullptr;
+ case AMDGPU::WWM: {
+ const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
+ if (RI.hasAGPRs(SrcRC)) {
+ if (RI.hasAGPRs(NewDstRC))
+ return nullptr;
+
+ NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
+ if (!NewDstRC)
+ return nullptr;
+ } else {
+ if (RI.hasVGPRs(NewDstRC))
+ return nullptr;
+
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ return nullptr;
+ }
- NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
- if (!NewDstRC)
- return nullptr;
return NewDstRC;
+ }
default:
return NewDstRC;
}
@@ -5139,6 +5728,12 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
}
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ return (22ULL << 44) | // IMG_FORMAT_32_FLOAT
+ (1ULL << 56) | // RESOURCE_LEVEL = 1
+ (3ULL << 60); // OOB_SELECT = 3
+ }
+
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
if (ST.isAmdHsaOS()) {
// Set ATC = 1. GFX9 doesn't have this bit.
@@ -5165,12 +5760,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
}
- // IndexStride = 64.
- Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
+ // IndexStride = 64 for wave64, 32 for wave32.
+ uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 3 : 2;
+ Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ ST.getGeneration() <= AMDGPUSubtarget::GFX9)
Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
return Rsrc23;
@@ -5267,25 +5864,35 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return DescSize; // No operands.
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return DescSize + 4;
+ return isVOP3(MI) ? 12 : (DescSize + 4);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return DescSize + 4;
+ return isVOP3(MI) ? 12 : (DescSize + 4);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx == -1)
return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
- return DescSize + 4;
+ return isVOP3(MI) ? 12 : (DescSize + 4);
return DescSize;
}
+ // Check whether we have extra NSA words.
+ if (isMIMG(MI)) {
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+ if (VAddr0Idx < 0)
+ return 8;
+
+ int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
+ }
+
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -5294,10 +5901,12 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return 0;
case TargetOpcode::BUNDLE:
return getInstBundleSize(MI);
- case TargetOpcode::INLINEASM: {
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR: {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
- return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(),
+ &MF->getSubtarget());
}
default:
return DescSize;
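
Two of the size rules above are easy to misread, so worked cases help: a VALU encoding that carries a literal takes an extra 4-byte literal word (a VOP3 form with a literal therefore sizes to 12 bytes), and an NSA MIMG adds one 4-byte word per group of up to four address registers beyond the first. A couple of worked cases with invented operand indices:

    #include <cassert>

    static unsigned sizeWithLiteral(unsigned DescSize, bool IsVOP3) {
      return IsVOP3 ? 12 : DescSize + 4;
    }

    static unsigned mimgSize(int VAddr0Idx, int RSrcIdx) {
      // Mirrors: 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4); the operand
      // distance is the number of address registers.
      return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
    }

    int main() {
      assert(sizeWithLiteral(4, false) == 8);   // e.g. VOP2 plus a literal
      assert(sizeWithLiteral(8, true) == 12);   // VOP3 plus a literal
      assert(mimgSize(1, 2) == 8);              // one address register, no NSA word
      assert(mimgSize(1, 6) == 12);             // five addresses, one extra NSA word
    }
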
@@ -5332,7 +5941,7 @@ void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo();
if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
- unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstr *SIIF =
BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg)
.add(Branch->getOperand(0))
@@ -5359,8 +5968,8 @@ void SIInstrInfo::convertNonUniformLoopRegion(
if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
- unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned DstReg = MRI.createVirtualRegister(RI.getBoolRC());
+ unsigned BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstrBuilder HeaderPHIBuilder =
BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
@@ -5370,7 +5979,7 @@ void SIInstrInfo::convertNonUniformLoopRegion(
HeaderPHIBuilder.addReg(BackEdgeReg);
} else {
MachineBasicBlock *PMBB = *PI;
- unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
ZeroReg, 0);
HeaderPHIBuilder.addReg(ZeroReg);
@@ -5432,7 +6041,9 @@ SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{ MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
{ MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
{ MO_REL32_LO, "amdgpu-rel32-lo" },
- { MO_REL32_HI, "amdgpu-rel32-hi" }
+ { MO_REL32_HI, "amdgpu-rel32-hi" },
+ { MO_ABS32_LO, "amdgpu-abs32-lo" },
+ { MO_ABS32_HI, "amdgpu-abs32-hi" },
};
return makeArrayRef(TargetFlags);
@@ -5452,8 +6063,8 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC);
+ unsigned UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
+ MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead);
@@ -5480,6 +6091,20 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
}
}
+void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineFunction *MF = MBB->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+
+ if (!ST.isWave32())
+ return;
+
+ for (auto &Op : MI.implicit_operands()) {
+ if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
+ Op.setReg(AMDGPU::VCC_LO);
+ }
+}
+
bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
if (!isSMRD(MI))
return false;
@@ -5493,6 +6118,25 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
return RCID == AMDGPU::SReg_128RegClassID;
}
+bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+ bool Signed) const {
+ // TODO: Should 0 be special cased?
+ if (!ST.hasFlatInstOffsets())
+ return false;
+
+ if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+ return false;
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ return (Signed && isInt<12>(Offset)) ||
+ (!Signed && isUInt<11>(Offset));
+ }
+
+ return (Signed && isInt<13>(Offset)) ||
+ (!Signed && isUInt<12>(Offset));
+}
+
+
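
The offset ranges above differ by generation: GFX10 FLAT offsets are 12-bit signed or 11-bit unsigned, while earlier subtargets that support FLAT offsets at all allow 13-bit signed or 12-bit unsigned. A standalone check with the same numbers (it drops the hasFlatInstOffsets and flat-segment-bug gates handled above, and isIntN/isUIntN stand in for llvm::isInt/isUInt):

    #include <cassert>
    #include <cstdint>

    static bool isIntN(unsigned N, int64_t X) {
      return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
    }
    static bool isUIntN(unsigned N, int64_t X) {
      return X >= 0 && X < (INT64_C(1) << N);
    }

    static bool legalFlatOffset(int64_t Offset, bool Signed, bool IsGFX10) {
      if (IsGFX10)
        return Signed ? isIntN(12, Offset) : isUIntN(11, Offset);
      return Signed ? isIntN(13, Offset) : isUIntN(12, Offset);
    }

    int main() {
      assert(legalFlatOffset(2047, false, /*IsGFX10=*/true));   // fits in 11 bits
      assert(!legalFlatOffset(2048, false, /*IsGFX10=*/true));  // needs 12 bits
      assert(legalFlatOffset(-4096, true, /*IsGFX10=*/false));  // 13-bit signed
    }
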
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
@@ -5500,7 +6144,9 @@ enum SIEncodingFamily {
SDWA = 2,
SDWA9 = 3,
GFX80 = 4,
- GFX9 = 5
+ GFX9 = 5,
+ GFX10 = 6,
+ SDWA10 = 7
};
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
@@ -5513,6 +6159,8 @@ static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
+ case AMDGPUSubtarget::GFX10:
+ return SIEncodingFamily::GFX10;
}
llvm_unreachable("Unknown subtarget generation!");
}
@@ -5521,18 +6169,29 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
SIEncodingFamily Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
- ST.getGeneration() >= AMDGPUSubtarget::GFX9)
+ ST.getGeneration() == AMDGPUSubtarget::GFX9)
Gen = SIEncodingFamily::GFX9;
- if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
- Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
- : SIEncodingFamily::SDWA;
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
// subtarget has UnpackedD16VMem feature.
// TODO: remove this when we discard GFX80 encoding.
if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
Gen = SIEncodingFamily::GFX80;
+ if (get(Opcode).TSFlags & SIInstrFlags::SDWA) {
+ switch (ST.getGeneration()) {
+ default:
+ Gen = SIEncodingFamily::SDWA;
+ break;
+ case AMDGPUSubtarget::GFX9:
+ Gen = SIEncodingFamily::SDWA9;
+ break;
+ case AMDGPUSubtarget::GFX10:
+ Gen = SIEncodingFamily::SDWA10;
+ break;
+ }
+ }
+
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
// -1 means that Opcode is already a native instruction.
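
pseudoToMCOpcode picks the encoding family in stages: the subtarget's base family, then GFX9 for opcodes renamed in GFX9, then GFX80 for unpacked-D16 buffer instructions, then the per-generation SDWA family. A compressed model of that ordering, with booleans standing in for the TSFlags and subtarget feature queries:

    #include <cassert>

    enum Family { SI = 0, VI = 1, SDWA = 2, SDWA9 = 3, GFX80 = 4, GFX9 = 5,
                  GFX10 = 6, SDWA10 = 7 };
    enum Gen { GenSI, GenVI, GenGFX9, GenGFX10 };

    static Family pickFamily(Gen G, bool RenamedInGFX9, bool IsSDWA,
                             bool UnpackedD16Buf) {
      Family F = (G == GenSI) ? SI : (G == GenGFX10) ? GFX10 : VI;
      if (RenamedInGFX9 && G == GenGFX9)
        F = GFX9;                    // renamed opcodes keep the GFX9 encoding
      if (UnpackedD16Buf)
        F = GFX80;                   // unpacked D16 buffer ops reuse GFX80
      if (IsSDWA)
        F = (G == GenGFX9) ? SDWA9 : (G == GenGFX10) ? SDWA10 : SDWA;
      return F;
    }

    int main() {
      assert(pickFamily(GenGFX10, false, /*IsSDWA=*/true, false) == SDWA10);
      assert(pickFamily(GenGFX9, /*RenamedInGFX9=*/true, false, false) == GFX9);
    }
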
@@ -5627,3 +6286,77 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
}
return nullptr;
}
+
+bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+ Register VReg,
+ const MachineInstr &DefMI,
+ const MachineInstr &UseMI) {
+ assert(MRI.isSSA() && "Must be run on SSA");
+
+ auto *TRI = MRI.getTargetRegisterInfo();
+ auto *DefBB = DefMI.getParent();
+
+ // Don't bother searching between blocks, although it is possible this block
+ // doesn't modify exec.
+ if (UseMI.getParent() != DefBB)
+ return true;
+
+ const int MaxInstScan = 20;
+ int NumInst = 0;
+
+ // Stop scan at the use.
+ auto E = UseMI.getIterator();
+ for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ if (++NumInst > MaxInstScan)
+ return true;
+
+ if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+ return true;
+ }
+
+ return false;
+}
+
+bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
+ Register VReg,
+ const MachineInstr &DefMI) {
+ assert(MRI.isSSA() && "Must be run on SSA");
+
+ auto *TRI = MRI.getTargetRegisterInfo();
+ auto *DefBB = DefMI.getParent();
+
+ const int MaxUseInstScan = 10;
+ int NumUseInst = 0;
+
+ for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+ // Don't bother searching between blocks, although it is possible this block
+ // doesn't modify exec.
+ if (UseInst.getParent() != DefBB)
+ return true;
+
+ if (++NumUseInst > MaxUseInstScan)
+ return true;
+ }
+
+ const int MaxInstScan = 20;
+ int NumInst = 0;
+
+ // Stop scan when we have seen all the uses.
+ for (auto I = std::next(DefMI.getIterator()); ; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ if (++NumInst > MaxInstScan)
+ return true;
+
+ if (I->readsRegister(VReg))
+ if (--NumUseInst == 0)
+ return false;
+
+ if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+ return true;
+ }
+}
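
Both helpers above answer "may EXEC change before the use(s) of this def?" and deliberately answer yes whenever they cannot prove otherwise: a use in another block, or a scan past the small instruction budget, gives up. A standalone model of the bounded scan over a flat instruction list (true entries mean "writes EXEC"):

    #include <cassert>
    #include <vector>

    static bool execMayChange(const std::vector<bool> &WritesExec,
                              size_t DefIdx, size_t UseIdx, int MaxScan = 20) {
      int Seen = 0;
      for (size_t I = DefIdx + 1; I < UseIdx; ++I) {
        if (++Seen > MaxScan)
          return true;               // budget exhausted: assume the worst
        if (WritesExec[I])
          return true;               // an EXEC write sits between def and use
      }
      return false;
    }

    int main() {
      std::vector<bool> W = {false, false, true, false, false};
      assert(!execMayChange(W, 0, 2)); // nothing between def(0) and use(2)
      assert(execMayChange(W, 1, 4));  // the EXEC write at index 2 intervenes
    }
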
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 5b1a05f3785e..3ff35da0b963 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1,9 +1,8 @@
//===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -121,14 +120,15 @@ private:
void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
SetVectorType &Worklist) const;
- void
- addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
- SetVectorType &Worklist) const;
+ void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
+ MachineInstr &SCCDefInst,
+ SetVectorType &Worklist) const;
const TargetRegisterClass *
getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
- bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const;
+ bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
+ const MachineInstr &MIb) const;
unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
@@ -143,7 +143,7 @@ protected:
public:
enum TargetOperandFlags {
- MO_MASK = 0x7,
+ MO_MASK = 0xf,
MO_NONE = 0,
// MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
@@ -157,7 +157,13 @@ public:
MO_REL32 = 4,
MO_REL32_LO = 4,
// MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
- MO_REL32_HI = 5
+ MO_REL32_HI = 5,
+
+ MO_LONG_BRANCH_FORWARD = 6,
+ MO_LONG_BRANCH_BACKWARD = 7,
+
+ MO_ABS32_LO = 8,
+ MO_ABS32_HI = 9,
};
explicit SIInstrInfo(const GCNSubtarget &ST);
@@ -173,11 +179,13 @@ public:
int64_t &Offset1,
int64_t &Offset2) const override;
- bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const final;
- bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
+ bool shouldClusterMemOps(const MachineOperand &BaseOp1,
+ const MachineOperand &BaseOp2,
unsigned NumLoads) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
@@ -294,7 +302,8 @@ public:
unsigned Kind) const override;
bool
- areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
bool isFoldableCopy(const MachineInstr &MI) const;
@@ -376,6 +385,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::SOPP;
}
+ static bool isPacked(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
+ }
+
+ bool isPacked(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
+ }
+
static bool isVOP1(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
}
@@ -450,6 +467,8 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DS;
}
+ bool isAlwaysGDS(uint16_t Opcode) const;
+
static bool isMIMG(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
}
@@ -477,6 +496,11 @@ public:
return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
}
+ // FIXME: Make this more precise
+ static bool isFLATScratch(const MachineInstr &MI) {
+ return isSegmentSpecificFLAT(MI);
+ }
+
// Any FLAT encoded instruction, including global_* and scratch_*.
bool isFLAT(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
@@ -546,6 +570,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
}
+ static bool isMAI(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
+ }
+
+ bool isMAI(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
+ }
+
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}
@@ -612,6 +644,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
}
+ static bool isFPAtomic(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
+ }
+
+ bool isFPAtomic(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -620,9 +660,21 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
+ bool hasVGPRUses(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ return llvm::any_of(MI.explicit_uses(),
+ [&MRI, this](const MachineOperand &MO) {
+ return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
+ }
+
/// Whether we must prevent this instruction from executing with EXEC = 0.
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
+ /// Returns true if the instruction could potentially depend on the value of
+ /// exec. If false, exec dependencies may safely be ignored.
+ bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
+
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
@@ -761,10 +813,6 @@ public:
return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}
- /// \returns true if it is legal for the operand at index \p OpNo
- /// to read a VGPR.
- bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
-
/// Legalize the \p OpIndex operand of this instruction by inserting
/// a MOV. For example:
/// ADD_I32_e32 VGPR0, 15
@@ -836,7 +884,7 @@ public:
void insertReturn(MachineBasicBlock &MBB) const;
/// Return the number of wait states that result from executing this
/// instruction.
- unsigned getNumWaitStates(const MachineInstr &MI) const;
+ static unsigned getNumWaitStates(const MachineInstr &MI);
/// Returns the operand named \p Op. If \p MI does not have an
/// operand named \c Op, this function returns nullptr.
@@ -922,10 +970,27 @@ public:
return isUInt<12>(Imm);
}
+ /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
+ /// encoded instruction. If \p Signed, this is for an instruction that
+ /// interprets the offset as signed.
+ bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+ bool Signed) const;
+
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
+
+ const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF)
+ const override {
+ if (OpNum >= TID.getNumOperands())
+ return nullptr;
+ return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
+ }
+
+ void fixImplicitOperands(MachineInstr &MI) const;
};
/// \brief Returns true if a reg:subreg pair P has a TRC class
@@ -956,6 +1021,21 @@ TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
MachineRegisterInfo &MRI);
+/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
+/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
+/// attempt to track between blocks.
+bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+ Register VReg,
+ const MachineInstr &DefMI,
+ const MachineInstr &UseMI);
+
+/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
+/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
+/// track between blocks.
+bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
+ Register VReg,
+ const MachineInstr &DefMI);
+
namespace AMDGPU {
LLVM_READONLY
@@ -1003,17 +1083,14 @@ namespace AMDGPU {
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getVCMPXNoSDstOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
- // For MachineOperands.
- enum TargetFlags {
- TF_LONG_BRANCH_FORWARD = 1 << 0,
- TF_LONG_BRANCH_BACKWARD = 1 << 1
- };
-
} // end namespace AMDGPU
namespace SI {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 13afa4d4974b..c382c816e0b4 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1,25 +1,21 @@
//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-def isCI : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isCIOnly : Predicate<"Subtarget->getGeneration() =="
- "AMDGPUSubtarget::SEA_ISLANDS">,
- AssemblerPredicate <"FeatureSeaIslands">;
-def isVIOnly : Predicate<"Subtarget->getGeneration() =="
- "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate <"FeatureVolcanicIslands">;
+
+def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
+ AssemblerPredicate <"FeatureWavefrontSize32">;
+def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
+ AssemblerPredicate <"FeatureWavefrontSize64">;
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
class GCNPredicateControl : PredicateControl {
- Predicate SIAssemblerPredicate = isSICI;
- Predicate VIAssemblerPredicate = isVI;
+ Predicate SIAssemblerPredicate = isGFX6GFX7;
+ Predicate VIAssemblerPredicate = isGFX8GFX9;
}
// Except for the NONE field, this must be kept in sync with the
@@ -32,6 +28,8 @@ def SIEncodingFamily {
int SDWA9 = 3;
int GFX80 = 4;
int GFX9 = 5;
+ int GFX10 = 6;
+ int SDWA10 = 7;
}
//===----------------------------------------------------------------------===//
@@ -41,10 +39,16 @@ def SIEncodingFamily {
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
- SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
+ SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
+ SDTCisVT<4, i1>]>,
[SDNPMayLoad, SDNPMemOperand]
>;
+def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
+ [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
+>;
+
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
@@ -57,10 +61,6 @@ def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;
-def SIatomic_fadd : SDNode<"AMDGPUISD::ATOMIC_LOAD_FADD", SDTAtomic2_f32,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
@@ -69,6 +69,13 @@ def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
+// load_d16_{lo|hi} ptr, tied_input
+def SIload_d16 : SDTypeProfile<1, 2, [
+ SDTCisPtrTy<1>,
+ SDTCisSameAs<0, 2>
+]>;
+
+
def SDTtbuffer_load : SDTypeProfile<1, 8,
[ // vdata
SDTCisVT<1, v4i32>, // rsrc
@@ -101,9 +108,6 @@ def SDTtbuffer_store : SDTypeProfile<0, 9,
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
-def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
- SDTtbuffer_store,
- [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
SDTtbuffer_store,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -120,6 +124,14 @@ def SDTBufferLoad : SDTypeProfile<1, 7,
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
@@ -138,6 +150,12 @@ def SDTBufferStore : SDTypeProfile<0, 8,
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
+ SDTBufferStore,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
+ SDTBufferStore,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
SDTBufferStore,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
@@ -147,9 +165,7 @@ def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
class SDBufferAtomic<string opcode> : SDNode <opcode,
SDTypeProfile<1, 8,
- [SDTCisVT<0, i32>, // dst
- SDTCisVT<1, i32>, // vdata
- SDTCisVT<2, v4i32>, // rsrc
+ [SDTCisVT<2, v4i32>, // rsrc
SDTCisVT<3, i32>, // vindex(VGPR)
SDTCisVT<4, i32>, // voffset(VGPR)
SDTCisVT<5, i32>, // soffset(SGPR)
@@ -159,6 +175,19 @@ class SDBufferAtomic<string opcode> : SDNode <opcode,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
+class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
+ SDTypeProfile<0, 8,
+ [SDTCisVT<0, ty>, // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // cachepolicy(imm)
+ SDTCisVT<7, i1>]>, // idxen(imm)
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
+>;
+
def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
@@ -169,6 +198,8 @@ def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
+def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
+def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
SDTypeProfile<1, 9,
@@ -185,10 +216,54 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
+class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
+ SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>, // vaddr
+ SDTCisVT<1, ty>]>, // vdata
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
+>;
+
+def SIglobal_atomic_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_FADD", f32>;
+def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;
+
def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;
+def SIlds : SDNode<"AMDGPUISD::LDS",
+ SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+>;
+
+def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
+ SIload_d16,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
+ SIload_d16,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
+ SIload_d16,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
+ SIload_d16,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
+ SIload_d16,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
+ SIload_d16,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
@@ -201,7 +276,8 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
!if(!eq(SrcVT.Value, v2f16.Value), 1,
- 0))));
+ !if(!eq(SrcVT.Value, v4f16.Value), 1,
+ 0)))));
}
class isIntType<ValueType SrcVT> {
@@ -215,8 +291,9 @@ class isIntType<ValueType SrcVT> {
class isPackedType<ValueType SrcVT> {
bit ret =
!if(!eq(SrcVT.Value, v2i16.Value), 1,
- !if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
- );
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
+ ));
}
//===----------------------------------------------------------------------===//
@@ -228,7 +305,7 @@ defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
def atomic_inc_local : local_binary_atomic_op<SIatomic_inc>;
def atomic_dec_local : local_binary_atomic_op<SIatomic_dec>;
-def atomic_load_fadd_local : local_binary_atomic_op<SIatomic_fadd>;
+def atomic_load_fadd_local : local_binary_atomic_op<atomic_load_fadd>;
def atomic_load_fmin_local : local_binary_atomic_op<SIatomic_fmin>;
def atomic_load_fmax_local : local_binary_atomic_op<SIatomic_fmax>;
@@ -250,13 +327,13 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;
-def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
-}]>;
+def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
+ let IsUnindexed = 1;
+}
-def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
-}]>;
+def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+ let IsNonExtLoad = 1;
+}
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
@@ -270,35 +347,49 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr),
let MemoryVT = i64;
}
-def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
-}]>;
+def extload_glue : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+ let IsLoad = 1;
+ let IsAnyExtLoad = 1;
+}
def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}]>;
-def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
-}]>;
+def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let IsZeroExtLoad = 1;
+}
-def az_extload_glue : AZExtLoadBase <unindexedload_glue>;
+def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i8;
+}
-def az_extloadi8_glue : PatFrag<(ops node:$ptr), (az_extload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i8;
+}
-def az_extloadi16_glue : PatFrag<(ops node:$ptr), (az_extload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i16;
+}
-def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
+def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i16;
+}
-def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr), [{
- return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
+def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i8;
+}
+
+def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
+ let IsLoad = 1;
+ let MemoryVT = i16;
+}
def load_glue_align8 : Aligned8Bytes <
(ops node:$ptr), (load_glue node:$ptr)
@@ -311,8 +402,10 @@ def load_glue_align16 : Aligned16Bytes <
def load_local_m0 : LoadFrag<load_glue>, LocalAddress;
def sextloadi8_local_m0 : LoadFrag<sextloadi8_glue>, LocalAddress;
def sextloadi16_local_m0 : LoadFrag<sextloadi16_glue>, LocalAddress;
-def az_extloadi8_local_m0 : LoadFrag<az_extloadi8_glue>, LocalAddress;
-def az_extloadi16_local_m0 : LoadFrag<az_extloadi16_glue>, LocalAddress;
+def extloadi8_local_m0 : LoadFrag<extloadi8_glue>, LocalAddress;
+def zextloadi8_local_m0 : LoadFrag<zextloadi8_glue>, LocalAddress;
+def extloadi16_local_m0 : LoadFrag<extloadi16_glue>, LocalAddress;
+def zextloadi16_local_m0 : LoadFrag<zextloadi16_glue>, LocalAddress;
def load_align8_local_m0 : LoadFrag <load_glue_align8>, LocalAddress;
def load_align16_local_m0 : LoadFrag <load_glue_align16>, LocalAddress;
def atomic_load_32_local_m0 : LoadFrag<atomic_load_32_glue>, LocalAddress;
@@ -386,6 +479,51 @@ def si_setcc_uniform : PatFrag <
return true;
}]>;
+//===----------------------------------------------------------------------===//
+// SDNodes PatFrags for d16 loads
+//===----------------------------------------------------------------------===//
+
+class LoadD16Frag <SDPatternOperator op> : PatFrag<(ops node:$ptr, node:$tied_in), (op node:$ptr, node:$tied_in)>;
+class LocalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, LocalAddress;
+class GlobalLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, GlobalLoadAddress;
+class PrivateLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, PrivateAddress;
+class FlatLoadD16 <SDPatternOperator op> : LoadD16Frag <op>, FlatLoadAddress;
+
+def load_d16_hi_local : LocalLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_local : LocalLoadD16 <SIload_d16_hi_i8>;
+
+def load_d16_hi_global : GlobalLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_global : GlobalLoadD16 <SIload_d16_hi_i8>;
+
+def load_d16_hi_private : PrivateLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_private : PrivateLoadD16 <SIload_d16_hi_i8>;
+
+def load_d16_hi_flat : FlatLoadD16 <SIload_d16_hi>;
+def az_extloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_u8>;
+def sextloadi8_d16_hi_flat : FlatLoadD16 <SIload_d16_hi_i8>;
+
+
+def load_d16_lo_local : LocalLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_local : LocalLoadD16 <SIload_d16_lo_i8>;
+
+def load_d16_lo_global : GlobalLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_global : GlobalLoadD16 <SIload_d16_lo_i8>;
+
+def load_d16_lo_private : PrivateLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_private : PrivateLoadD16 <SIload_d16_lo_i8>;
+
+def load_d16_lo_flat : FlatLoadD16 <SIload_d16_lo>;
+def az_extloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_u8>;
+def sextloadi8_d16_lo_flat : FlatLoadD16 <SIload_d16_lo_i8>;
+
+
+
def lshr_rev : PatFrag <
(ops node:$src1, node:$src0),
(srl $src0, $src1)
@@ -410,6 +548,7 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
>;
def _local_m0 : local_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
+ def _region_m0 : region_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
}
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
@@ -424,7 +563,7 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
-defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 1, SDTAtomic2_f32>;
+defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>;
@@ -433,6 +572,7 @@ def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
>;
def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal<atomic_cmp_swap_glue>;
+def atomic_cmp_swap_region_m0 : AtomicCmpSwapRegion<atomic_cmp_swap_glue>;
def as_i1imm : SDNodeXForm<imm, [{
@@ -482,8 +622,12 @@ class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;
-def SIMM16bit : PatLeaf <(imm),
- [{return isInt<16>(N->getSExtValue());}]
+def SIMM16bit : ImmLeaf <i32,
+ [{return isInt<16>(Imm);}]
+>;
+
+def UIMM16bit : ImmLeaf <i32,
+ [{return isUInt<16>(Imm); }]
>;
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
@@ -515,6 +659,22 @@ def ShiftAmt32Imm : PatLeaf <(imm), [{
return N->getZExtValue() < 32;
}]>;
+def getNegV2I16Imm : SDNodeXForm<build_vector, [{
+ return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
+}]>;
+
+def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
+ assert(N->getNumOperands() == 2);
+ assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ if (Src0 == Src1)
+ return isNegInlineImmediate(Src0.getNode());
+
+ return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
+ (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
+}], getNegV2I16Imm>;
+
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
@@ -588,6 +748,14 @@ def SwizzleMatchClass : AsmOperandClass {
let IsOptional = 1;
}
+def EndpgmMatchClass : AsmOperandClass {
+ let Name = "EndpgmImm";
+ let PredicateMethod = "isEndpgm";
+ let ParserMethod = "parseEndpgmOp";
+ let RenderMethod = "addImmOperands";
+ let IsOptional = 1;
+}
+
def ExpTgtMatchClass : AsmOperandClass {
let Name = "ExpTgt";
let PredicateMethod = "isExpTgt";
@@ -605,6 +773,11 @@ def SwizzleImm : Operand<i16> {
let ParserMatchClass = SwizzleMatchClass;
}
+def EndpgmImm : Operand<i16> {
+ let PrintMethod = "printEndpgm";
+ let ParserMatchClass = EndpgmMatchClass;
+}
+
def SWaitMatchClass : AsmOperandClass {
let Name = "SWaitCnt";
let RenderMethod = "addImmOperands";
@@ -619,11 +792,41 @@ def VReg32OrOffClass : AsmOperandClass {
def WAIT_FLAG : Operand <i32> {
let ParserMatchClass = SWaitMatchClass;
let PrintMethod = "printWaitFlag";
+ let OperandType = "OPERAND_IMMEDIATE";
}
include "SIInstrFormats.td"
include "VIInstrFormats.td"
+def BoolReg : AsmOperandClass {
+ let Name = "BoolReg";
+ let ParserMethod = "parseBoolReg";
+ let RenderMethod = "addRegOperands";
+}
+
+class BoolRC : RegisterOperand<SReg_1> {
+ let ParserMatchClass = BoolReg;
+ let DecoderMethod = "decodeBoolReg";
+}
+
+def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
+ let ParserMatchClass = BoolReg;
+ let DecoderMethod = "decodeBoolReg";
+}
+
+def VOPDstS64orS32 : BoolRC {
+ let PrintMethod = "printVOPDst";
+}
+
+// SCSrc_i1 is the operand for pseudo instructions only.
+// Boolean immediates shall not be exposed to codegen instructions.
+def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM_INT32";
+ let ParserMatchClass = BoolReg;
+ let DecoderMethod = "decodeBoolReg";
+}
+
// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
@@ -662,11 +865,12 @@ def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;
-def SDWAVopcDst : VOPDstOperand<SReg_64> {
+def SDWAVopcDst : BoolRC {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_SDWA_VOPC_DST";
let EncoderMethod = "getSDWAVopcDstEncoding";
let DecoderMethod = "decodeSDWAVopcDst";
+ let PrintMethod = "printVOPDst";
}
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
@@ -688,21 +892,11 @@ class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
let ParserMatchClass = MatchClass;
}
-class NamedOperandU12<string Name, AsmOperandClass MatchClass> : Operand<i16> {
- let PrintMethod = "print"#Name;
- let ParserMatchClass = MatchClass;
-}
-
class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
}
-class NamedOperandS13<string Name, AsmOperandClass MatchClass> : Operand<i16> {
- let PrintMethod = "print"#Name;
- let ParserMatchClass = MatchClass;
-}
-
class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
@@ -720,8 +914,7 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
-def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>;
-def offset_s13 : NamedOperandS13<"OffsetS13", NamedMatchClass<"OffsetS13">>;
+def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
@@ -732,6 +925,7 @@ def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
+def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
@@ -746,11 +940,15 @@ def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
+def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
+
+def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
+def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
@@ -762,6 +960,10 @@ def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
+def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
+def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
+
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@@ -793,9 +995,6 @@ def f32kimm : kimmOperand<i32>;
def KImmFP16MatchClass : KImmMatchClass<16>;
def f16kimm : kimmOperand<i16>;
-
-def VOPDstS64 : VOPDstOperand <SReg_64>;
-
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
@@ -863,7 +1062,7 @@ def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
def FPVRegInputModsMatchClass : AsmOperandClass {
let Name = "VRegWithFPInputMods";
let ParserMethod = "parseRegWithFPInputMods";
- let PredicateMethod = "isVReg";
+ let PredicateMethod = "isVReg32";
}
def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
@@ -890,7 +1089,7 @@ def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def IntVRegInputModsMatchClass : AsmOperandClass {
let Name = "VRegWithIntInputMods";
let ParserMethod = "parseRegWithIntInputMods";
- let PredicateMethod = "isVReg";
+ let PredicateMethod = "isVReg32";
}
def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
@@ -941,6 +1140,8 @@ def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
+// VOP3Mods, but only allowed for f32 operands.
+def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
@@ -995,6 +1196,31 @@ def TRAPID{
int LLVM_DEBUG_TRAP = 3;
}
+def HWREG {
+ int MODE = 1;
+ int STATUS = 2;
+ int TRAPSTS = 3;
+ int HW_ID = 4;
+ int GPR_ALLOC = 5;
+ int LDS_ALLOC = 6;
+ int IB_STS = 7;
+ int MEM_BASES = 15;
+ int TBA_LO = 16;
+ int TBA_HI = 17;
+ int TMA_LO = 18;
+ int TMA_HI = 19;
+ int FLAT_SCR_LO = 20;
+ int FLAT_SCR_HI = 21;
+ int XNACK_MASK = 22;
+ int POPS_PACKER = 25;
+}
+
+class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
+ int ret = !or(Reg,
+ !or(!shl(Offset, 6),
+ !shl(!add(Size, -1), 11)));
+}
+
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
@@ -1045,18 +1271,26 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _si : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
EXPe {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
+ let AssemblerPredicates = [isGFX6GFX7];
+ let DecoderNamespace = "GFX6GFX7";
let DisableDecoder = DisableSIDecoder;
}
def _vi : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
EXPe_vi {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
+ let AssemblerPredicates = [isGFX8GFX9];
+ let DecoderNamespace = "GFX8";
let DisableDecoder = DisableVIDecoder;
}
+
+ def _gfx10 : EXP_Helper<done>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
+ EXPe {
+ let AssemblerPredicates = [isGFX10Plus];
+ let DecoderNamespace = "GFX10";
+ let DisableDecoder = DisableSIDecoder;
+ }
}
}
}
@@ -1080,7 +1314,19 @@ class getVALUDstForVT<ValueType VT> {
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
- VOPDstOperand<SReg_64>)))); // else VT == i1
+ VOPDstS64orS32)))); // else VT == i1
+}
+
+// Returns true if VT is floating point.
+class getIsFP<ValueType VT> {
+ bit ret = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
+ !if(!eq(VT.Value, v4f16.Value), 1,
+ !if(!eq(VT.Value, f32.Value), 1,
+ !if(!eq(VT.Value, v2f32.Value), 1,
+ !if(!eq(VT.Value, f64.Value), 1,
+ !if(!eq(VT.Value, v2f64.Value), 1,
+ 0)))))));
}
// Returns the register class to use for the destination of VOP[12C]
@@ -1094,11 +1340,7 @@ class getSDWADstForVT<ValueType VT> {
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, v2f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0))));
+ bit isFP = getIsFP<VT>.ret;
RegisterOperand ret =
!if(isFP,
@@ -1107,8 +1349,11 @@ class getVOPSrc0ForVT<ValueType VT> {
!if(!eq(VT.Value, f16.Value),
VSrc_f16,
!if(!eq(VT.Value, v2f16.Value),
- VCSrc_v2f16,
- VSrc_f32
+ VSrc_v2f16,
+ !if(!eq(VT.Value, v4f16.Value),
+ AVSrc_64,
+ VSrc_f32
+ )
)
)
),
@@ -1117,7 +1362,7 @@ class getVOPSrc0ForVT<ValueType VT> {
!if(!eq(VT.Value, i16.Value),
VSrc_b16,
!if(!eq(VT.Value, v2i16.Value),
- VCSrc_v2b16,
+ VSrc_v2b16,
VSrc_b32
)
)
@@ -1132,9 +1377,7 @@ class getVregSrcForVT<ValueType VT> {
}
class getSDWASrcForVT <ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- 0));
+ bit isFP = getIsFP<VT>.ret;
RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
RegisterOperand ret = !if(isFP, retFlt, retInt);
@@ -1143,33 +1386,32 @@ class getSDWASrcForVT <ValueType VT> {
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, v2f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0))));
+ bit isFP = getIsFP<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
VSrc_128,
!if(!eq(VT.Size, 64),
!if(isFP,
- VCSrc_f64,
- VCSrc_b64),
+ VSrc_f64,
+ VSrc_b64),
!if(!eq(VT.Value, i1.Value),
- SCSrc_i1,
+ SSrc_i1,
!if(isFP,
!if(!eq(VT.Value, f16.Value),
- VCSrc_f16,
+ VSrc_f16,
!if(!eq(VT.Value, v2f16.Value),
- VCSrc_v2f16,
- VCSrc_f32
+ VSrc_v2f16,
+ !if(!eq(VT.Value, v4f16.Value),
+ AVSrc_64,
+ VSrc_f32
+ )
)
),
!if(!eq(VT.Value, i16.Value),
- VCSrc_b16,
+ VSrc_b16,
!if(!eq(VT.Value, v2i16.Value),
- VCSrc_v2b16,
- VCSrc_b32
+ VSrc_v2b16,
+ VSrc_b32
)
)
)
@@ -1190,11 +1432,8 @@ class isModifierType<ValueType SrcVT> {
}
// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0)));
+class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
+ bit isFP = getIsFP<VT>.ret;
bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
@@ -1203,7 +1442,7 @@ class getSrcMod <ValueType VT> {
FP16InputMods,
FP32InputMods
),
- Int32InputMods)
+ !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
);
}
@@ -1213,10 +1452,7 @@ class getOpSelMod <ValueType VT> {
// Return type of input modifiers operand specified input operand for DPP
class getSrcModExt <ValueType VT> {
- bit isFP = !if(!eq(VT.Value, f16.Value), 1,
- !if(!eq(VT.Value, f32.Value), 1,
- !if(!eq(VT.Value, f64.Value), 1,
- 0)));
+ bit isFP = getIsFP<VT>.ret;
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
@@ -1238,7 +1474,7 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasIntClamp, bit HasModifiers, bit HasOMod,
+ bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
@@ -1276,16 +1512,33 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
/* endif */ )
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
- // VOP3 with modifiers
- !if (!eq(HasOMod, 1),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp))
+ !if (!eq(HasSrc2Mods, 1),
+ // VOP3 with modifiers
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp, omod:$omod),
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2))),
+ // VOP3 with modifiers except src2
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2, clampmod:$clamp, omod:$omod),
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2, clampmod:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2))))
/* else */,
// VOP3 without modifiers
!if (!eq(HasIntClamp, 1),
@@ -1398,6 +1651,42 @@ class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1
/* endif */)));
}
+class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> {
+ dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod>.ret,
+ (ins FI:$fi));
+}
+
+class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> {
+ dag ret = !if (!eq(NumSrcArgs, 0),
+ // VOP1 without input operands (V_NOP)
+ (ins dpp8:$dpp8, FI:$fi),
+ !if (!eq(NumSrcArgs, 1),
+ !if (!eq(HasModifiers, 1),
+ // VOP1_DPP with modifiers
+ (ins DstRC:$old, Src0Mod:$src0_modifiers,
+ Src0RC:$src0, dpp8:$dpp8, FI:$fi)
+ /* else */,
+ // VOP1_DPP without modifiers
+ (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
+ /* endif */)
+ /* NumSrcArgs == 2 */,
+ !if (!eq(HasModifiers, 1),
+ // VOP2_DPP with modifiers
+ (ins DstRC:$old,
+ Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ dpp8:$dpp8, FI:$fi)
+ /* else */,
+ // VOP2_DPP without modifiers
+ (ins DstRC:$old,
+ Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
+ /* endif */)));
+}
// Ins for SDWA
@@ -1556,6 +1845,26 @@ class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}
+class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+ string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
+}
+
+class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+ string dst = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ "$sdst",
+ "$vdst"),
+ ""); // use $sdst for VOPC
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string args = !if(!eq(HasModifiers, 0),
+ getAsm32<0, NumSrcArgs, DstVT>.ret,
+ ", "#src0#src1);
+ string ret = dst#args#"$dpp8$fi";
+}
+
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
@@ -1650,9 +1959,12 @@ def PatGenMode {
int Pattern = 1;
}
-class VOPProfile <list<ValueType> _ArgVT> {
+class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
+ bit _EnableClamp = 0> {
field list<ValueType> ArgVT = _ArgVT;
+ field bit EnableF32SrcMods = _EnableF32SrcMods;
+ field bit EnableClamp = _EnableClamp;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
@@ -1670,9 +1982,9 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
- field Operand Src0Mod = getSrcMod<Src0VT>.ret;
- field Operand Src1Mod = getSrcMod<Src1VT>.ret;
- field Operand Src2Mod = getSrcMod<Src2VT>.ret;
+ field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
+ field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
+ field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
@@ -1688,12 +2000,16 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
  // TODO: Modifiers logic is somewhat ad hoc here, to be refined later
- field bit HasModifiers = isModifierType<Src0VT>.ret;
+ // HasModifiers affects the normal and DPP encodings. We take note of EnableF32SrcMods, which
+ // enables modifiers for i32 type.
+ field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
+ // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
+ // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
@@ -1702,7 +2018,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
- field bit HasClamp = HasModifiers;
+ field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
field bit HasSDWAClamp = EmitDst;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
@@ -1721,6 +2037,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasExtSDWA9 = HasExt;
field int NeedPatGen = PatGenMode.NoPattern;
+ field bit IsMAI = 0;
+
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -1732,12 +2050,13 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Outs32 = Outs;
field dag Outs64 = Outs;
field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasIntClamp, HasModifiers, HasOMod, Src0Mod, Src1Mod,
- Src2Mod>.ret;
+ HasIntClamp, HasModifiers, HasSrc2Mods,
+ HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
@@ -1751,6 +2070,10 @@ class VOPProfile <list<ValueType> _ArgVT> {
getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
(ins));
+ field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
+ Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
@@ -1766,8 +2089,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
HasSrc2FloatMods>.ret;
field string AsmDPP = !if(HasExtDPP,
getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
+ field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
+
+ field string TieRegDPP = "$old";
}
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
@@ -1828,6 +2155,7 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
@@ -1848,6 +2176,19 @@ def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
+def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
+def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
+def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
+def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
+def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
+def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
+def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
+def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
+def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
+def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
+def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
+def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;
+
class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
@@ -1871,13 +2212,12 @@ class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
let isCodeGenOnly = 1;
}
+// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
- string asm> :
+ string asm, int encodingFamily> :
VINTRPCommon <outs, ins, asm, []>,
VINTRPe <op>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicate = SIAssemblerPredicate;
- let DecoderNamespace = "SICI";
+ SIMCInstr<opName, encodingFamily> {
let DisableDecoder = DisableSIDecoder;
}
@@ -1887,19 +2227,25 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
VINTRPe_vi <op>,
SIMCInstr<opName, SIEncodingFamily.VI> {
let AssemblerPredicate = VIAssemblerPredicate;
- let DecoderNamespace = "VI";
+ let DecoderNamespace = "GFX8";
let DisableDecoder = DisableVIDecoder;
}
+// FIXME-GFX10: WIP.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
list<dag> pattern = []> {
def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
- def _si : VINTRP_Real_si <op, NAME, outs, ins, asm>;
+ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
+ } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
-}
+ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
+ } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+}
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
@@ -1981,7 +2327,9 @@ def getMCOpcodeGen : InstrMapping {
// does not actually change the encoding, and thus may be
// removed later.
[!cast<string>(SIEncodingFamily.GFX80)],
- [!cast<string>(SIEncodingFamily.GFX9)]];
+ [!cast<string>(SIEncodingFamily.GFX9)],
+ [!cast<string>(SIEncodingFamily.GFX10)],
+ [!cast<string>(SIEncodingFamily.SDWA10)]];
}
// Get equivalent SOPK instruction.
@@ -2044,6 +2392,24 @@ def getGlobalSaddrOp : InstrMapping {
let ValueCols = [["1"]];
}
+// Maps a v_cmpx opcode with sdst to opcode without sdst.
+def getVCMPXNoSDstOp : InstrMapping {
+ let FilterClass = "VCMPXNoSDstTable";
+ let RowFields = ["NoSDstOp"];
+ let ColFields = ["HasSDst"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
+// Maps a SOPP to a SOPP with S_NOP
+def getSOPPWithRelaxation : InstrMapping {
+ let FilterClass = "Base_SOPP";
+ let RowFields = ["AsmString"];
+ let ColFields = ["Size"];
+ let KeyCol = ["4"];
+ let ValueCols = [["8"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index b6b00c2e4257..70f20bb69370 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -1,9 +1,8 @@
//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file was originally auto-generated from a GPU register header file and
@@ -12,7 +11,7 @@
//===----------------------------------------------------------------------===//
class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateControl {
- let SubtargetPredicate = isGCN;
+
}
include "SOPInstructions.td"
@@ -122,7 +121,14 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
-def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
+def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
+ let Defs = [EXEC];
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+
+def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
@@ -155,13 +161,12 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
>;
def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
- (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
+ (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
>;
def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
- (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
+ (outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
>;
-
} // End usesCustomInserter = 1, Defs = [SCC]
let usesCustomInserter = 1 in {
@@ -169,23 +174,30 @@ def GET_GROUPSTATICSIZE : SPseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
} // End let usesCustomInserter = 1, SALU = 1
-def S_MOV_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
- (ins SSrc_b64:$src0)> {
- let isAsCheapAsAMove = 1;
+// Wrap an instruction by duplicating it, except for setting isTerminator.
+class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
+ base_inst.OutOperandList,
+ base_inst.InOperandList> {
+ let Uses = base_inst.Uses;
+ let Defs = base_inst.Defs;
let isTerminator = 1;
+ let isAsCheapAsAMove = base_inst.isAsCheapAsAMove;
+ let hasSideEffects = base_inst.hasSideEffects;
+ let UseNamedOperandTable = base_inst.UseNamedOperandTable;
+ let CodeSize = base_inst.CodeSize;
}
-def S_XOR_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
- (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let isAsCheapAsAMove = 1;
- let isTerminator = 1;
- let Defs = [SCC];
+let WaveSizePredicate = isWave64 in {
+def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
+def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
+def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
}
-def S_ANDN2_B64_term : SPseudoInstSI<(outs SReg_64:$dst),
- (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
- let isAsCheapAsAMove = 1;
- let isTerminator = 1;
+let WaveSizePredicate = isWave32 in {
+def S_MOV_B32_term : WrapTerminatorInst<S_MOV_B32>;
+def S_XOR_B32_term : WrapTerminatorInst<S_XOR_B32>;
+def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
+def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
}
def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
@@ -195,7 +207,6 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
- let isBarrier = 1;
let isConvergent = 1;
let FixedSize = 1;
let Size = 0;
@@ -222,30 +233,30 @@ let isTerminator = 1 in {
let OtherPredicates = [EnableLateCFGStructurize] in {
def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
(outs),
- (ins SReg_64:$vcc, brtarget:$target),
+ (ins SReg_1:$vcc, brtarget:$target),
[(brcond i1:$vcc, bb:$target)]> {
let Size = 12;
}
}
def SI_IF: CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
- [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
+ (outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target),
+ [(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
let Constraints = "";
let Size = 12;
let hasSideEffects = 1;
}
def SI_ELSE : CFPseudoInstSI <
- (outs SReg_64:$dst),
- (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+ (outs SReg_1:$dst),
+ (ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
let Size = 12;
let hasSideEffects = 1;
}
def SI_LOOP : CFPseudoInstSI <
- (outs), (ins SReg_64:$saved, brtarget:$target),
- [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
+ (outs), (ins SReg_1:$saved, brtarget:$target),
+ [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {
let Size = 8;
let isBranch = 1;
let hasSideEffects = 1;
@@ -254,8 +265,7 @@ def SI_LOOP : CFPseudoInstSI <
} // End isTerminator = 1
def SI_END_CF : CFPseudoInstSI <
- (outs), (ins SReg_64:$saved),
- [(int_amdgcn_end_cf i64:$saved)], 1, 1> {
+ (outs), (ins SReg_1:$saved), [], 1, 1> {
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
@@ -265,8 +275,7 @@ def SI_END_CF : CFPseudoInstSI <
}
def SI_IF_BREAK : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
- [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
+ (outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> {
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
@@ -292,7 +301,7 @@ multiclass PseudoInstKill <dag ins> {
}
}
-defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
+defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
let Defs = [EXEC,VCC] in
@@ -311,7 +320,7 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
}
def SI_PS_LIVE : PseudoInstSI <
- (outs SReg_64:$dst), (ins),
+ (outs SReg_1:$dst), (ins),
[(set i1:$dst, (int_amdgcn_ps_live))]> {
let SALU = 1;
}
@@ -340,6 +349,15 @@ def SI_INIT_EXEC : SPseudoInstSI <
let Defs = [EXEC];
let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
+ let WaveSizePredicate = isWave64;
+}
+
+def SI_INIT_EXEC_LO : SPseudoInstSI <
+ (outs), (ins i32imm:$src), []> {
+ let Defs = [EXEC_LO];
+ let usesCustomInserter = 1;
+ let isAsCheapAsAMove = 1;
+ let WaveSizePredicate = isWave32;
}
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
@@ -374,11 +392,14 @@ def SI_RETURN : SPseudoInstSI <
// This version is only needed so we can fill in the output register in
// the custom inserter.
def SI_CALL_ISEL : SPseudoInstSI <
- (outs), (ins SSrc_b64:$src0), [(AMDGPUcall i64:$src0)]> {
+ (outs), (ins SSrc_b64:$src0, unknown:$callee),
+ [(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
let Size = 4;
let isCall = 1;
let SchedRW = [WriteBranch];
let usesCustomInserter = 1;
+ // TODO: Should really base this on the call target
+ let isConvergent = 1;
}
// Wrapper around s_swappc_b64 with extra $callee parameter to track
@@ -389,23 +410,14 @@ def SI_CALL : SPseudoInstSI <
let isCall = 1;
let UseNamedOperandTable = 1;
let SchedRW = [WriteBranch];
+ // TODO: Should really base this on the call target
+ let isConvergent = 1;
}
// Tail call handling pseudo
-def SI_TCRETURN_ISEL : SPseudoInstSI<(outs),
- (ins SSrc_b64:$src0, i32imm:$fpdiff),
- [(AMDGPUtc_return i64:$src0, i32:$fpdiff)]> {
- let isCall = 1;
- let isTerminator = 1;
- let isReturn = 1;
- let isBarrier = 1;
- let SchedRW = [WriteBranch];
- let usesCustomInserter = 1;
-}
-
-def SI_TCRETURN : SPseudoInstSI <
- (outs),
- (ins SSrc_b64:$src0, unknown:$callee, i32imm:$fpdiff)> {
+def SI_TCRETURN : SPseudoInstSI <(outs),
+ (ins SSrc_b64:$src0, unknown:$callee, i32imm:$fpdiff),
+ [(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
let Size = 4;
let isCall = 1;
let isTerminator = 1;
@@ -413,6 +425,8 @@ def SI_TCRETURN : SPseudoInstSI <
let isBarrier = 1;
let UseNamedOperandTable = 1;
let SchedRW = [WriteBranch];
+ // TODO: Should really base this on the call target
+ let isConvergent = 1;
}
@@ -424,6 +438,8 @@ def ADJCALLSTACKUP : SPseudoInstSI<
let FixedSize = 1;
let hasSideEffects = 1;
let usesCustomInserter = 1;
+ let SchedRW = [WriteSALU];
+ let Defs = [SCC];
}
def ADJCALLSTACKDOWN : SPseudoInstSI<
@@ -433,6 +449,8 @@ def ADJCALLSTACKDOWN : SPseudoInstSI<
let Size = 8; // Worst case. (s_add_u32 + constant)
let hasSideEffects = 1;
let usesCustomInserter = 1;
+ let SchedRW = [WriteSALU];
+ let Defs = [SCC];
}
let Defs = [M0, EXEC, SCC],
@@ -490,9 +508,12 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
// SI_SPILL_32_* instructions.
defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S96 : SI_SPILL_SGPR <SReg_96>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1, VGPRSpill = 1,
@@ -504,7 +525,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let mayStore = 1;
let mayLoad = 0;
// (2 * 4) + (8 * num_subregs) bytes maximum
- let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
def _RESTORE : VPseudoInstSI <
@@ -515,7 +538,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let mayLoad = 1;
// (2 * 4) + (8 * num_subregs) bytes maximum
- let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
} // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
}
@@ -524,21 +549,74 @@ defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
defm SI_SPILL_V64 : SI_SPILL_VGPR <VReg_64>;
defm SI_SPILL_V96 : SI_SPILL_VGPR <VReg_96>;
defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
+
+multiclass SI_SPILL_AGPR <RegisterClass vgpr_class> {
+ let UseNamedOperandTable = 1, VGPRSpill = 1,
+ Constraints = "@earlyclobber $tmp",
+ SchedRW = [WriteVMEM] in {
+ def _SAVE : VPseudoInstSI <
+ (outs VGPR_32:$tmp),
+ (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
+ SReg_32:$soffset, i32imm:$offset)> {
+ let mayStore = 1;
+ let mayLoad = 0;
+ // (2 * 4) + (16 * num_subregs) bytes maximum
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
+ }
+
+ def _RESTORE : VPseudoInstSI <
+ (outs vgpr_class:$vdata, VGPR_32:$tmp),
+ (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
+ i32imm:$offset)> {
+ let mayStore = 0;
+ let mayLoad = 1;
+
+ // (2 * 4) + (16 * num_subregs) bytes maximum
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
+ }
+ } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
+}
+
+defm SI_SPILL_A32 : SI_SPILL_AGPR <AGPR_32>;
+defm SI_SPILL_A64 : SI_SPILL_AGPR <AReg_64>;
+defm SI_SPILL_A128 : SI_SPILL_AGPR <AReg_128>;
+defm SI_SPILL_A512 : SI_SPILL_AGPR <AReg_512>;
+defm SI_SPILL_A1024 : SI_SPILL_AGPR <AReg_1024>;
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
(ins si_ga:$ptr_lo, si_ga:$ptr_hi),
[(set SReg_64:$dst,
- (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr_lo), (tglobaladdr:$ptr_hi))))]> {
+ (i64 (SIpc_add_rel_offset tglobaladdr:$ptr_lo, tglobaladdr:$ptr_hi)))]> {
let Defs = [SCC];
}
def : GCNPat <
+ (SIpc_add_rel_offset tglobaladdr:$ptr_lo, 0),
+ (SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
+>;
+
+def : GCNPat <
(AMDGPUinit_exec i64:$src),
(SI_INIT_EXEC (as_i64imm $src))
->;
+> {
+ let WaveSizePredicate = isWave64;
+}
+
+def : GCNPat <
+ (AMDGPUinit_exec i64:$src),
+ (SI_INIT_EXEC_LO (as_i32imm $src))
+> {
+ let WaveSizePredicate = isWave32;
+}
def : GCNPat <
(AMDGPUinit_exec_from_input i32:$input, i32:$shift),
@@ -551,7 +629,7 @@ def : GCNPat<
>;
def : GCNPat<
- (AMDGPUelse i64:$src, bb:$target),
+ (AMDGPUelse i1:$src, bb:$target),
(SI_ELSE $src, $target, 0)
>;
@@ -584,7 +662,12 @@ def : Pat <
// TODO: we could add more variants for other types of conditionals
def : Pat <
- (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+ (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
+ (COPY $src) // Return the SGPRs representing i1 src
+>;
+
+def : Pat <
+ (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
(COPY $src) // Return the SGPRs representing i1 src
>;
@@ -592,7 +675,7 @@ def : Pat <
// VOP1 Patterns
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath] in {
+let OtherPredicates = [UnsafeFPMath] in {
//def : RcpPat<V_RCP_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F64_e32, f64>;
@@ -615,7 +698,7 @@ def : GCNPat <
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-} // End SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath]
+} // End OtherPredicates = [UnsafeFPMath]
// f16_to_fp patterns
@@ -706,17 +789,18 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
let SubtargetPredicate = Has16BitInsts;
}
-multiclass SelectPat <ValueType vt, Instruction inst> {
+multiclass SelectPat <ValueType vt> {
def : GCNPat <
- (vt (select i1:$src0, vt:$src1, vt:$src2)),
- (inst $src2, $src1, $src0)
+ (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
+ (VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
+ (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
>;
}
-defm : SelectPat <i16, V_CNDMASK_B32_e64>;
-defm : SelectPat <i32, V_CNDMASK_B32_e64>;
-defm : SelectPat <f16, V_CNDMASK_B32_e64>;
-defm : SelectPat <f32, V_CNDMASK_B32_e64>;
+defm : SelectPat <i16>;
+defm : SelectPat <i32>;
+defm : SelectPat <f16>;
+defm : SelectPat <f32>;
let AddedComplexity = 1 in {
def : GCNPat <
@@ -749,6 +833,22 @@ foreach Index = 0-2 in {
>;
}
+foreach Index = 0-2 in {
+ def Extract_Element_v3i32_#Index : Extract_Element <
+ i32, v3i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v3i32_#Index : Insert_Element <
+ i32, v3i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v3f32_#Index : Extract_Element <
+ f32, v3f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v3f32_#Index : Insert_Element <
+ f32, v3f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
foreach Index = 0-3 in {
def Extract_Element_v4i32_#Index : Extract_Element <
i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -765,6 +865,22 @@ foreach Index = 0-3 in {
>;
}
+foreach Index = 0-4 in {
+ def Extract_Element_v5i32_#Index : Extract_Element <
+ i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v5i32_#Index : Insert_Element <
+ i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v5f32_#Index : Extract_Element <
+ f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v5f32_#Index : Insert_Element <
+ f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
foreach Index = 0-7 in {
def Extract_Element_v8i32_#Index : Extract_Element <
i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -818,7 +934,23 @@ def : Pat <
(v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
>;
-let SubtargetPredicate = isGCN in {
+foreach Index = 0-31 in {
+ def Extract_Element_v32i32_#Index : Extract_Element <
+ i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Insert_Element_v32i32_#Index : Insert_Element <
+ i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v32f32_#Index : Extract_Element <
+ f32, v32f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Insert_Element_v32f32_#Index : Insert_Element <
+ f32, v32f32, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
// FIXME: Why do only some of these type combinations for SReg and
// VReg?
@@ -882,6 +1014,10 @@ def : BitConvert <i64, v4f16, VReg_64>;
def : BitConvert <v4i32, v4f32, VReg_128>;
def : BitConvert <v4f32, v4i32, VReg_128>;
+// 96-bit bitcast
+def : BitConvert <v3i32, v3f32, SGPR_96>;
+def : BitConvert <v3f32, v3i32, SGPR_96>;
+
// 128-bit bitcast
def : BitConvert <v2i64, v4i32, SReg_128>;
def : BitConvert <v4i32, v2i64, SReg_128>;
@@ -892,6 +1028,10 @@ def : BitConvert <v4i32, v2f64, VReg_128>;
def : BitConvert <v2i64, v2f64, VReg_128>;
def : BitConvert <v2f64, v2i64, VReg_128>;
+// 160-bit bitcast
+def : BitConvert <v5i32, v5f32, SGPR_160>;
+def : BitConvert <v5f32, v5i32, SGPR_160>;
+
// 256-bit bitcast
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8f32, v8i32, SReg_256>;
@@ -902,7 +1042,9 @@ def : BitConvert <v8f32, v8i32, VReg_256>;
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
-} // End SubtargetPredicate = isGCN
+// 1024-bit bitcast
+def : BitConvert <v32i32, v32f32, VReg_1024>;
+def : BitConvert <v32f32, v32i32, VReg_1024>;
/********** =================== **********/
/********** Src & Dst modifiers **********/
@@ -1070,6 +1212,16 @@ def : GCNPat <
(S_MOV_B32 imm:$imm)
>;
+def : GCNPat <
+ (VGPRImm<(SIlds tglobaladdr:$ga)>),
+ (V_MOV_B32_e32 $ga)
+>;
+
+def : GCNPat <
+ (SIlds tglobaladdr:$ga),
+ (S_MOV_B32 $ga)
+>;
+
// FIXME: Workaround for ordering issue with peephole optimizer where
// a register class copy interferes with immediate folding. Should
// use s_mov_b32, which can be shrunk to s_movk_i32
@@ -1104,7 +1256,16 @@ def : GCNPat <
def : GCNPat <
(i1 imm:$imm),
(S_MOV_B64 (i64 (as_i64imm $imm)))
->;
+> {
+ let WaveSizePredicate = isWave64;
+}
+
+def : GCNPat <
+ (i1 imm:$imm),
+ (S_MOV_B32 (i32 (as_i32imm $imm)))
+> {
+ let WaveSizePredicate = isWave32;
+}
def : GCNPat <
(f64 InlineFPImm<f64>:$imm),
@@ -1115,18 +1276,18 @@ def : GCNPat <
/********** Intrinsic Patterns **********/
/********** ================== **********/
-let SubtargetPredicate = isGCN in {
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
-}
def : GCNPat <
(i32 (sext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
>;
class Ext32Pat <SDNode ext> : GCNPat <
(i32 (ext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
>;
def : Ext32Pat <zext>;
@@ -1144,8 +1305,6 @@ def : GCNPat <
// VOP3 Patterns
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGCN in {
-
def : IMad24Pat<V_MAD_I32_I24, 1>;
def : UMad24Pat<V_MAD_U32_U24, 1>;
@@ -1153,8 +1312,6 @@ def : UMad24Pat<V_MAD_U32_U24, 1>;
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
-}
-
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
(V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
@@ -1261,8 +1418,9 @@ def : GCNPat <
class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
(i64 (ext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
- (S_MOV_B32 (i32 0)), sub1)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+ sub0, (S_MOV_B32 (i32 0)), sub1)
>;
@@ -1280,8 +1438,10 @@ def : GCNPat <
def : GCNPat <
(i64 (sext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
>;
class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : GCNPat <
@@ -1296,10 +1456,12 @@ def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;
// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
-// comparisons still write to a pair of SGPRs, so treat these as
-// 64-bit comparisons. When legalizing SGPR copies, instructions
-// resulting in the copies from SCC to these instructions will be
-// moved to the VALU.
+// comparisons may write to a pair of SGPRs or a single SGPR, so treat
+// these as 32 or 64-bit comparisons. When legalizing SGPR copies,
+// instructions resulting in the copies from SCC to these instructions
+// will be moved to the VALU.
+
+let WaveSizePredicate = isWave64 in {
def : GCNPat <
(i1 (and i1:$src0, i1:$src1)),
(S_AND_B64 $src0, $src1)
@@ -1336,35 +1498,89 @@ def : GCNPat <
(S_NOT_B64 $src0)
>;
}
+} // end isWave64
+
+let WaveSizePredicate = isWave32 in {
+def : GCNPat <
+ (i1 (and i1:$src0, i1:$src1)),
+ (S_AND_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (or i1:$src0, i1:$src1)),
+ (S_OR_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (xor i1:$src0, i1:$src1)),
+ (S_XOR_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (add i1:$src0, i1:$src1)),
+ (S_XOR_B32 $src0, $src1)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, i1:$src1)),
+ (S_XOR_B32 $src0, $src1)
+>;
+
+let AddedComplexity = 1 in {
+def : GCNPat <
+ (i1 (add i1:$src0, (i1 -1))),
+ (S_NOT_B32 $src0)
+>;
+
+def : GCNPat <
+ (i1 (sub i1:$src0, (i1 -1))),
+ (S_NOT_B32 $src0)
+>;
+}
+} // end isWave32
def : GCNPat <
(f16 (sint_to_fp i1:$src)),
- (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+ (V_CVT_F16_F32_e32 (
+ V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+ $src))
>;
def : GCNPat <
(f16 (uint_to_fp i1:$src)),
- (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+ (V_CVT_F16_F32_e32 (
+ V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+ $src))
>;
def : GCNPat <
(f32 (sint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+ $src)
>;
def : GCNPat <
(f32 (uint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+ $src)
>;
def : GCNPat <
(f64 (sint_to_fp i1:$src)),
- (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1),
+ $src))
>;
def : GCNPat <
(f64 (uint_to_fp i1:$src)),
- (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+ (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1),
+ $src))
>;
//===----------------------------------------------------------------------===//
@@ -1417,7 +1633,7 @@ def : GCNPat<
def : GCNPat<
(fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
- (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
+ (V_PK_MUL_F16 0, (i32 CONST.FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
>;
}
@@ -1478,6 +1694,14 @@ def : GCNPat <
>;
} // End OtherPredicates = [HasDLInsts]
+let SubtargetPredicate = isGFX10Plus in
+def : GCNPat <
+ (fma (f16 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (f16 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+ (f16 (VOP3NoMods f32:$src2))),
+ (V_FMAC_F16_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ SRCMODS.NONE, $src2, $clamp, $omod)
+>;
// Allow integer inputs
class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
@@ -1568,7 +1792,7 @@ def : GCNPat <
// Fract Patterns
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isSI in {
+let SubtargetPredicate = isGFX6 in {
// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
@@ -1595,7 +1819,7 @@ def : GCNPat <
DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-} // End SubtargetPredicates = isSI
+} // End SubtargetPredicates = isGFX6
//============================================================================//
// Miscellaneous Optimization Patterns
@@ -1609,6 +1833,13 @@ def : GCNPat<
(S_SUB_I32 $src0, NegSubInlineConst32:$src1)
>;
+// Avoid pointlessly materializing a constant in VGPR.
+// FIXME: Should also do this for readlane, but tablegen crashes on
+// the ignored src1.
+def : GCNPat<
+ (int_amdgcn_readfirstlane (i32 imm:$src)),
+ (S_MOV_B32 $src)
+>;
multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
def : GCNPat <
@@ -1622,8 +1853,6 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
>;
}
-let SubtargetPredicate = isGCN in {
-
defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
@@ -1633,8 +1862,6 @@ defm : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64, SReg_64>;
defm : IntMed3Pat<V_MED3_I32, smin, smax, smin_oneuse, smax_oneuse>;
defm : IntMed3Pat<V_MED3_U32, umin, umax, umin_oneuse, umax_oneuse>;
-}
-
// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class FPMed3Pat<ValueType vt,
@@ -1683,8 +1910,8 @@ multiclass Int16Med3Pat<Instruction med3Inst,
def : FPMed3Pat<f32, V_MED3_F32>;
-let OtherPredicates = [isGFX9] in {
+let OtherPredicates = [isGFX9Plus] in {
def : FP16Med3Pat<f16, V_MED3_F16>;
defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
-} // End Predicates = [isGFX9]
+} // End Predicates = [isGFX9Plus]
diff --git a/lib/Target/AMDGPU/SIIntrinsics.td b/lib/Target/AMDGPU/SIIntrinsics.td
deleted file mode 100644
index e51ff4b4bc50..000000000000
--- a/lib/Target/AMDGPU/SIIntrinsics.td
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Backend internal SI Intrinsic Definitions. User code should not
-// directly use these.
-//
-//===----------------------------------------------------------------------===//
-
-
-let TargetPrefix = "SI", isTarget = 1 in {
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
-
-} // End TargetPrefix = "SI", isTarget = 1
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index be291b127301..ae8b967893a2 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
//===- SILoadStoreOptimizer.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -132,6 +131,8 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
bool GLC1;
bool SLC0;
bool SLC1;
+ bool DLC0;
+ bool DLC1;
bool UseST64;
SmallVector<MachineInstr *, 8> InstsToMove;
};
@@ -257,13 +258,11 @@ static void addDefsUsesToList(const MachineInstr &MI,
static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
MachineBasicBlock::iterator B,
- const SIInstrInfo *TII,
AliasAnalysis *AA) {
// RAW or WAR - cannot reorder
// WAW - cannot reorder
// RAR - safe to reorder
- return !(A->mayStore() || B->mayStore()) ||
- TII->areMemAccessesTriviallyDisjoint(*A, *B, AA);
+ return !(A->mayStore() || B->mayStore()) || !A->mayAlias(AA, *B, true);
}
// Add MI and its defs to the lists if MI reads one of the defs that are
@@ -282,6 +281,7 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
// registers are in SSA form.
if (Use.isReg() &&
((Use.readsReg() && RegDefs.count(Use.getReg())) ||
+ (Use.isDef() && RegDefs.count(Use.getReg())) ||
(Use.isDef() && TargetRegisterInfo::isPhysicalRegister(Use.getReg()) &&
PhysRegUses.count(Use.getReg())))) {
Insts.push_back(&MI);
@@ -295,13 +295,13 @@ static bool addToListsIfDependent(MachineInstr &MI, DenseSet<unsigned> &RegDefs,
static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
ArrayRef<MachineInstr *> InstsToMove,
- const SIInstrInfo *TII, AliasAnalysis *AA) {
+ AliasAnalysis *AA) {
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
if (!InstToMove->mayLoadOrStore())
continue;
- if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
+ if (!memAccessesCanBeReordered(MemOp, *InstToMove, AA))
return false;
}
return true;
@@ -326,7 +326,7 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width0 == EltOffset1 ||
EltOffset1 + CI.Width1 == EltOffset0) &&
- CI.GLC0 == CI.GLC1 &&
+ CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1);
}
@@ -567,8 +567,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
}
if (MBBI->mayLoadOrStore() &&
- (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))) {
+ (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
@@ -640,6 +640,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm();
CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm();
}
+ CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm();
+ CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm();
}
// Check both offsets fit in the reduced range.
@@ -647,7 +649,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// move and make sure they are all safe to move down past the merged
// instruction.
if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI))
- if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+ if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
return true;
}
@@ -656,8 +658,8 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
// it was safe to move I and also all the instruction in InstsToMove
// down past this instruction.
// check if we can move I across MBBI and if we can move all I's users
- if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
- !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+ if (!memAccessesCanBeReordered(*CI.I, *MBBI, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA))
break;
}
return false;
@@ -726,7 +728,8 @@ SILoadStoreOptimizer::mergeRead2Pair(CombineInfo &CI) {
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
.addReg(ImmReg)
- .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ .addReg(AddrReg->getReg(), 0, BaseSubReg)
+ .addImm(0); // clamp bit
BaseSubReg = 0;
}
@@ -819,7 +822,8 @@ SILoadStoreOptimizer::mergeWrite2Pair(CombineInfo &CI) {
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
.addReg(ImmReg)
- .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ .addReg(AddrReg->getReg(), 0, BaseSubReg)
+ .addImm(0); // clamp bit
BaseSubReg = 0;
}
@@ -858,6 +862,7 @@ SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) {
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC0) // glc
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -910,6 +915,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
.addImm(CI.GLC0) // glc
.addImm(CI.SLC0) // slc
.addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
@@ -1089,9 +1095,10 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset0, CI.Offset1)) // offset
- .addImm(CI.GLC0) // glc
- .addImm(CI.SLC0) // slc
- .addImm(0) // tfe
+ .addImm(CI.GLC0) // glc
+ .addImm(CI.SLC0) // slc
+ .addImm(0) // tfe
+ .addImm(CI.DLC0) // dlc
.cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(MIB, CI.InstsToMove);
@@ -1137,9 +1144,10 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
MachineOperand OffsetLo = createRegOrImm(static_cast<int32_t>(Addr.Offset), MI);
MachineOperand OffsetHi =
createRegOrImm(static_cast<int32_t>(Addr.Offset >> 32), MI);
- unsigned CarryReg = MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
- unsigned DeadCarryReg =
- MRI->createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
+
+ const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ unsigned CarryReg = MRI->createVirtualRegister(CarryRC);
+ unsigned DeadCarryReg = MRI->createVirtualRegister(CarryRC);
unsigned DestSub0 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned DestSub1 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -1147,7 +1155,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
BuildMI(*MBB, MBBI, DL, TII->get(AMDGPU::V_ADD_I32_e64), DestSub0)
.addReg(CarryReg, RegState::Define)
.addReg(Addr.Base.LoReg, 0, Addr.Base.LoSubReg)
- .add(OffsetLo);
+ .add(OffsetLo)
+ .addImm(0); // clamp bit
(void)LoHalf;
LLVM_DEBUG(dbgs() << " "; LoHalf->dump(););
@@ -1156,7 +1165,8 @@ unsigned SILoadStoreOptimizer::computeBase(MachineInstr &MI,
.addReg(DeadCarryReg, RegState::Define | RegState::Dead)
.addReg(Addr.Base.HiReg, 0, Addr.Base.HiSubReg)
.add(OffsetHi)
- .addReg(CarryReg, RegState::Kill);
+ .addReg(CarryReg, RegState::Kill)
+ .addImm(0); // clamp bit
(void)HiHalf;
LLVM_DEBUG(dbgs() << " "; HiHalf->dump(););
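
The SILoadStoreOptimizer changes above thread the new dlc cache-policy bit through the merge logic: two accesses are only paired when their offsets are adjacent and their glc/slc/dlc bits agree, and the rewritten address computation now carries the clamp operand required by the updated V_ADD_I32_e64 form. A minimal standalone sketch of the pairing check, assuming element-scaled offsets and ignoring the S_BUFFER_LOAD_IMM exception for slc (field names are illustrative, not the real CombineInfo):

    #include <cstdint>
    #include <iostream>

    struct PairInfo {
      uint32_t Offset0, Offset1;  // element offsets
      uint32_t Width0, Width1;    // access widths in elements
      bool GLC0, GLC1;
      bool SLC0, SLC1;
      bool DLC0, DLC1;            // device-level coherence bit (gfx10)
    };

    static bool offsetsCanBeCombined(const PairInfo &CI) {
      bool Adjacent = CI.Offset0 + CI.Width0 == CI.Offset1 ||
                      CI.Offset1 + CI.Width1 == CI.Offset0;
      // All cache-policy bits must agree, otherwise merging would change the
      // memory behaviour of one of the original accesses.
      bool SamePolicy =
          CI.GLC0 == CI.GLC1 && CI.SLC0 == CI.SLC1 && CI.DLC0 == CI.DLC1;
      return Adjacent && SamePolicy;
    }

    int main() {
      PairInfo CI{0, 4, 4, 4, false, false, false, false, true, false};
      std::cout << (offsetsCanBeCombined(CI) ? "mergeable" : "not mergeable")
                << '\n';  // dlc bits differ, so not mergeable
    }
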
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 1aa1feebbdae..78f409cd9555 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -1,9 +1,8 @@
//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,16 @@ private:
LiveIntervals *LIS = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterClass *BoolRC = nullptr;
+ unsigned AndOpc;
+ unsigned OrOpc;
+ unsigned XorOpc;
+ unsigned MovTermOpc;
+ unsigned Andn2TermOpc;
+ unsigned XorTermrOpc;
+ unsigned OrSaveExecOpc;
+ unsigned Exec;
+
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
@@ -176,7 +185,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
Cond.getSubReg() == AMDGPU::NoSubRegister);
- unsigned SaveExecReg = SaveExec.getReg();
+ Register SaveExecReg = SaveExec.getReg();
MachineOperand &ImpDefSCC = MI.getOperand(4);
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
@@ -188,26 +197,26 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// Add an implicit def of exec to discourage scheduling VALU after this which
// will interfere with trying to form s_and_saveexec_b64 later.
- unsigned CopyReg = SimpleIf ? SaveExecReg
- : MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register CopyReg = SimpleIf ? SaveExecReg
+ : MRI->createVirtualRegister(BoolRC);
MachineInstr *CopyExec =
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
+ .addReg(Exec)
+ .addReg(Exec, RegState::ImplicitDefine);
- unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned Tmp = MRI->createVirtualRegister(BoolRC);
MachineInstr *And =
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
+ BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
.addReg(CopyReg)
- //.addReg(AMDGPU::EXEC)
- .addReg(Cond.getReg());
+ .add(Cond);
+
setImpSCCDefDead(*And, true);
MachineInstr *Xor = nullptr;
if (!SimpleIf) {
Xor =
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
+ BuildMI(MBB, I, DL, TII->get(XorOpc), SaveExecReg)
.addReg(Tmp)
.addReg(CopyReg);
setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
@@ -216,7 +225,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// Use a copy that is a terminator to get correct spill code placement it with
// fast regalloc.
MachineInstr *SetExec =
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
+ BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
// Insert a pseudo terminator to help keep the verifier happy. This will also
@@ -240,7 +249,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
LIS->InsertMachineInstrInMaps(*SetExec);
LIS->InsertMachineInstrInMaps(*NewBr);
- LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
MI.eraseFromParent();
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
@@ -257,7 +266,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
bool ExecModified = MI.getOperand(3).getImm() != 0;
@@ -266,17 +275,17 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
// We are running before TwoAddressInstructions, and si_else's operands are
// tied. In order to correctly tie the registers, split this into a copy of
// the src like it does.
- unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ Register CopyReg = MRI->createVirtualRegister(BoolRC);
MachineInstr *CopyExec =
BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
.add(MI.getOperand(1)); // Saved EXEC
// This must be inserted before phis and any spill code inserted before the
// else.
- unsigned SaveReg = ExecModified ?
- MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
+ Register SaveReg = ExecModified ?
+ MRI->createVirtualRegister(BoolRC) : DstReg;
MachineInstr *OrSaveExec =
- BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
+ BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
.addReg(CopyReg);
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
@@ -285,8 +294,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
if (ExecModified) {
MachineInstr *And =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
- .addReg(AMDGPU::EXEC)
+ BuildMI(MBB, ElsePt, DL, TII->get(AndOpc), DstReg)
+ .addReg(Exec)
.addReg(SaveReg);
if (LIS)
@@ -294,8 +303,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
}
MachineInstr *Xor =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
+ .addReg(Exec)
.addReg(DstReg);
MachineInstr *Branch =
@@ -324,7 +333,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->createAndComputeVirtRegInterval(SaveReg);
// Let this be recomputed.
- LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
+ LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
}
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
@@ -348,14 +357,14 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
// exit" mask.
MachineInstr *And = nullptr, *Or = nullptr;
if (!SkipAnding) {
- And = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
- .addReg(AMDGPU::EXEC)
+ And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst)
+ .addReg(Exec)
.add(MI.getOperand(1));
- Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.addReg(Dst)
.add(MI.getOperand(2));
} else
- Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
.add(MI.getOperand(2));
@@ -373,8 +382,8 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *AndN2 =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ BuildMI(MBB, &MI, DL, TII->get(Andn2TermOpc), Exec)
+ .addReg(Exec)
.add(MI.getOperand(0));
MachineInstr *Branch =
@@ -395,8 +404,8 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock::iterator InsPt = MBB.begin();
MachineInstr *NewMI =
- BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
+ .addReg(Exec)
.add(MI.getOperand(0));
if (LIS)
@@ -428,13 +437,13 @@ void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
// does not really modify exec.
for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
- !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
+ !(I->isCopy() && I->getOperand(0).getReg() != Exec))
return;
for (const auto &SrcOp : Def->explicit_operands())
if (SrcOp.isReg() && SrcOp.isUse() &&
(TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
- SrcOp.getReg() == AMDGPU::EXEC))
+ SrcOp.getReg() == Exec))
Src.push_back(SrcOp);
}
@@ -472,6 +481,27 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
MRI = &MF.getRegInfo();
+ BoolRC = TRI->getBoolRC();
+
+ if (ST.isWave32()) {
+ AndOpc = AMDGPU::S_AND_B32;
+ OrOpc = AMDGPU::S_OR_B32;
+ XorOpc = AMDGPU::S_XOR_B32;
+ MovTermOpc = AMDGPU::S_MOV_B32_term;
+ Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
+ XorTermrOpc = AMDGPU::S_XOR_B32_term;
+ OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
+ Exec = AMDGPU::EXEC_LO;
+ } else {
+ AndOpc = AMDGPU::S_AND_B64;
+ OrOpc = AMDGPU::S_OR_B64;
+ XorOpc = AMDGPU::S_XOR_B64;
+ MovTermOpc = AMDGPU::S_MOV_B64_term;
+ Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
+ XorTermrOpc = AMDGPU::S_XOR_B64_term;
+ OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
+ Exec = AMDGPU::EXEC;
+ }
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
@@ -508,6 +538,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_AND_B64:
case AMDGPU::S_OR_B64:
+ case AMDGPU::S_AND_B32:
+ case AMDGPU::S_OR_B32:
// Cleanup bit manipulations on exec mask
combineMasks(MI);
Last = I;
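
The SILowerControlFlow changes above stop hard-coding the 64-bit exec-mask opcodes and instead pick a full set of opcodes plus the exec register once per function, so the same lowering serves wave32 and wave64. A simplified standalone sketch of that dispatch, with opcode names as plain strings rather than real instruction descriptors:

    #include <iostream>
    #include <string>

    struct ExecOps {
      std::string And, Or, Xor, MovTerm, AndN2Term, XorTerm, OrSaveExec, Exec;
    };

    static ExecOps selectExecOps(bool IsWave32) {
      if (IsWave32)
        return {"s_and_b32", "s_or_b32", "s_xor_b32", "s_mov_b32_term",
                "s_andn2_b32_term", "s_xor_b32_term", "s_or_saveexec_b32",
                "exec_lo"};
      return {"s_and_b64", "s_or_b64", "s_xor_b64", "s_mov_b64_term",
              "s_andn2_b64_term", "s_xor_b64_term", "s_or_saveexec_b64",
              "exec"};
    }

    int main() {
      ExecOps Ops = selectExecOps(/*IsWave32=*/true);
      // The emitIf lowering would then apply Ops.And to a copy of Ops.Exec.
      std::cout << Ops.And << " tmp, copy_of_" << Ops.Exec << ", cond\n";
    }
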
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index eb038bb5d5fc..1c0f836f07e6 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -1,15 +1,14 @@
//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers all occurrences of i1 values (with a vreg_1 register class)
-// to lane masks (64-bit scalar registers). The pass assumes machine SSA form
-// and a wave-level control flow graph.
+// to lane masks (32 / 64-bit scalar registers). The pass assumes machine SSA
+// form and a wave-level control flow graph.
//
// Before this pass, values that are semantically i1 and are defined and used
// within the same basic block are already represented as lane masks in scalar
@@ -51,6 +50,7 @@ public:
static char ID;
private:
+ bool IsWave32 = false;
MachineFunction *MF = nullptr;
MachineDominatorTree *DT = nullptr;
MachinePostDominatorTree *PDT = nullptr;
@@ -58,6 +58,14 @@ private:
const GCNSubtarget *ST = nullptr;
const SIInstrInfo *TII = nullptr;
+ unsigned ExecReg;
+ unsigned MovOp;
+ unsigned AndOp;
+ unsigned OrOp;
+ unsigned XorOp;
+ unsigned AndN2Op;
+ unsigned OrN2Op;
+
DenseSet<unsigned> ConstrainRegs;
public:
@@ -87,6 +95,11 @@ private:
MachineBasicBlock::iterator
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
+ bool isVreg1(unsigned Reg) const {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->getRegClass(Reg) == &AMDGPU::VReg_1RegClass;
+ }
+
bool isLaneMaskReg(unsigned Reg) const {
return TII->getRegisterInfo().isSGPRReg(*MRI, Reg) &&
TII->getRegisterInfo().getRegSizeInBits(Reg, *MRI) ==
@@ -412,8 +425,10 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
}
static unsigned createLaneMaskReg(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ return MRI.createVirtualRegister(ST.isWave32() ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
}
static unsigned insertUndefLaneMask(MachineBasicBlock &MBB) {
@@ -443,13 +458,32 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &TheMF) {
ST = &MF->getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
+ IsWave32 = ST->isWave32();
+
+ if (IsWave32) {
+ ExecReg = AMDGPU::EXEC_LO;
+ MovOp = AMDGPU::S_MOV_B32;
+ AndOp = AMDGPU::S_AND_B32;
+ OrOp = AMDGPU::S_OR_B32;
+ XorOp = AMDGPU::S_XOR_B32;
+ AndN2Op = AMDGPU::S_ANDN2_B32;
+ OrN2Op = AMDGPU::S_ORN2_B32;
+ } else {
+ ExecReg = AMDGPU::EXEC;
+ MovOp = AMDGPU::S_MOV_B64;
+ AndOp = AMDGPU::S_AND_B64;
+ OrOp = AMDGPU::S_OR_B64;
+ XorOp = AMDGPU::S_XOR_B64;
+ AndN2Op = AMDGPU::S_ANDN2_B64;
+ OrN2Op = AMDGPU::S_ORN2_B64;
+ }
lowerCopiesFromI1();
lowerPhis();
lowerCopiesToI1();
for (unsigned Reg : ConstrainRegs)
- MRI->constrainRegClass(Reg, &AMDGPU::SReg_64_XEXECRegClass);
+ MRI->constrainRegClass(Reg, &AMDGPU::SReg_1_XEXECRegClass);
ConstrainRegs.clear();
return true;
@@ -465,13 +499,10 @@ void SILowerI1Copies::lowerCopiesFromI1() {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- MRI->getRegClass(SrcReg) != &AMDGPU::VReg_1RegClass)
+ if (!isVreg1(SrcReg))
continue;
- if (isLaneMaskReg(DstReg) ||
- (TargetRegisterInfo::isVirtualRegister(DstReg) &&
- MRI->getRegClass(DstReg) == &AMDGPU::VReg_1RegClass))
+ if (isLaneMaskReg(DstReg) || isVreg1(DstReg))
continue;
// Copy into a 32-bit vector register.
@@ -484,6 +515,8 @@ void SILowerI1Copies::lowerCopiesFromI1() {
ConstrainRegs.insert(SrcReg);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
+ .addImm(0)
+ .addImm(0)
.addImm(-1)
.addReg(SrcReg);
DeadCopies.push_back(&MI);
@@ -503,18 +536,22 @@ void SILowerI1Copies::lowerPhis() {
SmallVector<MachineBasicBlock *, 4> IncomingBlocks;
SmallVector<unsigned, 4> IncomingRegs;
SmallVector<unsigned, 4> IncomingUpdated;
+#ifndef NDEBUG
+ DenseSet<unsigned> PhiRegisters;
+#endif
for (MachineBasicBlock &MBB : *MF) {
LF.initialize(MBB);
for (MachineInstr &MI : MBB.phis()) {
unsigned DstReg = MI.getOperand(0).getReg();
- if (MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ if (!isVreg1(DstReg))
continue;
LLVM_DEBUG(dbgs() << "Lower PHI: " << MI);
- MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
// Collect incoming values.
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
@@ -525,18 +562,22 @@ void SILowerI1Copies::lowerPhis() {
if (IncomingDef->getOpcode() == AMDGPU::COPY) {
IncomingReg = IncomingDef->getOperand(1).getReg();
- assert(isLaneMaskReg(IncomingReg));
+ assert(isLaneMaskReg(IncomingReg) || isVreg1(IncomingReg));
assert(!IncomingDef->getOperand(1).getSubReg());
} else if (IncomingDef->getOpcode() == AMDGPU::IMPLICIT_DEF) {
continue;
} else {
- assert(IncomingDef->isPHI());
+ assert(IncomingDef->isPHI() || PhiRegisters.count(IncomingReg));
}
IncomingBlocks.push_back(IncomingMBB);
IncomingRegs.push_back(IncomingReg);
}
+#ifndef NDEBUG
+ PhiRegisters.insert(DstReg);
+#endif
+
// Phis in a loop that are observed outside the loop receive a simple but
// conservatively correct treatment.
MachineBasicBlock *PostDomBound = &MBB;
@@ -629,8 +670,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
continue;
unsigned DstReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg) ||
- MRI->getRegClass(DstReg) != &AMDGPU::VReg_1RegClass)
+ if (!isVreg1(DstReg))
continue;
if (MRI->use_empty(DstReg)) {
@@ -640,7 +680,8 @@ void SILowerI1Copies::lowerCopiesToI1() {
LLVM_DEBUG(dbgs() << "Lower Other: " << MI);
- MRI->setRegClass(DstReg, &AMDGPU::SReg_64RegClass);
+ MRI->setRegClass(DstReg, IsWave32 ? &AMDGPU::SReg_32RegClass
+ : &AMDGPU::SReg_64RegClass);
if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF)
continue;
@@ -649,7 +690,7 @@ void SILowerI1Copies::lowerCopiesToI1() {
assert(!MI.getOperand(1).getSubReg());
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- !isLaneMaskReg(SrcReg)) {
+ (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
unsigned TmpReg = createLaneMaskReg(*MF);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)
@@ -699,7 +740,7 @@ bool SILowerI1Copies::isConstantLaneMask(unsigned Reg, bool &Val) const {
return false;
}
- if (MI->getOpcode() != AMDGPU::S_MOV_B64)
+ if (MI->getOpcode() != MovOp)
return false;
if (!MI->getOperand(1).isImm())
@@ -774,10 +815,10 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
if (PrevVal == CurVal) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(CurReg);
} else if (CurVal) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(AMDGPU::EXEC);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg).addReg(ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), DstReg)
- .addReg(AMDGPU::EXEC)
+ BuildMI(MBB, I, DL, TII->get(XorOp), DstReg)
+ .addReg(ExecReg)
.addImm(-1);
}
return;
@@ -790,9 +831,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
PrevMaskedReg = PrevReg;
} else {
PrevMaskedReg = createLaneMaskReg(*MF);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ANDN2_B64), PrevMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(AndN2Op), PrevMaskedReg)
.addReg(PrevReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(ExecReg);
}
}
if (!CurConstant) {
@@ -801,9 +842,9 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
CurMaskedReg = CurReg;
} else {
CurMaskedReg = createLaneMaskReg(*MF);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), CurMaskedReg)
+ BuildMI(MBB, I, DL, TII->get(AndOp), CurMaskedReg)
.addReg(CurReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(ExecReg);
}
}
@@ -814,12 +855,12 @@ void SILowerI1Copies::buildMergeLaneMasks(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), DstReg)
.addReg(PrevMaskedReg);
} else if (PrevConstant && PrevVal) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ORN2_B64), DstReg)
+ BuildMI(MBB, I, DL, TII->get(OrN2Op), DstReg)
.addReg(CurMaskedReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(ExecReg);
} else {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_OR_B64), DstReg)
+ BuildMI(MBB, I, DL, TII->get(OrOp), DstReg)
.addReg(PrevMaskedReg)
- .addReg(CurMaskedReg ? CurMaskedReg : (unsigned)AMDGPU::EXEC);
+ .addReg(CurMaskedReg ? CurMaskedReg : ExecReg);
}
}
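
SILowerI1Copies now treats lane masks as 32- or 64-bit scalar registers depending on the wave size, but the merge step it emits is unchanged in spirit: inactive lanes keep the previous value and active lanes take the new one, i.e. (Prev & ~exec) | (Cur & exec). A self-contained model of that merge, using a uint64_t in place of a wave64 lane mask (wave32 would use uint32_t):

    #include <cstdint>
    #include <cstdio>

    static uint64_t mergeLaneMasks(uint64_t Prev, uint64_t Cur, uint64_t Exec) {
      uint64_t PrevMasked = Prev & ~Exec;  // inactive lanes keep the old value
      uint64_t CurMasked = Cur & Exec;     // active lanes take the new value
      return PrevMasked | CurMasked;
    }

    int main() {
      uint64_t Prev = 0x00000000000000FFull;
      uint64_t Cur  = 0xFFFF0000000000F0ull;
      uint64_t Exec = 0xFFFF0000000000F0ull;
      std::printf("%016llx\n",
                  (unsigned long long)mergeLaneMasks(Prev, Cur, Exec));
    }
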
diff --git a/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
new file mode 100644
index 000000000000..a82047473370
--- /dev/null
+++ b/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -0,0 +1,323 @@
+//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
+// SGPR spills, so must insert CSR SGPR spills as well as expand them.
+//
+// This pass must never create new SGPR virtual registers.
+//
+// FIXME: Must stop RegScavenger spills in later passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-lower-sgpr-spills"
+
+using MBBVector = SmallVector<MachineBasicBlock *, 4>;
+
+namespace {
+
+static cl::opt<bool> EnableSpillVGPRToAGPR(
+ "amdgpu-spill-vgpr-to-agpr",
+ cl::desc("Enable spilling VGPRs to AGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+class SILowerSGPRSpills : public MachineFunctionPass {
+private:
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveIntervals *LIS = nullptr;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ MBBVector SaveBlocks;
+ MBBVector RestoreBlocks;
+
+public:
+ static char ID;
+
+ SILowerSGPRSpills() : MachineFunctionPass(ID) {}
+
+ void calculateSaveRestoreBlocks(MachineFunction &MF);
+ bool spillCalleeSavedRegs(MachineFunction &MF);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char SILowerSGPRSpills::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
+ "SI lower SGPR spill instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
+ "SI lower SGPR spill instructions", false, false)
+
+char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
+
+/// Insert spill code for the callee-saved registers used in the function.
+static void insertCSRSaves(MachineBasicBlock &SaveBlock,
+ ArrayRef<CalleeSavedInfo> CSI,
+ LiveIntervals *LIS) {
+ MachineFunction &MF = *SaveBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineBasicBlock::iterator I = SaveBlock.begin();
+ if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CS : CSI) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CS.getReg();
+
+ MachineInstrSpan MIS(I, &SaveBlock);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI);
+
+ if (LIS) {
+ assert(std::distance(MIS.begin(), I) == 1);
+ MachineInstr &Inst = *std::prev(I);
+
+ LIS->InsertMachineInstrInMaps(Inst);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
+ }
+ }
+ }
+}
+
+/// Insert restore code for the callee-saved registers used in the function.
+static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
+ std::vector<CalleeSavedInfo> &CSI,
+ LiveIntervals *LIS) {
+ MachineFunction &MF = *RestoreBlock.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
+
+ // FIXME: Just emit the readlane/writelane directly
+ if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
+ for (const CalleeSavedInfo &CI : reverse(CSI)) {
+ unsigned Reg = CI.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+
+ if (LIS) {
+ MachineInstr &Inst = *std::prev(I);
+ LIS->InsertMachineInstrInMaps(Inst);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
+ }
+ }
+ }
+}
+
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI.getSavePoint()) {
+ SaveBlocks.push_back(MFI.getSavePoint());
+ assert(MFI.getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&MF.front());
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = MF.getFunction();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIFrameLowering *TFI = ST.getFrameLowering();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ RegScavenger *RS = nullptr;
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F.hasFnAttribute(Attribute::Naked)) {
+ // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
+ // necessary for verifier liveness checks.
+ MFI.setCalleeSavedInfoValid(true);
+
+ std::vector<CalleeSavedInfo> CSI;
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ unsigned Reg = CSRegs[I];
+ if (SavedRegs.test(Reg)) {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
+ TRI->getSpillAlignment(*RC),
+ true);
+
+ CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+ }
+ }
+
+ if (!CSI.empty()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ insertCSRSaves(*SaveBlock, CSI, LIS);
+
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ insertCSRRestores(*RestoreBlock, CSI, LIS);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+
+ VRM = getAnalysisIfAvailable<VirtRegMap>();
+
+ assert(SaveBlocks.empty() && RestoreBlocks.empty());
+
+ // First, expose any CSR SGPR spills. This is mostly the same as what PEI
+ // does, but somewhat simpler.
+ calculateSaveRestoreBlocks(MF);
+ bool HasCSRs = spillCalleeSavedRegs(MF);
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects() && !HasCSRs) {
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ return false;
+ }
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
+ && EnableSpillVGPRToAGPR;
+
+ bool MadeChange = false;
+
+ const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
+
+ // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
+ // handled as SpilledToReg in regular PrologEpilogInserter.
+ if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
+ SpillVGPRToAGPR) {
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // This operates under the assumption that only other SGPR spills are users
+ // of the frame index.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (SpillToAGPR && TII->isVGPRSpill(MI)) {
+ // Try to eliminate stack used by VGPR spills before frame
+ // finalization.
+ unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ int FI = MI.getOperand(FIOp).getIndex();
+ unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
+ ->getReg();
+ if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
+ TRI->isAGPR(MRI, VReg))) {
+ TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
+ continue;
+ }
+ }
+
+ if (!TII->isSGPRSpill(MI))
+ continue;
+
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
+ (void)Spilled;
+ assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+ }
+ }
+ }
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
+ MBB.addLiveIn(SSpill.VGPR);
+
+ for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
+ MBB.addLiveIn(Reg);
+
+ for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
+ MBB.addLiveIn(Reg);
+
+ MBB.sortUniqueLiveIns();
+ }
+
+ MadeChange = true;
+ }
+
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+
+ return MadeChange;
+}
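
The new SILowerSGPRSpills pass takes over the PrologEpilogInserter duties for SGPRs: it inserts CSR spills and restores, then tries to place each spilled 32-bit SGPR into one lane of a reserved VGPR (and, on MAI-capable targets, VGPR spills into AGPRs) before frame offsets are finalized. A back-of-the-envelope sketch of the VGPR-lane capacity check, assuming one lane per dword and WaveSize lanes per spill VGPR (struct and field names are illustrative):

    #include <iostream>

    struct SpillState {
      unsigned WaveSize;    // 32 or 64
      unsigned SpillVGPRs;  // VGPRs already reserved for SGPR spilling
      unsigned UsedLanes;   // lanes already occupied
    };

    static bool fitsWithoutNewVGPR(const SpillState &S, unsigned SpillSizeBytes) {
      unsigned NumLanes = SpillSizeBytes / 4;  // one lane per 32-bit SGPR
      return S.UsedLanes + NumLanes <= S.WaveSize * S.SpillVGPRs;
    }

    int main() {
      SpillState S{64, 1, 60};
      // A 16-byte (4-SGPR) spill still fits in the last four lanes.
      std::cout << (fitsWithoutNewVGPR(S, 16) ? "fits" : "needs another VGPR")
                << '\n';
    }
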
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 181cc41bd5ff..46da974a2f45 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -29,6 +28,7 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
+ Mode(MF.getFunction()),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
@@ -46,7 +46,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ImplicitBufferPtr(false),
ImplicitArgPtr(false),
GITPtrHigh(0xffffffff),
- HighBitsOf32BitAddress(0) {
+ HighBitsOf32BitAddress(0),
+ GDSSize(0) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const Function &F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
@@ -69,8 +70,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// Non-entry functions have no special inputs for now, other registers
// required for scratch access.
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
- ScratchWaveOffsetReg = AMDGPU::SGPR4;
- FrameOffsetReg = AMDGPU::SGPR5;
+ ScratchWaveOffsetReg = AMDGPU::SGPR33;
+
+ // TODO: Pick a high register, and shift down, similar to a kernel.
+ FrameOffsetReg = AMDGPU::SGPR34;
StackPtrOffsetReg = AMDGPU::SGPR32;
ArgInfo.PrivateSegmentBuffer =
@@ -88,33 +91,23 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- if (ST.debuggerEmitPrologue()) {
- // Enable everything.
+ if (F.hasFnAttribute("amdgpu-work-group-id-x"))
WorkGroupIDX = true;
- WorkGroupIDY = true;
- WorkGroupIDZ = true;
- WorkItemIDX = true;
- WorkItemIDY = true;
- WorkItemIDZ = true;
- } else {
- if (F.hasFnAttribute("amdgpu-work-group-id-x"))
- WorkGroupIDX = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-y"))
- WorkGroupIDY = true;
+ if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-z"))
- WorkGroupIDZ = true;
+ if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-x"))
- WorkItemIDX = true;
+ if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+ WorkItemIDX = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-y"))
- WorkItemIDY = true;
+ if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-z"))
- WorkItemIDZ = true;
- }
+ if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool HasStackObjects = FrameInfo.hasStackObjects();
@@ -154,9 +147,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
KernargSegmentPtr = true;
if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
+ auto hasNonSpillStackObjects = [&]() {
+ // Avoid expensive checking if there's no stack objects.
+ if (!HasStackObjects)
+ return false;
+ for (auto OI = FrameInfo.getObjectIndexBegin(),
+ OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
+ if (!FrameInfo.isSpillSlotObjectIndex(OI))
+ return true;
+ // All stack objects are spill slots.
+ return false;
+ };
// TODO: This could be refined a lot. The attribute is a poor way of
// detecting calls that may require it before argument lowering.
- if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
+ if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
FlatScratchInit = true;
}
@@ -169,6 +173,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
S = A.getValueAsString();
if (!S.empty())
S.consumeInteger(0, HighBitsOf32BitAddress);
+
+ S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
+ if (!S.empty())
+ S.consumeInteger(0, GDSSize);
}
void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
@@ -239,6 +247,17 @@ static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
return false;
}
+/// \returns true if \p NumNeed slots are available in VGPRs already used for
+/// SGPR spilling.
+//
+// FIXME: This only works after processFunctionBeforeFrameFinalized
+bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+ unsigned NumNeed) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ unsigned WaveSize = ST.getWavefrontSize();
+ return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
+}
+
/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
int FI) {
@@ -260,7 +279,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
int NumLanes = Size / 4;
- const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
// Make sure to handle the case where a wide SGPR spill may span between two
// VGPRs.
@@ -300,26 +319,92 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return true;
}
-void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
- for (auto &R : SGPRToVGPRSpills)
- MFI.RemoveStackObject(R.first);
+/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
+/// Either an AGPR is spilled to a VGPR, or vice versa.
+/// Returns true if \p FI can be eliminated completely.
+bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
+ int FI,
+ bool isAGPRtoVGPR) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
+
+ auto &Spill = VGPRToAGPRSpills[FI];
+
+ // This has already been allocated.
+ if (!Spill.Lanes.empty())
+ return Spill.FullyAllocated;
+
+ unsigned Size = FrameInfo.getObjectSize(FI);
+ unsigned NumLanes = Size / 4;
+ Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
+
+ const TargetRegisterClass &RC =
+ isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
+ auto Regs = RC.getRegisters();
+
+ auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ Spill.FullyAllocated = true;
+
+ // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
+ // once.
+ BitVector OtherUsedRegs;
+ OtherUsedRegs.resize(TRI->getNumRegs());
+
+ const uint32_t *CSRMask =
+ TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ if (CSRMask)
+ OtherUsedRegs.setBitsInMask(CSRMask);
+
+ // TODO: Should include register tuples, but doesn't matter with current
+ // usage.
+ for (MCPhysReg Reg : SpillAGPR)
+ OtherUsedRegs.set(Reg);
+ for (MCPhysReg Reg : SpillVGPR)
+ OtherUsedRegs.set(Reg);
+
+ SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
+ for (unsigned I = 0; I < NumLanes; ++I) {
+ NextSpillReg = std::find_if(
+ NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
+ return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
+ !OtherUsedRegs[Reg];
+ });
+
+ if (NextSpillReg == Regs.end()) { // Registers exhausted
+ Spill.FullyAllocated = false;
+ break;
+ }
+
+ OtherUsedRegs.set(*NextSpillReg);
+ SpillRegs.push_back(*NextSpillReg);
+ Spill.Lanes[I] = *NextSpillReg++;
+ }
+
+ return Spill.FullyAllocated;
}
+void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
+ // The FP spill hasn't been inserted yet, so keep it around.
+ for (auto &R : SGPRToVGPRSpills) {
+ if (R.first != FramePointerSaveIndex)
+ MFI.RemoveStackObject(R.first);
+ }
-/// \returns VGPR used for \p Dim' work item ID.
-unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
- switch (Dim) {
- case 0:
- assert(hasWorkItemIDX());
- return AMDGPU::VGPR0;
- case 1:
- assert(hasWorkItemIDY());
- return AMDGPU::VGPR1;
- case 2:
- assert(hasWorkItemIDZ());
- return AMDGPU::VGPR2;
+ // All other SGPRs must be allocated on the default stack, so reset the stack
+ // ID.
+ for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
+ ++i)
+ if (i != FramePointerSaveIndex)
+ MFI.setStackID(i, TargetStackID::Default);
+
+ for (auto &R : VGPRToAGPRSpills) {
+ if (R.second.FullyAllocated)
+ MFI.RemoveStackObject(R.first);
}
- llvm_unreachable("unexpected dimension");
}
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
@@ -330,3 +415,97 @@ MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
+
+static yaml::StringValue regToString(unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ yaml::StringValue Dest;
+ {
+ raw_string_ostream OS(Dest.Value);
+ OS << printReg(Reg, &TRI);
+ }
+ return Dest;
+}
+
+static Optional<yaml::SIArgumentInfo>
+convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
+ const TargetRegisterInfo &TRI) {
+ yaml::SIArgumentInfo AI;
+
+ auto convertArg = [&](Optional<yaml::SIArgument> &A,
+ const ArgDescriptor &Arg) {
+ if (!Arg)
+ return false;
+
+ // Create a register or stack argument.
+ yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
+ if (Arg.isRegister()) {
+ raw_string_ostream OS(SA.RegisterName.Value);
+ OS << printReg(Arg.getRegister(), &TRI);
+ } else
+ SA.StackOffset = Arg.getStackOffset();
+ // Check and update the optional mask.
+ if (Arg.isMasked())
+ SA.Mask = Arg.getMask();
+
+ A = SA;
+ return true;
+ };
+
+ bool Any = false;
+ Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
+ Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
+ Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
+ Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
+ Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
+ Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
+ Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
+ Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
+ Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
+ Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
+ Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
+ Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
+ ArgInfo.PrivateSegmentWaveByteOffset);
+ Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
+ Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
+ Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
+ Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
+ Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);
+
+ if (Any)
+ return AI;
+
+ return None;
+}
+
+yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
+ const llvm::SIMachineFunctionInfo& MFI,
+ const TargetRegisterInfo &TRI)
+ : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
+ MaxKernArgAlign(MFI.getMaxKernArgAlign()),
+ LDSSize(MFI.getLDSSize()),
+ IsEntryFunction(MFI.isEntryFunction()),
+ NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
+ MemoryBound(MFI.isMemoryBound()),
+ WaveLimiter(MFI.needsWaveLimiter()),
+ ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
+ ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
+ FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
+ StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
+ ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
+ Mode(MFI.getMode()) {}
+
+void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+ MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
+}
+
+bool SIMachineFunctionInfo::initializeBaseYamlFields(
+ const yaml::SIMachineFunctionInfo &YamlMFI) {
+ ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
+ MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
+ LDSSize = YamlMFI.LDSSize;
+ IsEntryFunction = YamlMFI.IsEntryFunction;
+ NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
+ MemoryBound = YamlMFI.MemoryBound;
+ WaveLimiter = YamlMFI.WaveLimiter;
+ return false;
+}
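
allocateVGPRSpillToAGPR above assigns one spare register per 32-bit lane of the spilled value, skipping anything that is callee-saved, already in use, or already claimed by an earlier spill; if the candidate list runs dry the slot is only partially allocated and keeps its stack backing. A toy standalone version of that scan, with plain integers standing in for physical registers:

    #include <algorithm>
    #include <array>
    #include <bitset>
    #include <iostream>
    #include <vector>

    int main() {
      std::array<int, 8> Candidates = {0, 1, 2, 3, 4, 5, 6, 7};
      std::bitset<8> OtherUsed;  // CSR-preserved or otherwise unavailable regs
      OtherUsed.set(1);
      OtherUsed.set(2);

      unsigned NumLanes = 4;     // a 128-bit spill needs four lanes
      std::vector<int> Lanes;
      bool FullyAllocated = true;
      auto Next = Candidates.begin();
      for (unsigned I = 0; I < NumLanes; ++I) {
        Next = std::find_if(Next, Candidates.end(),
                            [&](int R) { return !OtherUsed[R]; });
        if (Next == Candidates.end()) {  // registers exhausted
          FullyAllocated = false;
          break;
        }
        OtherUsed.set(*Next);
        Lanes.push_back(*Next++);
      }

      std::cout << "fully allocated: " << FullyAllocated << ", lanes:";
      for (int R : Lanes)
        std::cout << ' ' << R;
      std::cout << '\n';
    }
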
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ef91d1e43075..f19b20ceb5da 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,13 +15,16 @@
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -38,12 +40,19 @@ class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;
-class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
+class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
- // TODO: Is the img rsrc useful?
- explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
+ enum AMDGPUPSVKind : unsigned {
+ PSVBuffer = PseudoSourceValue::TargetCustom,
+ PSVImage,
+ GWSResource
+ };
+
+protected:
+ AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+ : PseudoSourceValue(Kind, TII) {}
+public:
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
// conservative.
@@ -59,29 +68,250 @@ public:
}
};
-class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
+class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
- explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
- PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
+ explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
- bool isConstant(const MachineFrameInfo *) const override {
- // This should probably be true for most images, but we will start by being
- // conservative.
- return false;
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == PSVBuffer;
}
+};
+class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+ // TODO: Is the img rsrc useful?
+ explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(PSVImage, TII) {}
+
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == PSVImage;
+ }
+};
+
+class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
+public:
+ explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
+ : AMDGPUPseudoSourceValue(GWSResource, TII) {}
+
+ static bool classof(const PseudoSourceValue *V) {
+ return V->kind() == GWSResource;
+ }
+
+ // These are inaccessible memory from IR.
bool isAliased(const MachineFrameInfo *) const override {
- return true;
+ return false;
}
+ // These are inaccessible memory from IR.
bool mayAlias(const MachineFrameInfo *) const override {
- return true;
+ return false;
+ }
+
+ void printCustom(raw_ostream &OS) const override {
+ OS << "GWSResource";
+ }
+};
+
+namespace yaml {
+
+struct SIArgument {
+ bool IsRegister;
+ union {
+ StringValue RegisterName;
+ unsigned StackOffset;
+ };
+ Optional<unsigned> Mask;
+
+ // Default constructor, which creates a stack argument.
+ SIArgument() : IsRegister(false), StackOffset(0) {}
+ SIArgument(const SIArgument &Other) {
+ IsRegister = Other.IsRegister;
+ if (IsRegister) {
+ ::new ((void *)std::addressof(RegisterName))
+ StringValue(Other.RegisterName);
+ } else
+ StackOffset = Other.StackOffset;
+ Mask = Other.Mask;
+ }
+ SIArgument &operator=(const SIArgument &Other) {
+ IsRegister = Other.IsRegister;
+ if (IsRegister) {
+ ::new ((void *)std::addressof(RegisterName))
+ StringValue(Other.RegisterName);
+ } else
+ StackOffset = Other.StackOffset;
+ Mask = Other.Mask;
+ return *this;
+ }
+ ~SIArgument() {
+ if (IsRegister)
+ RegisterName.~StringValue();
+ }
+
+ // Helper to create a register or stack argument.
+ static inline SIArgument createArgument(bool IsReg) {
+ if (IsReg)
+ return SIArgument(IsReg);
+ return SIArgument();
+ }
+
+private:
+ // Construct a register argument.
+ SIArgument(bool) : IsRegister(true), RegisterName() {}
+};
+
+template <> struct MappingTraits<SIArgument> {
+ static void mapping(IO &YamlIO, SIArgument &A) {
+ if (YamlIO.outputting()) {
+ if (A.IsRegister)
+ YamlIO.mapRequired("reg", A.RegisterName);
+ else
+ YamlIO.mapRequired("offset", A.StackOffset);
+ } else {
+ auto Keys = YamlIO.keys();
+ if (is_contained(Keys, "reg")) {
+ A = SIArgument::createArgument(true);
+ YamlIO.mapRequired("reg", A.RegisterName);
+ } else if (is_contained(Keys, "offset"))
+ YamlIO.mapRequired("offset", A.StackOffset);
+ else
+ YamlIO.setError("missing required key 'reg' or 'offset'");
+ }
+ YamlIO.mapOptional("mask", A.Mask);
+ }
+ static const bool flow = true;
+};
+
+struct SIArgumentInfo {
+ Optional<SIArgument> PrivateSegmentBuffer;
+ Optional<SIArgument> DispatchPtr;
+ Optional<SIArgument> QueuePtr;
+ Optional<SIArgument> KernargSegmentPtr;
+ Optional<SIArgument> DispatchID;
+ Optional<SIArgument> FlatScratchInit;
+ Optional<SIArgument> PrivateSegmentSize;
+
+ Optional<SIArgument> WorkGroupIDX;
+ Optional<SIArgument> WorkGroupIDY;
+ Optional<SIArgument> WorkGroupIDZ;
+ Optional<SIArgument> WorkGroupInfo;
+ Optional<SIArgument> PrivateSegmentWaveByteOffset;
+
+ Optional<SIArgument> ImplicitArgPtr;
+ Optional<SIArgument> ImplicitBufferPtr;
+
+ Optional<SIArgument> WorkItemIDX;
+ Optional<SIArgument> WorkItemIDY;
+ Optional<SIArgument> WorkItemIDZ;
+};
+
+template <> struct MappingTraits<SIArgumentInfo> {
+ static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
+ YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
+ YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
+ YamlIO.mapOptional("queuePtr", AI.QueuePtr);
+ YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
+ YamlIO.mapOptional("dispatchID", AI.DispatchID);
+ YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
+ YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
+
+ YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
+ YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
+ YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
+ YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+ YamlIO.mapOptional("privateSegmentWaveByteOffset",
+ AI.PrivateSegmentWaveByteOffset);
+
+ YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
+ YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
+
+ YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
+ YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
+ YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
+ }
+};
+
+// Default to default mode for default calling convention.
+struct SIMode {
+ bool IEEE = true;
+ bool DX10Clamp = true;
+
+ SIMode() = default;
+
+
+ SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
+ IEEE = Mode.IEEE;
+ DX10Clamp = Mode.DX10Clamp;
}
+
+ bool operator ==(const SIMode Other) const {
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ }
+};
+
+template <> struct MappingTraits<SIMode> {
+ static void mapping(IO &YamlIO, SIMode &Mode) {
+ YamlIO.mapOptional("ieee", Mode.IEEE, true);
+ YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+ }
+};
+
+struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
+ uint64_t ExplicitKernArgSize = 0;
+ unsigned MaxKernArgAlign = 0;
+ unsigned LDSSize = 0;
+ bool IsEntryFunction = false;
+ bool NoSignedZerosFPMath = false;
+ bool MemoryBound = false;
+ bool WaveLimiter = false;
+
+ StringValue ScratchRSrcReg = "$private_rsrc_reg";
+ StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
+ StringValue FrameOffsetReg = "$fp_reg";
+ StringValue StackPtrOffsetReg = "$sp_reg";
+
+ Optional<SIArgumentInfo> ArgInfo;
+ SIMode Mode;
+
+ SIMachineFunctionInfo() = default;
+ SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
+ const TargetRegisterInfo &TRI);
+
+ void mappingImpl(yaml::IO &YamlIO) override;
+ ~SIMachineFunctionInfo() = default;
};
+template <> struct MappingTraits<SIMachineFunctionInfo> {
+ static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
+ YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
+ UINT64_C(0));
+ YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
+ YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
+ YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
+ YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
+ YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
+ YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
+ YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
+ StringValue("$private_rsrc_reg"));
+ YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
+ StringValue("$scratch_wave_offset_reg"));
+ YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
+ StringValue("$fp_reg"));
+ YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
+ StringValue("$sp_reg"));
+ YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+ YamlIO.mapOptional("mode", MFI.Mode, SIMode());
+ }
+};
+
+} // end namespace yaml
+
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
+ friend class GCNTargetMachine;
+
unsigned TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
@@ -99,6 +329,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
AMDGPUFunctionArgInfo ArgInfo;
+ // State of MODE register, assumed FP mode.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
// Graphics info.
unsigned PSInputAddr = 0;
unsigned PSInputEnable = 0;
@@ -124,16 +357,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// unit. Minimum - first, maximum - second.
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
- // Stack object indices for work group IDs.
- std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
-
- // Stack object indices for work item IDs.
- std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
-
DenseMap<const Value *,
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
+ std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
private:
unsigned LDSWaveSpillSize = 0;
@@ -182,6 +410,7 @@ private:
unsigned GITPtrHigh;
unsigned HighBitsOf32BitAddress;
+ unsigned GDSSize;
// Current recorded maximum possible occupancy.
unsigned Occupancy;
@@ -213,6 +442,15 @@ public:
SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
};
+ struct VGPRSpillToAGPR {
+ SmallVector<MCPhysReg, 32> Lanes;
+ bool FullyAllocated = false;
+ };
+
+ SparseBitVector<> WWMReservedRegs;
+
+ void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
+
private:
// SGPR->VGPR spilling support.
using SpillRegMask = std::pair<unsigned, unsigned>;
@@ -223,9 +461,25 @@ private:
unsigned NumVGPRSpillLanes = 0;
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
+ DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+ // AGPRs used for VGPR spills.
+ SmallVector<MCPhysReg, 32> SpillAGPR;
+
+ // VGPRs used for AGPR spills.
+ SmallVector<MCPhysReg, 32> SpillVGPR;
+
+public: // FIXME
+ /// If set, the SGPR used to save and restore the register that holds the
+ /// frame pointer.
+ unsigned SGPRForFPSaveRestoreCopy = 0;
+ Optional<int> FramePointerSaveIndex;
+
public:
SIMachineFunctionInfo(const MachineFunction &MF);
+ bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
+
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
auto I = SGPRToVGPRSpills.find(FrameIndex);
return (I == SGPRToVGPRSpills.end()) ?
@@ -236,8 +490,29 @@ public:
return SpillVGPRs;
}
+ ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+ return SpillAGPR;
+ }
+
+ ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+ return SpillVGPR;
+ }
+
+ MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+ auto I = VGPRToAGPRSpills.find(FrameIndex);
+ return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+ : I->second.Lanes[Lane];
+ }
+
+ AMDGPU::SIModeRegisterDefaults getMode() const {
+ return Mode;
+ }
+
+ bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
+ unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
- void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+ bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+ void removeDeadFrameIndices(MachineFrameInfo &MFI);
bool hasCalculatedTID() const { return TIDReg != 0; };
unsigned getTIDReg() const { return TIDReg; };
@@ -386,8 +661,9 @@ public:
return ArgInfo.getPreloadedValue(Value);
}
- unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
- return ArgInfo.getPreloadedValue(Value).first->getRegister();
+ Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
+ auto Arg = ArgInfo.getPreloadedValue(Value).first;
+ return Arg ? Arg->getRegister() : Register();
}
unsigned getGITPtrHigh() const {
@@ -398,6 +674,10 @@ public:
return HighBitsOf32BitAddress;
}
+ unsigned getGDSSize() const {
+ return GDSSize;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -429,6 +709,11 @@ public:
return FrameOffsetReg;
}
+ void setFrameOffsetReg(unsigned Reg) {
+ assert(Reg != 0 && "Should never be unset");
+ FrameOffsetReg = Reg;
+ }
+
void setStackPtrOffsetReg(unsigned Reg) {
assert(Reg != 0 && "Should never be unset");
StackPtrOffsetReg = Reg;
@@ -445,8 +730,6 @@ public:
void setScratchWaveOffsetReg(unsigned Reg) {
assert(Reg != 0 && "Should never be unset");
ScratchWaveOffsetReg = Reg;
- if (isEntryFunction())
- FrameOffsetReg = ScratchWaveOffsetReg;
}
unsigned getQueuePtrUserSGPR() const {
@@ -565,30 +848,6 @@ public:
return WavesPerEU.second;
}
- /// \returns Stack object index for \p Dim's work group ID.
- int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
- assert(Dim < 3);
- return DebuggerWorkGroupIDStackObjectIndices[Dim];
- }
-
- /// Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
- void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
- assert(Dim < 3);
- DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
- }
-
- /// \returns Stack object index for \p Dim's work item ID.
- int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
- assert(Dim < 3);
- return DebuggerWorkItemIDStackObjectIndices[Dim];
- }
-
- /// Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
- void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
- assert(Dim < 3);
- DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
- }
-
/// \returns SGPR used for \p Dim's work group ID.
unsigned getWorkGroupIDSGPR(unsigned Dim) const {
switch (Dim) {
@@ -605,9 +864,6 @@ public:
llvm_unreachable("unexpected dimension");
}
- /// \returns VGPR used for \p Dim' work item ID.
- unsigned getWorkItemIDVGPR(unsigned Dim) const;
-
unsigned getLDSWaveSpillSize() const {
return LDSWaveSpillSize;
}
@@ -630,6 +886,15 @@ public:
return PSV.first->second.get();
}
+ const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
+ if (!GWSResourcePSV) {
+ GWSResourcePSV =
+ llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
+ }
+
+ return GWSResourcePSV.get();
+ }
+
unsigned getOccupancy() const {
return Occupancy;
}
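
A minimal sketch (not part of the patch) of the llvm::yaml idiom that the yaml::SIMode and yaml::SIMachineFunctionInfo mappings added above follow: one MappingTraits specialization drives both reading and writing, and mapOptional with a default omits keys whose value equals that default. The ToyMode struct, key names, and main() driver below are hypothetical and only illustrate the pattern.

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"

    struct ToyMode {
      bool IEEE = true;
      bool DX10Clamp = true;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<ToyMode> {
      static void mapping(IO &YamlIO, ToyMode &M) {
        // Keys whose value equals the given default are omitted on output.
        YamlIO.mapOptional("ieee", M.IEEE, true);
        YamlIO.mapOptional("dx10-clamp", M.DX10Clamp, true);
      }
    };
    } // end namespace yaml
    } // end namespace llvm

    int main() {
      ToyMode M;
      M.DX10Clamp = false;
      llvm::yaml::Output Out(llvm::outs());
      Out << M; // expected to emit a document containing only "dx10-clamp: false"
      return 0;
    }
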
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index fb7e670068fe..ebbdf80f9567 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1,9 +1,8 @@
//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1875,6 +1874,8 @@ void SIScheduleDAGMI::moveLowLatencies() {
bool CopyForLowLat = false;
for (SDep& SuccDep : SU->Succs) {
SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
CopyForLowLat = true;
}
@@ -1955,7 +1956,7 @@ void SIScheduleDAGMI::schedule()
for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
- MachineOperand *BaseLatOp;
+ const MachineOperand *BaseLatOp;
int64_t OffLatReg;
if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
IsLowLatencySU[i] = 1;
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
index 0ce68ac6a897..c28a7be4d03a 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -1,9 +1,8 @@
//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index b4a4e9e33133..4320e6c957a0 100644
--- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -1,9 +1,8 @@
//===- SIMemoryLegalizer.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -146,7 +145,7 @@ private:
// only contains a single address space.
if ((OrderingAddrSpace == InstrAddrSpace) &&
isPowerOf2_32(uint32_t(InstrAddrSpace)))
- IsCrossAddressSpaceOrdering = false;
+ this->IsCrossAddressSpaceOrdering = false;
}
public:
@@ -353,6 +352,40 @@ public:
};
+class SIGfx10CacheControl : public SIGfx7CacheControl {
+protected:
+ bool CuMode = false;
+
+ /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
+ /// is modified, false otherwise.
+ bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
+ return enableNamedBit<AMDGPU::OpName::dlc>(MI);
+ }
+
+public:
+
+ SIGfx10CacheControl(const GCNSubtarget &ST, bool CuMode) :
+ SIGfx7CacheControl(ST), CuMode(CuMode) {};
+
+ bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const override;
+
+ bool enableNonTemporal(const MachineBasicBlock::iterator &MI) const override;
+
+ bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const override;
+
+ bool insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const override;
+};
+
class SIMemoryLegalizer final : public MachineFunctionPass {
private:
@@ -418,35 +451,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
SIAtomicAddrSpace InstrScope) const {
- /// TODO: For now assume OpenCL memory model which treats each
- /// address space as having a separate happens-before relation, and
- /// so an instruction only has ordering with respect to the address
- /// space it accesses, and if it accesses multiple address spaces it
- /// does not require ordering of operations in different address
- /// spaces.
- if (SSID == SyncScope::System)
+ if (SSID == SyncScope::System)
+ return std::make_tuple(SIAtomicScope::SYSTEM,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getAgentSSID())
+ return std::make_tuple(SIAtomicScope::AGENT,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getWorkgroupSSID())
+ return std::make_tuple(SIAtomicScope::WORKGROUP,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getWavefrontSSID())
+ return std::make_tuple(SIAtomicScope::WAVEFRONT,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == SyncScope::SingleThread)
+ return std::make_tuple(SIAtomicScope::SINGLETHREAD,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getSystemOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getAgentSSID())
+ if (SSID == MMI->getAgentOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::AGENT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getWorkgroupSSID())
+ if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WORKGROUP,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getWavefrontSSID())
+ if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WAVEFRONT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == SyncScope::SingleThread)
+ if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SINGLETHREAD,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- /// TODO: To support HSA Memory Model need to add additional memory
- /// scopes that specify that do require cross address space
- /// ordering.
return None;
}
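
The rewritten toSIAtomicScope above distinguishes the standard sync scopes, which now imply cross-address-space ordering, from the new one-address-space variants, which only order the address space an instruction actually accesses. Below is a hedged sketch of how such scope IDs are obtained at the IR level; the "agent-one-as" string is an assumption about the AMDGPU naming convention and is not taken from this hunk.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Sketch only: request two agent-scope fences, one ordering all address
    // spaces and one ordering only the accessed address space.
    void emitAgentFences(IRBuilder<> &B) {
      LLVMContext &Ctx = B.getContext();
      SyncScope::ID Agent = Ctx.getOrInsertSyncScopeID("agent");
      SyncScope::ID AgentOneAS = Ctx.getOrInsertSyncScopeID("agent-one-as"); // assumed name
      B.CreateFence(AtomicOrdering::SequentiallyConsistent, Agent);
      B.CreateFence(AtomicOrdering::SequentiallyConsistent, AgentOneAS);
    }
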
@@ -613,7 +657,9 @@ std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return make_unique<SIGfx6CacheControl>(ST);
- return make_unique<SIGfx7CacheControl>(ST);
+ if (Generation < AMDGPUSubtarget::GFX10)
+ return make_unique<SIGfx7CacheControl>(ST);
+ return make_unique<SIGfx10CacheControl>(ST, ST.isCuModeEnabled());
}
bool SIGfx6CacheControl::enableLoadCacheBypass(
@@ -722,13 +768,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
bool VMCnt = false;
bool LGKMCnt = false;
- bool EXPCnt = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- VMCnt = true;
+ VMCnt |= true;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@@ -752,7 +797,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/GDS memory as LDS operations
// could be reordered with respect to later global/GDS memory
// operations of the same wave.
- LGKMCnt = IsCrossAddrSpaceOrdering;
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -774,7 +819,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/LDS memory as GDS operations
// could be reordered with respect to later global/LDS memory
// operations of the same wave.
- EXPCnt = IsCrossAddrSpaceOrdering;
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@@ -787,11 +832,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
}
}
- if (VMCnt || LGKMCnt || EXPCnt) {
+ if (VMCnt || LGKMCnt) {
unsigned WaitCntImmediate =
AMDGPU::encodeWaitcnt(IV,
VMCnt ? 0 : getVmcntBitMask(IV),
- EXPCnt ? 0 : getExpcntBitMask(IV),
+ getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
Changed = true;
@@ -851,6 +896,231 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
return Changed;
}
+bool SIGfx10CacheControl::enableLoadCacheBypass(
+ const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
+ assert(MI->mayLoad() && !MI->mayStore());
+ bool Changed = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ /// TODO: Do not set glc for rmw atomic operations as they
+ /// implicitly bypass the L0/L1 caches.
+
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ Changed |= enableGLCBit(MI);
+ Changed |= enableDLCBit(MI);
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP, so the L0, which is per CU, must be bypassed. In CU mode all
+ // waves of a work-group are on the same CU, so the L0 does not need to
+ // be bypassed.
+ if (!CuMode) Changed |= enableGLCBit(MI);
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to bypass.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory caches
+ /// to be bypassed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::enableNonTemporal(
+ const MachineBasicBlock::iterator &MI) const {
+ assert(MI->mayLoad() ^ MI->mayStore());
+ bool Changed = false;
+
+ Changed |= enableSLCBit(MI);
+ /// TODO: for store (non-rmw atomic) instructions also enableGLCBit(MI).
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ Position Pos) const {
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
+ Changed = true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP, so the L0, which is per CU, must be invalidated. In CU mode all
+ // waves of a work-group are on the same CU, so the L0 does not need to be
+ // invalidated.
+ if (!CuMode) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
+ Changed = true;
+ }
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No cache to invalidate.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ /// The scratch address space does not need the global memory cache
+ /// to be flushed as all memory operations by the same thread are
+ /// sequentially consistent, and no other thread can access scratch
+ /// memory.
+
+ /// Other address spaces do not have a cache.
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
+}
+
+bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace,
+ SIMemOp Op,
+ bool IsCrossAddrSpaceOrdering,
+ Position Pos) const {
+ bool Changed = false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Pos == Position::AFTER)
+ ++MI;
+
+ bool VMCnt = false;
+ bool VSCnt = false;
+ bool LGKMCnt = false;
+
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
+ VMCnt |= true;
+ if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
+ VSCnt |= true;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ // In WGP mode the waves of a work-group can be executing on either CU of
+ // the WGP, so waits are needed to make operations visible to waves on the
+ // other CU, as the L0 is per CU. In CU mode all waves of a work-group are
+ // on the same CU and share the same L0, so no wait is needed.
+ if (!CuMode) {
+ if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
+ VMCnt |= true;
+ if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
+ VSCnt |= true;
+ }
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The L0 cache keeps all memory operations in order for
+ // work-items in the same wavefront.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ case SIAtomicScope::WORKGROUP:
+ // If no cross address space ordering then an LDS waitcnt is not
+ // needed as LDS operations for all waves are executed in a
+ // total global ordering as observed by all waves. Required if
+ // also synchronizing with global/GDS memory as LDS operations
+ // could be reordered with respect to later global/GDS memory
+ // operations of the same wave.
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
+ break;
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The LDS keeps all memory operations in order for
+ // the same wavefront.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
+ case SIAtomicScope::AGENT:
+ // If no cross address space ordering then a GDS waitcnt is not
+ // needed as GDS operations for all waves are executed in a
+ // total global ordering as observed by all waves. Required if
+ // also synchronizing with global/LDS memory as GDS operations
+ // could be reordered with respect to later global/LDS memory
+ // operations of the same wave.
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
+ break;
+ case SIAtomicScope::WORKGROUP:
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // The GDS keeps all memory operations in order for
+ // the same work-group.
+ break;
+ default:
+ llvm_unreachable("Unsupported synchronization scope");
+ }
+ }
+
+ if (VMCnt || LGKMCnt) {
+ unsigned WaitCntImmediate =
+ AMDGPU::encodeWaitcnt(IV,
+ VMCnt ? 0 : getVmcntBitMask(IV),
+ getExpcntBitMask(IV),
+ LGKMCnt ? 0 : getLgkmcntBitMask(IV));
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
+ Changed = true;
+ }
+
+ if (VSCnt) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
+ Changed = true;
+ }
+
+ if (Pos == Position::AFTER)
+ --MI;
+
+ return Changed;
+}
+
bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
if (AtomicPseudoMIs.empty())
return false;
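
The insertWait implementations in the diff above decide which hardware counters to wait on and then encode them into a single S_WAITCNT immediate: a counter field of 0 forces that counter to drain, while the field's full bit mask leaves it unconstrained. Below is a minimal sketch of that encoding step, assuming the AMDGPU::IsaVersion helpers declared in Utils/AMDGPUBaseInfo.h and used by the patch; the buildWaitcntImm wrapper is hypothetical.

    #include "Utils/AMDGPUBaseInfo.h"
    using namespace llvm;

    // Hypothetical wrapper around the encoding step used by insertWait().
    static unsigned buildWaitcntImm(const AMDGPU::IsaVersion &IV,
                                    bool WaitVM, bool WaitLGKM) {
      // 0 waits for the counter to drain; the bit mask (its maximum encodable
      // value) means "do not wait on this counter".
      return AMDGPU::encodeWaitcnt(IV,
                                   WaitVM ? 0 : AMDGPU::getVmcntBitMask(IV),
                                   AMDGPU::getExpcntBitMask(IV),
                                   WaitLGKM ? 0 : AMDGPU::getLgkmcntBitMask(IV));
    }
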
diff --git a/lib/Target/AMDGPU/SIModeRegister.cpp b/lib/Target/AMDGPU/SIModeRegister.cpp
index 883fd308f2f4..a5edd7b3554a 100644
--- a/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -1,9 +1,8 @@
//===-- SIModeRegister.cpp - Mode Register --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -45,7 +44,7 @@ struct Status {
Status() : Mask(0), Mode(0){};
- Status(unsigned Mask, unsigned Mode) : Mask(Mask), Mode(Mode) {
+ Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
Mode &= Mask;
};
diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index ebcad30a1866..3227bff20513 100644
--- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -1,9 +1,8 @@
//===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -57,13 +56,16 @@ char SIOptimizeExecMasking::ID = 0;
char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
/// If \p MI is a copy from exec, return the register copied to.
-static unsigned isCopyFromExec(const MachineInstr &MI) {
+static unsigned isCopyFromExec(const MachineInstr &MI, const GCNSubtarget &ST) {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::S_MOV_B64:
- case AMDGPU::S_MOV_B64_term: {
+ case AMDGPU::S_MOV_B64_term:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B32_term: {
const MachineOperand &Src = MI.getOperand(1);
- if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
+ if (Src.isReg() &&
+ Src.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC))
return MI.getOperand(0).getReg();
}
}
@@ -72,16 +74,20 @@ static unsigned isCopyFromExec(const MachineInstr &MI) {
}
/// If \p MI is a copy to exec, return the register copied from.
-static unsigned isCopyToExec(const MachineInstr &MI) {
+static unsigned isCopyToExec(const MachineInstr &MI, const GCNSubtarget &ST) {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
- case AMDGPU::S_MOV_B64: {
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B32: {
const MachineOperand &Dst = MI.getOperand(0);
- if (Dst.isReg() && Dst.getReg() == AMDGPU::EXEC && MI.getOperand(1).isReg())
+ if (Dst.isReg() &&
+ Dst.getReg() == (ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC) &&
+ MI.getOperand(1).isReg())
return MI.getOperand(1).getReg();
break;
}
case AMDGPU::S_MOV_B64_term:
+ case AMDGPU::S_MOV_B32_term:
llvm_unreachable("should have been replaced");
}
@@ -106,6 +112,23 @@ static unsigned isLogicalOpOnExec(const MachineInstr &MI) {
const MachineOperand &Src2 = MI.getOperand(2);
if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC)
return MI.getOperand(0).getReg();
+ break;
+ }
+ case AMDGPU::S_AND_B32:
+ case AMDGPU::S_OR_B32:
+ case AMDGPU::S_XOR_B32:
+ case AMDGPU::S_ANDN2_B32:
+ case AMDGPU::S_ORN2_B32:
+ case AMDGPU::S_NAND_B32:
+ case AMDGPU::S_NOR_B32:
+ case AMDGPU::S_XNOR_B32: {
+ const MachineOperand &Src1 = MI.getOperand(1);
+ if (Src1.isReg() && Src1.getReg() == AMDGPU::EXEC_LO)
+ return MI.getOperand(0).getReg();
+ const MachineOperand &Src2 = MI.getOperand(2);
+ if (Src2.isReg() && Src2.getReg() == AMDGPU::EXEC_LO)
+ return MI.getOperand(0).getReg();
+ break;
}
}
@@ -130,6 +153,22 @@ static unsigned getSaveExecOp(unsigned Opc) {
return AMDGPU::S_NOR_SAVEEXEC_B64;
case AMDGPU::S_XNOR_B64:
return AMDGPU::S_XNOR_SAVEEXEC_B64;
+ case AMDGPU::S_AND_B32:
+ return AMDGPU::S_AND_SAVEEXEC_B32;
+ case AMDGPU::S_OR_B32:
+ return AMDGPU::S_OR_SAVEEXEC_B32;
+ case AMDGPU::S_XOR_B32:
+ return AMDGPU::S_XOR_SAVEEXEC_B32;
+ case AMDGPU::S_ANDN2_B32:
+ return AMDGPU::S_ANDN2_SAVEEXEC_B32;
+ case AMDGPU::S_ORN2_B32:
+ return AMDGPU::S_ORN2_SAVEEXEC_B32;
+ case AMDGPU::S_NAND_B32:
+ return AMDGPU::S_NAND_SAVEEXEC_B32;
+ case AMDGPU::S_NOR_B32:
+ return AMDGPU::S_NOR_SAVEEXEC_B32;
+ case AMDGPU::S_XNOR_B32:
+ return AMDGPU::S_XNOR_SAVEEXEC_B32;
default:
return AMDGPU::INSTRUCTION_LIST_END;
}
@@ -140,7 +179,8 @@ static unsigned getSaveExecOp(unsigned Opc) {
// these is expected per block.
static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
switch (MI.getOpcode()) {
- case AMDGPU::S_MOV_B64_term: {
+ case AMDGPU::S_MOV_B64_term:
+ case AMDGPU::S_MOV_B32_term: {
MI.setDesc(TII.get(AMDGPU::COPY));
return true;
}
@@ -150,12 +190,30 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
return true;
}
+ case AMDGPU::S_XOR_B32_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
+ return true;
+ }
+ case AMDGPU::S_OR_B32_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_OR_B32));
+ return true;
+ }
case AMDGPU::S_ANDN2_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
return true;
}
+ case AMDGPU::S_ANDN2_B32_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_ANDN2_B32));
+ return true;
+ }
default:
return false;
}
@@ -178,6 +236,7 @@ static MachineBasicBlock::reverse_iterator fixTerminators(
static MachineBasicBlock::reverse_iterator findExecCopy(
const SIInstrInfo &TII,
+ const GCNSubtarget &ST,
MachineBasicBlock &MBB,
MachineBasicBlock::reverse_iterator I,
unsigned CopyToExec) {
@@ -185,7 +244,7 @@ static MachineBasicBlock::reverse_iterator findExecCopy(
auto E = MBB.rend();
for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
- unsigned CopyFromExec = isCopyFromExec(*I);
+ unsigned CopyFromExec = isCopyFromExec(*I, ST);
if (CopyFromExec != AMDGPU::NoRegister)
return I;
}
@@ -194,8 +253,8 @@ static MachineBasicBlock::reverse_iterator findExecCopy(
}
// XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly
-// repor tthe register as unavailable because a super-register with a lane mask
-// as unavailable.
+// report the register as unavailable because a super-register with a lane mask
+// is unavailable.
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
for (MachineBasicBlock *Succ : MBB.successors()) {
if (Succ->isLiveIn(Reg))
@@ -212,6 +271,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
// Optimize sequences emitted for control flow lowering. They are originally
// emitted as the separate operations because spill code may need to be
@@ -230,13 +290,13 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
if (I == E)
continue;
- unsigned CopyToExec = isCopyToExec(*I);
+ unsigned CopyToExec = isCopyToExec(*I, ST);
if (CopyToExec == AMDGPU::NoRegister)
continue;
// Scan backwards to find the def.
auto CopyToExecInst = &*I;
- auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
+ auto CopyFromExecInst = findExecCopy(*TII, ST, MBB, I, CopyToExec);
if (CopyFromExecInst == E) {
auto PrepareExecInst = std::next(I);
if (PrepareExecInst == E)
@@ -246,7 +306,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
isLogicalOpOnExec(*PrepareExecInst) == CopyToExec) {
LLVM_DEBUG(dbgs() << "Fold exec copy: " << *PrepareExecInst);
- PrepareExecInst->getOperand(0).setReg(AMDGPU::EXEC);
+ PrepareExecInst->getOperand(0).setReg(Exec);
LLVM_DEBUG(dbgs() << "into: " << *PrepareExecInst << '\n');
@@ -269,7 +329,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator J
= std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
J != JE; ++J) {
- if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
+ if (SaveExecInst && J->readsRegister(Exec, TRI)) {
LLVM_DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
// Make sure this is inserted after any VALU ops that may have been
// scheduled in between.
@@ -353,7 +413,7 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
CopyToExecInst->eraseFromParent();
for (MachineInstr *OtherInst : OtherUseInsts) {
- OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
+ OtherInst->substituteRegister(CopyToExec, Exec,
AMDGPU::NoSubRegister, *TRI);
}
}
diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index c671fed34bdf..7e10316eab92 100644
--- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -1,9 +1,8 @@
//===-- SIOptimizeExecMaskingPreRA.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,10 +33,22 @@ using namespace llvm;
namespace {
class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+private:
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
public:
- static char ID;
+ MachineBasicBlock::iterator skipIgnoreExecInsts(
+ MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const;
+
+ MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc(
+ MachineBasicBlock *&MBB,
+ MachineBasicBlock::iterator It) const;
public:
+ static char ID;
+
SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
}
@@ -71,38 +82,93 @@ FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
return new SIOptimizeExecMaskingPreRA();
}
-static bool isEndCF(const MachineInstr& MI, const SIRegisterInfo* TRI) {
+static bool isEndCF(const MachineInstr &MI, const SIRegisterInfo *TRI,
+ const GCNSubtarget &ST) {
+ if (ST.isWave32()) {
+ return MI.getOpcode() == AMDGPU::S_OR_B32 &&
+ MI.modifiesRegister(AMDGPU::EXEC_LO, TRI);
+ }
+
return MI.getOpcode() == AMDGPU::S_OR_B64 &&
MI.modifiesRegister(AMDGPU::EXEC, TRI);
}
-static bool isFullExecCopy(const MachineInstr& MI) {
- return MI.isFullCopy() && MI.getOperand(1).getReg() == AMDGPU::EXEC;
+static bool isFullExecCopy(const MachineInstr& MI, const GCNSubtarget& ST) {
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+
+ if (MI.isCopy() && MI.getOperand(1).getReg() == Exec) {
+ assert(MI.isFullCopy());
+ return true;
+ }
+
+ return false;
}
static unsigned getOrNonExecReg(const MachineInstr &MI,
- const SIInstrInfo &TII) {
+ const SIInstrInfo &TII,
+ const GCNSubtarget& ST) {
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
auto Op = TII.getNamedOperand(MI, AMDGPU::OpName::src1);
- if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
+ if (Op->isReg() && Op->getReg() != Exec)
return Op->getReg();
Op = TII.getNamedOperand(MI, AMDGPU::OpName::src0);
- if (Op->isReg() && Op->getReg() != AMDGPU::EXEC)
+ if (Op->isReg() && Op->getReg() != Exec)
return Op->getReg();
return AMDGPU::NoRegister;
}
static MachineInstr* getOrExecSource(const MachineInstr &MI,
const SIInstrInfo &TII,
- const MachineRegisterInfo &MRI) {
- auto SavedExec = getOrNonExecReg(MI, TII);
+ const MachineRegisterInfo &MRI,
+ const GCNSubtarget& ST) {
+ auto SavedExec = getOrNonExecReg(MI, TII, ST);
if (SavedExec == AMDGPU::NoRegister)
return nullptr;
auto SaveExecInst = MRI.getUniqueVRegDef(SavedExec);
- if (!SaveExecInst || !isFullExecCopy(*SaveExecInst))
+ if (!SaveExecInst || !isFullExecCopy(*SaveExecInst, ST))
return nullptr;
return SaveExecInst;
}
+/// Skip over instructions that don't care about the exec mask.
+MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts(
+ MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const {
+ for ( ; I != E; ++I) {
+ if (TII->mayReadEXEC(*MRI, *I))
+ break;
+ }
+
+ return I;
+}
+
+// Skip to the next instruction, ignoring debug instructions and trivial
+// block boundaries (blocks that have one (typically fallthrough) successor,
+// and that successor has one predecessor).
+MachineBasicBlock::iterator
+SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc(
+ MachineBasicBlock *&MBB,
+ MachineBasicBlock::iterator It) const {
+
+ do {
+ It = skipIgnoreExecInsts(It, MBB->end());
+ if (It != MBB->end() || MBB->succ_size() != 1)
+ break;
+
+ // If there is one trivial successor, advance to the next block.
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+
+ // TODO: Is this really necessary?
+ if (!MBB->isLayoutSuccessor(Succ))
+ break;
+
+ It = Succ->begin();
+ MBB = Succ;
+ } while (true);
+
+ return It;
+}
+
+
// Optimize sequence
// %sel = V_CNDMASK_B32_e64 0, 1, %cc
// %cmp = V_CMP_NE_U32 1, %1
@@ -125,10 +191,11 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
LiveIntervals *LIS) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
- const unsigned AndOpc = AMDGPU::S_AND_B64;
- const unsigned Andn2Opc = AMDGPU::S_ANDN2_B64;
- const unsigned CondReg = AMDGPU::VCC;
- const unsigned ExecReg = AMDGPU::EXEC;
+ bool Wave32 = ST.isWave32();
+ const unsigned AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ const unsigned Andn2Opc = Wave32 ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_ANDN2_B64;
+ const unsigned CondReg = Wave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
+ const unsigned ExecReg = Wave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -172,6 +239,10 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return AMDGPU::NoRegister;
+ if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
+ TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
+ return AMDGPU::NoRegister;
+
Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
@@ -187,7 +258,7 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
TII->get(Andn2Opc), And->getOperand(0).getReg())
.addReg(ExecReg)
- .addReg(CCReg, CC->getSubReg());
+ .addReg(CCReg, 0, CC->getSubReg());
And->eraseFromParent();
LIS->InsertMachineInstrInMaps(*Andn2);
@@ -224,11 +295,14 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
return false;
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const SIInstrInfo *TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+
MachineRegisterInfo &MRI = MF.getRegInfo();
LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
+ unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -248,9 +322,10 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
// Skip this if the endpgm has any implicit uses, otherwise we would need
// to be careful to update / remove them.
+ // S_ENDPGM always has a single imm operand that is not used other than to
+ // end up in the encoding.
MachineInstr &Term = MBB.back();
- if (Term.getOpcode() != AMDGPU::S_ENDPGM ||
- Term.getNumOperands() != 0)
+ if (Term.getOpcode() != AMDGPU::S_ENDPGM || Term.getNumOperands() != 1)
continue;
SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});
@@ -304,32 +379,21 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
}
// Try to collapse adjacent endifs.
- auto Lead = MBB.begin(), E = MBB.end();
- if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
- continue;
-
- const MachineBasicBlock* Succ = *MBB.succ_begin();
- if (!MBB.isLayoutSuccessor(Succ))
- continue;
-
- auto I = std::next(Lead);
-
- for ( ; I != E; ++I)
- if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI))
- break;
-
- if (I != E)
+ auto E = MBB.end();
+ auto Lead = skipDebugInstructionsForward(MBB.begin(), E);
+ if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI, ST))
continue;
- const auto NextLead = Succ->begin();
- if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) ||
- !getOrExecSource(*NextLead, *TII, MRI))
+ MachineBasicBlock *TmpMBB = &MBB;
+ auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead));
+ if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI, ST) ||
+ !getOrExecSource(*NextLead, *TII, MRI, ST))
continue;
LLVM_DEBUG(dbgs() << "Redundant EXEC = S_OR_B64 found: " << *Lead << '\n');
- auto SaveExec = getOrExecSource(*Lead, *TII, MRI);
- unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII);
+ auto SaveExec = getOrExecSource(*Lead, *TII, MRI, ST);
+ unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII, ST);
for (auto &Op : Lead->operands()) {
if (Op.isReg())
RecalcRegs.insert(Op.getReg());
@@ -363,7 +427,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (SafeToReplace) {
LIS->RemoveMachineInstrFromMaps(*SaveExec);
SaveExec->eraseFromParent();
- MRI.replaceRegWith(SavedExec, AMDGPU::EXEC);
+ MRI.replaceRegWith(SavedExec, Exec);
LIS->removeInterval(SavedExec);
}
}
@@ -375,8 +439,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
if (!MRI.reg_empty(Reg))
LIS->createAndComputeVirtRegInterval(Reg);
} else {
- for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
- LIS->removeRegUnit(*U);
+ LIS->removeAllRegUnitsForPhysReg(Reg);
}
}
}
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 2d43d5d05ef6..2d71abc0612a 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1,9 +1,8 @@
//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -348,8 +347,8 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
if (Abs || Neg) {
assert(!Sext &&
"Float and integer src modifiers can't be set simulteniously");
- Mods |= Abs ? SISrcMods::ABS : 0;
- Mods ^= Neg ? SISrcMods::NEG : 0;
+ Mods |= Abs ? SISrcMods::ABS : 0u;
+ Mods ^= Neg ? SISrcMods::NEG : 0u;
} else if (Sext) {
Mods |= SISrcMods::SEXT;
}
@@ -419,7 +418,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}
assert(Src && Src->isReg());
- if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
!isSameReg(*Src, *getReplacedOperand())) {
// In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
@@ -461,7 +462,9 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
// Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
- if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_FMAC_F32_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
getDstSel() != AMDGPU::SDWA::DWORD) {
// v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
@@ -951,7 +954,8 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
if (TII->isVOPC(Opc)) {
if (!ST.hasSDWASdst()) {
const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
- if (SDst && SDst->getReg() != AMDGPU::VCC)
+ if (SDst && (SDst->getReg() != AMDGPU::VCC &&
+ SDst->getReg() != AMDGPU::VCC_LO))
return false;
}
@@ -965,10 +969,16 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI,
return false;
}
- if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
+ if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_FMAC_F16_e32 ||
+ Opc == AMDGPU::V_FMAC_F32_e32 ||
+ Opc == AMDGPU::V_MAC_F16_e32 ||
Opc == AMDGPU::V_MAC_F32_e32))
return false;
+ // Check if target supports this SDWA opcode
+ if (TII->pseudoToMCOpcode(Opc) == -1)
+ return false;
+
// FIXME: has SDWA but require handling of implicit VCC use
if (Opc == AMDGPU::V_CNDMASK_B32_e32)
return false;
@@ -1010,7 +1020,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst.add(*Dst);
} else {
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
- SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
+ SDWAInst.addReg(TRI->getVCC(), RegState::Define);
}
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -1039,7 +1049,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst.add(*Src1);
}
- if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
+ if (SDWAOpcode == AMDGPU::V_FMAC_F16_sdwa ||
+ SDWAOpcode == AMDGPU::V_FMAC_F32_sdwa ||
+ SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
// v_mac_f16/32 has additional src2 operand tied to vdst
MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
diff --git a/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
new file mode 100644
index 000000000000..f9bfe96f65cb
--- /dev/null
+++ b/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp
@@ -0,0 +1,221 @@
+//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Pass to pre-allocate WWM registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
+
+namespace {
+
+class SIPreAllocateWWMRegs : public MachineFunctionPass {
+private:
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
+ VirtRegMap *VRM;
+ RegisterClassInfo RegClassInfo;
+
+ std::vector<unsigned> RegsToRewrite;
+
+public:
+ static char ID;
+
+ SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
+ initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<SlotIndexes>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool processDef(MachineOperand &MO);
+ void rewriteRegs(MachineFunction &MF);
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
+ "SI Pre-allocate WWM Registers", false, false)
+
+char SIPreAllocateWWMRegs::ID = 0;
+
+char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
+
+FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
+ return new SIPreAllocateWWMRegs();
+}
+
+bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+
+ unsigned Reg = MO.getReg();
+
+ if (!TRI->isVGPR(*MRI, Reg))
+ return false;
+
+ if (TRI->isPhysicalRegister(Reg))
+ return false;
+
+ if (VRM->hasPhys(Reg))
+ return false;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+
+ for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
+ if (!MRI->isPhysRegUsed(PhysReg) &&
+ Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
+ Matrix->assign(LI, PhysReg);
+ assert(PhysReg != 0);
+ RegsToRewrite.push_back(Reg);
+ return true;
+ }
+ }
+
+ llvm_unreachable("physreg not found for WWM expression");
+ return false;
+}
+
+void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ const unsigned VirtReg = MO.getReg();
+ if (TRI->isPhysicalRegister(VirtReg))
+ continue;
+
+ if (!VRM->hasPhys(VirtReg))
+ continue;
+
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ const unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
+ MO.setSubReg(0);
+ }
+
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(false);
+ }
+ }
+ }
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ for (unsigned Reg : RegsToRewrite) {
+ LIS->removeInterval(Reg);
+
+ const unsigned PhysReg = VRM->getPhys(Reg);
+ assert(PhysReg != 0);
+ MFI->ReserveWWMRegister(PhysReg);
+ }
+
+ RegsToRewrite.clear();
+
+ // Update the set of reserved registers to include WWM ones.
+ MRI->freezeReservedRegs(MF);
+}
+
+bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ Matrix = &getAnalysis<LiveRegMatrix>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ RegClassInfo.runOnMachineFunction(MF);
+
+ bool RegsAssigned = false;
+
+ // We use a reverse post-order traversal of the control-flow graph to
+ // guarantee that we visit definitions in dominance order. Since WWM
+ // expressions are guaranteed to never involve phi nodes, and we can only
+ // escape WWM through the special WWM instruction, this is a perfect
+ // elimination order, so we can never do any better.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+
+ for (MachineBasicBlock *MBB : RPOT) {
+ bool InWWM = false;
+ for (MachineInstr &MI : *MBB) {
+ if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
+ MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
+ RegsAssigned |= processDef(MI.getOperand(0));
+
+ if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
+ LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
+ InWWM = true;
+ continue;
+ }
+
+ if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
+ LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
+ InWWM = false;
+ }
+
+ if (!InWWM)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
+
+ for (MachineOperand &DefOpnd : MI.defs()) {
+ RegsAssigned |= processDef(DefOpnd);
+ }
+ }
+ }
+
+ if (!RegsAssigned)
+ return false;
+
+ rewriteRegs(MF);
+ return true;
+}
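
The new pass's comment above explains that a reverse post-order walk of the CFG visits definitions in dominance order, which is what lets processDef assign registers in a single sweep. Below is a minimal sketch of the same traversal idiom, shown on an IR Function rather than a MachineFunction for brevity; the function name is hypothetical.

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Sketch: walk blocks in reverse post-order, so each block's dominators are
    // visited before the block itself.
    void visitInDominanceFriendlyOrder(Function &F) {
      ReversePostOrderTraversal<Function *> RPOT(&F);
      for (BasicBlock *BB : RPOT)
        for (Instruction &I : *BB)
          (void)I; // a definition is reached here before any of its uses
    }
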
diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h
index 383f6b575808..168f05f8fdd6 100644
--- a/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/lib/Target/AMDGPU/SIProgramInfo.h
@@ -1,9 +1,8 @@
//===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,8 @@ struct SIProgramInfo {
uint32_t DX10Clamp = 0;
uint32_t DebugMode = 0;
uint32_t IEEEMode = 0;
+ uint32_t WgpMode = 0; // GFX10+
+ uint32_t MemOrdered = 0; // GFX10+
uint64_t ScratchSize = 0;
uint64_t ComputePGMRSrc1 = 0;
@@ -50,18 +51,6 @@ struct SIProgramInfo {
// Number of VGPRs that meets number of waves per execution unit request.
uint32_t NumVGPRsForWavesPerEU = 0;
- // Fixed SGPR number used to hold wave scratch offset for entire kernel
- // execution, or std::numeric_limits<uint16_t>::max() if the register is not
- // used or not known.
- uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
- std::numeric_limits<uint16_t>::max();
-
- // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
- // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
- // is not used or not known.
- uint16_t DebuggerPrivateSegmentBufferSGPR =
- std::numeric_limits<uint16_t>::max();
-
// Whether there is recursion, dynamic allocas, indirect calls or some other
// reason there may be statically unknown stack usage.
bool DynamicCallStack = false;
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 97cfde2b2354..f152deb28004 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -63,8 +63,10 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
AMDGPURegisterInfo(),
SGPRPressureSets(getNumRegPressureSets()),
VGPRPressureSets(getNumRegPressureSets()),
+ AGPRPressureSets(getNumRegPressureSets()),
SpillSGPRToVGPR(false),
- SpillSGPRToSMEM(false) {
+ SpillSGPRToSMEM(false),
+ isWave32(ST.isWave32()) {
if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
SpillSGPRToSMEM = true;
else if (EnableSpillSGPRToVGPR)
@@ -74,10 +76,12 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
SGPRSetID = NumRegPressureSets;
VGPRSetID = NumRegPressureSets;
+ AGPRSetID = NumRegPressureSets;
for (unsigned i = 0; i < NumRegPressureSets; ++i) {
classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
+ classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
}
// Determine the number of reg units for each pressure set.
@@ -89,7 +93,7 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
}
}
- unsigned VGPRMax = 0, SGPRMax = 0;
+ unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
for (unsigned i = 0; i < NumRegPressureSets; ++i) {
if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
VGPRSetID = i;
@@ -100,10 +104,16 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
SGPRSetID = i;
SGPRMax = PressureSetRegUnits[i];
}
+ if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
+ AGPRSetID = i;
+ AGPRMax = PressureSetRegUnits[i];
+ continue;
+ }
}
assert(SGPRSetID < NumRegPressureSets &&
- VGPRSetID < NumRegPressureSets);
+ VGPRSetID < NumRegPressureSets &&
+ AGPRSetID < NumRegPressureSets);
}
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
@@ -139,11 +149,6 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}
-unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
- const MachineFunction &MF) const {
- return AMDGPU::SGPR32;
-}
-
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -155,15 +160,26 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  // M0 has to be reserved so that llvm accepts it as a live-in to a block.
reserveRegisterTuples(Reserved, AMDGPU::M0);
+ // Reserve src_vccz, src_execz, src_scc.
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
+
// Reserve the memory aperture registers.
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
+ // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
+
// Reserve xnack_mask registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
+ // Reserve lds_direct register - support is not implemented in Codegen.
+ reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
+
// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);
@@ -176,6 +192,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
+ // Reserve null register - it shall never be allocated
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
+
+ // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
+ // will result in bugs.
+ if (isWave32) {
+ Reserved.set(AMDGPU::VCC);
+ Reserved.set(AMDGPU::VCC_HI);
+ }
+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
@@ -190,6 +216,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
+ Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
}
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -225,9 +253,33 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, FrameReg));
}
+ for (unsigned Reg : MFI->WWMReservedRegs) {
+ reserveRegisterTuples(Reserved, Reg);
+ }
+
+ // FIXME: Stop using reserved registers for this.
+ for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
+ reserveRegisterTuples(Reserved, Reg);
+
+ for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
+ reserveRegisterTuples(Reserved, Reg);
+
return Reserved;
}
+bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ // On entry, the base address is 0, so it can't possibly need any more
+ // alignment.
+
+ // FIXME: Should be able to specify the entry frame alignment per calling
+ // convention instead.
+ if (Info->isEntryFunction())
+ return false;
+
+ return TargetRegisterInfo::canRealignStack(MF);
+}
+
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
if (Info->isEntryFunction()) {
@@ -252,11 +304,20 @@ bool SIRegisterInfo::requiresFrameIndexScavenging(
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
const MachineFunction &MF) const {
- // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
- // create a virtual register for it during frame index elimination, so the
- // scavenger is directly needed.
- return MF.getFrameInfo().hasStackObjects() &&
- MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects())
+ return false;
+
+ // The scavenger is used for large frames which may require finding a free
+ // register for large offsets.
+ if (!isUInt<12>(MFI.getStackSize()))
+ return true;
+
+ // If using scalar stores, for spills, m0 is needed for the scalar store
+ // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
+ // register for it during frame index elimination, so the scavenger is
+ // directly needed.
+ return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}
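
The early-exit added above keys on isUInt<12>(MFI.getStackSize()): the MUBUF scratch instructions used for these spills take a 12-bit unsigned immediate offset, so any frame larger than 4095 bytes may need a scavenged SGPR to build the offset. Below is a minimal, self-contained sketch of that bound check; fitsInUnsignedBits is a hypothetical stand-in for llvm::isUInt<N> and is not part of the patch.

#include <cstdint>
#include <cstdio>

// True when Value can be encoded in an unsigned field of Bits bits, i.e. the
// same predicate llvm::isUInt<Bits>(Value) computes for Bits < 64.
static bool fitsInUnsignedBits(uint64_t Value, unsigned Bits) {
  return Value < (UINT64_C(1) << Bits);
}

int main() {
  std::printf("stack size 4095 fits in 12 bits: %d\n",
              fitsInUnsignedBits(4095, 12)); // 1: small frame, fall through to the scalar-store check
  std::printf("stack size 4096 fits in 12 bits: %d\n",
              fitsInUnsignedBits(4096, 12)); // 0: large frame, scavenger may be required
  return 0;
}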
@@ -332,7 +393,8 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
.addReg(OffsetReg, RegState::Kill)
- .addReg(FIReg);
+ .addReg(FIReg)
+ .addImm(0); // clamp bit
}
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -394,21 +456,39 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
switch (Op) {
+ case AMDGPU::SI_SPILL_S1024_SAVE:
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
+ case AMDGPU::SI_SPILL_V1024_SAVE:
+ case AMDGPU::SI_SPILL_V1024_RESTORE:
+ case AMDGPU::SI_SPILL_A1024_SAVE:
+ case AMDGPU::SI_SPILL_A1024_RESTORE:
+ return 32;
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V512_RESTORE:
+ case AMDGPU::SI_SPILL_A512_SAVE:
+ case AMDGPU::SI_SPILL_A512_RESTORE:
return 16;
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V256_RESTORE:
return 8;
+ case AMDGPU::SI_SPILL_S160_SAVE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
+ case AMDGPU::SI_SPILL_V160_SAVE:
+ case AMDGPU::SI_SPILL_V160_RESTORE:
+ return 5;
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_A128_SAVE:
+ case AMDGPU::SI_SPILL_A128_RESTORE:
return 4;
+ case AMDGPU::SI_SPILL_S96_SAVE:
+ case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V96_RESTORE:
return 3;
@@ -416,11 +496,15 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_V64_SAVE:
case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_A64_SAVE:
+ case AMDGPU::SI_SPILL_A64_RESTORE:
return 2;
case AMDGPU::SI_SPILL_S32_SAVE:
case AMDGPU::SI_SPILL_S32_RESTORE:
case AMDGPU::SI_SPILL_V32_SAVE:
case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_A32_SAVE:
+ case AMDGPU::SI_SPILL_A32_RESTORE:
return 1;
default: llvm_unreachable("Invalid spill opcode");
}
@@ -480,6 +564,35 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
}
}
+static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
+ int Index,
+ unsigned Lane,
+ unsigned ValueReg,
+ bool IsKill) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MI->getParent()->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
+
+ if (Reg == AMDGPU::NoRegister)
+ return MachineInstrBuilder();
+
+ bool IsStore = MI->mayStore();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+
+ unsigned Dst = IsStore ? Reg : ValueReg;
+ unsigned Src = IsStore ? ValueReg : Reg;
+ unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
+ : AMDGPU::V_ACCVGPR_READ_B32;
+
+ return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+ .addReg(Src, getKillRegState(IsKill));
+}
+
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
@@ -498,6 +611,9 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
+ return true;
+
MachineInstrBuilder NewMI =
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.add(*Reg)
@@ -507,6 +623,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
+ .addImm(0) // dlc
.cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -549,6 +666,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned Align = MFI.getObjectAlignment(Index);
const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
+ Register TmpReg =
+ hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
+ : Register();
+
assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
if (!isUInt<12>(Offset + Size - EltSize)) {
@@ -562,7 +683,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
// We don't have access to the register scavenger if this function is called
// during PEI::scavengeFrameVirtualRegs().
if (RS)
- SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
+ SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
if (SOffset == AMDGPU::NoRegister) {
// There are no free SGPRs, and since we are in the process of spilling
@@ -597,20 +718,38 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
SrcDstRegState |= getKillRegState(IsKill);
}
- MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
- MachineMemOperand *NewMMO
- = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
- EltSize, MinAlign(Align, EltSize * i));
-
- auto MIB = BuildMI(*MBB, MI, DL, Desc)
- .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
- .addReg(ScratchRsrcReg)
- .addReg(SOffset, SOffsetRegState)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addMemOperand(NewMMO);
+ auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
+
+ if (!MIB.getInstr()) {
+ unsigned FinalReg = SubReg;
+ if (TmpReg != AMDGPU::NoRegister) {
+ if (IsStore)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
+ .addReg(SubReg, getKillRegState(IsKill));
+ SubReg = TmpReg;
+ }
+
+ MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
+ MachineMemOperand *NewMMO
+ = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
+ EltSize, MinAlign(Align, EltSize * i));
+
+ MIB = BuildMI(*MBB, MI, DL, Desc)
+ .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
+ .addReg(ScratchRsrcReg)
+ .addReg(SOffset, SOffsetRegState)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addMemOperand(NewMMO);
+
+ if (!IsStore && TmpReg != AMDGPU::NoRegister)
+ MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
+ FinalReg)
+ .addReg(TmpReg, RegState::Kill);
+ }
if (NumSubRegs > 1)
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
@@ -669,6 +808,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
if (SpillToSMEM && OnlyToVGPR)
return false;
+ Register FrameReg = getFrameRegister(*MF);
+
assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
SuperReg != MFI->getFrameOffsetReg() &&
SuperReg != MFI->getScratchWaveOffsetReg()));
@@ -728,11 +869,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
if (Offset != 0) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(MFI->getFrameOffsetReg())
+ .addReg(FrameReg)
.addImm(Offset);
} else {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(MFI->getFrameOffsetReg());
+ .addReg(FrameReg);
}
BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
@@ -740,6 +881,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
.addReg(MFI->getScratchRSrcReg()) // sbase
.addReg(OffsetReg, RegState::Kill) // soff
.addImm(0) // glc
+ .addImm(0) // dlc
.addMemOperand(MMO);
continue;
@@ -799,11 +941,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
EltSize, MinAlign(Align, EltSize * i));
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
- .addReg(TmpReg, RegState::Kill) // src
- .addFrameIndex(Index) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // srrsrc
- .addReg(MFI->getFrameOffsetReg()) // soffset
- .addImm(i * 4) // offset
+ .addReg(TmpReg, RegState::Kill) // src
+ .addFrameIndex(Index) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // srsrc
+ .addReg(MFI->getStackPtrOffsetReg()) // soffset
+ .addImm(i * 4) // offset
.addMemOperand(MMO);
}
}
@@ -859,6 +1001,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
unsigned EltSize = 4;
unsigned ScalarLoadOp;
+ Register FrameReg = getFrameRegister(*MF);
+
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
if (SpillToSMEM && isSGPRClass(RC)) {
// XXX - if private_element_size is larger than 4 it might be useful to be
@@ -890,18 +1034,19 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
if (Offset != 0) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
- .addReg(MFI->getFrameOffsetReg())
+ .addReg(FrameReg)
.addImm(Offset);
} else {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addReg(MFI->getFrameOffsetReg());
+ .addReg(FrameReg);
}
auto MIB =
BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
- .addReg(MFI->getScratchRSrcReg()) // sbase
- .addReg(OffsetReg, RegState::Kill) // soff
- .addImm(0) // glc
+ .addReg(MFI->getScratchRSrcReg()) // sbase
+ .addReg(OffsetReg, RegState::Kill) // soff
+ .addImm(0) // glc
+ .addImm(0) // dlc
.addMemOperand(MMO);
if (NumSubRegs > 1 && i == 0)
@@ -937,10 +1082,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
MinAlign(Align, EltSize * i));
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
- .addFrameIndex(Index) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // srsrc
- .addReg(MFI->getFrameOffsetReg()) // soffset
- .addImm(i * 4) // offset
+ .addFrameIndex(Index) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // srsrc
+ .addReg(MFI->getStackPtrOffsetReg()) // soffset
+ .addImm(i * 4) // offset
.addMemOperand(MMO);
auto MIB =
@@ -969,15 +1114,21 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
int FI,
RegScavenger *RS) const {
switch (MI->getOpcode()) {
+ case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE:
return spillSGPR(MI, FI, RS, true);
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE:
return restoreSGPR(MI, FI, RS, true);
@@ -998,14 +1149,21 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
const SIInstrInfo *TII = ST.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
+ assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
+
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
int Index = MI->getOperand(FIOperandNum).getIndex();
+ Register FrameReg = getFrameRegister(*MF);
+
switch (MI->getOpcode()) {
// SGPR register spill
+ case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S160_SAVE:
case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE: {
spillSGPR(MI, Index, RS);
@@ -1013,9 +1171,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
// SGPR register restore
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S160_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE: {
restoreSGPR(MI, Index, RS);
@@ -1023,19 +1184,29 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
// VGPR register spill
+ case AMDGPU::SI_SPILL_V1024_SAVE:
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
+ case AMDGPU::SI_SPILL_V160_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE: {
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_A1024_SAVE:
+ case AMDGPU::SI_SPILL_A512_SAVE:
+ case AMDGPU::SI_SPILL_A128_SAVE:
+ case AMDGPU::SI_SPILL_A64_SAVE:
+ case AMDGPU::SI_SPILL_A32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+ MFI->getStackPtrOffsetReg());
+
buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
Index,
VData->getReg(), VData->isKill(),
TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+ FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(),
RS);
@@ -1047,16 +1218,25 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_V160_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
- case AMDGPU::SI_SPILL_V512_RESTORE: {
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+ case AMDGPU::SI_SPILL_V1024_RESTORE:
+ case AMDGPU::SI_SPILL_A32_RESTORE:
+ case AMDGPU::SI_SPILL_A64_RESTORE:
+ case AMDGPU::SI_SPILL_A128_RESTORE:
+ case AMDGPU::SI_SPILL_A512_RESTORE:
+ case AMDGPU::SI_SPILL_A1024_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+ MFI->getStackPtrOffsetReg());
buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
Index,
VData->getReg(), VData->isKill(),
TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+ FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(),
RS);
@@ -1068,24 +1248,23 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
const DebugLoc &DL = MI->getDebugLoc();
bool IsMUBUF = TII->isMUBUF(*MI);
- if (!IsMUBUF &&
- MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
+ if (!IsMUBUF && !MFI->isEntryFunction()) {
// Convert to an absolute stack address by finding the offset from the
// scratch wave base and scaling by the wave size.
//
- // In an entry function/kernel the stack address is already the
- // absolute address relative to the scratch wave offset.
+ // In an entry function/kernel the offset is already the absolute
+ // address relative to the frame register.
unsigned DiffReg
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
- unsigned ResultReg = IsCopy ?
+ Register ResultReg = IsCopy ?
MI->getOperand(0).getReg() :
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
- .addReg(MFI->getFrameOffsetReg())
+ .addReg(FrameReg)
.addReg(MFI->getScratchWaveOffsetReg());
int64_t Offset = FrameInfo.getObjectOffset(Index);
@@ -1106,7 +1285,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
.addImm(Offset)
- .addReg(ScaledReg, RegState::Kill);
+ .addReg(ScaledReg, RegState::Kill)
+ .addImm(0); // clamp bit
} else {
unsigned ConstOffsetReg
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
@@ -1115,7 +1295,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addImm(Offset);
TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
.addReg(ConstOffsetReg, RegState::Kill)
- .addReg(ScaledReg, RegState::Kill);
+ .addReg(ScaledReg, RegState::Kill)
+ .addImm(0); // clamp bit
}
}
@@ -1133,8 +1314,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::vaddr));
- assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
- == MFI->getFrameOffsetReg());
+ assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
+ MFI->getStackPtrOffsetReg());
+
+ TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
int64_t Offset = FrameInfo.getObjectOffset(Index);
int64_t OldImm
@@ -1164,63 +1347,21 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
- #define AMDGPU_REG_ASM_NAMES
- #include "AMDGPURegAsmNames.inc.cpp"
-
- #define REG_RANGE(BeginReg, EndReg, RegTable) \
- if (Reg >= BeginReg && Reg <= EndReg) { \
- unsigned Index = Reg - BeginReg; \
- assert(Index < array_lengthof(RegTable)); \
- return RegTable[Index]; \
- }
+ const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
+ unsigned Size = getRegSizeInBits(*RC);
+ unsigned AltName = AMDGPU::NoRegAltName;
- REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
- REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
- REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
- REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
- REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
- VGPR96RegNames);
-
- REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
- AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
- VGPR128RegNames);
- REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
- AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
- SGPR128RegNames);
-
- REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
- AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
- VGPR256RegNames);
-
- REG_RANGE(
- AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
- AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
- VGPR512RegNames);
-
- REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
- AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
- SGPR256RegNames);
-
- REG_RANGE(
- AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
- AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
- SGPR512RegNames
- );
-
-#undef REG_RANGE
-
- // FIXME: Rename flat_scr so we don't need to special case this.
- switch (Reg) {
- case AMDGPU::FLAT_SCR:
- return "flat_scratch";
- case AMDGPU::FLAT_SCR_LO:
- return "flat_scratch_lo";
- case AMDGPU::FLAT_SCR_HI:
- return "flat_scratch_hi";
- default:
- // For the special named registers the default is fine.
- return TargetRegisterInfo::getRegAsmName(Reg);
+ switch (Size) {
+ case 32: AltName = AMDGPU::Reg32; break;
+ case 64: AltName = AMDGPU::Reg64; break;
+ case 96: AltName = AMDGPU::Reg96; break;
+ case 128: AltName = AMDGPU::Reg128; break;
+ case 160: AltName = AMDGPU::Reg160; break;
+ case 256: AltName = AMDGPU::Reg256; break;
+ case 512: AltName = AMDGPU::Reg512; break;
+ case 1024: AltName = AMDGPU::Reg1024; break;
}
+ return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
}
// FIXME: This is very slow. It might be worth creating a map from physreg to
@@ -1231,15 +1372,25 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
static const TargetRegisterClass *const BaseClasses[] = {
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
+ &AMDGPU::AGPR_32RegClass,
&AMDGPU::VReg_64RegClass,
&AMDGPU::SReg_64RegClass,
+ &AMDGPU::AReg_64RegClass,
&AMDGPU::VReg_96RegClass,
+ &AMDGPU::SReg_96RegClass,
&AMDGPU::VReg_128RegClass,
&AMDGPU::SReg_128RegClass,
+ &AMDGPU::AReg_128RegClass,
+ &AMDGPU::VReg_160RegClass,
+ &AMDGPU::SReg_160RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,
+ &AMDGPU::AReg_512RegClass,
+ &AMDGPU::SReg_1024RegClass,
+ &AMDGPU::VReg_1024RegClass,
+ &AMDGPU::AReg_1024RegClass,
&AMDGPU::SCC_CLASSRegClass,
&AMDGPU::Pseudo_SReg_32RegClass,
&AMDGPU::Pseudo_SReg_128RegClass,
@@ -1268,10 +1419,39 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
case 128:
return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+ case 160:
+ return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
case 256:
return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
case 512:
return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
+ case 1024:
+ return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
+ default:
+ llvm_unreachable("Invalid register class size");
+ }
+}
+
+bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
+ unsigned Size = getRegSizeInBits(*RC);
+ if (Size < 32)
+ return false;
+ switch (Size) {
+ case 32:
+ return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
+ case 64:
+ return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
+ case 96:
+ return false;
+ case 128:
+ return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
+ case 160:
+ case 256:
+ return false;
+ case 512:
+ return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
+ case 1024:
+ return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
default:
llvm_unreachable("Invalid register class size");
}
@@ -1288,10 +1468,32 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
return &AMDGPU::VReg_96RegClass;
case 128:
return &AMDGPU::VReg_128RegClass;
+ case 160:
+ return &AMDGPU::VReg_160RegClass;
case 256:
return &AMDGPU::VReg_256RegClass;
case 512:
return &AMDGPU::VReg_512RegClass;
+ case 1024:
+ return &AMDGPU::VReg_1024RegClass;
+ default:
+ llvm_unreachable("Invalid register class size");
+ }
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
+ const TargetRegisterClass *SRC) const {
+ switch (getRegSizeInBits(*SRC)) {
+ case 32:
+ return &AMDGPU::AGPR_32RegClass;
+ case 64:
+ return &AMDGPU::AReg_64RegClass;
+ case 128:
+ return &AMDGPU::AReg_128RegClass;
+ case 512:
+ return &AMDGPU::AReg_512RegClass;
+ case 1024:
+ return &AMDGPU::AReg_1024RegClass;
default:
llvm_unreachable("Invalid register class size");
}
@@ -1304,12 +1506,18 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
return &AMDGPU::SGPR_32RegClass;
case 64:
return &AMDGPU::SReg_64RegClass;
+ case 96:
+ return &AMDGPU::SReg_96RegClass;
case 128:
return &AMDGPU::SReg_128RegClass;
+ case 160:
+ return &AMDGPU::SReg_160RegClass;
case 256:
return &AMDGPU::SReg_256RegClass;
case 512:
return &AMDGPU::SReg_512RegClass;
+ case 1024:
+ return &AMDGPU::SReg_1024RegClass;
default:
llvm_unreachable("Invalid register class size");
}
@@ -1328,11 +1536,31 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return &AMDGPU::SGPR_32RegClass;
case 2:
return &AMDGPU::SReg_64RegClass;
+ case 3:
+ return &AMDGPU::SReg_96RegClass;
case 4:
return &AMDGPU::SReg_128RegClass;
+ case 5:
+ return &AMDGPU::SReg_160RegClass;
case 8:
return &AMDGPU::SReg_256RegClass;
- case 16: /* fall-through */
+ case 16:
+ return &AMDGPU::SReg_512RegClass;
+ case 32: /* fall-through */
+ default:
+ llvm_unreachable("Invalid sub-register class size");
+ }
+ } else if (hasAGPRs(RC)) {
+ switch (Count) {
+ case 1:
+ return &AMDGPU::AGPR_32RegClass;
+ case 2:
+ return &AMDGPU::AReg_64RegClass;
+ case 4:
+ return &AMDGPU::AReg_128RegClass;
+ case 16:
+ return &AMDGPU::AReg_512RegClass;
+ case 32: /* fall-through */
default:
llvm_unreachable("Invalid sub-register class size");
}
@@ -1346,9 +1574,13 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return &AMDGPU::VReg_96RegClass;
case 4:
return &AMDGPU::VReg_128RegClass;
+ case 5:
+ return &AMDGPU::VReg_160RegClass;
case 8:
return &AMDGPU::VReg_256RegClass;
- case 16: /* fall-through */
+ case 16:
+ return &AMDGPU::VReg_512RegClass;
+ case 32: /* fall-through */
default:
llvm_unreachable("Invalid sub-register class size");
}
@@ -1396,6 +1628,17 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
if (EltSize == 4) {
+ static const int16_t Sub0_31[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+ AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+ AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+ AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
+ };
+
static const int16_t Sub0_15[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
@@ -1408,6 +1651,10 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
};
+ static const int16_t Sub0_4[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+ };
+
static const int16_t Sub0_3[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
};
@@ -1429,16 +1676,31 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
return makeArrayRef(Sub0_2);
case 128:
return makeArrayRef(Sub0_3);
+ case 160:
+ return makeArrayRef(Sub0_4);
case 256:
return makeArrayRef(Sub0_7);
case 512:
return makeArrayRef(Sub0_15);
+ case 1024:
+ return makeArrayRef(Sub0_31);
default:
llvm_unreachable("unhandled register size");
}
}
if (EltSize == 8) {
+ static const int16_t Sub0_31_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
+ AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
+ AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
+ AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
+ AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
+ };
+
static const int16_t Sub0_15_64[] = {
AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
@@ -1465,32 +1727,73 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
return makeArrayRef(Sub0_7_64);
case 512:
return makeArrayRef(Sub0_15_64);
+ case 1024:
+ return makeArrayRef(Sub0_31_64);
default:
llvm_unreachable("unhandled register size");
}
}
- assert(EltSize == 16 && "unhandled register spill split size");
+ if (EltSize == 16) {
+
+ static const int16_t Sub0_31_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11,
+ AMDGPU::sub12_sub13_sub14_sub15,
+ AMDGPU::sub16_sub17_sub18_sub19,
+ AMDGPU::sub20_sub21_sub22_sub23,
+ AMDGPU::sub24_sub25_sub26_sub27,
+ AMDGPU::sub28_sub29_sub30_sub31
+ };
+
+ static const int16_t Sub0_15_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11,
+ AMDGPU::sub12_sub13_sub14_sub15
+ };
+
+ static const int16_t Sub0_7_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7
+ };
- static const int16_t Sub0_15_128[] = {
- AMDGPU::sub0_sub1_sub2_sub3,
- AMDGPU::sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11,
- AMDGPU::sub12_sub13_sub14_sub15
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 128:
+ return {};
+ case 256:
+ return makeArrayRef(Sub0_7_128);
+ case 512:
+ return makeArrayRef(Sub0_15_128);
+ case 1024:
+ return makeArrayRef(Sub0_31_128);
+ default:
+ llvm_unreachable("unhandled register size");
+ }
+ }
+
+ assert(EltSize == 32 && "unhandled elt size");
+
+ static const int16_t Sub0_31_256[] = {
+ AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
+ AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
+ AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
};
- static const int16_t Sub0_7_128[] = {
- AMDGPU::sub0_sub1_sub2_sub3,
- AMDGPU::sub4_sub5_sub6_sub7
+ static const int16_t Sub0_15_256[] = {
+ AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
};
switch (AMDGPU::getRegBitWidth(*RC->MC)) {
- case 128:
- return {};
case 256:
- return makeArrayRef(Sub0_7_128);
+ return {};
case 512:
- return makeArrayRef(Sub0_15_128);
+ return makeArrayRef(Sub0_15_256);
+ case 1024:
+ return makeArrayRef(Sub0_31_256);
default:
llvm_unreachable("unhandled register size");
}
@@ -1512,6 +1815,13 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
return hasVGPRs(RC);
}
+bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
+ assert(RC && "Register class for the reg not found");
+ return hasAGPRs(RC);
+}
+
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
const TargetRegisterClass *SrcRC,
unsigned SubReg,
@@ -1553,7 +1863,7 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const {
- if (Idx == getVGPRPressureSet())
+ if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
const_cast<MachineFunction &>(MF));
@@ -1578,28 +1888,80 @@ unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
}
const TargetRegisterClass *
-SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
+SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
+ const RegisterBank &RB,
const MachineRegisterInfo &MRI) const {
- unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
- const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
- if (!RB)
- return nullptr;
-
switch (Size) {
+ case 1: {
+ switch (RB.getID()) {
+ case AMDGPU::VGPRRegBankID:
+ return &AMDGPU::VGPR_32RegClass;
+ case AMDGPU::VCCRegBankID:
+ return isWave32 ?
+ &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
+ case AMDGPU::SGPRRegBankID:
+ return &AMDGPU::SReg_32_XM0RegClass;
+ case AMDGPU::SCCRegBankID:
+ // This needs to return an allocatable class, so don't bother returning
+ // the dummy SCC class.
+ return &AMDGPU::SReg_32_XM0RegClass;
+ default:
+ llvm_unreachable("unknown register bank");
+ }
+ }
case 32:
- return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
- &AMDGPU::SReg_32_XM0RegClass;
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
+ &AMDGPU::SReg_32_XM0RegClass;
case 64:
- return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
- &AMDGPU::SReg_64_XEXECRegClass;
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
+ &AMDGPU::SReg_64_XEXECRegClass;
case 96:
- return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
- nullptr;
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
+ &AMDGPU::SReg_96RegClass;
case 128:
- return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
- &AMDGPU::SReg_128RegClass;
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
+ &AMDGPU::SReg_128RegClass;
+ case 160:
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
+ &AMDGPU::SReg_160RegClass;
+ case 256:
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
+ &AMDGPU::SReg_256RegClass;
+ case 512:
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
+ &AMDGPU::SReg_512RegClass;
+ default:
+ if (Size < 32)
+ return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
+ &AMDGPU::SReg_32_XM0RegClass;
+ return nullptr;
+ }
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
+ const MachineRegisterInfo &MRI) const {
+ if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
+ return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
+ return nullptr;
+}
+
+unsigned SIRegisterInfo::getVCC() const {
+ return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getRegClass(unsigned RCID) const {
+ switch ((int)RCID) {
+ case AMDGPU::SReg_1RegClassID:
+ return getBoolRC();
+ case AMDGPU::SReg_1_XEXECRegClassID:
+ return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
+ : &AMDGPU::SReg_64_XEXECRegClass;
+ case -1:
+ return nullptr;
default:
- llvm_unreachable("not implemented");
+ return AMDGPURegisterInfo::getRegClass(RCID);
}
}
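
The spillVGPRtoAGPR helper introduced in this file picks between V_ACCVGPR_WRITE_B32 and V_ACCVGPR_READ_B32 with an IsStore ^ isVGPR(Reg) test. The following self-contained sketch spells out that truth table; pickAccCopy and the enum are illustrative names only, the opcode names are the ones used in the patch.

#include <cassert>
#include <cstdio>

enum AccCopyOpcode { V_ACCVGPR_WRITE_B32, V_ACCVGPR_READ_B32 };

// IsStore: the frame-index access is a spill (store) rather than a reload.
// SlotIsVGPR: the register backing the spill slot is a VGPR (an AGPR value
// spilled through a VGPR); false when the slot register is an AGPR.
static AccCopyOpcode pickAccCopy(bool IsStore, bool SlotIsVGPR) {
  // Copying a value into an AGPR writes the accumulator; copying it back out
  // reads the accumulator, exactly as in spillVGPRtoAGPR above.
  return (IsStore ^ SlotIsVGPR) ? V_ACCVGPR_WRITE_B32 : V_ACCVGPR_READ_B32;
}

int main() {
  assert(pickAccCopy(/*IsStore=*/true,  /*SlotIsVGPR=*/false) == V_ACCVGPR_WRITE_B32);
  assert(pickAccCopy(/*IsStore=*/false, /*SlotIsVGPR=*/false) == V_ACCVGPR_READ_B32);
  assert(pickAccCopy(/*IsStore=*/true,  /*SlotIsVGPR=*/true)  == V_ACCVGPR_READ_B32);
  assert(pickAccCopy(/*IsStore=*/false, /*SlotIsVGPR=*/true)  == V_ACCVGPR_WRITE_B32);
  std::puts("opcode selection matches the table used by spillVGPRtoAGPR");
  return 0;
}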
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index b82fefde47e1..34487c96e72e 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,10 +29,13 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
private:
unsigned SGPRSetID;
unsigned VGPRSetID;
+ unsigned AGPRSetID;
BitVector SGPRPressureSets;
BitVector VGPRPressureSets;
+ BitVector AGPRPressureSets;
bool SpillSGPRToVGPR;
bool SpillSGPRToSMEM;
+ bool isWave32;
void classifyPressureSet(unsigned PSetID, unsigned Reg,
BitVector &PressureSets) const;
@@ -57,8 +59,6 @@ public:
unsigned reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const;
- unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const override;
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
@@ -72,8 +72,9 @@ public:
return 100;
}
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
@@ -130,7 +131,7 @@ public:
/// \returns true if this class contains only SGPR registers
bool isSGPRClass(const TargetRegisterClass *RC) const {
- return !hasVGPRs(RC);
+ return !hasVGPRs(RC) && !hasAGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
@@ -150,10 +151,22 @@ public:
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
+ /// \returns true if this class contains AGPR registers.
+ bool hasAGPRs(const TargetRegisterClass *RC) const;
+
+ /// \returns true if this class contains any vector registers.
+ bool hasVectorRegisters(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) || hasAGPRs(RC);
+ }
+
/// \returns A VGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const;
+ /// \returns An AGPR reg class with the same width as \p SRC
+ const TargetRegisterClass *getEquivalentAGPRClass(
+ const TargetRegisterClass *SRC) const;
+
/// \returns A SGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentSGPRClass(
const TargetRegisterClass *VRC) const;
@@ -191,16 +204,32 @@ public:
unsigned getSGPRPressureSet() const { return SGPRSetID; };
unsigned getVGPRPressureSet() const { return VGPRSetID; };
+ unsigned getAGPRPressureSet() const { return AGPRSetID; };
const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
unsigned Reg) const;
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+ bool isAGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+ bool isVectorRegister(const MachineRegisterInfo &MRI, unsigned Reg) const {
+ return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
+ }
+
+ virtual bool
+ isDivergentRegClass(const TargetRegisterClass *RC) const override {
+ return !isSGPRClass(RC);
+ }
bool isSGPRPressureSet(unsigned SetID) const {
- return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
+ return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID) &&
+ !AGPRPressureSets.test(SetID);
}
bool isVGPRPressureSet(unsigned SetID) const {
- return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
+ return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) &&
+ !AGPRPressureSets.test(SetID);
+ }
+ bool isAGPRPressureSet(unsigned SetID) const {
+ return AGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID) &&
+ !VGPRPressureSets.test(SetID);
}
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
@@ -225,15 +254,44 @@ public:
unsigned getReturnAddressReg(const MachineFunction &MF) const;
const TargetRegisterClass *
+ getRegClassForSizeOnBank(unsigned Size,
+ const RegisterBank &Bank,
+ const MachineRegisterInfo &MRI) const;
+
+ const TargetRegisterClass *
+ getRegClassForTypeOnBank(LLT Ty,
+ const RegisterBank &Bank,
+ const MachineRegisterInfo &MRI) const {
+ return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank, MRI);
+ }
+
+ const TargetRegisterClass *
getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const override;
+ const TargetRegisterClass *getBoolRC() const {
+ return isWave32 ? &AMDGPU::SReg_32_XM0RegClass
+ : &AMDGPU::SReg_64RegClass;
+ }
+
+ const TargetRegisterClass *getWaveMaskRegClass() const {
+ return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
+ : &AMDGPU::SReg_64_XEXECRegClass;
+ }
+
+ unsigned getVCC() const;
+
+ const TargetRegisterClass *getRegClass(unsigned RCID) const;
+
// Find reaching register definition
MachineInstr *findReachingDef(unsigned Reg, unsigned SubReg,
MachineInstr &Use,
MachineRegisterInfo &MRI,
LiveIntervals *LIS) const;
+ const uint32_t *getAllVGPRRegMask() const;
+ const uint32_t *getAllAllocatableSRegMask() const;
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
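
The header additions above make several helpers depend on the new isWave32 flag (getVCC, getBoolRC, getWaveMaskRegClass). The sketch below models that selection outside of LLVM; the struct, function name, and use of plain strings are illustrative only, while the register and class names match the ones returned in the patch.

#include <cstdio>
#include <initializer_list>

enum class WaveSize { Wave32, Wave64 };

struct LaneMaskChoice {
  const char *VCC;    // condition-code register: vcc_lo vs vcc
  const char *BoolRC; // class used for i1 values: SReg_32_XM0 vs SReg_64
  const char *MaskRC; // exec-mask class: SReg_32_XM0_XEXEC vs SReg_64_XEXEC
};

// Mirrors getVCC(), getBoolRC() and getWaveMaskRegClass(): every wave-size
// dependent choice reduces to the same isWave32 test.
static LaneMaskChoice pickLaneMask(WaveSize W) {
  if (W == WaveSize::Wave32)
    return {"vcc_lo", "SReg_32_XM0", "SReg_32_XM0_XEXEC"};
  return {"vcc", "SReg_64", "SReg_64_XEXEC"};
}

int main() {
  for (WaveSize W : {WaveSize::Wave32, WaveSize::Wave64}) {
    LaneMaskChoice C = pickLaneMask(W);
    std::printf("%s: vcc=%s boolRC=%s maskRC=%s\n",
                W == WaveSize::Wave32 ? "wave32" : "wave64",
                C.VCC, C.BoolRC, C.MaskRC);
  }
  return 0;
}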
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index c625ecc9b750..d5948a7862cc 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,43 +14,86 @@ class getSubRegs<int size> {
list<SubRegIndex> ret2 = [sub0, sub1];
list<SubRegIndex> ret3 = [sub0, sub1, sub2];
list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+ list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11,
sub12, sub13, sub14, sub15];
+ list<SubRegIndex> ret32 = [sub0, sub1, sub2, sub3,
+ sub4, sub5, sub6, sub7,
+ sub8, sub9, sub10, sub11,
+ sub12, sub13, sub14, sub15,
+ sub16, sub17, sub18, sub19,
+ sub20, sub21, sub22, sub23,
+ sub24, sub25, sub26, sub27,
+ sub28, sub29, sub30, sub31];
list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
!if(!eq(size, 3), ret3,
!if(!eq(size, 4), ret4,
- !if(!eq(size, 8), ret8, ret16))));
+ !if(!eq(size, 5), ret5,
+ !if(!eq(size, 8), ret8,
+ !if(!eq(size, 16), ret16, ret32))))));
+}
+
+let Namespace = "AMDGPU" in {
+defset list<RegAltNameIndex> AllRegAltNameIndices = {
+ def Reg32 : RegAltNameIndex;
+ def Reg64 : RegAltNameIndex;
+ def Reg96 : RegAltNameIndex;
+ def Reg128 : RegAltNameIndex;
+ def Reg160 : RegAltNameIndex;
+ def Reg256 : RegAltNameIndex;
+ def Reg512 : RegAltNameIndex;
+ def Reg1024 : RegAltNameIndex;
+}
}
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+class SIReg <string n, bits<16> regIdx = 0, string prefix = "",
+ int regNo = !cast<int>(regIdx)> :
+ Register<n, !if(!eq(prefix, ""),
+ [ n, n, n, n, n, n, n, n ],
+ [ prefix # regNo,
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 1), 255) # "]",
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 2), 255) # "]",
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 3), 255) # "]",
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 4), 255) # "]",
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 7), 255) # "]",
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 15), 255) # "]",
+ prefix # "[" # regNo # ":" # !and(!add(regNo, 31), 255) # "]",
+ ])>,
DwarfRegNum<[!cast<int>(HWEncoding)]> {
let Namespace = "AMDGPU";
+ let RegAltNameIndices = AllRegAltNameIndices;
  // This is not yet the complete register encoding. An additional
// bit is set for VGPRs.
let HWEncoding = regIdx;
}
+class SIRegisterWithSubRegs<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ let RegAltNameIndices = AllRegAltNameIndices;
+ let AltNames = [ n, n, n, n, n, n, n, n ];
+}
+
// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
// Pseudo-registers: Used as placeholders during isel and immediately
// replaced, never seeing the verifier.
-def PRIVATE_RSRC_REG : SIReg<"", 0>;
-def FP_REG : SIReg<"", 0>;
-def SP_REG : SIReg<"", 0>;
-def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>;
+def PRIVATE_RSRC_REG : SIReg<"private_rsrc", 0>;
+def FP_REG : SIReg<"fp", 0>;
+def SP_REG : SIReg<"sp", 0>;
+def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;
// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
DwarfRegAlias<VCC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -61,25 +103,38 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;
-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
DwarfRegAlias<EXEC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 126;
}
-def SCC : SIReg<"scc", 253>;
+// 32-bit real registers, for MC only.
+// May be used with both 32-bit and 64-bit operands.
+def SRC_VCCZ : SIReg<"src_vccz", 251>;
+def SRC_EXECZ : SIReg<"src_execz", 252>;
+def SRC_SCC : SIReg<"src_scc", 253>;
+
+// 1-bit pseudo register, for codegen only.
+// Should never be emitted.
+def SCC : SIReg<"scc">;
+
def M0 : SIReg <"m0", 124>;
+def SGPR_NULL : SIReg<"null", 125>;
def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
+
+def LDS_DIRECT : SIReg <"src_lds_direct", 254>;
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
-def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
DwarfRegAlias<XNACK_MASK_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -90,7 +145,7 @@ def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
-def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
DwarfRegAlias<TBA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -100,7 +155,7 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;
-def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
DwarfRegAlias<TMA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -108,19 +163,19 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
}
foreach Index = 0-15 in {
- def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
- def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
- def TTMP#Index : SIReg<"", 0>;
+ def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
+ def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
+ def TTMP#Index : SIReg<"ttmp"#Index, 0>;
}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
def _ci : SIReg<n, ci_e>;
def _vi : SIReg<n, vi_e>;
- def "" : SIReg<"", 0>;
+ def "" : SIReg<n, 0>;
}
class FlatReg <Register lo, Register hi, bits<16> encoding> :
- RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+ SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>,
DwarfRegAlias<lo> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
@@ -135,13 +190,20 @@ def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
-foreach Index = 0-103 in {
- def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+foreach Index = 0-105 in {
+ def SGPR#Index : SIReg <"SGPR"#Index, Index, "s">;
}
// VGPR registers
foreach Index = 0-255 in {
- def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+ def VGPR#Index : SIReg <"VGPR"#Index, Index, "v"> {
+ let HWEncoding{8} = 1;
+ }
+}
+
+// AccVGPR registers
+foreach Index = 0-255 in {
+ def AGPR#Index : SIReg <"AGPR"#Index, Index, "a"> {
let HWEncoding{8} = 1;
}
}
@@ -164,10 +226,10 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add (sequence "SGPR%u", 0, 103))> {
+ (add (sequence "SGPR%u", 0, 105)), Reg32> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
- let AllocationPriority = 7;
+ let AllocationPriority = 9;
}
// SGPR 64-bit registers
@@ -175,6 +237,12 @@ def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
[(add (decimate SGPR_32, 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
+// SGPR 96-bit registers. No operations use these; they exist only for symmetry with 96-bit VGPRs.
+def SGPR_96Regs : RegisterTuples<getSubRegs<3>.ret,
+ [(add (decimate SGPR_32, 3)),
+ (add (decimate (shl SGPR_32, 1), 3)),
+ (add (decimate (shl SGPR_32, 2), 3))]>;
+
// SGPR 128-bit registers
def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
[(add (decimate SGPR_32, 4)),
@@ -182,6 +250,14 @@ def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4))]>;
+// SGPR 160-bit registers. No operations use these; they exist only for symmetry with 160-bit VGPRs.
+def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
+ [(add (decimate SGPR_32, 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4))]>;
+
// SGPR 256-bit registers
def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
[(add (decimate SGPR_32, 4)),
@@ -212,6 +288,41 @@ def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
(add (decimate (shl SGPR_32, 14), 4)),
(add (decimate (shl SGPR_32, 15), 4))]>;
+// SGPR 1024-bit registers
+def SGPR_1024Regs : RegisterTuples<getSubRegs<32>.ret,
+ [(add (decimate SGPR_32, 4)),
+ (add (decimate (shl SGPR_32, 1), 4)),
+ (add (decimate (shl SGPR_32, 2), 4)),
+ (add (decimate (shl SGPR_32, 3), 4)),
+ (add (decimate (shl SGPR_32, 4), 4)),
+ (add (decimate (shl SGPR_32, 5), 4)),
+ (add (decimate (shl SGPR_32, 6), 4)),
+ (add (decimate (shl SGPR_32, 7), 4)),
+ (add (decimate (shl SGPR_32, 8), 4)),
+ (add (decimate (shl SGPR_32, 9), 4)),
+ (add (decimate (shl SGPR_32, 10), 4)),
+ (add (decimate (shl SGPR_32, 11), 4)),
+ (add (decimate (shl SGPR_32, 12), 4)),
+ (add (decimate (shl SGPR_32, 13), 4)),
+ (add (decimate (shl SGPR_32, 14), 4)),
+ (add (decimate (shl SGPR_32, 15), 4)),
+ (add (decimate (shl SGPR_32, 16), 4)),
+ (add (decimate (shl SGPR_32, 17), 4)),
+ (add (decimate (shl SGPR_32, 18), 4)),
+ (add (decimate (shl SGPR_32, 19), 4)),
+ (add (decimate (shl SGPR_32, 20), 4)),
+ (add (decimate (shl SGPR_32, 21), 4)),
+ (add (decimate (shl SGPR_32, 22), 4)),
+ (add (decimate (shl SGPR_32, 23), 4)),
+ (add (decimate (shl SGPR_32, 24), 4)),
+ (add (decimate (shl SGPR_32, 25), 4)),
+ (add (decimate (shl SGPR_32, 26), 4)),
+ (add (decimate (shl SGPR_32, 27), 4)),
+ (add (decimate (shl SGPR_32, 28), 4)),
+ (add (decimate (shl SGPR_32, 29), 4)),
+ (add (decimate (shl SGPR_32, 30), 4)),
+ (add (decimate (shl SGPR_32, 31), 4))]>;
+
// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 15))> {
@@ -263,7 +374,7 @@ class TmpRegTuplesBase<int index, int size,
list<SubRegIndex> indices = getSubRegs<size>.ret,
int index1 = !add(index, !add(size, -1)),
string name = "ttmp["#index#":"#index1#"]"> :
- RegisterWithSubRegs<name, subRegs> {
+ SIRegisterWithSubRegs<name, subRegs> {
let HWEncoding = subRegs[0].HWEncoding;
let SubRegIndices = indices;
}
@@ -293,8 +404,8 @@ class TmpRegTuples<string tgt,
getSubRegs<size>.ret>;
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
- def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
- def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
}
foreach Index = {0, 4, 8, 12} in {
@@ -303,7 +414,7 @@ foreach Index = {0, 4, 8, 12} in {
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#
_TTMP#!add(Index,2)#
- _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
+ _TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
}
foreach Index = {0, 4, 8} in {
@@ -320,7 +431,7 @@ foreach Index = {0, 4, 8} in {
_TTMP#!add(Index,4)#
_TTMP#!add(Index,5)#
_TTMP#!add(Index,6)#
- _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
+ _TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
}
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
@@ -330,18 +441,17 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
-def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
TmpRegTuplesBase<0, 16,
- [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
- TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
- TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
- TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
-
+ [TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
+ TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
+ TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
+ TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
// VGPR 32-bit registers
// i16/f16 only on VI+
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add (sequence "VGPR%u", 0, 255))> {
+ (add (sequence "VGPR%u", 0, 255)), Reg32> {
let AllocationPriority = 1;
let Size = 32;
}
@@ -364,6 +474,14 @@ def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
(add (shl VGPR_32, 2)),
(add (shl VGPR_32, 3))]>;
+// VGPR 160-bit registers
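+// The trunc to 252 limits tuple starts to v0-v251, so a 5-register tuple never
+// runs past v255.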
+def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
+ [(add (trunc VGPR_32, 252)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4))]>;
+
// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
[(add (trunc VGPR_32, 249)),
@@ -394,88 +512,257 @@ def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
(add (shl VGPR_32, 14)),
(add (shl VGPR_32, 15))]>;
+// VGPR 1024-bit registers
+def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
+ [(add (trunc VGPR_32, 225)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2)),
+ (add (shl VGPR_32, 3)),
+ (add (shl VGPR_32, 4)),
+ (add (shl VGPR_32, 5)),
+ (add (shl VGPR_32, 6)),
+ (add (shl VGPR_32, 7)),
+ (add (shl VGPR_32, 8)),
+ (add (shl VGPR_32, 9)),
+ (add (shl VGPR_32, 10)),
+ (add (shl VGPR_32, 11)),
+ (add (shl VGPR_32, 12)),
+ (add (shl VGPR_32, 13)),
+ (add (shl VGPR_32, 14)),
+ (add (shl VGPR_32, 15)),
+ (add (shl VGPR_32, 16)),
+ (add (shl VGPR_32, 17)),
+ (add (shl VGPR_32, 18)),
+ (add (shl VGPR_32, 19)),
+ (add (shl VGPR_32, 20)),
+ (add (shl VGPR_32, 21)),
+ (add (shl VGPR_32, 22)),
+ (add (shl VGPR_32, 23)),
+ (add (shl VGPR_32, 24)),
+ (add (shl VGPR_32, 25)),
+ (add (shl VGPR_32, 26)),
+ (add (shl VGPR_32, 27)),
+ (add (shl VGPR_32, 28)),
+ (add (shl VGPR_32, 29)),
+ (add (shl VGPR_32, 30)),
+ (add (shl VGPR_32, 31))]>;
+
+// AccVGPR 32-bit registers
+def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add (sequence "AGPR%u", 0, 255)), Reg32> {
+ let AllocationPriority = 1;
+ let Size = 32;
+}
+
+// AGPR 64-bit registers
+def AGPR_64 : RegisterTuples<getSubRegs<2>.ret,
+ [(add (trunc AGPR_32, 255)),
+ (add (shl AGPR_32, 1))]>;
+
+// AGPR 128-bit registers
+def AGPR_128 : RegisterTuples<getSubRegs<4>.ret,
+ [(add (trunc AGPR_32, 253)),
+ (add (shl AGPR_32, 1)),
+ (add (shl AGPR_32, 2)),
+ (add (shl AGPR_32, 3))]>;
+
+// AGPR 512-bit registers
+def AGPR_512 : RegisterTuples<getSubRegs<16>.ret,
+ [(add (trunc AGPR_32, 241)),
+ (add (shl AGPR_32, 1)),
+ (add (shl AGPR_32, 2)),
+ (add (shl AGPR_32, 3)),
+ (add (shl AGPR_32, 4)),
+ (add (shl AGPR_32, 5)),
+ (add (shl AGPR_32, 6)),
+ (add (shl AGPR_32, 7)),
+ (add (shl AGPR_32, 8)),
+ (add (shl AGPR_32, 9)),
+ (add (shl AGPR_32, 10)),
+ (add (shl AGPR_32, 11)),
+ (add (shl AGPR_32, 12)),
+ (add (shl AGPR_32, 13)),
+ (add (shl AGPR_32, 14)),
+ (add (shl AGPR_32, 15))]>;
+
+// AGPR 1024-bit registers
+def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
+ [(add (trunc AGPR_32, 225)),
+ (add (shl AGPR_32, 1)),
+ (add (shl AGPR_32, 2)),
+ (add (shl AGPR_32, 3)),
+ (add (shl AGPR_32, 4)),
+ (add (shl AGPR_32, 5)),
+ (add (shl AGPR_32, 6)),
+ (add (shl AGPR_32, 7)),
+ (add (shl AGPR_32, 8)),
+ (add (shl AGPR_32, 9)),
+ (add (shl AGPR_32, 10)),
+ (add (shl AGPR_32, 11)),
+ (add (shl AGPR_32, 12)),
+ (add (shl AGPR_32, 13)),
+ (add (shl AGPR_32, 14)),
+ (add (shl AGPR_32, 15)),
+ (add (shl AGPR_32, 16)),
+ (add (shl AGPR_32, 17)),
+ (add (shl AGPR_32, 18)),
+ (add (shl AGPR_32, 19)),
+ (add (shl AGPR_32, 20)),
+ (add (shl AGPR_32, 21)),
+ (add (shl AGPR_32, 22)),
+ (add (shl AGPR_32, 23)),
+ (add (shl AGPR_32, 24)),
+ (add (shl AGPR_32, 25)),
+ (add (shl AGPR_32, 26)),
+ (add (shl AGPR_32, 27)),
+ (add (shl AGPR_32, 28)),
+ (add (shl AGPR_32, 29)),
+ (add (shl AGPR_32, 30)),
+ (add (shl AGPR_32, 31))]>;
+
//===----------------------------------------------------------------------===//
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
+ (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> {
let isAllocatable = 0;
let CopyCost = -1;
}
def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
- (add PRIVATE_RSRC_REG)> {
+ (add PRIVATE_RSRC_REG), Reg128> {
+ let isAllocatable = 0;
+ let CopyCost = -1;
+}
+
+def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add LDS_DIRECT), Reg32> {
let isAllocatable = 0;
let CopyCost = -1;
}
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
- TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
- SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
- let AllocationPriority = 7;
+ SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
+ SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> {
+ let AllocationPriority = 10;
}
-def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
- let AllocationPriority = 7;
+def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+ (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> {
+ let AllocationPriority = 10;
}
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
- let AllocationPriority = 7;
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+ (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> {
+ let AllocationPriority = 10;
}
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
- let AllocationPriority = 7;
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> {
+ let AllocationPriority = 10;
+}
+
+def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS),
+ Reg32> {
+ let isAllocatable = 0;
}
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+ (add SGPR_64Regs), Reg64> {
let CopyCost = 1;
- let AllocationPriority = 8;
+ let AllocationPriority = 11;
+}
+
+// CCR (call clobbered registers) SGPR 64-bit registers
+def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+ (add (trunc SGPR_64, 16)), Reg64> {
+ let CopyCost = SGPR_64.CopyCost;
+ let AllocationPriority = SGPR_64.AllocationPriority;
}
-def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> {
+def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+ (add TTMP_64Regs)> {
let isAllocatable = 0;
}
def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
+ (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> {
let CopyCost = 1;
- let AllocationPriority = 8;
+ let AllocationPriority = 13;
}
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
- (add SReg_64_XEXEC, EXEC)> {
+ (add SReg_64_XEXEC, EXEC), Reg64> {
let CopyCost = 1;
- let AllocationPriority = 8;
+ let AllocationPriority = 13;
+}
+
+def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32,
+ (add SReg_64_XEXEC, SReg_32_XM0_XEXEC)> {
+ let CopyCost = 1;
+ let isAllocatable = 0;
+}
+
+def SReg_1 : RegisterClass<"AMDGPU", [i1], 32,
+ (add SReg_1_XEXEC, EXEC, EXEC_LO)> {
+ let CopyCost = 1;
+ let isAllocatable = 0;
}
// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {
-def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> {
- let AllocationPriority = 10;
+// There are no 3-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
+ (add SGPR_96Regs), Reg96> {
+ let AllocationPriority = 14;
}
-def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> {
+def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
+ (add SGPR_96), Reg96> {
+ let AllocationPriority = 14;
+}
+
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
+ (add SGPR_128Regs), Reg128> {
+ let AllocationPriority = 15;
+}
+
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
+ (add TTMP_128Regs)> {
let isAllocatable = 0;
}
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
- (add SGPR_128, TTMP_128)> {
- let AllocationPriority = 10;
+ (add SGPR_128, TTMP_128), Reg128> {
+ let AllocationPriority = 15;
}
} // End CopyCost = 2
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
- let AllocationPriority = 11;
+// There are no 5-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+ (add SGPR_160Regs), Reg160> {
+ let AllocationPriority = 16;
+}
+
+def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+ (add SGPR_160), Reg160> {
+ let AllocationPriority = 16;
+}
+
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs),
+ Reg256> {
+ let AllocationPriority = 17;
}
def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
@@ -483,29 +770,48 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
}
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
- (add SGPR_256, TTMP_256)> {
+ (add SGPR_256, TTMP_256), Reg256> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
- let AllocationPriority = 11;
+ let AllocationPriority = 17;
}
-def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
- let AllocationPriority = 12;
+def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+ (add SGPR_512Regs), Reg512> {
+ let AllocationPriority = 18;
}
-def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
+def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+ (add TTMP_512Regs)> {
let isAllocatable = 0;
}
def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
- (add SGPR_512, TTMP_512)> {
+ (add SGPR_512, TTMP_512), Reg512> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
- let AllocationPriority = 12;
+ let AllocationPriority = 18;
+}
+
+def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add VGPR_32, LDS_DIRECT_CLASS), Reg32> {
+ let isAllocatable = 0;
+}
+
+def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+ (add SGPR_1024Regs), Reg1024> {
+ let AllocationPriority = 19;
+}
+
+def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+ (add SGPR_1024), Reg1024> {
+ let CopyCost = 16;
+ let AllocationPriority = 19;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> {
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
+ (add VGPR_64), Reg64> {
let Size = 64;
// Requires 2 v_mov_b32 to copy
@@ -513,7 +819,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
let AllocationPriority = 2;
}
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
+def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> {
let Size = 96;
// Requires 3 v_mov_b32 to copy
@@ -521,7 +827,8 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
let AllocationPriority = 3;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
+ (add VGPR_128), Reg128> {
let Size = 128;
// Requires 4 v_mov_b32 to copy
@@ -529,28 +836,88 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VG
let AllocationPriority = 4;
}
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
+def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+ (add VGPR_160), Reg160> {
+ let Size = 160;
+
+ // Requires 5 v_mov_b32 to copy
+ let CopyCost = 5;
+ let AllocationPriority = 5;
+}
+
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
+ (add VGPR_256), Reg256> {
let Size = 256;
let CopyCost = 8;
- let AllocationPriority = 5;
+ let AllocationPriority = 6;
}
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+ (add VGPR_512), Reg512> {
let Size = 512;
let CopyCost = 16;
- let AllocationPriority = 6;
+ let AllocationPriority = 7;
+}
+
+def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+ (add VGPR_1024), Reg1024> {
+ let Size = 1024;
+ let CopyCost = 32;
+ let AllocationPriority = 8;
}
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
+ (add AGPR_64), Reg64> {
+ let Size = 64;
+
+ let CopyCost = 5;
+ let AllocationPriority = 2;
+}
+
+def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
+ (add AGPR_128), Reg128> {
+ let Size = 128;
+
+ // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
+ let CopyCost = 9;
+ let AllocationPriority = 4;
+}
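+// The same arithmetic gives the CopyCost values below: copying an N-dword AGPR
+// tuple takes N v_accvgpr_read plus N v_accvgpr_write plus one scratch VGPR,
+// i.e. 2*N+1, hence 33 for AReg_512 and 65 for AReg_1024.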
+
+def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+ (add AGPR_512), Reg512> {
+ let Size = 512;
+ let CopyCost = 33;
+ let AllocationPriority = 7;
+}
+
+def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+ (add AGPR_1024), Reg1024> {
+ let Size = 1024;
+ let CopyCost = 65;
+ let AllocationPriority = 8;
+}
+
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> {
let Size = 32;
}
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add VGPR_32, SReg_32)> {
+ (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> {
+ let isAllocatable = 0;
+}
+
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64),
+ Reg64> {
let isAllocatable = 0;
}
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add AGPR_32, VGPR_32), Reg32> {
+ let isAllocatable = 0;
+}
+
+def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
+ (add AReg_64, VReg_64), Reg64> {
let isAllocatable = 0;
}
@@ -563,47 +930,40 @@ class RegImmMatcher<string name> : AsmOperandClass {
let RenderMethod = "addRegOrImmOperands";
}
-multiclass SIRegOperand <string rc, string MatchName, string opType> {
+multiclass SIRegOperand32 <string rc, string MatchName, string opType,
+ string rc_suffix = "_32"> {
let OperandNamespace = "AMDGPU" in {
- def _b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ def _b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
let OperandType = opType#"_INT16";
let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
let DecoderMethod = "decodeOperand_VSrc16";
}
- def _f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ def _f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
let OperandType = opType#"_FP16";
let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
- let DecoderMethod = "decodeOperand_VSrc16";
+ let DecoderMethod = "decodeOperand_" # rc # "_16";
}
- def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ def _b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
let OperandType = opType#"_INT32";
let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
+ let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
}
- def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ def _f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
let OperandType = opType#"_FP32";
let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+ let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
}
- def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
- let OperandType = opType#"_INT64";
- let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
- }
-
- def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
- let OperandType = opType#"_FP64";
- let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
- }
-
- def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
let OperandType = opType#"_V2INT16";
let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
let DecoderMethod = "decodeOperand_VSrcV216";
}
- def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
let OperandType = opType#"_V2FP16";
let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
let DecoderMethod = "decodeOperand_VSrcV216";
@@ -611,6 +971,21 @@ multiclass SIRegOperand <string rc, string MatchName, string opType> {
}
}
+multiclass SIRegOperand <string rc, string MatchName, string opType> :
+ SIRegOperand32<rc, MatchName, opType> {
+ let OperandNamespace = "AMDGPU" in {
+ def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+ let OperandType = opType#"_INT64";
+ let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
+ }
+
+ def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+ let OperandType = opType#"_FP64";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
+ }
+ }
+}
+
// FIXME: 64-bit sources can sometimes use 32-bit constants.
multiclass RegImmOperand <string rc, string MatchName>
: SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;
@@ -618,20 +993,32 @@ multiclass RegImmOperand <string rc, string MatchName>
multiclass RegInlineOperand <string rc, string MatchName>
: SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
+multiclass RegInlineOperand32 <string rc, string MatchName,
+ string rc_suffix = "_32">
+ : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;
+
+multiclass RegInlineOperandAC <string rc, string MatchName,
+ string rc_suffix = "_32">
+ : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix>;
+
//===----------------------------------------------------------------------===//
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
defm SSrc : RegImmOperand<"SReg", "SSrc">;
+def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM_INT32";
+ let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">;
+}
+
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
-def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
-
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
@@ -654,7 +1041,45 @@ def VRegSrc_32 : RegisterOperand<VGPR_32> {
}
//===----------------------------------------------------------------------===//
+// ASrc_* Operands with an AccVGPR
+//===----------------------------------------------------------------------===//
+
+def ARegSrc_32 : RegisterOperand<AGPR_32> {
+ let DecoderMethod = "DecodeAGPR_32RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
+//===----------------------------------------------------------------------===//
// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
+
+//===----------------------------------------------------------------------===//
+// VISrc_* Operands with a VGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+defm VISrc : RegInlineOperand32<"VGPR", "VISrc">;
+
+//===----------------------------------------------------------------------===//
+// AVSrc_* Operands with an AGPR or VGPR
+//===----------------------------------------------------------------------===//
+
+def AVSrc_32 : RegisterOperand<AV_32> {
+ let DecoderMethod = "DecodeAV_32RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
+def AVSrc_64 : RegisterOperand<AV_64> {
+ let DecoderMethod = "DecodeAV_64RegisterClass";
+ let EncoderMethod = "getAVOperandEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+// ACSrc_* Operands with an AGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+defm AISrc : RegInlineOperandAC<"AGPR", "AISrc">;
+defm AISrc_128 : RegInlineOperandAC<"AReg", "AISrc_128", "_128">;
+defm AISrc_512 : RegInlineOperandAC<"AReg", "AISrc_512", "_512">;
+defm AISrc_1024 : RegInlineOperandAC<"AReg", "AISrc_1024", "_1024">;
diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td
index 7af69cb6a46d..824d1aeb0df9 100644
--- a/lib/Target/AMDGPU/SISchedule.td
+++ b/lib/Target/AMDGPU/SISchedule.td
@@ -1,9 +1,8 @@
//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,6 +24,9 @@ def WriteSMEM : SchedWrite;
def WriteVMEM : SchedWrite;
def WriteBarrier : SchedWrite;
+def MIVGPRRead : SchedRead;
+def MIMFMARead : SchedRead;
+
// Vector ALU instructions
def Write32Bit : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
@@ -38,9 +40,17 @@ def WriteDouble : SchedWrite;
// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;
+// Conversion to or from f64 instruction
+def WriteDoubleCvt : SchedWrite;
+
// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;
+// mAI multipass instructions.
+def Write2PassMAI : SchedWrite;
+def Write8PassMAI : SchedWrite;
+def Write16PassMAI : SchedWrite;
+
// FIXME: Should there be a class for instructions which are VALU
// instructions and have VALU rates, but write to the SALU (i.e. VOPC
// instructions)
@@ -62,6 +72,7 @@ class SISchedMachineModel : SchedMachineModel {
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
+def GFX10SpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
@@ -82,6 +93,9 @@ def HWVMEM : ProcResource<1> {
def HWVALU : ProcResource<1> {
let BufferSize = 1;
}
+def HWRC : ProcResource<1> { // Register destination cache
+ let BufferSize = 1;
+}
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
int latency> : WriteRes<write, resources> {
@@ -91,6 +105,11 @@ class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
class HWVALUWriteRes<SchedWrite write, int latency> :
HWWriteRes<write, [HWVALU], latency>;
+def PredMIReadVGPR : SchedPredicate<[{TII->hasVGPRUses(*MI)}]>;
+
+def MIReadVGPR : SchedReadVariant<[
+ SchedVar<PredMIReadVGPR, [MIVGPRRead]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
// The latency numbers are taken from AMD Accelerated Parallel Processing
// guide. They may not be accurate.
@@ -109,6 +128,24 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<Write32Bit, 1>;
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
+ def : HWVALUWriteRes<Write2PassMAI, 2>;
+ def : HWVALUWriteRes<Write8PassMAI, 8>;
+ def : HWVALUWriteRes<Write16PassMAI, 16>;
+
+ def : ReadAdvance<MIVGPRRead, -2>;
+ def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32$")>;
+
+  // Technically mfma reads can take from 0 to 4 cycles, but that does not make
+  // sense to model because its register setup is huge. In particular, if we
+  // properly modeled the read advance as -2 for a vgpr read, it would result
+  // in bad scheduling of acc writes before that mfma. To avoid that we would
+  // need 2 or 4 more vgprs to be initialized before the acc write sequence.
+  // Just assume the worst case here.
+ def : ReadAdvance<MIMFMARead, -4>;
+
+ def : InstRW<[Write2PassMAI, MIMFMARead], (instregex "^V_MFMA_..._4X4X")>;
+ def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_MFMA_..._16X16X")>;
+ def : InstRW<[Write16PassMAI, MIMFMARead], (instregex "^V_MFMA_..._32X32X")>;
}
def PredIsVGPR32Copy : SchedPredicate<[{TII->isVGPRCopy(*MI) && TII->getOpSize(*MI, 0) <= 32}]>;
@@ -125,6 +162,7 @@ defm : SICommonWriteRes;
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
+def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -137,7 +175,32 @@ defm : SICommonWriteRes;
def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
+def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
} // End SchedModel = SIQuarterSpeedModel
+
+let SchedModel = GFX10SpeedModel in {
+
+// The latency values are 1 / (operations / cycle).
+// Add 1 stall cycle for VGPR read.
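+// Reading the values below against this formula: Write32Bit's 5 corresponds to
+// 1/4 op per cycle (4 + 1) and WriteQuarterRate32's 17 to 1/16 (16 + 1).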
+def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
+def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
+
+def : HWWriteRes<WriteBranch, [HWBranch], 32>;
+def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
+def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>;
+def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
+def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
+} // End SchedModel = GFX10SpeedModel
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 6ad7dd0e3a7c..7ee178149c7a 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1,9 +1,8 @@
//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// The pass tries to use the 32-bit encoding for instructions when possible.
//===----------------------------------------------------------------------===//
@@ -39,6 +38,8 @@ class SIShrinkInstructions : public MachineFunctionPass {
public:
static char ID;
+ void shrinkMIMG(MachineInstr &MI);
+
public:
SIShrinkInstructions() : MachineFunctionPass(ID) {
}
@@ -94,6 +95,10 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
Src0.setSubReg(0);
Src0.ChangeToFrameIndex(MovSrc.getIndex());
ConstantFolded = true;
+ } else if (MovSrc.isGlobal()) {
+ Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+ MovSrc.getTargetFlags());
+ ConstantFolded = true;
}
if (ConstantFolded) {
@@ -212,6 +217,96 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
}
}
+// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
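+// For example, an NSA image op whose three address operands already sit in
+// v4, v5 and v6 can be re-encoded with the contiguous VReg_96 tuple starting
+// at v4; if any operand breaks the run, the instruction is left unchanged.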
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ return;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ unsigned NewAddrDwords = Info->VAddrDwords;
+ const TargetRegisterClass *RC;
+
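+  // Pick the smallest contiguous register tuple that covers the address
+  // dwords; 5-8 dwords are padded up to 8 and 9-16 up to 16.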
+ if (Info->VAddrDwords == 2) {
+ RC = &AMDGPU::VReg_64RegClass;
+ } else if (Info->VAddrDwords == 3) {
+ RC = &AMDGPU::VReg_96RegClass;
+ } else if (Info->VAddrDwords == 4) {
+ RC = &AMDGPU::VReg_128RegClass;
+ } else if (Info->VAddrDwords <= 8) {
+ RC = &AMDGPU::VReg_256RegClass;
+ NewAddrDwords = 8;
+ } else {
+ RC = &AMDGPU::VReg_512RegClass;
+ NewAddrDwords = 16;
+ }
+
+ unsigned VgprBase = 0;
+ bool IsUndef = true;
+ bool IsKill = NewAddrDwords == Info->VAddrDwords;
+ for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
+ const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
+ unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+
+ if (i == 0) {
+ VgprBase = Vgpr;
+ } else if (VgprBase + i != Vgpr)
+ return;
+
+ if (!Op.isUndef())
+ IsUndef = false;
+ if (!Op.isKill())
+ IsKill = false;
+ }
+
+ if (VgprBase + NewAddrDwords > 256)
+ return;
+
+  // Further check for implicit tied operands - these may be present if TFE is
+  // enabled.
+ int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+ int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
+ unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
+ unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+ int ToUntie = -1;
+ if (TFEVal || LWEVal) {
+ // TFE/LWE is enabled so we need to deal with an implicit tied operand
+ for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
+ MI.getOperand(i).isImplicit()) {
+ // This is the tied operand
+ assert(
+ ToUntie == -1 &&
+ "found more than one tied implicit operand when expecting only 1");
+ ToUntie = i;
+ MI.untieRegOperand(ToUntie);
+ }
+ }
+ }
+
+ unsigned NewOpcode =
+ AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
+ Info->VDataDwords, NewAddrDwords);
+ MI.setDesc(TII->get(NewOpcode));
+ MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
+ MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
+ MI.getOperand(VAddr0Idx).setIsKill(IsKill);
+
+ for (unsigned i = 1; i < Info->VAddrDwords; ++i)
+ MI.RemoveOperand(VAddr0Idx + 1);
+
+ if (ToUntie >= 0) {
+ MI.tieOperands(
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
+ ToUntie - (Info->VAddrDwords - 1));
+ }
+}
+
/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
@@ -277,7 +372,9 @@ static bool shrinkScalarLogicOp(const GCNSubtarget &ST,
if (Opc == AMDGPU::S_BITSET0_B32 ||
Opc == AMDGPU::S_BITSET1_B32) {
Src0->ChangeToImmediate(NewImm);
- MI.RemoveOperand(2);
+ // Remove the immediate and add the tied input.
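+      // S_BITSET0/1 read-modify-write the destination, so once the immediate
+      // operand is dropped the old value has to be tied back in as a source.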
+ MI.getOperand(2).ChangeToRegister(Dest->getReg(), false);
+ MI.tieOperands(0, 2);
} else {
SrcImm->setImm(NewImm);
}
@@ -458,6 +555,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
+ unsigned VCCReg = ST.isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
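+  // On wave32 subtargets the condition/carry mask is the 32-bit VCC_LO rather
+  // than the full 64-bit VCC, so the checks and allocation hints below use
+  // VCCReg instead of hard-coding VCC.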
std::vector<unsigned> I1Defs;
@@ -596,6 +694,14 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ if (TII->isMIMG(MI.getOpcode()) &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs)) {
+ shrinkMIMG(MI);
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
@@ -625,10 +731,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// So, instead of forcing the instruction to write to VCC, we provide
// a hint to the register allocator to use VCC and then we will run
// this pass again after RA and shrink it if it outputs to VCC.
- MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
+ MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, VCCReg);
continue;
}
- if (DstReg != AMDGPU::VCC)
+ if (DstReg != VCCReg)
continue;
}
@@ -641,10 +747,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
continue;
unsigned SReg = Src2->getReg();
if (TargetRegisterInfo::isVirtualRegister(SReg)) {
- MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
+ MRI.setRegAllocationHint(SReg, 0, VCCReg);
continue;
}
- if (SReg != AMDGPU::VCC)
+ if (SReg != VCCReg)
continue;
}
@@ -657,20 +763,24 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::OpName::src2);
if (SDst) {
- if (SDst->getReg() != AMDGPU::VCC) {
+ bool Next = false;
+
+ if (SDst->getReg() != VCCReg) {
if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
- MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
- continue;
+ MRI.setRegAllocationHint(SDst->getReg(), 0, VCCReg);
+ Next = true;
}
// All of the instructions with carry outs also have an SGPR input in
// src2.
- if (Src2 && Src2->getReg() != AMDGPU::VCC) {
+ if (Src2 && Src2->getReg() != VCCReg) {
if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
- MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);
+ MRI.setRegAllocationHint(Src2->getReg(), 0, VCCReg);
+ Next = true;
+ }
+ if (Next)
continue;
- }
}
// We can shrink this instruction
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 879726b1528c..4e07efff55d8 100644
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1,9 +1,8 @@
//===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,6 +148,7 @@ private:
CallingConv::ID CallingConv;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
+ const GCNSubtarget *ST;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
@@ -201,6 +201,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -277,7 +279,7 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (Reg == AMDGPU::EXEC)
+ if (Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO)
continue;
for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
@@ -386,7 +388,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
unsigned Reg = MO.getReg();
if (!TRI->isVirtualRegister(Reg) &&
- TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
+ TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
Flags = StateWQM;
break;
}
@@ -619,13 +621,16 @@ void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
MachineInstr *MI;
if (SaveWQM) {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64),
SaveWQM)
.addReg(LiveMaskReg);
} else {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
+ unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64),
+ Exec)
+ .addReg(Exec)
.addReg(LiveMaskReg);
}
@@ -637,13 +642,15 @@ void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
unsigned SavedWQM) {
MachineInstr *MI;
+ unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (SavedWQM) {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), Exec)
.addReg(SavedWQM);
} else {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+ Exec)
+ .addReg(Exec);
}
LIS->InsertMachineInstrInMaps(*MI);
@@ -655,8 +662,7 @@ void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
MachineInstr *MI;
assert(SaveOrig);
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
- SaveOrig)
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::ENTER_WWM), SaveOrig)
.addImm(-1);
LIS->InsertMachineInstrInMaps(*MI);
}
@@ -667,7 +673,8 @@ void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
MachineInstr *MI;
assert(SavedOrig);
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM), AMDGPU::EXEC)
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM),
+ ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC)
.addReg(SavedOrig);
LIS->InsertMachineInstrInMaps(*MI);
}
@@ -693,6 +700,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
bool WQMFromExec = isEntry;
char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
char NonWWMState = 0;
+ const TargetRegisterClass *BoolRC = TRI->getBoolRC();
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
if (isEntry)
@@ -780,13 +788,13 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (Needs == StateWWM) {
NonWWMState = State;
- SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ SavedNonWWMReg = MRI->createVirtualRegister(BoolRC);
toWWM(MBB, Before, SavedNonWWMReg);
State = StateWWM;
} else {
if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
if (!WQMFromExec && (OutNeeds & StateWQM))
- SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ SavedWQMReg = MRI->createVirtualRegister(BoolRC);
toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
State = StateExact;
@@ -838,7 +846,23 @@ void SIWholeQuadMode::lowerCopyInstrs() {
for (MachineInstr *MI : LowerToCopyInstrs) {
for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
MI->RemoveOperand(i);
- MI->setDesc(TII->get(AMDGPU::COPY));
+
+ const unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TRI->isVGPR(*MRI, Reg)) {
+ const TargetRegisterClass *regClass =
+ TargetRegisterInfo::isVirtualRegister(Reg)
+ ? MRI->getRegClass(Reg)
+ : TRI->getPhysRegClass(Reg);
+
+ const unsigned MovOp = TII->getMovOpcode(regClass);
+ MI->setDesc(TII->get(MovOp));
+
+ // And make it implicitly depend on exec (like all VALU movs should do).
+ MI->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ } else {
+ MI->setDesc(TII->get(AMDGPU::COPY));
+ }
}
}
@@ -849,17 +873,18 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LowerToCopyInstrs.clear();
CallingConv = MF.getFunction().getCallingConv();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ ST = &MF.getSubtarget<GCNSubtarget>();
- TII = ST.getInstrInfo();
+ TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
char GlobalFlags = analyzeFunction(MF);
unsigned LiveMaskReg = 0;
+ unsigned Exec = ST->isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
if (!(GlobalFlags & StateWQM)) {
- lowerLiveMaskQueries(AMDGPU::EXEC);
+ lowerLiveMaskQueries(Exec);
if (!(GlobalFlags & StateWWM))
return !LiveMaskQueries.empty();
} else {
@@ -868,10 +893,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
- LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
TII->get(AMDGPU::COPY), LiveMaskReg)
- .addReg(AMDGPU::EXEC);
+ .addReg(Exec);
LIS->InsertMachineInstrInMaps(*MI);
}
@@ -879,9 +904,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (GlobalFlags == StateWQM) {
// For a shader that needs only WQM, we can just set it once.
- BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ BuildMI(Entry, EntryMI, DebugLoc(), TII->get(ST->isWave32() ?
+ AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64),
+ Exec)
+ .addReg(Exec);
lowerCopyInstrs();
// EntryMI may become invalid here
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 8a063e1a4867..1b410b6b5912 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -1,9 +1,8 @@
//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -34,7 +33,6 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
let SchedRW = [WriteSMEM];
- let SubtargetPredicate = isGCN;
string Mnemonic = opName;
string AsmOperands = asmOps;
@@ -42,6 +40,7 @@ class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
bits<1> has_sbase = 1;
bits<1> has_sdst = 1;
bit has_glc = 0;
+ bit has_dlc = 0;
bits<1> has_offset = 1;
bits<1> offset_is_imm = 0;
}
@@ -81,6 +80,7 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
let mayLoad = 1;
let mayStore = 0;
let has_glc = 1;
+ let has_dlc = 1;
}
class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
@@ -90,6 +90,7 @@ class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
+ let has_dlc = 1;
let ScalarStore = 1;
}
@@ -110,21 +111,23 @@ multiclass SM_Pseudo_Loads<string opName,
RegisterClass dstClass> {
def _IMM : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
- " $sdst, $sbase, $offset$glc", []> {
+ (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+ " $sdst, $sbase, $offset$glc$dlc", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_IMM";
let has_glc = 1;
+ let has_dlc = 1;
}
def _SGPR : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
- " $sdst, $sbase, $offset$glc", []> {
+ (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+ " $sdst, $sbase, $offset$glc$dlc", []> {
let BaseClass = baseClass;
let PseudoInstr = opName # "_SGPR";
let has_glc = 1;
+ let has_dlc = 1;
}
}
@@ -132,8 +135,8 @@ multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
def _IMM : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
- " $sdata, $sbase, $offset$glc", []> {
+ (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
+ " $sdata, $sbase, $offset$glc$dlc", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
@@ -141,8 +144,8 @@ multiclass SM_Pseudo_Stores<string opName,
}
def _SGPR : SM_Store_Pseudo <opName,
- (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
- " $sdata, $sbase, $offset$glc", []> {
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
+ " $sdata, $sbase, $offset$glc$dlc", []> {
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
@@ -154,17 +157,25 @@ multiclass SM_Pseudo_Discards<string opName> {
def _SGPR : SM_Discard_Pseudo <opName, (ins SReg_64:$sbase, SReg_32:$offset), 0>;
}
-class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
opName, (outs SReg_64_XEXEC:$sdst), (ins),
" $sdst", [(set i64:$sdst, (node))]> {
let hasSideEffects = 1;
- let mayStore = 0;
+
+ // FIXME: This should be definitively mayStore = 0. TableGen
+ // brokenly tries to infer these based on the intrinsic properties
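+// Lane-mask (i1) values; both SGPR widths are included, presumably so these
+// classes cover wave64 (64-bit masks) and wave32 (32-bit masks) subtargets.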
+ // corresponding to the IR attributes. The target intrinsics are
+ // considered as writing to memory for IR dependency purposes, but
+ // those can be modeled with hasSideEffects here. These also end up
+ // inferring differently for llvm.readcyclecounter and the amdgcn
+ // intrinsics.
+ let mayStore = ?;
let mayLoad = 1;
let has_sbase = 0;
let has_offset = 0;
}
-class SM_Inval_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo<
+class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
opName, (outs), (ins), "", [(node)]> {
let hasSideEffects = 1;
let mayStore = 1;
@@ -178,6 +189,16 @@ multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
def _SGPR : SM_Probe_Pseudo <opName, (ins i8imm:$sdata, baseClass:$sbase, SReg_32:$offset), 0>;
}
+class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+ opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
+ " $sdst", [(set i32:$sdst, (node))]> {
+ let hasSideEffects = 1;
+ let mayStore = 0;
+ let mayLoad = 1;
+ let has_sbase = 0;
+ let has_offset = 0;
+}
+
//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//
@@ -191,6 +212,7 @@ class SM_Atomic_Pseudo <string opName,
let mayLoad = 1;
let mayStore = 1;
let has_glc = 1;
+ let has_dlc = 1;
// Should these be set?
let ScalarStore = 1;
@@ -206,9 +228,9 @@ class SM_Pseudo_Atomic<string opName,
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!if(isImm,
- (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset),
- (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset)),
- !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", ""),
+ (ins dataClass:$sdata, baseClass:$sbase, smrd_offset_20:$offset, DLC:$dlc),
+ (ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
+ !if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
isRet> {
let offset_is_imm = isImm;
let PseudoInstr = opName # !if(isImm,
@@ -266,6 +288,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
"s_buffer_load_dwordx16", SReg_128, SReg_512
>;
+let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
@@ -281,25 +304,32 @@ defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
"s_buffer_store_dwordx4", SReg_128, SReg_128
>;
-
+} // End SubtargetPredicate = HasScalarStores
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
-let SubtargetPredicate = isCIVI in {
+let SubtargetPredicate = isGFX7GFX8GFX9 in {
def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
-} // let SubtargetPredicate = isCIVI
+} // let SubtargetPredicate = isGFX7GFX8GFX9
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8Plus in {
+let OtherPredicates = [HasScalarStores] in {
def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+} // End OtherPredicates = [HasScalarStores]
def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
-} // SubtargetPredicate = isVI
+} // SubtargetPredicate = isGFX8Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
+def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
+} // End SubtargetPredicate = isGFX10Plus
-let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in {
+let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
@@ -307,7 +337,7 @@ defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_6
defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
-} // SubtargetPredicate = HasFlatScratchInsts
+} // SubtargetPredicate = HasScalarFlatScratchInsts
let SubtargetPredicate = HasScalarAtomics in {
@@ -369,7 +399,7 @@ defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_6
} // let SubtargetPredicate = HasScalarAtomics
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = HasScalarAtomics in {
defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
}
@@ -387,8 +417,8 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
, Enc32 {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
+ let AssemblerPredicates = [isGFX6GFX7];
+ let DecoderNamespace = "GFX6GFX7";
let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
let Inst{8} = imm;
@@ -405,13 +435,13 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps,
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_si : SMRD_Real_si <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
}
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _SGPR_si : SMRD_Real_si <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
}
}
@@ -441,8 +471,8 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
, Enc64 {
bit glc;
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
+ let AssemblerPredicates = [isGFX8GFX9];
+ let DecoderNamespace = "GFX8";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
@@ -458,10 +488,10 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_vi : SMEM_Real_vi <op, immPs> {
- let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
}
}
@@ -479,11 +509,11 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
}
def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
}
}
@@ -630,9 +660,9 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
SM_Real<ps>,
Enc64 {
- let AssemblerPredicates = [isCIOnly];
- let DecoderNamespace = "CI";
- let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
+ let AssemblerPredicates = [isGFX7Only];
+ let DecoderNamespace = "GFX7";
+ let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
let LGKM_CNT = ps.LGKM_CNT;
let SMRD = ps.SMRD;
@@ -667,8 +697,8 @@ class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
, Enc32 {
- let AssemblerPredicates = [isCIOnly];
- let DecoderNamespace = "CI";
+ let AssemblerPredicates = [isGFX7Only];
+ let DecoderNamespace = "GFX7";
let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
let Inst{8} = imm;
@@ -684,7 +714,22 @@ def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
// Scalar Memory Patterns
//===----------------------------------------------------------------------===//
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]>;
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> {
+ let GISelPredicateCode = [{
+ if (!MI.hasOneMemOperand())
+ return false;
+ if (!isInstrUniform(MI))
+ return false;
+
+ // FIXME: We should probably be caching this.
+ SmallVector<GEPInfo, 4> AddrInfo;
+ getAddrModeInfo(MI, MRI, AddrInfo);
+
+ if (hasVgprParts(AddrInfo))
+ return false;
+ return true;
+ }];
+}
def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
@@ -697,41 +742,49 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 1. IMM offset
def : GCNPat <
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
- let OtherPredicates = [isCIOnly];
+ (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
+ let OtherPredicates = [isGFX7Only];
}
// 3. SGPR offset
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
+ >;
+
+ // 4. No offset
+ def : GCNPat <
+ (vt (smrd_load (i64 SReg_64:$sbase))),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
>;
}
multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate
def : GCNPat <
- (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc)))
+ (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc),
+ (as_i1imm $dlc)))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
- (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
- (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> {
- let OtherPredicates = [isCIOnly];
+ (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
+ (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> {
+ let OtherPredicates = [isGFX7Only];
}
// 3. Offset loaded in a 32-bit SGPR
def : GCNPat <
- (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc)))
+ (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc),
+ (as_i1imm $dlc)))
>;
}
@@ -759,18 +812,202 @@ defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
} // End let AddedComplexity = 100
-let OtherPredicates = [isSICI] in {
def : GCNPat <
(i64 (readcyclecounter)),
(S_MEMTIME)
>;
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
+ SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
+ bit glc;
+ bit dlc;
+
+ let AssemblerPredicates = [isGFX10Plus];
+ let DecoderNamespace = "GFX10";
+
+ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
+ let Inst{14} = !if(ps.has_dlc, dlc, ?);
+ let Inst{16} = !if(ps.has_glc, glc, ?);
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x3d;
+ let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?);
+ let Inst{63-57} = !if(ps.offset_is_imm, !cast<int>(SGPR_NULL.HWEncoding),
+ !if(ps.has_offset, offset{6-0}, ?));
}
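// A minimal C++ sketch (not LLVM's MC code emitter) of how the GFX10 SMEM field
// layout above packs into a 64-bit word; the function name and the SGPR_NULL
// encoding value used for the immediate-offset form are assumptions here.
#include <cstdint>

uint64_t encodeSMEMGfx10Imm(uint64_t SBase, uint64_t SDst, uint64_t Op,
                            uint64_t Glc, uint64_t Dlc, uint64_t ImmOffset) {
  const uint64_t SgprNullEnc = 125;        // assumed HW encoding of SGPR_NULL
  uint64_t Inst = 0;
  Inst |= (SBase >> 1) & 0x3f;             // Inst{5-0}   = sbase{6-1}
  Inst |= (SDst & 0x7f) << 6;              // Inst{12-6}  = sdst
  Inst |= (Dlc & 1) << 14;                 // Inst{14}    = dlc
  Inst |= (Glc & 1) << 16;                 // Inst{16}    = glc
  Inst |= (Op & 0xff) << 18;               // Inst{25-18} = op
  Inst |= 0x3dull << 26;                   // Inst{31-26} = SMEM encoding
  Inst |= (ImmOffset & 0xfffff) << 32;     // Inst{51-32} = 20-bit imm offset
  Inst |= (SgprNullEnc & 0x7f) << 57;      // Inst{63-57} = soffset = SGPR_NULL
  return Inst;
}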
-let OtherPredicates = [isVI] in {
+multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
+ SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
+ SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+ def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ }
+ def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ }
+}
-def : GCNPat <
- (i64 (readcyclecounter)),
- (S_MEMREALTIME)
->;
+class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
+ bits<7> sdata;
+
+ let sdst = ?;
+ let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
+}
+
+multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
+ SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
+ SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
+ // FIXME: The operand name $offset is inconsistent with $soff used
+ // in the pseudo
+ def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc);
+ }
+
+ def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+ }
+}
+
+defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarFlatScratchInsts in {
+defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
+defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
+defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
+} // End SubtargetPredicate = HasScalarFlatScratchInsts
+
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
+
+let SubtargetPredicate = HasScalarStores in {
+defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
+defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
+defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
+let OtherPredicates = [HasScalarFlatScratchInsts] in {
+defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
+} // End OtherPredicates = [HasScalarFlatScratchInsts]
+defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
+defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
+defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
+} // End SubtargetPredicate = HasScalarStores
+
+def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
+def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>;
+def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
+def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
+def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
+
+let SubtargetPredicate = HasScalarStores in {
+def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
+} // End SubtargetPredicate = HasScalarStores
+
+multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
+
+class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
+ : SMEM_Real_gfx10 <op, ps> {
+
+ bits<7> sdata;
+ bit dlc;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ let glc = ps.glc;
+
+ let Inst{14} = !if(ps.has_dlc, dlc, 0);
+ let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+}
+
+multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
+ def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
+ def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
+}
+
+let SubtargetPredicate = HasScalarAtomics in {
-} // let OtherPredicates = [isVI]
+defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
+defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
+defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
+defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
+defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
+defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
+defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
+defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
+defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
+defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
+defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
+defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
+
+defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
+defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
+defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
+defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
+defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
+defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
+defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
+defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
+defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
+defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
+defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
+defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
+defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
+
+defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
+defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
+defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
+defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
+defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
+defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
+defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
+defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
+defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
+defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
+defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
+defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
+defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
+
+multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
+ def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
+ def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
+}
+
+defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
+defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
+
+} // End SubtargetPredicate = HasScalarAtomics
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index ca5e981ac5c2..dfafdccc05a3 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -1,15 +1,15 @@
//===-- SOPInstructions.td - SOP Instruction Definitions ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
def GPRIdxModeMatchClass : AsmOperandClass {
let Name = "GPRIdxMode";
let PredicateMethod = "isGPRIdxMode";
+ let ParserMethod = "parseGPRIdxMode";
let RenderMethod = "addImmOperands";
}
@@ -26,7 +26,6 @@ class SOP_Pseudo<string opName, dag outs, dag ins, string asmOps,
let isPseudo = 1;
let isCodeGenOnly = 1;
- let SubtargetPredicate = isGCN;
string Mnemonic = opName;
string AsmOperands = asmOps;
@@ -78,10 +77,13 @@ class SOP1_Real<bits<8> op, SOP1_Pseudo ps> :
let Inst{31-23} = 0x17d; //encoding;
}
-class SOP1_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
- opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0),
- "$sdst, $src0", pattern
->;
+class SOP1_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseudo <
+ opName, (outs SReg_32:$sdst),
+ !if(tied_in, (ins SSrc_b32:$src0, SReg_32:$sdst_in),
+ (ins SSrc_b32:$src0)),
+ "$sdst, $src0", pattern> {
+ let Constraints = !if(tied_in, "$sdst = $sdst_in", "");
+}
// 32-bit input, no output.
class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
@@ -108,10 +110,13 @@ class SOP1_32_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
>;
// 32-bit input, 64-bit output.
-class SOP1_64_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
- opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0),
- "$sdst, $src0", pattern
->;
+class SOP1_64_32 <string opName, list<dag> pattern=[], bit tied_in = 0> : SOP1_Pseudo <
+ opName, (outs SReg_64:$sdst),
+ !if(tied_in, (ins SSrc_b32:$src0, SReg_64:$sdst_in),
+ (ins SSrc_b32:$src0)),
+ "$sdst, $src0", pattern> {
+ let Constraints = !if(tied_in, "$sdst = $sdst_in", "");
+}
// no input, 64-bit output.
class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
@@ -120,8 +125,8 @@ class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
}
// 64-bit input, no output
-class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
- opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
+class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins rc:$src0), "$src0", pattern> {
let has_sdst = 0;
}
@@ -147,12 +152,24 @@ let Defs = [SCC] in {
[(set i64:$sdst, (not i64:$src0))]
>;
def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
- def S_WQM_B64 : SOP1_64 <"s_wqm_b64",
- [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))]
- >;
+ def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
} // End Defs = [SCC]
+let WaveSizePredicate = isWave32 in {
+def : GCNPat <
+ (int_amdgcn_wqm_vote i1:$src0),
+ (S_WQM_B32 $src0)
+>;
+}
+
+let WaveSizePredicate = isWave64 in {
+def : GCNPat <
+ (int_amdgcn_wqm_vote i1:$src0),
+ (S_WQM_B64 $src0)
+>;
+}
+
def S_BREV_B32 : SOP1_32 <"s_brev_b32",
[(set i32:$sdst, (bitreverse i32:$src0))]
>;
@@ -191,10 +208,10 @@ def S_SEXT_I32_I16 : SOP1_32 <"s_sext_i32_i16",
[(set i32:$sdst, (sext_inreg i32:$src0, i16))]
>;
-def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">;
-def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">;
-def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
-def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
+def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32", [], 1>;
+def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64", [], 1>;
+def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32", [], 1>;
+def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64", [], 1>;
def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64",
[(set i64:$sdst, (int_amdgcn_s_getpc))]
>;
@@ -207,7 +224,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
let isReturn = 1 in {
// Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>;
+def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
}
} // End isTerminator = 1, isBarrier = 1
@@ -241,8 +258,11 @@ def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
} // End Uses = [M0]
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {
def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">;
def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">;
+} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9
+
let Defs = [SCC] in {
def S_ABS_I32 : SOP1_32 <"s_abs_i32">;
} // End Defs = [SCC]
@@ -255,7 +275,7 @@ def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> {
}
}
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
def S_ANDN1_SAVEEXEC_B64 : SOP1_64<"s_andn1_saveexec_b64">;
def S_ORN1_SAVEEXEC_B64 : SOP1_64<"s_orn1_saveexec_b64">;
@@ -264,7 +284,28 @@ let SubtargetPredicate = isGFX9 in {
} // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
def S_BITREPLICATE_B64_B32 : SOP1_64_32<"s_bitreplicate_b64_b32">;
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+ let hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC] in {
+ def S_AND_SAVEEXEC_B32 : SOP1_32<"s_and_saveexec_b32">;
+ def S_OR_SAVEEXEC_B32 : SOP1_32<"s_or_saveexec_b32">;
+ def S_XOR_SAVEEXEC_B32 : SOP1_32<"s_xor_saveexec_b32">;
+ def S_ANDN2_SAVEEXEC_B32 : SOP1_32<"s_andn2_saveexec_b32">;
+ def S_ORN2_SAVEEXEC_B32 : SOP1_32<"s_orn2_saveexec_b32">;
+ def S_NAND_SAVEEXEC_B32 : SOP1_32<"s_nand_saveexec_b32">;
+ def S_NOR_SAVEEXEC_B32 : SOP1_32<"s_nor_saveexec_b32">;
+ def S_XNOR_SAVEEXEC_B32 : SOP1_32<"s_xnor_saveexec_b32">;
+ def S_ANDN1_SAVEEXEC_B32 : SOP1_32<"s_andn1_saveexec_b32">;
+ def S_ORN1_SAVEEXEC_B32 : SOP1_32<"s_orn1_saveexec_b32">;
+ def S_ANDN1_WREXEC_B32 : SOP1_32<"s_andn1_wrexec_b32">;
+ def S_ANDN2_WREXEC_B32 : SOP1_32<"s_andn2_wrexec_b32">;
+ } // End hasSideEffects = 1, Defs = [EXEC, SCC], Uses = [EXEC]
+
+ let Uses = [M0] in {
+ def S_MOVRELSD_2_B32 : SOP1_32<"s_movrelsd_2_b32">;
+ } // End Uses = [M0]
+} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
// SOP2 Instructions
@@ -302,6 +343,8 @@ class SOP2_Real<bits<7> op, SOP_Pseudo ps> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let AsmMatchConverter = ps.AsmMatchConverter;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let TSFlags = ps.TSFlags;
// encoding
bits<7> sdst;
@@ -468,22 +511,22 @@ let AddedComplexity = 1 in {
let Defs = [SCC] in {
// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
+ [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
+ [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
+ [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
+ [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
+ [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
+ [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
} // End Defs = [SCC]
@@ -512,13 +555,14 @@ def S_CBRANCH_G_FORK : SOP2_Pseudo <
"$src0, $src1"
> {
let has_sdst = 0;
+ let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
}
let Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
} // End Defs = [SCC]
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
def S_RFE_RESTORE_B64 : SOP2_Pseudo <
"s_rfe_restore_b64", (outs),
(ins SSrc_b64:$src0, SSrc_b32:$src1),
@@ -529,7 +573,7 @@ let SubtargetPredicate = isVI in {
}
}
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
@@ -543,7 +587,7 @@ let SubtargetPredicate = isGFX9 in {
def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">;
def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">;
-}
+} // End SubtargetPredicate = isGFX9Plus
//===----------------------------------------------------------------------===//
// SOPK Instructions
@@ -555,7 +599,6 @@ class SOPK_Pseudo <string opName, dag outs, dag ins,
SIMCInstr<opName, SIEncodingFamily.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
- let SubtargetPredicate = isGCN;
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
@@ -618,6 +661,19 @@ class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
"$sdst, $simm16",
pattern>;
+class SOPK_32_BR <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+ opName,
+ (outs),
+ (ins sopp_brtarget:$simm16, SReg_32:$sdst),
+ "$sdst, $simm16",
+ pattern> {
+ let Defs = [EXEC];
+ let Uses = [EXEC];
+ let isBranch = 1;
+ let isTerminator = 1;
+ let SchedRW = [WriteBranch];
+}
+
class SOPK_SCC <string opName, string base_op, bit isSignExt> : SOPK_Pseudo <
opName,
(outs),
@@ -684,9 +740,10 @@ let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
def S_MULK_I32 : SOPK_32TIE <"s_mulk_i32">;
}
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in
def S_CBRANCH_I_FORK : SOPK_Pseudo <
"s_cbranch_i_fork",
- (outs), (ins SReg_64:$sdst, s16imm:$simm16),
+ (outs), (ins SReg_64:$sdst, sopp_brtarget:$simm16),
"$sdst, $simm16"
>;
@@ -720,15 +777,46 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo <
} // End hasSideEffects = 1
-let SubtargetPredicate = isGFX9 in {
+class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
+ SOPK_Pseudo<
+ opName,
+ (outs),
+ (ins SReg_32:$sdst, s16imm:$simm16),
+ "$sdst, $simm16",
+ pat> {
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+  let has_sdst = 1; // First source takes the place of sdst in the encoding
+}
+
+let SubtargetPredicate = isGFX9Plus in {
def S_CALL_B64 : SOPK_Pseudo<
"s_call_b64",
(outs SReg_64:$sdst),
- (ins s16imm:$simm16),
+ (ins sopp_brtarget:$simm16),
"$sdst, $simm16"> {
let isCall = 1;
}
-}
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+ def S_VERSION : SOPK_Pseudo<
+ "s_version",
+ (outs),
+ (ins s16imm:$simm16),
+ "$simm16"> {
+ let has_sdst = 0;
+ }
+
+ def S_SUBVECTOR_LOOP_BEGIN : SOPK_32_BR<"s_subvector_loop_begin">;
+ def S_SUBVECTOR_LOOP_END : SOPK_32_BR<"s_subvector_loop_end">;
+
+ def S_WAITCNT_VSCNT : SOPK_WAITCNT<"s_waitcnt_vscnt">;
+ def S_WAITCNT_VMCNT : SOPK_WAITCNT<"s_waitcnt_vmcnt">;
+ def S_WAITCNT_EXPCNT : SOPK_WAITCNT<"s_waitcnt_expcnt">;
+ def S_WAITCNT_LGKMCNT : SOPK_WAITCNT<"s_waitcnt_lgkmcnt">;
+} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
// SOPC Instructions
@@ -756,7 +844,6 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm,
let Defs = [SCC];
let SchedRW = [WriteSALU];
let UseNamedOperandTable = 1;
- let SubtargetPredicate = isGCN;
}
class SOPC_Base <bits<7> op, RegisterOperand rc0, RegisterOperand rc1,
@@ -811,12 +898,13 @@ def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">;
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in
def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">;
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8Plus in {
def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>;
def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
-}
+} // End SubtargetPredicate = isGFX8Plus
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_ON : SOPC <0x11,
@@ -834,6 +922,10 @@ def S_SET_GPR_IDX_ON : SOPC <0x11,
// SOPP Instructions
//===----------------------------------------------------------------------===//
+class Base_SOPP <string asm> {
+ string AsmString = asm;
+}
+
class SOPPe <bits<7> op> : Enc32 {
bits <16> simm16;
@@ -843,7 +935,7 @@ class SOPPe <bits<7> op> : Enc32 {
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
- InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
+ InstSI <(outs), ins, asm, pattern >, SOPPe <op>, Base_SOPP <asm> {
let mayLoad = 0;
let mayStore = 0;
@@ -854,92 +946,124 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
let SchedRW = [WriteSALU];
let UseNamedOperandTable = 1;
- let SubtargetPredicate = isGCN;
}
-
def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
+class SOPP_w_nop_e <bits<7> op> : Enc64 {
+ bits <16> simm16;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+ let Inst{47-32} = 0x0;
+ let Inst{54-48} = S_NOP.Inst{22-16}; // opcode
+ let Inst{63-55} = S_NOP.Inst{31-23}; // encoding
+}
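// A small worked check against the layout just defined: with S_NOP's SOPP
// fields (opcode 0, encoding 0x17f, simm16 0), the upper 32 bits appended by
// SOPP_w_nop_e come out as the machine word of "s_nop 0". The constant below
// is a hypothetical stand-alone reproduction, not part of LLVM.
#include <cstdint>

constexpr uint32_t PaddingNop = (0x17fu << 23) | (0u << 16) | 0u;
static_assert(PaddingNop == 0xBF800000u, "encoding of s_nop 0");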
+
+class SOPP_w_nop <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
+ InstSI <(outs), ins, asm, pattern >, SOPP_w_nop_e <op>, Base_SOPP <asm> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPP = 1;
+ let Size = 8;
+ let SchedRW = [WriteSALU];
+
+ let UseNamedOperandTable = 1;
+}
+
+multiclass SOPP_With_Relaxation <bits<7> op, dag ins, string asm, list<dag> pattern = []> {
+ def "" : SOPP <op, ins, asm, pattern>;
+ def _pad_s_nop : SOPP_w_nop <op, ins, asm, pattern>;
+}
+
let isTerminator = 1 in {
-def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
- [(AMDGPUendpgm)]> {
- let simm16 = 0;
+def S_ENDPGM : SOPP <0x00000001, (ins EndpgmImm:$simm16), "s_endpgm$simm16"> {
let isBarrier = 1;
let isReturn = 1;
}
-let SubtargetPredicate = isVI in {
def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> {
+ let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
let isBarrier = 1;
let isReturn = 1;
}
-}
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
let isBarrier = 1, isReturn = 1, simm16 = 0 in {
def S_ENDPGM_ORDERED_PS_DONE :
SOPP<0x01e, (ins), "s_endpgm_ordered_ps_done">;
} // End isBarrier = 1, isReturn = 1, simm16 = 0
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX10Plus in {
+ let isBarrier = 1, isReturn = 1, simm16 = 0 in {
+ def S_CODE_END :
+ SOPP<0x01f, (ins), "s_code_end">;
+ } // End isBarrier = 1, isReturn = 1, simm16 = 0
+} // End SubtargetPredicate = isGFX10Plus
let isBranch = 1, SchedRW = [WriteBranch] in {
-def S_BRANCH : SOPP <
+let isBarrier = 1 in {
+defm S_BRANCH : SOPP_With_Relaxation <
0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
- [(br bb:$simm16)]> {
- let isBarrier = 1;
+ [(br bb:$simm16)]>;
}
let Uses = [SCC] in {
-def S_CBRANCH_SCC0 : SOPP <
+defm S_CBRANCH_SCC0 : SOPP_With_Relaxation <
0x00000004, (ins sopp_brtarget:$simm16),
"s_cbranch_scc0 $simm16"
>;
-def S_CBRANCH_SCC1 : SOPP <
+defm S_CBRANCH_SCC1 : SOPP_With_Relaxation <
0x00000005, (ins sopp_brtarget:$simm16),
"s_cbranch_scc1 $simm16"
>;
} // End Uses = [SCC]
let Uses = [VCC] in {
-def S_CBRANCH_VCCZ : SOPP <
+defm S_CBRANCH_VCCZ : SOPP_With_Relaxation <
0x00000006, (ins sopp_brtarget:$simm16),
"s_cbranch_vccz $simm16"
>;
-def S_CBRANCH_VCCNZ : SOPP <
+defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation <
0x00000007, (ins sopp_brtarget:$simm16),
"s_cbranch_vccnz $simm16"
>;
} // End Uses = [VCC]
let Uses = [EXEC] in {
-def S_CBRANCH_EXECZ : SOPP <
+defm S_CBRANCH_EXECZ : SOPP_With_Relaxation <
0x00000008, (ins sopp_brtarget:$simm16),
"s_cbranch_execz $simm16"
>;
-def S_CBRANCH_EXECNZ : SOPP <
+defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation <
0x00000009, (ins sopp_brtarget:$simm16),
"s_cbranch_execnz $simm16"
>;
} // End Uses = [EXEC]
-def S_CBRANCH_CDBGSYS : SOPP <
+defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation <
0x00000017, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbgsys $simm16"
>;
-def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
+defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation <
0x0000001A, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbgsys_and_user $simm16"
>;
-def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
+defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation <
0x00000019, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbgsys_or_user $simm16"
>;
-def S_CBRANCH_CDBGUSER : SOPP <
+defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation <
0x00000018, (ins sopp_brtarget:$simm16),
"s_cbranch_cdbguser $simm16"
>;
@@ -957,16 +1081,16 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
let isConvergent = 1;
}
-let SubtargetPredicate = isVI in {
def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
+ let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
let mayLoad = 1;
let mayStore = 1;
}
-}
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
+ [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -994,7 +1118,10 @@ def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $
>;
} // End Uses = [EXEC, M0]
-def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
+def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {
+ let isTrap = 1;
+}
+
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;
}
@@ -1028,6 +1155,25 @@ def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
}
}
+let SubtargetPredicate = isGFX10Plus in {
+ def S_INST_PREFETCH :
+ SOPP<0x020, (ins s16imm:$simm16), "s_inst_prefetch $simm16">;
+ def S_CLAUSE :
+ SOPP<0x021, (ins s16imm:$simm16), "s_clause $simm16">;
+ def S_WAITCNT_IDLE :
+ SOPP <0x022, (ins), "s_wait_idle"> {
+ let simm16 = 0;
+ }
+ def S_WAITCNT_DEPCTR :
+ SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">;
+ def S_ROUND_MODE :
+ SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
+ def S_DENORM_MODE :
+ SOPP<0x025, (ins s16imm:$simm16), "s_denorm_mode $simm16">;
+ def S_TTRACEDATA_IMM :
+ SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
+} // End SubtargetPredicate = isGFX10Plus
+
//===----------------------------------------------------------------------===//
// S_GETREG_B32 Intrinsic Pattern.
//===----------------------------------------------------------------------===//
@@ -1041,6 +1187,11 @@ def : GCNPat <
//===----------------------------------------------------------------------===//
def : GCNPat <
+ (AMDGPUendpgm),
+ (S_ENDPGM (i16 0))
+>;
+
+def : GCNPat <
(i64 (ctpop i64:$src)),
(i64 (REG_SEQUENCE SReg_64,
(i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,
@@ -1097,162 +1248,261 @@ def : GCNPat<
>;
+//===----------------------------------------------------------------------===//
+// Target-specific instruction encodings.
+//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// SOPP Patterns
+// SOP1 - GFX10.
//===----------------------------------------------------------------------===//
-def : GCNPat <
- (int_amdgcn_s_waitcnt i32:$simm16),
- (S_WAITCNT (as_i16imm $simm16))
->;
+class Select_gfx10<string opName> : SIMCInstr<opName, SIEncodingFamily.GFX10> {
+ Predicate AssemblerPredicate = isGFX10Plus;
+ string DecoderNamespace = "GFX10";
+}
+
+multiclass SOP1_Real_gfx10<bits<8> op> {
+ def _gfx10 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+ Select_gfx10<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
+}
+defm S_ANDN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x037>;
+defm S_ORN1_SAVEEXEC_B64 : SOP1_Real_gfx10<0x038>;
+defm S_ANDN1_WREXEC_B64 : SOP1_Real_gfx10<0x039>;
+defm S_ANDN2_WREXEC_B64 : SOP1_Real_gfx10<0x03a>;
+defm S_BITREPLICATE_B64_B32 : SOP1_Real_gfx10<0x03b>;
+defm S_AND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03c>;
+defm S_OR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03d>;
+defm S_XOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03e>;
+defm S_ANDN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x03f>;
+defm S_ORN2_SAVEEXEC_B32 : SOP1_Real_gfx10<0x040>;
+defm S_NAND_SAVEEXEC_B32 : SOP1_Real_gfx10<0x041>;
+defm S_NOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x042>;
+defm S_XNOR_SAVEEXEC_B32 : SOP1_Real_gfx10<0x043>;
+defm S_ANDN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x044>;
+defm S_ORN1_SAVEEXEC_B32 : SOP1_Real_gfx10<0x045>;
+defm S_ANDN1_WREXEC_B32 : SOP1_Real_gfx10<0x046>;
+defm S_ANDN2_WREXEC_B32 : SOP1_Real_gfx10<0x047>;
+defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>;
//===----------------------------------------------------------------------===//
-// Real target instructions, move this to the appropriate subtarget TD file
+// SOP1 - GFX6, GFX7.
//===----------------------------------------------------------------------===//
-class Select_si<string opName> :
- SIMCInstr<opName, SIEncodingFamily.SI> {
- list<Predicate> AssemblerPredicates = [isSICI];
- string DecoderNamespace = "SICI";
+class Select_gfx6_gfx7<string opName> : SIMCInstr<opName, SIEncodingFamily.SI> {
+ Predicate AssemblerPredicate = isGFX6GFX7;
+ string DecoderNamespace = "GFX6GFX7";
}
-class SOP1_Real_si<bits<8> op, SOP1_Pseudo ps> :
- SOP1_Real<op, ps>,
- Select_si<ps.Mnemonic>;
+multiclass SOP1_Real_gfx6_gfx7<bits<8> op> {
+ def _gfx6_gfx7 : SOP1_Real<op, !cast<SOP1_Pseudo>(NAME)>,
+ Select_gfx6_gfx7<!cast<SOP1_Pseudo>(NAME).Mnemonic>;
+}
-class SOP2_Real_si<bits<7> op, SOP2_Pseudo ps> :
- SOP2_Real<op, ps>,
- Select_si<ps.Mnemonic>;
+multiclass SOP1_Real_gfx6_gfx7_gfx10<bits<8> op> :
+ SOP1_Real_gfx6_gfx7<op>, SOP1_Real_gfx10<op>;
+
+defm S_CBRANCH_JOIN : SOP1_Real_gfx6_gfx7<0x032>;
+defm S_MOV_REGRD_B32 : SOP1_Real_gfx6_gfx7<0x033>;
+
+defm S_MOV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x003>;
+defm S_MOV_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x004>;
+defm S_CMOV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x005>;
+defm S_CMOV_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x006>;
+defm S_NOT_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x007>;
+defm S_NOT_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x008>;
+defm S_WQM_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x009>;
+defm S_WQM_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x00a>;
+defm S_BREV_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x00b>;
+defm S_BREV_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x00c>;
+defm S_BCNT0_I32_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x00d>;
+defm S_BCNT0_I32_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x00e>;
+defm S_BCNT1_I32_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x00f>;
+defm S_BCNT1_I32_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x010>;
+defm S_FF0_I32_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x011>;
+defm S_FF0_I32_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x012>;
+defm S_FF1_I32_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x013>;
+defm S_FF1_I32_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x014>;
+defm S_FLBIT_I32_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x015>;
+defm S_FLBIT_I32_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x016>;
+defm S_FLBIT_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x017>;
+defm S_FLBIT_I32_I64 : SOP1_Real_gfx6_gfx7_gfx10<0x018>;
+defm S_SEXT_I32_I8 : SOP1_Real_gfx6_gfx7_gfx10<0x019>;
+defm S_SEXT_I32_I16 : SOP1_Real_gfx6_gfx7_gfx10<0x01a>;
+defm S_BITSET0_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x01b>;
+defm S_BITSET0_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x01c>;
+defm S_BITSET1_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x01d>;
+defm S_BITSET1_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x01e>;
+defm S_GETPC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x01f>;
+defm S_SETPC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x020>;
+defm S_SWAPPC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x021>;
+defm S_RFE_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x022>;
+defm S_AND_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x024>;
+defm S_OR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x025>;
+defm S_XOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x026>;
+defm S_ANDN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x027>;
+defm S_ORN2_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x028>;
+defm S_NAND_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x029>;
+defm S_NOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02a>;
+defm S_XNOR_SAVEEXEC_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02b>;
+defm S_QUADMASK_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02c>;
+defm S_QUADMASK_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02d>;
+defm S_MOVRELS_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x02e>;
+defm S_MOVRELS_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x02f>;
+defm S_MOVRELD_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x030>;
+defm S_MOVRELD_B64 : SOP1_Real_gfx6_gfx7_gfx10<0x031>;
+defm S_ABS_I32 : SOP1_Real_gfx6_gfx7_gfx10<0x034>;
+defm S_MOV_FED_B32 : SOP1_Real_gfx6_gfx7_gfx10<0x035>;
-class SOPK_Real_si<bits<5> op, SOPK_Pseudo ps> :
- SOPK_Real32<op, ps>,
- Select_si<ps.Mnemonic>;
-
-def S_MOV_B32_si : SOP1_Real_si <0x03, S_MOV_B32>;
-def S_MOV_B64_si : SOP1_Real_si <0x04, S_MOV_B64>;
-def S_CMOV_B32_si : SOP1_Real_si <0x05, S_CMOV_B32>;
-def S_CMOV_B64_si : SOP1_Real_si <0x06, S_CMOV_B64>;
-def S_NOT_B32_si : SOP1_Real_si <0x07, S_NOT_B32>;
-def S_NOT_B64_si : SOP1_Real_si <0x08, S_NOT_B64>;
-def S_WQM_B32_si : SOP1_Real_si <0x09, S_WQM_B32>;
-def S_WQM_B64_si : SOP1_Real_si <0x0a, S_WQM_B64>;
-def S_BREV_B32_si : SOP1_Real_si <0x0b, S_BREV_B32>;
-def S_BREV_B64_si : SOP1_Real_si <0x0c, S_BREV_B64>;
-def S_BCNT0_I32_B32_si : SOP1_Real_si <0x0d, S_BCNT0_I32_B32>;
-def S_BCNT0_I32_B64_si : SOP1_Real_si <0x0e, S_BCNT0_I32_B64>;
-def S_BCNT1_I32_B32_si : SOP1_Real_si <0x0f, S_BCNT1_I32_B32>;
-def S_BCNT1_I32_B64_si : SOP1_Real_si <0x10, S_BCNT1_I32_B64>;
-def S_FF0_I32_B32_si : SOP1_Real_si <0x11, S_FF0_I32_B32>;
-def S_FF0_I32_B64_si : SOP1_Real_si <0x12, S_FF0_I32_B64>;
-def S_FF1_I32_B32_si : SOP1_Real_si <0x13, S_FF1_I32_B32>;
-def S_FF1_I32_B64_si : SOP1_Real_si <0x14, S_FF1_I32_B64>;
-def S_FLBIT_I32_B32_si : SOP1_Real_si <0x15, S_FLBIT_I32_B32>;
-def S_FLBIT_I32_B64_si : SOP1_Real_si <0x16, S_FLBIT_I32_B64>;
-def S_FLBIT_I32_si : SOP1_Real_si <0x17, S_FLBIT_I32>;
-def S_FLBIT_I32_I64_si : SOP1_Real_si <0x18, S_FLBIT_I32_I64>;
-def S_SEXT_I32_I8_si : SOP1_Real_si <0x19, S_SEXT_I32_I8>;
-def S_SEXT_I32_I16_si : SOP1_Real_si <0x1a, S_SEXT_I32_I16>;
-def S_BITSET0_B32_si : SOP1_Real_si <0x1b, S_BITSET0_B32>;
-def S_BITSET0_B64_si : SOP1_Real_si <0x1c, S_BITSET0_B64>;
-def S_BITSET1_B32_si : SOP1_Real_si <0x1d, S_BITSET1_B32>;
-def S_BITSET1_B64_si : SOP1_Real_si <0x1e, S_BITSET1_B64>;
-def S_GETPC_B64_si : SOP1_Real_si <0x1f, S_GETPC_B64>;
-def S_SETPC_B64_si : SOP1_Real_si <0x20, S_SETPC_B64>;
-def S_SWAPPC_B64_si : SOP1_Real_si <0x21, S_SWAPPC_B64>;
-def S_RFE_B64_si : SOP1_Real_si <0x22, S_RFE_B64>;
-def S_AND_SAVEEXEC_B64_si : SOP1_Real_si <0x24, S_AND_SAVEEXEC_B64>;
-def S_OR_SAVEEXEC_B64_si : SOP1_Real_si <0x25, S_OR_SAVEEXEC_B64>;
-def S_XOR_SAVEEXEC_B64_si : SOP1_Real_si <0x26, S_XOR_SAVEEXEC_B64>;
-def S_ANDN2_SAVEEXEC_B64_si: SOP1_Real_si <0x27, S_ANDN2_SAVEEXEC_B64>;
-def S_ORN2_SAVEEXEC_B64_si : SOP1_Real_si <0x28, S_ORN2_SAVEEXEC_B64>;
-def S_NAND_SAVEEXEC_B64_si : SOP1_Real_si <0x29, S_NAND_SAVEEXEC_B64>;
-def S_NOR_SAVEEXEC_B64_si : SOP1_Real_si <0x2a, S_NOR_SAVEEXEC_B64>;
-def S_XNOR_SAVEEXEC_B64_si : SOP1_Real_si <0x2b, S_XNOR_SAVEEXEC_B64>;
-def S_QUADMASK_B32_si : SOP1_Real_si <0x2c, S_QUADMASK_B32>;
-def S_QUADMASK_B64_si : SOP1_Real_si <0x2d, S_QUADMASK_B64>;
-def S_MOVRELS_B32_si : SOP1_Real_si <0x2e, S_MOVRELS_B32>;
-def S_MOVRELS_B64_si : SOP1_Real_si <0x2f, S_MOVRELS_B64>;
-def S_MOVRELD_B32_si : SOP1_Real_si <0x30, S_MOVRELD_B32>;
-def S_MOVRELD_B64_si : SOP1_Real_si <0x31, S_MOVRELD_B64>;
-def S_CBRANCH_JOIN_si : SOP1_Real_si <0x32, S_CBRANCH_JOIN>;
-def S_MOV_REGRD_B32_si : SOP1_Real_si <0x33, S_MOV_REGRD_B32>;
-def S_ABS_I32_si : SOP1_Real_si <0x34, S_ABS_I32>;
-def S_MOV_FED_B32_si : SOP1_Real_si <0x35, S_MOV_FED_B32>;
-
-def S_ADD_U32_si : SOP2_Real_si <0x00, S_ADD_U32>;
-def S_ADD_I32_si : SOP2_Real_si <0x02, S_ADD_I32>;
-def S_SUB_U32_si : SOP2_Real_si <0x01, S_SUB_U32>;
-def S_SUB_I32_si : SOP2_Real_si <0x03, S_SUB_I32>;
-def S_ADDC_U32_si : SOP2_Real_si <0x04, S_ADDC_U32>;
-def S_SUBB_U32_si : SOP2_Real_si <0x05, S_SUBB_U32>;
-def S_MIN_I32_si : SOP2_Real_si <0x06, S_MIN_I32>;
-def S_MIN_U32_si : SOP2_Real_si <0x07, S_MIN_U32>;
-def S_MAX_I32_si : SOP2_Real_si <0x08, S_MAX_I32>;
-def S_MAX_U32_si : SOP2_Real_si <0x09, S_MAX_U32>;
-def S_CSELECT_B32_si : SOP2_Real_si <0x0a, S_CSELECT_B32>;
-def S_CSELECT_B64_si : SOP2_Real_si <0x0b, S_CSELECT_B64>;
-def S_AND_B32_si : SOP2_Real_si <0x0e, S_AND_B32>;
-def S_AND_B64_si : SOP2_Real_si <0x0f, S_AND_B64>;
-def S_OR_B32_si : SOP2_Real_si <0x10, S_OR_B32>;
-def S_OR_B64_si : SOP2_Real_si <0x11, S_OR_B64>;
-def S_XOR_B32_si : SOP2_Real_si <0x12, S_XOR_B32>;
-def S_XOR_B64_si : SOP2_Real_si <0x13, S_XOR_B64>;
-def S_ANDN2_B32_si : SOP2_Real_si <0x14, S_ANDN2_B32>;
-def S_ANDN2_B64_si : SOP2_Real_si <0x15, S_ANDN2_B64>;
-def S_ORN2_B32_si : SOP2_Real_si <0x16, S_ORN2_B32>;
-def S_ORN2_B64_si : SOP2_Real_si <0x17, S_ORN2_B64>;
-def S_NAND_B32_si : SOP2_Real_si <0x18, S_NAND_B32>;
-def S_NAND_B64_si : SOP2_Real_si <0x19, S_NAND_B64>;
-def S_NOR_B32_si : SOP2_Real_si <0x1a, S_NOR_B32>;
-def S_NOR_B64_si : SOP2_Real_si <0x1b, S_NOR_B64>;
-def S_XNOR_B32_si : SOP2_Real_si <0x1c, S_XNOR_B32>;
-def S_XNOR_B64_si : SOP2_Real_si <0x1d, S_XNOR_B64>;
-def S_LSHL_B32_si : SOP2_Real_si <0x1e, S_LSHL_B32>;
-def S_LSHL_B64_si : SOP2_Real_si <0x1f, S_LSHL_B64>;
-def S_LSHR_B32_si : SOP2_Real_si <0x20, S_LSHR_B32>;
-def S_LSHR_B64_si : SOP2_Real_si <0x21, S_LSHR_B64>;
-def S_ASHR_I32_si : SOP2_Real_si <0x22, S_ASHR_I32>;
-def S_ASHR_I64_si : SOP2_Real_si <0x23, S_ASHR_I64>;
-def S_BFM_B32_si : SOP2_Real_si <0x24, S_BFM_B32>;
-def S_BFM_B64_si : SOP2_Real_si <0x25, S_BFM_B64>;
-def S_MUL_I32_si : SOP2_Real_si <0x26, S_MUL_I32>;
-def S_BFE_U32_si : SOP2_Real_si <0x27, S_BFE_U32>;
-def S_BFE_I32_si : SOP2_Real_si <0x28, S_BFE_I32>;
-def S_BFE_U64_si : SOP2_Real_si <0x29, S_BFE_U64>;
-def S_BFE_I64_si : SOP2_Real_si <0x2a, S_BFE_I64>;
-def S_CBRANCH_G_FORK_si : SOP2_Real_si <0x2b, S_CBRANCH_G_FORK>;
-def S_ABSDIFF_I32_si : SOP2_Real_si <0x2c, S_ABSDIFF_I32>;
-
-def S_MOVK_I32_si : SOPK_Real_si <0x00, S_MOVK_I32>;
-def S_CMOVK_I32_si : SOPK_Real_si <0x02, S_CMOVK_I32>;
-def S_CMPK_EQ_I32_si : SOPK_Real_si <0x03, S_CMPK_EQ_I32>;
-def S_CMPK_LG_I32_si : SOPK_Real_si <0x04, S_CMPK_LG_I32>;
-def S_CMPK_GT_I32_si : SOPK_Real_si <0x05, S_CMPK_GT_I32>;
-def S_CMPK_GE_I32_si : SOPK_Real_si <0x06, S_CMPK_GE_I32>;
-def S_CMPK_LT_I32_si : SOPK_Real_si <0x07, S_CMPK_LT_I32>;
-def S_CMPK_LE_I32_si : SOPK_Real_si <0x08, S_CMPK_LE_I32>;
-def S_CMPK_EQ_U32_si : SOPK_Real_si <0x09, S_CMPK_EQ_U32>;
-def S_CMPK_LG_U32_si : SOPK_Real_si <0x0a, S_CMPK_LG_U32>;
-def S_CMPK_GT_U32_si : SOPK_Real_si <0x0b, S_CMPK_GT_U32>;
-def S_CMPK_GE_U32_si : SOPK_Real_si <0x0c, S_CMPK_GE_U32>;
-def S_CMPK_LT_U32_si : SOPK_Real_si <0x0d, S_CMPK_LT_U32>;
-def S_CMPK_LE_U32_si : SOPK_Real_si <0x0e, S_CMPK_LE_U32>;
-def S_ADDK_I32_si : SOPK_Real_si <0x0f, S_ADDK_I32>;
-def S_MULK_I32_si : SOPK_Real_si <0x10, S_MULK_I32>;
-def S_CBRANCH_I_FORK_si : SOPK_Real_si <0x11, S_CBRANCH_I_FORK>;
-def S_GETREG_B32_si : SOPK_Real_si <0x12, S_GETREG_B32>;
-def S_SETREG_B32_si : SOPK_Real_si <0x13, S_SETREG_B32>;
-//def S_GETREG_REGRD_B32_si : SOPK_Real_si <0x14, S_GETREG_REGRD_B32>; // see pseudo for comments
-def S_SETREG_IMM32_B32_si : SOPK_Real64<0x15, S_SETREG_IMM32_B32>,
- Select_si<S_SETREG_IMM32_B32.Mnemonic>;
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX10.
+//===----------------------------------------------------------------------===//
+
+multiclass SOP2_Real_gfx10<bits<7> op> {
+ def _gfx10 : SOP2_Real<op, !cast<SOP2_Pseudo>(NAME)>,
+ Select_gfx10<!cast<SOP2_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_LSHL1_ADD_U32 : SOP2_Real_gfx10<0x02e>;
+defm S_LSHL2_ADD_U32 : SOP2_Real_gfx10<0x02f>;
+defm S_LSHL3_ADD_U32 : SOP2_Real_gfx10<0x030>;
+defm S_LSHL4_ADD_U32 : SOP2_Real_gfx10<0x031>;
+defm S_PACK_LL_B32_B16 : SOP2_Real_gfx10<0x032>;
+defm S_PACK_LH_B32_B16 : SOP2_Real_gfx10<0x033>;
+defm S_PACK_HH_B32_B16 : SOP2_Real_gfx10<0x034>;
+defm S_MUL_HI_U32 : SOP2_Real_gfx10<0x035>;
+defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 - GFX6, GFX7.
+//===----------------------------------------------------------------------===//
+multiclass SOP2_Real_gfx6_gfx7<bits<7> op> {
+ def _gfx6_gfx7 : SOP2_Real<op, !cast<SOP_Pseudo>(NAME)>,
+ Select_gfx6_gfx7<!cast<SOP_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOP2_Real_gfx6_gfx7_gfx10<bits<7> op> :
+ SOP2_Real_gfx6_gfx7<op>, SOP2_Real_gfx10<op>;
+
+defm S_CBRANCH_G_FORK : SOP2_Real_gfx6_gfx7<0x02b>;
+
+defm S_ADD_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x000>;
+defm S_SUB_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x001>;
+defm S_ADD_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x002>;
+defm S_SUB_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x003>;
+defm S_ADDC_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x004>;
+defm S_SUBB_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x005>;
+defm S_MIN_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x006>;
+defm S_MIN_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x007>;
+defm S_MAX_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x008>;
+defm S_MAX_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x009>;
+defm S_CSELECT_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x00a>;
+defm S_CSELECT_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x00b>;
+defm S_AND_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x00e>;
+defm S_AND_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x00f>;
+defm S_OR_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x010>;
+defm S_OR_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x011>;
+defm S_XOR_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x012>;
+defm S_XOR_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x013>;
+defm S_ANDN2_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x014>;
+defm S_ANDN2_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x015>;
+defm S_ORN2_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x016>;
+defm S_ORN2_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x017>;
+defm S_NAND_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x018>;
+defm S_NAND_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x019>;
+defm S_NOR_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x01a>;
+defm S_NOR_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x01b>;
+defm S_XNOR_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x01c>;
+defm S_XNOR_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x01d>;
+defm S_LSHL_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x01e>;
+defm S_LSHL_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x01f>;
+defm S_LSHR_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x020>;
+defm S_LSHR_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x021>;
+defm S_ASHR_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x022>;
+defm S_ASHR_I64 : SOP2_Real_gfx6_gfx7_gfx10<0x023>;
+defm S_BFM_B32 : SOP2_Real_gfx6_gfx7_gfx10<0x024>;
+defm S_BFM_B64 : SOP2_Real_gfx6_gfx7_gfx10<0x025>;
+defm S_MUL_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x026>;
+defm S_BFE_U32 : SOP2_Real_gfx6_gfx7_gfx10<0x027>;
+defm S_BFE_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x028>;
+defm S_BFE_U64 : SOP2_Real_gfx6_gfx7_gfx10<0x029>;
+defm S_BFE_I64 : SOP2_Real_gfx6_gfx7_gfx10<0x02a>;
+defm S_ABSDIFF_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x02c>;
+
+//===----------------------------------------------------------------------===//
+// SOPK - GFX10.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPK_Real32_gfx10<bits<5> op> {
+ def _gfx10 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx10<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real64_gfx10<bits<5> op> {
+ def _gfx10 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx10<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+defm S_VERSION : SOPK_Real32_gfx10<0x001>;
+defm S_CALL_B64 : SOPK_Real32_gfx10<0x016>;
+defm S_WAITCNT_VSCNT : SOPK_Real32_gfx10<0x017>;
+defm S_WAITCNT_VMCNT : SOPK_Real32_gfx10<0x018>;
+defm S_WAITCNT_EXPCNT : SOPK_Real32_gfx10<0x019>;
+defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx10<0x01a>;
+defm S_SUBVECTOR_LOOP_BEGIN : SOPK_Real32_gfx10<0x01b>;
+defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx10<0x01c>;
+
+//===----------------------------------------------------------------------===//
+// SOPK - GFX6, GFX7.
+//===----------------------------------------------------------------------===//
+
+multiclass SOPK_Real32_gfx6_gfx7<bits<5> op> {
+ def _gfx6_gfx7 : SOPK_Real32<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx6_gfx7<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real64_gfx6_gfx7<bits<5> op> {
+ def _gfx6_gfx7 : SOPK_Real64<op, !cast<SOPK_Pseudo>(NAME)>,
+ Select_gfx6_gfx7<!cast<SOPK_Pseudo>(NAME).Mnemonic>;
+}
+
+multiclass SOPK_Real32_gfx6_gfx7_gfx10<bits<5> op> :
+ SOPK_Real32_gfx6_gfx7<op>, SOPK_Real32_gfx10<op>;
+
+multiclass SOPK_Real64_gfx6_gfx7_gfx10<bits<5> op> :
+ SOPK_Real64_gfx6_gfx7<op>, SOPK_Real64_gfx10<op>;
+
+defm S_CBRANCH_I_FORK : SOPK_Real32_gfx6_gfx7<0x011>;
+
+defm S_MOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x000>;
+defm S_CMOVK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x002>;
+defm S_CMPK_EQ_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x003>;
+defm S_CMPK_LG_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x004>;
+defm S_CMPK_GT_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x005>;
+defm S_CMPK_GE_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x006>;
+defm S_CMPK_LT_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x007>;
+defm S_CMPK_LE_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x008>;
+defm S_CMPK_EQ_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x009>;
+defm S_CMPK_LG_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00a>;
+defm S_CMPK_GT_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00b>;
+defm S_CMPK_GE_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00c>;
+defm S_CMPK_LT_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00d>;
+defm S_CMPK_LE_U32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00e>;
+defm S_ADDK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x00f>;
+defm S_MULK_I32 : SOPK_Real32_gfx6_gfx7_gfx10<0x010>;
+defm S_GETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x012>;
+defm S_SETREG_B32 : SOPK_Real32_gfx6_gfx7_gfx10<0x013>;
+defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>;
+
+//===----------------------------------------------------------------------===//
+// GFX8, GFX9 (VI).
+//===----------------------------------------------------------------------===//
class Select_vi<string opName> :
SIMCInstr<opName, SIEncodingFamily.VI> {
- list<Predicate> AssemblerPredicates = [isVI];
- string DecoderNamespace = "VI";
+ list<Predicate> AssemblerPredicates = [isGFX8GFX9];
+ string DecoderNamespace = "GFX8";
}
class SOP1_Real_vi<bits<8> op, SOP1_Pseudo ps> :
diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index e4c442db3016..30cf12337c6e 100644
--- a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -1,9 +1,8 @@
//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUTargetMachine.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
new file mode 100644
index 000000000000..1e6dbd90b0c1
--- /dev/null
+++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
@@ -0,0 +1,29 @@
+//===-- TargetInfo/AMDGPUTargetInfo.h - TargetInfo for AMDGPU ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_TARGETINFO_AMDGPUTARGETINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_TARGETINFO_AMDGPUTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+/// The target which supports all AMD GPUs. This will eventually
+/// be deprecated and there will be an R600 target and a GCN target.
+Target &getTheAMDGPUTarget();
+
+/// The target for GCN GPUs
+Target &getTheGCNTarget();
+
+}
+
+#endif // LLVM_LIB_TARGET_AMDGPU_TARGETINFO_AMDGPUTARGETINFO_H
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 9eb4c6513cce..075e08986c0c 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -1,9 +1,8 @@
//===-- AMDGPUAsmUtils.cpp - AsmParser/InstPrinter common -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUAsmUtils.h"
@@ -23,8 +22,8 @@ const char* const IdSymbolic[] = {
nullptr,
nullptr,
nullptr,
- nullptr,
- nullptr,
+ "MSG_GS_ALLOC_REQ",
+ "MSG_GET_DOORBELL",
nullptr,
nullptr,
nullptr,
@@ -69,7 +68,17 @@ const char* const IdSymbolic[] = {
nullptr,
nullptr,
nullptr,
- "HW_REG_SH_MEM_BASES"
+ "HW_REG_SH_MEM_BASES",
+ "HW_REG_TBA_LO",
+ "HW_REG_TBA_HI",
+ "HW_REG_TMA_LO",
+ "HW_REG_TMA_HI",
+ "HW_REG_FLAT_SCR_LO",
+ "HW_REG_FLAT_SCR_HI",
+ "HW_REG_XNACK_MASK",
+ nullptr, // HW_ID1, no predictable values
+ nullptr, // HW_ID2, no predictable values
+ "HW_REG_POPS_PACKER"
};
} // namespace Hwreg
@@ -86,5 +95,18 @@ const char* const IdSymbolic[] = {
};
} // namespace Swizzle
+
+namespace VGPRIndexMode {
+
+// This must be in sync with llvm::AMDGPU::VGPRIndexMode::Id enum members, see SIDefines.h.
+const char* const IdSymbolic[] = {
+ "SRC0",
+ "SRC1",
+ "SRC2",
+ "DST",
+};
+
+} // namespace VGPRIndexMode
+
} // namespace AMDGPU
} // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
index ebb2be22b487..cd91c5f6edd5 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h
@@ -1,9 +1,8 @@
//===-- AMDGPUAsmUtils.h - AsmParser/InstPrinter common ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,6 +30,13 @@ namespace Swizzle { // Symbolic names for the swizzle(...) syntax.
extern const char* const IdSymbolic[];
} // namespace Swizzle
+
+namespace VGPRIndexMode { // Symbolic names for the gpr_idx(...) syntax.
+
+extern const char* const IdSymbolic[];
+
+} // namespace VGPRIndexMode
+
} // namespace AMDGPU
} // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 54c866bdc63c..e90f40e6abea 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1,9 +1,8 @@
//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
+#include "AMDGPUAsmUtils.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -85,7 +85,9 @@ unsigned getExpcntBitWidth() { return 3; }
unsigned getLgkmcntBitShift() { return 8; }
/// \returns Lgkmcnt bit width.
-unsigned getLgkmcntBitWidth() { return 4; }
+unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
+ return (VersionMajor >= 10) ? 6 : 4;
+}
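// A minimal sketch of what these shift/width helpers imply for pulling the
// lgkmcnt field out of an s_waitcnt immediate; the function names below are
// local stand-ins, not the LLVM API.
static unsigned lgkmcntShift() { return 8; }
static unsigned lgkmcntWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

// Extract lgkmcnt: 4 bits (0-15) before GFX10, 6 bits (0-63) from GFX10 on.
unsigned decodeLgkmcnt(unsigned VersionMajor, unsigned Waitcnt) {
  unsigned Mask = (1u << lgkmcntWidth(VersionMajor)) - 1;
  return (Waitcnt >> lgkmcntShift()) & Mask;
}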
/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }
@@ -99,18 +101,11 @@ namespace llvm {
namespace AMDGPU {
-struct MIMGInfo {
- uint16_t Opcode;
- uint16_t BaseOpcode;
- uint8_t MIMGEncoding;
- uint8_t VDataDwords;
- uint8_t VAddrDwords;
-};
-
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
+#define GET_MIMGMIPMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
@@ -120,6 +115,11 @@ int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
return Info ? Info->Opcode : -1;
}
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
+ const MIMGInfo *Info = getMIMGInfo(Opc);
+ return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
+}
+
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
const MIMGInfo *NewInfo =
@@ -230,7 +230,8 @@ unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- if (!STI->getFeatureBits().test(FeatureGCN))
+ assert(FlatWorkGroupSize != 0);
+ if (STI->getTargetTriple().getArch() != Triple::amdgcn)
return 8;
unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
if (N == 1)
@@ -279,6 +280,8 @@ unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return getAddressableNumSGPRs(STI);
if (Version.Major >= 8)
return 16;
return 8;
@@ -300,6 +303,8 @@ unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return 106;
if (Version.Major >= 8)
return 102;
return 104;
@@ -308,6 +313,10 @@ unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return 0;
+
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
@@ -322,8 +331,10 @@ unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(STI->getCPU());
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return Addressable ? AddressableNumSGPRs : 108;
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
@@ -340,6 +351,9 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
ExtraSGPRs = 2;
IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return ExtraSGPRs;
+
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -366,12 +380,17 @@ unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
- return 4;
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32) {
+ bool IsWave32 = EnableWavefrontSize32 ?
+ *EnableWavefrontSize32 :
+ STI->getFeatureBits().test(FeatureWavefrontSize32);
+ return IsWave32 ? 8 : 4;
}
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
- return getVGPRAllocGranule(STI);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32) {
+ return getVGPRAllocGranule(STI, EnableWavefrontSize32);
}
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
@@ -402,10 +421,12 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
+ Optional<bool> EnableWavefrontSize32) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs),
+ getVGPREncodingGranule(STI, EnableWavefrontSize32));
// VGPRBlocks is the actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(STI) - 1;
+ return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}
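
Wave32 targets double the VGPR granule from 4 to 8, so the same VGPR count can encode to a different block count. A standalone worked example of the align-then-divide arithmetic used above (37 VGPRs encode as 4 blocks with a granule of 8, but as 9 blocks with a granule of 4):

// Standalone sketch of the VGPR block encoding: align up to the granule,
// divide, and subtract one (the encoded field is "blocks minus 1").
unsigned numVGPRBlocks(unsigned NumVGPRs, bool IsWave32) {
  unsigned Granule = IsWave32 ? 8 : 4;
  unsigned Aligned = ((NumVGPRs + Granule - 1) / Granule) * Granule;
  return Aligned / Granule - 1; // e.g. 37 VGPRs -> 4 (wave32) or 9 (wave64)
}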
} // end namespace IsaInfo
@@ -423,7 +444,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.amd_machine_version_minor = Version.Minor;
Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
- // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
// If the code object does not support indirect functions, then the value must
@@ -435,11 +455,25 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.kernarg_segment_alignment = 4;
Header.group_segment_alignment = 4;
Header.private_segment_alignment = 4;
+
+ if (Version.Major >= 10) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+ Header.wavefront_size = 5;
+ Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
+ Header.compute_pgm_resource_registers |=
+ S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+ S_00B848_MEM_ORDERED(1);
+ }
}
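
The wavefront_size field of amd_kernel_code_t holds the log2 of the wave width, so the default of 6 means 64 lanes and the gfx10 wave32 path above writes 5 for 32 lanes. A one-line sketch of the decoding:

// Standalone sketch: wavefront_size is a log2, so the lane count is 1 << value.
unsigned waveWidth(unsigned WavefrontSizeLog2) {
  return 1u << WavefrontSizeLog2; // 5 -> 32 lanes, 6 -> 64 lanes
}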
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+
amdhsa::kernel_descriptor_t KD;
memset(&KD, 0, sizeof(KD));
+
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
@@ -449,6 +483,16 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ if (Version.Major >= 10) {
+ AMDHSA_BITS_SET(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+ STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+ }
return KD;
}
@@ -523,13 +567,14 @@ unsigned getExpcntBitMask(const IsaVersion &Version) {
}
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
- return (1 << getLgkmcntBitWidth()) - 1;
+ return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}
unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
if (Version.Major < 9)
return Waitcnt;
@@ -555,7 +600,8 @@ unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
}
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+ return unpackBits(Waitcnt, getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
}
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
@@ -591,7 +637,8 @@ unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
- return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+ return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
}
unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -607,6 +654,181 @@ unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
+//===----------------------------------------------------------------------===//
+// hwreg
+//===----------------------------------------------------------------------===//
+
+namespace Hwreg {
+
+int64_t getHwregId(const StringRef Name) {
+ for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
+ if (IdSymbolic[Id] && Name == IdSymbolic[Id])
+ return Id;
+ }
+ return ID_UNKNOWN_;
+}
+
+static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
+ if (isSI(STI) || isCI(STI) || isVI(STI))
+ return ID_SYMBOLIC_FIRST_GFX9_;
+ else if (isGFX9(STI))
+ return ID_SYMBOLIC_FIRST_GFX10_;
+ else
+ return ID_SYMBOLIC_LAST_;
+}
+
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
+ return ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
+ IdSymbolic[Id];
+}
+
+bool isValidHwreg(int64_t Id) {
+ return 0 <= Id && isUInt<ID_WIDTH_>(Id);
+}
+
+bool isValidHwregOffset(int64_t Offset) {
+ return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
+}
+
+bool isValidHwregWidth(int64_t Width) {
+ return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
+}
+
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
+ return (Id << ID_SHIFT_) |
+ (Offset << OFFSET_SHIFT_) |
+ ((Width - 1) << WIDTH_M1_SHIFT_);
+}
+
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
+ return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
+}
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
+ Id = (Val & ID_MASK_) >> ID_SHIFT_;
+ Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
+ Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
+}
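
encodeHwreg() and decodeHwreg() are exact inverses: the id, offset, and width-minus-one fields are packed at fixed shifts defined in SIDefines.h. A standalone round-trip sketch; the shift and mask constants below are illustrative assumptions for the sketch, not the authoritative SIDefines.h values:

#include <cassert>
#include <cstdint>

// Illustrative field layout (assumption, for the sketch only).
constexpr unsigned ID_SHIFT = 0, OFFSET_SHIFT = 6, WIDTH_M1_SHIFT = 11;
constexpr unsigned ID_MASK = 0x3fu << ID_SHIFT;
constexpr unsigned OFFSET_MASK = 0x1fu << OFFSET_SHIFT;
constexpr unsigned WIDTH_M1_MASK = 0x1fu << WIDTH_M1_SHIFT;

uint64_t encode(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT) | (Offset << OFFSET_SHIFT) | ((Width - 1) << WIDTH_M1_SHIFT);
}

void decode(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK) >> ID_SHIFT;
  Offset = (Val & OFFSET_MASK) >> OFFSET_SHIFT;
  Width = ((Val & WIDTH_M1_MASK) >> WIDTH_M1_SHIFT) + 1;
}

int main() {
  unsigned Id, Offset, Width;
  decode(static_cast<unsigned>(encode(15, 0, 32)), Id, Offset, Width);
  assert(Id == 15 && Offset == 0 && Width == 32); // the packing round-trips
}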
+
+} // namespace Hwreg
+
+//===----------------------------------------------------------------------===//
+// SendMsg
+//===----------------------------------------------------------------------===//
+
+namespace SendMsg {
+
+int64_t getMsgId(const StringRef Name) {
+ for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
+ if (IdSymbolic[i] && Name == IdSymbolic[i])
+ return i;
+ }
+ return ID_UNKNOWN_;
+}
+
+static bool isValidMsgId(int64_t MsgId) {
+ return (ID_GAPS_FIRST_ <= MsgId && MsgId < ID_GAPS_LAST_) && IdSymbolic[MsgId];
+}
+
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
+ if (Strict) {
+ if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
+ return isGFX9(STI) || isGFX10(STI);
+ else
+ return isValidMsgId(MsgId);
+ } else {
+ return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
+ }
+}
+
+StringRef getMsgName(int64_t MsgId) {
+ return isValidMsgId(MsgId)? IdSymbolic[MsgId] : "";
+}
+
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
+ const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
+ const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
+ const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
+ for (int i = F; i < L; ++i) {
+ if (Name == S[i]) {
+ return i;
+ }
+ }
+ return OP_UNKNOWN_;
+}
+
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict) {
+
+ if (!Strict)
+ return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
+
+ switch(MsgId)
+ {
+ case ID_GS:
+ return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
+ case ID_GS_DONE:
+ return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
+ case ID_SYSMSG:
+ return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
+ default:
+ return OpId == OP_NONE_;
+ }
+}
+
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
+ assert(msgRequiresOp(MsgId));
+ return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
+}
+
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict) {
+
+ if (!Strict)
+ return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
+
+ switch(MsgId)
+ {
+ case ID_GS:
+ return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
+ case ID_GS_DONE:
+ return (OpId == OP_GS_NOP)?
+ (StreamId == STREAM_ID_NONE_) :
+ (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
+ default:
+ return StreamId == STREAM_ID_NONE_;
+ }
+}
+
+bool msgRequiresOp(int64_t MsgId) {
+ return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
+}
+
+bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
+ return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
+}
+
+void decodeMsg(unsigned Val,
+ uint16_t &MsgId,
+ uint16_t &OpId,
+ uint16_t &StreamId) {
+ MsgId = Val & ID_MASK_;
+ OpId = (Val & OP_MASK_) >> OP_SHIFT_;
+ StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
+}
+
+uint64_t encodeMsg(uint64_t MsgId,
+ uint64_t OpId,
+ uint64_t StreamId) {
+ return (MsgId << ID_SHIFT_) |
+ (OpId << OP_SHIFT_) |
+ (StreamId << STREAM_ID_SHIFT_);
+}
+
+} // namespace SendMsg
+
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
@@ -679,6 +901,10 @@ bool isGFX9(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
+bool isGFX10(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
@@ -704,46 +930,46 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
CASE_CI_VI(FLAT_SCR) \
CASE_CI_VI(FLAT_SCR_LO) \
CASE_CI_VI(FLAT_SCR_HI) \
- CASE_VI_GFX9(TTMP0) \
- CASE_VI_GFX9(TTMP1) \
- CASE_VI_GFX9(TTMP2) \
- CASE_VI_GFX9(TTMP3) \
- CASE_VI_GFX9(TTMP4) \
- CASE_VI_GFX9(TTMP5) \
- CASE_VI_GFX9(TTMP6) \
- CASE_VI_GFX9(TTMP7) \
- CASE_VI_GFX9(TTMP8) \
- CASE_VI_GFX9(TTMP9) \
- CASE_VI_GFX9(TTMP10) \
- CASE_VI_GFX9(TTMP11) \
- CASE_VI_GFX9(TTMP12) \
- CASE_VI_GFX9(TTMP13) \
- CASE_VI_GFX9(TTMP14) \
- CASE_VI_GFX9(TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1) \
- CASE_VI_GFX9(TTMP2_TTMP3) \
- CASE_VI_GFX9(TTMP4_TTMP5) \
- CASE_VI_GFX9(TTMP6_TTMP7) \
- CASE_VI_GFX9(TTMP8_TTMP9) \
- CASE_VI_GFX9(TTMP10_TTMP11) \
- CASE_VI_GFX9(TTMP12_TTMP13) \
- CASE_VI_GFX9(TTMP14_TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
- CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0) \
+ CASE_VI_GFX9_GFX10(TTMP1) \
+ CASE_VI_GFX9_GFX10(TTMP2) \
+ CASE_VI_GFX9_GFX10(TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4) \
+ CASE_VI_GFX9_GFX10(TTMP5) \
+ CASE_VI_GFX9_GFX10(TTMP6) \
+ CASE_VI_GFX9_GFX10(TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8) \
+ CASE_VI_GFX9_GFX10(TTMP9) \
+ CASE_VI_GFX9_GFX10(TTMP10) \
+ CASE_VI_GFX9_GFX10(TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12) \
+ CASE_VI_GFX9_GFX10(TTMP13) \
+ CASE_VI_GFX9_GFX10(TTMP14) \
+ CASE_VI_GFX9_GFX10(TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
+ CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
+ CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
+ CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
+ CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
}
#define CASE_CI_VI(node) \
assert(!isSI(STI)); \
case node: return isCI(STI) ? node##_ci : node##_vi;
-#define CASE_VI_GFX9(node) \
- case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
+#define CASE_VI_GFX9_GFX10(node) \
+ case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
@@ -752,17 +978,17 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
-#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
+#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
unsigned mc2PseudoReg(unsigned Reg) {
MAP_REG2REG
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -779,10 +1005,17 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
return true;
default:
return false;
@@ -802,28 +1035,46 @@ unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::VGPR_32RegClassID:
+ case AMDGPU::VRegOrLds_32RegClassID:
+ case AMDGPU::AGPR_32RegClassID:
case AMDGPU::VS_32RegClassID:
+ case AMDGPU::AV_32RegClassID:
case AMDGPU::SReg_32RegClassID:
case AMDGPU::SReg_32_XM0RegClassID:
+ case AMDGPU::SRegOrLds_32RegClassID:
return 32;
case AMDGPU::SGPR_64RegClassID:
case AMDGPU::VS_64RegClassID:
+ case AMDGPU::AV_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
+ case AMDGPU::AReg_64RegClassID:
case AMDGPU::SReg_64_XEXECRegClassID:
return 64;
+ case AMDGPU::SGPR_96RegClassID:
+ case AMDGPU::SReg_96RegClassID:
case AMDGPU::VReg_96RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
case AMDGPU::VReg_128RegClassID:
+ case AMDGPU::AReg_128RegClassID:
return 128;
+ case AMDGPU::SGPR_160RegClassID:
+ case AMDGPU::SReg_160RegClassID:
+ case AMDGPU::VReg_160RegClassID:
+ return 160;
case AMDGPU::SReg_256RegClassID:
case AMDGPU::VReg_256RegClassID:
return 256;
case AMDGPU::SReg_512RegClassID:
case AMDGPU::VReg_512RegClassID:
+ case AMDGPU::AReg_512RegClassID:
return 512;
+ case AMDGPU::SReg_1024RegClassID:
+ case AMDGPU::VReg_1024RegClassID:
+ case AMDGPU::AReg_1024RegClassID:
+ return 1024;
default:
llvm_unreachable("Unexpected register class");
}
@@ -905,6 +1156,13 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
+ if (isInt<16>(Literal) || isUInt<16>(Literal)) {
+ int16_t Trunc = static_cast<int16_t>(Literal);
+ return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+ }
+ if (!(Literal & 0xffff))
+ return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
+
int16_t Lo16 = static_cast<int16_t>(Literal);
int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
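
The new early-outs make the packed check effectively per-half: a literal whose meaningful half is itself an inline 16-bit constant qualifies even when the other half is zero. A short worked example, given as a sketch (half-precision 1.0 is 0x3C00 and 0.5 is 0x3800):

// 0x00003C00 : fits in 16 bits, 1.0h is an inline constant         -> inlinable
// 0x3C000000 : low half is zero, high half 1.0h is inline          -> inlinable
// 0x3C003800 : halves differ (1.0h vs 0.5h), no early-out applies  -> not inlinable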
@@ -936,15 +1194,19 @@ bool isArgPassedInSGPR(const Argument *A) {
}
}
+static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
+ return isGCN3Encoding(ST) || isGFX10(ST);
+}
+
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
- if (isGCN3Encoding(ST))
+ if (hasSMEMByteOffset(ST))
return ByteOffset;
return ByteOffset >> 2;
}
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
- return isGCN3Encoding(ST) ?
+ return (hasSMEMByteOffset(ST)) ?
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
@@ -994,6 +1256,19 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
return true;
}
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+ *this = getDefaultForCallingConv(F.getCallingConv());
+
+ StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+ if (!IEEEAttr.empty())
+ IEEE = IEEEAttr == "true";
+
+ StringRef DX10ClampAttr
+ = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+ if (!DX10ClampAttr.empty())
+ DX10Clamp = DX10ClampAttr == "true";
+}
+
namespace {
struct SourceOfDivergence {
@@ -1009,5 +1284,6 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
+
} // namespace AMDGPU
} // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 20123ed4ac81..209ef7eef749 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1,9 +1,8 @@
//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -46,6 +45,7 @@ namespace AMDGPU {
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
+#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"
namespace IsaInfo {
@@ -150,10 +150,18 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
/// \returns VGPR allocation granularity for given subtarget \p STI.
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32 = None);
/// \returns VGPR encoding granularity for given subtarget \p STI.
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match
+/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
+ Optional<bool> EnableWavefrontSize32 = None);
/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
@@ -171,13 +179,20 @@ unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+///
+/// For subtargets which support it, \p EnableWavefrontSize32 should match the
+/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
+ Optional<bool> EnableWavefrontSize32 = None);
} // end namespace IsaInfo
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+LLVM_READONLY
+int getSOPPWithRelaxation(uint16_t Opcode);
+
struct MIMGBaseOpcodeInfo {
MIMGBaseOpcode BaseOpcode;
bool Store;
@@ -201,26 +216,53 @@ struct MIMGDimInfo {
uint8_t NumCoords;
uint8_t NumGradients;
bool DA;
+ uint8_t Encoding;
+ const char *AsmSuffix;
};
LLVM_READONLY
-const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
struct MIMGLZMappingInfo {
MIMGBaseOpcode L;
MIMGBaseOpcode LZ;
};
+struct MIMGMIPMappingInfo {
+ MIMGBaseOpcode MIP;
+ MIMGBaseOpcode NONMIP;
+};
+
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
LLVM_READONLY
+const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned L);
+
+LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
unsigned VDataDwords, unsigned VAddrDwords);
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
+struct MIMGInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t MIMGEncoding;
+ uint8_t VDataDwords;
+ uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);
@@ -245,7 +287,8 @@ int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
@@ -285,21 +328,30 @@ struct Waitcnt {
unsigned VmCnt = ~0u;
unsigned ExpCnt = ~0u;
unsigned LgkmCnt = ~0u;
+ unsigned VsCnt = ~0u;
Waitcnt() {}
- Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
- : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
+
+ static Waitcnt allZero(const IsaVersion &Version) {
+ return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+ }
+ static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
- static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+ bool hasWait() const {
+ return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+ }
bool dominates(const Waitcnt &Other) const {
return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
- LgkmCnt <= Other.LgkmCnt;
+ LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
}
Waitcnt combined(const Waitcnt &Other) const {
return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
- std::min(LgkmCnt, Other.LgkmCnt));
+ std::min(LgkmCnt, Other.LgkmCnt),
+ std::min(VsCnt, Other.VsCnt));
}
};
@@ -332,7 +384,8 @@ unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
-/// \p Lgkmcnt = \p Waitcnt[11:8]
+/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
+/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
@@ -357,7 +410,8 @@ unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
+/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
@@ -367,6 +421,75 @@ unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
+namespace Hwreg {
+
+LLVM_READONLY
+int64_t getHwregId(const StringRef Name);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
+
+LLVM_READNONE
+bool isValidHwreg(int64_t Id);
+
+LLVM_READNONE
+bool isValidHwregOffset(int64_t Offset);
+
+LLVM_READNONE
+bool isValidHwregWidth(int64_t Width);
+
+LLVM_READNONE
+uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
+
+LLVM_READNONE
+StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
+
+void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
+
+} // namespace Hwreg
+
+namespace SendMsg {
+
+LLVM_READONLY
+int64_t getMsgId(const StringRef Name);
+
+LLVM_READONLY
+int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
+
+LLVM_READNONE
+StringRef getMsgName(int64_t MsgId);
+
+LLVM_READNONE
+StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
+
+LLVM_READNONE
+bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
+
+LLVM_READNONE
+bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
+
+LLVM_READNONE
+bool msgRequiresOp(int64_t MsgId);
+
+LLVM_READNONE
+bool msgSupportsStream(int64_t MsgId, int64_t OpId);
+
+void decodeMsg(unsigned Val,
+ uint16_t &MsgId,
+ uint16_t &OpId,
+ uint16_t &StreamId);
+
+LLVM_READNONE
+uint64_t encodeMsg(uint64_t MsgId,
+ uint64_t OpId,
+ uint64_t StreamId);
+
+} // namespace SendMsg
+
+
unsigned getInitialPSInputAddr(const Function &F);
LLVM_READNONE
@@ -399,6 +522,7 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX10(const MCSubtargetInfo &STI);
/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
@@ -440,6 +564,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
return 4;
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -454,6 +580,12 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
return 2;
default:
@@ -496,6 +628,45 @@ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
+// Track defaults for fields in the MODE register.
+struct SIModeRegisterDefaults {
+ /// Floating point opcodes that support exception flag gathering quiet and
+ /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
+  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
+ /// quieting.
+ bool IEEE : 1;
+
+ /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+ /// clamp NaN to zero; otherwise, pass NaN through.
+ bool DX10Clamp : 1;
+
+ // TODO: FP mode fields
+
+ SIModeRegisterDefaults() :
+ IEEE(true),
+ DX10Clamp(true) {}
+
+ SIModeRegisterDefaults(const Function &F);
+
+ static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ SIModeRegisterDefaults Mode;
+ Mode.DX10Clamp = true;
+ Mode.IEEE = AMDGPU::isCompute(CC);
+ return Mode;
+ }
+
+ bool operator ==(const SIModeRegisterDefaults Other) const {
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+ }
+
+ // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
+ // be able to override.
+ bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+ return *this == CalleeMode;
+ }
+};
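
A usage sketch, assuming the LLVM headers above are available and F is an llvm::Function from a parsed module: the Function constructor starts from the calling-convention defaults and then lets the "amdgpu-ieee" and "amdgpu-dx10-clamp" string attributes override the individual bits.

// Sketch only: relies on the declarations above, not standalone.
SIModeRegisterDefaults CalleeMode(F); // attributes override the CC defaults
SIModeRegisterDefaults CallerMode =
    SIModeRegisterDefaults::getDefaultForCallingConv(CallingConv::AMDGPU_KERNEL);
if (!CallerMode.isInlineCompatible(CalleeMode)) {
  // Modes differ (e.g. "amdgpu-ieee"="false" on the callee), so inlining is rejected.
}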
+
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
new file mode 100644
index 000000000000..db20d5ccf5f9
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -0,0 +1,723 @@
+//===-- AMDGPUPALMetadata.cpp - Accumulate and print AMDGPU PAL metadata -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This class has methods called by AMDGPUAsmPrinter to accumulate and print
+/// the PAL metadata.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUPALMetadata.h"
+#include "AMDGPU.h"
+#include "AMDGPUAsmPrinter.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
+#include "SIDefines.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/Support/EndianStream.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// Read the PAL metadata from IR metadata, where it was put by the frontend.
+void AMDGPUPALMetadata::readFromIR(Module &M) {
+ auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata.msgpack");
+ if (NamedMD && NamedMD->getNumOperands()) {
+ // This is the new msgpack format for metadata. It is a NamedMD containing
+ // an MDTuple containing an MDString containing the msgpack data.
+ BlobType = ELF::NT_AMDGPU_METADATA;
+ auto MDN = dyn_cast<MDTuple>(NamedMD->getOperand(0));
+ if (MDN && MDN->getNumOperands()) {
+ if (auto MDS = dyn_cast<MDString>(MDN->getOperand(0)))
+ setFromMsgPackBlob(MDS->getString());
+ }
+ return;
+ }
+ BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+ NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
+ if (!NamedMD || !NamedMD->getNumOperands())
+ return;
+ // This is the old reg=value pair format for metadata. It is a NamedMD
+  // containing an MDTuple containing a number of MDNodes, each of which is an
+  // integer value; each pair of integer values forms a key=value pair that we
+  // store as Registers[key]=value in the map.
+ auto Tuple = dyn_cast<MDTuple>(NamedMD->getOperand(0));
+ if (!Tuple)
+ return;
+ for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) {
+ auto Key = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I));
+ auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
+ if (!Key || !Val)
+ continue;
+ setRegister(Key->getZExtValue(), Val->getZExtValue());
+ }
+}
+
+// Set PAL metadata from a binary blob from the applicable .note record.
+// Returns false if bad format. Blob must remain valid for the lifetime of the
+// Metadata.
+bool AMDGPUPALMetadata::setFromBlob(unsigned Type, StringRef Blob) {
+ BlobType = Type;
+ if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+ return setFromLegacyBlob(Blob);
+ return setFromMsgPackBlob(Blob);
+}
+
+// Set PAL metadata from legacy (array of key=value pairs) blob.
+bool AMDGPUPALMetadata::setFromLegacyBlob(StringRef Blob) {
+ auto Data = reinterpret_cast<const uint32_t *>(Blob.data());
+ for (unsigned I = 0; I != Blob.size() / sizeof(uint32_t) / 2; ++I)
+ setRegister(Data[I * 2], Data[I * 2 + 1]);
+ return true;
+}
+
+// Set PAL metadata from msgpack blob.
+bool AMDGPUPALMetadata::setFromMsgPackBlob(StringRef Blob) {
+ msgpack::Reader Reader(Blob);
+ return MsgPackDoc.readFromBlob(Blob, /*Multi=*/false);
+}
+
+// Given the calling convention, calculate the register number for rsrc1. In
+// principle the register number could change in future hardware, but we know
+// it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so
+// we can use fixed values.
+static unsigned getRsrc1Reg(CallingConv::ID CC) {
+ switch (CC) {
+ default:
+ return PALMD::R_2E12_COMPUTE_PGM_RSRC1;
+ case CallingConv::AMDGPU_LS:
+ return PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS;
+ case CallingConv::AMDGPU_HS:
+ return PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS;
+ case CallingConv::AMDGPU_ES:
+ return PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES;
+ case CallingConv::AMDGPU_GS:
+ return PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS;
+ case CallingConv::AMDGPU_VS:
+ return PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS;
+ case CallingConv::AMDGPU_PS:
+ return PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS;
+ }
+}
+
+// Calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
+// with a constant offset to access any non-register shader-specific PAL
+// metadata key.
+static unsigned getScratchSizeKey(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_PS:
+ return PALMD::Key::PS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_VS:
+ return PALMD::Key::VS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_GS:
+ return PALMD::Key::GS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_ES:
+ return PALMD::Key::ES_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_HS:
+ return PALMD::Key::HS_SCRATCH_SIZE;
+ case CallingConv::AMDGPU_LS:
+ return PALMD::Key::LS_SCRATCH_SIZE;
+ default:
+ return PALMD::Key::CS_SCRATCH_SIZE;
+ }
+}
+
+// Set the rsrc1 register in the metadata for a particular shader stage.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) {
+ setRegister(getRsrc1Reg(CC), Val);
+}
+
+// Set the rsrc2 register in the metadata for a particular shader stage.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) {
+ setRegister(getRsrc1Reg(CC) + 1, Val);
+}
+
+// Set the SPI_PS_INPUT_ENA register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) {
+ setRegister(PALMD::R_A1B3_SPI_PS_INPUT_ENA, Val);
+}
+
+// Set the SPI_PS_INPUT_ADDR register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setSpiPsInputAddr(unsigned Val) {
+ setRegister(PALMD::R_A1B4_SPI_PS_INPUT_ADDR, Val);
+}
+
+// Get a register from the metadata, or 0 if not currently set.
+unsigned AMDGPUPALMetadata::getRegister(unsigned Reg) {
+ auto Regs = getRegisters();
+ auto It = Regs.find(MsgPackDoc.getNode(Reg));
+ if (It == Regs.end())
+ return 0;
+ auto N = It->second;
+ if (N.getKind() != msgpack::Type::UInt)
+ return 0;
+ return N.getUInt();
+}
+
+// Set a register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) {
+ if (!isLegacy()) {
+    // In the new MsgPack format, ignore registers numbered >= 0x10000000;
+    // they are PAL ABI pseudo-registers from the old non-MsgPack format.
+ if (Reg >= 0x10000000)
+ return;
+ }
+ auto &N = getRegisters()[MsgPackDoc.getNode(Reg)];
+ if (N.getKind() == msgpack::Type::UInt)
+ Val |= N.getUInt();
+ N = N.getDocument()->getNode(Val);
+}
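
Because setRegister() ORs into any existing value, the per-field setters such as setRsrc1() accumulate bits across calls rather than overwriting earlier ones. A usage sketch (assumes the class above; 0x2e12 is COMPUTE_PGM_RSRC1, cf. PALMD::R_2E12_COMPUTE_PGM_RSRC1):

AMDGPUPALMetadata MD;
MD.setRegister(0x2e12, 0x5); // first contribution
MD.setRegister(0x2e12, 0x2); // ORed into the previous value
unsigned V = MD.getRegister(0x2e12); // V == 0x7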
+
+// Set the entry point name for one shader.
+void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
+ if (isLegacy())
+ return;
+ // Msgpack format.
+ getHwStage(CC)[".entry_point"] = MsgPackDoc.getNode(Name, /*Copy=*/true);
+}
+
+// Set the number of used vgprs in the metadata. This is an optional
+// advisory record for logging etc; wave dispatch actually uses the rsrc1
+// register for the shader stage to determine the number of vgprs to
+// allocate.
+void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
+ PALMD::Key::VS_NUM_USED_VGPRS -
+ PALMD::Key::VS_SCRATCH_SIZE;
+ setRegister(NumUsedVgprsKey, Val);
+ return;
+ }
+ // Msgpack format.
+ getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the number of used sgprs in the metadata. This is an optional advisory
+// record for logging etc; wave dispatch actually uses the rsrc1 register for
+// the shader stage to determine the number of sgprs to allocate.
+void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
+ PALMD::Key::VS_NUM_USED_SGPRS -
+ PALMD::Key::VS_SCRATCH_SIZE;
+ setRegister(NumUsedSgprsKey, Val);
+ return;
+ }
+ // Msgpack format.
+ getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the scratch size in the metadata.
+void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ setRegister(getScratchSizeKey(CC), Val);
+ return;
+ }
+ // Msgpack format.
+ getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
+}
+
+// Set the hardware register bit in PAL metadata to enable wave32 on the
+// shader of the given calling convention.
+void AMDGPUPALMetadata::setWave32(unsigned CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_HS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_GS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_VS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_PS:
+ setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_CS:
+ setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR,
+ S_00B800_CS_W32_EN(1));
+ break;
+ }
+}
+
+// Convert a register number to name, for display by toString().
+// Returns nullptr if none.
+static const char *getRegisterName(unsigned RegNum) {
+ // Table of registers.
+ static const struct RegInfo {
+ unsigned Num;
+ const char *Name;
+ } RegInfoTable[] = {
+ // Registers that code generation sets/modifies metadata for.
+ {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS, "SPI_SHADER_PGM_RSRC1_VS"},
+ {PALMD::R_2C4A_SPI_SHADER_PGM_RSRC1_VS + 1, "SPI_SHADER_PGM_RSRC2_VS"},
+ {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS, "SPI_SHADER_PGM_RSRC1_LS"},
+ {PALMD::R_2D4A_SPI_SHADER_PGM_RSRC1_LS + 1, "SPI_SHADER_PGM_RSRC2_LS"},
+ {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS, "SPI_SHADER_PGM_RSRC1_HS"},
+ {PALMD::R_2D0A_SPI_SHADER_PGM_RSRC1_HS + 1, "SPI_SHADER_PGM_RSRC2_HS"},
+ {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES, "SPI_SHADER_PGM_RSRC1_ES"},
+ {PALMD::R_2CCA_SPI_SHADER_PGM_RSRC1_ES + 1, "SPI_SHADER_PGM_RSRC2_ES"},
+ {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS, "SPI_SHADER_PGM_RSRC1_GS"},
+ {PALMD::R_2C8A_SPI_SHADER_PGM_RSRC1_GS + 1, "SPI_SHADER_PGM_RSRC2_GS"},
+ {PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR, "COMPUTE_DISPATCH_INITIATOR"},
+ {PALMD::R_2E12_COMPUTE_PGM_RSRC1, "COMPUTE_PGM_RSRC1"},
+ {PALMD::R_2E12_COMPUTE_PGM_RSRC1 + 1, "COMPUTE_PGM_RSRC2"},
+ {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS, "SPI_SHADER_PGM_RSRC1_PS"},
+ {PALMD::R_2C0A_SPI_SHADER_PGM_RSRC1_PS + 1, "SPI_SHADER_PGM_RSRC2_PS"},
+ {PALMD::R_A1B3_SPI_PS_INPUT_ENA, "SPI_PS_INPUT_ENA"},
+ {PALMD::R_A1B4_SPI_PS_INPUT_ADDR, "SPI_PS_INPUT_ADDR"},
+ {PALMD::R_A1B6_SPI_PS_IN_CONTROL, "SPI_PS_IN_CONTROL"},
+ {PALMD::R_A2D5_VGT_SHADER_STAGES_EN, "VGT_SHADER_STAGES_EN"},
+
+ // Registers not known to code generation.
+ {0x2c07, "SPI_SHADER_PGM_RSRC3_PS"},
+ {0x2c46, "SPI_SHADER_PGM_RSRC3_VS"},
+ {0x2c87, "SPI_SHADER_PGM_RSRC3_GS"},
+ {0x2cc7, "SPI_SHADER_PGM_RSRC3_ES"},
+ {0x2d07, "SPI_SHADER_PGM_RSRC3_HS"},
+ {0x2d47, "SPI_SHADER_PGM_RSRC3_LS"},
+
+ {0xa1c3, "SPI_SHADER_POS_FORMAT"},
+ {0xa1b1, "SPI_VS_OUT_CONFIG"},
+ {0xa207, "PA_CL_VS_OUT_CNTL"},
+ {0xa204, "PA_CL_CLIP_CNTL"},
+ {0xa206, "PA_CL_VTE_CNTL"},
+ {0xa2f9, "PA_SU_VTX_CNTL"},
+ {0xa293, "PA_SC_MODE_CNTL_1"},
+ {0xa2a1, "VGT_PRIMITIVEID_EN"},
+ {0x2c81, "SPI_SHADER_PGM_RSRC4_GS"},
+ {0x2e18, "COMPUTE_TMPRING_SIZE"},
+ {0xa1b5, "SPI_INTERP_CONTROL_0"},
+ {0xa1ba, "SPI_TMPRING_SIZE"},
+ {0xa1c4, "SPI_SHADER_Z_FORMAT"},
+ {0xa1c5, "SPI_SHADER_COL_FORMAT"},
+ {0xa203, "DB_SHADER_CONTROL"},
+ {0xa08f, "CB_SHADER_MASK"},
+ {0xa191, "SPI_PS_INPUT_CNTL_0"},
+ {0xa192, "SPI_PS_INPUT_CNTL_1"},
+ {0xa193, "SPI_PS_INPUT_CNTL_2"},
+ {0xa194, "SPI_PS_INPUT_CNTL_3"},
+ {0xa195, "SPI_PS_INPUT_CNTL_4"},
+ {0xa196, "SPI_PS_INPUT_CNTL_5"},
+ {0xa197, "SPI_PS_INPUT_CNTL_6"},
+ {0xa198, "SPI_PS_INPUT_CNTL_7"},
+ {0xa199, "SPI_PS_INPUT_CNTL_8"},
+ {0xa19a, "SPI_PS_INPUT_CNTL_9"},
+ {0xa19b, "SPI_PS_INPUT_CNTL_10"},
+ {0xa19c, "SPI_PS_INPUT_CNTL_11"},
+ {0xa19d, "SPI_PS_INPUT_CNTL_12"},
+ {0xa19e, "SPI_PS_INPUT_CNTL_13"},
+ {0xa19f, "SPI_PS_INPUT_CNTL_14"},
+ {0xa1a0, "SPI_PS_INPUT_CNTL_15"},
+ {0xa1a1, "SPI_PS_INPUT_CNTL_16"},
+ {0xa1a2, "SPI_PS_INPUT_CNTL_17"},
+ {0xa1a3, "SPI_PS_INPUT_CNTL_18"},
+ {0xa1a4, "SPI_PS_INPUT_CNTL_19"},
+ {0xa1a5, "SPI_PS_INPUT_CNTL_20"},
+ {0xa1a6, "SPI_PS_INPUT_CNTL_21"},
+ {0xa1a7, "SPI_PS_INPUT_CNTL_22"},
+ {0xa1a8, "SPI_PS_INPUT_CNTL_23"},
+ {0xa1a9, "SPI_PS_INPUT_CNTL_24"},
+ {0xa1aa, "SPI_PS_INPUT_CNTL_25"},
+ {0xa1ab, "SPI_PS_INPUT_CNTL_26"},
+ {0xa1ac, "SPI_PS_INPUT_CNTL_27"},
+ {0xa1ad, "SPI_PS_INPUT_CNTL_28"},
+ {0xa1ae, "SPI_PS_INPUT_CNTL_29"},
+ {0xa1af, "SPI_PS_INPUT_CNTL_30"},
+ {0xa1b0, "SPI_PS_INPUT_CNTL_31"},
+
+ {0xa2ce, "VGT_GS_MAX_VERT_OUT"},
+ {0xa2ab, "VGT_ESGS_RING_ITEMSIZE"},
+ {0xa290, "VGT_GS_MODE"},
+ {0xa291, "VGT_GS_ONCHIP_CNTL"},
+ {0xa2d7, "VGT_GS_VERT_ITEMSIZE"},
+ {0xa2d8, "VGT_GS_VERT_ITEMSIZE_1"},
+ {0xa2d9, "VGT_GS_VERT_ITEMSIZE_2"},
+ {0xa2da, "VGT_GS_VERT_ITEMSIZE_3"},
+ {0xa298, "VGT_GSVS_RING_OFFSET_1"},
+ {0xa299, "VGT_GSVS_RING_OFFSET_2"},
+ {0xa29a, "VGT_GSVS_RING_OFFSET_3"},
+
+ {0xa2e4, "VGT_GS_INSTANCE_CNT"},
+ {0xa297, "VGT_GS_PER_VS"},
+ {0xa29b, "VGT_GS_OUT_PRIM_TYPE"},
+ {0xa2ac, "VGT_GSVS_RING_ITEMSIZE"},
+
+ {0xa2ad, "VGT_REUSE_OFF"},
+ {0xa1b8, "SPI_BARYC_CNTL"},
+
+ {0x2c4c, "SPI_SHADER_USER_DATA_VS_0"},
+ {0x2c4d, "SPI_SHADER_USER_DATA_VS_1"},
+ {0x2c4e, "SPI_SHADER_USER_DATA_VS_2"},
+ {0x2c4f, "SPI_SHADER_USER_DATA_VS_3"},
+ {0x2c50, "SPI_SHADER_USER_DATA_VS_4"},
+ {0x2c51, "SPI_SHADER_USER_DATA_VS_5"},
+ {0x2c52, "SPI_SHADER_USER_DATA_VS_6"},
+ {0x2c53, "SPI_SHADER_USER_DATA_VS_7"},
+ {0x2c54, "SPI_SHADER_USER_DATA_VS_8"},
+ {0x2c55, "SPI_SHADER_USER_DATA_VS_9"},
+ {0x2c56, "SPI_SHADER_USER_DATA_VS_10"},
+ {0x2c57, "SPI_SHADER_USER_DATA_VS_11"},
+ {0x2c58, "SPI_SHADER_USER_DATA_VS_12"},
+ {0x2c59, "SPI_SHADER_USER_DATA_VS_13"},
+ {0x2c5a, "SPI_SHADER_USER_DATA_VS_14"},
+ {0x2c5b, "SPI_SHADER_USER_DATA_VS_15"},
+ {0x2c5c, "SPI_SHADER_USER_DATA_VS_16"},
+ {0x2c5d, "SPI_SHADER_USER_DATA_VS_17"},
+ {0x2c5e, "SPI_SHADER_USER_DATA_VS_18"},
+ {0x2c5f, "SPI_SHADER_USER_DATA_VS_19"},
+ {0x2c60, "SPI_SHADER_USER_DATA_VS_20"},
+ {0x2c61, "SPI_SHADER_USER_DATA_VS_21"},
+ {0x2c62, "SPI_SHADER_USER_DATA_VS_22"},
+ {0x2c63, "SPI_SHADER_USER_DATA_VS_23"},
+ {0x2c64, "SPI_SHADER_USER_DATA_VS_24"},
+ {0x2c65, "SPI_SHADER_USER_DATA_VS_25"},
+ {0x2c66, "SPI_SHADER_USER_DATA_VS_26"},
+ {0x2c67, "SPI_SHADER_USER_DATA_VS_27"},
+ {0x2c68, "SPI_SHADER_USER_DATA_VS_28"},
+ {0x2c69, "SPI_SHADER_USER_DATA_VS_29"},
+ {0x2c6a, "SPI_SHADER_USER_DATA_VS_30"},
+ {0x2c6b, "SPI_SHADER_USER_DATA_VS_31"},
+
+ {0x2ccc, "SPI_SHADER_USER_DATA_ES_0"},
+ {0x2ccd, "SPI_SHADER_USER_DATA_ES_1"},
+ {0x2cce, "SPI_SHADER_USER_DATA_ES_2"},
+ {0x2ccf, "SPI_SHADER_USER_DATA_ES_3"},
+ {0x2cd0, "SPI_SHADER_USER_DATA_ES_4"},
+ {0x2cd1, "SPI_SHADER_USER_DATA_ES_5"},
+ {0x2cd2, "SPI_SHADER_USER_DATA_ES_6"},
+ {0x2cd3, "SPI_SHADER_USER_DATA_ES_7"},
+ {0x2cd4, "SPI_SHADER_USER_DATA_ES_8"},
+ {0x2cd5, "SPI_SHADER_USER_DATA_ES_9"},
+ {0x2cd6, "SPI_SHADER_USER_DATA_ES_10"},
+ {0x2cd7, "SPI_SHADER_USER_DATA_ES_11"},
+ {0x2cd8, "SPI_SHADER_USER_DATA_ES_12"},
+ {0x2cd9, "SPI_SHADER_USER_DATA_ES_13"},
+ {0x2cda, "SPI_SHADER_USER_DATA_ES_14"},
+ {0x2cdb, "SPI_SHADER_USER_DATA_ES_15"},
+ {0x2cdc, "SPI_SHADER_USER_DATA_ES_16"},
+ {0x2cdd, "SPI_SHADER_USER_DATA_ES_17"},
+ {0x2cde, "SPI_SHADER_USER_DATA_ES_18"},
+ {0x2cdf, "SPI_SHADER_USER_DATA_ES_19"},
+ {0x2ce0, "SPI_SHADER_USER_DATA_ES_20"},
+ {0x2ce1, "SPI_SHADER_USER_DATA_ES_21"},
+ {0x2ce2, "SPI_SHADER_USER_DATA_ES_22"},
+ {0x2ce3, "SPI_SHADER_USER_DATA_ES_23"},
+ {0x2ce4, "SPI_SHADER_USER_DATA_ES_24"},
+ {0x2ce5, "SPI_SHADER_USER_DATA_ES_25"},
+ {0x2ce6, "SPI_SHADER_USER_DATA_ES_26"},
+ {0x2ce7, "SPI_SHADER_USER_DATA_ES_27"},
+ {0x2ce8, "SPI_SHADER_USER_DATA_ES_28"},
+ {0x2ce9, "SPI_SHADER_USER_DATA_ES_29"},
+ {0x2cea, "SPI_SHADER_USER_DATA_ES_30"},
+ {0x2ceb, "SPI_SHADER_USER_DATA_ES_31"},
+
+ {0x2c0c, "SPI_SHADER_USER_DATA_PS_0"},
+ {0x2c0d, "SPI_SHADER_USER_DATA_PS_1"},
+ {0x2c0e, "SPI_SHADER_USER_DATA_PS_2"},
+ {0x2c0f, "SPI_SHADER_USER_DATA_PS_3"},
+ {0x2c10, "SPI_SHADER_USER_DATA_PS_4"},
+ {0x2c11, "SPI_SHADER_USER_DATA_PS_5"},
+ {0x2c12, "SPI_SHADER_USER_DATA_PS_6"},
+ {0x2c13, "SPI_SHADER_USER_DATA_PS_7"},
+ {0x2c14, "SPI_SHADER_USER_DATA_PS_8"},
+ {0x2c15, "SPI_SHADER_USER_DATA_PS_9"},
+ {0x2c16, "SPI_SHADER_USER_DATA_PS_10"},
+ {0x2c17, "SPI_SHADER_USER_DATA_PS_11"},
+ {0x2c18, "SPI_SHADER_USER_DATA_PS_12"},
+ {0x2c19, "SPI_SHADER_USER_DATA_PS_13"},
+ {0x2c1a, "SPI_SHADER_USER_DATA_PS_14"},
+ {0x2c1b, "SPI_SHADER_USER_DATA_PS_15"},
+ {0x2c1c, "SPI_SHADER_USER_DATA_PS_16"},
+ {0x2c1d, "SPI_SHADER_USER_DATA_PS_17"},
+ {0x2c1e, "SPI_SHADER_USER_DATA_PS_18"},
+ {0x2c1f, "SPI_SHADER_USER_DATA_PS_19"},
+ {0x2c20, "SPI_SHADER_USER_DATA_PS_20"},
+ {0x2c21, "SPI_SHADER_USER_DATA_PS_21"},
+ {0x2c22, "SPI_SHADER_USER_DATA_PS_22"},
+ {0x2c23, "SPI_SHADER_USER_DATA_PS_23"},
+ {0x2c24, "SPI_SHADER_USER_DATA_PS_24"},
+ {0x2c25, "SPI_SHADER_USER_DATA_PS_25"},
+ {0x2c26, "SPI_SHADER_USER_DATA_PS_26"},
+ {0x2c27, "SPI_SHADER_USER_DATA_PS_27"},
+ {0x2c28, "SPI_SHADER_USER_DATA_PS_28"},
+ {0x2c29, "SPI_SHADER_USER_DATA_PS_29"},
+ {0x2c2a, "SPI_SHADER_USER_DATA_PS_30"},
+ {0x2c2b, "SPI_SHADER_USER_DATA_PS_31"},
+
+ {0x2e40, "COMPUTE_USER_DATA_0"},
+ {0x2e41, "COMPUTE_USER_DATA_1"},
+ {0x2e42, "COMPUTE_USER_DATA_2"},
+ {0x2e43, "COMPUTE_USER_DATA_3"},
+ {0x2e44, "COMPUTE_USER_DATA_4"},
+ {0x2e45, "COMPUTE_USER_DATA_5"},
+ {0x2e46, "COMPUTE_USER_DATA_6"},
+ {0x2e47, "COMPUTE_USER_DATA_7"},
+ {0x2e48, "COMPUTE_USER_DATA_8"},
+ {0x2e49, "COMPUTE_USER_DATA_9"},
+ {0x2e4a, "COMPUTE_USER_DATA_10"},
+ {0x2e4b, "COMPUTE_USER_DATA_11"},
+ {0x2e4c, "COMPUTE_USER_DATA_12"},
+ {0x2e4d, "COMPUTE_USER_DATA_13"},
+ {0x2e4e, "COMPUTE_USER_DATA_14"},
+ {0x2e4f, "COMPUTE_USER_DATA_15"},
+
+ {0x2e07, "COMPUTE_NUM_THREAD_X"},
+ {0x2e08, "COMPUTE_NUM_THREAD_Y"},
+ {0x2e09, "COMPUTE_NUM_THREAD_Z"},
+ {0xa2db, "VGT_TF_PARAM"},
+ {0xa2d6, "VGT_LS_HS_CONFIG"},
+ {0xa287, "VGT_HOS_MIN_TESS_LEVEL"},
+ {0xa286, "VGT_HOS_MAX_TESS_LEVEL"},
+ {0xa2f8, "PA_SC_AA_CONFIG"},
+ {0xa310, "PA_SC_SHADER_CONTROL"},
+ {0xa313, "PA_SC_CONSERVATIVE_RASTERIZATION_CNTL"},
+
+ {0x2d0c, "SPI_SHADER_USER_DATA_LS_0"},
+ {0x2d0d, "SPI_SHADER_USER_DATA_LS_1"},
+ {0x2d0e, "SPI_SHADER_USER_DATA_LS_2"},
+ {0x2d0f, "SPI_SHADER_USER_DATA_LS_3"},
+ {0x2d10, "SPI_SHADER_USER_DATA_LS_4"},
+ {0x2d11, "SPI_SHADER_USER_DATA_LS_5"},
+ {0x2d12, "SPI_SHADER_USER_DATA_LS_6"},
+ {0x2d13, "SPI_SHADER_USER_DATA_LS_7"},
+ {0x2d14, "SPI_SHADER_USER_DATA_LS_8"},
+ {0x2d15, "SPI_SHADER_USER_DATA_LS_9"},
+ {0x2d16, "SPI_SHADER_USER_DATA_LS_10"},
+ {0x2d17, "SPI_SHADER_USER_DATA_LS_11"},
+ {0x2d18, "SPI_SHADER_USER_DATA_LS_12"},
+ {0x2d19, "SPI_SHADER_USER_DATA_LS_13"},
+ {0x2d1a, "SPI_SHADER_USER_DATA_LS_14"},
+ {0x2d1b, "SPI_SHADER_USER_DATA_LS_15"},
+ {0x2d1c, "SPI_SHADER_USER_DATA_LS_16"},
+ {0x2d1d, "SPI_SHADER_USER_DATA_LS_17"},
+ {0x2d1e, "SPI_SHADER_USER_DATA_LS_18"},
+ {0x2d1f, "SPI_SHADER_USER_DATA_LS_19"},
+ {0x2d20, "SPI_SHADER_USER_DATA_LS_20"},
+ {0x2d21, "SPI_SHADER_USER_DATA_LS_21"},
+ {0x2d22, "SPI_SHADER_USER_DATA_LS_22"},
+ {0x2d23, "SPI_SHADER_USER_DATA_LS_23"},
+ {0x2d24, "SPI_SHADER_USER_DATA_LS_24"},
+ {0x2d25, "SPI_SHADER_USER_DATA_LS_25"},
+ {0x2d26, "SPI_SHADER_USER_DATA_LS_26"},
+ {0x2d27, "SPI_SHADER_USER_DATA_LS_27"},
+ {0x2d28, "SPI_SHADER_USER_DATA_LS_28"},
+ {0x2d29, "SPI_SHADER_USER_DATA_LS_29"},
+ {0x2d2a, "SPI_SHADER_USER_DATA_LS_30"},
+ {0x2d2b, "SPI_SHADER_USER_DATA_LS_31"},
+
+ {0xa2aa, "IA_MULTI_VGT_PARAM"},
+ {0xa2a5, "VGT_GS_MAX_PRIMS_PER_SUBGROUP"},
+ {0xa2e6, "VGT_STRMOUT_BUFFER_CONFIG"},
+ {0xa2e5, "VGT_STRMOUT_CONFIG"},
+ {0xa2b5, "VGT_STRMOUT_VTX_STRIDE_0"},
+ {0xa2b9, "VGT_STRMOUT_VTX_STRIDE_1"},
+ {0xa2bd, "VGT_STRMOUT_VTX_STRIDE_2"},
+ {0xa2c1, "VGT_STRMOUT_VTX_STRIDE_3"},
+ {0xa316, "VGT_VERTEX_REUSE_BLOCK_CNTL"},
+
+ {0, nullptr}};
+ auto Entry = RegInfoTable;
+ for (; Entry->Num && Entry->Num != RegNum; ++Entry)
+ ;
+ return Entry->Name;
+}
+
+// Convert the accumulated PAL metadata into an asm directive.
+void AMDGPUPALMetadata::toString(std::string &String) {
+ String.clear();
+ if (!BlobType)
+ return;
+ raw_string_ostream Stream(String);
+ if (isLegacy()) {
+ if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil)
+ return;
+ // Old linear reg=val format.
+ Stream << '\t' << AMDGPU::PALMD::AssemblerDirective << ' ';
+ auto Regs = getRegisters();
+ for (auto I = Regs.begin(), E = Regs.end(); I != E; ++I) {
+ if (I != Regs.begin())
+ Stream << ',';
+ unsigned Reg = I->first.getUInt();
+ unsigned Val = I->second.getUInt();
+ Stream << "0x" << Twine::utohexstr(Reg) << ",0x" << Twine::utohexstr(Val);
+ }
+ Stream << '\n';
+ return;
+ }
+
+ // New msgpack-based format -- output as YAML (with unsigned numbers in hex),
+ // but first change the registers map to use names.
+ MsgPackDoc.setHexMode();
+ auto &RegsObj = refRegisters();
+ auto OrigRegs = RegsObj.getMap();
+ RegsObj = MsgPackDoc.getMapNode();
+ for (auto I : OrigRegs) {
+ auto Key = I.first;
+ if (const char *RegName = getRegisterName(Key.getUInt())) {
+ std::string KeyName = Key.toString();
+ KeyName += " (";
+ KeyName += RegName;
+ KeyName += ')';
+ Key = MsgPackDoc.getNode(KeyName, /*Copy=*/true);
+ }
+ RegsObj.getMap()[Key] = I.second;
+ }
+
+ // Output as YAML.
+ Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveBegin << '\n';
+ MsgPackDoc.toYAML(Stream);
+ Stream << '\t' << AMDGPU::PALMD::AssemblerDirectiveEnd << '\n';
+
+ // Restore original registers map.
+ RegsObj = OrigRegs;
+}
+
+// Convert the accumulated PAL metadata into a binary blob for writing as
+// a .note record of the specified AMD type. Returns an empty blob if
+// there is no PAL metadata.
+void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
+ if (Type == ELF::NT_AMD_AMDGPU_PAL_METADATA)
+ toLegacyBlob(Blob);
+ else if (Type)
+ toMsgPackBlob(Blob);
+}
+
+void AMDGPUPALMetadata::toLegacyBlob(std::string &Blob) {
+ Blob.clear();
+ auto Registers = getRegisters();
+ if (Registers.getMap().empty())
+ return;
+ raw_string_ostream OS(Blob);
+ support::endian::Writer EW(OS, support::endianness::little);
+ for (auto I : Registers.getMap()) {
+ EW.write(uint32_t(I.first.getUInt()));
+ EW.write(uint32_t(I.second.getUInt()));
+ }
+}
+
+void AMDGPUPALMetadata::toMsgPackBlob(std::string &Blob) {
+ Blob.clear();
+ MsgPackDoc.writeToBlob(Blob);
+}
+
+// Set PAL metadata from YAML text. Returns false on failure.
+bool AMDGPUPALMetadata::setFromString(StringRef S) {
+ BlobType = ELF::NT_AMDGPU_METADATA;
+ if (!MsgPackDoc.fromYAML(S))
+ return false;
+
+ // In the registers map, some keys may be of the form "0xa191
+ // (SPI_PS_INPUT_CNTL_0)", in which case the YAML input code made it a
+ // string. We need to turn it into a number.
+ auto &RegsObj = refRegisters();
+ auto OrigRegs = RegsObj;
+ RegsObj = MsgPackDoc.getMapNode();
+ Registers = RegsObj.getMap();
+ bool Ok = true;
+ for (auto I : OrigRegs.getMap()) {
+ auto Key = I.first;
+ if (Key.getKind() == msgpack::Type::String) {
+ StringRef S = Key.getString();
+ uint64_t Val;
+ if (S.consumeInteger(0, Val)) {
+ Ok = false;
+ errs() << "Unrecognized PAL metadata register key '" << S << "'\n";
+ continue;
+ }
+ Key = MsgPackDoc.getNode(uint64_t(Val));
+ }
+ Registers.getMap()[Key] = I.second;
+ }
+ return Ok;
+}
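
StringRef::consumeInteger(0, Val) returns false on success and leaves the remainder in place, so a key such as "0xa191 (SPI_PS_INPUT_CNTL_0)" yields the register number while the human-readable suffix is simply dropped. A short sketch of that behavior:

// Sketch (inside some function; assumes llvm/ADT/StringRef.h):
llvm::StringRef Key = "0xa191 (SPI_PS_INPUT_CNTL_0)";
uint64_t Val;
bool Failed = Key.consumeInteger(0, Val);
// Failed == false, Val == 0xa191, Key is now " (SPI_PS_INPUT_CNTL_0)".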
+
+// Reference (create if necessary) the node for the registers map.
+msgpack::DocNode &AMDGPUPALMetadata::refRegisters() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".registers")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+// Get (create if necessary) the registers map.
+msgpack::MapDocNode AMDGPUPALMetadata::getRegisters() {
+ if (Registers.isEmpty())
+ Registers = refRegisters();
+ return Registers.getMap();
+}
+
+// Return the PAL metadata hardware shader stage name.
+static const char *getStageName(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_PS:
+ return ".ps";
+ case CallingConv::AMDGPU_VS:
+ return ".vs";
+ case CallingConv::AMDGPU_GS:
+ return ".gs";
+ case CallingConv::AMDGPU_ES:
+ return ".es";
+ case CallingConv::AMDGPU_HS:
+ return ".hs";
+ case CallingConv::AMDGPU_LS:
+ return ".ls";
+ default:
+ return ".cs";
+ }
+}
+
+// Get (create if necessary) the .hardware_stages entry for the given calling
+// convention.
+msgpack::MapDocNode AMDGPUPALMetadata::getHwStage(unsigned CC) {
+ if (HwStages.isEmpty())
+ HwStages = MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)["amdpal.pipelines"]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[".hardware_stages"]
+ .getMap(/*Convert=*/true);
+ return HwStages.getMap()[getStageName(CC)].getMap(/*Convert=*/true);
+}
+
+// Get .note record vendor name of metadata blob to be emitted.
+const char *AMDGPUPALMetadata::getVendor() const {
+ return isLegacy() ? ElfNote::NoteNameV2 : ElfNote::NoteNameV3;
+}
+
+// Get .note record type of metadata blob to be emitted:
+// ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+// ELF::NT_AMDGPU_METADATA (MsgPack format), or
+// 0 (no PAL metadata).
+unsigned AMDGPUPALMetadata::getType() const {
+ return BlobType;
+}
+
+// Return whether the blob type is legacy PAL metadata.
+bool AMDGPUPALMetadata::isLegacy() const {
+ return BlobType == ELF::NT_AMD_AMDGPU_PAL_METADATA;
+}
+
+// Set legacy PAL metadata format.
+void AMDGPUPALMetadata::setLegacy() {
+ BlobType = ELF::NT_AMD_AMDGPU_PAL_METADATA;
+}
+
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
new file mode 100644
index 000000000000..0f17c157b206
--- /dev/null
+++ b/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -0,0 +1,135 @@
+//===-- AMDGPUPALMetadata.h - PAL metadata handling -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// PAL metadata handling
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include <map>
+
+namespace llvm {
+
+class AMDGPUTargetStreamer;
+class formatted_raw_ostream;
+class MCStreamer;
+class Module;
+
+class AMDGPUPALMetadata {
+ unsigned BlobType = 0;
+ msgpack::Document MsgPackDoc;
+ msgpack::DocNode Registers;
+ msgpack::DocNode HwStages;
+
+public:
+ // Read the amdgpu.pal.metadata supplied by the frontend, ready for
+ // per-function modification.
+ void readFromIR(Module &M);
+
+ // Set PAL metadata from a binary blob from the applicable .note record.
+ // Returns false if bad format. Blob must remain valid for the lifetime of
+ // the Metadata.
+ bool setFromBlob(unsigned Type, StringRef Blob);
+
+ // Set the rsrc1 register in the metadata for a particular shader stage.
+ // In fact this ORs the value into any previous setting of the register.
+ void setRsrc1(unsigned CC, unsigned Val);
+
+ // Set the rsrc2 register in the metadata for a particular shader stage.
+ // In fact this ORs the value into any previous setting of the register.
+ void setRsrc2(unsigned CC, unsigned Val);
+
+ // Set the SPI_PS_INPUT_ENA register in the metadata.
+ // In fact this ORs the value into any previous setting of the register.
+ void setSpiPsInputEna(unsigned Val);
+
+ // Set the SPI_PS_INPUT_ADDR register in the metadata.
+ // In fact this ORs the value into any previous setting of the register.
+ void setSpiPsInputAddr(unsigned Val);
+
+ // Get a register from the metadata, or 0 if not currently set.
+ unsigned getRegister(unsigned Reg);
+
+ // Set a register in the metadata.
+ // In fact this ORs the value into any previous setting of the register.
+ void setRegister(unsigned Reg, unsigned Val);
+
+ // Set the entry point name for one shader.
+ void setEntryPoint(unsigned CC, StringRef Name);
+
+ // Set the number of used vgprs in the metadata. This is an optional advisory
+ // record for logging etc; wave dispatch actually uses the rsrc1 register for
+ // the shader stage to determine the number of vgprs to allocate.
+ void setNumUsedVgprs(unsigned CC, unsigned Val);
+
+ // Set the number of used sgprs in the metadata. This is an optional advisory
+ // record for logging etc; wave dispatch actually uses the rsrc1 register for
+ // the shader stage to determine the number of sgprs to allocate.
+ void setNumUsedSgprs(unsigned CC, unsigned Val);
+
+ // Set the scratch size in the metadata.
+ void setScratchSize(unsigned CC, unsigned Val);
+
+ // Set the hardware register bit in PAL metadata to enable wave32 on the
+ // shader of the given calling convention.
+ void setWave32(unsigned CC);
+
+ // Emit the accumulated PAL metadata as asm directives.
+ // This is called from AMDGPUTargetAsmStreamer::Finish().
+ void toString(std::string &S);
+
+ // Set PAL metadata from YAML text.
+ bool setFromString(StringRef S);
+
+ // Get .note record vendor name of metadata blob to be emitted.
+ const char *getVendor() const;
+
+ // Get .note record type of metadata blob to be emitted:
+ // ELF::NT_AMD_AMDGPU_PAL_METADATA (legacy key=val format), or
+ // ELF::NT_AMDGPU_METADATA (MsgPack format), or
+ // 0 (no PAL metadata).
+ unsigned getType() const;
+
+ // Emit the accumulated PAL metadata as a binary blob.
+ // This is called from AMDGPUTargetELFStreamer::Finish().
+ void toBlob(unsigned Type, std::string &S);
+
+ // Get the msgpack::Document for the PAL metadata.
+ msgpack::Document *getMsgPackDoc() { return &MsgPackDoc; }
+
+ // Set legacy PAL metadata format.
+ void setLegacy();
+
+private:
+ // Return whether the blob type is legacy PAL metadata.
+ bool isLegacy() const;
+
+ // Reference (create if necessary) the node for the registers map.
+ msgpack::DocNode &refRegisters();
+
+ // Get (create if necessary) the registers map.
+ msgpack::MapDocNode getRegisters();
+
+ // Get (create if necessary) the .hardware_stages entry for the given calling
+ // convention.
+ msgpack::MapDocNode getHwStage(unsigned CC);
+
+ bool setFromLegacyBlob(StringRef Blob);
+ bool setFromMsgPackBlob(StringRef Blob);
+ void toLegacyBlob(std::string &Blob);
+ void toMsgPackBlob(std::string &Blob);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 82ffdef8e674..95ad3f35d18f 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -1,9 +1,8 @@
//===--------------------- AMDKernelCodeTInfo.h ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,9 @@ COMPPGM1(priv, compute_pgm_rsrc1_priv, PRIV
COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
+COMPPGM1(enable_wgp_mode, compute_pgm_rsrc1_wgp_mode, WGP_MODE),
+COMPPGM1(enable_mem_ordered, compute_pgm_rsrc1_mem_ordered, MEM_ORDERED),
+COMPPGM1(enable_fwd_progress, compute_pgm_rsrc1_fwd_progress, FWD_PROGRESS),
// TODO: bulky
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
@@ -107,6 +109,7 @@ CODEPROP(enable_sgpr_private_segment_size, ENABLE_SGPR_PRIVATE_SEGMENT_SIZE),
CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_wavefront_size32, ENABLE_WAVEFRONT_SIZE32),
CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS),
CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE),
CODEPROP(is_ptr64, IS_PTR64),
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 20059f4a1ed7..443e2cc45ac0 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -1,9 +1,8 @@
//===- AMDKernelCodeTUtils.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index ef9f9bdb6bcb..a87325a78df3 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -1,9 +1,8 @@
//===- AMDGPUKernelCodeTUtils.h - helpers for amd_kernel_code_t -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/VIInstrFormats.td b/lib/Target/AMDGPU/VIInstrFormats.td
index 1fd1c1e21527..bd65a495fa72 100644
--- a/lib/Target/AMDGPU/VIInstrFormats.td
+++ b/lib/Target/AMDGPU/VIInstrFormats.td
@@ -1,9 +1,8 @@
//===-- VIInstrFormats.td - VI Instruction Encodings ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AMDGPU/VIInstructions.td b/lib/Target/AMDGPU/VIInstructions.td
index b45c8fc9c7d5..ec7d8875a746 100644
--- a/lib/Target/AMDGPU/VIInstructions.td
+++ b/lib/Target/AMDGPU/VIInstructions.td
@@ -1,9 +1,8 @@
//===-- VIInstructions.td - VI Instruction Definitions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Instruction definitions for VI and newer.
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 68446ab79720..6bc416ed7d4b 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1,9 +1,8 @@
//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,7 +14,7 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
bits<8> vdst;
bits<9> src0;
- let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0);
+ let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, ?);
let Inst{16-9} = op;
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
let Inst{31-25} = 0x3f; //encoding
@@ -48,7 +47,6 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let SubtargetPredicate = isGCN;
let VOP1 = 1;
let VALU = 1;
@@ -144,7 +142,7 @@ defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
InstSI <(outs SReg_32:$vdst),
- (ins VGPR_32:$src0),
+ (ins VRegOrLds_32:$src0),
"v_readfirstlane_b32 $vdst, $src0",
[(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
Enc32 {
@@ -156,7 +154,6 @@ def V_READFIRSTLANE_B32 :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let SubtargetPredicate = isGCN;
let VOP1 = 1;
let VALU = 1;
@@ -172,9 +169,16 @@ def V_READFIRSTLANE_B32 :
let Inst{31-25} = 0x3f; //encoding
}
-let SchedRW = [WriteQuarterRate32] in {
-defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
+let SchedRW = [WriteDoubleCvt] in {
+defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
+defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
+} // End SchedRW = [WriteDoubleCvt]
+
+let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
@@ -186,15 +190,12 @@ defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
-defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
-defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+} // End SchedRW = [WriteQuarterRate32]
+
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
-defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
-} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
@@ -271,6 +272,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
@@ -279,6 +281,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
let Asm32 = getAsm32<1, 1>.ret;
let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
+ let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1>.ret;
let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;
@@ -305,41 +308,43 @@ defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
-// These instruction only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let SchedRW = [WriteQuarterRate32] in {
-defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
-defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
-defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
-defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
-defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
-} // End SchedRW = [WriteDouble]
-
-} // End SubtargetPredicate = isSICI
-
-
-let SubtargetPredicate = isCIVI in {
-
-let SchedRW = [WriteDoubleAdd] in {
-defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
-defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
-defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
-defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
-} // End SchedRW = [WriteDoubleAdd]
-
-let SchedRW = [WriteQuarterRate32] in {
-defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
-defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
-} // End SchedRW = [WriteQuarterRate32]
-
-} // End SubtargetPredicate = isCIVI
-
+let SubtargetPredicate = isGFX6GFX7 in {
+ let SchedRW = [WriteQuarterRate32] in {
+ defm V_LOG_CLAMP_F32 :
+ VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
+ defm V_RCP_CLAMP_F32 :
+ VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
+ defm V_RCP_LEGACY_F32 :
+ VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
+ defm V_RSQ_CLAMP_F32 :
+ VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
+ defm V_RSQ_LEGACY_F32 :
+ VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
+ } // End SchedRW = [WriteQuarterRate32]
+
+ let SchedRW = [WriteDouble] in {
+ defm V_RCP_CLAMP_F64 :
+ VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
+ defm V_RSQ_CLAMP_F64 :
+ VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
+ } // End SchedRW = [WriteDouble]
+} // End SubtargetPredicate = isGFX6GFX7
+
+let SubtargetPredicate = isGFX7GFX8GFX9 in {
+ let SchedRW = [WriteQuarterRate32] in {
+ defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
+ defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
+ } // End SchedRW = [WriteQuarterRate32]
+} // End SubtargetPredicate = isGFX7GFX8GFX9
+
+let SubtargetPredicate = isGFX7Plus in {
+ let SchedRW = [WriteDoubleAdd] in {
+ defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
+ defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
+ defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
+ defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
+ } // End SchedRW = [WriteDoubleAdd]
+} // End SubtargetPredicate = isGFX7Plus
let SubtargetPredicate = Has16BitInsts in {
@@ -393,125 +398,279 @@ def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
let Ins64 = (ins);
}
-let SubtargetPredicate = isGFX9 in {
- let Constraints = "$vdst = $src1, $vdst1 = $src0",
- DisableEncoding="$vdst1,$src1",
- SchedRW = [Write64Bit, Write64Bit] in {
-// Never VOP3. Takes as long as 2 v_mov_b32s
-def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
+let SubtargetPredicate = isGFX9Plus in {
+ def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
+ let Constraints = "$vdst = $src1, $vdst1 = $src0";
+ let DisableEncoding = "$vdst1,$src1";
+ let SchedRW = [Write64Bit, Write64Bit];
+ }
+
+ defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
+ defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
+ defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+} // End SubtargetPredicate = isGFX9Plus
+
+let SubtargetPredicate = isGFX9Only in {
+ defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
+} // End SubtargetPredicate = isGFX9Only
+
+let SubtargetPredicate = isGFX10Plus in {
+ defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;
+
+ let Uses = [M0] in {
+ // FIXME-GFX10: Should V_MOVRELSD_2_B32 be VOP_NO_EXT?
+ defm V_MOVRELSD_2_B32 :
+ VOP1Inst<"v_movrelsd_2_b32", VOP_NO_EXT<VOP_I32_I32>>;
+
+ def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
+ let Constraints = "$vdst = $src1, $vdst1 = $src0";
+ let DisableEncoding = "$vdst1,$src1";
+ let SchedRW = [Write64Bit, Write64Bit];
+ }
+ } // End Uses = [M0]
+} // End SubtargetPredicate = isGFX10Plus
+
+//===----------------------------------------------------------------------===//
+// Target-specific instruction encodings.
+//===----------------------------------------------------------------------===//
+
+class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
+ VOP_DPP<ps.OpName, p, isDPP16> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ bits<8> vdst;
+ let Inst{8-0} = 0xfa;
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f;
}
-defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
+class VOP1_DPP16<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
+ VOP1_DPP<op, ps, p, 1> {
+ let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
+ let SubtargetPredicate = HasDPP16;
+}
-defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
-defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
-defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
+class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
+ VOP_DPP8<ps.OpName, p> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
-} // End SubtargetPredicate = isGFX9
+ bits<8> vdst;
+ let Inst{8-0} = fi;
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f;
+
+ let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+ let SubtargetPredicate = HasDPP8;
+}
//===----------------------------------------------------------------------===//
-// Target
+// GFX10.
//===----------------------------------------------------------------------===//
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass VOP1Only_Real_gfx10<bits<9> op> {
+ def _gfx10 :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP1_Real_e32_gfx10<bits<9> op> {
+ def _e32_gfx10 :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ }
+ multiclass VOP1_Real_e64_gfx10<bits<9> op> {
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+ multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+ let DecoderNamespace = "SDWA10";
+ }
+ }
+ multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
+ def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "SDWA10";
+ }
+ }
+ multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
+ def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "DPP8";
+ }
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+multiclass VOP1_Real_gfx10_no_dpp<bits<9> op> :
+ VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
+ VOP1_Real_sdwa_gfx10<op>;
+
+multiclass VOP1_Real_gfx10_no_dpp8<bits<9> op> :
+ VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
+ VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>;
+
+multiclass VOP1_Real_gfx10<bits<9> op> :
+ VOP1_Real_gfx10_no_dpp8<op>, VOP1_Real_dpp8_gfx10<op>;
+
+defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
+defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
+defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
+defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
+defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
+defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
+defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
+defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
+defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
+defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
+defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
+defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
+defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
+defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
+defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
+defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
+defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
+defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
+defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
+defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
+defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
+defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;
+
+defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>;
+defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;
+
//===----------------------------------------------------------------------===//
-// SI
+// GFX7, GFX10.
//===----------------------------------------------------------------------===//
-multiclass VOP1_Real_si <bits<9> op> {
- let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
- def _e32_si :
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+ multiclass VOP1_Real_e32_gfx7<bits<9> op> {
+ def _e32_gfx7 :
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
- def _e64_si :
+ }
+ multiclass VOP1_Real_e64_gfx7<bits<9> op> {
+ def _e64_gfx7 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
-}
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
-defm V_NOP : VOP1_Real_si <0x0>;
-defm V_MOV_B32 : VOP1_Real_si <0x1>;
-defm V_CVT_I32_F64 : VOP1_Real_si <0x3>;
-defm V_CVT_F64_I32 : VOP1_Real_si <0x4>;
-defm V_CVT_F32_I32 : VOP1_Real_si <0x5>;
-defm V_CVT_F32_U32 : VOP1_Real_si <0x6>;
-defm V_CVT_U32_F32 : VOP1_Real_si <0x7>;
-defm V_CVT_I32_F32 : VOP1_Real_si <0x8>;
-defm V_MOV_FED_B32 : VOP1_Real_si <0x9>;
-defm V_CVT_F16_F32 : VOP1_Real_si <0xa>;
-defm V_CVT_F32_F16 : VOP1_Real_si <0xb>;
-defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>;
-defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>;
-defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>;
-defm V_CVT_F32_F64 : VOP1_Real_si <0xf>;
-defm V_CVT_F64_F32 : VOP1_Real_si <0x10>;
-defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>;
-defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>;
-defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>;
-defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>;
-defm V_CVT_U32_F64 : VOP1_Real_si <0x15>;
-defm V_CVT_F64_U32 : VOP1_Real_si <0x16>;
-defm V_FRACT_F32 : VOP1_Real_si <0x20>;
-defm V_TRUNC_F32 : VOP1_Real_si <0x21>;
-defm V_CEIL_F32 : VOP1_Real_si <0x22>;
-defm V_RNDNE_F32 : VOP1_Real_si <0x23>;
-defm V_FLOOR_F32 : VOP1_Real_si <0x24>;
-defm V_EXP_F32 : VOP1_Real_si <0x25>;
-defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>;
-defm V_LOG_F32 : VOP1_Real_si <0x27>;
-defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>;
-defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>;
-defm V_RCP_F32 : VOP1_Real_si <0x2a>;
-defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>;
-defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>;
-defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>;
-defm V_RSQ_F32 : VOP1_Real_si <0x2e>;
-defm V_RCP_F64 : VOP1_Real_si <0x2f>;
-defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>;
-defm V_RSQ_F64 : VOP1_Real_si <0x31>;
-defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>;
-defm V_SQRT_F32 : VOP1_Real_si <0x33>;
-defm V_SQRT_F64 : VOP1_Real_si <0x34>;
-defm V_SIN_F32 : VOP1_Real_si <0x35>;
-defm V_COS_F32 : VOP1_Real_si <0x36>;
-defm V_NOT_B32 : VOP1_Real_si <0x37>;
-defm V_BFREV_B32 : VOP1_Real_si <0x38>;
-defm V_FFBH_U32 : VOP1_Real_si <0x39>;
-defm V_FFBL_B32 : VOP1_Real_si <0x3a>;
-defm V_FFBH_I32 : VOP1_Real_si <0x3b>;
-defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
-defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>;
-defm V_FRACT_F64 : VOP1_Real_si <0x3e>;
-defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
-defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>;
-defm V_CLREXCP : VOP1_Real_si <0x41>;
-defm V_MOVRELD_B32 : VOP1_Real_si <0x42>;
-defm V_MOVRELS_B32 : VOP1_Real_si <0x43>;
-defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>;
+multiclass VOP1_Real_gfx7<bits<9> op> :
+ VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;
+
+multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
+ VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;
+
+defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
+defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;
+
+defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
+defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>;
+defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
+defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;
//===----------------------------------------------------------------------===//
-// CI
+// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
-multiclass VOP1_Real_ci <bits<9> op> {
- let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
- def _e32_ci :
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
+ def _e32_gfx6_gfx7 :
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
- def _e64_ci :
+ }
+ multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
+ def _e64_gfx6_gfx7 :
VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
-}
-
-defm V_TRUNC_F64 : VOP1_Real_ci <0x17>;
-defm V_CEIL_F64 : VOP1_Real_ci <0x18>;
-defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>;
-defm V_RNDNE_F64 : VOP1_Real_ci <0x19>;
-defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
-defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
+ VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
+ VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<bits<9> op> :
+ VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp8<op>;
+
+multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp<bits<9> op> :
+ VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp<op>;
+
+defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
+defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
+defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
+defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
+defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
+defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
+defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;
+
+defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
+defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
+defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
+defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
+defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
+defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
+defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
+defm V_MOV_FED_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x009>;
+defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
+defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
+defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
+defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
+defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
+defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
+defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
+defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
+defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
+defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
+defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
+defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
+defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
+defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
+defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
+defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
+defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
+defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
+defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
+defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
+defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
+defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
+defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
+defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
+defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
+defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
+defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
+defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
+defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
+defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
+defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
+defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>;
+defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>;
+defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>;
//===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
@@ -524,7 +683,7 @@ class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
}
multiclass VOP1Only_Real_vi <bits<10> op> {
- let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
def _vi :
VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
@@ -532,7 +691,7 @@ multiclass VOP1Only_Real_vi <bits<10> op> {
}
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
- let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
def _e32_vi :
VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
@@ -649,7 +808,7 @@ def V_MOV_B32_indirect : VPseudoInstSI<(outs),
PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
getVOPSrc0ForVT<i32>.ret:$src0)> {
let VOP1 = 1;
- let SubtargetPredicate = isVI;
+ let SubtargetPredicate = isGFX8GFX9;
}
// This is a pseudo variant of the v_movreld_b32 instruction in which the
@@ -672,7 +831,7 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
-let OtherPredicates = [isVI] in {
+let OtherPredicates = [isGFX8GFX9] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
@@ -690,6 +849,9 @@ def : GCNPat <
(as_i1imm $bound_ctrl))
>;
+} // End OtherPredicates = [isGFX8GFX9]
+
+let OtherPredicates = [isGFX8Plus] in {
def : GCNPat<
(i32 (anyext i16:$src)),
(COPY $src)
@@ -712,14 +874,14 @@ def : GCNPat <
(EXTRACT_SUBREG $src, sub0)
>;
-} // End OtherPredicates = [isVI]
+} // End OtherPredicates = [isGFX8Plus]
//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//
multiclass VOP1_Real_gfx9 <bits<10> op> {
- let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in {
+ let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
defm NAME : VOP1_Real_e32e64_vi <op>;
}
@@ -735,3 +897,30 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
}
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
+
+//===----------------------------------------------------------------------===//
+// GFX10
+//===----------------------------------------------------------------------===//
+
+let OtherPredicates = [isGFX10Plus] in {
+def : GCNPat <
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+ (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
+>;
+
+def : GCNPat <
+ (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
+ imm:$bound_ctrl)),
+ (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl), (i32 0))
+>;
+
+def : GCNPat <
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
+ imm:$bank_mask, imm:$bound_ctrl)),
+ (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl), (i32 0))
+>;
+} // End OtherPredicates = [isGFX10Plus]
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index e3fd7b5f9fad..1b30cd2ed516 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1,9 +1,8 @@
//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -69,7 +68,6 @@ class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suf
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let SubtargetPredicate = isGCN;
let VOP2 = 1;
let VALU = 1;
@@ -177,7 +175,9 @@ multiclass VOP2bInst <string opName,
let SchedRW = [Write32Bit, WriteSALU] in {
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
- Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
+ let usesCustomInserter = !eq(P.NumSrcArgs, 2);
+ }
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
@@ -192,6 +192,23 @@ multiclass VOP2bInst <string opName,
}
}
+class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
+ string OpName, string opnd> :
+ InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
+ (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
+ ps.Pfl.Src1RC32:$src1)>,
+ PredicateControl {
+}
+
+multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
+ let WaveSizePredicate = isWave32 in {
+ def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
+ }
+ let WaveSizePredicate = isWave64 in {
+ def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
+ }
+}
+
multiclass VOP2eInst <string opName,
VOPProfile P,
SDPatternOperator node = null_frag,
@@ -216,6 +233,22 @@ multiclass VOP2eInst <string opName,
}
}
+class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> :
+ InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
+ (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
+ ps.Pfl.Src1RC32:$src1)>,
+ PredicateControl {
+}
+
+multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
+ let WaveSizePredicate = isWave32 in {
+ def : VOP2eInstAlias<ps, inst, "vcc_lo">;
+ }
+ let WaveSizePredicate = isWave64 in {
+ def : VOP2eInstAlias<ps, inst, "vcc">;
+ }
+}
+
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
@@ -244,15 +277,22 @@ def VOP_MADMK_F32 : VOP_MADMK <f32>;
// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
-class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- 0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+ 0, HasModifiers, HasModifiers, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
+ let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
+ dpp8:$dpp8, FI:$fi);
let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
@@ -260,11 +300,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
clampmod:$clamp, omod:$omod,
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let Asm32 = getAsm32<1, 2, vt>.ret;
- let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt>.ret;
- let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
- let AsmSDWA = getAsmSDWA<1, 2, vt>.ret;
- let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret;
+ let Asm32 = getAsm32<1, 2, vt0>.ret;
+ let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
+ let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
+ let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
+ let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
+ let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
+ let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
let HasSrc2 = 0;
let HasSrc2Mods = 0;
@@ -272,38 +314,51 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let HasExtDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 0;
+ let TieRegDPP = "$src2";
}
def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;
+class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
+ let HasClamp = 0;
+ let HasExtSDWA = 0;
+ let HasModifiers = 1;
+ let HasOpSel = 0;
+ let IsPacked = 0;
+}
+
+def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
+ let Src0ModDPP = FPVRegInputMods;
+ let Src1ModDPP = FPVRegInputMods;
+}
+def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;
+
// Write out to vcc or arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
let Asm32 = "$vdst, vcc, $src0, $src1";
- let Asm64 = "$vdst, $sdst, $src0, $src1";
+ let Asm64 = "$vdst, $sdst, $src0, $src1$clamp";
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
}
// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
- // We use VCSrc_b32 to exclude literal constants, even though the
- // encoding normally allows them since the implicit VCC use means
- // using one would always violate the constant bus
- // restriction. SGPRs are still allowed because it should
- // technically be possible to use VCC again as src0.
- let Src0RC32 = VCSrc_b32;
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
- let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
+ let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.
@@ -320,20 +375,23 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
let HasExt = 1;
let HasExtDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
}
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
- let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
- let Asm32 = "$vdst, $src0, $src1, vcc";
- let Asm64 = "$vdst, $src0, $src1, $src2";
+// Read in from vcc or arbitrary SGPR.
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
+ let Asm32 = "$vdst, $src0, $src1";
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
+ let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst);
@@ -349,10 +407,12 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
src0_sel:$src0_sel, src1_sel:$src1_sel);
let InsDPP = (ins DstRCDPP:$old,
- Src0DPP:$src0,
- Src1DPP:$src1,
+ Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsDPP16 = !con(InsDPP, (ins FI:$fi));
+
let HasExt = 1;
let HasExtDPP = 1;
let HasExtSDWA = 1;
@@ -362,7 +422,7 @@ def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
let Outs32 = (outs SReg_32:$vdst);
let Outs64 = Outs32;
- let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
+ let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
@@ -393,8 +453,6 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isGCN, Predicates = [isGCN] in {
-
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
@@ -414,9 +472,9 @@ defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
-defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
-defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
@@ -442,9 +500,9 @@ defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_f
let SubtargetPredicate = HasAddNoCarryInsts in {
-defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32, null_frag, "v_add_u32", 1>;
-defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>;
-defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>;
+defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
+defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
+defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}
} // End isCommutable = 1
@@ -472,32 +530,20 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
-} // End SubtargetPredicate = isGCN, Predicates = [isGCN]
-
-def : GCNPat<
- (AMDGPUadde i32:$src0, i32:$src1, i1:$src2),
- (V_ADDC_U32_e64 $src0, $src1, $src2)
->;
-
-def : GCNPat<
- (AMDGPUsube i32:$src0, i32:$src1, i1:$src2),
- (V_SUBB_U32_e64 $src0, $src1, $src2)
->;
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
+} // End SubtargetPredicate = isGFX6GFX7
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
} // End isCommutable = 1
-
-} // End let SubtargetPredicate = SICI, Predicates = [isSICI]
+} // End SubtargetPredicate = isGFX6GFX7GFX10
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
GCNPat<
@@ -508,29 +554,29 @@ class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
)
>;
-let AddedComplexity = 1 in {
- def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
- def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
- def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
-}
+class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
+ GCNPat<
+ (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
+ !if(!cast<Commutable_REV>(Inst).IsOrig,
+ (Inst $src0, $src1, 0),
+ (Inst $src1, $src0, 0)
+ )
+ >;
+
+def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
+def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
+def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
let SubtargetPredicate = HasAddNoCarryInsts in {
- def : DivergentBinOp<add, V_ADD_U32_e32>;
- def : DivergentBinOp<sub, V_SUB_U32_e32>;
- def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+ def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
+ def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}
+let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
+def : DivergentClampingBinOp<add, V_ADD_I32_e64>;
+def : DivergentClampingBinOp<sub, V_SUB_I32_e64>;
+}
-def : DivergentBinOp<add, V_ADD_I32_e32>;
-
-def : DivergentBinOp<add, V_ADD_I32_e64>;
-def : DivergentBinOp<sub, V_SUB_I32_e32>;
-
-def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
-
-def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
-def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
-def : DivergentBinOp<shl, V_LSHLREV_B32_e32>;
def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;
@@ -604,56 +650,133 @@ defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
} // End SubtargetPredicate = HasDLInsts
-// Note: 16-bit instructions produce a 0 result in the high 16-bits.
-multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {
+let Constraints = "$vdst = $src2",
+ DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1,
+ isCommutable = 1 in {
+ let SubtargetPredicate = HasDot5Insts in
+ defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
+ let SubtargetPredicate = HasDot6Insts in
+ defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;
+
+ let SubtargetPredicate = HasDot4Insts in
+ defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
+ let SubtargetPredicate = HasDot3Insts in
+ defm V_DOT8C_I32_I4 : VOP2Inst_e32<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
+}
+
+let AddedComplexity = 30 in {
+ def : GCNPat<
+ (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
+ (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
+ > {
+ let SubtargetPredicate = HasDot5Insts;
+ }
+ def : GCNPat<
+ (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+ (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
+ > {
+ let SubtargetPredicate = HasDot6Insts;
+ }
+ def : GCNPat<
+ (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+ (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
+ > {
+ let SubtargetPredicate = HasDot4Insts;
+ }
+ def : GCNPat<
+ (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
+ (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
+ > {
+ let SubtargetPredicate = HasDot3Insts;
+ }
+} // End AddedComplexity = 30
+
+let SubtargetPredicate = isGFX10Plus in {
+
+def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
+let FPDPRounding = 1 in
+def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
+
+let isCommutable = 1 in {
+def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
+let FPDPRounding = 1 in
+def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
+} // End isCommutable = 1
+
+let Constraints = "$vdst = $src2",
+ DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1,
+ isCommutable = 1 in {
+defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
+}
+
+} // End SubtargetPredicate = isGFX10Plus
+
+let SubtargetPredicate = HasPkFmacF16Inst in {
+defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
+} // End SubtargetPredicate = HasPkFmacF16Inst
+
+// Note: 16-bit instructions produce a 0 result in the high 16-bits
+// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
+def ClearHI16 : OutPatFrag<(ops node:$op),
+ (V_AND_B32_e64 $op, (V_MOV_B32_e32 (i32 0xffff)))>;
+
+multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst,
+ bit PreservesHI16 = 0> {
def : GCNPat<
(op i16:$src0, i16:$src1),
- (inst $src0, $src1)
+ !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
>;
def : GCNPat<
(i32 (zext (op i16:$src0, i16:$src1))),
- (inst $src0, $src1)
+ !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
>;
def : GCNPat<
(i64 (zext (op i16:$src0, i16:$src1))),
(REG_SEQUENCE VReg_64,
- (inst $src0, $src1), sub0,
+ !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)),
+ sub0,
(V_MOV_B32_e32 (i32 0)), sub1)
>;
-
}
-multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst> {
+multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst,
+ bit PreservesHI16 = 0> {
def : GCNPat<
(op i16:$src0, i16:$src1),
- (inst $src1, $src0)
+ !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
>;
def : GCNPat<
(i32 (zext (op i16:$src0, i16:$src1))),
- (inst $src1, $src0)
+ !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
>;
def : GCNPat<
(i64 (zext (op i16:$src0, i16:$src1))),
(REG_SEQUENCE VReg_64,
- (inst $src1, $src0), sub0,
+ !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)),
+ sub0,
(V_MOV_B32_e32 (i32 0)), sub1)
>;
}
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
(i16 (ext i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+ (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
+ (i32 0/*src1mod*/), (i32 1/*src1*/),
+ $src)
>;
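+// Note: V_CNDMASK_B32_e64 now takes explicit source-modifier operands, so the
+// patterns here pass each constant source as a (modifier, value) pair with the
+// condition last, as annotated inline above and in the sext pattern below.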
let Predicates = [Has16BitInsts] in {
+let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
@@ -661,6 +784,17 @@ defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;
+}
+
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64, 1>;
+defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64, 1>;
+defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64, 1>;
+defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64, 1>;
+defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64, 1>;
+defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64, 1>;
+defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64, 1>;
+}
def : GCNPat <
(and i16:$src0, i16:$src1),
@@ -677,16 +811,25 @@ def : GCNPat <
(V_XOR_B32_e64 $src0, $src1)
>;
+let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>;
defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>;
defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64>;
+}
+
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64, 1>;
+defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64, 1>;
+defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64, 1>;
+}
def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;
def : GCNPat <
(i16 (sext i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;
// Undo sub x, c -> add x, -c canonicalization since c is more likely
@@ -697,105 +840,334 @@ def : GCNPat<
(V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
>;
-} // End Predicates = [Has16BitInsts]
+} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]
+
//===----------------------------------------------------------------------===//
-// SI
+// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
-let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+class VOP2_DPP<bits<6> op, VOP2_Pseudo ps,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl,
+ bit IsDPP16 = 0> :
+ VOP_DPP<opName, p, IsDPP16> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
-multiclass VOP2_Real_si <bits<6> op> {
- def _si :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+ bits<8> vdst;
+ bits<8> src1;
+ let Inst{8-0} = 0xfa;
+ let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0;
}
-multiclass VOP2_Real_MADK_si <bits<6> op> {
- def _si : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+class VOP2_DPP16<bits<6> op, VOP2_Pseudo ps,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOP2_DPP<op, ps, opName, p, 1> {
+ let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
+ let SubtargetPredicate = HasDPP16;
}
-multiclass VOP2_Real_e32_si <bits<6> op> {
- def _e32_si :
- VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
- VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
+ string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOP_DPP8<ps.OpName, p> {
+ let hasSideEffects = ps.hasSideEffects;
+ let Defs = ps.Defs;
+ let SchedRW = ps.SchedRW;
+ let Uses = ps.Uses;
+
+ bits<8> vdst;
+ bits<8> src1;
+
+ let Inst{8-0} = fi;
+ let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0;
+
+ let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+ let SubtargetPredicate = HasDPP8;
}
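+
+// Encoding note (summary of the two classes above): both DPP variants place
+// src1 in bits 16-9, vdst in bits 24-17, the VOP2 opcode in bits 30-25 and 0
+// in bit 31; they differ only in bits 8-0, which hold the 0xfa DPP literal
+// for VOP2_DPP/VOP2_DPP16 and the fi field for VOP2_DPP8.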
-multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
- def _e64_si :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3e_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
-}
-
-multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
- def _e64_si :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3be_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
-}
-
-} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
-
-defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>;
-defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>;
-defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>;
-defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>;
-defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>;
-defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>;
-defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>;
-defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>;
-defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>;
-defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>;
-defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>;
-defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>;
-defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>;
-defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>;
-defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>;
-defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>;
-defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>;
-defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>;
-defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>;
-defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>;
-defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>;
-defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>;
-defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>;
-defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>;
-defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>;
-defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>;
-defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>;
-defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>;
-defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>;
-defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
-defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
-
-defm V_READLANE_B32 : VOP2_Real_si <0x01>;
-
-let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
-defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
-}
-
-defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
-defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
-defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>;
-defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>;
-defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>;
-defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>;
-
-defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>;
-defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>;
-defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>;
-defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>;
-defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>;
-defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
-defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
-defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>;
-defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>;
-defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ //===------------------------------- VOP2 -------------------------------===//
+ multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
+ def _gfx10 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ def _gfx10 :
+ VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ multiclass VOP2_Real_e32_gfx10<bits<6> op> {
+ def _e32_gfx10 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+ }
+ multiclass VOP2_Real_e64_gfx10<bits<6> op> {
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+ multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
+ let DecoderNamespace = "SDWA10";
+ }
+ }
+ multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "SDWA10";
+ }
+ }
+ multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
+ let DecoderNamespace = "DPP8";
+ }
+ }
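+
+ // The multiclasses above wrap a VOP2 pseudo in the GFX10 e32, e64 (VOP3),
+ // SDWA, DPP16 and DPP8 real encodings; Base_VOP2_Real_gfx10 and
+ // VOP2_Real_gfx10 further below simply compose them per opcode.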
+
+ //===------------------------- VOP2 (with name) -------------------------===//
+ multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ def _e32_gfx10 :
+ VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
+ !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ let DecoderNamespace = "SDWA10" in {
+ multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP16;
+ }
+ }
+ multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
+ string asmName> {
+ def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
+ VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # ps.Pfl.AsmDPP8;
+ let DecoderNamespace = "DPP8";
+ }
+ }
+ } // End DecoderNamespace = "SDWA10"
+
+ //===------------------------------ VOP2be ------------------------------===//
+ multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> {
+ def _e32_gfx10 :
+ VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
+ VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
+ let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
+ }
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
+ !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+ VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # Ps.AsmOperands;
+ }
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
+ let DecoderNamespace = "SDWA10";
+ }
+ def _dpp_gfx10 :
+ VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP);
+ let DecoderNamespace = "SDWA10";
+ }
+ def _dpp8_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
+ let DecoderNamespace = "DPP8";
+ }
+
+ let WaveSizePredicate = isWave32 in {
+ def _sdwa_w32_gfx10 :
+ Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
+ let isAsmParserOnly = 1;
+ let DecoderNamespace = "SDWA10";
+ }
+ def _dpp_w32_gfx10 :
+ VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
+ let isAsmParserOnly = 1;
+ }
+ def _dpp8_w32_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
+ let isAsmParserOnly = 1;
+ }
+ } // End WaveSizePredicate = isWave32
+
+ let WaveSizePredicate = isWave64 in {
+ def _sdwa_w64_gfx10 :
+ Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
+ VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
+ VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
+ let AsmString = asmName # Ps.AsmOperands;
+ let isAsmParserOnly = 1;
+ let DecoderNamespace = "SDWA10";
+ }
+ def _dpp_w64_gfx10 :
+ VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
+ let AsmString = asmName # AsmDPP;
+ let isAsmParserOnly = 1;
+ }
+ def _dpp8_w64_gfx10 :
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
+ let AsmString = asmName # AsmDPP8;
+ let isAsmParserOnly = 1;
+ }
+ } // End WaveSizePredicate = isWave64
+ }
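+
+ // Usage sketch (illustrative): an instantiation such as
+ //   defm V_ADD_CO_CI_U32 : VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
+ // emits the renamed _e32/_e64/_sdwa/_dpp/_dpp8 reals plus wave32/wave64
+ // asm-parser-only variants; the wave32 ones substitute "vcc" with "vcc_lo"
+ // in the asm string, as encoded by the !subst calls above.
+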
+ //===----------------------------- VOP3Only -----------------------------===//
+ multiclass VOP3Only_Real_gfx10<bits<10> op> {
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+
+ //===---------------------------- VOP3beOnly ----------------------------===//
+ multiclass VOP3beOnly_Real_gfx10<bits<10> op, string opName, string asmName> {
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3be_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
+ VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
+ let AsmString = asmName # Ps.AsmOperands;
+ }
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+multiclass Base_VOP2_Real_gfx10<bits<6> op> :
+ VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>;
+
+multiclass VOP2_Real_gfx10<bits<6> op> :
+ VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
+ VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
+
+multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
+ string asmName> :
+ VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
+ VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
+ VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
+ VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
+ VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
+
+defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>;
+defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>;
+defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>;
+defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>;
+defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>;
+defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
+defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
+defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
+defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
+defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
+defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
+defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
+defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
+defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
+defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;
+defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
+
+// VOP2 with no carry-in and no carry-out.
+defm V_ADD_NC_U32 :
+ VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">;
+defm V_SUB_NC_U32 :
+ VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">;
+defm V_SUBREV_NC_U32 :
+ VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
+
+// VOP2 with carry-in and carry-out.
+defm V_ADD_CO_CI_U32 :
+ VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
+defm V_SUB_CO_CI_U32 :
+ VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
+defm V_SUBREV_CO_CI_U32 :
+ VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
+
+// VOP3 only.
+defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>;
+defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>;
+defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>;
+defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>;
+defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>;
+defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
+defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
+defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>;
+defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>;
+
+// VOP3 only, with carry-out (no carry-in).
+defm V_ADD_CO_U32 :
+ VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">;
+defm V_SUB_CO_U32 :
+ VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">;
+defm V_SUBREV_CO_U32 :
+ VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">;
+
+let SubtargetPredicate = isGFX10Plus in {
+ defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;
+
+ defm : VOP2bInstAliases<
+ V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
+ defm : VOP2bInstAliases<
+ V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
+ defm : VOP2bInstAliases<
+ V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
+} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
-// VI
+// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
@@ -809,7 +1181,111 @@ class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
let Inst{31} = 0x0; //encoding
}
-let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass VOP2Only_Real_gfx6_gfx7<bits<6> op> {
+ def _gfx6_gfx7 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
+ def _gfx6_gfx7 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op> {
+ def _e32_gfx6_gfx7 :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+ }
+ multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op> {
+ def _e64_gfx6_gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+ multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op> {
+ def _e64_gfx6_gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
+ VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;
+
+multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
+ VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;
+
+multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
+ VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;
+
+multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
+ VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;
+
+defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>;
+defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>;
+defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>;
+defm V_LSHR_B32 : VOP2_Real_gfx6_gfx7<0x015>;
+defm V_ASHR_I32 : VOP2_Real_gfx6_gfx7<0x017>;
+defm V_LSHL_B32 : VOP2_Real_gfx6_gfx7<0x019>;
+defm V_BFM_B32 : VOP2_Real_gfx6_gfx7<0x01e>;
+defm V_BCNT_U32_B32 : VOP2_Real_gfx6_gfx7<0x022>;
+defm V_MBCNT_LO_U32_B32 : VOP2_Real_gfx6_gfx7<0x023>;
+defm V_MBCNT_HI_U32_B32 : VOP2_Real_gfx6_gfx7<0x024>;
+defm V_LDEXP_F32 : VOP2_Real_gfx6_gfx7<0x02b>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
+defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
+defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>;
+defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>;
+defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7<0x025>;
+defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7<0x026>;
+defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7<0x027>;
+defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>;
+defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>;
+defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;
+
+defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;
+
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+ defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+
+let SubtargetPredicate = isGFX6GFX7 in {
+ defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
+} // End SubtargetPredicate = isGFX6GFX7
+
+defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
+defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
+defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
+defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
+defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
+defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
+defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
+defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
+defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
+defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
+defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
+defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
+defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
+defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
+defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
+defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
+defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
+defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
+defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
+defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
+defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;
+
+//===----------------------------------------------------------------------===//
+// GFX8, GFX9 (VI).
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
multiclass VOP2_Real_MADK_vi <bits<6> op> {
def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -843,7 +1319,7 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
VOP2_Real_e32_vi<op>,
VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
-} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
multiclass VOP2_SDWA_Real <bits<6> op> {
def _sdwa_vi :
@@ -857,7 +1333,7 @@ multiclass VOP2_SDWA9_Real <bits<6> op> {
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
-let AssemblerPredicates = [isVIOnly] in {
+let AssemblerPredicates = [isGFX8Only] in {
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
def _e32_vi :
@@ -865,14 +1341,14 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "VI";
+ let DecoderNamespace = "GFX8";
}
def _e64_vi :
VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
let AsmString = AsmName # ps.AsmOperands;
- let DecoderNamespace = "VI";
+ let DecoderNamespace = "GFX8";
}
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
@@ -890,7 +1366,7 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
}
}
-let AssemblerPredicates = [isGFX9] in {
+let AssemblerPredicates = [isGFX9Only] in {
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
def _e32_gfx9 :
@@ -946,7 +1422,7 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
}
}
-} // AssemblerPredicates = [isGFX9]
+} // AssemblerPredicates = [isGFX9Only]
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
@@ -1035,7 +1511,7 @@ defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
@@ -1055,7 +1531,20 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
-} // End SubtargetPredicate = isVI
+defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;
+
+} // End SubtargetPredicate = isGFX8GFX9
+
+let SubtargetPredicate = isGFX9Only in {
+
+defm : VOP2bInstAliases<V_ADD_I32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">;
+defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">;
+defm : VOP2bInstAliases<V_SUB_I32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">;
+defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">;
+defm : VOP2bInstAliases<V_SUBREV_I32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">;
+defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;
+
+} // End SubtargetPredicate = isGFX9Only
let SubtargetPredicate = HasDLInsts in {
@@ -1063,3 +1552,35 @@ defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
} // End SubtargetPredicate = HasDLInsts
+
+multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
+ def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+}
+
+multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
+ VOP2_Real_e32_gfx10<op>,
+ VOP2_Real_dpp_gfx10<op>,
+ VOP2_Real_dpp8_gfx10<op>;
+
+let SubtargetPredicate = HasDot5Insts in {
+ defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
+ // NB: Opcode conflicts with V_DOT8C_I32_I4
+ // This opcode exists in GFX10.1* only.
+ defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+}
+
+let SubtargetPredicate = HasDot6Insts in {
+ defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
+ defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
+}
+
+let SubtargetPredicate = HasDot4Insts in {
+ defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
+}
+let SubtargetPredicate = HasDot3Insts in {
+ defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>;
+}
+
+let SubtargetPredicate = HasPkFmacF16Inst in {
+defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
+} // End SubtargetPredicate = HasPkFmacF16Inst
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 4b8c1f208a0e..21dbef9240e1 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1,9 +1,8 @@
//===-- VOP3Instructions.td - Vector Instruction Definitions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -111,6 +110,11 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
ret1));
}
+class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
+ imm:$cbsz, imm:$abid, imm:$blgp))];
+}
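+// The MAI (matrix-accumulate) pattern matches the three vector sources plus
+// the cbsz, abid and blgp immediates; VOP3Inst below uses it when the
+// profile sets IsMAI.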
+
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
VOP3_Pseudo<OpName, P,
!if(P.HasOpSel,
@@ -121,7 +125,9 @@ class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag,
getVOP3ModPat<P, node>.ret,
!if(P.HasIntClamp,
getVOP3ClampPat<P, node>.ret,
- getVOP3Pat<P, node>.ret))),
+ !if (P.IsMAI,
+ getVOP3MAIPat<P, node>.ret,
+ getVOP3Pat<P, node>.ret)))),
VOP3Only, 0, P.HasOpSel> {
let IntClamp = P.HasIntClamp;
@@ -144,33 +150,27 @@ def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
}
}
-class getVOP3VCC<VOPProfile P, SDPatternOperator node> {
- list<dag> ret =
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
- (i1 VCC)))];
-}
-
-class VOP3Features<bit Clamp, bit OpSel, bit Packed> {
+class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
bit HasClamp = Clamp;
bit HasOpSel = OpSel;
bit IsPacked = Packed;
+ bit IsMAI = MAI;
}
-def VOP3_REGULAR : VOP3Features<0, 0, 0>;
-def VOP3_CLAMP : VOP3Features<1, 0, 0>;
-def VOP3_OPSEL : VOP3Features<1, 1, 0>;
-def VOP3_PACKED : VOP3Features<1, 1, 1>;
+def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
+def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
+def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
+def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
+def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
+ let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
- let HasModifiers = !if(Features.IsPacked, 1, P.HasModifiers);
+ let HasModifiers = !if(Features.IsPacked, !if(Features.IsMAI, 0, 1), P.HasModifiers);
// FIXME: Hack to stop printing _e64
let Outs64 = (outs DstRC.RegClass:$vdst);
@@ -191,8 +191,9 @@ class VOP3_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProf
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
// v_div_scale_{f32|f64} do not support input modifiers.
let HasModifiers = 0;
+ let HasClamp = 0;
let HasOMod = 0;
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
}
@@ -212,7 +213,7 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
// FIXME: Hack to stop printing _e64
let DstRC = RegisterOperand<VReg_64>;
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp";
}
@@ -303,7 +304,7 @@ def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_li
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteQuarterRate32] in {
-def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>>;
+def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, mul>;
def V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
@@ -315,8 +316,7 @@ let Uses = [VCC, EXEC] in {
// if (vcc)
// result *= 2^32
//
-def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
- getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
+def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []> {
let SchedRW = [WriteFloatFMA];
}
// v_div_fmas_f64:
@@ -324,8 +324,7 @@ def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
// if (vcc)
// result *= 2^64
//
-def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
- getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
+def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []> {
let SchedRW = [WriteDouble];
let FPDPRounding = 1;
}
@@ -386,22 +385,21 @@ def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I3
}
let SchedRW = [Write64Bit] in {
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+let SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] in {
def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, shl>;
def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, srl>;
def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_PAT_GEN<VOP_I64_I64_I32>>, sra>;
def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
+} // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10]
-let SubtargetPredicate = isVI in {
-def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
-def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
-def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
-} // End SubtargetPredicate = isVI
+let SubtargetPredicate = isGFX8Plus in {
+def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
+def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
+def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
+} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit]
-let Predicates = [isVI] in {
+let Predicates = [isGFX8Plus] in {
def : GCNPat <
(getDivergentFrag<shl>.ret i64:$x, i32:$y),
(V_LSHLREV_B64 $y, $x)
@@ -417,7 +415,13 @@ def : AMDGPUPat <
}
-let SubtargetPredicate = isCIVI in {
+let SchedRW = [Write32Bit] in {
+let SubtargetPredicate = isGFX8Plus in {
+def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
+} // End SubtargetPredicate = isGFX8Plus
+} // End SchedRW = [Write32Bit]
+
+let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
@@ -431,27 +435,27 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
} // End SchedRW = [WriteDouble, WriteSALU]
} // End isCommutable = 1
-} // End SubtargetPredicate = isCIVI
+} // End SubtargetPredicate = isGFX7Plus
def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup> {
- let Predicates = [Has16BitInsts, isVIOnly];
+ let Predicates = [Has16BitInsts, isGFX8Only];
let FPDPRounding = 1;
}
def V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup> {
let renamedInGFX9 = 1;
- let Predicates = [Has16BitInsts, isGFX9];
+ let Predicates = [Has16BitInsts, isGFX9Plus];
let FPDPRounding = 1;
}
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma> {
- let Predicates = [Has16BitInsts, isVIOnly];
+ let Predicates = [Has16BitInsts, isGFX8Only];
let FPDPRounding = 1;
}
def V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, fma> {
let renamedInGFX9 = 1;
- let Predicates = [Has16BitInsts, isGFX9];
+ let Predicates = [Has16BitInsts, isGFX9Plus];
let FPDPRounding = 1;
}
@@ -463,36 +467,58 @@ def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CL
let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in {
-def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>>;
+def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
+ [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
+ (i32 imm:$attr),
+ (i32 imm:$src0_modifiers),
+ (f32 VRegSrc_32:$src2),
+ (i32 imm:$src2_modifiers),
+ (i1 imm:$high),
+ (i1 imm:$clamp)))]>;
} // End Uses = [M0, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Only in {
def V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> {
let FPDPRounding = 1;
}
+} // End SubtargetPredicate = isGFX9Only
+
+let SubtargetPredicate = isGFX9Plus in {
def V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
let Uses = [M0, EXEC], FPDPRounding = 1 in {
-def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>>;
-def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
+def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
+ [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
+ (i32 imm:$attr),
+ (i32 imm:$src0_modifiers),
+ (i1 imm:$high),
+ (i1 imm:$clamp),
+ (i32 imm:$omod)))]>;
+def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
+ [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
+ (i32 imm:$attr),
+ (i32 imm:$src0_modifiers),
+ (f32 VRegSrc_32:$src2),
+ (i32 imm:$src2_modifiers),
+ (i1 imm:$high),
+ (i1 imm:$clamp),
+ (i32 imm:$omod)))]>;
} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
-let SubtargetPredicate = isVI in {
+let SubtargetPredicate = isGFX8GFX9 in {
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
+} // End SubtargetPredicate = isGFX8GFX9
-def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
-} // End SubtargetPredicate = isVI
-
-let Predicates = [Has16BitInsts] in {
+let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
Instruction inst, SDPatternOperator op3> {
@@ -506,7 +532,23 @@ def : GCNPat <
defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
-} // End Predicates = [Has16BitInsts]
+} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
+
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+
+multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
+ Instruction inst, SDPatternOperator op3> {
+def : GCNPat <
+ (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
+ (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
+>;
+
+}
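+// Unlike the pre-GFX10 Ternary_i16_Pats, the variant above targets the
+// opsel-capable V_MAD_*_gfx9 forms, so it supplies explicit SRCMODS.NONE
+// operands for each source and DSTCLAMP.NONE for the result.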
+
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9, zext>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9, sext>;
+
+} // End Predicates = [Has16BitInsts, isGFX10Plus]
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
(ops node:$x, node:$y, node:$z),
@@ -528,7 +570,9 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
if (!Operands[i]->isDivergent() &&
!isInlineImmediate(Operands[i].getNode())) {
ConstantBusUses++;
- if (ConstantBusUses >= 2)
+ // This uses AMDGPU::V_ADD3_U32, but all three-operand instructions
+ // have the same constant bus limit.
+ if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32))
return false;
}
}
@@ -539,7 +583,7 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
let PredicateCodeUsesOperands = 1;
}
-let SubtargetPredicate = isGFX9 in {
+let SubtargetPredicate = isGFX9Plus in {
def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -589,7 +633,38 @@ def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32>;
def : ThreeOp_i32_Pats<or, or, V_OR3_B32>;
def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>;
-} // End SubtargetPredicate = isGFX9
+} // End SubtargetPredicate = isGFX9Plus
+
+def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
+ let Src0RC64 = VRegSrc_32;
+ let Src1RC64 = SCSrc_b32;
+ let Src2RC64 = SCSrc_b32;
+ let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
+ IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
+ IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
+ VGPR_32:$vdst_in, op_sel:$op_sel);
+ let HasClamp = 0;
+ let HasOMod = 0;
+}
+
+let SubtargetPredicate = isGFX10Plus in {
+ def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+ def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;
+
+ let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+ def V_PERMLANE16_B32 : VOP3Inst <"v_permlane16_b32", VOP3_PERMLANE_Profile>;
+ def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
+ } // End $vdst = $vdst_in, DisableEncoding $vdst_in
+
+ def : GCNPat<
+ (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+ (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
+ >;
+ def : GCNPat<
+ (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+ (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
+ >;
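+
+ // Operand-mapping note for the patterns above: the intrinsic's fi and bc
+ // bits are passed through as_i1imm into the src0/src1 modifier operands,
+ // the remaining i32 sources map directly to src0/src1/src2, and $vdst_in
+ // is tied to $vdst by the "$vdst = $vdst_in" constraint.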
+} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
// Integer Clamp Patterns
@@ -631,111 +706,239 @@ def : IntClampPat<V_MQSAD_PK_U16_U8, int_amdgcn_mqsad_pk_u16_u8>;
def : IntClampPat<V_QSAD_PK_U16_U8, int_amdgcn_qsad_pk_u16_u8>;
def : IntClampPat<V_MQSAD_U32_U8, int_amdgcn_mqsad_u32_u8>;
+
//===----------------------------------------------------------------------===//
-// Target
+// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// SI
+// GFX10.
//===----------------------------------------------------------------------===//
-let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
-
-multiclass VOP3_Real_si<bits<9> op> {
- def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
-}
-
-multiclass VOP3be_Real_si<bits<9> op> {
- def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
-}
-
-} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
-
-defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>;
-defm V_MAD_F32 : VOP3_Real_si <0x141>;
-defm V_MAD_I32_I24 : VOP3_Real_si <0x142>;
-defm V_MAD_U32_U24 : VOP3_Real_si <0x143>;
-defm V_CUBEID_F32 : VOP3_Real_si <0x144>;
-defm V_CUBESC_F32 : VOP3_Real_si <0x145>;
-defm V_CUBETC_F32 : VOP3_Real_si <0x146>;
-defm V_CUBEMA_F32 : VOP3_Real_si <0x147>;
-defm V_BFE_U32 : VOP3_Real_si <0x148>;
-defm V_BFE_I32 : VOP3_Real_si <0x149>;
-defm V_BFI_B32 : VOP3_Real_si <0x14a>;
-defm V_FMA_F32 : VOP3_Real_si <0x14b>;
-defm V_FMA_F64 : VOP3_Real_si <0x14c>;
-defm V_LERP_U8 : VOP3_Real_si <0x14d>;
-defm V_ALIGNBIT_B32 : VOP3_Real_si <0x14e>;
-defm V_ALIGNBYTE_B32 : VOP3_Real_si <0x14f>;
-defm V_MULLIT_F32 : VOP3_Real_si <0x150>;
-defm V_MIN3_F32 : VOP3_Real_si <0x151>;
-defm V_MIN3_I32 : VOP3_Real_si <0x152>;
-defm V_MIN3_U32 : VOP3_Real_si <0x153>;
-defm V_MAX3_F32 : VOP3_Real_si <0x154>;
-defm V_MAX3_I32 : VOP3_Real_si <0x155>;
-defm V_MAX3_U32 : VOP3_Real_si <0x156>;
-defm V_MED3_F32 : VOP3_Real_si <0x157>;
-defm V_MED3_I32 : VOP3_Real_si <0x158>;
-defm V_MED3_U32 : VOP3_Real_si <0x159>;
-defm V_SAD_U8 : VOP3_Real_si <0x15a>;
-defm V_SAD_HI_U8 : VOP3_Real_si <0x15b>;
-defm V_SAD_U16 : VOP3_Real_si <0x15c>;
-defm V_SAD_U32 : VOP3_Real_si <0x15d>;
-defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>;
-defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>;
-defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>;
-defm V_LSHL_B64 : VOP3_Real_si <0x161>;
-defm V_LSHR_B64 : VOP3_Real_si <0x162>;
-defm V_ASHR_I64 : VOP3_Real_si <0x163>;
-defm V_ADD_F64 : VOP3_Real_si <0x164>;
-defm V_MUL_F64 : VOP3_Real_si <0x165>;
-defm V_MIN_F64 : VOP3_Real_si <0x166>;
-defm V_MAX_F64 : VOP3_Real_si <0x167>;
-defm V_LDEXP_F64 : VOP3_Real_si <0x168>;
-defm V_MUL_LO_U32 : VOP3_Real_si <0x169>;
-defm V_MUL_HI_U32 : VOP3_Real_si <0x16a>;
-defm V_MUL_LO_I32 : VOP3_Real_si <0x16b>;
-defm V_MUL_HI_I32 : VOP3_Real_si <0x16c>;
-defm V_DIV_SCALE_F32 : VOP3be_Real_si <0x16d>;
-defm V_DIV_SCALE_F64 : VOP3be_Real_si <0x16e>;
-defm V_DIV_FMAS_F32 : VOP3_Real_si <0x16f>;
-defm V_DIV_FMAS_F64 : VOP3_Real_si <0x170>;
-defm V_MSAD_U8 : VOP3_Real_si <0x171>;
-defm V_MQSAD_PK_U16_U8 : VOP3_Real_si <0x173>;
-defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>;
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass VOP3_Real_gfx10<bits<10> op> {
+ def _gfx10 :
+ VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
+ string asmName> {
+ def _gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
+ VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+ multiclass VOP3be_Real_gfx10<bits<10> op> {
+ def _gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP3Interp_Real_gfx10<bits<10> op> {
+ def _gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP3Interp_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
+ def _gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
+ string asmName> {
+ def _gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(opName), SIEncodingFamily.GFX10>,
+ VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName).Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName);
+ let AsmString = asmName # ps.AsmOperands;
+ }
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm V_READLANE_B32 : VOP3_Real_gfx10<0x360>;
+
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+ defm V_WRITELANE_B32 : VOP3_Real_gfx10<0x361>;
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+
+defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>;
+defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>;
+defm V_LSHRREV_B64 : VOP3_Real_gfx10<0x300>;
+defm V_ASHRREV_I64 : VOP3_Real_gfx10<0x301>;
+defm V_PERM_B32 : VOP3_Real_gfx10<0x344>;
+defm V_XAD_U32 : VOP3_Real_gfx10<0x345>;
+defm V_LSHL_ADD_U32 : VOP3_Real_gfx10<0x346>;
+defm V_ADD_LSHL_U32 : VOP3_Real_gfx10<0x347>;
+defm V_ADD3_U32 : VOP3_Real_gfx10<0x36d>;
+defm V_LSHL_OR_B32 : VOP3_Real_gfx10<0x36f>;
+defm V_AND_OR_B32 : VOP3_Real_gfx10<0x371>;
+defm V_OR3_B32 : VOP3_Real_gfx10<0x372>;
+
+// TODO-GFX10: add MC tests for v_add/sub_nc_i16
+defm V_ADD_NC_I16 :
+ VOP3OpSel_Real_gfx10_with_name<0x30d, "V_ADD_I16", "v_add_nc_i16">;
+defm V_SUB_NC_I16 :
+ VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
+defm V_SUB_NC_I32 :
+ VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32_gfx9", "v_sub_nc_i32">;
+defm V_ADD_NC_I32 :
+ VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32_gfx9", "v_add_nc_i32">;
+
+defm V_INTERP_P1LL_F16 : VOP3Interp_Real_gfx10<0x342>;
+defm V_INTERP_P1LV_F16 : VOP3Interp_Real_gfx10<0x343>;
+defm V_INTERP_P2_F16 : VOP3Interp_Real_gfx10<0x35a>;
+
+defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx10<0x311>;
+defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx10<0x312>;
+defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx10<0x313>;
+
+defm V_MIN3_F16 : VOP3OpSel_Real_gfx10<0x351>;
+defm V_MIN3_I16 : VOP3OpSel_Real_gfx10<0x352>;
+defm V_MIN3_U16 : VOP3OpSel_Real_gfx10<0x353>;
+defm V_MAX3_F16 : VOP3OpSel_Real_gfx10<0x354>;
+defm V_MAX3_I16 : VOP3OpSel_Real_gfx10<0x355>;
+defm V_MAX3_U16 : VOP3OpSel_Real_gfx10<0x356>;
+defm V_MED3_F16 : VOP3OpSel_Real_gfx10<0x357>;
+defm V_MED3_I16 : VOP3OpSel_Real_gfx10<0x358>;
+defm V_MED3_U16 : VOP3OpSel_Real_gfx10<0x359>;
+defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx10<0x373>;
+defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx10<0x375>;
+
+defm V_MAD_U16 :
+ VOP3OpSel_Real_gfx10_with_name<0x340, "V_MAD_U16_gfx9", "v_mad_u16">;
+defm V_FMA_F16 :
+ VOP3OpSel_Real_gfx10_with_name<0x34b, "V_FMA_F16_gfx9", "v_fma_f16">;
+defm V_MAD_I16 :
+ VOP3OpSel_Real_gfx10_with_name<0x35e, "V_MAD_I16_gfx9", "v_mad_i16">;
+defm V_DIV_FIXUP_F16 :
+ VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+
+// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
+// (they do not support SDWA or DPP).
+defm V_ADD_NC_U16 : VOP3_Real_gfx10_with_name<0x303, "V_ADD_U16_e64", "v_add_nc_u16">;
+defm V_SUB_NC_U16 : VOP3_Real_gfx10_with_name<0x304, "V_SUB_U16_e64", "v_sub_nc_u16">;
+defm V_MUL_LO_U16 : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16_e64", "v_mul_lo_u16">;
+defm V_LSHRREV_B16 : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16_e64", "v_lshrrev_b16">;
+defm V_ASHRREV_I16 : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16_e64", "v_ashrrev_i16">;
+defm V_MAX_U16 : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16_e64", "v_max_u16">;
+defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16_e64", "v_max_i16">;
+defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16_e64", "v_min_u16">;
+defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16_e64", "v_min_i16">;
+defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_e64", "v_lshlrev_b16">;
+defm V_PERMLANE16_B32 : VOP3OpSel_Real_gfx10<0x377>;
+defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;
//===----------------------------------------------------------------------===//
-// CI
+// GFX7, GFX10.
//===----------------------------------------------------------------------===//
-multiclass VOP3_Real_ci<bits<9> op> {
- def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
- let AssemblerPredicates = [isCIOnly];
- let DecoderNamespace = "CI";
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+ multiclass VOP3_Real_gfx7<bits<10> op> {
+ def _gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
-}
-
-multiclass VOP3be_Real_ci<bits<9> op> {
- def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
- VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
- let AssemblerPredicates = [isCIOnly];
- let DecoderNamespace = "CI";
+ multiclass VOP3be_Real_gfx7<bits<10> op> {
+ def _gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
-}
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+multiclass VOP3_Real_gfx7_gfx10<bits<10> op> :
+ VOP3_Real_gfx7<op>, VOP3_Real_gfx10<op>;
+
+multiclass VOP3be_Real_gfx7_gfx10<bits<10> op> :
+ VOP3be_Real_gfx7<op>, VOP3be_Real_gfx10<op>;
+
+defm V_QSAD_PK_U16_U8 : VOP3_Real_gfx7_gfx10<0x172>;
+defm V_MQSAD_U32_U8 : VOP3_Real_gfx7_gfx10<0x175>;
+defm V_MAD_U64_U32 : VOP3be_Real_gfx7_gfx10<0x176>;
+defm V_MAD_I64_I32 : VOP3be_Real_gfx7_gfx10<0x177>;
-defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>;
-defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>;
-defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>;
-defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
+//===----------------------------------------------------------------------===//
+// GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+ multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
+ def _gfx6_gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+ multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
+ def _gfx6_gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME).Pfl>;
+ }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass VOP3_Real_gfx6_gfx7_gfx10<bits<10> op> :
+ VOP3_Real_gfx6_gfx7<op>, VOP3_Real_gfx10<op>;
+
+multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
+ VOP3be_Real_gfx6_gfx7<op>, VOP3be_Real_gfx10<op>;
+
+defm V_LSHL_B64 : VOP3_Real_gfx6_gfx7<0x161>;
+defm V_LSHR_B64 : VOP3_Real_gfx6_gfx7<0x162>;
+defm V_ASHR_I64 : VOP3_Real_gfx6_gfx7<0x163>;
+
+defm V_MAD_LEGACY_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
+defm V_MAD_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
+defm V_MAD_I32_I24 : VOP3_Real_gfx6_gfx7_gfx10<0x142>;
+defm V_MAD_U32_U24 : VOP3_Real_gfx6_gfx7_gfx10<0x143>;
+defm V_CUBEID_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x144>;
+defm V_CUBESC_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x145>;
+defm V_CUBETC_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x146>;
+defm V_CUBEMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x147>;
+defm V_BFE_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x148>;
+defm V_BFE_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x149>;
+defm V_BFI_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
+defm V_FMA_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
+defm V_FMA_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
+defm V_LERP_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
+defm V_ALIGNBIT_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
+defm V_ALIGNBYTE_B32 : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
+defm V_MULLIT_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
+defm V_MIN3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
+defm V_MIN3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
+defm V_MIN3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x153>;
+defm V_MAX3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x154>;
+defm V_MAX3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x155>;
+defm V_MAX3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x156>;
+defm V_MED3_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x157>;
+defm V_MED3_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x158>;
+defm V_MED3_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x159>;
+defm V_SAD_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x15a>;
+defm V_SAD_HI_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x15b>;
+defm V_SAD_U16 : VOP3_Real_gfx6_gfx7_gfx10<0x15c>;
+defm V_SAD_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x15d>;
+defm V_CVT_PK_U8_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x15e>;
+defm V_DIV_FIXUP_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x15f>;
+defm V_DIV_FIXUP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x160>;
+defm V_ADD_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x164>;
+defm V_MUL_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x165>;
+defm V_MIN_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x166>;
+defm V_MAX_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
+defm V_LDEXP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
+defm V_MUL_LO_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
+defm V_MUL_HI_U32 : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
+defm V_MUL_LO_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16b>;
+defm V_MUL_HI_I32 : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
+defm V_DIV_FMAS_F32 : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
+defm V_DIV_FMAS_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
+defm V_MSAD_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x171>;
+defm V_MQSAD_PK_U16_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x173>;
+defm V_TRIG_PREOP_F64 : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
+defm V_DIV_SCALE_F32 : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
+defm V_DIV_SCALE_F64 : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
//===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
-let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
multiclass VOP3_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -757,9 +960,9 @@ multiclass VOP3Interp_Real_vi<bits<10> op> {
VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}
-} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"
-let AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" in {
+let AssemblerPredicates = [isGFX8Only], DecoderNamespace = "GFX8" in {
multiclass VOP3_F16_Real_vi<bits<10> op> {
def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
@@ -771,9 +974,9 @@ multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}
-} // End AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI"
+} // End AssemblerPredicates = [isGFX8Only], DecoderNamespace = "GFX8"
-let AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9" in {
+let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
@@ -807,7 +1010,7 @@ multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
}
}
-} // End AssemblerPredicates = [isGFX9], DecoderNamespace = "GFX9"
+} // End AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9"
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>;
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 91b45583c848..55ee5f6577cf 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1,9 +1,8 @@
//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -70,6 +69,16 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I1
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+
+// Undo the sub x, c -> add x, -c canonicalization, since c is more likely to be
+// an inline immediate than -c.
+// The constant will be emitted as a mov, and folded later.
+// TODO: We could directly encode the immediate now
+def : GCNPat<
+ (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
+ (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
+>;
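+// For illustration: an expression such as x - <7, 7>, canonicalized by the DAG
+// to x + <-7, -7>, is re-selected here as v_pk_sub_u16 x, <7, 7>, letting the
+// positive constant use an inline-immediate encoding.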
+
multiclass MadFmaMixPats<SDPatternOperator fma_like,
Instruction mix_inst,
Instruction mixlo_inst,
@@ -239,29 +248,39 @@ class UDot2Pat<Instruction Inst> : GCNPat <
(AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
(and i32:$src1, (i32 65535)))
),
- (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
->;
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
+ let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+}
class SDot2Pat<Instruction Inst> : GCNPat <
(add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
(sra i32:$src1, (i32 16))), i32:$src2),
(AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
(sext_inreg i32:$src1, i16))),
- (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
->;
+ (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
+ let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
+}
-let SubtargetPredicate = HasDotInsts in {
+let SubtargetPredicate = HasDot2Insts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
-def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
-def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+def V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+def V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
+
+} // End SubtargetPredicate = HasDot1Insts
+
multiclass DotPats<SDPatternOperator dot_op,
VOP3PInst dot_inst> {
+ let SubtargetPredicate = dot_inst.SubtargetPredicate in
def : GCNPat <
(dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
(dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
@@ -281,12 +300,14 @@ def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;
foreach Type = ["U", "I"] in
+ let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
def : GCNPat <
!cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
(add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
(!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
foreach Type = ["U", "I"] in
+ let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
def : GCNPat <
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
[1, 2, 3, 4, 5, 6, 7], lhs, y,
@@ -296,19 +317,101 @@ foreach Type = ["U", "I"] in
// Different variants of dot8 code-gen dag patterns are not generated through table-gen
// due to a huge increase in compile time. Directly handle the pattern generated by the frontend (FE) here.
foreach Type = ["U", "I"] in
+ let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
def : GCNPat <
!cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
[7, 1, 2, 3, 4, 5, 6], lhs, y,
(NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
(!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
-} // End SubtargetPredicate = HasDotInsts
+def ADst_32 : VOPDstOperand<AGPR_32>;
+def ADst_128 : VOPDstOperand<AReg_128>;
+def ADst_512 : VOPDstOperand<AReg_512>;
+def ADst_1024 : VOPDstOperand<AReg_1024>;
+
+def VOPProfileAccRead : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
+ let Src0RC64 = ARegSrc_32;
+}
+
+def VOPProfileAccWrite : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
+ let DstRC = ADst_32;
+ let Src0RC64 = VISrc_b32;
+}
+
+class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
+ RegisterOperand SrcABRC = AVSrc_32>
+ : VOP3_Profile<P, VOP3_MAI> {
+ let DstRC = _DstRC;
+ let Src0RC64 = SrcABRC;
+ let Src1RC64 = SrcABRC;
+ let Src2RC64 = _SrcRC;
+ let HasOpSel = 0;
+ let HasClamp = 0;
+ let HasModifiers = 0;
+ let Asm64 = " $vdst, $src0, $src1, $src2$cbsz$abid$blgp";
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
+}
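+// In this profile the A/B sources (src0/src1) default to the 32-bit AGPR-or-VGPR
+// class (AVSrc_32), the accumulator input (src2) and the destination use AGPR
+// tuples, and cbsz/abid/blgp are the MFMA-specific modifier operands.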
+
+def VOPProfileMAI_F32_F32_X4 : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, AISrc_128_f32, ADst_128>;
+def VOPProfileMAI_F32_F32_X16 : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, AISrc_512_f32, ADst_512>;
+def VOPProfileMAI_F32_F32_X32 : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32, AISrc_1024_f32, ADst_1024>;
+def VOPProfileMAI_I32_I32_X4 : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32, AISrc_128_b32, ADst_128>;
+def VOPProfileMAI_I32_I32_X16 : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32, AISrc_512_b32, ADst_512>;
+def VOPProfileMAI_I32_I32_X32 : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32, AISrc_1024_b32, ADst_1024>;
+def VOPProfileMAI_F32_V2I16_X4 : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32, AISrc_128_b32, ADst_128>;
+def VOPProfileMAI_F32_V2I16_X16 : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32, ADst_512>;
+def VOPProfileMAI_F32_V2I16_X32 : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
+def VOPProfileMAI_F32_V4F16_X4 : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
+def VOPProfileMAI_F32_V4F16_X16 : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
+def VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
+
+let Predicates = [HasMAIInsts] in {
+def V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
+def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> {
+ let isMoveImm = 1;
+}
+
+let isConvergent = 1 in {
+def V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>;
+def V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>;
+def V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>;
+def V_MFMA_F32_4X4X2BF16 : VOP3Inst<"v_mfma_f32_4x4x2bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_4x4x2bf16>;
+def V_MFMA_F32_16X16X1F32 : VOP3Inst<"v_mfma_f32_16x16x1f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_16x16x1f32>;
+def V_MFMA_F32_16X16X4F32 : VOP3Inst<"v_mfma_f32_16x16x4f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_16x16x4f32>;
+def V_MFMA_F32_16X16X4F16 : VOP3Inst<"v_mfma_f32_16x16x4f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_16x16x4f16>;
+def V_MFMA_F32_16X16X16F16 : VOP3Inst<"v_mfma_f32_16x16x16f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_16x16x16f16>;
+def V_MFMA_I32_16X16X4I8 : VOP3Inst<"v_mfma_i32_16x16x4i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_16x16x4i8>;
+def V_MFMA_I32_16X16X16I8 : VOP3Inst<"v_mfma_i32_16x16x16i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_16x16x16i8>;
+def V_MFMA_F32_16X16X2BF16 : VOP3Inst<"v_mfma_f32_16x16x2bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_16x16x2bf16>;
+def V_MFMA_F32_16X16X8BF16 : VOP3Inst<"v_mfma_f32_16x16x8bf16", VOPProfileMAI_F32_V2I16_X4, int_amdgcn_mfma_f32_16x16x8bf16>;
+def V_MFMA_F32_32X32X1F32 : VOP3Inst<"v_mfma_f32_32x32x1f32", VOPProfileMAI_F32_F32_X32, int_amdgcn_mfma_f32_32x32x1f32>;
+def V_MFMA_F32_32X32X2F32 : VOP3Inst<"v_mfma_f32_32x32x2f32", VOPProfileMAI_F32_F32_X16, int_amdgcn_mfma_f32_32x32x2f32>;
+def V_MFMA_F32_32X32X4F16 : VOP3Inst<"v_mfma_f32_32x32x4f16", VOPProfileMAI_F32_V4F16_X32, int_amdgcn_mfma_f32_32x32x4f16>;
+def V_MFMA_F32_32X32X8F16 : VOP3Inst<"v_mfma_f32_32x32x8f16", VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_32x32x8f16>;
+def V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I32_I32_X32, int_amdgcn_mfma_i32_32x32x4i8>;
+def V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>;
+def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
+def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
+} // End isConvergent = 1
+
+} // End SubtargetPredicate = HasMAIInsts
+
+def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
+def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
let AssemblerPredicates = [HasVOP3PInsts];
- let DecoderNamespace = "VI";
+ let DecoderNamespace = "GFX8";
+ }
+}
+
+multiclass VOP3P_Real_MAI<bits<10> op> {
+ def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [HasMAIInsts];
+ let DecoderNamespace = "GFX8";
}
}
@@ -352,14 +455,97 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
}
-let SubtargetPredicate = HasDotInsts in {
+let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
-defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
-defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
-} // End SubtargetPredicate = HasDotInsts
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
+defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
+
+} // End SubtargetPredicate = HasDot1Insts
+
+let SubtargetPredicate = HasMAIInsts in {
+
+defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x3d8>;
+defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>;
+defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x3c0>;
+defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x3c1>;
+defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x3c2>;
+defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x3c4>;
+defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x3c5>;
+defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x3c8>;
+defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x3c9>;
+defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x3ca>;
+defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x3cc>;
+defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>;
+defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x3d0>;
+defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x3d1>;
+defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x3d2>;
+defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x3d4>;
+defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x3d5>;
+defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>;
+defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>;
+defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x3eb>;
+defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>;
+defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;
+
+} // End SubtargetPredicate = HasMAIInsts
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ multiclass VOP3P_Real_gfx10<bits<10> op> {
+ def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
+ VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
+ }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>;
+defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>;
+defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>;
+defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
+defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>;
+defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>;
+defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>;
+defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>;
+defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>;
+defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>;
+defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>;
+defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>;
+defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>;
+defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>;
+defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>;
+defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>;
+defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>;
+defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>;
+defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>;
+
+let SubtargetPredicate = HasDot2Insts in {
+
+defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
+defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
+defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
+defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>;
+defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>;
+
+} // End SubtargetPredicate = HasDot2Insts
+
+let SubtargetPredicate = HasDot1Insts in {
+
+defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>;
+defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>;
+
+} // End SubtargetPredicate = HasDot1Insts
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index 091cac8cd35c..b3513e383d10 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1,9 +1,8 @@
//===-- VOPCInstructions.td - Vector Instruction Definitions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,14 +53,29 @@ class VOPC_SDWA9e <bits<8> op, VOPProfile P> : VOP_SDWA9Be <P> {
// an explicit $dst.
class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> :
VOPProfile <[i1, vt0, vt1, untyped]> {
- let Asm32 = "vcc, $src0, $src1";
+ let Asm32 = "$src0, $src1";
// The destination for 32-bit encoding is implicit.
let HasDst32 = 0;
- let Outs64 = (outs VOPDstS64:$sdst);
+ let Outs64 = (outs VOPDstS64orS32:$sdst);
list<SchedReadWrite> Schedule = sched;
}
-class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[]> :
+class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0,
+ ValueType vt1 = vt0> :
+ VOPC_Profile<sched, vt0, vt1> {
+ let Outs64 = (outs );
+ let OutsSDWA = (outs );
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let Asm64 = !if(isFloatType<Src0VT>.ret, "$src0_modifiers, $src1_modifiers$clamp",
+ "$src0, $src1");
+ let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
+ let EmitDst = 0;
+}
+
+class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[],
+ bit DefVcc = 1> :
InstSI<(outs), P.Ins32, "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
@@ -81,9 +95,7 @@ class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[]> :
let VALU = 1;
let VOPC = 1;
let Uses = [EXEC];
- let Defs = [VCC];
-
- let SubtargetPredicate = isGCN;
+ let Defs = !if(DefVcc, [VCC], []);
VOPProfile Pfl = P;
}
@@ -115,8 +127,9 @@ class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
}
// This class is used only with VOPC instructions. Use $sdst for out operand
-class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
- InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
+class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst,
+ string Asm32 = ps.Pfl.Asm32, VOPProfile p = ps.Pfl> :
+ InstAlias <ps.OpName#" "#Asm32, (inst)>, PredicateControl {
field bit isCompare;
field bit isCommutable;
@@ -149,6 +162,27 @@ class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
let SubtargetPredicate = AssemblerPredicate;
}
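+// The aliases below let the assembler accept each compare either in the bare
+// "$src0, $src1" form or, depending on wave size, with an explicit "vcc, ..."
+// (wave64) or "vcc_lo, ..." (wave32) mask operand, all selecting the 32-bit
+// encoding.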
+multiclass VOPCInstAliases <string OpName, string Arch> {
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+ !cast<Instruction>(OpName#"_e32_"#Arch)>;
+ let WaveSizePredicate = isWave32 in {
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+ !cast<Instruction>(OpName#"_e32_"#Arch),
+ "vcc_lo, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
+ }
+ let WaveSizePredicate = isWave64 in {
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+ !cast<Instruction>(OpName#"_e32_"#Arch),
+ "vcc, "#!cast<VOP3_Pseudo>(OpName#"_e64").Pfl.Asm32>;
+ }
+}
+
+multiclass VOPCXInstAliases <string OpName, string Arch> {
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(OpName#"_e64"),
+ !cast<Instruction>(OpName#"_e32_"#Arch)>;
+}
+
+
class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set i1:$sdst,
@@ -161,6 +195,10 @@ class getVOPCPat64 <PatLeaf cond, VOPProfile P> : LetDummies {
[(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]);
}
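+// Mixed into the v_cmpx pseudos below: records whether a definition carries an
+// explicit sdst and the name of its counterpart form, so the sdst and no-sdst
+// variants can be related to each other.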
+class VCMPXNoSDstTable <bit has_sdst, string Name> {
+ bit HasSDst = has_sdst;
+ string NoSDstOp = Name;
+}
multiclass VOPC_Pseudos <string opName,
VOPC_Profile P,
@@ -169,7 +207,8 @@ multiclass VOPC_Pseudos <string opName,
bit DefExec = 0> {
def _e32 : VOPC_Pseudo <opName, P>,
- Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
+ VCMPXNoSDstTable<1, opName#"_e32"> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
@@ -178,7 +217,8 @@ multiclass VOPC_Pseudos <string opName,
}
def _e64 : VOP3_Pseudo<opName, P, getVOPCPat64<cond, P>.ret>,
- Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>,
+ VCMPXNoSDstTable<1, opName#"_e64"> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
let isCompare = 1;
@@ -193,6 +233,44 @@ multiclass VOPC_Pseudos <string opName,
}
}
+let SubtargetPredicate = HasSdstCMPX in {
+multiclass VOPCX_Pseudos <string opName,
+ VOPC_Profile P, VOPC_Profile P_NoSDst,
+ PatLeaf cond = COND_NULL,
+ string revOp = opName> :
+ VOPC_Pseudos <opName, P, cond, revOp, 1> {
+
+ def _nosdst_e32 : VOPC_Pseudo <opName#"_nosdst", P_NoSDst, [], 0>,
+ Commutable_REV<revOp#"_nosdst_e32", !eq(revOp, opName)>,
+ VCMPXNoSDstTable<0, opName#"_e32"> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isConvergent = 1;
+ let isCompare = 1;
+ let isCommutable = 1;
+ let SubtargetPredicate = HasNoSdstCMPX;
+ }
+
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ Commutable_REV<revOp#"_nosdst_e64", !eq(revOp, opName)>,
+ VCMPXNoSDstTable<0, opName#"_e64"> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isCompare = 1;
+ let isCommutable = 1;
+ let SubtargetPredicate = HasNoSdstCMPX;
+ }
+
+ def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isConvergent = 1;
+ let isCompare = 1;
+ let SubtargetPredicate = HasNoSdstCMPX;
+ }
+}
+} // End SubtargetPredicate = HasSdstCMPX
+
def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
def VOPC_I1_F32_F32 : VOPC_Profile<[Write32Bit], f32>;
def VOPC_I1_F64_F64 : VOPC_Profile<[WriteDoubleAdd], f64>;
@@ -200,6 +278,13 @@ def VOPC_I1_I16_I16 : VOPC_Profile<[Write32Bit], i16>;
def VOPC_I1_I32_I32 : VOPC_Profile<[Write32Bit], i32>;
def VOPC_I1_I64_I64 : VOPC_Profile<[Write64Bit], i64>;
+def VOPC_F16_F16 : VOPC_NoSdst_Profile<[Write32Bit], f16>;
+def VOPC_F32_F32 : VOPC_NoSdst_Profile<[Write32Bit], f32>;
+def VOPC_F64_F64 : VOPC_NoSdst_Profile<[Write64Bit], f64>;
+def VOPC_I16_I16 : VOPC_NoSdst_Profile<[Write32Bit], i16>;
+def VOPC_I32_I32 : VOPC_NoSdst_Profile<[Write32Bit], i32>;
+def VOPC_I64_I64 : VOPC_NoSdst_Profile<[Write64Bit], i64>;
+
multiclass VOPC_F16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
VOPC_Pseudos <opName, VOPC_I1_F16_F16, cond, revOp, 0>;
@@ -219,22 +304,22 @@ multiclass VOPC_I64 <string opName, PatLeaf cond = COND_NULL, string revOp = opN
VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
multiclass VOPCX_F16 <string opName, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_F16_F16, COND_NULL, revOp, 1>;
+ VOPCX_Pseudos <opName, VOPC_I1_F16_F16, VOPC_F16_F16, COND_NULL, revOp>;
multiclass VOPCX_F32 <string opName, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_F32_F32, COND_NULL, revOp, 1>;
+ VOPCX_Pseudos <opName, VOPC_I1_F32_F32, VOPC_F32_F32, COND_NULL, revOp>;
multiclass VOPCX_F64 <string opName, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_F64_F64, COND_NULL, revOp, 1>;
+ VOPCX_Pseudos <opName, VOPC_I1_F64_F64, VOPC_F64_F64, COND_NULL, revOp>;
multiclass VOPCX_I16 <string opName, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_I16_I16, COND_NULL, revOp, 1>;
+ VOPCX_Pseudos <opName, VOPC_I1_I16_I16, VOPC_I16_I16, COND_NULL, revOp>;
multiclass VOPCX_I32 <string opName, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_I32_I32, COND_NULL, revOp, 1>;
+ VOPCX_Pseudos <opName, VOPC_I1_I32_I32, VOPC_I32_I32, COND_NULL, revOp>;
multiclass VOPCX_I64 <string opName, string revOp = opName> :
- VOPC_Pseudos <opName, VOPC_I1_I64_I64, COND_NULL, revOp, 1>;
+ VOPCX_Pseudos <opName, VOPC_I1_I64_I64, VOPC_I64_I64, COND_NULL, revOp>;
//===----------------------------------------------------------------------===//
@@ -309,7 +394,7 @@ defm V_CMPX_NEQ_F64 : VOPCX_F64 <"v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <"v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <"v_cmpx_tru_f64">;
-let SubtargetPredicate = isSICI in {
+let SubtargetPredicate = isGFX6GFX7 in {
defm V_CMPS_F_F32 : VOPC_F32 <"v_cmps_f_f32">;
defm V_CMPS_LT_F32 : VOPC_F32 <"v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
@@ -379,7 +464,7 @@ defm V_CMPSX_NEQ_F64 : VOPCX_F64 <"v_cmpsx_neq_f64">;
defm V_CMPSX_NLT_F64 : VOPCX_F64 <"v_cmpsx_nlt_f64">;
defm V_CMPSX_TRU_F64 : VOPCX_F64 <"v_cmpsx_tru_f64">;
-} // End SubtargetPredicate = isSICI
+} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = Has16BitInsts in {
@@ -546,6 +631,18 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
let HasOMod = 0;
}
+class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt> :
+ VOPC_Class_Profile<sched, vt> {
+ let Outs64 = (outs );
+ let OutsSDWA = (outs );
+ let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
+ Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let Asm64 = "$src0_modifiers, $src1";
+ let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel";
+ let EmitDst = 0;
+}
+
class getVOPCClassPat64 <VOPProfile P> {
list<dag> ret =
[(set i1:$sdst,
@@ -556,46 +653,85 @@ class getVOPCClassPat64 <VOPProfile P> {
// Special case for class instructions which only have modifiers on
// the 1st source operand.
-multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
- def _e32 : VOPC_Pseudo <opName, p> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
+ bit DefVcc = 1> {
+ def _e32 : VOPC_Pseudo <opName, p>,
+ VCMPXNoSDstTable<1, opName#"_e32"> {
+ let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
+ !if(DefVcc, [VCC], []));
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
- def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> {
+ def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
+ VCMPXNoSDstTable<1, opName#"_e64"> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = p.Schedule;
}
def _sdwa : VOPC_SDWA_Pseudo <opName, p> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
+ !if(DefVcc, [VCC], []));
let SchedRW = p.Schedule;
let isConvergent = DefExec;
}
}
+let SubtargetPredicate = HasSdstCMPX in {
+multiclass VOPCX_Class_Pseudos <string opName,
+ VOPC_Profile P,
+ VOPC_Profile P_NoSDst> :
+ VOPC_Class_Pseudos <opName, P, 1, 1> {
+
+ def _nosdst_e32 : VOPC_Pseudo <opName#"_nosdst", P_NoSDst, [], 0>,
+ VCMPXNoSDstTable<0, opName#"_e32"> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isConvergent = 1;
+ let SubtargetPredicate = HasNoSdstCMPX;
+ }
+
+ def _nosdst_e64 : VOP3_Pseudo<opName#"_nosdst", P_NoSDst>,
+ VCMPXNoSDstTable<0, opName#"_e64"> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let SubtargetPredicate = HasNoSdstCMPX;
+ }
+
+ def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
+ let Defs = [EXEC];
+ let SchedRW = P_NoSDst.Schedule;
+ let isConvergent = 1;
+ let SubtargetPredicate = HasNoSdstCMPX;
+ }
+}
+} // End SubtargetPredicate = HasSdstCMPX
+
def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
def VOPC_I1_F64_I32 : VOPC_Class_Profile<[WriteDoubleAdd], f64>;
+def VOPC_F16_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f16>;
+def VOPC_F32_I32 : VOPC_Class_NoSdst_Profile<[Write32Bit], f32>;
+def VOPC_F64_I32 : VOPC_Class_NoSdst_Profile<[Write64Bit], f64>;
+
multiclass VOPC_CLASS_F16 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 0>;
multiclass VOPCX_CLASS_F16 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 1>;
+ VOPCX_Class_Pseudos <opName, VOPC_I1_F16_I32, VOPC_F16_I32>;
multiclass VOPC_CLASS_F32 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
multiclass VOPCX_CLASS_F32 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 1>;
+ VOPCX_Class_Pseudos <opName, VOPC_I1_F32_I32, VOPC_F32_I32>;
multiclass VOPC_CLASS_F64 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
multiclass VOPCX_CLASS_F64 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 1>;
+ VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
@@ -608,342 +744,471 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
// V_ICMPIntrinsic Pattern.
//===----------------------------------------------------------------------===//
-class ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
- (AMDGPUsetcc vt:$src0, vt:$src1, cond),
- (inst $src0, $src1)
->;
-
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
-def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
-def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
-def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
-def : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
-def : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
-def : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
-def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
-def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
-
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
-def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
-def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
-def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
-def : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
-def : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
-def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
-def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
-def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
-
-def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
-def : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
-def : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
-def : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
-def : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
-def : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
-def : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
-def : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
-def : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
-def : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
-
-class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : GCNPat <
- (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
- (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
- (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
- DSTCLAMP.NONE)
->;
-
-def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
-def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
-def : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
-def : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
-def : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
-def : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
-
-def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
-def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
-def : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
-def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
-def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
-def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
-
-def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
-def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
-def : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
-def : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
-def : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
-def : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
-
-
-def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
-def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
-def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
-def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
-def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
-def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;
-
-def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
-def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
-def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
-def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
-def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
-def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
-
-def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
-def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
-def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
-def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
-def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
-def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
+// We need to use COPY_TO_REGCLASS to work around the problem where
+// ReplaceAllUsesWith() complains it cannot replace i1 <-> i64/i32 if the node
+// was not morphed in place.
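+// The compare itself produces the condition mask in an SGPR pair (wave64) or a
+// single SGPR (wave32); the COPY_TO_REGCLASS below pins that result to SReg_64
+// or SReg_32 so the i64/i32 users see a legal register class.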
+multiclass ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
+ let WaveSizePredicate = isWave64 in
+ def : GCNPat <
+ (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+ (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
+ >;
+
+ let WaveSizePredicate = isWave32 in
+ def : GCNPat <
+ (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+ (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+ >;
+}
+
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
+
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
+
+multiclass FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> {
+ let WaveSizePredicate = isWave64 in
+ def : GCNPat <
+ (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+ (i64 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+ DSTCLAMP.NONE), SReg_64))
+ >;
+
+ let WaveSizePredicate = isWave32 in
+ def : GCNPat <
+ (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+ (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+ DSTCLAMP.NONE), SReg_32))
+ >;
+}
+
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
+
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_e64, f16>;
+
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
//===----------------------------------------------------------------------===//
-// Target
+// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// SI
+// GFX10.
//===----------------------------------------------------------------------===//
-multiclass VOPC_Real_si <bits<9> op> {
- let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
- def _e32_si :
- VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
- VOPCe<op{7-0}>;
-
- def _e64_si :
- VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
- VOP3a_si <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
- // Encoding used for VOPC instructions encoded as VOP3
- // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
- bits<8> sdst;
- let Inst{7-0} = sdst;
- }
+let AssemblerPredicate = isGFX10Plus in {
+ multiclass VOPC_Real_gfx10<bits<9> op> {
+ let DecoderNamespace = "GFX10" in {
+ def _e32_gfx10 :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
+ VOPCe<op{7-0}>;
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
+ VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
+ } // End DecoderNamespace = "GFX10"
+
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
+ defm : VOPCInstAliases<NAME, "gfx10">;
}
- def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
- !cast<Instruction>(NAME#"_e32_si")> {
- let AssemblerPredicate = isSICI;
+
+ multiclass VOPCX_Real_gfx10<bits<9> op> {
+ let DecoderNamespace = "GFX10" in {
+ def _e32_gfx10 :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_nosdst_e32"), SIEncodingFamily.GFX10>,
+ VOPCe<op{7-0}> {
+ let AsmString = !subst("_nosdst", "", !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").PseudoInstr)
+ # " " # !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").AsmOperands;
+ }
+
+ def _e64_gfx10 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_nosdst_e64"), SIEncodingFamily.GFX10>,
+ VOP3a_gfx10<{0, op}, !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Pfl> {
+ let Inst{7-0} = ?; // sdst
+ let AsmString = !subst("_nosdst", "", !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").Mnemonic)
+ # "{_e64} " # !cast<VOP3_Pseudo>(NAME#"_nosdst_e64").AsmOperands;
+ }
+ } // End DecoderNamespace = "GFX10"
+
+ def _sdwa_gfx10 :
+ VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa")>,
+ VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").Pfl> {
+ let AsmString = !subst("_nosdst", "", !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").Mnemonic)
+ # "{_sdwa} " # !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").AsmOperands9;
+ }
+
+ defm : VOPCXInstAliases<NAME, "gfx10">;
}
-}
+} // End AssemblerPredicate = isGFX10Plus
+
+defm V_CMP_LT_I16 : VOPC_Real_gfx10<0x089>;
+defm V_CMP_EQ_I16 : VOPC_Real_gfx10<0x08a>;
+defm V_CMP_LE_I16 : VOPC_Real_gfx10<0x08b>;
+defm V_CMP_GT_I16 : VOPC_Real_gfx10<0x08c>;
+defm V_CMP_NE_I16 : VOPC_Real_gfx10<0x08d>;
+defm V_CMP_GE_I16 : VOPC_Real_gfx10<0x08e>;
+defm V_CMP_CLASS_F16 : VOPC_Real_gfx10<0x08f>;
+defm V_CMPX_LT_I16 : VOPCX_Real_gfx10<0x099>;
+defm V_CMPX_EQ_I16 : VOPCX_Real_gfx10<0x09a>;
+defm V_CMPX_LE_I16 : VOPCX_Real_gfx10<0x09b>;
+defm V_CMPX_GT_I16 : VOPCX_Real_gfx10<0x09c>;
+defm V_CMPX_NE_I16 : VOPCX_Real_gfx10<0x09d>;
+defm V_CMPX_GE_I16 : VOPCX_Real_gfx10<0x09e>;
+defm V_CMPX_CLASS_F16 : VOPCX_Real_gfx10<0x09f>;
+defm V_CMP_LT_U16 : VOPC_Real_gfx10<0x0a9>;
+defm V_CMP_EQ_U16 : VOPC_Real_gfx10<0x0aa>;
+defm V_CMP_LE_U16 : VOPC_Real_gfx10<0x0ab>;
+defm V_CMP_GT_U16 : VOPC_Real_gfx10<0x0ac>;
+defm V_CMP_NE_U16 : VOPC_Real_gfx10<0x0ad>;
+defm V_CMP_GE_U16 : VOPC_Real_gfx10<0x0ae>;
+defm V_CMPX_LT_U16 : VOPCX_Real_gfx10<0x0b9>;
+defm V_CMPX_EQ_U16 : VOPCX_Real_gfx10<0x0ba>;
+defm V_CMPX_LE_U16 : VOPCX_Real_gfx10<0x0bb>;
+defm V_CMPX_GT_U16 : VOPCX_Real_gfx10<0x0bc>;
+defm V_CMPX_NE_U16 : VOPCX_Real_gfx10<0x0bd>;
+defm V_CMPX_GE_U16 : VOPCX_Real_gfx10<0x0be>;
+defm V_CMP_F_F16 : VOPC_Real_gfx10<0x0c8>;
+defm V_CMP_LT_F16 : VOPC_Real_gfx10<0x0c9>;
+defm V_CMP_EQ_F16 : VOPC_Real_gfx10<0x0ca>;
+defm V_CMP_LE_F16 : VOPC_Real_gfx10<0x0cb>;
+defm V_CMP_GT_F16 : VOPC_Real_gfx10<0x0cc>;
+defm V_CMP_LG_F16 : VOPC_Real_gfx10<0x0cd>;
+defm V_CMP_GE_F16 : VOPC_Real_gfx10<0x0ce>;
+defm V_CMP_O_F16 : VOPC_Real_gfx10<0x0cf>;
+defm V_CMPX_F_F16 : VOPCX_Real_gfx10<0x0d8>;
+defm V_CMPX_LT_F16 : VOPCX_Real_gfx10<0x0d9>;
+defm V_CMPX_EQ_F16 : VOPCX_Real_gfx10<0x0da>;
+defm V_CMPX_LE_F16 : VOPCX_Real_gfx10<0x0db>;
+defm V_CMPX_GT_F16 : VOPCX_Real_gfx10<0x0dc>;
+defm V_CMPX_LG_F16 : VOPCX_Real_gfx10<0x0dd>;
+defm V_CMPX_GE_F16 : VOPCX_Real_gfx10<0x0de>;
+defm V_CMPX_O_F16 : VOPCX_Real_gfx10<0x0df>;
+defm V_CMP_U_F16 : VOPC_Real_gfx10<0x0e8>;
+defm V_CMP_NGE_F16 : VOPC_Real_gfx10<0x0e9>;
+defm V_CMP_NLG_F16 : VOPC_Real_gfx10<0x0ea>;
+defm V_CMP_NGT_F16 : VOPC_Real_gfx10<0x0eb>;
+defm V_CMP_NLE_F16 : VOPC_Real_gfx10<0x0ec>;
+defm V_CMP_NEQ_F16 : VOPC_Real_gfx10<0x0ed>;
+defm V_CMP_NLT_F16 : VOPC_Real_gfx10<0x0ee>;
+defm V_CMP_TRU_F16 : VOPC_Real_gfx10<0x0ef>;
+defm V_CMPX_U_F16 : VOPCX_Real_gfx10<0x0f8>;
+defm V_CMPX_NGE_F16 : VOPCX_Real_gfx10<0x0f9>;
+defm V_CMPX_NLG_F16 : VOPCX_Real_gfx10<0x0fa>;
+defm V_CMPX_NGT_F16 : VOPCX_Real_gfx10<0x0fb>;
+defm V_CMPX_NLE_F16 : VOPCX_Real_gfx10<0x0fc>;
+defm V_CMPX_NEQ_F16 : VOPCX_Real_gfx10<0x0fd>;
+defm V_CMPX_NLT_F16 : VOPCX_Real_gfx10<0x0fe>;
+defm V_CMPX_TRU_F16 : VOPCX_Real_gfx10<0x0ff>;
-defm V_CMP_F_F32 : VOPC_Real_si <0x0>;
-defm V_CMP_LT_F32 : VOPC_Real_si <0x1>;
-defm V_CMP_EQ_F32 : VOPC_Real_si <0x2>;
-defm V_CMP_LE_F32 : VOPC_Real_si <0x3>;
-defm V_CMP_GT_F32 : VOPC_Real_si <0x4>;
-defm V_CMP_LG_F32 : VOPC_Real_si <0x5>;
-defm V_CMP_GE_F32 : VOPC_Real_si <0x6>;
-defm V_CMP_O_F32 : VOPC_Real_si <0x7>;
-defm V_CMP_U_F32 : VOPC_Real_si <0x8>;
-defm V_CMP_NGE_F32 : VOPC_Real_si <0x9>;
-defm V_CMP_NLG_F32 : VOPC_Real_si <0xa>;
-defm V_CMP_NGT_F32 : VOPC_Real_si <0xb>;
-defm V_CMP_NLE_F32 : VOPC_Real_si <0xc>;
-defm V_CMP_NEQ_F32 : VOPC_Real_si <0xd>;
-defm V_CMP_NLT_F32 : VOPC_Real_si <0xe>;
-defm V_CMP_TRU_F32 : VOPC_Real_si <0xf>;
-
-defm V_CMPX_F_F32 : VOPC_Real_si <0x10>;
-defm V_CMPX_LT_F32 : VOPC_Real_si <0x11>;
-defm V_CMPX_EQ_F32 : VOPC_Real_si <0x12>;
-defm V_CMPX_LE_F32 : VOPC_Real_si <0x13>;
-defm V_CMPX_GT_F32 : VOPC_Real_si <0x14>;
-defm V_CMPX_LG_F32 : VOPC_Real_si <0x15>;
-defm V_CMPX_GE_F32 : VOPC_Real_si <0x16>;
-defm V_CMPX_O_F32 : VOPC_Real_si <0x17>;
-defm V_CMPX_U_F32 : VOPC_Real_si <0x18>;
-defm V_CMPX_NGE_F32 : VOPC_Real_si <0x19>;
-defm V_CMPX_NLG_F32 : VOPC_Real_si <0x1a>;
-defm V_CMPX_NGT_F32 : VOPC_Real_si <0x1b>;
-defm V_CMPX_NLE_F32 : VOPC_Real_si <0x1c>;
-defm V_CMPX_NEQ_F32 : VOPC_Real_si <0x1d>;
-defm V_CMPX_NLT_F32 : VOPC_Real_si <0x1e>;
-defm V_CMPX_TRU_F32 : VOPC_Real_si <0x1f>;
-
-defm V_CMP_F_F64 : VOPC_Real_si <0x20>;
-defm V_CMP_LT_F64 : VOPC_Real_si <0x21>;
-defm V_CMP_EQ_F64 : VOPC_Real_si <0x22>;
-defm V_CMP_LE_F64 : VOPC_Real_si <0x23>;
-defm V_CMP_GT_F64 : VOPC_Real_si <0x24>;
-defm V_CMP_LG_F64 : VOPC_Real_si <0x25>;
-defm V_CMP_GE_F64 : VOPC_Real_si <0x26>;
-defm V_CMP_O_F64 : VOPC_Real_si <0x27>;
-defm V_CMP_U_F64 : VOPC_Real_si <0x28>;
-defm V_CMP_NGE_F64 : VOPC_Real_si <0x29>;
-defm V_CMP_NLG_F64 : VOPC_Real_si <0x2a>;
-defm V_CMP_NGT_F64 : VOPC_Real_si <0x2b>;
-defm V_CMP_NLE_F64 : VOPC_Real_si <0x2c>;
-defm V_CMP_NEQ_F64 : VOPC_Real_si <0x2d>;
-defm V_CMP_NLT_F64 : VOPC_Real_si <0x2e>;
-defm V_CMP_TRU_F64 : VOPC_Real_si <0x2f>;
-
-defm V_CMPX_F_F64 : VOPC_Real_si <0x30>;
-defm V_CMPX_LT_F64 : VOPC_Real_si <0x31>;
-defm V_CMPX_EQ_F64 : VOPC_Real_si <0x32>;
-defm V_CMPX_LE_F64 : VOPC_Real_si <0x33>;
-defm V_CMPX_GT_F64 : VOPC_Real_si <0x34>;
-defm V_CMPX_LG_F64 : VOPC_Real_si <0x35>;
-defm V_CMPX_GE_F64 : VOPC_Real_si <0x36>;
-defm V_CMPX_O_F64 : VOPC_Real_si <0x37>;
-defm V_CMPX_U_F64 : VOPC_Real_si <0x38>;
-defm V_CMPX_NGE_F64 : VOPC_Real_si <0x39>;
-defm V_CMPX_NLG_F64 : VOPC_Real_si <0x3a>;
-defm V_CMPX_NGT_F64 : VOPC_Real_si <0x3b>;
-defm V_CMPX_NLE_F64 : VOPC_Real_si <0x3c>;
-defm V_CMPX_NEQ_F64 : VOPC_Real_si <0x3d>;
-defm V_CMPX_NLT_F64 : VOPC_Real_si <0x3e>;
-defm V_CMPX_TRU_F64 : VOPC_Real_si <0x3f>;
-
-defm V_CMPS_F_F32 : VOPC_Real_si <0x40>;
-defm V_CMPS_LT_F32 : VOPC_Real_si <0x41>;
-defm V_CMPS_EQ_F32 : VOPC_Real_si <0x42>;
-defm V_CMPS_LE_F32 : VOPC_Real_si <0x43>;
-defm V_CMPS_GT_F32 : VOPC_Real_si <0x44>;
-defm V_CMPS_LG_F32 : VOPC_Real_si <0x45>;
-defm V_CMPS_GE_F32 : VOPC_Real_si <0x46>;
-defm V_CMPS_O_F32 : VOPC_Real_si <0x47>;
-defm V_CMPS_U_F32 : VOPC_Real_si <0x48>;
-defm V_CMPS_NGE_F32 : VOPC_Real_si <0x49>;
-defm V_CMPS_NLG_F32 : VOPC_Real_si <0x4a>;
-defm V_CMPS_NGT_F32 : VOPC_Real_si <0x4b>;
-defm V_CMPS_NLE_F32 : VOPC_Real_si <0x4c>;
-defm V_CMPS_NEQ_F32 : VOPC_Real_si <0x4d>;
-defm V_CMPS_NLT_F32 : VOPC_Real_si <0x4e>;
-defm V_CMPS_TRU_F32 : VOPC_Real_si <0x4f>;
-
-defm V_CMPSX_F_F32 : VOPC_Real_si <0x50>;
-defm V_CMPSX_LT_F32 : VOPC_Real_si <0x51>;
-defm V_CMPSX_EQ_F32 : VOPC_Real_si <0x52>;
-defm V_CMPSX_LE_F32 : VOPC_Real_si <0x53>;
-defm V_CMPSX_GT_F32 : VOPC_Real_si <0x54>;
-defm V_CMPSX_LG_F32 : VOPC_Real_si <0x55>;
-defm V_CMPSX_GE_F32 : VOPC_Real_si <0x56>;
-defm V_CMPSX_O_F32 : VOPC_Real_si <0x57>;
-defm V_CMPSX_U_F32 : VOPC_Real_si <0x58>;
-defm V_CMPSX_NGE_F32 : VOPC_Real_si <0x59>;
-defm V_CMPSX_NLG_F32 : VOPC_Real_si <0x5a>;
-defm V_CMPSX_NGT_F32 : VOPC_Real_si <0x5b>;
-defm V_CMPSX_NLE_F32 : VOPC_Real_si <0x5c>;
-defm V_CMPSX_NEQ_F32 : VOPC_Real_si <0x5d>;
-defm V_CMPSX_NLT_F32 : VOPC_Real_si <0x5e>;
-defm V_CMPSX_TRU_F32 : VOPC_Real_si <0x5f>;
-
-defm V_CMPS_F_F64 : VOPC_Real_si <0x60>;
-defm V_CMPS_LT_F64 : VOPC_Real_si <0x61>;
-defm V_CMPS_EQ_F64 : VOPC_Real_si <0x62>;
-defm V_CMPS_LE_F64 : VOPC_Real_si <0x63>;
-defm V_CMPS_GT_F64 : VOPC_Real_si <0x64>;
-defm V_CMPS_LG_F64 : VOPC_Real_si <0x65>;
-defm V_CMPS_GE_F64 : VOPC_Real_si <0x66>;
-defm V_CMPS_O_F64 : VOPC_Real_si <0x67>;
-defm V_CMPS_U_F64 : VOPC_Real_si <0x68>;
-defm V_CMPS_NGE_F64 : VOPC_Real_si <0x69>;
-defm V_CMPS_NLG_F64 : VOPC_Real_si <0x6a>;
-defm V_CMPS_NGT_F64 : VOPC_Real_si <0x6b>;
-defm V_CMPS_NLE_F64 : VOPC_Real_si <0x6c>;
-defm V_CMPS_NEQ_F64 : VOPC_Real_si <0x6d>;
-defm V_CMPS_NLT_F64 : VOPC_Real_si <0x6e>;
-defm V_CMPS_TRU_F64 : VOPC_Real_si <0x6f>;
-
-defm V_CMPSX_F_F64 : VOPC_Real_si <0x70>;
-defm V_CMPSX_LT_F64 : VOPC_Real_si <0x71>;
-defm V_CMPSX_EQ_F64 : VOPC_Real_si <0x72>;
-defm V_CMPSX_LE_F64 : VOPC_Real_si <0x73>;
-defm V_CMPSX_GT_F64 : VOPC_Real_si <0x74>;
-defm V_CMPSX_LG_F64 : VOPC_Real_si <0x75>;
-defm V_CMPSX_GE_F64 : VOPC_Real_si <0x76>;
-defm V_CMPSX_O_F64 : VOPC_Real_si <0x77>;
-defm V_CMPSX_U_F64 : VOPC_Real_si <0x78>;
-defm V_CMPSX_NGE_F64 : VOPC_Real_si <0x79>;
-defm V_CMPSX_NLG_F64 : VOPC_Real_si <0x7a>;
-defm V_CMPSX_NGT_F64 : VOPC_Real_si <0x7b>;
-defm V_CMPSX_NLE_F64 : VOPC_Real_si <0x7c>;
-defm V_CMPSX_NEQ_F64 : VOPC_Real_si <0x7d>;
-defm V_CMPSX_NLT_F64 : VOPC_Real_si <0x7e>;
-defm V_CMPSX_TRU_F64 : VOPC_Real_si <0x7f>;
-
-defm V_CMP_F_I32 : VOPC_Real_si <0x80>;
-defm V_CMP_LT_I32 : VOPC_Real_si <0x81>;
-defm V_CMP_EQ_I32 : VOPC_Real_si <0x82>;
-defm V_CMP_LE_I32 : VOPC_Real_si <0x83>;
-defm V_CMP_GT_I32 : VOPC_Real_si <0x84>;
-defm V_CMP_NE_I32 : VOPC_Real_si <0x85>;
-defm V_CMP_GE_I32 : VOPC_Real_si <0x86>;
-defm V_CMP_T_I32 : VOPC_Real_si <0x87>;
-
-defm V_CMPX_F_I32 : VOPC_Real_si <0x90>;
-defm V_CMPX_LT_I32 : VOPC_Real_si <0x91>;
-defm V_CMPX_EQ_I32 : VOPC_Real_si <0x92>;
-defm V_CMPX_LE_I32 : VOPC_Real_si <0x93>;
-defm V_CMPX_GT_I32 : VOPC_Real_si <0x94>;
-defm V_CMPX_NE_I32 : VOPC_Real_si <0x95>;
-defm V_CMPX_GE_I32 : VOPC_Real_si <0x96>;
-defm V_CMPX_T_I32 : VOPC_Real_si <0x97>;
-
-defm V_CMP_F_I64 : VOPC_Real_si <0xa0>;
-defm V_CMP_LT_I64 : VOPC_Real_si <0xa1>;
-defm V_CMP_EQ_I64 : VOPC_Real_si <0xa2>;
-defm V_CMP_LE_I64 : VOPC_Real_si <0xa3>;
-defm V_CMP_GT_I64 : VOPC_Real_si <0xa4>;
-defm V_CMP_NE_I64 : VOPC_Real_si <0xa5>;
-defm V_CMP_GE_I64 : VOPC_Real_si <0xa6>;
-defm V_CMP_T_I64 : VOPC_Real_si <0xa7>;
-
-defm V_CMPX_F_I64 : VOPC_Real_si <0xb0>;
-defm V_CMPX_LT_I64 : VOPC_Real_si <0xb1>;
-defm V_CMPX_EQ_I64 : VOPC_Real_si <0xb2>;
-defm V_CMPX_LE_I64 : VOPC_Real_si <0xb3>;
-defm V_CMPX_GT_I64 : VOPC_Real_si <0xb4>;
-defm V_CMPX_NE_I64 : VOPC_Real_si <0xb5>;
-defm V_CMPX_GE_I64 : VOPC_Real_si <0xb6>;
-defm V_CMPX_T_I64 : VOPC_Real_si <0xb7>;
-
-defm V_CMP_F_U32 : VOPC_Real_si <0xc0>;
-defm V_CMP_LT_U32 : VOPC_Real_si <0xc1>;
-defm V_CMP_EQ_U32 : VOPC_Real_si <0xc2>;
-defm V_CMP_LE_U32 : VOPC_Real_si <0xc3>;
-defm V_CMP_GT_U32 : VOPC_Real_si <0xc4>;
-defm V_CMP_NE_U32 : VOPC_Real_si <0xc5>;
-defm V_CMP_GE_U32 : VOPC_Real_si <0xc6>;
-defm V_CMP_T_U32 : VOPC_Real_si <0xc7>;
-
-defm V_CMPX_F_U32 : VOPC_Real_si <0xd0>;
-defm V_CMPX_LT_U32 : VOPC_Real_si <0xd1>;
-defm V_CMPX_EQ_U32 : VOPC_Real_si <0xd2>;
-defm V_CMPX_LE_U32 : VOPC_Real_si <0xd3>;
-defm V_CMPX_GT_U32 : VOPC_Real_si <0xd4>;
-defm V_CMPX_NE_U32 : VOPC_Real_si <0xd5>;
-defm V_CMPX_GE_U32 : VOPC_Real_si <0xd6>;
-defm V_CMPX_T_U32 : VOPC_Real_si <0xd7>;
-
-defm V_CMP_F_U64 : VOPC_Real_si <0xe0>;
-defm V_CMP_LT_U64 : VOPC_Real_si <0xe1>;
-defm V_CMP_EQ_U64 : VOPC_Real_si <0xe2>;
-defm V_CMP_LE_U64 : VOPC_Real_si <0xe3>;
-defm V_CMP_GT_U64 : VOPC_Real_si <0xe4>;
-defm V_CMP_NE_U64 : VOPC_Real_si <0xe5>;
-defm V_CMP_GE_U64 : VOPC_Real_si <0xe6>;
-defm V_CMP_T_U64 : VOPC_Real_si <0xe7>;
-
-defm V_CMPX_F_U64 : VOPC_Real_si <0xf0>;
-defm V_CMPX_LT_U64 : VOPC_Real_si <0xf1>;
-defm V_CMPX_EQ_U64 : VOPC_Real_si <0xf2>;
-defm V_CMPX_LE_U64 : VOPC_Real_si <0xf3>;
-defm V_CMPX_GT_U64 : VOPC_Real_si <0xf4>;
-defm V_CMPX_NE_U64 : VOPC_Real_si <0xf5>;
-defm V_CMPX_GE_U64 : VOPC_Real_si <0xf6>;
-defm V_CMPX_T_U64 : VOPC_Real_si <0xf7>;
-
-defm V_CMP_CLASS_F32 : VOPC_Real_si <0x88>;
-defm V_CMPX_CLASS_F32 : VOPC_Real_si <0x98>;
-defm V_CMP_CLASS_F64 : VOPC_Real_si <0xa8>;
-defm V_CMPX_CLASS_F64 : VOPC_Real_si <0xb8>;
+//===----------------------------------------------------------------------===//
+// GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6GFX7 in {
+ multiclass VOPC_Real_gfx6_gfx7<bits<9> op> {
+ let DecoderNamespace = "GFX6GFX7" in {
+ def _e32_gfx6_gfx7 :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOPCe<op{7-0}>;
+ def _e64_gfx6_gfx7 :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3a_gfx6_gfx7<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3 differs from
+ // VOP3e by destination name (sdst) as VOPC doesn't have vector dst.
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
+ } // End DecoderNamespace = "GFX6GFX7"
+
+ defm : VOPCInstAliases<NAME, "gfx6_gfx7">;
+ }
+} // End AssemblerPredicate = isGFX6GFX7
+
+multiclass VOPC_Real_gfx6_gfx7_gfx10<bits<9> op> :
+ VOPC_Real_gfx6_gfx7<op>, VOPC_Real_gfx10<op>;
+
+multiclass VOPCX_Real_gfx6_gfx7<bits<9> op> :
+ VOPC_Real_gfx6_gfx7<op>;
+
+multiclass VOPCX_Real_gfx6_gfx7_gfx10 <bits<9> op> :
+ VOPC_Real_gfx6_gfx7<op>, VOPCX_Real_gfx10<op>;
+
+defm V_CMP_F_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x000>;
+defm V_CMP_LT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x001>;
+defm V_CMP_EQ_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x002>;
+defm V_CMP_LE_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x003>;
+defm V_CMP_GT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x004>;
+defm V_CMP_LG_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x005>;
+defm V_CMP_GE_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x006>;
+defm V_CMP_O_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x007>;
+defm V_CMP_U_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x008>;
+defm V_CMP_NGE_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x009>;
+defm V_CMP_NLG_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x00a>;
+defm V_CMP_NGT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x00b>;
+defm V_CMP_NLE_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x00c>;
+defm V_CMP_NEQ_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x00d>;
+defm V_CMP_NLT_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x00e>;
+defm V_CMP_TRU_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x00f>;
+defm V_CMPX_F_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x010>;
+defm V_CMPX_LT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x011>;
+defm V_CMPX_EQ_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x012>;
+defm V_CMPX_LE_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x013>;
+defm V_CMPX_GT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x014>;
+defm V_CMPX_LG_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x015>;
+defm V_CMPX_GE_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x016>;
+defm V_CMPX_O_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x017>;
+defm V_CMPX_U_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x018>;
+defm V_CMPX_NGE_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x019>;
+defm V_CMPX_NLG_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01a>;
+defm V_CMPX_NGT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01b>;
+defm V_CMPX_NLE_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01c>;
+defm V_CMPX_NEQ_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01d>;
+defm V_CMPX_NLT_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01e>;
+defm V_CMPX_TRU_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x01f>;
+defm V_CMP_F_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x020>;
+defm V_CMP_LT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x021>;
+defm V_CMP_EQ_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x022>;
+defm V_CMP_LE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x023>;
+defm V_CMP_GT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x024>;
+defm V_CMP_LG_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x025>;
+defm V_CMP_GE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x026>;
+defm V_CMP_O_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x027>;
+defm V_CMP_U_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x028>;
+defm V_CMP_NGE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x029>;
+defm V_CMP_NLG_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02a>;
+defm V_CMP_NGT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02b>;
+defm V_CMP_NLE_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02c>;
+defm V_CMP_NEQ_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02d>;
+defm V_CMP_NLT_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02e>;
+defm V_CMP_TRU_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x02f>;
+defm V_CMPX_F_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x030>;
+defm V_CMPX_LT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x031>;
+defm V_CMPX_EQ_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x032>;
+defm V_CMPX_LE_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x033>;
+defm V_CMPX_GT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x034>;
+defm V_CMPX_LG_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x035>;
+defm V_CMPX_GE_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x036>;
+defm V_CMPX_O_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x037>;
+defm V_CMPX_U_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x038>;
+defm V_CMPX_NGE_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x039>;
+defm V_CMPX_NLG_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x03a>;
+defm V_CMPX_NGT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x03b>;
+defm V_CMPX_NLE_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x03c>;
+defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x03d>;
+defm V_CMPX_NLT_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x03e>;
+defm V_CMPX_TRU_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x03f>;
+defm V_CMPS_F_F32 : VOPC_Real_gfx6_gfx7<0x040>;
+defm V_CMPS_LT_F32 : VOPC_Real_gfx6_gfx7<0x041>;
+defm V_CMPS_EQ_F32 : VOPC_Real_gfx6_gfx7<0x042>;
+defm V_CMPS_LE_F32 : VOPC_Real_gfx6_gfx7<0x043>;
+defm V_CMPS_GT_F32 : VOPC_Real_gfx6_gfx7<0x044>;
+defm V_CMPS_LG_F32 : VOPC_Real_gfx6_gfx7<0x045>;
+defm V_CMPS_GE_F32 : VOPC_Real_gfx6_gfx7<0x046>;
+defm V_CMPS_O_F32 : VOPC_Real_gfx6_gfx7<0x047>;
+defm V_CMPS_U_F32 : VOPC_Real_gfx6_gfx7<0x048>;
+defm V_CMPS_NGE_F32 : VOPC_Real_gfx6_gfx7<0x049>;
+defm V_CMPS_NLG_F32 : VOPC_Real_gfx6_gfx7<0x04a>;
+defm V_CMPS_NGT_F32 : VOPC_Real_gfx6_gfx7<0x04b>;
+defm V_CMPS_NLE_F32 : VOPC_Real_gfx6_gfx7<0x04c>;
+defm V_CMPS_NEQ_F32 : VOPC_Real_gfx6_gfx7<0x04d>;
+defm V_CMPS_NLT_F32 : VOPC_Real_gfx6_gfx7<0x04e>;
+defm V_CMPS_TRU_F32 : VOPC_Real_gfx6_gfx7<0x04f>;
+defm V_CMPSX_F_F32 : VOPCX_Real_gfx6_gfx7<0x050>;
+defm V_CMPSX_LT_F32 : VOPCX_Real_gfx6_gfx7<0x051>;
+defm V_CMPSX_EQ_F32 : VOPCX_Real_gfx6_gfx7<0x052>;
+defm V_CMPSX_LE_F32 : VOPCX_Real_gfx6_gfx7<0x053>;
+defm V_CMPSX_GT_F32 : VOPCX_Real_gfx6_gfx7<0x054>;
+defm V_CMPSX_LG_F32 : VOPCX_Real_gfx6_gfx7<0x055>;
+defm V_CMPSX_GE_F32 : VOPCX_Real_gfx6_gfx7<0x056>;
+defm V_CMPSX_O_F32 : VOPCX_Real_gfx6_gfx7<0x057>;
+defm V_CMPSX_U_F32 : VOPCX_Real_gfx6_gfx7<0x058>;
+defm V_CMPSX_NGE_F32 : VOPCX_Real_gfx6_gfx7<0x059>;
+defm V_CMPSX_NLG_F32 : VOPCX_Real_gfx6_gfx7<0x05a>;
+defm V_CMPSX_NGT_F32 : VOPCX_Real_gfx6_gfx7<0x05b>;
+defm V_CMPSX_NLE_F32 : VOPCX_Real_gfx6_gfx7<0x05c>;
+defm V_CMPSX_NEQ_F32 : VOPCX_Real_gfx6_gfx7<0x05d>;
+defm V_CMPSX_NLT_F32 : VOPCX_Real_gfx6_gfx7<0x05e>;
+defm V_CMPSX_TRU_F32 : VOPCX_Real_gfx6_gfx7<0x05f>;
+defm V_CMPS_F_F64 : VOPC_Real_gfx6_gfx7<0x060>;
+defm V_CMPS_LT_F64 : VOPC_Real_gfx6_gfx7<0x061>;
+defm V_CMPS_EQ_F64 : VOPC_Real_gfx6_gfx7<0x062>;
+defm V_CMPS_LE_F64 : VOPC_Real_gfx6_gfx7<0x063>;
+defm V_CMPS_GT_F64 : VOPC_Real_gfx6_gfx7<0x064>;
+defm V_CMPS_LG_F64 : VOPC_Real_gfx6_gfx7<0x065>;
+defm V_CMPS_GE_F64 : VOPC_Real_gfx6_gfx7<0x066>;
+defm V_CMPS_O_F64 : VOPC_Real_gfx6_gfx7<0x067>;
+defm V_CMPS_U_F64 : VOPC_Real_gfx6_gfx7<0x068>;
+defm V_CMPS_NGE_F64 : VOPC_Real_gfx6_gfx7<0x069>;
+defm V_CMPS_NLG_F64 : VOPC_Real_gfx6_gfx7<0x06a>;
+defm V_CMPS_NGT_F64 : VOPC_Real_gfx6_gfx7<0x06b>;
+defm V_CMPS_NLE_F64 : VOPC_Real_gfx6_gfx7<0x06c>;
+defm V_CMPS_NEQ_F64 : VOPC_Real_gfx6_gfx7<0x06d>;
+defm V_CMPS_NLT_F64 : VOPC_Real_gfx6_gfx7<0x06e>;
+defm V_CMPS_TRU_F64 : VOPC_Real_gfx6_gfx7<0x06f>;
+defm V_CMPSX_F_F64 : VOPCX_Real_gfx6_gfx7<0x070>;
+defm V_CMPSX_LT_F64 : VOPCX_Real_gfx6_gfx7<0x071>;
+defm V_CMPSX_EQ_F64 : VOPCX_Real_gfx6_gfx7<0x072>;
+defm V_CMPSX_LE_F64 : VOPCX_Real_gfx6_gfx7<0x073>;
+defm V_CMPSX_GT_F64 : VOPCX_Real_gfx6_gfx7<0x074>;
+defm V_CMPSX_LG_F64 : VOPCX_Real_gfx6_gfx7<0x075>;
+defm V_CMPSX_GE_F64 : VOPCX_Real_gfx6_gfx7<0x076>;
+defm V_CMPSX_O_F64 : VOPCX_Real_gfx6_gfx7<0x077>;
+defm V_CMPSX_U_F64 : VOPCX_Real_gfx6_gfx7<0x078>;
+defm V_CMPSX_NGE_F64 : VOPCX_Real_gfx6_gfx7<0x079>;
+defm V_CMPSX_NLG_F64 : VOPCX_Real_gfx6_gfx7<0x07a>;
+defm V_CMPSX_NGT_F64 : VOPCX_Real_gfx6_gfx7<0x07b>;
+defm V_CMPSX_NLE_F64 : VOPCX_Real_gfx6_gfx7<0x07c>;
+defm V_CMPSX_NEQ_F64 : VOPCX_Real_gfx6_gfx7<0x07d>;
+defm V_CMPSX_NLT_F64 : VOPCX_Real_gfx6_gfx7<0x07e>;
+defm V_CMPSX_TRU_F64 : VOPCX_Real_gfx6_gfx7<0x07f>;
+defm V_CMP_F_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x080>;
+defm V_CMP_LT_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x081>;
+defm V_CMP_EQ_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x082>;
+defm V_CMP_LE_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x083>;
+defm V_CMP_GT_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x084>;
+defm V_CMP_NE_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x085>;
+defm V_CMP_GE_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x086>;
+defm V_CMP_T_I32 : VOPC_Real_gfx6_gfx7_gfx10<0x087>;
+defm V_CMP_CLASS_F32 : VOPC_Real_gfx6_gfx7_gfx10<0x088>;
+defm V_CMPX_F_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x090>;
+defm V_CMPX_LT_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x091>;
+defm V_CMPX_EQ_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x092>;
+defm V_CMPX_LE_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x093>;
+defm V_CMPX_GT_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x094>;
+defm V_CMPX_NE_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x095>;
+defm V_CMPX_GE_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x096>;
+defm V_CMPX_T_I32 : VOPCX_Real_gfx6_gfx7_gfx10<0x097>;
+defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx6_gfx7_gfx10<0x098>;
+defm V_CMP_F_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a0>;
+defm V_CMP_LT_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a1>;
+defm V_CMP_EQ_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a2>;
+defm V_CMP_LE_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a3>;
+defm V_CMP_GT_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a4>;
+defm V_CMP_NE_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a5>;
+defm V_CMP_GE_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a6>;
+defm V_CMP_T_I64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a7>;
+defm V_CMP_CLASS_F64 : VOPC_Real_gfx6_gfx7_gfx10<0x0a8>;
+defm V_CMPX_F_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b0>;
+defm V_CMPX_LT_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b1>;
+defm V_CMPX_EQ_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b2>;
+defm V_CMPX_LE_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b3>;
+defm V_CMPX_GT_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b4>;
+defm V_CMPX_NE_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b5>;
+defm V_CMPX_GE_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b6>;
+defm V_CMPX_T_I64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b7>;
+defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0b8>;
+defm V_CMP_F_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c0>;
+defm V_CMP_LT_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c1>;
+defm V_CMP_EQ_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c2>;
+defm V_CMP_LE_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c3>;
+defm V_CMP_GT_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c4>;
+defm V_CMP_NE_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c5>;
+defm V_CMP_GE_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c6>;
+defm V_CMP_T_U32 : VOPC_Real_gfx6_gfx7_gfx10<0x0c7>;
+defm V_CMPX_F_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d0>;
+defm V_CMPX_LT_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d1>;
+defm V_CMPX_EQ_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d2>;
+defm V_CMPX_LE_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d3>;
+defm V_CMPX_GT_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d4>;
+defm V_CMPX_NE_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d5>;
+defm V_CMPX_GE_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d6>;
+defm V_CMPX_T_U32 : VOPCX_Real_gfx6_gfx7_gfx10<0x0d7>;
+defm V_CMP_F_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e0>;
+defm V_CMP_LT_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e1>;
+defm V_CMP_EQ_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e2>;
+defm V_CMP_LE_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e3>;
+defm V_CMP_GT_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e4>;
+defm V_CMP_NE_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e5>;
+defm V_CMP_GE_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e6>;
+defm V_CMP_T_U64 : VOPC_Real_gfx6_gfx7_gfx10<0x0e7>;
+defm V_CMPX_F_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f0>;
+defm V_CMPX_LT_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f1>;
+defm V_CMPX_EQ_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f2>;
+defm V_CMPX_LE_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f3>;
+defm V_CMPX_GT_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f4>;
+defm V_CMPX_NE_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f5>;
+defm V_CMPX_GE_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f6>;
+defm V_CMPX_T_U64 : VOPCX_Real_gfx6_gfx7_gfx10<0x0f7>;
//===----------------------------------------------------------------------===//
-// VI
+// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
multiclass VOPC_Real_vi <bits<10> op> {
- let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
def _e32_vi :
VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
VOPCe<op{7-0}>;
@@ -966,9 +1231,8 @@ multiclass VOPC_Real_vi <bits<10> op> {
VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
- !cast<Instruction>(NAME#"_e32_vi")> {
- let AssemblerPredicate = isVI;
+ let AssemblerPredicate = isGFX8GFX9 in {
+ defm : VOPCInstAliases<NAME, "vi">;
}
}
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 7de7d90d27b3..677095a354be 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -1,9 +1,8 @@
//===-- VOPInstructions.td - Vector Instruction Definitions ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -91,6 +90,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let VOP3_OPSEL = isVop3OpSel;
let IsPacked = P.IsPacked;
+ let IsMAI = P.IsMAI;
let AsmOperands = !if(isVop3OpSel,
P.AsmVOP3OpSel,
@@ -100,7 +100,6 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let SubtargetPredicate = isGCN;
// Because SGPRs may be allowed if there are multiple operands, we
// need a post-isel hook to insert copies in order to avoid
@@ -190,9 +189,15 @@ class VOP3a<VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
-class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a<P> {
+class VOP3a_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a<p> {
+ let Inst{11} = !if(p.HasClamp, clamp{0}, 0);
let Inst{25-17} = op;
- let Inst{11} = !if(P.HasClamp, clamp{0}, 0);
+}
+
+class VOP3a_gfx10<bits<10> op, VOPProfile p> : VOP3a<p> {
+ let Inst{15} = !if(p.HasClamp, clamp{0}, 0);
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x35;
}
class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
@@ -200,9 +205,14 @@ class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
}
-class VOP3e_si <bits<9> op, VOPProfile P> : VOP3a_si <op, P> {
+class VOP3e_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3a_gfx6_gfx7<op, p> {
bits<8> vdst;
- let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0);
+}
+
+class VOP3e_gfx10<bits<10> op, VOPProfile p> : VOP3a_gfx10<op, p> {
+ bits<8> vdst;
+ let Inst{7-0} = !if(p.EmitDst, vdst{7-0}, 0);
}
class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
@@ -217,6 +227,13 @@ class VOP3OpSel_gfx9 <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
}
+class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
+ let Inst{11} = !if(p.HasSrc0, src0_modifiers{2}, 0);
+ let Inst{12} = !if(p.HasSrc1, src1_modifiers{2}, 0);
+ let Inst{13} = !if(p.HasSrc2, src2_modifiers{2}, 0);
+ let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
+}
+
// NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa
class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
bits<2> attrchan;
@@ -236,6 +253,21 @@ class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> {
let Inst{49-41} = src0;
}
+class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
+ bits<6> attr;
+ bits<2> attrchan;
+ bits<1> high;
+
+ let Inst{8} = 0;
+ let Inst{9} = !if(p.HasSrc0Mods, src0_modifiers{1}, 0);
+ let Inst{37-32} = attr;
+ let Inst{39-38} = attrchan;
+ let Inst{40} = !if(p.HasHigh, high, 0);
+ let Inst{49-41} = src0;
+ let Inst{61} = 0;
+ let Inst{62} = !if(p.HasSrc0Mods, src0_modifiers{0}, 0);
+}
+
class VOP3be <VOPProfile P> : Enc64 {
bits<8> vdst;
bits<2> src0_modifiers;
@@ -295,10 +327,51 @@ class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
}
-class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
+class VOP3Pe_MAI <bits<10> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ bits<10> src0;
+ bits<10> src1;
+ bits<9> src2;
+ bits<3> blgp;
+ bits<3> cbsz;
+ bits<4> abid;
+ bits<1> clamp;
+
+ let Inst{7-0} = vdst;
+
+ let Inst{10-8} = !if(P.HasSrc1, cbsz, 0);
+ let Inst{14-11} = !if(P.HasSrc1, abid, 0);
+
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x34; // encoding
+ let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+
+ let Inst{59} = !if(P.HasSrc0, src0{9}, 0); // acc(0)
+ let Inst{60} = !if(P.HasSrc1, src1{9}, 0); // acc(1)
+
+ let Inst{63-61} = !if(P.HasSrc1, blgp, 0);
+}
+
+class VOP3Pe_gfx10 <bits<10> op, VOPProfile P> : VOP3Pe<op, P> {
+ let Inst{31-26} = 0x33; // encoding
+}
+
+class VOP3be_gfx6_gfx7<bits<9> op, VOPProfile p> : VOP3be<p> {
let Inst{25-17} = op;
}
+class VOP3be_gfx10<bits<10> op, VOPProfile p> : VOP3be<p> {
+ bits<1> clamp;
+ let Inst{15} = !if(p.HasClamp, clamp{0}, 0);
+ let Inst{25-16} = op;
+ let Inst{31-26} = 0x35;
+}
+
class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
bits<1> clamp;
let Inst{25-16} = op;
@@ -393,7 +466,7 @@ class VOP_SDWA9Ae<VOPProfile P> : VOP_SDWA9e<P> {
class VOP_SDWA9Be<VOPProfile P> : VOP_SDWA9e<P> {
bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}}
- let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0);
+ let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, ?);
let Inst{47} = !if(P.EmitDst, sdst{7}, 0);
}
@@ -456,9 +529,8 @@ class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
let TSFlags = ps.TSFlags;
}
-class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
- InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9> {
+class Base_VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands9, []> {
let isPseudo = 0;
let isCodeGenOnly = 0;
@@ -485,7 +557,20 @@ class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
let TSFlags = ps.TSFlags;
}
-class VOP_DPPe<VOPProfile P> : Enc64 {
+class VOP_SDWA9_Real <VOP_SDWA_Pseudo ps> :
+ Base_VOP_SDWA9_Real <ps >,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SDWA9>;
+
+class Base_VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> : Base_VOP_SDWA9_Real<ps> {
+ let SubtargetPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst);
+ let AssemblerPredicate = !if(ps.Pfl.HasExtSDWA9, HasSDWA10, DisableInst);
+ let DecoderNamespace = "SDWA10";
+}
+
+class VOP_SDWA10_Real<VOP_SDWA_Pseudo ps> :
+ Base_VOP_SDWA10_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SDWA10>;
+
+class VOP_DPPe<VOPProfile P, bit IsDPP16=0> : Enc64 {
bits<2> src0_modifiers;
bits<8> src0;
bits<2> src1_modifiers;
@@ -493,9 +578,11 @@ class VOP_DPPe<VOPProfile P> : Enc64 {
bits<1> bound_ctrl;
bits<4> bank_mask;
bits<4> row_mask;
+ bit fi;
let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
let Inst{48-40} = dpp_ctrl;
+ let Inst{50} = !if(IsDPP16, fi, ?);
let Inst{51} = bound_ctrl;
let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
@@ -533,8 +620,8 @@ class VOP_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
- let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
- let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
+ let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
+ let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
let DecoderNamespace = "DPP";
VOPProfile Pfl = P;
@@ -568,6 +655,67 @@ class VOP_DPP_Real <VOP_DPP_Pseudo ps, int EncodingFamily> :
let TSFlags = ps.TSFlags;
}
+class VOP_DPP <string OpName, VOPProfile P, bit IsDPP16,
+ dag InsDPP = !if(IsDPP16, P.InsDPP16, P.InsDPP),
+ string AsmDPP = !if(IsDPP16, P.AsmDPP16, P.AsmDPP)> :
+ InstSI <P.OutsDPP, InsDPP, OpName#AsmDPP, []>,
+ VOP_DPPe<P, IsDPP16> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+
+ let VALU = 1;
+ let DPP = 1;
+ let Size = 8;
+
+ let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
+ let SubtargetPredicate = HasDPP;
+ let AssemblerPredicate = !if(P.HasExtDPP, HasDPP, DisableInst);
+ let AsmVariantName = !if(P.HasExtDPP, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
+ let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
+ let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
+ let DecoderNamespace = "DPP";
+}
+
+class VOP_DPP8e<VOPProfile P> : Enc64 {
+ bits<8> src0;
+ bits<24> dpp8;
+ bits<9> fi;
+
+ let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{63-40} = dpp8{23-0};
+}
+
+class VOP_DPP8<string OpName, VOPProfile P> :
+ InstSI<P.OutsDPP8, P.InsDPP8, OpName#P.AsmDPP8, []>,
+ VOP_DPP8e<P> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+
+ let VALU = 1;
+ let DPP = 1;
+ let Size = 8;
+
+ let AsmMatchConverter = "cvtDPP8";
+ let SubtargetPredicate = HasDPP8;
+ let AssemblerPredicate = !if(P.HasExt, HasDPP8, DisableInst);
+ let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
+ let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", "");
+ let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, "");
+}
+
+def DPP8Mode {
+ int FI_0 = 0xE9;
+ int FI_1 = 0xEA;
+}
+
class getNumNodeArgs<SDPatternOperator Op> {
SDNode N = !cast<SDNode>(Op);
SDTypeProfile TP = N.TypeProfile;
diff --git a/lib/Target/ARC/ARC.h b/lib/Target/ARC/ARC.h
index 65f6ed67eb5b..cbbf0233706d 100644
--- a/lib/Target/ARC/ARC.h
+++ b/lib/Target/ARC/ARC.h
@@ -1,9 +1,8 @@
//===- ARC.h - Top-level interface for ARC representation -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@ class ARCTargetMachine;
FunctionPass *createARCISelDag(ARCTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createARCExpandPseudosPass();
+FunctionPass *createARCOptAddrMode();
FunctionPass *createARCBranchFinalizePass();
} // end namespace llvm
diff --git a/lib/Target/ARC/ARC.td b/lib/Target/ARC/ARC.td
index 6635630c62a3..846f1bb6735e 100644
--- a/lib/Target/ARC/ARC.td
+++ b/lib/Target/ARC/ARC.td
@@ -1,9 +1,8 @@
//===- ARC.td - Describe the ARC Target Machine ------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARC/ARCAsmPrinter.cpp b/lib/Target/ARC/ARCAsmPrinter.cpp
index 8c13da0484fd..5c3e2c9e773c 100644
--- a/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===- ARCAsmPrinter.cpp - ARC LLVM assembly writer -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,28 +12,18 @@
//===----------------------------------------------------------------------===//
#include "ARC.h"
-#include "ARCInstrInfo.h"
#include "ARCMCInstLower.h"
#include "ARCSubtarget.h"
#include "ARCTargetMachine.h"
-#include "ARCTargetStreamer.h"
-#include "InstPrinter/ARCInstPrinter.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "MCTargetDesc/ARCInstPrinter.h"
+#include "TargetInfo/ARCTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include <algorithm>
using namespace llvm;
@@ -44,7 +33,6 @@ namespace {
class ARCAsmPrinter : public AsmPrinter {
ARCMCInstLower MCInstLowering;
- ARCTargetStreamer &getTargetStreamer();
public:
explicit ARCAsmPrinter(TargetMachine &TM,
@@ -58,10 +46,6 @@ public:
} // end anonymous namespace
-ARCTargetStreamer &ARCAsmPrinter::getTargetStreamer() {
- return static_cast<ARCTargetStreamer &>(*OutStreamer->getTargetStreamer());
-}
-
void ARCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
diff --git a/lib/Target/ARC/ARCBranchFinalize.cpp b/lib/Target/ARC/ARCBranchFinalize.cpp
index 3b410fa383b7..633c081b3137 100644
--- a/lib/Target/ARC/ARCBranchFinalize.cpp
+++ b/lib/Target/ARC/ARCBranchFinalize.cpp
@@ -1,9 +1,8 @@
//===- ARCBranchFinalize.cpp - ARC conditional branches ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCCallingConv.td b/lib/Target/ARC/ARCCallingConv.td
index b7d37bc2a41f..098e03e36bca 100644
--- a/lib/Target/ARC/ARCCallingConv.td
+++ b/lib/Target/ARC/ARCCallingConv.td
@@ -1,9 +1,8 @@
//===- ARCCallingConv.td - Calling Conventions for ARC -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for ARC architecture.
diff --git a/lib/Target/ARC/ARCExpandPseudos.cpp b/lib/Target/ARC/ARCExpandPseudos.cpp
index 3177735c0529..a1646d17605f 100644
--- a/lib/Target/ARC/ARCExpandPseudos.cpp
+++ b/lib/Target/ARC/ARCExpandPseudos.cpp
@@ -1,9 +1,8 @@
//===- ARCExpandPseudosPass - ARC expand pseudo loads -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCFrameLowering.cpp b/lib/Target/ARC/ARCFrameLowering.cpp
index ca59cb2baaa7..d8946d97deff 100644
--- a/lib/Target/ARC/ARCFrameLowering.cpp
+++ b/lib/Target/ARC/ARCFrameLowering.cpp
@@ -1,9 +1,8 @@
//===- ARCFrameLowering.cpp - ARC Frame Information -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -65,6 +64,8 @@ static void generateStackAdjustment(MachineBasicBlock &MBB,
assert((AbsAmount % 4 == 0) && "Stack adjustments must be 4-byte aligned.");
if (isUInt<6>(AbsAmount))
AdjOp = Positive ? ARC::ADD_rru6 : ARC::SUB_rru6;
+ else if (isInt<12>(AbsAmount))
+ AdjOp = Positive ? ARC::ADD_rrs12 : ARC::SUB_rrs12;
else
AdjOp = Positive ? ARC::ADD_rrlimm : ARC::SUB_rrlimm;
@@ -134,8 +135,12 @@ void ARCFrameLowering::emitPrologue(MachineFunction &MF,
// Add in the varargs area here first.
LLVM_DEBUG(dbgs() << "Varargs\n");
unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
- BuildMI(MBB, MBBI, dl, TII->get(ARC::SUB_rru6))
- .addReg(ARC::SP)
+ unsigned Opc = ARC::SUB_rrlimm;
+ if (isUInt<6>(VarArgsBytes))
+ Opc = ARC::SUB_rru6;
+ else if (isInt<12>(VarArgsBytes))
+ Opc = ARC::SUB_rrs12;
+ BuildMI(MBB, MBBI, dl, TII->get(Opc), ARC::SP)
.addReg(ARC::SP)
.addImm(VarArgsBytes);
}
@@ -247,7 +252,10 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
// Then, replace the frame pointer by (new) [sp,StackSize-4].
// Then, move the stack pointer the rest of the way (sp = sp + StackSize).
if (hasFP(MF)) {
- BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARC::SUB_rru6), ARC::SP)
+ unsigned Opc = ARC::SUB_rrlimm;
+ if (isUInt<6>(StackSize))
+ Opc = ARC::SUB_rru6;
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(Opc), ARC::SP)
.addReg(ARC::FP)
.addImm(StackSize);
AmountAboveFunclet += 4;
@@ -271,19 +279,28 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
}
// Move the stack pointer up to the point of the funclet.
- if (StackSize - AmountAboveFunclet) {
- BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
- .addReg(ARC::SP)
+ if (unsigned MoveAmount = StackSize - AmountAboveFunclet) {
+ unsigned Opc = ARC::ADD_rrlimm;
+ if (isUInt<6>(MoveAmount))
+ Opc = ARC::ADD_rru6;
+ else if (isInt<12>(MoveAmount))
+ Opc = ARC::ADD_rrs12;
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(Opc), ARC::SP)
.addReg(ARC::SP)
.addImm(StackSize - AmountAboveFunclet);
}
if (StackSlotsUsedByFunclet) {
+ // This part of the adjustment will always be < 64 bytes.
BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::BL))
.addExternalSymbol(load_funclet_name[Last - ARC::R15])
.addReg(ARC::BLINK, RegState::Implicit | RegState::Kill);
- BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
- .addReg(ARC::SP)
+ unsigned Opc = ARC::ADD_rrlimm;
+ if (isUInt<6>(4 * StackSlotsUsedByFunclet))
+ Opc = ARC::ADD_rru6;
+ else if (isInt<12>(4 * StackSlotsUsedByFunclet))
+ Opc = ARC::ADD_rrs12;
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(Opc), ARC::SP)
.addReg(ARC::SP)
.addImm(4 * (StackSlotsUsedByFunclet));
}
@@ -294,8 +311,8 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
// Now, pop fp if necessary.
if (hasFP(MF)) {
BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::LD_AB_rs9))
- .addReg(ARC::SP, RegState::Define)
.addReg(ARC::FP, RegState::Define)
+ .addReg(ARC::SP, RegState::Define)
.addReg(ARC::SP)
.addImm(4);
}
@@ -305,7 +322,12 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF,
// Add in the varargs area here first.
LLVM_DEBUG(dbgs() << "Varargs\n");
unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex());
- BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(ARC::ADD_rru6))
+ unsigned Opc = ARC::ADD_rrlimm;
+ if (isUInt<6>(VarArgsBytes))
+ Opc = ARC::ADD_rru6;
+ else if (isInt<12>(VarArgsBytes))
+ Opc = ARC::ADD_rrs12;
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII->get(Opc))
.addReg(ARC::SP)
.addReg(ARC::SP)
.addImm(VarArgsBytes);
@@ -431,7 +453,14 @@ static void emitRegUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned Reg, int NumBytes, bool IsAdd,
const ARCInstrInfo *TII) {
- unsigned Opc = IsAdd ? ARC::ADD_rru6 : ARC::SUB_rru6;
+ unsigned Opc;
+ if (isUInt<6>(NumBytes))
+ Opc = IsAdd ? ARC::ADD_rru6 : ARC::SUB_rru6;
+ else if (isInt<12>(NumBytes))
+ Opc = IsAdd ? ARC::ADD_rrs12 : ARC::SUB_rrs12;
+ else
+ Opc = IsAdd ? ARC::ADD_rrlimm : ARC::SUB_rrlimm;
+
BuildMI(MBB, MBBI, dl, TII->get(Opc), Reg)
.addReg(Reg, RegState::Kill)
.addImm(NumBytes);
diff --git a/lib/Target/ARC/ARCFrameLowering.h b/lib/Target/ARC/ARCFrameLowering.h
index c042bec016ca..41b559d16761 100644
--- a/lib/Target/ARC/ARCFrameLowering.h
+++ b/lib/Target/ARC/ARCFrameLowering.h
@@ -1,9 +1,8 @@
//===- ARCFrameLowering.h - Define frame lowering for ARC -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCISelDAGToDAG.cpp b/lib/Target/ARC/ARCISelDAGToDAG.cpp
index 8dbd3d5bf036..f639c4e6f0ff 100644
--- a/lib/Target/ARC/ARCISelDAGToDAG.cpp
+++ b/lib/Target/ARC/ARCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===- ARCISelDAGToDAG.cpp - ARC dag to dag inst selector -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCISelLowering.cpp b/lib/Target/ARC/ARCISelLowering.cpp
index bf98af801406..847d23f0abdb 100644
--- a/lib/Target/ARC/ARCISelLowering.cpp
+++ b/lib/Target/ARC/ARCISelLowering.cpp
@@ -1,9 +1,8 @@
//===- ARCISelLowering.cpp - ARC DAG Lowering Impl --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCISelLowering.h b/lib/Target/ARC/ARCISelLowering.h
index fec01b13a866..4b72bfdaee9c 100644
--- a/lib/Target/ARC/ARCISelLowering.h
+++ b/lib/Target/ARC/ARCISelLowering.h
@@ -1,9 +1,8 @@
//===- ARCISelLowering.h - ARC DAG Lowering Interface -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCInstrFormats.td b/lib/Target/ARC/ARCInstrFormats.td
index 0a49b83ef16a..e4902a73ed49 100644
--- a/lib/Target/ARC/ARCInstrFormats.td
+++ b/lib/Target/ARC/ARCInstrFormats.td
@@ -1,9 +1,8 @@
//===- ARCInstrFormats.td - ARC Instruction Formats --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -56,6 +55,44 @@ def GPR32Reduced : Operand<iAny> {
let DecoderMethod = "DecodeGBR32ShortRegister";
}
+// Helper classes for load/store instructions
+class DataSizeMode<bits<2> mode, string instSfx, string asmSfx> {
+ bits<2> Value = mode;
+ string InstSuffix = instSfx;
+ string AsmSuffix = asmSfx;
+}
+class ExtMode<bit mode, string instSfx, string asmSfx> {
+ bit Value = mode;
+ string InstSuffix = instSfx;
+ string AsmSuffix = asmSfx;
+}
+
+class AddrMode<bits<2> mode, string instSfx, string asmSfx> {
+ bits<2> Value = mode;
+ string InstSuffix = instSfx;
+ string AsmSuffix = asmSfx;
+}
+
+class CacheMode<bit mode, string instSfx, string asmSfx> {
+ bit Value = mode;
+ string InstSuffix = instSfx;
+ string AsmSuffix = asmSfx;
+}
+
+def ByteSM : DataSizeMode<0b01, "B", "b">;
+def HalfSM : DataSizeMode<0b10, "H", "h">;
+def WordSM : DataSizeMode<0b00, "", "">;
+
+def NoEM : ExtMode<0, "", "">;
+def SignedEM : ExtMode<1, "_X", ".x">;
+
+def NoAM : AddrMode<0b00, "", "">;
+def PreIncAM : AddrMode<0b01, "_AW", ".aw">;
+def PostIncAM : AddrMode<0b10, "_AB", ".ab">;
+
+def NoCC : CacheMode<0b0, "", "">;
+def UncachedCC : CacheMode<0b1, "_DI", ".di">;
+
class InstARC<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction, Encoding64 {
@@ -65,6 +102,18 @@ class InstARC<int sz, dag outs, dag ins, string asmstr, list<dag> pattern>
let AsmString = asmstr;
let Pattern = pattern;
let Size = sz;
+
+ // Load/Store instruction properties
+ DataSizeMode ZZ = WordSM;
+ ExtMode X = NoEM;
+ AddrMode AA = NoAM;
+ CacheMode DI = NoCC;
+
+ // Field used for relation models
+ string BaseOpcode = "";
+
+ // TSFlags: bits {1-0} hold the address-increment mode (AA.Value); they are
+ // decoded in ARCInstrInfo.cpp via TSF_AddrModeOff and TSF_AddModeMask.
+ let TSFlags{1-0} = AA.Value;
}
// ARC pseudo instructions format
@@ -355,6 +404,8 @@ class F32_LD_RS9<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
let Inst{8-7} = zz;
let Inst{6} = x;
let Inst{5-0} = A;
+
+ let BaseOpcode = "ld_rs9";
}
class F32_LD_ADDR<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
@@ -364,6 +415,8 @@ class F32_LD_ADDR<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
let B = addr{14-9};
let S9 = addr{8-0};
+
+ let BaseOpcode = "ld_rs9";
}
@@ -388,6 +441,8 @@ class F32_LD_LIMM<bit x, bit di, bits<2> zz, dag outs, dag ins,
let Inst{6} = x;
let Inst{5-0} = A;
let DecoderMethod = "DecodeLdLImmInstruction";
+
+ let BaseOpcode = "ld_limm";
}
// Register + LImm load. The 32-bit immediate address is in Inst[63-32].
@@ -416,6 +471,8 @@ class F32_LD_RLIMM<bit x, bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
let Inst{11-6} = LImmReg;
let Inst{5-0} = A;
let DecoderMethod = "DecodeLdRLImmInstruction";
+
+ let BaseOpcode = "ld_rlimm";
}
// Register + S9 Store. (B + S9)
@@ -438,6 +495,8 @@ class F32_ST_RS9<bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
let Inst{4-3} = aa;
let Inst{2-1} = zz;
let Inst{0} = 0;
+
+ let BaseOpcode = "st_rs9";
}
class F32_ST_ADDR<bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
@@ -447,6 +506,8 @@ class F32_ST_ADDR<bits<2> aa, bit di, bits<2> zz, dag outs, dag ins,
let B = addr{14-9};
let S9 = addr{8-0};
+
+ let BaseOpcode = "st_rs9";
}
// LImm Store.
@@ -470,6 +531,8 @@ class F32_ST_LIMM<bit di, bits<2> zz, dag outs, dag ins,
let Inst{2-1} = zz;
let Inst{0} = 0;
let DecoderMethod = "DecodeStLImmInstruction";
+
+ let BaseOpcode = "st_limm";
}
// Compact Move/Load.
diff --git a/lib/Target/ARC/ARCInstrInfo.cpp b/lib/Target/ARC/ARCInstrInfo.cpp
index a8084f16893b..2a660e3c4dd1 100644
--- a/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/lib/Target/ARC/ARCInstrInfo.cpp
@@ -1,9 +1,8 @@
//===- ARCInstrInfo.cpp - ARC Instruction Information -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,19 @@ using namespace llvm;
#include "ARCGenInstrInfo.inc"
#define DEBUG_TYPE "arc-inst-info"
+
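+// These enums mirror the AA (address-increment) modes that ARCInstrFormats.td
+// encodes into TSFlags{1-0} (NoAM/PreIncAM/PostIncAM), letting
+// isPostIncrement()/isPreIncrement() recover the addressing mode from TSFlags.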
+enum AddrIncType {
+ NoAddInc = 0,
+ PreInc = 1,
+ PostInc = 2,
+ Scaled = 3
+};
+
+enum TSFlagsConstants {
+ TSF_AddrModeOff = 0,
+ TSF_AddModeMask = 3
+};
+
// Pin the vtable to this file.
void ARCInstrInfo::anchor() {}
@@ -389,10 +401,42 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
unsigned ARCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
- if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+ if (MI.isInlineAsm()) {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
return MI.getDesc().getSize();
}
+
+bool ARCInstrInfo::isPostIncrement(const MachineInstr &MI) const {
+ const MCInstrDesc &MID = MI.getDesc();
+ const uint64_t F = MID.TSFlags;
+ return ((F >> TSF_AddrModeOff) & TSF_AddModeMask) == PostInc;
+}
+
+bool ARCInstrInfo::isPreIncrement(const MachineInstr &MI) const {
+ const MCInstrDesc &MID = MI.getDesc();
+ const uint64_t F = MID.TSFlags;
+ return ((F >> TSF_AddrModeOff) & TSF_AddModeMask) == PreInc;
+}
+
+bool ARCInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
+ unsigned &BasePos,
+ unsigned &OffsetPos) const {
+ if (!MI.mayLoad() && !MI.mayStore())
+ return false;
+
+ BasePos = 1;
+ OffsetPos = 2;
+
+ if (isPostIncrement(MI) || isPreIncrement(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+
+ if (!MI.getOperand(BasePos).isReg() || !MI.getOperand(OffsetPos).isImm())
+ return false;
+
+ return true;
+}
diff --git a/lib/Target/ARC/ARCInstrInfo.h b/lib/Target/ARC/ARCInstrInfo.h
index f965dd4ff7f8..1289b37c37b3 100644
--- a/lib/Target/ARC/ARCInstrInfo.h
+++ b/lib/Target/ARC/ARCInstrInfo.h
@@ -1,9 +1,8 @@
//===- ARCInstrInfo.h - ARC Instruction Information -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,6 +81,16 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool isPostIncrement(const MachineInstr &MI) const override;
+
+ // ARC-specific
+ bool isPreIncrement(const MachineInstr &MI) const;
+
+ virtual bool getBaseAndOffsetPosition(const MachineInstr &MI,
+ unsigned &BasePos,
+ unsigned &OffsetPos) const override;
+
// Emit code before MBBI to load immediate value into physical register Reg.
// Returns an iterator to the new instruction.
MachineBasicBlock::iterator loadImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/ARC/ARCInstrInfo.td b/lib/Target/ARC/ARCInstrInfo.td
index 525098c4ff66..311d998f3d86 100644
--- a/lib/Target/ARC/ARCInstrInfo.td
+++ b/lib/Target/ARC/ARCInstrInfo.td
@@ -1,9 +1,8 @@
//===- ARCInstrInfo.td - Target Description for ARC --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -788,50 +787,47 @@ let isReturn = 1, isTerminator = 1 in {
// Load/Store instructions.
//----------------------------------------------------------------------------
+// Filter class for load/store mappings
+class ArcLdStRel;
+
// Load instruction variants:
// Control bits: x, aa, di, zz
// x - sign extend.
// aa - incrementing mode. (N/A for LIMM).
// di - uncached.
// zz - data size.
-multiclass ArcLdInst<bits<2> zz, string asmop> {
- let mayLoad = 1 in {
- def _rs9 : F32_LD_ADDR<0, 0b00, 0, zz,
- (outs GPR32:$A), (ins MEMrs9:$addr),
- !strconcat(asmop, "\t$A, [$addr]"), []>;
-
- def _limm : F32_LD_LIMM<0, 0, zz,
- (outs GPR32:$A), (ins MEMii:$addr),
- !strconcat(asmop, "\t$A, [$addr]"), []>;
-
- def _rlimm : F32_LD_RLIMM<0, 0b00, 0, zz,
- (outs GPR32:$A), (ins MEMrlimm:$addr),
- !strconcat(asmop, "\t$A, [$addr]"), []>;
-
- def _X_rs9 : F32_LD_ADDR<1, 0b00, 0, zz,
- (outs GPR32:$A), (ins MEMrs9:$addr),
- !strconcat(asmop, ".x\t$A, [$addr]"), []>;
-
- def _X_limm : F32_LD_LIMM<1, 0, zz,
- (outs GPR32:$A), (ins MEMii:$addr),
- !strconcat(asmop, ".x\t$A, [$addr]"), []>;
-
- def _X_rlimm : F32_LD_RLIMM<1, 0b00, 0, zz,
- (outs GPR32:$A), (ins MEMrlimm:$addr),
- !strconcat(asmop, ".x\t$A, [$addr]"), []>;
-
- def _AB_rs9 : F32_LD_RS9<0, 0b10, 0, zz,
- (outs GPR32:$addrout, GPR32:$A),
- (ins GPR32:$B, immS<9>:$S9),
- !strconcat(asmop, ".ab\t$A, [$B,$S9]"), []>
- { let Constraints = "$addrout = $B"; }
+multiclass ArcLdInst<DataSizeMode zz, ExtMode x, CacheMode di, string asmop> {
+ let mayLoad = 1, ZZ = zz, X = x, DI = di in {
+ def _rs9: F32_LD_ADDR<x.Value, NoAM.Value, di.Value, zz.Value,
+ (outs GPR32:$A), (ins MEMrs9:$addr),
+ !strconcat(asmop, "\t$A, [$addr]"), []>, ArcLdStRel;
+
+ def _limm: F32_LD_LIMM<x.Value, di.Value, zz.Value,
+ (outs GPR32:$A), (ins MEMii:$addr),
+ !strconcat(asmop, "\t$A, [$addr]"), []>, ArcLdStRel;
+
+ def _rlimm: F32_LD_RLIMM<x.Value, NoAM.Value, di.Value, zz.Value,
+ (outs GPR32:$A), (ins MEMrlimm:$addr),
+ !strconcat(asmop, "\t$A, [$addr]"), []>, ArcLdStRel;
+
+ foreach aa = [PreIncAM, PostIncAM] in {
+ def aa.InstSuffix#_rs9: F32_LD_RS9<x.Value, aa.Value, di.Value, zz.Value,
+ (outs GPR32:$A, GPR32:$addrout),
+ (ins GPR32:$B, immS<9>:$S9),
+ asmop#aa.AsmSuffix#"\t$A, [$B,$S9]", []>, ArcLdStRel
+ { let Constraints = "$addrout = $B"; let AA = aa; }
+ }
+ }
+}
+
+foreach di = [NoCC, UncachedCC] in {
+ defm LD#di.InstSuffix : ArcLdInst<WordSM, NoEM, di, "ld"#di.AsmSuffix>;
+ foreach zz = [ByteSM, HalfSM] in {
+ foreach x = [NoEM, SignedEM] in {
+ defm LD#zz.InstSuffix#x.InstSuffix#di.InstSuffix : ArcLdInst<zz, x, di, "ld"#zz.AsmSuffix#x.AsmSuffix#di.AsmSuffix>;
+ }
}
}
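+// For example, the loops above are expected to produce defs such as LD_rs9,
+// LDB_X_rs9 (asm "ldb.x") and LDH_DI_limm (asm "ldh.di"), plus the _AW_rs9
+// and _AB_rs9 pre/post-increment variants of the register-offset form.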
-
-// Load instruction definitions.
-defm LD : ArcLdInst<0b00, "ld">;
-defm LDH : ArcLdInst<0b10, "ldh">;
-defm LDB : ArcLdInst<0b01, "ldb">;
// Load instruction patterns.
// 32-bit loads.
@@ -873,25 +869,32 @@ def : Pat<(sextloadi8 AddrModeS9:$addr),(LDB_X_rs9 AddrModeS9:$addr)>;
// aa - incrementing mode. (N/A for LIMM).
// di - uncached.
// zz - data size.
-multiclass ArcStInst<bits<2> zz, string asmop> {
- let mayStore = 1 in {
- def _rs9 : F32_ST_ADDR<0b00, 0, zz, (outs), (ins GPR32:$C, MEMrs9:$addr),
- !strconcat(asmop, "\t$C, [$addr]"), []>;
-
- def _limm : F32_ST_LIMM<0, zz, (outs), (ins GPR32:$C, MEMii:$addr),
- !strconcat(asmop, "\t$C, [$addr]"), []>;
-
- def _AW_rs9 : F32_ST_RS9<0b01, 0, zz, (outs GPR32:$addrout),
- (ins GPR32:$C, GPR32:$B, immS<9>:$S9),
- !strconcat(asmop, ".aw\t$C, [$B,$S9]"), []>
- { let Constraints = "$addrout = $B"; }
+multiclass ArcStInst<DataSizeMode zz, CacheMode di, string asmop> {
+ let mayStore = 1, ZZ = zz, DI = di in {
+ def _rs9: F32_ST_ADDR<NoAM.Value, di.Value, zz.Value,
+ (outs), (ins GPR32:$C, MEMrs9:$addr),
+ !strconcat(asmop, "\t$C, [$addr]"), []>, ArcLdStRel;
+
+ def _limm: F32_ST_LIMM<di.Value, zz.Value,
+ (outs), (ins GPR32:$C, MEMii:$addr),
+ !strconcat(asmop, "\t$C, [$addr]"), []>, ArcLdStRel;
+
+ foreach aa = [PreIncAM, PostIncAM] in {
+ def aa.InstSuffix#_rs9: F32_ST_RS9<aa.Value, di.Value, zz.Value,
+ (outs GPR32:$addrout),
+ (ins GPR32:$C, GPR32:$B, immS<9>:$S9),
+ asmop#aa.AsmSuffix#"\t$C, [$B,$S9]", []>, ArcLdStRel
+ { let Constraints = "$addrout = $B"; let AA = aa; }
+ }
}
}
-// Store instruction definitions.
-defm ST : ArcStInst<0b00, "st">;
-defm STH : ArcStInst<0b10, "sth">;
-defm STB : ArcStInst<0b01, "stb">;
+foreach di = [NoCC, UncachedCC] in {
+ foreach zz = [ByteSM, HalfSM, WordSM] in {
+ defm ST#zz.InstSuffix#di.InstSuffix : ArcStInst<zz, di, "st"#zz.AsmSuffix#di.AsmSuffix>;
+ }
+}
// Store instruction patterns.
// 32-bit stores
@@ -912,3 +915,10 @@ def : Pat<(truncstorei8 i32:$C, AddrModeS9:$addr),
def : Pat<(truncstorei8 i32:$C, AddrModeImm:$addr),
(STB_limm i32:$C, AddrModeImm:$addr)>;
+def getPostIncOpcode : InstrMapping {
+ let FilterClass = "ArcLdStRel";
+ let RowFields = [ "BaseOpcode", "ZZ", "DI", "X"];
+ let ColFields = [ "AA" ];
+ let KeyCol = [ "NoAM" ];
+ let ValueCols = [["PostIncAM"]];
+}
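+
+// The mapping above is emitted (under GET_INSTRMAP_INFO) as ARC::getPostIncOpcode(),
+// which ARCOptAddrMode.cpp uses; e.g. it is expected to map LD_rs9 to LD_AB_rs9
+// and ST_rs9 to ST_AB_rs9.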
diff --git a/lib/Target/ARC/ARCMCInstLower.cpp b/lib/Target/ARC/ARCMCInstLower.cpp
index 43b087a57204..62462b77eccf 100644
--- a/lib/Target/ARC/ARCMCInstLower.cpp
+++ b/lib/Target/ARC/ARCMCInstLower.cpp
@@ -1,9 +1,8 @@
//===- ARCMCInstLower.cpp - ARC MachineInstr to MCInst ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/ARC/ARCMCInstLower.h b/lib/Target/ARC/ARCMCInstLower.h
index 9a698f26334a..24a7f68c695d 100644
--- a/lib/Target/ARC/ARCMCInstLower.h
+++ b/lib/Target/ARC/ARCMCInstLower.h
@@ -1,9 +1,8 @@
//===- ARCMCInstLower.h - Lower MachineInstr to MCInst ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.cpp b/lib/Target/ARC/ARCMachineFunctionInfo.cpp
index 7672f8d2c6dd..9cd9661ae245 100644
--- a/lib/Target/ARC/ARCMachineFunctionInfo.cpp
+++ b/lib/Target/ARC/ARCMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===- ARCMachineFunctionInfo.cpp - ARC machine func info -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARC/ARCMachineFunctionInfo.h b/lib/Target/ARC/ARCMachineFunctionInfo.h
index 95ad294e3668..31aa5b93246c 100644
--- a/lib/Target/ARC/ARCMachineFunctionInfo.h
+++ b/lib/Target/ARC/ARCMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===- ARCMachineFunctionInfo.h - ARC machine function info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCOptAddrMode.cpp b/lib/Target/ARC/ARCOptAddrMode.cpp
new file mode 100644
index 000000000000..c922b99c57b0
--- /dev/null
+++ b/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -0,0 +1,507 @@
+//===- ARCOptAddrMode.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass folds LD/ST + ADD pairs into Pre/Post-increment form of
+/// load/store instructions.
+//===----------------------------------------------------------------------===//
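+//
+// For example (an illustrative sketch, not verbatim MIR), a pair such as
+//
+//   %a = LD_rs9 %base, 0
+//   %b = ADD_rru6 %base, 4
+//
+// is expected to be rewritten into a single post-increment load:
+//
+//   %a, %b = LD_AB_rs9 %base, 4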
+
+#include "ARC.h"
+#define GET_INSTRMAP_INFO
+#include "ARCInstrInfo.h"
+#include "ARCTargetMachine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define OPTADDRMODE_DESC "ARC load/store address mode"
+#define OPTADDRMODE_NAME "arc-addr-mode"
+#define DEBUG_TYPE "arc-addr-mode"
+
+namespace llvm {
+FunctionPass *createARCOptAddrMode();
+void initializeARCOptAddrModePass(PassRegistry &);
+} // end namespace llvm
+
+namespace {
+class ARCOptAddrMode : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ARCOptAddrMode() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return OPTADDRMODE_DESC; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ const ARCSubtarget *AST = nullptr;
+ const ARCInstrInfo *AII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *MDT = nullptr;
+
+ // Tries to combine \p Ldst with an increment of its base register to form
+ // a single post-increment instruction.
+ MachineInstr *tryToCombine(MachineInstr &Ldst);
+
+ // Returns true if the result of \p Add is not used before \p Ldst.
+ bool noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
+ const MachineInstr *Ldst);
+
+ // Returns true if load/store instruction \p Ldst can be hoisted up to
+ // instruction \p To
+ bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+
+ // Returns true if load/store instruction \p Ldst can be sunk down
+ // to instruction \p To
+ bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+
+ // Check if instructions \p Ldst and \p Add can be moved to become adjacent.
+ // If they can, return the instruction which does not need to move.
+ // If \p Uses is not null, fill it with the instructions after \p Ldst which
+ // use \p Ldst's base register.
+ MachineInstr *canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
+ SmallVectorImpl<MachineInstr *> *Uses);
+
+ // Returns true if all instructions in the \p Uses array can be adjusted
+ // to accommodate an increment of register \p BaseReg by \p Incr.
+ bool canFixPastUses(const ArrayRef<MachineInstr *> &Uses,
+ MachineOperand &Incr, unsigned BaseReg);
+
+ // Update all instructions in \p Uses to accommodate an increment
+ // of \p BaseReg by \p Offset.
+ void fixPastUses(ArrayRef<MachineInstr *> Uses, unsigned BaseReg,
+ int64_t Offset);
+
+ // Change instruction \p Ldst to post-increment form.
+ // \p NewBase is the register to hold the updated base value.
+ // \p NewOffset is the instruction's new offset.
+ void changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
+ unsigned NewBase, MachineOperand &NewOffset);
+
+ bool processBasicBlock(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char ARCOptAddrMode::ID = 0;
+INITIALIZE_PASS_BEGIN(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(ARCOptAddrMode, OPTADDRMODE_NAME, OPTADDRMODE_DESC, false,
+ false)
+
+// Return true if \p Off can be used as an immediate offset
+// operand of a load/store instruction (S9 literal).
+static bool isValidLoadStoreOffset(int64_t Off) { return isInt<9>(Off); }
+
+// Return true if \p Off can be used as an immediate operand of
+// an ADD/SUB instruction (U6 literal).
+static bool isValidIncrementOffset(int64_t Off) { return isUInt<6>(Off); }
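+// (isInt<9> accepts offsets in [-256, 255]; isUInt<6> accepts increments in [0, 63].)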
+
+static bool isAddConstantOp(const MachineInstr &MI, int64_t &Amount) {
+ int64_t Sign = 1;
+ switch (MI.getOpcode()) {
+ case ARC::SUB_rru6:
+ Sign = -1;
+ LLVM_FALLTHROUGH;
+ case ARC::ADD_rru6:
+ assert(MI.getOperand(2).isImm() && "Expected immediate operand");
+ Amount = Sign * MI.getOperand(2).getImm();
+ return true;
+ default:
+ return false;
+ }
+}
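+// (For example, an ADD_rru6 with immediate 4 yields Amount = 4, while the
+// SUB_rru6 form yields Amount = -4.)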
+
+// Return true if \p MI dominates all uses of virtual register \p VReg.
+static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg,
+ MachineDominatorTree *MDT,
+ MachineRegisterInfo *MRI) {
+
+ assert(TargetRegisterInfo::isVirtualRegister(VReg) &&
+ "Expected virtual register!");
+
+ for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end();
+ it != end; ++it) {
+ MachineInstr *User = it->getParent();
+ if (User->isPHI()) {
+ unsigned BBOperandIdx = User->getOperandNo(&*it) + 1;
+ MachineBasicBlock *MBB = User->getOperand(BBOperandIdx).getMBB();
+ if (MBB->empty()) {
+ const MachineBasicBlock *InstBB = MI->getParent();
+ assert(InstBB != MBB && "Instruction found in empty MBB");
+ if (!MDT->dominates(InstBB, MBB))
+ return false;
+ continue;
+ }
+ User = &*MBB->rbegin();
+ }
+
+ if (!MDT->dominates(MI, User))
+ return false;
+ }
+ return true;
+}
+
+// Return true if \p MI is a load/store instruction with an immediate offset
+// which can be adjusted by \p Disp.
+static bool isLoadStoreThatCanHandleDisplacement(const TargetInstrInfo *TII,
+ const MachineInstr &MI,
+ int64_t Disp) {
+ unsigned BasePos, OffPos;
+ if (!TII->getBaseAndOffsetPosition(MI, BasePos, OffPos))
+ return false;
+ const MachineOperand &MO = MI.getOperand(OffPos);
+ if (!MO.isImm())
+ return false;
+ int64_t Offset = MO.getImm() + Disp;
+ return isValidLoadStoreOffset(Offset);
+}
+
+bool ARCOptAddrMode::noUseOfAddBeforeLoadOrStore(const MachineInstr *Add,
+ const MachineInstr *Ldst) {
+ unsigned R = Add->getOperand(0).getReg();
+ return dominatesAllUsesOf(Ldst, R, MDT, MRI);
+}
+
+MachineInstr *ARCOptAddrMode::tryToCombine(MachineInstr &Ldst) {
+ assert((Ldst.mayLoad() || Ldst.mayStore()) && "LD/ST instruction expected");
+
+ unsigned BasePos, OffsetPos;
+
+ LLVM_DEBUG(dbgs() << "[ABAW] tryToCombine " << Ldst);
+ if (!AII->getBaseAndOffsetPosition(Ldst, BasePos, OffsetPos)) {
+ LLVM_DEBUG(dbgs() << "[ABAW] Not a recognized load/store\n");
+ return nullptr;
+ }
+
+ MachineOperand &Base = Ldst.getOperand(BasePos);
+ MachineOperand &Offset = Ldst.getOperand(OffsetPos);
+
+ assert(Base.isReg() && "Base operand must be register");
+ if (!Offset.isImm()) {
+ LLVM_DEBUG(dbgs() << "[ABAW] Offset is not immediate\n");
+ return nullptr;
+ }
+
+ unsigned B = Base.getReg();
+ if (TargetRegisterInfo::isStackSlot(B) ||
+ !TargetRegisterInfo::isVirtualRegister(B)) {
+ LLVM_DEBUG(dbgs() << "[ABAW] Base is not VReg\n");
+ return nullptr;
+ }
+
+ // TODO: try to generate address preincrement
+ if (Offset.getImm() != 0) {
+ LLVM_DEBUG(dbgs() << "[ABAW] Non-zero offset\n");
+ return nullptr;
+ }
+
+ for (auto &Add : MRI->use_nodbg_instructions(B)) {
+ int64_t Incr;
+ if (!isAddConstantOp(Add, Incr))
+ continue;
+ if (!isValidLoadStoreOffset(Incr))
+ continue;
+
+ SmallVector<MachineInstr *, 8> Uses;
+ MachineInstr *MoveTo = canJoinInstructions(&Ldst, &Add, &Uses);
+
+ if (!MoveTo)
+ continue;
+
+ if (!canFixPastUses(Uses, Add.getOperand(2), B))
+ continue;
+
+ LLVM_DEBUG(MachineInstr *First = &Ldst; MachineInstr *Last = &Add;
+ if (MDT->dominates(Last, First)) std::swap(First, Last);
+ dbgs() << "[ABAW] Instructions " << *First << " and " << *Last
+ << " combined\n";
+
+ );
+
+ MachineInstr *Result = Ldst.getNextNode();
+ if (MoveTo == &Add) {
+ Ldst.removeFromParent();
+ Add.getParent()->insertAfter(Add.getIterator(), &Ldst);
+ }
+ if (Result == &Add)
+ Result = Result->getNextNode();
+
+ fixPastUses(Uses, B, Incr);
+
+ int NewOpcode = ARC::getPostIncOpcode(Ldst.getOpcode());
+ assert(NewOpcode > 0 && "No postincrement form found");
+ unsigned NewBaseReg = Add.getOperand(0).getReg();
+ changeToAddrMode(Ldst, NewOpcode, NewBaseReg, Add.getOperand(2));
+ Add.eraseFromParent();
+
+ return Result;
+ }
+ return nullptr;
+}
+
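+// Check whether \p Ldst and \p Add can be merged into a single post-increment
+// access. Return the instruction whose position the merged access will take
+// (\p Ldst or \p Add), or nullptr if they cannot be joined. Users of the base
+// register that will need fixing up afterwards are collected into \p Uses.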
+MachineInstr *
+ARCOptAddrMode::canJoinInstructions(MachineInstr *Ldst, MachineInstr *Add,
+ SmallVectorImpl<MachineInstr *> *Uses) {
+ assert(Ldst && Add && "NULL instruction passed");
+
+ MachineInstr *First = Add;
+ MachineInstr *Last = Ldst;
+ if (MDT->dominates(Ldst, Add))
+ std::swap(First, Last);
+ else if (!MDT->dominates(Add, Ldst))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "canJoinInstructions: " << *First << *Last);
+
+ unsigned BasePos, OffPos;
+
+ if (!AII->getBaseAndOffsetPosition(*Ldst, BasePos, OffPos)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "[canJoinInstructions] Cannot determine base/offset position\n");
+ return nullptr;
+ }
+
+ unsigned BaseReg = Ldst->getOperand(BasePos).getReg();
+
+  // Prohibit this:
+  //    v1 = add v0, c
+  //    st v1, [v0, 0]
+  // and this:
+  //    st v0, [v0, 0]
+  //    v1 = add v0, c
+ if (Ldst->mayStore() && Ldst->getOperand(0).isReg()) {
+ unsigned StReg = Ldst->getOperand(0).getReg();
+ if (Add->getOperand(0).getReg() == StReg || BaseReg == StReg) {
+ LLVM_DEBUG(dbgs() << "[canJoinInstructions] Store uses result of Add\n");
+ return nullptr;
+ }
+ }
+
+ SmallVector<MachineInstr *, 4> UsesAfterLdst;
+ SmallVector<MachineInstr *, 4> UsesAfterAdd;
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(BaseReg)) {
+ if (&MI == Ldst || &MI == Add)
+ continue;
+ if (&MI != Add && MDT->dominates(Ldst, &MI))
+ UsesAfterLdst.push_back(&MI);
+ else if (!MDT->dominates(&MI, Ldst))
+ return nullptr;
+ if (MDT->dominates(Add, &MI))
+ UsesAfterAdd.push_back(&MI);
+ }
+
+ MachineInstr *Result = nullptr;
+
+ if (First == Add) {
+ // n = add b, i
+ // ...
+ // x = ld [b, o] or x = ld [n, o]
+
+ if (noUseOfAddBeforeLoadOrStore(First, Last)) {
+ Result = Last;
+ LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can sink Add down to Ldst\n");
+ } else if (canHoistLoadStoreTo(Ldst, Add)) {
+ Result = First;
+ LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Ldst to Add\n");
+ }
+ } else {
+ // x = ld [b, o]
+ // ...
+ // n = add b, i
+ Result = First;
+ LLVM_DEBUG(dbgs() << "[canJoinInstructions] Can hoist Add to Ldst\n");
+ }
+ if (Result && Uses)
+ *Uses = (Result == Ldst) ? UsesAfterLdst : UsesAfterAdd;
+ return Result;
+}
+
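+// Return true if every instruction in \p Uses can absorb the increment
+// \p Incr of the base register: ADD/SUB by adjusting its immediate,
+// load/store by adjusting its offset.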
+bool ARCOptAddrMode::canFixPastUses(const ArrayRef<MachineInstr *> &Uses,
+ MachineOperand &Incr, unsigned BaseReg) {
+
+ assert(Incr.isImm() && "Expected immediate increment");
+ int64_t NewOffset = Incr.getImm();
+ for (MachineInstr *MI : Uses) {
+ int64_t Dummy;
+ if (isAddConstantOp(*MI, Dummy)) {
+ if (isValidIncrementOffset(Dummy + NewOffset))
+ continue;
+ return false;
+ }
+ if (isLoadStoreThatCanHandleDisplacement(AII, *MI, -NewOffset))
+ continue;
+ LLVM_DEBUG(dbgs() << "Instruction cannot handle displacement " << -NewOffset
+ << ": " << *MI);
+ return false;
+ }
+ return true;
+}
+
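+// Rewrite the instructions in \p Uses to use \p NewBase as their base
+// register and adjust their immediate operands by \p NewOffset.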
+void ARCOptAddrMode::fixPastUses(ArrayRef<MachineInstr *> Uses,
+ unsigned NewBase, int64_t NewOffset) {
+
+ for (MachineInstr *MI : Uses) {
+ int64_t Amount;
+ unsigned BasePos, OffPos;
+ if (isAddConstantOp(*MI, Amount)) {
+ NewOffset += Amount;
+ assert(isValidIncrementOffset(NewOffset) &&
+ "New offset won't fit into ADD instr");
+ BasePos = 1;
+ OffPos = 2;
+ } else if (AII->getBaseAndOffsetPosition(*MI, BasePos, OffPos)) {
+ MachineOperand &MO = MI->getOperand(OffPos);
+ assert(MO.isImm() && "expected immediate operand");
+ NewOffset += MO.getImm();
+ assert(isValidLoadStoreOffset(NewOffset) &&
+ "New offset won't fit into LD/ST");
+ } else
+ llvm_unreachable("unexpected instruction");
+
+ MI->getOperand(BasePos).setReg(NewBase);
+ MI->getOperand(OffPos).setImm(NewOffset);
+ }
+}
+
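+// Return true if \p Ldst can be moved up to just after \p To: both must be in
+// the same basic block, no instruction in between may write memory or have
+// other side effects (nor read memory if \p Ldst is a store), and every
+// register \p Ldst uses must be defined by an instruction dominating \p To.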
+bool ARCOptAddrMode::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
+ if (Ldst->getParent() != To->getParent())
+ return false;
+ MachineBasicBlock::const_iterator MI(To), ME(Ldst),
+ End(Ldst->getParent()->end());
+
+ bool IsStore = Ldst->mayStore();
+ for (; MI != ME && MI != End; ++MI) {
+ if (MI->isDebugValue())
+ continue;
+ if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+ if (IsStore && MI->mayLoad())
+ return false;
+ }
+
+ for (auto &O : Ldst->explicit_operands()) {
+ if (!O.isReg() || !O.isUse())
+ continue;
+ MachineInstr *OpDef = MRI->getVRegDef(O.getReg());
+ if (!OpDef || !MDT->dominates(OpDef, To))
+ return false;
+ }
+ return true;
+}
+
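+// Return true if \p Ldst can be sunk down to just before \p To within the
+// same basic block: no intervening instruction may write memory, have other
+// side effects, read memory if \p Ldst is a store, or read the value that
+// \p Ldst loads.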
+bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
+ // Can only sink load/store within same BB
+ if (Ldst->getParent() != To->getParent())
+ return false;
+ MachineBasicBlock::const_iterator MI(Ldst), ME(To),
+ End(Ldst->getParent()->end());
+
+ bool IsStore = Ldst->mayStore();
+ bool IsLoad = Ldst->mayLoad();
+
+ Register ValReg = IsLoad ? Ldst->getOperand(0).getReg() : Register();
+ for (; MI != ME && MI != End; ++MI) {
+ if (MI->isDebugValue())
+ continue;
+ if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+ if (IsStore && MI->mayLoad())
+ return false;
+ if (ValReg && MI->readsVirtualRegister(ValReg))
+ return false;
+ }
+ return true;
+}
+
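+// Rewrite \p Ldst into the post-increment form \p NewOpcode: the rewritten
+// instruction defines \p NewBase and takes the original base register and
+// \p NewOffset as its increment operand.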
+void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
+ unsigned NewBase,
+ MachineOperand &NewOffset) {
+ bool IsStore = Ldst.mayStore();
+ unsigned BasePos, OffPos;
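+  // Placeholder for the store's source operand; replaced with the real
+  // operand below when this is a store.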
+ MachineOperand Src = MachineOperand::CreateImm(0xDEADBEEF);
+ AII->getBaseAndOffsetPosition(Ldst, BasePos, OffPos);
+
+ unsigned BaseReg = Ldst.getOperand(BasePos).getReg();
+
+ Ldst.RemoveOperand(OffPos);
+ Ldst.RemoveOperand(BasePos);
+
+ if (IsStore) {
+ Src = Ldst.getOperand(BasePos - 1);
+ Ldst.RemoveOperand(BasePos - 1);
+ }
+
+ Ldst.setDesc(AST->getInstrInfo()->get(NewOpcode));
+ Ldst.addOperand(MachineOperand::CreateReg(NewBase, true));
+ if (IsStore)
+ Ldst.addOperand(Src);
+ Ldst.addOperand(MachineOperand::CreateReg(BaseReg, false));
+ Ldst.addOperand(NewOffset);
+ LLVM_DEBUG(dbgs() << "[ABAW] New Ldst: " << Ldst);
+}
+
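+// Walk \p MBB and try to convert each eligible load/store into its
+// post-increment form.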
+bool ARCOptAddrMode::processBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (auto MI = MBB.begin(), ME = MBB.end(); MI != ME; ++MI) {
+ if (MI->isDebugValue())
+ continue;
+ if (!MI->mayLoad() && !MI->mayStore())
+ continue;
+ if (ARC::getPostIncOpcode(MI->getOpcode()) < 0)
+ continue;
+ MachineInstr *Res = tryToCombine(*MI);
+ if (Res) {
+ Changed = true;
+      // Res points to the next instruction to process; step back so that the
+      // loop's increment advances to it.
+ MI = std::prev(Res->getIterator());
+ }
+ }
+ return Changed;
+}
+
+bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ AST = &MF.getSubtarget<ARCSubtarget>();
+ AII = AST->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ bool Changed = false;
+ for (auto &MBB : MF)
+ Changed |= processBasicBlock(MBB);
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createARCOptAddrMode() { return new ARCOptAddrMode(); }
diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp
index 38ea3c93a2d4..9c8340ac8f81 100644
--- a/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- ARCRegisterInfo.cpp - ARC Register Information -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,9 +82,11 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
switch (MI.getOpcode()) {
case ARC::LD_rs9:
assert((Offset % 4 == 0) && "LD needs 4 byte alignment.");
+ LLVM_FALLTHROUGH;
case ARC::LDH_rs9:
case ARC::LDH_X_rs9:
assert((Offset % 2 == 0) && "LDH needs 2 byte alignment.");
+ LLVM_FALLTHROUGH;
case ARC::LDB_rs9:
case ARC::LDB_X_rs9:
LLVM_DEBUG(dbgs() << "Building LDFI\n");
@@ -96,8 +97,10 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
break;
case ARC::ST_rs9:
assert((Offset % 4 == 0) && "ST needs 4 byte alignment.");
+ LLVM_FALLTHROUGH;
case ARC::STH_rs9:
assert((Offset % 2 == 0) && "STH needs 2 byte alignment.");
+ LLVM_FALLTHROUGH;
case ARC::STB_rs9:
LLVM_DEBUG(dbgs() << "Building STFI\n");
BuildMI(MBB, II, dl, TII.get(MI.getOpcode()))
@@ -187,7 +190,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Special handling of DBG_VALUE instructions.
if (MI.isDebugValue()) {
- unsigned FrameReg = getFrameRegister(MF);
+ Register FrameReg = getFrameRegister(MF);
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return;
@@ -220,7 +223,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
ObjSize, RS, SPAdj);
}
-unsigned ARCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register ARCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const ARCFrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? ARC::FP : ARC::SP;
}
diff --git a/lib/Target/ARC/ARCRegisterInfo.h b/lib/Target/ARC/ARCRegisterInfo.h
index 53abae3ac7a5..af41234e9dda 100644
--- a/lib/Target/ARC/ARCRegisterInfo.h
+++ b/lib/Target/ARC/ARCRegisterInfo.h
@@ -1,9 +1,8 @@
//===- ARCRegisterInfo.h - ARC Register Information Impl --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,7 +46,7 @@ public:
CallingConv::ID CC) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
diff --git a/lib/Target/ARC/ARCRegisterInfo.td b/lib/Target/ARC/ARCRegisterInfo.td
index 6d8d1b3dfd25..4b6744ad73da 100644
--- a/lib/Target/ARC/ARCRegisterInfo.td
+++ b/lib/Target/ARC/ARCRegisterInfo.td
@@ -1,9 +1,8 @@
//===- ARCRegisterInfo.td - ARC Register defs --------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARC/ARCSubtarget.cpp b/lib/Target/ARC/ARCSubtarget.cpp
index 2107a27bf786..bce2dbd2eaa6 100644
--- a/lib/Target/ARC/ARCSubtarget.cpp
+++ b/lib/Target/ARC/ARCSubtarget.cpp
@@ -1,9 +1,8 @@
//===- ARCSubtarget.cpp - ARC Subtarget Information -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCSubtarget.h b/lib/Target/ARC/ARCSubtarget.h
index 631d846f3c9c..0be797f753d5 100644
--- a/lib/Target/ARC/ARCSubtarget.h
+++ b/lib/Target/ARC/ARCSubtarget.h
@@ -1,9 +1,8 @@
//===- ARCSubtarget.h - Define Subtarget for the ARC ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp
index 6f5bbd3b4ef3..9fb45d686c26 100644
--- a/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/lib/Target/ARC/ARCTargetMachine.cpp
@@ -1,9 +1,8 @@
//===- ARCTargetMachine.cpp - Define TargetMachine for ARC ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "ARCTargetMachine.h"
#include "ARC.h"
#include "ARCTargetTransformInfo.h"
+#include "TargetInfo/ARCTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -75,7 +75,10 @@ bool ARCPassConfig::addInstSelector() {
void ARCPassConfig::addPreEmitPass() { addPass(createARCBranchFinalizePass()); }
-void ARCPassConfig::addPreRegAlloc() { addPass(createARCExpandPseudosPass()); }
+void ARCPassConfig::addPreRegAlloc() {
+ addPass(createARCExpandPseudosPass());
+ addPass(createARCOptAddrMode());
+}
// Force static initialization.
extern "C" void LLVMInitializeARCTarget() {
diff --git a/lib/Target/ARC/ARCTargetMachine.h b/lib/Target/ARC/ARCTargetMachine.h
index 18117e3409af..c5e8c3f2936d 100644
--- a/lib/Target/ARC/ARCTargetMachine.h
+++ b/lib/Target/ARC/ARCTargetMachine.h
@@ -1,9 +1,8 @@
//===- ARCTargetMachine.h - Define TargetMachine for ARC --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/ARCTargetStreamer.h b/lib/Target/ARC/ARCTargetStreamer.h
index 29fdfda661a4..abe89673316f 100644
--- a/lib/Target/ARC/ARCTargetStreamer.h
+++ b/lib/Target/ARC/ARCTargetStreamer.h
@@ -1,9 +1,8 @@
//===- ARCTargetStreamer.h - ARC Target Streamer ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARC/ARCTargetTransformInfo.h b/lib/Target/ARC/ARCTargetTransformInfo.h
index 20a83d5ae4c7..3e34008902b5 100644
--- a/lib/Target/ARC/ARCTargetTransformInfo.h
+++ b/lib/Target/ARC/ARCTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===- ARCTargetTransformInfo.h - ARC specific TTI --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// \file
diff --git a/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index 3fc5a033dd5d..82da18617b91 100644
--- a/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -1,9 +1,8 @@
//===- ARCDisassembler.cpp - Disassembler for ARC ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -15,6 +14,7 @@
#include "ARC.h"
#include "ARCRegisterInfo.h"
#include "MCTargetDesc/ARCMCTargetDesc.h"
+#include "TargetInfo/ARCTargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
diff --git a/lib/Target/ARC/MCTargetDesc/ARCInfo.h b/lib/Target/ARC/MCTargetDesc/ARCInfo.h
index 401b4c5e6613..57a77631a1fb 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCInfo.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCInfo.h
@@ -1,9 +1,8 @@
//===- ARCInfo.h - Additional ARC Info --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
index 9c820c2fc595..e3e0ea489957 100644
--- a/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp
+++ b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
@@ -1,9 +1,8 @@
//===- ARCInstPrinter.cpp - ARC MCInst to assembly syntax -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/InstPrinter/ARCInstPrinter.h b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
index bb3898a67cef..5ea58407f9ed 100644
--- a/lib/Target/ARC/InstPrinter/ARCInstPrinter.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
@@ -1,9 +1,8 @@
//===- ARCInstPrinter.h - Convert ARC MCInst to assembly syntax -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp
index 5d3fb52cfb45..10f93e292e9b 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===- ARCMCAsmInfo.cpp - ARC asm properties --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h
index 997a370fee8d..a086bd88d459 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h
@@ -1,9 +1,8 @@
//===- ARCMCAsmInfo.h - ARC asm properties ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index 17be15f730de..aa4818cd57ac 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===- ARCMCTargetDesc.cpp - ARC Target Descriptions ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,11 @@
//===----------------------------------------------------------------------===//
#include "ARCMCTargetDesc.h"
+#include "ARCInstPrinter.h"
#include "ARCMCAsmInfo.h"
#include "ARCTargetStreamer.h"
-#include "InstPrinter/ARCInstPrinter.h"
+#include "TargetInfo/ARCTargetInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
index dd152a6a34f9..ab06ce46d99f 100644
--- a/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
+++ b/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h
@@ -1,9 +1,8 @@
//===- ARCMCTargetDesc.h - ARC Target Descriptions --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,8 +19,6 @@ namespace llvm {
class Target;
-Target &getTheARCTarget();
-
} // end namespace llvm
// Defines symbolic names for ARC registers. This defines a mapping from
diff --git a/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp b/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
index 460b0a9f3e9b..59b9f806d590 100644
--- a/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
+++ b/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
@@ -1,13 +1,12 @@
//===- ARCTargetInfo.cpp - ARC Target Implementation ----------- *- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "ARC.h"
+#include "TargetInfo/ARCTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARC/TargetInfo/ARCTargetInfo.h b/lib/Target/ARC/TargetInfo/ARCTargetInfo.h
new file mode 100644
index 000000000000..6a9d2685f422
--- /dev/null
+++ b/lib/Target/ARC/TargetInfo/ARCTargetInfo.h
@@ -0,0 +1,20 @@
+//===- ARCTargetInfo.h - ARC Target Implementation ------------- *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARC_TARGETINFO_ARCTARGETINFO_H
+#define LLVM_LIB_TARGET_ARC_TARGETINFO_ARCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheARCTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARC_TARGETINFO_ARCTARGETINFO_H
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index be88fe4ddb14..fb238bfc9cbc 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -1,9 +1,8 @@
//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index b5cc45c5cc94..bf8ed6562fe7 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -1,9 +1,8 @@
//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,7 +35,7 @@ class MachineInstr;
class MCInst;
class PassRegistry;
-
+FunctionPass *createARMLowOverheadLoopsPass();
Pass *createARMParallelDSPPass();
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -47,6 +46,7 @@ FunctionPass *createARMCodeGenPreparePass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createMVEVPTBlockPass();
FunctionPass *createARMOptimizeBarriersPass();
FunctionPass *createThumb2SizeReductionPass(
std::function<bool(const Function &)> Ftor = nullptr);
@@ -57,11 +57,6 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
-void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
- BasicBlockInfo &BBI);
-std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
-
-
void initializeARMParallelDSPPass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
@@ -69,6 +64,9 @@ void initializeARMCodeGenPreparePass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
+void initializeThumb2ITBlockPass(PassRegistry &);
+void initializeMVEVPTBlockPass(PassRegistry &);
+void initializeARMLowOverheadLoopsPass(PassRegistry &);
} // end namespace llvm
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 3db60f1c16d6..b687db12eaf5 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,9 +1,8 @@
//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,12 +32,59 @@ def ModeSoftFloat : SubtargetFeature<"soft-float","UseSoftFloat",
//
// Floating Point, HW Division and Neon Support
-def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
- "Enable VFP2 instructions">;
-def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
- "Enable VFP3 instructions",
- [FeatureVFP2]>;
+// FP loads/stores/moves, shared between VFP and MVE (even in the integer-only
+// version).
+def FeatureFPRegs : SubtargetFeature<"fpregs", "HasFPRegs", "true",
+ "Enable FP registers">;
+
+// 16-bit FP loads/stores/moves, shared between VFP (with the v8.2A FP16
+// extension) and MVE (even in the integer-only version).
+def FeatureFPRegs16 : SubtargetFeature<"fpregs16", "HasFPRegs16", "true",
+ "Enable 16-bit FP registers",
+ [FeatureFPRegs]>;
+
+def FeatureFPRegs64 : SubtargetFeature<"fpregs64", "HasFPRegs64", "true",
+ "Enable 64-bit FP registers",
+ [FeatureFPRegs]>;
+
+def FeatureFP64 : SubtargetFeature<"fp64", "HasFP64", "true",
+ "Floating point unit supports "
+ "double precision",
+ [FeatureFPRegs64]>;
+
+def FeatureD32 : SubtargetFeature<"d32", "HasD32", "true",
+ "Extend FP to 32 double registers">;
+
+multiclass VFPver<string name, string query, string description,
+ list<SubtargetFeature> prev = [],
+ list<SubtargetFeature> otherimplies = []> {
+ def _D16_SP: SubtargetFeature<
+ name#"d16sp", query#"D16SP", "true",
+ description#" with only 16 d-registers and no double precision",
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16_SP")) # otherimplies>;
+ def _SP: SubtargetFeature<
+ name#"sp", query#"SP", "true",
+ description#" with no double precision",
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_SP")) #
+ otherimplies # [FeatureD32, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+ def _D16: SubtargetFeature<
+ name#"d16", query#"D16", "true",
+ description#" with only 16 d-registers",
+ !foreach(v, prev, !cast<SubtargetFeature>(v # "_D16")) #
+ otherimplies # [FeatureFP64, !cast<SubtargetFeature>(NAME # "_D16_SP")]>;
+ def "": SubtargetFeature<
+ name, query, "true", description,
+ prev # otherimplies # [
+ !cast<SubtargetFeature>(NAME # "_D16"),
+ !cast<SubtargetFeature>(NAME # "_SP")]>;
+}
+
+defm FeatureVFP2: VFPver<"vfp2", "HasVFPv2", "Enable VFP2 instructions",
+ [], [FeatureFPRegs]>;
+
+defm FeatureVFP3: VFPver<"vfp3", "HasVFPv3", "Enable VFP3 instructions",
+ [FeatureVFP2]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
@@ -48,31 +94,22 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision "
"floating point">;
-def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
- "Enable VFP4 instructions",
- [FeatureVFP3, FeatureFP16]>;
+defm FeatureVFP4: VFPver<"vfp4", "HasVFPv4", "Enable VFP4 instructions",
+ [FeatureVFP3], [FeatureFP16]>;
-def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
- "true", "Enable ARMv8 FP",
- [FeatureVFP4]>;
+defm FeatureFPARMv8: VFPver<"fp-armv8", "HasFPARMv8", "Enable ARMv8 FP",
+ [FeatureVFP4]>;
def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
"Enable full half-precision "
"floating point",
- [FeatureFPARMv8]>;
+ [FeatureFPARMv8_D16_SP, FeatureFPRegs16]>;
def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true",
"Enable full half-precision "
"floating point fml instructions",
[FeatureFullFP16]>;
-def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
- "Floating point unit supports "
- "single precision only">;
-
-def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict FP to 16 double registers">;
-
def FeatureHWDivThumb : SubtargetFeature<"hwdiv",
"HasHardwareDivideInThumb", "true",
"Enable divide instructions in Thumb">;
@@ -368,6 +405,12 @@ def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
def FeatureSB : SubtargetFeature<"sb", "HasSB", "true",
"Enable v8.5a Speculation Barrier" >;
+// Armv8.1-M extensions
+
+def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
+ "Enable Low Overhead Branch "
+ "extensions">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -461,6 +504,19 @@ def HasV8_5aOps : SubtargetFeature<"v8.5a", "HasV8_5aOps", "true",
"Support ARM v8.5a instructions",
[HasV8_4aOps, FeatureSB]>;
+def HasV8_1MMainlineOps : SubtargetFeature<
+ "v8.1m.main", "HasV8_1MMainlineOps", "true",
+ "Support ARM v8-1M Mainline instructions",
+ [HasV8MMainlineOps]>;
+def HasMVEIntegerOps : SubtargetFeature<
+ "mve", "HasMVEIntegerOps", "true",
+ "Support M-Class Vector Extension with integer ops",
+ [HasV8_1MMainlineOps, FeatureDSP, FeatureFPRegs16, FeatureFPRegs64]>;
+def HasMVEFloatOps : SubtargetFeature<
+ "mve.fp", "HasMVEFloatOps", "true",
+ "Support M-Class Vector Extension with integer and floating ops",
+ [HasMVEIntegerOps, FeatureFPARMv8_D16_SP, FeatureFullFP16]>;
+
//===----------------------------------------------------------------------===//
// ARM Processor subtarget features.
//
@@ -495,6 +551,8 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", []>;
def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
"Cortex-A75 ARM processors", []>;
+def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
+ "Cortex-A76 ARM processors", []>;
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
"Qualcomm Krait processors", []>;
@@ -744,6 +802,18 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline",
FeatureAcquireRelease,
FeatureMClass]>;
+def ARMv81mMainline : Architecture<"armv8.1-m.main", "ARMv81mMainline",
+ [HasV8_1MMainlineOps,
+ FeatureNoARM,
+ ModeThumb,
+ FeatureDB,
+ FeatureHWDivThumb,
+ Feature8MSecExt,
+ FeatureAcquireRelease,
+ FeatureMClass,
+ FeatureRAS,
+ FeatureLOB]>;
+
// Aliases
def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>;
def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>;
@@ -757,6 +827,7 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
// ARM schedules.
//===----------------------------------------------------------------------===//
//
+include "ARMPredicates.td"
include "ARMSchedule.td"
//===----------------------------------------------------------------------===//
@@ -942,14 +1013,12 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureHasRetAddrStack,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
@@ -957,8 +1026,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureHasRetAddrStack,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureFP16,
FeatureMP,
FeatureSlowFPBrcc,
@@ -968,8 +1036,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasRetAddrStack,
- FeatureVFP3,
- FeatureD16,
+ FeatureVFP3_D16,
FeatureFP16,
FeatureMP,
FeatureSlowFPBrcc,
@@ -977,39 +1044,52 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasSlowFPVMLx,
FeatureAvoidPartialCPSR]>;
-def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
+def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m,
ProcM3,
FeaturePrefLoopAlign32,
+ FeatureUseMISched,
+ FeatureUseAA,
FeatureHasNoBranchPredictor]>;
-def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
+def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m,
ProcM3,
+ FeatureUseMISched,
+ FeatureUseAA,
FeatureHasNoBranchPredictor]>;
-def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
- FeatureVFP4,
- FeatureVFPOnlySP,
- FeatureD16,
+def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em,
+ FeatureVFP4_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+ FeatureUseMISched,
+ FeatureUseAA,
FeatureHasNoBranchPredictor]>;
def : ProcNoItin<"cortex-m7", [ARMv7em,
- FeatureFPARMv8,
- FeatureD16]>;
+ FeatureFPARMv8_D16]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
FeatureNoMovt]>;
-def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
+def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
- FeatureFPARMv8,
- FeatureD16,
- FeatureVFPOnlySP,
+ FeatureFPARMv8_D16_SP,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
+ FeatureUseMISched,
+ FeatureUseAA,
FeatureHasNoBranchPredictor]>;
+def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16_SP,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
+ FeatureUseMISched,
+ FeatureUseAA,
+ FeatureHasNoBranchPredictor]>;
+
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
FeatureHWDivARM,
@@ -1060,6 +1140,22 @@ def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75,
FeatureHWDivARM,
FeatureDotProd]>;
+def : ProcNoItin<"cortex-a76", [ARMv82a, ProcA76,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
+def : ProcNoItin<"cortex-a76ae", [ARMv82a, ProcA76,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFullFP16,
+ FeatureDotProd]>;
+
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
@@ -1081,6 +1177,9 @@ def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynos]>;
def : ProcNoItin<"exynos-m4", [ARMv82a, ProcExynos,
FeatureFullFP16,
FeatureDotProd]>;
+def : ProcNoItin<"exynos-m5", [ARMv82a, ProcExynos,
+ FeatureFullFP16,
+ FeatureDotProd]>;
def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
FeatureHWDivThumb,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index b7cd3a0c2dae..e29077266fcd 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,9 +17,10 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
-#include "InstPrinter/ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMInstPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
+#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -120,13 +120,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasOptNone())
// For best debugging illusion, speed and small size sacrificed
OptimizationGoal = 6;
- else if (F.optForMinSize())
+ else if (F.hasMinSize())
// Aggressively for small size, speed and debug illusion sacrificed
OptimizationGoal = 4;
- else if (F.optForSize())
+ else if (F.hasOptSize())
// For small size, but speed and debugging illusion preserved
OptimizationGoal = 3;
else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
@@ -184,10 +184,21 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+void ARMAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ assert(MO.isGlobal() && "caller should check MO.isGlobal");
+ unsigned TF = MO.getTargetFlags();
+ if (TF & ARMII::MO_LO16)
+ O << ":lower16:";
+ else if (TF & ARMII::MO_HI16)
+ O << ":upper16:";
+ GetARMGVSymbol(MO.getGlobal(), TF)->print(O, MAI);
+ printOffset(MO.getOffset(), O);
+}
+
void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNum);
- unsigned TF = MO.getTargetFlags();
switch (MO.getType()) {
default: llvm_unreachable("<unknown operand type>");
@@ -204,27 +215,20 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
break;
}
case MachineOperand::MO_Immediate: {
- int64_t Imm = MO.getImm();
O << '#';
+ unsigned TF = MO.getTargetFlags();
if (TF == ARMII::MO_LO16)
O << ":lower16:";
else if (TF == ARMII::MO_HI16)
O << ":upper16:";
- O << Imm;
+ O << MO.getImm();
break;
}
case MachineOperand::MO_MachineBasicBlock:
MO.getMBB()->getSymbol()->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
- if (TF & ARMII::MO_LO16)
- O << ":lower16:";
- else if (TF & ARMII::MO_HI16)
- O << ":upper16:";
- GetARMGVSymbol(GV, TF)->print(O, MAI);
-
- printOffset(MO.getOffset(), O);
+ PrintSymbolOperand(MO, O);
break;
}
case MachineOperand::MO_ConstantPoolIndex:
@@ -256,8 +260,7 @@ GetARMJTIPICJumpTableLabel(unsigned uid) const {
}
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
@@ -265,20 +268,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
- case 'a': // Print as a memory address.
- if (MI->getOperand(OpNum).isReg()) {
- O << "["
- << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg())
- << "]";
- return false;
- }
- LLVM_FALLTHROUGH;
- case 'c': // Don't print "#" before an immediate operand.
- if (!MI->getOperand(OpNum).isImm())
- return true;
- O << MI->getOperand(OpNum).getImm();
- return false;
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
case 'P': // Print a VFP double precision register.
case 'q': // Print a NEON quad precision register.
printOperand(MI, OpNum, O);
@@ -444,8 +434,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
}
bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
+ unsigned OpNum, const char *ExtraCode,
raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -668,7 +657,7 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::IEEEDenormals);
else {
- if (!STI.hasVFP2()) {
+ if (!STI.hasVFP2Base()) {
// When the target doesn't have an FPU (by design or
// intention), the assumptions made on the software support
// mirror that of the equivalent hardware support *if it
@@ -678,7 +667,7 @@ void ARMAsmPrinter::emitAttributes() {
if (STI.hasV7Ops())
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::PreserveFPSign);
- } else if (STI.hasVFP3()) {
+ } else if (STI.hasVFP3Base()) {
// In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is,
// the sign bit of the zero matches the sign bit of the input or
// result that is being flushed to zero.
@@ -773,6 +762,14 @@ void ARMAsmPrinter::emitAttributes() {
//===----------------------------------------------------------------------===//
+static MCSymbol *getBFLabel(StringRef Prefix, unsigned FunctionNumber,
+ unsigned LabelId, MCContext &Ctx) {
+
+ MCSymbol *Label = Ctx.getOrCreateSymbol(Twine(Prefix)
+ + "BF" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+ return Label;
+}
+
static MCSymbol *getPICLabel(StringRef Prefix, unsigned FunctionNumber,
unsigned LabelId, MCContext &Ctx) {
@@ -1074,7 +1071,6 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
const TargetRegisterInfo *TargetRegInfo =
MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo();
- const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
unsigned FramePtr = TargetRegInfo->getFrameRegister(MF);
unsigned Opc = MI->getOpcode();
@@ -1138,7 +1134,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Pad += Width;
continue;
}
- RegList.push_back(MO.getReg());
+ // Check for registers that are remapped (for a Thumb1 prologue that
+ // saves high registers).
+ unsigned Reg = MO.getReg();
+ if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(Reg))
+ Reg = RemappedReg;
+ RegList.push_back(Reg);
}
break;
case ARM::STR_PRE_IMM:
@@ -1188,7 +1189,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
unsigned CPI = MI->getOperand(1).getIndex();
const MachineConstantPool *MCP = MF.getConstantPool();
if (CPI >= MCP->getConstants().size())
- CPI = AFI.getOriginalCPIdx(CPI);
+ CPI = AFI->getOriginalCPIdx(CPI);
assert(CPI != -1U && "Invalid constpool index");
// Derive the actual offset.
@@ -1218,8 +1219,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
} else if (DstReg == ARM::SP) {
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
- }
- else {
+ } else if (Opc == ARM::tMOVr) {
+ // If a Thumb1 function spills r8-r11, we copy the values to low
+ // registers before pushing them. Record the copy so we can emit the
+ // correct ".save" later.
+ AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
+ } else {
MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
@@ -1447,6 +1452,66 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
+ case ARM::t2BFi:
+ case ARM::t2BFic:
+ case ARM::t2BFLi:
+ case ARM::t2BFr:
+ case ARM::t2BFLr: {
+ // This is a Branch Future instruction.
+
+ const MCExpr *BranchLabel = MCSymbolRefExpr::create(
+ getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(0).getIndex(), OutContext),
+ OutContext);
+
+ auto MCInst = MCInstBuilder(Opc).addExpr(BranchLabel);
+ if (MI->getOperand(1).isReg()) {
+ // For BFr/BFLr
+ MCInst.addReg(MI->getOperand(1).getReg());
+ } else {
+ // For BFi/BFLi/BFic
+ const MCExpr *BranchTarget;
+ if (MI->getOperand(1).isMBB())
+ BranchTarget = MCSymbolRefExpr::create(
+ MI->getOperand(1).getMBB()->getSymbol(), OutContext);
+ else if (MI->getOperand(1).isGlobal()) {
+ const GlobalValue *GV = MI->getOperand(1).getGlobal();
+ BranchTarget = MCSymbolRefExpr::create(
+ GetARMGVSymbol(GV, MI->getOperand(1).getTargetFlags()), OutContext);
+ } else if (MI->getOperand(1).isSymbol()) {
+ BranchTarget = MCSymbolRefExpr::create(
+ GetExternalSymbolSymbol(MI->getOperand(1).getSymbolName()),
+ OutContext);
+ } else
+ llvm_unreachable("Unhandled operand kind in Branch Future instruction");
+
+ MCInst.addExpr(BranchTarget);
+ }
+
+ if (Opc == ARM::t2BFic) {
+ const MCExpr *ElseLabel = MCSymbolRefExpr::create(
+ getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getIndex(), OutContext),
+ OutContext);
+ MCInst.addExpr(ElseLabel);
+ MCInst.addImm(MI->getOperand(3).getImm());
+ } else {
+ MCInst.addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+ }
+
+ EmitToStreamer(*OutStreamer, MCInst);
+ return;
+ }
+ case ARM::t2BF_LabelPseudo: {
+    // This is a pseudo op for a label used by a Branch Future instruction.
+
+ // Emit the label.
+ OutStreamer->EmitLabel(getBFLabel(DL.getPrivateGlobalPrefix(),
+ getFunctionNumber(),
+ MI->getOperand(0).getIndex(), OutContext));
+ return;
+ }
case ARM::tPICADD: {
// This is a pseudo op for a label + instruction sequence, which looks like:
// LPC0:
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 0ba4bc05d6f7..a4b37fa2331f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,9 +1,8 @@
//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -76,12 +75,11 @@ public:
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
+ void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
const MCSubtargetInfo *EndInfo) const override;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bbebed59c851..222aa85856a2 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -134,7 +133,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -707,15 +706,7 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MCID.getSize())
return MCID.getSize();
- // If this machine instr is an inline asm, measure it.
- if (MI.getOpcode() == ARM::INLINEASM) {
- unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
- if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
- Size = alignTo(Size, 4);
- return Size;
- }
- unsigned Opc = MI.getOpcode();
- switch (Opc) {
+ switch (MI.getOpcode()) {
default:
// pseudo-instruction sizes are zero.
return 0;
@@ -752,6 +743,14 @@ unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return 12;
case ARM::SPACE:
return MI.getOperand(1).getImm();
+ case ARM::INLINEASM:
+ case ARM::INLINEASM_BR: {
+ // If this machine instr is an inline asm, measure it.
+ unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
+ Size = alignTo(Size, 4);
+ return Size;
+ }
}
}
@@ -806,6 +805,28 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
+void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
+ MIB.addImm(ARMVCC::None);
+ MIB.addReg(0);
+}
+
+void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
+ unsigned DestReg) {
+ addUnpredicatedMveVpredNOp(MIB);
+ MIB.addReg(DestReg, RegState::Undef);
+}
+
+void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
+ MIB.addImm(Cond);
+ MIB.addReg(ARM::VPR, RegState::Implicit);
+}
+
+void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
+ unsigned Cond, unsigned Inactive) {
+ addPredicatedMveVpredNOp(MIB, Cond);
+ MIB.addReg(Inactive);
+}
+
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg,
@@ -831,17 +852,20 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVRS;
else if (SPRDest && GPRSrc)
Opc = ARM::VMOVSR;
- else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP())
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- if (Opc == ARM::VORRq)
+ if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- MIB.add(predOps(ARMCC::AL));
+ if (Opc == ARM::MVE_VORR)
+ addUnpredicatedMveVpredROp(MIB, DestReg);
+ else
+ MIB.add(predOps(ARMCC::AL));
return;
}
@@ -852,11 +876,11 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Use VORRq when possible.
if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
- Opc = ARM::VORRq;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
BeginIdx = ARM::qsub_0;
SubRegs = 2;
} else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
- Opc = ARM::VORRq;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
BeginIdx = ARM::qsub_0;
SubRegs = 4;
// Fall back to VMOVD.
@@ -891,7 +915,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BeginIdx = ARM::dsub_0;
SubRegs = 4;
Spacing = 2;
- } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) {
+ } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
+ !Subtarget.hasFP64()) {
Opc = ARM::VMOVS;
BeginIdx = ARM::ssub_0;
SubRegs = 2;
@@ -901,6 +926,30 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (DestReg == ARM::CPSR) {
copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
return;
+ } else if (DestReg == ARM::VPR) {
+ assert(ARM::GPRRegClass.contains(SrcReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
+ } else if (SrcReg == ARM::VPR) {
+ assert(ARM::GPRRegClass.contains(DestReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
+ } else if (DestReg == ARM::FPSCR_NZCV) {
+ assert(ARM::GPRRegClass.contains(SrcReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
+ } else if (SrcReg == ARM::FPSCR_NZCV) {
+ assert(ARM::GPRRegClass.contains(DestReg));
+ BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
+ return;
}
assert(Opc && "Impossible reg-to-reg copy");
@@ -925,10 +974,15 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
DstRegs.insert(Dst);
#endif
Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
- // VORR takes two source operands.
- if (Opc == ARM::VORRq)
+ // VORR (NEON or MVE) takes two source operands.
+ if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
Mov.addReg(Src);
- Mov = Mov.add(predOps(ARMCC::AL));
+ }
+ // MVE VORR takes predicate operands in place of an ordinary condition.
+ if (Opc == ARM::MVE_VORR)
+ addUnpredicatedMveVpredROp(Mov, Dst);
+ else
+ Mov = Mov.add(predOps(ARMCC::AL));
// MOVr can set CC.
if (Opc == ARM::MOVr)
Mov = Mov.add(condCodeOp());
@@ -1010,6 +1064,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
+ } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1042,7 +1103,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
@@ -1058,6 +1119,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
}
+ } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
+ MIB.addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+ addUnpredicatedMveVpredNOp(MIB);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1155,6 +1224,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::VSTR_P0_off:
+ if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() == 0) {
+ FrameIndex = MI.getOperand(0).getIndex();
+ return ARM::P0;
+ }
+ break;
case ARM::VST1q64:
case ARM::VST1d64TPseudo:
case ARM::VST1d64QPseudo:
@@ -1177,7 +1253,8 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
SmallVector<const MachineMemOperand *, 1> Accesses;
- if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) {
+ if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
+ Accesses.size() == 1) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
@@ -1224,6 +1301,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0)
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
+ } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1260,7 +1343,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI)
@@ -1273,6 +1356,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
}
+ } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
+ MIB.addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO);
+ addUnpredicatedMveVpredNOp(MIB);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -1369,6 +1459,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::VLDR_P0_off:
+ if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+ MI.getOperand(1).getImm() == 0) {
+ FrameIndex = MI.getOperand(0).getIndex();
+ return ARM::P0;
+ }
+ break;
case ARM::VLD1q64:
case ARM::VLD1d8TPseudo:
case ARM::VLD1d16TPseudo:
@@ -1397,7 +1494,8 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
int &FrameIndex) const {
SmallVector<const MachineMemOperand *, 1> Accesses;
- if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) {
+ if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
+ Accesses.size() == 1) {
FrameIndex =
cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
->getFrameIndex();
@@ -1480,7 +1578,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
+ if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
return false;
// Look for a copy between even S-registers. That is where we keep floats
@@ -1898,24 +1996,15 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
// If we are optimizing for size, see if the branch in the predecessor can be
// lowered to cbn?z by the constant island lowering pass, and return false if
// so. This results in a shorter instruction sequence.
- if (MBB.getParent()->getFunction().optForSize()) {
+ if (MBB.getParent()->getFunction().hasOptSize()) {
MachineBasicBlock *Pred = *MBB.pred_begin();
if (!Pred->empty()) {
MachineInstr *LastMI = &*Pred->rbegin();
if (LastMI->getOpcode() == ARM::t2Bcc) {
- MachineBasicBlock::iterator CmpMI = LastMI;
- if (CmpMI != Pred->begin()) {
- --CmpMI;
- if (CmpMI->getOpcode() == ARM::tCMPi8 ||
- CmpMI->getOpcode() == ARM::t2CMPri) {
- unsigned Reg = CmpMI->getOperand(0).getReg();
- unsigned PredReg = 0;
- ARMCC::CondCodes P = getInstrPredicate(*CmpMI, PredReg);
- if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
- isARMLowRegister(Reg))
- return false;
- }
- }
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
+ if (CmpMI)
+ return false;
}
}
}
@@ -1932,6 +2021,15 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
if (!TCycles)
return false;
+  // In Thumb code we often end up trading one branch for an IT block, and
+  // if we are cloning, the extra instructions can increase code size. Prevent
+  // blocks with multiple predecessors from being if-converted to avoid this
+  // cloning.
+ if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
+ if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
+ return false;
+ }
+
// Attempt to estimate the relative costs of predication versus branching.
// Here we scale up each component of UnpredCost to avoid precision issue when
// scaling TCycles/FCycles by Probability.
@@ -2040,9 +2138,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
-static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
- const MachineRegisterInfo &MRI,
- const TargetInstrInfo *TII) {
+MachineInstr *
+ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) const {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -2050,8 +2148,8 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
- // MI is folded into the MOVCC by predicating it.
- if (!MI->isPredicable())
+ // Check if MI can be predicated and folded into the MOVCC.
+ if (!isPredicable(*MI))
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
@@ -2266,7 +2364,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations; it's really only a benefit to code size.
- if (!MF.getFunction().optForMinSize())
+ if (!Subtarget.hasMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2332,6 +2430,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
--CurRegEnc) {
unsigned CurReg = RegClass->getRegister(CurRegEnc);
+ if (IsT1PushPop && CurReg > ARM::R7)
+ continue;
if (!IsPop) {
// Pushing any register is completely harmless; mark the register involved
// as undef since we don't care about its value and must not restore it
@@ -2389,7 +2489,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
bool isSub = false;
// Memory operands in inline assembly always use AddrMode2.
- if (Opcode == ARM::INLINEASM)
+ if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
AddrMode = ARMII::AddrMode2;
if (Opcode == ARM::ADDri) {
@@ -2473,6 +2573,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
NumBits = 8;
Scale = 2;
break;
+ case ARMII::AddrModeT2_i7:
+ case ARMII::AddrModeT2_i7s2:
+ case ARMII::AddrModeT2_i7s4:
+ ImmIdx = FrameRegIdx+1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 7;
+ Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
+ AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
+ break;
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -2543,6 +2652,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
+ case ARM::tCMPr:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
CmpMask = ~0;
@@ -2619,32 +2729,62 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
unsigned SrcReg, unsigned SrcReg2,
- int ImmValue, const MachineInstr *OI) {
- if ((CmpI->getOpcode() == ARM::CMPrr ||
- CmpI->getOpcode() == ARM::t2CMPrr) &&
- (OI->getOpcode() == ARM::SUBrr ||
- OI->getOpcode() == ARM::t2SUBrr) &&
+ int ImmValue, const MachineInstr *OI,
+ bool &IsThumb1) {
+ if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
+ (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
((OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getReg() == SrcReg2) ||
(OI->getOperand(1).getReg() == SrcReg2 &&
- OI->getOperand(2).getReg() == SrcReg)))
+ OI->getOperand(2).getReg() == SrcReg))) {
+ IsThumb1 = false;
return true;
+ }
- if ((CmpI->getOpcode() == ARM::CMPri ||
- CmpI->getOpcode() == ARM::t2CMPri) &&
- (OI->getOpcode() == ARM::SUBri ||
- OI->getOpcode() == ARM::t2SUBri) &&
+ if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
+ ((OI->getOperand(2).getReg() == SrcReg &&
+ OI->getOperand(3).getReg() == SrcReg2) ||
+ (OI->getOperand(2).getReg() == SrcReg2 &&
+ OI->getOperand(3).getReg() == SrcReg))) {
+ IsThumb1 = true;
+ return true;
+ }
+
+ if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
+ (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
OI->getOperand(1).getReg() == SrcReg &&
- OI->getOperand(2).getImm() == ImmValue)
+ OI->getOperand(2).getImm() == ImmValue) {
+ IsThumb1 = false;
+ return true;
+ }
+
+ if (CmpI->getOpcode() == ARM::tCMPi8 &&
+ (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
+ OI->getOperand(2).getReg() == SrcReg &&
+ OI->getOperand(3).getImm() == ImmValue) {
+ IsThumb1 = true;
return true;
+ }
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
OI->getOperand(0).getReg() == SrcReg &&
- OI->getOperand(1).getReg() == SrcReg2)
+ OI->getOperand(1).getReg() == SrcReg2) {
+ IsThumb1 = false;
+ return true;
+ }
+
+ if (CmpI->getOpcode() == ARM::tCMPr &&
+ (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
+ OI->getOpcode() == ARM::tADDrr) &&
+ OI->getOperand(0).getReg() == SrcReg &&
+ OI->getOperand(2).getReg() == SrcReg2) {
+ IsThumb1 = true;
return true;
+ }
+
return false;
}
@@ -2662,6 +2802,17 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
case ARM::tSUBi3:
case ARM::tSUBi8:
case ARM::tMUL:
+ case ARM::tADC:
+ case ARM::tSBC:
+ case ARM::tRSB:
+ case ARM::tAND:
+ case ARM::tORR:
+ case ARM::tEOR:
+ case ARM::tBIC:
+ case ARM::tMVN:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tROR:
IsThumb1 = true;
LLVM_FALLTHROUGH;
case ARM::RSBrr:
@@ -2761,7 +2912,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
// Thus we cannot return here.
if (CmpInstr.getOpcode() == ARM::CMPri ||
- CmpInstr.getOpcode() == ARM::t2CMPri)
+ CmpInstr.getOpcode() == ARM::t2CMPri ||
+ CmpInstr.getOpcode() == ARM::tCMPi8)
MI = nullptr;
else
return false;
@@ -2783,20 +2935,22 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// CMP. This peephole works on the vregs, so is still in SSA form. As a
// consequence, the movs won't redefine/kill the MUL operands which would
// make this reordering illegal.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
if (MI && IsThumb1) {
--I;
- bool CanReorder = true;
- const bool HasStmts = I != E;
- for (; I != E; --I) {
- if (I->getOpcode() != ARM::tMOVi8) {
- CanReorder = false;
- break;
+ if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {
+ bool CanReorder = true;
+ for (; I != E; --I) {
+ if (I->getOpcode() != ARM::tMOVi8) {
+ CanReorder = false;
+ break;
+ }
+ }
+ if (CanReorder) {
+ MI = MI->removeFromParent();
+ E = CmpInstr;
+ CmpInstr.getParent()->insert(E, MI);
}
- }
- if (HasStmts && CanReorder) {
- MI = MI->removeFromParent();
- E = CmpInstr;
- CmpInstr.getParent()->insert(E, MI);
}
I = CmpInstr;
E = MI;
@@ -2804,12 +2958,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for SubAdd.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool SubAddIsThumb1 = false;
do {
const MachineInstr &Instr = *--I;
// Check whether CmpInstr can be made redundant by the current instruction.
- if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
+ if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
+ SubAddIsThumb1)) {
SubAdd = &*I;
break;
}
@@ -2824,14 +2979,25 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// change. We can't do this transformation.
return false;
- } while (I != B);
+ if (I == B) {
+ // In some cases, we scan the use-list of an instruction for an AND;
+ // that AND is in the same BB, but may not be scheduled before the
+ // corresponding TST. In that case, bail out.
+ //
+ // FIXME: We could try to reschedule the AND.
+ return false;
+ }
+ } while (true);
// Return false if no candidates exist.
if (!MI && !SubAdd)
return false;
- // The single candidate is called MI.
- if (!MI) MI = SubAdd;
+  // If we found a SubAdd, use it, as it will be closer to the CMP.
+ if (SubAdd) {
+ MI = SubAdd;
+ IsThumb1 = SubAddIsThumb1;
+ }
// We can't use a predicated instruction - it doesn't always write the flags.
if (isPredicated(*MI))
@@ -2899,9 +3065,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// operands will be modified.
unsigned Opc = SubAdd->getOpcode();
bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
- Opc == ARM::SUBri || Opc == ARM::t2SUBri;
- if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
- SubAdd->getOperand(2).getReg() == SrcReg)) {
+ Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
+ Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
+ Opc == ARM::tSUBi8;
+ unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
+ if (!IsSub ||
+ (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
+ SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
// VSel doesn't support condition code update.
if (IsInstrVSel)
return false;
@@ -2979,9 +3149,10 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
++Next;
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
+ bool IsThumb1;
if (Next != MI.getParent()->end() &&
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
- isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
+ isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
return false;
return true;
}
@@ -3372,7 +3543,12 @@ unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {
I != E; ++I) {
Size += (*I)->getSize();
}
- return Size / 4;
+ // FIXME: The scheduler currently can't handle values larger than 16. But
+ // the values can actually go up to 32 for floating-point load/store
+ // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory
+ // operations isn't right; we could end up with "extra" memory operands for
+ // various reasons, like tail merge merging two memory operations.
+ return std::min(Size / 4, 16U);
}
static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
@@ -4093,7 +4269,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl(
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI.getParent()->getParent();
- // FIXME: Use Function::optForSize().
+ // FIXME: Use Function::hasOptSize().
if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
@@ -4517,6 +4693,31 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
return false;
}
+ if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {
+ // Make sure we don't generate a lo-lo mov that isn't supported.
+ if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
+ !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
+ ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
+ return false;
+ }
+ }
+ if (MI.getOpcode() == ARM::tPUSH ||
+ MI.getOpcode() == ARM::tPOP ||
+ MI.getOpcode() == ARM::tPOP_RET) {
+ for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
+ if (MI.getOperand(i).isImplicit() ||
+ !MI.getOperand(i).isReg())
+ continue;
+ unsigned Reg = MI.getOperand(i).getReg();
+ if (Reg < ARM::R0 || Reg > ARM::R7) {
+ if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
+ !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
+ ErrInfo = "Unsupported register in Thumb1 push/pop";
+ return false;
+ }
+ }
+ }
+ }
return true;
}
@@ -5107,3 +5308,44 @@ ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_NONLAZY, "arm-nonlazy"}};
return makeArrayRef(TargetFlags);
}
+
+bool llvm::registerDefinedBetween(unsigned Reg,
+ MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI) {
+ for (auto I = From; I != To; ++I)
+ if (I->modifiesRegister(Reg, TRI))
+ return true;
+ return false;
+}
+
+MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
+ const TargetRegisterInfo *TRI) {
+  // Search backwards to the instruction that defines CPSR. This may or may
+  // not be a CMP; we check that after this loop. If we find another
+  // instruction that reads CPSR, we return nullptr.
+ MachineBasicBlock::iterator CmpMI = Br;
+ while (CmpMI != Br->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
+ break;
+ if (CmpMI->readsRegister(ARM::CPSR, TRI))
+ break;
+ }
+
+ // Check that this inst is a CMP r[0-7], #0 and that the register
+ // is not redefined between the cmp and the br.
+ if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
+ return nullptr;
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
+ if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
+ return nullptr;
+ if (!isARMLowRegister(Reg))
+ return nullptr;
+ if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
+ return nullptr;
+
+ return &*CmpMI;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index de1f307083ba..c28983fcc15c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,9 +1,8 @@
//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -399,6 +398,11 @@ private:
void expandMEMCPY(MachineBasicBlock::iterator) const;
+ /// Identify instructions that can be folded into a MOVCC instruction, and
+ /// return the defining instruction.
+ MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) const;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
@@ -478,6 +482,21 @@ bool isUncondBranchOpcode(int Opc) {
return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B;
}
+static inline bool isVPTOpcode(int Opc) {
+ return Opc == ARM::MVE_VPTv16i8 || Opc == ARM::MVE_VPTv16u8 ||
+ Opc == ARM::MVE_VPTv16s8 || Opc == ARM::MVE_VPTv8i16 ||
+ Opc == ARM::MVE_VPTv8u16 || Opc == ARM::MVE_VPTv8s16 ||
+ Opc == ARM::MVE_VPTv4i32 || Opc == ARM::MVE_VPTv4u32 ||
+ Opc == ARM::MVE_VPTv4s32 || Opc == ARM::MVE_VPTv4f32 ||
+ Opc == ARM::MVE_VPTv8f16 || Opc == ARM::MVE_VPTv16i8r ||
+ Opc == ARM::MVE_VPTv16u8r || Opc == ARM::MVE_VPTv16s8r ||
+ Opc == ARM::MVE_VPTv8i16r || Opc == ARM::MVE_VPTv8u16r ||
+ Opc == ARM::MVE_VPTv8s16r || Opc == ARM::MVE_VPTv4i32r ||
+ Opc == ARM::MVE_VPTv4u32r || Opc == ARM::MVE_VPTv4s32r ||
+ Opc == ARM::MVE_VPTv4f32r || Opc == ARM::MVE_VPTv8f16r ||
+ Opc == ARM::MVE_VPST;
+}
+
static inline
bool isCondBranchOpcode(int Opc) {
return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;
@@ -505,6 +524,28 @@ static inline bool isPushOpcode(int Opc) {
Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
}
+/// isValidCoprocessorNumber - decide whether an explicit coprocessor
+/// number is legal in generic instructions like CDP. The answer can
+/// vary with the subtarget.
+static inline bool isValidCoprocessorNumber(unsigned Num,
+ const FeatureBitset& featureBits) {
+ // Armv8-A disallows everything *other* than 111x (CP14 and CP15).
+ if (featureBits[ARM::HasV8Ops] && (Num & 0xE) != 0xE)
+ return false;
+
+ // Armv7 disallows 101x (CP10 and CP11), which clash with VFP/NEON.
+ if (featureBits[ARM::HasV7Ops] && (Num & 0xE) == 0xA)
+ return false;
+
+ // Armv8.1-M also disallows 100x (CP8,CP9) and 111x (CP14,CP15)
+ // which clash with MVE.
+ if (featureBits[ARM::HasV8_1MMainlineOps] &&
+ ((Num & 0xE) == 0x8 || (Num & 0xE) == 0xE))
+ return false;
+
+ return true;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -512,12 +553,6 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
unsigned getMatchingCondBranchOpcode(unsigned Opc);
-/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
-/// opcode of the SSA instruction representing the conditional MI.
-unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
- MachineInstr *&MI,
- const MachineRegisterInfo &MRI);
-
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional
/// CPSR def operand.
@@ -568,6 +603,23 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
+/// Return true if Reg is defined between From and To.
+bool registerDefinedBetween(unsigned Reg, MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI);
+
+/// Search backwards from a tBcc to find a tCMPi8 against 0, meaning
+/// we can convert them to a tCBZ or tCBNZ. Return nullptr if not found.
+MachineInstr *findCMPToFoldIntoCBZ(MachineInstr *Br,
+ const TargetRegisterInfo *TRI);
+
+void addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB);
+void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned DestReg);
+
+void addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond);
+void addPredicatedMveVpredROp(MachineInstrBuilder &MIB, unsigned Cond,
+ unsigned Inactive);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 02b3daf3c6fd..dc99b37742da 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -150,7 +149,7 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
const uint32_t *
ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
- if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+ if (!STI.useSoftFloat() && STI.hasVFP2Base() && !STI.isThumb1Only())
return CSR_NoRegs_RegMask;
else
return CSR_FPRegs_RegMask;
@@ -194,7 +193,7 @@ getReservedRegs(const MachineFunction &MF) const {
if (STI.isR9Reserved())
markSuperRegs(Reserved, ARM::R9);
// Reserve D16-D31 if the subtarget doesn't support them.
- if (!STI.hasVFP3() || STI.hasD16()) {
+ if (!STI.hasD32()) {
static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
for (unsigned R = 0; R < 16; ++R)
markSuperRegs(Reserved, ARM::D16 + R);
@@ -204,6 +203,8 @@ getReservedRegs(const MachineFunction &MF) const {
for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
if (Reserved.test(*SI))
markSuperRegs(Reserved, Reg);
+  // Reserve ZR, which is only used by the v8.1-M architecture.
+ markSuperRegs(Reserved, ARM::ZR);
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
@@ -369,29 +370,35 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ARMFrameLowering *TFI = getFrameLowering(MF);
- // When outgoing call frames are so large that we adjust the stack pointer
- // around the call, we can no longer use the stack pointer to reach the
- // emergency spill slot.
+ // If we have stack realignment and VLAs, we have no pointer to use to
+ // access the stack. If we have stack realignment, and a large call frame,
+ // we have no place to allocate the emergency spill slot.
if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
// negative range for ldr/str (255), and Thumb1 allows positive offsets only.
+ //
// It's going to be better to use the SP or Base Pointer instead. When there
// are variable sized objects, we can't reference off of the SP, so we
// reserve a Base Pointer.
- if (AFI->isThumbFunction() && MFI.hasVarSizedObjects()) {
- // Conservatively estimate whether the negative offset from the frame
- // pointer will be sufficient to reach. If a function has a smallish
- // frame, it's less likely to have lots of spills and callee saved
- // space, so it's all more likely to be within range of the frame pointer.
- // If it's wrong, the scavenger will still enable access to work, it just
- // won't be optimal.
- if (AFI->isThumb2Function() && MFI.getLocalFrameSize() < 128)
- return false;
+ //
+ // For Thumb2, estimate whether a negative offset from the frame pointer
+ // will be sufficient to reach the whole stack frame. If a function has a
+ // smallish frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+  // If it's wrong, the scavenger will still make access work; it just
+ // won't be optimal. (We should always be able to reach the emergency
+ // spill slot from the frame pointer.)
+ if (AFI->isThumb2Function() && MFI.hasVarSizedObjects() &&
+ MFI.getLocalFrameSize() >= 128)
+ return true;
+ // For Thumb1, if sp moves, nothing is in range, so force a base pointer.
+ // This is necessary for correctness in cases where we need an emergency
+ // spill slot. (In Thumb1, we can't use a negative offset from the frame
+ // pointer.)
+ if (AFI->isThumb1OnlyFunction() && !TFI->hasReservedCallFrame(MF))
return true;
- }
-
return false;
}
@@ -425,7 +432,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
|| needsStackRealignment(MF);
}
-unsigned
+Register
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
const ARMFrameLowering *TFI = getFrameLowering(MF);
@@ -785,7 +792,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int PIdx = MI.findFirstPredOperandIdx();
ARMCC::CondCodes Pred = (PIdx == -1)
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
- unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+ Register PredReg = (PIdx == -1) ? Register() : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
// Must be addrmode4/6.
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 45d29ebc0bd3..7e2c72b4d712 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -174,7 +173,7 @@ public:
bool cannotEliminateFrame(const MachineFunction &MF) const;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
unsigned getBaseRegister() const { return BasePtr; }
bool isLowRegister(unsigned Reg) const;
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.cpp b/lib/Target/ARM/ARMBasicBlockInfo.cpp
new file mode 100644
index 000000000000..2de90e816b33
--- /dev/null
+++ b/lib/Target/ARM/ARMBasicBlockInfo.cpp
@@ -0,0 +1,146 @@
+//===--- ARMBasicBlockInfo.cpp - Utilities for block sizes ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <vector>
+
+#define DEBUG_TYPE "arm-bb-utils"
+
+using namespace llvm;
+
+namespace llvm {
+
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+static bool
+mayOptimizeThumb2Instruction(const MachineInstr *MI) {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ case ARM::tBR_JTr:
+ return true;
+ }
+ return false;
+}
+
+void ARMBasicBlockUtils::computeBlockSize(MachineBasicBlock *MBB) {
+ LLVM_DEBUG(dbgs() << "computeBlockSize: " << MBB->getName() << "\n");
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineInstr &I : *MBB) {
+ BBI.Size += TII->getInstSizeInBytes(I);
+ // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I.isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(&I))
+ BBI.Unalign = 1;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->ensureAlignment(2);
+ }
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMBasicBlockUtils::getOffsetOf(MachineInstr *MI) const {
+ const MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+ return Offset;
+}
+
+/// isBBInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool ARMBasicBlockUtils::isBBInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned MaxDisp) const {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ LLVM_DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB)
+ << " from " << printMBBReference(*MI->getParent())
+ << " max delta=" << MaxDisp << " from " << getOffsetOf(MI)
+ << " to " << DestOffset << " offset "
+ << int(DestOffset - BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+void ARMBasicBlockUtils::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ assert(BB->getParent() == &MF &&
+ "Basic block is not a child of the current function.\n");
+
+ unsigned BBNum = BB->getNumber();
+ LLVM_DEBUG(dbgs() << "Adjust block:\n"
+ << " - name: " << BB->getName() << "\n"
+ << " - number: " << BB->getNumber() << "\n"
+ << " - function: " << MF.getName() << "\n"
+ << " - blocks: " << MF.getNumBlockIDs() << "\n");
+
+ for(unsigned i = BBNum + 1, e = MF.getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF.getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated 2 blocks. This is the maximum number of blocks
+ // changed before calling this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
+ }
+}
+
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h
index e0cb0aa676a6..400bba351cec 100644
--- a/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -1,9 +1,8 @@
//===-- ARMBasicBlockInfo.h - Basic Block Information -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,12 +13,16 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cstdint>
namespace llvm {
+using BBInfoVector = SmallVectorImpl<BasicBlockInfo>;
+
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
@@ -104,6 +107,54 @@ struct BasicBlockInfo {
}
};
+class ARMBasicBlockUtils {
+
+private:
+ MachineFunction &MF;
+ bool isThumb = false;
+ const ARMBaseInstrInfo *TII = nullptr;
+ SmallVector<BasicBlockInfo, 8> BBInfo;
+
+public:
+ ARMBasicBlockUtils(MachineFunction &MF) : MF(MF) {
+ TII =
+ static_cast<const ARMBaseInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ isThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+ }
+
+ void computeAllBlockSizes() {
+ BBInfo.resize(MF.getNumBlockIDs());
+ for (MachineBasicBlock &MBB : MF)
+ computeBlockSize(&MBB);
+ }
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+
+ unsigned getOffsetOf(MachineInstr *MI) const;
+
+ unsigned getOffsetOf(MachineBasicBlock *MBB) const {
+ return BBInfo[MBB->getNumber()].Offset;
+ }
+
+ void adjustBBOffsetsAfter(MachineBasicBlock *MBB);
+
+ void adjustBBSize(MachineBasicBlock *MBB, int Size) {
+ BBInfo[MBB->getNumber()].Size += Size;
+ }
+
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
+ unsigned MaxDisp) const;
+
+ void insert(unsigned BBNum, BasicBlockInfo BBI) {
+ BBInfo.insert(BBInfo.begin() + BBNum, BBI);
+ }
+
+ void clear() { BBInfo.clear(); }
+
+ BBInfoVector &getBBInfo() { return BBInfo; }
+
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 8e80c32bcf89..0cbe6e1871e4 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -1,9 +1,8 @@
//===- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,7 +55,7 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
if (T->isArrayTy())
- return true;
+ return isSupportedType(DL, TLI, T->getArrayElementType());
if (T->isStructTy()) {
// For now we only allow homogeneous structs that we can manipulate with
@@ -65,7 +64,7 @@ static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i)
if (StructT->getElementType(i) != StructT->getElementType(0))
return false;
- return true;
+ return isSupportedType(DL, TLI, StructT->getElementType(0));
}
EVT VT = TLI.getValueType(DL, T, true);
@@ -91,27 +90,27 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
- unsigned SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, ARM::SP);
+ Register SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, Register(ARM::SP));
- unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ Register OffsetReg = MRI.createGenericVirtualRegister(s32);
MIRBuilder.buildConstant(OffsetReg, Offset);
- unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ Register AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
return AddrReg;
}
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -119,25 +118,27 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- unsigned ExtReg = extendRegister(ValVReg, VA);
+ Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
- unsigned ExtReg = extendRegister(ValVReg, VA);
+ Register ExtReg = extendRegister(ValVReg, VA);
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
- /* Alignment */ 0);
+ /* Alignment */ 1);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
unsigned assignCustomValue(const CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) override {
+    assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
+
CCValAssign VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
assert(VA.getValVT() == MVT::f64 && "Unsupported type");
@@ -152,9 +153,9 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value should be in reg");
assert(NextVA.isRegLoc() && "Value should be in reg");
- unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};
- MIRBuilder.buildUnmerge(NewRegs, Arg.Reg);
+ MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]);
bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
if (!IsLittle)
@@ -183,18 +184,17 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
} // end anonymous namespace
-void ARMCallLowering::splitToValueTypes(
- const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
- MachineFunction &MF, const SplitArgTy &PerformArgSplit) const {
+void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ MachineFunction &MF) const {
const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
const DataLayout &DL = MF.getDataLayout();
- MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
SmallVector<EVT, 4> SplitVTs;
- SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0);
+ assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
if (SplitVTs.size() == 1) {
// Even if there is no splitting to do, we still want to replace the
@@ -202,12 +202,12 @@ void ARMCallLowering::splitToValueTypes(
auto Flags = OrigArg.Flags;
unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
Flags.setOrigAlign(OriginalAlignment);
- SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags,
- OrigArg.IsFixed);
+ SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
+ Flags, OrigArg.IsFixed);
return;
}
- unsigned FirstRegIdx = SplitArgs.size();
+ // Create one ArgInfo for each virtual register.
for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
EVT SplitVT = SplitVTs[i];
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
@@ -225,19 +225,16 @@ void ARMCallLowering::splitToValueTypes(
Flags.setInConsecutiveRegsLast();
}
- SplitArgs.push_back(
- ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
- SplitTy, Flags, OrigArg.IsFixed});
+ // FIXME: We also want to split SplitTy further.
+ Register PartReg = OrigArg.Regs[i];
+ SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed);
}
-
- for (unsigned i = 0; i < Offsets.size(); ++i)
- PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
- const Value *Val, ArrayRef<unsigned> VRegs,
+ const Value *Val, ArrayRef<Register> VRegs,
MachineInstrBuilder &Ret) const {
if (!Val)
// Nothing to do here.
@@ -251,35 +248,22 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
- SmallVector<EVT, 4> SplitEVTs;
- ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
- assert(VRegs.size() == SplitEVTs.size() &&
- "For each split Type there should be exactly one VReg.");
-
- SmallVector<ArgInfo, 4> SplitVTs;
- LLVMContext &Ctx = Val->getType()->getContext();
- for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
- ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
- setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
-
- SmallVector<unsigned, 4> Regs;
- splitToValueTypes(
- CurArgInfo, SplitVTs, MF,
- [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); });
- if (Regs.size() > 1)
- MIRBuilder.buildUnmerge(Regs, VRegs[i]);
- }
+ ArgInfo OrigRetInfo(VRegs, Val->getType());
+ setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<ArgInfo, 4> SplitRetInfos;
+ splitToValueTypes(OrigRetInfo, SplitRetInfos, MF);
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
- return handleAssignments(MIRBuilder, SplitVTs, RetHandler);
+ return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
- ArrayRef<unsigned> VRegs) const {
+ ArrayRef<Register> VRegs) const {
assert(!Val == VRegs.empty() && "Return value without a vreg");
auto const &ST = MIRBuilder.getMF().getSubtarget<ARMSubtarget>();
@@ -302,7 +286,9 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
CCAssignFn AssignFn)
: ValueHandler(MIRBuilder, MRI, AssignFn) {}
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ bool isArgumentHandler() const override { return true; }
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
@@ -319,7 +305,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
return AddrReg;
}
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
"Unsupported size");
@@ -332,22 +318,22 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO);
+ buildLoad(LoadVReg, Addr, Size, /* Alignment */ 1, MPO);
MIRBuilder.buildTrunc(ValVReg, LoadVReg);
} else {
// If the value is not extended, a simple load will suffice.
- buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO);
+ buildLoad(ValVReg, Addr, Size, /* Alignment */ 1, MPO);
}
}
- void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment,
+ void buildLoad(Register Val, Register Addr, uint64_t Size, unsigned Alignment,
MachinePointerInfo &MPO) {
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad, Size, Alignment);
MIRBuilder.buildLoad(Val, Addr, *MMO);
}
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -376,6 +362,8 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) override {
+    assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
+
CCValAssign VA = VAs[0];
assert(VA.needsCustom() && "Value doesn't need custom handling");
assert(VA.getValVT() == MVT::f64 && "Unsupported type");
@@ -390,7 +378,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value should be in reg");
assert(NextVA.isRegLoc() && "Value should be in reg");
- unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};
assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
@@ -400,7 +388,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
if (!IsLittle)
std::swap(NewRegs[0], NewRegs[1]);
- MIRBuilder.buildMerge(Arg.Reg, NewRegs);
+ MIRBuilder.buildMerge(Arg.Regs[0], NewRegs);
return 1;
}
@@ -423,9 +411,9 @@ struct FormalArgHandler : public IncomingValueHandler {
} // end anonymous namespace
-bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function &F,
- ArrayRef<unsigned> VRegs) const {
+bool ARMCallLowering::lowerFormalArguments(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
auto &TLI = *getTLI<ARMTargetLowering>();
auto Subtarget = TLI.getSubtarget();
@@ -456,21 +444,13 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
AssignFn);
- SmallVector<ArgInfo, 8> ArgInfos;
- SmallVector<unsigned, 4> SplitRegs;
+ SmallVector<ArgInfo, 8> SplitArgInfos;
unsigned Idx = 0;
for (auto &Arg : F.args()) {
- ArgInfo AInfo(VRegs[Idx], Arg.getType());
- setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
-
- SplitRegs.clear();
-
- splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
- SplitRegs.push_back(Reg);
- });
+ ArgInfo OrigArgInfo(VRegs[Idx], Arg.getType());
- if (!SplitRegs.empty())
- MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
+ setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F);
+ splitToValueTypes(OrigArgInfo, SplitArgInfos, MF);
Idx++;
}
@@ -478,7 +458,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (!MBB.empty())
MIRBuilder.setInstr(*MBB.begin());
- if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+ if (!handleAssignments(MIRBuilder, SplitArgInfos, ArgHandler))
return false;
// Move back to the end of the basic block.
@@ -540,19 +520,19 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create the call instruction so we can add the implicit uses of arg
// registers, but don't insert it yet.
- bool isDirect = !Callee.isReg();
- auto CallOpcode = getCallOpcode(STI, isDirect);
+ bool IsDirect = !Callee.isReg();
+ auto CallOpcode = getCallOpcode(STI, IsDirect);
auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode);
- bool isThumb = STI.isThumb();
- if (isThumb)
+ bool IsThumb = STI.isThumb();
+ if (IsThumb)
MIB.add(predOps(ARMCC::AL));
MIB.add(Callee);
- if (!isDirect) {
+ if (!IsDirect) {
auto CalleeReg = Callee.getReg();
if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) {
- unsigned CalleeIdx = isThumb ? 2 : 0;
+ unsigned CalleeIdx = IsThumb ? 2 : 0;
MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(),
*MIB.getInstr(), MIB->getDesc(), Callee, CalleeIdx));
@@ -561,27 +541,22 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIB.addRegMask(TRI->getCallPreservedMask(MF, CallConv));
+ bool IsVarArg = false;
SmallVector<ArgInfo, 8> ArgInfos;
for (auto Arg : OrigArgs) {
if (!isSupportedType(DL, TLI, Arg.Ty))
return false;
if (!Arg.IsFixed)
- return false;
+ IsVarArg = true;
if (Arg.Flags.isByVal())
return false;
- SmallVector<unsigned, 8> Regs;
- splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) {
- Regs.push_back(Reg);
- });
-
- if (Regs.size() > 1)
- MIRBuilder.buildUnmerge(Regs, Arg.Reg);
+ splitToValueTypes(Arg, ArgInfos, MF);
}
- auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg);
OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
return false;
@@ -594,22 +569,11 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
ArgInfos.clear();
- SmallVector<unsigned, 8> SplitRegs;
- splitToValueTypes(OrigRet, ArgInfos, MF,
- [&](unsigned Reg, uint64_t Offset) {
- SplitRegs.push_back(Reg);
- });
-
- auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
+ splitToValueTypes(OrigRet, ArgInfos, MF);
+ auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
return false;
-
- if (!SplitRegs.empty()) {
- // We have split the value and allocated each individual piece, now build
- // it up again.
- MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs);
- }
}
// We now know the size of the stack - update the ADJCALLSTACKDOWN
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h
index 45a988a2f00e..794127b5ebc7 100644
--- a/lib/Target/ARM/ARMCallLowering.h
+++ b/lib/Target/ARM/ARMCallLowering.h
@@ -1,9 +1,8 @@
//===- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,10 +33,10 @@ public:
ARMCallLowering(const ARMTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<Register> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
const MachineOperand &Callee, const ArgInfo &OrigRet,
@@ -45,17 +44,14 @@ public:
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs,
+ ArrayRef<Register> VRegs,
MachineInstrBuilder &Ret) const;
- using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>;
-
/// Split an argument into one or more arguments that the CC lowering can cope
- /// with (e.g. replace pointers with integers).
+ /// with.
void splitToValueTypes(const ArgInfo &OrigArg,
SmallVectorImpl<ArgInfo> &SplitArgs,
- MachineFunction &MF,
- const SplitArgTy &PerformArgSplit) const;
+ MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.cpp b/lib/Target/ARM/ARMCallingConv.cpp
new file mode 100644
index 000000000000..5ede7c67f7c2
--- /dev/null
+++ b/lib/Target/ARM/ARMCallingConv.cpp
@@ -0,0 +1,284 @@
+//=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen, and includes the table generated implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMCallingConv.h"
+#include "ARMSubtarget.h"
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+// APCS f64 is in register pairs, possibly split to stack
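+// For example, an f64 takes the next two free GPRs (e.g. R1 and R2); if only R3
+// is free, the first half goes in R3 and the second half spills to the stack.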
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ // Try to get the first register.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else {
+ // For the 2nd half of a v2f64, do not fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 4),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ // Try to get the second register.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(4, 4),
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
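+// For example, an f64 goes in R0/R1 or R2/R3; if neither aligned pair is fully
+// free, a leftover R3 is wasted and the value ends up on the stack.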
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ CCState &State, bool CanFail) {
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+ static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
+ if (Reg == 0) {
+
+    // If only R3 was left unallocated, we still have to waste it now.
+ Reg = State.AllocateReg(GPRArgRegs);
+ assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
+ // For the 2nd half of a v2f64, do not just fail.
+ if (CanFail)
+ return false;
+
+ // Put the whole thing on the stack.
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+ State.AllocateStack(8, 8),
+ LocVT, LocInfo));
+ return true;
+ }
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+ return false;
+ if (LocVT == MVT::v2f64 &&
+ !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+ return false;
+ return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo, CCState &State) {
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+ if (Reg == 0)
+ return false; // we didn't handle it
+
+ unsigned i;
+ for (i = 0; i < 2; ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+ return false;
+ return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
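+// For example, an HFA of three floats needs a block of three consecutive
+// S registers (S0-S2, S1-S3, ...); if no such block is free, the remaining
+// VFP registers are marked unavailable and every member goes on the stack.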
+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // AAPCS HFAs must have 1-4 elements, all of the same type
+ if (PendingMembers.size() > 0)
+ assert(PendingMembers[0].getLocVT() == LocVT);
+
+ // Add the argument to the list to be allocated once we know the size of the
+  // aggregate. Store the type's required alignment as extra info for later: in
+ // the [N x i64] case all trace has been removed by the time we actually get
+ // to do allocation.
+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
+ ArgFlags.getOrigAlign()));
+
+ if (!ArgFlags.isInConsecutiveRegsLast())
+ return true;
+
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ auto &DL = State.getMachineFunction().getDataLayout();
+ unsigned StackAlign = DL.getStackAlignment();
+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
+
+ ArrayRef<MCPhysReg> RegList;
+ switch (LocVT.SimpleTy) {
+ case MVT::i32: {
+ RegList = RRegList;
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
+
+ // First consume all registers that would give an unaligned object. Whether
+ // we go on stack or in regs, no-one will be using them in future.
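+    // E.g. if the first member needs 8-byte alignment and R1 is the next free
+    // GPR, R1 is consumed so the block can start at R2.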
+ unsigned RegAlign = alignTo(Align, 4) / 4;
+ while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
+ State.AllocateReg(RegList[RegIdx++]);
+
+ break;
+ }
+ case MVT::f16:
+ case MVT::f32:
+ RegList = SRegList;
+ break;
+ case MVT::v4f16:
+ case MVT::f64:
+ RegList = DRegList;
+ break;
+ case MVT::v8f16:
+ case MVT::v2f64:
+ RegList = QRegList;
+ break;
+ default:
+ llvm_unreachable("Unexpected member type for block aggregate");
+ break;
+ }
+
+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+ if (RegResult) {
+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
+ It != PendingMembers.end(); ++It) {
+ It->convertToReg(RegResult);
+ State.addLoc(*It);
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
+ }
+
+ // Register allocation failed, we'll be needing the stack
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
+ // If nothing else has used the stack until this point, a non-HFA aggregate
+ // can be split between regs and stack.
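+    // E.g. with R0/R1 already taken, a six-word aggregate puts two words in
+    // R2/R3 and the remaining four words in 4-byte stack slots.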
+ unsigned RegIdx = State.getFirstUnallocated(RegList);
+ for (auto &It : PendingMembers) {
+ if (RegIdx >= RegList.size())
+ It.convertToMem(State.AllocateStack(Size, Size));
+ else
+ It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
+
+ State.addLoc(It);
+ }
+ PendingMembers.clear();
+ return true;
+ } else if (LocVT != MVT::i32)
+ RegList = SRegList;
+
+ // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
+ for (auto Reg : RegList)
+ State.AllocateReg(Reg);
+
+ // After the first item has been allocated, the rest are packed as tightly as
+ // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+ // be allocating a bunch of i32 slots).
+ unsigned RestAlign = std::min(Align, Size);
+
+ for (auto &It : PendingMembers) {
+ It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+ Align = RestAlign;
+ }
+
+ // All pending members have now been allocated
+ PendingMembers.clear();
+
+ // This will be allocated by the last member of the aggregate
+ return true;
+}
+
+// Include the table generated calling convention implementations.
+#include "ARMGenCallingConv.inc"
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 543165de38d0..615634551d90 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -1,292 +1,50 @@
//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file contains the custom routines for the ARM Calling Convention that
-// aren't done by tablegen.
+// This file declares the entry points for ARM calling convention analysis.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CallingConv.h"
namespace llvm {
-// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- CCState &State, bool CanFail) {
- static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
- // Try to get the first register.
- if (unsigned Reg = State.AllocateReg(RegList))
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else {
- // For the 2nd half of a v2f64, do not fail.
- if (CanFail)
- return false;
-
- // Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 4),
- LocVT, LocInfo));
- return true;
- }
-
- // Try to get the second register.
- if (unsigned Reg = State.AllocateReg(RegList))
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(4, 4),
- LocVT, LocInfo));
- return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
- return false;
- if (LocVT == MVT::v2f64 &&
- !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
- return false;
- return true; // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- CCState &State, bool CanFail) {
- static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
- static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
- static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
- static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
- unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
- if (Reg == 0) {
-
- // If we had R3 unallocated only, now we still must to waste it.
- Reg = State.AllocateReg(GPRArgRegs);
- assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
-
- // For the 2nd half of a v2f64, do not just fail.
- if (CanFail)
- return false;
-
- // Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 8),
- LocVT, LocInfo));
- return true;
- }
-
- unsigned i;
- for (i = 0; i < 2; ++i)
- if (HiRegList[i] == Reg)
- break;
-
- unsigned T = State.AllocateReg(LoRegList[i]);
- (void)T;
- assert(T == LoRegList[i] && "Could not allocate register");
-
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- LocVT, LocInfo));
- return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
- return false;
- if (LocVT == MVT::v2f64 &&
- !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
- return false;
- return true; // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
- static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
-
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
- if (Reg == 0)
- return false; // we didn't handle it
-
- unsigned i;
- for (i = 0; i < 2; ++i)
- if (HiRegList[i] == Reg)
- break;
-
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- LocVT, LocInfo));
- return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
- return false;
- if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
- return false;
- return true; // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
- State);
-}
-
-static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
- ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
-static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
-static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
-
-
-// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
-// has InConsecutiveRegs set, and that the last member also has
-// InConsecutiveRegsLast set. We must process all members of the HA before
-// we can allocate it, as we need to know the total number of registers that
-// will be needed in order to (attempt to) allocate a contiguous block.
-static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
- // AAPCS HFAs must have 1-4 elements, all of the same type
- if (PendingMembers.size() > 0)
- assert(PendingMembers[0].getLocVT() == LocVT);
-
- // Add the argument to the list to be allocated once we know the size of the
- // aggregate. Store the type's required alignmnent as extra info for later: in
- // the [N x i64] case all trace has been removed by the time we actually get
- // to do allocation.
- PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
- ArgFlags.getOrigAlign()));
-
- if (!ArgFlags.isInConsecutiveRegsLast())
- return true;
-
- // Try to allocate a contiguous block of registers, each of the correct
- // size to hold one member.
- auto &DL = State.getMachineFunction().getDataLayout();
- unsigned StackAlign = DL.getStackAlignment();
- unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
-
- ArrayRef<MCPhysReg> RegList;
- switch (LocVT.SimpleTy) {
- case MVT::i32: {
- RegList = RRegList;
- unsigned RegIdx = State.getFirstUnallocated(RegList);
-
- // First consume all registers that would give an unaligned object. Whether
- // we go on stack or in regs, no-one will be using them in future.
- unsigned RegAlign = alignTo(Align, 4) / 4;
- while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
- State.AllocateReg(RegList[RegIdx++]);
-
- break;
- }
- case MVT::f16:
- case MVT::f32:
- RegList = SRegList;
- break;
- case MVT::v4f16:
- case MVT::f64:
- RegList = DRegList;
- break;
- case MVT::v8f16:
- case MVT::v2f64:
- RegList = QRegList;
- break;
- default:
- llvm_unreachable("Unexpected member type for block aggregate");
- break;
- }
-
- unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
- if (RegResult) {
- for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
- It != PendingMembers.end(); ++It) {
- It->convertToReg(RegResult);
- State.addLoc(*It);
- ++RegResult;
- }
- PendingMembers.clear();
- return true;
- }
-
- // Register allocation failed, we'll be needing the stack
- unsigned Size = LocVT.getSizeInBits() / 8;
- if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
- // If nothing else has used the stack until this point, a non-HFA aggregate
- // can be split between regs and stack.
- unsigned RegIdx = State.getFirstUnallocated(RegList);
- for (auto &It : PendingMembers) {
- if (RegIdx >= RegList.size())
- It.convertToMem(State.AllocateStack(Size, Size));
- else
- It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
-
- State.addLoc(It);
- }
- PendingMembers.clear();
- return true;
- } else if (LocVT != MVT::i32)
- RegList = SRegList;
-
- // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
- for (auto Reg : RegList)
- State.AllocateReg(Reg);
-
- // After the first item has been allocated, the rest are packed as tightly as
- // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
- // be allocating a bunch of i32 slots).
- unsigned RestAlign = std::min(Align, Size);
-
- for (auto &It : PendingMembers) {
- It.convertToMem(State.AllocateStack(Size, Align));
- State.addLoc(It);
- Align = RestAlign;
- }
-
- // All pending members have now been allocated
- PendingMembers.clear();
-
- // This will be allocated by the last member of the aggregate
- return true;
-}
-
-} // End llvm namespace
+bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+
+} // namespace llvm
#endif
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index f173e423f3e4..61d2d83ddc40 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,9 +1,8 @@
//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for ARM architecture.
@@ -16,6 +15,7 @@ class CCIfAlign<string Align, CCAction A>:
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention
//===----------------------------------------------------------------------===//
+let Entry = 1 in
def CC_ARM_APCS : CallingConv<[
// Handles byval parameters.
@@ -30,8 +30,8 @@ def CC_ARM_APCS : CallingConv<[
CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
@@ -44,6 +44,7 @@ def CC_ARM_APCS : CallingConv<[
CCIfType<[v2f64], CCAssignToStack<16, 4>>
]>;
+let Entry = 1 in
def RetCC_ARM_APCS : CallingConv<[
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -55,8 +56,8 @@ def RetCC_ARM_APCS : CallingConv<[
CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
@@ -67,10 +68,11 @@ def RetCC_ARM_APCS : CallingConv<[
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
//===----------------------------------------------------------------------===//
+let Entry = 1 in
def FastCC_ARM_APCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -86,10 +88,11 @@ def FastCC_ARM_APCS : CallingConv<[
CCDelegateTo<CC_ARM_APCS>
]>;
+let Entry = 1 in
def RetFastCC_ARM_APCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -102,10 +105,11 @@ def RetFastCC_ARM_APCS : CallingConv<[
// ARM APCS Calling Convention for GHC
//===----------------------------------------------------------------------===//
+let Entry = 1 in
def CC_ARM_APCS_GHC : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
@@ -152,6 +156,7 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
// ARM AAPCS (EABI) Calling Convention
//===----------------------------------------------------------------------===//
+let Entry = 1 in
def CC_ARM_AAPCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
@@ -160,8 +165,8 @@ def CC_ARM_AAPCS : CallingConv<[
CCIfNest<CCAssignToReg<[R12]>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -174,10 +179,11 @@ def CC_ARM_AAPCS : CallingConv<[
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
+let Entry = 1 in
def RetCC_ARM_AAPCS : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16,v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16,v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -196,13 +202,14 @@ def RetCC_ARM_AAPCS : CallingConv<[
// Also used for FastCC (when VFP2 or later is available)
//===----------------------------------------------------------------------===//
+let Entry = 1 in
def CC_ARM_AAPCS_VFP : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -220,10 +227,11 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCDelegateTo<CC_ARM_AAPCS_Common>
]>;
+let Entry = 1 in
def RetCC_ARM_AAPCS_VFP : CallingConv<[
// Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+ CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
// Pass SwiftSelf in a callee saved register.
CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp
index b631c2bc687b..2fc5f4aaab50 100644
--- a/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -1,9 +1,8 @@
//===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,15 +113,20 @@ class IRPromoter {
SmallPtrSet<Value*, 8> Promoted;
Module *M = nullptr;
LLVMContext &Ctx;
+ // The type we promote to: always i32
IntegerType *ExtTy = nullptr;
+ // The type of the value that the search began from, either i8 or i16.
+ // This defines the max range of the values that we allow in the promoted
+ // tree.
IntegerType *OrigTy = nullptr;
- SmallPtrSetImpl<Value*> *Visited;
+ SetVector<Value*> *Visited;
SmallPtrSetImpl<Value*> *Sources;
SmallPtrSetImpl<Instruction*> *Sinks;
SmallPtrSetImpl<Instruction*> *SafeToPromote;
+ SmallPtrSetImpl<Instruction*> *SafeWrap;
void ReplaceAllUsersOfWith(Value *From, Value *To);
- void PrepareConstants(void);
+ void PrepareWrappingAdds(void);
void ExtendSources(void);
void ConvertTruncs(void);
void PromoteTree(void);
@@ -135,10 +139,11 @@ public:
void Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
+ SetVector<Value*> &Visited,
SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Sinks,
- SmallPtrSetImpl<Instruction*> &SafeToPromote);
+ SmallPtrSetImpl<Instruction*> &SafeToPromote,
+ SmallPtrSetImpl<Instruction*> &SafeWrap);
};
class ARMCodeGenPrepare : public FunctionPass {
@@ -146,8 +151,9 @@ class ARMCodeGenPrepare : public FunctionPass {
IRPromoter *Promoter = nullptr;
std::set<Value*> AllVisited;
SmallPtrSet<Instruction*, 8> SafeToPromote;
+ SmallPtrSet<Instruction*, 4> SafeWrap;
- bool isSafeOverflow(Instruction *I);
+ bool isSafeWrap(Instruction *I);
bool isSupportedValue(Value *V);
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V);
@@ -172,13 +178,17 @@ public:
}
-static bool generateSignBits(Value *V) {
+static bool GenerateSignBits(Value *V) {
+ if (auto *Arg = dyn_cast<Argument>(V))
+ return Arg->hasSExtAttr();
+
if (!isa<Instruction>(V))
return false;
unsigned Opc = cast<Instruction>(V)->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
- Opc == Instruction::SRem;
+ Opc == Instruction::SRem || Opc == Instruction::SExt ||
+ Opc == Instruction::SIToFP;
}
static bool EqualTypeSize(Value *V) {
@@ -271,19 +281,14 @@ static bool isSink(Value *V) {
return isa<CallInst>(V);
}
-/// Return whether the instruction can be promoted within any modifications to
-/// its operands or result.
-bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
- // FIXME Do we need NSW too?
- if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
- return true;
-
- // We can support a, potentially, overflowing instruction (I) if:
+/// Return whether this instruction can safely wrap.
+bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) {
+ // We can support a, potentially, wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
- // - The overflowing value (I) is decreasing, i.e would underflow - wrapping
+ // - The wrapping value (I) is decreasing, i.e would underflow - wrapping
// around zero to become a larger number than before.
- // - The underflowing instruction (I) also uses a constant.
+ // - The wrapping instruction (I) also uses a constant.
//
// We can then use the two constants to calculate whether the result would
// wrap in respect to itself in the original bitwidth. If it doesn't wrap,
@@ -327,7 +332,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
//
// To demonstrate why we can't handle increasing values:
- //
+ //
// %add = add i8 %a, 2
// %cmp = icmp ult i8 %add, 127
//
@@ -385,6 +390,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) {
return false;
LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
+ SafeWrap.insert(I);
return true;
}
@@ -408,13 +414,16 @@ static bool shouldPromote(Value *V) {
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
static bool isPromotedResultSafe(Value *V) {
+ if (GenerateSignBits(V))
+ return false;
+
if (!isa<Instruction>(V))
return true;
- if (generateSignBits(V))
- return false;
+ if (!isa<OverflowingBinaryOperator>(V))
+ return true;
- return !isa<OverflowingBinaryOperator>(V);
+ return cast<Instruction>(V)->hasNoUnsignedWrap();
}
/// Return the intrinsic for the instruction that can perform the same
@@ -462,61 +471,34 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
InstsToRemove.insert(I);
}
-void IRPromoter::PrepareConstants() {
+void IRPromoter::PrepareWrappingAdds() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n");
IRBuilder<> Builder{Ctx};
- // First step is to prepare the instructions for mutation. Most constants
- // just need to be zero extended into their new type, but complications arise
- // because:
- // - For nuw binary operators, negative immediates would need sign extending;
- // however, instead we'll change them to positive and zext them. We can do
- // this because:
- // > The operators that can wrap are: add, sub, mul and shl.
- // > shl interprets its second operand as unsigned and if the first operand
- // is an immediate, it will need zext to be nuw.
- // > I'm assuming mul has to interpret immediates as unsigned for nuw.
- // > Which leaves the nuw add and sub to be handled; as with shl, if an
- // immediate is used as operand 0, it will need zext to be nuw.
- // - We also allow add and sub to safely overflow in certain circumstances
- // and only when the value (operand 0) is being decreased.
- //
- // For adds and subs, that are either nuw or safely wrap and use a negative
- // immediate as operand 1, we create an equivalent instruction using a
- // positive immediate. That positive immediate can then be zext along with
- // all the other immediates later.
- for (auto *V : *Visited) {
- if (!isa<Instruction>(V))
- continue;
-
- auto *I = cast<Instruction>(V);
- if (SafeToPromote->count(I)) {
-
- if (!isa<OverflowingBinaryOperator>(I))
- continue;
- if (auto *Const = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (!Const->isNegative())
- break;
+ // For adds that safely wrap and use a negative immediate as operand 1, we
+ // create an equivalent instruction using a positive immediate.
+ // That positive immediate can then be zext along with all the other
+ // immediates later.
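+  // E.g. "add i8 %a, -2" is rewritten as "sub i8 %a, 2".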
+ for (auto *I : *SafeWrap) {
+ if (I->getOpcode() != Instruction::Add)
+ continue;
- unsigned Opc = I->getOpcode();
- if (Opc != Instruction::Add && Opc != Instruction::Sub)
- continue;
+ LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
+ assert((isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
+ "Wrapping should have a negative immediate as the second operand");
- LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n");
- auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
- Builder.SetInsertPoint(I);
- Value *NewVal = Opc == Instruction::Sub ?
- Builder.CreateAdd(I->getOperand(0), NewConst) :
- Builder.CreateSub(I->getOperand(0), NewConst);
- LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
-
- if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
- NewInst->copyIRFlags(I);
- NewInsts.insert(NewInst);
- }
- InstsToRemove.insert(I);
- I->replaceAllUsesWith(NewVal);
- }
+ auto Const = cast<ConstantInt>(I->getOperand(1));
+ auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+ Builder.SetInsertPoint(I);
+ Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
+ if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+ NewInst->copyIRFlags(I);
+ NewInsts.insert(NewInst);
}
+ InstsToRemove.insert(I);
+ I->replaceAllUsesWith(NewVal);
+ LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n");
}
for (auto *I : NewInsts)
Visited->insert(I);
@@ -605,7 +587,7 @@ void IRPromoter::PromoteTree() {
if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I))
continue;
-
+
    assert(EnableDSP && "DSP intrinsic insertion not enabled!");
// Replace unsafe instructions with appropriate intrinsic calls.
@@ -683,13 +665,14 @@ void IRPromoter::TruncateSinks() {
}
void IRPromoter::Cleanup() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Cleanup..\n");
// Some zexts will now have become redundant, along with their trunc
// operands, so remove them
for (auto V : *Visited) {
- if (!isa<CastInst>(V))
+ if (!isa<ZExtInst>(V))
continue;
- auto ZExt = cast<CastInst>(V);
+ auto ZExt = cast<ZExtInst>(V);
if (ZExt->getDestTy() != ExtTy)
continue;
@@ -701,9 +684,11 @@ void IRPromoter::Cleanup() {
continue;
}
- // For any truncs that we insert to handle zexts, we can replace the
- // result of the zext with the input to the trunc.
- if (NewInsts.count(Src) && isa<ZExtInst>(V) && isa<TruncInst>(Src)) {
+ // Unless they produce a value that is narrower than ExtTy, we can
+ // replace the result of the zext with the input of a newly inserted
+ // trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
+ Src->getType() == OrigTy) {
auto *Trunc = cast<TruncInst>(Src);
assert(Trunc->getOperand(0)->getType() == ExtTy &&
"expected inserted trunc to be operating on i32");
@@ -721,9 +706,12 @@ void IRPromoter::Cleanup() {
NewInsts.clear();
TruncTysMap.clear();
Promoted.clear();
+ SafeToPromote->clear();
+ SafeWrap->clear();
}
void IRPromoter::ConvertTruncs() {
+ LLVM_DEBUG(dbgs() << "ARM CGP: Converting truncs..\n");
IRBuilder<> Builder{Ctx};
for (auto *V : *Visited) {
@@ -731,12 +719,13 @@ void IRPromoter::ConvertTruncs() {
continue;
auto *Trunc = cast<TruncInst>(V);
- assert(LessThanTypeSize(Trunc) && "expected narrow trunc");
-
Builder.SetInsertPoint(Trunc);
- unsigned NumBits =
- cast<IntegerType>(Trunc->getType())->getScalarSizeInBits();
- ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits));
+ IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
+ IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
+
+ unsigned NumBits = DestTy->getScalarSizeInBits();
+ ConstantInt *Mask =
+ ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
if (auto *I = dyn_cast<Instruction>(Masked))
@@ -747,10 +736,11 @@ void IRPromoter::ConvertTruncs() {
}
void IRPromoter::Mutate(Type *OrigTy,
- SmallPtrSetImpl<Value*> &Visited,
+ SetVector<Value*> &Visited,
SmallPtrSetImpl<Value*> &Sources,
SmallPtrSetImpl<Instruction*> &Sinks,
- SmallPtrSetImpl<Instruction*> &SafeToPromote) {
+ SmallPtrSetImpl<Instruction*> &SafeToPromote,
+ SmallPtrSetImpl<Instruction*> &SafeWrap) {
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
<< ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
@@ -763,6 +753,7 @@ void IRPromoter::Mutate(Type *OrigTy,
this->Sources = &Sources;
this->Sinks = &Sinks;
this->SafeToPromote = &SafeToPromote;
+ this->SafeWrap = &SafeWrap;
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
@@ -778,22 +769,28 @@ void IRPromoter::Mutate(Type *OrigTy,
TruncTysMap[I].push_back(I->getOperand(i)->getType());
}
}
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
+ continue;
+ auto *Trunc = cast<TruncInst>(V);
+ TruncTysMap[Trunc].push_back(Trunc->getDestTy());
+ }
- // Convert adds and subs using negative immediates to equivalent instructions
- // that use positive constants.
- PrepareConstants();
+ // Convert adds using negative immediates to equivalent instructions that use
+ // positive constants.
+ PrepareWrappingAdds();
// Insert zext instructions between sources and their users.
ExtendSources();
- // Convert any truncs, that aren't sources, into AND masks.
- ConvertTruncs();
-
// Promote visited instructions, mutating their types in place. Also insert
// DSP intrinsics, if enabled, for adds and subs which would be unsafe to
// promote.
PromoteTree();
+ // Convert any truncs, that aren't sources, into AND masks.
+ ConvertTruncs();
+
// Insert trunc instructions for use by calls, stores etc...
TruncateSinks();
@@ -819,6 +816,11 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
return EqualTypeSize(I->getOperand(0));
}
+ if (GenerateSignBits(V)) {
+ LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
+ return false;
+ }
+
// Memory instructions
if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
return true;
@@ -835,9 +837,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
isa<LoadInst>(V))
return isSupportedType(V);
- if (isa<SExtInst>(V))
- return false;
-
if (auto *Cast = dyn_cast<CastInst>(V))
return isSupportedType(Cast) || isSupportedType(Cast->getOperand(0));
@@ -854,10 +853,6 @@ bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
if (!isSupportedType(V))
return false;
- if (generateSignBits(V)) {
- LLVM_DEBUG(dbgs() << "ARM CGP: No, instruction can generate sign bits.\n");
- return false;
- }
return true;
}
@@ -873,7 +868,7 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
if (SafeToPromote.count(I))
return true;
- if (isPromotedResultSafe(V) || isSafeOverflow(I)) {
+ if (isPromotedResultSafe(V) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
@@ -911,6 +906,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
return false;
SafeToPromote.clear();
+ SafeWrap.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
@@ -921,7 +917,7 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
SetVector<Value*> WorkList;
SmallPtrSet<Value*, 8> Sources;
SmallPtrSet<Instruction*, 4> Sinks;
- SmallPtrSet<Value*, 16> CurrentVisited;
+ SetVector<Value*> CurrentVisited;
WorkList.insert(V);
// Return true if V was added to the worklist as a supported instruction,
@@ -1009,7 +1005,8 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) {
if (ToPromote < 2)
return false;
- Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote);
+ Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote,
+ SafeWrap);
return true;
}
diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp
deleted file mode 100644
index b263e9d86c42..000000000000
--- a/lib/Target/ARM/ARMComputeBlockSize.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-//===--- ARMComputeBlockSize.cpp - Compute machine block sizes ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMBasicBlockInfo.h"
-#include "ARMMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include <vector>
-
-using namespace llvm;
-
-namespace llvm {
-
-// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
-// below may shrink MI.
-static bool
-mayOptimizeThumb2Instruction(const MachineInstr *MI) {
- switch(MI->getOpcode()) {
- // optimizeThumb2Instructions.
- case ARM::t2LEApcrel:
- case ARM::t2LDRpci:
- // optimizeThumb2Branches.
- case ARM::t2B:
- case ARM::t2Bcc:
- case ARM::tBcc:
- // optimizeThumb2JumpTables.
- case ARM::t2BR_JT:
- case ARM::tBR_JTr:
- return true;
- }
- return false;
-}
-
-void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
- BasicBlockInfo &BBI) {
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
- bool isThumb = MF->getInfo<ARMFunctionInfo>()->isThumbFunction();
- BBI.Size = 0;
- BBI.Unalign = 0;
- BBI.PostAlign = 0;
-
- for (MachineInstr &I : *MBB) {
- BBI.Size += TII->getInstSizeInBytes(I);
- // For inline asm, getInstSizeInBytes returns a conservative estimate.
- // The actual size may be smaller, but still a multiple of the instr size.
- if (I.isInlineAsm())
- BBI.Unalign = isThumb ? 1 : 2;
- // Also consider instructions that may be shrunk later.
- else if (isThumb && mayOptimizeThumb2Instruction(&I))
- BBI.Unalign = 1;
- }
-
- // tBR_JTr contains a .align 2 directive.
- if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
- BBI.PostAlign = 2;
- MBB->getParent()->ensureAlignment(2);
- }
-}
-
-std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
- std::vector<BasicBlockInfo> BBInfo;
- BBInfo.resize(MF->getNumBlockIDs());
-
- for (MachineBasicBlock &MBB : *MF)
- computeBlockSize(MF, &MBB, BBInfo[MBB.getNumber()]);
-
- return BBInfo;
-}
-
-} // end namespace llvm
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 5e97c4cb35e3..60e5d7bf6098 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1,9 +1,8 @@
//===- ARMConstantIslandPass.cpp - ARM constant islands -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -98,7 +97,7 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- std::vector<BasicBlockInfo> BBInfo;
+ std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -244,7 +243,6 @@ namespace {
void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void adjustBBOffsetsAfter(MachineBasicBlock *BB);
bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
@@ -260,7 +258,6 @@ namespace {
bool DoDump = false);
bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
CPUser &U, unsigned &Growth);
- bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
bool fixupImmediateBr(ImmBranch &Br);
bool fixupConditionalBr(ImmBranch &Br);
bool fixupUnconditionalBr(ImmBranch &Br);
@@ -275,7 +272,6 @@ namespace {
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
- unsigned getOffsetOf(MachineInstr *MI) const;
unsigned getUserOffset(CPUser&) const;
void dumpBBs();
void verify();
@@ -296,9 +292,10 @@ char ARMConstantIslands::ID = 0;
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
assert(std::is_sorted(MF->begin(), MF->end(),
- [this](const MachineBasicBlock &LHS,
- const MachineBasicBlock &RHS) {
+ [&BBInfo](const MachineBasicBlock &LHS,
+ const MachineBasicBlock &RHS) {
return BBInfo[LHS.getNumber()].postOffset() <
BBInfo[RHS.getNumber()].postOffset();
}));
@@ -324,6 +321,7 @@ void ARMConstantIslands::verify() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
LLVM_DEBUG({
for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
const BasicBlockInfo &BBI = BBInfo[J];
@@ -340,6 +338,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MCP = mf.getConstantPool();
+ BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(mf));
LLVM_DEBUG(dbgs() << "***** ARMConstantIslands: "
<< MCP->getConstants().size() << " CP entries, aligned to "
@@ -467,7 +466,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << '\n'; dumpBBs());
- BBInfo.clear();
+ BBUtils->clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -684,14 +683,14 @@ void ARMConstantIslands::scanFunctionJumpTables() {
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
- BBInfo = computeAllBlockSizes(MF);
-
+ BBUtils->computeAllBlockSizes();
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
// The known bits of the entry block offset are determined by the function
// alignment.
BBInfo.front().KnownBits = MF->getAlignment();
// Compute block offsets and known bits.
- adjustBBOffsetsAfter(&MF->front());
+ BBUtils->adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
for (MachineBasicBlock &MBB : *MF) {
@@ -856,25 +855,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
}
}
-/// getOffsetOf - Return the current offset of the specified machine instruction
-/// from the start of the function. This offset changes as stuff is moved
-/// around inside the function.
-unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
- MachineBasicBlock *MBB = MI->getParent();
-
- // The offset is composed of two things: the sum of the sizes of all MBB's
- // before this instruction's block, and the offset from the start of the block
- // it is in.
- unsigned Offset = BBInfo[MBB->getNumber()].Offset;
-
- // Sum instructions before MI in MBB.
- for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
- assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->getInstSizeInBytes(*I);
- }
- return Offset;
-}
-
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
/// ID.
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
@@ -891,13 +871,11 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+ BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
- water_iterator IP =
- std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
- CompareMBBNumbers);
+ water_iterator IP = llvm::lower_bound(WaterList, NewBB, CompareMBBNumbers);
WaterList.insert(IP, NewBB);
}
@@ -942,15 +920,13 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+ BBUtils->insert(NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
// when splitting before a conditional branch that is followed by an
// unconditional branch - in that case we want to insert NewBB).
- water_iterator IP =
- std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
- CompareMBBNumbers);
+ water_iterator IP = llvm::lower_bound(WaterList, OrigBB, CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
WaterList.insert(std::next(IP), NewBB);
@@ -963,14 +939,14 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- computeBlockSize(MF, OrigBB, BBInfo[OrigBB->getNumber()]);
+ BBUtils->computeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- computeBlockSize(MF, NewBB, BBInfo[NewBB->getNumber()]);
+ BBUtils->computeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- adjustBBOffsetsAfter(OrigBB);
+ BBUtils->adjustBBOffsetsAfter(OrigBB);
return NewBB;
}
@@ -979,7 +955,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
/// displacement computation. Update U.KnownAlignment to match its current
/// basic block location.
unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
- unsigned UserOffset = getOffsetOf(U.MI);
+ unsigned UserOffset = BBUtils->getOffsetOf(U.MI);
+
+ SmallVectorImpl<BasicBlockInfo> &BBInfo = BBUtils->getBBInfo();
const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
unsigned KnownBits = BBI.internalKnownBits();
@@ -1028,6 +1006,7 @@ bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
MachineBasicBlock* Water, CPUser &U,
unsigned &Growth) {
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
unsigned NextBlockOffset, NextBlockAlignment;
@@ -1068,10 +1047,11 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
- unsigned CPEOffset = getOffsetOf(CPEMI);
+ unsigned CPEOffset = BBUtils->getOffsetOf(CPEMI);
if (DoDump) {
LLVM_DEBUG({
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned Block = MI->getParent()->getNumber();
const BasicBlockInfo &BBI = BBInfo[Block];
dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
@@ -1104,28 +1084,6 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
- unsigned BBNum = BB->getNumber();
- for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
- // Get the offset and known bits at the end of the layout predecessor.
- // Include the alignment of the current block.
- unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
- unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
- unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
-
- // This is where block i begins. Stop if the offset is already correct,
- // and we have updated 2 blocks. This is the maximum number of blocks
- // changed before calling this function.
- if (i > BBNum + 2 &&
- BBInfo[i].Offset == Offset &&
- BBInfo[i].KnownBits == KnownBits)
- break;
-
- BBInfo[i].Offset = Offset;
- BBInfo[i].KnownBits = KnownBits;
- }
-}
-
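
The adjustBBOffsetsAfter body deleted here (this patch moves it into the shared ARMBasicBlockUtils helper) walks the blocks after BB and recomputes each block's starting offset from its layout predecessor. A simplified, self-contained model of that propagation, with invented field names and the known-alignment-bits bookkeeping left out:

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified per-block layout info; the real pass also tracks known
// alignment bits, which are omitted here.
struct BlockInfo {
  unsigned Offset = 0; // byte offset of the block start
  unsigned Size = 0;   // byte size of the block contents
  unsigned postOffset() const { return Offset + Size; }
};

// After block BBNum changes size, every following block's offset must be
// recomputed from its layout predecessor, mirroring adjustBBOffsetsAfter.
static void adjustOffsetsAfter(std::vector<BlockInfo> &BBInfo, unsigned BBNum) {
  for (unsigned i = BBNum + 1, e = BBInfo.size(); i < e; ++i)
    BBInfo[i].Offset = BBInfo[i - 1].postOffset();
}

int main() {
  std::vector<BlockInfo> BBInfo = {{0, 8}, {8, 12}, {20, 4}};
  BBInfo[0].Size += 4;          // block 0 grew by one 32-bit instruction
  adjustOffsetsAfter(BBInfo, 0);
  assert(BBInfo[1].Offset == 12 && BBInfo[2].Offset == 24);
  std::printf("last block now starts at %u\n", BBInfo[2].Offset);
  return 0;
}
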
/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount. If the refcount
/// becomes 0 remove the entry and instruction. Returns true if we removed
@@ -1241,6 +1199,7 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
// When a CP access is out of range, BB0 may be used as water. However,
// inserting islands between BB0 and BB1 makes other accesses out of range.
MachineBasicBlock *UserBB = U.MI->getParent();
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned MinNoSplitDisp =
BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
@@ -1297,6 +1256,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
MachineInstr *CPEMI = U.CPEMI;
unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
@@ -1328,8 +1288,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- computeBlockSize(MF, UserMBB, BBInfo[UserMBB->getNumber()]);
- adjustBBOffsetsAfter(UserMBB);
+ BBUtils->computeBlockSize(UserMBB);
+ BBUtils->adjustBBOffsetsAfter(UserMBB);
return;
}
}
@@ -1538,8 +1498,8 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
// Increase the size of the island block to account for the new entry.
- BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(&*--NewIsland->getIterator());
+ BBUtils->adjustBBSize(NewIsland, Size);
+ BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1550,7 +1510,8 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
LLVM_DEBUG(
dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+ << format(" offset=%#x\n",
+ BBUtils->getBBInfo()[NewIsland->getNumber()].Offset));
return true;
}
@@ -1561,7 +1522,8 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBInfo[CPEBB->getNumber()].Size -= Size;
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
+ BBUtils->adjustBBSize(CPEBB, -Size);
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
BBInfo[CPEBB->getNumber()].Size = 0;
@@ -1572,7 +1534,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
// Entries are sorted by descending alignment, so realign from the front.
CPEBB->setAlignment(getCPELogAlign(&*CPEBB->begin()));
- adjustBBOffsetsAfter(CPEBB);
+ BBUtils->adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1597,30 +1559,6 @@ bool ARMConstantIslands::removeUnusedCPEntries() {
return MadeChange;
}
-/// isBBInRange - Returns true if the distance between specific MI and
-/// specific BB can fit in MI's displacement field.
-bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
- unsigned MaxDisp) {
- unsigned PCAdj = isThumb ? 4 : 8;
- unsigned BrOffset = getOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
-
- LLVM_DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB)
- << " from " << printMBBReference(*MI->getParent())
- << " max delta=" << MaxDisp << " from " << getOffsetOf(MI)
- << " to " << DestOffset << " offset "
- << int(DestOffset - BrOffset) << "\t" << *MI);
-
- if (BrOffset <= DestOffset) {
- // Branch before the Dest.
- if (DestOffset-BrOffset <= MaxDisp)
- return true;
- } else {
- if (BrOffset-DestOffset <= MaxDisp)
- return true;
- }
- return false;
-}
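
The removed isBBInRange (now provided by ARMBasicBlockUtils via BBUtils->isBBInRange) checks whether the distance between a branch and its destination fits in the instruction's displacement field, after adding the pipeline adjustment of 4 bytes for Thumb or 8 for ARM. A minimal sketch of the same check, using plain integers instead of MachineInstr/MachineBasicBlock:

#include <cassert>

// Returns true if a branch at BrAddr can reach DestAddr given a maximum
// displacement of MaxDisp bytes. PCAdj models the ARM/Thumb pipeline offset
// (8 for ARM, 4 for Thumb), as in the removed isBBInRange above.
static bool branchInRange(unsigned BrAddr, unsigned DestAddr,
                          unsigned MaxDisp, bool IsThumb) {
  unsigned PCAdj = IsThumb ? 4 : 8;
  unsigned BrOffset = BrAddr + PCAdj;
  if (BrOffset <= DestAddr)
    return DestAddr - BrOffset <= MaxDisp;  // forward branch
  return BrOffset - DestAddr <= MaxDisp;    // backward branch
}

int main() {
  // A Thumb conditional branch has roughly a +/-256 byte range.
  assert(branchInRange(/*BrAddr=*/0x100, /*DestAddr=*/0x1f0, 256, true));
  assert(!branchInRange(/*BrAddr=*/0x100, /*DestAddr=*/0x500, 256, true));
  return 0;
}
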
/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
@@ -1629,7 +1567,7 @@ bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
// Check to see if the DestBB is already in-range.
- if (isBBInRange(MI, DestBB, Br.MaxDisp))
+ if (BBUtils->isBBInRange(MI, DestBB, Br.MaxDisp))
return false;
if (!Br.isCond)
@@ -1648,11 +1586,15 @@ ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
if (!isThumb1)
llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
+ if (!AFI->isLRSpilled())
+ report_fatal_error("underestimated function size");
+
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
BBInfo[MBB->getNumber()].Size += 2;
- adjustBBOffsetsAfter(MBB);
+ BBUtils->adjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
@@ -1699,7 +1641,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// bne L2
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
- if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ if (BBUtils->isBBInRange(MI, NewDest, Br.MaxDisp)) {
LLVM_DEBUG(
dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
@@ -1716,7 +1658,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->getInstSizeInBytes(MBB->back());
- BBInfo[MBB->getNumber()].Size -= delta;
+ BBUtils->adjustBBSize(MBB, -delta);
MBB->back().eraseFromParent();
// The conditional successor will be swapped between the BBs after this, so
@@ -1737,21 +1679,21 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+ BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
.addMBB(DestBB)
.add(predOps(ARMCC::AL));
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
+ BBUtils->adjustBBSize(MBB, TII->getInstSizeInBytes(MBB->back()));
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
+ BBUtils->adjustBBSize(MI->getParent(), -TII->getInstSizeInBytes(*MI));
MI->eraseFromParent();
- adjustBBOffsetsAfter(MBB);
+ BBUtils->adjustBBOffsetsAfter(MBB);
return true;
}
@@ -1826,8 +1768,8 @@ bool ARMConstantIslands::optimizeThumb2Instructions() {
LLVM_DEBUG(dbgs() << "Shrink: " << *U.MI);
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBInfo[MBB->getNumber()].Size -= 2;
- adjustBBOffsetsAfter(MBB);
+ BBUtils->adjustBBSize(MBB, -2);
+ BBUtils->adjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
@@ -1866,12 +1808,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
- if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ if (BBUtils->isBBInRange(Br.MI, DestBB, MaxOffs)) {
LLVM_DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBInfo[MBB->getNumber()].Size -= 2;
- adjustBBOffsetsAfter(MBB);
+ BBUtils->adjustBBSize(MBB, -2);
+ BBUtils->adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1898,34 +1840,47 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
// Check if the distance is within 126. Subtract 2 from the starting offset
// because the cmp will be eliminated.
- unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+ unsigned BrOffset = BBUtils->getOffsetOf(Br.MI) + 4 - 2;
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
- MachineBasicBlock::iterator CmpMI = Br.MI;
- if (CmpMI != Br.MI->getParent()->begin()) {
- --CmpMI;
- if (CmpMI->getOpcode() == ARM::tCMPi8) {
- unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = getInstrPredicate(*CmpMI, PredReg);
- if (Pred == ARMCC::AL &&
- CmpMI->getOperand(1).getImm() == 0 &&
- isARMLowRegister(Reg)) {
- MachineBasicBlock *MBB = Br.MI->getParent();
- LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
- MachineInstr *NewBR =
- BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
- .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
- CmpMI->eraseFromParent();
- Br.MI->eraseFromParent();
- Br.MI = NewBR;
- BBInfo[MBB->getNumber()].Size -= 2;
- adjustBBOffsetsAfter(MBB);
- ++NumCBZ;
- MadeChange = true;
- }
- }
+ if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
+ continue;
+
+ // Search backwards to find a tCMPi8
+ auto *TRI = STI->getRegisterInfo();
+ MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br.MI, TRI);
+ if (!CmpMI || CmpMI->getOpcode() != ARM::tCMPi8)
+ continue;
+
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+
+ // Check for kill flags on Reg. If they are present, remove them and set the
+ // kill flag on the new CBZ instead.
+ MachineBasicBlock::iterator KillMI = Br.MI;
+ bool RegKilled = false;
+ do {
+ --KillMI;
+ if (KillMI->killsRegister(Reg, TRI)) {
+ KillMI->clearRegisterKills(Reg, TRI);
+ RegKilled = true;
+ break;
}
- }
+ } while (KillMI != CmpMI);
+
+ // Create the new CBZ/CBNZ
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ LLVM_DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+ MachineInstr *NewBR =
+ BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegKilled))
+ .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBInfo[MBB->getNumber()].Size -= 2;
+ BBUtils->adjustBBOffsetsAfter(MBB);
+ ++NumCBZ;
+ MadeChange = true;
}
return MadeChange;
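
The rewritten fold above finds the compare-with-zero feeding a conditional branch (via findCMPToFoldIntoCBZ), moves any kill flag for the compared register onto the new CBZ/CBNZ, and erases both original instructions. A toy model of that shape, leaving out all of the checks the real code performs; every type and name below is invented for the sketch:

#include <cassert>
#include <list>
#include <string>

// Toy instruction: just enough state to model the CMP+Bcc -> CBZ fold.
struct Inst {
  std::string Opc;  // "CMPi8", "Bcc", "CBZ", ...
  int Reg;          // register operand, if any
  bool Kill;        // does this use kill the register?
};

// Fold "CMPi8 r, 0" + "Bcc L" into a single CBZ-style instruction, moving
// any kill flag from the compare onto the new instruction.
static bool foldCmpIntoCbz(std::list<Inst> &MBB) {
  for (auto It = MBB.begin(); It != MBB.end(); ++It) {
    if (It->Opc != "Bcc")
      continue;
    auto Cmp = It;
    if (Cmp == MBB.begin() || (--Cmp)->Opc != "CMPi8")
      continue;
    Inst NewBr{"CBZ", Cmp->Reg, Cmp->Kill}; // kill flag migrates to the CBZ
    It = MBB.erase(Cmp);                    // erase the compare
    It = MBB.erase(It);                     // erase the branch
    MBB.insert(It, NewBr);
    return true;
  }
  return false;
}

int main() {
  std::list<Inst> MBB = {{"MOV", 1, false}, {"CMPi8", 1, true}, {"Bcc", -1, false}};
  assert(foldCmpIntoCbz(MBB));
  assert(MBB.size() == 2 && MBB.back().Opc == "CBZ" && MBB.back().Kill);
  return 0;
}
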
@@ -2085,16 +2040,6 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
DeadSize += 4;
}
-static bool registerDefinedBetween(unsigned Reg,
- MachineBasicBlock::iterator From,
- MachineBasicBlock::iterator To,
- const TargetRegisterInfo *TRI) {
- for (auto I = From; I != To; ++I)
- if (I->modifiesRegister(Reg, TRI))
- return true;
- return false;
-}
-
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
@@ -2117,8 +2062,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
bool ByteOk = true;
bool HalfWordOk = true;
- unsigned JTOffset = getOffsetOf(MI) + 4;
+ unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4;
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ BBInfoVector &BBInfo = BBUtils->getBBInfo();
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
@@ -2281,7 +2227,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
int Delta = OrigSize - NewSize + DeadSize;
BBInfo[MBB->getNumber()].Size -= Delta;
- adjustBBOffsetsAfter(MBB);
+ BBUtils->adjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 236c4fab2a5c..3bdb0e1ef62d 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -1,9 +1,8 @@
//===- ARMConstantPoolValue.cpp - ARM constantpool value ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 55194ed94532..660b7fc88d82 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -1,9 +1,8 @@
//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index eecd0a10dc7d..b32ba3eeea18 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -423,8 +423,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
}
#endif
- auto I = std::lower_bound(std::begin(NEONLdStTable),
- std::end(NEONLdStTable), Opcode);
+ auto I = llvm::lower_bound(NEONLdStTable, Opcode);
if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
return I;
return nullptr;
@@ -470,6 +469,7 @@ static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
+ LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
@@ -571,8 +571,8 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
// Transfer memoperands.
MIB.cloneMemRefs(MI);
-
MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
@@ -580,6 +580,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
+ LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
@@ -646,8 +647,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
// Transfer memoperands.
MIB.cloneMemRefs(MI);
-
MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
@@ -655,6 +656,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
+ LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && "NEONLdStTable lookup failed");
@@ -745,6 +747,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
+ LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
unsigned OpIdx = 0;
@@ -774,6 +777,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
static bool IsAnAddressOperand(const MachineOperand &MO) {
@@ -830,6 +834,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
MachineInstrBuilder LO16, HI16;
+ LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
if (!STI->hasV6T2Ops() &&
(Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
@@ -911,6 +916,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
LO16.add(makeImplicit(MI.getOperand(1)));
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
+ LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););
+ LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
@@ -1930,11 +1937,16 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TRI = STI->getRegisterInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
+ LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
bool Modified = false;
for (MachineBasicBlock &MBB : MF)
Modified |= ExpandMBB(MBB);
if (VerifyARMPseudo)
MF.verify(this, "After expanding ARM pseudo instructions.");
+
+ LLVM_DEBUG(dbgs() << "***************************************************\n");
return Modified;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index a50abfdbee44..6e274d269bf2 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1,9 +1,8 @@
//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -245,8 +244,6 @@ class ARMFastISel final : public FastISel {
} // end anonymous namespace
-#include "ARMGenCallingConv.inc"
-
// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
@@ -444,7 +441,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
}
// Require VFP2 for loading fp constants.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
// MachineConstantPool wants an explicit alignment.
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
@@ -500,7 +497,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
}
unsigned ResultReg = 0;
- if (Subtarget->useMovt(*FuncInfo.MF))
+ if (Subtarget->useMovt())
ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
if (ResultReg)
@@ -558,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
bool IsPositionIndependent = isPositionIndependent();
// Use movw+movt when possible, it avoids constant pool entries.
// Non-darwin targets only support static movt relocations in FastISel.
- if (Subtarget->useMovt(*FuncInfo.MF) &&
+ if (Subtarget->useMovt() &&
(Subtarget->isTargetMachO() || !IsPositionIndependent)) {
unsigned Opc;
unsigned char TF = 0;
@@ -972,7 +969,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
break;
case MVT::f32:
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
// Unaligned loads need special handling. Floats require word-alignment.
if (Alignment && Alignment < 4) {
needVMOV = true;
@@ -985,7 +982,8 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
}
break;
case MVT::f64:
- if (!Subtarget->hasVFP2()) return false;
+ // Can load and store double precision even without FeatureFP64
+ if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned loads need special handling. Doublewords require
// word-alignment.
if (Alignment && Alignment < 4)
@@ -1110,7 +1108,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::f32:
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
// Unaligned stores need special handling. Floats require word-alignment.
if (Alignment && Alignment < 4) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
@@ -1125,7 +1123,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
}
break;
case MVT::f64:
- if (!Subtarget->hasVFP2()) return false;
+ // Can load and store double precision even without FeatureFP64
+ if (!Subtarget->hasVFP2Base()) return false;
// FIXME: Unaligned stores need special handling. Doublewords require
// word-alignment.
if (Alignment && Alignment < 4)
@@ -1356,10 +1355,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
return false;
- if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
return false;
// Check to see if the 2nd operand is a constant that we can encode directly
@@ -1509,7 +1508,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
bool ARMFastISel::SelectFPExt(const Instruction *I) {
// Make sure we have VFP and that we're extending float to double.
- if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+ if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() ||
@@ -1528,7 +1527,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
// Make sure we have VFP and that we're truncating double to float.
- if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
+ if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
Value *V = I->getOperand(0);
if (!(I->getType()->isFloatTy() &&
@@ -1547,7 +1546,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
MVT DstVT;
Type *Ty = I->getType();
@@ -1579,7 +1578,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
- else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+ else if (Ty->isDoubleTy() && Subtarget->hasFP64())
Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
@@ -1592,7 +1591,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2Base()) return false;
MVT DstVT;
Type *RetTy = I->getType();
@@ -1605,7 +1604,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
- else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+ else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
@@ -1811,9 +1810,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
Type *Ty = I->getType();
- if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
return false;
- if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
return false;
unsigned Opc;
@@ -1855,7 +1854,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::Fast:
- if (Subtarget->hasVFP2() && !isVarArg) {
+ if (Subtarget->hasVFP2Base() && !isVarArg) {
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
// For AAPCS ABI targets, just use VFP variant of the calling convention.
@@ -1866,7 +1865,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() &&
+ if (Subtarget->hasVFP2Base() &&
TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
@@ -1935,11 +1934,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case MVT::i32:
break;
case MVT::f32:
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
break;
case MVT::f64:
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
break;
}
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index 8c0df4c2cbf9..5cd7006c22fc 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -1,9 +1,8 @@
//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a9d87ced31f3..bedb779bcba0 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1,9 +1,8 @@
//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -344,6 +344,10 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+ // For Thumb1, push.w isn't available, so the first push will always push
+ // r7 and lr onto the stack first.
+ if (AFI.isThumb1OnlyFunction())
+ return -AFI.getArgRegsSaveSize() - (2 * 4);
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
return -AFI.getArgRegsSaveSize() - (8 * 4);
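
The added Thumb1 case reflects the comment above: only r7 and lr are pushed before the frame pointer is set, so the conservative estimate is two registers rather than eight. A small arithmetic sketch with the ARMFunctionInfo argument replaced by plain parameters (names invented for the example):

#include <cassert>

// Conservative lower bound on the fp-relative offset of the lowest fixed
// object, mirroring getMaxFPOffset: Thumb1 assumes only {r7, lr} are pushed
// before the FP is set, other targets assume up to 8 registers.
static int maxFPOffset(bool IsThumb1, int ArgRegsSaveSize) {
  if (IsThumb1)
    return -ArgRegsSaveSize - (2 * 4);
  return -ArgRegsSaveSize - (8 * 4);
}

int main() {
  assert(maxFPOffset(/*IsThumb1=*/true, /*ArgRegsSaveSize=*/8) == -16);
  assert(maxFPOffset(/*IsThumb1=*/false, /*ArgRegsSaveSize=*/8) == -40);
  return 0;
}
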
@@ -954,8 +958,12 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
}
}
// Use the base pointer if we have one.
- if (RegInfo->hasBasePointer(MF))
+ // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
+ // That can happen if we forced a base pointer for a large call frame.
+ if (RegInfo->hasBasePointer(MF)) {
FrameReg = RegInfo->getBaseRegister();
+ Offset -= SPAdj;
+ }
return Offset;
}
@@ -1476,13 +1484,17 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
// FIXME: Make generic?
-static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
- const ARMBaseInstrInfo &TII) {
+static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
unsigned FnSize = 0;
for (auto &MBB : MF) {
for (auto &MI : MBB)
FnSize += TII.getInstSizeInBytes(MI);
}
+ if (MF.getJumpTableInfo())
+ for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
+ FnSize += Table.MBBs.size() * 4;
+ FnSize += MF.getConstantPool()->getConstants().size() * 4;
return FnSize;
}
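
The renamed EstimateFunctionSizeInBytes now also counts jump tables (4 bytes per destination entry) and constant pool entries (4 bytes each) on top of the per-instruction sizes, which is why "Estimate" replaces "Get" in the name. A standalone sketch of the same accumulation over plain containers; the container shapes are invented for the example:

#include <cassert>
#include <vector>

// Rough model of EstimateFunctionSizeInBytes: instruction bytes plus 4 bytes
// per jump-table entry and per constant-pool entry.
static unsigned estimateFunctionSize(
    const std::vector<std::vector<unsigned>> &BlockInstSizes,
    const std::vector<unsigned> &JumpTableEntryCounts,
    unsigned NumConstantPoolEntries) {
  unsigned FnSize = 0;
  for (const auto &MBB : BlockInstSizes)
    for (unsigned InstSize : MBB)
      FnSize += InstSize;
  for (unsigned Entries : JumpTableEntryCounts)
    FnSize += Entries * 4;
  FnSize += NumConstantPoolEntries * 4;
  return FnSize;
}

int main() {
  // Two blocks of Thumb code, one 3-entry jump table, two CP entries.
  unsigned Size = estimateFunctionSize({{2, 2, 4}, {2, 2}}, {3}, 2);
  assert(Size == 12 + 12 + 8);
  return 0;
}
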
@@ -1726,7 +1738,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
bool ForceLRSpill = false;
if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
- unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+ unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
// Force LR to be spilled if the Thumb function size is > 2048. This enables
// use of BL to implement far jump. If it turns out that it's not needed
// then the branch fix up path will undo it.
@@ -1771,13 +1783,59 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
EstimatedStackSize += 16; // For possible paddings.
- unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
+ if (AFI->isThumb1OnlyFunction()) {
+ // For Thumb1, don't bother to iterate over the function. The only
+ // instruction that requires an emergency spill slot is a store to a
+ // frame index.
+ //
+ // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
+ // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
+ // a 5-bit unsigned immediate.
+ //
+ // We could try to check if the function actually contains a tSTRspi
+ // that might need the spill slot, but it's not really important.
+ // Functions with VLAs or extremely large call frames are rare, and
+ // if a function is allocating more than 1KB of stack, an extra 4-byte
+ // slot probably isn't relevant.
+ if (RegInfo->hasBasePointer(MF))
+ EstimatedRSStackSizeLimit = (1U << 5) * 4;
+ else
+ EstimatedRSStackSizeLimit = (1U << 8) * 4;
+ EstimatedRSFixedSizeLimit = (1U << 5) * 4;
+ } else {
+ EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+ EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
+ }
+ // Final estimate of whether sp- or bp-relative accesses might require
+ // scavenging.
+ bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
+
+ // If the stack pointer moves and we don't have a base pointer, the
+ // estimate logic doesn't work. The actual offsets might be larger when
+ // we're constructing a call frame, or we might need to use negative
+ // offsets from fp.
+ bool HasMovingSP = MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
+
+ // If we have a frame pointer, we assume arguments will be accessed
+ // relative to the frame pointer. Check whether fp-relative accesses to
+ // arguments require scavenging.
+ //
+ // We could do slightly better on Thumb1; in some cases, an sp-relative
+ // offset would be legal even though an fp-relative offset is not.
int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
- bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit ||
- MFI.hasVarSizedObjects() ||
- (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) ||
- // For large argument stacks fp relative addressed may overflow.
- (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit);
+ bool HasLargeArgumentList =
+ HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
+
+ bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
+ HasLargeArgumentList;
+ LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
+ << "; EstimatedStack" << EstimatedStackSize
+ << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset
+ << "; BigFrameOffsets: " << BigFrameOffsets
+ << "\n");
if (BigFrameOffsets ||
!CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
@@ -1802,8 +1860,17 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
CS1Spilled = true;
}
- // This is true when we inserted a spill for an unused register that can now
- // be used for register scavenging.
+ // This is true when we inserted a spill for a callee-save GPR which is
+ // not otherwise used by the function. This guarantees it is possible
+ // to scavenge a register to hold the address of a stack slot. On Thumb1,
+ // the register must be a valid operand to tSTRi, i.e. r4-r7. For other
+ // subtargets, this is any GPR, i.e. r4-r11 or lr.
+ //
+ // If we don't insert a spill, we instead allocate an emergency spill
+ // slot, which can be used by scavenging to spill an arbitrary register.
+ //
+ // We currently don't try to figure out whether any specific instruction
+ // requires scavenging an additional register.
bool ExtraCSSpill = false;
if (AFI->isThumb1OnlyFunction()) {
@@ -1912,7 +1979,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
NumGPRSpills++;
CS1Spilled = true;
assert(!MRI.isReserved(Reg) && "Should not be reserved");
- if (!MRI.isPhysRegUsed(Reg))
+ if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
ExtraCSSpill = true;
UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
if (Reg == ARM::LR)
@@ -1937,7 +2004,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
UnspilledCS1GPRs.erase(LRPos);
ForceLRSpill = false;
- if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR))
+ if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
+ !AFI->isThumb1OnlyFunction())
ExtraCSSpill = true;
}
@@ -1959,7 +2027,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(Reg);
LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
<< " to make up alignment\n");
- if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
+ if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
+ !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
ExtraCSSpill = true;
break;
}
@@ -1988,8 +2057,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned Reg = UnspilledCS1GPRs.back();
UnspilledCS1GPRs.pop_back();
if (!MRI.isReserved(Reg) &&
- (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
- Reg == ARM::LR)) {
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
Extras.push_back(Reg);
NumExtras--;
}
@@ -2012,10 +2080,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
ExtraCSSpill = true;
}
}
- if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) {
- // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
- // closest to SP or frame pointer.
+ if (!ExtraCSSpill) {
+ // Reserve a slot closest to SP or frame pointer.
assert(RS && "Register scavenging not provided");
+ LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
const TargetRegisterClass &RC = ARM::GPRRegClass;
unsigned Size = TRI->getSpillSize(RC);
unsigned Align = TRI->getSpillAlignment(RC);
@@ -2028,6 +2096,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
AFI->setLRIsSpilledForFarJump(true);
}
+ AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 2f7e23840e75..7544ca3c38d6 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -1,9 +1,8 @@
//===- ARMTargetFrameLowering.h - Define frame lowering for ARM -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index d5dacbe08770..0fa32a0abeff 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index ccf09db69937..b5ac694e01f7 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -1,9 +1,8 @@
//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 8e0e82388251..b349627b67b1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -120,8 +119,7 @@ public:
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
- int Lwb, int Upb, bool FP16);
+ bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
@@ -131,6 +129,7 @@ public:
// Thumb Addressing Modes:
bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -147,6 +146,9 @@ public:
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
+ template<unsigned Shift>
+ bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
+ SDValue &OffImm);
bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
@@ -452,8 +454,10 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
if (Subtarget->isThumb()) {
if (Val <= 255) return 1; // MOV
if (Subtarget->hasV6T2Ops() &&
- (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
- return 1; // MOVW
+ (Val <= 0xffff || // MOV
+ ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
+ ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
+ return 1;
if (Val <= 510) return 2; // MOV + ADDi8
if (~Val <= 255) return 2; // MOV + MVN
if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
@@ -463,7 +467,7 @@ unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
}
- if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+ if (Subtarget->useMovt()) return 2; // MOVW + MOVT
return 3; // Literal pool load
}
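
The ConstantMaterializationCost change above adds MVN of the inverted immediate to the single-instruction cases for Thumb2. A simplified cost ladder for the Thumb2 path only, with the modified-immediate encodability test passed in as a predicate so the sketch does not have to reproduce the real ARM_AM::getT2SOImmVal rules:

#include <cassert>
#include <cstdint>
#include <functional>

// Simplified Thumb2 constant materialization cost, in instructions.
static unsigned thumb2ConstCost(uint32_t Val,
                                const std::function<bool(uint32_t)> &IsSOImm) {
  if (Val <= 255)
    return 1;                                   // MOVS
  if (Val <= 0xffff)
    return 1;                                   // MOVW
  if (IsSOImm(Val) || IsSOImm(~Val))
    return 1;                                   // MOV / MVN with modified imm
  return 2;                                     // MOVW + MOVT
}

int main() {
  // Toy predicate: treat "single byte, possibly byte-shifted" values as
  // encodable; the real rules are richer than this.
  auto IsSOImm = [](uint32_t V) {
    while (V && (V & 0xff) == 0)
      V >>= 8;
    return V <= 0xff;
  };
  assert(thumb2ConstCost(0x00ff0000, IsSOImm) == 1); // MOV with shifted imm
  assert(thumb2ConstCost(0x12345678, IsSOImm) == 2); // MOVW + MOVT
  return 0;
}
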
@@ -900,7 +904,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
}
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
- int Lwb, int Upb, bool FP16) {
+ bool FP16) {
if (!CurDAG->isBaseWithConstantOffset(N)) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
@@ -922,7 +926,7 @@ bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offse
int RHSC;
const int Scale = FP16 ? 2 : 4;
- if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
+ if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -960,16 +964,12 @@ bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offse
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
SDValue &Base, SDValue &Offset) {
- int Lwb = -256 + 1;
- int Upb = 256;
- return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
+ return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
SDValue &Base, SDValue &Offset) {
- int Lwb = -512 + 1;
- int Upb = 512;
- return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
+ return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
@@ -1033,8 +1033,22 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//
-bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
- SDValue &Base, SDValue &Offset){
+static bool shouldUseZeroOffsetLdSt(SDValue N) {
+ // Negative numbers are difficult to materialise in thumb1. If we are
+ // selecting the add of a negative, instead try to select ri with a zero
+ // offset, so create the add node directly which will become a sub.
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+
+ // Look for an imm which is not legal for ld/st, but is legal for sub.
+ if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
+ return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
+
+ return false;
+}
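
As the new shouldUseZeroOffsetLdSt explains, a small negative offset cannot go into the reg+imm addressing mode (Thumb1 load/store offsets are unsigned), but it can be folded into a preceding SUB, so the access itself should use a zero offset. A sketch of the same predicate over a plain (is-add, immediate) pair:

#include <cassert>
#include <cstdint>

// Mirrors shouldUseZeroOffsetLdSt: a small negative offset is legal for SUB
// but not for a Thumb1 load/store offset field, so prefer offset 0 and let
// the address computation be selected separately.
static bool useZeroOffset(bool IsAdd, int64_t Imm) {
  if (!IsAdd)
    return false;
  return Imm < 0 && Imm >= -255;
}

int main() {
  assert(useZeroOffset(true, -4));     // becomes "subs rN, base, #4; ldr [rN]"
  assert(!useZeroOffset(true, 8));     // positive offsets stay on the ld/st
  assert(!useZeroOffset(true, -4096)); // too large even for a single SUB
  return 0;
}
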
+
+bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
+ SDValue &Offset) {
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
if (!NC || !NC->isNullValue())
@@ -1049,9 +1063,22 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
return true;
}
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
+ SDValue &Offset) {
+ if (shouldUseZeroOffsetLdSt(N))
+ return false; // Select ri instead
+ return SelectThumbAddrModeRRSext(N, Base, Offset);
+}
+
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm) {
+ if (shouldUseZeroOffsetLdSt(N)) {
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+ }
+
if (!CurDAG->isBaseWithConstantOffset(N)) {
if (N.getOpcode() == ISD::ADD) {
return false; // We want to select register offset instead
@@ -1117,25 +1144,28 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
if (!CurDAG->isBaseWithConstantOffset(N))
return false;
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
- (LHSR && LHSR->getReg() == ARM::SP)) {
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
// If the RHS is + imm8 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ // Make sure the offset is inside the object, or we might fail to
+ // allocate an emergency spill slot. (An out-of-range access is UB, but
+ // it could show up anyway.)
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (RHSC * 4 < MFI.getObjectSize(FI)) {
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
- MachineFrameInfo &MFI = MF->getFrameInfo();
- if (MFI.getObjectAlignment(FI) < 4)
+ if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
MFI.setObjectAlignment(FI, 4);
- Base = CurDAG->getTargetFrameIndex(
- FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ if (MFI.getObjectAlignment(FI) >= 4) {
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
}
- OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
- return true;
}
}
@@ -1248,6 +1278,35 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
return false;
}
+template<unsigned Shift>
+bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
+ if (N.getOpcode() == ISD::SUB ||
+ CurDAG->isBaseWithConstantOffset(N)) {
+ if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (isShiftedInt<7, Shift>(RHSC)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(
+ FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
+ return true;
+}
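
The new SelectT2AddrModeImm7 accepts a base plus a constant that is a multiple of 1 << Shift and whose shifted value fits in a signed 7-bit field, otherwise falling back to base-only with a zero offset. A sketch of roughly the check performed by llvm::isShiftedInt<N, S>, written out longhand:

#include <cassert>
#include <cstdint>

// X must be a multiple of 2^S, and X / 2^S must fit in a signed N-bit field.
static bool isShiftedIntNS(int64_t X, unsigned N, unsigned S) {
  if (X % (int64_t(1) << S) != 0)
    return false;
  int64_t Shifted = X / (int64_t(1) << S); // exact, since X is a multiple
  int64_t Lo = -(int64_t(1) << (N - 1));
  int64_t Hi = (int64_t(1) << (N - 1)) - 1;
  return Shifted >= Lo && Shifted <= Hi;
}

int main() {
  // With Shift = 2 this covers multiples of 4 in [-256, 252]: a signed
  // 7-bit count of 4-byte units.
  assert(isShiftedIntNS(252, 7, 2));
  assert(isShiftedIntNS(-256, 7, 2));
  assert(!isShiftedIntNS(254, 7, 2));  // not a multiple of 4
  assert(!isShiftedIntNS(256, 7, 2));  // 64 does not fit in signed 7 bits
  return 0;
}
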
+
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
@@ -2072,10 +2131,12 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
default: llvm_unreachable("unhandled vld/vst lane type");
// Double-register operations:
case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4f16:
case MVT::v4i16: OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32: OpcodeIndex = 2; break;
// Quad-register operations:
+ case MVT::v8f16:
case MVT::v8i16: OpcodeIndex = 0; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 1; break;
@@ -2192,7 +2253,10 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
case MVT::v8i8:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v4i16:
- case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v8i16:
+ case MVT::v4f16:
+ case MVT::v8f16:
+ OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32:
case MVT::v4f32:
@@ -2577,6 +2641,44 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::STORE: {
+ // For Thumb1, match an sp-relative store in C++. This is a little
+ // unfortunate, but I don't think I can make the chain check work
+ // otherwise. (The chain of the store has to be the same as the chain
+ // of the CopyFromReg, or else we can't replace the CopyFromReg with
+ // a direct reference to "SP".)
+ //
+ // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
+ // a different addressing mode from other four-byte stores.
+ //
+ // This pattern usually comes up with call arguments.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Ptr = ST->getBasePtr();
+ if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
+ int RHSC = 0;
+ if (Ptr.getOpcode() == ISD::ADD &&
+ isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
+ Ptr = Ptr.getOperand(0);
+
+ if (Ptr.getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
+ Ptr.getOperand(0) == ST->getChain()) {
+ SDValue Ops[] = {ST->getValue(),
+ CurDAG->getRegister(ARM::SP, MVT::i32),
+ CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
+ getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32),
+ ST->getChain()};
+ MachineSDNode *ResNode =
+ CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
+ MachineMemOperand *MemOp = ST->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
+ ReplaceNode(N, ResNode);
+ return;
+ }
+ }
+ break;
+ }
case ISD::WRITE_REGISTER:
if (tryWriteRegister(N))
return;
@@ -2586,6 +2688,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
break;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
if (tryInlineAsm(N))
return;
break;
@@ -2895,6 +2998,16 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ARMISD::WLS: {
+ SDValue Ops[] = { N->getOperand(1), // Loop count
+ N->getOperand(2), // Exit target
+ N->getOperand(0) };
+ SDNode *LoopStart =
+ CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
+ ReplaceUses(N, LoopStart);
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
case ARMISD::BRCOND: {
// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
@@ -2922,6 +3035,36 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ SDValue Int = InFlag.getOperand(0);
+ uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+
+ // Handle low-overhead loops.
+ if (ID == Intrinsic::loop_decrement_reg) {
+ SDValue Elements = Int.getOperand(2);
+ SDValue Size = CurDAG->getTargetConstant(
+ cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
+ MVT::i32);
+
+ SDValue Args[] = { Elements, Size, Int.getOperand(0) };
+ SDNode *LoopDec =
+ CurDAG->getMachineNode(ARM::t2LoopDec, dl,
+ CurDAG->getVTList(MVT::i32, MVT::Other),
+ Args);
+ ReplaceUses(Int.getNode(), LoopDec);
+
+ SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
+ SDNode *LoopEnd =
+ CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
+
+ ReplaceUses(N, LoopEnd);
+ CurDAG->RemoveDeadNode(N);
+ CurDAG->RemoveDeadNode(InFlag.getNode());
+ CurDAG->RemoveDeadNode(Int.getNode());
+ return;
+ }
+ }
+
bool SwitchEQNEToPLMI;
SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
InFlag = N->getOperand(4);
@@ -3979,9 +4122,9 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
// If an opcode was found then we can lower the read to a VFP instruction.
if (Opcode) {
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
- if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+ if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
return false;
Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
@@ -4090,7 +4233,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
.Default(0);
if (Opcode) {
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return false;
Ops = { N->getOperand(2), getAL(CurDAG, DL),
CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
@@ -4290,7 +4433,7 @@ bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
if (!Changed)
return false;
- SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+ SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
New->setNodeId(-1);
ReplaceNode(N, New.getNode());
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 21de0f6a7630..18bb9bf3eccc 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1,9 +1,8 @@
//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -80,6 +79,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -113,6 +113,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "arm-isel"
@@ -220,6 +221,121 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
+void ARMTargetLowering::setAllExpand(MVT VT) {
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VT, Expand);
+
+ // We support these really simple operations even on types where all
+ // the actual arithmetic has to be broken down into simpler
+ // operations or turned into library calls.
+ setOperationAction(ISD::BITCAST, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::UNDEF, VT, Legal);
+}
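
setAllExpand marks every generic operation on a type Expand and then re-legalizes the handful of operations that are always supportable (bitcast/load/store/undef). A toy model of that "default everything off, then whitelist" pattern using a map from opcode name to action; the enum and names are invented for the sketch:

#include <cassert>
#include <map>
#include <string>

enum class Action { Legal, Expand };

// Default every operation on a type to Expand, then re-mark the few
// operations that are always supportable as Legal.
static std::map<std::string, Action> makeActionsForType() {
  static const char *AllOps[] = {"ADD", "MUL", "FDIV", "BITCAST",
                                 "LOAD", "STORE", "UNDEF"};
  std::map<std::string, Action> Actions;
  for (const char *Op : AllOps)
    Actions[Op] = Action::Expand;
  for (const char *Op : {"BITCAST", "LOAD", "STORE", "UNDEF"})
    Actions[Op] = Action::Legal;
  return Actions;
}

int main() {
  auto Actions = makeActionsForType();
  assert(Actions["LOAD"] == Action::Legal);
  assert(Actions["FDIV"] == Action::Expand);
  return 0;
}
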
+
+void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
+ LegalizeAction Action) {
+ setLoadExtAction(ISD::EXTLOAD, From, To, Action);
+ setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
+ setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
+}
+
+void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
+ const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
+
+ for (auto VT : IntTypes) {
+ addRegisterClass(VT, &ARM::QPRRegClass);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
+
+ // No native support for these.
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+
+ if (!HasMVEFP) {
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ }
+
+ const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
+ for (auto VT : FloatTypes) {
+ addRegisterClass(VT, &ARM::QPRRegClass);
+ if (!HasMVEFP)
+ setAllExpand(VT);
+
+ // These are legal or custom whether we have MVE.fp or not
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getVectorElementType(), Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT.getVectorElementType(), Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
+
+ if (HasMVEFP) {
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
+ setOperationAction(ISD::FROUND, VT, Legal);
+
+ // No native support for these.
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ }
+ }
+
+ // We 'support' these types up to bitcast/load/store level, regardless of
+ // MVE integer-only / float support. Only FP data processing on the FP
+ // vector types is inhibited at the integer-only level.
+ const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
+ for (auto VT : LongTypes) {
+ addRegisterClass(VT, &ARM::QPRRegClass);
+ setAllExpand(VT);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ }
+ // We can do bitwise operations on v2i64 vectors
+ setOperationAction(ISD::AND, MVT::v2i64, Legal);
+ setOperationAction(ISD::OR, MVT::v2i64, Legal);
+ setOperationAction(ISD::XOR, MVT::v2i64, Legal);
+
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
+ addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
+ addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
+
+ // Some truncating stores are legal too.
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+}
+
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
const ARMSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
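A minimal sketch of the legalization pattern used by setAllExpand above (the helper below and its Action enum are hypothetical illustration code, not part of this patch or of the LLVM API): default every generic opcode to Expand for the type, then selectively put the cheap data-movement operations back to Legal.
#include <initializer_list>
#include <map>

enum class Action { Legal, Expand };

// Default every opcode to Expand, then re-legalize the handful of cheap
// data-movement operations (e.g. BITCAST, LOAD, STORE, UNDEF).
static std::map<unsigned, Action>
modelAllExpand(unsigned NumOpcodes, std::initializer_list<unsigned> Keep) {
  std::map<unsigned, Action> Actions;
  for (unsigned Opc = 0; Opc < NumOpcodes; ++Opc)
    Actions[Opc] = Action::Expand;
  for (unsigned Opc : Keep)
    Actions[Opc] = Action::Legal;
  return Actions;
}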
@@ -240,7 +356,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
static const struct {
const RTLIB::Libcall Op;
@@ -509,10 +625,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
else
addRegisterClass(MVT::i32, &ARM::GPRRegClass);
- if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
- !Subtarget->isThumb1Only()) {
+ if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
+ Subtarget->hasFPRegs()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+ if (!Subtarget->hasVFP2Base())
+ setAllExpand(MVT::f32);
+ if (!Subtarget->hasFP64())
+ setAllExpand(MVT::f64);
}
if (Subtarget->hasFullFP16()) {
@@ -528,9 +648,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
for (MVT VT : MVT::vector_valuetypes()) {
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ addAllExtLoads(VT, InnerVT, Expand);
}
setOperationAction(ISD::MULHS, VT, Expand);
@@ -547,6 +665,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
+ if (Subtarget->hasMVEIntegerOps())
+ addMVEVectorTypes(Subtarget->hasMVEFloatOps());
+
+ // Combine low-overhead loop intrinsics so that we can lower i1 types.
+ if (Subtarget->hasLOB())
+ setTargetDAGCombine(ISD::BRCOND);
+
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
@@ -565,11 +690,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v8f16);
addDRTypeForNEON(MVT::v4f16);
}
+ }
+ if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
- // neither Neon nor VFP support any arithmetic operations on it.
- // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
- // supported for v4f32.
+ // none of Neon, MVE or VFP supports any arithmetic operations on it.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
@@ -603,7 +728,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
setOperationAction(ISD::FMA, MVT::v2f64, Expand);
+ }
+ if (Subtarget->hasNEON()) {
+ // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+ // supported for v4f32.
setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
@@ -697,7 +826,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
// NEON only has FMA instructions as of VFP4.
- if (!Subtarget->hasVFP4()) {
+ if (!Subtarget->hasVFP4Base()) {
setOperationAction(ISD::FMA, MVT::v2f32, Expand);
setOperationAction(ISD::FMA, MVT::v4f32, Expand);
}
@@ -711,9 +840,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
- setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
- setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
@@ -731,7 +857,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->isFPOnlySP()) {
+ if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ }
+
+ if (!Subtarget->hasFP64()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
// are present. However, no double-precision operations other than moves,
@@ -767,9 +899,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ }
+
+ if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()){
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
}
+ if (!Subtarget->hasFP16())
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
+
+ if (!Subtarget->hasFP64())
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+
computeRegisterProperties(Subtarget->getRegisterInfo());
// ARM does not have floating-point extending loads.
@@ -832,6 +974,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ // MVE lowers 64-bit shifts to lsll and lsrl,
+ // assuming that ISD::SRL and SRA of i64 are already marked custom.
+ if (Subtarget->hasMVEIntegerOps())
+ setOperationAction(ISD::SHL, MVT::i64, Custom);
+
// Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -1029,7 +1176,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
!Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
@@ -1079,7 +1226,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
+ if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
!Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
@@ -1087,7 +1234,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
- if (!Subtarget->hasVFP4()) {
+ if (!Subtarget->hasVFP4Base()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
}
@@ -1095,7 +1242,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Various VFP goodness
if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
- if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
+ if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
}
@@ -1115,7 +1262,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
// FP-ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasFPARMv8()) {
+ if (Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Legal);
@@ -1124,12 +1271,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+ if (Subtarget->hasNEON()) {
+ setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+ }
- if (!Subtarget->isFPOnlySP()) {
+ if (Subtarget->hasFP64()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FROUND, MVT::f64, Legal);
@@ -1141,6 +1290,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
+ // FP16 operations often need to be promoted to call library functions
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+
+ setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ }
+
if (Subtarget->hasNEON()) {
// vmin and vmax aren't available in a scalar form, so we use
// a NEON instruction with an undef lane instead.
@@ -1177,11 +1344,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
+ if (Subtarget->isThumb1Only())
+ setTargetDAGCombine(ISD::SHL);
setStackPointerRegisterToSaveRestore(ARM::SP);
if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
- !Subtarget->hasVFP2())
+ !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
@@ -1204,6 +1373,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+
+ if (Subtarget->isThumb() || Subtarget->isThumb2())
+ setTargetDAGCombine(ISD::ABS);
}
bool ARMTargetLowering::useSoftFloat() const {
@@ -1288,6 +1460,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::USAT: return "ARMISD::USAT";
+ case ARMISD::ASRL: return "ARMISD::ASRL";
+ case ARMISD::LSRL: return "ARMISD::LSRL";
+ case ARMISD::LSLL: return "ARMISD::LSLL";
+
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1332,23 +1508,25 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
- case ARMISD::VSHL: return "ARMISD::VSHL";
- case ARMISD::VSHRs: return "ARMISD::VSHRs";
- case ARMISD::VSHRu: return "ARMISD::VSHRu";
- case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
- case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
- case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
- case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
- case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
- case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
- case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
- case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
- case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
- case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
- case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
- case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
- case ARMISD::VSLI: return "ARMISD::VSLI";
- case ARMISD::VSRI: return "ARMISD::VSRI";
+ case ARMISD::VSHLs: return "ARMISD::VSHLs";
+ case ARMISD::VSHLu: return "ARMISD::VSHLu";
+ case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
+ case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
+ case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
+ case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
+ case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
+ case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
+ case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
+ case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
+ case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
+ case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
+ case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
+ case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
+ case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
+ case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
+ case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
+ case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
+ case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
@@ -1410,6 +1588,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
+ case ARMISD::WLS: return "ARMISD::WLS";
}
return nullptr;
}
@@ -1423,11 +1602,14 @@ EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
+const TargetRegisterClass *
+ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
+ (void)isDivergent;
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
- // load / store 4 to 8 consecutive D registers.
- if (Subtarget->hasNEON()) {
+ // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
+ // MVE Q registers.
+ if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
if (VT == MVT::v4i64)
return &ARM::QQPRRegClass;
if (VT == MVT::v8i64)
@@ -1590,8 +1772,6 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-#include "ARMGenCallingConv.inc"
-
/// getEffectiveCallingConv - Get the effective calling convention, taking into
/// account presence of floating point hardware and calling convention
/// limitations, such as support for variadic functions.
@@ -1613,7 +1793,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
- else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
+ else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
@@ -1622,10 +1802,11 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::Fast:
case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
- } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
+ } else if (Subtarget->hasVFP2Base() &&
+ !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
@@ -1807,29 +1988,42 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- bool isThisReturn = false;
- bool isSibCall = false;
+ bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool isThisReturn = false;
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
+ bool PreferIndirect = false;
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
isTailCall = false;
+ if (isa<GlobalAddressSDNode>(Callee)) {
+ // If we're optimizing for minimum size and the function is called three or
+ // more times in this block, we can improve codesize by calling indirectly
+ // as BLXr has a 16-bit encoding.
+ auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ if (CLI.CS) {
+ auto *BB = CLI.CS.getParent();
+ PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
+ count_if(GV->users(), [&BB](const User *U) {
+ return isa<Instruction>(U) &&
+ cast<Instruction>(U)->getParent() == BB;
+ }) > 2;
+ }
+ }
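// Editorial note, not part of the patch: the trade-off PreferIndirect
// captures is that a direct Thumb-2 "bl" uses a 32-bit encoding while
// "blx rN" is 16-bit, so with three or more calls to the same callee in one
// block a single address materialization plus short register calls can be
// smaller under minsize.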
if (isTailCall) {
// Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
- Outs, OutVals, Ins, DAG);
+ isTailCall = IsEligibleForTailCallOptimization(
+ Callee, CallConv, isVarArg, isStructRet,
+ MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
+ PreferIndirect);
if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
- if (isTailCall) {
+ if (isTailCall)
++NumTailCalls;
- isSibCall = true;
- }
}
// Analyze operands of the call, assigning locations to each operand.
@@ -1841,14 +2035,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- // For tail calls, memory operands are available in our caller's stack.
- if (isSibCall)
+ if (isTailCall) {
+ // For tail calls, memory operands are available in our caller's stack.
NumBytes = 0;
-
- // Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog pass
- if (!isSibCall)
+ } else {
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+ }
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@@ -1970,7 +2164,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops));
}
- } else if (!isSibCall) {
+ } else if (!isTailCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1984,32 +2178,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- // Tail call byval lowering might overwrite argument registers so in case of
- // tail call optimization the copies to registers are lowered later.
- if (!isTailCall)
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // For tail calls lower the arguments to the 'real' stack slot.
- if (isTailCall) {
- // Force all the incoming stack arguments to be loaded from the stack
- // before any new outgoing arguments are stored to the stack, because the
- // outgoing stack slots may alias the incoming argument stack slots, and
- // the alias isn't otherwise explicit. This is slightly more conservative
- // than necessary, because it means that each store effectively depends
- // on every argument instead of just those arguments it would clobber.
-
- // Do not flag preceding copytoreg stuff together with the following stuff.
- InFlag = SDValue();
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
- InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -2064,17 +2236,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
} else if (isa<GlobalAddressSDNode>(Callee)) {
- // If we're optimizing for minimum size and the function is called three or
- // more times in this block, we can improve codesize by calling indirectly
- // as BLXr has a 16-bit encoding.
- auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- auto *BB = CLI.CS.getParent();
- bool PreferIndirect =
- Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
- count_if(GV->users(), [&BB](const User *U) {
- return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
- }) > 2;
-
if (!PreferIndirect) {
isDirect = true;
bool isDef = GV->isStrongDefinitionForLinker();
@@ -2098,7 +2259,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
- Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
+ Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
TargetFlags);
if (GV->hasDLLImportStorageClass())
Callee =
@@ -2142,7 +2303,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
- !MF.getFunction().optForMinSize())
+ !Subtarget->hasMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -2306,28 +2467,25 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
-bool
-ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+bool ARMTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+ bool isCalleeStructRet, bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+ const bool isIndirect) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
assert(Subtarget->supportsTailCall());
- // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+ // Indirect tail calls cannot be optimized for Thumb1 if the args
// to the call take up r0-r3. The reason is that there are no legal registers
// left to hold the pointer to the function to be called.
if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
- !isa<GlobalAddressSDNode>(Callee.getNode()))
- return false;
+ (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
+ return false;
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
@@ -2756,7 +2914,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
auto M = const_cast<Module*>(DAG.getMachineFunction().
getFunction().getParent());
auto GV = new GlobalVariable(
- *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+ *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
Twine(AFI->createPICLabelUId())
@@ -3225,7 +3383,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
SDValue RelAddr;
- if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ if (Subtarget->useMovt()) {
++NumMovwMovt;
SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3245,7 +3403,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
- if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ if (Subtarget->useMovt()) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
@@ -3268,7 +3426,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- if (Subtarget->useMovt(DAG.getMachineFunction()))
+ if (Subtarget->useMovt())
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
@@ -3288,7 +3446,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
- assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+ assert(Subtarget->useMovt() &&
"Windows on ARM expects to use movw/movt");
assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
"ROPI/RWPI not currently supported for Windows");
@@ -3309,7 +3467,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
TargetFlags));
if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
@@ -3615,7 +3773,8 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// argument passed via stack.
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
CCInfo.getInRegsParamsCount(),
- CCInfo.getNextStackOffset(), 4);
+ CCInfo.getNextStackOffset(),
+ std::max(4U, TotalArgRegsSaveSize));
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3891,6 +4050,22 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+
+ // If the RHS is a constant zero then the V (overflow) flag will never be
+ // set. This can allow us to simplify GE to PL or LT to MI, which can be
+ // simpler for other passes (like the peephole optimiser) to deal with.
+ if (isNullConstant(RHS)) {
+ switch (CondCode) {
+ default: break;
+ case ARMCC::GE:
+ CondCode = ARMCC::PL;
+ break;
+ case ARMCC::LT:
+ CondCode = ARMCC::MI;
+ break;
+ }
+ }
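// Editorial worked example, not part of the patch: for "icmp sge i32 %x, 0"
// the CMP computes %x - 0, which can never signed-overflow, so V is always
// clear; GE (N == V) therefore collapses to PL (N == 0), and LT (N != V)
// collapses to MI (N == 1).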
+
ARMISD::NodeType CompareType;
switch (CondCode) {
default:
@@ -3910,7 +4085,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool InvalidOnQNaN) const {
- assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
+ assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
@@ -4175,18 +4350,18 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
// Start by selecting the GE condition code for opcodes that return true for
// 'equality'
if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
- CC == ISD::SETULE)
+ CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
CondCode = ARMCC::GE;
// and GT for opcodes that return false for 'equality'.
else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
- CC == ISD::SETULT)
+ CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
CondCode = ARMCC::GT;
// Since we are constrained to GE/GT, if the opcode contains 'less', we need
// to swap the compare operands.
if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
- CC == ISD::SETULT)
+ CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
swpCmpOps = true;
// Both GT and GE are ordered comparisons, and return false for 'unordered'.
@@ -4212,8 +4387,9 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
// 'unordered or not equal' is 'anything but equal', so use the EQ condition
- // code and swap the VSEL operands.
- if (CC == ISD::SETUNE) {
+ // code and swap the VSEL operands. Also do this if we don't care about the
+ // unordered case.
+ if (CC == ISD::SETUNE || CC == ISD::SETNE) {
CondCode = ARMCC::EQ;
swpVselOps = true;
}
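// Editorial example, not part of the patch: for SETLT the code above selects
// GT (a strict, false-on-equality code) and sets swpCmpOps, so the compare is
// issued as "y > x"; for SETNE/SETUNE it selects EQ and sets swpVselOps, so
// the VSEL picks the other operand exactly when the values compare not equal
// (or unordered).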
@@ -4222,7 +4398,7 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue TrueVal, SDValue ARMcc, SDValue CCR,
SDValue Cmp, SelectionDAG &DAG) const {
- if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
+ if (!Subtarget->hasFP64() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
@@ -4428,6 +4604,16 @@ static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
return false;
}
+bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
+ if (VT == MVT::f32)
+ return !Subtarget->hasVFP2Base();
+ if (VT == MVT::f64)
+ return !Subtarget->hasFP64();
+ if (VT == MVT::f16)
+ return !Subtarget->hasFullFP16();
+ return false;
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -4471,9 +4657,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
- if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
- DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
- dl);
+ if (isUnsupportedFloatingType(LHS.getValueType())) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(
+ DAG, LHS.getValueType(), LHS, RHS, CC, dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
@@ -4494,8 +4680,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// inverting the compare condition, swapping 'less' and 'greater') and
// sometimes need to swap the operands to the VSEL (which inverts the
// condition in the sense of firing whenever the previous condition didn't)
- if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
+ TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
@@ -4507,6 +4694,9 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ // Choose GE over PL, which vsel does not support
+ if (cast<ConstantSDNode>(ARMcc)->getZExtValue() == ARMCC::PL)
+ ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
}
@@ -4514,12 +4704,15 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
bool InvalidOnQNaN;
FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
- // Normalize the fp compare. If RHS is zero we keep it there so we match
- // CMPFPw0 instead of CMPFP.
- if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
- (TrueVal.getValueType() == MVT::f16 ||
- TrueVal.getValueType() == MVT::f32 ||
- TrueVal.getValueType() == MVT::f64)) {
+ // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
+ // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
+ // must use VSEL (limited condition codes), due to not having conditional f16
+ // moves.
+ if (Subtarget->hasFPARMv8Base() &&
+ !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
+ (TrueVal.getValueType() == MVT::f16 ||
+ TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -4708,9 +4901,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
- if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
- DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
- dl);
+ if (isUnsupportedFloatingType(LHS.getValueType())) {
+ DAG.getTargetLoweringInfo().softenSetCCOperands(
+ DAG, LHS.getValueType(), LHS, RHS, CC, dl);
// If softenSetCCOperands only returned one value, we should compare it to
// zero.
@@ -4855,7 +5048,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
- if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
+ if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -4919,7 +5112,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
- if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
+ if (isUnsupportedFloatingType(VT)) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
@@ -4952,7 +5145,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
if (VT == MVT::f64)
- Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
DAG.getConstant(32, dl, MVT::i32));
else /*if (VT == MVT::f32)*/
@@ -4960,11 +5153,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (SrcVT == MVT::f32) {
Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
if (VT == MVT::f64)
- Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
} else if (VT == MVT::f32)
- Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
@@ -5469,40 +5662,100 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
return Res;
}
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN ||
+ !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ int64_t ElementBits = VT.getScalarSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the count must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ int64_t ElementBits = VT.getScalarSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (!isIntrinsic)
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
+ if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
+ Cnt = -Cnt;
+ return true;
+ }
+ return false;
+}
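// Editorial example, not part of the patch: for a v4i32 SHL whose second
// operand is a BUILD_VECTOR splat of the constant 3, getVShiftImm returns
// Cnt = 3 and isVShiftLImm accepts it (0 <= 3 < 32), producing a VSHLIMM
// node below; a splat of 32 fails the range check and falls back to the
// shift-by-vector-register path instead.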
+
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ int64_t Cnt;
if (!VT.isVector())
return SDValue();
- // Lower vector shifts on NEON to use VSHL.
- assert(ST->hasNEON() && "unexpected vector shift");
+ // We essentially have two forms here: shift by an immediate and shift by a
+ // vector register (there is also a shift by a GPR, but that is just handled
+ // with a tablegen pattern). We cannot easily match shift by an immediate in
+ // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
+ // For shifting by a vector, we don't have VSHR, only VSHL (which can be
+ // signed or unsigned, and a negative shift indicates a shift right).
+ if (N->getOpcode() == ISD::SHL) {
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
+ N->getOperand(1));
+ }
- // Left shifts translate directly to the vshiftu intrinsic.
- if (N->getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
- MVT::i32),
- N->getOperand(0), N->getOperand(1));
+ assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
+ "unexpected vector shift opcode");
- assert((N->getOpcode() == ISD::SRA ||
- N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc =
+ (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
+ return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ }
- // NEON uses the same intrinsics for both left and right shifts. For
- // right shifts, the shift amounts are negative, so negate the vector of
- // shift amounts.
+ // We have no operations for the other right shifts, so emit a shift left by
+ // a negated (negative) shift amount instead.
EVT ShiftVT = N->getOperand(1).getValueType();
- SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
- getZeroVector(ShiftVT, DAG, dl),
- N->getOperand(1));
- Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
- Intrinsic::arm_neon_vshifts :
- Intrinsic::arm_neon_vshiftu);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(vshiftInt, dl, MVT::i32),
- N->getOperand(0), NegatedCount);
+ SDValue NegatedCount = DAG.getNode(
+ ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
+ unsigned VShiftOpc =
+ (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
+ return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
}
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
@@ -5514,15 +5767,59 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
if (VT != MVT::i64)
return SDValue();
- assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SHL) &&
"Unknown shift to lower!");
+ unsigned ShOpc = N->getOpcode();
+ if (ST->hasMVEIntegerOps()) {
+ SDValue ShAmt = N->getOperand(1);
+ unsigned ShPartsOpc = ARMISD::LSLL;
+ ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
+
+ // If the shift amount is greater than 32 then do the default optimisation
+ if (Con && Con->getZExtValue() > 32)
+ return SDValue();
+
+ // Extract the lower 32 bits of the shift amount if it's an i64
+ if (ShAmt->getValueType(0) == MVT::i64)
+ ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
+ DAG.getConstant(0, dl, MVT::i32));
+
+ if (ShOpc == ISD::SRL) {
+ if (!Con)
+ // There is no t2LSRLr instruction so negate and perform an lsll if the
+ // shift amount is in a register, emulating a right shift.
+ ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(0, dl, MVT::i32), ShAmt);
+ else
+ // Else generate an lsrl on the immediate shift amount
+ ShPartsOpc = ARMISD::LSRL;
+ } else if (ShOpc == ISD::SRA)
+ ShPartsOpc = ARMISD::ASRL;
+
+ // Lower 32 bits of the destination/source
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, dl, MVT::i32));
+ // Upper 32 bits of the destination/source
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ // Generate the shift operation as computed above
+ Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
+ ShAmt);
+ // The upper 32 bits come from the second return value of lsll
+ Hi = SDValue(Lo.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
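// Editorial illustration, not part of the patch: the block above splits the
// i64 into Lo/Hi halves and shifts them as the pair Hi:Lo with a single
// LSLL/LSRL/ASRL; when a logical right shift amount is in a register (no
// t2LSRLr form), the amount is negated so LSLL can emulate the right shift,
// and the two results are stitched back together with BUILD_PAIR.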
+
// We only lower SRA, SRL of 1 here, all others use generic lowering.
- if (!isOneConstant(N->getOperand(1)))
+ if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
return SDValue();
// If we are in thumb mode, we don't have RRX.
- if (ST->isThumb1Only()) return SDValue();
+ if (ST->isThumb1Only())
+ return SDValue();
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
@@ -5731,7 +6028,7 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
}
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
-/// valid vector constant for a NEON instruction with a "modified immediate"
+/// valid vector constant for a NEON or MVE instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
@@ -5817,6 +6114,10 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
break;
}
+ // cmode == 0b1101 is not supported for MVE VMVN
+ if (type == MVEVMVNModImm)
+ return SDValue();
+
if ((SplatBits & ~0xffffff) == 0 &&
((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
// Value = 0x00nnffff: Op=x, Cmode=1101.
@@ -5902,12 +6203,12 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
}
}
- if (!ST->hasVFP3())
+ if (!ST->hasVFP3Base())
return SDValue();
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU
- if (IsDouble && Subtarget->isFPOnlySP())
+ if (IsDouble && !Subtarget->hasFP64())
return SDValue();
// Try splatting with a VMOV.f32...
@@ -6383,13 +6684,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (SplatUndef.isAllOnesValue())
return DAG.getUNDEF(VT);
- if (SplatBitSize <= 64) {
+ if ((ST->hasNEON() && SplatBitSize <= 64) ||
+ (ST->hasMVEIntegerOps() && SplatBitSize <= 32)) {
// Check if an immediate VMOV works.
EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
SplatUndef.getZExtValue(), SplatBitSize,
DAG, dl, VmovVT, VT.is128BitVector(),
VMOVModImm);
+
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -6397,10 +6700,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Try an immediate VMVN.
uint64_t NegatedImm = (~SplatBits).getZExtValue();
- Val = isNEONModifiedImm(NegatedImm,
- SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VmovVT, VT.is128BitVector(),
- VMVNModImm);
+ Val = isNEONModifiedImm(
+ NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, dl, VmovVT, VT.is128BitVector(),
+ ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
@@ -6515,10 +6818,13 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
+ MVT FVT = VT.getVectorElementType().getSimpleVT();
+ assert(FVT == MVT::f32 || FVT == MVT::f16);
+ MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
Op.getOperand(i)));
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
@@ -6544,7 +6850,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return shuffle;
}
- if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
+ if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
// If we haven't found an efficient lowering, try splitting a 128-bit vector
// into two 64-bit vectors; we might discover a better way to lower it.
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
@@ -6799,6 +7105,38 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
+enum ShuffleOpCodes {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+};
+
+static bool isLegalMVEShuffleOp(unsigned PFEntry) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ switch (OpNum) {
+ case OP_COPY:
+ case OP_VREV:
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3:
+ return true;
+ }
+ return false;
+}
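// A sketch of how the PerfectShuffleTable entries consulted here are packed
// (the helper name below is illustrative only, not part of the patch): 2 bits
// of cost, 4 bits of ShuffleOpCodes opcode, and two 13-bit operand entries,
// matching the masks used in isShuffleMaskLegal and GeneratePerfectShuffle.
struct PFEntryFields { unsigned Cost, OpNum, LHSID, RHSID; };
static PFEntryFields decodePFEntry(unsigned PFEntry) {
  return { PFEntry >> 30,                       // cost of the expansion
           (PFEntry >> 26) & 0x0Fu,             // ShuffleOpCodes value
           (PFEntry >> 13) & ((1u << 13) - 1),  // left operand table entry
           PFEntry & ((1u << 13) - 1) };        // right operand table entry
}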
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -6820,7 +7158,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
- if (Cost <= 4)
+ if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
return true;
}
@@ -6828,15 +7166,22 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
unsigned Imm, WhichResult;
unsigned EltSize = VT.getScalarSizeInBits();
- return (EltSize >= 32 ||
- ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isVREVMask(M, VT, 64) ||
- isVREVMask(M, VT, 32) ||
- isVREVMask(M, VT, 16) ||
- isVEXTMask(M, VT, ReverseVEXT, Imm) ||
- isVTBLMask(M, VT) ||
- isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
- ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
+ if (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isVREVMask(M, VT, 64) ||
+ isVREVMask(M, VT, 32) ||
+ isVREVMask(M, VT, 16))
+ return true;
+ else if (Subtarget->hasNEON() &&
+ (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
+ isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
+ return true;
+ else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ isReverseMask(M, VT))
+ return true;
+ else
+ return false;
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -6848,24 +7193,6 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
- enum {
- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
- OP_VREV,
- OP_VDUP0,
- OP_VDUP1,
- OP_VDUP2,
- OP_VDUP3,
- OP_VEXT1,
- OP_VEXT2,
- OP_VEXT3,
- OP_VUZPL, // VUZP, left result
- OP_VUZPR, // VUZP, right result
- OP_VZIPL, // VZIP, left result
- OP_VZIPR, // VZIP, right result
- OP_VTRNL, // VTRN, left result
- OP_VTRNR // VTRN, right result
- };
-
if (OpNum == OP_COPY) {
if (LHSID == (1*9+2)*9+3) return LHS;
assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
@@ -6955,7 +7282,8 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
DAG.getConstant(ExtractNum, DL, MVT::i32));
}
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
@@ -6999,9 +7327,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(Lane, dl, MVT::i32));
}
- bool ReverseVEXT;
- unsigned Imm;
- if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ bool ReverseVEXT = false;
+ unsigned Imm = 0;
+ if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
if (ReverseVEXT)
std::swap(V1, V2);
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
@@ -7015,7 +7343,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (isVREVMask(ShuffleMask, VT, 16))
return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
- if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+ if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
@@ -7025,14 +7353,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// source operands and with masks corresponding to both results of one of
// these operations, DAG memoization will ensure that a single node is
// used for both shuffles.
- unsigned WhichResult;
- bool isV_UNDEF;
- if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
- ShuffleMask, VT, WhichResult, isV_UNDEF)) {
- if (isV_UNDEF)
- V2 = V1;
- return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
- .getValue(WhichResult);
+ unsigned WhichResult = 0;
+ bool isV_UNDEF = false;
+ if (ST->hasNEON()) {
+ if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+ ShuffleMask, VT, WhichResult, isV_UNDEF)) {
+ if (isV_UNDEF)
+ V2 = V1;
+ return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
+ .getValue(WhichResult);
+ }
}
// Also check for these shuffles through CONCAT_VECTORS: we canonicalize
@@ -7050,7 +7380,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// ->
// concat(VZIP(v1, v2):0, :1)
//
- if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
+ if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
SDValue SubV1 = V1->getOperand(0);
SDValue SubV2 = V1->getOperand(1);
EVT SubVT = SubV1.getValueType();
@@ -7092,8 +7422,18 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
- if (Cost <= 4)
- return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ if (Cost <= 4) {
+ if (ST->hasNEON())
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ else if (isLegalMVEShuffleOp(PFEntry)) {
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+ unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
+ unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
+ if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+ }
}
// Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
@@ -7118,22 +7458,50 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
- if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
- if (VT == MVT::v8i8)
+ if (ST->hasNEON() && VT == MVT::v8i8)
if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
return NewOp;
return SDValue();
}
-static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::
+LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
// INSERT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(2);
if (!isa<ConstantSDNode>(Lane))
return SDValue();
+ SDValue Elt = Op.getOperand(1);
+ EVT EltVT = Elt.getValueType();
+ if (getTypeAction(*DAG.getContext(), EltVT) ==
+ TargetLowering::TypePromoteFloat) {
+ // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
+ // but the type system will try to do that if we don't intervene.
+ // Reinterpret any such vector-element insertion as one with the
+ // corresponding integer types.
+
+ SDLoc dl(Op);
+
+ EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
+ assert(getTypeAction(*DAG.getContext(), IEltVT) !=
+ TargetLowering::TypePromoteFloat);
+
+ SDValue VecIn = Op.getOperand(0);
+ EVT VecVT = VecIn.getValueType();
+ EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
+ VecVT.getVectorNumElements());
+
+ SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
+ SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
+ SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
+ IVecIn, IElt, Lane);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
+ }
+
return Op;
}
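
The workaround in this hunk is purely a bit reinterpretation: rather than inserting an f16 element (which type legalization would promote to f32), the same 16 bits are inserted into the matching integer vector and the result is bitcast back. A small standalone illustration of the idea, using uint16_t payloads as a stand-in for f16 bit patterns (hypothetical helper, not DAG code):

// Standalone illustration of the f16 insert workaround: the half's bit
// pattern goes into an integer vector of the same width, then the whole
// vector is reinterpreted back (no real half type is needed here).
#include <array>
#include <cstdint>
#include <cstdio>

using V8F16Bits = std::array<uint16_t, 8>;  // bit patterns of a v8f16

static V8F16Bits insertElementBits(V8F16Bits Vec, uint16_t EltBits,
                                   unsigned Lane) {
  // Equivalent of: bitcast v8f16 -> v8i16, INSERT_VECTOR_ELT, bitcast back.
  Vec[Lane] = EltBits;
  return Vec;
}

int main() {
  V8F16Bits V{};                               // all lanes zero
  V = insertElementBits(V, 0x3C00 /* 1.0 as IEEE half */, 3);
  std::printf("lane3=0x%04X\n", V[3]);
}
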
@@ -7809,8 +8177,7 @@ ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue();
const auto &ST = static_cast<const ARMSubtarget&>(DAG.getSubtarget());
- const auto &MF = DAG.getMachineFunction();
- const bool MinSize = MF.getFunction().optForMinSize();
+ const bool MinSize = ST.hasMinSize();
const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
: ST.hasDivideInARMMode();
@@ -8063,7 +8430,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
- case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
@@ -8149,6 +8516,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
break;
case ISD::SRL:
case ISD::SRA:
+ case ISD::SHL:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
case ISD::SREM:
@@ -8175,6 +8543,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::INTRINSIC_WO_CHAIN:
return ReplaceLongIntrinsic(N, Results, DAG);
+ case ISD::ABS:
+ lowerABS(N, Results, DAG);
+ return;
+
}
if (Res.getNode())
Results.push_back(Res);
@@ -8980,7 +9352,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (Subtarget->useMovt(*MF)) {
+ if (Subtarget->useMovt()) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
@@ -9003,18 +9375,23 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
if (Align == 0)
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+ MachineMemOperand *CPMMO =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
if (IsThumb)
BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(CPMMO);
else
BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
.addReg(varEnd, RegState::Define)
.addConstantPoolIndex(Idx)
.addImm(0)
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(CPMMO);
}
BB->addSuccessor(loopMBB);
@@ -9262,7 +9639,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.add(MI.getOperand(2)) // Rn
.add(MI.getOperand(3)) // PredImm
.add(MI.getOperand(4)) // PredReg
- .add(MI.getOperand(0)); // Rt
+ .add(MI.getOperand(0)) // Rt
+ .cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
@@ -10372,6 +10750,22 @@ static SDValue PerformAddeSubeCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformABSCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue res;
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
+ return SDValue();
+
+ if (!TLI.expandABS(N, res, DAG))
+ return SDValue();
+
+ return res;
+}
+
/// PerformADDECombine - Target-specific dag combine transform from
/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
@@ -10419,11 +10813,28 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
if (Level == BeforeLegalizeTypes)
return true;
- if (Subtarget->isThumb() && Subtarget->isThumb1Only())
+ if (N->getOpcode() != ISD::SHL)
return true;
- if (N->getOpcode() != ISD::SHL)
+ if (Subtarget->isThumb1Only()) {
+ // Avoid making expensive immediates by commuting shifts. (This logic
+ // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
+ // for free.)
+ if (N->getOpcode() != ISD::SHL)
+ return true;
+ SDValue N1 = N->getOperand(0);
+ if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
+ N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
+ return true;
+ if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
+ if (Const->getAPIntValue().ult(256))
+ return false;
+ if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
+ Const->getAPIntValue().sgt(-256))
+ return false;
+ }
return true;
+ }
// Turn off commute-with-shift transform after legalization, so it doesn't
// conflict with PerformSHLSimplify. (We could try to detect when
@@ -10432,9 +10843,8 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
return false;
}
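
The Thumb1 special case above is really a question of immediate cost: commuting would move the constant under the shift, so the combine is declined whenever the constant already fits a cheap Thumb1 encoding. A standalone sketch of that predicate, using plain signed integers as an approximation of the APInt ult/slt/sgt tests in the code above:

// Standalone model of the Thumb1 commute check: return false (keep the
// shift where it is) when the constant is already a cheap immediate,
// i.e. 0 <= C < 256, or -256 < C < 0 for an ADD (encodable as a SUB).
#include <cassert>
#include <cstdint>

enum class BinOp { Add, And, Or, Xor };

static bool desirableToCommute(BinOp Op, int64_t C) {
  if (C >= 0 && C < 256)
    return false;                       // already a cheap immediate
  if (Op == BinOp::Add && C < 0 && C > -256)
    return false;                       // cheap as a SUB immediate
  return true;                          // commuting cannot make it worse
}

int main() {
  assert(!desirableToCommute(BinOp::And, 255));
  assert(!desirableToCommute(BinOp::Add, -17));
  assert(desirableToCommute(BinOp::Or, 4096)); // large mask: commuting is fine
  return 0;
}
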
-bool
-ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
- CombineLevel Level) const {
+bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
+ const SDNode *N, CombineLevel Level) const {
if (!Subtarget->isThumb1Only())
return true;
@@ -10444,6 +10854,15 @@ ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
return false;
}
+bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+ if (!Subtarget->hasNEON()) {
+ if (Subtarget->isThumb1Only())
+ return VT.getScalarSizeInBits() <= 32;
+ return true;
+ }
+ return VT.isScalarInteger();
+}
+
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -10830,7 +11249,7 @@ static SDValue PerformANDCombine(SDNode *N,
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- if (BVN &&
+ if (BVN && Subtarget->hasNEON() &&
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
EVT VbicVT;
@@ -11308,7 +11727,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
// vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
// vmovrrd(load f64) -> (load i32), (load i32)
@@ -11329,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, DL, MVT::i32));
- SDValue NewLD2 = DAG.getLoad(
- MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
- std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
+
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
+ LD->getPointerInfo().getWithOffset(4),
+ std::min(4U, LD->getAlignment()),
+ LD->getMemOperand()->getFlags());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
if (DCI.DAG.getDataLayout().isBigEndian())
@@ -11922,10 +12343,14 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
static SDValue PerformVDUPCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
SDValue Op = N->getOperand(0);
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
// Match VDUP(LOAD) -> VLD1DUP.
// We match this pattern here rather than waiting for isel because the
// transform is only legal for unindexed loads.
@@ -12132,11 +12557,11 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
+ if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
- // be lossy. We also can't handle more then 4 lanes, since these intructions
- // only support v2i32/v4i32 types.
+ // be lossy. We also can't handle anything other than 2 or 4 lanes, since
+ // these instructions only support v2i32/v4i32 types.
return SDValue();
}
@@ -12190,11 +12615,11 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
+ if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
// These instructions only exist converting from i32 to f32. We can handle
// smaller integers by generating an extra extend, but larger ones would
- // be lossy. We also can't handle more then 4 lanes, since these intructions
- // only support v2i32/v4i32 types.
+ // be lossy. We also can't handle anything other than 2 or 4 lanes, since
+ // these instructions only support v2i32/v4i32 types.
return SDValue();
}
@@ -12220,58 +12645,6 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
-/// Getvshiftimm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift operation, where all the elements of the
-/// build_vector must have the same constant integer value.
-static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
- // Ignore bit_converts.
- while (Op.getOpcode() == ISD::BITCAST)
- Op = Op.getOperand(0);
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElementBits) ||
- SplatBitSize > ElementBits)
- return false;
- Cnt = SplatBits.getSExtValue();
- return true;
-}
-
-/// isVShiftLImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift left operation. That value must be in the range:
-/// 0 <= Value < ElementBits for a left shift; or
-/// 0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getScalarSizeInBits();
- if (! getVShiftImm(Op, ElementBits, Cnt))
- return false;
- return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
-}
-
-/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation. For a shift opcode, the value
-/// is positive, but for an intrinsic the value count must be negative. The
-/// absolute value must be in the range:
-/// 1 <= |Value| <= ElementBits for a right shift; or
-/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
- int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getScalarSizeInBits();
- if (! getVShiftImm(Op, ElementBits, Cnt))
- return false;
- if (!isIntrinsic)
- return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
- if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
- Cnt = -Cnt;
- return true;
- }
- return false;
-}
-
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
@@ -12307,12 +12680,12 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
- VShiftOpc = ARMISD::VSHL;
+ VShiftOpc = ARMISD::VSHLIMM;
break;
}
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
- VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
- ARMISD::VSHRs : ARMISD::VSHRu);
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
+ : ARMISD::VSHRuIMM);
break;
}
return SDValue();
@@ -12357,29 +12730,41 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
// Opcode already set above.
break;
case Intrinsic::arm_neon_vrshifts:
- VShiftOpc = ARMISD::VRSHRs; break;
+ VShiftOpc = ARMISD::VRSHRsIMM;
+ break;
case Intrinsic::arm_neon_vrshiftu:
- VShiftOpc = ARMISD::VRSHRu; break;
+ VShiftOpc = ARMISD::VRSHRuIMM;
+ break;
case Intrinsic::arm_neon_vrshiftn:
- VShiftOpc = ARMISD::VRSHRN; break;
+ VShiftOpc = ARMISD::VRSHRNIMM;
+ break;
case Intrinsic::arm_neon_vqshifts:
- VShiftOpc = ARMISD::VQSHLs; break;
+ VShiftOpc = ARMISD::VQSHLsIMM;
+ break;
case Intrinsic::arm_neon_vqshiftu:
- VShiftOpc = ARMISD::VQSHLu; break;
+ VShiftOpc = ARMISD::VQSHLuIMM;
+ break;
case Intrinsic::arm_neon_vqshiftsu:
- VShiftOpc = ARMISD::VQSHLsu; break;
+ VShiftOpc = ARMISD::VQSHLsuIMM;
+ break;
case Intrinsic::arm_neon_vqshiftns:
- VShiftOpc = ARMISD::VQSHRNs; break;
+ VShiftOpc = ARMISD::VQSHRNsIMM;
+ break;
case Intrinsic::arm_neon_vqshiftnu:
- VShiftOpc = ARMISD::VQSHRNu; break;
+ VShiftOpc = ARMISD::VQSHRNuIMM;
+ break;
case Intrinsic::arm_neon_vqshiftnsu:
- VShiftOpc = ARMISD::VQSHRNsu; break;
+ VShiftOpc = ARMISD::VQSHRNsuIMM;
+ break;
case Intrinsic::arm_neon_vqrshiftns:
- VShiftOpc = ARMISD::VQRSHRNs; break;
+ VShiftOpc = ARMISD::VQRSHRNsIMM;
+ break;
case Intrinsic::arm_neon_vqrshiftnu:
- VShiftOpc = ARMISD::VQRSHRNu; break;
+ VShiftOpc = ARMISD::VQRSHRNuIMM;
+ break;
case Intrinsic::arm_neon_vqrshiftnsu:
- VShiftOpc = ARMISD::VQRSHRNsu; break;
+ VShiftOpc = ARMISD::VQRSHRNsuIMM;
+ break;
}
SDLoc dl(N);
@@ -12393,9 +12778,9 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned VShiftOpc = 0;
if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
- VShiftOpc = ARMISD::VSLI;
+ VShiftOpc = ARMISD::VSLIIMM;
else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
- VShiftOpc = ARMISD::VSRI;
+ VShiftOpc = ARMISD::VSRIIMM;
else {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
@@ -12420,8 +12805,10 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
/// combining instead of DAG legalizing because the build_vectors for 64-bit
/// vector element shift counts are generally not legal, and it is hard to see
/// their values after they get legalized to loads from a constant pool.
-static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformShiftCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
+ SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
// Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
@@ -12436,12 +12823,47 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
+ N->getOperand(0)->getOpcode() == ISD::AND &&
+ N->getOperand(0)->hasOneUse()) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+ // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
+ // usually show up because instcombine prefers to canonicalize it to
+ // (and (shl x, ShiftAmt), (shl AndMask, ShiftAmt)), but the shift can come
+ // out of GEP lowering in some cases.
+ SDValue N0 = N->getOperand(0);
+ ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShiftAmtNode)
+ return SDValue();
+ uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
+ ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!AndMaskNode)
+ return SDValue();
+ uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
+ // Don't transform uxtb/uxth.
+ if (AndMask == 255 || AndMask == 65535)
+ return SDValue();
+ if (isMask_32(AndMask)) {
+ uint32_t MaskedBits = countLeadingZeros(AndMask);
+ if (MaskedBits > ShiftAmt) {
+ SDLoc DL(N);
+ SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
+ DAG.getConstant(MaskedBits, DL, MVT::i32));
+ return DAG.getNode(
+ ISD::SRL, DL, MVT::i32, SHL,
+ DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
+ }
+ }
+ }
+
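
The identity behind this rewrite is easy to check in isolation: when AndMask is a low-bit mask with MaskedBits leading zero bits and MaskedBits > ShiftAmt, ((x & AndMask) << ShiftAmt) equals ((x << MaskedBits) >> (MaskedBits - ShiftAmt)) with a logical right shift. A self-contained sketch (plain C++, not LLVM code) that exercises the equivalence:

// Standalone check of the (shl (and x, Mask), Amt) -> (srl (shl x, K), K - Amt)
// rewrite, where Mask is a low-bit mask and K = countLeadingZeros(Mask).
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t viaAndThenShl(uint32_t X, uint32_t Mask, uint32_t Amt) {
  return (X & Mask) << Amt;
}

static uint32_t viaShlThenSrl(uint32_t X, uint32_t MaskedBits, uint32_t Amt) {
  return (X << MaskedBits) >> (MaskedBits - Amt);
}

int main() {
  const uint32_t Mask = 0x3FF;         // low 10 bits set, so 22 leading zeros
  const uint32_t MaskedBits = 22;
  for (uint32_t Amt = 1; Amt < MaskedBits; ++Amt)
    for (uint32_t X : {0u, 1u, 0x12345678u, 0xFFFFFFFFu, 0x80000001u})
      assert(viaAndThenShl(X, Mask, Amt) == viaShlThenSrl(X, MaskedBits, Amt));
  return 0;
}
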
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
+ if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+ return SDValue();
- assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;
switch (N->getOpcode()) {
@@ -12450,7 +12872,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
SDLoc dl(N);
- return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
+ return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
}
break;
@@ -12458,8 +12880,8 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SRA:
case ISD::SRL:
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
- unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
- ARMISD::VSHRs : ARMISD::VSHRu);
+ unsigned VShiftOpc =
+ (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
@@ -12606,6 +13028,45 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
return V;
}
+static SDValue PerformHWLoopCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *ST) {
+ // Look for (brcond (xor test.set.loop.iterations, -1))
+ SDValue CC = N->getOperand(1);
+ unsigned Opc = CC->getOpcode();
+ SDValue Int;
+
+ if ((Opc == ISD::XOR || Opc == ISD::SETCC) &&
+ (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) {
+
+ assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
+ cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
+ "Expected to compare against 1");
+
+ Int = CC->getOperand(0);
+ } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN)
+ Int = CC;
+ else
+ return SDValue();
+
+ unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
+ if (IntOp != Intrinsic::test_set_loop_iterations)
+ return SDValue();
+
+ SDLoc dl(Int);
+ SDValue Chain = N->getOperand(0);
+ SDValue Elements = Int.getOperand(2);
+ SDValue ExitBlock = N->getOperand(2);
+
+ // TODO: Once we start supporting tail predication, we can add another
+ // operand to WLS for the number of elements processed in a vector loop.
+
+ SDValue Ops[] = { Chain, Elements, ExitBlock };
+ SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+ DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
+ return Res;
+}
+
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
SDValue
ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -12779,15 +13240,21 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// On Thumb1, the DAG above may be further combined if z is a power of 2
// (z == 2 ^ K).
// CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
- // merge t3, t4
- // where t1 = (SUBCARRY (SUB x, y), z, 0)
- // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
- // t3 = if K != 0 then (SHL t2:0, K) else t2:0
- // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ]
+ // t1 = (USUBO (SUB x, y), 1)
+ // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
+ // Result = if K != 0 then (SHL t2:0, K) else t2:0
+ //
+ // This also handles the special case of comparing against zero; it's
+ // essentially the same pattern, except there's no SUBS:
+ // CMOV x, z, !=, (CMPZ x, 0) ->
+ // t1 = (USUBO x, 1)
+ // t2 = (SUBCARRY x, t1:0, t1:1)
+ // Result = if K != 0 then (SHL t2:0, K) else t2:0
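
The carry arithmetic in this comment can be sanity-checked outside the DAG: for d = x - y, USUBO(d, 1) produces a borrow exactly when d == 0, and SUBCARRY(d, d - 1, borrow) then yields 1 when d != 0 and 0 otherwise, so shifting left by K reproduces the select of 2^K. A standalone model of that arithmetic (plain C++, not SelectionDAG nodes):

// Standalone model of the USUBO/SUBCARRY sequence described above:
//   t1 = (USUBO d, 1)        -> value d - 1, borrow (d < 1)
//   t2 = (SUBCARRY d, t1, b) -> value d - (d - 1) - b
//   result = t2 << K
// which equals (d != 0) ? (1u << K) : 0.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t cmovPow2(uint32_t d, unsigned K) {
  uint32_t T1 = d - 1u;
  uint32_t Borrow1 = (d < 1u) ? 1u : 0u;          // borrow out of USUBO
  uint32_t T2 = d - T1 - Borrow1;                 // SUBCARRY value
  return T2 << K;
}

int main() {
  for (unsigned K = 0; K < 8; ++K)
    for (uint32_t d : {0u, 1u, 2u, 123u, 0xFFFFFFFFu})
      assert(cmovPow2(d, K) == (d != 0 ? (1u << K) : 0u));
  return 0;
}
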
const APInt *TrueConst;
if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
- (FalseVal.getOpcode() == ARMISD::SUBS) &&
- (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) &&
+ ((FalseVal.getOpcode() == ARMISD::SUBS &&
+ FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
+ (FalseVal == LHS && isNullConstant(RHS))) &&
(TrueConst = isPowerOf2Constant(TrueVal))) {
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned ShiftAmount = TrueConst->logBase2();
@@ -12795,10 +13262,6 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
TrueVal = DAG.getConstant(1, dl, VT);
SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
- // Make it a carry, not a borrow.
- SDValue Carry = DAG.getNode(
- ISD::SUB, dl, VT, DAG.getConstant(1, dl, MVT::i32), Res.getValue(1));
- Res = DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Res, Carry);
if (ShiftAmount)
Res = DAG.getNode(ISD::SHL, dl, VT, Res,
@@ -12826,6 +13289,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
@@ -12834,6 +13298,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
+ case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget);
case ARMISD::ADDC:
case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
@@ -12845,7 +13310,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
- case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
+ case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
@@ -12854,7 +13319,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL:
+ return PerformShiftCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
@@ -12957,9 +13423,9 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}
-bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned,
- unsigned,
+bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
+ unsigned Alignment,
+ MachineMemOperand::Flags,
bool *Fast) const {
// Depends what it gets converted into if the type is weird.
if (!VT.isSimple())
@@ -12967,23 +13433,18 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
+ auto Ty = VT.getSimpleVT().SimpleTy;
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32: {
+ if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
// Unaligned access can use (for example) LDRB, LDRH, LDR
if (AllowsUnaligned) {
if (Fast)
*Fast = Subtarget->hasV7Ops();
return true;
}
- return false;
}
- case MVT::f64:
- case MVT::v2f64: {
+
+ if (Ty == MVT::f64 || Ty == MVT::v2f64) {
// For any little-endian targets with neon, we can support unaligned ld/st
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
// A big-endian target may also explicitly support unaligned accesses
@@ -12992,9 +13453,54 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
*Fast = true;
return true;
}
- return false;
}
+
+ if (!Subtarget->hasMVEIntegerOps())
+ return false;
+ if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
+ Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
+ Ty != MVT::v2f64 &&
+ // These are for truncated stores
+ Ty != MVT::v4i8 && Ty != MVT::v8i8 && Ty != MVT::v4i16)
+ return false;
+
+ if (Subtarget->isLittle()) {
+ // In little-endian MVE, the store instructions VSTRB.U8,
+ // VSTRH.U16 and VSTRW.U32 all store the vector register in
+ // exactly the same format, and differ only in the range of
+ // their immediate offset field and the required alignment.
+ //
+ // In particular, VSTRB.U8 can store a vector at byte alignment.
+ // So at this stage we can simply say that loads/stores of all
+ // 128-bit wide vector types are permitted at any alignment,
+ // because we know at least _one_ instruction can manage that.
+ //
+ // Later on we might find that some of those loads are better
+ // generated as VLDRW.U32 if alignment permits, to take
+ // advantage of the larger immediate range. But for the moment,
+ // all that matters is that if we don't lower the load then
+ // _some_ instruction can handle it.
+ if (Fast)
+ *Fast = true;
+ return true;
+ } else {
+ // In big-endian MVE, those instructions aren't so similar
+ // after all, because they reorder the bytes of the vector
+ // differently. So this time we can only store a particular
+ // kind of vector if its alignment is at least the element
+ // type. And we can't store vectors of i64 or f64 at all
+ // without having to do some postprocessing, because there's
+ // no VSTRD.U64.
+ if (Ty == MVT::v16i8 ||
+ ((Ty == MVT::v8i16 || Ty == MVT::v8f16) && Alignment >= 2) ||
+ ((Ty == MVT::v4i32 || Ty == MVT::v4f32) && Alignment >= 4)) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
}
+
+ return false;
}
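
Stripped of the DAG plumbing, the MVE part of this predicate reduces to a small decision table: on little-endian targets any of the listed 128-bit (or truncating-store) types is fine at any alignment, while on big-endian targets only byte, half and word element types are allowed, and only when the alignment reaches the element size. A standalone sketch of that rule, using a tiny enum in place of MVT (illustrative, not the LLVM type system):

// Standalone restatement of the MVE branch of allowsMisalignedMemoryAccesses.
// VecTy is a stand-in for the MVT cases handled above.
#include <cstdio>

enum class VecTy { v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64,
                   v4i8, v8i8, v4i16, Other };

static bool mveMisalignedOK(VecTy Ty, bool LittleEndian, unsigned Alignment) {
  if (Ty == VecTy::Other)
    return false;                       // not one of the supported MVE types
  if (LittleEndian)
    return true;                        // VSTRB.U8 can handle any alignment
  // Big-endian: need at least element-size alignment, and no 64-bit elements.
  switch (Ty) {
  case VecTy::v16i8:
    return true;
  case VecTy::v8i16:
  case VecTy::v8f16:
    return Alignment >= 2;
  case VecTy::v4i32:
  case VecTy::v4f32:
    return Alignment >= 4;
  default:
    return false;                       // v2i64/v2f64 and truncating types
  }
}

int main() {
  std::printf("%d %d %d\n",
              mveMisalignedOK(VecTy::v4i32, /*LittleEndian=*/true, 1),   // 1
              mveMisalignedOK(VecTy::v8f16, /*LittleEndian=*/false, 1),  // 0
              mveMisalignedOK(VecTy::v8f16, /*LittleEndian=*/false, 2)); // 1
}
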
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
@@ -13003,24 +13509,24 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
(DstAlign == 0 || DstAlign % AlignCheck == 0));
}
-EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
- const Function &F = MF.getFunction();
-
+EVT ARMTargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...
if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
- !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) {
+ (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1,
+ MachineMemOperand::MONone, &Fast) &&
+ Fast))) {
return MVT::v2f64;
} else if (Size >= 8 &&
(memOpAlign(SrcAlign, DstAlign, 8) ||
- (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) &&
+ (allowsMisalignedMemoryAccesses(
+ MVT::f64, 0, 1, MachineMemOperand::MONone, &Fast) &&
Fast))) {
return MVT::f64;
}
@@ -13089,6 +13595,46 @@ bool ARMTargetLowering::isFNegFree(EVT VT) const {
return false;
}
+/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
+/// of the vector elements.
+static bool areExtractExts(Value *Ext1, Value *Ext2) {
+ auto areExtDoubled = [](Instruction *Ext) {
+ return Ext->getType()->getScalarSizeInBits() ==
+ 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
+ };
+
+ if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
+ !match(Ext2, m_ZExtOrSExt(m_Value())) ||
+ !areExtDoubled(cast<Instruction>(Ext1)) ||
+ !areExtDoubled(cast<Instruction>(Ext2)))
+ return false;
+
+ return true;
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// sext/zext can be folded into vsubl.
+bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const {
+ if (!Subtarget->hasNEON() || !I->getType()->isVectorTy())
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Sub:
+ case Instruction::Add: {
+ if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
+ return false;
+ Ops.push_back(&I->getOperandUse(0));
+ Ops.push_back(&I->getOperandUse(1));
+ return true;
+ }
+ default:
+ return false;
+ }
+ return false;
+}
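
The profitability test above only fires when both operands are sign or zero extends that exactly double the element width, so they can fold into a single widening instruction. The width check itself is simple arithmetic; a minimal standalone sketch:

// Standalone form of the "extend doubles the bit width" test used by
// areExtractExts/shouldSinkOperands above.
#include <cassert>

static bool extendDoublesWidth(unsigned DstScalarBits, unsigned SrcScalarBits) {
  return DstScalarBits == 2 * SrcScalarBits;
}

int main() {
  assert(extendDoublesWidth(16, 8));    // i8  -> i16: candidate for folding
  assert(extendDoublesWidth(32, 16));   // i16 -> i32
  assert(!extendDoublesWidth(32, 8));   // i8  -> i32: not a single widening op
  return 0;
}
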
+
bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
EVT VT = ExtVal.getValueType();
@@ -13105,7 +13651,7 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
SDNode *U = *ExtVal->use_begin();
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
- U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
return false;
return true;
@@ -13142,7 +13688,6 @@ static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
unsigned Scale = 1;
switch (VT.getSimpleVT().SimpleTy) {
- default: return false;
case MVT::i1:
case MVT::i8:
// Scale == 1;
@@ -13151,7 +13696,8 @@ static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
// Scale == 2;
Scale = 2;
break;
- case MVT::i32:
+ default:
+ // On Thumb1 we load most things (i32, i64, floats, etc.) with an LDR
// Scale == 4;
Scale = 4;
break;
@@ -13159,38 +13705,58 @@ static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if ((V & (Scale - 1)) != 0)
return false;
- V /= Scale;
- return V == (V & ((1LL << 5) - 1));
+ return isUInt<5>(V / Scale);
}
static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
const ARMSubtarget *Subtarget) {
- bool isNeg = false;
+ if (!VT.isInteger() && !VT.isFloatingPoint())
+ return false;
+ if (VT.isVector() && Subtarget->hasNEON())
+ return false;
+ if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
+ !Subtarget->hasMVEFloatOps())
+ return false;
+
+ bool IsNeg = false;
if (V < 0) {
- isNeg = true;
- V = - V;
+ IsNeg = true;
+ V = -V;
}
- switch (VT.getSimpleVT().SimpleTy) {
- default: return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- // + imm12 or - imm8
- if (isNeg)
- return V == (V & ((1LL << 8) - 1));
- return V == (V & ((1LL << 12) - 1));
- case MVT::f32:
- case MVT::f64:
- // Same as ARM mode. FIXME: NEON?
- if (!Subtarget->hasVFP2())
- return false;
- if ((V & 3) != 0)
+ unsigned NumBytes = std::max(VT.getSizeInBits() / 8, 1U);
+
+ // MVE: size * imm7
+ if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
+ switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
+ case MVT::i32:
+ case MVT::f32:
+ return isShiftedUInt<7,2>(V);
+ case MVT::i16:
+ case MVT::f16:
+ return isShiftedUInt<7,1>(V);
+ case MVT::i8:
+ return isUInt<7>(V);
+ default:
return false;
- V >>= 2;
- return V == (V & ((1LL << 8) - 1));
+ }
}
+
+ // half VLDR: 2 * imm8
+ if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
+ return isShiftedUInt<8, 1>(V);
+ // VLDR and LDRD: 4 * imm8
+ if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
+ return isShiftedUInt<8, 2>(V);
+
+ if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
+ // + imm12 or - imm8
+ if (IsNeg)
+ return isUInt<8>(V);
+ return isUInt<12>(V);
+ }
+
+ return false;
}
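
The immediate-range helpers this hunk switches to (isUInt<N>, isShiftedUInt<N, S> from the MathExtras header) are just bit-width tests; the Thumb-2 rules above boil down to an imm7 scaled by the element size for MVE, an imm8 scaled by 2 or 4 for half loads, VLDR and LDRD, and otherwise imm12 positive or imm8 negative. A standalone sketch of those range tests, re-implementing the two helpers locally for illustration:

// Standalone versions of the range predicates used above (local
// re-implementations of isUInt/isShiftedUInt, for non-negative offsets).
#include <cassert>
#include <cstdint>

template <unsigned N> static bool isUIntN(int64_t V) {
  return V >= 0 && V < (int64_t(1) << N);
}
template <unsigned N, unsigned S> static bool isShiftedUIntN(int64_t V) {
  return (V % (int64_t(1) << S)) == 0 && isUIntN<N + S>(V);
}

int main() {
  // Thumb1 LDR: word-aligned imm5 scaled by 4 -> offsets 0..124.
  assert(isUIntN<5>(124 / 4) && !isUIntN<5>(128 / 4));
  // MVE word load/store: imm7 scaled by 4 -> offsets 0..508 in steps of 4.
  assert(isShiftedUIntN<7, 2>(508) && !isShiftedUIntN<7, 2>(510));
  // VLDR / LDRD: imm8 scaled by 4 -> offsets 0..1020 in steps of 4.
  assert(isShiftedUIntN<8, 2>(1020) && !isShiftedUIntN<8, 2>(1024));
  // Plain Thumb2 positive offset: imm12 -> 0..4095.
  assert(isUIntN<12>(4095) && !isUIntN<12>(4096));
  return 0;
}
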
/// isLegalAddressImmediate - Return true if the integer value can be used
@@ -13218,18 +13784,15 @@ static bool isLegalAddressImmediate(int64_t V, EVT VT,
case MVT::i8:
case MVT::i32:
// +- imm12
- return V == (V & ((1LL << 12) - 1));
+ return isUInt<12>(V);
case MVT::i16:
// +- imm8
- return V == (V & ((1LL << 8) - 1));
+ return isUInt<8>(V);
case MVT::f32:
case MVT::f64:
- if (!Subtarget->hasVFP2()) // FIXME: NEON?
+ if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
return false;
- if ((V & 3) != 0)
- return false;
- V >>= 2;
- return V == (V & ((1LL << 8) - 1));
+ return isShiftedUInt<8, 2>(V);
}
}
@@ -13649,13 +14212,13 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
EVT VT = Op.getValueType();
const unsigned DstSz = VT.getScalarSizeInBits();
const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+ (void)SrcSz;
assert(SrcSz == Known.getBitWidth());
assert(DstSz > SrcSz);
if (Op.getOpcode() == ARMISD::VGETLANEs)
Known = Known.sext(DstSz);
else {
- Known = Known.zext(DstSz);
- Known.Zero.setBitsFrom(SrcSz);
+ Known = Known.zext(DstSz, true /* extended bits are known zero */);
}
assert(DstSz == Known.getBitWidth());
break;
@@ -13790,7 +14353,7 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
- if (!Subtarget->hasVFP2())
+ if (!Subtarget->hasVFP2Base())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
@@ -13822,6 +14385,7 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
} else if (Constraint.size() == 2) {
switch (Constraint[0]) {
default: break;
+ case 'T': return C_RegisterClass;
// All 'U+' constraints are addresses.
case 'U': return C_Memory;
}
@@ -13867,7 +14431,8 @@ using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
- if (Constraint.size() == 1) {
+ switch (Constraint.size()) {
+ case 1:
// GCC ARM Constraint Letters
switch (Constraint[0]) {
case 'l': // Low regs or general regs.
@@ -13913,7 +14478,25 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
return RCPair(0U, &ARM::QPR_VFP2RegClass);
break;
}
+ break;
+
+ case 2:
+ if (Constraint[0] == 'T') {
+ switch (Constraint[1]) {
+ default:
+ break;
+ case 'e':
+ return RCPair(0U, &ARM::tGPREvenRegClass);
+ case 'o':
+ return RCPair(0U, &ARM::tGPROddRegClass);
+ }
+ }
+ break;
+
+ default:
+ break;
}
+
if (StringRef("{cc}").equals_lower(Constraint))
return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
@@ -14272,28 +14855,107 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
}
SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() &&
+ SDValue SrcVal = Op.getOperand(0);
+ const unsigned DstSz = Op.getValueType().getSizeInBits();
+ const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
+ assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
"Unexpected type for custom-lowering FP_EXTEND");
+ assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
+ "With both FP DP and 16, any FP conversion is legal!");
+
+ assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
+ "With FP16, 16 to 32 conversion is legal!");
+
+ // Either we are converting from 16 -> 64 without FP16 and/or without
+ // double-precision FP (or without Armv8 FP), so we must do it in two
+ // steps.
+ // Or we are converting from 32 -> 64 without double-precision FP, or from
+ // 16 -> 32 without FP16, so we must make a libcall.
+ SDLoc Loc(Op);
RTLIB::Libcall LC;
- LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+ if (SrcSz == 16) {
+ // Instruction from 16 -> 32
+ if (Subtarget->hasFP16())
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, SrcVal);
+ // Lib call from 16 -> 32
+ else {
+ LC = RTLIB::getFPEXT(MVT::f16, MVT::f32);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected type for custom-lowering FP_EXTEND");
+ SrcVal =
+ makeLibCall(DAG, LC, MVT::f32, SrcVal, /*isSigned*/ false, Loc).first;
+ }
+ }
- SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
- SDLoc(Op)).first;
+ if (DstSz != 64)
+ return SrcVal;
+ // SrcVal is now known to be 32 bits wide
+ if (Subtarget->hasFP64()) // Instruction from 32 -> 64
+ return DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f64, SrcVal);
+
+ LC = RTLIB::getFPEXT(MVT::f32, MVT::f64);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected type for custom-lowering FP_EXTEND");
+ return makeLibCall(DAG, LC, MVT::f64, SrcVal, /*isSigned*/ false, Loc).first;
}
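
Spelled out, the new LowerFP_EXTEND is a two-step decision: widen 16 -> 32 either directly (FP16 available) or via a libcall, then widen 32 -> 64 either directly (FP64 available) or via a libcall. A standalone sketch of just that decision; the step strings are illustrative stand-ins, the real code selects RTLIB entries via RTLIB::getFPEXT:

// Standalone sketch of the FP_EXTEND lowering decision above.
#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string> fpExtendSteps(unsigned SrcSz, unsigned DstSz,
                                              bool HasFP16, bool HasFP64) {
  std::vector<std::string> Steps;
  if (SrcSz == 16) {
    Steps.push_back(HasFP16 ? "f16->f32 instruction" : "f16->f32 libcall");
    SrcSz = 32;
  }
  if (DstSz == 64 && SrcSz == 32)
    Steps.push_back(HasFP64 ? "f32->f64 instruction" : "f32->f64 libcall");
  return Steps;
}

int main() {
  for (const std::string &S :
       fpExtendSteps(16, 64, /*HasFP16=*/true, /*HasFP64=*/false))
    std::printf("%s\n", S.c_str());   // prints the two-step sequence
}
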
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getOperand(0).getValueType() == MVT::f64 &&
- Subtarget->isFPOnlySP() &&
+ SDValue SrcVal = Op.getOperand(0);
+ EVT SrcVT = SrcVal.getValueType();
+ EVT DstVT = Op.getValueType();
+ const unsigned DstSz = Op.getValueType().getSizeInBits();
+ const unsigned SrcSz = SrcVT.getSizeInBits();
+ (void)DstSz;
+ assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
"Unexpected type for custom-lowering FP_ROUND");
- RTLIB::Libcall LC;
- LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+ assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
+ "With both FP DP and 16, any FP conversion is legal!");
- SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
- SDLoc(Op)).first;
+ SDLoc Loc(Op);
+
+ // Instruction from 32 -> 16 if hasFP16 is valid
+ if (SrcSz == 32 && Subtarget->hasFP16())
+ return Op;
+
+ // Lib call from 32 -> 16 / 64 -> [32, 16]
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected type for custom-lowering FP_ROUND");
+ return makeLibCall(DAG, LC, DstVT, SrcVal, /*isSigned*/ false, Loc).first;
+}
+
+void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
+ MVT HalfT = MVT::i32;
+ SDLoc dl(N);
+ SDValue Hi, Lo, Tmp;
+
+ if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
+ !isOperationLegalOrCustom(ISD::UADDO, HalfT))
+ return;
+
+ unsigned OpTypeBits = HalfT.getScalarSizeInBits();
+ SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
+
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+ DAG.getConstant(0, dl, HalfT));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+ DAG.getConstant(1, dl, HalfT));
+
+ Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
+ DAG.getConstant(OpTypeBits - 1, dl,
+ getShiftAmountTy(HalfT, DAG.getDataLayout())));
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
+ SDValue(Lo.getNode(), 1));
+ Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
+ Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
+
+ Results.push_back(Lo);
+ Results.push_back(Hi);
}
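
The lowering above is the standard sign-splat trick split across two 32-bit halves: with s = the 64-bit sign splat, abs(x) = (x + s) ^ s, implemented here as UADDO/ADDCARRY plus two XORs. A small standalone check of the two-register version, keeping the same operand order as the DAG nodes above:

// Standalone model of lowerABS: 64-bit |x| computed on (lo, hi) 32-bit halves
// using the sign splat s = hi >> 31, an add-with-carry of s, and two xors.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint64_t abs64ViaHalves(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  uint32_t Tmp = static_cast<uint32_t>(static_cast<int32_t>(Hi) >> 31); // sign
  uint64_t Sum = static_cast<uint64_t>(Tmp) + Lo;                       // UADDO
  uint32_t NewLo = static_cast<uint32_t>(Sum);
  uint32_t Carry = static_cast<uint32_t>(Sum >> 32);
  uint32_t NewHi = Tmp + Hi + Carry;                                    // ADDCARRY
  NewHi ^= Tmp;
  NewLo ^= Tmp;
  return (static_cast<uint64_t>(NewHi) << 32) | NewLo;
}

int main() {
  for (long long V : {0LL, 1LL, -1LL, 42LL, -42LL, 0x123456789LL,
                      -0x123456789LL, -9223372036854775807LL})
    assert(abs64ViaHalves(static_cast<uint64_t>(V)) ==
           static_cast<uint64_t>(V < 0 ? -V : V));
  return 0;
}
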
bool
@@ -14314,14 +14976,15 @@ bool ARM::isBitFieldInvertedMask(unsigned v) {
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
-bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (!Subtarget->hasVFP3())
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
+ if (!Subtarget->hasVFP3Base())
return false;
if (VT == MVT::f16 && Subtarget->hasFullFP16())
return ARM_AM::getFP16Imm(Imm) != -1;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
- if (VT == MVT::f64 && !Subtarget->isFPOnlySP())
+ if (VT == MVT::f64 && Subtarget->hasFP64())
return ARM_AM::getFP64Imm(Imm) != -1;
return false;
}
@@ -14590,6 +15253,9 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// and up to 64 bits on the non-M profiles
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ if (AI->isFloatingPointOperation())
+ return AtomicExpansionKind::CmpXChg;
+
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
@@ -14621,6 +15287,36 @@ bool ARMTargetLowering::useLoadStackGuardNode() const {
return Subtarget->isTargetMachO();
}
+void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
+ if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return TargetLowering::insertSSPDeclarations(M);
+
+ // MSVC CRT has a global variable holding security cookie.
+ M.getOrInsertGlobal("__security_cookie",
+ Type::getInt8PtrTy(M.getContext()));
+
+ // MSVC CRT has a function to validate security cookie.
+ FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+ "__security_check_cookie", Type::getVoidTy(M.getContext()),
+ Type::getInt8PtrTy(M.getContext()));
+ if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
+ F->addAttribute(1, Attribute::AttrKind::InReg);
+}
+
+Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
+ // MSVC CRT has a global variable holding security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getGlobalVariable("__security_cookie");
+ return TargetLowering::getSDagStackGuard(M);
+}
+
+Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
+ // MSVC CRT has a function to validate security cookie.
+ if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
+ return M.getFunction("__security_check_cookie");
+ return TargetLowering::getSSPStackGuardCheck(M);
+}
+
bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const {
// If we do not have NEON, vector types are not natively supported.
@@ -14658,6 +15354,10 @@ bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasV6T2Ops();
}
+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+ return !Subtarget->hasMinSize();
+}
+
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -14850,8 +15550,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(
- BaseAddr, VecTy->getVectorNumElements() * Factor);
+ BaseAddr =
+ Builder.CreateConstGEP1_32(VecTy->getVectorElementType(), BaseAddr,
+ VecTy->getVectorNumElements() * Factor);
SmallVector<Value *, 2> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
@@ -14990,7 +15691,8 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
// If we are generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+ BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getVectorElementType(),
+ BaseAddr, LaneLen * Factor);
SmallVector<Value *, 6> Ops;
Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7a9fc739fc13..1675ec59a354 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -1,9 +1,8 @@
//===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -77,6 +76,10 @@ class VectorType;
PIC_ADD, // Add with a PC operand and a PIC label.
+ ASRL, // MVE long arithmetic shift right.
+ LSRL, // MVE long shift right.
+ LSLL, // MVE long shift left.
+
CMP, // ARM compare instructions.
CMN, // ARM CMN instructions.
CMPZ, // ARM compare that sets only Z flag.
@@ -122,6 +125,8 @@ class VectorType;
WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
WIN__DBZCHK, // Windows' divide by zero check
+ WLS, // Low-overhead loops, While Loop Start
+
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
@@ -134,32 +139,36 @@ class VectorType;
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits.
+ // Vector shift by vector
+ VSHLs, // ...left/right by signed
+ VSHLu, // ...left/right by unsigned
+
// Vector shift by immediate:
- VSHL, // ...left
- VSHRs, // ...right (signed)
- VSHRu, // ...right (unsigned)
+ VSHLIMM, // ...left
+ VSHRsIMM, // ...right (signed)
+ VSHRuIMM, // ...right (unsigned)
// Vector rounding shift by immediate:
- VRSHRs, // ...right (signed)
- VRSHRu, // ...right (unsigned)
- VRSHRN, // ...right narrow
+ VRSHRsIMM, // ...right (signed)
+ VRSHRuIMM, // ...right (unsigned)
+ VRSHRNIMM, // ...right narrow
// Vector saturating shift by immediate:
- VQSHLs, // ...left (signed)
- VQSHLu, // ...left (unsigned)
- VQSHLsu, // ...left (signed to unsigned)
- VQSHRNs, // ...right narrow (signed)
- VQSHRNu, // ...right narrow (unsigned)
- VQSHRNsu, // ...right narrow (signed to unsigned)
+ VQSHLsIMM, // ...left (signed)
+ VQSHLuIMM, // ...left (unsigned)
+ VQSHLsuIMM, // ...left (signed to unsigned)
+ VQSHRNsIMM, // ...right narrow (signed)
+ VQSHRNuIMM, // ...right narrow (unsigned)
+ VQSHRNsuIMM, // ...right narrow (signed to unsigned)
// Vector saturating rounding shift by immediate:
- VQRSHRNs, // ...right narrow (signed)
- VQRSHRNu, // ...right narrow (unsigned)
- VQRSHRNsu, // ...right narrow (signed to unsigned)
+ VQRSHRNsIMM, // ...right narrow (signed)
+ VQRSHRNuIMM, // ...right narrow (unsigned)
+ VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
// Vector shift and insert:
- VSLI, // ...left
- VSRI, // ...right
+ VSLIIMM, // ...left
+ VSRIIMM, // ...right
// Vector get lane (VMOV scalar to ARM core register)
// (These are used for 8- and 16-bit element types only.)
@@ -322,17 +331,21 @@ class VectorType;
/// is "fast" by reference in the second argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
unsigned Align,
+ MachineMemOperand::Flags Flags,
bool *Fast) const override;
EVT getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
+
bool isFNegFree(EVT VT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
@@ -454,7 +467,8 @@ class VectorType;
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- const TargetRegisterClass *getRegClassFor(MVT VT) const override;
+ const TargetRegisterClass *
+ getRegClassFor(MVT VT, bool isDivergent = false) const override;
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
@@ -479,7 +493,8 @@ class VectorType;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize = false) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
@@ -544,6 +559,10 @@ class VectorType;
bool useLoadStackGuardNode() const override;
+ void insertSSPDeclarations(Module &M) const override;
+ Value *getSDagStackGuard(const Module &M) const override;
+ Function *getSSPStackGuardCheck(const Module &M) const override;
+
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
@@ -568,6 +587,8 @@ class VectorType;
return HasStandaloneRem;
}
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
+
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
@@ -593,8 +614,11 @@ class VectorType;
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
- bool shouldFoldShiftPairToMask(const SDNode *N,
- CombineLevel Level) const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
+
+ bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -680,6 +704,7 @@ class VectorType;
const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
@@ -693,6 +718,8 @@ class VectorType;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
@@ -755,15 +782,13 @@ class VectorType;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
- bool IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+ bool isCalleeStructRet, bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+ const bool isIndirect) const;
bool CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
@@ -781,6 +806,8 @@ class VectorType;
bool shouldConsiderGEPOffsetSplit() const override { return true; }
+ bool isUnsupportedFloatingType(EVT VT) const;
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue CCR, SDValue Cmp,
SelectionDAG &DAG) const;
@@ -806,11 +833,15 @@ class VectorType;
MachineBasicBlock *MBB) const;
MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+ void addMVEVectorTypes(bool HasMVEFP);
+ void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
+ void setAllExpand(MVT VT);
};
enum NEONModImmType {
VMOVModImm,
VMVNModImm,
+ MVEVMVNModImm,
OtherModImm
};
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 0df48ba61299..bc93a058720c 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,9 +1,8 @@
//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -110,6 +109,9 @@ def AddrModeT2_i8s4 : AddrMode<15>;
def AddrMode_i12 : AddrMode<16>;
def AddrMode5FP16 : AddrMode<17>;
def AddrModeT2_ldrex : AddrMode<18>;
+def AddrModeT2_i7s4 : AddrMode<19>;
+def AddrModeT2_i7s2 : AddrMode<20>;
+def AddrModeT2_i7 : AddrMode<21>;
// Load / store index mode.
class IndexMode<bits<2> val> {
@@ -121,14 +123,15 @@ def IndexModePost : IndexMode<2>;
def IndexModeUpd : IndexMode<3>;
// Instruction execution domain.
-class Domain<bits<3> val> {
- bits<3> Value = val;
+class Domain<bits<4> val> {
+ bits<4> Value = val;
}
def GenericDomain : Domain<0>;
def VFPDomain : Domain<1>; // Instructions in VFP domain only
def NeonDomain : Domain<2>; // Instructions in Neon domain only
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
+def MVEDomain : Domain<8>; // Instructions in MVE and ARMv8.1m
//===----------------------------------------------------------------------===//
// ARM special operands.
@@ -185,6 +188,86 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
let DecoderMethod = "DecodeCCOutOperand";
}
+// VPT predicate
+
+def VPTPredNOperand : AsmOperandClass {
+ let Name = "VPTPredN";
+ let PredicateMethod = "isVPTPred";
+}
+def VPTPredROperand : AsmOperandClass {
+ let Name = "VPTPredR";
+ let PredicateMethod = "isVPTPred";
+}
+def undef_tied_input;
+
+// Operand classes for the cluster of MC operands describing a
+// VPT-predicated MVE instruction.
+//
+// There are two of these classes. Both of them have the same first
+// two options:
+//
+// $cond (an integer) indicates the instruction's predication status:
+// * ARMVCC::None means it's unpredicated
+// * ARMVCC::Then means it's in a VPT block and appears with the T suffix
+// * ARMVCC::Else means it's in a VPT block and appears with the E suffix.
+// During code generation, unpredicated and predicated instructions
+// are indicated by setting this parameter to 'None' or to 'Then'; the
+// third value 'Else' is only used for assembly and disassembly.
+//
+// $cond_reg (type VCCR) gives the input predicate register. This is
+// always either zero_reg or VPR, but needs to be modelled as an
+// explicit operand so that it can be register-allocated and spilled
+// when these operands are used in code generation.
+//
+// For 'vpred_r', there's an extra operand $inactive, which specifies
+// the vector register which will supply any lanes of the output
+// register that the predication mask prevents from being written by
+// this instruction. It's always tied to the actual output register
+// (i.e. must be allocated into the same physical reg), but again,
+// code generation will need to model it as a separate input value.
+//
+// 'vpred_n' doesn't have that extra operand: it only has $cond and
+// $cond_reg. This variant is used for any instruction that can't, or
+// doesn't want to, tie $inactive to the output register. Sometimes
+// that's because another input parameter is already tied to it (e.g.
+// instructions that both read and write their Qd register even when
+// unpredicated, either because they only partially overwrite it like
+// a narrowing integer conversion, or simply because the instruction
+// encoding doesn't have enough register fields to make the output
+// independent of all inputs). It can also be because the instruction
+// is defined to set disabled output lanes to zero rather than leaving
+// them unchanged (vector loads), or because it doesn't output a
+// vector register at all (stores, compares). In any of these
+// situations it's unnecessary to have an extra operand tied to the
+// output, and inconvenient to leave it there unused.
+
+// Base class for both kinds of vpred.
+class vpred_ops<dag extra_op, dag extra_mi> : OperandWithDefaultOps<OtherVT,
+ !con((ops (i32 0), (i32 zero_reg)), extra_op)> {
+ let PrintMethod = "printVPTPredicateOperand";
+ let OperandNamespace = "ARM";
+ let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi);
+
+ // For convenience, we provide a string value that can be appended
+ // to the constraints string. It's empty for vpred_n, and for
+ // vpred_r it ties the $inactive operand to the output q-register
+ // (which by convention will be called $Qd).
+ string vpred_constraint;
+}
+
+def vpred_r : vpred_ops<(ops (v4i32 undef_tied_input)), (ops MQPR:$inactive)> {
+ let ParserMatchClass = VPTPredROperand;
+ let OperandType = "OPERAND_VPRED_R";
+ let DecoderMethod = "DecodeVpredROperand";
+ let vpred_constraint = ",$Qd = $vp.inactive";
+}
+
+def vpred_n : vpred_ops<(ops), (ops)> {
+ let ParserMatchClass = VPTPredNOperand;
+ let OperandType = "OPERAND_VPRED_N";
+ let vpred_constraint = "";
+}
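+
+// For example, an instruction using vpred_r gains three extra MC operands
+// (i32imm:$cond, VCCR:$cond_reg, MQPR:$inactive), and appending
+// vpred_constraint to its constraints string adds ",$Qd = $vp.inactive",
+// tying $inactive to the output register ($vp being the conventional name
+// for the whole predicate operand cluster). An instruction using vpred_n
+// gains only $cond and $cond_reg.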
+
// ARM special operands for disassembly only.
//
def SetEndAsmOperand : ImmAsmOperand<0,1> {
@@ -285,6 +368,8 @@ class VFP3InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP3]>;
class NEONInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[HasNEON]>;
+class MVEInstAlias<string Asm, dag Result, bit EmitPriority = 1>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[HasMVEInt, IsThumb]>;
class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
@@ -325,8 +410,8 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
let TSFlags{12-7} = Form;
let TSFlags{13} = isUnaryDataProc;
let TSFlags{14} = canXformTo16Bit;
- let TSFlags{17-15} = D.Value;
- let TSFlags{18} = thumbArithFlagSetting;
+ let TSFlags{18-15} = D.Value;
+ let TSFlags{19} = thumbArithFlagSetting;
let Constraints = cstr;
let Itinerary = itin;
@@ -382,6 +467,8 @@ class VFP2AsmPseudo<string asm, dag iops, dag oops = (outs)>
: AsmPseudoInst<asm, iops, oops>, Requires<[HasVFP2]>;
class NEONAsmPseudo<string asm, dag iops, dag oops = (outs)>
: AsmPseudoInst<asm, iops, oops>, Requires<[HasNEON]>;
+class MVEAsmPseudo<string asm, dag iops, dag oops = (outs)>
+ : AsmPseudoInst<asm, iops, oops>, Requires<[HasMVEInt]>;
// Pseudo instructions for the code generator.
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
@@ -1556,6 +1643,8 @@ class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
// Loads & stores operate on both NEON and VFP pipelines.
let D = VFPNeonDomain;
+
+ let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
}
// VFP Load / store multiple pseudo instructions.
@@ -1903,6 +1992,8 @@ class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
let Inst{11-8} = 0b1001; // Half precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
+
+ let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
}
// Half precision, unary, non-predicated
@@ -1931,6 +2022,8 @@ class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
let Inst{11-8} = 0b1001; // Half precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
+
+ let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
}
// Half precision, binary
@@ -1957,6 +2050,8 @@ class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
let Inst{11-8} = 0b1001; // Half precision
let Inst{6} = op6;
let Inst{4} = op4;
+
+ let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
}
// Half precision, binary, not predicated
@@ -1986,6 +2081,8 @@ class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
let Inst{11-8} = 0b1001; // Half precision
let Inst{6} = opcod3;
let Inst{4} = 0;
+
+ let isUnpredicable = 1; // FP16 instructions cannot in general be conditional
}
// VFP conversion instructions
@@ -2494,7 +2591,7 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
// VFP/NEON Instruction aliases for type suffixes.
// Note: When EmitPriority == 1, the alias will be used for printing
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result, bit EmitPriority = 0> :
- InstAlias<!strconcat(opc, dt, "\t", asm), Result, EmitPriority>, Requires<[HasVFP2]>;
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result, EmitPriority>, Requires<[HasFPRegs]>;
// Note: When EmitPriority == 1, the alias will be used for printing
multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriority = 0> {
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index bcc31f5fa4cc..388c889349b7 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -95,7 +94,7 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
const TargetMachine &TM = MF.getTarget();
- if (!Subtarget.useMovt(MF)) {
+ if (!Subtarget.useMovt()) {
if (TM.isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index c87fb97448c9..042b53f0f8c3 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,9 +1,8 @@
//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 13abdc9687ec..e35145463852 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1,9 +1,8 @@
//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -100,6 +99,18 @@ def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
SDTCisSameAs<0, 4>,
SDTCisSameAs<0, 5>]>;
+// ARMlsll, ARMlsrl, ARMasrl
+def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisInt<4>]>;
+
+// TODO Add another operand for 'Size' so that we can re-use this node when we
+// start supporting *TP versions.
+def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, OtherVT>]>;
+
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
@@ -172,6 +183,10 @@ def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
+def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
+def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
+
def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
@@ -214,189 +229,44 @@ def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
-//===----------------------------------------------------------------------===//
-// ARM Instruction Predicate Definitions.
-//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
- AssemblerPredicate<"HasV4TOps", "armv4t">;
-def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
- AssemblerPredicate<"HasV5TOps", "armv5t">;
-def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
- AssemblerPredicate<"HasV5TEOps", "armv5te">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
- AssemblerPredicate<"HasV6Ops", "armv6">;
-def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
-def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
- AssemblerPredicate<"HasV6MOps",
- "armv6m or armv6t2">;
-def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">,
- AssemblerPredicate<"HasV8MBaselineOps",
- "armv8m.base">;
-def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">,
- AssemblerPredicate<"HasV8MMainlineOps",
- "armv8m.main">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
- AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
-def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
- AssemblerPredicate<"HasV6KOps", "armv6k">;
-def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
- AssemblerPredicate<"HasV7Ops", "armv7">;
-def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
- AssemblerPredicate<"HasV8Ops", "armv8">;
-def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
- AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
-def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
- AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
-def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
- AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
-def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
- AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
-def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
- AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
-def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
- AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
- AssemblerPredicate<"FeatureVFP2", "VFP2">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
- AssemblerPredicate<"FeatureVFP3", "VFP3">;
-def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
- AssemblerPredicate<"FeatureVFP4", "VFP4">;
-def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">,
- AssemblerPredicate<"!FeatureVFPOnlySP",
- "double precision VFP">;
-def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
- AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">,
- AssemblerPredicate<"FeatureNEON", "NEON">;
-def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
- AssemblerPredicate<"FeatureSHA2", "sha2">;
-def HasAES : Predicate<"Subtarget->hasAES()">,
- AssemblerPredicate<"FeatureAES", "aes">;
-def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
- AssemblerPredicate<"FeatureCrypto", "crypto">;
-def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
- AssemblerPredicate<"FeatureDotProd", "dotprod">;
-def HasCRC : Predicate<"Subtarget->hasCRC()">,
- AssemblerPredicate<"FeatureCRC", "crc">;
-def HasRAS : Predicate<"Subtarget->hasRAS()">,
- AssemblerPredicate<"FeatureRAS", "ras">;
-def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16","half-float conversions">;
-def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
- AssemblerPredicate<"FeatureFullFP16","full half-float">;
-def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
- AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
-def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
- AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
-def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
- AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
-def HasDSP : Predicate<"Subtarget->hasDSP()">,
- AssemblerPredicate<"FeatureDSP", "dsp">;
-def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
- AssemblerPredicate<"FeatureDB",
- "data-barriers">;
-def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">,
- AssemblerPredicate<"FeatureDFB",
- "full-data-barrier">;
-def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">,
- AssemblerPredicate<"FeatureV7Clrex",
- "v7 clrex">;
-def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
- AssemblerPredicate<"FeatureAcquireRelease",
- "acquire/release">;
-def HasMP : Predicate<"Subtarget->hasMPExtension()">,
- AssemblerPredicate<"FeatureMP",
- "mp-extensions">;
-def HasVirtualization: Predicate<"false">,
- AssemblerPredicate<"FeatureVirtualization",
- "virtualization-extensions">;
-def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
- AssemblerPredicate<"FeatureTrustZone",
- "TrustZone">;
-def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">,
- AssemblerPredicate<"Feature8MSecExt",
- "ARMv8-M Security Extensions">;
-def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
-def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">,
- AssemblerPredicate<"ModeThumb", "thumb">;
-def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
- AssemblerPredicate<"ModeThumb,FeatureThumb2",
- "thumb2">;
-def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass", "armv*m">;
-def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
- AssemblerPredicate<"!FeatureMClass",
- "!armv*m">;
-def IsARM : Predicate<"!Subtarget->isThumb()">,
- AssemblerPredicate<"!ModeThumb", "arm-mode">;
-def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
-def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
-def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
-def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">;
-def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">;
-def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">;
-def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
- AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
-def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
-
-def UseNegativeImmediates :
- Predicate<"false">,
- AssemblerPredicate<"!FeatureNoNegativeImmediates",
- "NegativeImmediates">;
-
-// FIXME: Eventually this will be just "hasV6T2Ops".
-let RecomputePerFunction = 1 in {
- def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
- def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
- def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
- def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
-
- def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
- " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
- "MF->getFunction().optForMinSize())">;
-}
-def UseMulOps : Predicate<"Subtarget->useMulOps()">;
-
-// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed, and when
-// they are not slower than a mul + add sequence.
-// Do not use them for Darwin platforms.
-def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
- " FPOpFusion::Fast && "
- " Subtarget->hasVFP4()) && "
- "!Subtarget->isTargetDarwin() &&"
- "Subtarget->useFPVMLx()">;
-
-def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
-def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
-
-def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
-def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
-
-def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
- "!Subtarget->useNEONForSinglePrecisionFP()">;
-def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
- "Subtarget->useNEONForSinglePrecisionFP()">;
-
-let RecomputePerFunction = 1 in {
- def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
- def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
-}
-
-def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
-
-// Armv8.5-A extensions
-def HasSB : Predicate<"Subtarget->hasSB()">,
- AssemblerPredicate<"FeatureSB", "sb">;
+// Vector operations shared between NEON and MVE
+
+def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
+
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def ARMvduplane : SDNode<"ARMISD::VDUPLANE",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>>;
+
+def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def ARMvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
+def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+
+
+def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,]>;
+def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
+def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
+def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
+def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+
+def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
+ [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -552,6 +422,16 @@ def reglist : Operand<i32> {
let DecoderMethod = "DecodeRegListOperand";
}
+// A list of general-purpose registers and APSR, separated by commas.
+// Used by CLRM.
+def RegListWithAPSRAsmOperand : AsmOperandClass { let Name = "RegListWithAPSR"; }
+def reglist_with_apsr : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = RegListWithAPSRAsmOperand;
+ let PrintMethod = "printRegisterList";
+ let DecoderMethod = "DecodeRegListOperand";
+}
+
def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">;
def DPRRegListAsmOperand : AsmOperandClass {
@@ -576,6 +456,21 @@ def spr_reglist : Operand<i32> {
let DecoderMethod = "DecodeSPRRegListOperand";
}
+def FPSRegListWithVPRAsmOperand : AsmOperandClass { let Name =
+ "FPSRegListWithVPR"; }
+def fp_sreglist_with_vpr : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = FPSRegListWithVPRAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+def FPDRegListWithVPRAsmOperand : AsmOperandClass { let Name =
+ "FPDRegListWithVPR"; }
+def fp_dreglist_with_vpr : Operand<i32> {
+ let EncoderMethod = "getRegisterListOpValue";
+ let ParserMatchClass = FPDRegListWithVPRAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
def cpinst_operand : Operand<i32> {
let PrintMethod = "printCPInstOperand";
@@ -621,6 +516,55 @@ def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
let ParserMatchClass = RotImmAsmOperand;
}
+// Power-of-two operand for MVE VIDUP and friends, which encodes the values
+// {1,2,4,8} as their base-2 logarithms, i.e. as {0,1,2,3} respectively.
+def MVE_VIDUP_imm_asmoperand : AsmOperandClass {
+ let Name = "VIDUP_imm";
+ let PredicateMethod = "isPowerTwoInRange<1,8>";
+ let RenderMethod = "addPowerTwoOperands";
+ let DiagnosticString = "vector increment immediate must be 1, 2, 4 or 8";
+}
+def MVE_VIDUP_imm : Operand<i32> {
+ let EncoderMethod = "getPowerTwoOpValue";
+ let DecoderMethod = "DecodePowerTwoOperand<0,3>";
+ let ParserMatchClass = MVE_VIDUP_imm_asmoperand;
+}
+
+// Pair vector indexing
+class MVEPairVectorIndexOperand<string start, string end> : AsmOperandClass {
+ let Name = "MVEPairVectorIndex"#start;
+ let RenderMethod = "addMVEPairVectorIndexOperands";
+ let PredicateMethod = "isMVEPairVectorIndex<"#start#", "#end#">";
+}
+
+class MVEPairVectorIndex<string opval> : Operand<i32> {
+ let PrintMethod = "printVectorIndex";
+ let EncoderMethod = "getMVEPairVectorIndexOpValue<"#opval#">";
+ let DecoderMethod = "DecodeMVEPairVectorIndexOperand<"#opval#">";
+ let MIOperandInfo = (ops i32imm);
+}
+
+def MVEPairVectorIndex0 : MVEPairVectorIndex<"0"> {
+ let ParserMatchClass = MVEPairVectorIndexOperand<"0", "1">;
+}
+
+def MVEPairVectorIndex2 : MVEPairVectorIndex<"2"> {
+ let ParserMatchClass = MVEPairVectorIndexOperand<"2", "3">;
+}
+
+// Vector indexing
+class MVEVectorIndexOperand<int NumLanes> : AsmOperandClass {
+ let Name = "MVEVectorIndex"#NumLanes;
+ let RenderMethod = "addMVEVectorIndexOperands";
+ let PredicateMethod = "isVectorIndexInRange<"#NumLanes#">";
+}
+
+class MVEVectorIndex<int NumLanes> : Operand<i32> {
+ let PrintMethod = "printVectorIndex";
+ let ParserMatchClass = MVEVectorIndexOperand<NumLanes>;
+ let MIOperandInfo = (ops i32imm);
+}
+
// shift_imm: An integer that encodes a shift amount and the type of shift
// (asr or lsl). The 6-bit immediate encodes as:
// {5} 0 ==> lsl
@@ -718,24 +662,11 @@ def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{
}
/// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal()
-def arm_i32imm : PatLeaf<(imm), [{
- if (Subtarget->useMovt(*MF))
+def arm_i32imm : IntImmLeaf<i32, [{
+ if (Subtarget->useMovt())
return true;
- return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-}]> {
- // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
- // the MachineFunction.
- let GISelPredicateCode = [{
- const auto &MF = *MI.getParent()->getParent();
- if (STI.useMovt(MF))
- return true;
-
- const auto &MO = MI.getOperand(1);
- if (!MO.isCImm())
- return false;
- return ARM_AM::isSOImmTwoPartVal(MO.getCImm()->getZExtValue());
- }];
-}
+ return ARM_AM::isSOImmTwoPartVal(Imm.getZExtValue());
+}]>;
/// imm0_1 predicate - Immediate in the range [0,1].
def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
@@ -952,6 +883,32 @@ def imm1_16 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_16AsmOperand;
}
+def MVEShiftImm1_7AsmOperand: ImmAsmOperand<1,7> {
+ let Name = "MVEShiftImm1_7";
+  // The vshll.s8 T1 encoding accepts immediates in the range 1-7, while the
+  // T2 encoding accepts 8. Keeping them in separate operand classes gives a
+  // better diagnostic message if someone uses an immediate larger than the
+  // T1/T2 encodings allow.
+ let DiagnosticString = "operand must be an immediate in the range [1,8]";
+}
+def mve_shift_imm1_7 : Operand<i32> {
+ let ParserMatchClass = MVEShiftImm1_7AsmOperand;
+ let EncoderMethod = "getMVEShiftImmOpValue";
+}
+
+def MVEShiftImm1_15AsmOperand: ImmAsmOperand<1,15> {
+ let Name = "MVEShiftImm1_15";
+  // The vshll.s16 T1 encoding accepts immediates in the range 1-15, while
+  // the T2 encoding accepts 16. Keeping them in separate operand classes
+  // gives a better diagnostic message if someone uses an immediate larger
+  // than the T1/T2 encodings allow.
+ let DiagnosticString = "operand must be an immediate in the range [1,16]";
+}
+def mve_shift_imm1_15 : Operand<i32> {
+ let ParserMatchClass = MVEShiftImm1_15AsmOperand;
+ let EncoderMethod = "getMVEShiftImmOpValue";
+}
+
// Define ARM specific addressing modes.
// addrmode_imm12 := reg +/- imm12
//
@@ -1332,6 +1289,15 @@ def addr_offset_none : MemOperand,
let MIOperandInfo = (ops GPR:$base);
}
+// t_addr_offset_none := reg [r0-r7]
+def MemNoOffsetTAsmOperand : AsmOperandClass { let Name = "MemNoOffsetT"; }
+def t_addr_offset_none : MemOperand {
+ let PrintMethod = "printAddrMode7Operand";
+ let DecoderMethod = "DecodetGPRRegisterClass";
+ let ParserMatchClass = MemNoOffsetTAsmOperand;
+ let MIOperandInfo = (ops tGPR:$base);
+}
+
def nohash_imm : Operand<i32> {
let PrintMethod = "printNoHashImmediate";
}
@@ -5932,6 +5898,12 @@ include "ARMInstrVFP.td"
include "ARMInstrNEON.td"
//===----------------------------------------------------------------------===//
+// MVE Support
+//
+
+include "ARMInstrMVE.td"
+
+//===----------------------------------------------------------------------===//
// Assembler aliases
//
diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td
new file mode 100644
index 000000000000..3e7ae55c7fc8
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrMVE.td
@@ -0,0 +1,4591 @@
+//===-- ARMInstrMVE.td - MVE support for ARM ---------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM MVE instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+class ExpandImmAsmOp<string shift> : AsmOperandClass {
+ let Name = !strconcat("ExpandImm", shift);
+ let PredicateMethod = !strconcat("isExpImm<", shift, ">");
+ let RenderMethod = "addImmOperands";
+}
+class InvertedExpandImmAsmOp<string shift, string size> : AsmOperandClass {
+ let Name = !strconcat("InvertedExpandImm", shift, "_", size);
+ let PredicateMethod = !strconcat("isInvertedExpImm<", shift, ",", size, ">");
+ let RenderMethod = "addImmOperands";
+}
+
+class ExpandImm<string shift> : Operand<i32> {
+ let ParserMatchClass = ExpandImmAsmOp<shift>;
+ let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",false>");
+ let DecoderMethod = !strconcat("DecodeExpandedImmOperand<",shift,">");
+ let PrintMethod = "printExpandedImmOperand";
+}
+class InvertedExpandImm<string shift, string size> : Operand<i32> {
+ let ParserMatchClass = InvertedExpandImmAsmOp<shift, size>;
+ let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",true>");
+ let PrintMethod = "printExpandedImmOperand";
+ // No decoder method needed, because this operand type is only used
+ // by aliases (VAND and VORN)
+}
+
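+// An expanded immediate is an 8-bit value shifted left by 0, 8, 16 or 24
+// bits within the lane. The inverted variants encode the bitwise complement
+// of such a value in a 16- or 32-bit lane, which is how the VAND and VORN
+// immediate aliases are expressed in terms of VBIC and VORR.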
+def expzero00 : ExpandImm<"0">;
+def expzero08 : ExpandImm<"8">;
+def expzero16 : ExpandImm<"16">;
+def expzero24 : ExpandImm<"24">;
+
+def expzero00inv16 : InvertedExpandImm<"0", "16">;
+def expzero08inv16 : InvertedExpandImm<"8", "16">;
+
+def expzero00inv32 : InvertedExpandImm<"0", "32">;
+def expzero08inv32 : InvertedExpandImm<"8", "32">;
+def expzero16inv32 : InvertedExpandImm<"16", "32">;
+def expzero24inv32 : InvertedExpandImm<"24", "32">;
+
+// VPT condition mask
+def vpt_mask : Operand<i32> {
+ let PrintMethod = "printVPTMask";
+ let ParserMatchClass = it_mask_asmoperand;
+ let EncoderMethod = "getVPTMaskOpValue";
+ let DecoderMethod = "DecodeVPTMaskOperand";
+}
+
+// VPT/VCMP restricted predicate for sign invariant types
+def pred_restricted_i_asmoperand : AsmOperandClass {
+ let Name = "CondCodeRestrictedI";
+ let RenderMethod = "addITCondCodeOperands";
+ let PredicateMethod = "isITCondCodeRestrictedI";
+ let ParserMethod = "parseITCondCode";
+ let DiagnosticString = "condition code for sign-independent integer "#
+ "comparison must be EQ or NE";
+}
+
+// VPT/VCMP restricted predicate for signed types
+def pred_restricted_s_asmoperand : AsmOperandClass {
+ let Name = "CondCodeRestrictedS";
+ let RenderMethod = "addITCondCodeOperands";
+ let PredicateMethod = "isITCondCodeRestrictedS";
+ let ParserMethod = "parseITCondCode";
+ let DiagnosticString = "condition code for signed integer "#
+ "comparison must be EQ, NE, LT, GT, LE or GE";
+}
+
+// VPT/VCMP restricted predicate for unsigned types
+def pred_restricted_u_asmoperand : AsmOperandClass {
+ let Name = "CondCodeRestrictedU";
+ let RenderMethod = "addITCondCodeOperands";
+ let PredicateMethod = "isITCondCodeRestrictedU";
+ let ParserMethod = "parseITCondCode";
+ let DiagnosticString = "condition code for unsigned integer "#
+ "comparison must be EQ, NE, HS or HI";
+}
+
+// VPT/VCMP restricted predicate for floating point
+def pred_restricted_fp_asmoperand : AsmOperandClass {
+ let Name = "CondCodeRestrictedFP";
+ let RenderMethod = "addITCondCodeOperands";
+ let PredicateMethod = "isITCondCodeRestrictedFP";
+ let ParserMethod = "parseITCondCode";
+ let DiagnosticString = "condition code for floating-point "#
+ "comparison must be EQ, NE, LT, GT, LE or GE";
+}
+
+class VCMPPredicateOperand : Operand<i32>;
+
+def pred_basic_i : VCMPPredicateOperand {
+ let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+ let ParserMatchClass = pred_restricted_i_asmoperand;
+ let DecoderMethod = "DecodeRestrictedIPredicateOperand";
+ let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+def pred_basic_u : VCMPPredicateOperand {
+ let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+ let ParserMatchClass = pred_restricted_u_asmoperand;
+ let DecoderMethod = "DecodeRestrictedUPredicateOperand";
+ let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+def pred_basic_s : VCMPPredicateOperand {
+ let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+ let ParserMatchClass = pred_restricted_s_asmoperand;
+ let DecoderMethod = "DecodeRestrictedSPredicateOperand";
+ let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+def pred_basic_fp : VCMPPredicateOperand {
+ let PrintMethod = "printMandatoryRestrictedPredicateOperand";
+ let ParserMatchClass = pred_restricted_fp_asmoperand;
+ let DecoderMethod = "DecodeRestrictedFPPredicateOperand";
+ let EncoderMethod = "getRestrictedCondCodeOpValue";
+}
+
+// Register list operands for interleaving load/stores
+def VecList2QAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoMQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addMVEVecListOperands";
+ let DiagnosticString = "operand must be a list of two consecutive "#
+ "q-registers in range [q0,q7]";
+}
+
+def VecList2Q : RegisterOperand<QQPR, "printMVEVectorListTwoQ"> {
+ let ParserMatchClass = VecList2QAsmOperand;
+ let PrintMethod = "printMVEVectorList<2>";
+}
+
+def VecList4QAsmOperand : AsmOperandClass {
+ let Name = "VecListFourMQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addMVEVecListOperands";
+ let DiagnosticString = "operand must be a list of four consecutive "#
+ "q-registers in range [q0,q7]";
+}
+
+def VecList4Q : RegisterOperand<QQQQPR, "printMVEVectorListFourQ"> {
+ let ParserMatchClass = VecList4QAsmOperand;
+ let PrintMethod = "printMVEVectorList<4>";
+}
+
+// taddrmode_imm7 := reg[r0-r7] +/- (imm7 << shift)
+class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
+ let Name = "TMemImm7Shift"#shift#"Offset";
+ let PredicateMethod = "isMemImm7ShiftedOffset<"#shift#",ARM::tGPRRegClassID>";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
+
+class taddrmode_imm7<int shift> : MemOperand {
+ let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>;
+ // They are printed the same way as the T2 imm8 version
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+  // The encoder method can also be shared with the T2 version.
+ let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
+ let DecoderMethod = "DecodeTAddrModeImm7<"#shift#">";
+ let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_imm7 := reg +/- (imm7 << shift)
+class MemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass {
+ let Name = "MemImm7Shift"#shift#"Offset";
+ let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
+ ",ARM::GPRnopcRegClassID>";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
+
+def MemImm7Shift0OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<0>;
+def MemImm7Shift1OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<1>;
+def MemImm7Shift2OffsetAsmOperand : MemImm7ShiftOffsetAsmOperand<2>;
+class T2AddrMode_Imm7<int shift> : MemOperand,
+ ComplexPattern<i32, 2, "SelectT2AddrModeImm7<"#shift#">", []> {
+ let EncoderMethod = "getT2AddrModeImmOpValue<7,"#shift#">";
+ let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 0>";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetAsmOperand");
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+class t2addrmode_imm7<int shift> : T2AddrMode_Imm7<shift> {
+ // They are printed the same way as the imm8 version
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+}
+
+class MemImm7ShiftOffsetWBAsmOperand<int shift> : AsmOperandClass {
+ let Name = "MemImm7Shift"#shift#"OffsetWB";
+ let PredicateMethod = "isMemImm7ShiftedOffset<" # shift #
+ ",ARM::rGPRRegClassID>";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
+
+def MemImm7Shift0OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<0>;
+def MemImm7Shift1OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<1>;
+def MemImm7Shift2OffsetWBAsmOperand : MemImm7ShiftOffsetWBAsmOperand<2>;
+
+class t2addrmode_imm7_pre<int shift> : T2AddrMode_Imm7<shift> {
+ // They are printed the same way as the imm8 version
+ let PrintMethod = "printT2AddrModeImm8Operand<true>";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("MemImm7Shift"#shift#"OffsetWBAsmOperand");
+ let DecoderMethod = "DecodeT2AddrModeImm7<"#shift#", 1>";
+ let MIOperandInfo = (ops rGPR:$base, i32imm:$offsim);
+}
+
+class t2am_imm7shiftOffsetAsmOperand<int shift>
+ : AsmOperandClass { let Name = "Imm7Shift"#shift; }
+def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
+def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
+def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;
+
+class t2am_imm7_offset<int shift> : MemOperand {
+ // They are printed the same way as the imm8 version
+ let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("t2am_imm7shift"#shift#"OffsetAsmOperand");
+ let EncoderMethod = "getT2ScaledImmOpValue<7,"#shift#">";
+ let DecoderMethod = "DecodeT2Imm7<"#shift#">";
+}
+
+// Operands for gather/scatter loads of the form [Rbase, Qoffsets]
+class MemRegRQOffsetAsmOperand<int shift> : AsmOperandClass {
+ let Name = "MemRegRQS"#shift#"Offset";
+ let PredicateMethod = "isMemRegRQOffset<"#shift#">";
+ let RenderMethod = "addMemRegRQOffsetOperands";
+}
+
+def MemRegRQS0OffsetAsmOperand : MemRegRQOffsetAsmOperand<0>;
+def MemRegRQS1OffsetAsmOperand : MemRegRQOffsetAsmOperand<1>;
+def MemRegRQS2OffsetAsmOperand : MemRegRQOffsetAsmOperand<2>;
+def MemRegRQS3OffsetAsmOperand : MemRegRQOffsetAsmOperand<3>;
+
+// mve_addr_rq_shift := reg + vreg{ << UXTW #shift}
+class mve_addr_rq_shift<int shift> : MemOperand {
+ let EncoderMethod = "getMveAddrModeRQOpValue";
+ let PrintMethod = "printMveAddrModeRQOperand<"#shift#">";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("MemRegRQS"#shift#"OffsetAsmOperand");
+ let DecoderMethod = "DecodeMveAddrModeRQ";
+ let MIOperandInfo = (ops GPRnopc:$base, MQPR:$offsreg);
+}
+
+class MemRegQOffsetAsmOperand<int shift> : AsmOperandClass {
+ let Name = "MemRegQS"#shift#"Offset";
+ let PredicateMethod = "isMemRegQOffset<"#shift#">";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
+
+def MemRegQS2OffsetAsmOperand : MemRegQOffsetAsmOperand<2>;
+def MemRegQS3OffsetAsmOperand : MemRegQOffsetAsmOperand<3>;
+
+// mve_addr_q_shift := vreg {+ #imm7s2/4}
+class mve_addr_q_shift<int shift> : MemOperand {
+ let EncoderMethod = "getMveAddrModeQOpValue<"#shift#">";
+ // Can be printed same way as other reg + imm operands
+ let PrintMethod = "printT2AddrModeImm8Operand<false>";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("MemRegQS"#shift#"OffsetAsmOperand");
+ let DecoderMethod = "DecodeMveAddrModeQ<"#shift#">";
+ let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
+}
+
+// --------- Start of base classes for the instructions themselves
+
+class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
+ string ops, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
+ pattern>,
+ Requires<[HasMVEInt]> {
+ let D = MVEDomain;
+ let DecoderNamespace = "MVE";
+}
+
+// MVE_p is used for most predicated instructions, to add the cluster
+// of input operands that provides the VPT suffix (none, T or E) and
+// the input predicate register.
+class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
+ string suffix, string ops, vpred_ops vpred, string cstr,
+ list<dag> pattern=[]>
+ : MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
+ // If the instruction has a suffix, like vadd.f32, then the
+ // VPT predication suffix goes before the dot, so the full
+ // name has to be "vadd${vp}.f32".
+ !strconcat(iname, "${vp}",
+ !if(!eq(suffix, ""), "", !strconcat(".", suffix))),
+ ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
+ let Inst{31-29} = 0b111;
+ let Inst{27-26} = 0b11;
+}
+
+class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
+ string suffix, string ops, vpred_ops vpred, string cstr,
+ list<dag> pattern=[]>
+ : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
+ let Predicates = [HasMVEFloat];
+}
+
+class MVE_MI_with_pred<dag oops, dag iops, InstrItinClass itin, string asm,
+ string ops, string cstr, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm, !strconcat("\t", ops), cstr,
+ pattern>,
+ Requires<[HasV8_1MMainline, HasMVEInt]> {
+ let D = MVEDomain;
+ let DecoderNamespace = "MVE";
+}
+
+class MVE_VMOV_lane_base<dag oops, dag iops, InstrItinClass itin, string asm,
+ string suffix, string ops, string cstr,
+ list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, asm,
+ !if(!eq(suffix, ""), "", "." # suffix) # "\t" # ops,
+ cstr, pattern>,
+ Requires<[HasV8_1MMainline, HasMVEInt]> {
+ let D = MVEDomain;
+ let DecoderNamespace = "MVE";
+}
+
+class MVE_ScalarShift<string iname, dag oops, dag iops, string asm, string cstr,
+ list<dag> pattern=[]>
+ : MVE_MI_with_pred<oops, iops, NoItinerary, iname, asm, cstr, pattern> {
+ let Inst{31-20} = 0b111010100101;
+ let Inst{8} = 0b1;
+
+}
+
+class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
+ list<dag> pattern=[]>
+ : MVE_ScalarShift<iname, (outs rGPR:$RdaDest), iops, asm, cstr, pattern> {
+ bits<4> RdaDest;
+
+ let Inst{19-16} = RdaDest{3-0};
+}
+
+class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4, list<dag> pattern=[]>
+ : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
+ "$RdaSrc, $imm", "$RdaDest = $RdaSrc", pattern> {
+ bits<5> imm;
+
+ let Inst{15} = 0b0;
+ let Inst{14-12} = imm{4-2};
+ let Inst{11-8} = 0b1111;
+ let Inst{7-6} = imm{1-0};
+ let Inst{5-4} = op5_4{1-0};
+ let Inst{3-0} = 0b1111;
+}
+
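+// Shifts of a single GPR by an immediate: SQSHL and UQSHL are signed and
+// unsigned saturating shifts left, SRSHR and URSHR are signed and unsigned
+// rounding shifts right.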
+def MVE_SQSHL : MVE_ScalarShiftSRegImm<"sqshl", 0b11>;
+def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
+def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
+def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
+
+class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
+ : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
+ "$RdaSrc, $Rm", "$RdaDest = $RdaSrc", pattern> {
+ bits<4> Rm;
+
+ let Inst{15-12} = Rm{3-0};
+ let Inst{11-8} = 0b1111;
+ let Inst{7-6} = 0b00;
+ let Inst{5-4} = op5_4{1-0};
+ let Inst{3-0} = 0b1101;
+}
+
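+// Shifts of a single GPR by a register: SQRSHR is a signed saturating
+// rounding shift right, UQRSHL an unsigned saturating rounding shift left.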
+def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>;
+def MVE_UQRSHL : MVE_ScalarShiftSRegReg<"uqrshl", 0b00>;
+
+class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
+ string cstr, list<dag> pattern=[]>
+ : MVE_ScalarShift<iname, (outs tGPREven:$RdaLo, tGPROdd:$RdaHi),
+ iops, asm, cstr, pattern> {
+ bits<4> RdaLo;
+ bits<4> RdaHi;
+
+ let Inst{19-17} = RdaLo{3-1};
+ let Inst{11-9} = RdaHi{3-1};
+}
+
+class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
+ list<dag> pattern=[]>
+ : MVE_ScalarShiftDoubleReg<
+ iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, long_shift:$imm),
+ "$RdaLo, $RdaHi, $imm", "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+ pattern> {
+ bits<5> imm;
+
+ let Inst{16} = op16;
+ let Inst{15} = 0b0;
+ let Inst{14-12} = imm{4-2};
+ let Inst{7-6} = imm{1-0};
+ let Inst{5-4} = op5_4{1-0};
+ let Inst{3-0} = 0b1111;
+}
+
+class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16,
+ list<dag> pattern=[]>
+ : MVE_ScalarShiftDoubleReg<
+ iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm),
+ "$RdaLo, $RdaHi, $Rm", "@earlyclobber $RdaHi,@earlyclobber $RdaLo,"
+ "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+ pattern> {
+ bits<4> Rm;
+
+ let Inst{16} = op16;
+ let Inst{15-12} = Rm{3-0};
+ let Inst{7-6} = 0b00;
+ let Inst{5} = op5;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = 0b1101;
+
+ // Custom decoder method because of the following overlapping encodings:
+ // ASRL and SQRSHR
+ // LSLL and UQRSHL
+ // SQRSHRL and SQRSHR
+ // UQRSHLL and UQRSHL
+ let DecoderMethod = "DecodeMVEOverlappingLongShift";
+}
+
+def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMasrl tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
+def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMasrl tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMlsll tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
+def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMlsll tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMlsrl tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+
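+// Saturating/rounding versions of the long (64-bit) shifts above, again
+// operating on a value split across an even/odd GPR pair.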
+def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>;
+def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
+def MVE_SRSHRL : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>;
+
+def MVE_UQRSHLL : MVE_ScalarShiftDRegReg<"uqrshll", 0b0, 0b1>;
+def MVE_UQSHLL : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>;
+def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
+
+// start of mve_rDest instructions
+
+class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
+ string iname, string suffix,
+ string ops, string cstr, list<dag> pattern=[]>
+// Always use vpred_n and not vpred_r: with the output register being
+// a GPR and not a vector register, there can't be any question of
+// what to put in its inactive lanes.
+ : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {
+
+ let Inst{25-23} = 0b101;
+ let Inst{11-9} = 0b111;
+ let Inst{4} = 0b0;
+}
+
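+// VABAV: accumulate the absolute differences of corresponding lanes of
+// Qn and Qm into the scalar register Rda.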
+class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+ : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
+ NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
+ pattern> {
+ bits<4> Qm;
+ bits<4> Qn;
+ bits<4> Rda;
+
+ let Inst{28} = U;
+ let Inst{22} = 0b0;
+ let Inst{21-20} = size{1-0};
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{15-12} = Rda{3-0};
+ let Inst{8} = 0b1;
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b0;
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b1;
+}
+
+def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
+def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
+def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
+def MVE_VABAVu8 : MVE_VABAV<"u8", 0b1, 0b00>;
+def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
+def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+
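+// VADDV: add all the lanes of Qm into a scalar GPR; the accumulating form
+// (VADDVA) also adds in the previous value of Rda.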
+class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
+ bit A, bit U, bits<2> size, list<dag> pattern=[]>
+ : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
+ iname, suffix, "$Rda, $Qm", cstr, pattern> {
+ bits<3> Qm;
+ bits<4> Rda;
+
+ let Inst{28} = U;
+ let Inst{22-20} = 0b111;
+ let Inst{19-18} = size{1-0};
+ let Inst{17-16} = 0b01;
+ let Inst{15-13} = Rda{3-1};
+ let Inst{12} = 0b0;
+ let Inst{8-6} = 0b100;
+ let Inst{5} = A;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+multiclass MVE_VADDV_A<string suffix, bit U, bits<2> size,
+ list<dag> pattern=[]> {
+ def acc : MVE_VADDV<"vaddva", suffix,
+ (ins tGPREven:$Rda_src, MQPR:$Qm), "$Rda = $Rda_src",
+ 0b1, U, size, pattern>;
+ def no_acc : MVE_VADDV<"vaddv", suffix,
+ (ins MQPR:$Qm), "",
+ 0b0, U, size, pattern>;
+}
+
+defm MVE_VADDVs8 : MVE_VADDV_A<"s8", 0b0, 0b00>;
+defm MVE_VADDVs16 : MVE_VADDV_A<"s16", 0b0, 0b01>;
+defm MVE_VADDVs32 : MVE_VADDV_A<"s32", 0b0, 0b10>;
+defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>;
+defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
+defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
+
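+// VADDLV: add across vector with a 64-bit result, returned in an even/odd
+// GPR pair; only 32-bit element sizes are defined.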
+class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
+ bit A, bit U, list<dag> pattern=[]>
+ : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
+ suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
+ bits<3> Qm;
+ bits<4> RdaLo;
+ bits<4> RdaHi;
+
+ let Inst{28} = U;
+ let Inst{22-20} = RdaHi{3-1};
+ let Inst{19-18} = 0b10;
+ let Inst{17-16} = 0b01;
+ let Inst{15-13} = RdaLo{3-1};
+ let Inst{12} = 0b0;
+ let Inst{8-6} = 0b100;
+ let Inst{5} = A;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+multiclass MVE_VADDLV_A<string suffix, bit U, list<dag> pattern=[]> {
+ def acc : MVE_VADDLV<"vaddlva", suffix,
+ (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, MQPR:$Qm),
+ "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
+ 0b1, U, pattern>;
+ def no_acc : MVE_VADDLV<"vaddlv", suffix,
+ (ins MQPR:$Qm), "",
+ 0b0, U, pattern>;
+}
+
+
+defm MVE_VADDLVs32 : MVE_VADDLV_A<"s32", 0b0>;
+defm MVE_VADDLVu32 : MVE_VADDLV_A<"u32", 0b1>;
+
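+// VMINNMV/VMAXNMV: floating-point minimum/maximum across vector, also taking
+// the incoming scalar RdaSrc into account; the 'A' forms (VMINNMAV/VMAXNMAV)
+// compare absolute values of the elements.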
+class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
+ bit bit_17, bit bit_7, list<dag> pattern=[]>
+ : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
+ NoItinerary, iname, suffix, "$RdaSrc, $Qm",
+ "$RdaDest = $RdaSrc", pattern> {
+ bits<3> Qm;
+ bits<4> RdaDest;
+
+ let Inst{28} = sz;
+ let Inst{22-20} = 0b110;
+ let Inst{19-18} = 0b11;
+ let Inst{17} = bit_17;
+ let Inst{16} = 0b0;
+ let Inst{15-12} = RdaDest{3-0};
+ let Inst{8} = 0b1;
+ let Inst{7} = bit_7;
+ let Inst{6-5} = 0b00;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+
+ let Predicates = [HasMVEFloat];
+}
+
+multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
+ def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;
+ def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;
+}
+
+defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;
+defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;
+
+multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
+ def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;
+ def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;
+}
+
+defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;
+defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;
+
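+// VMINV/VMAXV: integer minimum/maximum across vector, combined with the
+// incoming scalar RdaSrc; the 'A' forms (VMINAV/VMAXAV) compare absolute
+// values of signed elements, so only signed variants exist.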
+class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
+ bit bit_17, bit bit_7, list<dag> pattern=[]>
+ : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
+ iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
+ bits<3> Qm;
+ bits<4> RdaDest;
+
+ let Inst{28} = U;
+ let Inst{22-20} = 0b110;
+ let Inst{19-18} = size{1-0};
+ let Inst{17} = bit_17;
+ let Inst{16} = 0b0;
+ let Inst{15-12} = RdaDest{3-0};
+ let Inst{8} = 0b1;
+ let Inst{7} = bit_7;
+ let Inst{6-5} = 0b00;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
+ def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b1, bit_7>;
+ def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b1, bit_7>;
+ def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b1, bit_7>;
+ def u8 : MVE_VMINMAXV<iname, "u8", 0b1, 0b00, 0b1, bit_7>;
+ def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7>;
+ def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7>;
+}
+
+defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
+defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;
+
+multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
+ def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;
+ def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
+ def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;
+}
+
+defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
+defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
+
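+// VMLADAV computes a dot product of Qn and Qm, summing the lane-wise
+// products into a scalar accumulator; VMLSDAV is the subtracting
+// counterpart. The 'A' forms accumulate onto the incoming RdaSrc, and the
+// 'X' (exchange) forms change which lanes of the two operands are paired.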
+class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
+ list<dag> pattern=[]>
+ : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
+ "$RdaDest, $Qn, $Qm", cstr, pattern> {
+ bits<4> RdaDest;
+ bits<3> Qm;
+ bits<3> Qn;
+
+ let Inst{28} = bit_28;
+ let Inst{22-20} = 0b111;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = sz;
+ let Inst{15-13} = RdaDest{3-1};
+ let Inst{12} = X;
+ let Inst{8} = bit_8;
+ let Inst{7-6} = 0b00;
+ let Inst{5} = A;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = bit_0;
+}
+
+multiclass MVE_VMLAMLSDAV_X<string iname, string suffix, dag iops, string cstr,
+ bit sz, bit bit_28, bit A, bit bit_8, bit bit_0,
+ list<dag> pattern=[]> {
+ def _noexch : MVE_VMLAMLSDAV<iname, suffix, iops, cstr, sz,
+ bit_28, A, 0b0, bit_8, bit_0, pattern>;
+ def _exch : MVE_VMLAMLSDAV<iname # "x", suffix, iops, cstr, sz,
+ bit_28, A, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLAMLSDAV_XA<string iname, string suffix, bit sz, bit bit_28,
+ bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ defm _noacc : MVE_VMLAMLSDAV_X<iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
+ sz, bit_28, 0b0, bit_8, bit_0, pattern>;
+ defm _acc : MVE_VMLAMLSDAV_X<iname # "a", suffix,
+ (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
+ "$RdaDest = $RdaSrc",
+ sz, bit_28, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit U, bit bit_8,
+ list<dag> pattern=[]> {
+ defm "" : MVE_VMLAMLSDAV_XA<"vmladav", suffix, sz, U, bit_8, 0b0, pattern>;
+}
+
+defm MVE_VMLADAVs16 : MVE_VMLADAV_multi<"s16", 0b0, 0b0, 0b0>;
+defm MVE_VMLADAVs32 : MVE_VMLADAV_multi<"s32", 0b1, 0b0, 0b0>;
+defm MVE_VMLADAVu16 : MVE_VMLADAV_multi<"u16", 0b0, 0b1, 0b0>;
+defm MVE_VMLADAVu32 : MVE_VMLADAV_multi<"u32", 0b1, 0b1, 0b0>;
+
+defm MVE_VMLADAVs8 : MVE_VMLADAV_multi<"s8", 0b0, 0b0, 0b1>;
+defm MVE_VMLADAVu8 : MVE_VMLADAV_multi<"u8", 0b0, 0b1, 0b1>;
+
+// vmlav aliases vmladav
+foreach acc = ["_acc", "_noacc"] in {
+ foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
+ def : MVEInstAlias<!strconcat("vmlav", !if(!eq(acc, "_acc"), "a", ""),
+ "${vp}.", suffix, "\t$RdaDest, $Qn, $Qm"),
+ (!cast<Instruction>("MVE_VMLADAV"#suffix#acc#"_noexch")
+ tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+ }
+}
+
+multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
+ list<dag> pattern=[]> {
+ defm "" : MVE_VMLAMLSDAV_XA<"vmlsdav", suffix, sz, bit_28, 0b0, 0b1, pattern>;
+}
+
+defm MVE_VMLSDAVs8 : MVE_VMLSDAV_multi<"s8", 0, 0b1>;
+defm MVE_VMLSDAVs16 : MVE_VMLSDAV_multi<"s16", 0, 0b0>;
+defm MVE_VMLSDAVs32 : MVE_VMLSDAV_multi<"s32", 1, 0b0>;
+
+// Base class for VMLALDAV, VMLSLDAV, VRMLALDAVH and VRMLSLDAVH
+class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
+ list<dag> pattern=[]>
+ : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
+ iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
+ bits<4> RdaLoDest;
+ bits<4> RdaHiDest;
+ bits<3> Qm;
+ bits<3> Qn;
+
+ let Inst{28} = bit_28;
+ let Inst{22-20} = RdaHiDest{3-1};
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = sz;
+ let Inst{15-13} = RdaLoDest{3-1};
+ let Inst{12} = X;
+ let Inst{8} = bit_8;
+ let Inst{7-6} = 0b00;
+ let Inst{5} = A;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = bit_0;
+}
+
+multiclass MVE_VMLALDAVBase_X<string iname, string suffix, dag iops,
+ string cstr, bit sz, bit bit_28, bit A,
+ bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ def _noexch : MVE_VMLALDAVBase<iname, suffix, iops, cstr, sz,
+ bit_28, A, 0b0, bit_8, bit_0, pattern>;
+ def _exch : MVE_VMLALDAVBase<iname # "x", suffix, iops, cstr, sz,
+ bit_28, A, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VMLALDAVBase_XA<string iname, string suffix, bit sz, bit bit_28,
+ bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ defm _noacc : MVE_VMLALDAVBase_X<
+ iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
+ sz, bit_28, 0b0, bit_8, bit_0, pattern>;
+ defm _acc : MVE_VMLALDAVBase_X<
+ iname # "a", suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc,
+ MQPR:$Qn, MQPR:$Qm),
+ "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
+ sz, bit_28, 0b1, bit_8, bit_0, pattern>;
+}
+
+multiclass MVE_VRMLALDAVH_multi<string suffix, bit U, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_XA<
+ "vrmlaldavh", suffix, 0b0, U, 0b1, 0b0, pattern>;
+}
+
+defm MVE_VRMLALDAVHs32 : MVE_VRMLALDAVH_multi<"s32", 0>;
+defm MVE_VRMLALDAVHu32 : MVE_VRMLALDAVH_multi<"u32", 1>;
+
+// vrmlalvh aliases for vrmlaldavh
+def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
+ (MVE_VRMLALDAVHs32_noacc_noexch
+ tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
+ (MVE_VRMLALDAVHs32_acc_noexch
+ tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
+ (MVE_VRMLALDAVHu32_noacc_noexch
+ tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
+ (MVE_VRMLALDAVHu32_acc_noexch
+ tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+
+multiclass MVE_VMLALDAV_multi<string suffix, bit sz, bit U,
+ list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_XA<"vmlaldav", suffix, sz, U, 0b0, 0b0, pattern>;
+}
+
+defm MVE_VMLALDAVs16 : MVE_VMLALDAV_multi<"s16", 0b0, 0b0>;
+defm MVE_VMLALDAVs32 : MVE_VMLALDAV_multi<"s32", 0b1, 0b0>;
+defm MVE_VMLALDAVu16 : MVE_VMLALDAV_multi<"u16", 0b0, 0b1>;
+defm MVE_VMLALDAVu32 : MVE_VMLALDAV_multi<"u32", 0b1, 0b1>;
+
+// vmlalv aliases vmlaldav
+foreach acc = ["_acc", "_noacc"] in {
+ foreach suffix = ["s16", "s32", "u16", "u32"] in {
+ def : MVEInstAlias<!strconcat("vmlalv", !if(!eq(acc, "_acc"), "a", ""),
+ "${vp}.", suffix, "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm"),
+ (!cast<Instruction>("MVE_VMLALDAV"#suffix#acc#"_noexch")
+ tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest,
+ MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+ }
+}
+
+multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
+ bit bit_28, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_XA<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
+}
+
+defm MVE_VMLSLDAVs16 : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
+defm MVE_VMLSLDAVs32 : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
+defm MVE_VRMLSLDAVHs32 : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
+
+// end of mve_rDest instructions
+
+// start of mve_comp instructions
+
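+// Base class for the two-input, one-output vector operations defined in
+// this section (the VMAXNM/VMINNM and VMAX/VMIN families), which share
+// most of their encoding.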
+class MVE_comp<InstrItinClass itin, string iname, string suffix,
+ string cstr, list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
+ "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+ bits<4> Qm;
+
+ let Inst{22} = Qd{3};
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = 0b0;
+ let Inst{10-9} = 0b11;
+ let Inst{7} = Qn{3};
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
+ list<dag> pattern=[]>
+ : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+
+ let Inst{28} = 0b1;
+ let Inst{25-24} = 0b11;
+ let Inst{23} = 0b0;
+ let Inst{21} = bit_21;
+ let Inst{20} = sz;
+ let Inst{11} = 0b1;
+ let Inst{8} = 0b1;
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b1;
+
+ let Predicates = [HasMVEFloat];
+}
+
+def MVE_VMAXNMf32 : MVE_VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>;
+def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+ (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+ def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+ (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
+def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+ (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+ def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+ (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+
+class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
+ bit bit_4, list<dag> pattern=[]>
+ : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+
+ let Inst{28} = U;
+ let Inst{25-24} = 0b11;
+ let Inst{23} = 0b0;
+ let Inst{21-20} = size{1-0};
+ let Inst{11} = 0b0;
+ let Inst{8} = 0b0;
+ let Inst{6} = 0b1;
+ let Inst{4} = bit_4;
+}
+
+multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
+ def s8 : MVE_VMINMAX<iname, "s8", 0b0, 0b00, bit_4>;
+ def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
+ def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
+ def u8 : MVE_VMINMAX<iname, "u8", 0b1, 0b00, bit_4>;
+ def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
+ def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
+}
+
+defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
+defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+// end of mve_comp instructions
+
+// start of mve_bit instructions
+
+class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
+ string ops, string cstr, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{22} = Qd{3};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+}
+
+def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+ "vbic", "", "$Qd, $Qn, $Qm", ""> {
+ bits<4> Qn;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b110;
+ let Inst{21-20} = 0b01;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b00001;
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b1;
+ let Inst{0} = 0b0;
+}
+
+class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7>
+ : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
+ suffix, "$Qd, $Qm", ""> {
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17-16} = 0b00;
+ let Inst{12-9} = 0b0000;
+ let Inst{8-7} = bit_8_7;
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00>;
+def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>;
+def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>;
+
+def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
+def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
+
+def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
+ (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
+ def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
+ (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
+ def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
+ (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
+
+ def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
+ (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
+ def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
+ (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
+
+ def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
+ (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
+
+ def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
+ (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
+ def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
+ (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
+ def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
+ (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+}
+
+def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
+ "vmvn", "", "$Qd, $Qm", ""> {
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{21-16} = 0b110000;
+ let Inst{12-6} = 0b0010111;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
+ (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
+ def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
+ (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
+ def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
+ (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+ def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
+ (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+}
+
+class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
+ : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+ iname, "", "$Qd, $Qn, $Qm", ""> {
+ bits<4> Qn;
+
+ let Inst{28} = bit_28;
+ let Inst{25-23} = 0b110;
+ let Inst{21-20} = bit_21_20;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b00001;
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b1;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
+def MVE_VORN : MVE_bit_ops<"vorn", 0b11, 0b0>;
+def MVE_VORR : MVE_bit_ops<"vorr", 0b10, 0b0>;
+def MVE_VAND : MVE_bit_ops<"vand", 0b00, 0b0>;
+
+// add ignored suffixes as aliases
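+// (e.g. "vand.i16 q0, q1, q2" is accepted and encoded identically to the
+// suffix-less "vand q0, q1, q2")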
+
+foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in {
+ def : MVEInstAlias<"vbic${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+ (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+ def : MVEInstAlias<"veor${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+ (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+ def : MVEInstAlias<"vorn${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+ (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+ def : MVEInstAlias<"vorr${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+ (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+ def : MVEInstAlias<"vand${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+ (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+}
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+ (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+
+ def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+ (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+}
+
+class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
+ : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
+ iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
+ bits<8> imm;
+ bits<4> Qd;
+
+ let Inst{28} = imm{7};
+ let Inst{27-23} = 0b11111;
+ let Inst{22} = Qd{3};
+ let Inst{21-19} = 0b000;
+ let Inst{18-16} = imm{6-4};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = 0b0;
+ let Inst{11-8} = cmode;
+ let Inst{7-6} = 0b01;
+ let Inst{4} = 0b1;
+ let Inst{3-0} = imm{3-0};
+}
+
+class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
+ : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
+ let Inst{5} = 0b0;
+}
+
+def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>;
+def MVE_VORRIZ0v8i16 : MVE_VORR<"i16", 0b1001, expzero00>;
+def MVE_VORRIZ8v4i32 : MVE_VORR<"i32", 0b0011, expzero08>;
+def MVE_VORRIZ8v8i16 : MVE_VORR<"i16", 0b1011, expzero08>;
+def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
+def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
+
+def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+ (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
+ (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+ (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
+ (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+ (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+ (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+
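+// A whole-register "vmov $Qd, $Qm" is accepted as an alias and encoded as
+// "vorr $Qd, $Qm, $Qm".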
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
+ (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+
+class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
+ : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
+ let Inst{5} = 0b1;
+}
+
+def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>;
+def MVE_VBICIZ0v8i16 : MVE_VBIC<"i16", 0b1001, expzero00>;
+def MVE_VBICIZ8v4i32 : MVE_VBIC<"i32", 0b0011, expzero08>;
+def MVE_VBICIZ8v8i16 : MVE_VBIC<"i16", 0b1011, expzero08>;
+def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
+def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
+
+def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+ (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
+ (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+ (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
+ (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+ (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+ (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+
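+// Parameter class describing the two directions of a GPR <-> vector-lane
+// move: MVE_VMOV_from_lane extracts a lane into rGPR $Rt, while
+// MVE_VMOV_to_lane inserts rGPR $Rt into a lane of the destination vector.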
+class MVE_VMOV_lane_direction {
+ bit bit_20;
+ dag oops;
+ dag iops;
+ string ops;
+ string cstr;
+}
+def MVE_VMOV_from_lane : MVE_VMOV_lane_direction {
+ let bit_20 = 0b1;
+ let oops = (outs rGPR:$Rt);
+ let iops = (ins MQPR:$Qd);
+ let ops = "$Rt, $Qd$Idx";
+ let cstr = "";
+}
+def MVE_VMOV_to_lane : MVE_VMOV_lane_direction {
+ let bit_20 = 0b0;
+ let oops = (outs MQPR:$Qd);
+ let iops = (ins MQPR:$Qd_src, rGPR:$Rt);
+ let ops = "$Qd$Idx, $Rt";
+ let cstr = "$Qd = $Qd_src";
+}
+
+class MVE_VMOV_lane<string suffix, bit U, dag indexop,
+ MVE_VMOV_lane_direction dir>
+ : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary,
+ "vmov", suffix, dir.ops, dir.cstr, []> {
+ bits<4> Qd;
+ bits<4> Rt;
+
+ let Inst{31-24} = 0b11101110;
+ let Inst{23} = U;
+ let Inst{20} = dir.bit_20;
+ let Inst{19-17} = Qd{2-0};
+ let Inst{15-12} = Rt{3-0};
+ let Inst{11-8} = 0b1011;
+ let Inst{7} = Qd{3};
+ let Inst{4-0} = 0b10000;
+}
+
+class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
+ : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
+ bits<2> Idx;
+ let Inst{22} = 0b0;
+ let Inst{6-5} = 0b00;
+ let Inst{16} = Idx{1};
+ let Inst{21} = Idx{0};
+
+ let Predicates = [HasFPRegsV8_1M];
+}
+
+class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
+ : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
+ bits<3> Idx;
+ let Inst{22} = 0b0;
+ let Inst{5} = 0b1;
+ let Inst{16} = Idx{2};
+ let Inst{21} = Idx{1};
+ let Inst{6} = Idx{0};
+}
+
+class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
+ : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
+ bits<4> Idx;
+ let Inst{22} = 0b1;
+ let Inst{16} = Idx{3};
+ let Inst{21} = Idx{2};
+ let Inst{6} = Idx{1};
+ let Inst{5} = Idx{0};
+}
+
+def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_to_lane>;
+def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
+def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_16 : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
+def MVE_VMOV_from_lane_s8 : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
+def MVE_VMOV_from_lane_u8 : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_8 : MVE_VMOV_lane_8 < "8", 0b0, MVE_VMOV_to_lane>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane),
+ (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>;
+ def : Pat<(insertelt (v2f64 MQPR:$src1), DPR:$src2, imm:$lane),
+ (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), DPR:$src2, (DSubReg_f64_reg imm:$lane))>;
+
+ def : Pat<(extractelt (v4i32 MQPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS
+ (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
+ def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane),
+ (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+
+ def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
+ (MVE_VMOV_to_lane_8 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+ def : Pat<(vector_insert (v8i16 MQPR:$src1), rGPR:$src2, imm:$lane),
+ (MVE_VMOV_to_lane_16 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+
+ def : Pat<(ARMvgetlanes (v16i8 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
+
+ def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+ (MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+ def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+ (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+ (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+
+ // Floating point patterns, still enabled under HasMVEInt
+ def : Pat<(extractelt (v4f32 MQPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), SPR)>;
+ def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
+ (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
+
+ def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
+ (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
+ def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane),
+ (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>;
+
+ def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+ def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
+ (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+ def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
+ def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
+ (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+}
+
+// end of mve_bit instructions
+
+// start of MVE Integer instructions
+
+class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
+ iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+ bits<4> Qm;
+
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b1;
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_int<"vmul", suffix, size, pattern> {
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b01001;
+ let Inst{4} = 0b1;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VMULt1i8 : MVE_VMULt1<"i8", 0b00>;
+def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
+def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
+ list<dag> pattern=[]>
+ : MVE_int<iname, suffix, size, pattern> {
+
+ let Inst{28} = rounding;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b01011;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
+class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
+
+def MVE_VQDMULHi8 : MVE_VQDMULH<"s8", 0b00>;
+def MVE_VQDMULHi16 : MVE_VQDMULH<"s16", 0b01>;
+def MVE_VQDMULHi32 : MVE_VQDMULH<"s32", 0b10>;
+
+def MVE_VQRDMULHi8 : MVE_VQRDMULH<"s8", 0b00>;
+def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
+def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
+
+class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
+ list<dag> pattern=[]>
+ : MVE_int<iname, suffix, size, pattern> {
+
+ let Inst{28} = subtract;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b01000;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
+class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
+
+def MVE_VADDi8 : MVE_VADD<"i8", 0b00>;
+def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
+def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+def MVE_VSUBi8 : MVE_VSUB<"i8", 0b00>;
+def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
+def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+ (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
+class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
+ bits<2> size, list<dag> pattern=[]>
+ : MVE_int<iname, suffix, size, pattern> {
+
+ let Inst{28} = U;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-10} = 0b000;
+ let Inst{9} = subtract;
+ let Inst{8} = 0b0;
+ let Inst{4} = 0b1;
+ let Inst{0} = 0b0;
+}
+
+class MVE_VQADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+ : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>;
+class MVE_VQSUB<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+ : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>;
+
+def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00>;
+def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>;
+def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>;
+def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00>;
+def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>;
+def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>;
+
+def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00>;
+def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>;
+def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>;
+def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00>;
+def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>;
+def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>;
+
+class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+ : MVE_int<"vabd", suffix, size, pattern> {
+
+ let Inst{28} = U;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b00111;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>;
+def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
+def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
+def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>;
+def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
+def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+
+class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+ : MVE_int<"vrhadd", suffix, size, pattern> {
+
+ let Inst{28} = U;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-8} = 0b00001;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>;
+def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
+def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
+def MVE_VRHADDu8 : MVE_VRHADD<"u8", 0b1, 0b00>;
+def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
+def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
+
+class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
+ bits<2> size, list<dag> pattern=[]>
+ : MVE_int<iname, suffix, size, pattern> {
+
+ let Inst{28} = U;
+ let Inst{25-23} = 0b110;
+ let Inst{16} = 0b0;
+ let Inst{12-10} = 0b000;
+ let Inst{9} = subtract;
+ let Inst{8} = 0b0;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+class MVE_VHADD<string suffix, bit U, bits<2> size,
+ list<dag> pattern=[]>
+ : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
+class MVE_VHSUB<string suffix, bit U, bits<2> size,
+ list<dag> pattern=[]>
+ : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
+
+def MVE_VHADDs8 : MVE_VHADD<"s8", 0b0, 0b00>;
+def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
+def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
+def MVE_VHADDu8 : MVE_VHADD<"u8", 0b1, 0b00>;
+def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
+def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;
+
+def MVE_VHSUBs8 : MVE_VHSUB<"s8", 0b0, 0b00>;
+def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
+def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
+def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>;
+def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
+def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
+
+class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
+ "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
+ bits<4> Qd;
+ bits<4> Rt;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b101;
+ let Inst{22} = B;
+ let Inst{21-20} = 0b10;
+ let Inst{19-17} = Qd{2-0};
+ let Inst{16} = 0b0;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1011;
+ let Inst{7} = Qd{3};
+ let Inst{6} = 0b0;
+ let Inst{5} = E;
+ let Inst{4-0} = 0b10000;
+}
+
+def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
+def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
+def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP8 rGPR:$elem)>;
+ def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP16 rGPR:$elem)>;
+ def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP32 rGPR:$elem)>;
+
+ def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
+ (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
+ // For the 16-bit and 8-bit vduplanes we don't care about the signedness
+ // of the lane move operation as we only want the lowest 8/16 bits anyway.
+ def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
+ (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+ def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
+ (MVE_VDUP8 (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
+
+ def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
+ (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
+ def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
+ (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
+
+ def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
+ (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
+ def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
+ (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+}
+
+
+class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
+ list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
+ iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{22} = Qd{3};
+ let Inst{19-18} = size{1-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
+ bit count_zeroes, list<dag> pattern=[]>
+ : MVEIntSingleSrc<iname, suffix, size, pattern> {
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{21-20} = 0b11;
+ let Inst{17-16} = 0b00;
+ let Inst{12-8} = 0b00100;
+ let Inst{7} = count_zeroes;
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
+def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
+def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+
+def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
+def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
+def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+
+class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
+ list<dag> pattern=[]>
+ : MVEIntSingleSrc<iname, suffix, size, pattern> {
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{21-20} = 0b11;
+ let Inst{17-16} = 0b01;
+ let Inst{12-8} = 0b00011;
+ let Inst{7} = negate;
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
+def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
+def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
+ (v16i8 (MVE_VABSs8 $v))>;
+ def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
+ (v8i16 (MVE_VABSs16 $v))>;
+ def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
+ (v4i32 (MVE_VABSs32 $v))>;
+}
+
+def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>;
+def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
+def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
+ (v16i8 (MVE_VNEGs8 $v))>;
+ def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
+ (v8i16 (MVE_VNEGs16 $v))>;
+ def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
+ (v4i32 (MVE_VNEGs32 $v))>;
+}
+
+class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
+ bit negate, list<dag> pattern=[]>
+ : MVEIntSingleSrc<iname, suffix, size, pattern> {
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{21-20} = 0b11;
+ let Inst{17-16} = 0b00;
+ let Inst{12-8} = 0b00111;
+ let Inst{7} = negate;
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
+def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
+def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
+
+def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
+def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
+def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
+
+class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
+ dag iops, list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
+ vpred_r, "", pattern> {
+ bits<13> imm;
+ bits<4> Qd;
+
+ let Inst{28} = imm{7};
+ let Inst{25-23} = 0b111;
+ let Inst{22} = Qd{3};
+ let Inst{21-19} = 0b000;
+ let Inst{18-16} = imm{6-4};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = 0b0;
+ let Inst{11-8} = cmode{3-0};
+ let Inst{7-6} = 0b01;
+ let Inst{5} = op;
+ let Inst{4} = 0b1;
+ let Inst{3-0} = imm{3-0};
+
+ let DecoderMethod = "DecodeMVEModImmInstruction";
+}
+
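+// These immediate moves can be freely rematerialized by the register
+// allocator instead of being spilled and reloaded.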
+let isReMaterializable = 1 in {
+let isAsCheapAsAMove = 1 in {
+def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
+def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
+ let Inst{9} = imm{9};
+}
+def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
+ let Inst{11-8} = imm{11-8};
+}
+def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
+def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
+} // let isAsCheapAsAMove = 1
+
+def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
+ let Inst{9} = imm{9};
+}
+def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
+ let Inst{11-8} = imm{11-8};
+}
+} // let isReMaterializable = 1
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
+ (v16i8 (MVE_VMOVimmi8 nImmSplatI8:$simm))>;
+ def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
+ (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
+ def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
+ (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
+
+ def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
+ (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
+ def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
+ (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
+
+ def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
+ (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+}
+
+class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
+ bit bit_12, list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+ NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
+ pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17-16} = 0b11;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = bit_12;
+ let Inst{11-6} = 0b111010;
+ let Inst{5} = Qm{3};
+ let Inst{4} = 0b0;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b1;
+}
+
+def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
+def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
+def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
+
+def MVE_VMINAs8 : MVE_VMINMAXA<"vmina", "s8", 0b00, 0b1>;
+def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
+def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
+
+// end of MVE Integer instructions
+
+// start of mve_imm_shift instructions
+
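+// VSHLC shifts the whole vector left by $imm bits, shifting bits out into
+// (and new bits in from) the general-purpose register $Rdm.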
+def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
+ (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
+ NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
+ vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
+ bits<5> imm;
+ bits<4> Qd;
+ bits<4> RdmDest;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b101;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b1;
+ let Inst{20-16} = imm{4-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-4} = 0b011111100;
+ let Inst{3-0} = RdmDest{3-0};
+}
+
+class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
+ string ops, vpred_ops vpred, string cstr,
+ list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{22} = Qd{3};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+ list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
+ iname, suffix, "$Qd, $Qm", vpred_r, "",
+ pattern> {
+ let Inst{28} = U;
+ let Inst{25-23} = 0b101;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = sz{1-0};
+ let Inst{18-16} = 0b000;
+ let Inst{11-6} = 0b111101;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
+ list<dag> pattern=[]> {
+ def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
+ let Inst{12} = 0b0;
+ }
+ def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
+ let Inst{12} = 0b1;
+ }
+}
+
+defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
+defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
+defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
+defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
+ (MVE_VMOVLs16bh MQPR:$src)>;
+ def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
+ (MVE_VMOVLs8bh MQPR:$src)>;
+ def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
+ (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
+
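+ // The masks below are written in the encoded (cmode:imm8) VMOV
+ // modified-immediate form: 0xCFF corresponds to a per-lane 0x0000ffff mask
+ // and 0x8FF to a per-lane 0x00ff mask.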
+ // zext_inreg 16 -> 32
+ def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (MVE_VMOVLu16bh MQPR:$src)>;
+ // zext_inreg 8 -> 16
+ def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+ (MVE_VMOVLu8bh MQPR:$src)>;
+}
+
+
+class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
+ dag immops, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+ iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
+ let Inst{28} = U;
+ let Inst{25-23} = 0b101;
+ let Inst{21} = 0b1;
+ let Inst{12} = th;
+ let Inst{11-6} = 0b111101;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+// The immediate VSHLL instructions accept shift counts from 1 up to
+// the lane width (8 or 16), but the full-width shifts have an
+// entirely separate encoding, given below with 'lw' in the name.
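+// (so "vshllb.s8 q0, q1, #7" uses the immediate form below, while
+// "vshllb.s8 q0, q1, #8" is matched by the 'lw' definitions)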
+
+class MVE_VSHLL_imm8<string iname, string suffix,
+ bit U, bit th, list<dag> pattern=[]>
+ : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+ bits<3> imm;
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = imm;
+}
+
+class MVE_VSHLL_imm16<string iname, string suffix,
+ bit U, bit th, list<dag> pattern=[]>
+ : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+ bits<4> imm;
+ let Inst{20} = 0b1;
+ let Inst{19-16} = imm;
+}
+
+def MVE_VSHLL_imms8bh : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
+def MVE_VSHLL_imms8th : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
+def MVE_VSHLL_immu8bh : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
+def MVE_VSHLL_immu8th : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
+def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
+def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
+def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
+def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
+
+class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
+ bit U, string ops, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
+ iname, suffix, ops, vpred_r, "", pattern> {
+ let Inst{28} = U;
+ let Inst{25-23} = 0b100;
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size{1-0};
+ let Inst{17-16} = 0b01;
+ let Inst{11-6} = 0b111000;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b1;
+}
+
+multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
+ string ops, list<dag> pattern=[]> {
+ def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
+ let Inst{12} = 0b0;
+ }
+ def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
+ let Inst{12} = 0b1;
+ }
+}
+
+defm MVE_VSHLL_lws8 : MVE_VSHLL_lw<"vshll", "s8", 0b00, 0b0, "$Qd, $Qm, #8">;
+defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
+defm MVE_VSHLL_lwu8 : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
+defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
+
+class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
+ dag immops, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+ pattern> {
+ bits<5> imm;
+
+ let Inst{28} = bit_28;
+ let Inst{25-23} = 0b101;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = imm{4-0};
+ let Inst{12} = bit_12;
+ let Inst{11-6} = 0b111111;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b1;
+}
+
+def MVE_VRSHRNi16bh : MVE_VxSHRN<
+ "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VRSHRNi16th : MVE_VxSHRN<
+ "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VRSHRNi32bh : MVE_VxSHRN<
+ "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+def MVE_VRSHRNi32th : MVE_VxSHRN<
+ "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+
+def MVE_VSHRNi16bh : MVE_VxSHRN<
+ "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VSHRNi16th : MVE_VxSHRN<
+ "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VSHRNi32bh : MVE_VxSHRN<
+ "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+def MVE_VSHRNi32th : MVE_VxSHRN<
+ "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+
+class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
+ list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+ pattern> {
+ bits<5> imm;
+
+ let Inst{28} = bit_28;
+ let Inst{25-23} = 0b101;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = imm{4-0};
+ let Inst{12} = bit_12;
+ let Inst{11-6} = 0b111111;
+ let Inst{4} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
+ "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
+ "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
+ "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
+ "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+
+def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
+ "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
+ "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+ let Inst{20-19} = 0b01;
+}
+def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
+ "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
+ "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+ let Inst{20} = 0b1;
+}
+
+class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
+ dag immops, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+ pattern> {
+ bits<5> imm;
+
+ let Inst{25-23} = 0b101;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = imm{4-0};
+ let Inst{12} = bit_12;
+ let Inst{11-6} = 0b111101;
+ let Inst{4} = 0b0;
+ let Inst{0} = bit_0;
+}
+
+multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
+ def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{20-19} = 0b01;
+ }
+ def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{20-19} = 0b01;
+ }
+ def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{20} = 0b1;
+ }
+ def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{20} = 0b1;
+ }
+}
+
+defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
+defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
+defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
+defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
+
+// end of mve_imm_shift instructions
+
+// start of mve_shift instructions
+
+class MVE_shift_by_vec<string iname, string suffix, bit U,
+ bits<2> size, bit bit_4, bit bit_8>
+ : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
+ iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
+ // Shift instructions which take a vector of shift counts
+ bits<4> Qd;
+ bits<4> Qm;
+ bits<4> Qn;
+
+ let Inst{28} = U;
+ let Inst{25-24} = 0b11;
+ let Inst{23} = 0b0;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-9} = 0b0010;
+ let Inst{8} = bit_8;
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b1;
+ let Inst{5} = Qm{3};
+ let Inst{4} = bit_4;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
+ def s8 : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
+ def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
+ def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
+ def u8 : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
+ def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
+ def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
+}
+
+defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
+defm MVE_VQSHL_by_vec : mve_shift_by_vec_multi<"vqshl", 0b1, 0b0>;
+defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
+defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+ (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+ def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+ (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+ def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+ (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+
+ def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+ (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+ def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+ (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+ def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+ (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+}
+
+class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
+ string ops, vpred_ops vpred, string cstr,
+ list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{23} = 0b1;
+ let Inst{22} = Qd{3};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-11} = 0b00;
+ let Inst{7-6} = 0b01;
+ let Inst{5} = Qm{3};
+ let Inst{4} = 0b1;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
+ : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
+ !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
+ "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
+ bits<6> imm;
+ let Inst{28} = 0b1;
+ let Inst{25-24} = 0b11;
+ let Inst{21-16} = imm;
+ let Inst{10-9} = 0b10;
+ let Inst{8} = bit_8;
+}
+
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
+ let Inst{21} = 0b1;
+}
+
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1, (ins imm0_31:$imm)> {
+ let Inst{21} = 0b1;
+}
+
+class MVE_VQSHL_imm<string suffix, dag imm>
+ : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
+ !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+ vpred_r, ""> {
+ bits<6> imm;
+
+ let Inst{25-24} = 0b11;
+ let Inst{21-16} = imm;
+ let Inst{10-8} = 0b111;
+}
+
+def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21} = 0b1;
+}
+
+def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21} = 0b1;
+}
+
+class MVE_VQSHLU_imm<string suffix, dag imm>
+ : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
+ !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+ vpred_r, ""> {
+ bits<6> imm;
+
+ let Inst{28} = 0b1;
+ let Inst{25-24} = 0b11;
+ let Inst{21-16} = imm;
+ let Inst{10-8} = 0b110;
+}
+
+def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
+ let Inst{21} = 0b1;
+}
+
+class MVE_VRSHR_imm<string suffix, dag imm>
+ : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
+ !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+ vpred_r, ""> {
+ bits<6> imm;
+
+ let Inst{25-24} = 0b11;
+ let Inst{21-16} = imm;
+ let Inst{10-8} = 0b010;
+}
+
+def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21} = 0b1;
+}
+
+def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21} = 0b1;
+}
+
+class MVE_VSHR_imm<string suffix, dag imm>
+ : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
+ !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+ vpred_r, ""> {
+ bits<6> imm;
+
+ let Inst{25-24} = 0b11;
+ let Inst{21-16} = imm;
+ let Inst{10-8} = 0b000;
+}
+
+def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
+ let Inst{28} = 0b0;
+ let Inst{21} = 0b1;
+}
+
+def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
+ let Inst{28} = 0b1;
+ let Inst{21} = 0b1;
+}
+
+class MVE_VSHL_imm<string suffix, dag imm>
+ : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
+ !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+ vpred_r, ""> {
+ bits<6> imm;
+
+ let Inst{28} = 0b0;
+ let Inst{25-24} = 0b11;
+ let Inst{21-16} = imm;
+ let Inst{10-8} = 0b101;
+}
+
+def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
+ let Inst{21-19} = 0b001;
+}
+
+def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
+ let Inst{21-20} = 0b01;
+}
+
+def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
+ let Inst{21} = 0b1;
+}
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
+ (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+ def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
+ (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+ def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
+ (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+ def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
+ (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+ def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
+ (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+ def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
+ (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+ def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
+ (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+ def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
+ (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+ def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
+ (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+}
+
+// end of mve_shift instructions
+
+// start of MVE Floating Point instructions
+
+class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
+ vpred_ops vpred, string cstr, list<dag> pattern=[]>
+ : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<4> Qm;
+
+ let Inst{12} = 0b0;
+ let Inst{6} = 0b1;
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b0;
+}
+
+class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
+ list<dag> pattern=[]>
+ : MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ bits<4> Qd;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17-16} = 0b10;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-10} = 0b01;
+ let Inst{9-7} = op{2-0};
+ let Inst{4} = 0b0;
+}
+
+multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
+ def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
+ def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
+ def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
+ def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
+ def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
+ def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
+}
+
+defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
+ (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
+ def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
+ (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
+ def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
+ (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
+ def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
+ (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
+ def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
+ (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
+ def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
+ (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
+ def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
+ (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
+ def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
+ (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
+ def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
+ (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
+ def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
+ (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+}
+
+class MVEFloatArithNeon<string iname, string suffix, bit size,
+ dag oops, dag iops, string ops,
+ vpred_ops vpred, string cstr, list<dag> pattern=[]>
+ : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
+ let Inst{20} = size;
+ let Inst{16} = 0b0;
+}
+
+class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
+ pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b110;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b0;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-8} = 0b01101;
+ let Inst{7} = Qn{3};
+ let Inst{4} = 0b1;
+}
+
+def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
+def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+ (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+ def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+ (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
+ (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
+ "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+ bits<2> rot;
+
+ let Inst{28} = 0b1;
+ let Inst{25} = 0b0;
+ let Inst{24-23} = rot;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b1;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-8} = 0b01000;
+ let Inst{7} = Qn{3};
+ let Inst{4} = 0b0;
+}
+
+def MVE_VCMLAf16 : MVE_VCMLA<"f16", 0b0>;
+def MVE_VCMLAf32 : MVE_VCMLA<"f32", 0b1>;
+
+class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
+ bit bit_8, bit bit_21, dag iops=(ins),
+ vpred_ops vpred=vpred_r, string cstr="",
+ list<dag> pattern=[]>
+ : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
+ !con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
+ vpred, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b110;
+ let Inst{22} = Qd{3};
+ let Inst{21} = bit_21;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-9} = 0b110;
+ let Inst{8} = bit_8;
+ let Inst{7} = Qn{3};
+ let Inst{4} = bit_4;
+}
+
+def MVE_VFMAf32 : MVE_VADDSUBFMA_fp<"vfma", "f32", 0b0, 0b1, 0b0, 0b0,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+
+def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+
+def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
+def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+ (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+ def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+ (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
+def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+ (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+ def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+ (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
+ "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+ bit rot;
+
+ let Inst{28} = 0b1;
+ let Inst{25} = 0b0;
+ let Inst{24} = rot;
+ let Inst{23} = 0b1;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b0;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-8} = 0b01000;
+ let Inst{7} = Qn{3};
+ let Inst{4} = 0b0;
+}
+
+def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
+def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>;
+
+class MVE_VABD_fp<string suffix, bit size>
+ : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+ "$Qd, $Qn, $Qm", vpred_r, ""> {
+ bits<4> Qd;
+ bits<4> Qn;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b110;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b1;
+ let Inst{20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-8} = 0b1101;
+ let Inst{7} = Qn{3};
+ let Inst{4} = 0b0;
+}
+
+def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
+def MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>;
+
+class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
+ Operand imm_operand_type, list<dag> pattern=[]>
+ : MVE_float<"vcvt", suffix,
+ (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
+ "$Qd, $Qm, $imm6", vpred_r, "", pattern> {
+ bits<4> Qd;
+ bits<6> imm6;
+
+ let Inst{28} = U;
+ let Inst{25-23} = 0b111;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b1;
+ let Inst{19-16} = imm6{3-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-10} = 0b11;
+ let Inst{9} = fsi;
+ let Inst{8} = op;
+ let Inst{7} = 0b0;
+ let Inst{4} = 0b1;
+
+ let DecoderMethod = "DecodeMVEVCVTt1fp";
+}
+
+class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass {
+ let PredicateMethod = "isImmediate<1," # Bits # ">";
+ let DiagnosticString =
+ "MVE fixed-point immediate operand must be between 1 and " # Bits;
+ let Name = "MVEVcvtImm" # Bits;
+ let RenderMethod = "addImmOperands";
+}
+class MVE_VCVT_imm<int Bits>: Operand<i32> {
+ let ParserMatchClass = MVE_VCVT_imm_asmop<Bits>;
+ let EncoderMethod = "getNEONVcvtImm32OpValue";
+ let DecoderMethod = "DecodeVCVTImmOperand";
+}
+
+class MVE_VCVT_fix_f32<string suffix, bit U, bit op>
+ : MVE_VCVT_fix<suffix, 0b1, U, op, MVE_VCVT_imm<32>> {
+ let Inst{20} = imm6{4};
+}
+class MVE_VCVT_fix_f16<string suffix, bit U, bit op>
+ : MVE_VCVT_fix<suffix, 0b0, U, op, MVE_VCVT_imm<16>> {
+ let Inst{20} = 0b1;
+}
+
+def MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16<"f16.s16", 0b0, 0b0>;
+def MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16<"s16.f16", 0b0, 0b1>;
+def MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16<"f16.u16", 0b1, 0b0>;
+def MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16<"u16.f16", 0b1, 0b1>;
+def MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32<"f32.s32", 0b0, 0b0>;
+def MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32<"s32.f32", 0b0, 0b1>;
+def MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32<"f32.u32", 0b1, 0b0>;
+def MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32<"u32.f32", 0b1, 0b1>;
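+
+// These are the fixed-point <-> floating-point conversions; the #imm6
+// operand gives the number of fraction bits, limited to 1..16 or 1..32
+// by the asm operand classes above. Illustrative syntax (arbitrary
+// registers): vcvt.f32.s32 q0, q1, #16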
+
+class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
+ bits<2> rm, list<dag> pattern=[]>
+ : MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ bits<4> Qd;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17-16} = 0b11;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-10} = 0b000;
+ let Inst{9-8} = rm;
+ let Inst{7} = op;
+ let Inst{4} = 0b0;
+}
+
+multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
+ list<dag> pattern=[]> {
+ def a : MVE_VCVT_fp_int_anpm<suffix, size, op, "a", 0b00>;
+ def n : MVE_VCVT_fp_int_anpm<suffix, size, op, "n", 0b01>;
+ def p : MVE_VCVT_fp_int_anpm<suffix, size, op, "p", 0b10>;
+ def m : MVE_VCVT_fp_int_anpm<suffix, size, op, "m", 0b11>;
+}
+
+// This defines instructions such as MVE_VCVTu16f16a, with an explicit
+// rounding-mode suffix on the mnemonic. The class below will define
+// the bare MVE_VCVTu16f16 (with implied rounding toward zero).
+defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_multi<"s16.f16", 0b01, 0b0>;
+defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>;
+defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
+defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
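+
+// Illustrative syntax (arbitrary registers): vcvta.u16.f16 q0, q1,
+// i.e. convert to unsigned, rounding to nearest with ties away from zero.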
+
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+ list<dag> pattern=[]>
+ : MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ bits<4> Qd;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17-16} = 0b11;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-9} = 0b0011;
+ let Inst{8-7} = op;
+ let Inst{4} = 0b0;
+}
+
+// The unsuffixed VCVT for float->int implicitly rounds toward zero,
+// which is reflected here in the LLVM instruction names
+def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
+def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
+def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
+def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+// Whereas VCVT for int->float rounds to nearest
+def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
+def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
+def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
+def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
+ (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
+ def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
+ (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
+ def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
+ (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
+ def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
+ (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
+ def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
+ (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
+ def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
+ (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
+ def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
+ (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
+ def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
+ (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
+}
+
+class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
+ list<dag> pattern=[]>
+ : MVE_float<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ bits<4> Qd;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b111;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17-16} = 0b01;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-8} = 0b0111;
+ let Inst{7} = negate;
+ let Inst{4} = 0b0;
+}
+
+def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
+def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (fabs MQPR:$src)),
+ (MVE_VABSf16 MQPR:$src)>;
+ def : Pat<(v4f32 (fabs MQPR:$src)),
+ (MVE_VABSf32 MQPR:$src)>;
+}
+
+def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
+def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (fneg MQPR:$src)),
+ (MVE_VNEGf16 MQPR:$src)>;
+ def : Pat<(v4f32 (fneg MQPR:$src)),
+ (MVE_VNEGf32 MQPR:$src)>;
+}
+
+class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
+ list<dag> pattern=[]>
+ : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+ NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
+ pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{28} = size;
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{21-16} = 0b111111;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = bit_12;
+ let Inst{11-6} = 0b111010;
+ let Inst{5} = Qm{3};
+ let Inst{4} = 0b0;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b1;
+}
+
+def MVE_VMAXNMAf32 : MVE_VMAXMINNMA<"vmaxnma", "f32", 0b0, 0b0>;
+def MVE_VMAXNMAf16 : MVE_VMAXMINNMA<"vmaxnma", "f16", 0b1, 0b0>;
+
+def MVE_VMINNMAf32 : MVE_VMAXMINNMA<"vminnma", "f32", 0b0, 0b1>;
+def MVE_VMINNMAf16 : MVE_VMAXMINNMA<"vminnma", "f16", 0b1, 0b1>;
+
+// end of MVE Floating Point instructions
+
+// start of MVE compares
+
+class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
+ VCMPPredicateOperand predtype, list<dag> pattern=[]>
+ : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
+ NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
+ // Base class for comparing two vector registers
+ bits<3> fc;
+ bits<4> Qn;
+ bits<4> Qm;
+
+ let Inst{28} = bit_28;
+ let Inst{25-22} = 0b1000;
+ let Inst{21-20} = bits_21_20;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16-13} = 0b1000;
+ let Inst{12} = fc{2};
+ let Inst{11-8} = 0b1111;
+ let Inst{7} = fc{0};
+ let Inst{6} = 0b0;
+ let Inst{5} = Qm{3};
+ let Inst{4} = 0b0;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = fc{1};
+
+ let Constraints = "";
+
+ // We need a custom decoder method for these instructions because of
+ // the output VCCR operand, which isn't encoded in the instruction
+ // bits anywhere (there is only one choice for it) but has to be
+ // included in the MC operands so that codegen will be able to track
+ // its data flow between instructions, spill/reload it when
+ // necessary, etc. There seems to be no way to get the Tablegen
+ // decoder to emit an operand that isn't affected by any instruction
+ // bit.
+ let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">";
+}
+
+class MVE_VCMPqqf<string suffix, bit size>
+ : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
+ let Predicates = [HasMVEFloat];
+}
+
+class MVE_VCMPqqi<string suffix, bits<2> size>
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
+ let Inst{12} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+class MVE_VCMPqqu<string suffix, bits<2> size>
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
+ let Inst{12} = 0b0;
+ let Inst{0} = 0b1;
+}
+
+class MVE_VCMPqqs<string suffix, bits<2> size>
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
+ let Inst{12} = 0b1;
+}
+
+def MVE_VCMPf32 : MVE_VCMPqqf<"f32", 0b0>;
+def MVE_VCMPf16 : MVE_VCMPqqf<"f16", 0b1>;
+
+def MVE_VCMPi8 : MVE_VCMPqqi<"i8", 0b00>;
+def MVE_VCMPi16 : MVE_VCMPqqi<"i16", 0b01>;
+def MVE_VCMPi32 : MVE_VCMPqqi<"i32", 0b10>;
+
+def MVE_VCMPu8 : MVE_VCMPqqu<"u8", 0b00>;
+def MVE_VCMPu16 : MVE_VCMPqqu<"u16", 0b01>;
+def MVE_VCMPu32 : MVE_VCMPqqu<"u32", 0b10>;
+
+def MVE_VCMPs8 : MVE_VCMPqqs<"s8", 0b00>;
+def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
+def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
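+
+// Each of these compares two vectors lane-wise and writes a lane mask to
+// the vector predicate output $P0, e.g. (arbitrary registers and
+// condition) vcmp.s16 gt, q1, q2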
+
+class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
+ VCMPPredicateOperand predtype, list<dag> pattern=[]>
+ : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
+ NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
+ // Base class for comparing a vector register with a scalar
+ bits<3> fc;
+ bits<4> Qn;
+ bits<4> Rm;
+
+ let Inst{28} = bit_28;
+ let Inst{25-22} = 0b1000;
+ let Inst{21-20} = bits_21_20;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16-13} = 0b1000;
+ let Inst{12} = fc{2};
+ let Inst{11-8} = 0b1111;
+ let Inst{7} = fc{0};
+ let Inst{6} = 0b1;
+ let Inst{5} = fc{1};
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Rm{3-0};
+
+ let Constraints = "";
+ // Custom decoder method, for the same reason as MVE_VCMPqq
+ let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">";
+}
+
+class MVE_VCMPqrf<string suffix, bit size>
+ : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
+ let Predicates = [HasMVEFloat];
+}
+
+class MVE_VCMPqri<string suffix, bits<2> size>
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
+ let Inst{12} = 0b0;
+ let Inst{5} = 0b0;
+}
+
+class MVE_VCMPqru<string suffix, bits<2> size>
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
+ let Inst{12} = 0b0;
+ let Inst{5} = 0b1;
+}
+
+class MVE_VCMPqrs<string suffix, bits<2> size>
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
+ let Inst{12} = 0b1;
+}
+
+def MVE_VCMPf32r : MVE_VCMPqrf<"f32", 0b0>;
+def MVE_VCMPf16r : MVE_VCMPqrf<"f16", 0b1>;
+
+def MVE_VCMPi8r : MVE_VCMPqri<"i8", 0b00>;
+def MVE_VCMPi16r : MVE_VCMPqri<"i16", 0b01>;
+def MVE_VCMPi32r : MVE_VCMPqri<"i32", 0b10>;
+
+def MVE_VCMPu8r : MVE_VCMPqru<"u8", 0b00>;
+def MVE_VCMPu16r : MVE_VCMPqru<"u16", 0b01>;
+def MVE_VCMPu32r : MVE_VCMPqru<"u32", 0b10>;
+
+def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
+def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
+def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
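+
+// The same comparisons against a scalar broadcast from a GPR; the
+// GPRwithZR operand also accepts zr, e.g. (arbitrary registers and
+// condition) vcmp.i32 eq, q0, r1   or   vcmp.s32 gt, q0, zr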
+
+// end of MVE compares
+
+// start of MVE_qDest_qSrc
+
+class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
+ string ops, vpred_ops vpred, string cstr,
+ list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix,
+ ops, vpred, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-9} = 0b111;
+ let Inst{6} = 0b0;
+ let Inst{5} = Qm{3};
+ let Inst{4} = 0b0;
+ let Inst{3-1} = Qm{2-0};
+}
+
+class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
+ string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+ vpred_n, "$Qd = $Qd_src", pattern> {
+ bits<4> Qn;
+
+ let Inst{28} = subtract;
+ let Inst{21-20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12} = exch;
+ let Inst{8} = 0b0;
+ let Inst{7} = Qn{3};
+ let Inst{0} = round;
+}
+
+multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
+ bit round, bit subtract> {
+ def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>;
+ def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
+ def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10>;
+}
+
+defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
+defm MVE_VQDMLADHX : MVE_VQxDMLxDH_multi<"vqdmladhx", 0b1, 0b0, 0b0>;
+defm MVE_VQRDMLADH : MVE_VQxDMLxDH_multi<"vqrdmladh", 0b0, 0b1, 0b0>;
+defm MVE_VQRDMLADHX : MVE_VQxDMLxDH_multi<"vqrdmladhx", 0b1, 0b1, 0b0>;
+defm MVE_VQDMLSDH : MVE_VQxDMLxDH_multi<"vqdmlsdh", 0b0, 0b0, 0b1>;
+defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
+defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
+defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
+
+class MVE_VCMUL<string iname, string suffix, bit size, list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
+ "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> {
+ bits<4> Qn;
+ bits<2> rot;
+
+ let Inst{28} = size;
+ let Inst{21-20} = 0b11;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12} = rot{1};
+ let Inst{8} = 0b0;
+ let Inst{7} = Qn{3};
+ let Inst{0} = rot{0};
+
+ let Predicates = [HasMVEFloat];
+}
+
+def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
+def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>;
+
+class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
+ bit T, list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+ vpred_r, "", pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+ bits<4> Qm;
+
+ let Inst{28} = bit_28;
+ let Inst{21-20} = bits_21_20;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b1;
+ let Inst{12} = T;
+ let Inst{8} = 0b0;
+ let Inst{7} = Qn{3};
+ let Inst{0} = 0b0;
+}
+
+multiclass MVE_VMULL_multi<string iname, string suffix,
+ bit bit_28, bits<2> bits_21_20> {
+ def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0>;
+ def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1>;
+}
+
+// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
+// For polynomial multiplies, bits 21:20 take the unused value 0b11, and
+// bit 28 encodes the size instead.
+
+defm MVE_VMULLs8 : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>;
+defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
+defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>;
+defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>;
+defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
+defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>;
+defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>;
+defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
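+
+// Illustrative syntax (arbitrary registers): vmullb.s16 q0, q1, q2
+// multiplies the bottom (even-numbered) lanes of q1 and q2 into
+// double-width lanes of q0; the t variants use the top (odd) lanes.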
+
+class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
+ bit round, list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+ vpred_r, "", pattern> {
+ bits<4> Qn;
+
+ let Inst{28} = U;
+ let Inst{21-20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b1;
+ let Inst{12} = round;
+ let Inst{8} = 0b0;
+ let Inst{7} = Qn{3};
+ let Inst{0} = 0b1;
+}
+
+def MVE_VMULHs8 : MVE_VxMULH<"vmulh", "s8", 0b0, 0b00, 0b0>;
+def MVE_VMULHs16 : MVE_VxMULH<"vmulh", "s16", 0b0, 0b01, 0b0>;
+def MVE_VMULHs32 : MVE_VxMULH<"vmulh", "s32", 0b0, 0b10, 0b0>;
+def MVE_VMULHu8 : MVE_VxMULH<"vmulh", "u8", 0b1, 0b00, 0b0>;
+def MVE_VMULHu16 : MVE_VxMULH<"vmulh", "u16", 0b1, 0b01, 0b0>;
+def MVE_VMULHu32 : MVE_VxMULH<"vmulh", "u32", 0b1, 0b10, 0b0>;
+
+def MVE_VRMULHs8 : MVE_VxMULH<"vrmulh", "s8", 0b0, 0b00, 0b1>;
+def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>;
+def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>;
+def MVE_VRMULHu8 : MVE_VxMULH<"vrmulh", "u8", 0b1, 0b00, 0b1>;
+def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>;
+def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 0b1>;
+
+class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
+ bits<2> size, bit T, list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
+ vpred_n, "$Qd = $Qd_src", pattern> {
+
+ let Inst{28} = bit_28;
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17} = bit_17;
+ let Inst{16} = 0b1;
+ let Inst{12} = T;
+ let Inst{8} = 0b0;
+ let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
+ let Inst{0} = 0b1;
+}
+
+multiclass MVE_VxMOVxN_halves<string iname, string suffix,
+ bit bit_28, bit bit_17, bits<2> size> {
+ def bh : MVE_VxMOVxN<iname # "b", suffix, bit_28, bit_17, size, 0b0>;
+ def th : MVE_VxMOVxN<iname # "t", suffix, bit_28, bit_17, size, 0b1>;
+}
+
+defm MVE_VMOVNi16 : MVE_VxMOVxN_halves<"vmovn", "i16", 0b1, 0b0, 0b00>;
+defm MVE_VMOVNi32 : MVE_VxMOVxN_halves<"vmovn", "i32", 0b1, 0b0, 0b01>;
+defm MVE_VQMOVNs16 : MVE_VxMOVxN_halves<"vqmovn", "s16", 0b0, 0b1, 0b00>;
+defm MVE_VQMOVNs32 : MVE_VxMOVxN_halves<"vqmovn", "s32", 0b0, 0b1, 0b01>;
+defm MVE_VQMOVNu16 : MVE_VxMOVxN_halves<"vqmovn", "u16", 0b1, 0b1, 0b00>;
+defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>;
+defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
+defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
+
+class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
+ list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+ "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", pattern> {
+ let Inst{28} = op;
+ let Inst{21-16} = 0b111111;
+ let Inst{12} = T;
+ let Inst{8-7} = 0b00;
+ let Inst{0} = 0b1;
+
+ let Predicates = [HasMVEFloat];
+}
+
+multiclass MVE_VCVT_ff_halves<string suffix, bit op> {
+ def bh : MVE_VCVT_ff<"vcvtb", suffix, op, 0b0>;
+ def th : MVE_VCVT_ff<"vcvtt", suffix, op, 0b1>;
+}
+
+defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>;
+defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>;
+
+class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
+ list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
+ "$Qd, $Qn, $Qm, $rot", vpred_r, "",
+ pattern> {
+ bits<4> Qn;
+ bit rot;
+
+ let Inst{28} = halve;
+ let Inst{21-20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12} = rot;
+ let Inst{8} = 0b1;
+ let Inst{7} = Qn{3};
+ let Inst{0} = 0b0;
+}
+
+def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
+def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
+def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>;
+
+def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
+def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
+def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>;
+
+class MVE_VADCSBC<string iname, bit I, bit subtract,
+ dag carryin, list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
+ !con((ins MQPR:$Qn, MQPR:$Qm), carryin),
+ "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+ bits<4> Qn;
+
+ let Inst{28} = subtract;
+ let Inst{21-20} = 0b11;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12} = I;
+ let Inst{8} = 0b1;
+ let Inst{7} = Qn{3};
+ let Inst{0} = 0b0;
+
+  // Custom decoder method in order to add the FPSCR operand(s), which
+  // Tablegen won't generate correctly on its own
+ let DecoderMethod = "DecodeMVEVADCInstruction";
+}
+
+def MVE_VADC : MVE_VADCSBC<"vadc", 0b0, 0b0, (ins cl_FPSCR_NZCV:$carryin)>;
+def MVE_VADCI : MVE_VADCSBC<"vadci", 0b1, 0b0, (ins)>;
+
+def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>;
+def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>;
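+
+// These are the whole-vector add/subtract with carry, e.g. (arbitrary
+// registers) vadc.i32 q0, q1, q2. The plain forms consume the existing
+// carry via the extra $carryin operand; the I forms take no carry input
+// and start from a fixed initial carry instead.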
+
+class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
+ list<dag> pattern=[]>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
+ vpred_r, "", pattern> {
+ bits<4> Qn;
+
+ let Inst{28} = size;
+ let Inst{21-20} = 0b11;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b0;
+ let Inst{12} = T;
+ let Inst{8} = 0b1;
+ let Inst{7} = Qn{3};
+ let Inst{0} = 0b1;
+}
+
+multiclass MVE_VQDMULL_halves<string suffix, bit size> {
+ def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>;
+ def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>;
+}
+
+defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
+defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>;
+
+// end of MVE_qDest_qSrc
+
+// start of mve_qDest_rSrc
+
+class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
+ string suffix, string ops, vpred_ops vpred, string cstr,
+ list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<4> Qd;
+ bits<4> Qn;
+ bits<4> Rm;
+
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{19-17} = Qn{2-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{11-9} = 0b111;
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b1;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = Rm{3-0};
+}
+
+class MVE_qDest_rSrc<string iname, string suffix, list<dag> pattern=[]>
+ : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
+ NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "",
+ pattern>;
+
+class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
+ : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
+ NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
+ pattern>;
+
+class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
+ suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
+ bits<4> Qd;
+ bits<4> Rm;
+
+ let Inst{22} = Qd{3};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
+ bit bit_5, bit bit_12, bit bit_16,
+ bit bit_28, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = bit_28;
+ let Inst{21-20} = size;
+ let Inst{16} = bit_16;
+ let Inst{12} = bit_12;
+ let Inst{8} = 0b1;
+ let Inst{5} = bit_5;
+}
+
+multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
+ bit bit_5, bit bit_12, bit bit_16,
+ bit bit_28, list<dag> pattern=[]> {
+ def "8" : MVE_VADDSUB_qr<iname, suffix#"8", 0b00,
+ bit_5, bit_12, bit_16, bit_28>;
+ def "16" : MVE_VADDSUB_qr<iname, suffix#"16", 0b01,
+ bit_5, bit_12, bit_16, bit_28>;
+ def "32" : MVE_VADDSUB_qr<iname, suffix#"32", 0b10,
+ bit_5, bit_12, bit_16, bit_28>;
+}
+
+defm MVE_VADD_qr_i : MVE_VADDSUB_qr_sizes<"vadd", "i", 0b0, 0b0, 0b1, 0b0>;
+defm MVE_VQADD_qr_s : MVE_VADDSUB_qr_sizes<"vqadd", "s", 0b1, 0b0, 0b0, 0b0>;
+defm MVE_VQADD_qr_u : MVE_VADDSUB_qr_sizes<"vqadd", "u", 0b1, 0b0, 0b0, 0b1>;
+
+defm MVE_VSUB_qr_i : MVE_VADDSUB_qr_sizes<"vsub", "i", 0b0, 0b1, 0b1, 0b0>;
+defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
+defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
+
+class MVE_VQDMULL_qr<string iname, string suffix, bit size,
+ bit T, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = size;
+ let Inst{21-20} = 0b11;
+ let Inst{16} = 0b0;
+ let Inst{12} = T;
+ let Inst{8} = 0b1;
+ let Inst{5} = 0b1;
+}
+
+multiclass MVE_VQDMULL_qr_halves<string suffix, bit size> {
+ def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>;
+ def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>;
+}
+
+defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
+defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>;
+
+class MVE_VxADDSUB_qr<string iname, string suffix,
+ bit bit_28, bits<2> bits_21_20, bit subtract,
+ list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = bit_28;
+ let Inst{21-20} = bits_21_20;
+ let Inst{16} = 0b0;
+ let Inst{12} = subtract;
+ let Inst{8} = 0b1;
+ let Inst{5} = 0b0;
+}
+
+def MVE_VHADD_qr_s8 : MVE_VxADDSUB_qr<"vhadd", "s8", 0b0, 0b00, 0b0>;
+def MVE_VHADD_qr_s16 : MVE_VxADDSUB_qr<"vhadd", "s16", 0b0, 0b01, 0b0>;
+def MVE_VHADD_qr_s32 : MVE_VxADDSUB_qr<"vhadd", "s32", 0b0, 0b10, 0b0>;
+def MVE_VHADD_qr_u8 : MVE_VxADDSUB_qr<"vhadd", "u8", 0b1, 0b00, 0b0>;
+def MVE_VHADD_qr_u16 : MVE_VxADDSUB_qr<"vhadd", "u16", 0b1, 0b01, 0b0>;
+def MVE_VHADD_qr_u32 : MVE_VxADDSUB_qr<"vhadd", "u32", 0b1, 0b10, 0b0>;
+
+def MVE_VHSUB_qr_s8 : MVE_VxADDSUB_qr<"vhsub", "s8", 0b0, 0b00, 0b1>;
+def MVE_VHSUB_qr_s16 : MVE_VxADDSUB_qr<"vhsub", "s16", 0b0, 0b01, 0b1>;
+def MVE_VHSUB_qr_s32 : MVE_VxADDSUB_qr<"vhsub", "s32", 0b0, 0b10, 0b1>;
+def MVE_VHSUB_qr_u8 : MVE_VxADDSUB_qr<"vhsub", "u8", 0b1, 0b00, 0b1>;
+def MVE_VHSUB_qr_u16 : MVE_VxADDSUB_qr<"vhsub", "u16", 0b1, 0b01, 0b1>;
+def MVE_VHSUB_qr_u32 : MVE_VxADDSUB_qr<"vhsub", "u32", 0b1, 0b10, 0b1>;
+
+let Predicates = [HasMVEFloat] in {
+ def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd", "f32", 0b0, 0b11, 0b0>;
+ def MVE_VADD_qr_f16 : MVE_VxADDSUB_qr<"vadd", "f16", 0b1, 0b11, 0b0>;
+
+ def MVE_VSUB_qr_f32 : MVE_VxADDSUB_qr<"vsub", "f32", 0b0, 0b11, 0b1>;
+ def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub", "f16", 0b1, 0b11, 0b1>;
+}
+
+class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
+ bit bit_7, bit bit_17, list<dag> pattern=[]>
+ : MVE_qDest_single_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = U;
+ let Inst{25-23} = 0b100;
+ let Inst{21-20} = 0b11;
+ let Inst{19-18} = size;
+ let Inst{17} = bit_17;
+ let Inst{16} = 0b1;
+ let Inst{12-8} = 0b11110;
+ let Inst{7} = bit_7;
+ let Inst{6-4} = 0b110;
+}
+
+multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
+ def s8 : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
+ def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
+ def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
+ def u8 : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
+ def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
+ def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
+}
+
+defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
+defm MVE_VRSHL_qr : MVE_VxSHL_qr_types<"vrshl", 0b0, 0b1>;
+defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
+defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+
+ def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+}
+
+class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = 0b1;
+ let Inst{21-20} = size;
+ let Inst{16} = 0b1;
+ let Inst{12} = 0b1;
+ let Inst{8} = 0b0;
+ let Inst{5} = 0b1;
+}
+
+def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
+def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
+def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
+
+class MVE_VMUL_qr_int<string iname, string suffix,
+ bits<2> size, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = 0b0;
+ let Inst{21-20} = size;
+ let Inst{16} = 0b1;
+ let Inst{12} = 0b1;
+ let Inst{8} = 0b0;
+ let Inst{5} = 0b1;
+}
+
+def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>;
+def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
+def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
+
+class MVE_VxxMUL_qr<string iname, string suffix,
+ bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = bit_28;
+ let Inst{21-20} = bits_21_20;
+ let Inst{16} = 0b1;
+ let Inst{12} = 0b0;
+ let Inst{8} = 0b0;
+ let Inst{5} = 0b1;
+}
+
+def MVE_VQDMULH_qr_s8 : MVE_VxxMUL_qr<"vqdmulh", "s8", 0b0, 0b00>;
+def MVE_VQDMULH_qr_s16 : MVE_VxxMUL_qr<"vqdmulh", "s16", 0b0, 0b01>;
+def MVE_VQDMULH_qr_s32 : MVE_VxxMUL_qr<"vqdmulh", "s32", 0b0, 0b10>;
+
+def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>;
+def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
+def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
+
+let Predicates = [HasMVEFloat] in {
+ def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
+ def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
+}
+
+class MVE_VFMAMLA_qr<string iname, string suffix,
+ bit bit_28, bits<2> bits_21_20, bit S,
+ list<dag> pattern=[]>
+ : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = bit_28;
+ let Inst{21-20} = bits_21_20;
+ let Inst{16} = 0b1;
+ let Inst{12} = S;
+ let Inst{8} = 0b0;
+ let Inst{5} = 0b0;
+}
+
+def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
+def MVE_VMLA_qr_s16 : MVE_VFMAMLA_qr<"vmla", "s16", 0b0, 0b01, 0b0>;
+def MVE_VMLA_qr_s32 : MVE_VFMAMLA_qr<"vmla", "s32", 0b0, 0b10, 0b0>;
+def MVE_VMLA_qr_u8 : MVE_VFMAMLA_qr<"vmla", "u8", 0b1, 0b00, 0b0>;
+def MVE_VMLA_qr_u16 : MVE_VFMAMLA_qr<"vmla", "u16", 0b1, 0b01, 0b0>;
+def MVE_VMLA_qr_u32 : MVE_VFMAMLA_qr<"vmla", "u32", 0b1, 0b10, 0b0>;
+
+def MVE_VMLAS_qr_s8 : MVE_VFMAMLA_qr<"vmlas", "s8", 0b0, 0b00, 0b1>;
+def MVE_VMLAS_qr_s16 : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
+def MVE_VMLAS_qr_s32 : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
+def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>;
+def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
+def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
+
+let Predicates = [HasMVEFloat] in {
+ def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>;
+ def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>;
+ def MVE_VFMA_qr_Sf16 : MVE_VFMAMLA_qr<"vfmas", "f16", 0b1, 0b11, 0b1>;
+ def MVE_VFMA_qr_Sf32 : MVE_VFMAMLA_qr<"vfmas", "f32", 0b0, 0b11, 0b1>;
+}
+
+class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
+ bit bit_5, bit bit_12, list<dag> pattern=[]>
+ : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+
+ let Inst{28} = U;
+ let Inst{21-20} = size;
+ let Inst{16} = 0b0;
+ let Inst{12} = bit_12;
+ let Inst{8} = 0b0;
+ let Inst{5} = bit_5;
+}
+
+multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
+ def s8 : MVE_VQDMLAH_qr<iname, "s8", 0b0, 0b00, bit_5, bit_12>;
+ def s16 : MVE_VQDMLAH_qr<iname, "s16", 0b0, 0b01, bit_5, bit_12>;
+ def s32 : MVE_VQDMLAH_qr<iname, "s32", 0b0, 0b10, bit_5, bit_12>;
+}
+
+defm MVE_VQDMLAH_qr : MVE_VQDMLAH_qr_types<"vqdmlah", 0b1, 0b0>;
+defm MVE_VQRDMLAH_qr : MVE_VQDMLAH_qr_types<"vqrdmlah", 0b0, 0b0>;
+defm MVE_VQDMLASH_qr : MVE_VQDMLAH_qr_types<"vqdmlash", 0b1, 0b1>;
+defm MVE_VQRDMLASH_qr : MVE_VQDMLAH_qr_types<"vqrdmlash", 0b0, 0b1>;
+
+class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
+ list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
+ (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
+ iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
+ pattern> {
+ bits<4> Qd;
+ bits<4> Rn;
+ bits<2> imm;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = size;
+ let Inst{19-17} = Rn{3-1};
+ let Inst{16} = 0b1;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = bit_12;
+ let Inst{11-8} = 0b1111;
+ let Inst{7} = imm{1};
+ let Inst{6-1} = 0b110111;
+ let Inst{0} = imm{0};
+}
+
+def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
+def MVE_VIDUPu16 : MVE_VxDUP<"vidup", "u16", 0b01, 0b0>;
+def MVE_VIDUPu32 : MVE_VxDUP<"vidup", "u32", 0b10, 0b0>;
+
+def MVE_VDDUPu8 : MVE_VxDUP<"vddup", "u8", 0b00, 0b1>;
+def MVE_VDDUPu16 : MVE_VxDUP<"vddup", "u16", 0b01, 0b1>;
+def MVE_VDDUPu32 : MVE_VxDUP<"vddup", "u32", 0b10, 0b1>;
+
+class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
+ list<dag> pattern=[]>
+ : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
+ (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
+ iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
+ pattern> {
+ bits<4> Qd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<2> imm;
+
+ let Inst{28} = 0b0;
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = size;
+ let Inst{19-17} = Rn{3-1};
+ let Inst{16} = 0b1;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = bit_12;
+ let Inst{11-8} = 0b1111;
+ let Inst{7} = imm{1};
+ let Inst{6-4} = 0b110;
+ let Inst{3-1} = Rm{3-1};
+ let Inst{0} = imm{0};
+}
+
+def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
+def MVE_VIWDUPu16 : MVE_VxWDUP<"viwdup", "u16", 0b01, 0b0>;
+def MVE_VIWDUPu32 : MVE_VxWDUP<"viwdup", "u32", 0b10, 0b0>;
+
+def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>;
+def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
+def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
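+
+// Illustrative syntax (arbitrary registers): vidup.u32 q0, r0, #4 fills
+// q0 with r0, r0+4, r0+8, ... and writes the updated value back to r0;
+// vddup decrements instead. The wrapping viwdup/vdwdup forms take an
+// extra odd-numbered GPR giving the buffer size to wrap at, e.g.
+// viwdup.u16 q0, r0, r1, #2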
+
+class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
+ "$Rn", vpred_n, "", pattern> {
+ bits<4> Rn;
+
+ let Inst{28-27} = 0b10;
+ let Inst{26-22} = 0b00000;
+ let Inst{21-20} = size;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{15-11} = 0b11101;
+ let Inst{10-0} = 0b00000000001;
+ let Unpredictable{10-0} = 0b11111111111;
+
+ let Constraints = "";
+ let DecoderMethod = "DecodeMveVCTP";
+}
+
+def MVE_VCTP8 : MVE_VCTP<"8", 0b00>;
+def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
+def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
+def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
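+
+// VCTP (Create Vector Tail Predicate), e.g. vctp.32 r0, sets the
+// predicate so that only the first r0 lanes are active; it is the basis
+// of tail-predicated vector loops.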
+
+// end of mve_qDest_rSrc
+
+// start of coproc mov
+
+class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
+ : MVE_VMOV_lane_base<oops, !con(iops, (ins MVEPairVectorIndex2:$idx,
+ MVEPairVectorIndex0:$idx2)),
+ NoItinerary, "vmov", "", ops, cstr, []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<4> Qd;
+ bit idx;
+ bit idx2;
+
+ let Inst{31-23} = 0b111011000;
+ let Inst{22} = Qd{3};
+ let Inst{21} = 0b0;
+ let Inst{20} = to_qreg;
+ let Inst{19-16} = Rt2{3-0};
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-5} = 0b01111000;
+ let Inst{4} = idx2;
+ let Inst{3-0} = Rt{3-0};
+}
+
+// The assembly syntax for these instructions mentions the vector
+// register name twice, e.g.
+//
+// vmov q2[2], q2[0], r0, r1
+// vmov r0, r1, q2[2], q2[0]
+//
+// which needs a bit of juggling with MC operand handling.
+//
+// For the move _into_ a vector register, the MC operand list also has
+// to mention the register name twice: once as the output, and once as
+// an extra input to represent where the unchanged half of the output
+// register comes from (when this instruction is used in code
+// generation). So we arrange that the first mention of the vector reg
+// in the instruction is considered by the AsmMatcher to be the output
+// ($Qd), and the second one is the input ($QdSrc). Binding them
+// together with the existing 'tie' constraint is enough to enforce at
+// register allocation time that they have to be the same register.
+//
+// For the move _from_ a vector register, there's no way to get round
+// the fact that both instances of that register name have to be
+// inputs. They have to be the same register again, but this time, we
+// can't use a tie constraint, because that has to be between an
+// output and an input operand. So this time, we have to arrange that
+// the q-reg appears just once in the MC operand list, in spite of
+// being mentioned twice in the asm syntax - which needs a custom
+// AsmMatchConverter.
+
+def MVE_VMOV_q_rr : MVE_VMOV_64bit<(outs MQPR:$Qd),
+ (ins MQPR:$QdSrc, rGPR:$Rt, rGPR:$Rt2),
+ 0b1, "$Qd$idx, $QdSrc$idx2, $Rt, $Rt2",
+ "$Qd = $QdSrc"> {
+ let DecoderMethod = "DecodeMVEVMOVDRegtoQ";
+}
+
+def MVE_VMOV_rr_q : MVE_VMOV_64bit<(outs rGPR:$Rt, rGPR:$Rt2), (ins MQPR:$Qd),
+ 0b0, "$Rt, $Rt2, $Qd$idx, $Qd$idx2", ""> {
+ let DecoderMethod = "DecodeMVEVMOVQtoDReg";
+ let AsmMatchConverter = "cvtMVEVMOVQtoDReg";
+}
+
+// end of coproc mov
+
+// start of MVE interleaving load/store
+
+// Base class for the family of interleaving/deinterleaving
+// load/stores with names like VLD20.8 and VST43.32.
+class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
+ bit load, dag Oops, dag loadIops, dag wbIops,
+ string iname, string ops,
+ string cstr, list<dag> pattern=[]>
+ : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, pattern> {
+ bits<4> VQd;
+ bits<4> Rn;
+
+ let Inst{31-22} = 0b1111110010;
+ let Inst{21} = writeback;
+ let Inst{20} = load;
+ let Inst{19-16} = Rn;
+ let Inst{15-13} = VQd{2-0};
+ let Inst{12-9} = 0b1111;
+ let Inst{8-7} = size;
+ let Inst{6-5} = stage;
+ let Inst{4-1} = 0b0000;
+ let Inst{0} = fourregs;
+
+ let mayLoad = load;
+ let mayStore = !eq(load,0);
+}
+
+// A parameter class used to encapsulate all the ways the writeback
+// variants of VLD20 and friends differ from the non-writeback ones.
+class MVE_vldst24_writeback<bit b, dag Oo, dag Io,
+ string sy="", string c="", string n=""> {
+ bit writeback = b;
+ dag Oops = Oo;
+ dag Iops = Io;
+ string syntax = sy;
+ string cstr = c;
+ string id_suffix = n;
+}
+
+// Another parameter class that encapsulates the differences between VLD2x
+// and VLD4x.
+class MVE_vldst24_nvecs<int n, list<int> s, bit b, RegisterOperand vl> {
+ int nvecs = n;
+ list<int> stages = s;
+ bit bit0 = b;
+ RegisterOperand VecList = vl;
+}
+
+// A third parameter class that distinguishes VLDnn.8 from .16 from .32.
+class MVE_vldst24_lanesize<int i, bits<2> b> {
+ int lanesize = i;
+ bits<2> sizebits = b;
+}
+
+// A base class for each direction of transfer: one for load, one for
+// store. These can't be made a fourth independent parametric tuple
+// class, because they have to take the nvecs tuple class as a
+// parameter, in order to find the right VecList operand type.
+
+class MVE_vld24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
+ MVE_vldst24_writeback wb, string iname,
+ list<dag> pattern=[]>
+ : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 1,
+ !con((outs n.VecList:$VQd), wb.Oops),
+ (ins n.VecList:$VQdSrc), wb.Iops,
+ iname, "$VQd, $Rn" # wb.syntax,
+ wb.cstr # ",$VQdSrc = $VQd", pattern>;
+
+class MVE_vst24_base<MVE_vldst24_nvecs n, bits<2> pat, bits<2> size,
+ MVE_vldst24_writeback wb, string iname,
+ list<dag> pattern=[]>
+ : MVE_vldst24_base<wb.writeback, n.bit0, pat, size, 0,
+ wb.Oops, (ins n.VecList:$VQd), wb.Iops,
+ iname, "$VQd, $Rn" # wb.syntax,
+ wb.cstr, pattern>;
+
+// Actually define all the interleaving loads and stores, by a series
+// of nested foreaches over number of vectors (VLD2/VLD4); stage
+// within one of those series (VLDx0/VLDx1/VLDx2/VLDx3); size of
+// vector lane; writeback or no writeback.
+foreach n = [MVE_vldst24_nvecs<2, [0,1], 0, VecList2Q>,
+ MVE_vldst24_nvecs<4, [0,1,2,3], 1, VecList4Q>] in
+foreach stage = n.stages in
+foreach s = [MVE_vldst24_lanesize< 8, 0b00>,
+ MVE_vldst24_lanesize<16, 0b01>,
+ MVE_vldst24_lanesize<32, 0b10>] in
+foreach wb = [MVE_vldst24_writeback<
+ 1, (outs rGPR:$wb), (ins t2_nosp_addr_offset_none:$Rn),
+ "!", "$Rn.base = $wb", "_wb">,
+ MVE_vldst24_writeback<0, (outs), (ins t2_addr_offset_none:$Rn)>] in {
+
+ // For each case within all of those foreaches, define the actual
+ // instructions. The def names are made by gluing together pieces
+ // from all the parameter classes, and will end up being things like
+ // MVE_VLD20_8 and MVE_VST43_16_wb.
+
+ def "MVE_VLD" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
+ : MVE_vld24_base<n, stage, s.sizebits, wb,
+ "vld" # n.nvecs # stage # "." # s.lanesize>;
+
+ def "MVE_VST" # n.nvecs # stage # "_" # s.lanesize # wb.id_suffix
+ : MVE_vst24_base<n, stage, s.sizebits, wb,
+ "vst" # n.nvecs # stage # "." # s.lanesize>;
+}
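+
+// Illustrative syntax for the instructions generated above (arbitrary
+// registers): vld20.8 {q0, q1}, [r0], and for the writeback form
+// vst43.32 {q0, q1, q2, q3}, [r0]!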
+
+// end of MVE interleaving load/store
+
+// start of MVE predicable load/store
+
+// A parameter class for the direction of transfer.
+class MVE_ldst_direction<bit b, dag Oo, dag Io, string c=""> {
+ bit load = b;
+ dag Oops = Oo;
+ dag Iops = Io;
+ string cstr = c;
+}
+def MVE_ld: MVE_ldst_direction<1, (outs MQPR:$Qd), (ins), ",@earlyclobber $Qd">;
+def MVE_st: MVE_ldst_direction<0, (outs), (ins MQPR:$Qd)>;
+
+// A parameter class for the size of memory access in a load.
+class MVE_memsz<bits<2> e, int s, AddrMode m, string mn, list<string> types> {
+ bits<2> encoding = e; // opcode bit(s) for encoding
+ int shift = s; // shift applied to immediate load offset
+ AddrMode AM = m;
+
+ // For instruction aliases: define the complete list of type
+ // suffixes at this size, and the canonical ones for loads and
+ // stores.
+ string MnemonicLetter = mn;
+ int TypeBits = !shl(8, s);
+ string CanonLoadSuffix = ".u" # TypeBits;
+ string CanonStoreSuffix = "." # TypeBits;
+ list<string> suffixes = !foreach(letter, types, "." # letter # TypeBits);
+}
+
+// Instances of MVE_memsz.
+//
+// (memD doesn't need an AddrMode, because those are only for
+// contiguous loads, and memD is only used by gathers and scatters.)
+def MVE_memB: MVE_memsz<0b00, 0, AddrModeT2_i7, "b", ["", "u", "s"]>;
+def MVE_memH: MVE_memsz<0b01, 1, AddrModeT2_i7s2, "h", ["", "u", "s", "f"]>;
+def MVE_memW: MVE_memsz<0b10, 2, AddrModeT2_i7s4, "w", ["", "u", "s", "f"]>;
+def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>;
+
+// This is the base class for all the MVE loads and stores other than
+// the interleaving ones. All the non-interleaving loads/stores share
+// the characteristic that they operate on just one vector register,
+// so they are VPT-predicable.
+//
+// The predication operand is vpred_n, for both loads and stores. For
+// store instructions, the reason is obvious: if there is no output
+// register, there can't be a need for an input parameter giving the
+// output register's previous value. Load instructions also don't need
+// that input parameter, because unlike MVE data processing
+// instructions, predicated loads are defined to set the inactive
+// lanes of the output register to zero, instead of preserving their
+// input values.
+class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
+ dag oops, dag iops, string asm, string suffix,
+ string ops, string cstr, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
+ bits<3> Qd;
+
+ let Inst{28} = U;
+ let Inst{25} = 0b0;
+ let Inst{24} = P;
+ let Inst{22} = 0b0;
+ let Inst{21} = W;
+ let Inst{20} = dir.load;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12} = opc;
+ let Inst{11-9} = 0b111;
+
+ let mayLoad = dir.load;
+ let mayStore = !eq(dir.load,0);
+}
+
+// Contiguous load and store instructions. These come in two main
+// categories: same-size loads/stores in which 128 bits of vector
+// register is transferred to or from 128 bits of memory in the most
+// obvious way, and widening loads / narrowing stores, in which the
+// size of memory accessed is less than the size of a vector register,
+// so the load instructions sign- or zero-extend each memory value
+// into a wider vector lane, and the store instructions truncate
+// correspondingly.
+//
+// The instruction mnemonics for these two classes look reasonably
+// similar, but the actual encodings are different enough to need two
+// separate base classes.
+
+// Contiguous, same size
+class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
+ dag oops, dag iops, string asm, string suffix,
+ IndexMode im, string ops, string cstr>
+ : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> {
+ bits<12> addr;
+ let Inst{23} = addr{7};
+ let Inst{19-16} = addr{11-8};
+ let Inst{8-7} = memsz.encoding;
+ let Inst{6-0} = addr{6-0};
+}
+
+// Contiguous, widening/narrowing
+class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
+ bit P, bit W, bits<2> size, dag oops, dag iops,
+ string asm, string suffix, IndexMode im,
+ string ops, string cstr>
+ : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> {
+ bits<11> addr;
+ let Inst{23} = addr{7};
+ let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
+ let Inst{18-16} = addr{10-8};
+ let Inst{8-7} = size;
+ let Inst{6-0} = addr{6-0};
+
+ let IM = im;
+}
+
+// Multiclass wrapper on each of the _cw and _cs base classes, to
+// generate three writeback modes (none, preindex, postindex).
+
+multiclass MVE_VLDRSTR_cw_m<MVE_ldst_direction dir, MVE_memsz memsz,
+ string asm, string suffix, bit U, bits<2> size> {
+ let AM = memsz.AM in {
+ def "" : MVE_VLDRSTR_cw<
+ dir, memsz, U, 1, 0, size,
+ dir.Oops, !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
+ asm, suffix, IndexModeNone, "$Qd, $addr", "">;
+
+ def _pre : MVE_VLDRSTR_cw<
+ dir, memsz, U, 1, 1, size,
+ !con((outs tGPR:$wb), dir.Oops),
+ !con(dir.Iops, (ins taddrmode_imm7<memsz.shift>:$addr)),
+ asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
+ let DecoderMethod = "DecodeMVE_MEM_1_pre<"#memsz.shift#">";
+ }
+
+ def _post : MVE_VLDRSTR_cw<
+ dir, memsz, U, 0, 1, size,
+ !con((outs tGPR:$wb), dir.Oops),
+ !con(dir.Iops, (ins t_addr_offset_none:$Rn,
+ t2am_imm7_offset<memsz.shift>:$addr)),
+ asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
+ bits<4> Rn;
+ let Inst{18-16} = Rn{2-0};
+ }
+ }
+}
+
+multiclass MVE_VLDRSTR_cs_m<MVE_ldst_direction dir, MVE_memsz memsz,
+ string asm, string suffix> {
+ let AM = memsz.AM in {
+ def "" : MVE_VLDRSTR_cs<
+ dir, memsz, 1, 0,
+ dir.Oops, !con(dir.Iops, (ins t2addrmode_imm7<memsz.shift>:$addr)),
+ asm, suffix, IndexModeNone, "$Qd, $addr", "">;
+
+ def _pre : MVE_VLDRSTR_cs<
+ dir, memsz, 1, 1,
+ !con((outs rGPR:$wb), dir.Oops),
+ !con(dir.Iops, (ins t2addrmode_imm7_pre<memsz.shift>:$addr)),
+ asm, suffix, IndexModePre, "$Qd, $addr!", "$addr.base = $wb"> {
+ let DecoderMethod = "DecodeMVE_MEM_2_pre<"#memsz.shift#">";
+ }
+
+ def _post : MVE_VLDRSTR_cs<
+ dir, memsz, 0, 1,
+ !con((outs rGPR:$wb), dir.Oops),
+ // We need an !if here to select the base register class,
+ // because it's legal to write back to SP in a load of this
+ // type, but not in a store.
+ !con(dir.Iops, (ins !if(dir.load, t2_addr_offset_none,
+ t2_nosp_addr_offset_none):$Rn,
+ t2am_imm7_offset<memsz.shift>:$addr)),
+ asm, suffix, IndexModePost, "$Qd, $Rn$addr", "$Rn.base = $wb"> {
+ bits<4> Rn;
+ let Inst{19-16} = Rn{3-0};
+ }
+ }
+}
+
+// Now actually declare all the contiguous load/stores, via those
+// multiclasses. The instruction ids coming out of this are the bare
+// names shown in the defm, with _pre or _post appended for writeback,
+// e.g. MVE_VLDRBS16, MVE_VSTRB16_pre, MVE_VSTRHU16_post.
+
+defm MVE_VLDRBS16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s16", 0, 0b01>;
+defm MVE_VLDRBS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "s32", 0, 0b10>;
+defm MVE_VLDRBU16: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u16", 1, 0b01>;
+defm MVE_VLDRBU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memB, "vldrb", "u32", 1, 0b10>;
+defm MVE_VLDRHS32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "s32", 0, 0b10>;
+defm MVE_VLDRHU32: MVE_VLDRSTR_cw_m<MVE_ld, MVE_memH, "vldrh", "u32", 1, 0b10>;
+
+defm MVE_VLDRBU8: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memB, "vldrb", "u8">;
+defm MVE_VLDRHU16: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memH, "vldrh", "u16">;
+defm MVE_VLDRWU32: MVE_VLDRSTR_cs_m<MVE_ld, MVE_memW, "vldrw", "u32">;
+
+defm MVE_VSTRB16: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "16", 0, 0b01>;
+defm MVE_VSTRB32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memB, "vstrb", "32", 0, 0b10>;
+defm MVE_VSTRH32: MVE_VLDRSTR_cw_m<MVE_st, MVE_memH, "vstrh", "32", 0, 0b10>;
+
+defm MVE_VSTRBU8 : MVE_VLDRSTR_cs_m<MVE_st, MVE_memB, "vstrb", "8">;
+defm MVE_VSTRHU16: MVE_VLDRSTR_cs_m<MVE_st, MVE_memH, "vstrh", "16">;
+defm MVE_VSTRWU32: MVE_VLDRSTR_cs_m<MVE_st, MVE_memW, "vstrw", "32">;
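+
+// Illustrative syntax (arbitrary registers): vldrb.s16 q0, [r0, #8] is a
+// widening load of eight bytes sign-extended into halfword lanes;
+// vstrh.16 q0, [r0], #16 is a post-indexed same-size store of eight
+// halfwords.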
+
+// Gather loads / scatter stores whose address operand is of the form
+// [Rn,Qm], i.e. a single GPR as the common base address, plus a
+// vector of offset from it. ('Load/store this sequence of elements of
+// the same array.')
+//
+// Like the contiguous family, these loads and stores can widen the
+// loaded values / truncate the stored ones, or they can just
+// load/store the same size of memory and vector lane. But unlike the
+// contiguous family, there's no particular difference in encoding
+// between those two cases.
+//
+// This family also comes with the option to scale the offset values
+// in Qm by the size of the loaded memory (i.e. to treat them as array
+// indices), or not to scale them (to treat them as plain byte offsets
+// in memory, in which case the loaded values may be unaligned). The
+// scaled instructions' address operand in assembly looks like
+// [Rn,Qm,UXTW #2] or similar.
+
+// Base class.
+class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
+ bits<2> size, bit os, string asm, string suffix, int shift>
+ : MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
+ !con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
+ asm, suffix, "$Qd, $addr", dir.cstr> {
+ bits<7> addr;
+ let Inst{23} = 0b1;
+ let Inst{19-16} = addr{6-3};
+ let Inst{8-7} = size;
+ let Inst{6} = memsz.encoding{1};
+ let Inst{5} = 0;
+ let Inst{4} = memsz.encoding{0};
+ let Inst{3-1} = addr{2-0};
+ let Inst{0} = os;
+}
+
+// Multiclass that defines the scaled and unscaled versions of an
+// instruction, when the memory size is wider than a byte. The scaled
+// version gets the default name like MVE_VLDRBU16_rq; the unscaled /
+// potentially unaligned version gets a "_u" suffix, e.g.
+// MVE_VLDRBU16_rq_u.
+multiclass MVE_VLDRSTR_rq_w<MVE_ldst_direction dir, MVE_memsz memsz,
+ string asm, string suffix, bit U, bits<2> size> {
+ def _u : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
+ def "" : MVE_VLDRSTR_rq<dir, memsz, U, size, 1, asm, suffix, memsz.shift>;
+}
+
+// Subclass of MVE_VLDRSTR_rq with the same API as that multiclass,
+// for use when the memory size is one byte, so there's no 'scaled'
+// version of the instruction at all. (This is encoded as if it were
+// unscaled, but named in the default way with no _u suffix.)
+class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
+ string asm, string suffix, bit U, bits<2> size>
+ : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
+
+// Actually define all the loads and stores in this family.
+
+def MVE_VLDRBU8_rq : MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u8", 1,0b00>;
+def MVE_VLDRBU16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u16", 1,0b01>;
+def MVE_VLDRBS16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s16", 0,0b01>;
+def MVE_VLDRBU32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u32", 1,0b10>;
+def MVE_VLDRBS32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s32", 0,0b10>;
+
+defm MVE_VLDRHU16_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u16", 1,0b01>;
+defm MVE_VLDRHU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u32", 1,0b10>;
+defm MVE_VLDRHS32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","s32", 0,0b10>;
+defm MVE_VLDRWU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memW, "vldrw","u32", 1,0b10>;
+defm MVE_VLDRDU64_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memD, "vldrd","u64", 1,0b11>;
+
+def MVE_VSTRB8_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","8", 0,0b00>;
+def MVE_VSTRB16_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","16", 0,0b01>;
+def MVE_VSTRB32_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","32", 0,0b10>;
+
+defm MVE_VSTRH16_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","16", 0,0b01>;
+defm MVE_VSTRH32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","32", 0,0b10>;
+defm MVE_VSTRW32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memW, "vstrw","32", 0,0b10>;
+defm MVE_VSTRD64_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memD, "vstrd","64", 0,0b11>;
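+
+// Illustrative syntax (arbitrary registers): vldrh.u16 q0, [r0, q1, uxtw #1]
+// gathers halfwords from r0 + 2*q1[i] (scaled form), while the _u variant
+// vldrh.u16 q0, [r0, q1] treats the lanes of q1 as raw byte offsets.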
+
+// Gather loads / scatter stores whose address operand is of the form
+// [Qm,#imm], i.e. a vector containing a full base address for each
+// loaded item, plus an immediate offset applied consistently to all
+// of them. ('Load/store the same field from this vector of pointers
+// to a structure type.')
+//
+// This family requires the vector lane size to be at least 32 bits
+// (so there's room for an address in each lane at all). It has no
+// widening/narrowing variants. But it does support preindex
+// writeback, in which the address vector is updated to hold the
+// addresses actually loaded from.
+
+// Base class.
+class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
+ string asm, string wbAsm, string suffix, string cstr = "">
+ : MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
+ !con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
+ asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
+ bits<11> addr;
+ let Inst{23} = addr{7};
+ let Inst{19-17} = addr{10-8};
+ let Inst{16} = 0;
+ let Inst{8} = memsz.encoding{0}; // enough to distinguish 32- from 64-bit
+ let Inst{7} = 0;
+ let Inst{6-0} = addr{6-0};
+}
+
+// Multiclass that generates the non-writeback and writeback variants.
+multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
+ string asm, string suffix> {
+ def "" : MVE_VLDRSTR_qi<dir, memsz, 0, (outs), asm, "", suffix>;
+ def _pre : MVE_VLDRSTR_qi<dir, memsz, 1, (outs MQPR:$wb), asm, "!", suffix,
+ "$addr.base = $wb"> {
+ let DecoderMethod="DecodeMVE_MEM_3_pre<"#memsz.shift#">";
+ }
+}
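+
+// Sketch of the assumed expansion, for clarity: each defm below produces a
+// plain record such as MVE_VLDRWU32_qi, plus a pre-indexed writeback
+// record such as MVE_VLDRWU32_qi_pre, which returns the updated address
+// vector in $wb and ties it to $addr.base via the "$addr.base = $wb"
+// constraint.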
+
+// Actual instruction definitions.
+defm MVE_VLDRWU32_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memW, "vldrw", "u32">;
+defm MVE_VLDRDU64_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memD, "vldrd", "u64">;
+defm MVE_VSTRW32_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memW, "vstrw", "32">;
+defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;
+
+// Define aliases for all the instructions where memory size and
+// vector lane size are the same. These are mnemonic aliases, so they
+// apply consistently across all of the above families - contiguous
+// loads, and both the rq and qi types of gather/scatter.
+//
+// Rationale: As long as you're loading (for example) 16-bit memory
+// values into 16-bit vector lanes, you can think of them as signed or
+// unsigned integers, fp16 or just raw 16-bit blobs and it makes no
+// difference. So we permit all of vldrh.16, vldrh.u16, vldrh.s16,
+// vldrh.f16 and treat them all as equivalent to the canonical
+// spelling (which happens to be .u16 for loads, and just .16 for
+// stores).
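+//
+// For instance (illustrative): "vldrh.s16 q0, [r0]" and "vldrh.f16 q0, [r0]"
+// are accepted and assemble to the same encoding as the canonical
+// "vldrh.u16 q0, [r0]"; likewise "vstrh.u16" is an alias of "vstrh.16".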
+
+foreach vpt_cond = ["", "t", "e"] in
+foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
+foreach suffix = memsz.suffixes in {
+
+ // These foreaches are conceptually ifs, implemented by iterating a
+ // dummy variable over a list with 0 or 1 elements depending on the
+ // condition. The idea is to iterate over _nearly_ all the suffixes
+ // in memsz.suffixes, but omit the one we want all the others to alias.
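+  //
+  // As a sketch of the idiom: !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>)
+  // evaluates to the one-element list [1] when the suffix is not the
+  // canonical one, so the def underneath is emitted exactly once, and to
+  // the empty list otherwise, so the def is skipped entirely.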
+
+ foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in
+ def : MnemonicAlias<
+ "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
+ "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
+
+ foreach _ = !if(!ne(suffix, memsz.CanonStoreSuffix), [1], []<int>) in
+ def : MnemonicAlias<
+ "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
+ "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
+}
+
+// end of MVE predicable load/store
+
+class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
+ : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
+ bits<3> fc;
+ bits<4> Mk;
+ bits<3> Qn;
+
+ let Inst{31-23} = 0b111111100;
+ let Inst{22} = Mk{3};
+ let Inst{21-20} = size;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b1;
+ let Inst{15-13} = Mk{2-0};
+ let Inst{12} = fc{2};
+ let Inst{11-8} = 0b1111;
+ let Inst{7} = fc{0};
+ let Inst{4} = 0b0;
+
+ let Defs = [VPR, P0];
+}
+
+class MVE_VPTt1<string suffix, bits<2> size, dag iops>
+ : MVE_VPT<suffix, size, iops, "$fc, $Qn, $Qm"> {
+ bits<4> Qm;
+ bits<4> Mk;
+
+ let Inst{6} = 0b0;
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = fc{1};
+}
+
+class MVE_VPTt1i<string suffix, bits<2> size>
+ : MVE_VPTt1<suffix, size,
+ (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, MQPR:$Qm)> {
+ let Inst{12} = 0b0;
+ let Inst{0} = 0b0;
+}
+
+def MVE_VPTv4i32 : MVE_VPTt1i<"i32", 0b10>;
+def MVE_VPTv8i16 : MVE_VPTt1i<"i16", 0b01>;
+def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>;
+
+class MVE_VPTt1u<string suffix, bits<2> size>
+ : MVE_VPTt1<suffix, size,
+ (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, MQPR:$Qm)> {
+ let Inst{12} = 0b0;
+ let Inst{0} = 0b1;
+}
+
+def MVE_VPTv4u32 : MVE_VPTt1u<"u32", 0b10>;
+def MVE_VPTv8u16 : MVE_VPTt1u<"u16", 0b01>;
+def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>;
+
+class MVE_VPTt1s<string suffix, bits<2> size>
+ : MVE_VPTt1<suffix, size,
+ (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, MQPR:$Qm)> {
+ let Inst{12} = 0b1;
+}
+
+def MVE_VPTv4s32 : MVE_VPTt1s<"s32", 0b10>;
+def MVE_VPTv8s16 : MVE_VPTt1s<"s16", 0b01>;
+def MVE_VPTv16s8 : MVE_VPTt1s<"s8", 0b00>;
+
+class MVE_VPTt2<string suffix, bits<2> size, dag iops>
+ : MVE_VPT<suffix, size, iops,
+ "$fc, $Qn, $Rm"> {
+ bits<4> Rm;
+ bits<3> fc;
+ bits<4> Mk;
+
+ let Inst{6} = 0b1;
+ let Inst{5} = fc{1};
+ let Inst{3-0} = Rm{3-0};
+}
+
+class MVE_VPTt2i<string suffix, bits<2> size>
+ : MVE_VPTt2<suffix, size,
+ (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+ let Inst{12} = 0b0;
+ let Inst{5} = 0b0;
+}
+
+def MVE_VPTv4i32r : MVE_VPTt2i<"i32", 0b10>;
+def MVE_VPTv8i16r : MVE_VPTt2i<"i16", 0b01>;
+def MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>;
+
+class MVE_VPTt2u<string suffix, bits<2> size>
+ : MVE_VPTt2<suffix, size,
+ (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+ let Inst{12} = 0b0;
+ let Inst{5} = 0b1;
+}
+
+def MVE_VPTv4u32r : MVE_VPTt2u<"u32", 0b10>;
+def MVE_VPTv8u16r : MVE_VPTt2u<"u16", 0b01>;
+def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>;
+
+class MVE_VPTt2s<string suffix, bits<2> size>
+ : MVE_VPTt2<suffix, size,
+ (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, GPRwithZR:$Rm)> {
+ let Inst{12} = 0b1;
+}
+
+def MVE_VPTv4s32r : MVE_VPTt2s<"s32", 0b10>;
+def MVE_VPTv8s16r : MVE_VPTt2s<"s16", 0b01>;
+def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
+
+
+class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
+ : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
+ "", pattern> {
+ bits<3> fc;
+ bits<4> Mk;
+ bits<3> Qn;
+
+ let Inst{31-29} = 0b111;
+ let Inst{28} = size;
+ let Inst{27-23} = 0b11100;
+ let Inst{22} = Mk{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b1;
+ let Inst{15-13} = Mk{2-0};
+ let Inst{12} = fc{2};
+ let Inst{11-8} = 0b1111;
+ let Inst{7} = fc{0};
+ let Inst{4} = 0b0;
+
+ let Defs = [P0];
+ let Predicates = [HasMVEFloat];
+}
+
+class MVE_VPTft1<string suffix, bit size>
+ : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, MQPR:$Qm),
+ "$fc, $Qn, $Qm"> {
+ bits<3> fc;
+ bits<4> Qm;
+
+ let Inst{6} = 0b0;
+ let Inst{5} = Qm{3};
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = fc{1};
+}
+
+def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>;
+def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>;
+
+class MVE_VPTft2<string suffix, bit size>
+ : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, GPRwithZR:$Rm),
+ "$fc, $Qn, $Rm"> {
+ bits<3> fc;
+ bits<4> Rm;
+
+ let Inst{6} = 0b1;
+ let Inst{5} = fc{1};
+ let Inst{3-0} = Rm{3-0};
+}
+
+def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>;
+def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>;
+
+def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
+ !strconcat("vpst", "${Mk}"), "", "", []> {
+ bits<4> Mk;
+
+ let Inst{31-23} = 0b111111100;
+ let Inst{22} = Mk{3};
+ let Inst{21-16} = 0b110001;
+ let Inst{15-13} = Mk{2-0};
+ let Inst{12-0} = 0b0111101001101;
+ let Unpredictable{12} = 0b1;
+ let Unpredictable{7} = 0b1;
+ let Unpredictable{5} = 0b1;
+
+ let Defs = [P0];
+}
+
+def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
+ "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
+ bits<4> Qn;
+ bits<4> Qd;
+ bits<4> Qm;
+
+ let Inst{28} = 0b1;
+ let Inst{25-23} = 0b100;
+ let Inst{22} = Qd{3};
+ let Inst{21-20} = 0b11;
+ let Inst{19-17} = Qn{2-0};
+ let Inst{16} = 0b1;
+ let Inst{15-13} = Qd{2-0};
+ let Inst{12-9} = 0b0111;
+ let Inst{8} = 0b1;
+ let Inst{7} = Qn{3};
+ let Inst{6} = 0b0;
+ let Inst{5} = Qm{3};
+ let Inst{4} = 0b0;
+ let Inst{3-1} = Qm{2-0};
+ let Inst{0} = 0b1;
+}
+
+foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
+ "i8", "i16", "i32", "f16", "f32"] in
+def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
+ (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+
+def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
+ "vpnot", "", "", vpred_n, "", []> {
+ let Inst{31-0} = 0b11111110001100010000111101001101;
+ let Unpredictable{19-17} = 0b111;
+ let Unpredictable{12} = 0b1;
+ let Unpredictable{7} = 0b1;
+ let Unpredictable{5} = 0b1;
+ let Defs = [P0];
+ let Uses = [P0];
+
+ let Constraints = "";
+}
+
+class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size>
+ : t2LOL<(outs GPRlr:$LR), iops, asm, ops> {
+ bits<4> Rn;
+ let Predicates = [HasMVEInt];
+ let Inst{22} = 0b0;
+ let Inst{21-20} = size;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{12} = 0b0;
+}
+
+class MVE_DLSTP<string asm, bits<2> size>
+ : MVE_loltp_start<(ins rGPR:$Rn), asm, "$LR, $Rn", size> {
+ let Inst{13} = 0b1;
+ let Inst{11-1} = 0b00000000000;
+ let Unpredictable{10-1} = 0b1111111111;
+}
+
+class MVE_WLSTP<string asm, bits<2> size>
+ : MVE_loltp_start<(ins rGPR:$Rn, wlslabel_u11:$label),
+ asm, "$LR, $Rn, $label", size> {
+ bits<11> label;
+ let Inst{13} = 0b0;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+}
+
+def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
+def MVE_DLSTP_16 : MVE_DLSTP<"dlstp.16", 0b01>;
+def MVE_DLSTP_32 : MVE_DLSTP<"dlstp.32", 0b10>;
+def MVE_DLSTP_64 : MVE_DLSTP<"dlstp.64", 0b11>;
+
+def MVE_WLSTP_8 : MVE_WLSTP<"wlstp.8", 0b00>;
+def MVE_WLSTP_16 : MVE_WLSTP<"wlstp.16", 0b01>;
+def MVE_WLSTP_32 : MVE_WLSTP<"wlstp.32", 0b10>;
+def MVE_WLSTP_64 : MVE_WLSTP<"wlstp.64", 0b11>;
+
+class MVE_loltp_end<dag oops, dag iops, string asm, string ops>
+ : t2LOL<oops, iops, asm, ops> {
+ let Predicates = [HasMVEInt];
+ let Inst{22-21} = 0b00;
+ let Inst{19-16} = 0b1111;
+ let Inst{12} = 0b0;
+}
+
+def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
+ (ins GPRlr:$LRin, lelabel_u11:$label),
+ "letp", "$LRin, $label"> {
+ bits<11> label;
+ let Inst{20} = 0b1;
+ let Inst{13} = 0b0;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+}
+
+def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
+ let Inst{20} = 0b0;
+ let Inst{13} = 0b1;
+ let Inst{11-1} = 0b00000000000;
+ let Unpredictable{21-20} = 0b11;
+ let Unpredictable{11-1} = 0b11111111111;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Patterns
+//===----------------------------------------------------------------------===//
+
+class MVE_unpred_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+
+multiclass MVE_unpred_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+ int shift> {
+ def : MVE_unpred_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_unpred_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_unpred_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_unpred_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_unpred_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_unpred_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_unpred_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
+class MVE_unpred_vector_load_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+
+multiclass MVE_unpred_vector_load<Instruction RegImmInst, PatFrag LoadKind,
+ int shift> {
+ def : MVE_unpred_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
+ def : MVE_unpred_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
+ def : MVE_unpred_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
+ def : MVE_unpred_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
+ def : MVE_unpred_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
+ def : MVE_unpred_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
+ def : MVE_unpred_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
+}
+
+let Predicates = [HasMVEInt, IsLE] in {
+ defm : MVE_unpred_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
+ defm : MVE_unpred_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
+ defm : MVE_unpred_vector_store<MVE_VSTRWU32, alignedstore32, 2>;
+
+ defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
+ defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
+ defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
+
+ def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)),
+ (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+ def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)),
+ (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+ def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)),
+ (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>;
+}
+
+let Predicates = [HasMVEInt, IsBE] in {
+ def : MVE_unpred_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>;
+ def : MVE_unpred_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>;
+ def : MVE_unpred_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>;
+ def : MVE_unpred_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>;
+ def : MVE_unpred_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>;
+
+ def : MVE_unpred_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>;
+ def : MVE_unpred_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>;
+ def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>;
+ def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>;
+ def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>;
+}
+
+
+// Widening/Narrowing Loads/Stores
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr),
+ (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
+ def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
+ (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
+ def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr),
+ (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>;
+}
+
+multiclass MVEExtLoad<string DestLanes, string DestElemBits,
+ string SrcElemBits, string SrcElemType,
+ Operand am> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
+ (!cast<PatFrag>("extloadvi" # SrcElemBits) am:$addr)),
+ (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
+ am:$addr)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
+ (!cast<PatFrag>("zextloadvi" # SrcElemBits) am:$addr)),
+ (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
+ am:$addr)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
+ (!cast<PatFrag>("sextloadvi" # SrcElemBits) am:$addr)),
+ (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
+ am:$addr)>;
+}
+
+let Predicates = [HasMVEInt] in {
+ defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>;
+ defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>;
+ defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>;
+}
+
+
+// Bit convert patterns
+
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+
+ def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+
+ def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
+}
+
+let Predicates = [IsLE,HasMVEInt] in {
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+
+ def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
+
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 96986e74415b..806681df102c 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1,9 +1,8 @@
//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -497,45 +496,30 @@ def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
-def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
-def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i32>]>;
-def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
-
-def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
-def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
-def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
-def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
+def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
-def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
-def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
-def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
+def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
-def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
-def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
-def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
-def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
-def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
-def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
+def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
+def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
+def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
-def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
-def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
-def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
+def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
+def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
+def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
+def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
-def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
-def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
-def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
- SDTCisVT<2, i32>]>;
-def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
-def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
-
-def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
-def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
-def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
-def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
+def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
+def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
@@ -548,23 +532,10 @@ def NEONvbsl : SDNode<"ARMISD::VBSL",
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>>;
-def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
-
-// VDUPLANE can produce a quad-register result from a double-register source,
-// so the result is not constrained to match the source.
-def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisVT<2, i32>]>>;
-
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
-def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
-def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
-def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
-def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
-
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
@@ -585,14 +556,14 @@ def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;
-def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
-def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
@@ -1118,6 +1089,13 @@ def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
+let Predicates = [HasNEON] in {
+def : Pat<(vector_insert (v4f16 DPR:$src),
+ (f16 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(vector_insert (v8f16 QPR:$src),
+ (f16 (load addrmode6:$addr)), imm:$lane),
+ (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
(f32 (load addrmode6:$addr)), imm:$lane),
(VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
@@ -1139,6 +1117,7 @@ def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
+}
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
@@ -1404,7 +1383,7 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
(ins AddrMode:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
+ (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
Sched<[WriteVLD2]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
@@ -1417,8 +1396,10 @@ def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
addrmode6dupalign32>;
-def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+let Predicates = [HasNEON] in {
+def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
+}
class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
Operand AddrMode>
@@ -1426,7 +1407,7 @@ class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
(ins AddrMode:$Rn), IIC_VLD1dup,
"vld1", Dt, "$Vd, $Rn", "",
[(set VecListDPairAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
+ (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1439,8 +1420,10 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
addrmode6dupalign32>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+let Predicates = [HasNEON] in {
+def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
+}
let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
@@ -2152,11 +2135,11 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
- NEONvgetlaneu, addrmode6> {
+ ARMvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
- NEONvgetlaneu, addrmode6> {
+ ARMvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{4};
}
@@ -2167,15 +2150,22 @@ def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
let Inst{5-4} = Rn{5-4};
}
-def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
-def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+let Predicates = [HasNEON] in {
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
+ (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+}
+
// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
@@ -2196,11 +2186,11 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
- NEONvgetlaneu, addrmode6> {
+ ARMvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
- NEONvgetlaneu, addrmode6> {
+ ARMvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{4};
}
@@ -2210,8 +2200,8 @@ def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
let Inst{5-4} = Rn{5-4};
}
-def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
-def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,ARMvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
@@ -2440,37 +2430,45 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
// Use vld1/vst1 for unaligned f64 load / store
+let Predicates = [IsLE,HasNEON] in {
def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
- (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+ (VLD1d16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
- (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+ (VST1d16 addrmode6:$addr, DPR:$value)>;
def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
- (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+ (VLD1d8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
- (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+ (VST1d8 addrmode6:$addr, DPR:$value)>;
+}
+let Predicates = [IsBE,HasNEON] in {
def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
- (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+ (VLD1d64 addrmode6:$addr)>;
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
- (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
+ (VST1d64 addrmode6:$addr, DPR:$value)>;
+}
// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
// load / store if it's legal.
+let Predicates = [HasNEON] in {
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
(VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q64 addrmode6:$addr, QPR:$value)>;
+}
+let Predicates = [IsLE,HasNEON] in {
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
- (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
+ (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+ (VST1q32 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
- (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
+ (VLD1q16 addrmode6:$addr)>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+ (VST1q16 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
- (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
+ (VLD1q8 addrmode6:$addr)>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
+ (VST1q8 addrmode6:$addr, QPR:$value)>;
+}
//===----------------------------------------------------------------------===//
// NEON pattern fragments
@@ -2505,6 +2503,13 @@ def SSubReg_f32_reg : SDNodeXForm<imm, [{
MVT::i32);
}]>;
+// Extract S sub-registers of Q/D registers containing a given f16 lane.
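+// Worked example, for clarity: two f16 lanes share each 32-bit S
+// sub-register, so f16 lane 5 of a Q register maps to ssub_0 + 5/2 =
+// ssub_2, i.e. the third S sub-register.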
+def SSubReg_f16_reg : SDNodeXForm<imm, [{
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue()/2, SDLoc(N),
+ MVT::i32);
+}]>;
+
// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32);
@@ -2666,7 +2671,7 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
- (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+ (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
@@ -2678,7 +2683,7 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
- (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = 0;
@@ -2714,7 +2719,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
@@ -2727,7 +2732,7 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
- (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
@@ -2762,7 +2767,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
- (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
@@ -2774,7 +2779,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
- (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+ (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2829,7 +2834,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
@@ -2841,7 +2846,7 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
- (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]> {
let isCommutable = 0;
}
@@ -2877,7 +2882,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
- (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+ (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
@@ -2890,7 +2895,7 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
- (Ty (NEONvduplane (Ty DPR_8:$Vm),
+ (Ty (ARMvduplane (Ty DPR_8:$Vm),
imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -2912,7 +2917,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
@@ -2926,7 +2931,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
- (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+ (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))))]>;
// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
@@ -2986,7 +2991,7 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
- (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
+ (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
imm:$lane))))))]>;
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -2998,7 +3003,7 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
- (TyD (NEONvduplane (TyD DPR_8:$Vm),
+ (TyD (ARMvduplane (TyD DPR_8:$Vm),
imm:$lane))))))]>;
// Long Intrinsic-Op vector operations with explicit extend (VABAL).
@@ -3034,7 +3039,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
- (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]>;
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -3047,7 +3052,7 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
- (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]>;
// Narrowing 3-register intrinsics.
@@ -3080,7 +3085,7 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
- (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
+ (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
@@ -3089,7 +3094,7 @@ class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
- (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
+ (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
// Long 3-register operations with explicitly extended operands.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -3145,7 +3150,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
- (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+ (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
imm:$lane)))))]>;
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -3155,7 +3160,7 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
- (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
+ (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
imm:$lane)))))]>;
// Wide 3-register operations.
@@ -4087,72 +4092,72 @@ multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
// imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
- N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
- N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
- N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
- N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
- N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
- N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
- N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
- N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
// imm6 = xxxxxx
}
@@ -4251,12 +4256,14 @@ defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;
-def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+}
// Vector Multiply Operations.
@@ -4287,47 +4294,49 @@ def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
v4f16, fmul>,
Requires<[HasNEON,HasFullFP16]>;
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
(v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
- (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
+ (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
(v4f32 (VMULslfq (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
- (v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
+ (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
(v8f16 (VMULslhq(v8f16 QPR:$src1),
(v4f16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
-def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
(VMULslfd DPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
-def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
(VMULslhd DPR:$Rn,
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
(i32 0))>;
-def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
(VMULslfq QPR:$Rn,
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
(i32 0))>;
-def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
+def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
(VMULslhq QPR:$Rn,
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
(i32 0))>;
+}
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
@@ -4336,20 +4345,23 @@ defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqdmulh", "s", int_arm_neon_vqdmulh>;
+
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src2),
imm:$lane)))),
(v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+}
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
@@ -4358,20 +4370,23 @@ defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqrdmulh", "s", int_arm_neon_vqrdmulh>;
+
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
- (v8i16 (NEONvduplane (v8i16 QPR:$src2),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src2),
imm:$lane)))),
(v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
- (v4i32 (NEONvduplane (v4i32 QPR:$src2),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+}
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
@@ -4427,9 +4442,10 @@ def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
v8f16, v4f16, fmul, fadd>,
Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
(v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
@@ -4437,15 +4453,16 @@ def : Pat<(v8i16 (add (v8i16 QPR:$src1),
def : Pat<(v4i32 (add (v4i32 QPR:$src1),
(mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
(v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+}
def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
(fmul_su (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLAslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
@@ -4497,7 +4514,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v4i16 DPR:$src1),
(v4i16 (int_arm_neon_vqrdmulh
(v4i16 DPR:$Vn),
- (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
imm:$lane))>;
@@ -4505,7 +4522,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v2i32 DPR:$src1),
(v2i32 (int_arm_neon_vqrdmulh
(v2i32 DPR:$Vn),
- (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
@@ -4513,7 +4530,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3),
imm:$lane)))))),
(v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
@@ -4525,7 +4542,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3),
imm:$lane)))))),
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
@@ -4567,14 +4584,14 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v4i16 DPR:$src1),
(v4i16 (int_arm_neon_vqrdmulh
(v4i16 DPR:$Vn),
- (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
def : Pat<(v2i32 (int_arm_neon_vqsubs
(v2i32 DPR:$src1),
(v2i32 (int_arm_neon_vqrdmulh
(v2i32 DPR:$Vn),
- (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
imm:$lane))>;
@@ -4582,7 +4599,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v8i16 QPR:$src1),
(v8i16 (int_arm_neon_vqrdmulh
(v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3),
imm:$lane)))))),
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
(v8i16 QPR:$src2),
@@ -4594,7 +4611,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
(v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqrdmulh
(v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3),
imm:$lane)))))),
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
(v4i32 QPR:$src2),
@@ -4608,6 +4625,7 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
+let Predicates = [HasNEON] in {
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
@@ -4618,14 +4636,15 @@ def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
(VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
- (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
- (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
+}
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -4657,9 +4676,10 @@ def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
v8f16, v4f16, fmul, fsub>,
Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
(v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i16_reg imm:$lane))),
@@ -4667,15 +4687,16 @@ def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
(mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
(v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+}
def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
(fmul_su (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
(DSubReg_i32_reg imm:$lane))),
@@ -4696,6 +4717,7 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", null_frag>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
+let Predicates = [HasNEON] in {
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
(v4i16 DPR:$Vm))))),
@@ -4706,14 +4728,15 @@ def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
(VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
(v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
- (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
imm:$lane)))))),
(VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
(v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
- (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
imm:$lane)))))),
(VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
+}
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
@@ -4754,16 +4777,16 @@ def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
(VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasNEON,HasVFP4]>;
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
(VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasNEON,HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
(VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasNEON,HasVFP4]>;
// ARMv8.2a dot product instructions.
// We put them in the VFPV8 decoder namespace because the ARM and Thumb
@@ -4808,7 +4831,7 @@ multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
(AccumType (OpNode (AccumType Ty:$Vd),
(InputType Ty:$Vn),
(InputType (bitconvert (AccumType
- (NEONvduplane (AccumType Ty:$Vm),
+ (ARMvduplane (AccumType Ty:$Vm),
VectorIndex32:$lane)))))),
(!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
}
@@ -4991,12 +5014,14 @@ defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;
-def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+}
// Vector Comparisons.
@@ -5122,10 +5147,11 @@ class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
: N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
asm, "f16", "$Vd, $Vn, $Vm", "", []>;
-class VFMQ0<string opc, bits<2> S>
+// Vd, Vs, Vs[0-15], Idx[0-1]
+class VFMD<string opc, string type, bits<2> S>
: N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
- (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx),
- IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
+ IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
bit idx;
let Inst{3} = idx;
let Inst{19-16} = Vn{4-1};
@@ -5134,10 +5160,11 @@ class VFMQ0<string opc, bits<2> S>
let Inst{2-0} = Vm{3-1};
}
-class VFMQ1<string opc, bits<2> S>
+// Vq, Vd, Vd[0-7], Idx[0-3]
+class VFMQ<string opc, string type, bits<2> S>
: N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx),
- IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> {
+ (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
+ IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
bits<2> idx;
let Inst{5} = idx{1};
let Inst{3} = idx{0};
@@ -5149,10 +5176,10 @@ def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
def VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
-def VFMALDI : VFMQ0<"vfmal", 0b00>;
-def VFMSLDI : VFMQ0<"vfmsl", 0b01>;
-def VFMALQI : VFMQ1<"vfmal", 0b00>;
-def VFMSLQI : VFMQ1<"vfmsl", 0b01>;
+def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
+def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
+def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
+def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
}
} // HasNEON, HasFP16FML
@@ -5308,28 +5335,28 @@ let isReMaterializable = 1 in {
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
}
@@ -5343,8 +5370,10 @@ def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
"vmvn", "$Vd, $Vm", "",
[(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
+let Predicates = [HasNEON] in {
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
+}
// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
@@ -5353,36 +5382,31 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+let Predicates = [HasNEON] in {
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+}
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
@@ -5391,35 +5415,30 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
[(set QPR:$Vd,
(v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+let Predicates = [HasNEON] in {
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
- Requires<[HasNEON]>;
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
+}
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -5479,24 +5498,28 @@ defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+}
// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.
def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
- (NEONvshrs (sub (zext node:$in1),
+ (ARMvshrsImm (sub (zext node:$in1),
(zext node:$in2)), (i32 $shift))>;
+let Predicates = [HasNEON] in {
def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
(v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
(zext (v2i32 DPR:$opB))),
(abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
(VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+}
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
@@ -5536,22 +5559,22 @@ def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
- N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v2f32, v2f32, fmaxnum, 1>,
- Requires<[HasV8, HasNEON]>;
- def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
- N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v4f32, v4f32, fmaxnum, 1>,
- Requires<[HasV8, HasNEON]>;
- def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
- N3RegFrm, NoItinerary, "vmaxnm", "f16",
- v4f16, v4f16, fmaxnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
- def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
- N3RegFrm, NoItinerary, "vmaxnm", "f16",
- v8f16, v8f16, fmaxnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def NEON_VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v2f32, v2f32, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def NEON_VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f32",
+ v4f32, v4f32, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def NEON_VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v4f16, v4f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def NEON_VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v8f16, v8f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// VMIN : Vector Minimum
@@ -5578,22 +5601,22 @@ def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
- N3RegFrm, NoItinerary, "vminnm", "f32",
- v2f32, v2f32, fminnum, 1>,
- Requires<[HasV8, HasNEON]>;
- def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
- N3RegFrm, NoItinerary, "vminnm", "f32",
- v4f32, v4f32, fminnum, 1>,
- Requires<[HasV8, HasNEON]>;
- def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
- N3RegFrm, NoItinerary, "vminnm", "f16",
- v4f16, v4f16, fminnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
- def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
- N3RegFrm, NoItinerary, "vminnm", "f16",
- v8f16, v8f16, fminnum, 1>,
- Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def NEON_VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v2f32, v2f32, fminnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def NEON_VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f32",
+ v4f32, v4f32, fminnum, 1>,
+ Requires<[HasV8, HasNEON]>;
+ def NEON_VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v4f16, v4f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def NEON_VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v8f16, v8f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// Vector Pairwise Operations.
@@ -5754,20 +5777,57 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
"vshl", "u", int_arm_neon_vshiftu>;
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+ (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+ (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+ (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+ (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+ (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+ (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+ (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+ (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
+
+def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+ (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+ (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+ (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+ (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+ (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+ (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+ (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+ (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
+
+}
+
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
- NEONvshrs>;
+ ARMvshrsImm>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
- NEONvshru>;
+ ARMvshruImm>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
- PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
+ PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
- PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;
+ PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
@@ -5785,36 +5845,40 @@ def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, imm32>;
-def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
+}
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
PatFrag<(ops node:$Rn, node:$amt),
- (trunc (NEONvshrs node:$Rn, node:$amt))>>;
+ (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
-def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
(VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
-def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
(VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
-def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
(VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
+}
// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
@@ -5825,13 +5889,13 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
- NEONvrshrs>;
+ NEONvrshrsImm>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
- NEONvrshru>;
+ NEONvrshruImm>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
- NEONvrshrn>;
+ NEONvrshrnImm>;
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
@@ -5841,21 +5905,21 @@ defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
-defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
- NEONvqshrns>;
+ NEONvqshrnsImm>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
- NEONvqshrnu>;
+ NEONvqshrnuImm>;
// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
- NEONvqshrnsu>;
+ NEONvqshrnsuImm>;
// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
@@ -5867,20 +5931,20 @@ defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
- NEONvqrshrns>;
+ NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
- NEONvqrshrnu>;
+ NEONvqrshrnuImm>;
// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
- NEONvqrshrnsu>;
+ NEONvqrshrnsuImm>;
// VSRA : Vector Shift Right and Accumulate
-defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
-defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
-defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
-defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
@@ -5957,12 +6021,14 @@ def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
[(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
Requires<[HasNEON, HasFullFP16]>;
+let Predicates = [HasNEON] in {
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+}
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
@@ -6014,57 +6080,57 @@ let isReMaterializable = 1, isAsCheapAsAMove=1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
+ [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
+ [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
(ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
+ [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
(ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+ [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
- [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+ [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
- [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
+ [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove
// Add support for bytes replication feature, so it could be GAS compatible.
@@ -6144,7 +6210,7 @@ let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
(outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
- [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+ [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
@@ -6152,7 +6218,7 @@ def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
(outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
- [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+ [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
@@ -6160,7 +6226,7 @@ def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
(outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
- [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+ [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
@@ -6168,7 +6234,7 @@ def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
(outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
- [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+ [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
@@ -6178,26 +6244,28 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]>,
- Requires<[HasVFP2, HasFastVGETLNi32]> {
+ Requires<[HasFPRegs, HasFastVGETLNi32]> {
let Inst{21} = lane{0};
}
+let Predicates = [HasNEON] in {
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
-def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
(VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane))>;
-def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
(VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
-def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
(VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane))>;
-def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
+def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
(VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
+}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
@@ -6211,6 +6279,7 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(COPY_TO_REGCLASS
(i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
Requires<[HasNEON, HasSlowVGETLNi32]>;
+let Predicates = [HasNEON] in {
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
(EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
@@ -6221,7 +6290,36 @@ def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
(EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
+}
+
+def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
+def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
+
+let Predicates = [HasNEON] in {
+def : Pat<(extractelt (v4f16 DPR:$src), imm_even:$lane),
+ (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+ (SSubReg_f16_reg imm_even:$lane))>;
+def : Pat<(extractelt (v4f16 DPR:$src), imm_odd:$lane),
+ (COPY_TO_REGCLASS
+ (VMOVH (EXTRACT_SUBREG
+ (v2f32 (COPY_TO_REGCLASS (v4f16 DPR:$src), DPR_VFP2)),
+ (SSubReg_f16_reg imm_odd:$lane))),
+ HPR)>;
+
+def : Pat<(extractelt (v8f16 QPR:$src), imm_even:$lane),
+ (EXTRACT_SUBREG
+ (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
+ (SSubReg_f16_reg imm_even:$lane))>;
+
+def : Pat<(extractelt (v8f16 QPR:$src), imm_odd:$lane),
+ (COPY_TO_REGCLASS
+ (VMOVH (EXTRACT_SUBREG
+ (v4f32 (COPY_TO_REGCLASS (v8f16 QPR:$src), QPR_VFP2)),
+ (SSubReg_f16_reg imm_odd:$lane))),
+ HPR)>;
+}
// VMOV : Vector Set Lane (move ARM core register to scalar)
@@ -6254,6 +6352,8 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
let isInsertSubreg = 1;
}
}
+
+let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
(v16i8 (INSERT_SUBREG QPR:$src1,
(v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
@@ -6280,6 +6380,15 @@ def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
(INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(insertelt (v4f16 DPR:$src1), HPR:$src2, imm:$lane),
+ (v4f16 (VSETLNi16 DPR:$src1, (VMOVRH $src2), imm:$lane))>;
+def : Pat<(insertelt (v8f16 QPR:$src1), HPR:$src2, imm:$lane),
+ (v8f16 (INSERT_SUBREG QPR:$src1,
+ (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
+ (DSubReg_i16_reg imm:$lane))),
+ (VMOVRH $src2), (SubReg_i16_lane imm:$lane))),
+ (DSubReg_i16_reg imm:$lane)))>;
+
//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
@@ -6311,17 +6420,18 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
dsub_0)>;
+}
// VDUP : Vector Duplicate (from ARM core register to all elements)
class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
IIC_VMOVIS, "vdup", Dt, "$V, $R",
- [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+ [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
IIC_VMOVIS, "vdup", Dt, "$V, $R",
- [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
+ [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
@@ -6331,15 +6441,16 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-// NEONvdup patterns for uarchs with fast VDUP.32.
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
+// ARMvdup patterns for uarchs with fast VDUP.32.
+def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
Requires<[HasNEON,HasFastVDUP32]>;
-def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
+def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
+ Requires<[HasNEON]>;
-// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
-def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
+// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
+def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
Requires<[HasNEON,HasSlowVDUP32]>;
-def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
+def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
Requires<[HasNEON,HasSlowVDUP32]>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
@@ -6348,13 +6459,13 @@ class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType Ty, Operand IdxTy>
: NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
- [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
+ [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Operand IdxTy>
: NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
- [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
+ [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
VectorIndex32:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
@@ -6384,48 +6495,50 @@ def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
let Inst{19} = lane{0};
}
-def : Pat<(v4f16 (NEONvduplane (v4f16 DPR:$Vm), imm:$lane)),
+let Predicates = [HasNEON] in {
+def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
(VDUPLN32d DPR:$Vm, imm:$lane)>;
-def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32d DPR:$Vm, imm:$lane)>;
-def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
(VDUPLN32q DPR:$Vm, imm:$lane)>;
-def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i8_reg imm:$lane))),
(SubReg_i8_lane imm:$lane)))>;
-def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
(v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
-def : Pat<(v8f16 (NEONvduplane (v8f16 QPR:$src), imm:$lane)),
+def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
(v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
-def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
(v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
(v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def : Pat<(v4f16 (NEONvdup HPR:$src)),
+def : Pat<(v4f16 (ARMvdup HPR:$src)),
(v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
HPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
+def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
(v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
+def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
(v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
SPR:$src, ssub_0), (i32 0)))>;
-def : Pat<(v8f16 (NEONvdup HPR:$src)),
+def : Pat<(v8f16 (ARMvdup HPR:$src)),
(v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
HPR:$src, ssub_0), (i32 0)))>;
+}
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
@@ -6440,9 +6553,12 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+
+let Predicates = [HasNEON] in {
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
+}
// Vector Conversions.
@@ -6621,24 +6737,29 @@ class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VMOVD,
OpcodeStr, Dt, "$Vd, $Vm", "",
- [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+ [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VMOVQ,
OpcodeStr, Dt, "$Vd, $Vm", "",
- [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+ [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
-def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+let Predicates = [HasNEON] in {
+def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
+}
def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
-def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
-def : Pat<(v8f16 (NEONvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
-def : Pat<(v4f16 (NEONvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
+
+let Predicates = [HasNEON] in {
+def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
+def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))), (VREV64q16 QPR:$Vm)>;
+def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))), (VREV64d16 DPR:$Vm)>;
+}
// VREV32 : Vector Reverse elements within 32-bit words
@@ -6646,12 +6767,12 @@ class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VMOVD,
OpcodeStr, Dt, "$Vd, $Vm", "",
- [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+ [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VMOVQ,
OpcodeStr, Dt, "$Vd, $Vm", "",
- [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+ [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
@@ -6665,12 +6786,12 @@ class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
(ins DPR:$Vm), IIC_VMOVD,
OpcodeStr, Dt, "$Vd, $Vm", "",
- [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+ [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
(ins QPR:$Vm), IIC_VMOVQ,
OpcodeStr, Dt, "$Vd, $Vm", "",
- [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+ [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
@@ -6681,7 +6802,8 @@ def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
: Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
- (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+ (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
+ Requires<[HasNEON]>;
def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
@@ -6693,6 +6815,7 @@ def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
// VEXT : Vector Extract
@@ -6728,15 +6851,19 @@ def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{10-9} = index{1-0};
let Inst{8} = 0b0;
}
+let Predicates = [HasNEON] in {
def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
(VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{10} = index{0};
let Inst{9-8} = 0b00;
}
+let Predicates = [HasNEON] in {
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
+}
def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
@@ -6745,8 +6872,10 @@ def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
+let Predicates = [HasNEON] in {
def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
(VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
@@ -6756,8 +6885,10 @@ def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
let Inst{11} = index{0};
let Inst{10-8} = 0b000;
}
+let Predicates = [HasNEON] in {
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
(VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
+}
// VTRN : Vector Transpose
@@ -6857,6 +6988,7 @@ def VTBX4Pseudo
IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"
+let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
(v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
v8i8:$Vn1, dsub_1),
@@ -6899,6 +7031,7 @@ def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
v8i8:$Vn2, dsub_2,
v8i8:$Vn3, dsub_3),
v8i8:$Vm))>;
+}
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
@@ -6989,6 +7122,7 @@ def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+let Predicates = [HasNEON] in {
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
(SHA1H (SUBREG_TO_REG (i64 0),
@@ -7016,6 +7150,7 @@ def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
(f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
ssub_0),
v4i32:$wk)>;
+}
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
@@ -7123,171 +7258,228 @@ def : Pat<(arm_vmovsr GPR:$a),
Requires<[HasNEON, DontUseVMOVSR]>;
//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
+// Non-Instruction Patterns or Endianness - Revert Patterns
//===----------------------------------------------------------------------===//
// bit_convert
-let Predicates = [IsLE] in {
+// 64 bit conversions
+let Predicates = [HasNEON] in {
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+
+def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16 DPR:$src)>;
+
+// 128 bit conversions
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>;
+}
+
+let Predicates = [IsLE,HasNEON] in {
+ // 64 bit conversions
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
-}
-def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
-let Predicates = [IsLE] in {
- def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
- def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
-}
-def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
-let Predicates = [IsLE] in {
+
+ def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (v4f16 DPR:$src)>;
+
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
- def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
- def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
- def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
- def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
-}
-def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
-let Predicates = [IsLE] in {
- def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
- def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
-}
-def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
-let Predicates = [IsLE] in {
- def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
-}
-let Predicates = [IsLE] in {
+ // 128 bit conversions
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
-}
-def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
-let Predicates = [IsLE] in {
- def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
-}
-def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
-let Predicates = [IsLE] in {
+
+ def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
+
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
+
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
-}
-def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
-let Predicates = [IsLE] in {
- def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
-}
-def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
-let Predicates = [IsLE] in {
- def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
}
-let Predicates = [IsBE] in {
+let Predicates = [IsBE,HasNEON] in {
// 64 bit conversions
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
- def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
- def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+
+ def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4f16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
- def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
- def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4f16 DPR:$src))), (VREV16d8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
- def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
- def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
- def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
- def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
// 128 bit conversions
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
- def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
- def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+
+ def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
- def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
- def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
- def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
- def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
- def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}
// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
+let Predicates = [IsBE,HasNEON] in {
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
- (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
+ (VREV64q8 (VLD1q8 addrmode6:$addr))>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
+ (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
- (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
+ (VREV64q16 (VLD1q16 addrmode6:$addr))>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
+ (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
+}
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
- (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+ (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
+ Requires<[HasNEON]>;
// Vector lengthening move with load, matching extending loads.
@@ -7301,17 +7493,20 @@ multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
- (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
(!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
- (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
+ (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
+ Requires<[HasNEON]>;
}
}
@@ -7328,17 +7523,20 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
}
// The following class definition is basically a copy of the
@@ -7352,19 +7550,22 @@ multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, strin
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(!cast<Instruction>("VREV32d" # RevLanes)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
(!cast<Instruction>("VREV32d" # RevLanes)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
(!cast<Instruction>("VREV32d" # RevLanes)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
}
// extload, zextload and sextload for a lengthening load followed by another
@@ -7386,19 +7587,22 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
- dsub_0))>;
+ dsub_0))>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
- dsub_0))>;
+ dsub_0))>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
- dsub_0))>;
+ dsub_0))>,
+ Requires<[HasNEON]>;
}
// The following class definition is basically a copy of the
@@ -7414,21 +7618,24 @@ multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string Sr
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(!cast<Instruction>("VREV32d" # RevLanes)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
- dsub_0))>;
+ dsub_0))>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(!cast<Instruction>("VREV32d" # RevLanes)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
- dsub_0))>;
+ dsub_0))>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
(!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(!cast<Instruction>("VREV32d" # RevLanes)
(VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
- dsub_0))>;
+ dsub_0))>,
+ Requires<[HasNEON]>;
}
// extload, zextload and sextload for a lengthening load followed by another
@@ -7451,21 +7658,24 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
dsub_0)),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
}
// The following class definition is basically a copy of the
@@ -7482,7 +7692,8 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
(!cast<Instruction>("VREV16d8")
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
dsub_0)),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
@@ -7490,7 +7701,8 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
(!cast<Instruction>("VREV16d8")
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
dsub_0)),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
@@ -7498,14 +7710,15 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
(!cast<Instruction>("VREV16d8")
(VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
dsub_0)),
- dsub_0)>;
+ dsub_0)>,
+ Requires<[HasNEON]>;
}
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
-let Predicates = [IsLE] in {
+let Predicates = [HasNEON,IsLE] in {
defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
@@ -7517,7 +7730,7 @@ let Predicates = [IsLE] in {
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}
-let Predicates = [IsBE] in {
+let Predicates = [HasNEON,IsBE] in {
defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
@@ -7530,7 +7743,7 @@ let Predicates = [IsBE] in {
}
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-let Predicates = [IsLE] in {
+let Predicates = [HasNEON,IsLE] in {
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(VLD1LNd16 addrmode6:$addr,
@@ -7547,7 +7760,7 @@ let Predicates = [IsLE] in {
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
-let Predicates = [IsBE] in {
+let Predicates = [HasNEON,IsBE] in {
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
(!cast<Instruction>("VREV16d8")
@@ -7565,6 +7778,7 @@ let Predicates = [IsBE] in {
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}
+let Predicates = [HasNEON] in {
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
@@ -7575,6 +7789,9 @@ def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+}
//===----------------------------------------------------------------------===//
// Assembler aliases
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index b20b34eaa6a9..cfeb13c6acb6 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1,9 +1,8 @@
//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -188,6 +187,19 @@ def t_addrmode_rr : MemOperand,
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
+// t_addrmode_rr_sext := reg + reg
+//
+// This is similar to t_addrmode_rr, but uses different heuristics for
+// ldrsb/ldrsh.
+def t_addrmode_rr_sext : MemOperand,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRRSext", []> {
+ let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let DecoderMethod = "DecodeThumbAddrModeRR";
+ let ParserMatchClass = t_addrmode_rr_asm_operand;
+ let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
// t_addrmode_rrs := reg + reg
//
// We use separate scaled versions because the Select* functions need
@@ -651,7 +663,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", "\t$Rt, $addr",
[(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
- T1Encoding<{0,1,0,0,1,?}> {
+ T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> {
// A6.2 & A8.6.59
bits<3> Rt;
bits<8> addr;
@@ -665,7 +677,7 @@ let canFoldAsLoad = 1 in
def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
"ldr", "\t$Rt, $addr",
[(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
- T1LdStSP<{1,?,?}> {
+ T1LdStSP<{1,?,?}>, Sched<[WriteLd]> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
@@ -716,39 +728,39 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iLoad_r, IIC_iLoad_i, "ldr",
- load>;
+ load>, Sched<[WriteLd]>;
// A8.6.64 & A8.6.61
defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
- zextloadi8>;
+ zextloadi8>, Sched<[WriteLd]>;
// A8.6.76 & A8.6.73
defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
- zextloadi16>;
+ zextloadi16>, Sched<[WriteLd]>;
let AddedComplexity = 10 in
def tLDRSB : // A8.6.80
- T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
AddrModeT1_1, IIC_iLoad_bh_r,
"ldrsb", "\t$Rt, $addr",
- [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>;
+ [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;
let AddedComplexity = 10 in
def tLDRSH : // A8.6.84
- T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr),
+ T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr),
AddrModeT1_2, IIC_iLoad_bh_r,
"ldrsh", "\t$Rt, $addr",
- [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
+ [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>;
def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
"str", "\t$Rt, $addr",
[(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
- T1LdStSP<{0,?,?}> {
+ T1LdStSP<{0,?,?}>, Sched<[WriteST]> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
@@ -759,19 +771,19 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iStore_r, IIC_iStore_i, "str",
- store>;
+ store>, Sched<[WriteST]>;
// A8.6.197 & A8.6.195
defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
- truncstorei8>;
+ truncstorei8>, Sched<[WriteST]>;
// A8.6.207 & A8.6.205
defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
- truncstorei16>;
+ truncstorei16>, Sched<[WriteST]>;
//===----------------------------------------------------------------------===//
@@ -799,8 +811,8 @@ def tLDMIA_UPD :
"$Rn = $wb", IIC_iLoad_mu>,
PseudoInstExpansion<(tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)> {
let Size = 2;
- let OutOperandList = (outs GPR:$wb);
- let InOperandList = (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops);
+ let OutOperandList = (outs tGPR:$wb);
+ let InOperandList = (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops);
let Pattern = [];
let isCodeGenOnly = 1;
let isPseudo = 1;
@@ -809,7 +821,7 @@ def tLDMIA_UPD :
// There is no non-writeback version of STM for Thumb.
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-def tSTMIA_UPD : Thumb1I<(outs GPR:$wb),
+def tSTMIA_UPD : Thumb1I<(outs tGPR:$wb),
(ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
AddrModeNone, 2, IIC_iStore_mu,
"stm${p}\t$Rn!, $regs", "$Rn = $wb", []>,
@@ -831,7 +843,7 @@ let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1,
def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iPop,
"pop${p}\t$regs", []>,
- T1Misc<{1,1,0,?,?,?,?}> {
+ T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> {
bits<16> regs;
let Inst{8} = regs{15};
let Inst{7-0} = regs{7-0};
@@ -841,7 +853,7 @@ let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
IIC_iStore_m,
"push${p}\t$regs", []>,
- T1Misc<{0,1,0,?,?,?,?}> {
+ T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> {
bits<16> regs;
let Inst{8} = regs{14};
let Inst{7-0} = regs{7-0};
@@ -1202,7 +1214,7 @@ def tMUL : // A8.6.105 T1
Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2,
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd",
[(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>,
- T1DataProcessing<0b1101> {
+ T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
bits<3> Rd;
bits<3> Rn;
let Inst{5-3} = Rn;
@@ -1499,12 +1511,13 @@ def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R7, LR, SP ] in
-def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins tGPR:$src, tGPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
- [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ [(ARMeh_sjlj_longjmp tGPR:$src, tGPR:$scratch)]>,
Requires<[IsThumb,IsNotWindows]>;
+// (Windows is Thumb2-only)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R11, LR, SP ] in
def tInt_WIN_eh_sjlj_longjmp
@@ -1599,16 +1612,16 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// and expand it just after ISel.
let usesCustomInserter = 1, mayLoad =1,
Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
- def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
- (ins rGPR:$Rn, pred:$p),
+ def tLDR_postidx: tPseudoInst<(outs tGPR:$Rt, tGPR:$Rn_wb),
+ (ins tGPR:$Rn, pred:$p),
4, IIC_iStore_ru,
[]>;
// post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def
// multiple registers) is the same in ISel as MachineInstr, so there's no need
// for a pseudo.
-def : T1Pat<(post_store rGPR:$Rt, rGPR:$Rn, 4),
- (tSTMIA_UPD rGPR:$Rn, rGPR:$Rt)>;
+def : T1Pat<(post_store tGPR:$Rt, tGPR:$Rn, 4),
+ (tSTMIA_UPD tGPR:$Rn, tGPR:$Rt)>;
// If it's impossible to use [r,r] address mode for sextload, select to
// ldr{b|h} + sxt{b|h} instead.
@@ -1677,9 +1690,9 @@ def : T1Pat<(i32 imm256_510:$src),
// be expanded into two instructions late to allow if-conversion and
// scheduling.
let isReMaterializable = 1 in
-def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+def tLDRpci_pic : PseudoInst<(outs tGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
NoItinerary,
- [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ [(set tGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb, IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 7a6673b49d57..7cbfaba7a8eb 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1,9 +1,8 @@
//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@ def it_mask_asmoperand : AsmOperandClass { let Name = "ITMask"; }
def it_mask : Operand<i32> {
let PrintMethod = "printThumbITMask";
let ParserMatchClass = it_mask_asmoperand;
+ let EncoderMethod = "getITMaskOpValue";
}
// t2_shift_imm: An integer that encodes a shift amount and the type of shift
@@ -40,6 +40,16 @@ def t2_shift_imm : Operand<i32> {
let DecoderMethod = "DecodeT2ShifterImmOperand";
}
+def mve_shift_imm : AsmOperandClass {
+ let Name = "MVELongShift";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticString = "operand must be an immediate in the range [1,32]";
+}
+def long_shift : Operand<i32> {
+ let ParserMatchClass = mve_shift_imm;
+ let DecoderMethod = "DecodeLongShiftOperand";
+}
+
// Shifted operands. No register controlled shifts for Thumb2.
// Note: We do not support rrx shifted operands yet.
def t2_so_reg : Operand<i32>, // reg imm
@@ -151,6 +161,26 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
// Define Thumb2 specific addressing modes.
+// t2_addr_offset_none := reg
+def MemNoOffsetT2AsmOperand
+ : AsmOperandClass { let Name = "MemNoOffsetT2"; }
+def t2_addr_offset_none : MemOperand {
+ let PrintMethod = "printAddrMode7Operand";
+ let DecoderMethod = "DecodeGPRnopcRegisterClass";
+ let ParserMatchClass = MemNoOffsetT2AsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base);
+}
+
+// t2_nosp_addr_offset_none := reg
+def MemNoOffsetT2NoSpAsmOperand
+ : AsmOperandClass { let Name = "MemNoOffsetT2NoSp"; }
+def t2_nosp_addr_offset_none : MemOperand {
+ let PrintMethod = "printAddrMode7Operand";
+ let DecoderMethod = "DecoderGPRRegisterClass";
+ let ParserMatchClass = MemNoOffsetT2NoSpAsmOperand;
+ let MIOperandInfo = (ops rGPR:$base);
+}
+
// t2addrmode_imm12 := reg + imm12
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
def t2addrmode_imm12 : MemOperand,
@@ -182,31 +212,40 @@ def t2adrlabel : Operand<i32> {
}
// t2addrmode_posimm8 := reg + imm8
-def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
+def MemPosImm8OffsetAsmOperand : AsmOperandClass {
+ let Name="MemPosImm8Offset";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
def t2addrmode_posimm8 : MemOperand {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
- let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let EncoderMethod = "getT2AddrModeImmOpValue<8,0>";
let DecoderMethod = "DecodeT2AddrModeImm8";
let ParserMatchClass = MemPosImm8OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
// t2addrmode_negimm8 := reg - imm8
-def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
+def MemNegImm8OffsetAsmOperand : AsmOperandClass {
+ let Name="MemNegImm8Offset";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
def t2addrmode_negimm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
- let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let EncoderMethod = "getT2AddrModeImmOpValue<8,0>";
let DecoderMethod = "DecodeT2AddrModeImm8";
let ParserMatchClass = MemNegImm8OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
// t2addrmode_imm8 := reg +/- imm8
-def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
+def MemImm8OffsetAsmOperand : AsmOperandClass {
+ let Name = "MemImm8Offset";
+ let RenderMethod = "addMemImmOffsetOperands";
+}
class T2AddrMode_Imm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
- let EncoderMethod = "getT2AddrModeImm8OpValue";
+ let EncoderMethod = "getT2AddrModeImmOpValue<8,0>";
let DecoderMethod = "DecodeT2AddrModeImm8";
let ParserMatchClass = MemImm8OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
@@ -248,10 +287,38 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
def t2am_imm8s4_offset : MemOperand {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
- let EncoderMethod = "getT2Imm8s4OpValue";
+ let EncoderMethod = "getT2ScaledImmOpValue<8,2>";
let DecoderMethod = "DecodeT2Imm8S4";
}
+// t2addrmode_imm7s4 := reg +/- (imm7 << 2)
+def MemImm7s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm7s4Offset";}
+class T2AddrMode_Imm7s4 : MemOperand {
+ let EncoderMethod = "getT2AddrModeImm7s4OpValue";
+ let DecoderMethod = "DecodeT2AddrModeImm7<2,0>";
+ let ParserMatchClass = MemImm7s4OffsetAsmOperand;
+ let MIOperandInfo = (ops GPRnopc:$base, i32imm:$offsimm);
+}
+
+def t2addrmode_imm7s4 : T2AddrMode_Imm7s4 {
+ // They are printed the same way as the imm8 version
+ let PrintMethod = "printT2AddrModeImm8s4Operand<false>";
+}
+
+def t2addrmode_imm7s4_pre : T2AddrMode_Imm7s4 {
+ // They are printed the same way as the imm8 version
+ let PrintMethod = "printT2AddrModeImm8s4Operand<true>";
+}
+
+def t2am_imm7s4_offset_asmoperand : AsmOperandClass { let Name = "Imm7s4"; }
+def t2am_imm7s4_offset : MemOperand {
+ // They are printed the same way as the imm8 version
+ let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+ let ParserMatchClass = t2am_imm7s4_offset_asmoperand;
+ let EncoderMethod = "getT2ScaledImmOpValue<7,2>";
+ let DecoderMethod = "DecodeT2Imm7S4";
+}
+
// t2addrmode_imm0_1020s4 := reg + (imm8 << 2)
def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
let Name = "MemImm0_1020s4Offset";
@@ -290,6 +357,75 @@ def addrmode_tbh : MemOperand {
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
}
+// Define ARMv8.1-M specific addressing modes.
+
+// Label operands for BF/BFL/WLS/DLS/LE
+class BFLabelOp<string signed, string isNeg, string zeroPermitted, string size,
+ string fixup>
+ : Operand<OtherVT> {
+ let EncoderMethod = !strconcat("getBFTargetOpValue<", isNeg, ", ",
+ fixup, ">");
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = !strconcat("DecodeBFLabelOperand<", signed, ", ",
+ isNeg, ", ", zeroPermitted, ", ", size, ">");
+}
+def bflabel_u4 : BFLabelOp<"false", "false", "false", "4", "ARM::fixup_bf_branch">;
+def bflabel_s12 : BFLabelOp<"true", "false", "true", "12", "ARM::fixup_bfc_target">;
+def bflabel_s16 : BFLabelOp<"true", "false", "true", "16", "ARM::fixup_bf_target">;
+def bflabel_s18 : BFLabelOp<"true", "false", "true", "18", "ARM::fixup_bfl_target">;
+
+def wlslabel_u11_asmoperand : AsmOperandClass {
+ let Name = "WLSLabel";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isUnsignedOffset<11, 1>";
+ let DiagnosticString =
+ "loop end is out of range or not a positive multiple of 2";
+}
+def wlslabel_u11 : BFLabelOp<"false", "false", "true", "11", "ARM::fixup_wls"> {
+ let ParserMatchClass = wlslabel_u11_asmoperand;
+}
+def lelabel_u11_asmoperand : AsmOperandClass {
+ let Name = "LELabel";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isLEOffset";
+ let DiagnosticString =
+ "loop start is out of range or not a negative multiple of 2";
+}
+def lelabel_u11 : BFLabelOp<"false", "true", "true", "11", "ARM::fixup_le"> {
+ let ParserMatchClass = lelabel_u11_asmoperand;
+}
+
+def bfafter_target : Operand<OtherVT> {
+ let EncoderMethod = "getBFAfterTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBFAfterTargetOperand";
+}
+
+// pred operand excluding AL
+def pred_noal_asmoperand : AsmOperandClass {
+ let Name = "CondCodeNoAL";
+ let RenderMethod = "addITCondCodeOperands";
+ let PredicateMethod = "isITCondCodeNoAL";
+ let ParserMethod = "parseITCondCode";
+}
+def pred_noal : Operand<i32> {
+ let PrintMethod = "printMandatoryPredicateOperand";
+ let ParserMatchClass = pred_noal_asmoperand;
+ let DecoderMethod = "DecodePredNoALOperand";
+}
+
+
+// CSEL aliases inverted predicate
+def pred_noal_inv_asmoperand : AsmOperandClass {
+ let Name = "CondCodeNoALInv";
+ let RenderMethod = "addITCondCodeInvOperands";
+ let PredicateMethod = "isITCondCodeNoAL";
+ let ParserMethod = "parseITCondCode";
+}
+def pred_noal_inv : Operand<i32> {
+ let PrintMethod = "printMandatoryInvertedPredicateOperand";
+ let ParserMatchClass = pred_noal_inv_asmoperand;
+}
//===----------------------------------------------------------------------===//
// Multiclass helpers...
//
@@ -604,6 +740,17 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
+ let Inst{15} = 0b0;
+ // In most of these instructions, and most versions of the Arm
+ // architecture, bit 15 of this encoding is listed as (0) rather
+ // than 0, i.e. setting it to 1 is UNPREDICTABLE or a soft-fail
+ // rather than a hard failure. In v8.1-M, this requirement is
+ // upgraded to a hard one for ORR, so that the encodings with 1
+ // in this bit can be reused for other instructions (such as
+ // CSEL). Setting Unpredictable{15} = 1 here would reintroduce
+ // that encoding clash in the auto- generated MC decoder, so I
+ // comment it out.
+ let Unpredictable{15} = !if(!eq(opcod, 0b0010), 0b0, 0b1);
let Inst{14-12} = 0b000; // imm3
let Inst{7-6} = 0b00; // imm2
let Inst{5-4} = 0b00; // type
@@ -617,6 +764,8 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
+ let Inst{15} = 0;
+ let Unpredictable{15} = !if(!eq(opcod, 0b0010), 0b0, 0b1); // see above
}
// Assembly aliases for optional destination operand when it's the same
// as the source operand.
@@ -880,6 +1029,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0b0;
let Inst{5-4} = opcod;
}
// register
@@ -923,15 +1073,15 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, SDNode opnode> {
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result, only implicitly setting CPSR.
-multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+multiclass T2I_cmp_irs<bits<4> opcod, string opc, RegisterClass LHSGPR,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
SDPatternOperator opnode> {
let isCompare = 1, Defs = [CPSR] in {
// shifted imm
def ri : T2OneRegCmpImm<
- (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), iii,
+ (outs), (ins LHSGPR:$Rn, t2_so_imm:$imm), iii,
opc, ".w\t$Rn, $imm",
- [(opnode GPRnopc:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
+ [(opnode LHSGPR:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -941,9 +1091,9 @@ let isCompare = 1, Defs = [CPSR] in {
}
// register
def rr : T2TwoRegCmp<
- (outs), (ins GPRnopc:$Rn, rGPR:$Rm), iir,
+ (outs), (ins LHSGPR:$Rn, rGPR:$Rm), iir,
opc, ".w\t$Rn, $Rm",
- [(opnode GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
+ [(opnode LHSGPR:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -955,9 +1105,9 @@ let isCompare = 1, Defs = [CPSR] in {
}
// shifted register
def rs : T2OneRegCmpShiftedReg<
- (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), iis,
+ (outs), (ins LHSGPR:$Rn, t2_so_reg:$ShiftedRm), iis,
opc, ".w\t$Rn, $ShiftedRm",
- [(opnode GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>,
+ [(opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm)]>,
Sched<[WriteCMPsi]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -971,9 +1121,9 @@ let isCompare = 1, Defs = [CPSR] in {
// No alias here for 'rr' version as not all instantiations of this
// multiclass want one (CMP in particular, does not).
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
- (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
+ (!cast<Instruction>(NAME#"ri") LHSGPR:$Rn, t2_so_imm:$imm, pred:$p)>;
def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
- (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
+ (!cast<Instruction>(NAME#"rs") LHSGPR:$Rn, t2_so_reg:$shift, pred:$p)>;
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
@@ -1334,7 +1484,8 @@ def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
@@ -1872,6 +2023,7 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr,
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0b0;
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
@@ -2148,6 +2300,11 @@ def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm),
(t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>;
+// Do the same for v8m targets since they support movw with a 16-bit value.
+def : T1Pat<(add tGPR:$src, imm0_65535_neg:$imm),
+ (tSUBrr tGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>,
+ Requires<[HasV8MBaseline]>;
+
let AddedComplexity = 1 in
def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm),
(t2SUBSri rGPR:$src, imm1_255_neg:$imm)>;
@@ -2327,14 +2484,14 @@ class T2SatI<dag iops, string opc, string asm>
def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
"ssat", "\t$Rd, $sat_imm, $Rn$sh">,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALU]> {
let Inst{23-22} = 0b00;
let Inst{5} = 0;
}
def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
"ssat16", "\t$Rd, $sat_imm, $Rn">,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> {
let Inst{23-22} = 0b00;
let sh = 0b100000;
let Inst{4} = 0;
@@ -2342,13 +2499,13 @@ def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
"usat", "\t$Rd, $sat_imm, $Rn$sh">,
- Requires<[IsThumb2]> {
+ Requires<[IsThumb2]>, Sched<[WriteALU]> {
let Inst{23-22} = 0b10;
}
def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
"usat16", "\t$Rd, $sat_imm, $Rn">,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> {
let Inst{23-22} = 0b10;
let sh = 0b100000;
let Inst{4} = 0;
@@ -2395,6 +2552,8 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
let Inst{19-16} = 0b1111; // Rn
+ let Inst{15} = 0b0;
+ let Unpredictable{15} = 0b1;
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0011;
}
@@ -2472,7 +2631,7 @@ class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin,
let Constraints = "$src = $Rd" in
def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
IIC_iUNAsi, "bfc", "\t$Rd, $imm",
- [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
+ [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0; // should be 0.
let Inst{25} = 1;
@@ -2488,7 +2647,7 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
def t2SBFX: T2TwoRegBitFI<
(outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
- IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10100;
@@ -2497,7 +2656,7 @@ def t2SBFX: T2TwoRegBitFI<
def t2UBFX: T2TwoRegBitFI<
(outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb),
- IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> {
+ IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b11100;
@@ -2523,7 +2682,7 @@ let Constraints = "$src = $Rd" in {
(ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm),
IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm",
[(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
- bf_inv_mask_imm:$imm))]> {
+ bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0; // should be 0.
let Inst{25} = 1;
@@ -2597,7 +2756,8 @@ def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 rGPR:$src), [{
- return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ return !SDValue(N,0)->getValueType(0).isVector() &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
}]>;
// so_imm_notSext is needed instead of so_imm_not, as the value of imm
@@ -3054,7 +3214,7 @@ def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+defm t2CMP : T2I_cmp_irs<0b1101, "cmp", GPRnopc,
IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, ARMcmp>;
def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm),
@@ -3122,10 +3282,10 @@ def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm),
(t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>;
-defm t2TST : T2I_cmp_irs<0b0000, "tst",
+defm t2TST : T2I_cmp_irs<0b0000, "tst", rGPR,
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>;
-defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+defm t2TEQ : T2I_cmp_irs<0b0100, "teq", rGPR,
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
@@ -3277,17 +3437,17 @@ def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexb", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>;
def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexh", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>;
def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
AddrModeT2_ldrex, 4, NoItinerary,
"ldrex", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
- Requires<[IsThumb, HasV8MBaseline]> {
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]> {
bits<4> Rt;
bits<12> addr;
let Inst{31-27} = 0b11101;
@@ -3303,7 +3463,7 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
[], {?, ?, ?, ?}>,
- Requires<[IsThumb2, IsNotMClass]> {
+ Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteLd]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -3311,17 +3471,17 @@ def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaexb", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>;
def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaexh", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>;
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>;
def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldaex", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]> {
bits<4> Rt;
bits<4> addr;
let Inst{31-27} = 0b11101;
@@ -3337,7 +3497,7 @@ def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
AddrModeNone, 4, NoItinerary,
"ldaexd", "\t$Rt, $Rt2, $addr", "",
[], {?, ?, ?, ?}>, Requires<[IsThumb,
- HasAcquireRelease, HasV7Clrex, IsNotMClass]> {
+ HasAcquireRelease, HasV7Clrex, IsNotMClass]>, Sched<[WriteLd]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
@@ -3352,14 +3512,14 @@ def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
"strexb", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(strex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>;
def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexh", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(strex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>;
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
t2addrmode_imm0_1020s4:$addr),
@@ -3367,7 +3527,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
"strex", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
- Requires<[IsThumb, HasV8MBaseline]> {
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]> {
bits<4> Rd;
bits<4> Rt;
bits<12> addr;
@@ -3384,7 +3544,7 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
{?, ?, ?, ?}>,
- Requires<[IsThumb2, IsNotMClass]> {
+ Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteST]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -3395,7 +3555,7 @@ def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
[(set rGPR:$Rd,
(stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>,
Requires<[IsThumb, HasAcquireRelease,
- HasV7Clrex]>;
+ HasV7Clrex]>, Sched<[WriteST]>;
def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
@@ -3404,7 +3564,7 @@ def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
[(set rGPR:$Rd,
(stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>,
Requires<[IsThumb, HasAcquireRelease,
- HasV7Clrex]>;
+ HasV7Clrex]>, Sched<[WriteST]>;
def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
addr_offset_none:$addr),
@@ -3412,7 +3572,8 @@ def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
"stlex", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>,
- Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> {
+ Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>,
+ Sched<[WriteST]> {
bits<4> Rd;
bits<4> Rt;
bits<4> addr;
@@ -3429,7 +3590,7 @@ def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
AddrModeNone, 4, NoItinerary,
"stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
{?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease,
- HasV7Clrex, IsNotMClass]> {
+ HasV7Clrex, IsNotMClass]>, Sched<[WriteST]> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
@@ -4547,9 +4708,9 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
def : t2InstAlias<"cmn${p} $Rn, $Rm",
(t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
def : t2InstAlias<"teq${p} $Rn, $Rm",
- (t2TEQrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+ (t2TEQrr rGPR:$Rn, rGPR:$Rm, pred:$p)>;
def : t2InstAlias<"tst${p} $Rn, $Rm",
- (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
+ (t2TSTrr rGPR:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>;
@@ -4888,3 +5049,227 @@ def : t2InstAlias<"pld${p} $addr",
def : InstAlias<"pli${p} $addr",
(t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p), 0>,
Requires<[IsThumb2,HasV7]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARMv8.1m instructions
+//
+
+class V8_1MI<dag oops, dag iops, AddrMode am, InstrItinClass itin, string asm,
+ string ops, string cstr, list<dag> pattern>
+ : Thumb2XI<oops, iops, am, 4, itin, !strconcat(asm, "\t", ops), cstr,
+ pattern>,
+ Requires<[HasV8_1MMainline]>;
+
+def t2CLRM : V8_1MI<(outs),
+ (ins pred:$p, reglist_with_apsr:$regs, variable_ops),
+ AddrModeNone, NoItinerary, "clrm", "${p}\t$regs", "", []> {
+ bits<16> regs;
+
+ let Inst{31-16} = 0b1110100010011111;
+ let Inst{15-14} = regs{15-14};
+ let Inst{13} = 0b0;
+ let Inst{12-0} = regs{12-0};
+}
+
+class t2BF<dag iops, string asm, string ops>
+ : V8_1MI<(outs ), iops, AddrModeNone, NoItinerary, asm, ops, "", []> {
+
+ let Inst{31-27} = 0b11110;
+ let Inst{15-14} = 0b11;
+ let Inst{12} = 0b0;
+ let Inst{0} = 0b1;
+
+ let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2BF_LabelPseudo
+ : t2PseudoInst<(outs ), (ins pclabel:$cp), 0, NoItinerary, []> {
+ let isTerminator = 1;
+ let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+def t2BFi : t2BF<(ins bflabel_u4:$b_label, bflabel_s16:$label, pred:$p),
+ !strconcat("bf", "${p}"), "$b_label, $label"> {
+ bits<4> b_label;
+ bits<16> label;
+
+ let Inst{26-23} = b_label{3-0};
+ let Inst{22-21} = 0b10;
+ let Inst{20-16} = label{15-11};
+ let Inst{13} = 0b1;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+}
+
+def t2BFic : t2BF<(ins bflabel_u4:$b_label, bflabel_s12:$label,
+ bfafter_target:$ba_label, pred_noal:$bcond), "bfcsel",
+ "$b_label, $label, $ba_label, $bcond"> {
+ bits<4> bcond;
+ bits<12> label;
+ bits<1> ba_label;
+ bits<4> b_label;
+
+ let Inst{26-23} = b_label{3-0};
+ let Inst{22} = 0b0;
+ let Inst{21-18} = bcond{3-0};
+ let Inst{17} = ba_label{0};
+ let Inst{16} = label{11};
+ let Inst{13} = 0b1;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+}
+
+def t2BFr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
+ !strconcat("bfx", "${p}"), "$b_label, $Rn"> {
+ bits<4> b_label;
+ bits<4> Rn;
+
+ let Inst{26-23} = b_label{3-0};
+ let Inst{22-20} = 0b110;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{13-1} = 0b1000000000000;
+}
+
+def t2BFLi : t2BF<(ins bflabel_u4:$b_label, bflabel_s18:$label, pred:$p),
+ !strconcat("bfl", "${p}"), "$b_label, $label"> {
+ bits<4> b_label;
+ bits<18> label;
+
+ let Inst{26-23} = b_label{3-0};
+ let Inst{22-16} = label{17-11};
+ let Inst{13} = 0b0;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+}
+
+def t2BFLr : t2BF<(ins bflabel_u4:$b_label, rGPR:$Rn, pred:$p),
+ !strconcat("bflx", "${p}"), "$b_label, $Rn"> {
+ bits<4> b_label;
+ bits<4> Rn;
+
+ let Inst{26-23} = b_label{3-0};
+ let Inst{22-20} = 0b111;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{13-1} = 0b1000000000000;
+}
+
+class t2LOL<dag oops, dag iops, string asm, string ops>
+ : V8_1MI<oops, iops, AddrModeNone, NoItinerary, asm, ops, "", [] > {
+ let Inst{31-23} = 0b111100000;
+ let Inst{15-14} = 0b11;
+ let Inst{0} = 0b1;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let DecoderMethod = "DecodeLOLoop";
+ let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
+}
+
+let isNotDuplicable = 1 in {
+def t2WLS : t2LOL<(outs GPRlr:$LR),
+ (ins rGPR:$Rn, wlslabel_u11:$label),
+ "wls", "$LR, $Rn, $label"> {
+ bits<4> Rn;
+ bits<11> label;
+ let Inst{22-20} = 0b100;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{13-12} = 0b00;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+ let usesCustomInserter = 1;
+}
+
+def t2DLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn),
+ "dls", "$LR, $Rn"> {
+ bits<4> Rn;
+ let isBranch = 0;
+ let isTerminator = 0;
+ let Inst{22-20} = 0b100;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{13-1} = 0b1000000000000;
+ let usesCustomInserter = 1;
+}
+
+def t2LEUpdate : t2LOL<(outs GPRlr:$LRout),
+ (ins GPRlr:$LRin, lelabel_u11:$label),
+ "le", "$LRin, $label"> {
+ bits<11> label;
+ let Inst{22-16} = 0b0001111;
+ let Inst{13-12} = 0b00;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+ let usesCustomInserter = 1;
+}
+
+def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> {
+ bits<11> label;
+ let Inst{22-16} = 0b0101111;
+ let Inst{13-12} = 0b00;
+ let Inst{11} = label{0};
+ let Inst{10-1} = label{10-1};
+}
+
+def t2DoLoopStart :
+ t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
+ [(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
+
+def t2LoopDec :
+ t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
+ 4, IIC_Br, []>, Sched<[WriteBr]>;
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in {
+def t2WhileLoopStart :
+ t2PseudoInst<(outs),
+ (ins rGPR:$elts, brtarget:$target),
+ 4, IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+def t2LoopEnd :
+ t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
+ 8, IIC_Br, []>, Sched<[WriteBr]>;
+
+} // end isBranch, isTerminator, hasSideEffects
+
+} // end isNotDuplicable
+
+class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
+ : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZRnosp:$Rm, pred_noal:$fcond),
+ AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ bits<4> fcond;
+
+ let Inst{31-20} = 0b111010100101;
+ let Inst{19-16} = Rn{3-0};
+ let Inst{15-12} = opcode;
+ let Inst{11-8} = Rd{3-0};
+ let Inst{7-4} = fcond{3-0};
+ let Inst{3-0} = Rm{3-0};
+
+ let Uses = [CPSR];
+}
+
+def t2CSEL : CS<"csel", 0b1000>;
+def t2CSINC : CS<"csinc", 0b1001>;
+def t2CSINV : CS<"csinv", 0b1010>;
+def t2CSNEG : CS<"csneg", 0b1011>;
+
+
+// CS aliases.
+let Predicates = [HasV8_1MMainline] in {
+ def : InstAlias<"csetm\t$Rd, $fcond",
+ (t2CSINV rGPR:$Rd, ZR, ZR, pred_noal_inv:$fcond)>;
+
+ def : InstAlias<"cset\t$Rd, $fcond",
+ (t2CSINC rGPR:$Rd, ZR, ZR, pred_noal_inv:$fcond)>;
+
+ def : InstAlias<"cinc\t$Rd, $Rn, $fcond",
+ (t2CSINC rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
+
+ def : InstAlias<"cinv\t$Rd, $Rn, $fcond",
+ (t2CSINV rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
+
+ def : InstAlias<"cneg\t$Rd, $Rn, $fcond",
+ (t2CSNEG rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
+}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index b58730c452f7..a0dd25de07ee 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1,9 +1,8 @@
//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,28 +52,50 @@ def vfp_f16imm : Operand<f16>,
let ParserMatchClass = FPImmOperand;
}
-def vfp_f32imm : Operand<f32>,
- PatLeaf<(f32 fpimm), [{
- return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
- }], SDNodeXForm<fpimm, [{
+def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP32Imm(InVal);
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
- }]>> {
+ }]>;
+
+def gi_vfp_f32imm : GICustomOperandRenderer<"renderVFPF32Imm">,
+ GISDNodeXFormEquiv<vfp_f32imm_xform>;
+
+def vfp_f32imm : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], vfp_f32imm_xform> {
let PrintMethod = "printFPImmOperand";
let ParserMatchClass = FPImmOperand;
+ let GISelPredicateCode = [{
+ const auto &MO = MI.getOperand(1);
+ if (!MO.isFPImm())
+ return false;
+ return ARM_AM::getFP32Imm(MO.getFPImm()->getValueAPF()) != -1;
+ }];
}
-def vfp_f64imm : Operand<f64>,
- PatLeaf<(f64 fpimm), [{
- return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
- }], SDNodeXForm<fpimm, [{
+def vfp_f64imm_xform : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP64Imm(InVal);
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
- }]>> {
+ }]>;
+
+def gi_vfp_f64imm : GICustomOperandRenderer<"renderVFPF64Imm">,
+ GISDNodeXFormEquiv<vfp_f64imm_xform>;
+
+def vfp_f64imm : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return ARM_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], vfp_f64imm_xform> {
let PrintMethod = "printFPImmOperand";
let ParserMatchClass = FPImmOperand;
+ let GISelPredicateCode = [{
+ const auto &MO = MI.getOperand(1);
+ if (!MO.isFPImm())
+ return false;
+ return ARM_AM::getFP64Imm(MO.getFPImm()->getValueAPF()) != -1;
+ }];
}
def alignedload16 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
@@ -120,39 +141,45 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", "\t$Dd, $addr",
- [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
+ [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>,
+ Requires<[HasFPRegs]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
- [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
+ [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]>,
+ Requires<[HasFPRegs]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
}
+let isUnpredicable = 1 in
def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
[(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFPRegs16]>;
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
IIC_fpStore64, "vstr", "\t$Dd, $addr",
- [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
+ [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>,
+ Requires<[HasFPRegs]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
- [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
+ [(alignedstore32 SPR:$Sd, addrmode5:$addr)]>,
+ Requires<[HasFPRegs]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
}
+let isUnpredicable = 1 in
def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
[(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFPRegs16]>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
@@ -160,6 +187,7 @@ def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
multiclass vfp_ldst_mult<string asm, bit L_bit,
InstrItinClass itin, InstrItinClass itin_upd> {
+ let Predicates = [HasFPRegs] in {
// Double Precision
def DIA :
AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -227,6 +255,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
// VFP pipelines.
let D = VFPNeonDomain;
}
+ }
}
let hasSideEffects = 0 in {
@@ -273,13 +302,13 @@ def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone,
}
def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>,
- Requires<[HasVFP2]>;
+ Requires<[HasFPRegs]>;
def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>,
- Requires<[HasVFP2]>;
+ Requires<[HasFPRegs]>;
def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r), 0>,
- Requires<[HasVFP2]>;
+ Requires<[HasFPRegs]>;
def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r), 0>,
- Requires<[HasVFP2]>;
+ Requires<[HasFPRegs]>;
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
(VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
@@ -295,6 +324,7 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
// However, there is no UAL syntax for them, so we keep them around for
// (dis)assembly only.
multiclass vfp_ldstx_mult<string asm, bit L_bit> {
+ let Predicates = [HasFPRegs] in {
// Unknown precision
def XIA :
AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
@@ -317,6 +347,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> {
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
}
+ }
}
defm FLDM : vfp_ldstx_mult<"fldm", 1>;
@@ -452,7 +483,7 @@ def VNMULH : AHbI<0b11100, 0b10, 1, 0,
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
- Uses = [CPSR], AddedComplexity = 4 in {
+ Uses = [CPSR], AddedComplexity = 4, isUnpredicable = 1 in {
def H : AHbInp<0b11100, opc, 0,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"),
@@ -480,7 +511,8 @@ defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
defm VSELVS : vsel_inst<"vs", 0b01, 6>;
multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
- let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+ isUnpredicable = 1 in {
def H : AHbInp<0b11101, 0b00, opc,
(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
@@ -501,8 +533,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
}
}
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
+defm VFP_VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VFP_VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -571,9 +603,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
}
def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0,
- (outs SPR:$Sd), (ins SPR:$Sm),
+ (outs HPR:$Sd), (ins HPR:$Sm),
IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm",
- []>;
+ [(set HPR:$Sd, (fabs (f16 HPR:$Sm)))]>;
let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
@@ -682,8 +714,8 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FullFP16Pat<(f32 (fpextend HPR:$Sm)),
- (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f32 (fpextend HPR:$Sm)),
+ (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
def : FP16Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
@@ -693,8 +725,8 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
Requires<[HasFP16]>,
Sched<[WriteFPCVT]>;
-def : FullFP16Pat<(f16 (fpround SPR:$Sm)),
- (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+def : FP16Pat<(f16 (fpround SPR:$Sm)),
+ (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
def : FP16Pat<(fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
@@ -825,7 +857,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Inst{17-16} = rm;
- // Encode instruction operands
+ // Encode instruction operands.
let Inst{3-0} = Dm{3-0};
let Inst{5} = Dm{4};
let Inst{8} = 1;
@@ -906,9 +938,9 @@ def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0,
multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> {
def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0,
- (outs SPR:$Sd), (ins SPR:$Sm),
+ (outs HPR:$Sd), (ins HPR:$Sm),
NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm",
- []>,
+ [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
Requires<[HasFullFP16]> {
let Inst{7} = op2;
let Inst{16} = op;
@@ -948,11 +980,12 @@ defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>;
multiclass vrint_inst_anpm<string opc, bits<2> rm,
SDPatternOperator node = null_frag> {
- let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+ let PostEncoderMethod = "", DecoderNamespace = "VFPV8",
+ isUnpredicable = 1 in {
def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0,
- (outs SPR:$Sd), (ins SPR:$Sm),
+ (outs HPR:$Sd), (ins HPR:$Sm),
NoItinerary, !strconcat("vrint", opc, ".f16\t$Sd, $Sm"),
- []>,
+ [(set (f16 HPR:$Sd), (node (f16 HPR:$Sm)))]>,
Requires<[HasFullFP16]> {
let Inst{17-16} = rm;
}
@@ -998,22 +1031,24 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
Sched<[WriteFPSQRT32]>;
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
- (outs SPR:$Sd), (ins SPR:$Sm),
+ (outs HPR:$Sd), (ins HPR:$Sm),
IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm",
- []>;
+ [(set HPR:$Sd, (fsqrt (f16 HPR:$Sm)))]>;
let hasSideEffects = 0 in {
let isMoveReg = 1 in {
def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
- IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>,
+ Requires<[HasFPRegs64]>;
def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
- IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>,
+ Requires<[HasFPRegs]>;
} // isMoveReg
-let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
+let PostEncoderMethod = "", DecoderNamespace = "VFPV8", isUnpredicable = 1 in {
def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>,
@@ -1035,6 +1070,7 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
[(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+ Requires<[HasFPRegs]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
@@ -1058,7 +1094,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
- Requires<[HasVFP2, UseVMOVSR]>,
+ Requires<[HasFPRegs, UseVMOVSR]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
@@ -1084,6 +1120,7 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
[(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+ Requires<[HasFPRegs]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
@@ -1112,6 +1149,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
[/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFPRegs]>,
Sched<[WriteFPMOV]> {
bits<5> src1;
bits<4> Rt;
@@ -1139,6 +1177,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
[(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+ Requires<[HasFPRegs]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
@@ -1183,6 +1222,7 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFPRegs]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> dst1;
@@ -1206,10 +1246,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
// Move H->R, clearing top 16 bits
def VMOVRH : AVConv2I<0b11100001, 0b1001,
- (outs GPR:$Rt), (ins HPR:$Sn),
+ (outs rGPR:$Rt), (ins HPR:$Sn),
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
- [(set GPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
- Requires<[HasFullFP16]>,
+ [(set rGPR:$Rt, (arm_vmovrh HPR:$Sn))]>,
+ Requires<[HasFPRegs16]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
@@ -1222,14 +1262,16 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
let Inst{6-5} = 0b00;
let Inst{3-0} = 0b0000;
+
+ let isUnpredicable = 1;
}
// Move R->H, clearing top 16 bits
def VMOVHR : AVConv4I<0b11100000, 0b1001,
- (outs HPR:$Sn), (ins GPR:$Rt),
+ (outs HPR:$Sn), (ins rGPR:$Rt),
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
- [(set HPR:$Sn, (arm_vmovhr GPR:$Rt))]>,
- Requires<[HasFullFP16]>,
+ [(set HPR:$Sn, (arm_vmovhr rGPR:$Rt))]>,
+ Requires<[HasFPRegs16]>,
Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
@@ -1242,6 +1284,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
let Inst{6-5} = 0b00;
let Inst{3-0} = 0b0000;
+
+ let isUnpredicable = 1;
}
// FMRDH: SPR -> GPR
@@ -1348,6 +1392,7 @@ def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
[]>,
Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
+ let isUnpredicable = 1;
}
def : VFPNoNEONPat<(f16 (sint_to_fp GPR:$a)),
@@ -1393,6 +1438,7 @@ def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
[]>,
Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
+ let isUnpredicable = 1;
}
def : VFPNoNEONPat<(f16 (uint_to_fp GPR:$a)),
@@ -1497,6 +1543,7 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
[]>,
Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
+ let isUnpredicable = 1;
}
def : VFPNoNEONPat<(i32 (fp_to_sint HPR:$a)),
@@ -1543,6 +1590,7 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
[]>,
Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
+ let isUnpredicable = 1;
}
def : VFPNoNEONPat<(i32 (fp_to_uint HPR:$a)),
@@ -1572,6 +1620,7 @@ def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
[]>,
Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
+ let isUnpredicable = 1;
}
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1596,6 +1645,7 @@ def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
[]>,
Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
+ let isUnpredicable = 1;
}
}
@@ -1643,6 +1693,8 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
let Predicates = [HasVFP2, HasDPVFP];
}
+let isUnpredicable = 1 in {
+
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
@@ -1667,6 +1719,8 @@ def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
Requires<[HasFullFP16]>,
Sched<[WriteFPCVT]>;
+} // End of 'let isUnpredicable = 1 in'
+
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
@@ -1722,6 +1776,8 @@ def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
// Fixed-Point to FP:
+let isUnpredicable = 1 in {
+
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
@@ -1746,6 +1802,8 @@ def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
Requires<[HasFullFP16]>,
Sched<[WriteFPCVT]>;
+} // End of 'let isUnpredicable = 1 in'
+
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
@@ -2030,6 +2088,9 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, HPR:$Sdin)),
+ (VFMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFMSD : ADbI<0b11101, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2208,13 +2269,13 @@ def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
IIC_fpUNA64,
[(set (f64 DPR:$Dd),
(ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
- RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+ RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
IIC_fpUNA32,
[(set (f32 SPR:$Sd),
(ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
- RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+ RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
} // hasSideEffects
//===----------------------------------------------------------------------===//
@@ -2238,15 +2299,16 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
let Inst{3-0} = 0b0000;
}
-// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
-// to APSR.
-let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
-def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
-
let DecoderMethod = "DecodeForVMRSandVMSR" in {
+ // APSR is the application level alias of CPSR. This moves the FPSCR N, Z, C,
+ // V flags to APSR.
+ let Defs = [CPSR], Uses = [FPSCR_NZCV], Predicates = [HasFPRegs],
+ Rt = 0b1111 /* apsr_nzcv */ in
+ def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
+
// Application level FPSCR -> GPR
- let hasSideEffects = 1, Uses = [FPSCR] in
+ let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in
def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins),
"vmrs", "\t$Rt, fpscr",
[(set GPRnopc:$Rt, (int_arm_get_fpscr))]>;
@@ -2269,6 +2331,33 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
"vmrs", "\t$Rt, fpinst", []>;
def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPRnopc:$Rt),
(ins), "vmrs", "\t$Rt, fpinst2", []>;
+ let Predicates = [HasV8_1MMainline, HasFPRegs] in {
+ // System level FPSCR_NZCVQC -> GPR
+ def VMRS_FPSCR_NZCVQC
+ : MovFromVFP<0b0010 /* fpscr_nzcvqc */,
+ (outs GPR:$Rt), (ins cl_FPSCR_NZCV:$fpscr_in),
+ "vmrs", "\t$Rt, fpscr_nzcvqc", []>;
+ }
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+ // System level FPSCR -> GPR, with context saving for security extensions
+ def VMRS_FPCXTNS : MovFromVFP<0b1110 /* fpcxtns */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpcxtns", []>;
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+ // System level FPSCR -> GPR, with context saving for security extensions
+ def VMRS_FPCXTS : MovFromVFP<0b1111 /* fpcxts */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, fpcxts", []>;
+ }
+
+ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+ // System level VPR/P0 -> GPR
+ let Uses = [VPR] in
+ def VMRS_VPR : MovFromVFP<0b1100 /* vpr */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, vpr", []>;
+
+ def VMRS_P0 : MovFromVFP<0b1101 /* p0 */, (outs GPR:$Rt), (ins VCCR:$cond),
+ "vmrs", "\t$Rt, p0", []>;
}
}
@@ -2291,10 +2380,12 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
+ let Predicates = [HasVFP2];
}
let DecoderMethod = "DecodeForVMRSandVMSR" in {
let Defs = [FPSCR] in {
+ let Predicates = [HasFPRegs] in
// Application level GPR -> FPSCR
def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src),
"vmsr", "\tfpscr, $src",
@@ -2310,6 +2401,33 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in {
def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src),
"vmsr", "\tfpinst2, $src", []>;
}
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+ // System level GPR -> FPSCR with context saving for security extensions
+ def VMSR_FPCXTNS : MovToVFP<0b1110 /* fpcxtns */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpcxtns, $src", []>;
+ }
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+ // System level GPR -> FPSCR with context saving for security extensions
+ def VMSR_FPCXTS : MovToVFP<0b1111 /* fpcxts */, (outs), (ins GPR:$src),
+ "vmsr", "\tfpcxts, $src", []>;
+ }
+ let Predicates = [HasV8_1MMainline, HasFPRegs] in {
+ // System level GPR -> FPSCR_NZCVQC
+ def VMSR_FPSCR_NZCVQC
+ : MovToVFP<0b0010 /* fpscr_nzcvqc */,
+ (outs cl_FPSCR_NZCV:$fpscr_out), (ins GPR:$src),
+ "vmsr", "\tfpscr_nzcvqc, $src", []>;
+ }
+
+ let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+ // System level GPR -> VPR/P0
+ let Defs = [VPR] in
+ def VMSR_VPR : MovToVFP<0b1100 /* vpr */, (outs), (ins GPR:$src),
+ "vmsr", "\tvpr, $src", []>;
+
+ def VMSR_P0 : MovToVFP<0b1101 /* p0 */, (outs VCCR:$cond), (ins GPR:$src),
+ "vmsr", "\tp0, $src", []>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -2371,6 +2489,8 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
let Inst{11-8} = 0b1001; // Half precision
let Inst{7-4} = 0b0000;
let Inst{3-0} = imm{3-0};
+
+ let isUnpredicable = 1;
}
}
@@ -2426,7 +2546,7 @@ def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : InstAlias<"fmstat${p}", (FMSTAT pred:$p), 0>, Requires<[HasFPRegs]>;
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
(VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
@@ -2484,3 +2604,126 @@ def : VFP3InstAlias<"fconstd${p} $Dd, $val",
(FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>;
def : VFP3InstAlias<"fconsts${p} $Sd, $val",
(FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;
+
+def VSCCLRMD : VFPXI<(outs), (ins pred:$p, fp_dreglist_with_vpr:$regs, variable_ops),
+ AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
+ "vscclrm{$p}\t$regs", "", []>, Sched<[]> {
+ bits<13> regs;
+ let Inst{31-23} = 0b111011001;
+ let Inst{22} = regs{12};
+ let Inst{21-16} = 0b011111;
+ let Inst{15-12} = regs{11-8};
+ let Inst{11-8} = 0b1011;
+ let Inst{7-0} = regs{7-0};
+
+ let DecoderMethod = "DecodeVSCCLRM";
+
+ list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
+}
+
+def VSCCLRMS : VFPXI<(outs), (ins pred:$p, fp_sreglist_with_vpr:$regs, variable_ops),
+ AddrModeNone, 4, IndexModeNone, VFPMiscFrm, NoItinerary,
+ "vscclrm{$p}\t$regs", "", []>, Sched<[]> {
+ bits<13> regs;
+ let Inst{31-23} = 0b111011001;
+ let Inst{22} = regs{8};
+ let Inst{21-16} = 0b011111;
+ let Inst{15-12} = regs{12-9};
+ let Inst{11-8} = 0b1010;
+ let Inst{7-0} = regs{7-0};
+
+ let DecoderMethod = "DecodeVSCCLRM";
+
+ list<Predicate> Predicates = [HasV8_1MMainline, Has8MSecExt];
+}
+
+//===----------------------------------------------------------------------===//
+// Store VFP System Register to memory.
+//
+
+class vfp_vstrldr<bit opc, bit P, bit W, bits<4> SysReg, string sysreg,
+ dag oops, dag iops, IndexMode im, string Dest, string cstr>
+ : VFPI<oops, iops, AddrModeT2_i7s4, 4, im, VFPLdStFrm, IIC_fpSTAT,
+ !if(opc,"vldr","vstr"), !strconcat("\t", sysreg, ", ", Dest), cstr, []>,
+ Sched<[]> {
+ bits<12> addr;
+ let Inst{27-25} = 0b110;
+ let Inst{24} = P;
+ let Inst{23} = addr{7};
+ let Inst{22} = SysReg{3};
+ let Inst{21} = W;
+ let Inst{20} = opc;
+ let Inst{19-16} = addr{11-8};
+ let Inst{15-13} = SysReg{2-0};
+ let Inst{12-7} = 0b011111;
+ let Inst{6-0} = addr{6-0};
+ list<Predicate> Predicates = [HasFPRegs, HasV8_1MMainline];
+ let mayLoad = opc;
+ let mayStore = !if(opc, 0b0, 0b1);
+ let hasSideEffects = 1;
+}
+
+multiclass vfp_vstrldr_sysreg<bit opc, bits<4> SysReg, string sysreg,
+ dag oops=(outs), dag iops=(ins)> {
+ def _off :
+ vfp_vstrldr<opc, 1, 0, SysReg, sysreg,
+ oops, !con(iops, (ins t2addrmode_imm7s4:$addr)),
+ IndexModePost, "$addr", "" > {
+ let DecoderMethod = "DecodeVSTRVLDR_SYSREG<false>";
+ }
+
+ def _pre :
+ vfp_vstrldr<opc, 1, 1, SysReg, sysreg,
+ !con(oops, (outs GPRnopc:$wb)),
+ !con(iops, (ins t2addrmode_imm7s4_pre:$addr)),
+ IndexModePre, "$addr!", "$addr.base = $wb"> {
+ let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
+ }
+
+ def _post :
+ vfp_vstrldr<opc, 0, 1, SysReg, sysreg,
+ !con(oops, (outs GPRnopc:$wb)),
+ !con(iops, (ins t2_addr_offset_none:$Rn,
+ t2am_imm7s4_offset:$addr)),
+ IndexModePost, "$Rn$addr", "$Rn.base = $wb"> {
+ bits<4> Rn;
+ let Inst{19-16} = Rn{3-0};
+ let DecoderMethod = "DecodeVSTRVLDR_SYSREG<true>";
+ }
+}
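For orientation, each vfp_vstrldr_sysreg instantiation above yields the three usual addressing forms; a hedged sketch of the resulting assembly, using FPSCR as the example system register (syntax per the v8.1-M additions, shown for illustration only):

    // _off :  vstr fpscr, [rN, #imm]     constant offset, no base writeback
    // _pre :  vstr fpscr, [rN, #imm]!    pre-indexed, rN updated before the access
    // _post:  vstr fpscr, [rN], #imm     post-indexed, rN updated after the access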
+
+let Defs = [FPSCR] in {
+ defm VSTR_FPSCR : vfp_vstrldr_sysreg<0b0,0b0001, "fpscr">;
+ defm VSTR_FPSCR_NZCVQC : vfp_vstrldr_sysreg<0b0,0b0010, "fpscr_nzcvqc">;
+
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+ defm VSTR_FPCXTNS : vfp_vstrldr_sysreg<0b0,0b1110, "fpcxtns">;
+ defm VSTR_FPCXTS : vfp_vstrldr_sysreg<0b0,0b1111, "fpcxts">;
+ }
+}
+
+let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+ let Uses = [VPR] in {
+ defm VSTR_VPR : vfp_vstrldr_sysreg<0b0,0b1100, "vpr">;
+ }
+ defm VSTR_P0 : vfp_vstrldr_sysreg<0b0,0b1101, "p0",
+ (outs), (ins VCCR:$P0)>;
+}
+
+let Uses = [FPSCR] in {
+ defm VLDR_FPSCR : vfp_vstrldr_sysreg<0b1,0b0001, "fpscr">;
+ defm VLDR_FPSCR_NZCVQC : vfp_vstrldr_sysreg<0b1,0b0010, "fpscr_nzcvqc">;
+
+ let Predicates = [HasV8_1MMainline, Has8MSecExt] in {
+ defm VLDR_FPCXTNS : vfp_vstrldr_sysreg<0b1,0b1110, "fpcxtns">;
+ defm VLDR_FPCXTS : vfp_vstrldr_sysreg<0b1,0b1111, "fpcxts">;
+ }
+}
+
+let Predicates = [HasV8_1MMainline, HasMVEInt] in {
+ let Defs = [VPR] in {
+ defm VLDR_VPR : vfp_vstrldr_sysreg<0b1,0b1100, "vpr">;
+ }
+ defm VLDR_P0 : vfp_vstrldr_sysreg<0b1,0b1101, "p0",
+ (outs VCCR:$P0), (ins)>;
+}
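Taken together, the Requires changes in this file move plain FP register moves, loads and stores from HasVFP2 down to HasFPRegs (and its 16/64-bit variants), while arithmetic stays on the stricter predicates. A minimal sketch of the intended split, with hypothetical query names standing in for the real subtarget features:

    // Illustration only: a core such as v8.1-M with MVE can provide the FP
    // register file without full scalar VFP arithmetic, so moves and spills
    // need a weaker predicate than the arithmetic instructions.
    struct FPFeatures {
      bool HasFPRegs   = false; // FP register file present
      bool HasVFP2Base = false; // VFP2 arithmetic on the scalar registers
    };

    bool canSelectVLDRS(const FPFeatures &F) { return F.HasFPRegs; }   // load/store
    bool canSelectVADDS(const FPFeatures &F) { return F.HasVFP2Base; } // arithmetic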
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 293e734c97cd..4485a474a6df 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- ARMInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -76,6 +75,11 @@ private:
const ARMRegisterBankInfo &RBI;
const ARMSubtarget &STI;
+ // FIXME: This is necessary because DAGISel uses "Subtarget->" and GlobalISel
+ // uses "STI." in the code generated by TableGen. If we want to reuse some of
+ // the custom C++ predicates written for DAGISel, we need to have both around.
+ const ARMSubtarget *Subtarget = &STI;
+
// Store the opcodes that we might need, so we don't have to check what kind
// of subtarget (ARM vs Thumb) we have all the time.
struct OpcodeCache {
@@ -98,6 +102,27 @@ private:
unsigned STORE8;
unsigned LOAD8;
+ unsigned ADDrr;
+ unsigned ADDri;
+
+ // Used for G_ICMP
+ unsigned CMPrr;
+ unsigned MOVi;
+ unsigned MOVCCi;
+
+ // Used for G_SELECT
+ unsigned MOVCCr;
+
+ unsigned TSTri;
+ unsigned Bcc;
+
+ // Used for G_GLOBAL_VALUE
+ unsigned MOVi32imm;
+ unsigned ConstPoolLoad;
+ unsigned MOV_ga_pcrel;
+ unsigned LDRLIT_ga_pcrel;
+ unsigned LDRLIT_ga_abs;
+
OpcodeCache(const ARMSubtarget &STI);
} const Opcodes;
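The FIXME above exists because predicate strings from the .td files are pasted verbatim into both generated matchers, so both spellings of the subtarget must resolve in this class. A minimal sketch of the two forms, using a feature query that already appears in this patch (illustration only):

    // DAGISel-generated code spells the subtarget "Subtarget->",
    // GlobalISel-generated code spells it "STI.".
    bool dagISelSpelling(const ARMSubtarget *Subtarget) { return Subtarget->hasVFP2Base(); }
    bool gISelSpelling(const ARMSubtarget &STI) { return STI.hasVFP2Base(); }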
@@ -112,6 +137,9 @@ private:
unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
unsigned Size) const;
+ void renderVFPF32Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
+ void renderVFPF64Imm(MachineInstrBuilder &New, const MachineInstr &Old) const;
+
#define GET_GLOBALISEL_PREDICATES_DECL
#include "ARMGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL
@@ -204,7 +232,7 @@ static bool selectMergeValues(MachineInstrBuilder &MIB,
MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP");
+ assert(TII.getSubtarget().hasVFP2Base() && "Can't select merge without VFP");
// We only support G_MERGE_VALUES as a way to stick together two scalar GPRs
// into one DPR.
@@ -235,7 +263,8 @@ static bool selectUnmergeValues(MachineInstrBuilder &MIB,
MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
- assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP");
+ assert(TII.getSubtarget().hasVFP2Base() &&
+ "Can't select unmerge without VFP");
// We only support G_UNMERGE_VALUES as a way to break up one DPR into two
// GPRs.
@@ -285,6 +314,24 @@ ARMInstructionSelector::OpcodeCache::OpcodeCache(const ARMSubtarget &STI) {
STORE_OPCODE(STORE8, STRBi12);
STORE_OPCODE(LOAD8, LDRBi12);
+
+ STORE_OPCODE(ADDrr, ADDrr);
+ STORE_OPCODE(ADDri, ADDri);
+
+ STORE_OPCODE(CMPrr, CMPrr);
+ STORE_OPCODE(MOVi, MOVi);
+ STORE_OPCODE(MOVCCi, MOVCCi);
+
+ STORE_OPCODE(MOVCCr, MOVCCr);
+
+ STORE_OPCODE(TSTri, TSTri);
+ STORE_OPCODE(Bcc, Bcc);
+
+ STORE_OPCODE(MOVi32imm, MOVi32imm);
+ ConstPoolLoad = isThumb ? ARM::t2LDRpci : ARM::LDRi12;
+ STORE_OPCODE(MOV_ga_pcrel, MOV_ga_pcrel);
+ LDRLIT_ga_pcrel = isThumb ? ARM::tLDRLIT_ga_pcrel : ARM::LDRLIT_ga_pcrel;
+ LDRLIT_ga_abs = isThumb ? ARM::tLDRLIT_ga_abs : ARM::LDRLIT_ga_abs;
#undef MAP_OPCODE
}
@@ -408,10 +455,11 @@ getComparePreds(CmpInst::Predicate Pred) {
}
struct ARMInstructionSelector::CmpConstants {
- CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank,
- unsigned OpSize)
+ CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned SelectOpcode,
+ unsigned OpRegBank, unsigned OpSize)
: ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode),
- OperandRegBankID(OpRegBank), OperandSize(OpSize) {}
+ SelectResultOpcode(SelectOpcode), OperandRegBankID(OpRegBank),
+ OperandSize(OpSize) {}
// The opcode used for performing the comparison.
const unsigned ComparisonOpcode;
@@ -420,6 +468,9 @@ struct ARMInstructionSelector::CmpConstants {
// ARM::INSTRUCTION_LIST_END if we don't need to read the flags.
const unsigned ReadFlagsOpcode;
+ // The opcode used for materializing the result of the comparison.
+ const unsigned SelectResultOpcode;
+
// The assumed register bank ID for the operands.
const unsigned OperandRegBankID;
@@ -439,7 +490,7 @@ struct ARMInstructionSelector::InsertInfo {
void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg,
unsigned Constant) const {
- (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi))
+ (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Opcodes.MOVi))
.addDef(DestReg)
.addImm(Constant)
.add(predOps(ARMCC::AL))
@@ -542,7 +593,8 @@ bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I,
}
// Select either 1 or the previous result based on the value of the flags.
- auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi))
+ auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc,
+ TII.get(Helper.SelectResultOpcode))
.addDef(ResReg)
.addUse(PrevRes)
.addImm(1)
@@ -569,7 +621,7 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
auto &MBB = *MIB->getParent();
auto &MF = *MBB.getParent();
- bool UseMovt = STI.useMovt(MF);
+ bool UseMovt = STI.useMovt();
unsigned Size = TM.getPointerSize(0);
unsigned Alignment = 4;
@@ -577,7 +629,9 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
auto addOpsForConstantPoolLoad = [&MF, Alignment,
Size](MachineInstrBuilder &MIB,
const GlobalValue *GV, bool IsSBREL) {
- assert(MIB->getOpcode() == ARM::LDRi12 && "Unsupported instruction");
+ assert((MIB->getOpcode() == ARM::LDRi12 ||
+ MIB->getOpcode() == ARM::t2LDRpci) &&
+ "Unsupported instruction");
auto ConstPool = MF.getConstantPool();
auto CPIndex =
// For SB relative entries we need a target-specific constant pool.
@@ -587,21 +641,38 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
ARMConstantPoolConstant::Create(GV, ARMCP::SBREL), Alignment)
: ConstPool->getConstantPoolIndex(GV, Alignment);
MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
- .addMemOperand(
- MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
- MachineMemOperand::MOLoad, Size, Alignment))
- .addImm(0)
- .add(predOps(ARMCC::AL));
+ .addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+ Size, Alignment));
+ if (MIB->getOpcode() == ARM::LDRi12)
+ MIB.addImm(0);
+ MIB.add(predOps(ARMCC::AL));
+ };
+
+ auto addGOTMemOperand = [this, &MF, Alignment](MachineInstrBuilder &MIB) {
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad,
+ TM.getProgramPointerSize(), Alignment));
};
if (TM.isPositionIndependent()) {
bool Indirect = STI.isGVIndirectSymbol(GV);
+
+ // For ARM mode, we have different pseudoinstructions for direct accesses
+ // and indirect accesses, and the ones for indirect accesses include the
+ // load from GOT. For Thumb mode, we use the same pseudoinstruction for both
+ // direct and indirect accesses, and we need to manually generate the load
+ // from GOT.
+ bool UseOpcodeThatLoads = Indirect && !STI.isThumb();
+
// FIXME: Taking advantage of MOVT for ELF is pretty involved, so we don't
// support it yet. See PR28229.
unsigned Opc =
UseMovt && !STI.isTargetELF()
- ? (Indirect ? ARM::MOV_ga_pcrel_ldr : ARM::MOV_ga_pcrel)
- : (Indirect ? ARM::LDRLIT_ga_pcrel_ldr : ARM::LDRLIT_ga_pcrel);
+ ? (UseOpcodeThatLoads ? (unsigned)ARM::MOV_ga_pcrel_ldr
+ : Opcodes.MOV_ga_pcrel)
+ : (UseOpcodeThatLoads ? (unsigned)ARM::LDRLIT_ga_pcrel_ldr
+ : Opcodes.LDRLIT_ga_pcrel);
MIB->setDesc(TII.get(Opc));
int TargetFlags = ARMII::MO_NO_FLAG;
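The comment block above encodes a four-way opcode choice; a hedged restatement of that decision as a small helper over the opcode names used in the patch (in Thumb mode the non-_ldr entries actually resolve to the t-mode variants via the opcode cache):

    // Sketch of the selection logic above, illustration only.
    enum class GVOpcode {
      MOV_ga_pcrel, MOV_ga_pcrel_ldr,       // movt-based; _ldr folds the GOT load
      LDRLIT_ga_pcrel, LDRLIT_ga_pcrel_ldr  // literal-pool based
    };

    GVOpcode pickPICOpcode(bool UseMovt, bool IsELF, bool Indirect, bool IsThumb) {
      // Only ARM mode has pseudos that fold the GOT load; Thumb emits a
      // separate load from the GOT slot afterwards.
      bool FoldGOTLoad = Indirect && !IsThumb;
      if (UseMovt && !IsELF)
        return FoldGOTLoad ? GVOpcode::MOV_ga_pcrel_ldr : GVOpcode::MOV_ga_pcrel;
      return FoldGOTLoad ? GVOpcode::LDRLIT_ga_pcrel_ldr : GVOpcode::LDRLIT_ga_pcrel;
    }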
@@ -611,17 +682,35 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
TargetFlags |= ARMII::MO_GOT;
MIB->getOperand(1).setTargetFlags(TargetFlags);
- if (Indirect)
- MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad,
- TM.getProgramPointerSize(), Alignment));
+ if (Indirect) {
+ if (!UseOpcodeThatLoads) {
+ auto ResultReg = MIB->getOperand(0).getReg();
+ auto AddressReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
+
+ MIB->getOperand(0).setReg(AddressReg);
+
+ auto InsertBefore = std::next(MIB->getIterator());
+ auto MIBLoad = BuildMI(MBB, InsertBefore, MIB->getDebugLoc(),
+ TII.get(Opcodes.LOAD32))
+ .addDef(ResultReg)
+ .addReg(AddressReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ addGOTMemOperand(MIBLoad);
+
+ if (!constrainSelectedInstRegOperands(*MIBLoad, TII, TRI, RBI))
+ return false;
+ } else {
+ addGOTMemOperand(MIB);
+ }
+ }
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool isReadOnly = STI.getTargetLowering()->isReadOnly(GV);
if (STI.isROPI() && isReadOnly) {
- unsigned Opc = UseMovt ? ARM::MOV_ga_pcrel : ARM::LDRLIT_ga_pcrel;
+ unsigned Opc = UseMovt ? Opcodes.MOV_ga_pcrel : Opcodes.LDRLIT_ga_pcrel;
MIB->setDesc(TII.get(Opc));
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
@@ -630,19 +719,19 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
MachineInstrBuilder OffsetMIB;
if (UseMovt) {
OffsetMIB = BuildMI(MBB, *MIB, MIB->getDebugLoc(),
- TII.get(ARM::MOVi32imm), Offset);
+ TII.get(Opcodes.MOVi32imm), Offset);
OffsetMIB.addGlobalAddress(GV, /*Offset*/ 0, ARMII::MO_SBREL);
} else {
// Load the offset from the constant pool.
- OffsetMIB =
- BuildMI(MBB, *MIB, MIB->getDebugLoc(), TII.get(ARM::LDRi12), Offset);
+ OffsetMIB = BuildMI(MBB, *MIB, MIB->getDebugLoc(),
+ TII.get(Opcodes.ConstPoolLoad), Offset);
addOpsForConstantPoolLoad(OffsetMIB, GV, /*IsSBREL*/ true);
}
if (!constrainSelectedInstRegOperands(*OffsetMIB, TII, TRI, RBI))
return false;
// Add the offset to the SB register.
- MIB->setDesc(TII.get(ARM::ADDrr));
+ MIB->setDesc(TII.get(Opcodes.ADDrr));
MIB->RemoveOperand(1);
MIB.addReg(ARM::R9) // FIXME: don't hardcode R9
.addReg(Offset)
@@ -654,18 +743,18 @@ bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB,
if (STI.isTargetELF()) {
if (UseMovt) {
- MIB->setDesc(TII.get(ARM::MOVi32imm));
+ MIB->setDesc(TII.get(Opcodes.MOVi32imm));
} else {
// Load the global's address from the constant pool.
- MIB->setDesc(TII.get(ARM::LDRi12));
+ MIB->setDesc(TII.get(Opcodes.ConstPoolLoad));
MIB->RemoveOperand(1);
addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false);
}
} else if (STI.isTargetMachO()) {
if (UseMovt)
- MIB->setDesc(TII.get(ARM::MOVi32imm));
+ MIB->setDesc(TII.get(Opcodes.MOVi32imm));
else
- MIB->setDesc(TII.get(ARM::LDRLIT_ga_abs));
+ MIB->setDesc(TII.get(Opcodes.LDRLIT_ga_abs));
} else {
LLVM_DEBUG(dbgs() << "Object format not supported yet\n");
return false;
@@ -680,13 +769,13 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
auto InsertBefore = std::next(MIB->getIterator());
auto &DbgLoc = MIB->getDebugLoc();
- // Compare the condition to 0.
+ // Compare the condition to 1.
auto CondReg = MIB->getOperand(1).getReg();
assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
- auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri))
+ auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(Opcodes.TSTri))
.addUse(CondReg)
- .addImm(0)
+ .addImm(1)
.add(predOps(ARMCC::AL));
if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
return false;
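The switch from CMPri #0 to TSTri #1 reflects that the condition is an i1 carried in a 32-bit register, so only its low bit is meaningful. A one-line model of what the TST/MOVCC pair computes (illustration only; the machine sequence sets the flags from Cond & 1 and then picks between the two incoming values):

    #include <cstdint>

    uint32_t selectOnI1(uint32_t Cond, uint32_t TrueVal, uint32_t FalseVal) {
      return (Cond & 1u) ? TrueVal : FalseVal;
    }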
@@ -699,7 +788,7 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) &&
validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
- auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr))
+ auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(Opcodes.MOVCCr))
.addDef(ResReg)
.addUse(TrueReg)
.addUse(FalseReg)
@@ -713,12 +802,37 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
bool ARMInstructionSelector::selectShift(unsigned ShiftOpc,
MachineInstrBuilder &MIB) const {
+ assert(!STI.isThumb() && "Unsupported subtarget");
MIB->setDesc(TII.get(ARM::MOVsr));
MIB.addImm(ShiftOpc);
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+void ARMInstructionSelector::renderVFPF32Imm(
+ MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+ assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
+ "Expected G_FCONSTANT");
+
+ APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
+ int FPImmEncoding = ARM_AM::getFP32Imm(FPImmValue);
+ assert(FPImmEncoding != -1 && "Invalid immediate value");
+
+ NewInstBuilder.addImm(FPImmEncoding);
+}
+
+void ARMInstructionSelector::renderVFPF64Imm(
+ MachineInstrBuilder &NewInstBuilder, const MachineInstr &OldInst) const {
+ assert(OldInst.getOpcode() == TargetOpcode::G_FCONSTANT &&
+ "Expected G_FCONSTANT");
+
+ APFloat FPImmValue = OldInst.getOperand(1).getFPImm()->getValueAPF();
+ int FPImmEncoding = ARM_AM::getFP64Imm(FPImmValue);
+ assert(FPImmEncoding != -1 && "Invalid immediate value");
+
+ NewInstBuilder.addImm(FPImmEncoding);
+}
+
bool ARMInstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
assert(I.getParent() && "Instruction should be in a basic block!");
@@ -748,12 +862,8 @@ bool ARMInstructionSelector::select(MachineInstr &I,
isSExt = true;
LLVM_FALLTHROUGH;
case G_ZEXT: {
- LLT DstTy = MRI.getType(I.getOperand(0).getReg());
- // FIXME: Smaller destination sizes coming soon!
- if (DstTy.getSizeInBits() != 32) {
- LLVM_DEBUG(dbgs() << "Unsupported destination size for extension");
- return false;
- }
+ assert(MRI.getType(I.getOperand(0).getReg()).getSizeInBits() <= 32 &&
+ "Unsupported destination size for extension");
LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
unsigned SrcSize = SrcTy.getSizeInBits();
@@ -869,10 +979,32 @@ bool ARMInstructionSelector::select(MachineInstr &I,
}
}
+ assert(!STI.isThumb() && "Unsupported subtarget");
I.setDesc(TII.get(ARM::MOVi));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
}
+ case G_FCONSTANT: {
+ // Load from constant pool
+ unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits() / 8;
+ unsigned Alignment = Size;
+
+ assert((Size == 4 || Size == 8) && "Unsupported FP constant type");
+ auto LoadOpcode = Size == 4 ? ARM::VLDRS : ARM::VLDRD;
+
+ auto ConstPool = MF.getConstantPool();
+ auto CPIndex =
+ ConstPool->getConstantPoolIndex(I.getOperand(1).getFPImm(), Alignment);
+ MIB->setDesc(TII.get(LoadOpcode));
+ MIB->RemoveOperand(1);
+ MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0)
+ .addMemOperand(
+ MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
+ MachineMemOperand::MOLoad, Size, Alignment))
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ break;
+ }
case G_INTTOPTR:
case G_PTRTOINT: {
auto SrcReg = I.getOperand(1).getReg();
@@ -900,17 +1032,17 @@ bool ARMInstructionSelector::select(MachineInstr &I,
case G_SELECT:
return selectSelect(MIB, MRI);
case G_ICMP: {
- CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
- ARM::GPRRegBankID, 32);
+ CmpConstants Helper(Opcodes.CMPrr, ARM::INSTRUCTION_LIST_END,
+ Opcodes.MOVCCi, ARM::GPRRegBankID, 32);
return selectCmp(Helper, MIB, MRI);
}
case G_FCMP: {
- assert(STI.hasVFP2() && "Can't select fcmp without VFP");
+ assert(STI.hasVFP2Base() && "Can't select fcmp without VFP");
unsigned OpReg = I.getOperand(2).getReg();
unsigned Size = MRI.getType(OpReg).getSizeInBits();
- if (Size == 64 && STI.isFPOnlySP()) {
+ if (Size == 64 && !STI.hasFP64()) {
LLVM_DEBUG(dbgs() << "Subtarget only supports single precision");
return false;
}
@@ -920,7 +1052,7 @@ bool ARMInstructionSelector::select(MachineInstr &I,
}
CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
- ARM::FPRRegBankID, Size);
+ Opcodes.MOVCCi, ARM::FPRRegBankID, Size);
return selectCmp(Helper, MIB, MRI);
}
case G_LSHR:
@@ -931,13 +1063,13 @@ bool ARMInstructionSelector::select(MachineInstr &I,
return selectShift(ARM_AM::ShiftOpc::lsl, MIB);
}
case G_GEP:
- I.setDesc(TII.get(ARM::ADDrr));
+ I.setDesc(TII.get(Opcodes.ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
case G_FRAME_INDEX:
// Add 0 to the given frame index and hope it will eventually be folded into
// the user(s).
- I.setDesc(TII.get(ARM::ADDri));
+ I.setDesc(TII.get(Opcodes.ADDri));
MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
break;
case G_GLOBAL_VALUE:
@@ -956,13 +1088,31 @@ bool ARMInstructionSelector::select(MachineInstr &I,
LLT ValTy = MRI.getType(Reg);
const auto ValSize = ValTy.getSizeInBits();
- assert((ValSize != 64 || STI.hasVFP2()) &&
+ assert((ValSize != 64 || STI.hasVFP2Base()) &&
"Don't know how to load/store 64-bit value without VFP");
const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
if (NewOpc == G_LOAD || NewOpc == G_STORE)
return false;
+ if (ValSize == 1 && NewOpc == Opcodes.STORE8) {
+ // Before storing a 1-bit value, make sure to clear out any unneeded bits.
+ unsigned OriginalValue = I.getOperand(0).getReg();
+
+ unsigned ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ I.getOperand(0).setReg(ValueToStore);
+
+ auto InsertBefore = I.getIterator();
+ auto AndI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.AND))
+ .addDef(ValueToStore)
+ .addUse(OriginalValue)
+ .addImm(1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (!constrainSelectedInstRegOperands(*AndI, TII, TRI, RBI))
+ return false;
+ }
+
I.setDesc(TII.get(NewOpc));
if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
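Because an i1 occupies a full GPR whose upper bits are not guaranteed to be clear, the store path masks the value first. A minimal model of the byte that reaches memory (illustration only):

    #include <cstdint>

    // The AND with immediate 1 inserted above keeps only bit 0, so the
    // byte store writes exactly 0 or 1.
    uint8_t byteStoredForI1(uint32_t RegHoldingI1) {
      return static_cast<uint8_t>(RegHoldingI1 & 1u);
    }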
@@ -988,17 +1138,19 @@ bool ARMInstructionSelector::select(MachineInstr &I,
}
// Set the flags.
- auto Test = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::TSTri))
- .addReg(I.getOperand(0).getReg())
- .addImm(1)
- .add(predOps(ARMCC::AL));
+ auto Test =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcodes.TSTri))
+ .addReg(I.getOperand(0).getReg())
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (!constrainSelectedInstRegOperands(*Test, TII, TRI, RBI))
return false;
// Branch conditionally.
- auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc))
- .add(I.getOperand(1))
- .add(predOps(ARMCC::NE, ARM::CPSR));
+ auto Branch =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcodes.Bcc))
+ .add(I.getOperand(1))
+ .add(predOps(ARMCC::NE, ARM::CPSR));
if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI))
return false;
I.eraseFromParent();
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 4a0c24d58474..73a57b297ad6 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -83,41 +82,29 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
}
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
- .legalForCartesianProduct({s32}, {s1, s8, s16});
+ .legalForCartesianProduct({s8, s16, s32}, {s1, s8, s16});
- getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+ getActionDefinitionsBuilder({G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32})
.minScalar(0, s32);
- getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
- getActionDefinitionsBuilder(G_PTRTOINT).legalFor({{s32, p0}});
-
- getActionDefinitionsBuilder(G_CONSTANT)
- .legalFor({s32, p0})
- .clampScalar(0, s32, s32);
-
- // We're keeping these builders around because we'll want to add support for
- // floating point to them.
- auto &LoadStoreBuilder =
- getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalForTypesWithMemSize({
- {s1, p0, 8},
- {s8, p0, 8},
- {s16, p0, 16},
- {s32, p0, 32},
- {p0, p0, 32}});
-
- if (ST.isThumb()) {
- // FIXME: merge with the code for non-Thumb.
- computeTables();
- verify(*ST.getInstrInfo());
- return;
- }
+ if (ST.hasNEON())
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
+ .legalFor({s32, s64})
+ .minScalar(0, s32);
+ else
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
+ .legalFor({s32})
+ .minScalar(0, s32);
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
- getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+ getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
+ .legalFor({{s32, s32}})
+ .minScalar(0, s32)
+ .clampScalar(1, s32, s32);
- if (ST.hasDivideInARMMode())
+ bool HasHWDivide = (!ST.isThumb() && ST.hasDivideInARMMode()) ||
+ (ST.isThumb() && ST.hasDivideInThumbMode());
+ if (HasHWDivide)
getActionDefinitionsBuilder({G_SDIV, G_UDIV})
.legalFor({s32})
.clampScalar(0, s32, s32);
@@ -128,7 +115,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
for (unsigned Op : {G_SREM, G_UREM}) {
setLegalizeScalarToDifferentSizeStrategy(Op, 0, widen_8_16);
- if (ST.hasDivideInARMMode())
+ if (HasHWDivide)
setAction({Op, s32}, Lower);
else if (AEABI(ST))
setAction({Op, s32}, Custom);
@@ -136,46 +123,57 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({Op, s32}, Libcall);
}
- getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32});
-
- if (ST.hasV5TOps()) {
- getActionDefinitionsBuilder(G_CTLZ)
- .legalFor({s32})
- .clampScalar(0, s32, s32);
- getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
- .lowerFor({s32})
- .clampScalar(0, s32, s32);
- } else {
- getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
- .libcallFor({s32})
- .clampScalar(0, s32, s32);
- getActionDefinitionsBuilder(G_CTLZ)
- .lowerFor({s32})
- .clampScalar(0, s32, s32);
- }
-
- getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}});
-
- getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0},
- {s1});
+ getActionDefinitionsBuilder(G_INTTOPTR)
+ .legalFor({{p0, s32}})
+ .minScalar(1, s32);
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalFor({{s32, p0}})
+ .minScalar(0, s32);
- getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalFor({s32, p0})
+ .clampScalar(0, s32, s32);
getActionDefinitionsBuilder(G_ICMP)
.legalForCartesianProduct({s1}, {s32, p0})
.minScalar(1, s32);
+ getActionDefinitionsBuilder(G_SELECT)
+ .legalForCartesianProduct({s32, p0}, {s1})
+ .minScalar(0, s32);
+
// We're keeping these builders around because we'll want to add support for
// floating point to them.
+ auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE})
+ .legalForTypesWithMemDesc({{s1, p0, 8, 8},
+ {s8, p0, 8, 8},
+ {s16, p0, 16, 8},
+ {s32, p0, 32, 8},
+ {p0, p0, 32, 8}})
+ .unsupportedIfMemSizeNotPow2();
+
+ getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});
+
auto &PhiBuilder =
- getActionDefinitionsBuilder(G_PHI).legalFor({s32, p0}).minScalar(0, s32);
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({s32, p0})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_GEP)
+ .legalFor({{p0, s32}})
+ .minScalar(1, s32);
- if (!ST.useSoftFloat() && ST.hasVFP2()) {
+ getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
+
+ if (!ST.useSoftFloat() && ST.hasVFP2Base()) {
getActionDefinitionsBuilder(
{G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG})
.legalFor({s32, s64});
- LoadStoreBuilder.legalFor({{s64, p0}});
+ LoadStoreBuilder
+ .legalForTypesWithMemDesc({{s64, p0, 64, 32}})
+ .maxScalar(0, s32);
PhiBuilder.legalFor({s64});
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({s1},
@@ -219,13 +217,33 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
.libcallForCartesianProduct({s32, s64}, {s32});
}
- if (!ST.useSoftFloat() && ST.hasVFP4())
+ if (!ST.useSoftFloat() && ST.hasVFP4Base())
getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64});
else
getActionDefinitionsBuilder(G_FMA).libcallFor({s32, s64});
getActionDefinitionsBuilder({G_FREM, G_FPOW}).libcallFor({s32, s64});
+ if (ST.hasV5TOps()) {
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalFor({s32, s32})
+ .clampScalar(1, s32, s32)
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .lowerFor({s32, s32})
+ .clampScalar(1, s32, s32)
+ .clampScalar(0, s32, s32);
+ } else {
+ getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+ .libcallFor({s32, s32})
+ .clampScalar(1, s32, s32)
+ .clampScalar(0, s32, s32);
+ getActionDefinitionsBuilder(G_CTLZ)
+ .lowerFor({s32, s32})
+ .clampScalar(1, s32, s32)
+ .clampScalar(0, s32, s32);
+ }
+
computeTables();
verify(*ST.getInstrInfo());
}
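The two branches above lower count-leading-zeros in opposite directions depending on whether the core has a CLZ instruction (v5T and later). A small model of how the two generic opcodes relate (illustration only):

    #include <cstdint>

    // G_CTLZ is defined at zero; G_CTLZ_ZERO_UNDEF is not. With hardware CLZ,
    // G_CTLZ is legal and the _ZERO_UNDEF form reuses it; without CLZ, G_CTLZ
    // is expanded to a zero test around the _ZERO_UNDEF form, which in turn
    // becomes a libcall.
    uint32_t ctlz32(uint32_t X, uint32_t (*CtlzZeroUndef)(uint32_t)) {
      return X == 0 ? 32u : CtlzZeroUndef(X);
    }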
@@ -351,7 +369,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
return false;
case G_SREM:
case G_UREM: {
- unsigned OriginalResult = MI.getOperand(0).getReg();
+ Register OriginalResult = MI.getOperand(0).getReg();
auto Size = MRI.getType(OriginalResult).getSizeInBits();
if (Size != 32)
return false;
@@ -360,24 +378,17 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
// Our divmod libcalls return a struct containing the quotient and the
- // remainder. We need to create a virtual register for it.
+ // remainder. Create a new, unused register for the quotient and use the
+ // destination of the original instruction for the remainder.
Type *ArgTy = Type::getInt32Ty(Ctx);
StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true);
- auto RetVal = MRI.createGenericVirtualRegister(
- getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
-
- auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
+ Register RetRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ OriginalResult};
+ auto Status = createLibcall(MIRBuilder, Libcall, {RetRegs, RetTy},
{{MI.getOperand(1).getReg(), ArgTy},
{MI.getOperand(2).getReg(), ArgTy}});
if (Status != LegalizerHelper::Legalized)
return false;
-
- // The remainder is the second result of divmod. Split the return value into
- // a new, unused register for the quotient and the destination of the
- // original instruction for the remainder.
- MIRBuilder.buildUnmerge(
- {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
- RetVal);
break;
}
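The rewritten G_SREM/G_UREM lowering leans on the divmod libcall producing both results at once: the quotient lands in a fresh virtual register and the remainder goes straight into the original destination. A hedged sketch of what the 32-bit helper computes (illustrative model only, not the runtime's actual declaration):

    // Models the result shape of an AEABI-style divmod helper: a single call
    // yields both the quotient and the remainder.
    struct DivModResult { int Quot; int Rem; };

    DivModResult divmod32(int Numerator, int Denominator) {
      return {Numerator / Denominator, Numerator % Denominator};
    }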
case G_FCMP: {
@@ -405,7 +416,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
auto *RetTy = Type::getInt32Ty(Ctx);
- SmallVector<unsigned, 2> Results;
+ SmallVector<Register, 2> Results;
for (auto Libcall : Libcalls) {
auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
auto Status =
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index 527bf87f1093..e95f8cf76103 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -1,9 +1,8 @@
//===- ARMLegalizerInfo ------------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6da7430a8e51..90a1ce238c3f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,9 +1,8 @@
//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -174,12 +173,14 @@ namespace {
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs);
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs);
MachineInstr *CreateLoadStoreDouble(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) const;
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs) const;
void FormCandidates(const MemOpQueue &MemOps);
MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -623,7 +624,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) {
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs) {
unsigned NumRegs = Regs.size();
assert(NumRegs > 1);
@@ -815,6 +817,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
for (const std::pair<unsigned, bool> &R : Regs)
MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
+ MIB.cloneMergedMemRefs(Instrs);
+
return MIB.getInstr();
}
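A brief note on the Instrs parameter now threaded through these helpers: it carries the instructions being merged so their memory operands can be cloned onto the result.

    // cloneMergedMemRefs(Instrs) attaches the original instructions'
    // MachineMemOperands to the new multi-load/store, so alias analysis and
    // later passes retain information about which memory it may touch.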
@@ -822,7 +826,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) const {
+ ArrayRef<std::pair<unsigned, bool>> Regs,
+ ArrayRef<MachineInstr*> Instrs) const {
bool IsLoad = isi32Load(Opcode);
assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
@@ -838,6 +843,7 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
.addReg(Regs[1].first, getKillRegState(Regs[1].second));
}
MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ MIB.cloneMergedMemRefs(Instrs);
return MIB.getInstr();
}
@@ -895,10 +901,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
MachineInstr *Merged = nullptr;
if (Cand.CanMergeToLSDouble)
Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
- Opcode, Pred, PredReg, DL, Regs);
+ Opcode, Pred, PredReg, DL, Regs,
+ Cand.Instrs);
if (!Merged && Cand.CanMergeToLSMulti)
Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
- Opcode, Pred, PredReg, DL, Regs);
+ Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
if (!Merged)
return nullptr;
@@ -1287,7 +1294,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// can still change to a writeback form as that will save us 2 bytes
// of code size. It can create WAW hazards though, so only do it if
// we're minimizing code size.
- if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+ if (!STI->hasMinSize() || !BaseKill)
return false;
bool HighRegsUsed = false;
@@ -1436,14 +1443,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base, getKillRegState(isLd ? BaseKill : false))
.addImm(Pred).addReg(PredReg)
.addReg(MO.getReg(), (isLd ? getDefRegState(true) :
- getKillRegState(MO.isKill())));
+ getKillRegState(MO.isKill())))
+ .cloneMemRefs(*MI);
} else if (isLd) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg)
+ .cloneMemRefs(*MI);
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
@@ -1451,7 +1460,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base)
.addReg(0)
.addImm(Imm)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
}
} else {
// t2LDR_PRE, t2LDR_POST
@@ -1459,7 +1469,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base, RegState::Define)
.addReg(Base)
.addImm(Offset)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1474,14 +1485,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
.addReg(Base)
.addReg(0)
.addImm(Imm)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
} else {
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base)
.addImm(Offset)
- .add(predOps(Pred, PredReg));
+ .add(predOps(Pred, PredReg))
+ .cloneMemRefs(*MI);
}
}
MBB.erase(MBBI);
@@ -1541,7 +1554,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.add(MO);
- MIB.setMemRefs(MI.memoperands());
+ MIB.cloneMemRefs(MI);
MBB.erase(MBBI);
return true;
@@ -1581,7 +1594,9 @@ static bool isMemoryOp(const MachineInstr &MI) {
const MachineMemOperand &MMO = **MI.memoperands_begin();
// Don't touch volatile memory accesses - we may be changing their order.
- if (MMO.isVolatile())
+ // TODO: We could allow unordered and monotonic atomics here, but we need to
+ // make sure the resulting ldm/stm is correctly marked as atomic.
+ if (MMO.isVolatile() || MMO.isAtomic())
return false;
// Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
@@ -1607,19 +1622,26 @@ static void InsertLDR_STR(MachineBasicBlock &MBB,
bool isDef, unsigned NewOpc, unsigned Reg,
bool RegDeadKill, bool RegUndef, unsigned BaseReg,
bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
- unsigned PredReg, const TargetInstrInfo *TII) {
+ unsigned PredReg, const TargetInstrInfo *TII,
+ MachineInstr *MI) {
if (isDef) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
.addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ // FIXME: This is overly conservative; the new instruction accesses 4
+ // bytes, not 8.
+ MIB.cloneMemRefs(*MI);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
TII->get(NewOpc))
.addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
.addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ // FIXME: This is overly conservative; the new instruction accesses 4
+ // bytes, not 8.
+ MIB.cloneMemRefs(*MI);
}
}
@@ -1677,7 +1699,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
.addReg(BaseReg, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg)
.addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
- .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
+ .cloneMemRefs(*MI);
++NumLDRD2LDM;
} else {
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
@@ -1686,7 +1709,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
.addReg(EvenReg,
getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
.addReg(OddReg,
- getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
+ getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
+ .cloneMemRefs(*MI);
++NumSTRD2STM;
}
} else {
@@ -1704,9 +1728,10 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
assert(!TRI->regsOverlap(OddReg, BaseReg));
InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
- false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
+ false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
- false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
+ false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
+ MI);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
// If the two source operands are the same, the kill marker is
@@ -1719,9 +1744,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if (EvenReg == BaseReg)
EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
- EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
+ EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
+ MI);
InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
- OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
+ OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
+ MI);
}
if (isLd)
++NumLDRD2LDR;
@@ -2048,6 +2075,11 @@ char ARMPreAllocLoadStoreOpt::ID = 0;
INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+// Limit the number of instructions to be rescheduled.
+// FIXME: tune this limit, and/or come up with some better heuristics.
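+// The limit is registered as a hidden command-line option
+// (-arm-prera-ldst-opt-reorder-limit=<n>, the name taken from the cl::opt
+// below), e.g. for experiments with llc, so it can be tuned without
+// rebuilding.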
+static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
+ cl::init(8), cl::Hidden);
+
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
return false;
@@ -2140,7 +2172,8 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// At the moment, we ignore the memoryoperand's value.
// If we want to use AliasAnalysis, we should check it accordingly.
if (!Op0->hasOneMemOperand() ||
- (*Op0->memoperands_begin())->isVolatile())
+ (*Op0->memoperands_begin())->isVolatile() ||
+ (*Op0->memoperands_begin())->isAtomic())
return false;
unsigned Align = (*Op0->memoperands_begin())->getAlignment();
@@ -2223,7 +2256,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
// Don't try to reschedule too many instructions.
- if (NumMove == 8) // FIXME: Tune this limit.
+ if (NumMove == InstReorderLimit)
break;
// Found a mergable instruction; save information about it.
@@ -2351,10 +2384,13 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
bool RetVal = false;
DenseMap<MachineInstr*, unsigned> MI2LocMap;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
- SmallVector<unsigned, 4> LdBases;
- SmallVector<unsigned, 4> StBases;
+ using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
+ using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
+ using BaseVec = SmallVector<unsigned, 4>;
+ Base2InstMap Base2LdsMap;
+ Base2InstMap Base2StsMap;
+ BaseVec LdBases;
+ BaseVec StBases;
unsigned Loc = 0;
MachineBasicBlock::iterator MBBI = MBB->begin();
@@ -2381,41 +2417,28 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
bool isLd = isLoadSingle(Opc);
unsigned Base = MI.getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
-
bool StopHere = false;
- if (isLd) {
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
- Base2LdsMap.find(Base);
- if (BI != Base2LdsMap.end()) {
- for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(*BI->second[i])) {
- StopHere = true;
- break;
- }
- }
- if (!StopHere)
- BI->second.push_back(&MI);
- } else {
- Base2LdsMap[Base].push_back(&MI);
- LdBases.push_back(Base);
+ auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
+ MapIt BI = Base2Ops.find(Base);
+ if (BI == Base2Ops.end()) {
+ Base2Ops[Base].push_back(&MI);
+ Bases.push_back(Base);
+ return;
}
- } else {
- DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
- Base2StsMap.find(Base);
- if (BI != Base2StsMap.end()) {
- for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
- if (Offset == getMemoryOpOffset(*BI->second[i])) {
- StopHere = true;
- break;
- }
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(*BI->second[i])) {
+ StopHere = true;
+ break;
}
- if (!StopHere)
- BI->second.push_back(&MI);
- } else {
- Base2StsMap[Base].push_back(&MI);
- StBases.push_back(Base);
}
- }
+ if (!StopHere)
+ BI->second.push_back(&MI);
+ };
+
+ if (isLd)
+ FindBases(Base2LdsMap, LdBases);
+ else
+ FindBases(Base2StsMap, StBases);
if (StopHere) {
// Found a duplicate (a base+offset combination that's seen earlier).
diff --git a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp
new file mode 100644
index 000000000000..cedf3bd3c74e
--- /dev/null
+++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -0,0 +1,384 @@
+//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
+/// instructions into machine operations.
+/// The expectation is that the loop contains three pseudo instructions:
+/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
+///   form should be in the preheader, whereas the while form should be in the
+///   preheader's only predecessor. TODO: Could DoLoopStart get moved into the
+///   pre-preheader?
+/// - t2LoopDec - placed within the loop body.
+/// - t2LoopEnd - the loop latch terminator.
+///
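+/// As a rough illustration (operand layout simplified; register and block
+/// names are placeholders rather than a real MIR dump), a loop such as:
+///
+///   preheader:
+///     t2DoLoopStart $r0
+///   body:
+///     ...
+///     $lr = t2LoopDec $lr, 1
+///     t2LoopEnd $lr, %body
+///
+/// is expected to be finalized into:
+///
+///   preheader:
+///     $lr = t2DLS $r0
+///   body:
+///     ...
+///     $lr = t2LEUpdate $lr, %body
+///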
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-low-overhead-loops"
+#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
+
+namespace {
+
+ class ARMLowOverheadLoops : public MachineFunctionPass {
+ const ARMBaseInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
+
+ public:
+ static char ID;
+
+ ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool ProcessLoop(MachineLoop *ML);
+
+ void RevertWhile(MachineInstr *MI) const;
+
+ void RevertLoopDec(MachineInstr *MI) const;
+
+ void RevertLoopEnd(MachineInstr *MI) const;
+
+ void Expand(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *Dec, MachineInstr *End, bool Revert);
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return ARM_LOW_OVERHEAD_LOOPS_NAME;
+ }
+ };
+}
+
+char ARMLowOverheadLoops::ID = 0;
+
+INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
+ false, false)
+
+bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
+ if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
+
+ auto &MLI = getAnalysis<MachineLoopInfo>();
+ MRI = &MF.getRegInfo();
+ TII = static_cast<const ARMBaseInstrInfo*>(
+ MF.getSubtarget().getInstrInfo());
+ BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
+ BBUtils->computeAllBlockSizes();
+ BBUtils->adjustBBOffsetsAfter(&MF.front());
+
+ bool Changed = false;
+ for (auto ML : MLI) {
+ if (!ML->getParentLoop())
+ Changed |= ProcessLoop(ML);
+ }
+ return Changed;
+}
+
+bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
+
+ bool Changed = false;
+
+ // Process inner loops first.
+ for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
+ Changed |= ProcessLoop(*I);
+
+ LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
+
+ auto IsLoopStart = [](MachineInstr &MI) {
+ return MI.getOpcode() == ARM::t2DoLoopStart ||
+ MI.getOpcode() == ARM::t2WhileLoopStart;
+ };
+
+ // Search the given block for a loop start instruction. If one isn't found,
+ // and there's only one predecessor block, search that one too.
+ std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
+ [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
+ for (auto &MI : *MBB) {
+ if (IsLoopStart(MI))
+ return &MI;
+ }
+ if (MBB->pred_size() == 1)
+ return SearchForStart(*MBB->pred_begin());
+ return nullptr;
+ };
+
+ MachineInstr *Start = nullptr;
+ MachineInstr *Dec = nullptr;
+ MachineInstr *End = nullptr;
+ bool Revert = false;
+
+ // Search the preheader for the start intrinsic, or look through the
+ // predecessors of the header to find exactly one set.iterations intrinsic.
+ // FIXME: I don't see why we shouldn't be supporting multiple predecessors
+ // with potentially multiple set.loop.iterations, so we need to enable this.
+ if (auto *Preheader = ML->getLoopPreheader()) {
+ Start = SearchForStart(Preheader);
+ } else {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
+ << " - Performing manual predecessor search.\n");
+ MachineBasicBlock *Pred = nullptr;
+ for (auto *MBB : ML->getHeader()->predecessors()) {
+ if (!ML->contains(MBB)) {
+ if (Pred) {
+ LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
+ Start = nullptr;
+ break;
+ }
+ Pred = MBB;
+ Start = SearchForStart(MBB);
+ }
+ }
+ }
+
+ // Find the low-overhead loop components and decide whether or not to fall
+ // back to a normal loop.
+ for (auto *MBB : reverse(ML->getBlocks())) {
+ for (auto &MI : *MBB) {
+ if (MI.getOpcode() == ARM::t2LoopDec)
+ Dec = &MI;
+ else if (MI.getOpcode() == ARM::t2LoopEnd)
+ End = &MI;
+ else if (MI.getDesc().isCall())
+ // TODO: Though the call will require LE to execute again, does this
+        // mean we should revert? Always executing LE should hopefully be
+ // faster than performing a sub,cmp,br or even subs,br.
+ Revert = true;
+
+ if (!Dec)
+ continue;
+
+ // If we find that we load/store LR between LoopDec and LoopEnd, expect
+ // that the decremented value has been spilled to the stack. Because
+ // this value isn't actually going to be produced until the latch, by LE,
+ // we would need to generate a real sub. The value is also likely to be
+      // reloaded for use by LoopEnd - in which case we'd need to perform
+ // an add because it gets negated again by LE! The other option is to
+ // then generate the other form of LE which doesn't perform the sub.
+ if (MI.mayLoad() || MI.mayStore())
+ Revert =
+ MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
+ }
+
+ if (Dec && End && Revert)
+ break;
+ }
+
+ if (!Start && !Dec && !End) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
+ return Changed;
+  } else if (!(Start && Dec && End)) {
+ report_fatal_error("Failed to find all loop components");
+ }
+
+ if (!End->getOperand(1).isMBB() ||
+ End->getOperand(1).getMBB() != ML->getHeader())
+ report_fatal_error("Expected LoopEnd to target Loop Header");
+
+ // The WLS and LE instructions have 12-bits for the label offset. WLS
+ // requires a positive offset, while LE uses negative.
+ if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
+ !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
+ Revert = true;
+ }
+ if (Start->getOpcode() == ARM::t2WhileLoopStart &&
+ (BBUtils->getOffsetOf(Start) >
+ BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
+ !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
+ Revert = true;
+ }
+
+ LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
+ << " - Found Loop Dec: " << *Dec
+ << " - Found Loop End: " << *End);
+
+ Expand(ML, Start, Dec, End, Revert);
+ return true;
+}
+
+// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+// beq that branches to the exit branch.
+// FIXME: Need to check that we're not trashing the CPSR when generating the
+// cmp. We could also try to generate a cbz if the value in LR is also in
+// another low register.
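+// Roughly, the reverted sequence looks like (operands simplified, names
+// illustrative):
+//   t2CMPri $lr, 0
+//   t2Bcc <exit block>, eq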
+void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::t2CMPri));
+ MIB.addReg(ARM::LR);
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::CPSR);
+
+ // TODO: Try to use tBcc instead
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::EQ); // condition code
+ MIB.addReg(ARM::CPSR);
+ MI->eraseFromParent();
+}
+
+// TODO: Check flags so that we can possibly generate a tSubs or tSub.
+void ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::t2SUBri));
+ MIB.addDef(ARM::LR);
+ MIB.add(MI->getOperand(1));
+ MIB.add(MI->getOperand(2));
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(0);
+ MIB.addReg(0);
+ MI->eraseFromParent();
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+// FIXME: Need to check that we're not trashing the CPSR when generating
+// the cmp.
+void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI) const {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
+
+ // Create cmp
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::t2CMPri));
+ MIB.addReg(ARM::LR);
+ MIB.addImm(0);
+ MIB.addImm(ARMCC::AL);
+ MIB.addReg(ARM::CPSR);
+
+ // TODO Try to use tBcc instead.
+ // Create bne
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
+ MIB.add(MI->getOperand(1)); // branch target
+ MIB.addImm(ARMCC::NE); // condition code
+ MIB.addReg(ARM::CPSR);
+ MI->eraseFromParent();
+}
+
+void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
+ MachineInstr *Dec, MachineInstr *End,
+ bool Revert) {
+
+ auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
+    // The trip count should already be held in LR since the instructions
+    // within the loop can only read and write to LR. So, there should be a
+    // mov to set up the count. WLS/DLS perform this move, so find the original
+ // and delete it - inserting WLS/DLS in its place.
+ MachineBasicBlock *MBB = Start->getParent();
+ MachineInstr *InsertPt = Start;
+ for (auto &I : MRI->def_instructions(ARM::LR)) {
+ if (I.getParent() != MBB)
+ continue;
+
+ // Always execute.
+ if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL)
+ continue;
+
+      // Only handle a register move; the trip count will need moving into a
+      // register before the setup instruction anyway.
+ if (!I.getDesc().isMoveReg() ||
+ !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
+ continue;
+ InsertPt = &I;
+ break;
+ }
+
+ unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
+ ARM::t2DLS : ARM::t2WLS;
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
+
+ MIB.addDef(ARM::LR);
+ MIB.add(Start->getOperand(0));
+ if (Opc == ARM::t2WLS)
+ MIB.add(Start->getOperand(1));
+
+ if (InsertPt != Start)
+ InsertPt->eraseFromParent();
+ Start->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
+ return &*MIB;
+ };
+
+ // Combine the LoopDec and LoopEnd instructions into LE(TP).
+ auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
+ MachineInstr *End) {
+ MachineBasicBlock *MBB = End->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
+ TII->get(ARM::t2LEUpdate));
+ MIB.addDef(ARM::LR);
+ MIB.add(End->getOperand(0));
+ MIB.add(End->getOperand(1));
+ LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
+
+ End->eraseFromParent();
+ Dec->eraseFromParent();
+ return &*MIB;
+ };
+
+ // TODO: We should be able to automatically remove these branches before we
+ // get here - probably by teaching analyzeBranch about the pseudo
+ // instructions.
+ // If there is an unconditional branch, after I, that just branches to the
+ // next block, remove it.
+ auto RemoveDeadBranch = [](MachineInstr *I) {
+ MachineBasicBlock *BB = I->getParent();
+ MachineInstr *Terminator = &BB->instr_back();
+ if (Terminator->isUnconditionalBranch() && I != Terminator) {
+ MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
+ if (BB->isLayoutSuccessor(Succ)) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
+ Terminator->eraseFromParent();
+ }
+ }
+ };
+
+ if (Revert) {
+ if (Start->getOpcode() == ARM::t2WhileLoopStart)
+ RevertWhile(Start);
+ else
+ Start->eraseFromParent();
+ RevertLoopDec(Dec);
+ RevertLoopEnd(End);
+ } else {
+ Start = ExpandLoopStart(ML, Start);
+ RemoveDeadBranch(Start);
+ End = ExpandLoopEnd(ML, Dec, End);
+ RemoveDeadBranch(End);
+ }
+}
+
+FunctionPass *llvm::createARMLowOverheadLoopsPass() {
+ return new ARMLowOverheadLoops();
+}
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 48b02d40b246..90c5ad025e56 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index e25d36b57616..3b676ca4c883 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 91310e81e398..90d794cd27b1 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,6 +61,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// enable far jump.
bool LRSpilledForFarJump = false;
+  /// LRSpilled - True if the LR register has been spilled for
+ /// any reason, so it's legal to emit an ARM::tBfar (i.e. "bl").
+ bool LRSpilled = false;
+
/// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
/// spill stack offset.
unsigned FramePtrSpillOffset = 0;
@@ -151,6 +154,9 @@ public:
bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+ bool isLRSpilled() const { return LRSpilled; }
+ void setLRIsSpilled(bool s) { LRSpilled = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
@@ -239,6 +245,8 @@ public:
void setPromotedConstpoolIncrease(int Sz) {
PromotedGlobalsIncrease = Sz;
}
+
+ DenseMap<unsigned, unsigned> EHPrologueRemappedRegs;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp
index df1da9d8e474..38bf28ba8219 100644
--- a/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/lib/Target/ARM/ARMMacroFusion.cpp
@@ -1,9 +1,8 @@
//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMMacroFusion.h b/lib/Target/ARM/ARMMacroFusion.h
index b3abd7b593a1..4896a4a2544d 100644
--- a/lib/Target/ARM/ARMMacroFusion.h
+++ b/lib/Target/ARM/ARMMacroFusion.h
@@ -1,9 +1,8 @@
//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index cff4a256100d..348895da713f 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -1,10 +1,9 @@
//===-- ARMOptimizeBarriersPass - two DMBs without a memory access in between,
//removed one -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===------------------------------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMParallelDSP.cpp b/lib/Target/ARM/ARMParallelDSP.cpp
index fc3258914f92..5389d09bf7d7 100644
--- a/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/lib/Target/ARM/ARMParallelDSP.cpp
@@ -1,9 +1,8 @@
//===- ParallelDSP.cpp - Parallel DSP Pass --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,12 +48,12 @@ DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
namespace {
struct OpChain;
struct BinOpChain;
- struct Reduction;
+ class Reduction;
using OpChainList = SmallVector<std::unique_ptr<OpChain>, 8>;
using ReductionList = SmallVector<Reduction, 8>;
using ValueList = SmallVector<Value*, 8>;
- using MemInstList = SmallVector<Instruction*, 8>;
+ using MemInstList = SmallVector<LoadInst*, 8>;
using PMACPair = std::pair<BinOpChain*,BinOpChain*>;
using PMACPairList = SmallVector<PMACPair, 8>;
using Instructions = SmallVector<Instruction*,16>;
@@ -64,31 +63,24 @@ namespace {
Instruction *Root;
ValueList AllValues;
MemInstList VecLd; // List of all load instructions.
- MemLocList MemLocs; // All memory locations read by this tree.
+ MemInstList Loads;
bool ReadOnly = true;
OpChain(Instruction *I, ValueList &vl) : Root(I), AllValues(vl) { }
virtual ~OpChain() = default;
- void SetMemoryLocations() {
- const auto Size = LocationSize::unknown();
+ void PopulateLoads() {
for (auto *V : AllValues) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (I->mayWriteToMemory())
- ReadOnly = false;
- if (auto *Ld = dyn_cast<LoadInst>(V))
- MemLocs.push_back(MemoryLocation(Ld->getPointerOperand(), Size));
- }
+ if (auto *Ld = dyn_cast<LoadInst>(V))
+ Loads.push_back(Ld);
}
}
unsigned size() const { return AllValues.size(); }
};
- // 'BinOpChain' and 'Reduction' are just some bookkeeping data structures.
- // 'Reduction' contains the phi-node and accumulator statement from where we
- // start pattern matching, and 'BinOpChain' the multiplication
- // instructions that are candidates for parallel execution.
+ // 'BinOpChain' holds the multiplication instructions that are candidates
+ // for parallel execution.
struct BinOpChain : public OpChain {
ValueList LHS; // List of all (narrow) left hand operands.
ValueList RHS; // List of all (narrow) right hand operands.
@@ -103,15 +95,85 @@ namespace {
bool AreSymmetrical(BinOpChain *Other);
};
- struct Reduction {
- PHINode *Phi; // The Phi-node from where we start
- // pattern matching.
- Instruction *AccIntAdd; // The accumulating integer add statement,
- // i.e, the reduction statement.
- OpChainList MACCandidates; // The MAC candidates associated with
- // this reduction statement.
- PMACPairList PMACPairs;
- Reduction (PHINode *P, Instruction *Acc) : Phi(P), AccIntAdd(Acc) { };
+  /// Represent a sequence of multiply-accumulate operations with the aim of
+  /// performing the multiplications in parallel.
+ class Reduction {
+ Instruction *Root = nullptr;
+ Value *Acc = nullptr;
+ OpChainList Muls;
+ PMACPairList MulPairs;
+ SmallPtrSet<Instruction*, 4> Adds;
+
+ public:
+ Reduction() = delete;
+
+ Reduction (Instruction *Add) : Root(Add) { }
+
+    /// Record an Add instruction that is a part of this reduction.
+ void InsertAdd(Instruction *I) { Adds.insert(I); }
+
+ /// Record a BinOpChain, rooted at a Mul instruction, that is a part of
+ /// this reduction.
+ void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) {
+ Muls.push_back(make_unique<BinOpChain>(I, LHS, RHS));
+ }
+
+    /// Add the incoming accumulator value; returns true if a value had not
+ /// already been added. Returning false signals to the user that this
+ /// reduction already has a value to initialise the accumulator.
+ bool InsertAcc(Value *V) {
+ if (Acc)
+ return false;
+ Acc = V;
+ return true;
+ }
+
+ /// Set two BinOpChains, rooted at muls, that can be executed as a single
+ /// parallel operation.
+ void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) {
+ MulPairs.push_back(std::make_pair(Mul0, Mul1));
+ }
+
+ /// Return true if enough mul operations are found that can be executed in
+ /// parallel.
+ bool CreateParallelPairs();
+
+ /// Return the add instruction which is the root of the reduction.
+ Instruction *getRoot() { return Root; }
+
+    /// Return the incoming value to be accumulated. This may be null.
+ Value *getAccumulator() { return Acc; }
+
+ /// Return the set of adds that comprise the reduction.
+ SmallPtrSetImpl<Instruction*> &getAdds() { return Adds; }
+
+    /// Return the BinOpChains, rooted at mul instructions, that comprise
+    /// the reduction.
+ OpChainList &getMuls() { return Muls; }
+
+    /// Return the BinOpChains, rooted at mul instructions, that have been
+ /// paired for parallel execution.
+ PMACPairList &getMulPairs() { return MulPairs; }
+
+ /// To finalise, replace the uses of the root with the intrinsic call.
+ void UpdateRoot(Instruction *SMLAD) {
+ Root->replaceAllUsesWith(SMLAD);
+ }
+ };
+
+ class WidenedLoad {
+ LoadInst *NewLd = nullptr;
+ SmallVector<LoadInst*, 4> Loads;
+
+ public:
+ WidenedLoad(SmallVectorImpl<LoadInst*> &Lds, LoadInst *Wide)
+ : NewLd(Wide) {
+ for (auto *I : Lds)
+ Loads.push_back(I);
+ }
+ LoadInst *getLoad() {
+ return NewLd;
+ }
};
class ARMParallelDSP : public LoopPass {
@@ -124,28 +186,37 @@ namespace {
const DataLayout *DL;
Module *M;
std::map<LoadInst*, LoadInst*> LoadPairs;
- std::map<LoadInst*, SmallVector<LoadInst*, 4>> SequentialLoads;
+ SmallPtrSet<LoadInst*, 4> OffsetLoads;
+ std::map<LoadInst*, std::unique_ptr<WidenedLoad>> WideLoads;
+
+ template<unsigned>
+ bool IsNarrowSequence(Value *V, ValueList &VL);
- bool RecordSequentialLoads(BasicBlock *Header);
- bool InsertParallelMACs(Reduction &Reduction);
+ bool RecordMemoryOps(BasicBlock *BB);
+ void InsertParallelMACs(Reduction &Reduction);
bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);
- void CreateParallelMACPairs(Reduction &R);
- Instruction *CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc, bool Exchange,
- Instruction *InsertAfter);
+ LoadInst* CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
+ IntegerType *LoadTy);
+ bool CreateParallelPairs(Reduction &R);
/// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
/// Dual performs two signed 16x16-bit multiplications. It adds the
/// products to a 32-bit accumulate operand. Optionally, the instruction can
/// exchange the halfwords of the second operand before performing the
/// arithmetic.
- bool MatchSMLAD(Function &F);
+ bool MatchSMLAD(Loop *L);
public:
static char ID;
ARMParallelDSP() : LoopPass(ID) { }
+ bool doInitialization(Loop *L, LPPassManager &LPM) override {
+ LoadPairs.clear();
+ WideLoads.clear();
+ return true;
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
LoopPass::getAnalysisUsage(AU);
AU.addRequired<AssumptionCacheTracker>();
@@ -183,6 +254,9 @@ namespace {
return false;
}
+ if (!TheLoop->getLoopPreheader())
+ InsertPreheaderForLoop(L, DT, LI, nullptr, true);
+
Function &F = *Header->getParent();
M = F.getParent();
DL = &M->getDataLayout();
@@ -202,31 +276,62 @@ namespace {
return false;
}
+ if (!ST->isLittle()) {
+ LLVM_DEBUG(dbgs() << "Only supporting little endian: not running pass "
+ << "ARMParallelDSP\n");
+ return false;
+ }
+
LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
- bool Changes = false;
LLVM_DEBUG(dbgs() << "\n== Parallel DSP pass ==\n");
LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
- if (!RecordSequentialLoads(Header)) {
+ if (!RecordMemoryOps(Header)) {
LLVM_DEBUG(dbgs() << " - No sequential loads found.\n");
return false;
}
- Changes = MatchSMLAD(F);
+ bool Changes = MatchSMLAD(L);
return Changes;
}
};
}
+template<typename MemInst>
+static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
+ const DataLayout &DL, ScalarEvolution &SE) {
+ if (isConsecutiveAccess(MemOp0, MemOp1, DL, SE))
+ return true;
+ return false;
+}
+
+bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
+ MemInstList &VecMem) {
+ if (!Ld0 || !Ld1)
+ return false;
+
+ if (!LoadPairs.count(Ld0) || LoadPairs[Ld0] != Ld1)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Loads are sequential and valid:\n";
+ dbgs() << "Ld0:"; Ld0->dump();
+ dbgs() << "Ld1:"; Ld1->dump();
+ );
+
+ VecMem.clear();
+ VecMem.push_back(Ld0);
+ VecMem.push_back(Ld1);
+ return true;
+}
+
// MaxBitwidth: the maximum supported bitwidth of the elements in the DSP
// instructions, which is set to 16. So here we should collect all i8 and i16
// narrow operations.
// TODO: we currently only collect i16, and will support i8 later, so that's
// why we check that types are equal to MaxBitWidth, and not <= MaxBitWidth.
template<unsigned MaxBitWidth>
-static bool IsNarrowSequence(Value *V, ValueList &VL) {
- LLVM_DEBUG(dbgs() << "Is narrow sequence? "; V->dump());
+bool ARMParallelDSP::IsNarrowSequence(Value *V, ValueList &VL) {
ConstantInt *CInt;
if (match(V, m_ConstantInt(CInt))) {
@@ -236,7 +341,7 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
- return false;
+ return false;
Value *Val, *LHS, *RHS;
if (match(V, m_Trunc(m_Value(Val)))) {
@@ -245,108 +350,253 @@ static bool IsNarrowSequence(Value *V, ValueList &VL) {
} else if (match(V, m_Add(m_Value(LHS), m_Value(RHS)))) {
// TODO: we need to implement sadd16/sadd8 for this, which enables to
// also do the rewrite for smlad8.ll, but it is unsupported for now.
- LLVM_DEBUG(dbgs() << "No, unsupported Op:\t"; I->dump());
return false;
} else if (match(V, m_ZExtOrSExt(m_Value(Val)))) {
- if (cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth) {
- LLVM_DEBUG(dbgs() << "No, wrong SrcTy size: " <<
- cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() << "\n");
+ if (cast<CastInst>(I)->getSrcTy()->getIntegerBitWidth() != MaxBitWidth)
return false;
- }
if (match(Val, m_Load(m_Value()))) {
- LLVM_DEBUG(dbgs() << "Yes, found narrow Load:\t"; Val->dump());
+ auto *Ld = cast<LoadInst>(Val);
+
+      // Check that this load could be paired.
+ if (!LoadPairs.count(Ld) && !OffsetLoads.count(Ld))
+ return false;
+
VL.push_back(Val);
VL.push_back(I);
return true;
}
}
- LLVM_DEBUG(dbgs() << "No, unsupported Op:\t"; I->dump());
return false;
}
-template<typename MemInst>
-static bool AreSequentialAccesses(MemInst *MemOp0, MemInst *MemOp1,
- const DataLayout &DL, ScalarEvolution &SE) {
- if (!MemOp0->isSimple() || !MemOp1->isSimple()) {
- LLVM_DEBUG(dbgs() << "No, not touching volatile access\n");
- return false;
- }
- if (isConsecutiveAccess(MemOp0, MemOp1, DL, SE)) {
- LLVM_DEBUG(dbgs() << "OK: accesses are consecutive.\n");
- return true;
+/// Iterate through the block and record base, offset pairs of loads which can
+/// be widened into a single load.
+bool ARMParallelDSP::RecordMemoryOps(BasicBlock *BB) {
+ SmallVector<LoadInst*, 8> Loads;
+ SmallVector<Instruction*, 8> Writes;
+
+  // Collect loads and instructions that may write to memory. For now we only
+ // record loads which are simple, sign-extended and have a single user.
+ // TODO: Allow zero-extended loads.
+ for (auto &I : *BB) {
+ if (I.mayWriteToMemory())
+ Writes.push_back(&I);
+ auto *Ld = dyn_cast<LoadInst>(&I);
+ if (!Ld || !Ld->isSimple() ||
+ !Ld->hasOneUse() || !isa<SExtInst>(Ld->user_back()))
+ continue;
+ Loads.push_back(Ld);
}
- LLVM_DEBUG(dbgs() << "No, accesses aren't consecutive.\n");
- return false;
-}
-bool ARMParallelDSP::AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1,
- MemInstList &VecMem) {
- if (!Ld0 || !Ld1)
- return false;
+ using InstSet = std::set<Instruction*>;
+ using DepMap = std::map<Instruction*, InstSet>;
+ DepMap RAWDeps;
- LLVM_DEBUG(dbgs() << "Are consecutive loads:\n";
- dbgs() << "Ld0:"; Ld0->dump();
- dbgs() << "Ld1:"; Ld1->dump();
- );
+ // Record any writes that may alias a load.
+ const auto Size = LocationSize::unknown();
+ for (auto Read : Loads) {
+ for (auto Write : Writes) {
+ MemoryLocation ReadLoc =
+ MemoryLocation(Read->getPointerOperand(), Size);
- if (!Ld0->hasOneUse() || !Ld1->hasOneUse()) {
- LLVM_DEBUG(dbgs() << "No, load has more than one use.\n");
- return false;
+ if (!isModOrRefSet(intersectModRef(AA->getModRefInfo(Write, ReadLoc),
+ ModRefInfo::ModRef)))
+ continue;
+ if (DT->dominates(Write, Read))
+ RAWDeps[Read].insert(Write);
+ }
}
- if (!LoadPairs.count(Ld0) || LoadPairs[Ld0] != Ld1)
- return false;
+  // Check that there is no write between the two loads which would prevent
+  // them from being safely merged.
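+  // For example (pointer and value names are illustrative only), a sequence
+  // such as:
+  //   %lo = load i16, i16* %addr        ; base load
+  //   store i16 %v, i16* %other         ; may alias the second load's address
+  //   %hi = load i16, i16* %addr.next   ; offset load
+  // must be rejected, because widening would effectively hoist the second
+  // load above the possibly aliasing store.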
+ auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) {
+ LoadInst *Dominator = DT->dominates(Base, Offset) ? Base : Offset;
+ LoadInst *Dominated = DT->dominates(Base, Offset) ? Offset : Base;
- VecMem.clear();
- VecMem.push_back(Ld0);
- VecMem.push_back(Ld1);
- return true;
-}
+ if (RAWDeps.count(Dominated)) {
+ InstSet &WritesBefore = RAWDeps[Dominated];
-/// Iterate through the block and record base, offset pairs of loads as well as
-/// maximal sequences of sequential loads.
-bool ARMParallelDSP::RecordSequentialLoads(BasicBlock *Header) {
- SmallVector<LoadInst*, 8> Loads;
- for (auto &I : *Header) {
- auto *Ld = dyn_cast<LoadInst>(&I);
- if (!Ld)
- continue;
- Loads.push_back(Ld);
- }
+ for (auto Before : WritesBefore) {
- std::map<LoadInst*, LoadInst*> BaseLoads;
+ // We can't move the second load backward, past a write, to merge
+ // with the first load.
+ if (DT->dominates(Dominator, Before))
+ return false;
+ }
+ }
+ return true;
+ };
- for (auto *Ld0 : Loads) {
- for (auto *Ld1 : Loads) {
- if (Ld0 == Ld1)
+ // Record base, offset load pairs.
+ for (auto *Base : Loads) {
+ for (auto *Offset : Loads) {
+ if (Base == Offset)
continue;
- if (AreSequentialAccesses<LoadInst>(Ld0, Ld1, *DL, *SE)) {
- LoadPairs[Ld0] = Ld1;
- if (BaseLoads.count(Ld0)) {
- LoadInst *Base = BaseLoads[Ld0];
- BaseLoads[Ld1] = Base;
- SequentialLoads[Base].push_back(Ld1);
- } else {
- BaseLoads[Ld1] = Ld0;
- SequentialLoads[Ld0].push_back(Ld1);
- }
+ if (AreSequentialAccesses<LoadInst>(Base, Offset, *DL, *SE) &&
+ SafeToPair(Base, Offset)) {
+ LoadPairs[Base] = Offset;
+ OffsetLoads.insert(Offset);
+ break;
}
}
}
+
+ LLVM_DEBUG(if (!LoadPairs.empty()) {
+ dbgs() << "Consecutive load pairs:\n";
+ for (auto &MapIt : LoadPairs) {
+ LLVM_DEBUG(dbgs() << *MapIt.first << ", "
+ << *MapIt.second << "\n");
+ }
+ });
return LoadPairs.size() > 1;
}
-void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
- OpChainList &Candidates = R.MACCandidates;
- PMACPairList &PMACPairs = R.PMACPairs;
- const unsigned Elems = Candidates.size();
+// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector
+// multiplications.
+// To use SMLAD:
+// 1) we first need to find integer add then look for this pattern:
+//
+// acc0 = ...
+// ld0 = load i16
+// sext0 = sext i16 %ld0 to i32
+// ld1 = load i16
+// sext1 = sext i16 %ld1 to i32
+// mul0 = mul %sext0, %sext1
+// ld2 = load i16
+// sext2 = sext i16 %ld2 to i32
+// ld3 = load i16
+// sext3 = sext i16 %ld3 to i32
+// mul1 = mul i32 %sext2, %sext3
+// add0 = add i32 %mul0, %acc0
+// acc1 = add i32 %add0, %mul1
+//
+// Which can be selected to:
+//
+// ldr r0
+// ldr r1
+// smlad r2, r0, r1, r2
+//
+// If constants are used instead of loads, these will need to be hoisted
+// out and into a register.
+//
+// If loop invariants are used instead of loads, these need to be packed
+// before the loop begins.
+//
+bool ARMParallelDSP::MatchSMLAD(Loop *L) {
+ // Search recursively back through the operands to find a tree of values that
+ // form a multiply-accumulate chain. The search records the Add and Mul
+ // instructions that form the reduction and allows us to find a single value
+  // to be used as the initial input to the accumulator.
+ std::function<bool(Value*, Reduction&)> Search = [&]
+ (Value *V, Reduction &R) -> bool {
+
+ // If we find a non-instruction, try to use it as the initial accumulator
+ // value. This may have already been found during the search in which case
+    // this function will return false, signaling that the search has failed.
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return R.InsertAcc(V);
+
+ switch (I->getOpcode()) {
+ default:
+ break;
+ case Instruction::PHI:
+ // Could be the accumulator value.
+ return R.InsertAcc(V);
+ case Instruction::Add: {
+      // To be within the MAC chain, adds should be adding together two muls,
+      // or another add and a mul. One of the operands may also be the
+      // accumulator value, at which point we should stop searching.
+ bool ValidLHS = Search(I->getOperand(0), R);
+ bool ValidRHS = Search(I->getOperand(1), R);
+      if (!ValidLHS && !ValidRHS)
+ return false;
+ else if (ValidLHS && ValidRHS) {
+ R.InsertAdd(I);
+ return true;
+ } else {
+ R.InsertAdd(I);
+ return R.InsertAcc(I);
+ }
+ }
+ case Instruction::Mul: {
+ Value *MulOp0 = I->getOperand(0);
+ Value *MulOp1 = I->getOperand(1);
+ if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1)) {
+ ValueList LHS;
+ ValueList RHS;
+ if (IsNarrowSequence<16>(MulOp0, LHS) &&
+ IsNarrowSequence<16>(MulOp1, RHS)) {
+ R.InsertMul(I, LHS, RHS);
+ return true;
+ }
+ }
+ return false;
+ }
+ case Instruction::SExt:
+ return Search(I->getOperand(0), R);
+ }
+ return false;
+ };
+
+ bool Changed = false;
+ SmallPtrSet<Instruction*, 4> AllAdds;
+ BasicBlock *Latch = L->getLoopLatch();
+
+ for (Instruction &I : reverse(*Latch)) {
+ if (I.getOpcode() != Instruction::Add)
+ continue;
+
+ if (AllAdds.count(&I))
+ continue;
+
+ const auto *Ty = I.getType();
+ if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
+ continue;
+
+ Reduction R(&I);
+ if (!Search(&I, R))
+ continue;
+
+ if (!CreateParallelPairs(R))
+ continue;
+
+ InsertParallelMACs(R);
+ Changed = true;
+ AllAdds.insert(R.getAdds().begin(), R.getAdds().end());
+ }
+
+ return Changed;
+}
+
+bool ARMParallelDSP::CreateParallelPairs(Reduction &R) {
+
+ // Not enough mul operations to make a pair.
+ if (R.getMuls().size() < 2)
+ return false;
- if (Elems < 2)
- return;
+ // Check that the muls operate directly upon sign extended loads.
+ for (auto &MulChain : R.getMuls()) {
+    // A mul has 2 operands, and a narrow op consists of a sext and a load;
+    // thus we expect at least 4 items in this operand value list.
+ // we expect at least 4 items in this operand value list.
+ if (MulChain->size() < 4) {
+ LLVM_DEBUG(dbgs() << "Operand list too short.\n");
+ return false;
+ }
+ MulChain->PopulateLoads();
+ ValueList &LHS = static_cast<BinOpChain*>(MulChain.get())->LHS;
+ ValueList &RHS = static_cast<BinOpChain*>(MulChain.get())->RHS;
+
+ // Use +=2 to skip over the expected extend instructions.
+ for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
+ if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
+ return false;
+ }
+ }
- auto CanPair = [&](BinOpChain *PMul0, BinOpChain *PMul1) {
+ auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) {
if (!PMul0->AreSymmetrical(PMul1))
return false;
@@ -363,23 +613,22 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
return false;
- LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n"
- << "\t Ld0: " << *Ld0 << "\n"
- << "\t Ld1: " << *Ld1 << "\n"
- << "and operands " << x + 2 << ":\n"
- << "\t Ld2: " << *Ld2 << "\n"
- << "\t Ld3: " << *Ld3 << "\n");
+ LLVM_DEBUG(dbgs() << "Loads:\n"
+ << " - " << *Ld0 << "\n"
+ << " - " << *Ld1 << "\n"
+ << " - " << *Ld2 << "\n"
+ << " - " << *Ld3 << "\n");
if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ R.AddMulPair(PMul0, PMul1);
return true;
} else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
PMul1->Exchange = true;
- PMACPairs.push_back(std::make_pair(PMul0, PMul1));
+ R.AddMulPair(PMul0, PMul1);
return true;
}
} else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
@@ -389,16 +638,18 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
LLVM_DEBUG(dbgs() << " and swapping muls\n");
PMul0->Exchange = true;
// Only the second operand can be exchanged, so swap the muls.
- PMACPairs.push_back(std::make_pair(PMul1, PMul0));
+ R.AddMulPair(PMul1, PMul0);
return true;
}
}
return false;
};
+ OpChainList &Muls = R.getMuls();
+ const unsigned Elems = Muls.size();
SmallPtrSet<const Instruction*, 4> Paired;
for (unsigned i = 0; i < Elems; ++i) {
- BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i].get());
+ BinOpChain *PMul0 = static_cast<BinOpChain*>(Muls[i].get());
if (Paired.count(PMul0->Root))
continue;
@@ -406,7 +657,7 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
if (i == j)
continue;
- BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[j].get());
+ BinOpChain *PMul1 = static_cast<BinOpChain*>(Muls[j].get());
if (Paired.count(PMul1->Root))
continue;
@@ -417,315 +668,133 @@ void ARMParallelDSP::CreateParallelMACPairs(Reduction &R) {
assert(PMul0 != PMul1 && "expected different chains");
- LLVM_DEBUG(dbgs() << "\nCheck parallel muls:\n";
- dbgs() << "- "; Mul0->dump();
- dbgs() << "- "; Mul1->dump());
-
- LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");
- if (CanPair(PMul0, PMul1)) {
+ if (CanPair(R, PMul0, PMul1)) {
Paired.insert(Mul0);
Paired.insert(Mul1);
break;
}
}
}
+ return !R.getMulPairs().empty();
}
-bool ARMParallelDSP::InsertParallelMACs(Reduction &Reduction) {
- Instruction *Acc = Reduction.Phi;
- Instruction *InsertAfter = Reduction.AccIntAdd;
-
- for (auto &Pair : Reduction.PMACPairs) {
- BinOpChain *PMul0 = Pair.first;
- BinOpChain *PMul1 = Pair.second;
- LLVM_DEBUG(dbgs() << "Found parallel MACs!!\n";
- dbgs() << "- "; PMul0->Root->dump();
- dbgs() << "- "; PMul1->Root->dump());
-
- auto *VecLd0 = cast<LoadInst>(PMul0->VecLd[0]);
- auto *VecLd1 = cast<LoadInst>(PMul1->VecLd[0]);
- Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, PMul1->Exchange, InsertAfter);
- InsertAfter = Acc;
- }
-
- if (Acc != Reduction.Phi) {
- LLVM_DEBUG(dbgs() << "Replace Accumulate: "; Acc->dump());
- Reduction.AccIntAdd->replaceAllUsesWith(Acc);
- return true;
- }
- return false;
-}
-
-static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,
- ReductionList &Reductions) {
- RecurrenceDescriptor RecDesc;
- const bool HasFnNoNaNAttr =
- F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
- const BasicBlock *Latch = TheLoop->getLoopLatch();
-
- // We need a preheader as getIncomingValueForBlock assumes there is one.
- if (!TheLoop->getLoopPreheader()) {
- LLVM_DEBUG(dbgs() << "No preheader found, bailing out\n");
- return;
- }
-
- for (PHINode &Phi : Header->phis()) {
- const auto *Ty = Phi.getType();
- if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
- continue;
-
- const bool IsReduction =
- RecurrenceDescriptor::AddReductionVar(&Phi,
- RecurrenceDescriptor::RK_IntegerAdd,
- TheLoop, HasFnNoNaNAttr, RecDesc);
- if (!IsReduction)
- continue;
-
- Instruction *Acc = dyn_cast<Instruction>(Phi.getIncomingValueForBlock(Latch));
- if (!Acc)
- continue;
-
- Reductions.push_back(Reduction(&Phi, Acc));
- }
-
- LLVM_DEBUG(
- dbgs() << "\nAccumulating integer additions (reductions) found:\n";
- for (auto &R : Reductions) {
- dbgs() << "- "; R.Phi->dump();
- dbgs() << "-> "; R.AccIntAdd->dump();
- }
- );
-}
-
-static void AddMACCandidate(OpChainList &Candidates,
- Instruction *Mul,
- Value *MulOp0, Value *MulOp1) {
- LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());
- assert(Mul->getOpcode() == Instruction::Mul &&
- "expected mul instruction");
- ValueList LHS;
- ValueList RHS;
- if (IsNarrowSequence<16>(MulOp0, LHS) &&
- IsNarrowSequence<16>(MulOp1, RHS)) {
- LLVM_DEBUG(dbgs() << "OK, found narrow mul: "; Mul->dump());
- Candidates.push_back(make_unique<BinOpChain>(Mul, LHS, RHS));
- }
-}
-
-static void MatchParallelMACSequences(Reduction &R,
- OpChainList &Candidates) {
- Instruction *Acc = R.AccIntAdd;
- LLVM_DEBUG(dbgs() << "\n- Analysing:\t" << *Acc);
-
- // Returns false to signal the search should be stopped.
- std::function<bool(Value*)> Match =
- [&Candidates, &Match](Value *V) -> bool {
- auto *I = dyn_cast<Instruction>(V);
- if (!I)
- return false;
-
- switch (I->getOpcode()) {
- case Instruction::Add:
- if (Match(I->getOperand(0)) || (Match(I->getOperand(1))))
- return true;
- break;
- case Instruction::Mul: {
- Value *MulOp0 = I->getOperand(0);
- Value *MulOp1 = I->getOperand(1);
- if (isa<SExtInst>(MulOp0) && isa<SExtInst>(MulOp1))
- AddMACCandidate(Candidates, I, MulOp0, MulOp1);
- return false;
- }
- case Instruction::SExt:
- return Match(I->getOperand(0));
- }
- return false;
+void ARMParallelDSP::InsertParallelMACs(Reduction &R) {
+
+ auto CreateSMLADCall = [&](SmallVectorImpl<LoadInst*> &VecLd0,
+ SmallVectorImpl<LoadInst*> &VecLd1,
+ Value *Acc, bool Exchange,
+ Instruction *InsertAfter) {
+ // Replace the reduction chain with an intrinsic call
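+    // (roughly, for the i32 accumulator case, with placeholder value names:
+    //    %acc.next = call i32 @llvm.arm.smlad(i32 %ld01, i32 %ld23, i32 %acc))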
+ IntegerType *Ty = IntegerType::get(M->getContext(), 32);
+ LoadInst *WideLd0 = WideLoads.count(VecLd0[0]) ?
+ WideLoads[VecLd0[0]]->getLoad() : CreateWideLoad(VecLd0, Ty);
+ LoadInst *WideLd1 = WideLoads.count(VecLd1[0]) ?
+ WideLoads[VecLd1[0]]->getLoad() : CreateWideLoad(VecLd1, Ty);
+
+ Value* Args[] = { WideLd0, WideLd1, Acc };
+ Function *SMLAD = nullptr;
+ if (Exchange)
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
+ else
+ SMLAD = Acc->getType()->isIntegerTy(32) ?
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
+ Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
+
+ IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+ ++BasicBlock::iterator(InsertAfter));
+ Instruction *Call = Builder.CreateCall(SMLAD, Args);
+ NumSMLAD++;
+ return Call;
};
- while (Match (Acc));
- LLVM_DEBUG(dbgs() << "Finished matching MAC sequences, found "
- << Candidates.size() << " candidates.\n");
-}
-
-// Collects all instructions that are not part of the MAC chains, which is the
-// set of instructions that can potentially alias with the MAC operands.
-static void AliasCandidates(BasicBlock *Header, Instructions &Reads,
- Instructions &Writes) {
- for (auto &I : *Header) {
- if (I.mayReadFromMemory())
- Reads.push_back(&I);
- if (I.mayWriteToMemory())
- Writes.push_back(&I);
- }
-}
-
-// Check whether statements in the basic block that write to memory alias with
-// the memory locations accessed by the MAC-chains.
-// TODO: we need the read statements when we accept more complicated chains.
-static bool AreAliased(AliasAnalysis *AA, Instructions &Reads,
- Instructions &Writes, OpChainList &MACCandidates) {
- LLVM_DEBUG(dbgs() << "Alias checks:\n");
- for (auto &MAC : MACCandidates) {
- LLVM_DEBUG(dbgs() << "mul: "; MAC->Root->dump());
-
- // At the moment, we allow only simple chains that only consist of reads,
- // accumulate their result with an integer add, and thus that don't write
- // memory, and simply bail if they do.
- if (!MAC->ReadOnly)
- return true;
-
- // Now for all writes in the basic block, check that they don't alias with
- // the memory locations accessed by our MAC-chain:
- for (auto *I : Writes) {
- LLVM_DEBUG(dbgs() << "- "; I->dump());
- assert(MAC->MemLocs.size() >= 2 && "expecting at least 2 memlocs");
- for (auto &MemLoc : MAC->MemLocs) {
- if (isModOrRefSet(intersectModRef(AA->getModRefInfo(I, MemLoc),
- ModRefInfo::ModRef))) {
- LLVM_DEBUG(dbgs() << "Yes, aliases found\n");
- return true;
- }
- }
- }
- }
-
- LLVM_DEBUG(dbgs() << "OK: no aliases found!\n");
- return false;
-}
+ Instruction *InsertAfter = R.getRoot();
+ Value *Acc = R.getAccumulator();
+ if (!Acc)
+ Acc = ConstantInt::get(IntegerType::get(M->getContext(), 32), 0);
-static bool CheckMACMemory(OpChainList &Candidates) {
- for (auto &C : Candidates) {
- // A mul has 2 operands, and a narrow op consist of sext and a load; thus
- // we expect at least 4 items in this operand value list.
- if (C->size() < 4) {
- LLVM_DEBUG(dbgs() << "Operand list too short.\n");
- return false;
- }
- C->SetMemoryLocations();
- ValueList &LHS = static_cast<BinOpChain*>(C.get())->LHS;
- ValueList &RHS = static_cast<BinOpChain*>(C.get())->RHS;
+ LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n"
+ << "Acc: " << *Acc << "\n");
+ for (auto &Pair : R.getMulPairs()) {
+ BinOpChain *PMul0 = Pair.first;
+ BinOpChain *PMul1 = Pair.second;
+ LLVM_DEBUG(dbgs() << "Muls:\n"
+ << "- " << *PMul0->Root << "\n"
+ << "- " << *PMul1->Root << "\n");
- // Use +=2 to skip over the expected extend instructions.
- for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
- if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
- return false;
- }
+ Acc = CreateSMLADCall(PMul0->VecLd, PMul1->VecLd, Acc, PMul1->Exchange,
+ InsertAfter);
+ InsertAfter = cast<Instruction>(Acc);
}
- return true;
+ R.UpdateRoot(cast<Instruction>(Acc));
}
-// Loop Pass that needs to identify integer add/sub reductions of 16-bit vector
-// multiplications.
-// To use SMLAD:
-// 1) we first need to find integer add reduction PHIs,
-// 2) then from the PHI, look for this pattern:
-//
-// acc0 = phi i32 [0, %entry], [%acc1, %loop.body]
-// ld0 = load i16
-// sext0 = sext i16 %ld0 to i32
-// ld1 = load i16
-// sext1 = sext i16 %ld1 to i32
-// mul0 = mul %sext0, %sext1
-// ld2 = load i16
-// sext2 = sext i16 %ld2 to i32
-// ld3 = load i16
-// sext3 = sext i16 %ld3 to i32
-// mul1 = mul i32 %sext2, %sext3
-// add0 = add i32 %mul0, %acc0
-// acc1 = add i32 %add0, %mul1
-//
-// Which can be selected to:
-//
-// ldr.h r0
-// ldr.h r1
-// smlad r2, r0, r1, r2
-//
-// If constants are used instead of loads, these will need to be hoisted
-// out and into a register.
-//
-// If loop invariants are used instead of loads, these need to be packed
-// before the loop begins.
-//
-bool ARMParallelDSP::MatchSMLAD(Function &F) {
- BasicBlock *Header = L->getHeader();
- LLVM_DEBUG(dbgs() << "= Matching SMLAD =\n";
- dbgs() << "Header block:\n"; Header->dump();
- dbgs() << "Loop info:\n\n"; L->dump());
+LoadInst* ARMParallelDSP::CreateWideLoad(SmallVectorImpl<LoadInst*> &Loads,
+ IntegerType *LoadTy) {
+ assert(Loads.size() == 2 && "currently only support widening two loads");
- bool Changed = false;
- ReductionList Reductions;
- MatchReductions(F, L, Header, Reductions);
+ LoadInst *Base = Loads[0];
+ LoadInst *Offset = Loads[1];
- for (auto &R : Reductions) {
- OpChainList MACCandidates;
- MatchParallelMACSequences(R, MACCandidates);
- if (!CheckMACMemory(MACCandidates))
- continue;
+ Instruction *BaseSExt = dyn_cast<SExtInst>(Base->user_back());
+ Instruction *OffsetSExt = dyn_cast<SExtInst>(Offset->user_back());
- R.MACCandidates = std::move(MACCandidates);
+ assert((BaseSExt && OffsetSExt)
+ && "Loads should have a single, extending, user");
- LLVM_DEBUG(dbgs() << "MAC candidates:\n";
- for (auto &M : R.MACCandidates)
- M->Root->dump();
- dbgs() << "\n";);
- }
+ std::function<void(Value*, Value*)> MoveBefore =
+ [&](Value *A, Value *B) -> void {
+ if (!isa<Instruction>(A) || !isa<Instruction>(B))
+ return;
- // Collect all instructions that may read or write memory. Our alias
- // analysis checks bail out if any of these instructions aliases with an
- // instruction from the MAC-chain.
- Instructions Reads, Writes;
- AliasCandidates(Header, Reads, Writes);
+ auto *Source = cast<Instruction>(A);
+ auto *Sink = cast<Instruction>(B);
- for (auto &R : Reductions) {
- if (AreAliased(AA, Reads, Writes, R.MACCandidates))
- return false;
- CreateParallelMACPairs(R);
- Changed |= InsertParallelMACs(R);
- }
+ if (DT->dominates(Source, Sink) ||
+ Source->getParent() != Sink->getParent() ||
+ isa<PHINode>(Source) || isa<PHINode>(Sink))
+ return;
- LLVM_DEBUG(if (Changed) dbgs() << "Header block:\n"; Header->dump(););
- return Changed;
-}
+ Source->moveBefore(Sink);
+ for (auto &U : Source->uses())
+ MoveBefore(Source, U.getUser());
+ };
-static LoadInst *CreateLoadIns(IRBuilder<NoFolder> &IRB, LoadInst &BaseLoad,
- const Type *LoadTy) {
- const unsigned AddrSpace = BaseLoad.getPointerAddressSpace();
+ // Insert the load at the point of the original dominating load.
+ LoadInst *DomLoad = DT->dominates(Base, Offset) ? Base : Offset;
+ IRBuilder<NoFolder> IRB(DomLoad->getParent(),
+ ++BasicBlock::iterator(DomLoad));
- Value *VecPtr = IRB.CreateBitCast(BaseLoad.getPointerOperand(),
+ // Bitcast the pointer to a wider type and create the wide load, while making
+ // sure to maintain the original alignment as this prevents ldrd from being
+ // generated when it could be illegal due to memory alignment.
+ const unsigned AddrSpace = DomLoad->getPointerAddressSpace();
+ Value *VecPtr = IRB.CreateBitCast(Base->getPointerOperand(),
LoadTy->getPointerTo(AddrSpace));
- return IRB.CreateAlignedLoad(VecPtr, BaseLoad.getAlignment());
-}
-
-Instruction *ARMParallelDSP::CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,
- Instruction *Acc, bool Exchange,
- Instruction *InsertAfter) {
- LLVM_DEBUG(dbgs() << "Create SMLAD intrinsic using:\n"
- << "- " << *VecLd0 << "\n"
- << "- " << *VecLd1 << "\n"
- << "- " << *Acc << "\n"
- << "Exchange: " << Exchange << "\n");
-
- IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
-
- // Replace the reduction chain with an intrinsic call
- const Type *Ty = IntegerType::get(M->getContext(), 32);
- LoadInst *NewLd0 = CreateLoadIns(Builder, VecLd0[0], Ty);
- LoadInst *NewLd1 = CreateLoadIns(Builder, VecLd1[0], Ty);
- Value* Args[] = { NewLd0, NewLd1, Acc };
- Function *SMLAD = nullptr;
- if (Exchange)
- SMLAD = Acc->getType()->isIntegerTy(32) ?
- Intrinsic::getDeclaration(M, Intrinsic::arm_smladx) :
- Intrinsic::getDeclaration(M, Intrinsic::arm_smlaldx);
- else
- SMLAD = Acc->getType()->isIntegerTy(32) ?
- Intrinsic::getDeclaration(M, Intrinsic::arm_smlad) :
- Intrinsic::getDeclaration(M, Intrinsic::arm_smlald);
- CallInst *Call = Builder.CreateCall(SMLAD, Args);
- NumSMLAD++;
- return Call;
+ LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr,
+ Base->getAlignment());
+
+ // Make sure everything is in the correct order in the basic block.
+ MoveBefore(Base->getPointerOperand(), VecPtr);
+ MoveBefore(VecPtr, WideLoad);
+
+ // From the wide load, create two values that equal the original two loads.
+ // Loads[0] needs trunc while Loads[1] needs a lshr and trunc.
+ // TODO: Support big-endian as well.
+ Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType());
+ BaseSExt->setOperand(0, Bottom);
+
+ IntegerType *OffsetTy = cast<IntegerType>(Offset->getType());
+ Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth());
+ Value *Top = IRB.CreateLShr(WideLoad, ShiftVal);
+ Value *Trunc = IRB.CreateTrunc(Top, OffsetTy);
+ OffsetSExt->setOperand(0, Trunc);
+
+ WideLoads.emplace(std::make_pair(Base,
+ make_unique<WidenedLoad>(Loads, WideLoad)));
+ return WideLoad;
}
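// For reference, a minimal sketch of the IR this rewrite produces for two
// adjacent, sign-extended i16 loads on a little-endian target (value names
// here are illustrative only, not taken from the pass):
//
//   %cast   = bitcast i16* %addr to i32*
//   %wide   = load i32, i32* %cast, align 2   ; original alignment is preserved
//   %bottom = trunc i32 %wide to i16          ; feeds the sext that used the base load
//   %sext0  = sext i16 %bottom to i32
//   %top    = lshr i32 %wide, 16              ; shift by the offset load's bit width
//   %trunc  = trunc i32 %top to i16           ; feeds the sext that used the offset load
//   %sext1  = sext i16 %trunc to i32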
// Compare the value lists in Other to this chain.
@@ -741,7 +810,6 @@ bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
}
const unsigned Pairs = VL0.size();
- LLVM_DEBUG(dbgs() << "Number of operand pairs: " << Pairs << "\n");
for (unsigned i = 0; i < Pairs; ++i) {
const Value *V0 = VL0[i];
@@ -749,24 +817,17 @@ bool BinOpChain::AreSymmetrical(BinOpChain *Other) {
const auto *Inst0 = dyn_cast<Instruction>(V0);
const auto *Inst1 = dyn_cast<Instruction>(V1);
- LLVM_DEBUG(dbgs() << "Pair " << i << ":\n";
- dbgs() << "mul1: "; V0->dump();
- dbgs() << "mul2: "; V1->dump());
-
if (!Inst0 || !Inst1)
return false;
- if (Inst0->isSameOperationAs(Inst1)) {
- LLVM_DEBUG(dbgs() << "OK: same operation found!\n");
+ if (Inst0->isSameOperationAs(Inst1))
continue;
- }
const APInt *C0, *C1;
if (!(match(V0, m_APInt(C0)) && match(V1, m_APInt(C1)) && C0 == C1))
return false;
}
- LLVM_DEBUG(dbgs() << "OK: found symmetrical operand lists.\n");
return true;
};
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index 3ff0bee7e5bf..d519490c9c57 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -1,9 +1,8 @@
//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMPredicates.td b/lib/Target/ARM/ARMPredicates.td
new file mode 100644
index 000000000000..0b6b40de80dd
--- /dev/null
+++ b/lib/Target/ARM/ARMPredicates.td
@@ -0,0 +1,211 @@
+//===-- ARMPredicates.td - ARM Instruction Predicates ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps", "armv4t">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">,
+ AssemblerPredicate<"HasV5TOps", "armv5t">;
+def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps", "armv5te">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops", "armv6">;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
+ AssemblerPredicate<"HasV6MOps",
+ "armv6m or armv6t2">;
+def HasV8MBaseline : Predicate<"Subtarget->hasV8MBaselineOps()">,
+ AssemblerPredicate<"HasV8MBaselineOps",
+ "armv8m.base">;
+def HasV8MMainline : Predicate<"Subtarget->hasV8MMainlineOps()">,
+ AssemblerPredicate<"HasV8MMainlineOps",
+ "armv8m.main">;
+def HasV8_1MMainline : Predicate<"Subtarget->hasV8_1MMainlineOps()">,
+ AssemblerPredicate<"HasV8_1MMainlineOps",
+ "armv8.1m.main">;
+def HasMVEInt : Predicate<"Subtarget->hasMVEIntegerOps()">,
+ AssemblerPredicate<"HasMVEIntegerOps",
+ "mve">;
+def HasMVEFloat : Predicate<"Subtarget->hasMVEFloatOps()">,
+ AssemblerPredicate<"HasMVEFloatOps",
+ "mve.fp">;
+def HasFPRegs : Predicate<"Subtarget->hasFPRegs()">,
+ AssemblerPredicate<"FeatureFPRegs",
+ "fp registers">;
+def HasFPRegs16 : Predicate<"Subtarget->hasFPRegs16()">,
+ AssemblerPredicate<"FeatureFPRegs16",
+ "16-bit fp registers">;
+def HasFPRegs64 : Predicate<"Subtarget->hasFPRegs64()">,
+ AssemblerPredicate<"FeatureFPRegs64",
+ "64-bit fp registers">;
+def HasFPRegsV8_1M : Predicate<"Subtarget->hasFPRegs() && Subtarget->hasV8_1MMainlineOps()">,
+ AssemblerPredicate<"FeatureFPRegs,HasV8_1MMainlineOps",
+ "armv8.1m.main with FP or MVE">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
+ AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops", "armv7">;
+def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
+def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
+ AssemblerPredicate<"HasV8_3aOps", "armv8.3a">;
+def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
+ AssemblerPredicate<"HasV8_4aOps", "armv8.4a">;
+def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
+ AssemblerPredicate<"HasV8_5aOps", "armv8.5a">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2Base()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2Base()">,
+ AssemblerPredicate<"FeatureVFP2_D16_SP", "VFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3Base()">,
+ AssemblerPredicate<"FeatureVFP3_D16_SP", "VFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4Base()">,
+ AssemblerPredicate<"FeatureVFP4_D16_SP", "VFP4">;
+def HasDPVFP : Predicate<"Subtarget->hasFP64()">,
+ AssemblerPredicate<"FeatureFP64",
+ "double precision VFP">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8Base()">,
+ AssemblerPredicate<"FeatureFPARMv8_D16_SP", "FPARMv8">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
+ AssemblerPredicate<"FeatureSHA2", "sha2">;
+def HasAES : Predicate<"Subtarget->hasAES()">,
+ AssemblerPredicate<"FeatureAES", "aes">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
+ AssemblerPredicate<"FeatureDotProd", "dotprod">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
+def HasRAS : Predicate<"Subtarget->hasRAS()">,
+ AssemblerPredicate<"FeatureRAS", "ras">;
+def HasLOB : Predicate<"Subtarget->hasLOB()">,
+ AssemblerPredicate<"FeatureLOB", "lob">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16","full half-float">;
+def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
+ AssemblerPredicate<"FeatureFP16FML","full half-float fml">;
+def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">,
+ AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">;
+def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
+ AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
+ AssemblerPredicate<"FeatureDSP", "dsp">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate<"FeatureDB",
+ "data-barriers">;
+def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">,
+ AssemblerPredicate<"FeatureDFB",
+ "full-data-barrier">;
+def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">,
+ AssemblerPredicate<"FeatureV7Clrex",
+ "v7 clrex">;
+def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">,
+ AssemblerPredicate<"FeatureAcquireRelease",
+ "acquire/release">;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate<"FeatureMP",
+ "mp-extensions">;
+def HasVirtualization: Predicate<"false">,
+ AssemblerPredicate<"FeatureVirtualization",
+ "virtualization-extensions">;
+def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
+ AssemblerPredicate<"FeatureTrustZone",
+ "TrustZone">;
+def Has8MSecExt : Predicate<"Subtarget->has8MSecExt()">,
+ AssemblerPredicate<"Feature8MSecExt",
+ "ARMv8-M Security Extensions">;
+def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb", "thumb">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2",
+ "thumb2">;
+def IsMClass : Predicate<"Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"!FeatureMClass",
+ "!armv*m">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb", "arm-mode">;
+def IsMachO : Predicate<"Subtarget->isTargetMachO()">;
+def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
+def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">;
+def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">;
+def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">;
+def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
+ AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
+def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
+
+def UseNegativeImmediates :
+ Predicate<"false">,
+ AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+let RecomputePerFunction = 1 in {
+ def UseMovt : Predicate<"Subtarget->useMovt()">;
+ def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+ def UseMovtInPic : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+ def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
+
+ def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
+ " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
+ "Subtarget->hasMinSize())">;
+}
+def UseMulOps : Predicate<"Subtarget->useMulOps()">;
+
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
+// Do not use them for Darwin platforms.
+def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
+ " FPOpFusion::Fast && "
+ " Subtarget->hasVFP4Base()) && "
+ "!Subtarget->isTargetDarwin() &&"
+ "Subtarget->useFPVMLx()">;
+
+def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
+def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
+
+def HasFastVDUP32 : Predicate<"!Subtarget->hasSlowVDUP32()">;
+def HasSlowVDUP32 : Predicate<"Subtarget->hasSlowVDUP32()">;
+
+def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||"
+ "!Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
+ "Subtarget->useNEONForSinglePrecisionFP()">;
+
+let RecomputePerFunction = 1 in {
+ def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
+ def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
+}
+
+def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+
+// Armv8.5-A extensions
+def HasSB : Predicate<"Subtarget->hasSB()">,
+ AssemblerPredicate<"FeatureSB", "sb">;
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 4f28f2dafc70..b100150175fc 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- ARMRegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -161,6 +160,10 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
"Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPREven_and_tGPR_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
#ifndef NDEBUG
@@ -182,6 +185,13 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
case tGPR_and_tcGPRRegClassID:
case tcGPRRegClassID:
case tGPRRegClassID:
+ case tGPREvenRegClassID:
+ case tGPROddRegClassID:
+ case tGPR_and_tGPREvenRegClassID:
+ case tGPR_and_tGPROddRegClassID:
+ case tGPREven_and_tcGPRRegClassID:
+ case tGPREven_and_tGPR_and_tcGPRRegClassID:
+ case tGPROdd_and_tcGPRRegClassID:
return getRegBank(ARM::GPRRegBankID);
case HPRRegClassID:
case SPR_8RegClassID:
@@ -218,7 +228,15 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
switch (Opc) {
case G_ADD:
- case G_SUB:
+ case G_SUB: {
+ // Integer operations where the source and destination are in the
+ // same register class.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OperandsMapping = Ty.getSizeInBits() == 64
+ ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ break;
+ }
case G_MUL:
case G_AND:
case G_OR:
@@ -337,6 +355,14 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&ARM::ValueMappings[ARM::GPR3OpsIdx]});
break;
}
+ case G_FCONSTANT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ OperandsMapping = getOperandsMapping(
+ {Ty.getSizeInBits() == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::SPR3OpsIdx],
+ nullptr});
+ break;
+ }
case G_CONSTANT:
case G_FRAME_INDEX:
case G_GLOBAL_VALUE:
@@ -424,6 +450,19 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case DBG_VALUE: {
+ SmallVector<const ValueMapping *, 4> OperandBanks(NumOperands);
+ const MachineOperand &MaybeReg = MI.getOperand(0);
+ if (MaybeReg.isReg() && MaybeReg.getReg()) {
+ unsigned Size = MRI.getType(MaybeReg.getReg()).getSizeInBits();
+ if (Size > 32 && Size != 64)
+ return getInvalidInstructionMapping();
+ OperandBanks[0] = Size == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ }
+ OperandsMapping = getOperandsMapping(OperandBanks);
+ break;
+ }
default:
return getInvalidInstructionMapping();
}
@@ -433,7 +472,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
for (const auto &Mapping : OperandsMapping[i]) {
assert(
(Mapping.RegBank->getID() != ARM::FPRRegBankID ||
- MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+ MF.getSubtarget<ARMSubtarget>().hasVFP2Base()) &&
"Trying to use floating point register bank on target without vfp");
}
}
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.h b/lib/Target/ARM/ARMRegisterBankInfo.h
index 9650b358f319..1961f7af49bb 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -1,9 +1,8 @@
//===- ARMRegisterBankInfo ---------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/ARM/ARMRegisterBanks.td b/lib/Target/ARM/ARMRegisterBanks.td
index 6e3834da3bb5..e4ebf793f9b0 100644
--- a/lib/Target/ARM/ARMRegisterBanks.td
+++ b/lib/Target/ARM/ARMRegisterBanks.td
@@ -1,9 +1,8 @@
//=- ARMRegisterBanks.td - Describe the ARM Register Banks --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index e6e8cdf965e2..6649750bb388 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index e2e650e4af93..87c0f322d3b3 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index dc56186cb54a..92ae26b3729d 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,7 +13,8 @@ include "ARMSystemRegister.td"
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = [],
+ list<string> altNames = []> : Register<n, altNames> {
let HWEncoding = Enc;
let Namespace = "ARM";
let SubRegs = subregs;
@@ -27,6 +27,11 @@ class ARMFReg<bits<16> Enc, string n> : Register<n> {
let Namespace = "ARM";
}
+let Namespace = "ARM",
+ FallbackRegAltNameIndex = NoRegAltName in {
+ def RegNamesRaw : RegAltNameIndex;
+}
+
// Subregister indices.
let Namespace = "ARM" in {
def qqsub_0 : SubRegIndex<256>;
@@ -84,9 +89,11 @@ def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>;
def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
-def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
-def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
-def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+let RegAltNameIndices = [RegNamesRaw] in {
+def SP : ARMReg<13, "sp", [], ["r13"]>, DwarfRegNum<[13]>;
+def LR : ARMReg<14, "lr", [], ["r14"]>, DwarfRegNum<[14]>;
+def PC : ARMReg<15, "pc", [], ["r15"]>, DwarfRegNum<[15]>;
+}
}
// Float registers
@@ -190,6 +197,17 @@ def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
def FPINST : ARMReg<9, "fpinst">;
def FPINST2 : ARMReg<10, "fpinst2">;
+// These encodings aren't actual instruction encodings; their encoding depends
+// on the instruction they are used in. For VPR, 32 was chosen so that it
+// always comes last in spr_reglist_with_vpr.
+def VPR : ARMReg<32, "vpr">;
+def FPSCR_NZCVQC
+ : ARMReg<2, "fpscr_nzcvqc">;
+def P0 : ARMReg<13, "p0">;
+def FPCXTNS : ARMReg<14, "fpcxtns">;
+def FPCXTS : ARMReg<15, "fpcxts">;
+
+def ZR : ARMReg<15, "zr">, DwarfRegNum<[15]>;
// Register classes.
//
@@ -209,9 +227,10 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
// know how to spill them. If we make our prologue/epilogue code smarter at
// some point, we can go back to using the above allocation orders for the
// Thumb1 instructions that know how to use hi regs.
- let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8),
+ (add (trunc GPR, 8), R12, LR, (shl GPR, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticString = "operand must be a register in range [r0, r15]";
}
@@ -220,9 +239,10 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
// certain operand slots, particularly as the destination. Primarily
// useful for disassembly.
def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
- let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8),
+ (add (trunc GPRnopc, 8), R12, LR, (shl GPRnopc, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticString = "operand must be a register in range [r0, r14]";
}
@@ -238,6 +258,27 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)
let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv";
}
+// GPRs without the PC and SP registers but with APSR. Used by CLRM instruction.
+def GPRwithAPSRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR)> {
+ let isAllocatable = 0;
+}
+
+def GPRwithZR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), ZR)> {
+ let AltOrders = [(add LR, GPRwithZR), (trunc GPRwithZR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+ let DiagnosticString = "operand must be a register in range [r0, r14] or zr";
+}
+
+def GPRwithZRnosp : RegisterClass<"ARM", [i32], 32, (sub GPRwithZR, SP)> {
+ let AltOrders = [(add LR, GPRwithZRnosp), (trunc GPRwithZRnosp, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+ let DiagnosticString = "operand must be a register in range [r0, r12] or r14 or zr";
+}
+
// GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the
// implied SP argument list.
// FIXME: It would be better to not use this at all and refactor the
@@ -247,14 +288,19 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)> {
let DiagnosticString = "operand must be a register sp";
}
+// GPRlr - Only LR is legal. Used by ARMv8.1-M Low Overhead Loop instructions
+// where LR is the only legal loop counter register.
+def GPRlr : RegisterClass<"ARM", [i32], 32, (add LR)>;
+
// restricted GPR register class. Many Thumb2 instructions allow the full
// register range for operands, but have undefined behaviours when PC
// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
- let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8),
+ (add (trunc rGPR, 8), R12, LR, (shl rGPR, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticType = "rGPR";
}
@@ -285,12 +331,38 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
}];
}
+def tGPROdd : RegisterClass<"ARM", [i32], 32, (add R1, R3, R5, R7, R9, R11)> {
+ let AltOrders = [(and tGPROdd, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+ let DiagnosticString =
+ "operand must be an odd-numbered register in range [r1,r11]";
+}
+
+def tGPREven : RegisterClass<"ARM", [i32], 32, (add R0, R2, R4, R6, R8, R10, R12, LR)> {
+ let AltOrders = [(and tGPREven, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ }];
+ let DiagnosticString = "operand must be an even-numbered register";
+}
+
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
+// MVE Condition code register.
+def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1], 32, (add VPR)> {
+// let CopyCost = -1; // Don't allow copying of status registers.
+}
+
+// FPSCR, when the flags at the top of it are used as the input or
+// output to an instruction such as MVE VADC.
+def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>;
+
// Scalar single precision floating point register class.
// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
// to avoid partial-write dependencies on D or Q (depending on platform)
@@ -302,7 +374,7 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
(decimate (rotl SPR, 1), 4),
(decimate (rotl SPR, 1), 2))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
}];
let DiagnosticString = "operand must be a register in range [s0, s31]";
}
@@ -314,7 +386,7 @@ def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
(decimate (rotl HPR, 1), 4),
(decimate (rotl HPR, 1), 2))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
}];
let DiagnosticString = "operand must be a register in range [s0, s31]";
}
@@ -336,11 +408,18 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6
let AltOrders = [(rotl DPR, 16),
(add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
}];
let DiagnosticType = "DPR";
}
+// Scalar single and double precision floating point and VPR register class,
+// this is only used for parsing; don't use it anywhere else, as the sizes and
+// types don't match!
+def FPWithVPR : RegisterClass<"ARM", [f32], 32, (add SPR, DPR, VPR)> {
+ let isAllocatable = 0;
+}
+
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
@@ -359,8 +438,10 @@ def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16],
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
(sequence "Q%u", 0, 15)> {
// Allocate non-VFP2 aliases Q8-Q15 first.
- let AltOrders = [(rotl QPR, 8)];
- let AltOrderSelect = [{ return 1; }];
+ let AltOrders = [(rotl QPR, 8), (trunc QPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps();
+ }];
let DiagnosticString = "operand must be a register in range [q0, q15]";
}
@@ -376,6 +457,12 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
let DiagnosticString = "operand must be a register in range [q0, q3]";
}
+// MVE 128-bit vector register class. This class is only really needed for
+// parsing assembly, since we still have to truncate the register set in the QPR
+// class anyway.
+def MQPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
+ 128, (trunc QPR, 8)>;
+
// Pseudo-registers representing odd-even pairs of D registers. The even-odd
// pairs are already represented by the Q registers.
// These are needed by NEON instructions requiring two consecutive D registers.
@@ -390,8 +477,11 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
128, (interleave QPR, TuplesOE2D)> {
// Allocate starting at non-VFP2 registers D16-D31 first.
// Prefer even-odd pairs as they are easier to copy.
- let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
- let AltOrderSelect = [{ return 1; }];
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16)),
+ (add (trunc QPR, 8), (trunc DPair, 16))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps();
+ }];
}
// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP.
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index ed5a3a7bb696..ce74d325c4e5 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -1,9 +1,8 @@
//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -425,4 +424,4 @@ include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
include "ARMScheduleA57.td"
-include "ARMScheduleM3.td"
+include "ARMScheduleM4.td"
diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td
index 63f975ba6e39..a79f3348f338 100644
--- a/lib/Target/ARM/ARMScheduleA57.td
+++ b/lib/Target/ARM/ARMScheduleA57.td
@@ -1,9 +1,8 @@
//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -95,6 +94,9 @@ def CortexA57Model : SchedMachineModel {
// FIXME: Remove when all errors have been fixed.
let FullInstRWOverlapCheck = 0;
+
+ let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
+ HasFPRegsV8_1M];
}
//===----------------------------------------------------------------------===//
@@ -1175,7 +1177,8 @@ def : InstRW<[A57Write_8cyc_1V], (instregex
// ASIMD FP max/min
def : InstRW<[A57Write_5cyc_1V], (instregex
- "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+ "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "(NEON|VFP)_VMAXNM",
+ "(NEON|VFP)_VMINNM")>;
// ASIMD FP multiply
def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
diff --git a/lib/Target/ARM/ARMScheduleA57WriteRes.td b/lib/Target/ARM/ARMScheduleA57WriteRes.td
index 670717dc7c13..5ba61503686e 100644
--- a/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -1,9 +1,8 @@
//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index ba380cba100f..1be0ee4334a8 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1,9 +1,8 @@
//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index fc301c589269..21d32bde4710 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1,9 +1,8 @@
//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMScheduleM3.td b/lib/Target/ARM/ARMScheduleM3.td
deleted file mode 100644
index 93f8299f9bd0..000000000000
--- a/lib/Target/ARM/ARMScheduleM3.td
+++ /dev/null
@@ -1,21 +0,0 @@
-//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the machine model for the ARM Cortex-M3 processor.
-//
-//===----------------------------------------------------------------------===//
-
-def CortexM3Model : SchedMachineModel {
- let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
- let MicroOpBufferSize = 0; // In-order
- let LoadLatency = 2; // Latency when not pipelined, not pc-relative
- let MispredictPenalty = 2; // Best case branch taken cost
-
- let CompleteModel = 0;
-}
diff --git a/lib/Target/ARM/ARMScheduleM4.td b/lib/Target/ARM/ARMScheduleM4.td
new file mode 100644
index 000000000000..38c8ea2b4f35
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleM4.td
@@ -0,0 +1,119 @@
+//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexM4Model : SchedMachineModel {
+ let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
+ let MicroOpBufferSize = 0; // In-order
+ let LoadLatency = 2; // Latency when not pipelined, not pc-relative
+ let MispredictPenalty = 2; // Best case branch taken cost
+ let PostRAScheduler = 1;
+
+ let CompleteModel = 0;
+}
+
+
+// We model the entire cpu as a single pipeline with a BufferSize = 0 since
+// Cortex-M4 is in-order.
+
+def M4Unit : ProcResource<1> { let BufferSize = 0; }
+
+
+let SchedModel = CortexM4Model in {
+
+// Some definitions of latencies we apply to different instructions
+
+class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 1; }
+class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 2; }
+class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 3; }
+class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 14; }
+def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; }
+def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; }
+class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
+class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;
+
+
+// Loads, MACs and DIVs all get a higher latency of 2
+def : M4UnitL2<WriteLd>;
+def : M4UnitL2<WriteMAC32>;
+def : M4UnitL2<WriteMAC64Hi>;
+def : M4UnitL2<WriteMAC64Lo>;
+def : M4UnitL2<WriteMAC16>;
+def : M4UnitL2<WriteDIV>;
+
+def : M4UnitL2I<(instregex "(t|t2)LDM")>;
+
+
+// For stores we use a latency of 1, as they have no outputs
+
+def : M4UnitL1<WriteST>;
+def : M4UnitL1I<(instregex "(t|t2)STM")>;
+
+
+// Everything else has a Latency of 1
+
+def : M4UnitL1<WriteALU>;
+def : M4UnitL1<WriteALUsi>;
+def : M4UnitL1<WriteALUsr>;
+def : M4UnitL1<WriteALUSsr>;
+def : M4UnitL1<WriteBr>;
+def : M4UnitL1<WriteBrL>;
+def : M4UnitL1<WriteBrTbl>;
+def : M4UnitL1<WriteCMPsi>;
+def : M4UnitL1<WriteCMPsr>;
+def : M4UnitL1<WriteCMP>;
+def : M4UnitL1<WriteMUL32>;
+def : M4UnitL1<WriteMUL64Hi>;
+def : M4UnitL1<WriteMUL64Lo>;
+def : M4UnitL1<WriteMUL16>;
+def : M4UnitL1<WriteNoop>;
+def : M4UnitL1<WritePreLd>;
+def : M4UnitL1I<(instregex "(t|t2)MOV")>;
+def : M4UnitL1I<(instrs COPY)>;
+def : M4UnitL1I<(instregex "t2IT")>;
+def : M4UnitL1I<(instregex "t2SEL", "t2USAD8",
+ "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>;
+
+def : ReadAdvance<ReadALU, 0>;
+def : ReadAdvance<ReadALUsr, 0>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
+
+// Most FP instructions have single-cycle latency, except MACs, divides and square roots.
+// Loads still take 2 cycles.
+
+def : M4UnitL1<WriteFPCVT>;
+def : M4UnitL1<WriteFPMOV>;
+def : M4UnitL1<WriteFPALU32>;
+def : M4UnitL1<WriteFPALU64>;
+def : M4UnitL1<WriteFPMUL32>;
+def : M4UnitL1<WriteFPMUL64>;
+def : M4UnitL2I<(instregex "VLD")>;
+def : M4UnitL1I<(instregex "VST")>;
+def : M4UnitL3<WriteFPMAC32>;
+def : M4UnitL3<WriteFPMAC64>;
+def : M4UnitL14<WriteFPDIV32>;
+def : M4UnitL14<WriteFPDIV64>;
+def : M4UnitL14<WriteFPSQRT32>;
+def : M4UnitL14<WriteFPSQRT64>;
+def : M4UnitL1<WriteVLD1>;
+def : M4UnitL1<WriteVLD2>;
+def : M4UnitL1<WriteVLD3>;
+def : M4UnitL1<WriteVLD4>;
+def : M4UnitL1<WriteVST1>;
+def : M4UnitL1<WriteVST2>;
+def : M4UnitL1<WriteVST3>;
+def : M4UnitL1<WriteVST4>;
+
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 0>;
+
+}
diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td
index 11bce45161b3..d1cbf754b5a1 100644
--- a/lib/Target/ARM/ARMScheduleR52.td
+++ b/lib/Target/ARM/ARMScheduleR52.td
@@ -1,9 +1,8 @@
//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index 87984648139b..00a44599b1b2 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1,9 +1,8 @@
//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 57d0bfb65049..9b86097329c0 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -1,9 +1,8 @@
//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 4d685158e258..cade06e8c109 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -171,7 +170,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
// Code size optimisation: do not inline memcpy if expansion results in
// more instructions than the library call.
- if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+ if (NumMEMCPYs > 1 && Subtarget.hasMinSize()) {
return SDValue();
}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 2ddb42c95397..b8a86ae7310f 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b1d0761e3231..978faed776b0 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,10 +92,12 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
- const ARMBaseTargetMachine &TM, bool IsLittle)
+ const ARMBaseTargetMachine &TM, bool IsLittle,
+ bool MinSize)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
- TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+ CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+ TargetTriple(TT), Options(TM.Options), TM(TM),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -283,6 +284,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexA72:
case CortexA73:
case CortexA75:
+ case CortexA76:
case CortexR4:
case CortexR4F:
case CortexR5:
@@ -359,6 +361,13 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
}
bool ARMSubtarget::enableMachineScheduler() const {
+ // The MachineScheduler can increase register usage, so we use more high
+ // registers and end up with more T2 instructions that cannot be converted to
+ // T1 instructions. At least until we do better at converting to thumb1
+ // instructions, on cortex-m at Oz where we are size-paranoid, don't use the
+ // Machine scheduler, relying on the DAG register pressure scheduler instead.
+ if (isMClass() && hasMinSize())
+ return false;
// Enable the MachineScheduler before register allocation for subtargets
// with the use-misched feature.
return useMachineScheduler();
@@ -374,20 +383,20 @@ bool ARMSubtarget::enablePostRAScheduler() const {
bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
// For general targets, the prologue can grow when VFPs are allocated with
// stride 4 (more vpush instructions). But WatchOS uses a compact unwind
// format which it's more important to get right.
return isTargetWatchABI() ||
- (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+ (useWideStrideVFP() && !OptMinSize);
}
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
// range otherwise.
return !NoMovt && hasV8MBaselineOps() &&
- (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+ (isTargetWindows() || !OptMinSize || genExecuteOnly());
}
bool ARMSubtarget::useFastISel() const {
@@ -404,3 +413,45 @@ bool ARMSubtarget::useFastISel() const {
((isTargetMachO() && !isThumb1Only()) ||
(isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
}
+
+unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
+ // The GPR register class has multiple possible allocation orders, with
+ // tradeoffs preferred by different sub-architectures and optimisation goals.
+ // The allocation orders are:
+ // 0: (the default tablegen order, not used)
+ // 1: r14, r0-r13
+ // 2: r0-r7
+ // 3: r0-r7, r12, lr, r8-r11
+ // Note that the register allocator will change this order so that
+ // callee-saved registers are used later, as they require extra work in the
+ // prologue/epilogue (though we sometimes override that).
+
+ // For thumb1-only targets, only the low registers are allocatable.
+ if (isThumb1Only())
+ return 2;
+
+ // Allocate low registers first, so we can select more 16-bit instructions.
+ // We also (in ignoreCSRForAllocationOrder) override the default behaviour
+ // with regards to callee-saved registers, because pushing extra registers is
+ // much cheaper (in terms of code size) than using high registers. After
+ // that, we allocate r12 (doesn't need to be saved), lr (saving it means we
+ // can return with the pop, don't need an extra "bx lr") and then the rest of
+ // the high registers.
+ if (isThumb2() && MF.getFunction().hasMinSize())
+ return 3;
+
+ // Otherwise, allocate in the default order, using LR first because saving it
+ // allows a shorter epilogue sequence.
+ return 1;
+}
+
+bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ // To minimize code size in Thumb2, we prefer the usage of low regs (lower
+ // cost per use) so we can use narrow encoding. By default, caller-saved
+ // registers (e.g. lr, r12) are always allocated first, regardless of
+ // their cost per use. When optForMinSize, we prefer the low regs even if
+ // they are CSR because usually push/pop can be folded into existing ones.
+ return isThumb2() && MF.getFunction().hasMinSize() &&
+ ARM::GPRRegClass.contains(PhysReg);
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 11841b4467a2..c2b0f052b843 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -1,9 +1,8 @@
//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -60,6 +59,7 @@ protected:
CortexA72,
CortexA73,
CortexA75,
+ CortexA76,
CortexA8,
CortexA9,
CortexM3,
@@ -110,7 +110,8 @@ protected:
ARMv8a,
ARMv8mBaseline,
ARMv8mMainline,
- ARMv8r
+ ARMv8r,
+ ARMv81mMainline,
};
public:
@@ -157,6 +158,9 @@ protected:
bool HasV8_5aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
+ bool HasV8_1MMainlineOps = false;
+ bool HasMVEIntegerOps = false;
+ bool HasMVEFloatOps = false;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -165,6 +169,24 @@ protected:
bool HasVFPv4 = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
+ bool HasFPRegs = false;
+ bool HasFPRegs16 = false;
+ bool HasFPRegs64 = false;
+
+ /// Versions of the VFP flags restricted to single precision, or to
+ /// 16 d-registers, or both.
+ bool HasVFPv2SP = false;
+ bool HasVFPv3SP = false;
+ bool HasVFPv4SP = false;
+ bool HasFPARMv8SP = false;
+ bool HasVFPv2D16 = false;
+ bool HasVFPv3D16 = false;
+ bool HasVFPv4D16 = false;
+ bool HasFPARMv8D16 = false;
+ bool HasVFPv2D16SP = false;
+ bool HasVFPv3D16SP = false;
+ bool HasVFPv4D16SP = false;
+ bool HasFPARMv8D16SP = false;
/// HasDotProd - True if the ARMv8.2A dot product instructions are supported.
bool HasDotProd = false;
@@ -232,9 +254,9 @@ protected:
/// HasFP16FML - True if subtarget supports half-precision FP fml operations
bool HasFP16FML = false;
- /// HasD16 - True if subtarget is limited to 16 double precision
+ /// HasD32 - True if subtarget has the full 32 double precision
/// FP registers for VFPv3.
- bool HasD16 = false;
+ bool HasD32 = false;
/// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode
bool HasHardwareDivideInThumb = false;
@@ -291,9 +313,9 @@ protected:
/// extension.
bool HasVirtualization = false;
- /// FPOnlySP - If true, the floating point unit only supports single
+ /// HasFP64 - If true, the floating point unit supports double
/// precision.
- bool FPOnlySP = false;
+ bool HasFP64 = false;
/// If true, the processor supports the Performance Monitor Extensions. These
/// include a generic cycle-counter as well as more fine-grained (often
@@ -321,6 +343,9 @@ protected:
/// HasRAS - if true, the processor supports RAS extensions
bool HasRAS = false;
+ /// HasLOB - if true, the processor supports the Low Overhead Branch extension
+ bool HasLOB = false;
+
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing = false;
@@ -446,6 +471,10 @@ protected:
/// What alignment is preferred for loop bodies, in log2(bytes).
unsigned PrefLoopAlignment = 0;
+ /// OptMinSize - True if we're optimising for minimum code size, equal to
+ /// the function attribute.
+ bool OptMinSize = false;
+
/// IsLittle - The target is Little Endian
bool IsLittle;
@@ -468,7 +497,8 @@ public:
/// of the specified triple.
///
ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
- const ARMBaseTargetMachine &TM, bool IsLittle);
+ const ARMBaseTargetMachine &TM, bool IsLittle,
+ bool MinSize = false);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -546,6 +576,12 @@ public:
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
+ bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
+ bool hasMVEIntegerOps() const { return HasMVEIntegerOps; }
+ bool hasMVEFloatOps() const { return HasMVEFloatOps; }
+ bool hasFPRegs() const { return HasFPRegs; }
+ bool hasFPRegs16() const { return HasFPRegs16; }
+ bool hasFPRegs64() const { return HasFPRegs64; }
/// @{
/// These functions are obsolete, please consider adding subtarget features
@@ -564,10 +600,10 @@ public:
bool hasARMOps() const { return !NoARM; }
- bool hasVFP2() const { return HasVFPv2; }
- bool hasVFP3() const { return HasVFPv3; }
- bool hasVFP4() const { return HasVFPv4; }
- bool hasFPARMv8() const { return HasFPARMv8; }
+ bool hasVFP2Base() const { return HasVFPv2D16SP; }
+ bool hasVFP3Base() const { return HasVFPv3D16SP; }
+ bool hasVFP4Base() const { return HasVFPv4D16SP; }
+ bool hasFPARMv8Base() const { return HasFPARMv8D16SP; }
bool hasNEON() const { return HasNEON; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
@@ -575,6 +611,7 @@ public:
bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
+ bool hasLOB() const { return HasLOB; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
@@ -596,7 +633,7 @@ public:
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
- bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasFP64() const { return HasFP64; }
bool hasPerfMon() const { return HasPerfMon; }
bool hasTrustZone() const { return HasTrustZone; }
bool has8MSecExt() const { return Has8MSecExt; }
@@ -633,7 +670,7 @@ public:
bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
- bool hasD16() const { return HasD16; }
+ bool hasD32() const { return HasD32; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasFP16FML() const { return HasFP16FML; }
@@ -710,6 +747,7 @@ public:
bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
+ bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
@@ -736,9 +774,9 @@ public:
isThumb1Only();
}
- bool useStride4VFPs(const MachineFunction &MF) const;
+ bool useStride4VFPs() const;
- bool useMovt(const MachineFunction &MF) const;
+ bool useMovt() const;
bool supportsTailCall() const { return SupportsTailCall; }
@@ -818,6 +856,10 @@ public:
unsigned getPrefLoopAlignment() const {
return PrefLoopAlignment;
}
+
+ bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const override;
+ unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMSystemRegister.td b/lib/Target/ARM/ARMSystemRegister.td
index ad1d37168e08..f21c7f0246f9 100644
--- a/lib/Target/ARM/ARMSystemRegister.td
+++ b/lib/Target/ARM/ARMSystemRegister.td
@@ -1,9 +1,8 @@
//===-- ARMSystemRegister.td - ARM Register defs -------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index ec02c840d5e1..7f0aae1739b3 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -95,6 +95,8 @@ extern "C" void LLVMInitializeARMTarget() {
initializeARMExecutionDomainFixPass(Registry);
initializeARMExpandPseudoPass(Registry);
initializeThumb2SizeReducePass(Registry);
+ initializeMVEVPTBlockPass(Registry);
+ initializeARMLowOverheadLoopsPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -142,6 +144,10 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
+ // Function pointers are aligned to 8 bits (because the LSB stores the
+ // ARM/Thumb state).
+ Ret += "-Fi8";
+
// ABIs other than APCS have 64 bit integers with natural alignment.
if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
Ret += "-i64:64";
@@ -264,13 +270,20 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
if (SoftFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
- auto &I = SubtargetMap[CPU + FS];
+ // Use the optminsize to identify the subtarget, but don't use it in the
+ // feature string.
+ std::string Key = CPU + FS;
+ if (F.hasMinSize())
+ Key += "+minsize";
+
+ auto &I = SubtargetMap[Key];
if (!I) {
// This needs to be done before we create a new subtarget since any
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+ I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+ F.hasMinSize());
if (!I->isThumb() && !I->hasARMOps())
F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
@@ -351,6 +364,8 @@ public:
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
class ARMExecutionDomainFix : public ExecutionDomainFix {
@@ -375,6 +390,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARMPassConfig(*this, PM);
}
+std::unique_ptr<CSEConfigBase> ARMPassConfig::getCSEConfig() const {
+ return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
void ARMPassConfig::addIRPasses() {
if (TM->Options.ThreadModel == ThreadModel::Single)
addPass(createLowerAtomicPass());
@@ -393,6 +412,10 @@ void ARMPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
+ // Run the parallel DSP pass.
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createARMParallelDSPPass());
+
// Match interleaved memory accesses to ldN/stN intrinsics.
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createInterleavedAccessPass());
@@ -405,9 +428,6 @@ void ARMPassConfig::addCodeGenPrepare() {
}
bool ARMPassConfig::addPreISel() {
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createARMParallelDSPPass());
-
if ((TM->getOptLevel() != CodeGenOpt::None &&
EnableGlobalMerge == cl::BOU_UNSET) ||
EnableGlobalMerge == cl::BOU_TRUE) {
@@ -427,6 +447,9 @@ bool ARMPassConfig::addPreISel() {
MergeExternalByDefault));
}
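+ // The generic HardwareLoops pass inserts the loop-control intrinsics; the
+ // ARMLowOverheadLoops pass added in addPreEmitPass() later converts them
+ // into v8.1-M low-overhead loops (or reverts them).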
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createHardwareLoopsPass());
+
return false;
}
@@ -490,6 +513,7 @@ void ARMPassConfig::addPreSched2() {
return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}));
}
+ addPass(createMVEVPTBlockPass());
addPass(createThumb2ITBlockPass());
}
@@ -506,4 +530,5 @@ void ARMPassConfig::addPreEmitPass() {
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
+ addPass(createARMLowOverheadLoopsPass());
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 2c791998e702..cb8650d8139b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -1,9 +1,8 @@
//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 9c13359cba71..891329d3f297 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- llvm/Target/ARMTargetObjectFile.cpp - ARM Object Info Impl --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 0dc0882809c0..7b15dcc61f56 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index f72bb8632eb7..2a8ec734a05f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -22,6 +21,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
@@ -36,6 +36,10 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
+static cl::opt<bool> DisableLowOverheadLoops(
+ "disable-arm-loloops", cl::Hidden, cl::init(true),
+ cl::desc("Disable the generation of low-overhead loops"));
+
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -107,9 +111,13 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Idx == 1)
return 0;
- if (Opcode == Instruction::And)
- // Conversion to BIC is free, and means we can use ~Imm instead.
- return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+ if (Opcode == Instruction::And) {
+ // UXTB/UXTH
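+ // e.g. "and r0, r1, #255" can be selected as "uxtb r0, r1", and
+ // "and r0, r1, #65535" as "uxth r0, r1", so these masks cost nothing.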
+ if (Imm == 255 || Imm == 65535)
+ return 0;
+ // Conversion to BIC is free, and means we can use ~Imm instead.
+ return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+ }
if (Opcode == Instruction::Add)
// Conversion to SUB is free, and means we can use -Imm instead.
@@ -398,6 +406,40 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
return 1;
}
+int ARMTTIImpl::getMemcpyCost(const Instruction *I) {
+ const MemCpyInst *MI = dyn_cast<MemCpyInst>(I);
+ assert(MI && "MemcpyInst expected");
+ ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength());
+
+ // To model the cost of a library call, we assume 1 for the call, and
+ // 3 for the argument setup.
+ const unsigned LibCallCost = 4;
+
+ // If 'size' is not a constant, a library call will be generated.
+ if (!C)
+ return LibCallCost;
+
+ const unsigned Size = C->getValue().getZExtValue();
+ const unsigned DstAlign = MI->getDestAlignment();
+ const unsigned SrcAlign = MI->getSourceAlignment();
+ const Function *F = I->getParent()->getParent();
+ const unsigned Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
+ std::vector<EVT> MemOps;
+
+ // MemOps will be populated with a list of data types that need to be
+ // loaded and stored. That's why we multiply the number of elements by 2 to
+ // get the cost for this memcpy.
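+ // For example, a 16-byte copy lowered as four i32 load/store pairs gives
+ // MemOps.size() == 4 and therefore a cost of 8.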
+ if (getTLI()->findOptimalMemOpLowering(
+ MemOps, Limit, Size, DstAlign, SrcAlign, false /*IsMemset*/,
+ false /*ZeroMemset*/, false /*MemcpyStrSrc*/, false /*AllowOverlap*/,
+ MI->getDestAddressSpace(), MI->getSourceAddressSpace(),
+ F->getAttributes()))
+ return MemOps.size() * 2;
+
+ // If we can't find an optimal memop lowering, return the default cost
+ return LibCallCost;
+}
+
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
if (Kind == TTI::SK_Broadcast) {
@@ -590,6 +632,222 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
UseMaskForCond, UseMaskForGaps);
}
+bool ARMTTIImpl::isLoweredToCall(const Function *F) {
+ if (!F->isIntrinsic())
+ return BaseT::isLoweredToCall(F);
+
+ // Assume all Arm-specific intrinsics map to an instruction.
+ if (F->getName().startswith("llvm.arm"))
+ return false;
+
+ switch (F->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::powi:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::pow:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ return true;
+ case Intrinsic::sqrt:
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::canonicalize:
+ case Intrinsic::lround:
+ case Intrinsic::llround:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint:
+ if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
+ return true;
+ if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
+ return true;
+ // Some operations can be handled by vector instructions; assume that
+ // unsupported vectors will be expanded into supported scalar ones.
+ // TODO Handle scalar operations properly.
+ return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
+ case Intrinsic::masked_store:
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_gather:
+ case Intrinsic::masked_scatter:
+ return !ST->hasMVEIntegerOps();
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
+ return false;
+ }
+
+ return BaseT::isLoweredToCall(F);
+}
+
+bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ // Low-overhead branches are only supported in the 'low-overhead branch'
+ // extension of v8.1-m.
+ if (!ST->hasLOB() || DisableLowOverheadLoops)
+ return false;
+
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L))
+ return false;
+
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return false;
+
+ const SCEV *TripCountSCEV =
+ SE.getAddExpr(BackedgeTakenCount,
+ SE.getOne(BackedgeTakenCount->getType()));
+
+ // We need to store the trip count in LR, a 32-bit register.
+ if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32)
+ return false;
+
+ // Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
+ // point in generating a hardware loop if that's going to happen.
+ auto MaybeCall = [this](Instruction &I) {
+ const ARMTargetLowering *TLI = getTLI();
+ unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
+ EVT VT = TLI->getValueType(DL, I.getType(), true);
+ if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
+ return true;
+
+ // Check if an intrinsic will be lowered to a call and assume that any
+ // other CallInst will generate a bl.
+ if (auto *Call = dyn_cast<CallInst>(&I)) {
+ if (isa<IntrinsicInst>(Call)) {
+ if (const Function *F = Call->getCalledFunction())
+ return isLoweredToCall(F);
+ }
+ return true;
+ }
+
+ // FPv5 provides conversions between integer, double-precision,
+ // single-precision, and half-precision formats.
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ return !ST->hasFPARMv8Base();
+ }
+
+ // FIXME: Unfortunately the approach of checking the Operation Action does
+ // not catch all cases of Legalization that use library calls. Our
+ // Legalization step categorizes some transformations into library calls as
+ // Custom, Expand or even Legal when doing type legalization. So for now
+ // we have to special-case, for instance, SDIV of 64-bit integers and the
+ // use of floating-point emulation.
+ if (VT.isInteger() && VT.getSizeInBits() >= 64) {
+ switch (ISD) {
+ default:
+ break;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return true;
+ }
+ }
+
+ // Assume all other non-float operations are supported.
+ if (!VT.isFloatingPoint())
+ return false;
+
+ // We'll need a library call to handle most floats when using soft float.
+ if (TLI->useSoftFloat()) {
+ switch (I.getOpcode()) {
+ default:
+ return true;
+ case Instruction::Alloca:
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::Select:
+ case Instruction::PHI:
+ return false;
+ }
+ }
+
+ // We'll need a libcall to perform double precision operations on a single
+ // precision only FPU.
+ if (I.getType()->isDoubleTy() && !ST->hasFP64())
+ return true;
+
+ // Likewise for half precision arithmetic.
+ if (I.getType()->isHalfTy() && !ST->hasFullFP16())
+ return true;
+
+ return false;
+ };
+
+ auto IsHardwareLoopIntrinsic = [](Instruction &I) {
+ if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
+ switch (Call->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::set_loop_iterations:
+ case Intrinsic::test_set_loop_iterations:
+ case Intrinsic::loop_decrement:
+ case Intrinsic::loop_decrement_reg:
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Scan the instructions to see if there's any that we know will turn into a
+ // call or if this loop is already a low-overhead loop.
+ auto ScanLoop = [&](Loop *L) {
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ if (MaybeCall(I) || IsHardwareLoopIntrinsic(I))
+ return false;
+ }
+ }
+ return true;
+ };
+
+ // Visit inner loops.
+ for (auto Inner : *L)
+ if (!ScanLoop(Inner))
+ return false;
+
+ if (!ScanLoop(L))
+ return false;
+
+ // TODO: Check whether the trip count calculation is expensive. If L is the
+ // inner loop but we know it has a low trip count, calculating that trip
+ // count (in the parent loop) may be detrimental.
+
+ LLVMContext &C = L->getHeader()->getContext();
+ HWLoopInfo.CounterInReg = true;
+ HWLoopInfo.IsNestingLegal = false;
+ HWLoopInfo.PerformEntryTest = true;
+ HWLoopInfo.CountType = Type::getInt32Ty(C);
+ HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+ return true;
+}
+
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Only currently enable these preferences for M-Class cores.
@@ -599,7 +857,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
- if (L->getHeader()->getParent()->optForSize())
+ if (L->getHeader()->getParent()->hasOptSize())
return;
// Only enable on Thumb-2 targets.
@@ -645,6 +903,7 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.Partial = true;
UP.Runtime = true;
+ UP.UpperBound = true;
UP.UnrollRemainder = true;
UP.DefaultUnrollRuntimeCount = 4;
UP.UnrollAndJam = true;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 2dd143d48a15..52f6ea4a6e2f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,7 +48,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *TLI;
// Currently the following features are excluded from InlineFeatureWhitelist.
- // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
+ // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
// Depending on whether they are set or unset, different
// instructions/registers are available. For example, inlining a callee with
// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
@@ -94,6 +93,12 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
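+ /// Prefer backedge-indexed addressing only for single-block loops on
+ /// Thumb-2 M-class cores, and never when the function is optimised for size.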
+ bool shouldFavorBackedgeIndex(const Loop *L) const {
+ if (L->getHeader()->getParent()->hasOptSize())
+ return false;
+ return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
+ }
+
/// Floating-point computation using ARMv8 AArch32 Advanced
/// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
/// is IEEE-754 compliant, but it's not covered in this target.
@@ -143,6 +148,8 @@ public:
return ST->getMaxInterleaveFactor();
}
+ int getMemcpyCost(const Instruction *I);
+
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -173,6 +180,12 @@ public:
bool UseMaskForCond = false,
bool UseMaskForGaps = false);
+ bool isLoweredToCall(const Function *F);
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo);
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 3832b0112b87..1da9452f1d22 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1,19 +1,20 @@
//===- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ARMFeatures.h"
-#include "InstPrinter/ARMInstPrinter.h"
+#include "ARMBaseInstrInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMInstPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
@@ -69,6 +70,10 @@
using namespace llvm;
+namespace llvm {
+extern const MCInstrDesc ARMInsts[];
+} // end namespace llvm
+
namespace {
enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly };
@@ -90,6 +95,16 @@ static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes",
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+static inline unsigned extractITMaskBit(unsigned Mask, unsigned Position) {
+ // Position==0 means we're not in an IT block at all. Position==1
+ // means we want the first state bit, which is always 0 (Then).
+ // Position==2 means we want the second state bit, stored at bit 3
+ // of Mask, and so on downwards. So (5 - Position) will shift the
+ // right bit down to bit 0, including the always-0 bit at bit 4 for
+ // the mandatory initial Then.
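+ // For example, "ITTE" is stored as Mask == 0b0011 (slot 2 'T' in bit 3,
+ // slot 3 'T' in bit 2, slot 4 'E' in bit 1, terminating 1 in bit 0), so
+ // Position==4 extracts bit 1 and returns 1, meaning Else.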
+ return (Mask >> (5 - Position) & 1);
+}
+
class UnwindContext {
using Locs = SmallVector<SMLoc, 4>;
@@ -165,6 +180,7 @@ public:
}
};
+
class ARMAsmParser : public MCTargetAsmParser {
const MCRegisterInfo *MRI;
UnwindContext UC;
@@ -225,11 +241,10 @@ class ARMAsmParser : public MCTargetAsmParser {
}
// Emit the IT instruction
- unsigned Mask = getITMaskEncoding();
MCInst ITInst;
ITInst.setOpcode(ARM::t2IT);
ITInst.addOperand(MCOperand::createImm(ITState.Cond));
- ITInst.addOperand(MCOperand::createImm(Mask));
+ ITInst.addOperand(MCOperand::createImm(ITState.Mask));
Out.EmitInstruction(ITInst, getSTI());
// Emit the conditional instructions
@@ -287,27 +302,10 @@ class ARMAsmParser : public MCTargetAsmParser {
return MRI->getSubReg(QReg, ARM::dsub_0);
}
- // Get the encoding of the IT mask, as it will appear in an IT instruction.
- unsigned getITMaskEncoding() {
- assert(inITBlock());
- unsigned Mask = ITState.Mask;
- unsigned TZ = countTrailingZeros(Mask);
- if ((ITState.Cond & 1) == 0) {
- assert(Mask && TZ <= 3 && "illegal IT mask value!");
- Mask ^= (0xE << TZ) & 0xF;
- }
- return Mask;
- }
-
// Get the condition code corresponding to the current IT block slot.
ARMCC::CondCodes currentITCond() {
- unsigned MaskBit;
- if (ITState.CurPosition == 1)
- MaskBit = 1;
- else
- MaskBit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
-
- return MaskBit ? ITState.Cond : ARMCC::getOppositeCondition(ITState.Cond);
+ unsigned MaskBit = extractITMaskBit(ITState.Mask, ITState.CurPosition);
+ return MaskBit ? ARMCC::getOppositeCondition(ITState.Cond) : ITState.Cond;
}
// Invert the condition of the current IT block slot without changing any
@@ -337,7 +335,7 @@ class ARMAsmParser : public MCTargetAsmParser {
// Keep any existing condition bits.
NewMask |= ITState.Mask & (0xE << TZ);
// Insert the new condition bit.
- NewMask |= (Cond == ITState.Cond) << TZ;
+ NewMask |= (Cond != ITState.Cond) << TZ;
// Move the trailing 1 down one bit.
NewMask |= 1 << (TZ - 1);
ITState.Mask = NewMask;
@@ -352,9 +350,10 @@ class ARMAsmParser : public MCTargetAsmParser {
ITState.IsExplicit = false;
}
- // Create a new explicit IT block with the given condition and mask. The mask
- // should be in the parsed format, with a 1 implying 't', regardless of the
- // low bit of the condition.
+ // Create a new explicit IT block with the given condition and mask.
+ // The mask should be in the format used in ARMOperand and
+ // MCOperand, with a 1 implying 'e', regardless of the low bit of
+ // the condition.
void startExplicitITBlock(ARMCC::CondCodes Cond, unsigned Mask) {
assert(!inITBlock());
ITState.Cond = Cond;
@@ -363,6 +362,18 @@ class ARMAsmParser : public MCTargetAsmParser {
ITState.IsExplicit = true;
}
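+ // State of the current VPT block, tracked analogously to ITState above:
+ // the 4-bit predication mask and the position of the instruction being
+ // parsed within the block (~0U when not inside a VPT block).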
+ struct {
+ unsigned Mask : 4;
+ unsigned CurPosition;
+ } VPTState;
+ bool inVPTBlock() { return VPTState.CurPosition != ~0U; }
+ void forwardVPTPosition() {
+ if (!inVPTBlock()) return;
+ unsigned TZ = countTrailingZeros(VPTState.Mask);
+ if (++VPTState.CurPosition == 5 - TZ)
+ VPTState.CurPosition = ~0U;
+ }
+
void Note(SMLoc L, const Twine &Msg, SMRange Range = None) {
return getParser().Note(L, Msg, Range);
}
@@ -383,7 +394,7 @@ class ARMAsmParser : public MCTargetAsmParser {
int tryParseRegister();
bool tryParseRegisterWithWriteBack(OperandVector &);
int tryParseShiftRegister(OperandVector &);
- bool parseRegisterList(OperandVector &);
+ bool parseRegisterList(OperandVector &, bool EnforceOrder = true);
bool parseMemory(OperandVector &);
bool parseOperand(OperandVector &, StringRef Mnemonic);
bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
@@ -421,12 +432,15 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveAlign(SMLoc L);
bool parseDirectiveThumbSet(SMLoc L);
- StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
- bool &CarrySetting, unsigned &ProcessorIMod,
- StringRef &ITMask);
- void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
- bool &CanAcceptCarrySet,
- bool &CanAcceptPredicationCode);
+ bool isMnemonicVPTPredicable(StringRef Mnemonic, StringRef ExtraToken);
+ StringRef splitMnemonic(StringRef Mnemonic, StringRef ExtraToken,
+ unsigned &PredicationCode,
+ unsigned &VPTPredicationCode, bool &CarrySetting,
+ unsigned &ProcessorIMod, StringRef &ITMask);
+ void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef ExtraToken,
+ StringRef FullInst, bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode,
+ bool &CanAcceptVPTPredicationCode);
void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting,
OperandVector &Operands);
@@ -478,7 +492,15 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasV8MMainline() const {
return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps];
}
-
+ bool hasV8_1MMainline() const {
+ return getSTI().getFeatureBits()[ARM::HasV8_1MMainlineOps];
+ }
+ bool hasMVE() const {
+ return getSTI().getFeatureBits()[ARM::HasMVEIntegerOps];
+ }
+ bool hasMVEFloat() const {
+ return getSTI().getFeatureBits()[ARM::HasMVEFloatOps];
+ }
bool has8MSecExt() const {
return getSTI().getFeatureBits()[ARM::Feature8MSecExt];
}
@@ -491,8 +513,8 @@ class ARMAsmParser : public MCTargetAsmParser {
return getSTI().getFeatureBits()[ARM::FeatureDSP];
}
- bool hasD16() const {
- return getSTI().getFeatureBits()[ARM::FeatureD16];
+ bool hasD32() const {
+ return getSTI().getFeatureBits()[ARM::FeatureD32];
}
bool hasV8_1aOps() const {
@@ -505,7 +527,7 @@ class ARMAsmParser : public MCTargetAsmParser {
void SwitchMode() {
MCSubtargetInfo &STI = copySTI();
- uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ auto FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
@@ -556,11 +578,13 @@ class ARMAsmParser : public MCTargetAsmParser {
// Asm Match Converter Methods
void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
void cvtThumbBranches(MCInst &Inst, const OperandVector &);
+ void cvtMVEVMOVQtoDReg(MCInst &Inst, const OperandVector &);
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out);
bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+ bool shouldOmitVectorPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
bool isITBlockTerminator(MCInst &Inst) const;
void fixupGNULDRDAlias(StringRef Mnemonic, OperandVector &Operands);
bool validateLDRDSTRD(MCInst &Inst, const OperandVector &Operands,
@@ -597,6 +621,8 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
+ VPTState.CurPosition = ~0U;
+
NextSymbolIsThumb = false;
}
@@ -642,6 +668,7 @@ public:
class ARMOperand : public MCParsedAsmOperand {
enum KindTy {
k_CondCode,
+ k_VPTPred,
k_CCOut,
k_ITCondMask,
k_CoprocNum,
@@ -659,8 +686,11 @@ class ARMOperand : public MCParsedAsmOperand {
k_VectorIndex,
k_Register,
k_RegisterList,
+ k_RegisterListWithAPSR,
k_DPRRegisterList,
k_SPRRegisterList,
+ k_FPSRegisterListWithVPR,
+ k_FPDRegisterListWithVPR,
k_VectorList,
k_VectorListAllLanes,
k_VectorListIndexed,
@@ -681,6 +711,10 @@ class ARMOperand : public MCParsedAsmOperand {
ARMCC::CondCodes Val;
};
+ struct VCCOp {
+ ARMVCC::VPTCodes Val;
+ };
+
struct CopOp {
unsigned Val;
};
@@ -797,6 +831,7 @@ class ARMOperand : public MCParsedAsmOperand {
union {
struct CCOp CC;
+ struct VCCOp VCC;
struct CopOp Cop;
struct CoprocOptionOp CoprocOption;
struct MBOptOp MBOpt;
@@ -845,6 +880,11 @@ public:
return CC.Val;
}
+ ARMVCC::VPTCodes getVPTPred() const {
+ assert(isVPTPred() && "Invalid access!");
+ return VCC.Val;
+ }
+
unsigned getCoproc() const {
assert((Kind == k_CoprocNum || Kind == k_CoprocReg) && "Invalid access!");
return Cop.Val;
@@ -861,8 +901,11 @@ public:
}
const SmallVectorImpl<unsigned> &getRegList() const {
- assert((Kind == k_RegisterList || Kind == k_DPRRegisterList ||
- Kind == k_SPRRegisterList) && "Invalid access!");
+ assert((Kind == k_RegisterList || Kind == k_RegisterListWithAPSR ||
+ Kind == k_DPRRegisterList || Kind == k_SPRRegisterList ||
+ Kind == k_FPSRegisterListWithVPR ||
+ Kind == k_FPDRegisterListWithVPR) &&
+ "Invalid access!");
return Registers;
}
@@ -915,6 +958,7 @@ public:
bool isCoprocReg() const { return Kind == k_CoprocReg; }
bool isCoprocOption() const { return Kind == k_CoprocOption; }
bool isCondCode() const { return Kind == k_CondCode; }
+ bool isVPTPred() const { return Kind == k_VPTPred; }
bool isCCOut() const { return Kind == k_CCOut; }
bool isITMask() const { return Kind == k_ITCondMask; }
bool isITCondCode() const { return Kind == k_CondCode; }
@@ -970,6 +1014,18 @@ public:
return false;
}
+ // Checks whether this operand is an offset suitable for the LE /
+ // LETP instructions in Arm v8.1-M.
+ bool isLEOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ return Val < 0 && Val >= -4094 && (Val & 1) == 0;
+ }
+ return false;
+ }
+
// checks whether this operand is a memory operand computed as an offset
// applied to PC. the offset may have 8 bits of magnitude and is represented
// with two bits of shift. textually it may be either [pc, #imm], #imm or
@@ -982,7 +1038,7 @@ public:
if (!CE) return false;
Val = CE->getValue();
}
- else if (isMem()) {
+ else if (isGPRMem()) {
if(!Memory.OffsetImm || Memory.OffsetRegNum) return false;
if(Memory.BaseRegNum != ARM::PC) return false;
Val = Memory.OffsetImm->getValue();
@@ -1016,7 +1072,14 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= N && Value <= M;
}
-
+ template<int64_t N, int64_t M>
+ bool isImmediateS2() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ((Value & 1) == 0) && Value >= N && Value <= M;
+ }
bool isFBits16() const {
return isImmediate<0, 17>();
}
@@ -1026,6 +1089,21 @@ public:
bool isImm8s4() const {
return isImmediateS4<-1020, 1020>();
}
+ bool isImm7s4() const {
+ return isImmediateS4<-508, 508>();
+ }
+ bool isImm7Shift0() const {
+ return isImmediate<-127, 127>();
+ }
+ bool isImm7Shift1() const {
+ return isImmediateS2<-255, 255>();
+ }
+ bool isImm7Shift2() const {
+ return isImmediateS4<-511, 511>();
+ }
+ bool isImm7() const {
+ return isImmediate<-127, 127>();
+ }
bool isImm0_1020s4() const {
return isImmediateS4<0, 1020>();
}
@@ -1098,6 +1176,34 @@ public:
return isImmediate<1, 33>();
}
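+ // Shared helper for the isExpImm / isInvertedExpImm predicates below: the
+ // value must be an 8-bit immediate shifted left by 'shift' bits, e.g. with
+ // shift == 16, 0x00FF0000 is accepted and 0x00FF0001 is not.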
+ template<int shift>
+ bool isExpImmValue(uint64_t Value) const {
+ uint64_t mask = (1 << shift) - 1;
+ if ((Value & mask) != 0 || (Value >> shift) > 0xff)
+ return false;
+ return true;
+ }
+
+ template<int shift>
+ bool isExpImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return isExpImmValue<shift>(CE->getValue());
+ }
+
+ template<int shift, int size>
+ bool isInvertedExpImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ uint64_t OriginalValue = CE->getValue();
+ uint64_t InvertedValue = OriginalValue ^ (((uint64_t)1 << size) - 1);
+ return isExpImmValue<shift>(InvertedValue);
+ }
+
bool isPKHLSLImm() const {
return isImmediate<0, 32>();
}
@@ -1167,13 +1273,34 @@ public:
bool isReg() const override { return Kind == k_Register; }
bool isRegList() const { return Kind == k_RegisterList; }
+ bool isRegListWithAPSR() const {
+ return Kind == k_RegisterListWithAPSR || Kind == k_RegisterList;
+ }
bool isDPRRegList() const { return Kind == k_DPRRegisterList; }
bool isSPRRegList() const { return Kind == k_SPRRegisterList; }
+ bool isFPSRegListWithVPR() const { return Kind == k_FPSRegisterListWithVPR; }
+ bool isFPDRegListWithVPR() const { return Kind == k_FPDRegisterListWithVPR; }
bool isToken() const override { return Kind == k_Token; }
bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; }
bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; }
bool isTraceSyncBarrierOpt() const { return Kind == k_TraceSyncBarrierOpt; }
bool isMem() const override {
+ return isGPRMem() || isMVEMem();
+ }
+ bool isMVEMem() const {
+ if (Kind != k_Memory)
+ return false;
+ if (Memory.BaseRegNum &&
+ !ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Memory.BaseRegNum) &&
+ !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Memory.BaseRegNum))
+ return false;
+ if (Memory.OffsetRegNum &&
+ !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+ Memory.OffsetRegNum))
+ return false;
+ return true;
+ }
+ bool isGPRMem() const {
if (Kind != k_Memory)
return false;
if (Memory.BaseRegNum &&
@@ -1198,6 +1325,16 @@ public:
RegShiftedImm.SrcReg);
}
bool isRotImm() const { return Kind == k_RotateImmediate; }
+
+ template<unsigned Min, unsigned Max>
+ bool isPowerTwoInRange() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && countPopulation((uint64_t)Value) == 1 &&
+ Value >= Min && Value <= Max;
+ }
bool isModImm() const { return Kind == k_ModifiedImmediate; }
bool isModImmNot() const {
@@ -1243,14 +1380,50 @@ public:
return isPostIdxRegShifted() && PostIdxReg.ShiftTy == ARM_AM::no_shift;
}
bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const {
- if (!isMem())
+ if (!isGPRMem())
+ return false;
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+ (alignOK || Memory.Alignment == Alignment);
+ }
+ bool isMemNoOffsetT2(bool alignOK = false, unsigned Alignment = 0) const {
+ if (!isGPRMem())
+ return false;
+
+ if (!ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
+ Memory.BaseRegNum))
+ return false;
+
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+ (alignOK || Memory.Alignment == Alignment);
+ }
+ bool isMemNoOffsetT2NoSp(bool alignOK = false, unsigned Alignment = 0) const {
+ if (!isGPRMem())
+ return false;
+
+ if (!ARMMCRegisterClasses[ARM::rGPRRegClassID].contains(
+ Memory.BaseRegNum))
return false;
+
+ // No offset of any kind.
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+ (alignOK || Memory.Alignment == Alignment);
+ }
+ bool isMemNoOffsetT(bool alignOK = false, unsigned Alignment = 0) const {
+ if (!isGPRMem())
+ return false;
+
+ if (!ARMMCRegisterClasses[ARM::tGPRRegClassID].contains(
+ Memory.BaseRegNum))
+ return false;
+
// No offset of any kind.
return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
(alignOK || Memory.Alignment == Alignment);
}
bool isMemPCRelImm12() const {
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base register must be PC.
if (Memory.BaseRegNum != ARM::PC)
@@ -1337,7 +1510,7 @@ public:
}
bool isAddrMode2() const {
- if (!isMem() || Memory.Alignment != 0) return false;
+ if (!isGPRMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return true;
// Immediate offset in range [-4095, 4095].
@@ -1362,7 +1535,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMem() || Memory.Alignment != 0) return false;
+ if (!isGPRMem() || Memory.Alignment != 0) return false;
// No shifts are legal for AM3.
if (Memory.ShiftType != ARM_AM::no_shift) return false;
// Check for register offset.
@@ -1396,7 +1569,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMem() || Memory.Alignment != 0) return false;
+ if (!isGPRMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
// Immediate offset in range [-1020, 1020] and a multiple of 4.
@@ -1412,7 +1585,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMem() || Memory.Alignment != 0) return false;
+ if (!isGPRMem() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
// Immediate offset in range [-510, 510] and a multiple of 2.
@@ -1423,14 +1596,14 @@ public:
}
bool isMemTBB() const {
- if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
return false;
return true;
}
bool isMemTBH() const {
- if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 ||
Memory.Alignment != 0 )
return false;
@@ -1438,13 +1611,13 @@ public:
}
bool isMemRegOffset() const {
- if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
+ if (!isGPRMem() || !Memory.OffsetRegNum || Memory.Alignment != 0)
return false;
return true;
}
bool isT2MemRegOffset() const {
- if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.Alignment != 0 || Memory.BaseRegNum == ARM::PC)
return false;
// Only lsl #{0, 1, 2, 3} allowed.
@@ -1458,7 +1631,7 @@ public:
bool isMemThumbRR() const {
// Thumb reg+reg addressing is simple. Just two registers, a base and
// an offset. No shifts, negations or any other complicating factors.
- if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative ||
+ if (!isGPRMem() || !Memory.OffsetRegNum || Memory.isNegative ||
Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0)
return false;
return isARMLowRegister(Memory.BaseRegNum) &&
@@ -1466,7 +1639,7 @@ public:
}
bool isMemThumbRIs4() const {
- if (!isMem() || Memory.OffsetRegNum != 0 ||
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 124].
@@ -1476,7 +1649,7 @@ public:
}
bool isMemThumbRIs2() const {
- if (!isMem() || Memory.OffsetRegNum != 0 ||
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 62].
@@ -1486,7 +1659,7 @@ public:
}
bool isMemThumbRIs1() const {
- if (!isMem() || Memory.OffsetRegNum != 0 ||
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
!isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 31].
@@ -1496,7 +1669,7 @@ public:
}
bool isMemThumbSPI() const {
- if (!isMem() || Memory.OffsetRegNum != 0 ||
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 ||
Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0)
return false;
// Immediate offset, multiple of 4 in range [0, 1020].
@@ -1511,7 +1684,7 @@ public:
// and we reject it.
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [-1020, 1020].
if (!Memory.OffsetImm) return true;
@@ -1520,9 +1693,24 @@ public:
return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) ||
Val == std::numeric_limits<int32_t>::min();
}
-
+ bool isMemImm7s4Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0 ||
+ !ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
+ Memory.BaseRegNum))
+ return false;
+ // Immediate offset a multiple of 4 in range [-508, 508].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ // Special case, #-0 is INT32_MIN.
+ return (Val >= -508 && Val <= 508 && (Val & 3) == 0) || Val == INT32_MIN;
+ }
bool isMemImm0_1020s4Offset() const {
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [0, 1020].
if (!Memory.OffsetImm) return true;
@@ -1531,7 +1719,7 @@ public:
}
bool isMemImm8Offset() const {
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base reg of PC isn't allowed for these encodings.
if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1542,8 +1730,81 @@ public:
(Val > -256 && Val < 256);
}
+ template<unsigned Bits, unsigned RegClassID>
+ bool isMemImm7ShiftedOffset() const {
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0 ||
+ !ARMMCRegisterClasses[RegClassID].contains(Memory.BaseRegNum))
+ return false;
+
+ // Expect an immediate offset equal to an element of the range
+ // [-127, 127], shifted left by Bits.
+
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+
+ // INT32_MIN is a special-case value (indicating the encoding with
+ // zero offset and the subtract bit set)
+ if (Val == INT32_MIN)
+ return true;
+
+ unsigned Divisor = 1U << Bits;
+
+ // Check that the low bits are zero
+ if (Val % Divisor != 0)
+ return false;
+
+ // Check that the remaining offset is within range.
+ Val /= Divisor;
+ return (Val >= -127 && Val <= 127);
+ }
+
+ template <int shift> bool isMemRegRQOffset() const {
+ if (!isMVEMem() || Memory.OffsetImm != 0 || Memory.Alignment != 0)
+ return false;
+
+ if (!ARMMCRegisterClasses[ARM::GPRnopcRegClassID].contains(
+ Memory.BaseRegNum))
+ return false;
+ if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+ Memory.OffsetRegNum))
+ return false;
+
+ if (shift == 0 && Memory.ShiftType != ARM_AM::no_shift)
+ return false;
+
+ if (shift > 0 &&
+ (Memory.ShiftType != ARM_AM::uxtw || Memory.ShiftImm != shift))
+ return false;
+
+ return true;
+ }
+
+ template <int shift> bool isMemRegQOffset() const {
+ if (!isMVEMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+
+ if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+ Memory.BaseRegNum))
+ return false;
+
+ if(!Memory.OffsetImm) return true;
+ static_assert(shift < 56,
+ "Such that we don't shift by a value higher than 62");
+ int64_t Val = Memory.OffsetImm->getValue();
+
+ // The value must be a multiple of (1 << shift)
+ if ((Val & ((1U << shift) - 1)) != 0)
+ return false;
+
+ // And be in the right range, depending on the shift amount. With a shift of
+ // 0 the offset is a 7-bit unsigned value; the sign bit is set separately.
+ int64_t Range = (1U << (7+shift)) - 1;
+ return (Val == INT32_MIN) || (Val > -Range && Val < Range);
+ }
+
bool isMemPosImm8Offset() const {
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 255].
if (!Memory.OffsetImm) return true;
@@ -1552,7 +1813,7 @@ public:
}
bool isMemNegImm8Offset() const {
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Base reg of PC isn't allowed for these encodings.
if (Memory.BaseRegNum == ARM::PC) return false;
@@ -1564,7 +1825,7 @@ public:
}
bool isMemUImm12Offset() const {
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 4095].
if (!Memory.OffsetImm) return true;
@@ -1580,7 +1841,7 @@ public:
if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
- if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ if (!isGPRMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-4095, 4095].
if (!Memory.OffsetImm) return true;
@@ -1631,6 +1892,12 @@ public:
return VectorList.Count == 1;
}
+ bool isVecListTwoMQ() const {
+ return isSingleSpacedVectorList() && VectorList.Count == 2 &&
+ ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+ VectorList.RegNum);
+ }
+
bool isVecListDPair() const {
if (!isSingleSpacedVectorList()) return false;
return (ARMMCRegisterClasses[ARM::DPairRegClassID]
@@ -1664,6 +1931,12 @@ public:
return VectorList.Count == 4;
}
+ bool isVecListFourMQ() const {
+ return isSingleSpacedVectorList() && VectorList.Count == 4 &&
+ ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(
+ VectorList.RegNum);
+ }
+
bool isSingleSpacedVectorAllLanes() const {
return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
}
@@ -1806,23 +2079,24 @@ public:
return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
}
- bool isVectorIndex8() const {
- if (Kind != k_VectorIndex) return false;
- return VectorIndex.Val < 8;
- }
+ bool isVectorIndex() const { return Kind == k_VectorIndex; }
- bool isVectorIndex16() const {
+ template <unsigned NumLanes>
+ bool isVectorIndexInRange() const {
if (Kind != k_VectorIndex) return false;
- return VectorIndex.Val < 4;
+ return VectorIndex.Val < NumLanes;
}
- bool isVectorIndex32() const {
- if (Kind != k_VectorIndex) return false;
- return VectorIndex.Val < 2;
- }
- bool isVectorIndex64() const {
+ bool isVectorIndex8() const { return isVectorIndexInRange<8>(); }
+ bool isVectorIndex16() const { return isVectorIndexInRange<4>(); }
+ bool isVectorIndex32() const { return isVectorIndexInRange<2>(); }
+ bool isVectorIndex64() const { return isVectorIndexInRange<1>(); }
+
+ template<int PermittedValue, int OtherPermittedValue>
+ bool isMVEPairVectorIndex() const {
if (Kind != k_VectorIndex) return false;
- return VectorIndex.Val < 1;
+ return VectorIndex.Val == PermittedValue ||
+ VectorIndex.Val == OtherPermittedValue;
}
bool isNEONi8splat() const {
@@ -1992,6 +2266,51 @@ public:
return (Value % Angle == Remainder && Value <= 270);
}
+ bool isMVELongShift() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ return Value >= 1 && Value <= 32;
+ }
+
+ bool isITCondCodeNoAL() const {
+ if (!isITCondCode()) return false;
+ ARMCC::CondCodes CC = getCondCode();
+ return CC != ARMCC::AL;
+ }
+
+ bool isITCondCodeRestrictedI() const {
+ if (!isITCondCode())
+ return false;
+ ARMCC::CondCodes CC = getCondCode();
+ return CC == ARMCC::EQ || CC == ARMCC::NE;
+ }
+
+ bool isITCondCodeRestrictedS() const {
+ if (!isITCondCode())
+ return false;
+ ARMCC::CondCodes CC = getCondCode();
+ return CC == ARMCC::LT || CC == ARMCC::GT || CC == ARMCC::LE ||
+ CC == ARMCC::GE;
+ }
+
+ bool isITCondCodeRestrictedU() const {
+ if (!isITCondCode())
+ return false;
+ ARMCC::CondCodes CC = getCondCode();
+ return CC == ARMCC::HS || CC == ARMCC::HI;
+ }
+
+ bool isITCondCodeRestrictedFP() const {
+ if (!isITCondCode())
+ return false;
+ ARMCC::CondCodes CC = getCondCode();
+ return CC == ARMCC::EQ || CC == ARMCC::NE || CC == ARMCC::LT ||
+ CC == ARMCC::GT || CC == ARMCC::LE || CC == ARMCC::GE;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
@@ -2019,6 +2338,30 @@ public:
Inst.addOperand(MCOperand::createReg(RegNum));
}
+ void addVPTPredNOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(getVPTPred())));
+ unsigned RegNum = getVPTPred() == ARMVCC::None ? 0: ARM::P0;
+ Inst.addOperand(MCOperand::createReg(RegNum));
+ }
+
+ void addVPTPredROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ addVPTPredNOperands(Inst, N-1);
+ unsigned RegNum;
+ if (getVPTPred() == ARMVCC::None) {
+ RegNum = 0;
+ } else {
+ unsigned NextOpIndex = Inst.getNumOperands();
+ const MCInstrDesc &MCID = ARMInsts[Inst.getOpcode()];
+ int TiedOp = MCID.getOperandConstraint(NextOpIndex, MCOI::TIED_TO);
+ assert(TiedOp >= 0 &&
+ "Inactive register in vpred_r is not tied to an output!");
+ RegNum = Inst.getOperand(TiedOp).getReg();
+ }
+ Inst.addOperand(MCOperand::createReg(RegNum));
+ }
+
void addCoprocNumOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(getCoproc()));
@@ -2044,6 +2387,11 @@ public:
Inst.addOperand(MCOperand::createImm(unsigned(getCondCode())));
}
+ void addITCondCodeInvOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(unsigned(ARMCC::getOppositeCondition(getCondCode()))));
+ }
+
void addCCOutOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getReg()));
@@ -2089,6 +2437,14 @@ public:
Inst.addOperand(MCOperand::createReg(*I));
}
+ void addRegListWithAPSROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const SmallVectorImpl<unsigned> &RegList = getRegList();
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = RegList.begin(), E = RegList.end(); I != E; ++I)
+ Inst.addOperand(MCOperand::createReg(*I));
+ }
+
void addDPRRegListOperands(MCInst &Inst, unsigned N) const {
addRegListOperands(Inst, N);
}
@@ -2097,6 +2453,14 @@ public:
addRegListOperands(Inst, N);
}
+ void addFPSRegListWithVPROperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
+ void addFPDRegListWithVPROperands(MCInst &Inst, unsigned N) const {
+ addRegListOperands(Inst, N);
+ }
+
void addRotImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Encoded as val>>3. The printer handles display as 8, 16, 24.
@@ -2184,6 +2548,42 @@ public:
Inst.addOperand(MCOperand::createImm(CE->getValue()));
}
+ void addImm7s4Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // FIXME: We really want to scale the value here, but the VSTR/VLDR_VSYSR
+ // instruction don't encode operands that way yet.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
+ void addImm7Shift0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE != nullptr && "Invalid operand type!");
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
+ void addImm7Shift1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE != nullptr && "Invalid operand type!");
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
+ void addImm7Shift2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE != nullptr && "Invalid operand type!");
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
+ void addImm7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ assert(CE != nullptr && "Invalid operand type!");
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
void addImm0_1020s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
@@ -2293,7 +2693,7 @@ public:
return;
}
- assert(isMem() && "Unknown value type!");
+ assert(isGPRMem() && "Unknown value type!");
assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!");
Inst.addOperand(MCOperand::createImm(Memory.OffsetImm->getValue()));
}
@@ -2318,6 +2718,21 @@ public:
Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
}
+ void addMemNoOffsetT2Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ }
+
+ void addMemNoOffsetT2NoSpOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ }
+
+ void addMemNoOffsetTOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ }
+
void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int32_t Imm = Memory.OffsetImm->getValue();
@@ -2535,6 +2950,22 @@ public:
Inst.addOperand(MCOperand::createImm(Val));
}
+ void addMemImm7s4OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ Inst.addOperand(MCOperand::createImm(0));
+ return;
+ }
+
+ int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
void addMemImm0_1020s4OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// The lower two bits are always zero and as such are not encoded.
@@ -2543,19 +2974,17 @@ public:
Inst.addOperand(MCOperand::createImm(Val));
}
- void addMemImm8OffsetOperands(MCInst &Inst, unsigned N) const {
+ void addMemImmOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::createImm(Val));
}
- void addMemPosImm8OffsetOperands(MCInst &Inst, unsigned N) const {
- addMemImm8OffsetOperands(Inst, N);
- }
-
- void addMemNegImm8OffsetOperands(MCInst &Inst, unsigned N) const {
- addMemImm8OffsetOperands(Inst, N);
+ void addMemRegRQOffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum));
+ Inst.addOperand(MCOperand::createReg(Memory.OffsetRegNum));
}
void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
@@ -2699,6 +3128,12 @@ public:
Inst.addOperand(MCOperand::createImm(Imm));
}
+ void addPowerTwoOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ }
+
void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(unsigned(getMSRMask())));
@@ -2719,6 +3154,37 @@ public:
Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
}
+ void addMVEVecListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ // When we come here, the VectorList field will identify a range
+ // of q-registers by its base register and length, and it will
+ // have already been error-checked to have the expected length and
+ // to contain only q-regs in the range q0-q7. So we can count on
+ // the base register being in the range q0-q6 (for 2 regs) or
+ // q0-q4 (for 4 regs).
+ //
+ // The MVE instructions taking a register range of this kind will
+ // need an operand in the QQPR or QQQQPR class, representing the
+ // entire range as a unit. So we must translate into that class,
+ // by finding the index of the base register in the MQPR reg
+ // class, and returning the super-register at the corresponding
+ // index in the target class.
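+ // For illustration (assuming the MQPR class enumerates q0..q7 in
+ // ascending order): a two-register list starting at q2 resolves to
+ // index 2, giving the QQPR super-register that covers q2-q3.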
+
+ const MCRegisterClass *RC_in = &ARMMCRegisterClasses[ARM::MQPRRegClassID];
+ const MCRegisterClass *RC_out = (VectorList.Count == 2) ?
+ &ARMMCRegisterClasses[ARM::QQPRRegClassID] :
+ &ARMMCRegisterClasses[ARM::QQQQPRRegClassID];
+
+ unsigned I, E = RC_out->getNumRegs();
+ for (I = 0; I < E; I++)
+ if (RC_in->getRegister(I) == VectorList.RegNum)
+ break;
+ assert(I < E && "Invalid vector list start register!");
+
+ Inst.addOperand(MCOperand::createReg(RC_out->getRegister(I)));
+ }
+
void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(VectorList.RegNum));
@@ -2745,6 +3211,16 @@ public:
Inst.addOperand(MCOperand::createImm(getVectorIndex()));
}
+ void addMVEVectorIndexOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+ }
+
+ void addMVEPairVectorIndexOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getVectorIndex()));
+ }
+
void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
@@ -2913,6 +3389,15 @@ public:
return Op;
}
+ static std::unique_ptr<ARMOperand> CreateVPTPred(ARMVCC::VPTCodes CC,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_VPTPred);
+ Op->VCC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
auto Op = make_unique<ARMOperand>(k_CoprocNum);
Op->Cop.Val = CopVal;
@@ -3044,19 +3529,31 @@ public:
assert(Regs.size() > 0 && "RegList contains no registers?");
KindTy Kind = k_RegisterList;
- if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second))
- Kind = k_DPRRegisterList;
- else if (ARMMCRegisterClasses[ARM::SPRRegClassID].
- contains(Regs.front().second))
- Kind = k_SPRRegisterList;
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+ Regs.front().second)) {
+ if (Regs.back().second == ARM::VPR)
+ Kind = k_FPDRegisterListWithVPR;
+ else
+ Kind = k_DPRRegisterList;
+ } else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(
+ Regs.front().second)) {
+ if (Regs.back().second == ARM::VPR)
+ Kind = k_FPSRegisterListWithVPR;
+ else
+ Kind = k_SPRRegisterList;
+ }
// Sort based on the register encoding values.
array_pod_sort(Regs.begin(), Regs.end());
+ if (Kind == k_RegisterList && Regs.back().second == ARM::APSR)
+ Kind = k_RegisterListWithAPSR;
+
auto Op = make_unique<ARMOperand>(Kind);
for (SmallVectorImpl<std::pair<unsigned, unsigned>>::const_iterator
I = Regs.begin(), E = Regs.end(); I != E; ++I)
Op->Registers.push_back(I->second);
+
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
return Op;
@@ -3217,15 +3714,18 @@ void ARMOperand::print(raw_ostream &OS) const {
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
+ case k_VPTPred:
+ OS << "<ARMVCC::" << ARMVPTPredToString(getVPTPred()) << ">";
+ break;
case k_CCOut:
OS << "<ccout " << RegName(getReg()) << ">";
break;
case k_ITCondMask: {
static const char *const MaskStr[] = {
- "(invalid)", "(teee)", "(tee)", "(teet)",
- "(te)", "(tete)", "(tet)", "(tett)",
- "(t)", "(ttee)", "(tte)", "(ttet)",
- "(tt)", "(ttte)", "(ttt)", "(tttt)"
+ "(invalid)", "(tttt)", "(ttt)", "(ttte)",
+ "(tt)", "(ttet)", "(tte)", "(ttee)",
+ "(t)", "(tett)", "(tet)", "(tete)",
+ "(te)", "(teet)", "(tee)", "(teee)",
};
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
@@ -3324,8 +3824,11 @@ void ARMOperand::print(raw_ostream &OS) const {
<< ", width: " << Bitfield.Width << ">";
break;
case k_RegisterList:
+ case k_RegisterListWithAPSR:
case k_DPRRegisterList:
- case k_SPRRegisterList: {
+ case k_SPRRegisterList:
+ case k_FPSRegisterListWithVPR:
+ case k_FPDRegisterListWithVPR: {
OS << "<register_list ";
const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -3423,7 +3926,7 @@ int ARMAsmParser::tryParseRegister() {
}
// Some FPUs only have 16 D registers, so D16-D31 are invalid
- if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
+ if (!hasD32() && RegNum >= ARM::D16 && RegNum <= ARM::D31)
return -1;
Parser.Lex(); // Eat identifier token.
@@ -3662,11 +4165,10 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+ int Num = MatchCoprocessorOperandName(Tok.getString().lower(), 'p');
if (Num == -1)
return MatchOperand_NoMatch;
- // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions
- if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11))
+ if (!isValidCoprocessorNumber(Num, getSTI().getFeatureBits()))
return MatchOperand_NoMatch;
Parser.Lex(); // Eat identifier token.
@@ -3685,7 +4187,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+ int Reg = MatchCoprocessorOperandName(Tok.getString().lower(), 'c');
if (Reg == -1)
return MatchOperand_NoMatch;
@@ -3752,7 +4254,8 @@ static unsigned getNextRegister(unsigned Reg) {
}
/// Parse a register list.
-bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands,
+ bool EnforceOrder) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::LCurly))
return TokError("Token is not a Left Curly Brace");
@@ -3785,6 +4288,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
RC = &ARMMCRegisterClasses[ARM::DPRRegClassID];
else if (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg))
RC = &ARMMCRegisterClasses[ARM::SPRRegClassID];
+ else if (ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
+ RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
else
return Error(RegLoc, "invalid register in register list");
@@ -3838,14 +4343,32 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
Reg = getDRegFromQReg(Reg);
isQReg = true;
}
+ if (!RC->contains(Reg) &&
+ RC->getID() == ARMMCRegisterClasses[ARM::GPRRegClassID].getID() &&
+ ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) {
+ // switch the register classes, as GPRwithAPSRnospRegClassID is a partial
+ // subset of GPRRegClassID, except that it also contains APSR.
+ RC = &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID];
+ }
+ if (Reg == ARM::VPR && (RC == &ARMMCRegisterClasses[ARM::SPRRegClassID] ||
+ RC == &ARMMCRegisterClasses[ARM::DPRRegClassID])) {
+ RC = &ARMMCRegisterClasses[ARM::FPWithVPRRegClassID];
+ EReg = MRI->getEncodingValue(Reg);
+ Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg));
+ continue;
+ }
// The register must be in the same register class as the first.
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
- // List must be monotonically increasing.
- if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
+ // In most cases, the list must be monotonically increasing. An
+ // exception is CLRM, which is order-independent anyway, so
+ // there's no potential for confusion if you write clrm {r2,r1}
+ // instead of clrm {r1,r2}.
+ if (EnforceOrder &&
+ MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
Warning(RegLoc, "register list not in ascending order");
- else
+ else if (!ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg))
return Error(RegLoc, "register list not in ascending order");
}
if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
@@ -3855,6 +4378,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
}
// VFP register lists must also be contiguous.
if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] &&
+ RC != &ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID] &&
Reg != OldReg + 1)
return Error(RegLoc, "non-contiguous register range");
EReg = MRI->getEncodingValue(Reg);
@@ -3944,7 +4468,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
// As an extension (to match gas), support a plain D register or Q register
  // (without enclosing curly braces) as a single or double entry list,
// respectively.
- if (Parser.getTok().is(AsmToken::Identifier)) {
+ if (!hasMVE() && Parser.getTok().is(AsmToken::Identifier)) {
SMLoc E = Parser.getTok().getEndLoc();
int Reg = tryParseRegister();
if (Reg == -1)
@@ -4012,9 +4536,14 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
unsigned Count = 1;
int Spacing = 0;
unsigned FirstReg = Reg;
+
+ if (hasMVE() && !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) {
+ Error(Parser.getTok().getLoc(), "vector register in range Q0-Q7 expected");
+ return MatchOperand_ParseFail;
+ }
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
- if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ else if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
FirstReg = Reg = getDRegFromQReg(Reg);
Spacing = 1; // double-spacing requires explicit D registers, otherwise
// it's ambiguous with four-register single spaced.
@@ -4044,14 +4573,17 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
// Allow Q regs and just interpret them as the two D sub-registers.
- if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
// If the register is the same as the start reg, there's nothing
// more to do.
if (Reg == EndReg)
continue;
// The register must be in the same register class as the first.
- if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ if ((hasMVE() &&
+ !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(EndReg)) ||
+ (!hasMVE() &&
+ !ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg))) {
Error(AfterMinusLoc, "invalid register in register list");
return MatchOperand_ParseFail;
}
@@ -4084,13 +4616,21 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
Error(RegLoc, "register expected");
return MatchOperand_ParseFail;
}
+
+ if (hasMVE()) {
+ if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) {
+ Error(RegLoc, "vector register in range Q0-Q7 expected");
+ return MatchOperand_ParseFail;
+ }
+ Spacing = 1;
+ }
// vector register lists must be contiguous.
  // It's OK to use the enumeration values directly here, as the
  // VFP register classes have the enum sorted properly.
//
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
- if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ else if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
if (!Spacing)
Spacing = 1; // Register range implies a single spaced list.
else if (Spacing == 2) {
@@ -4151,30 +4691,20 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
switch (LaneKind) {
case NoLanes:
+ case AllLanes: {
// Two-register operands have been converted to the
// composite register classes.
- if (Count == 2) {
- const MCRegisterClass *RC = (Spacing == 1) ?
- &ARMMCRegisterClasses[ARM::DPairRegClassID] :
- &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
- FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
- }
- Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
- (Spacing == 2), S, E));
- break;
- case AllLanes:
- // Two-register operands have been converted to the
- // composite register classes.
- if (Count == 2) {
+ if (Count == 2 && !hasMVE()) {
const MCRegisterClass *RC = (Spacing == 1) ?
&ARMMCRegisterClasses[ARM::DPairRegClassID] :
&ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
}
- Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
- (Spacing == 2),
- S, E));
+ auto Create = (LaneKind == NoLanes ? ARMOperand::CreateVectorList :
+ ARMOperand::CreateVectorListAllLanes);
+ Operands.push_back(Create(FirstReg, Count, (Spacing == 2), S, E));
break;
+ }
case IndexedLane:
Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
LaneIndex,
@@ -5061,6 +5591,21 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2);
}
+void ARMAsmParser::cvtMVEVMOVQtoDReg(
+ MCInst &Inst, const OperandVector &Operands) {
+
+ // mnemonic, condition code, Rt, Rt2, Qd, idx, Qd again, idx2
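+ // (e.g., assuming the usual MVE assembly syntax, something like
+ // 'vmov r0, r1, q2[3], q2[1]')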
+ assert(Operands.size() == 8);
+
+ ((ARMOperand &)*Operands[2]).addRegOperands(Inst, 1); // Rt
+ ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1); // Rt2
+ ((ARMOperand &)*Operands[4]).addRegOperands(Inst, 1); // Qd
+ ((ARMOperand &)*Operands[5]).addMVEPairVectorIndexOperands(Inst, 1); // idx
+ // skip second copy of Qd in Operands[6]
+ ((ARMOperand &)*Operands[7]).addMVEPairVectorIndexOperands(Inst, 1); // idx2
+ ((ARMOperand &)*Operands[1]).addCondCodeOperands(Inst, 2); // condition code
+}
+
/// Parse an ARM memory expression; return false if successful, otherwise
/// return true or an error. The first token must be a '[' when called.
bool ARMAsmParser::parseMemory(OperandVector &Operands) {
@@ -5275,6 +5820,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
St = ARM_AM::ror;
else if (ShiftName == "rrx" || ShiftName == "RRX")
St = ARM_AM::rrx;
+ else if (ShiftName == "uxtw" || ShiftName == "UXTW")
+ St = ARM_AM::uxtw;
else
return Error(Loc, "illegal shift operator");
Parser.Lex(); // Eat shift type token.
@@ -5463,7 +6010,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
case AsmToken::LBrac:
return parseMemory(Operands);
case AsmToken::LCurly:
- return parseRegisterList(Operands);
+ return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
case AsmToken::Dollar:
case AsmToken::Hash:
// #42 -> immediate.
@@ -5595,6 +6142,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
case MCObjectFileInfo::IsWasm:
CurrentFormat = WASM;
break;
+ case MCObjectFileInfo::IsXCOFF:
+ llvm_unreachable("unexpected object format");
+ break;
}
if (~Prefix->SupportedFormats & CurrentFormat) {
@@ -5621,11 +6171,14 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
// FIXME: Would be nice to autogen this.
// FIXME: This is a bit of a maze of special cases.
StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
+ StringRef ExtraToken,
unsigned &PredicationCode,
+ unsigned &VPTPredicationCode,
bool &CarrySetting,
unsigned &ProcessorIMod,
StringRef &ITMask) {
PredicationCode = ARMCC::AL;
+ VPTPredicationCode = ARMVCC::None;
CarrySetting = false;
ProcessorIMod = 0;
@@ -5649,7 +6202,12 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "bxns" || Mnemonic == "blxns" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
- Mnemonic == "vfmal" || Mnemonic == "vfmsl")
+ Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
+ Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
+ Mnemonic == "csel" || Mnemonic == "csinc" ||
+ Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" ||
+ Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
+ Mnemonic == "csetm")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -5657,7 +6215,18 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
if (Mnemonic != "adcs" && Mnemonic != "bics" && Mnemonic != "movs" &&
Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" &&
Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" &&
- Mnemonic != "sbcs" && Mnemonic != "rscs") {
+ Mnemonic != "sbcs" && Mnemonic != "rscs" &&
+ !(hasMVE() &&
+ (Mnemonic == "vmine" ||
+ Mnemonic == "vshle" || Mnemonic == "vshlt" || Mnemonic == "vshllt" ||
+ Mnemonic == "vrshle" || Mnemonic == "vrshlt" ||
+ Mnemonic == "vmvne" || Mnemonic == "vorne" ||
+ Mnemonic == "vnege" || Mnemonic == "vnegt" ||
+ Mnemonic == "vmule" || Mnemonic == "vmult" ||
+ Mnemonic == "vrintne" ||
+ Mnemonic == "vcmult" || Mnemonic == "vcmule" ||
+ Mnemonic == "vpsele" || Mnemonic == "vpselt" ||
+ Mnemonic.startswith("vq")))) {
unsigned CC = ARMCondCodeFromString(Mnemonic.substr(Mnemonic.size()-2));
if (CC != ~0U) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
@@ -5677,7 +6246,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
Mnemonic == "vfms" || Mnemonic == "vfnms" || Mnemonic == "fconsts" ||
- Mnemonic == "bxns" || Mnemonic == "blxns" ||
+ Mnemonic == "bxns" || Mnemonic == "blxns" || Mnemonic == "vfmas" ||
+ Mnemonic == "vmlas" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -5698,12 +6268,36 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
}
}
+ if (isMnemonicVPTPredicable(Mnemonic, ExtraToken) && Mnemonic != "vmovlt" &&
+ Mnemonic != "vshllt" && Mnemonic != "vrshrnt" && Mnemonic != "vshrnt" &&
+ Mnemonic != "vqrshrunt" && Mnemonic != "vqshrunt" &&
+ Mnemonic != "vqrshrnt" && Mnemonic != "vqshrnt" && Mnemonic != "vmullt" &&
+ Mnemonic != "vqmovnt" && Mnemonic != "vqmovunt" &&
+ Mnemonic != "vqmovnt" && Mnemonic != "vmovnt" && Mnemonic != "vqdmullt" &&
+ Mnemonic != "vpnot" && Mnemonic != "vcvtt" && Mnemonic != "vcvt") {
+ unsigned CC = ARMVectorCondCodeFromString(Mnemonic.substr(Mnemonic.size()-1));
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size()-1);
+ VPTPredicationCode = CC;
+ }
+ return Mnemonic;
+ }
+
// The "it" instruction has the condition mask on the end of the mnemonic.
if (Mnemonic.startswith("it")) {
ITMask = Mnemonic.slice(2, Mnemonic.size());
Mnemonic = Mnemonic.slice(0, 2);
}
+ if (Mnemonic.startswith("vpst")) {
+ ITMask = Mnemonic.slice(4, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 4);
+ }
+ else if (Mnemonic.startswith("vpt")) {
+ ITMask = Mnemonic.slice(3, Mnemonic.size());
+ Mnemonic = Mnemonic.slice(0, 3);
+ }
+
return Mnemonic;
}
@@ -5711,9 +6305,14 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
/// inclusion of carry set or predication code operands.
//
// FIXME: It would be nice to autogen this.
-void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
+ StringRef ExtraToken,
+ StringRef FullInst,
bool &CanAcceptCarrySet,
- bool &CanAcceptPredicationCode) {
+ bool &CanAcceptPredicationCode,
+ bool &CanAcceptVPTPredicationCode) {
+ CanAcceptVPTPredicationCode = isMnemonicVPTPredicable(Mnemonic, ExtraToken);
+
CanAcceptCarrySet =
Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
@@ -5742,7 +6341,18 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
Mnemonic == "sb" || Mnemonic == "ssbb" ||
- Mnemonic == "pssbb") {
+ Mnemonic == "pssbb" ||
+ Mnemonic == "bfcsel" || Mnemonic == "wls" ||
+ Mnemonic == "dls" || Mnemonic == "le" || Mnemonic == "csel" ||
+ Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
+ Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
+ Mnemonic == "cset" || Mnemonic == "csetm" ||
+ Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
+ (hasMVE() &&
+ (Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") ||
+ Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") ||
+ Mnemonic.startswith("wlstp") || Mnemonic.startswith("dlstp") ||
+ Mnemonic.startswith("letp")))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -5976,7 +6586,8 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
OperandVector &Operands) {
// VRINT{Z, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
- if ((Mnemonic == "vrintz" || Mnemonic == "vrintx") &&
+ if ((((Mnemonic == "vrintz" || Mnemonic == "vrintx") && !hasMVE()) ||
+ Mnemonic == "vrintr") &&
(static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
@@ -5994,6 +6605,47 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
return false;
}
+bool ARMAsmParser::shouldOmitVectorPredicateOperand(StringRef Mnemonic,
+ OperandVector &Operands) {
+ if (!hasMVE() || Operands.size() < 3)
+ return true;
+
+ if (Mnemonic.startswith("vld2") || Mnemonic.startswith("vld4") ||
+ Mnemonic.startswith("vst2") || Mnemonic.startswith("vst4"))
+ return true;
+
+ if (Mnemonic.startswith("vctp") || Mnemonic.startswith("vpnot"))
+ return false;
+
+ if (Mnemonic.startswith("vmov") &&
+ !(Mnemonic.startswith("vmovl") || Mnemonic.startswith("vmovn") ||
+ Mnemonic.startswith("vmovx"))) {
+ for (auto &Operand : Operands) {
+ if (static_cast<ARMOperand &>(*Operand).isVectorIndex() ||
+ ((*Operand).isReg() &&
+ (ARMMCRegisterClasses[ARM::SPRRegClassID].contains(
+ (*Operand).getReg()) ||
+ ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+ (*Operand).getReg())))) {
+ return true;
+ }
+ }
+ return false;
+ } else {
+ for (auto &Operand : Operands) {
+ // We check the larger class QPR instead of just the legal class
+ // MQPR, to more accurately report errors when using Q registers
+ // outside of the allowed range.
+ if (static_cast<ARMOperand &>(*Operand).isVectorIndex() ||
+ (Operand->isReg() &&
+ (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(
+ Operand->getReg()))))
+ return false;
+ }
+ return true;
+ }
+}
+
static bool isDataTypeToken(StringRef Tok) {
return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -6010,7 +6662,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
-static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
+static void applyMnemonicAliases(StringRef &Mnemonic,
+ const FeatureBitset &Features,
unsigned VariantID);
// The GNU assembler has aliases of ldrd and strd with the second register
@@ -6033,7 +6686,7 @@ void ARMAsmParser::fixupGNULDRDAlias(StringRef Mnemonic,
if (!Op2.isReg())
return;
- if (!Op3.isMem())
+ if (!Op3.isGPRMem())
return;
const MCRegisterClass &GPR = MRI->getRegClass(ARM::GPRRegClassID);
@@ -6068,7 +6721,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// The generic tblgen'erated code does this later, at the start of
// MatchInstructionImpl(), but that's too late for aliases that include
// any sort of suffix.
- uint64_t AvailableFeatures = getAvailableFeatures();
+ const FeatureBitset &AvailableFeatures = getAvailableFeatures();
unsigned AssemblerDialect = getParser().getAssemblerDialect();
applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
@@ -6084,14 +6737,16 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
+ StringRef ExtraToken = Name.slice(Next, Name.find(' ', Next + 1));
// Split out the predication code and carry setting flag from the mnemonic.
unsigned PredicationCode;
+ unsigned VPTPredicationCode;
unsigned ProcessorIMod;
bool CarrySetting;
StringRef ITMask;
- Mnemonic = splitMnemonic(Mnemonic, PredicationCode, CarrySetting,
- ProcessorIMod, ITMask);
+ Mnemonic = splitMnemonic(Mnemonic, ExtraToken, PredicationCode, VPTPredicationCode,
+ CarrySetting, ProcessorIMod, ITMask);
// In Thumb1, only the branch (B) instruction can be predicated.
if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
@@ -6100,15 +6755,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(ARMOperand::CreateToken(Mnemonic, NameLoc));
- // Handle the IT instruction ITMask. Convert it to a bitmask. This
- // is the mask as it will be for the IT encoding if the conditional
- // encoding has a '1' as it's bit0 (i.e. 't' ==> '1'). In the case
- // where the conditional bit0 is zero, the instruction post-processing
- // will adjust the mask accordingly.
- if (Mnemonic == "it") {
- SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
+ // Handle the mask for IT and VPT instructions. In ARMOperand and
+ // MCOperand, this is stored in a format independent of the
+ // condition code: the lowest set bit indicates the end of the
+ // encoding, and above that, a 1 bit indicates 'else', and a 0 bit
+ // indicates 'then'. E.g.
+ // IT -> 1000
+ // ITx -> x100 (ITT -> 0100, ITE -> 1100)
+ // ITxy -> xy10 (e.g. ITET -> 1010)
+ // ITxyz -> xyz1 (e.g. ITEET -> 1101)
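+ // The mask suffix of a VPT/VPST mnemonic is encoded the same way below,
+ // with 't' and 'e' read identically (e.g. a "te" suffix -> 0110).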
+ if (Mnemonic == "it" || Mnemonic.startswith("vpt") ||
+ Mnemonic.startswith("vpst")) {
+ SMLoc Loc = Mnemonic == "it" ? SMLoc::getFromPointer(NameLoc.getPointer() + 2) :
+ Mnemonic == "vpt" ? SMLoc::getFromPointer(NameLoc.getPointer() + 3) :
+ SMLoc::getFromPointer(NameLoc.getPointer() + 4);
if (ITMask.size() > 3) {
- return Error(Loc, "too many conditions on IT instruction");
+ if (Mnemonic == "it")
+ return Error(Loc, "too many conditions on IT instruction");
+ return Error(Loc, "too many conditions on VPT instruction");
}
unsigned Mask = 8;
for (unsigned i = ITMask.size(); i != 0; --i) {
@@ -6117,7 +6781,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
- if (ITMask[i - 1] == 't')
+ if (ITMask[i - 1] == 'e')
Mask |= 8;
}
Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));
@@ -6133,8 +6797,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// ConditionCode operands to match the mnemonic "as written" and then we let
// the matcher deal with finding the right instruction or generating an
// appropriate error.
- bool CanAcceptCarrySet, CanAcceptPredicationCode;
- getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
+ bool CanAcceptCarrySet, CanAcceptPredicationCode, CanAcceptVPTPredicationCode;
+ getMnemonicAcceptInfo(Mnemonic, ExtraToken, Name, CanAcceptCarrySet,
+ CanAcceptPredicationCode, CanAcceptVPTPredicationCode);
// If we had a carry-set on an instruction that can't do that, issue an
// error.
@@ -6149,6 +6814,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
"' is not predicable, but condition code specified");
}
+ // If we had a VPT predication code on an instruction that can't do that, issue an
+ // error.
+ if (!CanAcceptVPTPredicationCode && VPTPredicationCode != ARMVCC::None) {
+ return Error(NameLoc, "instruction '" + Mnemonic +
+ "' is not VPT predicable, but VPT code T/E is specified");
+ }
+
// Add the carry setting operand, if necessary.
if (CanAcceptCarrySet) {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size());
@@ -6161,7 +6833,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
CarrySetting);
Operands.push_back(ARMOperand::CreateCondCode(
- ARMCC::CondCodes(PredicationCode), Loc));
+ ARMCC::CondCodes(PredicationCode), Loc));
+ }
+
+ // Add the VPT predication code operand, if necessary.
+ // FIXME: We don't add them for the instructions filtered below as these can
+ // have custom operands which need special parsing. This parsing requires
+ // the operand to be in the same place in the OperandVector as in its
+ // tblgen definition. Since these instructions may also have the scalar
+ // predication operand, we do not add the vector one here; it is fixed
+ // up further down, once the operands have been parsed.
+ if (CanAcceptVPTPredicationCode && Mnemonic != "vmov" &&
+ !Mnemonic.startswith("vcmp") &&
+ !(Mnemonic.startswith("vcvt") && Mnemonic != "vcvta" &&
+ Mnemonic != "vcvtn" && Mnemonic != "vcvtp" && Mnemonic != "vcvtm")) {
+ SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Mnemonic.size() +
+ CarrySetting);
+ Operands.push_back(ARMOperand::CreateVPTPred(
+ ARMVCC::VPTCodes(VPTPredicationCode), Loc));
}
// Add the processor imod operand, if necessary.
@@ -6177,7 +6866,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
while (Next != StringRef::npos) {
Start = Next;
Next = Name.find('.', Start + 1);
- StringRef ExtraToken = Name.slice(Start, Next);
+ ExtraToken = Name.slice(Start, Next);
// Some NEON instructions have an optional datatype suffix that is
// completely ignored. Check for that.
@@ -6233,57 +6922,173 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Some instructions have the same mnemonic, but don't always
// have a predicate. Distinguish them here and delete the
- // predicate if needed.
+ // appropriate predicate if needed. This could be either the scalar
+ // predication code or the vector predication code.
if (PredicationCode == ARMCC::AL &&
shouldOmitPredicateOperand(Mnemonic, Operands))
Operands.erase(Operands.begin() + 1);
- // ARM mode 'blx' need special handling, as the register operand version
- // is predicable, but the label operand version is not. So, we can't rely
- // on the Mnemonic based checking to correctly figure out when to put
- // a k_CondCode operand in the list. If we're trying to match the label
- // version, remove the k_CondCode operand here.
- if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
- static_cast<ARMOperand &>(*Operands[2]).isImm())
- Operands.erase(Operands.begin() + 1);
- // Adjust operands of ldrexd/strexd to MCK_GPRPair.
- // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
- // a single GPRPair reg operand is used in the .td file to replace the two
- // GPRs. However, when parsing from asm, the two GRPs cannot be automatically
- // expressed as a GPRPair, so we have to manually merge them.
- // FIXME: We would really like to be able to tablegen'erate this.
- if (!isThumb() && Operands.size() > 4 &&
- (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
- Mnemonic == "stlexd")) {
- bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
- unsigned Idx = isLoad ? 2 : 3;
- ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
- ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
-
- const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
- // Adjust only if Op1 and Op2 are GPRs.
- if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
- MRC.contains(Op2.getReg())) {
- unsigned Reg1 = Op1.getReg();
- unsigned Reg2 = Op2.getReg();
- unsigned Rt = MRI->getEncodingValue(Reg1);
- unsigned Rt2 = MRI->getEncodingValue(Reg2);
-
- // Rt2 must be Rt + 1 and Rt must be even.
- if (Rt + 1 != Rt2 || (Rt & 1)) {
- return Error(Op2.getStartLoc(),
- isLoad ? "destination operands must be sequential"
- : "source operands must be sequential");
+ if (hasMVE()) {
+ if (!shouldOmitVectorPredicateOperand(Mnemonic, Operands) &&
+ Mnemonic == "vmov" && PredicationCode == ARMCC::LT) {
+ // Very nasty hack to deal with the ambiguity between the vector
+ // predicated instruction vmovlt and the scalar predicated vmov with
+ // condition 'lt'. We cannot tell them apart until we have parsed
+ // their operands.
+ Operands.erase(Operands.begin() + 1);
+ Operands.erase(Operands.begin());
+ SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+ SMLoc PLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+ Mnemonic.size() - 1 + CarrySetting);
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateVPTPred(ARMVCC::None, PLoc));
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(StringRef("vmovlt"), MLoc));
+ } else if (Mnemonic == "vcvt" && PredicationCode == ARMCC::NE &&
+ !shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+ // Another nasty hack to deal with the ambiguity between vcvt with scalar
+ // predication 'ne' and vcvtn with vector predication 'e'. As above we
+ // can only distinguish between the two after we have parsed their
+ // operands.
+ Operands.erase(Operands.begin() + 1);
+ Operands.erase(Operands.begin());
+ SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+ SMLoc PLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+ Mnemonic.size() - 1 + CarrySetting);
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateVPTPred(ARMVCC::Else, PLoc));
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(StringRef("vcvtn"), MLoc));
+ } else if (Mnemonic == "vmul" && PredicationCode == ARMCC::LT &&
+ !shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+ // Another hack, this time to distinguish between scalar predicated vmul
+ // with 'lt' predication code and the vector instruction vmullt with
+ // vector predication code "none"
+ Operands.erase(Operands.begin() + 1);
+ Operands.erase(Operands.begin());
+ SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(StringRef("vmullt"), MLoc));
+ }
+ // For vmov and vcmp, as mentioned earlier, we did not add the vector
+ // predication code, since these may contain operands that require
+ // special parsing. So now we have to see if they require vector
+ // predication and replace the scalar one with the vector predication
+ // operand if that is the case.
+ else if (Mnemonic == "vmov" || Mnemonic.startswith("vcmp") ||
+ (Mnemonic.startswith("vcvt") && !Mnemonic.startswith("vcvta") &&
+ !Mnemonic.startswith("vcvtn") && !Mnemonic.startswith("vcvtp") &&
+ !Mnemonic.startswith("vcvtm"))) {
+ if (!shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+ // We could not split the vector predicate off vcvt because it might
+ // have been the scalar vcvtt instruction. Now that we know it's a vector
+ // instruction, we still need to check whether it's the vector
+ // predicated vcvt with 'Then' predication or the vector vcvtt. We can
+ // distinguish the two based on the suffixes, if it is any of
+ // ".f16.f32", ".f32.f16", ".f16.f64" or ".f64.f16" then it is the vcvtt.
+ if (Mnemonic.startswith("vcvtt") && Operands.size() >= 4) {
+ auto Sz1 = static_cast<ARMOperand &>(*Operands[2]);
+ auto Sz2 = static_cast<ARMOperand &>(*Operands[3]);
+ if (!(Sz1.isToken() && Sz1.getToken().startswith(".f") &&
+ Sz2.isToken() && Sz2.getToken().startswith(".f"))) {
+ Operands.erase(Operands.begin());
+ SMLoc MLoc = SMLoc::getFromPointer(NameLoc.getPointer());
+ VPTPredicationCode = ARMVCC::Then;
+
+ Mnemonic = Mnemonic.substr(0, 4);
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(Mnemonic, MLoc));
+ }
+ }
+ Operands.erase(Operands.begin() + 1);
+ SMLoc PLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+ Mnemonic.size() + CarrySetting);
+ Operands.insert(Operands.begin() + 1,
+ ARMOperand::CreateVPTPred(
+ ARMVCC::VPTCodes(VPTPredicationCode), PLoc));
+ }
+ } else if (CanAcceptVPTPredicationCode) {
+ // For all other instructions, make sure only one of the two
+ // predication operands is left behind, depending on whether we should
+ // use the vector predication.
+ if (shouldOmitVectorPredicateOperand(Mnemonic, Operands)) {
+ if (CanAcceptPredicationCode)
+ Operands.erase(Operands.begin() + 2);
+ else
+ Operands.erase(Operands.begin() + 1);
+ } else if (CanAcceptPredicationCode && PredicationCode == ARMCC::AL) {
+ Operands.erase(Operands.begin() + 1);
}
- unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
- &(MRI->getRegClass(ARM::GPRPairRegClassID)));
- Operands[Idx] =
- ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
- Operands.erase(Operands.begin() + Idx + 1);
}
}
+ if (VPTPredicationCode != ARMVCC::None) {
+ bool usedVPTPredicationCode = false;
+ for (unsigned I = 1; I < Operands.size(); ++I)
+ if (static_cast<ARMOperand &>(*Operands[I]).isVPTPred())
+ usedVPTPredicationCode = true;
+ if (!usedVPTPredicationCode) {
+ // If we have a VPT predication code and we haven't just turned it
+ // into an operand, then it was a mistake for splitMnemonic to
+ // separate it from the rest of the mnemonic in the first place,
+ // and this may lead to wrong disassembly (e.g. scalar floating
+ // point VCMPE is actually a different instruction from VCMP, so
+ // we mustn't treat them the same). In that situation, glue it
+ // back on.
+ Mnemonic = Name.slice(0, Mnemonic.size() + 1);
+ Operands.erase(Operands.begin());
+ Operands.insert(Operands.begin(),
+ ARMOperand::CreateToken(Mnemonic, NameLoc));
+ }
+ }
+
+ // ARM mode 'blx' need special handling, as the register operand version
+ // is predicable, but the label operand version is not. So, we can't rely
+ // on the Mnemonic based checking to correctly figure out when to put
+ // a k_CondCode operand in the list. If we're trying to match the label
+ // version, remove the k_CondCode operand here.
+ if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
+ static_cast<ARMOperand &>(*Operands[2]).isImm())
+ Operands.erase(Operands.begin() + 1);
+
+ // Adjust operands of ldrexd/strexd to MCK_GPRPair.
+ // ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
+ // a single GPRPair reg operand is used in the .td file to replace the two
+ // GPRs. However, when parsing from asm, the two GPRs cannot be
+ // automatically expressed as a GPRPair, so we have to manually merge
+ // them.
+ // FIXME: We would really like to be able to tablegen'erate this.
+ if (!isThumb() && Operands.size() > 4 &&
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
+ Mnemonic == "stlexd")) {
+ bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
+ unsigned Idx = isLoad ? 2 : 3;
+ ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
+
+ const MCRegisterClass &MRC = MRI->getRegClass(ARM::GPRRegClassID);
+ // Adjust only if Op1 and Op2 are GPRs.
+ if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
+ MRC.contains(Op2.getReg())) {
+ unsigned Reg1 = Op1.getReg();
+ unsigned Reg2 = Op2.getReg();
+ unsigned Rt = MRI->getEncodingValue(Reg1);
+ unsigned Rt2 = MRI->getEncodingValue(Reg2);
+
+ // Rt2 must be Rt + 1 and Rt must be even.
+ if (Rt + 1 != Rt2 || (Rt & 1)) {
+ return Error(Op2.getStartLoc(),
+ isLoad ? "destination operands must be sequential"
+ : "source operands must be sequential");
+ }
+ unsigned NewReg = MRI->getMatchingSuperReg(
+ Reg1, ARM::gsub_0, &(MRI->getRegClass(ARM::GPRPairRegClassID)));
+ Operands[Idx] =
+ ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
+ Operands.erase(Operands.begin() + Idx + 1);
+ }
+ }
+
// GNU Assembler extension (compatibility).
fixupGNULDRDAlias(Mnemonic, Operands);
@@ -6442,6 +7247,17 @@ bool ARMAsmParser::validateLDRDSTRD(MCInst &Inst,
return false;
}
+static int findFirstVectorPredOperandIdx(const MCInstrDesc &MCID) {
+ for (unsigned i = 0; i < MCID.NumOperands; ++i) {
+ if (ARM::isVpred(MCID.OpInfo[i].OperandType))
+ return i;
+ }
+ return -1;
+}
+
+static bool isVectorPredicable(const MCInstrDesc &MCID) {
+ return findFirstVectorPredOperandIdx(MCID) != -1;
+}
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::validateInstruction(MCInst &Inst,
@@ -6473,12 +7289,25 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
} else if (isThumbTwo() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
- Inst.getOpcode() != ARM::t2Bcc) {
+ Inst.getOpcode() != ARM::t2Bcc &&
+ Inst.getOpcode() != ARM::t2BFic) {
return Error(Loc, "predicated instructions must be in IT block");
} else if (!isThumb() && !useImplicitITARM() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
ARMCC::AL) {
return Warning(Loc, "predicated instructions should be in IT block");
+ } else if (!MCID.isPredicable()) {
+ // Check that the instruction doesn't have a predicate operand that
+ // it's not allowed to use anyway. Sometimes this happens in order
+ // to keep instructions the same shape even though one cannot
+ // legally be predicated, e.g. vmul.f16 vs vmul.f32.
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ if (Inst.getOperand(i).getImm() != ARMCC::AL)
+ return Error(Loc, "instruction is not predicable");
+ break;
+ }
+ }
}
// PC-setting instructions in an IT block, but not the last instruction of
@@ -6487,6 +7316,28 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Loc, "instruction must be outside of IT block or the last instruction in an IT block");
}
+ if (inVPTBlock() && !instIsBreakpoint(Inst)) {
+ unsigned Bit = extractITMaskBit(VPTState.Mask, VPTState.CurPosition);
+ if (!isVectorPredicable(MCID))
+ return Error(Loc, "instruction in VPT block must be predicable");
+ unsigned Pred = Inst.getOperand(findFirstVectorPredOperandIdx(MCID)).getImm();
+ unsigned VPTPred = Bit ? ARMVCC::Else : ARMVCC::Then;
+ if (Pred != VPTPred) {
+ SMLoc PredLoc;
+ for (unsigned I = 1; I < Operands.size(); ++I)
+ if (static_cast<ARMOperand &>(*Operands[I]).isVPTPred())
+ PredLoc = Operands[I]->getStartLoc();
+ return Error(PredLoc, "incorrect predication in VPT block; got '" +
+ StringRef(ARMVPTPredToString(ARMVCC::VPTCodes(Pred))) +
+ "', but expected '" +
+ ARMVPTPredToString(ARMVCC::VPTCodes(VPTPred)) + "'");
+ }
+ }
+ else if (isVectorPredicable(MCID) &&
+ Inst.getOperand(findFirstVectorPredOperandIdx(MCID)).getImm() !=
+ ARMVCC::None)
+ return Error(Loc, "VPT predicated instructions must be in VPT block");
+
const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
case ARM::t2IT: {
@@ -6496,11 +7347,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
unsigned Cond = Inst.getOperand(0).getImm();
unsigned Mask = Inst.getOperand(1).getImm();
- // Mask hasn't been modified to the IT instruction encoding yet so
- // conditions only allowing a 't' are a block of 1s starting at bit 3
- // followed by all 0s. Easiest way is to just list the 4 possibilities.
- if (Cond == ARMCC::AL && Mask != 8 && Mask != 12 && Mask != 14 &&
- Mask != 15)
+ // Conditions only allowing a 't' are those with no set bit except
+ // the lowest-order one that indicates the end of the sequence. In
+ // other words, powers of 2.
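+ // For example, with an AL condition a mask of 0010 (ITTT) is accepted,
+ // but 1100 (ITE) is rejected as unpredictable.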
+ if (Cond == ARMCC::AL && countPopulation(Mask) != 1)
return Error(Loc, "unpredictable IT predicate sequence");
break;
}
@@ -6609,6 +7459,54 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination register and base register can't be identical");
return false;
}
+
+ case ARM::MVE_VLDRBU8_rq:
+ case ARM::MVE_VLDRBU16_rq:
+ case ARM::MVE_VLDRBS16_rq:
+ case ARM::MVE_VLDRBU32_rq:
+ case ARM::MVE_VLDRBS32_rq:
+ case ARM::MVE_VLDRHU16_rq:
+ case ARM::MVE_VLDRHU16_rq_u:
+ case ARM::MVE_VLDRHU32_rq:
+ case ARM::MVE_VLDRHU32_rq_u:
+ case ARM::MVE_VLDRHS32_rq:
+ case ARM::MVE_VLDRHS32_rq_u:
+ case ARM::MVE_VLDRWU32_rq:
+ case ARM::MVE_VLDRWU32_rq_u:
+ case ARM::MVE_VLDRDU64_rq:
+ case ARM::MVE_VLDRDU64_rq_u:
+ case ARM::MVE_VLDRWU32_qi:
+ case ARM::MVE_VLDRWU32_qi_pre:
+ case ARM::MVE_VLDRDU64_qi:
+ case ARM::MVE_VLDRDU64_qi_pre: {
+ // Qd must be different from Qm.
+ unsigned QdIdx = 0, QmIdx = 2;
+ bool QmIsPointer = false;
+ switch (Opcode) {
+ case ARM::MVE_VLDRWU32_qi:
+ case ARM::MVE_VLDRDU64_qi:
+ QmIdx = 1;
+ QmIsPointer = true;
+ break;
+ case ARM::MVE_VLDRWU32_qi_pre:
+ case ARM::MVE_VLDRDU64_qi_pre:
+ QdIdx = 1;
+ QmIsPointer = true;
+ break;
+ }
+
+ const unsigned Qd = MRI->getEncodingValue(Inst.getOperand(QdIdx).getReg());
+ const unsigned Qm = MRI->getEncodingValue(Inst.getOperand(QmIdx).getReg());
+
+ if (Qd == Qm) {
+ return Error(Operands[3]->getStartLoc(),
+ Twine("destination vector register and vector ") +
+ (QmIsPointer ? "pointer" : "offset") +
+ " register can't be identical");
+ }
+ return false;
+ }
+
case ARM::SBFX:
case ARM::t2SBFX:
case ARM::UBFX:
@@ -6776,6 +7674,20 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
}
break;
+ case ARM::t2ADDri:
+ case ARM::t2ADDri12:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDrs:
+ case ARM::t2SUBri:
+ case ARM::t2SUBri12:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBrs:
+ if (Inst.getOperand(0).getReg() == ARM::SP &&
+ Inst.getOperand(1).getReg() != ARM::SP)
+ return Error(Operands[4]->getStartLoc(),
+ "source register must be sp if destination is sp");
+ break;
+
// Final range checking for Thumb unconditional branch instructions.
case ARM::tB:
if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
@@ -6845,6 +7757,61 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"code specified");
break;
}
+ case ARM::t2BFi:
+ case ARM::t2BFr:
+ case ARM::t2BFLi:
+ case ARM::t2BFLr: {
+ if (!static_cast<ARMOperand &>(*Operands[2]).isUnsignedOffset<4, 1>() ||
+ (Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == 0))
+ return Error(Operands[2]->getStartLoc(),
+ "branch location out of range or not a multiple of 2");
+
+ if (Opcode == ARM::t2BFi) {
+ if (!static_cast<ARMOperand &>(*Operands[3]).isSignedOffset<16, 1>())
+ return Error(Operands[3]->getStartLoc(),
+ "branch target out of range or not a multiple of 2");
+ } else if (Opcode == ARM::t2BFLi) {
+ if (!static_cast<ARMOperand &>(*Operands[3]).isSignedOffset<18, 1>())
+ return Error(Operands[3]->getStartLoc(),
+ "branch target out of range or not a multiple of 2");
+ }
+ break;
+ }
+ case ARM::t2BFic: {
+ if (!static_cast<ARMOperand &>(*Operands[1]).isUnsignedOffset<4, 1>() ||
+ (Inst.getOperand(0).isImm() && Inst.getOperand(0).getImm() == 0))
+ return Error(Operands[1]->getStartLoc(),
+ "branch location out of range or not a multiple of 2");
+
+ if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<16, 1>())
+ return Error(Operands[2]->getStartLoc(),
+ "branch target out of range or not a multiple of 2");
+
+ assert(Inst.getOperand(0).isImm() == Inst.getOperand(2).isImm() &&
+ "branch location and else branch target should either both be "
+ "immediates or both labels");
+
+ if (Inst.getOperand(0).isImm() && Inst.getOperand(2).isImm()) {
+ int Diff = Inst.getOperand(2).getImm() - Inst.getOperand(0).getImm();
+ if (Diff != 4 && Diff != 2)
+ return Error(
+ Operands[3]->getStartLoc(),
+ "else branch target must be 2 or 4 greater than the branch location");
+ }
+ break;
+ }
+ case ARM::t2CLRM: {
+ for (unsigned i = 2; i < Inst.getNumOperands(); i++) {
+ if (Inst.getOperand(i).isReg() &&
+ !ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(
+ Inst.getOperand(i).getReg())) {
+ return Error(Operands[2]->getStartLoc(),
+ "invalid register in register list. Valid registers are "
+ "r0-r12, lr/r14 and APSR.");
+ }
+ }
+ break;
+ }
case ARM::DSB:
case ARM::t2DSB: {
@@ -6892,6 +7859,39 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"list of registers must be at least 1 and at most 16");
break;
}
+ case ARM::MVE_VQDMULLs32bh:
+ case ARM::MVE_VQDMULLs32th:
+ case ARM::MVE_VCMULf32:
+ case ARM::MVE_VMULLs32bh:
+ case ARM::MVE_VMULLs32th:
+ case ARM::MVE_VMULLu32bh:
+ case ARM::MVE_VMULLu32th: {
+ if (Operands[3]->getReg() == Operands[4]->getReg()) {
+ return Error (Operands[3]->getStartLoc(),
+ "Qd register and Qn register can't be identical");
+ }
+ if (Operands[3]->getReg() == Operands[5]->getReg()) {
+ return Error (Operands[3]->getStartLoc(),
+ "Qd register and Qm register can't be identical");
+ }
+ break;
+ }
+ case ARM::MVE_VMOV_rr_q: {
+ if (Operands[4]->getReg() != Operands[6]->getReg())
+ return Error (Operands[4]->getStartLoc(), "Q-registers must be the same");
+ if (static_cast<ARMOperand &>(*Operands[5]).getVectorIndex() !=
+ static_cast<ARMOperand &>(*Operands[7]).getVectorIndex() + 2)
+ return Error (Operands[5]->getStartLoc(), "Q-register indexes must be 2 and 0 or 3 and 1");
+ break;
+ }
+ case ARM::MVE_VMOV_q_rr: {
+ if (Operands[2]->getReg() != Operands[4]->getReg())
+ return Error (Operands[2]->getStartLoc(), "Q-registers must be the same");
+ if (static_cast<ARMOperand &>(*Operands[3]).getVectorIndex() !=
+ static_cast<ARMOperand &>(*Operands[5]).getVectorIndex() + 2)
+ return Error (Operands[3]->getStartLoc(), "Q-register indexes must be 2 and 0 or 3 and 1");
+ break;
+ }
}
return false;
@@ -7168,6 +8168,50 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
switch (Inst.getOpcode()) {
+ case ARM::MVE_VORNIZ0v4i32:
+ case ARM::MVE_VORNIZ0v8i16:
+ case ARM::MVE_VORNIZ8v4i32:
+ case ARM::MVE_VORNIZ8v8i16:
+ case ARM::MVE_VORNIZ16v4i32:
+ case ARM::MVE_VORNIZ24v4i32:
+ case ARM::MVE_VANDIZ0v4i32:
+ case ARM::MVE_VANDIZ0v8i16:
+ case ARM::MVE_VANDIZ8v4i32:
+ case ARM::MVE_VANDIZ8v8i16:
+ case ARM::MVE_VANDIZ16v4i32:
+ case ARM::MVE_VANDIZ24v4i32: {
+ unsigned Opcode;
+ bool imm16 = false;
+ switch(Inst.getOpcode()) {
+ case ARM::MVE_VORNIZ0v4i32: Opcode = ARM::MVE_VORRIZ0v4i32; break;
+ case ARM::MVE_VORNIZ0v8i16: Opcode = ARM::MVE_VORRIZ0v8i16; imm16 = true; break;
+ case ARM::MVE_VORNIZ8v4i32: Opcode = ARM::MVE_VORRIZ8v4i32; break;
+ case ARM::MVE_VORNIZ8v8i16: Opcode = ARM::MVE_VORRIZ8v8i16; imm16 = true; break;
+ case ARM::MVE_VORNIZ16v4i32: Opcode = ARM::MVE_VORRIZ16v4i32; break;
+ case ARM::MVE_VORNIZ24v4i32: Opcode = ARM::MVE_VORRIZ24v4i32; break;
+ case ARM::MVE_VANDIZ0v4i32: Opcode = ARM::MVE_VBICIZ0v4i32; break;
+ case ARM::MVE_VANDIZ0v8i16: Opcode = ARM::MVE_VBICIZ0v8i16; imm16 = true; break;
+ case ARM::MVE_VANDIZ8v4i32: Opcode = ARM::MVE_VBICIZ8v4i32; break;
+ case ARM::MVE_VANDIZ8v8i16: Opcode = ARM::MVE_VBICIZ8v8i16; imm16 = true; break;
+ case ARM::MVE_VANDIZ16v4i32: Opcode = ARM::MVE_VBICIZ16v4i32; break;
+ case ARM::MVE_VANDIZ24v4i32: Opcode = ARM::MVE_VBICIZ24v4i32; break;
+ default: llvm_unreachable("unexpected opcode");
+ }
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+
+ // invert immediate
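+ // e.g. a 32-bit VORN/VAND immediate of 0x0000ff00 becomes 0xffff00ff in
+ // the equivalent VORR/VBIC encoding.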
+ unsigned imm = ~Inst.getOperand(2).getImm() & (imm16 ? 0xffff : 0xffffffff);
+ TmpInst.addOperand(MCOperand::createImm(imm));
+
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
case ARM::LDRBT_POST: {
@@ -8990,15 +10034,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
case ARM::ITasm:
case ARM::t2IT: {
- MCOperand &MO = Inst.getOperand(1);
- unsigned Mask = MO.getImm();
- ARMCC::CondCodes Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
-
// Set up the IT block state according to the IT instruction we just
// matched.
assert(!inITBlock() && "nested IT blocks?!");
- startExplicitITBlock(Cond, Mask);
- MO.setImm(getITMaskEncoding());
+ startExplicitITBlock(ARMCC::CondCodes(Inst.getOperand(0).getImm()),
+ Inst.getOperand(1).getImm());
break;
}
case ARM::t2LSLrr:
@@ -9074,6 +10114,35 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
return true;
}
return false;
+ case ARM::MVE_VPST:
+ case ARM::MVE_VPTv16i8:
+ case ARM::MVE_VPTv8i16:
+ case ARM::MVE_VPTv4i32:
+ case ARM::MVE_VPTv16u8:
+ case ARM::MVE_VPTv8u16:
+ case ARM::MVE_VPTv4u32:
+ case ARM::MVE_VPTv16s8:
+ case ARM::MVE_VPTv8s16:
+ case ARM::MVE_VPTv4s32:
+ case ARM::MVE_VPTv4f32:
+ case ARM::MVE_VPTv8f16:
+ case ARM::MVE_VPTv16i8r:
+ case ARM::MVE_VPTv8i16r:
+ case ARM::MVE_VPTv4i32r:
+ case ARM::MVE_VPTv16u8r:
+ case ARM::MVE_VPTv8u16r:
+ case ARM::MVE_VPTv4u32r:
+ case ARM::MVE_VPTv16s8r:
+ case ARM::MVE_VPTv8s16r:
+ case ARM::MVE_VPTv4s32r:
+ case ARM::MVE_VPTv4f32r:
+ case ARM::MVE_VPTv8f16r: {
+ assert(!inVPTBlock() && "Nested VPT blocks are not allowed");
+ MCOperand &MO = Inst.getOperand(0);
+ VPTState.Mask = MO.getImm();
+ VPTState.CurPosition = 0;
+ break;
+ }
}
return false;
}
@@ -9138,18 +10207,50 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_RequiresV8;
}
- // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of
- // ARMv8-A.
- if ((Inst.getOpcode() == ARM::VMRS || Inst.getOpcode() == ARM::VMSR) &&
- Inst.getOperand(0).getReg() == ARM::SP && (isThumb() && !hasV8Ops()))
- return Match_InvalidOperand;
+ switch (Inst.getOpcode()) {
+ case ARM::VMRS:
+ case ARM::VMSR:
+ case ARM::VMRS_FPCXTS:
+ case ARM::VMRS_FPCXTNS:
+ case ARM::VMSR_FPCXTS:
+ case ARM::VMSR_FPCXTNS:
+ case ARM::VMRS_FPSCR_NZCVQC:
+ case ARM::VMSR_FPSCR_NZCVQC:
+ case ARM::FMSTAT:
+ case ARM::VMRS_VPR:
+ case ARM::VMRS_P0:
+ case ARM::VMSR_VPR:
+ case ARM::VMSR_P0:
+ // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of
+ // ARMv8-A.
+ if (Inst.getOperand(0).isReg() && Inst.getOperand(0).getReg() == ARM::SP &&
+ (isThumb() && !hasV8Ops()))
+ return Match_InvalidOperand;
+ break;
+ default:
+ break;
+ }
for (unsigned I = 0; I < MCID.NumOperands; ++I)
if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
// rGPRRegClass excludes PC, and also excluded SP before ARMv8
- if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops())
+ const auto &Op = Inst.getOperand(I);
+ if (!Op.isReg()) {
+ // This can happen in awkward cases with tied operands, e.g. a
+ // writeback load/store with a complex addressing mode in
+ // which there's an output operand corresponding to the
+ // updated written-back base register: the Tablegen-generated
+ // AsmMatcher will have written a placeholder operand to that
+ // slot in the form of an immediate 0, because it can't
+ // generate the register part of the complex addressing-mode
+ // operand ahead of time.
+ continue;
+ }
+
+ unsigned Reg = Op.getReg();
+ if ((Reg == ARM::SP) && !hasV8Ops())
return Match_RequiresV8;
- else if (Inst.getOperand(I).getReg() == ARM::PC)
+ else if (Reg == ARM::PC)
return Match_InvalidOperand;
}
@@ -9268,7 +10369,7 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
return PlainMatchResult;
}
-static std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string ARMMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
static const char *getSubtargetFeatureName(uint64_t Val);
@@ -9296,6 +10397,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Still progress the IT block, otherwise one wrong condition causes
// nasty cascading errors.
forwardITPosition();
+ forwardVPTPosition();
return true;
}
@@ -9322,6 +10424,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// and process gets a consistent answer about whether we're in an IT
// block.
forwardITPosition();
+ forwardVPTPosition();
// ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
// doesn't actually encode.
@@ -9341,7 +10444,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
ReportNearMisses(NearMisses, IDLoc, Operands);
return true;
case Match_MnemonicFail: {
- uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = ARMMnemonicSpellCheck(
((ARMOperand &)*Operands[0]).getToken(), FBS);
return Error(IDLoc, "invalid instruction" + Suggestion,
@@ -10384,11 +11487,11 @@ ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) {
: "operand must be a register in range [r0, r12] or r14";
// DPR contains 16 registers for some FPUs, and 32 for others.
case Match_DPR:
- return hasD16() ? "operand must be a register in range [d0, d15]"
- : "operand must be a register in range [d0, d31]";
+ return hasD32() ? "operand must be a register in range [d0, d31]"
+ : "operand must be a register in range [d0, d15]";
case Match_DPR_RegList:
- return hasD16() ? "operand must be a list of registers in range [d0, d15]"
- : "operand must be a list of registers in range [d0, d31]";
+ return hasD32() ? "operand must be a list of registers in range [d0, d31]"
+ : "operand must be a list of registers in range [d0, d15]";
// For all other diags, use the static string from tablegen.
default:
@@ -10416,7 +11519,7 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
// variants of an instruction that take 8- and 16-bit immediates, we want
// to only report the widest one.
std::multimap<unsigned, unsigned> OperandMissesSeen;
- SmallSet<uint64_t, 4> FeatureMissesSeen;
+ SmallSet<FeatureBitset, 4> FeatureMissesSeen;
bool ReportedTooFewOperands = false;
// Process the near-misses in reverse order, so that we see more general ones
@@ -10467,7 +11570,7 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
break;
}
case NearMissInfo::NearMissFeature: {
- uint64_t MissingFeatures = I.getFeatures();
+ const FeatureBitset &MissingFeatures = I.getFeatures();
// Don't report the same set of features twice.
if (FeatureMissesSeen.count(MissingFeatures))
break;
@@ -10475,20 +11578,21 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
// Special case: don't report a feature set which includes arm-mode for
// targets that don't have ARM mode.
- if ((MissingFeatures & Feature_IsARM) && !hasARM())
+ if (MissingFeatures.test(Feature_IsARMBit) && !hasARM())
break;
// Don't report any near-misses that both require switching instruction
// set, and adding other subtarget features.
- if (isThumb() && (MissingFeatures & Feature_IsARM) &&
- (MissingFeatures & ~Feature_IsARM))
+ if (isThumb() && MissingFeatures.test(Feature_IsARMBit) &&
+ MissingFeatures.count() > 1)
break;
- if (!isThumb() && (MissingFeatures & Feature_IsThumb) &&
- (MissingFeatures & ~Feature_IsThumb))
+ if (!isThumb() && MissingFeatures.test(Feature_IsThumbBit) &&
+ MissingFeatures.count() > 1)
break;
- if (!isThumb() && (MissingFeatures & Feature_IsThumb2) &&
- (MissingFeatures & ~(Feature_IsThumb2 | Feature_IsThumb)))
+ if (!isThumb() && MissingFeatures.test(Feature_IsThumb2Bit) &&
+ (MissingFeatures & ~FeatureBitset({Feature_IsThumb2Bit,
+ Feature_IsThumbBit})).any())
break;
- if (isMClass() && (MissingFeatures & Feature_HasNEON))
+ if (isMClass() && MissingFeatures.test(Feature_HasNEONBit))
break;
NearMissMessage Message;
@@ -10496,14 +11600,10 @@ ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn,
raw_svector_ostream OS(Message.Message);
OS << "instruction requires:";
- uint64_t Mask = 1;
- for (unsigned MaskPos = 0; MaskPos < (sizeof(MissingFeatures) * 8 - 1);
- ++MaskPos) {
- if (MissingFeatures & Mask) {
- OS << " " << getSubtargetFeatureName(MissingFeatures & Mask);
- }
- Mask <<= 1;
- }
+ for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
+ if (MissingFeatures.test(i))
+ OS << ' ' << getSubtargetFeatureName(i);
+
NearMissesOut.emplace_back(Message);
break;
@@ -10579,38 +11679,44 @@ void ARMAsmParser::ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses,
}
}
-// FIXME: This structure should be moved inside ARMTargetParser
-// when we start to table-generate them, and we can use the ARM
-// flags below, that were generated by table-gen.
-static const struct {
- const unsigned Kind;
- const uint64_t ArchCheck;
- const FeatureBitset Features;
-} Extensions[] = {
- { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
- { ARM::AEK_CRYPTO, Feature_HasV8,
- {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
- {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
- { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
- { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
- // FIXME: Only available in A-class, isel not predicated
- { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
- { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
- { ARM::AEK_RAS, Feature_HasV8, {ARM::FeatureRAS} },
- // FIXME: Unsupported extensions.
- { ARM::AEK_OS, Feature_None, {} },
- { ARM::AEK_IWMMXT, Feature_None, {} },
- { ARM::AEK_IWMMXT2, Feature_None, {} },
- { ARM::AEK_MAVERICK, Feature_None, {} },
- { ARM::AEK_XSCALE, Feature_None, {} },
-};
-
/// parseDirectiveArchExtension
/// ::= .arch_extension [no]feature
bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
+ // FIXME: This structure should be moved inside ARMTargetParser
+ // when we start to table-generate them, and we can use the ARM
+ // flags below, that were generated by table-gen.
+ static const struct {
+ const unsigned Kind;
+ const FeatureBitset ArchCheck;
+ const FeatureBitset Features;
+ } Extensions[] = {
+ { ARM::AEK_CRC, {Feature_HasV8Bit}, {ARM::FeatureCRC} },
+ { ARM::AEK_CRYPTO, {Feature_HasV8Bit},
+ {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+ { ARM::AEK_FP, {Feature_HasV8Bit},
+ {ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
+ { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM),
+ {Feature_HasV7Bit, Feature_IsNotMClassBit},
+ {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} },
+ { ARM::AEK_MP, {Feature_HasV7Bit, Feature_IsNotMClassBit},
+ {ARM::FeatureMP} },
+ { ARM::AEK_SIMD, {Feature_HasV8Bit},
+ {ARM::FeatureNEON, ARM::FeatureVFP2_D16_SP, ARM::FeatureFPARMv8} },
+ { ARM::AEK_SEC, {Feature_HasV6KBit}, {ARM::FeatureTrustZone} },
+ // FIXME: Only available in A-class, isel not predicated
+ { ARM::AEK_VIRT, {Feature_HasV7Bit}, {ARM::FeatureVirtualization} },
+ { ARM::AEK_FP16, {Feature_HasV8_2aBit},
+ {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
+ { ARM::AEK_RAS, {Feature_HasV8Bit}, {ARM::FeatureRAS} },
+ { ARM::AEK_LOB, {Feature_HasV8_1MMainlineBit}, {ARM::FeatureLOB} },
+ // FIXME: Unsupported extensions.
+ { ARM::AEK_OS, {}, {} },
+ { ARM::AEK_IWMMXT, {}, {} },
+ { ARM::AEK_IWMMXT2, {}, {} },
+ { ARM::AEK_MAVERICK, {}, {} },
+ { ARM::AEK_XSCALE, {}, {} },
+ };
+
MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Identifier))
@@ -10646,12 +11752,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
"allowed for the current base architecture");
MCSubtargetInfo &STI = copySTI();
- FeatureBitset ToggleFeatures = EnableFeature
- ? (~STI.getFeatureBits() & Extension.Features)
- : ( STI.getFeatureBits() & Extension.Features);
-
- uint64_t Features =
- ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
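+ // Setting/clearing the bits transitively also updates any features implied
+ // by the extension, rather than just toggling the extension's own bits.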
+ if (EnableFeature) {
+ STI.SetFeatureBitsTransitively(Extension.Features);
+ } else {
+ STI.ClearFeatureBitsTransitively(Extension.Features);
+ }
+ FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
setAvailableFeatures(Features);
return false;
}
@@ -10675,6 +11781,18 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (CE->getValue() == 0)
return Match_Success;
break;
+ case MCK__35_8:
+ if (Op.isImm())
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
+ if (CE->getValue() == 8)
+ return Match_Success;
+ break;
+ case MCK__35_16:
+ if (Op.isImm())
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
+ if (CE->getValue() == 16)
+ return Match_Success;
+ break;
case MCK_ModImm:
if (Op.isImm()) {
const MCExpr *SOExpr = Op.getImm();
@@ -10698,3 +11816,76 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
return Match_InvalidOperand;
}
+
+bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic,
+ StringRef ExtraToken) {
+ if (!hasMVE())
+ return false;
+
+ return Mnemonic.startswith("vabav") || Mnemonic.startswith("vaddv") ||
+ Mnemonic.startswith("vaddlv") || Mnemonic.startswith("vminnmv") ||
+ Mnemonic.startswith("vminnmav") || Mnemonic.startswith("vminv") ||
+ Mnemonic.startswith("vminav") || Mnemonic.startswith("vmaxnmv") ||
+ Mnemonic.startswith("vmaxnmav") || Mnemonic.startswith("vmaxv") ||
+ Mnemonic.startswith("vmaxav") || Mnemonic.startswith("vmladav") ||
+ Mnemonic.startswith("vrmlaldavh") || Mnemonic.startswith("vrmlalvh") ||
+ Mnemonic.startswith("vmlsdav") || Mnemonic.startswith("vmlav") ||
+ Mnemonic.startswith("vmlaldav") || Mnemonic.startswith("vmlalv") ||
+ Mnemonic.startswith("vmaxnm") || Mnemonic.startswith("vminnm") ||
+ Mnemonic.startswith("vmax") || Mnemonic.startswith("vmin") ||
+ Mnemonic.startswith("vshlc") || Mnemonic.startswith("vmovlt") ||
+ Mnemonic.startswith("vmovlb") || Mnemonic.startswith("vshll") ||
+ Mnemonic.startswith("vrshrn") || Mnemonic.startswith("vshrn") ||
+ Mnemonic.startswith("vqrshrun") || Mnemonic.startswith("vqshrun") ||
+ Mnemonic.startswith("vqrshrn") || Mnemonic.startswith("vqshrn") ||
+ Mnemonic.startswith("vbic") || Mnemonic.startswith("vrev64") ||
+ Mnemonic.startswith("vrev32") || Mnemonic.startswith("vrev16") ||
+ Mnemonic.startswith("vmvn") || Mnemonic.startswith("veor") ||
+ Mnemonic.startswith("vorn") || Mnemonic.startswith("vorr") ||
+ Mnemonic.startswith("vand") || Mnemonic.startswith("vmul") ||
+ Mnemonic.startswith("vqrdmulh") || Mnemonic.startswith("vqdmulh") ||
+ Mnemonic.startswith("vsub") || Mnemonic.startswith("vadd") ||
+ Mnemonic.startswith("vqsub") || Mnemonic.startswith("vqadd") ||
+ Mnemonic.startswith("vabd") || Mnemonic.startswith("vrhadd") ||
+ Mnemonic.startswith("vhsub") || Mnemonic.startswith("vhadd") ||
+ Mnemonic.startswith("vdup") || Mnemonic.startswith("vcls") ||
+ Mnemonic.startswith("vclz") || Mnemonic.startswith("vneg") ||
+ Mnemonic.startswith("vabs") || Mnemonic.startswith("vqneg") ||
+ Mnemonic.startswith("vqabs") ||
+ (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") ||
+ Mnemonic.startswith("vcmla") || Mnemonic.startswith("vfma") ||
+ Mnemonic.startswith("vfms") || Mnemonic.startswith("vcadd") ||
+ Mnemonic.startswith("vadd") || Mnemonic.startswith("vsub") ||
+ Mnemonic.startswith("vshl") || Mnemonic.startswith("vqshl") ||
+ Mnemonic.startswith("vqrshl") || Mnemonic.startswith("vrshl") ||
+ Mnemonic.startswith("vsri") || Mnemonic.startswith("vsli") ||
+ Mnemonic.startswith("vrshr") || Mnemonic.startswith("vshr") ||
+ Mnemonic.startswith("vpsel") || Mnemonic.startswith("vcmp") ||
+ Mnemonic.startswith("vqdmladh") || Mnemonic.startswith("vqrdmladh") ||
+ Mnemonic.startswith("vqdmlsdh") || Mnemonic.startswith("vqrdmlsdh") ||
+ Mnemonic.startswith("vcmul") || Mnemonic.startswith("vrmulh") ||
+ Mnemonic.startswith("vqmovn") || Mnemonic.startswith("vqmovun") ||
+ Mnemonic.startswith("vmovnt") || Mnemonic.startswith("vmovnb") ||
+ Mnemonic.startswith("vmaxa") || Mnemonic.startswith("vmaxnma") ||
+ Mnemonic.startswith("vhcadd") || Mnemonic.startswith("vadc") ||
+ Mnemonic.startswith("vsbc") || Mnemonic.startswith("vrshr") ||
+ Mnemonic.startswith("vshr") || Mnemonic.startswith("vstrb") ||
+ Mnemonic.startswith("vldrb") ||
+ (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi") ||
+ (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") ||
+ Mnemonic.startswith("vstrw") || Mnemonic.startswith("vldrw") ||
+ Mnemonic.startswith("vldrd") || Mnemonic.startswith("vstrd") ||
+ Mnemonic.startswith("vqdmull") || Mnemonic.startswith("vbrsr") ||
+ Mnemonic.startswith("vfmas") || Mnemonic.startswith("vmlas") ||
+ Mnemonic.startswith("vmla") || Mnemonic.startswith("vqdmlash") ||
+ Mnemonic.startswith("vqdmlah") || Mnemonic.startswith("vqrdmlash") ||
+ Mnemonic.startswith("vqrdmlah") || Mnemonic.startswith("viwdup") ||
+ Mnemonic.startswith("vdwdup") || Mnemonic.startswith("vidup") ||
+ Mnemonic.startswith("vddup") || Mnemonic.startswith("vctp") ||
+ Mnemonic.startswith("vpnot") || Mnemonic.startswith("vbic") ||
+ Mnemonic.startswith("vrmlsldavh") || Mnemonic.startswith("vmlsldav") ||
+ Mnemonic.startswith("vcvt") ||
+ (Mnemonic.startswith("vmov") &&
+ !(ExtraToken == ".f16" || ExtraToken == ".32" ||
+ ExtraToken == ".16" || ExtraToken == ".8"));
+}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 61bec04678dd..673691ebd93e 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1,15 +1,16 @@
//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseInstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -63,22 +64,19 @@ namespace {
return ITStates.size() == 1;
}
- // Called when decoding an IT instruction. Sets the IT state for the following
- // instructions that for the IT block. Firstcond and Mask correspond to the
- // fields in the IT instruction encoding.
+ // Called when decoding an IT instruction. Sets the IT state for
+ // the following instructions that form the IT block. Firstcond
+ // corresponds to the field in the IT instruction encoding; Mask
+ // is in the MCOperand format in which 1 means 'else' and 0 'then'.
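+ // For example, for 'itett' the mask operand is 0b1001: bit 3 marks the
+ // second instruction as 'else', bits 2 and 1 mark 'then', and the lowest
+ // set bit (bit 0) terminates the block.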
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
- unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
assert(NumTZ <= 3 && "Invalid IT mask!");
// push condition codes onto the stack the correct order for the pops
for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
- bool T = ((Mask >> Pos) & 1) == CondBit0;
- if (T)
- ITStates.push_back(CCBits);
- else
- ITStates.push_back(CCBits ^ 1);
+ unsigned Else = (Mask >> Pos) & 1;
+ ITStates.push_back(CCBits ^ Else);
}
ITStates.push_back(CCBits);
}
@@ -87,6 +85,47 @@ namespace {
std::vector<unsigned char> ITStates;
};
+ class VPTStatus {
+ public:
+ unsigned getVPTPred() {
+ unsigned Pred = ARMVCC::None;
+ if (instrInVPTBlock())
+ Pred = VPTStates.back();
+ return Pred;
+ }
+
+ void advanceVPTState() {
+ VPTStates.pop_back();
+ }
+
+ bool instrInVPTBlock() {
+ return !VPTStates.empty();
+ }
+
+ bool instrLastInVPTBlock() {
+ return VPTStates.size() == 1;
+ }
+
+ void setVPTState(char Mask) {
+ // (3 - the number of trailing zeros) is the number of then / else.
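+ // E.g. a mask of 0b0100 describes a two-instruction block: the first
+ // predicate is always Then, bit 3 gives Then for the second, and the
+ // lowest set bit (bit 2) terminates the block.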
+ unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
+ assert(NumTZ <= 3 && "Invalid VPT mask!");
+ // push predicates onto the stack the correct order for the pops
+ for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
+ bool T = ((Mask >> Pos) & 1) == 0;
+ if (T)
+ VPTStates.push_back(ARMVCC::Then);
+ else
+ VPTStates.push_back(ARMVCC::Else);
+ }
+ VPTStates.push_back(ARMVCC::Then);
+ }
+
+ private:
+ SmallVector<unsigned char, 4> VPTStates;
+ };
+
/// ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -100,27 +139,23 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
-};
-/// Thumb disassembler for all Thumb platforms.
-class ThumbDisassembler : public MCDisassembler {
-public:
- ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
- MCDisassembler(STI, Ctx) {
- }
-
- ~ThumbDisassembler() override = default;
+private:
+ DecodeStatus getARMInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
- DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &VStream,
- raw_ostream &CStream) const override;
+ DecodeStatus getThumbInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const;
-private:
mutable ITStatus ITBlock;
+ mutable VPTStatus VPTBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
- void UpdateThumbVFPPredicate(MCInst&) const;
+ void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const;
};
} // end anonymous namespace
@@ -144,12 +179,23 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
// Definitions are further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGPRwithZRRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeGPRwithZRnospRegisterClass(
+ MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder);
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -166,12 +212,20 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
@@ -262,6 +316,10 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEModImmInstruction(MCInst &Inst,unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
@@ -276,6 +334,11 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
@@ -324,6 +387,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
unsigned Val,
uint64_t Address,
@@ -359,14 +424,28 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn,
uint64_t Address, const void* Decoder);
static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+template<int shift, int WriteBack>
+static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
@@ -409,6 +488,82 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
+static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+template<bool Writeback>
+static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+template<int shift>
+static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+template<unsigned MinLog, unsigned MaxLog>
+static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+template <int shift>
+static DecodeStatus DecodeExpandedImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+template<unsigned start>
+static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+typedef DecodeStatus OperandDecoder(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+template<bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
static MCDisassembler *createARMDisassembler(const Target &T,
@@ -417,12 +572,6 @@ static MCDisassembler *createARMDisassembler(const Target &T,
return new ARMDisassembler(STI, Ctx);
}
-static MCDisassembler *createThumbDisassembler(const Target &T,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new ThumbDisassembler(STI, Ctx);
-}
-
// Post-decoding checks
static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address, raw_ostream &OS,
@@ -440,6 +589,18 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::SoftFail;
return Result;
}
+ case ARM::t2ADDri:
+ case ARM::t2ADDri12:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDrs:
+ case ARM::t2SUBri:
+ case ARM::t2SUBri12:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBrs:
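+ // Flag as SoftFail any of these ADD/SUB forms that write SP without also
+ // reading SP as the first source operand.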
+ if (MI.getOperand(0).getReg() == ARM::SP &&
+ MI.getOperand(1).getReg() != ARM::SP)
+ return MCDisassembler::SoftFail;
+ return Result;
default: return Result;
}
}
@@ -448,6 +609,16 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address, raw_ostream &OS,
raw_ostream &CS) const {
+ if (STI.getFeatureBits()[ARM::ModeThumb])
+ return getThumbInstruction(MI, Size, Bytes, Address, OS, CS);
+ return getARMInstruction(MI, Size, Bytes, Address, OS, CS);
+}
+
+DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ raw_ostream &CS) const {
CommentStream = &CS;
assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
@@ -569,12 +740,22 @@ static void AddThumb1SBit(MCInst &MI, bool InITBlock) {
MI.insert(I, MCOperand::createReg(InITBlock ? 0 : ARM::CPSR));
}
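+// Return true if this opcode has a vpred operand, i.e. the instruction can
+// be predicated inside a VPT block.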
+static bool isVectorPredicable(unsigned Opcode) {
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ unsigned short NumOps = ARMInsts[Opcode].NumOperands;
+ for (unsigned i = 0; i < NumOps; ++i) {
+ if (ARM::isVpred(OpInfo[i].OperandType))
+ return true;
+ }
+ return false;
+}
+
// Most Thumb instructions don't have explicit predicates in the
// encoding, but rather get their predicates from IT context. We need
// to fix up the predicate operands using this context information as a
// post-pass.
MCDisassembler::DecodeStatus
-ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
+ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
MCDisassembler::DecodeStatus S = Success;
const FeatureBitset &FeatureBits = getSubtargetInfo().getFeatureBits();
@@ -590,6 +771,10 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
case ARM::t2CPS3p:
case ARM::t2CPS2p:
case ARM::t2CPS1p:
+ case ARM::t2CSEL:
+ case ARM::t2CSINC:
+ case ARM::t2CSINV:
+ case ARM::t2CSNEG:
case ARM::tMOVSr:
case ARM::tSETEND:
// Some instructions (mostly conditional branches) are not
@@ -616,37 +801,66 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
break;
}
- // If we're in an IT block, base the predicate on that. Otherwise,
+ // Warn on non-VPT predicable instruction in a VPT block and a VPT
+ // predicable instruction in an IT block
+ if ((!isVectorPredicable(MI.getOpcode()) && VPTBlock.instrInVPTBlock()) ||
+ (isVectorPredicable(MI.getOpcode()) && ITBlock.instrInITBlock()))
+ S = SoftFail;
+
+ // If we're in an IT/VPT block, base the predicate on that. Otherwise,
// assume a predicate of AL.
- unsigned CC;
- CC = ITBlock.getITCC();
- if (CC == 0xF)
- CC = ARMCC::AL;
- if (ITBlock.instrInITBlock())
+ unsigned CC = ARMCC::AL;
+ unsigned VCC = ARMVCC::None;
+ if (ITBlock.instrInITBlock()) {
+ CC = ITBlock.getITCC();
ITBlock.advanceITState();
+ } else if (VPTBlock.instrInVPTBlock()) {
+ VCC = VPTBlock.getVPTPred();
+ VPTBlock.advanceVPTState();
+ }
const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
- MCInst::iterator I = MI.begin();
- for (unsigned i = 0; i < NumOps; ++i, ++I) {
- if (I == MI.end()) break;
- if (OpInfo[i].isPredicate()) {
- I = MI.insert(I, MCOperand::createImm(CC));
- ++I;
- if (CC == ARMCC::AL)
- MI.insert(I, MCOperand::createReg(0));
- else
- MI.insert(I, MCOperand::createReg(ARM::CPSR));
- return S;
- }
+
+ MCInst::iterator CCI = MI.begin();
+ for (unsigned i = 0; i < NumOps; ++i, ++CCI) {
+ if (OpInfo[i].isPredicate() || CCI == MI.end()) break;
}
- I = MI.insert(I, MCOperand::createImm(CC));
- ++I;
- if (CC == ARMCC::AL)
- MI.insert(I, MCOperand::createReg(0));
- else
- MI.insert(I, MCOperand::createReg(ARM::CPSR));
+ if (ARMInsts[MI.getOpcode()].isPredicable()) {
+ CCI = MI.insert(CCI, MCOperand::createImm(CC));
+ ++CCI;
+ if (CC == ARMCC::AL)
+ MI.insert(CCI, MCOperand::createReg(0));
+ else
+ MI.insert(CCI, MCOperand::createReg(ARM::CPSR));
+ } else if (CC != ARMCC::AL) {
+ Check(S, SoftFail);
+ }
+
+ MCInst::iterator VCCI = MI.begin();
+ unsigned VCCPos;
+ for (VCCPos = 0; VCCPos < NumOps; ++VCCPos, ++VCCI) {
+ if (ARM::isVpred(OpInfo[VCCPos].OperandType) || VCCI == MI.end()) break;
+ }
+
+ if (isVectorPredicable(MI.getOpcode())) {
+ VCCI = MI.insert(VCCI, MCOperand::createImm(VCC));
+ ++VCCI;
+ if (VCC == ARMVCC::None)
+ MI.insert(VCCI, MCOperand::createReg(0));
+ else
+ MI.insert(VCCI, MCOperand::createReg(ARM::P0));
+ if (OpInfo[VCCPos].OperandType == ARM::OPERAND_VPRED_R) {
+ int TiedOp = ARMInsts[MI.getOpcode()].getOperandConstraint(
+ VCCPos + 2, MCOI::TIED_TO);
+ assert(TiedOp >= 0 &&
+ "Inactive register in vpred_r is not tied to an output!");
+ MI.insert(VCCI, MI.getOperand(TiedOp));
+ }
+ } else if (VCC != ARMVCC::None) {
+ Check(S, SoftFail);
+ }
return S;
}
@@ -656,19 +870,26 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const {
// mode, the auto-generated decoder will give them an (incorrect)
// predicate operand. We need to rewrite these operands based on the IT
// context as a post-pass.
-void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
+void ARMDisassembler::UpdateThumbVFPPredicate(
+ DecodeStatus &S, MCInst &MI) const {
unsigned CC;
CC = ITBlock.getITCC();
if (CC == 0xF)
CC = ARMCC::AL;
if (ITBlock.instrInITBlock())
ITBlock.advanceITState();
+ else if (VPTBlock.instrInVPTBlock()) {
+ CC = VPTBlock.getVPTPred();
+ VPTBlock.advanceVPTState();
+ }
const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo;
MCInst::iterator I = MI.begin();
unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands;
for (unsigned i = 0; i < NumOps; ++i, ++I) {
if (OpInfo[i].isPredicate() ) {
+ if (CC != ARMCC::AL && !ARMInsts[MI.getOpcode()].isPredicable())
+ Check(S, SoftFail);
I->setImm(CC);
++I;
if (CC == ARMCC::AL)
@@ -680,11 +901,11 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const {
}
}
-DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
- ArrayRef<uint8_t> Bytes,
- uint64_t Address,
- raw_ostream &OS,
- raw_ostream &CS) const {
+DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ raw_ostream &CS) const {
CommentStream = &CS;
assert(STI.getFeatureBits()[ARM::ModeThumb] &&
@@ -751,6 +972,27 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn32 =
(Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
+
+ Result =
+ decodeInstruction(DecoderTableMVE32, MI, Insn32, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+
+ // Nested VPT blocks are UNPREDICTABLE. Must be checked before we add
+ // the VPT predicate.
+ if (isVPTOpcode(MI.getOpcode()) && VPTBlock.instrInVPTBlock())
+ Result = MCDisassembler::SoftFail;
+
+ Check(Result, AddThumbPredicate(MI));
+
+ if (isVPTOpcode(MI.getOpcode())) {
+ unsigned Mask = MI.getOperand(0).getImm();
+ VPTBlock.setVPTState(Mask);
+ }
+
+ return Result;
+ }
+
Result =
decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -766,7 +1008,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Result != MCDisassembler::Fail) {
Size = 4;
Check(Result, AddThumbPredicate(MI));
- return Result;
+ return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn32, Result);
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
@@ -774,7 +1016,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
- UpdateThumbVFPPredicate(MI);
+ UpdateThumbVFPPredicate(Result, MI);
return Result;
}
}
@@ -861,9 +1103,9 @@ extern "C" void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
- createThumbDisassembler);
+ createARMDisassembler);
TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
- createThumbDisassembler);
+ createARMDisassembler);
}
static const uint16_t GPRDecoderTable[] = {
@@ -873,6 +1115,13 @@ static const uint16_t GPRDecoderTable[] = {
ARM::R12, ARM::SP, ARM::LR, ARM::PC
};
+static const uint16_t CLRMGPRDecoderTable[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, 0, ARM::LR, ARM::APSR
+};
+
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
@@ -883,6 +1132,20 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCLRMGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CLRMGPRDecoderTable[RegNo];
+ if (Register == 0)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus
DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
@@ -911,6 +1174,34 @@ DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus
+DecodeGPRwithZRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15) {
+ Inst.addOperand(MCOperand::createReg(ARM::ZR));
+ return MCDisassembler::Success;
+ }
+
+ if (RegNo == 13)
+ Check(S, MCDisassembler::SoftFail);
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
+}
+
+static DecodeStatus
+DecodeGPRwithZRnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ if (RegNo == 13)
+ return MCDisassembler::Fail;
+ Check(S, DecodeGPRwithZRRegisterClass(Inst, RegNo, Address, Decoder));
+ return S;
+}
+
static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
@@ -1024,9 +1315,9 @@ static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
const FeatureBitset &featureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
- bool hasD16 = featureBits[ARM::FeatureD16];
+ bool hasD32 = featureBits[ARM::FeatureD32];
- if (RegNo > 31 || (hasD16 && RegNo > 15))
+ if (RegNo > 31 || (!hasD32 && RegNo > 15))
return MCDisassembler::Fail;
unsigned Register = DPRDecoderTable[RegNo];
@@ -1041,6 +1332,13 @@ static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
+static DecodeStatus DecodeSPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+ return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
static DecodeStatus
DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
@@ -1111,16 +1409,19 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
if (Val == 0xF) return MCDisassembler::Fail;
// AL predicate is not allowed on Thumb1 branches.
if (Inst.getOpcode() == ARM::tBcc && Val == 0xE)
return MCDisassembler::Fail;
+ if (Val != ARMCC::AL && !ARMInsts[Inst.getOpcode()].isPredicable())
+ Check(S, MCDisassembler::SoftFail);
Inst.addOperand(MCOperand::createImm(Val));
if (Val == ARMCC::AL) {
Inst.addOperand(MCOperand::createReg(0));
} else
Inst.addOperand(MCOperand::createReg(ARM::CPSR));
- return MCDisassembler::Success;
+ return S;
}
static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
@@ -1210,6 +1511,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
bool NeedDisjointWriteback = false;
unsigned WritebackReg = 0;
+ bool CLRM = false;
switch (Inst.getOpcode()) {
default:
break;
@@ -1224,17 +1526,26 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
NeedDisjointWriteback = true;
WritebackReg = Inst.getOperand(0).getReg();
break;
+ case ARM::t2CLRM:
+ CLRM = true;
+ break;
}
// Empty register lists are not allowed.
if (Val == 0) return MCDisassembler::Fail;
for (unsigned i = 0; i < 16; ++i) {
if (Val & (1 << i)) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
- return MCDisassembler::Fail;
- // Writeback not allowed if Rn is in the target list.
- if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
- Check(S, MCDisassembler::SoftFail);
+ if (CLRM) {
+ if (!Check(S, DecodeCLRMGPRRegisterClass(Inst, i, Address, Decoder))) {
+ return MCDisassembler::Fail;
+ }
+ } else {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
+ return MCDisassembler::Fail;
+ // Writeback not allowed if Rn is in the target list.
+ if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
+ Check(S, MCDisassembler::SoftFail);
+ }
}
}
@@ -1327,6 +1638,8 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
unsigned imm = fieldFromInstruction(Insn, 0, 8);
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned U = fieldFromInstruction(Insn, 23, 1);
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
switch (Inst.getOpcode()) {
case ARM::LDC_OFFSET:
@@ -1361,15 +1674,42 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
case ARM::t2STCL_PRE:
case ARM::t2STCL_POST:
case ARM::t2STCL_OPTION:
- if (coproc == 0xA || coproc == 0xB)
+ case ARM::t2LDC2_OFFSET:
+ case ARM::t2LDC2L_OFFSET:
+ case ARM::t2LDC2_PRE:
+ case ARM::t2LDC2L_PRE:
+ case ARM::t2STC2_OFFSET:
+ case ARM::t2STC2L_OFFSET:
+ case ARM::t2STC2_PRE:
+ case ARM::t2STC2L_PRE:
+ case ARM::LDC2_OFFSET:
+ case ARM::LDC2L_OFFSET:
+ case ARM::LDC2_PRE:
+ case ARM::LDC2L_PRE:
+ case ARM::STC2_OFFSET:
+ case ARM::STC2L_OFFSET:
+ case ARM::STC2_PRE:
+ case ARM::STC2L_PRE:
+ case ARM::t2LDC2_OPTION:
+ case ARM::t2STC2_OPTION:
+ case ARM::t2LDC2_POST:
+ case ARM::t2LDC2L_POST:
+ case ARM::t2STC2_POST:
+ case ARM::t2STC2L_POST:
+ case ARM::LDC2_POST:
+ case ARM::LDC2L_POST:
+ case ARM::STC2_POST:
+ case ARM::STC2L_POST:
+ if (coproc == 0xA || coproc == 0xB ||
+ (featureBits[ARM::HasV8_1MMainlineOps] &&
+ (coproc == 0x8 || coproc == 0x9 || coproc == 0xA || coproc == 0xB ||
+ coproc == 0xE || coproc == 0xF)))
return MCDisassembler::Fail;
break;
default:
break;
}
- const FeatureBitset &featureBits =
- ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
if (featureBits[ARM::HasV8Ops] && (coproc != 14))
return MCDisassembler::Fail;
@@ -3150,6 +3490,60 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus
+DecodeMVEModImmInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+ fieldFromInstruction(Insn, 13, 3));
+ unsigned cmode = fieldFromInstruction(Insn, 8, 4);
+ unsigned imm = fieldFromInstruction(Insn, 0, 4);
+ imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+ imm |= fieldFromInstruction(Insn, 28, 1) << 7;
+ imm |= cmode << 8;
+ imm |= fieldFromInstruction(Insn, 5, 1) << 12;
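+ // The operand packs the 8 immediate bits into bits 7:0, cmode into bits
+ // 11:8 and the encoding's bit 5 into bit 12, mirroring the NEON
+ // modified-immediate operand layout.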
+
+ if (cmode == 0xF && Inst.getOpcode() == ARM::MVE_VMVNimmi32)
+ return MCDisassembler::Fail;
+
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ Inst.addOperand(MCOperand::createImm(ARMVCC::None));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(0));
+
+ return S;
+}
+
+static DecodeStatus DecodeMVEVADCInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Qd = fieldFromInstruction(Insn, 13, 3);
+ Qd |= fieldFromInstruction(Insn, 22, 1) << 3;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+
+ unsigned Qn = fieldFromInstruction(Insn, 17, 3);
+ Qn |= fieldFromInstruction(Insn, 7, 1) << 3;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ unsigned Qm = fieldFromInstruction(Insn, 1, 3);
+ Qm |= fieldFromInstruction(Insn, 5, 1) << 3;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!fieldFromInstruction(Insn, 12, 1)) // I bit clear => need input FPSCR
+ Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+ Inst.addOperand(MCOperand::createImm(Qd));
+
+ return S;
+}
+
static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3706,6 +4100,21 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeT2Imm7S4(MCInst &Inst, unsigned Val, uint64_t Address,
+ const void *Decoder) {
+ if (Val == 0)
+ Inst.addOperand(MCOperand::createImm(INT32_MIN));
+ else {
+ int imm = Val & 0x7F;
+
+ if (!(Val & 0x80))
+ imm *= -1;
+ Inst.addOperand(MCOperand::createImm(imm * 4));
+ }
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3721,6 +4130,22 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
return S;
}
+static DecodeStatus DecodeT2AddrModeImm7s4(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 8, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm7S4(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3748,6 +4173,21 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
+template<int shift>
+static DecodeStatus DecodeT2Imm7(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
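+ // Bit 7 selects add (set) or subtract (clear) and bits 6:0 give the
+ // magnitude, scaled by 1 << shift; Val == 0 is kept as INT32_MIN to
+ // represent '-0'. E.g. with shift == 2, Val == 0x85 decodes to +20 and
+ // Val == 0x05 to -20.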
+ int imm = Val & 0x7F;
+ if (Val == 0)
+ imm = INT32_MIN;
+ else if (!(Val & 0x80))
+ imm *= -1;
+ if (imm != INT32_MIN)
+ imm *= (1U << shift);
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3794,6 +4234,42 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
return S;
}
+template<int shift>
+static DecodeStatus DecodeTAddrModeImm7(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 8, 3);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+
+ if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm7<shift>(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<int shift, int WriteBack>
+static DecodeStatus DecodeT2AddrModeImm7(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction(Val, 8, 4);
+ unsigned imm = fieldFromInstruction(Val, 0, 8);
+ if (WriteBack) {
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeT2Imm7<shift>(Inst, imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3941,6 +4417,43 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeMveAddrModeRQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rn = fieldFromInstruction(Insn, 3, 4);
+ unsigned Qm = fieldFromInstruction(Insn, 0, 3);
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<int shift>
+static DecodeStatus DecodeMveAddrModeQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Qm = fieldFromInstruction(Insn, 8, 3);
+ int imm = fieldFromInstruction(Insn, 0, 7);
+
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ if (!fieldFromInstruction(Insn, 7, 1)) {
+ if (imm == 0)
+ imm = INT32_MIN; // indicate -0
+ else
+ imm *= -1;
+ }
+ if (imm != INT32_MIN)
+ imm *= (1U << shift);
+ Inst.addOperand(MCOperand::createImm(imm));
+
+ return S;
+}
+
static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
// Val is passed in as S:J1:J2:imm10H:imm10L:'0'
@@ -3973,7 +4486,7 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
const FeatureBitset &featureBits =
((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
- if (featureBits[ARM::HasV8Ops] && !(Val == 14 || Val == 15))
+ if (!isValidCoprocessorNumber(Val, featureBits))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(Val));
@@ -4981,6 +5494,16 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
if (mask == 0x0)
return MCDisassembler::Fail;
+ // IT masks are encoded as a sequence of replacement low-order bits
+ // for the condition code. So if the low bit of the starting
+ // condition code is 1, then we have to flip all the bits above the
+ // terminating bit (which is the lowest 1 bit).
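+ // For example, with a first condition whose low bit is set and an encoded
+ // mask of 0b1010, the terminating bit is 0b0010, so bits 0b1100 are
+ // flipped and the stored mask becomes 0b0110.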
+ if (pred & 1) {
+ unsigned LowBit = mask & -mask;
+ unsigned BitsAboveLowBit = 0xF & (-LowBit << 1);
+ mask ^= BitsAboveLowBit;
+ }
+
Inst.addOperand(MCOperand::createImm(pred));
Inst.addOperand(MCOperand::createImm(mask));
return S;
@@ -5341,14 +5864,37 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
DecodeStatus S = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction(Val, 12, 4);
+ // Add explicit operand for the destination sysreg, for cases where
+ // we have to model it for code generation purposes.
+ switch (Inst.getOpcode()) {
+ case ARM::VMSR_FPSCR_NZCVQC:
+ Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+ break;
+ case ARM::VMSR_P0:
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ break;
+ }
- if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) {
- if (Rt == 13 || Rt == 15)
- S = MCDisassembler::SoftFail;
- Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder));
- } else
- Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
+ if (Inst.getOpcode() != ARM::FMSTAT) {
+ unsigned Rt = fieldFromInstruction(Val, 12, 4);
+
+ if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) {
+ if (Rt == 13 || Rt == 15)
+ S = MCDisassembler::SoftFail;
+ Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder));
+ } else
+ Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
+ }
+
+ // Add explicit operand for the source sysreg, similarly to above.
+ switch (Inst.getOpcode()) {
+ case ARM::VMRS_FPSCR_NZCVQC:
+ Inst.addOperand(MCOperand::createReg(ARM::FPSCR_NZCV));
+ break;
+ case ARM::VMRS_P0:
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ break;
+ }
if (featureBits[ARM::ModeThumb]) {
Inst.addOperand(MCOperand::createImm(ARMCC::AL));
@@ -5361,3 +5907,668 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
return S;
}
+
+template <bool isSigned, bool isNeg, bool zeroPermitted, int size>
+static DecodeStatus DecodeBFLabelOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ if (Val == 0 && !zeroPermitted)
+ S = MCDisassembler::Fail;
+
+ uint64_t DecVal;
+ if (isSigned)
+ DecVal = SignExtend32<size + 1>(Val << 1);
+ else
+ DecVal = (Val << 1);
+
+ if (!tryAddingSymbolicOperand(Address, Address + DecVal + 4, true, 4, Inst,
+ Decoder))
+ Inst.addOperand(MCOperand::createImm(isNeg ? -DecVal : DecVal));
+ return S;
+}
+
+static DecodeStatus DecodeBFAfterTargetOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+
+ uint64_t LocImm = Inst.getOperand(0).getImm();
+ Val = LocImm + (2 << Val);
+ if (!tryAddingSymbolicOperand(Address, Address + Val + 4, true, 4, Inst,
+ Decoder))
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePredNoALOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val >= ARMCC::AL) // also exclude the non-condition NV
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLOLoop(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Inst.getOpcode() == ARM::MVE_LCTP)
+ return S;
+
+ unsigned Imm = fieldFromInstruction(Insn, 11, 1) |
+ fieldFromInstruction(Insn, 1, 10) << 1;
+ switch (Inst.getOpcode()) {
+ case ARM::t2LEUpdate:
+ case ARM::MVE_LETP:
+ Inst.addOperand(MCOperand::createReg(ARM::LR));
+ Inst.addOperand(MCOperand::createReg(ARM::LR));
+ LLVM_FALLTHROUGH;
+ case ARM::t2LE:
+ if (!Check(S, DecodeBFLabelOperand<false, true, true, 11>(
+ Inst, Imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2WLS:
+ case ARM::MVE_WLSTP_8:
+ case ARM::MVE_WLSTP_16:
+ case ARM::MVE_WLSTP_32:
+ case ARM::MVE_WLSTP_64:
+ Inst.addOperand(MCOperand::createReg(ARM::LR));
+ if (!Check(S,
+ DecoderGPRRegisterClass(Inst, fieldFromInstruction(Insn, 16, 4),
+ Address, Decoder)) ||
+ !Check(S, DecodeBFLabelOperand<false, false, true, 11>(
+ Inst, Imm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::t2DLS:
+ case ARM::MVE_DLSTP_8:
+ case ARM::MVE_DLSTP_16:
+ case ARM::MVE_DLSTP_32:
+ case ARM::MVE_DLSTP_64:
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ if (Rn == 0xF) {
+ // Enforce all the rest of the instruction bits in LCTP, which
+ // won't have been reliably checked based on LCTP's own tablegen
+ // record, because we came to this decode by a roundabout route.
+ uint32_t CanonicalLCTP = 0xF00FE001, SBZMask = 0x00300FFE;
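+ // CanonicalLCTP has all the should-be-zero bits clear; SBZMask marks the
+ // bits that may differ without making this a hard decoding failure.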
+ if ((Insn & ~SBZMask) != CanonicalLCTP)
+ return MCDisassembler::Fail; // a mandatory bit is wrong: hard fail
+ if (Insn != CanonicalLCTP)
+ Check(S, MCDisassembler::SoftFail); // an SBZ bit is wrong: soft fail
+
+ Inst.setOpcode(ARM::MVE_LCTP);
+ } else {
+ Inst.addOperand(MCOperand::createReg(ARM::LR));
+ if (!Check(S, DecoderGPRRegisterClass(Inst,
+ fieldFromInstruction(Insn, 16, 4),
+ Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ break;
+ }
+ return S;
+}
+
+static DecodeStatus DecodeLongShiftOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Val == 0)
+ Val = 32;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+
+ return S;
+}
+
+static DecodeStatus DecodetGPROddRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if ((RegNo) + 1 > 11)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[(RegNo) + 1];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if ((RegNo) > 14)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GPRDecoderTable[(RegNo)];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVSCCLRM(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+ Inst.addOperand(MCOperand::createReg(0));
+ if (Inst.getOpcode() == ARM::VSCCLRMD) {
+ unsigned reglist = (fieldFromInstruction(Insn, 1, 7) << 1) |
+ (fieldFromInstruction(Insn, 12, 4) << 8) |
+ (fieldFromInstruction(Insn, 22, 1) << 12);
+ if (!Check(S, DecodeDPRRegListOperand(Inst, reglist, Address, Decoder))) {
+ return MCDisassembler::Fail;
+ }
+ } else {
+ unsigned reglist = fieldFromInstruction(Insn, 0, 8) |
+ (fieldFromInstruction(Insn, 22, 1) << 8) |
+ (fieldFromInstruction(Insn, 12, 4) << 9);
+ if (!Check(S, DecodeSPRRegListOperand(Inst, reglist, Address, Decoder))) {
+ return MCDisassembler::Fail;
+ }
+ }
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+
+ return S;
+}
+
+static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+
+ unsigned Register = QPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t QQPRDecoderTable[] = {
+ ARM::Q0_Q1, ARM::Q1_Q2, ARM::Q2_Q3, ARM::Q3_Q4,
+ ARM::Q4_Q5, ARM::Q5_Q6, ARM::Q6_Q7
+};
+
+static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 6)
+ return MCDisassembler::Fail;
+
+ unsigned Register = QQPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t QQQQPRDecoderTable[] = {
+ ARM::Q0_Q1_Q2_Q3, ARM::Q1_Q2_Q3_Q4, ARM::Q2_Q3_Q4_Q5,
+ ARM::Q3_Q4_Q5_Q6, ARM::Q4_Q5_Q6_Q7
+};
+
+static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 4)
+ return MCDisassembler::Fail;
+
+ unsigned Register = QQQQPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVPTMaskOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ // Parse VPT mask and encode it in the MCInst as an immediate with the same
+ // format as the it_mask: starting from the second 't'/'e' in the block,
+ // encode 'e' as 1 and 't' as 0, then finish with a trailing 1.
+ unsigned Imm = 0;
+ // We always start with a 't'.
+ unsigned CurBit = 0;
+ for (int i = 3; i >= 0; --i) {
+ // If the bit we are looking at is not the same as the last one, invert
+ // CurBit; if it is the same, leave it as is.
+ CurBit ^= (Val >> i) & 1U;
+
+ // Encode the CurBit at the right place in the immediate.
+ Imm |= (CurBit << i);
+
+ // If we are done, finish the encoding with a 1.
+ if ((Val & ~(~0U << i)) == 0) {
+ Imm |= 1U << i;
+ break;
+ }
+ }
+
+ Inst.addOperand(MCOperand::createImm(Imm));
+
+ return S;
+}
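+// For example, tracing the loop above with Val = 0b1010: bit 3 toggles
+// CurBit to 1, bit 2 leaves it at 1, bit 1 toggles it back to 0 and, as
+// no lower bits of Val remain set, the terminating 1 is written at that
+// position, giving the it_mask-style immediate 0b1110.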
+
+static DecodeStatus DecodeVpredROperand(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ // The vpred_r operand type includes an MQPR register field derived
+ // from the encoding. But we don't actually want to add an operand
+ // to the MCInst at this stage, because AddThumbPredicate will do it
+ // later, and will infer the register number from the TIED_TO
+ // constraint. So this is a deliberately empty decoder method that
+ // will inhibit the auto-generated disassembly code from adding an
+ // operand at all.
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedIPredicateOperand(MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::EQ : ARMCC::NE));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedSPredicateOperand(MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Code;
+ switch (Val & 0x3) {
+ case 0:
+ Code = ARMCC::GE;
+ break;
+ case 1:
+ Code = ARMCC::LT;
+ break;
+ case 2:
+ Code = ARMCC::GT;
+ break;
+ case 3:
+ Code = ARMCC::LE;
+ break;
+ }
+ Inst.addOperand(MCOperand::createImm(Code));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedUPredicateOperand(MCInst &Inst,
+ unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm((Val & 0x1) == 0 ? ARMCC::HS : ARMCC::HI));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeRestrictedFPPredicateOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Code;
+ switch (Val) {
+ default:
+ return MCDisassembler::Fail;
+ case 0:
+ Code = ARMCC::EQ;
+ break;
+ case 1:
+ Code = ARMCC::NE;
+ break;
+ case 4:
+ Code = ARMCC::GE;
+ break;
+ case 5:
+ Code = ARMCC::LT;
+ break;
+ case 6:
+ Code = ARMCC::GT;
+ break;
+ case 7:
+ Code = ARMCC::LE;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Code));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVCVTImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned DecodedVal = 64 - Val;
+
+ switch (Inst.getOpcode()) {
+ case ARM::MVE_VCVTf16s16_fix:
+ case ARM::MVE_VCVTs16f16_fix:
+ case ARM::MVE_VCVTf16u16_fix:
+ case ARM::MVE_VCVTu16f16_fix:
+ if (DecodedVal > 16)
+ return MCDisassembler::Fail;
+ break;
+ case ARM::MVE_VCVTf32s32_fix:
+ case ARM::MVE_VCVTs32f32_fix:
+ case ARM::MVE_VCVTf32u32_fix:
+ case ARM::MVE_VCVTu32f32_fix:
+ if (DecodedVal > 32)
+ return MCDisassembler::Fail;
+ break;
+ }
+
+ Inst.addOperand(MCOperand::createImm(64 - Val));
+
+ return S;
+}
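+// For illustration: the fixed-point immediate (number of fraction bits)
+// is encoded as 64 minus its value, so the 16-bit forms accept raw
+// fields of 48..63 (1..16 fraction bits) and the 32-bit forms accept
+// 32..63 (1..32); smaller fields would imply more fraction bits than
+// the lane width and are rejected above.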
+
+static unsigned FixedRegForVSTRVLDR_SYSREG(unsigned Opcode) {
+ switch (Opcode) {
+ case ARM::VSTR_P0_off:
+ case ARM::VSTR_P0_pre:
+ case ARM::VSTR_P0_post:
+ case ARM::VLDR_P0_off:
+ case ARM::VLDR_P0_pre:
+ case ARM::VLDR_P0_post:
+ return ARM::P0;
+ default:
+ return 0;
+ }
+}
+
+template<bool Writeback>
+static DecodeStatus DecodeVSTRVLDR_SYSREG(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ switch (Inst.getOpcode()) {
+ case ARM::VSTR_FPSCR_pre:
+ case ARM::VSTR_FPSCR_NZCVQC_pre:
+ case ARM::VLDR_FPSCR_pre:
+ case ARM::VLDR_FPSCR_NZCVQC_pre:
+ case ARM::VSTR_FPSCR_off:
+ case ARM::VSTR_FPSCR_NZCVQC_off:
+ case ARM::VLDR_FPSCR_off:
+ case ARM::VLDR_FPSCR_NZCVQC_off:
+ case ARM::VSTR_FPSCR_post:
+ case ARM::VSTR_FPSCR_NZCVQC_post:
+ case ARM::VLDR_FPSCR_post:
+ case ARM::VLDR_FPSCR_NZCVQC_post:
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if (!featureBits[ARM::HasMVEIntegerOps] && !featureBits[ARM::FeatureVFP2])
+ return MCDisassembler::Fail;
+ }
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (unsigned Sysreg = FixedRegForVSTRVLDR_SYSREG(Inst.getOpcode()))
+ Inst.addOperand(MCOperand::createReg(Sysreg));
+ unsigned Rn = fieldFromInstruction(Val, 16, 4);
+ unsigned addr = fieldFromInstruction(Val, 0, 7) |
+ (fieldFromInstruction(Val, 23, 1) << 7) | (Rn << 8);
+
+ if (Writeback) {
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ if (!Check(S, DecodeT2AddrModeImm7s4(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+ Inst.addOperand(MCOperand::createReg(0));
+
+ return S;
+}
+
+static inline DecodeStatus DecodeMVE_MEM_pre(
+ MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder,
+ unsigned Rn, OperandDecoder RnDecoder, OperandDecoder AddrDecoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Qd = fieldFromInstruction(Val, 13, 3);
+ unsigned addr = fieldFromInstruction(Val, 0, 7) |
+ (fieldFromInstruction(Val, 23, 1) << 7) | (Rn << 8);
+
+ if (!Check(S, RnDecoder(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, AddrDecoder(Inst, addr, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template <int shift>
+static DecodeStatus DecodeMVE_MEM_1_pre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
+ fieldFromInstruction(Val, 16, 3),
+ DecodetGPRRegisterClass,
+ DecodeTAddrModeImm7<shift>);
+}
+
+template <int shift>
+static DecodeStatus DecodeMVE_MEM_2_pre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
+ fieldFromInstruction(Val, 16, 4),
+ DecoderGPRRegisterClass,
+ DecodeT2AddrModeImm7<shift,1>);
+}
+
+template <int shift>
+static DecodeStatus DecodeMVE_MEM_3_pre(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMVE_MEM_pre(Inst, Val, Address, Decoder,
+ fieldFromInstruction(Val, 17, 3),
+ DecodeMQPRRegisterClass,
+ DecodeMveAddrModeQ<shift>);
+}
+
+template<unsigned MinLog, unsigned MaxLog>
+static DecodeStatus DecodePowerTwoOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (Val < MinLog || Val > MaxLog)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(1LL << Val));
+ return S;
+}
+
+template <int shift>
+static DecodeStatus DecodeExpandedImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Val <<= shift;
+
+ Inst.addOperand(MCOperand::createImm(Val));
+ return MCDisassembler::Success;
+}
+
+template<unsigned start>
+static DecodeStatus DecodeMVEPairVectorIndexOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ Inst.addOperand(MCOperand::createImm(start + Val));
+
+ return S;
+}
+
+static DecodeStatus DecodeMVEVMOVQtoDReg(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+ fieldFromInstruction(Insn, 13, 3));
+ unsigned index = fieldFromInstruction(Insn, 4, 1);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMVEPairVectorIndexOperand<2>(Inst, index, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMVEVMOVDRegtoQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+ unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+ unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+ fieldFromInstruction(Insn, 13, 3));
+ unsigned index = fieldFromInstruction(Insn, 4, 1);
+
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMVEPairVectorIndexOperand<2>(Inst, index, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMVEPairVectorIndexOperand<0>(Inst, index, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMVEOverlappingLongShift(
+ MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned RdaLo = fieldFromInstruction(Insn, 17, 3) << 1;
+ unsigned RdaHi = fieldFromInstruction(Insn, 9, 3) << 1;
+ unsigned Rm = fieldFromInstruction(Insn, 12, 4);
+
+ if (RdaHi == 14) {
+ // This value of RdaHi (really indicating pc, because RdaHi has to
+ // be an odd-numbered register, so the low bit will be set by the
+ // decode function below) indicates that we must decode as SQRSHR
+ // or UQRSHL, which both have a single Rda register field with all
+ // four bits.
+ unsigned Rda = fieldFromInstruction(Insn, 16, 4);
+
+ switch (Inst.getOpcode()) {
+ case ARM::MVE_ASRLr:
+ case ARM::MVE_SQRSHRL:
+ Inst.setOpcode(ARM::MVE_SQRSHR);
+ break;
+ case ARM::MVE_LSLLr:
+ case ARM::MVE_UQRSHLL:
+ Inst.setOpcode(ARM::MVE_UQRSHL);
+ break;
+ default:
+ llvm_unreachable("Unexpected starting opcode!");
+ }
+
+ // Rda as output parameter
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rda, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Rda again as input parameter
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rda, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Rm, the amount to shift by
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+ }
+
+ // Otherwise, we decode as whichever opcode our caller has already
+ // put into Inst. Those all look the same:
+
+ // RdaLo,RdaHi as output parameters
+ if (!Check(S, DecodetGPREvenRegisterClass(Inst, RdaLo, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPROddRegisterClass(Inst, RdaHi, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // RdaLo,RdaHi again as input parameters
+ if (!Check(S, DecodetGPREvenRegisterClass(Inst, RdaLo, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodetGPROddRegisterClass(Inst, RdaHi, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ // Rm, the amount to shift by
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeMVEVCVTt1fp(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned Qd = ((fieldFromInstruction(Insn, 22, 1) << 3) |
+ fieldFromInstruction(Insn, 13, 3));
+ unsigned Qm = ((fieldFromInstruction(Insn, 5, 1) << 3) |
+ fieldFromInstruction(Insn, 1, 3));
+ unsigned imm6 = fieldFromInstruction(Insn, 16, 6);
+
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeVCVTImmOperand(Inst, imm6, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+template<bool scalar, OperandDecoder predicate_decoder>
+static DecodeStatus DecodeMVEVCMP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ unsigned Qn = fieldFromInstruction(Insn, 17, 3);
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qn, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ unsigned fc;
+
+ if (scalar) {
+ fc = fieldFromInstruction(Insn, 12, 1) << 2 |
+ fieldFromInstruction(Insn, 7, 1) |
+ fieldFromInstruction(Insn, 5, 1) << 1;
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+ if (!Check(S, DecodeGPRwithZRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ } else {
+ fc = fieldFromInstruction(Insn, 12, 1) << 2 |
+ fieldFromInstruction(Insn, 7, 1) |
+ fieldFromInstruction(Insn, 0, 1) << 1;
+ unsigned Qm = fieldFromInstruction(Insn, 5, 1) << 4 |
+ fieldFromInstruction(Insn, 1, 3);
+ if (!Check(S, DecodeMQPRRegisterClass(Inst, Qm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+
+ if (!Check(S, predicate_decoder(Inst, fc, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::createImm(ARMVCC::None));
+ Inst.addOperand(MCOperand::createReg(0));
+ Inst.addOperand(MCOperand::createImm(0));
+
+ return S;
+}
+
+static DecodeStatus DecodeMveVCTP(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ Inst.addOperand(MCOperand::createReg(ARM::VPR));
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ return S;
+}
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
deleted file mode 100755
index 68afea12ed44..000000000000
--- a/lib/Target/ARM/LICENSE.TXT
+++ /dev/null
@@ -1,47 +0,0 @@
-ARM Limited
-
-Software Grant License Agreement ("Agreement")
-
-Except for the license granted herein to you, ARM Limited ("ARM") reserves all
-right, title, and interest in and to the Software (defined below).
-
-Definition
-
-"Software" means the code and documentation as well as any original work of
-authorship, including any modifications or additions to an existing work, that
-is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
-inclusion in, or documentation of, any of the products owned or managed by LLVM
-(the "Work"). For the purposes of this definition, "submitted" means any form of
-electronic, verbal, or written communication sent to LLVM or its
-representatives, including but not limited to communication on electronic
-mailing lists, source code control systems, and issue tracking systems that are
-managed by, or on behalf of, LLVM for the purpose of discussing and improving
-the Work, but excluding communication that is conspicuously marked otherwise.
-
-1. Grant of Copyright License. Subject to the terms and conditions of this
- Agreement, ARM hereby grants to you and to recipients of the Software
- distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
- royalty-free, irrevocable copyright license to reproduce, prepare derivative
- works of, publicly display, publicly perform, sublicense, and distribute the
- Software and such derivative works.
-
-2. Grant of Patent License. Subject to the terms and conditions of this
- Agreement, ARM hereby grants you and to recipients of the Software
- distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
- royalty-free, irrevocable (except as stated in this section) patent license
- to make, have made, use, offer to sell, sell, import, and otherwise transfer
- the Work, where such license applies only to those patent claims licensable
- by ARM that are necessarily infringed by ARM's Software alone or by
- combination of the Software with the Work to which such Software was
- submitted. If any entity institutes patent litigation against ARM or any
- other entity (including a cross-claim or counterclaim in a lawsuit) alleging
- that ARM's Software, or the Work to which ARM has contributed constitutes
- direct or contributory patent infringement, then any patent licenses granted
- to that entity under this Agreement for the Software or Work shall terminate
- as of the date such litigation is filed.
-
-Unless required by applicable law or agreed to in writing, the software is
-provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
-either express or implied, including, without limitation, any warranties or
-conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-PARTICULAR PURPOSE.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index e1ea5964cf67..7732a6485a85 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -1,9 +1,8 @@
//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,7 +30,8 @@ namespace ARM_AM {
lsl,
lsr,
ror,
- rrx
+ rrx,
+ uxtw
};
enum AddrOpc {
@@ -49,6 +49,7 @@ namespace ARM_AM {
case ARM_AM::lsr: return "lsr";
case ARM_AM::ror: return "ror";
case ARM_AM::rrx: return "rrx";
+ case ARM_AM::uxtw: return "uxtw";
}
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index c2a07d4ddcef..aeab5be78ab4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,6 +29,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,6 +47,13 @@ public:
};
} // end anonymous namespace
+Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const {
+ if (STI.getTargetTriple().isOSBinFormatELF() && Name == "R_ARM_NONE")
+ return FK_NONE;
+
+ return MCAsmBackend::getFixupKind(Name);
+}
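+// Recognising the name "R_ARM_NONE" lets ELF assembly request a no-op
+// relocation explicitly, e.g. via a directive along the lines of
+// `.reloc ., R_ARM_NONE, sym` (a sketch of the intended use; the exact
+// .reloc operand forms depend on the assembler input).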
+
const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -98,6 +105,13 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_movw_lo16", 0, 20, 0},
{"fixup_arm_mod_imm", 0, 12, 0},
{"fixup_t2_so_imm", 0, 26, 0},
+ {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bf_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bfl_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bfc_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bfcsel_else_target", 0, 32, 0},
+ {"fixup_wls", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
};
const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = {
// This table *must* be in the order that the fixup_* kinds are defined in
@@ -149,6 +163,13 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_t2_movw_lo16", 12, 20, 0},
{"fixup_arm_mod_imm", 20, 12, 0},
{"fixup_t2_so_imm", 26, 6, 0},
+ {"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bf_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bfl_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bfc_target", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_bfcsel_else_target", 0, 32, 0},
+ {"fixup_wls", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_le", 0, 32, MCFixupKindInfo::FKF_IsPCRel}
};
if (Kind < FirstTargetFixupKind)
@@ -203,6 +224,13 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst,
return false;
}
+static const char *checkPCRelOffset(uint64_t Value, int64_t Min, int64_t Max) {
+ int64_t Offset = int64_t(Value) - 4;
+ if (Offset < Min || Offset > Max)
+ return "out of range pc-relative fixup value";
+ return nullptr;
+}
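+// The -4 in the helper above accounts for the Thumb PC offset: fixup
+// values are relative to the start of the instruction, while the ranges
+// checked below are architectural offsets from PC, i.e. from the
+// instruction address + 4.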
+
const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
uint64_t Value) const {
switch ((unsigned)Fixup.getKind()) {
@@ -250,6 +278,32 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
return "will be converted to nop";
break;
}
+ case ARM::fixup_bf_branch:
+ return checkPCRelOffset(Value, 0, 30);
+ case ARM::fixup_bf_target:
+ return checkPCRelOffset(Value, -0x10000, +0xfffe);
+ case ARM::fixup_bfl_target:
+ return checkPCRelOffset(Value, -0x40000, +0x3fffe);
+ case ARM::fixup_bfc_target:
+ return checkPCRelOffset(Value, -0x1000, +0xffe);
+ case ARM::fixup_wls:
+ return checkPCRelOffset(Value, 0, +0xffe);
+ case ARM::fixup_le:
+ // The offset field in the LE and LETP instructions is an 11-bit
+ // value shifted left by 1 (i.e. 0,2,4,...,4094), and it is
+ // interpreted as a negative offset from the value read from pc,
+ // i.e. from instruction_address+4.
+ //
+ // So an LE instruction can in principle address the instruction
+ // immediately after itself, or (not very usefully) the address
+ // half way through the 4-byte LE.
+ return checkPCRelOffset(Value, -0xffe, 0);
+ case ARM::fixup_bfcsel_else_target: {
+ if (Value != 2 && Value != 4)
+ return "out of range label-relative fixup value";
+ break;
+ }
+
default:
llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
}
@@ -384,6 +438,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
default:
Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
return 0;
+ case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -753,6 +808,60 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
EncValue |= (Value & 0xff);
return swapHalfWords(EncValue, Endian == support::little);
}
+ case ARM::fixup_bf_branch: {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ uint32_t out = (((Value - 4) >> 1) & 0xf) << 23;
+ return swapHalfWords(out, Endian == support::little);
+ }
+ case ARM::fixup_bf_target:
+ case ARM::fixup_bfl_target:
+ case ARM::fixup_bfc_target: {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ uint32_t out = 0;
+ uint32_t HighBitMask = (Kind == ARM::fixup_bf_target ? 0xf800 :
+ Kind == ARM::fixup_bfl_target ? 0x3f800 : 0x800);
+ out |= (((Value - 4) >> 1) & 0x1) << 11;
+ out |= (((Value - 4) >> 1) & 0x7fe);
+ out |= (((Value - 4) >> 1) & HighBitMask) << 5;
+ return swapHalfWords(out, Endian == support::little);
+ }
+ case ARM::fixup_bfcsel_else_target: {
+ // If this is a fixup of a branch future's else target then it should be a
+ // constant MCExpr representing the distance between the branch's target
+ // and the instruction after that same branch.
+ Value = Target.getConstant();
+
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ uint32_t out = ((Value >> 2) & 1) << 17;
+ return swapHalfWords(out, Endian == support::little);
+ }
+ case ARM::fixup_wls:
+ case ARM::fixup_le: {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ uint64_t real_value = Value - 4;
+ uint32_t out = 0;
+ if (Kind == ARM::fixup_le)
+ real_value = -real_value;
+ out |= ((real_value >> 1) & 0x1) << 11;
+ out |= ((real_value >> 1) & 0x7fe);
+ return swapHalfWords(out, Endian == support::little);
+ }
}
}
@@ -762,7 +871,9 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind() ;
- if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ if (FixupKind == FK_NONE)
+ return true;
+ if (FixupKind == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
// If the symbol is external the linker will handle it.
@@ -804,6 +915,9 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
+ return 0;
+
case FK_Data_1:
case ARM::fixup_arm_thumb_bcc:
case ARM::fixup_arm_thumb_cp:
@@ -842,6 +956,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_so_imm:
+ case ARM::fixup_bf_branch:
+ case ARM::fixup_bf_target:
+ case ARM::fixup_bfl_target:
+ case ARM::fixup_bfc_target:
+ case ARM::fixup_bfcsel_else_target:
+ case ARM::fixup_wls:
+ case ARM::fixup_le:
return 4;
case FK_SecRel_2:
@@ -858,6 +979,9 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
+ return 0;
+
case FK_Data_1:
return 1;
case FK_Data_2:
@@ -876,6 +1000,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_arm_pcrel_10_unscaled:
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_pcrel_9:
case ARM::fixup_arm_adr_pcrel_12:
case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_condbl:
@@ -895,6 +1020,13 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_arm_mod_imm:
case ARM::fixup_t2_so_imm:
+ case ARM::fixup_bf_branch:
+ case ARM::fixup_bf_target:
+ case ARM::fixup_bfl_target:
+ case ARM::fixup_bfc_target:
+ case ARM::fixup_bfcsel_else_target:
+ case ARM::fixup_wls:
+ case ARM::fixup_le:
// Instruction size is 4 bytes.
return 4;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 88c476bf65f4..67722a5e5b64 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -1,9 +1,8 @@
//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,6 +37,8 @@ public:
// different.
bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index de1bfaf203e4..87e56940f46d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -1,9 +1,8 @@
//===-- ARMAsmBackendDarwin.h ARM Asm Backend Darwin ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 86a583b19cf7..5d735114d441 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -1,9 +1,8 @@
//===-- ARMAsmBackendELF.h ARM Asm Backend ELF -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 553922d20f43..8cd7a4a00ead 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -1,9 +1,8 @@
//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 33c32d5464af..c4daafe8ee97 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -1,9 +1,8 @@
//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -203,6 +202,9 @@ namespace ARMII {
AddrMode_i12 = 16,
AddrMode5FP16 = 17, // i8 * 2
AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
+ AddrModeT2_i7s4 = 19, // i7 * 4
+ AddrModeT2_i7s2 = 20, // i7 * 2
+ AddrModeT2_i7 = 21, // i7 * 1
};
inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -226,6 +228,9 @@ namespace ARMII {
case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
case AddrMode_i12: return "AddrMode_i12";
case AddrModeT2_ldrex:return "AddrModeT2_ldrex";
+ case AddrModeT2_i7s4: return "AddrModeT2_i7s4";
+ case AddrModeT2_i7s2: return "AddrModeT2_i7s2";
+ case AddrModeT2_i7: return "AddrModeT2_i7";
}
}
@@ -386,16 +391,17 @@ namespace ARMII {
// instruction. Used by the parser to determine whether to require the 'S'
// suffix on the mnemonic (when not in an IT block) or preclude it (when
// in an IT block).
- ThumbArithFlagSetting = 1 << 18,
+ ThumbArithFlagSetting = 1 << 19,
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 15,
- DomainMask = 7 << DomainShift,
+ DomainMask = 15 << DomainShift,
DomainGeneral = 0 << DomainShift,
DomainVFP = 1 << DomainShift,
DomainNEON = 2 << DomainShift,
DomainNEONA8 = 4 << DomainShift,
+ DomainMVE = 8 << DomainShift,
//===------------------------------------------------------------------===//
// Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index b8ba7584911b..fda19eea1de6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -138,12 +137,20 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
default:
return ELF::R_ARM_THM_CALL;
}
+ case ARM::fixup_bf_target:
+ return ELF::R_ARM_THM_BF16;
+ case ARM::fixup_bfc_target:
+ return ELF::R_ARM_THM_BF12;
+ case ARM::fixup_bfl_target:
+ return ELF::R_ARM_THM_BF18;
}
}
switch ((unsigned)Fixup.getKind()) {
default:
Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
+ case FK_NONE:
+ return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
default:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index d3744fffac32..f51fbdcd84da 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -1,9 +1,8 @@
//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -485,8 +484,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 831589ba0581..bdf04a208b24 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -1,9 +1,8 @@
//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -104,6 +103,15 @@ enum Fixups {
// Fixup for Thumb2 8-bit rotated operand
fixup_t2_so_imm,
+ // Fixups for Branch Future.
+ fixup_bf_branch,
+ fixup_bf_target,
+ fixup_bfl_target,
+ fixup_bfc_target,
+ fixup_bfcsel_else_target,
+ fixup_wls,
+ fixup_le,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index 2f84719c4c4f..45be1ee96342 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -73,8 +72,20 @@ ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
+bool ARMInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
+ if (Opt == "reg-names-std") {
+ DefaultAltIdx = ARM::NoRegAltName;
+ return true;
+ }
+ if (Opt == "reg-names-raw") {
+ DefaultAltIdx = ARM::RegNamesRaw;
+ return true;
+ }
+ return false;
+}
+
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
+ OS << markup("<reg:") << getRegisterName(RegNo, DefaultAltIdx) << markup(">");
}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -592,6 +603,40 @@ void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
<< markup(">");
}
+template<int shift>
+void ARMInstPrinter::printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+ O << ", ";
+ printRegName(O, MO2.getReg());
+
+ if (shift > 0)
+ printRegImmShift(O, ARM_AM::uxtw, shift, UseMarkup);
+
+ O << "]" << markup(">");
+}
+
+void ARMInstPrinter::printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+
+ O << markup("<mem:") << "[";
+ printRegName(O, MO1.getReg());
+
+ int64_t Imm = MO2.getImm();
+ if (Imm != 0)
+ O << ", " << markup("<imm:") << '#' << Imm << markup(">");
+
+ O << "]" << markup(">");
+}
+
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -760,11 +805,13 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
- [&](const MCOperand &LHS, const MCOperand &RHS) {
- return MRI.getEncodingValue(LHS.getReg()) <
- MRI.getEncodingValue(RHS.getReg());
- }));
+ if (MI->getOpcode() != ARM::t2CLRM) {
+ assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
+ }
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
@@ -919,6 +966,15 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
O << ARMCondCodeToString(CC);
}
+void ARMInstPrinter::printMandatoryRestrictedPredicateOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if ((ARMCC::CondCodes)MI->getOperand(OpNum).getImm() == ARMCC::HS)
+ O << "cs";
+ else
+ printMandatoryPredicateOperand(MI, OpNum, STI, O);
+}
+
void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
unsigned OpNum,
const MCSubtargetInfo &STI,
@@ -927,6 +983,14 @@ void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
O << ARMCondCodeToString(CC);
}
+void ARMInstPrinter::printMandatoryInvertedPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(ARMCC::getOppositeCondition(CC));
+}
+
void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1009,16 +1073,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned Firstcond = MI->getOperand(OpNum - 1).getImm();
- unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = countTrailingZeros(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
- bool T = ((Mask >> Pos) & 1) == CondBit0;
- if (T)
- O << 't';
- else
+ if ((Mask >> Pos) & 1)
O << 'e';
+ else
+ O << 't';
}
}
@@ -1561,6 +1622,20 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
O << "}";
}
+template<unsigned NumRegs>
+void ARMInstPrinter::printMVEVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ const char *Prefix = "{";
+ for (unsigned i = 0; i < NumRegs; i++) {
+ O << Prefix;
+ printRegName(O, MRI.getSubReg(Reg, ARM::qsub_0 + i));
+ Prefix = ", ";
+ }
+ O << "}";
+}
+
template<int64_t Angle, int64_t Remainder>
void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
@@ -1569,3 +1644,35 @@ void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
O << "#" << (Val * Angle) + Remainder;
}
+void ARMInstPrinter::printVPTPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ ARMVCC::VPTCodes CC = (ARMVCC::VPTCodes)MI->getOperand(OpNum).getImm();
+ if (CC != ARMVCC::None)
+ O << ARMVPTPredToString(CC);
+}
+
+void ARMInstPrinter::printVPTMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // (3 - the number of trailing zeros) is the number of then / else.
+ unsigned Mask = MI->getOperand(OpNum).getImm();
+ unsigned NumTZ = countTrailingZeros(Mask);
+ assert(NumTZ <= 3 && "Invalid VPT mask!");
+ for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
+ bool T = ((Mask >> Pos) & 1) == 0;
+ if (T)
+ O << 't';
+ else
+ O << 'e';
+ }
+}
+
+void ARMInstPrinter::printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint32_t Val = MI->getOperand(OpNum).getImm();
+ O << markup("<imm:") << "#0x";
+ O.write_hex(Val);
+ O << markup(">");
+}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index afc8515136bc..69026956b60e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -1,9 +1,8 @@
//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,9 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
-#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMINSTPRINTER_H
+#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMINSTPRINTER_H
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -23,6 +23,8 @@ public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
+ bool applyTargetSpecificCLOption(StringRef Opt) override;
+
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
@@ -36,7 +38,8 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
+ static const char *getRegisterName(unsigned RegNo,
+ unsigned AltIdx = ARM::NoRegAltName);
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
@@ -167,6 +170,13 @@ public:
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printMandatoryRestrictedPredicateOperand(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printMandatoryInvertedPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printRegisterList(const MCInst *MI, unsigned OpNum,
@@ -233,11 +243,30 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ template<unsigned NumRegs>
+ void printMVEVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template<int64_t Angle, int64_t Remainder>
void printComplexRotationOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ // MVE
+ void printVPTPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printVPTMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template<int shift>
+ void printMveAddrModeRQOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMveAddrModeQOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpandedImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+private:
+ unsigned DefaultAltIdx = ARM::NoRegAltName;
};
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMINSTPRINTER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 3ee63ac374b3..d30d15df3d00 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 5e548162bec6..55d7b299674d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index b37b8073548f..dca6fe37d49a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,7 +49,7 @@ namespace {
class ARMMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
- const MCContext &CTX;
+ MCContext &CTX;
bool IsLittleEndian;
public:
@@ -163,6 +162,15 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint32_t getITMaskOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMVEShiftImmOpValue - Return encoding info for the 'sz:imm5'
+ /// operand.
+ uint32_t getMVEShiftImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12'
/// operand.
@@ -181,18 +189,37 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ /// getT2AddrModeImm7s4OpValue - Return encoding info for 'reg +/- imm7<<2'
+ /// operand.
+ uint32_t getT2AddrModeImm7s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for 'reg + imm8<<2'
/// operand.
uint32_t getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// getT2Imm8s4OpValue - Return encoding info for '+/- imm8<<2'
+ /// getT2ScaledImmOpValue - Return encoding info for '+/- immX<<Y'
/// operand.
- uint32_t getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ template<unsigned Bits, unsigned Shift>
+ uint32_t getT2ScaledImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ /// getMveAddrModeRQOpValue - Return encoding info for 'reg, vreg'
+ /// operand.
+ uint32_t getMveAddrModeRQOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMveAddrModeQOpValue - Return encoding info for 'reg +/- imm7<<{shift}'
+ /// operand.
+ template<int shift>
+ uint32_t getMveAddrModeQOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
/// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm'
/// operand as needed by load/store instructions.
@@ -224,8 +251,9 @@ public:
case ARM_AM::asr: return 2;
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
+ default:
+ llvm_unreachable("Invalid ShiftOpc!");
}
- llvm_unreachable("Invalid ShiftOpc!");
}
/// getAddrMode2OffsetOpValue - Return encoding for am2offset operands.
@@ -283,40 +311,6 @@ public:
return MI.getOperand(Op).getReg() == ARM::CPSR;
}
- /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value.
- unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(Op);
-
- // We expect MO to be an immediate or an expression,
- // if it is an immediate - that's fine, just encode the value.
- // Otherwise - create a Fixup.
- if (MO.isExpr()) {
- const MCExpr *Expr = MO.getExpr();
- // In instruction code this value always encoded as lowest 12 bits,
- // so we don't have to perform any specific adjustments.
- // Due to requirements of relocatable records we have to use FK_Data_4.
- // See ARMELFObjectWriter::ExplicitRelSym and
- // ARMELFObjectWriter::GetRelocTypeInner for more details.
- MCFixupKind Kind = MCFixupKind(FK_Data_4);
- Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
- return 0;
- }
-
- unsigned SoImm = MO.getImm();
- int SoImmVal = ARM_AM::getSOImmVal(SoImm);
- assert(SoImmVal != -1 && "Not a valid so_imm value!");
-
- // Encode rotate_imm.
- unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
- << ARMII::SoRotImmShift;
-
- // Encode immed_8.
- Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
- return Binary;
- }
-
unsigned getModImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &ST) const {
@@ -358,7 +352,8 @@ public:
unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
+ template<unsigned Bits, unsigned Shift>
+ unsigned getT2AddrModeImmOpValue(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum,
@@ -418,6 +413,14 @@ public:
unsigned getThumbSRImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ template <uint8_t shift, bool invert>
+ unsigned getExpandedImmOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ static_assert(shift <= 32, "Shift count must be less than or equal to 32.");
+ const MCOperand MO = MI.getOperand(Op);
+ return (invert ? (MO.getImm() ^ 0xff) : MO.getImm()) >> shift;
+ }
unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
unsigned EncodedValue,
@@ -436,6 +439,10 @@ public:
unsigned EncodedValue,
const MCSubtargetInfo &STI) const;
+ uint32_t getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
void EmitByte(unsigned char C, raw_ostream &OS) const {
OS << (char)C;
}
@@ -451,6 +458,26 @@ public:
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+ template <bool isNeg, ARM::Fixups fixup>
+ uint32_t getBFTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ uint32_t getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ uint32_t getVPTMaskOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getRestrictedCondCodeOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ template <unsigned size>
+ uint32_t getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
@@ -537,7 +564,15 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
unsigned Reg = MO.getReg();
unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
- // Q registers are encoded as 2x their register number.
+ // In NEON, Q registers are encoded as 2x their register number,
+ // because they're using the same indices as the D registers they
+ // overlap. In MVE, there are no 64-bit vector instructions, so
+ // the encodings all refer to Q-registers by their literal
+ // register number.
+
+ if (STI.getFeatureBits()[ARM::HasMVEIntegerOps])
+ return RegNo;
+
switch (Reg) {
default:
return RegNo;
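The distinction the comment above describes can be illustrated with a minimal standalone sketch; the helper name below is hypothetical and not part of this patch or of the LLVM API.

// NEON refers to Qn through the index of the D registers it overlaps (2*n);
// MVE has no 64-bit vector instructions and encodes the literal Q number.
#include <cassert>

static unsigned encodeQRegNumber(unsigned QNum, bool IsMVE) {
  return IsMVE ? QNum : 2 * QNum;
}

int main() {
  assert(encodeQRegNumber(3, /*IsMVE=*/false) == 6); // NEON: Q3 overlaps D6/D7
  assert(encodeQRegNumber(3, /*IsMVE=*/true) == 3);  // MVE: Q3 encodes as 3
}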
@@ -849,6 +884,33 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
return Val;
}
+/// getITMaskOpValue - Return the architectural encoding of an IT
+/// predication mask, given the MCOperand format.
+uint32_t ARMMCCodeEmitter::
+getITMaskOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MaskMO = MI.getOperand(OpIdx);
+ assert(MaskMO.isImm() && "Unexpected operand type!");
+
+ unsigned Mask = MaskMO.getImm();
+
+ // IT masks are encoded as a sequence of replacement low-order bits
+ // for the condition code. So if the low bit of the starting
+ // condition code is 1, then we have to flip all the bits above the
+ // terminating bit (which is the lowest 1 bit).
+ assert(OpIdx > 0 && "IT mask appears first!");
+ const MCOperand CondMO = MI.getOperand(OpIdx-1);
+ assert(CondMO.isImm() && "Unexpected operand type!");
+ if (CondMO.getImm() & 1) {
+ unsigned LowBit = Mask & -Mask;
+ unsigned BitsAboveLowBit = 0xF & (-LowBit << 1);
+ Mask ^= BitsAboveLowBit;
+ }
+
+ return Mask;
+}
+
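The bit-flipping rule spelled out in the comments above can be checked with a small standalone sketch; the helper below is hypothetical (not the LLVM API) and only mirrors the arithmetic of getITMaskOpValue.

#include <cassert>

static unsigned encodeITMask(unsigned Mask, unsigned Cond) {
  if (Cond & 1) {
    unsigned LowBit = Mask & -Mask;                  // terminating 1 bit
    unsigned BitsAboveLowBit = 0xF & (-LowBit << 1); // everything above it
    Mask ^= BitsAboveLowBit;
  }
  return Mask;
}

int main() {
  assert(encodeITMask(0b0100, /*Cond=*/0) == 0b0100); // even cond: unchanged
  assert(encodeITMask(0b0100, /*Cond=*/1) == 0b1100); // odd cond: bits above
                                                      // the terminator flip
  assert(encodeITMask(0b1000, /*Cond=*/1) == 0b1000); // nothing above it
}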
/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label
/// target.
uint32_t ARMMCCodeEmitter::
@@ -878,6 +940,41 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
return (Rm << 3) | Rn;
}
+/// getMVEShiftImmOpValue - Return encoding info for the 'sz:imm5'
+/// operand.
+uint32_t
+ARMMCCodeEmitter::getMVEShiftImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {4-0} = szimm5
+ // The value we are trying to encode is an immediate in the range [1,7]
+ // for the u8/s8 variants, or [1,15] for the u16/s16 variants.
+ // If ShiftImm is that value, it is encoded as szimm5 = ShiftImm + Size,
+ // where Size is 8 or 16 respectively.
+
+ unsigned Size, ShiftImm;
+ switch(MI.getOpcode()) {
+ case ARM::MVE_VSHLL_imms16bh:
+ case ARM::MVE_VSHLL_imms16th:
+ case ARM::MVE_VSHLL_immu16bh:
+ case ARM::MVE_VSHLL_immu16th:
+ Size = 16;
+ break;
+ case ARM::MVE_VSHLL_imms8bh:
+ case ARM::MVE_VSHLL_imms8th:
+ case ARM::MVE_VSHLL_immu8bh:
+ case ARM::MVE_VSHLL_immu8th:
+ Size = 8;
+ break;
+ default:
+ llvm_unreachable("Use of operand not supported by this instruction");
+ }
+ ShiftImm = MI.getOperand(OpIdx).getImm();
+ return Size + ShiftImm;
+}
+
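A worked example of the szimm5 encoding described above, as a hypothetical standalone sketch (the helper name is an assumption, not the LLVM API):

#include <cassert>

static unsigned encodeMVEShiftImm(unsigned Size, unsigned ShiftImm) {
  return Size + ShiftImm; // szimm5; the high bits also identify the lane size
}

int main() {
  assert(encodeMVEShiftImm(8, 3) == 11);  // u8/s8 variants, shift by 3
  assert(encodeMVEShiftImm(16, 3) == 19); // u16/s16 variants, shift by 3
}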
/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
uint32_t ARMMCCodeEmitter::
getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
@@ -929,12 +1026,11 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
-/// getT2Imm8s4OpValue - Return encoding info for
-/// '+/- imm8<<2' operand.
+template<unsigned Bits, unsigned Shift>
uint32_t ARMMCCodeEmitter::
-getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+getT2ScaledImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// FIXME: The immediate operand should have already been encoded like this
// before ever getting here. The encoder method should just need to combine
// the MI operands for the register and the offset into a single
@@ -942,25 +1038,75 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// style, unfortunately. As-is, we can't represent the distinct encoding
// for #-0.
- // {8} = (U)nsigned (add == '1', sub == '0')
- // {7-0} = imm8
- int32_t Imm8 = MI.getOperand(OpIdx).getImm();
- bool isAdd = Imm8 >= 0;
+ // {Bits} = (U)nsigned (add == '1', sub == '0')
+ // {(Bits-1)-0} = immediate
+ int32_t Imm = MI.getOperand(OpIdx).getImm();
+ bool isAdd = Imm >= 0;
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
- if (Imm8 < 0)
- Imm8 = -(uint32_t)Imm8;
+ if (Imm < 0)
+ Imm = -(uint32_t)Imm;
- // Scaled by 4.
- Imm8 /= 4;
+ Imm >>= Shift;
- uint32_t Binary = Imm8 & 0xff;
+ uint32_t Binary = Imm & ((1U << Bits) - 1);
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (isAdd)
- Binary |= (1 << 8);
+ Binary |= (1U << Bits);
return Binary;
}
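The generalised encoding can be exercised with a standalone sketch, assuming Bits=8 and Shift=2 to reproduce what the old getT2Imm8s4OpValue computed; the helper below is hypothetical and not the LLVM API.

#include <cassert>
#include <cstdint>

template <unsigned Bits, unsigned Shift>
static uint32_t encodeT2ScaledImm(int32_t Imm) {
  bool isAdd = Imm >= 0;
  if (Imm < 0)
    Imm = -(uint32_t)Imm;
  Imm >>= Shift;
  uint32_t Binary = Imm & ((1U << Bits) - 1);
  if (isAdd)
    Binary |= (1U << Bits); // the 'U' bit sits just above the magnitude
  return Binary;
}

int main() {
  assert((encodeT2ScaledImm<8, 2>(20) == ((1u << 8) | 5))); // add, 20 >> 2
  assert((encodeT2ScaledImm<8, 2>(-20) == 5));              // sub, 20 >> 2
}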
+/// getMveAddrModeRQOpValue - Return encoding info for 'reg, vreg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getMveAddrModeRQOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {6-3} Rn
+ // {2-0} Qm
+ const MCOperand &M0 = MI.getOperand(OpIdx);
+ const MCOperand &M1 = MI.getOperand(OpIdx + 1);
+
+ unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(M0.getReg());
+ unsigned Qm = CTX.getRegisterInfo()->getEncodingValue(M1.getReg());
+
+ assert(Qm < 8 && "Qm is supposed to be encodable in 3 bits");
+
+ return (Rn << 3) | Qm;
+}
+
+/// getMveAddrModeQOpValue - Return encoding info for 'reg +/- imm7<<{shift}'
+/// operand.
+template<int shift>
+uint32_t ARMMCCodeEmitter::
+getMveAddrModeQOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {10-8} Qm
+ // {7-0} Imm
+ const MCOperand &M0 = MI.getOperand(OpIdx);
+ const MCOperand &M1 = MI.getOperand(OpIdx + 1);
+
+ unsigned Qm = CTX.getRegisterInfo()->getEncodingValue(M0.getReg());
+ int32_t Imm = M1.getImm();
+
+ bool isAdd = Imm >= 0;
+
+ Imm >>= shift;
+
+ if (!isAdd)
+ Imm = -(uint32_t)Imm;
+
+ Imm &= 0x7f;
+
+ if (isAdd)
+ Imm |= 0x80;
+
+ assert(Qm < 8 && "Qm is supposed to be encodable in 3 bits");
+
+ return (Qm << 8) | Imm;
+}
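The bit layout in the comments above ({10-8} Qm, {7} add bit, {6-0} scaled magnitude) can be sketched standalone; the helper is hypothetical, not the LLVM API, and takes an already-decoded register number and offset.

#include <cassert>
#include <cstdint>

static uint32_t encodeMveAddrModeQ(unsigned Qm, int32_t Imm, int Shift) {
  bool isAdd = Imm >= 0;
  uint32_t Mag = (uint32_t)(isAdd ? Imm : -Imm) >> Shift;
  uint32_t Val = Mag & 0x7f;
  if (isAdd)
    Val |= 0x80;
  return (Qm << 8) | Val;
}

int main() {
  assert(encodeMveAddrModeQ(2, 16, 2) == ((2u << 8) | 0x80 | 4)); // Q2, +16
  assert(encodeMveAddrModeQ(2, -16, 2) == ((2u << 8) | 4));       // Q2, -16
}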
+
/// getT2AddrModeImm8s4OpValue - Return encoding info for
/// 'reg +/- imm8<<2' operand.
uint32_t ARMMCCodeEmitter::
@@ -1002,6 +1148,33 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
return Binary;
}
+/// getT2AddrModeImm7s4OpValue - Return encoding info for
+/// 'reg +/- imm7<<2' operand.
+uint32_t
+ARMMCCodeEmitter::getT2AddrModeImm7s4OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // {11-8} = reg
+ // {7} = (A)dd (add == '1', sub == '0')
+ // {6-0} = imm7
+ unsigned Reg, Imm7;
+ // If the first operand isn't a register, we have a label reference.
+ bool isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm7, Fixups, STI);
+
+ // FIXME: The immediate operand should have already been encoded like this
+ // before ever getting here. The encoder method should just need to combine
+ // the MI operands for the register and the offset into a single
+ // representation for the complex operand in the .td file. This isn't just
+ // style, unfortunately. As-is, we can't represent the distinct encoding
+ // for #-0.
+ uint32_t Binary = (Imm7 >> 2) & 0xff;
+ // Immediate is always encoded as positive. The 'A' bit controls add vs sub.
+ if (isAdd)
+ Binary |= (1 << 7);
+ Binary |= (Reg << 8);
+ return Binary;
+}
+
/// getT2AddrModeImm0_1020s4OpValue - Return encoding info for
/// 'reg + imm8<<2' operand.
uint32_t ARMMCCodeEmitter::
@@ -1434,25 +1607,29 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum,
return Value;
}
+template<unsigned Bits, unsigned Shift>
unsigned ARMMCCodeEmitter::
-getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+getT2AddrModeImmOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO1 = MI.getOperand(OpNum);
const MCOperand &MO2 = MI.getOperand(OpNum+1);
// FIXME: Needs fixup support.
unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg());
- // Even though the immediate is 8 bits long, we need 9 bits in order
+ // If the immediate is B bits long, we need B+1 bits in order
// to represent the (inverse of the) sign bit.
- Value <<= 9;
+ Value <<= (Bits + 1);
int32_t tmp = (int32_t)MO2.getImm();
- if (tmp < 0)
+ if (tmp == INT32_MIN) { // represents subtracting zero rather than adding it
+ tmp = 0;
+ } else if (tmp < 0) {
tmp = abs(tmp);
- else
- Value |= 256; // Set the ADD bit
- Value |= tmp & 255;
+ } else {
+ Value |= (1U << Bits); // Set the ADD bit
+ }
+ Value |= (tmp >> Shift) & ((1U << Bits) - 1);
return Value;
}
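The INT32_MIN special case above deserves a worked example: the backend uses INT32_MIN to stand for "#-0", i.e. subtracting zero rather than adding it, so it must clear the ADD bit while encoding a zero magnitude. A hypothetical standalone sketch (not the LLVM API), instantiated with Bits=8, Shift=0:

#include <cassert>
#include <climits>
#include <cstdint>
#include <cstdlib>

template <unsigned Bits, unsigned Shift>
static unsigned encodeT2AddrModeImm(unsigned RegEnc, int32_t Imm) {
  unsigned Value = RegEnc << (Bits + 1); // register above U bit + magnitude
  if (Imm == INT32_MIN) {                // "#-0": subtract zero
    Imm = 0;
  } else if (Imm < 0) {
    Imm = std::abs(Imm);
  } else {
    Value |= (1U << Bits);               // set the ADD bit
  }
  return Value | (((uint32_t)Imm >> Shift) & ((1U << Bits) - 1));
}

int main() {
  assert((encodeT2AddrModeImm<8, 0>(1, 4) == ((1u << 9) | (1u << 8) | 4)));
  assert((encodeT2AddrModeImm<8, 0>(1, -4) == ((1u << 9) | 4)));
  assert((encodeT2AddrModeImm<8, 0>(1, INT32_MIN) == (1u << 9))); // "#-0"
}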
@@ -1534,7 +1711,7 @@ unsigned ARMMCCodeEmitter::
getRegisterListOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- // VLDM/VSTM:
+ // VLDM/VSTM/VSCCLRM:
// {12-8} = Vd
// {7-0} = Number of registers
//
@@ -1543,28 +1720,40 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
unsigned Reg = MI.getOperand(Op).getReg();
bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+ bool CLRMRegs = MI.getOpcode() == ARM::t2CLRM;
unsigned Binary = 0;
if (SPRRegs || DPRRegs) {
- // VLDM/VSTM
+ // VLDM/VSTM/VSCCLRM
unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg);
unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
Binary |= (RegNo & 0x1f) << 8;
+
+ // Ignore VPR
+ if (MI.getOpcode() == ARM::VSCCLRMD || MI.getOpcode() == ARM::VSCCLRMS)
+ --NumRegs;
if (SPRRegs)
Binary |= NumRegs;
else
Binary |= NumRegs * 2;
} else {
const MCRegisterInfo &MRI = *CTX.getRegisterInfo();
- assert(std::is_sorted(MI.begin() + Op, MI.end(),
- [&](const MCOperand &LHS, const MCOperand &RHS) {
- return MRI.getEncodingValue(LHS.getReg()) <
- MRI.getEncodingValue(RHS.getReg());
- }));
+ if (!CLRMRegs) {
+ assert(std::is_sorted(MI.begin() + Op, MI.end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
+ }
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
+ unsigned RegNo;
+ if (CLRMRegs && MI.getOperand(I).getReg() == ARM::APSR) {
+ RegNo = 15;
+ } else {
+ RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
+ }
Binary |= 1 << RegNo;
}
}
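The GPR branch above builds a one-bit-per-register mask, with t2CLRM treating APSR as register number 15. A hypothetical standalone sketch of that mask construction (the helper name and the use of -1 for APSR are assumptions for the example, not the LLVM API):

#include <cassert>
#include <initializer_list>

static unsigned encodeRegList(std::initializer_list<int> RegEncodings,
                              bool IsCLRM = false) {
  unsigned Binary = 0;
  for (int RegNo : RegEncodings) {
    if (IsCLRM && RegNo == -1) // -1 stands for APSR in this sketch
      RegNo = 15;
    Binary |= 1u << RegNo;
  }
  return Binary;
}

int main() {
  assert(encodeRegList({0, 2, 3}) == 0b1101);                // r0, r2, r3
  assert(encodeRegList({0, -1}, true) == ((1u << 15) | 1u)); // CLRM {r0, APSR}
}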
@@ -1710,6 +1899,120 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
+template <bool isNeg, ARM::Fixups fixup>
+uint32_t
+ARMMCCodeEmitter::getBFTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, fixup, Fixups, STI);
+ return isNeg ? -(MO.getImm() >> 1) : (MO.getImm() >> 1);
+}
+
+uint32_t
+ARMMCCodeEmitter::getBFAfterTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ const MCOperand BranchMO = MI.getOperand(0);
+
+ if (MO.isExpr()) {
+ assert(BranchMO.isExpr());
+ const MCExpr *DiffExpr = MCBinaryExpr::createSub(
+ MO.getExpr(), BranchMO.getExpr(), CTX);
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_bfcsel_else_target);
+ Fixups.push_back(llvm::MCFixup::create(0, DiffExpr, Kind, MI.getLoc()));
+ return 0;
+ }
+
+ assert(MO.isImm() && BranchMO.isImm());
+ int Diff = MO.getImm() - BranchMO.getImm();
+ assert(Diff == 4 || Diff == 2);
+
+ return Diff == 4;
+}
+
+uint32_t ARMMCCodeEmitter::getVPTMaskOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI)const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Unexpected operand type!");
+
+ int Value = MO.getImm();
+ int Imm = 0;
+
+ // VPT Masks are actually encoded as a series of invert/don't invert bits,
+ // rather than true/false bits.
+ unsigned PrevBit = 0;
+ for (int i = 3; i >= 0; --i) {
+ unsigned Bit = (Value >> i) & 1;
+
+ // Check if we are at the end of the mask.
+ if ((Value & ~(~0U << i)) == 0) {
+ Imm |= (1 << i);
+ break;
+ }
+
+ // Convert the bit in the mask based on the previous bit.
+ if (Bit != PrevBit)
+ Imm |= (1 << i);
+
+ PrevBit = Bit;
+ }
+
+ return Imm;
+}
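The invert/don't-invert conversion above is easiest to see on a concrete value. A hypothetical standalone sketch mirroring the loop (not the LLVM API):

#include <cassert>

static int encodeVPTMask(int Value) {
  int Imm = 0;
  unsigned PrevBit = 0;
  for (int i = 3; i >= 0; --i) {
    unsigned Bit = (Value >> i) & 1;
    if ((Value & ~(~0U << i)) == 0) { // no bits left below: terminator
      Imm |= (1 << i);
      break;
    }
    if (Bit != PrevBit)               // flipped relative to the previous bit
      Imm |= (1 << i);
    PrevBit = Bit;
  }
  return Imm;
}

int main() {
  assert(encodeVPTMask(0b1000) == 0b1000); // single predicated instruction
  assert(encodeVPTMask(0b1110) == 0b1010); // longer block: bits re-expressed
                                           // as invert/don't-invert + stop
}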
+
+uint32_t ARMMCCodeEmitter::getRestrictedCondCodeOpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Unexpected operand type!");
+
+ switch (MO.getImm()) {
+ default:
+ assert(0 && "Unexpected Condition!");
+ return 0;
+ case ARMCC::HS:
+ case ARMCC::EQ:
+ return 0;
+ case ARMCC::HI:
+ case ARMCC::NE:
+ return 1;
+ case ARMCC::GE:
+ return 4;
+ case ARMCC::LT:
+ return 5;
+ case ARMCC::GT:
+ return 6;
+ case ARMCC::LE:
+ return 7;
+ }
+}
+
+uint32_t ARMMCCodeEmitter::
+getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Unexpected operand type!");
+ return countTrailingZeros((uint64_t)MO.getImm());
+}
+
+template <unsigned start>
+uint32_t ARMMCCodeEmitter::
+getMVEPairVectorIndexOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Unexpected operand type!");
+
+ int Value = MO.getImm();
+ return Value - start;
+}
+
#include "ARMGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 306f068312f5..fbad05fb1759 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 75dde8008fca..033a43288f3e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -1,9 +1,8 @@
//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 46434007a854..90022a8d88a6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,8 +12,9 @@
#include "ARMMCTargetDesc.h"
#include "ARMBaseInfo.h"
+#include "ARMInstPrinter.h"
#include "ARMMCAsmInfo.h"
-#include "InstPrinter/ARMInstPrinter.h"
+#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -277,14 +277,29 @@ class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis {
public:
ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {}
- bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size, uint64_t &Target) const override {
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+ uint64_t &Target) const override {
+ unsigned OpId;
+ switch (Inst.getOpcode()) {
+ default:
+ OpId = 0;
+ break;
+ case ARM::t2WLS:
+ case ARM::t2LEUpdate:
+ OpId = 2;
+ break;
+ case ARM::t2LE:
+ OpId = 1;
+ break;
+ }
+
// We only handle PCRel branches for now.
- if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ if (Info->get(Inst.getOpcode()).OpInfo[OpId].OperandType !=
+ MCOI::OPERAND_PCREL)
return false;
- int64_t Imm = Inst.getOperand(0).getImm();
- Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes.
+ // In Thumb mode the PC is always off by 4 bytes.
+ Target = Addr + Inst.getOperand(OpId).getImm() + 4;
return true;
}
};
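The updated analysis picks the PC-relative operand per opcode (operand 2 for t2WLS/t2LEUpdate, operand 1 for t2LE, operand 0 otherwise) and then applies the usual Thumb rule that the PC reads 4 bytes past the instruction. A hypothetical standalone sketch of that final address computation (not the LLVM API):

#include <cassert>
#include <cstdint>

static uint64_t thumbBranchTarget(uint64_t Addr, int64_t PCRelImm) {
  return Addr + PCRelImm + 4; // Thumb PC = instruction address + 4
}

int main() {
  assert(thumbBranchTarget(0x8000, 0x10) == 0x8014);
  assert(thumbBranchTarget(0x8000, -8) == 0x7ffc);
}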
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 3ee004592ac6..9cbbd56225ef 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/MC/MCInstrDesc.h"
#include <memory>
#include <string>
@@ -39,11 +39,6 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheARMLETarget();
-Target &getTheThumbLETarget();
-Target &getTheARMBETarget();
-Target &getTheThumbBETarget();
-
namespace ARM_MC {
std::string ParseARMTriple(const Triple &TT, StringRef CPU);
@@ -100,6 +95,20 @@ createARMWinCOFFObjectWriter(bool Is64Bit);
/// Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
+
+namespace ARM {
+enum OperandType {
+ OPERAND_VPRED_R = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_VPRED_N,
+};
+inline bool isVpred(OperandType op) {
+ return op == OPERAND_VPRED_R || op == OPERAND_VPRED_N;
+}
+inline bool isVpred(uint8_t op) {
+ return isVpred(static_cast<OperandType>(op));
+}
+} // end namespace ARM
+
} // End llvm namespace
// Defines symbolic names for ARM registers. This defines a mapping from
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 6259c98321f4..886b7e7bc84e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -1,9 +1,8 @@
//===- ARMMachORelocationInfo.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 0ced8195790d..c49885023cb2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 91836cff95c8..b863517c0cca 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===- ARMTargetStreamer.cpp - ARMTargetStreamer class --*- C++ -*---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,7 +124,9 @@ static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) {
if (STI.hasFeature(ARM::FeatureRClass))
return ARMBuildAttrs::v8_R;
return ARMBuildAttrs::v8_A;
- } else if (STI.hasFeature(ARM::HasV8MMainlineOps))
+ } else if (STI.hasFeature(ARM::HasV8_1MMainlineOps))
+ return ARMBuildAttrs::v8_1_M_Main;
+ else if (STI.hasFeature(ARM::HasV8MMainlineOps))
return ARMBuildAttrs::v8_M_Main;
else if (STI.hasFeature(ARM::HasV7Ops)) {
if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP))
@@ -223,37 +224,37 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
? ARMBuildAttrs::AllowNeonARMv8_1a
: ARMBuildAttrs::AllowNeonARMv8);
} else {
- if (STI.hasFeature(ARM::FeatureFPARMv8))
+ if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP))
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
// FPU, but there are two different names for it depending on the CPU.
- emitFPU(STI.hasFeature(ARM::FeatureD16)
- ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16
- : ARM::FK_FPV5_D16)
- : ARM::FK_FP_ARMV8);
- else if (STI.hasFeature(ARM::FeatureVFP4))
- emitFPU(STI.hasFeature(ARM::FeatureD16)
- ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16
- : ARM::FK_VFPV4_D16)
- : ARM::FK_VFPV4);
- else if (STI.hasFeature(ARM::FeatureVFP3))
+ emitFPU(STI.hasFeature(ARM::FeatureD32)
+ ? ARM::FK_FP_ARMV8
+ : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16
+ : ARM::FK_FPV5_SP_D16));
+ else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP))
+ emitFPU(STI.hasFeature(ARM::FeatureD32)
+ ? ARM::FK_VFPV4
+ : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16
+ : ARM::FK_FPV4_SP_D16));
+ else if (STI.hasFeature(ARM::FeatureVFP3_D16_SP))
emitFPU(
- STI.hasFeature(ARM::FeatureD16)
- // +d16
- ? (STI.hasFeature(ARM::FeatureVFPOnlySP)
- ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
- : ARM::FK_VFPV3XD)
- : (STI.hasFeature(ARM::FeatureFP16)
+ STI.hasFeature(ARM::FeatureD32)
+ // +d32
+ ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
+ : ARM::FK_VFPV3)
+ // -d32
+ : (STI.hasFeature(ARM::FeatureFP64)
+ ? (STI.hasFeature(ARM::FeatureFP16)
? ARM::FK_VFPV3_D16_FP16
- : ARM::FK_VFPV3_D16))
- // -d16
- : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16
- : ARM::FK_VFPV3));
- else if (STI.hasFeature(ARM::FeatureVFP2))
+ : ARM::FK_VFPV3_D16)
+ : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16
+ : ARM::FK_VFPV3XD)));
+ else if (STI.hasFeature(ARM::FeatureVFP2_D16_SP))
emitFPU(ARM::FK_VFPV2);
}
// ABI_HardFP_use attribute to indicate single precision FP.
- if (STI.hasFeature(ARM::FeatureVFPOnlySP))
+ if (STI.hasFeature(ARM::FeatureVFP2_D16_SP) && !STI.hasFeature(ARM::FeatureFP64))
emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
ARMBuildAttrs::HardFPSinglePrecision);
@@ -263,6 +264,11 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
if (STI.hasFeature(ARM::FeatureMP))
emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+ if (STI.hasFeature(ARM::HasMVEFloatOps))
+ emitAttribute(ARMBuildAttrs::MVE_arch, ARMBuildAttrs::AllowMVEIntegerAndFloat);
+ else if (STI.hasFeature(ARM::HasMVEIntegerOps))
+ emitAttribute(ARMBuildAttrs::MVE_arch, ARMBuildAttrs::AllowMVEInteger);
+
// Hardware divide in ARM mode is part of base arch, starting from ARMv8.
// If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M).
// It is not possible to produce DisallowDIV: if hwdiv is present in the base
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index d3ab83bbccbc..38667d686b85 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -1,9 +1,8 @@
//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index a7bfbdf4938e..c3134c04b33a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -1,9 +1,8 @@
//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 30cbde1ca71f..054a95dd1e12 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 32cb3dcdcad8..2e816bea5e91 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 7f03e1463c1d..4b25986b90a7 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -1,9 +1,8 @@
//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index b0491a4108a6..86cb907abfa3 100644
--- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -1,13 +1,12 @@
//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.h b/lib/Target/ARM/TargetInfo/ARMTargetInfo.h
new file mode 100644
index 000000000000..c217dd5c4612
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.h
@@ -0,0 +1,23 @@
+//===-- ARMTargetInfo.h - ARM Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_TARGETINFO_ARMTARGETINFO_H
+#define LLVM_LIB_TARGET_ARM_TARGETINFO_ARMTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheARMLETarget();
+Target &getTheARMBETarget();
+Target &getTheThumbLETarget();
+Target &getTheThumbBETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_TARGETINFO_ARMTARGETINFO_H
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 5c745e112b2e..426e9a0ed9b8 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -1,9 +1,8 @@
//===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,15 +63,52 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
return !MFI.hasVarSizedObjects();
}
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, const DebugLoc &dl,
- const ThumbRegisterInfo &MRI, int NumBytes,
- unsigned MIFlags = MachineInstr::NoFlags) {
+static void
+emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, const DebugLoc &dl,
+ const ThumbRegisterInfo &MRI, int NumBytes,
+ unsigned ScratchReg, unsigned MIFlags) {
+ // If it would take more than three instructions to adjust the stack pointer
+ // using tADDspi/tSUBspi, load an immediate instead.
+ if (std::abs(NumBytes) > 508 * 3) {
+ // We use a different codepath here from the normal
+ // emitThumbRegPlusImmediate so we don't have to deal with register
+ // scavenging. (Scavenging could try to use the emergency spill slot
+ // before we've actually finished setting up the stack.)
+ if (ScratchReg == ARM::NoRegister)
+ report_fatal_error("Failed to emit Thumb1 stack adjustment");
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
+ if (ST.genExecuteOnly()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg)
+ .addImm(NumBytes).setMIFlags(MIFlags);
+ } else {
+ MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
+ 0, MIFlags);
+ }
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
+ .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ return;
+ }
+ // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
+ // won't change.
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
MRI, MIFlags);
+
}
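The 508 * 3 threshold above comes from the Thumb1 SP-adjusting instructions: tADDspi/tSUBspi take a 7-bit immediate scaled by 4, so a single instruction can move SP by at most 508 bytes and three of them by 1524. A hypothetical standalone sketch of just that check (not the LLVM API):

#include <cassert>
#include <cstdlib>

static bool needsScratchRegForSPAdjust(int NumBytes) {
  return std::abs(NumBytes) > 508 * 3;
}

int main() {
  assert(!needsScratchRegForSPAdjust(-1524)); // still three tSUBspi's
  assert(needsScratchRegForSPAdjust(-1528));  // load the constant instead
}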
+static void emitCallSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, const DebugLoc &dl,
+ const ThumbRegisterInfo &MRI, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
+}
+
+
MachineBasicBlock::iterator Thumb1FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -96,10 +132,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old.getOpcode();
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
- emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
+ emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
} else {
assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
- emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
+ emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
}
}
}
@@ -142,8 +178,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrSpillFI = 0;
if (ArgRegsSaveSize) {
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
- MachineInstr::FrameSetup);
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
+ ARM::NoRegister, MachineInstr::FrameSetup);
CFAOffset -= ArgRegsSaveSize;
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
@@ -154,8 +190,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0) {
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
- MachineInstr::FrameSetup);
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
+ -(NumBytes - ArgRegsSaveSize),
+ ARM::NoRegister, MachineInstr::FrameSetup);
CFAOffset -= NumBytes - ArgRegsSaveSize;
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
@@ -332,8 +369,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Insert it after all the callee-save spills.
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
- MachineInstr::FrameSetup);
+ //
+ // For a large stack frame, we might need a scratch register to store
+ // the size of the frame. We know all callee-save registers are free
+ // at this point in the prologue, so pick one.
+ unsigned ScratchRegister = ARM::NoRegister;
+ for (auto &I : CSI) {
+ unsigned Reg = I.getReg();
+ if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+ ScratchRegister = Reg;
+ break;
+ }
+ }
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ ScratchRegister, MachineInstr::FrameSetup);
if (!HasFP) {
CFAOffset -= NumBytes;
unsigned CFIIndex = MF.addFrameInst(
@@ -438,7 +487,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes - ArgRegsSaveSize);
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
+ NumBytes - ArgRegsSaveSize, ARM::NoRegister,
+ MachineInstr::NoFlags);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
if (MBBI != MBB.begin()) {
@@ -473,13 +524,27 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FramePtr)
.add(predOps(ARMCC::AL));
} else {
+ // For a large stack frame, we might need a scratch register to store
+ // the size of the frame. We know all callee-save registers are free
+ // at this point in the epilogue, so pick one.
+ unsigned ScratchRegister = ARM::NoRegister;
+ bool HasFP = hasFP(MF);
+ for (auto &I : MFI.getCalleeSavedInfo()) {
+ unsigned Reg = I.getReg();
+ if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
+ ScratchRegister = Reg;
+ break;
+ }
+ }
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
- emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
+ ScratchRegister, MachineInstr::NoFlags);
} else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
+ ScratchRegister, MachineInstr::NoFlags);
}
}
@@ -666,7 +731,9 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
// Advance past the pop instruction.
MBBI++;
// Increment the SP.
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4);
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
+ ArgRegsSaveSize + 4, ARM::NoRegister,
+ MachineInstr::NoFlags);
return true;
}
@@ -707,7 +774,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL))
.addReg(PopReg, RegState::Define);
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+ emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
+ ARM::NoRegister, MachineInstr::NoFlags);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
.addReg(ARM::LR, RegState::Define)
@@ -821,8 +889,9 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
- MachineInstrBuilder PushMIB =
- BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameSetup);
SmallVector<unsigned, 4> RegsToPush;
while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
@@ -835,7 +904,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
.addReg(*CopyReg, RegState::Define)
.addReg(*HiRegToSave, getKillRegState(isKill))
- .add(predOps(ARMCC::AL));
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MachineInstr::FrameSetup);
// Record the register that must be added to the PUSH.
RegsToPush.push_back(*CopyReg);
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index a4d6451ccf12..61af48712b6c 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -1,9 +1,8 @@
//===- Thumb1FrameLowering.h - Thumb1-specific frame info stuff ---*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 11aa285fc939..f57d93a2e83d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9f04a3ed262f..bc433e7a7a93 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,9 +1,8 @@
//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index e0a5f7f04fa9..3143eb9840ed 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,9 +1,8 @@
//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -32,13 +31,16 @@
using namespace llvm;
#define DEBUG_TYPE "thumb2-it"
+#define PASS_NAME "Thumb IT blocks insertion pass"
STATISTIC(NumITs, "Number of IT blocks inserted");
STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
+using RegisterSet = SmallSet<unsigned, 4>;
+
namespace {
- class Thumb2ITBlockPass : public MachineFunctionPass {
+ class Thumb2ITBlock : public MachineFunctionPass {
public:
static char ID;
@@ -47,7 +49,7 @@ namespace {
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
- Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+ Thumb2ITBlock() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -57,33 +59,32 @@ namespace {
}
StringRef getPassName() const override {
- return "Thumb IT blocks insertion pass";
+ return PASS_NAME;
}
private:
bool MoveCopyOutOfITBlock(MachineInstr *MI,
ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
- SmallSet<unsigned, 4> &Defs,
- SmallSet<unsigned, 4> &Uses);
- bool InsertITInstructions(MachineBasicBlock &MBB);
+ RegisterSet &Defs, RegisterSet &Uses);
+ bool InsertITInstructions(MachineBasicBlock &Block);
};
- char Thumb2ITBlockPass::ID = 0;
+ char Thumb2ITBlock::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(Thumb2ITBlock, DEBUG_TYPE, PASS_NAME, false, false)
+
/// TrackDefUses - Tracking what registers are being defined and used by
/// instructions in the IT block. This also tracks "dependencies", i.e. uses
/// in the IT block that are defined before the IT instruction.
-static void TrackDefUses(MachineInstr *MI,
- SmallSet<unsigned, 4> &Defs,
- SmallSet<unsigned, 4> &Uses,
+static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses,
const TargetRegisterInfo *TRI) {
- SmallVector<unsigned, 4> LocalDefs;
- SmallVector<unsigned, 4> LocalUses;
+ using RegList = SmallVector<unsigned, 4>;
+ RegList LocalDefs;
+ RegList LocalUses;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (auto &MO : MI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -95,27 +96,21 @@ static void TrackDefUses(MachineInstr *MI,
LocalDefs.push_back(Reg);
}
- for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
- unsigned Reg = LocalUses[i];
- for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
- Subreg.isValid(); ++Subreg)
- Uses.insert(*Subreg);
- }
+ auto InsertUsesDefs = [&](RegList &Regs, RegisterSet &UsesDefs) {
+ for (unsigned Reg : Regs)
+ for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
+ Subreg.isValid(); ++Subreg)
+ UsesDefs.insert(*Subreg);
+ };
- for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
- unsigned Reg = LocalDefs[i];
- for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
- Subreg.isValid(); ++Subreg)
- Defs.insert(*Subreg);
- if (Reg == ARM::CPSR)
- continue;
- }
+ InsertUsesDefs(LocalDefs, Defs);
+ InsertUsesDefs(LocalUses, Uses);
}
/// Clear kill flags for any uses in the given set. This will likely
/// conservatively remove more kill flags than are necessary, but removing them
/// is safer than incorrect kill flags remaining on instructions.
-static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) {
+static void ClearKillFlags(MachineInstr *MI, RegisterSet &Uses) {
for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef() || !MO.isKill())
continue;
@@ -138,10 +133,9 @@ static bool isCopy(MachineInstr *MI) {
}
bool
-Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
- ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
- SmallSet<unsigned, 4> &Defs,
- SmallSet<unsigned, 4> &Uses) {
+Thumb2ITBlock::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ RegisterSet &Defs, RegisterSet &Uses) {
if (!isCopy(MI))
return false;
// llvm models select's as two-address instructions. That means a copy
@@ -181,10 +175,13 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// Then peek at the next instruction to see if it's predicated on CC or OCC.
// If not, then there is nothing to be gained by moving the copy.
- MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator I = MI;
+ ++I;
MachineBasicBlock::iterator E = MI->getParent()->end();
+
while (I != E && I->isDebugInstr())
++I;
+
if (I != E) {
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getITInstrPredicate(*I, NPredReg);
@@ -194,12 +191,11 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
return false;
}
-bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
+bool Thumb2ITBlock::InsertITInstructions(MachineBasicBlock &MBB) {
bool Modified = false;
-
- SmallSet<unsigned, 4> Defs;
- SmallSet<unsigned, 4> Uses;
+ RegisterSet Defs, Uses;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+
while (MBBI != E) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
@@ -246,7 +242,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getITInstrPredicate(*NMI, NPredReg);
if (NCC == CC || NCC == OCC) {
- Mask |= (NCC & 1) << Pos;
+ Mask |= ((NCC ^ CC) & 1) << Pos;
// Add implicit use of ITSTATE.
NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
true/*isImp*/, false/*isKill*/));
@@ -270,8 +266,6 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Finalize IT mask.
Mask |= (1 << Pos);
- // Tag along (firstcond[0] << 4) with the mask.
- Mask |= (CC & 1) << 4;
MIB.addImm(Mask);
// Last instruction in IT block kills ITSTATE.
@@ -288,7 +282,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
return Modified;
}
-bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
+bool Thumb2ITBlock::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
if (!STI.isThumb2())
@@ -302,11 +296,8 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
return false;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
- MachineBasicBlock &MBB = *MFI;
- ++MFI;
+ for (auto &MBB : Fn)
Modified |= InsertITInstructions(MBB);
- }
if (Modified)
AFI->setHasITBlocks(true);
@@ -316,6 +307,132 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks
/// insertion pass.
-FunctionPass *llvm::createThumb2ITBlockPass() {
- return new Thumb2ITBlockPass();
+FunctionPass *llvm::createThumb2ITBlockPass() { return new Thumb2ITBlock(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "arm-mve-vpt"
+
+namespace {
+ class MVEVPTBlock : public MachineFunctionPass {
+ public:
+ static char ID;
+ const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ MVEVPTBlock() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override {
+ return "MVE VPT block insertion pass";
+ }
+
+ private:
+ bool InsertVPTBlocks(MachineBasicBlock &MBB);
+ };
+
+ char MVEVPTBlock::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
+
+enum VPTMaskValue {
+ T = 8, // 0b1000
+ TT = 4, // 0b0100
+ TE = 12, // 0b1100
+ TTT = 2, // 0b0010
+ TTE = 6, // 0b0110
+ TEE = 10, // 0b1010
+ TET = 14, // 0b1110
+ TTTT = 1, // 0b0001
+ TTTE = 3, // 0b0011
+ TTEE = 5, // 0b0101
+ TTET = 7, // 0b0111
+ TEEE = 9, // 0b1001
+ TEET = 11, // 0b1011
+ TETT = 13, // 0b1101
+ TETE = 15 // 0b1111
+};
+
+bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
+ bool Modified = false;
+ MachineBasicBlock::iterator MBIter = Block.begin();
+ MachineBasicBlock::iterator EndIter = Block.end();
+
+ while (MBIter != EndIter) {
+ MachineInstr *MI = &*MBIter;
+ unsigned PredReg = 0;
+ DebugLoc dl = MI->getDebugLoc();
+
+ ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
+
+ // The idea of the predicate is that None, Then and Else are for use when
+ // handling assembly language: they correspond to the three possible
+ // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
+ // from assembly source or disassembled from object code, you expect to see
+ // a mixture whenever there's a long VPT block. But in code generation, we
+ // hope we'll never generate an Else as input to this pass.
+
+ assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
+
+ if (Pred == ARMVCC::None) {
+ ++MBIter;
+ continue;
+ }
+
+ MachineInstrBuilder MIBuilder =
+ BuildMI(Block, MBIter, dl, TII->get(ARM::MVE_VPST));
+ // The mask value for the VPST instruction is T = 0b1000 = 8
+ MIBuilder.addImm(VPTMaskValue::T);
+
+ MachineBasicBlock::iterator VPSTInsertPos = MIBuilder.getInstr();
+ int VPTInstCnt = 1;
+ ARMVCC::VPTCodes NextPred;
+
+ do {
+ ++MBIter;
+ NextPred = getVPTInstrPredicate(*MBIter, PredReg);
+ } while (NextPred != ARMVCC::None && NextPred == Pred && ++VPTInstCnt < 4);
+
+ MachineInstr *LastMI = &*MBIter;
+ finalizeBundle(Block, VPSTInsertPos.getInstrIterator(),
+ ++LastMI->getIterator());
+
+ Modified = true;
+ LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump(););
+
+ ++MBIter;
+ }
+ return Modified;
+}
+
+bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
+ const ARMSubtarget &STI =
+ static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+
+ if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+ return false;
+
+ TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
+ TRI = STI.getRegisterInfo();
+
+ LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+
+ bool Modified = false;
+ for (MachineBasicBlock &MBB : Fn)
+ Modified |= InsertVPTBlocks(MBB);
+
+ LLVM_DEBUG(dbgs() << "**************************************\n");
+ return Modified;
}
+
+/// createMVEVPTBlock - Returns an instance of the MVE VPT block
+/// insertion pass.
+FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d567d3339049..5a965f7a6b9b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,9 +1,8 @@
//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -162,7 +161,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// otherwise).
if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
- MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
}
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
@@ -204,7 +203,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
MRI->constrainRegClass(DestReg,
- &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+ &ARM::GPRPair_with_gsub_1_in_GPRwithAPSRnospRegClass);
}
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
@@ -478,7 +477,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
bool isSub = false;
// Memory operands in inline assembly always use AddrModeT2_i12.
- if (Opcode == ARM::INLINEASM)
+ if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
@@ -611,9 +610,23 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = -Offset;
isSub = true;
}
+ } else if (AddrMode == ARMII::AddrModeT2_i7s4 ||
+ AddrMode == ARMII::AddrModeT2_i7s2 ||
+ AddrMode == ARMII::AddrModeT2_i7) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm();
+ unsigned OffsetMask;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i7s4: NumBits = 9; OffsetMask = 0x3; break;
+ case ARMII::AddrModeT2_i7s2: NumBits = 8; OffsetMask = 0x1; break;
+ default: NumBits = 7; OffsetMask = 0x0; break;
+ }
+ // MCInst operand expects already scaled value.
+ Scale = 1;
+ assert((Offset & OffsetMask) == 0 && "Can't encode this offset!");
+ (void)OffsetMask; // squash unused-variable warning at -NDEBUG
} else if (AddrMode == ARMII::AddrModeT2_i8s4) {
Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
- NumBits = 10; // 8 bits scaled by 4
+ NumBits = 8 + 2;
// MCInst operand expects already scaled value.
Scale = 1;
assert((Offset & 3) == 0 && "Can't encode this offset!");
@@ -639,7 +652,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Replace the FrameIndex with fp/sp
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
if (isSub) {
- if (AddrMode == ARMII::AddrMode5)
+ if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
// FIXME: Not consistent.
ImmedOffset |= 1 << NumBits;
else
@@ -653,7 +666,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Otherwise, offset doesn't fit. Pull in what we can to simplify
ImmedOffset = ImmedOffset & Mask;
if (isSub) {
- if (AddrMode == ARMII::AddrMode5)
+ if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
// FIXME: Not consistent.
ImmedOffset |= 1 << NumBits;
else {
@@ -678,3 +691,28 @@ ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI,
return ARMCC::AL;
return getInstrPredicate(MI, PredReg);
}
+
+int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (!MCID.OpInfo)
+ return -1;
+
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (ARM::isVpred(MCID.OpInfo[i].OperandType))
+ return i;
+
+ return -1;
+}
+
+ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
+ unsigned &PredReg) {
+ int PIdx = findFirstVPTPredOperandIdx(MI);
+ if (PIdx == -1) {
+ PredReg = 0;
+ return ARMVCC::None;
+ }
+
+ PredReg = MI.getOperand(PIdx+1).getReg();
+ return (ARMVCC::VPTCodes)MI.getOperand(PIdx).getImm();
+}
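
The new AddrModeT2_i7* branch in rewriteT2FrameIndex stores the byte offset unscaled (Scale = 1), so representability reduces to an alignment check plus a width check. A standalone sketch of that rule with a few worked values (illustration only, simplified from the code above):

#include <cstdlib>

// ImpliedZeroBits is 2 for _i7s4, 1 for _i7s2 and 0 for _i7, matching the
// NumBits/OffsetMask table in the patch.
static bool fitsT2i7(int Offset, unsigned ImpliedZeroBits) {
  unsigned NumBits = 7 + ImpliedZeroBits;            // 9, 8 or 7
  unsigned AlignMask = (1u << ImpliedZeroBits) - 1;  // 0x3, 0x1 or 0x0
  unsigned Magnitude = static_cast<unsigned>(std::abs(Offset));
  return (Magnitude & AlignMask) == 0 &&             // correctly aligned
         Magnitude < (1u << NumBits);                // fits the field
}
// fitsT2i7(508, 2) -> true   (127 * 4, the largest i7s4 offset)
// fitsT2i7(510, 2) -> false  (not a multiple of 4)
// fitsT2i7(512, 2) -> false  (needs 10 bits)
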
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index c834ba73bfea..a6712d5a0e72 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,9 +1,8 @@
//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -69,6 +68,12 @@ private:
/// to llvm::getInstrPredicate except it returns AL for conditional branch
/// instructions which are "predicated", but are not in IT blocks.
ARMCC::CondCodes getITInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
+
+// getVPTInstrPredicate: VPT analogue of that, plus a helper function
+// corresponding to MachineInstr::findFirstPredOperandIdx.
+int findFirstVPTPredOperandIdx(const MachineInstr &MI);
+ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI,
+ unsigned &PredReg);
}
#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 65889fc4e28b..37a85fa38417 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1,9 +1,8 @@
//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -454,7 +453,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
break;
case ARM::t2LDR_POST:
case ARM::t2STR_POST: {
- if (!MBB.getParent()->getFunction().optForMinSize())
+ if (!MinimizeSize)
return false;
if (!MI->hasOneMemOperand() ||
@@ -1128,8 +1127,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
// Optimizing / minimizing size? Minimizing size implies optimizing for size.
- OptimizeSize = MF.getFunction().optForSize();
- MinimizeSize = MF.getFunction().optForMinSize();
+ OptimizeSize = MF.getFunction().hasOptSize();
+ MinimizeSize = STI->hasMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index e4bdd40fb743..a96417ffce4d 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- ThumbRegisterInfo.cpp - Thumb-1 Register Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -447,63 +446,6 @@ void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool ThumbRegisterInfo::saveScavengerRegister(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
- unsigned Reg) const {
-
- const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
- if (!STI.isThumb1Only())
- return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
-
- // Thumb1 can't use the emergency spill slot on the stack because
- // ldr/str immediate offsets must be positive, and if we're referencing
- // off the frame pointer (if, for example, there are alloca() calls in
- // the function, the offset will be negative. Use R12 instead since that's
- // a call clobbered register that we know won't be used in Thumb1 mode.
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- DebugLoc DL;
- BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(Reg, RegState::Kill)
- .add(predOps(ARMCC::AL));
-
- // The UseMI is where we would like to restore the register. If there's
- // interference with R12 before then, however, we'll need to restore it
- // before that instead and adjust the UseMI.
- bool done = false;
- for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
- if (II->isDebugInstr())
- continue;
- // If this instruction affects R12, adjust our restore point.
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = II->getOperand(i);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
- UseMI = II;
- done = true;
- break;
- }
- if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- if (MO.getReg() == ARM::R12) {
- UseMI = II;
- done = true;
- break;
- }
- }
- }
- // Restore the register from R12
- BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr))
- .addReg(Reg, RegState::Define)
- .addReg(ARM::R12, RegState::Kill)
- .add(predOps(ARMCC::AL));
-
- return true;
-}
-
void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -619,3 +561,14 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.isPredicable())
MIB.add(predOps(ARMCC::AL));
}
+
+bool
+ThumbRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ if (MF.getSubtarget<ARMSubtarget>().isThumb1Only()) {
+ // For Thumb1, the emergency spill slot must be some small positive
+ // offset from the base/stack pointer.
+ return false;
+ }
+ // For Thumb2, put the emergency spill slot next to FP.
+ return true;
+}
diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 75c3fe9ae8ad..08cf67284d4c 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,9 +1,8 @@
//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -52,14 +51,10 @@ public:
const ARMBaseInstrInfo &TII) const;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
};
}
diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/lib/Target/ARM/Utils/ARMBaseInfo.cpp
index 534f78c6d4d2..4ace61cccd0f 100644
--- a/lib/Target/ARM/Utils/ARMBaseInfo.cpp
+++ b/lib/Target/ARM/Utils/ARMBaseInfo.cpp
@@ -1,9 +1,8 @@
//===-- ARMBaseInfo.cpp - ARM Base encoding information------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.h b/lib/Target/ARM/Utils/ARMBaseInfo.h
index f32d8223f53c..aa3aca359cb8 100644
--- a/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -1,9 +1,8 @@
//===-- ARMBaseInfo.h - Top level definitions for ARM ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -67,6 +66,30 @@ inline static CondCodes getOppositeCondition(CondCodes CC) {
}
} // end namespace ARMCC
+namespace ARMVCC {
+ enum VPTCodes {
+ None = 0,
+ Then,
+ Else
+ };
+}
+
+inline static const char *ARMVPTPredToString(ARMVCC::VPTCodes CC) {
+ switch (CC) {
+ case ARMVCC::None: return "none";
+ case ARMVCC::Then: return "t";
+ case ARMVCC::Else: return "e";
+ }
+ llvm_unreachable("Unknown VPT code");
+}
+
+inline static unsigned ARMVectorCondCodeFromString(StringRef CC) {
+ return StringSwitch<unsigned>(CC.lower())
+ .Case("t", ARMVCC::Then)
+ .Case("e", ARMVCC::Else)
+ .Default(~0U);
+}
+
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
case ARMCC::EQ: return "eq";
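
A minimal round trip of the helpers added to ARMBaseInfo.h above (illustration only):

const char *S = ARMVPTPredToString(ARMVCC::Then);   // "t"
unsigned C = ARMVectorCondCodeFromString("e");      // ARMVCC::Else
unsigned Bad = ARMVectorCondCodeFromString("x");    // ~0U: not a VPT suffix
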
diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h
index 48327fd377b2..f0746d73c95f 100644
--- a/lib/Target/AVR/AVR.h
+++ b/lib/Target/AVR/AVR.h
@@ -1,9 +1,8 @@
//===-- AVR.h - Top-level interface for AVR representation ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVR.td b/lib/Target/AVR/AVR.td
index d03b983aa70b..53768f99df3b 100644
--- a/lib/Target/AVR/AVR.td
+++ b/lib/Target/AVR/AVR.td
@@ -1,9 +1,8 @@
//===-- AVR.td - Describe the AVR Target Machine ----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
// This is the top level entry point for the AVR target.
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index f9a6e77387b2..7586bd7b78fc 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- AVRAsmPrinter.cpp - AVR LLVM assembly writer ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,8 @@
#include "AVR.h"
#include "AVRMCInstLower.h"
#include "AVRSubtarget.h"
-#include "InstPrinter/AVRInstPrinter.h"
+#include "MCTargetDesc/AVRInstPrinter.h"
+#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -43,16 +43,13 @@ public:
StringRef getPassName() const override { return "AVR Assembly Printer"; }
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void EmitInstruction(const MachineInstr *MI) override;
@@ -61,7 +58,7 @@ private:
};
void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier) {
+ raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -86,11 +83,10 @@ void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
// Default asm printer can only deal with some extra codes,
// so try it first.
- bool Error = AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ bool Error = AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
if (Error && ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0)
@@ -138,8 +134,7 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
}
bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
+ unsigned OpNum, const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
llvm_unreachable("This branch is not implemented yet");
diff --git a/lib/Target/AVR/AVRCallingConv.td b/lib/Target/AVR/AVRCallingConv.td
index 68dbce02706f..213e35fca66d 100644
--- a/lib/Target/AVR/AVRCallingConv.td
+++ b/lib/Target/AVR/AVRCallingConv.td
@@ -1,9 +1,8 @@
//===-- AVRCallingConv.td - Calling Conventions for AVR ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for AVR architecture.
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 536a54759c77..c45b2d0e39c1 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
//===-- AVRExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -583,8 +582,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
unsigned TmpReg = 0; // 0 for no temporary register
unsigned SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- OpLo = AVR::LDRdPtrPi;
- OpHi = AVR::LDRdPtr;
+ OpLo = AVR::LDRdPtr;
+ OpHi = AVR::LDDRdPtrQ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// Use a temporary register if src and dst registers are the same.
@@ -597,8 +596,7 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
.addReg(CurDstLoReg, RegState::Define)
- .addReg(SrcReg, RegState::Define)
- .addReg(SrcReg);
+ .addReg(SrcReg, RegState::Define);
// Push low byte onto stack if necessary.
if (TmpReg)
@@ -607,7 +605,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
.addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(1);
if (TmpReg) {
// Move the high byte into the final destination.
diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp
index 3b7322365772..5e91bb8632c1 100644
--- a/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/lib/Target/AVR/AVRFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- AVRFrameLowering.cpp - AVR Frame Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -362,13 +361,12 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
- const TargetFrameLowering &TFI = *STI.getFrameLowering();
const AVRInstrInfo &TII = *STI.getInstrInfo();
// There is nothing to insert when the call frame memory is allocated during
// function entry. Delete the call frame pseudo and replace all pseudo stores
// with real store instructions.
- if (TFI.hasReservedCallFrame(MF)) {
+ if (hasReservedCallFrame(MF)) {
fixStackStores(MBB, MI, TII, false);
return MBB.erase(MI);
}
@@ -382,7 +380,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
// For adjcallstackdown we convert it into an 'adiw reg, <amt>' handling
// the read and write of SP in I/O space.
if (Amount != 0) {
- assert(TFI.getStackAlignment() == 1 && "Unsupported stack alignment");
+ assert(getStackAlignment() == 1 && "Unsupported stack alignment");
if (Opcode == TII.getCallFrameSetupOpcode()) {
fixStackStores(MBB, MI, TII, true);
diff --git a/lib/Target/AVR/AVRFrameLowering.h b/lib/Target/AVR/AVRFrameLowering.h
index a0ba6c951276..a7658438232a 100644
--- a/lib/Target/AVR/AVRFrameLowering.h
+++ b/lib/Target/AVR/AVRFrameLowering.h
@@ -1,9 +1,8 @@
//===-- AVRFrameLowering.h - Define frame lowering for AVR ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/AVRISelDAGToDAG.cpp b/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 85abf42eaa67..5cb4441c4380 100644
--- a/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- AVRISelDAGToDAG.cpp - A dag to dag inst selector for AVR ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 57fc978b54bb..b6ba5f22fafb 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,19 +25,21 @@
#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
+#include "AVRSubtarget.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
namespace llvm {
-AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
- : TargetLowering(tm) {
+AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
+ const AVRSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
// Set up the register classes.
addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
// Compute derived properties from the register classes.
- computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo());
+ computeRegisterProperties(Subtarget.getRegisterInfo());
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
@@ -88,9 +89,9 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
setOperationAction(ISD::ROTL, MVT::i8, Custom);
- setOperationAction(ISD::ROTL, MVT::i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::i16, Expand);
setOperationAction(ISD::ROTR, MVT::i8, Custom);
- setOperationAction(ISD::ROTR, MVT::i16, Custom);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand);
setOperationAction(ISD::BR_CC, MVT::i8, Custom);
setOperationAction(ISD::BR_CC, MVT::i16, Custom);
@@ -163,6 +164,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ // Expand multiplications to libcalls when there is
+ // no hardware MUL.
+ if (!Subtarget.supportsMultiplication()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ }
+
for (MVT VT : MVT::integer_valuetypes()) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -229,7 +237,7 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setLibcallName(RTLIB::COS_F32, "cos");
setMinFunctionAlignment(1);
- setMinimumJumpTableEntries(INT_MAX);
+ setMinimumJumpTableEntries(UINT_MAX);
}
const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -935,7 +943,7 @@ static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI,
AVR::R19R18, AVR::R17R16, AVR::R15R14,
AVR::R13R12, AVR::R11R10, AVR::R9R8};
if (IsVarArg) {
- // Variadic functions do not need all the analisys below.
+ // Variadic functions do not need all the analysis below.
if (IsCall) {
CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_Vararg);
} else {
@@ -1270,8 +1278,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// Add a register mask operand representing the call-preserved registers.
- const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
@@ -1433,8 +1440,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
bool HasRepeatedOperand = false;
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
- const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
switch (MI.getOpcode()) {
@@ -1574,8 +1580,7 @@ static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
// it, but it works for now.
MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
MachineBasicBlock *BB) const {
- const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock::iterator I(MI);
++I; // in any case insert *after* the mul instruction
if (isCopyMulResult(I))
@@ -1629,6 +1634,15 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineBasicBlock *FallThrough = MBB->getFallThrough();
+
+ // If the current basic block falls through to another basic block,
+ // we must insert an unconditional branch to the fallthrough destination
+ // if we are to insert basic blocks at the prior fallthrough point.
+ if (FallThrough != nullptr) {
+ BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
+ }
+
MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -1838,9 +1852,6 @@ std::pair<unsigned, const TargetRegisterClass *>
AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine())
- .getSubtargetImpl();
-
// We only support i8 and i16.
//
//:FIXME: remove this assert for now since it gets sometimes executed
@@ -1884,8 +1895,8 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(),
- Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(
+ Subtarget.getRegisterInfo(), Constraint, VT);
}
void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index c90c65c81f70..ed2d0835903c 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -1,9 +1,8 @@
//===-- AVRISelLowering.h - AVR DAG Lowering Interface ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,12 +63,14 @@ enum NodeType {
} // end of namespace AVRISD
+class AVRSubtarget;
class AVRTargetMachine;
/// Performs target lowering for the AVR.
class AVRTargetLowering : public TargetLowering {
public:
- explicit AVRTargetLowering(AVRTargetMachine &TM);
+ explicit AVRTargetLowering(const AVRTargetMachine &TM,
+ const AVRSubtarget &STI);
public:
MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override {
@@ -127,6 +128,11 @@ public:
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
+ const override {
+ return false;
+ }
+
private:
SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
SelectionDAG &DAG, SDLoc dl) const;
@@ -164,6 +170,10 @@ private:
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+protected:
+
+ const AVRSubtarget &Subtarget;
+
private:
MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
diff --git a/lib/Target/AVR/AVRInstrFormats.td b/lib/Target/AVR/AVRInstrFormats.td
index ce5e606f9787..347e683cd47f 100644
--- a/lib/Target/AVR/AVRInstrFormats.td
+++ b/lib/Target/AVR/AVRInstrFormats.td
@@ -1,9 +1,8 @@
//===-- AVRInstrInfo.td - AVR Instruction Formats ----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp
index 0c32334167f0..ba7a95e92c5c 100644
--- a/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/lib/Target/AVR/AVRInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- AVRInstrInfo.cpp - AVR Instruction Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -488,7 +487,8 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
return 0;
- case TargetOpcode::INLINEASM: {
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h
index 354edcec3466..ba74af325474 100644
--- a/lib/Target/AVR/AVRInstrInfo.h
+++ b/lib/Target/AVR/AVRInstrInfo.h
@@ -1,9 +1,8 @@
//===-- AVRInstrInfo.h - AVR Instruction Information ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index 5720af7d8df6..caca9b617609 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -1,9 +1,8 @@
//===-- AVRInstrInfo.td - AVR Instruction defs -------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -90,6 +89,22 @@ def imm0_63_neg : PatLeaf<(imm),
def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;
+// imm_com8_XFORM - Return the complement of a imm_com8 value
+def imm_com8_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint8_t)N->getZExtValue()), SDLoc(N),
+ MVT::i8);
+}]>;
+
+// imm_com8 - Match an immediate that is the complement
+// of an 8-bit immediate.
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use imm_com8.
+def imm_com8_asmoperand : AsmOperandClass { let Name = "ImmCom8"; }
+def imm_com8 : Operand<i8> {
+ let ParserMatchClass = imm_com8_asmoperand;
+}
+
def ioaddr_XFORM : SDNodeXForm<imm,
[{
return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - 0x20, SDLoc(N), MVT::i8);
@@ -157,13 +172,6 @@ def memspi : Operand<iPTR>
let MIOperandInfo = (ops GPRSP, i16imm);
}
-def imm_com8 : Operand<i8>
-{
- let EncoderMethod = "encodeComplement";
-
- let MIOperandInfo = (ops i8imm);
-}
-
def relbrtarget_7 : Operand<OtherVT>
{
let PrintMethod = "printPCRelImm";
@@ -1151,11 +1159,11 @@ isReMaterializable = 1 in
// LDW Rd+1:Rd, P
//
// Expands to:
- // ld Rd, P+
- // ld Rd+1, P
+ // ld Rd, P
+ // ldd Rd+1, P+1
let Constraints = "@earlyclobber $reg" in
def LDWRdPtr : Pseudo<(outs DREGS:$reg),
- (ins PTRREGS:$ptrreg),
+ (ins PTRDISPREGS:$ptrreg),
"ldw\t$reg, $ptrreg",
[(set i16:$reg, (load i16:$ptrreg))]>,
Requires<[HasSRAM]>;
@@ -1222,7 +1230,7 @@ isReMaterializable = 1 in
// ldd Rd, P+q
// ldd Rd+1, P+q+1
let Constraints = "@earlyclobber $dst" in
- def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_Z_WORKAROUND:$dst),
+ def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND:$dst),
(ins memri:$memri),
"lddw\t$dst, $memri",
[(set i16:$dst, (load addr:$memri))]>,
@@ -1729,20 +1737,7 @@ def BLD : FRdB<0b00,
"bld\t$rd, $b",
[]>;
-// Set/clear bit in register operations.
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
- // CBR Rd, K
- // Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
- // FIXME: This uses the 'complement' encoder. We need it to also use the
- // imm_ldi8 encoder. This will cause no fixups to be created on this instruction.
- def CBRRdK : FRdK<0b0111,
- (outs LD8:$rd),
- (ins LD8:$src, imm_com8:$k),
- "cbr\t$rd, $k",
- []>;
-}
+def CBR : InstAlias<"cbr\t$rd, $k", (ANDIRdK LD8:$rd, imm_com8:$k), 0>;
// CLR Rd
// Alias for EOR Rd, Rd
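
The CBR change above turns the old pseudo-encoded instruction into a plain alias of ANDI with a complemented immediate. A worked example of that complement (illustration only): cbr clears the bits that are set in its mask, so the operand actually encoded is the bitwise NOT of what was written, exactly what imm_com8_XFORM (and the matching asm-parser operand) compute:

uint8_t K = 0x0F;                           // "cbr r24, 0x0F": clear low nibble
uint8_t AndImm = static_cast<uint8_t>(~K);  // 0xF0, the immediate in the ANDI
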
diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp
index dfefd09bc4b8..49a318762b63 100644
--- a/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/lib/Target/AVR/AVRMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- AVRMCInstLower.cpp - Convert AVR MachineInstr to an MCInst --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRMCInstLower.h b/lib/Target/AVR/AVRMCInstLower.h
index 2e2d1014485e..5e0f42ac16a7 100644
--- a/lib/Target/AVR/AVRMCInstLower.h
+++ b/lib/Target/AVR/AVRMCInstLower.h
@@ -1,9 +1,8 @@
//===-- AVRMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/AVRMachineFunctionInfo.h b/lib/Target/AVR/AVRMachineFunctionInfo.h
index cf0c73576301..5226e30491c3 100644
--- a/lib/Target/AVR/AVRMachineFunctionInfo.h
+++ b/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- AVRMachineFuctionInfo.h - AVR machine function info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 808a85e459c1..a6b36f80485d 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- AVRRegisterInfo.cpp - AVR Register Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -233,9 +233,9 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// No need to set SREG as dead here otherwise if the next instruction is a
// cond branch it will be using a dead register.
- New = BuildMI(MBB, std::next(II), dl, TII.get(SubOpc), AVR::R29R28)
- .addReg(AVR::R29R28, RegState::Kill)
- .addImm(Offset - 63 + 1);
+ BuildMI(MBB, std::next(II), dl, TII.get(SubOpc), AVR::R29R28)
+ .addReg(AVR::R29R28, RegState::Kill)
+ .addImm(Offset - 63 + 1);
Offset = 62;
}
@@ -245,7 +245,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-unsigned AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register AVRRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
// The Y pointer register
@@ -273,4 +273,18 @@ void AVRRegisterInfo::splitReg(unsigned Reg,
HiReg = getSubReg(Reg, AVR::sub_hi);
}
+bool AVRRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const {
+ if(this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
+ return false;
+ }
+
+ return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg, NewRC, LIS);
+}
+
} // end of namespace llvm
diff --git a/lib/Target/AVR/AVRRegisterInfo.h b/lib/Target/AVR/AVRRegisterInfo.h
index 104b336b9c48..8e6e63af3d57 100644
--- a/lib/Target/AVR/AVRRegisterInfo.h
+++ b/lib/Target/AVR/AVRRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- AVRRegisterInfo.h - AVR Register Information Impl -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,7 +41,7 @@ public:
unsigned FIOperandNum,
RegScavenger *RS = NULL) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
@@ -56,6 +55,13 @@ public:
return true;
}
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC,
+ LiveIntervals &LIS) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td
index d55252bcac46..ea38fedd22ce 100644
--- a/lib/Target/AVR/AVRRegisterInfo.td
+++ b/lib/Target/AVR/AVRRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- AVRRegisterInfo.td - AVR Register defs -------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -166,14 +165,14 @@ def DREGS : RegisterClass<"AVR", [i16], 8,
// cannot use Z; it's simply a workaround for a regalloc bug.
//
// More information can be found in PR39553.
-def DREGS_WITHOUT_Z_WORKAROUND : RegisterClass<"AVR", [i16], 8,
+def DREGS_WITHOUT_YZ_WORKAROUND : RegisterClass<"AVR", [i16], 8,
(
// Return value and arguments.
add R25R24, R19R18, R21R20, R23R22,
// Scratch registers.
R27R26,
// Callee saved registers.
- R29R28, R17R16, R15R14, R13R12, R11R10,
+ R17R16, R15R14, R13R12, R11R10,
R9R8, R7R6, R5R4, R3R2, R1R0
)>;
diff --git a/lib/Target/AVR/AVRRelaxMemOperations.cpp b/lib/Target/AVR/AVRRelaxMemOperations.cpp
index fdb09897eda8..6be901743e82 100644
--- a/lib/Target/AVR/AVRRelaxMemOperations.cpp
+++ b/lib/Target/AVR/AVRRelaxMemOperations.cpp
@@ -1,9 +1,8 @@
//===-- AVRRelaxMemOperations.cpp - Relax out of range loads/stores -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRSelectionDAGInfo.h b/lib/Target/AVR/AVRSelectionDAGInfo.h
index 6474c8779330..3e7bd57f10cf 100644
--- a/lib/Target/AVR/AVRSelectionDAGInfo.h
+++ b/lib/Target/AVR/AVRSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- AVRSelectionDAGInfo.h - AVR SelectionDAG Info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp
index 556d69ec5234..6a41036fdd6c 100644
--- a/lib/Target/AVR/AVRSubtarget.cpp
+++ b/lib/Target/AVR/AVRSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- AVRSubtarget.cpp - AVR Subtarget Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,9 +28,9 @@
namespace llvm {
AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, AVRTargetMachine &TM)
+ const std::string &FS, const AVRTargetMachine &TM)
: AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(),
- TLInfo(TM), TSInfo(),
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(),
// Subtarget features
m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
@@ -44,4 +43,12 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
ParseSubtargetFeatures(CPU, FS);
}
+AVRSubtarget &
+AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine &TM) {
+ // Parse features string.
+ ParseSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
} // end of namespace llvm
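
One point worth spelling out about the constructor change above (reasoning added here, not stated in the patch): the helper exists for initialization order.

// TLInfo is built in AVRSubtarget's member-initializer list, and
// AVRTargetLowering's constructor already queries subtarget state
// (getRegisterInfo(), supportsMultiplication()). Passing
//   TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM))
// forces ParseSubtargetFeatures to run before TLInfo is constructed, so the
// feature flags are populated by the time the lowering code reads them.
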
diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h
index fa26738da190..da9289af7c8d 100644
--- a/lib/Target/AVR/AVRSubtarget.h
+++ b/lib/Target/AVR/AVRSubtarget.h
@@ -1,9 +1,8 @@
//===-- AVRSubtarget.h - Define Subtarget for the AVR -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,7 +36,7 @@ public:
//! \param FS The feature string.
//! \param TM The target machine.
AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
- AVRTargetMachine &TM);
+ const AVRTargetMachine &TM);
const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
@@ -49,6 +48,9 @@ public:
/// \note Definition of function is auto generated by `tblgen`.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
// Subtarget feature getters.
// See AVR.td for details.
bool hasSRAM() const { return m_hasSRAM; }
diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp
index 9828cdab68c3..a36c8b0f9649 100644
--- a/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/lib/Target/AVR/AVRTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- AVRTargetMachine.cpp - Define TargetMachine for AVR ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@
#include "AVR.h"
#include "AVRTargetObjectFile.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "TargetInfo/AVRTargetInfo.h"
namespace llvm {
diff --git a/lib/Target/AVR/AVRTargetMachine.h b/lib/Target/AVR/AVRTargetMachine.h
index ffcf4350d45a..f9015c8741ea 100644
--- a/lib/Target/AVR/AVRTargetMachine.h
+++ b/lib/Target/AVR/AVRTargetMachine.h
@@ -1,9 +1,8 @@
//===-- AVRTargetMachine.h - Define TargetMachine for AVR -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp
index 0cebb0f043f9..980096a09835 100644
--- a/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- AVRTargetObjectFile.cpp - AVR Object Files ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/AVRTargetObjectFile.h b/lib/Target/AVR/AVRTargetObjectFile.h
index ba91036fd64c..53d8510d9a21 100644
--- a/lib/Target/AVR/AVRTargetObjectFile.h
+++ b/lib/Target/AVR/AVRTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- AVRTargetObjectFile.h - AVR Object Info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index f2bb59265271..aac5644711e2 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -1,9 +1,8 @@
//===---- AVRAsmParser.cpp - Parse AVR assembly to MCInst instructions ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "MCTargetDesc/AVRMCELFStreamer.h"
#include "MCTargetDesc/AVRMCExpr.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringSwitch.h"
@@ -160,6 +160,22 @@ public:
addExpr(Inst, getImm());
}
+ void addImmCom8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually an imm8, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(~(uint8_t)CE->getValue()));
+ }
+
+ bool isImmCom8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return isUInt<8>(Value);
+ }
+
bool isReg() const { return Kind == k_Register; }
bool isImm() const { return Kind == k_Immediate; }
bool isToken() const { return Kind == k_Token; }
diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index e69accfa9393..e203a5069c85 100644
--- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -1,9 +1,8 @@
//===- AVRDisassembler.cpp - Disassembler for AVR ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "AVRRegisterInfo.h"
#include "AVRSubtarget.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
+#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index f81a57dd71e3..e92b16c8ee9d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- AVRAsmBackend.cpp - AVR Asm Backend ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index d48077c3ab8e..1e713db38145 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -1,9 +1,8 @@
//===-- AVRAsmBackend.h - AVR Asm Backend --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index 4a921a1601a9..6025e4b2437c 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- AVRELFObjectWriter.cpp - AVR ELF Writer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h
index e5df6cc34e40..461f1660c952 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h
@@ -1,9 +1,8 @@
//===----- AVRELFStreamer.h - AVR Target Streamer --------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
index cdb0b215bc60..b3504b89e4d3 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -1,9 +1,8 @@
//===-- AVRFixupKinds.h - AVR Specific Fixup Entries ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 0f34b8e18ff9..88ce9a25680e 100644
--- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- AVRInstPrinter.cpp - Convert AVR MCInst to assembly syntax --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.h b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index c9f65b922745..5b758a7503c9 100644
--- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -1,9 +1,8 @@
//===- AVRInstPrinter.h - Convert AVR MCInst to assembly syntax -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index 535bb012eb07..99b2172c562f 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- AVRMCAsmInfo.cpp - AVR asm properties -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
PrivateGlobalPrefix = ".L";
UsesELFSectionDirectiveForBSS = true;
UseIntegratedAssembler = true;
+ SupportsDebugInformation = true;
}
} // end of namespace llvm
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
index cc2207a3cfae..b2fa18777bc0 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- AVRMCAsmInfo.h - AVR asm properties ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index 4dbbce8c205e..bc0488778685 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- AVRMCCodeEmitter.cpp - Convert AVR Code to Machine Code -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 883abf8db78a..2e24d885c155 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -1,9 +1,8 @@
//===-- AVRMCCodeEmitter.h - Convert AVR Code to Machine Code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
index 861acd47347f..d9169f90a765 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
@@ -1,9 +1,8 @@
//===--------- AVRMCELFStreamer.cpp - AVR subclass of MCELFStreamer -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
index 12e805fc7d13..37a610bc4248 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h
@@ -1,9 +1,8 @@
//===--------- AVRMCELFStreamer.h - AVR subclass of MCELFStreamer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index d4a67973af7f..0a53e5346779 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- AVRMCExpr.cpp - AVR specific MC expression classes ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
index a166b0946749..3b696bab1715 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
@@ -1,9 +1,8 @@
//===-- AVRMCExpr.h - AVR specific MC expression classes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index 8c39b5f4039e..f6607b26a065 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- AVRMCTargetDesc.cpp - AVR Target Descriptions ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,11 +11,12 @@
//===----------------------------------------------------------------------===//
#include "AVRELFStreamer.h"
+#include "AVRInstPrinter.h"
#include "AVRMCAsmInfo.h"
#include "AVRMCELFStreamer.h"
#include "AVRMCTargetDesc.h"
#include "AVRTargetStreamer.h"
-#include "InstPrinter/AVRInstPrinter.h"
+#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFStreamer.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index a764f15bd065..470db01ff468 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- AVRMCTargetDesc.h - AVR Target Descriptions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,8 +32,6 @@ class Target;
class Triple;
class raw_pwrite_stream;
-Target &getTheAVRTarget();
-
MCInstrInfo *createAVRMCInstrInfo();
/// Creates a machine code emitter for AVR.
diff --git a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
index 2b45d9adc7e9..3487a2bbb864 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===-- AVRTargetStreamer.cpp - AVR Target Streamer Methods ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
index 815088b0a5de..5c4d1a22f6c6 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- AVRTargetStreamer.h - AVR Target Streamer --------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
index abe9cf45fcb3..c62d5cb85bc4 100644
--- a/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
+++ b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -1,13 +1,12 @@
//===-- AVRTargetInfo.cpp - AVR Target Implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Module.h"
+#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
namespace llvm {
Target &getTheAVRTarget() {
diff --git a/lib/Target/AVR/TargetInfo/AVRTargetInfo.h b/lib/Target/AVR/TargetInfo/AVRTargetInfo.h
new file mode 100644
index 000000000000..7e0186bbdae1
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/AVRTargetInfo.h
@@ -0,0 +1,18 @@
+//===-- AVRTargetInfo.h - AVR Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_TARGET_INFO_H
+#define LLVM_AVR_TARGET_INFO_H
+
+namespace llvm {
+class Target;
+
+Target &getTheAVRTarget();
+} // namespace llvm
+
+#endif // LLVM_AVR_TARGET_INFO_H
diff --git a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 8890fb8adf4d..75885fd058a7 100644
--- a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -1,13 +1,13 @@
//===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
@@ -126,7 +126,7 @@ public:
bool isMem() const override { return false; }
bool isConstantImm() const {
- return isImm() && dyn_cast<MCConstantExpr>(getImm());
+ return isImm() && isa<MCConstantExpr>(getImm());
}
int64_t getConstantImm() const {
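
The isConstantImm change above replaces dyn_cast with isa, which performs the same type check without materializing a pointer that is immediately discarded. A small sketch of the equivalence, written against the LLVM MC headers used by this file:

    #include "llvm/MC/MCExpr.h"
    using namespace llvm;

    // Both forms answer "is E an MCConstantExpr?"; isa<> states the boolean
    // intent directly, while dyn_cast<> would also produce an unused pointer.
    static bool isConstant(const MCExpr *E) {
      return isa<MCConstantExpr>(E);                     // new spelling in the patch
      // return dyn_cast<MCConstantExpr>(E) != nullptr;  // old spelling, same result
    }
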
diff --git a/lib/Target/BPF/BPF.h b/lib/Target/BPF/BPF.h
index 9749e369c2c1..d311fc154094 100644
--- a/lib/Target/BPF/BPF.h
+++ b/lib/Target/BPF/BPF.h
@@ -1,9 +1,8 @@
//===-- BPF.h - Top-level interface for BPF representation ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -16,11 +15,16 @@
namespace llvm {
class BPFTargetMachine;
+ModulePass *createBPFAbstractMemberAccess();
+
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
+FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
+void initializeBPFAbstractMemberAccessPass(PassRegistry&);
+void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry&);
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
diff --git a/lib/Target/BPF/BPF.td b/lib/Target/BPF/BPF.td
index 877bd15f4f2b..fad966ff5a13 100644
--- a/lib/Target/BPF/BPF.td
+++ b/lib/Target/BPF/BPF.td
@@ -1,9 +1,8 @@
//===-- BPF.td - Describe the BPF Target Machine -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,6 +20,7 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"generic", []>;
def : Proc<"v1", []>;
def : Proc<"v2", []>;
+def : Proc<"v3", []>;
def : Proc<"probe", []>;
def DummyFeature : SubtargetFeature<"dummy", "isDummyMode",
diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
new file mode 100644
index 000000000000..51d4cbc8a429
--- /dev/null
+++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -0,0 +1,482 @@
+//===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass abstracts struct/union member accesses in order to support
+// compile-once run-everywhere (CO-RE). CO-RE lets a program compiled once
+// run on different kernels. In particular, if a bpf program tries to
+// access a particular kernel data structure member, the details of the
+// intermediate member access are recorded so the bpf loader can make the
+// necessary adjustment right before program loading.
+//
+// For example,
+//
+// struct s {
+// int a;
+// int b;
+// };
+// struct t {
+// struct s c;
+// int d;
+// };
+// struct t e;
+//
+// For the member access e.c.b, the compiler will generate code
+// &e + 4
+//
+// Compile-once run-everywhere instead generates the following code
+// r = 4
+// &e + r
+// The "4" in "r = 4" can be changed for a particular kernel version.
+// For example, if on some kernel version struct s is changed to
+//
+// struct s {
+// int new_field;
+// int a;
+// int b;
+// }
+//
+// By repeating the member access on the host, the bpf loader can
+// adjust "r = 4" as "r = 8".
+//
+// This feature relies on the following three intrinsic calls:
+// addr = preserve_array_access_index(base, dimension, index)
+// addr = preserve_union_access_index(base, di_index)
+// !llvm.preserve.access.index <union_ditype>
+// addr = preserve_struct_access_index(base, gep_index, di_index)
+// !llvm.preserve.access.index <struct_ditype>
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFTargetMachine.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+#define DEBUG_TYPE "bpf-abstract-member-access"
+
+namespace llvm {
+const std::string BPFCoreSharedInfo::AmaAttr = "btf_ama";
+const std::string BPFCoreSharedInfo::PatchableExtSecName =
+ ".BPF.patchable_externs";
+} // namespace llvm
+
+using namespace llvm;
+
+namespace {
+
+class BPFAbstractMemberAccess final : public ModulePass {
+ StringRef getPassName() const override {
+ return "BPF Abstract Member Access";
+ }
+
+ bool runOnModule(Module &M) override;
+
+public:
+ static char ID;
+ BPFAbstractMemberAccess() : ModulePass(ID) {}
+
+private:
+ enum : uint32_t {
+ BPFPreserveArrayAI = 1,
+ BPFPreserveUnionAI = 2,
+ BPFPreserveStructAI = 3,
+ };
+
+ std::map<std::string, GlobalVariable *> GEPGlobals;
+ // A map to link preserve_*_access_index intrinsic calls.
+ std::map<CallInst *, std::pair<CallInst *, uint32_t>> AIChain;
+ // A map to hold all the base preserve_*_access_index intrinsic calls.
+ // The base call is not an input of any other preserve_*_access_index
+ // intrinsics.
+ std::map<CallInst *, uint32_t> BaseAICalls;
+
+ bool doTransformation(Module &M);
+
+ void traceAICall(CallInst *Call, uint32_t Kind);
+ void traceBitCast(BitCastInst *BitCast, CallInst *Parent, uint32_t Kind);
+ void traceGEP(GetElementPtrInst *GEP, CallInst *Parent, uint32_t Kind);
+ void collectAICallChains(Module &M, Function &F);
+
+ bool IsPreserveDIAccessIndexCall(const CallInst *Call, uint32_t &Kind);
+ bool removePreserveAccessIndexIntrinsic(Module &M);
+ void replaceWithGEP(std::vector<CallInst *> &CallList,
+ uint32_t NumOfZerosIndex, uint32_t DIIndex);
+
+ Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
+ std::string &AccessKey, uint32_t Kind,
+ MDNode *&TypeMeta);
+ bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
+ bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
+};
+} // End anonymous namespace
+
+char BPFAbstractMemberAccess::ID = 0;
+INITIALIZE_PASS(BPFAbstractMemberAccess, DEBUG_TYPE,
+ "abstracting struct/union member accessees", false, false)
+
+ModulePass *llvm::createBPFAbstractMemberAccess() {
+ return new BPFAbstractMemberAccess();
+}
+
+bool BPFAbstractMemberAccess::runOnModule(Module &M) {
+ LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
+
+ // Bail out if no debug info.
+ if (empty(M.debug_compile_units()))
+ return false;
+
+ return doTransformation(M);
+}
+
+/// Check whether a call is a preserve_*_access_index intrinsic call or not.
+bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call,
+ uint32_t &Kind) {
+ if (!Call)
+ return false;
+
+ const auto *GV = dyn_cast<GlobalValue>(Call->getCalledValue());
+ if (!GV)
+ return false;
+ if (GV->getName().startswith("llvm.preserve.array.access.index")) {
+ Kind = BPFPreserveArrayAI;
+ return true;
+ }
+ if (GV->getName().startswith("llvm.preserve.union.access.index")) {
+ Kind = BPFPreserveUnionAI;
+ return true;
+ }
+ if (GV->getName().startswith("llvm.preserve.struct.access.index")) {
+ Kind = BPFPreserveStructAI;
+ return true;
+ }
+
+ return false;
+}
+
+void BPFAbstractMemberAccess::replaceWithGEP(std::vector<CallInst *> &CallList,
+ uint32_t DimensionIndex,
+ uint32_t GEPIndex) {
+ for (auto Call : CallList) {
+ uint32_t Dimension = 1;
+ if (DimensionIndex > 0)
+ Dimension = cast<ConstantInt>(Call->getArgOperand(DimensionIndex))
+ ->getZExtValue();
+
+ Constant *Zero =
+ ConstantInt::get(Type::getInt32Ty(Call->getParent()->getContext()), 0);
+ SmallVector<Value *, 4> IdxList;
+ for (unsigned I = 0; I < Dimension; ++I)
+ IdxList.push_back(Zero);
+ IdxList.push_back(Call->getArgOperand(GEPIndex));
+
+ auto *GEP = GetElementPtrInst::CreateInBounds(Call->getArgOperand(0),
+ IdxList, "", Call);
+ Call->replaceAllUsesWith(GEP);
+ Call->eraseFromParent();
+ }
+}
+
+bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Module &M) {
+ std::vector<CallInst *> PreserveArrayIndexCalls;
+ std::vector<CallInst *> PreserveUnionIndexCalls;
+ std::vector<CallInst *> PreserveStructIndexCalls;
+ bool Found = false;
+
+ for (Function &F : M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ auto *Call = dyn_cast<CallInst>(&I);
+ uint32_t Kind;
+ if (!IsPreserveDIAccessIndexCall(Call, Kind))
+ continue;
+
+ Found = true;
+ if (Kind == BPFPreserveArrayAI)
+ PreserveArrayIndexCalls.push_back(Call);
+ else if (Kind == BPFPreserveUnionAI)
+ PreserveUnionIndexCalls.push_back(Call);
+ else
+ PreserveStructIndexCalls.push_back(Call);
+ }
+
+ // Do the following transformations:
+ // . addr = preserve_array_access_index(base, dimension, index)
+ // is transformed to
+ // addr = GEP(base, dimension's zeros, index)
+ // . addr = preserve_union_access_index(base, di_index)
+ // is transformed to
+ // addr = base, i.e., all usages of "addr" are replaced by "base".
+ // . addr = preserve_struct_access_index(base, gep_index, di_index)
+ // is transformed to
+ // addr = GEP(base, 0, gep_index)
+ replaceWithGEP(PreserveArrayIndexCalls, 1, 2);
+ replaceWithGEP(PreserveStructIndexCalls, 0, 1);
+ for (auto Call : PreserveUnionIndexCalls) {
+ Call->replaceAllUsesWith(Call->getArgOperand(0));
+ Call->eraseFromParent();
+ }
+
+ return Found;
+}
+
+void BPFAbstractMemberAccess::traceAICall(CallInst *Call, uint32_t Kind) {
+ for (User *U : Call->users()) {
+ Instruction *Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ continue;
+
+ if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
+ traceBitCast(BI, Call, Kind);
+ } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
+ uint32_t CIKind;
+ if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
+ AIChain[CI] = std::make_pair(Call, Kind);
+ traceAICall(CI, CIKind);
+ } else {
+ BaseAICalls[Call] = Kind;
+ }
+ } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+ if (GI->hasAllZeroIndices())
+ traceGEP(GI, Call, Kind);
+ else
+ BaseAICalls[Call] = Kind;
+ }
+ }
+}
+
+void BPFAbstractMemberAccess::traceBitCast(BitCastInst *BitCast,
+ CallInst *Parent, uint32_t Kind) {
+ for (User *U : BitCast->users()) {
+ Instruction *Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ continue;
+
+ if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
+ traceBitCast(BI, Parent, Kind);
+ } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
+ uint32_t CIKind;
+ if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
+ AIChain[CI] = std::make_pair(Parent, Kind);
+ traceAICall(CI, CIKind);
+ } else {
+ BaseAICalls[Parent] = Kind;
+ }
+ } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+ if (GI->hasAllZeroIndices())
+ traceGEP(GI, Parent, Kind);
+ else
+ BaseAICalls[Parent] = Kind;
+ }
+ }
+}
+
+void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst *GEP, CallInst *Parent,
+ uint32_t Kind) {
+ for (User *U : GEP->users()) {
+ Instruction *Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ continue;
+
+ if (auto *BI = dyn_cast<BitCastInst>(Inst)) {
+ traceBitCast(BI, Parent, Kind);
+ } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
+ uint32_t CIKind;
+ if (IsPreserveDIAccessIndexCall(CI, CIKind)) {
+ AIChain[CI] = std::make_pair(Parent, Kind);
+ traceAICall(CI, CIKind);
+ } else {
+ BaseAICalls[Parent] = Kind;
+ }
+ } else if (auto *GI = dyn_cast<GetElementPtrInst>(Inst)) {
+ if (GI->hasAllZeroIndices())
+ traceGEP(GI, Parent, Kind);
+ else
+ BaseAICalls[Parent] = Kind;
+ }
+ }
+}
+
+void BPFAbstractMemberAccess::collectAICallChains(Module &M, Function &F) {
+ AIChain.clear();
+ BaseAICalls.clear();
+
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ uint32_t Kind;
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!IsPreserveDIAccessIndexCall(Call, Kind) ||
+ AIChain.find(Call) != AIChain.end())
+ continue;
+
+ traceAICall(Call, Kind);
+ }
+}
+
+/// Get access index from the preserve_*_access_index intrinsic calls.
+bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
+ uint64_t &AccessIndex) {
+ const ConstantInt *CV = dyn_cast<ConstantInt>(IndexValue);
+ if (!CV)
+ return false;
+
+ AccessIndex = CV->getValue().getZExtValue();
+ return true;
+}
+
+/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
+/// pointer of the first preserve_*_access_index call, and construct the access
+/// string, which will be the name of a global variable.
+Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
+ std::string &AccessStr,
+ std::string &AccessKey,
+ uint32_t Kind,
+ MDNode *&TypeMeta) {
+ Value *Base = nullptr;
+ std::vector<uint64_t> AccessIndices;
+ uint64_t TypeNameIndex = 0;
+ std::string LastTypeName;
+
+ while (Call) {
+ // Base of original corresponding GEP
+ Base = Call->getArgOperand(0);
+
+ // Type Name
+ std::string TypeName;
+ MDNode *MDN;
+ if (Kind == BPFPreserveUnionAI || Kind == BPFPreserveStructAI) {
+ MDN = Call->getMetadata(LLVMContext::MD_preserve_access_index);
+ if (!MDN)
+ return nullptr;
+
+ DIType *Ty = dyn_cast<DIType>(MDN);
+ if (!Ty)
+ return nullptr;
+
+ TypeName = Ty->getName();
+ }
+
+ // Access Index
+ uint64_t AccessIndex;
+ uint32_t ArgIndex = (Kind == BPFPreserveUnionAI) ? 1 : 2;
+ if (!getAccessIndex(Call->getArgOperand(ArgIndex), AccessIndex))
+ return nullptr;
+
+ AccessIndices.push_back(AccessIndex);
+ if (TypeName.size()) {
+ TypeNameIndex = AccessIndices.size() - 1;
+ LastTypeName = TypeName;
+ TypeMeta = MDN;
+ }
+
+ Kind = AIChain[Call].second;
+ Call = AIChain[Call].first;
+ }
+
+ // The initial type name is required.
+ // FIXME: if the initial type access is an array index, e.g.,
+ // &a[3].b.c, only a one-dimensional array is supported.
+ if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
+ return nullptr;
+
+ // Construct the type string AccessStr.
+ for (unsigned I = 0; I < AccessIndices.size(); ++I)
+ AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
+
+ if (TypeNameIndex == AccessIndices.size() - 1)
+ AccessStr = "0:" + AccessStr;
+
+ // Access key is the type name + access string, uniquely identifying
+ // one kernel memory access.
+ AccessKey = LastTypeName + ":" + AccessStr;
+
+ return Base;
+}
+
+/// Call/Kind is the base preserve_*_access_index() call. Attempt to transform
+/// it into a chain of relocatable GEPs.
+bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
+ uint32_t Kind) {
+ std::string AccessStr, AccessKey;
+ MDNode *TypeMeta = nullptr;
+ Value *Base =
+ computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
+ if (!Base)
+ return false;
+
+ // Do the transformation
+ // For any original GEP Call and Base %2 like
+ // %4 = bitcast %struct.net_device** %dev1 to i64*
+ // it is transformed to:
+ // %6 = load __BTF_0:sk_buff:0:0:2:0:
+ // %7 = bitcast %struct.sk_buff* %2 to i8*
+ // %8 = getelementptr i8, i8* %7, %6
+ // %9 = bitcast i8* %8 to i64*
+ // using %9 instead of %4
+ // The original Call inst is removed.
+ BasicBlock *BB = Call->getParent();
+ GlobalVariable *GV;
+
+ if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
+ GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
+ GlobalVariable::ExternalLinkage, NULL, AccessStr);
+ GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
+ // Set the metadata (debuginfo types) for the global.
+ if (TypeMeta)
+ GV->setMetadata(LLVMContext::MD_preserve_access_index, TypeMeta);
+ GEPGlobals[AccessKey] = GV;
+ } else {
+ GV = GEPGlobals[AccessKey];
+ }
+
+ // Load the global variable.
+ auto *LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV);
+ BB->getInstList().insert(Call->getIterator(), LDInst);
+
+ // Generate a BitCast
+ auto *BCInst = new BitCastInst(Base, Type::getInt8PtrTy(BB->getContext()));
+ BB->getInstList().insert(Call->getIterator(), BCInst);
+
+ // Generate a GetElementPtr
+ auto *GEP = GetElementPtrInst::Create(Type::getInt8Ty(BB->getContext()),
+ BCInst, LDInst);
+ BB->getInstList().insert(Call->getIterator(), GEP);
+
+ // Generate a BitCast
+ auto *BCInst2 = new BitCastInst(GEP, Call->getType());
+ BB->getInstList().insert(Call->getIterator(), BCInst2);
+
+ Call->replaceAllUsesWith(BCInst2);
+ Call->eraseFromParent();
+
+ return true;
+}
+
+bool BPFAbstractMemberAccess::doTransformation(Module &M) {
+ bool Transformed = false;
+
+ for (Function &F : M) {
+ // Collect PreserveDIAccessIndex Intrinsic call chains.
+ // The call chains will be used to generate the access
+ // patterns similar to GEP.
+ collectAICallChains(M, F);
+
+ for (auto &C : BaseAICalls)
+ Transformed = transformGEPChain(M, C.first, C.second) || Transformed;
+ }
+
+ return removePreserveAccessIndexIntrinsic(M) || Transformed;
+}
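
As a self-contained illustration of the relocation described in the header comment of this new pass, the amount the loader patches into "r" is simply the member offset recomputed against the struct layout that the running kernel actually uses. The structs below mirror the comment's example only; they are not taken from any real kernel, and the sketch is not part of the patch.

    #include <cstddef>
    #include <cstdio>

    // Layout the program was compiled against (the comment's original struct s).
    struct s_old { int a; int b; };
    struct t_old { s_old c; int d; };

    // Layout on a newer kernel (the comment's struct s with new_field prepended).
    struct s_new { int new_field; int a; int b; };
    struct t_new { s_new c; int d; };

    int main() {
      // e.c.b sits at offset 4 under the old layout and offset 8 under the new
      // one; CO-RE keeps the access as "&e + r" and lets the loader patch r.
      std::printf("old offset of e.c.b: %zu\n",
                  offsetof(t_old, c) + offsetof(s_old, b)); // 4
      std::printf("new offset of e.c.b: %zu\n",
                  offsetof(t_new, c) + offsetof(s_new, b)); // 8
      return 0;
    }
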
diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp
index ada5eb923f40..e61e73468057 100644
--- a/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- BPFAsmPrinter.cpp - BPF LLVM assembly writer ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,7 +16,8 @@
#include "BPFMCInstLower.h"
#include "BPFTargetMachine.h"
#include "BTFDebug.h"
-#include "InstPrinter/BPFInstPrinter.h"
+#include "MCTargetDesc/BPFInstPrinter.h"
+#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,27 +38,30 @@ class BPFAsmPrinter : public AsmPrinter {
public:
explicit BPFAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
+ : AsmPrinter(TM, std::move(Streamer)), BTF(nullptr) {}
StringRef getPassName() const override { return "BPF Assembly Printer"; }
bool doInitialization(Module &M) override;
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void EmitInstruction(const MachineInstr *MI) override;
+
+private:
+ BTFDebug *BTF;
};
} // namespace
bool BPFAsmPrinter::doInitialization(Module &M) {
AsmPrinter::doInitialization(M);
- if (MAI->doesSupportDebugInformation()) {
- Handlers.push_back(HandlerInfo(new BTFDebug(this), "emit",
+ // Only emit BTF when debuginfo is available.
+ if (MAI->doesSupportDebugInformation() && !empty(M.debug_compile_units())) {
+ BTF = new BTFDebug(this);
+ Handlers.push_back(HandlerInfo(std::unique_ptr<BTFDebug>(BTF), "emit",
"Debug Info Emission", "BTF",
"BTF Emission"));
}
@@ -105,18 +108,16 @@ void BPFAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
}
bool BPFAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned /*AsmVariant*/,
const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
- return true; // BPF does not have special modifiers
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
printOperand(MI, OpNo, O);
return false;
}
bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
+ unsigned OpNum, const char *ExtraCode,
raw_ostream &O) {
assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
const MachineOperand &BaseMO = MI->getOperand(OpNum);
@@ -137,11 +138,12 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-
- BPFMCInstLower MCInstLowering(OutContext, *this);
-
MCInst TmpInst;
- MCInstLowering.Lower(MI, TmpInst);
+
+ if (!BTF || !BTF->InstLower(MI, TmpInst)) {
+ BPFMCInstLower MCInstLowering(OutContext, *this);
+ MCInstLowering.Lower(MI, TmpInst);
+ }
EmitToStreamer(*OutStreamer, TmpInst);
}
diff --git a/lib/Target/BPF/BPFCORE.h b/lib/Target/BPF/BPFCORE.h
new file mode 100644
index 000000000000..e0950d95f8d7
--- /dev/null
+++ b/lib/Target/BPF/BPFCORE.h
@@ -0,0 +1,24 @@
+//===- BPFCORE.h - Common info for Compile-Once Run-EveryWhere -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFCORE_H
+#define LLVM_LIB_TARGET_BPF_BPFCORE_H
+
+namespace llvm {
+
+class BPFCoreSharedInfo {
+public:
+ /// The attribute attached to globals representing a member offset
+ static const std::string AmaAttr;
+ /// The section name to identify a patchable external global
+ static const std::string PatchableExtSecName;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/BPF/BPFCallingConv.td b/lib/Target/BPF/BPFCallingConv.td
index 637f9752ec42..ef4ef1930aa8 100644
--- a/lib/Target/BPF/BPFCallingConv.td
+++ b/lib/Target/BPF/BPFCallingConv.td
@@ -1,9 +1,8 @@
//===-- BPFCallingConv.td - Calling Conventions BPF --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFFrameLowering.cpp b/lib/Target/BPF/BPFFrameLowering.cpp
index c2806c85f24f..8812cfdd86da 100644
--- a/lib/Target/BPF/BPFFrameLowering.cpp
+++ b/lib/Target/BPF/BPFFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- BPFFrameLowering.cpp - BPF Frame Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h
index b4ffa0713fa6..2dc6277d2244 100644
--- a/lib/Target/BPF/BPFFrameLowering.h
+++ b/lib/Target/BPF/BPFFrameLowering.h
@@ -1,9 +1,8 @@
//===-- BPFFrameLowering.h - Define frame lowering for BPF -----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 8b9bc08e144f..1bd705c55188 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- BPFISelDAGToDAG.cpp - A dag to dag inst selector for BPF ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index 9272cf692dc9..ff69941d26fb 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,7 +105,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
if (STI.getHasAlu32()) {
setOperationAction(ISD::BSWAP, MVT::i32, Promote);
- setOperationAction(ISD::BR_CC, MVT::i32, Promote);
+ setOperationAction(ISD::BR_CC, MVT::i32,
+ STI.getHasJmp32() ? Custom : Promote);
}
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
@@ -163,6 +163,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
// CPU/Feature control
HasAlu32 = STI.getHasAlu32();
+ HasJmp32 = STI.getHasJmp32();
HasJmpExt = STI.getHasJmpExt();
}
@@ -507,7 +508,7 @@ SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
NegateCC(LHS, RHS, CC);
return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
- DAG.getConstant(CC, DL, MVT::i64), Dest);
+ DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}
SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -677,36 +678,23 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
int CC = MI.getOperand(3).getImm();
int NewCC;
switch (CC) {
- case ISD::SETGT:
- NewCC = isSelectRROp ? BPF::JSGT_rr : BPF::JSGT_ri;
- break;
- case ISD::SETUGT:
- NewCC = isSelectRROp ? BPF::JUGT_rr : BPF::JUGT_ri;
- break;
- case ISD::SETGE:
- NewCC = isSelectRROp ? BPF::JSGE_rr : BPF::JSGE_ri;
- break;
- case ISD::SETUGE:
- NewCC = isSelectRROp ? BPF::JUGE_rr : BPF::JUGE_ri;
- break;
- case ISD::SETEQ:
- NewCC = isSelectRROp ? BPF::JEQ_rr : BPF::JEQ_ri;
- break;
- case ISD::SETNE:
- NewCC = isSelectRROp ? BPF::JNE_rr : BPF::JNE_ri;
- break;
- case ISD::SETLT:
- NewCC = isSelectRROp ? BPF::JSLT_rr : BPF::JSLT_ri;
- break;
- case ISD::SETULT:
- NewCC = isSelectRROp ? BPF::JULT_rr : BPF::JULT_ri;
- break;
- case ISD::SETLE:
- NewCC = isSelectRROp ? BPF::JSLE_rr : BPF::JSLE_ri;
- break;
- case ISD::SETULE:
- NewCC = isSelectRROp ? BPF::JULE_rr : BPF::JULE_ri;
- break;
+#define SET_NEWCC(X, Y) \
+ case ISD::X: \
+ if (is32BitCmp && HasJmp32) \
+ NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
+ else \
+ NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
+ break
+ SET_NEWCC(SETGT, JSGT);
+ SET_NEWCC(SETUGT, JUGT);
+ SET_NEWCC(SETGE, JSGE);
+ SET_NEWCC(SETUGE, JUGE);
+ SET_NEWCC(SETEQ, JEQ);
+ SET_NEWCC(SETNE, JNE);
+ SET_NEWCC(SETLT, JSLT);
+ SET_NEWCC(SETULT, JULT);
+ SET_NEWCC(SETLE, JSLE);
+ SET_NEWCC(SETULE, JULE);
default:
report_fatal_error("unimplemented select CondCode " + Twine(CC));
}
@@ -724,13 +712,13 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
//
// We simply do extension for all situations in this method, but we will
// try to remove those unnecessary in BPFMIPeephole pass.
- if (is32BitCmp)
+ if (is32BitCmp && !HasJmp32)
LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
if (isSelectRROp) {
unsigned RHS = MI.getOperand(2).getReg();
- if (is32BitCmp)
+ if (is32BitCmp && !HasJmp32)
RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);
BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
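
For reference, each SET_NEWCC invocation introduced above expands to the same shape as one of the removed hand-written cases, plus the Jmp32 variant selection; for example, SET_NEWCC(SETGT, JSGT); expands inside the surrounding switch to:

    case ISD::SETGT:
      if (is32BitCmp && HasJmp32)
        NewCC = isSelectRROp ? BPF::JSGT_rr_32 : BPF::JSGT_ri_32;
      else
        NewCC = isSelectRROp ? BPF::JSGT_rr : BPF::JSGT_ri;
      break;
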
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index 0aa8b9ac57ac..b81bf4e1320d 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -1,9 +1,8 @@
//===-- BPFISelLowering.h - BPF DAG Lowering Interface ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,6 +55,7 @@ public:
MachineBasicBlock *BB) const override;
bool getHasAlu32() const { return HasAlu32; }
+ bool getHasJmp32() const { return HasJmp32; }
bool getHasJmpExt() const { return HasJmpExt; }
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -66,6 +66,7 @@ public:
private:
// Control Instruction Selection Features
bool HasAlu32;
+ bool HasJmp32;
bool HasJmpExt;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -100,7 +101,7 @@ private:
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override {
+ const AttributeList &FuncAttributes) const override {
return Size >= 8 ? MVT::i64 : MVT::i32;
}
diff --git a/lib/Target/BPF/BPFInstrFormats.td b/lib/Target/BPF/BPFInstrFormats.td
index 92d4a62fd875..9f00dc85d789 100644
--- a/lib/Target/BPF/BPFInstrFormats.td
+++ b/lib/Target/BPF/BPFInstrFormats.td
@@ -1,9 +1,8 @@
//===-- BPFInstrFormats.td - BPF Instruction Formats -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,6 +16,7 @@ def BPF_ST : BPFOpClass<0x2>;
def BPF_STX : BPFOpClass<0x3>;
def BPF_ALU : BPFOpClass<0x4>;
def BPF_JMP : BPFOpClass<0x5>;
+def BPF_JMP32 : BPFOpClass<0x6>;
def BPF_ALU64 : BPFOpClass<0x7>;
class BPFSrcType<bits<1> val> {
diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp
index 4d47debdaa74..932f718d5490 100644
--- a/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/lib/Target/BPF/BPFInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- BPFInstrInfo.cpp - BPF Instruction Information ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFInstrInfo.h b/lib/Target/BPF/BPFInstrInfo.h
index fb65a86a6d18..e4bd757da560 100644
--- a/lib/Target/BPF/BPFInstrInfo.h
+++ b/lib/Target/BPF/BPFInstrInfo.h
@@ -1,9 +1,8 @@
//===-- BPFInstrInfo.h - BPF Instruction Information ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index aaef5fb706e0..c44702a78ec8 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -1,9 +1,8 @@
//===-- BPFInstrInfo.td - Target Description for BPF Target ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,6 +101,26 @@ def BPF_CC_LTU : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETULT);}]>;
def BPF_CC_LEU : PatLeaf<(i64 imm),
[{return (N->getZExtValue() == ISD::SETULE);}]>;
+def BPF_CC_EQ_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETEQ);}]>;
+def BPF_CC_NE_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETNE);}]>;
+def BPF_CC_GE_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETGE);}]>;
+def BPF_CC_GT_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETGT);}]>;
+def BPF_CC_GTU_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETUGT);}]>;
+def BPF_CC_GEU_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETUGE);}]>;
+def BPF_CC_LE_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETLE);}]>;
+def BPF_CC_LT_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETLT);}]>;
+def BPF_CC_LTU_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETULT);}]>;
+def BPF_CC_LEU_32 : PatLeaf<(i32 imm),
+ [{return (N->getZExtValue() == ISD::SETULE);}]>;
// For arithmetic and jump instructions the 8-bit 'code'
// field is divided into three parts:
@@ -167,23 +186,57 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
let BPFClass = BPF_JMP;
}
-multiclass J<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond> {
+class JMP_RR_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
+ : TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
+ (outs),
+ (ins GPR32:$dst, GPR32:$src, brtarget:$BrDst),
+ "if $dst "#OpcodeStr#" $src goto $BrDst",
+ [(BPFbrcc i32:$dst, i32:$src, Cond, bb:$BrDst)]> {
+ bits<4> dst;
+ bits<4> src;
+ bits<16> BrDst;
+
+ let Inst{55-52} = src;
+ let Inst{51-48} = dst;
+ let Inst{47-32} = BrDst;
+ let BPFClass = BPF_JMP32;
+}
+
+class JMP_RI_32<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
+ : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+ (outs),
+ (ins GPR32:$dst, i32imm:$imm, brtarget:$BrDst),
+ "if $dst "#OpcodeStr#" $imm goto $BrDst",
+ [(BPFbrcc i32:$dst, i32immSExt32:$imm, Cond, bb:$BrDst)]> {
+ bits<4> dst;
+ bits<16> BrDst;
+ bits<32> imm;
+
+ let Inst{51-48} = dst;
+ let Inst{47-32} = BrDst;
+ let Inst{31-0} = imm;
+ let BPFClass = BPF_JMP32;
+}
+
+multiclass J<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond, PatLeaf Cond32> {
def _rr : JMP_RR<Opc, OpcodeStr, Cond>;
def _ri : JMP_RI<Opc, OpcodeStr, Cond>;
+ def _rr_32 : JMP_RR_32<Opc, OpcodeStr, Cond32>;
+ def _ri_32 : JMP_RI_32<Opc, OpcodeStr, Cond32>;
}
let isBranch = 1, isTerminator = 1, hasDelaySlot=0 in {
// cmp+goto instructions
-defm JEQ : J<BPF_JEQ, "==", BPF_CC_EQ>;
-defm JUGT : J<BPF_JGT, ">", BPF_CC_GTU>;
-defm JUGE : J<BPF_JGE, ">=", BPF_CC_GEU>;
-defm JNE : J<BPF_JNE, "!=", BPF_CC_NE>;
-defm JSGT : J<BPF_JSGT, "s>", BPF_CC_GT>;
-defm JSGE : J<BPF_JSGE, "s>=", BPF_CC_GE>;
-defm JULT : J<BPF_JLT, "<", BPF_CC_LTU>;
-defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU>;
-defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT>;
-defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE>;
+defm JEQ : J<BPF_JEQ, "==", BPF_CC_EQ, BPF_CC_EQ_32>;
+defm JUGT : J<BPF_JGT, ">", BPF_CC_GTU, BPF_CC_GTU_32>;
+defm JUGE : J<BPF_JGE, ">=", BPF_CC_GEU, BPF_CC_GEU_32>;
+defm JNE : J<BPF_JNE, "!=", BPF_CC_NE, BPF_CC_NE_32>;
+defm JSGT : J<BPF_JSGT, "s>", BPF_CC_GT, BPF_CC_GT_32>;
+defm JSGE : J<BPF_JSGE, "s>=", BPF_CC_GE, BPF_CC_GE_32>;
+defm JULT : J<BPF_JLT, "<", BPF_CC_LTU, BPF_CC_LTU_32>;
+defm JULE : J<BPF_JLE, "<=", BPF_CC_LEU, BPF_CC_LEU_32>;
+defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
+defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
}
// ALU instructions
@@ -561,11 +614,31 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
let BPFClass = BPF_STX;
}
+class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
+ : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value,
+ (outs GPR32:$dst),
+ (ins MEMri:$addr, GPR32:$val),
+ "lock *("#OpcodeStr#" *)($addr) += $val",
+ [(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> {
+ bits<4> dst;
+ bits<20> addr;
+
+ let Inst{51-48} = addr{19-16}; // base reg
+ let Inst{55-52} = dst;
+ let Inst{47-32} = addr{15-0}; // offset
+ let BPFClass = BPF_STX;
+}
+
let Constraints = "$dst = $val" in {
-def XADD32 : XADD<BPF_W, "u32", atomic_load_add_32>;
-def XADD64 : XADD<BPF_DW, "u64", atomic_load_add_64>;
-// undefined def XADD16 : XADD<1, "xadd16", atomic_load_add_16>;
-// undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>;
+ let Predicates = [BPFNoALU32] in {
+ def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>;
+ }
+
+ let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
+ def XADDW32 : XADD32<BPF_W, "u32", atomic_load_add_32>;
+ }
+
+ def XADDD : XADD<BPF_DW, "u64", atomic_load_add_64>;
}
// bswap16, bswap32, bswap64
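
A sketch of the kind of source that matches the atomic_load_add patterns above, assuming the __sync_fetch_and_add builtin spelling commonly used in BPF C programs (an assumption for illustration; which of XADDW, XADDW32 or XADDD is selected then follows from the operand width and the ALU32 predicates in the hunk):

    #include <cstdint>

    // 32-bit and 64-bit atomic adds; on the BPF target these are the shapes the
    // XADD* patterns above are written to match (a sketch, not a guaranteed lowering).
    void bump(uint32_t *c32, uint64_t *c64) {
      __sync_fetch_and_add(c32, 1);  // 32-bit atomic add
      __sync_fetch_and_add(c64, 1);  // 64-bit atomic add
    }
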
diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp
index c8528e867310..846798a63cb7 100644
--- a/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/lib/Target/BPF/BPFMCInstLower.cpp
@@ -1,9 +1,8 @@
//=-- BPFMCInstLower.cpp - Convert BPF MachineInstr to an MCInst ------------=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h
index eac811f4cf88..0622d20814d3 100644
--- a/lib/Target/BPF/BPFMCInstLower.h
+++ b/lib/Target/BPF/BPFMCInstLower.h
@@ -1,9 +1,8 @@
//===-- BPFMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/BPF/BPFMIChecking.cpp b/lib/Target/BPF/BPFMIChecking.cpp
index 0a311378e777..4c46289656b4 100644
--- a/lib/Target/BPF/BPFMIChecking.cpp
+++ b/lib/Target/BPF/BPFMIChecking.cpp
@@ -1,9 +1,8 @@
//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,14 +61,107 @@ void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
}
+// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
+// used.
+//
+// NOTE: BPF backend hasn't enabled sub-register liveness tracking, so when the
+// source and destination operands of XADD are GPR32, there is no sub-register
+// dead info. If we rely on the generic MachineInstr::allDefsAreDead, then we
+// will raise a false alarm on a GPR32 Def.
+//
+// To support GPR32 Def, ideally we could just enable sub-register liveness
+// tracking on the BPF backend, then allDefsAreDead could work on GPR32 Def.
+// This requires implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
+//
+// However, sub-register liveness tracking module inside LLVM is actually
+// designed for the situation where one register could be split into more than
+// one sub-registers for which case each sub-register could have their own
+// liveness and kill one of them doesn't kill others. So, tracking liveness for
+// each make sense.
+//
+// For BPF, each 64-bit register could only have one 32-bit sub-register. This
+// is exactly the case which LLVM thinks brings no benefit for doing
+// sub-register tracking, because the live range of a sub-register must always
+// equal that of its parent register, therefore liveness tracking is disabled
+// even if the back-end has implemented enableSubRegLiveness. The detailed
+// information is at r232695:
+//
+// Author: Matthias Braun <matze@braunis.de>
+// Date: Thu Mar 19 00:21:58 2015 +0000
+// Do not track subregister liveness when it brings no benefits
+//
+// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given that the solo
+// sub-register always has the same liveness as its parent register, LLVM
+// already attaches an implicit 64-bit register Def whenever there is
+// a sub-register Def. The liveness of the implicit 64-bit Def is available.
+// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info could
+// be:
+//
+// $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
+// implicit killed $r9, implicit-def dead $r9
+//
+// Even though w9 is not marked as Dead, the parent register r9 is marked as
+// Dead correctly, and it is safe to use such information for our purpose.
+static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+ const MCRegisterClass *GPR64RegClass =
+ &BPFMCRegisterClasses[BPF::GPRRegClassID];
+ std::vector<unsigned> GPR32LiveDefs;
+ std::vector<unsigned> GPR64DeadDefs;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ bool RegIsGPR64;
+
+ if (!MO.isReg() || MO.isUse())
+ continue;
+
+ RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
+ if (!MO.isDead()) {
+ // It is a GPR64 live Def, we are sure it is live.
+ if (RegIsGPR64)
+ return true;
+ // It is a GPR32 live Def, we are unsure whether it is really dead due to
+ // no sub-register liveness tracking. Push it to vector for deferred
+ // check.
+ GPR32LiveDefs.push_back(MO.getReg());
+ continue;
+ }
+
+ // Record any GPR64 dead Def as some unmarked GPR32 could be an alias of its
+ // low 32 bits.
+ if (RegIsGPR64)
+ GPR64DeadDefs.push_back(MO.getReg());
+ }
+
+ // No GPR32 live Def, safe to return false.
+ if (GPR32LiveDefs.empty())
+ return false;
+
+ // No GPR64 dead Def, so all those GPR32 live Defs can't have an alias,
+ // therefore they must be truly live; safe to return true.
+ if (GPR64DeadDefs.empty())
+ return true;
+
+ // Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
+ std::vector<unsigned>::iterator search_begin = GPR64DeadDefs.begin();
+ std::vector<unsigned>::iterator search_end = GPR64DeadDefs.end();
+ for (auto I : GPR32LiveDefs)
+ for (MCSuperRegIterator SR(I, TRI); SR.isValid(); ++SR)
+ if (std::find(search_begin, search_end, *SR) == search_end)
+ return true;
+
+ return false;
+}
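For context, a hedged sketch of the kind of source the pre-emit check guards against: BPF XADD does not fetch the old value, so code that consumes the result leaves a live Def that hasLiveDefs is expected to flag.

    /* Sketch: using the returned old value makes the XADD Def live, which the
     * checking pass is expected to reject for BPF. */
    int bad_bump(unsigned int *counter)
    {
      return __sync_fetch_and_add(counter, 1);
    }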
+
void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
- if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
+ if (MI.getOpcode() != BPF::XADDW &&
+ MI.getOpcode() != BPF::XADDD &&
+ MI.getOpcode() != BPF::XADDW32)
continue;
LLVM_DEBUG(MI.dump());
- if (!MI.allDefsAreDead()) {
+ if (hasLiveDefs(MI, TRI)) {
DebugLoc Empty;
const DebugLoc &DL = MI.getDebugLoc();
if (DL != Empty)
diff --git a/lib/Target/BPF/BPFMIPeephole.cpp b/lib/Target/BPF/BPFMIPeephole.cpp
index 9e984d0facfb..156ba793e359 100644
--- a/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/lib/Target/BPF/BPFMIPeephole.cpp
@@ -1,9 +1,8 @@
//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/lib/Target/BPF/BPFMISimplifyPatchable.cpp
new file mode 100644
index 000000000000..e9114d7187e3
--- /dev/null
+++ b/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -0,0 +1,163 @@
+//===----- BPFMISimplifyPatchable.cpp - MI Simplify Patchable Insts -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass targets a subset of instructions like below
+// ld_imm64 r1, @global
+// ldd r2, r1, 0
+// add r3, struct_base_reg, r2
+//
+// Here @global should represent either an AMA (abstract member access) or
+// a patchable extern variable, and these two kinds of accesses
+// are subject to bpf load-time patching. After this pass, the
+// code becomes
+// ld_imm64 r1, @global
+// add r3, struct_base_reg, r1
+//
+// Eventually, at BTF output stage, a relocation record will be generated
+// for ld_imm64 which should be replaced later by bpf loader:
+// r1 = <calculated offset> or <to_be_patched_extern_val>
+// add r3, struct_base_reg, r1
+// or
+// ld_imm64 r1, <to_be_patched_extern_val>
+// add r3, struct_base_reg, r1
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-simplify-patchable"
+
+namespace {
+
+struct BPFMISimplifyPatchable : public MachineFunctionPass {
+
+ static char ID;
+ const BPFInstrInfo *TII;
+ MachineFunction *MF;
+
+ BPFMISimplifyPatchable() : MachineFunctionPass(ID) {
+ initializeBPFMISimplifyPatchablePass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ bool removeLD(void);
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!skipFunction(MF.getFunction())) {
+ initialize(MF);
+ }
+ return removeLD();
+ }
+};
+
+// Initialize class variables.
+void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
+ LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
+}
+
+/// Remove unneeded Load instructions.
+bool BPFMISimplifyPatchable::removeLD() {
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ MachineInstr *ToErase = nullptr;
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ // Ensure the register format is LOAD <reg>, <reg>, 0
+ if (MI.getOpcode() != BPF::LDD && MI.getOpcode() != BPF::LDW &&
+ MI.getOpcode() != BPF::LDH && MI.getOpcode() != BPF::LDB &&
+ MI.getOpcode() != BPF::LDW32 && MI.getOpcode() != BPF::LDH32 &&
+ MI.getOpcode() != BPF::LDB32)
+ continue;
+
+ if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg())
+ continue;
+
+ if (!MI.getOperand(2).isImm() || MI.getOperand(2).getImm())
+ continue;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ int64_t ImmVal = MI.getOperand(2).getImm();
+
+ MachineInstr *DefInst = MRI->getUniqueVRegDef(SrcReg);
+ if (!DefInst)
+ continue;
+
+ bool IsCandidate = false;
+ if (DefInst->getOpcode() == BPF::LD_imm64) {
+ const MachineOperand &MO = DefInst->getOperand(1);
+ if (MO.isGlobal()) {
+ const GlobalValue *GVal = MO.getGlobal();
+ auto *GVar = dyn_cast<GlobalVariable>(GVal);
+ if (GVar) {
+ // Global variables representing structure offset or
+ // patchable extern globals.
+ if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+ assert(ImmVal == 0);
+ IsCandidate = true;
+ } else if (!GVar->hasInitializer() && GVar->hasExternalLinkage() &&
+ GVar->getSection() ==
+ BPFCoreSharedInfo::PatchableExtSecName) {
+ if (ImmVal == 0)
+ IsCandidate = true;
+ else
+ errs() << "WARNING: unhandled patchable extern "
+ << GVar->getName() << " with load offset " << ImmVal
+ << "\n";
+ }
+ }
+ }
+ }
+
+ if (!IsCandidate)
+ continue;
+
+ auto Begin = MRI->use_begin(DstReg), End = MRI->use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(SrcReg);
+ }
+
+ ToErase = &MI;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+} // namespace
+
+INITIALIZE_PASS(BPFMISimplifyPatchable, DEBUG_TYPE,
+ "BPF PreEmit SimplifyPatchable", false, false)
+
+char BPFMISimplifyPatchable::ID = 0;
+FunctionPass *llvm::createBPFMISimplifyPatchablePass() {
+ return new BPFMISimplifyPatchable();
+}
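As a rough illustration (an assumption about the intended front-end usage, not something this file defines), field accesses wrapped in clang's __builtin_preserve_access_index() are the kind of source that ends up as the ld_imm64/ldd pairs this pass folds; the struct and function names below are made up.

    /* Illustrative only: a relocatable member access that is expected to be
     * lowered through an AMA global and patched at BPF load time. */
    struct pkt {
      int pad;
      int len;
    };

    int get_len(struct pkt *p)
    {
      return *(int *)__builtin_preserve_access_index(&p->len);
    }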
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 635c11113151..714af06e11d9 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- BPFRegisterInfo.cpp - BPF Register Information ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -122,6 +121,6 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register BPFRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return BPF::R10;
}
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 4202850e9eb9..e7b870b720a4 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- BPFRegisterInfo.h - BPF Register Information Impl -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,7 +32,7 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
};
}
diff --git a/lib/Target/BPF/BPFRegisterInfo.td b/lib/Target/BPF/BPFRegisterInfo.td
index da1d6b505f84..88dec063be70 100644
--- a/lib/Target/BPF/BPFRegisterInfo.td
+++ b/lib/Target/BPF/BPFRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- BPFRegisterInfo.td - BPF Register defs -------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/lib/Target/BPF/BPFSelectionDAGInfo.cpp
index 24d5f59bbfd7..a711294048ba 100644
--- a/lib/Target/BPF/BPFSelectionDAGInfo.cpp
+++ b/lib/Target/BPF/BPFSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- BPFSelectionDAGInfo.cpp - BPF SelectionDAG Info -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFSelectionDAGInfo.h b/lib/Target/BPF/BPFSelectionDAGInfo.h
index 19d3c5769573..fb88c32ceb0c 100644
--- a/lib/Target/BPF/BPFSelectionDAGInfo.h
+++ b/lib/Target/BPF/BPFSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- BPFSelectionDAGInfo.h - BPF SelectionDAG Info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BPFSubtarget.cpp b/lib/Target/BPF/BPFSubtarget.cpp
index 56780bd9d46f..ab3452501b95 100644
--- a/lib/Target/BPF/BPFSubtarget.cpp
+++ b/lib/Target/BPF/BPFSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- BPFSubtarget.cpp - BPF Subtarget Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,6 +35,7 @@ BPFSubtarget &BPFSubtarget::initializeSubtargetDependencies(StringRef CPU,
void BPFSubtarget::initializeEnvironment() {
HasJmpExt = false;
+ HasJmp32 = false;
HasAlu32 = false;
UseDwarfRIS = false;
}
@@ -49,6 +49,11 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
HasJmpExt = true;
return;
}
+ if (CPU == "v3") {
+ HasJmpExt = true;
+ HasJmp32 = true;
+ return;
+ }
}
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
diff --git a/lib/Target/BPF/BPFSubtarget.h b/lib/Target/BPF/BPFSubtarget.h
index 60e56435fe4c..3da6a026ab7e 100644
--- a/lib/Target/BPF/BPFSubtarget.h
+++ b/lib/Target/BPF/BPFSubtarget.h
@@ -1,9 +1,8 @@
//===-- BPFSubtarget.h - Define Subtarget for the BPF -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -48,6 +47,10 @@ protected:
// whether the cpu supports jmp ext
bool HasJmpExt;
+ // whether the cpu supports jmp32 ext.
+ // NOTE: jmp32 is not enabled when alu32 is enabled.
+ bool HasJmp32;
+
// whether the cpu supports alu32 instructions.
bool HasAlu32;
@@ -66,6 +69,7 @@ public:
// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool getHasJmpExt() const { return HasJmpExt; }
+ bool getHasJmp32() const { return HasJmp32; }
bool getHasAlu32() const { return HasAlu32; }
bool getUseDwarfRIS() const { return UseDwarfRIS; }
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 350465b118ed..24c0ff0f7f15 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- BPFTargetMachine.cpp - Define TargetMachine for BPF ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "BPFTargetMachine.h"
#include "BPF.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
+#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -34,6 +34,7 @@ extern "C" void LLVMInitializeBPFTarget() {
RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeBPFAbstractMemberAccessPass(PR);
initializeBPFMIPeepholePass(PR);
}
@@ -68,6 +69,7 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, const Triple &TT,
static_cast<BPFMCAsmInfo *>(const_cast<MCAsmInfo *>(AsmInfo.get()));
MAI->setDwarfUsesRelocationsAcrossSections(!Subtarget.getUseDwarfRIS());
}
+
namespace {
// BPF Code Generator Pass Configuration Options.
class BPFPassConfig : public TargetPassConfig {
@@ -79,6 +81,7 @@ public:
return getTM<BPFTargetMachine>();
}
+ void addIRPasses() override;
bool addInstSelector() override;
void addMachineSSAOptimization() override;
void addPreEmitPass() override;
@@ -89,6 +92,13 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
return new BPFPassConfig(*this, PM);
}
+void BPFPassConfig::addIRPasses() {
+
+ addPass(createBPFAbstractMemberAccess());
+
+ TargetPassConfig::addIRPasses();
+}
+
// Install an instruction selector pass using
// the ISelDag to gen BPF code.
bool BPFPassConfig::addInstSelector() {
@@ -98,6 +108,8 @@ bool BPFPassConfig::addInstSelector() {
}
void BPFPassConfig::addMachineSSAOptimization() {
+ addPass(createBPFMISimplifyPatchablePass());
+
// The default implementation must be called first as we want eBPF
// Peephole ran at last.
TargetPassConfig::addMachineSSAOptimization();
diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h
index a560dd27335a..beac7bd862da 100644
--- a/lib/Target/BPF/BPFTargetMachine.h
+++ b/lib/Target/BPF/BPFTargetMachine.h
@@ -1,9 +1,8 @@
//===-- BPFTargetMachine.h - Define TargetMachine for BPF --- C++ ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/BTF.def b/lib/Target/BPF/BTF.def
index 54c5bc3cf092..2d2e9a04aa6d 100644
--- a/lib/Target/BPF/BTF.def
+++ b/lib/Target/BPF/BTF.def
@@ -1,9 +1,8 @@
//===- BTF.def - BTF definitions --------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,5 +28,7 @@ HANDLE_BTF_KIND(10, CONST)
HANDLE_BTF_KIND(11, RESTRICT)
HANDLE_BTF_KIND(12, FUNC)
HANDLE_BTF_KIND(13, FUNC_PROTO)
+HANDLE_BTF_KIND(14, VAR)
+HANDLE_BTF_KIND(15, DATASEC)
#undef HANDLE_BTF_KIND
diff --git a/lib/Target/BPF/BTF.h b/lib/Target/BPF/BTF.h
index 1e1680faf1b8..ad56716710a6 100644
--- a/lib/Target/BPF/BTF.h
+++ b/lib/Target/BPF/BTF.h
@@ -1,9 +1,8 @@
//===-- BTF.h --------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -18,7 +17,7 @@
///
/// The binary layout for .BTF.ext section:
/// struct ExtHeader
-/// FuncInfo and LineInfo subsections
+/// FuncInfo, LineInfo, OffsetReloc and ExternReloc subsections
/// The FuncInfo subsection is defined as below:
/// BTFFuncInfo Size
/// struct SecFuncInfo for ELF section #1
@@ -33,6 +32,20 @@
/// struct SecLineInfo for ELF section #2
/// A number of struct BPFLineInfo for ELF section #2
/// ...
+/// The OffsetReloc subsection is defined as below:
+/// BPFOffsetReloc Size
+/// struct SecOffsetReloc for ELF section #1
+/// A number of struct BPFOffsetReloc for ELF section #1
+/// struct SecOffsetReloc for ELF section #2
+/// A number of struct BPFOffsetReloc for ELF section #2
+/// ...
+/// The ExternReloc subsection is defined as below:
+/// BPFExternReloc Size
+/// struct SecExternReloc for ELF section #1
+/// A number of struct BPFExternReloc for ELF section #1
+/// struct SecExternReloc for ELF section #2
+/// A number of struct BPFExternReloc for ELF section #2
+/// ...
///
/// The section formats are also defined at
/// https://github.com/torvalds/linux/blob/master/include/uapi/linux/btf.h
@@ -50,16 +63,21 @@ enum : uint32_t { MAGIC = 0xeB9F, VERSION = 1 };
/// Sizes in bytes of various things in the BTF format.
enum {
HeaderSize = 24,
- ExtHeaderSize = 24,
+ ExtHeaderSize = 40,
CommonTypeSize = 12,
BTFArraySize = 12,
BTFEnumSize = 8,
BTFMemberSize = 12,
BTFParamSize = 8,
+ BTFDataSecVarSize = 12,
SecFuncInfoSize = 8,
SecLineInfoSize = 8,
+ SecOffsetRelocSize = 8,
+ SecExternRelocSize = 8,
BPFFuncInfoSize = 8,
- BPFLineInfoSize = 16
+ BPFLineInfoSize = 16,
+ BPFOffsetRelocSize = 12,
+ BPFExternRelocSize = 8,
};
/// The .BTF section header definition.
@@ -77,7 +95,7 @@ struct Header {
};
enum : uint32_t {
- MAX_VLEN = 0xffff ///< Max # of struct/union/enum members or func args
+ MAX_VLEN = 0xffff ///< Max # of struct/union/enum members or func args
};
enum TypeKinds : uint8_t {
@@ -104,7 +122,7 @@ struct CommonType {
/// "Size" tells the size of the type it is describing.
///
/// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- /// FUNC and FUNC_PROTO.
+ /// FUNC, FUNC_PROTO and VAR.
/// "Type" is a type_id referring to another type.
union {
uint32_t Size;
@@ -122,7 +140,11 @@ struct CommonType {
// BTF_INT_BITS(VAL) : ((VAL) & 0x000000ff)
/// Attributes stored in the INT_ENCODING.
-enum : uint8_t { INT_SIGNED = (1 << 0), INT_CHAR = (1 << 1), INT_BOOL = (1 << 2) };
+enum : uint8_t {
+ INT_SIGNED = (1 << 0),
+ INT_CHAR = (1 << 1),
+ INT_BOOL = (1 << 2)
+};
/// BTF_KIND_ENUM is followed by multiple "struct BTFEnum".
/// The exact number of btf_enum is stored in the vlen (of the
@@ -163,6 +185,23 @@ struct BTFParam {
uint32_t Type;
};
+/// Variable scoping information.
+enum : uint8_t {
+ VAR_STATIC = 0, ///< Linkage: InternalLinkage
+ VAR_GLOBAL_ALLOCATED = 1, ///< Linkage: ExternalLinkage
+ VAR_GLOBAL_TENTATIVE = 2, ///< Linkage: CommonLinkage
+ VAR_GLOBAL_EXTERNAL = 3, ///< Linkage: ExternalLinkage
+};
+
+/// BTF_KIND_DATASEC is followed by multiple "struct BTFDataSecVar".
+/// The exact number of BTFDataSec entries is stored in the vlen (of the info
+/// in "struct CommonType").
+struct BTFDataSec {
+ uint32_t Type; ///< A BTF_KIND_VAR type
+ uint32_t Offset; ///< In-section offset
+ uint32_t Size; ///< Occupied memory size
+};
+
/// The .BTF.ext section header definition.
struct ExtHeader {
uint16_t Magic;
@@ -170,10 +209,14 @@ struct ExtHeader {
uint8_t Flags;
uint32_t HdrLen;
- uint32_t FuncInfoOff; ///< Offset of func info section
- uint32_t FuncInfoLen; ///< Length of func info section
- uint32_t LineInfoOff; ///< Offset of line info section
- uint32_t LineInfoLen; ///< Length of line info section
+ uint32_t FuncInfoOff; ///< Offset of func info section
+ uint32_t FuncInfoLen; ///< Length of func info section
+ uint32_t LineInfoOff; ///< Offset of line info section
+ uint32_t LineInfoLen; ///< Length of line info section
+ uint32_t OffsetRelocOff; ///< Offset of offset reloc section
+ uint32_t OffsetRelocLen; ///< Length of offset reloc section
+ uint32_t ExternRelocOff; ///< Offset of extern reloc section
+ uint32_t ExternRelocLen; ///< Length of extern reloc section
};
/// Specifying one function info.
@@ -199,10 +242,35 @@ struct BPFLineInfo {
/// Specifying line info's in one section.
struct SecLineInfo {
- uint32_t SecNameOff; ///< Section name index in the .BTF string tble
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
uint32_t NumLineInfo; ///< Number of line info's in this section
};
+/// Specifying one offset relocation.
+struct BPFOffsetReloc {
+ uint32_t InsnOffset; ///< Byte offset in this section
+ uint32_t TypeID; ///< TypeID for the relocation
+ uint32_t OffsetNameOff; ///< The string to traverse types
+};
+
+/// Specifying offset relocation's in one section.
+struct SecOffsetReloc {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumOffsetReloc; ///< Number of offset reloc's in this section
+};
+
+/// Specifying one extern relocation.
+struct BPFExternReloc {
+ uint32_t InsnOffset; ///< Byte offset in this section
+ uint32_t ExternNameOff; ///< The string for external variable
+};
+
+/// Specifying extern relocation's in one section.
+struct SecExternReloc {
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t NumExternReloc; ///< Number of extern reloc's in this section
+};
+
} // End namespace BTF.
} // End namespace llvm.
diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp
index 96efea4ba8ee..fa35c6619e21 100644
--- a/lib/Target/BPF/BTFDebug.cpp
+++ b/lib/Target/BPF/BTFDebug.cpp
@@ -1,9 +1,8 @@
//===- BTFDebug.cpp - BTF Generator ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,9 @@
//===----------------------------------------------------------------------===//
#include "BTFDebug.h"
+#include "BPF.h"
+#include "BPFCORE.h"
+#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -19,8 +21,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include <fstream>
-#include <sstream>
+#include "llvm/Support/LineIterator.h"
using namespace llvm;
@@ -39,8 +40,9 @@ void BTFTypeBase::emitType(MCStreamer &OS) {
OS.EmitIntValue(BTFType.Size, 4);
}
-BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
- : DTy(DTy) {
+BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
+ bool NeedsFixup)
+ : DTy(DTy), NeedsFixup(NeedsFixup) {
switch (Tag) {
case dwarf::DW_TAG_pointer_type:
Kind = BTF::BTF_KIND_PTR;
@@ -64,10 +66,17 @@ BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag)
}
void BTFTypeDerived::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
BTFType.NameOff = BDebug.addString(DTy->getName());
+ if (NeedsFixup)
+ return;
+
// The base type for PTR/CONST/VOLATILE could be void.
- const DIType *ResolvedType = DTy->getBaseType().resolve();
+ const DIType *ResolvedType = DTy->getBaseType();
if (!ResolvedType) {
assert((Kind == BTF::BTF_KIND_PTR || Kind == BTF::BTF_KIND_CONST ||
Kind == BTF::BTF_KIND_VOLATILE) &&
@@ -80,6 +89,10 @@ void BTFTypeDerived::completeType(BTFDebug &BDebug) {
void BTFTypeDerived::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+void BTFTypeDerived::setPointeeType(uint32_t PointeeType) {
+ BTFType.Type = PointeeType;
+}
+
/// Represent a struct/union forward declaration.
BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
Kind = BTF::BTF_KIND_FWD;
@@ -88,6 +101,10 @@ BTFTypeFwd::BTFTypeFwd(StringRef Name, bool IsUnion) : Name(Name) {
}
void BTFTypeFwd::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
BTFType.NameOff = BDebug.addString(Name);
}
@@ -121,6 +138,10 @@ BTFTypeInt::BTFTypeInt(uint32_t Encoding, uint32_t SizeInBits,
}
void BTFTypeInt::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
BTFType.NameOff = BDebug.addString(Name);
}
@@ -137,6 +158,10 @@ BTFTypeEnum::BTFTypeEnum(const DICompositeType *ETy, uint32_t VLen) : ETy(ETy) {
}
void BTFTypeEnum::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
BTFType.NameOff = BDebug.addString(ETy->getName());
DINodeArray Elements = ETy->getElements();
@@ -159,45 +184,29 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
}
}
-BTFTypeArray::BTFTypeArray(const DICompositeType *ATy) : ATy(ATy) {
+BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
+ uint32_t NumElems)
+ : ElemSize(ElemSize) {
Kind = BTF::BTF_KIND_ARRAY;
+ BTFType.NameOff = 0;
BTFType.Info = Kind << 24;
+ BTFType.Size = 0;
+
+ ArrayInfo.ElemType = ElemTypeId;
+ ArrayInfo.Nelems = NumElems;
}
-/// Represent a BTF array. BTF does not record array dimensions,
-/// so conceptually a BTF array is a one-dimensional array.
+/// Represent a BTF array.
void BTFTypeArray::completeType(BTFDebug &BDebug) {
- BTFType.NameOff = BDebug.addString(ATy->getName());
- BTFType.Size = 0;
-
- auto *BaseType = ATy->getBaseType().resolve();
- ArrayInfo.ElemType = BDebug.getTypeId(BaseType);
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
// The IR does not really have a type for the index.
// A special type for array index should have been
// created during initial type traversal. Just
// retrieve that type id.
ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
-
- // Get the number of array elements.
- // If the array size is 0, set the number of elements as 0.
- // Otherwise, recursively traverse the base types to
- // find the element size. The number of elements is
- // the totoal array size in bits divided by
- // element size in bits.
- uint64_t ArraySizeInBits = ATy->getSizeInBits();
- if (!ArraySizeInBits) {
- ArrayInfo.Nelems = 0;
- } else {
- uint32_t BaseTypeSize = BaseType->getSizeInBits();
- while (!BaseTypeSize) {
- const auto *DDTy = cast<DIDerivedType>(BaseType);
- BaseType = DDTy->getBaseType().resolve();
- assert(BaseType);
- BaseTypeSize = BaseType->getSizeInBits();
- }
- ArrayInfo.Nelems = ATy->getSizeInBits() / BaseTypeSize;
- }
}
void BTFTypeArray::emitType(MCStreamer &OS) {
@@ -207,6 +216,12 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
OS.EmitIntValue(ArrayInfo.Nelems, 4);
}
+void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
+ uint32_t &ElementTypeId) {
+ ElementTypeId = ArrayInfo.ElemType;
+ LocOffset = Loc * ElemSize;
+}
+
/// Represent either a struct or a union.
BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
bool HasBitField, uint32_t Vlen)
@@ -217,6 +232,10 @@ BTFTypeStruct::BTFTypeStruct(const DICompositeType *STy, bool IsStruct,
}
void BTFTypeStruct::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
BTFType.NameOff = BDebug.addString(STy->getName());
// Add struct/union members.
@@ -232,7 +251,7 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
} else {
BTFMember.Offset = DDTy->getOffsetInBits();
}
- BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType().resolve());
+ BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
Members.push_back(BTFMember);
}
}
@@ -247,6 +266,17 @@ void BTFTypeStruct::emitType(MCStreamer &OS) {
}
}
+std::string BTFTypeStruct::getName() { return STy->getName(); }
+
+void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
+ uint32_t &MemberType) {
+ MemberType = Members[Loc].Type;
+ MemberOffset =
+ HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset;
+}
+
+uint32_t BTFTypeStruct::getStructSize() { return STy->getSizeInBits() >> 3; }
+
/// The Func kind represents both subprogram and pointee of function
/// pointers. If the FuncName is empty, it represents a pointee of function
/// pointer. Otherwise, it represents a subprogram. The func arg names
@@ -261,8 +291,12 @@ BTFTypeFuncProto::BTFTypeFuncProto(
}
void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
DITypeRefArray Elements = STy->getTypeArray();
- auto RetType = Elements[0].resolve();
+ auto RetType = Elements[0];
BTFType.Type = RetType ? BDebug.getTypeId(RetType) : 0;
BTFType.NameOff = 0;
@@ -270,7 +304,7 @@ void BTFTypeFuncProto::completeType(BTFDebug &BDebug) {
// to represent the vararg, encode the NameOff/Type to be 0.
for (unsigned I = 1, N = Elements.size(); I < N; ++I) {
struct BTF::BTFParam Param;
- auto Element = Elements[I].resolve();
+ auto Element = Elements[I];
if (Element) {
Param.NameOff = BDebug.addString(FuncArgNames[I]);
Param.Type = BDebug.getTypeId(Element);
@@ -298,11 +332,54 @@ BTFTypeFunc::BTFTypeFunc(StringRef FuncName, uint32_t ProtoTypeId)
}
void BTFTypeFunc::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
BTFType.NameOff = BDebug.addString(Name);
}
void BTFTypeFunc::emitType(MCStreamer &OS) { BTFTypeBase::emitType(OS); }
+BTFKindVar::BTFKindVar(StringRef VarName, uint32_t TypeId, uint32_t VarInfo)
+ : Name(VarName) {
+ Kind = BTF::BTF_KIND_VAR;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = TypeId;
+ Info = VarInfo;
+}
+
+void BTFKindVar::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
+void BTFKindVar::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.EmitIntValue(Info, 4);
+}
+
+BTFKindDataSec::BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName)
+ : Asm(AsmPrt), Name(SecName) {
+ Kind = BTF::BTF_KIND_DATASEC;
+ BTFType.Info = Kind << 24;
+ BTFType.Size = 0;
+}
+
+void BTFKindDataSec::completeType(BTFDebug &BDebug) {
+ BTFType.NameOff = BDebug.addString(Name);
+ BTFType.Info |= Vars.size();
+}
+
+void BTFKindDataSec::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+
+ for (const auto &V : Vars) {
+ OS.EmitIntValue(std::get<0>(V), 4);
+ Asm->EmitLabelReference(std::get<1>(V), 4);
+ OS.EmitIntValue(std::get<2>(V), 4);
+ }
+}
+
uint32_t BTFStringTable::addString(StringRef S) {
// Check whether the string already exists.
for (auto &OffsetM : OffsetToIdMap) {
@@ -319,15 +396,18 @@ uint32_t BTFStringTable::addString(StringRef S) {
BTFDebug::BTFDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), SkipInstruction(false),
- LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0) {
+ LineInfoGenerated(false), SecNameOff(0), ArrayIndexTypeId(0),
+ MapDefNotCollected(true) {
addString("\0");
}
-void BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
- const DIType *Ty) {
+uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry,
+ const DIType *Ty) {
TypeEntry->setId(TypeEntries.size() + 1);
- DIToIdMap[Ty] = TypeEntry->getId();
+ uint32_t Id = TypeEntry->getId();
+ DIToIdMap[Ty] = Id;
TypeEntries.push_back(std::move(TypeEntry));
+ return Id;
}
uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
@@ -337,7 +417,7 @@ uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
return Id;
}
-void BTFDebug::visitBasicType(const DIBasicType *BTy) {
+void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
// Only int types are supported in BTF.
uint32_t Encoding = BTy->getEncoding();
if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
@@ -350,7 +430,7 @@ void BTFDebug::visitBasicType(const DIBasicType *BTy) {
// DIToIdMap for cross-type reference check.
auto TypeEntry = llvm::make_unique<BTFTypeInt>(
Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
- addType(std::move(TypeEntry), BTy);
+ TypeId = addType(std::move(TypeEntry), BTy);
}
/// Handle subprogram or subroutine types.
@@ -371,16 +451,17 @@ void BTFDebug::visitSubroutineType(
if (ForSubprog)
TypeId = addType(std::move(TypeEntry)); // For subprogram
else
- addType(std::move(TypeEntry), STy); // For func ptr
+ TypeId = addType(std::move(TypeEntry), STy); // For func ptr
// Visit return type and func arg types.
for (const auto Element : Elements) {
- visitTypeEntry(Element.resolve());
+ visitTypeEntry(Element);
}
}
/// Handle structure/union types.
-void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
+void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
+ uint32_t &TypeId) {
const DINodeArray Elements = CTy->getElements();
uint32_t VLen = Elements.size();
if (VLen > BTF::MAX_VLEN)
@@ -398,16 +479,49 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct) {
auto TypeEntry =
llvm::make_unique<BTFTypeStruct>(CTy, IsStruct, HasBitField, VLen);
- addType(std::move(TypeEntry), CTy);
+ StructTypes.push_back(TypeEntry.get());
+ TypeId = addType(std::move(TypeEntry), CTy);
// Visit all struct members.
for (const auto *Element : Elements)
visitTypeEntry(cast<DIDerivedType>(Element));
}
-void BTFDebug::visitArrayType(const DICompositeType *CTy) {
- auto TypeEntry = llvm::make_unique<BTFTypeArray>(CTy);
- addType(std::move(TypeEntry), CTy);
+void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
+ // Visit array element type.
+ uint32_t ElemTypeId, ElemSize;
+ const DIType *ElemType = CTy->getBaseType();
+ visitTypeEntry(ElemType, ElemTypeId, false, false);
+ ElemSize = ElemType->getSizeInBits() >> 3;
+
+ if (!CTy->getSizeInBits()) {
+ auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
+ ArrayTypes.push_back(TypeEntry.get());
+ ElemTypeId = addType(std::move(TypeEntry), CTy);
+ } else {
+ // Visit array dimensions.
+ DINodeArray Elements = CTy->getElements();
+ for (int I = Elements.size() - 1; I >= 0; --I) {
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[I]))
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type) {
+ const DISubrange *SR = cast<DISubrange>(Element);
+ auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
+ int64_t Count = CI->getSExtValue();
+
+ auto TypeEntry =
+ llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
+ ArrayTypes.push_back(TypeEntry.get());
+ if (I == 0)
+ ElemTypeId = addType(std::move(TypeEntry), CTy);
+ else
+ ElemTypeId = addType(std::move(TypeEntry));
+ ElemSize = ElemSize * Count;
+ }
+ }
+ }
+
+ // The array TypeId is the type id of the outermost dimension.
+ TypeId = ElemTypeId;
// The IR does not have a type for array index while BTF wants one.
// So create an array index type if there is none.
@@ -416,85 +530,162 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy) {
0, "__ARRAY_SIZE_TYPE__");
ArrayIndexTypeId = addType(std::move(TypeEntry));
}
-
- // Visit array element type.
- visitTypeEntry(CTy->getBaseType().resolve());
}
-void BTFDebug::visitEnumType(const DICompositeType *CTy) {
+void BTFDebug::visitEnumType(const DICompositeType *CTy, uint32_t &TypeId) {
DINodeArray Elements = CTy->getElements();
uint32_t VLen = Elements.size();
if (VLen > BTF::MAX_VLEN)
return;
auto TypeEntry = llvm::make_unique<BTFTypeEnum>(CTy, VLen);
- addType(std::move(TypeEntry), CTy);
+ TypeId = addType(std::move(TypeEntry), CTy);
// No need to visit base type as BTF does not encode it.
}
/// Handle structure/union forward declarations.
-void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion) {
+void BTFDebug::visitFwdDeclType(const DICompositeType *CTy, bool IsUnion,
+ uint32_t &TypeId) {
auto TypeEntry = llvm::make_unique<BTFTypeFwd>(CTy->getName(), IsUnion);
- addType(std::move(TypeEntry), CTy);
+ TypeId = addType(std::move(TypeEntry), CTy);
}
/// Handle structure, union, array and enumeration types.
-void BTFDebug::visitCompositeType(const DICompositeType *CTy) {
+void BTFDebug::visitCompositeType(const DICompositeType *CTy,
+ uint32_t &TypeId) {
auto Tag = CTy->getTag();
if (Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
// Handle forward declaration differently as it does not have members.
if (CTy->isForwardDecl())
- visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type);
+ visitFwdDeclType(CTy, Tag == dwarf::DW_TAG_union_type, TypeId);
else
- visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type);
+ visitStructType(CTy, Tag == dwarf::DW_TAG_structure_type, TypeId);
} else if (Tag == dwarf::DW_TAG_array_type)
- visitArrayType(CTy);
+ visitArrayType(CTy, TypeId);
else if (Tag == dwarf::DW_TAG_enumeration_type)
- visitEnumType(CTy);
+ visitEnumType(CTy, TypeId);
}
/// Handle pointer, typedef, const, volatile, restrict and member types.
-void BTFDebug::visitDerivedType(const DIDerivedType *DTy) {
+void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
+ bool CheckPointer, bool SeenPointer) {
unsigned Tag = DTy->getTag();
+ /// Try to avoid chasing pointees, esp. structure pointees which may
+ /// unnecessarily bring in a lot of types.
+ if (CheckPointer && !SeenPointer) {
+ SeenPointer = Tag == dwarf::DW_TAG_pointer_type;
+ }
+
+ if (CheckPointer && SeenPointer) {
+ const DIType *Base = DTy->getBaseType();
+ if (Base) {
+ if (const auto *CTy = dyn_cast<DICompositeType>(Base)) {
+ auto CTag = CTy->getTag();
+ if ((CTag == dwarf::DW_TAG_structure_type ||
+ CTag == dwarf::DW_TAG_union_type) &&
+ !CTy->isForwardDecl()) {
+ /// Find a candidate, generate a fixup. Later on the struct/union
+ /// pointee type will be replaced with either a real type or
+ /// a forward declaration.
+ auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag, true);
+ auto &Fixup = FixupDerivedTypes[CTy->getName()];
+ Fixup.first = CTag == dwarf::DW_TAG_union_type;
+ Fixup.second.push_back(TypeEntry.get());
+ TypeId = addType(std::move(TypeEntry), DTy);
+ return;
+ }
+ }
+ }
+ }
+
if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
- auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag);
- addType(std::move(TypeEntry), DTy);
+ auto TypeEntry = llvm::make_unique<BTFTypeDerived>(DTy, Tag, false);
+ TypeId = addType(std::move(TypeEntry), DTy);
} else if (Tag != dwarf::DW_TAG_member) {
return;
}
// Visit base type of pointer, typedef, const, volatile, restrict or
// struct/union member.
- visitTypeEntry(DTy->getBaseType().resolve());
+ uint32_t TempTypeId = 0;
+ if (Tag == dwarf::DW_TAG_member)
+ visitTypeEntry(DTy->getBaseType(), TempTypeId, true, false);
+ else
+ visitTypeEntry(DTy->getBaseType(), TempTypeId, CheckPointer, SeenPointer);
}
-void BTFDebug::visitTypeEntry(const DIType *Ty) {
- if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end())
+void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
+ bool CheckPointer, bool SeenPointer) {
+ if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
+ TypeId = DIToIdMap[Ty];
return;
+ }
- uint32_t TypeId;
if (const auto *BTy = dyn_cast<DIBasicType>(Ty))
- visitBasicType(BTy);
+ visitBasicType(BTy, TypeId);
else if (const auto *STy = dyn_cast<DISubroutineType>(Ty))
visitSubroutineType(STy, false, std::unordered_map<uint32_t, StringRef>(),
TypeId);
else if (const auto *CTy = dyn_cast<DICompositeType>(Ty))
- visitCompositeType(CTy);
+ visitCompositeType(CTy, TypeId);
else if (const auto *DTy = dyn_cast<DIDerivedType>(Ty))
- visitDerivedType(DTy);
+ visitDerivedType(DTy, TypeId, CheckPointer, SeenPointer);
else
llvm_unreachable("Unknown DIType");
}
+void BTFDebug::visitTypeEntry(const DIType *Ty) {
+ uint32_t TypeId;
+ visitTypeEntry(Ty, TypeId, false, false);
+}
+
+void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) {
+ if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
+ TypeId = DIToIdMap[Ty];
+ return;
+ }
+
+ // MapDef type is a struct type
+ const auto *CTy = dyn_cast<DICompositeType>(Ty);
+ if (!CTy)
+ return;
+
+ auto Tag = CTy->getTag();
+ if (Tag != dwarf::DW_TAG_structure_type || CTy->isForwardDecl())
+ return;
+
+ // Record this type
+ const DINodeArray Elements = CTy->getElements();
+ bool HasBitField = false;
+ for (const auto *Element : Elements) {
+ auto E = cast<DIDerivedType>(Element);
+ if (E->isBitField()) {
+ HasBitField = true;
+ break;
+ }
+ }
+
+ auto TypeEntry =
+ llvm::make_unique<BTFTypeStruct>(CTy, true, HasBitField, Elements.size());
+ StructTypes.push_back(TypeEntry.get());
+ TypeId = addType(std::move(TypeEntry), CTy);
+
+ // Visit all struct members
+ for (const auto *Element : Elements) {
+ const auto *MemberType = cast<DIDerivedType>(Element);
+ visitTypeEntry(MemberType->getBaseType());
+ }
+}
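A minimal sketch of the map-definition shape visitMapDefType walks, mirroring the struct mapdef example quoted in the beginFunctionImpl comment later in this file; the member names are illustrative only.

    /* Illustrative only: a global in the ".maps" section; its members and
     * their pointee types are what get recorded as BTF here. */
    struct m { int x; };

    struct mapdef {
      struct m *key;
      long *value;
    } __attribute__((section(".maps"))) hash_map;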
+
/// Read file contents from the actual file or from the source
std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
auto File = SP->getFile();
std::string FileName;
- if (File->getDirectory().size())
+ if (!File->getFilename().startswith("/") && File->getDirectory().size())
FileName = File->getDirectory().str() + "/" + File->getFilename().str();
else
FileName = File->getFilename();
@@ -507,16 +698,16 @@ std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
std::string Line;
Content.push_back(Line); // Line 0 for empty string
+ std::unique_ptr<MemoryBuffer> Buf;
auto Source = File->getSource();
- if (Source) {
- std::istringstream InputString(Source.getValue());
- while (std::getline(InputString, Line))
- Content.push_back(Line);
- } else {
- std::ifstream InputFile(FileName);
- while (std::getline(InputFile, Line))
- Content.push_back(Line);
- }
+ if (Source)
+ Buf = MemoryBuffer::getMemBufferCopy(*Source);
+ else if (ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(FileName))
+ Buf = std::move(*BufOrErr);
+ if (Buf)
+ for (line_iterator I(*Buf, false), E; I != E; ++I)
+ Content.push_back(*I);
FileContent[FileName] = Content;
return FileName;
@@ -547,6 +738,10 @@ void BTFDebug::emitCommonHeader() {
}
void BTFDebug::emitBTFSection() {
+ // Do not emit section if no types and only "" string.
+ if (!TypeEntries.size() && StringTable.getSize() == 1)
+ return;
+
MCContext &Ctx = OS.getContext();
OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
@@ -579,6 +774,11 @@ void BTFDebug::emitBTFSection() {
}
void BTFDebug::emitBTFExtSection() {
+ // Do not emit section if empty FuncInfoTable and LineInfoTable.
+ if (!FuncInfoTable.size() && !LineInfoTable.size() &&
+ !OffsetRelocTable.size() && !ExternRelocTable.size())
+ return;
+
MCContext &Ctx = OS.getContext();
OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
@@ -588,6 +788,8 @@ void BTFDebug::emitBTFExtSection() {
// Account for FuncInfo/LineInfo record size as well.
uint32_t FuncLen = 4, LineLen = 4;
+ // Do not account for optional OffsetReloc/ExternReloc.
+ uint32_t OffsetRelocLen = 0, ExternRelocLen = 0;
for (const auto &FuncSec : FuncInfoTable) {
FuncLen += BTF::SecFuncInfoSize;
FuncLen += FuncSec.second.size() * BTF::BPFFuncInfoSize;
@@ -596,11 +798,28 @@ void BTFDebug::emitBTFExtSection() {
LineLen += BTF::SecLineInfoSize;
LineLen += LineSec.second.size() * BTF::BPFLineInfoSize;
}
+ for (const auto &OffsetRelocSec : OffsetRelocTable) {
+ OffsetRelocLen += BTF::SecOffsetRelocSize;
+ OffsetRelocLen += OffsetRelocSec.second.size() * BTF::BPFOffsetRelocSize;
+ }
+ for (const auto &ExternRelocSec : ExternRelocTable) {
+ ExternRelocLen += BTF::SecExternRelocSize;
+ ExternRelocLen += ExternRelocSec.second.size() * BTF::BPFExternRelocSize;
+ }
+
+ if (OffsetRelocLen)
+ OffsetRelocLen += 4;
+ if (ExternRelocLen)
+ ExternRelocLen += 4;
OS.EmitIntValue(0, 4);
OS.EmitIntValue(FuncLen, 4);
OS.EmitIntValue(FuncLen, 4);
OS.EmitIntValue(LineLen, 4);
+ OS.EmitIntValue(FuncLen + LineLen, 4);
+ OS.EmitIntValue(OffsetRelocLen, 4);
+ OS.EmitIntValue(FuncLen + LineLen + OffsetRelocLen, 4);
+ OS.EmitIntValue(ExternRelocLen, 4);
// Emit func_info table.
OS.AddComment("FuncInfo");
@@ -633,6 +852,39 @@ void BTFDebug::emitBTFExtSection() {
OS.EmitIntValue(LineInfo.LineNum << 10 | LineInfo.ColumnNum, 4);
}
}
+
+ // Emit offset reloc table.
+ if (OffsetRelocLen) {
+ OS.AddComment("OffsetReloc");
+ OS.EmitIntValue(BTF::BPFOffsetRelocSize, 4);
+ for (const auto &OffsetRelocSec : OffsetRelocTable) {
+ OS.AddComment("Offset reloc section string offset=" +
+ std::to_string(OffsetRelocSec.first));
+ OS.EmitIntValue(OffsetRelocSec.first, 4);
+ OS.EmitIntValue(OffsetRelocSec.second.size(), 4);
+ for (const auto &OffsetRelocInfo : OffsetRelocSec.second) {
+ Asm->EmitLabelReference(OffsetRelocInfo.Label, 4);
+ OS.EmitIntValue(OffsetRelocInfo.TypeID, 4);
+ OS.EmitIntValue(OffsetRelocInfo.OffsetNameOff, 4);
+ }
+ }
+ }
+
+ // Emit extern reloc table.
+ if (ExternRelocLen) {
+ OS.AddComment("ExternReloc");
+ OS.EmitIntValue(BTF::BPFExternRelocSize, 4);
+ for (const auto &ExternRelocSec : ExternRelocTable) {
+ OS.AddComment("Extern reloc section string offset=" +
+ std::to_string(ExternRelocSec.first));
+ OS.EmitIntValue(ExternRelocSec.first, 4);
+ OS.EmitIntValue(ExternRelocSec.second.size(), 4);
+ for (const auto &ExternRelocInfo : ExternRelocSec.second) {
+ Asm->EmitLabelReference(ExternRelocInfo.Label, 4);
+ OS.EmitIntValue(ExternRelocInfo.ExternNameOff, 4);
+ }
+ }
+ }
}
void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
@@ -645,18 +897,42 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
}
SkipInstruction = false;
+ // Collect MapDef types. Map definition needs to collect
+ // pointee types. Do it first. Otherwise, for the following
+ // case:
+ // struct m { ...};
+ // struct t {
+ // struct m *key;
+ // };
+ // foo(struct t *arg);
+ //
+ // struct mapdef {
+ // ...
+ // struct m *key;
+ // ...
+ // } __attribute__((section(".maps"))) hash_map;
+ //
+ // If subroutine foo is traversed first, a type chain
+ // "ptr->struct m(fwd)" will be created and later on
+ // when traversing mapdef, since "ptr->struct m" exists,
+ // the traversal of "struct m" will be omitted.
+ if (MapDefNotCollected) {
+ processGlobals(true);
+ MapDefNotCollected = false;
+ }
+
// Collect all types locally referenced in this function.
// Use RetainedNodes so we can collect all argument names
// even if the argument is not used.
std::unordered_map<uint32_t, StringRef> FuncArgNames;
for (const DINode *DN : SP->getRetainedNodes()) {
if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
- visitTypeEntry(DV->getType().resolve());
-
// Collect function arguments for subprogram func type.
uint32_t Arg = DV->getArg();
- if (Arg)
+ if (Arg) {
+ visitTypeEntry(DV->getType());
FuncArgNames[Arg] = DV->getName();
+ }
}
}
@@ -669,6 +945,9 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
llvm::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId);
uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->completeType(*this);
+
// Construct funcinfo and the first lineinfo for the function.
MCSymbol *FuncLabel = Asm->getFunctionBegin();
BTFFuncInfo FuncInfo;
@@ -691,6 +970,133 @@ void BTFDebug::endFunctionImpl(const MachineFunction *MF) {
SecNameOff = 0;
}
+/// On-demand populate struct types as requested by abstract member
+/// accesses.
+unsigned BTFDebug::populateStructType(const DIType *Ty) {
+ unsigned Id;
+ visitTypeEntry(Ty, Id, false, false);
+ for (const auto &TypeEntry : TypeEntries)
+ TypeEntry->completeType(*this);
+ return Id;
+}
+
+// Find struct/array debuginfo types given a type id.
+void BTFDebug::setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType,
+ BTFTypeArray **PrevArrayType) {
+ for (const auto &StructType : StructTypes) {
+ if (StructType->getId() == TypeId) {
+ *PrevStructType = StructType;
+ return;
+ }
+ }
+ for (const auto &ArrayType : ArrayTypes) {
+ if (ArrayType->getId() == TypeId) {
+ *PrevArrayType = ArrayType;
+ return;
+ }
+ }
+}
+
+/// Generate a struct member offset relocation.
+void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
+ const MCSymbol *ORSym, DIType *RootTy,
+ StringRef AccessPattern) {
+ BTFTypeStruct *PrevStructType = nullptr;
+ BTFTypeArray *PrevArrayType = nullptr;
+ unsigned RootId = populateStructType(RootTy);
+ setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
+ unsigned RootTySize = PrevStructType->getStructSize();
+
+ BTFOffsetReloc OffsetReloc;
+ OffsetReloc.Label = ORSym;
+ OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
+ OffsetReloc.TypeID = RootId;
+
+ uint32_t Start = 0, End = 0, Offset = 0;
+ bool FirstAccess = true;
+ for (auto C : AccessPattern) {
+ if (C != ':') {
+ End++;
+ } else {
+ std::string SubStr = AccessPattern.substr(Start, End - Start);
+ int Loc = std::stoi(SubStr);
+
+ if (FirstAccess) {
+ Offset = Loc * RootTySize;
+ FirstAccess = false;
+ } else if (PrevStructType) {
+ uint32_t MemberOffset, MemberTypeId;
+ PrevStructType->getMemberInfo(Loc, MemberOffset, MemberTypeId);
+
+ Offset += MemberOffset >> 3;
+ PrevStructType = nullptr;
+ setTypeFromId(MemberTypeId, &PrevStructType, &PrevArrayType);
+ } else if (PrevArrayType) {
+ uint32_t LocOffset, ElementTypeId;
+ PrevArrayType->getLocInfo(Loc, LocOffset, ElementTypeId);
+
+ Offset += LocOffset;
+ PrevArrayType = nullptr;
+ setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
+ }
+ Start = End + 1;
+ End = Start;
+ }
+ }
+ AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
+ OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
+}
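The access string parsed above is a ':'-terminated list of indices produced elsewhere, so the concrete pattern below is only an assumed example of the arithmetic the loop performs.

    #include <stddef.h>

    /* Assumed example: for the access string "0:1:" against struct s, the loop
     * above computes 0 * sizeof(struct s) plus the byte offset of member b. */
    struct s { int a; int b; };

    unsigned int example_offset(void)
    {
      return 0 * sizeof(struct s) + offsetof(struct s, b); /* == 4 */
    }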
+
+void BTFDebug::processLDimm64(const MachineInstr *MI) {
+ // If the insn is an LD_imm64, the following two cases
+ // will generate an .BTF.ext record.
+ //
+ // If the insn is "r2 = LD_imm64 @__BTF_...",
+ // add this insn into the .BTF.ext OffsetReloc subsection.
+ // Relocation looks like:
+ // . SecName:
+ // . InstOffset
+ // . TypeID
+ // . OffSetNameOff
+ // Later, the insn is replaced with "r2 = <offset>"
+ // where "<offset>" is the offset computed from the current
+ // type definitions.
+ //
+ // If the insn is "r2 = LD_imm64 @VAR" and VAR is
+ // a patchable external global, add this insn into the .BTF.ext
+ // ExternReloc subsection.
+ // Relocation looks like:
+ // . SecName:
+ // . InstOffset
+ // . ExternNameOff
+ // Later, the insn is replaced with "r2 = <value>" or
+ // "LD_imm64 r2, <value>" where "<value>" = 0.
+
+ // Check whether this insn is a candidate for relocation.
+ const MachineOperand &MO = MI->getOperand(1);
+ if (MO.isGlobal()) {
+ const GlobalValue *GVal = MO.getGlobal();
+ auto *GVar = dyn_cast<GlobalVariable>(GVal);
+ if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+ MCSymbol *ORSym = OS.getContext().createTempSymbol();
+ OS.EmitLabel(ORSym);
+
+ MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
+ DIType *Ty = dyn_cast<DIType>(MDN);
+ generateOffsetReloc(MI, ORSym, Ty, GVar->getName());
+ } else if (GVar && !GVar->hasInitializer() && GVar->hasExternalLinkage() &&
+ GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
+ MCSymbol *ORSym = OS.getContext().createTempSymbol();
+ OS.EmitLabel(ORSym);
+
+ BTFExternReloc ExternReloc;
+ ExternReloc.Label = ORSym;
+ ExternReloc.ExternNameOff = addString(GVar->getName());
+ ExternRelocTable[SecNameOff].push_back(ExternReloc);
+ }
+ }
+}
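For reference, the record described in the comment of processLDimm64() can be pictured as below. The field names and per-section grouping follow that comment; exact widths and ordering are defined by the .BTF.ext format in BTF.h, and the struct names here are illustrative only:

#include <cstdint>
#include <vector>

// Illustrative shape of the .BTF.ext OffsetReloc subsection: one group per
// code section, one record per candidate LD_imm64 instruction.
struct OffsetRelocRecordSketch {
  uint32_t InsnOffset;    // byte offset of the insn within its section
  uint32_t TypeID;        // BTF type id of the root struct being accessed
  uint32_t OffsetNameOff; // string-table offset of the access pattern
};

struct OffsetRelocSectionSketch {
  uint32_t SecNameOff;                          // section name in string table
  std::vector<OffsetRelocRecordSketch> Records; // filled from OffsetRelocTable
};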
+
void BTFDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
@@ -711,6 +1117,9 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->getOpcode() == BPF::LD_imm64)
+ processLDimm64(MI);
+
// Skip this instruction if no DebugLoc or the DebugLoc
// is the same as the previous instruction.
const DebugLoc &DL = MI->getDebugLoc();
@@ -739,13 +1148,145 @@ void BTFDebug::beginInstruction(const MachineInstr *MI) {
PrevInstLoc = DL;
}
-void BTFDebug::endModule() {
+void BTFDebug::processGlobals(bool ProcessingMapDef) {
// Collect all types referenced by globals.
const Module *M = MMI->getModule();
- for (const DICompileUnit *CUNode : M->debug_compile_units()) {
- for (const auto *GVE : CUNode->getGlobalVariables()) {
- DIGlobalVariable *GV = GVE->getVariable();
- visitTypeEntry(GV->getType().resolve());
+ for (const GlobalVariable &Global : M->globals()) {
+ // Ignore external globals for now.
+ if (!Global.hasInitializer() && Global.hasExternalLinkage())
+ continue;
+
+ // Decide the section name.
+ StringRef SecName;
+ if (Global.hasSection()) {
+ SecName = Global.getSection();
+ } else {
+ // data, bss, or readonly sections
+ if (Global.isConstant())
+ SecName = ".rodata";
+ else
+ SecName = Global.getInitializer()->isZeroValue() ? ".bss" : ".data";
+ }
+
+ if (ProcessingMapDef != SecName.startswith(".maps"))
+ continue;
+
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ Global.getDebugInfo(GVs);
+ uint32_t GVTypeId = 0;
+ for (auto *GVE : GVs) {
+ if (SecName.startswith(".maps"))
+ visitMapDefType(GVE->getVariable()->getType(), GVTypeId);
+ else
+ visitTypeEntry(GVE->getVariable()->getType(), GVTypeId, false, false);
+ break;
+ }
+
+ // Only support the following globals:
+ // . static variables
+ // . non-static global variables with section attributes
+ // Essentially means:
+ // . .bss/.data/.rodata DataSec entities only contain static data
+ // . Other DataSec entities contain static or initialized global data.
+ // Initialized global data are mostly used for finding map key/value type
+ // id's. Whether DataSec is readonly or not can be found from
+ // corresponding ELF section flags.
+ auto Linkage = Global.getLinkage();
+ if (Linkage != GlobalValue::InternalLinkage &&
+ (Linkage != GlobalValue::ExternalLinkage || !Global.hasSection()))
+ continue;
+
+ uint32_t GVarInfo = Linkage == GlobalValue::ExternalLinkage
+ ? BTF::VAR_GLOBAL_ALLOCATED
+ : BTF::VAR_STATIC;
+ auto VarEntry =
+ llvm::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
+ uint32_t VarId = addType(std::move(VarEntry));
+
+ // Find or create a DataSec
+ if (DataSecEntries.find(SecName) == DataSecEntries.end()) {
+ DataSecEntries[SecName] = llvm::make_unique<BTFKindDataSec>(Asm, SecName);
+ }
+
+ // Calculate symbol size
+ const DataLayout &DL = Global.getParent()->getDataLayout();
+ uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
+
+ DataSecEntries[SecName]->addVar(VarId, Asm->getSymbol(&Global), Size);
+ }
+}
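The section-name choice above reduces to: an explicit section attribute wins (this is how .maps globals are recognized); otherwise constants land in .rodata, zero-initialized data in .bss, and everything else in .data. A small sketch of that decision, with the boolean parameters standing in for the GlobalVariable queries:

#include <string>

// Illustrative classification; HasSection/IsConstant/IsZeroInit stand in for
// Global.hasSection(), Global.isConstant() and isZeroValue() on the initializer.
std::string pickSectionName(bool HasSection, const std::string &ExplicitSec,
                            bool IsConstant, bool IsZeroInit) {
  if (HasSection)
    return ExplicitSec;                 // e.g. ".maps" for map definitions
  if (IsConstant)
    return ".rodata";
  return IsZeroInit ? ".bss" : ".data";
}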
+
+/// Emit proper patchable instructions.
+bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
+ if (MI->getOpcode() == BPF::LD_imm64) {
+ const MachineOperand &MO = MI->getOperand(1);
+ if (MO.isGlobal()) {
+ const GlobalValue *GVal = MO.getGlobal();
+ auto *GVar = dyn_cast<GlobalVariable>(GVal);
+ if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) {
+ MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
+ DIType *Ty = dyn_cast<DIType>(MDN);
+ std::string TypeName = Ty->getName();
+ int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
+
+ // Emit "mov ri, <imm>" for abstract member accesses.
+ OutMI.setOpcode(BPF::MOV_ri);
+ OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ OutMI.addOperand(MCOperand::createImm(Imm));
+ return true;
+ } else if (GVar && !GVar->hasInitializer() &&
+ GVar->hasExternalLinkage() &&
+ GVar->getSection() == BPFCoreSharedInfo::PatchableExtSecName) {
+ const IntegerType *IntTy = dyn_cast<IntegerType>(GVar->getValueType());
+ assert(IntTy);
+ // For patchable externals, emit "LD_imm64 ri, 0" if the external
+ // variable is 64 bits wide, and "mov ri, 0" otherwise.
+ if (IntTy->getBitWidth() == 64)
+ OutMI.setOpcode(BPF::LD_imm64);
+ else
+ OutMI.setOpcode(BPF::MOV_ri);
+ OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
+ OutMI.addOperand(MCOperand::createImm(0));
+ return true;
+ }
+ }
+ }
+ return false;
+}
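InstLower() is meant to be queried from the BPF asm printer before normal MCInst lowering: if the BTF handler recognizes a patchable LD_imm64 it supplies the substituted MCInst, otherwise the generic lowering path runs. A self-contained sketch of that hand-off, with all types stubbed out (the real call site lives in the BPF AsmPrinter and may differ in detail):

// All types below are stubs; only the control flow of the hand-off matters.
struct MCInstStub {};
struct MachineInstrStub {};

struct BTFHandlerStub {
  // Mirrors BTFDebug::InstLower: returns true when it filled OutMI itself.
  bool InstLower(const MachineInstrStub *, MCInstStub &) { return false; }
};

void lowerGenerically(const MachineInstrStub *, MCInstStub &) {}

void emitInstruction(BTFHandlerStub *BTF, const MachineInstrStub *MI) {
  MCInstStub Out;
  if (!BTF || !BTF->InstLower(MI, Out))
    lowerGenerically(MI, Out); // fall back to the usual MCInst lowering
  // ...hand Out to the streamer...
}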
+
+void BTFDebug::endModule() {
+ // Collect MapDef globals if not collected yet.
+ if (MapDefNotCollected) {
+ processGlobals(true);
+ MapDefNotCollected = false;
+ }
+
+ // Collect global types/variables except MapDef globals.
+ processGlobals(false);
+ for (auto &DataSec : DataSecEntries)
+ addType(std::move(DataSec.second));
+
+ // Fixups
+ for (auto &Fixup : FixupDerivedTypes) {
+ StringRef TypeName = Fixup.first;
+ bool IsUnion = Fixup.second.first;
+
+ // Search through struct types
+ uint32_t StructTypeId = 0;
+ for (const auto &StructType : StructTypes) {
+ if (StructType->getName() == TypeName) {
+ StructTypeId = StructType->getId();
+ break;
+ }
+ }
+
+ if (StructTypeId == 0) {
+ auto FwdTypeEntry = llvm::make_unique<BTFTypeFwd>(TypeName, IsUnion);
+ StructTypeId = addType(std::move(FwdTypeEntry));
+ }
+
+ for (auto &DType : Fixup.second.second) {
+ DType->setPointeeType(StructTypeId);
}
}
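The fixup loop above resolves pointer entries that were left pointing at a named struct/union rather than a concrete type id: if a real definition was emitted, its id is reused; otherwise a BTF forward declaration is created and all waiting pointers are retargeted at it. A minimal sketch of that resolve-or-forward-declare step (names and the callback are illustrative):

#include <cstdint>
#include <functional>
#include <map>
#include <string>

using TypeId = uint32_t;

// Return the id the pending pointers should use: a previously emitted struct
// definition if one exists, or a freshly emitted forward declaration.
TypeId resolveOrForwardDeclare(
    const std::string &Name, bool IsUnion,
    const std::map<std::string, TypeId> &EmittedStructs,
    const std::function<TypeId(const std::string &, bool)> &emitFwdDecl) {
  auto It = EmittedStructs.find(Name);
  if (It != EmittedStructs.end())
    return It->second;
  return emitFwdDecl(Name, IsUnion);
}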
diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h
index afd4ed87f63d..6c0cdde17d9b 100644
--- a/lib/Target/BPF/BTFDebug.h
+++ b/lib/Target/BPF/BTFDebug.h
@@ -1,9 +1,8 @@
//===- BTFDebug.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -33,10 +32,12 @@ class MachineFunction;
class BTFTypeBase {
protected:
uint8_t Kind;
+ bool IsCompleted;
uint32_t Id;
struct BTF::CommonType BTFType;
public:
+ BTFTypeBase() : IsCompleted(false) {}
virtual ~BTFTypeBase() = default;
void setId(uint32_t Id) { this->Id = Id; }
uint32_t getId() { return Id; }
@@ -55,11 +56,13 @@ public:
/// volatile, typedef and restrict.
class BTFTypeDerived : public BTFTypeBase {
const DIDerivedType *DTy;
+ bool NeedsFixup;
public:
- BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag);
+ BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup);
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
+ void setPointeeType(uint32_t PointeeType);
};
/// Handle struct or union forward declaration.
@@ -101,14 +104,15 @@ public:
/// Handle array type.
class BTFTypeArray : public BTFTypeBase {
- const DICompositeType *ATy;
+ uint32_t ElemSize;
struct BTF::BTFArray ArrayInfo;
public:
- BTFTypeArray(const DICompositeType *ATy);
+ BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
+ void getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId);
};
/// Handle struct/union type.
@@ -125,6 +129,9 @@ public:
}
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
+ std::string getName();
+ void getMemberInfo(uint32_t Loc, uint32_t &Offset, uint32_t &MemberType);
+ uint32_t getStructSize();
};
/// Handle function pointer.
@@ -154,6 +161,37 @@ public:
void emitType(MCStreamer &OS);
};
+/// Handle variable instances
+class BTFKindVar : public BTFTypeBase {
+ StringRef Name;
+ uint32_t Info;
+
+public:
+ BTFKindVar(StringRef VarName, uint32_t TypeId, uint32_t VarInfo);
+ uint32_t getSize() { return BTFTypeBase::getSize() + 4; }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
+/// Handle data sections
+class BTFKindDataSec : public BTFTypeBase {
+ AsmPrinter *Asm;
+ std::string Name;
+ std::vector<std::tuple<uint32_t, const MCSymbol *, uint32_t>> Vars;
+
+public:
+ BTFKindDataSec(AsmPrinter *AsmPrt, std::string SecName);
+ uint32_t getSize() {
+ return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
+ }
+ void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
+ Vars.push_back(std::make_tuple(Id, Sym, Size));
+ }
+ std::string getName() { return Name; }
+ void completeType(BTFDebug &BDebug);
+ void emitType(MCStreamer &OS);
+};
+
/// String table.
class BTFStringTable {
/// String table size in bytes.
@@ -189,6 +227,19 @@ struct BTFLineInfo {
uint32_t ColumnNum; ///< the column number
};
+/// Represent one offset relocation.
+struct BTFOffsetReloc {
+ const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc
+ uint32_t TypeID; ///< Type ID
+ uint32_t OffsetNameOff; ///< The string to traverse types
+};
+
+/// Represent one extern relocation.
+struct BTFExternReloc {
+ const MCSymbol *Label; ///< MCSymbol identifying insn for the reloc
+ uint32_t ExternNameOff; ///< The extern variable name
+};
+
/// Collect and emit BTF information.
class BTFDebug : public DebugHandlerBase {
MCStreamer &OS;
@@ -196,17 +247,26 @@ class BTFDebug : public DebugHandlerBase {
bool LineInfoGenerated;
uint32_t SecNameOff;
uint32_t ArrayIndexTypeId;
+ bool MapDefNotCollected;
BTFStringTable StringTable;
std::vector<std::unique_ptr<BTFTypeBase>> TypeEntries;
std::unordered_map<const DIType *, uint32_t> DIToIdMap;
- std::unordered_map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
- std::unordered_map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+ std::map<uint32_t, std::vector<BTFFuncInfo>> FuncInfoTable;
+ std::map<uint32_t, std::vector<BTFLineInfo>> LineInfoTable;
+ std::map<uint32_t, std::vector<BTFOffsetReloc>> OffsetRelocTable;
+ std::map<uint32_t, std::vector<BTFExternReloc>> ExternRelocTable;
StringMap<std::vector<std::string>> FileContent;
+ std::map<std::string, std::unique_ptr<BTFKindDataSec>> DataSecEntries;
+ std::vector<BTFTypeStruct *> StructTypes;
+ std::vector<BTFTypeArray *> ArrayTypes;
+ std::map<std::string, int64_t> AccessOffsets;
+ std::map<StringRef, std::pair<bool, std::vector<BTFTypeDerived *>>>
+ FixupDerivedTypes;
/// Add types to TypeEntries.
/// @{
/// Add types to TypeEntries and DIToIdMap.
- void addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
+ uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry, const DIType *Ty);
/// Add types to TypeEntries only and return type id.
uint32_t addType(std::unique_ptr<BTFTypeBase> TypeEntry);
/// @}
@@ -214,17 +274,23 @@ class BTFDebug : public DebugHandlerBase {
/// IR type visiting functions.
/// @{
void visitTypeEntry(const DIType *Ty);
- void visitBasicType(const DIBasicType *BTy);
+ void visitTypeEntry(const DIType *Ty, uint32_t &TypeId, bool CheckPointer,
+ bool SeenPointer);
+ void visitBasicType(const DIBasicType *BTy, uint32_t &TypeId);
void visitSubroutineType(
const DISubroutineType *STy, bool ForSubprog,
const std::unordered_map<uint32_t, StringRef> &FuncArgNames,
uint32_t &TypeId);
- void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion);
- void visitCompositeType(const DICompositeType *CTy);
- void visitStructType(const DICompositeType *STy, bool IsStruct);
- void visitArrayType(const DICompositeType *ATy);
- void visitEnumType(const DICompositeType *ETy);
- void visitDerivedType(const DIDerivedType *DTy);
+ void visitFwdDeclType(const DICompositeType *CTy, bool IsUnion,
+ uint32_t &TypeId);
+ void visitCompositeType(const DICompositeType *CTy, uint32_t &TypeId);
+ void visitStructType(const DICompositeType *STy, bool IsStruct,
+ uint32_t &TypeId);
+ void visitArrayType(const DICompositeType *ATy, uint32_t &TypeId);
+ void visitEnumType(const DICompositeType *ETy, uint32_t &TypeId);
+ void visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
+ bool CheckPointer, bool SeenPointer);
+ void visitMapDefType(const DIType *Ty, uint32_t &TypeId);
/// @}
/// Get the file content for the subprogram. Certain lines of the file
@@ -235,6 +301,23 @@ class BTFDebug : public DebugHandlerBase {
void constructLineInfo(const DISubprogram *SP, MCSymbol *Label, uint32_t Line,
uint32_t Column);
+ /// Generate types and variables for globals.
+ void processGlobals(bool ProcessingMapDef);
+
+ /// Generate one offset relocation record.
+ void generateOffsetReloc(const MachineInstr *MI, const MCSymbol *ORSym,
+ DIType *RootTy, StringRef AccessPattern);
+
+ /// Set the to-be-traversed Struct/Array Type based on TypeId.
+ void setTypeFromId(uint32_t TypeId, BTFTypeStruct **PrevStructType,
+ BTFTypeArray **PrevArrayType);
+
+ /// Populate an unprocessed struct type on demand.
+ unsigned populateStructType(const DIType *Ty);
+
+ /// Process LD_imm64 instructions.
+ void processLDimm64(const MachineInstr *MI);
+
/// Emit common header of .BTF and .BTF.ext sections.
void emitCommonHeader();
@@ -254,6 +337,9 @@ protected:
public:
BTFDebug(AsmPrinter *AP);
+ /// Emit proper patchable instructions; return true if OutMI was produced.
+ bool InstLower(const MachineInstr *MI, MCInst &OutMI);
+
/// Get the special array index type id.
uint32_t getArrayIndexTypeId() {
assert(ArrayIndexTypeId);
diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 9f80b762fe36..c845524ad657 100644
--- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -1,9 +1,8 @@
//===- BPFDisassembler.cpp - Disassembler for BPF ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/BPFMCTargetDesc.h"
+#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -40,7 +40,7 @@ public:
BPF_STX = 0x3,
BPF_ALU = 0x4,
BPF_JMP = 0x5,
- BPF_RES = 0x6,
+ BPF_JMP32 = 0x6,
BPF_ALU64 = 0x7
};
@@ -172,9 +172,10 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
uint8_t InstClass = getInstClass(Insn);
+ uint8_t InstMode = getInstMode(Insn);
if ((InstClass == BPF_LDX || InstClass == BPF_STX) &&
getInstSize(Insn) != BPF_DW &&
- getInstMode(Insn) == BPF_MEM &&
+ (InstMode == BPF_MEM || InstMode == BPF_XADD) &&
STI.getFeatureBits()[BPF::ALU32])
Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address,
this, STI);
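The extra mode check above follows from the standard eBPF opcode-byte split, which the disassembler's getInstClass/getInstSize/getInstMode helpers apply to the opcode extracted from the 64-bit instruction word: class in the low three bits, size in the next two, mode in the top three. Sketched on a plain opcode byte:

#include <cstdint>

// Standard eBPF opcode-byte layout (load/store classes):
//   bits 0-2 class, bits 3-4 size, bits 5-7 mode.
constexpr uint8_t instClass(uint8_t Op) { return Op & 0x07; }
constexpr uint8_t instSize(uint8_t Op)  { return (Op >> 3) & 0x03; }
constexpr uint8_t instMode(uint8_t Op)  { return (Op >> 5) & 0x07; }

// With those helpers, the new condition reads: use the ALU32 decoder table
// when the class is LDX/STX, the size is not DW, and the mode is MEM or XADD.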
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 1822d8688fa2..ba35a175b9a7 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- BPFAsmBackend.cpp - BPF Assembler Backend -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -73,12 +72,12 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
bool IsResolved,
const MCSubtargetInfo *STI) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
- if (Value) {
- MCContext &Ctx = Asm.getContext();
- Ctx.reportError(Fixup.getLoc(),
- "Unsupported relocation: try to compile with -O2 or above, "
- "or check your static variable usage");
- }
+ // The Value is 0 for global variables, and the in-section offset
+ // for static variables. Write to the immediate field of the inst.
+ assert(Value <= UINT32_MAX);
+ support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4],
+ static_cast<uint32_t>(Value),
+ Endian);
} else if (Fixup.getKind() == FK_Data_4) {
support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
} else if (Fixup.getKind() == FK_Data_8) {
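The "+ 4" in the write above targets the 32-bit immediate field of the fixed 8-byte BPF instruction encoding: opcode byte, register byte (dst/src nibbles), 16-bit offset, 32-bit immediate. A small sketch of that layout and of the patch being applied (simplified; the real code honors the target's byte order, bpfel vs bpfeb, via support::endian::write):

#include <cstdint>
#include <cstring>

// Illustrative layout of one 8-byte BPF instruction; the fixup value is
// written into the immediate field, i.e. at byte offset 4 within the insn.
struct BpfInsnSketch {
  uint8_t Opcode;  // byte 0
  uint8_t Regs;    // byte 1: dst and src register nibbles
  int16_t Off;     // bytes 2-3
  int32_t Imm;     // bytes 4-7  <- FK_SecRel_4/8 value lands here
};

// Simplified patch helper for a little-endian target.
void patchImm(uint8_t *InsnBytes, uint32_t Value) {
  std::memcpy(InsnBytes + 4, &Value, sizeof(Value));
}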
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 32e79d0f527e..057bbf5c3b06 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- BPFELFObjectWriter.cpp - BPF ELF Writer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -51,21 +50,33 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
case FK_Data_8:
return ELF::R_BPF_64_64;
case FK_Data_4:
- // .BTF.ext generates FK_Data_4 relocations for
- // insn offset by creating temporary labels.
- // The insn offset is within the code section and
- // already been fulfilled by applyFixup(). No
- // further relocation is needed.
if (const MCSymbolRefExpr *A = Target.getSymA()) {
- if (A->getSymbol().isTemporary()) {
- MCSection &Section = A->getSymbol().getSection();
+ const MCSymbol &Sym = A->getSymbol();
+
+ if (Sym.isDefined()) {
+ MCSection &Section = Sym.getSection();
const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
assert(SectionELF && "Null section for reloc symbol");
- // The reloc symbol should be in text section.
unsigned Flags = SectionELF->getFlags();
- if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
- return ELF::R_BPF_NONE;
+
+ if (Sym.isTemporary()) {
+ // .BTF.ext generates FK_Data_4 relocations for
+ // insn offset by creating temporary labels.
+ // The insn offset is within the code section and has
+ // already been fulfilled by applyFixup(). No
+ // further relocation is needed.
+ // The reloc symbol should be in text section.
+ if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+ return ELF::R_BPF_NONE;
+ } else {
+ // .BTF generates FK_Data_4 relocations for variable
+ // offset in DataSec kind. Similar to the above .BTF.ext
+ // insn offset, no further relocation is needed.
+ // The reloc symbol should be in data section.
+ if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_WRITE))
+ return ELF::R_BPF_NONE;
+ }
}
}
return ELF::R_BPF_64_32;
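The FK_Data_4 handling above condenses to a small decision: temporary labels that live in an allocatable, executable section (the .BTF.ext insn-offset labels) and defined non-temporary symbols in an allocatable, writable section (the .BTF DataSec variable offsets) need no runtime relocation; everything else still gets R_BPF_64_32. The same decision as a standalone helper (illustrative, not part of the patch):

enum class Data4Reloc { None, Bpf64_32 };

// Inputs mirror the queries made above: symbol definedness/temporariness and
// the SHF_ALLOC / SHF_EXECINSTR / SHF_WRITE flags of the symbol's section.
Data4Reloc classifyData4(bool IsDefined, bool IsTemporary,
                         bool Alloc, bool Exec, bool Write) {
  if (IsDefined && IsTemporary && Alloc && Exec)
    return Data4Reloc::None;     // .BTF.ext insn offsets into .text
  if (IsDefined && !IsTemporary && Alloc && Write)
    return Data4Reloc::None;     // .BTF DataSec offsets into data sections
  return Data4Reloc::Bpf64_32;
}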
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
index 20627da38817..079202994c8d 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- BPFInstPrinter.cpp - Convert BPF MCInst to asm syntax -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "BPFInstPrinter.h"
+#include "MCTargetDesc/BPFInstPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
index bb0b0d71da53..8c9a0bc94cff 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.h
@@ -1,9 +1,8 @@
//===-- BPFInstPrinter.h - Convert BPF MCInst to asm syntax -------*- C++ -*--//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_BPF_INSTPRINTER_BPFINSTPRINTER_H
-#define LLVM_LIB_TARGET_BPF_INSTPRINTER_BPFINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFINSTPRINTER_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index af3ad5315253..04a6a87cebc9 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- BPFMCAsmInfo.h - BPF asm properties -------------------*- C++ -*--====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 437f658caf6e..f9abe76c976b 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- BPFMCCodeEmitter.cpp - Convert BPF code to machine code -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,9 +63,10 @@ public:
const MCSubtargetInfo &STI) const override;
private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 834b57527882..fa27b335f3a1 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- BPFMCTargetDesc.cpp - BPF Target Descriptions ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,9 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/BPFMCTargetDesc.h"
-#include "BPF.h"
-#include "InstPrinter/BPFInstPrinter.h"
+#include "MCTargetDesc/BPFInstPrinter.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
+#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 6d2f0a1601e6..1a391321f60d 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- BPFMCTargetDesc.h - BPF Target Descriptions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,10 +33,6 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheBPFleTarget();
-Target &getTheBPFbeTarget();
-Target &getTheBPFTarget();
-
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 1f7b8a04d589..5dfa915034ba 100644
--- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -1,30 +1,28 @@
//===-- BPFTargetInfo.cpp - BPF Target Implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "BPF.h"
+#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
-namespace llvm {
-Target &getTheBPFleTarget() {
+Target &llvm::getTheBPFleTarget() {
static Target TheBPFleTarget;
return TheBPFleTarget;
}
-Target &getTheBPFbeTarget() {
+Target &llvm::getTheBPFbeTarget() {
static Target TheBPFbeTarget;
return TheBPFbeTarget;
}
-Target &getTheBPFTarget() {
+Target &llvm::getTheBPFTarget() {
static Target TheBPFTarget;
return TheBPFTarget;
}
-} // namespace llvm
extern "C" void LLVMInitializeBPFTargetInfo() {
TargetRegistry::RegisterTarget(getTheBPFTarget(), "bpf", "BPF (host endian)",
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.h b/lib/Target/BPF/TargetInfo/BPFTargetInfo.h
new file mode 100644
index 000000000000..150526c1a9db
--- /dev/null
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- BPFTargetInfo.h - BPF Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_TARGETINFO_BPFTARGETINFO_H
+#define LLVM_LIB_TARGET_BPF_TARGETINFO_BPFTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheBPFleTarget();
+Target &getTheBPFbeTarget();
+Target &getTheBPFTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_BPF_TARGETINFO_BPFTARGETINFO_H
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 2eb1f0fc8bd9..0881bf841f90 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -1,15 +1,13 @@
//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mcasmparser"
-#include "Hexagon.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
@@ -17,6 +15,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonShuffler.h"
+#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -1684,8 +1683,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
int64_t Value;
MCExpr const &Expr = *Imm.getExpr();
bool Absolute = Expr.evaluateAsAbsolute(Value);
- assert(Absolute);
- (void)Absolute;
+ if (!Absolute)
+ return Match_InvalidOperand;
if (!HexagonMCInstrInfo::mustExtend(Expr) &&
((Value <= -256) || Value >= 256))
return Match_InvalidOperand;
@@ -1707,8 +1706,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MCInst TmpInst;
int64_t Value;
bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
- assert(Absolute);
- (void)Absolute;
+ if (!Absolute)
+ return Match_InvalidOperand;
if (Value == 0) { // convert to $Rd = $Rs
TmpInst.setOpcode(Hexagon::A2_tfr);
MCOperand &Rd = Inst.getOperand(0);
@@ -1737,8 +1736,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MCOperand &Imm = Inst.getOperand(2);
int64_t Value;
bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
- assert(Absolute);
- (void)Absolute;
+ if (!Absolute)
+ return Match_InvalidOperand;
if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1])
MCInst TmpInst;
unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
@@ -1861,8 +1860,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MCOperand &Imm = Inst.getOperand(2);
int64_t Value;
bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
- assert(Absolute);
- (void)Absolute;
+ if (!Absolute)
+ return Match_InvalidOperand;
if (Value == 0)
Inst.setOpcode(Hexagon::S2_vsathub);
else {
@@ -1881,8 +1880,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MCOperand &Imm = Inst.getOperand(2);
int64_t Value;
bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
- assert(Absolute);
- (void)Absolute;
+ if (!Absolute)
+ return Match_InvalidOperand;
if (Value == 0) {
MCInst TmpInst;
unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
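All of the Hexagon assembler-parser hunks above make the same change: an immediate expression that cannot be evaluated to an absolute value now yields Match_InvalidOperand (a normal operand diagnostic) instead of tripping an assertion on user-supplied assembly. The recurring shape, reduced to a standalone check with illustrative names and an illustrative range test:

#include <cstdint>

enum MatchKind { Match_Ok, Match_InvalidOperand };

MatchKind checkAbsoluteImm(bool Absolute, int64_t Value, int64_t Lo, int64_t Hi) {
  if (!Absolute)
    return Match_InvalidOperand; // e.g. a symbolic or relocatable expression
  if (Value < Lo || Value > Hi)
    return Match_InvalidOperand; // out of range for this instruction form
  return Match_Ok;
}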
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 69529b0d1162..b7e95caf24fb 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -1,9 +1,8 @@
//===- BitTracker.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 058225c0d812..efb21805b801 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -1,9 +1,8 @@
//===- BitTracker.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 428b42eba30d..99e3ee871570 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -1,9 +1,8 @@
//===- HexagonDisassembler.cpp - Disassembler for Hexagon ISA -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,6 +12,7 @@
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCContext.h"
@@ -149,7 +149,7 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-#include "HexagonDepDecoders.h"
+#include "HexagonDepDecoders.inc"
#include "HexagonGenDisassemblerTables.inc"
static MCDisassembler *createHexagonDisassembler(const Target &T,
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index c18492da803b..58dadf012da5 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -1,9 +1,8 @@
//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 868353e18832..26869391c7a3 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -1,9 +1,8 @@
//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index f44fb16e2d8e..b07d15609ede 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@
#include "MCTargetDesc/HexagonMCExpr.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -92,9 +92,7 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
GetCPISymbol(MO.getIndex())->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress:
- // Computing the address of a global symbol, not calling it.
- getSymbol(MO.getGlobal())->print(O, MAI);
- printOffset(MO.getOffset(), O);
+ PrintSymbolOperand(MO, O);
return;
}
}
@@ -114,7 +112,6 @@ bool HexagonAsmPrinter::isBlockOnlyReachableByFallthrough(
/// PrintAsmOperand - Print out an operand for an inline asm expression.
bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &OS) {
// Does this asm operand have a single letter operand modifier?
@@ -125,11 +122,7 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
- case 'c': // Don't print "$" before a global var name or constant.
- // Hexagon never has a prefix.
- printOperand(MI, OpNo, OS);
- return false;
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS);
case 'L':
case 'H': { // The highest-numbered register of a pair.
const MachineOperand &MO = MI->getOperand(OpNo);
@@ -161,7 +154,6 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index d0629d173a65..6c4b664e83f5 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -1,9 +1,8 @@
//===- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONASMPRINTER_H
-#include "Hexagon.h"
#include "HexagonSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -53,11 +51,9 @@ class TargetMachine;
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 1bdebe557a8c..7b75d251ccd3 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1,9 +1,8 @@
//===- HexagonBitSimplify.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 92b6da871a4c..ba50faac2cf9 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -1,9 +1,8 @@
//===- HexagonBitTracker.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
index f0b7c9d91950..02607d50f686 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -1,9 +1,8 @@
//===- HexagonBitTracker.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp
index 48a4505458ae..999150fc8c6e 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -1,9 +1,8 @@
//===- HexagonBlockRanges.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h
index 4da5a970a659..61115e29a708 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.h
+++ b/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -1,9 +1,8 @@
//===- HexagonBlockRanges.h -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index 2fa7888dd02b..ee93739b2c7b 100644
--- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -1,9 +1,8 @@
//===--- HexagonBranchRelaxation.cpp - Identify and relax long jumps ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index a22ac8c9fdf5..11a455ce4347 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -1,9 +1,8 @@
//===- HexagonCFGOptimizer.cpp - CFG optimizations ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
index ed2f87570d6b..5c31a81a1e87 100644
--- a/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -1,9 +1,8 @@
//===- HexagonCallingConv.td ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index f315e24eba62..cf1b0a0f7daa 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -1,9 +1,8 @@
//===- HexagonCommonGEP.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -71,7 +71,7 @@ namespace {
using NodeToValueMap = std::map<GepNode *, Value *>;
using NodeVect = std::vector<GepNode *>;
using NodeChildrenMap = std::map<GepNode *, NodeVect>;
- using UseSet = std::set<Use *>;
+ using UseSet = SetVector<Use *>;
using NodeToUsesMap = std::map<GepNode *, UseSet>;
// Numbering map for gep nodes. Used to keep track of ordering for
@@ -980,15 +980,13 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
assert(UF != Uses.end());
UseSet &Us = UF->second;
UseSet NewUs;
- for (UseSet::iterator I = Us.begin(); I != Us.end(); ) {
- User *S = (*I)->getUser();
- UseSet::iterator Nx = std::next(I);
- if (S == R) {
- NewUs.insert(*I);
- Us.erase(I);
- }
- I = Nx;
+ for (Use *U : Us) {
+ if (U->getUser() == R)
+ NewUs.insert(U);
}
+ for (Use *U : NewUs)
+ Us.remove(U); // remove by value; SetVector::erase takes an iterator.
+
if (Us.empty()) {
Node->Flags &= ~GepNode::Used;
Uses.erase(UF);
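Two things change in the hunk above: UseSet becomes a SetVector, so iteration follows insertion order rather than pointer order, and the erase-while-iterating loop is split into a collect pass followed by a removal pass (the in-tree comment notes that erase takes an iterator, hence removal by value). The same collect-then-remove pattern in a stripped-down, container-agnostic form:

#include <algorithm>
#include <vector>

// Gather matching elements first, then remove them in a second pass, so the
// container is never mutated while it is being walked.
std::vector<int> splitOff(std::vector<int> &Us, int Match) {
  std::vector<int> Moved;
  for (int U : Us)
    if (U == Match)
      Moved.push_back(U);
  for (int U : Moved)
    Us.erase(std::remove(Us.begin(), Us.end(), U), Us.end());
  return Moved;
}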
diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp
index ba9f638796eb..cfed0ecef272 100644
--- a/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1,9 +1,8 @@
//===- HexagonConstExtenders.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index fa192391313e..d1fde5da5fe8 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -1,9 +1,8 @@
//===- HexagonConstPropagation.cpp ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -80,18 +79,21 @@ namespace {
// A representation of a register as it can appear in a MachineOperand,
// i.e. a pair register:subregister.
- struct Register {
+
+ // FIXME: Use TargetInstrInfo::RegSubRegPair. Also duplicated in
+ // HexagonGenPredicate
+ struct RegisterSubReg {
unsigned Reg, SubReg;
- explicit Register(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {}
- explicit Register(const MachineOperand &MO)
+ explicit RegisterSubReg(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {}
+ explicit RegisterSubReg(const MachineOperand &MO)
: Reg(MO.getReg()), SubReg(MO.getSubReg()) {}
void print(const TargetRegisterInfo *TRI = nullptr) const {
dbgs() << printReg(Reg, TRI, SubReg);
}
- bool operator== (const Register &R) const {
+ bool operator== (const RegisterSubReg &R) const {
return (Reg == R.Reg) && (SubReg == R.SubReg);
}
};
@@ -301,7 +303,7 @@ namespace {
using CellMap = MachineConstPropagator::CellMap;
virtual bool evaluate(const MachineInstr &MI, const CellMap &Inputs,
CellMap &Outputs) = 0;
- virtual bool evaluate(const Register &R, const LatticeCell &SrcC,
+ virtual bool evaluate(const RegisterSubReg &R, const LatticeCell &SrcC,
LatticeCell &Result) = 0;
virtual bool evaluate(const MachineInstr &BrI, const CellMap &Inputs,
SetVector<const MachineBasicBlock*> &Targets,
@@ -344,17 +346,17 @@ namespace {
// Helper functions.
- bool getCell(const Register &R, const CellMap &Inputs, LatticeCell &RC);
+ bool getCell(const RegisterSubReg &R, const CellMap &Inputs, LatticeCell &RC);
bool constToInt(const Constant *C, APInt &Val) const;
bool constToFloat(const Constant *C, APFloat &Val) const;
const ConstantInt *intToConst(const APInt &Val) const;
// Compares.
- bool evaluateCMPrr(uint32_t Cmp, const Register &R1, const Register &R2,
+ bool evaluateCMPrr(uint32_t Cmp, const RegisterSubReg &R1, const RegisterSubReg &R2,
const CellMap &Inputs, bool &Result);
- bool evaluateCMPri(uint32_t Cmp, const Register &R1, const APInt &A2,
+ bool evaluateCMPri(uint32_t Cmp, const RegisterSubReg &R1, const APInt &A2,
const CellMap &Inputs, bool &Result);
- bool evaluateCMPrp(uint32_t Cmp, const Register &R1, uint64_t Props2,
+ bool evaluateCMPrp(uint32_t Cmp, const RegisterSubReg &R1, uint64_t Props2,
const CellMap &Inputs, bool &Result);
bool evaluateCMPii(uint32_t Cmp, const APInt &A1, const APInt &A2,
bool &Result);
@@ -363,52 +365,52 @@ namespace {
bool evaluateCMPpp(uint32_t Cmp, uint32_t Props1, uint32_t Props2,
bool &Result);
- bool evaluateCOPY(const Register &R1, const CellMap &Inputs,
+ bool evaluateCOPY(const RegisterSubReg &R1, const CellMap &Inputs,
LatticeCell &Result);
// Logical operations.
- bool evaluateANDrr(const Register &R1, const Register &R2,
+ bool evaluateANDrr(const RegisterSubReg &R1, const RegisterSubReg &R2,
const CellMap &Inputs, LatticeCell &Result);
- bool evaluateANDri(const Register &R1, const APInt &A2,
+ bool evaluateANDri(const RegisterSubReg &R1, const APInt &A2,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateANDii(const APInt &A1, const APInt &A2, APInt &Result);
- bool evaluateORrr(const Register &R1, const Register &R2,
+ bool evaluateORrr(const RegisterSubReg &R1, const RegisterSubReg &R2,
const CellMap &Inputs, LatticeCell &Result);
- bool evaluateORri(const Register &R1, const APInt &A2,
+ bool evaluateORri(const RegisterSubReg &R1, const APInt &A2,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateORii(const APInt &A1, const APInt &A2, APInt &Result);
- bool evaluateXORrr(const Register &R1, const Register &R2,
+ bool evaluateXORrr(const RegisterSubReg &R1, const RegisterSubReg &R2,
const CellMap &Inputs, LatticeCell &Result);
- bool evaluateXORri(const Register &R1, const APInt &A2,
+ bool evaluateXORri(const RegisterSubReg &R1, const APInt &A2,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateXORii(const APInt &A1, const APInt &A2, APInt &Result);
// Extensions.
- bool evaluateZEXTr(const Register &R1, unsigned Width, unsigned Bits,
+ bool evaluateZEXTr(const RegisterSubReg &R1, unsigned Width, unsigned Bits,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateZEXTi(const APInt &A1, unsigned Width, unsigned Bits,
APInt &Result);
- bool evaluateSEXTr(const Register &R1, unsigned Width, unsigned Bits,
+ bool evaluateSEXTr(const RegisterSubReg &R1, unsigned Width, unsigned Bits,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateSEXTi(const APInt &A1, unsigned Width, unsigned Bits,
APInt &Result);
// Leading/trailing bits.
- bool evaluateCLBr(const Register &R1, bool Zeros, bool Ones,
+ bool evaluateCLBr(const RegisterSubReg &R1, bool Zeros, bool Ones,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateCLBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result);
- bool evaluateCTBr(const Register &R1, bool Zeros, bool Ones,
+ bool evaluateCTBr(const RegisterSubReg &R1, bool Zeros, bool Ones,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateCTBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result);
// Bitfield extract.
- bool evaluateEXTRACTr(const Register &R1, unsigned Width, unsigned Bits,
+ bool evaluateEXTRACTr(const RegisterSubReg &R1, unsigned Width, unsigned Bits,
unsigned Offset, bool Signed, const CellMap &Inputs,
LatticeCell &Result);
bool evaluateEXTRACTi(const APInt &A1, unsigned Bits, unsigned Offset,
bool Signed, APInt &Result);
// Vector operations.
- bool evaluateSplatr(const Register &R1, unsigned Bits, unsigned Count,
+ bool evaluateSplatr(const RegisterSubReg &R1, unsigned Bits, unsigned Count,
const CellMap &Inputs, LatticeCell &Result);
bool evaluateSplati(const APInt &A1, unsigned Bits, unsigned Count,
APInt &Result);
@@ -620,7 +622,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) {
LLVM_DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN);
const MachineOperand &MD = PN.getOperand(0);
- Register DefR(MD);
+ RegisterSubReg DefR(MD);
assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg));
bool Changed = false;
@@ -647,7 +649,7 @@ Bottomize:
continue;
}
const MachineOperand &SO = PN.getOperand(i);
- Register UseR(SO);
+ RegisterSubReg UseR(SO);
// If the input is not a virtual register, we don't really know what
// value it holds.
if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg))
@@ -690,7 +692,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- Register DefR(MO);
+ RegisterSubReg DefR(MO);
// Only track virtual registers.
if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
continue;
@@ -1066,7 +1068,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) {
// --------------------------------------------------------------------
// Machine const evaluator.
-bool MachineConstEvaluator::getCell(const Register &R, const CellMap &Inputs,
+bool MachineConstEvaluator::getCell(const RegisterSubReg &R, const CellMap &Inputs,
LatticeCell &RC) {
if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
return false;
@@ -1092,8 +1094,8 @@ const ConstantInt *MachineConstEvaluator::intToConst(const APInt &Val) const {
return ConstantInt::get(CX, Val);
}
-bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const Register &R1,
- const Register &R2, const CellMap &Inputs, bool &Result) {
+bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const RegisterSubReg &R1,
+ const RegisterSubReg &R2, const CellMap &Inputs, bool &Result) {
assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
LatticeCell LS1, LS2;
if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2))
@@ -1131,7 +1133,7 @@ bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const Register &R1,
return IsTrue || IsFalse;
}
-bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const Register &R1,
+bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const RegisterSubReg &R1,
const APInt &A2, const CellMap &Inputs, bool &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS;
@@ -1158,7 +1160,7 @@ bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const Register &R1,
return IsTrue || IsFalse;
}
-bool MachineConstEvaluator::evaluateCMPrp(uint32_t Cmp, const Register &R1,
+bool MachineConstEvaluator::evaluateCMPrp(uint32_t Cmp, const RegisterSubReg &R1,
uint64_t Props2, const CellMap &Inputs, bool &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS;
@@ -1351,13 +1353,13 @@ bool MachineConstEvaluator::evaluateCMPpp(uint32_t Cmp, uint32_t Props1,
return false;
}
-bool MachineConstEvaluator::evaluateCOPY(const Register &R1,
+bool MachineConstEvaluator::evaluateCOPY(const RegisterSubReg &R1,
const CellMap &Inputs, LatticeCell &Result) {
return getCell(R1, Inputs, Result);
}
-bool MachineConstEvaluator::evaluateANDrr(const Register &R1,
- const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+bool MachineConstEvaluator::evaluateANDrr(const RegisterSubReg &R1,
+ const RegisterSubReg &R2, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
const LatticeCell &L1 = Inputs.get(R2.Reg);
const LatticeCell &L2 = Inputs.get(R2.Reg);
@@ -1387,7 +1389,7 @@ bool MachineConstEvaluator::evaluateANDrr(const Register &R1,
return !Result.isBottom();
}
-bool MachineConstEvaluator::evaluateANDri(const Register &R1,
+bool MachineConstEvaluator::evaluateANDri(const RegisterSubReg &R1,
const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
if (A2 == -1)
@@ -1423,8 +1425,8 @@ bool MachineConstEvaluator::evaluateANDii(const APInt &A1,
return true;
}
-bool MachineConstEvaluator::evaluateORrr(const Register &R1,
- const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+bool MachineConstEvaluator::evaluateORrr(const RegisterSubReg &R1,
+ const RegisterSubReg &R2, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
const LatticeCell &L1 = Inputs.get(R2.Reg);
const LatticeCell &L2 = Inputs.get(R2.Reg);
@@ -1454,7 +1456,7 @@ bool MachineConstEvaluator::evaluateORrr(const Register &R1,
return !Result.isBottom();
}
-bool MachineConstEvaluator::evaluateORri(const Register &R1,
+bool MachineConstEvaluator::evaluateORri(const RegisterSubReg &R1,
const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
if (A2 == 0)
@@ -1490,8 +1492,8 @@ bool MachineConstEvaluator::evaluateORii(const APInt &A1,
return true;
}
-bool MachineConstEvaluator::evaluateXORrr(const Register &R1,
- const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+bool MachineConstEvaluator::evaluateXORrr(const RegisterSubReg &R1,
+ const RegisterSubReg &R2, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
LatticeCell LS1, LS2;
if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2))
@@ -1519,7 +1521,7 @@ bool MachineConstEvaluator::evaluateXORrr(const Register &R1,
return !Result.isBottom();
}
-bool MachineConstEvaluator::evaluateXORri(const Register &R1,
+bool MachineConstEvaluator::evaluateXORri(const RegisterSubReg &R1,
const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS1;
@@ -1552,7 +1554,7 @@ bool MachineConstEvaluator::evaluateXORii(const APInt &A1,
return true;
}
-bool MachineConstEvaluator::evaluateZEXTr(const Register &R1, unsigned Width,
+bool MachineConstEvaluator::evaluateZEXTr(const RegisterSubReg &R1, unsigned Width,
unsigned Bits, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS1;
@@ -1583,7 +1585,7 @@ bool MachineConstEvaluator::evaluateZEXTi(const APInt &A1, unsigned Width,
return true;
}
-bool MachineConstEvaluator::evaluateSEXTr(const Register &R1, unsigned Width,
+bool MachineConstEvaluator::evaluateSEXTr(const RegisterSubReg &R1, unsigned Width,
unsigned Bits, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS1;
@@ -1648,7 +1650,7 @@ bool MachineConstEvaluator::evaluateSEXTi(const APInt &A1, unsigned Width,
return true;
}
-bool MachineConstEvaluator::evaluateCLBr(const Register &R1, bool Zeros,
+bool MachineConstEvaluator::evaluateCLBr(const RegisterSubReg &R1, bool Zeros,
bool Ones, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS1;
@@ -1683,7 +1685,7 @@ bool MachineConstEvaluator::evaluateCLBi(const APInt &A1, bool Zeros,
return true;
}
-bool MachineConstEvaluator::evaluateCTBr(const Register &R1, bool Zeros,
+bool MachineConstEvaluator::evaluateCTBr(const RegisterSubReg &R1, bool Zeros,
bool Ones, const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
LatticeCell LS1;
@@ -1718,7 +1720,7 @@ bool MachineConstEvaluator::evaluateCTBi(const APInt &A1, bool Zeros,
return true;
}
-bool MachineConstEvaluator::evaluateEXTRACTr(const Register &R1,
+bool MachineConstEvaluator::evaluateEXTRACTr(const RegisterSubReg &R1,
unsigned Width, unsigned Bits, unsigned Offset, bool Signed,
const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
@@ -1776,7 +1778,7 @@ bool MachineConstEvaluator::evaluateEXTRACTi(const APInt &A1, unsigned Bits,
return true;
}
-bool MachineConstEvaluator::evaluateSplatr(const Register &R1,
+bool MachineConstEvaluator::evaluateSplatr(const RegisterSubReg &R1,
unsigned Bits, unsigned Count, const CellMap &Inputs,
LatticeCell &Result) {
assert(Inputs.has(R1.Reg));
@@ -1833,7 +1835,7 @@ namespace {
bool evaluate(const MachineInstr &MI, const CellMap &Inputs,
CellMap &Outputs) override;
- bool evaluate(const Register &R, const LatticeCell &SrcC,
+ bool evaluate(const RegisterSubReg &R, const LatticeCell &SrcC,
LatticeCell &Result) override;
bool evaluate(const MachineInstr &BrI, const CellMap &Inputs,
SetVector<const MachineBasicBlock*> &Targets, bool &FallsThru)
@@ -1848,7 +1850,7 @@ namespace {
const MachineOperand &MO);
void replaceWithNop(MachineInstr &MI);
- bool evaluateHexRSEQ32(Register RL, Register RH, const CellMap &Inputs,
+ bool evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg RH, const CellMap &Inputs,
LatticeCell &Result);
bool evaluateHexCompare(const MachineInstr &MI, const CellMap &Inputs,
CellMap &Outputs);
@@ -1922,14 +1924,14 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
return false;
unsigned Opc = MI.getOpcode();
- Register DefR(MD);
+ RegisterSubReg DefR(MD);
assert(!DefR.SubReg);
if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
return false;
if (MI.isCopy()) {
LatticeCell RC;
- Register SrcR(MI.getOperand(1));
+ RegisterSubReg SrcR(MI.getOperand(1));
bool Eval = evaluateCOPY(SrcR, Inputs, RC);
if (!Eval)
return false;
@@ -1951,7 +1953,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
const MachineOperand &OpLo = LoIs1 ? MI.getOperand(1) : MI.getOperand(3);
const MachineOperand &OpHi = LoIs1 ? MI.getOperand(3) : MI.getOperand(1);
LatticeCell RC;
- Register SrcRL(OpLo), SrcRH(OpHi);
+ RegisterSubReg SrcRL(OpLo), SrcRH(OpHi);
bool Eval = evaluateHexRSEQ32(SrcRL, SrcRH, Inputs, RC);
if (!Eval)
return false;
@@ -2038,7 +2040,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
int64_t B = MI.getOperand(2).getImm();
assert(B >=0 && B < 32);
APInt A(32, (1ull << B), false);
- Register R(MI.getOperand(1));
+ RegisterSubReg R(MI.getOperand(1));
LatticeCell RC = Outputs.get(DefR.Reg);
bool Eval = evaluateORri(R, A, Inputs, RC);
if (!Eval)
@@ -2078,7 +2080,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
using namespace Hexagon;
bool Ones = (Opc == S2_ct1) || (Opc == S2_ct1p);
- Register R1(MI.getOperand(1));
+ RegisterSubReg R1(MI.getOperand(1));
assert(Inputs.has(R1.Reg));
LatticeCell T;
bool Eval = evaluateCTBr(R1, !Ones, Ones, Inputs, T);
@@ -2110,7 +2112,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
bool OnlyZeros = (Opc == S2_cl0) || (Opc == S2_cl0p);
bool OnlyOnes = (Opc == S2_cl1) || (Opc == S2_cl1p);
- Register R1(MI.getOperand(1));
+ RegisterSubReg R1(MI.getOperand(1));
assert(Inputs.has(R1.Reg));
LatticeCell T;
bool Eval = evaluateCLBr(R1, !OnlyOnes, !OnlyZeros, Inputs, T);
@@ -2138,7 +2140,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
{
bool Signed = (Opc == Hexagon::S4_extract) ||
(Opc == Hexagon::S4_extractp);
- Register R1(MI.getOperand(1));
+ RegisterSubReg R1(MI.getOperand(1));
unsigned BW = getRegBitWidth(R1.Reg);
unsigned Bits = MI.getOperand(2).getImm();
unsigned Offset = MI.getOperand(3).getImm();
@@ -2189,7 +2191,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
return true;
}
-bool HexagonConstEvaluator::evaluate(const Register &R,
+bool HexagonConstEvaluator::evaluate(const RegisterSubReg &R,
const LatticeCell &Input, LatticeCell &Result) {
if (!R.SubReg) {
Result = Input;
@@ -2280,7 +2282,7 @@ Undetermined:
if (SimpleBranch) {
const MachineOperand &MD = BrI.getOperand(0);
- Register PR(MD);
+ RegisterSubReg PR(MD);
// If the condition operand has a subregister, this is not something
// we currently recognize.
if (PR.SubReg)
@@ -2502,7 +2504,7 @@ void HexagonConstEvaluator::replaceWithNop(MachineInstr &MI) {
MI.RemoveOperand(0);
}
-bool HexagonConstEvaluator::evaluateHexRSEQ32(Register RL, Register RH,
+bool HexagonConstEvaluator::evaluateHexRSEQ32(RegisterSubReg RL, RegisterSubReg RH,
const CellMap &Inputs, LatticeCell &Result) {
assert(Inputs.has(RL.Reg) && Inputs.has(RH.Reg));
LatticeCell LSL, LSH;
@@ -2571,7 +2573,7 @@ bool HexagonConstEvaluator::evaluateHexCompare(const MachineInstr &MI,
if (Computed) {
// Only create a zero/non-zero cell. At this time there isn't really
// much need for specific values.
- Register DefR(MI.getOperand(0));
+ RegisterSubReg DefR(MI.getOperand(0));
LatticeCell L = Outputs.get(DefR.Reg);
uint32_t P = Result ? ConstantProperties::NonZero
: ConstantProperties::Zero;
@@ -2591,9 +2593,9 @@ bool HexagonConstEvaluator::evaluateHexCompare2(unsigned Opc,
bool Reg1 = Src1.isReg(), Reg2 = Src2.isReg();
bool Imm1 = Src1.isImm(), Imm2 = Src2.isImm();
if (Reg1) {
- Register R1(Src1);
+ RegisterSubReg R1(Src1);
if (Reg2) {
- Register R2(Src2);
+ RegisterSubReg R2(Src2);
return evaluateCMPrr(Cmp, R1, R2, Inputs, Result);
} else if (Imm2) {
APInt A2 = getCmpImm(Opc, 2, Src2);
@@ -2602,7 +2604,7 @@ bool HexagonConstEvaluator::evaluateHexCompare2(unsigned Opc,
} else if (Imm1) {
APInt A1 = getCmpImm(Opc, 1, Src1);
if (Reg2) {
- Register R2(Src2);
+ RegisterSubReg R2(Src2);
uint32_t NegCmp = Comparison::negate(Cmp);
return evaluateCMPri(NegCmp, R2, A1, Inputs, Result);
} else if (Imm2) {
@@ -2621,7 +2623,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
return false;
const MachineOperand &Src1 = MI.getOperand(1);
const MachineOperand &Src2 = MI.getOperand(2);
- Register R1(Src1);
+ RegisterSubReg R1(Src1);
bool Eval = false;
LatticeCell RC;
switch (Opc) {
@@ -2629,7 +2631,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
return false;
case Hexagon::A2_and:
case Hexagon::A2_andp:
- Eval = evaluateANDrr(R1, Register(Src2), Inputs, RC);
+ Eval = evaluateANDrr(R1, RegisterSubReg(Src2), Inputs, RC);
break;
case Hexagon::A2_andir: {
if (!Src2.isImm())
@@ -2640,7 +2642,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
}
case Hexagon::A2_or:
case Hexagon::A2_orp:
- Eval = evaluateORrr(R1, Register(Src2), Inputs, RC);
+ Eval = evaluateORrr(R1, RegisterSubReg(Src2), Inputs, RC);
break;
case Hexagon::A2_orir: {
if (!Src2.isImm())
@@ -2651,11 +2653,11 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
}
case Hexagon::A2_xor:
case Hexagon::A2_xorp:
- Eval = evaluateXORrr(R1, Register(Src2), Inputs, RC);
+ Eval = evaluateXORrr(R1, RegisterSubReg(Src2), Inputs, RC);
break;
}
if (Eval) {
- Register DefR(MI.getOperand(0));
+ RegisterSubReg DefR(MI.getOperand(0));
Outputs.update(DefR.Reg, RC);
}
return Eval;
@@ -2664,7 +2666,7 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
const CellMap &Inputs, CellMap &Outputs) {
// Dst0 = Cond1 ? Src2 : Src3
- Register CR(MI.getOperand(1));
+ RegisterSubReg CR(MI.getOperand(1));
assert(Inputs.has(CR.Reg));
LatticeCell LS;
if (!getCell(CR, Inputs, LS))
@@ -2679,7 +2681,7 @@ bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
return false;
const MachineOperand &ValOp = MI.getOperand(TakeOp);
- Register DefR(MI.getOperand(0));
+ RegisterSubReg DefR(MI.getOperand(0));
LatticeCell RC = Outputs.get(DefR.Reg);
if (ValOp.isImm()) {
@@ -2692,7 +2694,7 @@ bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
return true;
}
if (ValOp.isReg()) {
- Register R(ValOp);
+ RegisterSubReg R(ValOp);
const LatticeCell &LR = Inputs.get(R.Reg);
LatticeCell LSR;
if (!evaluate(R, LR, LSR))
@@ -2707,7 +2709,7 @@ bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
const CellMap &Inputs, CellMap &Outputs) {
// Dst0 = ext R1
- Register R1(MI.getOperand(1));
+ RegisterSubReg R1(MI.getOperand(1));
assert(Inputs.has(R1.Reg));
unsigned Opc = MI.getOpcode();
@@ -2724,6 +2726,8 @@ bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
case Hexagon::A2_sxtw:
Bits = 32;
break;
+ default:
+ llvm_unreachable("Unhandled extension opcode");
}
bool Signed = false;
@@ -2735,7 +2739,7 @@ bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
break;
}
- Register DefR(MI.getOperand(0));
+ RegisterSubReg DefR(MI.getOperand(0));
unsigned BW = getRegBitWidth(DefR.Reg);
LatticeCell RC = Outputs.get(DefR.Reg);
bool Eval = Signed ? evaluateSEXTr(R1, BW, Bits, Inputs, RC)
@@ -2749,8 +2753,8 @@ bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
bool HexagonConstEvaluator::evaluateHexVector1(const MachineInstr &MI,
const CellMap &Inputs, CellMap &Outputs) {
// DefR = op R1
- Register DefR(MI.getOperand(0));
- Register R1(MI.getOperand(1));
+ RegisterSubReg DefR(MI.getOperand(0));
+ RegisterSubReg R1(MI.getOperand(1));
assert(Inputs.has(R1.Reg));
LatticeCell RC = Outputs.get(DefR.Reg);
bool Eval;
@@ -2788,7 +2792,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
continue;
- Register R(MO);
+ RegisterSubReg R(MO);
if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
continue;
HasUse = true;
@@ -2954,10 +2958,10 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
// to DefR += mpyi(R, #imm),
// or DefR -= mpyi(R, #imm).
{
- Register DefR(MI.getOperand(0));
+ RegisterSubReg DefR(MI.getOperand(0));
assert(!DefR.SubReg);
- Register R2(MI.getOperand(2));
- Register R3(MI.getOperand(3));
+ RegisterSubReg R2(MI.getOperand(2));
+ RegisterSubReg R3(MI.getOperand(3));
assert(Inputs.has(R2.Reg) && Inputs.has(R3.Reg));
LatticeCell LS2, LS3;
// It is enough to get one of the input cells, since we will only try
@@ -2971,7 +2975,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
if (Zero) {
// DefR == R1 (tied operands).
MachineOperand &Acc = MI.getOperand(1);
- Register R1(Acc);
+ RegisterSubReg R1(Acc);
unsigned NewR = R1.Reg;
if (R1.SubReg) {
// Generate COPY. FIXME: Replace with the register:subregister.
@@ -3018,8 +3022,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
case Hexagon::A2_and:
{
- Register R1(MI.getOperand(1));
- Register R2(MI.getOperand(2));
+ RegisterSubReg R1(MI.getOperand(1));
+ RegisterSubReg R2(MI.getOperand(2));
assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
LatticeCell LS1, LS2;
unsigned CopyOf = 0;
@@ -3037,8 +3041,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
if (!CopyOf)
return false;
MachineOperand &SO = MI.getOperand(CopyOf);
- Register SR(SO);
- Register DefR(MI.getOperand(0));
+ RegisterSubReg SR(SO);
+ RegisterSubReg DefR(MI.getOperand(0));
unsigned NewR = SR.Reg;
if (SR.SubReg) {
const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
@@ -3054,8 +3058,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
case Hexagon::A2_or:
{
- Register R1(MI.getOperand(1));
- Register R2(MI.getOperand(2));
+ RegisterSubReg R1(MI.getOperand(1));
+ RegisterSubReg R2(MI.getOperand(2));
assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
LatticeCell LS1, LS2;
unsigned CopyOf = 0;
@@ -3069,8 +3073,8 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
if (!CopyOf)
return false;
MachineOperand &SO = MI.getOperand(CopyOf);
- Register SR(SO);
- Register DefR(MI.getOperand(0));
+ RegisterSubReg SR(SO);
+ RegisterSubReg DefR(MI.getOperand(0));
unsigned NewR = SR.Reg;
if (SR.SubReg) {
const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
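The HexagonConstPropagation.cpp hunks above are a mechanical rename of the file-local Register wrapper to RegisterSubReg, so the helper no longer collides with the llvm::Register class that MachineOperand::getReg() now returns. A minimal, self-contained sketch of what that wrapper looks like, inferred from the uses in the diff (the field names Reg/SubReg match those uses; the constructors are hypothetical stand-ins for the real ones, which read a MachineOperand):

// Sketch only: a register number paired with an optional subregister index.
struct RegisterSubReg {
  unsigned Reg = 0;     // virtual or physical register number
  unsigned SubReg = 0;  // subregister index; 0 means "whole register"
  RegisterSubReg() = default;
  RegisterSubReg(unsigned R, unsigned S = 0) : Reg(R), SubReg(S) {}
};
// Typical use, mirroring the evaluator signatures above: look up the lattice
// cell for R.Reg in the CellMap and, if R.SubReg is nonzero, narrow further.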
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 28965b69e284..a09ccab483cf 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -1,9 +1,8 @@
//===------- HexagonCopyToCombine.cpp - Hexagon Copy-To-Combine Pass ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass replaces transfer instructions by combine instructions.
@@ -255,8 +254,8 @@ static bool isUnsafeToMoveAcross(MachineInstr &MI, unsigned UseReg,
MI.isMetaInstruction();
}
-static unsigned UseReg(const MachineOperand& MO) {
- return MO.isReg() ? MO.getReg() : 0;
+static Register UseReg(const MachineOperand& MO) {
+ return MO.isReg() ? MO.getReg() : Register();
}
/// isSafeToMoveTogether - Returns true if it is safe to move I1 next to I2 such
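The UseReg change above only swaps the return type: assuming llvm::Register keeps its implicit conversion to unsigned and default-constructs to the invalid register (numerically 0), callers that compared the old result against 0 keep working unchanged. A tiny self-contained model of that behaviour (the class name is a hypothetical stand-in, not the real llvm::Register):

#include <cassert>

class RegisterModel {
  unsigned Id = 0;                            // 0 == "no register"
public:
  RegisterModel() = default;
  explicit RegisterModel(unsigned R) : Id(R) {}
  operator unsigned() const { return Id; }    // old callers still see an unsigned
};

static unsigned useRegOrInvalid(bool IsReg, RegisterModel R) {
  return IsReg ? R : RegisterModel();         // same shape as UseReg() above
}

int main() {
  assert(useRegOrInvalid(false, RegisterModel(7)) == 0);
  assert(useRegOrInvalid(true, RegisterModel(7)) == 7);
}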
diff --git a/lib/Target/Hexagon/HexagonDepArch.h b/lib/Target/Hexagon/HexagonDepArch.h
index dff2b2f471d0..529be7ef0ac7 100644
--- a/lib/Target/Hexagon/HexagonDepArch.h
+++ b/lib/Target/Hexagon/HexagonDepArch.h
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td
index f1aadae555c8..115cf2383a7a 100644
--- a/lib/Target/Hexagon/HexagonDepArch.td
+++ b/lib/Target/Hexagon/HexagonDepArch.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.inc
index 9f78412f45d2..10068abce7ec 100644
--- a/lib/Target/Hexagon/HexagonDepDecoders.h
+++ b/lib/Target/Hexagon/HexagonDepDecoders.inc
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepIICHVX.td b/lib/Target/Hexagon/HexagonDepIICHVX.td
index 9e3dea9f3e9b..fefbbfd3f1ac 100644
--- a/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ b/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepIICScalar.td b/lib/Target/Hexagon/HexagonDepIICScalar.td
index 9da25952fb1c..34da0be02d19 100644
--- a/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h
index 81e3971e21d2..358345e027d8 100644
--- a/lib/Target/Hexagon/HexagonDepITypes.h
+++ b/lib/Target/Hexagon/HexagonDepITypes.h
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td
index f694062a5232..91c02b84b87c 100644
--- a/lib/Target/Hexagon/HexagonDepITypes.td
+++ b/lib/Target/Hexagon/HexagonDepITypes.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepInstrFormats.td b/lib/Target/Hexagon/HexagonDepInstrFormats.td
index ffe212ef9d97..c08d9a388d3e 100644
--- a/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 3ef1c49eb7ee..a49051888c77 100644
--- a/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
index 2346fa572626..2ce1419e4790 100644
--- a/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
+++ b/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td
index b3132d41b903..22ee495b25e6 100644
--- a/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/lib/Target/Hexagon/HexagonDepMappings.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td
index ef2d4fa45702..fdba7b971258 100644
--- a/lib/Target/Hexagon/HexagonDepOperands.td
+++ b/lib/Target/Hexagon/HexagonDepOperands.td
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonDepTimingClasses.h b/lib/Target/Hexagon/HexagonDepTimingClasses.h
index 0fd55e8b7997..b6be74f848bb 100644
--- a/lib/Target/Hexagon/HexagonDepTimingClasses.h
+++ b/lib/Target/Hexagon/HexagonDepTimingClasses.h
@@ -1,9 +1,8 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Automatically generated file, please consult code owner before editing.
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 8e2f5093038e..c1f32e54e98d 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -1,9 +1,8 @@
//===- HexagonEarlyIfConv.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 1a762c0c9de7..c343e426ac7d 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -1,9 +1,8 @@
//===- HexagonExpandCondsets.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -734,7 +733,7 @@ bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
HasDef = true;
}
for (auto &Mo : MI->memoperands())
- if (Mo->isVolatile())
+ if (Mo->isVolatile() || Mo->isAtomic())
return false;
return true;
}
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index e9067e2285a8..f7edc168de4a 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -1,9 +1,8 @@
//===---- HexagonFixupHwLoops.cpp - Fixup HW loops too far from LOOPn. ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// The loop start address in the LOOPn instruction is encoded as a distance
// from the LOOPn instruction itself. If the start address is too far from
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index f5736546a87c..3368ee4fb3b9 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1,9 +1,8 @@
//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//
//===----------------------------------------------------------------------===//
@@ -375,17 +374,17 @@ static bool isRestoreCall(unsigned Opc) {
}
static inline bool isOptNone(const MachineFunction &MF) {
- return MF.getFunction().hasFnAttribute(Attribute::OptimizeNone) ||
+ return MF.getFunction().hasOptNone() ||
MF.getTarget().getOptLevel() == CodeGenOpt::None;
}
static inline bool isOptSize(const MachineFunction &MF) {
const Function &F = MF.getFunction();
- return F.optForSize() && !F.optForMinSize();
+ return F.hasOptSize() && !F.hasMinSize();
}
static inline bool isMinSize(const MachineFunction &MF) {
- return MF.getFunction().optForMinSize();
+ return MF.getFunction().hasMinSize();
}
/// Implements shrink-wrapping of the stack frame. By default, stack frame
@@ -2102,7 +2101,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
}
if (!Bad) {
for (auto *Mo : In.memoperands()) {
- if (!Mo->isVolatile())
+ if (!Mo->isVolatile() && !Mo->isAtomic())
continue;
Bad = true;
break;
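The isOptNone/isOptSize/isMinSize helpers above switch to the renamed Function queries hasOptNone(), hasOptSize() and hasMinSize(). As a rough guide, and assuming the usual attribute mapping, hasOptSize() is true for functions carrying either the optsize or minsize attribute (-Os or -Oz), while hasMinSize() is true only for minsize (-Oz), which is why "size-optimized but not minimum-size" is written as the conjunction below. Sketch only, spelling out what the helper above computes:

#include "llvm/IR/Function.h"

// hasOptSize(): optsize or minsize attribute (-Os or -Oz).
// hasMinSize(): minsize attribute only (-Oz).
static bool sizeButNotMinSize(const llvm::Function &F) {
  return F.hasOptSize() && !F.hasMinSize();
}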
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index d65d870750f8..65e8c7686640 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -1,9 +1,8 @@
//==- HexagonFrameLowering.h - Define frame lowering for Hexagon -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index 08a016b74650..3417c74e359b 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -1,9 +1,8 @@
//===- HexagonGenExtract.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -211,7 +210,7 @@ bool HexagonGenExtract::convert(Instruction *In) {
Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
: Intrinsic::hexagon_S2_extractup;
Module *Mod = BB->getParent()->getParent();
- Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
+ Function *ExtF = Intrinsic::getDeclaration(Mod, IntId);
Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
if (SL != 0)
NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index e3492e7374e9..81025c1c5325 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -1,9 +1,8 @@
//===- HexagonGenInsert.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -437,7 +436,7 @@ namespace {
} // end anonymous namespace
void OrderedRegisterList::insert(unsigned VR) {
- iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+ iterator L = llvm::lower_bound(Seq, VR, Ord);
if (L == Seq.end())
Seq.push_back(VR);
else
@@ -450,7 +449,7 @@ void OrderedRegisterList::insert(unsigned VR) {
}
void OrderedRegisterList::remove(unsigned VR) {
- iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
+ iterator L = llvm::lower_bound(Seq, VR, Ord);
if (L != Seq.end())
Seq.erase(L);
}
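The two hunks above replace iterator-pair std::lower_bound calls with the range form llvm::lower_bound from llvm/ADT/STLExtras.h, which simply forwards to std::lower_bound over the whole range. A small self-contained sketch of the equivalence (the container and comparator here are hypothetical stand-ins for Seq and Ord):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> Seq = {2, 5, 9};
  auto Less = [](unsigned A, unsigned B) { return A < B; };

  // Range form (what llvm::lower_bound provides)...
  auto rangeLowerBound = [&](std::vector<unsigned> &R, unsigned V) {
    return std::lower_bound(R.begin(), R.end(), V, Less);
  };

  // ...is the same as the explicit iterator-pair call it replaces.
  assert(rangeLowerBound(Seq, 5) ==
         std::lower_bound(Seq.begin(), Seq.end(), 5, Less));
}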
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index e5af96468af1..cdafbc20ab86 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -1,9 +1,8 @@
//===- HexagonGenMux.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -304,8 +303,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
std::advance(It2, MaxX);
MachineInstr &Def1 = *It1, &Def2 = *It2;
MachineOperand *Src1 = &Def1.getOperand(2), *Src2 = &Def2.getOperand(2);
- unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
- unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+ Register SR1 = Src1->isReg() ? Src1->getReg() : Register();
+ Register SR2 = Src2->isReg() ? Src2->getReg() : Register();
bool Failure = false, CanUp = true, CanDown = true;
for (unsigned X = MinX+1; X < MaxX; X++) {
const DefUseInfo &DU = DUM.lookup(X);
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index c0d2de90467a..e991fa8b61c8 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -1,9 +1,8 @@
//===- HexagonGenPredicate.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -46,17 +45,19 @@ namespace llvm {
namespace {
- struct Register {
+ // FIXME: Use TargetInstrInfo::RegSubRegPair
+ struct RegisterSubReg {
unsigned R, S;
- Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
- Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+ RegisterSubReg(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
+ RegisterSubReg(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+ RegisterSubReg(const Register &Reg) : R(Reg), S(0) {}
- bool operator== (const Register &Reg) const {
+ bool operator== (const RegisterSubReg &Reg) const {
return R == Reg.R && S == Reg.S;
}
- bool operator< (const Register &Reg) const {
+ bool operator< (const RegisterSubReg &Reg) const {
return R < Reg.R || (R == Reg.R && S < Reg.S);
}
};
@@ -64,10 +65,10 @@ namespace {
struct PrintRegister {
friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR);
- PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
+ PrintRegister(RegisterSubReg R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
private:
- Register Reg;
+ RegisterSubReg Reg;
const TargetRegisterInfo &TRI;
};
@@ -99,8 +100,8 @@ namespace {
private:
using VectOfInst = SetVector<MachineInstr *>;
- using SetOfReg = std::set<Register>;
- using RegToRegMap = std::map<Register, Register>;
+ using SetOfReg = std::set<RegisterSubReg>;
+ using RegToRegMap = std::map<RegisterSubReg, RegisterSubReg>;
const HexagonInstrInfo *TII = nullptr;
const HexagonRegisterInfo *TRI = nullptr;
@@ -111,12 +112,12 @@ namespace {
bool isPredReg(unsigned R);
void collectPredicateGPR(MachineFunction &MF);
- void processPredicateGPR(const Register &Reg);
+ void processPredicateGPR(const RegisterSubReg &Reg);
unsigned getPredForm(unsigned Opc);
bool isConvertibleToPredForm(const MachineInstr *MI);
bool isScalarCmp(unsigned Opc);
- bool isScalarPred(Register PredReg);
- Register getPredRegFor(const Register &Reg);
+ bool isScalarPred(RegisterSubReg PredReg);
+ RegisterSubReg getPredRegFor(const RegisterSubReg &Reg);
bool convertToPredForm(MachineInstr *MI);
bool eliminatePredCopies(MachineFunction &MF);
};
@@ -211,7 +212,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
case Hexagon::C2_tfrpr:
case TargetOpcode::COPY:
if (isPredReg(MI->getOperand(1).getReg())) {
- Register RD = MI->getOperand(0);
+ RegisterSubReg RD = MI->getOperand(0);
if (TargetRegisterInfo::isVirtualRegister(RD.R))
PredGPRs.insert(RD);
}
@@ -221,7 +222,7 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
}
}
-void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
+void HexagonGenPredicate::processPredicateGPR(const RegisterSubReg &Reg) {
LLVM_DEBUG(dbgs() << __func__ << ": " << printReg(Reg.R, TRI, Reg.S) << "\n");
using use_iterator = MachineRegisterInfo::use_iterator;
@@ -240,7 +241,7 @@ void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
}
}
-Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
+RegisterSubReg HexagonGenPredicate::getPredRegFor(const RegisterSubReg &Reg) {
// Create a predicate register for a given Reg. The newly created register
// will have its value copied from Reg, so that it can be later used as
// an operand in other instructions.
@@ -255,7 +256,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
unsigned Opc = DefI->getOpcode();
if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) {
assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse());
- Register PR = DefI->getOperand(1);
+ RegisterSubReg PR = DefI->getOperand(1);
G2P.insert(std::make_pair(Reg, PR));
LLVM_DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n');
return PR;
@@ -272,10 +273,10 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
MachineBasicBlock::iterator DefIt = DefI;
BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
.addReg(Reg.R, 0, Reg.S);
- G2P.insert(std::make_pair(Reg, Register(NewPR)));
- LLVM_DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI)
+ G2P.insert(std::make_pair(Reg, RegisterSubReg(NewPR)));
+ LLVM_DEBUG(dbgs() << " -> !" << PrintRegister(RegisterSubReg(NewPR), *TRI)
<< '\n');
- return Register(NewPR);
+ return RegisterSubReg(NewPR);
}
llvm_unreachable("Invalid argument");
@@ -317,12 +318,12 @@ bool HexagonGenPredicate::isScalarCmp(unsigned Opc) {
return false;
}
-bool HexagonGenPredicate::isScalarPred(Register PredReg) {
- std::queue<Register> WorkQ;
+bool HexagonGenPredicate::isScalarPred(RegisterSubReg PredReg) {
+ std::queue<RegisterSubReg> WorkQ;
WorkQ.push(PredReg);
while (!WorkQ.empty()) {
- Register PR = WorkQ.front();
+ RegisterSubReg PR = WorkQ.front();
WorkQ.pop();
const MachineInstr *DefI = MRI->getVRegDef(PR.R);
if (!DefI)
@@ -351,7 +352,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) {
// Add operands to the queue.
for (const MachineOperand &MO : DefI->operands())
if (MO.isReg() && MO.isUse())
- WorkQ.push(Register(MO.getReg()));
+ WorkQ.push(RegisterSubReg(MO.getReg()));
break;
// All non-vector compares are ok, everything else is bad.
@@ -373,7 +374,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- Register Reg(MO);
+ RegisterSubReg Reg(MO);
if (Reg.S && Reg.S != Hexagon::isub_lo)
return false;
if (!PredGPRs.count(Reg))
@@ -400,7 +401,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
// If it's a scalar predicate register, then all bits in it are
// the same. Otherwise, to determine whether all bits are 0 or not
// we would need to use any8.
- Register PR = getPredRegFor(MI->getOperand(1));
+ RegisterSubReg PR = getPredRegFor(MI->getOperand(1));
if (!isScalarPred(PR))
return false;
// This will skip the immediate argument when creating the predicate
@@ -411,19 +412,19 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
// Some sanity: check that def is in operand #0.
MachineOperand &Op0 = MI->getOperand(0);
assert(Op0.isDef());
- Register OutR(Op0);
+ RegisterSubReg OutR(Op0);
// Don't use getPredRegFor, since it will create an association between
// the argument and a created predicate register (i.e. it will insert a
// copy if a new predicate register is created).
const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
- Register NewPR = MRI->createVirtualRegister(PredRC);
+ RegisterSubReg NewPR = MRI->createVirtualRegister(PredRC);
MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R);
// Add predicate counterparts of the GPRs.
for (unsigned i = 1; i < NumOps; ++i) {
- Register GPR = MI->getOperand(i);
- Register Pred = getPredRegFor(GPR);
+ RegisterSubReg GPR = MI->getOperand(i);
+ RegisterSubReg Pred = getPredRegFor(GPR);
MIB.addReg(Pred.R, 0, Pred.S);
}
LLVM_DEBUG(dbgs() << "generated: " << *MIB);
@@ -441,7 +442,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
// then the output will be a predicate register. Do not visit the
// users of it.
if (!isPredReg(NewOutR)) {
- Register R(NewOutR);
+ RegisterSubReg R(NewOutR);
PredGPRs.insert(R);
processPredicateGPR(R);
}
@@ -468,8 +469,8 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != TargetOpcode::COPY)
continue;
- Register DR = MI.getOperand(0);
- Register SR = MI.getOperand(1);
+ RegisterSubReg DR = MI.getOperand(0);
+ RegisterSubReg SR = MI.getOperand(1);
if (!TargetRegisterInfo::isVirtualRegister(DR.R))
continue;
if (!TargetRegisterInfo::isVirtualRegister(SR.R))
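In HexagonGenPredicate.cpp the renamed RegisterSubReg keeps its operator< so it can stay a key in the SetOfReg and RegToRegMap containers above: the ordering compares the register number first and the subregister index second. A self-contained sketch of that strict weak ordering, written with std::tie for clarity (the struct and field names mirror the hunk; the std::map usage is purely illustrative):

#include <map>
#include <tuple>

struct RegSub {
  unsigned R; // register number
  unsigned S; // subregister index
  bool operator<(const RegSub &Other) const {
    // Same ordering as the operator< in the hunk above: R first, then S.
    return std::tie(R, S) < std::tie(Other.R, Other.S);
  }
};

int main() {
  std::map<RegSub, RegSub> G2P;   // like RegToRegMap above
  G2P[{42, 0}] = {43, 0};         // map a GPR to its predicate counterpart
  return G2P.count({42, 0}) == 1 ? 0 : 1;
}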
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 239cf49ca8a2..cecbaedb6d70 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1,9 +1,8 @@
//===- HexagonHardwareLoops.cpp - Identify and generate hardware loops ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
index 44f1f554c662..e45126bec6ef 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===-- HexagonHazardRecognizer.cpp - Hexagon Post RA Hazard Recognizer ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.h b/lib/Target/Hexagon/HexagonHazardRecognizer.h
index 2874d73ce819..53b9cb43b4b6 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.h
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -1,9 +1,8 @@
//===--- HexagonHazardRecognizer.h - Hexagon Post RA Hazard Recognizer ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines the hazard recognizer for scheduling on Hexagon.
diff --git a/lib/Target/Hexagon/HexagonIICHVX.td b/lib/Target/Hexagon/HexagonIICHVX.td
index a804c5a80d03..06e9c83cf306 100644
--- a/lib/Target/Hexagon/HexagonIICHVX.td
+++ b/lib/Target/Hexagon/HexagonIICHVX.td
@@ -1,9 +1,8 @@
//===--- HexagonIICHVX.td -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,12 +16,14 @@ class HVXItin {
InstrStage<1, [CVI_XLANE,CVI_SHIFT, CVI_MPY0, CVI_MPY1]>],
[9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>,
- // Used by Gather Pseudo Instructions which are expanded into
- // V6_vgather* and V6_vS32b_new_ai. Even though these instructions
- // use CVI_ST resource, it's not included below to avoid having more than
- // 4 InstrStages and thus changing 'MaxResTerms' to 5.
+ // Used by gather pseudo-instructions which are expanded into V6_vgather*
+ // and V6_vS32b_new_ai. Even though these instructions use CVI_LD resource,
+ // it's not included below to avoid having more than 4 InstrStages and
+ // thus changing 'MaxResTerms' to 5. Instead, both SLOT0 and SLOT1 are
+ // used, which should be sufficient.
InstrItinData <CVI_GATHER_PSEUDO,
[InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_LD], 0>, InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
InstrStage<1, [CVI_MPY01, CVI_XLSHF]>]>];
}
diff --git a/lib/Target/Hexagon/HexagonIICScalar.td b/lib/Target/Hexagon/HexagonIICScalar.td
index 5fe713346e38..d37cc3a2cc3e 100644
--- a/lib/Target/Hexagon/HexagonIICScalar.td
+++ b/lib/Target/Hexagon/HexagonIICScalar.td
@@ -1,9 +1,8 @@
//===--- HexagonIICScalar.td ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 470b05bda4c6..605fcfc25559 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -849,6 +848,9 @@ void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
const SDLoc &dl(N);
MVT ResTy = N->getValueType(0).getSimpleVT();
+ // The argument to V2Q should be a single vector.
+ MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
+ assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
@@ -860,6 +862,8 @@ void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
const SDLoc &dl(N);
MVT ResTy = N->getValueType(0).getSimpleVT();
+ // The result of V2Q should be a single vector.
+ assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index f4f09dd4e758..65edb09603b3 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -1,9 +1,8 @@
//===-- HexagonISelDAGToDAG.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Hexagon specific code to select Hexagon machine instructions for
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index b796e442d4fa..e7f1c345af1d 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1,9 +1,8 @@
//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1edf3e498dfa..fef5a98cdb00 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -579,7 +578,8 @@ HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
unsigned LR = HRI.getRARegister();
- if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
+ if ((Op.getOpcode() != ISD::INLINEASM &&
+ Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
return Op;
unsigned NumOps = Op.getNumOperands();
@@ -1292,6 +1292,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
@@ -1324,7 +1325,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
if (EmitJumpTables)
setMinimumJumpTableEntries(MinimumJumpTables);
else
- setMinimumJumpTableEntries(std::numeric_limits<int>::max());
+ setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::ABS, MVT::i32, Legal);
@@ -1333,8 +1334,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
// but they only operate on i64.
for (MVT VT : MVT::integer_valuetypes()) {
- setOperationAction(ISD::UADDO, VT, Expand);
- setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SADDO, VT, Expand);
setOperationAction(ISD::SSUBO, VT, Expand);
setOperationAction(ISD::ADDCARRY, VT, Expand);
@@ -2619,7 +2620,6 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
const SDLoc &dl(Op);
const DataLayout &DL = DAG.getDataLayout();
LLVMContext &Ctx = *DAG.getContext();
- unsigned AS = LN->getAddressSpace();
// If the load aligning is disabled or the load can be broken up into two
// smaller legal loads, do the default (target-independent) expansion.
@@ -2629,15 +2629,15 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
DoDefault = true;
if (!AlignLoads) {
- if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
+ if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), *LN->getMemOperand()))
return Op;
DoDefault = true;
}
- if (!DoDefault && 2*HaveAlign == NeedAlign) {
+ if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
// The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
- MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
+ MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
: MVT::getVectorVT(MVT::i8, HaveAlign);
- DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
+ DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, *LN->getMemOperand());
}
if (DoDefault) {
std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
@@ -2692,6 +2692,43 @@ HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
}
SDValue
+HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+ auto *CY = dyn_cast<ConstantSDNode>(Y);
+ if (!CY)
+ return SDValue();
+
+ const SDLoc &dl(Op);
+ SDVTList VTs = Op.getNode()->getVTList();
+ assert(VTs.NumVTs == 2);
+ assert(VTs.VTs[1] == MVT::i1);
+ unsigned Opc = Op.getOpcode();
+
+ if (CY) {
+ uint32_t VY = CY->getZExtValue();
+ assert(VY != 0 && "This should have been folded");
+ // X +/- 1
+ if (VY != 1)
+ return SDValue();
+
+ if (Opc == ISD::UADDO) {
+ SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
+ SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
+ ISD::SETEQ);
+ return DAG.getMergeValues({Op, Ov}, dl);
+ }
+ if (Opc == ISD::USUBO) {
+ SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
+ SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
+ DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
+ return DAG.getMergeValues({Op, Ov}, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue
HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
const SDLoc &dl(Op);
unsigned Opc = Op.getOpcode();
@@ -2741,7 +2778,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
// Handle INLINEASM first.
- if (Opc == ISD::INLINEASM)
+ if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
return LowerINLINEASM(Op, DAG);
if (isHvxOperation(Op)) {
@@ -2768,6 +2805,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BITCAST: return LowerBITCAST(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, DAG);
case ISD::STORE: return LowerStore(Op, DAG);
+ case ISD::UADDO:
+ case ISD::USUBO: return LowerUAddSubO(Op, DAG);
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
@@ -2923,7 +2962,8 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
-bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
return true;
}
@@ -3047,7 +3087,7 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
/// determined using generic target-independent logic.
EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc, MachineFunction &MF) const {
+ bool MemcpyStrSrc, const AttributeList &FuncAttributes) const {
auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
return (GivenA % MinA) == 0;
@@ -3063,8 +3103,9 @@ EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::Other;
}
-bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AS, unsigned Align, bool *Fast) const {
+bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *Fast) const {
if (Fast)
*Fast = false;
return Subtarget.isHVXVectorType(VT.getSimpleVT());
@@ -3111,13 +3152,21 @@ Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
Module *M = BB->getParent()->getParent();
- Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ auto PT = cast<PointerType>(Addr->getType());
+ Type *Ty = PT->getElementType();
unsigned SZ = Ty->getPrimitiveSizeInBits();
assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
: Intrinsic::hexagon_L4_loadd_locked;
- Value *Fn = Intrinsic::getDeclaration(M, IntID);
- return Builder.CreateCall(Fn, Addr, "larx");
+ Function *Fn = Intrinsic::getDeclaration(M, IntID);
+
+ PointerType *NewPtrTy
+ = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
+ Addr = Builder.CreateBitCast(Addr, NewPtrTy);
+
+ Value *Call = Builder.CreateCall(Fn, Addr, "larx");
+
+ return Builder.CreateBitCast(Call, Ty);
}
/// Perform a store-conditional operation to Addr. Return the status of the
@@ -3128,10 +3177,17 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Module *M = BB->getParent()->getParent();
Type *Ty = Val->getType();
unsigned SZ = Ty->getPrimitiveSizeInBits();
+
+ Type *CastTy = Builder.getIntNTy(SZ);
assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
: Intrinsic::hexagon_S4_stored_locked;
- Value *Fn = Intrinsic::getDeclaration(M, IntID);
+ Function *Fn = Intrinsic::getDeclaration(M, IntID);
+
+ unsigned AS = Addr->getType()->getPointerAddressSpace();
+ Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
+ Val = Builder.CreateBitCast(Val, CastTy);
+
Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 265c37e6ae61..4e467cb22727 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -1,9 +1,8 @@
//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -168,6 +167,7 @@ namespace HexagonISD {
SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -285,7 +285,8 @@ namespace HexagonISD {
/// is legal. It is frequently not legal in PIC relocation models.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -295,10 +296,10 @@ namespace HexagonISD {
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- unsigned Align, bool *Fast) const override;
+ unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override;
/// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index a6400b5d8266..345c657787a0 100644
--- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -1,9 +1,8 @@
//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -1542,6 +1541,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL:
case ISD::SETCC:
case ISD::VSELECT:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND_INREG:
return SplitHvxPairOp(Op, DAG);
}
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 2236140d5dd7..f156de671059 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -1,9 +1,8 @@
//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV5.td b/lib/Target/Hexagon/HexagonInstrFormatsV5.td
index c8de5cbcc1e0..68ef2d2d3a8a 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV5.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV5.td
@@ -1,9 +1,8 @@
//==- HexagonInstrFormatsV5.td - Hexagon Instruction Formats --*- tablegen -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index 1347a655353f..86a82183a1ad 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -1,9 +1,8 @@
//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV65.td b/lib/Target/Hexagon/HexagonInstrFormatsV65.td
index cddb8777b417..eaecffe9c89e 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV65.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV65.td
@@ -1,9 +1,8 @@
//==- HexagonInstrFormatsV65.td - Hexagon Instruction Formats -*- tablegen -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index de0d6c4d9e4e..a156de5ba128 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1,9 +1,8 @@
//===- HexagonInstrInfo.cpp - Hexagon Instruction Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -698,11 +697,11 @@ bool HexagonInstrInfo::analyzeLoop(MachineLoop &L,
/// Generate code to reduce the loop iteration by one and check if the loop is
/// finished. Return the value/register of the new loop count. This function
/// assumes the nth iteration is peeled first.
-unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB,
- MachineInstr *IndVar, MachineInstr &Cmp,
- SmallVectorImpl<MachineOperand> &Cond,
- SmallVectorImpl<MachineInstr *> &PrevInsts,
- unsigned Iter, unsigned MaxIter) const {
+unsigned HexagonInstrInfo::reduceLoopCount(
+ MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
+ MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
+ unsigned MaxIter) const {
// We expect a hardware loop currently. This means that IndVar is set
// to null, and the compare is the ENDLOOP instruction.
assert((!IndVar) && isEndLoopN(Cmp.getOpcode())
@@ -1314,6 +1313,38 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+ case Hexagon::PS_crash: {
+ // Generate a misaligned load that is guaranteed to cause a crash.
+ class CrashPseudoSourceValue : public PseudoSourceValue {
+ public:
+ CrashPseudoSourceValue(const TargetInstrInfo &TII)
+ : PseudoSourceValue(TargetCustom, TII) {}
+
+ bool isConstant(const MachineFrameInfo *) const override {
+ return false;
+ }
+ bool isAliased(const MachineFrameInfo *) const override {
+ return false;
+ }
+ bool mayAlias(const MachineFrameInfo *) const override {
+ return false;
+ }
+ void printCustom(raw_ostream &OS) const override {
+ OS << "MisalignedCrash";
+ }
+ };
+
+ static const CrashPseudoSourceValue CrashPSV(*this);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(&CrashPSV),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 8, 1);
+ BuildMI(MBB, MI, DL, get(Hexagon::PS_loadrdabs), Hexagon::D13)
+ .addImm(0xBADC0FEE) // Misaligned load.
+ .addMemOperand(MMO);
+ MBB.erase(MI);
+ return true;
+ }
+
case Hexagon::PS_tailcall_i:
MI.setDesc(get(Hexagon::J2_jump));
return true;
@@ -1681,17 +1712,19 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
/// Hexagon counts the number of ##'s and adjusts for that many
/// constant extenders.
unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
+ const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI) const {
StringRef AStr(Str);
// Count the number of instructions in the asm.
bool atInsnStart = true;
unsigned Length = 0;
+ const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0)
atInsnStart = true;
if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- Length += MAI.getMaxInstLength();
+ Length += MaxInstLength;
atInsnStart = false;
}
if (atInsnStart && strncmp(Str, MAI.getCommentString().data(),
@@ -1823,7 +1856,8 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
// S2_storeri_io %r29, 132, killed %r1; flags: mem:ST4[FixedStack1]
// Currently AA considers the addresses in these instructions to be aliasing.
bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
- MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
+ const MachineInstr &MIa, const MachineInstr &MIb,
+ AliasAnalysis *AA) const {
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
@@ -2425,7 +2459,7 @@ bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
- return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
+ return (F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
}
bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
@@ -2894,7 +2928,7 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
/// Get the base register and byte offset of a load/store instr.
bool HexagonInstrInfo::getMemOperandWithOffset(
- MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
const TargetRegisterInfo *TRI) const {
unsigned AccessSize = 0;
BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 9b840762e88a..e0a999d0f4c4 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -1,9 +1,8 @@
//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -140,7 +139,7 @@ public:
/// is finished. Return the value/register of the new loop count. We need
/// this function when peeling off one or more iterations of a loop. This
/// function assumes the nth iteration is peeled first.
- unsigned reduceLoopCount(MachineBasicBlock &MBB,
+ unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
MachineInstr *IndVar, MachineInstr &Cmp,
SmallVectorImpl<MachineOperand> &Cond,
SmallVectorImpl<MachineInstr *> &PrevInsts,
@@ -216,7 +215,8 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const override;
@@ -264,8 +264,10 @@ public:
/// Measure the specified inline asm to determine an approximation of its
/// length.
- unsigned getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const override;
+ unsigned getInlineAsmLength(
+ const char *Str,
+ const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI = nullptr) const override;
/// Allocate and return a hazard recognizer to use for this target when
/// scheduling the machine instructions after register allocation.
@@ -296,7 +298,8 @@ public:
// memory addresses. This function returns true if two MIs access different
// memory addresses and false otherwise.
bool
- areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
/// For instructions with a base and offset, return the position of the
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index 9cab5748bef2..cabfd783effa 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1,9 +1,8 @@
//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index a852394f2160..44f39a3e9b16 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -1,9 +1,8 @@
//===- HexagonIntrinsicsV5.td - V5 Instruction intrinsics --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index 5e5c77b38e8e..a60c80beb5d6 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -1,9 +1,8 @@
//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 985f41f3a7d9..ac48e1dc30b0 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1,9 +1,8 @@
//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -1001,6 +1000,7 @@ bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
IntegerType *DestTy, BasicBlock *LoopB) {
Type *OrigTy = In->getType();
+ assert(!OrigTy->isVoidTy() && "Invalid instruction to promote");
// Leave boolean values alone.
if (!In->getType()->isIntegerTy(1))
@@ -1081,7 +1081,8 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
[](Instruction &In) { return &In; });
for (Instruction *In : LoopIns)
- promoteTo(In, DestTy, LoopB);
+ if (!In->isTerminator())
+ promoteTo(In, DestTy, LoopB);
// Fix up the PHI nodes in the exit block.
Instruction *EndI = ExitB->getFirstNonPHI();
@@ -1522,7 +1523,7 @@ Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
ParsedValues &PV) {
IRBuilder<> B(&*At);
Module *M = At->getParent()->getParent()->getParent();
- Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
+ Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
Value *P = PV.P, *Q = PV.Q, *P0 = P;
unsigned IC = PV.IterCount;
@@ -2252,10 +2253,8 @@ CleanupAndExit:
Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
Type *VoidTy = Type::getVoidTy(Ctx);
Module *M = Func->getParent();
- Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
- Int32PtrTy, Int32PtrTy, Int32Ty);
- Function *Fn = cast<Function>(CF);
- Fn->setLinkage(Function::ExternalLinkage);
+ FunctionCallee Fn = M->getOrInsertFunction(
+ HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
const SCEV *OneS = SE->getConstant(Int32Ty, 1);
const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index fb5752ade1de..d1a153920e5e 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -1,9 +1,8 @@
//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
index 9579c8b6df16..aabae009d7c3 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info *- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index d83bcbc41553..2961e16cc9dc 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 908ce24136c7..0e6555024303 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -1,9 +1,8 @@
//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -113,6 +112,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::COPY:
case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
break;
}
@@ -168,6 +168,7 @@ bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
case TargetOpcode::EH_LABEL:
case TargetOpcode::COPY:
case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
break;
}
Packet.push_back(SU);
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 585a7858ad2b..fb0a7abd339b 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -1,9 +1,8 @@
//===- HexagonMachineScheduler.h - Custom Hexagon MI scheduler --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
index b7b0de0efaea..2fcefe6a4ef6 100644
--- a/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
@@ -1,9 +1,8 @@
//===--- HexagonMapAsm2IntrinV62.gen.td -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
index c29a75e6fe74..7293075532c6 100644
--- a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
@@ -1,9 +1,8 @@
//===--- HexagonMapAsm2IntrinV65.gen.td -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index f2a6627c99be..db44901ca706 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -1,9 +1,8 @@
//===- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 232946ec1579..212cf03bee67 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -1,9 +1,8 @@
//===--- HexagonOperands.td -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index c3a5bd5d57bf..547da9fd598f 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -1,9 +1,8 @@
//===- HexagonOptAddrMode.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This implements a Hexagon-specific pass to optimize addressing mode for
diff --git a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index 101de3d8fbee..d00fc23102a5 100644
--- a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -1,9 +1,8 @@
//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 89177564057e..fb731f56bfbf 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -1,9 +1,8 @@
//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -279,7 +278,7 @@ class Su_ni1<PatFrag Op>
if (hasOneUse(N)){
// Check if Op1 is an immediate operand.
SDValue Op1 = N->getOperand(1);
- return !dyn_cast<ConstantSDNode>(Op1);
+ return !isa<ConstantSDNode>(Op1);
}
return false;}],
Op.OperandTransform>;
@@ -3082,7 +3081,7 @@ def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
def: Pat<(HexagonBARRIER), (Y2_barrier)>;
-def: Pat<(trap), (J2_trap0 (i32 0))>;
+def: Pat<(trap), (PS_crash)>;
// Read cycle counter.
def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
diff --git a/lib/Target/Hexagon/HexagonPatternsV65.td b/lib/Target/Hexagon/HexagonPatternsV65.td
index 50b76847b563..4cd45ecbe1a1 100644
--- a/lib/Target/Hexagon/HexagonPatternsV65.td
+++ b/lib/Target/Hexagon/HexagonPatternsV65.td
@@ -1,9 +1,8 @@
//==- HexagonPatternsV65.td -------------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 3c588a89b0da..8f761d2d4805 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -1,9 +1,8 @@
//===-- HexagonPeephole.cpp - Hexagon Peephole Optimizations --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// This peephole pass optimizes in the following cases.
// 1. Optimizes redundant sign extends for the following case
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
index b9748c7e189c..7dd25d7d93d5 100644
--- a/lib/Target/Hexagon/HexagonPseudo.td
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -1,9 +1,8 @@
//===--- HexagonPseudo.td -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -560,3 +559,8 @@ defm PS_storerh : NewCircularStore<IntRegs, HalfWordAccess>;
defm PS_storerf : NewCircularStore<IntRegs, HalfWordAccess>;
defm PS_storeri : NewCircularStore<IntRegs, WordAccess>;
defm PS_storerd : NewCircularStore<DoubleRegs, WordAccess>;
+
+// A pseudo that generates a runtime crash. This is used to implement
+// __builtin_trap.
+let hasSideEffects = 1, isPseudo = 1, isCodeGenOnly = 1, isSolo = 1 in
+def PS_crash: InstHexagon<(outs), (ins), "", [], "", PSEUDO, TypePSEUDO>;
diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp b/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 413bc8edf2b6..910a17540e6e 100644
--- a/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -1,9 +1,8 @@
//===- HexagonRDFOpt.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 9b8f4e07376f..4f5f750e5842 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -287,7 +286,7 @@ unsigned HexagonRegisterInfo::getRARegister() const {
}
-unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+Register HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
const HexagonFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF))
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 3e7b63a462f0..fc166b5a3410 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -1,9 +1,8 @@
//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -67,7 +66,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index da90911e2c05..f12189052699 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index 1024198e9b3f..0834e9000460 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -1,9 +1,8 @@
//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV5.td b/lib/Target/Hexagon/HexagonScheduleV5.td
index 9a893f6dde02..ba0da2c196ab 100644
--- a/lib/Target/Hexagon/HexagonScheduleV5.td
+++ b/lib/Target/Hexagon/HexagonScheduleV5.td
@@ -1,9 +1,8 @@
//=-HexagonScheduleV5.td - HexagonV5 Scheduling Definitions --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td
index ca738be5d6ef..f88dd5d2056d 100644
--- a/lib/Target/Hexagon/HexagonScheduleV55.td
+++ b/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -1,9 +1,8 @@
//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td
index 861a8d2b0339..c6539597a9e7 100644
--- a/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -1,9 +1,8 @@
//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV62.td b/lib/Target/Hexagon/HexagonScheduleV62.td
index 1c274191277c..782d76760992 100644
--- a/lib/Target/Hexagon/HexagonScheduleV62.td
+++ b/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -1,9 +1,8 @@
//=-HexagonScheduleV62.td - HexagonV62 Scheduling Definitions *- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonScheduleV65.td b/lib/Target/Hexagon/HexagonScheduleV65.td
index 46a79d521795..ac64410e559b 100644
--- a/lib/Target/Hexagon/HexagonScheduleV65.td
+++ b/lib/Target/Hexagon/HexagonScheduleV65.td
@@ -1,9 +1,8 @@
//=-HexagonScheduleV65.td - HexagonV65 Scheduling Definitions *- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV66.td b/lib/Target/Hexagon/HexagonScheduleV66.td
index 38e3d21d3701..56dc59e2a948 100644
--- a/lib/Target/Hexagon/HexagonScheduleV66.td
+++ b/lib/Target/Hexagon/HexagonScheduleV66.td
@@ -1,9 +1,8 @@
//=-HexagonScheduleV66.td - HexagonV66 Scheduling Definitions *- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 002e87fb32ce..c5ba7ced4c30 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index a83a8efb7588..af8b8318b059 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 55de25120943..bd4254aea276 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -1,9 +1,8 @@
//=== HexagonSplitConst32AndConst64.cpp - split CONST32/Const64 into HI/LO ===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index e018785f24d8..013eede2d414 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -1,9 +1,8 @@
//===- HexagonSplitDouble.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -153,8 +152,8 @@ bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
}
bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
- for (auto &I : MI->memoperands())
- if (I->isVolatile())
+ for (auto &MO : MI->memoperands())
+ if (MO->isVolatile() || MO->isAtomic())
return true;
return false;
}
diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 61c2121163b8..b8b61517ff95 100644
--- a/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -1,9 +1,8 @@
//===- HexagonStoreWidening.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Replace sequences of "narrow" stores to adjacent memory locations with
@@ -338,8 +337,7 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
return false;
OG.push_back(FirstMI);
- MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
- InstrGroup::iterator I = Begin+1;
+ MachineInstr *S1 = FirstMI;
// Pow2Num will be the largest number of elements in OG such that the sum
// of sizes of stores 0...Pow2Num-1 will be a power of 2.
@@ -351,8 +349,8 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
// does not exceed the limit (MaxSize).
// Keep track of when the total size covered is a power of 2, since
// this is a size a single store can cover.
- while (I != End) {
- S2 = *I;
+ for (InstrGroup::iterator I = Begin + 1; I != End; ++I) {
+ MachineInstr *S2 = *I;
// Stores are sorted, so if S1 and S2 are not adjacent, there won't be
// any other store to fill the "hole".
if (!storesAreAdjacent(S1, S2))
@@ -372,7 +370,6 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
break;
S1 = S2;
- ++I;
}
// The stores don't add up to anything that can be widened. Clean up.
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 9c77135c2f2f..7ec63a642b0c 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -1,9 +1,8 @@
//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 3a5acb53682c..007423ef1902 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -1,9 +1,8 @@
//===- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index ddfda7e27793..80b8480448fe 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "HexagonMachineScheduler.h"
#include "HexagonTargetObjectFile.h"
#include "HexagonTargetTransformInfo.h"
+#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index a7c6a3437fbc..7ee4474e90e3 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -1,9 +1,8 @@
//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 2185bf8eebc6..fdcc41a4ca41 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- HexagonTargetObjectFile.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -239,10 +238,7 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return false;
}
- Type *GType = GVar->getType();
- if (PointerType *PT = dyn_cast<PointerType>(GType))
- GType = PT->getElementType();
-
+ Type *GType = GVar->getValueType();
if (isa<ArrayType>(GType)) {
LLVM_DEBUG(dbgs() << "no, is an array\n");
return false;
@@ -342,7 +338,7 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- const Type *GTy = GO->getType()->getElementType();
+ const Type *GTy = GO->getValueType();
unsigned Size = getSmallestAddressableSize(GTy, GO, TM);
// If we have -ffunction-section or -fdata-section then we should emit the
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
index 18863630fde2..b36282578950 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- HexagonTargetObjectFile.h -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonTargetStreamer.h b/lib/Target/Hexagon/HexagonTargetStreamer.h
index e19c404450e6..c5200b76933e 100644
--- a/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- HexagonTargetStreamer.h - Hexagon Target Streamer ------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index c942f645aa88..38062e8e922c 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
@@ -161,14 +160,15 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned VecWidth = VecTy->getBitWidth();
if (useHVX() && isTypeForHVX(VecTy)) {
unsigned RegWidth = getRegisterBitWidth(true);
- Alignment = std::min(Alignment, RegWidth/8);
+ assert(RegWidth && "Non-zero vector register width expected");
// Cost of HVX loads.
if (VecWidth % RegWidth == 0)
return VecWidth / RegWidth;
// Cost of constructing HVX vector from scalar loads.
+ Alignment = std::min(Alignment, RegWidth / 8);
unsigned AlignWidth = 8 * std::max(1u, Alignment);
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
- return 3*NumLoads;
+ return 3 * NumLoads;
}
// Non-HVX vectors.
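Aside: a worked example of the cost computation in the hunk above, with hypothetical numbers. Assuming 1024-bit HVX registers, a 2048-bit vector load is a whole number of registers and costs 2048 / 1024 = 2. A 1536-bit vector with 64-byte alignment is not, so Alignment = min(64, 1024/8) = 64, AlignWidth = 8 * 64 = 512 bits, NumLoads = alignTo(1536, 512) / 512 = 3, and the returned cost is 3 * 3 = 9.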
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 5c6f85584ec2..27e8fc019007 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -1,9 +1,8 @@
//==- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp
index 929ac2bd0d93..a9692f42e468 100644
--- a/lib/Target/Hexagon/HexagonVExtract.cpp
+++ b/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -1,9 +1,8 @@
//===- HexagonVExtract.cpp ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass will replace multiple occurrences of V6_extractw from the same
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 722699907ca0..3619e4c239d7 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -1,9 +1,8 @@
//===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index ca70cf967a46..daa86b6f5393 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -1,9 +1,8 @@
//===- HexagonPacketizer.h - VLIW packetizer --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 9d1073346c72..e5df1d456c1e 100644
--- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -1,9 +1,8 @@
//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -239,10 +238,17 @@ namespace {
// used over the backedge. This is the value that gets reused from a
// previous iteration.
Instruction *BackedgeInst = nullptr;
+ std::map<Instruction *, DepChain *> DepChains;
+ int Iterations = -1;
ReuseValue() = default;
- void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; }
+ void reset() {
+ Inst2Replace = nullptr;
+ BackedgeInst = nullptr;
+ DepChains.clear();
+ Iterations = -1;
+ }
bool isDefined() { return Inst2Replace != nullptr; }
};
@@ -289,10 +295,10 @@ namespace {
void findDepChainFromPHI(Instruction *I, DepChain &D);
void reuseValue();
Value *findValueInBlock(Value *Op, BasicBlock *BB);
- bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
- DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2);
+ DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
bool isEquivalentOperation(Instruction *I1, Instruction *I2);
bool canReplace(Instruction *I);
+ bool isCallInstCommutative(CallInst *C);
};
} // end anonymous namespace
@@ -327,6 +333,70 @@ bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
return doVLCR();
}
+bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
+ switch (C->getCalledFunction()->getIntrinsicID()) {
+ case Intrinsic::hexagon_V6_vaddb:
+ case Intrinsic::hexagon_V6_vaddb_128B:
+ case Intrinsic::hexagon_V6_vaddh:
+ case Intrinsic::hexagon_V6_vaddh_128B:
+ case Intrinsic::hexagon_V6_vaddw:
+ case Intrinsic::hexagon_V6_vaddw_128B:
+ case Intrinsic::hexagon_V6_vaddubh:
+ case Intrinsic::hexagon_V6_vaddubh_128B:
+ case Intrinsic::hexagon_V6_vadduhw:
+ case Intrinsic::hexagon_V6_vadduhw_128B:
+ case Intrinsic::hexagon_V6_vaddhw:
+ case Intrinsic::hexagon_V6_vaddhw_128B:
+ case Intrinsic::hexagon_V6_vmaxb:
+ case Intrinsic::hexagon_V6_vmaxb_128B:
+ case Intrinsic::hexagon_V6_vmaxh:
+ case Intrinsic::hexagon_V6_vmaxh_128B:
+ case Intrinsic::hexagon_V6_vmaxw:
+ case Intrinsic::hexagon_V6_vmaxw_128B:
+ case Intrinsic::hexagon_V6_vmaxub:
+ case Intrinsic::hexagon_V6_vmaxub_128B:
+ case Intrinsic::hexagon_V6_vmaxuh:
+ case Intrinsic::hexagon_V6_vmaxuh_128B:
+ case Intrinsic::hexagon_V6_vminub:
+ case Intrinsic::hexagon_V6_vminub_128B:
+ case Intrinsic::hexagon_V6_vminuh:
+ case Intrinsic::hexagon_V6_vminuh_128B:
+ case Intrinsic::hexagon_V6_vminb:
+ case Intrinsic::hexagon_V6_vminb_128B:
+ case Intrinsic::hexagon_V6_vminh:
+ case Intrinsic::hexagon_V6_vminh_128B:
+ case Intrinsic::hexagon_V6_vminw:
+ case Intrinsic::hexagon_V6_vminw_128B:
+ case Intrinsic::hexagon_V6_vmpyub:
+ case Intrinsic::hexagon_V6_vmpyub_128B:
+ case Intrinsic::hexagon_V6_vmpyuh:
+ case Intrinsic::hexagon_V6_vmpyuh_128B:
+ case Intrinsic::hexagon_V6_vavgub:
+ case Intrinsic::hexagon_V6_vavgub_128B:
+ case Intrinsic::hexagon_V6_vavgh:
+ case Intrinsic::hexagon_V6_vavgh_128B:
+ case Intrinsic::hexagon_V6_vavguh:
+ case Intrinsic::hexagon_V6_vavguh_128B:
+ case Intrinsic::hexagon_V6_vavgw:
+ case Intrinsic::hexagon_V6_vavgw_128B:
+ case Intrinsic::hexagon_V6_vavgb:
+ case Intrinsic::hexagon_V6_vavgb_128B:
+ case Intrinsic::hexagon_V6_vavguw:
+ case Intrinsic::hexagon_V6_vavguw_128B:
+ case Intrinsic::hexagon_V6_vabsdiffh:
+ case Intrinsic::hexagon_V6_vabsdiffh_128B:
+ case Intrinsic::hexagon_V6_vabsdiffub:
+ case Intrinsic::hexagon_V6_vabsdiffub_128B:
+ case Intrinsic::hexagon_V6_vabsdiffuh:
+ case Intrinsic::hexagon_V6_vabsdiffuh_128B:
+ case Intrinsic::hexagon_V6_vabsdiffw:
+ case Intrinsic::hexagon_V6_vabsdiffw_128B:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
Instruction *I2) {
if (!I1->isSameOperationAs(I2))
@@ -361,13 +431,19 @@ bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
- if (II &&
- (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
- II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
+ if (!II)
+ return true;
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::hexagon_V6_hi:
+ case Intrinsic::hexagon_V6_lo:
+ case Intrinsic::hexagon_V6_hi_128B:
+ case Intrinsic::hexagon_V6_lo_128B:
LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
return false;
+ default:
+ return true;
}
- return true;
}
void HexagonVectorLoopCarriedReuse::findValueToReuse() {
for (auto *D : Dependences) {
@@ -428,27 +504,85 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
int NumOperands = I->getNumOperands();
- for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
- Value *Op = I->getOperand(OpNo);
- Instruction *OpInst = dyn_cast<Instruction>(Op);
- if (!OpInst)
- continue;
-
- Value *BEOp = BEUser->getOperand(OpNo);
- Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
-
- if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) {
- BEUser = nullptr;
- break;
+      // Take the operands of each PNUser one by one and try to find a
+      // DepChain with every operand of the BEUser. If any operand of the
+      // BEUser has a DepChain with the current operand of the PNUser, break
+      // the matcher loop. Keep doing this for every PNUser operand. If a
+      // PNUser operand has no DepChain with any of the BEUser operands,
+      // break the outer matcher loop, mark the BEUser as null and reset the
+      // ReuseCandidate. This ensures that a DepChain exists between every
+      // PNUser operand and some BEUser operand, and that the DepChains are
+      // independent of the operand positions in PNUser and BEUser.
+ std::map<Instruction *, DepChain *> DepChains;
+ CallInst *C1 = dyn_cast<CallInst>(I);
+ if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
+ bool Found = false;
+ for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+ Value *Op = I->getOperand(OpNo);
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
+ Found = false;
+ for (int T = 0; T < NumOperands; ++T) {
+ Value *BEOp = BEUser->getOperand(T);
+ Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+ if (!OpInst && !BEOpInst) {
+ if (Op == BEOp) {
+ Found = true;
+ break;
+ }
+ }
+
+ if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
+ continue;
+
+ DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+ if (D) {
+ Found = true;
+ DepChains[OpInst] = D;
+ break;
+ }
+ }
+ if (!Found) {
+ BEUser = nullptr;
+ break;
+ }
+ }
+ } else {
+
+ for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+ Value *Op = I->getOperand(OpNo);
+ Value *BEOp = BEUser->getOperand(OpNo);
+
+ Instruction *OpInst = dyn_cast<Instruction>(Op);
+ if (!OpInst) {
+ if (Op == BEOp)
+ continue;
+ // Do not allow reuse to occur when the operands may be different
+ // values.
+ BEUser = nullptr;
+ break;
+ }
+
+ Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+ DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+ if (D) {
+ DepChains[OpInst] = D;
+ } else {
+ BEUser = nullptr;
+ break;
+ }
}
}
if (BEUser) {
LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
ReuseCandidate.Inst2Replace = I;
ReuseCandidate.BackedgeInst = BEUser;
+ ReuseCandidate.DepChains = DepChains;
+ ReuseCandidate.Iterations = Iters;
return;
- } else
- ReuseCandidate.reset();
+ }
+ ReuseCandidate.reset();
}
}
}
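Aside: a minimal standalone sketch (not part of the patch, and not LLVM code) of the pairing strategy used above for commutative users — every operand of the header-side instruction must find some operand of the backedge-side instruction it has a dependence chain with, regardless of operand position. hasDepChain is a hypothetical stand-in for getDepChainBtwn.

// Sketch only; Operand and hasDepChain are illustrative placeholders.
#include <vector>

struct Operand { int Id; };

static bool hasDepChain(const Operand &A, const Operand &B) {
  // Placeholder: in the pass this asks whether a DepChain of the right
  // iteration distance connects the two instructions.
  return A.Id == B.Id;
}

static bool allOperandsPaired(const std::vector<Operand> &HeaderOps,
                              const std::vector<Operand> &BackedgeOps) {
  for (const Operand &A : HeaderOps) {
    bool Found = false;
    for (const Operand &B : BackedgeOps) {
      if (hasDepChain(A, B)) {
        Found = true;
        break;
      }
    }
    if (!Found)
      return false; // mirrors "BEUser = nullptr; break;" in the pass
  }
  return true;
}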
@@ -468,27 +602,10 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
Instruction *BEInst = ReuseCandidate.BackedgeInst;
int NumOperands = Inst2Replace->getNumOperands();
- std::map<Instruction *, DepChain *> DepChains;
- int Iterations = -1;
+ std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
+ int Iterations = ReuseCandidate.Iterations;
BasicBlock *LoopPH = CurLoop->getLoopPreheader();
-
- for (int i = 0; i < NumOperands; ++i) {
- Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i));
- if(!I)
- continue;
- else {
- Instruction *J = cast<Instruction>(BEInst->getOperand(i));
- DepChain *D = getDepChainBtwn(I, J);
-
- assert(D &&
- "No DepChain between corresponding operands in ReuseCandidate\n");
- if (Iterations == -1)
- Iterations = D->iterations();
- assert(Iterations == D->iterations() && "Iterations mismatch");
- DepChains[I] = D;
- }
- }
-
+ assert(!DepChains.empty() && "No DepChains");
LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
SmallVector<Instruction *, 4> InstsInPreheader;
@@ -597,20 +714,11 @@ void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
}
}
-bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1,
- Instruction *I2,
- int Iters) {
- for (auto *D : Dependences) {
- if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
- return true;
- }
- return false;
-}
-
DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
- Instruction *I2) {
+ Instruction *I2,
+ int Iters) {
for (auto *D : Dependences) {
- if (D->front() == I1 && D->back() == I2)
+ if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
return D;
}
return nullptr;
diff --git a/lib/Target/Hexagon/HexagonVectorPrint.cpp b/lib/Target/Hexagon/HexagonVectorPrint.cpp
index 18d2f2f4acde..65a8dcd75bdc 100644
--- a/lib/Target/Hexagon/HexagonVectorPrint.cpp
+++ b/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -1,9 +1,8 @@
//===- HexagonVectorPrint.cpp - Generate vector printing instructions -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index af1e5429d0c2..7c0770926abe 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -1,13 +1,11 @@
//===-- HexagonAsmBackend.cpp - Hexagon Assembler Backend -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonFixupKinds.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 6543d8313900..3c64893bae45 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -1,9 +1,8 @@
//===- HexagonBaseInfo.h - Top level definitions for Hexagon ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index e82e6b559f62..f678bf49322e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -1,14 +1,13 @@
//===-- HexagonELFObjectWriter.cpp - Hexagon Target Descriptions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonFixupKinds.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
index 347327669ad9..8b0ddbcb949f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h
@@ -1,9 +1,8 @@
//===-- HexagonFixupKinds.h - Hexagon Specific Fixup Entries --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 687e79a7dbab..6b9e63f5ac9e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -1,9 +1,8 @@
//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "HexagonInstPrinter.h"
-#include "HexagonAsmPrinter.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 17af046ce090..ca32c3c1f50f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -1,9 +1,8 @@
//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 446b3b2ce668..f3da67562320 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index efeff2436234..e1f0a26cf858 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 53f3cba052bc..fcd3758600c1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -1,9 +1,8 @@
//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonMCChecker.h"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index 7577baace20c..bc55ade9ccd7 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -1,9 +1,8 @@
//===- HexagonMCChecker.h - Instruction bundle checking ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 3382684803aa..95e23c99868a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -1,14 +1,12 @@
//===- HexagonMCCodeEmitter.cpp - Hexagon Target Descriptions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonFixupKinds.h"
#include "MCTargetDesc/HexagonMCExpr.h"
@@ -378,7 +376,7 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
State.Bundle = &MI;
State.Index = 0;
size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
- uint64_t Features = computeAvailableFeatures(STI.getFeatureBits());
+ FeatureBitset Features = computeAvailableFeatures(STI.getFeatureBits());
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index fcea63db23a3..9e86dc8e4989 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -1,9 +1,8 @@
//===- HexagonMCCodeEmitter.h - Hexagon Target Descriptions -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -83,9 +82,10 @@ private:
// Return parse bits for instruction `MCI' inside bundle `MCB'
uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 3eaef9ac7410..ed571188c1e8 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -1,9 +1,8 @@
//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index f0654d612b4b..3cbb8600ce7a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -1,9 +1,8 @@
//===- HexagonMCDuplexInfo.cpp - Instruction bundle checking --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index f304bc50530f..f2432883af6f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -1,9 +1,8 @@
//=== HexagonMCELFStreamer.cpp - Hexagon subclass of MCELFStreamer -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -60,7 +59,7 @@ HexagonMCELFStreamer::HexagonMCELFStreamer(
MCII(createHexagonMCInstrInfo()) {}
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
assert(MCB.getOpcode() == Hexagon::BUNDLE);
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index c02bef8f06f7..6248bd25d433 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -1,9 +1,8 @@
//===- HexagonMCELFStreamer.h - Hexagon subclass of MCElfStreamer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,8 +30,7 @@ public:
std::unique_ptr<MCCodeEmitter> Emitter,
MCAssembler *Assembler);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index f0689252b396..1e708ba1bcd3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -1,10 +1,9 @@
//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes
//----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
index acfd996ccf82..59b1326adf0c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -1,9 +1,8 @@
//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index a11aa92ccbe1..0750bfe74f76 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -1,9 +1,8 @@
//===- HexagonMCInstrInfo.cpp - Hexagon sub-class of MCInst ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCExpr.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index d040bea23b6d..829f872c453e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -1,9 +1,8 @@
//===- HexagonMCInstrInfo.cpp - Utility functions on Hexagon MCInsts ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index 4281144acaee..7d45b4fcfdde 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -1,9 +1,8 @@
//===----- HexagonMCShuffler.cpp - MC bundle shuffling --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,6 @@
#define DEBUG_TYPE "hexagon-shuffle"
#include "MCTargetDesc/HexagonMCShuffler.h"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonShuffler.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index 59658999d24d..3410c0ddbd84 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -1,9 +1,8 @@
//===- HexagonMCShuffler.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 92ce7345f358..9c50b25156c3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/HexagonMCTargetDesc.h"
-#include "Hexagon.h"
#include "HexagonDepArch.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index d6ea664222d3..7b42460a2a1c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,7 +63,6 @@ class StringRef;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheHexagonTarget();
extern cl::opt<bool> HexagonDisableCompound;
extern cl::opt<bool> HexagonDisableDuplex;
extern const InstrStage HexagonStages[];
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index f4ee2bbfaaaa..18c7790a17cc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -1,9 +1,8 @@
//===- HexagonShuffler.cpp - Instruction bundle shuffling -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,6 @@
#define DEBUG_TYPE "hexagon-shuffle"
#include "MCTargetDesc/HexagonShuffler.h"
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
@@ -23,6 +21,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index ef50c5bebbfb..bf3bad36dfe5 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -1,9 +1,8 @@
//===- HexagonShuffler.h - Instruction bundle shuffling ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,8 +14,8 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H
-#include "Hexagon.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index 4339fa2089d9..7702024f87bd 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -1,9 +1,8 @@
//===- RDFCopy.cpp --------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
index 7b2e78bdf633..1450ab884849 100644
--- a/lib/Target/Hexagon/RDFCopy.h
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -1,9 +1,8 @@
//===- RDFCopy.h ------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 8dcd485d65e9..52178931aa6d 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -1,9 +1,8 @@
//===--- RDFDeadCode.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/RDFDeadCode.h b/lib/Target/Hexagon/RDFDeadCode.h
index 8977e730b855..7f91977e1d6c 100644
--- a/lib/Target/Hexagon/RDFDeadCode.h
+++ b/lib/Target/Hexagon/RDFDeadCode.h
@@ -1,9 +1,8 @@
//===--- RDFDeadCode.h ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index d8ca08e70505..9d8f706b8a0f 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -1,9 +1,8 @@
//===- RDFGraph.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,7 +54,6 @@ raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
auto &TRI = P.G.getTRI();
if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
@@ -66,7 +64,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
auto NA = P.G.addr<NodeBase*>(P.Obj);
uint16_t Attrs = NA.Addr->getAttrs();
@@ -116,7 +113,6 @@ static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
OS << '!';
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
@@ -134,7 +130,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
@@ -146,7 +141,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<NodeAddr<PhiUseNode*>> &P) {
printRefHeader(OS, P.Obj, P.G);
@@ -162,7 +156,6 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
switch (P.Obj.Addr->getKind()) {
case NodeAttrs::Def:
@@ -178,7 +171,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
@@ -189,7 +181,6 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
@@ -224,16 +215,13 @@ namespace {
} // end anonymous namespace
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi ["
<< PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
-template<>
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<StmtNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
const MachineInstr &MI = *P.Obj.Addr->getCode();
unsigned Opc = MI.getOpcode();
OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
@@ -258,7 +246,6 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<NodeAddr<InstrNode*>> &P) {
switch (P.Obj.Addr->getKind()) {
@@ -275,7 +262,6 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<NodeAddr<BlockNode*>> &P) {
MachineBasicBlock *BB = P.Obj.Addr->getCode();
@@ -309,9 +295,7 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-template<>
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<FuncNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: "
<< P.Obj.Addr->getCode()->getName() << '\n';
for (auto I : P.Obj.Addr->members(P.G))
@@ -320,7 +304,6 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
OS << '{';
for (auto I : P.Obj)
@@ -329,13 +312,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
P.Obj.print(OS);
return OS;
}
-template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<DataFlowGraph::DefStack> &P) {
for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index e3abb0e22f76..585f43e116f9 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -1,9 +1,8 @@
//===- RDFGraph.h -----------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -925,10 +924,6 @@ namespace rdf {
return MM;
}
- template <typename T> struct Print;
- template <typename T>
- raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P);
-
template <typename T>
struct Print {
Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
@@ -943,6 +938,29 @@ namespace rdf {
: Print<NodeAddr<T>>(x, g) {}
};
+ raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<PhiUseNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<StmtNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<InstrNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<BlockNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<NodeAddr<FuncNode *>> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
+ raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
+ raw_ostream &operator<<(raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P);
+
} // end namespace rdf
} // end namespace llvm
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index 9ff48d25a026..9cd304aa10bc 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -1,9 +1,8 @@
//===- RDFLiveness.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -58,7 +57,6 @@ static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
namespace llvm {
namespace rdf {
- template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
OS << '{';
for (auto &I : P.Obj) {
diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h
index eaeb4ea115b3..ea4890271726 100644
--- a/lib/Target/Hexagon/RDFLiveness.h
+++ b/lib/Target/Hexagon/RDFLiveness.h
@@ -1,9 +1,8 @@
//===- RDFLiveness.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -143,6 +142,8 @@ namespace rdf {
unsigned Nest, unsigned MaxNest);
};
+ raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
+
} // end namespace rdf
} // end namespace llvm
diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp
index 9408c5dc3952..6e0f33695f0e 100644
--- a/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/lib/Target/Hexagon/RDFRegisters.cpp
@@ -1,9 +1,8 @@
//===- RDFRegisters.cpp ---------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h
index 459850d87df1..646233bacda5 100644
--- a/lib/Target/Hexagon/RDFRegisters.h
+++ b/lib/Target/Hexagon/RDFRegisters.h
@@ -1,9 +1,8 @@
//===- RDFRegisters.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 78e2f2b2ddb3..d77b235d0077 100644
--- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h
new file mode 100644
index 000000000000..902b61cb5b6c
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- HexagonTargetInfo.h - Hexagon Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_TARGETINFO_HEXAGONTARGETINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_TARGETINFO_HEXAGONTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheHexagonTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_TARGETINFO_HEXAGONTARGETINFO_H
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index a77b2b8f15ca..9af8a0b35b2f 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -1,16 +1,16 @@
//===-- LanaiAsmParser.cpp - Parse Lanai assembly to MCInst instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "Lanai.h"
#include "LanaiAluCode.h"
#include "LanaiCondCode.h"
+#include "LanaiInstrInfo.h"
#include "MCTargetDesc/LanaiMCExpr.h"
+#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
diff --git a/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index 609b650e5d32..25ae7c521706 100644
--- a/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -1,9 +1,8 @@
//===- LanaiDisassembler.cpp - Disassembler for Lanai -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,8 +12,10 @@
#include "LanaiDisassembler.h"
-#include "Lanai.h"
-#include "LanaiSubtarget.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "LanaiInstrInfo.h"
+#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
index e0c19e8ea644..ae821df303d8 100644
--- a/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
+++ b/lib/Target/Lanai/Disassembler/LanaiDisassembler.h
@@ -1,9 +1,8 @@
//===- LanaiDisassembler.cpp - Disassembler for Lanai -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/Lanai.h b/lib/Target/Lanai/Lanai.h
index c1fdf793305b..2f06ea91ab03 100644
--- a/lib/Target/Lanai/Lanai.h
+++ b/lib/Target/Lanai/Lanai.h
@@ -1,9 +1,8 @@
//===-- Lanai.h - Top-level interface for Lanai representation --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,12 +14,7 @@
#ifndef LLVM_LIB_TARGET_LANAI_LANAI_H
#define LLVM_LIB_TARGET_LANAI_LANAI_H
-#include "LanaiAluCode.h"
-#include "LanaiCondCode.h"
-#include "MCTargetDesc/LanaiBaseInfo.h"
-#include "MCTargetDesc/LanaiMCTargetDesc.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Pass.h"
namespace llvm {
class FunctionPass;
@@ -45,7 +39,6 @@ FunctionPass *createLanaiMemAluCombinerPass();
// operations.
FunctionPass *createLanaiSetflagAluCombinerPass();
-Target &getTheLanaiTarget();
} // namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_LANAI_H
diff --git a/lib/Target/Lanai/Lanai.td b/lib/Target/Lanai/Lanai.td
index 73d080457034..c6d949f42047 100644
--- a/lib/Target/Lanai/Lanai.td
+++ b/lib/Target/Lanai/Lanai.td
@@ -1,9 +1,8 @@
//===- Lanai.td - Describe the Lanai Target Machine --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiAluCode.h b/lib/Target/Lanai/LanaiAluCode.h
index d5145694fe46..728332bff00b 100644
--- a/lib/Target/Lanai/LanaiAluCode.h
+++ b/lib/Target/Lanai/LanaiAluCode.h
@@ -1,9 +1,8 @@
//===-- LanaiAluCode.h - ALU operator encoding ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiAsmPrinter.cpp b/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 607b2a97b29f..64d963475e1a 100644
--- a/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- LanaiAsmPrinter.cpp - Lanai LLVM assembly writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,11 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/LanaiInstPrinter.h"
-#include "Lanai.h"
+#include "MCTargetDesc/LanaiInstPrinter.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
#include "LanaiInstrInfo.h"
#include "LanaiMCInstLower.h"
#include "LanaiTargetMachine.h"
+#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -49,8 +50,7 @@ public:
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void EmitInstruction(const MachineInstr *MI) override;
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;
@@ -109,7 +109,6 @@ void LanaiAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
// PrintAsmOperand - Print out an operand for an inline asm expression.
bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned /*AsmVariant*/,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -139,7 +138,7 @@ bool LanaiAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
default:
- return true; // Unknown modifier.
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
}
}
printOperand(MI, OpNo, O);
diff --git a/lib/Target/Lanai/LanaiCallingConv.td b/lib/Target/Lanai/LanaiCallingConv.td
index 056b329c33c5..e2306725290a 100644
--- a/lib/Target/Lanai/LanaiCallingConv.td
+++ b/lib/Target/Lanai/LanaiCallingConv.td
@@ -1,9 +1,8 @@
//===- LanaiCallingConv.td - Calling Conventions Lanai -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index ea76a1128373..09c63dca23e2 100644
--- a/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -1,9 +1,8 @@
//===-- LanaiDelaySlotFiller.cpp - Lanai delay slot filler ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiFrameLowering.cpp b/lib/Target/Lanai/LanaiFrameLowering.cpp
index 0723668c743e..142c09c504cc 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- LanaiFrameLowering.cpp - Lanai Frame Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,8 +12,8 @@
#include "LanaiFrameLowering.h"
+#include "LanaiAluCode.h"
#include "LanaiInstrInfo.h"
-#include "LanaiMachineFunctionInfo.h"
#include "LanaiSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Lanai/LanaiFrameLowering.h b/lib/Target/Lanai/LanaiFrameLowering.h
index ca690d513fc2..5fe4535543ec 100644
--- a/lib/Target/Lanai/LanaiFrameLowering.h
+++ b/lib/Target/Lanai/LanaiFrameLowering.h
@@ -1,9 +1,8 @@
//===-- LanaiFrameLowering.h - Define frame lowering for Lanai --*- C++-*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
#define LLVM_LIB_TARGET_LANAI_LANAIFRAMELOWERING_H
-#include "Lanai.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index 5081cfbe4922..aadcdc43f560 100644
--- a/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- LanaiISelDAGToDAG.cpp - A dag to dag inst selector for Lanai ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Lanai.h"
+#include "LanaiAluCode.h"
#include "LanaiMachineFunctionInfo.h"
#include "LanaiRegisterInfo.h"
#include "LanaiSubtarget.h"
diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp
index 0411704be6fb..1ed078bb433f 100644
--- a/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- LanaiISelLowering.cpp - Lanai DAG Lowering Implementation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiISelLowering.h b/lib/Target/Lanai/LanaiISelLowering.h
index 0cde633cb41a..e7b5755e9041 100644
--- a/lib/Target/Lanai/LanaiISelLowering.h
+++ b/lib/Target/Lanai/LanaiISelLowering.h
@@ -1,9 +1,8 @@
//===-- LanaiISelLowering.h - Lanai DAG Lowering Interface -....-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiInstrFormats.td b/lib/Target/Lanai/LanaiInstrFormats.td
index 1bb6b3d26a49..4101aa912ade 100644
--- a/lib/Target/Lanai/LanaiInstrFormats.td
+++ b/lib/Target/Lanai/LanaiInstrFormats.td
@@ -1,9 +1,8 @@
//===- LanaiInstrFormats.td - Lanai Instruction Formats ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp
index 196768fdc56a..700a86069102 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- LanaiInstrInfo.cpp - Lanai Instruction Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,10 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Lanai.h"
#include "LanaiInstrInfo.h"
-#include "LanaiMachineFunctionInfo.h"
-#include "LanaiTargetMachine.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "MCTargetDesc/LanaiBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -87,7 +86,8 @@ void LanaiInstrInfo::loadRegFromStackSlot(
}
bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
- MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis * /*AA*/) const {
+ const MachineInstr &MIa, const MachineInstr &MIb,
+ AliasAnalysis * /*AA*/) const {
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
@@ -101,7 +101,7 @@ bool LanaiInstrInfo::areMemAccessesTriviallyDisjoint(
// the width doesn't overlap the offset of a higher memory access,
// then the memory accesses are different.
const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned int WidthA = 0, WidthB = 0;
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
@@ -756,7 +756,7 @@ unsigned LanaiInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
}
bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
- MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
+ const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
unsigned &Width, const TargetRegisterInfo * /*TRI*/) const {
// Handle only loads/stores with base register followed by immediate offset
// and with add as ALU op.
@@ -794,8 +794,8 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
return true;
}
-bool LanaiInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
- MachineOperand *&BaseOp,
+bool LanaiInstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
diff --git a/lib/Target/Lanai/LanaiInstrInfo.h b/lib/Target/Lanai/LanaiInstrInfo.h
index bdcf9a361b5f..d71424aeb0b1 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/lib/Target/Lanai/LanaiInstrInfo.h
@@ -1,9 +1,8 @@
//===- LanaiInstrInfo.h - Lanai Instruction Information ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,7 +35,8 @@ public:
return RegisterInfo;
}
- bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
@@ -68,11 +68,13 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const override;
- bool getMemOperandWithOffsetWidth(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
diff --git a/lib/Target/Lanai/LanaiInstrInfo.td b/lib/Target/Lanai/LanaiInstrInfo.td
index 66192b4a4704..fcf89a0b52f6 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/lib/Target/Lanai/LanaiInstrInfo.td
@@ -1,9 +1,8 @@
//===-- LanaiInstrInfo.td - Target Description for Lanai Target -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiMCInstLower.cpp b/lib/Target/Lanai/LanaiMCInstLower.cpp
index 90ede6566acf..743f4f7c6e2f 100644
--- a/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -1,9 +1,8 @@
//=-- LanaiMCInstLower.cpp - Convert Lanai MachineInstr to an MCInst --------=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiMCInstLower.h b/lib/Target/Lanai/LanaiMCInstLower.h
index 6d7818d63d87..00d3ebb05045 100644
--- a/lib/Target/Lanai/LanaiMCInstLower.h
+++ b/lib/Target/Lanai/LanaiMCInstLower.h
@@ -1,9 +1,8 @@
//===-- LanaiMCInstLower.h - Lower MachineInstr to MCInst -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp b/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
index c72271b67790..7b4e0750ba08 100644
--- a/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
+++ b/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- LanaiMachineFuctionInfo.cpp - Lanai machine function info ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiMachineFunctionInfo.h b/lib/Target/Lanai/LanaiMachineFunctionInfo.h
index 3bd9112a9e13..2c97c619c246 100644
--- a/lib/Target/Lanai/LanaiMachineFunctionInfo.h
+++ b/lib/Target/Lanai/LanaiMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===- LanaiMachineFuctionInfo.h - Lanai machine func info -------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index 54500b0e52e3..67443b771d3d 100644
--- a/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -1,9 +1,8 @@
//===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Simple pass to combine memory and ALU operations
@@ -23,7 +22,7 @@
// in the same machine basic block into one machine instruction.
//===----------------------------------------------------------------------===//
-#include "Lanai.h"
+#include "LanaiAluCode.h"
#include "LanaiTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -159,7 +158,8 @@ bool isNonVolatileMemoryOp(const MachineInstr &MI) {
const MachineMemOperand *MemOperand = *MI.memoperands_begin();
// Don't move volatile memory accesses
- if (MemOperand->isVolatile())
+ // TODO: unclear if we need to be as conservative about atomics
+ if (MemOperand->isVolatile() || MemOperand->isAtomic())
return false;
return true;
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 56a5e0ea2def..d3056a1eba8e 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- LanaiRegisterInfo.cpp - Lanai Register Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,10 @@
//===----------------------------------------------------------------------===//
#include "LanaiRegisterInfo.h"
-#include "Lanai.h"
-#include "LanaiSubtarget.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "LanaiFrameLowering.h"
+#include "LanaiInstrInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -257,12 +258,12 @@ bool LanaiRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
unsigned LanaiRegisterInfo::getRARegister() const { return Lanai::RCA; }
-unsigned
+Register
LanaiRegisterInfo::getFrameRegister(const MachineFunction & /*MF*/) const {
return Lanai::FP;
}
-unsigned LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; }
+Register LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; }
const uint32_t *
LanaiRegisterInfo::getCallPreservedMask(const MachineFunction & /*MF*/,
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.h b/lib/Target/Lanai/LanaiRegisterInfo.h
index 35f4788b2886..4e4da619d366 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -1,9 +1,8 @@
//===- LanaiRegisterInfo.h - Lanai Register Information Impl ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,8 +42,8 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
- unsigned getBaseRegister() const;
+ Register getFrameRegister(const MachineFunction &MF) const override;
+ Register getBaseRegister() const;
bool hasBasePointer(const MachineFunction &MF) const;
int getDwarfRegNum(unsigned RegNum, bool IsEH) const;
diff --git a/lib/Target/Lanai/LanaiRegisterInfo.td b/lib/Target/Lanai/LanaiRegisterInfo.td
index cf8cfe30cce9..5879dfca8d65 100644
--- a/lib/Target/Lanai/LanaiRegisterInfo.td
+++ b/lib/Target/Lanai/LanaiRegisterInfo.td
@@ -1,9 +1,8 @@
//===- LanaiRegisterInfo.td - Lanai Register defs ------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Declarations that describe the Lanai register file
diff --git a/lib/Target/Lanai/LanaiSchedule.td b/lib/Target/Lanai/LanaiSchedule.td
index 7f931c4be8bb..32763c7fdf49 100644
--- a/lib/Target/Lanai/LanaiSchedule.td
+++ b/lib/Target/Lanai/LanaiSchedule.td
@@ -1,9 +1,8 @@
//=-LanaiSchedule.td - Lanai Scheduling Definitions --*- tablegen -*-=========//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp b/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
index b71c30fe3e05..dff87a3e264d 100644
--- a/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
+++ b/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- LanaiSelectionDAGInfo.cpp - Lanai SelectionDAG Info -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiSelectionDAGInfo.h b/lib/Target/Lanai/LanaiSelectionDAGInfo.h
index bfd2be2ede09..c5650a7c1f53 100644
--- a/lib/Target/Lanai/LanaiSelectionDAGInfo.h
+++ b/lib/Target/Lanai/LanaiSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- LanaiSelectionDAGInfo.h - Lanai SelectionDAG Info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiSubtarget.cpp b/lib/Target/Lanai/LanaiSubtarget.cpp
index 0fa5e82a7a66..9a872c789bcc 100644
--- a/lib/Target/Lanai/LanaiSubtarget.cpp
+++ b/lib/Target/Lanai/LanaiSubtarget.cpp
@@ -1,9 +1,8 @@
//===- LanaiSubtarget.cpp - Lanai Subtarget Information -----------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiSubtarget.h b/lib/Target/Lanai/LanaiSubtarget.h
index 4bfa19920239..116c83a4df91 100644
--- a/lib/Target/Lanai/LanaiSubtarget.h
+++ b/lib/Target/Lanai/LanaiSubtarget.h
@@ -1,9 +1,8 @@
//=====-- LanaiSubtarget.h - Define Subtarget for the Lanai -----*- C++ -*--==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiTargetMachine.cpp b/lib/Target/Lanai/LanaiTargetMachine.cpp
index 10bd9e2c65d2..8ae0225629ab 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- LanaiTargetMachine.cpp - Define TargetMachine for Lanai ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "Lanai.h"
#include "LanaiTargetObjectFile.h"
#include "LanaiTargetTransformInfo.h"
+#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h
index 0db286ec13e7..d2ac40007e24 100644
--- a/lib/Target/Lanai/LanaiTargetMachine.h
+++ b/lib/Target/Lanai/LanaiTargetMachine.h
@@ -1,9 +1,8 @@
//===-- LanaiTargetMachine.h - Define TargetMachine for Lanai --- C++ ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index 7d165e9c5f8c..b0f7c090bb8e 100644
--- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -1,8 +1,7 @@
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.h b/lib/Target/Lanai/LanaiTargetObjectFile.h
index 99ec1956da4b..938a1e675b6a 100644
--- a/lib/Target/Lanai/LanaiTargetObjectFile.h
+++ b/lib/Target/Lanai/LanaiTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- LanaiTargetObjectFile.h - Lanai Object Info -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/LanaiTargetTransformInfo.h b/lib/Target/Lanai/LanaiTargetTransformInfo.h
index 3b5a1b88326b..63cc47dedce3 100644
--- a/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- LanaiTargetTransformInfo.h - Lanai specific TTI ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index 82fa93ea5e5e..a6ce3d5eb4ff 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- LanaiAsmBackend.cpp - Lanai Assembler Backend ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h b/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h
index ce7f83509c9b..1bc84014e736 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h
@@ -1,9 +1,8 @@
//===-- LanaiBaseInfo.h - Top level definitions for Lanai MC ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index 7676891ef981..4313fa5a82b5 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- LanaiELFObjectWriter.cpp - Lanai ELF Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,7 +34,7 @@ protected:
LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
- /*HasRelocationAddend=*/true) {}
+ /*HasRelocationAddend_=*/true) {}
unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
const MCValue & /*Target*/,
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h b/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h
index 9ff8340d2922..1e692f8d31cb 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h
@@ -1,9 +1,8 @@
//===-- LanaiFixupKinds.h - Lanai Specific Fixup Entries --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
index 2fa411fcfd87..0d42612824b4 100644
--- a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- LanaiInstPrinter.cpp - Convert Lanai MCInst to asm syntax ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,11 +11,14 @@
//===----------------------------------------------------------------------===//
#include "LanaiInstPrinter.h"
-#include "Lanai.h"
-#include "MCTargetDesc/LanaiMCExpr.h"
+#include "LanaiMCExpr.h"
+#include "LanaiAluCode.h"
+#include "LanaiCondCode.h"
+#include "MCTargetDesc/LanaiMCTargetDesc.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
index 59904fbaa318..721a129a859e 100644
--- a/lib/Target/Lanai/InstPrinter/LanaiInstPrinter.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -1,9 +1,8 @@
//= LanaiInstPrinter.h - Convert Lanai MCInst to asm syntax -------*- C++ -*--//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
-#define LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
+#define LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
@@ -63,4 +62,4 @@ private:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_LANAI_INSTPRINTER_LANAIINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIINSTPRINTER_H
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
index 7e2705e67b6d..14d3dac26d1f 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- LanaiMCAsmInfo.cpp - Lanai asm properties -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
index 3eef0592d2fa..265af425d037 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h
@@ -1,9 +1,8 @@
//=====-- LanaiMCAsmInfo.h - Lanai asm properties -----------*- C++ -*--====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index 21f4005aaf83..df4ee297155f 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- LanaiMCCodeEmitter.cpp - Convert Lanai code to machine code -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Lanai.h"
+#include "LanaiAluCode.h"
#include "MCTargetDesc/LanaiBaseInfo.h"
#include "MCTargetDesc/LanaiFixupKinds.h"
#include "MCTargetDesc/LanaiMCExpr.h"
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp
index 201c95de07f4..56d5fbf40360 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- LanaiMCExpr.cpp - Lanai specific MC expression classes ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h
index 5004d541ff70..c99af32d9102 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h
@@ -1,9 +1,8 @@
//===-- LanaiMCExpr.h - Lanai specific MC expression classes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index ddb01cdd2d8f..a9de0416fcac 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- LanaiMCTargetDesc.cpp - Lanai Target Descriptions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,9 @@
//===----------------------------------------------------------------------===//
#include "LanaiMCTargetDesc.h"
-#include "InstPrinter/LanaiInstPrinter.h"
+#include "LanaiInstPrinter.h"
#include "LanaiMCAsmInfo.h"
+#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index 2d8828ea4fa9..cf66d3226659 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- LanaiMCTargetDesc.h - Lanai Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,8 +31,6 @@ class Triple;
class StringRef;
class raw_pwrite_stream;
-Target &getTheLanaiTarget();
-
MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
index ccf47b08fcff..93deb891dec5 100644
--- a/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
+++ b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
@@ -1,23 +1,20 @@
//===-- LanaiTargetInfo.cpp - Lanai Target Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Module.h"
+#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
-Target &getTheLanaiTarget() {
+Target &llvm::getTheLanaiTarget() {
static Target TheLanaiTarget;
return TheLanaiTarget;
}
-} // namespace llvm
extern "C" void LLVMInitializeLanaiTargetInfo() {
RegisterTarget<Triple::lanai> X(getTheLanaiTarget(), "lanai", "Lanai",
diff --git a/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h
new file mode 100644
index 000000000000..429cf0234a60
--- /dev/null
+++ b/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- LanaiTargetInfo.h - Lanai Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LANAI_TARGETINFO_LANAITARGETINFO_H
+#define LLVM_LIB_TARGET_LANAI_TARGETINFO_LANAITARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheLanaiTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_LANAI_TARGETINFO_LANAITARGETINFO_H
diff --git a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 1ad70ac72c73..a0ec14ae2381 100644
--- a/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -1,15 +1,15 @@
//===- MSP430AsmParser.cpp - Parse MSP430 assembly to MCInst instructions -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MSP430.h"
#include "MSP430RegisterInfo.h"
#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringSwitch.h"
diff --git a/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index e5da130f9bbb..59c12e24e8bf 100644
--- a/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -1,9 +1,8 @@
//===-- MSP430Disassembler.cpp - Disassembler for MSP430 ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "MSP430.h"
#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index bd69a9d8d795..365e5da74de0 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- MSP430AsmBackend.cpp - MSP430 Assembler Backend -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
index e47db2400a05..38b7da32c246 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- MSP430ELFObjectWriter.cpp - MSP430 ELF Writer ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
index 9449cb278024..4e054f85ccc3 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-- MSP430ELFStreamer.cpp - MSP430 ELF Target Streamer Methods --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h b/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
index 1eb6a2759423..68e41b0fb874 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h
@@ -1,9 +1,8 @@
//===-- MSP430FixupKinds.h - MSP430 Specific Fixup Entries ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
index 4d62547bc65b..2f3c6ed3c17e 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
index cd02c4fa645a..25451033236e 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -1,9 +1,8 @@
//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
-#define LLVM_LIB_TARGET_MSP430_INSTPRINTER_MSP430INSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430INSTPRINTER_H
+#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430INSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 36e9a9c31075..db5a49dd22a7 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,4 +23,5 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
+ UseIntegratedAssembler = true;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index de486ec4b7bd..93979df037e6 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- MSP430MCAsmInfo.h - MSP430 asm properties --------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
index 06f9f307cb1a..cf57e87a073d 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- MSP430MCCodeEmitter.cpp - Convert MSP430 code to machine code -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index b21145d3904a..da928733015f 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,9 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCTargetDesc.h"
-#include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430InstPrinter.h"
#include "MSP430MCAsmInfo.h"
+#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index e484c79c9ee9..02bfbe40c6bf 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,8 +29,6 @@ class MCObjectTargetWriter;
class MCStreamer;
class MCTargetStreamer;
-Target &getTheMSP430Target();
-
/// Creates a machine code emitter for MSP430.
MCCodeEmitter *createMSP430MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index 7a5314a10844..67f35b8034d9 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -1,9 +1,8 @@
//==-- MSP430.h - Top-level interface for MSP430 representation --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 8fa99dc13dd5..38aa30fcf4dd 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -1,9 +1,8 @@
//===-- MSP430.td - Describe the MSP430 Target Machine -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the MSP430 target.
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index f39c21fc8aa2..3a71a084d1af 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,11 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/MSP430InstPrinter.h"
+#include "MCTargetDesc/MSP430InstPrinter.h"
#include "MSP430.h"
#include "MSP430InstrInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
+#include "TargetInfo/MSP430TargetInfo.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,6 +29,7 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
@@ -44,20 +46,34 @@ namespace {
StringRef getPassName() const override { return "MSP430 Assembly Printer"; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
void printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char* Modifier = nullptr);
void printSrcMemOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &O) override;
void EmitInstruction(const MachineInstr *MI) override;
+
+ void EmitInterruptVectorSection(MachineFunction &ISR);
};
} // end of anonymous namespace
+void MSP430AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ uint64_t Offset = MO.getOffset();
+ if (Offset)
+ O << '(' << Offset << '+';
+
+ getSymbol(MO.getGlobal())->print(O, MAI);
+
+ if (Offset)
+ O << ')';
+}
void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
@@ -76,25 +92,13 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MO.getMBB()->getSymbol()->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress: {
- bool isMemOp = Modifier && !strcmp(Modifier, "mem");
- uint64_t Offset = MO.getOffset();
-
// If the global address expression is a part of displacement field with a
// register base, we should not emit any prefix symbol here, e.g.
- // mov.w &foo, r1
- // vs
// mov.w glb(r1), r2
// Otherwise (!) msp430-as will silently miscompile the output :(
if (!Modifier || strcmp(Modifier, "nohash"))
- O << (isMemOp ? '&' : '#');
- if (Offset)
- O << '(' << Offset << '+';
-
- getSymbol(MO.getGlobal())->print(O, MAI);
-
- if (Offset)
- O << ')';
-
+ O << '#';
+ PrintSymbolOperand(MO, O);
return;
}
}
@@ -108,12 +112,12 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
// Print displacement first
// Imm here is in fact global address - print extra modifier.
- if (Disp.isImm() && !Base.getReg())
+ if (Disp.isImm() && Base.getReg() == MSP430::SR)
O << '&';
printOperand(MI, OpNum+1, O, "nohash");
// Print register base field
- if (Base.getReg()) {
+ if (Base.getReg() != MSP430::SR && Base.getReg() != MSP430::PC) {
O << '(';
printOperand(MI, OpNum, O);
O << ')';
@@ -123,18 +127,17 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
printOperand(MI, OpNo, O);
return false;
}
bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+ unsigned OpNo,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
@@ -153,6 +156,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
+ MCSection *Cur = OutStreamer->getCurrentSectionOnly();
+ const auto *F = &ISR.getFunction();
+ assert(F->hasFnAttribute("interrupt") &&
+ "Functions with MSP430_INTR CC should have 'interrupt' attribute");
+ StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString();
+ MCSection *IV = OutStreamer->getContext().getELFSection(
+ "__interrupt_vector_" + IVIdx,
+ ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+ OutStreamer->SwitchSection(IV);
+
+ const MCSymbol *FunctionSymbol = getSymbol(F);
+ OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+ OutStreamer->SwitchSection(Cur);
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ // Emit separate section for an interrupt vector if ISR
+ if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR)
+ EmitInterruptVectorSection(MF);
+
+ SetupMachineFunction(MF);
+ EmitFunctionBody();
+ return false;
+}
+
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(getTheMSP430Target());
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 2b3495405545..45e7c26e4d30 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -1,9 +1,8 @@
//===-- MSP430BranchSelector.cpp - Emit long conditional branches ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index 0434f8abfbf4..49191fa5dd5f 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -1,9 +1,8 @@
//==- MSP430CallingConv.td - Calling Conventions for MSP430 -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for MSP430 architecture.
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 2421f09fbf59..de60ad9bd7e6 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- MSP430FrameLowering.cpp - MSP430 Frame Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index 8807101f37ca..33ce3c70a2a3 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -1,9 +1,8 @@
//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7a1998ad355d..23449585505e 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- MSP430ISelDAGToDAG.cpp - A dag to dag inst selector for MSP430 ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 3e706134afc5..fedfb857bd0f 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- MSP430ISelLowering.cpp - MSP430 DAG Lowering Implementation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 731bc1406711..ee6b6316d7a9 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -1,9 +1,8 @@
//===-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
index e2e4503db20c..36f40d6fc89d 100644
--- a/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -1,9 +1,8 @@
//===-- MSP430InstrFormats.td - MSP430 Instruction Formats -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index c136933a51bc..5c3a3fc69266 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- MSP430InstrInfo.cpp - MSP430 Instruction Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -308,7 +307,8 @@ unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
return 0;
- case TargetOpcode::INLINEASM: {
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR: {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index fee3bea9b8d6..13c50ad23adc 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -1,9 +1,8 @@
//===-- MSP430InstrInfo.h - MSP430 Instruction Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 25c81d94f75b..aaca3504822d 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -1,9 +1,8 @@
//===-- MSP430InstrInfo.td - MSP430 Instruction defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 860c0006f782..1e57f33386e6 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index ebd639744bcc..910ad4bb12d5 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -1,9 +1,8 @@
//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
index b442fc03b257..1d3a6d118bd6 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index fcaa8a1d6c72..2b2c8967a749 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -1,9 +1,8 @@
//=== MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 54e53e19eb54..afbb2f213b45 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- MSP430RegisterInfo.cpp - MSP430 Register Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -155,7 +154,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const MSP430FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 47a5e147953e..c3eff93f55d2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -1,9 +1,8 @@
//===-- MSP430RegisterInfo.h - MSP430 Register Information Impl -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,7 +37,7 @@ public:
RegScavenger *RS = nullptr) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 1e86bdf34a0b..11003dba383f 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -1,9 +1,8 @@
//===-- MSP430RegisterInfo.td - MSP430 Register defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 776a9dcb11d4..20168773cd53 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -1,9 +1,8 @@
//===-- MSP430Subtarget.cpp - MSP430 Subtarget Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 01a428056377..ab2b71e3bb1a 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -1,9 +1,8 @@
//===-- MSP430Subtarget.h - Define Subtarget for the MSP430 ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 9f6ebba75ec6..8c4ca982c966 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- MSP430TargetMachine.cpp - Define TargetMachine for MSP430 ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "MSP430TargetMachine.h"
#include "MSP430.h"
+#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 4935b80cfdd9..96fbc3ba0377 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -1,9 +1,8 @@
//===-- MSP430TargetMachine.h - Define TargetMachine for MSP430 -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index dfa21f580cb7..5da7d588079f 100644
--- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h
new file mode 100644
index 000000000000..17854244f28b
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h
@@ -0,0 +1,20 @@
+//===-- MSP430TargetInfo.h - MSP430 Target Implementation -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MSP430_TARGETINFO_MSP430TARGETINFO_H
+#define LLVM_LIB_TARGET_MSP430_TARGETINFO_MSP430TARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheMSP430Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_MSP430_TARGETINFO_MSP430TARGETINFO_H
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d2fed6861477..1f7d095bf49b 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -1,9 +1,8 @@
//===-- MipsAsmParser.cpp - Parse Mips assembly to MCInst instructions ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,6 +12,7 @@
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsTargetStreamer.h"
+#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -29,6 +29,7 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCParser/MCAsmParserUtils.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSectionELF.h"
@@ -65,10 +66,7 @@ class MCInstrInfo;
} // end namespace llvm
-static cl::opt<bool>
-EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
- cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
- cl::init(true));
+extern cl::opt<bool> EmitJalrReloc;
namespace {
@@ -148,6 +146,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool IsPicEnabled;
bool IsCpRestoreSet;
int CpRestoreOffset;
+ unsigned GPReg;
unsigned CpSaveLocation;
/// If true, then CpSaveLocation is a register, otherwise it's an offset.
bool CpSaveLocationIsRegister;
@@ -277,6 +276,15 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandUxw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandSge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandSgeImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandSgtImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandRotation(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out, const MCSubtargetInfo *STI);
bool expandRotationImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
@@ -304,6 +312,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI, bool IsLoad);
+ bool expandStoreDM1Macro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
@@ -324,6 +335,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetFeature(uint64_t Feature);
bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup.
bool parseDirectiveCpLoad(SMLoc Loc);
+ bool parseDirectiveCpLocal(SMLoc Loc);
bool parseDirectiveCpRestore(SMLoc Loc);
bool parseDirectiveCPSetup();
bool parseDirectiveCPReturn();
@@ -517,6 +529,7 @@ public:
IsCpRestoreSet = false;
CpRestoreOffset = -1;
+ GPReg = ABI.GetGlobalPtr();
const Triple &TheTriple = sti.getTargetTriple();
IsLittleEndian = TheTriple.isLittleEndian();
@@ -895,14 +908,6 @@ private:
.getRegister(RegIdx.Index);
}
- /// Coerce the register to FGRH32 and return the real register for the current
- /// target.
- unsigned getFGRH32Reg() const {
- assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!");
- return RegIdx.RegInfo->getRegClass(Mips::FGRH32RegClassID)
- .getRegister(RegIdx.Index);
- }
-
/// Coerce the register to FCC and return the real register for the current
/// target.
unsigned getFCCReg() const {
@@ -1100,11 +1105,6 @@ public:
"registers");
}
- void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(getFGRH32Reg()));
- }
-
void addFCCAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getFCCReg()));
@@ -2043,7 +2043,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
const MCExpr *Lo16RelocExpr =
MipsMCExpr::create(MipsMCExpr::MEK_LO, JalExpr, getContext());
- TOut.emitRRX(Mips::LW, Mips::T9, Mips::GP,
+ TOut.emitRRX(Mips::LW, Mips::T9, GPReg,
MCOperand::createExpr(Got16RelocExpr), IDLoc, STI);
TOut.emitRRX(Mips::ADDiu, Mips::T9, Mips::T9,
MCOperand::createExpr(Lo16RelocExpr), IDLoc, STI);
@@ -2057,7 +2057,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, JalExpr, getContext());
TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9,
- Mips::GP, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
+ GPReg, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
STI);
}
} else {
@@ -2068,7 +2068,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
const MCExpr *Call16RelocExpr =
MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, JalExpr, getContext());
- TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+ TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, GPReg,
MCOperand::createExpr(Call16RelocExpr), IDLoc, STI);
}
@@ -2485,6 +2485,19 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::NORImm:
case Mips::NORImm64:
return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SGE:
+ case Mips::SGEU:
+ return expandSge(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SGEImm:
+ case Mips::SGEUImm:
+ case Mips::SGEImm64:
+ case Mips::SGEUImm64:
+ return expandSgeImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SGTImm:
+ case Mips::SGTUImm:
+ case Mips::SGTImm64:
+ case Mips::SGTUImm64:
+ return expandSgtImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SLTImm64:
if (isInt<16>(Inst.getOperand(2).getImm())) {
Inst.setOpcode(Mips::SLTi64);
@@ -2553,6 +2566,10 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
Inst.getOpcode() == Mips::LDMacro)
? MER_Fail
: MER_Success;
+ case Mips::SDC1_M1:
+ return expandStoreDM1Macro(Inst, IDLoc, Out, STI)
+ ? MER_Fail
+ : MER_Success;
case Mips::SEQMacro:
return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SEQIMacro:
@@ -2879,8 +2896,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
ELF::STB_LOCAL))) {
const MCExpr *CallExpr =
MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
- TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(),
- MCOperand::createExpr(CallExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr),
+ IDLoc, STI);
return false;
}
@@ -2919,8 +2936,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
TmpReg = ATReg;
}
- TOut.emitRRX(Mips::LW, TmpReg, ABI.GetGlobalPtr(),
- MCOperand::createExpr(GotExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
+ STI);
if (LoExpr)
TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
@@ -2955,8 +2972,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
ELF::STB_LOCAL))) {
const MCExpr *CallExpr =
MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
- TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(),
- MCOperand::createExpr(CallExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr),
+ IDLoc, STI);
return false;
}
@@ -2998,8 +3015,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
TmpReg = ATReg;
}
- TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(),
- MCOperand::createExpr(GotExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
+ STI);
if (LoExpr)
TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
@@ -3229,10 +3246,10 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc,
MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext());
if(isABI_O32() || isABI_N32()) {
- TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+ TOut.emitRRX(Mips::LW, ATReg, GPReg, MCOperand::createExpr(GotExpr),
IDLoc, STI);
} else { //isABI_N64()
- TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+ TOut.emitRRX(Mips::LD, ATReg, GPReg, MCOperand::createExpr(GotExpr),
IDLoc, STI);
}
} else { //!IsPicEnabled
@@ -4293,6 +4310,143 @@ bool MipsAsmParser::expandUxw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+bool MipsAsmParser::expandSge(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isReg() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned OpReg = Inst.getOperand(2).getReg();
+ unsigned OpCode;
+
+ warnIfNoMacro(IDLoc);
+
+ switch (Inst.getOpcode()) {
+ case Mips::SGE:
+ OpCode = Mips::SLT;
+ break;
+ case Mips::SGEU:
+ OpCode = Mips::SLTu;
+ break;
+ default:
+ llvm_unreachable("unexpected 'sge' opcode");
+ }
+
+ // $SrcReg >= $OpReg is equal to (not ($SrcReg < $OpReg))
+ TOut.emitRRR(OpCode, DstReg, SrcReg, OpReg, IDLoc, STI);
+ TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+
+ return false;
+}
+
+bool MipsAsmParser::expandSgeImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ unsigned OpRegCode, OpImmCode;
+
+ warnIfNoMacro(IDLoc);
+
+ switch (Inst.getOpcode()) {
+ case Mips::SGEImm:
+ case Mips::SGEImm64:
+ OpRegCode = Mips::SLT;
+ OpImmCode = Mips::SLTi;
+ break;
+ case Mips::SGEUImm:
+ case Mips::SGEUImm64:
+ OpRegCode = Mips::SLTu;
+ OpImmCode = Mips::SLTiu;
+ break;
+ default:
+ llvm_unreachable("unexpected 'sge' opcode with immediate");
+ }
+
+ // $SrcReg >= Imm is equal to (not ($SrcReg < Imm))
+ if (isInt<16>(ImmValue)) {
+ // Use immediate version of STL.
+ TOut.emitRRI(OpImmCode, DstReg, SrcReg, ImmValue, IDLoc, STI);
+ TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+ } else {
+ unsigned ImmReg = DstReg;
+ if (DstReg == SrcReg) {
+ unsigned ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+ ImmReg = ATReg;
+ }
+
+ if (loadImmediate(ImmValue, ImmReg, Mips::NoRegister, isInt<32>(ImmValue),
+ false, IDLoc, Out, STI))
+ return true;
+
+ TOut.emitRRR(OpRegCode, DstReg, SrcReg, ImmReg, IDLoc, STI);
+ TOut.emitRRI(Mips::XORi, DstReg, DstReg, 1, IDLoc, STI);
+ }
+
+ return false;
+}
+
+bool MipsAsmParser::expandSgtImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned ImmReg = DstReg;
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ unsigned OpCode;
+
+ warnIfNoMacro(IDLoc);
+
+ switch (Inst.getOpcode()) {
+ case Mips::SGTImm:
+ case Mips::SGTImm64:
+ OpCode = Mips::SLT;
+ break;
+ case Mips::SGTUImm:
+ case Mips::SGTUImm64:
+ OpCode = Mips::SLTu;
+ break;
+ default:
+ llvm_unreachable("unexpected 'sgt' opcode with immediate");
+ }
+
+ if (DstReg == SrcReg) {
+ unsigned ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+ ImmReg = ATReg;
+ }
+
+ if (loadImmediate(ImmValue, ImmReg, Mips::NoRegister, isInt<32>(ImmValue),
+ false, IDLoc, Out, STI))
+ return true;
+
+ // $SrcReg > $ImmReg is equal to $ImmReg < $SrcReg
+ TOut.emitRRR(OpCode, DstReg, ImmReg, SrcReg, IDLoc, STI);
+
+ return false;
+}
+
bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out,
const MCSubtargetInfo *STI) {
@@ -4859,61 +5013,110 @@ bool MipsAsmParser::expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc,
return false;
}
+
+// Expand 's.d $<reg> offset($reg2)' to 'swc1 $<reg+1>, offset($reg2);
+// swc1 $<reg>, offset+4($reg2)'
+// or if little endian to 'swc1 $<reg>, offset($reg2);
+// swc1 $<reg+1>, offset+4($reg2)'
+// for Mips1.
+bool MipsAsmParser::expandStoreDM1Macro(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ if (!isABI_O32())
+ return true;
+
+ warnIfNoMacro(IDLoc);
+
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned Opcode = Mips::SWC1;
+ unsigned FirstReg = Inst.getOperand(0).getReg();
+ unsigned SecondReg = nextReg(FirstReg);
+ unsigned BaseReg = Inst.getOperand(1).getReg();
+ if (!SecondReg)
+ return true;
+
+ warnIfRegIndexIsAT(FirstReg, IDLoc);
+
+ assert(Inst.getOperand(2).isImm() &&
+ "Offset for macro is not immediate!");
+
+ MCOperand &FirstOffset = Inst.getOperand(2);
+ signed NextOffset = FirstOffset.getImm() + 4;
+ MCOperand SecondOffset = MCOperand::createImm(NextOffset);
+
+ if (!isInt<16>(FirstOffset.getImm()) || !isInt<16>(NextOffset))
+ return true;
+
+ if (!IsLittleEndian)
+ std::swap(FirstReg, SecondReg);
+
+ TOut.emitRRX(Opcode, FirstReg, BaseReg, FirstOffset, IDLoc, STI);
+ TOut.emitRRX(Opcode, SecondReg, BaseReg, SecondOffset, IDLoc, STI);
+
+ return false;
+}
+
bool MipsAsmParser::expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isReg() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned OpReg = Inst.getOperand(2).getReg();
warnIfNoMacro(IDLoc);
- MipsTargetStreamer &TOut = getTargetStreamer();
- if (Inst.getOperand(1).getReg() != Mips::ZERO &&
- Inst.getOperand(2).getReg() != Mips::ZERO) {
- TOut.emitRRR(Mips::XOR, Inst.getOperand(0).getReg(),
- Inst.getOperand(1).getReg(), Inst.getOperand(2).getReg(),
- IDLoc, STI);
- TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
- Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+ if (SrcReg != Mips::ZERO && OpReg != Mips::ZERO) {
+ TOut.emitRRR(Mips::XOR, DstReg, SrcReg, OpReg, IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, DstReg, DstReg, 1, IDLoc, STI);
return false;
}
- unsigned Reg = 0;
- if (Inst.getOperand(1).getReg() == Mips::ZERO) {
- Reg = Inst.getOperand(2).getReg();
- } else {
- Reg = Inst.getOperand(1).getReg();
- }
- TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(), Reg, 1, IDLoc, STI);
+ unsigned Reg = SrcReg == Mips::ZERO ? OpReg : SrcReg;
+ TOut.emitRRI(Mips::SLTiu, DstReg, Reg, 1, IDLoc, STI);
return false;
}
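For reference, a sketch of the 'seq' expansion with hypothetical registers (not taken from the patch):

    seq   $2, $3, $4         # macro
    # expands to
    xor   $2, $3, $4
    sltiu $2, $2, 1          # unsigned ($3 ^ $4) < 1, i.e. $3 == $4
    # when one source operand is $zero, only 'sltiu $2, <other>, 1' is emitted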
bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI) {
- warnIfNoMacro(IDLoc);
MipsTargetStreamer &TOut = getTargetStreamer();
- unsigned Opc;
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
int64_t Imm = Inst.getOperand(2).getImm();
- unsigned Reg = Inst.getOperand(1).getReg();
+
+ warnIfNoMacro(IDLoc);
if (Imm == 0) {
- TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
- Inst.getOperand(1).getReg(), 1, IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, DstReg, SrcReg, 1, IDLoc, STI);
return false;
- } else {
+ }
- if (Reg == Mips::ZERO) {
- Warning(IDLoc, "comparison is always false");
- TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu,
- Inst.getOperand(0).getReg(), Reg, Reg, IDLoc, STI);
- return false;
- }
+ if (SrcReg == Mips::ZERO) {
+ Warning(IDLoc, "comparison is always false");
+ TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu,
+ DstReg, SrcReg, SrcReg, IDLoc, STI);
+ return false;
+ }
- if (Imm > -0x8000 && Imm < 0) {
- Imm = -Imm;
- Opc = isGP64bit() ? Mips::DADDiu : Mips::ADDiu;
- } else {
- Opc = Mips::XORi;
- }
+ unsigned Opc;
+ if (Imm > -0x8000 && Imm < 0) {
+ Imm = -Imm;
+ Opc = isGP64bit() ? Mips::DADDiu : Mips::ADDiu;
+ } else {
+ Opc = Mips::XORi;
}
+
if (!isUInt<16>(Imm)) {
unsigned ATReg = getATReg(IDLoc);
if (!ATReg)
@@ -4923,17 +5126,13 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
Out, STI))
return true;
- TOut.emitRRR(Mips::XOR, Inst.getOperand(0).getReg(),
- Inst.getOperand(1).getReg(), ATReg, IDLoc, STI);
- TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
- Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+ TOut.emitRRR(Mips::XOR, DstReg, SrcReg, ATReg, IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, DstReg, DstReg, 1, IDLoc, STI);
return false;
}
- TOut.emitRRI(Opc, Inst.getOperand(0).getReg(), Inst.getOperand(1).getReg(),
- Imm, IDLoc, STI);
- TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
- Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+ TOut.emitRRI(Opc, DstReg, SrcReg, Imm, IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, DstReg, DstReg, 1, IDLoc, STI);
return false;
}
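A sketch of the immediate form, again with hypothetical operands:

    seq   $2, $3, 100        # macro
    # 100 fits in 16 bits, so:
    xori  $2, $3, 100
    sltiu $2, $2, 1
    # negative immediates above -0x8000 use addiu/daddiu with the negated
    # value instead of xori; immediates outside uint16 go through $at,
    # and 'seq' against $zero with a non-zero immediate just clears the
    # destination (the comparison is always false)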
@@ -6325,7 +6524,7 @@ bool MipsAsmParser::parseBracketSuffix(StringRef Name,
return false;
}
-static std::string MipsMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string MipsMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -6338,7 +6537,7 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
- uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = MipsMnemonicSpellCheck(Name, FBS);
return Error(NameLoc, "unknown instruction" + Suggestion);
}
@@ -6807,7 +7006,6 @@ bool MipsAsmParser::parseSetHardFloatDirective() {
bool MipsAsmParser::parseSetAssignment() {
StringRef Name;
- const MCExpr *Value;
MCAsmParser &Parser = getParser();
if (Parser.parseIdentifier(Name))
@@ -6825,17 +7023,16 @@ bool MipsAsmParser::parseSetAssignment() {
RegisterSets[Name] = Parser.getTok();
Parser.Lex(); // Eat identifier.
getContext().getOrCreateSymbol(Name);
- } else if (!Parser.parseExpression(Value)) {
- // Parse assignment of an expression including
- // symbolic registers:
- // .set $tmp, $BB0-$BB1
- // .set r2, $f2
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
- Sym->setVariableValue(Value);
- } else {
- return reportParseError("expected valid expression after comma");
+ return false;
}
+ MCSymbol *Sym;
+ const MCExpr *Value;
+ if (MCParserUtils::parseAssignmentExpression(Name, /* allow_redef */ true,
+ Parser, Sym, Value))
+ return true;
+ Sym->setVariableValue(Value);
+
return false;
}
@@ -7047,6 +7244,40 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
return false;
}
+bool MipsAsmParser::parseDirectiveCpLocal(SMLoc Loc) {
+ if (!isABI_N32() && !isABI_N64()) {
+ reportParseError(".cplocal is allowed only in N32 or N64 mode");
+ return false;
+ }
+
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
+ OperandMatchResultTy ResTy = parseAnyRegister(Reg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected register containing global pointer");
+ return false;
+ }
+
+ MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]);
+ if (!RegOpnd.isGPRAsmReg()) {
+ reportParseError(RegOpnd.getStartLoc(), "invalid register");
+ return false;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+ getParser().Lex(); // Consume the EndOfStatement.
+
+ unsigned NewReg = RegOpnd.getGPR32Reg();
+ if (IsPicEnabled)
+ GPReg = NewReg;
+
+ getTargetStreamer().emitDirectiveCpLocal(NewReg);
+ return false;
+}
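A usage sketch for the new directive (the register and callee name are hypothetical examples; the expansion mirrors the comment added to MipsTargetStreamer.cpp further below):

    .cplocal $4              # N32/N64, PIC only: use $4 as the GP register
    jal   foo                # later PIC expansions then reference $4, e.g.
                             # ld $25, %call16(foo)($4); jalr $25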
+
bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) {
MCAsmParser &Parser = getParser();
@@ -7897,6 +8128,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
parseDirectiveCpRestore(DirectiveID.getLoc());
return false;
}
+ if (IDVal == ".cplocal") {
+ parseDirectiveCpLocal(DirectiveID.getLoc());
+ return false;
+ }
if (IDVal == ".ent") {
StringRef SymbolName;
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 27b27ff1e1e2..ef13507fe63a 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -1,9 +1,8 @@
//===- MipsDisassembler.cpp - Disassembler for Mips -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
+#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -541,15 +541,6 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned RegPair,
static DecodeStatus DecodeMovePOperands(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-namespace llvm {
-
-Target &getTheMipselTarget();
-Target &getTheMipsTarget();
-Target &getTheMips64Target();
-Target &getTheMips64elTarget();
-
-} // end namespace llvm
-
static MCDisassembler *createMipsDisassembler(
const Target &T,
const MCSubtargetInfo &STI,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 4a2b75b9ae46..fca1149453c9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -1,9 +1,8 @@
//===- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index 68bf3829aab5..239e55495e9d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -1,9 +1,8 @@
//===- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 18d7dd99be34..bdd190fc17c9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -1,9 +1,8 @@
//===---- MipsABIInfo.cpp - Information about MIPS ABI's ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,6 +14,13 @@
using namespace llvm;
+// Note: this option is defined here to be visible from libLLVMMipsAsmParser
+// and libLLVMMipsCodeGen
+cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+ cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+ cl::init(true));
+
namespace {
static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 9372a3c2bb1f..534e6573b63c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -1,9 +1,8 @@
//===---- MipsABIInfo.h - Information about MIPS ABI's --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 265d1141cb0b..859f9cbbca07 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- MipsAsmBackend.cpp - Mips Asm Backend ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -303,7 +302,7 @@ void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Optional<MCFixupKind> MipsAsmBackend::getFixupKind(StringRef Name) const {
return StringSwitch<Optional<MCFixupKind>>(Name)
- .Case("R_MIPS_NONE", (MCFixupKind)Mips::fixup_Mips_NONE)
+ .Case("R_MIPS_NONE", FK_NONE)
.Case("R_MIPS_32", FK_Data_4)
.Case("R_MIPS_GOT_PAGE", (MCFixupKind)Mips::fixup_Mips_GOT_PAGE)
.Case("R_MIPS_CALL_HI16", (MCFixupKind)Mips::fixup_Mips_CALL_HI16)
@@ -351,7 +350,6 @@ getFixupKindInfo(MCFixupKind Kind) const {
// MipsFixupKinds.h.
//
// name offset bits flags
- { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 0, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
@@ -431,7 +429,6 @@ getFixupKindInfo(MCFixupKind Kind) const {
// MipsFixupKinds.h.
//
// name offset bits flags
- { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 16, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 30359132e92b..4d7e36995ae4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -1,9 +1,8 @@
//===-- MipsAsmBackend.h - Mips Asm Backend ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index a90db2384c46..6d8cb264158f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -1,9 +1,8 @@
//===-- MipsBaseInfo.h - Top level definitions for MIPS MC ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,7 +88,10 @@ namespace MipsII {
MO_GOT_HI16,
MO_GOT_LO16,
MO_CALL_HI16,
- MO_CALL_LO16
+ MO_CALL_LO16,
+
+ /// Helper operand used to generate R_MIPS_JALR
+ MO_JALR
};
enum {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 8ace2895d681..cf7bae98a27f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- MipsELFObjectWriter.cpp - Mips ELF Writer -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -223,7 +222,7 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
unsigned Kind = (unsigned)Fixup.getKind();
switch (Kind) {
- case Mips::fixup_Mips_NONE:
+ case FK_NONE:
return ELF::R_MIPS_NONE;
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(),
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 21b01e850967..1b83e9445fb5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-------- MipsELFStreamer.cpp - ELF Object Output ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,7 +34,7 @@ MipsELFStreamer::MipsELFStreamer(MCContext &Context,
}
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
MCELFStreamer::EmitInstruction(Inst, STI);
MCContext &Context = getContext();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 56a0ff96c7bd..2febfbc69b6f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -1,9 +1,8 @@
//===- MipsELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,8 +41,7 @@ public:
/// \p Inst is actually emitted. For example, we can inspect the operands and
/// gather sufficient information that allows us to reason about the register
/// usage for the translation unit.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool = false) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
/// Overriding this function allows us to record all labels that should be
/// marked as microMIPS. Based on this data marking is done in
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index eedad16dddc3..b83d822bd8d0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -1,9 +1,8 @@
//===-- MipsFixupKinds.h - Mips Specific Fixup Entries ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,11 +22,8 @@ namespace Mips {
// in MipsAsmBackend.cpp.
//
enum Fixups {
- // Branch fixups resulting in R_MIPS_NONE.
- fixup_Mips_NONE = FirstTargetFixupKind,
-
// Branch fixups resulting in R_MIPS_16.
- fixup_Mips_16,
+ fixup_Mips_16 = FirstTargetFixupKind,
// Pure 32 bit data fixup resulting in - R_MIPS_32.
fixup_Mips_32,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index 73732a40bb8a..fb290a8e3f26 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "MipsInstPrinter.h"
-#include "MCTargetDesc/MipsMCExpr.h"
#include "MipsInstrInfo.h"
+#include "MipsMCExpr.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
index f02443ee21d3..a34a5c1d6418 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -1,9 +1,8 @@
//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
-#define LLVM_LIB_TARGET_MIPS_INSTPRINTER_MIPSINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSINSTPRINTER_H
+#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 1506b4a83649..ec78158d387d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- MipsMCAsmInfo.cpp - Mips Asm Properties ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index d4ccf0349c16..867f4d223de4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- MipsMCAsmInfo.h - Mips Asm Info ------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index f43a4d980f92..759a7fdb32b8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- MipsMCCodeEmitter.cpp - Convert Mips Code to Machine Code ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -186,7 +185,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Check for unimplemented opcodes.
// Unfortunately in MIPS both NOP and SLL will come in with Binary == 0
// so we have to special check for them.
- unsigned Opcode = TmpInst.getOpcode();
+ const unsigned Opcode = TmpInst.getOpcode();
if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) &&
(Opcode != Mips::SLL_MM) && (Opcode != Mips::SLL_MMR6) && !Binary)
llvm_unreachable("unimplemented opcode in encodeInstruction()");
@@ -209,7 +208,6 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (Fixups.size() > N)
Fixups.pop_back();
- Opcode = NewOpcode;
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
}
@@ -614,8 +612,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
llvm_unreachable("Unhandled fixup kind!");
break;
case MipsMCExpr::MEK_DTPREL:
- llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
- break;
+ // MEK_DTPREL is only used for marking the TLS DIEExpr
+ // and contains a regular sub-expression.
+ return getExprOpValue(MipsExpr->getSubExpr(), Fixups, STI);
case MipsMCExpr::MEK_CALL_HI16:
FixupKind = Mips::fixup_Mips_CALL_HI16;
break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 09d50d4776ba..ff6e1d62b05f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -1,9 +1,8 @@
//===- MipsMCCodeEmitter.h - Convert Mips Code to Machine Code --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 99857e083c6c..680806c4deb2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- MipsMCExpr.cpp - Mips specific MC expression classes --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -44,8 +43,10 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
case MEK_DTPREL:
- llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
- break;
+ // MEK_DTPREL is only used for marking the TLS DIEExpr
+ // and contains a regular sub-expression.
+ getSubExpr()->print(OS, MAI, true);
+ return;
case MEK_CALL_HI16:
OS << "%call_hi";
break;
@@ -161,7 +162,9 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
case MEK_DTPREL:
- llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+ // MEK_DTPREL is only used for marking the TLS DIEExpr
+ // and contains a regular sub-expression.
+ return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
case MEK_DTPREL_HI:
case MEK_DTPREL_LO:
case MEK_GOT:
@@ -249,9 +252,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
case MEK_Special:
llvm_unreachable("MEK_None and MEK_Special are invalid");
break;
- case MEK_DTPREL:
- llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
- break;
case MEK_CALL_HI16:
case MEK_CALL_LO16:
case MEK_GOT:
@@ -274,6 +274,7 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr()))
E->fixELFSymbolsInTLSFixups(Asm);
break;
+ case MEK_DTPREL:
case MEK_DTPREL_HI:
case MEK_DTPREL_LO:
case MEK_TLSLDM:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index bf3274ab5d17..edc12e87e9b6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -1,9 +1,8 @@
//===- MipsMCExpr.h - Mips specific MC expression classes -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 988629ed1bca..ad5aff6552f6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -1,9 +1,8 @@
//===-- MipsMCNaCl.h - NaCl-related declarations --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index a8cd7b0d9b03..ddeec03ba784 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,12 +11,13 @@
//===----------------------------------------------------------------------===//
#include "MipsMCTargetDesc.h"
-#include "InstPrinter/MipsInstPrinter.h"
#include "MipsAsmBackend.h"
#include "MipsELFStreamer.h"
+#include "MipsInstPrinter.h"
#include "MipsMCAsmInfo.h"
#include "MipsMCNaCl.h"
#include "MipsTargetStreamer.h"
+#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -85,7 +85,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new MipsMCAsmInfo(TT);
unsigned SP = MRI.getDwarfRegNum(Mips::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfaRegister(nullptr, SP);
MAI->addInitialFrameState(Inst);
return MAI;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 4fc174ab5871..809be99ff3f4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,11 +32,6 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheMipsTarget();
-Target &getTheMipselTarget();
-Target &getTheMips64Target();
-Target &getTheMips64elTarget();
-
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 6bf62ea618b4..c050db8a17fd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-- MipsNaClELFStreamer.cpp - ELF Object Output for Mips NaCl ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -144,8 +143,8 @@ private:
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
if (PendingCall)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 2d84528e7469..b4ebb9d18b72 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -1,9 +1,8 @@
//===- MipsOptionRecord.cpp - Abstraction for storing information ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 58f9717e1cc6..e3bdb3b140a8 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===-- MipsTargetStreamer.cpp - Mips Target Streamer Methods -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "MipsTargetStreamer.h"
-#include "InstPrinter/MipsInstPrinter.h"
+#include "MipsInstPrinter.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsELFStreamer.h"
#include "MipsMCExpr.h"
@@ -36,7 +35,7 @@ static cl::opt<bool> RoundSectionSizes(
} // end anonymous namespace
MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
- : MCTargetStreamer(S), ModuleDirectiveAllowed(true) {
+ : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
}
void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
@@ -107,6 +106,23 @@ void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+ // .cplocal $reg
+ // This directive forces use of an alternate register for the context pointer.
+ // For example
+ // .cplocal $4
+ // jal foo
+ // expands to
+ // ld $25, %call16(foo)($4)
+ // jalr $25
+
+ if (!getABI().IsN32() && !getABI().IsN64())
+ return;
+
+ GPReg = RegNo;
+
+ forbidModuleDirective();
+}
bool MipsTargetStreamer::emitDirectiveCpRestore(
int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
@@ -258,8 +274,7 @@ void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
/// Emit the $gp restore operation for .cprestore.
void MipsTargetStreamer::emitGPRestore(int Offset, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
- emitLoadWithImmOffset(Mips::LW, Mips::GP, Mips::SP, Offset, Mips::GP, IDLoc,
- STI);
+ emitLoadWithImmOffset(Mips::LW, GPReg, Mips::SP, Offset, GPReg, IDLoc, STI);
}
/// Emit a store instruction with an immediate offset.
@@ -666,6 +681,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
forbidModuleDirective();
}
+void MipsTargetAsmStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+ OS << "\t.cplocal\t$"
+ << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+ MipsTargetStreamer::emitDirectiveCpLocal(RegNo);
+}
+
bool MipsTargetAsmStreamer::emitDirectiveCpRestore(
int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
@@ -700,8 +721,11 @@ void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
}
void MipsTargetAsmStreamer::emitDirectiveModuleFP() {
- OS << "\t.module\tfp=";
- OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n";
+ MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI();
+ if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT)
+ OS << "\t.module\tsoftfloat\n";
+ else
+ OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n";
}
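Sketch of the directive emitted by the two branches above (the fp= value shown is just one possible ABI string):

    .module softfloat        # FpABIKind::SOFT
    .module fp=64            # any other FP ABI, e.g. fp=32, fp=xx, fp=64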
void MipsTargetAsmStreamer::emitDirectiveSetFp(
@@ -1133,7 +1157,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
MCInst TmpInst;
TmpInst.setOpcode(Mips::LUi);
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(GPReg));
const MCExpr *HiSym = MipsMCExpr::create(
MipsMCExpr::MEK_HI,
MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None,
@@ -1145,8 +1169,8 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
TmpInst.clear();
TmpInst.setOpcode(Mips::ADDiu);
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(GPReg));
+ TmpInst.addOperand(MCOperand::createReg(GPReg));
const MCExpr *LoSym = MipsMCExpr::create(
MipsMCExpr::MEK_LO,
MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None,
@@ -1158,14 +1182,19 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
TmpInst.clear();
TmpInst.setOpcode(Mips::ADDu);
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::createReg(GPReg));
+ TmpInst.addOperand(MCOperand::createReg(GPReg));
TmpInst.addOperand(MCOperand::createReg(RegNo));
getStreamer().EmitInstruction(TmpInst, STI);
forbidModuleDirective();
}
+void MipsTargetELFStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+ if (Pic)
+ MipsTargetStreamer::emitDirectiveCpLocal(RegNo);
+}
+
bool MipsTargetELFStreamer::emitDirectiveCpRestore(
int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
@@ -1182,7 +1211,7 @@ bool MipsTargetELFStreamer::emitDirectiveCpRestore(
return true;
// Store the $gp on the stack.
- emitStoreWithImmOffset(Mips::SW, Mips::GP, Mips::SP, Offset, GetATReg, IDLoc,
+ emitStoreWithImmOffset(Mips::SW, GPReg, Mips::SP, Offset, GetATReg, IDLoc,
STI);
return true;
}
@@ -1203,10 +1232,10 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
// Either store the old $gp in a register or on the stack
if (IsReg) {
// move $save, $gpreg
- emitRRR(Mips::OR64, RegOrOffset, Mips::GP, Mips::ZERO, SMLoc(), &STI);
+ emitRRR(Mips::OR64, RegOrOffset, GPReg, Mips::ZERO, SMLoc(), &STI);
} else {
// sd $gpreg, offset($sp)
- emitRRI(Mips::SD, Mips::GP, Mips::SP, RegOrOffset, SMLoc(), &STI);
+ emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI);
}
if (getABI().IsN32()) {
@@ -1219,11 +1248,11 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
MCA.getContext());
// lui $gp, %hi(__gnu_local_gp)
- emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
+ emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
// addiu $gp, $gp, %lo(__gnu_local_gp)
- emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr),
- SMLoc(), &STI);
+ emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
+ &STI);
return;
}
@@ -1236,14 +1265,14 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
MCA.getContext());
// lui $gp, %hi(%neg(%gp_rel(funcSym)))
- emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
+ emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
// addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym)))
- emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr),
- SMLoc(), &STI);
+ emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
+ &STI);
// daddu $gp, $gp, $funcreg
- emitRRR(Mips::DADDu, Mips::GP, Mips::GP, RegNo, SMLoc(), &STI);
+ emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI);
}
void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
@@ -1256,12 +1285,12 @@ void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
// Either restore the old $gp from a register or on the stack
if (SaveLocationIsRegister) {
Inst.setOpcode(Mips::OR);
- Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(GPReg));
Inst.addOperand(MCOperand::createReg(SaveLocation));
Inst.addOperand(MCOperand::createReg(Mips::ZERO));
} else {
Inst.setOpcode(Mips::LD);
- Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(GPReg));
Inst.addOperand(MCOperand::createReg(Mips::SP));
Inst.addOperand(MCOperand::createImm(SaveLocation));
}
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index ed5b8dd71a51..dbff0f6200f2 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -1,9 +1,8 @@
//=- MicroMips32r6InstrFormats.td - Mips32r6 Instruction Formats -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 814918d25e70..425773dc57f1 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -1,9 +1,8 @@
//=- MicroMips32r6InstrInfo.td - MicroMips r6 Instruction Information -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -246,6 +245,7 @@ class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>;
class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>;
class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>;
class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>;
+class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>;
class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>;
class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>;
class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>;
@@ -460,6 +460,7 @@ class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
let isCall = 1;
let hasDelaySlot = 0;
let Defs = [RA];
+ let hasPostISelHook = 1;
}
class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
@@ -889,6 +890,8 @@ class FMOV_FNEG_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
}
class FMOV_S_MMR6_DESC
: FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>;
+class FMOV_D_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>;
class FNEG_S_MMR6_DESC
: FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>;
@@ -1039,7 +1042,7 @@ class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
FGR32Opnd, II_TRUNC>;
class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
- AFGR64Opnd, II_TRUNC>;
+ FGR64Opnd, II_TRUNC>;
class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
II_SQRT_S, fsqrt>;
class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
@@ -1210,7 +1213,7 @@ class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd,
class SWSP_MMR6_DESC
: MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset),
!strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>,
- MMR6Arch<"sw"> {
+ MMR6Arch<"swsp"> {
let DecoderMethod = "DecodeMemMMSPImm5Lsl2";
let mayStore = 1;
}
@@ -1461,6 +1464,8 @@ def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC,
ISA_MICROMIPS32R6;
def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC,
ISA_MICROMIPS32R6;
+def FMOV_D_MMR6 : StdMMR6Rel, FMOV_D_MMR6_ENC, FMOV_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC,
ISA_MICROMIPS32R6;
def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6;
@@ -1749,6 +1754,8 @@ def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
(TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6;
def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
(ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>,
@@ -1767,6 +1774,19 @@ let AddedComplexity = 41 in {
def : StoreRegImmPat<SDC1_D64_MMR6, f64>, FGR_64, ISA_MICROMIPS32R6;
}
+let isCall=1, hasDelaySlot=0, isCTI=1, Defs = [RA] in {
+ class JumpLinkMMR6<Instruction JumpInst, DAGOperand Opnd> :
+ PseudoSE<(outs), (ins calltarget:$target), [], II_JAL>,
+ PseudoInstExpansion<(JumpInst Opnd:$target)>;
+}
+
+def JAL_MMR6 : JumpLinkMMR6<BALC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
+
+def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
+ (JAL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsJmpLink (iPTR tglobaladdr:$dst)),
+ (JAL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
+
def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
def TAILCALLREG_MMR6 : TailCallReg<JRC16_MM, GPR32Opnd>, ISA_MICROMIPS32R6;
diff --git a/lib/Target/Mips/MicroMipsDSPInstrFormats.td b/lib/Target/Mips/MicroMipsDSPInstrFormats.td
index 0d444dfc9fad..26b6cf8994ca 100644
--- a/lib/Target/Mips/MicroMipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsDSPInstrFormats.td
@@ -1,9 +1,8 @@
//===-- MicroMipsDSPInstrFormats.td - Instruction Formats --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 132de6be750d..5a12568893af 100644
--- a/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -1,9 +1,8 @@
//===- MicroMipsDSPInstrInfo.td - Micromips DSP instructions -*- tablegen *-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index 1731afc1961f..5d87068ff407 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -1,9 +1,8 @@
//==- MicroMipsInstrFPU.td - microMIPS FPU Instruction Info -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,8 +113,7 @@ multiclass ABSS_MMM<string opstr, InstrItinClass Itin,
ISA_MICROMIPS, FGR_32 {
string DecoderNamespace = "MicroMips";
}
- // FIXME: This needs to be part of the instruction mapping tables.
- def _D64_MM : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
+ def _D64_MM : StdMMR6Rel, ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
ISA_MICROMIPS, FGR_64 {
string DecoderNamespace = "MicroMipsFP64";
}
@@ -124,7 +122,7 @@ multiclass ABSS_MMM<string opstr, InstrItinClass Itin,
defm FSQRT : ABSS_MMM<"sqrt.d", II_SQRT_D, fsqrt>, ROUND_W_FM_MM<1, 0x28>;
defm FABS : ABSS_MMM<"abs.d", II_SQRT_D, fabs>, ABS_FM_MM<1, 0xd>;
-let DecoderNamespace = "MicroMips" in {
+let DecoderNamespace = "MicroMips", AdditionalPredicates = [UseAbs] in {
def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
ABS_FM_MM<0, 0xd>, ISA_MICROMIPS;
}
@@ -266,7 +264,7 @@ let DecoderNamespace = "MicroMips" in {
ROUND_W_FM_MM<0b1, 0b01001000>, ISA_MICROMIPS, FGR_64;
def RSQRT_S_MM : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd,
II_RECIP_S>,
- ROUND_W_FM_MM<0b0, 0b00001000>;
+ ROUND_W_FM_MM<0b0, 0b00001000>, ISA_MICROMIPS;
def RSQRT_D32_MM : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd,
II_RECIP_D>,
ROUND_W_FM_MM<0b1, 0b00001000>, ISA_MICROMIPS, FGR_32 {
@@ -425,6 +423,11 @@ def : MipsPat<(f64 (fpextend FGR32Opnd:$src)),
def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
(TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
FGR_32;
+def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
+ FGR_64;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6;
// Selects
defm : MovzPats0<GPR32, FGR32, MOVZ_I_S_MM, SLT_MM, SLTu_MM, SLTi_MM, SLTiu_MM>,
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 2a4cc279ef0d..e9fb9b310e3b 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -1,9 +1,8 @@
//===-- MicroMipsInstrFormats.td - microMIPS Inst Formats -*- tablegen -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index af380a0ec71e..9b7f7b25fa94 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,9 +1,8 @@
//===--- MicroMipsInstrFormats.td - microMIPS Inst Defs -*- tablegen -*----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -426,6 +425,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
let isCall = 1;
let hasDelaySlot = 1;
let Defs = [RA];
+ let hasPostISelHook = 1;
}
// 16-bit Jump Reg
@@ -654,7 +654,7 @@ def LWGP_MM : LoadGPMM16<"lw", GPRMM16Opnd, II_LW, mem_mm_gp_simm7_lsl2>,
LOAD_GP_FM_MM16<0x19>, ISA_MICROMIPS;
def LWSP_MM : LoadSPMM16<"lw", GPR32Opnd, II_LW, mem_mm_sp_imm5_lsl2>,
LOAD_STORE_SP_FM_MM16<0x12>, ISA_MICROMIPS;
-def SWSP_MM : StoreSPMM16<"sw", GPR32Opnd, II_SW, mem_mm_sp_imm5_lsl2>,
+def SWSP_MM : StoreSPMM16<"swsp", GPR32Opnd, II_SW, mem_mm_sp_imm5_lsl2>,
LOAD_STORE_SP_FM_MM16<0x32>, ISA_MICROMIPS32_NOT_MIPS32R6;
def ADDIUR1SP_MM : AddImmUR1SP<"addiur1sp", GPRMM16Opnd>, ADDIUR1SP_FM_MM16,
ISA_MICROMIPS;
@@ -694,6 +694,10 @@ def BREAK16_MM : BrkSdbbp16MM<"break16", II_BREAK>, BRKSDBBP16_FM_MM<0x28>,
def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, BRKSDBBP16_FM_MM<0x2C>,
ISA_MICROMIPS32_NOT_MIPS32R6;
+class WaitMM<string opstr> :
+ InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
+ II_WAIT, FrmOther, opstr>;
+
let DecoderNamespace = "MicroMips" in {
/// Load and Store Instructions - multiple
def SWM16_MM : StoreMultMM16<"swm16", II_SWM>, LWM_FM_MM16<0x5>,
@@ -706,13 +710,7 @@ let DecoderNamespace = "MicroMips" in {
def CTC2_MM : InstSE<(outs COP2Opnd:$impl), (ins GPR32Opnd:$rt),
"ctc2\t$rt, $impl", [], II_CTC2, FrmFR, "ctc2">,
POOL32A_CFTC2_FM_MM<0b1101110100>, ISA_MICROMIPS;
-}
-
-class WaitMM<string opstr> :
- InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
- II_WAIT, FrmOther, opstr>;
-let DecoderNamespace = "MicroMips" in {
/// Compact Branch Instructions
def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x7>, ISA_MICROMIPS32_NOT_MIPS32R6;
@@ -822,8 +820,7 @@ let DecoderNamespace = "MicroMips" in {
def SW_MM : Store<"sw", GPR32Opnd, null_frag, II_SW>, MMRel,
LW_FM_MM<0x3e>, ISA_MICROMIPS;
}
-}
-let DecoderNamespace = "MicroMips" in {
+
let DecoderMethod = "DecodeMemMMImm9" in {
def LBE_MM : MMRel, Load<"lbe", GPR32Opnd, null_frag, II_LBE>,
POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>, ISA_MICROMIPS, ASE_EVA;
@@ -881,8 +878,7 @@ let DecoderNamespace = "MicroMips" in {
def SWR_MM : MMRel, StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12,
II_SWR>, LWL_FM_MM<0x9>,
ISA_MICROMIPS32_NOT_MIPS32R6;
-}
-let DecoderNamespace = "MicroMips" in {
+
/// Load and Store Instructions - multiple
def SWM32_MM : StoreMultMM<"swm32", II_SWM>, LWM_FM_MM<0xd>, ISA_MICROMIPS;
def LWM32_MM : LoadMultMM<"lwm32", II_LWM>, LWM_FM_MM<0x5>, ISA_MICROMIPS;
@@ -1125,7 +1121,8 @@ let AdditionalPredicates = [NotDSP] in {
ISA_MICROMIPS32_NOT_MIPS32R6;
}
-def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
+def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
def TAILCALLREG_MM : TailCallReg<JRC16_MM, GPR32Opnd>,
ISA_MICROMIPS32_NOT_MIPS32R6;
@@ -1139,9 +1136,7 @@ let DecoderNamespace = "MicroMips" in {
def LWU_MM : MMRel, LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU,
mem_simm12>, LL_FM_MM<0xe>,
ISA_MICROMIPS32_NOT_MIPS32R6;
-}
-let DecoderNamespace = "MicroMips" in {
def MFGC0_MM : MMRel, MfCop0MM<"mfgc0", GPR32Opnd, COP0Opnd, II_MFGC0>,
POOL32A_MFTC0_FM_MM<0b10011, 0b111100>,
ISA_MICROMIPS32R5, ASE_VIRT;
@@ -1204,7 +1199,7 @@ def : MipsPat<(atomic_load_32 addr:$a), (LW_MM addr:$a)>, ISA_MICROMIPS;
def : MipsPat<(i32 immLi16:$imm),
(LI16_MM immLi16:$imm)>, ISA_MICROMIPS;
-defm : MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>, ISA_MICROMIPS;
+defm : MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>, ISA_MICROMIPS;
def : MipsPat<(not GPRMM16:$in),
(NOT16_MM GPRMM16:$in)>, ISA_MICROMIPS;
@@ -1453,3 +1448,6 @@ def : MipsInstAlias<"mtgc0 $rt, $rs",
def : MipsInstAlias<"mthgc0 $rt, $rs",
(MTHGC0_MM COP0Opnd:$rs, GPR32Opnd:$rt, 0), 0>,
ISA_MICROMIPS32R5, ASE_VIRT;
+def : MipsInstAlias<"sw $rt, $offset",
+ (SWSP_MM GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset), 1>,
+ ISA_MICROMIPS;
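A minimal sketch of what the SWSP_MM rename plus the new alias mean at the assembly level (operands illustrative, not part of the patch): the SP-relative 16-bit store keeps its familiar spelling, while the record's own asm string becomes "swsp".

    # microMIPS, illustrative only
    sw      $4, 16($sp)       # may match SWSP_MM when the operands fit the 16-bit
                              # SP-relative encoding; the alias prints it back as "sw"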
diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp
index f9062cc23da2..70af95592aa5 100644
--- a/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -1,9 +1,8 @@
//=== MicroMipsSizeReduction.cpp - MicroMips size reduction pass --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///\file
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 6bb7aecc867a..b3faaab436f0 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -1,9 +1,8 @@
//===-- Mips.h - Top-level interface for Mips representation ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 2f3a1c399d3e..7b83ea8535ae 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,9 +1,8 @@
//===-- Mips.td - Describe the Mips Target Machine ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the Mips target.
@@ -83,6 +82,8 @@ def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
"Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding">;
+def FeatureAbs2008 : SubtargetFeature<"abs2008", "Abs2008", "true",
+                                      "Enable IEEE 754-2008 abs.fmt mode">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureSoftFloat : SubtargetFeature<"soft-float", "IsSoftFloat", "true",
@@ -142,7 +143,7 @@ def FeatureMips32r6 : SubtargetFeature<"mips32r6", "MipsArchVersion",
"Mips32r6",
"Mips32r6 ISA Support [experimental]",
[FeatureMips32r5, FeatureFP64Bit,
- FeatureNaN2008]>;
+ FeatureNaN2008, FeatureAbs2008]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
[FeatureMips5, FeatureMips32]>;
@@ -159,7 +160,7 @@ def FeatureMips64r6 : SubtargetFeature<"mips64r6", "MipsArchVersion",
"Mips64r6",
"Mips64r6 ISA Support [experimental]",
[FeatureMips32r6, FeatureMips64r5,
- FeatureNaN2008]>;
+ FeatureNaN2008, FeatureAbs2008]>;
def FeatureSym32 : SubtargetFeature<"sym32", "HasSym32", "true",
"Symbols are 32 bit on Mips64">;
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 122c1f5377b6..5a2a916a6b7a 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -1,9 +1,8 @@
//===- Mips16FrameLowering.cpp - Mips16 Frame Information -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index f7fa4dc3d86d..6b62453f8dfe 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -1,9 +1,8 @@
//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index f237bb6d4006..e9a3c7ec4b19 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -1,9 +1,8 @@
//===- Mips16HardFloat.cpp for Mips16 Hard Float --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -415,7 +414,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Attribute::ReadNone);
A = A.addAttribute(C, AttributeList::FunctionIndex,
Attribute::NoInline);
- Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T));
+ FunctionCallee F = (M->getOrInsertFunction(Name, A, MyVoid, T));
CallInst::Create(F, Params, "", &I);
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
FunctionType *FT = CI->getFunctionType();
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp
index 2eb6e5ddd2d9..8a02e8156175 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -1,9 +1,8 @@
//===---- Mips16HardFloatInfo.cpp for Mips16 Hard Float -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.h b/lib/Target/Mips/Mips16HardFloatInfo.h
index 7295c287576d..b8c485b7e2e3 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.h
+++ b/lib/Target/Mips/Mips16HardFloatInfo.h
@@ -1,9 +1,8 @@
//===---- Mips16HardFloatInfo.h for Mips16 Hard Float --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index a0d5bd9ef305..3ab4f1e064da 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index bbf8cc36f241..1ef194029f50 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -1,9 +1,8 @@
//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 79df622241a0..6d8e5aef2a3f 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -156,11 +155,8 @@ llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
return new Mips16TargetLowering(TM, STI);
}
-bool
-Mips16TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned,
- unsigned,
- bool *Fast) const {
+bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
return false;
}
@@ -463,8 +459,7 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
}
// one more look at list of intrinsics
const Mips16IntrinsicHelperType *Helper =
- std::lower_bound(std::begin(Mips16IntrinsicHelper),
- std::end(Mips16IntrinsicHelper), IntrinsicFind);
+ llvm::lower_bound(Mips16IntrinsicHelper, IntrinsicFind);
if (Helper != std::end(Mips16IntrinsicHelper) &&
*Helper == IntrinsicFind) {
Mips16HelperFunction = Helper->Helper;
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 0ee0b816ef70..200249933577 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -1,9 +1,8 @@
//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@ namespace llvm {
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
unsigned Align,
+ MachineMemOperand::Flags Flags,
bool *Fast) const override;
MachineBasicBlock *
diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td
index 4ff68bef957e..f4ac160c2ba5 100644
--- a/lib/Target/Mips/Mips16InstrFormats.td
+++ b/lib/Target/Mips/Mips16InstrFormats.td
@@ -1,9 +1,8 @@
//===- Mips16InstrFormats.td - Mips Instruction Formats ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index efebc99b5dae..c234c309d760 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -1,9 +1,8 @@
//===- Mips16InstrInfo.cpp - Mips16 Instruction Information ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 6a802e4cce5d..dadcaa3055b3 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -1,9 +1,8 @@
//===- Mips16InstrInfo.h - Mips16 Instruction Information -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index b7a1b9ce41bf..36b6c73d1008 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1,9 +1,8 @@
//===- Mips16InstrInfo.td - Target Description for Mips16 -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -484,13 +483,11 @@ class SelT<string op1, string op2>:
//
// 32 bit constant
//
-def Constant32:
- MipsPseudo16<(outs), (ins simm32:$imm), "\t.word $imm", []>;
+def Constant32 : MipsPseudo16<(outs), (ins simm32:$imm), "\t.word $imm", []>;
-def LwConstant32:
+def LwConstant32 :
MipsPseudo16<(outs CPU16Regs:$rx), (ins simm32:$imm, simm32:$constid),
- "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
-
+ "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
//
// Some general instruction class info
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 751afd5ed369..5703f585a6a2 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index d67a79b64033..fca78b43f96b 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -1,9 +1,8 @@
//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
index 623af570a5e6..ccb6d1df777a 100644
--- a/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -1,9 +1,8 @@
//=- Mips32r6InstrFormats.td - Mips32r6 Instruction Formats -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 2bd0cf2d59a6..2c3048411a5c 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1,9 +1,8 @@
//=- Mips32r6InstrInfo.td - Mips32r6 Instruction Information -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -150,7 +149,6 @@ class SELEQZ_ENC : SPECIAL_3R_FM<0b00000, 0b110101>;
class SELNEZ_ENC : SPECIAL_3R_FM<0b00000, 0b110111>;
class LWPC_ENC : PCREL19_FM<OPCODE2_LWPC>;
-class LWUPC_ENC : PCREL19_FM<OPCODE2_LWUPC>;
class MAX_S_ENC : COP1_3R_FM<0b011101, FIELD_FMT_S>;
class MAX_D_ENC : COP1_3R_FM<0b011101, FIELD_FMT_D>;
@@ -326,7 +324,6 @@ class PCREL_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
class ADDIUPC_DESC : PCREL_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2,
II_ADDIUPC>;
class LWPC_DESC: PCREL_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2, II_LWPC>;
-class LWUPC_DESC: PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>;
class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd, InstrItinClass itin>
@@ -927,7 +924,6 @@ let AdditionalPredicates = [NotInMicroMips] in {
}
def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
- def LWUPC : R6MMR6Rel, LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
@@ -1105,7 +1101,7 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
// Pseudo instructions
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
- hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+ hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT], hasPostISelHook = 1 in {
class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 5729182deafb..7f35280f7936 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -1,9 +1,8 @@
//===- Mips64InstrInfo.td - Mips64 Instruction Information -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -250,7 +249,7 @@ def SC64 : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, PTR_64,
def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>, PTR_64;
}
-def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
+def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM, PTR_64;
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
@@ -267,14 +266,15 @@ let isCodeGenOnly = 1 in {
def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>,
GPR_64;
let AdditionalPredicates = [NoIndirectJumpGuards] in
- def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+ def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>,
+ PTR_64;
}
let AdditionalPredicates = [NotInMicroMips],
DecoderNamespace = "Mips64" in {
- def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
- def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS32R2;
+ def JR_HB64 : JR_HB_DESC<GPR64Opnd>, JR_HB_ENC, ISA_MIPS64_NOT_64R6;
+ def JALR_HB64 : JALR_HB_DESC<GPR64Opnd>, JALR_HB_ENC, ISA_MIPS64R2;
}
-def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>, GPR_64;
let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
NoIndirectJumpGuards] in {
@@ -290,7 +290,7 @@ let AdditionalPredicates = [NotInMips16Mode, NotInMicroMips,
ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
def PseudoIndirectHazardBranch64 : PseudoIndirectBranchBase<JR_HB64,
GPR64Opnd>,
- ISA_MIPS32R2_NOT_32R6_64R6;
+ ISA_MIPS32R2_NOT_32R6_64R6, PTR_64;
}
/// Multiply and Divide Instructions.
@@ -332,17 +332,17 @@ def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>, ISA_MIPS3_NOT_32R6_64R6;
/// Sign Ext In Register Instructions.
def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>,
- ISA_MIPS32R2;
+ ISA_MIPS32R2, GPR_64;
def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
- ISA_MIPS32R2;
+ ISA_MIPS32R2, GPR_64;
}
/// Count Leading
let AdditionalPredicates = [NotInMicroMips] in {
def DCLZ : CountLeading0<"dclz", GPR64Opnd, II_DCLZ>, CLO_FM<0x24>,
- ISA_MIPS64_NOT_64R6;
+ ISA_MIPS64_NOT_64R6, GPR_64;
def DCLO : CountLeading1<"dclo", GPR64Opnd, II_DCLO>, CLO_FM<0x25>,
- ISA_MIPS64_NOT_64R6;
+ ISA_MIPS64_NOT_64R6, GPR_64;
/// Double Word Swap Bytes/HalfWords
def DSBH : SubwordSwap<"dsbh", GPR64Opnd, II_DSBH>, SEB_FM<2, 0x24>,
@@ -417,17 +417,25 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
// explanation.
// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt)
-def LONG_BRANCH_LUi2Op_64 : PseudoSE<(outs GPR64Opnd:$dst),
- (ins brtarget:$tgt), []>, GPR_64;
+def LONG_BRANCH_LUi2Op_64 :
+ PseudoSE<(outs GPR64Opnd:$dst), (ins brtarget:$tgt), []>, GPR_64 {
+ bit hasNoSchedulingInfo = 1;
+}
// Expands to: addiu $dst, %highest/%higher/%hi/%lo($tgt)
-def LONG_BRANCH_DADDiu2Op : PseudoSE<(outs GPR64Opnd:$dst),
- (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64;
-
+def LONG_BRANCH_DADDiu2Op :
+ PseudoSE<(outs GPR64Opnd:$dst), (ins GPR64Opnd:$src, brtarget:$tgt), []>,
+ GPR_64 {
+ bit hasNoSchedulingInfo = 1;
+}
// Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
// where %PART may be %hi or %lo, depending on the relocation kind
// that $tgt is annotated with.
-def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst),
- (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64;
+def LONG_BRANCH_DADDiu :
+ PseudoSE<(outs GPR64Opnd:$dst),
+ (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>,
+ GPR_64 {
+ bit hasNoSchedulingInfo = 1;
+}
// Cavium Octeon cnMIPS instructions
let DecoderNamespace = "CnMips",
@@ -580,15 +588,15 @@ def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd, II_DMTC2>, MFC2OP_FM<0x12, 5>,
}
/// Move between CPU and coprocessor registers
-let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
+let DecoderNamespace = "Mips64" in {
def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd, COP0Opnd, II_DMFC0>,
- MFC3OP_FM<0x10, 1, 0>, ISA_MIPS3;
+ MFC3OP_FM<0x10, 1, 0>, ISA_MIPS3, GPR_64;
def DMTC0 : MTC3OP<"dmtc0", COP0Opnd, GPR64Opnd, II_DMTC0>,
- MFC3OP_FM<0x10, 5, 0>, ISA_MIPS3;
+ MFC3OP_FM<0x10, 5, 0>, ISA_MIPS3, GPR_64;
def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd, COP2Opnd, II_DMFC2>,
- MFC3OP_FM<0x12, 1, 0>, ISA_MIPS3;
+ MFC3OP_FM<0x12, 1, 0>, ISA_MIPS3, GPR_64;
def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>,
- MFC3OP_FM<0x12, 5, 0>, ISA_MIPS3;
+ MFC3OP_FM<0x12, 5, 0>, ISA_MIPS3, GPR_64;
}
/// Move between CPU and guest coprocessor registers (Virtualization ASE)
@@ -600,7 +608,7 @@ let DecoderNamespace = "Mips64" in {
}
let AdditionalPredicates = [UseIndirectJumpsHazard] in
- def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>;
+ def JALRHB64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR_HB64, RA_64>, PTR_64;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -845,7 +853,7 @@ def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))),
(SUBu GPR32:$src, GPR32:$src2), sub_32)>;
def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6;
+ (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS32_NOT_32R6_64R6;
def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
(PseudoMFHI ACC64:$src), sub_32)>;
@@ -1147,5 +1155,33 @@ def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
imm64:$imm)>, GPR_64;
+def SGEImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "sge\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sge $rs, $imm", (SGEImm64 GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ imm64:$imm), 0>, GPR_64;
+
+def SGEUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "sgeu\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sgeu $rs, $imm", (SGEUImm64 GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ imm64:$imm), 0>, GPR_64;
+
+def SGTImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "sgt\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sgt $rs, $imm", (SGTImm64 GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ imm64:$imm), 0>, GPR_64;
+
+def SGTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "sgtu\t$rd, $rs, $imm">, GPR_64;
+def : MipsInstAlias<"sgtu $rs, $imm", (SGTUImm64 GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ imm64:$imm), 0>, GPR_64;
+
def : MipsInstAlias<"rdhwr $rt, $rs",
(RDHWR64 GPR64Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, GPR_64;
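A minimal sketch of the new 64-bit set-on-compare immediate pseudos and their two-operand aliases (register numbers and immediates illustrative, not part of the patch):

    sge     $2, $3, 100       # pseudo expanded by the assembler: $2 = ($3 >= 100) ? 1 : 0
    sgtu    $4, 16            # two-operand alias; expands to "sgtu $4, $4, 16"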
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
index ac223bc77256..d746bb61f824 100644
--- a/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -1,9 +1,8 @@
//=- Mips64r6InstrInfo.td - Mips64r6 Instruction Information -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,7 @@ class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011101>;
class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011100>;
class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b011101>;
class LDPC_ENC : PCREL18_FM<OPCODE3_LDPC>;
+class LWUPC_ENC : PCREL19_FM<OPCODE2_LWUPC>;
class LLD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LLD>;
class SCD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SCD>;
class CRC32D_ENC : SPECIAL3_2R_SZ_CRC<3,0>;
@@ -73,6 +73,7 @@ class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd, II_DMUHU, mulhu>;
class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>;
class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMUL>;
class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, II_LDPC>;
+class LWUPC_DESC : PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>;
class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simmptr, II_LLD>;
class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>;
class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>;
@@ -148,6 +149,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS64R6;
}
def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6;
+def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS64R6;
def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
let DecoderNamespace = "Mips32r6_64r6_GP64" in {
def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 4e17ee327ab6..ae2b83c414db 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -1,9 +1,8 @@
//===- MipsAnalyzeImmediate.cpp - Analyze Immediates ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 1c520242fb8d..018b9d824526 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -1,9 +1,8 @@
//===- MipsAnalyzeImmediate.h - Analyze Immediates -------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 362431fd42a6..db83fe49cec0 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "MipsAsmPrinter.h"
-#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsInstPrinter.h"
#include "MCTargetDesc/MipsMCNaCl.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
@@ -24,6 +23,7 @@
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "MipsTargetStreamer.h"
+#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -68,6 +68,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips-asm-printer"
+extern cl::opt<bool> EmitJalrReloc;
+
MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
}
@@ -148,6 +150,40 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
EmitToStreamer(OutStreamer, TmpInst0);
}
+// If there is an MO_JALR operand, insert:
+//
+// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+// tmplabel:
+//
+// This is an optimization hint for the linker which may then replace
+// an indirect call with a direct branch.
+static void emitDirectiveRelocJalr(const MachineInstr &MI,
+ MCContext &OutContext,
+ TargetMachine &TM,
+ MCStreamer &OutStreamer,
+ const MipsSubtarget &Subtarget) {
+ for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
+ I < E; ++I) {
+ MachineOperand MO = MI.getOperand(I);
+ if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) {
+ MCSymbol *Callee = MO.getMCSymbol();
+ if (Callee && !Callee->getName().empty()) {
+ MCSymbol *OffsetLabel = OutContext.createTempSymbol();
+ const MCExpr *OffsetExpr =
+ MCSymbolRefExpr::create(OffsetLabel, OutContext);
+        const MCExpr *CalleeExpr =
+ MCSymbolRefExpr::create(Callee, OutContext);
+ OutStreamer.EmitRelocDirective
+ (*OffsetExpr,
+ Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+             CalleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
+ OutStreamer.EmitLabel(OffsetLabel);
+ return;
+ }
+ }
+ }
+}
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MipsTargetStreamer &TS = getTargetStreamer();
unsigned Opc = MI->getOpcode();
@@ -207,6 +243,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ if (EmitJalrReloc &&
+ (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) {
+ emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget);
+ }
+
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -470,8 +511,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
@@ -480,7 +520,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O);
+ return AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
case 'X': // hex const int
if ((MO.getType()) != MachineOperand::MO_Immediate)
return true;
@@ -576,7 +616,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
}
bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
+ unsigned OpNum,
const char *ExtraCode,
raw_ostream &O) {
assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
@@ -653,7 +693,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
return;
case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
+ PrintSymbolOperand(MO, O);
break;
case MachineOperand::MO_BlockAddress: {
@@ -772,7 +812,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// We should always emit a '.module fp=...' but binutils 2.24 does not accept
// it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or
// -mfp64) and omit it otherwise.
- if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit()))
+ if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) ||
+ STI.useSoftFloat())
TS.emitDirectiveModuleFP();
// We should always emit a '.module [no]oddspreg' but binutils 2.24 does not
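A minimal sketch of the hint emitted by emitDirectiveRelocJalr for an indirect PIC call (symbol, label, and register names are illustrative, not taken from the patch):

    .reloc  .Ltmp0, R_MIPS_JALR, foo    # R_MICROMIPS_JALR when in microMIPS mode
.Ltmp0:
    jalr    $25                         # the linker may turn this into a direct call to foo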
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index eb58234e3e77..173a1312812e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -1,9 +1,8 @@
//===- MipsAsmPrinter.h - Mips LLVM Assembly Printer -----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -146,11 +145,9 @@ public:
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock* MBB) const override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/Mips/MipsBranchExpansion.cpp b/lib/Target/Mips/MipsBranchExpansion.cpp
index e59267c4fd9b..1523a6c020aa 100644
--- a/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -1,9 +1,8 @@
//===----------------------- MipsBranchExpansion.cpp ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index 90cb3f437bd5..ef48c850a1b8 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -1,9 +1,8 @@
//===---- MipsCCState.cpp - CCState with Mips specific extensions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 27901699480b..fd2fd97c8f13 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -1,9 +1,8 @@
//===---- MipsCCState.h - CCState with Mips specific extensions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsCallLowering.cpp b/lib/Target/Mips/MipsCallLowering.cpp
index c550fadf6632..da65689ecff5 100644
--- a/lib/Target/Mips/MipsCallLowering.cpp
+++ b/lib/Target/Mips/MipsCallLowering.cpp
@@ -1,9 +1,8 @@
//===- MipsCallLowering.cpp -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "MipsCallLowering.h"
#include "MipsCCState.h"
+#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -24,10 +24,10 @@ using namespace llvm;
MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
: CallLowering(&TLI) {}
-bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
- const CCValAssign &VA) {
+bool MipsCallLowering::MipsHandler::assign(Register VReg, const CCValAssign &VA,
+ const EVT &VT) {
if (VA.isRegLoc()) {
- assignValueToReg(VReg, VA);
+ assignValueToReg(VReg, VA, VT);
} else if (VA.isMemLoc()) {
assignValueToAddress(VReg, VA);
} else {
@@ -36,24 +36,25 @@ bool MipsCallLowering::MipsHandler::assign(unsigned VReg,
return true;
}
-bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<unsigned> VRegs,
+bool MipsCallLowering::MipsHandler::assignVRegs(ArrayRef<Register> VRegs,
ArrayRef<CCValAssign> ArgLocs,
- unsigned ArgLocsStartIndex) {
+ unsigned ArgLocsStartIndex,
+ const EVT &VT) {
for (unsigned i = 0; i < VRegs.size(); ++i)
- if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i]))
+ if (!assign(VRegs[i], ArgLocs[ArgLocsStartIndex + i], VT))
return false;
return true;
}
void MipsCallLowering::MipsHandler::setLeastSignificantFirst(
- SmallVectorImpl<unsigned> &VRegs) {
+ SmallVectorImpl<Register> &VRegs) {
if (!MIRBuilder.getMF().getDataLayout().isLittleEndian())
std::reverse(VRegs.begin(), VRegs.end());
}
bool MipsCallLowering::MipsHandler::handle(
ArrayRef<CCValAssign> ArgLocs, ArrayRef<CallLowering::ArgInfo> Args) {
- SmallVector<unsigned, 4> VRegs;
+ SmallVector<Register, 4> VRegs;
unsigned SplitLength;
const Function &F = MIRBuilder.getMF().getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
@@ -65,6 +66,8 @@ bool MipsCallLowering::MipsHandler::handle(
EVT VT = TLI.getValueType(DL, Args[ArgsIndex].Ty);
SplitLength = TLI.getNumRegistersForCallingConv(F.getContext(),
F.getCallingConv(), VT);
+ assert(Args[ArgsIndex].Regs.size() == 1 && "Can't handle multple regs yet");
+
if (SplitLength > 1) {
VRegs.clear();
MVT RegisterVT = TLI.getRegisterTypeForCallingConv(
@@ -72,10 +75,11 @@ bool MipsCallLowering::MipsHandler::handle(
for (unsigned i = 0; i < SplitLength; ++i)
VRegs.push_back(MRI.createGenericVirtualRegister(LLT{RegisterVT}));
- if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Reg))
+ if (!handleSplit(VRegs, ArgLocs, ArgLocsIndex, Args[ArgsIndex].Regs[0],
+ VT))
return false;
} else {
- if (!assign(Args[ArgsIndex].Reg, ArgLocs[ArgLocsIndex]))
+ if (!assign(Args[ArgsIndex].Regs[0], ArgLocs[ArgLocsIndex], VT))
return false;
}
}
@@ -89,24 +93,25 @@ public:
: MipsHandler(MIRBuilder, MRI) {}
private:
- void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+ void assignValueToReg(Register ValVReg, const CCValAssign &VA,
+ const EVT &VT) override;
- unsigned getStackAddress(const CCValAssign &VA,
+ Register getStackAddress(const CCValAssign &VA,
MachineMemOperand *&MMO) override;
- void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
+ void assignValueToAddress(Register ValVReg, const CCValAssign &VA) override;
- bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ bool handleSplit(SmallVectorImpl<Register> &VRegs,
ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
- unsigned ArgsReg) override;
+ Register ArgsReg, const EVT &VT) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMBB().addLiveIn(PhysReg);
}
- void buildLoad(unsigned Val, const CCValAssign &VA) {
+ void buildLoad(Register Val, const CCValAssign &VA) {
MachineMemOperand *MMO;
- unsigned Addr = getStackAddress(VA, MMO);
+ Register Addr = getStackAddress(VA, MMO);
MIRBuilder.buildLoad(Val, Addr, *MMO);
}
};
@@ -127,59 +132,88 @@ private:
} // end anonymous namespace
-void IncomingValueHandler::assignValueToReg(unsigned ValVReg,
- const CCValAssign &VA) {
- unsigned PhysReg = VA.getLocReg();
- switch (VA.getLocInfo()) {
- case CCValAssign::LocInfo::SExt:
- case CCValAssign::LocInfo::ZExt:
- case CCValAssign::LocInfo::AExt: {
- auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
- MIRBuilder.buildTrunc(ValVReg, Copy);
- break;
- }
- default:
- MIRBuilder.buildCopy(ValVReg, PhysReg);
- break;
+void IncomingValueHandler::assignValueToReg(Register ValVReg,
+ const CCValAssign &VA,
+ const EVT &VT) {
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+ Register PhysReg = VA.getLocReg();
+ if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+
+ MIRBuilder
+ .buildInstr(STI.isFP64bit() ? Mips::BuildPairF64_64
+ : Mips::BuildPairF64)
+ .addDef(ValVReg)
+ .addUse(PhysReg + (STI.isLittle() ? 0 : 1))
+ .addUse(PhysReg + (STI.isLittle() ? 1 : 0))
+ .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ markPhysRegUsed(PhysReg);
+ markPhysRegUsed(PhysReg + 1);
+ } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+ MIRBuilder.buildInstr(Mips::MTC1)
+ .addDef(ValVReg)
+ .addUse(PhysReg)
+ .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ markPhysRegUsed(PhysReg);
+ } else {
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
+ markPhysRegUsed(PhysReg);
}
- markPhysRegUsed(PhysReg);
}
-unsigned IncomingValueHandler::getStackAddress(const CCValAssign &VA,
+Register IncomingValueHandler::getStackAddress(const CCValAssign &VA,
MachineMemOperand *&MMO) {
+ MachineFunction &MF = MIRBuilder.getMF();
unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
unsigned Offset = VA.getLocMemOffset();
- MachineFrameInfo &MFI = MIRBuilder.getMF().getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
MachinePointerInfo MPO =
MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
- MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOLoad,
- Size, /* Alignment */ 0);
- unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
+ const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+ unsigned Align = MinAlign(TFL->getStackAlignment(), Offset);
+ MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOLoad, Size, Align);
+
+ Register AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 32));
MIRBuilder.buildFrameIndex(AddrReg, FI);
return AddrReg;
}
-void IncomingValueHandler::assignValueToAddress(unsigned ValVReg,
+void IncomingValueHandler::assignValueToAddress(Register ValVReg,
const CCValAssign &VA) {
if (VA.getLocInfo() == CCValAssign::SExt ||
VA.getLocInfo() == CCValAssign::ZExt ||
VA.getLocInfo() == CCValAssign::AExt) {
- unsigned LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ Register LoadReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
buildLoad(LoadReg, VA);
MIRBuilder.buildTrunc(ValVReg, LoadReg);
} else
buildLoad(ValVReg, VA);
}
-bool IncomingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+bool IncomingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
ArrayRef<CCValAssign> ArgLocs,
unsigned ArgLocsStartIndex,
- unsigned ArgsReg) {
- if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ Register ArgsReg, const EVT &VT) {
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT))
return false;
setLeastSignificantFirst(VRegs);
MIRBuilder.buildMerge(ArgsReg, VRegs);
@@ -194,78 +228,111 @@ public:
: MipsHandler(MIRBuilder, MRI), MIB(MIB) {}
private:
- void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) override;
+ void assignValueToReg(Register ValVReg, const CCValAssign &VA,
+ const EVT &VT) override;
- unsigned getStackAddress(const CCValAssign &VA,
+ Register getStackAddress(const CCValAssign &VA,
MachineMemOperand *&MMO) override;
- void assignValueToAddress(unsigned ValVReg, const CCValAssign &VA) override;
+ void assignValueToAddress(Register ValVReg, const CCValAssign &VA) override;
- bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ bool handleSplit(SmallVectorImpl<Register> &VRegs,
ArrayRef<CCValAssign> ArgLocs, unsigned ArgLocsStartIndex,
- unsigned ArgsReg) override;
+ Register ArgsReg, const EVT &VT) override;
- unsigned extendRegister(unsigned ValReg, const CCValAssign &VA);
+ Register extendRegister(Register ValReg, const CCValAssign &VA);
MachineInstrBuilder &MIB;
};
} // end anonymous namespace
-void OutgoingValueHandler::assignValueToReg(unsigned ValVReg,
- const CCValAssign &VA) {
- unsigned PhysReg = VA.getLocReg();
- unsigned ExtReg = extendRegister(ValVReg, VA);
- MIRBuilder.buildCopy(PhysReg, ExtReg);
- MIB.addUse(PhysReg, RegState::Implicit);
+void OutgoingValueHandler::assignValueToReg(Register ValVReg,
+ const CCValAssign &VA,
+ const EVT &VT) {
+ Register PhysReg = VA.getLocReg();
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+
+ if (VT == MVT::f64 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+ MIRBuilder
+ .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64
+ : Mips::ExtractElementF64)
+ .addDef(PhysReg + (STI.isLittle() ? 1 : 0))
+ .addUse(ValVReg)
+ .addImm(1)
+ .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ MIRBuilder
+ .buildInstr(STI.isFP64bit() ? Mips::ExtractElementF64_64
+ : Mips::ExtractElementF64)
+ .addDef(PhysReg + (STI.isLittle() ? 0 : 1))
+ .addUse(ValVReg)
+ .addImm(0)
+ .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ } else if (VT == MVT::f32 && PhysReg >= Mips::A0 && PhysReg <= Mips::A3) {
+ MIRBuilder.buildInstr(Mips::MFC1)
+ .addDef(PhysReg)
+ .addUse(ValVReg)
+ .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ } else {
+ Register ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ MIB.addUse(PhysReg, RegState::Implicit);
+ }
}
-unsigned OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
+Register OutgoingValueHandler::getStackAddress(const CCValAssign &VA,
MachineMemOperand *&MMO) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
+
LLT p0 = LLT::pointer(0, 32);
LLT s32 = LLT::scalar(32);
- unsigned SPReg = MRI.createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(SPReg, Mips::SP);
+ Register SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, Register(Mips::SP));
- unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ Register OffsetReg = MRI.createGenericVirtualRegister(s32);
unsigned Offset = VA.getLocMemOffset();
MIRBuilder.buildConstant(OffsetReg, Offset);
- unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ Register AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
MachinePointerInfo MPO =
MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
unsigned Size = alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
- MMO = MIRBuilder.getMF().getMachineMemOperand(MPO, MachineMemOperand::MOStore,
- Size, /* Alignment */ 0);
+ unsigned Align = MinAlign(TFL->getStackAlignment(), Offset);
+ MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, Size, Align);
return AddrReg;
}
-void OutgoingValueHandler::assignValueToAddress(unsigned ValVReg,
+void OutgoingValueHandler::assignValueToAddress(Register ValVReg,
const CCValAssign &VA) {
MachineMemOperand *MMO;
- unsigned Addr = getStackAddress(VA, MMO);
- unsigned ExtReg = extendRegister(ValVReg, VA);
+ Register Addr = getStackAddress(VA, MMO);
+ Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
-unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
+Register OutgoingValueHandler::extendRegister(Register ValReg,
const CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
switch (VA.getLocInfo()) {
case CCValAssign::SExt: {
- unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildSExt(ExtReg, ValReg);
return ExtReg;
}
case CCValAssign::ZExt: {
- unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildZExt(ExtReg, ValReg);
return ExtReg;
}
case CCValAssign::AExt: {
- unsigned ExtReg = MRI.createGenericVirtualRegister(LocTy);
+ Register ExtReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildAnyExt(ExtReg, ValReg);
return ExtReg;
}
@@ -278,13 +345,13 @@ unsigned OutgoingValueHandler::extendRegister(unsigned ValReg,
llvm_unreachable("unable to extend register");
}
-bool OutgoingValueHandler::handleSplit(SmallVectorImpl<unsigned> &VRegs,
+bool OutgoingValueHandler::handleSplit(SmallVectorImpl<Register> &VRegs,
ArrayRef<CCValAssign> ArgLocs,
unsigned ArgLocsStartIndex,
- unsigned ArgsReg) {
+ Register ArgsReg, const EVT &VT) {
MIRBuilder.buildUnmerge(VRegs, ArgsReg);
setLeastSignificantFirst(VRegs);
- if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex))
+ if (!assignVRegs(VRegs, ArgLocs, ArgLocsStartIndex, VT))
return false;
return true;
@@ -295,6 +362,8 @@ static bool isSupportedType(Type *T) {
return true;
if (T->isPointerTy())
return true;
+ if (T->isFloatingPointTy())
+ return true;
return false;
}
@@ -330,7 +399,7 @@ static void setLocInfo(SmallVectorImpl<CCValAssign> &ArgLocs,
bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
- ArrayRef<unsigned> VRegs) const {
+ ArrayRef<Register> VRegs) const {
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(Mips::RetRA);
@@ -376,9 +445,9 @@ bool MipsCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return true;
}
-bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function &F,
- ArrayRef<unsigned> VRegs) const {
+bool MipsCallLowering::lowerFormalArguments(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
// Quick exit if there aren't any args.
if (F.arg_empty())
@@ -444,7 +513,8 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (Arg.Flags.isByVal() || Arg.Flags.isSRet())
return false;
}
- if (OrigRet.Reg && !isSupportedType(OrigRet.Ty))
+
+ if (OrigRet.Regs[0] && !isSupportedType(OrigRet.Ty))
return false;
MachineFunction &MF = MIRBuilder.getMF();
@@ -457,14 +527,22 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MachineInstrBuilder CallSeqStart =
MIRBuilder.buildInstr(Mips::ADJCALLSTACKDOWN);
- // FIXME: Add support for pic calling sequences, long call sequences for O32,
- // N32 and N64. First handle the case when Callee.isReg().
- if (Callee.isReg())
- return false;
+ const bool IsCalleeGlobalPIC =
+ Callee.isGlobal() && TM.isPositionIndependent();
- MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(Mips::JAL);
+ MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(
+ Callee.isReg() || IsCalleeGlobalPIC ? Mips::JALRPseudo : Mips::JAL);
MIB.addDef(Mips::SP, RegState::Implicit);
- MIB.add(Callee);
+ if (IsCalleeGlobalPIC) {
+ Register CalleeReg =
+ MF.getRegInfo().createGenericVirtualRegister(LLT::pointer(0, 32));
+ MachineInstr *CalleeGlobalValue =
+ MIRBuilder.buildGlobalValue(CalleeReg, Callee.getGlobal());
+ if (!Callee.getGlobal()->hasLocalLinkage())
+ CalleeGlobalValue->getOperand(1).setTargetFlags(MipsII::MO_GOT_CALL);
+ MIB.addUse(CalleeReg);
+ } else
+ MIB.add(Callee);
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
@@ -507,10 +585,21 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
NextStackOffset = alignTo(NextStackOffset, StackAlignment);
CallSeqStart.addImm(NextStackOffset).addImm(0);
+ if (IsCalleeGlobalPIC) {
+ MIRBuilder.buildCopy(
+ Register(Mips::GP),
+ MF.getInfo<MipsFunctionInfo>()->getGlobalBaseRegForGlobalISel());
+ MIB.addDef(Mips::GP, RegState::Implicit);
+ }
MIRBuilder.insertInstr(MIB);
+ if (MIB->getOpcode() == Mips::JALRPseudo) {
+ const MipsSubtarget &STI =
+ static_cast<const MipsSubtarget &>(MIRBuilder.getMF().getSubtarget());
+ MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+ }
- if (OrigRet.Reg) {
-
+ if (OrigRet.Regs[0]) {
ArgInfos.clear();
SmallVector<unsigned, 8> OrigRetIndices;
diff --git a/lib/Target/Mips/MipsCallLowering.h b/lib/Target/Mips/MipsCallLowering.h
index 9916b04ef50c..11c2d53ad35d 100644
--- a/lib/Target/Mips/MipsCallLowering.h
+++ b/lib/Target/Mips/MipsCallLowering.h
@@ -1,9 +1,8 @@
//===- MipsCallLowering.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,37 +34,39 @@ public:
ArrayRef<CallLowering::ArgInfo> Args);
protected:
- bool assignVRegs(ArrayRef<unsigned> VRegs, ArrayRef<CCValAssign> ArgLocs,
- unsigned Index);
+ bool assignVRegs(ArrayRef<Register> VRegs, ArrayRef<CCValAssign> ArgLocs,
+ unsigned ArgLocsStartIndex, const EVT &VT);
- void setLeastSignificantFirst(SmallVectorImpl<unsigned> &VRegs);
+ void setLeastSignificantFirst(SmallVectorImpl<Register> &VRegs);
MachineIRBuilder &MIRBuilder;
MachineRegisterInfo &MRI;
private:
- bool assign(unsigned VReg, const CCValAssign &VA);
+ bool assign(Register VReg, const CCValAssign &VA, const EVT &VT);
- virtual unsigned getStackAddress(const CCValAssign &VA,
+ virtual Register getStackAddress(const CCValAssign &VA,
MachineMemOperand *&MMO) = 0;
- virtual void assignValueToReg(unsigned ValVReg, const CCValAssign &VA) = 0;
+ virtual void assignValueToReg(Register ValVReg, const CCValAssign &VA,
+ const EVT &VT) = 0;
- virtual void assignValueToAddress(unsigned ValVReg,
+ virtual void assignValueToAddress(Register ValVReg,
const CCValAssign &VA) = 0;
- virtual bool handleSplit(SmallVectorImpl<unsigned> &VRegs,
+ virtual bool handleSplit(SmallVectorImpl<Register> &VRegs,
ArrayRef<CCValAssign> ArgLocs,
- unsigned ArgLocsStartIndex, unsigned ArgsReg) = 0;
+ unsigned ArgLocsStartIndex, Register ArgsReg,
+ const EVT &VT) = 0;
};
MipsCallLowering(const MipsTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<Register> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
const MachineOperand &Callee, const ArgInfo &OrigRet,
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index b5df78f89a6b..88236d8e9abd 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,9 +1,8 @@
//===-- MipsCallingConv.td - Calling Conventions for Mips --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for Mips architecture.
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 0d7e3e200b5f..5affbcbc2101 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -1,9 +1,8 @@
//===-- MipsCondMov.td - Describe Mips Conditional Moves --*- tablegen -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,11 +109,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
let isCodeGenOnly = 1 in {
def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
}
def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
@@ -122,11 +121,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
let isCodeGenOnly = 1 in {
def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>,
- ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, II_MOVN>,
- ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
}
def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>,
CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6;
@@ -156,9 +155,11 @@ let AdditionalPredicates = [NotInMicroMips] in {
CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
let isCodeGenOnly = 1 in {
def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, II_MOVZ_D>,
- CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+ CMov_I_F_FM<18, 17>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64, FGR_64;
def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, II_MOVN_D>,
- CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+ CMov_I_F_FM<19, 17>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64, FGR_64;
}
}
@@ -262,7 +263,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
}
// For targets that don't have conditional-move instructions
// we have to match SELECT nodes with pseudo instructions.
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
class Select_Pseudo<RegisterOperand RC> :
PseudoSE<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F),
[(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>,
@@ -297,7 +298,7 @@ def PseudoSELECTFP_F_S : SelectFP_Pseudo_F<FGR32Opnd>;
def PseudoSELECTFP_F_D32 : SelectFP_Pseudo_F<AFGR64Opnd>, FGR_32;
def PseudoSELECTFP_F_D64 : SelectFP_Pseudo_F<FGR64Opnd>, FGR_64;
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
class D_SELECT_CLASS<RegisterOperand RC> :
PseudoSE<(outs RC:$dst1, RC:$dst2),
(ins GPR32Opnd:$cond, RC:$a1, RC:$a2, RC:$b1, RC:$b2), []>,
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 744523cc6cb9..eea28df7eda1 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -1,9 +1,8 @@
//===- MipsConstantIslandPass.cpp - Emit Pc Relative loads ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -842,9 +841,7 @@ void MipsConstantIslands::updateForInsertedWaterBlock
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
- water_iterator IP =
- std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
- CompareMBBNumbers);
+ water_iterator IP = llvm::lower_bound(WaterList, NewBB, CompareMBBNumbers);
WaterList.insert(IP, NewBB);
}
@@ -894,9 +891,7 @@ MipsConstantIslands::splitBlockBeforeInstr(MachineInstr &MI) {
// available water after it (but not if it's already there, which happens
// when splitting before a conditional branch that is followed by an
// unconditional branch - in that case we want to insert NewBB).
- water_iterator IP =
- std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
- CompareMBBNumbers);
+ water_iterator IP = llvm::lower_bound(WaterList, OrigBB, CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
WaterList.insert(std::next(IP), NewBB);
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index 5f0763f5ea46..6f062d0f3c25 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -1,9 +1,8 @@
//===- MipsDSPInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index b9824220b558..daca8b907081 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -1,9 +1,8 @@
//===- MipsDSPInstrInfo.td - DSP ASE instructions -*- tablegen ------------*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -516,6 +515,7 @@ class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin>
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> {
+ bit hasNoSchedulingInfo = 1;
bit usesCustomInserter = 1;
}
@@ -1314,7 +1314,9 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
def PseudoPICK_PH : PseudoPICK<PICK_PH>;
def PseudoPICK_QB : PseudoPICK<PICK_QB>;
-def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+let AdditionalPredicates = [HasDSP] in {
+ def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+}
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e3823e0dfdb8..aa07dac86828 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,9 +1,8 @@
//===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -493,14 +492,12 @@ MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
- SmallVector<ValueType, 4> Objs;
// Check underlying object list.
+ SmallVector<ValueType, 4> Objs;
if (getUnderlyingObjects(MI, Objs)) {
- for (SmallVectorImpl<ValueType>::const_iterator I = Objs.begin();
- I != Objs.end(); ++I)
- HasHazard |= updateDefsUses(*I, MI.mayStore());
-
+ for (ValueType VT : Objs)
+ HasHazard |= updateDefsUses(VT, MI.mayStore());
return HasHazard;
}
@@ -526,33 +523,32 @@ bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
bool MemDefsUses::
getUnderlyingObjects(const MachineInstr &MI,
SmallVectorImpl<ValueType> &Objects) const {
- if (!MI.hasOneMemOperand() ||
- (!(*MI.memoperands_begin())->getValue() &&
- !(*MI.memoperands_begin())->getPseudoValue()))
+ if (!MI.hasOneMemOperand())
return false;
- if (const PseudoSourceValue *PSV =
- (*MI.memoperands_begin())->getPseudoValue()) {
+ auto & MMO = **MI.memoperands_begin();
+
+ if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) {
if (!PSV->isAliased(MFI))
return false;
Objects.push_back(PSV);
return true;
}
- const Value *V = (*MI.memoperands_begin())->getValue();
+ if (const Value *V = MMO.getValue()) {
+ SmallVector<const Value *, 4> Objs;
+ GetUnderlyingObjects(V, Objs, DL);
- SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+ for (const Value *UValue : Objs) {
+ if (!isIdentifiedObject(V))
+ return false;
- for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
- I != E; ++I) {
- if (!isIdentifiedObject(V))
- return false;
-
- Objects.push_back(*I);
+ Objects.push_back(UValue);
+ }
+ return true;
}
- return true;
+ return false;
}
// Replace Branch with the compact branch instruction.
@@ -726,6 +722,7 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
// but we don't have enough information to make that decision.
if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
+ Opcode == Mips::PseudoIndirectBranch_MM ||
Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
continue;
// Instructions LWP/SWP and MOVEP should not be in a delay slot as that
diff --git a/lib/Target/Mips/MipsEVAInstrFormats.td b/lib/Target/Mips/MipsEVAInstrFormats.td
index 61785d0e891a..9820e4dcfc88 100644
--- a/lib/Target/Mips/MipsEVAInstrFormats.td
+++ b/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -1,9 +1,8 @@
//===- MipsEVAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsEVAInstrInfo.td b/lib/Target/Mips/MipsEVAInstrInfo.td
index ff54b1f17877..73cca8cfa5d9 100644
--- a/lib/Target/Mips/MipsEVAInstrInfo.td
+++ b/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -1,9 +1,8 @@
//===- MipsEVAInstrInfo.td - EVA ASE instructions -*- tablegen ------------*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index acf66d1fb1b2..65d84a6c44a0 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -1,9 +1,8 @@
//===-- MipsExpandPseudoInsts.cpp - Expand pseudo instructions ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 22ade31a72cd..123d3cc242f0 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1,9 +1,8 @@
//===- MipsFastISel.cpp - Mips FastISel implementation --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -56,6 +55,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -75,6 +75,8 @@
using namespace llvm;
+extern cl::opt<bool> EmitJalrReloc;
+
namespace {
class MipsFastISel final : public FastISel {
@@ -951,21 +953,34 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
//
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // For now, just try the simplest case where it's fed by a compare.
+
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block.
+ unsigned ZExtCondReg = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- MVT CIMVT =
- TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT();
- if (CIMVT == MVT::i1)
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitCmp(ZExtCondReg, CI))
+ return false;
+ }
+ }
+
+ // For the general case, we need to mask with 1.
+ if (ZExtCondReg == 0) {
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0)
return false;
- unsigned CondReg = getRegForValue(CI);
- BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
- .addReg(CondReg)
- .addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
+ ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true);
+ if (ZExtCondReg == 0)
+ return false;
}
- return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
+ .addReg(ZExtCondReg)
+ .addMBB(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
}
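When the branch condition is not a compare in the same block, its i1 value sits in a 32-bit register whose upper bits are not guaranteed to be zero, so the lowering zero-extends (masks with 1) before emitting BGTZ. A tiny host-side illustration of why the mask matters; the junk upper bits are a made-up stand-in for whatever the register happened to contain:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Logical i1 value is 'false', but the register still carries junk above bit 0.
      uint32_t RegWithJunk = 0x00AB0000u | 0u;

      // Branching on "reg > 0" directly (what BGTZ tests) would take the branch.
      bool WithoutMask = (int32_t)RegWithJunk > 0;

      // Zero-extending the i1 first reduces the register to 0 or 1.
      uint32_t ZExt = RegWithJunk & 1u;
      bool WithMask = (int32_t)ZExt > 0;

      std::printf("without mask: %staken, with mask: %staken\n",
                  WithoutMask ? "" : "not ", WithMask ? "" : "not ");
      return 0;
    }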
bool MipsFastISel::selectCmp(const Instruction *I) {
@@ -1551,6 +1566,16 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
CLI.Call = MIB;
+ if (EmitJalrReloc && !Subtarget->inMips16Mode()) {
+ // Attach callee address to the instruction, let asm printer emit
+ // .reloc R_MIPS_JALR.
+ if (Symbol)
+ MIB.addSym(Symbol, MipsII::MO_JALR);
+ else
+ MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol(
+ Addr.getGlobalValue()->getName()), MipsII::MO_JALR);
+ }
+
// Finish off the call including any return values.
return finishCall(CLI, RetVT, NumBytes);
}
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 27a85970da6f..8d5eabf59b71 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- MipsFrameLowering.cpp - Mips Frame Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 0ead56eddd2f..0537cfd1cb30 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -1,9 +1,8 @@
//===-- MipsFrameLowering.h - Define frame lowering for Mips ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f99f3a1b3e0a..9ba54d6bb73c 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- MipsISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 09003459d180..bae3bbf71f3b 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -1,9 +1,8 @@
//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8c2a364cdfa9..0ff09007da4b 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1,9 +1,8 @@
//===- MipsISelLowering.cpp - Mips DAG Lowering Implementation ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "MipsISelLowering.h"
-#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsInstPrinter.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsCCState.h"
#include "MipsInstrInfo.h"
@@ -57,6 +56,7 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -91,6 +91,8 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
cl::desc("MIPS: Don't trap on integer division by zero."),
cl::init(false));
+extern cl::opt<bool> EmitJalrReloc;
+
static const MCPhysReg Mips64DPRegs[8] = {
Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@@ -362,6 +364,11 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ if (!(TM.Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())) {
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f64, Custom);
+ }
+
if (Subtarget.isGP64bit()) {
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
@@ -1183,14 +1190,22 @@ bool MipsTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasMips32();
}
+bool MipsTargetLowering::shouldFoldConstantShiftPairToMask(
+ const SDNode *N, CombineLevel Level) const {
+ if (N->getOperand(0).getValueType().isVector())
+ return false;
+ return true;
+}
+
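shouldFoldConstantShiftPairToMask lets the generic combiner rewrite a matched srl/shl pair by the same constant into a single and-with-mask, and this override declines only for vector types so the existing vector shift patterns are left alone. For scalars the two forms are interchangeable, which a quick host-side check confirms (illustrative only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t X = 0xDEADBEEFu;
      unsigned C = 5;
      // (x >> c) << c clears the low c bits...
      uint32_t ShiftPair = (X >> C) << C;
      // ...and so does masking with ~((1 << c) - 1).
      uint32_t Masked = X & ~((1u << C) - 1u);
      std::printf("0x%08X 0x%08X equal=%d\n", ShiftPair, Masked, ShiftPair == Masked);
      return 0;
    }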
void
MipsTargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
- for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
- Results.push_back(Res.getValue(I));
+ if (Res)
+ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
}
void
@@ -1216,6 +1231,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::VASTART: return lowerVASTART(Op, DAG);
case ISD::VAARG: return lowerVAARG(Op, DAG);
case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return lowerFABS(Op, DAG);
case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
@@ -1709,7 +1725,7 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
assert((MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ||
MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I64) &&
- "Unsupported atomic psseudo for EmitAtomicCmpSwap.");
+ "Unsupported atomic pseudo for EmitAtomicCmpSwap.");
const unsigned Size = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 4 : 8;
@@ -1735,12 +1751,10 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
// after fast register allocation, the spills will end up outside of the
// blocks that their values are defined in, causing livein errors.
- unsigned DestCopy = MRI.createVirtualRegister(MRI.getRegClass(Dest));
unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr));
unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal));
unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal));
- BuildMI(*BB, II, DL, TII->get(Mips::COPY), DestCopy).addReg(Dest);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal);
BuildMI(*BB, II, DL, TII->get(Mips::COPY), NewValCopy).addReg(NewVal);
@@ -2293,11 +2307,79 @@ MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert());
}
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
+ SDLoc DL(Op);
+ SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);
+
+ // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (Op.getValueType() == MVT::f32)
+ ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0))
+ : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), Const1);
+
+ // Clear MSB.
+ if (HasExtractInsert)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
+ DAG.getRegister(Mips::ZERO, MVT::i32),
+ DAG.getConstant(31, DL, MVT::i32), Const1, X);
+ else {
+ // TODO: Provide DAG patterns which transform (and x, cst)
+ // back to a (shl (srl x (clz cst)) (clz cst)) sequence.
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ }
+
+ if (Op.getValueType() == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
+
+ // FIXME: For mips32r2, the sequence of (BuildPairF64 (ins (ExtractElementF64
+ // Op 1), $zero, 31 1) (ExtractElementF64 Op 0)) and the Op has one use, we
+ // should be able to drop the usage of mfc1/mtc1 and rewrite the register in
+ // place.
+ SDValue LowX =
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
+
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
+ SDLoc DL(Op);
+ SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);
+
+ // Bitcast to integer node.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
+
+ // Clear MSB.
+ if (HasExtractInsert)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
+ DAG.getRegister(Mips::ZERO_64, MVT::i64),
+ DAG.getConstant(63, DL, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
+}
+
+SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
+ if ((ABI.IsN32() || ABI.IsN64()) && (Op.getValueType() == MVT::f64))
+ return lowerFABS64(Op, DAG, Subtarget.hasExtractInsert());
+
+ return lowerFABS32(Op, DAG, Subtarget.hasExtractInsert());
+}
+
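Both lowerFABS32 and lowerFABS64 implement fabs by clearing the sign bit of the value's integer image, either with an INS against $zero or, when EXT/INS is unavailable, with the shl-by-1/srl-by-1 pair seen above. The custom lowering is only installed when NaNs matter and abs2008 mode is off, since the legacy abs.s/abs.d are arithmetic and may not simply clear the sign bit of a NaN (hence the UseAbs predicate added later in the patch). A host-side sketch of the same bit manipulation, assuming IEEE-754 single-precision layout (not the DAG code itself):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // fabs by clearing the most significant (sign) bit of the f32 bit pattern,
    // the same trick lowerFABS32 performs with INS or an SHL/SRL-by-1 pair.
    static float fabsViaBits(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // "bitcast" f32 -> i32
      Bits = (Bits << 1) >> 1;              // shl 1 then srl 1 clears bit 31
      std::memcpy(&F, &Bits, sizeof(F));    // "bitcast" i32 -> f32
      return F;
    }

    int main() {
      for (float V : {-3.5f, 2.25f, -0.0f})
        std::printf("fabs(%g) = %g\n", V, fabsViaBits(V));
      return 0;
    }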
SDValue MipsTargetLowering::
lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// check the depth
- assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
- "Frame address can only be determined for current frame.");
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+ DAG.getContext()->emitError(
+ "return address can be determined only for current frame");
+ return SDValue();
+ }
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -2314,8 +2396,11 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
return SDValue();
// check the depth
- assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
- "Return address can be determined only for current frame.");
+ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
+ DAG.getContext()->emitError(
+ "return address can be determined only for current frame");
+ return SDValue();
+ }
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2879,6 +2964,54 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
Ops.push_back(InFlag);
}
+void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+ switch (MI.getOpcode()) {
+ default:
+ return;
+ case Mips::JALR:
+ case Mips::JALRPseudo:
+ case Mips::JALR64:
+ case Mips::JALR64Pseudo:
+ case Mips::JALR16_MM:
+ case Mips::JALRC16_MMR6:
+ case Mips::TAILCALLREG:
+ case Mips::TAILCALLREG64:
+ case Mips::TAILCALLR6REG:
+ case Mips::TAILCALL64R6REG:
+ case Mips::TAILCALLREG_MM:
+ case Mips::TAILCALLREG_MMR6: {
+ if (!EmitJalrReloc ||
+ Subtarget.inMips16Mode() ||
+ !isPositionIndependent() ||
+ Node->getNumOperands() < 1 ||
+ Node->getOperand(0).getNumOperands() < 2) {
+ return;
+ }
+ // We are after the callee address, set by LowerCall().
+ // If added to MI, asm printer will emit .reloc R_MIPS_JALR for the
+ // symbol.
+ const SDValue TargetAddr = Node->getOperand(0).getOperand(1);
+ StringRef Sym;
+ if (const GlobalAddressSDNode *G =
+ dyn_cast_or_null<const GlobalAddressSDNode>(TargetAddr)) {
+ Sym = G->getGlobal()->getName();
+ }
+ else if (const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(TargetAddr)) {
+ Sym = ES->getSymbol();
+ }
+
+ if (Sym.empty())
+ return;
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ MCSymbol *S = MF->getContext().getOrCreateSymbol(Sym);
+ MI.addOperand(MachineOperand::CreateMCSymbol(S, MipsII::MO_JALR));
+ }
+ }
+}
+
/// LowerCall - functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
@@ -2930,7 +3063,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// the maximum out going argument area (including the reserved area), and
// preallocates the stack space on entrance to the caller.
//
- // FIXME: We should do the same for efficency and space.
+ // FIXME: We should do the same for efficiency and space.
// Note: The check on the calling convention below must match
// MipsABIInfo::GetCalleeAllocdArgSizeInBytes().
@@ -4007,18 +4140,18 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
+EVT MipsTargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
if (Subtarget.hasMips64())
return MVT::i64;
return MVT::i32;
}
-bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
if (VT != MVT::f32 && VT != MVT::f64)
return false;
if (Imm.isNegZero())
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index e043f133a09f..2db60e9801f1 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -1,9 +1,8 @@
//===- MipsISelLowering.h - Mips DAG Lowering Interface ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -285,6 +284,8 @@ class TargetRegisterClass;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
/// Return the register type for a given MVT, ensuring vectors are treated
/// as a series of gpr sized integers.
@@ -341,6 +342,9 @@ class TargetRegisterClass;
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
+ void AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const override;
+
void HandleByVal(CCState *, unsigned &, unsigned) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
@@ -649,9 +653,11 @@ class TargetRegisterClass;
unsigned
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
if (ConstraintCode == "R")
return InlineAsm::Constraint_R;
- else if (ConstraintCode == "ZC")
+ if (ConstraintCode == "ZC")
return InlineAsm::Constraint_ZC;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
@@ -666,12 +672,13 @@ class TargetRegisterClass;
unsigned SrcAlign,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
unsigned getJumpTableEncoding() const override;
bool useSoftFloat() const override;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 4cb8574e08f6..e94e107e64c2 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,9 +1,8 @@
//===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -143,7 +142,7 @@ multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
FGR_32;
- def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>, FGR_64 {
+ def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>, FGR_64 {
string DecoderNamespace = "MipsFP64";
}
}
@@ -487,7 +486,7 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>;
}
-let AdditionalPredicates = [NotInMicroMips] in {
+let AdditionalPredicates = [NotInMicroMips, UseAbs] in {
def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
ABSS_FM<0x5, 16>, ISA_MIPS1;
defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>, ISA_MIPS1;
@@ -551,12 +550,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
let isMoveReg = 1 in {
def FMOV_S : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>,
ABSS_FM<0x6, 16>, ISA_MIPS1;
- def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>,
- ABSS_FM<0x6, 17>, ISA_MIPS1, FGR_32;
- def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>,
- ABSS_FM<0x6, 17>, ISA_MIPS1, FGR_64 {
- let DecoderNamespace = "MipsFP64";
- }
+ defm FMOV : ABSS_M<"mov.d", II_MOV_D>, ABSS_FM<0x6, 17>, ISA_MIPS1;
} // isMoveReg
}
@@ -793,6 +787,11 @@ def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd),
"li.d\t$rd, $fpimm">,
FGR_64, HARDFLOAT;
+def SDC1_M1 : MipsAsmPseudoInst<(outs AFGR64Opnd:$fd),
+ (ins mem_simm16:$addr),
+ "s.d\t$fd, $addr">,
+ FGR_32, ISA_MIPS1, HARDFLOAT;
+
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
@@ -805,6 +804,9 @@ def : MipsInstAlias
def : MipsInstAlias
<"s.d $fd, $addr", (SDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
FGR_64, ISA_MIPS2, HARDFLOAT;
+def : MipsInstAlias
+ <"s.d $fd, $addr", (SDC1_M1 AFGR64Opnd:$fd, mem_simm16:$addr), 0>,
+ FGR_32, ISA_MIPS1, HARDFLOAT;
def : MipsInstAlias
<"l.s $fd, $addr", (LWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index ebbdcdf0df89..14f01514f33f 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,9 +1,8 @@
//===-- MipsInstrFormats.td - Mips Instruction Formats -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -146,6 +145,7 @@ class PseudoSE<dag outs, dag ins, list<dag> pattern,
class MipsAsmPseudoInst<dag outs, dag ins, string asmstr>:
MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo> {
let isPseudo = 1;
+ let hasNoSchedulingInfo = 1;
let Pattern = [];
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index bfb4c775205d..fbd56206b249 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -1,9 +1,8 @@
//===- MipsInstrInfo.cpp - Mips Instruction Information -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -578,7 +577,8 @@ unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return MI.getDesc().getSize();
- case TargetOpcode::INLINEASM: { // Inline Asm: Variable size.
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR: { // Inline Asm: Variable size.
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -653,6 +653,16 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB.addImm(0);
+ // If I has an MCSymbol operand (used by asm printer, to emit R_MIPS_JALR),
+ // add it to the new instruction.
+ for (unsigned J = I->getDesc().getNumOperands(), E = I->getNumOperands();
+ J < E; ++J) {
+ const MachineOperand &MO = I->getOperand(J);
+ if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR))
+ MIB.addSym(MO.getMCSymbol(), MipsII::MO_JALR);
+ }
+
+
} else {
for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
@@ -825,7 +835,8 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_GOT_HI16, "mips-got-hi16"},
{MO_GOT_LO16, "mips-got-lo16"},
{MO_CALL_HI16, "mips-call-hi16"},
- {MO_CALL_LO16, "mips-call-lo16"}
+ {MO_CALL_LO16, "mips-call-lo16"},
+ {MO_JALR, "mips-jalr"}
};
return makeArrayRef(Flags);
}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 9d27b8f66211..a626c0c3fdb8 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -1,9 +1,8 @@
//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index d9398b7d6024..a4e85a38ab28 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1,9 +1,8 @@
//===- MipsInstrInfo.td - Target Description for Mips Target -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -221,6 +220,8 @@ def IsNotN64 : Predicate<"!Subtarget->isABI_N64()">;
def RelocNotPIC : Predicate<"!TM.isPositionIndependent()">;
def RelocPIC : Predicate<"TM.isPositionIndependent()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
+def UseAbs : Predicate<"Subtarget->inAbs2008Mode() ||"
+ "TM.Options.NoNaNsFPMath">;
def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
def NotDSP : Predicate<"!Subtarget->hasDSP()">;
@@ -1623,11 +1624,15 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in {
class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
Register RetReg, RegisterOperand ResRO = RO>:
PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
- PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
+ PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)> {
+ let hasPostISelHook = 1;
+ }
class JumpLinkReg<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], II_JALR, FrmR, opstr>;
+ [], II_JALR, FrmR, opstr> {
+ let hasPostISelHook = 1;
+ }
class BGEZAL_FT<string opstr, DAGOperand opnd,
RegisterOperand RO> :
@@ -1646,7 +1651,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
- PseudoInstExpansion<(JumpInst RO:$rs)>;
+ PseudoInstExpansion<(JumpInst RO:$rs)> {
+ let hasPostISelHook = 1;
+ }
}
class BAL_BR_Pseudo<Instruction RealInst, DAGOperand opnd> :
@@ -1844,7 +1851,9 @@ class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
- [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
+ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]> {
+ let hasNoSchedulingInfo = 1;
+}
class Atomic2OpsPostRA<RegisterClass RC> :
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> {
@@ -1861,7 +1870,9 @@ class Atomic2OpsSubwordPostRA<RegisterClass RC> :
// during ISelLowering, which produces the PostRA version of this instruction.
class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
- [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
+ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]> {
+ let hasNoSchedulingInfo = 1;
+}
class AtomicCmpSwapPostRA<RegisterClass RC> :
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> {
@@ -1876,7 +1887,6 @@ class AtomicCmpSwapSubwordPostRA<RegisterClass RC> :
let mayStore = 1;
}
-
class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
[], II_LL, FrmI, opstr> {
@@ -1928,7 +1938,7 @@ let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in {
def ERet : PseudoSE<(outs), (ins), [(MipsERet)]>;
}
-let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
+let Defs = [SP], Uses = [SP], hasSideEffects = 1, hasNoSchedulingInfo = 1 in {
def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
@@ -2004,17 +2014,25 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
// Expands to: lui $dst, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst),
- (ins brtarget:$tgt, brtarget:$baltgt), []>;
+ (ins brtarget:$tgt, brtarget:$baltgt), []> {
+ bit hasNoSchedulingInfo = 1;
+}
// Expands to: lui $dst, highest/%higher/%hi/%lo($tgt)
def LONG_BRANCH_LUi2Op : PseudoSE<(outs GPR32Opnd:$dst),
- (ins brtarget:$tgt), []>;
+ (ins brtarget:$tgt), []> {
+ bit hasNoSchedulingInfo = 1;
+}
// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt - $baltgt)
def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
- (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+ (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []> {
+ bit hasNoSchedulingInfo = 1;
+}
// Expands to: addiu $dst, $src, %highest/%higher/%hi/%lo($tgt)
def LONG_BRANCH_ADDiu2Op : PseudoSE<(outs GPR32Opnd:$dst),
- (ins GPR32Opnd:$src, brtarget:$tgt), []>;
+ (ins GPR32Opnd:$src, brtarget:$tgt), []> {
+ bit hasNoSchedulingInfo = 1;
+}
//===----------------------------------------------------------------------===//
// Instruction definition
@@ -2117,7 +2135,7 @@ let AdditionalPredicates = [NotInMicroMips] in {
LW_FM<0x28>, ISA_MIPS1;
def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>,
ISA_MIPS1;
- def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>, ISA_MIPS1;
+ def SW : StdMMR6Rel, Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>, ISA_MIPS1;
}
/// load/store left/right
@@ -2324,12 +2342,12 @@ def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in {
+let Uses = [V0, V1], isTerminator = 1, isReturn = 1,
+ isBarrier = 1, isCTI = 1, hasNoSchedulingInfo = 1 in {
def MIPSeh_return32 : MipsPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst),
- [(MIPSehret GPR32:$spoff, GPR32:$dst)]>;
- def MIPSeh_return64 : MipsPseudo<(outs), (ins GPR64:$spoff,
- GPR64:$dst),
- [(MIPSehret GPR64:$spoff, GPR64:$dst)]>;
+ [(MIPSehret GPR32:$spoff, GPR32:$dst)]>;
+ def MIPSeh_return64 : MipsPseudo<(outs), (ins GPR64:$spoff, GPR64:$dst),
+ [(MIPSehret GPR64:$spoff, GPR64:$dst)]>;
}
/// Multiply and Divide Instructions.
@@ -2675,18 +2693,64 @@ let AdditionalPredicates = [NotInMicroMips] in {
(SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS1;
def : MipsInstAlias<"negu $rt",
(SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>, ISA_MIPS1;
+
+ def SGE : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "sge\t$rd, $rs, $rt">, ISA_MIPS1;
+ def : MipsInstAlias<"sge $rs, $rt",
+ (SGE GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1;
+ def SGEImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "sge\t$rd, $rs, $imm">, GPR_32;
+ def : MipsInstAlias<"sge $rs, $imm", (SGEImm GPR32Opnd:$rs,
+ GPR32Opnd:$rs,
+ simm32:$imm), 0>,
+ GPR_32;
+
+ def SGEU : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "sgeu\t$rd, $rs, $rt">, ISA_MIPS1;
+ def : MipsInstAlias<"sgeu $rs, $rt",
+ (SGEU GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1;
+ def SGEUImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, uimm32_coerced:$imm),
+ "sgeu\t$rd, $rs, $imm">, GPR_32;
+ def : MipsInstAlias<"sgeu $rs, $imm", (SGEUImm GPR32Opnd:$rs,
+ GPR32Opnd:$rs,
+ uimm32_coerced:$imm), 0>,
+ GPR_32;
+
def : MipsInstAlias<
"sgt $rd, $rs, $rt",
(SLT GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
def : MipsInstAlias<
"sgt $rs, $rt",
(SLT GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
+
+ def SGTImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "sgt\t$rd, $rs, $imm">, GPR_32;
+ def : MipsInstAlias<"sgt $rs, $imm", (SGTImm GPR32Opnd:$rs,
+ GPR32Opnd:$rs,
+ simm32:$imm), 0>,
+ GPR_32;
def : MipsInstAlias<
"sgtu $rd, $rs, $rt",
(SLTu GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
def : MipsInstAlias<
"sgtu $$rs, $rt",
(SLTu GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS1;
+
+ def SGTUImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, uimm32_coerced:$imm),
+ "sgtu\t$rd, $rs, $imm">, GPR_32;
+ def : MipsInstAlias<"sgtu $rs, $imm", (SGTUImm GPR32Opnd:$rs,
+ GPR32Opnd:$rs,
+ uimm32_coerced:$imm), 0>,
+ GPR_32;
+
def : MipsInstAlias<
"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>, ISA_MIPS1;
@@ -2737,14 +2801,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"bnez $rs,$offset",
(BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
ISA_MIPS1;
- def : MipsInstAlias<"bnezl $rs,$offset",
- (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
+ def : MipsInstAlias<"bnezl $rs, $offset",
+ (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>,
ISA_MIPS2;
def : MipsInstAlias<"beqz $rs,$offset",
(BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
ISA_MIPS1;
- def : MipsInstAlias<"beqzl $rs,$offset",
- (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
+ def : MipsInstAlias<"beqzl $rs, $offset",
+ (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>,
ISA_MIPS2;
def : MipsInstAlias<"syscall", (SYSCALL 0), 1>, ISA_MIPS1;
diff --git a/lib/Target/Mips/MipsInstructionSelector.cpp b/lib/Target/Mips/MipsInstructionSelector.cpp
index b041590ee343..45a47ad3c087 100644
--- a/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- MipsInstructionSelector.cpp ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -12,6 +11,8 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterBankInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
@@ -37,6 +38,12 @@ public:
private:
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+ bool materialize32BitImm(Register DestReg, APInt Imm,
+ MachineIRBuilder &B) const;
+ bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ const TargetRegisterClass *
+ getRegClassForTypeOnBank(unsigned OpSize, const RegisterBank &RB,
+ const RegisterBankInfo &RBI) const;
const MipsTargetMachine &TM;
const MipsSubtarget &STI;
@@ -74,15 +81,24 @@ MipsInstructionSelector::MipsInstructionSelector(
{
}
-static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) {
- unsigned DstReg = I.getOperand(0).getReg();
+bool MipsInstructionSelector::selectCopy(MachineInstr &I,
+ MachineRegisterInfo &MRI) const {
+ Register DstReg = I.getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
return true;
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI);
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ if (RegBank->getID() == Mips::FPRBRegBankID) {
+ if (DstSize == 32)
+ RC = &Mips::FGR32RegClass;
+ else if (DstSize == 64)
+ RC = STI.isFP64bit() ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
+ else
+ llvm_unreachable("Unsupported destination size");
+ }
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
<< " operand\n");
@@ -91,6 +107,102 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return true;
}
+const TargetRegisterClass *MipsInstructionSelector::getRegClassForTypeOnBank(
+ unsigned OpSize, const RegisterBank &RB,
+ const RegisterBankInfo &RBI) const {
+ if (RB.getID() == Mips::GPRBRegBankID)
+ return &Mips::GPR32RegClass;
+
+ if (RB.getID() == Mips::FPRBRegBankID)
+ return OpSize == 32
+ ? &Mips::FGR32RegClass
+ : STI.hasMips32r6() || STI.isFP64bit() ? &Mips::FGR64RegClass
+ : &Mips::AFGR64RegClass;
+
+ llvm_unreachable("getRegClassForTypeOnBank can't find register class.");
+ return nullptr;
+}
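
As a rough standalone sketch (not part of the patch) of the class selection above, using strings in place of TargetRegisterClass pointers; whether the 64-bit FPR class is FGR64 or AFGR64 depends on FP64/MIPS32r6 support:

    #include <string>

    // Illustrative only: mirrors getRegClassForTypeOnBank's decision tree.
    std::string regClassFor(unsigned OpSize, bool IsFPRBank, bool HasFP64OrR6) {
      if (!IsFPRBank)
        return "GPR32";                          // the GPR bank always uses GPR32
      if (OpSize == 32)
        return "FGR32";                          // single-precision FPR
      return HasFP64OrR6 ? "FGR64" : "AFGR64";   // 64-bit FPR variant
    }
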
+
+bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
+ MachineIRBuilder &B) const {
+ assert(Imm.getBitWidth() == 32 && "Unsupported immediate size.");
+  // ORi zero-extends the immediate. Used for values with zeros in the high 16 bits.
+ if (Imm.getHiBits(16).isNullValue()) {
+ MachineInstr *Inst = B.buildInstr(Mips::ORi, {DestReg}, {Register(Mips::ZERO)})
+ .addImm(Imm.getLoBits(16).getLimitedValue());
+ return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
+ }
+  // LUi places the immediate in the high 16 bits and sets the low 16 bits to zero.
+ if (Imm.getLoBits(16).isNullValue()) {
+ MachineInstr *Inst = B.buildInstr(Mips::LUi, {DestReg}, {})
+ .addImm(Imm.getHiBits(16).getLimitedValue());
+ return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
+ }
+  // ADDiu sign-extends the immediate. Used for values with ones in the high 17 bits.
+ if (Imm.isSignedIntN(16)) {
+ MachineInstr *Inst = B.buildInstr(Mips::ADDiu, {DestReg}, {Register(Mips::ZERO)})
+ .addImm(Imm.getLoBits(16).getLimitedValue());
+ return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
+ }
+  // Values that cannot be materialized with a single immediate instruction.
+ Register LUiReg = B.getMRI()->createVirtualRegister(&Mips::GPR32RegClass);
+ MachineInstr *LUi = B.buildInstr(Mips::LUi, {LUiReg}, {})
+ .addImm(Imm.getHiBits(16).getLimitedValue());
+ MachineInstr *ORi = B.buildInstr(Mips::ORi, {DestReg}, {LUiReg})
+ .addImm(Imm.getLoBits(16).getLimitedValue());
+ if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+ return false;
+ if (!constrainSelectedInstRegOperands(*ORi, TII, TRI, RBI))
+ return false;
+ return true;
+}
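
A minimal standalone sketch of the splitting performed above, assuming a plain uint32_t instead of APInt (names are illustrative, not part of the patch): values with a zero high or low half take one instruction, everything else needs the LUi/ORi pair.

    #include <cstdint>
    #include <utility>

    // Returns {hi16, lo16}; when both halves are non-zero, a selector would
    // emit LUi $d, hi16 followed by ORi $d, $d, lo16.
    std::pair<uint16_t, uint16_t> splitImm32(uint32_t Imm) {
      return {static_cast<uint16_t>(Imm >> 16),
              static_cast<uint16_t>(Imm & 0xFFFF)};
    }
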
+
+/// Returning the original Opc indicates that we failed to select a MIPS opcode.
+static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned MemSizeInBytes,
+ unsigned RegBank, bool isFP64) {
+ bool isStore = Opc == TargetOpcode::G_STORE;
+ if (RegBank == Mips::GPRBRegBankID) {
+ if (isStore)
+ switch (MemSizeInBytes) {
+ case 4:
+ return Mips::SW;
+ case 2:
+ return Mips::SH;
+ case 1:
+ return Mips::SB;
+ default:
+ return Opc;
+ }
+ else
+      // An unspecified extending load is selected as a zero-extending load.
+ switch (MemSizeInBytes) {
+ case 4:
+ return Mips::LW;
+ case 2:
+ return Opc == TargetOpcode::G_SEXTLOAD ? Mips::LH : Mips::LHu;
+ case 1:
+ return Opc == TargetOpcode::G_SEXTLOAD ? Mips::LB : Mips::LBu;
+ default:
+ return Opc;
+ }
+ }
+
+ if (RegBank == Mips::FPRBRegBankID) {
+ switch (MemSizeInBytes) {
+ case 4:
+ return isStore ? Mips::SWC1 : Mips::LWC1;
+ case 8:
+ if (isFP64)
+ return isStore ? Mips::SDC164 : Mips::LDC164;
+ else
+ return isStore ? Mips::SDC1 : Mips::LDC1;
+ default:
+ return Opc;
+ }
+ }
+ return Opc;
+}
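
For the GPR-bank half of the table above, a standalone sketch (mnemonic strings instead of Mips::* opcodes; the FPR bank and FP64 handling are omitted) looks roughly like this:

    #include <string>

    // Illustrative only: maps store/load size and signedness to a mnemonic.
    // An empty string plays the role of returning Opc unchanged on failure.
    std::string pickGprMemOpcode(bool IsStore, bool IsSignedLoad,
                                 unsigned MemSizeInBytes) {
      if (IsStore) {
        switch (MemSizeInBytes) {
        case 4: return "sw";
        case 2: return "sh";
        case 1: return "sb";
        default: return "";
        }
      }
      switch (MemSizeInBytes) {
      case 4: return "lw";
      case 2: return IsSignedLoad ? "lh" : "lhu";
      case 1: return IsSignedLoad ? "lb" : "lbu";
      default: return "";
      }
    }
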
+
bool MipsInstructionSelector::select(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const {
@@ -100,19 +212,52 @@ bool MipsInstructionSelector::select(MachineInstr &I,
if (!isPreISelGenericOpcode(I.getOpcode())) {
if (I.isCopy())
- return selectCopy(I, TII, MRI, TRI, RBI);
+ return selectCopy(I, MRI);
return true;
}
- if (selectImpl(I, CoverageInfo)) {
+ if (I.getOpcode() == Mips::G_MUL) {
+ MachineInstr *Mul = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MUL))
+ .add(I.getOperand(0))
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ if (!constrainSelectedInstRegOperands(*Mul, TII, TRI, RBI))
+ return false;
+ Mul->getOperand(3).setIsDead(true);
+ Mul->getOperand(4).setIsDead(true);
+
+ I.eraseFromParent();
return true;
}
+ if (selectImpl(I, CoverageInfo))
+ return true;
+
MachineInstr *MI = nullptr;
using namespace TargetOpcode;
switch (I.getOpcode()) {
+ case G_UMULH: {
+ Register PseudoMULTuReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+ MachineInstr *PseudoMULTu, *PseudoMove;
+
+ PseudoMULTu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoMULTu))
+ .addDef(PseudoMULTuReg)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ if (!constrainSelectedInstRegOperands(*PseudoMULTu, TII, TRI, RBI))
+ return false;
+
+ PseudoMove = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::PseudoMFHI))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(PseudoMULTuReg);
+ if (!constrainSelectedInstRegOperands(*PseudoMove, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
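
The PseudoMULTu/PseudoMFHI pair above computes the high half of the full unsigned product; a plain C++ sketch of the same value (illustrative, not LLVM code):

    #include <cstdint>

    // G_UMULH for s32: upper 32 bits of the 64-bit unsigned product,
    // which the selected code reads back from the HI register.
    uint32_t umulh32(uint32_t A, uint32_t B) {
      return static_cast<uint32_t>((static_cast<uint64_t>(A) * B) >> 32);
    }
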
case G_GEP: {
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDu))
.add(I.getOperand(0))
@@ -127,16 +272,46 @@ bool MipsInstructionSelector::select(MachineInstr &I,
.addImm(0);
break;
}
+ case G_BRCOND: {
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::BNE))
+ .add(I.getOperand(0))
+ .addUse(Mips::ZERO)
+ .add(I.getOperand(1));
+ break;
+ }
+ case G_PHI: {
+ const Register DestReg = I.getOperand(0).getReg();
+ const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
+
+ const TargetRegisterClass *DefRC = nullptr;
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ DefRC = TRI.getRegClass(DestReg);
+ else
+ DefRC = getRegClassForTypeOnBank(OpSize,
+ *RBI.getRegBank(DestReg, MRI, TRI), RBI);
+
+ I.setDesc(TII.get(TargetOpcode::PHI));
+ return RBI.constrainGenericRegister(DestReg, *DefRC, MRI);
+ }
case G_STORE:
- case G_LOAD: {
- const unsigned DestReg = I.getOperand(0).getReg();
+ case G_LOAD:
+ case G_ZEXTLOAD:
+ case G_SEXTLOAD: {
+ const Register DestReg = I.getOperand(0).getReg();
const unsigned DestRegBank = RBI.getRegBank(DestReg, MRI, TRI)->getID();
const unsigned OpSize = MRI.getType(DestReg).getSizeInBits();
+ const unsigned OpMemSizeInBytes = (*I.memoperands_begin())->getSize();
- if (DestRegBank != Mips::GPRBRegBankID || OpSize != 32)
+ if (DestRegBank == Mips::GPRBRegBankID && OpSize != 32)
return false;
- const unsigned NewOpc = I.getOpcode() == G_STORE ? Mips::SW : Mips::LW;
+ if (DestRegBank == Mips::FPRBRegBankID && OpSize != 32 && OpSize != 64)
+ return false;
+
+ const unsigned NewOpc = selectLoadStoreOpCode(
+ I.getOpcode(), OpMemSizeInBytes, DestRegBank, STI.isFP64bit());
+ if (NewOpc == I.getOpcode())
+ return false;
MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
.add(I.getOperand(0))
@@ -149,7 +324,7 @@ bool MipsInstructionSelector::select(MachineInstr &I,
case G_UREM:
case G_SDIV:
case G_SREM: {
- unsigned HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
+ Register HILOReg = MRI.createVirtualRegister(&Mips::ACC64RegClass);
bool IsSigned = I.getOpcode() == G_SREM || I.getOpcode() == G_SDIV;
bool IsDiv = I.getOpcode() == G_UDIV || I.getOpcode() == G_SDIV;
@@ -182,58 +357,150 @@ bool MipsInstructionSelector::select(MachineInstr &I,
break;
}
case G_CONSTANT: {
- int Imm = I.getOperand(1).getCImm()->getValue().getLimitedValue();
- unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
- MachineInstr *LUi, *ORi;
+ MachineIRBuilder B(I);
+ if (!materialize32BitImm(I.getOperand(0).getReg(),
+ I.getOperand(1).getCImm()->getValue(), B))
+ return false;
- LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
- .addDef(LUiReg)
- .addImm(Imm >> 16);
+ I.eraseFromParent();
+ return true;
+ }
+ case G_FCONSTANT: {
+ const APFloat &FPimm = I.getOperand(1).getFPImm()->getValueAPF();
+ APInt APImm = FPimm.bitcastToAPInt();
+ unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+
+ if (Size == 32) {
+ Register GPRReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineIRBuilder B(I);
+ if (!materialize32BitImm(GPRReg, APImm, B))
+ return false;
- ORi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ORi))
- .addDef(I.getOperand(0).getReg())
- .addUse(LUiReg)
- .addImm(Imm & 0xFFFF);
+ MachineInstrBuilder MTC1 =
+ B.buildInstr(Mips::MTC1, {I.getOperand(0).getReg()}, {GPRReg});
+ if (!MTC1.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
+ if (Size == 64) {
+ Register GPRRegHigh = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register GPRRegLow = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ MachineIRBuilder B(I);
+ if (!materialize32BitImm(GPRRegHigh, APImm.getHiBits(32).trunc(32), B))
+ return false;
+ if (!materialize32BitImm(GPRRegLow, APImm.getLoBits(32).trunc(32), B))
+ return false;
+
+ MachineInstrBuilder PairF64 = B.buildInstr(
+ STI.isFP64bit() ? Mips::BuildPairF64_64 : Mips::BuildPairF64,
+ {I.getOperand(0).getReg()}, {GPRRegLow, GPRRegHigh});
+ if (!PairF64.constrainAllUses(TII, TRI, RBI))
+ return false;
+ }
- if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+ I.eraseFromParent();
+ return true;
+ }
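
For the 64-bit path above, the constant's bit pattern is split into two 32-bit halves before BuildPairF64; a standalone sketch of that split (illustrative names, not part of the patch):

    #include <cstdint>
    #include <cstring>
    #include <utility>

    // Returns {hi32, lo32} of the double's bit pattern; the selector
    // materializes each half into a GPR and then builds the FPR pair.
    std::pair<uint32_t, uint32_t> splitDoubleBits(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      return {static_cast<uint32_t>(Bits >> 32), static_cast<uint32_t>(Bits)};
    }
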
+ case G_FABS: {
+ unsigned Size = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+ unsigned FABSOpcode =
+ Size == 32 ? Mips::FABS_S
+ : STI.isFP64bit() ? Mips::FABS_D64 : Mips::FABS_D32;
+ MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(FABSOpcode))
+ .add(I.getOperand(0))
+ .add(I.getOperand(1));
+ break;
+ }
+ case G_FPTOSI: {
+ unsigned FromSize = MRI.getType(I.getOperand(1).getReg()).getSizeInBits();
+ unsigned ToSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
+ (void)ToSize;
+ assert((ToSize == 32) && "Unsupported integer size for G_FPTOSI");
+ assert((FromSize == 32 || FromSize == 64) &&
+ "Unsupported floating point size for G_FPTOSI");
+
+ unsigned Opcode;
+ if (FromSize == 32)
+ Opcode = Mips::TRUNC_W_S;
+ else
+ Opcode = STI.isFP64bit() ? Mips::TRUNC_W_D64 : Mips::TRUNC_W_D32;
+ unsigned ResultInFPR = MRI.createVirtualRegister(&Mips::FGR32RegClass);
+ MachineInstr *Trunc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
+ .addDef(ResultInFPR)
+ .addUse(I.getOperand(1).getReg());
+ if (!constrainSelectedInstRegOperands(*Trunc, TII, TRI, RBI))
return false;
- if (!constrainSelectedInstRegOperands(*ORi, TII, TRI, RBI))
+
+ MachineInstr *Move = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::MFC1))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(ResultInFPR);
+ if (!constrainSelectedInstRegOperands(*Move, TII, TRI, RBI))
return false;
I.eraseFromParent();
return true;
}
case G_GLOBAL_VALUE: {
- if (MF.getTarget().isPositionIndependent())
- return false;
-
const llvm::GlobalValue *GVal = I.getOperand(1).getGlobal();
- unsigned LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
- MachineInstr *LUi, *ADDiu;
+ if (MF.getTarget().isPositionIndependent()) {
+ MachineInstr *LWGOT = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LW))
+ .addDef(I.getOperand(0).getReg())
+ .addReg(MF.getInfo<MipsFunctionInfo>()
+ ->getGlobalBaseRegForGlobalISel())
+ .addGlobalAddress(GVal);
+      // Global values that don't have local linkage are handled differently
+      // when they are part of a call sequence. MipsCallLowering::lowerCall
+      // creates a G_GLOBAL_VALUE instruction as part of the call sequence and
+      // adds the MO_GOT_CALL flag when the callee doesn't have local linkage.
+ if (I.getOperand(1).getTargetFlags() == MipsII::MO_GOT_CALL)
+ LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT_CALL);
+ else
+ LWGOT->getOperand(2).setTargetFlags(MipsII::MO_GOT);
+ LWGOT->addMemOperand(
+ MF, MF.getMachineMemOperand(MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad, 4, 4));
+ if (!constrainSelectedInstRegOperands(*LWGOT, TII, TRI, RBI))
+ return false;
- LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
- .addDef(LUiReg)
- .addGlobalAddress(GVal);
- LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+ if (GVal->hasLocalLinkage()) {
+ Register LWGOTDef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ LWGOT->getOperand(0).setReg(LWGOTDef);
- ADDiu = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+ MachineInstr *ADDiu =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
.addDef(I.getOperand(0).getReg())
- .addUse(LUiReg)
+ .addReg(LWGOTDef)
.addGlobalAddress(GVal);
- ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
-
- if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
- return false;
- if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
- return false;
+ ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+ if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+ return false;
+ }
+ } else {
+ Register LUiReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+
+ MachineInstr *LUi = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::LUi))
+ .addDef(LUiReg)
+ .addGlobalAddress(GVal);
+ LUi->getOperand(1).setTargetFlags(MipsII::MO_ABS_HI);
+ if (!constrainSelectedInstRegOperands(*LUi, TII, TRI, RBI))
+ return false;
+ MachineInstr *ADDiu =
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(LUiReg)
+ .addGlobalAddress(GVal);
+ ADDiu->getOperand(2).setTargetFlags(MipsII::MO_ABS_LO);
+ if (!constrainSelectedInstRegOperands(*ADDiu, TII, TRI, RBI))
+ return false;
+ }
I.eraseFromParent();
return true;
}
case G_ICMP: {
struct Instr {
- unsigned Opcode, Def, LHS, RHS;
- Instr(unsigned Opcode, unsigned Def, unsigned LHS, unsigned RHS)
+ unsigned Opcode;
+ Register Def, LHS, RHS;
+ Instr(unsigned Opcode, Register Def, Register LHS, Register RHS)
: Opcode(Opcode), Def(Def), LHS(LHS), RHS(RHS){};
bool hasImm() const {
@@ -244,10 +511,10 @@ bool MipsInstructionSelector::select(MachineInstr &I,
};
SmallVector<struct Instr, 2> Instructions;
- unsigned ICMPReg = I.getOperand(0).getReg();
- unsigned Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
- unsigned LHS = I.getOperand(2).getReg();
- unsigned RHS = I.getOperand(3).getReg();
+ Register ICMPReg = I.getOperand(0).getReg();
+ Register Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ Register LHS = I.getOperand(2).getReg();
+ Register RHS = I.getOperand(3).getReg();
CmpInst::Predicate Cond =
static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
@@ -309,6 +576,84 @@ bool MipsInstructionSelector::select(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case G_FCMP: {
+ unsigned MipsFCMPCondCode;
+ bool isLogicallyNegated;
+ switch (CmpInst::Predicate Cond = static_cast<CmpInst::Predicate>(
+ I.getOperand(1).getPredicate())) {
+ case CmpInst::FCMP_UNO: // Unordered
+ case CmpInst::FCMP_ORD: // Ordered (OR)
+ MipsFCMPCondCode = Mips::FCOND_UN;
+ isLogicallyNegated = Cond != CmpInst::FCMP_UNO;
+ break;
+ case CmpInst::FCMP_OEQ: // Equal
+ case CmpInst::FCMP_UNE: // Not Equal (NEQ)
+ MipsFCMPCondCode = Mips::FCOND_OEQ;
+ isLogicallyNegated = Cond != CmpInst::FCMP_OEQ;
+ break;
+ case CmpInst::FCMP_UEQ: // Unordered or Equal
+ case CmpInst::FCMP_ONE: // Ordered or Greater Than or Less Than (OGL)
+ MipsFCMPCondCode = Mips::FCOND_UEQ;
+ isLogicallyNegated = Cond != CmpInst::FCMP_UEQ;
+ break;
+ case CmpInst::FCMP_OLT: // Ordered or Less Than
+ case CmpInst::FCMP_UGE: // Unordered or Greater Than or Equal (UGE)
+ MipsFCMPCondCode = Mips::FCOND_OLT;
+ isLogicallyNegated = Cond != CmpInst::FCMP_OLT;
+ break;
+ case CmpInst::FCMP_ULT: // Unordered or Less Than
+ case CmpInst::FCMP_OGE: // Ordered or Greater Than or Equal (OGE)
+ MipsFCMPCondCode = Mips::FCOND_ULT;
+ isLogicallyNegated = Cond != CmpInst::FCMP_ULT;
+ break;
+ case CmpInst::FCMP_OLE: // Ordered or Less Than or Equal
+ case CmpInst::FCMP_UGT: // Unordered or Greater Than (UGT)
+ MipsFCMPCondCode = Mips::FCOND_OLE;
+ isLogicallyNegated = Cond != CmpInst::FCMP_OLE;
+ break;
+ case CmpInst::FCMP_ULE: // Unordered or Less Than or Equal
+ case CmpInst::FCMP_OGT: // Ordered or Greater Than (OGT)
+ MipsFCMPCondCode = Mips::FCOND_ULE;
+ isLogicallyNegated = Cond != CmpInst::FCMP_ULE;
+ break;
+ default:
+ return false;
+ }
+
+    // The default compare result in the GPR register will be `true`.
+    // We move `false` (Mips::ZERO) into the GPR result with MOVF_I when the
+    // fcmp yields false. When the original predicate (Cond) is the logical
+    // negation of MipsFCMPCondCode, the result is inverted, i.e. MOVT_I is used.
+ unsigned MoveOpcode = isLogicallyNegated ? Mips::MOVT_I : Mips::MOVF_I;
+
+ unsigned TrueInReg = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(Mips::ADDiu))
+ .addDef(TrueInReg)
+ .addUse(Mips::ZERO)
+ .addImm(1);
+
+ unsigned Size = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+ unsigned FCMPOpcode =
+ Size == 32 ? Mips::FCMP_S32
+ : STI.isFP64bit() ? Mips::FCMP_D64 : Mips::FCMP_D32;
+ MachineInstr *FCMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(FCMPOpcode))
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg())
+ .addImm(MipsFCMPCondCode);
+ if (!constrainSelectedInstRegOperands(*FCMP, TII, TRI, RBI))
+ return false;
+
+ MachineInstr *Move = BuildMI(MBB, I, I.getDebugLoc(), TII.get(MoveOpcode))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(Mips::ZERO)
+ .addUse(Mips::FCC0)
+ .addUse(TrueInReg);
+ if (!constrainSelectedInstRegOperands(*Move, TII, TRI, RBI))
+ return false;
+
+ I.eraseFromParent();
+ return true;
+ }
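
The negation trick used above relies on each unsupported FP predicate being the logical negation of a supported one, so the compare itself stays the same and only the conditional move is flipped. A tiny standalone sketch of one such pair (illustrative, not LLVM code):

    // FCOND_OLE is available in hardware; UGT is obtained by negating its
    // result (true for NaN operands, and for ordered a > b).
    bool fcmpOLE(double A, double B) { return A <= B; }
    bool fcmpUGT(double A, double B) { return !fcmpOLE(A, B); }
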
default:
return false;
}
diff --git a/lib/Target/Mips/MipsLegalizerInfo.cpp b/lib/Target/Mips/MipsLegalizerInfo.cpp
index c629f02af00e..e442a81837ed 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- MipsLegalizerInfo.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -25,35 +24,65 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
const LLT s64 = LLT::scalar(64);
const LLT p0 = LLT::pointer(0, 32);
- getActionDefinitionsBuilder(G_ADD)
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({s32})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder(G_UADDE)
+ getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
.lowerFor({{s32, s1}});
+ getActionDefinitionsBuilder(G_UMULH)
+ .legalFor({s32})
+ .maxScalar(0, s32);
+
getActionDefinitionsBuilder({G_LOAD, G_STORE})
- .legalForCartesianProduct({p0, s32}, {p0});
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+ {s32, p0, 16, 8},
+ {s32, p0, 32, 8},
+ {s64, p0, 64, 8},
+ {p0, p0, 32, 8}})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_UNMERGE_VALUES)
+ .legalFor({{s32, s64}});
+
+ getActionDefinitionsBuilder(G_MERGE_VALUES)
+ .legalFor({{s64, s32}});
+
+ getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
+ .legalForTypesWithMemDesc({{s32, p0, 8, 8},
+ {s32, p0, 16, 8}})
+ .minScalar(0, s32);
getActionDefinitionsBuilder(G_SELECT)
- .legalForCartesianProduct({p0, s32}, {s32})
+ .legalForCartesianProduct({p0, s32, s64}, {s32})
.minScalar(0, s32)
.minScalar(1, s32);
+ getActionDefinitionsBuilder(G_BRCOND)
+ .legalFor({s32})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({p0, s32, s64})
+ .minScalar(0, s32);
+
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
.legalFor({s32})
.clampScalar(0, s32, s32);
- getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
- .legalFor({s32});
-
getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UREM, G_UDIV})
.legalFor({s32})
.minScalar(0, s32)
.libcallFor({s64});
+ getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
+ .legalFor({s32, s32})
+ .minScalar(1, s32);
+
getActionDefinitionsBuilder(G_ICMP)
- .legalFor({{s32, s32}})
+ .legalForCartesianProduct({s32}, {s32, p0})
+ .clampScalar(1, s32, s32)
.minScalar(0, s32);
getActionDefinitionsBuilder(G_CONSTANT)
@@ -69,6 +98,46 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_GLOBAL_VALUE)
.legalFor({p0});
+ // FP instructions
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalFor({s32, s64});
+
+ getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS, G_FSQRT})
+ .legalFor({s32, s64});
+
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalFor({{s32, s32}, {s32, s64}})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
+ .libcallFor({s32, s64});
+
+ getActionDefinitionsBuilder(G_FPEXT)
+ .legalFor({{s64, s32}});
+
+ getActionDefinitionsBuilder(G_FPTRUNC)
+ .legalFor({{s32, s64}});
+
+ // FP to int conversion instructions
+ getActionDefinitionsBuilder(G_FPTOSI)
+ .legalForCartesianProduct({s32}, {s64, s32})
+ .libcallForCartesianProduct({s64}, {s64, s32})
+ .minScalar(0, s32);
+
+ getActionDefinitionsBuilder(G_FPTOUI)
+ .libcallForCartesianProduct({s64}, {s64, s32})
+ .minScalar(0, s32);
+
+ // Int to FP conversion instructions
+ getActionDefinitionsBuilder(G_SITOFP)
+ .legalForCartesianProduct({s64, s32}, {s32})
+ .libcallForCartesianProduct({s64, s32}, {s64})
+ .minScalar(1, s32);
+
+ getActionDefinitionsBuilder(G_UITOFP)
+ .libcallForCartesianProduct({s64, s32}, {s64})
+ .minScalar(1, s32);
+
computeTables();
verify(*ST.getInstrInfo());
}
diff --git a/lib/Target/Mips/MipsLegalizerInfo.h b/lib/Target/Mips/MipsLegalizerInfo.h
index 75fadd6cf613..e5021e081890 100644
--- a/lib/Target/Mips/MipsLegalizerInfo.h
+++ b/lib/Target/Mips/MipsLegalizerInfo.h
@@ -1,9 +1,8 @@
//===- MipsLegalizerInfo ----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 46b37ceae391..fd984058a2bf 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -1,9 +1,8 @@
//===- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -117,6 +116,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_CALL_LO16:
TargetKind = MipsMCExpr::MEK_CALL_LO16;
break;
+ case MipsII::MO_JALR:
+ return MCOperand();
}
switch (MOTy) {
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index e19f21c98839..29af6f21de82 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -1,9 +1,8 @@
//===- MipsMCInstLower.h - Lower MachineInstr to MCInst --------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index d4e225678184..2bfc92c85e96 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -1,9 +1,8 @@
//===- MipsMSAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index eecc7c573df1..907ed9ef746f 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1,9 +1,8 @@
//===- MipsMSAInstrInfo.td - MSA ASE instructions -*- tablegen ------------*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1240,6 +1239,7 @@ class MSA_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, ImmOp:$n),
[(set RCD:$wd, (OpNode (VecTy RCWS:$ws), Imm:$n))]> {
bit usesCustomInserter = 1;
+ bit hasNoSchedulingInfo = 1;
}
class MSA_I5_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -1447,6 +1447,7 @@ class MSA_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
[(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
ROIdx:$n))]> {
bit usesCustomInserter = 1;
+ bit hasNoSchedulingInfo = 1;
string Constraints = "$wd = $wd_in";
}
@@ -2044,7 +2045,7 @@ class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w,
// 1.0 when we only need to match ISD::FEXP2.
class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", mul_fexp2, MSA128WOpnd>;
class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", mul_fexp2, MSA128DOpnd>;
-let usesCustomInserter = 1 in {
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
class FEXP2_W_1_PSEUDO_DESC :
MSAPseudo<(outs MSA128W:$wd), (ins MSA128W:$ws),
[(set MSA128W:$wd, (fexp2 MSA128W:$ws))]>;
@@ -3738,6 +3739,7 @@ class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
(ins RCWS:$ws),
[(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
bit usesCustomInserter = 1;
+ bit hasNoSchedulingInfo = 1;
}
def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
@@ -3765,52 +3767,38 @@ def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
 // Pseudos used to implement transparent fp16 support.
let ASEPredicate = [HasMSA] in {
- def ST_F16 : MipsPseudo<(outs), (ins MSA128F16:$ws, mem_simm10:$addr),
- [(store (f16 MSA128F16:$ws), (addrimm10:$addr))]> {
- let usesCustomInserter = 1;
- }
-
- def LD_F16 : MipsPseudo<(outs MSA128F16:$ws), (ins mem_simm10:$addr),
- [(set MSA128F16:$ws, (f16 (load addrimm10:$addr)))]> {
- let usesCustomInserter = 1;
- }
-
- def MSA_FP_EXTEND_W_PSEUDO : MipsPseudo<(outs FGR32Opnd:$fd),
- (ins MSA128F16:$ws),
- [(set FGR32Opnd:$fd,
- (f32 (fpextend MSA128F16:$ws)))]> {
- let usesCustomInserter = 1;
- }
-
- def MSA_FP_ROUND_W_PSEUDO : MipsPseudo<(outs MSA128F16:$wd),
- (ins FGR32Opnd:$fs),
- [(set MSA128F16:$wd,
- (f16 (fpround FGR32Opnd:$fs)))]> {
- let usesCustomInserter = 1;
- }
-
- def MSA_FP_EXTEND_D_PSEUDO : MipsPseudo<(outs FGR64Opnd:$fd),
- (ins MSA128F16:$ws),
- [(set FGR64Opnd:$fd,
- (f64 (fpextend MSA128F16:$ws)))]> {
- let usesCustomInserter = 1;
- }
-
- def MSA_FP_ROUND_D_PSEUDO : MipsPseudo<(outs MSA128F16:$wd),
- (ins FGR64Opnd:$fs),
- [(set MSA128F16:$wd,
- (f16 (fpround FGR64Opnd:$fs)))]> {
- let usesCustomInserter = 1;
- }
-
- def : MipsPat<(MipsTruncIntFP MSA128F16:$ws),
- (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>, ISA_MIPS1,
- ASE_MSA;
-
- def : MipsPat<(MipsFPCmp MSA128F16:$ws, MSA128F16:$wt, imm:$cond),
- (FCMP_S32 (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$ws),
- (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$wt), imm:$cond)>,
- ISA_MIPS1_NOT_32R6_64R6, ASE_MSA;
+ let usesCustomInserter = 1 in {
+ def ST_F16 :
+ MipsPseudo<(outs), (ins MSA128F16:$ws, mem_simm10:$addr),
+ [(store (f16 MSA128F16:$ws), (addrimm10:$addr))]>;
+ def LD_F16 :
+ MipsPseudo<(outs MSA128F16:$ws), (ins mem_simm10:$addr),
+ [(set MSA128F16:$ws, (f16 (load addrimm10:$addr)))]>;
+ }
+
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+ def MSA_FP_EXTEND_W_PSEUDO :
+ MipsPseudo<(outs FGR32Opnd:$fd), (ins MSA128F16:$ws),
+ [(set FGR32Opnd:$fd, (f32 (fpextend MSA128F16:$ws)))]>;
+ def MSA_FP_ROUND_W_PSEUDO :
+ MipsPseudo<(outs MSA128F16:$wd), (ins FGR32Opnd:$fs),
+ [(set MSA128F16:$wd, (f16 (fpround FGR32Opnd:$fs)))]>;
+ def MSA_FP_EXTEND_D_PSEUDO :
+ MipsPseudo<(outs FGR64Opnd:$fd), (ins MSA128F16:$ws),
+ [(set FGR64Opnd:$fd, (f64 (fpextend MSA128F16:$ws)))]>;
+ def MSA_FP_ROUND_D_PSEUDO :
+ MipsPseudo<(outs MSA128F16:$wd), (ins FGR64Opnd:$fs),
+ [(set MSA128F16:$wd, (f16 (fpround FGR64Opnd:$fs)))]>;
+ }
+
+ def : MipsPat<(MipsTruncIntFP MSA128F16:$ws),
+ (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>,
+ ISA_MIPS1, ASE_MSA;
+
+ def : MipsPat<(MipsFPCmp MSA128F16:$ws, MSA128F16:$wt, imm:$cond),
+ (FCMP_S32 (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$ws),
+ (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$wt), imm:$cond)>,
+ ISA_MIPS1_NOT_32R6_64R6, ASE_MSA;
}
def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
diff --git a/lib/Target/Mips/MipsMTInstrFormats.td b/lib/Target/Mips/MipsMTInstrFormats.td
index c2c22e2ad61c..22c290b1c114 100644
--- a/lib/Target/Mips/MipsMTInstrFormats.td
+++ b/lib/Target/Mips/MipsMTInstrFormats.td
@@ -1,9 +1,8 @@
//===-- MipsMTInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMTInstrInfo.td b/lib/Target/Mips/MipsMTInstrInfo.td
index 72e626cbec40..3edeb57b1876 100644
--- a/lib/Target/Mips/MipsMTInstrInfo.td
+++ b/lib/Target/Mips/MipsMTInstrInfo.td
@@ -1,9 +1,8 @@
//===-- MipsMTInstrInfo.td - Mips MT Instruction Infos -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 81b4352670c0..85b20fc58231 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -1,9 +1,8 @@
//===-- MipsMachineFunctionInfo.cpp - Private data used for Mips ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -45,13 +44,109 @@ static const TargetRegisterClass &getGlobalBaseRegClass(MachineFunction &MF) {
return Mips::GPR32RegClass;
}
-unsigned MipsFunctionInfo::getGlobalBaseReg() {
+Register MipsFunctionInfo::getGlobalBaseReg() {
if (!GlobalBaseReg)
GlobalBaseReg =
MF.getRegInfo().createVirtualRegister(&getGlobalBaseRegClass(MF));
return GlobalBaseReg;
}
+Register MipsFunctionInfo::getGlobalBaseRegForGlobalISel() {
+ if (!GlobalBaseReg) {
+ getGlobalBaseReg();
+ initGlobalBaseReg();
+ }
+ return GlobalBaseReg;
+}
+
+void MipsFunctionInfo::initGlobalBaseReg() {
+ if (!GlobalBaseReg)
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ DebugLoc DL;
+ unsigned V0, V1;
+ const TargetRegisterClass *RC;
+ const MipsABIInfo &ABI =
+ static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
+ RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (ABI.IsN64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = &MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (!MF.getTarget().isPositionIndependent()) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (ABI.IsN32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = &MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(ABI.IsO32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+ // GNU linker requires that the first two instructions appear at the beginning
+ // of a function and no instructions be inserted before or between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
void MipsFunctionInfo::createEhDataRegsFI() {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (int I = 0; I < 4; ++I) {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 553a66703b26..aaa1e0e18441 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -1,9 +1,8 @@
//===- MipsMachineFunctionInfo.h - Private data used for Mips ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,7 +32,12 @@ public:
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
bool globalBaseRegSet() const;
- unsigned getGlobalBaseReg();
+ Register getGlobalBaseReg();
+ Register getGlobalBaseRegForGlobalISel();
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg();
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index 27bc4843f410..5ef07a2d283e 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -1,9 +1,8 @@
//===- MipsOptimizePICCall.cpp - Optimize PIC Calls -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index 4708784063d3..7897095ef894 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -1,9 +1,8 @@
//===- MipsOptionRecord.h - Abstraction for storing information -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 4edcb3132ada..ac4e55f8a1f5 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -1,9 +1,8 @@
//===---- MipsOs16.cpp for Mips Option -Os16 --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
index 1cff1c8396ea..85076590d407 100644
--- a/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -1,9 +1,8 @@
//=== lib/CodeGen/GlobalISel/MipsPreLegalizerCombiner.cpp --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -35,6 +35,16 @@ public:
bool MipsPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
+ CombinerHelper Helper(Observer, B);
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
+ return Helper.tryCombineExtendingLoads(MI);
+ }
return false;
}
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.cpp b/lib/Target/Mips/MipsRegisterBankInfo.cpp
index 6af1f10189df..d8bcf16afd50 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- MipsRegisterBankInfo.cpp ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -11,36 +10,55 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#include "MipsInstrInfo.h"
#include "MipsRegisterBankInfo.h"
+#include "MipsInstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#define GET_TARGET_REGBANK_IMPL
-#define DEBUG_TYPE "registerbankinfo"
-
#include "MipsGenRegisterBank.inc"
namespace llvm {
namespace Mips {
enum PartialMappingIdx {
PMI_GPR,
+ PMI_SPR,
+ PMI_DPR,
PMI_Min = PMI_GPR,
};
RegisterBankInfo::PartialMapping PartMappings[]{
- {0, 32, GPRBRegBank}
+ {0, 32, GPRBRegBank},
+ {0, 32, FPRBRegBank},
+ {0, 64, FPRBRegBank}
};
-enum ValueMappingIdx { InvalidIdx = 0, GPRIdx = 1 };
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPRIdx = 1,
+ SPRIdx = 4,
+ DPRIdx = 7
+};
RegisterBankInfo::ValueMapping ValueMappings[] = {
// invalid
{nullptr, 0},
- // 3 operands in GPRs
+ // up to 3 operands in GPRs
{&PartMappings[PMI_GPR - PMI_Min], 1},
{&PartMappings[PMI_GPR - PMI_Min], 1},
- {&PartMappings[PMI_GPR - PMI_Min], 1}};
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+    // up to 3 operands in FPRs - single precision
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+    // up to 3 operands in FPRs - double precision
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1}
+};
} // end namespace Mips
} // end namespace llvm
@@ -62,30 +80,313 @@ const RegisterBank &MipsRegisterBankInfo::getRegBankFromRegClass(
case Mips::GPRMM16MoveP_and_CPU16Regs_and_GPRMM16ZeroRegClassID:
case Mips::GPRMM16MovePPairFirst_and_GPRMM16MovePPairSecondRegClassID:
case Mips::SP32RegClassID:
+ case Mips::GP32RegClassID:
return getRegBank(Mips::GPRBRegBankID);
+ case Mips::FGRCCRegClassID:
+ case Mips::FGR32RegClassID:
+ case Mips::FGR64RegClassID:
+ case Mips::AFGR64RegClassID:
+ return getRegBank(Mips::FPRBRegBankID);
default:
llvm_unreachable("Register class not supported");
}
}
+// Instructions where all register operands are floating point.
+static bool isFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Instructions where use operands are floating point registers.
+// Def operands are general purpose.
+static bool isFloatingPointOpcodeUse(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FCMP:
+ case Mips::MFC1:
+ case Mips::ExtractElementF64:
+ case Mips::ExtractElementF64_64:
+ return true;
+ default:
+ return isFloatingPointOpcode(Opc);
+ }
+}
+
+// Instructions where def operands are floating point registers.
+// Use operands are general purpose.
+static bool isFloatingPointOpcodeDef(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ case Mips::MTC1:
+ case Mips::BuildPairF64:
+ case Mips::BuildPairF64_64:
+ return true;
+ default:
+ return isFloatingPointOpcode(Opc);
+ }
+}
+
+static bool isAmbiguous(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ case TargetOpcode::G_PHI:
+ case TargetOpcode::G_SELECT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addDefUses(
+ Register Reg, const MachineRegisterInfo &MRI) {
+ assert(!MRI.getType(Reg).isPointer() &&
+ "Pointers are gprb, they should not be considered as ambiguous.\n");
+ for (MachineInstr &UseMI : MRI.use_instructions(Reg)) {
+ MachineInstr *NonCopyInstr = skipCopiesOutgoing(&UseMI);
+ // Copy with many uses.
+ if (NonCopyInstr->getOpcode() == TargetOpcode::COPY &&
+ !TargetRegisterInfo::isPhysicalRegister(
+ NonCopyInstr->getOperand(0).getReg()))
+ addDefUses(NonCopyInstr->getOperand(0).getReg(), MRI);
+ else
+ DefUses.push_back(skipCopiesOutgoing(&UseMI));
+ }
+}
+
+void MipsRegisterBankInfo::AmbiguousRegDefUseContainer::addUseDef(
+ Register Reg, const MachineRegisterInfo &MRI) {
+ assert(!MRI.getType(Reg).isPointer() &&
+ "Pointers are gprb, they should not be considered as ambiguous.\n");
+ MachineInstr *DefMI = MRI.getVRegDef(Reg);
+ UseDefs.push_back(skipCopiesIncoming(DefMI));
+}
+
+MachineInstr *
+MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesOutgoing(
+ MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineInstr *Ret = MI;
+ while (Ret->getOpcode() == TargetOpcode::COPY &&
+ !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(0).getReg()) &&
+ MRI.hasOneUse(Ret->getOperand(0).getReg())) {
+ Ret = &(*MRI.use_instr_begin(Ret->getOperand(0).getReg()));
+ }
+ return Ret;
+}
+
+MachineInstr *
+MipsRegisterBankInfo::AmbiguousRegDefUseContainer::skipCopiesIncoming(
+ MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineInstr *Ret = MI;
+ while (Ret->getOpcode() == TargetOpcode::COPY &&
+ !TargetRegisterInfo::isPhysicalRegister(Ret->getOperand(1).getReg()))
+ Ret = MRI.getVRegDef(Ret->getOperand(1).getReg());
+ return Ret;
+}
+
+MipsRegisterBankInfo::AmbiguousRegDefUseContainer::AmbiguousRegDefUseContainer(
+ const MachineInstr *MI) {
+ assert(isAmbiguous(MI->getOpcode()) &&
+ "Not implemented for non Ambiguous opcode.\n");
+
+ const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
+
+ if (MI->getOpcode() == TargetOpcode::G_LOAD)
+ addDefUses(MI->getOperand(0).getReg(), MRI);
+
+ if (MI->getOpcode() == TargetOpcode::G_STORE)
+ addUseDef(MI->getOperand(0).getReg(), MRI);
+
+ if (MI->getOpcode() == TargetOpcode::G_PHI) {
+ addDefUses(MI->getOperand(0).getReg(), MRI);
+
+ for (unsigned i = 1; i < MI->getNumOperands(); i += 2)
+ addUseDef(MI->getOperand(i).getReg(), MRI);
+ }
+
+ if (MI->getOpcode() == TargetOpcode::G_SELECT) {
+ addDefUses(MI->getOperand(0).getReg(), MRI);
+
+ addUseDef(MI->getOperand(2).getReg(), MRI);
+ addUseDef(MI->getOperand(3).getReg(), MRI);
+ }
+}
+
+bool MipsRegisterBankInfo::TypeInfoForMF::visit(
+ const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI) {
+ assert(isAmbiguous(MI->getOpcode()) && "Visiting non-Ambiguous opcode.\n");
+ if (wasVisited(MI))
+ return true; // InstType has already been determined for MI.
+
+ startVisit(MI);
+ AmbiguousRegDefUseContainer DefUseContainer(MI);
+
+ // Visit instructions where MI's DEF operands are USED.
+ if (visitAdjacentInstrs(MI, DefUseContainer.getDefUses(), true))
+ return true;
+
+ // Visit instructions that DEFINE MI's USE operands.
+ if (visitAdjacentInstrs(MI, DefUseContainer.getUseDefs(), false))
+ return true;
+
+  // All of MI's adjacent instructions are ambiguous.
+ if (!WaitingForTypeOfMI) {
+    // This is a chain of ambiguous instructions.
+ setTypes(MI, InstType::Ambiguous);
+ return true;
+ }
+  // Excluding WaitingForTypeOfMI, MI is either connected to chains of ambiguous
+  // instructions or has no other adjacent instructions; either way its InstType
+  // could not be determined. There could still be an unexplored path from some
+  // of WaitingForTypeOfMI's adjacent instructions to an instruction with only
+  // one mapping available.
+  // We are done with this branch, so add MI to WaitingForTypeOfMI's
+  // WaitingQueue; this way, when WaitingForTypeOfMI figures out its InstType,
+  // the same InstType will be assigned to all instructions in this branch.
+ addToWaitingQueue(WaitingForTypeOfMI, MI);
+ return false;
+}
+
+bool MipsRegisterBankInfo::TypeInfoForMF::visitAdjacentInstrs(
+ const MachineInstr *MI, SmallVectorImpl<MachineInstr *> &AdjacentInstrs,
+ bool isDefUse) {
+ while (!AdjacentInstrs.empty()) {
+ MachineInstr *AdjMI = AdjacentInstrs.pop_back_val();
+
+ if (isDefUse ? isFloatingPointOpcodeUse(AdjMI->getOpcode())
+ : isFloatingPointOpcodeDef(AdjMI->getOpcode())) {
+ setTypes(MI, InstType::FloatingPoint);
+ return true;
+ }
+
+ // Determine InstType from register bank of phys register that is
+ // 'isDefUse ? def : use' of this copy.
+ if (AdjMI->getOpcode() == TargetOpcode::COPY) {
+ setTypesAccordingToPhysicalRegister(MI, AdjMI, isDefUse ? 0 : 1);
+ return true;
+ }
+
+ // Defaults to integer instruction. Includes G_MERGE_VALUES and
+ // G_UNMERGE_VALUES.
+ if (!isAmbiguous(AdjMI->getOpcode())) {
+ setTypes(MI, InstType::Integer);
+ return true;
+ }
+
+ // When AdjMI was visited first, MI has to continue to explore remaining
+ // adjacent instructions and determine InstType without visiting AdjMI.
+ if (!wasVisited(AdjMI) ||
+ getRecordedTypeForInstr(AdjMI) != InstType::NotDetermined) {
+ if (visit(AdjMI, MI)) {
+ // InstType is successfully determined and is same as for AdjMI.
+ setTypes(MI, getRecordedTypeForInstr(AdjMI));
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void MipsRegisterBankInfo::TypeInfoForMF::setTypes(const MachineInstr *MI,
+ InstType InstTy) {
+ changeRecordedTypeForInstr(MI, InstTy);
+ for (const MachineInstr *WaitingInstr : getWaitingQueueFor(MI)) {
+ setTypes(WaitingInstr, InstTy);
+ }
+}
+
+void MipsRegisterBankInfo::TypeInfoForMF::setTypesAccordingToPhysicalRegister(
+ const MachineInstr *MI, const MachineInstr *CopyInst, unsigned Op) {
+ assert((TargetRegisterInfo::isPhysicalRegister(
+ CopyInst->getOperand(Op).getReg())) &&
+ "Copies of non physical registers should not be considered here.\n");
+
+ const MachineFunction &MF = *CopyInst->getMF();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const RegisterBankInfo &RBI =
+ *CopyInst->getMF()->getSubtarget().getRegBankInfo();
+ const RegisterBank *Bank =
+ RBI.getRegBank(CopyInst->getOperand(Op).getReg(), MRI, TRI);
+
+ if (Bank == &Mips::FPRBRegBank)
+ setTypes(MI, InstType::FloatingPoint);
+ else if (Bank == &Mips::GPRBRegBank)
+ setTypes(MI, InstType::Integer);
+ else
+ llvm_unreachable("Unsupported register bank.\n");
+}
+
+MipsRegisterBankInfo::InstType
+MipsRegisterBankInfo::TypeInfoForMF::determineInstType(const MachineInstr *MI) {
+ visit(MI, nullptr);
+ return getRecordedTypeForInstr(MI);
+}
+
+void MipsRegisterBankInfo::TypeInfoForMF::cleanupIfNewFunction(
+ llvm::StringRef FunctionName) {
+ if (MFName != FunctionName) {
+ MFName = FunctionName;
+ WaitingQueues.clear();
+ Types.clear();
+ }
+}
+
const RegisterBankInfo::InstructionMapping &
MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ static TypeInfoForMF TI;
+
+ // Reset TI internal data when MF changes.
+ TI.cleanupIfNewFunction(MI.getMF()->getName());
+
unsigned Opc = MI.getOpcode();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
- const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
- if (Mapping.isValid())
- return Mapping;
+ if (MI.getOpcode() != TargetOpcode::G_PHI) {
+ const RegisterBankInfo::InstructionMapping &Mapping =
+ getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
using namespace TargetOpcode;
unsigned NumOperands = MI.getNumOperands();
const ValueMapping *OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
+ unsigned MappingID = DefaultMappingID;
+ const unsigned CustomMappingID = 1;
switch (Opc) {
+ case G_TRUNC:
case G_ADD:
- case G_LOAD:
- case G_STORE:
+ case G_SUB:
+ case G_MUL:
+ case G_UMULH:
+ case G_ZEXTLOAD:
+ case G_SEXTLOAD:
case G_GEP:
case G_AND:
case G_OR:
@@ -99,9 +400,183 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_UREM:
OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx];
break;
+ case G_LOAD: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ InstType InstTy = InstType::Integer;
+ if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ InstTy = TI.determineInstType(&MI);
+ }
+
+ if (InstTy == InstType::FloatingPoint ||
+ (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
+ OperandsMapping =
+ getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
+ } else { // gprb
+ OperandsMapping =
+ getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ if (Size == 64)
+ MappingID = CustomMappingID;
+ }
+
+ break;
+ }
+ case G_STORE: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ InstType InstTy = InstType::Integer;
+ if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ InstTy = TI.determineInstType(&MI);
+ }
+
+ if (InstTy == InstType::FloatingPoint ||
+ (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
+ OperandsMapping =
+ getOperandsMapping({Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
+ } else { // gprb
+ OperandsMapping =
+ getOperandsMapping({Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ if (Size == 64)
+ MappingID = CustomMappingID;
+ }
+ break;
+ }
+ case G_PHI: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ InstType InstTy = InstType::Integer;
+ if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ InstTy = TI.determineInstType(&MI);
+ }
+
+    // PHI is copy-like and should have one regbank in the mapping for its def register.
+ if (InstTy == InstType::Integer && Size == 64) { // fprb
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx]});
+ return getInstructionMapping(CustomMappingID, /*Cost=*/1, OperandsMapping,
+ /*NumOperands=*/1);
+ }
+ // Use default handling for PHI, i.e. set reg bank of def operand to match
+ // register banks of use operands.
+ const RegisterBankInfo::InstructionMapping &Mapping =
+ getInstrMappingImpl(MI);
+ return Mapping;
+ }
+ case G_SELECT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ InstType InstTy = InstType::Integer;
+ if (!MRI.getType(MI.getOperand(0).getReg()).isPointer()) {
+ InstTy = TI.determineInstType(&MI);
+ }
+
+ if (InstTy == InstType::FloatingPoint ||
+ (Size == 64 && InstTy == InstType::Ambiguous)) { // fprb
+ const RegisterBankInfo::ValueMapping *Bank =
+ Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx];
+ OperandsMapping = getOperandsMapping(
+ {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
+ break;
+ } else { // gprb
+ const RegisterBankInfo::ValueMapping *Bank =
+ Size <= 32 ? &Mips::ValueMappings[Mips::GPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx];
+ OperandsMapping = getOperandsMapping(
+ {Bank, &Mips::ValueMappings[Mips::GPRIdx], Bank, Bank});
+ if (Size == 64)
+ MappingID = CustomMappingID;
+ }
+ break;
+ }
+ case G_UNMERGE_VALUES: {
+ OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::DPRIdx]});
+ MappingID = CustomMappingID;
+ break;
+ }
+ case G_MERGE_VALUES: {
+ OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ MappingID = CustomMappingID;
+ break;
+ }
+ case G_FADD:
+ case G_FSUB:
+ case G_FMUL:
+ case G_FDIV:
+ case G_FABS:
+ case G_FSQRT:{
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+ OperandsMapping = Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx];
+ break;
+ }
+ case G_FCONSTANT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+ const RegisterBankInfo::ValueMapping *FPRValueMapping =
+ Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx];
+ OperandsMapping = getOperandsMapping({FPRValueMapping, nullptr});
+ break;
+ }
+ case G_FCMP: {
+ unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ assert((Size == 32 || Size == 64) && "Unsupported floating point size");
+ const RegisterBankInfo::ValueMapping *FPRValueMapping =
+ Size == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx];
+ OperandsMapping =
+ getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr,
+ FPRValueMapping, FPRValueMapping});
+ break;
+ }
+ case G_FPEXT:
+ OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::SPRIdx]});
+ break;
+ case G_FPTRUNC:
+ OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::SPRIdx],
+ &Mips::ValueMappings[Mips::DPRIdx]});
+ break;
+ case G_FPTOSI: {
+ unsigned SizeFP = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ assert((MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 32) &&
+ "Unsupported integer size");
+ assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size");
+ OperandsMapping = getOperandsMapping({
+ &Mips::ValueMappings[Mips::GPRIdx],
+ SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx],
+ });
+ break;
+ }
+ case G_SITOFP: {
+ unsigned SizeInt = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned SizeFP = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ (void)SizeInt;
+ assert((SizeInt == 32) && "Unsupported integer size");
+ assert((SizeFP == 32 || SizeFP == 64) && "Unsupported floating point size");
+ OperandsMapping =
+ getOperandsMapping({SizeFP == 32 ? &Mips::ValueMappings[Mips::SPRIdx]
+ : &Mips::ValueMappings[Mips::DPRIdx],
+ &Mips::ValueMappings[Mips::GPRIdx]});
+ break;
+ }
case G_CONSTANT:
case G_FRAME_INDEX:
case G_GLOBAL_VALUE:
+ case G_BRCOND:
OperandsMapping =
getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx], nullptr});
break;
@@ -111,17 +586,92 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&Mips::ValueMappings[Mips::GPRIdx],
&Mips::ValueMappings[Mips::GPRIdx]});
break;
- case G_SELECT:
- OperandsMapping =
- getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx],
- &Mips::ValueMappings[Mips::GPRIdx]});
- break;
default:
return getInvalidInstructionMapping();
}
- return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping,
+ return getInstructionMapping(MappingID, /*Cost=*/1, OperandsMapping,
NumOperands);
}
+
+using InstListTy = GISelWorkList<4>;
+namespace {
+class InstManager : public GISelChangeObserver {
+ InstListTy &InstList;
+
+public:
+ InstManager(InstListTy &Insts) : InstList(Insts) {}
+
+ void createdInstr(MachineInstr &MI) override { InstList.insert(&MI); }
+ void erasingInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
+};
+} // end anonymous namespace
+
+/// Here we have to narrowScalar s64 operands to s32, combine away
+/// G_MERGE/G_UNMERGE and erase instructions that became dead in the process.
+/// We manually assign 32 bit gprb to register operands of all new instructions
+/// that got created in the process, since they will not end up in the
+/// RegBankSelect loop. Be careful not to delete an instruction after MI,
+/// i.e. MI.getIterator()++.
+void MipsRegisterBankInfo::applyMappingImpl(
+ const OperandsMapper &OpdMapper) const {
+ MachineInstr &MI = OpdMapper.getMI();
+ InstListTy NewInstrs;
+ MachineIRBuilder B(MI);
+ MachineFunction *MF = MI.getMF();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+ InstManager NewInstrObserver(NewInstrs);
+ GISelObserverWrapper WrapperObserver(&NewInstrObserver);
+ LegalizerHelper Helper(*MF, WrapperObserver, B);
+ LegalizationArtifactCombiner ArtCombiner(
+ B, MF->getRegInfo(), *MF->getSubtarget().getLegalizerInfo());
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ case TargetOpcode::G_PHI:
+ case TargetOpcode::G_SELECT: {
+ Helper.narrowScalar(MI, 0, LLT::scalar(32));
+ // Handle new instructions.
+ while (!NewInstrs.empty()) {
+ MachineInstr *NewMI = NewInstrs.pop_back_val();
+      // This is a new G_UNMERGE that was created during narrowScalar and will
+      // not be considered for regbank selection. RegBankSelect for Mips
+      // visits/creates the corresponding G_MERGE first. Combine them here.
+ if (NewMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
+ SmallVector<MachineInstr *, 2> DeadInstrs;
+ ArtCombiner.tryCombineMerges(*NewMI, DeadInstrs);
+ for (MachineInstr *DeadMI : DeadInstrs)
+ DeadMI->eraseFromParent();
+ }
+ // This G_MERGE will be combined away when its corresponding G_UNMERGE
+ // gets regBankSelected.
+ else if (NewMI->getOpcode() == TargetOpcode::G_MERGE_VALUES)
+ continue;
+ else
+ // Manually set register banks for all register operands to 32 bit gprb.
+ for (auto Op : NewMI->operands()) {
+ if (Op.isReg()) {
+ assert(MRI.getType(Op.getReg()).getSizeInBits() == 32 &&
+ "Only 32 bit gprb is handled here.\n");
+ MRI.setRegBank(Op.getReg(), getRegBank(Mips::GPRBRegBankID));
+ }
+ }
+ }
+ return;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ SmallVector<MachineInstr *, 2> DeadInstrs;
+ ArtCombiner.tryCombineMerges(MI, DeadInstrs);
+ for (MachineInstr *DeadMI : DeadInstrs)
+ DeadMI->eraseFromParent();
+ return;
+ }
+ default:
+ break;
+ }
+
+ return applyDefaultMapping(OpdMapper);
+}
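
A standalone sketch of the collect-then-post-process pattern used by InstManager above, with hypothetical stand-in types (Instr, ChangeObserver, InstCollector) rather than the real MachineInstr/GISelChangeObserver classes:

    #include <deque>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Instr { std::string Opcode; };          // stand-in for MachineInstr

    struct ChangeObserver {                        // analog of GISelChangeObserver
      virtual ~ChangeObserver() = default;
      virtual void createdInstr(Instr &I) = 0;
    };

    // Analog of InstManager: remember every instruction the transform creates.
    struct InstCollector : ChangeObserver {
      std::vector<Instr *> &Created;
      explicit InstCollector(std::vector<Instr *> &C) : Created(C) {}
      void createdInstr(Instr &I) override { Created.push_back(&I); }
    };

    // Stand-in for narrowScalar: emits instructions and notifies the observer.
    void narrowScalar(std::deque<Instr> &Storage, ChangeObserver &Obs) {
      for (const char *Op : {"G_UNMERGE_VALUES", "G_ADD", "G_MERGE_VALUES"}) {
        Storage.push_back({Op});
        Obs.createdInstr(Storage.back());          // deque keeps references stable
      }
    }

    int main() {
      std::deque<Instr> Storage;
      std::vector<Instr *> NewInstrs;
      InstCollector Collector(NewInstrs);
      narrowScalar(Storage, Collector);
      // Post-process the freshly created instructions, mirroring the loop in
      // applyMappingImpl above.
      for (Instr *I : NewInstrs)
        std::cout << "created: " << I->Opcode << "\n";
    }

The point of the observer is that the transform does not need to know which fix-up the caller wants; the RegBankSelect-specific work (assigning gprb, combining merges) stays in applyMappingImpl.
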
diff --git a/lib/Target/Mips/MipsRegisterBankInfo.h b/lib/Target/Mips/MipsRegisterBankInfo.h
index 64a79abaa74d..176813c031ed 100644
--- a/lib/Target/Mips/MipsRegisterBankInfo.h
+++ b/lib/Target/Mips/MipsRegisterBankInfo.h
@@ -1,9 +1,8 @@
//===- MipsRegisterBankInfo.h -----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -38,6 +37,131 @@ public:
const InstructionMapping &
getInstrMapping(const MachineInstr &MI) const override;
+
+ void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+private:
+ /// Some instructions are used with both floating point and integer operands.
+ /// We assign InstType to such instructions as it helps us to avoid cross bank
+  /// copies. InstType depends on context.
+ enum InstType {
+    /// Temporary type; once visit(..., nullptr) finishes it will be converted
+    /// to one of the remaining types: Integer, FloatingPoint or Ambiguous.
+ NotDetermined,
+ /// Connected with instruction that interprets 'bags of bits' as integers.
+ /// Select gprb to avoid cross bank copies.
+ Integer,
+ /// Connected with instruction that interprets 'bags of bits' as floating
+ /// point numbers. Select fprb to avoid cross bank copies.
+ FloatingPoint,
+ /// Represents moving 'bags of bits' around. Select same bank for entire
+ /// chain to avoid cross bank copies. Currently we select fprb for s64 and
+ /// gprb for s32 Ambiguous operands.
+ Ambiguous
+ };
+
+ /// Some generic instructions have operands that can be mapped to either fprb
+  /// or gprb, e.g. for G_LOAD we consider only operand 0 as ambiguous; operand
+  /// 1 is always gprb since it is a pointer.
+  /// This class provides containers for MI's ambiguous operands:
+ /// DefUses : MachineInstrs that use one of MI's ambiguous def operands.
+ /// UseDefs : MachineInstrs that define MI's ambiguous use operands.
+ class AmbiguousRegDefUseContainer {
+ SmallVector<MachineInstr *, 2> DefUses;
+ SmallVector<MachineInstr *, 2> UseDefs;
+
+ void addDefUses(Register Reg, const MachineRegisterInfo &MRI);
+ void addUseDef(Register Reg, const MachineRegisterInfo &MRI);
+
+ /// Skip copy instructions until we get to a non-copy instruction or to a
+ /// copy with phys register as def. Used during search for DefUses.
+ /// MI : %5 = COPY %4
+ /// %6 = COPY %5
+ /// $v0 = COPY %6 <- we want this one.
+ MachineInstr *skipCopiesOutgoing(MachineInstr *MI) const;
+
+ /// Skip copy instructions until we get to a non-copy instruction or to a
+ /// copy with phys register as use. Used during search for UseDefs.
+ /// %1 = COPY $a1 <- we want this one.
+ /// %2 = COPY %1
+ /// MI = %3 = COPY %2
+ MachineInstr *skipCopiesIncoming(MachineInstr *MI) const;
+
+ public:
+ AmbiguousRegDefUseContainer(const MachineInstr *MI);
+ SmallVectorImpl<MachineInstr *> &getDefUses() { return DefUses; }
+ SmallVectorImpl<MachineInstr *> &getUseDefs() { return UseDefs; }
+ };
+
+ class TypeInfoForMF {
+ /// MachineFunction name is used to recognise when MF changes.
+ std::string MFName = "";
+ /// <key, value> : value is vector of all MachineInstrs that are waiting for
+ /// key to figure out type of some of its ambiguous operands.
+ DenseMap<const MachineInstr *, SmallVector<const MachineInstr *, 2>>
+ WaitingQueues;
+ /// Recorded InstTypes for visited instructions.
+ DenseMap<const MachineInstr *, InstType> Types;
+
+ /// Recursively visit MI's adjacent instructions and find MI's InstType.
+ bool visit(const MachineInstr *MI, const MachineInstr *WaitingForTypeOfMI);
+
+ /// Visit MI's adjacent UseDefs or DefUses.
+ bool visitAdjacentInstrs(const MachineInstr *MI,
+ SmallVectorImpl<MachineInstr *> &AdjacentInstrs,
+ bool isDefUse);
+
+ /// Set type for MI, and recursively for all instructions that are
+ /// waiting for MI's type.
+ void setTypes(const MachineInstr *MI, InstType ITy);
+
+    /// InstType for MI is determined; set it to the InstType that corresponds
+    /// to the physical register that is operand number Op in CopyInst.
+ void setTypesAccordingToPhysicalRegister(const MachineInstr *MI,
+ const MachineInstr *CopyInst,
+ unsigned Op);
+
+ /// Set default values for MI in order to start visit.
+ void startVisit(const MachineInstr *MI) {
+ Types.try_emplace(MI, InstType::NotDetermined);
+ WaitingQueues.try_emplace(MI);
+ }
+
+ /// Returns true if instruction was already visited. Type might not be
+ /// determined at this point but will be when visit(..., nullptr) finishes.
+ bool wasVisited(const MachineInstr *MI) const { return Types.count(MI); };
+
+ /// Returns recorded type for instruction.
+ const InstType &getRecordedTypeForInstr(const MachineInstr *MI) const {
+ assert(wasVisited(MI) && "Instruction was not visited!");
+ return Types.find(MI)->getSecond();
+ };
+
+ /// Change recorded type for instruction.
+ void changeRecordedTypeForInstr(const MachineInstr *MI, InstType InstTy) {
+ assert(wasVisited(MI) && "Instruction was not visited!");
+ Types.find(MI)->getSecond() = InstTy;
+ };
+
+ /// Returns WaitingQueue for instruction.
+ const SmallVectorImpl<const MachineInstr *> &
+ getWaitingQueueFor(const MachineInstr *MI) const {
+ assert(WaitingQueues.count(MI) && "Instruction was not visited!");
+ return WaitingQueues.find(MI)->getSecond();
+ };
+
+ /// Add WaitingForMI to MI's WaitingQueue.
+ void addToWaitingQueue(const MachineInstr *MI,
+ const MachineInstr *WaitingForMI) {
+ assert(WaitingQueues.count(MI) && "Instruction was not visited!");
+ WaitingQueues.find(MI)->getSecond().push_back(WaitingForMI);
+ };
+
+ public:
+ InstType determineInstType(const MachineInstr *MI);
+
+ void cleanupIfNewFunction(llvm::StringRef FunctionName);
+ };
};
} // end namespace llvm
#endif
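
TypeInfoForMF determines whether an ambiguous value is really integer or floating point by walking the def-use graph until it reaches an instruction whose interpretation is known, parking visitors in waiting queues when a neighbour is not resolved yet. A much-simplified worklist version of that idea over a toy graph, with hypothetical types (Node, Kind) and a plain breadth-first search instead of the waiting-queue machinery:

    #include <iostream>
    #include <queue>
    #include <set>
    #include <vector>

    enum class Kind { Integer, FloatingPoint, Ambiguous };

    struct Node {
      Kind K = Kind::Ambiguous;    // ambiguous unless the opcode decides it
      std::vector<int> Adjacent;   // def-use neighbours, by index
    };

    // Search outward from Start until some adjacent instruction pins the type.
    Kind determineKind(const std::vector<Node> &G, int Start) {
      std::queue<int> Work;
      std::set<int> Seen{Start};
      Work.push(Start);
      while (!Work.empty()) {
        int Cur = Work.front();
        Work.pop();
        if (G[Cur].K != Kind::Ambiguous)
          return G[Cur].K;         // a decisive user or definition was found
        for (int Next : G[Cur].Adjacent)
          if (Seen.insert(Next).second)
            Work.push(Next);
      }
      return Kind::Ambiguous;      // the whole chain just moves bits around
    }

    int main() {
      // 0: load (ambiguous) -> 1: copy (ambiguous) -> 2: fadd (floating point)
      std::vector<Node> G(3);
      G[0].Adjacent = {1};
      G[1].Adjacent = {0, 2};
      G[2].K = Kind::FloatingPoint;
      std::cout << (determineKind(G, 0) == Kind::FloatingPoint ? "fprb" : "gprb")
                << "\n";
    }

Here the load would be mapped to fprb because its only non-trivial user is a floating-point add, which avoids exactly the cross bank copy the real class is built to prevent.
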
diff --git a/lib/Target/Mips/MipsRegisterBanks.td b/lib/Target/Mips/MipsRegisterBanks.td
index 5f1687048fac..14a0181f8f11 100644
--- a/lib/Target/Mips/MipsRegisterBanks.td
+++ b/lib/Target/Mips/MipsRegisterBanks.td
@@ -1,9 +1,8 @@
//===- MipsRegisterBank.td ---------------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,3 +10,5 @@
//===----------------------------------------------------------------------===//
def GPRBRegBank : RegisterBank<"GPRB", [GPR32]>;
+
+def FPRBRegBank : RegisterBank<"FPRB", [FGR64, AFGR64]>;
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 3c108c2ba9b7..7b02d126eb28 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- MipsRegisterInfo.cpp - MIPS Register Information -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -160,8 +159,6 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
- using RegIter = TargetRegisterClass::const_iterator;
-
for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
Reserved.set(ReservedGPR32[I]);
@@ -183,14 +180,12 @@ getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isFP64bit()) {
// Reserve all registers in AFGR64.
- for (RegIter Reg = Mips::AFGR64RegClass.begin(),
- EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
- Reserved.set(*Reg);
+ for (MCPhysReg Reg : Mips::AFGR64RegClass)
+ Reserved.set(Reg);
} else {
// Reserve all registers in FGR64.
- for (RegIter Reg = Mips::FGR64RegClass.begin(),
- EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg)
- Reserved.set(*Reg);
+ for (MCPhysReg Reg : Mips::FGR64RegClass)
+ Reserved.set(Reg);
}
// Reserve FP if this function should have a dedicated frame pointer register.
if (Subtarget.getFrameLowering()->hasFP(MF)) {
@@ -222,14 +217,8 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::DSPOutFlag);
// Reserve MSA control registers.
- Reserved.set(Mips::MSAIR);
- Reserved.set(Mips::MSACSR);
- Reserved.set(Mips::MSAAccess);
- Reserved.set(Mips::MSASave);
- Reserved.set(Mips::MSAModify);
- Reserved.set(Mips::MSARequest);
- Reserved.set(Mips::MSAMap);
- Reserved.set(Mips::MSAUnmap);
+ for (MCPhysReg Reg : Mips::MSACtrlRegClass)
+ Reserved.set(Reg);
// Reserve RA if in mips16 mode.
if (Subtarget.inMips16Mode()) {
@@ -248,11 +237,6 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::GP_64);
}
- if (Subtarget.isABI_O32() && !Subtarget.useOddSPReg()) {
- for (const auto &Reg : Mips::OddSPRegClass)
- Reserved.set(Reg);
- }
-
return Reserved;
}
@@ -293,7 +277,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
}
-unsigned MipsRegisterInfo::
+Register MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
@@ -322,8 +306,8 @@ bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64;
unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64;
- // Support dynamic stack realignment only for targets with standard encoding.
- if (!Subtarget.hasStandardEncoding())
+ // Support dynamic stack realignment for all targets except Mips16.
+ if (Subtarget.inMips16Mode())
return false;
// We can't perform dynamic stack realignment if we can't reserve the
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b84aaad05eb5..4ed32b09718b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -1,9 +1,8 @@
//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -70,7 +69,7 @@ public:
bool canRealignStack(const MachineFunction &MF) const override;
/// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
/// Return GPR register class.
virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index a943a0ad4094..8a6279da46b7 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- MipsRegisterInfo.td - Mips Register defs -----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -259,6 +258,11 @@ let Namespace = "Mips" in {
def MSARequest : MipsReg<5, "5">;
def MSAMap : MipsReg<6, "6">;
def MSAUnmap : MipsReg<7, "7">;
+ // MSA-ASE fake control registers.
+    // These registers do not exist, but instructions like `cfcmsa`
+    // and `ctcmsa` allow specifying them.
+ foreach I = 8-31 in
+ def MSA#I : MipsReg<#I, ""#I>;
// Octeon multiplier and product registers
def MPL0 : MipsReg<0, "mpl0">;
@@ -383,10 +387,14 @@ def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 32bit fp:
// * FGR32 - 16 32-bit even registers
// * FGR32 - 32 32-bit registers (single float only mode)
-def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
-
-def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>,
- Unallocatable;
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)> {
+ // Do not allocate odd registers when given -mattr=+nooddspreg.
+ let AltOrders = [(decimate FGR32, 2)];
+ let AltOrderSelect = [{
+ const auto & S = MF.getSubtarget<MipsSubtarget>();
+ return S.isABI_O32() && !S.useOddSPReg();
+ }];
+}
def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Return Values and Arguments
@@ -400,16 +408,14 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Callee save
D10, D11, D12, D13, D14, D15)>;
-def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
-
-// Used to reserve odd registers when given -mattr=+nooddspreg
-// FIXME: Remove double precision registers from this set.
-def OddSP : RegisterClass<"Mips", [f32], 32,
- (add (decimate (sequence "F%u", 1, 31), 2),
- (decimate (sequence "F_HI%u", 1, 31), 2),
- (decimate (sequence "D%u", 1, 15), 2),
- (decimate (sequence "D%u_64", 1, 31), 2))>,
- Unallocatable;
+def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> {
+ // Do not allocate odd registers when given -mattr=+nooddspreg.
+ let AltOrders = [(decimate FGR64, 2)];
+ let AltOrderSelect = [{
+ const auto & S = MF.getSubtarget<MipsSubtarget>();
+ return S.isABI_O32() && !S.useOddSPReg();
+ }];
+}
// FP control registers.
def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
@@ -437,7 +443,8 @@ def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
(decimate (sequence "W%u", 0, 31), 2)>;
def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
- MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
+ MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap,
+ (sequence "MSA%u", 8, 31))>, Unallocatable;
// Hi/Lo Registers
def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>;
@@ -591,11 +598,6 @@ def StrictlyFGR32AsmOperand : MipsAsmRegOperand {
let PredicateMethod = "isStrictlyFGRAsmReg";
}
-def FGRH32AsmOperand : MipsAsmRegOperand {
- let Name = "FGRH32AsmReg";
- let PredicateMethod = "isFGRAsmReg";
-}
-
def FCCRegsAsmOperand : MipsAsmRegOperand {
let Name = "FCCAsmReg";
}
@@ -703,10 +705,6 @@ def FGRCCOpnd : RegisterOperand<FGRCC> {
let ParserMatchClass = FGR32AsmOperand;
}
-def FGRH32Opnd : RegisterOperand<FGRH32> {
- let ParserMatchClass = FGRH32AsmOperand;
-}
-
def FCCRegsOpnd : RegisterOperand<FCC> {
let ParserMatchClass = FCCRegsAsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index ef1b3c09bdc4..4c6cc1ef771c 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -1,9 +1,8 @@
//===- MipsSEFrameLowering.cpp - Mips32/64 Frame Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index cb2119d6880b..78ffe161d9c6 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -1,9 +1,8 @@
//===- MipsSEFrameLowering.h - Mips32/64 frame lowering ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index cf196b597278..703f99f37dd1 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,18 +75,8 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
}
unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const {
- switch (cast<ConstantSDNode>(RegIdx)->getZExtValue()) {
- default:
- llvm_unreachable("Could not map int to register");
- case 0: return Mips::MSAIR;
- case 1: return Mips::MSACSR;
- case 2: return Mips::MSAAccess;
- case 3: return Mips::MSASave;
- case 4: return Mips::MSAModify;
- case 5: return Mips::MSARequest;
- case 6: return Mips::MSAMap;
- case 7: return Mips::MSAUnmap;
- }
+ uint64_t RegNum = cast<ConstantSDNode>(RegIdx)->getZExtValue();
+ return Mips::MSACtrlRegClass.getRegister(RegNum);
}
bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
@@ -135,97 +124,8 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
return true;
}
-void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL;
- unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
- const MipsABIInfo &ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
- RC = (ABI.IsN64()) ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
-
- if (ABI.IsN64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = &MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (!MF.getTarget().isPositionIndependent()) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (ABI.IsN32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = &MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(ABI.IsO32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
- initGlobalBaseReg(MF);
+ MF.getInfo<MipsFunctionInfo>()->initGlobalBaseReg();
MachineRegisterInfo *MRI = &MF.getRegInfo();
@@ -1337,6 +1237,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
return false;
case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
if (selectAddrRegImm16(Op, Base, Offset)) {
OutOps.push_back(Base);
OutOps.push_back(Offset);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index eb3657aae050..ce594e1fb4fa 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -1,9 +1,8 @@
//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -131,10 +130,6 @@ private:
void processFunctionAfterISel(MachineFunction &MF) override;
- // Insert instructions to initialize the global base register in the
- // first MBB of the function.
- void initGlobalBaseReg(MachineFunction &MF);
-
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index a78e544c35f0..edf57a3840d1 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1,9 +1,8 @@
//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -214,6 +213,11 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
+ !Subtarget.hasMips64()) {
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ }
+
if (NoDPLoadStore) {
setOperationAction(ISD::LOAD, MVT::f64, Custom);
setOperationAction(ISD::STORE, MVT::f64, Custom);
@@ -415,11 +419,8 @@ SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Op->getOperand(2));
}
-bool
-MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned,
- unsigned,
- bool *Fast) const {
+bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
if (Subtarget.systemSupportsUnalignedAccess()) {
@@ -463,6 +464,7 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SELECT: return lowerSELECT(Op, DAG);
+ case ISD::BITCAST: return lowerBITCAST(Op, DAG);
}
return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -714,8 +716,31 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
SelectionDAG &DAG,
const MipsSubtarget &Subtarget) {
// Estimate the number of operations the below transform will turn a
- // constant multiply into. The number is approximately how many powers
- // of two summed together that the constant can be broken down into.
+  // constant multiply into. The number is approximately equal to the minimal
+  // number of powers of two that the constant can be broken down into by
+  // adding or subtracting them.
+ //
+ // If we have taken more than 12[1] / 8[2] steps to attempt the
+ // optimization for a native sized value, it is more than likely that this
+ // optimization will make things worse.
+ //
+ // [1] MIPS64 requires 6 instructions at most to materialize any constant,
+ // multiplication requires at least 4 cycles, but another cycle (or two)
+ // to retrieve the result from the HI/LO registers.
+ //
+ // [2] For MIPS32, more than 8 steps is expensive as the constant could be
+ // materialized in 2 instructions, multiplication requires at least 4
+ // cycles, but another cycle (or two) to retrieve the result from the
+ // HI/LO registers.
+ //
+ // TODO:
+ // - MaxSteps needs to consider the `VT` of the constant for the current
+ // target.
+  // - Consider performing this optimization after type legalization. That
+  //   would allow removing the workaround for types that are not supported
+  //   natively.
+  // - Take into account the `-Os`/`-Oz` flags, because this optimization
+  //   increases code size.
+ unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
SmallVector<APInt, 16> WorkStack(1, C);
unsigned Steps = 0;
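
The heuristic above counts how many shift/add/sub steps the greedy powers-of-two decomposition needs and gives up once MaxSteps is exceeded. A rough standalone model of that step counting (plain uint64_t instead of APInt, positive values only, MaxSteps supplied by the caller):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    bool worthTransforming(uint64_t C, unsigned MaxSteps) {
      std::vector<uint64_t> Work{C};
      unsigned Steps = 0;
      while (!Work.empty()) {
        uint64_t Val = Work.back();
        Work.pop_back();
        if (Val == 0 || Val == 1)
          continue;
        if (Steps >= MaxSteps)
          return false;
        if ((Val & (Val - 1)) == 0) { // power of two: a single shift
          ++Steps;
          continue;
        }
        // Nearest powers of two below and above Val (GCC/Clang builtin).
        uint64_t Floor = uint64_t(1) << (63 - __builtin_clzll(Val));
        uint64_t Ceil = Floor << 1;
        if (Val - Floor <= Ceil - Val) {
          Work.push_back(Floor);
          Work.push_back(Val - Floor); // Val = Floor + remainder
        } else {
          Work.push_back(Ceil);
          Work.push_back(Ceil - Val);  // Val = Ceil - remainder
        }
        ++Steps;                       // one add or sub per split
      }
      return true;
    }

    int main() {
      std::cout << worthTransforming(10, 8) << "\n";          // 10 = 8 + 2: cheap
      std::cout << worthTransforming(0x5B3D7F1ULL, 8) << "\n"; // denser constant
    }

On O32 the budget is 8 steps because the constant itself can be materialized in two instructions, so a long chain of shifts and adds quickly loses to a plain mult.
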
@@ -727,6 +752,9 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
if (Val == 0 || Val == 1)
continue;
+ if (Steps >= MaxSteps)
+ return false;
+
if (Val.isPowerOf2()) {
++Steps;
continue;
@@ -735,36 +763,15 @@ static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
: APInt(BitWidth, 1) << C.ceilLogBase2();
-
if ((Val - Floor).ule(Ceil - Val)) {
WorkStack.push_back(Floor);
WorkStack.push_back(Val - Floor);
- ++Steps;
- continue;
+ } else {
+ WorkStack.push_back(Ceil);
+ WorkStack.push_back(Ceil - Val);
}
- WorkStack.push_back(Ceil);
- WorkStack.push_back(Ceil - Val);
++Steps;
-
- // If we have taken more than 12[1] / 8[2] steps to attempt the
- // optimization for a native sized value, it is more than likely that this
- // optimization will make things worse.
- //
- // [1] MIPS64 requires 6 instructions at most to materialize any constant,
- // multiplication requires at least 4 cycles, but another cycle (or two)
- // to retrieve the result from the HI/LO registers.
- //
- // [2] For MIPS32, more than 8 steps is expensive as the constant could be
- // materialized in 2 instructions, multiplication requires at least 4
- // cycles, but another cycle (or two) to retrieve the result from the
- // HI/LO registers.
-
- if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
- return false;
-
- if (Steps > 8 && Subtarget.isABI_O32())
- return false;
}
// If the value being multiplied is not supported natively, we have to pay
@@ -1221,6 +1228,36 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}
+SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
+ MVT Dest = Op.getValueType().getSimpleVT();
+
+ // Bitcast i64 to double.
+ if (Src == MVT::i64 && Dest == MVT::f64) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Op.getOperand(0), DAG.getIntPtrConstant(0, DL));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Op.getOperand(0), DAG.getIntPtrConstant(1, DL));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+ }
+
+ // Bitcast double to i64.
+ if (Src == MVT::f64 && Dest == MVT::i64) {
+ SDValue Lo =
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Hi =
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, DL, MVT::i32));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+ }
+
+ // Skip other cases of bitcast and use default lowering.
+ return SDValue();
+}
+
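
On 32-bit MIPS with 64-bit FPRs the lowering above turns an i64<->f64 bitcast into two 32-bit moves plus BuildPairF64/ExtractElementF64. The value-level semantics of that split, shown on plain host types (a sketch of what the DAG nodes compute, not the lowering itself):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    double buildPairF64(uint32_t Lo, uint32_t Hi) {
      uint64_t Bits = (uint64_t(Hi) << 32) | Lo; // reassemble the 64-bit pattern
      double D;
      std::memcpy(&D, &Bits, sizeof(D));         // bit-for-bit reinterpretation
      return D;
    }

    void extractElementF64(double D, uint32_t &Lo, uint32_t &Hi) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      Lo = uint32_t(Bits);                       // bits [31:0]
      Hi = uint32_t(Bits >> 32);                 // bits [63:32]
    }

    int main() {
      uint32_t Lo, Hi;
      extractElementF64(1.5, Lo, Hi);
      std::cout << buildPairF64(Lo, Hi) << "\n"; // prints 1.5 again
    }
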
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {
@@ -1379,9 +1416,10 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
bool IsSigned = false) {
+ auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
return DAG.getConstant(
APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
- Op->getConstantOperandVal(ImmOp), IsSigned),
+ IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
SDLoc(Op), Op->getValueType(0));
}
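
The lowerMSASplatImm change above makes the splat immediate honour its signedness when it is widened to the vector element width. A small illustration of why that matters, in plain C++ instead of APInt:

    #include <cstdint>
    #include <iostream>

    int main() {
      int8_t Imm = -16;                        // bit pattern 0xF0
      uint32_t ZExt = uint32_t(uint8_t(Imm));  // zero-extend: 0x000000F0
      uint32_t SExt = uint32_t(int32_t(Imm));  // sign-extend: 0xFFFFFFF0
      std::cout << std::hex << ZExt << " " << SExt << "\n";
    }
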
@@ -3725,8 +3763,8 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
- unsigned Fd = MI.getOperand(0).getReg();
- unsigned Ws = MI.getOperand(1).getReg();
+ Register Fd = MI.getOperand(0).getReg();
+ Register Ws = MI.getOperand(1).getReg();
MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
const TargetRegisterClass *GPRRC =
@@ -3734,10 +3772,10 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
unsigned MTC1Opc = IsFGR64onMips64
? Mips::DMTC1
: (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
- unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
+ Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
- unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
- unsigned WPHI = Wtemp;
+ Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ Register WPHI = Wtemp;
BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
if (IsFGR64) {
@@ -3746,15 +3784,15 @@ MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
}
// Perform the safety regclass copy mentioned above.
- unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
- unsigned FPRPHI = IsFGR64onMips32
+ Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
+ Register FPRPHI = IsFGR64onMips32
? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
: Fd;
BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
if (IsFGR64onMips32) {
- unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+ Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
.addReg(WPHI)
.addImm(1);
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 761ff3b1fa4d..433d019332cf 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -1,9 +1,8 @@
//===- MipsSEISelLowering.h - MipsSE DAG Lowering Interface -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,9 +40,10 @@ class TargetRegisterClass;
void addMSAFloatType(MVT::SimpleValueType Ty,
const TargetRegisterClass *RC);
- bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS = 0,
- unsigned Align = 1,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AS = 0, unsigned Align = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -73,6 +73,7 @@ class TargetRegisterClass;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index c7ab90ed2a3b..4e49f5e7d9d1 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "MipsSEInstrInfo.h"
-#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsInstPrinter.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
@@ -447,6 +446,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case Mips::PseudoMTLOHI_DSP:
expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
break;
+ case Mips::PseudoMTLOHI_MM:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false);
+ break;
case Mips::PseudoCVT_S_W:
expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
break;
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index fce0fe5f58ad..3111d1c21a0a 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -1,9 +1,8 @@
//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index e7d720a4b769..f4b164d5c0ab 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index ebae1909d233..82ddf40f56a7 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -1,9 +1,8 @@
//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 410fa655a225..0c0ddeab22c4 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,9 +1,8 @@
//===-- MipsSchedule.td - Mips Scheduling Definitions ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td
index 80ffe7ada7c8..e8a0a30b8e9b 100644
--- a/lib/Target/Mips/MipsScheduleGeneric.td
+++ b/lib/Target/Mips/MipsScheduleGeneric.td
@@ -1,9 +1,8 @@
//=- MipsScheduleGeneric.td - Generic Scheduling Definitions -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,11 +24,11 @@ def MipsGenericModel : SchedMachineModel {
int HighLatency = 37;
list<Predicate> UnsupportedFeatures = [];
- let CompleteModel = 0;
+ let CompleteModel = 1;
let PostRAScheduler = 1;
// FIXME: Remove when all errors have been fixed.
- let FullInstRWOverlapCheck = 0;
+ let FullInstRWOverlapCheck = 1;
}
let SchedModel = MipsGenericModel in {
@@ -42,35 +41,122 @@ def GenericIssueALU : ProcResource<1> { let Super = GenericALU; }
def GenericWriteALU : SchedWriteRes<[GenericIssueALU]>;
-// and, lui, nor, or, slti, sltiu, sub, subu, xor
-// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu,
-// xori
-def : ItinRW<[GenericWriteALU], [II_ADD, II_ADDU, II_ADDI, II_ADDIU, II_ANDI,
- II_AND, II_ANDI, II_CLO, II_CLZ, II_EXT,
- II_INS, II_LUI, II_MULT, II_MULTU, II_NOR,
- II_ORI, II_OR, II_ROTR, II_ROTRV, II_SEB,
- II_SEH, II_SLTI_SLTIU, II_SLT_SLTU, II_SLL,
- II_SRA, II_SRL, II_SLLV, II_SRAV, II_SRLV,
- II_SSNOP, II_SUB, II_SUBU, II_WSBH, II_XOR,
- II_XORI]>;
+// add, addi, addiu, addu, and, andi, clo, clz, ext, ins, lui, nor, or, ori,
+// rotr, rotrv, seb, seh, sll, sllv, slt, slti, sltiu, sltu, sra, srav, srl,
+// srlv, ssnop, sub, subu, wsbh, xor, xori
+def : InstRW<[GenericWriteALU], (instrs ADD, ADDi, ADDiu, ADDu, AND, ANDi,
+ CLO, CLZ, EXT, INS, LEA_ADDiu, LUi, NOP,
+ NOR, OR, ORi, ROTR, ROTRV, SEB, SEH, SLL,
+ SLLV, SLT, SLTi, SLTiu, SLTu, SRA, SRAV, SRL,
+ SRLV, SSNOP, SUB, SUBu, WSBH, XOR, XORi)>;
def : InstRW<[GenericWriteALU], (instrs COPY)>;
+// MIPSR6
+// ======
+
+// addiupc, align, aluipc, aui, auipc, bitswap, clo, clz, lsa, seleqz, selnez
+def : InstRW<[GenericWriteALU], (instrs ADDIUPC, ALIGN, ALUIPC, AUI,
+ AUIPC, BITSWAP, CLO_R6, CLZ_R6, LSA_R6,
+ SELEQZ, SELNEZ)>;
+
+// MIPS16e
+// =======
+
+def : InstRW<[GenericWriteALU], (instrs AddiuRxImmX16, AddiuRxRxImm16,
+ AddiuRxRxImmX16, AddiuRxRyOffMemX16,
+ AddiuRxPcImmX16, AddiuSpImm16, AddiuSpImmX16,
+ AdduRxRyRz16, AndRxRxRy16, CmpRxRy16,
+ CmpiRxImm16, CmpiRxImmX16, LiRxImm16,
+ LiRxImmX16, LiRxImmAlignX16, Move32R16,
+ MoveR3216, Mfhi16, Mflo16, NegRxRy16,
+ NotRxRy16, OrRxRxRy16, SebRx16, SehRx16,
+ SllX16, SllvRxRy16, SltiRxImm16,
+ SltiRxImmX16, SltiCCRxImmX16,
+ SltiuRxImm16, SltiuRxImmX16, SltiuCCRxImmX16,
+ SltRxRy16, SltCCRxRy16, SltuRxRy16,
+ SltuRxRyRz16, SltuCCRxRy16, SravRxRy16,
+ SraX16, SrlvRxRy16, SrlX16, SubuRxRyRz16,
+ XorRxRxRy16)>;
+
+def : InstRW<[GenericWriteALU], (instrs Constant32, LwConstant32,
+ GotPrologue16, CONSTPOOL_ENTRY)>;
+
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteALU], (instrs ADDIUPC_MM, ADDIUR1SP_MM, ADDIUR2_MM,
+ ADDIUS5_MM, ADDIUSP_MM, ADDU16_MM, ADD_MM,
+ ADDi_MM, ADDiu_MM, ADDu_MM, AND16_MM,
+ ANDI16_MM, AND_MM, ANDi_MM, CLO_MM, CLZ_MM,
+ EXT_MM, INS_MM, LEA_ADDiu_MM, LI16_MM,
+ LUi_MM, MOVE16_MM, MOVEP_MM, NOR_MM,
+ NOT16_MM, OR16_MM, OR_MM, ORi_MM, ROTRV_MM,
+ ROTR_MM, SEB_MM, SEH_MM, SLL16_MM, SLLV_MM,
+ SLL_MM, SLT_MM, SLTi_MM, SLTiu_MM, SLTu_MM,
+ SRAV_MM, SRA_MM, SRL16_MM, SRLV_MM, SRL_MM,
+ SSNOP_MM, SUBU16_MM, SUB_MM, SUBu_MM,
+ WSBH_MM, XOR16_MM, XOR_MM, XORi_MM)>;
+
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteALU], (instrs ADDIUPC_MMR6, ADDIU_MMR6, ADDU16_MMR6,
+ ADDU_MMR6, ADD_MMR6, ALIGN_MMR6, ALUIPC_MMR6,
+ AND16_MMR6, ANDI16_MMR6, ANDI_MMR6, AND_MMR6,
+ AUIPC_MMR6, AUI_MMR6, BITSWAP_MMR6, CLO_MMR6,
+ CLZ_MMR6, EXT_MMR6, INS_MMR6, LI16_MMR6,
+ LSA_MMR6, LUI_MMR6, MOVE16_MMR6, NOR_MMR6,
+ NOT16_MMR6, OR16_MMR6, ORI_MMR6, OR_MMR6,
+ SELEQZ_MMR6, SELNEZ_MMR6, SLL16_MMR6,
+ SLL_MMR6, SRL16_MMR6, SSNOP_MMR6, SUBU16_MMR6,
+ SUBU_MMR6, SUB_MMR6, WSBH_MMR6, XOR16_MMR6,
+ XORI_MMR6, XOR_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteALU], (instrs AND64, ANDi64, DEXT64_32, DSLL64_32,
+ ORi64, SEB64, SEH64, SLL64_32, SLL64_64,
+ SLT64, SLTi64, SLTiu64, SLTu64, XOR64,
+ XORi64)>;
+
+def : InstRW<[GenericWriteALU], (instrs DADD, DADDi, DADDiu, DADDu, DCLO,
+ DCLZ, DEXT, DEXTM, DEXTU, DINS, DINSM, DINSU,
+ DROTR, DROTR32, DROTRV, DSBH, DSHD, DSLL,
+ DSLL32, DSLLV, DSRA, DSRA32, DSRAV, DSRL,
+ DSRL32, DSRLV, DSUB, DSUBu, LEA_ADDiu64,
+ LUi64, NOR64, OR64)>;
+
+// MIPS64R6
+// ========
+
+def : InstRW<[GenericWriteALU], (instrs DALIGN, DAHI, DATI, DAUI, DCLO_R6,
+ DCLZ_R6, DBITSWAP, DLSA, DLSA_R6, SELEQZ64,
+ SELNEZ64)>;
+
+
def GenericMDU : ProcResource<1> { let BufferSize = 1; }
def GenericIssueMDU : ProcResource<1> { let Super = GenericALU; }
def GenericIssueDIV : ProcResource<1> { let Super = GenericMDU; }
def GenericWriteHILO : SchedWriteRes<[GenericIssueMDU]>;
def GenericWriteALULong : SchedWriteRes<[GenericIssueALU]> { let Latency = 5; }
def GenericWriteMove : SchedWriteRes<[GenericIssueALU]> { let Latency = 2; }
+def GenericWriteMul : SchedWriteRes<[GenericIssueMDU]> { let Latency = 4; }
+
+def : InstRW<[GenericWriteHILO], (instrs MADD, MADDU, MSUB, MSUBU)>;
-def : ItinRW<[GenericWriteHILO], [II_MADD, II_MADDU, II_MSUB, II_MSUBU]>;
+def : InstRW<[GenericWriteHILO], (instrs PseudoMADD_MM, PseudoMADDU_MM,
+ PseudoMSUB_MM, PseudoMSUBU_MM,
+ PseudoMULT_MM, PseudoMULTu_MM)>;
+
+def : InstRW<[GenericWriteHILO], (instrs PseudoMADD, PseudoMADDU, PseudoMSUB,
+ PseudoMSUBU, PseudoMULT, PseudoMULTu)>;
def GenericWriteMDUtoGPR : SchedWriteRes<[GenericIssueMDU]> {
let Latency = 5;
}
-def : ItinRW<[GenericWriteMDUtoGPR], [II_MUL]>;
-
def GenericWriteDIV : SchedWriteRes<[GenericIssueDIV]> {
// Estimated worst case
let Latency = 33;
@@ -82,63 +168,105 @@ def GenericWriteDIVU : SchedWriteRes<[GenericIssueDIV]> {
let ResourceCycles = [31];
}
-def : ItinRW<[GenericWriteDIV], [II_DIV]>;
+// mul
+def : InstRW<[GenericWriteMDUtoGPR], (instrs MUL)>;
-def : ItinRW<[GenericWriteDIVU], [II_DIVU]>;
+// mult, multu
+def : InstRW<[GenericWriteMul], (instrs MULT, MULTu)>;
-// MIPS64
-// ======
+// div, sdiv
+def : InstRW<[GenericWriteDIV], (instrs PseudoSDIV, SDIV)>;
+
+def : InstRW<[GenericWriteDIVU], (instrs PseudoUDIV, UDIV)>;
+
+// mfhi, mflo, movn, mthi, mtlo, rdwhr
+def : InstRW<[GenericWriteALULong], (instrs MFHI, MFLO, PseudoMFHI,
+ PseudoMFLO)>;
+
+def : InstRW<[GenericWriteALULong], (instrs PseudoMFHI_MM, PseudoMFLO_MM)>;
-def : ItinRW<[GenericWriteALU], [II_DADDIU, II_DADDU, II_DADDI, II_DADD,
- II_DCLO, II_DCLZ, II_DROTR, II_DROTR32,
- II_DROTRV, II_DSBH, II_DSHD, II_DSLL,
- II_DSLL32, II_DSLLV, II_DSRA, II_DSRA32,
- II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV,
- II_DSUBU, II_DSUB]>;
+def : InstRW<[GenericWriteMove], (instrs MTHI, MTLO, RDHWR, PseudoMTLOHI)>;
+def : InstRW<[GenericWriteMove], (instrs PseudoMTLOHI_MM)>;
-def : ItinRW<[GenericWriteDIV], [II_DDIV]>;
+def : InstRW<[GenericWriteALU], (instrs MOVN_I_I, MOVZ_I_I)>;
-def : ItinRW<[GenericWriteDIVU], [II_DDIVU]>;
+// MIPSR6
+// ======
-def : ItinRW<[GenericWriteMDUtoGPR], [II_DMUL]>;
+// muh, muhu, mulu, mul
+def : InstRW<[GenericWriteMul], (instrs MUH, MUHU, MULU, MUL_R6)>;
+
+// divu, udiv
+def : InstRW<[GenericWriteDIV], (instrs MOD, MODU, DIV, DIVU)>;
-def : ItinRW<[GenericWriteHILO], [II_DMULU, II_DMULT, II_DMULTU]>;
// MIPS16e
// =======
-def : ItinRW<[GenericWriteALU], [IIM16Alu, IIPseudo]>;
+def : InstRW<[GenericWriteHILO], (instrs MultRxRy16, MultuRxRy16,
+ MultRxRyRz16, MultuRxRyRz16)>;
+
+def : InstRW<[GenericWriteDIV], (instrs DivRxRy16)>;
+
+def : InstRW<[GenericWriteDIVU], (instrs DivuRxRy16)>;
// microMIPS
// =========
-def : ItinRW<[GenericWriteALU], [II_MOVE, II_LI, II_NOT]>;
+def : InstRW<[GenericWriteMul], (instrs MULT_MM, MULTu_MM, MADD_MM, MADDU_MM,
+ MSUB_MM, MSUBU_MM)>;
-// MIPSR6
+def : InstRW<[GenericWriteALULong], (instrs MUL_MM)>;
+
+def : InstRW<[GenericWriteDIV], (instrs SDIV_MM, SDIV_MM_Pseudo)>;
+
+def : InstRW<[GenericWriteDIVU], (instrs UDIV_MM, UDIV_MM_Pseudo)>;
+
+def : InstRW<[GenericWriteMove], (instrs MFHI16_MM, MFLO16_MM, MOVF_I_MM,
+ MOVT_I_MM, MFHI_MM, MFLO_MM, MTHI_MM,
+ MTLO_MM)>;
+
+def : InstRW<[GenericWriteMove], (instrs RDHWR_MM)>;
+
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteMul], (instrs MUHU_MMR6, MUH_MMR6, MULU_MMR6,
+ MUL_MMR6)>;
+
+def : InstRW<[GenericWriteDIV], (instrs MODU_MMR6, MOD_MMR6, DIVU_MMR6,
+ DIV_MMR6)>;
+
+def : InstRW<[GenericWriteMove], (instrs RDHWR_MMR6)>;
+
+// MIPS64
// ======
-def GenericWriteMul : SchedWriteRes<[GenericIssueMDU]> { let Latency = 4; }
-def : ItinRW<[GenericWriteMul], [II_MUH, II_MUHU, II_MULU]>;
+def : InstRW<[GenericWriteHILO], (instrs DMULU, DMULT, DMULTu, PseudoDMULT,
+ PseudoDMULTu)>;
+
+def : InstRW<[GenericWriteDIV], (instrs DSDIV, PseudoDSDIV)>;
-def : ItinRW<[GenericWriteDIV], [II_MOD, II_MODU]>;
+def : InstRW<[GenericWriteDIVU], (instrs DUDIV, PseudoDUDIV)>;
+
+def : InstRW<[GenericWriteALULong], (instrs MFHI64, MFLO64, PseudoMFHI64,
+ PseudoMFLO64, PseudoMTLOHI64)>;
+
+def : InstRW<[GenericWriteMove], (instrs MTHI64, MTLO64, RDHWR64)>;
+
+// mov[zn]
+def : InstRW<[GenericWriteALU], (instrs MOVN_I_I64, MOVN_I64_I, MOVN_I64_I64,
+ MOVZ_I_I64, MOVZ_I64_I, MOVZ_I64_I64)>;
-def : ItinRW<[GenericWriteALU], [II_ADDIUPC, II_ALIGN, II_ALUIPC, II_AUI,
- II_AUIPC, II_BITSWAP, II_LSA, II_SELCCZ]>;
// MIPS64R6
// ========
-def : ItinRW<[GenericWriteALU], [II_DALIGN, II_DAHI, II_DATI, II_DAUI,
- II_DBITSWAP, II_DLSA]>;
-
-def : ItinRW<[GenericWriteMDUtoGPR], [II_DMUH, II_DMUHU]>;
-def : ItinRW<[GenericWriteDIV], [II_DMOD, II_DMODU]>;
+def : InstRW<[GenericWriteMDUtoGPR], (instrs DMUH, DMUHU, DMUL_R6)>;
-// clo, clz, di, mfhi, mflo
-def : ItinRW<[GenericWriteALULong], [II_MFHI_MFLO]>;
-def : ItinRW<[GenericWriteALU], [II_MOVN, II_MOVZ]>;
-def : ItinRW<[GenericWriteMove], [II_MTHI_MTLO, II_RDHWR]>;
+def : InstRW<[GenericWriteDIV], (instrs DDIV, DMOD)>;
+def : InstRW<[GenericWriteDIVU], (instrs DDIVU, DMODU)>;
// CTISTD Pipeline
// ---------------
@@ -155,31 +283,150 @@ def GenericWriteJumpAndLink : SchedWriteRes<[GenericIssueCTISTD]> {
// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx,
// jalr, jr.hb, jr, jalr.hb, jarlc, jialc
-def : ItinRW<[GenericWriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J,
- II_JR, II_JR_HB, II_ERET, II_ERETNC,
- II_DERET]>;
+def : InstRW<[GenericWriteJump], (instrs B, BAL, BAL_BR, BEQ, BNE, BGTZ, BGEZ,
+ BLEZ, BLTZ, BLTZAL, J, JALX, JR, JR_HB, ERET,
+ ERet, ERETNC, DERET)>;
+
+def : InstRW<[GenericWriteJump], (instrs BEQL, BNEL, BGEZL, BGTZL, BLEZL,
+ BLTZL)>;
+
+def : InstRW<[GenericWriteJump], (instrs TAILCALL, TAILCALLREG,
+ TAILCALLREGHB, PseudoIndirectBranch,
+ PseudoIndirectHazardBranch, PseudoReturn,
+ RetRA)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs BGEZAL, JAL, JALR, JALR_HB,
+ JALRHBPseudo, JALRPseudo)>;
-def : ItinRW<[GenericWriteJumpAndLink], [II_JAL, II_JALR, II_JALR_HB,
- II_BC2CCZ]>;
+def : InstRW<[GenericWriteJumpAndLink], (instrs BGEZALL, BLTZALL)>;
-def : ItinRW<[GenericWriteJump], [II_JRC, II_JRADDIUSP]>;
+def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
-def : ItinRW<[GenericWriteJumpAndLink], [II_BCCZALS, II_JALS, II_JALRS]>;
+def : InstRW<[GenericWriteTrap], (instrs BREAK, SYSCALL, TEQ, TEQI,
+ TGE, TGEI, TGEIU, TGEU, TNE,
+ TNEI, TLT, TLTI, TLTU, TTLTIU,
+ TRAP, SDBBP)>;
// MIPSR6
// ======
-def : ItinRW<[GenericWriteJumpAndLink], [II_BALC, II_JALRC, II_JIALC]>;
+def : InstRW<[GenericWriteJumpAndLink], (instrs BALC, BEQZALC, BGEZALC,
+ BGTZALC, BLEZALC, BLTZALC,
+ BNEZALC,
+ JIALC)>;
-def : ItinRW<[GenericWriteJump], [II_JIC, II_BC, II_BCCC, II_BCCZC]>;
+def : InstRW<[GenericWriteJump], (instrs BC, BC2EQZ, BC2NEZ, BEQC, BEQZC, BGEC,
+ BGEUC, BGEZC, BGTZC, BLEZC, BLTC, BLTUC,
+ BLTZC, BNEC, BNEZC, BNVC, BOVC, JIC, JR_HB_R6,
+ SIGRIE, PseudoIndirectBranchR6,
+ PseudoIndrectHazardBranchR6)>;
+def : InstRW<[GenericWriteJump], (instrs TAILCALLR6REG, TAILCALLHBR6REG)>;
-def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
+def : InstRW<[GenericWriteTrap], (instrs SDBBP_R6)>;
+
+// MIPS16e
+// =======
+
+def : InstRW<[GenericWriteJump], (instrs Bimm16, BimmX16, BeqzRxImm16,
+ BeqzRxImmX16, BnezRxImm16, BnezRxImmX16,
+ Bteqz16, BteqzX16, BteqzT8CmpX16,
+ BteqzT8CmpiX16, BteqzT8SltX16,
+ BteqzT8SltuX16, BteqzT8SltiX16,
+ BteqzT8SltiuX16, Btnez16, BtnezX16,
+ BtnezT8CmpX16, BtnezT8CmpiX16,
+ BtnezT8SltX16, BtnezT8SltuX16,
+ BtnezT8SltiX16, BtnezT8SltiuX16, JrRa16,
+ JrcRa16, JrcRx16, RetRA16)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs Jal16, JalB16, JumpLinkReg16)>;
+
+def : InstRW<[GenericWriteTrap], (instrs Break16)>;
+
+def : InstRW<[GenericWriteALULong], (instrs SelBeqZ, SelTBteqZCmp,
+ SelTBteqZCmpi, SelTBteqZSlt,
+ SelTBteqZSlti, SelTBteqZSltu,
+ SelTBteqZSltiu, SelBneZ, SelTBtneZCmp,
+ SelTBtneZCmpi, SelTBtneZSlt,
+ SelTBtneZSlti, SelTBtneZSltu,
+ SelTBtneZSltiu)>;
+
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteJump], (instrs B16_MM, BAL_BR_MM, BC1F_MM, BC1T_MM,
+ BEQZ16_MM, BEQZC_MM, BEQ_MM, BGEZ_MM,
+ BGTZ_MM, BLEZ_MM, BLTZ_MM, BNEZ16_MM,
+ BNEZC_MM, BNE_MM, B_MM, DERET_MM, ERET_MM,
+ JR16_MM, JR_MM, J_MM, B_MM_Pseudo)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs BGEZALS_MM, BGEZAL_MM,
+ BLTZALS_MM, BLTZAL_MM, JALR16_MM,
+ JALRS16_MM, JALRS_MM, JALR_MM,
+ JALS_MM, JALX_MM, JAL_MM)>;
+
+def : InstRW<[GenericWriteJump], (instrs TAILCALLREG_MM, TAILCALL_MM,
+ PseudoIndirectBranch_MM)>;
+
+def : InstRW<[GenericWriteTrap], (instrs BREAK16_MM, BREAK_MM, SDBBP16_MM,
+ SDBBP_MM, SYSCALL_MM, TEQI_MM, TEQ_MM,
+ TGEIU_MM, TGEI_MM, TGEU_MM, TGE_MM, TLTIU_MM,
+ TLTI_MM, TLTU_MM, TLT_MM, TNEI_MM, TNE_MM,
+ TRAP_MM)>;
+
+// microMIPS32r6
+// =============
-def : ItinRW<[GenericWriteTrap], [II_BREAK, II_SYSCALL, II_TEQ, II_TEQI,
- II_TGE, II_TGEI, II_TGEIU, II_TGEU, II_TNE,
- II_TNEI, II_TLT, II_TLTI, II_TLTU, II_TTLTIU,
- II_TRAP, II_SDBBP, II_SIGRIE]>;
+def : InstRW<[GenericWriteJump], (instrs BC16_MMR6, BC1EQZC_MMR6, BC1NEZC_MMR6,
+ BC2EQZC_MMR6, BC2NEZC_MMR6, BC_MMR6,
+ BEQC_MMR6, BEQZC16_MMR6, BEQZC_MMR6,
+ BGEC_MMR6, BGEUC_MMR6, BGEZC_MMR6,
+ BGTZC_MMR6, BLEZC_MMR6, BLTC_MMR6,
+ BLTUC_MMR6, BLTZC_MMR6, BNEC_MMR6,
+ BNEZC16_MMR6, BNEZC_MMR6, BNVC_MMR6,
+ BOVC_MMR6, DERET_MMR6, ERETNC_MMR6, JAL_MMR6,
+ ERET_MMR6, JIC_MMR6, JRADDIUSP, JRC16_MM,
+ JRC16_MMR6, JRCADDIUSP_MMR6, SIGRIE_MMR6,
+ B_MMR6_Pseudo, PseudoIndirectBranch_MMR6)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs BALC_MMR6, BEQZALC_MMR6,
+ BGEZALC_MMR6, BGTZALC_MMR6,
+ BLEZALC_MMR6, BLTZALC_MMR6,
+ BNEZALC_MMR6, JALRC16_MMR6,
+ JALRC_HB_MMR6, JALRC_MMR6,
+ JIALC_MMR6)>;
+
+def : InstRW<[GenericWriteJump], (instrs TAILCALLREG_MMR6, TAILCALL_MMR6)>;
+
+def : InstRW<[GenericWriteTrap], (instrs BREAK16_MMR6, BREAK_MMR6, SDBBP_MMR6,
+ SDBBP16_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteJump], (instrs BEQ64, BGEZ64, BGTZ64, BLEZ64,
+ BLTZ64, BNE64, JR64)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs JALR64, JALR64Pseudo,
+ JALRHB64Pseudo, JALR_HB64)>;
+
+def : InstRW<[GenericWriteJump], (instrs JR_HB64, TAILCALLREG64,
+ TAILCALLREGHB64, PseudoReturn64)>;
+
+// MIPS64R6
+// ========
+
+def : InstRW<[GenericWriteJump], (instrs BEQC64, BEQZC64, BGEC64, BGEUC64,
+ BGEZC64, BGTZC64, BLEZC64, BLTC64, BLTUC64,
+ BLTZC64, BNEC64, BNEZC64, JIC64,
+ PseudoIndirectBranch64,
+ PseudoIndirectHazardBranch64)>;
+
+def : InstRW<[GenericWriteJumpAndLink], (instrs JIALC64)>;
+
+def : InstRW<[GenericWriteJump], (instrs JR_HB64_R6, TAILCALL64R6REG,
+ TAILCALLHB64R6REG, PseudoIndirectBranch64R6,
+ PseudoIndrectHazardBranch64R6)>;
// COP0 Pipeline
// =============
@@ -196,35 +443,100 @@ def GenericReadWriteCOP0Long : SchedWriteRes<[GenericIssueCOP0]> {
}
def GenericWriteCOP0Short : SchedWriteRes<[GenericIssueCOP0]>;
-def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>;
-def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>;
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBP, TLBR, TLBWI, TLBWR)>;
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBINV, TLBINVF)>;
-def : ItinRW<[GenericReadCOP0], [II_MFC0]>;
-def : ItinRW<[GenericWriteCOP0], [II_MTC0]>;
+def : InstRW<[GenericReadCOP0], (instrs MFC0)>;
+def : InstRW<[GenericWriteCOP0], (instrs MTC0)>;
-def : ItinRW<[GenericWriteCOP0], [II_EVP, II_DVP]>;
+def : InstRW<[GenericWriteCOP0], (instrs EVP, DVP)>;
-// MIPSR5
-// ======
-def : ItinRW<[GenericReadCOP0], [II_MFHC0]>;
-def : ItinRW<[GenericWriteCOP0], [II_MTHC0]>;
+def : InstRW<[GenericWriteCOP0], (instrs DI, EI)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EHB, PAUSE, WAIT)>;
+
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBP_MM, TLBR_MM, TLBWI_MM,
+ TLBWR_MM)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs DI_MM, EI_MM)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EHB_MM, PAUSE_MM, WAIT_MM)>;
+
+
+// microMIPS32R6
+// =============
+
+def : InstRW<[GenericWriteCOP0], (instrs RDPGPR_MMR6, WRPGPR_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0TLB], (instrs TLBINV_MMR6, TLBINVF_MMR6)>;
+
+def : InstRW<[GenericReadCOP0], (instrs MFHC0_MMR6, MFC0_MMR6, MFHC2_MMR6,
+ MFC2_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs MTHC0_MMR6, MTC0_MMR6, MTHC2_MMR6,
+ MTC2_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EVP_MMR6, DVP_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs DI_MMR6, EI_MMR6)>;
+
+def : InstRW<[GenericWriteCOP0], (instrs EHB_MMR6, PAUSE_MMR6, WAIT_MMR6)>;
// MIPS64
// ======
-def : ItinRW<[GenericReadCOP0], [II_DMFC0]>;
-def : ItinRW<[GenericWriteCOP0], [II_DMTC0]>;
+def : InstRW<[GenericReadCOP0], (instrs DMFC0)>;
-def : ItinRW<[GenericWriteCOP0], [II_RDPGPR, II_WRPGPR]>;
+def : InstRW<[GenericWriteCOP0], (instrs DMTC0)>;
-def : ItinRW<[GenericWriteCOP0], [II_DI, II_EI]>;
-
-def : ItinRW<[GenericWriteCOP0], [II_EHB, II_PAUSE, II_WAIT]>;
def GenericCOP2 : ProcResource<1> { let BufferSize = 1; }
def GenericWriteCOPOther : SchedWriteRes<[GenericCOP2]>;
-def : ItinRW<[GenericWriteCOPOther], [II_MFC2, II_MTC2, II_DMFC2, II_DMTC2]>;
+def : InstRW<[GenericWriteCOPOther], (instrs MFC2, MTC2)>;
+
+def : InstRW<[GenericWriteCOPOther], (instrs DMFC2, DMTC2)>;
+
+// microMIPS32R6
+// =============
+
+// The latency and repeat rate of these instructions are implementation
+// dependent.
+def : InstRW<[GenericWriteMove], (instrs CFC2_MM, CTC2_MM)>;
+
+
+// MIPS MT ASE - hasMT
+// ====================
+
+def : InstRW<[GenericWriteMove], (instrs DMT, DVPE, EMT, EVPE, MFTR,
+ MTTR)>;
+
+def : InstRW<[GenericReadWriteCOP0Long], (instrs YIELD)>;
+
+def : InstRW<[GenericWriteCOP0Short], (instrs FORK)>;
+
+// MIPS Virtualization ASE
+// =======================
+
+def : InstRW<[GenericWriteCOP0Short], (instrs HYPCALL, TLBGINV, TLBGINVF, TLBGP,
+ TLBGR, TLBGWI, TLBGWR, MFGC0, MFHGC0,
+ MTGC0, MTHGC0)>;
+
+// MIPS64 Virtualization ASE
+// =========================
+
+def : InstRW<[GenericWriteCOP0Short], (instrs DMFGC0, DMTGC0)>;
+
+// microMIPS virtualization ASE
+// ============================
+
+def : InstRW<[GenericWriteCOP0Short], (instrs HYPCALL_MM, TLBGINVF_MM,
+ TLBGINV_MM, TLBGP_MM, TLBGR_MM,
+ TLBGWI_MM, TLBGWR_MM, MFGC0_MM,
+ MFHGC0_MM, MTGC0_MM, MTHGC0_MM)>;
// LDST Pipeline
// -------------
@@ -250,97 +562,168 @@ def GenericWriteLoadToOtherUnits : SchedWriteRes<[GenericIssueLDST]> {
}
// l[bhw], l[bh]u, ll
-def : ItinRW<[GenericWriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LL,
- II_LWC2, II_LWC3, II_LDC2, II_LDC3]>;
+def : InstRW<[GenericWriteLoad], (instrs LB, LBu, LH, LHu, LW, LL,
+ LWC2, LWC3, LDC2, LDC3)>;
// lw[lr]
-def : ItinRW<[GenericWriteLoad], [II_LWL, II_LWR]>;
+def : InstRW<[GenericWriteLoad], (instrs LWL, LWR)>;
-// MIPS64 loads
-def : ItinRW<[GenericWriteLoad], [II_LD, II_LLD, II_LWU]>;
+// s[bhw], sc, s[dw]c[23]
+def : InstRW<[GenericWriteStore], (instrs SB, SH, SW, SWC2, SWC3,
+ SDC2, SDC3)>;
-// ld[lr]
-def : ItinRW<[GenericWriteLoad], [II_LDL, II_LDR]>;
+// PreMIPSR6 sw[lr]
+def : InstRW<[GenericWriteStore], (instrs SWL, SWR)>;
-// MIPS32 EVA
-def : ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE,
- II_LLE]>;
+def : InstRW<[GenericWriteStoreSC], (instrs SC, SC_MMR6)>;
-def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>;
+// pref
+def : InstRW<[GenericWritePref], (instrs PREF)>;
+// cache
+def : InstRW<[GenericWriteCache], (instrs CACHE)>;
-// MIPS MT instructions
-// ====================
+// sync
+def : InstRW<[GenericWriteSync], (instrs SYNC, SYNCI)>;
-def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE, II_MFTR,
- II_MTTR]>;
+// MIPSR6
+// ======
-def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>;
+def : InstRW<[GenericWriteLoad], (instrs LDC2_R6, LL_R6, LWC2_R6, LWPC)>;
-def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>;
+def : InstRW<[GenericWriteStore], (instrs SWC2_R6, SDC2_R6)>;
-// MIPS32R6 and MIPS16e
-// ====================
+def : InstRW<[GenericWriteStoreSC], (instrs SC_R6)>;
-def : ItinRW<[GenericWriteLoad], [II_LWPC]>;
+def : InstRW<[GenericWritePref], (instrs PREF_R6)>;
-// MIPS64R6
-// ====================
+def : InstRW<[GenericWriteCache], (instrs CACHE_R6)>;
+
+def : InstRW<[GenericWriteSync], (instrs GINVI, GINVT)>;
-def : ItinRW<[GenericWriteLoad], [II_LWUPC, II_LDPC]>;
+// MIPS32 EVA
+// ==========
+def : InstRW<[GenericWriteLoad], (instrs LBE, LBuE, LHE, LHuE, LWE,
+ LLE)>;
-// s[bhw], sc, s[dw]c[23]
-def : ItinRW<[GenericWriteStore], [II_SB, II_SH, II_SW, II_SWC2, II_SWC3,
- II_SDC2, II_SDC3]>;
+def : InstRW<[GenericWriteStore], (instrs SBE, SHE, SWE, SCE)>;
-def : ItinRW<[GenericWriteStoreSC], [II_SC]>;
+def : InstRW<[GenericWriteLoad], (instrs LWLE, LWRE)>;
-// PreMIPSR6 sw[lr]
-def : ItinRW<[GenericWriteStore], [II_SWL, II_SWR]>;
+def : InstRW<[GenericWriteStore], (instrs SWLE, SWRE)>;
-// EVA ASE stores
-def : ItinRW<[GenericWriteStore], [II_SBE, II_SHE, II_SWE, II_SCE]>;
+def : InstRW<[GenericWritePref], (instrs PREFE)>;
-def : ItinRW<[GenericWriteStore], [II_SWLE, II_SWRE]>;
+def : InstRW<[GenericWriteCache], (instrs CACHEE)>;
-// MIPS64
-// ======
+// microMIPS EVA ASE - InMicroMipsMode, hasEVA
+// ===========================================
-def : ItinRW<[GenericWriteStore], [II_SD, II_SCD]>;
+def : InstRW<[GenericWriteLoad], (instrs LBE_MM, LBuE_MM, LHE_MM, LHuE_MM,
+ LWE_MM, LWLE_MM, LWRE_MM, LLE_MM)>;
-// PreMIPSR6 stores
-// ================
+def : InstRW<[GenericWriteStore], (instrs SBE_MM, SB_MM, SHE_MM, SWE_MM,
+ SWLE_MM, SWRE_MM, SCE_MM)>;
+
+def : InstRW<[GenericWritePref], (instrs PREFE_MM)>;
+def : InstRW<[GenericWriteCache], (instrs CACHEE_MM)>;
-def : ItinRW<[GenericWriteStore], [II_SDL, II_SDR]>;
// MIPS16e
// =======
-def : ItinRW<[GenericWriteLoad], [II_RESTORE]>;
+def : InstRW<[GenericWriteLoad], (instrs Restore16, RestoreX16,
+ LbRxRyOffMemX16,
+ LbuRxRyOffMemX16, LhRxRyOffMemX16,
+ LhuRxRyOffMemX16, LwRxRyOffMemX16,
+ LwRxSpImmX16, LwRxPcTcp16, LwRxPcTcpX16)>;
-def : ItinRW<[GenericWriteStore], [II_SAVE]>;
+def : InstRW<[GenericWriteStore], (instrs Save16, SaveX16, SbRxRyOffMemX16,
+ ShRxRyOffMemX16, SwRxRyOffMemX16,
+ SwRxSpImmX16)>;
// microMIPS
// =========
-def : ItinRW<[GenericWriteLoad], [II_LWM, II_LWP, II_LWXS]>;
+def : InstRW<[GenericWriteLoad], (instrs LBU16_MM, LB_MM, LBu_MM, LHU16_MM,
+ LH_MM, LHu_MM, LL_MM, LW16_MM, LWGP_MM,
+ LWL_MM, LWM16_MM, LWM32_MM, LWP_MM, LWR_MM,
+ LWSP_MM, LWU_MM, LWXS_MM, LW_MM)>;
-def : ItinRW<[GenericWriteStore], [II_SWM, II_SWP]>;
+def : InstRW<[GenericWriteStore], (instrs SB16_MM, SC_MM, SH16_MM, SH_MM,
+ SW16_MM, SWL_MM, SWM16_MM, SWM32_MM, SWM_MM,
+ SWP_MM, SWR_MM, SWSP_MM, SW_MM)>;
-// pref
-def : ItinRW<[GenericWritePref], [II_PREF]>;
-def : ItinRW<[GenericWritePref], [II_PREFE]>;
+def : InstRW<[GenericWritePref], (instrs PREF_MM, PREFX_MM)>;
-// cache
-def : ItinRW<[GenericWriteCache], [II_CACHE]>;
+def : InstRW<[GenericWriteCache], (instrs CACHE_MM)>;
-def : ItinRW<[GenericWriteCache], [II_CACHEE]>;
+def : InstRW<[GenericWriteSync], (instrs SYNC_MM, SYNCI_MM)>;
+def : InstRW<[GenericWriteSync], (instrs GINVI_MMR6, GINVT_MMR6)>;
-// sync
-def : ItinRW<[GenericWriteSync], [II_SYNC]>;
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteLoad], (instrs LBU_MMR6, LB_MMR6, LDC2_MMR6, LL_MMR6,
+ LWM16_MMR6, LWC2_MMR6, LWPC_MMR6, LW_MMR6)>;
+
+def : InstRW<[GenericWriteStore], (instrs SB16_MMR6, SB_MMR6, SDC2_MMR6,
+ SH16_MMR6, SH_MMR6, SW16_MMR6, SWC2_MMR6,
+ SWM16_MMR6, SWSP_MMR6, SW_MMR6)>;
+
+def : InstRW<[GenericWriteSync], (instrs SYNC_MMR6, SYNCI_MMR6)>;
+
+def : InstRW<[GenericWritePref], (instrs PREF_MMR6)>;
-def : ItinRW<[GenericWriteSync], [II_SYNCI]>;
+def : InstRW<[GenericWriteCache], (instrs CACHE_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteLoad], (instrs LD, LL64, LLD, LWu, LB64, LBu64,
+ LH64, LHu64, LW64)>;
+
+// l[dw][lr]
+def : InstRW<[GenericWriteLoad], (instrs LWL64, LWR64, LDL, LDR)>;
+
+def : InstRW<[GenericWriteStore], (instrs SD, SC64, SCD, SB64, SH64, SW64,
+ SWL64, SWR64)>;
+
+def : InstRW<[GenericWriteStore], (instrs SDL, SDR)>;
+
+// MIPS64R6
+// ========
+
+def : InstRW<[GenericWriteLoad], (instrs LWUPC, LDPC)>;
+
+def : InstRW<[GenericWriteLoad], (instrs LLD_R6, LL64_R6)>;
+
+def : InstRW<[GenericWriteStoreSC], (instrs SC64_R6, SCD_R6)>;
+
+// MIPSR6 CRC ASE - hasCRC
+// =======================
+
+def : InstRW<[GenericWriteALU], (instrs CRC32B, CRC32H, CRC32W, CRC32CB,
+ CRC32CH, CRC32CW)>;
+
+// MIPS64R6 CRC ASE - hasCRC
+// -------------------------
+
+def : InstRW<[GenericWriteALU], (instrs CRC32D, CRC32CD)>;
+
+
+// Cavium Networks MIPS (cnMIPS) - Octeon, HasCnMips
+// =================================================
+
+def : InstRW<[GenericWriteALU], (instrs BADDu, BBIT0, BBIT032, BBIT1, BBIT132,
+ CINS, CINS32, CINS64_32, CINS_i32,
+ DMFC2_OCTEON, DMTC2_OCTEON, DPOP, EXTS,
+ EXTS32, MTM0, MTM1, MTM2, MTP0, MTP1, MTP2,
+ POP, SEQ, SEQi, SNE, SNEi, V3MULU, VMM0,
+ VMULU)>;
+
+def : InstRW<[GenericWriteMDUtoGPR], (instrs DMUL)>;
// FPU Pipelines
// =============
@@ -408,10 +791,10 @@ def GenericWriteFPUSqrtD : SchedWriteRes<[GenericFPUDivSqrt]> {
// ---------------------------------
//
// c.<cc>.[ds], bc1[tf], bc1[tf]l
-def : ItinRW<[GenericWriteFPUCmp], [II_C_CC_D, II_C_CC_S, II_BC1F, II_BC1T,
- II_BC1FL, II_BC1TL]>;
+def : InstRW<[GenericWriteFPUCmp], (instrs FCMP_D32, FCMP_D64, FCMP_S32, BC1F,
+ BC1T, BC1FL, BC1TL)>;
-def : ItinRW<[GenericWriteFPUCmp], [II_CMP_CC_D, II_CMP_CC_S]>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "C_[A-Z]+_(S|D32|D64)$")>;
// Short Pipe
// ----------
@@ -419,21 +802,10 @@ def : ItinRW<[GenericWriteFPUCmp], [II_CMP_CC_D, II_CMP_CC_S]>;
// abs.[ds], abs.ps, add.[ds], neg.[ds], neg.ps, madd.s, msub.s, nmadd,s
// nmsub.s, sub.[ds], mul.s
-def : ItinRW<[GenericWriteFPUS], [II_ABS, II_ADD_D, II_ADD_S, II_MADD_S,
- II_MSUB_S, II_MUL_S, II_NEG, II_NMADD_S,
- II_NMSUB_S, II_SUB_S, II_SUB_D]>;
-// mov[tf].[ds]
-
-def : ItinRW<[GenericWriteFPUS], [II_MOVF_S, II_MOVF_D, II_MOVT_S, II_MOVT_D]>;
-
-// MIPSR6
-// ------
-//
-// sel(eq|ne).[ds], max.[ds], maxa.[ds], min.[ds], mina.[ds], class.[ds]
-def : ItinRW<[GenericWriteFPUS], [II_SELCCZ_S, II_SELCCZ_D, II_MAX_S,
- II_MAX_D, II_MAXA_S, II_MAXA_D, II_MIN_S,
- II_MIN_D, II_MINA_S, II_MINA_D, II_CLASS_S,
- II_CLASS_D]>;
+def : InstRW<[GenericWriteFPUS], (instrs FABS_S, FABS_D32, FABS_D64, FADD_D32,
+ FADD_D64, FADD_S, MADD_S, MSUB_S, FMUL_S,
+ FNEG_S, FNEG_D32, FNEG_D64, NMADD_S, NMSUB_S,
+ FSUB_S, FSUB_D32, FSUB_D64)>;
// Long Pipe
// ----------
@@ -445,71 +817,211 @@ def : ItinRW<[GenericWriteFPUS], [II_SELCCZ_S, II_SELCCZ_D, II_MAX_S,
// madd.d, msub.d, mul.d, mul.ps, nmadd.d, nmsub.d, ceil.[wl].[sd], cvt.d.[sw],
// cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps, round.[lw].[ds], floor.[lw].ds,
// trunc.w.[ds], trunc.w.ps,
-def : ItinRW<[GenericWriteFPUL], [II_MADD_D, II_MSUB_D, II_MUL_D, II_NMADD_D,
- II_NMSUB_D, II_CEIL, II_CVT,
- II_FLOOR, II_ROUND, II_TRUNC]>;
+def : InstRW<[GenericWriteFPUL], (instrs CEIL_L_D64, CEIL_L_S, CEIL_W_D32,
+ CEIL_W_D64, CEIL_W_S, CVT_D32_S, CVT_D32_W,
+ CVT_D64_L, CVT_D64_S, CVT_D64_W, CVT_L_D64,
+ CVT_L_S, CVT_S_D32, CVT_S_D64, CVT_S_L,
+ CVT_S_W, CVT_W_D32, CVT_W_D64, CVT_W_S,
+ CVT_PS_S64, CVT_S_PL64, CVT_S_PU64,
+ FLOOR_L_D64, FLOOR_L_S, FLOOR_W_D32,
+ FLOOR_W_D64, FLOOR_W_S, FMUL_D32, FMUL_D64,
+ MADD_D32, MADD_D64, MSUB_D32, MSUB_D64,
+ NMADD_D32, NMADD_D64, NMSUB_D32, NMSUB_D64,
+ PLL_PS64, PLU_PS64,
+ ROUND_L_D64, ROUND_L_S, ROUND_W_D32,
+ ROUND_W_D64, ROUND_W_S, TRUNC_L_D64,
+ TRUNC_L_S, TRUNC_W_D32, TRUNC_W_D64,
+ TRUNC_W_S, PseudoTRUNC_W_D,
+ PseudoTRUNC_W_D32, PseudoTRUNC_W_S)>;
+
+// Pseudo convert instruction
+def : InstRW<[GenericWriteFPUL], (instrs PseudoCVT_D32_W, PseudoCVT_D64_L,
+ PseudoCVT_D64_W, PseudoCVT_S_L,
+ PseudoCVT_S_W)>;
// div.[ds], div.ps
-def : ItinRW<[GenericWriteFPUDivS], [II_DIV_S]>;
-def : ItinRW<[GenericWriteFPUDivD], [II_DIV_D]>;
+def : InstRW<[GenericWriteFPUDivS], (instrs FDIV_S)>;
+def : InstRW<[GenericWriteFPUDivD], (instrs FDIV_D32, FDIV_D64)>;
// sqrt.[ds], sqrt.ps
-def : ItinRW<[GenericWriteFPUSqrtS], [II_SQRT_S]>;
-def : ItinRW<[GenericWriteFPUSqrtD], [II_SQRT_D]>;
+def : InstRW<[GenericWriteFPUSqrtS], (instrs FSQRT_S)>;
+def : InstRW<[GenericWriteFPUSqrtD], (instrs FSQRT_D32, FSQRT_D64)>;
// rsqrt.[ds], recip.[ds]
-def : ItinRW<[GenericWriteFPURcpS], [II_RECIP_S, II_RSQRT_S]>;
-def : ItinRW<[GenericWriteFPURcpD], [II_RECIP_D, II_RSQRT_D]>;
+def : InstRW<[GenericWriteFPURcpS], (instrs RECIP_S, RSQRT_S)>;
+def : InstRW<[GenericWriteFPURcpD], (instrs RECIP_D32, RECIP_D64,
+ RSQRT_D32, RSQRT_D64)>;
-// MIPSR6
-// ======
-//
-// rint.[ds]
-def : ItinRW<[GenericWriteFPUL], [II_RINT_S, II_RINT_D]>;
// Load Pipe
// ---------
// ctc1, mtc1, mthc1, cfc1, mfc1, mfhc1
-def : ItinRW<[GenericWriteFPUMoveGPRFPU], [II_CFC1, II_CTC1, II_MFC1, II_MFHC1,
- II_MTC1, II_MTHC1]>;
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs BuildPairF64,
+ BuildPairF64_64, ExtractElementF64,
+ ExtractElementF64_64, CFC1, CTC1,
+ MFC1, MFC1_D64, MFHC1_D32,
+ MFHC1_D64, MTC1, MTC1_D64,
+ MTHC1_D32, MTHC1_D64)>;
// swc1, swxc1
-def : ItinRW<[GenericWriteFPUStore], [II_SDC1, II_SDXC1, II_SUXC1, II_SWC1,
- II_SWXC1]>;
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1, SDC164, SDXC1, SDXC164,
+ SUXC1, SUXC164, SWC1, SWXC1)>;
+
+def : InstRW<[GenericWriteFPUMoveFP], (instrs FMOV_D32, FMOV_D64, FMOV_S)>;
+
// movn.[ds], movz.[ds]
-def : ItinRW<[GenericWriteFPUMoveFP], [II_MOV_D, II_MOV_S, II_MOVF, II_MOVT,
- II_MOVN_D, II_MOVN_S, II_MOVZ_D,
- II_MOVZ_S]>;
+def : InstRW<[GenericWriteFPUMoveFP], (instrs MOVF_I, MOVF_D32, MOVF_D64,
+ MOVF_S, MOVT_I, MOVT_D32, MOVT_D64,
+ MOVT_S, MOVN_I_D32, MOVN_I_D64,
+ MOVN_I_S, MOVZ_I_D32, MOVZ_I_D64,
+ MOVZ_I_S)>;
+
+def : InstRW<[GenericWriteFPUMoveFP], (instrs MOVT_I64, MOVF_I64, MOVZ_I64_S,
+ MOVN_I64_D64, MOVN_I64_S,
+ MOVZ_I64_D64)>;
// l[dw]x?c1
-def : ItinRW<[GenericWriteFPULoad], [II_LDC1, II_LDXC1, II_LUXC1, II_LWC1,
- II_LWXC1]>;
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1, LDC164, LDXC1, LDXC164,
+ LUXC1, LUXC164, LWC1, LWXC1)>;
-// MIPS64
+// MIPSR6
// ======
-def : ItinRW<[GenericWriteFPUMoveGPRFPU], [II_DMFC1, II_DMTC1]>;
+// sel(eq|ne).[ds], max.[ds], maxa.[ds], min.[ds], mina.[ds], class.[ds]
+def : InstRW<[GenericWriteFPUS], (instrs SELEQZ_S, SELNEZ_S, SELEQZ_D, SELNEZ_D,
+ MAX_S, MAX_D, MAXA_S, MAXA_D, MIN_S, MIN_D,
+ MINA_S, MINA_D, CLASS_S, CLASS_D)>;
-// MIPSR6
-// ======
+def : InstRW<[GenericWriteFPUL], (instrs RINT_S, RINT_D)>;
-def : ItinRW<[GenericWriteFPUS], [II_MADDF_S, II_MSUBF_S]>;
+def : InstRW<[GenericWriteFPUCmp], (instrs BC1EQZ, BC1NEZ, SEL_D, SEL_S)>;
-def : ItinRW<[GenericWriteFPUS], [II_MADDF_D, II_MSUBF_D]>;
+def : InstRW<[GenericWriteFPUS], (instrs MADDF_S, MSUBF_S, MADDF_D, MSUBF_D)>;
-def : ItinRW<[GenericWriteFPUCmp], [II_BC1CCZ, II_SEL_D, II_SEL_S]>;
-// Cavium Networks MIPS (cnMIPS) - Octeon, HasCnMips
-// =================================================
+// microMIPS
+// =========
+
+def : InstRW<[GenericWriteFPUMoveFP], (instrs MOVF_D32_MM, MOVF_S_MM,
+ MOVN_I_D32_MM, MOVN_I_S_MM,
+ MOVT_D32_MM, MOVT_S_MM, MOVZ_I_D32_MM,
+ MOVZ_I_S_MM)>;
+
+
+// cvt.?.?, ceil.?, floor.?, round.?, trunc.? (n)madd.? (n)msub.?
+def : InstRW<[GenericWriteFPUL], (instrs CVT_D32_S_MM, CVT_D32_W_MM,
+ CVT_D64_S_MM, CVT_D64_W_MM, CVT_L_D64_MM,
+ CVT_L_S_MM, CVT_S_D32_MM, CVT_S_D64_MM,
+ CVT_S_W_MM, CVT_W_D32_MM, CVT_W_D64_MM,
+ CVT_W_S_MM, CEIL_W_MM, CEIL_W_S_MM,
+ FLOOR_W_MM, FLOOR_W_S_MM, NMADD_S_MM,
+ NMADD_D32_MM, NMSUB_S_MM, NMSUB_D32_MM,
+ MADD_S_MM, MADD_D32_MM, ROUND_W_MM,
+ ROUND_W_S_MM, TRUNC_W_MM, TRUNC_W_S_MM)>;
+
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_[A-Z]_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_[A-Z][A-Z]_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_[A-Z][A-Z][A-Z]_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instregex "^C_NGLE_(S|D32|D64)_MM$")>;
+def : InstRW<[GenericWriteFPUCmp], (instrs FCMP_S32_MM, FCMP_D32_MM)>;
+
+def : InstRW<[GenericWriteFPUS], (instrs MFC1_MM, MFHC1_D32_MM, MFHC1_D64_MM,
+ MTC1_MM, MTC1_D64_MM,
+ MTHC1_D32_MM, MTHC1_D64_MM)>;
+
+def : InstRW<[GenericWriteFPUS], (instrs FABS_D32_MM, FABS_D64_MM, FABS_S_MM,
+ FNEG_D32_MM, FNEG_D64_MM, FNEG_S_MM,
+ FADD_D32_MM, FADD_D64_MM, FADD_S_MM,
+ FMOV_D32_MM, FMOV_D64_MM, FMOV_S_MM,
+ FMUL_D32_MM, FMUL_D64_MM, FMUL_S_MM,
+ FSUB_D32_MM, FSUB_D64_MM, FSUB_S_MM,
+ MSUB_S_MM, MSUB_D32_MM)>;
+
+def : InstRW<[GenericWriteFPUDivS], (instrs FDIV_S_MM)>;
+def : InstRW<[GenericWriteFPUDivD], (instrs FDIV_D32_MM, FDIV_D64_MM)>;
+
+def : InstRW<[GenericWriteFPUSqrtS], (instrs FSQRT_S_MM)>;
+def : InstRW<[GenericWriteFPUSqrtD], (instrs FSQRT_D32_MM, FSQRT_D64_MM)>;
+
+def : InstRW<[GenericWriteFPURcpS], (instrs RECIP_S_MM, RSQRT_S_MM)>;
+def : InstRW<[GenericWriteFPURcpD], (instrs RECIP_D32_MM, RECIP_D64_MM,
+ RSQRT_D32_MM, RSQRT_D64_MM)>;
+
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1_MM, SWC1_MM, SUXC1_MM,
+ SWXC1_MM)>;
+
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs CFC1_MM, CTC1_MM)>;
+
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1_MM, LUXC1_MM, LWC1_MM,
+ LWXC1_MM)>;
+
+// microMIPS32r6
+// =============
+
+def : InstRW<[GenericWriteFPUS], (instrs FNEG_S_MMR6)>;
+
+def : InstRW<[GenericWriteFPUCmp], (instregex "CMP_[A-Z][A-Z]_(S|D)_MMR6")>;
+def : InstRW<[GenericWriteFPUCmp],
+ (instregex "CMP_[A-Z][A-Z][A-Z]_(S|D)_MMR6")>;
+def : InstRW<[GenericWriteFPUCmp],
+ (instregex "CMP_[A-Z][A-Z][A-Z][A-Z]_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+ (instregex "CVT_(L|D|S|W)_(L|D|S|L|W)_MMR6")>;
-def : ItinRW<[GenericWriteALU], [II_SEQ_SNE, II_SEQI_SNEI, II_POP, II_BADDU,
- II_BBIT]>;
+def : InstRW<[GenericWriteFPUL],
+ (instregex "TRUNC_(L|W)_(D|S)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+ (instregex "ROUND_(L|W)_(D|S)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+ (instregex "FLOOR_(L|W)_(D|S)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL],
+ (instregex "CEIL_(L|W)_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS],
+ (instrs MFC1_MMR6, MTC1_MMR6, CLASS_S_MMR6, CLASS_D_MMR6,
+ FADD_S_MMR6)>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "M(IN|AX)_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "M(IN|AX)A_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "SEL(EQ|NE)Z_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "SEL_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUL], (instrs RINT_S_MMR6, RINT_D_MMR6)>;
+
+def : InstRW<[GenericWriteFPUS], (instregex "M(ADD|SUB)F_(S|D)_MMR6")>;
+
+def : InstRW<[GenericWriteFPUS], (instrs FMOV_S_MMR6, FMUL_S_MMR6,
+ FSUB_S_MMR6, FMOV_D_MMR6)>;
+
+def : InstRW<[GenericWriteFPUL], (instrs FDIV_S_MMR6)>;
+
+def : InstRW<[GenericWriteFPUStore], (instrs SDC1_D64_MMR6)>;
+
+def : InstRW<[GenericWriteFPULoad], (instrs LDC1_D64_MMR6)>;
+
+// MIPS64
+// ======
+
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instrs DMFC1, DMTC1)>;
// MIPS DSP ASE, HasDSP
// ====================
+def : InstRW<[GenericWriteStore], (instrs SWDSP)>;
+
+def : InstRW<[GenericWriteLoad], (instrs LWDSP)>;
+
+def : InstRW<[GenericWriteMove], (instrs PseudoMTLOHI_DSP)>;
+
def GenericDSP : ProcResource<1> { let BufferSize = 1; }
def GenericDSPShort : SchedWriteRes<[GenericDSP]> { let Latency = 2; }
def GenericDSPLong : SchedWriteRes<[GenericDSP]> { let Latency = 6; }
@@ -634,6 +1146,11 @@ def : InstRW<[GenericDSPShort], (instregex "^SUBU_QB$")>;
def : InstRW<[GenericDSPShort], (instregex "^SUBU_S_QB$")>;
def : InstRW<[GenericDSPShort], (instregex "^WRDSP$")>;
+def : InstRW<[GenericDSPShort],
+ (instregex "^Pseudo(CMP|CMPU)_(EQ|LE|LT)_(PH|QB)$")>;
+def : InstRW<[GenericDSPShort],
+ (instregex "^PseudoPICK_(PH|QB)$")>;
+
// MIPS DSP R2 - hasDSP, HasDSPR2, InMicroMips
// ===========================================
@@ -687,6 +1204,10 @@ def : InstRW<[GenericDSPShort], (instregex "^SUBUH_R_QB$")>;
// microMIPS DSP R1 - HasDSP, InMicroMips
// ======================================
+def : InstRW<[GenericWriteLoad], (instrs LWDSP_MM)>;
+
+def : InstRW<[GenericWriteStore], (instrs SWDSP_MM)>;
+
def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_PH_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_W_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^ADDQ_PH_MM$")>;
@@ -740,7 +1261,6 @@ def : InstRW<[GenericDSPShort], (instregex "^MAQ_S_W_PHR_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MFHI_DSP_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MFLO_DSP_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MODSUB_MM$")>;
-def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MMR6$")>;
def : InstRW<[GenericDSPShort], (instregex "^MOVN_I_MM$")>;
def : InstRW<[GenericDSPShort], (instregex "^MOVZ_I_MM$")>;
@@ -902,12 +1422,14 @@ def : InstRW<[GenericWriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>;
-// and.v, andi.b, move.v, ldi.[bhwd], xor.v, nor.v, xori.b, nori.b
+// and.v, andi.b, move.v, ldi.[bhwd], xor.v, nor.v, xori.b, nori.b, lsa
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^MOVE_V$")>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instrs LSA)>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
-def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+def : InstRW<[GenericWriteMSAShortLogic],
+ (instregex "^(AND|OR|[XN]OR)_V_[DHW]_PSEUDO$")>;
// vshf.[bhwd], binsl.[bhwd], binsr.[bhwd], insert.[bhwd], sld?.[bhwd],
// bset.[bhwd], bclr.[bhwd], bneg.[bhwd], bsel_v, bseli_b
@@ -921,8 +1443,10 @@ def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+def : InstRW<[GenericWriteMSAShortInt],
+ (instregex "^BSEL_(H|W|D|FW|FD)_PSEUDO$")>;
-// pcnt.[bhwd], sat_s.[bhwd], sat_u.bhwd]
+// pcnt.[bhwd], sat_s.[bhwd], sat_u.[bhwd]
def : InstRW<[GenericWriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
def : InstRW<[GenericWriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
@@ -935,10 +1459,6 @@ def : InstRW<[GenericWriteMSAShortInt], (instregex "^SHF_[BHW]$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^FILL_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortInt], (instregex "^(SPLAT|SPLATI)_[BHWD]$")>;
-// pcnt.[bhwd], sat_s.[bhwd], sat_u.bhwd]
-def : InstRW<[GenericWriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
-def : InstRW<[GenericWriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
-
// fexp2_w, fexp2_d
def : InstRW<[GenericWriteFPUS], (instregex "^FEXP2_(W|D)$")>;
@@ -953,6 +1473,15 @@ def : InstRW<[GenericWriteFPUS], (instregex "^CMP_LT_(S|D)$")>;
def : InstRW<[GenericWriteFPUS], (instregex "^CMP_ULT_(S|D)$")>;
def : InstRW<[GenericWriteFPUS], (instregex "^CMP_LE_(S|D)$")>;
def : InstRW<[GenericWriteFPUS], (instregex "^CMP_ULE_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_F_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SAF_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SEQ_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SLE_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SLT_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SUEQ_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SULE_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SULT_(D|S)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_SUN_(D|S)$")>;
def : InstRW<[GenericWriteFPUS], (instregex "^FS(AF|EQ|LT|LE|NE|OR)_(W|D)$")>;
def : InstRW<[GenericWriteFPUS], (instregex "^FSUEQ_(W|D)$")>;
def : InstRW<[GenericWriteFPUS], (instregex "^FSULE_(W|D)$")>;
@@ -995,7 +1524,6 @@ def : InstRW<[GenericWriteFPUS], (instregex "^FLOG2_(W|D)$")>;
// interleave right/left, interleave even/odd, insert
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(ILVR|ILVL)_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(ILVEV|ILVOD)_[BHWD]$")>;
-def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
// subs_?.[bhwd], subsus_?.[bhwd], subsuu_?.[bhwd], subvi.[bhwd], subv.[bhwd],
def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBS_(S|U)_[BHWD]$")>;
@@ -1027,6 +1555,8 @@ def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SLL|SLLI)_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(PCKEV|PCKOD)_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSERT_F(D|W)_PSEUDO$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^FILL_F(D|W)_PSEUDO$")>;
// dpadd_?.[bhwd], dpsub_?.[bhwd], dotp_?.[bhwd], msubv.[bhwd], maddv.[bhwd]
// mulv.[bhwd].
@@ -1062,5 +1592,23 @@ def : InstRW<[GenericWriteFPUMoveGPRFPU], (instregex "^COPY_U_[BHW]$")>;
def : InstRW<[GenericWriteFPUMoveGPRFPU], (instregex "^COPY_S_[BHWD]$")>;
def : InstRW<[GenericWriteFPUStore], (instregex "^ST_[BHWD]$")>;
+def : InstRW<[GenericWriteFPUStore], (instrs ST_F16)>;
def : InstRW<[GenericWriteFPULoad], (instregex "^LD_[BHWD]$")>;
+def : InstRW<[GenericWriteFPULoad], (instrs LD_F16)>;
+
+// Atomic instructions
+
+// FIXME: Define `WriteAtomic` in the MipsSchedule.td and
+// attach it to the Atomic2OpsPostRA, AtomicCmpSwapPostRA, ...
+// classes. Then just define resources for the `WriteAtomic` in each
+// machine model.
+def GenericAtomic : ProcResource<1> { let BufferSize = 1; }
+def GenericWriteAtomic : SchedWriteRes<[GenericAtomic]> { let Latency = 2; }
+
+def : InstRW<[GenericWriteAtomic],
+ (instregex "^ATOMIC_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[GenericWriteAtomic],
+ (instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[GenericWriteAtomic],
+ (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
}
diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td
index 846fa11494c7..f97b03bff08e 100644
--- a/lib/Target/Mips/MipsScheduleP5600.td
+++ b/lib/Target/Mips/MipsScheduleP5600.td
@@ -1,9 +1,8 @@
//==- MipsScheduleP5600.td - P5600 Scheduling Definitions --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,12 +12,13 @@ def MipsP5600Model : SchedMachineModel {
int LoadLatency = 4;
int MispredictPenalty = 8; // TODO: Estimated
- let CompleteModel = 0;
+ let CompleteModel = 1;
let FullInstRWOverlapCheck = 1;
- list<Predicate> UnsupportedFeatures = [HasMips32r6, HasMips64r6,
- HasMips3, HasMips64r2, HasCnMips,
- InMicroMips, InMips16Mode,
+ list<Predicate> UnsupportedFeatures = [HasMips3, HasMips32r6, HasMips64,
+ HasMips64r2, HasMips64r5, HasMips64r6,
+ IsGP64bit, IsPTR64bit,
+ InMicroMips, InMips16Mode, HasCnMips,
HasDSP, HasDSPR2, HasMT, HasCRC];
}
@@ -59,15 +59,21 @@ def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> {
let Latency = 2;
}
+def P5600Nop : SchedWriteRes<[P5600IssueCTISTD]> {
+ let Latency = 0;
+}
+
+def : InstRW<[P5600Nop], (instrs SSNOP, NOP)>;
+
// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal,
// jalr, jr.hb, jr
def : InstRW<[P5600WriteJump], (instrs B, BAL, BAL_BR, BEQ, BEQL, BGEZ, BGEZAL,
BGEZALL, BGEZL, BGTZ, BGTZL, BLEZ, BLEZL, BLTZ,
BLTZAL, BLTZALL, BLTZL, BNE, BNEL, BREAK,
- DERET, ERET, ERETNC, J, JR, JR_HB,
+ DERET, ERET, ERet, ERETNC, J, JR, JR_HB,
PseudoIndirectBranch,
PseudoIndirectHazardBranch, PseudoReturn,
- SDBBP, SSNOP, SYSCALL, TAILCALL, TAILCALLREG,
+ SDBBP, SYSCALL, RetRA, TAILCALL, TAILCALLREG,
TAILCALLREGHB, TEQ, TEQI, TGE, TGEI, TGEIU,
TGEU, TLT, TLTI, TLTU, TNE, TNEI, TRAP,
TTLTIU, WAIT, PAUSE)>;
@@ -90,6 +96,11 @@ def : InstRW<[P5600COP2], (instrs MFC2, MTC2)> {
let Unsupported = 1;
}
+// MIPS Virtualization ASE
+// =======================
+def : InstRW<[P5600COP0], (instrs HYPCALL, MFGC0, MFHGC0, MTGC0, MTHGC0,
+ TLBGINV, TLBGINVF, TLBGP, TLBGR, TLBGWI, TLBGWR)>;
+
// LDST Pipeline
// -------------
@@ -288,6 +299,8 @@ def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
def : InstRW<[P5600WriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+def : InstRW<[P5600WriteMSAShortInt],
+ (instregex "^BSEL_(H|W|D|FW|FD)_PSEUDO$")>;
// pcnt.[bhwd], sat_s.[bhwd], sat_u.[bhwd]
def : InstRW<[P5600WriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
@@ -335,6 +348,10 @@ def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>;
def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+def : InstRW<[P5600WriteMSAShortLogic],
+ (instregex "^(AND|OR|[XN]OR)_V_[DHW]_PSEUDO$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^FILL_F(D|W)_PSEUDO$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^INSERT_F(D|W)_PSEUDO$")>;
// fexp2_w, fexp2_d
def : InstRW<[P5600WriteFPUS], (instregex "^FEXP2_(W|D)$")>;
@@ -427,17 +444,19 @@ def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
// ----------
//
// add.[ds], add.ps, cvt.d.[sw], cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps,
-// cvt.ps.[sw], c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps, sub.[ds], sub.ps,
-// trunc.w.[ds], trunc.w.ps
+// cvt.ps.[sw], cvt.s.(pl|pu), c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps,
+// pl[lu].ps, sub.[ds], sub.ps, trunc.w.[ds], trunc.w.ps
def : InstRW<[P5600WriteFPUL],
(instrs FADD_D32, FADD_D64, FADD_S, FMUL_D32, FMUL_D64, FMUL_S,
FSUB_D32, FSUB_D64, FSUB_S)>;
def : InstRW<[P5600WriteFPUL], (instregex "^TRUNC_(L|W)_(S|D32|D64)$")>;
def : InstRW<[P5600WriteFPUL],
(instregex "^CVT_(S|D32|D64|L|W)_(S|D32|D64|L|W)$")>;
+def : InstRW<[P5600WriteFPUL], (instrs CVT_PS_S64, CVT_S_PL64, CVT_S_PU64)>;
def : InstRW<[P5600WriteFPUL], (instregex "^C_[A-Z]+_(S|D32|D64)$")>;
def : InstRW<[P5600WriteFPUL], (instregex "^FCMP_(S32|D32|D64)$")>;
def : InstRW<[P5600WriteFPUL], (instregex "^PseudoCVT_(S|D32|D64)_(L|W)$")>;
+def : InstRW<[P5600WriteFPUL], (instrs PLL_PS64, PLU_PS64)>;
// div.[ds], div.ps
def : InstRW<[P5600WriteFPUDivS], (instrs FDIV_S)>;
@@ -555,16 +574,20 @@ def : InstRW<[P5600WriteMoveFPUToGPR], (instrs BC1F, BC1FL, BC1T, BC1TL, CFC1,
ExtractElementF64_64)>;
// swc1, swxc1, st.[bhwd]
-def : InstRW<[P5600WriteStoreFPUS], (instrs SDC1, SDXC1, SUXC1, SWC1, SWXC1)>;
+def : InstRW<[P5600WriteStoreFPUS], (instrs SDC1, SDC164, SDXC1, SDXC164,
+ SWC1, SWXC1, SUXC1, SUXC164)>;
def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>;
+def : InstRW<[P5600WriteStoreFPUS], (instrs ST_F16)>;
// movn.[ds], movz.[ds]
def : InstRW<[P5600WriteStoreFPUL], (instrs MOVN_I_D32, MOVN_I_D64, MOVN_I_S,
MOVZ_I_D32, MOVZ_I_D64, MOVZ_I_S)>;
// l[dw]x?c1, ld.[bhwd]
-def : InstRW<[P5600WriteLoadFPU], (instrs LDC1, LDXC1, LWC1, LWXC1, LUXC1)>;
+def : InstRW<[P5600WriteLoadFPU], (instrs LDC1, LDC164, LDXC1, LDXC164,
+ LWC1, LWXC1, LUXC1, LUXC164)>;
def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>;
+def : InstRW<[P5600WriteLoadFPU], (instrs LD_F16)>;
// Unsupported Instructions
// ========================
@@ -593,4 +616,20 @@ def : InstRW<[P5600WriteFPUL], (instregex "^ROUND_(L|W)_(S|D32|D64)$")>;
// Reason behind guess: rotr is in the same category and the two register forms
// generally follow the immediate forms in this category
def : InstRW<[P5600WriteEitherALU], (instrs ROTRV)>;
+
+// Atomic instructions
+
+// FIXME: Define `WriteAtomic` in the MipsSchedule.td and
+// attach it to the Atomic2OpsPostRA, AtomicCmpSwapPostRA, ...
+// classes. Then just define resources for the `WriteAtomic` in each
+// machine model.
+def P5600Atomic : ProcResource<1> { let BufferSize = 1; }
+def P5600WriteAtomic : SchedWriteRes<[P5600Atomic]> { let Latency = 2; }
+
+def : InstRW<[P5600WriteAtomic],
+ (instregex "^ATOMIC_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[P5600WriteAtomic],
+ (instregex "^ATOMIC_CMP_SWAP_I(8|16|32|64)_POSTRA$")>;
+def : InstRW<[P5600WriteAtomic],
+ (instregex "^ATOMIC_LOAD_(ADD|SUB|AND|OR|XOR|NAND)_I(8|16|32|64)_POSTRA$")>;
}
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 0c39a45467c4..d021b3d021b1 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- MipsSubtarget.cpp - Mips Subtarget Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -73,7 +72,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
unsigned StackAlignOverride)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault),
IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false),
- NoABICalls(false), IsFP64bit(false), UseOddSPReg(true),
+ NoABICalls(false), Abs2008(false), IsFP64bit(false), UseOddSPReg(true),
IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), HasCnMips(false),
HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
@@ -109,6 +108,11 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
"See -mattr=+fp64.",
false);
+ if (isFP64bit() && !hasMips64() && hasMips32() && !hasMips32r2())
+ report_fatal_error(
+ "FPU with 64-bit registers is not available on MIPS32 pre revision 2. "
+ "Use -mcpu=mips32r2 or greater.");
+
if (!isABI_O32() && !useOddSPReg())
report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.", false);
@@ -129,11 +133,18 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
report_fatal_error(
"indirect jumps with hazard barriers requires MIPS32R2 or later");
}
+ if (inAbs2008Mode() && hasMips32() && !hasMips32r2()) {
+ report_fatal_error("IEEE 754-2008 abs.fmt is not supported for the given "
+ "architecture.",
+ false);
+ }
+
if (hasMips32r6()) {
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
assert(isFP64bit());
assert(isNaN2008());
+ assert(inAbs2008Mode());
if (hasDSP())
report_fatal_error(ISA + " is not compatible with the DSP ASE", false);
}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index ad8f4848b870..aa1200579fc8 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -1,9 +1,8 @@
//===-- MipsSubtarget.h - Define Subtarget for the Mips ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -87,6 +86,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// NoABICalls - Disable SVR4-style position-independent code.
bool NoABICalls;
+ // Abs2008 - Use IEEE 754-2008 abs.fmt instruction.
+ bool Abs2008;
+
// IsFP64bit - The target processor has 64-bit floating point registers.
bool IsFP64bit;
@@ -273,6 +275,7 @@ public:
bool useOddSPReg() const { return UseOddSPReg; }
bool noOddSPReg() const { return !UseOddSPReg; }
bool isNaN2008() const { return IsNaN2008bit; }
+ bool inAbs2008Mode() const { return Abs2008; }
bool isGP64bit() const { return IsGP64bit; }
bool isGP32bit() const { return !IsGP64bit; }
unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 8466298cf36f..c878abb042e4 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- MipsTargetMachine.cpp - Define TargetMachine for Mips -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "MipsSEISelDAGToDAG.h"
#include "MipsSubtarget.h"
#include "MipsTargetObjectFile.h"
+#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -205,8 +205,7 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const {
void MipsTargetMachine::resetSubtarget(MachineFunction *MF) {
LLVM_DEBUG(dbgs() << "resetSubtarget\n");
- Subtarget = const_cast<MipsSubtarget *>(getSubtargetImpl(MF->getFunction()));
- MF->setSubtarget(Subtarget);
+ Subtarget = &MF->getSubtarget<MipsSubtarget>();
}
namespace {
@@ -240,6 +239,8 @@ public:
bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
} // end anonymous namespace
@@ -248,6 +249,10 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
return new MipsPassConfig(*this, PM);
}
+std::unique_ptr<CSEConfigBase> MipsPassConfig::getCSEConfig() const {
+ return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
+
void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createAtomicExpandPass());
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index d9b73d151119..25300504a02d 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -1,9 +1,8 @@
//===- MipsTargetMachine.h - Define TargetMachine for Mips ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,7 +29,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
// Selected ABI
MipsABIInfo ABI;
- MipsSubtarget *Subtarget;
+ const MipsSubtarget *Subtarget;
MipsSubtarget DefaultSubtarget;
MipsSubtarget NoMips16Subtarget;
MipsSubtarget Mips16Subtarget;
@@ -66,10 +65,6 @@ public:
bool isLittleEndian() const { return isLittle; }
const MipsABIInfo &getABI() const { return ABI; }
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
/// Mips32/64 big endian target machine.
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index f53ee0631b5e..0852b5a18c68 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- MipsTargetObjectFile.cpp - Mips Object Files ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index a37ec154ff79..bdf485f83260 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index a282366f6d40..1fa8ebadd643 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- MipsTargetStreamer.h - Mips Target Streamer ------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -92,6 +91,7 @@ public:
// PIC support
virtual void emitDirectiveCpLoad(unsigned RegNo);
+ virtual void emitDirectiveCpLocal(unsigned RegNo);
virtual bool emitDirectiveCpRestore(int Offset,
function_ref<unsigned()> GetATReg,
SMLoc IDLoc, const MCSubtargetInfo *STI);
@@ -200,6 +200,7 @@ protected:
bool FrameInfoSet;
int FrameOffset;
unsigned FrameReg;
+ unsigned GPReg;
unsigned ReturnReg;
private:
@@ -275,6 +276,7 @@ public:
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
+ void emitDirectiveCpLocal(unsigned RegNo) override;
/// Emit a .cprestore directive. If the offset is out of range then it will
/// be synthesized using the assembler temporary.
@@ -346,6 +348,7 @@ public:
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
+ void emitDirectiveCpLocal(unsigned RegNo) override;
bool emitDirectiveCpRestore(int Offset, function_ref<unsigned()> GetATReg,
SMLoc IDLoc, const MCSubtargetInfo *STI) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 22be564b6502..0082ca34cdbd 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "Mips.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.h b/lib/Target/Mips/TargetInfo/MipsTargetInfo.h
new file mode 100644
index 000000000000..d91a2719108d
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.h
@@ -0,0 +1,23 @@
+//===-- MipsTargetInfo.h - Mips Target Implementation -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_MIPS_TARGETINFO_MIPSTARGETINFO_H
+#define LLVM_LIB_TARGET_MIPS_TARGETINFO_MIPSTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheMipsTarget();
+Target &getTheMipselTarget();
+Target &getTheMips64Target();
+Target &getTheMips64elTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_TARGETINFO_MIPSTARGETINFO_H
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 1cb92005979d..815b600fe93a 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -1,9 +1,8 @@
//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index b774fe169d71..b6eefe206268 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXInstPrinter.cpp - PTX assembly instruction printing ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/NVPTXInstPrinter.h"
+#include "MCTargetDesc/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/MC/MCExpr.h"
@@ -270,6 +269,20 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
llvm_unreachable("Empty Modifier");
}
+void NVPTXInstPrinter::printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
+ const char *Modifier) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ int Imm = (int)MO.getImm();
+ if (Modifier == nullptr || strcmp(Modifier, "version") == 0) {
+ O << Imm; // Just print out PTX version
+ } else if (strcmp(Modifier, "aligned") == 0) {
+ // PTX63 requires '.aligned' in the name of the instruction.
+ if (Imm >= 63)
+ O << ".aligned";
+ } else
+ llvm_unreachable("Unknown Modifier");
+}
+
void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
printOperand(MI, OpNum, O);
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index f0f223aa057b..c38472925a29 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -1,9 +1,8 @@
//= NVPTXInstPrinter.h - Convert NVPTX MCInst to assembly syntax --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
-#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXINSTPRINTER_H
+#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
@@ -41,6 +40,8 @@ public:
const char *Modifier = nullptr);
void printLdStCode(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
+ void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index f6cbd23f01c4..556745825a15 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,12 +37,11 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
HiddenDeclarationVisibilityAttr = HiddenVisibilityAttr = MCSA_Invalid;
ProtectedVisibilityAttr = MCSA_Invalid;
- // FIXME: remove comment once debug info is properly supported.
- Data8bitsDirective = "// .b8 ";
+ Data8bitsDirective = ".b8 ";
Data16bitsDirective = nullptr; // not supported
- Data32bitsDirective = "// .b32 ";
- Data64bitsDirective = "// .b64 ";
- ZeroDirective = "// .b8";
+ Data32bitsDirective = ".b32 ";
+ Data64bitsDirective = ".b64 ";
+ ZeroDirective = ".b8";
AsciiDirective = nullptr; // not supported
AscizDirective = nullptr; // not supported
SupportsQuotedNames = false;
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index 9fd7600cf67f..e888526da898 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index b1a77a17ec15..c8b85b2718a6 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,10 +10,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/NVPTXInstPrinter.h"
+#include "NVPTXInstPrinter.h"
#include "NVPTXMCAsmInfo.h"
#include "NVPTXMCTargetDesc.h"
#include "NVPTXTargetStreamer.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 0c9ad977e7ec..e1691d2384e6 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,9 +18,6 @@
namespace llvm {
class Target;
-Target &getTheNVPTXTarget32();
-Target &getTheNVPTXTarget64();
-
} // End llvm namespace
// Defines symbolic names for PTX registers.
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index f7b4cf3a0f72..17f5ba7d900b 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -1,9 +1,8 @@
//=====- NVPTXTargetStreamer.cpp - NVPTXTargetStreamer class ------------=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,6 +30,11 @@ void NVPTXTargetStreamer::outputDwarfFileDirectives() {
DwarfFiles.clear();
}
+void NVPTXTargetStreamer::closeLastSection() {
+ if (HasSections)
+ getStreamer().EmitRawText("\t}");
+}
+
void NVPTXTargetStreamer::emitDwarfFileDirective(StringRef Directive) {
DwarfFiles.emplace_back(Directive);
}
@@ -82,22 +86,27 @@ void NVPTXTargetStreamer::changeSection(const MCSection *CurSection,
raw_ostream &OS) {
assert(!SubSection && "SubSection is not null!");
const MCObjectFileInfo *FI = getStreamer().getContext().getObjectFileInfo();
- // FIXME: remove comment once debug info is properly supported.
// Emit closing brace for DWARF sections only.
if (isDwarfSection(FI, CurSection))
- OS << "//\t}\n";
+ OS << "\t}\n";
if (isDwarfSection(FI, Section)) {
// Emit DWARF .file directives in the outermost scope.
outputDwarfFileDirectives();
- OS << "//\t.section";
+ OS << "\t.section";
Section->PrintSwitchToSection(*getStreamer().getContext().getAsmInfo(),
FI->getTargetTriple(), OS, SubSection);
// DWARF sections are enclosed into braces - emit the open one.
- OS << "//\t{\n";
+ OS << "\t{\n";
+ HasSections = true;
}
}
void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
+ MCTargetStreamer::emitRawBytes(Data);
+  // TODO: enable this once the ptxas bug with packed bytes is resolved.
+  // Currently (confirmed by NVIDIA) it causes a crash in ptxas.
+#if 0
const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
const char *Directive = MAI->getData8bitsDirective();
unsigned NumElements = Data.size();
@@ -121,5 +130,6 @@ void NVPTXTargetStreamer::emitRawBytes(StringRef Data) {
}
Streamer.EmitRawText(OS.str());
}
+#endif
}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
index f18e61cdca57..8185efadefdb 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h
@@ -1,9 +1,8 @@
//=====-- NVPTXTargetStreamer.h - NVPTX Target Streamer ------*- C++ -*--=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,6 +18,7 @@ class MCSection;
class NVPTXTargetStreamer : public MCTargetStreamer {
private:
SmallVector<std::string, 4> DwarfFiles;
+ bool HasSections = false;
public:
NVPTXTargetStreamer(MCStreamer &S);
@@ -26,6 +26,8 @@ public:
/// Outputs the list of the DWARF '.file' directives to the streamer.
void outputDwarfFileDirectives();
+ /// Close last section.
+ void closeLastSection();
/// Record DWARF file directives for later output.
/// According to PTX ISA, CUDA Toolkit documentation, 11.5.3. Debugging
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index 7fc0156216f5..bbcbb4598040 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -1,9 +1,8 @@
//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 07bfc58a8da7..6530c40ea100 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -1,9 +1,8 @@
//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,14 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
-#include "MCTargetDesc/NVPTXBaseInfo.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <iosfwd>
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
class NVPTXTargetMachine;
@@ -55,9 +48,6 @@ BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
-Target &getTheNVPTXTarget32();
-Target &getTheNVPTXTarget64();
-
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 3731b2f37f6c..1d947ef1ce62 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -1,9 +1,8 @@
//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the NVPTX target.
@@ -76,6 +75,8 @@ def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61",
"Use PTX version 6.1">;
def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
"Use PTX version 6.3">;
+def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64",
+ "Use PTX version 6.4">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index bf922eb8a195..f2c7751df1df 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -1,9 +1,8 @@
//===-- AllocaHoisting.cpp - Hoist allocas to the entry block --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 7a6fc7d9b14d..d7de8e3a2f46 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -1,9 +1,8 @@
//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 6284ad8b82e8..5f38b4a3c4c5 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAsmPrinter.h"
-#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "MCTargetDesc/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "MCTargetDesc/NVPTXTargetStreamer.h"
#include "NVPTX.h"
@@ -24,6 +23,7 @@
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
#include "cl_common_defines.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -473,6 +473,9 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
// Emit open brace for function body.
OutStreamer->EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
+ // Emit initial .loc debug directive for correct relocation symbol data.
+ if (MMI && MMI->hasDebugInfo())
+ emitInitialRawDwarfLocDirective(*MF);
}
bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) {
@@ -597,36 +600,6 @@ void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
O << getVirtualRegisterName(vr);
}
-void NVPTXAsmPrinter::printVecModifiedImmediate(
- const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
- static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
- int Imm = (int) MO.getImm();
- if (0 == strcmp(Modifier, "vecelem"))
- O << "_" << vecelem[Imm];
- else if (0 == strcmp(Modifier, "vecv4comm1")) {
- if ((Imm < 0) || (Imm > 3))
- O << "//";
- } else if (0 == strcmp(Modifier, "vecv4comm2")) {
- if ((Imm < 4) || (Imm > 7))
- O << "//";
- } else if (0 == strcmp(Modifier, "vecv4pos")) {
- if (Imm < 0)
- Imm = 0;
- O << "_" << vecelem[Imm % 4];
- } else if (0 == strcmp(Modifier, "vecv2comm1")) {
- if ((Imm < 0) || (Imm > 1))
- O << "//";
- } else if (0 == strcmp(Modifier, "vecv2comm2")) {
- if ((Imm < 2) || (Imm > 3))
- O << "//";
- } else if (0 == strcmp(Modifier, "vecv2pos")) {
- if (Imm < 0)
- Imm = 0;
- O << "_" << vecelem[Imm % 2];
- } else
- llvm_unreachable("Unknown Modifier on immediate operand");
-}
-
void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
emitLinkageDirective(F, O);
if (isKernelFunction(*F))
@@ -899,9 +872,8 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
if (HasFullDebugInfo)
break;
}
- // FIXME: remove comment once debug info is properly supported.
if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
- O << "//, debug";
+ O << ", debug";
O << "\n";
@@ -952,10 +924,13 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
clearAnnotationCache(&M);
delete[] gv_array;
- // FIXME: remove comment once debug info is properly supported.
// Close the last emitted section
- if (HasDebugInfo)
- OutStreamer->EmitRawText("//\t}");
+ if (HasDebugInfo) {
+ static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
+ ->closeLastSection();
+    // Emit an empty .debug_loc section for better support of empty files.
+ OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}");
+ }
// Output last DWARF .file directives, if any.
static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
@@ -2199,7 +2174,6 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0)
@@ -2208,7 +2182,7 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
case 'r':
break;
}
@@ -2219,9 +2193,10 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
- const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ const char *ExtraCode,
+ raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier
@@ -2233,7 +2208,7 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
}
void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O, const char *Modifier) {
+ raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -2245,29 +2220,23 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
} else {
emitVirtualRegister(MO.getReg(), O);
}
- return;
+ break;
case MachineOperand::MO_Immediate:
- if (!Modifier)
- O << MO.getImm();
- else if (strstr(Modifier, "vec") == Modifier)
- printVecModifiedImmediate(MO, Modifier, O);
- else
- llvm_unreachable(
- "Don't know how to handle modifier on immediate operand");
- return;
+ O << MO.getImm();
+ break;
case MachineOperand::MO_FPImmediate:
printFPConstant(MO.getFPImm(), O);
break;
case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
+ PrintSymbolOperand(MO, O);
break;
case MachineOperand::MO_MachineBasicBlock:
MO.getMBB()->getSymbol()->print(O, MAI);
- return;
+ break;
default:
llvm_unreachable("Operand type not supported.");
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 44a09f5fe513..43ae57ac1262 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -1,9 +1,8 @@
//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -213,8 +212,6 @@ private:
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
- raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
@@ -231,13 +228,10 @@ private:
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &) override;
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = nullptr);
+ const char *ExtraCode, raw_ostream &) override;
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &) override;
+ const char *ExtraCode, raw_ostream &) override;
const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 41e9ae827180..a8a43cee9ab7 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXAssignValidGlobalNames.cpp - Assign valid names to globals ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index e5e6637967b2..46f08b23d31a 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -1,9 +1,8 @@
//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0a7856b9d5de..40269f58f06e 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -1,9 +1,8 @@
//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index fd63fdbaced6..b36d9b2e240a 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -1,9 +1,8 @@
//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ffc6a59cd6c8..3d2447d75c77 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "NVPTXISelDAGToDAG.h"
#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -702,11 +702,11 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
// We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
// because the former looks through phi nodes while the latter does not. We
// need to look through phi nodes to handle pointer induction variables.
- SmallVector<Value *, 8> Objs;
- GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+ SmallVector<const Value *, 8> Objs;
+ GetUnderlyingObjects(N->getMemOperand()->getValue(),
Objs, F->getDataLayout());
- return all_of(Objs, [&](Value *V) {
+ return all_of(Objs, [&](const Value *V) {
if (auto *A = dyn_cast<const Argument>(V))
return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
if (auto *GV = dyn_cast<const GlobalVariable>(V))
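
The comment above motivates GetUnderlyingObjects() with pointer induction variables. A minimal C++ sketch of such a loop (hypothetical function, for illustration only; __restrict__ stands in for the noalias attribute that canLowerToLDG checks on kernel arguments):

    // 'p' is rewritten every iteration, so inside the loop it is a phi node in
    // the IR. GetUnderlyingObjects() looks through that phi and traces 'p' back
    // to the argument 'in'; a non-phi-aware walk would stop at the phi itself.
    void scale(const float *__restrict__ in, float *out, int n) {
      const float *p = in;        // underlying object: the argument 'in'
      for (int i = 0; i < n; ++i) {
        out[i] = *p * 2.0f;       // load through the induction pointer
        ++p;                      // pointer induction variable
      }
    }
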
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index e911ba0c167d..e4e5069b7a80 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -1,9 +1,8 @@
//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bec8ece29050..ae1aa98da0e8 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -547,13 +546,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// These map to conversion instructions for scalar FP types.
for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
- ISD::FROUND, ISD::FTRUNC}) {
+ ISD::FTRUNC}) {
setOperationAction(Op, MVT::f16, Legal);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);
}
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FROUND, MVT::f64, Custom);
+
+
// 'Expand' implements FCOPYSIGN without calling an external library.
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
@@ -1503,7 +1508,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
// New store.
if (VectorInfo[j] & PVF_FIRST) {
- assert(StoreOperands.empty() && "Unfinished preceeding store.");
+ assert(StoreOperands.empty() && "Unfinished preceding store.");
StoreOperands.push_back(Chain);
StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
@@ -2069,6 +2074,100 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
}
}
+SDValue NVPTXTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ if (VT == MVT::f32)
+ return LowerFROUND32(Op, DAG);
+
+ if (VT == MVT::f64)
+ return LowerFROUND64(Op, DAG);
+
+ llvm_unreachable("unhandled type");
+}
+
+// This is the rounding method used in CUDA libdevice, in C-like code:
+// float roundf(float A)
+// {
+// float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
+// RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
+// return abs(A) < 0.5 ? (float)(int)A : RoundedA;
+// }
+SDValue NVPTXTargetLowering::LowerFROUND32(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue A = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+
+ SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
+
+ // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f))
+ SDValue Bitcast = DAG.getNode(ISD::BITCAST, SL, MVT::i32, A);
+ const int SignBitMask = 0x80000000;
+ SDValue Sign = DAG.getNode(ISD::AND, SL, MVT::i32, Bitcast,
+ DAG.getConstant(SignBitMask, SL, MVT::i32));
+ const int PointFiveInBits = 0x3F000000;
+ SDValue PointFiveWithSignRaw =
+ DAG.getNode(ISD::OR, SL, MVT::i32, Sign,
+ DAG.getConstant(PointFiveInBits, SL, MVT::i32));
+ SDValue PointFiveWithSign =
+ DAG.getNode(ISD::BITCAST, SL, VT, PointFiveWithSignRaw);
+ SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, A, PointFiveWithSign);
+ SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
+
+ // RoundedA = abs(A) > 0x1.0p23 ? A : RoundedA;
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue IsLarge =
+ DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 23.0), SL, VT),
+ ISD::SETOGT);
+ RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
+
+ // return abs(A) < 0.5 ? (float)(int)A : RoundedA;
+  SDValue IsSmall = DAG.getSetCC(SL, SetCCVT, AbsA,
+                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
+ SDValue RoundedAForSmallA = DAG.getNode(ISD::FTRUNC, SL, VT, A);
+ return DAG.getNode(ISD::SELECT, SL, VT, IsSmall, RoundedAForSmallA, RoundedA);
+}
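
Rewritten as ordinary C++, the three-region scheme that LowerFROUND32 builds above looks roughly like this (a scalar model only; the function name is illustrative and standard <cmath> semantics are assumed):

    #include <cmath>

    // Model of the f32 lowering: |A| >= 2^23 is already integral, |A| < 0.5
    // truncates to signed zero, and the middle region adds +/-0.5 with A's
    // sign and then truncates toward zero.
    float froundModel(float A) {
      const float Two23 = 8388608.0f;              // 2^23
      float AbsA = std::fabs(A);
      if (AbsA > Two23)
        return A;                                  // already an integer
      if (AbsA < 0.5f)
        return std::trunc(A);                      // rounds to (signed) zero
      return std::trunc(A + std::copysign(0.5f, A));
    }
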
+
+// The implementation of round(double) is similar to that of round(float) in
+// that they both separate the value range into three regions and use a method
+// specific to the region to round the values. However, round(double) first
+// calculates the round of the absolute value and then adds the sign back while
+// round(float) directly rounds the value with sign.
+SDValue NVPTXTargetLowering::LowerFROUND64(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue A = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+
+ SDValue AbsA = DAG.getNode(ISD::FABS, SL, VT, A);
+
+ // double RoundedA = (double) (int) (abs(A) + 0.5f);
+ SDValue AdjustedA = DAG.getNode(ISD::FADD, SL, VT, AbsA,
+ DAG.getConstantFP(0.5, SL, VT));
+ SDValue RoundedA = DAG.getNode(ISD::FTRUNC, SL, VT, AdjustedA);
+
+ // RoundedA = abs(A) < 0.5 ? (double)0 : RoundedA;
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue IsSmall = DAG.getSetCC(SL, SetCCVT, AbsA,
+                                 DAG.getConstantFP(0.5, SL, VT), ISD::SETOLT);
+ RoundedA = DAG.getNode(ISD::SELECT, SL, VT, IsSmall,
+ DAG.getConstantFP(0, SL, VT),
+ RoundedA);
+
+ // Add sign to rounded_A
+ RoundedA = DAG.getNode(ISD::FCOPYSIGN, SL, VT, RoundedA, A);
+ DAG.getNode(ISD::FTRUNC, SL, VT, A);
+
+ // RoundedA = abs(A) > 0x1.0p52 ? A : RoundedA;
+ SDValue IsLarge =
+ DAG.getSetCC(SL, SetCCVT, AbsA, DAG.getConstantFP(pow(2.0, 52.0), SL, VT),
+ ISD::SETOGT);
+ return DAG.getNode(ISD::SELECT, SL, VT, IsLarge, A, RoundedA);
+}
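
The double variant, per the comment above, rounds the magnitude first and reattaches the sign afterwards. The same model in plain C++ (illustrative name, standard <cmath> assumed):

    #include <cmath>

    double froundModel(double A) {
      double AbsA = std::fabs(A);
      double RoundedA = std::trunc(AbsA + 0.5);    // round the magnitude
      if (AbsA < 0.5)
        RoundedA = 0.0;                            // |A| < 0.5 rounds to zero
      RoundedA = std::copysign(RoundedA, A);       // put A's sign back
      const double Two52 = 4503599627370496.0;     // 2^52: already integral
      return (AbsA > Two52) ? A : RoundedA;
    }
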
+
+
+
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2099,6 +2198,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerShiftRightParts(Op, DAG);
case ISD::SELECT:
return LowerSelect(Op, DAG);
+ case ISD::FROUND:
+ return LowerFROUND(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
@@ -2130,7 +2231,7 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemVT = Load->getMemoryVT();
if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- Load->getAddressSpace(), Load->getAlignment())) {
+ *Load->getMemOperand())) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
return DAG.getMergeValues(Ops, SDLoc(Op));
@@ -2173,7 +2274,7 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// stores and have to handle it here.
if (VT == MVT::v2f16 &&
!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- Store->getAddressSpace(), Store->getAlignment()))
+ *Store->getMemOperand()))
return expandUnalignedStore(Store, DAG);
if (VT.isVector())
@@ -3399,6 +3500,94 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 16;
return true;
}
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_col_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_col:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_s8_row_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_a_u8_row:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_col_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_col:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_s8_row_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_b_u8_row: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::v2i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOLoad;
+ Info.align = 8;
+ return true;
+ }
+
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_col_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_col:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_s8_row_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_a_u8_row:
+
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_col_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_col:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_s8_row_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_b_u8_row: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::v4i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOLoad;
+ Info.align = 16;
+ return true;
+ }
+
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_col_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_col:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_s8_row_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_b_u8_row:
+
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_col_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_col:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_s8_row_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_a_u8_row:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_a_b1_row_stride:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_b_b1_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_a_s4_row_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_a_u4_row:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOLoad;
+ Info.align = 4;
+ return true;
+ }
case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col:
case Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row:
@@ -3442,6 +3631,44 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
return true;
}
+ case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row:
+ case Intrinsic::nvvm_wmma_m16n16k16_load_c_s32_row_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row:
+ case Intrinsic::nvvm_wmma_m32n8k16_load_c_s32_row_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row:
+ case Intrinsic::nvvm_wmma_m8n32k16_load_c_s32_row_stride: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::v8i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOLoad;
+ Info.align = 16;
+ return true;
+ }
+
+ case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row:
+ case Intrinsic::nvvm_wmma_m8n8k128_load_c_s32_row_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
+ case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::v2i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOLoad;
+ Info.align = 8;
+ return true;
+ }
+
case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col:
case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row:
case Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride:
@@ -3484,8 +3711,44 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
return true;
}
- case Intrinsic::nvvm_atomic_load_add_f32:
- case Intrinsic::nvvm_atomic_load_add_f64:
+ case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col:
+ case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row:
+ case Intrinsic::nvvm_wmma_m16n16k16_store_d_s32_row_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col:
+ case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row:
+ case Intrinsic::nvvm_wmma_m32n8k16_store_d_s32_row_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col:
+ case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row:
+ case Intrinsic::nvvm_wmma_m8n32k16_store_d_s32_row_stride: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::v8i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOStore;
+ Info.align = 16;
+ return true;
+ }
+
+ case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col:
+ case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row:
+ case Intrinsic::nvvm_wmma_m8n8k128_store_d_s32_row_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col:
+ case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_col_stride:
+ case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row:
+ case Intrinsic::nvvm_wmma_m8n8k32_store_d_s32_row_stride: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::v2i32;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.flags = MachineMemOperand::MOStore;
+ Info.align = 8;
+ return true;
+ }
+
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 66fab2b6f480..ef645fc1e541 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -1,9 +1,8 @@
//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -557,6 +556,10 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index ad1d7cbb52fc..74ab2f7b8453 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
index ffcb5d5273a2..77961c386827 100644
--- a/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -1,9 +1,8 @@
//===- NVPTXInstrFormats.td - NVPTX Instruction Formats-------*- tblgen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 50815bff6c67..f928b44c91e0 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -1,9 +1,8 @@
//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 4ab1bb481958..7c0912808f7b 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -1,9 +1,8 @@
//===- NVPTXInstrInfo.h - NVPTX Instruction Information----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the niversity of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 02a40b9f5262..62da3c79f465 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1,9 +1,8 @@
//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -143,9 +142,12 @@ def true : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
+def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
+def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;
+def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">;
def useShortPtr : Predicate<"useShortPointers()">;
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
@@ -1549,6 +1551,10 @@ def LdStCode : Operand<i32> {
let PrintMethod = "printLdStCode";
}
+def MmaCode : Operand<i32> {
+ let PrintMethod = "printMmaCode";
+}
+
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
@@ -3003,15 +3009,6 @@ def : Pat<(ffloor Float32Regs:$a),
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
-def : Pat<(f16 (fround Float16Regs:$a)),
- (CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
-def : Pat<(fround Float32Regs:$a),
- (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f32 (fround Float32Regs:$a)),
- (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
-def : Pat<(f64 (fround Float64Regs:$a)),
- (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
-
def : Pat<(ftrunc Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(ftrunc Float32Regs:$a),
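
The removed patterns above lowered fround through cvt.rni, which per the PTX ISA rounds ties to the nearest even integer; ISD::FROUND carries roundf semantics, where ties go away from zero, hence the custom lowering added in NVPTXISelLowering.cpp. A small host-side C++ check of the difference (standard library behaviour, default rounding mode assumed):

    #include <cmath>
    #include <cstdio>

    int main() {
      // roundf: halfway cases are rounded away from zero.
      std::printf("roundf(2.5f)  = %g\n", std::roundf(2.5f));   // 3
      std::printf("roundf(-2.5f) = %g\n", std::roundf(-2.5f));  // -3
      // rintf in the default mode rounds ties to even, like cvt.rni.
      std::printf("rintf(2.5f)   = %g\n", std::rintf(2.5f));    // 2
      std::printf("rintf(-2.5f)  = %g\n", std::rintf(-2.5f));   // -2
      return 0;
    }
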
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 47dcdcf6e0bd..1752d3e0575e 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1,9 +1,8 @@
//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,7 +26,35 @@ def immDouble1 : PatLeaf<(fpimm), [{
return (d==1.0);
}]>;
+def AS_match {
+ code generic = [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
+ }];
+ code shared = [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
+ }];
+ code global = [{
+ return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
+ }];
+}
+// A node that will be replaced with the current PTX version.
+class PTX {
+ SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
+ return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
+ }]>;
+ // (i32 0) will be XForm'ed to the currently used PTX version.
+ dag version = (PTXVerXform (i32 0));
+}
+def ptx : PTX;
+
+// Generates a list of n sequential register names.
+// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2"]
+class RegSeq<int n, string prefix> {
+ list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
+ [prefix # !add(n, -1)]),
+ []);
+}
//-----------------------------------
// Synchronization and shuffle functions
@@ -1007,17 +1034,11 @@ def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$
//-----------------------------------
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
-}]>;
+ : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
-}]>;
+ : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
- : PatFrag<ops, frag, [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
-}]>;
+ : PatFrag<ops, frag, AS_match.generic>;
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
@@ -1113,18 +1134,12 @@ def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_add_64 node:$a, node:$b)>;
-def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
-def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
-def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
-def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
- (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
+def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_fadd node:$a, node:$b)>;
+def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_fadd node:$a, node:$b)>;
+def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_fadd node:$a, node:$b)>;
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
atomic_load_add_32_g, i32imm, imm>;
@@ -1145,18 +1160,18 @@ defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
".add", atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
- atomic_load_add_f32_g, f32imm, fpimm>;
+ atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
- atomic_load_add_f32_s, f32imm, fpimm>;
+ atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
- atomic_load_add_f32_gen, f32imm, fpimm>;
+ atomic_load_add_gen, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
- atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
+ atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
- atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
+ atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
- atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
+ atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
// atom_sub
@@ -7381,383 +7396,258 @@ def INT_PTX_SREG_WARPSIZE :
NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
[(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
+// In addition to target-independent fields provided by WMMA_REGS, it adds
+// the fields commonly used to implement a specific PTX instruction -- register
+// types and names, constraints, parts of assembly, etc.
+class WMMA_REGINFO<WMMA_REGS r>
+ : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
+ // NVPTX register types used to carry fragment data.
+ NVPTXRegClass regclass = !cond(
+ !eq(ptx_elt_type, "f16") : Float16x2Regs,
+ !eq(ptx_elt_type, "f32") : Float32Regs,
+ !eq(ptx_elt_type, "s32") : Int32Regs,
+ !eq(ptx_elt_type, "s8") : Int32Regs,
+ !eq(ptx_elt_type, "u8") : Int32Regs,
+ !eq(ptx_elt_type, "s4") : Int32Regs,
+ !eq(ptx_elt_type, "u4") : Int32Regs,
+ !eq(ptx_elt_type, "b1") : Int32Regs);
+
+ // Instruction input/output arguments for the fragment.
+ list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
+
+ // List of register names for the fragment -- ["ra0", "ra1",...]
+ list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
+
+  // Generates "{{$r0, $r1, ..., $rN-1}}" for use in asm string construction.
+ string regstring = "{{$" # !head(reg_names)
+ # !foldl("", !tail(reg_names), a, b,
+ !strconcat(a, ", $", b))
+ # "}}";
+
+  // Predicates for a particular fragment variant. Technically these are
+ // per-instruction predicates, but currently all fragments that can be used in
+ // a given instruction are subject to the same constraints, so an instruction
+ // can use predicates from any of its fragments. If/when this is no
+ // longer the case, we can concat all per-fragment predicates to enforce that
+ // all fragments of the instruction are viable.
+ list<Predicate> Predicates = !cond(
+ // fp16 -> fp16/fp32 @ m16n16k16
+ !and(!eq(geom, "m16n16k16"),
+ !or(!eq(ptx_elt_type, "f16"),
+ !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
+
+ // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
+ !and(!or(!eq(geom, "m8n32k16"),
+ !eq(geom, "m32n8k16")),
+ !or(!eq(ptx_elt_type, "f16"),
+ !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
+
+ // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
+ !and(!or(!eq(geom,"m16n16k16"),
+ !eq(geom,"m8n32k16"),
+ !eq(geom,"m32n8k16")),
+ !or(!eq(ptx_elt_type, "u8"),
+ !eq(ptx_elt_type, "s8"),
+ !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
+
+ // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
+ !or(!eq(geom,"m8n8k128"),
+ !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63]);
+
+ // template DAGs for instruction inputs/output.
+ dag Outs = !dag(outs, ptx_regs, reg_names);
+ dag Ins = !dag(ins, ptx_regs, reg_names);
+}
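
RegSeq (defined earlier in this file) and the regstring !foldl above only build the per-fragment operand-name list and the braced asm operand string from it. A small C++ model of that string construction (illustrative names; assumes at least one register):

    #include <string>
    #include <vector>

    // Model of RegSeq<N, Prefix>.ret: ["<Prefix>0", ..., "<Prefix>(N-1)"].
    std::vector<std::string> regSeq(int N, const std::string &Prefix) {
      std::vector<std::string> Names;
      for (int I = 0; I < N; ++I)
        Names.push_back(Prefix + std::to_string(I));
      return Names;
    }

    // Model of the !foldl over reg_names: "{{$ra0, $ra1, ..., $ra(N-1)}}".
    std::string regString(const std::vector<std::string> &Names) {
      std::string S = "{{$" + Names.front();
      for (size_t I = 1; I < Names.size(); ++I)
        S += ", $" + Names[I];
      return S + "}}";
    }

    // Example: regString(regSeq(4, "ra")) yields "{{$ra0, $ra1, $ra2, $ra3}}".
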
+
+// Convert dag of arguments into a dag to match given intrinsic.
+class BuildPatternI<Intrinsic Intr, dag Ins> {
+ // Build a dag pattern that matches the intrinsic call.
+ dag ret = !foreach(tmp, Ins,
+ !subst(imem, ADDRvar,
+ !subst(MEMri64, ADDRri64,
+ !subst(MEMri, ADDRri,
+ !subst(ins, Intr, tmp)))));
+}
+
+// Same as above, but uses PatFrag instead of an Intrinsic.
+class BuildPatternPF<PatFrag Intr, dag Ins> {
+ // Build a dag pattern that matches the intrinsic call.
+ dag ret = !foreach(tmp, Ins,
+ !subst(imem, ADDRvar,
+ !subst(MEMri64, ADDRri64,
+ !subst(MEMri, ADDRri,
+ !subst(ins, Intr, tmp)))));
+}
+
+// Common WMMA-related fields used for building patterns for all MMA instructions.
+class WMMA_INSTR<string _Intr, list<dag> _Args>
+ : NVPTXInst<(outs), (ins), "?", []> {
+ Intrinsic Intr = !cast<Intrinsic>(_Intr);
+ // Concatenate all arguments into a single dag.
+ dag Args = !foldl((ins), _Args, a, b, !con(a,b));
+ // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
+ dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
+}
+
//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
-class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
-
-class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
- string Space, string Type, NVPTXRegClass regclass,
- DAGOperand SrcOp, bit WithStride>
- : EmptyNVPTXInst,
- Requires<[!if(!eq(Geometry, "m16n16k16"),
- hasPTX60,
- hasPTX61),
- hasSM70]> {
- // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
- // for this function.
- PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
- # Geometry # "_load_"
- # !subst("c", "c_" # Type, Abc)
- # "_" # Layout
- # !subst(".", "_", Space)
- # !if(WithStride,"_stride", "")
- # "_Intr");
- dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
- dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
- dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
-
- dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
- dag Ins = !con((ins SrcOp:$src), StrideArg);
-
- // Build a dag pattern that matches the intrinsic call.
- // We want a dag that looks like this:
- // (set <output args>, (intrinsic <input arguments>)) where input and
- // output arguments are named patterns that would match corresponding
- // input/output arguments of the instruction.
- //
- // First we construct (set <output arguments>) from instruction's outs dag by
- // replacing dag operator 'outs' with 'set'.
- dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
- // Similarly, construct (intrinsic <input arguments>) sub-dag from
- // instruction's input arguments, only now we also need to replace operands
- // with patterns that would match them and the operator 'ins' with the
- // intrinsic.
- dag PatArgs = !foreach(tmp, Ins,
- !subst(imem, ADDRvar,
- !subst(MEMri64, ADDRri64,
- !subst(MEMri, ADDRri,
- !subst(ins, IntrMatcher, tmp)))));
- // Finally, consatenate both parts together. !con() requires both dags to have
- // the same operator, so we wrap PatArgs in a (set ...) dag.
- let Pattern = [!con(PatOuts, (set PatArgs))];
- let OutOperandList = Outs;
- let InOperandList = Ins;
+class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
+ DAGOperand SrcOp>
+ : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
+ [!con((ins SrcOp:$src),
+ !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
+ Requires<Frag.Predicates> {
+ // Load/store intrinsics are overloaded on the pointer's address space.
+ // To match the right intrinsic, we need to build an AS-constrained PatFrag.
+ // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
+ dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
+ // Build a PatFrag that matches only a particular address space.
+ PatFrag IntrFrag = PatFrag<PFOperands,
+ !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
+ !cond(!eq(Space, ".shared"): AS_match.shared,
+ !eq(Space, ".global"): AS_match.global,
+ 1: AS_match.generic)>;
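+ // For example, with Space = ".global" the PatFrag predicate is
+ // AS_match.global, so the pattern only matches accesses to the global
+ // address space.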
+ // Build AS-constrained pattern.
+ let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+ let OutOperandList = Frag.Outs;
+ let InOperandList = !con(Args, (ins MmaCode:$ptx));
let AsmString = "wmma.load."
- # Abc
+ # Frag.frag
# ".sync"
+ # "${ptx:aligned}"
# "." # Layout
- # "." # Geometry
+ # "." # Frag.geom
# Space
- # "." # Type # " \t"
- # !if(!eq(Abc#Type, "cf16"),
- "{{$r0, $r1, $r2, $r3}}",
- "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
+ # "." # Frag.ptx_elt_type # " \t"
+ # Frag.regstring
# ", [$src]"
# !if(WithStride, ", $ldm", "")
# ";";
}
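+// For example (illustrative), a row-major f16 'a' fragment load from global
+// memory with a stride operand assembles to roughly:
+//   wmma.load.a.sync${ptx:aligned}.row.m16n16k16.global.f16 \t<regs>, [$src], $ldm;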
-class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
- string Space, string Type, bit WithStride>
- : PatFrag <(ops),(ops)> {
- // Intrinsic that matches this instruction.
- Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
- # "_" # Geometry # "_load_"
- # Abc # "_" # Type # "_" # Layout
- # !if(WithStride,"_stride", ""));
- code match_generic = [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
- }];
- code match_shared = [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
- }];
- code match_global = [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
- }];
-
- let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
- let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
- let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
- !if(!eq(Space, ".global"), match_global, match_generic));
-}
-
-multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
- string Space, string Type, NVPTXRegClass regclass,
- bit WithStride> {
- def _avar: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
- imem, WithStride>;
- def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
- Int32Regs, WithStride>;
- def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
- Int64Regs, WithStride>;
- def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
- MEMri, WithStride>;
- def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
- MEMri64, WithStride>;
-}
-
-multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
- string Space, string Type, NVPTXRegClass regclass,
- bit WithStride> {
- // Define a PatFrag that matches appropriate intrinsic that loads from the
- // given address space.
- def _Intr: WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
- WithStride>;
- defm NAME: WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
- WithStride>;
-}
-
-multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
- string Space, string Type, NVPTXRegClass regclass> {
- defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
- defm NAME: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
-}
-
-multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
- string Type, NVPTXRegClass regclass> {
- defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
- Type, regclass>;
- defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
- Type, regclass>;
- defm NAME: WMMA_LOAD_GALST<Geometry, Abc, Layout, "",
- Type, regclass>;
-}
-
-multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
- string Type, NVPTXRegClass regclass> {
- defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
- defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
-}
-
-multiclass WMMA_LOAD_G<string Geometry> {
- defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
- defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
- defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
- defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
-}
-
-defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
-defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
-defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
-
//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
-class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
- string Type, NVPTXRegClass regclass,
- bit WithStride, DAGOperand DstOp>
- : EmptyNVPTXInst,
- Requires<[!if(!eq(Geometry, "m16n16k16"),
- hasPTX60,
- hasPTX61),
- hasSM70]> {
- PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
- # "_" # Geometry # "_store_d"
- # "_" # Type
- # "_" # Layout
- # !subst(".", "_", Space)
- # !if(WithStride,"_stride", "")
- # "_Intr");
- dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
- regclass:$r2, regclass:$r3);
- dag InsR47 = (ins regclass:$r4, regclass:$r5,
- regclass:$r6, regclass:$r7);
- dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
- dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
- dag Ins = !con(InsR, StrideArg);
-
- // Construct the pattern to match corresponding intrinsic call. See the
- // details in the comments in WMMA_LOAD_ALSTOS.
- dag PatArgs = !foreach(tmp, Ins,
- !subst(imem, ADDRvar,
- !subst(MEMri64, ADDRri64,
- !subst(MEMri, ADDRri,
- !subst(ins, IntrMatcher, tmp)))));
- let Pattern = [PatArgs];
+class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
+ bit WithStride, DAGOperand DstOp>
+ : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
+ [!con((ins DstOp:$dst),
+ Frag.Ins,
+ !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
+ Requires<Frag.Predicates> {
+
+ // Load/store intrinsics are overloaded on the pointer's address space.
+ // To match the right intrinsic, we need to build an AS-constrained PatFrag.
+ // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, ...).
+ dag PFOperands = !con((ops node:$dst),
+ !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
+ !if(WithStride, (ops node:$ldm), (ops)));
+ // Build a PatFrag that matches only a particular address space.
+ PatFrag IntrFrag = PatFrag<PFOperands,
+ !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
+ !cond(!eq(Space, ".shared"): AS_match.shared,
+ !eq(Space, ".global"): AS_match.global,
+ 1: AS_match.generic)>;
+ // Build AS-constrained pattern.
+ let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+ let InOperandList = !con(Args, (ins MmaCode:$ptx));
let OutOperandList = (outs);
- let InOperandList = Ins;
- let AsmString = "wmma.store.d.sync."
- # Layout
- # "." # Geometry
+ let AsmString = "wmma.store.d.sync"
+ # "${ptx:aligned}"
+ # "." # Layout
+ # "." # Frag.geom
# Space
- # "." # Type
- # " \t[$src],"
- # !if(!eq(Type,"f16"),
- "{{$r0, $r1, $r2, $r3}}",
- "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
+ # "." # Frag.ptx_elt_type
+ # " \t[$dst],"
+ # Frag.regstring
# !if(WithStride, ", $ldm", "")
# ";";
-
-}
-
-class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
- string Type, bit WithStride>
- : PatFrag <(ops),(ops)> {
- // Intrinsic that matches this instruction.
- Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
- # Geometry
- # "_store_d"
- # "_" # Type
- # "_" # Layout
- # !if(WithStride, "_stride", ""));
- code match_generic = [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
- }];
- code match_shared = [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
- }];
- code match_global = [{
- return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
- }];
-
- dag Args = !if(!eq(Type,"f16"),
- (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
- (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
- node:$r4, node:$r5, node:$r6, node:$r7));
- dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
- let Operands = !con(Args, StrideArg);
- let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
- let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
- !if(!eq(Space, ".global"), match_global, match_generic));
-}
-
-multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
- string Type, NVPTXRegClass regclass,
- bit WithStride> {
- def _avar: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
- WithStride, imem>;
- def _areg: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
- WithStride, Int32Regs>;
- def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
- WithStride, Int64Regs>;
- def _ari: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
- WithStride, MEMri>;
- def _ari64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
- WithStride, MEMri64>;
}
-multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
- string Type, NVPTXRegClass regclass,
- bit WithStride> {
- // Define a PatFrag that matches appropriate intrinsic that loads from the
- // given address space.
- def _Intr: WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
- WithStride>;
- defm NAME: WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
- WithStride>;
-}
-
-multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
- string Type, NVPTXRegClass regclass > {
- defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
- defm NAME: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
-}
-
-multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
- string Type, NVPTXRegClass regclass> {
- defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
- defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
- defm NAME: WMMA_STORE_D_GLST<Geometry, Layout, "", Type, regclass>;
-}
-
-multiclass WMMA_STORE_D_GT<string Geometry, string Type,
- NVPTXRegClass regclass> {
- defm _row: WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
- defm _col: WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
-}
-
-multiclass WMMA_STORE_D_G<string Geometry> {
- defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
- defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
-}
-
-defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
-defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
-defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
+// Create all load/store variants
+defset list<WMMA_INSTR> MMA_LDSTs = {
+ foreach layout = ["row", "col"] in {
+ foreach stride = [0, 1] in {
+ foreach space = [".global", ".shared", ""] in {
+ foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
+ foreach frag = NVVM_MMA_OPS.all_ld_ops in
+ foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
+ foreach frag = NVVM_MMA_OPS.all_st_ops in
+ foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
+ def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
+ } // addr
+ } // space
+ } // stride
+ } // layout
+} // defset
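+// Note (illustrative): the loops above generate up to
+// 2 (layout) x 2 (stride) x 3 (space) x 5 (addressing mode) = 60 variants per
+// load/store fragment, minus combinations rejected by NVVM_MMA_SUPPORTED.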
// WMMA.MMA
-class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
- string DType, NVPTXRegClass d_reg,
- string CType, NVPTXRegClass c_reg,
- NVPTXRegClass ab_reg,
- string Satfinite = "">
- : EmptyNVPTXInst,
- Requires<[!if(!eq(Geometry, "m16n16k16"),
- hasPTX60,
- hasPTX61),
- hasSM70]> {
- Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
- # Geometry
- # "_mma"
- # "_" # ALayout
- # "_" # BLayout
- # "_" # DType
- # "_" # CType
- # !subst(".", "_", Satfinite));
- dag Outs = !if(!eq(DType,"f16"),
- (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
- (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
- d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
- dag InsExtraCArgs = !if(!eq(CType,"f16"),
- (ins),
- (ins c_reg:$c4, c_reg:$c5, c_reg:$c6, c_reg:$c7));
- dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
- ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
- ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
- ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
- c_reg:$c0, c_reg:$c1, c_reg:$c2, c_reg:$c3),
- InsExtraCArgs);
-
- // Construct the pattern to match corresponding intrinsic call. See the
- // details in the comments in WMMA_LOAD_ALSTOS.
- dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
- dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
- let Pattern = [!con(PatOuts, (set PatArgs))];
- let OutOperandList = Outs;
- let InOperandList = Ins;
- let AsmString = "wmma.mma.sync."
- # ALayout
+class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
+ WMMA_REGINFO FragC, WMMA_REGINFO FragD,
+ string ALayout, string BLayout, int Satfinite>
+ : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
+ [FragA.Ins, FragB.Ins, FragC.Ins]>,
+ // Requires does not seem to have an effect on an Instruction without Patterns.
+ // We set it here anyway and propagate it to the Pat<> we construct below.
+ Requires<FragA.Predicates> {
+ let OutOperandList = FragD.Outs;
+ let InOperandList = !con(Args, (ins MmaCode:$ptx));
+ string TypeList = !cond(
+ !eq(FragD.ptx_elt_type, "s32") : ".s32"
+ # "." # FragA.ptx_elt_type
+ # "." # FragB.ptx_elt_type
+ # ".s32",
+ 1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
+ );
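+ // For example (illustrative): an s8 x s8 -> s32 MMA yields
+ // TypeList = ".s32.s8.s8.s32", while an f16 x f16 MMA with an f16 accumulator
+ // and an f32 result yields ".f32.f16".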
+ let AsmString = "wmma.mma"
+ # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
+ # ".sync"
+ # "${ptx:aligned}"
+ # "." # ALayout
# "." # BLayout
- # "." # Geometry
- # "." # DType
- # "." # CType
- # Satfinite # "\n\t\t"
- # !if(!eq(DType,"f16"),
- "{{$d0, $d1, $d2, $d3}}, \n\t\t",
- "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
- # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
- # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
- # !if(!eq(CType,"f16"),
- "{{$c0, $c1, $c2, $c3}};",
- "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
-}
-
-multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
- string DType, NVPTXRegClass d_reg,
- string CType, NVPTXRegClass c_reg> {
- def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
- DType, d_reg, CType, c_reg,
- Float16x2Regs, ".satfinite">;
- def NAME: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
- DType, d_reg, CType, c_reg,
- Float16x2Regs>;
-}
-
-multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
- string DType, NVPTXRegClass d_reg> {
- defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
- "f16", Float16x2Regs>;
- defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
- "f32", Float32Regs>;
-}
-
-multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
- defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
- defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
-}
-
-multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
- defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
- defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
-}
-
-multiclass WMMA_MMA_G<string Geometry> {
- defm _col: WMMA_MMA_GA<Geometry, "col">;
- defm _row: WMMA_MMA_GA<Geometry, "row">;
+ # "." # FragA.geom
+ # TypeList
+ # !if(Satfinite, ".satfinite", "") # "\n\t\t"
+ # FragD.regstring # ",\n\t\t"
+ # FragA.regstring # ",\n\t\t"
+ # FragB.regstring # ",\n\t\t"
+ # FragC.regstring # ";";
}
-defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
-defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
-defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;
+defset list<WMMA_INSTR> MMAs = {
+ foreach layout_a = ["row", "col"] in {
+ foreach layout_b = ["row", "col"] in {
+ foreach satf = [0, 1] in {
+ foreach op = NVVM_MMA_OPS.all_mma_ops in {
+ foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
+ def : WMMA_MMA<WMMA_REGINFO<op[0]>,
+ WMMA_REGINFO<op[1]>,
+ WMMA_REGINFO<op[2]>,
+ WMMA_REGINFO<op[3]>,
+ layout_a, layout_b, satf>;
+ }
+ } // op
+ } // satf
+ } // layout_b
+ } // layout_a
+} // defset
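+// Note (illustrative): the loops above enumerate 2 x 2 layout combinations and
+// 2 satf settings for every MMA op tuple, with unsupported combinations
+// filtered out by NVVM_MMA_SUPPORTED.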
+
+
+// Constructing non-flat DAGs is still a pain. We can't !subst a dag node with a
+// dag, so the ptx.version operand must be appended *after* !foreach replaces
+// 'ins' with the instruction record.
+class WMMA_PAT<WMMA_INSTR wi>
+ : Pat<wi.IntrinsicPattern,
+ !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
+ (wi ptx.version))>,
+ Requires<wi.Predicates>;
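+// For example (illustrative), each generated pattern has roughly the shape
+//   Pat<(intrinsic <args>), (Instr <args>, ptx.version)>,
+// with the instruction's predicates attached via Requires<>.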
+
+// Build intrinsic->instruction patterns for all MMA instructions.
+foreach mma = !listconcat(MMAs, MMA_LDSTs) in
+ def : WMMA_PAT<mma>;
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 52ced266b91c..0743a2986718 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -1,9 +1,8 @@
//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index 3c39f53eb30a..59d5ef40e9ac 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -1,9 +1,8 @@
//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index e94c1914029d..76fb9f3fa692 100644
--- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXLowerAlloca.cpp - Make alloca to use local memory =====--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 139dc7fbeeda..c5e02e34e25e 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXLowerArgs.cpp - Lower arguments ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -92,6 +91,7 @@
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -170,7 +170,8 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
Value *ArgInParam = new AddrSpaceCastInst(
Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
FirstInst);
- LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
+ LoadInst *LI =
+ new LoadInst(StructType, ArgInParam, Arg->getName(), FirstInst);
new StoreInst(LI, AllocA, FirstInst);
}
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index a754a6a36dab..5ec1b2425e68 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXMCExpr.cpp - NVPTX specific MC expression classes ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 95741d9b0451..440fa1310003 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -1,9 +1,8 @@
//===-- NVPTXMCExpr.h - NVPTX specific MC expression classes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index 5a9115f6f7f1..cf63fc33e621 100644
--- a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 02c32c68ee2c..629757db8707 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 2ca0ccf2dfa7..4c5a9adf1f65 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXPrologEpilogPass.cpp - NVPTX prolog/epilog inserter ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -73,8 +72,8 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
TFI.getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
MI.getOperand(0).ChangeToRegister(Reg, /*isDef=*/false);
MI.getOperand(0).setIsDebug();
- auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
- DIExpression::NoDeref, Offset);
+ auto *DIExpr = DIExpression::prepend(
+ MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
MI.getOperand(3).setMetadata(DIExpr);
continue;
}
diff --git a/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
index f60d841c1683..af50a7465d1a 100644
--- a/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
+++ b/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -1,9 +1,8 @@
//===- NVPTXProxyRegErasure.cpp - NVPTX Proxy Register Instruction Erasure -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 755738329881..5cdec0925b26 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -127,6 +126,6 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return NVPTX::VRFrame;
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 6185a0b54cac..9ef6940daf86 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -1,9 +1,8 @@
//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,7 +42,7 @@ public:
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
ManagedStringPool *getStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index f04764a9e9a3..4b755dcb55ff 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 82befe4b101b..e213089e4085 100644
--- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index acbee86ae386..357826c2d19c 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -1,9 +1,8 @@
//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index b02822a099d9..0e9fa1fd3e56 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -1,9 +1,8 @@
//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 8ec0ddb9b3d5..11b3fe2fa3d3 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -167,8 +167,16 @@ public:
void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
- void addFastRegAlloc(FunctionPass *RegAllocPass) override;
- void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+ void addFastRegAlloc() override;
+ void addOptimizedRegAlloc() override;
+
+ bool addRegAssignmentFast() override {
+ llvm_unreachable("should not be used");
+ }
+
+ bool addRegAssignmentOptimized() override {
+ llvm_unreachable("should not be used");
+ }
private:
// If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
@@ -323,15 +331,12 @@ FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
-void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
- assert(!RegAllocPass && "NVPTX uses no regalloc!");
+void NVPTXPassConfig::addFastRegAlloc() {
addPass(&PHIEliminationID);
addPass(&TwoAddressInstructionPassID);
}
-void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
- assert(!RegAllocPass && "NVPTX uses no regalloc!");
-
+void NVPTXPassConfig::addOptimizedRegAlloc() {
addPass(&ProcessImplicitDefsID);
addPass(&LiveVariablesID);
addPass(&MachineLoopInfoID);
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index ca540b8e0389..d84600c74e29 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -1,9 +1,8 @@
//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index c706b053ab8f..ab2a93b75922 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 307654aed37f..be0416f90fca 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -39,7 +38,6 @@ static bool readsLaneId(const IntrinsicInst *II) {
static bool isNVVMAtomic(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default: return false;
- case Intrinsic::nvvm_atomic_load_add_f32:
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 14e93f7447dd..b179a28fa713 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -17,8 +16,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
-#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index e464f474b1d5..665eb1383253 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -1,13 +1,13 @@
//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains miscellaneous utility functions
+//
//===----------------------------------------------------------------------===//
#include "NVPTXUtilities.h"
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index a0cc4e78ac21..bf1524194cfb 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -1,9 +1,8 @@
//===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp
index 11277f5ba596..5cf7b6691e63 100644
--- a/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -1,9 +1,8 @@
//===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 64c262664fda..634a052e2ee7 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -1,9 +1,8 @@
//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 803d643844f8..2c71ec58ec42 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- NVPTXTargetInfo.cpp - NVPTX Target Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "NVPTX.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h
new file mode 100644
index 000000000000..5c5691349ae9
--- /dev/null
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- NVPTXTargetInfo.h - NVPTX Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_TARGETINFO_NVPTXTARGETINFO_H
+#define LLVM_LIB_TARGET_NVPTX_TARGETINFO_NVPTXTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheNVPTXTarget32();
+Target &getTheNVPTXTarget64();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_TARGETINFO_NVPTXTARGETINFO_H
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 8b3480f772e9..c9524da93acd 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1,15 +1,15 @@
//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -147,8 +147,7 @@ public:
: MCTargetAsmParser(Options, STI, MII) {
// Check for 64-bit vs. 32-bit pointer mode.
const Triple &TheTriple = STI.getTargetTriple();
- IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
- TheTriple.getArch() == Triple::ppc64le);
+ IsPPC64 = TheTriple.isPPC64();
IsDarwin = TheTriple.isMacOSX();
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -1129,7 +1128,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
}
}
-static std::string PPCMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1148,7 +1147,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
case Match_MnemonicFail: {
- uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = PPCMnemonicSpellCheck(
((PPCOperand &)*Operands[0]).getToken(), FBS);
return Error(IDLoc, "invalid instruction" + Suggestion,
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 26869f250823..7a8af57961cb 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -1,13 +1,13 @@
//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -61,6 +61,14 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
createPPCLEDisassembler);
}
+static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ int32_t Offset = SignExtend32<24>(Imm);
+ Inst.addOperand(MCOperand::createImm(Offset));
+ return MCDisassembler::Success;
+}
+
// FIXME: These can be generated by TableGen from the existing register
// encoding values!
@@ -78,12 +86,6 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
-static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
- uint64_t Address,
- const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, CRRegs);
-}
-
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index a405dd70c307..8778e916f7e4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -29,6 +28,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -52,6 +52,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case FK_NONE:
+ return 0;
case FK_Data_1:
return 1;
case FK_Data_2:
@@ -74,10 +76,12 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
namespace {
class PPCAsmBackend : public MCAsmBackend {
- const Target &TheTarget;
+protected:
+ Triple TT;
public:
- PPCAsmBackend(const Target &T, support::endianness Endian)
- : MCAsmBackend(Endian), TheTarget(T) {}
+ PPCAsmBackend(const Target &T, const Triple &TT)
+ : MCAsmBackend(TT.isLittleEndian() ? support::little : support::big),
+ TT(TT) {}
unsigned getNumFixupKinds() const override {
return PPC::NumTargetFixupKinds;
@@ -136,9 +140,11 @@ public:
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override {
- switch ((PPC::Fixups)Fixup.getKind()) {
+ switch ((unsigned)Fixup.getKind()) {
default:
return false;
+ case FK_NONE:
+ return true;
case PPC::fixup_ppc_br24:
case PPC::fixup_ppc_br24abs:
// If the target symbol has a local entry point we must not attempt
@@ -187,59 +193,76 @@ public:
return true;
}
-
- unsigned getPointerSize() const {
- StringRef Name = TheTarget.getName();
- if (Name == "ppc64" || Name == "ppc64le") return 8;
- assert(Name == "ppc32" && "Unknown target name!");
- return 4;
- }
};
} // end anonymous namespace
// FIXME: This should be in a separate file.
namespace {
- class DarwinPPCAsmBackend : public PPCAsmBackend {
- public:
- DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, support::big) { }
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- bool is64 = getPointerSize() == 8;
- return createPPCMachObjectWriter(
- /*Is64Bit=*/is64,
- (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
- MachO::CPU_SUBTYPE_POWERPC_ALL);
- }
- };
-
- class ELFPPCAsmBackend : public PPCAsmBackend {
- uint8_t OSABI;
- public:
- ELFPPCAsmBackend(const Target &T, support::endianness Endian,
- uint8_t OSABI)
- : PPCAsmBackend(T, Endian), OSABI(OSABI) {}
-
- std::unique_ptr<MCObjectTargetWriter>
- createObjectTargetWriter() const override {
- bool is64 = getPointerSize() == 8;
- return createPPCELFObjectWriter(is64, OSABI);
- }
- };
+
+class DarwinPPCAsmBackend : public PPCAsmBackend {
+public:
+ DarwinPPCAsmBackend(const Target &T, const Triple &TT)
+ : PPCAsmBackend(T, TT) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ bool Is64 = TT.isPPC64();
+ return createPPCMachObjectWriter(
+ /*Is64Bit=*/Is64,
+ (Is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
+ }
+};
+
+class ELFPPCAsmBackend : public PPCAsmBackend {
+public:
+ ELFPPCAsmBackend(const Target &T, const Triple &TT) : PPCAsmBackend(T, TT) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ bool Is64 = TT.isPPC64();
+ return createPPCELFObjectWriter(Is64, OSABI);
+ }
+
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+};
+
+class XCOFFPPCAsmBackend : public PPCAsmBackend {
+public:
+ XCOFFPPCAsmBackend(const Target &T, const Triple &TT)
+ : PPCAsmBackend(T, TT) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createPPCXCOFFObjectWriter(TT.isArch64Bit());
+ }
+};
} // end anonymous namespace
+Optional<MCFixupKind> ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
+ if (TT.isPPC64()) {
+ if (Name == "R_PPC64_NONE")
+ return FK_NONE;
+ } else {
+ if (Name == "R_PPC_NONE")
+ return FK_NONE;
+ }
+ return MCAsmBackend::getFixupKind(Name);
+}
+
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSDarwin())
- return new DarwinPPCAsmBackend(T);
+ return new DarwinPPCAsmBackend(T, TT);
+
+ if (TT.isOSBinFormatXCOFF())
+ return new XCOFFPPCAsmBackend(T, TT);
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new ELFPPCAsmBackend(
- T, IsLittleEndian ? support::little : support::big, OSABI);
+ return new ELFPPCAsmBackend(T, TT);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index a3caf9a7a5ee..042ddf48d5df 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -134,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
+ case FK_NONE:
+ Type = ELF::R_PPC_NONE;
+ break;
case PPC::fixup_ppc_br24abs:
Type = ELF::R_PPC_ADDR24;
break;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index dce443997ea5..845489788c86 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -1,9 +1,8 @@
//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index fc29e4effbb1..0e64ae55ab1c 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrInfo.h"
@@ -382,8 +381,11 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
// Branches can take an immediate operand. This is used by the branch
// selection pass to print .+8, an eight byte displacement from the PC.
- O << ".+";
- printAbsBranchOperand(MI, OpNo, O);
+ O << ".";
+ int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+ if (Imm >= 0)
+ O << "+";
+ O << Imm;
}
void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
@@ -442,13 +444,22 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
// On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
// come at the _end_ of the expression.
const MCOperand &Op = MI->getOperand(OpNo);
- const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
- O << refExp.getSymbol().getName();
+ const MCSymbolRefExpr *RefExp = nullptr;
+ const MCConstantExpr *ConstExp = nullptr;
+ if (const MCBinaryExpr *BinExpr = dyn_cast<MCBinaryExpr>(Op.getExpr())) {
+ RefExp = cast<MCSymbolRefExpr>(BinExpr->getLHS());
+ ConstExp = cast<MCConstantExpr>(BinExpr->getRHS());
+ } else
+ RefExp = cast<MCSymbolRefExpr>(Op.getExpr());
+
+ O << RefExp->getSymbol().getName();
O << '(';
printOperand(MI, OpNo+1, O);
O << ')';
- if (refExp.getKind() != MCSymbolRefExpr::VK_None)
- O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
+ if (RefExp->getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(RefExp->getKind());
+ if (ConstExp != nullptr)
+ O << '+' << ConstExp->getValue();
}
/// showRegistersWithPercentPrefix - Check if this register name should be
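[Editor's note, illustrative only] printTLSCall above now accepts either a bare symbol reference or a symbol-plus-constant MCBinaryExpr, printing any constant addend after the variant suffix. A hedged sketch of the same split-then-print shape using plain structs; this is not the LLVM MC API, and the type and field names are stand-ins:

#include <iostream>
#include <string>

// Stand-in for the call operand: a symbol name, an optional variant suffix
// such as "PLT" (empty models VK_None), and an optional constant addend.
struct CallExpr {
  std::string Symbol;
  std::string Variant;
  long Addend = 0;
  bool HasAddend = false;
};

// Illustrative only: print "symbol(operand)[@variant][+addend]" in the same
// order the updated printTLSCall emits its pieces.
static void printTLSCallLike(const CallExpr &E, const std::string &ParenOp) {
  std::cout << E.Symbol << '(' << ParenOp << ')';
  if (!E.Variant.empty())
    std::cout << '@' << E.Variant;
  if (E.HasAddend)
    std::cout << '+' << E.Addend;
  std::cout << '\n';
}

int main() {
  printTLSCallLike({"__tls_get_addr", "PLT", 0, false}, "x@tlsgd"); // PPC32 style
  printTLSCallLike({"__tls_get_addr", "", 32768, true}, "x@tlsgd"); // with addend
}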
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 351ccefa2da2..725ae2a7081b 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -1,9 +1,8 @@
//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
-#define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
+#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index fb7bf23509c7..5f0005ea1d7b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,3 +81,9 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
UseIntegratedAssembler = true;
}
+void PPCXCOFFMCAsmInfo::anchor() {}
+
+PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) {
+ assert(!IsLittleEndian && "Little-endian XCOFF not supported.");
+ CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4;
+}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index e252ac944d40..42cb62ad26a4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -1,13 +1,12 @@
//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file contains the declaration of the MCAsmInfoDarwin class.
+// This file contains the declarations of the PowerPC MCAsmInfo classes.
//
//===----------------------------------------------------------------------===//
@@ -16,6 +15,7 @@
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoXCOFF.h"
namespace llvm {
class Triple;
@@ -34,6 +34,13 @@ public:
explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
};
+class PPCXCOFFMCAsmInfo : public MCAsmInfoXCOFF {
+ virtual void anchor();
+
+public:
+ explicit PPCXCOFFMCAsmInfo(bool is64Bit, const Triple &);
+};
+
} // namespace llvm
#endif
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 8c15ade6f9c4..676efc500455 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -217,7 +216,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
Fixups.push_back(MCFixup::create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_nofixup));
const Triple &TT = STI.getTargetTriple();
- bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+ bool isPPC64 = TT.isPPC64();
return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index a4bcff4b9450..1324faa12553 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -1,9 +1,8 @@
//===-- PPCMCCodeEmitter.h - Convert PPC code to machine code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,9 +98,10 @@ public:
unsigned getInstSizeInBytes(const MCInst &MI) const;
private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // namespace llvm
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 32e6a0bdd65f..d467f5c4a439 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 8bb4791d13dd..449e2c34f74d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -1,9 +1,8 @@
//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a1e4e07b25af..90c3c8d20edb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,11 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -47,9 +48,9 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "PPCGenRegisterInfo.inc"
-// Pin the vtable to this file.
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+// Pin the vtable to this file.
PPCTargetStreamer::~PPCTargetStreamer() = default;
static MCInstrInfo *createPPCMCInstrInfo() {
@@ -82,6 +83,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
+ else if (TheTriple.isOSBinFormatXCOFF())
+ MAI = new PPCXCOFFMCAsmInfo(isPPC64, TheTriple);
else
MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple);
@@ -182,16 +185,33 @@ public:
void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
auto *Symbol = cast<MCSymbolELF>(S);
+
// When encoding an assignment to set symbol A to symbol B, also copy
// the st_other bits encoding the local entry point offset.
- if (Value->getKind() != MCExpr::SymbolRef)
- return;
- const auto &RhsSym = cast<MCSymbolELF>(
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
- unsigned Other = Symbol->getOther();
+ if (copyLocalEntry(Symbol, Value))
+ UpdateOther.insert(Symbol);
+ else
+ UpdateOther.erase(Symbol);
+ }
+
+ void finish() override {
+ for (auto *Sym : UpdateOther)
+ copyLocalEntry(Sym, Sym->getVariableValue());
+ }
+
+private:
+ SmallPtrSet<MCSymbolELF *, 32> UpdateOther;
+
+ bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) {
+ auto *Ref = dyn_cast<const MCSymbolRefExpr>(S);
+ if (!Ref)
+ return false;
+ const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol());
+ unsigned Other = D->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
- Symbol->setOther(Other);
+ D->setOther(Other);
+ return true;
}
};
@@ -217,6 +237,27 @@ public:
}
};
+class PPCTargetXCOFFStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
+ void emitTCEntry(const MCSymbol &S) override {
+ report_fatal_error("TOC entries not supported yet.");
+ }
+
+ void emitMachine(StringRef CPU) override {
+ llvm_unreachable("Machine pseudo-ops are invalid for XCOFF.");
+ }
+
+ void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("ABI-version pseudo-ops are invalid for XCOFF.");
+ }
+
+ void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
+ llvm_unreachable("Local-entry pseudo-ops are invalid for XCOFF.");
+ }
+};
+
} // end anonymous namespace
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
@@ -231,6 +272,8 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSBinFormatELF())
return new PPCTargetELFStreamer(S);
+ if (TT.isOSBinFormatXCOFF())
+ return new PPCTargetXCOFFStreamer(S);
return new PPCTargetMachOStreamer(S);
}
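[Editor's note, illustrative only] The ELF streamer change above replaces the one-shot copy of the local-entry bits with a deferred scheme: symbols assigned to another symbol are remembered in UpdateOther and the bits are copied again in finish(), so a .localentry that appears after the alias still propagates. A hedged sketch of that pattern with plain data structures; these are not the LLVM MC types, and the mask value is a stand-in for ELF::STO_PPC64_LOCAL_MASK:

#include <cstdint>
#include <iostream>
#include <set>

constexpr uint8_t kLocalMask = 0xE0; // stand-in for ELF::STO_PPC64_LOCAL_MASK

struct Sym {
  uint8_t Other = 0;            // models the ELF st_other byte
  const Sym *AliasOf = nullptr; // models the assigned variable value
};

struct Streamer {
  std::set<Sym *> UpdateOther;

  // Copy the masked bits from Target into D; returns false when the assigned
  // value is not a plain symbol reference (modeled here as a null Target).
  static bool copyLocalEntry(Sym &D, const Sym *Target) {
    if (!Target)
      return false;
    D.Other = (D.Other & ~kLocalMask) | (Target->Other & kLocalMask);
    return true;
  }

  void emitAssignment(Sym &Alias, const Sym *Value) {
    Alias.AliasOf = Value;
    if (copyLocalEntry(Alias, Value))
      UpdateOther.insert(&Alias); // revisit at the end of the stream
    else
      UpdateOther.erase(&Alias);
  }

  void finish() {
    // Pick up local-entry bits that were set on the target after the alias.
    for (Sym *S : UpdateOther)
      copyLocalEntry(*S, S->AliasOf);
  }
};

int main() {
  Sym Target, Alias;
  Streamer S;
  S.emitAssignment(Alias, &Target); // alias defined before .localentry
  Target.Other |= 0x60;             // .localentry set later on the target
  S.finish();
  std::cout << std::hex << int(Alias.Other) << '\n'; // prints 60
}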
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index d6e450cba0d7..74b67bd2e928 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ class Triple;
class StringRef;
class raw_pwrite_stream;
-Target &getThePPC32Target();
-Target &getThePPC64Target();
-Target &getThePPC64LETarget();
-
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
@@ -56,6 +51,9 @@ std::unique_ptr<MCObjectTargetWriter> createPPCELFObjectWriter(bool Is64Bit,
std::unique_ptr<MCObjectTargetWriter>
createPPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype);
+/// Construct a PPC XCOFF object writer.
+std::unique_ptr<MCObjectTargetWriter> createPPCXCOFFObjectWriter(bool Is64Bit);
+
/// Returns true iff Val consists of one contiguous run of 1s with any number of
/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index ff6cf584da23..4cf7fd15fa75 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index c2987b641c04..284e52c298a2 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -1,9 +1,8 @@
//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 481ba3f09cc7..d686a8ea2a22 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -1,9 +1,8 @@
//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..9c661286d455
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -0,0 +1,29 @@
+//===-- PPCXCOFFObjectWriter.cpp - PowerPC XCOFF Writer -------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "llvm/MC/MCXCOFFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+class PPCXCOFFObjectWriter : public MCXCOFFObjectTargetWriter {
+
+public:
+ PPCXCOFFObjectWriter(bool Is64Bit);
+};
+} // end anonymous namespace
+
+PPCXCOFFObjectWriter::PPCXCOFFObjectWriter(bool Is64Bit)
+ : MCXCOFFObjectTargetWriter(Is64Bit) {}
+
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createPPCXCOFFObjectWriter(bool Is64Bit) {
+ return llvm::make_unique<PPCXCOFFObjectWriter>(Is64Bit);
+}
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index 17c37964c562..2a10322d3f49 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -1,22 +1,21 @@
-//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
+//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file defines the resources required by P9 instructions. This is part
-// P9 processor model used for instruction scheduling. This file should contain
-// all of the instructions that may be used on Power 9. This is not just
-// instructions that are new on Power 9 but also instructions that were
+// This file defines the resources required by P9 instructions. This is part of
+// the P9 processor model used for instruction scheduling. This file should
+// contain all the instructions that may be used on Power 9. This is not
+// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
// - Each CPU is made up of two superslices.
// - Each superslice is made up of two slices. Therefore, there are 4 slices
-// for each CPU.
+// for each CPU.
// - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
// - Each CPU has:
// - One CY (Crypto) unit P9_CY_*
@@ -33,9 +32,8 @@
// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
(instregex "VADDU(B|H|W|D)M$"),
(instregex "VAND(C)?$"),
@@ -85,9 +83,9 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
)>;
// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
-// slingle slice. However, since it is Restricted it requires all 3 dispatches
+// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
(instregex "TABORT(D|W)C(I)?$"),
(instregex "MTFSB(0|1)$"),
@@ -103,7 +101,7 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
-def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
(instrs
(instregex "XSMAX(C|J)?DP$"),
(instregex "XSMIN(C|J)?DP$"),
@@ -120,11 +118,11 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
)>;
// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instrs
(instregex "S(L|R)D$"),
(instregex "SRAD(I)?$"),
- (instregex "EXTSWSLI$"),
+ (instregex "EXTSWSLI_32_64$"),
(instregex "MFV(S)?RD$"),
(instregex "MTVSRD$"),
(instregex "MTVSRW(A|Z)$"),
@@ -160,6 +158,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
XSNEGDP,
XSCPSGNDP,
MFVSRWZ,
+ EXTSWSLI,
SRADI_32,
RLDIC,
RFEBB,
@@ -171,9 +170,9 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
)>;
// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
-// slingle slice. However, since it is Restricted it requires all 3 dispatches
-// (DISP) for that superslice.
-def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// single slice. However, since it is Restricted, it requires all 3 dispatches
+// (DISP) for that superslice.
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
(instregex "RLDC(L|R)$"),
(instregex "RLWIMI(8)?$"),
@@ -200,9 +199,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
(instregex "M(T|F)VSCR$"),
(instregex "VCMPNEZ(B|H|W)$"),
@@ -285,10 +283,9 @@ def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
)>;
// 7 cycle DP vector operation that uses an entire superslice.
-// Uses both DP units (the even DPE and odd DPO units), two pipelines
-// (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
-def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
+// EXECO) and all three dispatches (DISP) to the given superslice.
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
VADDFP,
VCTSXS,
@@ -395,18 +392,17 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
VSUMSWS
)>;
-
// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-// dispatch units for the superslice.
-def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "MADD(HD|HDU|LD)$"),
+ (instregex "MADD(HD|HDU|LD|LD8)$"),
(instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
)>;
// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
-// dispatch units for the superslice.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+// dispatch units for the superslice.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FRSP,
(instregex "FRI(N|P|Z|M)(D|S)$"),
@@ -448,26 +444,26 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations can be done in parallel.
-// The DP is restricted so we need a full 5 dispatches.
+// These operations can be done in parallel. The DP is restricted so we need a
+// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "FSEL(D|S)o$")
)>;
// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "MUL(H|L)(D|W)(U)?o$")
)>;
// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
-// These operations must be done sequentially.
-// The DP is restricted so we need a full 5 dispatches.
+// These operations must be done sequentially. The DP is restricted so we need a
+// full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "FRI(N|P|Z|M)(D|S)o$"),
(instregex "FRE(S)?o$"),
@@ -483,8 +479,8 @@ def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
FRSPo
)>;
-// 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
-def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
(instrs
XSADDDP,
XSADDSP,
@@ -520,9 +516,9 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
)>;
// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
(instrs
(instregex "LVS(L|R)$"),
(instregex "VSPLTIS(W|H|B)$"),
@@ -628,9 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
BCDSRo,
XSADDQP,
@@ -652,17 +648,17 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
BCDCTSQo
)>;
// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
XSMADDQP,
XSMADDQPO,
@@ -677,39 +673,39 @@ def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
BCDCFSQo
)>;
// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
XSDIVQP,
XSDIVQPO
)>;
// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
(instrs
XSSQRTQP,
XSSQRTQPO
)>;
// 6 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "LXVL(L)?")
)>;
// 5 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "LVE(B|H|W)X$"),
(instregex "LVX(L)?"),
@@ -728,7 +724,7 @@ def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
)>;
// 4 Cycle Load uses a single slice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "DCB(F|T|ST)(EP)?$"),
(instregex "DCBZ(L)?(EP)?$"),
@@ -757,8 +753,8 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
)>;
// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
-// superslice.
-def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice.
+def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
LFIWZX,
LFDX,
@@ -768,7 +764,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C],
(instrs
SLBIA,
SLBIE,
@@ -782,17 +778,26 @@ def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C, DISP_PAIR_1C],
+ (instrs
+ (instregex "L(W|H)ZU(X)?(8)?$")
+)>;
+
+// Cracked TEND Instruction.
+// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
+// operations can be run in parallel.
+def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C],
(instrs
- (instregex "L(W|H)ZU(X)?(8)?$"),
TEND
)>;
+
// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "ST(B|H|W|D)CX$")
)>;
@@ -800,16 +805,16 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
LDMX
)>;
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
(instregex "LHA(X)?(8)?$"),
(instregex "CP_PASTE(8)?o$"),
@@ -819,20 +824,19 @@ def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFIWAX
)>;
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
LXSIWAX,
LIWAX
@@ -844,7 +848,7 @@ def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFSX,
LFS
@@ -852,10 +856,9 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
-// operations cannot be done at the same time and so their latencies are added.
+// operations cannot be done at the same time and so their latencies are added.
// Full 4 dispatches are required as this is a cracked instruction.
-def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
LXSSP,
LXSSPX,
@@ -866,7 +869,7 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C,
// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
(instrs
(instregex "LHAU(X)?(8)?$"),
LWAUX
@@ -874,12 +877,11 @@ def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
-// added. Requires 8 cycles.
-// Since the PM requires the full superslice we need both EXECE, EXECO pipelines
-// as well as 3 dispatches for the PM. The Load requires the remaining 2
-// dispatches.
+// added. Requires 8 cycles. Since the PM requires the full superslice we need
+// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
+// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
LXVH8X,
LXVDSX,
@@ -887,8 +889,8 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
)>;
// Single slice Restricted store operation. The restricted operation requires
-// all three dispatches for the superslice.
-def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+// all three dispatches for the superslice.
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
(instregex "STF(S|D|IWX|SX|DX)$"),
(instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
@@ -905,10 +907,9 @@ def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// Vector Store Instruction
-// Requires the whole superslice and therefore requires all three dispatches
+// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
-def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
- DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
(instrs
(instregex "STVE(B|H|W)X$"),
(instregex "STVX(L)?$"),
@@ -916,18 +917,18 @@ def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
)>;
// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
-def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
(instrs
(instregex "MTCTR(8)?(loop)?$"),
(instregex "MTLR(8)?$")
)>;
// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
-def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
(instrs
(instregex "M(T|F)VRSAVE(v)?$"),
(instregex "M(T|F)PMR$"),
@@ -938,10 +939,9 @@ def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
)>;
// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVW,
DIVWU,
@@ -949,10 +949,9 @@ def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
)>;
// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and two
+// dispatches.
+def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVWE,
DIVD,
@@ -964,29 +963,28 @@ def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
)>;
// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
- DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
+// dispatches.
+def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
(instrs
DIVDE,
DIVDEU
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-// and one full superslice for the DIV operation since there is only one DIV
-// per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_EVEN_1C, DISP_1C],
(instrs
(instregex "DIVW(U)?(O)?o$")
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-// and one full superslice for the DIV operation since there is only one DIV
-// per superslice. Latency of DIV plus ALU is 26.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_EVEN_1C, DISP_1C],
(instrs
DIVDo,
DIVDUo,
@@ -995,10 +993,10 @@ def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
)>;
// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
-// and one full superslice for the DIV operation since there is only one DIV
-// per superslice. Latency of DIV plus ALU is 42.
+// and one full superslice for the DIV operation since there is only one DIV per
+// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_EVEN_1C, DISP_1C],
(instrs
DIVDEo,
DIVDEUo
@@ -1008,11 +1006,11 @@ def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 6 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches. ALU ops are
+// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
MTCRF,
MTCRF8
@@ -1020,11 +1018,11 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 4 dispatches.
-// ALU ops are 2 cycles each.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches. ALU ops are
+// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
(instregex "ADDC(8)?o$"),
(instregex "SUBFC(8)?o$")
@@ -1036,7 +1034,7 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
// One of the ALU ops is restricted the other is not so we have a total of
// 5 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "F(N)?ABS(D|S)o$"),
(instregex "FCPSGN(D|S)o$"),
@@ -1046,22 +1044,22 @@ def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 4 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C, DISP_1C],
(instrs
MCRFS
)>;
// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
-// latencies are not added together. Otherwise this is like having two
-// instructions running together on two pipelines and 6 dispatches.
+// latencies are not added together. Otherwise this is like having two
+// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MTFSF(b|o)?$"),
(instregex "MTFSFI(o)?$")
@@ -1071,7 +1069,7 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "RLD(I)?C(R|L)o$"),
(instregex "RLW(IMI|INM|NM)(8)?o$"),
@@ -1086,7 +1084,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MFFS(L|CE|o)?$")
)>;
@@ -1095,143 +1093,141 @@ def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
(instregex "MFCR(8)?$")
)>;
// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
-def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
(instrs
- (instregex "EXTSWSLIo$"),
+ (instregex "EXTSWSLI_32_64o$"),
(instregex "SRAD(I)?o$"),
+ EXTSWSLIo,
SLDo,
SRDo,
RLDICo
)>;
// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FDIV
)>;
// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FDIVo
)>;
// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
(instrs
XSSQRTDP
)>;
// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FSQRT
)>;
// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVSQRTDP
)>;
// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVSQRTSP
)>;
// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FSQRTo
)>;
// 26 Cycle DP Instruction.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
(instrs
XSSQRTSP
)>;
// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FSQRTS
)>;
// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FSQRTSo
)>;
-// 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
(instrs
XSDIVDP
)>;
// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
(instrs
FDIVS
)>;
// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
FDIVSo
)>;
-// 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
-def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
+// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
(instrs
XSDIVSP
)>;
// 24 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-// superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVDIVSP
)>;
// 33 Cycle DP Vector Instruction. Takes one full superslice.
-// Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
-// superslice.
+// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
+// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
- DISP_1C, DISP_1C, DISP_1C],
+ DISP_1C],
(instrs
XVDIVDP
)>;
// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
-// latencies are added together for 6 cycles. The remainaing ALU is 2 cycles.
+// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
-// a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
+// a total of 7 dispatches. The final 2 dispatches come from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "LF(SU|SUX)$")
)>;
@@ -1240,7 +1236,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
// the store and so it can be run at the same time as the store. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "STF(S|D)U(X)?$"),
(instregex "ST(B|H|W|D)U(X)?(8)?$")
@@ -1249,20 +1245,19 @@ def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_PAIR_1C, DISP_PAIR_1C],
(instrs
(instregex "LBZU(X)?(8)?$"),
(instregex "LDU(X)?$")
)>;
-
// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
-// the load and so it can be run at the same time as the load. The load is also
-// restricted. 3 dispatches are from the restricted load while the other two
-// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
-// is required for the ALU.
+// the load and so it can be run at the same time as the load. The load is also
+// restricted. 3 dispatches are from the restricted load while the other two
+// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
+// is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "LF(DU|DUX)$")
)>;
@@ -1270,9 +1265,9 @@ def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
// Crypto Instructions
// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
-// superslice. That includes both exec pipelines (EXECO, EXECE) and all three
-// dispatches.
-def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
+// superslice. That includes both exec pipelines (EXECO, EXECE) and one
+// dispatch.
+def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
(instrs
(instregex "VPMSUM(B|H|W|D)$"),
(instregex "V(N)?CIPHER(LAST)?$"),
@@ -1282,14 +1277,14 @@ def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
// Branch Instructions
// Two Cycle Branch
-def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BR_2C, DISP_BR_1C],
(instrs
(instregex "BCCCTR(L)?(8)?$"),
(instregex "BCCL(A|R|RL)?$"),
(instregex "BCCTR(L)?(8)?(n)?$"),
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
- (instregex "BL(_TLS)?$"),
+ (instregex "BL(_TLS|_NOP)?$"),
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
(instregex "BLA(8|8_NOP)?$"),
(instregex "BLR(8|L)?$"),
@@ -1313,8 +1308,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
-def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
(instrs
ADDPCIS
)>;
@@ -1324,17 +1318,15 @@ def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C,
// Atomic Load
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
- IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C],
+ IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
+ DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
(instrs
(instregex "L(D|W)AT$")
)>;
// Atomic Store
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
- IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
- DISP_1C],
+ IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
(instrs
(instregex "ST(D|W)AT$")
)>;
@@ -1406,6 +1398,7 @@ def : InstRW<[],
MBAR,
MSYNC,
SLBSYNC,
+ SLBFEEo,
NAP,
STOP,
TRAP,
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index bfc613af3dc0..c6951ab67b08 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -1,9 +1,8 @@
//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,6 @@
#define LLVM_LIB_TARGET_POWERPC_PPC_H
#include "llvm/Support/CodeGen.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
@@ -57,12 +55,26 @@ namespace llvm {
MCOperand &OutMO, AsmPrinter &AP,
bool isDarwin);
+ void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+ void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
+ void initializePPCLoopPreIncPrepPass(PassRegistry&);
+ void initializePPCTOCRegDepsPass(PassRegistry&);
+ void initializePPCEarlyReturnPass(PassRegistry&);
+ void initializePPCVSXCopyPass(PassRegistry&);
void initializePPCVSXFMAMutatePass(PassRegistry&);
+ void initializePPCVSXSwapRemovalPass(PassRegistry&);
+ void initializePPCReduceCRLogicalsPass(PassRegistry&);
+ void initializePPCBSelPass(PassRegistry&);
+ void initializePPCBranchCoalescingPass(PassRegistry&);
+ void initializePPCQPXLoadSplatPass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
void initializePPCExpandISELPass(PassRegistry &);
void initializePPCPreEmitPeepholePass(PassRegistry &);
void initializePPCTLSDynamicCallPass(PassRegistry &);
void initializePPCMIPeepholePass(PassRegistry&);
+
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 98e6e98e6974..8e94a2ae15e0 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -1,9 +1,8 @@
//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,6 +135,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
"Enable VSX instructions",
[FeatureAltivec]>;
+def FeatureTwoConstNR :
+ SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true",
+ "Requires two constant Newton-Raphson computation">;
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
@@ -162,8 +164,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
-def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
- "Target supports add/load integer fusion.">;
+def FeaturePPCPreRASched:
+ SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
+ "Use PowerPC pre-RA scheduling strategy">;
+def FeaturePPCPostRASched:
+ SubtargetFeature<"ppc-postra-sched", "UsePPCPostRASchedStrategy", "true",
+ "Use PowerPC post-RA scheduling strategy">;
def FeatureFloat128 :
SubtargetFeature<"float128", "HasFloat128", "true",
"Enable the __float128 data type for IEEE-754R Binary128.",
@@ -191,6 +197,13 @@ def FeatureP9Vector : SubtargetFeature<"power9-vector", "HasP9Vector", "true",
"Enable POWER9 vector instructions",
[FeatureISA3_0, FeatureP8Vector,
FeatureP9Altivec]>;
+// A separate feature for this, even though it is equivalent to P9Vector,
+// because this is a feature of the implementation rather than the architecture
+// and may go away with future CPUs.
+def FeatureVectorsUseTwoUnits : SubtargetFeature<"vectors-use-two-units",
+ "VectorsUseTwoUnits",
+ "true",
+ "Vectors use two units">;
// Since new processors generally contain a superset of features of those that
// came before them, the idea is to make implementations of new processors
@@ -215,15 +228,15 @@ def ProcessorFeatures {
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
FeatureBPERMD, FeatureExtDiv,
- FeatureMFTB, DeprecatedDST];
+ FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
list<SubtargetFeature> Power8SpecificFeatures =
[DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
- FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
- FeatureFusion];
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
list<SubtargetFeature> Power8FeatureList =
!listconcat(Power7FeatureList, Power8SpecificFeatures);
list<SubtargetFeature> Power9SpecificFeatures =
- [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+ [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0,
+ FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched];
list<SubtargetFeature> Power9FeatureList =
!listconcat(Power8FeatureList, Power9SpecificFeatures);
}
@@ -279,10 +292,9 @@ def getNonRecordFormOpcode : InstrMapping {
def getAltVSXFMAOpcode : InstrMapping {
let FilterClass = "AltVSXFMARel";
- // Instructions with the same BaseName and Interpretation64Bit values
- // form a row.
+ // Instructions with the same BaseName value form a row.
let RowFields = ["BaseName"];
- // Instructions with the same RC value form a column.
+ // Instructions with the same IsVSXFMAAlt value form a column.
let ColFields = ["IsVSXFMAAlt"];
// The key column are the (default) addend-killing instructions.
let KeyCol = ["0"];
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 04aa3c9b1e22..bd87ce06b4fb 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,7 +15,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/PPCInstPrinter.h"
+#include "MCTargetDesc/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCExpr.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
@@ -26,6 +25,7 @@
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -95,68 +95,102 @@ public:
return AsmPrinter::doInitialization(M);
}
- void EmitInstruction(const MachineInstr *MI) override;
+ void EmitInstruction(const MachineInstr *MI) override;
+
+  /// This function is used by PrintAsmOperand and PrintAsmMemoryOperand,
+  /// which are invoked only by EmitMSInlineAsmStr and EmitGCCInlineAsmStr.
+  /// The \p MI is always an INLINEASM instruction.
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &O) override;
+
+ void EmitEndOfAsmFile(Module &M) override;
+
+ void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+ void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
+ void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<PPCSubtarget>();
+ bool Changed = AsmPrinter::runOnMachineFunction(MF);
+ emitXRayTable();
+ return Changed;
+ }
+};
- void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ StringRef getPassName() const override {
+ return "Linux PPC Assembly Printer";
+ }
- void EmitEndOfAsmFile(Module &M) override;
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
- void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
- void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
- void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<PPCSubtarget>();
- bool Changed = AsmPrinter::runOnMachineFunction(MF);
- emitXRayTable();
- return Changed;
- }
- };
+ void EmitFunctionEntryLabel() override;
- /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
- class PPCLinuxAsmPrinter : public PPCAsmPrinter {
- public:
- explicit PPCLinuxAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+};
- StringRef getPassName() const override {
- return "Linux PPC Assembly Printer";
- }
+/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+/// OS X
+class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
+ StringRef getPassName() const override {
+ return "Darwin PPC Assembly Printer";
+ }
- void EmitFunctionEntryLabel() override;
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
+};
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitInstruction(const MachineInstr *MI) override;
- };
+class PPCAIXAsmPrinter : public PPCAsmPrinter {
+public:
+ PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : PPCAsmPrinter(TM, std::move(Streamer)) {}
- /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
- /// OS X
- class PPCDarwinAsmPrinter : public PPCAsmPrinter {
- public:
- explicit PPCDarwinAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : PPCAsmPrinter(TM, std::move(Streamer)) {}
+ StringRef getPassName() const override { return "AIX PPC Assembly Printer"; }
+};
- StringRef getPassName() const override {
- return "Darwin PPC Assembly Printer";
- }
+} // end anonymous namespace
- bool doFinalization(Module &M) override;
- void EmitStartOfAsmFile(Module &M) override;
- };
+void PPCAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (Subtarget->hasLazyResolverStub(GV)) {
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+ SymToPrint);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+ !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = getSymbol(GV);
+ }
-} // end anonymous namespace
+ SymToPrint->print(O, MAI);
+
+ printOffset(MO.getOffset(), O);
+}
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
@@ -165,10 +199,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = PPCInstrInfo::getRegNumForOperand(MI->getDesc(),
- MO.getReg(), OpNo);
-
- const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+    // The MI is always INLINEASM here, and UseVSXReg is always false.
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
@@ -192,26 +224,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress: {
- // Computing the address of a global symbol, not calling it.
- const GlobalValue *GV = MO.getGlobal();
- MCSymbol *SymToPrint;
-
- // External or weakly linked global variables need non-lazily-resolved stubs
- if (Subtarget->hasLazyResolverStub(GV)) {
- SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
- SymToPrint);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
- !GV->hasInternalLinkage());
- } else {
- SymToPrint = getSymbol(GV);
- }
-
- SymToPrint->print(O, MAI);
-
- printOffset(MO.getOffset(), O);
+ PrintSymbolOperand(MO, O);
return;
}
@@ -224,7 +237,6 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -233,9 +245,7 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
- case 'c': // Don't print "$" before a global var name or constant.
- break; // PPC never has a prefix.
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
case 'L': // Write second word of DImode reference.
// Verify that this operand has two consecutive registers.
if (!MI->getOperand(OpNo).isReg() ||
@@ -277,7 +287,6 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// assembler operand.
bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
@@ -460,6 +469,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.getOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ const Module *M = MF->getFunction().getParent();
assert(MI->getOperand(0).isReg() &&
((Subtarget->isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
@@ -473,8 +483,14 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
if (!Subtarget->isPPC64() && !Subtarget->isDarwin() &&
isPositionIndependent())
Kind = MCSymbolRefExpr::VK_PLT;
- const MCSymbolRefExpr *TlsRef =
+ const MCExpr *TlsRef =
MCSymbolRefExpr::create(TlsGetAddr, Kind, OutContext);
+
+  // Add a 32768 offset to the symbol so that we follow the current GOT/PLT ABI.
+ if (Kind == MCSymbolRefExpr::VK_PLT && Subtarget->isSecurePlt() &&
+ M->getPICLevel() == PICLevel::BigPIC)
+ TlsRef = MCBinaryExpr::createAdd(
+ TlsRef, MCConstantExpr::create(32768, OutContext), OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
@@ -576,34 +592,30 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Into: lwz %rt, .L0$poff - .L0$pb(%ri)
// add %rd, %rt, %ri
// or into (if secure plt mode is on):
- // addis r30, r30, .LTOC - .L0$pb@ha
- // addi r30, r30, .LTOC - .L0$pb@l
+ // addis r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@ha
+ // addi r30, r30, {.LTOC,_GLOBAL_OFFSET_TABLE} - .L0$pb@l
// Get the offset from the GOT Base Register to the GOT
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
if (Subtarget->isSecurePlt() && isPositionIndependent() ) {
unsigned PICR = TmpInst.getOperand(0).getReg();
- MCSymbol *LTOCSymbol = OutContext.getOrCreateSymbol(StringRef(".LTOC"));
+ MCSymbol *BaseSymbol = OutContext.getOrCreateSymbol(
+ M->getPICLevel() == PICLevel::SmallPIC ? "_GLOBAL_OFFSET_TABLE_"
+ : ".LTOC");
const MCExpr *PB =
- MCSymbolRefExpr::create(MF->getPICBaseSymbol(),
- OutContext);
+ MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
- const MCExpr *LTOCDeltaExpr =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(LTOCSymbol, OutContext),
- PB, OutContext);
+ const MCExpr *DeltaExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(BaseSymbol, OutContext), PB, OutContext);
- const MCExpr *LTOCDeltaHi =
- PPCMCExpr::createHa(LTOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(PICR)
- .addReg(PICR)
- .addExpr(LTOCDeltaHi));
+ const MCExpr *DeltaHi = PPCMCExpr::createHa(DeltaExpr, false, OutContext);
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::ADDIS).addReg(PICR).addReg(PICR).addExpr(DeltaHi));
- const MCExpr *LTOCDeltaLo =
- PPCMCExpr::createLo(LTOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
- .addReg(PICR)
- .addReg(PICR)
- .addExpr(LTOCDeltaLo));
+ const MCExpr *DeltaLo = PPCMCExpr::createLo(DeltaExpr, false, OutContext);
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::ADDI).addReg(PICR).addReg(PICR).addExpr(DeltaLo));
return;
} else {
MCSymbol *PICOffset =
@@ -1640,6 +1652,9 @@ createPPCAsmPrinterPass(TargetMachine &tm,
std::unique_ptr<MCStreamer> &&Streamer) {
if (tm.getTargetTriple().isMacOSX())
return new PPCDarwinAsmPrinter(tm, std::move(Streamer));
+ if (tm.getTargetTriple().isOSAIX())
+ return new PPCAIXAsmPrinter(tm, std::move(Streamer));
+
return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
}
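
Aside (not part of the imported patch): the secure-PLT change above biases the __tls_get_addr reference by 32768 under big PIC. Below is a minimal C++ sketch of how such a biased MC expression is composed, using only the MC calls that appear in the hunk; the helper name and the standalone-function framing are illustrative assumptions, not code from this import.

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"

// Build a @plt reference to __tls_get_addr and add the 0x8000 (32768) bias
// expected by the secure-PLT big-PIC GOT/PLT convention.
static const llvm::MCExpr *makeBiasedTlsGetAddrRef(llvm::MCContext &Ctx) {
  using namespace llvm;
  MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
  const MCExpr *Ref =
      MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_PLT, Ctx);
  return MCBinaryExpr::createAdd(Ref, MCConstantExpr::create(32768, Ctx), Ctx);
}
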
diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 55e105dad0e5..104cf2ba3c00 100644
--- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -1,9 +1,8 @@
//===- PPCBoolRetToInt.cpp ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index bbb977f090c5..5e9a661f8f0b 100644
--- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -1,9 +1,8 @@
//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -34,10 +33,6 @@ STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
-namespace llvm {
- void initializePPCBranchCoalescingPass(PassRegistry&);
-}
-
//===----------------------------------------------------------------------===//
// PPCBranchCoalescing
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 0d1bb9297bcb..793d690baec3 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -1,9 +1,8 @@
//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,16 +25,13 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "ppc-branch-select"
STATISTIC(NumExpanded, "Number of branches expanded to long format");
-namespace llvm {
- void initializePPCBSelPass(PassRegistry&);
-}
-
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
@@ -48,6 +44,17 @@ namespace {
// size that is due to potential padding.
std::vector<std::pair<unsigned, unsigned>> BlockSizes;
+  // The number of the first block that has an imprecise instruction address.
+ int FirstImpreciseBlock = -1;
+
+ unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset);
+ unsigned ComputeBlockSizes(MachineFunction &Fn);
+ void modifyAdjustment(MachineFunction &Fn);
+ int computeBranchSize(MachineFunction &Fn,
+ const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dest,
+ unsigned BrOffset);
+
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -70,43 +77,47 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
return new PPCBSel();
}
-bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
- const PPCInstrInfo *TII =
- static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
- // Give the blocks of the function a dense, in-order, numbering.
- Fn.RenumberBlocks();
- BlockSizes.resize(Fn.getNumBlockIDs());
-
- auto GetAlignmentAdjustment =
- [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
- unsigned Align = MBB.getAlignment();
- if (!Align)
- return 0;
-
- unsigned AlignAmt = 1 << Align;
- unsigned ParentAlign = MBB.getParent()->getAlignment();
-
- if (Align <= ParentAlign)
- return OffsetToAlignment(Offset, AlignAmt);
-
- // The alignment of this MBB is larger than the function's alignment, so we
- // can't tell whether or not it will insert nops. Assume that it will.
- return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
- };
+/// To make the MBB aligned, we need to add an adjustment value to the
+/// original Offset.
+unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
+ unsigned Offset) {
+ unsigned Align = MBB.getAlignment();
+ if (!Align)
+ return 0;
+
+ unsigned AlignAmt = 1 << Align;
+ unsigned ParentAlign = MBB.getParent()->getAlignment();
+
+ if (Align <= ParentAlign)
+ return OffsetToAlignment(Offset, AlignAmt);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not it will insert nops. Assume that it will.
+ if (FirstImpreciseBlock < 0)
+ FirstImpreciseBlock = MBB.getNumber();
+ return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
+}
- // We need to be careful about the offset of the first block in the function
- // because it might not have the function's alignment. This happens because,
- // under the ELFv2 ABI, for functions which require a TOC pointer, we add a
- // two-instruction sequence to the start of the function.
- // Note: This needs to be synchronized with the check in
- // PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+/// We need to be careful about the offset of the first block in the function
+/// because it might not have the function's alignment. This happens because,
+/// under the ELFv2 ABI, for functions which require a TOC pointer, we add a
+/// two-instruction sequence to the start of the function.
+/// Note: This needs to be synchronized with the check in
+/// PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+static inline unsigned GetInitialOffset(MachineFunction &Fn) {
unsigned InitialOffset = 0;
if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
!Fn.getRegInfo().use_empty(PPC::X2))
InitialOffset = 8;
+ return InitialOffset;
+}
+
+/// Measure each MBB and compute a size for the entire function.
+unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ unsigned FuncSize = GetInitialOffset(Fn);
- // Measure each MBB and compute a size for the entire function.
- unsigned FuncSize = InitialOffset;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock *MBB = &*MFI;
@@ -124,13 +135,145 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
}
unsigned BlockSize = 0;
- for (MachineInstr &MI : *MBB)
+ for (MachineInstr &MI : *MBB) {
BlockSize += TII->getInstSizeInBytes(MI);
+ if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
+ FirstImpreciseBlock = MBB->getNumber();
+ }
BlockSizes[MBB->getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
+ return FuncSize;
+}
+
+/// Modify the basic block alignment adjustments.
+void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
+ unsigned Offset = GetInitialOffset(Fn);
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+
+ if (MBB->getNumber() > 0) {
+ auto &BS = BlockSizes[MBB->getNumber()-1];
+ BS.first -= BS.second;
+ Offset -= BS.second;
+
+ unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+
+ BS.first += AlignExtra;
+ BS.second = AlignExtra;
+
+ Offset += AlignExtra;
+ }
+
+ Offset += BlockSizes[MBB->getNumber()].first;
+ }
+}
+
+/// Determine the offset from the branch in Src block to the Dest block.
+/// BrOffset is the offset of the branch instruction inside Src block.
+int PPCBSel::computeBranchSize(MachineFunction &Fn,
+ const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dest,
+ unsigned BrOffset) {
+ int BranchSize;
+ unsigned MaxAlign = 2;
+ bool NeedExtraAdjustment = false;
+ if (Dest->getNumber() <= Src->getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = BrOffset;
+ MaxAlign = std::max(MaxAlign, Src->getAlignment());
+
+ int DestBlock = Dest->getNumber();
+ BranchSize += BlockSizes[DestBlock].first;
+ for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
+ BranchSize += BlockSizes[i].first;
+ MaxAlign = std::max(MaxAlign,
+ Fn.getBlockNumbered(i)->getAlignment());
+ }
+
+ NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+ (DestBlock >= FirstImpreciseBlock);
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ unsigned StartBlock = Src->getNumber();
+ BranchSize = BlockSizes[StartBlock].first - BrOffset;
+
+ MaxAlign = std::max(MaxAlign, Dest->getAlignment());
+ for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
+ BranchSize += BlockSizes[i].first;
+ MaxAlign = std::max(MaxAlign,
+ Fn.getBlockNumbered(i)->getAlignment());
+ }
+
+ NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
+ (Src->getNumber() >= FirstImpreciseBlock);
+ }
+
+  // We tend to overestimate code size because of large alignment and inline
+  // assembly. Usually this yields a larger computed branch offset, but it
+  // can also yield a smaller computed branch offset than the actual one.
+  // If the offset is close to the limit of the encoding, this may cause
+  // problems at run time.
+ // Following is a simplified example.
+ //
+ // actual estimated
+ // address address
+ // ...
+ // bne Far 100 10c
+ // .p2align 4
+ // Near: 110 110
+ // ...
+ // Far: 8108 8108
+ //
+ // Actual offset: 0x8108 - 0x100 = 0x8008
+ // Computed offset: 0x8108 - 0x10c = 0x7ffc
+ //
+  // This example also shows when we can get the largest gap between the
+  // estimated offset and the actual offset. If there is an aligned block
+  // ABB between the branch and the target, assume its alignment is <align>
+  // bits. Now consider the accumulated function size FSIZE up to the end
+  // of the previous block PBB. If the estimated FSIZE is a multiple of
+  // 2^<align>, we don't need any padding for the estimated address of ABB.
+  // If the actual FSIZE at the end of PBB is 4 bytes more than a multiple
+  // of 2^<align>, then we need (2^<align> - 4) bytes of padding. It also
+  // means the actual branch offset is (2^<align> - 4) larger than the
+  // computed offset. Other actual FSIZE values need fewer padding bytes,
+  // and so produce a smaller gap between the actual and computed offsets.
+ //
+ // On the other hand, if the inline asm or large alignment occurs
+ // between the branch block and destination block, the estimated address
+ // can be <delta> larger than actual address. If padding bytes are
+ // needed for a later aligned block, the actual number of padding bytes
+ // is at most <delta> more than estimated padding bytes. So the actual
+ // aligned block address is less than or equal to the estimated aligned
+ // block address. So the actual branch offset is less than or equal to
+ // computed branch offset.
+ //
+ // The computed offset is at most ((1 << alignment) - 4) bytes smaller
+ // than actual offset. So we add this number to the offset for safety.
+ if (NeedExtraAdjustment)
+ BranchSize += (1 << MaxAlign) - 4;
+
+ return BranchSize;
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+ FirstImpreciseBlock = -1;
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = ComputeBlockSizes(Fn);
+
// If the entire function is smaller than the displacement of a branch field,
// we know we don't need to shrink any branches in this function. This is a
// common case.
@@ -178,23 +321,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// Determine the offset from the current branch to the destination
// block.
- int BranchSize;
- if (Dest->getNumber() <= MBB.getNumber()) {
- // If this is a backwards branch, the delta is the offset from the
- // start of this block to this branch, plus the sizes of all blocks
- // from this block to the dest.
- BranchSize = MBBStartOffset;
-
- for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i].first;
- } else {
- // Otherwise, add the size of the blocks between this block and the
- // dest to the number of bytes left in this block.
- BranchSize = -MBBStartOffset;
-
- for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i].first;
- }
+ int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset);
// If this branch is in range, ignore it.
if (isInt<16>(BranchSize)) {
@@ -253,26 +380,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
if (MadeChange) {
// If we're going to iterate again, make sure we've updated our
// padding-based contributions to the block sizes.
- unsigned Offset = InitialOffset;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
-
- if (MBB->getNumber() > 0) {
- auto &BS = BlockSizes[MBB->getNumber()-1];
- BS.first -= BS.second;
- Offset -= BS.second;
-
- unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
-
- BS.first += AlignExtra;
- BS.second = AlignExtra;
-
- Offset += AlignExtra;
- }
-
- Offset += BlockSizes[MBB->getNumber()].first;
- }
+ modifyAdjustment(Fn);
}
EverMadeChange |= MadeChange;
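
Aside (not part of the imported patch): the (1 << MaxAlign) - 4 slack added in computeBranchSize can be checked against the numbers from the comment's own example. A small standalone C++ sketch follows, assuming MaxAlign = 4 to match the .p2align 4 block in that example.

#include <cstdio>

int main() {
  unsigned MaxAlign = 4;             // log2 alignment of the aligned block
  int Computed = 0x8108 - 0x10c;     // estimated branch offset (0x7ffc)
  int Actual = 0x8108 - 0x100;       // actual branch offset    (0x8008)
  int Adjusted = Computed + ((1 << MaxAlign) - 4);
  // Adjusted == 0x8008, so the conservative estimate now covers the actual
  // offset and the 16-bit range check rejects this branch, as it must.
  std::printf("computed=%#x actual=%#x adjusted=%#x\n", Computed, Actual,
              Adjusted);
  return 0;
}
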
diff --git a/lib/Target/PowerPC/PPCCCState.cpp b/lib/Target/PowerPC/PPCCCState.cpp
index 5510a95430f5..5116f0d121f4 100644
--- a/lib/Target/PowerPC/PPCCCState.cpp
+++ b/lib/Target/PowerPC/PPCCCState.cpp
@@ -1,9 +1,8 @@
//===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCCCState.h b/lib/Target/PowerPC/PPCCCState.h
index 9be9f11dbea3..e3499597474c 100644
--- a/lib/Target/PowerPC/PPCCCState.h
+++ b/lib/Target/PowerPC/PPCCCState.h
@@ -1,9 +1,8 @@
//===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 6b9e2383e36f..2b8d9b87724f 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -1,9 +1,8 @@
//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -72,70 +71,7 @@ using namespace llvm;
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
-// The latency of mtctr is only justified if there are more than 4
-// comparisons that will be removed as a result.
-static cl::opt<unsigned>
-SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
- cl::desc("Loops with a constant trip count smaller than "
- "this value will not use the count register."));
-
-STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
-
-namespace llvm {
- void initializePPCCTRLoopsPass(PassRegistry&);
-#ifndef NDEBUG
- void initializePPCCTRLoopsVerifyPass(PassRegistry&);
-#endif
-}
-
namespace {
- struct PPCCTRLoops : public FunctionPass {
-
-#ifndef NDEBUG
- static int Counter;
-#endif
-
- public:
- static char ID;
-
- PPCCTRLoops() : FunctionPass(ID) {
- initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- private:
- bool mightUseCTR(BasicBlock *BB);
- bool convertToCTRLoop(Loop *L);
-
- private:
- const PPCTargetMachine *TM;
- const PPCSubtarget *STI;
- const PPCTargetLowering *TLI;
- const DataLayout *DL;
- const TargetLibraryInfo *LibInfo;
- const TargetTransformInfo *TTI;
- LoopInfo *LI;
- ScalarEvolution *SE;
- DominatorTree *DT;
- bool PreserveLCSSA;
- TargetSchedModel SchedModel;
- };
-
- char PPCCTRLoops::ID = 0;
-#ifndef NDEBUG
- int PPCCTRLoops::Counter = 0;
-#endif
#ifndef NDEBUG
struct PPCCTRLoopsVerify : public MachineFunctionPass {
@@ -161,16 +97,6 @@ namespace {
#endif // NDEBUG
} // end anonymous namespace
-INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
- false, false)
-
-FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); }
-
#ifndef NDEBUG
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
"PowerPC CTR Loops Verify", false, false)
@@ -183,511 +109,6 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
}
#endif // NDEBUG
-bool PPCCTRLoops::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- TM = &TPC->getTM<PPCTargetMachine>();
- STI = TM->getSubtargetImpl(F);
- TLI = STI->getTargetLowering();
-
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- bool MadeChange = false;
-
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- if (!L->getParentLoop())
- MadeChange |= convertToCTRLoop(L);
- }
-
- return MadeChange;
-}
-
-static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
-
- return false;
-}
-
-// Determining the address of a TLS variable results in a function call in
-// certain TLS models.
-static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) {
- const auto *GV = dyn_cast<GlobalValue>(MemAddr);
- if (!GV) {
- // Recurse to check for constants that refer to TLS global variables.
- if (const auto *CV = dyn_cast<Constant>(MemAddr))
- for (const auto &CO : CV->operands())
- if (memAddrUsesCTR(TM, CO))
- return true;
-
- return false;
- }
-
- if (!GV->isThreadLocal())
- return false;
- TLSModel::Model Model = TM.getTLSModel(GV);
- return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
-}
-
-// Loop through the inline asm constraints and look for something that clobbers
-// ctr.
-static bool asmClobbersCTR(InlineAsm *IA) {
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
- InlineAsm::ConstraintInfo &C = CIV[i];
- if (C.Type != InlineAsm::isInput)
- for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
- if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
- return true;
- }
- return false;
-}
-
-bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
- for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
- J != JE; ++J) {
- if (CallInst *CI = dyn_cast<CallInst>(J)) {
- // Inline ASM is okay, unless it clobbers the ctr register.
- if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
- if (asmClobbersCTR(IA))
- return true;
- continue;
- }
-
- if (Function *F = CI->getCalledFunction()) {
- // Most intrinsics don't become function calls, but some might.
- // sin, cos, exp and log are always calls.
- unsigned Opcode = 0;
- if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
- switch (F->getIntrinsicID()) {
- default: continue;
- // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
- // we're definitely using CTR.
- case Intrinsic::ppc_is_decremented_ctr_nonzero:
- case Intrinsic::ppc_mtctr:
- return true;
-
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::setjmp:
-
-#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
- // let's return it to _setjmp state
-# pragma pop_macro("setjmp")
-# undef setjmp_undefined_for_msvc
-#endif
-
- case Intrinsic::longjmp:
-
- // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
- // because, although it does clobber the counter register, the
- // control can't then return to inside the loop unless there is also
- // an eh_sjlj_setjmp.
- case Intrinsic::eh_sjlj_setjmp:
-
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- case Intrinsic::powi:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::sin:
- case Intrinsic::cos:
- return true;
- case Intrinsic::copysign:
- if (CI->getArgOperand(0)->getType()->getScalarType()->
- isPPC_FP128Ty())
- return true;
- else
- continue; // ISD::FCOPYSIGN is never a library call.
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
- case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
- case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
- case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
- }
- }
-
- // PowerPC does not use [US]DIVREM or other library calls for
- // operations on regular types which are not otherwise library calls
- // (i.e. soft float or atomics). If adapting for targets that do,
- // additional care is required here.
-
- LibFunc Func;
- if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
- LibInfo->getLibFunc(F->getName(), Func) &&
- LibInfo->hasOptimizedCodeGen(Func)) {
- // Non-read-only functions are never treated as intrinsics.
- if (!CI->onlyReadsMemory())
- return true;
-
- // Conversion happens only for FP calls.
- if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
- return true;
-
- switch (Func) {
- default: return true;
- case LibFunc_copysign:
- case LibFunc_copysignf:
- continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc_copysignl:
- return true;
- case LibFunc_fabs:
- case LibFunc_fabsf:
- case LibFunc_fabsl:
- continue; // ISD::FABS is never a library call.
- case LibFunc_sqrt:
- case LibFunc_sqrtf:
- case LibFunc_sqrtl:
- Opcode = ISD::FSQRT; break;
- case LibFunc_floor:
- case LibFunc_floorf:
- case LibFunc_floorl:
- Opcode = ISD::FFLOOR; break;
- case LibFunc_nearbyint:
- case LibFunc_nearbyintf:
- case LibFunc_nearbyintl:
- Opcode = ISD::FNEARBYINT; break;
- case LibFunc_ceil:
- case LibFunc_ceilf:
- case LibFunc_ceill:
- Opcode = ISD::FCEIL; break;
- case LibFunc_rint:
- case LibFunc_rintf:
- case LibFunc_rintl:
- Opcode = ISD::FRINT; break;
- case LibFunc_round:
- case LibFunc_roundf:
- case LibFunc_roundl:
- Opcode = ISD::FROUND; break;
- case LibFunc_trunc:
- case LibFunc_truncf:
- case LibFunc_truncl:
- Opcode = ISD::FTRUNC; break;
- case LibFunc_fmin:
- case LibFunc_fminf:
- case LibFunc_fminl:
- Opcode = ISD::FMINNUM; break;
- case LibFunc_fmax:
- case LibFunc_fmaxf:
- case LibFunc_fmaxl:
- Opcode = ISD::FMAXNUM; break;
- }
- }
-
- if (Opcode) {
- EVT EVTy =
- TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
-
- if (EVTy == MVT::Other)
- return true;
-
- if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
- continue;
- else if (EVTy.isVector() &&
- TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
- continue;
-
- return true;
- }
- }
-
- return true;
- } else if (isa<BinaryOperator>(J) &&
- J->getType()->getScalarType()->isPPC_FP128Ty()) {
- // Most operations on ppc_f128 values become calls.
- return true;
- } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
- isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
- CastInst *CI = cast<CastInst>(J);
- if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
- CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) ||
- isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType()))
- return true;
- } else if (isLargeIntegerTy(!TM->isPPC64(),
- J->getType()->getScalarType()) &&
- (J->getOpcode() == Instruction::UDiv ||
- J->getOpcode() == Instruction::SDiv ||
- J->getOpcode() == Instruction::URem ||
- J->getOpcode() == Instruction::SRem)) {
- return true;
- } else if (!TM->isPPC64() &&
- isLargeIntegerTy(false, J->getType()->getScalarType()) &&
- (J->getOpcode() == Instruction::Shl ||
- J->getOpcode() == Instruction::AShr ||
- J->getOpcode() == Instruction::LShr)) {
- // Only on PPC32, for 128-bit integers (specifically not 64-bit
- // integers), these might be runtime calls.
- return true;
- } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
- // On PowerPC, indirect jumps use the counter register.
- return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
- return true;
- }
-
- // FREM is always a call.
- if (J->getOpcode() == Instruction::FRem)
- return true;
-
- if (STI->useSoftFloat()) {
- switch(J->getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FCmp:
- return true;
- }
- }
-
- for (Value *Operand : J->operands())
- if (memAddrUsesCTR(*TM, Operand))
- return true;
- }
-
- return false;
-}
-bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
- bool MadeChange = false;
-
- // Do not convert small short loops to CTR loop.
- unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
- if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
- SmallPtrSet<const Value *, 32> EphValues;
- auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- *L->getHeader()->getParent());
- CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
- CodeMetrics Metrics;
- for (BasicBlock *BB : L->blocks())
- Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
- // 6 is an approximate latency for the mtctr instruction.
- if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
- return false;
- }
-
- // Process nested loops first.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
- MadeChange |= convertToCTRLoop(*I);
- LLVM_DEBUG(dbgs() << "Nested loop converted\n");
- }
-
- // If a nested loop has been converted, then we can't convert this loop.
- if (MadeChange)
- return MadeChange;
-
- // Bail out if the loop has irreducible control flow.
- LoopBlocksRPO RPOT(L);
- RPOT.perform(LI);
- if (containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI))
- return false;
-
-#ifndef NDEBUG
- // Stop trying after reaching the limit (if any).
- int Limit = CTRLoopLimit;
- if (Limit >= 0) {
- if (Counter >= CTRLoopLimit)
- return false;
- Counter++;
- }
-#endif
-
- // We don't want to spill/restore the counter register, and so we don't
- // want to use the counter register if the loop contains calls.
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I)
- if (mightUseCTR(*I))
- return MadeChange;
-
- SmallVector<BasicBlock*, 4> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
-
- // If there is an exit edge known to be frequently taken,
- // we should not transform this loop.
- for (auto &BB : ExitingBlocks) {
- Instruction *TI = BB->getTerminator();
- if (!TI) continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- uint64_t TrueWeight = 0, FalseWeight = 0;
- if (!BI->isConditional() ||
- !BI->extractProfMetadata(TrueWeight, FalseWeight))
- continue;
-
- // If the exit path is more frequent than the loop path,
- // we return here without further analysis for this loop.
- bool TrueIsExit = !L->contains(BI->getSuccessor(0));
- if (( TrueIsExit && FalseWeight < TrueWeight) ||
- (!TrueIsExit && FalseWeight > TrueWeight))
- return MadeChange;
- }
- }
-
- BasicBlock *CountedExitBlock = nullptr;
- const SCEV *ExitCount = nullptr;
- BranchInst *CountedExitBranch = nullptr;
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- IE = ExitingBlocks.end(); I != IE; ++I) {
- const SCEV *EC = SE->getExitCount(L, *I);
- LLVM_DEBUG(dbgs() << "Exit Count for " << *L << " from block "
- << (*I)->getName() << ": " << *EC << "\n");
- if (isa<SCEVCouldNotCompute>(EC))
- continue;
- if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
- if (ConstEC->getValue()->isZero())
- continue;
- } else if (!SE->isLoopInvariant(EC, L))
- continue;
-
- if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32))
- continue;
-
- // If this exiting block is contained in a nested loop, it is not eligible
- // for insertion of the branch-and-decrement since the inner loop would
- // end up messing up the value in the CTR.
- if (LI->getLoopFor(*I) != L)
- continue;
-
- // We now have a loop-invariant count of loop iterations (which is not the
- // constant zero) for which we know that this loop will not exit via this
- // existing block.
-
- // We need to make sure that this block will run on every loop iteration.
- // For this to be true, we must dominate all blocks with backedges. Such
- // blocks are in-loop predecessors to the header block.
- bool NotAlways = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
- if (!L->contains(*PI))
- continue;
-
- if (!DT->dominates(*I, *PI)) {
- NotAlways = true;
- break;
- }
- }
-
- if (NotAlways)
- continue;
-
- // Make sure this blocks ends with a conditional branch.
- Instruction *TI = (*I)->getTerminator();
- if (!TI)
- continue;
-
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (!BI->isConditional())
- continue;
-
- CountedExitBranch = BI;
- } else
- continue;
-
- // Note that this block may not be the loop latch block, even if the loop
- // has a latch block.
- CountedExitBlock = *I;
- ExitCount = EC;
- break;
- }
-
- if (!CountedExitBlock)
- return MadeChange;
-
- BasicBlock *Preheader = L->getLoopPreheader();
-
- // If we don't have a preheader, then insert one. If we already have a
- // preheader, then we can use it (except if the preheader contains a use of
- // the CTR register because some such uses might be reordered by the
- // selection DAG after the mtctr instruction).
- if (!Preheader || mightUseCTR(Preheader))
- Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
- if (!Preheader)
- return MadeChange;
-
- LLVM_DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName()
- << "\n");
-
- // Insert the count into the preheader and replace the condition used by the
- // selected branch.
- MadeChange = true;
-
- SCEVExpander SCEVE(*SE, *DL, "loopcnt");
- LLVMContext &C = SE->getContext();
- Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C);
- if (!ExitCount->getType()->isPointerTy() &&
- ExitCount->getType() != CountType)
- ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
- ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
- Value *ECValue =
- SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
-
- IRBuilder<> CountBuilder(Preheader->getTerminator());
- Module *M = Preheader->getParent()->getParent();
- Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
- CountType);
- CountBuilder.CreateCall(MTCTRFunc, ECValue);
-
- IRBuilder<> CondBuilder(CountedExitBranch);
- Value *DecFunc =
- Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
- Value *NewCond = CondBuilder.CreateCall(DecFunc, {});
- Value *OldCond = CountedExitBranch->getCondition();
- CountedExitBranch->setCondition(NewCond);
-
- // The false branch must exit the loop.
- if (!L->contains(CountedExitBranch->getSuccessor(0)))
- CountedExitBranch->swapSuccessors();
-
- // The old condition may be dead now, and may have even created a dead PHI
- // (the original induction variable).
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
- // Run through the basic blocks of the loop and see if any of them have dead
- // PHIs that can be removed.
- for (auto I : L->blocks())
- DeleteDeadPHIs(I);
-
- ++NumCTRLoops;
- return MadeChange;
-}
-
#ifndef NDEBUG
static bool clobbersCTR(const MachineInstr &MI) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
diff --git a/lib/Target/PowerPC/PPCCallingConv.cpp b/lib/Target/PowerPC/PPCCallingConv.cpp
new file mode 100644
index 000000000000..77cdf5c939dc
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -0,0 +1,162 @@
+//===-- PPCCallingConv.cpp - ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterInfo.h"
+#include "PPCCallingConv.h"
+#include "PPCSubtarget.h"
+#include "PPCCCState.h"
+using namespace llvm;
+
+inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ llvm_unreachable("The AnyReg calling convention is only supported by the " \
+ "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to the PPC C calling convention in Release builds.
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
+ unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+ int RegsLeft = NumArgRegs - RegNum;
+
+  // Skip if there are not enough registers left for the long double type
+  // (4 GPRs in soft float mode) and put the long double argument on the stack.
+ if (RegNum != NumArgRegs && RegsLeft < 4) {
+ for (int i = 0; i < RegsLeft; i++) {
+ State.AllocateReg(ArgRegs[RegNum + i]);
+ }
+ }
+
+ return false;
+}
+
+static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+// Split F64 arguments into two 32-bit consecutive registers.
+static bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 };
+ static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 };
+
+ // Try to get the first register.
+ unsigned Reg = State.AllocateReg(HiRegList);
+ if (!Reg)
+ return false;
+
+ unsigned i;
+ for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+// Same as above, but for return values, so only allocate for R3 and R4
+static bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg HiRegList[] = { PPC::R3 };
+ static const MCPhysReg LoRegList[] = { PPC::R4 };
+
+ // Try to get the first register.
+ unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+ if (!Reg)
+ return false;
+
+ unsigned i;
+ for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i)
+ if (HiRegList[i] == Reg)
+ break;
+
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+ LocVT, LocInfo));
+ return true;
+}
+
+#include "PPCGenCallingConv.inc"
diff --git a/lib/Target/PowerPC/PPCCallingConv.h b/lib/Target/PowerPC/PPCCallingConv.h
index eb904a858592..03d9be0a73d9 100644
--- a/lib/Target/PowerPC/PPCCallingConv.h
+++ b/lib/Target/PowerPC/PPCCallingConv.h
@@ -1,9 +1,8 @@
//=== PPCCallingConv.h - PPC Custom Calling Convention Routines -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,14 +19,27 @@
namespace llvm {
-inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
- CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
- CCState &) {
- llvm_unreachable("The AnyReg calling convention is only supported by the " \
- "stackmap and patchpoint intrinsics.");
- // gracefully fallback to PPC C calling convention on Release builds.
- return false;
-}
+bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 22842d516e7d..369b9ce1a711 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,9 +1,8 @@
//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,7 @@ def RetCC_PPC64_AnyReg : CallingConv<[
]>;
// Return-value convention for PowerPC coldcc.
+let Entry = 1 in
def RetCC_PPC_Cold : CallingConv<[
// Use the same return registers as RetCC_PPC, but limited to only
// one return value. The remaining return values will be saved to
@@ -70,6 +70,7 @@ def RetCC_PPC_Cold : CallingConv<[
]>;
// Return-value convention for PowerPC
+let Entry = 1 in
def RetCC_PPC : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
@@ -90,7 +91,7 @@ def RetCC_PPC : CallingConv<[
CCIfSubtarget<"hasSPE()",
CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
CCIfSubtarget<"hasSPE()",
- CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+ CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>,
// For P9, f128 are passed in vector registers.
CCIfType<[f128],
@@ -126,6 +127,7 @@ def CC_PPC64_AnyReg : CallingConv<[
// Simple calling convention for 64-bit ELF PowerPC fast isel.
// Only handle ints and floats. All ints are promoted to i64.
// Vector types and quadword ints are not handled.
+let Entry = 1 in
def CC_PPC64_ELF_FIS : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
@@ -141,6 +143,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[
// All small ints are promoted to i64. Vector types, quadword ints,
// and multiple register returns are "supported" to avoid compile
// errors, but none are handled by the fast selector.
+let Entry = 1 in
def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_PPC64_AnyReg>>,
@@ -179,6 +182,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[i32],
CCIfSplit<CCIfNotSubtarget<"useSoftFloat()",
CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>,
+ CCIfType<[f64],
+ CCIfSubtarget<"hasSPE()",
+ CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
CCIfSplit<CCIfSubtarget<"useSoftFloat()",
CCIfOrigArgWasPPCF128<CCCustom<
"CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>,
@@ -199,7 +205,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>,
CCIfType<[f64],
CCIfSubtarget<"hasSPE()",
- CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>,
+ CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>,
CCIfType<[f32],
CCIfSubtarget<"hasSPE()",
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
@@ -228,12 +234,14 @@ def CC_PPC32_SVR4_Common : CallingConv<[
// This calling convention puts vector arguments always on the stack. It is used
// to assign vector arguments which belong to the variable portion of the
// parameter list of a variable argument function.
+let Entry = 1 in
def CC_PPC32_SVR4_VarArg : CallingConv<[
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
+let Entry = 1 in
def CC_PPC32_SVR4 : CallingConv<[
// QPX vectors mirror the scalar FP convention.
CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
@@ -265,6 +273,7 @@ def CC_PPC32_SVR4 : CallingConv<[
// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
// not passed by value.
+let Entry = 1 in
def CC_PPC32_SVR4_ByVal : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
@@ -300,6 +309,13 @@ def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
+def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
@@ -316,6 +332,13 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
+def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
// CSRs that are handled by prologue, epilogue.
def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
@@ -343,15 +366,22 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// and value may be altered by inter-library calls.
// Do not include r12 as it is used as a scratch register.
// Do not include return registers r3, f1, v2.
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
- (sequence "R%u", 14, 31),
- F0, (sequence "F%u", 2, 31),
- (sequence "CR%u", 0, 7))>;
+def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
+ (sequence "R%u", 14, 31),
+ (sequence "CR%u", 0, 7))>;
+
+def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+ F0, (sequence "F%u", 2, 31))>;
+
def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC,
(sequence "V%u", 0, 1),
(sequence "V%u", 3, 31))>;
+def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+ (sequence "S%u", 4, 10),
+ (sequence "S%u", 14, 31))>;
+
def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10),
(sequence "X%u", 14, 31),
F0, (sequence "F%u", 2, 31),
diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp
index ac931f7d0ec0..aa5d830b549e 100644
--- a/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -1,9 +1,8 @@
//===------------- PPCEarlyReturn.cpp - Form Early Returns ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ using namespace llvm;
STATISTIC(NumBCLR, "Number of early conditional returns");
STATISTIC(NumBLR, "Number of early returns");
-namespace llvm {
- void initializePPCEarlyReturnPass(PassRegistry&);
-}
-
namespace {
// PPCEarlyReturn pass - For simple functions without epilogue code, move
// returns up, and create conditional returns, to avoid unnecessary
@@ -184,11 +179,11 @@ public:
// nothing to do.
if (MF.size() < 2)
return Changed;
-
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+
+ // We can't use a range-based for loop due to clobbering the iterator.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) {
MachineBasicBlock &B = *I++;
- if (processBlock(B))
- Changed = true;
+ Changed |= processBlock(B);
}
return Changed;
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
index a03e691ef5bb..e8ef451c7ec9 100644
--- a/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -1,9 +1,8 @@
//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 3b2d92db78b9..264d6b590f95 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1,9 +1,8 @@
//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -152,6 +151,14 @@ class PPCFastISel final : public FastISel {
bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
return RC->getID() == PPC::VSSRCRegClassID;
}
+ unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
+ unsigned SrcReg, unsigned Flag = 0,
+ unsigned SubReg = 0) {
+ unsigned TmpReg = createResultReg(ToRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
+ return TmpReg;
+ }
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg,
const PPC::Predicate Pred);
@@ -187,7 +194,6 @@ class PPCFastISel final : public FastISel {
unsigned &NumBytes,
bool IsVarArg);
bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
- LLVM_ATTRIBUTE_UNUSED CCAssignFn *usePPC32CCs(unsigned Flag);
private:
#include "PPCGenFastISel.inc"
@@ -196,23 +202,6 @@ class PPCFastISel final : public FastISel {
} // end anonymous namespace
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
- if (Flag == 1)
- return CC_PPC32_SVR4;
- else if (Flag == 2)
- return CC_PPC32_SVR4_ByVal;
- else if (Flag == 3)
- return CC_PPC32_SVR4_VarArg;
- else if (Flag == 4)
- return RetCC_PPC_Cold;
- else
- return RetCC_PPC;
-}
-
static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
switch (Pred) {
// These are not representable with any single compare.
@@ -874,7 +863,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
unsigned CmpOpc;
bool NeedsExt = false;
- auto RC = MRI.getRegClass(SrcReg1);
+
+ auto RC1 = MRI.getRegClass(SrcReg1);
+ auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;
+
switch (SrcVT.SimpleTy) {
default: return false;
case MVT::f32:
@@ -893,12 +885,10 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
}
} else {
CmpOpc = PPC::FCMPUS;
- if (isVSSRCRegClass(RC)) {
- unsigned TmpReg = createResultReg(&PPC::F4RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg1);
- SrcReg1 = TmpReg;
- }
+ if (isVSSRCRegClass(RC1))
+ SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
+ if (RC2 && isVSSRCRegClass(RC2))
+ SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
}
break;
case MVT::f64:
@@ -915,7 +905,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
CmpOpc = PPC::EFDCMPGT;
break;
}
- } else if (isVSFRCRegClass(RC)) {
+ } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
CmpOpc = PPC::XSCMPUDP;
} else {
CmpOpc = PPC::FCMPUD;
@@ -997,12 +987,17 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg;
-
+ auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::SPE4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
.addReg(SrcReg);
+ } else if (isVSFRCRegClass(RC)) {
+ DestReg = createResultReg(&PPC::VSSRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::XSRSP), DestReg)
+ .addReg(SrcReg);
} else {
DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1217,21 +1212,19 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (SrcReg == 0)
return false;
- // Convert f32 to f64 if necessary. This is just a meaningless copy
- // to get the register class right.
+ // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
+ // meaningless copy to get the register class right.
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
- if (InRC == &PPC::F4RCRegClass) {
- unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), TmpReg)
- .addReg(SrcReg);
- SrcReg = TmpReg;
- }
+ if (InRC == &PPC::F4RCRegClass)
+ SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
+ else if (InRC == &PPC::VSSRCRegClass)
+ SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
// Determine the opcode for the conversion, which takes place
- // entirely within FPRs.
+ // entirely within FPRs or VSRs.
unsigned DestReg;
unsigned Opc;
+ auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
@@ -1239,6 +1232,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
else
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
+ } else if (isVSFRCRegClass(RC)) {
+ DestReg = createResultReg(&PPC::VSFRCRegClass);
+ if (DstVT == MVT::i32)
+ Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
+ else
+ Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
} else {
DestReg = createResultReg(&PPC::F8RCRegClass);
if (DstVT == MVT::i32)
@@ -1520,11 +1519,7 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte
if (RetVT == CopyVT) {
const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
- ResultReg = createResultReg(CpyRC);
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(SourcePhysReg);
+ ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);
// If necessary, round the floating result to single precision.
} else if (CopyVT == MVT::f64) {
@@ -1537,12 +1532,9 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte
// used along the fast-isel path (not lowered), and downstream logic
// also doesn't like a direct subreg copy on a physical reg.)
} else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
- ResultReg = createResultReg(&PPC::GPRCRegClass);
// Convert physical register from G8RC to GPRC.
SourcePhysReg -= PPC::X0 - PPC::R0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(SourcePhysReg);
+ ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
}
assert(ResultReg && "ResultReg unset!");
@@ -1894,13 +1886,8 @@ bool PPCFastISel::SelectTrunc(const Instruction *I) {
return false;
// The only interesting case is when we need to switch register classes.
- if (SrcVT == MVT::i64) {
- unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(SrcReg, 0, PPC::sub_32);
- SrcReg = ResultReg;
- }
+ if (SrcVT == MVT::i64)
+ SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);
updateValueMap(I, SrcReg);
return true;
@@ -1977,6 +1964,13 @@ bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
case Instruction::Sub:
return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::Call:
+ // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ if (TM.getTargetTriple().isOSAIX())
+ break;
return selectCall(I);
case Instruction::Ret:
return SelectRet(I);
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 8263954994d2..ebfb1ef7f49b 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,7 +29,6 @@
using namespace llvm;
#define DEBUG_TYPE "framelowering"
-STATISTIC(NumNoNeedForFrame, "Number of functions without frames");
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
@@ -73,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
}
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
- if (STI.isDarwinABI() || STI.isPPC64())
+ if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
- // SVR4 ABI:
+ // 32-bit SVR4 ABI:
return 8;
}
@@ -446,12 +444,27 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
+/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
+/// call frame size. Update the MachineFunction object with the stack size.
+unsigned
+PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate) const {
+ unsigned NewMaxCallFrameSize = 0;
+ unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+ &NewMaxCallFrameSize);
+ MF.getFrameInfo().setStackSize(FrameSize);
+ MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
+ return FrameSize;
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
- bool UpdateMF,
- bool UseEstimate) const {
- MachineFrameInfo &MFI = MF.getFrameInfo();
+unsigned
+PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate,
+ unsigned *NewMaxCallFrameSize) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
@@ -469,6 +482,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
!MustSaveLR(MF, LR) && // No need to save LR.
+ !FI->mustSaveTOC() && // No need to save TOC.
!RegInfo->hasBasePointer(MF); // No special alignment.
// Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
@@ -477,10 +491,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
- NumNoNeedForFrame++;
// No need for frame
- if (UpdateMF)
- MFI.setStackSize(0);
return 0;
}
@@ -496,9 +507,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
if (MFI.hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
- // Update maximum call frame size.
- if (UpdateMF)
- MFI.setMaxCallFrameSize(maxCallFrameSize);
+ // Update the new max call frame size if the caller passes in a valid pointer.
+ if (NewMaxCallFrameSize)
+ *NewMaxCallFrameSize = maxCallFrameSize;
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -506,10 +517,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Make sure the frame is aligned.
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
- // Update frame info.
- if (UpdateMF)
- MFI.setStackSize(FrameSize);
-
return FrameSize;
}
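The masking step above is the usual round-up-to-alignment trick: with a power-of-two alignment A, AlignMask is A-1, and (Size + AlignMask) & ~AlignMask rounds Size up to the next multiple of A. A quick standalone check of that arithmetic:

#include <cassert>

// Round Size up to the next multiple of the (power-of-two) alignment.
static unsigned alignTo(unsigned Size, unsigned Alignment) {
  unsigned AlignMask = Alignment - 1;
  return (Size + AlignMask) & ~AlignMask;
}

int main() {
  assert(alignTo(0, 16) == 0);
  assert(alignTo(1, 16) == 16);
  assert(alignTo(112, 16) == 112); // already aligned: unchanged
  assert(alignTo(113, 16) == 128);
}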
@@ -690,7 +697,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
- unsigned FrameSize = determineFrameLayout(MF, false);
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -713,6 +720,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
return findScratchRegister(TmpMBB, true);
}
+bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+ // Abort if there is no register info or function info.
+ if (!RegInfo || !FI)
+ return false;
+
+ // Only move the stack update on ELFv2 ABI and PPC64.
+ if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
+ return false;
+
+ // Check the frame size first and return false if it does not fit the
+ // requirements.
+ // We need a non-zero frame size as well as a frame that will fit in the red
+ // zone. This is because by moving the stack pointer update we are now storing
+ // to the red zone until the stack pointer is updated. If we get an interrupt
+ // inside the prologue but before the stack update we now have a number of
+ // stores to the red zone and those stores must all fit.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned FrameSize = MFI.getStackSize();
+ if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
+ return false;
+
+ // Frame pointers and base pointers complicate matters so don't do anything
+ // if we have them. For example having a frame pointer will sometimes require
+ // a copy of r1 into r31 and that makes keeping track of updates to r1 more
+ // difficult.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ return false;
+
+ // Calls to fast_cc functions use different rules than the ABI for passing
+ // parameters on the stack, and using the PIC base in the function imposes
+ // restrictions similar to using the base pointer. It is not generally safe
+ // to move the stack pointer update in these situations.
+ if (FI->hasFastCall() || FI->usesPICBase())
+ return false;
+
+ // Finally we can move the stack update if we do not require register
+ // scavenging. Register scavenging can introduce more spills and so
+ // may make the frame size larger than we have computed.
+ return !RegInfo->requiresFrameIndexScavenging(MF);
+}
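The size gate in the predicate above is the interesting part: while the stack pointer update is delayed, every callee save lands in the red zone below the old stack pointer, so the whole frame must fit there (and a zero-sized frame has nothing to move). A minimal sketch of that check, assuming the usual 288-byte 64-bit ELF red zone (the real value comes from the subtarget):

#include <cassert>

// Model of the frame-size condition in stackUpdateCanBeMoved: stores issued
// before the stack pointer update live in the red zone, so the frame must be
// non-zero and no larger than the red zone.
static bool frameFitsInRedZone(unsigned FrameSize, unsigned RedZoneSize = 288) {
  return FrameSize != 0 && FrameSize <= RedZoneSize;
}

int main() {
  assert(!frameFitsInRedZone(0));   // nothing to move
  assert(frameFitsInRedZone(160));
  assert(!frameFitsInRedZone(512)); // too large: stores would miss the zone
}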
+
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -748,7 +799,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = MBB.begin();
// Work out frame sizes.
- unsigned FrameSize = determineFrameLayout(MF);
+ unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
int NegFrameSize = -FrameSize;
if (!isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
@@ -759,6 +810,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
+ bool MustSaveTOC = FI->mustSaveTOC();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
@@ -770,6 +822,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
@@ -855,6 +908,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
+ // Check if we can move the stack update instruction (stdu) down the prologue
+ // past the callee saves. Hopefully this avoids the situation where the
+ // saves have to wait for the store-with-update of the stack pointer to
+ // complete.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ bool MovingStackUpdateDown = false;
+
+ // Check if we can move the stack update.
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the stack pointer update past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
+ StackUpdateLoc++;
+ MovingStackUpdateDown = true;
+ } else {
+ // We need all of the Frame Indices to meet these conditions.
+ // If they do not, abort the whole operation.
+ StackUpdateLoc = MBBI;
+ MovingStackUpdateDown = false;
+ break;
+ }
+ }
+
+ // If the operation was not aborted then update the object offset.
+ if (MovingStackUpdateDown) {
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ if (FrIdx < 0)
+ MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
+ }
+ }
+ }
+
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
@@ -918,7 +1010,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, StoreInst)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
@@ -986,7 +1078,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;
} else if (!isLargeFrame) {
- BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
@@ -1004,6 +1096,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;
}
+ // Save the TOC register after the stack pointer update if a prologue TOC
+ // save is required for the function.
+ if (MustSaveTOC) {
+ assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
+ BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
+ .addReg(TOCReg, getKillRegState(true))
+ .addImm(TOCSaveOffset)
+ .addReg(SPReg);
+ }
+
if (!HasRedZone) {
assert(!isPPC64 && "A red zone is always available on PPC64");
if (HasSTUX) {
@@ -1205,6 +1307,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (PPC::CRBITRCRegClass.contains(Reg))
continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
@@ -1234,6 +1339,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIRegister);
} else {
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ // We have changed the object offset above but we do not want to change
+ // the actual offsets in the CFI instruction so we have to undo the
+ // offset change here.
+ if (MovingStackUpdateDown)
+ Offset -= NegFrameSize;
+
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1380,6 +1491,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RBReg = SPReg;
unsigned SPAdd = 0;
+ // Check if we can move the stack update instruction up the epilogue
+ // past the callee saves. This will allow the move-to-LR instruction
+ // to be executed before the restores of the callee saves, which means
+ // that the restores can hide the latency of the MTLR instruction.
+ MachineBasicBlock::iterator StackUpdateLoc = MBBI;
+ if (stackUpdateCanBeMoved(MF)) {
+ const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
+ for (CalleeSavedInfo CSI : Info) {
+ int FrIdx = CSI.getFrameIdx();
+ // If the frame index is not negative the callee saved info belongs to a
+ // stack object that is not a fixed stack object. We ignore non-fixed
+ // stack objects because we won't move the update of the stack pointer
+ // past them.
+ if (FrIdx >= 0)
+ continue;
+
+ if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
+ StackUpdateLoc--;
+ else {
+ // Abort the operation as we can't update all CSR restores.
+ StackUpdateLoc = MBBI;
+ break;
+ }
+ }
+ }
+
if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
@@ -1409,7 +1546,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
@@ -1433,7 +1570,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FPReg);
RBReg = FPReg;
}
- BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
@@ -1466,7 +1603,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
- BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
@@ -1538,7 +1675,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(TempReg, getKillRegState(i == e-1));
if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+ BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
@@ -1732,6 +1869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
+ (Reg != PPC::X2 && Reg != PPC::R2)) &&
+ "Not expecting to try to spill R2 in a function that must save TOC");
if (PPC::GPRCRegClass.contains(Reg) ||
PPC::SPE4RCRegClass.contains(Reg)) {
HasGPSaveArea = true;
@@ -1947,7 +2087,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// the 16-bit immediate. We don't know the complete frame size here
// because we've not yet computed callee-saved register spills or the
// needed alignment padding.
- unsigned StackSize = determineFrameLayout(MF, false, true);
+ unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
@@ -2041,6 +2181,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ bool MustSaveTOC = FI->mustSaveTOC();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -2071,6 +2213,10 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
}
+ // The actual spill will happen in the prologue.
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
// Insert the spill to the stack frame.
if (IsCRField) {
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
@@ -2198,6 +2344,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ bool MustSaveTOC = FI->mustSaveTOC();
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
@@ -2220,6 +2368,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
continue;
+ if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 69bd1484d6e5..d116e9fd22e1 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,9 +1,8 @@
//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCFRAMELOWERING_H
-#include "PPC.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -73,12 +71,29 @@ class PPCFrameLowering: public TargetFrameLowering {
*/
void createTailCallBranchInstr(MachineBasicBlock &MBB) const;
+ /**
+ * Check if the conditions are correct to allow for the stack update
+ * to be moved past the CSR save/restore code.
+ */
+ bool stackUpdateCanBeMoved(MachineFunction &MF) const;
+
public:
PPCFrameLowering(const PPCSubtarget &STI);
- unsigned determineFrameLayout(MachineFunction &MF,
- bool UpdateMF = true,
- bool UseEstimate = false) const;
+ /**
+ * Determine the frame layout and update the machine function.
+ */
+ unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+ bool UseEstimate = false) const;
+
+ /**
+ * Determine the frame layout but do not update the machine function.
+ * The MachineFunction object can be const in this case as it is not
+ * modified.
+ */
+ unsigned determineFrameLayout(const MachineFunction &MF,
+ bool UseEstimate = false,
+ unsigned *NewMaxCallFrameSize = nullptr) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 5f6966cecd61..391ebcc1a143 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -1,9 +1,8 @@
//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,8 @@
//===----------------------------------------------------------------------===//
#include "PPCHazardRecognizers.h"
-#include "PPC.h"
#include "PPCInstrInfo.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 4b502147ca63..5b32147ca88d 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -1,9 +1,8 @@
//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 31acd0ff870f..543cac075f55 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,13 +218,6 @@ namespace {
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl);
- /// SelectAddrImm - Returns true if the address N can be represented by
- /// a base register plus a signed 16-bit displacement [r+imm].
- bool SelectAddrImm(SDValue N, SDValue &Disp,
- SDValue &Base) {
- return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
- }
-
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
/// immediate field. Note that the operand at this point is already the
/// result of a prior SelectAddressRegImm call.
@@ -239,26 +231,61 @@ namespace {
return false;
}
- /// SelectAddrIdx - Given the specified addressed, check to see if it can be
- /// represented as an indexed [r+r] operation. Returns false if it can
- /// be represented by [r+imm], which are preferred.
+ /// SelectAddrIdx - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is D.
+ /// The last parameter \p 0 means the associated D form places no alignment
+ /// requirement on the 16-bit signed displacement.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
+ }
+
+ /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is DS.
+ /// The last parameter \p 4 means the associated DS form's 16-bit signed
+ /// displacement must be a multiple of 4.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
+ bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
+ }
+
+ /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
+ /// represented as an indexed [r+r] operation.
+ /// This is for xform instructions whose associated displacement form is DQ.
+ /// The last parameter \p 16 means the associated DQ form's 16-bit signed
+ /// displacement must be a multiple of 16.
+ /// Returns false if it can be represented by [r+imm], which are preferred.
+ bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
}
- /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+ /// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ /// The last parameter \p 0 means the D form places no alignment requirement
+ /// on the 16-bit signed displacement.
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
+ }
/// SelectAddrImmX4 - Returns true if the address N can be represented by
- /// a base register plus a signed 16-bit displacement that is a multiple of 4.
- /// Suitable for use by STD and friends.
+ /// a base register plus a signed 16-bit displacement that is a multiple of
+ /// 4 (last parameter). Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
}
+ /// SelectAddrImmX16 - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement that is a multiple of
+ /// 16 (last parameter). Suitable for use by STXV and friends.
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
}
@@ -412,7 +439,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
if (PPCSubTarget->isTargetELF()) {
GlobalBaseReg = PPC::R30;
- if (M->getPICLevel() == PICLevel::SmallPIC) {
+ if (!PPCSubTarget->isSecurePlt() &&
+ M->getPICLevel() == PICLevel::SmallPIC) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
@@ -2373,7 +2401,7 @@ public:
// Here we try to match complex bit permutations into a set of
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
- // known to produce optimial code for common cases (like i32 byte swapping).
+ // known to produce optimal code for common cases (like i32 byte swapping).
SDNode *Select(SDNode *N) {
Memoizer.clear();
auto Result =
@@ -4214,12 +4242,12 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
// Without this setb optimization, the outer SELECT_CC will be manually
// selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
- // transforms pseduo instruction to isel instruction. When there are more than
+ // transforms pseudo instruction to isel instruction. When there are more than
// one use for result like zext/sext, with current optimization we only see
// isel is replaced by setb but can't see any significant gain. Since
// setb has longer latency than original isel, we should avoid this. Another
// point is that setb requires comparison always kept, it can break the
- // oppotunity to get the comparison away if we have in future.
+ // opportunity to get the comparison away if we have in future.
if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
return false;
@@ -4354,13 +4382,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (trySETCC(N))
return;
break;
-
- case PPCISD::CALL: {
- const Module *M = MF->getFunction().getParent();
-
+ // These nodes will be transformed into GETtlsADDR32 node, which
+ // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
+ case PPCISD::ADDI_TLSLD_L_ADDR:
+ case PPCISD::ADDI_TLSGD_L_ADDR: {
+ const Module *Mod = MF->getFunction().getParent();
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
!PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
- M->getPICLevel() == PICLevel::SmallPIC)
+ Mod->getPICLevel() == PICLevel::SmallPIC)
+ break;
+ // Attach global base pointer on GETtlsADDR32 node in order to
+ // generate secure plt code for TLS symbols.
+ getGlobalBaseReg();
+ } break;
+ case PPCISD::CALL: {
+ if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
+ !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
+ !PPCSubTarget->isTargetELF())
break;
SDValue Op = N->getOperand(1);
@@ -5305,7 +5343,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
SDValue V = Queue.pop_back_val();
for (const SDValue &O : V.getNode()->ops()) {
- unsigned b;
+ unsigned b = 0;
uint64_t M = 0, A = 0;
SDValue OLHS, ORHS;
if (O.getOpcode() == ISD::OR) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 39608cb74bee..24d50074860d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,6 +44,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -70,8 +70,10 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -111,6 +113,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
+static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
+cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
+
static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);
@@ -119,6 +124,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
+static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
+
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@@ -550,7 +557,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
- setOperationAction(ISD::ABS, VT, Custom);
+
+ // For v2i64, these are only valid with P8Vector. This is corrected after
+ // the loop.
+ setOperationAction(ISD::SMAX, VT, Legal);
+ setOperationAction(ISD::SMIN, VT, Legal);
+ setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction(ISD::UMIN, VT, Legal);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::FMAXNUM, VT, Legal);
+ setOperationAction(ISD::FMINNUM, VT, Legal);
+ }
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
@@ -635,11 +653,28 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ if (!Subtarget.hasP8Vector()) {
+ setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
+ }
+
+ for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
+ setOperationAction(ISD::ABS, VT, Custom);
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
// with merges, splats, etc.
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+ // Vector truncates to sub-word integer that fit in an Altivec/VSX register
+ // are cheap, so handle them before they get expanded to scalar.
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
+
setOperationAction(ISD::AND , MVT::v4i32, Legal);
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
@@ -804,6 +839,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
if (Subtarget.hasDirectMove())
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
@@ -866,6 +903,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FPOWI, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
}
@@ -1060,6 +1098,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -1232,22 +1271,6 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
return Align;
}
-unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
- CallingConv:: ID CC,
- EVT VT) const {
- if (Subtarget.hasSPE() && VT == MVT::f64)
- return 2;
- return PPCTargetLowering::getNumRegisters(Context, VT);
-}
-
-MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
- CallingConv:: ID CC,
- EVT VT) const {
- if (Subtarget.hasSPE() && VT == MVT::f64)
- return MVT::i32;
- return PPCTargetLowering::getRegisterType(Context, VT);
-}
-
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
@@ -1256,6 +1279,10 @@ bool PPCTargetLowering::hasSPE() const {
return Subtarget.hasSPE();
}
+bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+ return VT.isScalarInteger();
+}
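The hook above only chooses between two arithmetically identical forms: in two's complement ~b equals -b - 1, so a - ~b is the same value as a + b + 1, and the target merely tells the generic combiner which shape it prefers (here, the increment form for scalar integers). A one-line sanity check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  // a - ~b == a + b + 1 in two's complement arithmetic.
  int32_t a = 12345, b = -678;
  assert(a - ~b == a + b + 1);
}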
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1365,7 +1392,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
+ case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
+ case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
+ case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
+ case PPCISD::FP_EXTEND_LH: return "PPCISD::FP_EXTEND_LH";
}
return nullptr;
}
@@ -2202,16 +2233,43 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
+
+/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+ if (Memop->getMemoryVT() == MVT::f64) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
 /// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
-/// can be more efficiently represented with [r+imm].
+/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
+/// non-zero and N can be represented by a base register plus a signed 16-bit
+/// displacement, make a more precise judgement by checking (displacement % \p
+/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
- SDValue &Index,
- SelectionDAG &DAG) const {
+ SDValue &Index, SelectionDAG &DAG,
+ unsigned EncodingAlignment) const {
int16_t imm = 0;
if (N.getOpcode() == ISD::ADD) {
- if (isIntS16Immediate(N.getOperand(1), imm))
- return false; // r+i
+ // Is there an SPE f64 load/store, which cannot handle a 16-bit offset?
+ // SPE loads/stores can only handle 8-bit offsets.
+ if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+ return true;
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!EncodingAlignment || !(imm % EncodingAlignment)))
+ return false; // r+i
if (N.getOperand(1).getOpcode() == PPCISD::Lo)
return false; // r+i
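Illustration (standalone sketch, not from this patch): the EncodingAlignment test above only lets a signed 16-bit displacement stand for an [r+imm] form when it is a multiple of the required encoding alignment; the helper name and the example alignment value of 4 below are assumptions for demonstration only.

#include <cassert>
#include <cstdint>

// Mirrors the (!EncodingAlignment || !(imm % EncodingAlignment)) check above;
// EncodingAlignment == 0 means "no encoding restriction".
static bool fitsRegImmEncoding(int32_t Imm, int EncodingAlignment) {
  if (Imm < INT16_MIN || Imm > INT16_MAX)
    return false;                              // not a signed 16-bit value
  return EncodingAlignment == 0 || Imm % EncodingAlignment == 0;
}

int main() {
  assert(fitsRegImmEncoding(8, 4));   // multiple of 4: [r+imm] is usable
  assert(!fitsRegImmEncoding(6, 4));  // misaligned: must fall back to [r+r]
  assert(fitsRegImmEncoding(6, 0));   // no alignment requirement
}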
@@ -2219,8 +2277,9 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
Index = N.getOperand(1);
return true;
} else if (N.getOpcode() == ISD::OR) {
- if (isIntS16Immediate(N.getOperand(1), imm))
- return false; // r+i can fold it if we can.
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!EncodingAlignment || !(imm % EncodingAlignment)))
+ return false; // r+i can fold it if we can.
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are provably
@@ -2284,22 +2343,22 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg. If \p Alignment is non-zero, only accept
+/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
- unsigned Alignment) const {
+ unsigned EncodingAlignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
- if (SelectAddressRegReg(N, Disp, Base, DAG))
+ if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
return false;
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Alignment || (imm % Alignment) == 0)) {
+ (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@@ -2323,7 +2382,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
- (!Alignment || (imm % Alignment) == 0)) {
+ (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -2349,7 +2408,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
int16_t Imm;
- if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
+ if (isIntS16Immediate(CN, Imm) &&
+ (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -2359,7 +2419,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
- (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
+ (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -2416,24 +2476,45 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
-static bool usePartialVectorLoads(SDNode *N) {
- if (!N->hasOneUse())
- return false;
+static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
// If there are any other uses other than scalar to vector, then we should
// keep it as a scalar load -> direct move pattern to prevent multiple
- // loads. Currently, only check for i64 since we have lxsd/lfd to do this
- // efficiently, but no update equivalent.
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- EVT MemVT = LD->getMemoryVT();
- if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
- SDNode *User = *(LD->use_begin());
- if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return true;
- }
+ // loads.
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD)
+ return false;
+
+ EVT MemVT = LD->getMemoryVT();
+ if (!MemVT.isSimple())
+ return false;
+ switch(MemVT.getSimpleVT().SimpleTy) {
+ case MVT::i64:
+ break;
+ case MVT::i32:
+ if (!ST.hasP8Vector())
+ return false;
+ break;
+ case MVT::i16:
+ case MVT::i8:
+ if (!ST.hasP9Vector())
+ return false;
+ break;
+ default:
+ return false;
}
- return false;
+ SDValue LoadedVal(N, 0);
+ if (!LoadedVal.hasOneUse())
+ return false;
+
+ for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().get().getResNo() == 0 &&
+ UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+
+ return true;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
@@ -2464,7 +2545,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// Do not generate pre-inc forms for specific loads that feed scalar_to_vector
// instructions because we can fold these into a more efficient instruction
// instead, (such as LXSD).
- if (isLoad && usePartialVectorLoads(N)) {
+ if (isLoad && usePartialVectorLoads(N, Subtarget)) {
return false;
}
@@ -2745,7 +2826,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
const Module *M = DAG.getMachineFunction().getFunction().getParent();
PICLevel::Level picLevel = M->getPICLevel();
- TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+ const TargetMachine &TM = getTargetMachine();
+ TLSModel::Model Model = TM.getTLSModel(GV);
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
@@ -2769,8 +2851,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
- } else
- GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ } else {
+ if (!TM.isPositionIndependent())
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ else if (picLevel == PICLevel::SmallPIC)
+ GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
+ else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
@@ -3147,101 +3235,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV, nextOffset));
}
-#include "PPCGenCallingConv.inc"
-
-// Function whose sole purpose is to kill compiler warnings
-// stemming from unused functions included from PPCGenCallingConv.inc.
-CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
- return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- return true;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const MCPhysReg ArgRegs[] = {
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
- unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
- // Skip one register if the first unallocated register has an even register
- // number and there are still argument registers available which have not been
- // allocated yet. RegNum is actually an index into ArgRegs, which means we
- // need to skip a register if RegNum is odd.
- if (RegNum != NumArgRegs && RegNum % 2 == 1) {
- State.AllocateReg(ArgRegs[RegNum]);
- }
-
- // Always return false here, as this function only makes sure that the first
- // unallocated register has an odd register number and does not actually
- // allocate a register for the current argument.
- return false;
-}
-
-bool
-llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const MCPhysReg ArgRegs[] = {
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10,
- };
- const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
- unsigned RegNum = State.getFirstUnallocated(ArgRegs);
- int RegsLeft = NumArgRegs - RegNum;
-
- // Skip if there is not enough registers left for long double type (4 gpr regs
- // in soft float mode) and put long double argument on the stack.
- if (RegNum != NumArgRegs && RegsLeft < 4) {
- for (int i = 0; i < RegsLeft; i++) {
- State.AllocateReg(ArgRegs[RegNum + i]);
- }
- }
-
- return false;
-}
-
-bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- static const MCPhysReg ArgRegs[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8
- };
-
- const unsigned NumArgRegs = array_lengthof(ArgRegs);
-
- unsigned RegNum = State.getFirstUnallocated(ArgRegs);
-
- // If there is only one Floating-point register left we need to put both f64
- // values of a split ppc_fp128 value on the stack.
- if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
- State.AllocateReg(ArgRegs[RegNum]);
- }
-
- // Always return false here, as this function only makes sure that the two f64
- // values a ppc_fp128 value is split into are both passed in registers or both
- // passed on the stack and does not actually allocate a register for the
- // current argument.
- return false;
-}
-
/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
@@ -3449,7 +3442,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
- if (useSoftFloat() || hasSPE())
+ if (useSoftFloat())
CCInfo.PreAnalyzeFormalArguments(Ins);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3482,7 +3475,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
else if (Subtarget.hasSPE())
- RC = &PPC::SPERCRegClass;
+ // SPE passes doubles in GPR pairs.
+ RC = &PPC::GPRCRegClass;
else
RC = &PPC::F8RCRegClass;
break;
@@ -3506,13 +3500,26 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
break;
}
- // Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
- ValVT == MVT::i1 ? MVT::i32 : ValVT);
-
- if (ValVT == MVT::i1)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+ SDValue ArgValue;
+ // Transform the arguments stored in physical registers into
+ // virtual ones.
+ if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
+ assert(i + 1 < e && "No second half of double precision argument");
+ unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
+ SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
+ SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
+ if (!Subtarget.isLittleEndian())
+ std::swap (ArgValueLo, ArgValueHi);
+ ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
+ ArgValueHi);
+ } else {
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+ }
InVals.push_back(ArgValue);
} else {
@@ -4448,24 +4455,27 @@ static bool isFunctionGlobalAddress(SDValue Callee);
static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
const TargetMachine &TM) {
- // If !G, Callee can be an external symbol.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G)
- return false;
-
+ // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
+  // don't have enough information to determine if the caller and callee share
+ // the same TOC base, so we have to pessimistically assume they don't for
+ // correctness.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (!G)
+ return false;
+
+ const GlobalValue *GV = G->getGlobal();
// The medium and large code models are expected to provide a sufficiently
// large TOC to provide all data addressing needs of a module with a
// single TOC. Since each module will be addressed with a single TOC then we
// only need to check that caller and callee don't cross dso boundaries.
if (CodeModel::Medium == TM.getCodeModel() ||
CodeModel::Large == TM.getCodeModel())
- return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
+ return TM.shouldAssumeDSOLocal(*Caller->getParent(), GV);
// Otherwise we need to ensure callee and caller are in the same section,
// since the linker may allocate multiple TOCs, and we don't know which
// sections will belong to the same TOC base.
- const GlobalValue *GV = G->getGlobal();
if (!GV->isStrongDefinitionForLinker())
return false;
@@ -4917,6 +4927,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isAIXABI = Subtarget.isAIXABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -4943,17 +4954,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
if (isFunctionGlobalAddress(Callee)) {
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+
// A call to a TLS address is actually an indirect call to a
// thread-specific pointer.
unsigned OpFlags = 0;
if (UsePlt)
OpFlags = PPCII::MO_PLT;
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
Callee.getValueType(), 0, OpFlags);
needIndirectCall = false;
@@ -5095,17 +5107,18 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
- // into the call.
- // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
- if (isSVR4ABI && isPPC64) {
+ // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
+ // live into the call.
+ // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
+ if ((isSVR4ABI && isPPC64) || isAIXABI) {
setUsesTOCBasePtr(DAG);
- // We cannot add X2 as an operand here for PATCHPOINT, because there is no
- // way to mark dependencies as implicit here. We will add the X2 dependency
- // in EmitInstrWithCustomInserter.
- if (!isPatchPoint)
- Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+ // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+ // no way to mark dependencies as implicit here.
+ // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+ if (!isPatchPoint)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
+ : PPC::R2, PtrVT));
}
return CallOpc;
@@ -5129,10 +5142,27 @@ SDValue PPCTargetLowering::LowerCallResult(
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Val = DAG.getCopyFromReg(Chain, dl,
- VA.getLocReg(), VA.getLocVT(), InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ SDValue Val;
+
+ if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Lo.getValue(1);
+ InFlag = Lo.getValue(2);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
+ InFlag);
+ Chain = Hi.getValue(1);
+ InFlag = Hi.getValue(2);
+ if (!Subtarget.isLittleEndian())
+ std::swap (Lo, Hi);
+ Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
+ } else {
+ Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ }
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -5206,18 +5236,24 @@ SDValue PPCTargetLowering::FinishCall(
}
// Add a NOP immediately after the branch instruction when using the 64-bit
- // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // SVR4 or the AIX ABI.
+ // At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub
// function which saves the current TOC, loads the TOC of the callee and
// branches to the callee. The NOP will be replaced with a load instruction
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
- // same TOC), the NOP will remain unchanged.
+ // same TOC), the NOP will remain unchanged, or become some other NOP.
MachineFunction &MF = DAG.getMachineFunction();
- if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
- !isPatchPoint) {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ if (!isTailCall && !isPatchPoint &&
+ ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
+ Subtarget.isAIXABI())) {
if (CallOpc == PPCISD::BCTRL) {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("Indirect call on AIX is not implemented.");
+
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@@ -5229,7 +5265,6 @@ SDValue PPCTargetLowering::FinishCall(
// allocated and an unnecessary move instruction being generated.
CallOpc = PPCISD::BCTRL_LOAD_TOC;
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
@@ -5245,6 +5280,19 @@ SDValue PPCTargetLowering::FinishCall(
}
}
+ if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
+ // On AIX, direct function calls reference the symbol for the function's
+ // entry point, which is named by inserting a "." before the function's
+ // C-linkage name.
+ GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+ MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
+ Twine(G->getGlobal()->getName()));
+ Callee = DAG.getMCSymbol(S, PtrVT);
+ // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
+ Ops[1] = Callee;
+ }
+
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
@@ -5314,16 +5362,20 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
!isTailCall)
Callee = LowerGlobalAddress(Callee, DAG);
- if (Subtarget.isSVR4ABI()) {
- if (Subtarget.isPPC64())
- return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
- else
- return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, isPatchPoint, Outs, OutVals, Ins,
- dl, DAG, InVals, CS);
- }
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, isPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+
+ if (Subtarget.isSVR4ABI())
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, isPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
+
+ if (Subtarget.isAIXABI())
+ return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
+ isTailCall, isPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
@@ -5444,12 +5496,15 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, j = 0, e = ArgLocs.size();
+ // i - Tracks the index into the list of registers allocated for the call
+ // RealArgIdx - Tracks the index into the list of actual function arguments
+ // j - Tracks the index into the list of byval arguments
+ for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
i != e;
- ++i) {
+ ++i, ++RealArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[RealArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
if (Flags.isByVal()) {
// Argument is an aggregate which is passed by value, thus we need to
@@ -5498,7 +5553,17 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
if (VA.isRegLoc()) {
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
+ bool IsLE = Subtarget.isLittleEndian();
+ SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
+ SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
+ RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
+ SVal.getValue(0)));
+ } else
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
// Put argument in the parameter list area of the current stack frame.
assert(VA.isMemLoc());
@@ -6613,6 +6678,128 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
NumBytes, Ins, InVals, CS);
}
+
+SDValue PPCTargetLowering::LowerCall_AIX(
+ SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool isPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite CS) const {
+
+ assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
+ "Unimplemented calling convention!");
+ if (isVarArg || isPatchPoint)
+ report_fatal_error("This call type is unimplemented on AIX.");
+
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+ unsigned NumOps = Outs.size();
+
+
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area and the parameter list area.
+ // On XCOFF, we start with 24/48, which is reserved space for
+ // [SP][CR][LR][2 x reserved][TOC].
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if the callee
+ // is variadic.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog
+ // inserter pass.
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+ SDValue CallSeqStart = Chain;
+
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10
+ };
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10
+ };
+
+ const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
+ : array_lengthof(GPR_32);
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+ unsigned GPR_idx = 0;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ if (isTailCall)
+ report_fatal_error("Handling of tail call is unimplemented!");
+ int SPDiff = 0;
+
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote integers if needed.
+ if (Arg.getValueType() == MVT::i1 ||
+ (isPPC64 && Arg.getValueType() == MVT::i32)) {
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+ }
+
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal())
+ report_fatal_error("Passing structure by value is unimplemented!");
+
+ switch (Arg.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs)
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ case MVT::v1i128:
+ case MVT::f128:
+ case MVT::v4f64:
+ case MVT::v4i1:
+ report_fatal_error("Handling of this parameter type is unimplemented!");
+ }
+ }
+
+ if (!isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee))
+ report_fatal_error("Handling of indirect call is unimplemented!");
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (auto Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+ /* unused except on PPC64 ELFv1 */ false, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
+}
+
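Illustration (standalone arithmetic sketch, not from this patch): the minimum outgoing stack space computed in LowerCall_AIX above is the linkage area plus room to spill the eight GPR argument registers; with the 24/48-byte XCOFF linkage areas mentioned in the comment this comes to 56 bytes on 32-bit AIX and 112 bytes on 64-bit AIX.

#include <cassert>

// NumBytes = LinkageSize + 8 * PtrByteSize, as in LowerCall_AIX.
static unsigned minAIXCallFrameBytes(bool IsPPC64) {
  unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  unsigned LinkageSize = IsPPC64 ? 48 : 24; // [SP][CR][LR][2 x reserved][TOC]
  return LinkageSize + 8 * PtrByteSize;     // room to spill R3..R10 / X3..X10
}

int main() {
  assert(minAIXCallFrameBytes(false) == 56);  // 32-bit AIX
  assert(minAIXCallFrameBytes(true) == 112);  // 64-bit AIX
}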
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
@@ -6644,11 +6831,11 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Arg = OutVals[i];
+ SDValue Arg = OutVals[RealResIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -6663,8 +6850,21 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
-
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ // Legalize ret f64 -> ret 2 x i32.
+ SDValue SVal =
+ DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
+ DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
+ Flag = Chain.getValue(1);
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
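Illustration (standalone sketch, not from this patch): the BUILD_SPE64/EXTRACT_SPE lowering above moves an f64 argument or return value through a pair of 32-bit GPRs, swapping the halves on big-endian targets. The host-side split/rebuild below only demonstrates the idea; the exact half-to-register mapping is an assumption.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

// Split an f64 into the (lo, hi) 32-bit halves a GPR pair would carry.
static void splitF64(double D, uint32_t &Lo, uint32_t &Hi, bool LittleEndian) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  uint32_t Low32 = static_cast<uint32_t>(Bits);
  uint32_t High32 = static_cast<uint32_t>(Bits >> 32);
  // On big-endian targets the order of the two halves is swapped, mirroring
  // the std::swap(ArgValueLo, ArgValueHi) calls above.
  Lo = LittleEndian ? Low32 : High32;
  Hi = LittleEndian ? High32 : Low32;
}

static double buildF64(uint32_t Lo, uint32_t Hi, bool LittleEndian) {
  if (!LittleEndian)
    std::swap(Lo, Hi);
  uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}

int main() {
  uint32_t Lo, Hi;
  splitF64(3.5, Lo, Hi, /*LittleEndian=*/false);
  assert(buildF64(Lo, Hi, /*LittleEndian=*/false) == 3.5);
}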
@@ -6890,6 +7090,61 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(0));
}
+SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // Implements a vector truncate that fits in a vector register as a shuffle.
+ // We want to legalize vector truncates down to where the source fits in
+ // a vector register (and target is therefore smaller than vector register
+ // size). At that point legalization will try to custom lower the sub-legal
+ // result and get here - where we can contain the truncate as a single target
+ // operation.
+
+ // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
+ // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
+ //
+  // We will implement it for big-endian ordering as follows (where u denotes
+  // an undefined element):
+ // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
+ // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
+ //
+ // The same operation in little-endian ordering will be:
+ // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
+ // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
+
+ assert(Op.getValueType().isVector() && "Vector type expected.");
+
+ SDLoc DL(Op);
+ SDValue N1 = Op.getOperand(0);
+ unsigned SrcSize = N1.getValueType().getSizeInBits();
+ assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
+ SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
+
+ EVT TrgVT = Op.getValueType();
+ unsigned TrgNumElts = TrgVT.getVectorNumElements();
+ EVT EltVT = TrgVT.getVectorElementType();
+ unsigned WideNumElts = 128 / EltVT.getSizeInBits();
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
+
+ // First list the elements we want to keep.
+ unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
+ SmallVector<int, 16> ShuffV;
+ if (Subtarget.isLittleEndian())
+ for (unsigned i = 0; i < TrgNumElts; ++i)
+ ShuffV.push_back(i * SizeMult);
+ else
+ for (unsigned i = 1; i <= TrgNumElts; ++i)
+ ShuffV.push_back(i * SizeMult - 1);
+
+ // Populate the remaining elements with undefs.
+ for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
+ // ShuffV.push_back(i + WideNumElts);
+ ShuffV.push_back(WideNumElts + 1);
+
+ SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
+ return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
+}
+
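Illustration (standalone sketch, not from this patch): the shuffle mask built by LowerTRUNCATEVector above keeps the least significant narrow sub-element of each wide element (whose position depends on endianness) and pads the tail with indices that select from the undef second operand.

#include <cassert>
#include <vector>

// Build the mask for truncating TrgNumElts wide elements, where each wide
// element spans SizeMult narrow elements and the register holds WideNumElts
// narrow elements in total.
static std::vector<int> truncShuffleMask(unsigned TrgNumElts, unsigned SizeMult,
                                         unsigned WideNumElts,
                                         bool LittleEndian) {
  std::vector<int> ShuffV;
  if (LittleEndian)
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);          // LSB is the first sub-element
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);      // LSB is the last sub-element
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1);         // lands in the undef operand
  return ShuffV;
}

int main() {
  // v2i16 -> v2i8 in a 16 x i8 register, big-endian: keep bytes 1 and 3.
  std::vector<int> M = truncShuffleMask(2, 2, 16, /*LittleEndian=*/false);
  assert(M.size() == 16 && M[0] == 1 && M[1] == 3);
}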
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -9604,10 +9859,63 @@ SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
BifID = Intrinsic::ppc_altivec_vmaxsh;
else if (VT == MVT::v16i8)
BifID = Intrinsic::ppc_altivec_vmaxsb;
-
+
return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}
+// Custom lowering for fpext v2f32 to v2f64
+SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+
+ assert(Op.getOpcode() == ISD::FP_EXTEND &&
+ "Should only be called for ISD::FP_EXTEND");
+
+ // We only want to custom lower an extend from v2f32 to v2f64.
+ if (Op.getValueType() != MVT::v2f64 ||
+ Op.getOperand(0).getValueType() != MVT::v2f32)
+ return SDValue();
+
+ SDLoc dl(Op);
+ SDValue Op0 = Op.getOperand(0);
+
+ switch (Op0.getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FSUB: {
+ SDValue NewLoad[2];
+ for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
+      // Ensure both inputs are loads.
+ SDValue LdOp = Op0.getOperand(i);
+ if (LdOp.getOpcode() != ISD::LOAD)
+ return SDValue();
+ // Generate new load node.
+ LoadSDNode *LD = cast<LoadSDNode>(LdOp);
+ SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+ NewLoad[i] =
+ DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+ DAG.getVTList(MVT::v4f32, MVT::Other),
+ LoadOps, LD->getMemoryVT(),
+ LD->getMemOperand());
+ }
+ SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32,
+ NewLoad[0], NewLoad[1],
+ Op0.getNode()->getFlags());
+ return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewOp);
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op0);
+ SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue NewLd =
+ DAG.getMemIntrinsicNode(PPCISD::LD_VSX_LH, dl,
+ DAG.getVTList(MVT::v4f32, MVT::Other),
+ LoadOps, LD->getMemoryVT(), LD->getMemOperand());
+ return DAG.getNode(PPCISD::FP_EXTEND_LH, dl, MVT::v2f64, NewLd);
+ }
+ }
+  llvm_unreachable("ERROR: Should return for all cases within switch.");
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9661,6 +9969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::ABS: return LowerABS(Op, DAG);
+ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
@@ -9701,7 +10010,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::INTRINSIC_W_CHAIN: {
if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
- Intrinsic::ppc_is_decremented_ctr_nonzero)
+ Intrinsic::loop_decrement)
break;
assert(N->getValueType(0) == MVT::i1 &&
@@ -9737,6 +10046,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
+ case ISD::TRUNCATE: {
+ EVT TrgVT = N->getValueType(0);
+ if (TrgVT.isVector() &&
+ isOperationCustom(N->getOpcode(), TrgVT) &&
+ N->getOperand(0).getValueType().getSizeInBits() <= 128)
+ Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
+ return;
+ }
case ISD::BITCAST:
// Don't handle bitcast here.
return;
@@ -9822,10 +10139,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = ++BB->getIterator();
- unsigned dest = MI.getOperand(0).getReg();
- unsigned ptrA = MI.getOperand(1).getReg();
- unsigned ptrB = MI.getOperand(2).getReg();
- unsigned incr = MI.getOperand(3).getReg();
+ Register dest = MI.getOperand(0).getReg();
+ Register ptrA = MI.getOperand(1).getReg();
+ Register ptrB = MI.getOperand(2).getReg();
+ Register incr = MI.getOperand(3).getReg();
DebugLoc dl = MI.getDebugLoc();
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -9841,7 +10158,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned TmpReg = (!BinOpcode) ? incr :
+ Register TmpReg = (!BinOpcode) ? incr :
RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
@@ -9949,20 +10266,20 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned ShiftReg =
+ Register PtrReg = RegInfo.createVirtualRegister(RC);
+ Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+ Register ShiftReg =
isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
- unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Ptr1Reg;
- unsigned TmpReg =
+ Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+ Register Ptr1Reg;
+ Register TmpReg =
(!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
// thisMBB:
@@ -10764,23 +11081,23 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- unsigned PtrReg = RegInfo.createVirtualRegister(RC);
- unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned ShiftReg =
+ Register PtrReg = RegInfo.createVirtualRegister(RC);
+ Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
+ Register ShiftReg =
isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
- unsigned NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
- unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
- unsigned Ptr1Reg;
- unsigned TmpReg = RegInfo.createVirtualRegister(GPRC);
- unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+ Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register MaskReg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
+ Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
+ Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
+ Register Ptr1Reg;
+ Register TmpReg = RegInfo.createVirtualRegister(GPRC);
+ Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -10968,7 +11285,147 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
- return BB;
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(CRReg);
+ } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
+ DebugLoc Dl = MI.getDebugLoc();
+ unsigned Imm = MI.getOperand(1).getImm();
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
+ MI.getOperand(0).getReg())
+ .addReg(PPC::CR0EQ);
+ } else if (MI.getOpcode() == PPC::SETRNDi) {
+ DebugLoc dl = MI.getDebugLoc();
+ unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+ // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
+ // the following settings:
+ // 00 Round to nearest
+ // 01 Round to 0
+ // 10 Round to +inf
+ // 11 Round to -inf
+
+  // When the operand is an immediate, use its two least significant bits to
+  // set bits 62:63 of FPSCR.
+ unsigned Mode = MI.getOperand(1).getImm();
+ BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
+ .addImm(31);
+
+ BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
+ .addImm(30);
+ } else if (MI.getOpcode() == PPC::SETRND) {
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
+ // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
+    // If the target doesn't have DirectMove, we have to go through the stack,
+    // because the target doesn't have instructions like mtvsrd or mfvsrd to
+    // do this conversion directly.
+ auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
+ if (Subtarget.hasDirectMove()) {
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(SrcReg);
+ } else {
+ // Use stack to do the register copy.
+ unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
+ if (RC == &PPC::F8RCRegClass) {
+ // Copy register from F8RCRegClass to G8RCRegclass.
+ assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
+ "Unsupported RegClass.");
+
+ StoreOp = PPC::STFD;
+ LoadOp = PPC::LD;
+ } else {
+ // Copy register from G8RCRegClass to F8RCRegclass.
+ assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
+ (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
+ "Unsupported RegClass.");
+ }
+
+ MachineFrameInfo &MFI = F->getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(8, 8, false);
+
+ MachineMemOperand *MMOStore = F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ // Store the SrcReg into the stack.
+ BuildMI(*BB, MI, dl, TII->get(StoreOp))
+ .addReg(SrcReg)
+ .addImm(0)
+ .addFrameIndex(FrameIdx)
+ .addMemOperand(MMOStore);
+
+ MachineMemOperand *MMOLoad = F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+
+ // Load from the stack where SrcReg is stored, and save to DestReg,
+ // so we have done the RegClass conversion from RegClass::SrcReg to
+ // RegClass::DestReg.
+ BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
+ .addImm(0)
+ .addFrameIndex(FrameIdx)
+ .addMemOperand(MMOLoad);
+ }
+ };
+
+ unsigned OldFPSCRReg = MI.getOperand(0).getReg();
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+
+  // When the operand is a gprc register, use its two least significant bits
+  // and the mtfsf instruction to set bits 62:63 of FPSCR.
+ //
+ // copy OldFPSCRTmpReg, OldFPSCRReg
+ // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
+ // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
+ // copy NewFPSCRReg, NewFPSCRTmpReg
+ // mtfsf 255, NewFPSCRReg
+ MachineOperand SrcOp = MI.getOperand(1);
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+ copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
+
+ unsigned ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ unsigned ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+
+  // The first operand of INSERT_SUBREG should be a register which has
+  // subregisters; we only care about its RegClass, so we should use an
+  // IMPLICIT_DEF register.
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
+ BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
+ .addReg(ImDefReg)
+ .add(SrcOp)
+ .addImm(1);
+
+ unsigned NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
+ .addReg(OldFPSCRTmpReg)
+ .addReg(ExtSrcReg)
+ .addImm(0)
+ .addImm(62);
+
+ unsigned NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+ copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
+
+  // The mask 255 means that bits 32:63 of NewFPSCRReg are copied into bits
+  // 32:63 of FPSCR.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
+ .addImm(255)
+ .addReg(NewFPSCRReg)
+ .addImm(0)
+ .addImm(0);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
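Illustration (standalone sketch, not from this patch): SETRNDi above overwrites only the two-bit rounding-mode field of FPSCR (bits 62:63, programmed via MTFSB1/MTFSB0 at bit positions 31 for Mode & 1 and 30 for Mode & 2) with the low two bits of the immediate; the bit arithmetic below models that field update on a plain integer.

#include <cassert>
#include <cstdint>

// Replace the low two bits (the rounding-mode field) of a 64-bit FPSCR image
// with the low two bits of the requested mode.
static uint64_t setRoundingField(uint64_t FPSCR, unsigned Mode) {
  return (FPSCR & ~uint64_t(3)) | (Mode & 3);
}

int main() {
  uint64_t FPSCR = 0;
  FPSCR = setRoundingField(FPSCR, 2);   // 10 -> round toward +infinity
  assert((FPSCR & 3) == 2);
  FPSCR = setRoundingField(FPSCR, 0);   // 00 -> round to nearest
  assert((FPSCR & 3) == 0);
}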
@@ -11006,7 +11463,9 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- UseOneConstNR = true;
+ // The Newton-Raphson computation with a single constant does not provide
+ // enough accuracy on some CPUs.
+ UseOneConstNR = !Subtarget.needsTwoConstNR();
return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
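Illustration (standalone sketch, not from this patch): getSqrtEstimate above supplies the hardware reciprocal-square-root estimate (FRSQRTE) and lets the generic combiner apply Newton-Raphson refinement steps of the form y' = y * (1.5 - 0.5 * a * y * y); whether the one-constant variant is accurate enough is now subtarget-dependent. The scalar code below is only a model of one refinement step, not the emitted sequence.

#include <cassert>
#include <cmath>

// One Newton-Raphson step for y ~= 1/sqrt(a).
static double refineRsqrt(double A, double Y) {
  return Y * (1.5 - 0.5 * A * Y * Y);
}

int main() {
  double A = 2.0;
  double Y = 0.7;                       // stand-in for a coarse FRSQRTE estimate
  for (int i = 0; i < 3; ++i)
    Y = refineRsqrt(A, Y);
  assert(std::fabs(Y - 1.0 / std::sqrt(A)) < 1e-9);
}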
@@ -12062,9 +12521,14 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
"Should be called with a BUILD_VECTOR node");
SDLoc dl(N);
+
+  // Return early for non-byte-sized types, as they can't be consecutive.
+ if (!N->getValueType(0).getVectorElementType().isByteSized())
+ return SDValue();
+
bool InputsAreConsecutiveLoads = true;
bool InputsAreReverseConsecutive = true;
- unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+ unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
SDValue FirstInput = N->getOperand(0);
bool IsRoundOfExtLoad = false;
@@ -12332,9 +12796,8 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
if (!Ext1Op || !Ext2Op)
return SDValue();
- if (Ext1.getValueType() != MVT::i32 ||
- Ext2.getValueType() != MVT::i32)
- if (Ext1.getOperand(0) != Ext2.getOperand(0))
+ if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
+ Ext1.getOperand(0) != Ext2.getOperand(0))
return SDValue();
int FirstElem = Ext1Op->getZExtValue();
@@ -12664,6 +13127,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineSRA(N, DCI);
case ISD::SRL:
return combineSRL(N, DCI);
+ case ISD::MUL:
+ return combineMUL(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -13246,7 +13711,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
- Intrinsic::ppc_is_decremented_ctr_nonzero) {
+ Intrinsic::loop_decrement) {
// We now need to make the intrinsic dead (it cannot be instruction
// selected).
@@ -13272,14 +13737,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::AND &&
LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
- Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ Intrinsic::loop_decrement &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
!isNullConstant(LHS.getOperand(1)))
LHS = LHS.getOperand(0);
if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
- Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ Intrinsic::loop_decrement &&
isa<ConstantSDNode>(RHS)) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
"Counter decrement comparison is not EQ or NE");
@@ -13355,9 +13820,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
- case ISD::ABS:
+ case ISD::ABS:
return combineABS(N, DCI);
- case ISD::VSELECT:
+ case ISD::VSELECT:
return combineVSelect(N, DCI);
}
@@ -13453,6 +13918,15 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
if (!ML)
break;
+ if (!DisableInnermostLoopAlign32) {
+    // If the nested loop is an innermost loop, prefer a 32-byte alignment,
+ // so that we can decrease cache misses and branch-prediction misses.
+ // Actual alignment of the loop will depend on the hotness check and other
+ // logic in alignBlocks.
+ if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
+ return 5;
+ }
+
const PPCInstrInfo *TII = Subtarget.getInstrInfo();
// For small loops (between 5 and 8 instructions), align to a 32-byte
@@ -13502,7 +13976,7 @@ PPCTargetLowering::getConstraintType(StringRef Constraint) const {
return C_RegisterClass;
} else if (Constraint == "wa" || Constraint == "wd" ||
Constraint == "wf" || Constraint == "ws" ||
- Constraint == "wi") {
+ Constraint == "wi" || Constraint == "ww") {
return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
@@ -13530,10 +14004,12 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
StringRef(constraint) == "wf") &&
type->isVectorTy())
return CW_Register;
- else if (StringRef(constraint) == "ws" && type->isDoubleTy())
- return CW_Register;
else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
return CW_Register; // just hold 64-bit integers data.
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ww" && type->isFloatTy())
+ return CW_Register;
switch (*constraint) {
default:
@@ -13619,7 +14095,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Constraint == "wf" || Constraint == "wi") &&
Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
- } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+ } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
if (VT == MVT::f32 && Subtarget.hasP8Vector())
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
@@ -13865,7 +14341,7 @@ bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
if (CModel == CodeModel::Small || CModel == CodeModel::Large)
return true;
- // JumpTable and BlockAddress are accessed as got-indirect.
+ // JumpTable and BlockAddress are accessed as got-indirect.
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
return true;
@@ -14082,18 +14558,16 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
+EVT PPCTargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
- const Function &F = MF.getFunction();
// When expanding a memset, require at least two QPX instructions to cover
// the cost of loading the value to be stored from the constant pool.
if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
(!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
- !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
return MVT::v4f64;
}
@@ -14178,6 +14652,7 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
+ MachineMemOperand::Flags,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -14324,7 +14799,7 @@ void PPCTargetLowering::insertCopiesSplitCSR(
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
- // Insert the copy-back instructions right before the terminator
+ // Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
@@ -14345,7 +14820,8 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
return TargetLowering::insertSSPDeclarations(M);
}
-bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;
@@ -14585,6 +15061,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
return SDValue();
}
+SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
+ if (!ConstOpOrElement)
+ return SDValue();
+
+  // An imul is usually smaller than the alternative sequence for a legal type.
+ if (DAG.getMachineFunction().getFunction().hasMinSize() &&
+ isOperationLegal(ISD::MUL, N->getValueType(0)))
+ return SDValue();
+
+ auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
+ switch (this->Subtarget.getDarwinDirective()) {
+ default:
+ // TODO: enhance the condition for subtarget before pwr8
+ return false;
+ case PPC::DIR_PWR8:
+ // type mul add shl
+ // scalar 4 1 1
+ // vector 7 2 2
+ return true;
+ case PPC::DIR_PWR9:
+ // type mul add shl
+ // scalar 5 2 2
+ // vector 7 2 2
+
+      // The cycle ratios of the related operations are shown in the table
+      // above. Because mul is 5 (scalar) / 7 (vector) while add/sub/shl are
+      // all 2 for both scalar and vector types, the 2-instruction patterns
+      // (add/sub + shl) cost 4 and are always profitable, but the
+      // 3-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+      // (sub + add + shl) costs 6, so we should only do it for vector types.
+ return IsAddOne && IsNeg ? VT.isVector() : true;
+ }
+ };
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
+ bool IsNeg = MulAmt.isNegative();
+ APInt MulAmtAbs = MulAmt.abs();
+
+ if ((MulAmtAbs - 1).isPowerOf2()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+
+ if (!IsProfitable(IsNeg, true, VT))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
+ SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+
+ if (!IsNeg)
+ return Res;
+
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
+ } else if ((MulAmtAbs + 1).isPowerOf2()) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+
+ if (!IsProfitable(IsNeg, false, VT))
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
+
+ if (!IsNeg)
+ return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
+ else
+ return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
+
+ } else {
+ return SDValue();
+ }
+}
+
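Illustration (standalone sketch, not from this patch): the four strength-reduction identities combineMUL applies for constants of the form ±(2^N ± 1), written out as plain integer arithmetic.

#include <cassert>
#include <cstdint>

static int64_t mulPow2Plus1(int64_t X, unsigned N)     { return (X << N) + X; }    // x * (2^N + 1)
static int64_t mulNegPow2Plus1(int64_t X, unsigned N)  { return -((X << N) + X); } // x * -(2^N + 1)
static int64_t mulPow2Minus1(int64_t X, unsigned N)    { return (X << N) - X; }    // x * (2^N - 1)
static int64_t mulNegPow2Minus1(int64_t X, unsigned N) { return X - (X << N); }    // x * -(2^N - 1)

int main() {
  assert(mulPow2Plus1(7, 3) == 7 * 9);       // 2^3 + 1 = 9
  assert(mulNegPow2Plus1(7, 3) == 7 * -9);
  assert(mulPow2Minus1(7, 3) == 7 * 7);      // 2^3 - 1 = 7
  assert(mulNegPow2Minus1(7, 3) == 7 * -7);
}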
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 30acd60eba6f..97422c6eda36 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,9 +1,8 @@
//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
-#include "PPC.h"
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -41,7 +39,7 @@ namespace llvm {
// the enum. The order of elements in this enum matters!
// Values that are added after this entry:
// STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
- // are considerd memory opcodes and are treated differently than entries
+ // are considered memory opcodes and are treated differently than entries
// that come before it. For example, ADD or MUL should be placed before
// the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
// after it.
@@ -161,7 +159,7 @@ namespace llvm {
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
- /// SVR4 calls.
+ /// SVR4 calls and 32-bit/64-bit AIX calls.
CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@@ -193,9 +191,18 @@ namespace llvm {
/// Direct move from a GPR to a VSX register (zero)
MTVSRZ,
- /// Direct move of 2 consective GPR to a VSX register.
+ /// Direct move of 2 consecutive GPR to a VSX register.
BUILD_FP128,
+ /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
+ /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
+ /// unsupported for this target.
+ /// Merge 2 GPRs to a single SPE register.
+ BUILD_SPE64,
+
+ /// Extract SPE register component, second argument is high or low.
+ EXTRACT_SPE,
+
/// Extract a subvector from signed integer vector and convert to FP.
/// It is primarily used to convert a (widened) illegal integer vector
/// type to a legal floating point vector type.
@@ -265,11 +272,11 @@ namespace llvm {
CR6UNSET,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
- /// on PPC32.
+ /// for non-position independent code on PPC32.
PPC32_GOT,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
- /// local dynamic TLS on PPC32.
+    /// local dynamic TLS and position independent code on PPC32.
PPC32_PICGOT,
/// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
@@ -405,6 +412,9 @@ namespace llvm {
/// representation.
QBFLT,
+ /// Custom extend v4f32 to v2f64.
+ FP_EXTEND_LH,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -446,6 +456,10 @@ namespace llvm {
/// an xxswapd.
LXVD2X,
+ /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
+ /// v2f32 value into the lower half of a VSR register.
+ LD_VSX_LH,
+
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
@@ -620,6 +634,8 @@ namespace llvm {
return true;
}
+ bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
@@ -653,18 +669,27 @@ namespace llvm {
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+ /// it can be more efficiently represented as [r+imm].
+ bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
    /// SelectAddressRegReg - Given the specified address, check to see if it
- /// can be represented as an indexed [r+r] operation. Returns false if it
- /// can be more efficiently represented with [r+imm].
+ /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
+    /// is non-zero, only accept a displacement that is not suitable for [r+imm].
+    /// Returns false if it can be represented by [r+imm], which is preferred.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG,
+ unsigned EncodingAlignment = 0) const;
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
- /// is not better represented as reg+reg. If Aligned is true, only accept
- /// displacements suitable for STD and friends, i.e. multiples of 4.
+ /// is not better represented as reg+reg. If \p EncodingAlignment is
+    /// non-zero, only accept displacements that satisfy the instruction
+    /// encoding requirement, i.e. multiples of 4 for the DS form.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
- SelectionDAG &DAG, unsigned Alignment) const;
+ SelectionDAG &DAG,
+ unsigned EncodingAlignment) const;
    /// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
@@ -833,14 +858,14 @@ namespace llvm {
EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- bool allowsMisalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- unsigned Align = 1,
- bool *Fast = nullptr) const override;
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, unsigned Align = 1,
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -888,7 +913,8 @@ namespace llvm {
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
unsigned getJumpTableEncoding() const override;
bool isJumpTableRelative() const override;
@@ -898,14 +924,6 @@ namespace llvm {
unsigned JTI,
MCContext &Ctx) const override;
- unsigned getNumRegistersForCallingConv(LLVMContext &Context,
- CallingConv:: ID CC,
- EVT VT) const override;
-
- MVT getRegisterTypeForCallingConv(LLVMContext &Context,
- CallingConv:: ID CC,
- EVT VT) const override;
-
private:
struct ReuseLoadInfo {
SDValue Ptr;
@@ -953,6 +971,8 @@ namespace llvm {
SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+ SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
+
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
@@ -1019,6 +1039,7 @@ namespace llvm {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
@@ -1106,6 +1127,15 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const;
+ SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall, bool isPatchPoint,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite CS) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -1119,6 +1149,7 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -1137,8 +1168,6 @@ namespace llvm {
int &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
- CCAssignFn *useFastISelCCs(unsigned Flag) const;
-
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
@@ -1169,30 +1198,6 @@ namespace llvm {
} // end namespace PPC
- bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
- bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
- bool
- CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
- bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 2ce6ad3293eb..d598567f8e4e 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1,9 +1,8 @@
//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -168,7 +167,7 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
(ins memrix:$src),
"bctrl\n\tld 2, $src", IIC_BrB,
- [(PPCbctrl_load_toc ixaddr:$src)]>,
+ [(PPCbctrl_load_toc iaddrX4:$src)]>,
Requires<[In64BitMode]>;
}
@@ -193,6 +192,12 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
+// Calls for AIX
+def : Pat<(PPCcall (i64 mcsym:$dst)),
+ (BL8 mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
+ (BL8_NOP mcsym:$dst)>;
+
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
@@ -383,7 +388,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, Defs = [CTR8] in {
-let Pattern = [(int_ppc_mtctr i64:$rS)] in
+let Pattern = [(int_set_loop_iterations i64:$rS)] in
def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
"mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -720,10 +725,17 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
-defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH),
- "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
- isPPC64, Requires<[IsISA3_0]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH),
+ "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+ [(set i64:$rA,
+ (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+ isPPC64, Requires<[IsISA3_0]>;
+
+defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+ "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+ []>, isPPC64, Requires<[IsISA3_0]>;
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
@@ -773,13 +785,21 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
let Predicates = [IsISA3_0] in {
def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDHDU : VAForm_1a<49, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+def MADDHDU : VAForm_1a<49,
+ (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDLD : VAForm_1a<51, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
- "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
+def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
+ "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+ [(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>,
+ isPPC64;
def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+ def MADDLD8 : VAForm_1a<51,
+ (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+ "maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
+ [(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>,
+ isPPC64;
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
}
@@ -911,7 +931,7 @@ def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
"lwa $rD, $src", IIC_LdStLWA,
[(set i64:$rD,
- (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+ (aligned4sextloadi32 iaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
@@ -920,7 +940,7 @@ def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
PPC970_DGroup_Cracked;
def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", IIC_LdStLHA,
- [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1 in {
@@ -1022,7 +1042,7 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (aligned4load iaddrX4:$src))]>, isPPC64;
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
@@ -1045,7 +1065,7 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+ [(set i64:$rD, (load xaddrX4:$src))]>, isPPC64;
def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
@@ -1214,10 +1234,10 @@ def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
"std $rS, $dst", IIC_LdStSTD,
- [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store i64:$rS, iaddrX4:$dst)]>, isPPC64;
def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
- [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ [(store i64:$rS, xaddrX4:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
"stdbrx $rS, $dst", IIC_LdStStore,
@@ -1433,11 +1453,11 @@ def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
(STDX $rS, xoaddr:$dst)>;
// 64-bits atomic loads and stores
-def : Pat<(atomic_load_64 ixaddr:$src), (LD memrix:$src)>;
-def : Pat<(atomic_load_64 xaddr:$src), (LDX memrr:$src)>;
+def : Pat<(atomic_load_64 iaddrX4:$src), (LD memrix:$src)>;
+def : Pat<(atomic_load_64 xaddrX4:$src), (LDX memrr:$src)>;
-def : Pat<(atomic_store_64 ixaddr:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
-def : Pat<(atomic_store_64 xaddr:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
+def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr)>;
+def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>;
let Predicates = [IsISA3_0] in {
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 69b19e45c3e9..8176c5120a83 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1,9 +1,8 @@
//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -822,7 +821,9 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
[(set v16i8:$vD, (v16i8 immAllZerosV))]>;
@@ -899,6 +900,32 @@ def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
+// Max/Min
+def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMAXUB $src1, $src2))>;
+def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMAXSB $src1, $src2))>;
+def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMAXUH $src1, $src2))>;
+def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMAXSH $src1, $src2))>;
+def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMAXUW $src1, $src2))>;
+def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMAXSW $src1, $src2))>;
+def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMINUB $src1, $src2))>;
+def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)),
+ (v16i8 (VMINSB $src1, $src2))>;
+def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMINUH $src1, $src2))>;
+def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)),
+ (v8i16 (VMINSH $src1, $src2))>;
+def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMINUW $src1, $src2))>;
+def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)),
+ (v4i32 (VMINSW $src1, $src2))>;
+
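For reference, the patterns above map the generic umax/smax/umin/smin nodes directly onto the existing Altivec instructions, which operate independently on each lane. A minimal scalar sketch of the lane-wise behaviour follows; it is illustrative only, and smax_v4i32 is a name made up here, not an LLVM or Altivec API:

// Illustrative only: lane-wise behaviour the smax patterns select for.
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

static std::array<int32_t, 4> smax_v4i32(std::array<int32_t, 4> A,
                                         std::array<int32_t, 4> B) {
  std::array<int32_t, 4> R{};
  for (int I = 0; I < 4; ++I)
    R[I] = std::max(A[I], B[I]); // vmaxsw: independent signed max per lane
  return R;
}

int main() {
  auto R = smax_v4i32({-1, 7, 0, 5}, {2, 3, -9, 5});
  assert((R == std::array<int32_t, 4>{2, 7, 0, 5}));
  return 0;
}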
// Shuffles.
// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h
index cf71b1c59869..323f7e39adf7 100644
--- a/lib/Target/PowerPC/PPCInstrBuilder.h
+++ b/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -1,9 +1,8 @@
//===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 2fe765dd99e1..a48eb1690695 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1,9 +1,8 @@
//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,14 +37,6 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
let TSFlags{2} = PPC970_Cracked;
let TSFlags{5-3} = PPC970_Unit;
- /// Indicate that the VSX instruction is to use VSX numbering/encoding.
- /// Since ISA 3.0, there are scalar instructions that use the upper
- /// half of the VSX register set only. Rather than adding further complexity
- /// to the register class set, the VSX registers just include the Altivec
- /// registers and this flag decides the numbering to be used for them.
- bits<1> UseVSXReg = 0;
- let TSFlags{6} = UseVSXReg;
-
// Indicate that this instruction is of type X-Form Load or Store
bits<1> XFormMemOp = 0;
let TSFlags{7} = XFormMemOp;
@@ -74,7 +65,6 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
-class UseVSXReg { bits<1> UseVSXReg = 1; }
class XFormMemOp { bits<1> XFormMemOp = 1; }
// Two joined instructions; used to emit two adjacent instructions as one.
@@ -730,6 +720,7 @@ class XForm_25_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
: XForm_base_r3xo_memOp<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
}
+// [PO RT /// RB XO RC]
class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
@@ -1193,9 +1184,9 @@ class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2,
let Inst{11-15} = DCMX{4-0};
let Inst{16-20} = XB{4-0};
let Inst{21-24} = xo1;
- let Inst{25} = DCMX{5};
+ let Inst{25} = DCMX{6};
let Inst{26-28} = xo2;
- let Inst{29} = DCMX{6};
+ let Inst{29} = DCMX{5};
let Inst{30} = XB{5};
let Inst{31} = XT{5};
}
diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td
index 0efe797c765d..104b57a70a2e 100644
--- a/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/lib/Target/PowerPC/PPCInstrHTM.td
@@ -1,9 +1,8 @@
//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,55 +20,53 @@ def HTM_get_imm : SDNodeXForm<imm, [{
}]>;
let hasSideEffects = 1 in {
-def TCHECK_RET : PPCCustomInserterPseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+def TCHECK_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins), "#TCHECK_RET", []>;
+def TBEGIN_RET : PPCCustomInserterPseudo<(outs gprc:$out), (ins u1imm:$R), "#TBEGIN_RET", []>;
}
let Predicates = [HasHTM] in {
+let Defs = [CR0] in {
def TBEGIN : XForm_htm0 <31, 654,
- (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
def TEND : XForm_htm1 <31, 686,
- (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
def TABORT : XForm_base_r3xo <31, 910,
- (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
[]>, isDOT {
let RST = 0;
let B = 0;
}
def TABORTWC : XForm_base_r3xo <31, 782,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TABORTWCI : XForm_base_r3xo <31, 846,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TABORTDC : XForm_base_r3xo <31, 814,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
"tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TABORTDCI : XForm_base_r3xo <31, 878,
- (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
"tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
isDOT;
def TSR : XForm_htm2 <31, 750,
- (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
isDOT;
-def TCHECK : XForm_htm3 <31, 718,
- (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
-
-
def TRECLAIM : XForm_base_r3xo <31, 942,
- (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ (outs), (ins gprc:$A), "treclaim. $A",
IIC_SprMTSPR, []>,
isDOT {
let RST = 0;
@@ -77,13 +74,17 @@ def TRECLAIM : XForm_base_r3xo <31, 942,
}
def TRECHKPT : XForm_base_r3xo <31, 1006,
- (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ (outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
isDOT {
let RST = 0;
let A = 0;
let B = 0;
}
+}
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>;
// Builtins
// All HTM instructions, with the exception of tcheck, set CR0 with the
@@ -94,15 +95,11 @@ def TRECHKPT : XForm_base_r3xo <31, 1006,
// tbegin builtin API which defines a return value of 1 as success.
def : Pat<(int_ppc_tbegin i32:$R),
- (XORI
- (EXTRACT_SUBREG (
- TBEGIN (HTM_get_imm imm:$R)), sub_eq),
- 1)>;
+ (XORI (TBEGIN_RET(HTM_get_imm imm:$R)), 1)>;
def : Pat<(int_ppc_tend i32:$R),
(TEND (HTM_get_imm imm:$R))>;
-
def : Pat<(int_ppc_tabort i32:$R),
(TABORT $R)>;
@@ -167,6 +164,8 @@ def : Pat<(int_ppc_tsuspend),
(TSR 0)>;
def : Pat<(i64 (int_ppc_ttest)),
- (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+ (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (TABORTWCI 0, (LI 0), 0), sub_32)),
+ 36, 28)>;
} // [HasHTM]
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d754ce2990d2..a787bdd56b9d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -333,6 +332,17 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::ADDIStocHA:
case PPC::ADDItocL:
case PPC::LOAD_STACK_GUARD:
+ case PPC::XXLXORz:
+ case PPC::XXLXORspz:
+ case PPC::XXLXORdpz:
+ case PPC::V_SET0B:
+ case PPC::V_SET0H:
+ case PPC::V_SET0:
+ case PPC::V_SETALLONESB:
+ case PPC::V_SETALLONESH:
+ case PPC::V_SETALLONES:
+ case PPC::CRSET:
+ case PPC::CRUNSET:
return true;
}
return false;
@@ -381,9 +391,9 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Swap op1/op2
assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
"Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMIo.");
- unsigned Reg0 = MI.getOperand(0).getReg();
- unsigned Reg1 = MI.getOperand(1).getReg();
- unsigned Reg2 = MI.getOperand(2).getReg();
+ Register Reg0 = MI.getOperand(0).getReg();
+ Register Reg1 = MI.getOperand(1).getReg();
+ Register Reg2 = MI.getOperand(2).getReg();
unsigned SubReg1 = MI.getOperand(1).getSubReg();
unsigned SubReg2 = MI.getOperand(2).getSubReg();
bool Reg1IsKill = MI.getOperand(1).isKill();
@@ -411,7 +421,7 @@ MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (NewMI) {
// Create a new instruction.
- unsigned Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
+ Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
bool Reg0IsDead = MI.getOperand(0).isDead();
return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
.addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
@@ -942,12 +952,16 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
} else if (PPC::G8RCRegClass.contains(SrcReg) &&
PPC::VSFRCRegClass.contains(DestReg)) {
+ assert(Subtarget.hasDirectMove() &&
+ "Subtarget doesn't support directmove, don't know how to copy.");
BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
NumGPRtoVSRSpill++;
getKillRegState(KillSrc);
return;
} else if (PPC::VSFRCRegClass.contains(SrcReg) &&
PPC::G8RCRegClass.contains(DestReg)) {
+ assert(Subtarget.hasDirectMove() &&
+ "Subtarget doesn't support directmove, don't know how to copy.");
BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
getKillRegState(KillSrc);
return;
@@ -963,7 +977,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
-
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
@@ -996,6 +1009,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
+ else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
Opc = PPC::EVOR;
else
@@ -1066,6 +1081,10 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1152,6 +1171,10 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
+ } else if (PPC::SPERCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPESpill;
+ } else if (PPC::SPE4RCRegClass.contains(Reg)) {
+ OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
@@ -1632,6 +1655,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
return false;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
// The record forms set the condition register based on a signed comparison
// with zero (so says the ISA manual). This is not as straightforward as it
// seems, however, because this is always a 64-bit comparison on PPC64, even
@@ -1645,6 +1669,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+ // Look through copies unless that gets us to a physical register.
+ unsigned ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
+ if (TargetRegisterInfo::isVirtualRegister(ActualSrc))
+ SrcReg = ActualSrc;
+
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
@@ -1745,7 +1774,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
return false;
PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
- PPC::Predicate NewPred = Pred;
unsigned PredCond = PPC::getPredicateCondition(Pred);
unsigned PredHint = PPC::getPredicateHint(Pred);
int16_t Immed = (int16_t)Value;
@@ -1755,25 +1783,23 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (Immed == -1 && PredCond == PPC::PRED_GT)
// We convert "greater than -1" into "greater than or equal to 0",
// since we are assuming signed comparison by !equalityOnly
- NewPred = PPC::getPredicate(PPC::PRED_GE, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
else if (Immed == -1 && PredCond == PPC::PRED_LE)
// We convert "less than or equal to -1" into "less than 0".
- NewPred = PPC::getPredicate(PPC::PRED_LT, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
else if (Immed == 1 && PredCond == PPC::PRED_LT)
// We convert "less than 1" into "less than or equal to 0".
- NewPred = PPC::getPredicate(PPC::PRED_LE, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
else if (Immed == 1 && PredCond == PPC::PRED_GE)
// We convert "greater than or equal to 1" into "greater than 0".
- NewPred = PPC::getPredicate(PPC::PRED_GT, PredHint);
+ Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
else
return false;
- PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
- NewPred));
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), Pred));
}
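The predicate rewrites above are the standard signed-comparison equivalences against zero. A small standalone check of those equivalences, illustrative only and not LLVM code:

// Illustrative only: the signed-comparison equivalences used above.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X = -3; X <= 3; ++X) {
    assert((X > -1) == (X >= 0)); // PRED_GT vs. -1 -> PRED_GE vs. 0
    assert((X <= -1) == (X < 0)); // PRED_LE vs. -1 -> PRED_LT vs. 0
    assert((X < 1) == (X <= 0));  // PRED_LT vs.  1 -> PRED_LE vs. 0
    assert((X >= 1) == (X > 0));  // PRED_GE vs.  1 -> PRED_GT vs. 0
  }
  return 0;
}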
// Search for Sub.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
// Get ready to iterate backward from CmpInstr.
@@ -1992,7 +2018,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::INLINEASM) {
+ if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -2358,13 +2384,6 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
It++;
unsigned Reg = MI.getOperand(i).getReg();
- // MachineInstr::readsRegister only returns true if the machine
- // instruction reads the exact register or its super-register. It
- // does not consider uses of sub-registers which seems like strange
- // behaviour. Nonetheless, if we end up with a 64-bit register here,
- // get the corresponding 32-bit register to check.
- if (PPC::G8RCRegClass.contains(Reg))
- Reg = Reg - PPC::X0 + PPC::R0;
// Is this register defined by some form of add-immediate (including
// load-immediate) within this basic block?
@@ -2381,7 +2400,7 @@ MachineInstr *PPCInstrInfo::getForwardingDefMI(
return &*It;
}
break;
- } else if (It->readsRegister(Reg, &getRegisterInfo()))
+ } else if (It->readsRegister(Reg, &getRegisterInfo()))
// If we see another use of this reg between the def and the MI,
        // we want to flag it so the def isn't deleted.
SeenIntermediateUse = true;
@@ -2424,6 +2443,83 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
return OpcodesForSpill[(Subtarget.hasP9Vector()) ? 1 : 0];
}
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+ unsigned RegNo) const {
+ const MachineRegisterInfo &MRI =
+ StartMI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA())
+ return;
+
+  // Instructions between [StartMI, EndMI] should be in the same basic block.
+ assert((StartMI.getParent() == EndMI.getParent()) &&
+ "Instructions are not in same basic block");
+
+ bool IsKillSet = false;
+
+ auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
+ MachineOperand &MO = MI.getOperand(Index);
+ if (MO.isReg() && MO.isUse() && MO.isKill() &&
+ getRegisterInfo().regsOverlap(MO.getReg(), RegNo))
+ MO.setIsKill(false);
+ };
+
+ // Set killed flag for EndMI.
+ // No need to do anything if EndMI defines RegNo.
+ int UseIndex =
+ EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+ if (UseIndex != -1) {
+ EndMI.getOperand(UseIndex).setIsKill(true);
+ IsKillSet = true;
+    // Clear the killed flag for other EndMI operands related to RegNo. In some
+    // unexpected cases, killed may be set multiple times for the same register
+    // operand in the same MI.
+ for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+ if (i != UseIndex)
+ clearOperandKillInfo(EndMI, i);
+ }
+
+ // Walking the inst in reverse order (EndMI -> StartMI].
+ MachineBasicBlock::reverse_iterator It = EndMI;
+ MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+ // EndMI has been handled above, skip it here.
+ It++;
+ MachineOperand *MO = nullptr;
+ for (; It != E; ++It) {
+    // Skip instructions which could not be a def/use of RegNo.
+ if (It->isDebugInstr() || It->isPosition())
+ continue;
+
+    // Clear the killed flag for all It operands related to RegNo. In some
+    // unexpected cases, killed may be set multiple times for the same register
+    // operand in the same MI.
+ for (int i = 0, e = It->getNumOperands(); i != e; ++i)
+ clearOperandKillInfo(*It, i);
+
+ // If killed is not set, set killed for its last use or set dead for its def
+ // if no use found.
+ if (!IsKillSet) {
+ if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
+ // Use found, set it killed.
+ IsKillSet = true;
+ MO->setIsKill(true);
+ continue;
+ } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
+ &getRegisterInfo()))) {
+ // No use found, set dead for its def.
+ assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+ MO->setIsDead(true);
+ break;
+ }
+ }
+
+ if ((&*It) == &StartMI)
+ break;
+ }
+  // Ensure RegNo liveness is killed after EndMI.
+ assert((IsKillSet || (MO && MO->isDead())) &&
+ "RegNo should be killed or dead");
+}
+
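To make the reverse walk in fixupIsDeadOrKill above easier to follow, here is a toy model of the same idea; it is not LLVM code, and ToyOperand and fixupToyKillDead are invented names. It assumes a single register and at most one relevant operand per instruction:

// Toy model only: mimic the reverse walk over [Start, End] for one register.
#include <cassert>
#include <vector>

struct ToyOperand {
  bool IsDef;
  bool IsUse;
  bool Kill = false;
  bool Dead = false;
};

// Clear stale kill flags, mark the last use of the register as killed, or mark
// its def dead if no use is found between Start and End.
static void fixupToyKillDead(std::vector<ToyOperand> &Insts, size_t Start,
                             size_t End) {
  bool KillSet = false;
  if (Insts[End].IsUse) { // EndMI uses the register, so it is the final use.
    Insts[End].Kill = true;
    KillSet = true;
  }
  for (size_t I = End; I > Start;) {
    --I;                   // walk backwards; EndMI was handled above
    Insts[I].Kill = false; // drop any stale kill flag
    if (!KillSet && Insts[I].IsUse) {
      Insts[I].Kill = true; // last use before EndMI becomes the kill
      KillSet = true;
    } else if (!KillSet && Insts[I].IsDef) {
      Insts[I].Dead = true; // no use at all: the def is dead
      break;
    }
  }
}

int main() {
  // def, use (stale kill), use: the kill flag moves to the last use.
  std::vector<ToyOperand> A = {{true, false}, {false, true, true}, {false, true}};
  fixupToyKillDead(A, 0, 2);
  assert(!A[1].Kill && A[2].Kill && !A[0].Dead);

  // def followed by an unrelated instruction: the def is marked dead.
  std::vector<ToyOperand> B = {{true, false}, {false, false}};
  fixupToyKillDead(B, 0, 1);
  assert(B[0].Dead && !B[1].Kill);
  return 0;
}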
// If this instruction has an immediate form and one of its operands is a
// result of a load-immediate or an add-immediate, convert it to
// the immediate form if the constant is in range.
@@ -2440,8 +2536,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
assert(ForwardingOperand < MI.getNumOperands() &&
"The forwarding operand needs to be valid at this point");
- bool KillFwdDefMI = !SeenIntermediateUse &&
- MI.getOperand(ForwardingOperand).isKill();
+ bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
+ bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
+ unsigned ForwardingOperandReg = MI.getOperand(ForwardingOperand).getReg();
if (KilledDef && KillFwdDefMI)
*KilledDef = DefMI;
@@ -2450,8 +2547,9 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by an add-immediate,
// try to convert it.
- if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand,
- *DefMI, KillFwdDefMI))
+ if (HasImmForm &&
+ transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
+ KillFwdDefMI))
return true;
if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) ||
@@ -2466,7 +2564,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
// If this is a reg+reg instruction that has a reg+imm form,
// and one of the operands is produced by LI, convert it now.
if (HasImmForm)
- return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm);
+ return transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI, SExtImm);
bool ReplaceWithLI = false;
bool Is64BitLI = false;
@@ -2486,6 +2584,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
case PPC::CMPLDI: {
// Doing this post-RA would require dataflow analysis to reliably find uses
// of the CR register set by the compare.
+ // No need to fixup killed/dead flag since this transformation is only valid
+ // before RA.
if (PostRA)
return false;
// If a compare-immediate is fed by an immediate and is itself an input of
@@ -2662,6 +2762,14 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
if (KilledDef && SetCR)
*KilledDef = nullptr;
replaceInstrWithLI(MI, LII);
+
+ // Fixup killed/dead flag after transformation.
+ // Pattern:
+ // ForwardingOperandReg = LI imm1
+ // y = op2 imm2, ForwardingOperandReg(killed)
+ if (IsForwardingOperandKilled)
+ fixupIsDeadOrKill(*DefMI, MI, ForwardingOperandReg);
+
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
return true;
@@ -2669,10 +2777,6 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}
-static bool isVFReg(unsigned Reg) {
- return PPC::VFRCRegClass.contains(Reg);
-}
-
bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
ImmInstrInfo &III, bool PostRA) const {
unsigned Opc = MI.getOpcode();
@@ -3007,7 +3111,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSSPX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::LXSSP;
else {
III.ImmOpcode = PPC::LFS;
@@ -3021,7 +3125,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::LXSDX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::LXSD;
else {
III.ImmOpcode = PPC::LFD;
@@ -3039,7 +3143,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSSPX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::STXSSP;
else {
III.ImmOpcode = PPC::STFS;
@@ -3053,7 +3157,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
break;
case PPC::STXSDX:
if (PostRA) {
- if (isVFReg(MI.getOperand(0).getReg()))
+ if (isVFRegister(MI.getOperand(0).getReg()))
III.ImmOpcode = PPC::STXSD;
else {
III.ImmOpcode = PPC::STFD;
@@ -3110,7 +3214,7 @@ static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
}
}
-// Check if the 'MI' that has the index OpNoForForwarding
+// Check if the 'MI' that has the index OpNoForForwarding
// meets the requirement described in the ImmInstrInfo.
bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
const ImmInstrInfo &III,
@@ -3156,7 +3260,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
MachineOperand *&RegMO) const {
unsigned Opc = DefMI.getOpcode();
if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
- return false;
+ return false;
assert(DefMI.getNumOperands() >= 3 &&
"Add inst must have at least three operands");
@@ -3169,11 +3273,10 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
return isAnImmediateOperand(*ImmMO);
}
-bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
- const MachineInstr &DefMI,
- const MachineInstr &MI,
- bool KillDefMI
- ) const {
+bool PPCInstrInfo::isRegElgibleForForwarding(
+ const MachineOperand &RegMO, const MachineInstr &DefMI,
+ const MachineInstr &MI, bool KillDefMI,
+ bool &IsFwdFeederRegKilled) const {
// x = addi y, imm
// ...
// z = lfdx 0, x -> z = lfd imm(y)
@@ -3184,14 +3287,7 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
if (MRI.isSSA())
return false;
- // MachineInstr::readsRegister only returns true if the machine
- // instruction reads the exact register or its super-register. It
- // does not consider uses of sub-registers which seems like strange
- // behaviour. Nonetheless, if we end up with a 64-bit register here,
- // get the corresponding 32-bit register to check.
unsigned Reg = RegMO.getReg();
- if (PPC::G8RCRegClass.contains(Reg))
- Reg = Reg - PPC::X0 + PPC::R0;
// Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
MachineBasicBlock::const_reverse_iterator It = MI;
@@ -3200,15 +3296,17 @@ bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO,
for (; It != E; ++It) {
if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
return false;
+ else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
+ IsFwdFeederRegKilled = true;
// Made it to DefMI without encountering a clobber.
if ((&*It) == &DefMI)
break;
}
assert((&*It) == &DefMI && "DefMI is missing");
- // If DefMI also uses the register to be forwarded, we can only forward it
+ // If DefMI also defines the register to be forwarded, we can only forward it
// if DefMI is being erased.
- if (DefMI.readsRegister(Reg, &getRegisterInfo()))
+ if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
return KillDefMI;
return true;
@@ -3271,11 +3369,9 @@ bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
// is the literal zero, attempt to forward the source of the add-immediate to
// the corresponding D-Form instruction with the displacement coming from
// the immediate being added.
-bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
- const ImmInstrInfo &III,
- unsigned OpNoForForwarding,
- MachineInstr &DefMI,
- bool KillDefMI) const {
+bool PPCInstrInfo::transformToImmFormFedByAdd(
+ MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
+ MachineInstr &DefMI, bool KillDefMI) const {
// RegMO ImmMO
// | |
// x = addi reg, imm <----- DefMI
@@ -3300,10 +3396,19 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
return false;
+ bool IsFwdFeederRegKilled = false;
// Check if the RegMO can be forwarded to MI.
- if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI))
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
+ IsFwdFeederRegKilled))
return false;
+ // Get killed info in case fixup needed after transformation.
+ unsigned ForwardKilledOperandReg = ~0U;
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ bool PostRA = !MRI.isSSA();
+ if (PostRA && MI.getOperand(OpNoForForwarding).isKill())
+ ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg();
+
// We know that, the MI and DefMI both meet the pattern, and
// the Imm also meet the requirement with the new Imm-form.
// It is safe to do the transformation now.
@@ -3327,7 +3432,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
// Otherwise, it is Constant Pool Index(CPI) or Global,
// which is relocation in fact. We need to replace the special zero
// register with ImmMO.
- // Before that, we need to fixup the target flags for imm.
+ // Before that, we need to fixup the target flags for imm.
  // For some reason, we fail to set the flag for the ImmMO if it is CPI.
if (DefMI.getOpcode() == PPC::ADDItocL)
ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
@@ -3354,6 +3459,22 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
// Update the opcode.
MI.setDesc(get(III.ImmOpcode));
+ // Fix up killed/dead flag after transformation.
+ // Pattern 1:
+ // x = ADD KilledFwdFeederReg, imm
+ // n = opn KilledFwdFeederReg(killed), regn
+ // y = XOP 0, x
+ // Pattern 2:
+ // x = ADD reg(killed), imm
+ // y = XOP 0, x
+ if (IsFwdFeederRegKilled || RegMO->isKill())
+ fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ // Pattern 3:
+ // ForwardKilledOperandReg = ADD reg, imm
+ // y = XOP 0, ForwardKilledOperandReg(killed)
+ if (ForwardKilledOperandReg != ~0U)
+ fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -3363,6 +3484,7 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI,
bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
const ImmInstrInfo &III,
unsigned ConstantOpNo,
+ MachineInstr &DefMI,
int64_t Imm) const {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
bool PostRA = !MRI.isSSA();
@@ -3401,6 +3523,11 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
return false;
}
+ // Get killed info in case fixup needed after transformation.
+ unsigned ForwardKilledOperandReg = ~0U;
+ if (PostRA && MI.getOperand(ConstantOpNo).isKill())
+ ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();
+
unsigned Opc = MI.getOpcode();
bool SpecialShift32 =
Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo;
@@ -3483,6 +3610,13 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
}
}
}
+
+ // Fix up killed/dead flag after transformation.
+ // Pattern:
+ // ForwardKilledOperandReg = LI imm
+ // y = XOP reg, ForwardKilledOperandReg(killed)
+ if (ForwardKilledOperandReg != ~0U)
+ fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
return true;
}
@@ -3784,3 +3918,133 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
}
return false;
}
+
+bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
+ return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
+}
+
+bool PPCInstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const {
+ MachineBasicBlock *LoopEnd = L.getBottomBlock();
+ MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
+ // We really "analyze" only CTR loops right now.
+ if (I != LoopEnd->end() && isBDNZ(I->getOpcode())) {
+ IndVarInst = nullptr;
+ CmpInst = &*I;
+ return false;
+ }
+ return true;
+}
+
+MachineInstr *
+PPCInstrInfo::findLoopInstr(MachineBasicBlock &PreHeader) const {
+
+ unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
+
+  // The loop set-up instruction should be in the preheader.
+ for (auto &I : PreHeader.instrs())
+ if (I.getOpcode() == LOOPi)
+ return &I;
+ return nullptr;
+}
+
+unsigned PPCInstrInfo::reduceLoopCount(
+ MachineBasicBlock &MBB, MachineBasicBlock &PreHeader, MachineInstr *IndVar,
+ MachineInstr &Cmp, SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts, unsigned Iter,
+ unsigned MaxIter) const {
+  // We currently expect a hardware loop. This means that IndVar is set
+  // to null, and the compare is the decrement-and-branch (BDNZ) instruction.
+ assert((!IndVar) && isBDNZ(Cmp.getOpcode()) && "Expecting a CTR loop");
+ MachineFunction *MF = MBB.getParent();
+ DebugLoc DL = Cmp.getDebugLoc();
+ MachineInstr *Loop = findLoopInstr(PreHeader);
+ if (!Loop)
+ return 0;
+ unsigned LoopCountReg = Loop->getOperand(0).getReg();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
+
+ if (!LoopCount)
+ return 0;
+ // If the loop trip count is a compile-time value, then just change the
+ // value.
+ if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI) {
+ int64_t Offset = LoopCount->getOperand(1).getImm();
+ if (Offset <= 1) {
+ LoopCount->eraseFromParent();
+ Loop->eraseFromParent();
+ return 0;
+ }
+ LoopCount->getOperand(1).setImm(Offset - 1);
+ return Offset - 1;
+ }
+
+ // The loop trip count is a run-time value.
+  // We need to subtract one from the trip count, and insert a branch later to
+  // check if we're done with the loop.
+
+  // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by 1,
+  // we don't need to generate anything here.
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(
+ Subtarget.isPPC64() ? PPC::CTR8 : PPC::CTR, true));
+ return LoopCountReg;
+}
+
+// Return true if we can determine the base operand, byte offset and memory
+// width of the instruction. Width is the size of memory that is being
+// loaded/stored.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+ const MachineInstr &LdSt,
+ const MachineOperand *&BaseReg,
+ int64_t &Offset,
+ unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+ // Handle only loads/stores with base register followed by immediate offset.
+ if (LdSt.getNumExplicitOperands() != 3)
+ return false;
+ if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+ return false;
+
+ if (!LdSt.hasOneMemOperand())
+ return false;
+
+ Width = (*LdSt.memoperands_begin())->getSize();
+ Offset = LdSt.getOperand(1).getImm();
+ BaseReg = &LdSt.getOperand(2);
+ return true;
+}
+
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+ const MachineInstr &MIa, const MachineInstr &MIb,
+ AliasAnalysis * /*AA*/) const {
+ assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+ assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+ if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+ MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+ return false;
+
+  // Retrieve the base register, the offset from the base register and the
+  // width. Width is the size of memory that is being loaded/stored (e.g. 1, 2,
+  // 4). If the base registers are identical, and the lower access's offset
+  // plus its width does not reach the higher access's offset, then the memory
+  // accesses are disjoint.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+ int64_t OffsetA = 0, OffsetB = 0;
+ unsigned int WidthA = 0, WidthB = 0;
+ if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+ getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+ if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+ int LowOffset = std::min(OffsetA, OffsetB);
+ int HighOffset = std::max(OffsetA, OffsetB);
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+ }
+ return false;
+}
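Once both accesses share the same base register, the disjointness test above reduces to a simple interval check. A standalone sketch of that check, illustrative only (triviallyDisjoint is a name made up here, not part of the patch):

// Illustrative only: interval test for two fixed-offset accesses off one base.
#include <algorithm>
#include <cassert>
#include <cstdint>

static bool triviallyDisjoint(int64_t OffsetA, unsigned WidthA,
                              int64_t OffsetB, unsigned WidthB) {
  int64_t LowOffset = std::min(OffsetA, OffsetB);
  int64_t HighOffset = std::max(OffsetA, OffsetB);
  unsigned LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  // The lower access ends at or before the higher one starts.
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(triviallyDisjoint(0, 4, 4, 4));  // [0,4) and [4,8) do not overlap
  assert(!triviallyDisjoint(0, 8, 4, 4)); // [0,8) and [4,8) overlap
  assert(triviallyDisjoint(16, 2, 8, 8)); // order of the accesses is irrelevant
  return 0;
}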
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7ed558b835af..70fb757e8f1e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,9 +1,8 @@
//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCINSTRINFO_H
-#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -66,9 +64,6 @@ enum {
/// Shift count to bypass PPC970 flags
NewDef_Shift = 6,
- /// The VSX instruction that uses VSX register (vs0-vs63), instead of VMX
- /// register (v0-v31).
- UseVSXReg = 0x1 << NewDef_Shift,
/// This instruction is an X-Form memory operation.
XFormMemOp = 0x1 << (NewDef_Shift+1)
};
@@ -129,12 +124,12 @@ class PPCInstrInfo : public PPCGenInstrInfo {
// If the inst has imm-form and one of its operand is produced by a LI,
// put the imm into the inst directly and remove the LI if possible.
bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo, int64_t Imm) const;
+ unsigned ConstantOpNo, MachineInstr &DefMI,
+ int64_t Imm) const;
// If the inst has imm-form and one of its operand is produced by an
// add-immediate, try to transform it when possible.
bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III,
- unsigned ConstantOpNo,
- MachineInstr &DefMI,
+ unsigned ConstantOpNo, MachineInstr &DefMI,
bool KillDefMI) const;
// Try to find that, if the instruction 'MI' contains any operand that
// could be forwarded from some inst that feeds it. If yes, return the
@@ -159,8 +154,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
int64_t &Imm) const;
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
const MachineInstr &DefMI,
- const MachineInstr &MI,
- bool KillDefMI) const;
+ const MachineInstr &MI, bool KillDefMI,
+ bool &IsFwdFeederRegKilled) const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
virtual void anchor();
@@ -362,6 +357,22 @@ public:
unsigned SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
+
+  /// Return true if we can determine the base operand, byte offset and memory
+  /// width of an instruction. Width is the size of memory that is being
+  /// loaded/stored (e.g. 1, 2, 4, 8).
+ bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
+ int64_t &Offset, unsigned &Width,
+ const TargetRegisterInfo *TRI) const;
+
+ /// Return true if two MIs access different memory addresses and false
+ /// otherwise
+ bool
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
+ AliasAnalysis *AA = nullptr) const override;
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
@@ -412,6 +423,18 @@ public:
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
+
+  /// Fix up the killed/dead flags for register \p RegNo between instructions
+  /// [\p StartMI, \p EndMI]. Some post-RA transformations may violate the
+  /// killed/dead flag semantics; this function can be called to fix them up.
+  /// Before calling this function,
+ /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
+ /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
+ /// and possible definition for \p RegNo is \p StartMI or \p EndMI.
+ /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same
+ /// basic block.
+ void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+ unsigned RegNo) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
int64_t Imm) const;
@@ -429,14 +452,55 @@ public:
/// operands).
static unsigned getRegNumForOperand(const MCInstrDesc &Desc, unsigned Reg,
unsigned OpNo) {
- if (Desc.TSFlags & PPCII::UseVSXReg) {
- if (isVRRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::V0);
- else if (isVFRegister(Reg))
- Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ int16_t regClass = Desc.OpInfo[OpNo].RegClass;
+ switch (regClass) {
+  // We store F0-F31, VF0-VF31 in the MCOperand; during encoding and
+  // disassembling they should appear as F0-F31, VSX32-VSX63.
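+  // (e.g. VF5 maps to VSX37 = VSX32 + 5, while F5 is returned unchanged)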
+ case PPC::VSSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ if (isVFRegister(Reg))
+ return PPC::VSX32 + (Reg - PPC::VF0);
+ break;
+  // We store VSL0-VSL31, V0-V31 in the MCOperand; during encoding and
+  // disassembling they should appear as VSL0-VSL31, VSX32-VSX63.
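+  // (e.g. V2 maps to VSX34 = VSX32 + 2, while VSL2 is returned unchanged)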
+ case PPC::VSRCRegClassID:
+ if (isVRRegister(Reg))
+ return PPC::VSX32 + (Reg - PPC::V0);
+ break;
+  // Other register classes need no mapping
+ default:
+ break;
}
return Reg;
}
+
+  /// Check whether \p Opcode is BDNZ (decrement CTR and branch if it is still
+  /// nonzero).
+ bool isBDNZ(unsigned Opcode) const;
+
+  /// Find the hardware loop instruction used to set up the specified loop.
+  /// On PPC, there are two instructions used to set up the hardware loop
+  /// (MTCTRloop, MTCTR8loop), with corresponding endloop instructions
+  /// (BDNZ, BDNZ8) marking the end of the loop.
+ MachineInstr *findLoopInstr(MachineBasicBlock &PreHeader) const;
+
+  /// Analyze the loop code to find the loop induction variable and the compare
+  /// used to compute the number of iterations. Currently, we only analyze
+  /// loops controlled by hardware loops; in that case, the induction variable
+  /// instruction is null. For all other cases, this function returns true,
+  /// meaning we are unable to analyze the loop. \p IndVarInst and \p CmpInst
+  /// are given new values when the read-only loop \p L can be analyzed;
+  /// otherwise they are left unchanged.
+ bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const override;
+  /// Generate code to reduce the loop iteration count by one and check if the
+  /// loop is finished. Return the value/register of the new loop count. We
+  /// need this function when peeling off one or more iterations of a loop.
+  /// This function assumes that the last iteration is peeled first.
+ unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineBasicBlock &PreHeader,
+ MachineInstr *IndVar, MachineInstr &Cmp,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts,
+ unsigned Iter, unsigned MaxIter) const override;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index dd3f1ac79089..c313337047f0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,9 +1,8 @@
//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -231,6 +230,18 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128",
SDTCisSameAs<1,2>]>,
[]>;
+def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64",
+ SDTypeProfile<1, 2,
+ [SDTCisVT<0, f64>, SDTCisVT<1,i32>,
+ SDTCisVT<1,i32>]>,
+ []>;
+
+def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE",
+ SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, f64>,
+ SDTCisPtrTy<2>]>,
+ []>;
+
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -458,6 +469,17 @@ def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
return !isOffsetMultipleOf(N, 16);
}]>;
+// PatFrag for binary operation whose operands are both non-constant
+class BinOpWithoutSImm16Operand<SDNode opcode> :
+ PatFrag<(ops node:$left, node:$right), (opcode node:$left, node:$right), [{
+ int16_t Imm;
+ return !isIntS16Immediate(N->getOperand(0), Imm)
+ && !isIntS16Immediate(N->getOperand(1), Imm);
+}]>;
+
+def add_without_simm16 : BinOpWithoutSImm16Operand<add>;
+def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
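+// For example, add_without_simm16 matches an add only when neither operand is
+// representable as a signed 16-bit immediate; mul_without_simm16 does the same
+// for mul.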
+
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -546,10 +568,6 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
-def crrc0 : RegisterOperand<CRRC0> {
- let ParserMatchClass = PPCRegCRRCAsmOperand;
-}
-
def PPCRegSPERCAsmOperand : AsmOperandClass {
let Name = "RegSPERC"; let PredicateMethod = "isRegNumber";
}
@@ -737,7 +755,9 @@ def abscondbrtarget : Operand<OtherVT> {
def calltarget : Operand<iPTR> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
+ let DecoderMethod = "DecodePCRel24BranchTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
def abscalltarget : Operand<iPTR> {
let PrintMethod = "printAbsBranchOperand";
@@ -881,11 +901,24 @@ def pred : Operand<OtherVT> {
}
// Define PowerPC specific addressing mode.
-def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
-def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+
+// d-form
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>; // "stb"
+// ds-form
+def iaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+// dq-form
+def iaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
+
+// The forms below are all x-form addressing modes; we use three different ones
+// so we can make an accurate check for x-form instructions in ISEL.
+// x-form addressing mode whose associated displacement form is D.
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; // "stbx"
+// x-form addressing mode whose associated displacement form is DS.
+def xaddrX4 : ComplexPattern<iPTR, 2, "SelectAddrIdxX4", [], []>; // "stdx"
+// x-form addressing mode whose associated displacement form is DQ.
+def xaddrX16 : ComplexPattern<iPTR, 2, "SelectAddrIdxX16", [], []>; // "stxvx"
+
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
-def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.
@@ -1309,6 +1342,15 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
}
}
+// Set the float rounding mode.
+let Uses = [RM], Defs = [RM] in {
+def SETRNDi : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins u2imm:$RND),
+ "#SETRNDi", [(set f64:$FRT, (int_ppc_setrnd (i32 imm:$RND)))]>;
+
+def SETRND : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins gprc:$in),
+ "#SETRND", [(set f64:$FRT, (int_ppc_setrnd gprc :$in))]>;
+}
+
let Defs = [LR] in
def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>,
PPC970_Unit_BRU;
@@ -1435,6 +1477,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
def BCLn : BForm_4<16, 4, 0, 1, (outs),
(ins crbitrc:$bi, condbrtarget:$dst),
"bcl 4, $bi, $dst">;
+ def BL_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
}
}
let Uses = [CTR, RM] in {
@@ -2512,6 +2557,7 @@ def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
[(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
let isCodeGenOnly = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
"creqv $dst, $dst, $dst", IIC_BrCR,
[(set i1:$dst, 1)]>;
@@ -2519,6 +2565,7 @@ def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
"crxor $dst, $dst, $dst", IIC_BrCR,
[(set i1:$dst, 0)]>;
+}
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
@@ -2566,7 +2613,7 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
-let Pattern = [(int_ppc_mtctr i32:$rS)] in
+let Pattern = [(int_set_loop_iterations i32:$rS)] in
def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
"mtctr $rS", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -2993,9 +3040,16 @@ def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
// Calls
def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
(BL tglobaladdr:$dst)>;
+
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+// Calls for AIX only
+def : Pat<(PPCcall (i32 mcsym:$dst)),
+ (BL mcsym:$dst)>;
+def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
+ (BL_NOP mcsym:$dst)>;
+
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -4071,6 +4125,10 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+let Defs = [CR0] in
+def SLBFEEo : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
+ "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isDOT;
+
def TLBIA : XForm_0<31, 370, (outs), (ins),
"tlbia", IIC_SprTLBIA, []>;
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td
index ef589ad01fd7..d67041d46d9f 100644
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -1,9 +1,8 @@
//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCInstrSPE.td b/lib/Target/PowerPC/PPCInstrSPE.td
index 9f5891a45f22..935c3044ae47 100644
--- a/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/lib/Target/PowerPC/PPCInstrSPE.td
@@ -1,9 +1,8 @@
//=======-- PPCInstrSPE.td - The PowerPC SPE Extension -*- tablegen -*-=======//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -512,7 +511,7 @@ def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src),
def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
"evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>;
-def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
+def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB),
"evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>;
def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
"evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>;
@@ -887,4 +886,14 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
(SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
(SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+
+def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)),
+ (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>;
+
+def : Pat<(i32 (PPCextract_spe f64:$rA, 1)),
+ (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>;
+def : Pat<(i32 (PPCextract_spe f64:$rA, 0)),
+ (i32 (EXTRACT_SUBREG $rA, sub_32))>;
+
}
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 0f073388dc74..07f38a61d098 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1,9 +1,8 @@
//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -54,6 +53,15 @@ def PPCRegSPILLTOVSRRCAsmOperand : AsmOperandClass {
def spilltovsrrc : RegisterOperand<SPILLTOVSRRC> {
let ParserMatchClass = PPCRegSPILLTOVSRRCAsmOperand;
}
+
+def SDT_PPCldvsxlh : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v4f32>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCfpextlh : SDTypeProfile<1, 1, [
+ SDTCisVT<0, v2f64>, SDTCisVT<1, v4f32>
+]>;
+
// Little-endian-specific nodes.
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -85,6 +93,10 @@ def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
+def PPCfpextlh : SDNode<"PPCISD::FP_EXTEND_LH", SDT_PPCfpextlh, []>;
+def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {
@@ -124,7 +136,6 @@ def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let UseVSXReg = 1 in {
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
@@ -841,12 +852,12 @@ let Uses = [RM] in {
"xxlxor $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
} // isCommutable
- let isCodeGenOnly = 1 in
- def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+
+ let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+ def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
[(set v4i32:$XT, (v4i32 immAllZerosV))]>;
-
- let isCodeGenOnly = 1 in {
def XXLXORdpz : XX3Form_SetZero<60, 154,
(outs vsfrc:$XT), (ins),
"xxlxor $XT, $XT, $XT", IIC_VecGeneral,
@@ -895,11 +906,10 @@ let Uses = [RM] in {
(PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
let isCodeGenOnly = 1 in
def XXSPLTWs : XX2Form_2<60, 164,
- (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
+ (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // hasSideEffects
-} // UseVSXReg = 1
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
@@ -961,6 +971,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
(v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+ (and v4i32:$B, v4i32:$C))),
+ (v4i32 (XXSEL $A, $B, $C))>;
+
let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -1063,6 +1077,8 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
+def : Pat<(v2f64 (PPCfpextlh v4f32:$C)), (XVCVSPDP (XXMRGHW $C, $C))>;
+
// Loads.
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -1176,6 +1192,15 @@ def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
(XXSEL $vC, $vB, $vA)>;
+def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMINDP $src1, $src2))>;
+
let Predicates = [IsLittleEndian] in {
def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
(f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
@@ -1248,7 +1273,7 @@ def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
- let isCommutable = 1, UseVSXReg = 1 in {
+ let isCommutable = 1 in {
def XXLEQV : XX3Form<60, 186,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxleqv $XT, $XA, $XB", IIC_VecGeneral,
@@ -1258,12 +1283,11 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xxlnand $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
v4i32:$XB)))]>;
- } // isCommutable, UseVSXReg
+ } // isCommutable
def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
(XXLEQV $A, $B)>;
- let UseVSXReg = 1 in {
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1312,7 +1336,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"#STIWX",
[(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
- } // UseVSXReg = 1
def : Pat<(f64 (extloadf32 xoaddr:$src)),
(COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
@@ -1342,7 +1365,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
(SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
- let UseVSXReg = 1 in {
// VSX Elementary Scalar FP arithmetic (SP)
let isCommutable = 1 in {
def XSADDSP : XX3Form<60, 0,
@@ -1354,7 +1376,10 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xsmulsp $XT, $XA, $XB", IIC_VecFP,
[(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
} // isCommutable
-
+ def XSSUBSP : XX3Form<60, 8,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xssubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
def XSDIVSP : XX3Form<60, 24,
(outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
"xsdivsp $XT, $XA, $XB", IIC_FPDivS,
@@ -1374,10 +1399,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(outs vssrc:$XT), (ins vssrc:$XB),
"xsrsqrtesp $XT, $XB", IIC_VecFP,
[(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
- def XSSUBSP : XX3Form<60, 8,
- (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
- "xssubsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
// FMA Instructions
let BaseName = "XSMADDASP" in {
@@ -1470,7 +1491,6 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
- } // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
def : Pat<DWToSPExtractConv.El0SS1,
@@ -1514,10 +1534,22 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
(STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+ def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+ def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
} // AddedComplexity = 400
} // HasP8Vector
-let UseVSXReg = 1, AddedComplexity = 400 in {
+let AddedComplexity = 400 in {
let Predicates = [HasDirectMove] in {
// VSX direct move instructions
def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1525,7 +1557,7 @@ let Predicates = [HasDirectMove] in {
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in
- def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
"mfvsrd $rA, $XT", IIC_VecGeneral,
[]>,
Requires<[In64BitMode]>;
@@ -1557,7 +1589,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
[]>, Requires<[In64BitMode]>;
} // IsISA3_0, HasDirectMove
-} // UseVSXReg = 1
+} // AddedComplexity = 400
// We want to parse this from asm, but we don't want to emit this as it would
// be emitted with a VSX reg. So leave Emit = 0 here.
@@ -2415,7 +2447,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
list<dag> pattern>
: X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isDOT;
- let UseVSXReg = 1 in {
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
list<dag> pattern>
@@ -2434,7 +2465,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
InstrItinClass itin, list<dag> pattern>
: XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
!strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
- } // UseVSXReg = 1
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
@@ -2482,69 +2512,70 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let isCommutable = 1 in {
def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
[(set f128:$vT, (fadd f128:$vA, f128:$vB))]>;
+ def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
+ [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
+ }
+ def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
+ [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
+ def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
+ [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
+ // Square-Root
+ def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
+ [(set f128:$vT, (fsqrt f128:$vB))]>;
+ // (Negative) Multiply-{Add/Subtract}
+ def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
+ [(set f128:$vT,
+ (fma f128:$vA, f128:$vB,
+ f128:$vTi))]>;
+ def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
+ [(set f128:$vT,
+ (fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi)))]>;
+ def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
+ [(set f128:$vT,
+ (fneg (fma f128:$vA, f128:$vB,
+ f128:$vTi)))]>;
+ def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
+ [(set f128:$vT,
+ (fneg (fma f128:$vA, f128:$vB,
+ (fneg f128:$vTi))))]>;
+
+ let isCommutable = 1 in {
def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
[(set f128:$vT,
(int_ppc_addf128_round_to_odd
f128:$vA, f128:$vB))]>;
- def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
- [(set f128:$vT, (fmul f128:$vA, f128:$vB))]>;
def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
[(set f128:$vT,
(int_ppc_mulf128_round_to_odd
f128:$vA, f128:$vB))]>;
}
-
- def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
- [(set f128:$vT, (fsub f128:$vA, f128:$vB))]>;
def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
[(set f128:$vT,
(int_ppc_subf128_round_to_odd
f128:$vA, f128:$vB))]>;
- def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
- [(set f128:$vT, (fdiv f128:$vA, f128:$vB))]>;
def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
[(set f128:$vT,
(int_ppc_divf128_round_to_odd
f128:$vA, f128:$vB))]>;
-
- // Square-Root
- def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
- [(set f128:$vT, (fsqrt f128:$vB))]>;
def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
[(set f128:$vT,
(int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
- // (Negative) Multiply-{Add/Subtract}
- def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- f128:$vTi))]>;
def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
[(set f128:$vT,
(int_ppc_fmaf128_round_to_odd
f128:$vA,f128:$vB,f128:$vTi))]>;
- def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
- [(set f128:$vT,
- (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi)))]>;
def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
[(set f128:$vT,
(int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
- def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- f128:$vTi)))]>;
def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
[(set f128:$vT,
(fneg (int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, f128:$vTi)))]>;
- def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
- [(set f128:$vT,
- (fneg (fma f128:$vA, f128:$vB,
- (fneg f128:$vTi))))]>;
def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
[(set f128:$vT,
(fneg (int_ppc_fmaf128_round_to_odd
@@ -2572,8 +2603,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// DP/QP Compare Exponents
def XSCMPEXPDP : XX3Form_1<60, 59,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
- UseVSXReg;
+ "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
// DP Compare ==, >=, >, !=
@@ -2631,7 +2661,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
(f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
- let UseVSXReg = 1 in {
//===--------------------------------------------------------------------===//
// Round to Floating-Point Integer Instructions
@@ -2648,8 +2677,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
[(set v4f32:$XT,
(int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
- } // UseVSXReg = 1
-
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
// separate pattern so that it can convert the input register class from
// VRRC(v8i16) to VSRC.
@@ -2691,7 +2718,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Insert Exponent DP/QP
// XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
// vB NOTE: only vB.dword[0] is used, that's why we don't use
// X_VT5_VA5_VB5 form
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
@@ -2712,7 +2739,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v2i64 (XSXEXPQP $vA)), sub_64)))>;
// Vector Insert Word
- let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
def XXINSERTW :
XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
@@ -2726,7 +2752,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
(outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
"xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
- } // UseVSXReg = 1
// Vector Insert Exponent DP/SP
def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
@@ -2759,20 +2784,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
//===--------------------------------------------------------------------===//
// Test Data Class SP/DP/QP
- let UseVSXReg = 1 in {
def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
- } // UseVSXReg = 1
def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
(outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
"xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
// Vector Test Data Class SP/DP
- let UseVSXReg = 1 in {
def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
@@ -2783,7 +2805,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
[(set v2i64: $XT,
(int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
- } // UseVSXReg = 1
//===--------------------------------------------------------------------===//
@@ -2824,7 +2845,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Vector Splat Immediate Byte
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
- "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
+ "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
//===--------------------------------------------------------------------===//
// Vector/Scalar Load/Store Instructions
@@ -2834,7 +2855,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let mayLoad = 1, mayStore = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
- "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
+ "lxv $XT, $src", IIC_LdStLFD, []>;
// Load DWord
def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
"lxsd $vD, $src", IIC_LdStLFD, []>;
@@ -2847,7 +2868,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
: XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
+ !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
// Load as Integer Byte/Halfword & Zero Indexed
def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
@@ -2861,16 +2882,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Load Vector Indexed
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xaddr:$src))]>;
+ [(set v2f64:$XT, (load xaddrX16:$src))]>;
// Load Vector (Left-justified) with Length
def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvl $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>,
- UseVSXReg;
+ [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvll $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>,
- UseVSXReg;
+ [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
// Load Vector Word & Splat Indexed
def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
@@ -2881,7 +2900,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let mayStore = 1, mayLoad = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
- "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
+ "stxv $XT, $dst", IIC_LdStSTFD, []>;
// Store DWord
def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
"stxsd $vS, $dst", IIC_LdStSTFD, []>;
@@ -2893,7 +2912,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
: XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
+ !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
// Store as Integer Byte/Halfword Indexed
def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
@@ -2901,8 +2920,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
[(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
let isCodeGenOnly = 1 in {
- def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>;
- def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>;
+ def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
+ def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
}
// Store Vector Halfword*8/Byte*16 Indexed
@@ -2911,21 +2930,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Store Vector Indexed
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xaddr:$dst)]>;
+ [(store v2f64:$XT, xaddrX16:$dst)]>;
// Store Vector (Left-justified) with Length
def STXVL : XX1Form_memOp<31, 397, (outs),
(ins vsrc:$XT, memr:$dst, g8rc:$rB),
"stxvl $XT, $dst, $rB", IIC_LdStLoad,
[(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
- i64:$rB)]>,
- UseVSXReg;
+ i64:$rB)]>;
def STXVLL : XX1Form_memOp<31, 429, (outs),
(ins vsrc:$XT, memr:$dst, g8rc:$rB),
"stxvll $XT, $dst, $rB", IIC_LdStLoad,
[(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
- i64:$rB)]>,
- UseVSXReg;
+ i64:$rB)]>;
} // mayStore
let Predicates = [IsLittleEndian] in {
@@ -3045,24 +3062,24 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // IsLittleEndian, HasP9Vector
// D-Form Load/Store
- def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(f128 (quadwOffsetLoad iqaddr:$src)),
+ def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
(COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore f128:$rS, iqaddr:$dst),
+ def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
(STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
+ def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
(STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
(STXV $rS, memrix16:$dst)>;
@@ -3159,109 +3176,109 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv $S, xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
// Scalar stores of i16
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv $S, xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
} // IsBigEndian, HasP9Vector
let Predicates = [IsLittleEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 7)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 5)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 3)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 1)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv $S, xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 15)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 13)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 11)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (v16i8 (VSLDOI $S, $S, 9)), xoaddr:$dst)>;
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
// Scalar stores of i16
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 8)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 6)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 4)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 2)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv $S, xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 14)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 12)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (v16i8 (VSLDOI $S, $S, 10)), xoaddr:$dst)>;
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
} // IsLittleEndian, HasP9Vector
@@ -3273,53 +3290,97 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
"#DFLOADf32",
- [(set f32:$XT, (load ixaddr:$src))]>;
+ [(set f32:$XT, (load iaddrX4:$src))]>;
def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
"#DFLOADf64",
- [(set f64:$XT, (load ixaddr:$src))]>;
+ [(set f64:$XT, (load iaddrX4:$src))]>;
def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
"#DFSTOREf32",
- [(store f32:$XT, ixaddr:$dst)]>;
+ [(store f32:$XT, iaddrX4:$dst)]>;
def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
"#DFSTOREf64",
- [(store f64:$XT, ixaddr:$dst)]>;
+ [(store f64:$XT, iaddrX4:$dst)]>;
- def : Pat<(f64 (extloadf32 ixaddr:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))),
- (f32 (DFLOADf32 ixaddr:$src))>;
+ def : Pat<(f64 (extloadf32 iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+ def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
+ (f32 (DFLOADf32 iaddrX4:$src))>;
+ def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+ def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
let AddedComplexity = 400 in {
// The following pseudoinstructions are used to ensure the utilization
// of all 64 VSX registers.
let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
(v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
(v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
+ (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
(v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC), 2))>;
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
(v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>;
- }
+ (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC), 2))>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ iaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ } // IsLittleEndian, HasP9Vector
let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))),
- (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))),
- (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>;
- }
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+ def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+ def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+ } // IsBigEndian, HasP9Vector
}
let Predicates = [IsBigEndian, HasP9Vector] in {
@@ -3455,14 +3516,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // IsLittleEndian, HasP9Vector
// Convert (Un)Signed DWord in memory -> QP
- def : Pat<(f128 (sint_to_fp (i64 (load xaddr:$src)))),
- (f128 (XSCVSDQP (LXSDX xaddr:$src)))>;
- def : Pat<(f128 (sint_to_fp (i64 (load ixaddr:$src)))),
- (f128 (XSCVSDQP (LXSD ixaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load xaddr:$src)))),
- (f128 (XSCVUDQP (LXSDX xaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load ixaddr:$src)))),
- (f128 (XSCVUDQP (LXSD ixaddr:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+ def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+ def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
// Convert Unsigned HWord in memory -> QP
def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
@@ -3483,13 +3544,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Instructions for store(fptosi).
// The 8-byte version is repeated here due to availability of D-Form STXSD.
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
(STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- xaddr:$dst)>;
+ xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ixaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
(STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- ixaddr:$dst)>;
+ iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
(STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
@@ -3500,11 +3561,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
(STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddr:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xaddr:$dst)>;
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ixaddr:$dst, 8),
- (STXSD (XSCVDPSXDS f64:$src), ixaddr:$dst)>;
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
(STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
@@ -3514,13 +3575,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Instructions for store(fptoui).
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
(STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- xaddr:$dst)>;
+ xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ixaddr:$dst, 8),
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
(STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- ixaddr:$dst)>;
+ iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
(STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
@@ -3531,11 +3592,11 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
(STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddr:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xaddr:$dst)>;
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ixaddr:$dst, 8),
- (STXSD (XSCVDPUXDS f64:$src), ixaddr:$dst)>;
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
(STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
@@ -3668,13 +3729,13 @@ def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToLongLoadP9 {
- dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddrX4:$A)))));
}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToULongLoadP9 {
- dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddrX4:$A)))));
}
def FltToLong {
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz (fpextend f32:$A)))));
@@ -3704,13 +3765,13 @@ def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
def DblToIntLoadP9 {
- dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddrX4:$A)))));
}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
def DblToUIntLoadP9 {
- dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddrX4:$A)))));
}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@@ -3834,8 +3895,38 @@ let AddedComplexity = 400 in {
def : Pat<DWToSPExtractConv.BVS,
(v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
(XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+ def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+ // Elements in a register on a BE system are in order <0, 1, 2, 3>.
+ // The store instructions store the second word from the left.
+ // So to align element zero, we need to modulo-left-shift by 3 words.
+ // Similar logic applies for elements 2 and 3.
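+ // For example, XXSLDWI $A, $A, 3 rotates element 0 into word 1, the word
+ // STIWX stores; likewise shift 1 for element 2 and shift 2 for element 3,
+ // matching the (element, shift) pairs below.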
+ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ }
}
+ let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ }
+
// Big endian, available on all targets with VSX
let Predicates = [IsBigEndian, HasVSX] in {
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3871,8 +3962,38 @@ let AddedComplexity = 400 in {
def : Pat<DWToSPExtractConv.BVS,
(v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
(XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+ def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+ // Elements in a register on a LE system are in order <3, 2, 1, 0>.
+ // The store instructions store the second word from the left.
+ // So to align element 3, we need to modulo-left-shift by 3 words.
+ // Similar logic applies for elements 0 and 1.
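+ // For example, XXSLDWI $A, $A, 3 rotates element 3 into word 1, the word
+ // STIWX stores; likewise shift 2 for element 0 and shift 1 for element 1,
+ // matching the (element, shift) pairs below.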
+ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ }
}
+ let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
+ def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+ def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+ }
+
let Predicates = [IsLittleEndian, HasVSX] in {
// Little endian, available on all targets with VSX
def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
@@ -3969,17 +4090,17 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+ (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
+ (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 ixaddr:$A),
+ (DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 ixaddr:$A),
+ (DFLOADf32 iaddrX4:$A),
VSFRC)), 0))>;
}
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 0b57dd9b618d..4d45d96d4479 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -1,9 +1,8 @@
//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -65,12 +64,6 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
STATISTIC(PHINodeAlreadyExists, "PHI node already in pre-increment form");
-namespace llvm {
-
- void initializePPCLoopPreIncPrepPass(PassRegistry&);
-
-} // end namespace llvm
-
namespace {
class PPCLoopPreIncPrep : public FunctionPass {
@@ -338,7 +331,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
!LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
- LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
if (LoopPredecessor)
MadeChange = true;
}
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index e731c0bc0c23..027e6bd1ba06 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,16 +110,16 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PLT;
const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
const TargetMachine &TM = Printer.TM;
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
- // -msecure-plt option works only in PIC mode. If secure plt mode
- // is on add 32768 to symbol.
+ // If -msecure-plt -fPIC, add 32768 to symbol.
if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&
+ M->getPICLevel() == PICLevel::BigPIC &&
MO.getTargetFlags() == PPCII::MO_PLT)
- Expr = MCBinaryExpr::createAdd(Expr,
- MCConstantExpr::create(32768, Ctx),
- Ctx);
+ Expr =
+ MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(32768, Ctx), Ctx);
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::createAdd(Expr,
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 0068df19f0c8..446246358e96 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1,9 +1,8 @@
//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -22,9 +21,12 @@
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -38,6 +40,7 @@ using namespace llvm;
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
"Number of functions with multiple TOC saves that must be kept");
+STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
@@ -48,6 +51,10 @@ STATISTIC(NumFunctionsEnteredInMIPeephole,
STATISTIC(NumFixedPointIterations,
"Number of fixed-point iterations converting reg-reg instructions "
"to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+ "Number of pairs of rotate left, clear left/right collapsed");
+STATISTIC(NumEXTSWAndSLDICombined,
+ "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -83,6 +90,9 @@ struct PPCMIPeephole : public MachineFunctionPass {
private:
MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ MachineBlockFrequencyInfo *MBFI;
+ uint64_t EntryFreq;
// Initialize class variables.
void initialize(MachineFunction &MFParm);
@@ -93,6 +103,8 @@ private:
// Perform peepholes.
bool eliminateRedundantCompare(void);
bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
+ bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
+ bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
MachineInstr *MI);
@@ -100,7 +112,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -118,6 +134,9 @@ void PPCMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ EntryFreq = MBFI->getEntryFreq();
TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
LLVM_DEBUG(MF->dump());
@@ -198,6 +217,30 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
void PPCMIPeephole::UpdateTOCSaves(
std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
+ assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ "TOC-save removal only supported on ELFv2");
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+
+ MachineBasicBlock *Entry = &MF->front();
+ uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
+
+ // If the block in which the TOC save resides post-dominates Entry, or is
+ // hotter than the entry block (keep in mind that early MachineLICM has
+ // already run, so the TOC save won't be hoisted), we can just do the save
+ // in the prologue.
+ if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
+ FI->setMustSaveTOC(true);
+
+ // If we are saving the TOC in the prologue, all the TOC saves can be removed
+ // from the code.
+ if (FI->mustSaveTOC()) {
+ for (auto &TOCSave : TOCSaves)
+ TOCSave.second = false;
+ // Add new instruction to map.
+ TOCSaves[MI] = false;
+ return;
+ }
+
bool Keep = true;
for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
MachineInstr *CurrInst = It->first;
@@ -758,6 +801,11 @@ bool PPCMIPeephole::simplifyCode(void) {
NumOptADDLIs++;
break;
}
+ case PPC::RLDICR: {
+ Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
+ combineSEXTAndSHL(MI, ToErase);
+ break;
+ }
}
}
@@ -771,6 +819,10 @@ bool PPCMIPeephole::simplifyCode(void) {
// Eliminate all the TOC save instructions which are redundant.
Simplified |= eliminateRedundantTOCSaves(TOCSaves);
+ PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
+ if (FI->mustSaveTOC())
+ NumTOCSavesInPrologue++;
+
// We try to eliminate redundant compare instruction.
Simplified |= eliminateRedundantCompare();
@@ -1275,10 +1327,136 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) {
return Simplified;
}
+// We miss the opportunity to emit an RLDIC when lowering jump tables
+// since ISEL sees only a single basic block. When selecting, the clear
+// and shift left will be in different blocks.
+bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
+ if (MI.getOpcode() != PPC::RLDICR)
+ return false;
+
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() != PPC::RLDICL)
+ return false;
+
+ MachineOperand MOpSHSrc = SrcMI->getOperand(2);
+ MachineOperand MOpMBSrc = SrcMI->getOperand(3);
+ MachineOperand MOpSHMI = MI.getOperand(2);
+ MachineOperand MOpMEMI = MI.getOperand(3);
+ if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
+ MOpMEMI.isImm()))
+ return false;
+
+ uint64_t SHSrc = MOpSHSrc.getImm();
+ uint64_t MBSrc = MOpMBSrc.getImm();
+ uint64_t SHMI = MOpSHMI.getImm();
+ uint64_t MEMI = MOpMEMI.getImm();
+ uint64_t NewSH = SHSrc + SHMI;
+ uint64_t NewMB = MBSrc - SHMI;
+ if (NewMB > 63 || NewSH > 63)
+ return false;
+
+ // The bits cleared with RLDICL are [0, MBSrc).
+ // The bits cleared with RLDICR are (MEMI, 63].
+ // After the sequence, the bits cleared are:
+ // [0, MBSrc-SHMI) and (MEMI, 63].
+ //
+ // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
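+ // For example, a zero-extend of a 32-bit index (RLDICL src, 0, 32) followed
+ // by a shift left by 3 for 8-byte jump-table entries (RLDICR t, 3, 60)
+ // collapses to RLDIC src, 3, 29, since 63 - (0 + 3) == 60.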
+ if ((63 - NewSH) != MEMI)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Converting pair: ");
+ LLVM_DEBUG(SrcMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ MI.setDesc(TII->get(PPC::RLDIC));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ MI.getOperand(2).setImm(NewSH);
+ MI.getOperand(3).setImm(NewMB);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ NumRotatesCollapsed++;
+ return true;
+}
+
+// For a case like the following LLVM IR:
+// entry:
+// %iconv = sext i32 %index to i64
+// br i1 undef, label %true, label %false
+// true:
+// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
+// ...
+// PPCISelLowering::combineSHL fails to combine because the sext and shl end
+// up in different basic blocks during instruction selection. We can do a
+// peephole optimization to combine these two instructions into extswsli
+// after instruction selection.
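+// For example, the selected sequence
+//   %0 = EXTSW %index
+//   %1 = RLDICR %0, 3, 60
+// (SH + ME = 63, i.e. a plain shift left by 3) becomes the single instruction
+//   %1 = EXTSWSLI %index, 3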
+bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
+ MachineInstr *&ToErase) {
+ if (MI.getOpcode() != PPC::RLDICR)
+ return false;
+
+ if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
+ return false;
+
+ assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
+
+ MachineOperand MOpSHMI = MI.getOperand(2);
+ MachineOperand MOpMEMI = MI.getOperand(3);
+ if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
+ return false;
+
+ uint64_t SHMI = MOpSHMI.getImm();
+ uint64_t MEMI = MOpMEMI.getImm();
+ if (SHMI + MEMI != 63)
+ return false;
+
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+ if (SrcMI->getOpcode() != PPC::EXTSW &&
+ SrcMI->getOpcode() != PPC::EXTSW_32_64)
+ return false;
+
+ // If the register defined by the extsw has more than one use, the extsw
+ // cannot be erased, so do not combine.
+ if (!MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Combining pair: ");
+ LLVM_DEBUG(SrcMI->dump());
+ LLVM_DEBUG(MI.dump());
+
+ MachineInstr *NewInstr =
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+ SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
+ : TII->get(PPC::EXTSWSLI_32_64),
+ MI.getOperand(0).getReg())
+ .add(SrcMI->getOperand(1))
+ .add(MOpSHMI);
+ (void)NewInstr;
+
+ LLVM_DEBUG(dbgs() << "TO: ");
+ LLVM_DEBUG(NewInstr->dump());
+ ++NumEXTSWAndSLDICombined;
+ ToErase = &MI;
+ // SrcMI, the extsw, is no longer used; erase it.
+ SrcMI->eraseFromParent();
+ return true;
+}
+
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
"PowerPC MI Peephole Optimization", false, false)
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 3923417257e8..2f65d6a2855b 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 8a3f50aa9565..dfae19804d94 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,6 +44,12 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+ /// MustSaveTOC - Indicates that the TOC save needs to be performed in the
+ /// prologue of the function. This is typically the case when there are
+ /// indirect calls in the function and it is more profitable to save the
+ /// TOC pointer in the prologue than in the block(s) containing the call(s).
+ bool MustSaveTOC = false;
+
/// Do we have to disable shrink-wrapping? This has to be set if we emit any
/// instructions that clobber LR in the entry block because discovering this
/// in PEI is too late (happens after shrink-wrapping);
@@ -152,6 +157,9 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setMustSaveTOC(bool U) { MustSaveTOC = U; }
+ bool mustSaveTOC() const { return MustSaveTOC; }
+
/// We certainly don't want to shrink wrap functions if we've emitted a
/// MovePCtoLR8 as that has to go into the entry, so the prologue definitely
/// has to go into the entry block.
diff --git a/lib/Target/PowerPC/PPCMachineScheduler.cpp b/lib/Target/PowerPC/PPCMachineScheduler.cpp
new file mode 100644
index 000000000000..a38c8f475066
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- PPCMachineScheduler.cpp - MI Scheduler for PowerPC -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineScheduler.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableAddiLoadHeuristic("disable-ppc-sched-addi-load",
+ cl::desc("Disable scheduling addi instruction before"
+ "load for ppc"), cl::Hidden);
+
+bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone) const {
+ if (DisableAddiLoadHeuristic)
+ return false;
+
+ auto isADDIInstr = [&] (const MachineInstr &Inst) {
+ return Inst.getOpcode() == PPC::ADDI || Inst.getOpcode() == PPC::ADDI8;
+ };
+
+ SchedCandidate &FirstCand = Zone.isTop() ? TryCand : Cand;
+ SchedCandidate &SecondCand = Zone.isTop() ? Cand : TryCand;
+ if (isADDIInstr(*FirstCand.SU->getInstr()) &&
+ SecondCand.SU->getInstr()->mayLoad()) {
+ TryCand.Reason = Stall;
+ return true;
+ }
+ if (FirstCand.SU->getInstr()->mayLoad() &&
+ isADDIInstr(*SecondCand.SU->getInstr())) {
+ TryCand.Reason = NoCand;
+ return true;
+ }
+
+ return false;
+}
+
+void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+ if (!Cand.isValid() || !Zone)
+ return;
+
+ // Add the PowerPC-specific heuristic only when TryCand isn't selected, or is
+ // selected only by node order.
+ if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand)
+ return;
+
+ // There are some benefits to scheduling the ADDI before the load to hide the
+ // latency, as RA may create a true dependency between the load and the addi.
+ if (biasAddiLoadCandidate(Cand, TryCand, *Zone))
+ return;
+}
+
+void PPCPostRASchedStrategy::enterMBB(MachineBasicBlock *MBB) {
+ // Custom PPC PostRA specific behavior here.
+ PostGenericScheduler::enterMBB(MBB);
+}
+
+void PPCPostRASchedStrategy::leaveMBB() {
+ // Custom PPC PostRA specific behavior here.
+ PostGenericScheduler::leaveMBB();
+}
+
+void PPCPostRASchedStrategy::initialize(ScheduleDAGMI *Dag) {
+ // Custom PPC PostRA specific initialization here.
+ PostGenericScheduler::initialize(Dag);
+}
+
+SUnit *PPCPostRASchedStrategy::pickNode(bool &IsTopNode) {
+ // Custom PPC PostRA specific scheduling here.
+ return PostGenericScheduler::pickNode(IsTopNode);
+}
+
diff --git a/lib/Target/PowerPC/PPCMachineScheduler.h b/lib/Target/PowerPC/PPCMachineScheduler.h
new file mode 100644
index 000000000000..93532d9545a6
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineScheduler.h
@@ -0,0 +1,49 @@
+//===- PPCMachineScheduler.h - Custom PowerPC MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom PowerPC MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for PowerPC pre RA scheduling.
+class PPCPreRASchedStrategy : public GenericScheduler {
+public:
+ PPCPreRASchedStrategy(const MachineSchedContext *C) :
+ GenericScheduler(C) {}
+protected:
+ void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone) const override;
+private:
+ bool biasAddiLoadCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary &Zone) const;
+};
+
+/// A MachineSchedStrategy implementation for PowerPC post RA scheduling.
+class PPCPostRASchedStrategy : public PostGenericScheduler {
+public:
+ PPCPostRASchedStrategy(const MachineSchedContext *C) :
+ PostGenericScheduler(C) {}
+
+protected:
+ void initialize(ScheduleDAGMI *Dag) override;
+ SUnit *pickNode(bool &IsTopNode) override;
+ void enterMBB(MachineBasicBlock *MBB) override;
+ void leaveMBB() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_POWERPCMACHINESCHEDULER_H
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 8a1d68011c5f..d0d84efdbd20 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -1,9 +1,8 @@
//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCPfmCounters.td b/lib/Target/PowerPC/PPCPfmCounters.td
index d2a09f30c0f3..20b9efdc9df9 100644
--- a/lib/Target/PowerPC/PPCPfmCounters.td
+++ b/lib/Target/PowerPC/PPCPfmCounters.td
@@ -1,9 +1,8 @@
//===-- PPCPfmCounters.td - PPC Hardware Counters ----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4458b92ceb5e..d83c92276800 100644
--- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -1,9 +1,8 @@
//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index 25b2b54cbe98..3a83cc27439c 100644
--- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -1,9 +1,8 @@
//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,10 +30,6 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of QPX load splats simplified");
-namespace llvm {
- void initializePPCQPXLoadSplatPass(PassRegistry&);
-}
-
namespace {
struct PPCQPXLoadSplat : public MachineFunctionPass {
static char ID;
diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 173fc18b9ebf..8eaa6dfe2bf7 100644
--- a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -1,9 +1,8 @@
//===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -49,10 +48,6 @@ STATISTIC(NumNotSplitChainCopies,
STATISTIC(NumNotSplitWrongOpcode,
"Number of blocks not split due to the wrong opcode.");
-namespace llvm {
- void initializePPCReduceCRLogicalsPass(PassRegistry&);
-}
-
/// Given a basic block \p Successor that potentially contains PHIs, this
/// function will look for any incoming values in the PHIs that are supposed to
/// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
@@ -171,9 +166,33 @@ static bool splitMBB(BlockSplitInfo &BSI) {
: *ThisMBB->succ_begin();
MachineBasicBlock *NewBRTarget =
BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
- BranchProbability ProbToNewTarget =
- !BSI.MBPI ? BranchProbability::getUnknown()
- : BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget);
+
+ // It's impossible to know the precise branch probability after the split.
+ // But it still needs to be reasonable; the total probability to the original
+ // targets should not be changed.
+ // After the split, NewBRTarget will get two incoming edges. Assume P0 is the
+ // original branch probability to NewBRTarget, and P1 and P2 are the new
+ // branch probabilities to NewBRTarget after the split. If the two edge
+ // frequencies are the same, then
+ // F * P1 = F * P0 / 2 ==> P1 = P0 / 2
+ // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
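+ // For example, if P0 = 1/2 then P1 = 1/4 and P2 = (1/4) / (3/4) = 1/3, so
+ // both paths into NewBRTarget carry F/4 and the total probability into it
+ // stays at F * P0 = F/2.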
+ BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
+ BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
+ ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
+ ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
+ if (BSI.MBPI) {
+ if (BSI.BranchToFallThrough) {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbOrigFallThrough.getCompl();
+ } else {
+ ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
+ ProbFallThrough = ProbToNewTarget.getCompl();
+ ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
+ ProbOrigFallThrough = ProbOrigTarget.getCompl();
+ }
+ }
// Create a new basic block.
MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
@@ -185,11 +204,16 @@ static bool splitMBB(BlockSplitInfo &BSI) {
// Move everything after SplitBefore into the new block.
NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
NewMBB->transferSuccessors(ThisMBB);
+ if (!ProbOrigTarget.isUnknown()) {
+ auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
+ NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
+ MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
+ NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
+ }
- // Add the two successors to ThisMBB. The probabilities come from the
- // existing blocks if available.
+ // Add the two successors to ThisMBB.
ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
- ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl());
+ ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
// Add the branches to ThisMBB.
BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3d067aa8e621..12554ea8d079 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "PPCRegisterInfo.h"
-#include "PPC.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
@@ -71,6 +69,14 @@ StackPtrConst("ppc-stack-ptr-caller-preserved",
"caller preserved registers can be LICM candidates"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned>
+MaxCRBitSpillDist("ppc-max-crbit-spill-dist",
+ cl::desc("Maximum search distance for definition of CR bit "
+ "spill on ppc"),
+ cl::Hidden, cl::init(100));
+
+static unsigned offsetMinAlignForOpcode(unsigned OpC);
+
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
: PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
TM.isPPC64() ? 0 : 1,
@@ -153,30 +159,39 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
return CSR_SRV464_TLS_PE_SaveList;
- if (Subtarget.hasSPE())
- return CSR_SVR432_SPE_SaveList;
-
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+ // Cold calling convention CSRs.
if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
- return TM.isPPC64()
- ? (Subtarget.hasAltivec()
- ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
- : CSR_SVR64_ColdCC_Altivec_SaveList)
- : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
- : CSR_SVR64_ColdCC_SaveList))
- : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList
- : CSR_SVR32_ColdCC_SaveList);
+ if (TM.isPPC64()) {
+ if (Subtarget.hasAltivec())
+ return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
+ : CSR_SVR64_ColdCC_Altivec_SaveList;
+ return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
+ : CSR_SVR64_ColdCC_SaveList;
+ }
+ // 32-bit targets.
+ if (Subtarget.hasAltivec())
+ return CSR_SVR32_ColdCC_Altivec_SaveList;
+ else if (Subtarget.hasSPE())
+ return CSR_SVR32_ColdCC_SPE_SaveList;
+ return CSR_SVR32_ColdCC_SaveList;
}
-
- return TM.isPPC64()
- ? (Subtarget.hasAltivec()
- ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
- : CSR_SVR464_Altivec_SaveList)
- : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
- : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
- : CSR_SVR432_SaveList);
+ // Standard calling convention CSRs.
+ if (TM.isPPC64()) {
+ if (Subtarget.hasAltivec())
+ return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList;
+ return SaveR2 ? CSR_SVR464_R2_SaveList
+ : CSR_SVR464_SaveList;
+ }
+ // 32-bit targets.
+ if (Subtarget.hasAltivec())
+ return CSR_SVR432_Altivec_SaveList;
+ else if (Subtarget.hasSPE())
+ return CSR_SVR432_SPE_SaveList;
+ return CSR_SVR432_SaveList;
}
const MCPhysReg *
@@ -221,18 +236,26 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_Darwin64_RegMask)
: (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
: CSR_Darwin32_RegMask);
+ if (Subtarget.isAIXABI()) {
+ assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
+ return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
+ }
if (CC == CallingConv::Cold) {
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
- : CSR_SVR32_ColdCC_RegMask);
+ : (Subtarget.hasSPE()
+ ? CSR_SVR32_ColdCC_SPE_RegMask
+ : CSR_SVR32_ColdCC_RegMask));
}
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
: CSR_SVR464_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
- : CSR_SVR432_RegMask);
+ : (Subtarget.hasSPE()
+ ? CSR_SVR432_SPE_RegMask
+ : CSR_SVR432_RegMask));
}
const uint32_t*
@@ -288,6 +311,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
}
+ // Always reserve r2 on AIX for now.
+ // TODO: Make r2 allocatable on AIX/XCOFF for some leaf functions.
+ if (Subtarget.isAIXABI())
+ markSuperRegs(Reserved, PPC::R2); // System-reserved register
+
// On PPC64, r13 is the thread pointer. Never allocate this register.
if (TM.isPPC64())
markSuperRegs(Reserved, PPC::R13);
@@ -316,6 +344,51 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ const PPCInstrInfo *InstrInfo = Subtarget.getInstrInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
+
+ // If the callee saved info is invalid we have to default to true for safety.
+ if (!MFI.isCalleeSavedInfoValid())
+ return true;
+
+ // We will require the use of X-Forms if the frame is larger than what can be
+ // represented by the signed 16-bit immediate of a D-Form. If we need an
+ // X-Form then we also need a register to hold the address offset.
+ unsigned FrameSize = MFI.getStackSize();
+ // A positive signed 16-bit immediate means the FrameSize must fit in 15 bits.
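+ // For example, a 40000-byte frame (0x9C40) has bits above bit 14 set, so a
+ // D-Form immediate cannot address all of it and the X-Form (and thus a
+ // scavenged register) is required.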
+ if (FrameSize & ~0x7FFF)
+ return true;
+
+ // The callee saved info is valid so it can be traversed.
+ // Checking for registers that need saving that do not have load or store
+ // forms where the address offset is an immediate.
+ for (unsigned i = 0; i < Info.size(); i++) {
+ int FrIdx = Info[i].getFrameIdx();
+ unsigned Reg = Info[i].getReg();
+
+ unsigned Opcode = InstrInfo->getStoreOpcodeForSpill(Reg);
+ if (!MFI.isFixedObjectIndex(FrIdx)) {
+ // This is not a fixed object. If it requires alignment then we may still
+ // need to use the XForm.
+ if (offsetMinAlignForOpcode(Opcode) > 1)
+ return true;
+ }
+
+ // This is either:
+ // 1) A fixed frame index object which we know is aligned, so as long as we
+ // have a valid DForm/DSForm/DQForm (non-XForm) we don't need to consider
+ // the alignment here.
+ // 2) A non-fixed object, in which case we know from the previous check that
+ // the minimum required alignment is no more than 1.
+ if (InstrInfo->isXFormMemOp(Opcode))
+ return true;
+ }
+ return false;
+}
+
bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg,
const MachineFunction &MF) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
@@ -664,6 +737,7 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const TargetRegisterInfo* TRI = Subtarget.getRegisterInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = TM.isPPC64();
@@ -673,27 +747,59 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
unsigned SrcReg = MI.getOperand(0).getReg();
- // We need to move the CR field that contains the CR bit we are spilling.
- // The super register may not be explicitly defined (i.e. it can be defined
- // by a CR-logical that only defines the subreg) so we state that the CR
- // field is undef. Also, in order to preserve the kill flag on the CR bit,
- // we add it as an implicit use.
- BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ // Search up the BB to find the definition of the CR bit.
+ MachineBasicBlock::reverse_iterator Ins;
+ unsigned CRBitSpillDistance = 0;
+ for (Ins = MI; Ins != MBB.rend(); Ins++) {
+ // Definition found.
+ if (Ins->modifiesRegister(SrcReg, TRI))
+ break;
+ // Unable to find CR bit definition within maximum search distance.
+ if (CRBitSpillDistance == MaxCRBitSpillDist) {
+ Ins = MI;
+ break;
+ }
+ // Skip debug instructions when counting CR bit spill distance.
+ if (!Ins->isDebugInstr())
+ CRBitSpillDistance++;
+ }
+
+ // Unable to find the definition of the CR bit in the MBB.
+ if (Ins == MBB.rend())
+ Ins = MI;
+
+ // There is no need to extract the CR bit if its value is already known.
+ switch (Ins->getOpcode()) {
+ case PPC::CRUNSET:
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LI8 : PPC::LI), Reg)
+ .addImm(0);
+ break;
+ case PPC::CRSET:
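+ // LIS with an immediate of -32768 materializes 0x80000000, i.e. only the
+ // most significant bit set, which is where the default path below shifts a
+ // live CR bit before the store.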
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LIS8 : PPC::LIS), Reg)
+ .addImm(-32768);
+ break;
+ default:
+ // We need to move the CR field that contains the CR bit we are spilling.
+ // The super register may not be explicitly defined (i.e. it can be defined
+ // by a CR-logical that only defines the subreg) so we state that the CR
+ // field is undef. Also, in order to preserve the kill flag on the CR bit,
+ // we add it as an implicit use.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
.addReg(getCRFromCRBit(SrcReg), RegState::Undef)
.addReg(SrcReg,
RegState::Implicit | getKillRegState(MI.getOperand(0).isKill()));
- // If the saved register wasn't CR0LT, shift the bits left so that the bit to
- // store is the first one. Mask all but that bit.
- unsigned Reg1 = Reg;
- Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
-
- // rlwinm rA, rA, ShiftBits, 0, 0.
- BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
- .addReg(Reg1, RegState::Kill)
- .addImm(getEncodingValue(SrcReg))
- .addImm(0).addImm(0);
+ // If the saved register wasn't CR0LT, shift the bits left so that the bit
+ // to store is the first one. Mask all but that bit.
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ // rlwinm rA, rA, ShiftBits, 0, 0.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg))
+ .addImm(0).addImm(0);
+ }
addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
.addReg(Reg, RegState::Kill),
FrameIndex);
@@ -826,9 +932,7 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
}
// If the offset must be a multiple of some value, return what that value is.
-static unsigned offsetMinAlign(const MachineInstr &MI) {
- unsigned OpC = MI.getOpcode();
-
+static unsigned offsetMinAlignForOpcode(unsigned OpC) {
switch (OpC) {
default:
return 1;
@@ -847,12 +951,21 @@ static unsigned offsetMinAlign(const MachineInstr &MI) {
case PPC::STXSD:
case PPC::STXSSP:
return 4;
+ case PPC::EVLDD:
+ case PPC::EVSTDD:
+ return 8;
case PPC::LXV:
case PPC::STXV:
return 16;
}
}
+// If the offset must be a multiple of some value, return what that value is.
+static unsigned offsetMinAlign(const MachineInstr &MI) {
+ unsigned OpC = MI.getOpcode();
+ return offsetMinAlignForOpcode(OpC);
+}
+
// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
static unsigned getOffsetONFromFION(const MachineInstr &MI,
unsigned FIOperandNum) {
@@ -963,7 +1076,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handled in a target-independent way");
- if (!noImmForm && ((isInt<16>(Offset) &&
+ bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+ isUInt<8>(Offset) :
+ isInt<16>(Offset);
+ if (!noImmForm && ((OffsetFitsMnemonic &&
((Offset % offsetMinAlign(MI)) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
OpC == TargetOpcode::PATCHPOINT)) {
@@ -1001,7 +1117,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (noImmForm)
OperandBase = 1;
- else if (OpC != TargetOpcode::INLINEASM) {
+ else if (OpC != TargetOpcode::INLINEASM &&
+ OpC != TargetOpcode::INLINEASM_BR) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
@@ -1016,7 +1133,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
-unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const PPCFrameLowering *TFI = getFrameLowering(MF);
if (!TM.isPPC64())
@@ -1025,7 +1142,7 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
-unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+Register PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
@@ -1080,7 +1197,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
const PPCFrameLowering *TFI = getFrameLowering(MF);
- unsigned StackEst = TFI->determineFrameLayout(MF, false, true);
+ unsigned StackEst = TFI->determineFrameLayout(MF, true);
// If we likely don't need a stack frame, then we probably don't need a
// virtual base register either.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index e93fe4ce3453..a50e05920cd4 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,13 +14,14 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCREGISTERINFO_H
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#define GET_REGINFO_HEADER
#include "PPCGenRegisterInfo.inc"
namespace llvm {
+class PPCTargetMachine;
inline static unsigned getCRFromCRBit(unsigned SrcReg) {
unsigned Reg = 0;
@@ -90,9 +90,7 @@ public:
return true;
}
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
- return true;
- }
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
@@ -134,10 +132,10 @@ public:
int64_t Offset) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
// Base pointer (stack realignment) support.
- unsigned getBaseRegister(const MachineFunction &MF) const;
+ Register getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
/// stripRegisterPrefix - This method strips the character prefix from a
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index d0d29b6d2c7d..af0dff6347a6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -375,8 +374,6 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
-def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
-
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index c8fe7d7eea78..4fa29d96ca14 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -1,9 +1,8 @@
//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -106,6 +105,7 @@ def IIC_VecVSL : InstrItinClass;
def IIC_VecVSR : InstrItinClass;
def IIC_SprMTMSRD : InstrItinClass;
def IIC_SprSLIE : InstrItinClass;
+def IIC_SprSLBFEE : InstrItinClass;
def IIC_SprSLBIE : InstrItinClass;
def IIC_SprSLBIEG : InstrItinClass;
def IIC_SprSLBMTE : InstrItinClass;
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
index 646822eedbe0..708261fc7cc8 100644
--- a/lib/Target/PowerPC/PPCSchedule440.td
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -1,9 +1,8 @@
//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index f34c1accc0fd..c2b298524e00 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -1,9 +1,8 @@
//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCScheduleE500.td b/lib/Target/PowerPC/PPCScheduleE500.td
index 479a970b2537..74744dda54f7 100644
--- a/lib/Target/PowerPC/PPCScheduleE500.td
+++ b/lib/Target/PowerPC/PPCScheduleE500.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleE500.td - e500 Scheduling Defs ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index d8bda073833f..1a1c041565b6 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index 3e50803955c4..4480d7fba4fb 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index 0995b7200d93..8f1907f2c016 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index 1b15c7b3c7ad..0eabc49d7841 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 0044c3c6a449..9c84aec638d7 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index c802b80170fb..087073537796 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td
index 1d6e509819da..5a8c1eb2b837 100644
--- a/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/lib/Target/PowerPC/PPCScheduleP7.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
index ff39dfda7016..70a58f42a98a 100644
--- a/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCScheduleP9.td b/lib/Target/PowerPC/PPCScheduleP9.td
index a1e625c855e0..6a79cca89194 100644
--- a/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/lib/Target/PowerPC/PPCScheduleP9.td
@@ -1,9 +1,8 @@
//===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,8 +50,21 @@ let SchedModel = P9Model in {
// ***************** Processor Resources *****************
- //Dispatcher:
- def DISPATCHER : ProcResource<12>;
+ // Dispatcher slots:
+ // x0, x1, x2, and x3 are the dedicated slice dispatch ports, where each
+ // corresponds to one of the four execution slices.
+ def DISPx02 : ProcResource<2>;
+ def DISPx13 : ProcResource<2>;
+ // The xa and xb ports can be used to send an iop to either of the two slices
+ // of the superslice, but are restricted to iops with only two primary sources.
+ def DISPxab : ProcResource<2>;
+ // b0 and b1 are dedicated dispatch ports into the branch slice.
+ def DISPb01 : ProcResource<2>;
+
+  // Any non-BR dispatch ports
+ def DISP_NBR
+ : ProcResGroup<[ DISPx02, DISPx13, DISPxab]>;
+ def DISP_SS : ProcResGroup<[ DISPx02, DISPx13]>;
// Issue Ports
// An instruction can go down one of two issue queues.
@@ -117,8 +129,37 @@ let SchedModel = P9Model in {
// ***************** SchedWriteRes Definitions *****************
- //Dispatcher
- def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+ // Dispatcher
+ // Dispatch Rules: '-' or 'V'
+ // Vector ('V') - vector iops (128-bit operand) take only one decode and
+ // dispatch slot but are dispatched to both the even and odd slices of a
+ // superslice.
+ def DISP_1C : SchedWriteRes<[DISP_NBR]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+ // Dispatch Rules: 'E'
+  // Even slice ('E') - certain operations must be sent only to an even slice.
+  // Also consumes the odd dispatch slice slot of the same superslice at dispatch.
+ def DISP_EVEN_1C : SchedWriteRes<[ DISPx02, DISPx13 ]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+ // Dispatch Rules: 'P'
+ // Paired ('P') - certain cracked and expanded iops are paired such that they
+ // must dispatch together to the same superslice.
+ def DISP_PAIR_1C : SchedWriteRes<[ DISP_SS, DISP_SS]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+  // Tuple Restricted ('R') - certain iops preclude dispatching more than one
+  // operation per slice for the superslice to which they are dispatched.
+ def DISP_3SLOTS_1C : SchedWriteRes<[DISPx02, DISPx13, DISPxab]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+ // Each execution and branch slice can receive up to two iops per cycle
+ def DISP_BR_1C : SchedWriteRes<[ DISPxab ]> {
let NumMicroOps = 0;
let Latency = 1;
}
@@ -148,7 +189,7 @@ let SchedModel = P9Model in {
// ALU Units
// An ALU may take either 2 or 3 cycles to complete the operation.
- // However, the ALU unit is only every busy for 1 cycle at a time and may
+ // However, the ALU unit is only ever busy for 1 cycle at a time and may
// receive new instructions each cycle.
def P9_ALU_2C : SchedWriteRes<[ALU]> {
let Latency = 2;
@@ -203,10 +244,6 @@ let SchedModel = P9Model in {
// DP Unit
// A DP unit may take from 2 to 36 cycles to complete.
// Some DP operations keep the unit busy for up to 10 cycles.
- def P9_DP_2C : SchedWriteRes<[DP]> {
- let Latency = 2;
- }
-
def P9_DP_5C : SchedWriteRes<[DP]> {
let Latency = 5;
}
@@ -228,11 +265,6 @@ let SchedModel = P9Model in {
let Latency = 22;
}
- def P9_DP_24C_8 : SchedWriteRes<[DP]> {
- let ResourceCycles = [8];
- let Latency = 24;
- }
-
def P9_DPO_24C_8 : SchedWriteRes<[DPO]> {
let ResourceCycles = [8];
let Latency = 24;
@@ -248,11 +280,6 @@ let SchedModel = P9Model in {
let Latency = 22;
}
- def P9_DP_27C_7 : SchedWriteRes<[DP]> {
- let ResourceCycles = [7];
- let Latency = 27;
- }
-
def P9_DPE_27C_10 : SchedWriteRes<[DP]> {
let ResourceCycles = [10];
let Latency = 27;
@@ -383,16 +410,12 @@ let SchedModel = P9Model in {
def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>;
- def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>;
def P9_ALUOpAndALUOpAndALUOp_6C :
WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>;
def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>;
- def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>;
def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>;
- def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;
- def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>;
def P9_DPOpAndALU2Op_25C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_3C]>;
def P9_DPOpAndALU2Op_29C_5 : WriteSequence<[P9_DP_26C_5, P9_ALU_3C]>;
def P9_DPOpAndALU2Op_36C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_3C]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c0cbfd779cb9..6aa7528634d3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,6 +39,11 @@ static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
cl::Hidden);
+static cl::opt<bool>
+ EnableMachinePipeliner("ppc-enable-pipeliner",
+ cl::desc("Enable Machine Pipeliner for PPC"),
+ cl::init(false), cl::Hidden);
+
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
@@ -68,6 +72,7 @@ void PPCSubtarget::initializeEnvironment() {
HasFPU = false;
HasQPX = false;
HasVSX = false;
+ NeedsTwoConstNR = false;
HasP8Vector = false;
HasP8Altivec = false;
HasP8Crypto = false;
@@ -103,11 +108,13 @@ void PPCSubtarget::initializeEnvironment() {
HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
- HasFusion = false;
HasFloat128 = false;
IsISA3_0 = false;
UseLongCalls = false;
SecurePlt = false;
+ VectorsUseTwoUnits = false;
+ UsePPCPreRASchedStrategy = false;
+ UsePPCPostRASchedStrategy = false;
HasPOPCNTD = POPCNTD_Unavailable;
}
@@ -138,6 +145,10 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isDarwin())
HasLazyResolverStubs = true;
+ if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
+ TargetTriple.isMusl())
+ SecurePlt = true;
+
if (HasSPE && IsPPC64)
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
@@ -175,10 +186,14 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const {
return false;
}
-bool PPCSubtarget::enableMachineScheduler() const {
- return true;
+bool PPCSubtarget::enableMachineScheduler() const { return true; }
+
+bool PPCSubtarget::enableMachinePipeliner() const {
+ return (DarwinDirective == PPC::DIR_PWR9) && EnableMachinePipeliner;
}
+bool PPCSubtarget::useDFAforSMS() const { return false; }
+
// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool PPCSubtarget::enablePostRAScheduler() const { return true; }
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index c56f254d6bec..55fec1cb6d99 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -1,9 +1,8 @@
//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,6 +98,7 @@ protected:
bool HasSPE;
bool HasQPX;
bool HasVSX;
+ bool NeedsTwoConstNR;
bool HasP8Vector;
bool HasP8Altivec;
bool HasP8Crypto;
@@ -131,11 +131,13 @@ protected:
bool HasPartwordAtomics;
bool HasDirectMove;
bool HasHTM;
- bool HasFusion;
bool HasFloat128;
bool IsISA3_0;
bool UseLongCalls;
bool SecurePlt;
+ bool VectorsUseTwoUnits;
+ bool UsePPCPreRASchedStrategy;
+ bool UsePPCPostRASchedStrategy;
POPCNTDKind HasPOPCNTD;
@@ -244,6 +246,7 @@ public:
bool hasFPU() const { return HasFPU; }
bool hasQPX() const { return HasQPX; }
bool hasVSX() const { return HasVSX; }
+ bool needsTwoConstNR() const { return NeedsTwoConstNR; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
bool hasP8Crypto() const { return HasP8Crypto; }
@@ -260,6 +263,7 @@ public:
bool isPPC4xx() const { return IsPPC4xx; }
bool isPPC6xx() const { return IsPPC6xx; }
bool isSecurePlt() const {return SecurePlt; }
+ bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
bool isDeprecatedDST() const { return DeprecatedDST; }
@@ -267,6 +271,8 @@ public:
bool hasInvariantFunctionDescriptors() const {
return HasInvariantFunctionDescriptors;
}
+ bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
+ bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool hasDirectMove() const { return HasDirectMove; }
@@ -285,7 +291,6 @@ public:
}
bool hasHTM() const { return HasHTM; }
- bool hasFusion() const { return HasFusion; }
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
bool useLongCalls() const { return UseLongCalls; }
@@ -307,16 +312,21 @@ public:
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
- bool isSVR4ABI() const { return !isDarwinABI(); }
+ bool isAIXABI() const { return TargetTriple.isOSAIX(); }
+ bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
bool isELFv2ABI() const;
/// Originally, this function return hasISEL(). Now we always enable it,
/// but may expand the ISEL instruction later.
bool enableEarlyIfConversion() const override { return true; }
- // Scheduling customization.
+ /// Scheduling customization.
bool enableMachineScheduler() const override;
- // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ /// Pipeliner customization.
+ bool enableMachinePipeliner() const override;
+ /// Machine Pipeliner customization
+ bool useDFAforSMS() const override;
+ /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool enablePostRAScheduler() const override;
AntiDepBreakMode getAntiDepBreakMode() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index ac36abbe8439..fb826c4a32f1 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -1,9 +1,8 @@
//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,10 +34,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-tls-dynamic-call"
-namespace llvm {
- void initializePPCTLSDynamicCallPass(PassRegistry&);
-}
-
namespace {
struct PPCTLSDynamicCall : public MachineFunctionPass {
static char ID;
diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 17345b6ca8d3..3eb0569fb955 100644
--- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -1,9 +1,8 @@
//===-- PPCTOCRegDeps.cpp - Add Extra TOC Register Dependencies -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,10 +82,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-toc-reg-deps"
-namespace llvm {
- void initializePPCTOCRegDepsPass(PassRegistry&);
-}
-
namespace {
// PPCTOCRegDeps pass - For simple functions without epilogue code, move
// returns up, and create conditional returns, to avoid unnecessary
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 580d057602f5..ce00f848dd72 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,9 +13,11 @@
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
+#include "PPCMachineScheduler.h"
#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
#include "PPCTargetTransformInfo.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -100,6 +101,19 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+#ifndef NDEBUG
+ initializePPCCTRLoopsVerifyPass(PR);
+#endif
+ initializePPCLoopPreIncPrepPass(PR);
+ initializePPCTOCRegDepsPass(PR);
+ initializePPCEarlyReturnPass(PR);
+ initializePPCVSXCopyPass(PR);
+ initializePPCVSXFMAMutatePass(PR);
+ initializePPCVSXSwapRemovalPass(PR);
+ initializePPCReduceCRLogicalsPass(PR);
+ initializePPCBSelPass(PR);
+ initializePPCBranchCoalescingPass(PR);
+ initializePPCQPXLoadSplatPass(PR);
initializePPCBoolRetToIntPass(PR);
initializePPCExpandISELPass(PR);
initializePPCPreEmitPeepholePass(PR);
@@ -199,6 +213,8 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
case Triple::ppc64le:
return PPCTargetMachine::PPC_ABI_ELFv2;
case Triple::ppc64:
+ if (TT.getEnvironment() == llvm::Triple::ELFv2)
+ return PPCTargetMachine::PPC_ABI_ELFv2;
return PPCTargetMachine::PPC_ABI_ELFv1;
default:
return PPCTargetMachine::PPC_ABI_UNKNOWN;
@@ -227,9 +243,9 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
bool JIT) {
if (CM) {
if (*CM == CodeModel::Tiny)
- report_fatal_error("Target does not support the tiny CodeModel");
+ report_fatal_error("Target does not support the tiny CodeModel", false);
if (*CM == CodeModel::Kernel)
- report_fatal_error("Target does not support the kernel CodeModel");
+ report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
if (!TT.isOSDarwin() && !JIT &&
@@ -238,6 +254,29 @@ static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
return CodeModel::Small;
}
+
+static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
+ const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
+ llvm::make_unique<PPCPreRASchedStrategy>(C) :
+ llvm::make_unique<GenericScheduler>(C));
+ // add DAG Mutations here.
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+}
+
+static ScheduleDAGInstrs *createPPCPostMachineScheduler(
+ MachineSchedContext *C) {
+ const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
+ ScheduleDAGMI *DAG =
+ new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
+ llvm::make_unique<PPCPostRASchedStrategy>(C) :
+ llvm::make_unique<PostGenericScheduler>(C), true);
+ // add DAG Mutations here.
+ return DAG;
+}
+
// The FeatureString here is a little subtle. We are modifying the feature
// string with what are (currently) non-function specific overrides as it goes
// into the LLVMTargetMachine constructor and then using the stored value in the
@@ -331,6 +370,14 @@ public:
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ return createPPCMachineScheduler(C);
+ }
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ return createPPCPostMachineScheduler(C);
+ }
};
} // end anonymous namespace
@@ -374,7 +421,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCCTRLoops());
+ addPass(createHardwareLoopsPass());
return false;
}
@@ -441,6 +488,9 @@ void PPCPassConfig::addPreRegAlloc() {
}
if (EnableExtraTOCRegDeps)
addPass(createPPCTOCRegDepsPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&MachinePipelinerID);
}
void PPCPassConfig::addPreSched2() {
@@ -469,3 +519,13 @@ TargetTransformInfo
PPCTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(PPCTTIImpl(this, F));
}
+
+static MachineSchedRegistry
+PPCPreRASchedRegistry("ppc-prera",
+ "Run PowerPC PreRA specific scheduler",
+ createPPCMachineScheduler);
+
+static MachineSchedRegistry
+PPCPostRASchedRegistry("ppc-postra",
+ "Run PowerPC PostRA specific scheduler",
+ createPPCPostMachineScheduler);
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 75b98a815ab4..fd1d14ae32d4 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -1,9 +1,8 @@
//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,10 +58,6 @@ public:
const Triple &TT = getTargetTriple();
return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le);
};
-
- bool isMachineVerifierClean() const override {
- return false;
- }
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index a049dc3fda93..e237fab1b267 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.h b/lib/Target/PowerPC/PPCTargetObjectFile.h
index 417b8ed0d612..78a5840c87c7 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 310fea9ef09f..e17361d997fd 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,9 +1,8 @@
//===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index bc9bcab83a0a..ff3dfbfaca05 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1,17 +1,18 @@
//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "PPCTargetTransformInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -32,6 +33,13 @@ EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
cl::desc("Enable using coldcc calling conv for cold "
"internal functions"));
+// The latency of mtctr is only justified if there are more than 4
+// comparisons that will be removed as a result.
+static cl::opt<unsigned>
+SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
+ cl::desc("Loops with a constant trip count smaller than "
+ "this value will not use the count register."));
+
//===----------------------------------------------------------------------===//
//
// PPC cost model.
@@ -205,6 +213,341 @@ unsigned PPCTTIImpl::getUserCost(const User *U,
return BaseT::getUserCost(U, Operands);
}
+bool PPCTTIImpl::mightUseCTR(BasicBlock *BB,
+ TargetLibraryInfo *LibInfo) {
+ const PPCTargetMachine &TM = ST->getTargetMachine();
+
+ // Loop through the inline asm constraints and look for something that
+ // clobbers ctr.
+ auto asmClobbersCTR = [](InlineAsm *IA) {
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+ return false;
+ };
+
+ // Determining the address of a TLS variable results in a function call in
+ // certain TLS models.
+ std::function<bool(const Value*)> memAddrUsesCTR =
+ [&memAddrUsesCTR, &TM](const Value *MemAddr) -> bool {
+ const auto *GV = dyn_cast<GlobalValue>(MemAddr);
+ if (!GV) {
+ // Recurse to check for constants that refer to TLS global variables.
+ if (const auto *CV = dyn_cast<Constant>(MemAddr))
+ for (const auto &CO : CV->operands())
+ if (memAddrUsesCTR(CO))
+ return true;
+
+ return false;
+ }
+
+ if (!GV->isThreadLocal())
+ return false;
+ TLSModel::Model Model = TM.getTLSModel(GV);
+ return Model == TLSModel::GeneralDynamic ||
+ Model == TLSModel::LocalDynamic;
+ };
+
+ auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+ return false;
+ };
+
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ if (asmClobbersCTR(IA))
+ return true;
+ continue;
+ }
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode = 0;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+          // If we have a call to ppc_is_decremented_ctr_nonzero or ppc_mtctr,
+          // we're definitely using CTR.
+ case Intrinsic::set_loop_iterations:
+ case Intrinsic::loop_decrement:
+ return true;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, the
+ // control can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
+ }
+ }
+
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc_copysignl:
+ return true;
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ Opcode = ISD::FROUND; break;
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
+ Opcode = ISD::FTRUNC; break;
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ Opcode = ISD::FMINNUM; break;
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ Opcode = ISD::FMAXNUM; break;
+ }
+ }
+
+ if (Opcode) {
+ EVT EVTy =
+ TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
+
+ if (EVTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
+ continue;
+ else if (EVTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
+ continue;
+
+ return true;
+ }
+ }
+
+ return true;
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
+ return true;
+ } else if (isLargeIntegerTy(!TM.isPPC64(),
+ J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (!TM.isPPC64() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
+ return true;
+ }
+
+ // FREM is always a call.
+ if (J->getOpcode() == Instruction::FRem)
+ return true;
+
+ if (ST->useSoftFloat()) {
+ switch(J->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FCmp:
+ return true;
+ }
+ }
+
+ for (Value *Operand : J->operands())
+ if (memAddrUsesCTR(Operand))
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ const PPCTargetMachine &TM = ST->getTargetMachine();
+ TargetSchedModel SchedModel;
+ SchedModel.init(ST);
+
+ // Do not convert small short loops to CTR loop.
+ unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
+ if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+ CodeMetrics Metrics;
+ for (BasicBlock *BB : L->blocks())
+ Metrics.analyzeBasicBlock(BB, *this, EphValues);
+ // 6 is an approximate latency for the mtctr instruction.
+ if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
+ return false;
+ }
+
+ // We don't want to spill/restore the counter register, and so we don't
+ // want to use the counter register if the loop contains calls.
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I)
+ if (mightUseCTR(*I, LibInfo))
+ return false;
+
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // If there is an exit edge known to be frequently taken,
+ // we should not transform this loop.
+ for (auto &BB : ExitingBlocks) {
+ Instruction *TI = BB->getTerminator();
+ if (!TI) continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ uint64_t TrueWeight = 0, FalseWeight = 0;
+ if (!BI->isConditional() ||
+ !BI->extractProfMetadata(TrueWeight, FalseWeight))
+ continue;
+
+ // If the exit path is more frequent than the loop path,
+ // we return here without further analysis for this loop.
+ bool TrueIsExit = !L->contains(BI->getSuccessor(0));
+ if (( TrueIsExit && FalseWeight < TrueWeight) ||
+ (!TrueIsExit && FalseWeight > TrueWeight))
+ return false;
+ }
+ }
+
+ LLVMContext &C = L->getHeader()->getContext();
+ HWLoopInfo.CountType = TM.isPPC64() ?
+ Type::getInt64Ty(C) : Type::getInt32Ty(C);
+ HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
+ return true;
+}
+
void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
@@ -239,17 +582,12 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
return LoopHasReductions;
}
-const PPCTTIImpl::TTI::MemCmpExpansionOptions *
-PPCTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
- static const auto Options = []() {
- TTI::MemCmpExpansionOptions Options;
- Options.LoadSizes.push_back(8);
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
- return Options;
- }();
- return &Options;
+PPCTTIImpl::TTI::MemCmpExpansionOptions
+PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.LoadSizes = {8, 4, 2, 1};
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ return Options;
}
bool PPCTTIImpl::enableInterleavedAccessVectorization() {
@@ -324,6 +662,33 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
+// Adjust the cost of vector instructions on targets where there is overlap
+// between the vector and scalar units, thereby reducing the overall throughput
+// of vector code relative to scalar code.
+int PPCTTIImpl::vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1,
+ Type *Ty2) {
+ if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
+ return Cost;
+
+ std::pair<int, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
+ // If type legalization involves splitting the vector, we don't want to
+ // double the cost at every step - only the last step.
+ if (LT1.first != 1 || !LT1.second.isVector())
+ return Cost;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (TLI->isOperationExpand(ISD, LT1.second))
+ return Cost;
+
+ if (Ty2) {
+ std::pair<int, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
+ if (LT2.first != 1 || !LT2.second.isVector())
+ return Cost;
+ }
+
+ return Cost * 2;
+}
+
int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -331,8 +696,9 @@ int PPCTTIImpl::getArithmeticInstrCost(
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
// Fallback to the default implementation.
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
+ return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
}
int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
@@ -345,19 +711,22 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
// structured types of shuffles covered by TTI::ShuffleKind).
- return LT.first;
+ return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
+ nullptr);
}
int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src);
+ return vectorCostAdjustment(Cost, Opcode, Dst, Src);
}
int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
}
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -366,18 +735,23 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ int Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
+ Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
- // Double-precision scalars are already located in index #0.
- if (Index == 0)
+ // Double-precision scalars are already located in index #0 (or #1 if LE).
+ if (ISD == ISD::EXTRACT_VECTOR_ELT &&
+ Index == (ST->isLittleEndian() ? 1 : 0))
return 0;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
+
} else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
// Floating point scalars are already located in index #0.
if (Index == 0)
return 0;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
}
// Estimated cost of a load-hit-store delay. This was obtained
@@ -394,9 +768,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
// these need to be estimated as very costly.
if (ISD == ISD::EXTRACT_VECTOR_ELT ||
ISD == ISD::INSERT_VECTOR_ELT)
- return LHSPenalty + BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return LHSPenalty + Cost;
- return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ return Cost;
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -407,6 +781,7 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
"Invalid Opcode");
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -500,3 +875,25 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
return Cost;
}
+bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT,
+ AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+ return false; // Stop search.
+
+ HardwareLoopInfo HWLoopInfo(L);
+
+ if (!HWLoopInfo.canAnalyze(*LI))
+ return false;
+
+ if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
+ return false;
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
+ return false;
+
+ *BI = HWLoopInfo.ExitBranch;
+ return true;
+}
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 9221a910288a..5d76ee418b69 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- PPCTargetTransformInfo.h - PPC specific TTI -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -17,7 +16,6 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETTRANSFORMINFO_H
-#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
@@ -35,6 +33,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCSubtarget *getST() const { return ST; }
const PPCTargetLowering *getTLI() const { return TLI; }
+ bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo);
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -54,6 +53,13 @@ public:
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo);
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
@@ -63,14 +69,15 @@ public:
/// @{
bool useColdCCForColdCall(Function &F);
bool enableAggressiveInterleaving(bool LoopHasReductions);
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
unsigned getMaxInterleaveFactor(unsigned VF);
+ int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 93fe3230ab81..719ed7b63878 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -1,9 +1,8 @@
//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,10 +36,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-copy"
-namespace llvm {
- void initializePPCVSXCopyPass(PassRegistry&);
-}
-
namespace {
// PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
// (Altivec and scalar floating-point registers), we need to transform the
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 6586f503a7b8..ce78239df0a8 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -1,9 +1,8 @@
//===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 1be193e08c01..44175af7f9b6 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -1,9 +1,8 @@
//===----------- PPCVSXSwapRemoval.cpp - Remove VSX LE Swaps -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
@@ -60,10 +59,6 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-swaps"
-namespace llvm {
- void initializePPCVSXSwapRemovalPass(PassRegistry&);
-}
-
namespace {
// A PPCVSXSwapEntry is created for each machine instruction that
@@ -427,6 +422,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
// of opcodes having a common attribute in TableGen. Should this
// change, this is a prime candidate to use such a mechanism.
case PPC::INLINEASM:
+ case PPC::INLINEASM_BR:
case PPC::EXTRACT_SUBREG:
case PPC::INSERT_SUBREG:
case PPC::COPY_TO_REGCLASS:
diff --git a/lib/Target/PowerPC/README_P9.txt b/lib/Target/PowerPC/README_P9.txt
index d56f7cca7b21..c9984b7604bd 100644
--- a/lib/Target/PowerPC/README_P9.txt
+++ b/lib/Target/PowerPC/README_P9.txt
@@ -512,8 +512,8 @@ Fixed Point Facility:
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
- . (set f64:$XT, (load ixaddr:$src))
- (set f64:$XT, (store ixaddr:$dst))
+ . (set f64:$XT, (load iaddrX4:$src))
+ (set f64:$XT, (store iaddrX4:$dst))
- Load/Store SP, with conversion from/to DP: lxssp stxssp
. Similar to lxsspx/stxsspx:
@@ -521,8 +521,8 @@ Fixed Point Facility:
"lxsspx $XT, $src", IIC_LdStLFD,
[(set f32:$XT, (load xoaddr:$src))]>;
- . (set f32:$XT, (load ixaddr:$src))
- (set f32:$XT, (store ixaddr:$dst))
+ . (set f32:$XT, (load iaddrX4:$src))
+ (set f32:$XT, (store iaddrX4:$dst))
- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
. Similar to lxsiwzx:
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 979595264472..99b5dec74668 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
new file mode 100644
index 000000000000..2d0afbfb1be0
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- PowerPCTargetInfo.h - PowerPC Target Implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+#define LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getThePPC32Target();
+Target &getThePPC64Target();
+Target &getThePPC64LETarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_TARGETINFO_POWERPCTARGETINFO_H
diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 1d1112cc5124..0172c6298772 100644
--- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1,9 +1,8 @@
//===-- RISCVAsmParser.cpp - Parse RISCV assembly to MCInst instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
+#include "TargetInfo/RISCVTargetInfo.h"
#include "Utils/RISCVBaseInfo.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
@@ -21,6 +21,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
@@ -47,6 +48,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
+ bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); }
RISCVTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -79,14 +81,42 @@ class RISCVAsmParser : public MCTargetAsmParser {
// synthesize the desired immediate value into the destination register.
void emitLoadImm(unsigned DestReg, int64_t Value, MCStreamer &Out);
+ // Helper to emit a combination of AUIPC and SecondOpcode. Used to implement
+ // helpers such as emitLoadLocalAddress and emitLoadAddress.
+ void emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
+ const MCExpr *Symbol, RISCVMCExpr::VariantKind VKHi,
+ unsigned SecondOpcode, SMLoc IDLoc, MCStreamer &Out);
+
// Helper to emit pseudo instruction "lla" used in PC-rel addressing.
void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+ // Helper to emit pseudo instruction "la" used in GOT/PC-rel addressing.
+ void emitLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+ // Helper to emit pseudo instruction "la.tls.ie" used in initial-exec TLS
+ // addressing.
+ void emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+ // Helper to emit pseudo instruction "la.tls.gd" used in global-dynamic TLS
+ // addressing.
+ void emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
+ // Helper to emit pseudo load/store instruction with a symbol.
+ void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
+ MCStreamer &Out, bool HasTmpReg);
+
+ // Checks that a PseudoAddTPRel is using x4/tp in its second input operand.
+ // Enforcing this using a restricted register class for the second input
+  // operand of PseudoAddTPRel results in a poor diagnostic due to the fact
+  // that 'add' is an overloaded mnemonic.
+ bool checkPseudoAddTPRel(MCInst &Inst, OperandVector &Operands);
+
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
/// in this method.
- bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
+ MCStreamer &Out);
// Auto-generated instruction matching functions
#define GET_ASSEMBLER_HEADER
@@ -99,6 +129,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
+ OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
OperandMatchResultTy parseJALOffset(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -269,6 +300,27 @@ public:
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isCallSymbol() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK;
+ // Must be of 'immediate' type but not a constant.
+ if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
+ return false;
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ (VK == RISCVMCExpr::VK_RISCV_CALL ||
+ VK == RISCVMCExpr::VK_RISCV_CALL_PLT);
+ }
+
+ bool isTPRelAddSymbol() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK;
+ // Must be of 'immediate' type but not a constant.
+ if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
+ return false;
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_TPREL_ADD;
+ }
+
bool isCSRSystemRegister() const { return isSystemRegister(); }
/// Return true if the operand is a valid for the fence instruction e.g.
@@ -463,7 +515,8 @@ public:
IsValid = isInt<12>(Imm);
return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
VK == RISCVMCExpr::VK_RISCV_LO ||
- VK == RISCVMCExpr::VK_RISCV_PCREL_LO);
+ VK == RISCVMCExpr::VK_RISCV_PCREL_LO ||
+ VK == RISCVMCExpr::VK_RISCV_TPREL_LO);
}
bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
@@ -489,10 +542,12 @@ public:
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- return IsValid && VK == RISCVMCExpr::VK_RISCV_HI;
+ return IsValid && (VK == RISCVMCExpr::VK_RISCV_HI ||
+ VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
} else {
return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
- VK == RISCVMCExpr::VK_RISCV_HI);
+ VK == RISCVMCExpr::VK_RISCV_HI ||
+ VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
}
}
@@ -505,10 +560,16 @@ public:
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm);
- return IsValid && VK == RISCVMCExpr::VK_RISCV_PCREL_HI;
+ return IsValid && (VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
+ VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
+ VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
+ VK == RISCVMCExpr::VK_RISCV_TLS_GD_HI);
} else {
return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
- VK == RISCVMCExpr::VK_RISCV_PCREL_HI);
+ VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
+ VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
+ VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
+ VK == RISCVMCExpr::VK_RISCV_TLS_GD_HI);
}
}
@@ -753,7 +814,7 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
default:
break;
case Match_Success:
- return processInstruction(Inst, IDLoc, Out);
+ return processInstruction(Inst, IDLoc, Operands, Out);
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
case Match_MnemonicFail:
@@ -844,8 +905,8 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSImm12:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1,
- "operand must be a symbol with %lo/%pcrel_lo modifier or an integer in "
- "the range");
+ "operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an "
+ "integer in the range");
case Match_InvalidSImm12Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
@@ -856,13 +917,15 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidUImm20LUI:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1,
- "operand must be a symbol with %hi() "
- "modifier or an integer in the range");
+ "operand must be a symbol with "
+ "%hi/%tprel_hi modifier or an integer in "
+ "the range");
case Match_InvalidUImm20AUIPC:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 20) - 1,
- "operand must be a symbol with %pcrel_hi() modifier or an integer in "
- "the range");
+ "operand must be a symbol with a "
+ "%pcrel_hi/%got_pcrel_hi/%tls_ie_pcrel_hi/%tls_gd_pcrel_hi modifier or "
+ "an integer in the range");
case Match_InvalidSImm21Lsb0JAL:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
@@ -888,11 +951,33 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a bare symbol name");
}
+ case Match_InvalidCallSymbol: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be a bare symbol name");
+ }
+ case Match_InvalidTPRelAddSymbol: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be a symbol with %tprel_add modifier");
+ }
}
llvm_unreachable("Unknown match type detected!");
}
+// Attempts to match Name as a register (either using the default name or
+// alternative ABI names), setting RegNo to the matching register. Upon
+// failure, returns true and sets RegNo to 0. If IsRV32E then registers
+// x16-x31 will be rejected.
+static bool matchRegisterNameHelper(bool IsRV32E, unsigned &RegNo,
+ StringRef Name) {
+ RegNo = MatchRegisterName(Name);
+ if (RegNo == 0)
+ RegNo = MatchRegisterAltName(Name);
+ if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31)
+ RegNo = 0;
+ return RegNo == 0;
+}
+
bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
@@ -901,42 +986,45 @@ bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
- if (!MatchRegisterName(Name) || !MatchRegisterAltName(Name)) {
- getParser().Lex(); // Eat identifier token.
- return false;
- }
+ if (matchRegisterNameHelper(isRV32E(), RegNo, Name))
+ return Error(StartLoc, "invalid register name");
- return Error(StartLoc, "invalid register name");
+ getParser().Lex(); // Eat identifier token.
+ return false;
}
OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
bool AllowParens) {
SMLoc FirstS = getLoc();
bool HadParens = false;
- AsmToken Buf[2];
+ AsmToken LParen;
- // If this a parenthesised register name is allowed, parse it atomically
+ // If this is an LParen and a parenthesised register name is allowed, parse it
+ // atomically.
if (AllowParens && getLexer().is(AsmToken::LParen)) {
+ AsmToken Buf[2];
size_t ReadCount = getLexer().peekTokens(Buf);
if (ReadCount == 2 && Buf[1].getKind() == AsmToken::RParen) {
HadParens = true;
+ LParen = getParser().getTok();
getParser().Lex(); // Eat '('
}
}
switch (getLexer().getKind()) {
default:
+ if (HadParens)
+ getLexer().UnLex(LParen);
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
- unsigned RegNo = MatchRegisterName(Name);
+ unsigned RegNo;
+ matchRegisterNameHelper(isRV32E(), RegNo, Name);
+
if (RegNo == 0) {
- RegNo = MatchRegisterAltName(Name);
- if (RegNo == 0) {
- if (HadParens)
- getLexer().UnLex(Buf[0]);
- return MatchOperand_NoMatch;
- }
+ if (HadParens)
+ getLexer().UnLex(LParen);
+ return MatchOperand_NoMatch;
}
if (HadParens)
Operands.push_back(RISCVOperand::createToken("(", FirstS, isRV64()));
@@ -965,6 +1053,8 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
case AsmToken::Integer:
case AsmToken::String: {
if (getParser().parseExpression(Res))
@@ -1029,8 +1119,11 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
default:
return MatchOperand_NoMatch;
case AsmToken::LParen:
+ case AsmToken::Dot:
case AsmToken::Minus:
case AsmToken::Plus:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
case AsmToken::Integer:
case AsmToken::String:
case AsmToken::Identifier:
@@ -1094,11 +1187,54 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
return MatchOperand_NoMatch;
StringRef Identifier;
+ AsmToken Tok = getLexer().getTok();
+
+ if (getParser().parseIdentifier(Identifier))
+ return MatchOperand_ParseFail;
+
+ if (Identifier.consume_back("@plt")) {
+ Error(getLoc(), "'@plt' operand not valid for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ return MatchOperand_NoMatch;
+ }
+ Res = V;
+ } else
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ // Avoid parsing the register in `call rd, foo` as a call symbol.
+ if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
+ return MatchOperand_NoMatch;
+
+ StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
+ RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL;
+ if (Identifier.consume_back("@plt"))
+ Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
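+ // For example, "call foo" is parsed with VK_RISCV_CALL, while "call foo@plt"
+ // is parsed with VK_RISCV_CALL_PLT.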
+
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ Res = RISCVMCExpr::create(Res, Kind, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
@@ -1408,42 +1544,144 @@ void RISCVAsmParser::emitLoadImm(unsigned DestReg, int64_t Value,
}
}
-void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
- MCStreamer &Out) {
- // The local load address pseudo-instruction "lla" is used in PC-relative
- // addressing of symbols:
- // lla rdest, symbol
- // expands to
- // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
- // ADDI rdest, %pcrel_lo(TmpLabel)
+void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
+ const MCExpr *Symbol,
+ RISCVMCExpr::VariantKind VKHi,
+ unsigned SecondOpcode, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // A pair of instructions for PC-relative addressing; expands to
+ // TmpLabel: AUIPC TmpReg, VKHi(symbol)
+ // OP DestReg, TmpReg, %pcrel_lo(TmpLabel)
MCContext &Ctx = getContext();
MCSymbol *TmpLabel = Ctx.createTempSymbol(
"pcrel_hi", /* AlwaysAddSuffix */ true, /* CanBeUnnamed */ false);
Out.EmitLabel(TmpLabel);
- MCOperand DestReg = Inst.getOperand(0);
- const RISCVMCExpr *Symbol = RISCVMCExpr::create(
- Inst.getOperand(1).getExpr(), RISCVMCExpr::VK_RISCV_PCREL_HI, Ctx);
-
+ const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx);
emitToStreamer(
- Out, MCInstBuilder(RISCV::AUIPC).addOperand(DestReg).addExpr(Symbol));
+ Out, MCInstBuilder(RISCV::AUIPC).addOperand(TmpReg).addExpr(SymbolHi));
const MCExpr *RefToLinkTmpLabel =
RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
- emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
- .addOperand(DestReg)
+ emitToStreamer(Out, MCInstBuilder(SecondOpcode)
.addOperand(DestReg)
+ .addOperand(TmpReg)
.addExpr(RefToLinkTmpLabel));
}
+void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The load local address pseudo-instruction "lla" is used in PC-relative
+ // addressing of local symbols:
+ // lla rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+ // ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+ MCOperand DestReg = Inst.getOperand(0);
+ const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+ emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI,
+ RISCV::ADDI, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The load address pseudo-instruction "la" is used in PC-relative and
+ // GOT-indirect addressing of global symbols:
+ // la rdest, symbol
+ // expands to either (for non-PIC)
+ // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
+ // ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+ // or (for PIC)
+ // TmpLabel: AUIPC rdest, %got_pcrel_hi(symbol)
+ // Lx rdest, %pcrel_lo(TmpLabel)(rdest)
+ MCOperand DestReg = Inst.getOperand(0);
+ const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+ unsigned SecondOpcode;
+ RISCVMCExpr::VariantKind VKHi;
+ // FIXME: Should check .option (no)pic when implemented
+ if (getContext().getObjectFileInfo()->isPositionIndependent()) {
+ SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
+ VKHi = RISCVMCExpr::VK_RISCV_GOT_HI;
+ } else {
+ SecondOpcode = RISCV::ADDI;
+ VKHi = RISCVMCExpr::VK_RISCV_PCREL_HI;
+ }
+ emitAuipcInstPair(DestReg, DestReg, Symbol, VKHi, SecondOpcode, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The load TLS IE address pseudo-instruction "la.tls.ie" is used in
+ // initial-exec TLS model addressing of global symbols:
+ // la.tls.ie rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %tls_ie_pcrel_hi(symbol)
+ // Lx rdest, %pcrel_lo(TmpLabel)(rdest)
+ MCOperand DestReg = Inst.getOperand(0);
+ const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+ unsigned SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
+ emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GOT_HI,
+ SecondOpcode, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The load TLS GD address pseudo-instruction "la.tls.gd" is used in
+ // global-dynamic TLS model addressing of global symbols:
+ // la.tls.gd rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %tls_gd_pcrel_hi(symbol)
+ // ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+ MCOperand DestReg = Inst.getOperand(0);
+ const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+ emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GD_HI,
+ RISCV::ADDI, IDLoc, Out);
+}
+
+void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
+ SMLoc IDLoc, MCStreamer &Out,
+ bool HasTmpReg) {
+ // The load/store pseudo-instruction does a PC-relative load or store of a
+ // symbol.
+ //
+ // The expansion looks like this
+ //
+ // TmpLabel: AUIPC tmp, %pcrel_hi(symbol)
+ // [S|L]X rd, %pcrel_lo(TmpLabel)(tmp)
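+ // For example, "lw a0, sym" can reuse a0 as the scratch register, whereas
+ // "sw a0, sym, t0" needs the extra temporary operand (HasTmpReg) because a0
+ // holds the data being stored. The register names here are illustrative.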
+ MCOperand DestReg = Inst.getOperand(0);
+ unsigned SymbolOpIdx = HasTmpReg ? 2 : 1;
+ unsigned TmpRegOpIdx = HasTmpReg ? 1 : 0;
+ MCOperand TmpReg = Inst.getOperand(TmpRegOpIdx);
+ const MCExpr *Symbol = Inst.getOperand(SymbolOpIdx).getExpr();
+ emitAuipcInstPair(DestReg, TmpReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI,
+ Opcode, IDLoc, Out);
+}
+
+bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst,
+ OperandVector &Operands) {
+ assert(Inst.getOpcode() == RISCV::PseudoAddTPRel && "Invalid instruction");
+ assert(Inst.getOperand(2).isReg() && "Unexpected second operand kind");
+ if (Inst.getOperand(2).getReg() != RISCV::X4) {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[3]).getStartLoc();
+ return Error(ErrorLoc, "the second input operand must be tp/x4 when using "
+ "%tprel_add modifier");
+ }
+
+ return false;
+}
+
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ OperandVector &Operands,
MCStreamer &Out) {
Inst.setLoc(IDLoc);
- if (Inst.getOpcode() == RISCV::PseudoLI) {
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case RISCV::PseudoLI: {
unsigned Reg = Inst.getOperand(0).getReg();
const MCOperand &Op1 = Inst.getOperand(1);
if (Op1.isExpr()) {
@@ -1463,9 +1701,68 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm);
emitLoadImm(Reg, Imm, Out);
return false;
- } else if (Inst.getOpcode() == RISCV::PseudoLLA) {
+ }
+ case RISCV::PseudoLLA:
emitLoadLocalAddress(Inst, IDLoc, Out);
return false;
+ case RISCV::PseudoLA:
+ emitLoadAddress(Inst, IDLoc, Out);
+ return false;
+ case RISCV::PseudoLA_TLS_IE:
+ emitLoadTLSIEAddress(Inst, IDLoc, Out);
+ return false;
+ case RISCV::PseudoLA_TLS_GD:
+ emitLoadTLSGDAddress(Inst, IDLoc, Out);
+ return false;
+ case RISCV::PseudoLB:
+ emitLoadStoreSymbol(Inst, RISCV::LB, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoLBU:
+ emitLoadStoreSymbol(Inst, RISCV::LBU, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoLH:
+ emitLoadStoreSymbol(Inst, RISCV::LH, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoLHU:
+ emitLoadStoreSymbol(Inst, RISCV::LHU, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoLW:
+ emitLoadStoreSymbol(Inst, RISCV::LW, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoLWU:
+ emitLoadStoreSymbol(Inst, RISCV::LWU, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoLD:
+ emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false);
+ return false;
+ case RISCV::PseudoFLW:
+ emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoFLD:
+ emitLoadStoreSymbol(Inst, RISCV::FLD, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoSB:
+ emitLoadStoreSymbol(Inst, RISCV::SB, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoSH:
+ emitLoadStoreSymbol(Inst, RISCV::SH, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoSW:
+ emitLoadStoreSymbol(Inst, RISCV::SW, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoSD:
+ emitLoadStoreSymbol(Inst, RISCV::SD, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoFSW:
+ emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoFSD:
+ emitLoadStoreSymbol(Inst, RISCV::FSD, IDLoc, Out, /*HasTmpReg=*/true);
+ return false;
+ case RISCV::PseudoAddTPRel:
+ if (checkPseudoAddTPRel(Inst, Operands))
+ return true;
+ break;
}
emitToStreamer(Out, Inst);
diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index eafa09d56315..36200c03f703 100644
--- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -1,9 +1,8 @@
//===-- RISCVDisassembler.cpp - Disassembler for RISCV --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "TargetInfo/RISCVTargetInfo.h"
#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -70,7 +70,13 @@ static const unsigned GPRDecoderTable[] = {
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > sizeof(GPRDecoderTable))
+ const FeatureBitset &FeatureBits =
+ static_cast<const MCDisassembler *>(Decoder)
+ ->getSubtargetInfo()
+ .getFeatureBits();
+ bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
+
+ if (RegNo > array_lengthof(GPRDecoderTable) || (IsRV32E && RegNo > 15))
return MCDisassembler::Fail;
// We must define our own mapping from RegNo to register identifier.
@@ -95,7 +101,7 @@ static const unsigned FPR32DecoderTable[] = {
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > sizeof(FPR32DecoderTable))
+ if (RegNo > array_lengthof(FPR32DecoderTable))
return MCDisassembler::Fail;
// We must define our own mapping from RegNo to register identifier.
@@ -131,7 +137,7 @@ static const unsigned FPR64DecoderTable[] = {
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- if (RegNo > sizeof(FPR64DecoderTable))
+ if (RegNo > array_lengthof(FPR64DecoderTable))
return MCDisassembler::Fail;
// We must define our own mapping from RegNo to register identifier.
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 7672fea5d95b..ee5f760ebcb0 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- RISCVAsmBackend.cpp - RISCV Assembler Backend ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,6 +33,10 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
switch ((unsigned)Fixup.getKind()) {
default:
break;
+ case RISCV::fixup_riscv_got_hi20:
+ case RISCV::fixup_riscv_tls_got_hi20:
+ case RISCV::fixup_riscv_tls_gd_hi20:
+ return true;
case RISCV::fixup_riscv_pcrel_lo12_i:
case RISCV::fixup_riscv_pcrel_lo12_s:
// For pcrel_lo12, force a relocation if the target of the corresponding
@@ -48,6 +52,11 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
default:
llvm_unreachable("Unexpected fixup kind for pcrel_lo12");
break;
+ case RISCV::fixup_riscv_got_hi20:
+ case RISCV::fixup_riscv_tls_got_hi20:
+ case RISCV::fixup_riscv_tls_gd_hi20:
+ ShouldForce = true;
+ break;
case RISCV::fixup_riscv_pcrel_hi20:
ShouldForce = T->getValue()->findAssociatedFragment() !=
Fixup.getValue()->findAssociatedFragment();
@@ -153,16 +162,12 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
return false;
// The canonical nop on RISC-V is addi x0, x0, 0.
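+ // For example, a 10-byte padding request with the C extension enabled is
+ // written as two 4-byte nops followed by a single 2-byte c.nop.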
- uint64_t Nop32Count = Count / 4;
- for (uint64_t i = Nop32Count; i != 0; --i)
+ for (; Count >= 4; Count -= 4)
OS.write("\x13\0\0\0", 4);
// The canonical nop on RVC is c.nop.
- if (HasStdExtC) {
- uint64_t Nop16Count = (Count - Nop32Count * 4) / 2;
- for (uint64_t i = Nop16Count; i != 0; --i)
- OS.write("\x01\0", 2);
- }
+ if (Count && HasStdExtC)
+ OS.write("\x01\0", 2);
return true;
}
@@ -173,6 +178,10 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
+ case RISCV::fixup_riscv_got_hi20:
+ case RISCV::fixup_riscv_tls_got_hi20:
+ case RISCV::fixup_riscv_tls_gd_hi20:
+ llvm_unreachable("Relocation should be unconditionally forced\n");
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -180,12 +189,15 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
case RISCV::fixup_riscv_lo12_i:
case RISCV::fixup_riscv_pcrel_lo12_i:
+ case RISCV::fixup_riscv_tprel_lo12_i:
return Value & 0xfff;
case RISCV::fixup_riscv_lo12_s:
case RISCV::fixup_riscv_pcrel_lo12_s:
+ case RISCV::fixup_riscv_tprel_lo12_s:
return (((Value >> 5) & 0x7f) << 25) | ((Value & 0x1f) << 7);
case RISCV::fixup_riscv_hi20:
case RISCV::fixup_riscv_pcrel_hi20:
+ case RISCV::fixup_riscv_tprel_hi20:
// Add 1 if bit 11 is 1, to compensate for low 12 bits being negative.
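+ // For example, Value = 0x1800 has lo12 = -0x800 after sign extension, so
+ // hi20 rounds up to 0x2: (0x2 << 12) + (-0x800) == 0x1800.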
return ((Value + 0x800) >> 12) & 0xfffff;
case RISCV::fixup_riscv_jal: {
@@ -223,7 +235,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = (Sbit << 31) | (Mid6 << 25) | (Lo4 << 8) | (Hi1 << 7);
return Value;
}
- case RISCV::fixup_riscv_call: {
+ case RISCV::fixup_riscv_call:
+ case RISCV::fixup_riscv_call_plt: {
// Jalr will add UpperImm with the sign-extended 12-bit LowerImm,
// we need to add 0x800ULL before extract upper bits to reflect the
// effect of the sign extension.
@@ -287,6 +300,60 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
}
+// Linker relaxation may change the code size. When linker relaxation is
+// enabled we have to insert Nops for the .align directive so that the linker
+// can later satisfy the alignment by removing Nops.
+// The function sets Size to the number of extra Nop bytes required and
+// returns true if any are needed.
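+// For example, an 8-byte alignment request without the C extension may need
+// the linker to delete up to 8 - 4 = 4 bytes of Nops, so Size is set to 4.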
+bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
+ const MCAlignFragment &AF, unsigned &Size) {
+ // Calculate Nops Size only when linker relaxation enabled.
+ if (!STI.getFeatureBits()[RISCV::FeatureRelax])
+ return false;
+
+ bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC];
+ unsigned MinNopLen = HasStdExtC ? 2 : 4;
+
+ if (AF.getAlignment() <= MinNopLen) {
+ return false;
+ } else {
+ Size = AF.getAlignment() - MinNopLen;
+ return true;
+ }
+}
+
+// When linker relaxation is enabled we need to emit an R_RISCV_ALIGN
+// relocation to record the position of the Nops and the total number of Nop
+// bytes that were inserted.
+// The function inserts a fixup_riscv_align fixup, which is eventually
+// converted into an R_RISCV_ALIGN relocation.
+bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ MCAlignFragment &AF) {
+ // Insert the fixup only when linker relaxation enabled.
+ if (!STI.getFeatureBits()[RISCV::FeatureRelax])
+ return false;
+
+ // Calculate total Nops we need to insert. If there are none to insert
+ // then simply return.
+ unsigned Count;
+ if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0))
+ return false;
+
+ MCContext &Ctx = Asm.getContext();
+ const MCExpr *Dummy = MCConstantExpr::create(0, Ctx);
+ // Create fixup_riscv_align fixup.
+ MCFixup Fixup =
+ MCFixup::create(0, Dummy, MCFixupKind(RISCV::fixup_riscv_align), SMLoc());
+
+ uint64_t FixedValue = 0;
+ MCValue NopBytes = MCValue::get(Count);
+
+ Asm.getWriter().recordRelocation(Asm, Layout, &AF, Fixup, NopBytes,
+ FixedValue);
+
+ return true;
+}
+
std::unique_ptr<MCObjectTargetWriter>
RISCVAsmBackend::createObjectTargetWriter() const {
return createRISCVELFObjectWriter(OSABI, Is64Bit);
@@ -298,5 +365,5 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit());
+ return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index b98e45f4053f..254249c87dc8 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -1,9 +1,8 @@
//===-- RISCVAsmBackend.h - RISCV Assembler Backend -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "MCTargetDesc/RISCVFixupKinds.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -26,21 +26,45 @@ class RISCVAsmBackend : public MCAsmBackend {
uint8_t OSABI;
bool Is64Bit;
bool ForceRelocs = false;
+ const MCTargetOptions &TargetOptions;
+ RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
public:
- RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit) {}
+ RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
+ const MCTargetOptions &Options)
+ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
+ TargetOptions(Options) {
+ TargetABI = RISCVABI::computeTargetABI(
+ STI.getTargetTriple(), STI.getFeatureBits(), Options.getABIName());
+ RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
+ }
~RISCVAsmBackend() override {}
void setForceRelocs() { ForceRelocs = true; }
+ // Returns true if relocations will be forced for shouldForceRelocation by
+ // default. This will be true if relaxation is enabled or had previously
+ // been enabled.
+ bool willForceRelocations() const {
+ return ForceRelocs || STI.getFeatureBits()[RISCV::FeatureRelax];
+ }
+
// Generate diff expression relocations if the relax feature is enabled or had
// previously been enabled, otherwise it is safe for the assembler to
// calculate these internally.
bool requiresDiffExpressionRelocations() const override {
- return STI.getFeatureBits()[RISCV::FeatureRelax] || ForceRelocs;
+ return willForceRelocations();
}
+
+ // Returns, via Size, the number of extra Nop bytes needed for an alignment
+ // directive in a code section.
+ bool shouldInsertExtraNopBytesForCodeAlign(const MCAlignFragment &AF,
+ unsigned &Size) override;
+
+ // Insert target specific fixup type for alignment directive in code section.
+ bool shouldInsertFixupForCodeAlign(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ MCAlignFragment &AF) override;
+
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
@@ -80,12 +104,21 @@ public:
{ "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_pcrel_lo12_i", 20, 12, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_pcrel_lo12_s", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_got_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_tprel_hi20", 12, 20, 0 },
+ { "fixup_riscv_tprel_lo12_i", 20, 12, 0 },
+ { "fixup_riscv_tprel_lo12_s", 0, 32, 0 },
+ { "fixup_riscv_tprel_add", 0, 0, 0 },
+ { "fixup_riscv_tls_got_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_tls_gd_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_riscv_call", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_riscv_relax", 0, 0, 0 }
+ { "fixup_riscv_call_plt", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_riscv_relax", 0, 0, 0 },
+ { "fixup_riscv_align", 0, 0, 0 }
};
static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
"Not all fixup kinds added to Infos array");
@@ -107,6 +140,9 @@ public:
bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+
+ const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
+ RISCVABI::ABI getTargetABI() const { return TargetABI; }
};
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 9b88614aa693..3ccbc86d2619 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- RISCVELFObjectWriter.cpp - RISCV ELF Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -49,7 +48,42 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
const MCFixup &Fixup,
bool IsPCRel) const {
// Determine the type of the relocation
- switch ((unsigned)Fixup.getKind()) {
+ unsigned Kind = Fixup.getKind();
+ if (IsPCRel) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ case FK_PCRel_4:
+ return ELF::R_RISCV_32_PCREL;
+ case RISCV::fixup_riscv_pcrel_hi20:
+ return ELF::R_RISCV_PCREL_HI20;
+ case RISCV::fixup_riscv_pcrel_lo12_i:
+ return ELF::R_RISCV_PCREL_LO12_I;
+ case RISCV::fixup_riscv_pcrel_lo12_s:
+ return ELF::R_RISCV_PCREL_LO12_S;
+ case RISCV::fixup_riscv_got_hi20:
+ return ELF::R_RISCV_GOT_HI20;
+ case RISCV::fixup_riscv_tls_got_hi20:
+ return ELF::R_RISCV_TLS_GOT_HI20;
+ case RISCV::fixup_riscv_tls_gd_hi20:
+ return ELF::R_RISCV_TLS_GD_HI20;
+ case RISCV::fixup_riscv_jal:
+ return ELF::R_RISCV_JAL;
+ case RISCV::fixup_riscv_branch:
+ return ELF::R_RISCV_BRANCH;
+ case RISCV::fixup_riscv_rvc_jump:
+ return ELF::R_RISCV_RVC_JUMP;
+ case RISCV::fixup_riscv_rvc_branch:
+ return ELF::R_RISCV_RVC_BRANCH;
+ case RISCV::fixup_riscv_call:
+ return ELF::R_RISCV_CALL;
+ case RISCV::fixup_riscv_call_plt:
+ return ELF::R_RISCV_CALL_PLT;
+ }
+ }
+
+ switch (Kind) {
default:
llvm_unreachable("invalid fixup kind!");
case FK_Data_4:
@@ -78,24 +112,18 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_RISCV_LO12_I;
case RISCV::fixup_riscv_lo12_s:
return ELF::R_RISCV_LO12_S;
- case RISCV::fixup_riscv_pcrel_hi20:
- return ELF::R_RISCV_PCREL_HI20;
- case RISCV::fixup_riscv_pcrel_lo12_i:
- return ELF::R_RISCV_PCREL_LO12_I;
- case RISCV::fixup_riscv_pcrel_lo12_s:
- return ELF::R_RISCV_PCREL_LO12_S;
- case RISCV::fixup_riscv_jal:
- return ELF::R_RISCV_JAL;
- case RISCV::fixup_riscv_branch:
- return ELF::R_RISCV_BRANCH;
- case RISCV::fixup_riscv_rvc_jump:
- return ELF::R_RISCV_RVC_JUMP;
- case RISCV::fixup_riscv_rvc_branch:
- return ELF::R_RISCV_RVC_BRANCH;
- case RISCV::fixup_riscv_call:
- return ELF::R_RISCV_CALL;
+ case RISCV::fixup_riscv_tprel_hi20:
+ return ELF::R_RISCV_TPREL_HI20;
+ case RISCV::fixup_riscv_tprel_lo12_i:
+ return ELF::R_RISCV_TPREL_LO12_I;
+ case RISCV::fixup_riscv_tprel_lo12_s:
+ return ELF::R_RISCV_TPREL_LO12_S;
+ case RISCV::fixup_riscv_tprel_add:
+ return ELF::R_RISCV_TPREL_ADD;
case RISCV::fixup_riscv_relax:
return ELF::R_RISCV_RELAX;
+ case RISCV::fixup_riscv_align:
+ return ELF::R_RISCV_ALIGN;
}
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index a6ba1e41e964..40fa195f3790 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-- RISCVELFStreamer.cpp - RISCV ELF Target Streamer Methods ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "RISCVELFStreamer.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
#include "RISCVMCTargetDesc.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -23,14 +24,35 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
: RISCVTargetStreamer(S) {
MCAssembler &MCA = getStreamer().getAssembler();
-
const FeatureBitset &Features = STI.getFeatureBits();
+ auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
+ RISCVABI::ABI ABI = MAB.getTargetABI();
+ assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
unsigned EFlags = MCA.getELFHeaderEFlags();
if (Features[RISCV::FeatureStdExtC])
EFlags |= ELF::EF_RISCV_RVC;
+ switch (ABI) {
+ case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_LP64:
+ break;
+ case RISCVABI::ABI_ILP32F:
+ case RISCVABI::ABI_LP64F:
+ EFlags |= ELF::EF_RISCV_FLOAT_ABI_SINGLE;
+ break;
+ case RISCVABI::ABI_ILP32D:
+ case RISCVABI::ABI_LP64D:
+ EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE;
+ break;
+ case RISCVABI::ABI_ILP32E:
+ EFlags |= ELF::EF_RISCV_RVE;
+ break;
+ case RISCVABI::ABI_Unknown:
+ llvm_unreachable("Improperly initialised target ABI");
+ }
+
MCA.setELFHeaderEFlags(EFlags);
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 1f36bbc43882..138df786eaf3 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -1,9 +1,8 @@
//===-- RISCVELFStreamer.h - RISCV ELF Target Streamer ---------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index 6a1224be774e..6c7933340608 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -1,9 +1,8 @@
//===-- RISCVFixupKinds.h - RISCV Specific Fixup Entries --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,6 +34,27 @@ enum Fixups {
// fixup_riscv_pcrel_lo12_s - 12-bit fixup corresponding to pcrel_lo(foo) for
// the S-type store instructions
fixup_riscv_pcrel_lo12_s,
+ // fixup_riscv_got_hi20 - 20-bit fixup corresponding to got_pcrel_hi(foo) for
+ // instructions like auipc
+ fixup_riscv_got_hi20,
+ // fixup_riscv_tprel_hi20 - 20-bit fixup corresponding to tprel_hi(foo) for
+ // instructions like lui
+ fixup_riscv_tprel_hi20,
+ // fixup_riscv_tprel_lo12_i - 12-bit fixup corresponding to tprel_lo(foo) for
+ // instructions like addi
+ fixup_riscv_tprel_lo12_i,
+ // fixup_riscv_tprel_lo12_s - 12-bit fixup corresponding to tprel_lo(foo) for
+ // the S-type store instructions
+ fixup_riscv_tprel_lo12_s,
+ // fixup_riscv_tprel_add - A fixup corresponding to %tprel_add(foo) for the
+ // add_tls instruction. Used to provide a hint to the linker.
+ fixup_riscv_tprel_add,
+ // fixup_riscv_tls_got_hi20 - 20-bit fixup corresponding to
+ // tls_ie_pcrel_hi(foo) for instructions like auipc
+ fixup_riscv_tls_got_hi20,
+ // fixup_riscv_tls_gd_hi20 - 20-bit fixup corresponding to
+ // tls_gd_pcrel_hi(foo) for instructions like auipc
+ fixup_riscv_tls_gd_hi20,
// fixup_riscv_jal - 20-bit fixup for symbol references in the jal
// instruction
fixup_riscv_jal,
@@ -50,9 +70,17 @@ enum Fixups {
// fixup_riscv_call - A fixup representing a call attached to the auipc
// instruction in a pair composed of adjacent auipc+jalr instructions.
fixup_riscv_call,
+ // fixup_riscv_call_plt - A fixup representing a procedure linkage table call
+ // attached to the auipc instruction in a pair composed of adjacent auipc+jalr
+ // instructions.
+ fixup_riscv_call_plt,
// fixup_riscv_relax - Used to generate an R_RISCV_RELAX relocation type,
// which indicates the linker may relax the instruction pair.
fixup_riscv_relax,
+ // fixup_riscv_align - Used to generate an R_RISCV_ALIGN relocation type,
+ // which indicates the linker should fixup the alignment after linker
+ // relaxation.
+ fixup_riscv_align,
// fixup_riscv_invalid - used as a sentinel and a marker, must be last fixup
fixup_riscv_invalid,
diff --git a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 979c8f4e2fa7..fe37b70811d8 100644
--- a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- RISCVInstPrinter.cpp - Convert RISCV MCInst to asm syntax ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index 0f9bed184996..5ca1d3fa20fe 100644
--- a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -1,9 +1,8 @@
//===-- RISCVInstPrinter.h - Convert RISCV MCInst to asm syntax ---*- C++ -*--//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_RISCV_INSTPRINTER_RISCVINSTPRINTER_H
-#define LLVM_LIB_TARGET_RISCV_INSTPRINTER_RISCVINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVINSTPRINTER_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVINSTPRINTER_H
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 780dae410cd0..983629692883 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- RISCVMCAsmInfo.cpp - RISCV Asm properties -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
CommentString = "#";
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
}
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
index 901a1eba8af2..043fdb7c08c0 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- RISCVMCAsmInfo.h - RISCV Asm Info ----------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index c5a4ffc0e360..0fc775f63ed4 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- RISCVMCCodeEmitter.cpp - Convert RISCV code to machine code -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,6 +56,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ void expandAddTPRel(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// TableGen'erated function for getting the binary encoding for an
/// instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -85,28 +88,34 @@ MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
return new RISCVMCCodeEmitter(Ctx, MCII);
}
-// Expand PseudoCALL and PseudoTAIL to AUIPC and JALR with relocation types.
-// We expand PseudoCALL and PseudoTAIL while encoding, meaning AUIPC and JALR
-// won't go through RISCV MC to MC compressed instruction transformation. This
-// is acceptable because AUIPC has no 16-bit form and C_JALR have no immediate
-// operand field. We let linker relaxation deal with it. When linker
-// relaxation enabled, AUIPC and JALR have chance relax to JAL. If C extension
-// is enabled, JAL has chance relax to C_JAL.
+// Expand PseudoCALL(Reg) and PseudoTAIL to AUIPC and JALR with relocation
+// types. We expand PseudoCALL(Reg) and PseudoTAIL while encoding, meaning AUIPC
+// and JALR won't go through the RISCV MC to MC compressed instruction
+// transformation. This is acceptable because AUIPC has no 16-bit form and
+// C_JALR has no immediate operand field. We let linker relaxation deal with
+// it: when linker relaxation is enabled, AUIPC and JALR may be relaxed to JAL,
+// and if the C extension is enabled, JAL may in turn be relaxed to C_JAL.
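+// For example, "tail foo" expands to an AUIPC of foo into x6 followed by
+// "jalr x0, x6, 0", while "call a0, foo" (PseudoCALLReg) uses a0 as both the
+// scratch and link register: an AUIPC of foo into a0 followed by
+// "jalr a0, a0, 0". The symbol and register names here are illustrative.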
void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
MCInst TmpInst;
- MCOperand Func = MI.getOperand(0);
- unsigned Ra = (MI.getOpcode() == RISCV::PseudoTAIL) ? RISCV::X6 : RISCV::X1;
+ MCOperand Func;
+ unsigned Ra;
+ if (MI.getOpcode() == RISCV::PseudoTAIL) {
+ Func = MI.getOperand(0);
+ Ra = RISCV::X6;
+ } else if (MI.getOpcode() == RISCV::PseudoCALLReg) {
+ Func = MI.getOperand(1);
+ Ra = MI.getOperand(0).getReg();
+ } else {
+ Func = MI.getOperand(0);
+ Ra = RISCV::X1;
+ }
uint32_t Binary;
assert(Func.isExpr() && "Expected expression");
- const MCExpr *Expr = Func.getExpr();
-
- // Create function call expression CallExpr for AUIPC.
- const MCExpr *CallExpr =
- RISCVMCExpr::create(Expr, RISCVMCExpr::VK_RISCV_CALL, Ctx);
+ const MCExpr *CallExpr = Func.getExpr();
// Emit AUIPC Ra, Func with R_RISCV_CALL relocation type.
TmpInst = MCInstBuilder(RISCV::AUIPC)
@@ -119,12 +128,50 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
// Emit JALR X0, X6, 0
TmpInst = MCInstBuilder(RISCV::JALR).addReg(RISCV::X0).addReg(Ra).addImm(0);
else
- // Emit JALR X1, X1, 0
+ // Emit JALR Ra, Ra, 0
TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
support::endian::write(OS, Binary, support::little);
}
+// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
+void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ MCOperand DestReg = MI.getOperand(0);
+ MCOperand SrcReg = MI.getOperand(1);
+ MCOperand TPReg = MI.getOperand(2);
+ assert(TPReg.isReg() && TPReg.getReg() == RISCV::X4 &&
+ "Expected thread pointer as second input to TP-relative add");
+
+ MCOperand SrcSymbol = MI.getOperand(3);
+ assert(SrcSymbol.isExpr() &&
+ "Expected expression as third input to TP-relative add");
+
+ const RISCVMCExpr *Expr = dyn_cast<RISCVMCExpr>(SrcSymbol.getExpr());
+ assert(Expr && Expr->getKind() == RISCVMCExpr::VK_RISCV_TPREL_ADD &&
+ "Expected tprel_add relocation on TP-relative symbol");
+
+ // Emit the correct tprel_add relocation for the symbol.
+ Fixups.push_back(MCFixup::create(
+ 0, Expr, MCFixupKind(RISCV::fixup_riscv_tprel_add), MI.getLoc()));
+
+ // Emit fixup_riscv_relax for tprel_add where the relax feature is enabled.
+ if (STI.getFeatureBits()[RISCV::FeatureRelax]) {
+ const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+ Fixups.push_back(MCFixup::create(
+ 0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
+ }
+
+ // Emit a normal ADD instruction with the given operands.
+ MCInst TmpInst = MCInstBuilder(RISCV::ADD)
+ .addOperand(DestReg)
+ .addOperand(SrcReg)
+ .addOperand(TPReg);
+ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ support::endian::write(OS, Binary, support::little);
+}
+
void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -132,13 +179,20 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Get byte count of instruction.
unsigned Size = Desc.getSize();
- if (MI.getOpcode() == RISCV::PseudoCALL ||
+ if (MI.getOpcode() == RISCV::PseudoCALLReg ||
+ MI.getOpcode() == RISCV::PseudoCALL ||
MI.getOpcode() == RISCV::PseudoTAIL) {
expandFunctionCall(MI, OS, Fixups, STI);
MCNumEmitted += 2;
return;
}
+ if (MI.getOpcode() == RISCV::PseudoAddTPRel) {
+ expandAddTPRel(MI, OS, Fixups, STI);
+ MCNumEmitted += 1;
+ return;
+ }
+
switch (Size) {
default:
llvm_unreachable("Unhandled encodeInstruction length!");
@@ -205,6 +259,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
const MCExpr *Expr = MO.getExpr();
MCExpr::ExprKind Kind = Expr->getKind();
RISCV::Fixups FixupKind = RISCV::fixup_riscv_invalid;
+ bool RelaxCandidate = false;
if (Kind == MCExpr::Target) {
const RISCVMCExpr *RVExpr = cast<RISCVMCExpr>(Expr);
@@ -212,6 +267,13 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
case RISCVMCExpr::VK_RISCV_None:
case RISCVMCExpr::VK_RISCV_Invalid:
llvm_unreachable("Unhandled fixup kind!");
+ case RISCVMCExpr::VK_RISCV_TPREL_ADD:
+ // tprel_add is only used to indicate that a relocation should be emitted
+ // for an add instruction used in TP-relative addressing. It should not be
+ // expanded as if representing an actual instruction operand and so to
+ // encounter it here is an error.
+ llvm_unreachable(
+ "VK_RISCV_TPREL_ADD should not represent an instruction operand");
case RISCVMCExpr::VK_RISCV_LO:
if (MIFrm == RISCVII::InstFormatI)
FixupKind = RISCV::fixup_riscv_lo12_i;
@@ -219,9 +281,11 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
FixupKind = RISCV::fixup_riscv_lo12_s;
else
llvm_unreachable("VK_RISCV_LO used with unexpected instruction format");
+ RelaxCandidate = true;
break;
case RISCVMCExpr::VK_RISCV_HI:
FixupKind = RISCV::fixup_riscv_hi20;
+ RelaxCandidate = true;
break;
case RISCVMCExpr::VK_RISCV_PCREL_LO:
if (MIFrm == RISCVII::InstFormatI)
@@ -231,12 +295,42 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
else
llvm_unreachable(
"VK_RISCV_PCREL_LO used with unexpected instruction format");
+ RelaxCandidate = true;
break;
case RISCVMCExpr::VK_RISCV_PCREL_HI:
FixupKind = RISCV::fixup_riscv_pcrel_hi20;
+ RelaxCandidate = true;
+ break;
+ case RISCVMCExpr::VK_RISCV_GOT_HI:
+ FixupKind = RISCV::fixup_riscv_got_hi20;
+ break;
+ case RISCVMCExpr::VK_RISCV_TPREL_LO:
+ if (MIFrm == RISCVII::InstFormatI)
+ FixupKind = RISCV::fixup_riscv_tprel_lo12_i;
+ else if (MIFrm == RISCVII::InstFormatS)
+ FixupKind = RISCV::fixup_riscv_tprel_lo12_s;
+ else
+ llvm_unreachable(
+ "VK_RISCV_TPREL_LO used with unexpected instruction format");
+ RelaxCandidate = true;
+ break;
+ case RISCVMCExpr::VK_RISCV_TPREL_HI:
+ FixupKind = RISCV::fixup_riscv_tprel_hi20;
+ RelaxCandidate = true;
+ break;
+ case RISCVMCExpr::VK_RISCV_TLS_GOT_HI:
+ FixupKind = RISCV::fixup_riscv_tls_got_hi20;
+ break;
+ case RISCVMCExpr::VK_RISCV_TLS_GD_HI:
+ FixupKind = RISCV::fixup_riscv_tls_gd_hi20;
break;
case RISCVMCExpr::VK_RISCV_CALL:
FixupKind = RISCV::fixup_riscv_call;
+ RelaxCandidate = true;
+ break;
+ case RISCVMCExpr::VK_RISCV_CALL_PLT:
+ FixupKind = RISCV::fixup_riscv_call_plt;
+ RelaxCandidate = true;
break;
}
} else if (Kind == MCExpr::SymbolRef &&
@@ -258,13 +352,15 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
MCFixup::create(0, Expr, MCFixupKind(FixupKind), MI.getLoc()));
++MCNumFixups;
- if (EnableRelax) {
- if (FixupKind == RISCV::fixup_riscv_call) {
- Fixups.push_back(
- MCFixup::create(0, Expr, MCFixupKind(RISCV::fixup_riscv_relax),
- MI.getLoc()));
- ++MCNumFixups;
- }
+ // Ensure an R_RISCV_RELAX relocation will be emitted if linker relaxation is
+ // enabled and the current fixup will result in a relocation that may be
+ // relaxed.
+ if (EnableRelax && RelaxCandidate) {
+ const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+ Fixups.push_back(
+ MCFixup::create(0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax),
+ MI.getLoc()));
+ ++MCNumFixups;
}
return 0;
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 53648a5922c8..ae25ec818171 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- RISCVMCExpr.cpp - RISCV specific MC expression classes ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
#include "RISCVMCExpr.h"
+#include "MCTargetDesc/RISCVAsmBackend.h"
+#include "RISCV.h"
#include "RISCVFixupKinds.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
@@ -32,11 +34,15 @@ const RISCVMCExpr *RISCVMCExpr::create(const MCExpr *Expr, VariantKind Kind,
}
void RISCVMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
- bool HasVariant =
- ((getKind() != VK_RISCV_None) && (getKind() != VK_RISCV_CALL));
+ VariantKind Kind = getKind();
+ bool HasVariant = ((Kind != VK_RISCV_None) && (Kind != VK_RISCV_CALL) &&
+ (Kind != VK_RISCV_CALL_PLT));
+
if (HasVariant)
OS << '%' << getVariantKindName(getKind()) << '(';
Expr->print(OS, MAI);
+ if (Kind == VK_RISCV_CALL_PLT)
+ OS << "@plt";
if (HasVariant)
OS << ')';
}
@@ -50,19 +56,30 @@ const MCFixup *RISCVMCExpr::getPCRelHiFixup() const {
if (!AUIPCSRE)
return nullptr;
- const auto *DF =
- dyn_cast_or_null<MCDataFragment>(AUIPCSRE->findAssociatedFragment());
+ const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ const auto *DF = dyn_cast_or_null<MCDataFragment>(AUIPCSymbol->getFragment());
+
if (!DF)
return nullptr;
- const MCSymbol *AUIPCSymbol = &AUIPCSRE->getSymbol();
+ uint64_t Offset = AUIPCSymbol->getOffset();
+ if (DF->getContents().size() == Offset) {
+ DF = dyn_cast_or_null<MCDataFragment>(DF->getNextNode());
+ if (!DF)
+ return nullptr;
+ Offset = 0;
+ }
+
for (const MCFixup &F : DF->getFixups()) {
- if (F.getOffset() != AUIPCSymbol->getOffset())
+ if (F.getOffset() != Offset)
continue;
switch ((unsigned)F.getKind()) {
default:
continue;
+ case RISCV::fixup_riscv_got_hi20:
+ case RISCV::fixup_riscv_tls_got_hi20:
+ case RISCV::fixup_riscv_tls_gd_hi20:
case RISCV::fixup_riscv_pcrel_hi20:
return &F;
}
@@ -79,6 +96,16 @@ bool RISCVMCExpr::evaluatePCRelLo(MCValue &Res, const MCAsmLayout *Layout,
// (<real target> + <offset from this fixup to the auipc fixup>). The Fixup
// is pcrel relative to the VK_RISCV_PCREL_LO fixup, so we need to add the
// offset to the VK_RISCV_PCREL_HI Fixup from VK_RISCV_PCREL_LO to correct.
+
+ // Don't try to evaluate if the fixup will be forced as a relocation (e.g.
+ // as linker relaxation is enabled). If we evaluated pcrel_lo in this case,
+ // the modified fixup will be converted into a relocation that no longer
+ // points to the pcrel_hi as the linker requires.
+ auto &RAB =
+ static_cast<RISCVAsmBackend &>(Layout->getAssembler().getBackend());
+ if (RAB.willForceRelocations())
+ return false;
+
MCValue AUIPCLoc;
if (!getSubExpr()->evaluateAsValue(AUIPCLoc, *Layout))
return false;
@@ -137,6 +164,12 @@ bool RISCVMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
case VK_RISCV_HI:
case VK_RISCV_PCREL_LO:
case VK_RISCV_PCREL_HI:
+ case VK_RISCV_GOT_HI:
+ case VK_RISCV_TPREL_LO:
+ case VK_RISCV_TPREL_HI:
+ case VK_RISCV_TPREL_ADD:
+ case VK_RISCV_TLS_GOT_HI:
+ case VK_RISCV_TLS_GD_HI:
return false;
}
}
@@ -154,6 +187,12 @@ RISCVMCExpr::VariantKind RISCVMCExpr::getVariantKindForName(StringRef name) {
.Case("hi", VK_RISCV_HI)
.Case("pcrel_lo", VK_RISCV_PCREL_LO)
.Case("pcrel_hi", VK_RISCV_PCREL_HI)
+ .Case("got_pcrel_hi", VK_RISCV_GOT_HI)
+ .Case("tprel_lo", VK_RISCV_TPREL_LO)
+ .Case("tprel_hi", VK_RISCV_TPREL_HI)
+ .Case("tprel_add", VK_RISCV_TPREL_ADD)
+ .Case("tls_ie_pcrel_hi", VK_RISCV_TLS_GOT_HI)
+ .Case("tls_gd_pcrel_hi", VK_RISCV_TLS_GD_HI)
.Default(VK_RISCV_Invalid);
}
@@ -169,14 +208,71 @@ StringRef RISCVMCExpr::getVariantKindName(VariantKind Kind) {
return "pcrel_lo";
case VK_RISCV_PCREL_HI:
return "pcrel_hi";
+ case VK_RISCV_GOT_HI:
+ return "got_pcrel_hi";
+ case VK_RISCV_TPREL_LO:
+ return "tprel_lo";
+ case VK_RISCV_TPREL_HI:
+ return "tprel_hi";
+ case VK_RISCV_TPREL_ADD:
+ return "tprel_add";
+ case VK_RISCV_TLS_GOT_HI:
+ return "tls_ie_pcrel_hi";
+ case VK_RISCV_TLS_GD_HI:
+ return "tls_gd_pcrel_hi";
}
}
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expression");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ // We're known to be under a TLS fixup, so any symbol should be
+ // modified. There should be only one.
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void RISCVMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ switch (getKind()) {
+ default:
+ return;
+ case VK_RISCV_TPREL_HI:
+ case VK_RISCV_TLS_GOT_HI:
+ case VK_RISCV_TLS_GD_HI:
+ break;
+ }
+
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+}
+
bool RISCVMCExpr::evaluateAsConstant(int64_t &Res) const {
MCValue Value;
if (Kind == VK_RISCV_PCREL_HI || Kind == VK_RISCV_PCREL_LO ||
- Kind == VK_RISCV_CALL)
+ Kind == VK_RISCV_GOT_HI || Kind == VK_RISCV_TPREL_HI ||
+ Kind == VK_RISCV_TPREL_LO || Kind == VK_RISCV_TPREL_ADD ||
+ Kind == VK_RISCV_TLS_GOT_HI || Kind == VK_RISCV_TLS_GD_HI ||
+ Kind == VK_RISCV_CALL || Kind == VK_RISCV_CALL_PLT)
return false;
if (!getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr))
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index 4eafcc08b51f..b5a292dc1b1a 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -1,9 +1,8 @@
//===-- RISCVMCExpr.h - RISCV specific MC expression classes ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,7 +28,14 @@ public:
VK_RISCV_HI,
VK_RISCV_PCREL_LO,
VK_RISCV_PCREL_HI,
+ VK_RISCV_GOT_HI,
+ VK_RISCV_TPREL_LO,
+ VK_RISCV_TPREL_HI,
+ VK_RISCV_TPREL_ADD,
+ VK_RISCV_TLS_GOT_HI,
+ VK_RISCV_TLS_GD_HI,
VK_RISCV_CALL,
+ VK_RISCV_CALL_PLT,
VK_RISCV_Invalid
};
@@ -53,11 +59,11 @@ public:
const MCExpr *getSubExpr() const { return Expr; }
- /// Get the MCExpr of the VK_RISCV_PCREL_HI Fixup that the
- /// VK_RISCV_PCREL_LO points to.
+ /// Get the corresponding PC-relative HI fixup that a VK_RISCV_PCREL_LO
+ /// points to.
///
/// \returns nullptr if this isn't a VK_RISCV_PCREL_LO pointing to a
- /// VK_RISCV_PCREL_HI.
+ /// known PC-relative HI fixup.
const MCFixup *getPCRelHiFixup() const;
void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
@@ -68,8 +74,7 @@ public:
return getSubExpr()->findAssociatedFragment();
}
- // There are no TLS RISCVMCExprs at the moment.
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
bool evaluateAsConstant(int64_t &Res) const;
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 133f3cd3d39a..bc45262ab2de 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- RISCVMCTargetDesc.cpp - RISCV Target Descriptions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -12,10 +11,11 @@
//===----------------------------------------------------------------------===//
#include "RISCVMCTargetDesc.h"
-#include "InstPrinter/RISCVInstPrinter.h"
#include "RISCVELFStreamer.h"
+#include "RISCVInstPrinter.h"
#include "RISCVMCAsmInfo.h"
#include "RISCVTargetStreamer.h"
+#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -50,7 +50,13 @@ static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) {
static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
- return new RISCVMCAsmInfo(TT);
+ MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
+
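+ // Record the initial CFA (the stack pointer with offset 0) so unwind info
+ // is well defined before any prologue CFI directives are emitted.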
+ unsigned SP = MRI.getDwarfRegNum(RISCV::X2, true);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
+ MAI->addInitialFrameState(Inst);
+
+ return MAI;
}
static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index 0228253c08cb..b30997533ddf 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- RISCVMCTargetDesc.h - RISCV Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,9 +32,6 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheRISCV32Target();
-Target &getTheRISCV64Target();
-
MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 8d5ef3dbd17f..913e1f744192 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===-- RISCVTargetStreamer.cpp - RISCV Target Streamer Methods -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 74ec9e303933..1becc134b2a2 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- RISCVTargetStreamer.h - RISCV Target Streamer ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h
index b25aee46200d..834a1d171143 100644
--- a/lib/Target/RISCV/RISCV.h
+++ b/lib/Target/RISCV/RISCV.h
@@ -1,9 +1,8 @@
//===-- RISCV.h - Top-level interface for RISCV -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td
index 0e86e2bc5e98..e19b70b8e709 100644
--- a/lib/Target/RISCV/RISCV.td
+++ b/lib/Target/RISCV/RISCV.td
@@ -1,9 +1,8 @@
//===-- RISCV.td - Describe the RISCV Target Machine -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -55,23 +54,29 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
def RV64 : HwMode<"+64bit">;
def RV32 : HwMode<"-64bit">;
+def FeatureRV32E
+ : SubtargetFeature<"e", "IsRV32E", "true",
+ "Implements RV32E (provides 16 rather than 32 GPRs)">;
+def IsRV32E : Predicate<"Subtarget->isRV32E()">,
+ AssemblerPredicate<"FeatureRV32E">;
+
def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
"Enable Linker relaxation.">;
//===----------------------------------------------------------------------===//
-// Registers, calling conventions, instruction descriptions.
+// Named operands for CSR instructions.
//===----------------------------------------------------------------------===//
-include "RISCVRegisterInfo.td"
-include "RISCVCallingConv.td"
-include "RISCVInstrInfo.td"
+include "RISCVSystemOperands.td"
//===----------------------------------------------------------------------===//
-// Named operands for CSR instructions.
+// Registers, calling conventions, instruction descriptions.
//===----------------------------------------------------------------------===//
-include "RISCVSystemOperands.td"
+include "RISCVRegisterInfo.td"
+include "RISCVCallingConv.td"
+include "RISCVInstrInfo.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.
diff --git a/lib/Target/RISCV/RISCVAsmPrinter.cpp b/lib/Target/RISCV/RISCVAsmPrinter.cpp
index bdf8e5d840b3..57631dcb5115 100644
--- a/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- RISCVAsmPrinter.cpp - RISCV LLVM assembly writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "RISCV.h"
-#include "InstPrinter/RISCVInstPrinter.h"
+#include "MCTargetDesc/RISCVInstPrinter.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "RISCVTargetMachine.h"
+#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,11 +43,9 @@ public:
void EmitInstruction(const MachineInstr *MI) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
@@ -84,39 +82,50 @@ void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &OS) {
- if (AsmVariant != 0)
- report_fatal_error("There are no defined alternate asm variants");
-
// First try the generic code, which knows about modifiers like 'c' and 'n'.
- if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS))
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
return false;
- if (!ExtraCode) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate:
- OS << MO.getImm();
- return false;
- case MachineOperand::MO_Register:
- OS << RISCVInstPrinter::getRegisterName(MO.getReg());
- return false;
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
default:
+ return true; // Unknown modifier.
+ case 'z': // Print zero register if zero, regular printing otherwise.
+ if (MO.isImm() && MO.getImm() == 0) {
+ OS << RISCVInstPrinter::getRegisterName(RISCV::X0);
+ return false;
+ }
break;
+ case 'i': // Literal 'i' if operand is not a register.
+ if (!MO.isReg())
+ OS << 'i';
+ return false;
}
}
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ OS << MO.getImm();
+ return false;
+ case MachineOperand::MO_Register:
+ OS << RISCVInstPrinter::getRegisterName(MO.getReg());
+ return false;
+ default:
+ break;
+ }
+
return true;
}
bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+ unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
- if (AsmVariant != 0)
- report_fatal_error("There are no defined alternate asm variants");
-
if (!ExtraCode) {
const MachineOperand &MO = MI->getOperand(OpNo);
// For now, we only support register memory operands in registers and
@@ -128,7 +137,7 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
- return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
}
// Force static initialization.
diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td
index ef146258c383..db13e6e8beca 100644
--- a/lib/Target/RISCV/RISCVCallingConv.td
+++ b/lib/Target/RISCV/RISCVCallingConv.td
@@ -1,9 +1,8 @@
//===-- RISCVCallingConv.td - Calling Conventions RISCV ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,7 +13,16 @@
// The RISC-V calling convention is handled with custom code in
// RISCVISelLowering.cpp (CC_RISCV).
-def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
+def CSR_ILP32_LP64
+ : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
+
+def CSR_ILP32F_LP64F
+ : CalleeSavedRegs<(add CSR_ILP32_LP64,
+ F8_32, F9_32, (sequence "F%u_32", 18, 27))>;
+
+def CSR_ILP32D_LP64D
+ : CalleeSavedRegs<(add CSR_ILP32_LP64,
+ F8_64, F9_64, (sequence "F%u_64", 18, 27))>;
// Needed for implementation of RISCVRegisterInfo::getNoPreservedMask()
def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 35c185aa5edd..1c5171a7b7a4 100644
--- a/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -1,9 +1,8 @@
//===-- RISCVExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,22 @@ private:
bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI);
+ bool expandAuipcInstPair(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned FlagsHi, unsigned SecondOpcode);
+ bool expandLoadLocalAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandLoadAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandLoadTLSIEAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandLoadTLSGDAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
};
char RISCVExpandPseudo::ID = 0;
@@ -87,6 +102,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoAtomicLoadNand32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
NextMBBI);
+ case RISCV::PseudoAtomicLoadNand64:
+ return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
+ NextMBBI);
case RISCV::PseudoMaskedAtomicSwap32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
NextMBBI);
@@ -111,8 +129,18 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
NextMBBI);
case RISCV::PseudoCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoCmpXchg64:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case RISCV::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ case RISCV::PseudoLLA:
+ return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
+ case RISCV::PseudoLA:
+ return expandLoadAddress(MBB, MBBI, NextMBBI);
+ case RISCV::PseudoLA_TLS_IE:
+ return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI);
+ case RISCV::PseudoLA_TLS_GD:
+ return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
}
return false;
@@ -152,12 +180,61 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) {
}
}
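+// 64-bit counterparts of the LR/SC helpers above: select the LR.D/SC.D
+// variant whose acquire/release bits match the requested atomic ordering.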
+static unsigned getLRForRMW64(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::LR_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::LR_D_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::LR_D;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::LR_D_AQ;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::LR_D_AQ_RL;
+ }
+}
+
+static unsigned getSCForRMW64(AtomicOrdering Ordering) {
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::SC_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::SC_D;
+ case AtomicOrdering::Release:
+ return RISCV::SC_D_RL;
+ case AtomicOrdering::AcquireRelease:
+ return RISCV::SC_D_RL;
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::SC_D_AQ_RL;
+ }
+}
+
+static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
+ if (Width == 32)
+ return getLRForRMW32(Ordering);
+ if (Width == 64)
+ return getLRForRMW64(Ordering);
+ llvm_unreachable("Unexpected LR width\n");
+}
+
+static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
+ if (Width == 32)
+ return getSCForRMW32(Ordering);
+ if (Width == 64)
+ return getSCForRMW64(Ordering);
+ llvm_unreachable("Unexpected SC width\n");
+}
+
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB,
AtomicRMWInst::BinOp BinOp, int Width) {
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
unsigned DestReg = MI.getOperand(0).getReg();
unsigned ScratchReg = MI.getOperand(1).getReg();
unsigned AddrReg = MI.getOperand(2).getReg();
@@ -166,11 +243,11 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
// .loop:
- // lr.w dest, (addr)
+ // lr.[w|d] dest, (addr)
// binop scratch, dest, val
- // sc.w scratch, scratch, (addr)
+ // sc.[w|d] scratch, scratch, (addr)
// bnez scratch, loop
- BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
switch (BinOp) {
default:
@@ -184,7 +261,7 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
.addImm(-1);
break;
}
- BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
@@ -219,7 +296,7 @@ static void doMaskedAtomicBinOpExpansion(
const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
unsigned DestReg = MI.getOperand(0).getReg();
unsigned ScratchReg = MI.getOperand(1).getReg();
unsigned AddrReg = MI.getOperand(2).getReg();
@@ -333,7 +410,7 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp(
MachineBasicBlock::iterator &NextMBBI) {
assert(IsMasked == true &&
"Should only need to expand masked atomic max/min");
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
@@ -451,7 +528,6 @@ bool RISCVExpandPseudo::expandAtomicMinMaxOp(
bool RISCVExpandPseudo::expandAtomicCmpXchg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI) {
- assert(Width == 32 && "RV64 atomic expansion currently unsupported");
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB.getParent();
@@ -483,18 +559,18 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
if (!IsMasked) {
// .loophead:
- // lr.w dest, (addr)
+ // lr.[w|d] dest, (addr)
// bne dest, cmpval, done
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
.addReg(DestReg)
.addReg(CmpValReg)
.addMBB(DoneMBB);
// .looptail:
- // sc.w scratch, newval, (addr)
+ // sc.[w|d] scratch, newval, (addr)
// bnez scratch, loophead
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(NewValReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -507,7 +583,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
// and scratch, dest, mask
// bne scratch, cmpval, done
unsigned MaskReg = MI.getOperand(5).getReg();
- BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
+ BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
.addReg(AddrReg);
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
.addReg(DestReg)
@@ -525,7 +601,7 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
// bnez scratch, loophead
insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
MaskReg, ScratchReg);
- BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
+ BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
.addReg(AddrReg)
.addReg(ScratchReg);
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
@@ -545,6 +621,90 @@ bool RISCVExpandPseudo::expandAtomicCmpXchg(
return true;
}
+bool RISCVExpandPseudo::expandAuipcInstPair(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi,
+ unsigned SecondOpcode) {
+ MachineFunction *MF = MBB.getParent();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ const MachineOperand &Symbol = MI.getOperand(1);
+
+ MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Tell AsmPrinter that we unconditionally want the symbol of this label to be
+ // emitted.
+ NewMBB->setLabelMustBeEmitted();
+
+ MF->insert(++MBB.getIterator(), NewMBB);
+
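+ // Emit the AUIPC and the dependent second instruction into NewMBB. The
+ // second instruction refers to NewMBB's label with a %pcrel_lo flag, which
+ // resolves against the %pcrel_hi fixup carried by the AUIPC at that label.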
+ BuildMI(NewMBB, DL, TII->get(RISCV::AUIPC), DestReg)
+ .addDisp(Symbol, 0, FlagsHi);
+ BuildMI(NewMBB, DL, TII->get(SecondOpcode), DestReg)
+ .addReg(DestReg)
+ .addMBB(NewMBB, RISCVII::MO_PCREL_LO);
+
+ // Move all the rest of the instructions to NewMBB.
+ NewMBB->splice(NewMBB->end(), &MBB, std::next(MBBI), MBB.end());
+ // Update machine-CFG edges.
+ NewMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // Make the original basic block fall-through to the new.
+ MBB.addSuccessor(NewMBB);
+
+ // Make sure live-ins are correctly attached to this new basic block.
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *NewMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ return true;
+}
+
+bool RISCVExpandPseudo::expandLoadLocalAddress(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_PCREL_HI,
+ RISCV::ADDI);
+}
+
+bool RISCVExpandPseudo::expandLoadAddress(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction *MF = MBB.getParent();
+
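+ // For position-independent code, load the address out of the GOT (LW or LD
+ // depending on XLEN); otherwise materialise it directly with AUIPC+ADDI.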
+ unsigned SecondOpcode;
+ unsigned FlagsHi;
+ if (MF->getTarget().isPositionIndependent()) {
+ const auto &STI = MF->getSubtarget<RISCVSubtarget>();
+ SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW;
+ FlagsHi = RISCVII::MO_GOT_HI;
+ } else {
+ SecondOpcode = RISCV::ADDI;
+ FlagsHi = RISCVII::MO_PCREL_HI;
+ }
+ return expandAuipcInstPair(MBB, MBBI, NextMBBI, FlagsHi, SecondOpcode);
+}
+
+bool RISCVExpandPseudo::expandLoadTLSIEAddress(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction *MF = MBB.getParent();
+
+ const auto &STI = MF->getSubtarget<RISCVSubtarget>();
+ unsigned SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW;
+ return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GOT_HI,
+ SecondOpcode);
+}
+
+bool RISCVExpandPseudo::expandLoadTLSGDAddress(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GD_HI,
+ RISCV::ADDI);
+}
+
} // end of anonymous namespace
INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp
index 74417899c8da..32c3b9684d2c 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- RISCVFrameLowering.cpp - RISCV Frame Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCDwarf.h"
using namespace llvm;
@@ -97,6 +97,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+ const RISCVRegisterInfo *RI = STI.getRegisterInfo();
+ const RISCVInstrInfo *TII = STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
unsigned FPReg = getFPReg(STI);
@@ -120,6 +122,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Allocate space on the stack if necessary.
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
+ // Emit ".cfi_def_cfa_offset StackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
// The frame pointer is callee-saved, and code has been generated for us to
// save it to the stack. We need to skip over the storing of callee-saved
// registers as the frame pointer must be modified after it has been saved
@@ -129,10 +137,28 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
std::advance(MBBI, CSI.size());
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ for (const auto &Entry : CSI) {
+ int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
+ unsigned Reg = Entry.getReg();
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, RI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
// Generate new FP.
- if (hasFP(MF))
+ if (hasFP(MF)) {
adjustReg(MBB, MBBI, DL, FPReg, SPReg,
StackSize - RVFI->getVarArgsSaveSize(), MachineInstr::FrameSetup);
+
+ // Emit ".cfi_def_cfa $fp, 0"
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, RI->getDwarfRegNum(FPReg, true), 0));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
}
void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -142,6 +168,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
DebugLoc DL = MBBI->getDebugLoc();
+ const RISCVInstrInfo *TII = STI.getInstrInfo();
unsigned FPReg = getFPReg(STI);
unsigned SPReg = getSPReg(STI);
@@ -151,19 +178,58 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
auto LastFrameDestroy = std::prev(MBBI, MFI.getCalleeSavedInfo().size());
uint64_t StackSize = MFI.getStackSize();
+ uint64_t FPOffset = StackSize - RVFI->getVarArgsSaveSize();
// Restore the stack pointer using the value of the frame pointer. Only
// necessary if the stack pointer was modified, meaning the stack size is
// unknown.
if (RI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) {
assert(hasFP(MF) && "frame pointer should not have been eliminated");
- adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg,
- -StackSize + RVFI->getVarArgsSaveSize(),
+ adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -FPOffset,
MachineInstr::FrameDestroy);
}
+ if (hasFP(MF)) {
+ // Find the instruction that restores FP from the stack.
+ for (auto &I = LastFrameDestroy; I != MBBI; ++I) {
+ if (I->mayLoad() && I->getOperand(0).isReg()) {
+ unsigned DestReg = I->getOperand(0).getReg();
+ if (DestReg == FPReg) {
+ // If there is a frame pointer, then after restoring the $fp register we
+ // need to adjust the CFA to ($sp - FPOffset).
+ // Emit ".cfi_def_cfa $sp, -FPOffset"
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
+ nullptr, RI->getDwarfRegNum(SPReg, true), -FPOffset));
+ BuildMI(MBB, std::next(I), DL,
+ TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ break;
+ }
+ }
+ }
+ }
+
+ // Add CFI directives for callee-saved registers.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ // Iterate over list of callee-saved registers and emit .cfi_restore
+ // directives.
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
+ nullptr, RI->getDwarfRegNum(Reg, true)));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
// Deallocate stack
adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
+
+ // After restoring $sp, we need to adjust the CFA to ($sp + 0).
+ // Emit ".cfi_def_cfa_offset 0"
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h
index ca653c2b9f17..0e045c3ff853 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/lib/Target/RISCV/RISCVFrameLowering.h
@@ -1,9 +1,8 @@
//===-- RISCVFrameLowering.h - Define frame lowering for RISCV -*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index aa80365feb83..d0a3af375a6d 100644
--- a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -156,7 +155,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
}
+ break;
}
+ case RISCVISD::READ_CYCLE_WIDE:
+ assert(!Subtarget->is64Bit() && "READ_CYCLE_WIDE is only used on riscv32");
+
+ ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ReadCycleWide, DL, MVT::i32,
+ MVT::i32, MVT::Other,
+ Node->getOperand(0)));
+ return;
}
// Select the default instruction.
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 508dcbd009ed..ce7b85911ab6 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,8 @@
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
+#include "Utils/RISCVMatInt.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -43,6 +44,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
+ if (Subtarget.isRV32E())
+ report_fatal_error("Codegen not yet implemented for RV32E");
+
+ RISCVABI::ABI ABI = Subtarget.getTargetABI();
+ assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
+
+ switch (ABI) {
+ default:
+ report_fatal_error("Don't know how to lower this ABI");
+ case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_ILP32F:
+ case RISCVABI::ABI_ILP32D:
+ case RISCVABI::ABI_LP64:
+ case RISCVABI::ABI_LP64F:
+ case RISCVABI::ABI_LP64D:
+ break;
+ }
+
MVT XLenVT = Subtarget.getXLenVT();
// Set up the register classes.
@@ -81,10 +100,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (Subtarget.is64Bit()) {
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::ANY_EXTEND);
+ setOperationAction(ISD::SHL, MVT::i32, Custom);
+ setOperationAction(ISD::SRA, MVT::i32, Custom);
+ setOperationAction(ISD::SRL, MVT::i32, Custom);
}
if (!Subtarget.hasStdExtM()) {
@@ -97,14 +115,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, XLenVT, Expand);
}
+ if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+ setOperationAction(ISD::SDIV, MVT::i32, Custom);
+ setOperationAction(ISD::UDIV, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
- setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
- setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
- setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
+ setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
+ setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
+ setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
@@ -114,9 +138,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, XLenVT, Expand);
ISD::CondCode FPCCToExtend[] = {
- ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ,
- ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
- ISD::SETGT, ISD::SETGE, ISD::SETNE};
+ ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
+ ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
+ ISD::SETGE, ISD::SETNE};
ISD::NodeType FPOpToExtend[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM};
@@ -133,6 +157,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(Op, MVT::f32, Expand);
}
+ if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+
if (Subtarget.hasStdExtD()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -151,6 +178,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
+
+ // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
+ // Unfortunately this can't be determined just from the ISA naming string.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
+ Subtarget.is64Bit() ? Legal : Custom);
+
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
@@ -276,6 +310,11 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ return (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
+ (VT == MVT::f64 && Subtarget.hasStdExtD());
+}
+
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
@@ -326,6 +365,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return lowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
case ISD::VASTART:
@@ -334,6 +375,81 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
return lowerRETURNADDR(Op, DAG);
+ case ISD::SHL_PARTS:
+ return lowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ return lowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS:
+ return lowerShiftRightParts(Op, DAG, false);
+ case ISD::BITCAST: {
+ assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
+ "Unexpected custom legalisation");
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32)
+ return SDValue();
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+ return FPConv;
+ }
+ }
+}
+
+static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) {
+ return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
+}
+
+static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) {
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
+ Flags);
+}
+
+static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
+ SelectionDAG &DAG, unsigned Flags) {
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ N->getOffset(), Flags);
+}
+
+template <class NodeTy>
+SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
+ bool IsLocal) const {
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+
+ if (isPositionIndependent()) {
+ SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+ if (IsLocal)
+ // Use PC-relative addressing to access the symbol. This generates the
+ // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
+ // %pcrel_lo(auipc)).
+ return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+
+ // Use PC-relative addressing to access the GOT for this symbol, then load
+ // the address from the GOT. This generates the pattern (PseudoLA sym),
+ // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
+ }
+
+ switch (getTargetMachine().getCodeModel()) {
+ default:
+ report_fatal_error("Unsupported code model for lowering");
+ case CodeModel::Small: {
+ // Generate a sequence for accessing addresses within the first 2 GiB of
+ // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
+ SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
+ SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
+ SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
+ return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
+ }
+ case CodeModel::Medium: {
+ // Generate a sequence for accessing addresses within any 2 GiB range of
+ // the address space. This generates the pattern (PseudoLLA sym), which
+ // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
+ SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+ return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+ }
}
}
@@ -342,67 +458,145 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = N->getGlobal();
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
- if (isPositionIndependent())
- report_fatal_error("Unable to lowerGlobalAddress");
+ const GlobalValue *GV = N->getGlobal();
+ bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+ SDValue Addr = getAddr(N, DAG, IsLocal);
+
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
- SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
- SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
- SDValue MNLo =
- SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
if (Offset != 0)
- return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
+ return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
- return MNLo;
+ return Addr;
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
- const BlockAddress *BA = N->getBlockAddress();
- int64_t Offset = N->getOffset();
-
- if (isPositionIndependent())
- report_fatal_error("Unable to lowerBlockAddress");
- SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
- SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
- SDValue MNLo =
- SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0);
- return MNLo;
+ return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+
+ return getAddr(N, DAG);
+}
+
+SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG,
+ bool UseGOT) const {
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ const GlobalValue *GV = N->getGlobal();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ if (UseGOT) {
+ // Use PC-relative addressing to access the GOT for this TLS symbol, then
+ // load the address from the GOT and add the thread pointer. This generates
+ // the pattern (PseudoLA_TLS_IE sym), which expands to
+ // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
+
+ // Add the thread pointer.
+ SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
+ }
+
+ // Generate a sequence for accessing the address relative to the thread
+ // pointer, with the appropriate adjustment for the thread pointer offset.
+ // This generates the pattern
+ // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
+ SDValue AddrHi =
+ DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
+ SDValue AddrAdd =
+ DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
+ SDValue AddrLo =
+ DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
+
+ SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
+ SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
+ SDValue MNAdd = SDValue(
+ DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
+ 0);
+ return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
+}
+
+SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+ const GlobalValue *GV = N->getGlobal();
+
+ // Use a PC-relative addressing mode to access the global dynamic GOT address.
+ // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
+ // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+ SDValue Load =
+ SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
+
+ // Prepare argument list to generate call.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Load;
+ Entry.Ty = CallTy;
+ Args.push_back(Entry);
+
+ // Setup call to __tls_get_addr.
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, CallTy,
+ DAG.getExternalSymbol("__tls_get_addr", Ty),
+ std::move(Args));
+
+ return LowerCallTo(CLI).first;
+}
+
+SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
- ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
- const Constant *CPA = N->getConstVal();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
- unsigned Alignment = N->getAlignment();
-
- if (!isPositionIndependent()) {
- SDValue CPAHi =
- DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
- SDValue CPALo =
- DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
- SDValue MNLo =
- SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
- return MNLo;
- } else {
- report_fatal_error("Unable to lowerConstantPool");
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // Non-PIC TLS lowering should always use the LocalExec model.
+ TLSModel::Model Model = isPositionIndependent()
+ ? getTargetMachine().getTLSModel(N->getGlobal())
+ : TLSModel::LocalExec;
+
+ SDValue Addr;
+ switch (Model) {
+ case TLSModel::LocalExec:
+ Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
+ break;
+ case TLSModel::InitialExec:
+ Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
+ break;
+ case TLSModel::LocalDynamic:
+ case TLSModel::GeneralDynamic:
+ Addr = getDynamicTLSAddr(N, DAG);
+ break;
}
+
+ // In order to maximise the opportunity for common subexpression elimination,
+ // emit a separate ADD node for the global address offset instead of folding
+ // it in the global address node. Later peephole optimisations may choose to
+ // fold it back in when profitable.
+ if (Offset != 0)
+ return DAG.getNode(ISD::ADD, DL, Ty, Addr,
+ DAG.getConstant(Offset, DL, XLenVT));
+ return Addr;
}
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -513,29 +707,184 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
-// Return true if the given node is a shift with a non-constant shift amount.
-static bool isVariableShift(SDValue Val) {
- switch (Val.getOpcode()) {
+SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ EVT VT = Lo.getValueType();
+
+ // if Shamt-XLEN < 0: // Shamt < XLEN
+ // Lo = Lo << Shamt
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
+ // else:
+ // Lo = 0
+ // Hi = Lo << (Shamt-XLEN)
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
+ SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
+ SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
+ SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+
+ SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+ SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+ SDValue ShiftRightLo =
+ DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
+ SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+ SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+ SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
+
+ SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+ SDValue Parts[2] = {Lo, Hi};
+ return DAG.getMergeValues(Parts, DL);
+}
+
+SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+ bool IsSRA) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ EVT VT = Lo.getValueType();
+
+ // SRA expansion:
+ // if Shamt-XLEN < 0: // Shamt < XLEN
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+ // Hi = Hi >>s Shamt
+ // else:
+ // Lo = Hi >>s (Shamt-XLEN);
+ // Hi = Hi >>s (XLEN-1)
+ //
+ // SRL expansion:
+ // if Shamt-XLEN < 0: // Shamt < XLEN
+ // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+ // Hi = Hi >>u Shamt
+ // else:
+ // Lo = Hi >>u (Shamt-XLEN);
+ // Hi = 0;
+
+ unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue One = DAG.getConstant(1, DL, VT);
+ SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
+ SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
+ SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
+ SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+
+ SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+ SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+ SDValue ShiftLeftHi =
+ DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
+ SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+ SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+ SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
+ SDValue HiFalse =
+ IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
+
+ SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+ SDValue Parts[2] = {Lo, Hi};
+ return DAG.getMergeValues(Parts, DL);
+}
+
+// Returns the opcode of the target-specific SDNode that implements the 32-bit
+// form of the given Opcode.
+static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
+ switch (Opcode) {
default:
- return false;
+ llvm_unreachable("Unexpected opcode");
case ISD::SHL:
+ return RISCVISD::SLLW;
case ISD::SRA:
+ return RISCVISD::SRAW;
case ISD::SRL:
- return Val.getOperand(1).getOpcode() != ISD::Constant;
+ return RISCVISD::SRLW;
+ case ISD::SDIV:
+ return RISCVISD::DIVW;
+ case ISD::UDIV:
+ return RISCVISD::DIVUW;
+ case ISD::UREM:
+ return RISCVISD::REMUW;
}
}
-// Returns true if the given node is an sdiv, udiv, or urem with non-constant
-// operands.
-static bool isVariableSDivUDivURem(SDValue Val) {
- switch (Val.getOpcode()) {
+// Converts the given 32-bit operation to a target-specific SelectionDAG node.
+// Because i32 isn't a legal type for RV64, these operations would otherwise
+// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
+// instructions later on, because the fact that the operation was originally of
+// type i32 is lost.
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+ // ReplaceNodeResults requires we maintain the same type for the return value.
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
+}
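
The *W nodes created here stand for the RV64 instructions that operate on the low 32 bits and sign-extend the 32-bit result to 64 bits; a rough stand-alone model of SLLW (an illustration, not code from the patch):

#include <cstdint>

// SLLW reads only the low 32 bits of rs1 and the low 5 bits of rs2, and
// sign-extends the 32-bit result to 64 bits.
int64_t sllw_model(int64_t rs1, int64_t rs2) {
  uint32_t lo = (uint32_t)rs1;
  unsigned shamt = (unsigned)rs2 & 31;
  return (int64_t)(int32_t)(lo << shamt);
}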
+
+void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ switch (N->getOpcode()) {
default:
- return false;
+ llvm_unreachable("Don't know how to custom type legalize this operation!");
+ case ISD::READCYCLECOUNTER: {
+ assert(!Subtarget.is64Bit() &&
+ "READCYCLECOUNTER only has custom type legalization on riscv32");
+
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue RCW =
+ DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
+
+ Results.push_back(RCW);
+ Results.push_back(RCW.getValue(1));
+ Results.push_back(RCW.getValue(2));
+ break;
+ }
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ return;
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
case ISD::SDIV:
case ISD::UDIV:
case ISD::UREM:
- return Val.getOperand(0).getOpcode() != ISD::Constant &&
- Val.getOperand(1).getOpcode() != ISD::Constant;
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+ if (N->getOperand(0).getOpcode() == ISD::Constant ||
+ N->getOperand(1).getOpcode() == ISD::Constant)
+ return;
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
+ case ISD::BITCAST: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtF() && "Unexpected custom legalisation");
+ SDLoc DL(N);
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getValueType() != MVT::f32)
+ return;
+ SDValue FPConv =
+ DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+ break;
+ }
}
}
@@ -546,51 +895,225 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default:
break;
- case ISD::SHL:
- case ISD::SRL:
- case ISD::SRA: {
- assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
- if (!DCI.isBeforeLegalize())
- break;
- SDValue RHS = N->getOperand(1);
- if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
- (RHS->getOpcode() == ISD::AssertZext &&
- cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
- break;
- SDValue LHS = N->getOperand(0);
- SDLoc DL(N);
- SDValue NewRHS =
- DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
- DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
- return DCI.CombineTo(
- N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
- }
- case ISD::ANY_EXTEND: {
- // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
- // then instead sign-extend in order to increase the chance of being able
- // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
- SDValue Src = N->getOperand(0);
- if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
- break;
- if (!isVariableShift(Src) &&
- !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
- break;
- SDLoc DL(N);
- return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src));
- }
case RISCVISD::SplitF64: {
+ SDValue Op0 = N->getOperand(0);
// If the input to SplitF64 is just BuildPairF64 then the operation is
// redundant. Instead, use BuildPairF64's operands directly.
+ if (Op0->getOpcode() == RISCVISD::BuildPairF64)
+ return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+
+ SDLoc DL(N);
+
+ // It's cheaper to materialise two 32-bit integers than to load a double
+ // from the constant pool and transfer it to integer registers through the
+ // stack.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
+ APInt V = C->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
+ return DCI.CombineTo(N, Lo, Hi);
+ }
+
+ // This is a target-specific version of a DAGCombine performed in
+ // DAGCombiner::visitBITCAST. It performs the equivalent of:
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
+ !Op0.getNode()->hasOneUse())
+ break;
+ SDValue NewSplitF64 =
+ DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
+ Op0.getOperand(0));
+ SDValue Lo = NewSplitF64.getValue(0);
+ SDValue Hi = NewSplitF64.getValue(1);
+ APInt SignBit = APInt::getSignMask(32);
+ if (Op0.getOpcode() == ISD::FNEG) {
+ SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
+ DAG.getConstant(SignBit, DL, MVT::i32));
+ return DCI.CombineTo(N, Lo, NewHi);
+ }
+ assert(Op0.getOpcode() == ISD::FABS);
+ SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
+ DAG.getConstant(~SignBit, DL, MVT::i32));
+ return DCI.CombineTo(N, Lo, NewHi);
+ }
+ case RISCVISD::SLLW:
+ case RISCVISD::SRAW:
+ case RISCVISD::SRLW: {
+ // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
+ APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
+ if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case RISCVISD::FMV_X_ANYEXTW_RV64: {
+ SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
- if (Op0->getOpcode() != RISCVISD::BuildPairF64)
+ // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
+ // conversion is unnecessary and can be replaced with an ANY_EXTEND
+ // of the FMV_W_X_RV64 operand.
+ if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
+ SDValue AExtOp =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0));
+ return DCI.CombineTo(N, AExtOp);
+ }
+
+ // This is a target-specific version of a DAGCombine performed in
+ // DAGCombiner::visitBITCAST. It performs the equivalent of:
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
+ !Op0.getNode()->hasOneUse())
break;
- return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+ SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
+ Op0.getOperand(0));
+ APInt SignBit = APInt::getSignMask(32).sext(64);
+ if (Op0.getOpcode() == ISD::FNEG) {
+ return DCI.CombineTo(N,
+ DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
+ DAG.getConstant(SignBit, DL, MVT::i64)));
+ }
+ assert(Op0.getOpcode() == ISD::FABS);
+ return DCI.CombineTo(N,
+ DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
+ DAG.getConstant(~SignBit, DL, MVT::i64)));
}
}
return SDValue();
}
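
The SplitF64-of-constant fold above simply splits the double's bit pattern into two 32-bit immediates; in stand-alone C++ terms (a sketch, not code from the tree):

#include <cstdint>
#include <cstring>

void split_f64(double d, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &d, sizeof(Bits)); // the bitcastToAPInt() step
  Lo = (uint32_t)Bits;                  // V.trunc(32)
  Hi = (uint32_t)(Bits >> 32);          // V.lshr(32).trunc(32)
}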
+bool RISCVTargetLowering::isDesirableToCommuteWithShift(
+ const SDNode *N, CombineLevel Level) const {
+ // The following folds are only desirable if `(OP _, c1 << c2)` can be
+ // materialised in fewer instructions than `(OP _, c1)`:
+ //
+ // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ SDValue N0 = N->getOperand(0);
+ EVT Ty = N0.getValueType();
+ if (Ty.isScalarInteger() &&
+ (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
+ auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (C1 && C2) {
+ APInt C1Int = C1->getAPIntValue();
+ APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
+
+ // We can materialise `c1 << c2` into an add immediate, so it's "free",
+ // and the combine should happen, to potentially allow further combines
+ // later.
+ if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
+ return true;
+
+ // We can materialise `c1` in an add immediate, so it's "free", and the
+ // combine should be prevented.
+ if (isLegalAddImmediate(C1Int.getSExtValue()))
+ return false;
+
+ // Neither constant will fit into an immediate, so find materialisation
+ // costs.
+ int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
+ Subtarget.is64Bit());
+ int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
+ ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
+
+ // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
+ // combine should be prevented.
+ if (C1Cost < ShiftedC1Cost)
+ return false;
+ }
+ }
+ return true;
+}
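
A concrete instance of the cost check above (illustrative numbers only; the real decision also consults RISCVMatInt::getIntMatCost when neither constant fits an immediate):

#include <cstdint>

// RISC-V ADDI/ORI take a 12-bit signed immediate.
bool fitsAddImmediate(int64_t V) { return V >= -2048 && V <= 2047; }

bool commuteIsDesirable(int64_t C1, unsigned C2) {
  int64_t Shifted = C1 << C2;
  if (fitsAddImmediate(Shifted))
    return true;   // (add (shl x, c2), c1 << c2) stays a single ADDI
  if (fitsAddImmediate(C1))
    return false;  // keep the cheap ADDI with c1; don't commute
  return true;     // neither fits; the real code compares materialisation cost
}
// commuteIsDesirable(1, 4)    -> true  (16 still fits in 12 bits)
// commuteIsDesirable(2047, 4) -> false (32752 does not)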
+
+unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ break;
+ case RISCVISD::SLLW:
+ case RISCVISD::SRAW:
+ case RISCVISD::SRLW:
+ case RISCVISD::DIVW:
+ case RISCVISD::DIVUW:
+ case RISCVISD::REMUW:
+ // TODO: As the result is sign-extended, this is conservatively correct. A
+ // more precise answer could be calculated for SRAW depending on known
+ // bits in the shift amount.
+ return 33;
+ }
+
+ return 1;
+}
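
The value 33 follows from the *W nodes producing an i64 that is a 32-bit result sign-extended to 64 bits, so bits 63..31 are all copies of the sign bit; a small stand-alone check (sketch):

#include <cstdint>

unsigned numSignBits(int64_t V) {
  uint64_t U = (uint64_t)V;
  uint64_t Sign = U >> 63;
  unsigned N = 1;
  while (N < 64 && ((U >> (63 - N)) & 1) == Sign)
    ++N;
  return N;
}
// numSignBits((int64_t)(int32_t)0x80000000) == 33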
+
+MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
+ MachineBasicBlock *BB) {
+ assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
+
+ // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
+ // Should the count have wrapped while it was being read, we need to try
+ // again.
+ // ...
+ // read:
+ // rdcycleh x3 # load high word of cycle
+ // rdcycle x2 # load low word of cycle
+ // rdcycleh x4 # load high word of cycle
+ // bne x3, x4, read # check if high word reads match, otherwise try again
+ // ...
+
+ MachineFunction &MF = *BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MF.insert(It, LoopMBB);
+
+ MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MF.insert(It, DoneMBB);
+
+ // Transfer the remainder of BB and its successor edges to DoneMBB.
+ DoneMBB->splice(DoneMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(LoopMBB);
+
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+ unsigned LoReg = MI.getOperand(0).getReg();
+ unsigned HiReg = MI.getOperand(1).getReg();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
+ .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
+ .addReg(RISCV::X0);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
+ .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
+ .addReg(RISCV::X0);
+ BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
+ .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
+ .addReg(RISCV::X0);
+
+ BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
+ .addReg(HiReg)
+ .addReg(ReadAgainReg)
+ .addMBB(LoopMBB);
+
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+
+ MI.eraseFromParent();
+
+ return DoneMBB;
+}
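
At the C level the emitted loop corresponds to the usual wrap-safe 64-bit counter read on riscv32 (a sketch assuming a riscv32 target and GNU-style inline assembly):

#include <cstdint>

uint64_t read_cycle64(void) {
  uint32_t Lo, Hi, HiAgain;
  do {
    __asm__ volatile("rdcycleh %0" : "=r"(Hi));
    __asm__ volatile("rdcycle  %0" : "=r"(Lo));
    __asm__ volatile("rdcycleh %0" : "=r"(HiAgain));
  } while (Hi != HiAgain); // retry if the low word wrapped between the reads
  return ((uint64_t)Hi << 32) | Lo;
}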
+
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -655,24 +1178,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
return BB;
}
-MachineBasicBlock *
-RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+static bool isSelectPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
- llvm_unreachable("Unexpected instr type to insert");
+ return false;
case RISCV::Select_GPR_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
- break;
- case RISCV::BuildPairF64Pseudo:
- return emitBuildPairF64Pseudo(MI, BB);
- case RISCV::SplitF64Pseudo:
- return emitSplitF64Pseudo(MI, BB);
+ return true;
}
+}
- // To "insert" a SELECT instruction, we actually have to insert the triangle
- // control-flow pattern. The incoming instruction knows the destination vreg
+static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
+ MachineBasicBlock *BB) {
+ // To "insert" Select_* instructions, we actually have to insert the triangle
+ // control-flow pattern. The incoming instructions know the destination vreg
// to set, the condition code register to branch on, the true/false values to
// select between, and the condcode to use to select the appropriate branch.
//
@@ -682,6 +1202,54 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// | IfFalseMBB
// | /
// TailMBB
+ //
+ // When we find a sequence of selects we attempt to optimize their emission
+ // by sharing the control flow. Currently we only handle cases where we have
+ // multiple selects with the exact same condition (same LHS, RHS and CC).
+ // The selects may be interleaved with other instructions if the other
+ // instructions meet some requirements we deem safe:
+ // - They are debug instructions. Otherwise,
+ // - They do not have side-effects, do not access memory and their inputs do
+ // not depend on the results of the select pseudo-instructions.
+ // The TrueV/FalseV operands of the selects cannot depend on the result of
+ // previous selects in the sequence.
+ // These conditions could be further relaxed. See the X86 target for a
+ // related approach and more information.
+ unsigned LHS = MI.getOperand(1).getReg();
+ unsigned RHS = MI.getOperand(2).getReg();
+ auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+
+ SmallVector<MachineInstr *, 4> SelectDebugValues;
+ SmallSet<unsigned, 4> SelectDests;
+ SelectDests.insert(MI.getOperand(0).getReg());
+
+ MachineInstr *LastSelectPseudo = &MI;
+
+ for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
+ SequenceMBBI != E; ++SequenceMBBI) {
+ if (SequenceMBBI->isDebugInstr())
+ continue;
+ else if (isSelectPseudo(*SequenceMBBI)) {
+ if (SequenceMBBI->getOperand(1).getReg() != LHS ||
+ SequenceMBBI->getOperand(2).getReg() != RHS ||
+ SequenceMBBI->getOperand(3).getImm() != CC ||
+ SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
+ SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
+ break;
+ LastSelectPseudo = &*SequenceMBBI;
+ SequenceMBBI->collectDebugValues(SelectDebugValues);
+ SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
+ } else {
+ if (SequenceMBBI->hasUnmodeledSideEffects() ||
+ SequenceMBBI->mayLoadOrStore())
+ break;
+ if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
+ return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
+ }))
+ break;
+ }
+ }
+
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
@@ -694,20 +1262,23 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(I, IfFalseMBB);
F->insert(I, TailMBB);
- // Move all remaining instructions to TailMBB.
- TailMBB->splice(TailMBB->begin(), HeadMBB,
- std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end());
+
+ // Transfer debug instructions associated with the selects to TailMBB.
+ for (MachineInstr *DebugInstr : SelectDebugValues) {
+ TailMBB->push_back(DebugInstr->removeFromParent());
+ }
+
+ // Move all instructions after the sequence to TailMBB.
+ TailMBB->splice(TailMBB->end(), HeadMBB,
+ std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
// Update machine-CFG edges by transferring all successors of the current
- // block to the new block which will contain the Phi node for the select.
+ // block to the new block which will contain the Phi nodes for the selects.
TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
// Set the successors for HeadMBB.
HeadMBB->addSuccessor(IfFalseMBB);
HeadMBB->addSuccessor(TailMBB);
// Insert appropriate branch.
- unsigned LHS = MI.getOperand(1).getReg();
- unsigned RHS = MI.getOperand(2).getReg();
- auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
BuildMI(HeadMBB, DL, TII.get(Opcode))
@@ -718,18 +1289,50 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// IfFalseMBB just falls through to TailMBB.
IfFalseMBB->addSuccessor(TailMBB);
- // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
- BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI),
- MI.getOperand(0).getReg())
- .addReg(MI.getOperand(4).getReg())
- .addMBB(HeadMBB)
- .addReg(MI.getOperand(5).getReg())
- .addMBB(IfFalseMBB);
+ // Create PHIs for all of the select pseudo-instructions.
+ auto SelectMBBI = MI.getIterator();
+ auto SelectEnd = std::next(LastSelectPseudo->getIterator());
+ auto InsertionPoint = TailMBB->begin();
+ while (SelectMBBI != SelectEnd) {
+ auto Next = std::next(SelectMBBI);
+ if (isSelectPseudo(*SelectMBBI)) {
+ // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
+ BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
+ TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
+ .addReg(SelectMBBI->getOperand(4).getReg())
+ .addMBB(HeadMBB)
+ .addReg(SelectMBBI->getOperand(5).getReg())
+ .addMBB(IfFalseMBB);
+ SelectMBBI->eraseFromParent();
+ }
+ SelectMBBI = Next;
+ }
- MI.eraseFromParent(); // The pseudo instruction is gone now.
+ F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
return TailMBB;
}
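
Source-level shape of the case this shared lowering targets: several selects that all use the same LHS, RHS and condition code, so a single branch plus one PHI per result suffices (illustrative sketch, not code from the patch):

struct Pair { int A, B; };

Pair twoSelects(int LHS, int RHS, int X, int Y, int U, int V) {
  Pair P;
  P.A = (LHS < RHS) ? X : Y;
  P.B = (LHS < RHS) ? U : V; // same condition, folded into the same triangle
  return P;
}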
+MachineBasicBlock *
+RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ case RISCV::ReadCycleWide:
+ assert(!Subtarget.is64Bit() &&
+           "ReadCycleWide is only to be used on riscv32");

+ return emitReadCycleWidePseudo(MI, BB);
+ case RISCV::Select_GPR_Using_CC_GPR:
+ case RISCV::Select_FPR32_Using_CC_GPR:
+ case RISCV::Select_FPR64_Using_CC_GPR:
+ return emitSelectPseudo(MI, BB);
+ case RISCV::BuildPairF64Pseudo:
+ return emitBuildPairF64Pseudo(MI, BB);
+ case RISCV::SplitF64Pseudo:
+ return emitSplitF64Pseudo(MI, BB);
+ }
+}
+
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
@@ -759,6 +1362,14 @@ static const MCPhysReg ArgGPRs[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
+static const MCPhysReg ArgFPR32s[] = {
+ RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32,
+ RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32
+};
+static const MCPhysReg ArgFPR64s[] = {
+ RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64,
+ RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64
+};
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
@@ -799,22 +1410,59 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
}
// Implements the RISC-V calling convention. Returns true upon failure.
-static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
+static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
+ MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
+ bool IsRet, Type *OrigTy) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
- if (ValVT == MVT::f32) {
- LocVT = MVT::i32;
- LocInfo = CCValAssign::BCvt;
- }
// Any return value split in to more than two values can't be returned
// directly.
if (IsRet && ValNo > 1)
return true;
+ // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a
+ // variadic argument, or if no F32 argument registers are available.
+ bool UseGPRForF32 = true;
+ // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
+ // variadic argument, or if no F64 argument registers are available.
+ bool UseGPRForF64 = true;
+
+ switch (ABI) {
+ default:
+ llvm_unreachable("Unexpected ABI");
+ case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_LP64:
+ break;
+ case RISCVABI::ABI_ILP32F:
+ case RISCVABI::ABI_LP64F:
+ UseGPRForF32 = !IsFixed;
+ break;
+ case RISCVABI::ABI_ILP32D:
+ case RISCVABI::ABI_LP64D:
+ UseGPRForF32 = !IsFixed;
+ UseGPRForF64 = !IsFixed;
+ break;
+ }
+
+ if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s))
+ UseGPRForF32 = true;
+ if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s))
+ UseGPRForF64 = true;
+
+ // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local
+ // variables rather than directly checking against the target ABI.
+
+ if (UseGPRForF32 && ValVT == MVT::f32) {
+ LocVT = XLenVT;
+ LocInfo = CCValAssign::BCvt;
+ } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
+ }
+
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
// alignment (RV32) or 16-byte alignment (RV64). An aligned register should
@@ -838,8 +1486,9 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
assert(PendingLocs.size() == PendingArgFlags.size() &&
"PendingLocs and PendingArgFlags out of sync");
- // Handle passing f64 on RV32D with a soft float ABI.
- if (XLen == 32 && ValVT == MVT::f64) {
+ // Handle passing f64 on RV32D with a soft float ABI or when floating point
+ // registers are exhausted.
+ if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
"Can't lower f64 if it is split");
// Depending on available argument GPRS, f64 may be passed in a pair of
@@ -888,7 +1537,13 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
}
// Allocate to a register if possible, or else a stack slot.
- unsigned Reg = State.AllocateReg(ArgGPRs);
+ unsigned Reg;
+ if (ValVT == MVT::f32 && !UseGPRForF32)
+ Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s);
+ else if (ValVT == MVT::f64 && !UseGPRForF64)
+ Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s);
+ else
+ Reg = State.AllocateReg(ArgGPRs);
unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8);
// If we reach this point and PendingLocs is non-empty, we must be at the
@@ -909,15 +1564,17 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}
- assert(LocVT == XLenVT && "Expected an XLenVT at this stage");
+ assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) &&
+ "Expected an XLenVT at this stage");
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
- if (ValVT == MVT::f32) {
- LocVT = MVT::f32;
+ // When an f32 or f64 is passed on the stack, no bit-conversion is needed.
+ if (ValVT == MVT::f32 || ValVT == MVT::f64) {
+ LocVT = ValVT;
LocInfo = CCValAssign::Full;
}
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
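
As a worked example of the rules above (illustrative only; register assignments follow the RISC-V psABI), under the lp64d ABI fixed floating-point arguments use the FPR argument registers while they last, and variadic floating-point values fall back to GPRs via the BCvt path:

// Assumed lp64d ABI; hypothetical declarations for illustration.
double fixed_args(double A, float B, long C); // A -> fa0, B -> fa1, C -> a0
int    takes_varargs(const char *Fmt, ...);   // a variadic double is passed
                                              // in a GPR (the UseGPRForF64 case)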
@@ -940,7 +1597,8 @@ void RISCVTargetLowering::analyzeInputArgs(
else if (Ins[i].isOrigArg())
ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
- if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
+ RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
+ if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n');
@@ -960,7 +1618,8 @@ void RISCVTargetLowering::analyzeOutputArgs(
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
- if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
+ RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
+ if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n");
@@ -979,6 +1638,10 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
+ if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+ Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
+ break;
+ }
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
}
@@ -993,8 +1656,24 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
SDValue Val;
+ const TargetRegisterClass *RC;
+
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected register type");
+ case MVT::i32:
+ case MVT::i64:
+ RC = &RISCV::GPRRegClass;
+ break;
+ case MVT::f32:
+ RC = &RISCV::FPR32RegClass;
+ break;
+ case MVT::f64:
+ RC = &RISCV::FPR64RegClass;
+ break;
+ }
- unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+ unsigned VReg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
@@ -1014,6 +1693,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
+ if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
+ Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
+ break;
+ }
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
@@ -1040,6 +1723,7 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
case CCValAssign::Indirect:
+ case CCValAssign::BCvt:
ExtType = ISD::NON_EXTLOAD;
break;
}
@@ -1227,12 +1911,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
return Chain;
}
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
-bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
- CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
- const SmallVector<CCValAssign, 16> &ArgLocs) const {
+bool RISCVTargetLowering::isEligibleForTailCallOptimization(
+ CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+ const SmallVector<CCValAssign, 16> &ArgLocs) const {
auto &Callee = CLI.Callee;
auto CalleeCC = CLI.CallConv;
@@ -1335,8 +2019,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Check if it's really possible to do a tail call.
if (IsTailCall)
- IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF,
- ArgLocs);
+ IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
if (IsTailCall)
++NumTailCalls;
@@ -1482,9 +2165,21 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
// split it and then direct call can be matched by PseudoCALL.
if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
- Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
+ const GlobalValue *GV = S->getGlobal();
+
+ unsigned OpFlags = RISCVII::MO_CALL;
+ if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
+ OpFlags = RISCVII::MO_PLT;
+
+ Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
+ unsigned OpFlags = RISCVII::MO_CALL;
+
+ if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
+ nullptr))
+ OpFlags = RISCVII::MO_PLT;
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
}
// The first call operand is the chain and the second is the target address.
@@ -1567,8 +2262,9 @@ bool RISCVTargetLowering::CanLowerReturn(
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
- CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
+ RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
+ if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
+ ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
return false;
}
return true;
@@ -1679,6 +2375,24 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "RISCVISD::SplitF64";
case RISCVISD::TAIL:
return "RISCVISD::TAIL";
+ case RISCVISD::SLLW:
+ return "RISCVISD::SLLW";
+ case RISCVISD::SRAW:
+ return "RISCVISD::SRAW";
+ case RISCVISD::SRLW:
+ return "RISCVISD::SRLW";
+ case RISCVISD::DIVW:
+ return "RISCVISD::DIVW";
+ case RISCVISD::DIVUW:
+ return "RISCVISD::DIVUW";
+ case RISCVISD::REMUW:
+ return "RISCVISD::REMUW";
+ case RISCVISD::FMV_W_X_RV64:
+ return "RISCVISD::FMV_W_X_RV64";
+ case RISCVISD::FMV_X_ANYEXTW_RV64:
+ return "RISCVISD::FMV_X_ANYEXTW_RV64";
+ case RISCVISD::READ_CYCLE_WIDE:
+ return "RISCVISD::READ_CYCLE_WIDE";
}
return nullptr;
}
@@ -1701,6 +2415,44 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+void RISCVTargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Currently only support length 1 constraints.
+ if (Constraint.length() == 1) {
+ switch (Constraint[0]) {
+ case 'I':
+ // Validate & create a 12-bit signed immediate operand.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ uint64_t CVal = C->getSExtValue();
+ if (isInt<12>(CVal))
+ Ops.push_back(
+ DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
+ }
+ return;
+ case 'J':
+ // Validate & create an integer zero operand.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->getZExtValue() == 0)
+ Ops.push_back(
+ DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
+ return;
+ case 'K':
+ // Validate & create a 5-bit unsigned immediate operand.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ uint64_t CVal = C->getZExtValue();
+ if (isUInt<5>(CVal))
+ Ops.push_back(
+ DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
+ }
+ return;
+ default:
+ break;
+ }
+ }
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
@@ -1721,6 +2473,12 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+ // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
+ // point operations can't be used in an lr/sc sequence without breaking the
+ // forward-progress guarantee.
+ if (AI->isFloatingPointOperation())
+ return AtomicExpansionKind::CmpXChg;
+
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
@@ -1728,37 +2486,74 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
static Intrinsic::ID
-getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) {
- switch (BinOp) {
- default:
- llvm_unreachable("Unexpected AtomicRMW BinOp");
- case AtomicRMWInst::Xchg:
- return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
- case AtomicRMWInst::Add:
- return Intrinsic::riscv_masked_atomicrmw_add_i32;
- case AtomicRMWInst::Sub:
- return Intrinsic::riscv_masked_atomicrmw_sub_i32;
- case AtomicRMWInst::Nand:
- return Intrinsic::riscv_masked_atomicrmw_nand_i32;
- case AtomicRMWInst::Max:
- return Intrinsic::riscv_masked_atomicrmw_max_i32;
- case AtomicRMWInst::Min:
- return Intrinsic::riscv_masked_atomicrmw_min_i32;
- case AtomicRMWInst::UMax:
- return Intrinsic::riscv_masked_atomicrmw_umax_i32;
- case AtomicRMWInst::UMin:
- return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
+ if (XLen == 32) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i32;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i32;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i32;
+ }
+ }
+
+ if (XLen == 64) {
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ case AtomicRMWInst::Xchg:
+ return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
+ case AtomicRMWInst::Add:
+ return Intrinsic::riscv_masked_atomicrmw_add_i64;
+ case AtomicRMWInst::Sub:
+ return Intrinsic::riscv_masked_atomicrmw_sub_i64;
+ case AtomicRMWInst::Nand:
+ return Intrinsic::riscv_masked_atomicrmw_nand_i64;
+ case AtomicRMWInst::Max:
+ return Intrinsic::riscv_masked_atomicrmw_max_i64;
+ case AtomicRMWInst::Min:
+ return Intrinsic::riscv_masked_atomicrmw_min_i64;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::riscv_masked_atomicrmw_umax_i64;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::riscv_masked_atomicrmw_umin_i64;
+ }
}
+
+ llvm_unreachable("Unexpected XLen\n");
}
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
- Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering()));
+ unsigned XLen = Subtarget.getXLen();
+ Value *Ordering =
+ Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
Type *Tys[] = {AlignedAddr->getType()};
Function *LrwOpScwLoop = Intrinsic::getDeclaration(
AI->getModule(),
- getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys);
+ getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
+
+ if (XLen == 64) {
+ Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
+ }
+
+ Value *Result;
// Must pass the shift amount needed to sign extend the loaded value prior
// to performing a signed comparison for min/max. ShiftAmt is the number of
@@ -1770,13 +2565,18 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned ValWidth =
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
- Value *SextShamt = Builder.CreateSub(
- Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt);
- return Builder.CreateCall(LrwOpScwLoop,
- {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ Value *SextShamt =
+ Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
+ Result = Builder.CreateCall(LrwOpScwLoop,
+ {AlignedAddr, Incr, Mask, SextShamt, Ordering});
+ } else {
+ Result =
+ Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}
- return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+ if (XLen == 64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
}
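
The SextShamt passed above lets the expanded lr/sc loop sign-extend the narrow field in place before a signed min/max comparison; the underlying shift trick in stand-alone form (sketch, XLen = 32):

#include <cstdint>

// A field of `Width` bits at bit `Pos` of a 32-bit word is shifted to the top
// and arithmetic-shifted back, leaving it sign-extended through the upper
// bits (assumes arithmetic right shift of signed values).
int32_t signExtendInPlace(uint32_t Word, unsigned Pos, unsigned Width) {
  unsigned SextShamt = 32 - Width - Pos; // mirrors XLen - ValWidth - ShiftAmt
  return (int32_t)(Word << SextShamt) >> SextShamt;
}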
TargetLowering::AtomicExpansionKind
@@ -1791,10 +2591,31 @@ RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
- Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
+ unsigned XLen = Subtarget.getXLen();
+ Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
+ if (XLen == 64) {
+ CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+ NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
+ }
Type *Tys[] = {AlignedAddr->getType()};
- Function *MaskedCmpXchg = Intrinsic::getDeclaration(
- CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys);
- return Builder.CreateCall(MaskedCmpXchg,
- {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+ Function *MaskedCmpXchg =
+ Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
+ Value *Result = Builder.CreateCall(
+ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+ if (XLen == 64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
+}
+
+unsigned RISCVTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ return RISCV::X10;
+}
+
+unsigned RISCVTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ return RISCV::X11;
}
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 6970900bb062..17db03bbb69e 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -1,9 +1,8 @@
//===-- RISCVISelLowering.h - RISCV DAG Lowering Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,27 @@ enum NodeType : unsigned {
SELECT_CC,
BuildPairF64,
SplitF64,
- TAIL
+ TAIL,
+ // RV64I shifts, directly matching the semantics of the named RISC-V
+ // instructions.
+ SLLW,
+ SRAW,
+ SRLW,
+ // 32-bit operations from RV64M that can't be simply matched with a pattern
+ // at instruction selection time.
+ DIVW,
+ DIVUW,
+ REMUW,
+ // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
+ // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
+ // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
+ // This is a more convenient semantic for producing dagcombines that remove
+ // unnecessary GPR->FPR->GPR moves.
+ FMV_W_X_RV64,
+ FMV_X_ANYEXTW_RV64,
+ // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
+ // (returns (Lo, Hi)). It takes a chain operand.
+ READ_CYCLE_WIDE
};
}
@@ -56,11 +75,20 @@ public:
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+ bool hasBitPreservingFPLogic(EVT VT) const override;
+
// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
+
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
@@ -68,6 +96,10 @@ public:
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
@@ -75,6 +107,10 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return isa<LoadInst>(I) || isa<StoreInst>(I);
}
@@ -83,6 +119,28 @@ public:
Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ ISD::NodeType getExtendForAtomicOps() const override {
+ return ISD::SIGN_EXTEND;
+ }
+
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return false;
+ return true;
+ }
+ bool isDesirableToCommuteWithShift(const SDNode *N,
+ CombineLevel Level) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -110,17 +168,29 @@ private:
Type *Ty) const override {
return true;
}
+
+ template <class NodeTy>
+ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
+
+ SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+ bool UseGOT) const;
+ SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
+
+ bool shouldConsiderGEPOffsetSplit() const override { return true; }
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
- bool IsEligibleForTailCallOptimization(CCState &CCInfo,
- CallLoweringInfo &CLI, MachineFunction &MF,
- const SmallVector<CCValAssign, 16> &ArgLocs) const;
+ bool isEligibleForTailCallOptimization(
+ CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+ const SmallVector<CCValAssign, 16> &ArgLocs) const;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
diff --git a/lib/Target/RISCV/RISCVInstrFormats.td b/lib/Target/RISCV/RISCVInstrFormats.td
index ebd676a6056e..7229ebfe1db0 100644
--- a/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/lib/Target/RISCV/RISCVInstrFormats.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrFormats.td - RISCV Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -109,6 +108,35 @@ class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string
let isCodeGenOnly = 1;
}
+// Pseudo load instructions.
+class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
+ : Pseudo<(outs rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr"> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let isCodeGenOnly = 0;
+ let isAsmParserOnly = 1;
+}
+
+class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
+ : Pseudo<(outs rdty:$rd, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let isCodeGenOnly = 0;
+ let isAsmParserOnly = 1;
+}
+
+// Pseudo store instructions.
+class PseudoStore<string opcodestr, RegisterClass rsty = GPR>
+ : Pseudo<(outs rsty:$rs, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rs, $addr, $tmp"> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 1;
+ let isCodeGenOnly = 0;
+ let isAsmParserOnly = 1;
+}
+
// Instruction formats are listed in the order they appear in the RISC-V
// instruction set manual (R, I, S, B, U, J) with sub-formats (e.g. RVInstR4,
// RVInstRAtomic) sorted alphabetically.
diff --git a/lib/Target/RISCV/RISCVInstrFormatsC.td b/lib/Target/RISCV/RISCVInstrFormatsC.td
index bda8bbb558eb..690bec5181e2 100644
--- a/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrFormatsC.td - RISCV C Instruction Formats --*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp
index 76c74368ca11..99c8d2ef73de 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -291,9 +290,9 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
return 0;
// Remove the branch.
- I->eraseFromParent();
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
+ I->eraseFromParent();
I = MBB.end();
@@ -304,9 +303,9 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
return 1;
// Remove the branch.
- I->eraseFromParent();
if (BytesRemoved)
*BytesRemoved += getInstSizeInBytes(*I);
+ I->eraseFromParent();
return 2;
}
@@ -383,8 +382,8 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
.addMBB(&DestBB, RISCVII::MO_LO);
RS->enterBasicBlockEnd(MBB);
- unsigned Scav = RS->scavengeRegisterBackwards(
- RISCV::GPRRegClass, MachineBasicBlock::iterator(LuiMI), false, 0);
+ unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
+ LuiMI.getIterator(), false, 0);
MRI.replaceRegWith(ScratchReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
@@ -437,10 +436,16 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
return 0;
+ case RISCV::PseudoCALLReg:
case RISCV::PseudoCALL:
case RISCV::PseudoTAIL:
+ case RISCV::PseudoLLA:
+ case RISCV::PseudoLA:
+ case RISCV::PseudoLA_TLS_IE:
+ case RISCV::PseudoLA_TLS_GD:
return 8;
- case TargetOpcode::INLINEASM: {
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
@@ -448,3 +453,16 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
}
}
}
+
+bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+ switch(Opcode) {
+ default:
+ break;
+ case RISCV::ADDI:
+ case RISCV::ORI:
+ case RISCV::XORI:
+ return (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0);
+ }
+ return MI.isAsCheapAsAMove();
+}
diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h
index 1d3279c3d31e..ff098e660d19 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/lib/Target/RISCV/RISCVInstrInfo.h
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfo.h - RISCV Instruction Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -79,6 +78,8 @@ public:
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
+
+ bool isAsCheapAsAMove(const MachineInstr &MI) const override;
};
}
#endif
diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td
index d7cc13d4fabd..69bde15f1218 100644
--- a/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfo.td - Target Description for RISCV ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,42 +10,48 @@
//
//===----------------------------------------------------------------------===//
-include "RISCVInstrFormats.td"
-
//===----------------------------------------------------------------------===//
// RISC-V specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
-def SDT_RISCVCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
- SDTCisVT<1, i32>]>;
-def SDT_RISCVCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
- SDTCisVT<1, i32>]>;
-def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
- SDTCisSameAs<0, 4>,
- SDTCisSameAs<4, 5>]>;
-
-
-def Call : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def CallSeqStart : SDNode<"ISD::CALLSEQ_START", SDT_RISCVCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def CallSeqEnd : SDNode<"ISD::CALLSEQ_END", SDT_RISCVCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def URetFlag : SDNode<"RISCVISD::URET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-def SRetFlag : SDNode<"RISCVISD::SRET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-def MRetFlag : SDNode<"RISCVISD::MRET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-def SelectCC : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC,
- [SDNPInGlue]>;
-def Tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
+// Target-independent type requirements, but with target-specific formats.
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+// Target-dependent type requirements.
+def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
+def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 4>,
+ SDTCisSameAs<4, 5>]>;
+
+// Target-independent nodes, but with target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Target-dependent nodes.
+def riscv_call : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def riscv_ret_flag : SDNode<"RISCVISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def riscv_uret_flag : SDNode<"RISCVISD::URET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def riscv_sret_flag : SDNode<"RISCVISD::SRET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def riscv_mret_flag : SDNode<"RISCVISD::MRET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC,
+ [SDNPInGlue]>;
+def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def riscv_sllw : SDNode<"RISCVISD::SLLW", SDTIntShiftOp>;
+def riscv_sraw : SDNode<"RISCVISD::SRAW", SDTIntShiftOp>;
+def riscv_srlw : SDNode<"RISCVISD::SRLW", SDTIntShiftOp>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -185,6 +190,30 @@ def bare_symbol : Operand<XLenVT> {
let ParserMatchClass = BareSymbol;
}
+def CallSymbol : AsmOperandClass {
+ let Name = "CallSymbol";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidCallSymbol";
+ let ParserMethod = "parseCallSymbol";
+}
+
+// A bare symbol used in call/tail only.
+def call_symbol : Operand<XLenVT> {
+ let ParserMatchClass = CallSymbol;
+}
+
+def TPRelAddSymbol : AsmOperandClass {
+ let Name = "TPRelAddSymbol";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidTPRelAddSymbol";
+ let ParserMethod = "parseOperandWithModifier";
+}
+
+// A bare symbol with the %tprel_add variant.
+def tprel_add_symbol : Operand<XLenVT> {
+ let ParserMatchClass = TPRelAddSymbol;
+}
+
def CSRSystemRegister : AsmOperandClass {
let Name = "CSRSystemRegister";
let ParserMethod = "parseCSRSystemRegister";
@@ -234,6 +263,12 @@ def HI20 : SDNodeXForm<imm, [{
}]>;
//===----------------------------------------------------------------------===//
+// Instruction Formats
+//===----------------------------------------------------------------------===//
+
+include "RISCVInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
@@ -307,7 +342,8 @@ class Priv<string opcodestr, bits<7> funct7>
// Instructions
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isReMaterializable = 1, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
"lui", "$rd, $imm20">;
@@ -321,7 +357,7 @@ def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
let isCall = 1 in
def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd),
(ins GPR:$rs1, simm12:$imm12),
- "jalr", "$rd, $rs1, $imm12">;
+ "jalr", "$rd, ${imm12}(${rs1})">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
def BEQ : BranchCC_rri<0b000, "beq">;
@@ -343,13 +379,17 @@ def SW : Store_rri<0b010, "sw">;
// ADDI isn't always rematerializable, but isReMaterializable will be used as
// a hint which is verified in isReallyTriviallyReMaterializable.
-let isReMaterializable = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def ADDI : ALU_ri<0b000, "addi">;
def SLTI : ALU_ri<0b010, "slti">;
def SLTIU : ALU_ri<0b011, "sltiu">;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def XORI : ALU_ri<0b100, "xori">;
def ORI : ALU_ri<0b110, "ori">;
+}
+
def ANDI : ALU_ri<0b111, "andi">;
def SLLI : Shift_ri<0, 0b001, "slli">;
@@ -485,12 +525,6 @@ def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs),
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-// TODO la
-// TODO lb lh lw
-// TODO RV64I: ld
-// TODO sb sh sw
-// TODO RV64I: sd
-
def : InstAlias<"nop", (ADDI X0, X0, 0)>;
// Note that the size is 32 because up to 8 32-bit instructions are needed to
@@ -502,6 +536,22 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32,
def PseudoLI : Pseudo<(outs GPR:$rd), (ins ixlenimm_li:$imm), [],
"li", "$rd, $imm">;
+def PseudoLB : PseudoLoad<"lb">;
+def PseudoLBU : PseudoLoad<"lbu">;
+def PseudoLH : PseudoLoad<"lh">;
+def PseudoLHU : PseudoLoad<"lhu">;
+def PseudoLW : PseudoLoad<"lw">;
+
+def PseudoSB : PseudoStore<"sb">;
+def PseudoSH : PseudoStore<"sh">;
+def PseudoSW : PseudoStore<"sw">;
+
+let Predicates = [IsRV64] in {
+def PseudoLWU : PseudoLoad<"lwu">;
+def PseudoLD : PseudoLoad<"ld">;
+def PseudoSD : PseudoStore<"sd">;
+} // Predicates = [IsRV64]
+
def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>;
def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>;
@@ -547,27 +597,36 @@ def : InstAlias<"bgtu $rs, $rt, $offset",
def : InstAlias<"bleu $rs, $rt, $offset",
(BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>;
-// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction.
-def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>;
-def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
-def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>;
-def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>;
-def : InstAlias<"ret", (JALR X0, X1, 0), 2>;
+def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>;
+def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
+
+// Non-zero offset aliases of "jalr" are the lowest weight, followed by the
+// two-register form, then the one-register forms and finally "ret".
+def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0), 3>;
+def : InstAlias<"jr ${offset}(${rs})", (JALR X0, GPR:$rs, simm12:$offset)>;
+def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0), 3>;
+def : InstAlias<"jalr ${offset}(${rs})", (JALR X1, GPR:$rs, simm12:$offset)>;
+def : InstAlias<"jalr $rd, $rs", (JALR GPR:$rd, GPR:$rs, 0), 2>;
+def : InstAlias<"ret", (JALR X0, X1, 0), 4>;
+
+// Non-canonical forms for jump targets also accepted by the assembler.
+def : InstAlias<"jr $rs, $offset", (JALR X0, GPR:$rs, simm12:$offset), 0>;
+def : InstAlias<"jalr $rs, $offset", (JALR X1, GPR:$rs, simm12:$offset), 0>;
+def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset), 0>;
+
// TODO call
// TODO tail
def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw
-// CSR Addresses: 0xC00 == cycle, 0xC01 == time, 0xC02 == instret
-// 0xC80 == cycleh, 0xC81 == timeh, 0xC82 == instreth
-def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, 0xC02, X0)>;
-def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, 0xC00, X0)>;
-def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, 0xC01, X0)>;
+def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, INSTRET.Encoding, X0)>;
+def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, CYCLE.Encoding, X0)>;
+def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, TIME.Encoding, X0)>;
let Predicates = [IsRV32] in {
-def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, 0xC82, X0)>;
-def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, 0xC80, X0)>;
-def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, 0xC81, X0)>;
+def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, INSTRETH.Encoding, X0)>;
+def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, CYCLEH.Encoding, X0)>;
+def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, TIMEH.Encoding, X0)>;
} // Predicates = [IsRV32]
def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, csr_sysreg:$csr, X0)>;
@@ -593,6 +652,24 @@ def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>;
def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>;
let EmitPriority = 0 in {
+def : InstAlias<"lb $rd, (${rs1})",
+ (LB GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lh $rd, (${rs1})",
+ (LH GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lw $rd, (${rs1})",
+ (LW GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lbu $rd, (${rs1})",
+ (LBU GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"lhu $rd, (${rs1})",
+ (LHU GPR:$rd, GPR:$rs1, 0)>;
+
+def : InstAlias<"sb $rs2, (${rs1})",
+ (SB GPR:$rs2, GPR:$rs1, 0)>;
+def : InstAlias<"sh $rs2, (${rs1})",
+ (SH GPR:$rs2, GPR:$rs1, 0)>;
+def : InstAlias<"sw $rs2, (${rs1})",
+ (SW GPR:$rs2, GPR:$rs1, 0)>;
+
def : InstAlias<"add $rd, $rs1, $imm12",
(ADDI GPR:$rd, GPR:$rs1, simm12:$imm12)>;
def : InstAlias<"and $rd, $rs1, $imm12",
@@ -608,6 +685,13 @@ def : InstAlias<"srl $rd, $rs1, $shamt",
def : InstAlias<"sra $rd, $rs1, $shamt",
(SRAI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt)>;
let Predicates = [IsRV64] in {
+def : InstAlias<"lwu $rd, (${rs1})",
+ (LWU GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"ld $rd, (${rs1})",
+ (LD GPR:$rd, GPR:$rs1, 0)>;
+def : InstAlias<"sd $rs2, (${rs1})",
+ (SD GPR:$rs2, GPR:$rs1, 0)>;
+
def : InstAlias<"addw $rd, $rs1, $imm12",
(ADDIW GPR:$rd, GPR:$rs1, simm12:$imm12)>;
def : InstAlias<"sllw $rd, $rs1, $shamt",
@@ -663,21 +747,9 @@ def sexti32 : PatFrags<(ops node:$src),
def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
}]>;
-def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
- return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
-}]>;
def zexti32 : PatFrags<(ops node:$src),
[(and node:$src, 0xffffffff),
(assertzexti32 node:$src)]>;
-// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
-// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
-// upper 32 bits.
-def shiftwamt_mask : ImmLeaf<XLenVT, [{
- return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
-}]>;
-def shiftwamt : PatFrags<(ops node:$src),
- [(assertzexti5 (and node:$src, shiftwamt_mask)),
- (assertzexti5 node:$src)]>;
/// Immediates
@@ -714,6 +786,15 @@ def : PatGprGpr<shiftop<shl>, SLL>;
def : PatGprGpr<shiftop<srl>, SRL>;
def : PatGprGpr<shiftop<sra>, SRA>;
+// This is a special case of the ADD instruction used to facilitate the use of a
+// fourth operand to emit a relocation on a symbol relating to this instruction.
+// The relocation does not affect any bits of the instruction itself but is used
+// as a hint to the linker.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0 in
+def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, tprel_add_symbol:$src), [],
+ "add", "$rd, $rs1, $rs2, $src">;
+
/// FrameIndex calculations
def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12),
@@ -732,8 +813,12 @@ def : PatGprSimm12<setult, SLTIU>;
// handled by a RISC-V instruction.
def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
+def : Pat<(seteq GPR:$rs1, simm12:$imm12),
+ (SLTIU (XORI GPR:$rs1, simm12:$imm12), 1)>;
def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
+def : Pat<(setne GPR:$rs1, simm12:$imm12),
+ (SLTU X0, (XORI GPR:$rs1, simm12:$imm12))>;
def : Pat<(setugt GPR:$rs1, GPR:$rs2), (SLTU GPR:$rs2, GPR:$rs1)>;
def : Pat<(setuge GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setule GPR:$rs1, GPR:$rs2), (XORI (SLTU GPR:$rs2, GPR:$rs1), 1)>;
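An illustrative sketch (editorial, not part of this patch): the seteq/setne patterns above lower integer equality tests to XOR/XORI followed by SLTIU/SLTU, using the fact that a value equals zero exactly when it is unsigned-less-than 1. A minimal C++ analogue of that identity:

#include <cassert>
#include <cstdint>

// seteq rs1, rs2  ->  sltiu (xor rs1, rs2), 1
static bool set_eq(uint64_t rs1, uint64_t rs2) { return (rs1 ^ rs2) < 1; }
// setne rs1, rs2  ->  sltu x0, (xor rs1, rs2)
static bool set_ne(uint64_t rs1, uint64_t rs2) { return 0 < (rs1 ^ rs2); }

int main() {
  assert(set_eq(5, 5) && !set_eq(5, 6));
  assert(!set_ne(5, 5) && set_ne(5, 6));
}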
@@ -746,7 +831,7 @@ class SelectCC_rrirr<RegisterClass valty, RegisterClass cmpty>
: Pseudo<(outs valty:$dst),
(ins cmpty:$lhs, cmpty:$rhs, ixlenimm:$imm,
valty:$truev, valty:$falsev),
- [(set valty:$dst, (SelectCC cmpty:$lhs, cmpty:$rhs,
+ [(set valty:$dst, (riscv_selectcc cmpty:$lhs, cmpty:$rhs,
(XLenVT imm:$imm), valty:$truev, valty:$falsev))]>;
def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
@@ -794,6 +879,17 @@ def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
(PseudoBRIND GPR:$rs1, simm12:$imm12)>;
+// PseudoCALLReg is a generic pseudo instruction for calls which will eventually
+// expand to auipc and jalr during encoding, with any given register used as the
+// destination.
+// Define AsmString to print "call" when compiling with the -S flag.
+// Define isCodeGenOnly = 0 to support parsing the assembly "call" instruction.
+let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, hasSideEffects = 0,
+ mayStore = 0, mayLoad = 0 in
+def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
+ let AsmString = "call\t$rd, $func";
+}
+
// PseudoCALL is a pseudo instruction which will eventually expand to auipc
// and jalr during encoding. This is desirable, as an auipc+jalr pair with
// R_RISCV_CALL and R_RISCV_RELAX relocations can be relaxed by the linker
@@ -801,23 +897,24 @@ def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
// Define AsmString to print "call" when compiling with the -S flag.
// Define isCodeGenOnly = 0 to support parsing the assembly "call" instruction.
let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in
-def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func),
- [(Call tglobaladdr:$func)]> {
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
let AsmString = "call\t$func";
}
-def : Pat<(Call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
+def : Pat<(riscv_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
+def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
-def : Pat<(URetFlag), (URET X0, X0)>;
-def : Pat<(SRetFlag), (SRET X0, X0)>;
-def : Pat<(MRetFlag), (MRET X0, X0)>;
+def : Pat<(riscv_uret_flag), (URET X0, X0)>;
+def : Pat<(riscv_sret_flag), (SRET X0, X0)>;
+def : Pat<(riscv_mret_flag), (MRET X0, X0)>;
let isCall = 1, Defs = [X1] in
-def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1), [(Call GPR:$rs1)]>,
+def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1),
+ [(riscv_call GPR:$rs1)]>,
PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
let isBarrier = 1, isReturn = 1, isTerminator = 1 in
-def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>,
+def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
PseudoInstExpansion<(JALR X0, X1, 0)>;
// PseudoTAIL is a pseudo instruction similar to PseudoCALL and will eventually
@@ -825,17 +922,18 @@ def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>,
// Define AsmString to print "tail" when compiling with the -S flag.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
isCodeGenOnly = 0 in
-def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst), []> {
+def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []> {
let AsmString = "tail\t$dst";
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2] in
-def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1), [(Tail GPRTC:$rs1)]>,
+def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1),
+ [(riscv_tail GPRTC:$rs1)]>,
PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
-def : Pat<(Tail (iPTR tglobaladdr:$dst)),
+def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
(PseudoTAIL texternalsym:$dst)>;
-def : Pat<(Tail (iPTR texternalsym:$dst)),
+def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
@@ -843,6 +941,21 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lla", "$dst, $src">;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "la", "$dst, $src">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "la.tls.ie", "$dst, $src">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "la.tls.gd", "$dst, $src">;
+
/// Loads
multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
@@ -906,9 +1019,9 @@ def : Pat<(atomic_fence (XLenVT 7), (imm)), (FENCE 0b11, 0b11)>;
// Pessimistically assume the stack pointer will be clobbered
let Defs = [X2], Uses = [X2] in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- [(CallSeqStart timm:$amt1, timm:$amt2)]>;
+ [(callseq_start timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- [(CallSeqEnd timm:$amt1, timm:$amt2)]>;
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
} // Defs = [X2], Uses = [X2]
/// RV64 patterns
@@ -935,28 +1048,9 @@ def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
(SRAIW GPR:$rs1, uimm5:$shamt)>;
-// For variable-length shifts, we rely on assertzexti5 being inserted during
-// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
-// guarantee that selecting a 32-bit variable shift is legal (as the variable
-// shift is known to be <= 32). We must also be careful not to create
-// semantically incorrect patterns. For instance, selecting SRLW for
-// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
-// is not guaranteed to be safe, as we don't know whether the upper 32-bits of
-// the result are used or not (in the case where rs2=0, this is a
-// sign-extension operation).
-
-def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
- (SLLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
- (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
-
-def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
- (SRLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
- (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
-
-def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
- (SRAW GPR:$rs1, GPR:$rs2)>;
+def : PatGprGpr<riscv_sllw, SLLW>;
+def : PatGprGpr<riscv_srlw, SRLW>;
+def : PatGprGpr<riscv_sraw, SRAW>;
/// Loads
@@ -971,6 +1065,16 @@ defm : StPat<truncstorei32, SW, GPR>;
defm : StPat<store, SD, GPR>;
} // Predicates = [IsRV64]
+/// readcyclecounter
+// On RV64, we can directly read the 64-bit "cycle" CSR.
+let Predicates = [IsRV64] in
+def : Pat<(readcyclecounter), (CSRRS CYCLE.Encoding, X0)>;
+// On RV32, ReadCycleWide will be expanded to the suggested loop reading both
+// halves of the 64-bit "cycle" CSR.
+let Predicates = [IsRV32], usesCustomInserter = 1, hasSideEffects = 0,
+mayLoad = 0, mayStore = 0, hasNoSchedulingInfo = 1 in
+def ReadCycleWide : Pseudo<(outs GPR:$lo, GPR:$hi), (ins), [], "", "">;
+
//===----------------------------------------------------------------------===//
// Standard extensions
//===----------------------------------------------------------------------===//
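An illustrative sketch (editorial, not part of this patch) of the loop that ReadCycleWide is expanded into on RV32, following the RISC-V ISA manual's suggested sequence of re-reading the high half until it is stable. The helper name is hypothetical and the code compiles only when targeting riscv32:

#include <cstdint>

// Read the 64-bit "cycle" counter on RV32. Re-reading cycleh guards against a
// carry into the high half between the two CSR reads.
static inline uint64_t read_cycle64_rv32() {
  uint32_t hi, lo, hi2;
  do {
    asm volatile("csrr %0, cycleh" : "=r"(hi));
    asm volatile("csrr %0, cycle"  : "=r"(lo));
    asm volatile("csrr %0, cycleh" : "=r"(hi2));
  } while (hi != hi2);
  return ((uint64_t)hi << 32) | lo;
}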
diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td
index 9cb1d2f0b627..b768c9347b38 100644
--- a/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfoA.td - RISC-V 'A' instructions -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,7 +84,7 @@ defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">;
defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">;
defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">;
defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">;
-} // Predicates = [HasStedExtA, IsRV64]
+} // Predicates = [HasStdExtA, IsRV64]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
@@ -235,7 +234,7 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
class PseudoCmpXchg
: Pseudo<(outs GPR:$res, GPR:$scratch),
- (ins GPR:$addr, GPR:$cmpval, GPR:$newval, i32imm:$ordering), []> {
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, ixlenimm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
@@ -263,7 +262,7 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
- i32imm:$ordering), []> {
+ ixlenimm:$ordering), []> {
let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
let mayLoad = 1;
let mayStore = 1;
@@ -276,3 +275,79 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
} // Predicates = [HasStdExtA]
+
+let Predicates = [HasStdExtA, IsRV64] in {
+
+/// 64-bit atomic loads and stores
+
+// Fences will be inserted for atomic load/stores according to the logic in
+// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+defm : LdPat<atomic_load_64, LD>;
+defm : AtomicStPat<atomic_store_64, SD, GPR>;
+
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D">;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D">;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D">;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D">;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D">;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D">;
+
+/// 64-bit AMOs
+
+def : Pat<(atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr),
+ (AMOADD_D GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_acquire GPR:$addr, GPR:$incr),
+ (AMOADD_D_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_release GPR:$addr, GPR:$incr),
+ (AMOADD_D_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr),
+ (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr),
+ (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+
+/// 64-bit pseudo AMOs
+
+def PseudoAtomicLoadNand64 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(atomic_load_nand_64_acquire GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(atomic_load_nand_64_release GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
+
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
+ PseudoMaskedAtomicSwap32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
+ PseudoMaskedAtomicLoadAdd32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
+ PseudoMaskedAtomicLoadSub32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
+ PseudoMaskedAtomicLoadNand32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
+ PseudoMaskedAtomicLoadMin32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
+ PseudoMaskedAtomicLoadUMax32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
+ PseudoMaskedAtomicLoadUMin32>;
+
+/// 64-bit compare and exchange
+
+def PseudoCmpXchg64 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
+
+def : Pat<(int_riscv_masked_cmpxchg_i64
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
+} // Predicates = [HasStdExtA, IsRV64]
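As a cross-check (editorial, not part of this patch), the ordering immediates used in the pseudo-AMO patterns above mirror the enumerator values of llvm::AtomicOrdering; a small sketch of that mapping:

// Values mirror llvm/Support/AtomicOrdering.h; the TableGen immediates must be
// kept in sync with these enumerators.
enum class AtomicOrdering {
  NotAtomic = 0,
  Unordered = 1,
  Monotonic = 2,
  // 3 is reserved (consume)
  Acquire = 4,
  Release = 5,
  AcquireRelease = 6,
  SequentiallyConsistent = 7
};

static_assert(static_cast<int>(AtomicOrdering::Monotonic) == 2, "monotonic is 2");
static_assert(static_cast<int>(AtomicOrdering::SequentiallyConsistent) == 7, "seq_cst is 7");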
diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td
index ad68b5a7dc97..94477341eea7 100644
--- a/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -1,9 +1,8 @@
//===- RISCVInstrInfoC.td - Compressed RISCV instructions -*- tblgen-*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -524,6 +523,56 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> {
} // Predicates = [HasStdExtC]
//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let EmitPriority = 0 in {
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fld $rd, (${rs1})", (C_FLD FPR64C:$rd, GPRC:$rs1, 0)>;
+
+def : InstAlias<"c.lw $rd, (${rs1})", (C_LW GPRC:$rd, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.flw $rd, (${rs1})", (C_FLW FPR32C:$rd, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.ld $rd, (${rs1})", (C_LD GPRC:$rd, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fsd $rs2, (${rs1})", (C_FSD FPR64C:$rs2, GPRC:$rs1, 0)>;
+
+def : InstAlias<"c.sw $rs2, (${rs1})", (C_SW GPRC:$rs2, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.fsw $rs2, (${rs1})", (C_FSW FPR32C:$rs2, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.sd $rs2, (${rs1})", (C_SD GPRC:$rs2, GPRC:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fldsp $rd, (${rs1})", (C_FLDSP FPR64C:$rd, SP:$rs1, 0)>;
+
+def : InstAlias<"c.lwsp $rd, (${rs1})", (C_LWSP GPRC:$rd, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.flwsp $rd, (${rs1})", (C_FLWSP FPR32C:$rd, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.ldsp $rd, (${rs1})", (C_LDSP GPRC:$rd, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtD] in
+def : InstAlias<"c.fsdsp $rs2, (${rs1})", (C_FSDSP FPR64C:$rs2, SP:$rs1, 0)>;
+
+def : InstAlias<"c.swsp $rs2, (${rs1})", (C_SWSP GPRC:$rs2, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, HasStdExtF, IsRV32] in
+def : InstAlias<"c.fswsp $rs2, (${rs1})", (C_FSWSP FPR32C:$rs2, SP:$rs1, 0)>;
+
+let Predicates = [HasStdExtC, IsRV64] in
+def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>;
+}
+
+//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/RISCV/RISCVInstrInfoD.td b/lib/Target/RISCV/RISCVInstrInfoD.td
index 9f1cd50de595..fe38c4ff02d3 100644
--- a/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfoD.td - RISC-V 'D' instructions -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -179,8 +178,8 @@ def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> {
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtD] in {
-// TODO fld
-// TODO fsd
+def : InstAlias<"fld $rd, (${rs1})", (FLD FPR64:$rd, GPR:$rs1, 0), 0>;
+def : InstAlias<"fsd $rs2, (${rs1})", (FSD FPR64:$rs2, GPR:$rs1, 0), 0>;
def : InstAlias<"fmv.d $rd, $rs", (FSGNJ_D FPR64:$rd, FPR64:$rs, FPR64:$rs)>;
def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D FPR64:$rd, FPR64:$rs, FPR64:$rs)>;
@@ -192,6 +191,9 @@ def : InstAlias<"fgt.d $rd, $rs, $rt",
(FLT_D GPR:$rd, FPR64:$rt, FPR64:$rs), 0>;
def : InstAlias<"fge.d $rd, $rs, $rt",
(FLE_D GPR:$rd, FPR64:$rt, FPR64:$rs), 0>;
+
+def PseudoFLD : PseudoFloatLoad<"fld", FPR64>;
+def PseudoFSD : PseudoStore<"fsd", FPR64>;
} // Predicates = [HasStdExtD]
//===----------------------------------------------------------------------===//
@@ -268,6 +270,10 @@ def : PatFpr64Fpr64<setole, FLE_D>;
// handled by a RISC-V instruction and aren't expanded in the SelectionDAG
// Legalizer.
+def : Pat<(seto FPR64:$rs1, FPR64:$rs2),
+ (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
+ (FEQ_D FPR64:$rs2, FPR64:$rs2))>;
+
def : Pat<(setuo FPR64:$rs1, FPR64:$rs2),
(SLTIU (AND (FEQ_D FPR64:$rs1, FPR64:$rs1),
(FEQ_D FPR64:$rs2, FPR64:$rs2)),
@@ -308,3 +314,26 @@ def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_WU_D FPR64:$rs1, 0b001)>;
def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W GPR:$rs1)>;
def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU GPR:$rs1)>;
} // Predicates = [HasStdExtD, IsRV32]
+
+let Predicates = [HasStdExtD, IsRV64] in {
+def : Pat<(bitconvert GPR:$rs1), (FMV_D_X GPR:$rs1)>;
+def : Pat<(bitconvert FPR64:$rs1), (FMV_X_D FPR64:$rs1)>;
+
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produces poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.d sign-extends its result, so
+// it is cheaper than fcvt.lu.d+sext.w.
+def : Pat<(sext_inreg (zexti32 (fp_to_uint FPR64:$rs1)), i32),
+ (FCVT_WU_D $rs1, 0b001)>;
+
+// [u]int32->fp
+def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_D_W $rs1)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_D_WU $rs1)>;
+
+def : Pat<(fp_to_sint FPR64:$rs1), (FCVT_L_D FPR64:$rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR64:$rs1), (FCVT_LU_D FPR64:$rs1, 0b001)>;
+
+// [u]int64->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_L GPR:$rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_LU GPR:$rs1, 0b111)>;
+} // Predicates = [HasStdExtD, IsRV64]
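For reference (editorial, not part of this patch), the three-bit immediate passed to the FCVT patterns above is the RISC-V floating-point rounding-mode (rm) field, where 0b001 is round-towards-zero and 0b111 selects the dynamic rounding mode from the frm CSR. A sketch of the encodings:

// RISC-V F/D rounding-mode (rm) field encodings.
enum RISCVRoundingMode {
  RNE = 0b000, // round to nearest, ties to even
  RTZ = 0b001, // round towards zero (used for fp_to_[su]int above)
  RDN = 0b010, // round down (towards -infinity)
  RUP = 0b011, // round up (towards +infinity)
  RMM = 0b100, // round to nearest, ties to max magnitude
  DYN = 0b111  // dynamic: take the rounding mode from the frm CSR
};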
diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td
index 03bdac45873d..032642942f2b 100644
--- a/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfoF.td - RISC-V 'F' instructions -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,20 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFMV_W_X_RV64
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>;
+def SDT_RISCVFMV_X_ANYEXTW_RV64
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
+
+def riscv_fmv_w_x_rv64
+ : SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
+def riscv_fmv_x_anyextw_rv64
+ : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
+
+//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -193,8 +206,8 @@ def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtF] in {
-// TODO flw
-// TODO fsw
+def : InstAlias<"flw $rd, (${rs1})", (FLW FPR32:$rd, GPR:$rs1, 0), 0>;
+def : InstAlias<"fsw $rs2, (${rs1})", (FSW FPR32:$rs2, GPR:$rs1, 0), 0>;
def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>;
@@ -209,28 +222,30 @@ def : InstAlias<"fge.s $rd, $rs, $rt",
// The following csr instructions actually alias instructions from the base ISA.
// However, it only makes sense to support them when the F extension is enabled.
-// CSR Addresses: 0x003 == fcsr, 0x002 == frm, 0x001 == fflags
// NOTE: "frcsr", "frrm", and "frflags" are more specialized versions of "csrr".
-def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, 0x003, X0), 2>;
-def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, 0x003, GPR:$rs)>;
-def : InstAlias<"fscsr $rs", (CSRRW X0, 0x003, GPR:$rs), 2>;
-
-def : InstAlias<"frrm $rd", (CSRRS GPR:$rd, 0x002, X0), 2>;
-def : InstAlias<"fsrm $rd, $rs", (CSRRW GPR:$rd, 0x002, GPR:$rs)>;
-def : InstAlias<"fsrm $rs", (CSRRW X0, 0x002, GPR:$rs), 2>;
-def : InstAlias<"fsrmi $rd, $imm", (CSRRWI GPR:$rd, 0x002, uimm5:$imm)>;
-def : InstAlias<"fsrmi $imm", (CSRRWI X0, 0x002, uimm5:$imm), 2>;
-
-def : InstAlias<"frflags $rd", (CSRRS GPR:$rd, 0x001, X0), 2>;
-def : InstAlias<"fsflags $rd, $rs", (CSRRW GPR:$rd, 0x001, GPR:$rs)>;
-def : InstAlias<"fsflags $rs", (CSRRW X0, 0x001, GPR:$rs), 2>;
-def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, 0x001, uimm5:$imm)>;
-def : InstAlias<"fsflagsi $imm", (CSRRWI X0, 0x001, uimm5:$imm), 2>;
+def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, FCSR.Encoding, X0), 2>;
+def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, FCSR.Encoding, GPR:$rs)>;
+def : InstAlias<"fscsr $rs", (CSRRW X0, FCSR.Encoding, GPR:$rs), 2>;
+
+def : InstAlias<"frrm $rd", (CSRRS GPR:$rd, FRM.Encoding, X0), 2>;
+def : InstAlias<"fsrm $rd, $rs", (CSRRW GPR:$rd, FRM.Encoding, GPR:$rs)>;
+def : InstAlias<"fsrm $rs", (CSRRW X0, FRM.Encoding, GPR:$rs), 2>;
+def : InstAlias<"fsrmi $rd, $imm", (CSRRWI GPR:$rd, FRM.Encoding, uimm5:$imm)>;
+def : InstAlias<"fsrmi $imm", (CSRRWI X0, FRM.Encoding, uimm5:$imm), 2>;
+
+def : InstAlias<"frflags $rd", (CSRRS GPR:$rd, FFLAGS.Encoding, X0), 2>;
+def : InstAlias<"fsflags $rd, $rs", (CSRRW GPR:$rd, FFLAGS.Encoding, GPR:$rs)>;
+def : InstAlias<"fsflags $rs", (CSRRW X0, FFLAGS.Encoding, GPR:$rs), 2>;
+def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, FFLAGS.Encoding, uimm5:$imm)>;
+def : InstAlias<"fsflagsi $imm", (CSRRWI X0, FFLAGS.Encoding, uimm5:$imm), 2>;
// fmv.w.x and fmv.x.w were previously known as fmv.s.x and fmv.x.s. Both
// spellings should be supported by standard tools.
def : MnemonicAlias<"fmv.s.x", "fmv.w.x">;
def : MnemonicAlias<"fmv.x.s", "fmv.x.w">;
+
+def PseudoFLW : PseudoFloatLoad<"flw", FPR32>;
+def PseudoFSW : PseudoStore<"fsw", FPR32>;
} // Predicates = [HasStdExtF]
//===----------------------------------------------------------------------===//
@@ -308,6 +323,10 @@ def : PatFpr32Fpr32<setole, FLE_S>;
// handled by a RISC-V instruction and aren't expanded in the SelectionDAG
// Legalizer.
+def : Pat<(seto FPR32:$rs1, FPR32:$rs2),
+ (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
+ (FEQ_S FPR32:$rs2, FPR32:$rs2))>;
+
def : Pat<(setuo FPR32:$rs1, FPR32:$rs2),
(SLTIU (AND (FEQ_S FPR32:$rs1, FPR32:$rs1),
(FEQ_S FPR32:$rs2, FPR32:$rs2)),
@@ -334,3 +353,37 @@ def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
} // Predicates = [HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV32] in {
+// FP->[u]int. Round-to-zero must be used
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV64] in {
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
+def : Pat<(sexti32 (riscv_fmv_x_anyextw_rv64 FPR32:$src)),
+ (FMV_X_W FPR32:$src)>;
+
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produces poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.s sign-extends its result, so
+// it is cheaper than fcvt.lu.s+sext.w.
+def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR32:$rs1)), i32),
+ (FCVT_WU_S $rs1, 0b001)>;
+
+// FP->[u]int64
+def : Pat<(fp_to_sint FPR32:$rs1), (FCVT_L_S $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR32:$rs1), (FCVT_LU_S $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_S_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_L $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_LU $rs1, 0b111)>;
+} // Predicates = [HasStdExtF, IsRV64]
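An illustrative sketch (editorial, not part of this patch): the seto/setuo expansions rely on NaN being the only value that does not compare equal to itself, so FEQ of each operand with itself yields an "is ordered" bit. A minimal C++ analogue:

#include <cassert>
#include <cmath>

// seto  a, b  ->  and (feq a, a), (feq b, b)
static bool is_ordered(float a, float b) { return (a == a) && (b == b); }
// setuo a, b  ->  sltiu (and (feq a, a), (feq b, b)), 1
static bool is_unordered(float a, float b) { return !is_ordered(a, b); }

int main() {
  assert(is_ordered(1.0f, 2.0f));
  assert(is_unordered(NAN, 2.0f));
}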
diff --git a/lib/Target/RISCV/RISCVInstrInfoM.td b/lib/Target/RISCV/RISCVInstrInfoM.td
index 05dd3311ad54..e75151ba99c7 100644
--- a/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -1,9 +1,8 @@
//===-- RISCVInstrInfoM.td - RISC-V 'M' instructions -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,14 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_divw : SDNode<"RISCVISD::DIVW", SDTIntBinOp>;
+def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
+def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -53,18 +60,19 @@ def : PatGprGpr<urem, REMU>;
let Predicates = [HasStdExtM, IsRV64] in {
def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
(MULW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
- (sexti32 GPR:$rs2)), i32),
- (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
- (sexti32 GPR:$rs2))),
- (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
-def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
- (DIVUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a divuw and zero-extend the result than to
-// zero-extend both inputs to a udiv.
-def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
- (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : PatGprGpr<riscv_divw, DIVW>;
+def : PatGprGpr<riscv_divuw, DIVUW>;
+def : PatGprGpr<riscv_remuw, REMUW>;
+
+// Handle the specific cases where using DIVU/REMU would be correct and result
+// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
+// result.
+def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+ (DIVU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+ (REMU GPR:$rs1, GPR:$rs2)>;
+
// Although the sexti32 operands may not have originated from an i32 srem,
// this pattern is safe as it is impossible for two sign extended inputs to
// produce a result where res[63:32]=0 and res[31]=1.
@@ -73,10 +81,4 @@ def : Pat<(srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)),
def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1),
(sexti32 GPR:$rs2)), i32),
(REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
- (REMUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a remuw and zero-extend the result than to
-// zero-extend both inputs to a urem.
-def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
- (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
} // Predicates = [HasStdExtM, IsRV64]
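An illustrative check (editorial, not part of this patch) of why a plain 64-bit DIVU/REMU suffices when both operands are already zero-extended 32-bit values: the quotient and remainder then fit in 32 bits, so they match the zero-extended DIVUW/REMUW results (division by zero is undefined in the IR forms matched here):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t a = 0x00000000FFFFFFFFull, b = 0x0000000000000007ull;
  uint64_t q = a / b, r = a % b;
  // The 64-bit results already fit in 32 bits...
  assert(q <= 0xFFFFFFFFull && r <= 0xFFFFFFFFull);
  // ...and equal the zero-extended 32-bit division results.
  assert(q == (uint64_t)(uint32_t(a) / uint32_t(b)));
  assert(r == (uint64_t)(uint32_t(a) % uint32_t(b)));
}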
diff --git a/lib/Target/RISCV/RISCVMCInstLower.cpp b/lib/Target/RISCV/RISCVMCInstLower.cpp
index e0100b1679be..b1dbcfa7f738 100644
--- a/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- RISCVMCInstLower.cpp - Convert RISCV MachineInstr to an MCInst ------=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,12 +36,42 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
case RISCVII::MO_None:
Kind = RISCVMCExpr::VK_RISCV_None;
break;
+ case RISCVII::MO_CALL:
+ Kind = RISCVMCExpr::VK_RISCV_CALL;
+ break;
+ case RISCVII::MO_PLT:
+ Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
+ break;
case RISCVII::MO_LO:
Kind = RISCVMCExpr::VK_RISCV_LO;
break;
case RISCVII::MO_HI:
Kind = RISCVMCExpr::VK_RISCV_HI;
break;
+ case RISCVII::MO_PCREL_LO:
+ Kind = RISCVMCExpr::VK_RISCV_PCREL_LO;
+ break;
+ case RISCVII::MO_PCREL_HI:
+ Kind = RISCVMCExpr::VK_RISCV_PCREL_HI;
+ break;
+ case RISCVII::MO_GOT_HI:
+ Kind = RISCVMCExpr::VK_RISCV_GOT_HI;
+ break;
+ case RISCVII::MO_TPREL_LO:
+ Kind = RISCVMCExpr::VK_RISCV_TPREL_LO;
+ break;
+ case RISCVII::MO_TPREL_HI:
+ Kind = RISCVMCExpr::VK_RISCV_TPREL_HI;
+ break;
+ case RISCVII::MO_TPREL_ADD:
+ Kind = RISCVMCExpr::VK_RISCV_TPREL_ADD;
+ break;
+ case RISCVII::MO_TLS_GOT_HI:
+ Kind = RISCVMCExpr::VK_RISCV_TLS_GOT_HI;
+ break;
+ case RISCVII::MO_TLS_GD_HI:
+ Kind = RISCVMCExpr::VK_RISCV_TLS_GD_HI;
+ break;
}
const MCExpr *ME =
diff --git a/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 2fea3a1bdd2f..585bff2bc20a 100644
--- a/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//=- RISCVMachineFunctionInfo.h - RISCV machine function info -----*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,8 +32,6 @@ private:
int MoveF64FrameIndex = -1;
public:
- // RISCVMachineFunctionInfo() = default;
-
RISCVMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
diff --git a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index cea009c5447d..82b1209cb8e7 100644
--- a/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -1,9 +1,8 @@
//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 3ed1dec434ce..e6a126e3e513 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- RISCVRegisterInfo.cpp - RISCV Register Information ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,17 +32,32 @@ RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode)
const MCPhysReg *
RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ auto &Subtarget = MF->getSubtarget<RISCVSubtarget>();
if (MF->getFunction().hasFnAttribute("interrupt")) {
- if (MF->getSubtarget<RISCVSubtarget>().hasStdExtD())
+ if (Subtarget.hasStdExtD())
return CSR_XLEN_F64_Interrupt_SaveList;
- if (MF->getSubtarget<RISCVSubtarget>().hasStdExtF())
+ if (Subtarget.hasStdExtF())
return CSR_XLEN_F32_Interrupt_SaveList;
return CSR_Interrupt_SaveList;
}
- return CSR_SaveList;
+
+ switch (Subtarget.getTargetABI()) {
+ default:
+ llvm_unreachable("Unrecognized ABI");
+ case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_LP64:
+ return CSR_ILP32_LP64_SaveList;
+ case RISCVABI::ABI_ILP32F:
+ case RISCVABI::ABI_LP64F:
+ return CSR_ILP32F_LP64F_SaveList;
+ case RISCVABI::ABI_ILP32D:
+ case RISCVABI::ABI_LP64D:
+ return CSR_ILP32D_LP64D_SaveList;
+ }
}
BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = getFrameLowering(MF);
BitVector Reserved(getNumRegs());
// Use markSuperRegs to ensure any register aliases are also reserved
@@ -52,7 +66,8 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::X2); // sp
markSuperRegs(Reserved, RISCV::X3); // gp
markSuperRegs(Reserved, RISCV::X4); // tp
- markSuperRegs(Reserved, RISCV::X8); // fp
+ if (TFI->hasFP(MF))
+ markSuperRegs(Reserved, RISCV::X8); // fp
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -109,7 +124,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2;
}
@@ -117,12 +132,26 @@ unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const uint32_t *
RISCVRegisterInfo::getCallPreservedMask(const MachineFunction & MF,
CallingConv::ID /*CC*/) const {
+ auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
if (MF.getFunction().hasFnAttribute("interrupt")) {
- if (MF.getSubtarget<RISCVSubtarget>().hasStdExtD())
+ if (Subtarget.hasStdExtD())
return CSR_XLEN_F64_Interrupt_RegMask;
- if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF())
+ if (Subtarget.hasStdExtF())
return CSR_XLEN_F32_Interrupt_RegMask;
return CSR_Interrupt_RegMask;
}
- return CSR_RegMask;
+
+ switch (Subtarget.getTargetABI()) {
+ default:
+ llvm_unreachable("Unrecognized ABI");
+ case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_LP64:
+ return CSR_ILP32_LP64_RegMask;
+ case RISCVABI::ABI_ILP32F:
+ case RISCVABI::ABI_LP64F:
+ return CSR_ILP32F_LP64F_RegMask;
+ case RISCVABI::ABI_ILP32D:
+ case RISCVABI::ABI_LP64D:
+ return CSR_ILP32D_LP64D_RegMask;
+ }
}
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h
index cbbb70079dd1..4f339475508f 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- RISCVRegisterInfo.h - RISCV Register Information Impl ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,7 +39,7 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td
index 4be8ff9200e9..79f8ab12f6c0 100644
--- a/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- RISCVRegisterInfo.td - RISC-V Register defs --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -56,7 +55,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
def X6 : RISCVReg<6, "x6", ["t1"]>, DwarfRegNum<[6]>;
def X7 : RISCVReg<7, "x7", ["t2"]>, DwarfRegNum<[7]>;
}
- def X8 : RISCVReg<8, "x8", ["s0"]>, DwarfRegNum<[8]>;
+ def X8 : RISCVReg<8, "x8", ["s0", "fp"]>, DwarfRegNum<[8]>;
def X9 : RISCVReg<9, "x9", ["s1"]>, DwarfRegNum<[9]>;
def X10 : RISCVReg<10,"x10", ["a0"]>, DwarfRegNum<[10]>;
def X11 : RISCVReg<11,"x11", ["a1"]>, DwarfRegNum<[11]>;
diff --git a/lib/Target/RISCV/RISCVSubtarget.cpp b/lib/Target/RISCV/RISCVSubtarget.cpp
index b221ea84a33c..6902ed75d852 100644
--- a/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- RISCVSubtarget.cpp - RISCV Subtarget Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,10 +25,10 @@ using namespace llvm;
void RISCVSubtarget::anchor() {}
-RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(StringRef CPU,
- StringRef FS,
- bool Is64Bit) {
+RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(
+ const Triple &TT, StringRef CPU, StringRef FS, StringRef ABIName) {
// Determine default and user-specified characteristics
+ bool Is64Bit = TT.isArch64Bit();
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = Is64Bit ? "generic-rv64" : "generic-rv32";
@@ -38,11 +37,14 @@ RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(StringRef CPU,
XLenVT = MVT::i64;
XLen = 64;
}
+
+ TargetABI = RISCVABI::computeTargetABI(TT, getFeatureBits(), ABIName);
+ RISCVFeatures::validate(TT, getFeatureBits());
return *this;
}
-RISCVSubtarget::RISCVSubtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM)
+RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ StringRef ABIName, const TargetMachine &TM)
: RISCVGenSubtargetInfo(TT, CPU, FS),
- FrameLowering(initializeSubtargetDependencies(CPU, FS, TT.isArch64Bit())),
+ FrameLowering(initializeSubtargetDependencies(TT, CPU, FS, ABIName)),
InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {}
diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h
index 0e09391e7829..106ff49f021a 100644
--- a/lib/Target/RISCV/RISCVSubtarget.h
+++ b/lib/Target/RISCV/RISCVSubtarget.h
@@ -1,9 +1,8 @@
//===-- RISCVSubtarget.h - Define Subtarget for the RISCV -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "RISCVFrameLowering.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
+#include "Utils/RISCVBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -36,9 +36,11 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtD = false;
bool HasStdExtC = false;
bool HasRV64 = false;
+ bool IsRV32E = false;
bool EnableLinkerRelax = false;
unsigned XLen = 32;
MVT XLenVT = MVT::i32;
+ RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
RISCVRegisterInfo RegInfo;
@@ -47,13 +49,14 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
/// Initializes using the passed in CPU and feature strings so that we can
/// use initializer lists for subtarget initialization.
- RISCVSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
- bool Is64Bit);
+ RISCVSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef CPU, StringRef FS,
+ StringRef ABIName);
public:
// Initializes the data members to match that of the specified triple.
- RISCVSubtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM);
+ RISCVSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ StringRef ABIName, const TargetMachine &TM);
// Parses features string setting specified subtarget options. The
// definition of this function is auto-generated by tblgen.
@@ -78,9 +81,11 @@ public:
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
bool is64Bit() const { return HasRV64; }
+ bool isRV32E() const { return IsRV32E; }
bool enableLinkerRelax() const { return EnableLinkerRelax; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
+ RISCVABI::ABI getTargetABI() const { return TargetABI; }
};
} // End llvm namespace
diff --git a/lib/Target/RISCV/RISCVSystemOperands.td b/lib/Target/RISCV/RISCVSystemOperands.td
index f1b7984ffe6b..a46a32c4e7f2 100644
--- a/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/lib/Target/RISCV/RISCVSystemOperands.td
@@ -1,9 +1,8 @@
//===- RISCVSystemOperands.td ----------------------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -72,18 +71,16 @@ def : SysReg<"uip", 0x044>;
// User Floating-Point CSRs
//===--------------------------
-let FeaturesRequired = [{ {RISCV::FeatureStdExtF} }] in {
-def : SysReg<"fflags", 0x001>;
-def : SysReg<"frm", 0x002>;
-def : SysReg<"fcsr", 0x003>;
-}
+def FFLAGS : SysReg<"fflags", 0x001>;
+def FRM : SysReg<"frm", 0x002>;
+def FCSR : SysReg<"fcsr", 0x003>;
//===--------------------------
// User Counter/Timers
//===--------------------------
-def : SysReg<"cycle", 0xC00>;
-def : SysReg<"time", 0xC01>;
-def : SysReg<"instret", 0xC02>;
+def CYCLE : SysReg<"cycle", 0xC00>;
+def TIME : SysReg<"time", 0xC01>;
+def INSTRET : SysReg<"instret", 0xC02>;
def : SysReg<"hpmcounter3", 0xC03>;
def : SysReg<"hpmcounter4", 0xC04>;
@@ -116,9 +113,9 @@ def : SysReg<"hpmcounter30", 0xC1E>;
def : SysReg<"hpmcounter31", 0xC1F>;
let isRV32Only = 1 in {
-def: SysReg<"cycleh", 0xC80>;
-def: SysReg<"timeh", 0xC81>;
-def: SysReg<"instreth", 0xC82>;
+def CYCLEH : SysReg<"cycleh", 0xC80>;
+def TIMEH : SysReg<"timeh", 0xC81>;
+def INSTRETH : SysReg<"instreth", 0xC82>;
def: SysReg<"hpmcounter3h", 0xC83>;
def: SysReg<"hpmcounter4h", 0xC84>;
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index 8937ec200bd7..f4e6ed9f6284 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- RISCVTargetMachine.cpp - Define TargetMachine for RISCV -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,10 +10,13 @@
//
//===----------------------------------------------------------------------===//
-#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "RISCV.h"
#include "RISCVTargetObjectFile.h"
+#include "RISCVTargetTransformInfo.h"
+#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -31,7 +33,7 @@ extern "C" void LLVMInitializeRISCVTarget() {
initializeRISCVExpandPseudoPass(*PR);
}
-static std::string computeDataLayout(const Triple &TT) {
+static StringRef computeDataLayout(const Triple &TT) {
if (TT.isArch64Bit()) {
return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
} else {
@@ -57,10 +59,15 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
getEffectiveRelocModel(TT, RM),
getEffectiveCodeModel(CM, CodeModel::Small), OL),
TLOF(make_unique<RISCVELFTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, CPU, FS, Options.MCOptions.getABIName(), *this) {
initAsmInfo();
}
+TargetTransformInfo
+RISCVTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(RISCVTTIImpl(this, F));
+}
+
namespace {
class RISCVPassConfig : public TargetPassConfig {
public:
diff --git a/lib/Target/RISCV/RISCVTargetMachine.h b/lib/Target/RISCV/RISCVTargetMachine.h
index 02361dddebf7..ebf3f3c07955 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/lib/Target/RISCV/RISCVTargetMachine.h
@@ -1,9 +1,8 @@
//===-- RISCVTargetMachine.h - Define TargetMachine for RISCV ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,6 +39,8 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
};
}
diff --git a/lib/Target/RISCV/RISCVTargetObjectFile.cpp b/lib/Target/RISCV/RISCVTargetObjectFile.cpp
index 46e81b628b65..bbd45c970d3d 100644
--- a/lib/Target/RISCV/RISCVTargetObjectFile.cpp
+++ b/lib/Target/RISCV/RISCVTargetObjectFile.cpp
@@ -1,14 +1,16 @@
//===-- RISCVTargetObjectFile.cpp - RISCV Object Info -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "RISCVTargetObjectFile.h"
#include "RISCVTargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
using namespace llvm;
@@ -16,4 +18,97 @@ void RISCVELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
+
+ SmallDataSection = getContext().getELFSection(
+ ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+}
+
+// An address must be loaded from a small section if its size is less than the
+// small section size threshold. Data in this section can be addressed using
+// the gp_rel operator.
+bool RISCVELFTargetObjectFile::isInSmallSection(uint64_t Size) const {
+ // gcc has traditionally not treated zero-sized objects as small data, so this
+ // is effectively part of the ABI.
+ return Size > 0 && Size <= SSThreshold;
+}
+
+// Return true if this global address should be placed into small data/bss
+// section.
+bool RISCVELFTargetObjectFile::isGlobalInSmallSection(
+ const GlobalObject *GO, const TargetMachine &TM) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GO);
+ if (!GVA)
+ return false;
+
+ // If the variable has an explicit section, it is placed in that section.
+ if (GVA->hasSection()) {
+ StringRef Section = GVA->getSection();
+
+ // Explicitly placing any variable in the small data section overrides
+ // the global -G value.
+ if (Section == ".sdata" || Section == ".sbss")
+ return true;
+
+  // Otherwise, reject putting the variable in a small section if it has an
+  // explicit section name.
+ return false;
+ }
+
+ if (((GVA->hasExternalLinkage() && GVA->isDeclaration()) ||
+ GVA->hasCommonLinkage()))
+ return false;
+
+ Type *Ty = GVA->getValueType();
+ // It is possible that the type of the global is unsized, i.e. a declaration
+  // of an extern struct. In this case don't presume it is in the small data
+ // section. This happens e.g. when building the FreeBSD kernel.
+ if (!Ty->isSized())
+ return false;
+
+ return isInSmallSection(
+ GVA->getParent()->getDataLayout().getTypeAllocSize(Ty));
+}
+
+MCSection *RISCVELFTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && isGlobalInSmallSection(GO, TM))
+ return SmallBSSSection;
+ if (Kind.isData() && isGlobalInSmallSection(GO, TM))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
+}
+
+void RISCVELFTargetObjectFile::getModuleMetadata(Module &M) {
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+
+ for (const auto &MFE : ModuleFlags) {
+ StringRef Key = MFE.Key->getString();
+ if (Key == "SmallDataLimit") {
+ SSThreshold = mdconst::extract<ConstantInt>(MFE.Val)->getZExtValue();
+ break;
+ }
+ }
+}
+
+/// Return true if this constant should be placed into small data section.
+bool RISCVELFTargetObjectFile::isConstantInSmallSection(
+ const DataLayout &DL, const Constant *CN) const {
+ return isInSmallSection(DL.getTypeAllocSize(CN->getType()));
+}
+
+MCSection *RISCVELFTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
+ if (isConstantInSmallSection(DL, C))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, Align);
}
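
The small-data handling added above reduces to a size test against a threshold
(8 bytes by default, overridable through the "SmallDataLimit" module flag),
with an explicit .sdata/.sbss section attribute taking precedence. A
self-contained sketch of that decision, using plain types rather than the LLVM
classes (SmallDataPolicy and its members are made-up names for illustration):

#include <cstdint>
#include <string>

struct SmallDataPolicy {
  uint64_t Threshold = 8; // mirrors SSThreshold / the "SmallDataLimit" flag

  // Mirrors isInSmallSection(): zero-sized objects are never small data,
  // matching the traditional gcc behaviour noted in the patch.
  bool sizeIsSmall(uint64_t Size) const {
    return Size > 0 && Size <= Threshold;
  }

  // Mirrors the shape of isGlobalInSmallSection(): an explicit section name
  // decides outright; declarations and common globals are excluded; otherwise
  // the size test applies.
  bool variableIsSmall(const std::string &ExplicitSection,
                       bool IsDeclarationOrCommon, uint64_t Size) const {
    if (!ExplicitSection.empty())
      return ExplicitSection == ".sdata" || ExplicitSection == ".sbss";
    if (IsDeclarationOrCommon)
      return false;
    return sizeIsSmall(Size);
  }
};

With the default threshold, a 4-byte global with no section attribute would be
classified as small data, while a 16-byte one falls through to the ordinary
ELF section selection.
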
diff --git a/lib/Target/RISCV/RISCVTargetObjectFile.h b/lib/Target/RISCV/RISCVTargetObjectFile.h
index 5467220301c1..b2daaaa9d364 100644
--- a/lib/Target/RISCV/RISCVTargetObjectFile.h
+++ b/lib/Target/RISCV/RISCVTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- RISCVTargetObjectFile.h - RISCV Object Info -*- C++ ---------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,7 +16,31 @@ class RISCVTargetMachine;
/// This implementation is used for RISCV ELF targets.
class RISCVELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ MCSection *SmallDataSection;
+ MCSection *SmallBSSSection;
+ unsigned SSThreshold = 8;
+
+public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ /// Return true if this global address should be placed into small data/bss
+ /// section.
+ bool isGlobalInSmallSection(const GlobalObject *GO,
+ const TargetMachine &TM) const;
+
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ /// Return true if this constant should be placed into small data section.
+ bool isConstantInSmallSection(const DataLayout &DL, const Constant *CN) const;
+
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+ const Constant *C,
+ unsigned &Align) const override;
+
+ void getModuleMetadata(Module &M) override;
+
+ bool isInSmallSection(uint64_t Size) const;
};
} // end namespace llvm
diff --git a/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
new file mode 100644
index 000000000000..2c6400cbb1eb
--- /dev/null
+++ b/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -0,0 +1,92 @@
+//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVTargetTransformInfo.h"
+#include "Utils/RISCVMatInt.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscvtti"
+
+int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy() &&
+ "getIntImmCost can only estimate cost of materialising integers");
+
+ // We have a Zero register, so 0 is always free.
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ // Otherwise, we check how many instructions it will take to materialise.
+ const DataLayout &DL = getDataLayout();
+ return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
+ getST()->is64Bit());
+}
+
+int RISCVTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
+ assert(Ty->isIntegerTy() &&
+ "getIntImmCost can only estimate cost of materialising integers");
+
+ // We have a Zero register, so 0 is always free.
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
+  // commutative; in others the immediate comes from a specific argument index.
+ bool Takes12BitImm = false;
+ unsigned ImmArgIdx = ~0U;
+
+ switch (Opcode) {
+ case Instruction::GetElementPtr:
+ // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
+ // split up large offsets in GEP into better parts than ConstantHoisting
+ // can.
+ return TTI::TCC_Free;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Mul:
+ Takes12BitImm = true;
+ break;
+ case Instruction::Sub:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ Takes12BitImm = true;
+ ImmArgIdx = 1;
+ break;
+ default:
+ break;
+ }
+
+ if (Takes12BitImm) {
+ // Check immediate is the correct argument...
+ if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
+ // ... and fits into the 12-bit immediate.
+ if (Imm.getMinSignedBits() <= 64 &&
+ getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
+ return TTI::TCC_Free;
+ }
+ }
+
+ // Otherwise, use the full materialisation cost.
+ return getIntImmCost(Imm, Ty);
+ }
+
+ // By default, prevent hoisting.
+ return TTI::TCC_Free;
+}
+
+int RISCVTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ // Prevent hoisting in unknown cases.
+ return TTI::TCC_Free;
+}
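
The constant-hoisting cost added above can be summarised as: an immediate is
"free" when the opcode accepts a 12-bit immediate, the constant sits in an
operand slot where that encoding is possible (or the operation is commutative),
and the value fits in a signed 12-bit field; otherwise the cost is the length
of the materialisation sequence. A standalone approximation of that rule; the
enum and function names here are invented for the sketch, not LLVM API:

#include <cstdint>

enum class Op { Add, And, Or, Xor, Mul, Sub, Shl, LShr, AShr, Other };

bool isCommutative(Op O) {
  return O == Op::Add || O == Op::And || O == Op::Or || O == Op::Xor ||
         O == Op::Mul;
}

bool fitsInSImm12(int64_t V) { return V >= -2048 && V <= 2047; }

// Returns 0 ("free") when the immediate can be encoded directly, otherwise a
// caller-supplied materialisation cost (instruction count).
int immCost(Op O, unsigned OperandIdx, int64_t Imm, int MaterialiseCost) {
  if (Imm == 0)
    return 0; // the zero register makes 0 always free
  if (O == Op::Other)
    return 0; // unrecognised ops: report free so nothing gets hoisted
  unsigned ImmIdx = (O == Op::Sub || O == Op::Shl || O == Op::LShr ||
                     O == Op::AShr) ? 1u : ~0u;
  if ((isCommutative(O) || OperandIdx == ImmIdx) && fitsInSImm12(Imm))
    return 0;
  return MaterialiseCost;
}

For example, an add with 100 would be reported free, while or-ing in 0x12345
would be charged the length of the LUI+ADDI sequence computed by RISCVMatInt.
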
diff --git a/lib/Target/RISCV/RISCVTargetTransformInfo.h b/lib/Target/RISCV/RISCVTargetTransformInfo.h
new file mode 100644
index 000000000000..f361b25a0c70
--- /dev/null
+++ b/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -0,0 +1,52 @@
+//===- RISCVTargetTransformInfo.h - RISC-V specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the RISC-V target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
+
+#include "RISCVSubtarget.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
+ using BaseT = BasicTTIImplBase<RISCVTTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const RISCVSubtarget *ST;
+ const RISCVTargetLowering *TLI;
+
+ const RISCVSubtarget *getST() const { return ST; }
+ const RISCVTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
\ No newline at end of file
diff --git a/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index 0f369d960fe1..e44984a3fcc5 100644
--- a/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -1,26 +1,24 @@
//===-- RISCVTargetInfo.cpp - RISCV Target Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
-Target &getTheRISCV32Target() {
+Target &llvm::getTheRISCV32Target() {
static Target TheRISCV32Target;
return TheRISCV32Target;
}
-Target &getTheRISCV64Target() {
+Target &llvm::getTheRISCV64Target() {
static Target TheRISCV64Target;
return TheRISCV64Target;
}
-}
extern "C" void LLVMInitializeRISCVTargetInfo() {
RegisterTarget<Triple::riscv32> X(getTheRISCV32Target(), "riscv32",
diff --git a/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
new file mode 100644
index 000000000000..ef3d9d116efa
--- /dev/null
+++ b/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
@@ -0,0 +1,21 @@
+//===-- RISCVTargetInfo.h - RISCV Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H
+#define LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheRISCV32Target();
+Target &getTheRISCV64Target();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H
diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
index 964af1f74cec..bc5395768ca1 100644
--- a/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
+++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp
@@ -1,9 +1,80 @@
#include "RISCVBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace RISCVSysReg {
#define GET_SysRegsList_IMPL
#include "RISCVGenSystemOperands.inc"
} // namespace RISCVSysReg
+
+namespace RISCVABI {
+ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+ StringRef ABIName) {
+ auto TargetABI = StringSwitch<ABI>(ABIName)
+ .Case("ilp32", ABI_ILP32)
+ .Case("ilp32f", ABI_ILP32F)
+ .Case("ilp32d", ABI_ILP32D)
+ .Case("ilp32e", ABI_ILP32E)
+ .Case("lp64", ABI_LP64)
+ .Case("lp64f", ABI_LP64F)
+ .Case("lp64d", ABI_LP64D)
+ .Default(ABI_Unknown);
+
+ bool IsRV64 = TT.isArch64Bit();
+ bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
+
+ if (!ABIName.empty() && TargetABI == ABI_Unknown) {
+ errs()
+ << "'" << ABIName
+ << "' is not a recognized ABI for this target (ignoring target-abi)\n";
+ } else if (ABIName.startswith("ilp32") && IsRV64) {
+ errs() << "32-bit ABIs are not supported for 64-bit targets (ignoring "
+ "target-abi)\n";
+ TargetABI = ABI_Unknown;
+ } else if (ABIName.startswith("lp64") && !IsRV64) {
+ errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring "
+ "target-abi)\n";
+ TargetABI = ABI_Unknown;
+ } else if (ABIName.endswith("f") && !FeatureBits[RISCV::FeatureStdExtF]) {
+ errs() << "Hard-float 'f' ABI can't be used for a target that "
+ "doesn't support the F instruction set extension (ignoring "
+ "target-abi)\n";
+ TargetABI = ABI_Unknown;
+ } else if (ABIName.endswith("d") && !FeatureBits[RISCV::FeatureStdExtD]) {
+ errs() << "Hard-float 'd' ABI can't be used for a target that "
+ "doesn't support the D instruction set extension (ignoring "
+ "target-abi)\n";
+ TargetABI = ABI_Unknown;
+ } else if (IsRV32E && TargetABI != ABI_ILP32E && TargetABI != ABI_Unknown) {
+ errs()
+ << "Only the ilp32e ABI is supported for RV32E (ignoring target-abi)\n";
+ TargetABI = ABI_Unknown;
+ }
+
+ if (TargetABI != ABI_Unknown)
+ return TargetABI;
+
+ // For now, default to the ilp32/ilp32e/lp64 ABI if no explicit ABI is given
+ // or an invalid/unrecognised string is given. In the future, it might be
+ // worth changing this to default to ilp32f/lp64f and ilp32d/lp64d when
+ // hardware support for floating point is present.
+ if (IsRV32E)
+ return ABI_ILP32E;
+ if (IsRV64)
+ return ABI_LP64;
+ return ABI_ILP32;
+}
+} // namespace RISCVABI
+
+namespace RISCVFeatures {
+
+void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
+ if (TT.isArch64Bit() && FeatureBits[RISCV::FeatureRV32E])
+ report_fatal_error("RV32E can't be enabled for an RV64 target");
+}
+
+} // namespace RISCVFeatures
+
} // namespace llvm
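
The ABI selection introduced above follows a simple shape: map the requested
name onto the known ABIs, discard combinations that contradict the target
(wrong word size, missing F/D extensions, non-ilp32e on RV32E) with a
diagnostic, and otherwise fall back to ilp32/ilp32e/lp64 by word size. A
reduced standalone sketch of that flow; pickABI is an invented name and the
feature-bit checks are elided, so this is a model, not the helper itself:

#include <cstdio>
#include <string>

enum class ABI { ILP32, ILP32F, ILP32D, ILP32E, LP64, LP64F, LP64D, Unknown };

ABI pickABI(const std::string &Name, bool IsRV64, bool IsRV32E) {
  ABI A = Name == "ilp32"  ? ABI::ILP32  :
          Name == "ilp32f" ? ABI::ILP32F :
          Name == "ilp32d" ? ABI::ILP32D :
          Name == "ilp32e" ? ABI::ILP32E :
          Name == "lp64"   ? ABI::LP64   :
          Name == "lp64f"  ? ABI::LP64F  :
          Name == "lp64d"  ? ABI::LP64D  : ABI::Unknown;

  // Reject ABIs that contradict the register width, as the patch does.
  if (A != ABI::Unknown && IsRV64 && Name.rfind("ilp32", 0) == 0) {
    std::fprintf(stderr, "32-bit ABIs are not supported for 64-bit targets\n");
    A = ABI::Unknown;
  }
  if (A != ABI::Unknown && !IsRV64 && Name.rfind("lp64", 0) == 0) {
    std::fprintf(stderr, "64-bit ABIs are not supported for 32-bit targets\n");
    A = ABI::Unknown;
  }
  if (A != ABI::Unknown)
    return A;
  // Defaults mirror the patch: ilp32e for RV32E, lp64 for RV64, else ilp32.
  return IsRV32E ? ABI::ILP32E : IsRV64 ? ABI::LP64 : ABI::ILP32;
}
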
diff --git a/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/lib/Target/RISCV/Utils/RISCVBaseInfo.h
index 372e0e80bbaf..c33c72f24319 100644
--- a/lib/Target/RISCV/Utils/RISCVBaseInfo.h
+++ b/lib/Target/RISCV/Utils/RISCVBaseInfo.h
@@ -1,9 +1,8 @@
//===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,9 +48,18 @@ enum {
enum {
MO_None,
+ MO_CALL,
+ MO_PLT,
MO_LO,
MO_HI,
+ MO_PCREL_LO,
MO_PCREL_HI,
+ MO_GOT_HI,
+ MO_TPREL_LO,
+ MO_TPREL_HI,
+ MO_TPREL_ADD,
+ MO_TLS_GOT_HI,
+ MO_TLS_GD_HI,
};
} // namespace RISCVII
@@ -153,6 +161,34 @@ struct SysReg {
#include "RISCVGenSystemOperands.inc"
} // end namespace RISCVSysReg
+namespace RISCVABI {
+
+enum ABI {
+ ABI_ILP32,
+ ABI_ILP32F,
+ ABI_ILP32D,
+ ABI_ILP32E,
+ ABI_LP64,
+ ABI_LP64F,
+ ABI_LP64D,
+ ABI_Unknown
+};
+
+// Returns the target ABI; if the requested ABIName is not supported for the
+// given TT and FeatureBits combination, a default ABI is chosen instead.
+ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+ StringRef ABIName);
+
+} // namespace RISCVABI
+
+namespace RISCVFeatures {
+
+// Validates that the given combination of features is valid for the target
+// triple. Exits with report_fatal_error if not.
+void validate(const Triple &TT, const FeatureBitset &FeatureBits);
+
+} // namespace RISCVFeatures
+
} // namespace llvm
#endif
diff --git a/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/lib/Target/RISCV/Utils/RISCVMatInt.cpp
index 3dc298246bc5..f390ddb89e3c 100644
--- a/lib/Target/RISCV/Utils/RISCVMatInt.cpp
+++ b/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -1,9 +1,8 @@
//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -17,7 +16,7 @@
namespace llvm {
namespace RISCVMatInt {
-void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
+void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
if (isInt<32>(Val)) {
// Depending on the active bits in the immediate Value v, the following
// instruction sequences are emitted:
@@ -33,13 +32,13 @@ void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
Res.push_back(Inst(RISCV::LUI, Hi20));
if (Lo12 || Hi20 == 0) {
- unsigned AddiOpc = (Is64Bit && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
+ unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
Res.push_back(Inst(AddiOpc, Lo12));
}
return;
}
- assert(Is64Bit && "Can't emit >32-bit imm for non-RV64 target");
+ assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
// In the worst case, for a full 64-bit constant, a sequence of 8 instructions
// (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
@@ -65,15 +64,30 @@ void generateInstSeq(int64_t Val, bool Is64Bit, InstSeq &Res) {
// performed when the recursion returns.
int64_t Lo12 = SignExtend64<12>(Val);
- int64_t Hi52 = (Val + 0x800) >> 12;
+ int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
- generateInstSeq(Hi52, Is64Bit, Res);
+ generateInstSeq(Hi52, IsRV64, Res);
Res.push_back(Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
Res.push_back(Inst(RISCV::ADDI, Lo12));
}
+
+int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64) {
+ int PlatRegSize = IsRV64 ? 64 : 32;
+
+  // Split the constant into platform-register-sized chunks and calculate the
+  // cost of each chunk.
+ int Cost = 0;
+ for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
+ APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
+ InstSeq MatSeq;
+ generateInstSeq(Chunk.getSExtValue(), IsRV64, MatSeq);
+ Cost += MatSeq.size();
+ }
+ return std::max(1, Cost);
+}
} // namespace RISCVMatInt
} // namespace llvm
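
To make the new cost estimate concrete: generateInstSeq() emits LUI/ADDI(W)
for values that fit in 32 bits and otherwise recurses on the upper bits,
appending an SLLI and an optional ADDI per step, and getIntMatCost() sums the
resulting sequence lengths over each register-sized chunk of the constant. A
self-contained re-derivation of that instruction count (a model of the
recursion, not the LLVM helper itself; it assumes the GCC/Clang builtin for
count-trailing-zeros):

#include <cstdint>

// Sign-extend the low Bits bits of X, in the spirit of LLVM's SignExtend64.
static int64_t sext(uint64_t X, unsigned Bits) {
  return (int64_t)(X << (64 - Bits)) >> (64 - Bits);
}

// Number of instructions generateInstSeq() would produce for Val on RV64.
static int matInstCount(int64_t Val) {
  if (Val >= INT32_MIN && Val <= INT32_MAX) {
    int64_t Lo12 = sext((uint64_t)Val, 12);
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    return (Hi20 ? 1 : 0) + ((Lo12 || Hi20 == 0) ? 1 : 0); // LUI + ADDI(W)
  }
  // Peel off the low 12 bits, materialise the rest, then shift and add.
  int64_t Lo12 = sext((uint64_t)Val, 12);
  uint64_t Hi52 = ((uint64_t)Val + 0x800) >> 12;
  int Shift = 12 + __builtin_ctzll(Hi52);
  int64_t Rest = sext(Hi52 >> (Shift - 12), 64 - Shift);
  return matInstCount(Rest) + 1 /*SLLI*/ + (Lo12 ? 1 : 0) /*ADDI*/;
}

getIntMatCost() applies this per 32-bit (RV32) or 64-bit (RV64) chunk of the
APInt and clamps the total to at least 1.
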
diff --git a/lib/Target/RISCV/Utils/RISCVMatInt.h b/lib/Target/RISCV/Utils/RISCVMatInt.h
index 49d1d89adc7a..b12ae2eade99 100644
--- a/lib/Target/RISCV/Utils/RISCVMatInt.h
+++ b/lib/Target/RISCV/Utils/RISCVMatInt.h
@@ -1,15 +1,15 @@
//===- RISCVMatInt.h - Immediate materialisation ---------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_RISCV_MATINT_H
#define LLVM_LIB_TARGET_RISCV_MATINT_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MachineValueType.h"
#include <cstdint>
@@ -31,6 +31,14 @@ using InstSeq = SmallVector<Inst, 8>;
// order to allow this helper to be used from both the MC layer and during
// instruction selection.
void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res);
+
+// Helper to estimate the number of instructions required to materialise the
+// given immediate value into a register. This estimate does not account for
+// `Val` possibly fitting into an immediate, and so may over-estimate.
+//
+// This will attempt to produce instructions to materialise `Val` as an
+// `Size`-bit immediate. `IsRV64` should match the target architecture.
+int getIntMatCost(const APInt &Val, unsigned Size, bool IsRV64);
} // namespace RISCVMatInt
} // namespace llvm
#endif
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 691421e533ea..15453ae59a4f 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -1,14 +1,14 @@
//===-- SparcAsmParser.cpp - Parse Sparc assembly to MCInst instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SparcMCExpr.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -646,7 +646,8 @@ bool SparcAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return Error(StartLoc, "invalid register name");
}
-static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features,
+static void applyMnemonicAliases(StringRef &Mnemonic,
+ const FeatureBitset &Features,
unsigned VariantID);
bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 6290e5a15a8b..f1ca8e18c228 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -1,9 +1,8 @@
//===-- DelaySlotFiller.cpp - SPARC delay slot filler ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 0045e63a824e..bee331874e96 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -1,9 +1,8 @@
//===- SparcDisassembler.cpp - Disassembler for Sparc -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -41,12 +41,6 @@ public:
};
}
-namespace llvm {
-Target &getTheSparcTarget();
-Target &getTheSparcV9Target();
-Target &getTheSparcelTarget();
-}
-
static MCDisassembler *createSparcDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
diff --git a/lib/Target/Sparc/LeonFeatures.td b/lib/Target/Sparc/LeonFeatures.td
index 61e5f16e0a1e..e0ea4e9c7645 100755
--- a/lib/Target/Sparc/LeonFeatures.td
+++ b/lib/Target/Sparc/LeonFeatures.td
@@ -1,9 +1,8 @@
//===-- LeonFeatures.td - Describe the Leon Features -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/LeonPasses.cpp b/lib/Target/Sparc/LeonPasses.cpp
index 5ce00db365ab..e9d3aaeb9cfe 100755
--- a/lib/Target/Sparc/LeonPasses.cpp
+++ b/lib/Target/Sparc/LeonPasses.cpp
@@ -1,9 +1,8 @@
//===------ LeonPasses.cpp - Define passes specific to LEON ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/LeonPasses.h b/lib/Target/Sparc/LeonPasses.h
index 1b3d9a7a32f9..154a2b467e16 100755
--- a/lib/Target/Sparc/LeonPasses.h
+++ b/lib/Target/Sparc/LeonPasses.h
@@ -1,9 +1,8 @@
//===------- LeonPasses.h - Define passes specific to LEON ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index d7f1e3a1ab1d..2e8fa0dbaf4c 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- SparcAsmBackend.cpp - Sparc Assembler Backend ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 5a730947796e..88547075c5ae 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- SparcELFObjectWriter.cpp - Sparc ELF Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index 99aa63fe2290..b5fac0264019 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -1,9 +1,8 @@
//===-- SparcFixupKinds.h - Sparc Specific Fixup Entries --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index d152efae6d1f..c479459786d7 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- SparcInstPrinter.cpp - Convert Sparc MCInst to assembly syntax -----==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
index 89015eb137c2..499bcadb0d4d 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -1,9 +1,8 @@
//===-- SparcInstPrinter.h - Convert Sparc MCInst to assembly syntax ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
-#define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCINSTPRINTER_H
+#define LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 50e8825b15e8..1a2a040990ae 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===- SparcMCAsmInfo.cpp - Sparc asm properties --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 5e8d0cb50312..c9162f2dc8a5 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -1,9 +1,8 @@
//===- SparcMCAsmInfo.h - Sparc asm properties -----------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 647be159a151..7e908011bd50 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- SparcMCCodeEmitter.cpp - Convert Sparc code to machine code -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,9 +83,10 @@ public:
const MCSubtargetInfo &STI) const;
private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index 4ddb72643a91..00f319fc37e1 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -1,9 +1,8 @@
//===-- SparcMCExpr.cpp - Sparc specific MC expression classes --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index cf2db067749c..c2467faca257 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -1,9 +1,8 @@
//====- SparcMCExpr.h - Sparc specific MC expression classes --*- C++ -*-=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index bd6596faee5d..ce593bb66770 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,10 @@
//===----------------------------------------------------------------------===//
#include "SparcMCTargetDesc.h"
-#include "InstPrinter/SparcInstPrinter.h"
+#include "SparcInstPrinter.h"
#include "SparcMCAsmInfo.h"
#include "SparcTargetStreamer.h"
+#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 3cd24104c443..e5699bb1c133 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,10 +32,6 @@ class StringRef;
class raw_pwrite_stream;
class raw_ostream;
-Target &getTheSparcTarget();
-Target &getTheSparcV9Target();
-Target &getTheSparcelTarget();
-
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp
index 94af791e0e75..a322d49adb87 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===-- SparcTargetStreamer.cpp - Sparc Target Streamer Methods -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetStreamer.h"
-#include "InstPrinter/SparcInstPrinter.h"
+#include "SparcInstPrinter.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
index 8bb418e39ab4..9f729a6c2cf4 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- SparcTargetStreamer.h - Sparc Target Streamer ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 0cea53b359eb..967c463f5281 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -1,9 +1,8 @@
//===-- Sparc.h - Top-level interface for Sparc representation --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 0412215be8ab..ca6147edc46b 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,9 +1,8 @@
//===-- Sparc.td - Describe the Sparc Target Machine -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 5f0e359a3b00..4d5cbfbadc9d 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/SparcInstPrinter.h"
+#include "MCTargetDesc/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
#include "MCTargetDesc/SparcTargetStreamer.h"
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
+#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -60,11 +60,9 @@ namespace {
}
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
const MCSubtargetInfo &STI);
@@ -360,7 +358,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
MO.getMBB()->getSymbol()->print(O, MAI);
return;
case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
+ PrintSymbolOperand(MO, O);
break;
case MachineOperand::MO_BlockAddress:
O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
@@ -406,7 +404,6 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
@@ -415,7 +412,7 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
case 'f':
case 'r':
break;
@@ -428,7 +425,7 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+ unsigned OpNo,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index 0aa29d186dc1..4be432211f1d 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -1,9 +1,8 @@
//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 9f6c7d65592d..1834a6fd861d 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- SparcFrameLowering.cpp - Sparc Frame Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 6098afa68985..8e6001da05db 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -1,9 +1,8 @@
//===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index f845c41ede45..8cff50d19ed4 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- SparcISelDAGToDAG.cpp - A dag to dag inst selector for Sparc ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -313,7 +312,7 @@ bool SparcDAGToDAGISel::tryInlineAsm(SDNode *N){
SelectInlineAsmMemoryOperands(AsmNodeOperands, SDLoc(N));
- SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+ SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
New->setNodeId(-1);
ReplaceNode(N, New.getNode());
@@ -329,7 +328,8 @@ void SparcDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::INLINEASM: {
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: {
if (tryInlineAsm(N))
return;
break;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index ae2257618a55..a6d440fa8aa2 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "SparcRegisterInfo.h"
#include "SparcTargetMachine.h"
#include "SparcTargetObjectFile.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -3258,6 +3258,8 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'r':
if (VT == MVT::v2i32)
return std::make_pair(0U, &SP::IntPairRegClass);
+ else if (Subtarget->is64Bit())
+ return std::make_pair(0U, &SP::I64RegsRegClass);
else
return std::make_pair(0U, &SP::IntRegsRegClass);
case 'f':
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 718851db25bf..8d557a4225e5 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -1,9 +1,8 @@
//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index 0b94c6b614eb..2d4f687f72d2 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -1,9 +1,8 @@
//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td
index 35987390d7ba..d4d056ea0af6 100644
--- a/lib/Target/Sparc/SparcInstrAliases.td
+++ b/lib/Target/Sparc/SparcInstrAliases.td
@@ -1,9 +1,8 @@
//===-- SparcInstrAliases.td - Instruction Aliases for Sparc Target -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index 76366c6695f4..fbf08b49d60c 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -1,9 +1,8 @@
//===-- SparcInstrFormats.td - Sparc Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 47b42444b94d..ad343fe6f80a 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- SparcInstrInfo.cpp - Sparc Instruction Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 524b5d054163..b587b28c25fc 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -1,9 +1,8 @@
//===-- SparcInstrInfo.h - Sparc Instruction Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 558b37aeebcb..8474c7abffb3 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -1,9 +1,8 @@
//===-- SparcInstrInfo.td - Target Description for Sparc Target -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcInstrVIS.td b/lib/Target/Sparc/SparcInstrVIS.td
index d9adf3e8b0f5..bdefc70869d7 100644
--- a/lib/Target/Sparc/SparcInstrVIS.td
+++ b/lib/Target/Sparc/SparcInstrVIS.td
@@ -1,9 +1,8 @@
//===---- SparcInstrVIS.td - Visual Instruction Set extensions (VIS) -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp
index a784124ff688..8ea317fdd453 100644
--- a/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- SparcMCInstLower.cpp - Convert Sparc MachineInstr to MCInst -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
index e7442826e78b..7c36c4ab865f 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- SparcMachineFunctionInfo.cpp - Sparc Machine Function Info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 104744279d9d..fe5705878693 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 33caa66154ff..ce11a423d10e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -189,7 +188,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
.addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
- replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+ replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg);
MI.setDesc(TII.get(SP::STDFri));
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
@@ -198,10 +197,10 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned DestReg = MI.getOperand(0).getReg();
unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
- MachineInstr *StMI =
+ MachineInstr *LdMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
.addReg(FrameReg).addImm(0);
- replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+ replaceFI(MF, *LdMI, *LdMI, dl, 1, Offset, FrameReg);
MI.setDesc(TII.get(SP::LDDFri));
MI.getOperand(0).setReg(DestOddReg);
@@ -213,7 +212,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
-unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 8dd2569d10de..118ef9d80fae 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- SparcRegisterInfo.h - Sparc Register Information Impl ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,7 +38,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
bool canRealignStack(const MachineFunction &MF) const override;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 6625eaafd992..98959d512955 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- SparcRegisterInfo.td - Sparc Register defs ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcSchedule.td b/lib/Target/Sparc/SparcSchedule.td
index f243546b029b..31e43c9bd95d 100755
--- a/lib/Target/Sparc/SparcSchedule.td
+++ b/lib/Target/Sparc/SparcSchedule.td
@@ -1,9 +1,8 @@
//===-- SparcSchedule.td - Describe the Sparc Itineraries --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 5301fc30a006..075a002a358d 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- SparcSubtarget.cpp - SPARC Subtarget Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 24ea41a266e7..db19f99e3c9c 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -1,9 +1,8 @@
//===-- SparcSubtarget.h - Define Subtarget for the SPARC -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 5b467235f809..195cff79de03 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- SparcTargetMachine.cpp - Define TargetMachine for Sparc -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "LeonPasses.h"
#include "Sparc.h"
#include "SparcTargetObjectFile.h"
+#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -75,9 +75,9 @@ getEffectiveSparcCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
bool Is64Bit, bool JIT) {
if (CM) {
if (*CM == CodeModel::Tiny)
- report_fatal_error("Target does not support the tiny CodeModel");
+ report_fatal_error("Target does not support the tiny CodeModel", false);
if (*CM == CodeModel::Kernel)
- report_fatal_error("Target does not support the kernel CodeModel");
+ report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
if (Is64Bit) {
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index d1eb1d329a4c..4083f61433b1 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -1,9 +1,8 @@
//===-- SparcTargetMachine.h - Define TargetMachine for Sparc ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index d0db854f7849..e6ad4d2d67aa 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===------- SparcTargetObjectFile.cpp - Sparc Object Info Impl -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.h b/lib/Target/Sparc/SparcTargetObjectFile.h
index 3b1b345c3b19..9bbe602b32b3 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- SparcTargetObjectFile.h - Sparc Object Info -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index d030bd9f232d..eafa2b4b2f13 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h
new file mode 100644
index 000000000000..e02ff59fdac3
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h
@@ -0,0 +1,22 @@
+//===-- SparcTargetInfo.h - Sparc Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SPARC_TARGETINFO_SPARCTARGETINFO_H
+#define LLVM_LIB_TARGET_SPARC_TARGETINFO_SPARCTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheSparcTarget();
+Target &getTheSparcV9Target();
+Target &getTheSparcelTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SPARC_TARGETINFO_SPARCTARGETINFO_H
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 91959b4151b3..a259ba3433d6 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -1,14 +1,14 @@
//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/SystemZInstPrinter.h"
+#include "MCTargetDesc/SystemZInstPrinter.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -651,7 +651,6 @@ static void printMCExpr(const MCExpr *E, raw_ostream &OS) {
void SystemZOperand::print(raw_ostream &OS) const {
switch (Kind) {
- break;
case KindToken:
OS << "Token:" << getToken();
break;
@@ -1181,8 +1180,10 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
// features to be available during the operand check, or else we will fail to
// find the custom parser, and then we will later get an InvalidOperand error
// instead of a MissingFeature error.
- uint64_t AvailableFeatures = getAvailableFeatures();
- setAvailableFeatures(~(uint64_t)0);
+ FeatureBitset AvailableFeatures = getAvailableFeatures();
+ FeatureBitset All;
+ All.set();
+ setAvailableFeatures(All);
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
setAvailableFeatures(AvailableFeatures);
if (ResTy == MatchOperand_Success)
@@ -1233,7 +1234,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
return false;
}
-static std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS,
+static std::string SystemZMnemonicSpellCheck(StringRef S,
+ const FeatureBitset &FBS,
unsigned VariantID = 0);
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1244,8 +1246,9 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
unsigned MatchResult;
+ FeatureBitset MissingFeatures;
MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm);
+ MissingFeatures, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success:
Inst.setLoc(IDLoc);
@@ -1253,17 +1256,15 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
case Match_MissingFeature: {
- assert(ErrorInfo && "Unknown missing feature!");
+ assert(MissingFeatures.any() && "Unknown missing feature!");
// Special case the error message for the very common case where only
// a single subtarget feature is missing
std::string Msg = "instruction requires:";
- uint64_t Mask = 1;
- for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
- if (ErrorInfo & Mask) {
+ for (unsigned I = 0, E = MissingFeatures.size(); I != E; ++I) {
+ if (MissingFeatures[I]) {
Msg += " ";
- Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ Msg += getSubtargetFeatureName(I);
}
- Mask <<= 1;
}
return Error(IDLoc, Msg);
}
@@ -1282,7 +1283,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_MnemonicFail: {
- uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = SystemZMnemonicSpellCheck(
((SystemZOperand &)*Operands[0]).getToken(), FBS);
return Error(IDLoc, "invalid instruction" + Suggestion,
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 8903b57ffd0b..70c26db33ced 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -1,14 +1,14 @@
//===-- SystemZDisassembler.cpp - Disassembler for SystemZ ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
+#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 6cd12e13e220..91cb35dd72f2 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -1,9 +1,8 @@
//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index d65c661545eb..4235d4e21792 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -1,9 +1,8 @@
//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
-#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include <cstdint>
@@ -75,4 +74,4 @@ private:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZINSTPRINTER_H
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 2146832f7794..23d8585095cc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 6e00981939b6..d6cdacfcab92 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 800f89232063..b8818a65f9e3 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -1,9 +1,8 @@
//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index d188f56512ab..a5ccf4f68ffd 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -144,9 +143,10 @@ private:
}
private:
- uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
- void verifyInstructionPredicates(const MCInst &MI,
- uint64_t AvailableFeatures) const;
+ FeatureBitset computeAvailableFeatures(const FeatureBitset &FB) const;
+ void
+ verifyInstructionPredicates(const MCInst &MI,
+ const FeatureBitset &AvailableFeatures) const;
};
} // end anonymous namespace
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index c012accc14dd..14f6198183b9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -1,9 +1,8 @@
//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 888be519fb16..8d8ba5644e10 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -37,8 +36,8 @@ protected:
} // end anonymous namespace
SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
- /*HasRelocationAddend=*/ true) {}
+ : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390,
+ /*HasRelocationAddend_=*/ true) {}
// Return the relocation type for an absolute value of MCFixupKind Kind.
static unsigned getAbsoluteReloc(unsigned Kind) {
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 05688ed8efbb..3c0300cfd8f0 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -1,15 +1,16 @@
//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SystemZMCTargetDesc.h"
-#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZInstPrinter.h"
#include "SystemZMCAsmInfo.h"
+#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 1617a807e65a..8f720c5abb34 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,8 +29,6 @@ class Triple;
class raw_pwrite_stream;
class raw_ostream;
-Target &getTheSystemZTarget();
-
namespace SystemZMC {
// How many bytes are in the ABI-defined, caller-allocated part of
// a stack frame.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index fdbde3d8dbc3..2b0f90182d7f 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -1,9 +1,8 @@
//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -195,6 +194,7 @@ FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass();
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 3800f7a26b79..ebbc6ffd2f1e 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -1,9 +1,8 @@
//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index e2de721be568..ef378e4ade7a 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "SystemZAsmPrinter.h"
-#include "InstPrinter/SystemZInstPrinter.h"
+#include "MCTargetDesc/SystemZInstPrinter.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMCInstLower.h"
+#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Mangler.h"
@@ -80,6 +80,27 @@ static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
Context);
}
+// MI is an instruction that accepts an optional alignment hint,
+// and which was already lowered to LoweredMI. If the alignment
+// of the original memory operand is known, update LoweredMI to
+// an instruction with the corresponding hint set.
+static void lowerAlignmentHint(const MachineInstr *MI, MCInst &LoweredMI,
+ unsigned Opcode) {
+ if (!MI->hasOneMemOperand())
+ return;
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
+ unsigned AlignmentHint = 0;
+ if (MMO->getAlignment() >= 16)
+ AlignmentHint = 4;
+ else if (MMO->getAlignment() >= 8)
+ AlignmentHint = 3;
+ if (AlignmentHint == 0)
+ return;
+
+ LoweredMI.setOpcode(Opcode);
+ LoweredMI.addOperand(MCOperand::createImm(AlignmentHint));
+}
+
// MI loads the high part of a vector from memory. Return an instruction
// that uses replicating vector load Opcode to do the same thing.
static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
@@ -351,6 +372,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
break;
+ case SystemZ::VL:
+ Lower.lower(MI, LoweredMI);
+ lowerAlignmentHint(MI, LoweredMI, SystemZ::VLAlign);
+ break;
+
+ case SystemZ::VST:
+ Lower.lower(MI, LoweredMI);
+ lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTAlign);
+ break;
+
+ case SystemZ::VLM:
+ Lower.lower(MI, LoweredMI);
+ lowerAlignmentHint(MI, LoweredMI, SystemZ::VLMAlign);
+ break;
+
+ case SystemZ::VSTM:
+ Lower.lower(MI, LoweredMI);
+ lowerAlignmentHint(MI, LoweredMI, SystemZ::VSTMAlign);
+ break;
+
case SystemZ::VL32:
LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
break;
@@ -618,26 +659,19 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutStreamer->EmitValue(Expr, Size);
}
-bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
- unsigned OpNo,
- unsigned AsmVariant,
+bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
- if (ExtraCode && *ExtraCode == 'n') {
- if (!MI->getOperand(OpNo).isImm())
- return true;
- OS << -int64_t(MI->getOperand(OpNo).getImm());
- } else {
- SystemZMCInstLower Lower(MF->getContext(), *this);
- MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
- SystemZInstPrinter::printOperand(MO, MAI, OS);
- }
+ if (ExtraCode)
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
+ MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
+ SystemZInstPrinter::printOperand(MO, MAI, OS);
return false;
}
bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &OS) {
SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(),
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index cb88ec32f83a..aa5d3ca78e61 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -1,9 +1,8 @@
//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -37,11 +36,9 @@ public:
void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
void EmitEndOfAsmFile(Module &M) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool doInitialization(Module &M) override {
SM.reset();
diff --git a/lib/Target/SystemZ/SystemZCallingConv.cpp b/lib/Target/SystemZ/SystemZCallingConv.cpp
index 72da51f74b10..91c7fae17a75 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -1,9 +1,8 @@
//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
index b5523e586f4c..82f29b6361f1 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -1,9 +1,8 @@
//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index deba27fee7fe..bbd51546ac9f 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -1,9 +1,8 @@
//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for the SystemZ ABI.
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 4a6beb67f182..ffeee4da95cc 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -1,9 +1,8 @@
//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
index a71b595560d2..6cb7710abdfe 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -1,9 +1,8 @@
//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index 668a77ac014f..9cbf6b320504 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -1,9 +1,8 @@
//===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -147,6 +146,9 @@ static bool resultTests(MachineInstr &MI, unsigned Reg) {
// Describe the references to Reg or any of its aliases in MI.
Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
Reference Ref;
+ if (MI.isDebugInstr())
+ return Ref;
+
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
@@ -523,9 +525,9 @@ bool SystemZElimCompare::fuseCompareOperations(
// SrcReg2 is the register if the source operand is a register,
// 0 if the source operand is immediate, and the base register
// if the source operand is memory (index is not supported).
- unsigned SrcReg = Compare.getOperand(0).getReg();
- unsigned SrcReg2 =
- Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
+ Register SrcReg = Compare.getOperand(0).getReg();
+ Register SrcReg2 =
+ Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : Register();
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
if (MBBI->modifiesRegister(SrcReg, TRI) ||
diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
index 67c80899d491..09708fb4241c 100644
--- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp
+++ b/lib/Target/SystemZ/SystemZExpandPseudo.cpp
@@ -1,9 +1,8 @@
//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index beff45dba81d..dae795e845b0 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -1,9 +1,8 @@
//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -242,6 +241,51 @@ def Arch12NewFeatures : SystemZFeatureList<[
//===----------------------------------------------------------------------===//
//
+// New features added in the Thirteenth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureMiscellaneousExtensions3 : SystemZFeature<
+ "miscellaneous-extensions-3", "MiscellaneousExtensions3",
+ "Assume that the miscellaneous-extensions facility 3 is installed"
+>;
+
+def FeatureMessageSecurityAssist9 : SystemZFeature<
+ "message-security-assist-extension9", "MessageSecurityAssist9",
+ "Assume that the message-security-assist extension facility 9 is installed"
+>;
+
+def FeatureVectorEnhancements2 : SystemZFeature<
+ "vector-enhancements-2", "VectorEnhancements2",
+ "Assume that the vector enhancements facility 2 is installed"
+>;
+
+def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
+ "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
+ "Assume that the vector packed decimal enhancement facility is installed"
+>;
+
+def FeatureEnhancedSort : SystemZFeature<
+ "enhanced-sort", "EnhancedSort",
+ "Assume that the enhanced-sort facility is installed"
+>;
+
+def FeatureDeflateConversion : SystemZFeature<
+ "deflate-conversion", "DeflateConversion",
+ "Assume that the deflate-conversion facility is installed"
+>;
+
+def Arch13NewFeatures : SystemZFeatureList<[
+ FeatureMiscellaneousExtensions3,
+ FeatureMessageSecurityAssist9,
+ FeatureVectorEnhancements2,
+ FeatureVectorPackedDecimalEnhancement,
+ FeatureEnhancedSort,
+ FeatureDeflateConversion
+]>;
+
+//===----------------------------------------------------------------------===//
+//
// Cumulative supported and unsupported feature sets
//
//===----------------------------------------------------------------------===//
@@ -256,9 +300,13 @@ def Arch11SupportedFeatures
: SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
def Arch12SupportedFeatures
: SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>;
+def Arch13SupportedFeatures
+ : SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>;
-def Arch12UnsupportedFeatures
+def Arch13UnsupportedFeatures
: SystemZFeatureList<[]>;
+def Arch12UnsupportedFeatures
+ : SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>;
def Arch11UnsupportedFeatures
: SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>;
def Arch10UnsupportedFeatures
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 565299c90139..da28faebb326 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 08c84c785cc0..71ef3e4dc240 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -1,9 +1,8 @@
//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index 8726b56bc94f..e2af02227999 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h
index 6292feefbfea..38bf41ebe96a 100644
--- a/lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -1,9 +1,8 @@
//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 5bc2ab0ef2d8..9dc4512255cc 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "SystemZISelLowering.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
@@ -304,6 +304,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
+ SDNode *Node);
+
// Try to use gather instruction Opcode to implement vector insertion N.
bool tryGather(SDNode *N, unsigned Opcode);
@@ -1132,6 +1135,35 @@ void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
SelectCode(Or.getNode());
}
+void SystemZDAGToDAGISel::loadVectorConstant(
+ const SystemZVectorConstantInfo &VCI, SDNode *Node) {
+ assert((VCI.Opcode == SystemZISD::BYTE_MASK ||
+ VCI.Opcode == SystemZISD::REPLICATE ||
+ VCI.Opcode == SystemZISD::ROTATE_MASK) &&
+ "Bad opcode!");
+ assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type");
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
+ SmallVector<SDValue, 2> Ops;
+ for (unsigned OpVal : VCI.OpVals)
+ Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
+ SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
+
+ if (VCI.VecVT == VT.getSimpleVT())
+ ReplaceNode(Node, Op.getNode());
+ else if (VT.getSizeInBits() == 128) {
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
+ ReplaceNode(Node, BitCast.getNode());
+ SelectCode(BitCast.getNode());
+ } else { // float or double
+ unsigned SubRegIdx =
+ (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);
+ ReplaceNode(
+ Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode());
+ }
+ SelectCode(Op.getNode());
+}
+
bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
SDValue ElemV = N->getOperand(2);
auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
@@ -1243,6 +1275,9 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
InputChain = LoadNode->getChain();
} else if (Chain.getOpcode() == ISD::TokenFactor) {
SmallVector<SDValue, 4> ChainOps;
+ SmallVector<const SDNode *, 4> LoopWorklist;
+ SmallPtrSet<const SDNode *, 16> Visited;
+ const unsigned int Max = 1024;
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
SDValue Op = Chain.getOperand(i);
if (Op == Load.getValue(1)) {
@@ -1251,28 +1286,26 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
ChainOps.push_back(Load.getOperand(0));
continue;
}
-
- // Make sure using Op as part of the chain would not cause a cycle here.
- // In theory, we could check whether the chain node is a predecessor of
- // the load. But that can be very expensive. Instead visit the uses and
- // make sure they all have smaller node id than the load.
- int LoadId = LoadNode->getNodeId();
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = UI->use_end(); UI != UE; ++UI) {
- if (UI.getUse().getResNo() != 0)
- continue;
- if (UI->getNodeId() > LoadId)
- return false;
- }
-
+ LoopWorklist.push_back(Op.getNode());
ChainOps.push_back(Op);
}
- if (ChainCheck)
+ if (ChainCheck) {
+ // Add the other operand of StoredVal to worklist.
+ for (SDValue Op : StoredVal->ops())
+ if (Op.getNode() != LoadNode)
+ LoopWorklist.push_back(Op.getNode());
+
+ // Check if Load is reachable from any of the nodes in the worklist.
+ if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
+ true))
+ return false;
+
// Make a new TokenFactor with all the other input chains except
// for the load.
InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
MVT::Other, ChainOps);
+ }
}
if (!ChainCheck)
return false;
@@ -1447,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
Node->getOperand(0).getOpcode() != ISD::Constant)
if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
+ // Don't split the operation if we can match one of the combined
+ // logical operations provided by miscellaneous-extensions-3.
+ if (Subtarget->hasMiscellaneousExtensions3()) {
+ unsigned ChildOpcode = Node->getOperand(0).getOpcode();
+ // Check whether this expression matches NAND/NOR/NXOR.
+ if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
+ if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
+ ChildOpcode == ISD::XOR)
+ break;
+ // Check whether this expression matches OR-with-complement.
+ if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
+ auto Op0 = Node->getOperand(0);
+ if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
+ if (Op0Op1->getZExtValue() == (uint64_t)-1)
+ break;
+ }
+ }
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
splitLargeImmediate(Opcode, Node, Node->getOperand(0),
Val - uint32_t(Val), uint32_t(Val));
@@ -1527,6 +1577,27 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
break;
}
+ case ISD::BUILD_VECTOR: {
+ auto *BVN = cast<BuildVectorSDNode>(Node);
+ SystemZVectorConstantInfo VCI(BVN);
+ if (VCI.isVectorConstantLegal(*Subtarget)) {
+ loadVectorConstant(VCI, Node);
+ return;
+ }
+ break;
+ }
+
+ case ISD::ConstantFP: {
+ APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();
+ if (Imm.isZero() || Imm.isNegZero())
+ break;
+ SystemZVectorConstantInfo VCI(Imm);
+ bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;
+ assert(Success && "Expected legal FP immediate");
+ loadVectorConstant(VCI, Node);
+ return;
+ }
+
case ISD::STORE: {
if (tryFoldLoadStoreIntoMemOperand(Node))
return;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2a825c1316f3..78820f511ab4 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -250,8 +249,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// We have native support for a 64-bit CTLZ, via FLOGR.
setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+ // On arch13 we have native support for a 64-bit CTPOP.
+ if (Subtarget.hasMiscellaneousExtensions3()) {
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ }
+
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
@@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}
+ if (Subtarget.hasVectorEnhancements2()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
+ }
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -401,6 +418,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
+
+ // Handle constrained floating-point operations.
+ setOperationAction(ISD::STRICT_FADD, VT, Legal);
+ setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+ setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+ setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
+ if (Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FROUND, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ }
}
}
@@ -432,6 +467,20 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ // Handle constrained floating-point operations.
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
}
// The vector enhancements facility 1 has instructions for these.
@@ -475,6 +524,25 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
+
+ // Handle constrained floating-point operations.
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+ for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
+ MVT::v4f32, MVT::v2f64 }) {
+ setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+ }
}
// We have fused multiply-addition for f32 and f64 but not f128.
@@ -525,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
@@ -577,9 +646,127 @@ bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
-bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+// Return true if the constant can be generated with a vector instruction,
+// such as VGM, VGMB or VREPI.
+bool SystemZVectorConstantInfo::isVectorConstantLegal(
+ const SystemZSubtarget &Subtarget) {
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ if (!Subtarget.hasVector() ||
+ (isFP128 && !Subtarget.hasVectorEnhancements1()))
+ return false;
+
+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
+ // preferred way of creating all-zero and all-one vectors so give it
+ // priority over other methods below.
+ unsigned Mask = 0;
+ unsigned I = 0;
+ for (; I < SystemZ::VectorBytes; ++I) {
+ uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
+ if (Byte == 0xff)
+ Mask |= 1ULL << I;
+ else if (Byte != 0)
+ break;
+ }
+ if (I == SystemZ::VectorBytes) {
+ Opcode = SystemZISD::BYTE_MASK;
+ OpVals.push_back(Mask);
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
+ return true;
+ }
+
+ if (SplatBitSize > 64)
+ return false;
+
+ auto tryValue = [&](uint64_t Value) -> bool {
+ // Try VECTOR REPLICATE IMMEDIATE
+ int64_t SignedValue = SignExtend64(Value, SplatBitSize);
+ if (isInt<16>(SignedValue)) {
+ OpVals.push_back(((unsigned) SignedValue));
+ Opcode = SystemZISD::REPLICATE;
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+ SystemZ::VectorBits / SplatBitSize);
+ return true;
+ }
+ // Try VECTOR GENERATE MASK
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
+ // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
+ // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
+ // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
+ OpVals.push_back(Start - (64 - SplatBitSize));
+ OpVals.push_back(End - (64 - SplatBitSize));
+ Opcode = SystemZISD::ROTATE_MASK;
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+ SystemZ::VectorBits / SplatBitSize);
+ return true;
+ }
+ return false;
+ };
+
+ // First try assuming that any undefined bits above the highest set bit
+ // and below the lowest set bit are 1s. This increases the likelihood of
+ // being able to use a sign-extended element value in VECTOR REPLICATE
+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+ uint64_t SplatBitsZ = SplatBits.getZExtValue();
+ uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+ uint64_t Lower =
+ (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+ uint64_t Upper =
+ (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ if (tryValue(SplatBitsZ | Upper | Lower))
+ return true;
+
+ // Now try assuming that any undefined bits between the first and
+ // last defined set bits are set. This increases the chances of
+ // using a non-wraparound mask.
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+ return tryValue(SplatBitsZ | Middle);
+}
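
A minimal standalone C++ sketch of the undef-bit widening performed above, under the assumption that the splat fits in 64 bits; firstSetBit and lastSetBit are hypothetical stand-ins for llvm::findFirstSet and llvm::findLastSet, not the LLVM APIs themselves.

#include <cassert>
#include <cstdint>

// Lowest and highest set bit index of a non-zero 64-bit value.
static unsigned firstSetBit(uint64_t V) {
  unsigned I = 0;
  while (!(V & 1)) { V >>= 1; ++I; }
  return I;
}
static unsigned lastSetBit(uint64_t V) {
  unsigned I = 0;
  while (V >>= 1) ++I;
  return I;
}

int main() {
  // A 16-bit splat with defined bits 0x0070; the mask 0xff0f marks undef bits.
  uint64_t SplatBits = 0x0070;
  uint64_t SplatUndef = 0xff0f;

  // First attempt: treat undef bits below the lowest and above the highest
  // defined set bit as 1s, exactly as the first tryValue call does.
  uint64_t Lower = SplatUndef & ((uint64_t(1) << firstSetBit(SplatBits)) - 1);
  uint64_t Upper = SplatUndef & ~((uint64_t(1) << lastSetBit(SplatBits)) - 1);
  assert((SplatBits | Upper | Lower) == 0xff7f);

  // Second attempt: only fill the undef bits between the defined ones.
  uint64_t Middle = SplatUndef & ~Upper & ~Lower;
  assert((SplatBits | Middle) == 0x0070);
  return 0;
}
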
+
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
+ IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
+ isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
+
+ // Find the smallest splat.
+ SplatBits = FPImm.bitcastToAPInt();
+ unsigned Width = SplatBits.getBitWidth();
+ while (Width > 8) {
+ unsigned HalfSize = Width / 2;
+ APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatBits.trunc(HalfSize);
+
+ // If the two halves do not match, stop here.
+ if (HighValue != LowValue || 8 > HalfSize)
+ break;
+
+ SplatBits = HighValue;
+ Width = HalfSize;
+ }
+ SplatUndef = 0;
+ SplatBitSize = Width;
+}
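
A minimal sketch of the splat-halving loop above, using a plain uint64_t in place of APInt and therefore only covering values of at most 64 bits; smallestSplatWidth is a hypothetical name, not part of the backend.

#include <cassert>
#include <cstdint>

// Keep splitting the value in half while both halves agree, but never go
// below an 8-bit element, mirroring the constructor above.
static unsigned smallestSplatWidth(uint64_t Bits, unsigned Width) {
  while (Width > 8) {
    unsigned Half = Width / 2;              // Width <= 64, so Half <= 32
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Hi = (Bits >> Half) & Mask;
    uint64_t Lo = Bits & Mask;
    if (Hi != Lo || Half < 8)
      break;
    Bits = Lo;
    Width = Half;
  }
  return Width;
}

int main() {
  assert(smallestSplatWidth(0x4040404040404040ULL, 64) == 8);  // replicated byte
  assert(smallestSplatWidth(0x0001000100010001ULL, 64) == 16); // replicated halfword
  assert(smallestSplatWidth(0x3ff0000000000000ULL, 64) == 64); // double 1.0, no splat
  return 0;
}
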
+
+SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
+ assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
+ bool HasAnyUndefs;
+
+ // Get IntBits by finding the 128-bit splat.
+ BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
+ true);
+
+ // Get SplatBits by finding the 8-bit or greater splat.
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
+ true);
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
// We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
- return Imm.isZero() || Imm.isNegZero();
+ if (Imm.isZero() || Imm.isNegZero())
+ return true;
+
+ return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}
bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
@@ -592,10 +779,8 @@ bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isUInt<32>(Imm) || isUInt<32>(-Imm);
}
-bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned,
- unsigned,
- bool *Fast) const {
+bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const {
// Unaligned accesses should never be slower than the expanded version.
// We check specifically for aligned accesses in the few cases where
// they are required.
@@ -1642,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
CCValid = SystemZ::CCMASK_ANY;
return true;
+ case Intrinsic::s390_vstrsb:
+ case Intrinsic::s390_vstrsh:
+ case Intrinsic::s390_vstrsf:
+ Opcode = SystemZISD::VSTRS_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vstrszb:
+ case Intrinsic::s390_vstrszh:
+ case Intrinsic::s390_vstrszf:
+ Opcode = SystemZISD::VSTRSZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
case Intrinsic::s390_vfcedbs:
case Intrinsic::s390_vfcesbs:
Opcode = SystemZISD::VFCMPES;
@@ -2511,9 +2710,8 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
break;
}
if (Invert) {
- SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
- DAG.getConstant(65535, DL, MVT::i32));
- Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+ SDValue Mask =
+ DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
}
return Cmp;
@@ -3261,6 +3459,18 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}
+static bool isAddCarryChain(SDValue Carry) {
+ while (Carry.getOpcode() == ISD::ADDCARRY)
+ Carry = Carry.getOperand(2);
+ return Carry.getOpcode() == ISD::UADDO;
+}
+
+static bool isSubBorrowChain(SDValue Carry) {
+ while (Carry.getOpcode() == ISD::SUBCARRY)
+ Carry = Carry.getOperand(2);
+ return Carry.getOpcode() == ISD::USUBO;
+}
+
// Lower ADDCARRY/SUBCARRY nodes.
SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
SelectionDAG &DAG) const {
@@ -3283,11 +3493,17 @@ SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!");
case ISD::ADDCARRY:
+ if (!isAddCarryChain(Carry))
+ return SDValue();
+
BaseOp = SystemZISD::ADDCARRY;
CCValid = SystemZ::CCMASK_LOGICAL;
CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
break;
case ISD::SUBCARRY:
+ if (!isSubBorrowChain(Carry))
+ return SDValue();
+
BaseOp = SystemZISD::SUBCARRY;
CCValid = SystemZ::CCMASK_LOGICAL;
CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
@@ -3331,14 +3547,14 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
break;
}
case 32: {
- SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
- DAG.getConstant(0, DL, MVT::i32));
+ SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+ DAG.getConstant(0, DL, MVT::i32));
Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
break;
}
case 64: {
- SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
- DAG.getConstant(0, DL, MVT::i32));
+ SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+ DAG.getConstant(0, DL, MVT::i32));
Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
break;
@@ -3602,6 +3818,27 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
return SDValue();
}
+MachineMemOperand::Flags
+SystemZTargetLowering::getMMOFlags(const Instruction &I) const {
+ // Because of how we convert atomic_load and atomic_store to normal loads and
+ // stores in the DAG, we need to ensure that the MMOs are marked volatile
+ // since DAGCombine hasn't been updated to account for atomic but
+ // non-volatile loads. (See D57601)
+ if (auto *SI = dyn_cast<StoreInst>(&I))
+ if (SI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ if (LI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
+ if (AI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
+ if (AI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ return MachineMemOperand::MONone;
+}
+
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -4260,78 +4497,6 @@ static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
-// Try to represent constant BUILD_VECTOR node BVN using a
-// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
-// on success.
-static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
- EVT ElemVT = BVN->getValueType(0).getVectorElementType();
- unsigned BytesPerElement = ElemVT.getStoreSize();
- for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
- SDValue Op = BVN->getOperand(I);
- if (!Op.isUndef()) {
- uint64_t Value;
- if (Op.getOpcode() == ISD::Constant)
- Value = cast<ConstantSDNode>(Op)->getZExtValue();
- else if (Op.getOpcode() == ISD::ConstantFP)
- Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
- .getZExtValue());
- else
- return false;
- for (unsigned J = 0; J < BytesPerElement; ++J) {
- uint64_t Byte = (Value >> (J * 8)) & 0xff;
- if (Byte == 0xff)
- Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
- else if (Byte != 0)
- return false;
- }
- }
- }
- return true;
-}
-
-// Try to load a vector constant in which BitsPerElement-bit value Value
-// is replicated to fill the vector. VT is the type of the resulting
-// constant, which may have elements of a different size from BitsPerElement.
-// Return the SDValue of the constant on success, otherwise return
-// an empty value.
-static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
- const SystemZInstrInfo *TII,
- const SDLoc &DL, EVT VT, uint64_t Value,
- unsigned BitsPerElement) {
- // Signed 16-bit values can be replicated using VREPI.
- // Mark the constants as opaque or DAGCombiner will convert back to
- // BUILD_VECTOR.
- int64_t SignedValue = SignExtend64(Value, BitsPerElement);
- if (isInt<16>(SignedValue)) {
- MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
- SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(
- SystemZISD::REPLICATE, DL, VecVT,
- DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
- // See whether rotating the constant left some N places gives a value that
- // is one less than a power of 2 (i.e. all zeros followed by all ones).
- // If so we can use VGM.
- unsigned Start, End;
- if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
- // isRxSBGMask returns the bit numbers for a full 64-bit value,
- // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
- // bit numbers for an BitsPerElement value, so that 0 denotes
- // 1 << (BitsPerElement-1).
- Start -= 64 - BitsPerElement;
- End -= 64 - BitsPerElement;
- MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
- SystemZ::VectorBits / BitsPerElement);
- SDValue Op = DAG.getNode(
- SystemZISD::ROTATE_MASK, DL, VecVT,
- DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
- DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
- return SDValue();
-}
-
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
@@ -4385,9 +4550,18 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
return GS.getNode(DAG, SDLoc(BVN));
}
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
+ if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
+ return true;
+ if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
+ return true;
+ return false;
+}
+
// Combine GPR scalar values Elems into a vector of type VT.
-static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
- SmallVectorImpl<SDValue> &Elems) {
+SDValue
+SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) const {
// See whether there is a single replicated value.
SDValue Single;
unsigned int NumElements = Elems.size();
@@ -4416,13 +4590,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
+ if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}
@@ -4494,8 +4668,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (Elems[I].getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Elems[I])->isUnindexed()) {
+ if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -4532,56 +4705,13 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- const SystemZInstrInfo *TII =
- static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (BVN->isConstant()) {
- // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
- // preferred way of creating all-zero and all-one vectors so give it
- // priority over other methods below.
- uint64_t Mask = 0;
- if (tryBuildVectorByteMask(BVN, Mask)) {
- SDValue Op = DAG.getNode(
- SystemZISD::BYTE_MASK, DL, MVT::v16i8,
- DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
- }
-
- // Try using some form of replication.
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
- 8, true) &&
- SplatBitSize <= 64) {
- // First try assuming that any undefined bits above the highest set bit
- // and below the lowest set bit are 1s. This increases the likelihood of
- // being able to use a sign-extended element value in VECTOR REPLICATE
- // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
- uint64_t SplatBitsZ = SplatBits.getZExtValue();
- uint64_t SplatUndefZ = SplatUndef.getZExtValue();
- uint64_t Lower = (SplatUndefZ
- & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
- uint64_t Upper = (SplatUndefZ
- & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
- uint64_t Value = SplatBitsZ | Upper | Lower;
- SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
- SplatBitSize);
- if (Op.getNode())
- return Op;
-
- // Now try assuming that any undefined bits between the first and
- // last defined set bits are set. This increases the chances of
- // using a non-wraparound mask.
- uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
- Value = SplatBitsZ | Middle;
- Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
- if (Op.getNode())
- return Op;
- }
+ if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
+ return Op;
// Fall back to loading it from memory.
return SDValue();
@@ -5074,6 +5204,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VISTR_CC);
OPCODE(VSTRC_CC);
OPCODE(VSTRCZ_CC);
+ OPCODE(VSTRS_CC);
+ OPCODE(VSTRSZ_CC);
OPCODE(TDC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
@@ -5093,6 +5225,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_CMP_SWAP_128);
OPCODE(LRV);
OPCODE(STRV);
+ OPCODE(VLER);
+ OPCODE(VSTER);
OPCODE(PREFETCH);
}
return nullptr;
@@ -5340,8 +5474,7 @@ SDValue SystemZTargetLowering::combineMERGE(
SDValue Op1 = N->getOperand(1);
if (Op0.getOpcode() == ISD::BITCAST)
Op0 = Op0.getOperand(0);
- if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
- cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+ if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
// (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
// for v4f32.
if (Op1 == N->getOperand(0))
@@ -5407,6 +5540,31 @@ SDValue SystemZTargetLowering::combineLOAD(
return SDValue(N, 0);
}
+bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
+ if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
+ return true;
+ if (Subtarget.hasVectorEnhancements2())
+ if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
+ return true;
+ return false;
+}
+
+static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
+ if (!VT.isVector() || !VT.isSimple() ||
+ VT.getSizeInBits() != 128 ||
+ VT.getScalarSizeInBits() % 8 != 0)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != NumElts - 1 - i)
+ return false;
+ }
+
+ return true;
+}
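
A small self-contained sketch of this mask test; isElementSwapMask is a hypothetical stand-in that takes a plain index vector rather than the shuffle mask ArrayRef used above.

#include <cassert>
#include <vector>

// Every defined index i must map to NumElts - 1 - i; negative entries denote
// undef lanes and are ignored, as in isVectorElementSwap above.
static bool isElementSwapMask(const std::vector<int> &M) {
  unsigned NumElts = (unsigned)M.size();
  for (unsigned I = 0; I < NumElts; ++I) {
    if (M[I] < 0)
      continue;
    if ((unsigned)M[I] != NumElts - 1 - I)
      return false;
  }
  return true;
}

int main() {
  assert(isElementSwapMask({3, 2, 1, 0}));              // v4i32 element swap
  assert(isElementSwapMask({7, -1, 5, 4, 3, 2, 1, 0})); // undef lane allowed
  assert(!isElementSwapMask({0, 1, 2, 3}));             // identity, not a swap
  return 0;
}
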
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5428,13 +5586,11 @@ SDValue SystemZTargetLowering::combineSTORE(
SN->getMemOperand());
}
}
- // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+ // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
if (!SN->isTruncatingStore() &&
Op1.getOpcode() == ISD::BSWAP &&
Op1.getNode()->hasOneUse() &&
- (Op1.getValueType() == MVT::i16 ||
- Op1.getValueType() == MVT::i32 ||
- Op1.getValueType() == MVT::i64)) {
+ canLoadStoreByteSwapped(Op1.getValueType())) {
SDValue BSwapOp = Op1.getOperand(0);
@@ -5449,15 +5605,97 @@ SDValue SystemZTargetLowering::combineSTORE(
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
Ops, MemVT, SN->getMemOperand());
}
+ // Combine STORE (element-swap) into VSTER
+ if (!SN->isTruncatingStore() &&
+ Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op1.getNode()->hasOneUse() &&
+ Subtarget.hasVectorEnhancements2()) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+ if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
+ SDValue Ops[] = {
+ N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
+ };
+
+ return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
+ DAG.getVTList(MVT::Other),
+ Ops, MemVT, SN->getMemOperand());
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ // Combine element-swap (LOAD) into VLER
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ Subtarget.hasVectorEnhancements2()) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+ if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+ // Create the element-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr() // Ptr
+ };
+ SDValue ESLoad =
+ DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
+ DAG.getVTList(LD->getValueType(0), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+ // First, combine the VECTOR_SHUFFLE away. This makes the value produced
+ // by the load dead.
+ DCI.CombineTo(N, ESLoad);
+
+ // Next, combine the load away; we give it a bogus result value but a real
+ // chain result. The result value is dead because the shuffle is dead.
+ DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+ }
+
return SDValue();
}
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
if (!Subtarget.hasVector())
return SDValue();
+ // Look through bitcasts that retain the number of vector elements.
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getValueType().isVector() &&
+ Op.getOperand(0).getValueType().isVector() &&
+ Op.getValueType().getVectorNumElements() ==
+ Op.getOperand(0).getValueType().getVectorNumElements())
+ Op = Op.getOperand(0);
+
+ // Pull BSWAP out of a vector extraction.
+ if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
+ EVT VecVT = Op.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
+ Op.getOperand(0), N->getOperand(1));
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
+ if (EltVT != N->getValueType(0)) {
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
+ }
+ return Op;
+ }
+
// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
@@ -5480,6 +5718,10 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
SDValue SystemZTargetLowering::combineFP_ROUND(
SDNode *N, DAGCombinerInfo &DCI) const {
+
+ if (!Subtarget.hasVector())
+ return SDValue();
+
// (fpround (extract_vector_elt X 0))
// (fpround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
@@ -5527,6 +5769,10 @@ SDValue SystemZTargetLowering::combineFP_ROUND(
SDValue SystemZTargetLowering::combineFP_EXTEND(
SDNode *N, DAGCombinerInfo &DCI) const {
+
+ if (!Subtarget.hasVector())
+ return SDValue();
+
// (fpextend (extract_vector_elt X 0))
// (fpextend (extract_vector_elt X 2)) ->
// (extract_vector_elt (VEXTEND X) 0)
@@ -5575,11 +5821,10 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+ // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
- N->getValueType(0) == MVT::i64)) {
+ canLoadStoreByteSwapped(N->getValueType(0))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
@@ -5612,61 +5857,170 @@ SDValue SystemZTargetLowering::combineBSWAP(
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
+
+ // Look through bitcasts that retain the number of vector elements.
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getValueType().isVector() &&
+ Op.getOperand(0).getValueType().isVector() &&
+ Op.getValueType().getVectorNumElements() ==
+ Op.getOperand(0).getValueType().getVectorNumElements())
+ Op = Op.getOperand(0);
+
+ // Push BSWAP into a vector insertion if at least one side then simplifies.
+ if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
+ Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
+ DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
+ Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
+ (canLoadStoreByteSwapped(N->getValueType(0)) &&
+ ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (VecVT != Vec.getValueType()) {
+ Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
+ DCI.AddToWorklist(Vec.getNode());
+ }
+ if (EltVT != Elt.getValueType()) {
+ Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
+ DCI.AddToWorklist(Elt.getNode());
+ }
+ Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
+ DCI.AddToWorklist(Vec.getNode());
+ Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
+ DCI.AddToWorklist(Elt.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
+ Vec, Elt, Idx);
+ }
+ }
+
+ // Push BSWAP into a vector shuffle if at least one side then simplifies.
+ ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
+ if (SV && Op.hasOneUse()) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
+ Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
+ DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
+ Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
+ EVT VecVT = N->getValueType(0);
+ if (VecVT != Op0.getValueType()) {
+ Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ }
+ if (VecVT != Op1.getValueType()) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
+ }
+ }
+
return SDValue();
}
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks,
- // If the CCReg instruction is itself a (ICMP (SELECT_CCMASK)) testing
- // the condition code set by some other instruction, see whether we
- // can directly use that condition code.
- bool Invert = false;
+ // If the CCReg instruction is itself an ICMP testing the condition
+ // code set by some other instruction, see whether we can directly
+ // use that condition code.
- // Verify that we have an appropriate mask for a EQ or NE comparison.
+ // Verify that we have an ICMP against some constant.
if (CCValid != SystemZ::CCMASK_ICMP)
return false;
- if (CCMask == SystemZ::CCMASK_CMP_NE)
- Invert = !Invert;
- else if (CCMask != SystemZ::CCMASK_CMP_EQ)
- return false;
-
- // Verify that we have an ICMP that is the user of a SELECT_CCMASK.
- SDNode *ICmp = CCReg.getNode();
+ auto *ICmp = CCReg.getNode();
if (ICmp->getOpcode() != SystemZISD::ICMP)
return false;
- SDNode *Select = ICmp->getOperand(0).getNode();
- if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
+ auto *CompareLHS = ICmp->getOperand(0).getNode();
+ auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
+ if (!CompareRHS)
return false;
- // Verify that the ICMP compares against one of select values.
- auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
- if (!CompareVal)
- return false;
- auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
- if (!TrueVal)
- return false;
- auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
- if (!FalseVal)
- return false;
- if (CompareVal->getZExtValue() == FalseVal->getZExtValue())
- Invert = !Invert;
- else if (CompareVal->getZExtValue() != TrueVal->getZExtValue())
- return false;
+ // Optimize the case where CompareLHS is a SELECT_CCMASK.
+ if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
+ // Verify that we have an appropriate mask for an EQ or NE comparison.
+ bool Invert = false;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ Invert = !Invert;
+ else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+ return false;
- // Compute the effective CC mask for the new branch or select.
- auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
- auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
- if (!NewCCValid || !NewCCMask)
- return false;
- CCValid = NewCCValid->getZExtValue();
- CCMask = NewCCMask->getZExtValue();
- if (Invert)
- CCMask ^= CCValid;
+ // Verify that the ICMP compares against one of the select values.
+ auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
+ if (!TrueVal)
+ return false;
+ auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
+ if (!FalseVal)
+ return false;
+ if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
+ Invert = !Invert;
+ else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
+ return false;
- // Return the updated CCReg link.
- CCReg = Select->getOperand(4);
- return true;
+ // Compute the effective CC mask for the new branch or select.
+ auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
+ auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
+ if (!NewCCValid || !NewCCMask)
+ return false;
+ CCValid = NewCCValid->getZExtValue();
+ CCMask = NewCCMask->getZExtValue();
+ if (Invert)
+ CCMask ^= CCValid;
+
+ // Return the updated CCReg link.
+ CCReg = CompareLHS->getOperand(4);
+ return true;
+ }
+
+ // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
+ if (CompareLHS->getOpcode() == ISD::SRA) {
+ auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
+ if (!SRACount || SRACount->getZExtValue() != 30)
+ return false;
+ auto *SHL = CompareLHS->getOperand(0).getNode();
+ if (SHL->getOpcode() != ISD::SHL)
+ return false;
+ auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
+ if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
+ return false;
+ auto *IPM = SHL->getOperand(0).getNode();
+ if (IPM->getOpcode() != SystemZISD::IPM)
+ return false;
+
+ // Avoid introducing CC spills (because SRA would clobber CC).
+ if (!CompareLHS->hasOneUse())
+ return false;
+ // Verify that the ICMP compares against zero.
+ if (CompareRHS->getZExtValue() != 0)
+ return false;
+
+ // Compute the effective CC mask for the new branch or select.
+ switch (CCMask) {
+ case SystemZ::CCMASK_CMP_EQ: break;
+ case SystemZ::CCMASK_CMP_NE: break;
+ case SystemZ::CCMASK_CMP_LT: CCMask = SystemZ::CCMASK_CMP_GT; break;
+ case SystemZ::CCMASK_CMP_GT: CCMask = SystemZ::CCMASK_CMP_LT; break;
+ case SystemZ::CCMASK_CMP_LE: CCMask = SystemZ::CCMASK_CMP_GE; break;
+ case SystemZ::CCMASK_CMP_GE: CCMask = SystemZ::CCMASK_CMP_LE; break;
+ default: return false;
+ }
+
+ // Return the updated CCReg link.
+ CCReg = IPM->getOperand(0);
+ return true;
+ }
+
+ return false;
}
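
For intuition on the pattern matched above, a small sketch of what (SRA (SHL (IPM))) computes, assuming SystemZ::IPM_CC is 28 (the CC field of the IPM result sits in bits 29..28); ipmToSigned is a hypothetical name.

#include <cassert>
#include <cstdint>

// The pair of shifts (left by 30 - IPM_CC, arithmetic right by 30)
// sign-extends the two-bit CC into a full register.
static int32_t ipmToSigned(uint32_t CC) {
  uint32_t IPMResult = CC << 28;      // IPM: CC lands in bits 29..28
  uint32_t Shifted = IPMResult << 2;  // SHL by 30 - IPM_CC
  // Arithmetic right shift by 30, written portably: reinterpret as signed,
  // then divide (exact, because the low 30 bits are zero).
  int64_t Signed = (Shifted & 0x80000000u) ? (int64_t)Shifted - (1LL << 32)
                                           : (int64_t)Shifted;
  return (int32_t)(Signed / (1LL << 30));
}

int main() {
  // CC 0 and 1 stay non-negative while CC 2 becomes negative. Because CCValid
  // is CCMASK_ICMP (the producing compare only sets CC 0..2), testing this
  // value for "less than zero" is the same as testing the original compare
  // for "greater than", which is why the switch above swaps LT<->GT and
  // LE<->GE.
  assert(ipmToSigned(0) == 0);
  assert(ipmToSigned(1) == 1);
  assert(ipmToSigned(2) == -2);
  assert(ipmToSigned(3) == -1);
  return 0;
}
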
SDValue SystemZTargetLowering::combineBR_CCMASK(
@@ -5770,12 +6124,18 @@ SDValue SystemZTargetLowering::combineIntDIVREM(
// since it is not Legal but Custom it can only happen before
// legalization. Therefore we must scalarize this early before Combine
// 1. For widened vectors, this is already the result of type legalization.
- if (VT.isVector() && isTypeLegal(VT) &&
+ if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
return DAG.UnrollVectorOp(N);
return SDValue();
}
+SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
+ if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
+ return N->getOperand(0);
+ return N;
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
@@ -5787,6 +6147,7 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
@@ -5977,12 +6338,10 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::s390_vuplhw:
case Intrinsic::s390_vuplf: {
SDValue SrcOp = Op.getOperand(1);
- unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
if (IsLogical) {
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(SrcBitWidth);
+ Known = Known.zext(BitWidth, true);
} else
Known = Known.sext(BitWidth);
break;
@@ -6011,7 +6370,7 @@ SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// Known has the width of the source operand(s). Adjust if needed to match
// the passed bitwidth.
if (Known.getBitWidth() != BitWidth)
- Known = Known.zextOrTrunc(BitWidth);
+ Known = Known.zextOrTrunc(BitWidth, false);
}
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
@@ -6125,7 +6484,7 @@ static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
}
// Force base value Base into a register before MI. Return the register.
-static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
+static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
if (Base.isReg())
return Base.getReg();
@@ -6134,7 +6493,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
.add(Base)
.addImm(0)
@@ -6213,7 +6572,8 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
// destination registers, and the registers that went into the PHI.
DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
- for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;
+ MIIt = skipDebugInstructionsForward(++MIIt, MIItEnd)) {
unsigned DestReg = MIIt->getOperand(0).getReg();
unsigned TrueReg = MIIt->getOperand(1).getReg();
unsigned FalseReg = MIIt->getOperand(2).getReg();
@@ -6237,6 +6597,8 @@ static void createPHIsForSelects(MachineBasicBlock::iterator MIItBegin,
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
}
+
+ MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
@@ -6254,8 +6616,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
// same condition code value, we want to expand all of them into
// a single pair of basic blocks using the same condition.
MachineInstr *LastMI = &MI;
- MachineBasicBlock::iterator NextMIIt =
- std::next(MachineBasicBlock::iterator(MI));
+ MachineBasicBlock::iterator NextMIIt = skipDebugInstructionsForward(
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
if (isSelectPseudo(MI))
while (NextMIIt != MBB->end() && isSelectPseudo(*NextMIIt) &&
@@ -6263,7 +6625,7 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
(NextMIIt->getOperand(4).getImm() == CCMask ||
NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask))) {
LastMI = &*NextMIIt;
- ++NextMIIt;
+ NextMIIt = skipDebugInstructionsForward(++NextMIIt, MBB->end());
}
MachineBasicBlock *StartMBB = MBB;
@@ -6296,8 +6658,8 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
// ...
MBB = JoinMBB;
MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
- MachineBasicBlock::iterator MIItEnd =
- std::next(MachineBasicBlock::iterator(LastMI));
+ MachineBasicBlock::iterator MIItEnd = skipDebugInstructionsForward(
+ std::next(MachineBasicBlock::iterator(LastMI)), MBB->end());
createPHIsForSelects(MIItBegin, MIItEnd, StartMBB, FalseMBB, MBB);
StartMBB->erase(MIItBegin, MIItEnd);
@@ -6415,8 +6777,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
- unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
- unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+ Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
+ Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
DebugLoc DL = MI.getDebugLoc();
if (IsSubWord)
BitSize = MI.getOperand(6).getImm();
@@ -6434,12 +6796,12 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- unsigned OrigVal = MRI.createVirtualRegister(RC);
- unsigned OldVal = MRI.createVirtualRegister(RC);
- unsigned NewVal = (BinOpcode || IsSubWord ?
+ Register OrigVal = MRI.createVirtualRegister(RC);
+ Register OldVal = MRI.createVirtualRegister(RC);
+ Register NewVal = (BinOpcode || IsSubWord ?
MRI.createVirtualRegister(RC) : Src2.getReg());
- unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
- unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+ Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+ Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
@@ -6532,9 +6894,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
unsigned Dest = MI.getOperand(0).getReg();
MachineOperand Base = earlyUseOperand(MI.getOperand(1));
int64_t Disp = MI.getOperand(2).getImm();
- unsigned Src2 = MI.getOperand(3).getReg();
- unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
- unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
+ Register Src2 = MI.getOperand(3).getReg();
+ Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
+ Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
DebugLoc DL = MI.getDebugLoc();
if (IsSubWord)
BitSize = MI.getOperand(6).getImm();
@@ -6552,12 +6914,12 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
assert(LOpcode && CSOpcode && "Displacement out of range");
// Create virtual registers for temporary results.
- unsigned OrigVal = MRI.createVirtualRegister(RC);
- unsigned OldVal = MRI.createVirtualRegister(RC);
- unsigned NewVal = MRI.createVirtualRegister(RC);
- unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
- unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
- unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+ Register OrigVal = MRI.createVirtualRegister(RC);
+ Register OldVal = MRI.createVirtualRegister(RC);
+ Register NewVal = MRI.createVirtualRegister(RC);
+ Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+ Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
+ Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
@@ -6840,22 +7202,22 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (MI.getNumExplicitOperands() > 5) {
bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
- uint64_t StartCountReg = MI.getOperand(5).getReg();
- uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
- uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
+ Register StartCountReg = MI.getOperand(5).getReg();
+ Register StartSrcReg = forceReg(MI, SrcBase, TII);
+ Register StartDestReg = (HaveSingleBase ? StartSrcReg :
forceReg(MI, DestBase, TII));
const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
- uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
- uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ Register ThisSrcReg = MRI.createVirtualRegister(RC);
+ Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
MRI.createVirtualRegister(RC));
- uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
- uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+ Register NextSrcReg = MRI.createVirtualRegister(RC);
+ Register NextDestReg = (HaveSingleBase ? NextSrcReg :
MRI.createVirtualRegister(RC));
RC = &SystemZ::GR64BitRegClass;
- uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
- uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+ Register ThisCountReg = MRI.createVirtualRegister(RC);
+ Register NextCountReg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 622da32e418d..23cdcc72bc42 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -1,9 +1,8 @@
//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -281,6 +281,8 @@ enum NodeType : unsigned {
VISTR_CC,
VSTRC_CC,
VSTRCZ_CC,
+ VSTRS_CC,
+ VSTRSZ_CC,
// Test Data Class.
//
@@ -340,6 +342,9 @@ enum NodeType : unsigned {
// Byte swapping load/store. Same operands as regular load/store.
LRV, STRV,
+ // Element swapping load/store. Same operands as regular load/store.
+ VLER, VSTER,
+
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.
@@ -396,10 +401,12 @@ public:
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+ bool isCheapToSpeculateCtlz() const override { return true; }
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
@@ -407,6 +414,7 @@ public:
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
+ MachineMemOperand::Flags Flags,
bool *Fast) const override;
bool isTruncateFree(Type *, Type *) const override;
bool isTruncateFree(EVT, EVT) const override;
@@ -568,6 +576,9 @@ private:
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ bool isVectorElementLoad(SDValue Op) const;
+ SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -587,8 +598,10 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ bool canLoadStoreByteSwapped(EVT VT) const;
SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -599,6 +612,8 @@ private:
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue unwrapAddress(SDValue N) const override;
+
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
// CCMask and Target are the BRC-like operands for the branch.
@@ -639,8 +654,27 @@ private:
MachineBasicBlock *MBB,
unsigned Opcode) const;
+ MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
};
+
+struct SystemZVectorConstantInfo {
+private:
+ APInt IntBits; // The 128 bits as an integer.
+ APInt SplatBits; // Smallest splat value.
+ APInt SplatUndef; // Bits corresponding to undef operands of the BVN.
+ unsigned SplatBitSize = 0;
+ bool isFP128 = false;
+
+public:
+ unsigned Opcode = 0;
+ SmallVector<unsigned, 2> OpVals;
+ MVT VecVT;
+ SystemZVectorConstantInfo(APFloat FPImm);
+ SystemZVectorConstantInfo(BuildVectorSDNode *BVN);
+ bool isVectorConstantLegal(const SystemZSubtarget &Subtarget);
+};
+
} // end namespace llvm
#endif
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 896b665d25eb..ec7639e71f81 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -1,9 +1,8 @@
//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZInstrDFP.td b/lib/Target/SystemZ/SystemZInstrDFP.td
index 08ab2d7bbc52..8d7a773ff4d9 100644
--- a/lib/Target/SystemZ/SystemZInstrDFP.td
+++ b/lib/Target/SystemZ/SystemZInstrDFP.td
@@ -1,9 +1,8 @@
//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,7 +19,7 @@
//===----------------------------------------------------------------------===//
// Load and test.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>;
def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>;
}
@@ -32,25 +31,31 @@ let Defs = [CC] in {
// Convert floating-point values to narrower representations. The destination
// of LDXTR is a 128-bit value, but only the first register of the pair is used.
-def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>;
-def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+let Uses = [FPC] in {
+ def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>;
+ def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+}
// Extend floating-point values to wider representations.
-def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>;
-def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+let Uses = [FPC] in {
+ def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>;
+ def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+}
// Convert a signed integer value to a floating-point one.
-def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>;
-def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
-let Predicates = [FeatureFPExtension] in {
- def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>;
- def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
- def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>;
- def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+let Uses = [FPC] in {
+ def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>;
+ def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
+ let Predicates = [FeatureFPExtension] in {
+ def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>;
+ def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
+ def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>;
+ def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+ }
}
// Convert an unsigned integer value to a floating-point one.
-let Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], Predicates = [FeatureFPExtension] in {
def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>;
def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>;
def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>;
@@ -58,7 +63,7 @@ let Predicates = [FeatureFPExtension] in {
}
// Convert a floating-point value to a signed integer value.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>;
def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>;
let Predicates = [FeatureFPExtension] in {
@@ -70,7 +75,7 @@ let Defs = [CC] in {
}
// Convert a floating-point value to an unsigned integer value.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
let Predicates = [FeatureFPExtension] in {
def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>;
def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>;
@@ -108,7 +113,7 @@ let Predicates = [FeatureDFPPackedConversion] in {
}
// Perform floating-point operation.
-let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
+let Defs = [CC, R1L, F0Q], Uses = [FPC, R0L, F4Q] in
def PFPO : SideEffectInherentE<"pfpo", 0x010A>;
@@ -118,8 +123,10 @@ let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
// Round to an integer, with the second operand (M3) specifying the rounding
// mode. M4 can be set to 4 to suppress detection of inexact conditions.
-def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>;
-def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+let Uses = [FPC] in {
+ def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>;
+ def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+}
// Extract biased exponent.
def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>;
@@ -135,7 +142,7 @@ def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>;
//===----------------------------------------------------------------------===//
// Addition.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
let isCommutable = 1 in {
def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>;
def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>;
@@ -147,7 +154,7 @@ let Defs = [CC] in {
}
// Subtraction.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>;
def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>;
let Predicates = [FeatureFPExtension] in {
@@ -157,30 +164,38 @@ let Defs = [CC] in {
}
// Multiplication.
-let isCommutable = 1 in {
- def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>;
- def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
-}
-let Predicates = [FeatureFPExtension] in {
- def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>;
- def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+ let isCommutable = 1 in {
+ def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>;
+ def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
+ }
+ let Predicates = [FeatureFPExtension] in {
+ def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>;
+ def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+ }
}
// Division.
-def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>;
-def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
-let Predicates = [FeatureFPExtension] in {
- def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>;
- def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+ def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>;
+ def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
+ let Predicates = [FeatureFPExtension] in {
+ def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>;
+ def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+ }
}
// Quantize.
-def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>;
-def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+ def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>;
+ def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+}
// Reround.
-def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>;
-def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+ def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>;
+ def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+}
// Shift significand left/right.
def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>;
@@ -198,13 +213,13 @@ def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>;
//===----------------------------------------------------------------------===//
// Compare.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>;
def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>;
}
// Compare and signal.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>;
def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>;
}
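The DFP hunks above consistently add Uses = [FPC], modelling the fact that these decimal floating-point instructions read the floating-point control register (rounding mode and exception masks) and therefore must not be reordered across instructions that write it. A minimal, hypothetical C++ sketch, not part of this patch, of why that dependency is observable to programs:

#include <cfenv>
#include <cstdio>

// Hypothetical illustration only: the value produced by an inexact FP
// operation depends on the rounding mode held in the FP control register,
// so such instructions must be modelled as reading that register.
// (A fully conforming build also needs "#pragma STDC FENV_ACCESS ON".)
int main() {
  volatile double one = 1.0, three = 3.0;   // volatile blocks constant folding

  std::fesetround(FE_DOWNWARD);
  double down = one / three;                // rounded towards -infinity

  std::fesetround(FE_UPWARD);
  double up = one / three;                  // rounded towards +infinity

  std::printf("%d\n", up > down);           // prints 1: same division, two results
  return 0;
}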
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 1374ee91fa29..19c7ec58ed3d 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -1,9 +1,8 @@
//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -53,7 +52,8 @@ let isCodeGenOnly = 1 in
// Moves between two floating-point registers that also set the condition
// codes.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
@@ -69,7 +69,8 @@ let Predicates = [FeatureNoVector] in {
// Use a normal load-and-test for compare against zero in case of
// vector support (via a pseudo to simplify instruction selection).
-let Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
@@ -174,56 +175,64 @@ let SimpleBDXStore = 1, mayStore = 1 in {
// Convert floating-point values to narrower representations, rounding
// according to the current mode. The destination of LEXBR and LDXBR
// is a 128-bit value, but only the first register of the pair is used.
-def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>;
-def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
-def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
-
-def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>,
- Requires<[FeatureFPExtension]>;
-def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
- Requires<[FeatureFPExtension]>;
-def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
- Requires<[FeatureFPExtension]>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>;
+ def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+ def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+
+ def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>,
+ Requires<[FeatureFPExtension]>;
+ def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+ def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+}
let Predicates = [FeatureNoVectorEnhancements1] in {
- def : Pat<(f32 (fpround FP128:$src)),
+ def : Pat<(f32 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
- def : Pat<(f64 (fpround FP128:$src)),
+ def : Pat<(f64 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
// Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
-def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>;
+ def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
+ def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+}
let Predicates = [FeatureNoVectorEnhancements1] in {
- def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
- def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
+ def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
+ def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
}
// Extend memory floating-point values to wider representations.
-def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
-def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
-def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def LDEB : UnaryRXE<"ldeb", 0xED04, any_extloadf32, FP64, 4>;
+ def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
+ def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
+}
let Predicates = [FeatureNoVectorEnhancements1] in {
- def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
+ def : Pat<(f128 (any_extloadf32 bdxaddr12only:$src)),
(LXEB bdxaddr12only:$src)>;
- def : Pat<(f128 (extloadf64 bdxaddr12only:$src)),
+ def : Pat<(f128 (any_extloadf64 bdxaddr12only:$src)),
(LXDB bdxaddr12only:$src)>;
}
// Convert a signed integer register value to a floating-point one.
-def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
-def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
-def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
-
-def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
-def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
-def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
+ def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
+ def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+
+ def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
+ def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
+ def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+}
// The FP extension feature provides versions of the above that allow
// specifying rounding mode and inexact-exception suppression flags.
-let Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in {
def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>;
def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>;
def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
@@ -235,13 +244,15 @@ let Predicates = [FeatureFPExtension] in {
// Convert an unsigned integer register value to a floating-point one.
let Predicates = [FeatureFPExtension] in {
- def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
- def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
- def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
-
- def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>;
- def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>;
- def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
+ def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
+ def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
+
+ def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>;
+ def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>;
+ def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+ }
def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
@@ -254,7 +265,7 @@ let Predicates = [FeatureFPExtension] in {
// Convert a floating-point register value to a signed integer value,
// with the second operand (modifier M3) specifying the rounding mode.
-let Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
@@ -275,7 +286,8 @@ def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
// The FP extension feature provides versions of the above that also allow
// specifying the inexact-exception suppression flag.
-let Predicates = [FeatureFPExtension], Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureFPExtension], Defs = [CC] in {
def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
@@ -287,7 +299,7 @@ let Predicates = [FeatureFPExtension], Defs = [CC] in {
// Convert a floating-point register value to an unsigned integer value.
let Predicates = [FeatureFPExtension] in {
- let Defs = [CC] in {
+ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
@@ -353,59 +365,65 @@ let isCodeGenOnly = 1 in
def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
// Square root.
-def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
-def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
-def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>;
+ def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>;
+ def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
-def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
-def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
+ def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>;
+ def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>;
+}
// Round to an integer, with the second operand (modifier M3) specifying
// the rounding mode. These forms always check for inexact conditions.
-def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
-def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
-def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
+ def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
+ def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+}
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
-def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
-def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
-def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>;
+def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>;
+def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>;
let Predicates = [FeatureFPExtension] in {
// Extended forms of the FIxBR instructions. M4 can be set to 4
// to suppress detection of inexact conditions.
- def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
- def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
- def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
+ def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
+ def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+ }
// fnearbyint is like frint but does not detect inexact conditions.
- def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
- def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
- def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+ def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
+ def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
+ def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
// floor is no longer allowed to raise an inexact condition,
// so restrict it to the cases where the condition can be suppressed.
// Mode 7 is round towards -inf.
- def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
- def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
- def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+ def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
+ def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
+ def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
// Same idea for ceil, where mode 6 is round towards +inf.
- def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
- def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
- def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+ def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
+ def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
+ def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
// Same idea for trunc, where mode 5 is round towards zero.
- def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
- def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
- def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+ def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
+ def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
+ def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
// Same idea for round, where mode 1 is round towards nearest with
// ties away from zero.
- def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
- def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
- def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+ def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
}
//===----------------------------------------------------------------------===//
@@ -413,87 +431,103 @@ let Predicates = [FeatureFPExtension] in {
//===----------------------------------------------------------------------===//
// Addition.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let isCommutable = 1 in {
- def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
- def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
- def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+ def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>;
+ def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
+ def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
- def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
- def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
+ def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
+ def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
}
// Subtraction.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
- def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
- def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
-
- def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
- def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>;
+ def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
+ def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
+
+ def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>;
+ def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
}
// Multiplication.
-let isCommutable = 1 in {
- def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
- def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
- def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isCommutable = 1 in {
+ def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>;
+ def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
+ def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
+ }
+ def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
+ def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
}
-def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
-def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
// f64 multiplication of two FP32 registers.
-def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
+let Uses = [FPC], mayRaiseFPException = 1 in
+ def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+ (f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
-def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)),
- (f64 (extloadf32 bdxaddr12only:$addr))),
+let Uses = [FPC], mayRaiseFPException = 1 in
+ def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+ (f64 (extloadf32 bdxaddr12only:$addr))),
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
-def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in
+ def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+ def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+ (f128 (fpextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
-def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
+let Uses = [FPC], mayRaiseFPException = 1 in
+ def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(fmul (f128 (fpextend FP64:$src1)),
- (f128 (extloadf64 bdxaddr12only:$addr))),
+ def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+ (f128 (extloadf64 bdxaddr12only:$addr))),
(MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
-def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
-def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
+ def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
-def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
-def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
+ def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+}
// Fused multiply-subtract.
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
-def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
+ def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
-def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
-def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
+ def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+}
// Division.
-def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
-def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
-def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>;
+ def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
+ def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
-def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
-def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
+ def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
+ def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
+}
// Divide to integer.
-let Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
}
@@ -502,7 +536,7 @@ let Defs = [CC] in {
// Comparisons
//===----------------------------------------------------------------------===//
-let Defs = [CC], CCValues = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
@@ -532,20 +566,28 @@ let Defs = [CC], CCValues = 0xC in {
let hasSideEffects = 1 in {
let mayLoad = 1, mayStore = 1 in {
// TODO: EFPC and SFPC do not touch memory at all
- def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
- def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
-
- def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
- def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+ let Uses = [FPC] in {
+ def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
+ def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
+ }
+
+ let Defs = [FPC] in {
+ def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
+ def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+ }
}
- def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
- def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+ let Defs = [FPC], mayRaiseFPException = 1 in {
+ def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
+ def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+ }
- def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
- Requires<[FeatureFPExtension]>;
- def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
- def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+ let Uses = [FPC], Defs = [FPC] in {
+ def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
+ Requires<[FeatureFPExtension]>;
+ def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
+ def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+ }
}
//===----------------------------------------------------------------------===//
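The binary floating-point hunks above go further than the DFP ones: besides Uses = [FPC] they set mayRaiseFPException = 1 and switch the selection patterns from fadd, fsqrt, fpround and friends to the corresponding any_* fragments, which match both the ordinary DAG nodes and their constrained ("strict") counterparts. A hedged C++ sketch, not taken from the patch, of the program behaviour this is meant to preserve: once the FP environment is observable, the exception flags an operation raises are part of its effect, so the operation cannot be speculated, deleted as dead, or moved past code that inspects the flags.

#include <cfenv>
#include <cstdio>

// Hypothetical illustration only. With FP-environment access enabled
// ("#pragma STDC FENV_ACCESS ON"), the exception flags set by an operation
// are observable, so the operation has a side effect beyond its result.
int main() {
  volatile double huge = 1e308, two = 2.0;  // volatile blocks constant folding

  std::feclearexcept(FE_ALL_EXCEPT);
  volatile double r = huge * two;           // overflows to +infinity
  bool overflowed = std::fetestexcept(FE_OVERFLOW) != 0;

  std::printf("r = %g, overflow raised = %d\n", static_cast<double>(r),
              overflowed ? 1 : 0);
  return 0;
}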
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 1e904a86ea79..2a1d14de3ddf 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -1,9 +1,8 @@
//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -38,6 +37,12 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
string OpKey = "";
string OpType = "none";
+ // MemKey identifies a target reg-mem opcode, while MemType can be either
+ // "pseudo" or "target". This is used to map a pseudo memory instruction to
+ // its corresponding target opcode. See comment at MemFoldPseudo.
+ string MemKey = "";
+ string MemType = "none";
+
// Many distinct-operands instructions have older 2-operand equivalents.
// NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
// with NumOpsValue being "2" or "3" as appropriate.
@@ -121,7 +126,8 @@ def getDisp20Opcode : InstrMapping {
let ValueCols = [["20"]];
}
-// Return the memory form of a register instruction.
+// Return the memory form of a register instruction. Note that this may
+// return a MemFoldPseudo instruction (see below).
def getMemOpcode : InstrMapping {
let FilterClass = "InstSystemZ";
let RowFields = ["OpKey"];
@@ -130,13 +136,22 @@ def getMemOpcode : InstrMapping {
let ValueCols = [["mem"]];
}
-// Return the 3-operand form of a 2-operand instruction.
-def getThreeOperandOpcode : InstrMapping {
+// Return the target memory instruction for a MemFoldPseudo.
+def getTargetMemOpcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["MemKey"];
+ let ColFields = ["MemType"];
+ let KeyCol = ["pseudo"];
+ let ValueCols = [["target"]];
+}
+
+// Return the 2-operand form of a 3-operand instruction.
+def getTwoOperandOpcode : InstrMapping {
let FilterClass = "InstSystemZ";
let RowFields = ["NumOpsKey"];
let ColFields = ["NumOpsValue"];
- let KeyCol = ["2"];
- let ValueCols = [["3"]];
+ let KeyCol = ["3"];
+ let ValueCols = [["2"]];
}
//===----------------------------------------------------------------------===//
@@ -1399,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<5> V2;
bits<4> M3;
+ bits<4> M4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = V2{3-0};
let Inst{31-24} = 0;
let Inst{23-20} = M3;
- let Inst{19-12} = 0;
+ let Inst{19-16} = M4;
+ let Inst{15-12} = 0;
let Inst{11} = 0;
let Inst{10} = V2{4};
let Inst{9-8} = 0;
@@ -2410,11 +2427,16 @@ class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls>
let mayLoad = 1;
}
-class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
- : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
- mnemonic#"\t$V1, $V3, $BD2", []> {
- let M4 = 0;
- let mayLoad = 1;
+multiclass LoadMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
+ let mayLoad = 1 in {
+ def Align : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3),
+ (ins bdaddr12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
+ let M4 = 0 in
+ def "" : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3),
+ (ins bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []>;
+ }
}
class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
@@ -2469,12 +2491,29 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes, bits<4> type = 0>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
mnemonic#"\t$V1, $XBD2",
- [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> {
+ [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
let M3 = type;
let mayStore = 1;
let AccessBytes = bytes;
}
+class StoreVRXGeneric<string mnemonic, bits<16> opcode>
+ : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3", []> {
+ let mayStore = 1;
+}
+
+multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> {
+ let mayStore = 1, AccessBytes = 16 in {
+ def Align : InstVRX<opcode, (outs),
+ (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3", []>;
+ let M3 = 0 in
+ def "" : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2", []>;
+ }
+}
+
class StoreLengthVRSb<string mnemonic, bits<16> opcode,
SDPatternOperator operator, bits<5> bytes>
: InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
@@ -2527,11 +2566,16 @@ multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
}
}
-class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
- : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
- mnemonic#"\t$V1, $V3, $BD2", []> {
- let M4 = 0;
- let mayStore = 1;
+multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
+ let mayStore = 1 in {
+ def Align : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3,
+ bdaddr12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
+ let M4 = 0 in
+ def "" : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3,
+ bdaddr12only:$BD2),
+ mnemonic#"\t$V1, $V3, $BD2", []>;
+ }
}
// StoreSI* instructions are used to store an integer to memory, but the
@@ -2925,6 +2969,17 @@ class UnaryVRXGeneric<string mnemonic, bits<16> opcode>
let mayLoad = 1;
}
+multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> {
+ let mayLoad = 1, AccessBytes = 16 in {
+ def Align : InstVRX<opcode, (outs VR128:$V1),
+ (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3", []>;
+ let M3 = 0 in
+ def "" : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2),
+ mnemonic#"\t$V1, $XBD2", []>;
+ }
+}
+
class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
@@ -3067,6 +3122,8 @@ class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$R1, $R2, $R3",
[(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
let M4 = 0;
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
@@ -3074,9 +3131,9 @@ multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
RegisterOperand cls2> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+ def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
}
}
@@ -3086,9 +3143,9 @@ multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2,
RegisterOperand cls2> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
+ def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
}
}
@@ -3102,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = 0;
}
+class BinaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []>;
+
class BinaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
@@ -3169,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
}
+class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2, RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1),
+ (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
+ mnemonic#"$M4\t$R1, $R2, $R3",
+ [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
+ cond4:$valid, cond4:$M4))]> {
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2, RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+// Like CondBinaryRRFa, but with a fixed CC mask.
+class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2, $R3", []> {
+ let isAsmParserOnly = V.alternate;
+ let M4 = V.ccmask;
+}
+
+multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+}
+
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@@ -3189,9 +3286,9 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
Immediate imm> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>,
+ def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
}
}
@@ -3266,9 +3363,9 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>,
+ def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>,
Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
}
}
@@ -3563,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3),
- mnemonic#"\t$R1, $V2, $M3", []>;
+ mnemonic#"\t$R1, $V2, $M3", []> {
+ let M4 = 0;
+}
class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
@@ -3941,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
let M4 = 0;
}
+class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1, cls2:$R2),
+ (ins cls1:$R1src, cls2:$R2src, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+ let M4 = 0;
+}
+
class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
@@ -4229,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mnemonic, bits<16> opcode>
mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr1, TypedReg tr2, bits<4> type = 0>
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
mnemonic#"\t$V1, $V2, $V3, $V4",
@@ -4237,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(tr2.vt tr2.op:$V3),
(tr1.vt tr1.op:$V4)))]> {
let M5 = type;
- let M6 = 0;
+ let M6 = m6;
}
class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
@@ -4247,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
let M6 = 0;
}
+// Ternary operation where the assembler mnemonic has an extra operand to
+// optionally allow specifying arbitrary M6 values.
+multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type> {
+ let M5 = type, Defs = [CC] in
+ def "" : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>;
+ def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4)),
+ (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr1.op:$V4, 0)>;
+}
+
+multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
+ let Defs = [CC] in
+ def "" : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4,
+ imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ VR128:$V4, imm32zx4:$M5, 0)>;
+}
+
class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
: InstVRRe<opcode, (outs tr1.op:$V1),
@@ -4277,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
+class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2,
+ imm32zx4:$M3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $V2, $M3, $M4", []>;
+
class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
: InstVRSb<opcode, (outs VR128:$V1),
(ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
@@ -4594,14 +4737,31 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
RegisterOperand cls, Immediate imm> {
let NumOpsKey = key in {
let NumOpsValue = "3" in
- def K : BinaryRIEPseudo<null_frag, cls, imm>,
+ def K : BinaryRIEPseudo<operator, cls, imm>,
Requires<[FeatureHighWord, FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ let NumOpsValue = "2" in
def "" : BinaryRIPseudo<operator, cls, imm>,
Requires<[FeatureHighWord]>;
}
}
+// A pseudo that is used during register allocation when folding a memory
+// operand. The 3-address register instruction with a spilled source cannot
+// be converted directly to a target 2-address reg/mem instruction.
+// Mapping: <INSN>R -> MemFoldPseudo -> <INSN>
+class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode>
+ : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
+ let OpKey = mnemonic#"rk"#cls;
+ let OpType = "mem";
+ let MemKey = mnemonic#cls;
+ let MemType = "pseudo";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let HasIndex = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
@@ -4639,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
let CCMaskLast = 1;
}
+// Like CondBinaryRRFa, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
+ [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
+ cond4:$valid, cond4:$M4))]> {
+ let CCMaskLast = 1;
+}
+
// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
@@ -4776,58 +4947,6 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator>
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
-// Define an instruction that operates on two fixed-length blocks of memory,
-// and associated pseudo instructions for operating on blocks of any size.
-// The Sequence form uses a straight-line sequence of instructions and
-// the Loop form uses a loop of length-256 instructions followed by
-// another instruction to handle the excess.
-multiclass MemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
- def "" : SideEffectBinarySSa<mnemonic, opcode>;
- let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length)]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256)]>;
- }
-}
-
-// The same, but setting a CC result as comparion operator.
-multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
- def "" : SideEffectBinarySSa<mnemonic, opcode>;
- let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length))]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256))]>;
- }
-}
-
-// Define an instruction that operates on two strings, both terminated
-// by the character in R0. The instruction processes a CPU-determinated
-// number of bytes at a time and sets CC to 3 if the instruction needs
-// to be repeated. Also define a pseudo instruction that represents
-// the full loop (the main instruction plus the branch on CC==3).
-multiclass StringRRE<string mnemonic, bits<16> opcode,
- SDPatternOperator operator> {
- let Uses = [R0L] in
- def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
- let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
- def Loop : Pseudo<(outs GR64:$end),
- (ins GR64:$start1, GR64:$start2, GR32:$char),
- [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
- GR32:$char))]>;
-}
-
// A pseudo instruction that is a direct alias of a real instruction.
// These aliases are used in cases where a particular register operand is
// fixed or where the same instruction is used with different register sizes.
@@ -4893,3 +5012,90 @@ class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
imm32zx6:$I5), []> {
let Constraints = "$R1 = $R1src";
}
+
+//===----------------------------------------------------------------------===//
+// Multiclasses that emit both real and pseudo instructions
+//===----------------------------------------------------------------------===//
+
+multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only> {
+
+ def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
+ }
+ let Has20BitOffset = 1 in
+ def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>;
+}
+
+multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
+ bits<16> rxyOpcode, SDPatternOperator operator,
+ RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes> {
+ let DispKey = mnemonic ## #cls in {
+ def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
+ bdxaddr12pair> {
+ let DispSize = "12";
+ let MemKey = mnemonic#cls;
+ let MemType = "target";
+ }
+ let DispSize = "20" in
+ def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
+ bytes, bdxaddr20pair>;
+ }
+ def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
+}
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of length-256 instructions followed by
+// another instruction to handle the excess.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : SideEffectBinarySSa<mnemonic, opcode>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256)]>;
+ }
+}
+
+// The same, but setting a CC result as comparison operator.
+multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : SideEffectBinarySSa<mnemonic, opcode>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length))]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256))]>;
+ }
+}
+
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0. The instruction processes a CPU-determined
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated. Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator> {
+ let Uses = [R0L] in
+ def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
+ def Loop : Pseudo<(outs GR64:$end),
+ (ins GR64:$start1, GR64:$start2, GR32:$char),
+ [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+ GR32:$char))]>;
+}
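The MemorySS multiclass moved above pairs each storage-to-storage instruction with Sequence and Loop pseudos: as its comment says, the Loop form handles full 256-byte blocks in a loop and leaves one trailing instruction for the excess. A small hedged C++ sketch of that expansion shape, an illustration of the strategy rather than the backend's actual lowering code:

#include <cstddef>
#include <cstring>

// Hypothetical illustration of the "Loop" expansion strategy described above:
// process full 256-byte blocks in a loop, then issue one final operation for
// whatever remains.
void block_copy(char *dst, const char *src, std::size_t len) {
  while (len >= 256) {                 // loop of full-length (256-byte) ops
    std::memcpy(dst, src, 256);
    dst += 256;
    src += 256;
    len -= 256;
  }
  if (len != 0)                        // one trailing op handles the excess
    std::memcpy(dst, src, len);
}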
diff --git a/lib/Target/SystemZ/SystemZInstrHFP.td b/lib/Target/SystemZ/SystemZInstrHFP.td
index 6d5b4b92f650..2e3c9932d621 100644
--- a/lib/Target/SystemZ/SystemZInstrHFP.td
+++ b/lib/Target/SystemZ/SystemZInstrHFP.td
@@ -1,9 +1,8 @@
//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b03b4edaa4ab..57c1cf4ec70a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -169,11 +168,13 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
if (!DestIsHigh && !SrcIsHigh)
MI.setDesc(get(LowOpcodeK));
else {
- emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
- SystemZ::LR, 32, MI.getOperand(1).isKill(),
- MI.getOperand(1).isUndef());
+ if (DestReg != SrcReg) {
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
+ MI.getOperand(1).setReg(DestReg);
+ }
MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
- MI.getOperand(1).setReg(DestReg);
MI.tieOperands(0, 1);
}
}
@@ -222,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
// correctly. This change is deferred to the SystemZExpandPseudo pass.
}
+// MI is a select pseudo instruction. Replace it with LowOpcode if the
+// sources and destination are all low GR32s, with HighOpcode if they are
+// all high GR32s, and otherwise with the two-operand MixedOpcode.
+void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode,
+ unsigned MixedOpcode) const {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Src1Reg = MI.getOperand(1).getReg();
+ unsigned Src2Reg = MI.getOperand(2).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool Src1IsHigh = isHighReg(Src1Reg);
+ bool Src2IsHigh = isHighReg(Src2Reg);
+
+ // If sources and destination aren't all high or all low, we may be able to
+ // simplify the operation by moving one of the sources to the destination
+ // first. But only if this doesn't clobber the other source.
+ if (DestReg != Src1Reg && DestReg != Src2Reg) {
+ if (DestIsHigh != Src1IsHigh) {
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg,
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
+ MI.getOperand(1).setReg(DestReg);
+ Src1Reg = DestReg;
+ Src1IsHigh = DestIsHigh;
+ } else if (DestIsHigh != Src2IsHigh) {
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg,
+ SystemZ::LR, 32, MI.getOperand(2).isKill(),
+ MI.getOperand(2).isUndef());
+ MI.getOperand(2).setReg(DestReg);
+ Src2Reg = DestReg;
+ Src2IsHigh = DestIsHigh;
+ }
+ }
+
+ // If the destination (now) matches one source, prefer this to be first.
+ if (DestReg != Src1Reg && DestReg == Src2Reg) {
+ commuteInstruction(MI, false, 1, 2);
+ std::swap(Src1Reg, Src2Reg);
+ std::swap(Src1IsHigh, Src2IsHigh);
+ }
+
+ if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
+ MI.setDesc(get(LowOpcode));
+ else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
+ MI.setDesc(get(HighOpcode));
+ else {
+ // Given the simplification above, we must already have a two-operand case.
+ assert (DestReg == Src1Reg);
+ MI.setDesc(get(MixedOpcode));
+ MI.tieOperands(0, 1);
+ LOCRMuxJumps++;
+ }
+
+ // If we were unable to implement the pseudo with a single instruction, we
+ // need to convert it back into a branch sequence. This cannot be done here
+ // since the caller of expandPostRAPseudo does not handle changes to the CFG
+ // correctly. This change is deferred to the SystemZExpandPseudo pass.
+}
+
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@@ -311,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
};
switch (MI.getOpcode()) {
+ case SystemZ::SELRMux:
+ case SystemZ::SELFHR:
+ case SystemZ::SELR:
+ case SystemZ::SELGR:
case SystemZ::LOCRMux:
case SystemZ::LOCFHR:
case SystemZ::LOCR:
@@ -557,80 +621,6 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
return false;
}
-// If Reg is a virtual register, return its definition, otherwise return null.
-static MachineInstr *getDef(unsigned Reg,
- const MachineRegisterInfo *MRI) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return nullptr;
- return MRI->getUniqueVRegDef(Reg);
-}
-
-// Return true if MI is a shift of type Opcode by Imm bits.
-static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm) {
- return (MI->getOpcode() == Opcode &&
- !MI->getOperand(2).getReg() &&
- MI->getOperand(3).getImm() == Imm);
-}
-
-// If the destination of MI has no uses, delete it as dead.
-static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
- if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
- MI->eraseFromParent();
-}
-
-// Compare compares SrcReg against zero. Check whether SrcReg contains
-// the result of an IPM sequence whose input CC survives until Compare,
-// and whether Compare is therefore redundant. Delete it and return
-// true if so.
-static bool removeIPMBasedCompare(MachineInstr &Compare, unsigned SrcReg,
- const MachineRegisterInfo *MRI,
- const TargetRegisterInfo *TRI) {
- MachineInstr *LGFR = nullptr;
- MachineInstr *RLL = getDef(SrcReg, MRI);
- if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
- LGFR = RLL;
- RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
- }
- if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
- return false;
-
- MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
- if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
- return false;
-
- MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
- if (!IPM || IPM->getOpcode() != SystemZ::IPM)
- return false;
-
- // Check that there are no assignments to CC between the IPM and Compare,
- if (IPM->getParent() != Compare.getParent())
- return false;
- MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare.getIterator();
- for (++MBBI; MBBI != MBBE; ++MBBI) {
- MachineInstr &MI = *MBBI;
- if (MI.modifiesRegister(SystemZ::CC, TRI))
- return false;
- }
-
- Compare.eraseFromParent();
- if (LGFR)
- eraseIfDead(LGFR, MRI);
- eraseIfDead(RLL, MRI);
- eraseIfDead(SRL, MRI);
- eraseIfDead(IPM, MRI);
-
- return true;
-}
-
-bool SystemZInstrInfo::optimizeCompareInstr(
- MachineInstr &Compare, unsigned SrcReg, unsigned SrcReg2, int Mask,
- int Value, const MachineRegisterInfo *MRI) const {
- assert(!SrcReg2 && "Only optimizing constant comparisons so far");
- bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0;
- return Value == 0 && !IsLogical &&
- removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
-}
-
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg, unsigned FalseReg,
@@ -679,7 +669,9 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
unsigned Opc;
if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
- if (STI.hasLoadStoreOnCond2())
+ if (STI.hasMiscellaneousExtensions3())
+ Opc = SystemZ::SELRMux;
+ else if (STI.hasLoadStoreOnCond2())
Opc = SystemZ::LOCRMux;
else {
Opc = SystemZ::LOCR;
@@ -691,9 +683,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
TrueReg = TReg;
FalseReg = FReg;
}
- } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
- Opc = SystemZ::LOCGR;
- else
+ } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+ if (STI.hasMiscellaneousExtensions3())
+ Opc = SystemZ::SELGR;
+ else
+ Opc = SystemZ::LOCGR;
+ } else
llvm_unreachable("Invalid register class");
BuildMI(MBB, I, DL, get(Opc), DstReg)
@@ -716,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned NewUseOpc;
unsigned UseIdx;
int CommuteIdx = -1;
+ bool TieOps = false;
switch (UseOpc) {
+ case SystemZ::SELRMux:
+ TieOps = true;
+ LLVM_FALLTHROUGH;
case SystemZ::LOCRMux:
if (!STI.hasLoadStoreOnCond2())
return false;
@@ -728,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
else
return false;
break;
+ case SystemZ::SELGR:
+ TieOps = true;
+ LLVM_FALLTHROUGH;
case SystemZ::LOCGR:
if (!STI.hasLoadStoreOnCond2())
return false;
@@ -749,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.setDesc(get(NewUseOpc));
+ if (TieOps)
+ UseMI.tieOperands(0, 1);
UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
if (DeleteDef)
DefMI.eraseFromParent();
@@ -1032,73 +1036,13 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
}
}
-// Used to return from convertToThreeAddress after replacing two-address
-// instruction OldMI with three-address instruction NewMI.
-static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
- MachineInstr *NewMI,
- LiveVariables *LV) {
- if (LV) {
- unsigned NumOps = OldMI->getNumOperands();
- for (unsigned I = 1; I < NumOps; ++I) {
- MachineOperand &Op = OldMI->getOperand(I);
- if (Op.isReg() && Op.isKill())
- LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI);
- }
- }
- transferDeadCC(OldMI, NewMI);
- return NewMI;
-}
-
MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
MachineBasicBlock *MBB = MI.getParent();
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- unsigned Opcode = MI.getOpcode();
- unsigned NumOps = MI.getNumOperands();
-
- // Try to convert something like SLL into SLLK, if supported.
- // We prefer to keep the two-operand form where possible both
- // because it tends to be shorter and because some instructions
- // have memory forms that can be used during spilling.
- if (STI.hasDistinctOps()) {
- MachineOperand &Dest = MI.getOperand(0);
- MachineOperand &Src = MI.getOperand(1);
- unsigned DestReg = Dest.getReg();
- unsigned SrcReg = Src.getReg();
- // AHIMux is only really a three-operand instruction when both operands
- // are low registers. Try to constrain both operands to be low if
- // possible.
- if (Opcode == SystemZ::AHIMux &&
- TargetRegisterInfo::isVirtualRegister(DestReg) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
- MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
- MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
- MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
- }
- int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
- if (ThreeOperandOpcode >= 0) {
- // Create three address instruction without adding the implicit
- // operands. Those will instead be copied over from the original
- // instruction by the loop below.
- MachineInstrBuilder MIB(
- *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
- /*NoImplicit=*/true));
- MIB.add(Dest);
- // Keep the kill state, but drop the tied flag.
- MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
- // Keep the remaining operands as-is.
- for (unsigned I = 2; I < NumOps; ++I)
- MIB.add(MI.getOperand(I));
- MBB->insert(MI, MIB);
- return finishConvertToThreeAddress(&MI, MIB, LV);
- }
- }
// Try to convert an AND into an RISBG-type instruction.
- if (LogicOp And = interpretAndImmediate(Opcode)) {
+ // TODO: It might be beneficial to select RISBG and shorten to AND instead.
+ if (LogicOp And = interpretAndImmediate(MI.getOpcode())) {
uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
// AND IMMEDIATE leaves the other bits of the register unchanged.
Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
@@ -1126,7 +1070,16 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
.addImm(Start)
.addImm(End + 128)
.addImm(0);
- return finishConvertToThreeAddress(&MI, MIB, LV);
+ if (LV) {
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
+ }
+ }
+ transferDeadCC(&MI, MIB);
+ return MIB;
}
}
return nullptr;
@@ -1135,7 +1088,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS, VirtRegMap *VRM) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
@@ -1263,7 +1216,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// MVCs that turn out to be redundant.
if (OpNum == 0 && MI.hasOneMemOperand()) {
MachineMemOperand *MMO = *MI.memoperands_begin();
- if (MMO->getSize() == Size && !MMO->isVolatile()) {
+ if (MMO->getSize() == Size && !MMO->isVolatile() && !MMO->isAtomic()) {
// Handle conversion of loads.
if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXLoad)) {
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
@@ -1289,12 +1242,37 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
}
}
- // If the spilled operand is the final one, try to change <INSN>R
- // into <INSN>.
+ // If the spilled operand is the final one or the instruction is
+ // commutable, try to change <INSN>R into <INSN>.
+ unsigned NumOps = MI.getNumExplicitOperands();
int MemOpcode = SystemZ::getMemOpcode(Opcode);
+
+ // See if this is a 3-address instruction that is convertible to 2-address
+ // and suitable for folding below. Only try this with virtual registers
+ // and a provided VRM (during regalloc).
+ bool NeedsCommute = false;
+ if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
+ if (VRM == nullptr)
+ MemOpcode = -1;
+ else {
+ assert(NumOps == 3 && "Expected two source registers.");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register DstPhys =
+ (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
+ Register SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
+ : ((OpNum == 1 && MI.isCommutable())
+ ? MI.getOperand(2).getReg()
+ : Register()));
+ if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
+ TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg))
+ NeedsCommute = (OpNum == 1);
+ else
+ MemOpcode = -1;
+ }
+ }
+
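  // A hedged illustration of the commute-and-fold case checked above (the
  // virtual/physical register numbers and the AGRK/AG opcodes are assumptions
  // chosen only for the example):
  //   %2:gr64bit = AGRK %0:gr64bit, %1:gr64bit    ; three-address add
  // If operand 2 (%1) is the operand being spilled and the VRM has assigned %0
  // and %2 to the same physical register, say %r2, the add collapses to the
  // two-address memory form and the reload disappears:
  //   AG %r2, <frame-slot>                        ; %r2 += mem[frame-slot]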
if (MemOpcode >= 0) {
- unsigned NumOps = MI.getNumExplicitOperands();
- if (OpNum == NumOps - 1) {
+ if ((OpNum == NumOps - 1) || NeedsCommute) {
const MCInstrDesc &MemDesc = get(MemOpcode);
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
assert(AccessBytes != 0 && "Size of access should be known");
@@ -1302,8 +1280,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
uint64_t Offset = Size - AccessBytes;
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
- for (unsigned I = 0; I < OpNum; ++I)
- MIB.add(MI.getOperand(I));
+ MIB.add(MI.getOperand(0));
+ if (NeedsCommute)
+ MIB.add(MI.getOperand(2));
+ else
+ for (unsigned I = 1; I < OpNum; ++I)
+ MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
@@ -1380,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
return true;
+ case SystemZ::SELRMux:
+ expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR,
+ SystemZ::LOCRMux);
+ return true;
+
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
@@ -1506,7 +1493,7 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
- if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+ if (MI.isInlineAsm()) {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
@@ -1857,7 +1844,8 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
}
bool SystemZInstrInfo::
-areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA) const {
if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand())
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 216139eb7c79..2edde175542e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -1,9 +1,8 @@
//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -142,6 +141,11 @@ enum FusedCompareType {
} // end namespace SystemZII
+namespace SystemZ {
+int getTwoOperandOpcode(uint16_t Opcode);
+int getTargetMemOpcode(uint16_t Opcode);
+}
+
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
SystemZSubtarget &STI;
@@ -158,6 +162,8 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
unsigned HighOpcode) const;
void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
+ void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode, unsigned MixedOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
@@ -208,9 +214,6 @@ public:
int *BytesAdded = nullptr) const override;
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask, int &Value) const override;
- bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int Mask, int Value,
- const MachineRegisterInfo *MRI) const override;
bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int&, int&, int&) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
@@ -252,7 +255,8 @@ public:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const override;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
MachineInstr *foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
@@ -317,7 +321,8 @@ public:
// addresses. This function returns true if two MIs access different
// memory addresses and false otherwise.
bool
- areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+ const MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
};
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 8d3b1011d0a7..91856893e3bd 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1,9 +1,8 @@
//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -256,7 +255,7 @@ let isCall = 1, Defs = [CC] in {
}
// Regular calls.
-let isCall = 1, Defs = [R14D, CC] in {
+let isCall = 1, Defs = [R14D, CC], Uses = [FPC] in {
def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
[(z_call pcrel32:$I2)]>;
def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
@@ -362,9 +361,6 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
//===----------------------------------------------------------------------===//
// Register moves.
-// Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
-def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>,
- Requires<[FeatureHighWord]>;
def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
@@ -478,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in {
def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
}
+// Move right.
+let Predicates = [FeatureMiscellaneousExtensions3],
+ mayLoad = 1, mayStore = 1, Uses = [R0L] in
+ def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>;
+
// String moves.
let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
@@ -486,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
// Conditional move instructions
//===----------------------------------------------------------------------===//
+let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
+ // Select.
+ let isCommutable = 1 in {
+ // Expands to SELR or SELFHR or a branch-and-move sequence,
+ // depending on the choice of registers.
+ def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
+ defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
+ defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
+ defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
+ }
+
+ // Define AsmParser extended mnemonics for each general condition-code mask.
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0,
+ GR32, GR32, GR32>;
+ def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0,
+ GRH32, GRH32, GRH32>;
+ def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3,
+ GR64, GR64, GR64>;
+ }
+}
+
let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load immediate on condition. Matched via DAG pattern and created
// by the PeepholeOptimizer via FoldImmediate.
@@ -920,11 +944,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Addition of memory.
defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>;
- defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
+ defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>;
- def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>;
+ defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>;
// Addition to memory.
def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
@@ -962,9 +986,9 @@ let Defs = [CC] in {
Requires<[FeatureHighWord]>;
// Addition of memory.
- defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
+ defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>;
- def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>;
+ defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>;
// Addition to memory.
def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
@@ -1007,11 +1031,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>;
- defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
+ defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>;
def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>;
- def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>;
+ defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>;
}
defm : SXB<z_ssub, GR64, SGFR>;
@@ -1033,6 +1057,14 @@ let AddedComplexity = 1 in {
(AGFI GR64:$src1, imm64sx32n:$src2)>;
}
+// And vice versa in one special case, where we need to load a
+// constant into a register in any case, but the negated constant
+// requires fewer instructions to load.
+def : Pat<(z_saddo GR64:$src1, imm64lh16n:$src2),
+ (SGR GR64:$src1, (LLILH imm64lh16n:$src2))>;
+def : Pat<(z_saddo GR64:$src1, imm64lf32n:$src2),
+ (SGR GR64:$src1, (LLILF imm64lf32n:$src2))>;
+
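// A worked example for the patterns above (the constant is an illustrative
// assumption): an overflow-checked add of -0x80010000 would typically need two
// instructions just to materialize the addend, since it does not fit a signed
// 32-bit immediate, but the negated value 0x80010000 is 0x8001 << 16 and loads
// with a single LLILH, so the add is rewritten as a subtract:
//   x + (-0x80010000)  ==>  SGR x, (LLILH 0x8001)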
// Subtraction producing a carry.
let Defs = [CC] in {
// Subtraction of a register.
@@ -1051,9 +1083,9 @@ let Defs = [CC] in {
def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>;
// Subtraction of memory.
- defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
+ defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>;
- def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>;
+ defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>;
}
defm : ZXB<z_usub, GR64, SLGFR>;
@@ -1128,8 +1160,8 @@ let Defs = [CC] in {
// ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>;
- def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>;
+ defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>;
+ defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>;
}
// AND to memory
@@ -1185,8 +1217,8 @@ let Defs = [CC] in {
// ORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>;
- def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>;
+ defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>;
+ defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>;
}
// OR to memory
@@ -1225,8 +1257,8 @@ let Defs = [CC] in {
// XORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>;
- def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>;
+ defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>;
+ defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>;
}
// XOR to memory
@@ -1240,6 +1272,43 @@ defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
//===----------------------------------------------------------------------===//
+// Combined logical operations
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureMiscellaneousExtensions3],
+ Defs = [CC] in {
+ // AND with complement.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>;
+ def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>;
+ }
+
+ // OR with complement.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>;
+ def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>;
+ }
+
+ // NAND.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>;
+ def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>;
+ }
+
+ // NOR.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>;
+ def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>;
+ }
+
+ // NXOR.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>;
+ def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
// Multiplication
//===----------------------------------------------------------------------===//
@@ -1833,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
let Predicates = [FeatureMessageSecurityAssist8] in
def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929,
GR128, GR128, GR128>;
+
+ let Predicates = [FeatureMessageSecurityAssist9] in
+ def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>;
}
//===----------------------------------------------------------------------===//
@@ -2013,7 +2085,12 @@ let Defs = [CC] in
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
-// Population count. Counts bits set per byte.
+// Population count. Counts bits set per byte or doubleword.
+let Predicates = [FeatureMiscellaneousExtensions3] in {
+ let Defs = [CC] in
+ def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>;
+ def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>;
+}
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
@@ -2044,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
+// Sort lists.
+let Predicates = [FeatureEnhancedSort],
+ mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
+ def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>;
+
+// Deflate conversion call.
+let Predicates = [FeatureDeflateConversion],
+ mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
+ def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939,
+ GR128, GR128, GR64>;
+
// Execute.
let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
@@ -2186,6 +2274,22 @@ let AddedComplexity = 4 in {
(RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
}
+// Substitute (x*64-s) with (-s), since shift/rotate instructions only
+// use the last 6 bits of the second operand register (making it modulo 64).
+let AddedComplexity = 4 in {
+ def : Pat<(shl GR64:$val, (sub imm32mod64, GR32:$shift)),
+ (SLLG GR64:$val, (LCR GR32:$shift), 0)>;
+
+ def : Pat<(sra GR64:$val, (sub imm32mod64, GR32:$shift)),
+ (SRAG GR64:$val, (LCR GR32:$shift), 0)>;
+
+ def : Pat<(srl GR64:$val, (sub imm32mod64, GR32:$shift)),
+ (SRLG GR64:$val, (LCR GR32:$shift), 0)>;
+
+ def : Pat<(rotl GR64:$val, (sub imm32mod64, GR32:$shift)),
+ (RLLG GR64:$val, (LCR GR32:$shift), 0)>;
+}
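// A small worked check of the modulo-64 identity used above (the shift amount
// s = 10 is an illustrative value): (64 - 10) & 63 == 54 and (-10) & 63 == 54,
// so SLLG/SRAG/SRLG/RLLG see the same effective shift count either way.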
+
// Peepholes for turning scalar operations into block operations.
defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
XCSequence, 1>;
diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td
index c351577fa5bd..ecce16c9cd73 100644
--- a/lib/Target/SystemZ/SystemZInstrSystem.td
+++ b/lib/Target/SystemZ/SystemZInstrSystem.td
@@ -1,9 +1,8 @@
//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 6c97b85277c3..261727f89058 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1,9 +1,8 @@
//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -104,7 +103,7 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Load.
- def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
+ defm VL : UnaryVRXAlign<"vl", 0xE706>;
// Load to block boundary. The number of loaded bytes is only known
// at run time. The instruction is really polymorphic, but v128b matches
@@ -123,7 +122,7 @@ let Predicates = [FeatureVector] in {
def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
// Load multiple.
- def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
+ defm VLM : LoadMultipleVRSaAlign<"vlm", 0xE736>;
// Load and replicate
def VLREP : UnaryVRXGeneric<"vlrep", 0xE705>;
@@ -208,13 +207,13 @@ defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
let Predicates = [FeatureVector] in {
// Store.
- def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
+ defm VST : StoreVRXAlign<"vst", 0xE70E>;
// Store with length. The number of stored bytes is only known at run time.
def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
// Store multiple.
- def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
+ defm VSTM : StoreMultipleVRSaAlign<"vstm", 0xE73E>;
// Store element.
def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>;
@@ -250,6 +249,81 @@ let Predicates = [FeatureVectorPackedDecimal] in {
}
//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorEnhancements2] in {
+ // Load byte-reversed elements.
+ def VLBR : UnaryVRXGeneric<"vlbr", 0xE606>;
+ def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
+ def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
+ def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
+ def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>;
+
+ // Load elements reversed.
+ def VLER : UnaryVRXGeneric<"vler", 0xE607>;
+ def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>;
+ def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>;
+ def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>;
+ def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)),
+ (VLERF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)),
+ (VLERG bdxaddr12only:$addr)>;
+ def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)),
+ (VLBRQ bdxaddr12only:$addr)>;
+
+ // Load byte-reversed element.
+ def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>;
+ def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>;
+ def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>;
+
+ // Load byte-reversed element and zero.
+ def VLLEBRZ : UnaryVRXGeneric<"vllebrz", 0xE604>;
+ def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>;
+ def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>;
+ def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>;
+ def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>;
+ def : InstAlias<"lerv\t$V1, $XBD2",
+ (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>;
+ def : InstAlias<"ldrv\t$V1, $XBD2",
+ (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>;
+
+ // Load byte-reversed element and replicate.
+ def VLBRREP : UnaryVRXGeneric<"vlbrrep", 0xE605>;
+ def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>;
+ def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>;
+ def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>;
+
+ // Store byte-reversed elements.
+ def VSTBR : StoreVRXGeneric<"vstbr", 0xE60E>;
+ def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>;
+ def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>;
+ def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>;
+ def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>;
+
+ // Store elements reversed.
+ def VSTER : StoreVRXGeneric<"vster", 0xE60F>;
+ def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>;
+ def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>;
+ def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>;
+ def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr),
+ (VSTERF VR128:$val, bdxaddr12only:$addr)>;
+ def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr),
+ (VSTERG VR128:$val, bdxaddr12only:$addr)>;
+ def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr),
+ (VSTBRQ VR128:$val, bdxaddr12only:$addr)>;
+
+ // Store byte-reversed element.
+ def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>;
+ def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>;
+ def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>;
+ def : InstAlias<"sterv\t$V1, $XBD2",
+ (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
+ def : InstAlias<"stdrv\t$V1, $XBD2",
+ (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
+}
+
+//===----------------------------------------------------------------------===//
// Selects and permutes
//===----------------------------------------------------------------------===//
@@ -707,6 +781,10 @@ let Predicates = [FeatureVector] in {
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
+ // Shift left double by bit.
+ let Predicates = [FeatureVectorEnhancements2] in
+ def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>;
+
// Shift right arithmetic.
def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
@@ -719,6 +797,10 @@ let Predicates = [FeatureVector] in {
// Shift right logical by byte.
def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
+ // Shift right double by bit.
+ let Predicates = [FeatureVectorEnhancements2] in
+ def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>;
+
// Subtract.
def VS : BinaryVRRcGeneric<"vs", 0xE7F7>;
def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
@@ -925,126 +1007,190 @@ let Predicates = [FeatureVector] in {
// See comments in SystemZInstrFP.td for the suppression flags and
// rounding modes.
multiclass VectorRounding<Instruction insn, TypedReg tr> {
- def : FPConversion<insn, frint, tr, tr, 0, 0>;
- def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
- def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
- def : FPConversion<insn, fceil, tr, tr, 4, 6>;
- def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
- def : FPConversion<insn, fround, tr, tr, 4, 1>;
+ def : FPConversion<insn, any_frint, tr, tr, 0, 0>;
+ def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>;
+ def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>;
+ def : FPConversion<insn, any_fceil, tr, tr, 4, 6>;
+ def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>;
+ def : FPConversion<insn, any_fround, tr, tr, 4, 1>;
}
let Predicates = [FeatureVector] in {
// Add.
- def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
- def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
- def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
- def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
- def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
+ def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
+ def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
+ }
}
- // Convert from fixed 64-bit.
- def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
- def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
- def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+ // Convert from fixed.
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
+ def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
+ def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+ }
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>;
+ def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
+ def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
+ }
+ def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
+ }
- // Convert from logical 64-bit.
- def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
- def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
- def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+ // Convert from logical.
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
+ def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
+ def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+ }
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>;
+ def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
+ def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
+ }
+ def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
+ }
- // Convert to fixed 64-bit.
- def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
- def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
- def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+ // Convert to fixed.
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
+ def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
+ def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+ }
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>;
+ def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>;
+ def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>;
+ }
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>;
+ }
- // Convert to logical 64-bit.
- def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
- def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
- def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+ // Convert to logical.
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
+ def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
+ def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+ }
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>;
+ def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>;
+ def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>;
+ }
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>;
+ }
// Divide.
- def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
- def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
- def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
- def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
- def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
+ def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
+ def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
+ }
}
// Load FP integer.
- def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
- def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
- def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
+ def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+ }
defm : VectorRounding<VFIDB, v128db>;
defm : VectorRounding<WFIDB, v64db>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
- def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
- def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
+ def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
+ def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+ }
defm : VectorRounding<VFISB, v128sb>;
defm : VectorRounding<WFISB, v32sb>;
defm : VectorRounding<WFIXB, v128xb>;
}
// Load lengthened.
- def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
- def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
+ }
let Predicates = [FeatureVectorEnhancements1] in {
- let isAsmParserOnly = 1 in {
- def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
- def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
- def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in {
+ def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
+ def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
+ def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+ }
+ def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>;
}
- def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
- def : Pat<(f128 (fpextend (f32 VR32:$src))),
+ def : Pat<(f128 (any_fpextend (f32 VR32:$src))),
(WFLLD (WLDEB VR32:$src))>;
}
// Load rounded.
- def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
- def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
- def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
+ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+ }
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
- def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
+ def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
let Predicates = [FeatureVectorEnhancements1] in {
- let isAsmParserOnly = 1 in {
- def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
- def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
- def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in {
+ def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
+ def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+ def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+ }
+ def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
}
- def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
- def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
- def : Pat<(f32 (fpround (f128 VR128:$src))),
+ def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>;
+ def : Pat<(f32 (any_fpround (f128 VR128:$src))),
(WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
}
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
- def : FPMinMax<insn, fmaxnum, tr, 4>;
+ def : FPMinMax<insn, any_fmaxnum, tr, 4>;
def : FPMinMax<insn, fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
- def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
- v128db, v128db, 3, 0>;
- def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
- v64db, v64db, 3, 8>;
- def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
- v128sb, v128sb, 2, 0>;
- def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
- v32sb, v32sb, 2, 8>;
- def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
- v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
+ def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
+ v128db, v128db, 3, 0>;
+ def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
+ v64db, v64db, 3, 8>;
+ def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
+ v128sb, v128sb, 2, 0>;
+ def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
+ v32sb, v32sb, 2, 8>;
+ def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
+ v128xb, v128xb, 4, 8>;
+ }
defm : VectorMax<VFMAXDB, v128db>;
defm : VectorMax<WFMAXDB, v64db>;
defm : VectorMax<VFMAXSB, v128sb>;
@@ -1054,21 +1200,23 @@ let Predicates = [FeatureVector] in {
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
- def : FPMinMax<insn, fminnum, tr, 4>;
+ def : FPMinMax<insn, any_fminnum, tr, 4>;
def : FPMinMax<insn, fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
- def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
- v128db, v128db, 3, 0>;
- def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
- v64db, v64db, 3, 8>;
- def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
- v128sb, v128sb, 2, 0>;
- def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
- v32sb, v32sb, 2, 8>;
- def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
- v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
+ def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
+ v128db, v128db, 3, 0>;
+ def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
+ v64db, v64db, 3, 8>;
+ def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
+ v128sb, v128sb, 2, 0>;
+ def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
+ v32sb, v32sb, 2, 8>;
+ def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
+ v128xb, v128xb, 4, 8>;
+ }
defm : VectorMin<VFMINDB, v128db>;
defm : VectorMin<WFMINDB, v64db>;
defm : VectorMin<VFMINSB, v128sb>;
@@ -1077,53 +1225,61 @@ let Predicates = [FeatureVector] in {
}
// Multiply.
- def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
- def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
- def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
- def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
- def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
+ def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
+ def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
+ }
}
// Multiply and add.
- def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
- def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
- def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
- def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
- def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
+ def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
+ def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
+ }
}
// Multiply and subtract.
- def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
- def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
- def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
- def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
- def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
+ def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
+ def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
+ }
}
// Negative multiply and add.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
- def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
- def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
- def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
- def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
- def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
+ def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
+ def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>;
+ def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>;
+ def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>;
+ def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>;
}
// Negative multiply and subtract.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
- def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
- def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
- def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
- def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
- def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
+ def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
+ def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>;
+ def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>;
+ def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>;
+ def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>;
}
// Perform sign operation.
@@ -1164,23 +1320,27 @@ let Predicates = [FeatureVector] in {
}
// Square root.
- def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
- def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
- def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
- def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
- def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
+ def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
+ def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
+ }
}
// Subtract.
- def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
- def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
- def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
- def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
- def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
+ def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
+ def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
+ }
}
// Test data class immediate.
@@ -1202,7 +1362,7 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Compare scalar.
- let Defs = [CC] in {
+ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
@@ -1212,7 +1372,7 @@ let Predicates = [FeatureVector] in {
}
// Compare and signal scalar.
- let Defs = [CC] in {
+ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
@@ -1222,22 +1382,25 @@ let Predicates = [FeatureVector] in {
}
// Compare equal.
- def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
- defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
- v128g, v128db, 3, 0>;
- defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
- v64g, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
- v128f, v128sb, 2, 0>;
- defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
- v32f, v32sb, 2, 8>;
- defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
- v128q, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ v128g, v128db, 3, 0>;
+ defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ v128f, v128sb, 2, 0>;
+ defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
+ v32f, v32sb, 2, 8>;
+ defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
+ }
}
// Compare and signal equal.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
@@ -1251,22 +1414,25 @@ let Predicates = [FeatureVector] in {
}
// Compare high.
- def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
- defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
- v128g, v128db, 3, 0>;
- defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
- v64g, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
- v128f, v128sb, 2, 0>;
- defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
- v32f, v32sb, 2, 8>;
- defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
- v128q, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ v128g, v128db, 3, 0>;
+ defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ v128f, v128sb, 2, 0>;
+ defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
+ v32f, v32sb, 2, 8>;
+ defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
+ }
}
// Compare and signal high.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
@@ -1280,22 +1446,25 @@ let Predicates = [FeatureVector] in {
}
// Compare high or equal.
- def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
- defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
- v128g, v128db, 3, 0>;
- defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
- v64g, v64db, 3, 8>;
- let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
- v128f, v128sb, 2, 0>;
- defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
- v32f, v32sb, 2, 8>;
- defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
- v128q, v128xb, 4, 8>;
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ v128g, v128db, 3, 0>;
+ defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+ v64g, v64db, 3, 8>;
+ let Predicates = [FeatureVectorEnhancements1] in {
+ defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ v128f, v128sb, 2, 0>;
+ defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
+ v32f, v32sb, 2, 8>;
+ defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
+ v128q, v128xb, 4, 8>;
+ }
}
// Compare and signal high or equal.
- let Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
@@ -1520,6 +1689,24 @@ let Predicates = [FeatureVector] in {
z_vstrcz_cc, v128f, v128f, 2, 2>;
}
+let Predicates = [FeatureVectorEnhancements2] in {
+ defm VSTRS : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>;
+ defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B,
+ z_vstrs_cc, v128b, v128b, 0>;
+ defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B,
+ z_vstrs_cc, v128b, v128h, 1>;
+ defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B,
+ z_vstrs_cc, v128b, v128f, 2>;
+ let Defs = [CC] in {
+ def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B,
+ z_vstrsz_cc, v128b, v128b, 0, 2>;
+ def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B,
+ z_vstrsz_cc, v128b, v128h, 1, 2>;
+ def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B,
+ z_vstrsz_cc, v128b, v128f, 2, 2>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Packed-decimal instructions
//===----------------------------------------------------------------------===//
@@ -1531,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDecimal] in {
def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>;
let Defs = [CC] in {
+ let Predicates = [FeatureVectorPackedDecimalEnhancement] in {
+ def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>;
+ def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>;
+ }
def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>;
def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>;
def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>;
diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp
index f532e9e23b1f..06d893d043e9 100644
--- a/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -1,9 +1,8 @@
//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 802962bd4db0..95d7e22dec32 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -1,9 +1,8 @@
//===-- SystemZLongBranch.cpp - Branch lengthening for SystemZ ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index 2655e4866b20..ef39f80a94ef 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
index 7173cfa42959..14ad06488312 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -1,9 +1,8 @@
//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
index 1a7c0d7f687a..9b6aa3593ce0 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 4f64f4c65f1d..9eec3f37bc28 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 98e761ef87fe..0becfaa1d49c 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -1,9 +1,8 @@
//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index ab820e5d3e63..0d5cc2e03e8d 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -1,9 +1,8 @@
//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 7bf32bf19a4a..56632e1529a2 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -1,9 +1,8 @@
//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -189,6 +188,17 @@ def HF32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
}]>;
+// Negated variants.
+def NEGLH16 : SDNodeXForm<imm, [{
+ uint64_t Value = (-N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
+def NEGLF32 : SDNodeXForm<imm, [{
+ uint64_t Value = -N->getZExtValue() & 0x00000000FFFFFFFFULL;
+ return CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i64);
+}]>;
+
// Truncate an immediate to a 8-bit signed quantity.
def SIMM8 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), SDLoc(N),
@@ -431,6 +441,15 @@ def imm64hf32c : Immediate<i64, [{
return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
}], HF32, "U32Imm">;
+// Negated immediates that fit LF32 or LH16.
+def imm64lh16n : Immediate<i64, [{
+ return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
+}], NEGLH16, "U16Imm">;
+
+def imm64lf32n : Immediate<i64, [{
+ return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
+}], NEGLF32, "U32Imm">;
+
// Short immediates.
def imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());
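
A quick sanity check of what the new negated-immediate transforms compute may help here. The sketch below is purely illustrative (the helper name and the example value are not taken from the patch); it simply mirrors the body of the NEGLH16 SDNodeXForm above, which is presumably there so that an immediate whose negation fits the "low halfword, high part" form can be re-encoded by matching patterns defined elsewhere in the backend.

  #include <cassert>
  #include <cstdint>

  // Mirrors the NEGLH16 SDNodeXForm: negate the 64-bit immediate, keep
  // bits 16..31 of the result, and shift them down to bit 0.
  std::uint64_t neglh16(std::uint64_t N) {
    return (-N & 0x00000000FFFF0000ULL) >> 16;
  }

  int main() {
    // For N = -65536, the negation is 0x10000, which sits entirely in the
    // "low halfword, high part" field, so the transform yields 1.
    assert(neglh16(static_cast<std::uint64_t>(-65536)) == 1);
    return 0;
  }
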
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 626675bfb70c..15bd12bc98a4 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -1,9 +1,8 @@
//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -192,6 +191,12 @@ def SDT_ZVecTernary : SDTypeProfile<1, 3,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
+def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3,
+ [SDTCisVec<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVec<2>,
+ SDTCisSameAs<2, 3>,
+ SDTCisSameAs<0, 4>]>;
def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -279,6 +284,10 @@ def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
@@ -338,6 +347,10 @@ def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC",
SDT_ZVecQuaternaryIntCC>;
def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
SDT_ZVecQuaternaryIntCC>;
+def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC",
+ SDT_ZVecTernaryConvCC>;
+def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC",
+ SDT_ZVecTernaryConvCC>;
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
@@ -662,22 +675,34 @@ def z_usub : PatFrags<(ops node:$src1, node:$src2),
[(z_usubo node:$src1, node:$src2),
(sub node:$src1, node:$src2)]>;
+// Combined logical operations.
+def andc : PatFrag<(ops node:$src1, node:$src2),
+ (and node:$src1, (not node:$src2))>;
+def orc : PatFrag<(ops node:$src1, node:$src2),
+ (or node:$src1, (not node:$src2))>;
+def nand : PatFrag<(ops node:$src1, node:$src2),
+ (not (and node:$src1, node:$src2))>;
+def nor : PatFrag<(ops node:$src1, node:$src2),
+ (not (or node:$src1, node:$src2))>;
+def nxor : PatFrag<(ops node:$src1, node:$src2),
+ (not (xor node:$src1, node:$src2))>;
+
// Fused multiply-subtract, using the natural operand order.
-def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fma node:$src1, node:$src2, (fneg node:$src3))>;
+def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (any_fma node:$src1, node:$src2, (fneg node:$src3))>;
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
-def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fma node:$src2, node:$src3, node:$src1)>;
-def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fma node:$src2, node:$src3, (fneg node:$src1))>;
+def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (any_fma node:$src2, node:$src3, node:$src1)>;
+def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (any_fma node:$src2, node:$src3, (fneg node:$src1))>;
// Negative fused multiply-add and multiply-subtract.
-def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fneg (fma node:$src1, node:$src2, node:$src3))>;
-def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fneg (fms node:$src1, node:$src2, node:$src3))>;
+def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fneg (any_fma node:$src1, node:$src2, node:$src3))>;
+def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fneg (any_fms node:$src1, node:$src2, node:$src3))>;
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
@@ -709,9 +734,9 @@ class shiftop<SDPatternOperator operator>
[(operator node:$val, node:$count),
(operator node:$val, (and node:$count, imm32bottom6set))]>;
-// Vector representation of all-zeros and all-ones.
-def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
-def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
+def imm32mod64 : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() % 64 == 0);
+}]>;
// Load a scalar and replicate it in all elements of a vector.
class z_replicate_load<ValueType scalartype, SDPatternOperator load>
@@ -723,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_load<i32, load>;
def z_replicate_loadi64 : z_replicate_load<i64, load>;
def z_replicate_loadf32 : z_replicate_load<f32, load>;
def z_replicate_loadf64 : z_replicate_load<f64, load>;
+// Byte-swapped replicated vector element loads.
+def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>;
+def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>;
+def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>;
// Load a scalar and insert it into a single element of a vector.
class z_vle<ValueType scalartype, SDPatternOperator load>
@@ -735,18 +764,22 @@ def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
def z_vlef32 : z_vle<f32, load>;
def z_vlef64 : z_vle<f64, load>;
+// Byte-swapped vector element loads.
+def z_vlebri16 : z_vle<i32, z_loadbswap16>;
+def z_vlebri32 : z_vle<i32, z_loadbswap32>;
+def z_vlebri64 : z_vle<i64, z_loadbswap64>;
// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.
class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
: PatFrag<(ops node:$addr),
- (z_vector_insert (z_vzero),
+ (z_vector_insert immAllZerosV,
(scalartype (load node:$addr)), (i32 index))>;
def z_vllezi8 : z_vllez<i32, anyextloadi8, 7>;
def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
def z_vllezi32 : z_vllez<i32, load, 1>;
def z_vllezi64 : PatFrags<(ops node:$addr),
- [(z_vector_insert (z_vzero),
+ [(z_vector_insert immAllZerosV,
(i64 (load node:$addr)), (i32 0)),
(z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
// We use high merges to form a v4f32 from four f32s. Propagating zero
@@ -759,11 +792,12 @@ def z_vllezf32 : PatFrag<(ops node:$addr),
(bitconvert
(v4f32 (scalar_to_vector
(f32 (load node:$addr)))))))),
- (v2i64 (z_vzero)))>;
+ (v2i64
+ (bitconvert (v4f32 immAllZerosV))))>;
def z_vllezf64 : PatFrag<(ops node:$addr),
(z_merge_high
(v2f64 (scalar_to_vector (f64 (load node:$addr)))),
- (z_vzero))>;
+ immAllZerosV)>;
// Similarly for the high element of a zeroed vector.
def z_vllezli32 : z_vllez<i32, load, 0>;
@@ -774,8 +808,21 @@ def z_vllezlf32 : PatFrag<(ops node:$addr),
(z_merge_high
(v4f32 (scalar_to_vector
(f32 (load node:$addr)))),
- (v4f32 (z_vzero))))),
- (v2i64 (z_vzero)))>;
+ (v4f32 immAllZerosV)))),
+ (v2i64
+ (bitconvert (v4f32 immAllZerosV))))>;
+
+// Byte-swapped variants.
+def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>;
+def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>;
+def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>;
+def z_vllebrzi64 : PatFrags<(ops node:$addr),
+ [(z_vector_insert immAllZerosV,
+ (i64 (z_loadbswap64 node:$addr)),
+ (i32 0)),
+ (z_join_dwords (i64 (z_loadbswap64 node:$addr)),
+ (i64 0))]>;
+
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
@@ -788,18 +835,22 @@ def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
def z_vstef32 : z_vste<f32, store>;
def z_vstef64 : z_vste<f64, store>;
+// Byte-swapped vector element stores.
+def z_vstebri16 : z_vste<i32, z_storebswap16>;
+def z_vstebri32 : z_vste<i32, z_storebswap32>;
+def z_vstebri64 : z_vste<i64, z_storebswap64>;
// Arithmetic negation on vectors.
-def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;
// Bitwise negation on vectors.
-def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>;
// Signed "integer greater than zero" on vectors.
-def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
// Signed "integer less than zero" on vectors.
-def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>;
// Integer absolute on vectors.
class z_viabs<int shift>
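
The new andc/orc/nand/nor/nxor fragments above describe combined logical operations at the DAG level. As an illustration only (source-level C++, not part of the patch), these are the shapes a frontend would typically produce; the scheduler entries added later in this import list NCGRK, OCGRK, NNGRK, NOGRK and NXGRK, so each of these can presumably be selected to a single register-register instruction when the corresponding facility is available (the selection patterns themselves are outside this hunk).

  #include <cstdint>

  // Source-level counterparts of the new PatFrags (illustrative only).
  std::uint64_t and_with_complement(std::uint64_t a, std::uint64_t b) { return a & ~b; }   // andc
  std::uint64_t or_with_complement(std::uint64_t a, std::uint64_t b)  { return a | ~b; }   // orc
  std::uint64_t not_and(std::uint64_t a, std::uint64_t b)             { return ~(a & b); } // nand
  std::uint64_t not_or(std::uint64_t a, std::uint64_t b)              { return ~(a | b); } // nor
  std::uint64_t not_xor(std::uint64_t a, std::uint64_t b)             { return ~(a ^ b); } // nxor
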
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index 152521fb66a8..beaf4de285a3 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -1,9 +1,8 @@
//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZPostRewrite.cpp b/lib/Target/SystemZ/SystemZPostRewrite.cpp
new file mode 100644
index 000000000000..8e4060eac74c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -0,0 +1,124 @@
+//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that is run immediately after VirtRegRewriter
+// but before MachineCopyPropagation. The purpose is to lower pseudos to
+// target instructions before any later pass might substitute a register for
+// another.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
+
+#define DEBUG_TYPE "systemz-postrewrite"
+STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
+
+namespace llvm {
+ void initializeSystemZPostRewritePass(PassRegistry&);
+}
+
+namespace {
+
+class SystemZPostRewrite : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZPostRewrite() : MachineFunctionPass(ID) {
+ initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry());
+ }
+
+ const SystemZInstrInfo *TII;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool selectMBB(MachineBasicBlock &MBB);
+};
+
+char SystemZPostRewrite::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
+ SYSTEMZ_POSTREWRITE_NAME, false, false)
+
+/// Returns an instance of the Post Rewrite pass.
+FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
+ return new SystemZPostRewrite();
+}
+
+/// If MBBI references a pseudo instruction that should be selected here,
+/// do it and return true. Otherwise return false.
+bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+
+ // Note: If this could be done during regalloc in foldMemoryOperandImpl()
+ // while also updating the LiveIntervals, there would be no need for the
+ // MemFoldPseudo to begin with.
+ int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode);
+ if (TargetMemOpcode != -1) {
+ MI.setDesc(TII->get(TargetMemOpcode));
+ MI.tieOperands(0, 1);
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MachineOperand &SrcMO = MI.getOperand(1);
+ if (DstReg != SrcMO.getReg()) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg)
+ .addReg(SrcMO.getReg());
+ SrcMO.setReg(DstReg);
+ MemFoldCopies++;
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Iterate over the instructions in basic block MBB and select any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= selectMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= selectMBB(MBB);
+
+ return Modified;
+}
+
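
Schematically, the effect of selectMI() above on a folded-memory pseudo is the following (a rough sketch; the register numbers and the pseudo/target opcode spellings are illustrative, not taken from the patch):

  // before post-rewrite:
  //   %r1 = <mem-fold pseudo> %r2, <mem>      // def and tied source differ
  //
  // after selectMI():
  //   %r1 = COPY %r2                          // inserted copy, MemFoldCopies++
  //   %r1 = <real target opcode> %r1, <mem>   // operands 0 and 1 now tied
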
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 0dca4582dc0d..b27c25beb58c 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -1,9 +1,8 @@
//===-- SystemZ.td - SystemZ processors and features ---------*- tblgen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
+def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>;
+
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index e9f9188048da..e7cd6871dbb4 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -54,6 +53,26 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO,
return RC;
}
+// Pass the registers of RC as hints while making sure that if any of these
+// registers are copy hints (and therefore already in Hints), hint them
+// first.
+static void addHints(ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const TargetRegisterClass *RC,
+ const MachineRegisterInfo *MRI) {
+ SmallSet<unsigned, 4> CopyHints;
+ CopyHints.insert(Hints.begin(), Hints.end());
+ Hints.clear();
+ for (MCPhysReg Reg : Order)
+ if (CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
+ Hints.push_back(Reg);
+ for (MCPhysReg Reg : Order)
+ if (!CopyHints.count(Reg) &&
+ RC->contains(Reg) && !MRI->isReserved(Reg))
+ Hints.push_back(Reg);
+}
+
bool
SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
@@ -62,7 +81,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix);
@@ -76,31 +96,23 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (!DoneRegs.insert(Reg).second)
continue;
- for (auto &Use : MRI->use_instructions(Reg))
+ for (auto &Use : MRI->reg_instructions(Reg)) {
// For LOCRMux, see if the other operand is already a high or low
- // register, and in that case give the correpsonding hints for
+ // register, and in that case give the corresponding hints for
// VirtReg. LOCR instructions need both operands in either high or
- // low parts.
- if (Use.getOpcode() == SystemZ::LOCRMux) {
+ // low parts. Same handling for SELRMux.
+ if (Use.getOpcode() == SystemZ::LOCRMux ||
+ Use.getOpcode() == SystemZ::SELRMux) {
MachineOperand &TrueMO = Use.getOperand(1);
MachineOperand &FalseMO = Use.getOperand(2);
const TargetRegisterClass *RC =
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
getRC32(TrueMO, VRM, MRI));
+ if (Use.getOpcode() == SystemZ::SELRMux)
+ RC = TRI->getCommonSubClass(RC,
+ getRC32(Use.getOperand(0), VRM, MRI));
if (RC && RC != &SystemZ::GRX32BitRegClass) {
- // Pass the registers of RC as hints while making sure that if
- // any of these registers are copy hints, hint them first.
- SmallSet<unsigned, 4> CopyHints;
- CopyHints.insert(Hints.begin(), Hints.end());
- Hints.clear();
- for (MCPhysReg Reg : Order)
- if (CopyHints.count(Reg) &&
- RC->contains(Reg) && !MRI->isReserved(Reg))
- Hints.push_back(Reg);
- for (MCPhysReg Reg : Order)
- if (!CopyHints.count(Reg) &&
- RC->contains(Reg) && !MRI->isReserved(Reg))
- Hints.push_back(Reg);
+ addHints(Order, Hints, RC, MRI);
// Return true to make these hints the only regs available to
// RA. This may mean extra spilling but since the alternative is
// a jump sequence expansion of the LOCRMux, it is preferred.
@@ -112,10 +124,70 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
(TrueMO.getReg() == Reg ? FalseMO.getReg() : TrueMO.getReg());
if (MRI->getRegClass(OtherReg) == &SystemZ::GRX32BitRegClass)
Worklist.push_back(OtherReg);
- }
+ } // end LOCRMux
+ else if (Use.getOpcode() == SystemZ::CHIMux ||
+ Use.getOpcode() == SystemZ::CFIMux) {
+ if (Use.getOperand(1).getImm() == 0) {
+ bool OnlyLMuxes = true;
+ for (MachineInstr &DefMI : MRI->def_instructions(VirtReg))
+ if (DefMI.getOpcode() != SystemZ::LMux)
+ OnlyLMuxes = false;
+ if (OnlyLMuxes) {
+ addHints(Order, Hints, &SystemZ::GR32BitRegClass, MRI);
+ // Return false to make these hints preferred but not obligatory.
+ return false;
+ }
+ }
+ } // end CHIMux / CFIMux
+ }
}
}
+ if (VRM == nullptr)
+ return BaseImplRetVal;
+
+ // Add any two address hints after any copy hints.
+ SmallSet<unsigned, 4> TwoAddrHints;
+ for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
+ if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
+ const MachineOperand *VRRegMO = nullptr;
+ const MachineOperand *OtherMO = nullptr;
+ const MachineOperand *CommuMO = nullptr;
+ if (VirtReg == Use.getOperand(0).getReg()) {
+ VRRegMO = &Use.getOperand(0);
+ OtherMO = &Use.getOperand(1);
+ if (Use.isCommutable())
+ CommuMO = &Use.getOperand(2);
+ } else if (VirtReg == Use.getOperand(1).getReg()) {
+ VRRegMO = &Use.getOperand(1);
+ OtherMO = &Use.getOperand(0);
+ } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
+ VRRegMO = &Use.getOperand(2);
+ OtherMO = &Use.getOperand(0);
+ } else
+ continue;
+
+ auto tryAddHint = [&](const MachineOperand *MO) -> void {
+ Register Reg = MO->getReg();
+ Register PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
+ if (PhysReg) {
+ if (MO->getSubReg())
+ PhysReg = getSubReg(PhysReg, MO->getSubReg());
+ if (VRRegMO->getSubReg())
+ PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
+ MRI->getRegClass(VirtReg));
+ if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+ TwoAddrHints.insert(PhysReg);
+ }
+ };
+ tryAddHint(OtherMO);
+ if (CommuMO)
+ tryAddHint(CommuMO);
+ }
+ for (MCPhysReg OrderReg : Order)
+ if (TwoAddrHints.count(OrderReg))
+ Hints.push_back(OrderReg);
+
return BaseImplRetVal;
}
@@ -169,6 +241,9 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SystemZ::A0);
Reserved.set(SystemZ::A1);
+ // FPC is the floating-point control register.
+ Reserved.set(SystemZ::FPC);
+
return Reserved;
}
@@ -328,7 +403,7 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
return true;
}
-unsigned
+Register
SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SystemZFrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
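
The two-address hints added above can be read as follows (a sketch under the assumption that AGRK/AGR is one of the opcode pairs reported by SystemZ::getTwoOperandOpcode(); the actual pair list is not part of this hunk):

  // Suppose the allocator is choosing a register for %v, which is defined by
  //
  //   %v = AGRK %a, %b          // three-operand (distinct-operands) form
  //
  // and %a has already been assigned %r3.  tryAddHint() then records %r3 as
  // a hint for %v, after any copy hints (and likewise for %b's register,
  // since the operation is commutable).  If the hint is honoured, the def and
  // one source share a register, so the instruction can presumably later be
  // shortened to its two-operand form, AGR.
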
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 9fd2e4ae4f00..4f721ec23e53 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -84,7 +83,7 @@ public:
const TargetRegisterClass *NewRC,
LiveIntervals &LIS) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index cea88c088b86..3567b0f3acf8 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -1,9 +1,8 @@
//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -296,6 +295,13 @@ def CC : SystemZReg<"cc">;
let isAllocatable = 0, CopyCost = -1 in
def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
+// The floating-point control register.
+// Note: We only model the current rounding modes and the IEEE masks.
+// IEEE flags and DXC are not modeled here.
+def FPC : SystemZReg<"fpc">;
+let isAllocatable = 0 in
+ def FPCRegs : RegisterClass<"SystemZ", [i32], 32, (add FPC)>;
+
// Access registers.
class ACR32<bits<16> num, string n> : SystemZReg<n> {
let HWEncoding = num;
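
The FPC modelling above ties in with the Uses = [FPC] / mayRaiseFPException = 1 flags added to the floating-point instructions earlier in this patch: once the rounding mode is treated as program state, FP operations must not be reordered or merged across a change of that state. A small host-level C++ illustration, not SystemZ-specific; it assumes the program starts in the default round-to-nearest mode and is built without FP optimisations (e.g. with -frounding-math) so the divisions stay where they are written:

  #include <cfenv>
  #include <cstdio>

  int main() {
    volatile double a = 1.0, b = 3.0;   // volatile: keep the two divisions distinct
    double x = a / b;                   // default rounding (to nearest)
    std::fesetround(FE_UPWARD);
    double y = a / b;                   // same expression, different result
    std::printf("%d\n", y > x);         // prints 1
    return 0;
  }
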
diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td
index 83bf97e6841a..98eca2802242 100644
--- a/lib/Target/SystemZ/SystemZSchedule.td
+++ b/lib/Target/SystemZ/SystemZSchedule.td
@@ -1,9 +1,8 @@
//==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -60,6 +59,7 @@ def VBU : SchedWrite; // Virtual branching unit
def MCD : SchedWrite; // Millicode
+include "SystemZScheduleArch13.td"
include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
include "SystemZScheduleZEC12.td"
diff --git a/lib/Target/SystemZ/SystemZScheduleArch13.td b/lib/Target/SystemZ/SystemZScheduleArch13.td
new file mode 100644
index 000000000000..9f82f24d0e8f
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZScheduleArch13.td
@@ -0,0 +1,1695 @@
+//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Arch13 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+// Pseudos expanded right after isel do not need to be modelled here.
+//
+//===----------------------------------------------------------------------===//
+
+def Arch13Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch13UnsupportedFeatures.List;
+
+ let IssueWidth = 6; // Number of instructions decoded per cycle.
+ let MicroOpBufferSize = 60; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 20;
+}
+
+let SchedModel = Arch13Model in {
+// These definitions need the SchedModel value. They could be put in a
+// subtarget common include file, but it seems the include system in Tablegen
+// currently (2016) rejects multiple includes of the same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+ def : WriteRes<NormalGr, []>;
+ def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
+ def : WriteRes<EndGroup, []> { let EndGroup = 1; }
+}
+def : WriteRes<Cracked, []> {
+ let NumMicroOps = 2;
+ let BeginGroup = 1;
+}
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 3;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+
+// Incoming latency removed from the register operand which is used together
+// with a memory operand by the instruction.
+def : ReadAdvance<RegReadAdv, 4>;
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; }
+
+let NumMicroOps = 0 in {
+ foreach L = 1-30 in
+ def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; }
+}
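+
+// As a rough reading aid: an operand list such as
+// [WLat1LSU, RegReadAdv, FXa, LSU, NormalGr] (used for many register-memory
+// instructions below) presumably means that the result is ready after one
+// cycle plus the LSU latency above, that the register operand may arrive up
+// to 4 cycles late, and that one FXa and one LSU unit are occupied within a
+// normal decoder group.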
+
+// Execution units.
+def Arch13_FXaUnit : ProcResource<2>;
+def Arch13_FXbUnit : ProcResource<2>;
+def Arch13_LSUnit : ProcResource<2>;
+def Arch13_VecUnit : ProcResource<2>;
+def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Arch13_VBUnit : ProcResource<2>;
+def Arch13_MCD : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+let NumMicroOps = 0 in {
+ def : WriteRes<FXa, [Arch13_FXaUnit]>;
+ def : WriteRes<FXb, [Arch13_FXbUnit]>;
+ def : WriteRes<LSU, [Arch13_LSUnit]>;
+ def : WriteRes<VecBF, [Arch13_VecUnit]>;
+ def : WriteRes<VecDF, [Arch13_VecUnit]>;
+ def : WriteRes<VecDFX, [Arch13_VecUnit]>;
+ def : WriteRes<VecMul, [Arch13_VecUnit]>;
+ def : WriteRes<VecStr, [Arch13_VecUnit]>;
+ def : WriteRes<VecXsPm, [Arch13_VecUnit]>;
+ foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Arch13_FXaUnit]>;
+ def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Arch13_FXbUnit]>;
+ def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Arch13_LSUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Arch13_VecUnit]>;
+ }}
+
+ def : WriteRes<VecFPd, [Arch13_VecFPdUnit]> { let ResourceCycles = [30]; }
+
+ def : WriteRes<VBU, [Arch13_VBUnit]>; // Virtual Branching Unit
+}
+
+def : WriteRes<MCD, [Arch13_MCD]> { let NumMicroOps = 3;
+ let BeginGroup = 1;
+ let EndGroup = 1; }
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
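+// As an example of how to read these entries: the line for "LA(Y|RL)?$"
+// further down says that LA, LAY and LARL each need one FXa unit, have a
+// one-cycle result latency (WLat1) and follow normal decoder grouping.
+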
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+// Pseudo -> LA / LAY
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>;
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb2, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>;
+
+// Pseudo -> reg move
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>;
+
+// Loads
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr],
+ (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>;
+
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>;
+
+// Store multiple
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>;
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "IC32(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr],
+ (instregex "ICM(H|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "A(Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AL(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "ALG(F)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+ (instregex "ALC(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "S(G|Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+ (instregex "SLB(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "N(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "O(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "X(G|Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Combined logical operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MS(GF|Y)?$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>;
+def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>;
+def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>;
+def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>;
+def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone],
+ (instregex "M(FY|L)?$")>;
+def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>;
+def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MSC$")>;
+def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MSGC$")>;
+def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>;
+def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
+def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
+ (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "C(G|Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "CL(Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>;
+def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
+ (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
+ GroupAlone3], (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[WLat30, MCD], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
+ (instregex "TRT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD],
+ (instregex "KM(C|F|O|CTR|A)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "(KIMD|KLMD|KMAC|KDSA)$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "(PCC|PPNO|PRNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Guarded storage
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>;
+def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
+ (instregex "CVBG$")>;
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
+ (instregex "CVB(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
+
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
+def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
+def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
+def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
+
+// Transaction end
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[WLat30, MCD], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>;
+
+// String instructions
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD],
+ (instregex "UPT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>;
+
+// Execute
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>;
+def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
+ (instregex "LTXBR(Compare)?$")>;
+
+// Copy sign
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked],
+ (instregex "C(F|G)(E|D)BR(A)?$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked],
+ (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "A(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "S(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MXDB$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)EB$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr],
+ (instregex "D(E|D)B$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[WLat30, MCD], (instregex "SFASR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LFAS$")>;
+def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
+
+// Convert from fixed
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "A(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "S(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MXD$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "MY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)(E|D)$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MAY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "MAY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+
+// Division
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "C(E|D)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>;
+def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>;
+def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>;
+def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
+ (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked],
+ (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>;
+def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>;
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
+ (instregex "VGE(F|G)$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+ (instregex "VLM(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
+def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "VLEBR(H|F|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// Conversion and rounding
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>;
+
+// Sign operations
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>;
+
+// Minimum / maximum
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>;
+
+// Test data class
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>;
+
+// Add / subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>;
+
+// Divide / square root
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+ (instregex "WF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+ (instregex "VF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>;
+def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+ (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+ (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
+def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>;
+def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone],
+ (instregex "VCVB(G)?(Opt)?$")>;
+def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone],
+ (instregex "VCVD(G)?$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>;
+def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "ISKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "IVSK$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>;
+def : InstRW<[WLat30, MCD], (instregex "IRBM$")>;
+def : InstRW<[WLat30, MCD], (instregex "PFMF$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGIN$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTLB$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STRAG$")>;
+def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "LASP$")>;
+def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PC$")>;
+def : InstRW<[WLat30, MCD], (instregex "PR$")>;
+def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RP$")>;
+def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "BAKR$")>;
+def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
+def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "STAP$")>;
+def : InstRW<[WLat30, MCD], (instregex "STIDP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "ECAG$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTF$")>;
+def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "SVC$")>;
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[WLat30, MCD], (instregex "DIAG$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "RCHP$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCHM$")>;
+def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPI$")>;
+def : InstRW<[WLat30, MCD], (instregex "SAL$")>;
+
+}
+
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index 74e1dad87908..b3266051da4e 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -1,9 +1,8 @@
//-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1192,8 +1191,8 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
// Vector: Loads
//===----------------------------------------------------------------------===//
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(BB)?$")>;
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H)?$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
@@ -1201,16 +1200,17 @@ def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
(instregex "VLE(B|F|G|H)$")>;
def : InstRW<[WLat6LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
(instregex "VGE(F|G)$")>;
-def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+ (instregex "VLM(Align)?$")>;
//===----------------------------------------------------------------------===//
// Vector: Stores
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZScheduleZ14.td b/lib/Target/SystemZ/SystemZScheduleZ14.td
index 1962fdf3a1d1..df7282a2961b 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ14.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ14.td
@@ -1,9 +1,8 @@
//-- SystemZScheduleZ14.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1210,8 +1209,8 @@ def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
// Vector: Loads
//===----------------------------------------------------------------------===//
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(BB)?$")>;
-def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
@@ -1219,17 +1218,18 @@ def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
(instregex "VLE(B|F|G|H)$")>;
def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
(instregex "VGE(F|G)$")>;
-def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone], (instregex "VLM$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+ (instregex "VLM(Align)?$")>;
def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
//===----------------------------------------------------------------------===//
// Vector: Stores
//===----------------------------------------------------------------------===//
-def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
-def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 7535739f813a..ca714ef1a702 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -1,9 +1,8 @@
//=- SystemZScheduleZ196.td - SystemZ Scheduling Definitions ---*- tblgen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index a21d2c4cef70..fb226be678da 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -1,9 +1,8 @@
//=- SystemZScheduleZEC12.td - SystemZ Scheduling Definitions --*- tblgen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index e0d7bca9a94b..a50e6aa59711 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -164,17 +163,17 @@ static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
}
// Convert the current CC value into an integer that is 0 if CC == 0,
-// less than zero if CC == 1 and greater than zero if CC >= 2.
+// greater than zero if CC == 1 and less than zero if CC >= 2.
// The sequence starts with IPM, which puts CC into bits 29 and 28
// of an integer and clears bits 30 and 31.
static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg,
SelectionDAG &DAG) {
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
- SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
- DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
- SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
- DAG.getConstant(31, DL, MVT::i32));
- return ROTL;
+ SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, IPM,
+ DAG.getConstant(30 - SystemZ::IPM_CC, DL, MVT::i32));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, MVT::i32, SHL,
+ DAG.getConstant(30, DL, MVT::i32));
+ return SRA;
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
@@ -184,7 +183,8 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
- SDValue CCReg = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+ // Swap operands to invert CC == 1 vs. CC == 2 cases.
+ SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
Chain = CCReg.getValue(1);
return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
}
@@ -232,7 +232,8 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp(
SDValue Src2, MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::i32, MVT::Other);
- SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+ // Swap operands to invert CC == 1 vs. CC == 2 cases.
+ SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src2, Src1,
DAG.getConstant(0, DL, MVT::i32));
SDValue CCReg = Unused.getValue(1);
Chain = Unused.getValue(2);
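As a minimal standalone sketch of the arithmetic behind the new IPM sequence (assuming SystemZ::IPM_CC == 28, i.e. the condition code lands in bits 29-28 of the IPM result, and assuming arithmetic right shifts of negative values, as on the relevant hosts):

#include <cassert>
#include <cstdint>

// Model of the SHL/SRA pair above: shifting left by (30 - 28) = 2 moves CC
// into the top two bits, and an arithmetic shift right by 30 then produces
// 0 for CC==0, +1 for CC==1 and a negative value for CC>=2.
static int32_t ccToResult(uint32_t ipmValue) {
  const unsigned IPM_CC = 28;                       // assumed CC bit position
  int32_t Shifted = static_cast<int32_t>(ipmValue << (30 - IPM_CC));
  return Shifted >> 30;                             // arithmetic shift
}

int main() {
  assert(ccToResult(0u << 28) == 0);  // CC 0: operands compare equal
  assert(ccToResult(1u << 28) == 1);  // CC 1: positive result
  assert(ccToResult(2u << 28) < 0);   // CC 2: negative result
  assert(ccToResult(3u << 28) < 0);   // CC 3: negative result
  return 0;
}

With CLC's operands swapped as in the memcmp/strcmp hunks, CC 1 now corresponds to the first argument comparing greater, so the positive/negative mapping above lines up with the libc convention.
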
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 93cd970c30c6..7d63bae83cf3 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 195fa20a2c90..e79dfc5b4b9e 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -1,9 +1,8 @@
//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,6 +46,7 @@ private:
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
+ bool shortenSelect(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -176,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
return false;
}
+// MI is a three-operand select instruction. If one of the sources matches
+// the destination, convert to the equivalent load-on-condition.
+bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
+ if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ MI.setDesc(TII->get(Opcode));
+ MI.tieOperands(0, 1);
+ return true;
+ }
+ if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ TII->commuteInstruction(MI, false, 1, 2);
+ MI.setDesc(TII->get(Opcode));
+ MI.tieOperands(0, 1);
+ return true;
+ }
+ return false;
+}
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -196,6 +213,18 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
break;
+ case SystemZ::SELR:
+ Changed |= shortenSelect(MI, SystemZ::LOCR);
+ break;
+
+ case SystemZ::SELFHR:
+ Changed |= shortenSelect(MI, SystemZ::LOCFHR);
+ break;
+
+ case SystemZ::SELGR:
+ Changed |= shortenSelect(MI, SystemZ::LOCGR);
+ break;
+
case SystemZ::WFADB:
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;
@@ -300,6 +329,31 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
case SystemZ::VST64:
Changed |= shortenOn0(MI, SystemZ::STD);
break;
+
+ default: {
+ int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode());
+ if (TwoOperandOpcode == -1)
+ break;
+
+ if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) &&
+ (!MI.isCommutable() ||
+ MI.getOperand(0).getReg() != MI.getOperand(2).getReg() ||
+ !TII->commuteInstruction(MI, false, 1, 2)))
+ break;
+
+ MI.setDesc(TII->get(TwoOperandOpcode));
+ MI.tieOperands(0, 1);
+ if (TwoOperandOpcode == SystemZ::SLL ||
+ TwoOperandOpcode == SystemZ::SLA ||
+ TwoOperandOpcode == SystemZ::SRL ||
+ TwoOperandOpcode == SystemZ::SRA) {
+ // These shifts only use the low 6 bits of the shift count.
+ MachineOperand &ImmMO = MI.getOperand(3);
+ ImmMO.setImm(ImmMO.getImm() & 0xfff);
+ }
+ Changed = true;
+ break;
+ }
}
LiveRegs.stepBackward(MI);
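A toy model of the select-shortening idea, using invented types rather than the real MachineInstr API: once the destination register equals one of the two source registers, the three-operand select only has to load the other source conditionally, which is exactly what the two-operand LOC* forms provide.

#include <cstdio>
#include <utility>

// Hypothetical stand-in for a "dst = cc ? src1 : src2" machine instruction.
struct SelectOp { int dst, src1, src2; };

// Returns true if the select can be rewritten as a conditional load into dst.
// If dst matches src2 instead of src1, the sources are commuted first (the
// real pass lets the target's commute hook adjust the condition mask).
static bool shortenSelectSketch(SelectOp &op) {
  if (op.dst == op.src1)
    return true;
  if (op.dst == op.src2) {
    std::swap(op.src1, op.src2);
    return true;
  }
  return false;  // distinct registers: keep the three-operand select
}

int main() {
  SelectOp a{1, 1, 2}, b{3, 4, 3}, c{5, 6, 7};
  std::printf("%d %d %d\n", shortenSelectSketch(a), shortenSelectSketch(b),
              shortenSelectSketch(c));  // prints: 1 1 0
  return 0;
}
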
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index fb030a207bc7..5e8af81842c4 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -56,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false),
HasVectorEnhancements1(false), HasVectorPackedDecimal(false),
HasInsertReferenceBitsMultiple(false),
+ HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
+ HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
+ HasEnhancedSort(false), HasDeflateConversion(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index cb6b21a1d465..fa3f65d93c91 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -1,9 +1,8 @@
//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,6 +62,12 @@ protected:
bool HasVectorEnhancements1;
bool HasVectorPackedDecimal;
bool HasInsertReferenceBitsMultiple;
+ bool HasMiscellaneousExtensions3;
+ bool HasMessageSecurityAssist9;
+ bool HasVectorEnhancements2;
+ bool HasVectorPackedDecimalEnhancement;
+ bool HasEnhancedSort;
+ bool HasDeflateConversion;
private:
Triple TargetTriple;
@@ -210,6 +215,30 @@ public:
return HasInsertReferenceBitsMultiple;
}
+ // Return true if the target has the miscellaneous-extensions facility 3.
+ bool hasMiscellaneousExtensions3() const {
+ return HasMiscellaneousExtensions3;
+ }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 9.
+ bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; }
+
+ // Return true if the target has the vector-enhancements facility 2.
+ bool hasVectorEnhancements2() const { return HasVectorEnhancements2; }
+
+ // Return true if the target has the vector-packed-decimal
+ // enhancement facility.
+ bool hasVectorPackedDecimalEnhancement() const {
+ return HasVectorPackedDecimalEnhancement;
+ }
+
+ // Return true if the target has the enhanced-sort facility.
+ bool hasEnhancedSort() const { return HasEnhancedSort; }
+
+ // Return true if the target has the deflate-conversion facility.
+ bool hasDeflateConversion() const { return HasDeflateConversion; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp
index 5dbd23d420a3..478848c30701 100644
--- a/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/lib/Target/SystemZ/SystemZTDC.cpp
@@ -1,9 +1,8 @@
//===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -356,8 +355,8 @@ bool SystemZTDCPass::runOnFunction(Function &F) {
if (!Worthy)
continue;
// Call the intrinsic, compare result with 0.
- Value *TDCFunc = Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc,
- V->getType());
+ Function *TDCFunc =
+ Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
IRBuilder<> IRB(I);
Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 9596a2b6388d..5c49e6eff0bf 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "SystemZ.h"
#include "SystemZMachineScheduler.h"
#include "SystemZTargetTransformInfo.h"
+#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -133,9 +133,9 @@ getEffectiveSystemZCodeModel(Optional<CodeModel::Model> CM, Reloc::Model RM,
bool JIT) {
if (CM) {
if (*CM == CodeModel::Tiny)
- report_fatal_error("Target does not support the tiny CodeModel");
+ report_fatal_error("Target does not support the tiny CodeModel", false);
if (*CM == CodeModel::Kernel)
- report_fatal_error("Target does not support the kernel CodeModel");
+ report_fatal_error("Target does not support the kernel CodeModel", false);
return *CM;
}
if (JIT)
@@ -183,6 +183,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
+ void addPostRewrite() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
@@ -212,7 +213,16 @@ bool SystemZPassConfig::addILPOpts() {
return true;
}
+void SystemZPassConfig::addPostRewrite() {
+ addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+}
+
void SystemZPassConfig::addPreSched2() {
+ // PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
+ // is not called).
+ if (getOptLevel() == CodeGenOpt::None)
+ addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
+
addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
if (getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 52bf8bba55de..ac04a080f580 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -1,9 +1,8 @@
//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 129610fe095b..145cf87ef9f5 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -467,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
+ // Give discount for some combined logical operations if supported.
+ if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+ if (Opcode == Instruction::Xor) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() &&
+ (I->getOpcode() == Instruction::And ||
+ I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::Xor))
+ return 0;
+ }
+ }
+ else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+ return 0;
+ }
+ }
+ }
+
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
return 1;
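For illustration, source-level shapes this discount targets (a sketch; nothing here is taken from the patch beyond the opcode pairing): each function pairs an AND/OR/XOR with an XOR, which the arch13 combined logical instructions can perform as a single operation, so the heuristic above charges the paired operation zero.

#include <cstdint>

// Each "~x" below is an XOR with -1 whose only use feeds (or is fed by)
// another logical operation, matching the single-use check in the cost code.
uint64_t nandLike(uint64_t a, uint64_t b) { return ~(a & b); }  // xor of an and
uint64_t norLike(uint64_t a, uint64_t b)  { return ~(a | b); }  // xor of an or
uint64_t nxorLike(uint64_t a, uint64_t b) { return ~(a ^ b); }  // xor of a xor
uint64_t orcLike(uint64_t a, uint64_t b)  { return a | ~b; }    // or of a xor
uint64_t andcLike(uint64_t a, uint64_t b) { return a & ~b; }    // and of a xor
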
@@ -687,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// TODO: Fix base implementation which could simplify things a bit here
// (seems to miss on differentiating on scalar/vector types).
- // Only 64 bit vector conversions are natively supported.
- if (DstScalarBits == 64) {
- if (SrcScalarBits == 64)
+ // Only 64 bit vector conversions are natively supported before arch13.
+ if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
+ if (SrcScalarBits == DstScalarBits)
return NumDstVectors;
if (SrcScalarBits == 1)
@@ -857,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
case Instruction::Select:
if (ValTy->isFloatingPointTy())
return 4; // No load on condition for FP - costs a conditional jump.
- return 1; // Load On Condition.
+ return 1; // Load On Condition / Select Register.
}
}
@@ -1010,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
// Store/Load reversed saves one instruction.
- if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
+ if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
+ I != nullptr) {
if (Opcode == Instruction::Load && I->hasOneUse()) {
const Instruction *LdUser = cast<Instruction>(*I->user_begin());
// In case of load -> bswap -> store, return normal cost for the load.
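A sketch of the load-plus-byte-swap shape favoured by the memory-op cost change (__builtin_bswap64 is the GCC/Clang builtin, used here purely for illustration): the load's only user is the swap, so a single byte-reversed load can cover both, and with vector-enhancements-2 the same reasoning is extended to vector types.

#include <cstdint>
#include <cstring>

// A 64-bit load whose only use is a byte swap; the cost model treats the
// swap as free because a reversed-load instruction can do both at once.
uint64_t loadSwapped(const void *p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof v);   // the load
  return __builtin_bswap64(v);    // its single use
}
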
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index e79bee1ea3a8..16ce2ef1d7a0 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index e2b9efd35d3e..713a55ee8400 100644
--- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -1,13 +1,12 @@
//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "SystemZ.h"
+#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h
new file mode 100644
index 000000000000..cad141c81e6b
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- SystemZTargetInfo.h - SystemZ target implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheSystemZTarget();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_TARGETINFO_SYSTEMZTARGETINFO_H
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index f23ea72eb513..8a46c77492c5 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -1,9 +1,8 @@
//===-- Target.cpp --------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
index e8b71924e0d9..256514c8c22d 100644
--- a/lib/Target/TargetIntrinsicInfo.cpp
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -1,9 +1,8 @@
//===-- TargetIntrinsicInfo.cpp - Target Instruction Information ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index bb937923b47e..17274e1c2c6e 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -48,6 +47,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
// Reset various EH DWARF encodings.
PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ CallSiteEncoding = dwarf::DW_EH_PE_uleb128;
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 39d5705b2a53..634866d93570 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- TargetMachine.cpp - General Target Information ---------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -145,6 +144,12 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
isa<GlobalVariable>(GV))
return false;
+ // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
+ // remain unresolved in the link, they can be resolved to zero, which is
+ // outside the current DSO.
+ if (TT.isOSBinFormatCOFF() && GV && GV->hasExternalWeakLinkage())
+ return false;
+
// Every other GV is local on COFF.
// Make an exception for windows OS in the triple: Some firmware builds use
// *-win32-macho triples. This (accidentally?) produced windows relocations
@@ -168,7 +173,12 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return GV && GV->isStrongDefinitionForLinker();
}
- assert(TT.isOSBinFormatELF());
+ // Due to the AIX linkage model, any global with default visibility is
+ // considered non-local.
+ if (TT.isOSBinFormatXCOFF())
+ return false;
+
+ assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
assert(RM != Reloc::DynamicNoPIC);
bool IsExecutable =
@@ -196,7 +206,7 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return true;
}
- // ELF supports preemption of other symbols.
+ // ELF & wasm support preemption of other symbols.
return false;
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index bae45ae28c45..5d9029682fdd 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -1,9 +1,8 @@
//===-- TargetMachine.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 0a5908f43790..09628e872dd5 100644
--- a/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -1,9 +1,8 @@
//==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -16,12 +15,15 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -87,9 +89,8 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
}
bool isToken() const override { return Kind == Token; }
- bool isImm() const override {
- return Kind == Integer || Kind == Float || Kind == Symbol;
- }
+ bool isImm() const override { return Kind == Integer || Kind == Symbol; }
+ bool isFPImm() const { return Kind == Float; }
bool isMem() const override { return false; }
bool isReg() const override { return false; }
bool isBrList() const { return Kind == BrList; }
@@ -116,12 +117,18 @@ struct WebAssemblyOperand : public MCParsedAsmOperand {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Integer)
Inst.addOperand(MCOperand::createImm(Int.Val));
- else if (Kind == Float)
- Inst.addOperand(MCOperand::createFPImm(Flt.Val));
else if (Kind == Symbol)
Inst.addOperand(MCOperand::createExpr(Sym.Exp));
else
- llvm_unreachable("Should be immediate or symbol!");
+ llvm_unreachable("Should be integer immediate or symbol!");
+ }
+
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Float)
+ Inst.addOperand(MCOperand::createFPImm(Flt.Val));
+ else
+ llvm_unreachable("Should be float immediate!");
}
void addBrListOperands(MCInst &Inst, unsigned N) const {
@@ -170,6 +177,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
FunctionStart,
FunctionLocals,
Instructions,
+ EndFunction,
+ DataSection,
} CurrentState = FileStart;
// For ensuring blocks are properly nested.
@@ -187,6 +196,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
// We track this to see if a .functype following a label is the same,
// as this is how we recognize the start of a function.
MCSymbol *LastLabel = nullptr;
+ MCSymbol *LastFunctionLabel = nullptr;
public:
WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -250,13 +260,13 @@ public:
}
bool ensureEmptyNestingStack() {
- auto err = !NestingStack.empty();
+ auto Err = !NestingStack.empty();
while (!NestingStack.empty()) {
error(Twine("Unmatched block construct(s) at function end: ") +
nestingString(NestingStack.back()).first);
NestingStack.pop_back();
}
- return err;
+ return Err;
}
bool isNext(AsmToken::TokenKind Kind) {
@@ -298,6 +308,8 @@ public:
Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
Type == "f64x2")
return wasm::ValType::V128;
+ if (Type == "exnref")
+ return wasm::ValType::EXNREF;
return Optional<wasm::ValType>();
}
@@ -308,7 +320,7 @@ public:
.Case("f32", WebAssembly::ExprType::F32)
.Case("f64", WebAssembly::ExprType::F64)
.Case("v128", WebAssembly::ExprType::V128)
- .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+ .Case("exnref", WebAssembly::ExprType::Exnref)
.Case("void", WebAssembly::ExprType::Void)
.Default(WebAssembly::ExprType::Invalid);
}
@@ -317,7 +329,7 @@ public:
while (Lexer.is(AsmToken::Identifier)) {
auto Type = parseType(Lexer.getTok().getString());
if (!Type)
- return true;
+ return error("unknown type: ", Lexer.getTok());
Types.push_back(Type.getValue());
Parser.Lex();
if (!isNext(AsmToken::Comma))
@@ -337,27 +349,67 @@ public:
Parser.Lex();
}
- bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
- StringRef InstName) {
- parseSingleInteger(IsNegative, Operands);
+ bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
+ auto &Flt = Lexer.getTok();
+ double Val;
+ if (Flt.getString().getAsDouble(Val, false))
+ return error("Cannot parse real: ", Flt);
+ if (IsNegative)
+ Val = -Val;
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
+ WebAssemblyOperand::FltOp{Val}));
+ Parser.Lex();
+ return false;
+ }
+
+ bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
+ if (Lexer.isNot(AsmToken::Identifier))
+ return true;
+ auto &Flt = Lexer.getTok();
+ auto S = Flt.getString();
+ double Val;
+ if (S.compare_lower("infinity") == 0) {
+ Val = std::numeric_limits<double>::infinity();
+ } else if (S.compare_lower("nan") == 0) {
+ Val = std::numeric_limits<double>::quiet_NaN();
+ } else {
+ return true;
+ }
+ if (IsNegative)
+ Val = -Val;
+ Operands.push_back(make_unique<WebAssemblyOperand>(
+ WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
+ WebAssemblyOperand::FltOp{Val}));
+ Parser.Lex();
+ return false;
+ }
+
+ bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
// FIXME: there is probably a cleaner way to do this.
- auto IsLoadStore = InstName.startswith("load") ||
- InstName.startswith("store") ||
- InstName.startswith("atomic_load") ||
- InstName.startswith("atomic_store");
- if (IsLoadStore) {
- // Parse load/store operands of the form: offset align
- auto &Offset = Lexer.getTok();
- if (Offset.is(AsmToken::Integer)) {
+ auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
+ InstName.find(".store") != StringRef::npos;
+ auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
+ if (IsLoadStore || IsAtomic) {
+ // Parse load/store operands of the form: offset:p2align=align
+ if (IsLoadStore && isNext(AsmToken::Colon)) {
+ auto Id = expectIdent();
+ if (Id != "p2align")
+ return error("Expected p2align, instead got: " + Id);
+ if (expect(AsmToken::Equal, "="))
+ return true;
+ if (!Lexer.is(AsmToken::Integer))
+ return error("Expected integer constant");
parseSingleInteger(false, Operands);
} else {
- // Alignment not specified.
- // FIXME: correctly derive a default from the instruction.
+ // Alignment not specified (or atomics, must use default alignment).
// We can't just call WebAssembly::GetDefaultP2Align since we don't have
- // an opcode until after the assembly matcher.
+ // an opcode until after the assembly matcher, so set a default to fix
+ // up later.
+ auto Tok = Lexer.getTok();
Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
- WebAssemblyOperand::IntOp{0}));
+ WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
+ WebAssemblyOperand::IntOp{-1}));
}
}
return false;
@@ -400,51 +452,45 @@ public:
Operands.push_back(make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
WebAssemblyOperand::TokOp{Name}));
- auto NamePair = Name.split('.');
- // If no '.', there is no type prefix.
- auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
// If this instruction is part of a control flow structure, ensure
// proper nesting.
bool ExpectBlockType = false;
- if (BaseName == "block") {
+ if (Name == "block") {
push(Block);
ExpectBlockType = true;
- } else if (BaseName == "loop") {
+ } else if (Name == "loop") {
push(Loop);
ExpectBlockType = true;
- } else if (BaseName == "try") {
+ } else if (Name == "try") {
push(Try);
ExpectBlockType = true;
- } else if (BaseName == "if") {
+ } else if (Name == "if") {
push(If);
ExpectBlockType = true;
- } else if (BaseName == "else") {
- if (pop(BaseName, If))
+ } else if (Name == "else") {
+ if (pop(Name, If))
return true;
push(Else);
- } else if (BaseName == "catch") {
- if (pop(BaseName, Try))
- return true;
- push(Try);
- } else if (BaseName == "catch_all") {
- if (pop(BaseName, Try))
+ } else if (Name == "catch") {
+ if (pop(Name, Try))
return true;
push(Try);
- } else if (BaseName == "end_if") {
- if (pop(BaseName, If, Else))
+ } else if (Name == "end_if") {
+ if (pop(Name, If, Else))
return true;
- } else if (BaseName == "end_try") {
- if (pop(BaseName, Try))
+ } else if (Name == "end_try") {
+ if (pop(Name, Try))
return true;
- } else if (BaseName == "end_loop") {
- if (pop(BaseName, Loop))
+ } else if (Name == "end_loop") {
+ if (pop(Name, Loop))
return true;
- } else if (BaseName == "end_block") {
- if (pop(BaseName, Block))
+ } else if (Name == "end_block") {
+ if (pop(Name, Block))
return true;
- } else if (BaseName == "end_function") {
- if (pop(BaseName, Function) || ensureEmptyNestingStack())
+ } else if (Name == "end_function") {
+ CurrentState = EndFunction;
+ if (pop(Name, Function) || ensureEmptyNestingStack())
return true;
}
@@ -452,6 +498,8 @@ public:
auto &Tok = Lexer.getTok();
switch (Tok.getKind()) {
case AsmToken::Identifier: {
+ if (!parseSpecialFloatMaybe(false, Operands))
+ break;
auto &Id = Lexer.getTok();
if (ExpectBlockType) {
// Assume this identifier is a block_type.
@@ -464,33 +512,39 @@ public:
// Assume this identifier is a label.
const MCExpr *Val;
SMLoc End;
- if (Parser.parsePrimaryExpr(Val, End))
+ if (Parser.parseExpression(Val, End))
return error("Cannot parse symbol: ", Lexer.getTok());
Operands.push_back(make_unique<WebAssemblyOperand>(
WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
WebAssemblyOperand::SymOp{Val}));
+ if (checkForP2AlignIfLoadStore(Operands, Name))
+ return true;
}
break;
}
case AsmToken::Minus:
Parser.Lex();
- if (Lexer.isNot(AsmToken::Integer))
- return error("Expected integer instead got: ", Lexer.getTok());
- if (parseOperandStartingWithInteger(true, Operands, BaseName))
- return true;
+ if (Lexer.is(AsmToken::Integer)) {
+ parseSingleInteger(true, Operands);
+ if (checkForP2AlignIfLoadStore(Operands, Name))
+ return true;
+ } else if (Lexer.is(AsmToken::Real)) {
+ if (parseSingleFloat(true, Operands))
+ return true;
+ } else if (!parseSpecialFloatMaybe(true, Operands)) {
+ } else {
+ return error("Expected numeric constant instead got: ",
+ Lexer.getTok());
+ }
break;
case AsmToken::Integer:
- if (parseOperandStartingWithInteger(false, Operands, BaseName))
+ parseSingleInteger(false, Operands);
+ if (checkForP2AlignIfLoadStore(Operands, Name))
return true;
break;
case AsmToken::Real: {
- double Val;
- if (Tok.getString().getAsDouble(Val, false))
- return error("Cannot parse real: ", Tok);
- Operands.push_back(make_unique<WebAssemblyOperand>(
- WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
- WebAssemblyOperand::FltOp{Val}));
- Parser.Lex();
+ if (parseSingleFloat(false, Operands))
+ return true;
break;
}
case AsmToken::LCurly: {
@@ -547,6 +601,17 @@ public:
return false;
}
+ bool CheckDataSection() {
+ if (CurrentState != DataSection) {
+ auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
+ if (WS && WS->getKind().isText())
+ return error("data directive must occur in a data segment: ",
+ Lexer.getTok());
+ }
+ CurrentState = DataSection;
+ return false;
+ }
+
// This function processes wasm-specific directives streamed to
// WebAssemblyTargetStreamer, all others go to the generic parser
// (see WasmAsmParser).
@@ -561,6 +626,7 @@ public:
auto &Out = getStreamer();
auto &TOut =
reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
+ auto &Ctx = Out.getContext();
// TODO: any time we return an error, at least one token must have been
// consumed, otherwise this will not signal an error to the caller.
@@ -578,8 +644,7 @@ public:
if (!Type)
return error("Unknown type in .globaltype directive: ", TypeTok);
// Now set this symbol with the correct type.
- auto WasmSym = cast<MCSymbolWasm>(
- TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(
wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
@@ -597,13 +662,13 @@ public:
auto SymName = expectIdent();
if (SymName.empty())
return true;
- auto WasmSym = cast<MCSymbolWasm>(
- TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
if (CurrentState == Label && WasmSym == LastLabel) {
// This .functype indicates a start of a function.
if (ensureEmptyNestingStack())
return true;
CurrentState = FunctionStart;
+ LastFunctionLabel = LastLabel;
push(Function);
}
auto Signature = make_unique<wasm::WasmSignature>();
@@ -621,8 +686,7 @@ public:
auto SymName = expectIdent();
if (SymName.empty())
return true;
- auto WasmSym = cast<MCSymbolWasm>(
- TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+ auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
auto Signature = make_unique<wasm::WasmSignature>();
if (parseRegTypeList(Signature->Params))
return true;
@@ -646,6 +710,30 @@ public:
return expect(AsmToken::EndOfStatement, "EOL");
}
+ if (DirectiveID.getString() == ".int8" ||
+ DirectiveID.getString() == ".int16" ||
+ DirectiveID.getString() == ".int32" ||
+ DirectiveID.getString() == ".int64") {
+ if (CheckDataSection()) return true;
+ const MCExpr *Val;
+ SMLoc End;
+ if (Parser.parseExpression(Val, End))
+ return error("Cannot parse .int expression: ", Lexer.getTok());
+ size_t NumBits = 0;
+ DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
+ Out.EmitValue(Val, NumBits / 8, End);
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
+ if (DirectiveID.getString() == ".asciz") {
+ if (CheckDataSection()) return true;
+ std::string S;
+ if (Parser.parseEscapedString(S))
+ return error("Cannot parse string constant: ", Lexer.getTok());
+ Out.EmitBytes(StringRef(S.c_str(), S.length() + 1));
+ return expect(AsmToken::EndOfStatement, "EOL");
+ }
+
return true; // We didn't process this directive.
}
@@ -667,8 +755,19 @@ public:
*Out.getTargetStreamer());
TOut.emitLocal(SmallVector<wasm::ValType, 0>());
}
- CurrentState = Instructions;
+ // Fix unknown p2align operands.
+ auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
+ if (Align != -1U) {
+ auto &Op0 = Inst.getOperand(0);
+ if (Op0.getImm() == -1)
+ Op0.setImm(Align);
+ }
Out.EmitInstruction(Inst, getSTI());
+ if (CurrentState == EndFunction) {
+ onEndOfFunction();
+ } else {
+ CurrentState = Instructions;
+ }
return false;
}
case Match_MissingFeature:
@@ -694,6 +793,35 @@ public:
llvm_unreachable("Implement any new match types added!");
}
+ void doBeforeLabelEmit(MCSymbol *Symbol) override {
+ // Start a new section for the next function automatically, since our
+ // object writer expects each function to have its own section. This way
+ // the user can't forget this "convention".
+ auto SymName = Symbol->getName();
+ if (SymName.startswith(".L"))
+ return; // Local Symbol.
+ // Only create a new text section if we're already in one.
+ auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
+ if (!CWS || !CWS->getKind().isText())
+ return;
+ auto SecName = ".text." + SymName;
+ auto WS = getContext().getWasmSection(SecName, SectionKind::getText());
+ getStreamer().SwitchSection(WS);
+ }
+
+ void onEndOfFunction() {
+ // Automatically output a .size directive, so it becomes optional for the
+ // user.
+ if (!LastFunctionLabel) return;
+ auto TempSym = getContext().createLinkerPrivateTempSymbol();
+ getStreamer().EmitLabel(TempSym);
+ auto Start = MCSymbolRefExpr::create(LastFunctionLabel, getContext());
+ auto End = MCSymbolRefExpr::create(TempSym, getContext());
+ auto Expr =
+ MCBinaryExpr::create(MCBinaryExpr::Sub, End, Start, getContext());
+ getStreamer().emitELFSize(LastFunctionLabel, Expr);
+ }
+
void onEndOfFile() override { ensureEmptyNestingStack(); }
};
} // end anonymous namespace
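A minimal, self-contained sketch of the memory-operand syntax the parser now accepts on load/store instructions, e.g. "i32.load 16:p2align=2"; the helper and struct names are invented for this sketch and are not part of the parser:

#include <cstdio>
#include <cstdlib>
#include <cstring>

struct MemArg { long offset; long p2align; };

// Accepts an offset immediate optionally followed by ":p2align=N".  When the
// alignment is omitted, -1 is recorded and later replaced with the opcode's
// default alignment, mirroring the -1 placeholder used by the parser above.
static bool parseMemArg(const char *s, MemArg &out) {
  char *end = nullptr;
  out.offset = std::strtol(s, &end, 10);
  out.p2align = -1;
  if (*end == '\0')
    return true;
  if (std::strncmp(end, ":p2align=", 9) != 0)
    return false;
  out.p2align = std::strtol(end + 9, &end, 10);
  return *end == '\0';
}

int main() {
  MemArg m;
  if (parseMemArg("16:p2align=2", m))
    std::printf("offset=%ld p2align=%ld\n", m.offset, m.p2align);  // 16, 2
  if (parseMemArg("8", m))
    std::printf("offset=%ld p2align=%ld\n", m.offset, m.p2align);  // 8, -1
  return 0;
}
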
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 6acc9b20eed2..f9bf3f85d30f 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -1,9 +1,8 @@
//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -15,7 +14,9 @@
///
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -45,6 +46,10 @@ class WebAssemblyDisassembler final : public MCDisassembler {
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
+ DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
public:
WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -77,7 +82,7 @@ static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
}
static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
- bool Signed = false) {
+ bool Signed) {
unsigned N = 0;
const char *Error = nullptr;
Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
@@ -104,9 +109,8 @@ template <typename T>
bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
if (Size + sizeof(T) > Bytes.size())
return false;
- T Val;
- memcpy(&Val, Bytes.data() + Size, sizeof(T));
- support::endian::byte_swap<T, support::endianness::little>(Val);
+ T Val = support::endian::read<T, support::endianness::little, 1>(
+ Bytes.data() + Size);
Size += sizeof(T);
if (std::is_floating_point<T>::value) {
MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
@@ -116,6 +120,41 @@ bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
return true;
}
+MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
+ StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream, raw_ostream &CStream) const {
+ Size = 0;
+ if (Address == 0) {
+ // Start of a code section: we're parsing only the function count.
+ int64_t FunctionCount;
+ if (!nextLEB(FunctionCount, Bytes, Size, false))
+ return MCDisassembler::Fail;
+ outs() << " # " << FunctionCount << " functions in section.";
+ } else {
+ // Parse the start of a single function.
+ int64_t BodySize, LocalEntryCount;
+ if (!nextLEB(BodySize, Bytes, Size, false) ||
+ !nextLEB(LocalEntryCount, Bytes, Size, false))
+ return MCDisassembler::Fail;
+ if (LocalEntryCount) {
+ outs() << " .local ";
+ for (int64_t I = 0; I < LocalEntryCount; I++) {
+ int64_t Count, Type;
+ if (!nextLEB(Count, Bytes, Size, false) ||
+ !nextLEB(Type, Bytes, Size, false))
+ return MCDisassembler::Fail;
+ for (int64_t J = 0; J < Count; J++) {
+ if (I || J)
+ outs() << ", ";
+ outs() << WebAssembly::anyTypeToString(Type);
+ }
+ }
+ }
+ }
+ outs() << "\n";
+ return MCDisassembler::Success;
+}
+
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
raw_ostream & /*OS*/, raw_ostream &CS) const {
@@ -138,7 +177,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
if (!WasmInst)
return MCDisassembler::Fail;
int64_t PrefixedOpc;
- if (!nextLEB(PrefixedOpc, Bytes, Size))
+ if (!nextLEB(PrefixedOpc, Bytes, Size, false))
return MCDisassembler::Fail;
if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
return MCDisassembler::Fail;
@@ -161,6 +200,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
case WebAssembly::OPERAND_OFFSET32:
case WebAssembly::OPERAND_P2ALIGN:
case WebAssembly::OPERAND_TYPEINDEX:
+ case WebAssembly::OPERAND_EVENT:
case MCOI::OPERAND_IMMEDIATE: {
if (!parseLEBImmediate(MI, Size, Bytes, false))
return MCDisassembler::Fail;
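For readers unfamiliar with the encoding that nextLEB() consumes (via llvm::decodeULEB128/decodeSLEB128), here is a minimal, self-contained sketch of unsigned LEB128 decoding; it is for illustration only and is not part of the patch:

    #include <cstddef>
    #include <cstdint>

    static uint64_t decodeULEB128Sketch(const uint8_t *Buf, size_t &Offset) {
      uint64_t Result = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = Buf[Offset++];
        Result |= uint64_t(Byte & 0x7f) << Shift; // each byte carries 7 payload bits
        Shift += 7;
      } while (Byte & 0x80);                      // high bit set means more bytes follow
      return Result;
    }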
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 0726dd481174..70b409cf4a90 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -36,7 +35,6 @@ class WebAssemblyAsmBackend final : public MCAsmBackend {
public:
explicit WebAssemblyAsmBackend(bool Is64Bit)
: MCAsmBackend(support::little), Is64Bit(Is64Bit) {}
- ~WebAssemblyAsmBackend() override {}
unsigned getNumFixupKinds() const override {
return WebAssembly::NumTargetFixupKinds;
@@ -77,9 +75,9 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// WebAssemblyFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
- {"fixup_code_sleb128_i32", 0, 5 * 8, 0},
- {"fixup_code_sleb128_i64", 0, 10 * 8, 0},
- {"fixup_code_uleb128_i32", 0, 5 * 8, 0},
+ {"fixup_sleb128_i32", 0, 5 * 8, 0},
+ {"fixup_sleb128_i64", 0, 10 * 8, 0},
+ {"fixup_uleb128_i32", 0, 5 * 8, 0},
};
if (Kind < FirstTargetFixupKind)
@@ -92,7 +90,7 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS,
uint64_t Count) const {
- for (uint64_t i = 0; i < Count; ++i)
+ for (uint64_t I = 0; I < Count; ++I)
OS << char(WebAssembly::Nop);
return true;
@@ -119,8 +117,8 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm,
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+ for (unsigned I = 0; I != NumBytes; ++I)
+ Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
}
std::unique_ptr<MCObjectTargetWriter>
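The applyFixup() loop above ORs the fixup value into the fragment byte by byte, least significant byte first. A standalone sketch of that little-endian masking, with illustrative names only:

    #include <cstddef>
    #include <cstdint>

    static void orValueLittleEndian(uint8_t *Data, size_t Offset, uint64_t Value,
                                    unsigned NumBytes) {
      // For each byte the fixup touches, mask in the corresponding bits.
      for (unsigned I = 0; I != NumBytes; ++I)
        Data[Offset + I] |= uint8_t((Value >> (I * 8)) & 0xff);
    }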
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
index c2fac5f93a2f..33e8de282955 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -1,9 +1,8 @@
//=- WebAssemblyFixupKinds.h - WebAssembly Specific Fixup Entries -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,9 +14,9 @@
namespace llvm {
namespace WebAssembly {
enum Fixups {
- fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
- fixup_code_sleb128_i64, // 64-bit signed
- fixup_code_uleb128_i32, // 32-bit unsigned
+ fixup_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+ fixup_sleb128_i64, // 64-bit signed
+ fixup_uleb128_i32, // 32-bit unsigned
// Marker
LastTargetFixupKind,
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index 15532d7ff1a6..b5d4d369b726 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -1,9 +1,8 @@
//=- WebAssemblyInstPrinter.cpp - WebAssembly assembly instruction printing -=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -12,7 +11,7 @@
///
//===----------------------------------------------------------------------===//
-#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
@@ -53,15 +52,15 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// Print any additional variadic operands.
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
if (Desc.isVariadic())
- for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) {
+ for (auto I = Desc.getNumOperands(), E = MI->getNumOperands(); I < E; ++I) {
// FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
// we have an extra flags operand which is not currently printed, for
// compatibility reasons.
- if (i != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
+ if (I != 0 && ((MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID &&
MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID_S) ||
- i != Desc.getNumOperands()))
+ I != Desc.getNumOperands()))
OS << ", ";
- printOperand(MI, i, OS);
+ printOperand(MI, I, OS);
}
// Print any added annotation.
@@ -123,61 +122,48 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
}
break;
- case WebAssembly::CATCH_I32:
- case WebAssembly::CATCH_I32_S:
- case WebAssembly::CATCH_I64:
- case WebAssembly::CATCH_I64_S:
- case WebAssembly::CATCH_ALL:
- case WebAssembly::CATCH_ALL_S:
- // There can be multiple catch instructions for one try instruction, so we
- // print a label only for the first 'catch' label.
- if (LastSeenEHInst != CATCH) {
- if (EHPadStack.empty()) {
- printAnnotation(OS, "try-catch mismatch!");
- } else {
- printAnnotation(OS,
- "catch" + utostr(EHPadStack.pop_back_val()) + ':');
- }
+ case WebAssembly::CATCH:
+ case WebAssembly::CATCH_S:
+ if (EHPadStack.empty()) {
+ printAnnotation(OS, "try-catch mismatch!");
+ } else {
+ printAnnotation(OS, "catch" + utostr(EHPadStack.pop_back_val()) + ':');
}
- LastSeenEHInst = CATCH;
break;
}
// Annotate any control flow label references.
- unsigned NumFixedOperands = Desc.NumOperands;
- SmallSet<uint64_t, 8> Printed;
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- // See if this operand denotes a basic block target.
- if (i < NumFixedOperands) {
- // A non-variable_ops operand, check its type.
- if (Desc.OpInfo[i].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
- continue;
+
+ // The rethrow instruction does not take a depth argument; it rethrows to the
+ // nearest enclosing catch scope, if any. If there is no enclosing catch
+ // scope, it throws up to the caller.
+ if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
+ if (EHPadStack.empty()) {
+ printAnnotation(OS, "to caller");
} else {
- // A variable_ops operand, which currently can be immediates (used in
- // br_table) which are basic block targets, or for call instructions
- // when using -wasm-keep-registers (in which case they are registers,
- // and should not be processed).
- if (!MI->getOperand(i).isImm())
- continue;
+ printAnnotation(OS, "down to catch" + utostr(EHPadStack.back()));
}
- uint64_t Depth = MI->getOperand(i).getImm();
- if (!Printed.insert(Depth).second)
- continue;
- if (Opc == WebAssembly::RETHROW || Opc == WebAssembly::RETHROW_S) {
- if (Depth > EHPadStack.size()) {
- printAnnotation(OS, "Invalid depth argument!");
- } else if (Depth == EHPadStack.size()) {
- // This can happen when rethrow instruction breaks out of all nests
- // and throws up to the current function's caller.
- printAnnotation(OS, utostr(Depth) + ": " + "to caller");
+ } else {
+ unsigned NumFixedOperands = Desc.NumOperands;
+ SmallSet<uint64_t, 8> Printed;
+ for (unsigned I = 0, E = MI->getNumOperands(); I < E; ++I) {
+ // See if this operand denotes a basic block target.
+ if (I < NumFixedOperands) {
+ // A non-variable_ops operand, check its type.
+ if (Desc.OpInfo[I].OperandType != WebAssembly::OPERAND_BASIC_BLOCK)
+ continue;
} else {
- uint64_t CatchNo = EHPadStack.rbegin()[Depth];
- printAnnotation(OS, utostr(Depth) + ": " + "down to catch" +
- utostr(CatchNo));
+ // A variable_ops operand, which currently can be immediates (used in
+ // br_table) which are basic block targets, or for call instructions
+ // when using -wasm-keep-registers (in which case they are registers,
+ // and should not be processed).
+ if (!MI->getOperand(I).isImm())
+ continue;
}
-
- } else {
+ uint64_t Depth = MI->getOperand(I).getImm();
+ if (!Printed.insert(Depth).second)
+ continue;
if (Depth >= ControlFlowStack.size()) {
printAnnotation(OS, "Invalid depth argument!");
} else {
@@ -206,13 +192,13 @@ static std::string toString(const APFloat &FP) {
// Use C99's hexadecimal floating-point representation.
static const size_t BufBytes = 128;
- char buf[BufBytes];
+ char Buf[BufBytes];
auto Written = FP.convertToHexString(
- buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven);
+ Buf, /*HexDigits=*/0, /*UpperCase=*/false, APFloat::rmNearestTiesToEven);
(void)Written;
assert(Written != 0);
assert(Written < BufBytes);
- return buf;
+ return Buf;
}
void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -296,8 +282,8 @@ const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
return "funcref";
case wasm::WASM_TYPE_FUNC:
return "func";
- case wasm::WASM_TYPE_EXCEPT_REF:
- return "except_ref";
+ case wasm::WASM_TYPE_EXNREF:
+ return "exnref";
case wasm::WASM_TYPE_NORESULT:
return "void";
default:
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index 5ad45c7d5c7f..b979de5028bf 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -1,9 +1,8 @@
// WebAssemblyInstPrinter.h - Print wasm MCInst to assembly syntax -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 44fcc129c39e..8f6531563e1b 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyMCAsmInfo.cpp - WebAssembly asm properties -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -20,7 +19,7 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mc-asm-info"
-WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() = default; // anchor.
WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index 8627a6e40c6a..9efbbf881f59 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- WebAssemblyMCAsmInfo.h - WebAssembly asm properties -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 065a4dc94ca6..44b6d6a968a9 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -49,7 +48,7 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCSubtargetInfo &STI) const override;
public:
- WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) : MCII(MCII) {}
};
} // end anonymous namespace
@@ -82,14 +81,14 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeULEB128(MI.getNumOperands() - 2, OS);
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
- const MCOperand &MO = MI.getOperand(i);
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const MCOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
/* nothing to encode */
} else if (MO.isImm()) {
- if (i < Desc.getNumOperands()) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (I < Desc.getNumOperands()) {
+ const MCOperandInfo &Info = Desc.OpInfo[I];
LLVM_DEBUG(dbgs() << "Encoding immediate: type="
<< int(Info.OperandType) << "\n");
switch (Info.OperandType) {
@@ -127,28 +126,28 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
}
} else if (MO.isFPImm()) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
+ const MCOperandInfo &Info = Desc.OpInfo[I];
if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
// This is fine for numeric values, but may cause NaNs to change bits.
- float f = float(MO.getFPImm());
- support::endian::write<float>(OS, f, support::little);
+ auto F = float(MO.getFPImm());
+ support::endian::write<float>(OS, F, support::little);
} else {
assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
- double d = MO.getFPImm();
- support::endian::write<double>(OS, d, support::little);
+ double D = MO.getFPImm();
+ support::endian::write<double>(OS, D, support::little);
}
} else if (MO.isExpr()) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
+ const MCOperandInfo &Info = Desc.OpInfo[I];
llvm::MCFixupKind FixupKind;
size_t PaddedSize = 5;
switch (Info.OperandType) {
case WebAssembly::OPERAND_I32IMM:
- FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
+ FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32);
break;
case WebAssembly::OPERAND_I64IMM:
- FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
+ FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64);
PaddedSize = 10;
break;
case WebAssembly::OPERAND_FUNCTION32:
@@ -156,7 +155,7 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
case WebAssembly::OPERAND_TYPEINDEX:
case WebAssembly::OPERAND_GLOBAL:
case WebAssembly::OPERAND_EVENT:
- FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
+ FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
break;
default:
llvm_unreachable("unexpected symbolic operand kind");
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 390f367c2978..9c8ca1f13b18 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyMCTargetDesc.cpp - WebAssembly Target Descriptions -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -12,10 +11,11 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssemblyMCTargetDesc.h"
-#include "InstPrinter/WebAssemblyInstPrinter.h"
-#include "WebAssemblyMCAsmInfo.h"
-#include "WebAssemblyTargetStreamer.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCAsmInfo.h"
+#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -40,13 +40,13 @@ static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
}
static MCInstrInfo *createMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
+ auto *X = new MCInstrInfo();
InitWebAssemblyMCInstrInfo(X);
return X;
}
static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) {
- MCRegisterInfo *X = new MCRegisterInfo();
+ auto *X = new MCRegisterInfo();
InitWebAssemblyMCRegisterInfo(X, 0);
return X;
}
@@ -146,8 +146,8 @@ wasm::ValType WebAssembly::toValType(const MVT &Ty) {
case MVT::v4f32:
case MVT::v2f64:
return wasm::ValType::V128;
- case MVT::ExceptRef:
- return wasm::ValType::EXCEPT_REF;
+ case MVT::exnref:
+ return wasm::ValType::EXNREF;
default:
llvm_unreachable("unexpected type");
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index a01517fb90c3..7a9f59b1a4f2 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -1,9 +1,8 @@
//==- WebAssemblyMCTargetDesc.h - WebAssembly Target Descriptions -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -15,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+#include "../WebAssemblySubtarget.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
@@ -33,9 +33,6 @@ class Target;
class Triple;
class raw_pwrite_stream;
-Target &getTheWebAssemblyTarget32();
-Target &getTheWebAssemblyTarget64();
-
MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
@@ -90,12 +87,23 @@ namespace WebAssemblyII {
enum TOF {
MO_NO_FLAG = 0,
- // Flags to indicate the type of the symbol being referenced
- MO_SYMBOL_FUNCTION = 0x1,
- MO_SYMBOL_GLOBAL = 0x2,
- MO_SYMBOL_EVENT = 0x4,
- MO_SYMBOL_MASK = 0x7,
+ // On a symbol operand this indicates that the immediate is a wasm global
+ // index. The value of the wasm global will be set to the symbol address at
+ // runtime. This adds a level of indirection similar to the GOT on native
+ // platforms.
+ MO_GOT,
+
+ // On a symbol operand this indicates that the immediate is the symbol
+ // address relative the __memory_base wasm global.
+ // Only applicable to data symbols.
+ MO_MEMORY_BASE_REL,
+
+ // On a symbol operand this indicates that the immediate is the symbol
+ // address relative to the __table_base wasm global.
+ // Only applicable to function symbols.
+ MO_TABLE_BASE_REL,
};
+
} // end namespace WebAssemblyII
} // end namespace llvm
@@ -111,15 +119,30 @@ enum TOF {
#define GET_INSTRINFO_ENUM
#include "WebAssemblyGenInstrInfo.inc"
-#define GET_SUBTARGETINFO_ENUM
-#include "WebAssemblyGenSubtargetInfo.inc"
-
namespace llvm {
namespace WebAssembly {
+/// This is used to indicate block signatures.
+enum class ExprType : unsigned {
+ Void = 0x40,
+ I32 = 0x7F,
+ I64 = 0x7E,
+ F32 = 0x7D,
+ F64 = 0x7C,
+ V128 = 0x7B,
+ Exnref = 0x68,
+ Invalid = 0x00
+};
+
+/// Instruction opcodes emitted via means other than CodeGen.
+static const unsigned Nop = 0x01;
+static const unsigned End = 0x0b;
+
+wasm::ValType toValType(const MVT &Ty);
+
/// Return the default p2align value for a load or store with the given opcode.
-inline unsigned GetDefaultP2Align(unsigned Opcode) {
- switch (Opcode) {
+inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
+ switch (Opc) {
case WebAssembly::LOAD8_S_I32:
case WebAssembly::LOAD8_S_I32_S:
case WebAssembly::LOAD8_U_I32:
@@ -328,35 +351,238 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::STORE_v2f64_S:
return 4;
default:
+ return -1;
+ }
+}
+
+inline unsigned GetDefaultP2Align(unsigned Opc) {
+ auto Align = GetDefaultP2AlignAny(Opc);
+ if (Align == -1U) {
llvm_unreachable("Only loads and stores have p2align values");
}
+ return Align;
}
-/// The operand number of the load or store address in load/store instructions.
-static const unsigned LoadAddressOperandNo = 3;
-static const unsigned StoreAddressOperandNo = 2;
+inline bool isArgument(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::ARGUMENT_i32:
+ case WebAssembly::ARGUMENT_i32_S:
+ case WebAssembly::ARGUMENT_i64:
+ case WebAssembly::ARGUMENT_i64_S:
+ case WebAssembly::ARGUMENT_f32:
+ case WebAssembly::ARGUMENT_f32_S:
+ case WebAssembly::ARGUMENT_f64:
+ case WebAssembly::ARGUMENT_f64_S:
+ case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v16i8_S:
+ case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v8i16_S:
+ case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4i32_S:
+ case WebAssembly::ARGUMENT_v2i64:
+ case WebAssembly::ARGUMENT_v2i64_S:
+ case WebAssembly::ARGUMENT_v4f32:
+ case WebAssembly::ARGUMENT_v4f32_S:
+ case WebAssembly::ARGUMENT_v2f64:
+ case WebAssembly::ARGUMENT_v2f64_S:
+ case WebAssembly::ARGUMENT_exnref:
+ case WebAssembly::ARGUMENT_exnref_S:
+ return true;
+ default:
+ return false;
+ }
+}
-/// The operand number of the load or store p2align in load/store instructions.
-static const unsigned LoadP2AlignOperandNo = 1;
-static const unsigned StoreP2AlignOperandNo = 0;
+inline bool isCopy(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::COPY_I32:
+ case WebAssembly::COPY_I32_S:
+ case WebAssembly::COPY_I64:
+ case WebAssembly::COPY_I64_S:
+ case WebAssembly::COPY_F32:
+ case WebAssembly::COPY_F32_S:
+ case WebAssembly::COPY_F64:
+ case WebAssembly::COPY_F64_S:
+ case WebAssembly::COPY_V128:
+ case WebAssembly::COPY_V128_S:
+ case WebAssembly::COPY_EXNREF:
+ case WebAssembly::COPY_EXNREF_S:
+ return true;
+ default:
+ return false;
+ }
+}
-/// This is used to indicate block signatures.
-enum class ExprType : unsigned {
- Void = 0x40,
- I32 = 0x7F,
- I64 = 0x7E,
- F32 = 0x7D,
- F64 = 0x7C,
- V128 = 0x7B,
- ExceptRef = 0x68,
- Invalid = 0x00
-};
+inline bool isTee(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::TEE_I32:
+ case WebAssembly::TEE_I32_S:
+ case WebAssembly::TEE_I64:
+ case WebAssembly::TEE_I64_S:
+ case WebAssembly::TEE_F32:
+ case WebAssembly::TEE_F32_S:
+ case WebAssembly::TEE_F64:
+ case WebAssembly::TEE_F64_S:
+ case WebAssembly::TEE_V128:
+ case WebAssembly::TEE_V128_S:
+ case WebAssembly::TEE_EXNREF:
+ case WebAssembly::TEE_EXNREF_S:
+ return true;
+ default:
+ return false;
+ }
+}
-/// Instruction opcodes emitted via means other than CodeGen.
-static const unsigned Nop = 0x01;
-static const unsigned End = 0x0b;
+inline bool isCallDirect(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
+ case WebAssembly::CALL_i32:
+ case WebAssembly::CALL_i32_S:
+ case WebAssembly::CALL_i64:
+ case WebAssembly::CALL_i64_S:
+ case WebAssembly::CALL_f32:
+ case WebAssembly::CALL_f32_S:
+ case WebAssembly::CALL_f64:
+ case WebAssembly::CALL_f64_S:
+ case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
+ case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
+ case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
+ case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
+ case WebAssembly::CALL_exnref:
+ case WebAssembly::CALL_exnref_S:
+ case WebAssembly::RET_CALL:
+ case WebAssembly::RET_CALL_S:
+ return true;
+ default:
+ return false;
+ }
+}
-wasm::ValType toValType(const MVT &Ty);
+inline bool isCallIndirect(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
+ case WebAssembly::CALL_INDIRECT_i32:
+ case WebAssembly::CALL_INDIRECT_i32_S:
+ case WebAssembly::CALL_INDIRECT_i64:
+ case WebAssembly::CALL_INDIRECT_i64_S:
+ case WebAssembly::CALL_INDIRECT_f32:
+ case WebAssembly::CALL_INDIRECT_f32_S:
+ case WebAssembly::CALL_INDIRECT_f64:
+ case WebAssembly::CALL_INDIRECT_f64_S:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
+ case WebAssembly::CALL_INDIRECT_exnref:
+ case WebAssembly::CALL_INDIRECT_exnref_S:
+ case WebAssembly::RET_CALL_INDIRECT:
+ case WebAssembly::RET_CALL_INDIRECT_S:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Returns the operand number of a callee, assuming the argument is a call
+/// instruction.
+inline unsigned getCalleeOpNo(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_VOID_S:
+ case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID_S:
+ case WebAssembly::RET_CALL:
+ case WebAssembly::RET_CALL_S:
+ case WebAssembly::RET_CALL_INDIRECT:
+ case WebAssembly::RET_CALL_INDIRECT_S:
+ return 0;
+ case WebAssembly::CALL_i32:
+ case WebAssembly::CALL_i32_S:
+ case WebAssembly::CALL_i64:
+ case WebAssembly::CALL_i64_S:
+ case WebAssembly::CALL_f32:
+ case WebAssembly::CALL_f32_S:
+ case WebAssembly::CALL_f64:
+ case WebAssembly::CALL_f64_S:
+ case WebAssembly::CALL_v16i8:
+ case WebAssembly::CALL_v16i8_S:
+ case WebAssembly::CALL_v8i16:
+ case WebAssembly::CALL_v8i16_S:
+ case WebAssembly::CALL_v4i32:
+ case WebAssembly::CALL_v4i32_S:
+ case WebAssembly::CALL_v2i64:
+ case WebAssembly::CALL_v2i64_S:
+ case WebAssembly::CALL_v4f32:
+ case WebAssembly::CALL_v4f32_S:
+ case WebAssembly::CALL_v2f64:
+ case WebAssembly::CALL_v2f64_S:
+ case WebAssembly::CALL_exnref:
+ case WebAssembly::CALL_exnref_S:
+ case WebAssembly::CALL_INDIRECT_i32:
+ case WebAssembly::CALL_INDIRECT_i32_S:
+ case WebAssembly::CALL_INDIRECT_i64:
+ case WebAssembly::CALL_INDIRECT_i64_S:
+ case WebAssembly::CALL_INDIRECT_f32:
+ case WebAssembly::CALL_INDIRECT_f32_S:
+ case WebAssembly::CALL_INDIRECT_f64:
+ case WebAssembly::CALL_INDIRECT_f64_S:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v16i8_S:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v8i16_S:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4i32_S:
+ case WebAssembly::CALL_INDIRECT_v2i64:
+ case WebAssembly::CALL_INDIRECT_v2i64_S:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ case WebAssembly::CALL_INDIRECT_v4f32_S:
+ case WebAssembly::CALL_INDIRECT_v2f64:
+ case WebAssembly::CALL_INDIRECT_v2f64_S:
+ case WebAssembly::CALL_INDIRECT_exnref:
+ case WebAssembly::CALL_INDIRECT_exnref_S:
+ return 1;
+ default:
+ llvm_unreachable("Not a call instruction");
+ }
+}
+
+inline bool isMarker(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::BLOCK:
+ case WebAssembly::BLOCK_S:
+ case WebAssembly::END_BLOCK:
+ case WebAssembly::END_BLOCK_S:
+ case WebAssembly::LOOP:
+ case WebAssembly::LOOP_S:
+ case WebAssembly::END_LOOP:
+ case WebAssembly::END_LOOP_S:
+ case WebAssembly::TRY:
+ case WebAssembly::TRY_S:
+ case WebAssembly::END_TRY:
+ case WebAssembly::END_TRY_S:
+ return true;
+ default:
+ return false;
+ }
+}
} // end namespace WebAssembly
} // end namespace llvm
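One detail worth keeping in mind about the p2align helpers above: the value is a log2 alignment, so GetDefaultP2AlignAny() returning 2 means 4-byte alignment, and -1U is the sentinel for opcodes without a memory operand. A tiny sketch of the conversion (the helper name is illustrative, not part of the patch):

    #include <cstdint>

    static uint64_t p2AlignToBytes(unsigned P2Align) {
      return uint64_t(1) << P2Align; // 0 -> 1 byte, 2 -> 4 bytes, 4 -> 16 bytes
    }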
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 50143fb0ece3..e05efef7201b 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -1,9 +1,8 @@
//==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -13,9 +12,9 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssemblyTargetStreamer.h"
-#include "InstPrinter/WebAssemblyInstPrinter.h"
-#include "WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -113,8 +112,15 @@ void WebAssemblyTargetAsmStreamer::emitEventType(const MCSymbolWasm *Sym) {
}
void WebAssemblyTargetAsmStreamer::emitImportModule(const MCSymbolWasm *Sym,
- StringRef ModuleName) {
- OS << "\t.import_module\t" << Sym->getName() << ", " << ModuleName << '\n';
+ StringRef ImportModule) {
+ OS << "\t.import_module\t" << Sym->getName() << ", "
+ << ImportModule << '\n';
+}
+
+void WebAssemblyTargetAsmStreamer::emitImportName(const MCSymbolWasm *Sym,
+ StringRef ImportName) {
+ OS << "\t.import_name\t" << Sym->getName() << ", "
+ << ImportName << '\n';
}
void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 3073938118b4..5ea62b179d22 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -1,9 +1,8 @@
//==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -45,7 +44,10 @@ public:
virtual void emitEventType(const MCSymbolWasm *Sym) = 0;
/// .import_module
virtual void emitImportModule(const MCSymbolWasm *Sym,
- StringRef ModuleName) = 0;
+ StringRef ImportModule) = 0;
+ /// .import_name
+ virtual void emitImportName(const MCSymbolWasm *Sym,
+ StringRef ImportName) = 0;
protected:
void emitValueType(wasm::ValType Type);
@@ -67,7 +69,8 @@ public:
void emitIndIdx(const MCExpr *Value) override;
void emitGlobalType(const MCSymbolWasm *Sym) override;
void emitEventType(const MCSymbolWasm *Sym) override;
- void emitImportModule(const MCSymbolWasm *Sym, StringRef ModuleName) override;
+ void emitImportModule(const MCSymbolWasm *Sym, StringRef ImportModule) override;
+ void emitImportName(const MCSymbolWasm *Sym, StringRef ImportName) override;
};
/// This part is for Wasm object output
@@ -82,7 +85,9 @@ public:
void emitGlobalType(const MCSymbolWasm *Sym) override {}
void emitEventType(const MCSymbolWasm *Sym) override {}
void emitImportModule(const MCSymbolWasm *Sym,
- StringRef ModuleName) override {}
+ StringRef ImportModule) override {}
+ void emitImportName(const MCSymbolWasm *Sym,
+ StringRef ImportName) override {}
};
/// This part is for null output
@@ -98,6 +103,7 @@ public:
void emitGlobalType(const MCSymbolWasm *) override {}
void emitEventType(const MCSymbolWasm *) override {}
void emitImportModule(const MCSymbolWasm *, StringRef) override {}
+ void emitImportName(const MCSymbolWasm *, StringRef) override {}
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 763e30be8e02..a1cc3e268e8f 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyWasmObjectWriter.cpp - WebAssembly Wasm Writer ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -43,26 +42,7 @@ private:
WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit)
: MCWasmObjectTargetWriter(Is64Bit) {}
-// Test whether the given expression computes a function address.
-static bool IsFunctionExpr(const MCExpr *Expr) {
- if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr))
- return cast<MCSymbolWasm>(SyExp->getSymbol()).isFunction();
-
- if (auto BinOp = dyn_cast<MCBinaryExpr>(Expr))
- return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS());
-
- if (auto UnOp = dyn_cast<MCUnaryExpr>(Expr))
- return IsFunctionExpr(UnOp->getSubExpr());
-
- return false;
-}
-
-static bool IsFunctionType(const MCValue &Target) {
- const MCSymbolRefExpr *RefA = Target.getSymA();
- return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX;
-}
-
-static const MCSection *GetFixupSection(const MCExpr *Expr) {
+static const MCSection *getFixupSection(const MCExpr *Expr) {
if (auto SyExp = dyn_cast<MCSymbolRefExpr>(Expr)) {
if (SyExp->getSymbol().isInSection())
return &SyExp->getSymbol().getSection();
@@ -70,63 +50,66 @@ static const MCSection *GetFixupSection(const MCExpr *Expr) {
}
if (auto BinOp = dyn_cast<MCBinaryExpr>(Expr)) {
- auto SectionLHS = GetFixupSection(BinOp->getLHS());
- auto SectionRHS = GetFixupSection(BinOp->getRHS());
+ auto SectionLHS = getFixupSection(BinOp->getLHS());
+ auto SectionRHS = getFixupSection(BinOp->getRHS());
return SectionLHS == SectionRHS ? nullptr : SectionLHS;
}
if (auto UnOp = dyn_cast<MCUnaryExpr>(Expr))
- return GetFixupSection(UnOp->getSubExpr());
+ return getFixupSection(UnOp->getSubExpr());
return nullptr;
}
-static bool IsGlobalType(const MCValue &Target) {
- const MCSymbolRefExpr *RefA = Target.getSymA();
- return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_GLOBAL;
-}
-
-static bool IsEventType(const MCValue &Target) {
- const MCSymbolRefExpr *RefA = Target.getSymA();
- return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_EVENT;
-}
-
unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup) const {
- // WebAssembly functions are not allocated in the data address space. To
- // resolve a pointer to a function, we must use a special relocation type.
- bool IsFunction = IsFunctionExpr(Fixup.getValue());
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ assert(RefA);
+ auto& SymA = cast<MCSymbolWasm>(RefA->getSymbol());
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_GOT:
+ return wasm::R_WASM_GLOBAL_INDEX_LEB;
+ case MCSymbolRefExpr::VK_WASM_TBREL:
+ assert(SymA.isFunction());
+ return wasm::R_WASM_TABLE_INDEX_REL_SLEB;
+ case MCSymbolRefExpr::VK_WASM_MBREL:
+ assert(SymA.isData());
+ return wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
+ case MCSymbolRefExpr::VK_WASM_TYPEINDEX:
+ return wasm::R_WASM_TYPE_INDEX_LEB;
+ default:
+ break;
+ }
switch (unsigned(Fixup.getKind())) {
- case WebAssembly::fixup_code_sleb128_i32:
- if (IsFunction)
- return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
- return wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB;
- case WebAssembly::fixup_code_sleb128_i64:
+ case WebAssembly::fixup_sleb128_i32:
+ if (SymA.isFunction())
+ return wasm::R_WASM_TABLE_INDEX_SLEB;
+ return wasm::R_WASM_MEMORY_ADDR_SLEB;
+ case WebAssembly::fixup_sleb128_i64:
llvm_unreachable("fixup_sleb128_i64 not implemented yet");
- case WebAssembly::fixup_code_uleb128_i32:
- if (IsGlobalType(Target))
- return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB;
- if (IsFunctionType(Target))
- return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB;
- if (IsFunction)
- return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
- if (IsEventType(Target))
- return wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB;
- return wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB;
+ case WebAssembly::fixup_uleb128_i32:
+ if (SymA.isGlobal())
+ return wasm::R_WASM_GLOBAL_INDEX_LEB;
+ if (SymA.isFunction())
+ return wasm::R_WASM_FUNCTION_INDEX_LEB;
+ if (SymA.isEvent())
+ return wasm::R_WASM_EVENT_INDEX_LEB;
+ return wasm::R_WASM_MEMORY_ADDR_LEB;
case FK_Data_4:
- if (IsFunction)
- return wasm::R_WEBASSEMBLY_TABLE_INDEX_I32;
+ if (SymA.isFunction())
+ return wasm::R_WASM_TABLE_INDEX_I32;
if (auto Section = static_cast<const MCSectionWasm *>(
- GetFixupSection(Fixup.getValue()))) {
+ getFixupSection(Fixup.getValue()))) {
if (Section->getKind().isText())
- return wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32;
+ return wasm::R_WASM_FUNCTION_OFFSET_I32;
else if (!Section->isWasmData())
- return wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32;
+ return wasm::R_WASM_SECTION_OFFSET_I32;
}
- return wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32;
- case FK_Data_8:
- llvm_unreachable("FK_Data_8 not implemented yet");
+ return wasm::R_WASM_MEMORY_ADDR_I32;
default:
llvm_unreachable("unimplemented fixup kind");
}
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
index a154b4bf7ea8..ef3f5aaf7d33 100644
--- a/lib/Target/WebAssembly/README.txt
+++ b/lib/Target/WebAssembly/README.txt
@@ -14,7 +14,7 @@ can run in browsers and other environments. For more information, see the
Emscripten documentation in general, and this page in particular:
* https://github.com/kripken/emscripten/wiki/New-WebAssembly-Backend
-
+
Rust provides WebAssembly support integrated into Cargo. There are two
main options:
- wasm32-unknown-unknown, which provides a relatively minimal environment
diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index f7a417c0ed49..e4afe2bb2830 100644
--- a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyTargetInfo.cpp - WebAssembly Target Implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -12,8 +11,7 @@
///
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "llvm/ADT/Triple.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
new file mode 100644
index 000000000000..a7427f78c72c
--- /dev/null
+++ b/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h
@@ -0,0 +1,26 @@
+//===-- WebAssemblyTargetInfo.h - WebAssembly Target Impl -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file registers the WebAssembly target.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheWebAssemblyTarget32();
+Target &getTheWebAssemblyTarget64();
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_WEBASSEMBLY_TARGETINFO_WEBASSEMBLYTARGETINFO_H
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
index 45145c0a6527..fcbd0a5082ff 100644
--- a/lib/Target/WebAssembly/WebAssembly.h
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -1,9 +1,8 @@
//===-- WebAssembly.h - Top-level interface for WebAssembly ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -39,18 +38,17 @@ FunctionPass *createWebAssemblyArgumentMove();
FunctionPass *createWebAssemblySetP2AlignOperands();
// Late passes.
-FunctionPass *createWebAssemblyEHRestoreStackPointer();
FunctionPass *createWebAssemblyReplacePhysRegs();
FunctionPass *createWebAssemblyPrepareForLiveIntervals();
FunctionPass *createWebAssemblyOptimizeLiveIntervals();
FunctionPass *createWebAssemblyMemIntrinsicResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
-FunctionPass *createWebAssemblyExplicitLocals();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
FunctionPass *createWebAssemblyLateEHPrepare();
FunctionPass *createWebAssemblyCFGSort();
FunctionPass *createWebAssemblyCFGStackify();
+FunctionPass *createWebAssemblyExplicitLocals();
FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
FunctionPass *createWebAssemblyPeephole();
@@ -64,19 +62,18 @@ void initializeFixFunctionBitcastsPass(PassRegistry &);
void initializeOptimizeReturnedPass(PassRegistry &);
void initializeWebAssemblyArgumentMovePass(PassRegistry &);
void initializeWebAssemblySetP2AlignOperandsPass(PassRegistry &);
-void initializeWebAssemblyEHRestoreStackPointerPass(PassRegistry &);
void initializeWebAssemblyReplacePhysRegsPass(PassRegistry &);
void initializeWebAssemblyPrepareForLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyOptimizeLiveIntervalsPass(PassRegistry &);
void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &);
void initializeWebAssemblyRegStackifyPass(PassRegistry &);
void initializeWebAssemblyRegColoringPass(PassRegistry &);
-void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &);
void initializeWebAssemblyLateEHPreparePass(PassRegistry &);
void initializeWebAssemblyExceptionInfoPass(PassRegistry &);
void initializeWebAssemblyCFGSortPass(PassRegistry &);
void initializeWebAssemblyCFGStackifyPass(PassRegistry &);
+void initializeWebAssemblyExplicitLocalsPass(PassRegistry &);
void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &);
void initializeWebAssemblyRegNumberingPass(PassRegistry &);
void initializeWebAssemblyPeepholePass(PassRegistry &);
diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td
index 6b218f8aa880..b0b8a9b996a3 100644
--- a/lib/Target/WebAssembly/WebAssembly.td
+++ b/lib/Target/WebAssembly/WebAssembly.td
@@ -1,9 +1,8 @@
//- WebAssembly.td - Describe the WebAssembly Target Machine --*- tablegen -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -34,6 +33,7 @@ def FeatureUnimplementedSIMD128 :
def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
"Enable Atomics">;
+
def FeatureNontrappingFPToInt :
SubtargetFeature<"nontrapping-fptoint",
"HasNontrappingFPToInt", "true",
@@ -44,10 +44,28 @@ def FeatureSignExt :
"HasSignExt", "true",
"Enable sign extension operators">;
+def FeatureTailCall :
+ SubtargetFeature<"tail-call",
+ "HasTailCall", "true",
+ "Enable tail call instructions">;
+
def FeatureExceptionHandling :
SubtargetFeature<"exception-handling", "HasExceptionHandling", "true",
"Enable Wasm exception handling">;
+def FeatureBulkMemory :
+ SubtargetFeature<"bulk-memory", "HasBulkMemory", "true",
+ "Enable bulk memory operations">;
+
+def FeatureMultivalue :
+ SubtargetFeature<"multivalue",
+ "HasMultivalue", "true",
+ "Enable multivalue blocks, instructions, and functions">;
+
+def FeatureMutableGlobals :
+ SubtargetFeature<"mutable-globals", "HasMutableGlobals", "true",
+ "Enable mutable globals">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//===----------------------------------------------------------------------===//
@@ -79,7 +97,8 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel,
[FeatureSIMD128, FeatureAtomics,
- FeatureNontrappingFPToInt, FeatureSignExt]>;
+ FeatureNontrappingFPToInt, FeatureSignExt,
+ FeatureMutableGlobals]>;
//===----------------------------------------------------------------------===//
// Target Declaration
diff --git a/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index e49e2b67f435..b7a701f15782 100644
--- a/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyAddMissingPrototypes.cpp - Fix prototypeless functions -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -79,32 +78,33 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
report_fatal_error(
"Functions with 'no-prototype' attribute must take varargs: " +
F.getName());
- if (F.getFunctionType()->getNumParams() != 0)
- report_fatal_error(
- "Functions with 'no-prototype' attribute should not have params: " +
- F.getName());
+ unsigned NumParams = F.getFunctionType()->getNumParams();
+ if (NumParams != 0) {
+ if (!(NumParams == 1 && F.arg_begin()->hasStructRetAttr()))
+ report_fatal_error("Functions with 'no-prototype' attribute should "
+ "not have params: " +
+ F.getName());
+ }
// Create a function prototype based on the first call site (first bitcast)
// that we find.
FunctionType *NewType = nullptr;
- Function *NewF = nullptr;
for (Use &U : F.uses()) {
LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
+ LLVM_DEBUG(dbgs() << *U.getUser() << "\n");
if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
if (auto *DestType = dyn_cast<FunctionType>(
BC->getDestTy()->getPointerElementType())) {
if (!NewType) {
// Create a new function with the correct type
NewType = DestType;
- NewF = Function::Create(NewType, F.getLinkage(), F.getName());
- NewF->setAttributes(F.getAttributes());
- NewF->removeFnAttr("no-prototype");
- } else {
- if (NewType != DestType) {
- report_fatal_error("Prototypeless function used with "
- "conflicting signatures: " +
- F.getName());
- }
+ LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
+ } else if (NewType != DestType) {
+ errs() << "warning: prototype-less function used with "
+ "conflicting signatures: "
+ << F.getName() << "\n";
+ LLVM_DEBUG(dbgs() << " " << *DestType << "\n");
+ LLVM_DEBUG(dbgs() << " "<< *NewType << "\n");
}
}
}
@@ -114,47 +114,30 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
LLVM_DEBUG(
dbgs() << "could not derive a function prototype from usage: " +
F.getName() + "\n");
- continue;
+ // We could not derive a type for this function. In this case, strip the
+ // isVarArg flag and make it a simple zero-arg function, which has a better
+ // chance of being correct. The current signature of (...) is illegal in C
+ // since it doesn't have any arguments before the "...", so this at least
+ // makes it possible for this symbol to be resolved by the linker.
+ NewType = FunctionType::get(F.getFunctionType()->getReturnType(), false);
}
- SmallVector<Instruction *, 4> DeadInsts;
-
- for (Use &US : F.uses()) {
- User *U = US.getUser();
- if (auto *BC = dyn_cast<BitCastOperator>(U)) {
- if (auto *Inst = dyn_cast<BitCastInst>(U)) {
- // Replace with a new bitcast
- IRBuilder<> Builder(Inst);
- Value *NewCast = Builder.CreatePointerCast(NewF, BC->getDestTy());
- Inst->replaceAllUsesWith(NewCast);
- DeadInsts.push_back(Inst);
- } else if (auto *Const = dyn_cast<ConstantExpr>(U)) {
- Constant *NewConst =
- ConstantExpr::getPointerCast(NewF, BC->getDestTy());
- Const->replaceAllUsesWith(NewConst);
- } else {
- dbgs() << *U->getType() << "\n";
-#ifndef NDEBUG
- U->dump();
-#endif
- report_fatal_error("unexpected use of prototypeless function: " +
- F.getName() + "\n");
- }
- }
- }
-
- for (auto I : DeadInsts)
- I->eraseFromParent();
+ Function *NewF =
+ Function::Create(NewType, F.getLinkage(), F.getName() + ".fixed_sig");
+ NewF->setAttributes(F.getAttributes());
+ NewF->removeFnAttr("no-prototype");
Replacements.emplace_back(&F, NewF);
}
-
- // Finally replace the old function declarations with the new ones
for (auto &Pair : Replacements) {
- Function *Old = Pair.first;
- Function *New = Pair.second;
- Old->eraseFromParent();
- M.getFunctionList().push_back(New);
+ Function *OldF = Pair.first;
+ Function *NewF = Pair.second;
+ std::string Name = OldF->getName();
+ M.getFunctionList().push_back(NewF);
+ OldF->replaceAllUsesWith(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(NewF, OldF->getType()));
+ OldF->eraseFromParent();
+ NewF->setName(Name);
}
return !Replacements.empty();
diff --git a/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
index 7c8a631cde8a..02f5cc6da77c 100644
--- a/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -79,7 +78,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
// Look for the first NonArg instruction.
for (MachineInstr &MI : EntryMBB) {
- if (!WebAssembly::isArgument(MI)) {
+ if (!WebAssembly::isArgument(MI.getOpcode())) {
InsertPt = MI;
break;
}
@@ -88,7 +87,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
// Now move any argument instructions later in the block
// to before our first NonArg instruction.
for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) {
- if (WebAssembly::isArgument(MI)) {
+ if (WebAssembly::isArgument(MI.getOpcode())) {
EntryMBB.insert(InsertPt, MI.removeFromParent());
Changed = true;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index c4f03dfa7f9e..7f9d41da3978 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -15,21 +14,27 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyAsmPrinter.h"
-#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "WebAssembly.h"
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
@@ -38,10 +43,13 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
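+// Defined elsewhere in the WebAssembly backend; used below to keep certain
+// register-based pseudo instructions in the output when -wasm-keep-registers
+// is on, for assembly readability.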
+extern cl::opt<bool> WasmKeepRegisters;
+
//===----------------------------------------------------------------------===//
// Helpers.
//===----------------------------------------------------------------------===//
@@ -92,11 +100,11 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
SmallVector<MVT, 4> Results;
SmallVector<MVT, 4> Params;
- ComputeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
+ computeSignatureVTs(F.getFunctionType(), F, TM, Params, Results);
auto *Sym = cast<MCSymbolWasm>(getSymbol(&F));
Sym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
if (!Sym->getSignature()) {
- auto Signature = SignatureFromMVTs(Results, Params);
+ auto Signature = signatureFromMVTs(Results, Params);
Sym->setSignature(Signature.get());
addSignature(std::move(Signature));
}
@@ -111,9 +119,16 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
F.hasFnAttribute("wasm-import-module")) {
StringRef Name =
F.getFnAttribute("wasm-import-module").getValueAsString();
- Sym->setModuleName(Name);
+ Sym->setImportModule(Name);
getTargetStreamer()->emitImportModule(Sym, Name);
}
+ if (TM.getTargetTriple().isOSBinFormatWasm() &&
+ F.hasFnAttribute("wasm-import-name")) {
+ StringRef Name =
+ F.getFnAttribute("wasm-import-name").getValueAsString();
+ Sym->setImportName(Name);
+ getTargetStreamer()->emitImportName(Sym, Name);
+ }
}
}
@@ -129,7 +144,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (const NamedMDNode *Named = M.getNamedMetadata("wasm.custom_sections")) {
for (const Metadata *MD : Named->operands()) {
- const MDTuple *Tuple = dyn_cast<MDTuple>(MD);
+ const auto *Tuple = dyn_cast<MDTuple>(MD);
if (!Tuple || Tuple->getNumOperands() != 2)
continue;
const MDString *Name = dyn_cast<MDString>(Tuple->getOperand(0));
@@ -139,13 +154,117 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer->PushSection();
std::string SectionName = (".custom_section." + Name->getString()).str();
- MCSectionWasm *mySection =
+ MCSectionWasm *MySection =
OutContext.getWasmSection(SectionName, SectionKind::getMetadata());
- OutStreamer->SwitchSection(mySection);
+ OutStreamer->SwitchSection(MySection);
OutStreamer->EmitBytes(Contents->getString());
OutStreamer->PopSection();
}
}
+
+ EmitProducerInfo(M);
+ EmitTargetFeatures(M);
+}
+
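+// Emit the "producers" custom section: a ULEB128 field count, then for each
+// non-empty field ("language", "processed-by") the field name and a list of
+// (name, version) string pairs, all length-prefixed. Languages are derived
+// from the DICompileUnits in !llvm.dbg.cu and tools from !llvm.ident.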
+void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
+ llvm::SmallVector<std::pair<std::string, std::string>, 4> Languages;
+ if (const NamedMDNode *Debug = M.getNamedMetadata("llvm.dbg.cu")) {
+ llvm::SmallSet<StringRef, 4> SeenLanguages;
+ for (size_t I = 0, E = Debug->getNumOperands(); I < E; ++I) {
+ const auto *CU = cast<DICompileUnit>(Debug->getOperand(I));
+ StringRef Language = dwarf::LanguageString(CU->getSourceLanguage());
+ Language.consume_front("DW_LANG_");
+ if (SeenLanguages.insert(Language).second)
+ Languages.emplace_back(Language.str(), "");
+ }
+ }
+
+ llvm::SmallVector<std::pair<std::string, std::string>, 4> Tools;
+ if (const NamedMDNode *Ident = M.getNamedMetadata("llvm.ident")) {
+ llvm::SmallSet<StringRef, 4> SeenTools;
+ for (size_t I = 0, E = Ident->getNumOperands(); I < E; ++I) {
+ const auto *S = cast<MDString>(Ident->getOperand(I)->getOperand(0));
+ std::pair<StringRef, StringRef> Field = S->getString().split("version");
+ StringRef Name = Field.first.trim();
+ StringRef Version = Field.second.trim();
+ if (SeenTools.insert(Name).second)
+ Tools.emplace_back(Name.str(), Version.str());
+ }
+ }
+
+ int FieldCount = int(!Languages.empty()) + int(!Tools.empty());
+ if (FieldCount != 0) {
+ MCSectionWasm *Producers = OutContext.getWasmSection(
+ ".custom_section.producers", SectionKind::getMetadata());
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(Producers);
+ OutStreamer->EmitULEB128IntValue(FieldCount);
+ for (auto &Producers : {std::make_pair("language", &Languages),
+ std::make_pair("processed-by", &Tools)}) {
+ if (Producers.second->empty())
+ continue;
+ OutStreamer->EmitULEB128IntValue(strlen(Producers.first));
+ OutStreamer->EmitBytes(Producers.first);
+ OutStreamer->EmitULEB128IntValue(Producers.second->size());
+ for (auto &Producer : *Producers.second) {
+ OutStreamer->EmitULEB128IntValue(Producer.first.size());
+ OutStreamer->EmitBytes(Producer.first);
+ OutStreamer->EmitULEB128IntValue(Producer.second.size());
+ OutStreamer->EmitBytes(Producer.second);
+ }
+ }
+ OutStreamer->PopSection();
+ }
+}
+
+void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
+ struct FeatureEntry {
+ uint8_t Prefix;
+ StringRef Name;
+ };
+
+ // Read target features and linkage policies from module metadata
+ SmallVector<FeatureEntry, 4> EmittedFeatures;
+ for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+ std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
+ Metadata *Policy = M.getModuleFlag(MDKey);
+ if (Policy == nullptr)
+ continue;
+
+ FeatureEntry Entry;
+ Entry.Prefix = 0;
+ Entry.Name = KV.Key;
+
+ if (auto *MD = cast<ConstantAsMetadata>(Policy))
+ if (auto *I = cast<ConstantInt>(MD->getValue()))
+ Entry.Prefix = I->getZExtValue();
+
+ // Silently ignore invalid metadata
+ if (Entry.Prefix != wasm::WASM_FEATURE_PREFIX_USED &&
+ Entry.Prefix != wasm::WASM_FEATURE_PREFIX_REQUIRED &&
+ Entry.Prefix != wasm::WASM_FEATURE_PREFIX_DISALLOWED)
+ continue;
+
+ EmittedFeatures.push_back(Entry);
+ }
+
+ if (EmittedFeatures.size() == 0)
+ return;
+
+ // Emit features and linkage policies into the "target_features" section
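+  // The section holds a ULEB128 feature count followed, for each feature, by
+  // a one-byte prefix (the used / required / disallowed values from
+  // BinaryFormat/Wasm.h) and the feature name as a ULEB128 length plus bytes.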
+ MCSectionWasm *FeaturesSection = OutContext.getWasmSection(
+ ".custom_section.target_features", SectionKind::getMetadata());
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(FeaturesSection);
+
+ OutStreamer->EmitULEB128IntValue(EmittedFeatures.size());
+ for (auto &F : EmittedFeatures) {
+ OutStreamer->EmitIntValue(F.Prefix, 1);
+ OutStreamer->EmitULEB128IntValue(F.Name.size());
+ OutStreamer->EmitBytes(F.Name);
+ }
+
+ OutStreamer->PopSection();
}
void WebAssemblyAsmPrinter::EmitConstantPool() {
@@ -161,8 +280,8 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
const Function &F = MF->getFunction();
SmallVector<MVT, 1> ResultVTs;
SmallVector<MVT, 4> ParamVTs;
- ComputeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
- auto Signature = SignatureFromMVTs(ResultVTs, ParamVTs);
+ computeSignatureVTs(F.getFunctionType(), F, TM, ParamVTs, ResultVTs);
+ auto Signature = signatureFromMVTs(ResultVTs, ParamVTs);
auto *WasmSym = cast<MCSymbolWasm>(CurrentFnSym);
WasmSym->setSignature(Signature.get());
addSignature(std::move(Signature));
@@ -180,7 +299,7 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
}
SmallVector<wasm::ValType, 16> Locals;
- ValTypesFromMVTs(MFI->getLocals(), Locals);
+ valTypesFromMVTs(MFI->getLocals(), Locals);
getTargetStreamer()->emitLocal(Locals);
AsmPrinter::EmitFunctionBodyStart();
@@ -250,34 +369,34 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer->AddBlankLine();
}
break;
+ case WebAssembly::COMPILER_FENCE:
+ // This is a compiler barrier that prevents instruction reordering during
+ // backend compilation, and should not be emitted.
+ break;
+ case WebAssembly::EXTRACT_EXCEPTION_I32:
+ case WebAssembly::EXTRACT_EXCEPTION_I32_S:
+    // These are pseudo instructions that simulate popping values from the
+    // stack.
+ // We print these only when we have -wasm-keep-registers on for assembly
+ // readability.
+ if (!WasmKeepRegisters)
+ break;
+ LLVM_FALLTHROUGH;
default: {
WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
- MCInstLowering.Lower(MI, TmpInst);
+ MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
break;
}
}
}
-const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- if (GV->getValueType()->isFunctionTy()) {
- return MCSymbolRefExpr::create(
- getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
- }
- return AsmPrinter::lowerConstant(CV);
-}
-
bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+ unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
- if (AsmVariant != 0)
- report_fatal_error("There are no defined alternate asm variants");
-
// First try the generic code, which knows about modifiers like 'c' and 'n'.
- if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS))
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS))
return false;
if (!ExtraCode) {
@@ -293,8 +412,7 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
OS << regToString(MO);
return false;
case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(OS, MAI);
- printOffset(MO.getOffset(), OS);
+ PrintSymbolOperand(MO, OS);
return false;
case MachineOperand::MO_ExternalSymbol:
GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI);
@@ -313,19 +431,15 @@ bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &OS) {
- if (AsmVariant != 0)
- report_fatal_error("There are no defined alternate asm variants");
-
// The current approach to inline asm is that "r" constraints are expressed
// as local indices, rather than values on the operand stack. This simplifies
// using "r" as it eliminates the need to push and pop the values in a
// particular order, however it also makes it impossible to have an "m"
// constraint. So we don't support it.
- return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
}
// Force static initialization.
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index f6cb5610bad3..4e55c81dec38 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -1,9 +1,8 @@
// WebAssemblyAsmPrinter.h - WebAssembly implementation of AsmPrinter-*- C++ -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -59,17 +58,16 @@ public:
//===------------------------------------------------------------------===//
void EmitEndOfAsmFile(Module &M) override;
+ void EmitProducerInfo(Module &M);
+ void EmitTargetFeatures(Module &M);
void EmitJumpTableInfo() override;
void EmitConstantPool() override;
void EmitFunctionBodyStart() override;
void EmitInstruction(const MachineInstr *MI) override;
- const MCExpr *lowerConstant(const Constant *CV) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
MVT getRegType(unsigned RegNo) const;
std::string regToString(const MachineOperand &MO);
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
index fc827e9d5780..4c5d0192fc28 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyCFGSort.cpp - CFG Sorting ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -35,6 +34,14 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-cfg-sort"
+// Option to disable EH pad first sorting. Only for testing unwind destination
+// mismatches in CFGStackify.
+static cl::opt<bool> WasmDisableEHPadSort(
+ "wasm-disable-ehpad-sort", cl::ReallyHidden,
+ cl::desc(
+ "WebAssembly: Disable EH pad-first sort order. Testing purpose only."),
+ cl::init(false));
+
namespace {
// Wrapper for loops and exceptions
@@ -133,7 +140,7 @@ FunctionPass *llvm::createWebAssemblyCFGSort() {
return new WebAssemblyCFGSort();
}
-static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
+static void maybeUpdateTerminator(MachineBasicBlock *MBB) {
#ifndef NDEBUG
bool AnyBarrier = false;
#endif
@@ -188,10 +195,12 @@ namespace {
struct CompareBlockNumbers {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
- if (A->isEHPad() && !B->isEHPad())
- return false;
- if (!A->isEHPad() && B->isEHPad())
- return true;
+ if (!WasmDisableEHPadSort) {
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+ }
return A->getNumber() > B->getNumber();
}
@@ -200,11 +209,12 @@ struct CompareBlockNumbers {
struct CompareBlockNumbersBackwards {
bool operator()(const MachineBasicBlock *A,
const MachineBasicBlock *B) const {
- // We give a higher priority to an EH pad
- if (A->isEHPad() && !B->isEHPad())
- return false;
- if (!A->isEHPad() && B->isEHPad())
- return true;
+ if (!WasmDisableEHPadSort) {
+ if (A->isEHPad() && !B->isEHPad())
+ return false;
+ if (!A->isEHPad() && B->isEHPad())
+ return true;
+ }
return A->getNumber() < B->getNumber();
}
@@ -228,7 +238,7 @@ struct Entry {
/// interrupted by blocks not dominated by their header.
/// TODO: There are many opportunities for improving the heuristics here.
/// Explore them.
-static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+static void sortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyExceptionInfo &WEI,
const MachineDominatorTree &MDT) {
// Prepare for a topological sort: Record the number of predecessors each
@@ -260,10 +270,10 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
CompareBlockNumbersBackwards>
Ready;
- RegionInfo SUI(MLI, WEI);
+ RegionInfo RI(MLI, WEI);
SmallVector<Entry, 4> Entries;
for (MachineBasicBlock *MBB = &MF.front();;) {
- const Region *R = SUI.getRegionFor(MBB);
+ const Region *R = RI.getRegionFor(MBB);
if (R) {
// If MBB is a region header, add it to the active region list. We can't
// put any blocks that it doesn't dominate until we see the end of the
@@ -320,7 +330,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
if (!Next) {
// If there are no more blocks to process, we're done.
if (Ready.empty()) {
- MaybeUpdateTerminator(MBB);
+ maybeUpdateTerminator(MBB);
break;
}
for (;;) {
@@ -338,7 +348,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
}
// Move the next block into place and iterate.
Next->moveAfter(MBB);
- MaybeUpdateTerminator(MBB);
+ maybeUpdateTerminator(MBB);
MBB = Next;
}
assert(Entries.empty() && "Active sort region list not finished");
@@ -354,7 +364,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto &MBB : MF) {
assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
- const Region *Region = SUI.getRegionFor(&MBB);
+ const Region *Region = RI.getRegionFor(&MBB);
if (Region && &MBB == Region->getHeader()) {
if (Region->isLoop()) {
@@ -379,7 +389,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto Pred : MBB.predecessors())
assert(Pred->getNumber() < MBB.getNumber() &&
"Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(SUI.getRegionFor(&MBB)) &&
+ assert(OnStack.count(RI.getRegionFor(&MBB)) &&
"Blocks must be nested in their regions");
}
while (OnStack.size() > 1 && &MBB == WebAssembly::getBottom(OnStack.back()))
@@ -404,7 +414,7 @@ bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
MF.getRegInfo().invalidateLiveness();
// Sort the blocks, with contiguous sort regions.
- SortBlocks(MF, MLI, WEI, MDT);
+ sortBlocks(MF, MLI, WEI, MDT);
return true;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index f8f5f4040c86..e6bfc5226e2e 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -22,26 +21,21 @@
///
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyExceptionInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-cfg-stackify"
+STATISTIC(NumUnwindMismatches, "Number of EH pad unwind mismatches found");
+
namespace {
class WebAssemblyCFGStackify final : public MachineFunctionPass {
StringRef getPassName() const override { return "WebAssembly CFG Stackify"; }
@@ -60,10 +54,13 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
// over scoped regions when walking blocks.
SmallVector<MachineBasicBlock *, 8> ScopeTops;
+ // Placing markers.
void placeMarkers(MachineFunction &MF);
void placeBlockMarker(MachineBasicBlock &MBB);
void placeLoopMarker(MachineBasicBlock &MBB);
void placeTryMarker(MachineBasicBlock &MBB);
+ void removeUnnecessaryInstrs(MachineFunction &MF);
+ bool fixUnwindMismatches(MachineFunction &MF);
void rewriteDepthImmediates(MachineFunction &MF);
void fixEndsAtEndOfFunction(MachineFunction &MF);
@@ -75,16 +72,28 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
DenseMap<const MachineInstr *, MachineBasicBlock *> TryToEHPad;
// <EH pad, TRY marker> map
DenseMap<const MachineBasicBlock *, MachineInstr *> EHPadToTry;
- // <LOOP|TRY marker, Loop/exception bottom BB> map
- DenseMap<const MachineInstr *, MachineBasicBlock *> BeginToBottom;
- // Helper functions to register scope information created by marker
- // instructions.
+ // There can be an appendix block at the end of each function, shared for:
+ // - creating a correct signature for fallthrough returns
+ // - target for rethrows that need to unwind to the caller, but are trapped
+ // inside another try/catch
+ MachineBasicBlock *AppendixBB = nullptr;
+ MachineBasicBlock *getAppendixBlock(MachineFunction &MF) {
+ if (!AppendixBB) {
+ AppendixBB = MF.CreateMachineBasicBlock();
+ // Give it a fake predecessor so that AsmPrinter prints its label.
+ AppendixBB->addSuccessor(AppendixBB);
+ MF.push_back(AppendixBB);
+ }
+ return AppendixBB;
+ }
+
+ // Helper functions to register / unregister scope information created by
+ // marker instructions.
void registerScope(MachineInstr *Begin, MachineInstr *End);
void registerTryScope(MachineInstr *Begin, MachineInstr *End,
MachineBasicBlock *EHPad);
-
- MachineBasicBlock *getBottom(const MachineInstr *Begin);
+ void unregisterScope(MachineInstr *Begin);
public:
static char ID; // Pass identification, replacement for typeid
@@ -96,7 +105,7 @@ public:
char WebAssemblyCFGStackify::ID = 0;
INITIALIZE_PASS(WebAssemblyCFGStackify, DEBUG_TYPE,
- "Insert BLOCK and LOOP markers for WebAssembly scopes", false,
+ "Insert BLOCK/LOOP/TRY markers for WebAssembly scopes", false,
false)
FunctionPass *llvm::createWebAssemblyCFGStackify() {
@@ -108,14 +117,12 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
/// code) for a branch instruction to both branch to a block and fallthrough
/// to it, so we check the actual branch operands to see if there are any
/// explicit mentions.
-static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
+static bool explicitlyBranchesTo(MachineBasicBlock *Pred,
MachineBasicBlock *MBB) {
for (MachineInstr &MI : Pred->terminators())
- // Even if a rethrow takes a BB argument, it is not a branch
- if (!WebAssembly::isRethrow(MI))
- for (MachineOperand &MO : MI.explicit_operands())
- if (MO.isMBB() && MO.getMBB() == MBB)
- return true;
+ for (MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB() == MBB)
+ return true;
return false;
}
@@ -125,7 +132,7 @@ static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
// ones that should go after the marker. In this function, AfterSet is only
// used for sanity checking.
static MachineBasicBlock::iterator
-GetEarliestInsertPos(MachineBasicBlock *MBB,
+getEarliestInsertPos(MachineBasicBlock *MBB,
const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
auto InsertPos = MBB->end();
@@ -149,7 +156,7 @@ GetEarliestInsertPos(MachineBasicBlock *MBB,
// ones that should go after the marker. In this function, BeforeSet is only
// used for sanity checking.
static MachineBasicBlock::iterator
-GetLatestInsertPos(MachineBasicBlock *MBB,
+getLatestInsertPos(MachineBasicBlock *MBB,
const SmallPtrSet<const MachineInstr *, 4> &BeforeSet,
const SmallPtrSet<const MachineInstr *, 4> &AfterSet) {
auto InsertPos = MBB->begin();
@@ -181,33 +188,25 @@ void WebAssemblyCFGStackify::registerTryScope(MachineInstr *Begin,
EHPadToTry[EHPad] = Begin;
}
-// Given a LOOP/TRY marker, returns its bottom BB. Use cached information if any
-// to prevent recomputation.
-MachineBasicBlock *
-WebAssemblyCFGStackify::getBottom(const MachineInstr *Begin) {
- const auto &MLI = getAnalysis<MachineLoopInfo>();
- const auto &WEI = getAnalysis<WebAssemblyExceptionInfo>();
- if (BeginToBottom.count(Begin))
- return BeginToBottom[Begin];
- if (Begin->getOpcode() == WebAssembly::LOOP) {
- MachineLoop *L = MLI.getLoopFor(Begin->getParent());
- assert(L);
- BeginToBottom[Begin] = WebAssembly::getBottom(L);
- } else if (Begin->getOpcode() == WebAssembly::TRY) {
- WebAssemblyException *WE = WEI.getExceptionFor(TryToEHPad[Begin]);
- assert(WE);
- BeginToBottom[Begin] = WebAssembly::getBottom(WE);
- } else
- assert(false);
- return BeginToBottom[Begin];
+void WebAssemblyCFGStackify::unregisterScope(MachineInstr *Begin) {
+ assert(BeginToEnd.count(Begin));
+ MachineInstr *End = BeginToEnd[Begin];
+ assert(EndToBegin.count(End));
+ BeginToEnd.erase(Begin);
+ EndToBegin.erase(End);
+ MachineBasicBlock *EHPad = TryToEHPad.lookup(Begin);
+ if (EHPad) {
+ assert(EHPadToTry.count(EHPad));
+ TryToEHPad.erase(Begin);
+ EHPadToTry.erase(EHPad);
+ }
}
/// Insert a BLOCK marker for branches to MBB (if needed).
+// TODO Consider a more generalized way of handling block (and also loop and
+// try) signatures when we implement the multi-value proposal later.
void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
- // This should have been handled in placeTryMarker.
- if (MBB.isEHPad())
- return;
-
+ assert(!MBB.isEHPad());
MachineFunction &MF = *MBB.getParent();
auto &MDT = getAnalysis<MachineDominatorTree>();
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -218,12 +217,20 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
// which reduces overall stack height.
MachineBasicBlock *Header = nullptr;
bool IsBranchedTo = false;
+ bool IsBrOnExn = false;
+ MachineInstr *BrOnExn = nullptr;
int MBBNumber = MBB.getNumber();
for (MachineBasicBlock *Pred : MBB.predecessors()) {
if (Pred->getNumber() < MBBNumber) {
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
- if (ExplicitlyBranchesTo(Pred, &MBB))
+ if (explicitlyBranchesTo(Pred, &MBB)) {
IsBranchedTo = true;
+ if (Pred->getFirstTerminator()->getOpcode() == WebAssembly::BR_ON_EXN) {
+ IsBrOnExn = true;
+ assert(!BrOnExn && "There should be only one br_on_exn per block");
+ BrOnExn = &*Pred->getFirstTerminator();
+ }
+ }
}
}
if (!Header)
@@ -232,7 +239,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
return;
assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
- MachineBasicBlock *LayoutPred = &*std::prev(MachineFunction::iterator(&MBB));
+ MachineBasicBlock *LayoutPred = MBB.getPrevNode();
// If the nearest common dominator is inside a more deeply nested context,
// walk out to the nearest scope which isn't more deeply nested.
@@ -240,7 +247,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
if (ScopeTop->getNumber() > Header->getNumber()) {
// Skip over an intervening scope.
- I = std::next(MachineFunction::iterator(ScopeTop));
+ I = std::next(ScopeTop->getIterator());
} else {
// We found a scope level at an appropriate depth.
Header = ScopeTop;
@@ -256,13 +263,12 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
// Instructions that should go after the BLOCK.
SmallPtrSet<const MachineInstr *, 4> AfterSet;
for (const auto &MI : *Header) {
- // If there is a previously placed LOOP/TRY marker and the bottom block of
- // the loop/exception is above MBB, it should be after the BLOCK, because
- // the loop/exception is nested in this block. Otherwise it should be before
- // the BLOCK.
- if (MI.getOpcode() == WebAssembly::LOOP ||
- MI.getOpcode() == WebAssembly::TRY) {
- if (MBB.getNumber() > getBottom(&MI)->getNumber())
+ // If there is a previously placed LOOP marker and the bottom block of the
+ // loop is above MBB, it should be after the BLOCK, because the loop is
+ // nested in this BLOCK. Otherwise it should be before the BLOCK.
+ if (MI.getOpcode() == WebAssembly::LOOP) {
+ auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode();
+ if (MBB.getNumber() > LoopBottom->getNumber())
AfterSet.insert(&MI);
#ifndef NDEBUG
else
@@ -270,9 +276,10 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
#endif
}
- // All previously inserted BLOCK markers should be after the BLOCK because
- // they are all nested blocks.
- if (MI.getOpcode() == WebAssembly::BLOCK)
+ // All previously inserted BLOCK/TRY markers should be after the BLOCK
+ // because they are all nested blocks.
+ if (MI.getOpcode() == WebAssembly::BLOCK ||
+ MI.getOpcode() == WebAssembly::TRY)
AfterSet.insert(&MI);
#ifndef NDEBUG
@@ -300,11 +307,27 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
}
// Add the BLOCK.
- auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+
+  // 'br_on_exn' extracts the exnref object and pushes a variable number of
+  // values depending on its tag. For a C++ exception, it is a single i32
+  // value, and the generated code will be in the form of:
+ // block i32
+ // br_on_exn 0, $__cpp_exception
+ // rethrow
+ // end_block
+ WebAssembly::ExprType ReturnType = WebAssembly::ExprType::Void;
+ if (IsBrOnExn) {
+ const char *TagName = BrOnExn->getOperand(1).getSymbolName();
+ if (std::strcmp(TagName, "__cpp_exception") != 0)
+ llvm_unreachable("Only C++ exception is supported");
+ ReturnType = WebAssembly::ExprType::I32;
+ }
+
+ auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
TII.get(WebAssembly::BLOCK))
- .addImm(int64_t(WebAssembly::ExprType::Void));
+ .addImm(int64_t(ReturnType));
// Decide where in Header to put the END_BLOCK.
BeforeSet.clear();
@@ -333,7 +356,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
}
// Mark the end of the block.
- InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
+ InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *End = BuildMI(MBB, InsertPos, MBB.findPrevDebugLoc(InsertPos),
TII.get(WebAssembly::END_BLOCK));
registerScope(Begin, End);
@@ -358,13 +381,10 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
// The operand of a LOOP is the first block after the loop. If the loop is the
// bottom of the function, insert a dummy block at the end.
MachineBasicBlock *Bottom = WebAssembly::getBottom(Loop);
- auto Iter = std::next(MachineFunction::iterator(Bottom));
+ auto Iter = std::next(Bottom->getIterator());
if (Iter == MF.end()) {
- MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
- // Give it a fake predecessor so that AsmPrinter prints its label.
- Label->addSuccessor(Label);
- MF.push_back(Label);
- Iter = std::next(MachineFunction::iterator(Bottom));
+ getAppendixBlock(MF);
+ Iter = std::next(Bottom->getIterator());
}
MachineBasicBlock *AfterLoop = &*Iter;
@@ -383,7 +403,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
}
// Mark the beginning of the loop.
- auto InsertPos = GetEarliestInsertPos(&MBB, BeforeSet, AfterSet);
+ auto InsertPos = getEarliestInsertPos(&MBB, BeforeSet, AfterSet);
MachineInstr *Begin = BuildMI(MBB, InsertPos, MBB.findDebugLoc(InsertPos),
TII.get(WebAssembly::LOOP))
.addImm(int64_t(WebAssembly::ExprType::Void));
@@ -400,8 +420,10 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
// Mark the end of the loop (using arbitrary debug location that branched to
// the loop end as its location).
- InsertPos = GetEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
- DebugLoc EndDL = (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
+ InsertPos = getEarliestInsertPos(AfterLoop, BeforeSet, AfterSet);
+ DebugLoc EndDL = AfterLoop->pred_empty()
+ ? DebugLoc()
+ : (*AfterLoop->pred_rbegin())->findBranchDebugLoc();
MachineInstr *End =
BuildMI(*AfterLoop, InsertPos, EndDL, TII.get(WebAssembly::END_LOOP));
registerScope(Begin, End);
@@ -414,14 +436,7 @@ void WebAssemblyCFGStackify::placeLoopMarker(MachineBasicBlock &MBB) {
}
void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
- if (!MBB.isEHPad())
- return;
-
- // catch_all terminate pad is grouped together with catch terminate pad and
- // does not need a separate TRY and END_TRY marker.
- if (WebAssembly::isCatchAllTerminatePad(MBB))
- return;
-
+ assert(MBB.isEHPad());
MachineFunction &MF = *MBB.getParent();
auto &MDT = getAnalysis<MachineDominatorTree>();
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
@@ -434,7 +449,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
for (auto *Pred : MBB.predecessors()) {
if (Pred->getNumber() < MBBNumber) {
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
- assert(!ExplicitlyBranchesTo(Pred, &MBB) &&
+ assert(!explicitlyBranchesTo(Pred, &MBB) &&
"Explicit branch to an EH pad!");
}
}
@@ -447,19 +462,15 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
assert(WE);
MachineBasicBlock *Bottom = WebAssembly::getBottom(WE);
- auto Iter = std::next(MachineFunction::iterator(Bottom));
+ auto Iter = std::next(Bottom->getIterator());
if (Iter == MF.end()) {
- MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
- // Give it a fake predecessor so that AsmPrinter prints its label.
- Label->addSuccessor(Label);
- MF.push_back(Label);
- Iter = std::next(MachineFunction::iterator(Bottom));
+ getAppendixBlock(MF);
+ Iter = std::next(Bottom->getIterator());
}
- MachineBasicBlock *AfterTry = &*Iter;
+ MachineBasicBlock *Cont = &*Iter;
- assert(AfterTry != &MF.front());
- MachineBasicBlock *LayoutPred =
- &*std::prev(MachineFunction::iterator(AfterTry));
+ assert(Cont != &MF.front());
+ MachineBasicBlock *LayoutPred = Cont->getPrevNode();
// If the nearest common dominator is inside a more deeply nested context,
// walk out to the nearest scope which isn't more deeply nested.
@@ -467,7 +478,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
if (ScopeTop->getNumber() > Header->getNumber()) {
// Skip over an intervening scope.
- I = std::next(MachineFunction::iterator(ScopeTop));
+ I = std::next(ScopeTop->getIterator());
} else {
// We found a scope level at an appropriate depth.
Header = ScopeTop;
@@ -478,16 +489,17 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
// Decide where in Header to put the TRY.
- // Instructions that should go before the BLOCK.
+ // Instructions that should go before the TRY.
SmallPtrSet<const MachineInstr *, 4> BeforeSet;
- // Instructions that should go after the BLOCK.
+ // Instructions that should go after the TRY.
SmallPtrSet<const MachineInstr *, 4> AfterSet;
for (const auto &MI : *Header) {
- // If there is a previously placed LOOP marker and the bottom block of
- // the loop is above MBB, the LOOP should be after the TRY, because the
- // loop is nested in this try. Otherwise it should be before the TRY.
+ // If there is a previously placed LOOP marker and the bottom block of the
+ // loop is above MBB, it should be after the TRY, because the loop is nested
+ // in this TRY. Otherwise it should be before the TRY.
if (MI.getOpcode() == WebAssembly::LOOP) {
- if (MBB.getNumber() > Bottom->getNumber())
+ auto *LoopBottom = BeginToEnd[&MI]->getParent()->getPrevNode();
+ if (MBB.getNumber() > LoopBottom->getNumber())
AfterSet.insert(&MI);
#ifndef NDEBUG
else
@@ -495,14 +507,16 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
#endif
}
- // All previously inserted TRY markers should be after the TRY because they
- // are all nested trys.
- if (MI.getOpcode() == WebAssembly::TRY)
+    // All previously inserted BLOCK/TRY markers should be after the TRY
+    // because they are all nested blocks or trys.
+ if (MI.getOpcode() == WebAssembly::BLOCK ||
+ MI.getOpcode() == WebAssembly::TRY)
AfterSet.insert(&MI);
#ifndef NDEBUG
- // All END_(LOOP/TRY) markers should be before the TRY.
- if (MI.getOpcode() == WebAssembly::END_LOOP ||
+ // All END_(BLOCK/LOOP/TRY) markers should be before the TRY.
+ if (MI.getOpcode() == WebAssembly::END_BLOCK ||
+ MI.getOpcode() == WebAssembly::END_LOOP ||
MI.getOpcode() == WebAssembly::END_TRY)
BeforeSet.insert(&MI);
#endif
@@ -530,10 +544,16 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
// throw.
if (MBB.isPredecessor(Header)) {
auto TermPos = Header->getFirstTerminator();
- if (TermPos == Header->end() || !WebAssembly::isRethrow(*TermPos)) {
+ if (TermPos == Header->end() ||
+ TermPos->getOpcode() != WebAssembly::RETHROW) {
for (const auto &MI : reverse(*Header)) {
if (MI.isCall()) {
AfterSet.insert(&MI);
+ // Possibly throwing calls are usually wrapped by EH_LABEL
+ // instructions. We don't want to split them and the call.
+ if (MI.getIterator() != Header->begin() &&
+ std::prev(MI.getIterator())->isEHLabel())
+ AfterSet.insert(&*std::prev(MI.getIterator()));
break;
}
}
@@ -541,7 +561,7 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
}
// Add the TRY.
- auto InsertPos = GetLatestInsertPos(Header, BeforeSet, AfterSet);
+ auto InsertPos = getLatestInsertPos(Header, BeforeSet, AfterSet);
MachineInstr *Begin =
BuildMI(*Header, InsertPos, Header->findDebugLoc(InsertPos),
TII.get(WebAssembly::TRY))
@@ -550,10 +570,11 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
// Decide where in Header to put the END_TRY.
BeforeSet.clear();
AfterSet.clear();
- for (const auto &MI : *AfterTry) {
+ for (const auto &MI : *Cont) {
#ifndef NDEBUG
- // END_TRY should precede existing LOOP markers.
- if (MI.getOpcode() == WebAssembly::LOOP)
+ // END_TRY should precede existing LOOP and BLOCK markers.
+ if (MI.getOpcode() == WebAssembly::LOOP ||
+ MI.getOpcode() == WebAssembly::BLOCK)
AfterSet.insert(&MI);
// All END_TRY markers placed earlier belong to exceptions that contains
@@ -567,31 +588,595 @@ void WebAssemblyCFGStackify::placeTryMarker(MachineBasicBlock &MBB) {
// the END_TRY marker should go after that. Otherwise, the whole try-catch
// is contained within this loop, so the END_TRY should go before that.
if (MI.getOpcode() == WebAssembly::END_LOOP) {
- if (EndToBegin[&MI]->getParent()->getNumber() >= Header->getNumber())
+ // For a LOOP to be after TRY, LOOP's BB should be after TRY's BB; if they
+ // are in the same BB, LOOP is always before TRY.
+ if (EndToBegin[&MI]->getParent()->getNumber() > Header->getNumber())
BeforeSet.insert(&MI);
#ifndef NDEBUG
else
AfterSet.insert(&MI);
#endif
}
+
+ // It is not possible for an END_BLOCK to be already in this block.
}
// Mark the end of the TRY.
- InsertPos = GetEarliestInsertPos(AfterTry, BeforeSet, AfterSet);
+ InsertPos = getEarliestInsertPos(Cont, BeforeSet, AfterSet);
MachineInstr *End =
- BuildMI(*AfterTry, InsertPos, Bottom->findBranchDebugLoc(),
+ BuildMI(*Cont, InsertPos, Bottom->findBranchDebugLoc(),
TII.get(WebAssembly::END_TRY));
registerTryScope(Begin, End, &MBB);
- // Track the farthest-spanning scope that ends at this point.
- int Number = AfterTry->getNumber();
- if (!ScopeTops[Number] ||
- ScopeTops[Number]->getNumber() > Header->getNumber())
- ScopeTops[Number] = Header;
+ // Track the farthest-spanning scope that ends at this point. We create two
+ // mappings: (BB with 'end_try' -> BB with 'try') and (BB with 'catch' -> BB
+ // with 'try'). We need to create 'catch' -> 'try' mapping here too because
+ // markers should not span across 'catch'. For example, this should not
+ // happen:
+ //
+ // try
+ // block --| (X)
+ // catch |
+ // end_block --|
+ // end_try
+ for (int Number : {Cont->getNumber(), MBB.getNumber()}) {
+ if (!ScopeTops[Number] ||
+ ScopeTops[Number]->getNumber() > Header->getNumber())
+ ScopeTops[Number] = Header;
+ }
+}
+
+void WebAssemblyCFGStackify::removeUnnecessaryInstrs(MachineFunction &MF) {
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ // When there is an unconditional branch right before a catch instruction and
+ // it branches to the end of end_try marker, we don't need the branch, because
+  // it branches to the end of the end_try marker, we don't need the branch,
+  // because if there is no exception, the control flow transfers to that point
+  // anyway.
+ // try
+ // ...
+ // br bb2 <- Not necessary
+ // bb1:
+ // catch
+ // ...
+ // bb2:
+ // end
+ for (auto &MBB : MF) {
+ if (!MBB.isEHPad())
+ continue;
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineBasicBlock *EHPadLayoutPred = MBB.getPrevNode();
+ MachineBasicBlock *Cont = BeginToEnd[EHPadToTry[&MBB]]->getParent();
+ bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+ if (Analyzable && ((Cond.empty() && TBB && TBB == Cont) ||
+ (!Cond.empty() && FBB && FBB == Cont)))
+ TII.removeBranch(*EHPadLayoutPred);
+ }
+
+ // When there are block / end_block markers that overlap with try / end_try
+ // markers, and the block and try markers' return types are the same, the
+  // block / end_block markers are not necessary, because the try / end_try
+  // markers can also serve as boundaries for branches.
+ // block <- Not necessary
+ // try
+ // ...
+ // catch
+ // ...
+ // end
+ // end <- Not necessary
+ SmallVector<MachineInstr *, 32> ToDelete;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.getOpcode() != WebAssembly::TRY)
+ continue;
+
+ MachineInstr *Try = &MI, *EndTry = BeginToEnd[Try];
+ MachineBasicBlock *TryBB = Try->getParent();
+ MachineBasicBlock *Cont = EndTry->getParent();
+ int64_t RetType = Try->getOperand(0).getImm();
+ for (auto B = Try->getIterator(), E = std::next(EndTry->getIterator());
+ B != TryBB->begin() && E != Cont->end() &&
+ std::prev(B)->getOpcode() == WebAssembly::BLOCK &&
+ E->getOpcode() == WebAssembly::END_BLOCK &&
+ std::prev(B)->getOperand(0).getImm() == RetType;
+ --B, ++E) {
+ ToDelete.push_back(&*std::prev(B));
+ ToDelete.push_back(&*E);
+ }
+ }
+ }
+ for (auto *MI : ToDelete) {
+ if (MI->getOpcode() == WebAssembly::BLOCK)
+ unregisterScope(MI);
+ MI->eraseFromParent();
+ }
+}
+
+bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Linearizing the control flow by placing TRY / END_TRY markers can create
+ // mismatches in unwind destinations. There are two kinds of mismatches we
+ // try to solve here.
+
+ // 1. When an instruction may throw, but the EH pad it will unwind to can be
+ // different from the original CFG.
+ //
+ // Example: we have the following CFG:
+ // bb0:
+ // call @foo (if it throws, unwind to bb2)
+ // bb1:
+ // call @bar (if it throws, unwind to bb3)
+ // bb2 (ehpad):
+ // catch
+ // ...
+ // bb3 (ehpad)
+ // catch
+ // handler body
+ //
+ // And the CFG is sorted in this order. Then after placing TRY markers, it
+ // will look like: (BB markers are omitted)
+ // try $label1
+ // try
+ // call @foo
+ // call @bar (if it throws, unwind to bb3)
+ // catch <- ehpad (bb2)
+ // ...
+ // end_try
+ // catch <- ehpad (bb3)
+ // handler body
+ // end_try
+ //
+  // Now if bar() throws, it is going to end up in bb2, not bb3, where it
+ // is supposed to end up. We solve this problem by
+ // a. Split the target unwind EH pad (here bb3) so that the handler body is
+ // right after 'end_try', which means we extract the handler body out of
+ // the catch block. We do this because this handler body should be
+  //    somewhere branchable from the inner scope.
+ // b. Wrap the call that has an incorrect unwind destination ('call @bar'
+ // here) with a nested try/catch/end_try scope, and within the new catch
+  //    block, branch to the handler body.
+ // c. Place a branch after the newly inserted nested end_try so it can bypass
+ // the handler body, which is now outside of a catch block.
+ //
+  // The result will look like the following. "(new: a)" means the
+  // instruction is newly created in the process of doing 'a' above.
+ //
+ // block $label0 (new: placeBlockMarker)
+ // try $label1
+ // try
+ // call @foo
+ // try (new: b)
+ // call @bar
+ // catch (new: b)
+ // local.set n / drop (new: b)
+ // br $label1 (new: b)
+ // end_try (new: b)
+ // catch <- ehpad (bb2)
+ // end_try
+ // br $label0 (new: c)
+ // catch <- ehpad (bb3)
+ // end_try (hoisted: a)
+ // handler body
+ // end_block (new: placeBlockMarker)
+ //
+ // Note that the new wrapping block/end_block will be generated later in
+ // placeBlockMarker.
+ //
+  // TODO Currently local.sets and local.gets are generated to move the exnref
+  // value created by catches. That's because we don't support yielding values
+  // from a block in LLVM machine IR yet, even though it is supported by wasm.
+  // Delete the unnecessary local.gets/local.sets once yielding values from a
+  // block is supported. The full EH spec requires multi-value support to do
+  // this, but for C++ we don't yet need it because we only throw a single i32.
+ //
+ // ---
+ // 2. The same as 1, but in this case an instruction unwinds to a caller
+ // function and not another EH pad.
+ //
+ // Example: we have the following CFG:
+ // bb0:
+ // call @foo (if it throws, unwind to bb2)
+ // bb1:
+ // call @bar (if it throws, unwind to caller)
+ // bb2 (ehpad):
+ // catch
+ // ...
+ //
+ // And the CFG is sorted in this order. Then after placing TRY markers, it
+ // will look like:
+ // try
+ // call @foo
+ // call @bar (if it throws, unwind to caller)
+ // catch <- ehpad (bb2)
+ // ...
+ // end_try
+ //
+  // Now if bar() throws, it is going to end up in bb2, when it is supposed to
+  // throw up to the caller.
+ // We solve this problem by
+ // a. Create a new 'appendix' BB at the end of the function and put a single
+ // 'rethrow' instruction (+ local.get) in there.
+ // b. Wrap the call that has an incorrect unwind destination ('call @bar'
+ // here) with a nested try/catch/end_try scope, and within the new catch
+  //    block, branch to the new appendix block.
+ //
+ // block $label0 (new: placeBlockMarker)
+ // try
+ // call @foo
+ // try (new: b)
+ // call @bar
+ // catch (new: b)
+ // local.set n (new: b)
+ // br $label0 (new: b)
+ // end_try (new: b)
+ // catch <- ehpad (bb2)
+ // ...
+ // end_try
+ // ...
+ // end_block (new: placeBlockMarker)
+ // local.get n (new: a) <- appendix block
+ // rethrow (new: a)
+ //
+ // In case there are multiple calls in a BB that may throw to the caller, they
+ // can be wrapped together in one nested try scope. (In 1, this couldn't
+ // happen, because may-throwing instruction there had an unwind destination,
+ // i.e., it was an invoke before, and there could be only one invoke within a
+ // BB.)
+
+ SmallVector<const MachineBasicBlock *, 8> EHPadStack;
+  // Range of instructions to be wrapped in a new nested try/catch
+ using TryRange = std::pair<MachineInstr *, MachineInstr *>;
+  // In original CFG, <unwind destination BB, a vector of try ranges>
+ DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges;
+ // In new CFG, <destination to branch to, a vector of try ranges>
+ DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges;
+ // In new CFG, <destination to branch to, register containing exnref>
+ DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg;
+
+ // Gather possibly throwing calls (i.e., previously invokes) whose current
+ // unwind destination is not the same as the original CFG.
+ for (auto &MBB : reverse(MF)) {
+ bool SeenThrowableInstInBB = false;
+ for (auto &MI : reverse(MBB)) {
+ if (MI.getOpcode() == WebAssembly::TRY)
+ EHPadStack.pop_back();
+ else if (MI.getOpcode() == WebAssembly::CATCH)
+ EHPadStack.push_back(MI.getParent());
+
+      // In this loop we only gather calls that have an EH pad to unwind to.
+      // So there will be at most one such call (= invoke) in a BB, and after
+      // we've seen one, we can skip the rest of the BB. Also if MBB has no EH
+      // pad successor or MI does not throw, this is not an invoke.
+ if (SeenThrowableInstInBB || !MBB.hasEHPadSuccessor() ||
+ !WebAssembly::mayThrow(MI))
+ continue;
+ SeenThrowableInstInBB = true;
+
+ // If the EH pad on the stack top is where this instruction should unwind
+ // next, we're good.
+ MachineBasicBlock *UnwindDest = nullptr;
+ for (auto *Succ : MBB.successors()) {
+ if (Succ->isEHPad()) {
+ UnwindDest = Succ;
+ break;
+ }
+ }
+ if (EHPadStack.back() == UnwindDest)
+ continue;
+
+ // If not, record the range.
+ UnwindDestToTryRanges[UnwindDest].push_back(TryRange(&MI, &MI));
+ }
+ }
+
+ assert(EHPadStack.empty());
+
+ // Gather possibly throwing calls that are supposed to unwind up to the caller
+ // if they throw, but currently unwind to an incorrect destination. Unlike the
+ // loop above, there can be multiple calls within a BB that unwind to the
+ // caller, which we should group together in a range.
+ bool NeedAppendixBlock = false;
+ for (auto &MBB : reverse(MF)) {
+ MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr; // inclusive
+ for (auto &MI : reverse(MBB)) {
+ if (MI.getOpcode() == WebAssembly::TRY)
+ EHPadStack.pop_back();
+ else if (MI.getOpcode() == WebAssembly::CATCH)
+ EHPadStack.push_back(MI.getParent());
+
+ // If MBB has an EH pad successor, this inst does not unwind to caller.
+ if (MBB.hasEHPadSuccessor())
+ continue;
+
+ // We wrap up the current range when we see a marker even if we haven't
+ // finished a BB.
+ if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) {
+ NeedAppendixBlock = true;
+ // Record the range. nullptr here means the unwind destination is the
+ // caller.
+ UnwindDestToTryRanges[nullptr].push_back(
+ TryRange(RangeBegin, RangeEnd));
+ RangeBegin = RangeEnd = nullptr; // Reset range pointers
+ }
+
+      // If EHPadStack is empty, that means this instruction correctly unwinds
+      // to the caller if it throws, so we're good. If MI does not throw, we're
+      // good too.
+ if (EHPadStack.empty() || !WebAssembly::mayThrow(MI))
+ continue;
+
+ // We found an instruction that unwinds to the caller but currently has an
+ // incorrect unwind destination. Create a new range or increment the
+ // currently existing range.
+ if (!RangeEnd)
+ RangeBegin = RangeEnd = &MI;
+ else
+ RangeBegin = &MI;
+ }
+
+ if (RangeEnd) {
+ NeedAppendixBlock = true;
+ // Record the range. nullptr here means the unwind destination is the
+ // caller.
+ UnwindDestToTryRanges[nullptr].push_back(TryRange(RangeBegin, RangeEnd));
+ RangeBegin = RangeEnd = nullptr; // Reset range pointers
+ }
+ }
+
+ assert(EHPadStack.empty());
+ // We don't have any unwind destination mismatches to resolve.
+ if (UnwindDestToTryRanges.empty())
+ return false;
+
+ // If we found instructions that should unwind to the caller but currently
+ // have incorrect unwind destination, we create an appendix block at the end
+ // of the function with a local.get and a rethrow instruction.
+ if (NeedAppendixBlock) {
+ auto *AppendixBB = getAppendixBlock(MF);
+ unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+ BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW))
+ .addReg(ExnReg);
+ // These instruction ranges should branch to this appendix BB.
+ for (auto Range : UnwindDestToTryRanges[nullptr])
+ BrDestToTryRanges[AppendixBB].push_back(Range);
+ BrDestToExnReg[AppendixBB] = ExnReg;
+ }
+
+ // We loop through unwind destination EH pads that are targeted from some
+ // inner scopes. Because these EH pads are destination of more than one scope
+ // now, we split them so that the handler body is after 'end_try'.
+ // - Before
+ // ehpad:
+ // catch
+ // local.set n / drop
+ // handler body
+ // ...
+ // cont:
+ // end_try
+ //
+ // - After
+ // ehpad:
+ // catch
+ // local.set n / drop
+ // brdest: (new)
+ // end_try (hoisted from 'cont' BB)
+ // handler body (taken from 'ehpad')
+ // ...
+ // cont:
+ for (auto &P : UnwindDestToTryRanges) {
+ NumUnwindMismatches++;
+
+ // This means the destination is the appendix BB, which was separately
+ // handled above.
+ if (!P.first)
+ continue;
+
+ MachineBasicBlock *EHPad = P.first;
+
+    // Find the 'catch' and the 'local.set' or 'drop' instruction that follows
+    // the 'catch'. If -wasm-disable-explicit-locals is not set, 'catch' should
+    // always be followed by either a 'local.set' or a 'drop', because
+    // 'br_on_exn' is generated after 'catch' in LateEHPrepare and we don't
+    // support blocks taking values yet.
+ MachineInstr *Catch = nullptr;
+ unsigned ExnReg = 0;
+ for (auto &MI : *EHPad) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::CATCH:
+ Catch = &MI;
+ ExnReg = Catch->getOperand(0).getReg();
+ break;
+ }
+ }
+ assert(Catch && "EH pad does not have a catch");
+ assert(ExnReg != 0 && "Invalid register");
+
+ auto SplitPos = std::next(Catch->getIterator());
+
+    // Create a new BB that will be the destination for branches from the
+ // inner mismatched scope.
+ MachineInstr *BeginTry = EHPadToTry[EHPad];
+ MachineInstr *EndTry = BeginToEnd[BeginTry];
+ MachineBasicBlock *Cont = EndTry->getParent();
+ auto *BrDest = MF.CreateMachineBasicBlock();
+ MF.insert(std::next(EHPad->getIterator()), BrDest);
+ // Hoist up the existing 'end_try'.
+ BrDest->insert(BrDest->end(), EndTry->removeFromParent());
+ // Take out the handler body from EH pad to the new branch destination BB.
+ BrDest->splice(BrDest->end(), EHPad, SplitPos, EHPad->end());
+ // Fix predecessor-successor relationship.
+ BrDest->transferSuccessors(EHPad);
+ EHPad->addSuccessor(BrDest);
+
+ // All try ranges that were supposed to unwind to this EH pad now have to
+ // branch to this new branch dest BB.
+ for (auto Range : UnwindDestToTryRanges[EHPad])
+ BrDestToTryRanges[BrDest].push_back(Range);
+ BrDestToExnReg[BrDest] = ExnReg;
+
+ // In case we fall through to the continuation BB after the catch block, we
+ // now have to add a branch to it.
+ // - Before
+ // try
+ // ...
+ // (falls through to 'cont')
+ // catch
+ // handler body
+ // end
+ // <-- cont
+ //
+ // - After
+ // try
+ // ...
+ // br %cont (new)
+ // catch
+ // end
+ // handler body
+ // <-- cont
+ MachineBasicBlock *EHPadLayoutPred = &*std::prev(EHPad->getIterator());
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool Analyzable = !TII.analyzeBranch(*EHPadLayoutPred, TBB, FBB, Cond);
+ if (Analyzable && !TBB && !FBB) {
+ DebugLoc DL = EHPadLayoutPred->empty()
+ ? DebugLoc()
+ : EHPadLayoutPred->rbegin()->getDebugLoc();
+ BuildMI(EHPadLayoutPred, DL, TII.get(WebAssembly::BR)).addMBB(Cont);
+ }
+ }
+
+ // For possibly throwing calls whose unwind destinations are currently
+ // incorrect because of CFG linearization, we wrap them with a nested
+ // try/catch/end_try, and within the new catch block, we branch to the correct
+ // handler.
+ // - Before
+ // mbb:
+ // call @foo <- Unwind destination mismatch!
+ // ehpad:
+ // ...
+ //
+ // - After
+ // mbb:
+ // try (new)
+ // call @foo
+ // nested-ehpad: (new)
+ // catch (new)
+ // local.set n / drop (new)
+ // br %brdest (new)
+ // nested-end: (new)
+ // end_try (new)
+ // ehpad:
+ // ...
+ for (auto &P : BrDestToTryRanges) {
+ MachineBasicBlock *BrDest = P.first;
+ auto &TryRanges = P.second;
+ unsigned ExnReg = BrDestToExnReg[BrDest];
+
+ for (auto Range : TryRanges) {
+ MachineInstr *RangeBegin = nullptr, *RangeEnd = nullptr;
+ std::tie(RangeBegin, RangeEnd) = Range;
+ auto *MBB = RangeBegin->getParent();
+
+ // Include possible EH_LABELs in the range
+ if (RangeBegin->getIterator() != MBB->begin() &&
+ std::prev(RangeBegin->getIterator())->isEHLabel())
+ RangeBegin = &*std::prev(RangeBegin->getIterator());
+ if (std::next(RangeEnd->getIterator()) != MBB->end() &&
+ std::next(RangeEnd->getIterator())->isEHLabel())
+ RangeEnd = &*std::next(RangeEnd->getIterator());
+
+ MachineBasicBlock *EHPad = nullptr;
+ for (auto *Succ : MBB->successors()) {
+ if (Succ->isEHPad()) {
+ EHPad = Succ;
+ break;
+ }
+ }
+
+ // Create the nested try instruction.
+ MachineInstr *NestedTry =
+ BuildMI(*MBB, *RangeBegin, RangeBegin->getDebugLoc(),
+ TII.get(WebAssembly::TRY))
+ .addImm(int64_t(WebAssembly::ExprType::Void));
+
+ // Create the nested EH pad and fill instructions in.
+ MachineBasicBlock *NestedEHPad = MF.CreateMachineBasicBlock();
+ MF.insert(std::next(MBB->getIterator()), NestedEHPad);
+ NestedEHPad->setIsEHPad();
+ NestedEHPad->setIsEHScopeEntry();
+ BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::CATCH),
+ ExnReg);
+ BuildMI(NestedEHPad, RangeEnd->getDebugLoc(), TII.get(WebAssembly::BR))
+ .addMBB(BrDest);
+
+ // Create the nested continuation BB and end_try instruction.
+ MachineBasicBlock *NestedCont = MF.CreateMachineBasicBlock();
+ MF.insert(std::next(NestedEHPad->getIterator()), NestedCont);
+ MachineInstr *NestedEndTry =
+ BuildMI(*NestedCont, NestedCont->begin(), RangeEnd->getDebugLoc(),
+ TII.get(WebAssembly::END_TRY));
+ // In case MBB has more instructions after the try range, move them to the
+ // new nested continuation BB.
+ NestedCont->splice(NestedCont->end(), MBB,
+ std::next(RangeEnd->getIterator()), MBB->end());
+ registerTryScope(NestedTry, NestedEndTry, NestedEHPad);
+
+ // Fix predecessor-successor relationship.
+ NestedCont->transferSuccessors(MBB);
+ if (EHPad)
+ NestedCont->removeSuccessor(EHPad);
+ MBB->addSuccessor(NestedEHPad);
+ MBB->addSuccessor(NestedCont);
+ NestedEHPad->addSuccessor(BrDest);
+ }
+ }
+
+ // Renumber BBs and recalculate ScopeTop info because new BBs might have been
+ // created and inserted above.
+ MF.RenumberBlocks();
+ ScopeTops.clear();
+ ScopeTops.resize(MF.getNumBlockIDs());
+ for (auto &MBB : reverse(MF)) {
+ for (auto &MI : reverse(MBB)) {
+ if (ScopeTops[MBB.getNumber()])
+ break;
+ switch (MI.getOpcode()) {
+ case WebAssembly::END_BLOCK:
+ case WebAssembly::END_LOOP:
+ case WebAssembly::END_TRY:
+ ScopeTops[MBB.getNumber()] = EndToBegin[&MI]->getParent();
+ break;
+ case WebAssembly::CATCH:
+ ScopeTops[MBB.getNumber()] = EHPadToTry[&MBB]->getParent();
+ break;
+ }
+ }
+ }
+
+ // Recompute the dominator tree.
+ getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
+
+ // Place block markers for newly added branches.
+ SmallVector<MachineBasicBlock *, 8> BrDests;
+ for (auto &P : BrDestToTryRanges)
+ BrDests.push_back(P.first);
+ llvm::sort(BrDests,
+ [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ auto ANum = A->getNumber();
+ auto BNum = B->getNumber();
+ return ANum < BNum;
+ });
+ for (auto *Dest : BrDests)
+ placeBlockMarker(*Dest);
+
+ return true;
}
static unsigned
-GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+getDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
const MachineBasicBlock *MBB) {
unsigned Depth = 0;
for (auto X : reverse(Stack)) {
@@ -617,19 +1202,19 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
if (MFI.getResults().empty())
return;
- WebAssembly::ExprType retType;
+ WebAssembly::ExprType RetType;
switch (MFI.getResults().front().SimpleTy) {
case MVT::i32:
- retType = WebAssembly::ExprType::I32;
+ RetType = WebAssembly::ExprType::I32;
break;
case MVT::i64:
- retType = WebAssembly::ExprType::I64;
+ RetType = WebAssembly::ExprType::I64;
break;
case MVT::f32:
- retType = WebAssembly::ExprType::F32;
+ RetType = WebAssembly::ExprType::F32;
break;
case MVT::f64:
- retType = WebAssembly::ExprType::F64;
+ RetType = WebAssembly::ExprType::F64;
break;
case MVT::v16i8:
case MVT::v8i16:
@@ -637,10 +1222,10 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
- retType = WebAssembly::ExprType::V128;
+ RetType = WebAssembly::ExprType::V128;
break;
- case MVT::ExceptRef:
- retType = WebAssembly::ExprType::ExceptRef;
+ case MVT::exnref:
+ RetType = WebAssembly::ExprType::Exnref;
break;
default:
llvm_unreachable("unexpected return type");
@@ -651,11 +1236,11 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
if (MI.isPosition() || MI.isDebugInstr())
continue;
if (MI.getOpcode() == WebAssembly::END_BLOCK) {
- EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
continue;
}
if (MI.getOpcode() == WebAssembly::END_LOOP) {
- EndToBegin[&MI]->getOperand(0).setImm(int32_t(retType));
+ EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType));
continue;
}
// Something other than an `end`. We're done.
@@ -666,7 +1251,7 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
// WebAssembly functions end with an end instruction, as if the function body
// were a block.
-static void AppendEndToFunction(MachineFunction &MF,
+static void appendEndToFunction(MachineFunction &MF,
const WebAssemblyInstrInfo &TII) {
BuildMI(MF.back(), MF.back().end(),
MF.back().findPrevDebugLoc(MF.back().end()),
@@ -675,66 +1260,42 @@ static void AppendEndToFunction(MachineFunction &MF,
/// Insert LOOP/TRY/BLOCK markers at appropriate places.
void WebAssemblyCFGStackify::placeMarkers(MachineFunction &MF) {
- const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
// We allocate one more than the number of blocks in the function to
// accommodate for the possible fake block we may insert at the end.
ScopeTops.resize(MF.getNumBlockIDs() + 1);
// Place the LOOP for MBB if MBB is the header of a loop.
for (auto &MBB : MF)
placeLoopMarker(MBB);
- // Place the TRY for MBB if MBB is the EH pad of an exception.
- if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
- MF.getFunction().hasPersonalityFn())
- for (auto &MBB : MF)
- placeTryMarker(MBB);
- // Place the BLOCK for MBB if MBB is branched to from above.
- for (auto &MBB : MF)
- placeBlockMarker(MBB);
+
+ const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
+ for (auto &MBB : MF) {
+ if (MBB.isEHPad()) {
+ // Place the TRY for MBB if MBB is the EH pad of an exception.
+ if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn())
+ placeTryMarker(MBB);
+ } else {
+ // Place the BLOCK for MBB if MBB is branched to from above.
+ placeBlockMarker(MBB);
+ }
+ }
+ // Fix mismatches in unwind destinations induced by linearizing the code.
+ fixUnwindMismatches(MF);
}
void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
// Now rewrite references to basic blocks to be depth immediates.
- // We need two stacks: one for normal scopes and the other for EH pad scopes.
- // EH pad stack is used to rewrite depths in rethrow instructions.
SmallVector<const MachineBasicBlock *, 8> Stack;
- SmallVector<const MachineBasicBlock *, 8> EHPadStack;
for (auto &MBB : reverse(MF)) {
for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
- assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
- MBB.getNumber() &&
- "Block/try should be balanced");
- Stack.pop_back();
- break;
-
case WebAssembly::TRY:
assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <=
MBB.getNumber() &&
"Block/try marker should be balanced");
Stack.pop_back();
- EHPadStack.pop_back();
- break;
-
- case WebAssembly::CATCH_I32:
- case WebAssembly::CATCH_I64:
- case WebAssembly::CATCH_ALL:
- // Currently the only case there are more than one catch for a try is
- // for catch terminate pad, in the form of
- // try
- // catch
- // call @__clang_call_terminate
- // unreachable
- // catch_all
- // call @std::terminate
- // unreachable
- // end
- // So we shouldn't push the current BB for the second catch_all block
- // here.
- if (!WebAssembly::isCatchAllTerminatePad(MBB))
- EHPadStack.push_back(&MBB);
break;
case WebAssembly::LOOP:
@@ -751,23 +1312,6 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
Stack.push_back(EndToBegin[&MI]->getParent());
break;
- case WebAssembly::RETHROW: {
- // Rewrite MBB operands to be depth immediates.
- unsigned EHPadDepth = GetDepth(EHPadStack, MI.getOperand(0).getMBB());
- MI.RemoveOperand(0);
- MI.addOperand(MF, MachineOperand::CreateImm(EHPadDepth));
- break;
- }
-
- case WebAssembly::RETHROW_TO_CALLER: {
- MachineInstr *Rethrow =
- BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(WebAssembly::RETHROW))
- .addImm(EHPadStack.size());
- MI.eraseFromParent();
- I = MachineBasicBlock::reverse_iterator(Rethrow);
- break;
- }
-
default:
if (MI.isTerminator()) {
// Rewrite MBB operands to be depth immediates.
@@ -776,7 +1320,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
MI.RemoveOperand(MI.getNumOperands() - 1);
for (auto MO : Ops) {
if (MO.isMBB())
- MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB()));
+ MO = MachineOperand::CreateImm(getDepth(Stack, MO.getMBB()));
MI.addOperand(MF, MO);
}
}
@@ -793,13 +1337,14 @@ void WebAssemblyCFGStackify::releaseMemory() {
EndToBegin.clear();
TryToEHPad.clear();
EHPadToTry.clear();
- BeginToBottom.clear();
+ AppendixBB = nullptr;
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** CFG Stackifying **********\n"
"********** Function: "
<< MF.getName() << '\n');
+ const MCAsmInfo *MCAI = MF.getTarget().getMCAsmInfo();
releaseMemory();
@@ -809,6 +1354,11 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
// Place the BLOCK/LOOP/TRY markers to indicate the beginnings of scopes.
placeMarkers(MF);
+ // Remove unnecessary instructions possibly introduced by try/end_trys.
+ if (MCAI->getExceptionHandlingType() == ExceptionHandling::Wasm &&
+ MF.getFunction().hasPersonalityFn())
+ removeUnnecessaryInstrs(MF);
+
// Convert MBB operands in terminators to relative depth immediates.
rewriteDepthImmediates(MF);
@@ -821,7 +1371,8 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
if (!MF.getSubtarget<WebAssemblySubtarget>()
.getTargetTriple()
.isOSBinFormatELF())
- AppendEndToFunction(MF, TII);
+ appendEndToFunction(MF, TII);
+ MF.getInfo<WebAssemblyFunctionInfo>()->setCFGStackified();
return true;
}
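
The CFGStackify changes above hinge on turning branch targets into Wasm relative depths (see getDepth and the terminator rewrite in rewriteDepthImmediates). As a rough standalone illustration of that computation, not the pass's actual API, the sketch below counts how many enclosing scopes must be crossed to reach the target; block identities are reduced to plain integers.

#include <cassert>
#include <vector>

// Scopes are kept innermost-last, mirroring the Stack used by getDepth().
// Depth 0 means the branch targets the innermost enclosing scope.
unsigned relativeDepth(const std::vector<int> &ScopeStack, int TargetBlock) {
  unsigned Depth = 0;
  for (auto It = ScopeStack.rbegin(); It != ScopeStack.rend(); ++It) {
    if (*It == TargetBlock)
      return Depth;
    ++Depth;
  }
  assert(false && "branch target not found on the scope stack");
  return Depth;
}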
diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index aaa6d286598f..2537e6042b1e 100644
--- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -61,19 +60,19 @@ FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
return new WebAssemblyCallIndirectFixup();
}
-static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
+static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
switch (MI.getOpcode()) {
using namespace WebAssembly;
case PCALL_INDIRECT_VOID:
return CALL_INDIRECT_VOID;
- case PCALL_INDIRECT_I32:
- return CALL_INDIRECT_I32;
- case PCALL_INDIRECT_I64:
- return CALL_INDIRECT_I64;
- case PCALL_INDIRECT_F32:
- return CALL_INDIRECT_F32;
- case PCALL_INDIRECT_F64:
- return CALL_INDIRECT_F64;
+ case PCALL_INDIRECT_i32:
+ return CALL_INDIRECT_i32;
+ case PCALL_INDIRECT_i64:
+ return CALL_INDIRECT_i64;
+ case PCALL_INDIRECT_f32:
+ return CALL_INDIRECT_f32;
+ case PCALL_INDIRECT_f64:
+ return CALL_INDIRECT_f64;
case PCALL_INDIRECT_v16i8:
return CALL_INDIRECT_v16i8;
case PCALL_INDIRECT_v8i16:
@@ -86,13 +85,17 @@ static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
return CALL_INDIRECT_v4f32;
case PCALL_INDIRECT_v2f64:
return CALL_INDIRECT_v2f64;
+ case PCALL_INDIRECT_exnref:
+ return CALL_INDIRECT_exnref;
+ case PRET_CALL_INDIRECT:
+ return RET_CALL_INDIRECT;
default:
return INSTRUCTION_LIST_END;
}
}
-static bool IsPseudoCallIndirect(const MachineInstr &MI) {
- return GetNonPseudoCallIndirectOpcode(MI) !=
+static bool isPseudoCallIndirect(const MachineInstr &MI) {
+ return getNonPseudoCallIndirectOpcode(MI) !=
WebAssembly::INSTRUCTION_LIST_END;
}
@@ -106,11 +109,11 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- if (IsPseudoCallIndirect(MI)) {
+ if (isPseudoCallIndirect(MI)) {
LLVM_DEBUG(dbgs() << "Found call_indirect: " << MI << '\n');
// Rewrite pseudo to non-pseudo
- const MCInstrDesc &Desc = TII->get(GetNonPseudoCallIndirectOpcode(MI));
+ const MCInstrDesc &Desc = TII->get(getNonPseudoCallIndirectOpcode(MI));
MI.setDesc(Desc);
// Rewrite argument order
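
The CallIndirectFixup hunks above follow a simple pattern: map each pseudo call_indirect opcode to its real counterpart (with a sentinel meaning "not a pseudo"), then walk every instruction and swap in the real descriptor. A minimal standalone sketch of that shape, using illustrative enum values rather than WebAssembly's real opcode tables:

#include <vector>

enum Opcode { PCALL_INDIRECT_VOID, CALL_INDIRECT_VOID, SOMETHING_ELSE, OPCODE_END };

struct Instr { Opcode Opc; };

static Opcode getNonPseudo(Opcode Opc) {
  switch (Opc) {
  case PCALL_INDIRECT_VOID:
    return CALL_INDIRECT_VOID;
  default:
    return OPCODE_END; // sentinel: not a pseudo call_indirect
  }
}

static bool rewriteCallIndirects(std::vector<Instr> &Body) {
  bool Changed = false;
  for (Instr &I : Body) {
    Opcode Real = getNonPseudo(I.Opc);
    if (Real == OPCODE_END)
      continue;
    I.Opc = Real; // corresponds to MI.setDesc(TII->get(...)) in the pass
    Changed = true;
  }
  return Changed;
}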
diff --git a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
index 8ecc159951ad..579377c9a5d7 100644
--- a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyDebugValueManager.cpp - WebAssembly DebugValue Manager -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
index 73f317214058..06e8805b5ad0 100644
--- a/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
+++ b/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -1,9 +1,8 @@
// WebAssemblyDebugValueManager.h - WebAssembly DebugValue Manager -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp b/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
deleted file mode 100644
index c86260ba408c..000000000000
--- a/lib/Target/WebAssembly/WebAssemblyEHRestoreStackPointer.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-- WebAssemblyEHRestoreStackPointer.cpp - __stack_pointer restoration ===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// After the stack is unwound due to a thrown exception, the __stack_pointer
-/// global can point to an invalid address. This inserts instructions that
-/// restore __stack_pointer global.
-///
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "WebAssembly.h"
-#include "WebAssemblySubtarget.h"
-#include "WebAssemblyUtilities.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "wasm-eh-restore-stack-pointer"
-
-namespace {
-class WebAssemblyEHRestoreStackPointer final : public MachineFunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyEHRestoreStackPointer() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override {
- return "WebAssembly Restore Stack Pointer for Exception Handling";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-} // end anonymous namespace
-
-char WebAssemblyEHRestoreStackPointer::ID = 0;
-INITIALIZE_PASS(WebAssemblyEHRestoreStackPointer, DEBUG_TYPE,
- "Restore Stack Pointer for Exception Handling", true, false)
-
-FunctionPass *llvm::createWebAssemblyEHRestoreStackPointer() {
- return new WebAssemblyEHRestoreStackPointer();
-}
-
-bool WebAssemblyEHRestoreStackPointer::runOnMachineFunction(
- MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "********** EH Restore Stack Pointer **********\n"
- "********** Function: "
- << MF.getName() << '\n');
-
- const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
- MF.getSubtarget().getFrameLowering());
- if (!FrameLowering->needsPrologForEH(MF))
- return false;
- bool Changed = false;
-
- for (auto &MBB : MF) {
- if (!MBB.isEHPad())
- continue;
- Changed = true;
-
- // Insert __stack_pointer restoring instructions at the beginning of each EH
- // pad, after the catch instruction. (Catch instructions may have been
- // reordered, and catch_all instructions have not been inserted yet, but
- // those cases are handled in LateEHPrepare).
- //
- // Here it is safe to assume that SP32 holds the latest value of
- // __stack_pointer, because the only exception for this case is when a
- // function uses the red zone, but that only happens with leaf functions,
- // and we don't restore __stack_pointer in leaf functions anyway.
- auto InsertPos = MBB.begin();
- if (WebAssembly::isCatch(*MBB.begin()))
- InsertPos++;
- FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
- MBB.begin()->getDebugLoc());
- }
- return Changed;
-}
diff --git a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 6b3a3e765786..0387957b14c2 100644
--- a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -1,9 +1,8 @@
//===--- WebAssemblyExceptionInfo.cpp - Exception Information -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -51,10 +50,6 @@ void WebAssemblyExceptionInfo::recalculate(
MachineBasicBlock *EHPad = DomNode->getBlock();
if (!EHPad->isEHPad())
continue;
- // We group catch & catch-all terminate pads together, so skip the second
- // one
- if (WebAssembly::isCatchAllTerminatePad(*EHPad))
- continue;
auto *WE = new WebAssemblyException(EHPad);
discoverAndMapException(WE, MDT, MDF);
Exceptions.push_back(WE);
@@ -105,16 +100,6 @@ void WebAssemblyExceptionInfo::discoverAndMapException(
// Map blocks that belong to a catchpad / cleanuppad
MachineBasicBlock *EHPad = WE->getEHPad();
-
- // We group catch & catch-all terminate pads together within an exception
- if (WebAssembly::isCatchTerminatePad(*EHPad)) {
- assert(EHPad->succ_size() == 1 &&
- "Catch terminate pad has more than one successors");
- changeExceptionFor(EHPad, WE);
- changeExceptionFor(*(EHPad->succ_begin()), WE);
- return;
- }
-
SmallVector<MachineBasicBlock *, 8> WL;
WL.push_back(EHPad);
while (!WL.empty()) {
diff --git a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
index fcd7e2366e03..9a90d7df7d47 100644
--- a/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h
@@ -1,9 +1,8 @@
//===-- WebAssemblyExceptionInfo.h - WebAssembly Exception Info -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 27aabe6ba0bd..dbd62179f055 100644
--- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyExplicitLocals.cpp - Make Locals Explicit --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -91,13 +90,13 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
return WebAssembly::DROP_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::DROP_V128;
- if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::DROP_EXCEPT_REF;
+ if (RC == &WebAssembly::EXNREFRegClass)
+ return WebAssembly::DROP_EXNREF;
llvm_unreachable("Unexpected register class");
}
/// Get the appropriate local.get opcode for the given register class.
-static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
+static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
return WebAssembly::LOCAL_GET_I32;
if (RC == &WebAssembly::I64RegClass)
@@ -108,13 +107,13 @@ static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
return WebAssembly::LOCAL_GET_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::LOCAL_GET_V128;
- if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::LOCAL_GET_EXCEPT_REF;
+ if (RC == &WebAssembly::EXNREFRegClass)
+ return WebAssembly::LOCAL_GET_EXNREF;
llvm_unreachable("Unexpected register class");
}
/// Get the appropriate local.set opcode for the given register class.
-static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
+static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
return WebAssembly::LOCAL_SET_I32;
if (RC == &WebAssembly::I64RegClass)
@@ -125,13 +124,13 @@ static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
return WebAssembly::LOCAL_SET_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::LOCAL_SET_V128;
- if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::LOCAL_SET_EXCEPT_REF;
+ if (RC == &WebAssembly::EXNREFRegClass)
+ return WebAssembly::LOCAL_SET_EXNREF;
llvm_unreachable("Unexpected register class");
}
/// Get the appropriate local.tee opcode for the given register class.
-static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
+static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
return WebAssembly::LOCAL_TEE_I32;
if (RC == &WebAssembly::I64RegClass)
@@ -142,8 +141,8 @@ static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
return WebAssembly::LOCAL_TEE_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::LOCAL_TEE_V128;
- if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return WebAssembly::LOCAL_TEE_EXCEPT_REF;
+ if (RC == &WebAssembly::EXNREFRegClass)
+ return WebAssembly::LOCAL_TEE_EXNREF;
llvm_unreachable("Unexpected register class");
}
@@ -159,8 +158,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
return MVT::f64;
if (RC == &WebAssembly::V128RegClass)
return MVT::v16i8;
- if (RC == &WebAssembly::EXCEPT_REFRegClass)
- return MVT::ExceptRef;
+ if (RC == &WebAssembly::EXNREFRegClass)
+ return MVT::exnref;
llvm_unreachable("unrecognized register class");
}
@@ -206,7 +205,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
E = MF.begin()->end();
I != E;) {
MachineInstr &MI = *I++;
- if (!WebAssembly::isArgument(MI))
+ if (!WebAssembly::isArgument(MI.getOpcode()))
break;
unsigned Reg = MI.getOperand(0).getReg();
assert(!MFI.isVRegStackified(Reg));
@@ -228,7 +227,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I++;
- assert(!WebAssembly::isArgument(MI));
+ assert(!WebAssembly::isArgument(MI.getOpcode()));
if (MI.isDebugInstr() || MI.isLabel())
continue;
@@ -236,7 +235,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Replace tee instructions with local.tee. The difference is that tee
// instructions have two defs, while local.tee instructions have one def
// and an index of a local to write to.
- if (WebAssembly::isTee(MI)) {
+ if (WebAssembly::isTee(MI.getOpcode())) {
assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
assert(!MFI.isVRegStackified(MI.getOperand(1).getReg()));
unsigned OldReg = MI.getOperand(2).getReg();
@@ -246,7 +245,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (!MFI.isVRegStackified(OldReg)) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
- unsigned Opc = getGetLocalOpcode(RC);
+ unsigned Opc = getLocalGetOpcode(RC);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg)
.addImm(LocalId);
MI.getOperand(2).setReg(NewReg);
@@ -256,7 +255,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Replace the TEE with a LOCAL_TEE.
unsigned LocalId =
getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
- unsigned Opc = getTeeLocalOpcode(RC);
+ unsigned Opc = getLocalTeeOpcode(RC);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(0).getReg())
.addImm(LocalId)
@@ -275,7 +274,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (!MFI.isVRegStackified(OldReg)) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
- auto InsertPt = std::next(MachineBasicBlock::iterator(&MI));
+ auto InsertPt = std::next(MI.getIterator());
if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
MI.eraseFromParent();
Changed = true;
@@ -290,7 +289,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
Drop->getOperand(0).setIsKill();
} else {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
- unsigned Opc = getSetLocalOpcode(RC);
+ unsigned Opc = getLocalSetOpcode(RC);
BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
.addImm(LocalId)
.addReg(NewReg);
@@ -317,7 +316,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// with inline asm register operands is to provide local indices as
// immediates.
if (MO.isDef()) {
- assert(MI.getOpcode() == TargetOpcode::INLINEASM);
+ assert(MI.isInlineAsm());
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
// If this register operand is tied to another operand, we can't
// change it to an immediate. Untie it first.
@@ -335,7 +334,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Our contract with inline asm register operands is to provide local
// indices as immediates.
- if (MI.getOpcode() == TargetOpcode::INLINEASM) {
+ if (MI.isInlineAsm()) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
// Untie it first if this reg operand is tied to another operand.
MI.untieRegOperand(MI.getOperandNo(&MO));
@@ -347,7 +346,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
- unsigned Opc = getGetLocalOpcode(RC);
+ unsigned Opc = getLocalGetOpcode(RC);
InsertPt =
BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
.addImm(LocalId);
@@ -357,7 +356,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
}
// Coalesce and eliminate COPY instructions.
- if (WebAssembly::isCopy(MI)) {
+ if (WebAssembly::isCopy(MI.getOpcode())) {
MRI.replaceRegWith(MI.getOperand(1).getReg(),
MI.getOperand(0).getReg());
MI.eraseFromParent();
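
The ExplicitLocals hunks above lean on getLocalId(Reg2Local, CurLocal, Reg), whose body falls outside these hunks. Reading its call sites, it plausibly maps a virtual register to a Wasm local index, handing out the next free index on first use; a small sketch under that assumption (the container type and names here are illustrative, not the pass's actual code):

#include <unordered_map>

using Register = unsigned;

// Return the local index for Reg, assigning the next free one (CurLocal) the
// first time the register is seen.
static unsigned getLocalIdSketch(std::unordered_map<Register, unsigned> &Reg2Local,
                                 unsigned &CurLocal, Register Reg) {
  auto P = Reg2Local.insert({Reg, CurLocal});
  if (P.second) // a fresh local was just allocated for this register
    ++CurLocal;
  return P.first->second;
}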
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 3856700cca94..2552e9150833 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -50,22 +49,22 @@ class WebAssemblyFastISel final : public FastISel {
// All possible address modes.
class Address {
public:
- typedef enum { RegBase, FrameIndexBase } BaseKind;
+ using BaseKind = enum { RegBase, FrameIndexBase };
private:
- BaseKind Kind;
+ BaseKind Kind = RegBase;
union {
unsigned Reg;
int FI;
} Base;
- int64_t Offset;
+ int64_t Offset = 0;
- const GlobalValue *GV;
+ const GlobalValue *GV = nullptr;
public:
// Innocuous defaults for our address.
- Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; }
+ Address() { Base.Reg = 0; }
void setKind(BaseKind K) {
assert(!isSet() && "Can't change kind with non-zero base");
Kind = K;
@@ -92,9 +91,9 @@ class WebAssemblyFastISel final : public FastISel {
return Base.FI;
}
- void setOffset(int64_t Offset_) {
- assert(Offset_ >= 0 && "Offsets must be non-negative");
- Offset = Offset_;
+ void setOffset(int64_t NewOffset) {
+ assert(NewOffset >= 0 && "Offsets must be non-negative");
+ Offset = NewOffset;
}
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
@@ -116,7 +115,7 @@ class WebAssemblyFastISel final : public FastISel {
private:
// Utility helper routines
MVT::SimpleValueType getSimpleType(Type *Ty) {
- EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
+ EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
return VT.isSimple() ? VT.getSimpleVT().SimpleTy
: MVT::INVALID_SIMPLE_VALUE_TYPE;
}
@@ -130,7 +129,7 @@ private:
case MVT::i64:
case MVT::f32:
case MVT::f64:
- case MVT::ExceptRef:
+ case MVT::exnref:
return VT;
case MVT::f16:
return MVT::f32;
@@ -208,10 +207,9 @@ public:
} // end anonymous namespace
bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
-
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
- if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ if (const auto *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
@@ -219,7 +217,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
Opcode = I->getOpcode();
U = I;
}
- } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ } else if (const auto *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
@@ -230,9 +228,13 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
// address spaces.
return false;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ if (const auto *GV = dyn_cast<GlobalValue>(Obj)) {
+ if (TLI.isPositionIndependent())
+ return false;
if (Addr.getGlobalValue())
return false;
+ if (GV->isThreadLocal())
+ return false;
Addr.setGlobalValue(GV);
return true;
}
@@ -275,7 +277,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
for (;;) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
@@ -290,8 +292,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
- ConstantInt *CI =
- cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ auto *CI = cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
@@ -315,7 +316,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
break;
}
case Instruction::Alloca: {
- const AllocaInst *AI = cast<AllocaInst>(Obj);
+ const auto *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
@@ -336,7 +337,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
if (isa<ConstantInt>(LHS))
std::swap(LHS, RHS);
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue();
if (int64_t(TmpOffset) >= 0) {
Addr.setOffset(TmpOffset);
@@ -356,7 +357,7 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue();
if (TmpOffset >= 0) {
Addr.setOffset(TmpOffset);
@@ -416,7 +417,7 @@ unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
}
unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
- if (const ICmpInst *ICmp = dyn_cast<ICmpInst>(V))
+ if (const auto *ICmp = dyn_cast<ICmpInst>(V))
if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
Not = ICmp->isTrueWhenEqual();
@@ -524,7 +525,10 @@ unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V,
return Result;
}
- return zeroExtendToI32(Reg, V, From);
+ if (To == MVT::i32)
+ return zeroExtendToI32(Reg, V, From);
+
+ return 0;
}
unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
@@ -543,7 +547,10 @@ unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V,
return Result;
}
- return signExtendToI32(Reg, V, From);
+ if (To == MVT::i32)
+ return signExtendToI32(Reg, V, From);
+
+ return 0;
}
unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) {
@@ -607,6 +614,10 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
+ if (TLI.isPositionIndependent())
+ return 0;
+ if (GV->isThreadLocal())
+ return 0;
unsigned ResultReg =
createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
: &WebAssembly::I32RegClass);
@@ -629,14 +640,14 @@ bool WebAssemblyFastISel::fastLowerArguments() {
if (F->isVarArg())
return false;
- unsigned i = 0;
+ unsigned I = 0;
for (auto const &Arg : F->args()) {
const AttributeList &Attrs = F->getAttributes();
- if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
- Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
- Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
- Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
- Attrs.hasParamAttribute(i, Attribute::Nest))
+ if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
+ Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(I, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
@@ -691,19 +702,19 @@ bool WebAssemblyFastISel::fastLowerArguments() {
Opc = WebAssembly::ARGUMENT_v2f64;
RC = &WebAssembly::V128RegClass;
break;
- case MVT::ExceptRef:
- Opc = WebAssembly::ARGUMENT_ExceptRef;
- RC = &WebAssembly::EXCEPT_REFRegClass;
+ case MVT::exnref:
+ Opc = WebAssembly::ARGUMENT_exnref;
+ RC = &WebAssembly::EXNREFRegClass;
break;
default:
return false;
}
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(i);
+ .addImm(I);
updateValueMap(&Arg, ResultReg);
- ++i;
+ ++I;
}
MRI.addLiveIn(WebAssembly::ARGUMENTS);
@@ -732,8 +743,9 @@ bool WebAssemblyFastISel::fastLowerArguments() {
}
bool WebAssemblyFastISel::selectCall(const Instruction *I) {
- const CallInst *Call = cast<CallInst>(I);
+ const auto *Call = cast<CallInst>(I);
+ // TODO: Support tail calls in FastISel
if (Call->isMustTailCall() || Call->isInlineAsm() ||
Call->getFunctionType()->isVarArg())
return false;
@@ -762,19 +774,19 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::PCALL_INDIRECT_I32;
+ Opc = IsDirect ? WebAssembly::CALL_i32 : WebAssembly::PCALL_INDIRECT_i32;
ResultReg = createResultReg(&WebAssembly::I32RegClass);
break;
case MVT::i64:
- Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::PCALL_INDIRECT_I64;
+ Opc = IsDirect ? WebAssembly::CALL_i64 : WebAssembly::PCALL_INDIRECT_i64;
ResultReg = createResultReg(&WebAssembly::I64RegClass);
break;
case MVT::f32:
- Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::PCALL_INDIRECT_F32;
+ Opc = IsDirect ? WebAssembly::CALL_f32 : WebAssembly::PCALL_INDIRECT_f32;
ResultReg = createResultReg(&WebAssembly::F32RegClass);
break;
case MVT::f64:
- Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::PCALL_INDIRECT_F64;
+ Opc = IsDirect ? WebAssembly::CALL_f64 : WebAssembly::PCALL_INDIRECT_f64;
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
case MVT::v16i8:
@@ -807,10 +819,10 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
: WebAssembly::PCALL_INDIRECT_v2f64;
ResultReg = createResultReg(&WebAssembly::V128RegClass);
break;
- case MVT::ExceptRef:
- Opc = IsDirect ? WebAssembly::CALL_EXCEPT_REF
- : WebAssembly::PCALL_INDIRECT_EXCEPT_REF;
- ResultReg = createResultReg(&WebAssembly::EXCEPT_REFRegClass);
+ case MVT::exnref:
+ Opc = IsDirect ? WebAssembly::CALL_exnref
+ : WebAssembly::PCALL_INDIRECT_exnref;
+ ResultReg = createResultReg(&WebAssembly::EXNREFRegClass);
break;
default:
return false;
@@ -818,25 +830,25 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
}
SmallVector<unsigned, 8> Args;
- for (unsigned i = 0, e = Call->getNumArgOperands(); i < e; ++i) {
- Value *V = Call->getArgOperand(i);
+ for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) {
+ Value *V = Call->getArgOperand(I);
MVT::SimpleValueType ArgTy = getSimpleType(V->getType());
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
const AttributeList &Attrs = Call->getAttributes();
- if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
- Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
- Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
- Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
- Attrs.hasParamAttribute(i, Attribute::Nest))
+ if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
+ Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(I, Attribute::Nest))
return false;
unsigned Reg;
- if (Attrs.hasParamAttribute(i, Attribute::SExt))
+ if (Attrs.hasParamAttribute(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasParamAttribute(i, Attribute::ZExt))
+ else if (Attrs.hasParamAttribute(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
@@ -847,6 +859,13 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
Args.push_back(Reg);
}
+ unsigned CalleeReg = 0;
+ if (!IsDirect) {
+ CalleeReg = getRegForValue(Call->getCalledValue());
+ if (!CalleeReg)
+ return false;
+ }
+
auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
if (!IsVoid)
@@ -854,12 +873,8 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
if (IsDirect)
MIB.addGlobalAddress(Func);
- else {
- unsigned Reg = getRegForValue(Call->getCalledValue());
- if (Reg == 0)
- return false;
- MIB.addReg(Reg);
- }
+ else
+ MIB.addReg(CalleeReg);
for (unsigned ArgReg : Args)
MIB.addReg(ArgReg);
@@ -870,7 +885,7 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
}
bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
- const SelectInst *Select = cast<SelectInst>(I);
+ const auto *Select = cast<SelectInst>(I);
bool Not;
unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
@@ -910,9 +925,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
Opc = WebAssembly::SELECT_F64;
RC = &WebAssembly::F64RegClass;
break;
- case MVT::ExceptRef:
- Opc = WebAssembly::SELECT_EXCEPT_REF;
- RC = &WebAssembly::EXCEPT_REFRegClass;
+ case MVT::exnref:
+ Opc = WebAssembly::SELECT_EXNREF;
+ RC = &WebAssembly::EXNREFRegClass;
break;
default:
return false;
@@ -929,7 +944,7 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
}
bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
- const TruncInst *Trunc = cast<TruncInst>(I);
+ const auto *Trunc = cast<TruncInst>(I);
unsigned Reg = getRegForValue(Trunc->getOperand(0));
if (Reg == 0)
@@ -948,7 +963,7 @@ bool WebAssemblyFastISel::selectTrunc(const Instruction *I) {
}
bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
- const ZExtInst *ZExt = cast<ZExtInst>(I);
+ const auto *ZExt = cast<ZExtInst>(I);
const Value *Op = ZExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
@@ -965,7 +980,7 @@ bool WebAssemblyFastISel::selectZExt(const Instruction *I) {
}
bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
- const SExtInst *SExt = cast<SExtInst>(I);
+ const auto *SExt = cast<SExtInst>(I);
const Value *Op = SExt->getOperand(0);
MVT::SimpleValueType From = getSimpleType(Op->getType());
@@ -982,11 +997,11 @@ bool WebAssemblyFastISel::selectSExt(const Instruction *I) {
}
bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
- const ICmpInst *ICmp = cast<ICmpInst>(I);
+ const auto *ICmp = cast<ICmpInst>(I);
bool I32 = getSimpleType(ICmp->getOperand(0)->getType()) != MVT::i64;
unsigned Opc;
- bool isSigned = false;
+ bool IsSigned = false;
switch (ICmp->getPredicate()) {
case ICmpInst::ICMP_EQ:
Opc = I32 ? WebAssembly::EQ_I32 : WebAssembly::EQ_I64;
@@ -1008,29 +1023,29 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
break;
case ICmpInst::ICMP_SGT:
Opc = I32 ? WebAssembly::GT_S_I32 : WebAssembly::GT_S_I64;
- isSigned = true;
+ IsSigned = true;
break;
case ICmpInst::ICMP_SGE:
Opc = I32 ? WebAssembly::GE_S_I32 : WebAssembly::GE_S_I64;
- isSigned = true;
+ IsSigned = true;
break;
case ICmpInst::ICMP_SLT:
Opc = I32 ? WebAssembly::LT_S_I32 : WebAssembly::LT_S_I64;
- isSigned = true;
+ IsSigned = true;
break;
case ICmpInst::ICMP_SLE:
Opc = I32 ? WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64;
- isSigned = true;
+ IsSigned = true;
break;
default:
return false;
}
- unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), isSigned);
+ unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), IsSigned);
if (LHS == 0)
return false;
- unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), isSigned);
+ unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), IsSigned);
if (RHS == 0)
return false;
@@ -1043,7 +1058,7 @@ bool WebAssemblyFastISel::selectICmp(const Instruction *I) {
}
bool WebAssemblyFastISel::selectFCmp(const Instruction *I) {
- const FCmpInst *FCmp = cast<FCmpInst>(I);
+ const auto *FCmp = cast<FCmpInst>(I);
unsigned LHS = getRegForValue(FCmp->getOperand(0));
if (LHS == 0)
@@ -1139,7 +1154,7 @@ bool WebAssemblyFastISel::selectBitCast(const Instruction *I) {
}
bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
- const LoadInst *Load = cast<LoadInst>(I);
+ const auto *Load = cast<LoadInst>(I);
if (Load->isAtomic())
return false;
if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy())
@@ -1196,7 +1211,7 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
}
bool WebAssemblyFastISel::selectStore(const Instruction *I) {
- const StoreInst *Store = cast<StoreInst>(I);
+ const auto *Store = cast<StoreInst>(I);
if (Store->isAtomic())
return false;
if (!Subtarget->hasSIMD128() &&
@@ -1252,7 +1267,7 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
}
bool WebAssemblyFastISel::selectBr(const Instruction *I) {
- const BranchInst *Br = cast<BranchInst>(I);
+ const auto *Br = cast<BranchInst>(I);
if (Br->isUnconditional()) {
MachineBasicBlock *MSucc = FuncInfo.MBBMap[Br->getSuccessor(0)];
fastEmitBranch(MSucc, Br->getDebugLoc());
@@ -1283,7 +1298,7 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
- const ReturnInst *Ret = cast<ReturnInst>(I);
+ const auto *Ret = cast<ReturnInst>(I);
if (Ret->getNumOperands() == 0) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -1330,8 +1345,8 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
case MVT::v2f64:
Opc = WebAssembly::RETURN_v2f64;
break;
- case MVT::ExceptRef:
- Opc = WebAssembly::RETURN_EXCEPT_REF;
+ case MVT::exnref:
+ Opc = WebAssembly::RETURN_EXNREF;
break;
default:
return false;
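
One recurring cleanup in the FastISel hunk above replaces the Address constructor's initializer list with in-class default member initializers, leaving only the union member to the constructor body. A throwaway illustration of that idiom, unrelated to the pass's real Address class:

struct AddressLike {
  enum BaseKind { RegBase, FrameIndexBase };

  BaseKind Kind = RegBase;     // defaults move out of the constructor...
  union {
    unsigned Reg;
    int FI;
  } Base;
  long long Offset = 0;
  const void *GV = nullptr;

  AddressLike() { Base.Reg = 0; } // ...only the union still needs code here
};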
diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index 1a416520f97d..b7fc65401fc4 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -36,11 +35,6 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-fix-function-bitcasts"
-static cl::opt<bool>
- TemporaryWorkarounds("wasm-temporary-workarounds",
- cl::desc("Apply certain temporary workarounds"),
- cl::init(true), cl::Hidden);
-
namespace {
class FixFunctionBitcasts final : public ModulePass {
StringRef getPassName() const override {
@@ -70,12 +64,12 @@ ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
// Recursively descend the def-use lists from V to find non-bitcast users of
// bitcasts of V.
-static void FindUses(Value *V, Function &F,
+static void findUses(Value *V, Function &F,
SmallVectorImpl<std::pair<Use *, Function *>> &Uses,
SmallPtrSetImpl<Constant *> &ConstantBCs) {
for (Use &U : V->uses()) {
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
- FindUses(BC, F, Uses, ConstantBCs);
+ if (auto *BC = dyn_cast<BitCastOperator>(U.getUser()))
+ findUses(BC, F, Uses, ConstantBCs);
else if (U.get()->getType() != F.getType()) {
CallSite CS(U.getUser());
if (!CS)
@@ -87,8 +81,8 @@ static void FindUses(Value *V, Function &F,
continue;
if (isa<Constant>(U.get())) {
// Only add constant bitcasts to the list once; they get RAUW'd
- auto c = ConstantBCs.insert(cast<Constant>(U.get()));
- if (!c.second)
+ auto C = ConstantBCs.insert(cast<Constant>(U.get()));
+ if (!C.second)
continue;
}
Uses.push_back(std::make_pair(&U, &F));
@@ -119,7 +113,7 @@ static void FindUses(Value *V, Function &F,
// For bitcasts that involve struct types we don't know at this stage if they
// would be equivalent at the wasm level and so we can't know if we need to
// generate a wrapper.
-static Function *CreateWrapper(Function *F, FunctionType *Ty) {
+static Function *createWrapper(Function *F, FunctionType *Ty) {
Module *M = F->getParent();
Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
@@ -157,11 +151,11 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
BB->getInstList().push_back(PtrCast);
Args.push_back(PtrCast);
} else if (ArgType->isStructTy() || ParamType->isStructTy()) {
- LLVM_DEBUG(dbgs() << "CreateWrapper: struct param type in bitcast: "
+ LLVM_DEBUG(dbgs() << "createWrapper: struct param type in bitcast: "
<< F->getName() << "\n");
WrapperNeeded = false;
} else {
- LLVM_DEBUG(dbgs() << "CreateWrapper: arg type mismatch calling: "
+ LLVM_DEBUG(dbgs() << "createWrapper: arg type mismatch calling: "
<< F->getName() << "\n");
LLVM_DEBUG(dbgs() << "Arg[" << Args.size() << "] Expected: "
<< *ParamType << " Got: " << *ArgType << "\n");
@@ -197,11 +191,11 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
BB->getInstList().push_back(Cast);
ReturnInst::Create(M->getContext(), Cast, BB);
} else if (RtnType->isStructTy() || ExpectedRtnType->isStructTy()) {
- LLVM_DEBUG(dbgs() << "CreateWrapper: struct return type in bitcast: "
+ LLVM_DEBUG(dbgs() << "createWrapper: struct return type in bitcast: "
<< F->getName() << "\n");
WrapperNeeded = false;
} else {
- LLVM_DEBUG(dbgs() << "CreateWrapper: return type mismatch calling: "
+ LLVM_DEBUG(dbgs() << "createWrapper: return type mismatch calling: "
<< F->getName() << "\n");
LLVM_DEBUG(dbgs() << "Expected: " << *ExpectedRtnType
<< " Got: " << *RtnType << "\n");
@@ -218,15 +212,26 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) {
new UnreachableInst(M->getContext(), BB);
Wrapper->setName(F->getName() + "_bitcast_invalid");
} else if (!WrapperNeeded) {
- LLVM_DEBUG(dbgs() << "CreateWrapper: no wrapper needed: " << F->getName()
+ LLVM_DEBUG(dbgs() << "createWrapper: no wrapper needed: " << F->getName()
<< "\n");
Wrapper->eraseFromParent();
return nullptr;
}
- LLVM_DEBUG(dbgs() << "CreateWrapper: " << F->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "createWrapper: " << F->getName() << "\n");
return Wrapper;
}
+// Test whether a main function with type FuncTy should be rewritten to have
+// type MainTy.
+static bool shouldFixMainFunction(FunctionType *FuncTy, FunctionType *MainTy) {
+ // Only fix the main function if it's the standard zero-arg form. That way,
+ // the standard cases will work as expected, and users will see signature
+ // mismatches from the linker for non-standard cases.
+ return FuncTy->getReturnType() == MainTy->getReturnType() &&
+ FuncTy->getNumParams() == 0 &&
+ !FuncTy->isVarArg();
+}
+
bool FixFunctionBitcasts::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Fix Function Bitcasts **********\n");
@@ -237,27 +242,27 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
// Collect all the places that need wrappers.
for (Function &F : M) {
- FindUses(&F, F, Uses, ConstantBCs);
+ findUses(&F, F, Uses, ConstantBCs);
// If we have a "main" function, and its type isn't
// "int main(int argc, char *argv[])", create an artificial call with it
// bitcasted to that type so that we generate a wrapper for it, so that
// the C runtime can call it.
- if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") {
+ if (F.getName() == "main") {
Main = &F;
LLVMContext &C = M.getContext();
Type *MainArgTys[] = {Type::getInt32Ty(C),
PointerType::get(Type::getInt8PtrTy(C), 0)};
FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys,
/*isVarArg=*/false);
- if (F.getFunctionType() != MainTy) {
+ if (shouldFixMainFunction(F.getFunctionType(), MainTy)) {
LLVM_DEBUG(dbgs() << "Found `main` function with incorrect type: "
<< *F.getFunctionType() << "\n");
Value *Args[] = {UndefValue::get(MainArgTys[0]),
UndefValue::get(MainArgTys[1])};
Value *Casted =
ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
- CallMain = CallInst::Create(Casted, Args, "call_main");
+ CallMain = CallInst::Create(MainTy, Casted, Args, "call_main");
Use *UseMain = &CallMain->getOperandUse(2);
Uses.push_back(std::make_pair(UseMain, &F));
}
@@ -269,8 +274,8 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
for (auto &UseFunc : Uses) {
Use *U = UseFunc.first;
Function *F = UseFunc.second;
- PointerType *PTy = cast<PointerType>(U->get()->getType());
- FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());
+ auto *PTy = cast<PointerType>(U->get()->getType());
+ auto *Ty = dyn_cast<FunctionType>(PTy->getElementType());
// If the function is casted to something like i8* as a "generic pointer"
// to be later casted to something else, we can't generate a wrapper for it.
@@ -280,7 +285,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
- Pair.first->second = CreateWrapper(F, Ty);
+ Pair.first->second = createWrapper(F, Ty);
Function *Wrapper = Pair.first->second;
if (!Wrapper)
@@ -296,14 +301,20 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
// one that gets called from startup.
if (CallMain) {
Main->setName("__original_main");
- Function *MainWrapper =
+ auto *MainWrapper =
cast<Function>(CallMain->getCalledValue()->stripPointerCasts());
- MainWrapper->setName("main");
- MainWrapper->setLinkage(Main->getLinkage());
- MainWrapper->setVisibility(Main->getVisibility());
- Main->setLinkage(Function::PrivateLinkage);
- Main->setVisibility(Function::DefaultVisibility);
delete CallMain;
+ if (Main->isDeclaration()) {
+ // The wrapper is not needed in this case as we don't need to export
+ // it to anyone else.
+ MainWrapper->eraseFromParent();
+ } else {
+ // Otherwise give the wrapper the same linkage as the original main
+ // function, so that it can be called from the same places.
+ MainWrapper->setName("main");
+ MainWrapper->setLinkage(Main->getLinkage());
+ MainWrapper->setVisibility(Main->getVisibility());
+ }
}
return true;
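
At the source level, the main handling above (for a defined, zero-argument main, per shouldFixMainFunction) is roughly equivalent to the following shape; this is a conceptual sketch of the end result, not literal pass output:

// The user's main is renamed and a wrapper with the standard signature takes
// over its name, linkage and visibility.
static int __original_main(void) { // stand-in body for the user's real main
  return 0;
}

int main(int argc, char **argv) {  // generated wrapper
  (void)argc;                      // a zero-argument main ignores these
  (void)argv;
  return __original_main();
}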
diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 108f2879a071..7d8e86d9b2c0 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -1,46 +1,48 @@
//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file implements a pass that transforms irreducible control flow into
-/// reducible control flow. Irreducible control flow means multiple-entry
-/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
-/// due to being unnatural.
+/// This file implements a pass that removes irreducible control flow.
+/// Irreducible control flow means multiple-entry loops, which this pass
+/// transforms to have a single entry.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
-/// The big picture: Ignoring natural loops (seeing them monolithically), we
-/// find all the blocks which can return to themselves ("loopers"). Loopers
-/// reachable from the non-loopers are loop entries: if there are 2 or more,
-/// then we have irreducible control flow. We fix that as follows: a new block
-/// is created that can dispatch to each of the loop entries, based on the
-/// value of a label "helper" variable, and we replace direct branches to the
-/// entries with assignments to the label variable and a branch to the dispatch
-/// block. Then the dispatch block is the single entry in a new natural loop.
+/// The big picture: We recursively process each "region", defined as a group
+/// of blocks with a single entry and no branches back to that entry. A region
+/// may be the entire function body, or the inner part of a loop, i.e., the
+/// loop's body without branches back to the loop entry. In each region we fix
+/// up multi-entry loops by adding a new block that can dispatch to each of the
+/// loop entries, based on the value of a label "helper" variable, and we
+/// replace direct branches to the entries with assignments to the label
+/// variable and a branch to the dispatch block. Then the dispatch block is the
+/// single entry in the loop containing the previous multiple entries. After
+/// ensuring all the loops in a region are reducible, we recurse into them. The
+/// total time complexity of this pass is:
+///
+/// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
+/// NumLoops * NumLoops)
///
-/// This is similar to what the Relooper [1] does, both identify looping code
-/// that requires multiple entries, and resolve it in a similar way. In
-/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
+/// This pass is similar to what the Relooper [1] does. Both identify looping
+/// code that requires multiple entries, and resolve it in a similar way (in
+/// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
/// also that like the Relooper, we implement a "minimal" intervention: we only
/// use the "label" helper for the blocks we absolutely must and no others. We
-/// also prioritize code size and do not perform node splitting (i.e. we don't
-/// duplicate code in order to resolve irreducibility).
+/// also prioritize code size and do not duplicate code in order to resolve
+/// irreducibility. The graph algorithms for finding loops and entries and so
+/// forth are also similar to the Relooper. The main differences between this
+/// pass and the Relooper are:
///
-/// The difference between this code and the Relooper is that the Relooper also
-/// generates ifs and loops and works in a recursive manner, knowing at each
-/// point what the entries are, and recursively breaks down the problem. Here
-/// we just want to resolve irreducible control flow, and we also want to use
-/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
-/// identify natural loops, etc., and we start with the whole CFG and must
-/// identify both the looping code and its entries.
+/// * We just care about irreducibility, so we just look at loops.
+/// * The Relooper emits structured control flow (with ifs etc.), while we
+/// emit a CFG.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object oriented
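// A source-level sketch of the rewrite described above, assuming blocks A and
// B form a two-entry (irreducible) loop: direct branches to the entries become
// assignments to a label helper plus a branch to a dispatch block, which
// switches on the label (a br_table in wasm). The names and the termination
// conditions here are invented purely for illustration.

#include <cstdio>

static int run(bool enterAtA) {
  int label = enterAtA ? 0 : 1; // assignment replaces the direct branch
  int steps = 0;
  while (true) {                // the new single-entry loop
    switch (label) {            // the dispatch block
    case 0: // block A
      if (++steps > 3)
        return steps;
      label = 1; // "branch to B" becomes a label update plus loop back
      break;
    default: // block B
      if (++steps > 5)
        return steps;
      label = 0; // "branch to A" becomes a label update plus loop back
      break;
    }
  }
}

int main() { std::printf("%d %d\n", run(true), run(false)); }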
@@ -52,200 +54,277 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
-#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
-#include "llvm/ADT/PriorityQueue.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
namespace {
-class LoopFixer {
+using BlockVector = SmallVector<MachineBasicBlock *, 4>;
+using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+
+// Calculates reachability in a region. Ignores branches to blocks outside of
+// the region, and ignores branches to the region entry (for the case where
+// the region is the inner part of a loop).
+class ReachabilityGraph {
public:
- LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
- : MF(MF), MLI(MLI), Loop(Loop) {}
+ ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
+ : Entry(Entry), Blocks(Blocks) {
+#ifndef NDEBUG
+ // The region must have a single entry.
+ for (auto *MBB : Blocks) {
+ if (MBB != Entry) {
+ for (auto *Pred : MBB->predecessors()) {
+ assert(inRegion(Pred));
+ }
+ }
+ }
+#endif
+ calculate();
+ }
+
+ bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
+ assert(inRegion(From) && inRegion(To));
+ auto I = Reachable.find(From);
+ if (I == Reachable.end())
+ return false;
+ return I->second.count(To);
+ }
+
+ // "Loopers" are blocks that are in a loop. We detect these by finding blocks
+ // that can reach themselves.
+ const BlockSet &getLoopers() const { return Loopers; }
+
+ // Get all blocks that are loop entries.
+ const BlockSet &getLoopEntries() const { return LoopEntries; }
- // Run the fixer on the given inputs. Returns whether changes were made.
- bool run();
+ // Get all blocks that enter a particular loop from outside.
+ const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
+ assert(inRegion(LoopEntry));
+ auto I = LoopEnterers.find(LoopEntry);
+ assert(I != LoopEnterers.end());
+ return I->second;
+ }
private:
- MachineFunction &MF;
- MachineLoopInfo &MLI;
- MachineLoop *Loop;
+ MachineBasicBlock *Entry;
+ const BlockSet &Blocks;
+
+ BlockSet Loopers, LoopEntries;
+ DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
- MachineBasicBlock *Header;
- SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
+ bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
- using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
+ // Maps a block to all the other blocks it can reach.
DenseMap<MachineBasicBlock *, BlockSet> Reachable;
- // The worklist contains pairs of recent additions, (a, b), where we just
- // added a link a => b.
- using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
- SmallVector<BlockPair, 4> WorkList;
-
- // Get a canonical block to represent a block or a loop: the block, or if in
- // an inner loop, the loop header, of it in an outer loop scope, we can
- // ignore it. We need to call this on all blocks we work on.
- MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
- MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
- if (InnerLoop == Loop) {
- return MBB;
- } else {
- // This is either in an outer or an inner loop, and not in ours.
- if (!LoopBlocks.count(MBB)) {
- // It's in outer code, ignore it.
- return nullptr;
+ void calculate() {
+ // Reachability computation work list. Contains pairs of recent additions
+ // (A, B) where we just added a link A => B.
+ using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
+ SmallVector<BlockPair, 4> WorkList;
+
+ // Add all relevant direct branches.
+ for (auto *MBB : Blocks) {
+ for (auto *Succ : MBB->successors()) {
+ if (Succ != Entry && inRegion(Succ)) {
+ Reachable[MBB].insert(Succ);
+ WorkList.emplace_back(MBB, Succ);
+ }
}
- assert(InnerLoop);
- // It's in an inner loop, canonicalize it to the header of that loop.
- return InnerLoop->getHeader();
}
- }
- // For a successor we can additionally ignore it if it's a branch back to a
- // natural loop top, as when we are in the scope of a loop, we just care
- // about internal irreducibility, and can ignore the loop we are in. We need
- // to call this on all blocks in a context where they are a successor.
- MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
- if (Loop && MBB == Loop->getHeader()) {
- // Ignore branches going to the loop's natural header.
- return nullptr;
+ while (!WorkList.empty()) {
+ MachineBasicBlock *MBB, *Succ;
+ std::tie(MBB, Succ) = WorkList.pop_back_val();
+ assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
+ if (MBB != Entry) {
+ // We recently added MBB => Succ, and that means we may have enabled
+ // Pred => MBB => Succ.
+ for (auto *Pred : MBB->predecessors()) {
+ if (Reachable[Pred].insert(Succ).second) {
+ WorkList.emplace_back(Pred, Succ);
+ }
+ }
+ }
}
- return canonicalize(MBB);
- }
- // Potentially insert a new reachable edge, and if so, note it as further
- // work.
- void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
- assert(MBB == canonicalize(MBB));
- assert(Succ);
- // Succ may not be interesting as a sucessor.
- Succ = canonicalizeSuccessor(Succ);
- if (!Succ)
- return;
- if (Reachable[MBB].insert(Succ).second) {
- // For there to be further work, it means that we have
- // X => MBB => Succ
- // for some other X, and in that case X => Succ would be a new edge for
- // us to discover later. However, if we don't care about MBB as a
- // successor, then we don't care about that anyhow.
- if (canonicalizeSuccessor(MBB)) {
- WorkList.emplace_back(MBB, Succ);
+ // Blocks that can return to themselves are in a loop.
+ for (auto *MBB : Blocks) {
+ if (canReach(MBB, MBB)) {
+ Loopers.insert(MBB);
+ }
+ }
+ assert(!Loopers.count(Entry));
+
+ // Find the loop entries - loopers reachable from blocks not in that loop -
+ // and those outside blocks that reach them, the "loop enterers".
+ for (auto *Looper : Loopers) {
+ for (auto *Pred : Looper->predecessors()) {
+ // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
+ // otherwise, it is a block that enters into the loop.
+ if (!canReach(Looper, Pred)) {
+ LoopEntries.insert(Looper);
+ LoopEnterers[Looper].insert(Pred);
+ }
}
}
}
};
-bool LoopFixer::run() {
- Header = Loop ? Loop->getHeader() : &*MF.begin();
-
- // Identify all the blocks in this loop scope.
- if (Loop) {
- for (auto *MBB : Loop->getBlocks()) {
- LoopBlocks.insert(MBB);
- }
- } else {
- for (auto &MBB : MF) {
- LoopBlocks.insert(&MBB);
- }
+// Finds the blocks in a single-entry loop, given the loop entry and the
+// list of blocks that enter the loop.
+class LoopBlocks {
+public:
+ LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
+ : Entry(Entry), Enterers(Enterers) {
+ calculate();
}
- // Compute which (canonicalized) blocks each block can reach.
-
- // Add all the initial work.
- for (auto *MBB : LoopBlocks) {
- MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
+ BlockSet &getBlocks() { return Blocks; }
- if (InnerLoop == Loop) {
- for (auto *Succ : MBB->successors()) {
- maybeInsert(MBB, Succ);
- }
- } else {
- // It can't be in an outer loop - we loop on LoopBlocks - and so it must
- // be an inner loop.
- assert(InnerLoop);
- // Check if we are the canonical block for this loop.
- if (canonicalize(MBB) != MBB) {
- continue;
- }
- // The successors are those of the loop.
- SmallVector<MachineBasicBlock *, 2> ExitBlocks;
- InnerLoop->getExitBlocks(ExitBlocks);
- for (auto *Succ : ExitBlocks) {
- maybeInsert(MBB, Succ);
+private:
+ MachineBasicBlock *Entry;
+ const BlockSet &Enterers;
+
+ BlockSet Blocks;
+
+ void calculate() {
+ // Going backwards from the loop entry, if we ignore the blocks entering
+ // from outside, we will traverse all the blocks in the loop.
+ BlockVector WorkList;
+ BlockSet AddedToWorkList;
+ Blocks.insert(Entry);
+ for (auto *Pred : Entry->predecessors()) {
+ if (!Enterers.count(Pred)) {
+ WorkList.push_back(Pred);
+ AddedToWorkList.insert(Pred);
}
}
- }
- // Do work until we are all done.
- while (!WorkList.empty()) {
- MachineBasicBlock *MBB;
- MachineBasicBlock *Succ;
- std::tie(MBB, Succ) = WorkList.pop_back_val();
- // The worklist item is an edge we just added, so it must have valid blocks
- // (and not something canonicalized to nullptr).
- assert(MBB);
- assert(Succ);
- // The successor in that pair must also be a valid successor.
- assert(MBB == canonicalizeSuccessor(MBB));
- // We recently added MBB => Succ, and that means we may have enabled
- // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
- // is correct for both a block and a block representing a loop, as the loop
- // is natural and so the predecessors are all predecessors of the loop
- // header, which is the block we have here.
- for (auto *Pred : MBB->predecessors()) {
- // Canonicalize, make sure it's relevant, and check it's not the same
- // block (an update to the block itself doesn't help compute that same
- // block).
- Pred = canonicalize(Pred);
- if (Pred && Pred != MBB) {
- maybeInsert(Pred, Succ);
+ while (!WorkList.empty()) {
+ auto *MBB = WorkList.pop_back_val();
+ assert(!Enterers.count(MBB));
+ if (Blocks.insert(MBB).second) {
+ for (auto *Pred : MBB->predecessors()) {
+ if (!AddedToWorkList.count(Pred)) {
+ WorkList.push_back(Pred);
+ AddedToWorkList.insert(Pred);
+ }
+ }
}
}
}
+};
- // It's now trivial to identify the loopers.
- SmallPtrSet<MachineBasicBlock *, 4> Loopers;
- for (auto MBB : LoopBlocks) {
- if (Reachable[MBB].count(MBB)) {
- Loopers.insert(MBB);
- }
+class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Fix Irreducible Control Flow";
}
- // The header cannot be a looper. At the toplevel, LLVM does not allow the
- // entry to be in a loop, and in a natural loop we should ignore the header.
- assert(Loopers.count(Header) == 0);
-
- // Find the entries, loopers reachable from non-loopers.
- SmallPtrSet<MachineBasicBlock *, 4> Entries;
- SmallVector<MachineBasicBlock *, 4> SortedEntries;
- for (auto *Looper : Loopers) {
- for (auto *Pred : Looper->predecessors()) {
- Pred = canonicalize(Pred);
- if (Pred && !Loopers.count(Pred)) {
- Entries.insert(Looper);
- SortedEntries.push_back(Looper);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
+ MachineFunction &MF);
+
+ void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
+ MachineFunction &MF, const ReachabilityGraph &Graph);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
+};
+
+bool WebAssemblyFixIrreducibleControlFlow::processRegion(
+ MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
+ bool Changed = false;
+
+ // Remove irreducibility before processing child loops, which may take
+ // multiple iterations.
+ while (true) {
+ ReachabilityGraph Graph(Entry, Blocks);
+
+ bool FoundIrreducibility = false;
+
+ for (auto *LoopEntry : Graph.getLoopEntries()) {
+ // Find mutual entries - all entries which can reach this one, and
+ // are reached by it (that always includes LoopEntry itself). All mutual
+ // entries must be in the same loop, so if we have more than one, then we
+ // have irreducible control flow.
+ //
+ // Note that irreducibility may involve inner loops, e.g. imagine A
+ // starts one loop, and it has B inside it which starts an inner loop.
+ // If we add a branch from all the way on the outside to B, then in a
+ // sense B is no longer an "inner" loop, semantically speaking. We will
+ // fix that irreducibility by adding a block that dispatches to either
+ // A or B, so B will no longer be an inner loop in our output.
+ // (A fancier approach might try to keep it as such.)
+ //
+ // Note that we still need to recurse into inner loops later, to handle
+ // the case where the irreducibility is entirely nested - we would not
+ // be able to identify that at this point, since the enclosing loop is
+ // a group of blocks all of which can reach each other. (We'll see the
+ // irreducibility after removing branches to the top of that enclosing
+ // loop.)
+ BlockSet MutualLoopEntries;
+ MutualLoopEntries.insert(LoopEntry);
+ for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
+ if (OtherLoopEntry != LoopEntry &&
+ Graph.canReach(LoopEntry, OtherLoopEntry) &&
+ Graph.canReach(OtherLoopEntry, LoopEntry)) {
+ MutualLoopEntries.insert(OtherLoopEntry);
+ }
+ }
+
+ if (MutualLoopEntries.size() > 1) {
+ makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
+ FoundIrreducibility = true;
+ Changed = true;
break;
}
}
+ // Only go on to actually process the inner loops when we are done
+ // removing irreducible control flow and changing the graph. Modifying
+ // the graph as we go is possible, and that might let us avoid looking at
+ // the already-fixed loops again if we are careful, but all that is
+ // complex and bug-prone. Since irreducible loops are rare, just starting
+ // another iteration is best.
+ if (FoundIrreducibility) {
+ continue;
+ }
+
+ for (auto *LoopEntry : Graph.getLoopEntries()) {
+ LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
+ // Each of these calls to processRegion may change the graph, but are
+ // guaranteed not to interfere with each other. The only changes we make
+ // to the graph are to add blocks on the way to a loop entry. As the
+ // loops are disjoint, that means we may only alter branches that exit
+ // another loop, which are ignored when recursing into that other loop
+ // anyhow.
+ if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
+ Changed = true;
+ }
+ }
+
+ return Changed;
}
+}
- // Check if we found irreducible control flow.
- if (LLVM_LIKELY(Entries.size() <= 1))
- return false;
+// Given a set of entries to a single loop, create a single entry for that
+// loop by creating a dispatch block for them, routing control flow using
+// a helper variable. Also updates Blocks with any new blocks created, so
+// that we properly track all the blocks in the region. But this does not update
+// ReachabilityGraph; this will be updated in the caller of this function as
+// needed.
+void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
+ BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
+ const ReachabilityGraph &Graph) {
+ assert(Entries.size() >= 2);
// Sort the entries to ensure a deterministic build.
+ BlockVector SortedEntries(Entries.begin(), Entries.end());
llvm::sort(SortedEntries,
[&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
auto ANum = A->getNumber();
@@ -257,8 +336,8 @@ bool LoopFixer::run() {
for (auto Block : SortedEntries)
assert(Block->getNumber() != -1);
if (SortedEntries.size() > 1) {
- for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
- I != E; ++I) {
+ for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
+ ++I) {
auto ANum = (*I)->getNumber();
auto BNum = (*(std::next(I)))->getNumber();
assert(ANum != BNum);
@@ -269,12 +348,12 @@ bool LoopFixer::run() {
// Create a dispatch block which will contain a jump table to the entries.
MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), Dispatch);
- MLI.changeLoopFor(Dispatch, Loop);
+ Blocks.insert(Dispatch);
// Add the jump table.
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
- TII.get(WebAssembly::BR_TABLE_I32));
+ MachineInstrBuilder MIB =
+ BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
// Add the register which will be used to tell the jump table which block to
// jump to.
@@ -285,112 +364,110 @@ bool LoopFixer::run() {
// Compute the indices in the superheader, one for each bad block, and
// add them as successors.
DenseMap<MachineBasicBlock *, unsigned> Indices;
- for (auto *MBB : SortedEntries) {
- auto Pair = Indices.insert(std::make_pair(MBB, 0));
- if (!Pair.second) {
- continue;
- }
+ for (auto *Entry : SortedEntries) {
+ auto Pair = Indices.insert(std::make_pair(Entry, 0));
+ assert(Pair.second);
unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
Pair.first->second = Index;
- MIB.addMBB(MBB);
- Dispatch->addSuccessor(MBB);
+ MIB.addMBB(Entry);
+ Dispatch->addSuccessor(Entry);
}
- // Rewrite the problematic successors for every block that wants to reach the
- // bad blocks. For simplicity, we just introduce a new block for every edge
- // we need to rewrite. (Fancier things are possible.)
+ // Rewrite the problematic successors for every block that wants to reach
+ // the bad blocks. For simplicity, we just introduce a new block for every
+ // edge we need to rewrite. (Fancier things are possible.)
- SmallVector<MachineBasicBlock *, 4> AllPreds;
- for (auto *MBB : SortedEntries) {
- for (auto *Pred : MBB->predecessors()) {
+ BlockVector AllPreds;
+ for (auto *Entry : SortedEntries) {
+ for (auto *Pred : Entry->predecessors()) {
if (Pred != Dispatch) {
AllPreds.push_back(Pred);
}
}
}
- for (MachineBasicBlock *MBB : AllPreds) {
- DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
- for (auto *Succ : MBB->successors()) {
- if (!Entries.count(Succ)) {
+ // This set stores predecessors within this loop.
+ DenseSet<MachineBasicBlock *> InLoop;
+ for (auto *Pred : AllPreds) {
+ for (auto *Entry : Pred->successors()) {
+ if (!Entries.count(Entry))
continue;
+ if (Graph.canReach(Entry, Pred)) {
+ InLoop.insert(Pred);
+ break;
}
+ }
+ }
+
+ // Record if each entry has a layout predecessor. This map stores
+ // <<Predecessor is within the loop?, loop entry>, layout predecessor>
+ std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
+ EntryToLayoutPred;
+ for (auto *Pred : AllPreds)
+ for (auto *Entry : Pred->successors())
+ if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
+ EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
+
+ // We need to create at most two routing blocks per entry: one for
+ // predecessors outside the loop and one for predecessors inside the loop.
+ // This map stores
+ // <<Predecessor is within the loop?, loop entry>, routing block>
+ std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
+ for (auto *Pred : AllPreds) {
+ bool PredInLoop = InLoop.count(Pred);
+ for (auto *Entry : Pred->successors()) {
+ if (!Entries.count(Entry) ||
+ Map.count(std::make_pair(InLoop.count(Pred), Entry)))
+ continue;
+ // If there exists a layout predecessor of this entry and this predecessor
+ // is not it, we instead create the routing block after that layout
+ // predecessor to save a branch.
+ if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
+ EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
+ continue;
// This is a successor we need to rewrite.
- MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
- MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
- : MF.end(),
- Split);
- MLI.changeLoopFor(Split, Loop);
+ MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
+ MF.insert(Pred->isLayoutSuccessor(Entry)
+ ? MachineFunction::iterator(Entry)
+ : MF.end(),
+ Routing);
+ Blocks.insert(Routing);
// Set the jump table's register of the index of the block we wish to
// jump to, and jump to the jump table.
- BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
- Reg)
- .addImm(Indices[Succ]);
- BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
- .addMBB(Dispatch);
- Split->addSuccessor(Dispatch);
- Map[Succ] = Split;
+ BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
+ .addImm(Indices[Entry]);
+ BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
+ Routing->addSuccessor(Dispatch);
+ Map[std::make_pair(PredInLoop, Entry)] = Routing;
}
+ }
+
+ for (auto *Pred : AllPreds) {
+ bool PredInLoop = InLoop.count(Pred);
// Remap the terminator operands and the successor list.
- for (MachineInstr &Term : MBB->terminators())
+ for (MachineInstr &Term : Pred->terminators())
for (auto &Op : Term.explicit_uses())
if (Op.isMBB() && Indices.count(Op.getMBB()))
- Op.setMBB(Map[Op.getMBB()]);
- for (auto Rewrite : Map)
- MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
+ Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
+
+ for (auto *Succ : Pred->successors()) {
+ if (!Entries.count(Succ))
+ continue;
+ auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
+ Pred->replaceSuccessor(Succ, Routing);
+ }
}
// Create a fake default label, because br_table requires one.
MIB.addMBB(MIB.getInstr()
->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
.getMBB());
-
- return true;
}
-class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
- StringRef getPassName() const override {
- return "WebAssembly Fix Irreducible Control Flow";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
- // Visit the function body, which is identified as a null loop.
- if (LoopFixer(MF, MLI, nullptr).run()) {
- return true;
- }
-
- // Visit all the loops.
- SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
- while (!Worklist.empty()) {
- MachineLoop *Loop = Worklist.pop_back_val();
- Worklist.append(Loop->begin(), Loop->end());
- if (LoopFixer(MF, MLI, Loop).run()) {
- return true;
- }
- }
-
- return false;
- }
-
-public:
- static char ID; // Pass identification, replacement for typeid
- WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
-};
} // end anonymous namespace
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
@@ -407,23 +484,18 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
"********** Function: "
<< MF.getName() << '\n');
- bool Changed = false;
- auto &MLI = getAnalysis<MachineLoopInfo>();
-
- // When we modify something, bail out and recompute MLI, then start again, as
- // we create a new natural loop when we resolve irreducible control flow, and
- // other loops may become nested in it, etc. In practice this is not an issue
- // because irreducible control flow is rare, only very few cycles are needed
- // here.
- while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
- // We rewrote part of the function; recompute MLI and start again.
- LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
+ // Start the recursive process on the entire function body.
+ BlockSet AllBlocks;
+ for (auto &MBB : MF) {
+ AllBlocks.insert(&MBB);
+ }
+
+ if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
+ // We rewrote part of the function; recompute relevant things.
MF.getRegInfo().invalidateLiveness();
MF.RenumberBlocks();
- getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
- MLI.runOnMachineFunction(MF);
- Changed = true;
+ return true;
}
- return Changed;
+ return false;
}
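// A compact sketch of the reachability fixed point that ReachabilityGraph
// computes, run on a toy region with integer block ids instead of
// MachineBasicBlocks. Block 0 is the region entry and, as in the pass, edges
// into the entry are ignored; blocks 1 and 2 form a loop, so both are
// reported as loopers. The data is invented purely for illustration.

#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

int main() {
  const int Entry = 0;
  // Successor lists: 0 -> 1, 1 -> 2, 2 -> {1, 3}, 3 -> (exit).
  std::map<int, std::vector<int>> Succs = {
      {0, {1}}, {1, {2}}, {2, {1, 3}}, {3, {}}};
  std::map<int, std::vector<int>> Preds;
  for (const auto &KV : Succs)
    for (int S : KV.second)
      Preds[S].push_back(KV.first);

  std::map<int, std::set<int>> Reachable;
  std::vector<std::pair<int, int>> WorkList;
  // Seed with all direct edges, ignoring branches back to the entry.
  for (const auto &KV : Succs)
    for (int S : KV.second)
      if (S != Entry && Reachable[KV.first].insert(S).second)
        WorkList.emplace_back(KV.first, S);

  while (!WorkList.empty()) {
    auto Item = WorkList.back();
    WorkList.pop_back();
    int MBB = Item.first, Succ = Item.second;
    if (MBB == Entry)
      continue;
    // We just learned MBB => Succ, so every predecessor may now reach Succ.
    for (int Pred : Preds[MBB])
      if (Reachable[Pred].insert(Succ).second)
        WorkList.emplace_back(Pred, Succ);
  }

  // Loopers are blocks that can reach themselves; here that is {1, 2}.
  for (const auto &KV : Reachable)
    if (KV.second.count(KV.first))
      std::printf("looper: %d\n", KV.first);
  return 0;
}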
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 2d5aff28d27b..5299068efdd4 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyFrameLowering.cpp - WebAssembly Frame Lowering ----------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -131,7 +130,7 @@ void WebAssemblyFrameLowering::writeSPToGlobal(
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::GLOBAL_SET_I32))
- .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL)
+ .addExternalSymbol(SPSymbol)
.addReg(SrcReg);
}
@@ -165,7 +164,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.begin();
- while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
+ while (InsertPt != MBB.end() &&
+ WebAssembly::isArgument(InsertPt->getOpcode()))
++InsertPt;
DebugLoc DL;
@@ -178,7 +178,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GLOBAL_GET_I32), SPReg)
- .addExternalSymbol(SPSymbol, WebAssemblyII::MO_SYMBOL_GLOBAL);
+ .addExternalSymbol(SPSymbol);
bool HasBP = hasBP(MF);
if (HasBP) {
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index c6fa8261b03f..daddd4ca16ff 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -1,9 +1,8 @@
// WebAssemblyFrameLowering.h - TargetFrameLowering for WebAssembly -*- C++ -*-/
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def
index e987d7f7f43a..77217f16a727 100644
--- a/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -1,9 +1,8 @@
//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -16,9 +15,14 @@
HANDLE_NODETYPE(CALL1)
HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RET_CALL)
HANDLE_NODETYPE(RETURN)
HANDLE_NODETYPE(ARGUMENT)
+// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
HANDLE_NODETYPE(Wrapper)
+// A special wrapper used in PIC code for __memory_base/__table_base relative
+// access.
+HANDLE_NODETYPE(WrapperPIC)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
HANDLE_NODETYPE(SHUFFLE)
@@ -26,5 +30,7 @@ HANDLE_NODETYPE(VEC_SHL)
HANDLE_NODETYPE(VEC_SHR_S)
HANDLE_NODETYPE(VEC_SHR_U)
HANDLE_NODETYPE(THROW)
+HANDLE_NODETYPE(MEMORY_COPY)
+HANDLE_NODETYPE(MEMORY_FILL)
// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 0a7464cedc90..26339eaef37d 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//- WebAssemblyISelDAGToDAG.cpp - A dag to dag inst selector for WebAssembly -//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -16,6 +15,7 @@
#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -38,9 +38,9 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel {
bool ForCodeSize;
public:
- WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &tm,
+ WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
+ : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
}
StringRef getPassName() const override {
@@ -52,8 +52,7 @@ public:
"********** Function: "
<< MF.getName() << '\n');
- ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction().hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = MF.getFunction().hasOptSize();
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -79,14 +78,159 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
return;
}
- // Few custom selection stuff. If we need WebAssembly-specific selection,
- // uncomment this block add corresponding case statements.
- /*
+ // Few custom selection stuff.
+ SDLoc DL(Node);
+ MachineFunction &MF = CurDAG->getMachineFunction();
switch (Node->getOpcode()) {
+ case ISD::ATOMIC_FENCE: {
+ if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+ break;
+
+ uint64_t SyncScopeID =
+ cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+ switch (SyncScopeID) {
+ case SyncScope::SingleThread: {
+ // We lower a single-thread fence to a pseudo compiler barrier instruction
+ // preventing instruction reordering. This will not be emitted in the final
+ // binary.
+ MachineSDNode *Fence =
+ CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+ DL, // debug loc
+ MVT::Other, // outchain type
+ Node->getOperand(0) // inchain
+ );
+ ReplaceNode(Node, Fence);
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+
+ case SyncScope::System: {
+ // For non-emscripten systems, we have not decided on what we should
+ // translate fences to yet.
+ if (!Subtarget->getTargetTriple().isOSEmscripten())
+ report_fatal_error(
+ "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
+
+ // Wasm does not have a fence instruction, but because all atomic
+ // instructions in wasm are sequentially consistent, we translate a
+ // fence to an idempotent atomic RMW instruction to a linear memory
+ // address. The RMW is not needed to order wasm atomics, which are already
+ // sequentially consistent; rather, it ensures a fence also prevents
+ // reordering of non-atomic
+ // instructions in the VM. Even though LLVM IR's fence instruction does
+ // not say anything about its relationship with non-atomic instructions,
+ // we think this is more user-friendly.
+ //
+ // While any address can work, here we use a value stored in
+ // the __stack_pointer wasm global because there's a high chance that area is
+ // in cache.
+ //
+ // So the selected instructions will be in the form of:
+ // %addr = get_global $__stack_pointer
+ // %0 = i32.const 0
+ // i32.atomic.rmw.or %addr, %0
+ SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
+ "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
+ MachineSDNode *GetGlobal =
+ CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
+ DL, // debug loc
+ MVT::i32, // result type
+ StackPtrSym // __stack_pointer symbol
+ );
+
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ auto *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getUnknownStack(MF),
+ // FIXME Volatile isn't really correct, but currently all LLVM
+ // atomic instructions are treated as volatiles in the backend, so
+ // we should be consistent.
+ MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore,
+ 4, 4, AAMDNodes(), nullptr, SyncScope::System,
+ AtomicOrdering::SequentiallyConsistent);
+ MachineSDNode *Const0 =
+ CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
+ MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
+ WebAssembly::ATOMIC_RMW_OR_I32, // opcode
+ DL, // debug loc
+ MVT::i32, // result type
+ MVT::Other, // outchain type
+ {
+ Zero, // alignment
+ Zero, // offset
+ SDValue(GetGlobal, 0), // __stack_pointer
+ SDValue(Const0, 0), // OR with 0 to make it idempotent
+ Node->getOperand(0) // inchain
+ });
+
+ CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
+ ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ default:
+ llvm_unreachable("Unknown scope!");
+ }
+ }
+
+ case ISD::GlobalTLSAddress: {
+ const auto *GA = cast<GlobalAddressSDNode>(Node);
+
+ if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+ report_fatal_error("cannot use thread-local storage without bulk memory",
+ false);
+
+ // Currently Emscripten does not support dynamic linking with threads.
+ // Therefore, if we have thread-local storage, only the local-exec model
+ // is possible.
+ // TODO: remove this and implement proper TLS models once Emscripten
+ // supports dynamic linking with threads.
+ if (GA->getGlobal()->getThreadLocalMode() !=
+ GlobalValue::LocalExecTLSModel &&
+ !Subtarget->getTargetTriple().isOSEmscripten()) {
+ report_fatal_error("only -ftls-model=local-exec is supported for now on "
+ "non-Emscripten OSes: variable " +
+ GA->getGlobal()->getName(),
+ false);
+ }
+
+ MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+ assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+ SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
+ SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
+
+ MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
+ DL, MVT::i32, TLSBaseSym);
+ MachineSDNode *TLSOffset = CurDAG->getMachineNode(
+ WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
+ MachineSDNode *TLSAddress =
+ CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
+ SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+ ReplaceNode(Node, TLSAddress);
+ return;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ case Intrinsic::wasm_tls_size: {
+ MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+ assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+ MachineSDNode *TLSSize = CurDAG->getMachineNode(
+ WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+ CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+ ReplaceNode(Node, TLSSize);
+ return;
+ }
+ }
+ break;
+ }
+
default:
break;
}
- */
// Select the default instruction.
SelectCode(Node);
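// A source-level sketch of the two fence shapes the ATOMIC_FENCE selection
// above distinguishes, assuming the wasm atomics feature is enabled and an
// Emscripten target; the comments restate the intent of the hunk and are not
// a guarantee of exact codegen. The same hunk also selects local-exec TLS
// addresses as __tls_base plus a constant offset, not shown here.

#include <atomic>

int Data;
std::atomic<int> Flag;

void publish() {
  Data = 42;
  // System scope, seq_cst: selected as an idempotent i32.atomic.rmw.or of 0
  // on the address held in the __stack_pointer global.
  std::atomic_thread_fence(std::memory_order_seq_cst);
  Flag.store(1, std::memory_order_relaxed);
}

void compilerBarrierOnly() {
  // Single-thread scope: becomes the COMPILER_FENCE pseudo, which constrains
  // reordering during compilation but emits nothing into the final binary.
  std::atomic_signal_fence(std::memory_order_seq_cst);
}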
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 003848e34227..4064a983099c 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1,9 +1,8 @@
//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -46,9 +45,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setBooleanContents(ZeroOrOneBooleanContent);
// Except in SIMD vectors
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- // WebAssembly does not produce floating-point exceptions on normal floating
- // point operations.
- setHasFloatingPointExceptions(false);
// We don't know the microarchitecture here, so just reduce register pressure.
setSchedulingPreference(Sched::RegPressure);
// Tell ISel that we have a stack pointer.
@@ -64,10 +60,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
- if (Subtarget->hasUnimplementedSIMD128()) {
- addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
- addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
- }
+ }
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
}
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -111,56 +107,62 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setTruncStoreAction(T, MVT::f16, Expand);
}
- // Support saturating add for i8x16 and i16x8
- if (Subtarget->hasSIMD128())
- for (auto T : {MVT::v16i8, MVT::v8i16})
- for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
- setOperationAction(Op, T, Legal);
-
// Expand unavailable integer operations.
for (auto Op :
{ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
- for (auto T : {MVT::i32, MVT::i64}) {
+ for (auto T : {MVT::i32, MVT::i64})
setOperationAction(Op, T, Expand);
- }
- if (Subtarget->hasSIMD128()) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) {
+ if (Subtarget->hasSIMD128())
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Expand);
- }
- if (Subtarget->hasUnimplementedSIMD128()) {
- setOperationAction(Op, MVT::v2i64, Expand);
- }
- }
+ if (Subtarget->hasUnimplementedSIMD128())
+ setOperationAction(Op, MVT::v2i64, Expand);
}
- // There is no i64x2.mul instruction
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
-
- // We have custom shuffle lowering to expose the shuffle mask
+ // SIMD-specific configuration
if (Subtarget->hasSIMD128()) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
+ // Support saturating add for i8x16 and i16x8
+ for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
+ for (auto T : {MVT::v16i8, MVT::v8i16})
+ setOperationAction(Op, T, Legal);
+
+ // Custom lower BUILD_VECTORs to minimize number of replace_lanes
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto T : {MVT::v2i64, MVT::v2f64})
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+
+ // We have custom shuffle lowering to expose the shuffle mask
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
- }
- if (Subtarget->hasUnimplementedSIMD128()) {
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
- }
- }
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto T: {MVT::v2i64, MVT::v2f64})
+ setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
- // Custom lowering since wasm shifts must have a scalar shift amount
- if (Subtarget->hasSIMD128()) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
- for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ // Custom lowering since wasm shifts must have a scalar shift amount
+ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Custom);
- if (Subtarget->hasUnimplementedSIMD128())
- for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
+ if (Subtarget->hasUnimplementedSIMD128())
setOperationAction(Op, MVT::v2i64, Custom);
- }
+ }
- // There are no select instructions for vectors
- if (Subtarget->hasSIMD128())
+ // Custom lower lane accesses to expand out variable indices
+ for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT}) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
+ setOperationAction(Op, T, Custom);
+ if (Subtarget->hasUnimplementedSIMD128())
+ for (auto T : {MVT::v2i64, MVT::v2f64})
+ setOperationAction(Op, T, Custom);
+ }
+
+ // There is no i64x2.mul instruction
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+
+ // There are no vector select instructions
for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) {
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32})
setOperationAction(Op, T, Expand);
@@ -169,6 +171,31 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, T, Expand);
}
+ // Expand integer operations supported for scalars but not SIMD
+ for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR}) {
+ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+ setOperationAction(Op, T, Expand);
+ if (Subtarget->hasUnimplementedSIMD128())
+ setOperationAction(Op, MVT::v2i64, Expand);
+ }
+
+ // Expand float operations supported for scalars but not SIMD
+ for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
+ ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+ ISD::FEXP, ISD::FEXP2, ISD::FRINT}) {
+ setOperationAction(Op, MVT::v4f32, Expand);
+ if (Subtarget->hasUnimplementedSIMD128())
+ setOperationAction(Op, MVT::v2f64, Expand);
+ }
+
+ // Expand additional SIMD ops that V8 hasn't implemented yet
+ if (!Subtarget->hasUnimplementedSIMD128()) {
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ }
+ }
+
// As a special case, these operators use the type to mean the type to
// sign-extend from.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -220,25 +247,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
}
}
- // Expand additional SIMD ops that V8 hasn't implemented yet
- if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) {
- setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
- setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
- }
-
- // Custom lower lane accesses to expand out variable indices
- if (Subtarget->hasSIMD128()) {
- for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) {
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
- }
- if (Subtarget->hasUnimplementedSIMD128()) {
- for (auto T : {MVT::v2i64, MVT::v2f64}) {
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
- }
- }
- }
+ // Don't do anything clever with build_pairs
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Trap lowers to wasm unreachable
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -248,6 +258,31 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setMaxAtomicSizeInBitsSupported(64);
+
+ if (Subtarget->hasBulkMemory()) {
+ // Use memory.copy and friends over multiple loads and stores
+ MaxStoresPerMemcpy = 1;
+ MaxStoresPerMemcpyOptSize = 1;
+ MaxStoresPerMemmove = 1;
+ MaxStoresPerMemmoveOptSize = 1;
+ MaxStoresPerMemset = 1;
+ MaxStoresPerMemsetOptSize = 1;
+ }
+
+ // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
+ // consistent with the f64 and f128 names.
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+
+ // Define the emscripten name for return address helper.
+ // TODO: when implementing other WASM backends, make this generic or only do
+ // this on emscripten depending on what they end up doing.
+ setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
+
+ // Always convert switches to br_tables unless there is only one case, which
+ // is equivalent to a simple branch. This reduces code size for wasm, and we
+ // defer possible jump table optimizations to the VM.
+ setMinimumJumpTableEntries(2);
}
TargetLowering::AtomicExpansionKind
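// A source-level sketch of constructs touched by the lowering setup above;
// the annotations restate the intent of those settings (bulk memory tuning,
// br_table-first switches, the Emscripten return-address libcall) and are
// illustrative rather than a promise about any particular compiler's output.

#include <cstring>

struct Packet { char Bytes[64]; };

void copyPacket(Packet &Dst, const Packet &Src) {
  // With bulk memory enabled, MaxStoresPerMemcpy is 1, so a fixed-size copy
  // like this is meant to become a single memory.copy rather than a run of
  // loads and stores.
  std::memcpy(&Dst, &Src, sizeof(Packet));
}

int classify(int Kind) {
  // setMinimumJumpTableEntries(2) makes even small dense switches eligible
  // for a br_table, deferring jump-table optimization to the wasm VM.
  switch (Kind) {
  case 0: return 10;
  case 1: return 20;
  case 2: return 30;
  default: return -1;
  }
}

void *callerAddress() {
  // RTLIB::RETURN_ADDRESS is renamed to emscripten_return_address, so on
  // Emscripten __builtin_return_address lowers to a call to that helper.
  return __builtin_return_address(0);
}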
@@ -272,12 +307,6 @@ FastISel *WebAssemblyTargetLowering::createFastISel(
return WebAssembly::createFastISel(FuncInfo, LibInfo);
}
-bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
- const GlobalAddressSDNode * /*GA*/) const {
- // All offsets can be folded.
- return true;
-}
-
MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
EVT VT) const {
unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
@@ -324,11 +353,11 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
auto &Context = BB->getParent()->getFunction().getContext();
Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ const BasicBlock *LLVMBB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
- MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
+ MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
+ MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
MachineFunction::iterator It = ++BB->getIterator();
F->insert(It, FalseMBB);
@@ -336,8 +365,7 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
F->insert(It, DoneMBB);
// Transfer the remainder of BB and its successor edges to DoneMBB.
- DoneMBB->splice(DoneMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(TrueMBB);
@@ -502,7 +530,8 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
- EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const {
+ EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/,
+ MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
// WebAssembly supports unaligned accesses, though it should be declared
// with the p2align attribute on loads and stores which do so, and there
// may be a performance impact. We tell LLVM they're "fast" because
@@ -578,14 +607,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Lowering Code
//===----------------------------------------------------------------------===//
-static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) {
+static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
- DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc()));
+ DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}
// Test whether the given calling convention is supported.
-static bool CallingConvSupported(CallingConv::ID CallConv) {
+static bool callingConvSupported(CallingConv::ID CallConv) {
// We currently support the language-independent target-independent
// conventions. We don't yet have a way to annotate calls with properties like
// "cold", and we don't have any call-clobbered registers, so these are mostly
@@ -608,20 +637,21 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
auto Layout = MF.getDataLayout();
CallingConv::ID CallConv = CLI.CallConv;
- if (!CallingConvSupported(CallConv))
+ if (!callingConvSupported(CallConv))
fail(DL, DAG,
"WebAssembly doesn't support language-specific or target-specific "
"calling conventions yet");
if (CLI.IsPatchPoint)
fail(DL, DAG, "WebAssembly doesn't support patch point yet");
- // WebAssembly doesn't currently support explicit tail calls. If they are
- // required, fail. Otherwise, just disable them.
- if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
- MF.getTarget().Options.GuaranteedTailCallOpt) ||
- (CLI.CS && CLI.CS.isMustTailCall()))
- fail(DL, DAG, "WebAssembly doesn't support tail call yet");
- CLI.IsTailCall = false;
+ // Fail if tail calls are required but not enabled
+ if (!Subtarget->hasTailCall()) {
+ if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+ MF.getTarget().Options.GuaranteedTailCallOpt) ||
+ (CLI.CS && CLI.CS.isMustTailCall()))
+ fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+ CLI.IsTailCall = false;
+ }
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
if (Ins.size() > 1)
@@ -630,9 +660,9 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
unsigned NumFixedArgs = 0;
- for (unsigned i = 0; i < Outs.size(); ++i) {
- const ISD::OutputArg &Out = Outs[i];
- SDValue &OutVal = OutVals[i];
+ for (unsigned I = 0; I < Outs.size(); ++I) {
+ const ISD::OutputArg &Out = Outs[I];
+ SDValue &OutVal = OutVals[I];
if (Out.Flags.isNest())
fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
if (Out.Flags.isInAlloca())
@@ -669,13 +699,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsVarArg) {
// Outgoing non-fixed arguments are placed in a buffer. First
// compute their offsets and the total amount of buffer space needed.
- for (SDValue Arg :
- make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
+ const ISD::OutputArg &Out = Outs[I];
+ SDValue &Arg = OutVals[I];
EVT VT = Arg.getValueType();
assert(VT != MVT::iPTR && "Legalized args should be concrete");
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ unsigned Align = std::max(Out.Flags.getOrigAlign(),
+ Layout.getABITypeAlignment(Ty));
unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
- Layout.getABITypeAlignment(Ty));
+ Align);
CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
Offset, VT.getSimpleVT(),
CCValAssign::Full));
@@ -711,6 +744,18 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
FINode = DAG.getIntPtrConstant(0, DL);
}
+ if (Callee->getOpcode() == ISD::GlobalAddress) {
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is), turn it into a TargetGlobalAddress node so that LowerGlobalAddress
+ // doesn't add MO_GOT, which is not needed for direct calls.
+ GlobalAddressSDNode* GA = cast<GlobalAddressSDNode>(Callee);
+ Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ getPointerTy(DAG.getDataLayout()),
+ GA->getOffset());
+ Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
+ getPointerTy(DAG.getDataLayout()), Callee);
+ }
+
// Compute the operands for the CALLn node.
SmallVector<SDValue, 16> Ops;
Ops.push_back(Chain);
@@ -739,6 +784,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
// registers.
InTys.push_back(In.VT);
}
+
+ if (CLI.IsTailCall) {
+ // ret_calls do not return values to the current frame
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
+ }
+
InTys.push_back(MVT::Other);
SDVTList InTyList = DAG.getVTList(InTys);
SDValue Res =
@@ -768,7 +820,7 @@ SDValue WebAssemblyTargetLowering::LowerReturn(
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const {
assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
- if (!CallingConvSupported(CallConv))
+ if (!callingConvSupported(CallConv))
fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -795,7 +847,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- if (!CallingConvSupported(CallConv))
+ if (!callingConvSupported(CallConv))
fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
MachineFunction &MF = DAG.getMachineFunction();
@@ -842,7 +894,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
// Record the number and types of arguments and results.
SmallVector<MVT, 4> Params;
SmallVector<MVT, 4> Results;
- ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
+ computeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(),
DAG.getTarget(), Params, Results);
for (MVT VT : Results)
MFI->addResult(VT);
@@ -855,6 +907,21 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
return Chain;
}
+void WebAssemblyTargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ case ISD::SIGN_EXTEND_INREG:
+ // Do not add any results, signifying that N should not be custom lowered
+ // after all. This happens because simd128 turns on custom lowering for
+ // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
+ // illegal type.
+ break;
+ default:
+ llvm_unreachable(
+ "ReplaceNodeResults not implemented for this op for WebAssembly!");
+ }
+}
+
//===----------------------------------------------------------------------===//
// Custom lowering hooks.
//===----------------------------------------------------------------------===//
@@ -882,22 +949,23 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
case ISD::BRIND:
fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
return SDValue();
- case ISD::RETURNADDR: // Probably nothing meaningful can be returned here.
- fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address");
- return SDValue();
+ case ISD::RETURNADDR:
+ return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::CopyToReg:
return LowerCopyToReg(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN:
- return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
return LowerAccessVectorElement(Op, DAG);
case ISD::INTRINSIC_VOID:
- return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ return LowerIntrinsic(Op, DAG);
case ISD::SIGN_EXTEND_INREG:
return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SHL:
@@ -939,6 +1007,26 @@ SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
return DAG.getTargetFrameIndex(FI, Op.getValueType());
}
+SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ if (!Subtarget->getTargetTriple().isOSEmscripten()) {
+ fail(DL, DAG,
+ "Non-Emscripten WebAssembly hasn't implemented "
+ "__builtin_return_address");
+ return SDValue();
+ }
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
+ {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL)
+ .first;
+}
+
SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
// Non-zero depths are not supported by WebAssembly currently. Use the
@@ -963,9 +1051,40 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
"Unexpected target flags on generic GlobalAddressSDNode");
if (GA->getAddressSpace() != 0)
fail(DL, DAG, "WebAssembly only expects the 0 address space");
- return DAG.getNode(
- WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+
+ unsigned OperandFlags = 0;
+ if (isPositionIndependent()) {
+ const GlobalValue *GV = GA->getGlobal();
+ if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT PtrVT = getPointerTy(MF.getDataLayout());
+ const char *BaseName;
+ if (GV->getValueType()->isFunctionTy()) {
+ BaseName = MF.createExternalSymbolName("__table_base");
+ OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
+ } else {
+ BaseName = MF.createExternalSymbolName("__memory_base");
+ OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
+ }
+ SDValue BaseAddr =
+ DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(BaseName, PtrVT));
+
+ SDValue SymAddr = DAG.getNode(
+ WebAssemblyISD::WrapperPIC, DL, VT,
+ DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
+ OperandFlags));
+
+ return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
+ } else {
+ OperandFlags = WebAssemblyII::MO_GOT;
+ }
+ }
+
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
+ GA->getOffset(), OperandFlags));
}
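A minimal sketch of the address computation the DSO-local PIC branch above produces at run time; the concrete numbers and variable names are illustrative, not taken from the source:

#include <cstdint>

// Illustrative only: BaseAddr models the run-time value of the __memory_base
// (or __table_base) global loaded through the Wrapper node, and
// SymOffsetFromBase models the MO_MEMORY_BASE_REL / MO_TABLE_BASE_REL
// relocation applied to the symbol. The final ISD::ADD simply sums the two.
int main() {
  uint32_t BaseAddr = 0x10000;       // example value of __memory_base
  uint32_t SymOffsetFromBase = 0x40; // example base-relative symbol offset
  uint32_t SymAddr = BaseAddr + SymOffsetFromBase;
  return SymAddr == 0x10040 ? 0 : 1;
}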
SDValue
@@ -976,15 +1095,8 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
EVT VT = Op.getValueType();
assert(ES->getTargetFlags() == 0 &&
"Unexpected target flags on generic ExternalSymbolSDNode");
- // Set the TargetFlags to 0x1 which indicates that this is a "function"
- // symbol rather than a data symbol. We do this unconditionally even though
- // we don't know anything about the symbol other than its name, because all
- // external symbols used in target-independent SelectionDAG code are for
- // functions.
- return DAG.getNode(
- WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
- WebAssemblyII::MO_SYMBOL_FUNCTION));
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}
SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
@@ -1038,17 +1150,28 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
MachinePointerInfo(SV), 0);
}
-SDValue
-WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned IntNo;
+ switch (Op.getOpcode()) {
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ break;
+ default:
+ llvm_unreachable("Invalid intrinsic");
+ }
SDLoc DL(Op);
+
switch (IntNo) {
default:
- return {}; // Don't custom lower most intrinsics.
+ return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::wasm_lsda: {
- MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
@@ -1058,43 +1181,24 @@ WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
DAG.getMCSymbol(S, PtrVT));
}
- }
-}
-
-SDValue
-WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
- SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- SDLoc DL(Op);
-
- switch (IntNo) {
- default:
- return {}; // Don't custom lower most intrinsics.
case Intrinsic::wasm_throw: {
+ // We only support C++ exceptions for now
int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- switch (Tag) {
- case CPP_EXCEPTION: {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- const char *SymName = MF.createExternalSymbolName("__cpp_exception");
- SDValue SymNode =
- DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
- DAG.getTargetExternalSymbol(
- SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
- return DAG.getNode(WebAssemblyISD::THROW, DL,
- MVT::Other, // outchain type
- {
- Op.getOperand(0), // inchain
- SymNode, // exception symbol
- Op.getOperand(3) // thrown value
- });
- }
- default:
+ if (Tag != CPP_EXCEPTION)
llvm_unreachable("Invalid tag!");
- }
- break;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName("__cpp_exception");
+ SDValue SymNode = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(SymName, PtrVT));
+ return DAG.getNode(WebAssemblyISD::THROW, DL,
+ MVT::Other, // outchain type
+ {
+ Op.getOperand(0), // inchain
+ SymNode, // exception symbol
+ Op.getOperand(3) // thrown value
+ });
}
}
}
@@ -1102,6 +1206,7 @@ WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
+ SDLoc DL(Op);
// If sign extension operations are disabled, allow sext_inreg only if operand
// is a vector extract. SIMD does not depend on sign extension operations, but
// allowing sext_inreg in this context lets us have simple patterns to select
@@ -1109,12 +1214,136 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// simpler in this file, but would necessitate large and brittle patterns to
// undo the expansion and select extract_lane_s instructions.
assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
- if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
- return Op;
+ if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ const SDValue &Extract = Op.getOperand(0);
+ MVT VecT = Extract.getOperand(0).getSimpleValueType();
+ MVT ExtractedLaneT = static_cast<VTSDNode *>(Op.getOperand(1).getNode())
+ ->getVT()
+ .getSimpleVT();
+ MVT ExtractedVecT =
+ MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
+ if (ExtractedVecT == VecT)
+ return Op;
+ // Bitcast vector to appropriate type to ensure ISel pattern coverage
+ const SDValue &Index = Extract.getOperand(1);
+ unsigned IndexVal =
+ static_cast<ConstantSDNode *>(Index.getNode())->getZExtValue();
+ unsigned Scale =
+ ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
+ assert(Scale > 1);
+ SDValue NewIndex =
+ DAG.getConstant(IndexVal * Scale, DL, Index.getValueType());
+ SDValue NewExtract = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
+ DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(),
+ NewExtract, Op.getOperand(1));
+ }
// Otherwise expand
return SDValue();
}
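A worked instance of the lane-index rescaling above, assuming WebAssembly's little-endian lane numbering; the types and index are illustrative:

#include <cassert>

// Illustrative only: mirrors the IndexVal * Scale computation in
// LowerSIGN_EXTEND_INREG. Sign-extending the low 16 bits of lane 1 of a
// v4i32 is rewritten as extracting lane 2 of the same register viewed as
// v8i16, so an extract_lane_s pattern can match.
int main() {
  unsigned VecLanes = 4;       // v4i32, the source of the extract
  unsigned ExtractedLanes = 8; // v8i16, built from the sext_inreg target type
  unsigned IndexVal = 1;       // original lane index
  unsigned Scale = ExtractedLanes / VecLanes; // 2
  assert(Scale > 1);
  unsigned NewIndex = IndexVal * Scale;       // 2
  return NewIndex == 2 ? 0 : 1;
}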
+SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const EVT VecT = Op.getValueType();
+ const EVT LaneT = Op.getOperand(0).getValueType();
+ const size_t Lanes = Op.getNumOperands();
+ auto IsConstant = [](const SDValue &V) {
+ return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
+ };
+
+ // Find the most common operand, which is approximately the best to splat
+ using Entry = std::pair<SDValue, size_t>;
+ SmallVector<Entry, 16> ValueCounts;
+ size_t NumConst = 0, NumDynamic = 0;
+ for (const SDValue &Lane : Op->op_values()) {
+ if (Lane.isUndef()) {
+ continue;
+ } else if (IsConstant(Lane)) {
+ NumConst++;
+ } else {
+ NumDynamic++;
+ }
+ auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
+ [&Lane](Entry A) { return A.first == Lane; });
+ if (CountIt == ValueCounts.end()) {
+ ValueCounts.emplace_back(Lane, 1);
+ } else {
+ CountIt->second++;
+ }
+ }
+ auto CommonIt =
+ std::max_element(ValueCounts.begin(), ValueCounts.end(),
+ [](Entry A, Entry B) { return A.second < B.second; });
+ assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
+ SDValue SplatValue = CommonIt->first;
+ size_t NumCommon = CommonIt->second;
+
+ // If v128.const is available, consider using it instead of a splat
+ if (Subtarget->hasUnimplementedSIMD128()) {
+ // {i32,i64,f32,f64}.const opcode, and value
+ const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
+ // SIMD prefix and opcode
+ const size_t SplatBytes = 2;
+ const size_t SplatConstBytes = SplatBytes + ConstBytes;
+ // SIMD prefix, opcode, and lane index
+ const size_t ReplaceBytes = 3;
+ const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
+ // SIMD prefix, v128.const opcode, and 128-bit value
+ const size_t VecConstBytes = 18;
+ // Initial v128.const and a replace_lane for each non-const operand
+ const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
+ // Initial splat and all necessary replace_lanes
+ const size_t SplatInitBytes =
+ IsConstant(SplatValue)
+ // Initial constant splat
+ ? (SplatConstBytes +
+ // Constant replace_lanes
+ (NumConst - NumCommon) * ReplaceConstBytes +
+ // Dynamic replace_lanes
+ (NumDynamic * ReplaceBytes))
+ // Initial dynamic splat
+ : (SplatBytes +
+ // Constant replace_lanes
+ (NumConst * ReplaceConstBytes) +
+ // Dynamic replace_lanes
+ (NumDynamic - NumCommon) * ReplaceBytes);
+ if (ConstInitBytes < SplatInitBytes) {
+ // Create build_vector that will lower to initial v128.const
+ SmallVector<SDValue, 16> ConstLanes;
+ for (const SDValue &Lane : Op->op_values()) {
+ if (IsConstant(Lane)) {
+ ConstLanes.push_back(Lane);
+ } else if (LaneT.isFloatingPoint()) {
+ ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
+ } else {
+ ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
+ }
+ }
+ SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+ // Add replace_lane instructions for non-const lanes
+ for (size_t I = 0; I < Lanes; ++I) {
+ const SDValue &Lane = Op->getOperand(I);
+ if (!Lane.isUndef() && !IsConstant(Lane))
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
+ DAG.getConstant(I, DL, MVT::i32));
+ }
+ return Result;
+ }
+ }
+ // Use a splat for the initial vector
+ SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
+ // Add replace_lane instructions for other values
+ for (size_t I = 0; I < Lanes; ++I) {
+ const SDValue &Lane = Op->getOperand(I);
+ if (Lane != SplatValue)
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
+ DAG.getConstant(I, DL, MVT::i32));
+ }
+ return Result;
+}
+
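A worked instance of the byte-size estimate above, for an assumed v4i32 build_vector of {1, 2, 3, x} where x is the only non-constant lane; the lane values are illustrative:

#include <algorithm>
#include <cstdio>

// Illustrative only: with Lanes = 4, each constant appears once, so
// NumConst = 3, NumDynamic = 1, NumCommon = 1, and the splat value is a
// constant. The same formulas as above give 21 bytes for the v128.const
// lowering and 26 bytes for the splat lowering, so v128.const is chosen.
int main() {
  const size_t Lanes = 4, NumConst = 3, NumDynamic = 1, NumCommon = 1;
  const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes); // 5
  const size_t SplatBytes = 2, ReplaceBytes = 3, VecConstBytes = 18;
  const size_t SplatConstBytes = SplatBytes + ConstBytes;        // 7
  const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;    // 8
  const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
  const size_t SplatInitBytes = SplatConstBytes +
                                (NumConst - NumCommon) * ReplaceConstBytes +
                                NumDynamic * ReplaceBytes;
  std::printf("v128.const init: %zu bytes, splat init: %zu bytes\n",
              ConstInitBytes, SplatInitBytes); // 21 vs 26
  return ConstInitBytes < SplatInitBytes ? 0 : 1;
}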
SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
@@ -1131,11 +1360,10 @@ WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
Ops[OpIdx++] = Op.getOperand(1);
// Expand mask indices to byte indices and materialize them as operands
- for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
+ for (int M : Mask) {
for (size_t J = 0; J < LaneBytes; ++J) {
// Lower undefs (represented by -1 in mask) to zero
- uint64_t ByteIndex =
- Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
+ uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
}
}
@@ -1155,7 +1383,7 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
return SDValue();
}
-static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
EVT LaneT = Op.getSimpleValueType().getVectorElementType();
// 32-bit and 64-bit unrolled shifts will have proper semantics
if (LaneT.bitsGE(MVT::i32))
@@ -1190,17 +1418,17 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Expand all vector shifts until V8 fixes its implementation
// TODO: remove this once V8 is fixed
if (!Subtarget->hasUnimplementedSIMD128())
- return UnrollVectorShift(Op, DAG);
+ return unrollVectorShift(Op, DAG);
// Unroll non-splat vector shifts
BuildVectorSDNode *ShiftVec;
SDValue SplatVal;
if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
!(SplatVal = ShiftVec->getSplatValue()))
- return UnrollVectorShift(Op, DAG);
+ return unrollVectorShift(Op, DAG);
// All splats except i64x2 const splats are handled by patterns
- ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
+ auto *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
return Op;
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 59f4230ed889..b3c7f3defd5f 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -1,9 +1,8 @@
//- WebAssemblyISelLowering.h - WebAssembly DAG Lowering Interface -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -47,7 +46,6 @@ private:
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -62,6 +60,7 @@ private:
unsigned AS,
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
+ MachineMemOperand::Flags Flags,
bool *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@@ -87,9 +86,17 @@ private:
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
+ const char *getClearCacheBuiltinName() const override {
+ report_fatal_error("llvm.clear_cache is not supported on wasm");
+ }
+
// Custom lowering hooks.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
@@ -97,9 +104,9 @@ private:
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsic(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 5fb8ef90bc43..e85aa57efc42 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -1,9 +1,8 @@
// WebAssemblyInstrAtomics.td-WebAssembly Atomic codegen support-*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -12,20 +11,132 @@
///
//===----------------------------------------------------------------------===//
+let UseNamedOperandTable = 1 in
+multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> atomic_op = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ !or(0xfe00, !and(0xff, atomic_op))>,
+ Requires<[HasAtomics]>;
+}
+
+multiclass ATOMIC_NRI<dag oops, dag iops, list<dag> pattern, string asmstr = "",
+ bits<32> atomic_op = -1> {
+ defm "" : NRI<oops, iops, pattern, asmstr,
+ !or(0xfe00, !and(0xff, atomic_op))>,
+ Requires<[HasAtomics]>;
+}
+
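A minimal check of the opcode composition used by ATOMIC_I and ATOMIC_NRI above; the helper name is made up, but the 0x10 and 0x48 sub-opcodes and their full forms 0xfe10 and 0xfe48 are the values used elsewhere in this file:

#include <cassert>
#include <cstdint>

// Illustrative only: !or(0xfe00, !and(0xff, atomic_op)) simply prepends the
// 0xfe atomic prefix byte to the one-byte sub-opcode.
int main() {
  auto FullOpcode = [](uint32_t AtomicOp) {
    return 0xfe00u | (AtomicOp & 0xffu);
  };
  assert(FullOpcode(0x10) == 0xfe10u); // i32.atomic.load
  assert(FullOpcode(0x48) == 0xfe48u); // i32.atomic.rmw.cmpxchg
  return 0;
}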
+//===----------------------------------------------------------------------===//
+// Atomic wait / notify
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+defm ATOMIC_NOTIFY :
+ ATOMIC_I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
+ "atomic.notify \t${off}${p2align}", 0x00>;
+let mayLoad = 1 in {
+defm ATOMIC_WAIT_I32 :
+ ATOMIC_I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp,
+ I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i32.atomic.wait \t${off}${p2align}", 0x01>;
+defm ATOMIC_WAIT_I64 :
+ ATOMIC_I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp,
+ I64:$timeout),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
+ "i64.atomic.wait \t${off}${p2align}", 0x02>;
+} // mayLoad = 1
+} // hasSideEffects = 1
+
+let Predicates = [HasAtomics] in {
+// Select notifys with no constant offset.
+def NotifyPatNoOffset :
+ Pat<(i32 (int_wasm_atomic_notify I32:$addr, I32:$count)),
+ (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
+
+// Select notifys with a constant offset.
+
+// Pattern with address + immediate offset
+class NotifyPatImmOff<PatFrag operand> :
+ Pat<(i32 (int_wasm_atomic_notify (operand I32:$addr, imm:$off), I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
+def : NotifyPatImmOff<regPlusImm>;
+def : NotifyPatImmOff<or_is_add>;
+
+def NotifyPatGlobalAddr :
+ Pat<(i32 (int_wasm_atomic_notify (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
+
+// Select notifys with just a constant offset.
+def NotifyPatOffsetOnly :
+ Pat<(i32 (int_wasm_atomic_notify imm:$off, I32:$count)),
+ (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
+
+def NotifyPatGlobalAddrOffOnly :
+ Pat<(i32 (int_wasm_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ I32:$count)),
+ (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
+
+// Select waits with no constant offset.
+class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
+ (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with a constant offset.
+
+// Pattern with address + immediate offset
+class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
+ Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
+def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
+ ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+// Select waits with just a constant offset.
+class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
+ (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+
+class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
+ Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
+def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
+def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
+} // Predicates = [HasAtomics]
+
//===----------------------------------------------------------------------===//
// Atomic loads
//===----------------------------------------------------------------------===//
-multiclass ATOMIC_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> inst = -1> {
- defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
- inst>,
+multiclass AtomicLoad<WebAssemblyRegClass rc, string name, int atomic_op> {
+ defm "" : WebAssemblyLoad<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
Requires<[HasAtomics]>;
}
-defm ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;
-defm ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>;
+defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
+defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
// Select loads with no constant offset.
let Predicates = [HasAtomics] in {
@@ -43,9 +154,6 @@ def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>;
def : LoadPatGlobalAddr<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatGlobalAddr<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-def : LoadPatExternalSym<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatExternalSym<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-
// Select loads with just a constant offset.
def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
@@ -53,18 +161,15 @@ def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-def : LoadPatExternSymOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>;
-def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>;
-
} // Predicates = [HasAtomics]
// Extending loads. Note that there are only zero-extending atomic loads, no
// sign-extending loads.
-defm ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>;
-defm ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>;
-defm ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>;
-defm ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>;
-defm ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>;
+defm ATOMIC_LOAD8_U_I32 : AtomicLoad<I32, "i32.atomic.load8_u", 0x12>;
+defm ATOMIC_LOAD16_U_I32 : AtomicLoad<I32, "i32.atomic.load16_u", 0x13>;
+defm ATOMIC_LOAD8_U_I64 : AtomicLoad<I64, "i64.atomic.load8_u", 0x14>;
+defm ATOMIC_LOAD16_U_I64 : AtomicLoad<I64, "i64.atomic.load16_u", 0x15>;
+defm ATOMIC_LOAD32_U_I64 : AtomicLoad<I64, "i64.atomic.load32_u", 0x16>;
// Fragments for extending loads. These are different from regular loads because
// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and
@@ -149,16 +254,6 @@ def : LoadPatGlobalAddr<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatGlobalAddr<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatGlobalAddr<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatExternalSym<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatExternalSym<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatExternalSym<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
// Extending loads with just a constant offset
def : LoadPatOffsetOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
def : LoadPatOffsetOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
@@ -180,24 +275,19 @@ def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
def : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i32, zext_aload_8_32, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, zext_aload_16_32, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>;
-def : LoadPatExternSymOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, sext_aload_8_64, ATOMIC_LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, sext_aload_16_64, ATOMIC_LOAD16_U_I64>;
-
} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
// Atomic stores
//===----------------------------------------------------------------------===//
-defm ATOMIC_STORE_I32 : WebAssemblyStore<I32, "i32.atomic.store", 0xfe17>;
-defm ATOMIC_STORE_I64 : WebAssemblyStore<I64, "i64.atomic.store", 0xfe18>;
+multiclass AtomicStore<WebAssemblyRegClass rc, string name, int atomic_op> {
+ defm "" : WebAssemblyStore<rc, name, !or(0xfe00, !and(0xff, atomic_op))>,
+ Requires<[HasAtomics]>;
+}
+
+defm ATOMIC_STORE_I32 : AtomicStore<I32, "i32.atomic.store", 0x17>;
+defm ATOMIC_STORE_I64 : AtomicStore<I64, "i64.atomic.store", 0x18>;
// We need an 'atomic' version of store patterns because store and atomic_store
// nodes have different operand orders:
@@ -230,12 +320,6 @@ class AStorePatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
def : AStorePatGlobalAddr<i32, atomic_store_32, ATOMIC_STORE_I32>;
def : AStorePatGlobalAddr<i64, atomic_store_64, ATOMIC_STORE_I64>;
-class AStorePatExternalSym<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)), ty:$val),
- (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
-def : AStorePatExternalSym<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatExternalSym<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
// Select stores with just a constant offset.
class AStorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
@@ -248,20 +332,14 @@ class AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
def : AStorePatGlobalAddrOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
def : AStorePatGlobalAddrOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
-class AStorePatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind (WebAssemblywrapper texternalsym:$off), ty:$val),
- (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
-def : AStorePatExternSymOffOnly<i32, atomic_store_32, ATOMIC_STORE_I32>;
-def : AStorePatExternSymOffOnly<i64, atomic_store_64, ATOMIC_STORE_I64>;
-
} // Predicates = [HasAtomics]
// Truncating stores.
-defm ATOMIC_STORE8_I32 : WebAssemblyStore<I32, "i32.atomic.store8", 0xfe19>;
-defm ATOMIC_STORE16_I32 : WebAssemblyStore<I32, "i32.atomic.store16", 0xfe1a>;
-defm ATOMIC_STORE8_I64 : WebAssemblyStore<I64, "i64.atomic.store8", 0xfe1b>;
-defm ATOMIC_STORE16_I64 : WebAssemblyStore<I64, "i64.atomic.store16", 0xfe1c>;
-defm ATOMIC_STORE32_I64 : WebAssemblyStore<I64, "i64.atomic.store32", 0xfe1d>;
+defm ATOMIC_STORE8_I32 : AtomicStore<I32, "i32.atomic.store8", 0x19>;
+defm ATOMIC_STORE16_I32 : AtomicStore<I32, "i32.atomic.store16", 0x1a>;
+defm ATOMIC_STORE8_I64 : AtomicStore<I64, "i64.atomic.store8", 0x1b>;
+defm ATOMIC_STORE16_I64 : AtomicStore<I64, "i64.atomic.store16", 0x1c>;
+defm ATOMIC_STORE32_I64 : AtomicStore<I64, "i64.atomic.store32", 0x1d>;
// Fragments for truncating stores.
@@ -302,12 +380,6 @@ def : AStorePatGlobalAddr<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
def : AStorePatGlobalAddr<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
def : AStorePatGlobalAddr<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-def : AStorePatExternalSym<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatExternalSym<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatExternalSym<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatExternalSym<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatExternalSym<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
// Truncating stores with just a constant offset
def : AStorePatOffsetOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
def : AStorePatOffsetOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
@@ -321,105 +393,101 @@ def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
def : AStorePatGlobalAddrOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-def : AStorePatExternSymOffOnly<i32, atomic_store_8, ATOMIC_STORE8_I32>;
-def : AStorePatExternSymOffOnly<i32, atomic_store_16, ATOMIC_STORE16_I32>;
-def : AStorePatExternSymOffOnly<i64, trunc_astore_8_64, ATOMIC_STORE8_I64>;
-def : AStorePatExternSymOffOnly<i64, trunc_astore_16_64, ATOMIC_STORE16_I64>;
-def : AStorePatExternSymOffOnly<i64, trunc_astore_32_64, ATOMIC_STORE32_I64>;
-
} // Predicates = [HasAtomics]
//===----------------------------------------------------------------------===//
// Atomic binary read-modify-writes
//===----------------------------------------------------------------------===//
-multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
- defm "" : I<(outs rc:$dst),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
- (outs), (ins P2Align:$p2align, offset32_op:$off), [],
- !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $val"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+multiclass WebAssemblyBinRMW<WebAssemblyRegClass rc, string name,
+ int atomic_op> {
+ defm "" :
+ ATOMIC_I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $val"),
+ !strconcat(name, "\t${off}${p2align}"), atomic_op>;
}
-defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0xfe1e>;
-defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0xfe1f>;
+defm ATOMIC_RMW_ADD_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.add", 0x1e>;
+defm ATOMIC_RMW_ADD_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.add", 0x1f>;
defm ATOMIC_RMW8_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0xfe20>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.add_u", 0x20>;
defm ATOMIC_RMW16_U_ADD_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0xfe21>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.add_u", 0x21>;
defm ATOMIC_RMW8_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0xfe22>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.add_u", 0x22>;
defm ATOMIC_RMW16_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0xfe23>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.add_u", 0x23>;
defm ATOMIC_RMW32_U_ADD_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0xfe24>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.add_u", 0x24>;
-defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0xfe25>;
-defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0xfe26>;
+defm ATOMIC_RMW_SUB_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.sub", 0x25>;
+defm ATOMIC_RMW_SUB_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.sub", 0x26>;
defm ATOMIC_RMW8_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0xfe27>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.sub_u", 0x27>;
defm ATOMIC_RMW16_U_SUB_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0xfe28>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.sub_u", 0x28>;
defm ATOMIC_RMW8_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0xfe29>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.sub_u", 0x29>;
defm ATOMIC_RMW16_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0xfe2a>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.sub_u", 0x2a>;
defm ATOMIC_RMW32_U_SUB_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0xfe2b>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.sub_u", 0x2b>;
-defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0xfe2c>;
-defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0xfe2d>;
+defm ATOMIC_RMW_AND_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.and", 0x2c>;
+defm ATOMIC_RMW_AND_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.and", 0x2d>;
defm ATOMIC_RMW8_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0xfe2e>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.and_u", 0x2e>;
defm ATOMIC_RMW16_U_AND_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0xfe2f>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.and_u", 0x2f>;
defm ATOMIC_RMW8_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0xfe30>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.and_u", 0x30>;
defm ATOMIC_RMW16_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0xfe31>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.and_u", 0x31>;
defm ATOMIC_RMW32_U_AND_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0xfe32>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.and_u", 0x32>;
-defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0xfe33>;
-defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0xfe34>;
+defm ATOMIC_RMW_OR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.or", 0x33>;
+defm ATOMIC_RMW_OR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.or", 0x34>;
defm ATOMIC_RMW8_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0xfe35>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.or_u", 0x35>;
defm ATOMIC_RMW16_U_OR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0xfe36>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.or_u", 0x36>;
defm ATOMIC_RMW8_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0xfe37>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.or_u", 0x37>;
defm ATOMIC_RMW16_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0xfe38>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.or_u", 0x38>;
defm ATOMIC_RMW32_U_OR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0xfe39>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.or_u", 0x39>;
-defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0xfe3a>;
-defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0xfe3b>;
+defm ATOMIC_RMW_XOR_I32 : WebAssemblyBinRMW<I32, "i32.atomic.rmw.xor", 0x3a>;
+defm ATOMIC_RMW_XOR_I64 : WebAssemblyBinRMW<I64, "i64.atomic.rmw.xor", 0x3b>;
defm ATOMIC_RMW8_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0xfe3c>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xor_u", 0x3c>;
defm ATOMIC_RMW16_U_XOR_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0xfe3d>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xor_u", 0x3d>;
defm ATOMIC_RMW8_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0xfe3e>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xor_u", 0x3e>;
defm ATOMIC_RMW16_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0xfe3f>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xor_u", 0x3f>;
defm ATOMIC_RMW32_U_XOR_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0xfe40>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xor_u", 0x40>;
defm ATOMIC_RMW_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0xfe41>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw.xchg", 0x41>;
defm ATOMIC_RMW_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0xfe42>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw.xchg", 0x42>;
defm ATOMIC_RMW8_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0xfe43>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw8.xchg_u", 0x43>;
defm ATOMIC_RMW16_U_XCHG_I32 :
- WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0xfe44>;
+ WebAssemblyBinRMW<I32, "i32.atomic.rmw16.xchg_u", 0x44>;
defm ATOMIC_RMW8_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0xfe45>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw8.xchg_u", 0x45>;
defm ATOMIC_RMW16_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0xfe46>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw16.xchg_u", 0x46>;
defm ATOMIC_RMW32_U_XCHG_I64 :
- WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0xfe47>;
+ WebAssemblyBinRMW<I64, "i64.atomic.rmw32.xchg_u", 0x47>;
// Select binary RMWs with no constant offset.
class BinRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -437,11 +505,6 @@ class BinRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
ty:$val)),
(inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
-class BinRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
- ty:$val)),
- (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
-
// Select binary RMWs with just a constant offset.
class BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind imm:$off, ty:$val)),
@@ -451,10 +514,6 @@ class BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
(inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
-class BinRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$val)),
- (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
-
// Patterns for various addressing modes.
multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
NI inst_64> {
@@ -469,17 +528,11 @@ multiclass BinRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
def : BinRMWPatGlobalAddr<i32, rmw_32, inst_32>;
def : BinRMWPatGlobalAddr<i64, rmw_64, inst_64>;
- def : BinRMWPatExternalSym<i32, rmw_32, inst_32>;
- def : BinRMWPatExternalSym<i64, rmw_64, inst_64>;
-
def : BinRMWPatOffsetOnly<i32, rmw_32, inst_32>;
def : BinRMWPatOffsetOnly<i64, rmw_64, inst_64>;
def : BinRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
def : BinRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
-
- def : BinRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
- def : BinRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
}
let Predicates = [HasAtomics] in {
@@ -580,17 +633,6 @@ multiclass BinRMWTruncExtPattern<
def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
def : BinRMWPatGlobalAddr<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatExternalSym<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatExternalSym<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatExternalSym<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatExternalSym<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatExternalSym<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
- def : BinRMWPatExternalSym<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatExternalSym<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatExternalSym<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatExternalSym<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
// Truncating-extending binary RMWs with just a constant offset
def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
def : BinRMWPatOffsetOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
@@ -613,17 +655,6 @@ multiclass BinRMWTruncExtPattern<
def : BinRMWPatGlobalAddrOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
def : BinRMWPatGlobalAddrOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
-
- def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatExternSymOffOnly<i32, zext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_16_64<rmw_16>, inst16_64>;
- def : BinRMWPatExternSymOffOnly<i64, zext_bin_rmw_32_64<rmw_32>, inst32_64>;
-
- def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_8_32<rmw_8>, inst8_32>;
- def : BinRMWPatExternSymOffOnly<i32, sext_bin_rmw_16_32<rmw_16>, inst16_32>;
- def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_8_64<rmw_8>, inst8_64>;
- def : BinRMWPatExternSymOffOnly<i64, sext_bin_rmw_16_64<rmw_16>, inst16_64>;
}
let Predicates = [HasAtomics] in {
@@ -663,29 +694,31 @@ defm : BinRMWTruncExtPattern<
// Consider adding a pass after instruction selection that optimizes this case
// if it is frequent.
-multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string Name, int Opcode> {
- defm "" : I<(outs rc:$dst),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
- rc:$new),
- (outs), (ins P2Align:$p2align, offset32_op:$off), [],
- !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"),
- !strconcat(Name, "\t${off}, ${p2align}"), Opcode>;
+multiclass WebAssemblyTerRMW<WebAssemblyRegClass rc, string name,
+ int atomic_op> {
+ defm "" :
+ ATOMIC_I<(outs rc:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp,
+ rc:$new_),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ !strconcat(name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new_"),
+ !strconcat(name, "\t${off}${p2align}"), atomic_op>;
}
defm ATOMIC_RMW_CMPXCHG_I32 :
- WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0xfe48>;
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw.cmpxchg", 0x48>;
defm ATOMIC_RMW_CMPXCHG_I64 :
- WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0xfe49>;
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw.cmpxchg", 0x49>;
defm ATOMIC_RMW8_U_CMPXCHG_I32 :
- WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0xfe4a>;
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw8.cmpxchg_u", 0x4a>;
defm ATOMIC_RMW16_U_CMPXCHG_I32 :
- WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0xfe4b>;
+ WebAssemblyTerRMW<I32, "i32.atomic.rmw16.cmpxchg_u", 0x4b>;
defm ATOMIC_RMW8_U_CMPXCHG_I64 :
- WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0xfe4c>;
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw8.cmpxchg_u", 0x4c>;
defm ATOMIC_RMW16_U_CMPXCHG_I64 :
- WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0xfe4d>;
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw16.cmpxchg_u", 0x4d>;
defm ATOMIC_RMW32_U_CMPXCHG_I64 :
- WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0xfe4e>;
+ WebAssemblyTerRMW<I64, "i64.atomic.rmw32.cmpxchg_u", 0x4e>;
// Select ternary RMWs with no constant offset.
class TerRMWPatNoOffset<ValueType ty, PatFrag kind, NI inst> :
@@ -704,11 +737,6 @@ class TerRMWPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
ty:$exp, ty:$new)),
(inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>;
-class TerRMWPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
- ty:$exp, ty:$new)),
- (inst 0, texternalsym:$off, I32:$addr, ty:$exp, ty:$new)>;
-
// Select ternary RMWs with just a constant offset.
class TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind imm:$off, ty:$exp, ty:$new)),
@@ -718,10 +746,6 @@ class TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
-class TerRMWPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$exp, ty:$new)),
- (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, ty:$new)>;
-
// Patterns for various addressing modes.
multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
NI inst_64> {
@@ -736,23 +760,16 @@ multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
def : TerRMWPatGlobalAddr<i32, rmw_32, inst_32>;
def : TerRMWPatGlobalAddr<i64, rmw_64, inst_64>;
- def : TerRMWPatExternalSym<i32, rmw_32, inst_32>;
- def : TerRMWPatExternalSym<i64, rmw_64, inst_64>;
-
def : TerRMWPatOffsetOnly<i32, rmw_32, inst_32>;
def : TerRMWPatOffsetOnly<i64, rmw_64, inst_64>;
def : TerRMWPatGlobalAddrOffOnly<i32, rmw_32, inst_32>;
def : TerRMWPatGlobalAddrOffOnly<i64, rmw_64, inst_64>;
-
- def : TerRMWPatExternSymOffOnly<i32, rmw_32, inst_32>;
- def : TerRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
}
-let Predicates = [HasAtomics] in {
+let Predicates = [HasAtomics] in
defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
-} // Predicates = [HasAtomics]
// Truncating & zero-extending ternary RMW patterns.
// DAG legalization & optimization before instruction selection may introduce
@@ -840,17 +857,6 @@ multiclass TerRMWTruncExtPattern<
def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
def : TerRMWPatGlobalAddr<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatExternalSym<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatExternalSym<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatExternalSym<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatExternalSym<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatExternalSym<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
- def : TerRMWPatExternalSym<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatExternalSym<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatExternalSym<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatExternalSym<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
// Truncating-extending ternary RMWs with just a constant offset
def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
def : TerRMWPatOffsetOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
@@ -873,147 +879,21 @@ multiclass TerRMWTruncExtPattern<
def : TerRMWPatGlobalAddrOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
def : TerRMWPatGlobalAddrOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
-
- def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatExternSymOffOnly<i32, zext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_16_64<rmw_16>, inst16_64>;
- def : TerRMWPatExternSymOffOnly<i64, zext_ter_rmw_32_64<rmw_32>, inst32_64>;
-
- def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_8_32<rmw_8>, inst8_32>;
- def : TerRMWPatExternSymOffOnly<i32, sext_ter_rmw_16_32<rmw_16>, inst16_32>;
- def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_8_64<rmw_8>, inst8_64>;
- def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
}
-let Predicates = [HasAtomics] in {
+let Predicates = [HasAtomics] in
defm : TerRMWTruncExtPattern<
atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
ATOMIC_RMW32_U_CMPXCHG_I64>;
-}
//===----------------------------------------------------------------------===//
-// Atomic wait / notify
+// Atomic fences
//===----------------------------------------------------------------------===//
-let hasSideEffects = 1 in {
-defm ATOMIC_NOTIFY :
- I<(outs I32:$dst),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$count),
- (outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "atomic.notify \t$dst, ${off}(${addr})${p2align}, $count",
- "atomic.notify \t${off}, ${p2align}", 0xfe00>;
-let mayLoad = 1 in {
-defm ATOMIC_WAIT_I32 :
- I<(outs I32:$dst),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I32:$exp, I64:$timeout),
- (outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "i32.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i32.atomic.wait \t${off}, ${p2align}", 0xfe01>;
-defm ATOMIC_WAIT_I64 :
- I<(outs I32:$dst),
- (ins P2Align:$p2align, offset32_op:$off, I32:$addr, I64:$exp, I64:$timeout),
- (outs), (ins P2Align:$p2align, offset32_op:$off), [],
- "i64.atomic.wait \t$dst, ${off}(${addr})${p2align}, $exp, $timeout",
- "i64.atomic.wait \t${off}, ${p2align}", 0xfe02>;
-} // mayLoad = 1
-} // hasSideEffects = 1
-
-let Predicates = [HasAtomics] in {
-// Select notifys with no constant offset.
-class NotifyPatNoOffset<Intrinsic kind> :
- Pat<(i32 (kind I32:$addr, I32:$count)),
- (ATOMIC_NOTIFY 0, 0, I32:$addr, I32:$count)>;
-def : NotifyPatNoOffset<int_wasm_atomic_notify>;
-
-// Select notifys with a constant offset.
-
-// Pattern with address + immediate offset
-class NotifyPatImmOff<Intrinsic kind, PatFrag operand> :
- Pat<(i32 (kind (operand I32:$addr, imm:$off), I32:$count)),
- (ATOMIC_NOTIFY 0, imm:$off, I32:$addr, I32:$count)>;
-def : NotifyPatImmOff<int_wasm_atomic_notify, regPlusImm>;
-def : NotifyPatImmOff<int_wasm_atomic_notify, or_is_add>;
-
-class NotifyPatGlobalAddr<Intrinsic kind> :
- Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
- I32:$count)),
- (ATOMIC_NOTIFY 0, tglobaladdr:$off, I32:$addr, I32:$count)>;
-def : NotifyPatGlobalAddr<int_wasm_atomic_notify>;
-
-class NotifyPatExternalSym<Intrinsic kind> :
- Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
- I32:$count)),
- (ATOMIC_NOTIFY 0, texternalsym:$off, I32:$addr, I32:$count)>;
-def : NotifyPatExternalSym<int_wasm_atomic_notify>;
-
-// Select notifys with just a constant offset.
-class NotifyPatOffsetOnly<Intrinsic kind> :
- Pat<(i32 (kind imm:$off, I32:$count)),
- (ATOMIC_NOTIFY 0, imm:$off, (CONST_I32 0), I32:$count)>;
-def : NotifyPatOffsetOnly<int_wasm_atomic_notify>;
-
-class NotifyPatGlobalAddrOffOnly<Intrinsic kind> :
- Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), I32:$count)),
- (ATOMIC_NOTIFY 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)>;
-def : NotifyPatGlobalAddrOffOnly<int_wasm_atomic_notify>;
-
-class NotifyPatExternSymOffOnly<Intrinsic kind> :
- Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), I32:$count)),
- (ATOMIC_NOTIFY 0, texternalsym:$off, (CONST_I32 0), I32:$count)>;
-def : NotifyPatExternSymOffOnly<int_wasm_atomic_notify>;
-
-// Select waits with no constant offset.
-class WaitPatNoOffset<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)),
- (inst 0, 0, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatNoOffset<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatNoOffset<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-// Select waits with a constant offset.
-
-// Pattern with address + immediate offset
-class WaitPatImmOff<ValueType ty, Intrinsic kind, PatFrag operand, NI inst> :
- Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)),
- (inst 0, imm:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, regPlusImm, ATOMIC_WAIT_I32>;
-def : WaitPatImmOff<i32, int_wasm_atomic_wait_i32, or_is_add, ATOMIC_WAIT_I32>;
-def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, regPlusImm, ATOMIC_WAIT_I64>;
-def : WaitPatImmOff<i64, int_wasm_atomic_wait_i64, or_is_add, ATOMIC_WAIT_I64>;
-
-class WaitPatGlobalAddr<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)),
- ty:$exp, I64:$timeout)),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatGlobalAddr<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatGlobalAddr<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatExternalSym<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)),
- ty:$exp, I64:$timeout)),
- (inst 0, texternalsym:$off, I32:$addr, ty:$exp, I64:$timeout)>;
-def : WaitPatExternalSym<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatExternalSym<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-// Select wait_i32, ATOMIC_WAIT_I32s with just a constant offset.
-class WaitPatOffsetOnly<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)),
- (inst 0, imm:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatOffsetOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatOffsetOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, I64:$timeout)),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-
-class WaitPatExternSymOffOnly<ValueType ty, Intrinsic kind, NI inst> :
- Pat<(i32 (kind (WebAssemblywrapper texternalsym:$off), ty:$exp,
- I64:$timeout)),
- (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, I64:$timeout)>;
-def : WaitPatExternSymOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
-def : WaitPatExternSymOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
-} // Predicates = [HasAtomics]
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
new file mode 100644
index 000000000000..f4352e3d12ec
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td
@@ -0,0 +1,71 @@
+// WebAssemblyInstrBulkMemory.td - bulk memory codegen support --*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly bulk memory codegen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+// Instruction requiring HasBulkMemory and the bulk memory prefix byte
+multiclass BULK_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
+ !or(0xfc00, !and(0xff, simdop))>,
+ Requires<[HasBulkMemory]>;
+}
+
+// Bespoke types and nodes for bulk memory ops
+def wasm_memcpy_t : SDTypeProfile<0, 5,
+ [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisPtrTy<3>, SDTCisInt<4>]
+>;
+def wasm_memcpy : SDNode<"WebAssemblyISD::MEMORY_COPY", wasm_memcpy_t,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+def wasm_memset_t : SDTypeProfile<0, 4,
+ [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>, SDTCisInt<3>]
+>;
+def wasm_memset : SDNode<"WebAssemblyISD::MEMORY_FILL", wasm_memset_t,
+ [SDNPHasChain, SDNPMayStore]>;
+
+let mayStore = 1, hasSideEffects = 1 in
+defm MEMORY_INIT :
+ BULK_I<(outs),
+ (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
+ I32:$offset, I32:$size),
+ (outs), (ins i32imm_op:$seg, i32imm_op:$idx),
+ [(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
+ I32:$offset, I32:$size
+ )],
+ "memory.init\t$seg, $idx, $dest, $offset, $size",
+ "memory.init\t$seg, $idx", 0x08>;
+
+let hasSideEffects = 1 in
+defm DATA_DROP :
+ BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
+ [(int_wasm_data_drop (i32 imm:$seg))],
+ "data.drop\t$seg", "data.drop\t$seg", 0x09>;
+
+let mayLoad = 1, mayStore = 1 in
+defm MEMORY_COPY :
+ BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx,
+ I32:$dst, I32:$src, I32:$len),
+ (outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx),
+ [(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx),
+ I32:$dst, I32:$src, I32:$len
+ )],
+ "memory.copy\t$src_idx, $dst_idx, $dst, $src, $len",
+ "memory.copy\t$src_idx, $dst_idx", 0x0a>;
+
+let mayStore = 1 in
+defm MEMORY_FILL :
+ BULK_I<(outs), (ins i32imm_op:$idx, I32:$dst, I32:$value, I32:$size),
+ (outs), (ins i32imm_op:$idx),
+ [(wasm_memset (i32 imm:$idx), I32:$dst, I32:$value, I32:$size)],
+ "memory.fill\t$idx, $dst, $value, $size",
+ "memory.fill\t$idx", 0x0b>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 07839b790114..703c15d58c93 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -1,9 +1,8 @@
//===- WebAssemblyInstrCall.td-WebAssembly Call codegen support -*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -22,109 +21,112 @@ defm ADJCALLSTACKDOWN : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
[(WebAssemblycallseq_start timm:$amt, timm:$amt2)]>;
defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
[(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
-} // isCodeGenOnly = 1
+} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1
-multiclass CALL<WebAssemblyRegClass vt, string prefix> {
- defm CALL_#vt : I<(outs vt:$dst), (ins function32_op:$callee, variable_ops),
- (outs), (ins function32_op:$callee),
- [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee"),
- !strconcat(prefix, "call\t$callee"),
- 0x10>;
+multiclass CALL<ValueType vt, WebAssemblyRegClass rt, string prefix,
+ list<Predicate> preds = []> {
+ defm CALL_#vt :
+ I<(outs rt:$dst), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee),
+ [(set (vt rt:$dst), (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee"),
+ !strconcat(prefix, "call\t$callee"),
+ 0x10>,
+ Requires<preds>;
- let isCodeGenOnly = 1 in {
- defm PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">;
- } // isCodeGenOnly = 1
+ let isCodeGenOnly = 1 in
+ defm PCALL_INDIRECT_#vt :
+ I<(outs rt:$dst), (ins I32:$callee, variable_ops),
+ (outs), (ins I32:$callee),
+ [(set (vt rt:$dst), (WebAssemblycall1 I32:$callee))],
+ "PSEUDO CALL INDIRECT\t$callee",
+ "PSEUDO CALL INDIRECT\t$callee">,
+ Requires<preds>;
- defm CALL_INDIRECT_#vt : I<(outs vt:$dst),
- (ins TypeIndex:$type, i32imm:$flags, variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- !strconcat(prefix, "call_indirect\t$dst"),
- !strconcat(prefix, "call_indirect\t$type"),
- 0x11>;
+ defm CALL_INDIRECT_#vt :
+ I<(outs rt:$dst),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags),
+ [],
+ !strconcat(prefix, "call_indirect\t$dst"),
+ !strconcat(prefix, "call_indirect\t$type"),
+ 0x11>,
+ Requires<preds>;
}
-multiclass SIMD_CALL<ValueType vt, string prefix> {
+let Uses = [SP32, SP64], isCall = 1 in {
+defm "" : CALL<i32, I32, "i32.">;
+defm "" : CALL<i64, I64, "i64.">;
+defm "" : CALL<f32, F32, "f32.">;
+defm "" : CALL<f64, F64, "f64.">;
+defm "" : CALL<exnref, EXNREF, "exnref.", [HasExceptionHandling]>;
+defm "" : CALL<v16i8, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v8i16, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v4i32, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v2i64, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v4f32, V128, "v128.", [HasSIMD128]>;
+defm "" : CALL<v2f64, V128, "v128.", [HasSIMD128]>;
- defm CALL_#vt : I<(outs V128:$dst), (ins function32_op:$callee, variable_ops),
- (outs), (ins function32_op:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee"),
- !strconcat(prefix, "call\t$callee"),
- 0x10>,
- Requires<[HasSIMD128]>;
+let IsCanonical = 1 in {
+defm CALL_VOID :
+ I<(outs), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee),
+ [(WebAssemblycall0 (i32 imm:$callee))],
+ "call \t$callee", "call\t$callee", 0x10>;
- let isCodeGenOnly = 1 in {
- defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
- (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(set (vt V128:$dst),
- (WebAssemblycall1 I32:$callee))],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">,
- Requires<[HasSIMD128]>;
- } // isCodeGenOnly = 1
+let isReturn = 1 in
+defm RET_CALL :
+ I<(outs), (ins function32_op:$callee, variable_ops),
+ (outs), (ins function32_op:$callee),
+ [(WebAssemblyretcall (i32 imm:$callee))],
+ "return_call \t$callee", "return_call\t$callee", 0x12>,
+ Requires<[HasTailCall]>;
- defm CALL_INDIRECT_#vt : I<(outs V128:$dst),
- (ins TypeIndex:$type, i32imm:$flags, variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- !strconcat(prefix, "call_indirect\t$dst"),
- !strconcat(prefix, "call_indirect\t$type"),
- 0x11>,
- Requires<[HasSIMD128]>;
-}
+let isCodeGenOnly = 1 in
+defm PCALL_INDIRECT_VOID :
+ I<(outs), (ins I32:$callee, variable_ops),
+ (outs), (ins I32:$callee),
+ [(WebAssemblycall0 I32:$callee)],
+ "PSEUDO CALL INDIRECT\t$callee",
+ "PSEUDO CALL INDIRECT\t$callee">;
-let Uses = [SP32, SP64], isCall = 1 in {
- defm "" : CALL<I32, "i32.">;
- defm "" : CALL<I64, "i64.">;
- defm "" : CALL<F32, "f32.">;
- defm "" : CALL<F64, "f64.">;
- defm "" : CALL<EXCEPT_REF, "except_ref.">;
- defm "" : SIMD_CALL<v16i8, "v128.">;
- defm "" : SIMD_CALL<v8i16, "v128.">;
- defm "" : SIMD_CALL<v4i32, "v128.">;
- defm "" : SIMD_CALL<v2i64, "v128.">;
- defm "" : SIMD_CALL<v4f32, "v128.">;
- defm "" : SIMD_CALL<v2f64, "v128.">;
+defm CALL_INDIRECT_VOID :
+ I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags),
+ [],
+ "call_indirect\t", "call_indirect\t$type",
+ 0x11>;
- defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
- (outs), (ins function32_op:$callee),
- [(WebAssemblycall0 (i32 imm:$callee))],
- "call \t$callee", "call\t$callee", 0x10>;
+let isReturn = 1 in
+defm RET_CALL_INDIRECT :
+ I<(outs), (ins TypeIndex:$type, i32imm:$flags, variable_ops),
+ (outs), (ins TypeIndex:$type, i32imm:$flags),
+ [],
+ "return_call_indirect\t", "return_call_indirect\t$type",
+ 0x13>,
+ Requires<[HasTailCall]>;
- let isCodeGenOnly = 1 in {
- defm PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
- (outs), (ins I32:$callee),
- [(WebAssemblycall0 I32:$callee)],
- "PSEUDO CALL INDIRECT\t$callee",
- "PSEUDO CALL INDIRECT\t$callee">;
- } // isCodeGenOnly = 1
+let isCodeGenOnly = 1, isReturn = 1 in
+defm PRET_CALL_INDIRECT:
+ I<(outs), (ins I32:$callee, variable_ops),
+ (outs), (ins I32:$callee),
+ [(WebAssemblyretcall I32:$callee)],
+ "PSEUDO RET_CALL INDIRECT\t$callee",
+ "PSEUDO RET_CALL INDIRECT\t$callee">,
+ Requires<[HasTailCall]>;
- defm CALL_INDIRECT_VOID : I<(outs),
- (ins TypeIndex:$type, i32imm:$flags,
- variable_ops),
- (outs), (ins TypeIndex:$type, i32imm:$flags),
- [],
- "call_indirect\t", "call_indirect\t$type",
- 0x11>;
+} // IsCanonical = 1
} // Uses = [SP32,SP64], isCall = 1
// Patterns for matching a direct call to a global address.
def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_I32 tglobaladdr:$callee)>;
+ (CALL_i32 tglobaladdr:$callee)>;
def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_I64 tglobaladdr:$callee)>;
+ (CALL_i64 tglobaladdr:$callee)>;
def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_F32 tglobaladdr:$callee)>;
+ (CALL_f32 tglobaladdr:$callee)>;
def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_F64 tglobaladdr:$callee)>;
+ (CALL_f64 tglobaladdr:$callee)>;
def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
@@ -137,21 +139,23 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(ExceptRef
- (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
- (CALL_EXCEPT_REF tglobaladdr:$callee)>;
+def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_exnref tglobaladdr:$callee)>,
+ Requires<[HasExceptionHandling]>;
def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
(CALL_VOID tglobaladdr:$callee)>;
+def : Pat<(WebAssemblyretcall (WebAssemblywrapper tglobaladdr:$callee)),
+ (RET_CALL tglobaladdr:$callee)>, Requires<[HasTailCall]>;
// Patterns for matching a direct call to an external symbol.
def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_I32 texternalsym:$callee)>;
+ (CALL_i32 texternalsym:$callee)>;
def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_I64 texternalsym:$callee)>;
+ (CALL_i64 texternalsym:$callee)>;
def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_F32 texternalsym:$callee)>;
+ (CALL_f32 texternalsym:$callee)>;
def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_F64 texternalsym:$callee)>;
+ (CALL_f64 texternalsym:$callee)>;
def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
@@ -164,8 +168,10 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(ExceptRef
- (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
- (CALL_EXCEPT_REF texternalsym:$callee)>;
+def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_exnref texternalsym:$callee)>,
+ Requires<[HasExceptionHandling]>;
def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
(CALL_VOID texternalsym:$callee)>;
+def : Pat<(WebAssemblyretcall (WebAssemblywrapper texternalsym:$callee)),
+ (RET_CALL texternalsym:$callee)>, Requires<[HasTailCall]>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 7eb6cbf4d249..1870c5bc34b0 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -1,9 +1,8 @@
//===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -21,11 +20,10 @@ defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
let isCodeGenOnly = 1 in
defm BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond),
(outs), (ins bb_op:$dst), []>;
-let isBarrier = 1 in {
+let isBarrier = 1 in
defm BR : NRI<(outs), (ins bb_op:$dst),
[(br bb:$dst)],
"br \t$dst", 0x0c>;
-} // isBarrier = 1
} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
@@ -36,14 +34,11 @@ def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
// A list of branch targets enclosed in {} and separated by comma.
// Used by br_table only.
def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; }
-let OperandNamespace = "WebAssembly" in {
-let OperandType = "OPERAND_BRLIST" in {
+let OperandNamespace = "WebAssembly", OperandType = "OPERAND_BRLIST" in
def brlist : Operand<i32> {
let ParserMatchClass = BrListAsmOperand;
let PrintMethod = "printBrList";
}
-} // OPERAND_BRLIST
-} // OperandNamespace = "WebAssembly"
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
@@ -82,6 +77,9 @@ defm ELSE : NRI<(outs), (ins), [], "else", 0x05>;
defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>;
defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>;
defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>;
+// Generic instruction, for the disassembler.
+let IsCanonical = 1 in
+defm END : NRI<(outs), (ins), [], "end", 0x0b>;
let isTerminator = 1, isBarrier = 1 in
defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -106,7 +104,7 @@ multiclass SIMD_RETURN<ValueType vt> {
let isCodeGenOnly = 1 in
defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
[]>,
- Requires<[HasSIMD128]>;
+ Requires<[HasSIMD128]>;
}
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
@@ -116,7 +114,7 @@ let isReturn = 1 in {
defm "": RETURN<I64>;
defm "": RETURN<F32>;
defm "": RETURN<F64>;
- defm "": RETURN<EXCEPT_REF>;
+ defm "": RETURN<EXNREF>;
defm "": SIMD_RETURN<v16i8>;
defm "": SIMD_RETURN<v8i16>;
defm "": SIMD_RETURN<v4i32>;
@@ -142,23 +140,17 @@ let Predicates = [HasExceptionHandling] in {
// Throwing an exception: throw / rethrow
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
-defm THROW_I32 : I<(outs), (ins event_op:$tag, I32:$val),
- (outs), (ins event_op:$tag),
- [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
- I32:$val)],
- "throw \t$tag, $val", "throw \t$tag",
- 0x08>;
-defm THROW_I64 : I<(outs), (ins event_op:$tag, I64:$val),
- (outs), (ins event_op:$tag),
- [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag),
- I64:$val)],
- "throw \t$tag, $val", "throw \t$tag",
- 0x08>;
-defm RETHROW : NRI<(outs), (ins bb_op:$dst), [], "rethrow \t$dst", 0x09>;
-let isCodeGenOnly = 1 in
-// This is used when the destination for rethrow is the caller function. This
-// will be converted to a rethrow in CFGStackify.
-defm RETHROW_TO_CALLER : NRI<(outs), (ins), [], "rethrow">;
+defm THROW : I<(outs), (ins event_op:$tag, variable_ops),
+ (outs), (ins event_op:$tag),
+ [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
+ "throw \t$tag", "throw \t$tag", 0x08>;
+defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [],
+ "rethrow \t$exn", "rethrow", 0x09>;
+// Pseudo instruction to be the lowering target of the int_wasm_rethrow_in_catch
+// intrinsic. It will be converted to the real rethrow instruction later.
+let isPseudo = 1 in
+defm RETHROW_IN_CATCH : NRI<(outs), (ins), [(int_wasm_rethrow_in_catch)],
+ "rethrow_in_catch", 0>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
// Region within which an exception is caught: try / end_try
@@ -167,24 +159,33 @@ defm TRY : NRI<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>;
defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
-// Catching an exception: catch / catch_all
-let hasCtrlDep = 1, hasSideEffects = 1 in {
-defm CATCH_I32 : I<(outs I32:$dst), (ins i32imm:$tag),
- (outs), (ins i32imm:$tag),
- [(set I32:$dst, (int_wasm_catch imm:$tag))],
- "i32.catch \t$dst, $tag", "i32.catch \t$tag", 0x07>;
-defm CATCH_I64 : I<(outs I64:$dst), (ins i32imm:$tag),
- (outs), (ins i32imm:$tag),
- [(set I64:$dst, (int_wasm_catch imm:$tag))],
- "i64.catch \t$dst, $tag", "i64.catch \t$tag", 0x07>;
-defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x05>;
-}
+// Catching an exception: catch / extract_exception
+let hasCtrlDep = 1, hasSideEffects = 1 in
+defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [],
+ "catch \t$dst", "catch", 0x07>;
+
+// Querying / extracting an exception: br_on_exn
+// br_on_exn queries an exnref to see if it matches the corresponding exception
+// tag index. If true, it branches to the given label and pushes the
+// corresponding argument values of the exception onto the stack.
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in
+defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn),
+ (outs), (ins bb_op:$dst, event_op:$tag), [],
+ "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag",
+ 0x0a>;
+// This is a pseudo instruction that simulates popping a value from the stack
+// that has been pushed there by br_on_exn.
+let isCodeGenOnly = 1, hasSideEffects = 1 in
+defm EXTRACT_EXCEPTION_I32 : NRI<(outs I32:$dst), (ins),
+ [(set I32:$dst, (int_wasm_extract_exception))],
+ "extract_exception\t$dst">;
// Pseudo instructions: cleanupret / catchret
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
- isCodeGenOnly = 1, isEHScopeReturn = 1 in {
- defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "", 0>;
+ isPseudo = 1, isEHScopeReturn = 1 in {
+ defm CLEANUPRET : NRI<(outs), (ins), [(cleanupret)], "cleanupret", 0>;
defm CATCHRET : NRI<(outs), (ins bb_op:$dst, bb_op:$from),
- [(catchret bb:$dst, bb:$from)], "", 0>;
-}
-}
+ [(catchret bb:$dst, bb:$from)], "catchret", 0>;
+} // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+ // isPseudo = 1, isEHScopeReturn = 1
+} // Predicates = [HasExceptionHandling]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index e128656a142c..661fee2715ba 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -1,9 +1,8 @@
//===-- WebAssemblyInstrConv.td-WebAssembly Conversion support -*- tablegen -*-=
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
deleted file mode 100644
index a251d60b89ee..000000000000
--- a/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
+++ /dev/null
@@ -1,27 +0,0 @@
-// WebAssemblyInstrExceptRef.td-WebAssembly except_ref codegen --*- tablegen -*-
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// WebAssembly except_ref operand code-gen constructs.
-///
-//===----------------------------------------------------------------------===//
-
-defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
- (ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond),
- (outs), (ins),
- [(set EXCEPT_REF:$dst,
- (select I32:$cond, EXCEPT_REF:$lhs,
- EXCEPT_REF:$rhs))],
- "except_ref.select\t$dst, $lhs, $rhs, $cond",
- "except_ref.select", 0x1b>;
-
-def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
- (SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
- (SELECT_EXCEPT_REF EXCEPT_REF:$rhs, EXCEPT_REF:$lhs, I32:$cond)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index c5290f00b431..5c9b34f44734 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -1,9 +1,8 @@
// WebAssemblyInstrFloat.td-WebAssembly Float codegen support ---*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 15a9714a55a1..aff4d20d8d82 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -1,9 +1,8 @@
//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -23,6 +22,9 @@ class WebAssemblyInst<bits<32> inst, string asmstr, string stack> : StackRel,
let Namespace = "WebAssembly";
let Pattern = [];
let AsmString = asmstr;
+ // When there are multiple instructions that map to the same encoding (e.g. in
+ // the disassembler use case), prefer the one where IsCanonical == 1.
+ bit IsCanonical = 0;
}
// Normal instructions. Default instantiation of a WebAssemblyInst.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 5efff32d6167..a86c9af28f0d 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyInstrInfo.cpp - WebAssembly Instruction Information ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -28,6 +27,10 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "WebAssemblyGenInstrInfo.inc"
+// defines WebAssembly::getNamedOperandIdx
+#define GET_INSTRINFO_NAMED_OPS
+#include "WebAssemblyGenInstrInfo.inc"
+
WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
: WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
WebAssembly::ADJCALLSTACKUP,
@@ -72,6 +75,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
CopyOpcode = WebAssembly::COPY_F64;
else if (RC == &WebAssembly::V128RegClass)
CopyOpcode = WebAssembly::COPY_V128;
+ else if (RC == &WebAssembly::EXNREFRegClass)
+ CopyOpcode = WebAssembly::COPY_EXNREF;
else
llvm_unreachable("Unexpected register class");
@@ -98,6 +103,13 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool /*AllowModify*/) const {
+ const auto &MFI = *MBB.getParent()->getInfo<WebAssemblyFunctionInfo>();
+ // WebAssembly has control flow that doesn't have explicit branches or direct
+ // fallthrough (e.g. try/catch), which can't be modeled by analyzeBranch. Such
+ // control flow is created after CFGStackify.
+ if (MFI.isCFGStackified())
+ return true;
+
bool HaveCond = false;
for (MachineInstr &MI : MBB.terminators()) {
switch (MI.getOpcode()) {
@@ -107,9 +119,6 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
case WebAssembly::BR_IF:
if (HaveCond)
return true;
- // If we're running after CFGStackify, we can't optimize further.
- if (!MI.getOperand(0).isMBB())
- return true;
Cond.push_back(MachineOperand::CreateImm(true));
Cond.push_back(MI.getOperand(1));
TBB = MI.getOperand(0).getMBB();
@@ -118,23 +127,25 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
case WebAssembly::BR_UNLESS:
if (HaveCond)
return true;
- // If we're running after CFGStackify, we can't optimize further.
- if (!MI.getOperand(0).isMBB())
- return true;
Cond.push_back(MachineOperand::CreateImm(false));
Cond.push_back(MI.getOperand(1));
TBB = MI.getOperand(0).getMBB();
HaveCond = true;
break;
case WebAssembly::BR:
- // If we're running after CFGStackify, we can't optimize further.
- if (!MI.getOperand(0).isMBB())
- return true;
if (!HaveCond)
TBB = MI.getOperand(0).getMBB();
else
FBB = MI.getOperand(0).getMBB();
break;
+ case WebAssembly::BR_ON_EXN:
+ if (HaveCond)
+ return true;
+ Cond.push_back(MachineOperand::CreateImm(true));
+ Cond.push_back(MI.getOperand(2));
+ TBB = MI.getOperand(0).getMBB();
+ HaveCond = true;
+ break;
}
if (MI.isBarrier())
break;
@@ -180,9 +191,22 @@ unsigned WebAssemblyInstrInfo::insertBranch(
assert(Cond.size() == 2 && "Expected a flag and a successor block");
+ MachineFunction &MF = *MBB.getParent();
+ auto &MRI = MF.getRegInfo();
+ bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) ==
+ &WebAssembly::EXNREFRegClass;
+
if (Cond[0].getImm()) {
- BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
+ if (IsBrOnExn) {
+ const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception");
+ BuildMI(&MBB, DL, get(WebAssembly::BR_ON_EXN))
+ .addMBB(TBB)
+ .addExternalSymbol(CPPExnSymbol)
+ .add(Cond[1]);
+ } else
+ BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
} else {
+ assert(!IsBrOnExn && "br_on_exn does not have a reversed condition");
BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]);
}
if (!FBB)
@@ -194,7 +218,15 @@ unsigned WebAssemblyInstrInfo::insertBranch(
bool WebAssemblyInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 2 && "Expected a flag and a successor block");
+ assert(Cond.size() == 2 && "Expected a flag and a condition expression");
+
+ // br_on_exn's condition cannot be reversed
+ MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent();
+ auto &MRI = MF.getRegInfo();
+ if (Cond[1].isReg() &&
+ MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass)
+ return true;
+
Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
return false;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index 4a3763c345b0..df1051b4f42c 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -1,9 +1,8 @@
//=- WebAssemblyInstrInfo.h - WebAssembly Instruction Information -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -22,8 +21,17 @@
#define GET_INSTRINFO_HEADER
#include "WebAssemblyGenInstrInfo.inc"
+#define GET_INSTRINFO_OPERAND_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
namespace llvm {
+namespace WebAssembly {
+
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+
+}
+
class WebAssemblySubtarget;
class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index e3d795f2aab1..73ddbe85d551 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -1,9 +1,8 @@
// WebAssemblyInstrInfo.td-Describe the WebAssembly Instructions-*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -16,41 +15,52 @@
// WebAssembly Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
+def IsPIC : Predicate<"TM.isPositionIndependent()">;
+def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
+
def HasAddr32 : Predicate<"!Subtarget->hasAddr64()">;
+
def HasAddr64 : Predicate<"Subtarget->hasAddr64()">;
-def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
- AssemblerPredicate<"FeatureSIMD128", "simd128">;
+
+def HasSIMD128 :
+ Predicate<"Subtarget->hasSIMD128()">,
+ AssemblerPredicate<"FeatureSIMD128", "simd128">;
+
def HasUnimplementedSIMD128 :
Predicate<"Subtarget->hasUnimplementedSIMD128()">,
AssemblerPredicate<"FeatureUnimplementedSIMD128", "unimplemented-simd128">;
-def HasAtomics : Predicate<"Subtarget->hasAtomics()">,
- AssemblerPredicate<"FeatureAtomics", "atomics">;
+
+def HasAtomics :
+ Predicate<"Subtarget->hasAtomics()">,
+ AssemblerPredicate<"FeatureAtomics", "atomics">;
+
+def HasMultivalue :
+ Predicate<"Subtarget->hasMultivalue()">,
+ AssemblerPredicate<"FeatureMultivalue", "multivalue">;
+
def HasNontrappingFPToInt :
Predicate<"Subtarget->hasNontrappingFPToInt()">,
- AssemblerPredicate<"FeatureNontrappingFPToInt",
- "nontrapping-fptoint">;
+ AssemblerPredicate<"FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+
def NotHasNontrappingFPToInt :
Predicate<"!Subtarget->hasNontrappingFPToInt()">,
- AssemblerPredicate<"!FeatureNontrappingFPToInt",
- "nontrapping-fptoint">;
+ AssemblerPredicate<"!FeatureNontrappingFPToInt", "nontrapping-fptoint">;
+
def HasSignExt :
Predicate<"Subtarget->hasSignExt()">,
- AssemblerPredicate<"FeatureSignExt",
- "sign-ext">;
-def NotHasSignExt :
- Predicate<"!Subtarget->hasSignExt()">,
- AssemblerPredicate<"!FeatureSignExt",
- "sign-ext">;
+ AssemblerPredicate<"FeatureSignExt", "sign-ext">;
+
+def HasTailCall :
+ Predicate<"Subtarget->hasTailCall()">,
+ AssemblerPredicate<"FeatureTailCall", "tail-call">;
def HasExceptionHandling :
Predicate<"Subtarget->hasExceptionHandling()">,
- AssemblerPredicate<"FeatureExceptionHandling",
- "exception-handling">;
+ AssemblerPredicate<"FeatureExceptionHandling", "exception-handling">;
-def NotHasExceptionHandling :
- Predicate<"!Subtarget->hasExceptionHandling()">,
- AssemblerPredicate<"!FeatureExceptionHandling",
- "exception-handling">;
+def HasBulkMemory :
+ Predicate<"Subtarget->hasBulkMemory()">,
+ AssemblerPredicate<"FeatureBulkMemory", "bulk-memory">;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Node Types.
@@ -60,14 +70,16 @@ def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>,
SDTCisVT<1, iPTR>]>;
def SDT_WebAssemblyCallSeqEnd :
SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
-def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
-def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
-def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
- SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyThrow : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
+def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
+def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
+def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
@@ -85,6 +97,9 @@ def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
SDT_WebAssemblyCall1,
[SDNPHasChain, SDNPVariadic]>;
+def WebAssemblyretcall : SDNode<"WebAssemblyISD::RET_CALL",
+ SDT_WebAssemblyCall0,
+ [SDNPHasChain, SDNPVariadic]>;
def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE",
SDT_WebAssemblyBrTable,
[SDNPHasChain, SDNPVariadic]>;
@@ -94,13 +109,26 @@ def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
SDT_WebAssemblyReturn, [SDNPHasChain]>;
def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
+def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC",
+ SDT_WebAssemblyWrapperPIC>;
def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
- [SDNPHasChain]>;
+ [SDNPHasChain, SDNPVariadic]>;
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
+// Default Operand has AsmOperandClass "Imm" which is for integers (and
+// symbols), so specialize one for floats:
+def FPImmAsmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let PredicateMethod = "isFPImm";
+}
+
+class FPOperand<ValueType ty> : Operand<ty> {
+ AsmOperandClass ParserMatchClass = FPImmAsmOperand;
+}
+
let OperandNamespace = "WebAssembly" in {
let OperandType = "OPERAND_BASIC_BLOCK" in
@@ -119,10 +147,10 @@ let OperandType = "OPERAND_I64IMM" in
def i64imm_op : Operand<i64>;
let OperandType = "OPERAND_F32IMM" in
-def f32imm_op : Operand<f32>;
+def f32imm_op : FPOperand<f32>;
let OperandType = "OPERAND_F64IMM" in
-def f64imm_op : Operand<f64>;
+def f64imm_op : FPOperand<f64>;
let OperandType = "OPERAND_VEC_I8IMM" in
def vec_i8imm_op : Operand<i32>;
@@ -152,11 +180,10 @@ def event_op : Operand<i32>;
} // OperandType = "OPERAND_P2ALIGN"
-let OperandType = "OPERAND_SIGNATURE" in {
+let OperandType = "OPERAND_SIGNATURE" in
def Signature : Operand<i32> {
let PrintMethod = "printWebAssemblySignatureOperand";
}
-} // OperandType = "OPERAND_SIGNATURE"
let OperandType = "OPERAND_TYPEINDEX" in
def TypeIndex : Operand<i32>;
@@ -187,8 +214,8 @@ include "WebAssemblyInstrFormats.td"
//===----------------------------------------------------------------------===//
multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> {
- let hasSideEffects = 1, isCodeGenOnly = 1,
- Defs = []<Register>, Uses = [ARGUMENTS] in
+ let hasSideEffects = 1, isCodeGenOnly = 1, Defs = []<Register>,
+ Uses = [ARGUMENTS] in
defm ARGUMENT_#vt :
I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno),
[(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>;
@@ -197,12 +224,12 @@ defm "": ARGUMENT<I32, i32>;
defm "": ARGUMENT<I64, i64>;
defm "": ARGUMENT<F32, f32>;
defm "": ARGUMENT<F64, f64>;
-defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
+defm "": ARGUMENT<EXNREF, exnref>;
// local.get and local.set are not generated by instruction selection; they
// are implied by virtual register uses and defs.
multiclass LOCAL<WebAssemblyRegClass vt> {
-let hasSideEffects = 0 in {
+ let hasSideEffects = 0 in {
// COPY is not an actual instruction in wasm, but since we allow local.get and
// local.set to be implicit during most of codegen, we can have a COPY which
// is actually a no-op because all the work is done in the implied local.get
@@ -267,7 +294,7 @@ defm "" : LOCAL<I64>;
defm "" : LOCAL<F32>;
defm "" : LOCAL<F64>;
defm "" : LOCAL<V128>, Requires<[HasSIMD128]>;
-defm "" : LOCAL<EXCEPT_REF>, Requires<[HasExceptionHandling]>;
+defm "" : LOCAL<EXNREF>, Requires<[HasExceptionHandling]>;
let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm),
@@ -289,9 +316,20 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
- (CONST_I32 tglobaladdr:$addr)>;
+ (CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC]>;
+
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+ (GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
+
+def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+ (CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC]>;
+
def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
- (CONST_I32 texternalsym:$addr)>;
+ (GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC]>;
+
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+ (CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC]>;
+
def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
@@ -307,4 +345,5 @@ include "WebAssemblyInstrConv.td"
include "WebAssemblyInstrFloat.td"
include "WebAssemblyInstrAtomics.td"
include "WebAssemblyInstrSIMD.td"
-include "WebAssemblyInstrExceptRef.td"
+include "WebAssemblyInstrRef.td"
+include "WebAssemblyInstrBulkMemory.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index d5b63d643697..18250cf8ef85 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -1,9 +1,8 @@
// WebAssemblyInstrInteger.td-WebAssembly Integer codegen -------*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -122,10 +121,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
(SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
(SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
-
-// The legalizer inserts an unnecessary `and 1` to make input conform
-// to getBooleanContents, which we can lower away.
-def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
- (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
- (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 518f81c61dc4..6916b165f970 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -1,9 +1,8 @@
// WebAssemblyInstrMemory.td-WebAssembly Memory codegen support -*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -53,7 +52,7 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
// Defines atomic and non-atomic loads, regular and extending.
multiclass WebAssemblyLoad<WebAssemblyRegClass rc, string Name, int Opcode> {
- let mayLoad = 1 in
+ let mayLoad = 1, UseNamedOperandTable = 1 in
defm "": I<(outs rc:$dst),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
(outs), (ins P2Align:$p2align, offset32_op:$off),
@@ -96,22 +95,13 @@ def : LoadPatImmOff<f64, load, or_is_add, LOAD_F64>;
class LoadPatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))),
- (inst 0, tglobaladdr:$off, I32:$addr)>;
+ (inst 0, tglobaladdr:$off, I32:$addr)>, Requires<[IsNotPIC]>;
def : LoadPatGlobalAddr<i32, load, LOAD_I32>;
def : LoadPatGlobalAddr<i64, load, LOAD_I64>;
def : LoadPatGlobalAddr<f32, load, LOAD_F32>;
def : LoadPatGlobalAddr<f64, load, LOAD_F64>;
-class LoadPatExternalSym<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
- (inst 0, texternalsym:$off, I32:$addr)>;
-def : LoadPatExternalSym<i32, load, LOAD_I32>;
-def : LoadPatExternalSym<i64, load, LOAD_I64>;
-def : LoadPatExternalSym<f32, load, LOAD_F32>;
-def : LoadPatExternalSym<f64, load, LOAD_F64>;
-
-
// Select loads with just a constant offset.
class LoadPatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>;
@@ -123,21 +113,13 @@ def : LoadPatOffsetOnly<f64, load, LOAD_F64>;
class LoadPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
- (inst 0, tglobaladdr:$off, (CONST_I32 0))>;
+ (inst 0, tglobaladdr:$off, (CONST_I32 0))>, Requires<[IsNotPIC]>;
def : LoadPatGlobalAddrOffOnly<i32, load, LOAD_I32>;
def : LoadPatGlobalAddrOffOnly<i64, load, LOAD_I64>;
def : LoadPatGlobalAddrOffOnly<f32, load, LOAD_F32>;
def : LoadPatGlobalAddrOffOnly<f64, load, LOAD_F64>;
-class LoadPatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(ty (kind (WebAssemblywrapper texternalsym:$off))),
- (inst 0, texternalsym:$off, (CONST_I32 0))>;
-def : LoadPatExternSymOffOnly<i32, load, LOAD_I32>;
-def : LoadPatExternSymOffOnly<i64, load, LOAD_I64>;
-def : LoadPatExternSymOffOnly<f32, load, LOAD_F32>;
-def : LoadPatExternSymOffOnly<f64, load, LOAD_F64>;
-
// Extending load.
defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>;
defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>;
@@ -197,18 +179,6 @@ def : LoadPatGlobalAddr<i64, zextloadi16, LOAD16_U_I64>;
def : LoadPatGlobalAddr<i64, sextloadi32, LOAD32_S_I64>;
def : LoadPatGlobalAddr<i64, zextloadi32, LOAD32_U_I64>;
-def : LoadPatExternalSym<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatExternalSym<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatExternalSym<i32, zextloadi16, LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatExternalSym<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatExternalSym<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatExternalSym<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatExternalSym<i64, zextloadi32, LOAD32_U_I64>;
-
-
// Select extending loads with just a constant offset.
def : LoadPatOffsetOnly<i32, sextloadi8, LOAD8_S_I32>;
def : LoadPatOffsetOnly<i32, zextloadi8, LOAD8_U_I32>;
@@ -233,17 +203,6 @@ def : LoadPatGlobalAddrOffOnly<i64, zextloadi16, LOAD16_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, sextloadi32, LOAD32_S_I64>;
def : LoadPatGlobalAddrOffOnly<i64, zextloadi32, LOAD32_U_I64>;
-def : LoadPatExternSymOffOnly<i32, sextloadi8, LOAD8_S_I32>;
-def : LoadPatExternSymOffOnly<i32, zextloadi8, LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, sextloadi16, LOAD16_S_I32>;
-def : LoadPatExternSymOffOnly<i32, zextloadi16, LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, sextloadi8, LOAD8_S_I64>;
-def : LoadPatExternSymOffOnly<i64, zextloadi8, LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, sextloadi16, LOAD16_S_I64>;
-def : LoadPatExternSymOffOnly<i64, zextloadi16, LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i64, sextloadi32, LOAD32_S_I64>;
-def : LoadPatExternSymOffOnly<i64, zextloadi32, LOAD32_U_I64>;
-
// Resolve "don't care" extending loads to zero-extending loads. This is
// somewhat arbitrary, but zero-extending is conceptually simpler.
@@ -270,11 +229,6 @@ def : LoadPatGlobalAddr<i32, extloadi16, LOAD16_U_I32>;
def : LoadPatGlobalAddr<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatGlobalAddr<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatGlobalAddr<i64, extloadi32, LOAD32_U_I64>;
-def : LoadPatExternalSym<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatExternalSym<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatExternalSym<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatExternalSym<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatExternalSym<i64, extloadi32, LOAD32_U_I64>;
// Select "don't care" extending loads with just a constant offset.
def : LoadPatOffsetOnly<i32, extloadi8, LOAD8_U_I32>;
@@ -287,15 +241,10 @@ def : LoadPatGlobalAddrOffOnly<i32, extloadi16, LOAD16_U_I32>;
def : LoadPatGlobalAddrOffOnly<i64, extloadi8, LOAD8_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, extloadi16, LOAD16_U_I64>;
def : LoadPatGlobalAddrOffOnly<i64, extloadi32, LOAD32_U_I64>;
-def : LoadPatExternSymOffOnly<i32, extloadi8, LOAD8_U_I32>;
-def : LoadPatExternSymOffOnly<i32, extloadi16, LOAD16_U_I32>;
-def : LoadPatExternSymOffOnly<i64, extloadi8, LOAD8_U_I64>;
-def : LoadPatExternSymOffOnly<i64, extloadi16, LOAD16_U_I64>;
-def : LoadPatExternSymOffOnly<i64, extloadi32, LOAD32_U_I64>;
// Defines atomic and non-atomic stores, regular and truncating
multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode> {
- let mayStore = 1 in
+ let mayStore = 1, UseNamedOperandTable = 1 in
defm "" : I<(outs),
(ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$val),
(outs),
@@ -336,20 +285,12 @@ def : StorePatImmOff<f64, store, or_is_add, STORE_F64>;
class StorePatGlobalAddr<ValueType ty, PatFrag kind, NI inst> :
Pat<(kind ty:$val,
(regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))),
- (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>;
+ (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>, Requires<[IsNotPIC]>;
def : StorePatGlobalAddr<i32, store, STORE_I32>;
def : StorePatGlobalAddr<i64, store, STORE_I64>;
def : StorePatGlobalAddr<f32, store, STORE_F32>;
def : StorePatGlobalAddr<f64, store, STORE_F64>;
-class StorePatExternalSym<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind ty:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))),
- (inst 0, texternalsym:$off, I32:$addr, ty:$val)>;
-def : StorePatExternalSym<i32, store, STORE_I32>;
-def : StorePatExternalSym<i64, store, STORE_I64>;
-def : StorePatExternalSym<f32, store, STORE_F32>;
-def : StorePatExternalSym<f64, store, STORE_F64>;
-
// Select stores with just a constant offset.
class StorePatOffsetOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>;
@@ -360,20 +301,12 @@ def : StorePatOffsetOnly<f64, store, STORE_F64>;
class StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, NI inst> :
Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>;
+ (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, Requires<[IsNotPIC]>;
def : StorePatGlobalAddrOffOnly<i32, store, STORE_I32>;
def : StorePatGlobalAddrOffOnly<i64, store, STORE_I64>;
def : StorePatGlobalAddrOffOnly<f32, store, STORE_F32>;
def : StorePatGlobalAddrOffOnly<f64, store, STORE_F64>;
-class StorePatExternSymOffOnly<ValueType ty, PatFrag kind, NI inst> :
- Pat<(kind ty:$val, (WebAssemblywrapper texternalsym:$off)),
- (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>;
-def : StorePatExternSymOffOnly<i32, store, STORE_I32>;
-def : StorePatExternSymOffOnly<i64, store, STORE_I64>;
-def : StorePatExternSymOffOnly<f32, store, STORE_F32>;
-def : StorePatExternSymOffOnly<f64, store, STORE_F64>;
-
// Truncating store.
defm STORE8_I32 : WebAssemblyStore<I32, "i32.store8", 0x3a>;
defm STORE16_I32 : WebAssemblyStore<I32, "i32.store16", 0x3b>;
@@ -405,11 +338,6 @@ def : StorePatGlobalAddr<i32, truncstorei16, STORE16_I32>;
def : StorePatGlobalAddr<i64, truncstorei8, STORE8_I64>;
def : StorePatGlobalAddr<i64, truncstorei16, STORE16_I64>;
def : StorePatGlobalAddr<i64, truncstorei32, STORE32_I64>;
-def : StorePatExternalSym<i32, truncstorei8, STORE8_I32>;
-def : StorePatExternalSym<i32, truncstorei16, STORE16_I32>;
-def : StorePatExternalSym<i64, truncstorei8, STORE8_I64>;
-def : StorePatExternalSym<i64, truncstorei16, STORE16_I64>;
-def : StorePatExternalSym<i64, truncstorei32, STORE32_I64>;
// Select truncating stores with just a constant offset.
def : StorePatOffsetOnly<i32, truncstorei8, STORE8_I32>;
@@ -422,11 +350,6 @@ def : StorePatGlobalAddrOffOnly<i32, truncstorei16, STORE16_I32>;
def : StorePatGlobalAddrOffOnly<i64, truncstorei8, STORE8_I64>;
def : StorePatGlobalAddrOffOnly<i64, truncstorei16, STORE16_I64>;
def : StorePatGlobalAddrOffOnly<i64, truncstorei32, STORE32_I64>;
-def : StorePatExternSymOffOnly<i32, truncstorei8, STORE8_I32>;
-def : StorePatExternSymOffOnly<i32, truncstorei16, STORE16_I32>;
-def : StorePatExternSymOffOnly<i64, truncstorei8, STORE8_I64>;
-def : StorePatExternSymOffOnly<i64, truncstorei16, STORE16_I64>;
-def : StorePatExternSymOffOnly<i64, truncstorei32, STORE32_I64>;
// Current memory size.
defm MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/lib/Target/WebAssembly/WebAssemblyInstrRef.td
new file mode 100644
index 000000000000..afe89de60b36
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -0,0 +1,25 @@
+// WebAssemblyInstrRef.td - WebAssembly reference type codegen --*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly reference type operand codegen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+defm SELECT_EXNREF : I<(outs EXNREF:$dst),
+ (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond),
+ (outs), (ins),
+ [(set EXNREF:$dst,
+ (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))],
+ "exnref.select\t$dst, $lhs, $rhs, $cond",
+ "exnref.select", 0x1b>;
+
+def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+ (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+ (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 587515c5b299..dd8930f079b0 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1,9 +1,8 @@
// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -31,7 +30,7 @@ defm "" : ARGUMENT<V128, v2f64>;
// Constrained immediate argument types
foreach SIZE = [8, 16] in
def ImmI#SIZE : ImmLeaf<i32,
- "return ((uint64_t)Imm & ((1UL << "#SIZE#") - 1)) == (uint64_t)Imm;"
+ "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
>;
foreach SIZE = [2, 4, 8, 16, 32] in
def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
@@ -42,12 +41,12 @@ def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">;
// Load: v128.load
multiclass SIMDLoad<ValueType vec_t> {
- let mayLoad = 1 in
+ let mayLoad = 1, UseNamedOperandTable = 1 in
defm LOAD_#vec_t :
- SIMD_I<(outs V128:$dst), (ins P2Align:$align, offset32_op:$off, I32:$addr),
- (outs), (ins P2Align:$align, offset32_op:$off), [],
- "v128.load\t$dst, ${off}(${addr})$align",
- "v128.load\t$off$align", 0>;
+ SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "v128.load\t$dst, ${off}(${addr})$p2align",
+ "v128.load\t$off$p2align", 0>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
@@ -58,20 +57,18 @@ def : LoadPatNoOffset<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatImmOff<vec_t, load, regPlusImm, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatImmOff<vec_t, load, or_is_add, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatGlobalAddr<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatExternalSym<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatOffsetOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
def : LoadPatGlobalAddrOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
-def : LoadPatExternSymOffOnly<vec_t, load, !cast<NI>("LOAD_"#vec_t)>;
}
// Store: v128.store
multiclass SIMDStore<ValueType vec_t> {
- let mayStore = 1 in
+ let mayStore = 1, UseNamedOperandTable = 1 in
defm STORE_#vec_t :
- SIMD_I<(outs), (ins P2Align:$align, offset32_op:$off, I32:$addr, V128:$vec),
- (outs), (ins P2Align:$align, offset32_op:$off), [],
- "v128.store\t${off}(${addr})$align, $vec",
- "v128.store\t$off$align", 1>;
+ SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
+ (outs), (ins P2Align:$p2align, offset32_op:$off), [],
+ "v128.store\t${off}(${addr})$p2align, $vec",
+ "v128.store\t$off$p2align", 1>;
}
foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
@@ -82,10 +79,8 @@ def : StorePatNoOffset<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
def : StorePatImmOff<vec_t, store, regPlusImm, !cast<NI>("STORE_"#vec_t)>;
def : StorePatImmOff<vec_t, store, or_is_add, !cast<NI>("STORE_"#vec_t)>;
def : StorePatGlobalAddr<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatExternalSym<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
def : StorePatOffsetOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
def : StorePatGlobalAddrOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
-def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
}
//===----------------------------------------------------------------------===//
@@ -95,7 +90,7 @@ def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
// Constant: v128.const
multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
let isMoveImm = 1, isReMaterializable = 1,
- Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+ Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
[(set V128:$dst, (vec_t pat))],
"v128.const\t$dst, "#args,
@@ -126,6 +121,7 @@ defm "" : ConstVec<v8i16,
ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
"$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
+let IsCanonical = 1 in
defm "" : ConstVec<v4i32,
(ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
vec_i32imm_op:$i2, vec_i32imm_op:$i3),
@@ -231,6 +227,19 @@ defm "" : Splat<v2i64, "i64x2", I64, splat2, 15>;
defm "" : Splat<v4f32, "f32x4", F32, splat4, 18>;
defm "" : Splat<v2f64, "f64x2", F64, splat2, 21>;
+// scalar_to_vector leaves high lanes undefined, so it can be a splat
+class ScalarSplatPat<ValueType vec_t, ValueType lane_t,
+ WebAssemblyRegClass reg_t> :
+ Pat<(vec_t (scalar_to_vector (lane_t reg_t:$x))),
+ (!cast<Instruction>("SPLAT_"#vec_t) reg_t:$x)>;
+
+def : ScalarSplatPat<v16i8, i32, I32>;
+def : ScalarSplatPat<v8i16, i32, I32>;
+def : ScalarSplatPat<v4i32, i32, I32>;
+def : ScalarSplatPat<v2i64, i64, I64>;
+def : ScalarSplatPat<v4f32, f32, F32>;
+def : ScalarSplatPat<v2f64, f64, F64>;
+
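// Rough analogy in plain C++ (scalarToVectorAsSplat is a hypothetical helper)
// for why the splat lowering is valid here: only lane 0 of a scalar_to_vector
// result is defined, so filling the remaining "don't care" lanes with the same
// value changes nothing observable.
#include <array>
#include <cassert>

static std::array<int, 4> scalarToVectorAsSplat(int X) {
  return {X, X, X, X}; // lanes 1..3 are free choices; reusing X enables a splat
}

int main() {
  assert(scalarToVectorAsSplat(7)[0] == 7); // only lane 0 is guaranteed
}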
//===----------------------------------------------------------------------===//
// Accessing lanes
//===----------------------------------------------------------------------===//
@@ -347,118 +356,6 @@ def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
(REPLACE_LANE_v2f64 V128:$vec, 0, F64:$x)>;
-// Arbitrary other BUILD_VECTOR patterns
-def : Pat<(v16i8 (build_vector
- (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
- (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7),
- (i32 I32:$x8), (i32 I32:$x9), (i32 I32:$x10), (i32 I32:$x11),
- (i32 I32:$x12), (i32 I32:$x13), (i32 I32:$x14), (i32 I32:$x15)
- )),
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (REPLACE_LANE_v16i8
- (v16i8 (SPLAT_v16i8 (i32 I32:$x0))),
- 1, I32:$x1
- )),
- 2, I32:$x2
- )),
- 3, I32:$x3
- )),
- 4, I32:$x4
- )),
- 5, I32:$x5
- )),
- 6, I32:$x6
- )),
- 7, I32:$x7
- )),
- 8, I32:$x8
- )),
- 9, I32:$x9
- )),
- 10, I32:$x10
- )),
- 11, I32:$x11
- )),
- 12, I32:$x12
- )),
- 13, I32:$x13
- )),
- 14, I32:$x14
- )),
- 15, I32:$x15
- ))>;
-def : Pat<(v8i16 (build_vector
- (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3),
- (i32 I32:$x4), (i32 I32:$x5), (i32 I32:$x6), (i32 I32:$x7)
- )),
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (REPLACE_LANE_v8i16
- (v8i16 (SPLAT_v8i16 (i32 I32:$x0))),
- 1, I32:$x1
- )),
- 2, I32:$x2
- )),
- 3, I32:$x3
- )),
- 4, I32:$x4
- )),
- 5, I32:$x5
- )),
- 6, I32:$x6
- )),
- 7, I32:$x7
- ))>;
-def : Pat<(v4i32 (build_vector
- (i32 I32:$x0), (i32 I32:$x1), (i32 I32:$x2), (i32 I32:$x3)
- )),
- (v4i32 (REPLACE_LANE_v4i32
- (v4i32 (REPLACE_LANE_v4i32
- (v4i32 (REPLACE_LANE_v4i32
- (v4i32 (SPLAT_v4i32 (i32 I32:$x0))),
- 1, I32:$x1
- )),
- 2, I32:$x2
- )),
- 3, I32:$x3
- ))>;
-def : Pat<(v2i64 (build_vector (i64 I64:$x0), (i64 I64:$x1))),
- (v2i64 (REPLACE_LANE_v2i64
- (v2i64 (SPLAT_v2i64 (i64 I64:$x0))), 1, I64:$x1))>;
-def : Pat<(v4f32 (build_vector
- (f32 F32:$x0), (f32 F32:$x1), (f32 F32:$x2), (f32 F32:$x3)
- )),
- (v4f32 (REPLACE_LANE_v4f32
- (v4f32 (REPLACE_LANE_v4f32
- (v4f32 (REPLACE_LANE_v4f32
- (v4f32 (SPLAT_v4f32 (f32 F32:$x0))),
- 1, F32:$x1
- )),
- 2, F32:$x2
- )),
- 3, F32:$x3
- ))>;
-def : Pat<(v2f64 (build_vector (f64 F64:$x0), (f64 F64:$x1))),
- (v2f64 (REPLACE_LANE_v2f64
- (v2f64 (SPLAT_v2f64 (f64 F64:$x0))), 1, F64:$x1))>;
-
//===----------------------------------------------------------------------===//
// Comparisons
//===----------------------------------------------------------------------===//
@@ -520,16 +417,18 @@ defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 33>;
defm GE : SIMDConditionFP<"ge", SETOGE, 69>;
// Lower float comparisons that don't care about NaN to standard WebAssembly
-// float comparisons. These instructions are generated in the target-independent
-// expansion of unordered comparisons and ordered ne.
-def : Pat<(v4i32 (seteq (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
- (v4i32 (EQ_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
-def : Pat<(v4i32 (setne (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
- (v4i32 (NE_v4f32 (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
-def : Pat<(v2i64 (seteq (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
- (v2i64 (EQ_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
-def : Pat<(v2i64 (setne (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
- (v2i64 (NE_v2f64 (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+// float comparisons. These instructions are generated with nnan and in the
+// target-independent expansion of unordered comparisons and ordered ne.
+foreach nodes = [[seteq, EQ_v4f32], [setne, NE_v4f32], [setlt, LT_v4f32],
+ [setgt, GT_v4f32], [setle, LE_v4f32], [setge, GE_v4f32]] in
+def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (v4i32 (nodes[1] (v4f32 V128:$lhs), (v4f32 V128:$rhs)))>;
+
+foreach nodes = [[seteq, EQ_v2f64], [setne, NE_v2f64], [setlt, LT_v2f64],
+ [setgt, GT_v2f64], [setle, LE_v2f64], [setge, GE_v2f64]] in
+def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (v2i64 (nodes[1] (v2f64 V128:$lhs), (v2f64 V128:$rhs)))>;
+
//===----------------------------------------------------------------------===//
// Bitwise operations
@@ -628,6 +527,28 @@ defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 82>;
// All lanes true: all_true
defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 83>;
+// Reductions already return 0 or 1, so 'and 1', 'setne 0', and 'seteq 1'
+// can be folded out.
+foreach reduction =
+ [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
+foreach ty = [v16i8, v8i16, v4i32, v2i64] in {
+def : Pat<(i32 (and
+ (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
+ (i32 1)
+ )),
+ (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
+def : Pat<(i32 (setne
+ (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
+ (i32 0)
+ )),
+ (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
+def : Pat<(i32 (seteq
+ (i32 (!cast<Intrinsic>(reduction[0]) (ty V128:$x))),
+ (i32 1)
+ )),
+ (i32 (!cast<NI>(reduction[1]#"_"#ty) (ty V128:$x)))>;
+}
+
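// Quick sanity check in plain C++ (standalone, not LLVM code) of the
// identities behind the folds above: when r is known to be 0 or 1,
// (r & 1), (r != 0), and (r == 1) all equal r.
#include <cassert>

int main() {
  for (int R : {0, 1}) {
    assert((R & 1) == R);
    assert((R != 0) == R);
    assert((R == 1) == R);
  }
}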
//===----------------------------------------------------------------------===//
// Bit shifts
//===----------------------------------------------------------------------===//
@@ -658,10 +579,16 @@ defm SHL : SIMDShiftInt<shl, "shl", 84>;
defm SHR_S : SIMDShiftInt<sra, "shr_s", 85>;
defm SHR_U : SIMDShiftInt<srl, "shr_u", 86>;
-// Truncate i64 shift operands to i32s
-foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in
+// Truncate i64 shift operands to i32s, except if they are already i32s
+foreach shifts = [[shl, SHL_v2i64], [sra, SHR_S_v2i64], [srl, SHR_U_v2i64]] in {
+def : Pat<(v2i64 (shifts[0]
+ (v2i64 V128:$vec),
+ (v2i64 (splat2 (i64 (sext I32:$x))))
+ )),
+ (v2i64 (shifts[1] (v2i64 V128:$vec), (i32 I32:$x)))>;
def : Pat<(v2i64 (shifts[0] (v2i64 V128:$vec), (v2i64 (splat2 I64:$x)))),
(v2i64 (shifts[1] (v2i64 V128:$vec), (I32_WRAP_I64 I64:$x)))>;
+}
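// Plain C++ illustration (standalone sketch, not LLVM code) of why the extra
// wrap can be dropped: when the i64 shift amount is a sign-extended i32,
// truncating it back to 32 bits (what i32.wrap_i64 would do) just recovers the
// original i32 operand.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {0, 1, -1, 123456, INT32_MIN, INT32_MAX}) {
    int64_t Extended = static_cast<int64_t>(X); // sext i32 -> i64
    assert(static_cast<int32_t>(Extended) == X); // the wrap is a no-op here
  }
}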
// 2xi64 shifts with constant shift amounts are custom lowered to avoid wrapping
def wasm_shift_t : SDTypeProfile<1, 2,
diff --git a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index ad838dfb574a..e92b34430272 100644
--- a/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -1,9 +1,8 @@
//=== WebAssemblyLateEHPrepare.cpp - WebAssembly Exception Preparation -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -16,29 +15,26 @@
#include "WebAssembly.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/MC/MCAsmInfo.h"
using namespace llvm;
-#define DEBUG_TYPE "wasm-exception-prepare"
+#define DEBUG_TYPE "wasm-late-eh-prepare"
namespace {
class WebAssemblyLateEHPrepare final : public MachineFunctionPass {
StringRef getPassName() const override {
- return "WebAssembly Prepare Exception";
+ return "WebAssembly Late Prepare Exception";
}
bool runOnMachineFunction(MachineFunction &MF) override;
-
- bool removeUnnecessaryUnreachables(MachineFunction &MF);
+ bool addCatches(MachineFunction &MF);
bool replaceFuncletReturns(MachineFunction &MF);
- bool hoistCatches(MachineFunction &MF);
- bool addCatchAlls(MachineFunction &MF);
- bool addRethrows(MachineFunction &MF);
- bool ensureSingleBBTermPads(MachineFunction &MF);
- bool mergeTerminatePads(MachineFunction &MF);
- bool addCatchAllTerminatePads(MachineFunction &MF);
+ bool removeUnnecessaryUnreachables(MachineFunction &MF);
+ bool addExceptionExtraction(MachineFunction &MF);
+ bool restoreStackPointer(MachineFunction &MF);
public:
static char ID; // Pass identification, replacement for typeid
@@ -112,48 +108,40 @@ bool WebAssemblyLateEHPrepare::runOnMachineFunction(MachineFunction &MF) {
return false;
bool Changed = false;
+ if (MF.getFunction().hasPersonalityFn()) {
+ Changed |= addCatches(MF);
+ Changed |= replaceFuncletReturns(MF);
+ }
Changed |= removeUnnecessaryUnreachables(MF);
- Changed |= addRethrows(MF);
- if (!MF.getFunction().hasPersonalityFn())
- return Changed;
- Changed |= replaceFuncletReturns(MF);
- Changed |= hoistCatches(MF);
- Changed |= addCatchAlls(MF);
- Changed |= ensureSingleBBTermPads(MF);
- Changed |= mergeTerminatePads(MF);
- Changed |= addCatchAllTerminatePads(MF);
+ if (MF.getFunction().hasPersonalityFn()) {
+ Changed |= addExceptionExtraction(MF);
+ Changed |= restoreStackPointer(MF);
+ }
return Changed;
}
-bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
- MachineFunction &MF) {
+// Add a catch instruction to the beginning of catchpads and cleanuppads.
+bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
bool Changed = false;
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
for (auto &MBB : MF) {
- for (auto &MI : MBB) {
- if (!WebAssembly::isThrow(MI))
- continue;
+ if (MBB.isEHPad()) {
Changed = true;
-
- // The instruction after the throw should be an unreachable or a branch to
- // another BB that should eventually lead to an unreachable. Delete it
- // because throw itself is a terminator, and also delete successors if
- // any.
- MBB.erase(std::next(MachineBasicBlock::iterator(MI)), MBB.end());
- SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
- MBB.succ_end());
- for (auto *Succ : Succs)
- MBB.removeSuccessor(Succ);
- eraseDeadBBsAndChildren(Succs);
+ auto InsertPos = MBB.begin();
+ if (InsertPos->isEHLabel()) // EH pad starts with an EH label
+ ++InsertPos;
+ unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
+ BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(),
+ TII.get(WebAssembly::CATCH), DstReg);
}
}
-
return Changed;
}
bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
bool Changed = false;
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- auto *EHInfo = MF.getWasmEHFuncInfo();
for (auto &MBB : MF) {
auto Pos = MBB.getFirstTerminator();
@@ -172,15 +160,17 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
Changed = true;
break;
}
- case WebAssembly::CLEANUPRET: {
- // Replace a cleanupret with a rethrow
- if (EHInfo->hasThrowUnwindDest(&MBB))
- BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
- .addMBB(EHInfo->getThrowUnwindDest(&MBB));
- else
- BuildMI(MBB, TI, TI->getDebugLoc(),
- TII.get(WebAssembly::RETHROW_TO_CALLER));
-
+ case WebAssembly::CLEANUPRET:
+ case WebAssembly::RETHROW_IN_CATCH: {
+ // Replace a cleanupret/rethrow_in_catch with a rethrow
+ auto *EHPad = getMatchingEHPad(TI);
+ auto CatchPos = EHPad->begin();
+ if (CatchPos->isEHLabel()) // EH pad starts with an EH label
+ ++CatchPos;
+ MachineInstr *Catch = &*CatchPos;
+ unsigned ExnReg = Catch->getOperand(0).getReg();
+ BuildMI(MBB, TI, TI->getDebugLoc(), TII.get(WebAssembly::RETHROW))
+ .addReg(ExnReg);
TI->eraseFromParent();
Changed = true;
break;
@@ -190,233 +180,208 @@ bool WebAssemblyLateEHPrepare::replaceFuncletReturns(MachineFunction &MF) {
return Changed;
}
-// Hoist catch instructions to the beginning of their matching EH pad BBs in
-// case,
-// (1) catch instruction is not the first instruction in EH pad.
-// ehpad:
-// some_other_instruction
-// ...
-// %exn = catch 0
-// (2) catch instruction is in a non-EH pad BB. For example,
-// ehpad:
-// br bb0
-// bb0:
-// %exn = catch 0
-bool WebAssemblyLateEHPrepare::hoistCatches(MachineFunction &MF) {
- bool Changed = false;
- SmallVector<MachineInstr *, 16> Catches;
- for (auto &MBB : MF)
- for (auto &MI : MBB)
- if (WebAssembly::isCatch(MI))
- Catches.push_back(&MI);
-
- for (auto *Catch : Catches) {
- MachineBasicBlock *EHPad = getMatchingEHPad(Catch);
- assert(EHPad && "No matching EH pad for catch");
- if (EHPad->begin() == Catch)
- continue;
- Changed = true;
- EHPad->insert(EHPad->begin(), Catch->removeFromParent());
- }
- return Changed;
-}
-
-// Add catch_all to beginning of cleanup pads.
-bool WebAssemblyLateEHPrepare::addCatchAlls(MachineFunction &MF) {
+bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
+ MachineFunction &MF) {
bool Changed = false;
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
-
for (auto &MBB : MF) {
- if (!MBB.isEHPad())
- continue;
- // This runs after hoistCatches(), so we assume that if there is a catch,
- // that should be the first instruction in an EH pad.
- if (!WebAssembly::isCatch(*MBB.begin())) {
- Changed = true;
- BuildMI(MBB, MBB.begin(), MBB.begin()->getDebugLoc(),
- TII.get(WebAssembly::CATCH_ALL));
- }
- }
- return Changed;
-}
-
-// Add a 'rethrow' instruction after __cxa_rethrow() call
-bool WebAssemblyLateEHPrepare::addRethrows(MachineFunction &MF) {
- bool Changed = false;
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- auto *EHInfo = MF.getWasmEHFuncInfo();
-
- for (auto &MBB : MF)
for (auto &MI : MBB) {
- // Check if it is a call to __cxa_rethrow()
- if (!MI.isCall())
+ if (MI.getOpcode() != WebAssembly::THROW &&
+ MI.getOpcode() != WebAssembly::RETHROW)
continue;
- MachineOperand &CalleeOp = MI.getOperand(0);
- if (!CalleeOp.isGlobal() ||
- CalleeOp.getGlobal()->getName() != WebAssembly::CxaRethrowFn)
- continue;
-
- // Now we have __cxa_rethrow() call
Changed = true;
- auto InsertPt = std::next(MachineBasicBlock::iterator(MI));
- while (InsertPt != MBB.end() && InsertPt->isLabel()) // Skip EH_LABELs
- ++InsertPt;
- MachineInstr *Rethrow = nullptr;
- if (EHInfo->hasThrowUnwindDest(&MBB))
- Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
- TII.get(WebAssembly::RETHROW))
- .addMBB(EHInfo->getThrowUnwindDest(&MBB));
- else
- Rethrow = BuildMI(MBB, InsertPt, MI.getDebugLoc(),
- TII.get(WebAssembly::RETHROW_TO_CALLER));
- // Because __cxa_rethrow does not return, the instruction after the
- // rethrow should be an unreachable or a branch to another BB that should
- // eventually lead to an unreachable. Delete it because rethrow itself is
- // a terminator, and also delete non-EH pad successors if any.
- MBB.erase(std::next(MachineBasicBlock::iterator(Rethrow)), MBB.end());
- SmallVector<MachineBasicBlock *, 8> NonPadSuccessors;
- for (auto *Succ : MBB.successors())
+ // The instruction after the throw should be an unreachable or a branch to
+ // another BB that should eventually lead to an unreachable. Delete it
+ // because throw itself is a terminator, and also delete successors if
+ // any.
+ MBB.erase(std::next(MI.getIterator()), MBB.end());
+ SmallVector<MachineBasicBlock *, 8> Succs(MBB.succ_begin(),
+ MBB.succ_end());
+ for (auto *Succ : Succs)
if (!Succ->isEHPad())
- NonPadSuccessors.push_back(Succ);
- for (auto *Succ : NonPadSuccessors)
- MBB.removeSuccessor(Succ);
- eraseDeadBBsAndChildren(NonPadSuccessors);
+ MBB.removeSuccessor(Succ);
+ eraseDeadBBsAndChildren(Succs);
}
+ }
+
return Changed;
}
-// Terminate pads are an single-BB EH pad in the form of
-// termpad:
-// %exn = catch 0
-// call @__clang_call_terminate(%exn)
-// unreachable
-// (There can be local.set and local.gets before the call if we didn't run
-// RegStackify)
-// But code transformations can change or add more control flow, so the call to
-// __clang_call_terminate() function may not be in the original EH pad anymore.
-// This ensures every terminate pad is a single BB in the form illustrated
-// above.
-bool WebAssemblyLateEHPrepare::ensureSingleBBTermPads(MachineFunction &MF) {
+// Wasm uses the 'br_on_exn' instruction to check the tag of an exception. It
+// takes an exnref type object returned by 'catch' and branches to the
+// destination if it matches a given tag. We currently use the __cpp_exception
+// symbol to represent the tag for all C++ exceptions.
+//
+// block $l (result i32)
+// ...
+// ;; exnref $e is on the stack at this point
+// br_on_exn $l $e ;; branch to $l with $e's arguments
+// ...
+// end
+// ;; Here we expect the extracted values to be on top of the wasm value stack
+// ... Handle exception using values ...
+//
+// br_on_exn takes an exnref object and branches if it matches the given tag.
+// There can be multiple br_on_exn instructions if we want to match another
+// tag, but for now we only test for the __cpp_exception tag, and if it does
+// not match, i.e., it is a foreign exception, we rethrow it.
+//
+// In the destination BB that's the target of br_on_exn, extracted exception
+// values (in C++'s case a single i32, which represents an exception pointer)
+// are placed on top of the wasm stack. Because we can't model the wasm stack
+// in LLVM instructions, we use the 'extract_exception' pseudo instruction to
+// retrieve it. The pseudo instruction will be deleted later.
+bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto *EHInfo = MF.getWasmEHFuncInfo();
+ SmallVector<MachineInstr *, 16> ExtractInstrs;
+ SmallVector<MachineInstr *, 8> ToDelete;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
+ if (MI.getOperand(0).isDead())
+ ToDelete.push_back(&MI);
+ else
+ ExtractInstrs.push_back(&MI);
+ }
+ }
+ }
+ bool Changed = !ToDelete.empty() || !ExtractInstrs.empty();
+ for (auto *MI : ToDelete)
+ MI->eraseFromParent();
+ if (ExtractInstrs.empty())
+ return Changed;
- // Find calls to __clang_call_terminate()
- SmallVector<MachineInstr *, 8> ClangCallTerminateCalls;
- for (auto &MBB : MF)
- for (auto &MI : MBB)
+ // Find terminate pads.
+ SmallSet<MachineBasicBlock *, 8> TerminatePads;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
if (MI.isCall()) {
const MachineOperand &CalleeOp = MI.getOperand(0);
if (CalleeOp.isGlobal() && CalleeOp.getGlobal()->getName() ==
WebAssembly::ClangCallTerminateFn)
- ClangCallTerminateCalls.push_back(&MI);
+ TerminatePads.insert(getMatchingEHPad(&MI));
}
-
- bool Changed = false;
- for (auto *Call : ClangCallTerminateCalls) {
- MachineBasicBlock *EHPad = getMatchingEHPad(Call);
- assert(EHPad && "No matching EH pad for catch");
-
- // If it is already the form we want, skip it
- if (Call->getParent() == EHPad &&
- Call->getNextNode()->getOpcode() == WebAssembly::UNREACHABLE)
- continue;
-
- // In case the __clang_call_terminate() call is not in its matching EH pad,
- // move the call to the end of EH pad and add an unreachable instruction
- // after that. Delete all successors and their children if any, because here
- // the program terminates.
- Changed = true;
- MachineInstr *Catch = &*EHPad->begin();
- // This runs after hoistCatches(), so catch instruction should be at the top
- assert(WebAssembly::isCatch(*Catch));
- // Takes the result register of the catch instruction as argument. There may
- // have been some other local.set/local.gets in between, but at this point
- // we don't care.
- Call->getOperand(1).setReg(Catch->getOperand(0).getReg());
- auto InsertPos = std::next(MachineBasicBlock::iterator(Catch));
- EHPad->insert(InsertPos, Call->removeFromParent());
- BuildMI(*EHPad, InsertPos, Call->getDebugLoc(),
- TII.get(WebAssembly::UNREACHABLE));
- EHPad->erase(InsertPos, EHPad->end());
- SmallVector<MachineBasicBlock *, 8> Succs(EHPad->succ_begin(),
- EHPad->succ_end());
- for (auto *Succ : Succs)
- EHPad->removeSuccessor(Succ);
- eraseDeadBBsAndChildren(Succs);
+ }
}
- return Changed;
-}
-// In case there are multiple terminate pads, merge them into one for code size.
-// This runs after ensureSingleBBTermPads() and assumes every terminate pad is a
-// single BB.
-// In principle this violates EH scope relationship because it can merge
-// multiple inner EH scopes, each of which is in different outer EH scope. But
-// getEHScopeMembership() function will not be called after this, so it is fine.
-bool WebAssemblyLateEHPrepare::mergeTerminatePads(MachineFunction &MF) {
- SmallVector<MachineBasicBlock *, 8> TermPads;
- for (auto &MBB : MF)
- if (WebAssembly::isCatchTerminatePad(MBB))
- TermPads.push_back(&MBB);
- if (TermPads.empty())
- return false;
-
- MachineBasicBlock *UniqueTermPad = TermPads.front();
- for (auto *TermPad :
- llvm::make_range(std::next(TermPads.begin()), TermPads.end())) {
- SmallVector<MachineBasicBlock *, 2> Preds(TermPad->pred_begin(),
- TermPad->pred_end());
- for (auto *Pred : Preds)
- Pred->replaceSuccessor(TermPad, UniqueTermPad);
- TermPad->eraseFromParent();
+ for (auto *Extract : ExtractInstrs) {
+ MachineBasicBlock *EHPad = getMatchingEHPad(Extract);
+ assert(EHPad && "No matching EH pad for extract_exception");
+ auto CatchPos = EHPad->begin();
+ if (CatchPos->isEHLabel()) // EH pad starts with an EH label
+ ++CatchPos;
+ MachineInstr *Catch = &*CatchPos;
+
+ if (Catch->getNextNode() != Extract)
+ EHPad->insert(Catch->getNextNode(), Extract->removeFromParent());
+
+ // - Before:
+ // ehpad:
+ // %exnref:exnref = catch
+ // %exn:i32 = extract_exception
+ // ... use exn ...
+ //
+ // - After:
+ // ehpad:
+ // %exnref:exnref = catch
+ // br_on_exn %thenbb, $__cpp_exception, %exnref
+ // br %elsebb
+ // elsebb:
+ // rethrow
+ // thenbb:
+ // %exn:i32 = extract_exception
+ // ... use exn ...
+ unsigned ExnReg = Catch->getOperand(0).getReg();
+ auto *ThenMBB = MF.CreateMachineBasicBlock();
+ auto *ElseMBB = MF.CreateMachineBasicBlock();
+ MF.insert(std::next(MachineFunction::iterator(EHPad)), ElseMBB);
+ MF.insert(std::next(MachineFunction::iterator(ElseMBB)), ThenMBB);
+ ThenMBB->splice(ThenMBB->end(), EHPad, Extract, EHPad->end());
+ ThenMBB->transferSuccessors(EHPad);
+ EHPad->addSuccessor(ThenMBB);
+ EHPad->addSuccessor(ElseMBB);
+
+ DebugLoc DL = Extract->getDebugLoc();
+ const char *CPPExnSymbol = MF.createExternalSymbolName("__cpp_exception");
+ BuildMI(EHPad, DL, TII.get(WebAssembly::BR_ON_EXN))
+ .addMBB(ThenMBB)
+ .addExternalSymbol(CPPExnSymbol)
+ .addReg(ExnReg);
+ BuildMI(EHPad, DL, TII.get(WebAssembly::BR)).addMBB(ElseMBB);
+
+    // When this is a terminate pad with a __clang_call_terminate() call, we
+    // don't rethrow it; we call __clang_call_terminate() with a nullptr
+    // argument instead, which will call std::terminate().
+ //
+ // - Before:
+ // ehpad:
+ // %exnref:exnref = catch
+ // %exn:i32 = extract_exception
+ // call @__clang_call_terminate(%exn)
+ // unreachable
+ //
+ // - After:
+ // ehpad:
+ // %exnref:exnref = catch
+ // br_on_exn %thenbb, $__cpp_exception, %exnref
+ // br %elsebb
+ // elsebb:
+ // call @__clang_call_terminate(0)
+ // unreachable
+ // thenbb:
+ // %exn:i32 = extract_exception
+ // call @__clang_call_terminate(%exn)
+ // unreachable
+ if (TerminatePads.count(EHPad)) {
+ Function *ClangCallTerminateFn =
+ MF.getFunction().getParent()->getFunction(
+ WebAssembly::ClangCallTerminateFn);
+ assert(ClangCallTerminateFn &&
+ "There is no __clang_call_terminate() function");
+ BuildMI(ElseMBB, DL, TII.get(WebAssembly::CALL_VOID))
+ .addGlobalAddress(ClangCallTerminateFn)
+ .addImm(0);
+ BuildMI(ElseMBB, DL, TII.get(WebAssembly::UNREACHABLE));
+
+ } else {
+ BuildMI(ElseMBB, DL, TII.get(WebAssembly::RETHROW)).addReg(ExnReg);
+ if (EHInfo->hasEHPadUnwindDest(EHPad))
+ ElseMBB->addSuccessor(EHInfo->getEHPadUnwindDest(EHPad));
+ }
}
+
return true;
}
-// Terminate pads are cleanup pads, so they should start with a 'catch_all'
-// instruction. But in the Itanium model, when we have a C++ exception object,
-// we pass them to __clang_call_terminate function, which calls __cxa_end_catch
-// with the passed exception pointer and then std::terminate. This is the reason
-// that terminate pads are generated with not a catch_all but a catch
-// instruction in clang and earlier llvm passes. Here we append a terminate pad
-// with a catch_all after each existing terminate pad so we can also catch
-// foreign exceptions. For every terminate pad:
-// %exn = catch 0
-// call @__clang_call_terminate(%exn)
-// unreachable
-// We append this BB right after that:
-// catch_all
-// call @std::terminate()
-// unreachable
-bool WebAssemblyLateEHPrepare::addCatchAllTerminatePads(MachineFunction &MF) {
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- SmallVector<MachineBasicBlock *, 8> TermPads;
- for (auto &MBB : MF)
- if (WebAssembly::isCatchTerminatePad(MBB))
- TermPads.push_back(&MBB);
- if (TermPads.empty())
+// After the stack is unwound due to a thrown exception, the __stack_pointer
+// global can point to an invalid address. This inserts instructions that
+// restore the __stack_pointer global.
+bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) {
+ const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>(
+ MF.getSubtarget().getFrameLowering());
+ if (!FrameLowering->needsPrologForEH(MF))
return false;
+ bool Changed = false;
- Function *StdTerminateFn =
- MF.getFunction().getParent()->getFunction(WebAssembly::StdTerminateFn);
- assert(StdTerminateFn && "There is no std::terminate() function");
- for (auto *CatchTermPad : TermPads) {
- DebugLoc DL = CatchTermPad->findDebugLoc(CatchTermPad->begin());
- auto *CatchAllTermPad = MF.CreateMachineBasicBlock();
- MF.insert(std::next(MachineFunction::iterator(CatchTermPad)),
- CatchAllTermPad);
- CatchAllTermPad->setIsEHPad();
- BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CATCH_ALL));
- BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::CALL_VOID))
- .addGlobalAddress(StdTerminateFn);
- BuildMI(CatchAllTermPad, DL, TII.get(WebAssembly::UNREACHABLE));
+ for (auto &MBB : MF) {
+ if (!MBB.isEHPad())
+ continue;
+ Changed = true;
- // Actually this CatchAllTermPad (new terminate pad with a catch_all) is not
- // a successor of an existing terminate pad. CatchAllTermPad should have all
- // predecessors CatchTermPad has instead. This is a hack to force
- // CatchAllTermPad be always sorted right after CatchTermPad; the correct
- // predecessor-successor relationships will be restored in CFGStackify pass.
- CatchTermPad->addSuccessor(CatchAllTermPad);
+ // Insert __stack_pointer restoring instructions at the beginning of each EH
+ // pad, after the catch instruction. Here it is safe to assume that SP32
+ // holds the latest value of __stack_pointer, because the only exception for
+ // this case is when a function uses the red zone, but that only happens
+ // with leaf functions, and we don't restore __stack_pointer in leaf
+ // functions anyway.
+ auto InsertPos = MBB.begin();
+ if (InsertPos->isEHLabel()) // EH pad starts with an EH label
+ ++InsertPos;
+ if (InsertPos->getOpcode() == WebAssembly::CATCH)
+ ++InsertPos;
+ FrameLowering->writeSPToGlobal(WebAssembly::SP32, MF, MBB, InsertPos,
+ MBB.begin()->getDebugLoc());
}
- return true;
+ return Changed;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index c9a3527d3fbd..34a8195ac4b4 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 0491f71cea7f..960d5134f6e9 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -1,9 +1,8 @@
//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -240,16 +239,16 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
bool EnableEH; // Enable exception handling
bool EnableSjLj; // Enable setjmp/longjmp handling
- GlobalVariable *ThrewGV;
- GlobalVariable *ThrewValueGV;
- Function *GetTempRet0Func;
- Function *SetTempRet0Func;
- Function *ResumeF;
- Function *EHTypeIDF;
- Function *EmLongjmpF;
- Function *EmLongjmpJmpbufF;
- Function *SaveSetjmpF;
- Function *TestSetjmpF;
+ GlobalVariable *ThrewGV = nullptr;
+ GlobalVariable *ThrewValueGV = nullptr;
+ Function *GetTempRet0Func = nullptr;
+ Function *SetTempRet0Func = nullptr;
+ Function *ResumeF = nullptr;
+ Function *EHTypeIDF = nullptr;
+ Function *EmLongjmpF = nullptr;
+ Function *EmLongjmpJmpbufF = nullptr;
+ Function *SaveSetjmpF = nullptr;
+ Function *TestSetjmpF = nullptr;
// __cxa_find_matching_catch_N functions.
// Indexed by the number of clauses in an original landingpad instruction.
@@ -282,11 +281,7 @@ public:
static char ID;
WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
- : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj),
- ThrewGV(nullptr), ThrewValueGV(nullptr), GetTempRet0Func(nullptr),
- SetTempRet0Func(nullptr), ResumeF(nullptr), EHTypeIDF(nullptr),
- EmLongjmpF(nullptr), EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr),
- TestSetjmpF(nullptr) {
+ : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) {
EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
}
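// The hunk above replaces a long constructor init list with C++11 default
// member initializers; a minimal sketch of the pattern with hypothetical names
// (ExamplePass is not an LLVM class):
struct ExamplePass {
  bool EnableEH;          // still set per-instance by the constructor
  int *Cache = nullptr;   // defaulted here, so every constructor picks it up
  explicit ExamplePass(bool EH) : EnableEH(EH) {}
};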
bool runOnModule(Module &M) override;
@@ -339,11 +334,12 @@ static bool canThrow(const Value *V) {
// which will generate an import and assumes that it will exist at link time.
static GlobalVariable *getGlobalVariableI32(Module &M, IRBuilder<> &IRB,
const char *Name) {
- if (M.getNamedGlobal(Name))
- report_fatal_error(Twine("variable name is reserved: ") + Name);
- return new GlobalVariable(M, IRB.getInt32Ty(), false,
- GlobalValue::ExternalLinkage, nullptr, Name);
+ auto* GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, IRB.getInt32Ty()));
+ if (!GV)
+ report_fatal_error(Twine("unable to create global: ") + Name);
+
+ return GV;
}
// Simple function name mangler.
@@ -433,8 +429,8 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
// No attributes for the callee pointer.
ArgAttributes.push_back(AttributeSet());
// Copy the argument attributes from the original
- for (unsigned i = 0, e = CI->getNumArgOperands(); i < e; ++i)
- ArgAttributes.push_back(InvokeAL.getParamAttributes(i));
+ for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I)
+ ArgAttributes.push_back(InvokeAL.getParamAttributes(I));
// Reconstruct the AttributesList based on the vector we constructed.
AttributeList NewCallAL =
@@ -446,7 +442,8 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
// Post-invoke
// %__THREW__.val = __THREW__; __THREW__ = 0;
- Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
+ Value *Threw =
+ IRB.CreateLoad(IRB.getInt32Ty(), ThrewGV, ThrewGV->getName() + ".val");
IRB.CreateStore(IRB.getInt32(0), ThrewGV);
return Threw;
}
@@ -488,6 +485,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
if (CalleeF->isIntrinsic())
return false;
+ // Attempting to transform inline assembly will result in something like:
+ // call void @__invoke_void(void ()* asm ...)
+ // which is invalid because inline assembly blocks do not have addresses
+ // and can't be passed by pointer. The result is a crash with illegal IR.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
// The reason we include malloc/free here is to exclude the malloc/free
// calls generated in setjmp prep / cleanup routines.
Function *SetjmpF = M.getFunction("setjmp");
@@ -549,8 +553,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F);
BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F);
Value *ThrewCmp = IRB.CreateICmpNE(Threw, IRB.getInt32(0));
- Value *ThrewValue =
- IRB.CreateLoad(ThrewValueGV, ThrewValueGV->getName() + ".val");
+ Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
+ ThrewValueGV->getName() + ".val");
Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0));
Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
@@ -562,8 +566,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
Value *ThrewInt = IRB.CreateIntToPtr(Threw, Type::getInt32PtrTy(C),
Threw->getName() + ".i32p");
- Value *LoadedThrew =
- IRB.CreateLoad(ThrewInt, ThrewInt->getName() + ".loaded");
+ Value *LoadedThrew = IRB.CreateLoad(IRB.getInt32Ty(), ThrewInt,
+ ThrewInt->getName() + ".loaded");
Value *ThenLabel = IRB.CreateCall(
TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label");
Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
@@ -606,11 +610,11 @@ void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
++UI;
SSA.Initialize(I.getType(), I.getName());
SSA.AddAvailableValue(&BB, &I);
- Instruction *User = cast<Instruction>(U.getUser());
+ auto *User = cast<Instruction>(U.getUser());
if (User->getParent() == &BB)
continue;
- if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ if (auto *UserPN = dyn_cast<PHINode>(User))
if (UserPN->getIncomingBlock(U) == &BB)
continue;
@@ -769,7 +773,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
// This can't throw, and we don't need this invoke, just replace it with a
// call+branch
SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
- CallInst *NewCall = IRB.CreateCall(II->getCalledValue(), Args);
+ CallInst *NewCall =
+ IRB.CreateCall(II->getFunctionType(), II->getCalledValue(), Args);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setDebugLoc(II->getDebugLoc());
@@ -836,15 +841,15 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
for (LandingPadInst *LPI : LandingPads) {
IRB.SetInsertPoint(LPI);
SmallVector<Value *, 16> FMCArgs;
- for (unsigned i = 0, e = LPI->getNumClauses(); i < e; ++i) {
- Constant *Clause = LPI->getClause(i);
+ for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
+ Constant *Clause = LPI->getClause(I);
// As a temporary workaround for the lack of aggregate varargs support
// in the interface between JS and wasm, break out filter operands into
// their component elements.
- if (LPI->isFilter(i)) {
+ if (LPI->isFilter(I)) {
auto *ATy = cast<ArrayType>(Clause->getType());
- for (unsigned j = 0, e = ATy->getNumElements(); j < e; ++j) {
- Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(j), "filter");
+ for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) {
+ Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter");
FMCArgs.push_back(EV);
}
} else
@@ -954,8 +959,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
BBs.push_back(&BB);
// BBs.size() will change within the loop, so we query it every time
- for (unsigned i = 0; i < BBs.size(); i++) {
- BasicBlock *BB = BBs[i];
+ for (unsigned I = 0; I < BBs.size(); I++) {
+ BasicBlock *BB = BBs[I];
for (Instruction &I : *BB) {
assert(!isa<InvokeInst>(&I));
auto *CI = dyn_cast<CallInst>(&I);
@@ -1028,9 +1033,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// switch case). 0 means a longjmp that is not ours to handle, needs a
// rethrow. Otherwise the index is the same as the index in P+1 (to avoid
// 0).
- for (unsigned i = 0; i < SetjmpRetPHIs.size(); i++) {
- SI->addCase(IRB.getInt32(i + 1), SetjmpRetPHIs[i]->getParent());
- SetjmpRetPHIs[i]->addIncoming(LongjmpResult, EndBB);
+ for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) {
+ SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent());
+ SetjmpRetPHIs[I]->addIncoming(LongjmpResult, EndBB);
}
// We are splitting the block here, and must continue to find other calls
@@ -1077,7 +1082,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Use &U = *UI;
// Increment the iterator before removing the use from the list.
++UI;
- if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+ if (auto *I = dyn_cast<Instruction>(U.getUser()))
if (I->getParent() != &EntryBB)
SetjmpTableSSA.RewriteUse(U);
}
@@ -1085,7 +1090,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
UI != UE;) {
Use &U = *UI;
++UI;
- if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+ if (auto *I = dyn_cast<Instruction>(U.getUser()))
if (I->getParent() != &EntryBB)
SetjmpTableSizeSSA.RewriteUse(U);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 84c877cb8d02..494d3fadbc8c 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -62,7 +61,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Lower Global Destructors **********\n");
GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
- if (!GV)
+ if (!GV || !GV->hasInitializer())
return false;
const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
@@ -70,7 +69,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
return false;
// Sanity-check @llvm.global_dtor's type.
- StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
if (!ETy || ETy->getNumElements() != 3 ||
!ETy->getTypeAtIndex(0U)->isIntegerTy() ||
!ETy->getTypeAtIndex(1U)->isPointerTy() ||
@@ -81,11 +80,11 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
// associated symbol.
std::map<uint16_t, MapVector<Constant *, std::vector<Constant *>>> DtorFuncs;
for (Value *O : InitList->operands()) {
- ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
+ auto *CS = dyn_cast<ConstantStruct>(O);
if (!CS)
continue; // Malformed.
- ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ auto *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
if (!Priority)
continue; // Malformed.
uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
@@ -110,10 +109,11 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
FunctionType::get(Type::getVoidTy(C), AtExitFuncArgs,
/*isVarArg=*/false);
- Type *AtExitArgs[] = {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar};
- FunctionType *AtExitTy = FunctionType::get(Type::getInt32Ty(C), AtExitArgs,
- /*isVarArg=*/false);
- Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy);
+ FunctionCallee AtExit = M.getOrInsertFunction(
+ "__cxa_atexit",
+ FunctionType::get(Type::getInt32Ty(C),
+ {PointerType::get(AtExitFuncTy, 0), VoidStar, VoidStar},
+ /*isVarArg=*/false));
// Declare __dso_handle.
Constant *DsoHandle = M.getNamedValue("__dso_handle");
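// A standalone sketch of the registration shape this pass builds, assuming the
// usual Itanium C++ ABI declarations of __cxa_atexit and __dso_handle
// (callDtors is a hypothetical helper): a function that runs the destructors
// is handed to __cxa_atexit together with __dso_handle, so it fires at exit.
#include <cstdio>

extern "C" int __cxa_atexit(void (*Func)(void *), void *Arg, void *DsoHandle);
extern "C" void *__dso_handle;

static void callDtors(void *) { std::puts("running registered dtors"); }

int main() {
  __cxa_atexit(callDtors, nullptr, static_cast<void *>(&__dso_handle));
}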
@@ -143,13 +143,13 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
: Twine()),
&M);
BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors);
+ FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
+ /*isVarArg=*/false);
for (auto Dtor : AssociatedAndMore.second)
- CallInst::Create(Dtor, "", BB);
+ CallInst::Create(VoidVoid, Dtor, "", BB);
ReturnInst::Create(C, BB);
- FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C),
- /*isVarArg=*/false);
Function *RegisterCallDtors = Function::Create(
VoidVoid, Function::PrivateLinkage,
"register_call_dtors" +
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index fa862fbaa634..288b991ae2c5 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -1,9 +1,8 @@
// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst //
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -17,7 +16,7 @@
#include "WebAssemblyAsmPrinter.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRuntimeLibcallSignatures.h"
-#include "WebAssemblyUtilities.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
@@ -37,7 +36,7 @@ using namespace llvm;
// This disables the removal of registers when lowering into MC, as required
// by some current tests.
-static cl::opt<bool>
+cl::opt<bool>
WasmKeepRegisters("wasm-keep-registers", cl::Hidden,
cl::desc("WebAssembly: output stack registers in"
" instruction output for test purposes only."),
@@ -48,7 +47,7 @@ static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const GlobalValue *Global = MO.getGlobal();
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global));
+ auto *WasmSym = cast<MCSymbolWasm>(Printer.getSymbol(Global));
if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) {
const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
@@ -57,9 +56,9 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
SmallVector<MVT, 1> ResultMVTs;
SmallVector<MVT, 4> ParamMVTs;
- ComputeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
+ computeSignatureVTs(FuncTy, CurrentFunc, TM, ParamMVTs, ResultMVTs);
- auto Signature = SignatureFromMVTs(ResultMVTs, ParamMVTs);
+ auto Signature = signatureFromMVTs(ResultMVTs, ParamMVTs);
WasmSym->setSignature(Signature.get());
Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
@@ -71,20 +70,23 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
const MachineOperand &MO) const {
const char *Name = MO.getSymbolName();
- MCSymbolWasm *WasmSym =
- cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
+ auto *WasmSym = cast<MCSymbolWasm>(Printer.GetExternalSymbolSymbol(Name));
const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
- // Except for the two exceptions (__stack_pointer and __cpp_exception), all
- // other external symbols used by CodeGen are functions. It's OK to hardcode
- // knowledge of specific symbols here; this method is precisely there for
- // fetching the signatures of known Clang-provided symbols.
- if (strcmp(Name, "__stack_pointer") == 0) {
+ // Except for certain known symbols, all symbols used by CodeGen are
+ // functions. It's OK to hardcode knowledge of specific symbols here; this
+ // method is precisely there for fetching the signatures of known
+ // Clang-provided symbols.
+ if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 ||
+ strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 ||
+ strcmp(Name, "__tls_size") == 0) {
+ bool Mutable =
+ strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
WasmSym->setGlobalType(wasm::WasmGlobalType{
uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
: wasm::WASM_TYPE_I32),
- true});
+ Mutable});
return WasmSym;
}
@@ -110,7 +112,7 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
: wasm::ValType::I32);
} else { // Function symbols
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
- GetLibcallSignature(Subtarget, Name, Returns, Params);
+ getLibcallSignature(Subtarget, Name, Returns, Params);
}
auto Signature =
make_unique<wasm::WasmSignature>(std::move(Returns), std::move(Params));
@@ -120,27 +122,42 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
return WasmSym;
}
-MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
- int64_t Offset,
- bool IsFunc, bool IsGlob,
- bool IsEvent) const {
- MCSymbolRefExpr::VariantKind VK =
- IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
- : IsGlob ? MCSymbolRefExpr::VK_WebAssembly_GLOBAL
- : IsEvent ? MCSymbolRefExpr::VK_WebAssembly_EVENT
- : MCSymbolRefExpr::VK_None;
+MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+ unsigned TargetFlags = MO.getTargetFlags();
+
+ switch (TargetFlags) {
+ case WebAssemblyII::MO_NO_FLAG:
+ break;
+ case WebAssemblyII::MO_GOT:
+ Kind = MCSymbolRefExpr::VK_GOT;
+ break;
+ case WebAssemblyII::MO_MEMORY_BASE_REL:
+ Kind = MCSymbolRefExpr::VK_WASM_MBREL;
+ break;
+ case WebAssemblyII::MO_TABLE_BASE_REL:
+ Kind = MCSymbolRefExpr::VK_WASM_TBREL;
+ break;
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ }
- const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Kind, Ctx);
- if (Offset != 0) {
- if (IsFunc)
+ if (MO.getOffset() != 0) {
+ const auto *WasmSym = cast<MCSymbolWasm>(Sym);
+ if (TargetFlags == WebAssemblyII::MO_GOT)
+ report_fatal_error("GOT symbol references do not support offsets");
+ if (WasmSym->isFunction())
report_fatal_error("Function addresses with offsets not supported");
- if (IsGlob)
+ if (WasmSym->isGlobal())
report_fatal_error("Global indexes with offsets not supported");
- if (IsEvent)
+ if (WasmSym->isEvent())
report_fatal_error("Event indexes with offsets not supported");
- Expr =
- MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
+
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
}
return MCOperand::createExpr(Expr);
@@ -161,13 +178,13 @@ static wasm::ValType getType(const TargetRegisterClass *RC) {
llvm_unreachable("Unexpected register class");
}
-void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
+void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
const MCInstrDesc &Desc = MI->getDesc();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
MCOperand MCOp;
switch (MO.getType()) {
@@ -188,8 +205,8 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_Immediate:
- if (i < Desc.NumOperands) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (I < Desc.NumOperands) {
+ const MCOperandInfo &Info = Desc.OpInfo[I];
if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
MCSymbol *Sym = Printer.createTempSymbol("typeindex");
@@ -206,10 +223,10 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
// call_indirect instructions have a callee operand at the end which
// doesn't count as a param.
- if (WebAssembly::isCallIndirect(*MI))
+ if (WebAssembly::isCallIndirect(MI->getOpcode()))
Params.pop_back();
- MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+ auto *WasmSym = cast<MCSymbolWasm>(Sym);
auto Signature = make_unique<wasm::WasmSignature>(std::move(Returns),
std::move(Params));
WasmSym->setSignature(Signature.get());
@@ -217,7 +234,7 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
const MCExpr *Expr = MCSymbolRefExpr::create(
- WasmSym, MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX, Ctx);
+ WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
MCOp = MCOperand::createExpr(Expr);
break;
}
@@ -237,30 +254,21 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_GlobalAddress:
- assert(MO.getTargetFlags() == WebAssemblyII::MO_NO_FLAG &&
- "WebAssembly does not use target flags on GlobalAddresses");
- MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
- MO.getGlobal()->getValueType()->isFunctionTy(),
- false, false);
+ MCOp = lowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
case MachineOperand::MO_ExternalSymbol:
// The target flag indicates whether this is a symbol for a
// variable or a function.
- assert((MO.getTargetFlags() & ~WebAssemblyII::MO_SYMBOL_MASK) == 0 &&
+ assert(MO.getTargetFlags() == 0 &&
"WebAssembly uses only symbol flags on ExternalSymbols");
- MCOp = LowerSymbolOperand(
- GetExternalSymbolSymbol(MO), /*Offset=*/0,
- (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_FUNCTION) != 0,
- (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_GLOBAL) != 0,
- (MO.getTargetFlags() & WebAssemblyII::MO_SYMBOL_EVENT) != 0);
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
break;
case MachineOperand::MO_MCSymbol:
// This is currently used only for LSDA symbols (GCC_except_table),
// because global addresses or other external symbols are handled above.
assert(MO.getTargetFlags() == 0 &&
"WebAssembly does not use target flags on MCSymbol");
- MCOp = LowerSymbolOperand(MO.getMCSymbol(), /*Offset=*/0, false, false,
- false);
+ MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
break;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index fa7a0ea61b3b..2c375a01a7f5 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -1,9 +1,8 @@
//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -33,13 +32,12 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
- MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset, bool IsFunc,
- bool IsGlob, bool IsEvent) const;
+ MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
: Ctx(ctx), Printer(printer) {}
- void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 0157af0f8510..d31c1226bfdb 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//=- WebAssemblyMachineFunctionInfo.cpp - WebAssembly Machine Function Info -=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -19,7 +18,7 @@
#include "llvm/CodeGen/Analysis.h"
using namespace llvm;
-WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
+WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() = default; // anchor.
void WebAssemblyFunctionInfo::initWARegs() {
assert(WARegs.empty());
@@ -27,7 +26,7 @@ void WebAssemblyFunctionInfo::initWARegs() {
WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
}
-void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
+void llvm::computeLegalValueVTs(const Function &F, const TargetMachine &TM,
Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
const DataLayout &DL(F.getParent()->getDataLayout());
const WebAssemblyTargetLowering &TLI =
@@ -38,16 +37,16 @@ void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
for (EVT VT : VTs) {
unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
- for (unsigned i = 0; i != NumRegs; ++i)
+ for (unsigned I = 0; I != NumRegs; ++I)
ValueVTs.push_back(RegisterVT);
}
}
-void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+void llvm::computeSignatureVTs(const FunctionType *Ty, const Function &F,
const TargetMachine &TM,
SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results) {
- ComputeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
+ computeLegalValueVTs(F, TM, Ty->getReturnType(), Results);
MVT PtrVT = MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits());
if (Results.size() > 1) {
@@ -59,22 +58,35 @@ void llvm::ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
}
for (auto *Param : Ty->params())
- ComputeLegalValueVTs(F, TM, Param, Params);
+ computeLegalValueVTs(F, TM, Param, Params);
if (Ty->isVarArg())
Params.push_back(PtrVT);
}
-void llvm::ValTypesFromMVTs(const ArrayRef<MVT> &In,
+void llvm::valTypesFromMVTs(const ArrayRef<MVT> &In,
SmallVectorImpl<wasm::ValType> &Out) {
for (MVT Ty : In)
Out.push_back(WebAssembly::toValType(Ty));
}
std::unique_ptr<wasm::WasmSignature>
-llvm::SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+llvm::signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
const SmallVectorImpl<MVT> &Params) {
auto Sig = make_unique<wasm::WasmSignature>();
- ValTypesFromMVTs(Results, Sig->Returns);
- ValTypesFromMVTs(Params, Sig->Params);
+ valTypesFromMVTs(Results, Sig->Returns);
+ valTypesFromMVTs(Params, Sig->Params);
return Sig;
}
+
+yaml::WebAssemblyFunctionInfo::WebAssemblyFunctionInfo(
+ const llvm::WebAssemblyFunctionInfo &MFI)
+ : CFGStackified(MFI.isCFGStackified()) {}
+
+void yaml::WebAssemblyFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
+ MappingTraits<WebAssemblyFunctionInfo>::mapping(YamlIO, *this);
+}
+
+void WebAssemblyFunctionInfo::initializeBaseYamlFields(
+ const yaml::WebAssemblyFunctionInfo &YamlMFI) {
+ CFGStackified = YamlMFI.CFGStackified;
+}
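
[Editorial aside, not part of the imported diff] The "= default; // anchor." change above relies on a common C++ idiom: the first virtual member that is defined out of line acts as the class's key function, pinning the vtable and RTTI to a single object file instead of emitting them in every translation unit that uses the class. A minimal standalone sketch of the idiom follows; WebAssemblyFunctionInfoLike is a made-up stand-in, not an LLVM type.

  // Header side: the virtual destructor is declared but not defined inline.
  struct WebAssemblyFunctionInfoLike {
    virtual ~WebAssemblyFunctionInfoLike(); // declared only
    virtual void dump() {}
  };

  // In exactly one .cpp file: the out-of-line defaulted destructor is the
  // "anchor" that forces the vtable to be emitted here and nowhere else.
  WebAssemblyFunctionInfoLike::~WebAssemblyFunctionInfoLike() = default;

  int main() {
    WebAssemblyFunctionInfoLike Info;
    Info.dump();
    return 0;
  }
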
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 4be4beb85d04..4b9ba491dee6 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -1,9 +1,8 @@
// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -18,11 +17,16 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCSymbolWasm.h"
namespace llvm {
+namespace yaml {
+struct WebAssemblyFunctionInfo;
+}
+
/// This class is derived from MachineFunctionInfo and contains private
/// WebAssembly-specific information for each MachineFunction.
class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
@@ -52,9 +56,13 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
// overaligned values on the user stack.
unsigned BasePtrVreg = -1U;
+ // Function properties.
+ bool CFGStackified = false;
+
public:
explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
~WebAssemblyFunctionInfo() override;
+ void initializeBaseYamlFields(const yaml::WebAssemblyFunctionInfo &YamlMFI);
void addParam(MVT VT) { Params.push_back(VT); }
const std::vector<MVT> &getParams() const { return Params; }
@@ -118,24 +126,47 @@ public:
assert(Reg & INT32_MIN);
return Reg & INT32_MAX;
}
+
+ bool isCFGStackified() const { return CFGStackified; }
+ void setCFGStackified(bool Value = true) { CFGStackified = Value; }
};
-void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
+void computeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty,
SmallVectorImpl<MVT> &ValueVTs);
// Compute the signature for a given FunctionType (Ty). Note that it's not the
// signature for F (F is just used to get various context)
-void ComputeSignatureVTs(const FunctionType *Ty, const Function &F,
+void computeSignatureVTs(const FunctionType *Ty, const Function &F,
const TargetMachine &TM, SmallVectorImpl<MVT> &Params,
SmallVectorImpl<MVT> &Results);
-void ValTypesFromMVTs(const ArrayRef<MVT> &In,
+void valTypesFromMVTs(const ArrayRef<MVT> &In,
SmallVectorImpl<wasm::ValType> &Out);
std::unique_ptr<wasm::WasmSignature>
-SignatureFromMVTs(const SmallVectorImpl<MVT> &Results,
+signatureFromMVTs(const SmallVectorImpl<MVT> &Results,
const SmallVectorImpl<MVT> &Params);
+namespace yaml {
+
+struct WebAssemblyFunctionInfo final : public yaml::MachineFunctionInfo {
+ bool CFGStackified = false;
+
+ WebAssemblyFunctionInfo() = default;
+ WebAssemblyFunctionInfo(const llvm::WebAssemblyFunctionInfo &MFI);
+
+ void mappingImpl(yaml::IO &YamlIO) override;
+ ~WebAssemblyFunctionInfo() = default;
+};
+
+template <> struct MappingTraits<WebAssemblyFunctionInfo> {
+ static void mapping(IO &YamlIO, WebAssemblyFunctionInfo &MFI) {
+ YamlIO.mapOptional("isCFGStackified", MFI.CFGStackified, false);
+ }
+};
+
+} // end namespace yaml
+
} // end namespace llvm
#endif
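
[Editorial aside, not part of the imported diff] The yaml::MappingTraits specialization added above follows LLVM's YAML I/O pattern: a trait class tells a generic IO driver which keys bind to which fields, with a default value for optional keys, so one mapping serves both printing and parsing of MIR. The sketch below shows only the shape of that pattern; FunctionInfoYaml and the toy IO writer are assumptions for illustration and are not the real llvm::yaml::IO, which also handles parsing, sequences, and error reporting.

  #include <iostream>
  #include <string>

  struct FunctionInfoYaml {
    bool CFGStackified = false;          // mirrors the field added above
  };

  // Toy writer: optional keys that still hold their default are omitted,
  // matching the behaviour requested by mapOptional(..., false).
  struct IO {
    void mapOptional(const std::string &Key, bool &Val, bool Default) {
      if (Val != Default)
        std::cout << Key << ": " << (Val ? "true" : "false") << '\n';
    }
  };

  template <typename T> struct MappingTraits;

  template <> struct MappingTraits<FunctionInfoYaml> {
    static void mapping(IO &YamlIO, FunctionInfoYaml &MFI) {
      YamlIO.mapOptional("isCFGStackified", MFI.CFGStackified, false);
    }
  };

  int main() {
    FunctionInfoYaml MFI;
    MFI.CFGStackified = true;
    IO Out;
    MappingTraits<FunctionInfoYaml>::mapping(Out, MFI); // prints: isCFGStackified: true
    return 0;
  }

In MIR output this would typically appear under the function's machineFunctionInfo block as "isCFGStackified: true" for functions whose CFG has been stackified.
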
diff --git a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index c4b5e96db0c7..7ac0511c28b0 100644
--- a/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -1,9 +1,8 @@
//== WebAssemblyMemIntrinsicResults.cpp - Optimize memory intrinsic results ==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -82,7 +81,7 @@ FunctionPass *llvm::createWebAssemblyMemIntrinsicResults() {
}
// Replace uses of FromReg with ToReg if they are dominated by MI.
-static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
+static bool replaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
unsigned FromReg, unsigned ToReg,
const MachineRegisterInfo &MRI,
MachineDominatorTree &MDT,
@@ -157,10 +156,10 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
return false;
StringRef Name(Op1.getSymbolName());
- bool callReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
+ bool CallReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
Name == TLI.getLibcallName(RTLIB::MEMSET);
- if (!callReturnsInput)
+ if (!CallReturnsInput)
return false;
LibFunc Func;
@@ -172,7 +171,7 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
report_fatal_error("Memory Intrinsic results: call to builtin function "
"with wrong signature, from/to mismatch");
- return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
+ return replaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
}
bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
@@ -182,11 +181,11 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
});
MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ auto &LIS = getAnalysis<LiveIntervals>();
bool Changed = false;
// We don't preserve SSA form.
@@ -201,8 +200,8 @@ bool WebAssemblyMemIntrinsicResults::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
- case WebAssembly::CALL_I32:
- case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_i32:
+ case WebAssembly::CALL_i64:
Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
break;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 3d0a15244ee0..8c7c3305c201 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -1,9 +1,8 @@
//===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -72,7 +71,7 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
<< MF.getName() << '\n');
MachineRegisterInfo &MRI = MF.getRegInfo();
- LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ auto &LIS = getAnalysis<LiveIntervals>();
// We don't preserve SSA form.
MRI.leaveSSA();
@@ -81,8 +80,8 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
// Split multiple-VN LiveIntervals into multiple LiveIntervals.
SmallVector<LiveInterval *, 4> SplitLIs;
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 2c018d0785a7..d20352259e07 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -37,11 +36,11 @@ class OptimizeReturned final : public FunctionPass,
bool runOnFunction(Function &F) override;
- DominatorTree *DT;
+ DominatorTree *DT = nullptr;
public:
static char ID;
- OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+ OptimizeReturned() : FunctionPass(ID) {}
void visitCallSite(CallSite CS);
};
@@ -57,10 +56,10 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
}
void OptimizeReturned::visitCallSite(CallSite CS) {
- for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
- if (CS.paramHasAttr(i, Attribute::Returned)) {
+ for (unsigned I = 0, E = CS.getNumArgOperands(); I < E; ++I)
+ if (CS.paramHasAttr(I, Attribute::Returned)) {
Instruction *Inst = CS.getInstruction();
- Value *Arg = CS.getArgOperand(i);
+ Value *Arg = CS.getArgOperand(I);
// Ignore constants, globals, undef, etc.
if (isa<Constant>(Arg))
continue;
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index 2dfd85953f14..e11cdeaa0e79 100644
--- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -58,7 +57,7 @@ FunctionPass *llvm::createWebAssemblyPeephole() {
}
/// If desirable, rewrite NewReg to a drop register.
-static bool MaybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
+static bool maybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
MachineOperand &MO, WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
bool Changed = false;
@@ -72,7 +71,7 @@ static bool MaybeRewriteToDrop(unsigned OldReg, unsigned NewReg,
return Changed;
}
-static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
+static bool maybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
const MachineFunction &MF,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI,
@@ -129,8 +128,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
- case WebAssembly::CALL_I32:
- case WebAssembly::CALL_I64: {
+ case WebAssembly::CALL_i32:
+ case WebAssembly::CALL_i64: {
MachineOperand &Op1 = MI.getOperand(1);
if (Op1.isSymbol()) {
StringRef Name(Op1.getSymbolName());
@@ -150,7 +149,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg))
report_fatal_error("Peephole: call to builtin function with "
"wrong signature, from/to mismatch");
- Changed |= MaybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI);
+ Changed |= maybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI);
}
}
}
@@ -158,57 +157,57 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
}
// Optimize away an explicit void return at the end of the function.
case WebAssembly::RETURN_I32:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32,
WebAssembly::COPY_I32);
break;
case WebAssembly::RETURN_I64:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64,
WebAssembly::COPY_I64);
break;
case WebAssembly::RETURN_F32:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32,
WebAssembly::COPY_F32);
break;
case WebAssembly::RETURN_F64:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64,
WebAssembly::COPY_F64);
break;
case WebAssembly::RETURN_v16i8:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8,
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_v8i16:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16,
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_v4i32:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_v2i64:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2i64,
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_v4f32:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_v2f64:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v2f64,
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_VOID:
- Changed |= MaybeRewriteToFallthrough(
+ Changed |= maybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
WebAssembly::INSTRUCTION_LIST_END);
break;
diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 0be0ba657830..3bfbf607344d 100644
--- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -1,9 +1,8 @@
//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -63,9 +62,9 @@ FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
}
// Test whether the given register has an ARGUMENT def.
-static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
+static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
for (const auto &Def : MRI.def_instructions(Reg))
- if (WebAssembly::isArgument(Def))
+ if (WebAssembly::isArgument(Def.getOpcode()))
return true;
return false;
}
@@ -95,15 +94,15 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
//
// TODO: This is fairly heavy-handed; find a better approach.
//
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
// Skip unused registers.
if (MRI.use_nodbg_empty(Reg))
continue;
// Skip registers that have an ARGUMENT definition.
- if (HasArgumentDef(Reg, MRI))
+ if (hasArgumentDef(Reg, MRI))
continue;
BuildMI(Entry, Entry.begin(), DebugLoc(),
@@ -115,7 +114,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
// liveness reflects the fact that these really are live-in values.
for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
MachineInstr &MI = *MII++;
- if (WebAssembly::isArgument(MI)) {
+ if (WebAssembly::isArgument(MI.getOpcode())) {
MI.removeFromParent();
Entry.insert(Entry.begin(), &MI);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index d97b13a8d699..6f09c45b6642 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -66,11 +65,11 @@ FunctionPass *llvm::createWebAssemblyRegColoring() {
static float computeWeight(const MachineRegisterInfo *MRI,
const MachineBlockFrequencyInfo *MBFI,
unsigned VReg) {
- float weight = 0.0f;
+ float Weight = 0.0f;
for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg))
- weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
+ Weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
*MO.getParent());
- return weight;
+ return Weight;
}
bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
@@ -98,8 +97,8 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
SortedIntervals.reserve(NumVRegs);
LLVM_DEBUG(dbgs() << "Interesting register intervals:\n");
- for (unsigned i = 0; i < NumVRegs; ++i) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned I = 0; I < NumVRegs; ++I) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(I);
if (MFI.isVRegStackified(VReg))
continue;
// Skip unused registers, which can use $drop.
@@ -134,10 +133,10 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
SortedIntervals.size());
BitVector UsedColors(SortedIntervals.size());
bool Changed = false;
- for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
- LiveInterval *LI = SortedIntervals[i];
+ for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
+ LiveInterval *LI = SortedIntervals[I];
unsigned Old = LI->reg;
- size_t Color = i;
+ size_t Color = I;
const TargetRegisterClass *RC = MRI->getRegClass(Old);
// Check if it's possible to reuse any of the used colors.
@@ -154,7 +153,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
}
unsigned New = SortedIntervals[Color]->reg;
- SlotMapping[i] = New;
+ SlotMapping[I] = New;
Changed |= Old != New;
UsedColors.set(Color);
Assignments[Color].push_back(LI);
@@ -166,9 +165,9 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
return false;
// Rewrite register operands.
- for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
- unsigned Old = SortedIntervals[i]->reg;
- unsigned New = SlotMapping[i];
+ for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
+ unsigned Old = SortedIntervals[I]->reg;
+ unsigned New = SlotMapping[I];
if (Old != New)
MRI->replaceRegWith(Old, New);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index 1e2a248f097e..cdca23f55b29 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -73,7 +72,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
// variables. Assign the numbers for them first.
MachineBasicBlock &EntryMBB = MF.front();
for (MachineInstr &MI : EntryMBB) {
- if (!WebAssembly::isArgument(MI))
+ if (!WebAssembly::isArgument(MI.getOpcode()))
break;
int64_t Imm = MI.getOperand(1).getImm();
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 1eb32ed64494..a120a6471014 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -80,7 +79,7 @@ FunctionPass *llvm::createWebAssemblyRegStackify() {
// Decorate the given instruction with implicit operands that enforce the
// expression stack ordering constraints for an instruction which is on
// the expression stack.
-static void ImposeStackOrdering(MachineInstr *MI) {
+static void imposeStackOrdering(MachineInstr *MI) {
// Write the opaque VALUE_STACK register.
if (!MI->definesRegister(WebAssembly::VALUE_STACK))
MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
@@ -96,7 +95,7 @@ static void ImposeStackOrdering(MachineInstr *MI) {
// Convert an IMPLICIT_DEF instruction into an instruction which defines
// a constant zero value.
-static void ConvertImplicitDefToConstZero(MachineInstr *MI,
+static void convertImplicitDefToConstZero(MachineInstr *MI,
MachineRegisterInfo &MRI,
const TargetInstrInfo *TII,
MachineFunction &MF,
@@ -112,12 +111,12 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
MI->addOperand(MachineOperand::CreateImm(0));
} else if (RegClass == &WebAssembly::F32RegClass) {
MI->setDesc(TII->get(WebAssembly::CONST_F32));
- ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
+ auto *Val = cast<ConstantFP>(Constant::getNullValue(
Type::getFloatTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
} else if (RegClass == &WebAssembly::F64RegClass) {
MI->setDesc(TII->get(WebAssembly::CONST_F64));
- ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
+ auto *Val = cast<ConstantFP>(Constant::getNullValue(
Type::getDoubleTy(MF.getFunction().getContext())));
MI->addOperand(MachineOperand::CreateFPImm(Val));
} else if (RegClass == &WebAssembly::V128RegClass) {
@@ -136,7 +135,7 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI,
// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects.
-static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
+static void queryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
bool &Write, bool &Effects, bool &StackPointer) {
// All calls can use the stack pointer.
StackPointer = true;
@@ -144,11 +143,11 @@ static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
const MachineOperand &MO = MI.getOperand(CalleeOpNo);
if (MO.isGlobal()) {
const Constant *GV = MO.getGlobal();
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const auto *GA = dyn_cast<GlobalAlias>(GV))
if (!GA->isInterposable())
GV = GA->getAliasee();
- if (const Function *F = dyn_cast<Function>(GV)) {
+ if (const auto *F = dyn_cast<Function>(GV)) {
if (!F->doesNotThrow())
Effects = true;
if (F->doesNotAccessMemory())
@@ -168,7 +167,7 @@ static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
// Determine whether MI reads memory, writes memory, has side effects,
// and/or uses the stack pointer value.
-static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
+static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
bool &Write, bool &Effects, bool &StackPointer) {
assert(!MI.isTerminator());
@@ -253,13 +252,13 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
// Analyze calls.
if (MI.isCall()) {
- unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
- QueryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
+ unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
+ queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
}
}
// Test whether Def is safe and profitable to rematerialize.
-static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
+static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
const WebAssemblyInstrInfo *TII) {
return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
}
@@ -267,7 +266,7 @@ static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
// Identify the definition for this register at this point. This is a
// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
// LiveIntervals to handle complex cases.
-static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
+static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
const MachineRegisterInfo &MRI,
const LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
@@ -285,7 +284,7 @@ static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
-static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
+static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
MachineDominatorTree &MDT, LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
if (MRI.hasOneUse(Reg))
@@ -314,10 +313,22 @@ static bool HasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
-static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+static bool isSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
assert(Def->getParent() == Insert->getParent());
+ // 'catch' and 'extract_exception' should be the first instruction of a BB and
+ // cannot move.
+ if (Def->getOpcode() == WebAssembly::CATCH ||
+ Def->getOpcode() == WebAssembly::EXTRACT_EXCEPTION_I32) {
+ const MachineBasicBlock *MBB = Def->getParent();
+ auto NextI = std::next(MachineBasicBlock::const_iterator(Def));
+ for (auto E = MBB->end(); NextI != E && NextI->isDebugInstr(); ++NextI)
+ ;
+ if (NextI != Insert)
+ return false;
+ }
+
// Check for register dependencies.
SmallVector<unsigned, 4> MutableRegisters;
for (const MachineOperand &MO : Def->operands()) {
@@ -350,7 +361,7 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
}
bool Read = false, Write = false, Effects = false, StackPointer = false;
- Query(*Def, AA, Read, Write, Effects, StackPointer);
+ query(*Def, AA, Read, Write, Effects, StackPointer);
// If the instruction does not access memory and has no side effects, it has
// no additional dependencies.
@@ -365,7 +376,7 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
bool InterveningWrite = false;
bool InterveningEffects = false;
bool InterveningStackPointer = false;
- Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
+ query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
InterveningStackPointer);
if (Effects && InterveningEffects)
return false;
@@ -386,7 +397,7 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
}
/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
-static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
+static bool oneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
const MachineBasicBlock &MBB,
const MachineRegisterInfo &MRI,
const MachineDominatorTree &MDT,
@@ -445,7 +456,7 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
}
/// Get the appropriate tee opcode for the given register class.
-static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
+static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
return WebAssembly::TEE_I32;
if (RC == &WebAssembly::I64RegClass)
@@ -460,7 +471,7 @@ static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
}
// Shrink LI to its uses, cleaning up LI.
-static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
+static void shrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
if (LIS.shrinkToUses(&LI)) {
SmallVector<LiveInterval *, 4> SplitLIs;
LIS.splitSeparateComponents(LI, SplitLIs);
@@ -469,7 +480,7 @@ static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
-static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
+static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI,
@@ -508,13 +519,13 @@ static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}
- ImposeStackOrdering(Def);
+ imposeStackOrdering(Def);
return Def;
}
/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
-static MachineInstr *RematerializeCheapDef(
+static MachineInstr *rematerializeCheapDef(
unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
@@ -531,7 +542,7 @@ static MachineInstr *RematerializeCheapDef(
LIS.InsertMachineInstrInMaps(*Clone);
LIS.createAndComputeVirtRegInterval(NewReg);
MFI.stackifyVReg(NewReg);
- ImposeStackOrdering(Clone);
+ imposeStackOrdering(Clone);
LLVM_DEBUG(dbgs() << " - Cloned to "; Clone->dump());
@@ -539,7 +550,7 @@ static MachineInstr *RematerializeCheapDef(
bool IsDead = MRI.use_empty(Reg);
if (!IsDead) {
LiveInterval &LI = LIS.getInterval(Reg);
- ShrinkToUses(LI, LIS);
+ shrinkToUses(LI, LIS);
IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
}
@@ -582,7 +593,7 @@ static MachineInstr *RematerializeCheapDef(
///
/// with DefReg and TeeReg stackified. This eliminates a local.get from the
/// resulting code.
-static MachineInstr *MoveAndTeeForMultiUse(
+static MachineInstr *moveAndTeeForMultiUse(
unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
@@ -600,7 +611,7 @@ static MachineInstr *MoveAndTeeForMultiUse(
unsigned DefReg = MRI.createVirtualRegister(RegClass);
MachineOperand &DefMO = Def->getOperand(0);
MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
- TII->get(GetTeeOpcode(RegClass)), TeeReg)
+ TII->get(getTeeOpcode(RegClass)), TeeReg)
.addReg(Reg, RegState::Define)
.addReg(DefReg, getUndefRegState(DefMO.isDead()));
Op.setReg(TeeReg);
@@ -616,15 +627,15 @@ static MachineInstr *MoveAndTeeForMultiUse(
VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
I->start = TeeIdx;
ValNo->def = TeeIdx;
- ShrinkToUses(LI, LIS);
+ shrinkToUses(LI, LIS);
// Finish stackifying the new regs.
LIS.createAndComputeVirtRegInterval(TeeReg);
LIS.createAndComputeVirtRegInterval(DefReg);
MFI.stackifyVReg(DefReg);
MFI.stackifyVReg(TeeReg);
- ImposeStackOrdering(Def);
- ImposeStackOrdering(Tee);
+ imposeStackOrdering(Def);
+ imposeStackOrdering(Tee);
DefDIs.clone(Tee, DefReg);
DefDIs.clone(Insert, TeeReg);
@@ -638,9 +649,9 @@ namespace {
/// A stack for walking the tree of instructions being built, visiting the
/// MachineOperands in DFS order.
class TreeWalkerState {
- typedef MachineInstr::mop_iterator mop_iterator;
- typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
- typedef iterator_range<mop_reverse_iterator> RangeTy;
+ using mop_iterator = MachineInstr::mop_iterator;
+ using mop_reverse_iterator = std::reverse_iterator<mop_iterator>;
+ using RangeTy = iterator_range<mop_reverse_iterator>;
SmallVector<RangeTy, 4> Worklist;
public:
@@ -650,9 +661,9 @@ public:
Worklist.push_back(reverse(Range));
}
- bool Done() const { return Worklist.empty(); }
+ bool done() const { return Worklist.empty(); }
- MachineOperand &Pop() {
+ MachineOperand &pop() {
RangeTy &Range = Worklist.back();
MachineOperand &Op = *Range.begin();
Range = drop_begin(Range, 1);
@@ -665,7 +676,7 @@ public:
}
/// Push Instr's operands onto the stack to be visited.
- void PushOperands(MachineInstr *Instr) {
+ void pushOperands(MachineInstr *Instr) {
const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
if (Range.begin() != Range.end())
Worklist.push_back(reverse(Range));
@@ -673,8 +684,8 @@ public:
/// Some of Instr's operands are on the top of the stack; remove them and
/// re-insert them starting from the beginning (because we've commuted them).
- void ResetTopOperands(MachineInstr *Instr) {
- assert(HasRemainingOperands(Instr) &&
+ void resetTopOperands(MachineInstr *Instr) {
+ assert(hasRemainingOperands(Instr) &&
"Reseting operands should only be done when the instruction has "
"an operand still on the stack");
Worklist.back() = reverse(Instr->explicit_uses());
@@ -682,7 +693,7 @@ public:
/// Test whether Instr has operands remaining to be visited at the top of
/// the stack.
- bool HasRemainingOperands(const MachineInstr *Instr) const {
+ bool hasRemainingOperands(const MachineInstr *Instr) const {
if (Worklist.empty())
return false;
const RangeTy &Range = Worklist.back();
@@ -695,7 +706,7 @@ public:
///
/// This is needed as a consequence of using implicit local.gets for
/// uses and implicit local.sets for defs.
- bool IsOnStack(unsigned Reg) const {
+ bool isOnStack(unsigned Reg) const {
for (const RangeTy &Range : Worklist)
for (const MachineOperand &MO : Range)
if (MO.isReg() && MO.getReg() == Reg)
@@ -712,20 +723,18 @@ class CommutingState {
/// state where we've commuted the operands of the current instruction and are
/// revisiting it, and the declined state where we've reverted the operands
/// back to their original order and will no longer commute it further.
- bool TentativelyCommuting;
- bool Declined;
+ bool TentativelyCommuting = false;
+ bool Declined = false;
/// During the tentative state, these hold the operand indices of the commuted
/// operands.
unsigned Operand0, Operand1;
public:
- CommutingState() : TentativelyCommuting(false), Declined(false) {}
-
/// Stackification for an operand was not successful due to ordering
/// constraints. If possible, and if we haven't already tried it and declined
/// it, commute Insert's operands and prepare to revisit it.
- void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
+ void maybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
const WebAssemblyInstrInfo *TII) {
if (TentativelyCommuting) {
assert(!Declined &&
@@ -734,13 +743,13 @@ public:
TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
TentativelyCommuting = false;
Declined = true;
- } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
+ } else if (!Declined && TreeWalker.hasRemainingOperands(Insert)) {
Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
// Tentatively commute the operands and try again.
TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
- TreeWalker.ResetTopOperands(Insert);
+ TreeWalker.resetTopOperands(Insert);
TentativelyCommuting = true;
Declined = false;
}
@@ -749,7 +758,7 @@ public:
/// Stackification for some operand was successful. Reset to the default
/// state.
- void Reset() {
+ void reset() {
TentativelyCommuting = false;
Declined = false;
}
@@ -767,8 +776,8 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
- LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ auto &LIS = getAnalysis<LiveIntervals>();
// Walk the instructions from the bottom up. Currently we don't look past
// block boundaries, and the blocks aren't ordered so the block visitation
@@ -780,19 +789,19 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *Insert = &*MII;
// Don't nest anything inside an inline asm, because we don't have
// constraints for $push inputs.
- if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+ if (Insert->isInlineAsm())
continue;
// Ignore debugging intrinsics.
- if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
+ if (Insert->isDebugValue())
continue;
// Iterate through the inputs in reverse order, since we'll be pulling
// operands off the stack in LIFO order.
CommutingState Commuting;
TreeWalkerState TreeWalker(Insert);
- while (!TreeWalker.Done()) {
- MachineOperand &Op = TreeWalker.Pop();
+ while (!TreeWalker.done()) {
+ MachineOperand &Op = TreeWalker.pop();
// We're only interested in explicit virtual register operands.
if (!Op.isReg())
@@ -806,18 +815,36 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
continue;
// Identify the definition for this register at this point.
- MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
+ MachineInstr *Def = getVRegDef(Reg, Insert, MRI, LIS);
if (!Def)
continue;
// Don't nest an INLINE_ASM def into anything, because we don't have
// constraints for $pop outputs.
- if (Def->getOpcode() == TargetOpcode::INLINEASM)
+ if (Def->isInlineAsm())
continue;
// Argument instructions represent live-in registers and not real
// instructions.
- if (WebAssembly::isArgument(*Def))
+ if (WebAssembly::isArgument(Def->getOpcode()))
+ continue;
+
+ // Currently catch's return value register cannot be stackified, because
+ // the wasm LLVM backend currently does not support live-in values
+ // entering blocks, which is a part of multi-value proposal.
+ //
+ // Once we support live-in values of wasm blocks, this can be:
+ // catch ; push exnref value onto stack
+ // block exnref -> i32
+ // br_on_exn $__cpp_exception ; pop the exnref value
+ // end_block
+ //
+ // But because we don't support it yet, the catch instruction's dst
+ // register should be assigned to a local to be propagated across
+ // 'block' boundary now.
+ //
+ // TODO Fix this once we support the multi-value proposal.
+ if (Def->getOpcode() == WebAssembly::CATCH)
continue;
// Decide which strategy to take. Prefer to move a single-use value
@@ -827,23 +854,23 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// supports intra-block moves) and it's MachineSink's job to catch all
// the sinking opportunities anyway.
bool SameBlock = Def->getParent() == &MBB;
- bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) &&
- !TreeWalker.IsOnStack(Reg);
- if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
- Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
- } else if (ShouldRematerialize(*Def, AA, TII)) {
+ bool CanMove = SameBlock && isSafeToMove(Def, Insert, AA, MRI) &&
+ !TreeWalker.isOnStack(Reg);
+ if (CanMove && hasOneUse(Reg, Def, MRI, MDT, LIS)) {
+ Insert = moveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
+ } else if (shouldRematerialize(*Def, AA, TII)) {
Insert =
- RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
+ rematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
LIS, MFI, MRI, TII, TRI);
} else if (CanMove &&
- OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
- Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
+ oneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
+ Insert = moveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
MRI, TII);
} else {
// We failed to stackify the operand. If the problem was ordering
// constraints, Commuting may be able to help.
if (!CanMove && SameBlock)
- Commuting.MaybeCommute(Insert, TreeWalker, TII);
+ Commuting.maybeCommute(Insert, TreeWalker, TII);
// Proceed to the next operand.
continue;
}
@@ -852,18 +879,18 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// to a constant 0 so that the def is explicit, and the push/pop
// correspondence is maintained.
if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
- ConvertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
+ convertImplicitDefToConstZero(Insert, MRI, TII, MF, LIS);
// We stackified an operand. Add the defining instruction's operands to
// the worklist stack now to continue to build an ever deeper tree.
- Commuting.Reset();
- TreeWalker.PushOperands(Insert);
+ Commuting.reset();
+ TreeWalker.pushOperands(Insert);
}
// If we stackified any operands, skip over the tree to start looking for
// the next instruction we can build a tree on.
if (Insert != &*MII) {
- ImposeStackOrdering(&*MII);
+ imposeStackOrdering(&*MII);
MII = MachineBasicBlock::iterator(Insert).getReverse();
Changed = true;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 1f0870865b06..ea9cfc00adfd 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyRegisterInfo.cpp - WebAssembly Register Information ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -67,19 +66,22 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
assert(MFI.getObjectSize(FrameIndex) != 0 &&
"We assume that variable-sized objects have already been lowered, "
"and don't use FrameIndex operands.");
- unsigned FrameRegister = getFrameRegister(MF);
+ Register FrameRegister = getFrameRegister(MF);
// If this is the address operand of a load or store, make it relative to SP
// and fold the frame offset directly in.
- if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) ||
- (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) {
- assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0);
- int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset;
+ unsigned AddrOperandNum = WebAssembly::getNamedOperandIdx(
+ MI.getOpcode(), WebAssembly::OpName::addr);
+ if (AddrOperandNum == FIOperandNum) {
+ unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
+ MI.getOpcode(), WebAssembly::OpName::off);
+ assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
+ int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;
if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
- MI.getOperand(FIOperandNum - 1).setImm(Offset);
+ MI.getOperand(OffsetOperandNum).setImm(Offset);
MI.getOperand(FIOperandNum)
- .ChangeToRegister(FrameRegister, /*IsDef=*/false);
+ .ChangeToRegister(FrameRegister, /*isDef=*/false);
return;
}
}
@@ -100,7 +102,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineOperand &ImmMO = Def->getOperand(1);
ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
MI.getOperand(FIOperandNum)
- .ChangeToRegister(FrameRegister, /*IsDef=*/false);
+ .ChangeToRegister(FrameRegister, /*isDef=*/false);
return;
}
}
@@ -125,10 +127,10 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
.addReg(FrameRegister)
.addReg(OffsetOp);
}
- MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*isDef=*/false);
}
-unsigned
+Register
WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
static const unsigned Regs[2][2] = {
/* !isArch64Bit isArch64Bit */
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index 2a73dfd4b065..7880eb217dbf 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -1,9 +1,8 @@
// WebAssemblyRegisterInfo.h - WebAssembly Register Information Impl -*- C++ -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -40,7 +39,7 @@ public:
RegScavenger *RS = nullptr) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index a7c3d177724d..6d3d6c723277 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -1,9 +1,8 @@
//WebAssemblyRegisterInfo.td-Describe the WebAssembly Registers -*- tablegen -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -44,7 +43,7 @@ def F64_0 : WebAssemblyReg<"%f64.0">;
def V128_0: WebAssemblyReg<"%v128">;
-def EXCEPT_REF_0 : WebAssemblyReg<"%except_ref.0">;
+def EXNREF_0 : WebAssemblyReg<"%exnref.0">;
// The value stack "register". This is an opaque entity which serves to order
// uses and defs that must remain in LIFO order.
@@ -65,4 +64,4 @@ def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
(add V128_0)>;
-def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>;
+def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index e5a3e47a3bcd..5eafd6c54e78 100644
--- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 6cf81a9d77b3..7b9ae90326f0 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -1,9 +1,8 @@
// CodeGen/RuntimeLibcallSignatures.cpp - R.T. Lib. Call Signatures -*- C++ -*--
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -52,6 +51,8 @@ enum RuntimeLibcallSignature {
f64_func_f64_i32,
f64_func_i64_i64,
i16_func_f32,
+ i16_func_f64,
+ i16_func_i64_i64,
i8_func_i8_i8,
func_f32_iPTR_iPTR,
func_f64_iPTR_iPTR,
@@ -85,6 +86,9 @@ enum RuntimeLibcallSignature {
func_iPTR_i64_i64_i64_i64_i64_i64,
i32_func_i64_i64,
i32_func_i64_i64_i64_i64,
+ iPTR_func_f32,
+ iPTR_func_f64,
+ iPTR_func_i64_i64,
unsupported
};
@@ -215,6 +219,18 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::ROUND_F32] = f32_func_f32;
Table[RTLIB::ROUND_F64] = f64_func_f64;
Table[RTLIB::ROUND_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::LROUND_F32] = iPTR_func_f32;
+ Table[RTLIB::LROUND_F64] = iPTR_func_f64;
+ Table[RTLIB::LROUND_F128] = iPTR_func_i64_i64;
+ Table[RTLIB::LLROUND_F32] = i64_func_f32;
+ Table[RTLIB::LLROUND_F64] = i64_func_f64;
+ Table[RTLIB::LLROUND_F128] = i64_func_i64_i64;
+ Table[RTLIB::LRINT_F32] = iPTR_func_f32;
+ Table[RTLIB::LRINT_F64] = iPTR_func_f64;
+ Table[RTLIB::LRINT_F128] = iPTR_func_i64_i64;
+ Table[RTLIB::LLRINT_F32] = i64_func_f32;
+ Table[RTLIB::LLRINT_F64] = i64_func_f64;
+ Table[RTLIB::LLRINT_F128] = i64_func_i64_i64;
Table[RTLIB::FLOOR_F32] = f32_func_f32;
Table[RTLIB::FLOOR_F64] = f64_func_f64;
Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64;
@@ -229,13 +245,15 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::FMAX_F128] = func_iPTR_i64_i64_i64_i64;
// Conversion
- // All F80 and PPCF128 routines are unspported.
+ // All F80 and PPCF128 routines are unsupported.
Table[RTLIB::FPEXT_F64_F128] = func_iPTR_f64;
Table[RTLIB::FPEXT_F32_F128] = func_iPTR_f32;
Table[RTLIB::FPEXT_F32_F64] = f64_func_f32;
Table[RTLIB::FPEXT_F16_F32] = f32_func_i16;
Table[RTLIB::FPROUND_F32_F16] = i16_func_f32;
+ Table[RTLIB::FPROUND_F64_F16] = i16_func_f64;
Table[RTLIB::FPROUND_F64_F32] = f32_func_f64;
+ Table[RTLIB::FPROUND_F128_F16] = i16_func_i64_i64;
Table[RTLIB::FPROUND_F128_F32] = f32_func_i64_i64;
Table[RTLIB::FPROUND_F128_F64] = f64_func_i64_i64;
Table[RTLIB::FPTOSINT_F32_I32] = i32_func_f32;
@@ -310,6 +328,12 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::MEMSET] = iPTR_func_iPTR_i32_iPTR;
Table[RTLIB::MEMMOVE] = iPTR_func_iPTR_iPTR_iPTR;
+ // __stack_chk_fail
+ Table[RTLIB::STACKPROTECTOR_CHECK_FAIL] = func;
+
+ // Return address handling
+ Table[RTLIB::RETURN_ADDRESS] = i32_func_i32;
+
// Element-wise Atomic memory
// TODO: Fix these when we implement atomic support
Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = unsupported;
@@ -480,19 +504,25 @@ struct StaticLibcallNameMap {
Map[NameLibcall.first] = NameLibcall.second;
}
}
+ // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
+ // consistent with the f64 and f128 names.
+ Map["__extendhfsf2"] = RTLIB::FPEXT_F16_F32;
+ Map["__truncsfhf2"] = RTLIB::FPROUND_F32_F16;
+
+ Map["emscripten_return_address"] = RTLIB::RETURN_ADDRESS;
}
};
} // end anonymous namespace
-void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
RTLIB::Libcall LC,
SmallVectorImpl<wasm::ValType> &Rets,
SmallVectorImpl<wasm::ValType> &Params) {
assert(Rets.empty());
assert(Params.empty());
- wasm::ValType iPTR =
+ wasm::ValType PtrTy =
Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
auto &Table = RuntimeLibcallSignatures->Table;
@@ -593,6 +623,15 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::F32);
break;
+ case i16_func_f64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i16_func_i64_i64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
case i8_func_i8_i8:
Rets.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
@@ -600,13 +639,13 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
break;
case func_f32_iPTR_iPTR:
Params.push_back(wasm::ValType::F32);
- Params.push_back(iPTR);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
+ Params.push_back(PtrTy);
break;
case func_f64_iPTR_iPTR:
Params.push_back(wasm::ValType::F64);
- Params.push_back(iPTR);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
+ Params.push_back(PtrTy);
break;
case i16_func_i16_i16:
Rets.push_back(wasm::ValType::I32);
@@ -632,7 +671,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
break;
case i64_func_i64_i64:
Rets.push_back(wasm::ValType::I64);
@@ -643,14 +682,14 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
break;
case i64_i64_func_f32:
#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::F32);
break;
@@ -659,7 +698,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::F64);
break;
@@ -668,7 +707,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I32);
Rets.push_back(wasm::ValType::I32);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
@@ -678,7 +717,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I32);
Rets.push_back(wasm::ValType::I32);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
@@ -688,7 +727,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
@@ -698,7 +737,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
@@ -710,13 +749,13 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
break;
case i64_i64_i64_i64_func_i64_i64_i64_i64:
#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
@@ -725,7 +764,7 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
@@ -739,23 +778,23 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Rets.push_back(wasm::ValType::I64);
Rets.push_back(wasm::ValType::I64);
#else
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
#endif
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I32);
break;
case iPTR_func_iPTR_i32_iPTR:
- Rets.push_back(iPTR);
- Params.push_back(iPTR);
+ Rets.push_back(PtrTy);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I32);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
break;
case iPTR_func_iPTR_iPTR_iPTR:
- Rets.push_back(iPTR);
- Params.push_back(iPTR);
- Params.push_back(iPTR);
- Params.push_back(iPTR);
+ Rets.push_back(PtrTy);
+ Params.push_back(PtrTy);
+ Params.push_back(PtrTy);
+ Params.push_back(PtrTy);
break;
case f32_func_f32_f32_f32:
Rets.push_back(wasm::ValType::F32);
@@ -772,39 +811,39 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
case func_i64_i64_iPTR_iPTR:
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
- Params.push_back(iPTR);
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
+ Params.push_back(PtrTy);
break;
case func_iPTR_f32:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::F32);
break;
case func_iPTR_f64:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::F64);
break;
case func_iPTR_i32:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I32);
break;
case func_iPTR_i64:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I64);
break;
case func_iPTR_i64_i64:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
case func_iPTR_i64_i64_i64_i64:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
case func_iPTR_i64_i64_i64_i64_i64_i64:
- Params.push_back(iPTR);
+ Params.push_back(PtrTy);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
@@ -824,6 +863,19 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
+ case iPTR_func_f32:
+ Rets.push_back(PtrTy);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case iPTR_func_f64:
+ Rets.push_back(PtrTy);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case iPTR_func_i64_i64:
+ Rets.push_back(PtrTy);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
case unsupported:
llvm_unreachable("unsupported runtime library signature");
}
@@ -832,12 +884,17 @@ void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
static ManagedStatic<StaticLibcallNameMap> LibcallNameMap;
// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
// other than here, just roll its logic into this version.
-void llvm::GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
const char *Name,
SmallVectorImpl<wasm::ValType> &Rets,
SmallVectorImpl<wasm::ValType> &Params) {
auto &Map = LibcallNameMap->Map;
- auto val = Map.find(Name);
- assert(val != Map.end() && "unexpected runtime library name");
- return GetLibcallSignature(Subtarget, val->second, Rets, Params);
+ auto Val = Map.find(Name);
+#ifndef NDEBUG
+ if (Val == Map.end()) {
+ auto message = std::string("unexpected runtime library name: ") + Name;
+ llvm_unreachable(message.c_str());
+ }
+#endif
+ return getLibcallSignature(Subtarget, Val->second, Rets, Params);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
index 7fa70bea96de..6ae8aaaba59c 100644
--- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -1,9 +1,8 @@
// CodeGen/RuntimeLibcallSignatures.h - R.T. Lib. Call Signatures -*- C++ -*--//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -23,12 +22,12 @@ namespace llvm {
class WebAssemblySubtarget;
-extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
RTLIB::Libcall LC,
SmallVectorImpl<wasm::ValType> &Rets,
SmallVectorImpl<wasm::ValType> &Params);
-extern void GetLibcallSignature(const WebAssemblySubtarget &Subtarget,
+extern void getLibcallSignature(const WebAssemblySubtarget &Subtarget,
const char *Name,
SmallVectorImpl<wasm::ValType> &Rets,
SmallVectorImpl<wasm::ValType> &Params);
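For reference, a minimal usage sketch of the renamed helpers declared above (not part of the patch): ST stands in for an existing WebAssemblySubtarget and the necessary includes are omitted; the call shape and the RTLIB::MEMMOVE table entry are taken directly from the hunks above.
  // Query the wasm signature of the memmove runtime libcall.
  SmallVector<wasm::ValType, 2> Rets;
  SmallVector<wasm::ValType, 4> Params;
  getLibcallSignature(ST, RTLIB::MEMMOVE, Rets, Params);
  // MEMMOVE maps to iPTR_func_iPTR_iPTR_iPTR, so Rets holds one pointer-sized
  // type and Params holds three; i32 vs. i64 is chosen by Subtarget.hasAddr64().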
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index bec72049258a..890e4b8e4e2a 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblySelectionDAGInfo.cpp - WebAssembly SelectionDAG Info ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -17,4 +16,44 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-selectiondag-info"
-WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {}
+WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor
+
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ if (!DAG.getMachineFunction()
+ .getSubtarget<WebAssemblySubtarget>()
+ .hasBulkMemory())
+ return SDValue();
+
+ SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32);
+ return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other,
+ {Chain, MemIdx, MemIdx, Dst, Src,
+ DAG.getZExtOrTrunc(Size, DL, MVT::i32)});
+}
+
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+ return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, Align,
+ IsVolatile, false, DstPtrInfo,
+ SrcPtrInfo);
+}
+
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val,
+ SDValue Size, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ if (!DAG.getMachineFunction()
+ .getSubtarget<WebAssemblySubtarget>()
+ .hasBulkMemory())
+ return SDValue();
+
+ SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32);
+ // Only the low byte matters for the Val argument, so anyext the i8
+ return DAG.getNode(WebAssemblyISD::MEMORY_FILL, DL, MVT::Other, Chain, MemIdx,
+ Dst, DAG.getAnyExtOrTrunc(Val, DL, MVT::i32),
+ DAG.getZExtOrTrunc(Size, DL, MVT::i32));
+}
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
index 31d150eded67..0b90ece27dff 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -1,9 +1,8 @@
//=- WebAssemblySelectionDAGInfo.h - WebAssembly SelectionDAG Info -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -23,6 +22,21 @@ namespace llvm {
class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo {
public:
~WebAssemblySelectionDAGInfo() override;
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align, bool isVolatile,
+ bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+ SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const override;
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Chain, SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align, bool IsVolatile,
+ MachinePointerInfo DstPtrInfo) const override;
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index c95af88c6f43..a249ccf17638 100644
--- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -1,9 +1,8 @@
//=- WebAssemblySetP2AlignOperands.cpp - Set alignments on loads and stores -=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -14,6 +13,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyInstrInfo.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblySetP2AlignOperands() {
return new WebAssemblySetP2AlignOperands();
}
-static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
+static void rewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
assert(MI.getOperand(OperandNo).getImm() == 0 &&
"ISel should set p2align operands to 0");
assert(MI.hasOneMemOperand() &&
@@ -84,114 +84,11 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
for (auto &MBB : MF) {
for (auto &MI : MBB) {
- switch (MI.getOpcode()) {
- case WebAssembly::LOAD_I32:
- case WebAssembly::LOAD_I64:
- case WebAssembly::LOAD_F32:
- case WebAssembly::LOAD_F64:
- case WebAssembly::LOAD_v16i8:
- case WebAssembly::LOAD_v8i16:
- case WebAssembly::LOAD_v4i32:
- case WebAssembly::LOAD_v2i64:
- case WebAssembly::LOAD_v4f32:
- case WebAssembly::LOAD_v2f64:
- case WebAssembly::LOAD8_S_I32:
- case WebAssembly::LOAD8_U_I32:
- case WebAssembly::LOAD16_S_I32:
- case WebAssembly::LOAD16_U_I32:
- case WebAssembly::LOAD8_S_I64:
- case WebAssembly::LOAD8_U_I64:
- case WebAssembly::LOAD16_S_I64:
- case WebAssembly::LOAD16_U_I64:
- case WebAssembly::LOAD32_S_I64:
- case WebAssembly::LOAD32_U_I64:
- case WebAssembly::ATOMIC_LOAD_I32:
- case WebAssembly::ATOMIC_LOAD8_U_I32:
- case WebAssembly::ATOMIC_LOAD16_U_I32:
- case WebAssembly::ATOMIC_LOAD_I64:
- case WebAssembly::ATOMIC_LOAD8_U_I64:
- case WebAssembly::ATOMIC_LOAD16_U_I64:
- case WebAssembly::ATOMIC_LOAD32_U_I64:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I32:
- case WebAssembly::ATOMIC_RMW8_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I32:
- case WebAssembly::ATOMIC_RMW8_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW8_U_AND_I32:
- case WebAssembly::ATOMIC_RMW8_U_AND_I64:
- case WebAssembly::ATOMIC_RMW8_U_OR_I32:
- case WebAssembly::ATOMIC_RMW8_U_OR_I64:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I32:
- case WebAssembly::ATOMIC_RMW8_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I32:
- case WebAssembly::ATOMIC_RMW8_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I32:
- case WebAssembly::ATOMIC_RMW16_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I32:
- case WebAssembly::ATOMIC_RMW16_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW16_U_AND_I32:
- case WebAssembly::ATOMIC_RMW16_U_AND_I64:
- case WebAssembly::ATOMIC_RMW16_U_OR_I32:
- case WebAssembly::ATOMIC_RMW16_U_OR_I64:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I32:
- case WebAssembly::ATOMIC_RMW16_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I32:
- case WebAssembly::ATOMIC_RMW16_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW_ADD_I32:
- case WebAssembly::ATOMIC_RMW32_U_ADD_I64:
- case WebAssembly::ATOMIC_RMW_SUB_I32:
- case WebAssembly::ATOMIC_RMW32_U_SUB_I64:
- case WebAssembly::ATOMIC_RMW_AND_I32:
- case WebAssembly::ATOMIC_RMW32_U_AND_I64:
- case WebAssembly::ATOMIC_RMW_OR_I32:
- case WebAssembly::ATOMIC_RMW32_U_OR_I64:
- case WebAssembly::ATOMIC_RMW_XOR_I32:
- case WebAssembly::ATOMIC_RMW32_U_XOR_I64:
- case WebAssembly::ATOMIC_RMW_XCHG_I32:
- case WebAssembly::ATOMIC_RMW32_U_XCHG_I64:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I32:
- case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64:
- case WebAssembly::ATOMIC_RMW_ADD_I64:
- case WebAssembly::ATOMIC_RMW_SUB_I64:
- case WebAssembly::ATOMIC_RMW_AND_I64:
- case WebAssembly::ATOMIC_RMW_OR_I64:
- case WebAssembly::ATOMIC_RMW_XOR_I64:
- case WebAssembly::ATOMIC_RMW_XCHG_I64:
- case WebAssembly::ATOMIC_RMW_CMPXCHG_I64:
- case WebAssembly::ATOMIC_NOTIFY:
- case WebAssembly::ATOMIC_WAIT_I32:
- case WebAssembly::ATOMIC_WAIT_I64:
- RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
- break;
- case WebAssembly::STORE_I32:
- case WebAssembly::STORE_I64:
- case WebAssembly::STORE_F32:
- case WebAssembly::STORE_F64:
- case WebAssembly::STORE_v16i8:
- case WebAssembly::STORE_v8i16:
- case WebAssembly::STORE_v4i32:
- case WebAssembly::STORE_v2i64:
- case WebAssembly::STORE_v4f32:
- case WebAssembly::STORE_v2f64:
- case WebAssembly::STORE8_I32:
- case WebAssembly::STORE16_I32:
- case WebAssembly::STORE8_I64:
- case WebAssembly::STORE16_I64:
- case WebAssembly::STORE32_I64:
- case WebAssembly::ATOMIC_STORE_I32:
- case WebAssembly::ATOMIC_STORE8_I32:
- case WebAssembly::ATOMIC_STORE16_I32:
- case WebAssembly::ATOMIC_STORE_I64:
- case WebAssembly::ATOMIC_STORE8_I64:
- case WebAssembly::ATOMIC_STORE16_I64:
- case WebAssembly::ATOMIC_STORE32_I64:
- RewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo);
- break;
- default:
- break;
+ int16_t P2AlignOpNum = WebAssembly::getNamedOperandIdx(
+ MI.getOpcode(), WebAssembly::OpName::p2align);
+ if (P2AlignOpNum != -1) {
+ rewriteP2Align(MI, P2AlignOpNum);
+ Changed = true;
}
}
}
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 98133e2153a0..196a74565285 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblySubtarget.cpp - WebAssembly Subtarget Information ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -45,6 +44,11 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
TLInfo(TM, *this) {}
+bool WebAssemblySubtarget::enableAtomicExpand() const {
+ // If atomics are disabled, atomic ops are lowered instead of expanded
+ return hasAtomics();
+}
+
bool WebAssemblySubtarget::enableMachineScheduler() const {
// Disable the MachineScheduler for now. Even with ShouldTrackPressure set and
// enableMachineSchedDefaultSched overridden, it appears to have an overall
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 0a0c04609ac4..8db2120f9834 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -1,9 +1,8 @@
//=- WebAssemblySubtarget.h - Define Subtarget for the WebAssembly -*- C++ -*-//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -23,11 +22,16 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include <string>
+#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_HEADER
#include "WebAssemblyGenSubtargetInfo.inc"
namespace llvm {
+// Defined in WebAssemblyGenSubtargetInfo.inc.
+extern const SubtargetFeatureKV
+ WebAssemblyFeatureKV[WebAssembly::NumSubtargetFeatures];
+
class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
enum SIMDEnum {
NoSIMD,
@@ -39,6 +43,10 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
bool HasNontrappingFPToInt = false;
bool HasSignExt = false;
bool HasExceptionHandling = false;
+ bool HasBulkMemory = false;
+ bool HasMultivalue = false;
+ bool HasMutableGlobals = false;
+ bool HasTailCall = false;
/// String name of used CPU.
std::string CPUString;
@@ -77,6 +85,8 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
const Triple &getTargetTriple() const { return TargetTriple; }
+ bool enableAtomicExpand() const override;
+ bool enableIndirectBrExpand() const override { return true; }
bool enableMachineScheduler() const override;
bool useAA() const override;
@@ -90,6 +100,10 @@ public:
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasSignExt() const { return HasSignExt; }
bool hasExceptionHandling() const { return HasExceptionHandling; }
+ bool hasBulkMemory() const { return HasBulkMemory; }
+ bool hasMultivalue() const { return HasMultivalue; }
+ bool hasMutableGlobals() const { return HasMutableGlobals; }
+ bool hasTailCall() const { return HasTailCall; }
/// Parses features string setting specified subtarget options. Definition of
/// function is auto generated by tblgen.
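As a hedged illustration (not part of the patch) of how the new feature predicates are consumed, the fragment below restates the bulk-memory guard used by the SelectionDAG memcpy/memset lowering earlier in this patch; DAG is assumed to be an in-scope SelectionDAG.
  // Bail out to the generic libcall expansion unless bulk memory is enabled.
  const auto &ST = DAG.getMachineFunction().getSubtarget<WebAssemblySubtarget>();
  if (!ST.hasBulkMemory())
    return SDValue();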
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 3bf8dd40892c..7e65368e671a 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -1,9 +1,8 @@
//===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -14,9 +13,12 @@
#include "WebAssemblyTargetMachine.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyTargetObjectFile.h"
#include "WebAssemblyTargetTransformInfo.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
@@ -25,6 +27,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
@@ -58,19 +61,18 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
initializeOptimizeReturnedPass(PR);
initializeWebAssemblyArgumentMovePass(PR);
initializeWebAssemblySetP2AlignOperandsPass(PR);
- initializeWebAssemblyEHRestoreStackPointerPass(PR);
initializeWebAssemblyReplacePhysRegsPass(PR);
initializeWebAssemblyPrepareForLiveIntervalsPass(PR);
initializeWebAssemblyOptimizeLiveIntervalsPass(PR);
initializeWebAssemblyMemIntrinsicResultsPass(PR);
initializeWebAssemblyRegStackifyPass(PR);
initializeWebAssemblyRegColoringPass(PR);
- initializeWebAssemblyExplicitLocalsPass(PR);
initializeWebAssemblyFixIrreducibleControlFlowPass(PR);
initializeWebAssemblyLateEHPreparePass(PR);
initializeWebAssemblyExceptionInfoPass(PR);
initializeWebAssemblyCFGSortPass(PR);
initializeWebAssemblyCFGStackifyPass(PR);
+ initializeWebAssemblyExplicitLocalsPass(PR);
initializeWebAssemblyLowerBrUnlessPass(PR);
initializeWebAssemblyRegNumberingPass(PR);
initializeWebAssemblyPeepholePass(PR);
@@ -81,13 +83,22 @@ extern "C" void LLVMInitializeWebAssemblyTarget() {
// WebAssembly Lowering public interface.
//===----------------------------------------------------------------------===//
-static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
+static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM,
+ const Triple &TT) {
if (!RM.hasValue()) {
// Default to static relocation model. This should always be more optimal
// than PIC since the static linker can determine all global addresses and
// assume direct function calls.
return Reloc::Static;
}
+
+ if (!TT.isOSEmscripten()) {
+ // Relocation modes other than static are currently implemented in a way
+ // that only works for Emscripten, so disable them if we aren't targeting
+ // Emscripten.
+ return Reloc::Static;
+ }
+
return *RM;
}
@@ -100,7 +111,7 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
: LLVMTargetMachine(T,
TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
: "e-m:e-p:32:32-i64:64-n32:64-S128",
- TT, CPU, FS, Options, getEffectiveRelocModel(RM),
+ TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT),
getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()) {
// WebAssembly type-checks instructions, but a noreturn function with a return
@@ -122,7 +133,17 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
// splitting and tail merging.
}
-WebAssemblyTargetMachine::~WebAssemblyTargetMachine() {}
+WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor.
+
+const WebAssemblySubtarget *
+WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU,
+ std::string FS) const {
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
const WebAssemblySubtarget *
WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
@@ -136,33 +157,141 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
? FSAttr.getValueAsString().str()
: TargetFS;
- auto &I = SubtargetMap[CPU + FS];
- if (!I) {
- // This needs to be done before we create a new subtarget since any
- // creation will depend on the TM and the code generation flags on the
- // function that reside in TargetOptions.
- resetTargetOptions(F);
- I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
- }
- return I.get();
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+
+ return getSubtargetImpl(CPU, FS);
}
namespace {
-class StripThreadLocal final : public ModulePass {
- // The default thread model for wasm is single, where thread-local variables
- // are identical to regular globals and should be treated the same. So this
- // pass just converts all GlobalVariables to NotThreadLocal
+
+class CoalesceFeaturesAndStripAtomics final : public ModulePass {
+ // Take the union of all features used in the module and use it for each
+ // function individually, since having multiple feature sets in one module
+ // currently does not make sense for WebAssembly. If atomics are not enabled,
+ // also strip atomic operations and thread local storage.
static char ID;
+ WebAssemblyTargetMachine *WasmTM;
public:
- StripThreadLocal() : ModulePass(ID) {}
+ CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM)
+ : ModulePass(ID), WasmTM(WasmTM) {}
+
bool runOnModule(Module &M) override {
- for (auto &GV : M.globals())
- GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
+ FeatureBitset Features = coalesceFeatures(M);
+
+ std::string FeatureStr = getFeatureString(Features);
+ for (auto &F : M)
+ replaceFeatures(F, FeatureStr);
+
+ bool StrippedAtomics = false;
+ bool StrippedTLS = false;
+
+ if (!Features[WebAssembly::FeatureAtomics])
+ StrippedAtomics = stripAtomics(M);
+
+ if (!Features[WebAssembly::FeatureBulkMemory])
+ StrippedTLS = stripThreadLocals(M);
+
+ if (StrippedAtomics && !StrippedTLS)
+ stripThreadLocals(M);
+ else if (StrippedTLS && !StrippedAtomics)
+ stripAtomics(M);
+
+ recordFeatures(M, Features, StrippedAtomics || StrippedTLS);
+
+ // Conservatively assume we have made some change
+ return true;
+ }
+
+private:
+ FeatureBitset coalesceFeatures(const Module &M) {
+ FeatureBitset Features =
+ WasmTM
+ ->getSubtargetImpl(WasmTM->getTargetCPU(),
+ WasmTM->getTargetFeatureString())
+ ->getFeatureBits();
+ for (auto &F : M)
+ Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits();
+ return Features;
+ }
+
+ std::string getFeatureString(const FeatureBitset &Features) {
+ std::string Ret;
+ for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+ if (Features[KV.Value])
+ Ret += (StringRef("+") + KV.Key + ",").str();
+ }
+ return Ret;
+ }
+
+ void replaceFeatures(Function &F, const std::string &Features) {
+ F.removeFnAttr("target-features");
+ F.removeFnAttr("target-cpu");
+ F.addFnAttr("target-features", Features);
+ }
+
+ bool stripAtomics(Module &M) {
+ // Detect whether any atomics will be lowered, since there is no way to tell
+ // whether the LowerAtomic pass lowers e.g. stores.
+ bool Stripped = false;
+ for (auto &F : M) {
+ for (auto &B : F) {
+ for (auto &I : B) {
+ if (I.isAtomic()) {
+ Stripped = true;
+ goto done;
+ }
+ }
+ }
+ }
+
+ done:
+ if (!Stripped)
+ return false;
+
+ LowerAtomicPass Lowerer;
+ FunctionAnalysisManager FAM;
+ for (auto &F : M)
+ Lowerer.run(F, FAM);
+
return true;
}
+
+ bool stripThreadLocals(Module &M) {
+ bool Stripped = false;
+ for (auto &GV : M.globals()) {
+ if (GV.getThreadLocalMode() !=
+ GlobalValue::ThreadLocalMode::NotThreadLocal) {
+ Stripped = true;
+ GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal);
+ }
+ }
+ return Stripped;
+ }
+
+ void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) {
+ for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) {
+ std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str();
+ if (KV.Value == WebAssembly::FeatureAtomics && Stripped) {
+ // "atomics" is special: code compiled without atomics may have had its
+ // atomics lowered to nonatomic operations. In that case, atomics is
+ // disallowed to prevent unsafe linking with atomics-enabled objects.
+ assert(!Features[WebAssembly::FeatureAtomics] ||
+ !Features[WebAssembly::FeatureBulkMemory]);
+ M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
+ wasm::WASM_FEATURE_PREFIX_DISALLOWED);
+ } else if (Features[KV.Value]) {
+ // Otherwise, features are marked as used or not mentioned at all.
+ M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
+ wasm::WASM_FEATURE_PREFIX_USED);
+ }
+ }
+ }
};
-char StripThreadLocal::ID = 0;
+char CoalesceFeaturesAndStripAtomics::ID = 0;
/// WebAssembly Code Generator Pass Configuration Options.
class WebAssemblyPassConfig final : public TargetPassConfig {
@@ -181,6 +310,12 @@ public:
void addPostRegAlloc() override;
bool addGCPasses() override { return false; }
void addPreEmitPass() override;
+
+ // No reg alloc
+ bool addRegAssignmentFast() override { return false; }
+
+ // No reg alloc
+ bool addRegAssignmentOptimized() override { return false; }
};
} // end anonymous namespace
@@ -204,15 +339,11 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
//===----------------------------------------------------------------------===//
void WebAssemblyPassConfig::addIRPasses() {
- if (TM->Options.ThreadModel == ThreadModel::Single) {
- // In "single" mode, atomics get lowered to non-atomics.
- addPass(createLowerAtomicPass());
- addPass(new StripThreadLocal());
- } else {
- // Expand some atomic operations. WebAssemblyTargetLowering has hooks which
- // control specifically what gets lowered.
- addPass(createAtomicExpandPass());
- }
+ // Runs LowerAtomicPass if necessary
+ addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
+
+ // This is a no-op if atomics are not used in the module
+ addPass(createAtomicExpandPass());
// Add signatures to prototype-less function declarations
addPass(createWebAssemblyAddMissingPrototypes());
@@ -246,6 +377,9 @@ void WebAssemblyPassConfig::addIRPasses() {
addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
EnableEmSjLj));
+ // Expand indirectbr instructions to switches.
+ addPass(createIndirectBrExpandPass());
+
TargetPassConfig::addIRPasses();
}
@@ -279,20 +413,16 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
disablePass(&PatchableFunctionID);
disablePass(&ShrinkWrapID);
+ // This pass hurts code size for wasm because it can generate irreducible
+ // control flow.
+ disablePass(&MachineBlockPlacementID);
+
TargetPassConfig::addPostRegAlloc();
}
void WebAssemblyPassConfig::addPreEmitPass() {
TargetPassConfig::addPreEmitPass();
- // Restore __stack_pointer global after an exception is thrown.
- addPass(createWebAssemblyEHRestoreStackPointer());
-
- // Now that we have a prologue and epilogue and all frame indices are
- // rewritten, eliminate SP and FP. This allows them to be stackified,
- // colored, and numbered with the rest of the registers.
- addPass(createWebAssemblyReplacePhysRegs());
-
// Rewrite pseudo call_indirect instructions as real instructions.
// This needs to run before register stackification, because we change the
// order of the arguments.
@@ -302,8 +432,15 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyFixIrreducibleControlFlow());
// Do various transformations for exception handling.
+ // Every CFG-changing optimization should come before this.
addPass(createWebAssemblyLateEHPrepare());
+ // Now that we have a prologue and epilogue and all frame indices are
+ // rewritten, eliminate SP and FP. This allows them to be stackified,
+ // colored, and numbered with the rest of the registers.
+ addPass(createWebAssemblyReplacePhysRegs());
+
+ // Preparations and optimizations related to register stackification.
if (getOptLevel() != CodeGenOpt::None) {
// LiveIntervals isn't commonly run this late. Re-establish preconditions.
addPass(createWebAssemblyPrepareForLiveIntervals());
@@ -327,9 +464,6 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
- // Insert explicit local.get and local.set operators.
- addPass(createWebAssemblyExplicitLocals());
-
// Sort the blocks of the CFG into topological order, a prerequisite for
// BLOCK and LOOP markers.
addPass(createWebAssemblyCFGSort());
@@ -337,6 +471,9 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Insert BLOCK and LOOP markers.
addPass(createWebAssemblyCFGStackify());
+ // Insert explicit local.get and local.set operators.
+ addPass(createWebAssemblyExplicitLocals());
+
// Lower br_unless into br_if.
addPass(createWebAssemblyLowerBrUnless());
@@ -347,3 +484,24 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Create a mapping from LLVM CodeGen virtual registers to wasm registers.
addPass(createWebAssemblyRegNumbering());
}
+
+yaml::MachineFunctionInfo *
+WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
+ return new yaml::WebAssemblyFunctionInfo();
+}
+
+yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML(
+ const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+ return new yaml::WebAssemblyFunctionInfo(*MFI);
+}
+
+bool WebAssemblyTargetMachine::parseMachineFunctionInfo(
+ const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error, SMRange &SourceRange) const {
+ const auto &YamlMFI =
+ reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI);
+ MachineFunction &MF = PFS.MF;
+ MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI);
+ return false;
+}
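A brief sketch (not part of the patch) of what the feature-coalescing pass above effectively does per function; the feature spellings are illustrative assumptions, while the remove/add attribute pattern and the trailing comma mirror replaceFeatures() and getFeatureString() as added above.
  // F is assumed to be a Function& from the module being compiled.
  F.removeFnAttr("target-features");
  F.removeFnAttr("target-cpu");
  F.addFnAttr("target-features", "+atomics,+sign-ext,");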
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
index 41001e7a0cc7..850e6b9a9e9e 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h
@@ -1,9 +1,8 @@
// WebAssemblyTargetMachine.h - Define TargetMachine for WebAssembly -*- C++ -*-
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -33,6 +32,9 @@ public:
bool JIT);
~WebAssemblyTargetMachine() override;
+
+ const WebAssemblySubtarget *getSubtargetImpl(std::string CPU,
+ std::string FS) const;
const WebAssemblySubtarget *
getSubtargetImpl(const Function &F) const override;
@@ -46,6 +48,14 @@ public:
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
bool usesPhysRegsForPEI() const override { return false; }
+
+ yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override;
+ yaml::MachineFunctionInfo *
+ convertFuncInfoToYAML(const MachineFunction &MF) const override;
+ bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &,
+ PerFunctionMIParsingState &PFS,
+ SMDiagnostic &Error,
+ SMRange &SourceRange) const override;
};
} // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
index 0459bfca418d..ad57c600db10 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index ce744ba8b8e8..f46bb2040a7d 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- WebAssemblyTargetObjectFile.h - WebAssembly Object Info -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 4a2777cc3a9f..46ef765ce0f4 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -51,7 +50,7 @@ unsigned WebAssemblyTTIImpl::getArithmeticInstrCost(
unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
- if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
switch (Opcode) {
case Instruction::LShr:
case Instruction::AShr:
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 4300ca3defbf..1b11b4b631eb 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -1,9 +1,8 @@
//==- WebAssemblyTargetTransformInfo.h - WebAssembly-specific TTI -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index ada6fb9a96d7..e9d88d4818a5 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -1,9 +1,8 @@
//===-- WebAssemblyUtilities.cpp - WebAssembly Utility Functions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -25,70 +24,6 @@ const char *const WebAssembly::StdTerminateFn = "_ZSt9terminatev";
const char *const WebAssembly::PersonalityWrapperFn =
"_Unwind_Wasm_CallPersonality";
-bool WebAssembly::isArgument(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::ARGUMENT_i32:
- case WebAssembly::ARGUMENT_i32_S:
- case WebAssembly::ARGUMENT_i64:
- case WebAssembly::ARGUMENT_i64_S:
- case WebAssembly::ARGUMENT_f32:
- case WebAssembly::ARGUMENT_f32_S:
- case WebAssembly::ARGUMENT_f64:
- case WebAssembly::ARGUMENT_f64_S:
- case WebAssembly::ARGUMENT_v16i8:
- case WebAssembly::ARGUMENT_v16i8_S:
- case WebAssembly::ARGUMENT_v8i16:
- case WebAssembly::ARGUMENT_v8i16_S:
- case WebAssembly::ARGUMENT_v4i32:
- case WebAssembly::ARGUMENT_v4i32_S:
- case WebAssembly::ARGUMENT_v2i64:
- case WebAssembly::ARGUMENT_v2i64_S:
- case WebAssembly::ARGUMENT_v4f32:
- case WebAssembly::ARGUMENT_v4f32_S:
- case WebAssembly::ARGUMENT_v2f64:
- case WebAssembly::ARGUMENT_v2f64_S:
- return true;
- default:
- return false;
- }
-}
-
-bool WebAssembly::isCopy(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::COPY_I32:
- case WebAssembly::COPY_I32_S:
- case WebAssembly::COPY_I64:
- case WebAssembly::COPY_I64_S:
- case WebAssembly::COPY_F32:
- case WebAssembly::COPY_F32_S:
- case WebAssembly::COPY_F64:
- case WebAssembly::COPY_F64_S:
- case WebAssembly::COPY_V128:
- case WebAssembly::COPY_V128_S:
- return true;
- default:
- return false;
- }
-}
-
-bool WebAssembly::isTee(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::TEE_I32:
- case WebAssembly::TEE_I32_S:
- case WebAssembly::TEE_I64:
- case WebAssembly::TEE_I64_S:
- case WebAssembly::TEE_F32:
- case WebAssembly::TEE_F32_S:
- case WebAssembly::TEE_F64:
- case WebAssembly::TEE_F64_S:
- case WebAssembly::TEE_V128:
- case WebAssembly::TEE_V128_S:
- return true;
- default:
- return false;
- }
-}
-
/// Test whether MI is a child of some other node in an expression tree.
bool WebAssembly::isChild(const MachineInstr &MI,
const WebAssemblyFunctionInfo &MFI) {
@@ -102,201 +37,20 @@ bool WebAssembly::isChild(const MachineInstr &MI,
MFI.isVRegStackified(Reg);
}
-bool WebAssembly::isCallDirect(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_VOID_S:
- case WebAssembly::CALL_I32:
- case WebAssembly::CALL_I32_S:
- case WebAssembly::CALL_I64:
- case WebAssembly::CALL_I64_S:
- case WebAssembly::CALL_F32:
- case WebAssembly::CALL_F32_S:
- case WebAssembly::CALL_F64:
- case WebAssembly::CALL_F64_S:
- case WebAssembly::CALL_v16i8:
- case WebAssembly::CALL_v16i8_S:
- case WebAssembly::CALL_v8i16:
- case WebAssembly::CALL_v8i16_S:
- case WebAssembly::CALL_v4i32:
- case WebAssembly::CALL_v4i32_S:
- case WebAssembly::CALL_v2i64:
- case WebAssembly::CALL_v2i64_S:
- case WebAssembly::CALL_v4f32:
- case WebAssembly::CALL_v4f32_S:
- case WebAssembly::CALL_v2f64:
- case WebAssembly::CALL_v2f64_S:
- case WebAssembly::CALL_EXCEPT_REF:
- case WebAssembly::CALL_EXCEPT_REF_S:
- return true;
- default:
- return false;
- }
-}
-
-bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::CALL_INDIRECT_VOID:
- case WebAssembly::CALL_INDIRECT_VOID_S:
- case WebAssembly::CALL_INDIRECT_I32:
- case WebAssembly::CALL_INDIRECT_I32_S:
- case WebAssembly::CALL_INDIRECT_I64:
- case WebAssembly::CALL_INDIRECT_I64_S:
- case WebAssembly::CALL_INDIRECT_F32:
- case WebAssembly::CALL_INDIRECT_F32_S:
- case WebAssembly::CALL_INDIRECT_F64:
- case WebAssembly::CALL_INDIRECT_F64_S:
- case WebAssembly::CALL_INDIRECT_v16i8:
- case WebAssembly::CALL_INDIRECT_v16i8_S:
- case WebAssembly::CALL_INDIRECT_v8i16:
- case WebAssembly::CALL_INDIRECT_v8i16_S:
- case WebAssembly::CALL_INDIRECT_v4i32:
- case WebAssembly::CALL_INDIRECT_v4i32_S:
- case WebAssembly::CALL_INDIRECT_v2i64:
- case WebAssembly::CALL_INDIRECT_v2i64_S:
- case WebAssembly::CALL_INDIRECT_v4f32:
- case WebAssembly::CALL_INDIRECT_v4f32_S:
- case WebAssembly::CALL_INDIRECT_v2f64:
- case WebAssembly::CALL_INDIRECT_v2f64_S:
- case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
- case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
- return true;
- default:
- return false;
- }
-}
-
-unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::CALL_VOID:
- case WebAssembly::CALL_VOID_S:
- case WebAssembly::CALL_INDIRECT_VOID:
- case WebAssembly::CALL_INDIRECT_VOID_S:
- return 0;
- case WebAssembly::CALL_I32:
- case WebAssembly::CALL_I32_S:
- case WebAssembly::CALL_I64:
- case WebAssembly::CALL_I64_S:
- case WebAssembly::CALL_F32:
- case WebAssembly::CALL_F32_S:
- case WebAssembly::CALL_F64:
- case WebAssembly::CALL_F64_S:
- case WebAssembly::CALL_v16i8:
- case WebAssembly::CALL_v16i8_S:
- case WebAssembly::CALL_v8i16:
- case WebAssembly::CALL_v8i16_S:
- case WebAssembly::CALL_v4i32:
- case WebAssembly::CALL_v4i32_S:
- case WebAssembly::CALL_v2i64:
- case WebAssembly::CALL_v2i64_S:
- case WebAssembly::CALL_v4f32:
- case WebAssembly::CALL_v4f32_S:
- case WebAssembly::CALL_v2f64:
- case WebAssembly::CALL_v2f64_S:
- case WebAssembly::CALL_EXCEPT_REF:
- case WebAssembly::CALL_EXCEPT_REF_S:
- case WebAssembly::CALL_INDIRECT_I32:
- case WebAssembly::CALL_INDIRECT_I32_S:
- case WebAssembly::CALL_INDIRECT_I64:
- case WebAssembly::CALL_INDIRECT_I64_S:
- case WebAssembly::CALL_INDIRECT_F32:
- case WebAssembly::CALL_INDIRECT_F32_S:
- case WebAssembly::CALL_INDIRECT_F64:
- case WebAssembly::CALL_INDIRECT_F64_S:
- case WebAssembly::CALL_INDIRECT_v16i8:
- case WebAssembly::CALL_INDIRECT_v16i8_S:
- case WebAssembly::CALL_INDIRECT_v8i16:
- case WebAssembly::CALL_INDIRECT_v8i16_S:
- case WebAssembly::CALL_INDIRECT_v4i32:
- case WebAssembly::CALL_INDIRECT_v4i32_S:
- case WebAssembly::CALL_INDIRECT_v2i64:
- case WebAssembly::CALL_INDIRECT_v2i64_S:
- case WebAssembly::CALL_INDIRECT_v4f32:
- case WebAssembly::CALL_INDIRECT_v4f32_S:
- case WebAssembly::CALL_INDIRECT_v2f64:
- case WebAssembly::CALL_INDIRECT_v2f64_S:
- case WebAssembly::CALL_INDIRECT_EXCEPT_REF:
- case WebAssembly::CALL_INDIRECT_EXCEPT_REF_S:
- return 1;
- default:
- llvm_unreachable("Not a call instruction");
- }
-}
-
-bool WebAssembly::isMarker(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::BLOCK:
- case WebAssembly::BLOCK_S:
- case WebAssembly::END_BLOCK:
- case WebAssembly::END_BLOCK_S:
- case WebAssembly::LOOP:
- case WebAssembly::LOOP_S:
- case WebAssembly::END_LOOP:
- case WebAssembly::END_LOOP_S:
- case WebAssembly::TRY:
- case WebAssembly::TRY_S:
- case WebAssembly::END_TRY:
- case WebAssembly::END_TRY_S:
- return true;
- default:
- return false;
- }
-}
-
-bool WebAssembly::isThrow(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::THROW_I32:
- case WebAssembly::THROW_I32_S:
- case WebAssembly::THROW_I64:
- case WebAssembly::THROW_I64_S:
- return true;
- default:
- return false;
- }
-}
-
-bool WebAssembly::isRethrow(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::RETHROW:
- case WebAssembly::RETHROW_S:
- case WebAssembly::RETHROW_TO_CALLER:
- case WebAssembly::RETHROW_TO_CALLER_S:
- return true;
- default:
- return false;
- }
-}
-
-bool WebAssembly::isCatch(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::CATCH_I32:
- case WebAssembly::CATCH_I32_S:
- case WebAssembly::CATCH_I64:
- case WebAssembly::CATCH_I64_S:
- case WebAssembly::CATCH_ALL:
- case WebAssembly::CATCH_ALL_S:
- return true;
- default:
- return false;
- }
-}
-
bool WebAssembly::mayThrow(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- case WebAssembly::THROW_I32:
- case WebAssembly::THROW_I32_S:
- case WebAssembly::THROW_I64:
- case WebAssembly::THROW_I64_S:
+ case WebAssembly::THROW:
+ case WebAssembly::THROW_S:
case WebAssembly::RETHROW:
case WebAssembly::RETHROW_S:
return true;
}
- if (isCallIndirect(MI))
+ if (isCallIndirect(MI.getOpcode()))
return true;
if (!MI.isCall())
return false;
- const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI));
+ const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
assert(MO.isGlobal());
const auto *F = dyn_cast<Function>(MO.getGlobal());
if (!F)
@@ -307,43 +61,8 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) {
if (F->getName() == CxaBeginCatchFn || F->getName() == PersonalityWrapperFn ||
F->getName() == ClangCallTerminateFn || F->getName() == StdTerminateFn)
return false;
- return true;
-}
-
-bool WebAssembly::isCatchTerminatePad(const MachineBasicBlock &MBB) {
- if (!MBB.isEHPad())
- return false;
- bool SeenCatch = false;
- for (auto &MI : MBB) {
- if (MI.getOpcode() == WebAssembly::CATCH_I32 ||
- MI.getOpcode() == WebAssembly::CATCH_I64 ||
- MI.getOpcode() == WebAssembly::CATCH_I32_S ||
- MI.getOpcode() == WebAssembly::CATCH_I64_S)
- SeenCatch = true;
- if (SeenCatch && MI.isCall()) {
- const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
- if (CalleeOp.isGlobal() &&
- CalleeOp.getGlobal()->getName() == ClangCallTerminateFn)
- return true;
- }
- }
- return false;
-}
-bool WebAssembly::isCatchAllTerminatePad(const MachineBasicBlock &MBB) {
- if (!MBB.isEHPad())
- return false;
- bool SeenCatchAll = false;
- for (auto &MI : MBB) {
- if (MI.getOpcode() == WebAssembly::CATCH_ALL ||
- MI.getOpcode() == WebAssembly::CATCH_ALL_S)
- SeenCatchAll = true;
- if (SeenCatchAll && MI.isCall()) {
- const MachineOperand &CalleeOp = MI.getOperand(getCalleeOpNo(MI));
- if (CalleeOp.isGlobal() &&
- CalleeOp.getGlobal()->getName() == StdTerminateFn)
- return true;
- }
- }
- return false;
+ // TODO: Can we exclude call instructions that are marked as 'nounwind' in the
+ // original LLVM IR? (Even when the callee may throw)
+ return true;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.h b/lib/Target/WebAssembly/WebAssemblyUtilities.h
index cdb7873e9013..26cf84de89b9 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -1,9 +1,8 @@
//===-- WebAssemblyUtilities - WebAssembly Utility Functions ---*- C++ -*-====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -24,29 +23,9 @@ class WebAssemblyFunctionInfo;
namespace WebAssembly {
-bool isArgument(const MachineInstr &MI);
-bool isCopy(const MachineInstr &MI);
-bool isTee(const MachineInstr &MI);
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
-bool isCallDirect(const MachineInstr &MI);
-bool isCallIndirect(const MachineInstr &MI);
-bool isMarker(const MachineInstr &MI);
-bool isThrow(const MachineInstr &MI);
-bool isRethrow(const MachineInstr &MI);
-bool isCatch(const MachineInstr &MI);
bool mayThrow(const MachineInstr &MI);
-/// Returns the operand number of a callee, assuming the argument is a call
-/// instruction.
-unsigned getCalleeOpNo(const MachineInstr &MI);
-
-/// Returns if the given BB is a single BB terminate pad which starts with a
-/// 'catch' instruction.
-bool isCatchTerminatePad(const MachineBasicBlock &MBB);
-/// Returns if the given BB is a single BB terminate pad which starts with a
-/// 'catch_all' insrtruction.
-bool isCatchAllTerminatePad(const MachineBasicBlock &MBB);
-
// Exception-related function names
extern const char *const ClangCallTerminateFn;
extern const char *const CxaBeginCatchFn;
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 364c871f61b0..701b347bcbd7 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -6,21 +6,13 @@
# error). The format is
# <name> <attributes> # comment
-# Computed gotos are not supported (Cannot select BlockAddress/BRIND)
-20071220-1.c
+# blockaddress without an indirectbr still can't be supported
+20071220-1.c O2 # Relocation against a BB address
20071220-2.c
-20040302-1.c
-20041214-1.c O0
-20071210-1.c
-920501-4.c
-920501-5.c
-comp-goto-1.c
-980526-1.c
990208-1.c
label13.C O0
label13a.C O0
label3.C
-pr42462.C O0
# WebAssembly hasn't implemented (will never?) __builtin_return_address
20010122-1.c
@@ -75,7 +67,6 @@ pr41935.c
920501-3.c
920728-1.c
pr28865.c
-widechar-2.c
attr-alias-1.C
attr-alias-2.C
attr-ifunc-1.C
@@ -86,7 +77,6 @@ complit12.C
va-arg-pack-1.C
va-arg-pack-len-1.C
builtin-line1.C
-builtin-location.C
devirt-6.C # bad main signature
devirt-13.C # bad main signature
devirt-14.C # bad main signature
@@ -94,11 +84,22 @@ devirt-21.C # bad main signature
devirt-23.C # bad main signature
lifetime2.C # violates C++ DR1696
+# WASI doesn't have stdjmp.h yet
+pr56982.c
+simd-2.C
+
+# WASI doesn't have pthread.h yet
+thread_local3.C
+thread_local3g.C
+thread_local4.C
+thread_local4g.C
+thread_local5.C
+thread_local5g.C
+
# Untriaged C++ failures
spec5.C
addr1.C
ef_test.C
-friend18.C
member2.C
new39.C
new40.C
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
deleted file mode 100644
index 2c376fd062ca..000000000000
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ /dev/null
@@ -1,1089 +0,0 @@
-//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86AsmInstrumentation.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "X86Operand.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SMLoc.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <limits>
-#include <memory>
-#include <vector>
-
-// The following comment describes how assembly instrumentation works.
-// Currently we have only AddressSanitizer instrumentation, but we're
-// planning to implement MemorySanitizer for inline assembly too. If
-// you're not familiar with the AddressSanitizer algorithm, please read
-// https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
-//
-// When inline assembly is parsed by an instance of X86AsmParser, all
-// instructions are emitted via the EmitInstruction method. That is the
-// place where X86AsmInstrumentation analyzes an instruction and
-// decides whether the instruction should be emitted as is or whether
-// instrumentation is required. The latter happens when an instruction
-// reads from or writes to memory. Currently the instruction opcode is
-// checked explicitly, and if the instruction has a memory operand
-// (for instance, movq (%rsi, %rcx, 8), %rax) it is instrumented.
-// There also exist instructions that modify memory but don't have
-// explicit memory operands, for instance movs.
-//
-// Let's first consider 8-byte memory accesses where the instruction
-// has an explicit memory operand. In this case we need two registers:
-// AddressReg to compute the address of the memory cells being accessed
-// and ShadowReg to compute the corresponding shadow address. So we need
-// to spill both registers before the instrumentation code and restore
-// them after it. Thus, in general, the instrumentation code will
-// look like this:
-// PUSHF # Store flags, otherwise they will be overwritten
-// PUSH AddressReg # spill AddressReg
-// PUSH ShadowReg # spill ShadowReg
-// LEA MemOp, AddressReg # compute address of the memory operand
-// MOV AddressReg, ShadowReg
-// SHR ShadowReg, 3
-// #     The shadow byte corresponding to MemOp is located at
-// #     ShadowOffset + (AddressReg >> 3).
-// CMP ShadowOffset(ShadowReg), 0 # test shadow value
-// JZ .Done # when the shadow equals zero, everything is fine
-// MOV AddressReg, RDI
-// # Call __asan_report function with AddressReg as an argument
-// CALL __asan_report
-// .Done:
-// POP ShadowReg # Restore ShadowReg
-// POP AddressReg # Restore AddressReg
-// POPF # Restore flags
-//
-// Memory accesses of other sizes (1, 2, 4 and 16 bytes) are
-// handled in a similar manner, but small memory accesses (less than 8
-// bytes) require an additional ScratchReg, which is used for the shadow value.
-//
-// If, say, we're instrumenting an instruction like movs, only the
-// contents of RDI, RDI + AccessSize * RCX, RSI, and RSI + AccessSize *
-// RCX are checked.  In this case there is no need to spill and restore
-// AddressReg, ShadowReg, or the flags four times; they're saved on the stack
-// just once, before the instrumentation of these four addresses, and restored
-// at the end of the instrumentation.
-//
-// Several things complicate this simple algorithm:
-// * The instrumented memory operand can have RSP as a base or an index
-//   register.  So we need to add a constant offset before computing the
-//   memory address, since the flags, AddressReg, ShadowReg, etc. were
-//   already stored on the stack and RSP was modified.
-// * Debug info (usually DWARF) should be adjusted, because sometimes
-//   RSP is used as a frame register. So we need to select some
-//   register as a frame register and temporarily override the current CFA
-//   register.
-
-using namespace llvm;
-
-static cl::opt<bool> ClAsanInstrumentAssembly(
- "asan-instrument-assembly",
- cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
- cl::init(false));
-
-static const int64_t MinAllowedDisplacement =
- std::numeric_limits<int32_t>::min();
-static const int64_t MaxAllowedDisplacement =
- std::numeric_limits<int32_t>::max();
-
-static int64_t ApplyDisplacementBounds(int64_t Displacement) {
- return std::max(std::min(MaxAllowedDisplacement, Displacement),
- MinAllowedDisplacement);
-}
-
-static void CheckDisplacementBounds(int64_t Displacement) {
- assert(Displacement >= MinAllowedDisplacement &&
- Displacement <= MaxAllowedDisplacement);
-}
-
-static bool IsStackReg(unsigned Reg) {
- return Reg == X86::RSP || Reg == X86::ESP;
-}
-
-static bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
-
-namespace {
-
-class X86AddressSanitizer : public X86AsmInstrumentation {
-public:
- struct RegisterContext {
- private:
- enum RegOffset {
- REG_OFFSET_ADDRESS = 0,
- REG_OFFSET_SHADOW,
- REG_OFFSET_SCRATCH
- };
-
- public:
- RegisterContext(unsigned AddressReg, unsigned ShadowReg,
- unsigned ScratchReg) {
- BusyRegs.push_back(convReg(AddressReg, 64));
- BusyRegs.push_back(convReg(ShadowReg, 64));
- BusyRegs.push_back(convReg(ScratchReg, 64));
- }
-
- unsigned AddressReg(unsigned Size) const {
- return convReg(BusyRegs[REG_OFFSET_ADDRESS], Size);
- }
-
- unsigned ShadowReg(unsigned Size) const {
- return convReg(BusyRegs[REG_OFFSET_SHADOW], Size);
- }
-
- unsigned ScratchReg(unsigned Size) const {
- return convReg(BusyRegs[REG_OFFSET_SCRATCH], Size);
- }
-
- void AddBusyReg(unsigned Reg) {
- if (Reg != X86::NoRegister)
- BusyRegs.push_back(convReg(Reg, 64));
- }
-
- void AddBusyRegs(const X86Operand &Op) {
- AddBusyReg(Op.getMemBaseReg());
- AddBusyReg(Op.getMemIndexReg());
- }
-
- unsigned ChooseFrameReg(unsigned Size) const {
- static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
- X86::RCX, X86::RDX, X86::RDI,
- X86::RSI };
- for (unsigned Reg : Candidates) {
- if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
- return convReg(Reg, Size);
- }
- return X86::NoRegister;
- }
-
- private:
- unsigned convReg(unsigned Reg, unsigned Size) const {
- return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, Size);
- }
-
- std::vector<unsigned> BusyRegs;
- };
-
- X86AddressSanitizer(const MCSubtargetInfo *&STI)
- : X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
-
- ~X86AddressSanitizer() override = default;
-
- // X86AsmInstrumentation implementation:
- void InstrumentAndEmitInstruction(const MCInst &Inst, OperandVector &Operands,
- MCContext &Ctx, const MCInstrInfo &MII,
- MCStreamer &Out,
- /* unused */ bool) override {
- InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
- if (RepPrefix)
- EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
-
- InstrumentMOV(Inst, Operands, Ctx, MII, Out);
-
- RepPrefix = (Inst.getOpcode() == X86::REP_PREFIX);
- if (!RepPrefix)
- EmitInstruction(Out, Inst);
- }
-
-  // Adjusts the stack and saves all registers used in instrumentation.
- virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) = 0;
-
- // Restores all registers used in instrumentation and adjusts stack.
- virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) = 0;
-
- virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx, MCStreamer &Out) = 0;
- virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx, MCStreamer &Out) = 0;
-
- virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) = 0;
-
- void InstrumentMemOperand(X86Operand &Op, unsigned AccessSize, bool IsWrite,
- const RegisterContext &RegCtx, MCContext &Ctx,
- MCStreamer &Out);
- void InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg, unsigned CntReg,
- unsigned AccessSize, MCContext &Ctx, MCStreamer &Out);
-
- void InstrumentMOVS(const MCInst &Inst, OperandVector &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
- void InstrumentMOV(const MCInst &Inst, OperandVector &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
-
-protected:
- void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
-
- void EmitLEA(X86Operand &Op, unsigned Size, unsigned Reg, MCStreamer &Out) {
- assert(Size == 32 || Size == 64);
- MCInst Inst;
- Inst.setOpcode(Size == 32 ? X86::LEA32r : X86::LEA64r);
- Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, Size)));
- Op.addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
-
- void ComputeMemOperandAddress(X86Operand &Op, unsigned Size,
- unsigned Reg, MCContext &Ctx, MCStreamer &Out);
-
-  // Creates a new memory operand with Displacement added to the original
-  // displacement. Residue receives whatever is left over when the total
-  // displacement exceeds the 32-bit limit.
- std::unique_ptr<X86Operand> AddDisplacement(X86Operand &Op,
- int64_t Displacement,
- MCContext &Ctx, int64_t *Residue);
-
- bool is64BitMode() const {
- return STI->getFeatureBits()[X86::Mode64Bit];
- }
-
- bool is32BitMode() const {
- return STI->getFeatureBits()[X86::Mode32Bit];
- }
-
- bool is16BitMode() const {
- return STI->getFeatureBits()[X86::Mode16Bit];
- }
-
- unsigned getPointerWidth() {
- if (is16BitMode()) return 16;
- if (is32BitMode()) return 32;
- if (is64BitMode()) return 64;
- llvm_unreachable("invalid mode");
- }
-
-  // True when the previous instruction was actually a REP prefix.
- bool RepPrefix;
-
- // Offset from the original SP register.
- int64_t OrigSPOffset;
-};
-
-void X86AddressSanitizer::InstrumentMemOperand(
- X86Operand &Op, unsigned AccessSize, bool IsWrite,
- const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- assert(Op.isMem() && "Op should be a memory operand.");
- assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 &&
- "AccessSize should be a power of two, less or equal than 16.");
- // FIXME: take into account load/store alignment.
- if (IsSmallMemAccess(AccessSize))
- InstrumentMemOperandSmall(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
- else
- InstrumentMemOperandLarge(Op, AccessSize, IsWrite, RegCtx, Ctx, Out);
-}
-
-void X86AddressSanitizer::InstrumentMOVSBase(unsigned DstReg, unsigned SrcReg,
- unsigned CntReg,
- unsigned AccessSize,
- MCContext &Ctx, MCStreamer &Out) {
- // FIXME: check whole ranges [DstReg .. DstReg + AccessSize * (CntReg - 1)]
- // and [SrcReg .. SrcReg + AccessSize * (CntReg - 1)].
- RegisterContext RegCtx(X86::RDX /* AddressReg */, X86::RAX /* ShadowReg */,
- IsSmallMemAccess(AccessSize)
- ? X86::RBX
- : X86::NoRegister /* ScratchReg */);
- RegCtx.AddBusyReg(DstReg);
- RegCtx.AddBusyReg(SrcReg);
- RegCtx.AddBusyReg(CntReg);
-
- InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
-
- // Test (%SrcReg)
- {
- const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
- std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
- getPointerWidth(), 0, Disp, SrcReg, 0, AccessSize, SMLoc(), SMLoc()));
- InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
- Out);
- }
-
- // Test -1(%SrcReg, %CntReg, AccessSize)
- {
- const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
- std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
- getPointerWidth(), 0, Disp, SrcReg, CntReg, AccessSize, SMLoc(),
- SMLoc()));
- InstrumentMemOperand(*Op, AccessSize, false /* IsWrite */, RegCtx, Ctx,
- Out);
- }
-
- // Test (%DstReg)
- {
- const MCExpr *Disp = MCConstantExpr::create(0, Ctx);
- std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
- getPointerWidth(), 0, Disp, DstReg, 0, AccessSize, SMLoc(), SMLoc()));
- InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
- }
-
- // Test -1(%DstReg, %CntReg, AccessSize)
- {
- const MCExpr *Disp = MCConstantExpr::create(-1, Ctx);
- std::unique_ptr<X86Operand> Op(X86Operand::CreateMem(
- getPointerWidth(), 0, Disp, DstReg, CntReg, AccessSize, SMLoc(),
- SMLoc()));
- InstrumentMemOperand(*Op, AccessSize, true /* IsWrite */, RegCtx, Ctx, Out);
- }
-
- InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
-}
-
-void X86AddressSanitizer::InstrumentMOVS(const MCInst &Inst,
- OperandVector &Operands,
- MCContext &Ctx, const MCInstrInfo &MII,
- MCStreamer &Out) {
- // Access size in bytes.
- unsigned AccessSize = 0;
-
- switch (Inst.getOpcode()) {
- case X86::MOVSB:
- AccessSize = 1;
- break;
- case X86::MOVSW:
- AccessSize = 2;
- break;
- case X86::MOVSL:
- AccessSize = 4;
- break;
- case X86::MOVSQ:
- AccessSize = 8;
- break;
- default:
- return;
- }
-
- InstrumentMOVSImpl(AccessSize, Ctx, Out);
-}
-
-void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
- OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII,
- MCStreamer &Out) {
- // Access size in bytes.
- unsigned AccessSize = 0;
-
- switch (Inst.getOpcode()) {
- case X86::MOV8mi:
- case X86::MOV8mr:
- case X86::MOV8rm:
- AccessSize = 1;
- break;
- case X86::MOV16mi:
- case X86::MOV16mr:
- case X86::MOV16rm:
- AccessSize = 2;
- break;
- case X86::MOV32mi:
- case X86::MOV32mr:
- case X86::MOV32rm:
- AccessSize = 4;
- break;
- case X86::MOV64mi32:
- case X86::MOV64mr:
- case X86::MOV64rm:
- AccessSize = 8;
- break;
- case X86::MOVAPDmr:
- case X86::MOVAPSmr:
- case X86::MOVAPDrm:
- case X86::MOVAPSrm:
- AccessSize = 16;
- break;
- default:
- return;
- }
-
- const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
-
- for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
- assert(Operands[Ix]);
- MCParsedAsmOperand &Op = *Operands[Ix];
- if (Op.isMem()) {
- X86Operand &MemOp = static_cast<X86Operand &>(Op);
- RegisterContext RegCtx(
- X86::RDI /* AddressReg */, X86::RAX /* ShadowReg */,
- IsSmallMemAccess(AccessSize) ? X86::RCX
- : X86::NoRegister /* ScratchReg */);
- RegCtx.AddBusyRegs(MemOp);
- InstrumentMemOperandPrologue(RegCtx, Ctx, Out);
- InstrumentMemOperand(MemOp, AccessSize, IsWrite, RegCtx, Ctx, Out);
- InstrumentMemOperandEpilogue(RegCtx, Ctx, Out);
- }
- }
-}
-
-void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
- unsigned Size,
- unsigned Reg, MCContext &Ctx,
- MCStreamer &Out) {
- int64_t Displacement = 0;
- if (IsStackReg(Op.getMemBaseReg()))
- Displacement -= OrigSPOffset;
- if (IsStackReg(Op.getMemIndexReg()))
- Displacement -= OrigSPOffset * Op.getMemScale();
-
- assert(Displacement >= 0);
-
- // Emit Op as is.
- if (Displacement == 0) {
- EmitLEA(Op, Size, Reg, Out);
- return;
- }
-
- int64_t Residue;
- std::unique_ptr<X86Operand> NewOp =
- AddDisplacement(Op, Displacement, Ctx, &Residue);
- EmitLEA(*NewOp, Size, Reg, Out);
-
- while (Residue != 0) {
- const MCConstantExpr *Disp =
- MCConstantExpr::create(ApplyDisplacementBounds(Residue), Ctx);
- std::unique_ptr<X86Operand> DispOp =
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
- SMLoc());
- EmitLEA(*DispOp, Size, Reg, Out);
- Residue -= Disp->getValue();
- }
-}
-
-std::unique_ptr<X86Operand>
-X86AddressSanitizer::AddDisplacement(X86Operand &Op, int64_t Displacement,
- MCContext &Ctx, int64_t *Residue) {
- assert(Displacement >= 0);
-
- if (Displacement == 0 ||
- (Op.getMemDisp() && Op.getMemDisp()->getKind() != MCExpr::Constant)) {
- *Residue = Displacement;
- return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(),
- Op.getMemDisp(), Op.getMemBaseReg(),
- Op.getMemIndexReg(), Op.getMemScale(),
- SMLoc(), SMLoc());
- }
-
- int64_t OrigDisplacement =
- static_cast<const MCConstantExpr *>(Op.getMemDisp())->getValue();
- CheckDisplacementBounds(OrigDisplacement);
- Displacement += OrigDisplacement;
-
- int64_t NewDisplacement = ApplyDisplacementBounds(Displacement);
- CheckDisplacementBounds(NewDisplacement);
-
- *Residue = Displacement - NewDisplacement;
- const MCExpr *Disp = MCConstantExpr::create(NewDisplacement, Ctx);
- return X86Operand::CreateMem(Op.getMemModeSize(), Op.getMemSegReg(), Disp,
- Op.getMemBaseReg(), Op.getMemIndexReg(),
- Op.getMemScale(), SMLoc(), SMLoc());
-}
-
-class X86AddressSanitizer32 : public X86AddressSanitizer {
-public:
- static const long kShadowOffset = 0x20000000;
-
- X86AddressSanitizer32(const MCSubtargetInfo *&STI)
- : X86AddressSanitizer(STI) {}
-
- ~X86AddressSanitizer32() override = default;
-
- unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
- unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
- if (FrameReg == X86::NoRegister)
- return FrameReg;
- return getX86SubSuperRegister(FrameReg, 32);
- }
-
- void SpillReg(MCStreamer &Out, unsigned Reg) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(Reg));
- OrigSPOffset -= 4;
- }
-
- void RestoreReg(MCStreamer &Out, unsigned Reg) {
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(Reg));
- OrigSPOffset += 4;
- }
-
- void StoreFlags(MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
- OrigSPOffset -= 4;
- }
-
- void RestoreFlags(MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::POPF32));
- OrigSPOffset += 4;
- }
-
- void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
- assert(LocalFrameReg != X86::NoRegister);
-
- const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
- unsigned FrameReg = GetFrameReg(Ctx, Out);
- if (MRI && FrameReg != X86::NoRegister) {
- SpillReg(Out, LocalFrameReg);
- if (FrameReg == X86::ESP) {
- Out.EmitCFIAdjustCfaOffset(4 /* byte size of the LocalFrameReg */);
- Out.EmitCFIRelOffset(
- MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
- }
- EmitInstruction(
- Out,
- MCInstBuilder(X86::MOV32rr).addReg(LocalFrameReg).addReg(FrameReg));
- Out.EmitCFIRememberState();
- Out.EmitCFIDefCfaRegister(
- MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
- }
-
- SpillReg(Out, RegCtx.AddressReg(32));
- SpillReg(Out, RegCtx.ShadowReg(32));
- if (RegCtx.ScratchReg(32) != X86::NoRegister)
- SpillReg(Out, RegCtx.ScratchReg(32));
- StoreFlags(Out);
- }
-
- void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
- assert(LocalFrameReg != X86::NoRegister);
-
- RestoreFlags(Out);
- if (RegCtx.ScratchReg(32) != X86::NoRegister)
- RestoreReg(Out, RegCtx.ScratchReg(32));
- RestoreReg(Out, RegCtx.ShadowReg(32));
- RestoreReg(Out, RegCtx.AddressReg(32));
-
- unsigned FrameReg = GetFrameReg(Ctx, Out);
- if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
- RestoreReg(Out, LocalFrameReg);
- Out.EmitCFIRestoreState();
- if (FrameReg == X86::ESP)
- Out.EmitCFIAdjustCfaOffset(-4 /* byte size of the LocalFrameReg */);
- }
- }
-
- void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) override;
-
-private:
- void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out, const RegisterContext &RegCtx) {
- EmitInstruction(Out, MCInstBuilder(X86::CLD));
- EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
-
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri8)
- .addReg(X86::ESP)
- .addReg(X86::ESP)
- .addImm(-16));
- EmitInstruction(
- Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
-
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
- (IsWrite ? "store" : "load") +
- Twine(AccessSize));
- const MCSymbolRefExpr *FnExpr =
- MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
- }
-};
-
-void X86AddressSanitizer32::InstrumentMemOperandSmall(
- X86Operand &Op, unsigned AccessSize, bool IsWrite,
- const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI32 = RegCtx.AddressReg(32);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
- unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
-
- assert(RegCtx.ScratchReg(32) != X86::NoRegister);
- unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
-
- ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
-
- EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
- AddressRegI32));
- EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
- .addReg(ShadowRegI32)
- .addReg(ShadowRegI32)
- .addImm(3));
-
- {
- MCInst Inst;
- Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::createReg(ShadowRegI8));
- const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
- SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
-
- EmitInstruction(
- Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
- MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
- EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
- AddressRegI32));
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
- .addReg(ScratchRegI32)
- .addReg(ScratchRegI32)
- .addImm(7));
-
- switch (AccessSize) {
- default: llvm_unreachable("Incorrect access size");
- case 1:
- break;
- case 2: {
- const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
- SMLoc(), SMLoc()));
- EmitLEA(*Op, 32, ScratchRegI32, Out);
- break;
- }
- case 4:
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
- .addReg(ScratchRegI32)
- .addReg(ScratchRegI32)
- .addImm(3));
- break;
- }
-
- EmitInstruction(
- Out,
- MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
- EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
- ShadowRegI32));
- EmitInstruction(Out, MCInstBuilder(X86::JL_1).addExpr(DoneExpr));
-
- EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
- EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer32::InstrumentMemOperandLarge(
- X86Operand &Op, unsigned AccessSize, bool IsWrite,
- const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI32 = RegCtx.AddressReg(32);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
-
- ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
-
- EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
- AddressRegI32));
- EmitInstruction(Out, MCInstBuilder(X86::SHR32ri)
- .addReg(ShadowRegI32)
- .addReg(ShadowRegI32)
- .addImm(3));
- {
- MCInst Inst;
- switch (AccessSize) {
- default: llvm_unreachable("Incorrect access size");
- case 8:
- Inst.setOpcode(X86::CMP8mi);
- break;
- case 16:
- Inst.setOpcode(X86::CMP16mi);
- break;
- }
- const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI32, 0, 1,
- SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- Inst.addOperand(MCOperand::createImm(0));
- EmitInstruction(Out, Inst);
- }
- MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
- EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
- EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer32::InstrumentMOVSImpl(unsigned AccessSize,
- MCContext &Ctx,
- MCStreamer &Out) {
- StoreFlags(Out);
-
-  // No need to test when ECX equals zero.
- MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
- EmitInstruction(
- Out, MCInstBuilder(X86::TEST32rr).addReg(X86::ECX).addReg(X86::ECX));
- EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
- // Instrument first and last elements in src and dst range.
- InstrumentMOVSBase(X86::EDI /* DstReg */, X86::ESI /* SrcReg */,
- X86::ECX /* CntReg */, AccessSize, Ctx, Out);
-
- EmitLabel(Out, DoneSym);
- RestoreFlags(Out);
-}
-
-class X86AddressSanitizer64 : public X86AddressSanitizer {
-public:
- static const long kShadowOffset = 0x7fff8000;
-
- X86AddressSanitizer64(const MCSubtargetInfo *&STI)
- : X86AddressSanitizer(STI) {}
-
- ~X86AddressSanitizer64() override = default;
-
- unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
- unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
- if (FrameReg == X86::NoRegister)
- return FrameReg;
- return getX86SubSuperRegister(FrameReg, 64);
- }
-
- void SpillReg(MCStreamer &Out, unsigned Reg) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(Reg));
- OrigSPOffset -= 8;
- }
-
- void RestoreReg(MCStreamer &Out, unsigned Reg) {
- EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(Reg));
- OrigSPOffset += 8;
- }
-
- void StoreFlags(MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
- OrigSPOffset -= 8;
- }
-
- void RestoreFlags(MCStreamer &Out) {
- EmitInstruction(Out, MCInstBuilder(X86::POPF64));
- OrigSPOffset += 8;
- }
-
- void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
- assert(LocalFrameReg != X86::NoRegister);
-
- const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
- unsigned FrameReg = GetFrameReg(Ctx, Out);
- if (MRI && FrameReg != X86::NoRegister) {
- SpillReg(Out, X86::RBP);
- if (FrameReg == X86::RSP) {
- Out.EmitCFIAdjustCfaOffset(8 /* byte size of the LocalFrameReg */);
- Out.EmitCFIRelOffset(
- MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */), 0);
- }
- EmitInstruction(
- Out,
- MCInstBuilder(X86::MOV64rr).addReg(LocalFrameReg).addReg(FrameReg));
- Out.EmitCFIRememberState();
- Out.EmitCFIDefCfaRegister(
- MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
- }
-
- EmitAdjustRSP(Ctx, Out, -128);
- SpillReg(Out, RegCtx.ShadowReg(64));
- SpillReg(Out, RegCtx.AddressReg(64));
- if (RegCtx.ScratchReg(64) != X86::NoRegister)
- SpillReg(Out, RegCtx.ScratchReg(64));
- StoreFlags(Out);
- }
-
- void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
- assert(LocalFrameReg != X86::NoRegister);
-
- RestoreFlags(Out);
- if (RegCtx.ScratchReg(64) != X86::NoRegister)
- RestoreReg(Out, RegCtx.ScratchReg(64));
- RestoreReg(Out, RegCtx.AddressReg(64));
- RestoreReg(Out, RegCtx.ShadowReg(64));
- EmitAdjustRSP(Ctx, Out, 128);
-
- unsigned FrameReg = GetFrameReg(Ctx, Out);
- if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
- RestoreReg(Out, LocalFrameReg);
- Out.EmitCFIRestoreState();
- if (FrameReg == X86::RSP)
- Out.EmitCFIAdjustCfaOffset(-8 /* byte size of the LocalFrameReg */);
- }
- }
-
- void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) override;
-
-private:
- void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
- const MCExpr *Disp = MCConstantExpr::create(Offset, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
- SMLoc(), SMLoc()));
- EmitLEA(*Op, 64, X86::RSP, Out);
- OrigSPOffset += Offset;
- }
-
- void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out, const RegisterContext &RegCtx) {
- EmitInstruction(Out, MCInstBuilder(X86::CLD));
- EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
-
- EmitInstruction(Out, MCInstBuilder(X86::AND64ri8)
- .addReg(X86::RSP)
- .addReg(X86::RSP)
- .addImm(-16));
-
- if (RegCtx.AddressReg(64) != X86::RDI) {
- EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
- RegCtx.AddressReg(64)));
- }
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
- (IsWrite ? "store" : "load") +
- Twine(AccessSize));
- const MCSymbolRefExpr *FnExpr =
- MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
- }
-};
-
-} // end anonymous namespace
-
-void X86AddressSanitizer64::InstrumentMemOperandSmall(
- X86Operand &Op, unsigned AccessSize, bool IsWrite,
- const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI64 = RegCtx.AddressReg(64);
- unsigned AddressRegI32 = RegCtx.AddressReg(32);
- unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
- unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
-
- assert(RegCtx.ScratchReg(32) != X86::NoRegister);
- unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
-
- ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
-
- EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
- AddressRegI64));
- EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
- .addReg(ShadowRegI64)
- .addReg(ShadowRegI64)
- .addImm(3));
- {
- MCInst Inst;
- Inst.setOpcode(X86::MOV8rm);
- Inst.addOperand(MCOperand::createReg(ShadowRegI8));
- const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
- SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- EmitInstruction(Out, Inst);
- }
-
- EmitInstruction(
- Out, MCInstBuilder(X86::TEST8rr).addReg(ShadowRegI8).addReg(ShadowRegI8));
- MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
- EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ScratchRegI32).addReg(
- AddressRegI32));
- EmitInstruction(Out, MCInstBuilder(X86::AND32ri)
- .addReg(ScratchRegI32)
- .addReg(ScratchRegI32)
- .addImm(7));
-
- switch (AccessSize) {
- default: llvm_unreachable("Incorrect access size");
- case 1:
- break;
- case 2: {
- const MCExpr *Disp = MCConstantExpr::create(1, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
- SMLoc(), SMLoc()));
- EmitLEA(*Op, 32, ScratchRegI32, Out);
- break;
- }
- case 4:
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8)
- .addReg(ScratchRegI32)
- .addReg(ScratchRegI32)
- .addImm(3));
- break;
- }
-
- EmitInstruction(
- Out,
- MCInstBuilder(X86::MOVSX32rr8).addReg(ShadowRegI32).addReg(ShadowRegI8));
- EmitInstruction(Out, MCInstBuilder(X86::CMP32rr).addReg(ScratchRegI32).addReg(
- ShadowRegI32));
- EmitInstruction(Out, MCInstBuilder(X86::JL_1).addExpr(DoneExpr));
-
- EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
- EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer64::InstrumentMemOperandLarge(
- X86Operand &Op, unsigned AccessSize, bool IsWrite,
- const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI64 = RegCtx.AddressReg(64);
- unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
-
- ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
-
- EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
- AddressRegI64));
- EmitInstruction(Out, MCInstBuilder(X86::SHR64ri)
- .addReg(ShadowRegI64)
- .addReg(ShadowRegI64)
- .addImm(3));
- {
- MCInst Inst;
- switch (AccessSize) {
- default: llvm_unreachable("Incorrect access size");
- case 8:
- Inst.setOpcode(X86::CMP8mi);
- break;
- case 16:
- Inst.setOpcode(X86::CMP16mi);
- break;
- }
- const MCExpr *Disp = MCConstantExpr::create(kShadowOffset, Ctx);
- std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(getPointerWidth(), 0, Disp, ShadowRegI64, 0, 1,
- SMLoc(), SMLoc()));
- Op->addMemOperands(Inst, 5);
- Inst.addOperand(MCOperand::createImm(0));
- EmitInstruction(Out, Inst);
- }
-
- MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
- EmitCallAsanReport(AccessSize, IsWrite, Ctx, Out, RegCtx);
- EmitLabel(Out, DoneSym);
-}
-
-void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
- MCContext &Ctx,
- MCStreamer &Out) {
- StoreFlags(Out);
-
-  // No need to test when RCX equals zero.
- MCSymbol *DoneSym = Ctx.createTempSymbol();
- const MCExpr *DoneExpr = MCSymbolRefExpr::create(DoneSym, Ctx);
- EmitInstruction(
- Out, MCInstBuilder(X86::TEST64rr).addReg(X86::RCX).addReg(X86::RCX));
- EmitInstruction(Out, MCInstBuilder(X86::JE_1).addExpr(DoneExpr));
-
- // Instrument first and last elements in src and dst range.
- InstrumentMOVSBase(X86::RDI /* DstReg */, X86::RSI /* SrcReg */,
- X86::RCX /* CntReg */, AccessSize, Ctx, Out);
-
- EmitLabel(Out, DoneSym);
- RestoreFlags(Out);
-}
-
-X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
- : STI(STI) {}
-
-X86AsmInstrumentation::~X86AsmInstrumentation() = default;
-
-void X86AsmInstrumentation::InstrumentAndEmitInstruction(
- const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out, bool PrintSchedInfoEnabled) {
- EmitInstruction(Out, Inst, PrintSchedInfoEnabled);
-}
-
-void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst,
- bool PrintSchedInfoEnabled) {
- Out.EmitInstruction(Inst, *STI, PrintSchedInfoEnabled);
-}
-
-unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
- MCStreamer &Out) {
- if (!Out.getNumFrameInfos()) // No active dwarf frame
- return X86::NoRegister;
- const MCDwarfFrameInfo &Frame = Out.getDwarfFrameInfos().back();
- if (Frame.End) // Active dwarf frame is closed
- return X86::NoRegister;
- const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
- if (!MRI) // No register info
- return X86::NoRegister;
-
- if (InitialFrameReg) {
- // FrameReg is set explicitly, we're instrumenting a MachineFunction.
- return InitialFrameReg;
- }
-
- return MRI->getLLVMRegNum(Frame.CurrentCfaRegister, true /* IsEH */);
-}
-
-X86AsmInstrumentation *
-llvm::CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx,
- const MCSubtargetInfo *&STI) {
- Triple T(STI->getTargetTriple());
- const bool hasCompilerRTSupport = T.isOSLinux();
- if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
- MCOptions.SanitizeAddress) {
- if (STI->getFeatureBits()[X86::Mode32Bit] != 0)
- return new X86AddressSanitizer32(STI);
- if (STI->getFeatureBits()[X86::Mode64Bit] != 0)
- return new X86AddressSanitizer64(STI);
- }
- return new X86AsmInstrumentation(STI);
-}
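The algorithm comment at the top of the deleted X86AsmInstrumentation.cpp above describes the AddressSanitizer shadow check that the removed classes expanded into inline machine code. For orientation only, here is a minimal C++ sketch of that check; it is not taken from LLVM, the helper name ShouldReport is invented, and it folds the 16-byte case (which the deleted code handled by comparing two shadow bytes) into a single-byte test.

#include <cstdint>

// 1:8 shadow mapping with the 64-bit kShadowOffset used above (the 32-bit
// variant used 0x20000000).
static const uintptr_t kShadowOffset = 0x7fff8000;

// True when an AccessSize-byte access at Addr should be reported, mirroring
// the TEST/CMP sequence the instrumentation emitted for accesses of up to
// 8 bytes.
static bool ShouldReport(uintptr_t Addr, unsigned AccessSize) {
  int8_t Shadow = *reinterpret_cast<const int8_t *>((Addr >> 3) + kShadowOffset);
  if (Shadow == 0)
    return false;                // the whole 8-byte granule is addressable
  if (AccessSize >= 8)
    return true;                 // full-granule access: any nonzero shadow fails
  // Partially addressable granule: the last byte touched must lie below Shadow.
  return static_cast<int8_t>((Addr & 7) + AccessSize - 1) >= Shadow;
}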
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
deleted file mode 100644
index 42a9dc3ba26a..000000000000
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- X86AsmInstrumentation.h - Instrument X86 inline assembly -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
-#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
-
-#include "llvm/ADT/SmallVector.h"
-#include <memory>
-
-namespace llvm {
-
-class MCContext;
-class MCInst;
-class MCInstrInfo;
-class MCParsedAsmOperand;
-class MCStreamer;
-class MCSubtargetInfo;
-class MCTargetOptions;
-class X86AsmInstrumentation;
-
-X86AsmInstrumentation *
-CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx,
- const MCSubtargetInfo *&STI);
-
-class X86AsmInstrumentation {
-public:
- virtual ~X86AsmInstrumentation();
-
- // Sets frame register corresponding to a current frame.
- void SetInitialFrameRegister(unsigned RegNo) {
- InitialFrameReg = RegNo;
- }
-
- // Tries to instrument and emit instruction.
- virtual void InstrumentAndEmitInstruction(
- const MCInst &Inst,
- SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out,
- bool PrintSchedInfoEnabled);
-
-protected:
- friend X86AsmInstrumentation *
- CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx,
- const MCSubtargetInfo *&STI);
-
- X86AsmInstrumentation(const MCSubtargetInfo *&STI);
-
- unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
-
- void EmitInstruction(MCStreamer &Out, const MCInst &Inst,
- bool PrintSchedInfoEnabled = false);
-
- const MCSubtargetInfo *&STI;
-
- unsigned InitialFrameReg = 0;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 899b50d0f78f..95cbf46d37ed 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1,17 +1,16 @@
//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86TargetStreamer.h"
-#include "X86AsmInstrumentation.h"
+#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
@@ -71,9 +70,17 @@ static const char OpPrecedence[] = {
class X86AsmParser : public MCTargetAsmParser {
ParseInstructionInfo *InstInfo;
- std::unique_ptr<X86AsmInstrumentation> Instrumentation;
bool Code16GCC;
+ enum VEXEncoding {
+ VEXEncoding_Default,
+ VEXEncoding_VEX2,
+ VEXEncoding_VEX3,
+ VEXEncoding_EVEX,
+ };
+
+ VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;
+
private:
SMLoc consumeToken() {
MCAsmParser &Parser = getParser();
@@ -90,13 +97,14 @@ private:
}
unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
- uint64_t &ErrorInfo, bool matchingInlineAsm,
- unsigned VariantID = 0) {
+ uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
+ bool matchingInlineAsm, unsigned VariantID = 0) {
// In Code16GCC mode, match as 32-bit.
if (Code16GCC)
SwitchMode(X86::Mode32Bit);
unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- matchingInlineAsm, VariantID);
+ MissingFeatures, matchingInlineAsm,
+ VariantID);
if (Code16GCC)
SwitchMode(X86::Mode16Bit);
return rv;
@@ -840,6 +848,8 @@ private:
const SMLoc &StartLoc,
SMLoc &EndLoc);
+ X86::CondCode ParseConditionCode(StringRef CCode);
+
bool ParseIntelMemoryOperandSize(unsigned &Size);
std::unique_ptr<X86Operand>
CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
@@ -860,6 +870,8 @@ private:
bool parseDirectiveFPOEndProc(SMLoc L);
bool parseDirectiveFPOData(SMLoc L);
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops);
@@ -875,7 +887,7 @@ private:
void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
MCStreamer &Out, bool MatchingInlineAsm);
- bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+ bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
bool MatchingInlineAsm);
bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -914,7 +926,7 @@ private:
MCSubtargetInfo &STI = copySTI();
FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
- uint64_t FB = ComputeAvailableFeatures(
+ FeatureBitset FB = ComputeAvailableFeatures(
STI.ToggleFeature(OldMode.flip(mode)));
setAvailableFeatures(FB);
@@ -941,6 +953,9 @@ private:
/// }
public:
+ enum X86MatchResultTy {
+ Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
+ };
X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
@@ -951,14 +966,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
- Instrumentation.reset(
- CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
}
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- void SetFrameRegister(unsigned RegNo) override;
-
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -1115,8 +1126,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
- if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
- RegNo = X86::ST0;
+ if (RegNo == X86::ST0) {
Parser.Lex(); // Eat 'st'
// Check to see if we have '(4)' after %st.
@@ -1194,10 +1204,6 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
return false;
}
-void X86AsmParser::SetFrameRegister(unsigned RegNo) {
- Instrumentation->SetInitialFrameRegister(RegNo);
-}
-
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
bool Parse32 = is32BitMode() || Code16GCC;
unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
@@ -1656,6 +1662,8 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
const AsmToken &Tok = Parser.getTok();
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
+ if (Tok.isNot(AsmToken::Identifier))
+ return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
if (Tok.getIdentifier().startswith("r")){
int rndMode = StringSwitch<int>(Tok.getIdentifier())
.Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
@@ -1999,6 +2007,29 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
}
}
+// Returns X86::COND_INVALID if the string is not a recognized condition code or
+// alternate mnemonic; otherwise returns the EFLAGS condition code enumerator.
+X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
+ return StringSwitch<X86::CondCode>(CC)
+ .Case("o", X86::COND_O) // Overflow
+ .Case("no", X86::COND_NO) // No Overflow
+ .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
+ .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
+ .Cases("e", "z", X86::COND_E) // Equal/Zero
+ .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
+ .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
+ .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
+ .Case("s", X86::COND_S) // Sign
+ .Case("ns", X86::COND_NS) // No Sign
+ .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
+ .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
+ .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
+ .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
+ .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
+ .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
+ .Default(X86::COND_INVALID);
+}
+
// true on failure, false otherwise
// If no {z} mark was found - Parser doesn't advance
bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
@@ -2305,18 +2336,64 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
InstInfo = &Info;
+
+ // Reset the forced VEX encoding.
+ ForcedVEXEncoding = VEXEncoding_Default;
+
+ // Parse pseudo prefixes.
+ while (1) {
+ if (Name == "{") {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
+ std::string Prefix = Parser.getTok().getString().lower();
+ Parser.Lex(); // Eat identifier.
+ if (getLexer().isNot(AsmToken::RCurly))
+ return Error(Parser.getTok().getLoc(), "Expected '}'");
+ Parser.Lex(); // Eat curly.
+
+ if (Prefix == "vex2")
+ ForcedVEXEncoding = VEXEncoding_VEX2;
+ else if (Prefix == "vex3")
+ ForcedVEXEncoding = VEXEncoding_VEX3;
+ else if (Prefix == "evex")
+ ForcedVEXEncoding = VEXEncoding_EVEX;
+ else
+ return Error(NameLoc, "unknown prefix");
+
+ NameLoc = Parser.getTok().getLoc();
+ if (getLexer().is(AsmToken::LCurly)) {
+ Parser.Lex();
+ Name = "{";
+ } else {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(), "Expected identifier");
+        // FIXME: The mnemonic won't match correctly if it's not in lower case.
+ Name = Parser.getTok().getString();
+ Parser.Lex();
+ }
+ continue;
+ }
+
+ break;
+ }
+
StringRef PatchedName = Name;
- if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
- isParsingIntelSyntax() && isParsingInlineAsm()) {
+ // Hack to skip "short" following Jcc.
+ if (isParsingIntelSyntax() &&
+ (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
+       PatchedName == "jcxz" || PatchedName == "jecxz" ||
+ (PatchedName.startswith("j") &&
+ ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
StringRef NextTok = Parser.getTok().getString();
if (NextTok == "short") {
SMLoc NameEndLoc =
NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
- // Eat the short keyword
+ // Eat the short keyword.
Parser.Lex();
- // MS ignores the short keyword, it determines the jmp type based
- // on the distance of the label
+ // MS and GAS ignore the short keyword; they both determine the jmp type
+ // based on the distance of the label. (NASM does emit different code with
+ // and without "short," though.)
InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
NextTok.size() + 1);
}
@@ -2327,13 +2404,15 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName != "setb" && PatchedName != "setnb")
PatchedName = PatchedName.substr(0, Name.size()-1);
+ unsigned ComparisonPredicate = ~0U;
+
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
- unsigned ComparisonCode = StringSwitch<unsigned>(
+ unsigned CC = StringSwitch<unsigned>(
PatchedName.slice(CCIdx, PatchedName.size() - 2))
.Case("eq", 0x00)
.Case("eq_oq", 0x00)
@@ -2383,26 +2462,29 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
-
- Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
- NameLoc));
-
- const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
- getParser().getContext());
- Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+ if (CC != ~0U && (IsVCMP || CC < 8)) {
+ if (PatchedName.endswith("ss"))
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
+ else if (PatchedName.endswith("sd"))
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
+ else if (PatchedName.endswith("ps"))
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
+ else if (PatchedName.endswith("pd"))
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ else
+ llvm_unreachable("Unexpected suffix!");
- PatchedName = PatchedName.substr(PatchedName.size() - 2);
+ ComparisonPredicate = CC;
}
}
// FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
if (PatchedName.startswith("vpcmp") &&
- (PatchedName.endswith("b") || PatchedName.endswith("w") ||
- PatchedName.endswith("d") || PatchedName.endswith("q"))) {
- unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
- unsigned ComparisonCode = StringSwitch<unsigned>(
- PatchedName.slice(5, PatchedName.size() - CCIdx))
+ (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
+ PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
+ unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
+ unsigned CC = StringSwitch<unsigned>(
+ PatchedName.slice(5, PatchedName.size() - SuffixSize))
.Case("eq", 0x0) // Only allowed on unsigned. Checked below.
.Case("lt", 0x1)
.Case("le", 0x2)
@@ -2412,24 +2494,26 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Case("nle", 0x6)
//.Case("true", 0x7) // Not a documented alias.
.Default(~0U);
- if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
- Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
-
- const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
- getParser().getContext());
- Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
-
- PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
+ if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
+ switch (PatchedName.back()) {
+ default: llvm_unreachable("Unexpected character!");
+ case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
+ case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
+ case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
+ case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
+ }
+ // Set up the immediate to push into the operands later.
+ ComparisonPredicate = CC;
}
}
// FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
if (PatchedName.startswith("vpcom") &&
- (PatchedName.endswith("b") || PatchedName.endswith("w") ||
- PatchedName.endswith("d") || PatchedName.endswith("q"))) {
- unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
- unsigned ComparisonCode = StringSwitch<unsigned>(
- PatchedName.slice(5, PatchedName.size() - CCIdx))
+ (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
+ PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
+ unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
+ unsigned CC = StringSwitch<unsigned>(
+ PatchedName.slice(5, PatchedName.size() - SuffixSize))
.Case("lt", 0x0)
.Case("le", 0x1)
.Case("gt", 0x2)
@@ -2439,14 +2523,16 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Case("false", 0x6)
.Case("true", 0x7)
.Default(~0U);
- if (ComparisonCode != ~0U) {
- Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
-
- const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
- getParser().getContext());
- Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
-
- PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
+ if (CC != ~0U) {
+ switch (PatchedName.back()) {
+ default: llvm_unreachable("Unexpected character!");
+ case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
+ case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
+ case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
+ case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
+ }
+ // Set up the immediate to push into the operands later.
+ ComparisonPredicate = CC;
}
}
@@ -2489,6 +2575,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Flags = X86::IP_NO_PREFIX;
break;
}
+    // FIXME: The mnemonic won't match correctly if it's not in lower case.
Name = Parser.getTok().getString();
Parser.Lex(); // eat the prefix
// Hack: we could have something like "rep # some comment" or
@@ -2496,6 +2583,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
while (Name.startswith(";") || Name.startswith("\n") ||
Name.startswith("#") || Name.startswith("\t") ||
Name.startswith("/")) {
+      // FIXME: The mnemonic won't match correctly if it's not in lower case.
Name = Parser.getTok().getString();
Parser.Lex(); // go to next prefix or instr
}
@@ -2519,6 +2607,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+ // Push the immediate if we extracted one from the mnemonic.
+ if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
+ getParser().getContext());
+ Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+ }
+
// This does the actual operand parsing. Don't parse any more if we have a
// prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
// just want to parse the "lock" as the first instruction and the "incl" as
@@ -2553,6 +2648,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return TokError("unexpected token in argument list");
}
+ // Push the immediate if we extracted one from the mnemonic.
+ if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
+ const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
+ getParser().getContext());
+ Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
+ }
+
// Consume the EndOfStatement or the prefix separator Slash
if (getLexer().is(AsmToken::EndOfStatement) ||
(isPrefix && getLexer().is(AsmToken::Slash)))
@@ -2576,13 +2678,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
}
- // Moving a 32 or 16 bit value into a segment register has the same
- // behavior. Modify such instructions to always take shorter form.
if ((Name == "mov" || Name == "movw" || Name == "movl") &&
(Operands.size() == 3)) {
X86Operand &Op1 = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];
SMLoc Loc = Op1.getEndLoc();
+ // Moving a 32 or 16 bit value into a segment register has the same
+ // behavior. Modify such instructions to always take shorter form.
if (Op1.isReg() && Op2.isReg() &&
X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
Op2.getReg()) &&
@@ -2759,7 +2861,69 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
- return false;
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::VMOVZPQILo2PQIrr:
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
+    // the registers is extended, but the other isn't.
+ if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
+ MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
+ MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
+    // the registers is extended, but the other isn't.
+ if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
+ MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
+ MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
+ }
}
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
@@ -2865,9 +3029,7 @@ static const char *getSubtargetFeatureName(uint64_t Val);
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
- Instrumentation->InstrumentAndEmitInstruction(
- Inst, Operands, getContext(), MII, Out,
- getParser().shouldPrintSchedInfo());
+ Out.EmitInstruction(Inst, getSTI());
}
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -2907,17 +3069,16 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
}
}
-bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
+bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
+ const FeatureBitset &MissingFeatures,
bool MatchingInlineAsm) {
- assert(ErrorInfo && "Unknown missing feature!");
+ assert(MissingFeatures.any() && "Unknown missing feature!");
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
OS << "instruction requires:";
- uint64_t Mask = 1;
- for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
- if (ErrorInfo & Mask)
- OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
- Mask <<= 1;
+ for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
+ if (MissingFeatures[i])
+ OS << ' ' << getSubtargetFeatureName(i);
}
return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}
@@ -2932,30 +3093,70 @@ static unsigned getPrefixes(OperandVector &Operands) {
return Result;
}
+unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &MCID = MII.get(Opc);
+
+ if (ForcedVEXEncoding == VEXEncoding_EVEX &&
+ (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
+ return Match_Unsupported;
+
+ if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
+ ForcedVEXEncoding == VEXEncoding_VEX3) &&
+ (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
+ return Match_Unsupported;
+
+ // These instructions match ambiguously with their VEX encoded counterparts
+ // and appear first in the matching table. Reject them unless we're forcing
+ // EVEX encoding.
+ // FIXME: We really need a way to break the ambiguity.
+ switch (Opc) {
+ case X86::VCVTSD2SIZrm_Int:
+ case X86::VCVTSD2SI64Zrm_Int:
+ case X86::VCVTSS2SIZrm_Int:
+ case X86::VCVTSS2SI64Zrm_Int:
+ case X86::VCVTTSD2SIZrm: case X86::VCVTTSD2SIZrm_Int:
+ case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
+ case X86::VCVTTSS2SIZrm: case X86::VCVTTSS2SIZrm_Int:
+ case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
+ if (ForcedVEXEncoding != VEXEncoding_EVEX)
+ return Match_Unsupported;
+ }
+
+ return Match_Success;
+}
+
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
- assert(Op.isToken() && "Leading operand should always be a mnemonic!");
+ assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
SMRange EmptyRange = None;
// First, handle aliases that expand to multiple instructions.
- MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
-
- bool WasOriginallyInvalidOperand = false;
+ MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
+ Out, MatchingInlineAsm);
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
unsigned Prefixes = getPrefixes(Operands);
MCInst Inst;
+ // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
+ // encoder.
+ if (ForcedVEXEncoding == VEXEncoding_VEX3)
+ Prefixes |= X86::IP_USE_VEX3;
+
if (Prefixes)
Inst.setFlags(Prefixes);
// First, try a direct match.
- switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
- isParsingIntelSyntax())) {
+ FeatureBitset MissingFeatures;
+ unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
+ MissingFeatures, MatchingInlineAsm,
+ isParsingIntelSyntax());
+ switch (OriginalError) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
@@ -2973,13 +3174,17 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
Opcode = Inst.getOpcode();
return false;
case Match_MissingFeature:
- return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
+ return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
case Match_InvalidOperand:
- WasOriginallyInvalidOperand = true;
- break;
case Match_MnemonicFail:
+ case Match_Unsupported:
break;
}
+ if (Op.getToken().empty()) {
+ Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
+ MatchingInlineAsm);
+ return true;
+ }
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
@@ -3003,16 +3208,17 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// Check for the various suffix matches.
uint64_t ErrorInfoIgnore;
- uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
+ FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
unsigned Match[4];
for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
Tmp.back() = Suffixes[I];
Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
+ MissingFeatures, MatchingInlineAsm,
+ isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match[I] == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
+ ErrorInfoMissingFeatures = MissingFeatures;
}
// Restore the old token.
@@ -3062,11 +3268,15 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// If all of the instructions reported an invalid mnemonic, then the original
// mnemonic was invalid.
if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
- if (!WasOriginallyInvalidOperand) {
+ if (OriginalError == Match_MnemonicFail)
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
Op.getLocRange(), MatchingInlineAsm);
- }
+ if (OriginalError == Match_Unsupported)
+ return Error(IDLoc, "unsupported instruction", EmptyRange,
+ MatchingInlineAsm);
+
+ assert(OriginalError == Match_InvalidOperand && "Unexpected error");
// Recover location info for the operand if we know which was the problem.
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
@@ -3085,12 +3295,19 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchingInlineAsm);
}
+ // If one instruction matched as unsupported, report this as unsupported.
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_Unsupported) == 1) {
+ return Error(IDLoc, "unsupported instruction", EmptyRange,
+ MatchingInlineAsm);
+ }
+
// If one instruction matched with a missing feature, report this as a
// missing feature.
if (std::count(std::begin(Match), std::end(Match),
Match_MissingFeature) == 1) {
- ErrorInfo = ErrorInfoMissingFeature;
- return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+ ErrorInfo = Match_MissingFeature;
+ return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
MatchingInlineAsm);
}
@@ -3114,18 +3331,23 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
- assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- StringRef Mnemonic = Op.getToken();
+ assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
+ StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
SMRange EmptyRange = None;
- StringRef Base = Op.getToken();
+ StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
unsigned Prefixes = getPrefixes(Operands);
// First, handle aliases that expand to multiple instructions.
- MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
+ MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
MCInst Inst;
+ // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
+ // encoder.
+ if (ForcedVEXEncoding == VEXEncoding_VEX3)
+ Prefixes |= X86::IP_USE_VEX3;
+
if (Prefixes)
Inst.setFlags(Prefixes);
@@ -3154,7 +3376,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
}
SmallVector<unsigned, 8> Match;
- uint64_t ErrorInfoMissingFeature = 0;
+ FeatureBitset ErrorInfoMissingFeatures;
+ FeatureBitset MissingFeatures;
// If unsized push has immediate operand we should default the default pointer
// size for the size.
@@ -3174,7 +3397,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
Op.setTokenValue(Tmp);
// Do match in ATT mode to allow explicit suffix usage.
Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
- MatchingInlineAsm,
+ MissingFeatures, MatchingInlineAsm,
false /*isParsingIntelSyntax()*/));
Op.setTokenValue(Base);
}
@@ -3191,13 +3414,14 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t ErrorInfoIgnore;
unsigned LastOpcode = Inst.getOpcode();
unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
+ MissingFeatures, MatchingInlineAsm,
+ isParsingIntelSyntax());
if (Match.empty() || LastOpcode != Inst.getOpcode())
Match.push_back(M);
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfoIgnore;
+ ErrorInfoMissingFeatures = MissingFeatures;
}
// Restore the size of the unsized memory operand if we modified it.
@@ -3209,10 +3433,11 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// matching with the unsized operand.
if (Match.empty()) {
Match.push_back(MatchInstruction(
- Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
+ Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
+ isParsingIntelSyntax()));
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
- ErrorInfoMissingFeature = ErrorInfo;
+ ErrorInfoMissingFeatures = MissingFeatures;
}
// Restore the size of the unsized memory operand if we modified it.
@@ -3234,7 +3459,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
UnsizedMemOp->getMemFrontendSize()) {
UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
unsigned M = MatchInstruction(
- Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
+ Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
+ isParsingIntelSyntax());
if (M == Match_Success)
NumSuccessfulMatches = 1;
@@ -3270,12 +3496,19 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
UnsizedMemOp->getLocRange());
}
+ // If one instruction matched as unsupported, report this as unsupported.
+ if (std::count(std::begin(Match), std::end(Match),
+ Match_Unsupported) == 1) {
+ return Error(IDLoc, "unsupported instruction", EmptyRange,
+ MatchingInlineAsm);
+ }
+
// If one instruction matched with a missing feature, report this as a
// missing feature.
if (std::count(std::begin(Match), std::end(Match),
Match_MissingFeature) == 1) {
- ErrorInfo = ErrorInfoMissingFeature;
- return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
+ ErrorInfo = Match_MissingFeature;
+ return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
MatchingInlineAsm);
}
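
Aside on the FeatureBitset change above: ErrorMissingFeature now walks a bitset of missing subtarget features and appends one name per set bit, rather than shifting a 64-bit mask. A minimal standalone sketch of that pattern, using std::bitset and a hypothetical featureName() table in place of LLVM's FeatureBitset and the generated getSubtargetFeatureName():

#include <bitset>
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>

// Stand-in for the TableGen-generated feature-name lookup (names are examples).
static const char *featureName(std::size_t Bit) {
  static const char *Names[] = {"sse2", "avx", "avx512f", "bmi2"};
  return Bit < 4 ? Names[Bit] : "unknown";
}

// Same shape as the "instruction requires:" loop: one name per set bit.
static std::string missingFeatureMessage(const std::bitset<4> &Missing) {
  std::ostringstream OS;
  OS << "instruction requires:";
  for (std::size_t I = 0; I != Missing.size(); ++I)
    if (Missing[I])
      OS << ' ' << featureName(I);
  return OS.str();
}

int main() {
  std::bitset<4> Missing;
  Missing.set(2); // pretend the AVX-512F bit is the one that is missing
  std::cout << missingFeatureMessage(Missing) << '\n'; // instruction requires: avx512f
  return 0;
}
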
diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index c45a3f14ef11..5bc979d1f18c 100644
--- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -1,9 +1,8 @@
//===-- X86AsmParserCommon.h - Common functions for X86AsmParser ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 4d4aae0a1c6a..a771ba366318 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -1,16 +1,15 @@
//===- X86Operand.h - Parsed X86 machine instruction ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
#define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
-#include "InstPrinter/X86IntelInstPrinter.h"
+#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86AsmParserCommon.h"
#include "llvm/ADT/STLExtras.h"
@@ -452,6 +451,31 @@ struct X86Operand final : public MCParsedAsmOperand {
X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
}
+ bool isVK1Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK1RegClassID].contains(getReg());
+ }
+
+ bool isVK2Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK2RegClassID].contains(getReg());
+ }
+
+ bool isVK4Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK4RegClassID].contains(getReg());
+ }
+
+ bool isVK8Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK8RegClassID].contains(getReg());
+ }
+
+ bool isVK16Pair() const {
+ return Kind == Register &&
+ X86MCRegisterClasses[X86::VK16RegClassID].contains(getReg());
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -483,6 +507,30 @@ struct X86Operand final : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
+ void addMaskPairOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Reg = getReg();
+ switch (Reg) {
+ case X86::K0:
+ case X86::K1:
+ Reg = X86::K0_K1;
+ break;
+ case X86::K2:
+ case X86::K3:
+ Reg = X86::K2_K3;
+ break;
+ case X86::K4:
+ case X86::K5:
+ Reg = X86::K4_K5;
+ break;
+ case X86::K6:
+ case X86::K7:
+ Reg = X86::K6_K7;
+ break;
+ }
+ Inst.addOperand(MCOperand::createReg(Reg));
+ }
+
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
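
The addMaskPairOperands() hunk above folds an individual mask register into its even/odd pair register (K0/K1 -> K0_K1 and so on). A minimal standalone sketch of the same mapping, assuming illustrative enum values rather than LLVM's real X86:: register numbers:

#include <cassert>
#include <iostream>

// Illustrative numbering only; the real X86:: register enum values differ.
enum MaskReg { K0, K1, K2, K3, K4, K5, K6, K7 };
enum MaskPairReg { K0_K1, K2_K3, K4_K5, K6_K7 };

// K0/K1 -> K0_K1, K2/K3 -> K2_K3, ...: the mapping the switch in
// addMaskPairOperands performs, written here as index arithmetic.
static MaskPairReg maskRegToPair(MaskReg Reg) {
  assert(Reg >= K0 && Reg <= K7 && "not a mask register");
  return static_cast<MaskPairReg>(Reg / 2);
}

int main() {
  std::cout << (maskRegToPair(K5) == K4_K5) << '\n'; // prints 1
  return 0;
}
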
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 62312777318e..9a635bbe5f85 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1,9 +1,8 @@
//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,6 +75,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -446,211 +446,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case ENCODING_IO:
break;
}
- } else if (type == TYPE_IMM3) {
- // Check for immediates that printSSECC can't handle.
- if (immediate >= 8) {
- unsigned NewOpc;
- switch (mcInst.getOpcode()) {
- default: llvm_unreachable("unexpected opcode");
- case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break;
- case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break;
- case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break;
- case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break;
- case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break;
- case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break;
- case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break;
- case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break;
- case X86::VPCOMBri: NewOpc = X86::VPCOMBri_alt; break;
- case X86::VPCOMBmi: NewOpc = X86::VPCOMBmi_alt; break;
- case X86::VPCOMWri: NewOpc = X86::VPCOMWri_alt; break;
- case X86::VPCOMWmi: NewOpc = X86::VPCOMWmi_alt; break;
- case X86::VPCOMDri: NewOpc = X86::VPCOMDri_alt; break;
- case X86::VPCOMDmi: NewOpc = X86::VPCOMDmi_alt; break;
- case X86::VPCOMQri: NewOpc = X86::VPCOMQri_alt; break;
- case X86::VPCOMQmi: NewOpc = X86::VPCOMQmi_alt; break;
- case X86::VPCOMUBri: NewOpc = X86::VPCOMUBri_alt; break;
- case X86::VPCOMUBmi: NewOpc = X86::VPCOMUBmi_alt; break;
- case X86::VPCOMUWri: NewOpc = X86::VPCOMUWri_alt; break;
- case X86::VPCOMUWmi: NewOpc = X86::VPCOMUWmi_alt; break;
- case X86::VPCOMUDri: NewOpc = X86::VPCOMUDri_alt; break;
- case X86::VPCOMUDmi: NewOpc = X86::VPCOMUDmi_alt; break;
- case X86::VPCOMUQri: NewOpc = X86::VPCOMUQri_alt; break;
- case X86::VPCOMUQmi: NewOpc = X86::VPCOMUQmi_alt; break;
- }
- // Switch opcode to the one that doesn't get special printing.
- mcInst.setOpcode(NewOpc);
- }
- } else if (type == TYPE_IMM5) {
- // Check for immediates that printAVXCC can't handle.
- if (immediate >= 32) {
- unsigned NewOpc;
- switch (mcInst.getOpcode()) {
- default: llvm_unreachable("unexpected opcode");
- case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
- case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
- case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
- case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
- case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
- case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
- case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
- case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
- case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
- case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
- case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
- case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
- case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
- case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
- case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break;
- case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
- case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
- case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
- case X86::VCMPPDZ128rmi: NewOpc = X86::VCMPPDZ128rmi_alt; break;
- case X86::VCMPPDZ128rri: NewOpc = X86::VCMPPDZ128rri_alt; break;
- case X86::VCMPPSZ128rmi: NewOpc = X86::VCMPPSZ128rmi_alt; break;
- case X86::VCMPPSZ128rri: NewOpc = X86::VCMPPSZ128rri_alt; break;
- case X86::VCMPPDZ256rmi: NewOpc = X86::VCMPPDZ256rmi_alt; break;
- case X86::VCMPPDZ256rri: NewOpc = X86::VCMPPDZ256rri_alt; break;
- case X86::VCMPPSZ256rmi: NewOpc = X86::VCMPPSZ256rmi_alt; break;
- case X86::VCMPPSZ256rri: NewOpc = X86::VCMPPSZ256rri_alt; break;
- case X86::VCMPSDZrm_Int: NewOpc = X86::VCMPSDZrmi_alt; break;
- case X86::VCMPSDZrr_Int: NewOpc = X86::VCMPSDZrri_alt; break;
- case X86::VCMPSDZrrb_Int: NewOpc = X86::VCMPSDZrrb_alt; break;
- case X86::VCMPSSZrm_Int: NewOpc = X86::VCMPSSZrmi_alt; break;
- case X86::VCMPSSZrr_Int: NewOpc = X86::VCMPSSZrri_alt; break;
- case X86::VCMPSSZrrb_Int: NewOpc = X86::VCMPSSZrrb_alt; break;
- }
- // Switch opcode to the one that doesn't get special printing.
- mcInst.setOpcode(NewOpc);
- }
- } else if (type == TYPE_AVX512ICC) {
- if (immediate >= 8 || ((immediate & 0x3) == 3)) {
- unsigned NewOpc;
- switch (mcInst.getOpcode()) {
- default: llvm_unreachable("unexpected opcode");
- case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPBZ128rmi_alt; break;
- case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPBZ128rmik_alt; break;
- case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPBZ128rri_alt; break;
- case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPBZ128rrik_alt; break;
- case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPBZ256rmi_alt; break;
- case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPBZ256rmik_alt; break;
- case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPBZ256rri_alt; break;
- case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPBZ256rrik_alt; break;
- case X86::VPCMPBZrmi: NewOpc = X86::VPCMPBZrmi_alt; break;
- case X86::VPCMPBZrmik: NewOpc = X86::VPCMPBZrmik_alt; break;
- case X86::VPCMPBZrri: NewOpc = X86::VPCMPBZrri_alt; break;
- case X86::VPCMPBZrrik: NewOpc = X86::VPCMPBZrrik_alt; break;
- case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPDZ128rmi_alt; break;
- case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPDZ128rmib_alt; break;
- case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPDZ128rmibk_alt; break;
- case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPDZ128rmik_alt; break;
- case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPDZ128rri_alt; break;
- case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPDZ128rrik_alt; break;
- case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPDZ256rmi_alt; break;
- case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPDZ256rmib_alt; break;
- case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPDZ256rmibk_alt; break;
- case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPDZ256rmik_alt; break;
- case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPDZ256rri_alt; break;
- case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPDZ256rrik_alt; break;
- case X86::VPCMPDZrmi: NewOpc = X86::VPCMPDZrmi_alt; break;
- case X86::VPCMPDZrmib: NewOpc = X86::VPCMPDZrmib_alt; break;
- case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPDZrmibk_alt; break;
- case X86::VPCMPDZrmik: NewOpc = X86::VPCMPDZrmik_alt; break;
- case X86::VPCMPDZrri: NewOpc = X86::VPCMPDZrri_alt; break;
- case X86::VPCMPDZrrik: NewOpc = X86::VPCMPDZrrik_alt; break;
- case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPQZ128rmi_alt; break;
- case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPQZ128rmib_alt; break;
- case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPQZ128rmibk_alt; break;
- case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPQZ128rmik_alt; break;
- case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPQZ128rri_alt; break;
- case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPQZ128rrik_alt; break;
- case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPQZ256rmi_alt; break;
- case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPQZ256rmib_alt; break;
- case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPQZ256rmibk_alt; break;
- case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPQZ256rmik_alt; break;
- case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPQZ256rri_alt; break;
- case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPQZ256rrik_alt; break;
- case X86::VPCMPQZrmi: NewOpc = X86::VPCMPQZrmi_alt; break;
- case X86::VPCMPQZrmib: NewOpc = X86::VPCMPQZrmib_alt; break;
- case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPQZrmibk_alt; break;
- case X86::VPCMPQZrmik: NewOpc = X86::VPCMPQZrmik_alt; break;
- case X86::VPCMPQZrri: NewOpc = X86::VPCMPQZrri_alt; break;
- case X86::VPCMPQZrrik: NewOpc = X86::VPCMPQZrrik_alt; break;
- case X86::VPCMPUBZ128rmi: NewOpc = X86::VPCMPUBZ128rmi_alt; break;
- case X86::VPCMPUBZ128rmik: NewOpc = X86::VPCMPUBZ128rmik_alt; break;
- case X86::VPCMPUBZ128rri: NewOpc = X86::VPCMPUBZ128rri_alt; break;
- case X86::VPCMPUBZ128rrik: NewOpc = X86::VPCMPUBZ128rrik_alt; break;
- case X86::VPCMPUBZ256rmi: NewOpc = X86::VPCMPUBZ256rmi_alt; break;
- case X86::VPCMPUBZ256rmik: NewOpc = X86::VPCMPUBZ256rmik_alt; break;
- case X86::VPCMPUBZ256rri: NewOpc = X86::VPCMPUBZ256rri_alt; break;
- case X86::VPCMPUBZ256rrik: NewOpc = X86::VPCMPUBZ256rrik_alt; break;
- case X86::VPCMPUBZrmi: NewOpc = X86::VPCMPUBZrmi_alt; break;
- case X86::VPCMPUBZrmik: NewOpc = X86::VPCMPUBZrmik_alt; break;
- case X86::VPCMPUBZrri: NewOpc = X86::VPCMPUBZrri_alt; break;
- case X86::VPCMPUBZrrik: NewOpc = X86::VPCMPUBZrrik_alt; break;
- case X86::VPCMPUDZ128rmi: NewOpc = X86::VPCMPUDZ128rmi_alt; break;
- case X86::VPCMPUDZ128rmib: NewOpc = X86::VPCMPUDZ128rmib_alt; break;
- case X86::VPCMPUDZ128rmibk: NewOpc = X86::VPCMPUDZ128rmibk_alt; break;
- case X86::VPCMPUDZ128rmik: NewOpc = X86::VPCMPUDZ128rmik_alt; break;
- case X86::VPCMPUDZ128rri: NewOpc = X86::VPCMPUDZ128rri_alt; break;
- case X86::VPCMPUDZ128rrik: NewOpc = X86::VPCMPUDZ128rrik_alt; break;
- case X86::VPCMPUDZ256rmi: NewOpc = X86::VPCMPUDZ256rmi_alt; break;
- case X86::VPCMPUDZ256rmib: NewOpc = X86::VPCMPUDZ256rmib_alt; break;
- case X86::VPCMPUDZ256rmibk: NewOpc = X86::VPCMPUDZ256rmibk_alt; break;
- case X86::VPCMPUDZ256rmik: NewOpc = X86::VPCMPUDZ256rmik_alt; break;
- case X86::VPCMPUDZ256rri: NewOpc = X86::VPCMPUDZ256rri_alt; break;
- case X86::VPCMPUDZ256rrik: NewOpc = X86::VPCMPUDZ256rrik_alt; break;
- case X86::VPCMPUDZrmi: NewOpc = X86::VPCMPUDZrmi_alt; break;
- case X86::VPCMPUDZrmib: NewOpc = X86::VPCMPUDZrmib_alt; break;
- case X86::VPCMPUDZrmibk: NewOpc = X86::VPCMPUDZrmibk_alt; break;
- case X86::VPCMPUDZrmik: NewOpc = X86::VPCMPUDZrmik_alt; break;
- case X86::VPCMPUDZrri: NewOpc = X86::VPCMPUDZrri_alt; break;
- case X86::VPCMPUDZrrik: NewOpc = X86::VPCMPUDZrrik_alt; break;
- case X86::VPCMPUQZ128rmi: NewOpc = X86::VPCMPUQZ128rmi_alt; break;
- case X86::VPCMPUQZ128rmib: NewOpc = X86::VPCMPUQZ128rmib_alt; break;
- case X86::VPCMPUQZ128rmibk: NewOpc = X86::VPCMPUQZ128rmibk_alt; break;
- case X86::VPCMPUQZ128rmik: NewOpc = X86::VPCMPUQZ128rmik_alt; break;
- case X86::VPCMPUQZ128rri: NewOpc = X86::VPCMPUQZ128rri_alt; break;
- case X86::VPCMPUQZ128rrik: NewOpc = X86::VPCMPUQZ128rrik_alt; break;
- case X86::VPCMPUQZ256rmi: NewOpc = X86::VPCMPUQZ256rmi_alt; break;
- case X86::VPCMPUQZ256rmib: NewOpc = X86::VPCMPUQZ256rmib_alt; break;
- case X86::VPCMPUQZ256rmibk: NewOpc = X86::VPCMPUQZ256rmibk_alt; break;
- case X86::VPCMPUQZ256rmik: NewOpc = X86::VPCMPUQZ256rmik_alt; break;
- case X86::VPCMPUQZ256rri: NewOpc = X86::VPCMPUQZ256rri_alt; break;
- case X86::VPCMPUQZ256rrik: NewOpc = X86::VPCMPUQZ256rrik_alt; break;
- case X86::VPCMPUQZrmi: NewOpc = X86::VPCMPUQZrmi_alt; break;
- case X86::VPCMPUQZrmib: NewOpc = X86::VPCMPUQZrmib_alt; break;
- case X86::VPCMPUQZrmibk: NewOpc = X86::VPCMPUQZrmibk_alt; break;
- case X86::VPCMPUQZrmik: NewOpc = X86::VPCMPUQZrmik_alt; break;
- case X86::VPCMPUQZrri: NewOpc = X86::VPCMPUQZrri_alt; break;
- case X86::VPCMPUQZrrik: NewOpc = X86::VPCMPUQZrrik_alt; break;
- case X86::VPCMPUWZ128rmi: NewOpc = X86::VPCMPUWZ128rmi_alt; break;
- case X86::VPCMPUWZ128rmik: NewOpc = X86::VPCMPUWZ128rmik_alt; break;
- case X86::VPCMPUWZ128rri: NewOpc = X86::VPCMPUWZ128rri_alt; break;
- case X86::VPCMPUWZ128rrik: NewOpc = X86::VPCMPUWZ128rrik_alt; break;
- case X86::VPCMPUWZ256rmi: NewOpc = X86::VPCMPUWZ256rmi_alt; break;
- case X86::VPCMPUWZ256rmik: NewOpc = X86::VPCMPUWZ256rmik_alt; break;
- case X86::VPCMPUWZ256rri: NewOpc = X86::VPCMPUWZ256rri_alt; break;
- case X86::VPCMPUWZ256rrik: NewOpc = X86::VPCMPUWZ256rrik_alt; break;
- case X86::VPCMPUWZrmi: NewOpc = X86::VPCMPUWZrmi_alt; break;
- case X86::VPCMPUWZrmik: NewOpc = X86::VPCMPUWZrmik_alt; break;
- case X86::VPCMPUWZrri: NewOpc = X86::VPCMPUWZrri_alt; break;
- case X86::VPCMPUWZrrik: NewOpc = X86::VPCMPUWZrrik_alt; break;
- case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPWZ128rmi_alt; break;
- case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPWZ128rmik_alt; break;
- case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPWZ128rri_alt; break;
- case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPWZ128rrik_alt; break;
- case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPWZ256rmi_alt; break;
- case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPWZ256rmik_alt; break;
- case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPWZ256rri_alt; break;
- case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPWZ256rrik_alt; break;
- case X86::VPCMPWZrmi: NewOpc = X86::VPCMPWZrmi_alt; break;
- case X86::VPCMPWZrmik: NewOpc = X86::VPCMPWZrmik_alt; break;
- case X86::VPCMPWZrri: NewOpc = X86::VPCMPWZrri_alt; break;
- case X86::VPCMPWZrrik: NewOpc = X86::VPCMPWZrrik_alt; break;
- }
- // Switch opcode to the one that doesn't get special printing.
- mcInst.setOpcode(NewOpc);
- }
}
switch (type) {
@@ -899,6 +694,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM:
case TYPE_YMM:
case TYPE_ZMM:
+ case TYPE_VK_PAIR:
case TYPE_VK:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
@@ -987,6 +783,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_Rv:
translateRegister(mcInst, insn.opcodeRegister);
return false;
+ case ENCODING_CC:
+ mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
+ return false;
case ENCODING_FP:
translateFPRegister(mcInst, insn.modRM & 7);
return false;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 54d550b60652..a241362a271d 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1,9 +1,8 @@
//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -377,8 +376,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
nextByte == 0xc6 || nextByte == 0xc7)) {
insn->xAcquireRelease = true;
- if (nextByte != 0x90) // PAUSE instruction support
- break;
+ break;
}
if (isREX(insn, nextByte)) {
uint8_t nnextByte;
@@ -884,7 +882,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXK;
if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
- attrMask |= ATTR_EVEXL;
+ attrMask |= ATTR_VEXL;
if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
attrMask |= ATTR_EVEXL2;
} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
@@ -1470,6 +1468,10 @@ static int readModRM(struct InternalInstruction* insn) {
if (index > 7) \
*valid = 0; \
return prefix##_K0 + index; \
+ case TYPE_VK_PAIR: \
+ if (index > 7) \
+ *valid = 0; \
+ return prefix##_K0_K1 + (index / 2); \
case TYPE_MM64: \
return prefix##_MM0 + (index & 0x7); \
case TYPE_SEGMENTREG: \
@@ -1847,6 +1849,9 @@ static int readOperands(struct InternalInstruction* insn) {
if (readOpcodeRegister(insn, 0))
return -1;
break;
+ case ENCODING_CC:
+ insn->immediates[1] = insn->opcode & 0xf;
+ break;
case ENCODING_FP:
break;
case ENCODING_VVVV:
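
The new ENCODING_CC case above takes the condition code directly from the low nibble of the opcode byte (insn->opcode & 0xf) instead of reading a separate immediate. A standalone sketch of that extraction; the SETE opcode is used only as an example:

#include <cstdint>
#include <iostream>

// For opcodes whose condition code is folded into the opcode itself
// (Jcc/SETcc/CMOVcc style families), the CC is simply the low 4 bits.
static uint8_t conditionCodeFromOpcode(uint8_t OpcodeByte) {
  return OpcodeByte & 0xf;
}

int main() {
  // 0x0F 0x94 is SETE in the two-byte map; its condition code is 4.
  std::cout << static_cast<int>(conditionCodeFromOpcode(0x94)) << '\n'; // 4
  return 0;
}
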
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 3b8a4f732eed..7c0a42c019e3 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -1,9 +1,8 @@
//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -325,6 +324,12 @@ namespace X86Disassembler {
ENTRY(K6) \
ENTRY(K7)
+#define REGS_MASK_PAIRS \
+ ENTRY(K0_K1) \
+ ENTRY(K2_K3) \
+ ENTRY(K4_K5) \
+ ENTRY(K6_K7)
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -394,6 +399,7 @@ namespace X86Disassembler {
REGS_YMM \
REGS_ZMM \
REGS_MASKS \
+ REGS_MASK_PAIRS \
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
deleted file mode 100644
index 0e861d5ddbc9..000000000000
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ /dev/null
@@ -1,202 +0,0 @@
-//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code for rendering MCInst instances as AT&T-style
-// assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86ATTInstPrinter.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "X86InstComments.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
-#include <cinttypes>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-// Include the auto-generated portion of the assembly writer.
-#define PRINT_ALIAS_INSTR
-#include "X86GenAsmWriter.inc"
-
-void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
-}
-
-void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot, const MCSubtargetInfo &STI) {
- // If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
- HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
-
- printInstFlags(MI, OS);
-
- // Output CALLpcrel32 as "callq" in 64-bit mode.
- // In Intel annotation it's always emitted as "call".
- //
- // TODO: Probably this hack should be redesigned via InstAlias in
- // InstrInfo.td as soon as Requires clause is supported properly
- // for InstAlias.
- if (MI->getOpcode() == X86::CALLpcrel32 &&
- (STI.getFeatureBits()[X86::Mode64Bit])) {
- OS << "\tcallq\t";
- printPCRelImm(MI, 0, OS);
- }
- // data16 and data32 both have the same encoding of 0x66. While data32 is
- // valid only in 16 bit systems, data16 is valid in the rest.
- // There seems to be some lack of support of the Requires clause that causes
- // 0x66 to be interpreted as "data16" by the asm printer.
- // Thus we add an adjustment here in order to print the "right" instruction.
- else if (MI->getOpcode() == X86::DATA16_PREFIX &&
- STI.getFeatureBits()[X86::Mode16Bit]) {
- OS << "\tdata32";
- }
- // Try to print any aliases first.
- else if (!printAliasInstr(MI, OS))
- printInstruction(MI, OS);
-
- // Next always print the annotation.
- printAnnotation(OS, Annot);
-}
-
-void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- printRegName(O, Op.getReg());
- } else if (Op.isImm()) {
- // Print immediates as signed values.
- int64_t Imm = Op.getImm();
- O << markup("<imm:") << '$' << formatImm(Imm) << markup(">");
-
- // TODO: This should be in a helper function in the base class, so it can
- // be used by other printers.
-
- // If there are no instruction-specific comments, add a comment clarifying
- // the hex value of the immediate operand when it isn't in the range
- // [-256,255].
- if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
- // Don't print unnecessary hex sign bits.
- if (Imm == (int16_t)(Imm))
- *CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
- else if (Imm == (int32_t)(Imm))
- *CommentStream << format("imm = 0x%" PRIX32 "\n", (uint32_t)Imm);
- else
- *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Imm);
- }
- } else {
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:") << '$';
- Op.getExpr()->print(O, &MAI);
- O << markup(">");
- }
-}
-
-void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
- const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
- const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
-
- O << markup("<mem:");
-
- // If this has a segment register, print it.
- printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
-
- if (DispSpec.isImm()) {
- int64_t DispVal = DispSpec.getImm();
- if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
- O << formatImm(DispVal);
- } else {
- assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
- DispSpec.getExpr()->print(O, &MAI);
- }
-
- if (IndexReg.getReg() || BaseReg.getReg()) {
- O << '(';
- if (BaseReg.getReg())
- printOperand(MI, Op + X86::AddrBaseReg, O);
-
- if (IndexReg.getReg()) {
- O << ',';
- printOperand(MI, Op + X86::AddrIndexReg, O);
- unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
- if (ScaleVal != 1) {
- O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
- << markup(">");
- }
- }
- O << ')';
- }
-
- O << markup(">");
-}
-
-void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- O << markup("<mem:");
-
- // If this has a segment register, print it.
- printOptionalSegReg(MI, Op + 1, O);
-
- O << "(";
- printOperand(MI, Op, O);
- O << ")";
-
- O << markup(">");
-}
-
-void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- O << markup("<mem:");
-
- O << "%es:(";
- printOperand(MI, Op, O);
- O << ")";
-
- O << markup(">");
-}
-
-void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &DispSpec = MI->getOperand(Op);
-
- O << markup("<mem:");
-
- // If this has a segment register, print it.
- printOptionalSegReg(MI, Op + 1, O);
-
- if (DispSpec.isImm()) {
- O << formatImm(DispSpec.getImm());
- } else {
- assert(DispSpec.isExpr() && "non-immediate displacement?");
- DispSpec.getExpr()->print(O, &MAI);
- }
-
- O << markup(">");
-}
-
-void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- if (MI->getOperand(Op).isExpr())
- return printOperand(MI, Op, O);
-
- O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
- << markup(">");
-}
diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp b/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
deleted file mode 100644
index 432cd47ae499..000000000000
--- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-//===--- X86InstPrinterCommon.cpp - X86 assembly instruction printing -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes common code for rendering MCInst instances as Intel-style
-// and Intel-style assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86InstPrinterCommon.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Casting.h"
-#include <cstdint>
-#include <cassert>
-
-using namespace llvm;
-
-void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm();
- switch (Imm) {
- default: llvm_unreachable("Invalid ssecc/avxcc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
- case 8: O << "eq_uq"; break;
- case 9: O << "nge"; break;
- case 0xa: O << "ngt"; break;
- case 0xb: O << "false"; break;
- case 0xc: O << "neq_oq"; break;
- case 0xd: O << "ge"; break;
- case 0xe: O << "gt"; break;
- case 0xf: O << "true"; break;
- case 0x10: O << "eq_os"; break;
- case 0x11: O << "lt_oq"; break;
- case 0x12: O << "le_oq"; break;
- case 0x13: O << "unord_s"; break;
- case 0x14: O << "neq_us"; break;
- case 0x15: O << "nlt_uq"; break;
- case 0x16: O << "nle_uq"; break;
- case 0x17: O << "ord_s"; break;
- case 0x18: O << "eq_us"; break;
- case 0x19: O << "nge_uq"; break;
- case 0x1a: O << "ngt_uq"; break;
- case 0x1b: O << "false_os"; break;
- case 0x1c: O << "neq_os"; break;
- case 0x1d: O << "ge_oq"; break;
- case 0x1e: O << "gt_oq"; break;
- case 0x1f: O << "true_us"; break;
- }
-}
-
-void X86InstPrinterCommon::printXOPCC(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm();
- switch (Imm) {
- default: llvm_unreachable("Invalid xopcc argument!");
- case 0: O << "lt"; break;
- case 1: O << "le"; break;
- case 2: O << "gt"; break;
- case 3: O << "ge"; break;
- case 4: O << "eq"; break;
- case 5: O << "neq"; break;
- case 6: O << "false"; break;
- case 7: O << "true"; break;
- }
-}
-
-void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- int64_t Imm = MI->getOperand(Op).getImm() & 0x3;
- switch (Imm) {
- case 0: O << "{rn-sae}"; break;
- case 1: O << "{rd-sae}"; break;
- case 2: O << "{ru-sae}"; break;
- case 3: O << "{rz-sae}"; break;
- }
-}
-
-/// printPCRelImm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value (e.g. for jumps and calls). In
-/// Intel-style these print slightly differently than normal immediates.
-/// for example, a $ is not emitted.
-void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isImm())
- O << formatImm(Op.getImm());
- else {
- assert(Op.isExpr() && "unknown pcrel immediate operand");
- // If a symbolic branch target was added as a constant expression then print
- // that address in hex.
- const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
- int64_t Address;
- if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
- O << formatHex((uint64_t)Address);
- } else {
- // Otherwise, just print the expression.
- Op.getExpr()->print(O, &MAI);
- }
- }
-}
-
-void X86InstPrinterCommon::printOptionalSegReg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).getReg()) {
- printOperand(MI, OpNo, O);
- O << ':';
- }
-}
-
-void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
- const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- uint64_t TSFlags = Desc.TSFlags;
- unsigned Flags = MI->getFlags();
-
- if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK))
- O << "\tlock\t";
-
- if ((TSFlags & X86II::NOTRACK) || (Flags & X86::IP_HAS_NOTRACK))
- O << "\tnotrack\t";
-
- if (Flags & X86::IP_HAS_REPEAT_NE)
- O << "\trepne\t";
- else if (Flags & X86::IP_HAS_REPEAT)
- O << "\trep\t";
-}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
deleted file mode 100644
index 044b71564152..000000000000
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file includes code for rendering MCInst instances as Intel-style
-// assembly.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86IntelInstPrinter.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "X86InstComments.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cassert>
-#include <cstdint>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "asm-printer"
-
-#include "X86GenAsmWriter1.inc"
-
-void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
-}
-
-void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot,
- const MCSubtargetInfo &STI) {
- printInstFlags(MI, OS);
-
- // In 16-bit mode, print data16 as data32.
- if (MI->getOpcode() == X86::DATA16_PREFIX &&
- STI.getFeatureBits()[X86::Mode16Bit]) {
- OS << "\tdata32";
- } else
- printInstruction(MI, OS);
-
- // Next always print the annotation.
- printAnnotation(OS, Annot);
-
- // If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream)
- EmitAnyX86InstComments(MI, *CommentStream, MII);
-}
-
-void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- printRegName(O, Op.getReg());
- } else if (Op.isImm()) {
- O << formatImm((int64_t)Op.getImm());
- } else {
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << "offset ";
- Op.getExpr()->print(O, &MAI);
- }
-}
-
-void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
- unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
- const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
- const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
-
- // If this has a segment register, print it.
- printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
-
- O << '[';
-
- bool NeedPlus = false;
- if (BaseReg.getReg()) {
- printOperand(MI, Op+X86::AddrBaseReg, O);
- NeedPlus = true;
- }
-
- if (IndexReg.getReg()) {
- if (NeedPlus) O << " + ";
- if (ScaleVal != 1)
- O << ScaleVal << '*';
- printOperand(MI, Op+X86::AddrIndexReg, O);
- NeedPlus = true;
- }
-
- if (!DispSpec.isImm()) {
- if (NeedPlus) O << " + ";
- assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
- DispSpec.getExpr()->print(O, &MAI);
- } else {
- int64_t DispVal = DispSpec.getImm();
- if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
- if (NeedPlus) {
- if (DispVal > 0)
- O << " + ";
- else {
- O << " - ";
- DispVal = -DispVal;
- }
- }
- O << formatImm(DispVal);
- }
- }
-
- O << ']';
-}
-
-void X86IntelInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- // If this has a segment register, print it.
- printOptionalSegReg(MI, Op + 1, O);
- O << '[';
- printOperand(MI, Op, O);
- O << ']';
-}
-
-void X86IntelInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- // DI accesses are always ES-based.
- O << "es:[";
- printOperand(MI, Op, O);
- O << ']';
-}
-
-void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- const MCOperand &DispSpec = MI->getOperand(Op);
-
- // If this has a segment register, print it.
- printOptionalSegReg(MI, Op + 1, O);
-
- O << '[';
-
- if (DispSpec.isImm()) {
- O << formatImm(DispSpec.getImm());
- } else {
- assert(DispSpec.isExpr() && "non-immediate displacement?");
- DispSpec.getExpr()->print(O, &MAI);
- }
-
- O << ']';
-}
-
-void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- if (MI->getOperand(Op).isExpr())
- return MI->getOperand(Op).getExpr()->print(O, &MAI);
-
- O << formatImm(MI->getOperand(Op).getImm() & 0xff);
-}
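
For orientation while these printer sources move under MCTargetDesc: the Intel-syntax memory operands they format follow the segment:[base + scale*index + disp] shape. A simplified standalone formatter with the same structure, using plain strings and integers as stand-ins for MCOperands:

#include <iostream>
#include <sstream>
#include <string>

// Simplified Intel-syntax memory reference: segment:[base + scale*index + disp].
// Empty register strings mean "no register", matching how the printer skips them.
static std::string formatIntelMem(const std::string &Seg, const std::string &Base,
                                  unsigned Scale, const std::string &Index,
                                  long long Disp) {
  std::ostringstream OS;
  if (!Seg.empty())
    OS << Seg << ':';
  OS << '[';
  bool NeedPlus = false;
  if (!Base.empty()) {
    OS << Base;
    NeedPlus = true;
  }
  if (!Index.empty()) {
    if (NeedPlus) OS << " + ";
    if (Scale != 1) OS << Scale << '*';
    OS << Index;
    NeedPlus = true;
  }
  // Print the displacement if it is nonzero, or if nothing else was printed.
  if (Disp || !NeedPlus) {
    if (NeedPlus) OS << (Disp >= 0 ? " + " : " - ");
    OS << (NeedPlus && Disp < 0 ? -Disp : Disp);
  }
  OS << ']';
  return OS.str();
}

int main() {
  std::cout << formatIntelMem("", "rax", 4, "rcx", 16) << '\n'; // [rax + 4*rcx + 16]
  return 0;
}
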
diff --git a/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
new file mode 100644
index 000000000000..ed2ee55ff2a5
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -0,0 +1,487 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ATTInstPrinter.h"
+#include "X86BaseInfo.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter.inc"
+
+void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
+}
+
+void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, MII);
+
+ printInstFlags(MI, OS);
+
+ // Output CALLpcrel32 as "callq" in 64-bit mode.
+ // In Intel annotation it's always emitted as "call".
+ //
+ // TODO: Probably this hack should be redesigned via InstAlias in
+ // InstrInfo.td as soon as Requires clause is supported properly
+ // for InstAlias.
+ if (MI->getOpcode() == X86::CALLpcrel32 &&
+ (STI.getFeatureBits()[X86::Mode64Bit])) {
+ OS << "\tcallq\t";
+ printPCRelImm(MI, 0, OS);
+ }
+ // data16 and data32 both have the same encoding of 0x66. While data32 is
+ // valid only in 16 bit systems, data16 is valid in the rest.
+ // There seems to be some lack of support of the Requires clause that causes
+ // 0x66 to be interpreted as "data16" by the asm printer.
+ // Thus we add an adjustment here in order to print the "right" instruction.
+ else if (MI->getOpcode() == X86::DATA16_PREFIX &&
+ STI.getFeatureBits()[X86::Mode16Bit]) {
+ OS << "\tdata32";
+ }
+ // Try to print any aliases first.
+ else if (!printAliasInstr(MI, OS) &&
+ !printVecCompareInstr(MI, OS))
+ printInstruction(MI, OS);
+
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+}
+
+bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
+ raw_ostream &OS) {
+ if (MI->getNumOperands() == 0 ||
+ !MI->getOperand(MI->getNumOperands() - 1).isImm())
+ return false;
+
+ int64_t Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+
+ // Custom print the vector compare instructions to get the immediate
+ // translated into the mnemonic.
+ switch (MI->getOpcode()) {
+ case X86::CMPPDrmi: case X86::CMPPDrri:
+ case X86::CMPPSrmi: case X86::CMPPSrri:
+ case X86::CMPSDrm: case X86::CMPSDrr:
+ case X86::CMPSDrm_Int: case X86::CMPSDrr_Int:
+ case X86::CMPSSrm: case X86::CMPSSrr:
+ case X86::CMPSSrm_Int: case X86::CMPSSrr_Int:
+ if (Imm >= 0 && Imm <= 7) {
+ OS << '\t';
+ printCMPMnemonic(MI, /*IsVCMP*/false, OS);
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+ printdwordmem(MI, 2, OS);
+ else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ printqwordmem(MI, 2, OS);
+ else
+ printxmmwordmem(MI, 2, OS);
+ } else
+ printOperand(MI, 2, OS);
+
+ // Skip operand 1 as its tied to the dest.
+
+ OS << ", ";
+ printOperand(MI, 0, OS);
+ return true;
+ }
+ break;
+
+ case X86::VCMPPDrmi: case X86::VCMPPDrri:
+ case X86::VCMPPDYrmi: case X86::VCMPPDYrri:
+ case X86::VCMPPDZ128rmi: case X86::VCMPPDZ128rri:
+ case X86::VCMPPDZ256rmi: case X86::VCMPPDZ256rri:
+ case X86::VCMPPDZrmi: case X86::VCMPPDZrri:
+ case X86::VCMPPSrmi: case X86::VCMPPSrri:
+ case X86::VCMPPSYrmi: case X86::VCMPPSYrri:
+ case X86::VCMPPSZ128rmi: case X86::VCMPPSZ128rri:
+ case X86::VCMPPSZ256rmi: case X86::VCMPPSZ256rri:
+ case X86::VCMPPSZrmi: case X86::VCMPPSZrri:
+ case X86::VCMPSDrm: case X86::VCMPSDrr:
+ case X86::VCMPSDZrm: case X86::VCMPSDZrr:
+ case X86::VCMPSDrm_Int: case X86::VCMPSDrr_Int:
+ case X86::VCMPSDZrm_Int: case X86::VCMPSDZrr_Int:
+ case X86::VCMPSSrm: case X86::VCMPSSrr:
+ case X86::VCMPSSZrm: case X86::VCMPSSZrr:
+ case X86::VCMPSSrm_Int: case X86::VCMPSSrr_Int:
+ case X86::VCMPSSZrm_Int: case X86::VCMPSSZrr_Int:
+ case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
+ case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
+ case X86::VCMPPDZrmik: case X86::VCMPPDZrrik:
+ case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
+ case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
+ case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
+ case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
+ case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
+ case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
+ case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
+ case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
+ case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
+ case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
+ case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
+ case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
+ case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
+ case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
+ case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ if (Imm >= 0 && Imm <= 31) {
+ OS << '\t';
+ printCMPMnemonic(MI, /*IsVCMP*/true, OS);
+
+ unsigned CurOp = (Desc.TSFlags & X86II::EVEX_K) ? 3 : 2;
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+ if (Desc.TSFlags & X86II::EVEX_B) {
+ // Broadcast form.
+ // Load size is based on W-bit.
+ if (Desc.TSFlags & X86II::VEX_W)
+ printqwordmem(MI, CurOp--, OS);
+ else
+ printdwordmem(MI, CurOp--, OS);
+
+ // Print the number of elements broadcasted.
+ unsigned NumElts;
+ if (Desc.TSFlags & X86II::EVEX_L2)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ else if (Desc.TSFlags & X86II::VEX_L)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ else
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ OS << "{1to" << NumElts << "}";
+ } else {
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+ printdwordmem(MI, CurOp--, OS);
+ else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ printqwordmem(MI, CurOp--, OS);
+ else if (Desc.TSFlags & X86II::EVEX_L2)
+ printzmmwordmem(MI, CurOp--, OS);
+ else if (Desc.TSFlags & X86II::VEX_L)
+ printymmwordmem(MI, CurOp--, OS);
+ else
+ printxmmwordmem(MI, CurOp--, OS);
+ }
+ } else {
+ if (Desc.TSFlags & X86II::EVEX_B)
+ OS << "{sae}, ";
+ printOperand(MI, CurOp--, OS);
+ }
+
+ OS << ", ";
+ printOperand(MI, CurOp--, OS);
+ OS << ", ";
+ printOperand(MI, 0, OS);
+ if (CurOp > 0) {
+ // Print mask operand.
+ OS << " {";
+ printOperand(MI, CurOp--, OS);
+ OS << "}";
+ }
+
+ return true;
+ }
+ break;
+
+ case X86::VPCOMBmi: case X86::VPCOMBri:
+ case X86::VPCOMDmi: case X86::VPCOMDri:
+ case X86::VPCOMQmi: case X86::VPCOMQri:
+ case X86::VPCOMUBmi: case X86::VPCOMUBri:
+ case X86::VPCOMUDmi: case X86::VPCOMUDri:
+ case X86::VPCOMUQmi: case X86::VPCOMUQri:
+ case X86::VPCOMUWmi: case X86::VPCOMUWri:
+ case X86::VPCOMWmi: case X86::VPCOMWri:
+ if (Imm >= 0 && Imm <= 7) {
+ OS << '\t';
+ printVPCOMMnemonic(MI, OS);
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem)
+ printxmmwordmem(MI, 2, OS);
+ else
+ printOperand(MI, 2, OS);
+
+ OS << ", ";
+ printOperand(MI, 1, OS);
+ OS << ", ";
+ printOperand(MI, 0, OS);
+ return true;
+ }
+ break;
+
+ case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rri:
+ case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rri:
+ case X86::VPCMPBZrmi: case X86::VPCMPBZrri:
+ case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rri:
+ case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rri:
+ case X86::VPCMPDZrmi: case X86::VPCMPDZrri:
+ case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rri:
+ case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rri:
+ case X86::VPCMPQZrmi: case X86::VPCMPQZrri:
+ case X86::VPCMPUBZ128rmi: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPUBZ256rmi: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPUBZrmi: case X86::VPCMPUBZrri:
+ case X86::VPCMPUDZ128rmi: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPUDZ256rmi: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPUDZrmi: case X86::VPCMPUDZrri:
+ case X86::VPCMPUQZ128rmi: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPUQZ256rmi: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPUQZrmi: case X86::VPCMPUQZrri:
+ case X86::VPCMPUWZ128rmi: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPUWZ256rmi: case X86::VPCMPUWZ256rri:
+ case X86::VPCMPUWZrmi: case X86::VPCMPUWZrri:
+ case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rri:
+ case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rri:
+ case X86::VPCMPWZrmi: case X86::VPCMPWZrri:
+ case X86::VPCMPBZ128rmik: case X86::VPCMPBZ128rrik:
+ case X86::VPCMPBZ256rmik: case X86::VPCMPBZ256rrik:
+ case X86::VPCMPBZrmik: case X86::VPCMPBZrrik:
+ case X86::VPCMPDZ128rmik: case X86::VPCMPDZ128rrik:
+ case X86::VPCMPDZ256rmik: case X86::VPCMPDZ256rrik:
+ case X86::VPCMPDZrmik: case X86::VPCMPDZrrik:
+ case X86::VPCMPQZ128rmik: case X86::VPCMPQZ128rrik:
+ case X86::VPCMPQZ256rmik: case X86::VPCMPQZ256rrik:
+ case X86::VPCMPQZrmik: case X86::VPCMPQZrrik:
+ case X86::VPCMPUBZ128rmik: case X86::VPCMPUBZ128rrik:
+ case X86::VPCMPUBZ256rmik: case X86::VPCMPUBZ256rrik:
+ case X86::VPCMPUBZrmik: case X86::VPCMPUBZrrik:
+ case X86::VPCMPUDZ128rmik: case X86::VPCMPUDZ128rrik:
+ case X86::VPCMPUDZ256rmik: case X86::VPCMPUDZ256rrik:
+ case X86::VPCMPUDZrmik: case X86::VPCMPUDZrrik:
+ case X86::VPCMPUQZ128rmik: case X86::VPCMPUQZ128rrik:
+ case X86::VPCMPUQZ256rmik: case X86::VPCMPUQZ256rrik:
+ case X86::VPCMPUQZrmik: case X86::VPCMPUQZrrik:
+ case X86::VPCMPUWZ128rmik: case X86::VPCMPUWZ128rrik:
+ case X86::VPCMPUWZ256rmik: case X86::VPCMPUWZ256rrik:
+ case X86::VPCMPUWZrmik: case X86::VPCMPUWZrrik:
+ case X86::VPCMPWZ128rmik: case X86::VPCMPWZ128rrik:
+ case X86::VPCMPWZ256rmik: case X86::VPCMPWZ256rrik:
+ case X86::VPCMPWZrmik: case X86::VPCMPWZrrik:
+ case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
+ case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
+ case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
+ case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
+ case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
+ case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
+ case X86::VPCMPUDZ128rmib: case X86::VPCMPUDZ128rmibk:
+ case X86::VPCMPUDZ256rmib: case X86::VPCMPUDZ256rmibk:
+ case X86::VPCMPUDZrmib: case X86::VPCMPUDZrmibk:
+ case X86::VPCMPUQZ128rmib: case X86::VPCMPUQZ128rmibk:
+ case X86::VPCMPUQZ256rmib: case X86::VPCMPUQZ256rmibk:
+ case X86::VPCMPUQZrmib: case X86::VPCMPUQZrmibk:
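+    // Only immediates 0-2 and 4-6 are printed with a vpcmp comparison
+    // mnemonic; 3 and 7 fall back to the default printer.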
+ if ((Imm >= 0 && Imm <= 2) || (Imm >= 4 && Imm <= 6)) {
+ OS << '\t';
+ printVPCMPMnemonic(MI, OS);
+
+ unsigned CurOp = (Desc.TSFlags & X86II::EVEX_K) ? 3 : 2;
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+ if (Desc.TSFlags & X86II::EVEX_B) {
+ // Broadcast form.
+ // Load size is based on W-bit as only D and Q are supported.
+ if (Desc.TSFlags & X86II::VEX_W)
+ printqwordmem(MI, CurOp--, OS);
+ else
+ printdwordmem(MI, CurOp--, OS);
+
+ // Print the number of elements broadcasted.
+ unsigned NumElts;
+ if (Desc.TSFlags & X86II::EVEX_L2)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ else if (Desc.TSFlags & X86II::VEX_L)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ else
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ OS << "{1to" << NumElts << "}";
+ } else {
+ if (Desc.TSFlags & X86II::EVEX_L2)
+ printzmmwordmem(MI, CurOp--, OS);
+ else if (Desc.TSFlags & X86II::VEX_L)
+ printymmwordmem(MI, CurOp--, OS);
+ else
+ printxmmwordmem(MI, CurOp--, OS);
+ }
+ } else {
+ printOperand(MI, CurOp--, OS);
+ }
+
+ OS << ", ";
+ printOperand(MI, CurOp--, OS);
+ OS << ", ";
+ printOperand(MI, 0, OS);
+ if (CurOp > 0) {
+ // Print mask operand.
+ OS << " {";
+ printOperand(MI, CurOp--, OS);
+ OS << "}";
+ }
+
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ } else if (Op.isImm()) {
+ // Print immediates as signed values.
+ int64_t Imm = Op.getImm();
+ O << markup("<imm:") << '$' << formatImm(Imm) << markup(">");
+
+ // TODO: This should be in a helper function in the base class, so it can
+ // be used by other printers.
+
+ // If there are no instruction-specific comments, add a comment clarifying
+ // the hex value of the immediate operand when it isn't in the range
+ // [-256,255].
+ if (CommentStream && !HasCustomInstComment && (Imm > 255 || Imm < -256)) {
+ // Don't print unnecessary hex sign bits.
+ if (Imm == (int16_t)(Imm))
+ *CommentStream << format("imm = 0x%" PRIX16 "\n", (uint16_t)Imm);
+ else if (Imm == (int32_t)(Imm))
+ *CommentStream << format("imm = 0x%" PRIX32 "\n", (uint32_t)Imm);
+ else
+ *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Imm);
+ }
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << markup("<imm:") << '$';
+ Op.getExpr()->print(O, &MAI);
+ O << markup(">");
+ }
+}
+
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
+ const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
+
+ O << markup("<mem:");
+
+ // If this has a segment register, print it.
+ printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
+
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << formatImm(DispVal);
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ DispSpec.getExpr()->print(O, &MAI);
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op + X86::AddrBaseReg, O);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op + X86::AddrIndexReg, O);
+ unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
+ if (ScaleVal != 1) {
+ O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
+ << markup(">");
+ }
+ }
+ O << ')';
+ }
+
+ O << markup(">");
+}
+
+void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ O << markup("<mem:");
+
+ // If this has a segment register, print it.
+ printOptionalSegReg(MI, Op + 1, O);
+
+ O << "(";
+ printOperand(MI, Op, O);
+ O << ")";
+
+ O << markup(">");
+}
+
+void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ O << markup("<mem:");
+
+ O << "%es:(";
+ printOperand(MI, Op, O);
+ O << ")";
+
+ O << markup(">");
+}
+
+void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &DispSpec = MI->getOperand(Op);
+
+ O << markup("<mem:");
+
+ // If this has a segment register, print it.
+ printOptionalSegReg(MI, Op + 1, O);
+
+ if (DispSpec.isImm()) {
+ O << formatImm(DispSpec.getImm());
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement?");
+ DispSpec.getExpr()->print(O, &MAI);
+ }
+
+ O << markup(">");
+}
+
+void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ if (MI->getOperand(Op).isExpr())
+ return printOperand(MI, Op, O);
+
+ O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
+ << markup(">");
+}
+
+void X86ATTInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &OS) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ unsigned Reg = Op.getReg();
+  // Override the default printing to print st(0) instead of st.
+ if (Reg == X86::ST0)
+ OS << markup("<reg:") << "%st(0)" << markup(">");
+ else
+ printRegName(OS, Reg);
+}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
index 57422bc9a0b2..747ddd30a2d9 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -1,9 +1,8 @@
//=- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
#include "X86InstPrinterCommon.h"
@@ -22,11 +21,12 @@ class X86ATTInstPrinter final : public X86InstPrinterCommon {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : X86InstPrinterCommon(MAI, MII, MRI) {}
+ : X86InstPrinterCommon(MAI, MII, MRI), HasCustomInstComment(false) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
const MCSubtargetInfo &STI) override;
+ bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -44,6 +44,7 @@ public:
void printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
void printDstIdx(const MCInst *MI, unsigned Op, raw_ostream &O);
void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
@@ -52,43 +53,28 @@ public:
printMemReference(MI, OpNo, O);
}
- void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printMemReference(MI, OpNo, O);
- }
- void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printi512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printdwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printqwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printxmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printymmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printzmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printtbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
@@ -135,4 +121,4 @@ private:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86ATTINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 64e6fb9f0375..54413fa1a02f 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1,9 +1,8 @@
//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -13,6 +12,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -26,18 +26,20 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static unsigned getFixupKindLog2Size(unsigned Kind) {
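+// Returns the fixup size in bytes rather than log2, so FK_NONE can be zero.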
+static unsigned getFixupKindSize(unsigned Kind) {
switch (Kind) {
default:
llvm_unreachable("invalid fixup kind!");
+ case FK_NONE:
+ return 0;
case FK_PCRel_1:
case FK_SecRel_1:
case FK_Data_1:
- return 0;
+ return 1;
case FK_PCRel_2:
case FK_SecRel_2:
case FK_Data_2:
- return 1;
+ return 2;
case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_relax:
@@ -49,12 +51,12 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
case X86::reloc_branch_4byte_pcrel:
case FK_SecRel_4:
case FK_Data_4:
- return 2;
+ return 4;
case FK_PCRel_8:
case FK_SecRel_8:
case FK_Data_8:
case X86::reloc_global_offset_table8:
- return 3;
+ return 8;
}
}
@@ -77,6 +79,8 @@ public:
return X86::NumTargetFixupKinds;
}
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
{"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -99,11 +103,14 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
+
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
const MCSubtargetInfo *STI) const override {
- unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
+ unsigned Size = getFixupKindSize(Fixup.getKind());
assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
@@ -111,7 +118,7 @@ public:
// Specifically ignore overflow/underflow as long as the leakage is
// limited to the lower bits. This is to remain compatible with
// other assemblers.
- assert(isIntN(Size * 8 + 1, Value) &&
+ assert((Size == 0 || isIntN(Size * 8 + 1, Value)) &&
"Value does not fit in the Fixup field");
for (unsigned i = 0; i != Size; ++i)
@@ -137,40 +144,10 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool is16BitMode) {
switch (Op) {
default:
return Op;
- case X86::JAE_1:
- return (is16BitMode) ? X86::JAE_2 : X86::JAE_4;
- case X86::JA_1:
- return (is16BitMode) ? X86::JA_2 : X86::JA_4;
- case X86::JBE_1:
- return (is16BitMode) ? X86::JBE_2 : X86::JBE_4;
- case X86::JB_1:
- return (is16BitMode) ? X86::JB_2 : X86::JB_4;
- case X86::JE_1:
- return (is16BitMode) ? X86::JE_2 : X86::JE_4;
- case X86::JGE_1:
- return (is16BitMode) ? X86::JGE_2 : X86::JGE_4;
- case X86::JG_1:
- return (is16BitMode) ? X86::JG_2 : X86::JG_4;
- case X86::JLE_1:
- return (is16BitMode) ? X86::JLE_2 : X86::JLE_4;
- case X86::JL_1:
- return (is16BitMode) ? X86::JL_2 : X86::JL_4;
+ case X86::JCC_1:
+ return (is16BitMode) ? X86::JCC_2 : X86::JCC_4;
case X86::JMP_1:
return (is16BitMode) ? X86::JMP_2 : X86::JMP_4;
- case X86::JNE_1:
- return (is16BitMode) ? X86::JNE_2 : X86::JNE_4;
- case X86::JNO_1:
- return (is16BitMode) ? X86::JNO_2 : X86::JNO_4;
- case X86::JNP_1:
- return (is16BitMode) ? X86::JNP_2 : X86::JNP_4;
- case X86::JNS_1:
- return (is16BitMode) ? X86::JNS_2 : X86::JNS_4;
- case X86::JO_1:
- return (is16BitMode) ? X86::JO_2 : X86::JO_4;
- case X86::JP_1:
- return (is16BitMode) ? X86::JP_2 : X86::JP_4;
- case X86::JS_1:
- return (is16BitMode) ? X86::JS_2 : X86::JS_4;
}
}
@@ -266,6 +243,25 @@ static unsigned getRelaxedOpcode(const MCInst &Inst, bool is16BitMode) {
return getRelaxedOpcodeBranch(Inst, is16BitMode);
}
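+// Map the ELF "none" relocation names to FK_NONE so they can be requested by
+// name (e.g. from a .reloc directive).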
+Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
+ if (STI.getTargetTriple().isOSBinFormatELF()) {
+ if (STI.getTargetTriple().getArch() == Triple::x86_64) {
+ if (Name == "R_X86_64_NONE")
+ return FK_NONE;
+ } else {
+ if (Name == "R_386_NONE")
+ return FK_NONE;
+ }
+ }
+ return MCAsmBackend::getFixupKind(Name);
+}
+
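+// An FK_NONE fixup has no bytes to patch; always emit it as a relocation
+// rather than resolving it.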
+bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
+ const MCFixup &Fixup,
+ const MCValue &) {
+ return Fixup.getKind() == FK_NONE;
+}
+
bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const {
// Branches can always be relaxed in either mode.
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c85ce9bbd5a4..6bd6c6cac7df 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -1,9 +1,8 @@
//===-- X86BaseInfo.h - Top level definitions for X86 -------- --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,7 +48,8 @@ namespace X86 {
TO_NEG_INF = 1,
TO_POS_INF = 2,
TO_ZERO = 3,
- CUR_DIRECTION = 4
+ CUR_DIRECTION = 4,
+ NO_EXC = 8
};
/// The constants to describe instr prefixes if there are
@@ -60,9 +60,46 @@ namespace X86 {
IP_HAS_REPEAT_NE = 4,
IP_HAS_REPEAT = 8,
IP_HAS_LOCK = 16,
- NO_SCHED_INFO = 32, // Don't add sched comment to the current instr because
- // it was already added
- IP_HAS_NOTRACK = 64
+ IP_HAS_NOTRACK = 32,
+ IP_USE_VEX3 = 64,
+ };
+
+ enum OperandType : unsigned {
+ /// AVX512 embedded rounding control. This should only have values 0-3.
+ OPERAND_ROUNDING_CONTROL = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_COND_CODE,
+ };
+
+  // X86-specific condition codes. These correspond to X86_*_COND in
+  // X86InstrInfo.td and must be kept in sync.
+ enum CondCode {
+ COND_O = 0,
+ COND_NO = 1,
+ COND_B = 2,
+ COND_AE = 3,
+ COND_E = 4,
+ COND_NE = 5,
+ COND_BE = 6,
+ COND_A = 7,
+ COND_S = 8,
+ COND_NS = 9,
+ COND_P = 10,
+ COND_NP = 11,
+ COND_L = 12,
+ COND_GE = 13,
+ COND_LE = 14,
+ COND_G = 15,
+ LAST_VALID_COND = COND_G,
+
+ // Artificial condition codes. These are used by AnalyzeBranch
+ // to indicate a block terminated with two conditional branches that together
+ // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
+ // which can't be represented on x86 with a single condition. These
+ // are never used in MachineInstrs and are inverses of one another.
+ COND_NE_OR_P,
+ COND_E_AND_NP,
+
+ COND_INVALID
};
} // end namespace X86;
@@ -285,6 +322,10 @@ namespace X86II {
/// manual, this operand is described as pntr16:32 and pntr16:16
RawFrmImm16 = 8,
+ /// AddCCFrm - This form is used for Jcc that encode the condition code
+ /// in the lower 4 bits of the opcode.
+ AddCCFrm = 9,
+
/// MRM[0-7][rm] - These forms are used to represent instructions that use
/// a Mod/RM byte, and use the middle field to hold extended opcode
/// information. In the intel manual these are represented as /0, /1, ...
@@ -310,10 +351,21 @@ namespace X86II {
///
MRMSrcMemOp4 = 35,
+ /// MRMSrcMemCC - This form is used for instructions that use the Mod/RM
+ /// byte to specify the operands and also encodes a condition code.
+ ///
+ MRMSrcMemCC = 36,
+
+    /// MRMXmCC - This form is used for instructions that use the Mod/RM byte
+    /// to specify a memory source, but doesn't use the middle field, and also
+    /// encodes a condition code.
+ ///
+ MRMXmCC = 38,
+
/// MRMXm - This form is used for instructions that use the Mod/RM byte
/// to specify a memory source, but doesn't use the middle field.
///
- MRMXm = 39, // Instruction that uses Mod/RM but not the middle field.
+ MRMXm = 39,
// Next, instructions that operate on a memory r/m operand...
MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, // Format /0 /1 /2 /3
@@ -339,10 +391,21 @@ namespace X86II {
///
MRMSrcRegOp4 = 51,
+ /// MRMSrcRegCC - This form is used for instructions that use the Mod/RM
+ /// byte to specify the operands and also encodes a condition code
+ ///
+ MRMSrcRegCC = 52,
+
+    /// MRMXrCC - This form is used for instructions that use the Mod/RM byte
+    /// to specify a register source, but doesn't use the middle field, and also
+    /// encodes a condition code.
+ ///
+ MRMXrCC = 54,
+
/// MRMXr - This form is used for instructions that use the Mod/RM byte
/// to specify a register source, but doesn't use the middle field.
///
- MRMXr = 55, // Instruction that uses Mod/RM but not the middle field.
+ MRMXr = 55,
// Instructions that operate on a register r/m operand...
MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, // Format /0 /1 /2 /3
@@ -681,8 +744,7 @@ namespace X86II {
// has it as the last op.
if (NumOps == 9 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
(Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1 ||
- Desc.getOperandConstraint(8, MCOI::TIED_TO) == 1) &&
- "Instruction with 2 defs isn't gather?")
+ Desc.getOperandConstraint(8, MCOI::TIED_TO) == 1))
return 2;
return 0;
}
@@ -711,6 +773,7 @@ namespace X86II {
case X86II::RawFrmSrc:
case X86II::RawFrmDst:
case X86II::RawFrmDstSrc:
+ case X86II::AddCCFrm:
return -1;
case X86II::MRMDestMem:
return 0;
@@ -724,16 +787,23 @@ namespace X86II {
case X86II::MRMSrcMemOp4:
// Skip registers encoded in reg, VEX_VVVV, and I8IMM.
return 3;
+ case X86II::MRMSrcMemCC:
+ // Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
+ // mask register.
+ return 1;
case X86II::MRMDestReg:
case X86II::MRMSrcReg:
case X86II::MRMSrcReg4VOp3:
case X86II::MRMSrcRegOp4:
+ case X86II::MRMSrcRegCC:
+ case X86II::MRMXrCC:
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
return -1;
+ case X86II::MRMXmCC:
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index b724a89f81d2..232a06593238 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- X86ELFObjectWriter.cpp - X86 ELF Writer ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -45,7 +44,7 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
(EMachine != ELF::EM_386) &&
(EMachine != ELF::EM_IAMCU)) {}
-enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
+enum X86_64RelType { RT64_NONE, RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
static X86_64RelType getType64(unsigned Kind,
MCSymbolRefExpr::VariantKind &Modifier,
@@ -53,6 +52,8 @@ static X86_64RelType getType64(unsigned Kind,
switch (Kind) {
default:
llvm_unreachable("Unimplemented");
+ case FK_NONE:
+ return RT64_NONE;
case X86::reloc_global_offset_table8:
Modifier = MCSymbolRefExpr::VK_GOT;
IsPCRel = true;
@@ -103,6 +104,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case MCSymbolRefExpr::VK_None:
case MCSymbolRefExpr::VK_X86_ABS8:
switch (Type) {
+ case RT64_NONE:
+ if (Modifier == MCSymbolRefExpr::VK_None)
+ return ELF::R_X86_64_NONE;
+ llvm_unreachable("Unimplemented");
case RT64_64:
return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
case RT64_32:
@@ -114,6 +119,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case RT64_8:
return IsPCRel ? ELF::R_X86_64_PC8 : ELF::R_X86_64_8;
}
+ llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_GOT:
switch (Type) {
case RT64_64:
@@ -123,8 +129,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case RT64_32S:
case RT64_16:
case RT64_8:
+ case RT64_NONE:
llvm_unreachable("Unimplemented");
}
+ llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_GOTOFF:
assert(Type == RT64_64);
assert(!IsPCRel);
@@ -139,8 +147,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case RT64_32S:
case RT64_16:
case RT64_8:
+ case RT64_NONE:
llvm_unreachable("Unimplemented");
}
+ llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_DTPOFF:
assert(!IsPCRel);
switch (Type) {
@@ -151,8 +161,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case RT64_32S:
case RT64_16:
case RT64_8:
+ case RT64_NONE:
llvm_unreachable("Unimplemented");
}
+ llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_SIZE:
assert(!IsPCRel);
switch (Type) {
@@ -163,8 +175,10 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case RT64_32S:
case RT64_16:
case RT64_8:
+ case RT64_NONE:
llvm_unreachable("Unimplemented");
}
+ llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_TLSCALL:
return ELF::R_X86_64_TLSDESC_CALL;
case MCSymbolRefExpr::VK_TLSDESC:
@@ -197,13 +211,16 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
case X86::reloc_riprel_4byte_movq_load:
return ELF::R_X86_64_REX_GOTPCRELX;
}
+ llvm_unreachable("unexpected relocation type!");
}
}
-enum X86_32RelType { RT32_32, RT32_16, RT32_8 };
+enum X86_32RelType { RT32_NONE, RT32_32, RT32_16, RT32_8 };
static X86_32RelType getType32(X86_64RelType T) {
switch (T) {
+ case RT64_NONE:
+ return RT32_NONE;
case RT64_64:
llvm_unreachable("Unimplemented");
case RT64_32:
@@ -227,6 +244,10 @@ static unsigned getRelocType32(MCContext &Ctx,
case MCSymbolRefExpr::VK_None:
case MCSymbolRefExpr::VK_X86_ABS8:
switch (Type) {
+ case RT32_NONE:
+ if (Modifier == MCSymbolRefExpr::VK_None)
+ return ELF::R_386_NONE;
+ llvm_unreachable("Unimplemented");
case RT32_32:
return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
case RT32_16:
@@ -234,6 +255,7 @@ static unsigned getRelocType32(MCContext &Ctx,
case RT32_8:
return IsPCRel ? ELF::R_386_PC8 : ELF::R_386_8;
}
+ llvm_unreachable("unexpected relocation type!");
case MCSymbolRefExpr::VK_GOT:
assert(Type == RT32_32);
if (IsPCRel)
@@ -249,6 +271,10 @@ static unsigned getRelocType32(MCContext &Ctx,
assert(Type == RT32_32);
assert(!IsPCRel);
return ELF::R_386_GOTOFF;
+ case MCSymbolRefExpr::VK_TLSCALL:
+ return ELF::R_386_TLS_DESC_CALL;
+ case MCSymbolRefExpr::VK_TLSDESC:
+ return ELF::R_386_TLS_GOTDESC;
case MCSymbolRefExpr::VK_TPOFF:
assert(Type == RT32_32);
assert(!IsPCRel);
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 3c04b13e002e..2d5217115d07 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -1,9 +1,8 @@
//===-- X86FixupKinds.h - X86 Specific Fixup Entries ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 37bed37b0994..73b1969b4e82 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1,9 +1,8 @@
//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,8 +13,8 @@
#include "X86InstComments.h"
#include "X86ATTInstPrinter.h"
-#include "MCTargetDesc/X86BaseInfo.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86BaseInfo.h"
+#include "X86MCTargetDesc.h"
#include "Utils/X86ShuffleDecode.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -1076,9 +1075,12 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
LLVM_FALLTHROUGH;
+ case X86::MOVSDrm_alt:
case X86::MOVSDrm:
+ case X86::VMOVSDrm_alt:
case X86::VMOVSDrm:
case X86::VMOVSDZrm:
+ case X86::VMOVSDZrm_alt:
DecodeScalarMoveMask(2, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1091,8 +1093,11 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
LLVM_FALLTHROUGH;
case X86::MOVSSrm:
+ case X86::MOVSSrm_alt:
case X86::VMOVSSrm:
+ case X86::VMOVSSrm_alt:
case X86::VMOVSSZrm:
+ case X86::VMOVSSZrm_alt:
DecodeScalarMoveMask(4, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1203,7 +1208,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_PMOVZX(PMOVZXBW, m)
- DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), ShuffleMask);
+ DecodeZeroExtendMask(8, 16, getRegOperandNumElts(MI, 16, 0), false,
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1211,7 +1217,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_PMOVZX(PMOVZXBD, m)
- DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+ DecodeZeroExtendMask(8, 32, getRegOperandNumElts(MI, 32, 0), false,
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1219,7 +1226,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_PMOVZX(PMOVZXBQ, m)
- DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+ DecodeZeroExtendMask(8, 64, getRegOperandNumElts(MI, 64, 0), false,
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1227,7 +1235,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_PMOVZX(PMOVZXWD, m)
- DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), ShuffleMask);
+ DecodeZeroExtendMask(16, 32, getRegOperandNumElts(MI, 32, 0), false,
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1235,7 +1244,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_PMOVZX(PMOVZXWQ, m)
- DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+ DecodeZeroExtendMask(16, 64, getRegOperandNumElts(MI, 64, 0), false,
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1243,7 +1253,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
LLVM_FALLTHROUGH;
CASE_PMOVZX(PMOVZXDQ, m)
- DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), ShuffleMask);
+ DecodeZeroExtendMask(32, 64, getRegOperandNumElts(MI, 64, 0), false,
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
}
@@ -1304,6 +1315,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
OS << ']';
--i; // For loop increments element #.
}
+ OS << '\n';
// We successfully added a comment to this instruction.
return true;
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/MCTargetDesc/X86InstComments.h
index 40dffa5fbb8a..96760664012a 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.h
+++ b/lib/Target/X86/MCTargetDesc/X86InstComments.h
@@ -1,9 +1,8 @@
//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTCOMMENTS_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTCOMMENTS_H
namespace llvm {
diff --git a/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
new file mode 100644
index 000000000000..a21555076976
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -0,0 +1,362 @@
+//===--- X86InstPrinterCommon.cpp - X86 assembly instruction printing -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes common code for rendering MCInst instances as AT&T-style
+// and Intel-style assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstPrinterCommon.h"
+#include "X86BaseInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
+#include <cassert>
+
+using namespace llvm;
+
+void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ switch (Imm) {
+ default: llvm_unreachable("Invalid condcode argument!");
+ case 0: O << "o"; break;
+ case 1: O << "no"; break;
+ case 2: O << "b"; break;
+ case 3: O << "ae"; break;
+ case 4: O << "e"; break;
+ case 5: O << "ne"; break;
+ case 6: O << "be"; break;
+ case 7: O << "a"; break;
+ case 8: O << "s"; break;
+ case 9: O << "ns"; break;
+ case 0xa: O << "p"; break;
+ case 0xb: O << "np"; break;
+ case 0xc: O << "l"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "le"; break;
+ case 0xf: O << "g"; break;
+ }
+}
+
+void X86InstPrinterCommon::printSSEAVXCC(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ switch (Imm) {
+ default: llvm_unreachable("Invalid ssecc/avxcc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
+ }
+}
+
+void X86InstPrinterCommon::printVPCOMMnemonic(const MCInst *MI,
+ raw_ostream &OS) {
+ OS << "vpcom";
+
+ int64_t Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+ switch (Imm) {
+ default: llvm_unreachable("Invalid vpcom argument!");
+ case 0: OS << "lt"; break;
+ case 1: OS << "le"; break;
+ case 2: OS << "gt"; break;
+ case 3: OS << "ge"; break;
+ case 4: OS << "eq"; break;
+ case 5: OS << "neq"; break;
+ case 6: OS << "false"; break;
+ case 7: OS << "true"; break;
+ }
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86::VPCOMBmi: case X86::VPCOMBri: OS << "b\t"; break;
+ case X86::VPCOMDmi: case X86::VPCOMDri: OS << "d\t"; break;
+ case X86::VPCOMQmi: case X86::VPCOMQri: OS << "q\t"; break;
+ case X86::VPCOMUBmi: case X86::VPCOMUBri: OS << "ub\t"; break;
+ case X86::VPCOMUDmi: case X86::VPCOMUDri: OS << "ud\t"; break;
+ case X86::VPCOMUQmi: case X86::VPCOMUQri: OS << "uq\t"; break;
+ case X86::VPCOMUWmi: case X86::VPCOMUWri: OS << "uw\t"; break;
+ case X86::VPCOMWmi: case X86::VPCOMWri: OS << "w\t"; break;
+ }
+}
+
+void X86InstPrinterCommon::printVPCMPMnemonic(const MCInst *MI,
+ raw_ostream &OS) {
+ OS << "vpcmp";
+
+ printSSEAVXCC(MI, MI->getNumOperands() - 1, OS);
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rri:
+ case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rri:
+ case X86::VPCMPBZrmi: case X86::VPCMPBZrri:
+ case X86::VPCMPBZ128rmik: case X86::VPCMPBZ128rrik:
+ case X86::VPCMPBZ256rmik: case X86::VPCMPBZ256rrik:
+ case X86::VPCMPBZrmik: case X86::VPCMPBZrrik:
+ OS << "b\t";
+ break;
+ case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rri:
+ case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rri:
+ case X86::VPCMPDZrmi: case X86::VPCMPDZrri:
+ case X86::VPCMPDZ128rmik: case X86::VPCMPDZ128rrik:
+ case X86::VPCMPDZ256rmik: case X86::VPCMPDZ256rrik:
+ case X86::VPCMPDZrmik: case X86::VPCMPDZrrik:
+ case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
+ case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
+ case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
+ OS << "d\t";
+ break;
+ case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rri:
+ case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rri:
+ case X86::VPCMPQZrmi: case X86::VPCMPQZrri:
+ case X86::VPCMPQZ128rmik: case X86::VPCMPQZ128rrik:
+ case X86::VPCMPQZ256rmik: case X86::VPCMPQZ256rrik:
+ case X86::VPCMPQZrmik: case X86::VPCMPQZrrik:
+ case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
+ case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
+ case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
+ OS << "q\t";
+ break;
+ case X86::VPCMPUBZ128rmi: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPUBZ256rmi: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPUBZrmi: case X86::VPCMPUBZrri:
+ case X86::VPCMPUBZ128rmik: case X86::VPCMPUBZ128rrik:
+ case X86::VPCMPUBZ256rmik: case X86::VPCMPUBZ256rrik:
+ case X86::VPCMPUBZrmik: case X86::VPCMPUBZrrik:
+ OS << "ub\t";
+ break;
+ case X86::VPCMPUDZ128rmi: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPUDZ256rmi: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPUDZrmi: case X86::VPCMPUDZrri:
+ case X86::VPCMPUDZ128rmik: case X86::VPCMPUDZ128rrik:
+ case X86::VPCMPUDZ256rmik: case X86::VPCMPUDZ256rrik:
+ case X86::VPCMPUDZrmik: case X86::VPCMPUDZrrik:
+ case X86::VPCMPUDZ128rmib: case X86::VPCMPUDZ128rmibk:
+ case X86::VPCMPUDZ256rmib: case X86::VPCMPUDZ256rmibk:
+ case X86::VPCMPUDZrmib: case X86::VPCMPUDZrmibk:
+ OS << "ud\t";
+ break;
+ case X86::VPCMPUQZ128rmi: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPUQZ256rmi: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPUQZrmi: case X86::VPCMPUQZrri:
+ case X86::VPCMPUQZ128rmik: case X86::VPCMPUQZ128rrik:
+ case X86::VPCMPUQZ256rmik: case X86::VPCMPUQZ256rrik:
+ case X86::VPCMPUQZrmik: case X86::VPCMPUQZrrik:
+ case X86::VPCMPUQZ128rmib: case X86::VPCMPUQZ128rmibk:
+ case X86::VPCMPUQZ256rmib: case X86::VPCMPUQZ256rmibk:
+ case X86::VPCMPUQZrmib: case X86::VPCMPUQZrmibk:
+ OS << "uq\t";
+ break;
+ case X86::VPCMPUWZ128rmi: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPUWZ256rri: case X86::VPCMPUWZ256rmi:
+ case X86::VPCMPUWZrmi: case X86::VPCMPUWZrri:
+ case X86::VPCMPUWZ128rmik: case X86::VPCMPUWZ128rrik:
+ case X86::VPCMPUWZ256rrik: case X86::VPCMPUWZ256rmik:
+ case X86::VPCMPUWZrmik: case X86::VPCMPUWZrrik:
+ OS << "uw\t";
+ break;
+ case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rri:
+ case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rri:
+ case X86::VPCMPWZrmi: case X86::VPCMPWZrri:
+ case X86::VPCMPWZ128rmik: case X86::VPCMPWZ128rrik:
+ case X86::VPCMPWZ256rmik: case X86::VPCMPWZ256rrik:
+ case X86::VPCMPWZrmik: case X86::VPCMPWZrrik:
+ OS << "w\t";
+ break;
+ }
+}
+
+void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
+ raw_ostream &OS) {
+ OS << (IsVCmp ? "vcmp" : "cmp");
+
+ printSSEAVXCC(MI, MI->getNumOperands() - 1, OS);
+
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86::CMPPDrmi: case X86::CMPPDrri:
+ case X86::VCMPPDrmi: case X86::VCMPPDrri:
+ case X86::VCMPPDYrmi: case X86::VCMPPDYrri:
+ case X86::VCMPPDZ128rmi: case X86::VCMPPDZ128rri:
+ case X86::VCMPPDZ256rmi: case X86::VCMPPDZ256rri:
+ case X86::VCMPPDZrmi: case X86::VCMPPDZrri:
+ case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
+ case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
+ case X86::VCMPPDZrmik: case X86::VCMPPDZrrik:
+ case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
+ case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
+ case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
+ case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
+ OS << "pd\t";
+ break;
+ case X86::CMPPSrmi: case X86::CMPPSrri:
+ case X86::VCMPPSrmi: case X86::VCMPPSrri:
+ case X86::VCMPPSYrmi: case X86::VCMPPSYrri:
+ case X86::VCMPPSZ128rmi: case X86::VCMPPSZ128rri:
+ case X86::VCMPPSZ256rmi: case X86::VCMPPSZ256rri:
+ case X86::VCMPPSZrmi: case X86::VCMPPSZrri:
+ case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
+ case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
+ case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
+ case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
+ case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
+ case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
+ case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
+ OS << "ps\t";
+ break;
+ case X86::CMPSDrm: case X86::CMPSDrr:
+ case X86::CMPSDrm_Int: case X86::CMPSDrr_Int:
+ case X86::VCMPSDrm: case X86::VCMPSDrr:
+ case X86::VCMPSDrm_Int: case X86::VCMPSDrr_Int:
+ case X86::VCMPSDZrm: case X86::VCMPSDZrr:
+ case X86::VCMPSDZrm_Int: case X86::VCMPSDZrr_Int:
+ case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
+ case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
+ OS << "sd\t";
+ break;
+ case X86::CMPSSrm: case X86::CMPSSrr:
+ case X86::CMPSSrm_Int: case X86::CMPSSrr_Int:
+ case X86::VCMPSSrm: case X86::VCMPSSrr:
+ case X86::VCMPSSrm_Int: case X86::VCMPSSrr_Int:
+ case X86::VCMPSSZrm: case X86::VCMPSSZrr:
+ case X86::VCMPSSZrm_Int: case X86::VCMPSSZrr_Int:
+ case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
+ case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ OS << "ss\t";
+ break;
+ }
+}
+
+void X86InstPrinterCommon::printRoundingControl(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(Op).getImm();
+ switch (Imm) {
+ default:
+ llvm_unreachable("Invalid rounding control!");
+ case X86::TO_NEAREST_INT:
+ O << "{rn-sae}";
+ break;
+ case X86::TO_NEG_INF:
+ O << "{rd-sae}";
+ break;
+ case X86::TO_POS_INF:
+ O << "{ru-sae}";
+ break;
+ case X86::TO_ZERO:
+ O << "{rz-sae}";
+ break;
+ }
+}
+
+/// printPCRelImm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value (e.g. for jumps and calls). In
+/// Intel-style these print slightly differently than normal immediates.
+/// For example, a '$' is not emitted.
+void X86InstPrinterCommon::printPCRelImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm())
+ O << formatImm(Op.getImm());
+ else {
+ assert(Op.isExpr() && "unknown pcrel immediate operand");
+ // If a symbolic branch target was added as a constant expression then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->evaluateAsAbsolute(Address)) {
+ O << formatHex((uint64_t)Address);
+ } else {
+ // Otherwise, just print the expression.
+ Op.getExpr()->print(O, &MAI);
+ }
+ }
+}
+
+void X86InstPrinterCommon::printOptionalSegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getReg()) {
+ printOperand(MI, OpNo, O);
+ O << ':';
+ }
+}
+
+void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O) {
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ unsigned Flags = MI->getFlags();
+
+ if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK))
+ O << "\tlock\t";
+
+ if ((TSFlags & X86II::NOTRACK) || (Flags & X86::IP_HAS_NOTRACK))
+ O << "\tnotrack\t";
+
+ if (Flags & X86::IP_HAS_REPEAT_NE)
+ O << "\trepne\t";
+ else if (Flags & X86::IP_HAS_REPEAT)
+ O << "\trep\t";
+}
+
+void X86InstPrinterCommon::printVKPair(const MCInst *MI, unsigned OpNo,
+ raw_ostream &OS) {
+  // In assembly listings, a register pair is represented by either of its
+  // members. Here we pick the even member (k0, k2, k4, k6), but we could just
+  // as well print K2_K3 as "k3". It would arguably be clearer if the assembly
+  // looked something like:
+  //   "vp2intersect %zmm5, %zmm7, {%k2, %k3}"
+  // but the current form works too.
+ switch (MI->getOperand(OpNo).getReg()) {
+ case X86::K0_K1:
+ printRegName(OS, X86::K0);
+ return;
+ case X86::K2_K3:
+ printRegName(OS, X86::K2);
+ return;
+ case X86::K4_K5:
+ printRegName(OS, X86::K4);
+ return;
+ case X86::K6_K7:
+ printRegName(OS, X86::K6);
+ return;
+ }
+ llvm_unreachable("Unknown mask pair register name");
+}
diff --git a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
index f2875e71f22c..8e28f24b619a 100644
--- a/lib/Target/X86/InstPrinter/X86InstPrinterCommon.h
+++ b/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.h
@@ -1,9 +1,8 @@
//===-- X86InstPrinterCommon.cpp - X86 assembly instruction printing ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTPRINTERCOMMON_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTPRINTERCOMMON_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
#include "llvm/MC/MCInstPrinter.h"
@@ -24,15 +23,19 @@ public:
using MCInstPrinter::MCInstPrinter;
virtual void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) = 0;
+ void printCondCode(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printSSEAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
- void printXOPCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
+ void printVPCOMMnemonic(const MCInst *MI, raw_ostream &OS);
+ void printVPCMPMnemonic(const MCInst *MI, raw_ostream &OS);
+ void printCMPMnemonic(const MCInst *MI, bool IsVCmp, raw_ostream &OS);
void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &O);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
protected:
void printInstFlags(const MCInst *MI, raw_ostream &O);
void printOptionalSegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printVKPair(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
};
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INSTPRINTERCOMMON_H
diff --git a/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
new file mode 100644
index 000000000000..ea28bef42569
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -0,0 +1,445 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86IntelInstPrinter.h"
+#include "X86BaseInfo.h"
+#include "X86InstComments.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "X86GenAsmWriter1.inc"
+
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
+ printInstFlags(MI, OS);
+
+ // In 16-bit mode, print data16 as data32.
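+  // (The 0x66 prefix toggles the operand size, so in 16-bit mode it selects
+  // 32-bit data.)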
+ if (MI->getOpcode() == X86::DATA16_PREFIX &&
+ STI.getFeatureBits()[X86::Mode16Bit]) {
+ OS << "\tdata32";
+ } else if (!printAliasInstr(MI, OS) &&
+ !printVecCompareInstr(MI, OS))
+ printInstruction(MI, OS);
+
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, MII);
+}
+
+bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS) {
+ if (MI->getNumOperands() == 0 ||
+ !MI->getOperand(MI->getNumOperands() - 1).isImm())
+ return false;
+
+ int64_t Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+
+ // Custom print the vector compare instructions to get the immediate
+ // translated into the mnemonic.
+ switch (MI->getOpcode()) {
+ case X86::CMPPDrmi: case X86::CMPPDrri:
+ case X86::CMPPSrmi: case X86::CMPPSrri:
+ case X86::CMPSDrm: case X86::CMPSDrr:
+ case X86::CMPSDrm_Int: case X86::CMPSDrr_Int:
+ case X86::CMPSSrm: case X86::CMPSSrr:
+ case X86::CMPSSrm_Int: case X86::CMPSSrr_Int:
+ if (Imm >= 0 && Imm <= 7) {
+ OS << '\t';
+ printCMPMnemonic(MI, /*IsVCMP*/false, OS);
+ printOperand(MI, 0, OS);
+ OS << ", ";
+      // Skip operand 1 as it's tied to the dest.
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+ printdwordmem(MI, 2, OS);
+ else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ printqwordmem(MI, 2, OS);
+ else
+ printxmmwordmem(MI, 2, OS);
+ } else
+ printOperand(MI, 2, OS);
+
+ return true;
+ }
+ break;
+
+ case X86::VCMPPDrmi: case X86::VCMPPDrri:
+ case X86::VCMPPDYrmi: case X86::VCMPPDYrri:
+ case X86::VCMPPDZ128rmi: case X86::VCMPPDZ128rri:
+ case X86::VCMPPDZ256rmi: case X86::VCMPPDZ256rri:
+ case X86::VCMPPDZrmi: case X86::VCMPPDZrri:
+ case X86::VCMPPSrmi: case X86::VCMPPSrri:
+ case X86::VCMPPSYrmi: case X86::VCMPPSYrri:
+ case X86::VCMPPSZ128rmi: case X86::VCMPPSZ128rri:
+ case X86::VCMPPSZ256rmi: case X86::VCMPPSZ256rri:
+ case X86::VCMPPSZrmi: case X86::VCMPPSZrri:
+ case X86::VCMPSDrm: case X86::VCMPSDrr:
+ case X86::VCMPSDZrm: case X86::VCMPSDZrr:
+ case X86::VCMPSDrm_Int: case X86::VCMPSDrr_Int:
+ case X86::VCMPSDZrm_Int: case X86::VCMPSDZrr_Int:
+ case X86::VCMPSSrm: case X86::VCMPSSrr:
+ case X86::VCMPSSZrm: case X86::VCMPSSZrr:
+ case X86::VCMPSSrm_Int: case X86::VCMPSSrr_Int:
+ case X86::VCMPSSZrm_Int: case X86::VCMPSSZrr_Int:
+ case X86::VCMPPDZ128rmik: case X86::VCMPPDZ128rrik:
+ case X86::VCMPPDZ256rmik: case X86::VCMPPDZ256rrik:
+ case X86::VCMPPDZrmik: case X86::VCMPPDZrrik:
+ case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik:
+ case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik:
+ case X86::VCMPPSZrmik: case X86::VCMPPSZrrik:
+ case X86::VCMPSDZrm_Intk: case X86::VCMPSDZrr_Intk:
+ case X86::VCMPSSZrm_Intk: case X86::VCMPSSZrr_Intk:
+ case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik:
+ case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik:
+ case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik:
+ case X86::VCMPPSZ128rmbi: case X86::VCMPPSZ128rmbik:
+ case X86::VCMPPSZ256rmbi: case X86::VCMPPSZ256rmbik:
+ case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik:
+ case X86::VCMPPDZrrib: case X86::VCMPPDZrribk:
+ case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
+ case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
+ case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ if (Imm >= 0 && Imm <= 31) {
+ OS << '\t';
+ printCMPMnemonic(MI, /*IsVCMP*/true, OS);
+
+ unsigned CurOp = 0;
+ printOperand(MI, CurOp++, OS);
+
+ if (Desc.TSFlags & X86II::EVEX_K) {
+ // Print mask operand.
+ OS << " {";
+ printOperand(MI, CurOp++, OS);
+ OS << "}";
+ }
+ OS << ", ";
+ printOperand(MI, CurOp++, OS);
+ OS << ", ";
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+ if (Desc.TSFlags & X86II::EVEX_B) {
+ // Broadcast form.
+ // Load size is based on W-bit.
+ if (Desc.TSFlags & X86II::VEX_W)
+ printqwordmem(MI, CurOp++, OS);
+ else
+ printdwordmem(MI, CurOp++, OS);
+
+ // Print the number of elements broadcasted.
+ unsigned NumElts;
+ if (Desc.TSFlags & X86II::EVEX_L2)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ else if (Desc.TSFlags & X86II::VEX_L)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ else
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ OS << "{1to" << NumElts << "}";
+ } else {
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
+ printdwordmem(MI, CurOp++, OS);
+ else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ printqwordmem(MI, CurOp++, OS);
+ else if (Desc.TSFlags & X86II::EVEX_L2)
+ printzmmwordmem(MI, CurOp++, OS);
+ else if (Desc.TSFlags & X86II::VEX_L)
+ printymmwordmem(MI, CurOp++, OS);
+ else
+ printxmmwordmem(MI, CurOp++, OS);
+ }
+ } else {
+ printOperand(MI, CurOp++, OS);
+ if (Desc.TSFlags & X86II::EVEX_B)
+ OS << ", {sae}";
+ }
+
+ return true;
+ }
+ break;
+
+ case X86::VPCOMBmi: case X86::VPCOMBri:
+ case X86::VPCOMDmi: case X86::VPCOMDri:
+ case X86::VPCOMQmi: case X86::VPCOMQri:
+ case X86::VPCOMUBmi: case X86::VPCOMUBri:
+ case X86::VPCOMUDmi: case X86::VPCOMUDri:
+ case X86::VPCOMUQmi: case X86::VPCOMUQri:
+ case X86::VPCOMUWmi: case X86::VPCOMUWri:
+ case X86::VPCOMWmi: case X86::VPCOMWri:
+ if (Imm >= 0 && Imm <= 7) {
+ OS << '\t';
+ printVPCOMMnemonic(MI, OS);
+ printOperand(MI, 0, OS);
+ OS << ", ";
+ printOperand(MI, 1, OS);
+ OS << ", ";
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem)
+ printxmmwordmem(MI, 2, OS);
+ else
+ printOperand(MI, 2, OS);
+ return true;
+ }
+ break;
+
+ case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rri:
+ case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rri:
+ case X86::VPCMPBZrmi: case X86::VPCMPBZrri:
+ case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rri:
+ case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rri:
+ case X86::VPCMPDZrmi: case X86::VPCMPDZrri:
+ case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rri:
+ case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rri:
+ case X86::VPCMPQZrmi: case X86::VPCMPQZrri:
+ case X86::VPCMPUBZ128rmi: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPUBZ256rmi: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPUBZrmi: case X86::VPCMPUBZrri:
+ case X86::VPCMPUDZ128rmi: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPUDZ256rmi: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPUDZrmi: case X86::VPCMPUDZrri:
+ case X86::VPCMPUQZ128rmi: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPUQZ256rmi: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPUQZrmi: case X86::VPCMPUQZrri:
+ case X86::VPCMPUWZ128rmi: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPUWZ256rmi: case X86::VPCMPUWZ256rri:
+ case X86::VPCMPUWZrmi: case X86::VPCMPUWZrri:
+ case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rri:
+ case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rri:
+ case X86::VPCMPWZrmi: case X86::VPCMPWZrri:
+ case X86::VPCMPBZ128rmik: case X86::VPCMPBZ128rrik:
+ case X86::VPCMPBZ256rmik: case X86::VPCMPBZ256rrik:
+ case X86::VPCMPBZrmik: case X86::VPCMPBZrrik:
+ case X86::VPCMPDZ128rmik: case X86::VPCMPDZ128rrik:
+ case X86::VPCMPDZ256rmik: case X86::VPCMPDZ256rrik:
+ case X86::VPCMPDZrmik: case X86::VPCMPDZrrik:
+ case X86::VPCMPQZ128rmik: case X86::VPCMPQZ128rrik:
+ case X86::VPCMPQZ256rmik: case X86::VPCMPQZ256rrik:
+ case X86::VPCMPQZrmik: case X86::VPCMPQZrrik:
+ case X86::VPCMPUBZ128rmik: case X86::VPCMPUBZ128rrik:
+ case X86::VPCMPUBZ256rmik: case X86::VPCMPUBZ256rrik:
+ case X86::VPCMPUBZrmik: case X86::VPCMPUBZrrik:
+ case X86::VPCMPUDZ128rmik: case X86::VPCMPUDZ128rrik:
+ case X86::VPCMPUDZ256rmik: case X86::VPCMPUDZ256rrik:
+ case X86::VPCMPUDZrmik: case X86::VPCMPUDZrrik:
+ case X86::VPCMPUQZ128rmik: case X86::VPCMPUQZ128rrik:
+ case X86::VPCMPUQZ256rmik: case X86::VPCMPUQZ256rrik:
+ case X86::VPCMPUQZrmik: case X86::VPCMPUQZrrik:
+ case X86::VPCMPUWZ128rmik: case X86::VPCMPUWZ128rrik:
+ case X86::VPCMPUWZ256rmik: case X86::VPCMPUWZ256rrik:
+ case X86::VPCMPUWZrmik: case X86::VPCMPUWZrrik:
+ case X86::VPCMPWZ128rmik: case X86::VPCMPWZ128rrik:
+ case X86::VPCMPWZ256rmik: case X86::VPCMPWZ256rrik:
+ case X86::VPCMPWZrmik: case X86::VPCMPWZrrik:
+ case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
+ case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
+ case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
+ case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
+ case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
+ case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
+ case X86::VPCMPUDZ128rmib: case X86::VPCMPUDZ128rmibk:
+ case X86::VPCMPUDZ256rmib: case X86::VPCMPUDZ256rmibk:
+ case X86::VPCMPUDZrmib: case X86::VPCMPUDZrmibk:
+ case X86::VPCMPUQZ128rmib: case X86::VPCMPUQZ128rmibk:
+ case X86::VPCMPUQZ256rmib: case X86::VPCMPUQZ256rmibk:
+ case X86::VPCMPUQZrmib: case X86::VPCMPUQZrmibk:
+ if ((Imm >= 0 && Imm <= 2) || (Imm >= 4 && Imm <= 6)) {
+ OS << '\t';
+ printVPCMPMnemonic(MI, OS);
+
+ unsigned CurOp = 0;
+ printOperand(MI, CurOp++, OS);
+
+ if (Desc.TSFlags & X86II::EVEX_K) {
+ // Print mask operand.
+ OS << " {";
+ printOperand(MI, CurOp++, OS);
+ OS << "}";
+ }
+ OS << ", ";
+ printOperand(MI, CurOp++, OS);
+ OS << ", ";
+
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
+ if (Desc.TSFlags & X86II::EVEX_B) {
+ // Broadcast form.
+ // Load size is based on W-bit as only D and Q are supported.
+ if (Desc.TSFlags & X86II::VEX_W)
+ printqwordmem(MI, CurOp++, OS);
+ else
+ printdwordmem(MI, CurOp++, OS);
+
+ // Print the number of elements broadcasted.
+ unsigned NumElts;
+ if (Desc.TSFlags & X86II::EVEX_L2)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ else if (Desc.TSFlags & X86II::VEX_L)
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ else
+ NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ OS << "{1to" << NumElts << "}";
+ } else {
+ if (Desc.TSFlags & X86II::EVEX_L2)
+ printzmmwordmem(MI, CurOp++, OS);
+ else if (Desc.TSFlags & X86II::VEX_L)
+ printymmwordmem(MI, CurOp++, OS);
+ else
+ printxmmwordmem(MI, CurOp++, OS);
+ }
+ } else {
+ printOperand(MI, CurOp++, OS);
+ }
+
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ } else if (Op.isImm()) {
+ O << formatImm((int64_t)Op.getImm());
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << "offset ";
+ Op.getExpr()->print(O, &MAI);
+ }
+}
+
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
+ unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
+ const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
+ const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
+
+ // If this has a segment register, print it.
+ printOptionalSegReg(MI, Op + X86::AddrSegmentReg, O);
+
+ O << '[';
+
+ bool NeedPlus = false;
+ if (BaseReg.getReg()) {
+ printOperand(MI, Op+X86::AddrBaseReg, O);
+ NeedPlus = true;
+ }
+
+ if (IndexReg.getReg()) {
+ if (NeedPlus) O << " + ";
+ if (ScaleVal != 1)
+ O << ScaleVal << '*';
+ printOperand(MI, Op+X86::AddrIndexReg, O);
+ NeedPlus = true;
+ }
+
+ if (!DispSpec.isImm()) {
+ if (NeedPlus) O << " + ";
+ assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
+ DispSpec.getExpr()->print(O, &MAI);
+ } else {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
+ if (NeedPlus) {
+ if (DispVal > 0)
+ O << " + ";
+ else {
+ O << " - ";
+ DispVal = -DispVal;
+ }
+ }
+ O << formatImm(DispVal);
+ }
+ }
+
+ O << ']';
+}
+
+void X86IntelInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ // If this has a segment register, print it.
+ printOptionalSegReg(MI, Op + 1, O);
+ O << '[';
+ printOperand(MI, Op, O);
+ O << ']';
+}
+
+void X86IntelInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ // DI accesses are always ES-based.
+ O << "es:[";
+ printOperand(MI, Op, O);
+ O << ']';
+}
+
+void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &DispSpec = MI->getOperand(Op);
+
+ // If this has a segment register, print it.
+ printOptionalSegReg(MI, Op + 1, O);
+
+ O << '[';
+
+ if (DispSpec.isImm()) {
+ O << formatImm(DispSpec.getImm());
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement?");
+ DispSpec.getExpr()->print(O, &MAI);
+ }
+
+ O << ']';
+}
+
+void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ if (MI->getOperand(Op).isExpr())
+ return MI->getOperand(Op).getExpr()->print(O, &MAI);
+
+ O << formatImm(MI->getOperand(Op).getImm() & 0xff);
+}
+
+void X86IntelInstPrinter::printSTiRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &OS) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ unsigned Reg = Op.getReg();
+ // Override the default printing to print st(0) instead of st.
+ if (Reg == X86::ST0)
+ OS << "st(0)";
+ else
+ printRegName(OS, Reg);
+}
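A minimal standalone sketch of how the "{1toN}" broadcast suffix printed by printVecCompareInstr above is derived: EVEX_L2 selects a 512-bit vector, VEX_L a 256-bit one, otherwise 128 bits, and VEX_W selects 64-bit rather than 32-bit elements. The function name and parameters below are illustrative only, not part of the patch.

#include <cassert>

// Returns the element count printed as "{1toN}" for a broadcast memory
// operand, given the encoded vector width and element width in bits.
static unsigned broadcastElementCount(unsigned VectorBits, unsigned ElementBits) {
  assert((VectorBits == 128 || VectorBits == 256 || VectorBits == 512) &&
         "unexpected vector width");
  assert((ElementBits == 32 || ElementBits == 64) &&
         "unexpected element width");
  return VectorBits / ElementBits; // e.g. 512 / 64 = 8 -> "{1to8}"
}

For example, a 512-bit packed-single compare (32-bit elements, W=0, EVEX_L2 set) yields 16, matching the NumElts table in the broadcast case above.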
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
index 3b34a8052bec..f32f49f7c417 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -1,9 +1,8 @@
//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
-#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INTELINSTPRINTER_H
+#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INTELINSTPRINTER_H
#include "X86InstPrinterCommon.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,6 +27,13 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
const MCSubtargetInfo &STI) override;
+ bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
+
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
@@ -39,6 +45,7 @@ public:
void printSrcIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDstIdx(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O);
+ void printSTiRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printanymem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
@@ -48,58 +55,38 @@ public:
printMemReference(MI, OpNo, O);
}
- void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "byte ptr ";
printMemReference(MI, OpNo, O);
}
- void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "word ptr ";
printMemReference(MI, OpNo, O);
}
- void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printdwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "dword ptr ";
printMemReference(MI, OpNo, O);
}
- void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printqwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "qword ptr ";
printMemReference(MI, OpNo, O);
}
- void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printxmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "xmmword ptr ";
printMemReference(MI, OpNo, O);
}
- void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printymmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "ymmword ptr ";
printMemReference(MI, OpNo, O);
}
- void printi512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printzmmwordmem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "zmmword ptr ";
printMemReference(MI, OpNo, O);
}
- void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "dword ptr ";
- printMemReference(MI, OpNo, O);
- }
- void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "qword ptr ";
- printMemReference(MI, OpNo, O);
- }
- void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ void printtbytemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "tbyte ptr ";
printMemReference(MI, OpNo, O);
}
- void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "xmmword ptr ";
- printMemReference(MI, OpNo, O);
- }
- void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "ymmword ptr ";
- printMemReference(MI, OpNo, O);
- }
- void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "zmmword ptr ";
- printMemReference(MI, OpNo, O);
- }
void printSrcIdx8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
@@ -154,4 +141,4 @@ public:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_X86_MCTARGETDESC_X86INTELINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index fa7c352a1b63..e1125c176b25 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 30d5c802d1ed..b2369647a40f 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- X86MCAsmInfo.h - X86 asm properties --------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index f5371db9e77a..31d26d08a63f 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1,9 +1,8 @@
//===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -525,9 +524,23 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// indirect register encoding, this handles addresses like [EAX]. The
// encoding for [EBP] with no displacement means [disp32] so we handle it
// by emitting a displacement of 0 below.
- if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) {
- EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
- return;
+ if (BaseRegNo != N86::EBP) {
+ if (Disp.isImm() && Disp.getImm() == 0) {
+ EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ return;
+ }
+
+ // If the displacement is @tlscall, treat it as zero.
+ if (Disp.isExpr()) {
+ auto *Sym = dyn_cast<MCSymbolRefExpr>(Disp.getExpr());
+ if (Sym && Sym->getKind() == MCSymbolRefExpr::VK_TLSCALL) {
+ // This is exclusively used by call *a@tlscall(base). The relocation
+ // (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
+ Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
+ EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
+ return;
+ }
+ }
}
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
@@ -880,7 +893,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (HasEVEX_RC) {
unsigned RcOperand = NumOps-1;
assert(RcOperand >= CurOp);
- EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
+ EVEX_rc = MI.getOperand(RcOperand).getImm();
+ assert(EVEX_rc <= 3 && "Invalid rounding control!");
}
EncodeRC = true;
}
@@ -979,7 +993,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
uint8_t LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
// Can we use the 2 byte VEX prefix?
- if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
+ if (!(MI.getFlags() & X86::IP_USE_VEX3) &&
+ Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
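The condition guarding the two-byte prefix can be read as a predicate; the following sketch restates it with assumed boolean parameters and is not the emitter's actual interface.

// The two-byte 0xC5 VEX prefix encodes only VEX.R, so it is usable only
// when VEX.X and VEX.B are 1 (no extended index/base register), VEX.W is 0,
// the opcode map is 0F (VEX_5M == 1), and the instruction has not been
// flagged to force the three-byte 0xC4 form.
static bool canUseTwoByteVEX(bool VEX_B, bool VEX_X, bool VEX_W,
                             unsigned VEX_5M, bool ForceVex3) {
  return !ForceVex3 && VEX_B && VEX_X && !VEX_W && VEX_5M == 1;
}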
@@ -1060,16 +1075,17 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
break;
case X86II::MRMSrcReg:
+ case X86II::MRMSrcRegCC:
REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
break;
- case X86II::MRMSrcMem: {
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemCC:
REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
CurOp += X86::AddrNumOperands;
break;
- }
case X86II::MRMDestReg:
REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
@@ -1080,7 +1096,7 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
CurOp += X86::AddrNumOperands;
REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
break;
- case X86II::MRMXm:
+ case X86II::MRMXmCC: case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
@@ -1088,7 +1104,7 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
break;
- case X86II::MRMXr:
+ case X86II::MRMXrCC: case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
@@ -1272,6 +1288,8 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
+ unsigned OpcodeOffset = 0;
+
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
default: errs() << "FORM: " << Form << "\n";
@@ -1318,8 +1336,14 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
break;
}
- case X86II::RawFrm: {
- EmitByte(BaseOpcode, CurByte, OS);
+ case X86II::AddCCFrm: {
+ // This will be added to the opcode in the fallthrough.
+ OpcodeOffset = MI.getOperand(NumOps - 1).getImm();
+ assert(OpcodeOffset < 16 && "Unexpected opcode offset!");
+ --NumOps; // Drop the operand from the end.
+ LLVM_FALLTHROUGH;
+ case X86II::RawFrm:
+ EmitByte(BaseOpcode + OpcodeOffset, CurByte, OS);
if (!is64BitMode(STI) || !isPCRel32Branch(MI))
break;
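AddCCFrm folds the trailing condition-code operand into the opcode byte itself, so a single instruction form covers a whole CC-encoded family. A minimal sketch of that addition follows; the names are assumed for illustration and are not the emitter's code.

#include <cassert>
#include <cstdint>

static uint8_t encodeCCOpcode(uint8_t BaseOpcode, unsigned CondCode) {
  assert(CondCode < 16 && "x86 condition codes fit in the low opcode nibble");
  return static_cast<uint8_t>(BaseOpcode + CondCode);
}
// encodeCCOpcode(0x70, 4) == 0x74, the short JE/JZ encoding.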
@@ -1436,6 +1460,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = SrcRegNum + 1;
break;
}
+ case X86II::MRMSrcRegCC: {
+ unsigned FirstOp = CurOp++;
+ unsigned SecondOp = CurOp++;
+
+ unsigned CC = MI.getOperand(CurOp++).getImm();
+ EmitByte(BaseOpcode + CC, CurByte, OS);
+
+ EmitRegModRMByte(MI.getOperand(SecondOp),
+ GetX86RegNum(MI.getOperand(FirstOp)), CurByte, OS);
+ break;
+ }
case X86II::MRMSrcMem: {
unsigned FirstMemOp = CurOp+1;
@@ -1481,6 +1516,27 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
break;
}
+ case X86II::MRMSrcMemCC: {
+ unsigned RegOp = CurOp++;
+ unsigned FirstMemOp = CurOp;
+ CurOp = FirstMemOp + X86::AddrNumOperands;
+
+ unsigned CC = MI.getOperand(CurOp++).getImm();
+ EmitByte(BaseOpcode + CC, CurByte, OS);
+
+ emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(RegOp)),
+ TSFlags, Rex, CurByte, OS, Fixups, STI);
+ break;
+ }
+
+ case X86II::MRMXrCC: {
+ unsigned RegOp = CurOp++;
+
+ unsigned CC = MI.getOperand(CurOp++).getImm();
+ EmitByte(BaseOpcode + CC, CurByte, OS);
+ EmitRegModRMByte(MI.getOperand(RegOp), 0, CurByte, OS);
+ break;
+ }
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
@@ -1497,6 +1553,17 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurByte, OS);
break;
+ case X86II::MRMXmCC: {
+ unsigned FirstMemOp = CurOp;
+ CurOp = FirstMemOp + X86::AddrNumOperands;
+
+ unsigned CC = MI.getOperand(CurOp++).getImm();
+ EmitByte(BaseOpcode + CC, CurByte, OS);
+
+ emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI);
+ break;
+ }
+
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/MCTargetDesc/X86MCExpr.h b/lib/Target/X86/MCTargetDesc/X86MCExpr.h
index 1070f70468fa..532fecd9951b 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCExpr.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCExpr.h
@@ -1,9 +1,8 @@
//=--- X86MCExpr.h - X86 specific MC expression classes ---*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,7 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCEXPR_H
#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86MCEXPR_H
-#include "InstPrinter/X86ATTInstPrinter.h"
+#include "X86ATTInstPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index ea4aaf14223d..ce05ad974507 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- X86MCTargetDesc.cpp - X86 Target Descriptions ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,13 +11,15 @@
//===----------------------------------------------------------------------===//
#include "X86MCTargetDesc.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
+#include "TargetInfo/X86TargetInfo.h"
+#include "X86ATTInstPrinter.h"
#include "X86BaseInfo.h"
+#include "X86IntelInstPrinter.h"
#include "X86MCAsmInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -117,6 +118,15 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
{codeview::RegisterId::ST6, X86::FP6},
{codeview::RegisterId::ST7, X86::FP7},
+ {codeview::RegisterId::MM0, X86::MM0},
+ {codeview::RegisterId::MM1, X86::MM1},
+ {codeview::RegisterId::MM2, X86::MM2},
+ {codeview::RegisterId::MM3, X86::MM3},
+ {codeview::RegisterId::MM4, X86::MM4},
+ {codeview::RegisterId::MM5, X86::MM5},
+ {codeview::RegisterId::MM6, X86::MM6},
+ {codeview::RegisterId::MM7, X86::MM7},
+
{codeview::RegisterId::XMM0, X86::XMM0},
{codeview::RegisterId::XMM1, X86::XMM1},
{codeview::RegisterId::XMM2, X86::XMM2},
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 4e9f5ba60d2e..00dd5908cbf5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,9 +34,6 @@ class StringRef;
class raw_ostream;
class raw_pwrite_stream;
-Target &getTheX86_32Target();
-Target &getTheX86_64Target();
-
/// Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 883278b7bc1f..fc7e99f61e5e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h b/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
index 10a282dd2962..3b1e9e7c34fb 100644
--- a/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
+++ b/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h
@@ -1,9 +1,8 @@
//===- X86TargetStreamer.h ------------------------------*- C++ -*---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 2aec695b2dbf..3baab9da1c41 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -1,9 +1,8 @@
//===-- X86WinCOFFObjectWriter.cpp - X86 Win COFF Writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 0085787e576a..796a27a17255 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -1,9 +1,8 @@
//===-- X86WinCOFFStreamer.cpp - X86 Target WinCOFF Streamer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
index bee9b7046338..e9987d1f62bd 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
@@ -1,9 +1,8 @@
//===-- X86WinCOFFTargetStreamer.cpp ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/ShadowCallStack.cpp b/lib/Target/X86/ShadowCallStack.cpp
deleted file mode 100644
index ab2cebcb58ee..000000000000
--- a/lib/Target/X86/ShadowCallStack.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ShadowCallStack pass instruments function prologs/epilogs to check that
-// the return address has not been corrupted during the execution of the
-// function. The return address is stored in a 'shadow call stack' addressed
-// using the %gs segment register.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-
-class ShadowCallStack : public MachineFunctionPass {
-public:
- static char ID;
-
- ShadowCallStack() : MachineFunctionPass(ID) {
- initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
-private:
- // Do not instrument leaf functions with this many or fewer instructions. The
- // shadow call stack instrumented prolog/epilog are slightly race-y reading
- // and checking the saved return address, so it is better to not instrument
- // functions that have fewer instructions than the instrumented prolog/epilog
- // race.
- static const size_t SkipLeafInstructions = 3;
-};
-
-char ShadowCallStack::ID = 0;
-} // end anonymous namespace.
-
-static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
- MachineBasicBlock &MBB, const DebugLoc &DL);
-static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
- MachineBasicBlock &MBB, const DebugLoc &DL,
- MCPhysReg FreeRegister);
-
-static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
- MachineInstr &MI, MachineBasicBlock &TrapBB);
-static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
- MachineInstr &MI, MachineBasicBlock &TrapBB,
- MCPhysReg FreeRegister);
-// Generate a longer epilog that only uses r10 when a tailcall branches to r11.
-static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
- MachineInstr &MI, MachineBasicBlock &TrapBB);
-
-// Helper function to add ModR/M references for [Seg: Reg + Offset] memory
-// accesses
-static inline const MachineInstrBuilder &
-addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg,
- int Offset = 0) {
- return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
-}
-
-static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
- MachineBasicBlock &MBB, const DebugLoc &DL) {
- const MCPhysReg ReturnReg = X86::R10;
- const MCPhysReg OffsetReg = X86::R11;
-
- auto MBBI = MBB.begin();
- // mov r10, [rsp]
- addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg),
- X86::RSP);
- // xor r11, r11
- BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
- .addDef(OffsetReg)
- .addReg(OffsetReg, RegState::Undef)
- .addReg(OffsetReg, RegState::Undef);
- // add QWORD [gs:r11], 8
- addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
- OffsetReg)
- .addImm(8);
- // mov r11, [gs:r11]
- addSegmentedMem(
- BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
- OffsetReg);
- // mov [gs:r11], r10
- addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
- OffsetReg)
- .addReg(ReturnReg);
-}
-
-static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
- MachineBasicBlock &MBB, const DebugLoc &DL,
- MCPhysReg FreeRegister) {
- // mov REG, [rsp]
- addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm))
- .addDef(FreeRegister),
- X86::RSP);
-}
-
-static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
- MachineInstr &MI, MachineBasicBlock &TrapBB) {
- const DebugLoc &DL = MI.getDebugLoc();
-
- // xor r11, r11
- BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
- .addDef(X86::R11)
- .addReg(X86::R11, RegState::Undef)
- .addReg(X86::R11, RegState::Undef);
- // mov r10, [gs:r11]
- addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
- X86::GS, X86::R11);
- // mov r10, [gs:r10]
- addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
- X86::GS, X86::R10);
- // sub QWORD [gs:r11], 8
- // This instruction should not be moved up to avoid a signal race.
- addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
- X86::GS, X86::R11)
- .addImm(8);
- // cmp [rsp], r10
- addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
- .addReg(X86::R10);
- // jne trap
- BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
- MBB.addSuccessor(&TrapBB);
-}
-
-static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
- MachineInstr &MI, MachineBasicBlock &TrapBB,
- MCPhysReg FreeRegister) {
- const DebugLoc &DL = MI.getDebugLoc();
-
- // cmp [rsp], REG
- addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
- .addReg(FreeRegister);
- // jne trap
- BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
- MBB.addSuccessor(&TrapBB);
-}
-
-static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
- MachineInstr &MI, MachineBasicBlock &TrapBB) {
- const DebugLoc &DL = MI.getDebugLoc();
-
- // xor r10, r10
- BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
- .addDef(X86::R10)
- .addReg(X86::R10, RegState::Undef)
- .addReg(X86::R10, RegState::Undef);
- // mov r10, [gs:r10]
- addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
- X86::GS, X86::R10);
- // mov r10, [gs:r10]
- addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
- X86::GS, X86::R10);
- // sub QWORD [gs:0], 8
- // This instruction should not be moved up to avoid a signal race.
- addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
- .addImm(8);
- // cmp [rsp], r10
- addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
- .addReg(X86::R10);
- // jne trap
- BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
- MBB.addSuccessor(&TrapBB);
-}
-
-bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
- if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
- Fn.getFunction().hasFnAttribute(Attribute::Naked))
- return false;
-
- if (Fn.empty() || !Fn.getRegInfo().tracksLiveness())
- return false;
-
- // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
- // on entry for parameters with the nest attribute.)
- if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11))
- return false;
-
- // FIXME: Skip functions with conditional and r10 tail calls for now.
- bool HasReturn = false;
- for (auto &MBB : Fn) {
- if (MBB.empty())
- continue;
-
- const MachineInstr &MI = MBB.instr_back();
- if (MI.isReturn())
- HasReturn = true;
-
- if (MI.isReturn() && MI.isCall()) {
- if (MI.findRegisterUseOperand(X86::EFLAGS))
- return false;
- // This should only be possible on Windows 64 (see GR64_TC versus
- // GR64_TCW64.)
- if (MI.findRegisterUseOperand(X86::R10) ||
- MI.hasRegisterImplicitUseOperand(X86::R10))
- return false;
- }
- }
-
- if (!HasReturn)
- return false;
-
- // For leaf functions:
- // 1. Do not instrument very short functions where it would not improve that
- // function's security.
- // 2. Detect if there is an unused caller-saved register we can reserve to
- // hold the return address instead of writing/reading it from the shadow
- // call stack.
- MCPhysReg LeafFuncRegister = X86::NoRegister;
- if (!Fn.getFrameInfo().adjustsStack()) {
- size_t InstructionCount = 0;
- std::bitset<X86::NUM_TARGET_REGS> UsedRegs;
- for (auto &MBB : Fn) {
- for (auto &LiveIn : MBB.liveins())
- UsedRegs.set(LiveIn.PhysReg);
- for (auto &MI : MBB) {
- if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel())
- InstructionCount++;
- for (auto &Op : MI.operands())
- if (Op.isReg() && Op.isDef())
- UsedRegs.set(Op.getReg());
- }
- }
-
- if (InstructionCount <= SkipLeafInstructions)
- return false;
-
- std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs;
- const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
- for (size_t i = 0; CSRegs[i]; i++)
- CalleeSavedRegs.set(CSRegs[i]);
-
- const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
- for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) {
- // FIXME: Optimization opportunity: spill/restore a callee-saved register
- // if a caller-saved register is unavailable.
- if (CalleeSavedRegs.test(Reg))
- continue;
-
- bool Used = false;
- for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR)
- if ((Used = UsedRegs.test(*SR)))
- break;
-
- if (!Used) {
- LeafFuncRegister = Reg;
- break;
- }
- }
- }
-
- const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister;
- if (LeafFuncOptimization)
- // Mark the leaf function register live-in for all MBBs except the entry MBB
- for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I)
- I->addLiveIn(LeafFuncRegister);
-
- MachineBasicBlock &MBB = Fn.front();
- const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB;
- const DebugLoc &DL = NonEmpty->front().getDebugLoc();
-
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
- if (LeafFuncOptimization)
- addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister);
- else
- addProlog(Fn, TII, MBB, DL);
-
- MachineBasicBlock *Trap = nullptr;
- for (auto &MBB : Fn) {
- if (MBB.empty())
- continue;
-
- MachineInstr &MI = MBB.instr_back();
- if (MI.isReturn()) {
- if (!Trap) {
- Trap = Fn.CreateMachineBasicBlock();
- BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
- Fn.push_back(Trap);
- }
-
- if (LeafFuncOptimization)
- addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister);
- else if (MI.findRegisterUseOperand(X86::R11))
- addEpilogOnlyR10(TII, MBB, MI, *Trap);
- else
- addEpilog(TII, MBB, MI, *Trap);
- }
- }
-
- return true;
-}
-
-INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
- false, false)
-
-FunctionPass *llvm::createShadowCallStackPass() {
- return new ShadowCallStack();
-}
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 16c2b56c48b5..47c41626a666 100644
--- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -1,13 +1,12 @@
//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "TargetInfo/X86TargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.h b/lib/Target/X86/TargetInfo/X86TargetInfo.h
new file mode 100644
index 000000000000..caf6b8d424fc
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.h
@@ -0,0 +1,21 @@
+//===-- X86TargetInfo.h - X86 Target Implementation -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_TARGETINFO_X86TARGETINFO_H
+#define LLVM_LIB_TARGET_X86_TARGETINFO_X86TARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheX86_32Target();
+Target &getTheX86_64Target();
+
+}
+
+#endif // LLVM_LIB_TARGET_X86_TARGETINFO_X86TARGETINFO_H
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index bed940d0d0e9..48fd3e0b7ab9 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -1,9 +1,8 @@
//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -300,7 +299,7 @@ void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
unsigned HalfMask = Imm >> (l * 4);
unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
- ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
+ ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i);
}
}
@@ -384,7 +383,8 @@ void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
}
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
- unsigned NumDstElts, SmallVectorImpl<int> &Mask) {
+ unsigned NumDstElts, bool IsAnyExtend,
+ SmallVectorImpl<int> &Mask) {
unsigned Scale = DstScalarBits / SrcScalarBits;
assert(SrcScalarBits < DstScalarBits &&
"Expected zero extension mask to increase scalar size");
@@ -392,7 +392,7 @@ void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
for (unsigned i = 0; i != NumDstElts; i++) {
Mask.push_back(i);
for (unsigned j = 1; j != Scale; j++)
- Mask.push_back(SM_SentinelZero);
+ Mask.push_back(IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero);
}
}
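The new IsAnyExtend parameter only changes which sentinel fills the widened lanes. A self-contained sketch of the decoded mask, with assumed sentinel values standing in for SM_SentinelUndef and SM_SentinelZero:

#include <vector>

static constexpr int SentinelUndef = -1; // assumed stand-in for SM_SentinelUndef
static constexpr int SentinelZero  = -2; // assumed stand-in for SM_SentinelZero

static std::vector<int> decodeExtendMask(unsigned SrcBits, unsigned DstBits,
                                         unsigned NumDstElts, bool IsAnyExtend) {
  unsigned Scale = DstBits / SrcBits;
  std::vector<int> Mask;
  for (unsigned i = 0; i != NumDstElts; ++i) {
    Mask.push_back(static_cast<int>(i)); // keep the source element
    for (unsigned j = 1; j != Scale; ++j) // fill the widened lanes
      Mask.push_back(IsAnyExtend ? SentinelUndef : SentinelZero);
  }
  return Mask;
}
// decodeExtendMask(8, 32, 4, /*IsAnyExtend=*/false)
//   -> {0, Z, Z, Z, 1, Z, Z, Z, 2, Z, Z, Z, 3, Z, Z, Z}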
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 85cde14a3241..f52785063071 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -1,9 +1,8 @@
//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -137,7 +136,7 @@ void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
/// Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
- unsigned NumDstElts,
+ unsigned NumDstElts, bool IsAnyExtend,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a move lower and zero upper instruction as a shuffle mask.
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1c8813815b86..a95f68434d12 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -1,9 +1,8 @@
//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// transition penalty between functions encoded with AVX and SSE.
FunctionPass *createX86IssueVZeroUpperPass();
-/// This pass instruments the function prolog to save the return address to a
-/// 'shadow call stack' and the function epilog to check that the return address
-/// did not change during function execution.
-FunctionPass *createShadowCallStackPass();
-
/// This pass inserts ENDBR instructions before indirect jump/call
/// destinations as part of CET IBT mechanism.
FunctionPass *createX86IndirectBranchTrackingPass();
@@ -138,11 +132,12 @@ FunctionPass *createX86SpeculativeLoadHardeningPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
void initializeFixupBWInstPassPass(PassRegistry &);
void initializeFixupLEAPassPass(PassRegistry &);
-void initializeShadowCallStackPass(PassRegistry &);
+void initializeFPSPass(PassRegistry &);
void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86AvoidSFBPassPass(PassRegistry &);
void initializeX86CallFrameOptimizationPass(PassRegistry &);
void initializeX86CmovConverterPassPass(PassRegistry &);
+void initializeX86ExpandPseudoPass(PassRegistry&);
void initializeX86CondBrFoldingPassPass(PassRegistry &);
void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6b1749fc7500..3112f00c91f2 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,9 +1,8 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,6 +39,9 @@ def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
+ "Support CMPXCHG8B instructions">;
+
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
@@ -165,9 +167,16 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
+def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
+ "Support bfloat16 floating point",
+ [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
+def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
+ "HasVP2INTERSECT", "true",
+ "Enable AVX-512 vp2intersect",
+ [FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -258,6 +267,8 @@ def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
+def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
+ "Has ENQCMD instructions">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
@@ -274,7 +285,7 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software floating point features.">;
+ "Use software floating point features">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
@@ -342,6 +353,12 @@ def FeatureERMSB
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
+// Bulldozer and newer processors can merge CMP/TEST (but not other
+// instructions) with conditional branches.
+def FeatureBranchFusion
+ : SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
+ "CMP/TEST can be fused with conditional branches">;
+
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
@@ -355,7 +372,7 @@ def FeatureMacroFusion
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
- "Indicates if gather is reasonably fast.">;
+ "Indicates if gather is reasonably fast">;
def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
@@ -366,7 +383,7 @@ def FeaturePrefer256Bit
def FeatureRetpolineIndirectCalls
: SubtargetFeature<
"retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
- "Remove speculation of indirect calls from the generated code.">;
+ "Remove speculation of indirect calls from the generated code">;
// Lower indirect branches and switches either using conditional branch trees
// or using a special construct called a `retpoline` to mitigate potential
@@ -374,7 +391,7 @@ def FeatureRetpolineIndirectCalls
def FeatureRetpolineIndirectBranches
: SubtargetFeature<
"retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
- "Remove speculation of indirect branches from the generated code.">;
+ "Remove speculation of indirect branches from the generated code">;
// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
// `retpoline-indirect-branches` above.
@@ -382,7 +399,7 @@ def FeatureRetpoline
: SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct.",
+ "lowering them with a speculation blocking construct",
[FeatureRetpolineIndirectCalls,
FeatureRetpolineIndirectBranches]>;
@@ -395,7 +412,7 @@ def FeatureRetpolineExternalThunk
"When lowering an indirect call or branch using a `retpoline`, rely "
"on the specified user provided thunk rather than emitting one "
"ourselves. Only has effect when combined with some other retpoline "
- "feature.", [FeatureRetpolineIndirectCalls]>;
+ "feature", [FeatureRetpolineIndirectCalls]>;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
@@ -405,7 +422,7 @@ def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
"Indicates that the BEXTR instruction is implemented as a single uop "
- "with good throughput.">;
+ "with good throughput">;
// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
@@ -416,12 +433,33 @@ def FeatureFastHorizontalOps
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
"normal vector instructions with shuffles", [FeatureSSE3]>;
+def FeatureFastScalarShiftMasks
+ : SubtargetFeature<
+ "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
+ "Prefer a left/right scalar logical shift pair over a shift+and pair">;
+
+def FeatureFastVectorShiftMasks
+ : SubtargetFeature<
+ "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
+ "Prefer a left/right vector logical shift pair over a shift+and pair">;
+
// Merge branches using three-way conditional code.
def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
"ThreewayBranchProfitable", "true",
"Merge branches to a three-way "
"conditional branch">;
+// Bonnell
+def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
+// Silvermont
+def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
+// Goldmont
+def ProcIntelGLM : SubtargetFeature<"", "X86ProcFamily", "IntelGLM", "">;
+// Goldmont Plus
+def ProcIntelGLP : SubtargetFeature<"", "X86ProcFamily", "IntelGLP", "">;
+// Tremont
+def ProcIntelTRM : SubtargetFeature<"", "X86ProcFamily", "IntelTRM", "">;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -440,7 +478,7 @@ include "X86SchedPredicates.td"
def X86InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
-// X86 processors supported.
+// X86 Scheduler Models
//===----------------------------------------------------------------------===//
include "X86ScheduleAtom.td"
@@ -454,37 +492,468 @@ include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
-def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
- "Intel Atom processors">;
-def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
- "Intel Silvermont processors">;
-def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
- "Intel Goldmont processors">;
-def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
- "Intel Goldmont Plus processors">;
-def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
- "Intel Tremont processors">;
+//===----------------------------------------------------------------------===//
+// X86 Processor Feature Lists
+//===----------------------------------------------------------------------===//
+
+def ProcessorFeatures {
+ // Nehalem
+ list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureLAHFSAHF,
+ FeatureMacroFusion];
+ list<SubtargetFeature> NHMSpecificFeatures = [];
+ list<SubtargetFeature> NHMFeatures =
+ !listconcat(NHMInheritableFeatures, NHMSpecificFeatures);
+
+ // Westmere
+ list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
+ list<SubtargetFeature> WSMSpecificFeatures = [];
+ list<SubtargetFeature> WSMInheritableFeatures =
+ !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures);
+ list<SubtargetFeature> WSMFeatures =
+ !listconcat(WSMInheritableFeatures, WSMSpecificFeatures);
+
+ // Sandybridge
+ list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
+ FeatureSlowDivide64,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureSlow3OpsLEA,
+ FeatureFastScalarFSQRT,
+ FeatureFastSHLDRotate,
+ FeatureMergeToThreeWayBranch];
+ list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> SNBInheritableFeatures =
+ !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures);
+ list<SubtargetFeature> SNBFeatures =
+ !listconcat(SNBInheritableFeatures, SNBSpecificFeatures);
+
+ // Ivybridge
+ list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase];
+ list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> IVBInheritableFeatures =
+ !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures);
+ list<SubtargetFeature> IVBFeatures =
+ !listconcat(IVBInheritableFeatures, IVBSpecificFeatures);
+
+ // Haswell
+ list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureERMSB,
+ FeatureFMA,
+ FeatureINVPCID,
+ FeatureLZCNT,
+ FeatureMOVBE,
+ FeatureFastVariableShuffle];
+ list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps,
+ FeatureLZCNTFalseDeps];
+ list<SubtargetFeature> HSWInheritableFeatures =
+ !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures);
+ list<SubtargetFeature> HSWFeatures =
+ !listconcat(HSWInheritableFeatures, HSWSpecificFeatures);
+
+ // Broadwell
+ list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
+ FeatureRDSEED,
+ FeaturePRFCHW];
+ list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps,
+ FeatureLZCNTFalseDeps];
+ list<SubtargetFeature> BDWInheritableFeatures =
+ !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures);
+ list<SubtargetFeature> BDWFeatures =
+ !listconcat(BDWInheritableFeatures, BDWSpecificFeatures);
+
+ // Skylake
+ list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
+ FeatureMPX,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT,
+ FeatureFastVectorFSQRT];
+ list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps,
+ FeatureSGX];
+ list<SubtargetFeature> SKLInheritableFeatures =
+ !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures);
+ list<SubtargetFeature> SKLFeatures =
+ !listconcat(SKLInheritableFeatures, SKLSpecificFeatures);
+
+ // Skylake-AVX512
+ list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeaturePKU,
+ FeatureCLWB];
+ list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> SKXInheritableFeatures =
+ !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures);
+ list<SubtargetFeature> SKXFeatures =
+ !listconcat(SKXInheritableFeatures, SKXSpecificFeatures);
+
+ // Cascadelake
+ list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
+ list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> CLXInheritableFeatures =
+ !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures);
+ list<SubtargetFeature> CLXFeatures =
+ !listconcat(CLXInheritableFeatures, CLXSpecificFeatures);
+
+ // Cooperlake
+ list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
+ list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> CPXInheritableFeatures =
+ !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures);
+ list<SubtargetFeature> CPXFeatures =
+ !listconcat(CPXInheritableFeatures, CPXSpecificFeatures);
+
+ // Cannonlake
+ list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeaturePKU,
+ FeatureVBMI,
+ FeatureIFMA,
+ FeatureSHA,
+ FeatureSGX];
+ list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
+ list<SubtargetFeature> CNLInheritableFeatures =
+ !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures);
+ list<SubtargetFeature> CNLFeatures =
+ !listconcat(CNLInheritableFeatures, CNLSpecificFeatures);
+
+ // Icelake
+ list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
+ FeatureVAES,
+ FeatureVBMI2,
+ FeatureVNNI,
+ FeatureVPCLMULQDQ,
+ FeatureVPOPCNTDQ,
+ FeatureGFNI,
+ FeatureCLWB,
+ FeatureRDPID];
+ list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
+ list<SubtargetFeature> ICLInheritableFeatures =
+ !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures);
+ list<SubtargetFeature> ICLFeatures =
+ !listconcat(ICLInheritableFeatures, ICLSpecificFeatures);
+
+ // Icelake Server
+ list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG,
+ FeatureWBNOINVD,
+ FeatureHasFastGather];
+ list<SubtargetFeature> ICXFeatures =
+ !listconcat(ICLInheritableFeatures, ICXSpecificFeatures);
+
+ // Atom
+ list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeatureSlowTwoMemOps,
+ FeatureLAHFSAHF];
+ list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom,
+ FeatureSlowUAMem16,
+ FeatureLEAForSP,
+ FeatureSlowDivide32,
+ FeatureSlowDivide64,
+ FeatureLEAUsesAG,
+ FeaturePadShortFunctions];
+ list<SubtargetFeature> AtomFeatures =
+ !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);
+
+ // Silvermont
+ list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeaturePRFCHW,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureRDRAND];
+ list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM,
+ FeatureSlowDivide64,
+ FeatureSlowPMULLD,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> SLMInheritableFeatures =
+ !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
+ list<SubtargetFeature> SLMFeatures =
+ !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);
+
+ // Goldmont
+ list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
+ FeatureMPX,
+ FeatureSHA,
+ FeatureRDSEED,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureCLFLUSHOPT,
+ FeatureFSGSBase];
+ list<SubtargetFeature> GLMSpecificFeatures = [ProcIntelGLM,
+ FeaturePOPCNTFalseDeps];
+ list<SubtargetFeature> GLMInheritableFeatures =
+ !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
+ list<SubtargetFeature> GLMFeatures =
+ !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);
+
+ // Goldmont Plus
+ list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
+ FeatureRDPID,
+ FeatureSGX];
+ list<SubtargetFeature> GLPSpecificFeatures = [ProcIntelGLP];
+ list<SubtargetFeature> GLPInheritableFeatures =
+ !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
+ list<SubtargetFeature> GLPFeatures =
+ !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);
+
+ // Tremont
+ list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLDEMOTE,
+ FeatureGFNI,
+ FeatureMOVDIRI,
+ FeatureMOVDIR64B,
+ FeatureWAITPKG];
+ list<SubtargetFeature> TRMSpecificFeatures = [ProcIntelTRM];
+ list<SubtargetFeature> TRMFeatures =
+ !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
+ TRMSpecificFeatures);
+
+ // Knights Landing
+ list<SubtargetFeature> KNLFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureSlowDivide64,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF,
+ FeatureSlow3OpsLEA,
+ FeatureSlowIncDec,
+ FeatureAES,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureAVX512,
+ FeatureERI,
+ FeatureCDI,
+ FeaturePFI,
+ FeaturePREFETCHWT1,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeaturePRFCHW,
+ FeatureSlowTwoMemOps,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureHasFastGather,
+ FeatureSlowPMADDWD];
+ // TODO Add AVX5124FMAPS/AVX5124VNNIW features
+ list<SubtargetFeature> KNMFeatures =
+ !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
+
+
+ // Bobcat
+ list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureSSE4A,
+ FeatureFXSR,
+ FeatureNOPL,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF,
+ FeatureFast15ByteNOP,
+ FeatureFastScalarShiftMasks,
+ FeatureFastVectorShiftMasks];
+ list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
+
+ // Jaguar
+ list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureBMI,
+ FeatureF16C,
+ FeatureMOVBE,
+ FeatureXSAVE,
+ FeatureXSAVEOPT];
+ list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT,
+ FeatureFastBEXTR,
+ FeatureFastPartialYMMorZMMWrite,
+ FeatureFastHorizontalOps];
+ list<SubtargetFeature> BtVer2InheritableFeatures =
+ !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
+ list<SubtargetFeature> BtVer2Features =
+ !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);
+
+ // Bulldozer
+ list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
+ FeatureCMPXCHG8B,
+ FeatureCMOV,
+ FeatureXOP,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureLWP,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF,
+ FeatureFast11ByteNOP,
+ FeatureFastScalarShiftMasks,
+ FeatureBranchFusion];
+ list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
+
+ // Piledriver
+ list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureFastBEXTR];
+ list<SubtargetFeature> BdVer2InheritableFeatures =
+ !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
+ list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;
+
+ // Steamroller
+ list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
+ FeatureFSGSBase];
+ list<SubtargetFeature> BdVer3InheritableFeatures =
+ !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
+ list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;
+
+ // Excavator
+ list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
+ FeatureBMI2,
+ FeatureMWAITX];
+ list<SubtargetFeature> BdVer4InheritableFeatures =
+ !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
+ list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;
+
+
+ // AMD Zen Processors common ISAs
+ list<SubtargetFeature> ZNFeatures = [FeatureADX,
+ FeatureAES,
+ FeatureAVX2,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureCLFLUSHOPT,
+ FeatureCLZERO,
+ FeatureCMOV,
+ Feature64Bit,
+ FeatureCMPXCHG16B,
+ FeatureF16C,
+ FeatureFMA,
+ FeatureFSGSBase,
+ FeatureFXSR,
+ FeatureNOPL,
+ FeatureFastLZCNT,
+ FeatureLAHFSAHF,
+ FeatureLZCNT,
+ FeatureFastBEXTR,
+ FeatureFast15ByteNOP,
+ FeatureBranchFusion,
+ FeatureFastScalarShiftMasks,
+ FeatureMMX,
+ FeatureMOVBE,
+ FeatureMWAITX,
+ FeaturePCLMUL,
+ FeaturePOPCNT,
+ FeaturePRFCHW,
+ FeatureRDRAND,
+ FeatureRDSEED,
+ FeatureSHA,
+ FeatureSSE4A,
+ FeatureSlowSHLD,
+ FeatureX87,
+ FeatureXSAVE,
+ FeatureXSAVEC,
+ FeatureXSAVEOPT,
+ FeatureXSAVES];
+ list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
+ FeatureRDPID,
+ FeatureWBNOINVD];
+ list<SubtargetFeature> ZN2Features =
+ !listconcat(ZNFeatures, ZN2AdditionalFeatures);
+}
+
+//===----------------------------------------------------------------------===//
+// X86 processors supported.
+//===----------------------------------------------------------------------===//
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>;
+// NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
+// if i386/i486 is specifically requested.
+def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16,
+ FeatureCMPXCHG8B]>;
def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>;
def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>;
-def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-
-def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
-def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
- FeatureNOPL]>;
-
-def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
+def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16,
+ FeatureCMPXCHG8B]>;
+def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16,
+ FeatureCMPXCHG8B]>;
+def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16,
+ FeatureCMPXCHG8B, FeatureMMX]>;
+
+def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureCMOV]>;
+def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureCMOV, FeatureNOPL]>;
+
+def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX, FeatureCMOV, FeatureFXSR,
+ FeatureNOPL]>;
foreach P = ["pentium3", "pentium3m"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureMMX,
+ FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
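To illustrate the NOTE above about CMPXCHG8B on the generic target: the instruction (introduced with the Pentium) is what makes 64-bit atomics lock-free on 32-bit x86, so it should only be lost when i386/i486 is requested explicitly. A minimal C++ sketch, assuming a 32-bit x86 target (the function name is made up):

#include <atomic>
#include <cstdint>

std::atomic<std::uint64_t> Counter{0};

// With CMPXCHG8B available this lowers to a lock cmpxchg8b sequence; on a
// target without it (i386/i486) the compiler has to fall back to an atomic
// library call instead.
bool claim_first() {
  std::uint64_t Expected = 0;
  return Counter.compare_exchange_strong(Expected, 1);
}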
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -498,13 +967,15 @@ foreach P = ["pentium3", "pentium3m"] in {
// changes slightly.
def : ProcessorModel<"pentium-m", GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+ [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+ FeatureCMOV]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcessorModel<P, GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+ [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+ FeatureCMOV]>;
}
// Intel Quark.
@@ -512,16 +983,19 @@ def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+ [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
+ FeatureCMOV]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
+ [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
+ FeatureCMOV]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
FeatureSSE3,
@@ -535,6 +1009,7 @@ def : ProcessorModel<"nocona", GenericPostRAModel, [
def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
@@ -548,6 +1023,7 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
FeatureSSE41,
@@ -560,638 +1036,131 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
]>;
// Atom CPUs.
-class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
- ProcIntelAtom,
- FeatureX87,
- FeatureSlowUAMem16,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSSE3,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeatureLEAForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureSlowTwoMemOps,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions,
- FeatureLAHFSAHF
-]>;
-def : BonnellProc<"bonnell">;
-def : BonnellProc<"atom">; // Pin the generic name to the baseline.
-
-class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
- ProcIntelSLM,
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeaturePOPCNT,
- FeaturePCLMUL,
- FeatureSlowDivide64,
- FeatureSlowTwoMemOps,
- FeaturePRFCHW,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowPMULLD,
- FeatureRDRAND,
- FeatureLAHFSAHF,
- FeaturePOPCNTFalseDeps
-]>;
-def : SilvermontProc<"silvermont">;
-def : SilvermontProc<"slm">; // Legacy alias.
-
-class ProcessorFeatures<list<SubtargetFeature> Inherited,
- list<SubtargetFeature> NewFeatures> {
- list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
+foreach P = ["bonnell", "atom"] in {
+ def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;
}
-class ProcModel<string Name, SchedMachineModel Model,
- list<SubtargetFeature> ProcFeatures,
- list<SubtargetFeature> OtherFeatures> :
- ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
-
-def GLMFeatures : ProcessorFeatures<[], [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeaturePOPCNT,
- FeaturePCLMUL,
- FeatureAES,
- FeaturePRFCHW,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureLAHFSAHF,
- FeatureMPX,
- FeatureSHA,
- FeatureRDRAND,
- FeatureRDSEED,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureXSAVEC,
- FeatureXSAVES,
- FeatureCLFLUSHOPT,
- FeatureFSGSBase
-]>;
+foreach P = ["silvermont", "slm"] in {
+ def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;
+}
-class GoldmontProc<string Name> : ProcModel<Name, SLMModel,
- GLMFeatures.Value, [
- ProcIntelGLM,
- FeaturePOPCNTFalseDeps
-]>;
-def : GoldmontProc<"goldmont">;
-
-def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [
- FeaturePTWRITE,
- FeatureRDPID,
- FeatureSGX
-]>;
-
-class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel,
- GLPFeatures.Value, [
- ProcIntelGLP
-]>;
-def : GoldmontPlusProc<"goldmont-plus">;
-
-class TremontProc<string Name> : ProcModel<Name, SLMModel,
- GLPFeatures.Value, [
- ProcIntelTRM,
- FeatureCLDEMOTE,
- FeatureGFNI,
- FeatureMOVDIRI,
- FeatureMOVDIR64B,
- FeatureWAITPKG
-]>;
-def : TremontProc<"tremont">;
+def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>;
+def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>;
+def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>;
// "Arrandale" along with corei3 and corei5
-class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureLAHFSAHF,
- FeatureMacroFusion
-]>;
-def : NehalemProc<"nehalem">;
-def : NehalemProc<"corei7">;
+foreach P = ["nehalem", "corei7"] in {
+ def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;
+}
-// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeaturePCLMUL,
- FeatureLAHFSAHF,
- FeatureMacroFusion
-]>;
-def : WestmereProc<"westmere">;
-
-// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
-// rather than a superset.
-def SNBFeatures : ProcessorFeatures<[], [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureAVX,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureSlowDivide64,
- FeaturePCLMUL,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureLAHFSAHF,
- FeatureSlow3OpsLEA,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureSlowIncDec,
- FeatureMergeToThreeWayBranch,
- FeatureMacroFusion
-]>;
-
-class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
- SNBFeatures.Value, [
- FeatureSlowUAMem32,
- FeaturePOPCNTFalseDeps
-]>;
-def : SandyBridgeProc<"sandybridge">;
-def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
-
-def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase
-]>;
-
-class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
- IVBFeatures.Value, [
- FeatureSlowUAMem32,
- FeaturePOPCNTFalseDeps
-]>;
-def : IvyBridgeProc<"ivybridge">;
-def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
-
-def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
- FeatureAVX2,
- FeatureBMI,
- FeatureBMI2,
- FeatureERMSB,
- FeatureFMA,
- FeatureINVPCID,
- FeatureLZCNT,
- FeatureMOVBE,
- FeatureFastVariableShuffle
-]>;
-
-class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
- HSWFeatures.Value, [
- FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps
-]>;
-def : HaswellProc<"haswell">;
-def : HaswellProc<"core-avx2">; // Legacy alias.
+def : ProcessorModel<"westmere", SandyBridgeModel,
+ ProcessorFeatures.WSMFeatures>;
-def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
- FeatureADX,
- FeatureRDSEED,
- FeaturePRFCHW
-]>;
-class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
- BDWFeatures.Value, [
- FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps
-]>;
-def : BroadwellProc<"broadwell">;
-
-def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
- FeatureAES,
- FeatureMPX,
- FeatureXSAVEC,
- FeatureXSAVES,
- FeatureCLFLUSHOPT,
- FeatureFastVectorFSQRT
-]>;
-
-class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
- SKLFeatures.Value, [
- FeatureHasFastGather,
- FeaturePOPCNTFalseDeps,
- FeatureSGX
-]>;
-def : SkylakeClientProc<"skylake">;
+foreach P = ["sandybridge", "corei7-avx"] in {
+ def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;
+}
-def KNLFeatures : ProcessorFeatures<[], [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureSlowDivide64,
- FeaturePCLMUL,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureLAHFSAHF,
- FeatureSlow3OpsLEA,
- FeatureSlowIncDec,
- FeatureAES,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureAVX512,
- FeatureERI,
- FeatureCDI,
- FeaturePFI,
- FeaturePREFETCHWT1,
- FeatureADX,
- FeatureRDSEED,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeaturePRFCHW
-]>;
+foreach P = ["ivybridge", "core-avx-i"] in {
+ def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;
+}
-// FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
- KNLFeatures.Value, [
- FeatureSlowTwoMemOps,
- FeatureFastPartialYMMorZMMWrite,
- FeatureHasFastGather,
- FeatureSlowPMADDWD
-]>;
-def : KnightsLandingProc<"knl">;
-
-class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
- KNLFeatures.Value, [
- FeatureSlowTwoMemOps,
- FeatureFastPartialYMMorZMMWrite,
- FeatureHasFastGather,
- FeatureSlowPMADDWD,
- FeatureVPOPCNTDQ
-]>;
-def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
-
-def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
- FeatureAVX512,
- FeatureCDI,
- FeatureDQI,
- FeatureBWI,
- FeatureVLX,
- FeaturePKU,
- FeatureCLWB
-]>;
+foreach P = ["haswell", "core-avx2"] in {
+ def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;
+}
-class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
- SKXFeatures.Value, [
- FeatureHasFastGather,
- FeaturePOPCNTFalseDeps
-]>;
-def : SkylakeServerProc<"skylake-avx512">;
-def : SkylakeServerProc<"skx">; // Legacy alias.
+def : ProcessorModel<"broadwell", BroadwellModel,
+ ProcessorFeatures.BDWFeatures>;
-def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
- FeatureVNNI
-]>;
+def : ProcessorModel<"skylake", SkylakeClientModel,
+ ProcessorFeatures.SKLFeatures>;
-class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
- CLXFeatures.Value, [
- FeatureHasFastGather,
- FeaturePOPCNTFalseDeps
-]>;
-def : CascadelakeProc<"cascadelake">;
-
-def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
- FeatureAVX512,
- FeatureCDI,
- FeatureDQI,
- FeatureBWI,
- FeatureVLX,
- FeaturePKU,
- FeatureVBMI,
- FeatureIFMA,
- FeatureSHA,
- FeatureSGX
-]>;
+// FIXME: define KNL scheduler model
+def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
+def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;
-class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
- CNLFeatures.Value, [
- FeatureHasFastGather
-]>;
-def : CannonlakeProc<"cannonlake">;
-
-def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
- FeatureBITALG,
- FeatureVAES,
- FeatureVBMI2,
- FeatureVNNI,
- FeatureVPCLMULQDQ,
- FeatureVPOPCNTDQ,
- FeatureGFNI,
- FeatureCLWB,
- FeatureRDPID
-]>;
-
-class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
- ICLFeatures.Value, [
- FeatureHasFastGather
-]>;
-def : IcelakeClientProc<"icelake-client">;
+foreach P = ["skylake-avx512", "skx"] in {
+ def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;
+}
-class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
- ICLFeatures.Value, [
- FeaturePCONFIG,
- FeatureWBNOINVD,
- FeatureHasFastGather
-]>;
-def : IcelakeServerProc<"icelake-server">;
+def : ProcessorModel<"cascadelake", SkylakeServerModel,
+ ProcessorFeatures.CLXFeatures>;
+def : ProcessorModel<"cooperlake", SkylakeServerModel,
+ ProcessorFeatures.CPXFeatures>;
+def : ProcessorModel<"cannonlake", SkylakeServerModel,
+ ProcessorFeatures.CNLFeatures>;
+def : ProcessorModel<"icelake-client", SkylakeServerModel,
+ ProcessorFeatures.ICLFeatures>;
+def : ProcessorModel<"icelake-server", SkylakeServerModel,
+ ProcessorFeatures.ICXFeatures>;
// AMD CPUs.
-def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
-def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX]>;
+def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ Feature3DNow]>;
+def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ Feature3DNow]>;
foreach P = ["athlon", "athlon-tbird"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
- FeatureNOPL, FeatureSlowSHLD]>;
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
+ Feature3DNowA, FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
+ FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
+ FeatureSlowSHLD]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
- FeatureCMOV]>;
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
+ Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
+ FeatureFastScalarShiftMasks]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
- FeatureCMOV, Feature64Bit]>;
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
+ Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
+ FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
+ FeatureFastScalarShiftMasks]>;
}
foreach P = ["amdfam10", "barcelona"] in {
- def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
- FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
+ def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
+ Feature64Bit, FeatureFastScalarShiftMasks]>;
}
// Bobcat
-def : Proc<"btver1", [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSSE3,
- FeatureSSE4A,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePRFCHW,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureFast15ByteNOP
-]>;
-
+def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
// Jaguar
-def : ProcessorModel<"btver2", BtVer2Model, [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureAVX,
- FeatureFXSR,
- FeatureNOPL,
- FeatureSSE4A,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePRFCHW,
- FeatureAES,
- FeaturePCLMUL,
- FeatureBMI,
- FeatureF16C,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureFastLZCNT,
- FeaturePOPCNT,
- FeatureXSAVE,
- FeatureXSAVEOPT,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureFast15ByteNOP,
- FeatureFastBEXTR,
- FeatureFastPartialYMMorZMMWrite,
- FeatureFastHorizontalOps
-]>;
+def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;
// Bulldozer
-def : ProcessorModel<"bdver1", BdVer2Model, [
- FeatureX87,
- FeatureCMOV,
- FeatureXOP,
- FeatureFMA4,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureAES,
- FeaturePRFCHW,
- FeaturePCLMUL,
- FeatureMMX,
- FeatureAVX,
- FeatureFXSR,
- FeatureNOPL,
- FeatureSSE4A,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureXSAVE,
- FeatureLWP,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureFast11ByteNOP,
- FeatureMacroFusion
-]>;
+def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
// Piledriver
-def : ProcessorModel<"bdver2", BdVer2Model, [
- FeatureX87,
- FeatureCMOV,
- FeatureXOP,
- FeatureFMA4,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureAES,
- FeaturePRFCHW,
- FeaturePCLMUL,
- FeatureMMX,
- FeatureAVX,
- FeatureFXSR,
- FeatureNOPL,
- FeatureSSE4A,
- FeatureF16C,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureXSAVE,
- FeatureBMI,
- FeatureTBM,
- FeatureLWP,
- FeatureFMA,
- FeatureSlowSHLD,
- FeatureLAHFSAHF,
- FeatureFast11ByteNOP,
- FeatureFastBEXTR,
- FeatureMacroFusion
-]>;
-
+def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
// Steamroller
-def : Proc<"bdver3", [
- FeatureX87,
- FeatureCMOV,
- FeatureXOP,
- FeatureFMA4,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureAES,
- FeaturePRFCHW,
- FeaturePCLMUL,
- FeatureMMX,
- FeatureAVX,
- FeatureFXSR,
- FeatureNOPL,
- FeatureSSE4A,
- FeatureF16C,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureXSAVE,
- FeatureBMI,
- FeatureTBM,
- FeatureLWP,
- FeatureFMA,
- FeatureXSAVEOPT,
- FeatureSlowSHLD,
- FeatureFSGSBase,
- FeatureLAHFSAHF,
- FeatureFast11ByteNOP,
- FeatureFastBEXTR,
- FeatureMacroFusion
-]>;
-
+def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
// Excavator
-def : Proc<"bdver4", [
- FeatureX87,
- FeatureCMOV,
- FeatureMMX,
- FeatureAVX2,
- FeatureFXSR,
- FeatureNOPL,
- FeatureXOP,
- FeatureFMA4,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureAES,
- FeaturePRFCHW,
- FeaturePCLMUL,
- FeatureF16C,
- FeatureLZCNT,
- FeaturePOPCNT,
- FeatureXSAVE,
- FeatureBMI,
- FeatureBMI2,
- FeatureTBM,
- FeatureLWP,
- FeatureFMA,
- FeatureXSAVEOPT,
- FeatureSlowSHLD,
- FeatureFSGSBase,
- FeatureLAHFSAHF,
- FeatureFastBEXTR,
- FeatureFast11ByteNOP,
- FeatureMWAITX,
- FeatureMacroFusion
-]>;
+def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
- FeatureADX,
- FeatureAES,
- FeatureAVX2,
- FeatureBMI,
- FeatureBMI2,
- FeatureCLFLUSHOPT,
- FeatureCLZERO,
- FeatureCMOV,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeatureF16C,
- FeatureFMA,
- FeatureFSGSBase,
- FeatureFXSR,
- FeatureNOPL,
- FeatureFastLZCNT,
- FeatureLAHFSAHF,
- FeatureLZCNT,
- FeatureFastBEXTR,
- FeatureFast15ByteNOP,
- FeatureMacroFusion,
- FeatureMMX,
- FeatureMOVBE,
- FeatureMWAITX,
- FeaturePCLMUL,
- FeaturePOPCNT,
- FeaturePRFCHW,
- FeatureRDRAND,
- FeatureRDSEED,
- FeatureSHA,
- FeatureSSE4A,
- FeatureSlowSHLD,
- FeatureX87,
- FeatureXSAVE,
- FeatureXSAVEC,
- FeatureXSAVEOPT,
- FeatureXSAVES]>;
+def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
+def : ProcessorModel<"znver2", Znver1Model, ProcessorFeatures.ZN2Features>;
-def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
+def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
-def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
+def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ FeatureMMX, FeatureSSE1, FeatureFXSR,
+ FeatureCMOV]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1205,6 +1174,7 @@ def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureX87,
+ FeatureCMPXCHG8B,
FeatureCMOV,
FeatureMMX,
FeatureSSE2,
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 36cef98a1ef5..80120722e0e6 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
+#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86TargetStreamer.h"
+#include "TargetInfo/X86TargetInfo.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
@@ -104,16 +105,16 @@ void X86AsmPrinter::EmitFunctionBodyEnd() {
}
}
-/// printSymbolOperand - Print a raw symbol reference operand. This handles
+/// PrintSymbolOperand - Print a raw symbol reference operand. This handles
/// jump tables, constant pools, global address and external symbols, all of
/// which print to a label with various suffixes for relocation types etc.
-static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
- raw_ostream &O) {
+void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
+ raw_ostream &O) {
switch (MO.getType()) {
default: llvm_unreachable("unknown symbol type!");
case MachineOperand::MO_ConstantPoolIndex:
- P.GetCPISymbol(MO.getIndex())->print(O, P.MAI);
- P.printOffset(MO.getOffset(), O);
+ GetCPISymbol(MO.getIndex())->print(O, MAI);
+ printOffset(MO.getOffset(), O);
break;
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
@@ -121,38 +122,37 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
MCSymbol *GVSym;
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE)
- GVSym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ GVSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
else
- GVSym = P.getSymbol(GV);
+ GVSym = getSymbol(GV);
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
- GVSym =
- P.OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
+ GVSym = OutContext.getOrCreateSymbol(Twine("__imp_") + GVSym->getName());
else if (MO.getTargetFlags() == X86II::MO_COFFSTUB)
GVSym =
- P.OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
+ OutContext.getOrCreateSymbol(Twine(".refptr.") + GVSym->getName());
if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
- MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MCSymbol *Sym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
- P.MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::
- StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage());
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+ !GV->hasInternalLinkage());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
// to avoid having it look like an integer immediate to the assembler.
if (GVSym->getName()[0] != '$')
- GVSym->print(O, P.MAI);
+ GVSym->print(O, MAI);
else {
O << '(';
- GVSym->print(O, P.MAI);
+ GVSym->print(O, MAI);
O << ')';
}
- P.printOffset(MO.getOffset(), O);
+ printOffset(MO.getOffset(), O);
break;
}
}
@@ -169,13 +169,13 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
break;
case X86II::MO_GOT_ABSOLUTE_ADDRESS:
O << " + [.-";
- P.MF->getPICBaseSymbol()->print(O, P.MAI);
+ MF->getPICBaseSymbol()->print(O, MAI);
O << ']';
break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
O << '-';
- P.MF->getPICBaseSymbol()->print(O, P.MAI);
+ MF->getPICBaseSymbol()->print(O, MAI);
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
@@ -193,76 +193,91 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
case X86II::MO_TLVP: O << "@TLVP"; break;
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-';
- P.MF->getPICBaseSymbol()->print(O, P.MAI);
+ MF->getPICBaseSymbol()->print(O, MAI);
break;
case X86II::MO_SECREL: O << "@SECREL32"; break;
}
}
-static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
- unsigned OpNo, raw_ostream &O,
- const char *Modifier = nullptr, unsigned AsmVariant = 0);
-
-/// printPCRelImm - This is used to print an immediate value that ends up
-/// being encoded as a pc-relative value. These print slightly differently, for
-/// example, a $ is not emitted.
-static void printPCRelImm(X86AsmPrinter &P, const MachineInstr *MI,
- unsigned OpNo, raw_ostream &O) {
+void X86AsmPrinter::PrintOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
+ const bool IsATT = MI->getInlineAsmDialect() == InlineAsm::AD_ATT;
switch (MO.getType()) {
- default: llvm_unreachable("Unknown pcrel immediate operand");
- case MachineOperand::MO_Register:
- // pc-relativeness was handled when computing the value in the reg.
- printOperand(P, MI, OpNo, O);
+ default: llvm_unreachable("unknown operand type!");
+ case MachineOperand::MO_Register: {
+ if (IsATT)
+ O << '%';
+ O << X86ATTInstPrinter::getRegisterName(MO.getReg());
return;
+ }
+
case MachineOperand::MO_Immediate:
+ if (IsATT)
+ O << '$';
O << MO.getImm();
return;
- case MachineOperand::MO_GlobalAddress:
- printSymbolOperand(P, MO, O);
- return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ if (IsATT)
+ O << '$';
+ PrintSymbolOperand(MO, O);
+ break;
+ }
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol *Sym = GetBlockAddressSymbol(MO.getBlockAddress());
+ Sym->print(O, MAI);
+ break;
+ }
}
}
-static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
- unsigned OpNo, raw_ostream &O, const char *Modifier,
- unsigned AsmVariant) {
+/// PrintModifiedOperand - Print subregisters based on supplied modifier,
+/// deferring to PrintOperand() if no modifier was supplied or if operand is not
+/// a register.
+void X86AsmPrinter::PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
const MachineOperand &MO = MI->getOperand(OpNo);
- switch (MO.getType()) {
- default: llvm_unreachable("unknown operand type!");
- case MachineOperand::MO_Register: {
- // FIXME: Enumerating AsmVariant, so we can remove magic number.
- if (AsmVariant == 0) O << '%';
- unsigned Reg = MO.getReg();
- if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
- (strcmp(Modifier+6,"32") == 0) ? 32 :
- (strcmp(Modifier+6,"16") == 0) ? 16 : 8;
- Reg = getX86SubSuperRegister(Reg, Size);
- }
- O << X86ATTInstPrinter::getRegisterName(Reg);
- return;
+ if (!Modifier || MO.getType() != MachineOperand::MO_Register)
+ return PrintOperand(MI, OpNo, O);
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
+ O << '%';
+ unsigned Reg = MO.getReg();
+ if (strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
+ (strcmp(Modifier+6,"32") == 0) ? 32 :
+ (strcmp(Modifier+6,"16") == 0) ? 16 : 8;
+ Reg = getX86SubSuperRegister(Reg, Size);
}
+ O << X86ATTInstPrinter::getRegisterName(Reg);
+}
+/// PrintPCRelImm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86AsmPrinter::PrintPCRelImm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ PrintOperand(MI, OpNo, O);
+ return;
case MachineOperand::MO_Immediate:
- if (AsmVariant == 0) O << '$';
O << MO.getImm();
return;
-
- case MachineOperand::MO_GlobalAddress: {
- if (AsmVariant == 0) O << '$';
- printSymbolOperand(P, MO, O);
- break;
- }
+ case MachineOperand::MO_GlobalAddress:
+ PrintSymbolOperand(MO, O);
+ return;
}
}
-static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
- unsigned Op, raw_ostream &O,
- const char *Modifier = nullptr) {
- const MachineOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
- const MachineOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
- const MachineOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
+void X86AsmPrinter::PrintLeaMemReference(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &BaseReg = MI->getOperand(OpNo + X86::AddrBaseReg);
+ const MachineOperand &IndexReg = MI->getOperand(OpNo + X86::AddrIndexReg);
+ const MachineOperand &DispSpec = MI->getOperand(OpNo + X86::AddrDisp);
// If we really don't want to print out (rip), don't.
bool HasBaseReg = BaseReg.getReg() != 0;
@@ -284,7 +299,8 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
}
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ConstantPoolIndex:
- printSymbolOperand(P, DispSpec, O);
+ PrintSymbolOperand(DispSpec, O);
+ break;
}
if (Modifier && strcmp(Modifier, "H") == 0)
@@ -296,12 +312,12 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
O << '(';
if (HasBaseReg)
- printOperand(P, MI, Op+X86::AddrBaseReg, O, Modifier);
+ PrintModifiedOperand(MI, OpNo + X86::AddrBaseReg, O, Modifier);
if (IndexReg.getReg()) {
O << ',';
- printOperand(P, MI, Op+X86::AddrIndexReg, O, Modifier);
- unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
+ PrintModifiedOperand(MI, OpNo + X86::AddrIndexReg, O, Modifier);
+ unsigned ScaleVal = MI->getOperand(OpNo + X86::AddrScaleAmt).getImm();
if (ScaleVal != 1)
O << ',' << ScaleVal;
}
@@ -309,31 +325,28 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
}
}
-static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI,
- unsigned Op, raw_ostream &O,
- const char *Modifier = nullptr) {
- assert(isMem(*MI, Op) && "Invalid memory reference!");
- const MachineOperand &Segment = MI->getOperand(Op+X86::AddrSegmentReg);
+void X86AsmPrinter::PrintMemReference(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ assert(isMem(*MI, OpNo) && "Invalid memory reference!");
+ const MachineOperand &Segment = MI->getOperand(OpNo + X86::AddrSegmentReg);
if (Segment.getReg()) {
- printOperand(P, MI, Op+X86::AddrSegmentReg, O, Modifier);
+ PrintModifiedOperand(MI, OpNo + X86::AddrSegmentReg, O, Modifier);
O << ':';
}
- printLeaMemReference(P, MI, Op, O, Modifier);
+ PrintLeaMemReference(MI, OpNo, O, Modifier);
}
-static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
- unsigned Op, raw_ostream &O,
- const char *Modifier = nullptr,
- unsigned AsmVariant = 1) {
- const MachineOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
- unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
- const MachineOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
- const MachineOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
- const MachineOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
+void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
+ unsigned OpNo, raw_ostream &O) {
+ const MachineOperand &BaseReg = MI->getOperand(OpNo + X86::AddrBaseReg);
+ unsigned ScaleVal = MI->getOperand(OpNo + X86::AddrScaleAmt).getImm();
+ const MachineOperand &IndexReg = MI->getOperand(OpNo + X86::AddrIndexReg);
+ const MachineOperand &DispSpec = MI->getOperand(OpNo + X86::AddrDisp);
+ const MachineOperand &SegReg = MI->getOperand(OpNo + X86::AddrSegmentReg);
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(P, MI, Op+X86::AddrSegmentReg, O, Modifier, AsmVariant);
+ PrintOperand(MI, OpNo + X86::AddrSegmentReg, O);
O << ':';
}
@@ -341,7 +354,7 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
bool NeedPlus = false;
if (BaseReg.getReg()) {
- printOperand(P, MI, Op+X86::AddrBaseReg, O, Modifier, AsmVariant);
+ PrintOperand(MI, OpNo + X86::AddrBaseReg, O);
NeedPlus = true;
}
@@ -349,13 +362,13 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
O << ScaleVal << '*';
- printOperand(P, MI, Op+X86::AddrIndexReg, O, Modifier, AsmVariant);
+ PrintOperand(MI, OpNo + X86::AddrIndexReg, O);
NeedPlus = true;
}
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
- printOperand(P, MI, Op+X86::AddrDisp, O, Modifier, AsmVariant);
+ PrintOperand(MI, OpNo + X86::AddrDisp, O);
} else {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) {
@@ -418,7 +431,6 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -429,7 +441,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
switch (ExtraCode[0]) {
default:
// See if this is a generic print operand
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
case 'a': // This is an address. Currently only 'i' and 'r' are expected.
switch (MO.getType()) {
default:
@@ -442,13 +454,13 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_ExternalSymbol:
llvm_unreachable("unexpected operand type!");
case MachineOperand::MO_GlobalAddress:
- printSymbolOperand(*this, MO, O);
+ PrintSymbolOperand(MO, O);
if (Subtarget->isPICStyleRIPRel())
O << "(%rip)";
return false;
case MachineOperand::MO_Register:
O << '(';
- printOperand(*this, MI, OpNo, O);
+ PrintOperand(MI, OpNo, O);
O << ')';
return false;
}
@@ -456,7 +468,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 'c': // Don't print "$" before a global var name or constant.
switch (MO.getType()) {
default:
- printOperand(*this, MI, OpNo, O);
+ PrintOperand(MI, OpNo, O);
break;
case MachineOperand::MO_Immediate:
O << MO.getImm();
@@ -466,7 +478,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_ExternalSymbol:
llvm_unreachable("unexpected operand type!");
case MachineOperand::MO_GlobalAddress:
- printSymbolOperand(*this, MO, O);
+ PrintSymbolOperand(MO, O);
break;
}
return false;
@@ -474,7 +486,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 'A': // Print '*' before a register (it must be a register)
if (MO.isReg()) {
O << '*';
- printOperand(*this, MI, OpNo, O);
+ PrintOperand(MI, OpNo, O);
return false;
}
return true;
@@ -487,11 +499,11 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 'V': // Print native register without '%'
if (MO.isReg())
return printAsmMRegister(*this, MO, ExtraCode[0], O);
- printOperand(*this, MI, OpNo, O);
+ PrintOperand(MI, OpNo, O);
return false;
case 'P': // This is the operand of a call, treat specially.
- printPCRelImm(*this, MI, OpNo, O);
+ PrintPCRelImm(MI, OpNo, O);
return false;
case 'n': // Negate the immediate or print a '-' before the operand.
@@ -505,16 +517,15 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
}
- printOperand(*this, MI, OpNo, O, /*Modifier*/ nullptr, AsmVariant);
+ PrintOperand(MI, OpNo, O);
return false;
}
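For reference, the 'c' case handled above is the modifier GNU-style inline asm exposes as %cN: it prints a constant operand without the AT&T '$' prefix so it can appear inside an addressing expression. A hedged sketch (operand values and the function name are illustrative):

// %c1 prints the constant 4 bare, so the template expands to e.g.
// "movl 4(%rdi), %eax" rather than the invalid "movl $4(%rdi), %eax".
int load_at_offset_4(const int *P) {
  int V;
  asm("movl %c1(%2), %0" : "=r"(V) : "i"(4), "r"(P) : "memory");
  return V;
}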
-bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode,
raw_ostream &O) {
- if (AsmVariant) {
- printIntelMemReference(*this, MI, OpNo, O);
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
+ PrintIntelMemReference(MI, OpNo, O);
return false;
}
@@ -531,14 +542,14 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// These only apply to registers, ignore on mem.
break;
case 'H':
- printMemReference(*this, MI, OpNo, O, "H");
+ PrintMemReference(MI, OpNo, O, "H");
return false;
case 'P': // Don't print @PLT, but do print as memory.
- printMemReference(*this, MI, OpNo, O, "no-rip");
+ PrintMemReference(MI, OpNo, O, "no-rip");
return false;
}
}
- printMemReference(*this, MI, OpNo, O);
+ PrintMemReference(MI, OpNo, O, nullptr);
return false;
}
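The Intel-dialect path above is reached from MS-style asm blocks (LLVM IR "asm inteldialect"). A rough sketch, assuming a 32-bit x86 target with MS asm blocks enabled (-fms-extensions/-fasm-blocks); the function is illustrative only:

// References to the locals P and V become memory operands of the resulting
// inline asm, and those operands are what PrintIntelMemReference formats in
// Intel syntax.
int load32(int *P) {
  int V;
  __asm {
    mov eax, P
    mov eax, dword ptr [eax]
    mov V, eax
  }
  return V;
}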
@@ -683,26 +694,31 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// stripping. Since LLVM never generates code that does this, it is always
// safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- return;
- }
-
- if (TT.isKnownWindowsMSVCEnvironment() && MMI->usesVAFloatArgument()) {
- StringRef SymbolName =
- (TT.getArch() == Triple::x86_64) ? "_fltused" : "__fltused";
- MCSymbol *S = MMI->getContext().getOrCreateSymbol(SymbolName);
- OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
- return;
- }
-
- if (TT.isOSBinFormatCOFF()) {
+ } else if (TT.isOSBinFormatCOFF()) {
+ if (MMI->usesMSVCFloatingPoint()) {
+ // In Windows' libcmt.lib, there is a file which is linked in only if the
+ // symbol _fltused is referenced. Linking this in causes some
+ // side-effects:
+ //
+ // 1. For x86-32, it will set the x87 rounding mode to 53-bit instead of
+ // 64-bit mantissas at program start.
+ //
+ // 2. It links in support routines for floating-point in scanf and printf.
+ //
+ // MSVC emits an undefined reference to _fltused when there are any
+ // floating point operations in the program (including calls). A program
+ // that only has: `scanf("%f", &global_float);` may fail to trigger this,
+ // but oh well...that's a documented issue.
+ StringRef SymbolName =
+ (TT.getArch() == Triple::x86) ? "__fltused" : "_fltused";
+ MCSymbol *S = MMI->getContext().getOrCreateSymbol(SymbolName);
+ OutStreamer->EmitSymbolAttribute(S, MCSA_Global);
+ return;
+ }
emitStackMaps(SM);
- return;
- }
-
- if (TT.isOSBinFormatELF()) {
+ } else if (TT.isOSBinFormatELF()) {
emitStackMaps(SM);
FM.serializeToFaultMapSection();
- return;
}
}
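As a minimal illustration of the _fltused comment above: on an MSVC/COFF target, any floating-point arithmetic in a module is enough for the compiler to emit the undefined reference that drags the floating-point support object out of libcmt.lib. The function below is just an example.

// Compiling this for an MSVC environment produces an undefined symbol
// _fltused (x86-64) / __fltused (x86-32), which the CRT resolves to its
// floating-point support object.
double half(double X) { return X * 0.5; }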
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 55abdf2ba601..a011310970b3 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -1,9 +1,8 @@
//===-- X86AsmPrinter.h - X86 implementation of AsmPrinter ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -103,6 +102,18 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// Choose between emitting .seh_ directives and .cv_fpo_ directives.
void EmitSEHInstruction(const MachineInstr *MI);
+ void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override;
+ void PrintOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ void PrintModifiedOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier);
+ void PrintPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ void PrintLeaMemReference(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier);
+ void PrintMemReference(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier);
+ void PrintIntelMemReference(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O);
+
public:
X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
@@ -124,11 +135,9 @@ public:
}
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
+ const char *ExtraCode, raw_ostream &OS) override;
bool doInitialization(Module &M) override {
SMShadowTracker.reset(0);
diff --git a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 627a6cb14514..3dcc1015dc7c 100644
--- a/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -1,9 +1,8 @@
//===- X86AvoidStoreForwardingBlockis.cpp - Avoid HW Store Forward Block --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -69,9 +68,7 @@ using DisplacementSizeMap = std::map<int64_t, unsigned>;
class X86AvoidSFBPass : public MachineFunctionPass {
public:
static char ID;
- X86AvoidSFBPass() : MachineFunctionPass(ID) {
- initializeX86AvoidSFBPassPass(*PassRegistry::getPassRegistry());
- }
+ X86AvoidSFBPass() : MachineFunctionPass(ID) { }
StringRef getPassName() const override {
return "X86 Avoid Store Forwarding Blocks";
@@ -343,6 +340,8 @@ findPotentialBlockers(MachineInstr *LoadInst) {
for (auto PBInst = std::next(MachineBasicBlock::reverse_iterator(LoadInst)),
E = LoadInst->getParent()->rend();
PBInst != E; ++PBInst) {
+ if (PBInst->isMetaInstruction())
+ continue;
BlockCount++;
if (BlockCount >= InspectionLimit)
break;
@@ -366,6 +365,8 @@ findPotentialBlockers(MachineInstr *LoadInst) {
for (MachineBasicBlock::reverse_iterator PBInst = PMBB->rbegin(),
PME = PMBB->rend();
PBInst != PME; ++PBInst) {
+ if (PBInst->isMetaInstruction())
+ continue;
PredCount++;
if (PredCount >= LimitLeft)
break;
@@ -407,7 +408,10 @@ void X86AvoidSFBPass::buildCopy(MachineInstr *LoadInst, unsigned NLoadOpcode,
// If the load and store are consecutive, use the loadInst location to
// reduce register pressure.
MachineInstr *StInst = StoreInst;
- if (StoreInst->getPrevNode() == LoadInst)
+ auto PrevInstrIt = skipDebugInstructionsBackward(
+ std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
+ MBB->instr_begin());
+ if (PrevInstrIt.getNodePtr() == LoadInst)
StInst = LoadInst;
MachineInstr *NewStore =
BuildMI(*MBB, StInst, StInst->getDebugLoc(), TII->get(NStoreOpcode))
@@ -492,19 +496,22 @@ void X86AvoidSFBPass::buildCopies(int Size, MachineInstr *LoadInst,
static void updateKillStatus(MachineInstr *LoadInst, MachineInstr *StoreInst) {
MachineOperand &LoadBase = getBaseOperand(LoadInst);
MachineOperand &StoreBase = getBaseOperand(StoreInst);
+ auto StorePrevNonDbgInstr = skipDebugInstructionsBackward(
+ std::prev(MachineBasicBlock::instr_iterator(StoreInst)),
+ LoadInst->getParent()->instr_begin()).getNodePtr();
if (LoadBase.isReg()) {
MachineInstr *LastLoad = LoadInst->getPrevNode();
// If the original load and store to xmm/ymm were consecutive
// then the partial copies were also created in
// a consecutive order to reduce register pressure,
// and the location of the last load is before the last store.
- if (StoreInst->getPrevNode() == LoadInst)
+ if (StorePrevNonDbgInstr == LoadInst)
LastLoad = LoadInst->getPrevNode()->getPrevNode();
getBaseOperand(LastLoad).setIsKill(LoadBase.isKill());
}
if (StoreBase.isReg()) {
MachineInstr *StInst = StoreInst;
- if (StoreInst->getPrevNode() == LoadInst)
+ if (StorePrevNonDbgInstr == LoadInst)
StInst = LoadInst;
getBaseOperand(StInst->getPrevNode()).setIsKill(StoreBase.isKill());
}
@@ -531,7 +538,7 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
if (!isPotentialBlockedMemCpyLd(MI.getOpcode()))
continue;
int DefVR = MI.getOperand(0).getReg();
- if (!MRI->hasOneUse(DefVR))
+ if (!MRI->hasOneNonDBGUse(DefVR))
continue;
for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
UI != UE;) {
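
The hunks above consistently replace raw getPrevNode()/hasOneUse queries with debug-aware ones (isMetaInstruction, skipDebugInstructionsBackward, hasOneNonDBGUse), so that the presence of DBG_VALUE instructions cannot change what the pass does. A minimal standalone sketch of that adjacency test, using made-up types rather than the LLVM API:

#include <cassert>
#include <vector>

struct Instr { bool IsDebug; };

// Index of the previous non-debug instruction before Pos, or -1 if none.
static int prevNonDebug(const std::vector<Instr> &MBB, int Pos) {
  for (int I = Pos - 1; I >= 0; --I)
    if (!MBB[I].IsDebug)
      return I;
  return -1;
}

int main() {
  // Positions: load (0), DBG_VALUE (1), store (2). The load and store still
  // count as consecutive once debug instructions are skipped.
  std::vector<Instr> MBB = {{false}, {true}, {false}};
  assert(prevNonDebug(MBB, 2) == 0);
  return 0;
}
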
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 24d7a219e751..4df849a2e14c 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -1,9 +1,8 @@
//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -60,10 +59,7 @@ namespace {
class X86CallFrameOptimization : public MachineFunctionPass {
public:
- X86CallFrameOptimization() : MachineFunctionPass(ID) {
- initializeX86CallFrameOptimizationPass(
- *PassRegistry::getPassRegistry());
- }
+ X86CallFrameOptimization() : MachineFunctionPass(ID) { }
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 1dc83b76595d..b16b3839c85a 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -1,9 +1,8 @@
//===- llvm/lib/Target/X86/X86CallLowering.cpp - Call lowering ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -48,8 +47,6 @@
using namespace llvm;
-#include "X86GenCallingConv.inc"
-
X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
: CallLowering(&TLI) {}
@@ -64,6 +61,7 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+  assert(OrigArg.Regs.size() == 1 && "Can't handle multiple regs yet");
if (OrigArg.Ty->isVoidTy())
return true;
@@ -73,12 +71,12 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
if (NumParts == 1) {
// replace the original type ( pointer -> GPR ).
- SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context),
+ SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Context),
OrigArg.Flags, OrigArg.IsFixed);
return true;
}
- SmallVector<unsigned, 8> SplitRegs;
+ SmallVector<Register, 8> SplitRegs;
EVT PartVT = TLI.getRegisterType(Context, VT);
Type *PartTy = PartVT.getTypeForEVT(Context);
@@ -88,7 +86,7 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)),
PartTy, OrigArg.Flags};
SplitArgs.push_back(Info);
- SplitRegs.push_back(Info.Reg);
+ SplitRegs.push_back(Info.Regs[0]);
}
PerformArgSplit(SplitRegs);
@@ -104,28 +102,28 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
DL(MIRBuilder.getMF().getDataLayout()),
STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {}
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0));
LLT SType = LLT::scalar(DL.getPointerSizeInBits(0));
- unsigned SPReg = MRI.createGenericVirtualRegister(p0);
+ Register SPReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildCopy(SPReg, STI.getRegisterInfo()->getStackRegister());
- unsigned OffsetReg = MRI.createGenericVirtualRegister(SType);
+ Register OffsetReg = MRI.createGenericVirtualRegister(SType);
MIRBuilder.buildConstant(OffsetReg, Offset);
- unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ Register AddrReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
return AddrReg;
}
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
- unsigned ExtReg;
+ Register ExtReg;
// If we are copying the value to a physical register with the
// size larger than the size of the value itself - build AnyExt
// to the size of the register first and only then do the copy.
@@ -146,12 +144,12 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- unsigned ExtReg = extendRegister(ValVReg, VA);
+ Register ExtReg = extendRegister(ValVReg, VA);
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
- /* Alignment */ 0);
+ /* Alignment */ 1);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}
@@ -185,7 +183,7 @@ protected:
bool X86CallLowering::lowerReturn(
MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const {
+ ArrayRef<Register> VRegs) const {
assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
"Return value without a vreg");
auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
@@ -208,7 +206,7 @@ bool X86CallLowering::lowerReturn(
ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
+ [&](ArrayRef<Register> Regs) {
MIRBuilder.buildUnmerge(Regs, VRegs[i]);
}))
return false;
@@ -231,7 +229,9 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
: ValueHandler(MIRBuilder, MRI, AssignFn),
DL(MIRBuilder.getMF().getDataLayout()) {}
- unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ bool isArgumentHandler() const override { return true; }
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
auto &MFI = MIRBuilder.getMF().getFrameInfo();
int FI = MFI.CreateFixedObject(Size, Offset, true);
@@ -243,15 +243,15 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
return AddrReg;
}
- void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
- 0);
+ 1);
MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
}
- void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
@@ -320,9 +320,9 @@ protected:
} // end anonymous namespace
-bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function &F,
- ArrayRef<unsigned> VRegs) const {
+bool X86CallLowering::lowerFormalArguments(
+ MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<ArrayRef<Register>> VRegs) const {
if (F.arg_empty())
return true;
@@ -344,14 +344,14 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
Arg.hasAttribute(Attribute::StructRet) ||
Arg.hasAttribute(Attribute::SwiftSelf) ||
Arg.hasAttribute(Attribute::SwiftError) ||
- Arg.hasAttribute(Attribute::Nest))
+ Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
return false;
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
- MIRBuilder.buildMerge(VRegs[Idx], Regs);
+ [&](ArrayRef<Register> Regs) {
+ MIRBuilder.buildMerge(VRegs[Idx][0], Regs);
}))
return false;
Idx++;
@@ -409,9 +409,12 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (OrigArg.Flags.isByVal())
return false;
+ if (OrigArg.Regs.size() > 1)
+ return false;
+
if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
- MIRBuilder.buildUnmerge(Regs, OrigArg.Reg);
+ [&](ArrayRef<Register> Regs) {
+ MIRBuilder.buildUnmerge(Regs, OrigArg.Regs[0]);
}))
return false;
}
@@ -451,12 +454,15 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
- if (OrigRet.Reg) {
+ if (!OrigRet.Ty->isVoidTy()) {
+ if (OrigRet.Regs.size() > 1)
+ return false;
+
SplitArgs.clear();
- SmallVector<unsigned, 8> NewRegs;
+ SmallVector<Register, 8> NewRegs;
if (!splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs) {
+ [&](ArrayRef<Register> Regs) {
NewRegs.assign(Regs.begin(), Regs.end());
}))
return false;
@@ -466,7 +472,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
if (!NewRegs.empty())
- MIRBuilder.buildMerge(OrigRet.Reg, NewRegs);
+ MIRBuilder.buildMerge(OrigRet.Regs[0], NewRegs);
}
CallSeqStart.addImm(Handler.getStackSize())
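
The X86CallLowering hunks above move vregs from plain unsigned to the Register type and keep the splitting logic generic: splitToValueTypes creates one vreg per part and hands them to a caller-supplied callback, which merges them for incoming arguments and unmerges them for outgoing ones. A standalone sketch of that callback pattern, with invented stand-in types rather than the real GlobalISel API:

#include <cstdio>
#include <functional>
#include <vector>

using Register = unsigned;                    // stand-in for llvm::Register
using SplitArgTy = std::function<void(const std::vector<Register> &)>;

static Register NextVReg = 100;               // toy virtual-register allocator

// Split a Bits-wide value into 32-bit parts and hand the new vregs to the
// caller-provided callback; a single part needs no split at all.
static bool splitToParts(unsigned Bits, SplitArgTy PerformArgSplit) {
  if (Bits <= 32)
    return true;
  std::vector<Register> Parts;
  for (unsigned I = 0; I < Bits / 32; ++I)
    Parts.push_back(NextVReg++);
  PerformArgSplit(Parts);
  return true;
}

int main() {
  // Model of the formal-argument path: the callback merges the split parts
  // back into the argument's vreg (the real code calls MIRBuilder.buildMerge).
  Register ArgVReg = 5;
  splitToParts(64, [&](const std::vector<Register> &Regs) {
    std::printf("G_MERGE_VALUES %%%u <- %zu parts\n", ArgVReg, Regs.size());
  });
  return 0;
}
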
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index f5f8f9a3ef6d..0445331bc3ff 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -1,9 +1,8 @@
//===- llvm/lib/Target/X86/X86CallLowering.h - Call lowering ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,10 +29,10 @@ public:
X86CallLowering(const X86TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<Register> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
- ArrayRef<unsigned> VRegs) const override;
+ ArrayRef<ArrayRef<Register>> VRegs) const override;
bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
const MachineOperand &Callee, const ArgInfo &OrigRet,
@@ -41,7 +40,7 @@ public:
private:
/// A function of this type is used to perform value split action.
- using SplitArgTy = std::function<void(ArrayRef<unsigned>)>;
+ using SplitArgTy = std::function<void(ArrayRef<Register>)>;
bool splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
diff --git a/lib/Target/X86/X86CallingConv.cpp b/lib/Target/X86/X86CallingConv.cpp
index 59dde982f512..aee344a26764 100644
--- a/lib/Target/X86/X86CallingConv.cpp
+++ b/lib/Target/X86/X86CallingConv.cpp
@@ -1,9 +1,8 @@
//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,16 +11,23 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86CallingConv.h"
#include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
-namespace llvm {
-
-bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+using namespace llvm;
+
+/// When the regcall calling convention is compiled for a 32 bit arch, special
+/// treatment is required for 64 bit masks.
+/// The value should be assigned to two GPRs.
+/// \return true if registers were allocated and false otherwise.
+static bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
// List of GPR registers that are available to store values in regcall
// calling convention.
static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
@@ -113,9 +119,15 @@ static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
return false;
}
-bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+/// The vectorcall calling convention has special handling for vector types
+/// and HVAs on a 64 bit arch.
+/// For HVAs shadow registers might be allocated on the first pass
+/// and actual XMM registers are allocated on the second pass.
+/// For vector types, actual XMM registers are allocated on the first pass.
+/// \return true if registers were allocated and false otherwise.
+static bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
// On the second pass, go through the HVAs only.
if (ArgFlags.isSecArgPass()) {
if (ArgFlags.isHva())
@@ -150,7 +162,10 @@ bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
// created on top of the basic 32 bytes of win64.
// It can happen if the fifth or sixth argument is vector type or HVA.
// At that case for each argument a shadow stack of 8 bytes is allocated.
- if (Reg == X86::XMM4 || Reg == X86::XMM5)
+ const TargetRegisterInfo *TRI =
+ State.getMachineFunction().getSubtarget().getRegisterInfo();
+ if (TRI->regsOverlap(Reg, X86::XMM4) ||
+ TRI->regsOverlap(Reg, X86::XMM5))
State.AllocateStack(8, 8);
if (!ArgFlags.isHva()) {
@@ -165,9 +180,14 @@ bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return ArgFlags.isHva();
}
-bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+/// The vectorcall calling convention has special handling for vector types
+/// and HVAs on a 32 bit arch.
+/// For HVAs actual XMM registers are allocated on the second pass.
+/// For vector types, actual XMM registers are allocated on the first pass.
+/// \return true if registers were allocated and false otherwise.
+static bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
// On the second pass, go through the HVAs only.
if (ArgFlags.isSecArgPass()) {
if (ArgFlags.isHva())
@@ -205,4 +225,110 @@ bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return false; // No register was assigned - Continue the search.
}
-} // End llvm namespace
+static bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ llvm_unreachable("The AnyReg calling convention is only supported by the "
+ "stackmap and patchpoint intrinsics.");
+  // Gracefully fall back to the X86 C calling convention on Release builds.
+ return false;
+}
+
+static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+ // not to split i64 and double between a register and stack
+ static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+ static const unsigned NumRegs = sizeof(RegList) / sizeof(RegList[0]);
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // If this is the first part of a double/i64/i128, or if we're already
+  // in the middle of a split, add to the pending list. If this is not
+  // the end of the split, return; otherwise go on to process the pending
+  // list.
+ if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+ if (!ArgFlags.isSplitEnd())
+ return true;
+ }
+
+ // If there are no pending members, we are not in the middle of a split,
+ // so do the usual inreg stuff.
+ if (PendingMembers.empty()) {
+ if (unsigned Reg = State.AllocateReg(RegList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+ return false;
+ }
+
+ assert(ArgFlags.isSplitEnd());
+
+ // We now have the entire original argument in PendingMembers, so decide
+ // whether to use registers or the stack.
+ // Per the MCU ABI:
+ // a) To use registers, we need to have enough of them free to contain
+ // the entire argument.
+ // b) We never want to use more than 2 registers for a single argument.
+
+ unsigned FirstFree = State.getFirstUnallocated(RegList);
+ bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+ for (auto &It : PendingMembers) {
+ if (UseRegs)
+ It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+ else
+ It.convertToMem(State.AllocateStack(4, 4));
+ State.addLoc(It);
+ }
+
+ PendingMembers.clear();
+
+ return true;
+}
+
+/// X86 interrupt handlers can only take one or two stack arguments, but if
+/// there are two arguments, they are in the opposite order from the standard
+/// convention. Therefore, we have to look at the argument count up front before
+/// allocating stack for each argument.
+static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ const MachineFunction &MF = State.getMachineFunction();
+ size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
+ bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
+ unsigned SlotSize = Is64Bit ? 8 : 4;
+ unsigned Offset;
+ if (ArgCount == 1 && ValNo == 0) {
+ // If we have one argument, the argument is five stack slots big, at fixed
+ // offset zero.
+ Offset = State.AllocateStack(5 * SlotSize, 4);
+ } else if (ArgCount == 2 && ValNo == 0) {
+ // If we have two arguments, the stack slot is *after* the error code
+ // argument. Pretend it doesn't consume stack space, and account for it when
+ // we assign the second argument.
+ Offset = SlotSize;
+ } else if (ArgCount == 2 && ValNo == 1) {
+ // If this is the second of two arguments, it must be the error code. It
+ // appears first on the stack, and is then followed by the five slot
+ // interrupt struct.
+ Offset = 0;
+ (void)State.AllocateStack(6 * SlotSize, 4);
+ } else {
+ report_fatal_error("unsupported x86 interrupt prototype");
+ }
+
+ // FIXME: This should be accounted for in
+ // X86FrameLowering::getFrameIndexReference, not here.
+ if (Is64Bit && ArgCount == 2)
+ Offset += SlotSize;
+
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return true;
+}
+
+// Provides entry points of CC_X86 and RetCC_X86.
+#include "X86GenCallingConv.inc"
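
The new CC_X86_Intr above encodes the x86 interrupt-handler layout described in its comment: a single argument is the five-slot interrupt frame at fixed offset zero, while with two arguments the error code comes first and the frame follows it. A standalone restatement of that offset rule (not the LLVM pass; it assumes a fresh CCState, so the first stack allocation starts at offset 0, and it mirrors the pass's FIXME adjustment on 64-bit):

#include <cassert>
#include <cstddef>

// Returns the stack offset CC_X86_Intr would assign, or ~0u if unsupported.
static unsigned intrArgOffset(std::size_t ArgCount, unsigned ValNo, bool Is64Bit) {
  unsigned SlotSize = Is64Bit ? 8 : 4;
  unsigned Offset;
  if (ArgCount == 1 && ValNo == 0)
    Offset = 0;                 // five-slot interrupt frame at fixed offset zero
  else if (ArgCount == 2 && ValNo == 0)
    Offset = SlotSize;          // frame sits just after the error code
  else if (ArgCount == 2 && ValNo == 1)
    Offset = 0;                 // error code appears first on the stack
  else
    return ~0u;                 // unsupported x86 interrupt prototype
  if (Is64Bit && ArgCount == 2)
    Offset += SlotSize;         // mirrors the FIXME adjustment in the pass
  return Offset;
}

int main() {
  assert(intrArgOffset(1, 0, /*Is64Bit=*/false) == 0);
  assert(intrArgOffset(2, 1, /*Is64Bit=*/false) == 0);   // error code
  assert(intrArgOffset(2, 0, /*Is64Bit=*/false) == 4);   // frame after it
  assert(intrArgOffset(2, 0, /*Is64Bit=*/true) == 16);   // 64-bit: 8 + 8
  return 0;
}
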
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index d0fcbd313312..191e0fa619b2 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -1,9 +1,8 @@
//=== X86CallingConv.h - X86 Custom Calling Convention Routines -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,99 +20,12 @@
namespace llvm {
-/// When regcall calling convention compiled to 32 bit arch, special treatment
-/// is required for 64 bit masks.
-/// The value should be assigned to two GPRs.
-/// \return true if registers were allocated and false otherwise.
-bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State);
-
-/// Vectorcall calling convention has special handling for vector types or
-/// HVA for 64 bit arch.
-/// For HVAs shadow registers might be allocated on the first pass
-/// and actual XMM registers are allocated on the second pass.
-/// For vector types, actual XMM registers are allocated on the first pass.
-/// \return true if registers were allocated and false otherwise.
-bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State);
-
-/// Vectorcall calling convention has special handling for vector types or
-/// HVA for 32 bit arch.
-/// For HVAs actual XMM registers are allocated on the second pass.
-/// For vector types, actual XMM registers are allocated on the first pass.
-/// \return true if registers were allocated and false otherwise.
-bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State);
-
-inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
- CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
- CCState &) {
- llvm_unreachable("The AnyReg calling convention is only supported by the " \
- "stackmap and patchpoint intrinsics.");
- // gracefully fallback to X86 C calling convention on Release builds.
- return false;
-}
-
-inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
- // not to split i64 and double between a register and stack
- static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
- static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
-
- SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
- // If this is the first part of an double/i64/i128, or if we're already
- // in the middle of a split, add to the pending list. If this is not
- // the end of the split, return, otherwise go on to process the pending
- // list
- if (ArgFlags.isSplit() || !PendingMembers.empty()) {
- PendingMembers.push_back(
- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
- if (!ArgFlags.isSplitEnd())
- return true;
- }
-
- // If there are no pending members, we are not in the middle of a split,
- // so do the usual inreg stuff.
- if (PendingMembers.empty()) {
- if (unsigned Reg = State.AllocateReg(RegList)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return true;
- }
- return false;
- }
-
- assert(ArgFlags.isSplitEnd());
-
- // We now have the entire original argument in PendingMembers, so decide
- // whether to use registers or the stack.
- // Per the MCU ABI:
- // a) To use registers, we need to have enough of them free to contain
- // the entire argument.
- // b) We never want to use more than 2 registers for a single argument.
-
- unsigned FirstFree = State.getFirstUnallocated(RegList);
- bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
-
- for (auto &It : PendingMembers) {
- if (UseRegs)
- It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
- else
- It.convertToMem(State.AllocateStack(4, 4));
- State.addLoc(It);
- }
-
- PendingMembers.clear();
+bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
- return true;
-}
+bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
} // End llvm namespace
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index fe49c9ffbd95..1c3034a5116a 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -1,9 +1,8 @@
//===-- X86CallingConv.td - Calling Conventions X86 32/64 --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,7 +147,8 @@ def CC_#NAME : CallingConv<[
CCAssignToStack<32, 32>>,
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v16i32, v8i64, v16f32, v8f64], CCAssignToStack<64, 64>>
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToStack<64, 64>>
]>;
def RetCC_#NAME : CallingConv<[
@@ -477,6 +477,7 @@ def RetCC_X86_64 : CallingConv<[
]>;
// This is the return-value convention used for the entire X86 backend.
+let Entry = 1 in
def RetCC_X86 : CallingConv<[
// Check if this is the Intel OpenCL built-ins calling convention
@@ -567,7 +568,7 @@ def CC_X86_64_C : CallingConv<[
CCAssignToStack<32, 32>>,
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;
@@ -612,7 +613,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
// 512 bit vectors are passed by pointer
- CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+ CCIfType<[v64i8, v32i16, v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
// Long doubles are passed by pointer
CCIfType<[f80], CCPassIndirect<i64>>,
@@ -985,14 +986,6 @@ def CC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<CC_X86_32_C>
]>;
-def CC_X86_32_Intr : CallingConv<[
- CCAssignToStack<4, 4>
-]>;
-
-def CC_X86_64_Intr : CallingConv<[
- CCAssignToStack<8, 8>
-]>;
-
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -1001,7 +994,7 @@ def CC_X86_64_Intr : CallingConv<[
def CC_X86_32 : CallingConv<[
// X86_INTR calling convention is valid in MCU target and should override the
// MCU calling convention. Thus, this should be checked before isTargetMCU().
- CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
+ CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
@@ -1029,7 +1022,7 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
- CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
+ CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
@@ -1039,6 +1032,7 @@ def CC_X86_64 : CallingConv<[
]>;
// This is the argument convention used for the entire X86 backend.
+let Entry = 1 in
def CC_X86 : CallingConv<[
CCIfCC<"CallingConv::Intel_OCL_BI", CCDelegateTo<CC_Intel_OCL_BI>>,
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp
index c3e76fd2a856..a61fa3246f09 100644
--- a/lib/Target/X86/X86CmovConversion.cpp
+++ b/lib/Target/X86/X86CmovConversion.cpp
@@ -1,9 +1,8 @@
//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,9 +101,7 @@ namespace {
/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
- X86CmovConverterPass() : MachineFunctionPass(ID) {
- initializeX86CmovConverterPassPass(*PassRegistry::getPassRegistry());
- }
+ X86CmovConverterPass() : MachineFunctionPass(ID) { }
StringRef getPassName() const override { return "X86 cmov Conversion"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -281,7 +278,8 @@ bool X86CmovConverterPass::collectCmovCandidates(
Group.clear();
// Condition code of first CMOV instruction current processed range and its
// opposite condition code.
- X86::CondCode FirstCC, FirstOppCC, MemOpCC;
+ X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
+ MemOpCC = X86::COND_INVALID;
// Indicator of a non CMOVrr instruction in the current processed range.
bool FoundNonCMOVInst = false;
// Indicator for current processed CMOV-group if it should be skipped.
@@ -291,7 +289,7 @@ bool X86CmovConverterPass::collectCmovCandidates(
// Skip debug instructions.
if (I.isDebugInstr())
continue;
- X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode());
+ X86::CondCode CC = X86::getCondFromCMov(I);
// Check if we found a X86::CMOVrr instruction.
if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
if (Group.empty()) {
@@ -546,7 +544,7 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates(
}
unsigned CondCost =
- DepthMap[OperandToDefMap.lookup(&MI->getOperand(3))].Depth;
+ DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
unsigned ValCost = getDepthOfOptCmov(
DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
@@ -594,7 +592,7 @@ static bool checkEFLAGSLive(MachineInstr *MI) {
/// move all debug instructions to after the last CMOV instruction, making the
/// CMOV group consecutive.
static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
- assert(X86::getCondFromCMovOpc(Last->getOpcode()) != X86::COND_INVALID &&
+ assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
"Last instruction in a CMOV group must be a CMOV instruction");
SmallVector<MachineInstr *, 2> DBGInstructions;
@@ -652,14 +650,14 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
MachineInstr *LastCMOV = Group.back();
DebugLoc DL = MI.getDebugLoc();
- X86::CondCode CC = X86::CondCode(X86::getCondFromCMovOpc(MI.getOpcode()));
+ X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
// Potentially swap the condition codes so that any memory operand to a CMOV
// is in the *false* position instead of the *true* position. We can invert
// any non-memory operand CMOV instructions to cope with this and we ensure
// memory operand CMOVs are only included with a single condition code.
if (llvm::any_of(Group, [&](MachineInstr *I) {
- return I->mayLoad() && X86::getCondFromCMovOpc(I->getOpcode()) == CC;
+ return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
}))
std::swap(CC, OppCC);
@@ -690,7 +688,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
MBB->addSuccessor(SinkMBB);
// Create the conditional branch instruction.
- BuildMI(MBB, DL, TII->get(X86::GetCondBranchFromCond(CC))).addMBB(SinkMBB);
+ BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
// Add the sink block to the false block successors.
FalseMBB->addSuccessor(SinkMBB);
@@ -713,8 +711,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
if (!MI.mayLoad()) {
// Remember the false-side register input.
unsigned FalseReg =
- MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2)
- .getReg();
+ MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
// Walk back through any intermediate cmovs referenced.
while (true) {
auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
@@ -729,7 +726,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// The condition must be the *opposite* of the one we've decided to branch
// on as the branch will go *around* the load and the load should happen
// when the CMOV condition is false.
- assert(X86::getCondFromCMovOpc(MI.getOpcode()) == OppCC &&
+ assert(X86::getCondFromCMov(MI) == OppCC &&
"Can only handle memory-operand cmov instructions with a condition "
"opposite to the selected branch direction.");
@@ -768,7 +765,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// Move the new CMOV to just before the old one and reset any impacted
// iterator.
auto *NewCMOV = NewMIs.pop_back_val();
- assert(X86::getCondFromCMovOpc(NewCMOV->getOpcode()) == OppCC &&
+ assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
"Last new instruction isn't the expected CMOV!");
LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
@@ -820,7 +817,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
// If this CMOV we are processing is the opposite condition from the jump we
// generated, then we have to swap the operands for the PHI that is going to
// be generated.
- if (X86::getCondFromCMovOpc(MIIt->getOpcode()) == OppCC)
+ if (X86::getCondFromCMov(*MIIt) == OppCC)
std::swap(Op1Reg, Op2Reg);
auto Op1Itr = RegRewriteTable.find(Op1Reg);
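
A change that recurs in this section (here, and in X86CondBrFolding, X86ExpandPseudo and X86FastISel below) is that per-condition opcodes such as JE_1, JB_1 and SETEr are replaced by the generic JCC_1 and SETCCr, which carry the condition code as an immediate operand. A toy standalone illustration of that representational shift; the enum values are illustrative, not the real X86::CondCode numbering:

#include <cstdio>

// Illustrative condition codes; the real values live in X86BaseInfo.h.
enum CondCode { COND_B, COND_E, COND_NE };

struct Inst {
  const char *Opcode;  // one generic opcode, e.g. "JCC_1" or "SETCCr"
  int CondImm;         // condition code carried as an ordinary operand
};

// New-style builder: the condition is data, not part of the opcode name.
static Inst makeCondBranch(CondCode CC) { return {"JCC_1", CC}; }

int main() {
  Inst I = makeCondBranch(COND_E);
  std::printf("%s cc=%d\n", I.Opcode, I.CondImm);
  return 0;
}
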
diff --git a/lib/Target/X86/X86CondBrFolding.cpp b/lib/Target/X86/X86CondBrFolding.cpp
index 7ce443c4656a..9dea94f1368d 100644
--- a/lib/Target/X86/X86CondBrFolding.cpp
+++ b/lib/Target/X86/X86CondBrFolding.cpp
@@ -1,9 +1,8 @@
//===---- X86CondBrFolding.cpp - optimize conditional branches ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines a pass that optimizes condition branches on x86 by taking
@@ -62,9 +61,7 @@ STATISTIC(NumFixedCondBrs, "Number of x86 condbr folded");
namespace {
class X86CondBrFoldingPass : public MachineFunctionPass {
public:
- X86CondBrFoldingPass() : MachineFunctionPass(ID) {
- initializeX86CondBrFoldingPassPass(*PassRegistry::getPassRegistry());
- }
+ X86CondBrFoldingPass() : MachineFunctionPass(ID) { }
StringRef getPassName() const override { return "X86 CondBr Folding"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -226,10 +223,9 @@ void X86CondBrFolding::replaceBrDest(MachineBasicBlock *MBB,
MachineInstr *BrMI;
if (MBBInfo->TBB == OrigDest) {
BrMI = MBBInfo->BrInstr;
- unsigned JNCC = GetCondBranchFromCond(MBBInfo->BranchCode);
MachineInstrBuilder MIB =
- BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(JNCC))
- .addMBB(NewDest);
+ BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI), TII->get(X86::JCC_1))
+ .addMBB(NewDest).addImm(MBBInfo->BranchCode);
MBBInfo->TBB = NewDest;
MBBInfo->BrInstr = MIB.getInstr();
} else { // Should be the unconditional jump stmt.
@@ -255,8 +251,8 @@ void X86CondBrFolding::fixupModifiedCond(MachineBasicBlock *MBB) {
MachineInstr *BrMI = MBBInfo->BrInstr;
X86::CondCode CC = MBBInfo->BranchCode;
MachineInstrBuilder MIB = BuildMI(*MBB, BrMI, MBB->findDebugLoc(BrMI),
- TII->get(GetCondBranchFromCond(CC)))
- .addMBB(MBBInfo->TBB);
+ TII->get(X86::JCC_1))
+ .addMBB(MBBInfo->TBB).addImm(CC);
BrMI->eraseFromParent();
MBBInfo->BrInstr = MIB.getInstr();
@@ -324,8 +320,8 @@ void X86CondBrFolding::optimizeCondBr(
    llvm_unreachable("unexpected conditional code.");
}
BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
- TII->get(GetCondBranchFromCond(NewCC)))
- .addMBB(RootMBBInfo->FBB);
+ TII->get(X86::JCC_1))
+ .addMBB(RootMBBInfo->FBB).addImm(NewCC);
// RootMBB: Jump to TargetMBB
BuildMI(*RootMBB, UncondBrI, RootMBB->findDebugLoc(UncondBrI),
@@ -513,7 +509,7 @@ X86CondBrFolding::analyzeMBB(MachineBasicBlock &MBB) {
if (I->isBranch()) {
if (TBB)
return nullptr;
- CC = X86::getCondFromBranchOpc(I->getOpcode());
+ CC = X86::getCondFromBranch(*I);
switch (CC) {
default:
return nullptr;
diff --git a/lib/Target/X86/X86DiscriminateMemOps.cpp b/lib/Target/X86/X86DiscriminateMemOps.cpp
index 3654bf04f4e9..7051550d52e6 100644
--- a/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -1,9 +1,8 @@
//===- X86DiscriminateMemOps.cpp - Unique IDs for Mem Ops -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -27,6 +26,22 @@ using namespace llvm;
#define DEBUG_TYPE "x86-discriminate-memops"
+static cl::opt<bool> EnableDiscriminateMemops(
+ DEBUG_TYPE, cl::init(false),
+ cl::desc("Generate unique debug info for each instruction with a memory "
+             "operand. Should be enabled for profile-driven cache prefetching, "
+ "both in the build of the binary being profiled, as well as in "
+ "the build of the binary consuming the profile."),
+ cl::Hidden);
+
+static cl::opt<bool> BypassPrefetchInstructions(
+ "x86-bypass-prefetch-instructions", cl::init(true),
+ cl::desc("When discriminating instructions with memory operands, ignore "
+ "prefetch instructions. This ensures the other memory operand "
+ "instructions have the same identifiers after inserting "
+ "prefetches, allowing for successive insertions."),
+ cl::Hidden);
+
namespace {
using Location = std::pair<StringRef, unsigned>;
@@ -55,6 +70,10 @@ public:
X86DiscriminateMemOps();
};
+bool IsPrefetchOpcode(unsigned Opcode) {
+ return Opcode == X86::PREFETCHNTA || Opcode == X86::PREFETCHT0 ||
+ Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2;
+}
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -67,6 +86,9 @@ char X86DiscriminateMemOps::ID = 0;
X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+ if (!EnableDiscriminateMemops)
+ return false;
+
DISubprogram *FDI = MF.getFunction().getSubprogram();
if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
return false;
@@ -75,7 +97,7 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
// have any debug info.
const DILocation *ReferenceDI =
DILocation::get(FDI->getContext(), FDI->getLine(), 0, FDI);
-
+ assert(ReferenceDI && "ReferenceDI should not be nullptr");
DenseMap<Location, unsigned> MemOpDiscriminators;
MemOpDiscriminators[diToLocation(ReferenceDI)] = 0;
@@ -88,6 +110,8 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
const auto &DI = MI.getDebugLoc();
if (!DI)
continue;
+ if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode))
+ continue;
Location Loc = diToLocation(DI);
MemOpDiscriminators[Loc] =
std::max(MemOpDiscriminators[Loc], DI->getBaseDiscriminator());
@@ -104,15 +128,18 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : MBB) {
if (X86II::getMemoryOperandNo(MI.getDesc().TSFlags) < 0)
continue;
+ if (BypassPrefetchInstructions && IsPrefetchOpcode(MI.getDesc().Opcode))
+ continue;
const DILocation *DI = MI.getDebugLoc();
- if (!DI) {
+ bool HasDebug = DI;
+ if (!HasDebug) {
DI = ReferenceDI;
}
Location L = diToLocation(DI);
DenseSet<unsigned> &Set = Seen[L];
const std::pair<DenseSet<unsigned>::iterator, bool> TryInsert =
Set.insert(DI->getBaseDiscriminator());
- if (!TryInsert.second) {
+ if (!TryInsert.second || !HasDebug) {
unsigned BF, DF, CI = 0;
DILocation::decodeDiscriminator(DI->getDiscriminator(), BF, DF, CI);
Optional<unsigned> EncodedDiscriminator = DILocation::encodeDiscriminator(
@@ -133,6 +160,7 @@ bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
// Since we were able to encode, bump the MemOpDiscriminators.
++MemOpDiscriminators[L];
DI = DI->cloneWithDiscriminator(EncodedDiscriminator.getValue());
+ assert(DI && "DI should not be nullptr");
updateDebugInfo(&MI, DI);
Changed = true;
std::pair<DenseSet<unsigned>::iterator, bool> MustInsert =
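
X86DiscriminateMemOps, as the new option descriptions above explain, gives every memory-operand instruction that shares a debug location its own base discriminator, and now skips prefetch instructions so the identifiers stay stable when prefetches are inserted later. A standalone model of the numbering scheme using plain STL containers instead of the LLVM data structures:

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <utility>

using Location = std::pair<std::string, unsigned>;  // (file, line)

int main() {
  // Four memory ops: three share a source line, one sits on its own line.
  Location Ops[] = {{"a.c", 10}, {"a.c", 10}, {"a.c", 10}, {"a.c", 42}};

  std::map<Location, std::set<unsigned>> Seen;      // discriminators already used
  std::map<Location, unsigned> NextDisc;            // next candidate per location
  for (const Location &L : Ops) {
    // Bump the candidate until the (location, discriminator) pair is unique.
    while (!Seen[L].insert(NextDisc[L]).second)
      ++NextDisc[L];
    std::printf("%s:%u disc=%u\n", L.first.c_str(), L.second, NextDisc[L]);
  }
  return 0;  // prints disc=0, 1, 2 for line 10 and disc=0 for line 42
}
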
diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp
index d9ebbb506ca4..18bbfa32e11b 100644
--- a/lib/Target/X86/X86DomainReassignment.cpp
+++ b/lib/Target/X86/X86DomainReassignment.cpp
@@ -1,9 +1,8 @@
//===--- X86DomainReassignment.cpp - Selectively switch register classes---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -387,9 +386,7 @@ class X86DomainReassignment : public MachineFunctionPass {
public:
static char ID;
- X86DomainReassignment() : MachineFunctionPass(ID) {
- initializeX86DomainReassignmentPass(*PassRegistry::getPassRegistry());
- }
+ X86DomainReassignment() : MachineFunctionPass(ID) { }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -557,6 +554,7 @@ void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) {
// Register already in this closure.
if (!C.insertEdge(CurReg))
continue;
+ EnclosedEdges.insert(Reg);
MachineInstr *DefMI = MRI->getVRegDef(CurReg);
encloseInstr(C, DefMI);
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp
index 80674c7251fe..58680f1815bb 100755
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -1,10 +1,9 @@
//===- X86EvexToVex.cpp ---------------------------------------------------===//
// Compress EVEX instructions to VEX encoding when possible to reduce code size
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,15 +12,15 @@
/// are encoded using the EVEX prefix and if possible replaces them by their
/// corresponding VEX encoding which is usually shorter by 2 bytes.
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512
-/// instruction has a corresponding AVX/AVX2 opcode and when it does not
-/// use the xmm or the mask registers or xmm/ymm registers with indexes
-/// higher than 15.
+/// instruction has a corresponding AVX/AVX2 opcode, when the vector length
+/// accessed by the instruction is less than 512 bits, and when it does not use
+/// the xmm or the mask registers or xmm/ymm registers with indexes higher than 15.
/// The pass applies code reduction on the generated code for AVX-512 instrs.
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86InstComments.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
@@ -69,9 +68,7 @@ class EvexToVexInstPass : public MachineFunctionPass {
public:
static char ID;
- EvexToVexInstPass() : MachineFunctionPass(ID) {
- initializeEvexToVexInstPassPass(*PassRegistry::getPassRegistry());
- }
+ EvexToVexInstPass() : MachineFunctionPass(ID) { }
StringRef getPassName() const override { return EVEX2VEX_DESC; }
@@ -255,7 +252,7 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
(Desc.TSFlags & X86II::VEX_L) ? makeArrayRef(X86EvexToVex256CompressTable)
: makeArrayRef(X86EvexToVex128CompressTable);
- auto I = std::lower_bound(Table.begin(), Table.end(), MI.getOpcode());
+ auto I = llvm::lower_bound(Table, MI.getOpcode());
if (I == Table.end() || I->EvexOpcode != MI.getOpcode())
return false;
diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp
index 1dd73163080b..b8624b40f2f7 100644
--- a/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/lib/Target/X86/X86ExpandPseudo.cpp
@@ -1,9 +1,8 @@
//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@
using namespace llvm;
#define DEBUG_TYPE "x86-pseudo"
+#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"
namespace {
class X86ExpandPseudo : public MachineFunctionPass {
@@ -66,8 +66,12 @@ private:
bool ExpandMBB(MachineBasicBlock &MBB);
};
char X86ExpandPseudo::ID = 0;
+
} // End anonymous namespace.
+INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
+ false)
+
void X86ExpandPseudo::ExpandICallBranchFunnel(
MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
MachineBasicBlock *JTMBB = MBB;
@@ -83,6 +87,8 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();
auto CmpTarget = [&](unsigned Target) {
+ if (Selector.isReg())
+ MBB->addLiveIn(Selector.getReg());
BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
.addReg(X86::RIP)
.addImm(1)
@@ -98,11 +104,13 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
auto CreateMBB = [&]() {
auto *NewMBB = MF->CreateMachineBasicBlock(BB);
MBB->addSuccessor(NewMBB);
+ if (!MBB->isLiveIn(X86::EFLAGS))
+ MBB->addLiveIn(X86::EFLAGS);
return NewMBB;
};
- auto EmitCondJump = [&](unsigned Opcode, MachineBasicBlock *ThenMBB) {
- BuildMI(*MBB, MBBI, DL, TII->get(Opcode)).addMBB(ThenMBB);
+ auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
+ BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);
auto *ElseMBB = CreateMBB();
MF->insert(InsPt, ElseMBB);
@@ -110,10 +118,10 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
MBBI = MBB->end();
};
- auto EmitCondJumpTarget = [&](unsigned Opcode, unsigned Target) {
+ auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
auto *ThenMBB = CreateMBB();
TargetMBBs.push_back({ThenMBB, Target});
- EmitCondJump(Opcode, ThenMBB);
+ EmitCondJump(CC, ThenMBB);
};
auto EmitTailCall = [&](unsigned Target) {
@@ -130,23 +138,23 @@ void X86ExpandPseudo::ExpandICallBranchFunnel(
if (NumTargets == 2) {
CmpTarget(FirstTarget + 1);
- EmitCondJumpTarget(X86::JB_1, FirstTarget);
+ EmitCondJumpTarget(X86::COND_B, FirstTarget);
EmitTailCall(FirstTarget + 1);
return;
}
if (NumTargets < 6) {
CmpTarget(FirstTarget + 1);
- EmitCondJumpTarget(X86::JB_1, FirstTarget);
- EmitCondJumpTarget(X86::JE_1, FirstTarget + 1);
+ EmitCondJumpTarget(X86::COND_B, FirstTarget);
+ EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
return;
}
auto *ThenMBB = CreateMBB();
CmpTarget(FirstTarget + (NumTargets / 2));
- EmitCondJump(X86::JB_1, ThenMBB);
- EmitCondJumpTarget(X86::JE_1, FirstTarget + (NumTargets / 2));
+ EmitCondJump(X86::COND_B, ThenMBB);
+ EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
NumTargets - (NumTargets / 2) - 1);
@@ -254,16 +262,19 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
for (unsigned i = 0; i != 5; ++i)
MIB.add(MBBI->getOperand(i));
} else if (Opcode == X86::TCRETURNri64) {
+ JumpTarget.setIsKill();
BuildMI(MBB, MBBI, DL,
TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
- .addReg(JumpTarget.getReg(), RegState::Kill);
+ .add(JumpTarget);
} else {
+ JumpTarget.setIsKill();
BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
- .addReg(JumpTarget.getReg(), RegState::Kill);
+ .add(JumpTarget);
}
MachineInstr &NewMI = *std::prev(MBBI);
NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
+ MBB.getParent()->updateCallSiteInfo(&*MBBI, &NewMI);
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
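
ExpandICallBranchFunnel above builds a compare-and-branch tree over the funnel's targets: two targets need one compare, up to five targets are peeled off two at a time, and larger counts split at the midpoint. A standalone model of the resulting dispatch shape, assuming the selector is an index rather than a pointer and that the part of EmitBranchFunnel not shown in this hunk recurses into the lower half for the below-midpoint case:

#include <cassert>

// Which of the NumTargets funnel targets (starting at FirstTarget) the
// Selector falls into, following the same compare/branch structure.
static unsigned funnelSelect(unsigned Selector, unsigned FirstTarget,
                             unsigned NumTargets) {
  if (NumTargets == 1)
    return FirstTarget;
  if (NumTargets == 2)
    return Selector < FirstTarget + 1 ? FirstTarget : FirstTarget + 1;
  if (NumTargets < 6) {
    if (Selector < FirstTarget + 1)
      return FirstTarget;
    if (Selector == FirstTarget + 1)
      return FirstTarget + 1;
    return funnelSelect(Selector, FirstTarget + 2, NumTargets - 2);
  }
  unsigned Mid = FirstTarget + NumTargets / 2;
  if (Selector < Mid)
    return funnelSelect(Selector, FirstTarget, NumTargets / 2);
  if (Selector == Mid)
    return Mid;
  return funnelSelect(Selector, Mid + 1, NumTargets - NumTargets / 2 - 1);
}

int main() {
  // Every selector index should resolve to its own target.
  for (unsigned S = 0; S < 9; ++S)
    assert(funnelSelect(S, 0, 9) == S);
  return 0;
}
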
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 9dd3f2652543..7b9ce0271205 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1,9 +1,8 @@
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,7 +84,7 @@ private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
const DebugLoc &DL);
- bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
+ bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
unsigned &ResultReg, unsigned Alignment = 1);
bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
@@ -290,7 +289,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
}
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
- EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
+ EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
@@ -312,12 +311,10 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}
-#include "X86GenCallingConv.inc"
-
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
-bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
+bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
bool HasSSE41 = Subtarget->hasSSE41();
@@ -327,46 +324,42 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool HasVLX = Subtarget->hasVLX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
+ // Treat i1 loads the same as i8 loads. Masking will be done when storing.
+ if (VT == MVT::i1)
+ VT = MVT::i8;
+
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return false;
- case MVT::i1:
case MVT::i8:
Opc = X86::MOV8rm;
- RC = &X86::GR8RegClass;
break;
case MVT::i16:
Opc = X86::MOV16rm;
- RC = &X86::GR16RegClass;
break;
case MVT::i32:
Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
break;
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
break;
case MVT::f32:
- if (X86ScalarSSEf32) {
- Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
- RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
- } else {
+ if (X86ScalarSSEf32)
+ Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
+ HasAVX ? X86::VMOVSSrm_alt :
+ X86::MOVSSrm_alt;
+ else
Opc = X86::LD_Fp32m;
- RC = &X86::RFP32RegClass;
- }
break;
case MVT::f64:
- if (X86ScalarSSEf64) {
- Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
- RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
- } else {
+ if (X86ScalarSSEf64)
+ Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
+ HasAVX ? X86::VMOVSDrm_alt :
+ X86::MOVSDrm_alt;
+ else
Opc = X86::LD_Fp64m;
- RC = &X86::RFP64RegClass;
- }
break;
case MVT::f80:
// No f80 support yet.
@@ -381,7 +374,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
else
Opc = HasVLX ? X86::VMOVUPSZ128rm :
HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
- RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
break;
case MVT::v2f64:
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
@@ -393,13 +385,12 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
else
Opc = HasVLX ? X86::VMOVUPDZ128rm :
HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
- RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (IsNonTemporal && Alignment >= 16)
+ if (IsNonTemporal && Alignment >= 16 && HasSSE41)
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
@@ -408,7 +399,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
else
Opc = HasVLX ? X86::VMOVDQU64Z128rm :
HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
- RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
break;
case MVT::v8f32:
assert(HasAVX);
@@ -420,7 +410,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
else
Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
- RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
break;
case MVT::v4f64:
assert(HasAVX);
@@ -432,7 +421,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
else
Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
- RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
break;
case MVT::v8i32:
case MVT::v4i64:
@@ -447,7 +435,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
else
Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
- RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
break;
case MVT::v16f32:
assert(HasAVX512);
@@ -455,7 +442,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
Opc = X86::VMOVNTDQAZrm;
else
Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
- RC = &X86::VR512RegClass;
break;
case MVT::v8f64:
assert(HasAVX512);
@@ -463,7 +449,6 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
Opc = X86::VMOVNTDQAZrm;
else
Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
- RC = &X86::VR512RegClass;
break;
case MVT::v8i64:
case MVT::v16i32:
@@ -476,10 +461,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
Opc = X86::VMOVNTDQAZrm;
else
Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
- RC = &X86::VR512RegClass;
break;
}
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
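Two small things worth calling out in this hunk: i1 loads are now widened to i8 up front (masking is left to the store side, per the comment above), and the register class is derived once from the type via TLI.getRegClassFor(VT) instead of being tracked case by case. A byte-level model of the i1-as-i8 convention, an illustrative sketch and not LLVM code:

    #include <cstdint>
    #include <cstdio>

    // An i1 occupies a byte slot; the store masks to bit 0, so the load can be
    // a plain byte load (which is why VT is rewritten to MVT::i8 before the
    // switch above).
    static uint8_t storeI1(bool v) { return static_cast<uint8_t>(v) & 1; }
    static bool loadI1(uint8_t byte) { return byte != 0; }

    int main() {
      uint8_t slot = storeI1(true);
      std::printf("%u %d\n", slot, loadI1(slot) ? 1 : 0); // prints: 1 1
    }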
@@ -1483,8 +1469,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
static const uint16_t SETFOpcTable[2][3] = {
- { X86::SETEr, X86::SETNPr, X86::AND8rr },
- { X86::SETNEr, X86::SETPr, X86::OR8rr }
+ { X86::COND_E, X86::COND_NP, X86::AND8rr },
+ { X86::COND_NE, X86::COND_P, X86::OR8rr }
};
const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
@@ -1500,10 +1486,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
- FlagReg1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
- FlagReg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+ FlagReg1).addImm(SETFOpc[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+ FlagReg2).addImm(SETFOpc[1]);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
ResultReg).addReg(FlagReg1).addReg(FlagReg2);
updateValueMap(I, ResultReg);
@@ -1514,7 +1500,6 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
bool SwapArgs;
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
- unsigned Opc = X86::getSETFromCond(CC);
if (SwapArgs)
std::swap(LHS, RHS);
@@ -1523,7 +1508,8 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+ ResultReg).addImm(CC);
updateValueMap(I, ResultReg);
return true;
}
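The SETFOpcTable rewrite above keeps the two-setcc strategy and only changes how the setcc is encoded: a single X86::SETCCr opcode with the condition code as a trailing immediate, instead of one opcode per condition. Two setcc's are needed because, after a ucomiss/ucomisd, the "equal" flag pattern is also produced for unordered inputs, so OEQ must additionally check "no parity" (and UNE the inverse, joined with OR8rr). A small C-level model of the OEQ row, purely illustrative and not LLVM code:

    #include <cmath>
    #include <cstdio>

    // Model of the FCMP_OEQ decomposition: after a ucomisd, the ZF-style test is
    // also true for unordered operands, so OEQ = SETCC(E) AND SETCC(NP).
    static bool fcmpOEQ(double a, double b) {
      bool zf = !(a < b) && !(a > b);           // "equal or unordered"
      bool pf = std::isnan(a) || std::isnan(b); // "unordered"
      return zf && !pf;                         // the AND8rr of the two setcc's
    }

    int main() {
      double qnan = std::nan("");
      std::printf("%d %d %d\n", fcmpOEQ(1.0, 1.0), fcmpOEQ(1.0, 2.0),
                  fcmpOEQ(qnan, qnan)); // prints: 1 0 0
    }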
@@ -1693,11 +1679,9 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
bool SwapArgs;
- unsigned BranchOpc;
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
- BranchOpc = X86::GetCondBranchFromCond(CC);
if (SwapArgs)
std::swap(CmpLHS, CmpRHS);
@@ -1705,14 +1689,14 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
- .addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+ .addMBB(TrueMBB).addImm(CC);
// X86 requires a second branch to handle UNE (and OEQ, which is mapped
// to UNE above).
if (NeedExtraBranch) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
- .addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+ .addMBB(TrueMBB).addImm(X86::COND_P);
}
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -1739,14 +1723,14 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
.addReg(OpReg).addImm(1);
- unsigned JmpOpc = X86::JNE_1;
+ unsigned JmpCond = X86::COND_NE;
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
- JmpOpc = X86::JE_1;
+ JmpCond = X86::COND_E;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
- .addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+ .addMBB(TrueMBB).addImm(JmpCond);
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
@@ -1759,10 +1743,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (TmpReg == 0)
return false;
- unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
- .addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+ .addMBB(TrueMBB).addImm(CC);
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -1786,8 +1768,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(OpReg)
.addImm(1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
- .addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
+ .addMBB(TrueMBB).addImm(X86::COND_NE);
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -2050,8 +2032,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
static const uint16_t SETFOpcTable[2][3] = {
- { X86::SETNPr, X86::SETEr , X86::TEST8rr },
- { X86::SETPr, X86::SETNEr, X86::OR8rr }
+ { X86::COND_NP, X86::COND_E, X86::TEST8rr },
+ { X86::COND_P, X86::COND_NE, X86::OR8rr }
};
const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
@@ -2083,10 +2065,10 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
if (SETFOpc) {
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
- FlagReg1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
- FlagReg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+ FlagReg1).addImm(SETFOpc[0]);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+ FlagReg2).addImm(SETFOpc[1]);
auto const &II = TII.get(SETFOpc[2]);
if (II.getNumDefs()) {
unsigned TmpReg = createResultReg(&X86::GR8RegClass);
@@ -2147,9 +2129,9 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
- unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
- unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
- LHSReg, LHSIsKill);
+ unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(*RC)/8);
+ unsigned ResultReg = fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CC);
updateValueMap(I, ResultReg);
return true;
}
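The cmov path follows the same encoding change: X86::getCMovOpcode picks the opcode purely by operand size and the condition rides along as the extra immediate appended by fastEmitInst_rri. Note the operand order: the false value (RHSReg) goes first so it is the value kept when the condition fails, and LHSReg is conditionally moved in. A scalar model of what the emitted cmov computes, illustrative only:

    #include <cstdint>
    #include <cstdio>

    // select cond, LHS, RHS as built above: start from the false value and
    // conditionally overwrite it with the true value.
    static uint32_t cmovModel(bool cc, uint32_t lhs, uint32_t rhs) {
      uint32_t result = rhs; // first register operand (RHSReg)
      if (cc)
        result = lhs;        // second register operand (LHSReg)
      return result;
    }

    int main() {
      std::printf("%u %u\n", cmovModel(true, 1, 2), cmovModel(false, 1, 2)); // 1 2
    }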
@@ -2194,19 +2176,6 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- // Choose the SSE instruction sequence based on data type (float or double).
- static const uint16_t OpcTable[2][4] = {
- { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
- { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
- };
-
- const uint16_t *Opc = nullptr;
- switch (RetVT.SimpleTy) {
- default: return false;
- case MVT::f32: Opc = &OpcTable[0][0]; break;
- case MVT::f64: Opc = &OpcTable[1][0]; break;
- }
-
const Value *LHS = I->getOperand(1);
const Value *RHS = I->getOperand(2);
@@ -2277,6 +2246,19 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
+ // Choose the SSE instruction sequence based on data type (float or double).
+ static const uint16_t OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
+ { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
+ };
+
+ const uint16_t *Opc = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = &OpcTable[0][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][0]; break;
+ }
+
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
@@ -2303,8 +2285,10 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
case MVT::i8: Opc = X86::CMOV_GR8; break;
case MVT::i16: Opc = X86::CMOV_GR16; break;
case MVT::i32: Opc = X86::CMOV_GR32; break;
- case MVT::f32: Opc = X86::CMOV_FR32; break;
- case MVT::f64: Opc = X86::CMOV_FR64; break;
+ case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
+ : X86::CMOV_FR32; break;
+ case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
+ : X86::CMOV_FR64; break;
}
const Value *Cond = I->getOperand(0);
@@ -2485,13 +2469,14 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
assert((I->getOpcode() == Instruction::FPExt ||
I->getOpcode() == Instruction::FPTrunc) &&
"Instruction must be an FPExt or FPTrunc!");
+ bool HasAVX = Subtarget->hasAVX();
unsigned OpReg = getRegForValue(I->getOperand(0));
if (OpReg == 0)
return false;
unsigned ImplicitDefReg;
- if (Subtarget->hasAVX()) {
+ if (HasAVX) {
ImplicitDefReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
@@ -2503,7 +2488,7 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
ResultReg);
- if (Subtarget->hasAVX())
+ if (HasAVX)
MIB.addReg(ImplicitDefReg);
MIB.addReg(OpReg);
@@ -2519,8 +2504,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
unsigned Opc =
HasAVX512 ? X86::VCVTSS2SDZrr
: Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
- return X86SelectFPExtOrFPTrunc(
- I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass);
+ return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f64));
}
return false;
@@ -2534,8 +2518,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
unsigned Opc =
HasAVX512 ? X86::VCVTSD2SSZrr
: Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
- return X86SelectFPExtOrFPTrunc(
- I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass);
+ return X86SelectFPExtOrFPTrunc(I, Opc, TLI.getRegClassFor(MVT::f32));
}
return false;
@@ -2900,21 +2883,21 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
- unsigned BaseOpc, CondOpc;
+ unsigned BaseOpc, CondCode;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+ BaseOpc = ISD::ADD; CondCode = X86::COND_O; break;
case Intrinsic::uadd_with_overflow:
- BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
+ BaseOpc = ISD::ADD; CondCode = X86::COND_B; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+ BaseOpc = ISD::SUB; CondCode = X86::COND_O; break;
case Intrinsic::usub_with_overflow:
- BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
+ BaseOpc = ISD::SUB; CondCode = X86::COND_B; break;
case Intrinsic::smul_with_overflow:
- BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
+ BaseOpc = X86ISD::SMUL; CondCode = X86::COND_O; break;
case Intrinsic::umul_with_overflow:
- BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
+ BaseOpc = X86ISD::UMUL; CondCode = X86::COND_O; break;
}
unsigned LHSReg = getRegForValue(LHS);
@@ -2931,7 +2914,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
};
if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
- CondOpc == X86::SETOr) {
+ CondCode == X86::COND_O) {
// We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
bool IsDec = BaseOpc == ISD::SUB;
@@ -2990,8 +2973,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// Assign to a GPR since the overflow return value is lowered to a SETcc.
unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
- ResultReg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETCCr),
+ ResultReg2).addImm(CondCode);
updateValueMap(II, ResultReg, 2);
return true;
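For reference, these intrinsics are what front ends emit for checked arithmetic; the switch above now records an X86 condition code (COND_O or COND_B) and the overflow bit comes out of the same SETCCr pattern. A standalone example that reaches this lowering when compiled with Clang at -O0 (the builtin is Clang/GCC's, not something defined in this patch):

    #include <cstdio>

    int main() {
      int a = 2000000000, b = 2000000000, sum;
      // Clang lowers this builtin to llvm.sadd.with.overflow.i32, which the
      // switch above maps to BaseOpc = ISD::ADD, CondCode = X86::COND_O.
      bool overflow = __builtin_sadd_overflow(a, b, &sum);
      std::printf("sum=%d overflow=%d\n", sum, overflow);
      return 0;
    }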
@@ -3509,8 +3492,9 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// This will be a direct call, or an indirect call through memory for
// NonLazyBind calls or dllimport calls.
- bool NeedLoad =
- OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL;
+ bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
+ OpFlags == X86II::MO_GOTPCREL ||
+ OpFlags == X86II::MO_COFFSTUB;
unsigned CallOpc = NeedLoad
? (Is64Bit ? X86::CALL64m : X86::CALL32m)
: (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
@@ -3595,7 +3579,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc)), FI)
.addReg(CopyReg);
- Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
+ Opc = ResVT == MVT::f32 ? X86::MOVSSrm_alt : X86::MOVSDrm_alt;
addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg + i), FI);
}
@@ -3662,24 +3646,19 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
return true;
}
case Instruction::BitCast: {
- // Select SSE2/AVX bitcasts between 128/256 bit vector types.
+ // Select SSE2/AVX bitcasts between 128/256/512 bit vector types.
if (!Subtarget->hasSSE2())
return false;
- EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(DL, I->getType());
-
- if (!SrcVT.isSimple() || !DstVT.isSimple())
+ MVT SrcVT, DstVT;
+ if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT) ||
+ !isTypeLegal(I->getType(), DstVT))
return false;
- MVT SVT = SrcVT.getSimpleVT();
- MVT DVT = DstVT.getSimpleVT();
-
- if (!SVT.is128BitVector() &&
- !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
- !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
- (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
- DVT.getScalarSizeInBits() >= 32))))
+ // Only allow vectors that use xmm/ymm/zmm.
+ if (!SrcVT.isVector() || !DstVT.isVector() ||
+ SrcVT.getVectorElementType() == MVT::i1 ||
+ DstVT.getVectorElementType() == MVT::i1)
return false;
unsigned Reg = getRegForValue(I->getOperand(0));
@@ -3757,30 +3736,25 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
+ bool HasAVX = Subtarget->hasAVX();
+ bool HasAVX512 = Subtarget->hasAVX512();
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
- if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX512()
- ? X86::VMOVSSZrm
- : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
- RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
- } else {
+ if (X86ScalarSSEf32)
+ Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
+ HasAVX ? X86::VMOVSSrm_alt :
+ X86::MOVSSrm_alt;
+ else
Opc = X86::LD_Fp32m;
- RC = &X86::RFP32RegClass;
- }
break;
case MVT::f64:
- if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX512()
- ? X86::VMOVSDZrm
- : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
- RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
- } else {
+ if (X86ScalarSSEf64)
+ Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
+ HasAVX ? X86::VMOVSDrm_alt :
+ X86::MOVSDrm_alt;
+ else
Opc = X86::LD_Fp64m;
- RC = &X86::RFP64RegClass;
- }
break;
case MVT::f80:
// No f80 support yet.
@@ -3806,7 +3780,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
// Create the load from the constant pool.
unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
- unsigned ResultReg = createResultReg(RC);
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
if (CM == CodeModel::Large) {
unsigned AddrReg = createResultReg(&X86::GR64RegClass);
@@ -3916,33 +3890,26 @@ unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
// Get opcode and regclass for the given zero.
bool HasAVX512 = Subtarget->hasAVX512();
unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
- if (X86ScalarSSEf32) {
+ if (X86ScalarSSEf32)
Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
- RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
- } else {
+ else
Opc = X86::LD_Fp032;
- RC = &X86::RFP32RegClass;
- }
break;
case MVT::f64:
- if (X86ScalarSSEf64) {
+ if (X86ScalarSSEf64)
Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
- RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
- } else {
+ else
Opc = X86::LD_Fp064;
- RC = &X86::RFP64RegClass;
- }
break;
case MVT::f80:
// No f80 support yet.
return 0;
}
- unsigned ResultReg = createResultReg(RC);
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
return ResultReg;
}
@@ -3992,6 +3959,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
}
Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
+ Result->cloneInstrSymbols(*FuncInfo.MF, *MI);
MachineBasicBlock::iterator I(MI);
removeDeadCode(I, std::next(I));
return true;
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index ed297e678203..bf541d933790 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -1,9 +1,8 @@
//===-- X86FixupBWInsts.cpp - Fixup Byte or Word instructions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -103,9 +102,7 @@ public:
StringRef getPassName() const override { return FIXUPBW_DESC; }
- FixupBWInstPass() : MachineFunctionPass(ID) {
- initializeFixupBWInstPassPass(*PassRegistry::getPassRegistry());
- }
+ FixupBWInstPass() : MachineFunctionPass(ID) { }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>(); // Machine loop info is used to
@@ -151,7 +148,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
- OptForSize = MF.getFunction().optForSize();
+ OptForSize = MF.getFunction().hasOptSize();
MLI = &getAnalysis<MachineLoopInfo>();
LiveRegs.init(TII->getRegisterInfo());
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index a346085a52cb..041529a0be68 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -1,15 +1,14 @@
//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
-// When optimizing for size it replaces suitable LEAs with INC or DEC.
+// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//
@@ -36,31 +35,25 @@ namespace {
class FixupLEAPass : public MachineFunctionPass {
enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
- /// Loop over all of the instructions in the basic block
- /// replacing applicable instructions with LEA instructions,
- /// where appropriate.
- bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI,
- bool IsSlowLEA, bool IsSlow3OpsLEA);
-
/// Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
/// try to replace it with an equivalent LEA instruction.
/// If replacement succeeds, then also process the newly created
/// instruction.
void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
+ MachineBasicBlock &MBB);
/// Given a memory access or LEA instruction
/// whose address mode uses a base and/or index register, look for
/// an opportunity to replace the instruction which sets the base or index
/// register with an equivalent LEA instruction.
void processInstruction(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
+ MachineBasicBlock &MBB);
/// Given a LEA instruction which is unprofitable
/// on SlowLEA targets try to replace it with an equivalent ADD instruction.
void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
+ MachineBasicBlock &MBB);
/// Given a LEA instruction which is unprofitable
/// on SNB+ try to replace it with other instructions.
@@ -75,12 +68,13 @@ class FixupLEAPass : public MachineFunctionPass {
/// - LEA that uses 16-bit addressing mode "
/// This function currently handles the first 2 cases only.
MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
- MachineFunction::iterator MFI);
+ MachineBasicBlock &MBB);
- /// Look for LEAs that add 1 to reg or subtract 1 from reg
- /// and convert them to INC or DEC respectively.
- bool fixupIncDec(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) const;
+ /// Look for LEAs that are really two address LEAs that we might be able to
+ /// turn into regular ADD instructions.
+ bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB, bool OptIncDec,
+ bool UseLEAForSP) const;
/// Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
@@ -91,12 +85,12 @@ class FixupLEAPass : public MachineFunctionPass {
/// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI);
+ MachineBasicBlock &MBB);
/// if an instruction can be converted to an
/// equivalent LEA, insert the new instruction into the basic block
/// and return a pointer to it. Otherwise, return zero.
- MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) const;
public:
@@ -104,9 +98,7 @@ public:
StringRef getPassName() const override { return FIXUPLEA_DESC; }
- FixupLEAPass() : MachineFunctionPass(ID) {
- initializeFixupLEAPassPass(*PassRegistry::getPassRegistry());
- }
+ FixupLEAPass() : MachineFunctionPass(ID) { }
/// Loop over all of the basic blocks,
/// replacing instructions by equivalent LEA instructions
@@ -121,10 +113,8 @@ public:
private:
TargetSchedModel TSM;
- MachineFunction *MF;
- const X86InstrInfo *TII; // Machine instruction info.
- bool OptIncDec;
- bool OptLEA;
+ const X86InstrInfo *TII;
+ const X86RegisterInfo *TRI;
};
}
@@ -133,7 +123,7 @@ char FixupLEAPass::ID = 0;
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
MachineInstr *
-FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
+FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) const {
MachineInstr &MI = *MBBI;
switch (MI.getOpcode()) {
@@ -142,7 +132,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
const MachineOperand &Src = MI.getOperand(1);
const MachineOperand &Dest = MI.getOperand(0);
MachineInstr *NewMI =
- BuildMI(*MF, MI.getDebugLoc(),
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
: X86::LEA64r))
.add(Dest)
@@ -151,9 +141,17 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
.addReg(0)
.addImm(0)
.addReg(0);
- MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
+ }
+
+ if (!MI.isConvertibleTo3Addr())
+ return nullptr;
+
+ switch (MI.getOpcode()) {
+ default:
+ // Only convert instructions that we've verified are safe.
+ return nullptr;
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -162,52 +160,80 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
case X86::ADD32ri8:
case X86::ADD32ri_DB:
case X86::ADD32ri8_DB:
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD16ri_DB:
- case X86::ADD16ri8_DB:
if (!MI.getOperand(2).isImm()) {
// convertToThreeAddress will call getImm()
// which requires isImm() to be true
return nullptr;
}
break;
- case X86::ADD16rr:
- case X86::ADD16rr_DB:
- if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg()) {
- // if src1 != src2, then convertToThreeAddress will
- // need to create a Virtual register, which we cannot do
- // after register allocation.
- return nullptr;
- }
+ case X86::SHL64ri:
+ case X86::SHL32ri:
+ case X86::INC64r:
+ case X86::INC32r:
+ case X86::DEC64r:
+ case X86::DEC32r:
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB:
+ // These instructions are all fine to convert.
+ break;
}
+ MachineFunction::iterator MFI = MBB.getIterator();
return TII->convertToThreeAddress(MFI, MI, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
-bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
- if (skipFunction(Func.getFunction()))
+static bool isLEA(unsigned Opcode) {
+ return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
+ Opcode == X86::LEA64_32r;
+}
+
+bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
return false;
- MF = &Func;
- const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
bool IsSlowLEA = ST.slowLEA();
bool IsSlow3OpsLEA = ST.slow3OpsLEA();
+ bool LEAUsesAG = ST.LEAusesAG();
- OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize();
- OptLEA = ST.LEAusesAG() || IsSlowLEA || IsSlow3OpsLEA;
-
- if (!OptLEA && !OptIncDec)
- return false;
+ bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
+ bool UseLEAForSP = ST.useLeaForSP();
- TSM.init(&Func.getSubtarget());
+ TSM.init(&ST);
TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
- // Process all basic blocks.
- for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
- processBasicBlock(Func, I, IsSlowLEA, IsSlow3OpsLEA);
+ for (MachineBasicBlock &MBB : MF) {
+ // First pass. Try to remove or optimize existing LEAs.
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!isLEA(I->getOpcode()))
+ continue;
+
+ if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
+ continue;
+
+ if (IsSlowLEA) {
+ processInstructionForSlowLEA(I, MBB);
+ } else if (IsSlow3OpsLEA) {
+ if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) {
+ MBB.erase(I);
+ I = NewMI;
+ }
+ }
+ }
+
+ // Second pass for creating LEAs. This may reverse some of the
+ // transformations above.
+ if (LEAUsesAG) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ processInstruction(I, MBB);
+ }
+ }
+
LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
return true;
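The rewritten driver makes the two directions explicit: a first pass that removes or simplifies LEAs (optTwoAddrLEA plus the slow-LEA handlers), and a second pass that creates LEAs when the subtarget reports LEAusesAG(). The underlying equivalence is the scaled-index address computation that LEA performs without touching EFLAGS, which is also why the LEA-to-ADD/INC/DEC direction has to check isSafeToClobberEFLAGS first. A minimal model of that computation, illustrative only:

    #include <cstdint>
    #include <cstdio>

    // What an LEA computes: base + index*scale + disp, with no flag updates.
    static uint64_t leaModel(uint64_t base, uint64_t index, unsigned scale,
                             int64_t disp) {
      return base + index * scale + disp;
    }

    int main() {
      // add $8, %rax   ==  lea 8(%rax), %rax       (modulo EFLAGS)
      // shl $2, %rcx   ==  lea (,%rcx,4), %rcx     (modulo EFLAGS)
      std::printf("%llu %llu\n",
                  (unsigned long long)leaModel(100, 0, 1, 8),  // 108
                  (unsigned long long)leaModel(0, 25, 4, 0));  // 100
    }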
@@ -218,7 +244,7 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
MachineInstr &MI = *I;
- for (unsigned int i = 0; i < MI.getNumOperands(); ++i) {
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
MachineOperand &opnd = MI.getOperand(i);
if (opnd.isReg() && opnd.getReg() == p.getReg()) {
if (opnd.isDef())
@@ -234,10 +260,10 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
- if (I == MFI->begin()) {
- if (MFI->isPredecessor(&*MFI)) {
- I = --MFI->end();
+ MachineBasicBlock &MBB) {
+ if (I == MBB.begin()) {
+ if (MBB.isPredecessor(&MBB)) {
+ I = --MBB.end();
return true;
} else
return false;
@@ -248,14 +274,14 @@ static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
+ MachineBasicBlock &MBB) {
int InstrDistance = 1;
MachineBasicBlock::iterator CurInst;
static const int INSTR_DISTANCE_THRESHOLD = 5;
CurInst = I;
bool Found;
- Found = getPreviousInstr(CurInst, MFI);
+ Found = getPreviousInstr(CurInst, MBB);
while (Found && I != CurInst) {
if (CurInst->isCall() || CurInst->isInlineAsm())
break;
@@ -265,17 +291,12 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
return CurInst;
}
InstrDistance += TSM.computeInstrLatency(&*CurInst);
- Found = getPreviousInstr(CurInst, MFI);
+ Found = getPreviousInstr(CurInst, MBB);
}
return MachineBasicBlock::iterator();
}
-static inline bool isLEA(const int Opcode) {
- return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
- Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
-}
-
-static inline bool isInefficientLEAReg(unsigned int Reg) {
+static inline bool isInefficientLEAReg(unsigned Reg) {
return Reg == X86::EBP || Reg == X86::RBP ||
Reg == X86::R13D || Reg == X86::R13;
}
@@ -298,27 +319,24 @@ static inline bool hasLEAOffset(const MachineOperand &Offset) {
return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
}
-static inline int getADDrrFromLEA(int LEAOpcode) {
+static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
switch (LEAOpcode) {
default:
llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA16r:
- return X86::ADD16rr;
case X86::LEA32r:
- return X86::ADD32rr;
case X86::LEA64_32r:
+ return X86::ADD32rr;
case X86::LEA64r:
return X86::ADD64rr;
}
}
-static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
+static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
+ const MachineOperand &Offset) {
bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
switch (LEAOpcode) {
default:
llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA16r:
- return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri;
case X86::LEA32r:
case X86::LEA64_32r:
return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
@@ -327,56 +345,110 @@ static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
}
}
-/// isLEASimpleIncOrDec - Does this LEA have one these forms:
-/// lea %reg, 1(%reg)
-/// lea %reg, -1(%reg)
-static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
- unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
- unsigned DstReg = LEA.getOperand(0).getReg();
- const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
- return SrcReg == DstReg &&
- LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
- LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
- AddrDisp.isImm() &&
- (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
+static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
+ switch (LEAOpcode) {
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ return IsINC ? X86::INC32r : X86::DEC32r;
+ case X86::LEA64r:
+ return IsINC ? X86::INC64r : X86::DEC64r;
+ }
}
-bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) const {
+bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
+ MachineBasicBlock &MBB, bool OptIncDec,
+ bool UseLEAForSP) const {
MachineInstr &MI = *I;
- int Opcode = MI.getOpcode();
- if (!isLEA(Opcode))
+
+ const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
+ const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
+ const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
+ const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
+ const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
+
+ if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
+ !TII->isSafeToClobberEFLAGS(MBB, I))
return false;
- if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
- int NewOpcode;
- bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
- switch (Opcode) {
- case X86::LEA16r:
- NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
- break;
- case X86::LEA32r:
- case X86::LEA64_32r:
- NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
- break;
- case X86::LEA64r:
- NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
- break;
- }
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned IndexReg = Index.getReg();
- MachineInstr *NewMI =
- BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
- .add(MI.getOperand(0))
- .add(MI.getOperand(1 + X86::AddrBaseReg));
- MFI->erase(I);
- I = static_cast<MachineBasicBlock::iterator>(NewMI);
- return true;
+ // Don't change stack adjustment LEAs.
+ if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
+ return false;
+
+ // LEA64_32 has 64-bit operands but 32-bit result.
+ if (MI.getOpcode() == X86::LEA64_32r) {
+ if (BaseReg != 0)
+ BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
+ if (IndexReg != 0)
+ IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
}
- return false;
+
+ MachineInstr *NewMI = nullptr;
+
+ // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
+ // which can be turned into add %reg2, %reg1
+ if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
+ (DestReg == BaseReg || DestReg == IndexReg)) {
+ unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
+ if (DestReg != BaseReg)
+ std::swap(BaseReg, IndexReg);
+
+ if (MI.getOpcode() == X86::LEA64_32r) {
+ // TODO: Do we need the super register implicit use?
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+ .addReg(BaseReg).addReg(IndexReg)
+ .addReg(Base.getReg(), RegState::Implicit)
+ .addReg(Index.getReg(), RegState::Implicit);
+ } else {
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+ .addReg(BaseReg).addReg(IndexReg);
+ }
+ } else if (DestReg == BaseReg && IndexReg == 0) {
+ // This is an LEA with only a base register and a displacement;
+ // we can use ADDri or INC/DEC.
+
+ // Does this LEA have one of these forms:
+ // lea %reg, 1(%reg)
+ // lea %reg, -1(%reg)
+ if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
+ bool IsINC = Disp.getImm() == 1;
+ unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
+
+ if (MI.getOpcode() == X86::LEA64_32r) {
+ // TODO: Do we need the super register implicit use?
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+ .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
+ } else {
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+ .addReg(BaseReg);
+ }
+ } else {
+ unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
+ if (MI.getOpcode() == X86::LEA64_32r) {
+ // TODO: Do we need the super register implicit use?
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+ .addReg(BaseReg).addImm(Disp.getImm())
+ .addReg(Base.getReg(), RegState::Implicit);
+ } else {
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
+ .addReg(BaseReg).addImm(Disp.getImm());
+ }
+ }
+ } else
+ return false;
+
+ MBB.erase(I);
+ I = NewMI;
+ return true;
}
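Summarizing the cases the new optTwoAddrLEA handles (after the early bail-outs for a segment register, a non-immediate displacement, a scale above 1, or live EFLAGS), here is a compact decision model; it is an illustrative sketch only, with registers as plain integers and 0 meaning "no register":

    #include <cstdio>
    #include <string>

    // Which cheaper instruction can replace a two-address LEA, mirroring the
    // structure of optTwoAddrLEA (segment/scale/EFLAGS checks omitted).
    static std::string pickReplacement(unsigned dest, unsigned base,
                                       unsigned index, long disp,
                                       bool optIncDec) {
      if (base && index && disp == 0 && (dest == base || dest == index))
        return "ADDrr"; // lea (%r1,%r2), %r1  ->  add %r2, %r1
      if (dest == base && index == 0) {
        if (optIncDec && (disp == 1 || disp == -1))
          return disp == 1 ? "INCr" : "DECr"; // lea 1(%r), %r  ->  inc %r
        return "ADDri";                       // lea d(%r), %r  ->  add $d, %r
      }
      return "keep LEA";
    }

    int main() {
      std::printf("%s %s %s\n",
                  pickReplacement(1, 1, 2, 0, true).c_str(),   // ADDrr
                  pickReplacement(1, 1, 0, -1, true).c_str(),  // DECr
                  pickReplacement(1, 1, 0, 16, true).c_str()); // ADDri
    }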
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
+ MachineBasicBlock &MBB) {
// Process a load, store, or LEA instruction.
MachineInstr &MI = *I;
const MCInstrDesc &Desc = MI.getDesc();
@@ -385,40 +457,38 @@ void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
AddrOffset += X86II::getOperandBias(Desc);
MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
if (p.isReg() && p.getReg() != X86::ESP) {
- seekLEAFixup(p, I, MFI);
+ seekLEAFixup(p, I, MBB);
}
MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
if (q.isReg() && q.getReg() != X86::ESP) {
- seekLEAFixup(q, I, MFI);
+ seekLEAFixup(q, I, MBB);
}
}
}
void FixupLEAPass::seekLEAFixup(MachineOperand &p,
MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
- MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
+ MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
if (MBI != MachineBasicBlock::iterator()) {
- MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI);
+ MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
if (NewMI) {
++NumLEAs;
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
// now to replace with an equivalent LEA...
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
- MFI->erase(MBI);
+ MBB.erase(MBI);
MachineBasicBlock::iterator J =
static_cast<MachineBasicBlock::iterator>(NewMI);
- processInstruction(J, MFI);
+ processInstruction(J, MBB);
}
}
}
void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
- MachineFunction::iterator MFI) {
+ MachineBasicBlock &MBB) {
MachineInstr &MI = *I;
- const int Opcode = MI.getOpcode();
- if (!isLEA(Opcode))
- return;
+ const unsigned Opcode = MI.getOpcode();
const MachineOperand &Dst = MI.getOperand(0);
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
@@ -428,7 +498,7 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
if (Segment.getReg() != 0 || !Offset.isImm() ||
- !TII->isSafeToClobberEFLAGS(*MFI, I))
+ !TII->isSafeToClobberEFLAGS(MBB, I))
return;
const unsigned DstR = Dst.getReg();
const unsigned SrcR1 = Base.getReg();
@@ -445,7 +515,7 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
NewMI =
- BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
+ BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
LLVM_DEBUG(NewMI->dump(););
}
// Make ADD instruction for immediate
@@ -453,24 +523,21 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
const MCInstrDesc &ADDri =
TII->get(getADDriFromLEA(Opcode, Offset));
const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
- NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
+ NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
.add(SrcR)
.addImm(Offset.getImm());
LLVM_DEBUG(NewMI->dump(););
}
if (NewMI) {
- MFI->erase(I);
+ MBB.erase(I);
I = NewMI;
}
}
MachineInstr *
FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
- MachineFunction::iterator MFI) {
-
- const int LEAOpcode = MI.getOpcode();
- if (!isLEA(LEAOpcode))
- return nullptr;
+ MachineBasicBlock &MBB) {
+ const unsigned LEAOpcode = MI.getOpcode();
const MachineOperand &Dst = MI.getOperand(0);
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
@@ -481,13 +548,13 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
if (!(TII->isThreeOperandsLEA(MI) ||
hasInefficientLEABaseReg(Base, Index)) ||
- !TII->isSafeToClobberEFLAGS(*MFI, MI) ||
+ !TII->isSafeToClobberEFLAGS(MBB, MI) ||
Segment.getReg() != X86::NoRegister)
return nullptr;
- unsigned int DstR = Dst.getReg();
- unsigned int BaseR = Base.getReg();
- unsigned int IndexR = Index.getReg();
+ unsigned DstR = Dst.getReg();
+ unsigned BaseR = Base.getReg();
+ unsigned IndexR = Index.getReg();
unsigned SSDstR =
(LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
bool IsScale1 = Scale.getImm() == 1;
@@ -516,11 +583,11 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
const MachineOperand &Src = DstR == BaseR ? Index : Base;
MachineInstr *NewMI =
- BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
+ BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
LLVM_DEBUG(NewMI->dump(););
// Create ADD instruction for the Offset in case of 3-Ops LEA.
if (hasLEAOffset(Offset)) {
- NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
+ NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
LLVM_DEBUG(NewMI->dump(););
}
return NewMI;
@@ -530,7 +597,7 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
// lea offset(%base,%index,scale),%dst =>
// lea (%base,%index,scale); add offset,%dst
if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
- MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
+ MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
.add(Dst)
.add(IsInefficientBase ? Index : Base)
.add(Scale)
@@ -540,7 +607,7 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
LLVM_DEBUG(NewMI->dump(););
// Create ADD instruction for the Offset in case of 3-Ops LEA.
if (hasLEAOffset(Offset)) {
- NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
+ NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
LLVM_DEBUG(NewMI->dump(););
}
return NewMI;
@@ -552,17 +619,17 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
if (IsScale1 && !hasLEAOffset(Offset)) {
bool BIK = Base.isKill() && BaseR != IndexR;
- TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, BIK);
+ TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK);
LLVM_DEBUG(MI.getPrevNode()->dump(););
MachineInstr *NewMI =
- BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
+ BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
LLVM_DEBUG(NewMI->dump(););
return NewMI;
}
// lea offset(%base,%index,scale), %dst =>
// lea offset( ,%index,scale), %dst; add %base,%dst
- MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
+ MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
.add(Dst)
.addReg(0)
.add(Scale)
@@ -571,35 +638,7 @@ FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
.add(Segment);
LLVM_DEBUG(NewMI->dump(););
- NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
+ NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
LLVM_DEBUG(NewMI->dump(););
return NewMI;
}
-
-bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
- MachineFunction::iterator MFI,
- bool IsSlowLEA, bool IsSlow3OpsLEA) {
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
- if (OptIncDec)
- if (fixupIncDec(I, MFI))
- continue;
-
- if (OptLEA) {
- if (IsSlowLEA) {
- processInstructionForSlowLEA(I, MFI);
- continue;
- }
-
- if (IsSlow3OpsLEA) {
- if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
- MFI->erase(I);
- I = NewMI;
- }
- continue;
- }
-
- processInstruction(I, MFI);
- }
- }
- return false;
-}
diff --git a/lib/Target/X86/X86FixupSetCC.cpp b/lib/Target/X86/X86FixupSetCC.cpp
index a86eb997635e..e2d4d1ede6f3 100644
--- a/lib/Target/X86/X86FixupSetCC.cpp
+++ b/lib/Target/X86/X86FixupSetCC.cpp
@@ -1,9 +1,8 @@
//===---- X86FixupSetCC.cpp - optimize usage of setcc instructions --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,30 +67,6 @@ char X86FixupSetCCPass::ID = 0;
FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
-bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) {
- switch (Opcode) {
- default:
- return false;
- case X86::SETOr:
- case X86::SETNOr:
- case X86::SETBr:
- case X86::SETAEr:
- case X86::SETEr:
- case X86::SETNEr:
- case X86::SETBEr:
- case X86::SETAr:
- case X86::SETSr:
- case X86::SETNSr:
- case X86::SETPr:
- case X86::SETNPr:
- case X86::SETLr:
- case X86::SETGEr:
- case X86::SETLEr:
- case X86::SETGr:
- return true;
- }
-}
-
// We expect the instruction *immediately* before the setcc to imp-def
// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
// scheduling, look backwards until we hit the beginning of the
@@ -103,7 +78,7 @@ X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
auto MBBStart = MBB->rend();
for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
for (auto &Op : MI->implicit_operands())
- if ((Op.getReg() == X86::EFLAGS) && (Op.isDef()))
+ if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isDef())
return &*MI;
return nullptr;
@@ -111,7 +86,7 @@ X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) {
for (auto &Op : MI->implicit_operands())
- if ((Op.getReg() == X86::EFLAGS) && (Op.isUse()))
+ if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isUse())
return true;
return false;
@@ -129,7 +104,7 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
// Find a setcc that is used by a zext.
// This doesn't have to be the only use, the transformation is safe
// regardless.
- if (!isSetCCr(MI.getOpcode()))
+ if (MI.getOpcode() != X86::SETCCr)
continue;
MachineInstr *ZExt = nullptr;
diff --git a/lib/Target/X86/X86FlagsCopyLowering.cpp b/lib/Target/X86/X86FlagsCopyLowering.cpp
index 778aa505b2d9..5ce3255ea96a 100644
--- a/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -1,9 +1,8 @@
//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -71,12 +70,6 @@ STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
-namespace llvm {
-
-void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-
-} // end namespace llvm
-
namespace {
// Convenient array type for storing registers associated with each condition.
@@ -84,9 +77,7 @@ using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;
class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
- X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {
- initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry());
- }
+ X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { }
StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -252,13 +243,13 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
"Split instruction must be in the split block!");
assert(SplitI.isBranch() &&
"Only designed to split a tail of branch instructions!");
- assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID &&
+ assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
"Must split on an actual jCC instruction!");
// Dig out the previous instruction to the split point.
MachineInstr &PrevI = *std::prev(SplitI.getIterator());
assert(PrevI.isBranch() && "Must split after a branch!");
- assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID &&
+ assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
"Must split after an actual jCC instruction!");
assert(!std::prev(PrevI.getIterator())->isTerminator() &&
"Must only have this one terminator prior to the split!");
@@ -588,22 +579,21 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// branch folding or block placement. As a consequence, we get to deal
// with the simpler formulation of conditional branches followed by tail
// calls.
- if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
auto JmpIt = MI.getIterator();
do {
JmpIs.push_back(&*JmpIt);
++JmpIt;
} while (JmpIt != UseMBB.instr_end() &&
- X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
+ X86::getCondFromBranch(*JmpIt) !=
X86::COND_INVALID);
break;
}
// Otherwise we can just rewrite in-place.
- if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
+ if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
- } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
- X86::COND_INVALID) {
+ } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
} else if (MI.getOpcode() == TargetOpcode::COPY) {
rewriteCopy(MI, *FlagUse, CopyDefI);
@@ -730,7 +720,7 @@ CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
// Scan backwards across the range of instructions with live EFLAGS.
for (MachineInstr &MI :
llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
- X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode());
+ X86::CondCode Cond = X86::getCondFromSETCC(MI);
if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() &&
TRI->isVirtualRegister(MI.getOperand(0).getReg())) {
assert(MI.getOperand(0).isDef() &&
@@ -751,7 +741,7 @@ unsigned X86FlagsCopyLoweringPass::promoteCondToReg(
DebugLoc TestLoc, X86::CondCode Cond) {
unsigned Reg = MRI->createVirtualRegister(PromoteRC);
auto SetI = BuildMI(TestMBB, TestPos, TestLoc,
- TII->get(X86::getSETFromCond(Cond)), Reg);
+ TII->get(X86::SETCCr), Reg).addImm(Cond);
(void)SetI;
LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
++NumSetCCsInserted;
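Across these hunks the pass's strategy stays the same: rather than copying EFLAGS itself, every condition the copy's users need is first materialized into a GR8 virtual register with SETCCr (promoteCondToReg), and at each use site the flags are re-created from that byte with a TEST so the rewritten cmov/jcc/setcc only has to look at ZF. A small model of the save-and-retest idea, illustrative and not LLVM code:

    #include <cstdio>

    // Save the condition as a byte (SETCCr into a GR8), then re-derive a usable
    // ZF at the use site with `test %reg,%reg` and branch/select on NE or E.
    static unsigned char savedCond;                           // the promoted GR8
    static void promoteCond(bool cond) { savedCond = cond; }  // SETCCr
    static bool useAsNE() { return savedCond != 0; }          // TEST8rr + COND_NE

    int main() {
      promoteCond(3 < 5);
      std::printf("%d\n", useAsNE() ? 1 : 0); // prints: 1
    }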
@@ -842,7 +832,7 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
MachineOperand &FlagUse,
CondRegArray &CondRegs) {
// First get the register containing this specific condition.
- X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode());
+ X86::CondCode Cond = X86::getCondFromCMov(CMovI);
unsigned CondReg;
bool Inverted;
std::tie(CondReg, Inverted) =
@@ -853,12 +843,10 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
// Insert a direct test of the saved register.
insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);
- // Rewrite the CMov to use the !ZF flag from the test (but match register
- // size and memory operand), and then kill its use of the flags afterward.
- auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg());
- CMovI.setDesc(TII->get(X86::getCMovFromCond(
- Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8,
- !CMovI.memoperands_empty())));
+ // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
+ // of the flags afterward.
+ CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
+ .setImm(Inverted ? X86::COND_E : X86::COND_NE);
FlagUse.setIsKill(true);
LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump());
}
@@ -867,7 +855,7 @@ void X86FlagsCopyLoweringPass::rewriteCondJmp(
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
// First get the register containing this specific condition.
- X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode());
+ X86::CondCode Cond = X86::getCondFromBranch(JmpI);
unsigned CondReg;
bool Inverted;
std::tie(CondReg, Inverted) =
@@ -880,10 +868,8 @@ void X86FlagsCopyLoweringPass::rewriteCondJmp(
// Rewrite the jump to use the !ZF flag from the test, and kill its use of
// flags afterward.
- JmpI.setDesc(TII->get(
- X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE)));
- const int ImplicitEFLAGSOpIdx = 1;
- JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true);
+ JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
+ JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
LLVM_DEBUG(dbgs() << " fixed jCC: "; JmpI.dump());
}
@@ -1026,7 +1012,7 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MachineInstr &SetCCI,
MachineOperand &FlagUse,
CondRegArray &CondRegs) {
- X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
+ X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
// Note that we can't usefully rewrite this to the inverse without complex
// analysis of the users of the setCC. Largely we rely on duplicates which
// could have been avoided already being avoided here.
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index f330acff61a1..074cf21d03f5 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1,9 +1,8 @@
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -60,7 +59,6 @@ namespace {
struct FPS : public MachineFunctionPass {
static char ID;
FPS() : MachineFunctionPass(ID) {
- initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
// This is really only to keep valgrind quiet.
// The logic in isLive() is too much for it.
memset(Stack, 0, sizeof(Stack));
@@ -299,9 +297,16 @@ namespace {
void setKillFlags(MachineBasicBlock &MBB) const;
};
- char FPS::ID = 0;
}
+char FPS::ID = 0;
+
+INITIALIZE_PASS_BEGIN(FPS, DEBUG_TYPE, "X86 FP Stackifier",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_END(FPS, DEBUG_TYPE, "X86 FP Stackifier",
+ false, false)
+
FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
/// getFPReg - Return the X86::FPx register number for the specified operand.
@@ -591,7 +596,7 @@ namespace {
}
static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
- const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode);
+ const TableEntry *I = llvm::lower_bound(Table, Opcode);
if (I != Table.end() && I->from == Opcode)
return I->to;
return -1;
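llvm::lower_bound(Table, Opcode) is the range-based convenience wrapper around std::lower_bound over the whole container; the opcode tables here are expected to be kept sorted, so the lookup stays O(log n). A standalone sketch of the same sorted-table translation pattern, with made-up entries for illustration:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct TableEntry {
      unsigned from; // original opcode
      unsigned to;   // replacement opcode
      bool operator<(unsigned Opcode) const { return from < Opcode; }
    };

    // Sorted-table lookup mirroring the Lookup helper above: -1 if no mapping.
    static int lookup(const std::vector<TableEntry> &Table, unsigned Opcode) {
      auto I = std::lower_bound(Table.begin(), Table.end(), Opcode);
      if (I != Table.end() && I->from == Opcode)
        return static_cast<int>(I->to);
      return -1;
    }

    int main() {
      std::vector<TableEntry> Table = {{10, 100}, {20, 200}, {35, 300}};
      std::printf("%d %d\n", lookup(Table, 20), lookup(Table, 21)); // 200 -1
    }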
@@ -1096,6 +1101,8 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
// Change from the pseudo instruction to the concrete instruction.
MI.RemoveOperand(0); // Remove the explicit ST(0) operand
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
+ MI.addOperand(
+ MachineOperand::CreateReg(X86::ST0, /*isDef*/ true, /*isImp*/ true));
// Result gets pushed on the stack.
pushReg(DestReg);
@@ -1140,6 +1147,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
// Convert from the pseudo instruction to the concrete instruction.
MI.RemoveOperand(NumOps - 1); // Remove explicit ST(0) operand
MI.setDesc(TII->get(getConcreteOpcode(MI.getOpcode())));
+ MI.addOperand(
+ MachineOperand::CreateReg(X86::ST0, /*isDef*/ false, /*isImp*/ true));
if (MI.getOpcode() == X86::IST_FP64m || MI.getOpcode() == X86::ISTT_FP16m ||
MI.getOpcode() == X86::ISTT_FP32m || MI.getOpcode() == X86::ISTT_FP64m ||
@@ -1369,8 +1378,6 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
/// register arguments and no explicit destinations.
///
void FPS::handleCompareFP(MachineBasicBlock::iterator &I) {
- ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table);
- ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable);
MachineInstr &MI = *I;
unsigned NumOperands = MI.getDesc().getNumOperands();
@@ -1475,7 +1482,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
break;
}
- case TargetOpcode::INLINEASM: {
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR: {
// The inline asm MachineInstr currently only *uses* FP registers for the
// 'f' constraint. These should be turned into the current ST(x) register
// in the machine instr.
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 984db12201ed..e310fe069117 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -585,23 +584,23 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
// registers. For the prolog expansion we use RAX, RCX and RDX.
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterClass *RegClass = &X86::GR64RegClass;
- const unsigned SizeReg = InProlog ? (unsigned)X86::RAX
+ const Register SizeReg = InProlog ? X86::RAX
: MRI.createVirtualRegister(RegClass),
- ZeroReg = InProlog ? (unsigned)X86::RCX
+ ZeroReg = InProlog ? X86::RCX
: MRI.createVirtualRegister(RegClass),
- CopyReg = InProlog ? (unsigned)X86::RDX
+ CopyReg = InProlog ? X86::RDX
: MRI.createVirtualRegister(RegClass),
- TestReg = InProlog ? (unsigned)X86::RDX
+ TestReg = InProlog ? X86::RDX
: MRI.createVirtualRegister(RegClass),
- FinalReg = InProlog ? (unsigned)X86::RDX
+ FinalReg = InProlog ? X86::RDX
: MRI.createVirtualRegister(RegClass),
- RoundedReg = InProlog ? (unsigned)X86::RDX
+ RoundedReg = InProlog ? X86::RDX
: MRI.createVirtualRegister(RegClass),
- LimitReg = InProlog ? (unsigned)X86::RCX
+ LimitReg = InProlog ? X86::RCX
: MRI.createVirtualRegister(RegClass),
- JoinReg = InProlog ? (unsigned)X86::RCX
+ JoinReg = InProlog ? X86::RCX
: MRI.createVirtualRegister(RegClass),
- ProbeReg = InProlog ? (unsigned)X86::RCX
+ ProbeReg = InProlog ? X86::RCX
: MRI.createVirtualRegister(RegClass);
// SP-relative offsets where we can save RCX and RDX.
@@ -654,9 +653,10 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
.addReg(CopyReg)
.addReg(SizeReg);
- BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg)
+ BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
.addReg(TestReg)
- .addReg(ZeroReg);
+ .addReg(ZeroReg)
+ .addImm(X86::COND_B);
// FinalReg now holds final stack pointer value, or zero if
// allocation would overflow. Compare against the current stack
@@ -673,7 +673,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
.addReg(X86::GS);
BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
// Jump if the desired stack pointer is at or above the stack limit.
- BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
+ BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
// Add code to roundMBB to round the final stack pointer to a page boundary.
RoundMBB->addLiveIn(FinalReg);
@@ -710,7 +710,7 @@ void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
.addReg(RoundedReg)
.addReg(ProbeReg);
- BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB);
+ BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
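
The JAE_1/JNE_1/JA_1/JLE_1 opcodes replaced throughout this patch were per-condition jump instructions; they are folded into a single JCC_1 opcode that carries the condition as an immediate operand, which is why every BuildMI call gains an .addImm(X86::COND_*). A toy sketch of the same encoding idea (the enum values and struct are made up, not the X86 definitions):

#include <cstdio>

// Invented miniature of the refactor: one generic "Jcc" record plus a
// condition-code operand instead of one opcode per condition.
enum CondCode { COND_A, COND_AE, COND_NE, COND_LE };

struct CondJump {
  int TargetBlock;
  CondCode Cond; // formerly baked into the opcode (JA_1, JAE_1, ...)
};

const char *condName(CondCode CC) {
  switch (CC) {
  case COND_A:  return "ja";
  case COND_AE: return "jae";
  case COND_NE: return "jne";
  case COND_LE: return "jle";
  }
  return "?";
}

int main() {
  CondJump J{3, COND_AE}; // like BuildMI(...JCC_1).addMBB(BB3).addImm(COND_AE)
  std::printf("%s .LBB%d\n", condName(J.Cond), J.TargetBlock);
}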
@@ -794,8 +794,8 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
.addExternalSymbol(MF.createExternalSymbolName(Symbol));
}
- unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
- unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
+ unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
+ unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
CI.addReg(AX, RegState::Implicit)
.addReg(SP, RegState::Implicit)
.addReg(AX, RegState::Define | RegState::Implicit)
@@ -809,7 +809,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
// adjusting %rsp.
// All other platforms do not specify a particular ABI for the stack probe
// function, so we arbitrarily define it to not adjust %esp/%rsp itself.
- BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Is64Bit)), SP)
+ BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
.addReg(SP)
.addReg(AX);
}
@@ -872,6 +872,17 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MI->getOperand(3).setIsDead();
}
+bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
+ // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
+ // clobbered by any interrupt handler.
+ assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
+ "MF used frame lowering for wrong subtarget");
+ const Function &Fn = MF.getFunction();
+ const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
+ return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
+}
+
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -976,7 +987,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
bool HasFP = hasFP(MF);
- bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
// FIXME: Emit FPO data for EH funclets.
@@ -1030,12 +1040,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone). We also check that we don't
// push and pop from the stack.
- if (Is64Bit && !Fn.hasFnAttribute(Attribute::NoRedZone) &&
+ if (has128ByteRedZone(MF) &&
!TRI->needsStackRealignment(MF) &&
!MFI.hasVarSizedObjects() && // No dynamic alloca.
!MFI.adjustsStack() && // No calls.
!UseStackProbe && // No stack probes.
- !IsWin64CC && // Win64 has no Red Zone
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
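
The new has128ByteRedZone hook pulls the red-zone test out of this condition chain: only the 64-bit non-Windows ABI guarantees 128 usable bytes below the stack pointer, and a function can opt out via the noredzone attribute. A small standalone restatement of that predicate (the parameter names are ours, not the X86Subtarget API):

#include <cstdio>

// Sketch of the consolidated test, assuming the three inputs are already
// known: 64-bit target, Win64 calling convention, and the noredzone attribute.
bool hasRedZone(bool Is64Bit, bool IsWin64CC, bool HasNoRedZoneAttr) {
  // Win64 has no red zone, and -mno-red-zone disables it per function.
  return Is64Bit && !IsWin64CC && !HasNoRedZoneAttr;
}

int main() {
  std::printf("%d\n", hasRedZone(true, false, false)); // 1: leaf may skip SP adjust
  std::printf("%d\n", hasRedZone(true, true, false));  // 0: Win64 has no red zone
}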
@@ -1774,6 +1783,15 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;
+ // In an x86 interrupt, remove the offset we added to account for the return
+ // address from any stack object allocated in the caller's frame. Interrupts
+ // do not have a standard return address. Fixed objects in the current frame,
+ // such as SSE register spills, should not get this treatment.
+ if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
+ Offset >= 0) {
+ Offset += getOffsetOfLocalArea();
+ }
+
if (IsWin64Prologue) {
assert(!MFI.hasCalls() || (StackSize % 16) == 8);
@@ -1888,8 +1906,7 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
 // If !hasReservedCallFrame the function might have SP adjustment in the
// body. So, even though the offset is statically known, it depends on where
// we are in the function.
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (!IgnoreSPUpdates && !TFI->hasReservedCallFrame(MF))
+ if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
return getFrameIndexReference(MF, FI, FrameReg);
// We don't handle tail calls, and shouldn't be seeing them either.
@@ -2407,7 +2424,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&PrologueMBB);
+ BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
// On 32 bit we first push the arguments size and then the frame size. On 64
// bit, we pass the stack frame size in r10 and the argument size in r11.
@@ -2637,7 +2654,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
// SPLimitOffset is in a fixed heap location (pointed by BP).
addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
.addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&PrologueMBB);
+ BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
// Create new MBB for IncStack:
BuildMI(incStackMBB, DL, TII.get(CALLop)).
@@ -2646,7 +2663,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
SPReg, false, -MaxStack);
addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
.addReg(ScratchReg), PReg, false, SPLimitOffset);
- BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
+ BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
@@ -2802,7 +2819,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
if (StackAdjustment) {
- if (!(F.optForMinSize() &&
+ if (!(F.hasMinSize() &&
adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
/*InEpilogue=*/false);
@@ -3079,8 +3096,7 @@ void X86FrameLowering::orderFrameObjects(
// Sort the objects using X86FrameSortingAlgorithm (see its comment for
// info).
- std::stable_sort(SortingObjects.begin(), SortingObjects.end(),
- X86FrameSortingComparator());
+ llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
// Now modify the original list to represent the final order that
// we want. The order will depend on whether we're going to access them
@@ -3154,7 +3170,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
int UnwindHelpFI =
- MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
+ MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
// Store -2 into UnwindHelp on function entry. We have to scan forwards past
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 3bd805aae123..d32746e3a36e 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -1,9 +1,8 @@
//===-- X86TargetFrameLowering.h - Define frame lowering for X86 -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -172,6 +171,10 @@ public:
unsigned getInitialCFARegister(const MachineFunction &MF) const override;
+ /// Return true if the function has a redzone (accessible bytes past the
+ /// frame of the top of stack function) as part of its ABI.
+ bool has128ByteRedZone(const MachineFunction& MF) const;
+
private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
diff --git a/lib/Target/X86/X86GenRegisterBankInfo.def b/lib/Target/X86/X86GenRegisterBankInfo.def
index 9cd3f96f83ac..0fdea9071c29 100644
--- a/lib/Target/X86/X86GenRegisterBankInfo.def
+++ b/lib/Target/X86/X86GenRegisterBankInfo.def
@@ -1,9 +1,8 @@
//===- X86GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5ac153244df9..95d31e62cafc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,7 @@ namespace {
int JT;
unsigned Align; // CP alignment.
unsigned char SymbolFlags; // X86II::MO_*
+ bool NegateIndex = false;
X86ISelAddressMode()
: BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
@@ -116,6 +116,8 @@ namespace {
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
dbgs() << " Scale " << Scale << '\n'
<< "IndexReg ";
+ if (NegateIndex)
+ dbgs() << "negate ";
if (IndexReg.getNode())
IndexReg.getNode()->dump(DAG);
else
@@ -170,8 +172,8 @@ namespace {
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), OptForSize(false),
- OptForMinSize(false) {}
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false),
+ OptForMinSize(false), IndirectTlsSegRefs(false) {}
StringRef getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -182,6 +184,13 @@ namespace {
Subtarget = &MF.getSubtarget<X86Subtarget>();
IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
"indirect-tls-seg-refs");
+
+ // OptFor[Min]Size are used in pattern predicates that isel is matching.
+ OptForSize = MF.getFunction().hasOptSize();
+ OptForMinSize = MF.getFunction().hasMinSize();
+ assert((!OptForMinSize || OptForSize) &&
+ "OptForMinSize implies OptForSize");
+
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -204,7 +213,7 @@ namespace {
bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
bool matchAddress(SDValue N, X86ISelAddressMode &AM);
bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
- bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
+ bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
@@ -252,16 +261,32 @@ namespace {
void emitSpecialCodeForMain();
inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
- SDValue &Base, SDValue &Scale,
+ MVT VT, SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
- Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
- ? CurDAG->getTargetFrameIndex(
- AM.Base_FrameIndex,
- TLI->getPointerTy(CurDAG->getDataLayout()))
- : AM.Base_Reg;
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
+ Base = CurDAG->getTargetFrameIndex(
+ AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
+ else if (AM.Base_Reg.getNode())
+ Base = AM.Base_Reg;
+ else
+ Base = CurDAG->getRegister(0, VT);
+
Scale = getI8Imm(AM.Scale, DL);
- Index = AM.IndexReg;
+
+ // Negate the index if needed.
+ if (AM.NegateIndex) {
+ unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
+ SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
+ AM.IndexReg), 0);
+ AM.IndexReg = Neg;
+ }
+
+ if (AM.IndexReg.getNode())
+ Index = AM.IndexReg;
+ else
+ Index = CurDAG->getRegister(0, VT);
+
// These are 32-bit even in 64-bit mode since RIP-relative offset
// is 32-bit.
if (AM.GV)
@@ -290,7 +315,7 @@ namespace {
if (AM.Segment.getNode())
Segment = AM.Segment;
else
- Segment = CurDAG->getRegister(0, MVT::i32);
+ Segment = CurDAG->getRegister(0, MVT::i16);
}
// Utility function to determine whether we should avoid selecting
@@ -400,6 +425,19 @@ namespace {
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
}
+ // Helper to detect unneeded 'and' instructions on shift amounts. Called
+ // from PatFrags in tablegen.
+ bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
+ assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
+ const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+
+ if (Val.countTrailingOnes() >= Width)
+ return true;
+
+ APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
+ return Mask.countTrailingOnes() >= Width;
+ }
+
/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
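
The isUnneededShiftMask helper added above asks whether an AND applied to a shift amount is redundant: hardware shifts only consume the low log2(width) bits of the amount, so a mask whose trailing ones cover that many bits (counting bits already known to be zero) changes nothing. A standalone sketch of the bit test, using plain uint64_t values instead of APInt/KnownBits:

#include <cstdint>
#include <cstdio>

// Count trailing one bits of a 64-bit value.
static unsigned countTrailingOnes(uint64_t V) {
  unsigned N = 0;
  while (V & 1) { V >>= 1; ++N; }
  return N;
}

// The mask is unneeded if its low 'Width' bits are all ones, possibly helped
// by bits of the shift amount that are already known to be zero.
bool isUnneededShiftMask(uint64_t Mask, uint64_t KnownZeroOfAmt,
                         unsigned Width) {
  if (countTrailingOnes(Mask) >= Width)
    return true;
  return countTrailingOnes(Mask | KnownZeroOfAmt) >= Width;
}

int main() {
  // A 32-bit shift only uses the low 5 bits of its amount, so "amt & 31" is free.
  std::printf("%d\n", isUnneededShiftMask(0x1f, 0, 5)); // 1
  std::printf("%d\n", isUnneededShiftMask(0x0f, 0, 5)); // 0
}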
@@ -464,6 +502,8 @@ namespace {
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);
+ bool tryShrinkShlLogicImm(SDNode *N);
+ bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
@@ -485,7 +525,7 @@ namespace {
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
unsigned Opcode = N->getOpcode();
if (Opcode == X86ISD::CMPM || Opcode == ISD::SETCC ||
- Opcode == X86ISD::CMPM_RND || Opcode == X86ISD::VFPCLASS) {
+ Opcode == X86ISD::CMPM_SAE || Opcode == X86ISD::VFPCLASS) {
// We can get 256-bit 8 element types here without VLX being enabled. When
// this happens we will use 512-bit operations and the mask will not be
// zero extended.
@@ -497,7 +537,7 @@ static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
}
// Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
- Opcode == X86ISD::FSETCCM_RND)
+ Opcode == X86ISD::FSETCCM_SAE)
return true;
return false;
@@ -571,6 +611,21 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
Imm->getAPIntValue().getBitWidth() == 64 &&
Imm->getAPIntValue().isIntN(32))
return false;
+
+ // If this is really a zext_inreg that can be represented with a movzx
+ // instruction, prefer that.
+ // TODO: We could shrink the load and fold if it is non-volatile.
+ if (U->getOpcode() == ISD::AND &&
+ (Imm->getAPIntValue() == UINT8_MAX ||
+ Imm->getAPIntValue() == UINT16_MAX ||
+ Imm->getAPIntValue() == UINT32_MAX))
+ return false;
+
+ // ADD/SUB can negate the immediate and use the opposite operation
+ // to fit 128 into a sign-extended 8-bit immediate.
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
+ (-Imm->getAPIntValue()).isSignedIntN(8))
+ return false;
}
// If the other operand is a TLS address, we should fold it instead.
@@ -720,11 +775,6 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
- // OptFor[Min]Size are used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction().optForSize();
- OptForMinSize = MF->getFunction().optForMinSize();
- assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
-
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
@@ -741,6 +791,143 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}
+ switch (N->getOpcode()) {
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ // Replace vector fp_to_s/uint with their X86 specific equivalents so we
+ // don't need 2 sets of patterns.
+ if (!N->getSimpleValueType(0).isVector())
+ break;
+
+ unsigned NewOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
+ case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
+ }
+ SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: {
+ // Replace vector shifts with their X86 specific equivalents so we don't
+ // need 2 sets of patterns.
+ if (!N->getValueType(0).isVector())
+ break;
+
+ unsigned NewOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
+ case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
+ case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
+ }
+ SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
+ // Replace vector any_extend with the zero_extend equivalents so we don't
+ // need 2 sets of patterns. Ignore vXi1 extensions.
+ if (!N->getValueType(0).isVector() ||
+ N->getOperand(0).getScalarValueSizeInBits() == 1)
+ break;
+
+ unsigned NewOpc = N->getOpcode() == ISD::ANY_EXTEND
+ ? ISD::ZERO_EXTEND
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+
+ SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT: {
+ // Replace fp rounding ops with their X86 specific equivalents so we don't
+ // need 2 sets of patterns.
+ unsigned Imm;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::FCEIL: Imm = 0xA; break;
+ case ISD::FFLOOR: Imm = 0x9; break;
+ case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::FNEARBYINT: Imm = 0xC; break;
+ case ISD::FRINT: Imm = 0x4; break;
+ }
+ SDLoc dl(N);
+ SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+ N->getValueType(0),
+ N->getOperand(0),
+ CurDAG->getConstant(Imm, dl, MVT::i8));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ case X86ISD::FANDN:
+ case X86ISD::FAND:
+ case X86ISD::FOR:
+ case X86ISD::FXOR: {
+ // Widen scalar fp logic ops to vector to reduce isel patterns.
+ // FIXME: Can we do this during lowering/combine.
+ MVT VT = N->getSimpleValueType(0);
+ if (VT.isVector() || VT == MVT::f128)
+ break;
+
+ MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
+ SDLoc dl(N);
+ SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+ N->getOperand(0));
+ SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+ N->getOperand(1));
+
+ SDValue Res;
+ if (Subtarget->hasSSE2()) {
+ EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
+ Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
+ Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
+ case X86ISD::FAND: Opc = ISD::AND; break;
+ case X86ISD::FOR: Opc = ISD::OR; break;
+ case X86ISD::FXOR: Opc = ISD::XOR; break;
+ }
+ Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
+ Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
+ } else {
+ Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
+ }
+ Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
+ CurDAG->getIntPtrConstant(0, dl));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ }
+
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
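
The FANDN/FAND/FOR/FXOR widening above turns a scalar floating-point bitwise op into a vector one and, when SSE2 is available, bitcasts it into the integer domain; this is legal because the operation only manipulates the bit pattern, never the numeric value. A tiny standalone illustration of why the bitcast is safe (std::memcpy stands in for the BITCAST nodes):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Bitwise AND of two floats done in the integer domain; the result has the
// same bits an FAND would produce because no FP arithmetic is involved.
float fand(float A, float B) {
  uint32_t IA, IB;
  std::memcpy(&IA, &A, sizeof(float)); // "bitcast" f32 -> i32
  std::memcpy(&IB, &B, sizeof(float));
  uint32_t IR = IA & IB;
  float R;
  std::memcpy(&R, &IR, sizeof(float)); // "bitcast" back to f32
  return R;
}

int main() {
  // ANDing with the sign-bit-cleared mask 0x7fffffff implements fabs().
  float MaskBits;
  uint32_t M = 0x7fffffffu;
  std::memcpy(&MaskBits, &M, sizeof(float));
  std::printf("%f\n", fand(-2.5f, MaskBits)); // 2.500000
}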
@@ -786,65 +973,135 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// and the node legalization. As such this pass basically does "really
// late" legalization of these inline with the X86 isel pass.
// FIXME: This should only happen when not compiled with -O0.
- if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
- continue;
+ switch (N->getOpcode()) {
+ default: continue;
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ {
+ MVT SrcVT = N->getOperand(0).getSimpleValueType();
+ MVT DstVT = N->getSimpleValueType(0);
+
+ // If any of the sources are vectors, no fp stack involved.
+ if (SrcVT.isVector() || DstVT.isVector())
+ continue;
- MVT SrcVT = N->getOperand(0).getSimpleValueType();
- MVT DstVT = N->getSimpleValueType(0);
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ const X86TargetLowering *X86Lowering =
+ static_cast<const X86TargetLowering *>(TLI);
+ bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
+ continue;
- // If any of the sources are vectors, no fp stack involved.
- if (SrcVT.isVector() || DstVT.isVector())
- continue;
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(1))
+ continue;
+ }
- // If the source and destination are SSE registers, then this is a legal
- // conversion that should not be lowered.
- const X86TargetLowering *X86Lowering =
- static_cast<const X86TargetLowering *>(TLI);
- bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
- bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
- if (SrcIsSSE && DstIsSSE)
- continue;
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ MVT MemVT;
+ if (N->getOpcode() == ISD::FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ SDLoc dl(N);
- if (!SrcIsSSE && !DstIsSSE) {
- // If this is an FPStack extension, it is a noop.
- if (N->getOpcode() == ISD::FP_EXTEND)
+ // FIXME: optimize the case where the src/dest is a load or store?
+
+ SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
+ MemTmp, MachinePointerInfo(), MemVT);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(), MemVT);
+
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havoc on the DAG because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ break;
+ }
+
+ // The sequence of events for lowering STRICT_FP versions of these nodes requires
+ // dealing with the chain differently, as there is already a preexisting chain.
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
+ {
+ MVT SrcVT = N->getOperand(1).getSimpleValueType();
+ MVT DstVT = N->getSimpleValueType(0);
+
+ // If any of the sources are vectors, no fp stack involved.
+ if (SrcVT.isVector() || DstVT.isVector())
continue;
- // If this is a value-preserving FPStack truncation, it is a noop.
- if (N->getConstantOperandVal(1))
+
+ // If the source and destination are SSE registers, then this is a legal
+ // conversion that should not be lowered.
+ const X86TargetLowering *X86Lowering =
+ static_cast<const X86TargetLowering *>(TLI);
+ bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+ bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+ if (SrcIsSSE && DstIsSSE)
continue;
- }
- // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
- // FPStack has extload and truncstore. SSE can fold direct loads into other
- // operations. Based on this, decide what we want to do.
- MVT MemVT;
- if (N->getOpcode() == ISD::FP_ROUND)
- MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
- else
- MemVT = SrcIsSSE ? SrcVT : DstVT;
+ if (!SrcIsSSE && !DstIsSSE) {
+ // If this is an FPStack extension, it is a noop.
+ if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
+ continue;
+ // If this is a value-preserving FPStack truncation, it is a noop.
+ if (N->getConstantOperandVal(2))
+ continue;
+ }
+
+ // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+ // FPStack has extload and truncstore. SSE can fold direct loads into other
+ // operations. Based on this, decide what we want to do.
+ MVT MemVT;
+ if (N->getOpcode() == ISD::STRICT_FP_ROUND)
+ MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+ else
+ MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+ SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+ SDLoc dl(N);
+
+ // FIXME: optimize the case where the src/dest is a load or store?
- SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
- SDLoc dl(N);
+ // Since the operation is StrictFP, use the preexisting chain.
+ SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
+ MemTmp, MachinePointerInfo(), MemVT);
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ MachinePointerInfo(), MemVT);
- // FIXME: optimize the case where the src/dest is a load or store?
- SDValue Store =
- CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
- MemTmp, MachinePointerInfo(), MemVT);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
- MachinePointerInfo(), MemVT);
+ // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+ // extload we created. This will cause general havoc on the DAG because
+ // anything below the conversion could be folded into other existing nodes.
+ // To avoid invalidating 'I', back it up to the convert node.
+ --I;
+ CurDAG->ReplaceAllUsesWith(N, Result.getNode());
+ break;
+ }
+ }
- // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
- // extload we created. This will cause general havok on the dag because
- // anything below the conversion could be folded into other existing nodes.
- // To avoid invalidating 'I', back it up to the convert node.
- --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
CurDAG->DeleteNode(N);
}
+
+ // The load+call transform above can leave some dead nodes in the graph. Make
+ // sure we remove them. It's possible some of the other transforms do too, so
+ // just remove dead nodes unconditionally.
+ CurDAG->RemoveDeadNodes();
}
// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
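
The FP_ROUND/FP_EXTEND (and new STRICT_FP) handling above lowers an x87 <-> SSE conversion by storing to a stack temporary in the memory type and reloading, since there is no direct register-to-register move between the two register files. In ordinary C++ terms the round trip looks roughly like this (purely conceptual; the selector emits a truncstore plus extload, not calls):

#include <cstdio>

// Conceptual round trip: the value leaves one register file through memory
// in the narrower type (here f32) and re-enters the other one from memory.
double throughStackSlot(double OnFPStack) {
  float Slot = static_cast<float>(OnFPStack); // "truncstore" to the temporary
  return Slot;                                // "extload" into the destination
}

int main() {
  std::printf("%f\n", throughStackSlot(1.5)); // 1.500000 (exactly representable)
}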
@@ -1138,15 +1395,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
if (AM.hasSymbolicDisplacement())
return true;
+ bool IsRIPRelTLS = false;
bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
+ if (IsRIPRel) {
+ SDValue Val = N.getOperand(0);
+ if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+ IsRIPRelTLS = true;
+ }
- // We can't use an addressing mode in the 64-bit large code model. In the
- // medium code model, we use can use an mode when RIP wrappers are present.
- // That signifies access to globals that are known to be "near", such as the
- // GOT itself.
+ // We can't use an addressing mode in the 64-bit large code model.
+ // Global TLS addressing is an exception. In the medium code model,
+ // we can use a mode when RIP wrappers are present.
+ // That signifies access to globals that are known to be "near",
+ // such as the GOT itself.
CodeModel::Model M = TM.getCodeModel();
if (Subtarget->is64Bit() &&
- (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
+ ((M == CodeModel::Large && !IsRIPRelTLS) ||
+ (M == CodeModel::Medium && !IsRIPRel)))
return true;
// Base and index reg must be 0 in order to use %rip as base.
@@ -1212,20 +1477,25 @@ bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
// Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
// because it has a smaller encoding.
// TODO: Which other code models can use this?
- if (TM.getCodeModel() == CodeModel::Small &&
- Subtarget->is64Bit() &&
- AM.Scale == 1 &&
- AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() == nullptr &&
- AM.IndexReg.getNode() == nullptr &&
- AM.SymbolFlags == X86II::MO_NO_FLAG &&
- AM.hasSymbolicDisplacement())
- AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+ switch (TM.getCodeModel()) {
+ default: break;
+ case CodeModel::Small:
+ case CodeModel::Kernel:
+ if (Subtarget->is64Bit() &&
+ AM.Scale == 1 &&
+ AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == nullptr &&
+ AM.IndexReg.getNode() == nullptr &&
+ AM.SymbolFlags == X86II::MO_NO_FLAG &&
+ AM.hasSymbolicDisplacement())
+ AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
+ break;
+ }
return false;
}
-bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
+bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
unsigned Depth) {
// Add an artificial use to this node so that we can keep track of
// it if it gets CSE'd with a different node.
@@ -1317,6 +1587,7 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, ShlCount);
insertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
+ DAG.RemoveDeadNode(N.getNode());
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
return false;
@@ -1326,13 +1597,31 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
- uint64_t Mask,
- SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
+ SDValue Shift = N.getOperand(0);
+
+ // Use a signed mask so that shifting right will insert sign bits. These
+ // bits will be removed when we shift the result left so it doesn't matter
+ // what we use. This might allow a smaller immediate encoding.
+ int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
+
+ // If we have an any_extend feeding the AND, look through it to see if there
+ // is a shift behind it. But only if the AND doesn't use the extended bits.
+ // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
+ bool FoundAnyExtend = false;
+ if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
+ Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
+ isUInt<32>(Mask)) {
+ FoundAnyExtend = true;
+ Shift = Shift.getOperand(0);
+ }
+
if (Shift.getOpcode() != ISD::SHL ||
!isa<ConstantSDNode>(Shift.getOperand(1)))
return true;
+ SDValue X = Shift.getOperand(0);
+
// Not likely to be profitable if either the AND or SHIFT node has more
// than one use (unless all uses are for address computation). Besides,
// isel mechanism requires their node ids to be reused.
@@ -1346,6 +1635,12 @@ static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
+ if (FoundAnyExtend) {
+ SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
+ insertDAGNode(DAG, N, NewX);
+ X = NewX;
+ }
+
SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
@@ -1359,6 +1654,7 @@ static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewAnd);
insertDAGNode(DAG, N, NewShift);
DAG.ReplaceAllUsesWith(N, NewShift);
+ DAG.RemoveDeadNode(N.getNode());
AM.Scale = 1 << ShiftAmt;
AM.IndexReg = NewAnd;
@@ -1469,6 +1765,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewSHLAmt);
insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
+ DAG.RemoveDeadNode(N.getNode());
AM.Scale = 1 << AMShiftAmt;
AM.IndexReg = NewSRL;
@@ -1527,6 +1824,7 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewSHLAmt);
insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
+ DAG.RemoveDeadNode(N.getNode());
AM.Scale = 1 << AMShiftAmt;
AM.IndexReg = NewAnd;
@@ -1634,14 +1932,15 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Scale must not be used already.
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
+ "Unexpected value size!");
+
SDValue And = N.getOperand(0);
if (And.getOpcode() != ISD::AND) break;
SDValue X = And.getOperand(0);
- // We only handle up to 64-bit values here as those are what matter for
- // addressing mode optimizations.
- if (X.getSimpleValueType().getSizeInBits() > 64) break;
-
// The mask used for the transform is expected to be post-shift, but we
// found the shift first so just apply the shift to the mask before passing
// it down.
@@ -1712,9 +2011,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
+ N = Handle.getValue();
AM = Backup;
break;
}
+ N = Handle.getValue();
// Test if the index field is free for use.
if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
AM = Backup;
@@ -1722,7 +2023,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
int Cost = 0;
- SDValue RHS = Handle.getValue().getOperand(1);
+ SDValue RHS = N.getOperand(1);
// If the RHS involves a register with multiple uses, this
// transformation incurs an extra mov, due to the neg instruction
// clobbering its operand.
@@ -1735,9 +2036,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
++Cost;
// If the base is a register with multiple uses, this
// transformation may save a mov.
- // FIXME: Don't rely on DELETED_NODEs.
if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
- AM.Base_Reg->getOpcode() != ISD::DELETED_NODE &&
!AM.Base_Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
@@ -1754,14 +2053,11 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
// Ok, the transformation is legal and appears profitable. Go for it.
- SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
- SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
- AM.IndexReg = Neg;
+ // Negation will be emitted later to avoid creating dangling nodes if this
+ // was an unprofitable LEA.
+ AM.IndexReg = RHS;
+ AM.NegateIndex = true;
AM.Scale = 1;
-
- // Insert the new nodes into the topological ordering.
- insertDAGNode(*CurDAG, Handle.getValue(), Zero);
- insertDAGNode(*CurDAG, Handle.getValue(), Neg);
return false;
}
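
The NegateIndex change defers the negation: instead of materializing 0 - RHS up front (and leaving dead nodes behind if the LEA turns out to be unprofitable), the addressing mode just records that the index must be negated and getAddressOperands emits the NEG later. The address algebra it relies on is simply base - idx == base + (-idx)*1, for example:

#include <cstdint>
#include <cstdio>

// base - idx expressed through the scaled-index slot of an x86 address.
uint64_t subViaNegatedIndex(uint64_t Base, uint64_t Idx) {
  uint64_t NegIdx = 0 - Idx; // the NEG32r/NEG64r emitted by the selector
  return Base + NegIdx * 1;  // lea r, [Base + NegIdx*1]
}

int main() {
  std::printf("%llu\n",
              (unsigned long long)subViaNegatedIndex(1000, 40)); // 960
}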
@@ -1789,37 +2085,77 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Scale must not be used already.
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
- SDValue Shift = N.getOperand(0);
- if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
- SDValue X = Shift.getOperand(0);
-
// We only handle up to 64-bit values here as those are what matter for
// addressing mode optimizations.
- if (X.getSimpleValueType().getSizeInBits() > 64) break;
+ assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
+ "Unexpected value size!");
if (!isa<ConstantSDNode>(N.getOperand(1)))
break;
- uint64_t Mask = N.getConstantOperandVal(1);
- // Try to fold the mask and shift into an extract and scale.
- if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
- return false;
+ if (N.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue Shift = N.getOperand(0);
+ SDValue X = Shift.getOperand(0);
- // Try to fold the mask and shift directly into the scale.
- if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
- return false;
+ uint64_t Mask = N.getConstantOperandVal(1);
+
+ // Try to fold the mask and shift into an extract and scale.
+ if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+
+ // Try to fold the mask and shift directly into the scale.
+ if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+
+ // Try to fold the mask and shift into BEXTR and scale.
+ if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+ return false;
+ }
// Try to swap the mask and shift to place shifts which can be done as
// a scale on the outside of the mask.
- if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
- return false;
-
- // Try to fold the mask and shift into BEXTR and scale.
- if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
+ if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
return false;
break;
}
+ case ISD::ZERO_EXTEND: {
+ // Try to widen a zexted shift left to the same size as its use, so we can
+ // match the shift as a scale factor.
+ if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
+ break;
+ if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
+ break;
+
+ // Give up if the shift is not a valid scale factor [1,2,3].
+ SDValue Shl = N.getOperand(0);
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
+ if (!ShAmtC || ShAmtC->getZExtValue() > 3)
+ break;
+
+ // The narrow shift must only shift out zero bits (it must be 'nuw').
+ // That makes it safe to widen to the destination type.
+ APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
+ ShAmtC->getZExtValue());
+ if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
+ break;
+
+ // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
+ MVT VT = N.getSimpleValueType();
+ SDLoc DL(N);
+ SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
+ SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
+
+ // Convert the shift to scale factor.
+ AM.Scale = 1 << ShAmtC->getZExtValue();
+ AM.IndexReg = Zext;
+
+ insertDAGNode(*CurDAG, N, Zext);
+ insertDAGNode(*CurDAG, N, NewShl);
+ CurDAG->ReplaceAllUsesWith(N, NewShl);
+ CurDAG->RemoveDeadNode(N.getNode());
+ return false;
+ }
}
return matchAddressBase(N, AM);
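
The new ZERO_EXTEND case matches zext(shl nuw x, C) and widens it so the shift can become the LEA scale factor (C in [1,3], i.e. scale 2, 4, or 8). The rewrite is only safe because 'nuw' guarantees the narrow shift discards no set bits, so shifting after the extension yields the same value; a quick standalone check of that identity:

#include <cstdint>
#include <cstdio>

// zext(shl nuw i8 x, C) == shl (zext i8 x), C  when x's top C bits are zero.
int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned C = 1; C <= 3; ++C) {
      bool HighBitsZero = (X >> (8 - C)) == 0; // the 'nuw' precondition
      if (!HighBitsZero)
        continue;
      uint32_t Narrow = (uint8_t)(X << C); // shift in i8, then zero extend
      uint32_t Wide = (uint32_t)X << C;    // zero extend, then shift in i32
      if (Narrow != Wide)
        std::printf("mismatch at x=%u c=%u\n", X, C);
    }
  }
  std::printf("identity holds for all nuw cases\n");
}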
@@ -1885,17 +2221,14 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (AddrSpace == 258)
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
+ SDLoc DL(N);
+ MVT VT = N.getSimpleValueType();
+
// Try to match into the base and displacement fields.
if (matchVectorAddress(N, AM))
return false;
- MVT VT = N.getSimpleValueType();
- if (AM.BaseType == X86ISelAddressMode::RegBase) {
- if (!AM.Base_Reg.getNode())
- AM.Base_Reg = CurDAG->getRegister(0, VT);
- }
-
- getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1917,6 +2250,8 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
+ Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
+ Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
unsigned AddrSpace =
@@ -1930,19 +2265,14 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
}
- if (matchAddress(N, AM))
- return false;
-
+ // Save the DL and VT before calling matchAddress, it can invalidate N.
+ SDLoc DL(N);
MVT VT = N.getSimpleValueType();
- if (AM.BaseType == X86ISelAddressMode::RegBase) {
- if (!AM.Base_Reg.getNode())
- AM.Base_Reg = CurDAG->getRegister(0, VT);
- }
- if (!AM.IndexReg.getNode())
- AM.IndexReg = CurDAG->getRegister(0, VT);
+ if (matchAddress(N, AM))
+ return false;
- getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1974,12 +2304,14 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
if (!hasSingleUsesFromRoot(Root, Parent))
return false;
- // We can allow a full vector load here since narrowing a load is ok.
+ // We can allow a full vector load here since narrowing a load is ok unless
+ // it's volatile.
if (ISD::isNON_EXTLoad(N.getNode())) {
- PatternNodeWithChain = N;
- if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
- LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!LD->isVolatile() &&
+ IsProfitableToFold(N, LD, Root) &&
+ IsLegalToFold(N, Parent, Root, OptLevel)) {
+ PatternNodeWithChain = N;
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
Segment);
}
@@ -2010,23 +2342,6 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
}
}
- // Also handle the case where we explicitly require zeros in the top
- // elements. This is a vector shuffle from the zero vector.
- if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
- // Check to see if the top elements are all zeros (or bitcast of zeros).
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
- N.getOperand(0).getNode()->hasOneUse()) {
- PatternNodeWithChain = N.getOperand(0).getOperand(0);
- if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
- IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
- // Okay, this is a zero extending load. Fold it.
- LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
- Segment);
- }
- }
-
return false;
}
@@ -2077,14 +2392,12 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
if (RN && RN->getReg() == 0)
Base = CurDAG->getRegister(0, MVT::i64);
- else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(Base)) {
+ else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
// Base could already be %rip, particularly in the x32 ABI.
- Base = SDValue(CurDAG->getMachineNode(
- TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
- CurDAG->getTargetConstant(0, DL, MVT::i64),
- Base,
- CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
- 0);
+ SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
+ MVT::i64), 0);
+ Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
+ Base);
}
RN = dyn_cast<RegisterSDNode>(Index);
@@ -2093,13 +2406,10 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
else {
assert(Index.getValueType() == MVT::i32 &&
"Expect to be extending 32-bit registers for use in LEA");
- Index = SDValue(CurDAG->getMachineNode(
- TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
- CurDAG->getTargetConstant(0, DL, MVT::i64),
- Index,
- CurDAG->getTargetConstant(X86::sub_32bit, DL,
- MVT::i32)),
- 0);
+ SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
+ MVT::i64), 0);
+ Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
+ Index);
}
return true;
@@ -2128,18 +2438,13 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
AM.Segment = Copy;
unsigned Complexity = 0;
- if (AM.BaseType == X86ISelAddressMode::RegBase)
- if (AM.Base_Reg.getNode())
- Complexity = 1;
- else
- AM.Base_Reg = CurDAG->getRegister(0, VT);
+ if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
+ Complexity = 1;
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.getNode())
Complexity++;
- else
- AM.IndexReg = CurDAG->getRegister(0, VT);
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
// a simple shift.
@@ -2159,14 +2464,14 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
Complexity += 2;
}
- if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
+ if (AM.Disp)
Complexity++;
// If it isn't worth using an LEA, reject it.
if (Complexity <= 2)
return false;
- getAddressOperands(AM, DL, Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -2180,17 +2485,15 @@ bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
- AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
AM.SymbolFlags = GA->getTargetFlags();
- if (N.getValueType() == MVT::i32) {
+ MVT VT = N.getSimpleValueType();
+ if (VT == MVT::i32) {
AM.Scale = 1;
AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
- } else {
- AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
- getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
+ getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -2274,14 +2577,22 @@ bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
CR->getSignedMax().slt(1ull << Width);
}
-static X86::CondCode getCondFromOpc(unsigned Opc) {
+static X86::CondCode getCondFromNode(SDNode *N) {
+ assert(N->isMachineOpcode() && "Unexpected node");
X86::CondCode CC = X86::COND_INVALID;
- if (CC == X86::COND_INVALID)
- CC = X86::getCondFromBranchOpc(Opc);
- if (CC == X86::COND_INVALID)
- CC = X86::getCondFromSETOpc(Opc);
- if (CC == X86::COND_INVALID)
- CC = X86::getCondFromCMovOpc(Opc);
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == X86::JCC_1)
+ CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
+ else if (Opc == X86::SETCCr)
+ CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
+ else if (Opc == X86::SETCCm)
+ CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
+ else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
+ Opc == X86::CMOV64rr)
+ CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
+ else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
+ Opc == X86::CMOV64rm)
+ CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));
return CC;
}
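
getCondFromNode replaces the old per-opcode helpers: with the condition now an explicit immediate, the only per-opcode knowledge left is which operand holds it, and the memory forms sit later because an x86 memory reference occupies five operands (base, scale, index, displacement, segment). A rough sketch of that operand-position table (indices taken from the switch above; the struct itself is illustrative):

#include <cstdio>

// Which operand of each machine-instruction form carries the X86 condition
// code, per getCondFromNode above.
struct CondOperandIndex {
  const char *Form;
  int Index;
};

int main() {
  const CondOperandIndex Table[] = {
      {"JCC_1 (imm after the target block)", 1},
      {"SETCCr (register form)", 0},
      {"SETCCm (after the 5 memory operands)", 5},
      {"CMOVrr (after the two register sources)", 2},
      {"CMOVrm (after a register + 5 memory operands)", 6},
  };
  for (const auto &E : Table)
    std::printf("%-45s operand %d\n", E.Form, E.Index);
}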
@@ -2307,7 +2618,7 @@ bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode()) return false;
// Examine the condition code of the user.
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+ X86::CondCode CC = getCondFromNode(*FlagUI);
switch (CC) {
// Comparisons which only use the zero flag.
@@ -2343,7 +2654,7 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode()) return false;
// Examine the condition code of the user.
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+ X86::CondCode CC = getCondFromNode(*FlagUI);
switch (CC) {
// Comparisons which don't examine the SF flag.
@@ -2404,7 +2715,7 @@ static bool mayUseCarryFlag(X86::CondCode CC) {
if (!FlagUI->isMachineOpcode())
return false;
// Examine the condition code of the user.
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+ X86::CondCode CC = getCondFromNode(*FlagUI);
if (mayUseCarryFlag(CC))
return false;
@@ -2582,10 +2893,13 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
return false;
bool IsCommutable = false;
+ bool IsNegate = false;
switch (Opc) {
default:
return false;
case X86ISD::SUB:
+ IsNegate = isNullConstant(StoredVal.getOperand(0));
+ break;
case X86ISD::SBB:
break;
case X86ISD::ADD:
@@ -2597,7 +2911,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
break;
}
- unsigned LoadOpNo = 0;
+ unsigned LoadOpNo = IsNegate ? 1 : 0;
LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
@@ -2635,11 +2949,20 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
MachineSDNode *Result;
switch (Opc) {
- case X86ISD::ADD:
case X86ISD::SUB:
+ // Handle negate.
+ if (IsNegate) {
+ unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
+ X86::NEG8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+ MVT::Other, Ops);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::ADD:
// Try to match inc/dec.
- if (!Subtarget->slowIncDec() ||
- CurDAG->getMachineFunction().getFunction().optForSize()) {
+ if (!Subtarget->slowIncDec() || OptForSize) {
bool IsOne = isOneConstant(StoredVal.getOperand(1));
bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
// ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
@@ -2740,16 +3063,15 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
// See if the operand is a constant that we can fold into an immediate
// operand.
if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
- auto OperandV = OperandC->getAPIntValue();
+ int64_t OperandV = OperandC->getSExtValue();
// Check if we can shrink the operand enough to fit in an immediate (or
// fit into a smaller immediate) by negating it and switching the
// operation.
if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
- ((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
- (-OperandV).getMinSignedBits() <= 8) ||
- (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
- (-OperandV).getMinSignedBits() <= 32)) &&
+ ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
+ (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
+ isInt<32>(-OperandV))) &&
hasNoCarryFlagUses(StoredVal.getValue(1))) {
OperandV = -OperandV;
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
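
The switch from APInt::getMinSignedBits to isInt<8>/isInt<32> keeps the same trick: if an add/sub immediate does not fit the small encoding but its negation does, flip the operation and negate the constant (the classic case is +128, which needs an imm32 as ADD but fits an imm8 as SUB -128). A self-contained restatement of the test (isIntN here is a local stand-in for llvm::isInt<N>):

#include <cstdint>
#include <cstdio>

// Does V fit in an N-bit signed immediate?
template <unsigned N> bool isIntN(int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

// True when negating the immediate (and swapping ADD<->SUB) shrinks the
// encoding from imm32 to imm8.
bool shrinksByNegation(int64_t Imm) {
  return !isIntN<8>(Imm) && isIntN<8>(-Imm);
}

int main() {
  std::printf("%d\n", shrinksByNegation(128)); // 1: add 128 -> sub -128
  std::printf("%d\n", shrinksByNegation(200)); // 0: -200 does not fit imm8 either
}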
@@ -2757,11 +3079,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
// First try to fit this into an Imm8 operand. If it doesn't fit, then try
// the larger immediate operand.
- if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
+ if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
NewOpc = SelectImm8Opcode(Opc);
- } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
- (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
+ } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
NewOpc = SelectImmOpcode(Opc);
}
@@ -2821,8 +3142,6 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (NVT != MVT::i32 && NVT != MVT::i64)
return false;
- unsigned Size = NVT.getSizeInBits();
-
SDValue NBits;
 // If we have BMI2's BZHI, we are ok with multi-use patterns.
@@ -2835,16 +3154,27 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+ auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
+ if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
+ assert(V.getSimpleValueType() == MVT::i32 &&
+ V.getOperand(0).getSimpleValueType() == MVT::i64 &&
+ "Expected i64 -> i32 truncation");
+ V = V.getOperand(0);
+ }
+ return V;
+ };
+
// a) x & ((1 << nbits) + (-1))
- auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
+ &NBits](SDValue Mask) -> bool {
// Match `add`. Must only have one use!
if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
return false;
// We should be adding all-ones constant (i.e. subtracting one.)
if (!isAllOnesConstant(Mask->getOperand(1)))
return false;
- // Match `1 << nbits`. Must only have one use!
- SDValue M0 = Mask->getOperand(0);
+ // Match `1 << nbits`. Might be truncated. Must only have one use!
+ SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
return false;
if (!isOneConstant(M0->getOperand(0)))
@@ -2853,23 +3183,36 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
return true;
};
+ auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
+ V = peekThroughOneUseTruncation(V);
+ return CurDAG->MaskedValueIsAllOnes(
+ V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
+ NVT.getSizeInBits()));
+ };
+
// b) x & ~(-1 << nbits)
- auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool {
+ auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
+ &NBits](SDValue Mask) -> bool {
// Match `~()`. Must only have one use!
- if (!isBitwiseNot(Mask) || !checkOneUse(Mask))
+ if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
return false;
- // Match `-1 << nbits`. Must only have one use!
- SDValue M0 = Mask->getOperand(0);
+ // The -1 only has to be all-ones for the final Node's NVT.
+ if (!isAllOnes(Mask->getOperand(1)))
+ return false;
+ // Match `-1 << nbits`. Might be truncated. Must only have one use!
+ SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
return false;
- if (!isAllOnesConstant(M0->getOperand(0)))
+ // The -1 only has to be all-ones for the final Node's NVT.
+ if (!isAllOnes(M0->getOperand(0)))
return false;
NBits = M0->getOperand(1);
return true;
};
// Match potentially-truncated (bitwidth - y)
- auto matchShiftAmt = [checkOneUse, Size, &NBits](SDValue ShiftAmt) {
+ auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
+ unsigned Bitwidth) {
// Skip over a truncate of the shift amount.
if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
ShiftAmt = ShiftAmt.getOperand(0);
@@ -2881,52 +3224,56 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (ShiftAmt.getOpcode() != ISD::SUB)
return false;
auto V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
- if (!V0 || V0->getZExtValue() != Size)
+ if (!V0 || V0->getZExtValue() != Bitwidth)
return false;
NBits = ShiftAmt.getOperand(1);
return true;
};
// c) x & (-1 >> (32 - y))
- auto matchPatternC = [&checkOneUse, matchShiftAmt](SDValue Mask) -> bool {
+ auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
+ matchShiftAmt](SDValue Mask) -> bool {
+ // The mask itself may be truncated.
+ Mask = peekThroughOneUseTruncation(Mask);
+ unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
// Match `l>>`. Must only have one use!
if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
return false;
- // We should be shifting all-ones constant.
+ // We should be shifting a truly all-ones constant.
if (!isAllOnesConstant(Mask.getOperand(0)))
return false;
SDValue M1 = Mask.getOperand(1);
// The shift amount should not be used externally.
if (!checkOneUse(M1))
return false;
- return matchShiftAmt(M1);
+ return matchShiftAmt(M1, Bitwidth);
};
SDValue X;
// d) x << (32 - y) >> (32 - y)
- auto matchPatternD = [&checkOneUse, &checkTwoUse, matchShiftAmt,
+ auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
&X](SDNode *Node) -> bool {
if (Node->getOpcode() != ISD::SRL)
return false;
SDValue N0 = Node->getOperand(0);
if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
return false;
+ unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
SDValue N1 = Node->getOperand(1);
SDValue N01 = N0->getOperand(1);
// Both of the shifts must be by the exact same value.
// There should not be any uses of the shift amount outside of the pattern.
if (N1 != N01 || !checkTwoUse(N1))
return false;
- if (!matchShiftAmt(N1))
+ if (!matchShiftAmt(N1, Bitwidth))
return false;
X = N0->getOperand(0);
return true;
};
- auto matchLowBitMask = [&matchPatternA, &matchPatternB,
- &matchPatternC](SDValue Mask) -> bool {
- // FIXME: pattern c.
+ auto matchLowBitMask = [matchPatternA, matchPatternB,
+ matchPatternC](SDValue Mask) -> bool {
return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
};
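
For reference, the four mask shapes that matchPatternA/B/C/D recognize are all ways of writing "keep the low nbits of x". A throwaway sanity check in plain C++ (ordinary uint64_t arithmetic, not SelectionDAG nodes; nbits restricted to [1, 63] so no shift is undefined):

#include <cassert>
#include <cstdint>

// The four low-bit-extract shapes, written as plain integer arithmetic.
static uint64_t patA(uint64_t x, unsigned n) { return x & ((1ULL << n) - 1); }
static uint64_t patB(uint64_t x, unsigned n) { return x & ~(~0ULL << n); }
static uint64_t patC(uint64_t x, unsigned n) { return x & (~0ULL >> (64 - n)); }
static uint64_t patD(uint64_t x, unsigned n) { return (x << (64 - n)) >> (64 - n); }

int main() {
  uint64_t x = 0x0123456789ABCDEFULL; // arbitrary test value
  for (unsigned n = 1; n < 64; ++n) {
    uint64_t a = patA(x, n);
    assert(a == patB(x, n) && a == patC(x, n) && a == patD(x, n));
  }
  return 0;
}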
@@ -2946,42 +3293,46 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
SDLoc DL(Node);
- // If we do *NOT* have BMI2, let's find out if the if the 'X' is *logically*
- // shifted (potentially with one-use trunc inbetween),
- // and if so look past one-use truncation.
- MVT XVT = NVT;
- if (!Subtarget->hasBMI2() && X.getOpcode() == ISD::TRUNCATE &&
- X.hasOneUse() && X.getOperand(0).getOpcode() == ISD::SRL) {
- assert(NVT == MVT::i32 && "Expected target valuetype to be i32");
- X = X.getOperand(0);
- XVT = X.getSimpleValueType();
- assert(XVT == MVT::i64 && "Expected truncation from i64");
- }
+ // Truncate the shift amount.
+ NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
- SDValue OrigNBits = NBits;
- if (NBits.getValueType() != XVT) {
- // Truncate the shift amount.
- NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
- insertDAGNode(*CurDAG, OrigNBits, NBits);
-
- // Insert 8-bit NBits into lowest 8 bits of XVT-sized (32 or 64-bit)
- // register. All the other bits are undefined, we do not care about them.
- SDValue ImplDef =
- SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, XVT), 0);
- insertDAGNode(*CurDAG, OrigNBits, ImplDef);
- NBits =
- CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, XVT, ImplDef, NBits);
- insertDAGNode(*CurDAG, OrigNBits, NBits);
- }
+ // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
+ // All the other bits are undefined, we do not care about them.
+ SDValue ImplDef = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
+ NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
+ NBits);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
if (Subtarget->hasBMI2()) {
// Great, just emit the the BZHI..
- SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, XVT, X, NBits);
+ if (NVT != MVT::i32) {
+ // But we have to place the bit count in a wide-enough register first.
+ NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
+ }
+
+ SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
ReplaceNode(Node, Extract.getNode());
SelectCode(Extract.getNode());
return true;
}
+ // Else, if we do *NOT* have BMI2, let's find out whether 'X' is *logically*
+ // shifted (potentially with a one-use trunc in between) and whether the
+ // truncation was the only use of the shift; if so, look past the one-use
+ // truncation.
+ {
+ SDValue RealX = peekThroughOneUseTruncation(X);
+ // FIXME: only if the shift is one-use?
+ if (RealX != X && RealX.getOpcode() == ISD::SRL)
+ X = RealX;
+ }
+
+ MVT XVT = X.getSimpleValueType();
+
// Else, emitting BEXTR requires one more step.
// The 'control' of BEXTR has the pattern of:
// [15...8 bit][ 7...0 bit] location
@@ -2991,10 +3342,11 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// Shift NBits left by 8 bits, thus producing 'control'.
// This makes the low 8 bits to be zero.
SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
- SDValue Control = CurDAG->getNode(ISD::SHL, DL, XVT, NBits, C8);
- insertDAGNode(*CurDAG, OrigNBits, Control);
+ SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
// If the 'X' is *logically* shifted, we can fold that shift into 'control'.
+ // FIXME: only if the shift is one-use?
if (X.getOpcode() == ISD::SRL) {
SDValue ShiftAmt = X.getOperand(1);
X = X.getOperand(0);
@@ -3003,13 +3355,20 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
"Expected shift amount to be i8");
// Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
+ // We could zext to i16 in some form, but we intentionally don't do that.
SDValue OrigShiftAmt = ShiftAmt;
- ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, XVT, ShiftAmt);
+ ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);
// And now 'or' these low 8 bits of shift amount into the 'control'.
- Control = CurDAG->getNode(ISD::OR, DL, XVT, Control, ShiftAmt);
- insertDAGNode(*CurDAG, OrigNBits, Control);
+ Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
+ }
+
+ // But we have to place the 'control' in a wide-enough register first.
+ if (XVT != MVT::i32) {
+ Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
}
// And finally, form the BEXTR itself.
@@ -3017,7 +3376,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// The 'X' was originally truncated. Do that now.
if (XVT != NVT) {
- insertDAGNode(*CurDAG, OrigNBits, Extract);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
}
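
As context for the control-word construction above (not part of the patch): BEXTR takes its start bit in control bits 7:0 and its bit count in bits 15:8, which is why NBits is shifted left by 8 and the source shift amount is OR'd into the low byte. A rough scalar model of the instruction, assuming the start position stays below 64 so the shifts are defined:

#include <cstdint>
#include <cstdio>

// Reference model: extract Len bits of Src starting at bit Start.
// Control layout: [15:8] = bit count, [7:0] = start position.
static uint64_t bextr(uint64_t Src, unsigned Start, unsigned Len) {
  uint32_t Control = (Len << 8) | Start;
  unsigned S = Control & 0xFF;
  unsigned L = (Control >> 8) & 0xFF;
  uint64_t Shifted = Src >> S;                      // assumes S < 64
  return L >= 64 ? Shifted : Shifted & ((1ULL << L) - 1);
}

int main() {
  // Extract 5 bits starting at bit 8 of an arbitrary value: 0xCD & 0x1F = 0xD.
  std::printf("0x%llx\n", (unsigned long long)bextr(0xABCDEF, 8, 5));
  return 0;
}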
@@ -3098,14 +3457,14 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, New, Input.getOperand(0) };
- SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
- ReplaceUses(Input.getValue(1), SDValue(NewNode, 1));
+ ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
// Record the mem-refs
CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
} else {
- NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, Input, New);
+ NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, New);
}
return NewNode;
@@ -3263,6 +3622,119 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
return true;
}
+bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
+ MVT NVT = N->getSimpleValueType(0);
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue Shift = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ if (!Cst)
+ return false;
+
+ int64_t Val = Cst->getSExtValue();
+
+ // If we have an any_extend feeding the AND, look through it to see if there
+ // is a shift behind it. But only if the AND doesn't use the extended bits.
+ // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
+ bool FoundAnyExtend = false;
+ if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
+ Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
+ isUInt<32>(Val)) {
+ FoundAnyExtend = true;
+ Shift = Shift.getOperand(0);
+ }
+
+ if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
+ return false;
+
+ // i8 is unshrinkable, i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ return false;
+
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!ShlCst)
+ return false;
+
+ uint64_t ShAmt = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR, AND is unaffected.
+ uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
+ if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+ return false;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
+ if (Opcode == ISD::AND) {
+ // AND32ri is the same as AND64ri32 with zext imm.
+ // Try this before sign extended immediates below.
+ ShiftedVal = (uint64_t)Val >> ShAmt;
+ if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+ return true;
+ // Also swap order when the AND can become MOVZX.
+ if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
+ return true;
+ }
+ ShiftedVal = Val >> ShAmt;
+ if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
+ (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
+ return true;
+ if (Opcode != ISD::AND) {
+ // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
+ ShiftedVal = (uint64_t)Val >> ShAmt;
+ if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+ return true;
+ }
+ return false;
+ };
+
+ int64_t ShiftedVal;
+ if (!CanShrinkImmediate(ShiftedVal))
+ return false;
+
+ // Ok, we can reorder to get a smaller immediate.
+
+ // But it's possible the original immediate allowed an AND to become MOVZX.
+ // Do this check late so that the MaskedValueIsZero call is delayed as long
+ // as possible.
+ if (Opcode == ISD::AND) {
+ // Find the smallest zext this could possibly be.
+ unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
+ ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
+
+ // Figure out which bits need to be zero to achieve that mask.
+ APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
+ ZExtWidth);
+ NeededMask &= ~Cst->getAPIntValue();
+
+ if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
+ return false;
+ }
+
+ SDValue X = Shift.getOperand(0);
+ if (FoundAnyExtend) {
+ SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
+ insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
+ X = NewX;
+ }
+
+ SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
+ insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
+ SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
+ insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
+ SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
+ Shift.getOperand(1));
+ ReplaceNode(N, NewSHL.getNode());
+ SelectCode(NewSHL.getNode());
+ return true;
+}
+
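
A quick sanity check of the reassociation the new tryShrinkShlLogicImm performs: (x << C1) op C2 becomes (x op (C2 >> C1)) << C1, which for OR/XOR is only legal when no set bit of C2 falls below the shift amount (AND is unaffected). This is a hedged standalone sketch with made-up constants, not DAG code:

#include <cassert>
#include <cstdint>

int main() {
  // (x << 8) | 0x1200 needs a 4-byte immediate; 0x1200 >> 8 = 0x12 fits in
  // one byte, and no set bit of the constant falls below the shift amount,
  // so the OR can be done before the shift.
  const unsigned ShAmt = 8;
  const uint64_t C2 = 0x1200;
  assert((C2 & ((1ULL << ShAmt) - 1)) == 0 && "would change the OR/XOR result");

  for (uint64_t x = 0; x < 1000; ++x)
    assert(((x << ShAmt) | C2) == ((x | (C2 >> ShAmt)) << ShAmt));
  return 0;
}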
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -3333,6 +3805,347 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
return true;
}
+static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
+ bool FoldedBCast, bool Masked) {
+ if (Masked) {
+ if (FoldedLoad) {
+ switch (TestVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v16i8:
+ return IsTestN ? X86::VPTESTNMBZ128rmk : X86::VPTESTMBZ128rmk;
+ case MVT::v8i16:
+ return IsTestN ? X86::VPTESTNMWZ128rmk : X86::VPTESTMWZ128rmk;
+ case MVT::v4i32:
+ return IsTestN ? X86::VPTESTNMDZ128rmk : X86::VPTESTMDZ128rmk;
+ case MVT::v2i64:
+ return IsTestN ? X86::VPTESTNMQZ128rmk : X86::VPTESTMQZ128rmk;
+ case MVT::v32i8:
+ return IsTestN ? X86::VPTESTNMBZ256rmk : X86::VPTESTMBZ256rmk;
+ case MVT::v16i16:
+ return IsTestN ? X86::VPTESTNMWZ256rmk : X86::VPTESTMWZ256rmk;
+ case MVT::v8i32:
+ return IsTestN ? X86::VPTESTNMDZ256rmk : X86::VPTESTMDZ256rmk;
+ case MVT::v4i64:
+ return IsTestN ? X86::VPTESTNMQZ256rmk : X86::VPTESTMQZ256rmk;
+ case MVT::v64i8:
+ return IsTestN ? X86::VPTESTNMBZrmk : X86::VPTESTMBZrmk;
+ case MVT::v32i16:
+ return IsTestN ? X86::VPTESTNMWZrmk : X86::VPTESTMWZrmk;
+ case MVT::v16i32:
+ return IsTestN ? X86::VPTESTNMDZrmk : X86::VPTESTMDZrmk;
+ case MVT::v8i64:
+ return IsTestN ? X86::VPTESTNMQZrmk : X86::VPTESTMQZrmk;
+ }
+ }
+
+ if (FoldedBCast) {
+ switch (TestVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v4i32:
+ return IsTestN ? X86::VPTESTNMDZ128rmbk : X86::VPTESTMDZ128rmbk;
+ case MVT::v2i64:
+ return IsTestN ? X86::VPTESTNMQZ128rmbk : X86::VPTESTMQZ128rmbk;
+ case MVT::v8i32:
+ return IsTestN ? X86::VPTESTNMDZ256rmbk : X86::VPTESTMDZ256rmbk;
+ case MVT::v4i64:
+ return IsTestN ? X86::VPTESTNMQZ256rmbk : X86::VPTESTMQZ256rmbk;
+ case MVT::v16i32:
+ return IsTestN ? X86::VPTESTNMDZrmbk : X86::VPTESTMDZrmbk;
+ case MVT::v8i64:
+ return IsTestN ? X86::VPTESTNMQZrmbk : X86::VPTESTMQZrmbk;
+ }
+ }
+
+ switch (TestVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v16i8:
+ return IsTestN ? X86::VPTESTNMBZ128rrk : X86::VPTESTMBZ128rrk;
+ case MVT::v8i16:
+ return IsTestN ? X86::VPTESTNMWZ128rrk : X86::VPTESTMWZ128rrk;
+ case MVT::v4i32:
+ return IsTestN ? X86::VPTESTNMDZ128rrk : X86::VPTESTMDZ128rrk;
+ case MVT::v2i64:
+ return IsTestN ? X86::VPTESTNMQZ128rrk : X86::VPTESTMQZ128rrk;
+ case MVT::v32i8:
+ return IsTestN ? X86::VPTESTNMBZ256rrk : X86::VPTESTMBZ256rrk;
+ case MVT::v16i16:
+ return IsTestN ? X86::VPTESTNMWZ256rrk : X86::VPTESTMWZ256rrk;
+ case MVT::v8i32:
+ return IsTestN ? X86::VPTESTNMDZ256rrk : X86::VPTESTMDZ256rrk;
+ case MVT::v4i64:
+ return IsTestN ? X86::VPTESTNMQZ256rrk : X86::VPTESTMQZ256rrk;
+ case MVT::v64i8:
+ return IsTestN ? X86::VPTESTNMBZrrk : X86::VPTESTMBZrrk;
+ case MVT::v32i16:
+ return IsTestN ? X86::VPTESTNMWZrrk : X86::VPTESTMWZrrk;
+ case MVT::v16i32:
+ return IsTestN ? X86::VPTESTNMDZrrk : X86::VPTESTMDZrrk;
+ case MVT::v8i64:
+ return IsTestN ? X86::VPTESTNMQZrrk : X86::VPTESTMQZrrk;
+ }
+ }
+
+ if (FoldedLoad) {
+ switch (TestVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v16i8:
+ return IsTestN ? X86::VPTESTNMBZ128rm : X86::VPTESTMBZ128rm;
+ case MVT::v8i16:
+ return IsTestN ? X86::VPTESTNMWZ128rm : X86::VPTESTMWZ128rm;
+ case MVT::v4i32:
+ return IsTestN ? X86::VPTESTNMDZ128rm : X86::VPTESTMDZ128rm;
+ case MVT::v2i64:
+ return IsTestN ? X86::VPTESTNMQZ128rm : X86::VPTESTMQZ128rm;
+ case MVT::v32i8:
+ return IsTestN ? X86::VPTESTNMBZ256rm : X86::VPTESTMBZ256rm;
+ case MVT::v16i16:
+ return IsTestN ? X86::VPTESTNMWZ256rm : X86::VPTESTMWZ256rm;
+ case MVT::v8i32:
+ return IsTestN ? X86::VPTESTNMDZ256rm : X86::VPTESTMDZ256rm;
+ case MVT::v4i64:
+ return IsTestN ? X86::VPTESTNMQZ256rm : X86::VPTESTMQZ256rm;
+ case MVT::v64i8:
+ return IsTestN ? X86::VPTESTNMBZrm : X86::VPTESTMBZrm;
+ case MVT::v32i16:
+ return IsTestN ? X86::VPTESTNMWZrm : X86::VPTESTMWZrm;
+ case MVT::v16i32:
+ return IsTestN ? X86::VPTESTNMDZrm : X86::VPTESTMDZrm;
+ case MVT::v8i64:
+ return IsTestN ? X86::VPTESTNMQZrm : X86::VPTESTMQZrm;
+ }
+ }
+
+ if (FoldedBCast) {
+ switch (TestVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v4i32:
+ return IsTestN ? X86::VPTESTNMDZ128rmb : X86::VPTESTMDZ128rmb;
+ case MVT::v2i64:
+ return IsTestN ? X86::VPTESTNMQZ128rmb : X86::VPTESTMQZ128rmb;
+ case MVT::v8i32:
+ return IsTestN ? X86::VPTESTNMDZ256rmb : X86::VPTESTMDZ256rmb;
+ case MVT::v4i64:
+ return IsTestN ? X86::VPTESTNMQZ256rmb : X86::VPTESTMQZ256rmb;
+ case MVT::v16i32:
+ return IsTestN ? X86::VPTESTNMDZrmb : X86::VPTESTMDZrmb;
+ case MVT::v8i64:
+ return IsTestN ? X86::VPTESTNMQZrmb : X86::VPTESTMQZrmb;
+ }
+ }
+
+ switch (TestVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v16i8:
+ return IsTestN ? X86::VPTESTNMBZ128rr : X86::VPTESTMBZ128rr;
+ case MVT::v8i16:
+ return IsTestN ? X86::VPTESTNMWZ128rr : X86::VPTESTMWZ128rr;
+ case MVT::v4i32:
+ return IsTestN ? X86::VPTESTNMDZ128rr : X86::VPTESTMDZ128rr;
+ case MVT::v2i64:
+ return IsTestN ? X86::VPTESTNMQZ128rr : X86::VPTESTMQZ128rr;
+ case MVT::v32i8:
+ return IsTestN ? X86::VPTESTNMBZ256rr : X86::VPTESTMBZ256rr;
+ case MVT::v16i16:
+ return IsTestN ? X86::VPTESTNMWZ256rr : X86::VPTESTMWZ256rr;
+ case MVT::v8i32:
+ return IsTestN ? X86::VPTESTNMDZ256rr : X86::VPTESTMDZ256rr;
+ case MVT::v4i64:
+ return IsTestN ? X86::VPTESTNMQZ256rr : X86::VPTESTMQZ256rr;
+ case MVT::v64i8:
+ return IsTestN ? X86::VPTESTNMBZrr : X86::VPTESTMBZrr;
+ case MVT::v32i16:
+ return IsTestN ? X86::VPTESTNMWZrr : X86::VPTESTMWZrr;
+ case MVT::v16i32:
+ return IsTestN ? X86::VPTESTNMDZrr : X86::VPTESTMDZrr;
+ case MVT::v8i64:
+ return IsTestN ? X86::VPTESTNMQZrr : X86::VPTESTMQZrr;
+ }
+}
+
+// Try to create VPTESTM instruction. If InMask is not null, it will be used
+// to form a masked operation.
+bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
+ SDValue InMask) {
+ assert(Subtarget->hasAVX512() && "Expected AVX512!");
+ assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
+ "Unexpected VT!");
+
+ // Look for equal and not equal compares.
+ ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return false;
+
+ // See if we're comparing against zero. This should have been canonicalized
+ // to the RHS during lowering.
+ if (!ISD::isBuildVectorAllZeros(Setcc.getOperand(1).getNode()))
+ return false;
+
+ SDValue N0 = Setcc.getOperand(0);
+
+ MVT CmpVT = N0.getSimpleValueType();
+ MVT CmpSVT = CmpVT.getVectorElementType();
+
+ // Start with both operands the same. We'll try to refine this.
+ SDValue Src0 = N0;
+ SDValue Src1 = N0;
+
+ {
+ // Look through single use bitcasts.
+ SDValue N0Temp = N0;
+ if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
+ N0Temp = N0.getOperand(0);
+
+ // Look for single use AND.
+ if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
+ Src0 = N0Temp.getOperand(0);
+ Src1 = N0Temp.getOperand(1);
+ }
+ }
+
+ // Without VLX we need to widen the load.
+ bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
+
+ // We can only fold loads if the sources are unique.
+ bool CanFoldLoads = Src0 != Src1;
+
+ // Try to fold loads unless we need to widen.
+ bool FoldedLoad = false;
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
+ if (!Widen && CanFoldLoads) {
+ Load = Src1;
+ FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
+ Tmp4);
+ if (!FoldedLoad) {
+ // AND is commutative, so try folding the other operand.
+ Load = Src0;
+ FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
+ Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(Src0, Src1);
+ }
+ }
+
+ auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
+ // Look through single use bitcasts.
+ if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse())
+ Src = Src.getOperand(0);
+
+ if (Src.getOpcode() == X86ISD::VBROADCAST && Src.hasOneUse()) {
+ Parent = Src.getNode();
+ Src = Src.getOperand(0);
+ if (Src.getSimpleValueType() == CmpSVT)
+ return Src;
+ }
+
+ return SDValue();
+ };
+
+ // If we didn't fold a load, try to match a broadcast. There is no widening
+ // limitation for this, but only 32-bit and 64-bit element types are
+ // supported.
+ bool FoldedBCast = false;
+ if (!FoldedLoad && CanFoldLoads &&
+ (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
+ SDNode *ParentNode = nullptr;
+ if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
+ FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
+ Tmp1, Tmp2, Tmp3, Tmp4);
+ }
+
+ // Try the other operand.
+ if (!FoldedBCast) {
+ if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
+ FoldedBCast = tryFoldLoad(Root, ParentNode, Load, Tmp0,
+ Tmp1, Tmp2, Tmp3, Tmp4);
+ if (FoldedBCast)
+ std::swap(Src0, Src1);
+ }
+ }
+ }
+
+ auto getMaskRC = [](MVT MaskVT) {
+ switch (MaskVT.SimpleTy) {
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::v2i1: return X86::VK2RegClassID;
+ case MVT::v4i1: return X86::VK4RegClassID;
+ case MVT::v8i1: return X86::VK8RegClassID;
+ case MVT::v16i1: return X86::VK16RegClassID;
+ case MVT::v32i1: return X86::VK32RegClassID;
+ case MVT::v64i1: return X86::VK64RegClassID;
+ }
+ };
+
+ bool IsMasked = InMask.getNode() != nullptr;
+
+ SDLoc dl(Root);
+
+ MVT ResVT = Setcc.getSimpleValueType();
+ MVT MaskVT = ResVT;
+ if (Widen) {
+ // Widen the inputs using insert_subreg or copy_to_regclass.
+ unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
+ unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
+ unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
+ CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
+ MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
+ CmpVT), 0);
+ Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
+
+ assert(!FoldedLoad && "Shouldn't have folded the load");
+ if (!FoldedBCast)
+ Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
+
+ if (IsMasked) {
+ // Widen the mask.
+ unsigned RegClass = getMaskRC(MaskVT);
+ SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
+ InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, MaskVT, InMask, RC), 0);
+ }
+ }
+
+ bool IsTestN = CC == ISD::SETEQ;
+ unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
+ IsMasked);
+
+ MachineSDNode *CNode;
+ if (FoldedLoad || FoldedBCast) {
+ SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
+
+ if (IsMasked) {
+ SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
+ Load.getOperand(0) };
+ CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ } else {
+ SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
+ Load.getOperand(0) };
+ CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+ }
+
+ // Update the chain.
+ ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(Load)->getMemOperand()});
+ } else {
+ if (IsMasked)
+ CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
+ else
+ CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
+ }
+
+ // If we widened, we need to shrink the mask VT.
+ if (Widen) {
+ unsigned RegClass = getMaskRC(ResVT);
+ SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
+ CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, ResVT, SDValue(CNode, 0), RC);
+ }
+
+ ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
+ CurDAG->RemoveDeadNode(Root);
+ return true;
+}
+
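
For orientation, here is a scalar per-element model of what the selected VPTESTM/VPTESTNM mask nodes compute; this is plain C++ for illustration only, not the AVX-512 intrinsics, and the element type and count are arbitrary:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Per-element model: VPTESTM sets mask bit i when (a[i] & b[i]) != 0,
// VPTESTNM when (a[i] & b[i]) == 0. This is what the (setcc (and x, y), 0,
// eq/ne) patterns above collapse into.
template <std::size_t N>
static uint64_t vptestm(const std::array<uint32_t, N> &A,
                        const std::array<uint32_t, N> &B, bool TestN) {
  uint64_t Mask = 0;
  for (std::size_t I = 0; I < N; ++I) {
    bool Bit = (A[I] & B[I]) != 0;
    Mask |= uint64_t(TestN ? !Bit : Bit) << I;
  }
  return Mask;
}

int main() {
  std::array<uint32_t, 4> A = {1, 2, 4, 8}, B = {1, 0, 4, 0};
  std::printf("vptestmd  -> 0x%llx\n", (unsigned long long)vptestm(A, B, false));
  std::printf("vptestnmd -> 0x%llx\n", (unsigned long long)vptestm(A, B, true));
  return 0;
}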
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opcode = Node->getOpcode();
@@ -3346,6 +4159,61 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_sse3_monitor:
+ case Intrinsic::x86_monitorx:
+ case Intrinsic::x86_clzero: {
+ bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
+
+ unsigned Opc = 0;
+ switch (IntNo) {
+ case Intrinsic::x86_sse3_monitor:
+ if (!Subtarget->hasSSE3())
+ break;
+ Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
+ break;
+ case Intrinsic::x86_monitorx:
+ if (!Subtarget->hasMWAITX())
+ break;
+ Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
+ break;
+ case Intrinsic::x86_clzero:
+ if (!Subtarget->hasCLZERO())
+ break;
+ Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
+ break;
+ }
+
+ if (Opc) {
+ unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
+ SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
+ Node->getOperand(2), SDValue());
+ SDValue InFlag = Chain.getValue(1);
+
+ if (IntNo == Intrinsic::x86_sse3_monitor ||
+ IntNo == Intrinsic::x86_monitorx) {
+ // Copy the other two operands to ECX and EDX.
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
+ { Chain, InFlag});
+ ReplaceNode(Node, CNode);
+ return;
+ }
+ }
+ }
+
+ break;
+ }
case ISD::BRIND: {
if (Subtarget->isTargetNaCl())
// NaCl has its own pass where jmp %r32 are converted to jmp %r64. We
@@ -3381,13 +4249,17 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
- case X86ISD::BLENDV: {
- // BLENDV selects like a regular VSELECT.
- SDValue VSelect = CurDAG->getNode(
- ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
+ case ISD::VSELECT: {
+ // Replace a VSELECT whose condition is not a mask with BLENDV.
+ if (Node->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ break;
+
+ assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
+ SDValue Blendv = CurDAG->getNode(
+ X86ISD::BLENDV, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2));
- ReplaceNode(Node, VSelect.getNode());
- SelectCode(VSelect.getNode());
+ ReplaceNode(Node, Blendv.getNode());
+ SelectCode(Blendv.getNode());
// We already called ReplaceUses.
return;
}
@@ -3403,6 +4275,18 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::AND:
+ if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
+ // Try to form a masked VPTESTM. Operands can be in either order.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
+ tryVPTESTM(Node, N0, N1))
+ return;
+ if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
+ tryVPTESTM(Node, N1, N0))
+ return;
+ }
+
if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);
@@ -3415,89 +4299,113 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
LLVM_FALLTHROUGH;
case ISD::OR:
- case ISD::XOR: {
-
- // For operations of the form (x << C1) op C2, check if we can use a smaller
- // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
+ case ISD::XOR:
+ if (tryShrinkShlLogicImm(Node))
+ return;
- if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
+ case ISD::SUB: {
+ // For optsize, try to avoid folding immediates that have multiple uses.
+ // This code selects the register form directly to avoid going through the
+ // isel table, which might fold the immediate. We can't change the
+ // add/sub/and/or/xor-with-immediate patterns in the tablegen files to check
+ // the immediate use count without making those patterns unavailable to the
+ // fast-isel table.
+ if (!OptForSize)
break;
- // i8 is unshrinkable, i16 should be promoted to i32.
- if (NVT != MVT::i32 && NVT != MVT::i64)
+ // Only handle i8/i16/i32/i64.
+ if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
break;
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
- if (!Cst || !ShlCst)
+ if (!Cst)
break;
int64_t Val = Cst->getSExtValue();
- uint64_t ShlVal = ShlCst->getZExtValue();
- // Make sure that we don't change the operation by removing bits.
- // This only matters for OR and XOR, AND is unaffected.
- uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
- if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+ // Make sure it's an immediate that is considered foldable.
+ // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
+ if (!isInt<8>(Val) && !isInt<32>(Val))
break;
- unsigned ShlOp, AddOp, Op;
- MVT CstVT = NVT;
-
- // Check the minimum bitwidth for the new constant.
- // TODO: AND32ri is the same as AND64ri32 with zext imm.
- // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
- // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
- if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
- CstVT = MVT::i8;
- else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
- CstVT = MVT::i32;
-
- // Bail if there is no smaller encoding.
- if (NVT == CstVT)
+ // Check if we should avoid folding this immediate.
+ if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
break;
+ // We should not fold the immediate. So we need a register form instead.
+ unsigned ROpc, MOpc;
switch (NVT.SimpleTy) {
- default: llvm_unreachable("Unsupported VT!");
+ default: llvm_unreachable("Unexpected VT!");
+ case MVT::i8:
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
+ case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
+ case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
+ case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
+ case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
+ }
+ break;
+ case MVT::i16:
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
+ case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
+ case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
+ case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
+ case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
+ }
+ break;
case MVT::i32:
- assert(CstVT == MVT::i8);
- ShlOp = X86::SHL32ri;
- AddOp = X86::ADD32rr;
-
switch (Opcode) {
- default: llvm_unreachable("Impossible opcode");
- case ISD::AND: Op = X86::AND32ri8; break;
- case ISD::OR: Op = X86::OR32ri8; break;
- case ISD::XOR: Op = X86::XOR32ri8; break;
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
+ case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
+ case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
+ case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
+ case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
}
break;
case MVT::i64:
- assert(CstVT == MVT::i8 || CstVT == MVT::i32);
- ShlOp = X86::SHL64ri;
- AddOp = X86::ADD64rr;
-
switch (Opcode) {
- default: llvm_unreachable("Impossible opcode");
- case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
- case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
- case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
+ case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
+ case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
+ case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
+ case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
}
break;
}
- // Emit the smaller op and the shift.
- SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
- SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
- if (ShlVal == 1)
- CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
- SDValue(New, 0));
- else
- CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
- getI8Imm(ShlVal, dl));
+ // OK, this is an AND/OR/XOR/ADD/SUB with a constant operand.
+
+ // If this is not a subtract, we can still try to fold a load.
+ if (Opcode != ISD::SUB) {
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+ SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+ // Update the chain.
+ ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
+ ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+ CurDAG->RemoveDeadNode(Node);
+ return;
+ }
+ }
+
+ CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
return;
}
+
case X86ISD::SMUL:
// i16/i32/i64 are handled with isel patterns.
if (NVT != MVT::i8)
@@ -3895,7 +4803,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned TrailingZeros = countTrailingZeros(Mask);
SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
SDValue Shift =
- SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64,
+ SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
N0.getOperand(0), Imm), 0);
MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
MVT::i32, Shift, Shift);
@@ -3906,7 +4814,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned LeadingZeros = countLeadingZeros(Mask);
SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
SDValue Shift =
- SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64,
+ SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
N0.getOperand(0), Imm), 0);
MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
MVT::i32, Shift, Shift);
@@ -3964,8 +4872,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
- // FIXME: We should be able to fold loads here.
-
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
SDValue Reg = N0.getOperand(0);
@@ -4058,10 +4964,46 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
}
+ case ISD::SETCC: {
+ if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
+ return;
+
+ break;
+ }
+
case ISD::STORE:
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT: {
+ // Replace the fp rounding nodes with their X86-specific equivalents so we
+ // don't need two sets of patterns.
+ // FIXME: This can only happen when the nodes started as STRICT_* and have
+ // been mutated into their non-STRICT equivalents. Eventually this
+ // mutation will be removed and we should switch the STRICT_ nodes to a
+ // strict version of RNDSCALE in PreProcessISelDAG.
+ unsigned Imm;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::FCEIL: Imm = 0xA; break;
+ case ISD::FFLOOR: Imm = 0x9; break;
+ case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::FNEARBYINT: Imm = 0xC; break;
+ case ISD::FRINT: Imm = 0x4; break;
+ }
+ SDLoc dl(Node);
+ SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+ Node->getValueType(0),
+ Node->getOperand(0),
+ CurDAG->getConstant(Imm, dl, MVT::i8));
+ ReplaceNode(Node, Res.getNode());
+ SelectCode(Res.getNode());
+ return;
+ }
}
SelectCode(Node);
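
For reference, the rounding immediates chosen above follow the ROUNDPS/VRNDSCALE control layout as I understand it: bits 1:0 select the rounding mode (00 nearest, 01 down, 10 up, 11 truncate), bit 2 selects the MXCSR rounding mode instead, and bit 3 suppresses the precision (inexact) exception. A small, purely illustrative decoder under those assumptions:

#include <cstdio>

// Decode the low nibble of a ROUNDPS/VRNDSCALE immediate (illustrative only).
static void decodeRoundImm(const char *Name, unsigned Imm) {
  static const char *Modes[] = {"nearest", "down", "up", "truncate"};
  const char *Mode = (Imm & 0x4) ? "MXCSR" : Modes[Imm & 0x3];
  std::printf("%-12s imm=0x%X mode=%-8s inexact %s\n", Name, Imm, Mode,
              (Imm & 0x8) ? "suppressed" : "reported");
}

int main() {
  decodeRoundImm("FCEIL", 0xA);
  decodeRoundImm("FFLOOR", 0x9);
  decodeRoundImm("FTRUNC", 0xB);
  decodeRoundImm("FNEARBYINT", 0xC);
  decodeRoundImm("FRINT", 0x4);
  return 0;
}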
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b6a692ee187d..0b4bf687e6cf 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -131,7 +130,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addBypassSlowDiv(64, 32);
}
- if (Subtarget.isTargetKnownWindowsMSVC() ||
+ if (Subtarget.isTargetWindowsMSVC() ||
Subtarget.isTargetWindowsItanium()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
@@ -159,6 +158,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setUseUnderscoreLongJmp(true);
}
+ // If we don't have cmpxchg8b (meaning this is a 386/486), limit the atomic
+ // size to 32 bits so that AtomicExpandPass will expand it and we don't need
+ // cmpxchg8b.
+ // FIXME: Should we be limiting the atomic size on other configs? The default
+ // is 1024.
+ if (!Subtarget.hasCmpxchg8b())
+ setMaxAtomicSizeInBitsSupported(32);
+
// Set up the register classes.
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
@@ -190,10 +196,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Integer absolute.
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
- setOperationAction(ISD::ABS , MVT::i32 , Custom);
- if (Subtarget.is64Bit())
- setOperationAction(ISD::ABS , MVT::i64 , Custom);
+ setOperationAction(ISD::ABS , MVT::i32 , Custom);
}
+ setOperationAction(ISD::ABS , MVT::i64 , Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
@@ -258,14 +263,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
- if (X86ScalarSSEf32) {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
- // f32 and f64 cases are Legal, f80 case is not
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
- } else {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
- }
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
@@ -415,6 +414,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ else
+ setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
@@ -486,6 +487,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
+ if (!Subtarget.is64Bit())
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+
if (Subtarget.hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
@@ -530,6 +534,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
: &X86::FR64RegClass);
+ // Disable the f32->f64 extload since we can only generate it in one
+ // instruction under optsize, so it's easier to pattern-match (fpext (load))
+ // for that case than to emit two instructions for the extload in the
+ // non-optsize case.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS, VT, Custom);
@@ -668,6 +678,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
+ setOperationAction(ISD::LROUND, MVT::f80, Expand);
+ setOperationAction(ISD::LLROUND, MVT::f80, Expand);
+ setOperationAction(ISD::LRINT, MVT::f80, Expand);
+ setOperationAction(ISD::LLRINT, MVT::f80, Expand);
}
// Always use a library call for pow.
@@ -780,6 +794,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2f32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -841,6 +858,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
if (!ExperimentalVectorWideningLegalization) {
// Use widening instead of promotion.
@@ -950,17 +971,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
- for (MVT VT : MVT::fp_vector_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
-
// We want to legalize this to an f64 load rather than an i64 load on
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
- setOperationAction(ISD::STORE, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i16, Custom);
setOperationAction(ISD::STORE, MVT::v8i8, Custom);
@@ -1128,14 +1144,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
- setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
- for (MVT VT : MVT::fp_vector_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
-
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1144,13 +1156,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
}
- if (ExperimentalVectorWideningLegalization) {
- // These types need custom splitting if their input is a 128-bit vector.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- }
+ // These types need custom splitting if their input is a 128-bit vector.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
@@ -1182,9 +1192,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
- // TODO - remove this once 256-bit X86ISD::ANDNP correctly split.
- setOperationAction(ISD::CTTZ, VT, HasInt256 ? Expand : Custom);
-
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
@@ -1260,7 +1267,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
- setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MSTORE, VT, Legal);
}
@@ -1282,6 +1289,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
}
if (HasInt256)
@@ -1352,19 +1360,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
}
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
@@ -1378,9 +1381,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
- for (MVT VT : MVT::fp_vector_valuetypes())
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
-
for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
@@ -1413,10 +1413,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+ // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
+ // to 512-bit rather than use the AVX2 instructions so that we can use
+ // k-masks.
if (!Subtarget.hasVLX()) {
- // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
- // to 512-bit rather than use the AVX2 instructions so that we can use
- // k-masks.
for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
@@ -1446,6 +1446,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FTRUNC, VT, Legal);
setOperationAction(ISD::FRINT, VT, Legal);
setOperationAction(ISD::FNEARBYINT, VT, Legal);
+
+ setOperationAction(ISD::SELECT, VT, Custom);
}
// Without BWI we need to use custom lowering to handle MVT::v64i8 input.
@@ -1465,13 +1467,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
- setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
- setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
- setOperationAction(ISD::SELECT, MVT::v16i32, Custom);
- setOperationAction(ISD::SELECT, MVT::v32i16, Custom);
- setOperationAction(ISD::SELECT, MVT::v64i8, Custom);
- setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
-
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
@@ -1485,6 +1480,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1705,6 +1701,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::SELECT, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1788,7 +1785,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (!Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
}
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
@@ -1842,8 +1838,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// is. We should promote the value to 64-bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
// function casting to f64 and calling `fmod`.
- if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
- Subtarget.isTargetWindowsItanium()))
+ if (Subtarget.is32Bit() &&
+ (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
{ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
ISD::FLOG10, ISD::FPOW, ISD::FSIN})
@@ -1854,6 +1850,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
@@ -1881,6 +1878,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -2050,20 +2050,19 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
-EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const {
- const Function &F = MF.getFunction();
- if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
- if (Size >= 16 &&
- (!Subtarget.isUnalignedMem16Slow() ||
- ((DstAlign == 0 || DstAlign >= 16) &&
- (SrcAlign == 0 || SrcAlign >= 16)))) {
+/// For vector ops we check that the overall size isn't larger than our
+/// preferred vector width.
+EVT X86TargetLowering::getOptimalMemOpType(
+ uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset, bool MemcpyStrSrc,
+ const AttributeList &FuncAttributes) const {
+ if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16)))) {
// FIXME: Check if unaligned 32-byte accesses are slow.
- if (Size >= 32 && Subtarget.hasAVX()) {
+ if (Size >= 32 && Subtarget.hasAVX() &&
+ (Subtarget.getPreferVectorWidth() >= 256)) {
// Although this isn't a well-supported type for AVX1, we'll let
// legalization and shuffle lowering produce the optimal codegen. If we
// choose an optimal type with a vector element larger than a byte,
@@ -2071,11 +2070,12 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
// multiply) before we splat as a vector.
return MVT::v32i8;
}
- if (Subtarget.hasSSE2())
+ if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
return MVT::v16i8;
// TODO: Can SSE1 handle a byte vector?
// If we have SSE1 registers we should be able to use them.
- if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()))
+ if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
+ (Subtarget.getPreferVectorWidth() >= 128))
return MVT::v4f32;
} else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
!Subtarget.is64Bit() && Subtarget.hasSSE2()) {
@@ -2104,11 +2104,9 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
return true;
}
-bool
-X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
- unsigned,
- unsigned,
- bool *Fast) const {
+bool X86TargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned, unsigned Align, MachineMemOperand::Flags Flags,
+ bool *Fast) const {
if (Fast) {
switch (VT.getSizeInBits()) {
default:
@@ -2124,6 +2122,16 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// TODO: What about AVX-512 (512-bit) accesses?
}
}
+ // NonTemporal vector memory ops must be aligned.
+ if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
+ // NT loads can only be vector-aligned, so if the access is less aligned
+ // than the minimum vector size (which we can split the vector down to), we
+ // might as well use a regular unaligned vector load.
+ // We don't have any NT loads pre-SSE4.1.
+ if (!!(Flags & MachineMemOperand::MOLoad))
+ return (Align < 16 || !Subtarget.hasSSE41());
+ return false;
+ }
// Misaligned accesses of any size are always allowed.
return true;
}
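
The new non-temporal check above boils down to a small predicate. A hedged standalone restatement (plain bools instead of MachineMemOperand flags, with SSE4.1 availability passed in explicitly):

#include <cstdio>

// Sketch of the misaligned-access rule added above for non-temporal (NT)
// vector memory ops: NT stores are never treated as misaligned-ok; NT loads
// fall back to a regular unaligned load when under 16-byte alignment or when
// SSE4.1 (MOVNTDQA) is unavailable.
static bool allowsMisalignedNTVector(bool IsLoad, unsigned Align,
                                     bool HasSSE41) {
  if (IsLoad)
    return Align < 16 || !HasSSE41; // use a regular unaligned load instead
  return false;                     // NT stores must be aligned
}

int main() {
  std::printf("NT load, align 8, SSE4.1:  %d\n",
              allowsMisalignedNTVector(true, 8, true));
  std::printf("NT load, align 16, SSE4.1: %d\n",
              allowsMisalignedNTVector(true, 16, true));
  std::printf("NT store, align 8:         %d\n",
              allowsMisalignedNTVector(false, 8, true));
  return 0;
}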
@@ -2281,12 +2289,13 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
- auto *SecurityCheckCookie = cast<Function>(
- M.getOrInsertFunction("__security_check_cookie",
- Type::getVoidTy(M.getContext()),
- Type::getInt8PtrTy(M.getContext())));
- SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
- SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
+ FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+ "__security_check_cookie", Type::getVoidTy(M.getContext()),
+ Type::getInt8PtrTy(M.getContext()));
+ if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
+ F->setCallingConv(CallingConv::X86_FastCall);
+ F->addAttribute(1, Attribute::AttrKind::InReg);
+ }
return;
}
// glibc, bionic, and Fuchsia have a special slot for the stack guard.
@@ -2304,7 +2313,7 @@ Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
return TargetLowering::getSDagStackGuard(M);
}
-Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
+Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
@@ -2347,8 +2356,6 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
-#include "X86GenCallingConv.inc"
-
bool X86TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
@@ -2703,7 +2710,6 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
"The values should reside in two registers");
SDValue Lo, Hi;
- unsigned Reg;
SDValue ArgValueLo, ArgValueHi;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2713,7 +2719,7 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
if (nullptr == InFlag) {
// When no physical register is present,
// create an intermediate virtual register.
- Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
@@ -2934,6 +2940,8 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
case CallingConv::X86_StdCall:
case CallingConv::X86_VectorCall:
case CallingConv::X86_FastCall:
+ // Swift:
+ case CallingConv::Swift:
return true;
default:
return canGuaranteeTCO(CC);
@@ -2986,22 +2994,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
else
ValVT = VA.getValVT();
- // Calculate SP offset of interrupt parameter, re-arrange the slot normally
- // taken by a return address.
- int Offset = 0;
- if (CallConv == CallingConv::X86_INTR) {
- // X86 interrupts may take one or two arguments.
- // On the stack there will be no return address as in regular call.
- // Offset of last argument need to be set to -4/-8 bytes.
- // Where offset of the first argument out of two, should be set to 0 bytes.
- Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
- if (Subtarget.is64Bit() && Ins.size() == 2) {
- // The stack pointer needs to be realigned for 64 bit handlers with error
- // code, so the argument offset changes by 8 bytes.
- Offset += 8;
- }
- }
-
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
@@ -3014,15 +3006,15 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// can be improved with deeper analysis.
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
/*isAliased=*/true);
- // Adjust SP offset of interrupt parameter.
- if (CallConv == CallingConv::X86_INTR) {
- MFI.setObjectOffset(FI, Offset);
- }
return DAG.getFrameIndex(FI, PtrVT);
}
// This is an argument in memory. We might be able to perform copy elision.
- if (Flags.isCopyElisionCandidate()) {
+ // If the argument is passed directly in memory without any extension, then we
+ // can perform copy elision. Large vector types, for example, may be passed
+ // indirectly by pointer.
+ if (Flags.isCopyElisionCandidate() &&
+ VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
EVT ArgVT = Ins[i].ArgVT;
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
@@ -3031,7 +3023,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// load from our portion of it. This assumes that if the first part of an
// argument is in memory, the rest will also be in memory.
int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
- /*Immutable=*/false);
+ /*IsImmutable=*/false);
PartAddr = DAG.getFrameIndex(FI, PtrVT);
return DAG.getLoad(
ValVT, dl, Chain, PartAddr,
@@ -3072,11 +3064,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
MFI.setObjectSExt(FI, true);
}
- // Adjust SP offset of interrupt parameter.
- if (CallConv == CallingConv::X86_INTR) {
- MFI.setObjectOffset(FI, Offset);
- }
-
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
@@ -3166,14 +3153,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
- if (CallConv == CallingConv::X86_INTR) {
- bool isLegal = Ins.size() == 1 ||
- (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
- (!Is64Bit && Ins[1].VT == MVT::i32)));
- if (!isLegal)
- report_fatal_error("X86 interrupts may take one or two arguments");
- }
-
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
@@ -3454,11 +3433,11 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
// Copy all forwards from physical to virtual registers.
- for (ForwardedRegister &F : Forwards) {
+ for (ForwardedRegister &FR : Forwards) {
// FIXME: Can we use a less constrained schedule?
- SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
- F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
- Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
+ SDValue RegVal = DAG.getCopyFromReg(Chain, dl, FR.VReg, FR.VT);
+ FR.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(FR.VT));
+ Chain = DAG.getCopyToReg(Chain, dl, FR.VReg, RegVal);
}
}
@@ -3610,6 +3589,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const Module *M = MF.getMMI().getModule();
Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
+ MachineFunction::CallSiteInfo CSInfo;
+
if (CallConv == CallingConv::X86_INTR)
report_fatal_error("X86 interrupts may not be called directly");
@@ -3805,6 +3786,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Subtarget);
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.EnableDebugEntryValues)
+ CSInfo.emplace_back(VA.getLocReg(), I);
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
@@ -3975,46 +3959,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
- } else if (Callee->getOpcode() == ISD::GlobalAddress) {
- // If the callee is a GlobalAddress node (quite common, every direct call
- // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
- // it.
- GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
-
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- const GlobalValue *GV = G->getGlobal();
- if (!GV->hasDLLImportStorageClass()) {
- unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
-
- Callee = DAG.getTargetGlobalAddress(
- GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
-
- if (OpFlags == X86II::MO_GOTPCREL) {
- // Add a wrapper.
- Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
- getPointerTy(DAG.getDataLayout()), Callee);
- // Add extra indirection
- Callee = DAG.getLoad(
- getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()));
- }
- }
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- unsigned char OpFlags =
- Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
-
- Callee = DAG.getTargetExternalSymbol(
- S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
-
- if (OpFlags == X86II::MO_GOTPCREL) {
- Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
- getPointerTy(DAG.getDataLayout()), Callee);
- Callee = DAG.getLoad(
- getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()));
- }
+ } else if (Callee->getOpcode() == ISD::GlobalAddress ||
+ Callee->getOpcode() == ISD::ExternalSymbol) {
+ // Lower direct calls to global addresses and external symbols. Setting
+ // ForCall to true here has the effect of removing WrapperRIP when possible
+ // to allow direct calls to be selected without first materializing the
+ // address into a register.
+ Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
} else if (Subtarget.isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -4105,7 +4056,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
MF.getFrameInfo().setHasTailCall();
- return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
+ SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
+ DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
+ return Ret;
}
if (HasNoCfCheck && IsCFProtectionSupported) {
@@ -4114,6 +4067,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
}
InFlag = Chain.getValue(1);
+ DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
@@ -4787,7 +4741,6 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
if (!IntrData)
return false;
- Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.flags = MachineMemOperand::MONone;
Info.offset = 0;
@@ -4795,6 +4748,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {
+ Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = I.getArgOperand(0);
MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
@@ -4810,6 +4764,31 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags |= MachineMemOperand::MOStore;
break;
}
+ case GATHER:
+ case GATHER_AVX2: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = nullptr;
+ MVT DataVT = MVT::getVT(I.getType());
+ MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
+ unsigned NumElts = std::min(DataVT.getVectorNumElements(),
+ IndexVT.getVectorNumElements());
+ Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
+ Info.align = 1;
+ Info.flags |= MachineMemOperand::MOLoad;
+ break;
+ }
+ case SCATTER: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.ptrVal = nullptr;
+ MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
+ MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
+ unsigned NumElts = std::min(DataVT.getVectorNumElements(),
+ IndexVT.getVectorNumElements());
+ Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
+ Info.align = 1;
+ Info.flags |= MachineMemOperand::MOStore;
+ break;
+ }
default:
return false;
}
@@ -4820,7 +4799,8 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
-bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const {
for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
return true;
@@ -4837,6 +4817,26 @@ bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
+
+ // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
+ // those uses are extracted directly into a store, then the extract + store
+ // can be store-folded. Therefore, it's probably not worth splitting the load.
+ EVT VT = Load->getValueType(0);
+ if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
+ for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
+ // Skip uses of the chain value. Result 0 of the node is the load value.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ // If this use is not an extract + store, it's probably worth splitting.
+ if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
+ UI->use_begin()->getOpcode() != ISD::STORE)
+ return true;
+ }
+ // All non-chain uses are extract + store.
+ return false;
+ }
+
return true;
}
@@ -4909,15 +4909,29 @@ bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
}
bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ unsigned Opc = VecOp.getOpcode();
+
+ // Assume target opcodes can't be scalarized.
+ // TODO - do we have any exceptions?
+ if (Opc >= ISD::BUILTIN_OP_END)
+ return false;
+
// If the vector op is not supported, try to convert to scalar.
EVT VecVT = VecOp.getValueType();
- if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+ if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
return true;
// If the vector op is supported, but the scalar op is not, the transform may
// not be worthwhile.
EVT ScalarVT = VecVT.getScalarType();
- return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+ return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
+bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
+ // TODO: Allow vectors?
+ if (VT.isVector())
+ return false;
+ return VT.isSimple() || !isOperationExpand(Opcode, VT);
}
bool X86TargetLowering::isCheapToSpeculateCttz() const {
@@ -4930,8 +4944,9 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
-bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
- EVT BitcastVT) const {
+bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
BitcastVT.getVectorElementType() == MVT::i1)
return false;
@@ -4939,7 +4954,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;
- return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
+ // If both types are legal vectors, it's always ok to convert them.
+ if (LoadVT.isVector() && BitcastVT.isVector() &&
+ isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
+ return true;
+
+ return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
@@ -4953,6 +4973,10 @@ bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
return (MemVT.getSizeInBits() <= MaxIntSize);
}
+ // Make sure we don't merge greater than our preferred vector
+ // width.
+ if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
+ return false;
return true;
}
@@ -4998,7 +5022,25 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
return Subtarget.hasSSE2();
}
-bool X86TargetLowering::preferShiftsToClearExtremeBits(SDValue Y) const {
+bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
+ const SDNode *N, CombineLevel Level) const {
+ assert(((N->getOpcode() == ISD::SHL &&
+ N->getOperand(0).getOpcode() == ISD::SRL) ||
+ (N->getOpcode() == ISD::SRL &&
+ N->getOperand(0).getOpcode() == ISD::SHL)) &&
+ "Expected shift-shift mask");
+ EVT VT = N->getValueType(0);
+ if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
+ (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
+ // Only fold if the shift values are equal - so it folds to AND.
+ // TODO - we should fold if either is a non-uniform vector but we don't do
+ // the fold for non-splats yet.
+ return N->getOperand(1) == N->getOperand(0).getOperand(1);
+ }
+ return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
+}
+
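shouldFoldConstantShiftPairToMask above only reports the fold as profitable when both shift amounts are equal, because only then does the shift pair collapse to a single AND mask. A self-contained check of that identity (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  // With equal amounts, srl+shl clears the low bits and shl+srl clears the
  // high bits, exactly what a single AND with a constant mask does.
  for (uint32_t x = 0; x < 1024; ++x) {
    for (unsigned c = 0; c < 32; ++c) {
      assert(((x >> c) << c) == (x & ~((1u << c) - 1)));
      assert(((x << c) >> c) == (x & (~0u >> c)));
    }
  }
  return 0;
}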
+bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
EVT VT = Y.getValueType();
// For vectors, we don't have a preference, but we probably want a mask.
@@ -5048,8 +5090,8 @@ static bool isUndefOrZero(int Val) {
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
}
-/// Return true if every element in Mask, beginning
-/// from position Pos and ending in Pos+Size is the undef sentinel value.
+/// Return true if every element in Mask, beginning from position Pos and ending
+/// in Pos+Size is the undef sentinel value.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
if (Mask[i] != SM_SentinelUndef)
@@ -5057,6 +5099,18 @@ static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
return true;
}
+/// Return true if the mask creates a vector whose lower half is undefined.
+static bool isUndefLowerHalf(ArrayRef<int> Mask) {
+ unsigned NumElts = Mask.size();
+ return isUndefInRange(Mask, 0, NumElts / 2);
+}
+
+/// Return true if the mask creates a vector whose upper half is undefined.
+static bool isUndefUpperHalf(ArrayRef<int> Mask) {
+ unsigned NumElts = Mask.size();
+ return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
+}
+
/// Return true if Val falls within the specified range (L, H].
static bool isInRange(int Val, int Low, int Hi) {
return (Val >= Low && Val < Hi);
@@ -5409,6 +5463,53 @@ static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
DAG.getIntPtrConstant(0, dl));
}
+/// Widen a vector to a larger size with the same scalar type, with the new
+/// elements either zero or undef.
+static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
+ const X86Subtarget &Subtarget, SelectionDAG &DAG,
+ const SDLoc &dl, unsigned WideSizeInBits) {
+ assert(Vec.getValueSizeInBits() < WideSizeInBits &&
+ (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
+ "Unsupported vector widening type");
+ unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
+ MVT SVT = Vec.getSimpleValueType().getScalarType();
+ MVT VT = MVT::getVectorVT(SVT, WideNumElts);
+ return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
+}
+
+// Helper function to collect subvector ops that are concatenated together,
+// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
+// The subvectors in Ops are guaranteed to be the same type.
+static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
+ assert(Ops.empty() && "Expected an empty ops vector");
+
+ if (N->getOpcode() == ISD::CONCAT_VECTORS) {
+ Ops.append(N->op_begin(), N->op_end());
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::INSERT_SUBVECTOR &&
+ isa<ConstantSDNode>(N->getOperand(2))) {
+ SDValue Src = N->getOperand(0);
+ SDValue Sub = N->getOperand(1);
+ const APInt &Idx = N->getConstantOperandAPInt(2);
+ EVT VT = Src.getValueType();
+ EVT SubVT = Sub.getValueType();
+
+ // TODO - Handle more general insert_subvector chains.
+ if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
+ Idx == (VT.getVectorNumElements() / 2) &&
+ Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ isNullConstant(Src.getOperand(2))) {
+ Ops.push_back(Src.getOperand(1));
+ Ops.push_back(Sub);
+ return true;
+ }
+ }
+
+ return false;
+}
+
// Helper for splitting operands of an operation to legal target size and
// apply a function on each part.
// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
@@ -5457,19 +5558,6 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}
-// Return true if the instruction zeroes the unused upper part of the
-// destination and accepts mask.
-static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
- switch (Opcode) {
- default:
- return false;
- case X86ISD::CMPM:
- case X86ISD::CMPM_RND:
- case ISD::SETCC:
- return true;
- }
-}
-
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -5626,10 +5714,29 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
return DAG.getBitcast(VT, Vec);
}
-static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
- SelectionDAG &DAG) {
+// Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
+static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return ISD::ANY_EXTEND_VECTOR_INREG;
+ case ISD::ZERO_EXTEND:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ISD::ZERO_EXTEND_VECTOR_INREG;
+ case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return ISD::SIGN_EXTEND_VECTOR_INREG;
+ }
+ llvm_unreachable("Unknown opcode");
+}
+
+static SDValue getExtendInVec(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue In, SelectionDAG &DAG) {
EVT InVT = In.getValueType();
assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
+ assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
+ ISD::ZERO_EXTEND == Opcode) &&
+ "Unknown extension opcode");
// For 256-bit vectors, we only need the lower (128-bit) input half.
// For 512-bit vectors, we only need the lower input half or quarter.
@@ -5642,13 +5749,10 @@ static SDValue getExtendInVec(bool Signed, const SDLoc &DL, EVT VT, SDValue In,
InVT = In.getValueType();
}
- if (VT.getVectorNumElements() == InVT.getVectorNumElements())
- return DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- DL, VT, In);
+ if (VT.getVectorNumElements() != InVT.getVectorNumElements())
+ Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
- return DAG.getNode(Signed ? ISD::SIGN_EXTEND_VECTOR_INREG
- : ISD::ZERO_EXTEND_VECTOR_INREG,
- DL, VT, In);
+ return DAG.getNode(Opcode, DL, VT, In);
}
/// Returns a vector_shuffle node for an unpackl operation.
@@ -5686,18 +5790,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
}
-// Peek through EXTRACT_SUBVECTORs - typically used for AVX1 256-bit intops.
-static SDValue peekThroughEXTRACT_SUBVECTORs(SDValue V) {
- while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
- V = V.getOperand(0);
- return V;
-}
-
-static const Constant *getTargetConstantFromNode(SDValue Op) {
- Op = peekThroughBitcasts(Op);
-
- auto *Load = dyn_cast<LoadSDNode>(Op);
- if (!Load)
+static const Constant *getTargetConstantFromNode(LoadSDNode *Load) {
+ if (!Load || !ISD::isNormalLoad(Load))
return nullptr;
SDValue Ptr = Load->getBasePtr();
@@ -5712,6 +5806,17 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
return CNode->getConstVal();
}
+static const Constant *getTargetConstantFromNode(SDValue Op) {
+ Op = peekThroughBitcasts(Op);
+ return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op));
+}
+
+const Constant *
+X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
+ assert(LD && "Unexpected null LoadSDNode");
+ return getTargetConstantFromNode(LD);
+}
+
// Extract raw constant bits from constant pools.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt &UndefElts,
@@ -5778,8 +5883,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
return false;
- APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
- EltBits[i] = Bits.getZExtValue();
+ EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset);
}
return true;
};
@@ -5899,6 +6003,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
}
}
+ // Extract constant bits from a subvector broadcast.
+ if (Op.getOpcode() == X86ISD::SUBV_BROADCAST) {
+ SmallVector<APInt, 16> SubEltBits;
+ if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, SubEltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ UndefElts = APInt::getSplat(NumElts, UndefElts);
+ while (EltBits.size() < NumElts)
+ EltBits.append(SubEltBits.begin(), SubEltBits.end());
+ return true;
+ }
+ }
+
// Extract a rematerialized scalar constant insertion.
if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -5914,6 +6031,29 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return CastBitData(UndefSrcElts, SrcEltBits);
}
+ // Insert constant bits from base and sub vector sources.
+ if (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(2))) {
+ // TODO - support insert_subvector through bitcasts.
+ if (EltSizeInBits != VT.getScalarSizeInBits())
+ return false;
+
+ APInt UndefSubElts;
+ SmallVector<APInt, 32> EltSubBits;
+ if (getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
+ UndefSubElts, EltSubBits,
+ AllowWholeUndefs, AllowPartialUndefs) &&
+ getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
+ UndefElts, EltBits, AllowWholeUndefs,
+ AllowPartialUndefs)) {
+ unsigned BaseIdx = Op.getConstantOperandVal(2);
+ UndefElts.insertBits(UndefSubElts, BaseIdx);
+ for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
+ EltBits[BaseIdx + i] = EltSubBits[i];
+ return true;
+ }
+ }
+
// Extract constant bits from a subvector's source.
if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
isa<ConstantSDNode>(Op.getOperand(1))) {
@@ -6068,6 +6208,34 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
}
}
+// Split the demanded elts of a HADD/HSUB node between its operands.
+static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
+ APInt &DemandedLHS, APInt &DemandedRHS) {
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumElts = DemandedElts.getBitWidth();
+ int NumEltsPerLane = NumElts / NumLanes;
+ int HalfEltsPerLane = NumEltsPerLane / 2;
+
+ DemandedLHS = APInt::getNullValue(NumElts);
+ DemandedRHS = APInt::getNullValue(NumElts);
+
+ // Map DemandedElts to the horizontal operands.
+ for (int Idx = 0; Idx != NumElts; ++Idx) {
+ if (!DemandedElts[Idx])
+ continue;
+ int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
+ int LocalIdx = Idx % NumEltsPerLane;
+ if (LocalIdx < HalfEltsPerLane) {
+ DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0);
+ DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1);
+ } else {
+ LocalIdx -= HalfEltsPerLane;
+ DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0);
+ DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1);
+ }
+ }
+}
+
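getHorizDemandedElts above maps each demanded result element of a horizontal add/sub to the pair of adjacent source elements it reads, split per 128-bit lane between the two operands. A runnable sketch of the same index arithmetic, assuming a v8i32 HADD (two lanes of four elements each):

#include <bitset>
#include <cstdio>

int main() {
  const int NumElts = 8, NumEltsPerLane = 4, HalfEltsPerLane = 2;
  std::bitset<8> Demanded("01000001"); // demand result elements 0 and 6
  std::bitset<8> LHS, RHS;
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!Demanded[Idx])
      continue;
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      LHS.set(LaneIdx + 2 * LocalIdx + 0); // low half of the lane reads LHS
      LHS.set(LaneIdx + 2 * LocalIdx + 1);
    } else {
      LocalIdx -= HalfEltsPerLane;         // high half of the lane reads RHS
      RHS.set(LaneIdx + 2 * LocalIdx + 0);
      RHS.set(LaneIdx + 2 * LocalIdx + 1);
    }
  }
  // Element 0 needs LHS elements 0,1; element 6 needs RHS elements 4,5.
  printf("LHS=%s RHS=%s\n", LHS.to_string().c_str(), RHS.to_string().c_str());
}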
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
@@ -6468,14 +6636,15 @@ static bool setTargetShuffleZeroElements(SDValue N,
static bool resolveTargetShuffleInputs(SDValue Op,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
- const SelectionDAG &DAG);
+ SelectionDAG &DAG);
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
-static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
+static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
+ SmallVectorImpl<int> &Mask,
SmallVectorImpl<SDValue> &Ops,
- const SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
Mask.clear();
Ops.clear();
@@ -6483,8 +6652,9 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
unsigned NumElts = VT.getVectorNumElements();
unsigned NumSizeInBits = VT.getSizeInBits();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
- assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&
- "Expected byte aligned value types");
+ if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
+ return false;
+ assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
unsigned Opcode = N.getOpcode();
switch (Opcode) {
@@ -6524,6 +6694,40 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::OR: {
+ // Inspect each operand at the byte level. We can merge these into a
+ // blend shuffle mask if for each byte at least one is masked out (zero).
+ KnownBits Known0 = DAG.computeKnownBits(N.getOperand(0), DemandedElts);
+ KnownBits Known1 = DAG.computeKnownBits(N.getOperand(1), DemandedElts);
+ if (Known0.One.isNullValue() && Known1.One.isNullValue()) {
+ bool IsByteMask = true;
+ unsigned NumSizeInBytes = NumSizeInBits / 8;
+ unsigned NumBytesPerElt = NumBitsPerElt / 8;
+ APInt ZeroMask = APInt::getNullValue(NumBytesPerElt);
+ APInt SelectMask = APInt::getNullValue(NumBytesPerElt);
+ for (unsigned i = 0; i != NumBytesPerElt && IsByteMask; ++i) {
+ unsigned LHS = Known0.Zero.extractBits(8, i * 8).getZExtValue();
+ unsigned RHS = Known1.Zero.extractBits(8, i * 8).getZExtValue();
+ if (LHS == 255 && RHS == 0)
+ SelectMask.setBit(i);
+ else if (LHS == 255 && RHS == 255)
+ ZeroMask.setBit(i);
+ else if (!(LHS == 0 && RHS == 255))
+ IsByteMask = false;
+ }
+ if (IsByteMask) {
+ for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt) {
+ for (unsigned j = 0; j != NumBytesPerElt; ++j) {
+ unsigned Ofs = (SelectMask[j] ? NumSizeInBytes : 0);
+ int Idx = (ZeroMask[j] ? (int)SM_SentinelZero : (i + j + Ofs));
+ Mask.push_back(Idx);
+ }
+ }
+ Ops.push_back(N.getOperand(0));
+ Ops.push_back(N.getOperand(1));
+ return true;
+ }
+ }
+
// Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
// is a valid shuffle index.
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
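The OR-to-blend handling added in this case treats OR(A,B) as a byte blend whenever, for every byte, at least one operand is known to be zero. A simplified standalone sketch of that mask construction (per-byte over the whole value, without the per-element uniformity check the patch also performs; names are invented):

#include <cstdio>
#include <vector>

// Index < N picks a byte of A, index >= N picks a byte of B, and -2 marks a
// byte that is zero in both operands (mirroring SM_SentinelZero).
static bool orToByteBlend(const std::vector<bool> &AKnownZero,
                          const std::vector<bool> &BKnownZero,
                          std::vector<int> &Mask) {
  int N = AKnownZero.size();
  Mask.clear();
  for (int i = 0; i != N; ++i) {
    if (AKnownZero[i] && BKnownZero[i])
      Mask.push_back(-2);      // zero in both operands
    else if (AKnownZero[i])
      Mask.push_back(N + i);   // byte comes from B
    else if (BKnownZero[i])
      Mask.push_back(i);       // byte comes from A
    else
      return false;            // both may be non-zero: not a blend
  }
  return true;
}

int main() {
  // 4 bytes: A supplies bytes 0-1, B supplies byte 2, byte 3 is zero.
  std::vector<bool> A = {false, false, true, true};
  std::vector<bool> B = {true, true, false, true};
  std::vector<int> Mask;
  if (orToByteBlend(A, B, Mask))
    for (int M : Mask)
      printf("%d ", M);        // prints: 0 1 6 -2
  printf("\n");
}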
@@ -6558,9 +6762,6 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::INSERT_SUBVECTOR: {
- // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1)) where
- // SRC0/SRC1 are both of the same valuetype VT.
- // TODO - add peekThroughOneUseBitcasts support.
SDValue Src = N.getOperand(0);
SDValue Sub = N.getOperand(1);
EVT SubVT = Sub.getValueType();
@@ -6568,28 +6769,57 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
if (!isa<ConstantSDNode>(N.getOperand(2)) ||
!N->isOnlyUserOf(Sub.getNode()))
return false;
+ uint64_t InsertIdx = N.getConstantOperandVal(2);
+ // Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)).
+ if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Sub.getOperand(0).getValueType() == VT &&
+ isa<ConstantSDNode>(Sub.getOperand(1))) {
+ uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
+ for (int i = 0; i != (int)NumElts; ++i)
+ Mask.push_back(i);
+ for (int i = 0; i != (int)NumSubElts; ++i)
+ Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
+ Ops.push_back(Src);
+ Ops.push_back(Sub.getOperand(0));
+ return true;
+ }
+ // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
SmallVector<int, 64> SubMask;
SmallVector<SDValue, 2> SubInputs;
- if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) ||
- SubMask.size() != NumSubElts)
+ if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
+ SubMask, DAG))
return false;
+ if (SubMask.size() != NumSubElts) {
+ assert(((SubMask.size() % NumSubElts) == 0 ||
+ (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
+ if ((NumSubElts % SubMask.size()) == 0) {
+ int Scale = NumSubElts / SubMask.size();
+ SmallVector<int,64> ScaledSubMask;
+ scaleShuffleMask<int>(Scale, SubMask, ScaledSubMask);
+ SubMask = ScaledSubMask;
+ } else {
+ int Scale = SubMask.size() / NumSubElts;
+ NumSubElts = SubMask.size();
+ NumElts *= Scale;
+ InsertIdx *= Scale;
+ }
+ }
Ops.push_back(Src);
for (SDValue &SubInput : SubInputs) {
- if (SubInput.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- SubInput.getOperand(0).getValueType() != VT ||
- !isa<ConstantSDNode>(SubInput.getOperand(1)))
- return false;
- Ops.push_back(SubInput.getOperand(0));
+ EVT SubSVT = SubInput.getValueType().getScalarType();
+ EVT AltVT = EVT::getVectorVT(*DAG.getContext(), SubSVT,
+ NumSizeInBits / SubSVT.getSizeInBits());
+ Ops.push_back(DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), AltVT,
+ DAG.getUNDEF(AltVT), SubInput,
+ DAG.getIntPtrConstant(0, SDLoc(N))));
}
- int InsertIdx = N.getConstantOperandVal(2);
for (int i = 0; i != (int)NumElts; ++i)
Mask.push_back(i);
for (int i = 0; i != (int)NumSubElts; ++i) {
int M = SubMask[i];
if (0 <= M) {
int InputIdx = M / NumSubElts;
- int ExtractIdx = SubInputs[InputIdx].getConstantOperandVal(1);
- M = (NumElts * (1 + InputIdx)) + ExtractIdx + (M % NumSubElts);
+ M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
}
Mask[i + InsertIdx] = M;
}
@@ -6674,16 +6904,21 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
"Unexpected input value type");
+ APInt EltsLHS, EltsRHS;
+ getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);
+
// If we know input saturation won't happen we can treat this
// as a truncation shuffle.
if (Opcode == X86ISD::PACKSS) {
- if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) ||
- (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt))
+ if ((!N0.isUndef() &&
+ DAG.ComputeNumSignBits(N0, EltsLHS) <= NumBitsPerElt) ||
+ (!N1.isUndef() &&
+ DAG.ComputeNumSignBits(N1, EltsRHS) <= NumBitsPerElt))
return false;
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
- if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) ||
- (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask)))
+ if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS)) ||
+ (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS)))
return false;
}
@@ -6728,15 +6963,54 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
}
return true;
}
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND: {
- // TODO - add support for VPMOVZX with smaller input vector types.
+ case X86ISD::VBROADCAST: {
SDValue Src = N.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
- if (NumSizeInBits != SrcVT.getSizeInBits())
- break;
- DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
+ if (!SrcVT.isVector())
+ return false;
+
+ if (NumSizeInBits != SrcVT.getSizeInBits()) {
+ assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
+ "Illegal broadcast type");
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumSizeInBits / SrcVT.getScalarSizeInBits());
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
+ DAG.getUNDEF(SrcVT), Src,
+ DAG.getIntPtrConstant(0, SDLoc(N)));
+ }
+
+ Ops.push_back(Src);
+ Mask.append(NumElts, 0);
+ return true;
+ }
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
+ SDValue Src = N.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Extended source must be a simple vector.
+ if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
+ (SrcVT.getScalarSizeInBits() % 8) != 0)
+ return false;
+
+ unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
+ bool IsAnyExtend =
+ (ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
+ DecodeZeroExtendMask(NumSrcBitsPerElt, NumBitsPerElt, NumElts, IsAnyExtend,
Mask);
+
+ if (NumSizeInBits != SrcVT.getSizeInBits()) {
+ assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
+ "Illegal zero-extension type");
+ SrcVT = MVT::getVectorVT(SrcVT.getSimpleVT().getScalarType(),
+ NumSizeInBits / NumSrcBitsPerElt);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
+ DAG.getUNDEF(SrcVT), Src,
+ DAG.getIntPtrConstant(0, SDLoc(N)));
+ }
+
Ops.push_back(Src);
return true;
}
@@ -6745,7 +7019,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return false;
}
-/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
+/// Removes unused/repeated shuffle source inputs and adjusts the shuffle mask.
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask) {
int MaskWidth = Mask.size();
@@ -6761,13 +7035,28 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
M = SM_SentinelUndef;
// Check for unused inputs.
- if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
- UsedInputs.push_back(Inputs[i]);
+ if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
+ for (int &M : Mask)
+ if (lo <= M)
+ M -= MaskWidth;
continue;
}
- for (int &M : Mask)
- if (lo <= M)
- M -= MaskWidth;
+
+ // Check for repeated inputs.
+ bool IsRepeat = false;
+ for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
+ if (UsedInputs[j] != Inputs[i])
+ continue;
+ for (int &M : Mask)
+ if (lo <= M)
+ M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
+ IsRepeat = true;
+ break;
+ }
+ if (IsRepeat)
+ continue;
+
+ UsedInputs.push_back(Inputs[i]);
}
Inputs = UsedInputs;
}
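The repeated-input handling added above renumbers mask entries so that duplicate shuffle sources collapse onto a single slot and later entries shift down. A small sketch of that renumbering, with strings standing in for SDValues (illustrative only):

#include <cstdio>
#include <string>
#include <vector>

static void resolveInputs(std::vector<std::string> &Inputs,
                          std::vector<int> &Mask) {
  int MaskWidth = Mask.size();
  std::vector<std::string> Used;
  for (const std::string &In : Inputs) {
    int lo = Used.size() * MaskWidth, hi = lo + MaskWidth;

    bool IsUsed = false;
    for (int M : Mask)
      IsUsed |= (lo <= M && M < hi);
    if (!IsUsed) {                 // unused input: close the gap
      for (int &M : Mask)
        if (M >= lo)
          M -= MaskWidth;
      continue;
    }

    bool IsRepeat = false;
    for (int j = 0, e = Used.size(); j != e; ++j) {
      if (Used[j] != In)
        continue;
      for (int &M : Mask)          // duplicate input: reuse slot j
        if (M >= lo)
          M = (M < hi) ? (M - lo) + j * MaskWidth : M - MaskWidth;
      IsRepeat = true;
      break;
    }
    if (IsRepeat)
      continue;

    Used.push_back(In);
  }
  Inputs = Used;
}

int main() {
  std::vector<std::string> Inputs = {"A", "A"}; // same value used twice
  std::vector<int> Mask = {0, 5, 2, 7};         // 4..7 refer to the second copy
  resolveInputs(Inputs, Mask);
  for (int M : Mask)
    printf("%d ", M);                           // prints: 0 1 2 3
  printf("\n%zu input(s)\n", Inputs.size());    // prints: 1 input(s)
}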
@@ -6780,9 +7069,11 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
static bool resolveTargetShuffleInputs(SDValue Op,
SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask,
- const SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
- if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
+ if (!getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG))
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
@@ -6838,6 +7129,28 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
Depth+1);
}
+ // Recurse into insert_subvector base/sub vector to find scalars.
+ if (Opcode == ISD::INSERT_SUBVECTOR &&
+ isa<ConstantSDNode>(N->getOperand(2))) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Sub = N->getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ uint64_t SubIdx = N->getConstantOperandVal(2);
+
+ if (SubIdx <= Index && Index < (SubIdx + NumSubElts))
+ return getShuffleScalarElt(Sub.getNode(), Index - SubIdx, DAG, Depth + 1);
+ return getShuffleScalarElt(Vec.getNode(), Index, DAG, Depth + 1);
+ }
+
+ // Recurse into extract_subvector src vector to find scalars.
+ if (Opcode == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Src = N->getOperand(0);
+ uint64_t SrcIdx = N->getConstantOperandVal(1);
+ return getShuffleScalarElt(Src.getNode(), Index + SrcIdx, DAG, Depth + 1);
+ }
+
// Actual nodes that may contain scalar elements
if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
@@ -6880,7 +7193,7 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
// If the build vector contains zeros or our first insertion is not the
// first index then insert into zero vector to break any register
- // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
+ // dependency else use SCALAR_TO_VECTOR.
if (First) {
First = false;
if (NumZero || 0 != i)
@@ -6889,7 +7202,6 @@ static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
assert(0 == i && "Expected insertion into zero-index");
V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
- V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
V = DAG.getBitcast(VT, V);
continue;
}
@@ -6916,50 +7228,51 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
SDLoc dl(Op);
SDValue V;
- bool First = true;
// Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
- for (unsigned i = 0; i < 16; ++i) {
+ for (unsigned i = 0; i < 16; i += 2) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
- if (ThisIsNonZero && First) {
- if (NumZero)
- V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ bool NextIsNonZero = (NonZeros & (1 << (i + 1))) != 0;
+ if (!ThisIsNonZero && !NextIsNonZero)
+ continue;
+
+ // FIXME: Investigate combining the first 4 bytes as an i32 instead.
+ SDValue Elt;
+ if (ThisIsNonZero) {
+ if (NumZero || NextIsNonZero)
+ Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
else
- V = DAG.getUNDEF(MVT::v8i16);
- First = false;
+ Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
}
- if ((i & 1) != 0) {
- // FIXME: Investigate extending to i32 instead of just i16.
- // FIXME: Investigate combining the first 4 bytes as a i32 instead.
- SDValue ThisElt, LastElt;
- bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
- if (LastIsNonZero) {
- LastElt =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
- }
- if (ThisIsNonZero) {
- ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
- ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
- DAG.getConstant(8, dl, MVT::i8));
- if (LastIsNonZero)
- ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
- } else
- ThisElt = LastElt;
-
- if (ThisElt) {
- if (1 == i) {
- V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
- : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
- V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
- V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
- V = DAG.getBitcast(MVT::v8i16, V);
- } else {
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
- DAG.getIntPtrConstant(i / 2, dl));
- }
+ if (NextIsNonZero) {
+ SDValue NextElt = Op.getOperand(i + 1);
+ if (i == 0 && NumZero)
+ NextElt = DAG.getZExtOrTrunc(NextElt, dl, MVT::i32);
+ else
+ NextElt = DAG.getAnyExtOrTrunc(NextElt, dl, MVT::i32);
+ NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt,
+ DAG.getConstant(8, dl, MVT::i8));
+ if (ThisIsNonZero)
+ Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt);
+ else
+ Elt = NextElt;
+ }
+
+ // If our first insertion is not the first index then insert into zero
+ // vector to break any register dependency else use SCALAR_TO_VECTOR.
+ if (!V) {
+ if (i != 0)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else {
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ continue;
}
}
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt);
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt,
+ DAG.getIntPtrConstant(i / 2, dl));
}
return DAG.getBitcast(MVT::v16i8, V);
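The reworked pre-SSE4.1 v16i8 path above assembles each 16-bit lane from a byte pair and inserts it with a single PINSRW instead of inserting bytes one at a time. A tiny sketch of the pair-combining step (invented helper, not part of the lowering itself):

#include <cstdint>
#include <cstdio>

static uint16_t combineBytePair(uint8_t Lo, bool LoNonZero,
                                uint8_t Hi, bool HiNonZero) {
  uint32_t Elt = 0;
  if (LoNonZero)
    Elt = Lo;                  // low byte of the 16-bit lane
  if (HiNonZero)
    Elt |= uint32_t(Hi) << 8;  // high byte, shifted into place and OR'd in
  return uint16_t(Elt);        // truncate to i16, as the lowering does
}

int main() {
  printf("0x%04x\n", combineBytePair(0x34, true, 0x12, true));  // 0x1234
  printf("0x%04x\n", combineBytePair(0x00, false, 0x12, true)); // 0x1200
}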
@@ -7002,9 +7315,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
}
// Find all zeroable elements.
- std::bitset<4> Zeroable;
- for (int i=0; i < 4; ++i) {
- SDValue Elt = Op->getOperand(i);
+ std::bitset<4> Zeroable, Undefs;
+ for (int i = 0; i < 4; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ Undefs[i] = Elt.isUndef();
Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
}
assert(Zeroable.size() - Zeroable.count() > 1 &&
@@ -7014,10 +7328,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
// zeroable or extract_vector_elt with constant index.
SDValue FirstNonZero;
unsigned FirstNonZeroIdx;
- for (unsigned i=0; i < 4; ++i) {
+ for (unsigned i = 0; i < 4; ++i) {
if (Zeroable[i])
continue;
- SDValue Elt = Op->getOperand(i);
+ SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Elt.getOperand(1)))
return SDValue();
@@ -7056,10 +7370,12 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
if (EltIdx == 4) {
// Let the shuffle legalizer deal with blend operations.
- SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
+ SDValue VZeroOrUndef = (Zeroable == Undefs)
+ ? DAG.getUNDEF(VT)
+ : getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
if (V1.getSimpleValueType() != VT)
V1 = DAG.getBitcast(VT, V1);
- return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
+ return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask);
}
// See if we can lower this build_vector to a INSERTPS.
@@ -7079,7 +7395,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
V1 = SrcVector;
- CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
+ CanFold = (SrcVector == V1) && (Current.getConstantOperandAPInt(1) == i);
}
if (!CanFold)
@@ -7200,9 +7516,11 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
- SmallBitVector LoadMask(NumElems, false);
- SmallBitVector ZeroMask(NumElems, false);
- SmallBitVector UndefMask(NumElems, false);
+ APInt LoadMask = APInt::getNullValue(NumElems);
+ APInt ZeroMask = APInt::getNullValue(NumElems);
+ APInt UndefMask = APInt::getNullValue(NumElems);
+
+ SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
// For each element in the initializer, see if we've found a load, zero or an
// undef.
@@ -7210,38 +7528,52 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
SDValue Elt = peekThroughBitcasts(Elts[i]);
if (!Elt.getNode())
return SDValue();
+ if (Elt.isUndef()) {
+ UndefMask.setBit(i);
+ continue;
+ }
+ if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) {
+ ZeroMask.setBit(i);
+ continue;
+ }
+
+ // Each loaded element must be the correct fractional portion of the
+ // requested vector load.
+ if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
+ return SDValue();
- if (Elt.isUndef())
- UndefMask[i] = true;
- else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
- ZeroMask[i] = true;
- else if (ISD::isNON_EXTLoad(Elt.getNode())) {
- LoadMask[i] = true;
- LastLoadedElt = i;
- // Each loaded element must be the correct fractional portion of the
- // requested vector load.
- if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
- return SDValue();
- } else
+ if (!ISD::isNON_EXTLoad(Elt.getNode()))
return SDValue();
+
+ Loads[i] = cast<LoadSDNode>(Elt);
+ LoadMask.setBit(i);
+ LastLoadedElt = i;
}
- assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
+ assert((ZeroMask.countPopulation() + UndefMask.countPopulation() +
+ LoadMask.countPopulation()) == NumElems &&
"Incomplete element masks");
// Handle Special Cases - all undef or undef/zero.
- if (UndefMask.count() == NumElems)
+ if (UndefMask.countPopulation() == NumElems)
return DAG.getUNDEF(VT);
// FIXME: Should we return this as a BUILD_VECTOR instead?
- if ((ZeroMask | UndefMask).count() == NumElems)
+ if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems)
return VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- int FirstLoadedElt = LoadMask.find_first();
+ int FirstLoadedElt = LoadMask.countTrailingZeros();
SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
- LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
- EVT LDBaseVT = EltBase.getValueType();
+ EVT EltBaseVT = EltBase.getValueType();
+ assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
+ "Register/Memory size mismatch");
+ LoadSDNode *LDBase = Loads[FirstLoadedElt];
+ assert(LDBase && "Did not find base load for merging consecutive loads");
+ unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
+ unsigned BaseSizeInBytes = BaseSizeInBits / 8;
+ int LoadSizeInBits = (1 + LastLoadedElt - FirstLoadedElt) * BaseSizeInBits;
+ assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");
// Consecutive loads can contain UNDEFS but not ZERO elements.
// Consecutive loads with UNDEFs and ZEROs elements require a
@@ -7250,11 +7582,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
bool IsConsecutiveLoadWithZeros = true;
for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
if (LoadMask[i]) {
- SDValue Elt = peekThroughBitcasts(Elts[i]);
- LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!DAG.areNonVolatileConsecutiveLoads(
- LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
- i - FirstLoadedElt)) {
+ if (!DAG.areNonVolatileConsecutiveLoads(Loads[i], LDBase, BaseSizeInBytes,
+ i - FirstLoadedElt)) {
IsConsecutiveLoad = false;
IsConsecutiveLoadWithZeros = false;
break;
@@ -7264,11 +7593,6 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
}
}
- SmallVector<LoadSDNode *, 8> Loads;
- for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i)
- if (LoadMask[i])
- Loads.push_back(cast<LoadSDNode>(peekThroughBitcasts(Elts[i])));
-
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
auto MMOFlags = LDBase->getMemOperand()->getFlags();
assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&
@@ -7277,23 +7601,23 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
for (auto *LD : Loads)
- DAG.makeEquivalentMemoryOrdering(LD, NewLd);
+ if (LD)
+ DAG.makeEquivalentMemoryOrdering(LD, NewLd);
return NewLd;
};
- // LOAD - all consecutive load/undefs (must start/end with a load).
- // If we have found an entire vector of loads and undefs, then return a large
- // load of the entire vector width starting at the base pointer.
- // If the vector contains zeros, then attempt to shuffle those elements.
- if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
+ // Check if the base load is entirely dereferenceable.
+ bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable(
+ VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout());
+
+ // LOAD - all consecutive load/undefs (must start/end with a load or be
+ // entirely dereferenceable). If we have found an entire vector of loads and
+ // undefs, then return a large load of the entire vector width starting at the
+ // base pointer. If the vector contains zeros, then attempt to shuffle those
+ // elements.
+ if (FirstLoadedElt == 0 &&
+ (LastLoadedElt == (int)(NumElems - 1) || IsDereferenceable) &&
(IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
- assert(LDBase && "Did not find base load for merging consecutive loads");
- EVT EltVT = LDBase->getValueType(0);
- // Ensure that the input vector size for the merged loads matches the
- // cumulative size of the input elements.
- if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
- return SDValue();
-
if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
@@ -7303,12 +7627,15 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
VT.is256BitVector() && !Subtarget.hasInt256())
return SDValue();
- if (IsConsecutiveLoad)
+ if (NumElems == 1)
+ return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
+
+ if (!ZeroMask)
return CreateLoad(VT, LDBase);
// IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
// vector and a zero vector to clear out the zero elements.
- if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
+ if (!isAfterLegalize && VT.isVector()) {
SmallVector<int, 4> ClearMask(NumElems, -1);
for (unsigned i = 0; i < NumElems; ++i) {
if (ZeroMask[i])
@@ -7323,16 +7650,28 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
}
}
- int LoadSize =
- (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
+ // If the upper half of a ymm/zmm load is undef then just load the lower half.
+ if (VT.is256BitVector() || VT.is512BitVector()) {
+ unsigned HalfNumElems = NumElems / 2;
+ if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
+ EVT HalfVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
+ SDValue HalfLD =
+ EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
+ DAG, Subtarget, isAfterLegalize);
+ if (HalfLD)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ HalfLD, DAG.getIntPtrConstant(0, DL));
+ }
+ }
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
- (LoadSize == 32 || LoadSize == 64) &&
+ (LoadSizeInBits == 32 || LoadSizeInBits == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
- MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
- : MVT::getIntegerVT(LoadSize);
- MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
+ MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
+ : MVT::getIntegerVT(LoadSizeInBits);
+ MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
@@ -7342,14 +7681,85 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
LDBase->getAlignment(),
MachineMemOperand::MOLoad);
for (auto *LD : Loads)
- DAG.makeEquivalentMemoryOrdering(LD, ResNode);
+ if (LD)
+ DAG.makeEquivalentMemoryOrdering(LD, ResNode);
return DAG.getBitcast(VT, ResNode);
}
}
+ // BROADCAST - match the smallest possible repetition pattern, load that
+ // scalar/subvector element and then broadcast to the entire vector.
+ if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
+ (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
+ for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
+ unsigned RepeatSize = SubElems * BaseSizeInBits;
+ unsigned ScalarSize = std::min(RepeatSize, 64u);
+ if (!Subtarget.hasAVX2() && ScalarSize < 32)
+ continue;
+
+ bool Match = true;
+ SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT));
+ for (unsigned i = 0; i != NumElems && Match; ++i) {
+ if (!LoadMask[i])
+ continue;
+ SDValue Elt = peekThroughBitcasts(Elts[i]);
+ if (RepeatedLoads[i % SubElems].isUndef())
+ RepeatedLoads[i % SubElems] = Elt;
+ else
+ Match &= (RepeatedLoads[i % SubElems] == Elt);
+ }
+
+ // We must have loads at both ends of the repetition.
+ Match &= !RepeatedLoads.front().isUndef();
+ Match &= !RepeatedLoads.back().isUndef();
+ if (!Match)
+ continue;
+
+ EVT RepeatVT =
+ VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))
+ ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize)
+ : EVT::getFloatingPointVT(ScalarSize);
+ if (RepeatSize > ScalarSize)
+ RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT,
+ RepeatSize / ScalarSize);
+ EVT BroadcastVT =
+ EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(),
+ VT.getSizeInBits() / ScalarSize);
+ if (TLI.isTypeLegal(BroadcastVT)) {
+ if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
+ RepeatVT, RepeatedLoads, DL, DAG, Subtarget, isAfterLegalize)) {
+ unsigned Opcode = RepeatSize > ScalarSize ? X86ISD::SUBV_BROADCAST
+ : X86ISD::VBROADCAST;
+ SDValue Broadcast = DAG.getNode(Opcode, DL, BroadcastVT, RepeatLoad);
+ return DAG.getBitcast(VT, Broadcast);
+ }
+ }
+ }
+ }
+
return SDValue();
}
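The BROADCAST path added above looks for the smallest power-of-two period at which the loaded elements repeat, loads one period, and broadcasts it across the vector. A standalone sketch of the period search, with ints standing in for loaded elements and -1 for undef (illustrative only):

#include <cstdio>
#include <vector>

// Returns the smallest power-of-two period below the full width at which the
// defined elements repeat, or 0 if there is none. Undef (-1) matches anything,
// but the pattern must start and end with a defined element, as in the patch.
static unsigned smallestRepeat(const std::vector<int> &Elts) {
  unsigned N = Elts.size(); // assumed to be a power of two
  for (unsigned Sub = 1; Sub < N; Sub *= 2) {
    std::vector<int> Rep(Sub, -1);
    bool Match = true;
    for (unsigned i = 0; i != N && Match; ++i) {
      if (Elts[i] < 0)
        continue;
      if (Rep[i % Sub] < 0)
        Rep[i % Sub] = Elts[i];
      else
        Match &= (Rep[i % Sub] == Elts[i]);
    }
    Match &= Rep.front() >= 0 && Rep.back() >= 0;
    if (Match)
      return Sub;
  }
  return 0;
}

int main() {
  std::vector<int> V = {7, 9, 7, 9, 7, -1, 7, 9};  // repeats every 2 elements
  printf("period = %u\n", smallestRepeat(V));      // prints: period = 2
}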
+// Combine a vector op (shuffle etc.) that is equal to build_vector load1,
+// load2, load3, load4, <0, 1, 2, 3> into a vector load if the load addresses
+// are consecutive, non-overlapping, and in the right order.
+static SDValue combineToConsecutiveLoads(EVT VT, SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ bool isAfterLegalize) {
+ SmallVector<SDValue, 64> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
+ Elts.push_back(Elt);
+ continue;
+ }
+ return SDValue();
+ }
+ assert(Elts.size() == VT.getVectorNumElements());
+ return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget,
+ isAfterLegalize);
+}
+
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
@@ -7373,12 +7783,20 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
-static bool isUseOfShuffle(SDNode *N) {
+static bool isFoldableUseOfShuffle(SDNode *N) {
for (auto *U : N->uses()) {
- if (isTargetShuffle(U->getOpcode()))
+ unsigned Opc = U->getOpcode();
+ // VPERMV/VPERMV3 shuffles can never fold their index operands.
+ if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N)
+ return false;
+ if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N)
+ return false;
+ if (isTargetShuffle(Opc))
+ return true;
+ if (Opc == ISD::BITCAST) // Ignore bitcasts
+ return isFoldableUseOfShuffle(U);
+ if (N->hasOneUse())
return true;
- if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
- return isUseOfShuffle(U);
}
return false;
}
@@ -7486,7 +7904,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
SplatBitSize < VT.getSizeInBits()) {
// Avoid replacing with broadcast when it's a use of a shuffle
// instruction to preserve the present custom lowering of shuffles.
- if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
+ if (isFoldableUseOfShuffle(BVOp))
return SDValue();
// replace BUILD_VECTOR with broadcast of the repeated constants.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7581,7 +7999,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
- bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
@@ -8330,6 +8748,22 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
else if (V1.getValueSizeInBits() < Width)
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (BV->getOperand(i).isUndef())
+ DemandedElts.clearBit(i);
+
+ // If we don't need the upper xmm, then perform as an xmm hop.
+ unsigned HalfNumElts = NumElts / 2;
+ if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {
+ MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), HalfNumElts);
+ V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128);
+ V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128);
+ SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1);
+ return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256);
+ }
+
return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1);
}
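
The DemandedElts check above lets a 256-bit horizontal op shrink to a 128-bit one when every non-undef build_vector operand lives in the lower half. A rough standalone illustration of that predicate follows, assuming a simple bool-per-lane model rather than the SelectionDAG types used here (not part of the patch).

#include <cstdio>
#include <vector>

// True if every defined (non-undef) lane sits in the lower half, in which
// case a 256-bit horizontal op can run as a 128-bit op and be widened.
static bool onlyLowerHalfDemanded(const std::vector<bool> &IsUndef) {
  unsigned Half = IsUndef.size() / 2;
  for (unsigned i = Half; i != IsUndef.size(); ++i)
    if (!IsUndef[i])
      return false;
  return true;
}

int main() {
  // v8f32 HADD where only lanes 0..3 are used: the upper xmm is not needed.
  std::vector<bool> Undef = {false, false, false, false, true, true, true, true};
  std::printf("%s\n", onlyLowerHalfDemanded(Undef) ? "xmm hop" : "ymm hop");
  return 0;
}
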
@@ -8338,11 +8772,8 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// We need at least 2 non-undef elements to make this worthwhile by default.
- unsigned NumNonUndefs = 0;
- for (const SDValue &V : BV->op_values())
- if (!V.isUndef())
- ++NumNonUndefs;
-
+ unsigned NumNonUndefs =
+ count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
if (NumNonUndefs < 2)
return SDValue();
@@ -8350,23 +8781,15 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
// int/FP at 128-bit/256-bit. Each type was introduced with a different
// subtarget feature. Try to match those "native" patterns first.
MVT VT = BV->getSimpleValueType(0);
- unsigned HOpcode;
- SDValue V0, V1;
- if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3())
- if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
- return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
-
- if ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3())
- if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
- return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
-
- if ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX())
- if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
- return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
-
- if ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())
+ if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
+ ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
+ ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
+ ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
+ unsigned HOpcode;
+ SDValue V0, V1;
if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
+ }
// Try harder to match 256-bit ops by using extract/concat.
if (!Subtarget.hasAVX() || !VT.is256BitVector())
@@ -8481,9 +8904,15 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
return SDValue();
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
+ bool IsShift = false;
switch (Opcode) {
default:
return SDValue();
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ IsShift = true;
+ break;
case ISD::AND:
case ISD::XOR:
case ISD::OR:
@@ -8504,10 +8933,24 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
// We expect the canonicalized RHS operand to be the constant.
if (!isa<ConstantSDNode>(RHS))
return SDValue();
+
+ // Extend shift amounts.
+ if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
+ if (!IsShift)
+ return SDValue();
+ RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
+ }
+
LHSElts.push_back(LHS);
RHSElts.push_back(RHS);
}
+ // Limit to shifts by uniform immediates.
+ // TODO: Only accept vXi8/vXi64 special cases?
+ // TODO: Permit non-uniform XOP/AVX2/MULLO cases?
+ if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
+ return SDValue();
+
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
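
The new shift handling above only fires when every per-element shift uses the same immediate, since a single PSLL/PSRL/PSRA applies one amount to all lanes. A small sketch of that uniformity check on plain integers, illustrative only (the function name is invented):

#include <cstdio>
#include <vector>

// A build_vector of per-lane shifts folds to one vector shift only when all
// lanes share the same immediate amount.
static bool hasUniformShiftAmount(const std::vector<unsigned> &Amts) {
  for (unsigned A : Amts)
    if (A != Amts[0])
      return false;
  return true;
}

int main() {
  std::vector<unsigned> Uniform = {3, 3, 3, 3}; // -> single vector shift by 3
  std::vector<unsigned> Mixed = {3, 1, 3, 3};   // -> keep the scalar shifts
  std::printf("%d %d\n", (int)hasUniformShiftAmount(Uniform),
              (int)hasUniformShiftAmount(Mixed));
  return 0;
}
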
@@ -9288,60 +9731,9 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
return Vec;
}
-// Return true if all the operands of the given CONCAT_VECTORS node are zeros
-// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0)
-static bool isExpandWithZeros(const SDValue &Op) {
- assert(Op.getOpcode() == ISD::CONCAT_VECTORS &&
- "Expand with zeros only possible in CONCAT_VECTORS nodes!");
-
- for (unsigned i = 1; i < Op.getNumOperands(); i++)
- if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode()))
- return false;
-
- return true;
-}
-
// Returns true if the given node is a type promotion (by concatenating i1
// zeros) of the result of a node that already zeros all upper bits of
// k-register.
-static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) {
- unsigned Opc = Op.getOpcode();
-
- assert(Opc == ISD::CONCAT_VECTORS &&
- Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
- "Unexpected node to check for type promotion!");
-
- // As long as we are concatenating zeros to the upper part of a previous node
- // result, climb up the tree until a node with different opcode is
- // encountered
- while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) {
- if (Opc == ISD::INSERT_SUBVECTOR) {
- if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) &&
- Op.getConstantOperandVal(2) == 0)
- Op = Op.getOperand(1);
- else
- return SDValue();
- } else { // Opc == ISD::CONCAT_VECTORS
- if (isExpandWithZeros(Op))
- Op = Op.getOperand(0);
- else
- return SDValue();
- }
- Opc = Op.getOpcode();
- }
-
- // Check if the first inserted node zeroes the upper bits, or an 'and' result
- // of a node that zeros the upper bits (its masked version).
- if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) ||
- (Op.getOpcode() == ISD::AND &&
- (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) ||
- isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) {
- return Op;
- }
-
- return SDValue();
-}
-
// TODO: Merge this with LowerAVXCONCAT_VECTORS?
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
const X86Subtarget &Subtarget,
@@ -9353,13 +9745,6 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
- // If this node promotes - by concatenating zeroes - the type of the result
- // of a node with instruction that zeroes all upper (irrelevant) bits of the
- // output register, mark it as legal and catch the pattern in instruction
- // selection to avoid emitting extra instructions (for zeroing upper bits).
- if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op))
- return widenSubVector(ResVT, Promoted, true, Subtarget, DAG, dl);
-
unsigned NumZero = 0;
unsigned NumNonZero = 0;
uint64_t NonZeros = 0;
@@ -9618,6 +10003,8 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
int Size = Mask.size();
if (Size != (int)ExpectedMask.size())
return false;
+ assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
+ "Illegal target shuffle mask");
for (int i = 0; i < Size; ++i)
if (Mask[i] == SM_SentinelUndef)
@@ -9687,6 +10074,40 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
return IsUnpackwdMask;
}
+static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) {
+ // Create 128-bit vector type based on mask size.
+ MVT EltVT = MVT::getIntegerVT(128 / Mask.size());
+ MVT VT = MVT::getVectorVT(EltVT, Mask.size());
+
+ // We can't assume a canonical shuffle mask, so try the commuted version too.
+ SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+
+ // Match any of unary/binary or low/high.
+ for (unsigned i = 0; i != 4; ++i) {
+ SmallVector<int, 16> UnpackMask;
+ createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);
+ if (isTargetShuffleEquivalent(Mask, UnpackMask) ||
+ isTargetShuffleEquivalent(CommutedMask, UnpackMask))
+ return true;
+ }
+ return false;
+}
+
+/// Return true if a shuffle mask chooses elements identically in its top and
+/// bottom halves. For example, any splat mask has the same top and bottom
+/// halves. If an element is undefined in only one half of the mask, the halves
+/// are not considered identical.
+static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) {
+ assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask");
+ unsigned HalfSize = Mask.size() / 2;
+ for (unsigned i = 0; i != HalfSize; ++i) {
+ if (Mask[i] != Mask[i + HalfSize])
+ return false;
+ }
+ return true;
+}
+
/// Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -9826,12 +10247,11 @@ static bool isNonZeroElementsInOrder(const APInt &Zeroable,
}
/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
-static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
int Size = Mask.size();
int LaneSize = 128 / VT.getScalarSizeInBits();
const int NumBytes = VT.getSizeInBits() / 8;
@@ -9885,11 +10305,11 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const SDLoc &dl);
// X86 has dedicated shuffle that can be lowered to VEXPAND
-static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
- const APInt &Zeroable,
- ArrayRef<int> Mask, SDValue &V1,
- SDValue &V2, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
+ const APInt &Zeroable,
+ ArrayRef<int> Mask, SDValue &V1,
+ SDValue &V2, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
bool IsLeftZeroSide = true;
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
IsLeftZeroSide))
@@ -9905,9 +10325,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
Subtarget, DAG, DL);
SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
- return DAG.getSelect(DL, VT, VMask,
- DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
- ZeroVector);
+ return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
}
static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
@@ -9997,9 +10415,9 @@ static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
-static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
SmallVector<int, 8> Unpckl;
createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
@@ -10061,10 +10479,10 @@ static bool matchVectorShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
//
// But when avx512vl is available, one can just use a single vpmovdw
// instruction.
-static SDValue lowerVectorShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
if (VT != MVT::v16i8 && VT != MVT::v8i16)
return SDValue();
@@ -10169,10 +10587,9 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
return false;
}
-static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
+ SDValue V1, SDValue V2, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT PackVT;
unsigned PackOpcode;
if (matchVectorShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
@@ -10187,14 +10604,32 @@ static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT,
///
/// This handles cases where we can model a blend exactly as a bitmask due to
/// one of the inputs being zeroable.
-static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SelectionDAG &DAG) {
- assert(!VT.isFloatingPoint() && "Floating point types are not supported");
+static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT MaskVT = VT;
MVT EltVT = VT.getVectorElementType();
- SDValue Zero = DAG.getConstant(0, DL, EltVT);
- SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+ SDValue Zero, AllOnes;
+ // Use f64 if i64 isn't legal.
+ if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
+ EltVT = MVT::f64;
+ MaskVT = MVT::getVectorVT(EltVT, Mask.size());
+ }
+
+ MVT LogicVT = VT;
+ if (EltVT == MVT::f32 || EltVT == MVT::f64) {
+ Zero = DAG.getConstantFP(0.0, DL, EltVT);
+ AllOnes = DAG.getConstantFP(
+ APFloat::getAllOnesValue(EltVT.getSizeInBits(), true), DL, EltVT);
+ LogicVT =
+ MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
+ } else {
+ Zero = DAG.getConstant(0, DL, EltVT);
+ AllOnes = DAG.getAllOnesConstant(DL, EltVT);
+ }
+
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
@@ -10212,8 +10647,11 @@ static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
if (!V)
return SDValue(); // No non-zeroable elements!
- SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
- return DAG.getNode(ISD::AND, DL, VT, V, VMask);
+ SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
+ VMask = DAG.getBitcast(LogicVT, VMask);
+ V = DAG.getBitcast(LogicVT, V);
+ SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
+ return DAG.getBitcast(VT, And);
}
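
lowerShuffleAsBitMask models a blend-with-zero as a plain AND: lanes the shuffle keeps get an all-ones mask element and zeroable lanes get zero. A scalar mock-up of the idea, assuming i32 lanes and using -1 as the all-ones pattern (illustration only, not the patch's code path):

#include <cstdio>
#include <array>

int main() {
  // Shuffle <0, zero, 2, zero> of V1 is just V1 & <-1, 0, -1, 0>: kept lanes
  // get an all-ones mask element, zeroable lanes get zero.
  std::array<int, 4> V1 = {11, 22, 33, 44};
  std::array<int, 4> Mask = {-1, 0, -1, 0};
  for (int i = 0; i != 4; ++i)
    std::printf("%d ", V1[i] & Mask[i]); // 11 0 33 0
  std::printf("\n");
  return 0;
}
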
/// Try to emit a blend instruction for a shuffle using bit math.
@@ -10221,9 +10659,9 @@ static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
/// This is used as a fallback approach when first class blend instructions are
/// unavailable. Currently it is only suitable for integer vectors, but could
/// be generalized for floating point vectors if desirable.
-static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
@@ -10305,11 +10743,11 @@ static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is
/// that the shuffle mask is a blend, or convertible into a blend with zero.
-static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Original,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Original,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
uint64_t BlendMask = 0;
@@ -10325,45 +10763,24 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
V2 = getZeroVector(VT, Subtarget, DAG, DL);
switch (VT.SimpleTy) {
- case MVT::v2f64:
- case MVT::v4f32:
- case MVT::v4f64:
- case MVT::v8f32:
- return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
LLVM_FALLTHROUGH;
+ case MVT::v4f64:
+ case MVT::v8f32:
+ assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
+ LLVM_FALLTHROUGH;
+ case MVT::v2f64:
case MVT::v2i64:
+ case MVT::v4f32:
case MVT::v4i32:
- // If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
- // that instruction.
- if (Subtarget.hasAVX2()) {
- // Scale the blend by the number of 32-bit dwords per element.
- int Scale = VT.getScalarSizeInBits() / 32;
- BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
- MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
- V1 = DAG.getBitcast(BlendVT, V1);
- V2 = DAG.getBitcast(BlendVT, V2);
- return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
- }
- LLVM_FALLTHROUGH;
- case MVT::v8i16: {
- // For integer shuffles we need to expand the mask and cast the inputs to
- // v8i16s prior to blending.
- int Scale = 8 / VT.getVectorNumElements();
- BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
- V1 = DAG.getBitcast(MVT::v8i16, V1);
- V2 = DAG.getBitcast(MVT::v8i16, V2);
- return DAG.getBitcast(VT,
- DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
- DAG.getConstant(BlendMask, DL, MVT::i8)));
- }
+ case MVT::v8i16:
+ assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
+ return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
+ DAG.getConstant(BlendMask, DL, MVT::i8));
case MVT::v16i16: {
- assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
+ assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
// We can lower these with PBLENDW which is mirrored across 128-bit lanes.
@@ -10391,14 +10808,15 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
}
LLVM_FALLTHROUGH;
}
- case MVT::v16i8:
- case MVT::v32i8: {
- assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
- "256-bit byte-blends require AVX2 support!");
+ case MVT::v32i8:
+ assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
+ LLVM_FALLTHROUGH;
+ case MVT::v16i8: {
+ assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return Masked;
if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
@@ -10456,6 +10874,16 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8: {
+ // Attempt to lower to a bitmask if we can. Only if not optimizing for size.
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ if (!OptForSize) {
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
+ return Masked;
+ }
+
+ // Otherwise load an immediate into a GPR, cast to k-register, and use a
+ // masked move.
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
@@ -10471,11 +10899,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
///
/// This matches the pattern where we can blend elements from two inputs and
/// then reduce the shuffle to a single-input permutation.
-static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG,
- bool ImmBlends = false) {
+static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ bool ImmBlends = false) {
// We build up the blend mask while checking whether a blend is a viable way
// to reduce the shuffle.
SmallVector<int, 32> BlendMask(Mask.size(), -1);
@@ -10510,10 +10938,10 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
///
/// This matches the pattern where we can unpack elements from two inputs and
/// then reduce the shuffle to a single-input (wider) permutation.
-static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
int NumElts = Mask.size();
int NumLanes = VT.getSizeInBits() / 128;
int NumLaneElts = NumElts / NumLanes;
@@ -10573,7 +11001,7 @@ static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
/// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
/// permuting the elements of the result in place.
-static SDValue lowerVectorShuffleAsByteRotateAndPermute(
+static SDValue lowerShuffleAsByteRotateAndPermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
@@ -10664,7 +11092,7 @@ static SDValue lowerVectorShuffleAsByteRotateAndPermute(
/// shuffle+blend operations on newer X86 ISAs where we have very fast blend
/// operations. It will try to pick the best arrangement of shuffles and
/// blends.
-static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
+static SDValue lowerShuffleAsDecomposedShuffleBlend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
// Shuffle the input elements into the desired positions in V1 and V2 and
@@ -10688,18 +11116,18 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
// pre-shuffle first is a better strategy.
if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
// Only prefer immediate blends to unpack/rotate.
- if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
- DL, VT, V1, V2, Mask, DAG, true))
+ if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
+ DAG, true))
return BlendPerm;
- if (SDValue UnpackPerm =
- lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask,
+ DAG))
return UnpackPerm;
- if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute(
+ if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
DL, VT, V1, V2, Mask, Subtarget, DAG))
return RotatePerm;
// Unpack/rotate failed - try again with variable blends.
- if (SDValue BlendPerm =
- lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
+ DAG))
return BlendPerm;
}
@@ -10711,8 +11139,7 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(
/// Try to lower a vector shuffle as a rotation.
///
/// This is used for support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
-static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
- ArrayRef<int> Mask) {
+static int matchShuffleAsRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
int NumElts = Mask.size();
// We need to detect various ways of spelling a rotation:
@@ -10796,8 +11223,8 @@ static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
-static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
- ArrayRef<int> Mask) {
+static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
// Don't accept any shuffles with zero elements.
if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
return -1;
@@ -10807,7 +11234,7 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
return -1;
- int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
+ int Rotation = matchShuffleAsRotate(V1, V2, RepeatedMask);
if (Rotation <= 0)
return -1;
@@ -10818,15 +11245,14 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
return Rotation * Scale;
}
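
matchShuffleAsByteRotate scales an element-level rotation into the byte immediate PALIGNR expects. A worked example for a v4i32 mask, kept deliberately tiny and illustrative only:

#include <cstdio>

int main() {
  // v4i32 mask <1,2,3,4> is V1[1..3] followed by V2[0]: a rotation by one
  // 32-bit element, so PALIGNR needs an immediate of 1 * (32 / 8) = 4 bytes.
  int Rotation = 1;          // elements
  int ScalarSizeInBits = 32;
  int Scale = ScalarSizeInBits / 8;
  std::printf("PALIGNR imm = %d bytes\n", Rotation * Scale);
  return 0;
}
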
-static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
SDValue Lo = V1, Hi = V2;
- int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask);
+ int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
if (ByteRotation <= 0)
return SDValue();
@@ -10874,11 +11300,10 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// rotate* of the vector lanes.
-static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
"Only 32-bit and 64-bit elements are supported!");
@@ -10887,7 +11312,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
&& "VLX required for 128/256-bit vectors");
SDValue Lo = V1, Hi = V2;
- int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
+ int Rotation = matchShuffleAsRotate(Lo, Hi, Mask);
if (Rotation <= 0)
return SDValue();
@@ -10895,6 +11320,69 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
DAG.getConstant(Rotation, DL, MVT::i8));
}
+/// Try to lower a vector shuffle as a byte shift sequence.
+static SDValue lowerVectorShuffleAsByteShiftMask(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+ assert(VT.is128BitVector() && "Only 128-bit vectors supported");
+
+ // We need a shuffle that has zeros at one/both ends and a sequential
+ // shuffle from one source within.
+ unsigned ZeroLo = Zeroable.countTrailingOnes();
+ unsigned ZeroHi = Zeroable.countLeadingOnes();
+ if (!ZeroLo && !ZeroHi)
+ return SDValue();
+
+ unsigned NumElts = Mask.size();
+ unsigned Len = NumElts - (ZeroLo + ZeroHi);
+ if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo]))
+ return SDValue();
+
+ unsigned Scale = VT.getScalarSizeInBits() / 8;
+ ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len);
+ if (!isUndefOrInRange(StubMask, 0, NumElts) &&
+ !isUndefOrInRange(StubMask, NumElts, 2 * NumElts))
+ return SDValue();
+
+ SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
+ Res = DAG.getBitcast(MVT::v16i8, Res);
+
+ // Use VSHLDQ/VSRLDQ ops to zero the ends of a vector and leave an
+ // inner sequential set of elements, possibly offset:
+ // 01234567 --> zzzzzz01 --> 1zzzzzzz
+ // 01234567 --> 4567zzzz --> zzzzz456
+ // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz
+ if (ZeroLo == 0) {
+ unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
+ } else if (ZeroHi == 0) {
+ unsigned Shift = Mask[ZeroLo] % NumElts;
+ Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+ } else if (!Subtarget.hasSSSE3()) {
+ // If we don't have PSHUFB then it's worth avoiding an AND constant mask
+ // by performing 3 byte shifts. Shuffle combining can kick in above that.
+ // TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
+ unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Shift += Mask[ZeroLo] % NumElts;
+ Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * Shift, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
+ DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
+ } else
+ return SDValue();
+
+ return DAG.getBitcast(VT, Res);
+}
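
The ZeroHi == 0 branch above first shifts the wanted run down to lane 0 with VSRLDQ and then shifts it back up with VSHLDQ, zeroing the low lanes. Below is a standalone model of that two-shift trick on 8 x i16 lanes, working at element rather than byte granularity; it is purely illustrative and not part of the patch.

#include <cstdio>
#include <array>

// Model PSRLDQ/PSLLDQ on 8 x i16 at element granularity for readability.
static std::array<int, 8> srl(std::array<int, 8> V, int N) { // toward lane 0
  std::array<int, 8> R{};
  for (int i = 0; i + N < 8; ++i)
    R[i] = V[i + N];
  return R;
}
static std::array<int, 8> shl(std::array<int, 8> V, int N) { // away from lane 0
  std::array<int, 8> R{};
  for (int i = 0; i + N < 8; ++i)
    R[i + N] = V[i];
  return R;
}

int main() {
  // Want <z,z,z,2,3,4,5,6> from V = <0,1,...,7>: ZeroLo = 3, ZeroHi = 0.
  // Shift = Mask[ZeroLo] = 2, so PSRLDQ by 2 lanes then PSLLDQ by 3 lanes.
  std::array<int, 8> V = {0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 8> R = shl(srl(V, 2), 3);
  for (int X : R)
    std::printf("%d ", X); // 0 0 0 2 3 4 5 6 (zeroed lanes print as 0)
  std::printf("\n");
  return 0;
}
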
+
/// Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -10918,11 +11406,10 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
/// [ 5, 6, 7, zz, zz, zz, zz, zz]
/// [ -1, 5, 6, 7, zz, zz, zz, zz]
/// [ 1, 2, -1, -1, -1, -1, zz, zz]
-static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
- unsigned ScalarSizeInBits,
- ArrayRef<int> Mask, int MaskOffset,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget) {
+static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
+ unsigned ScalarSizeInBits, ArrayRef<int> Mask,
+ int MaskOffset, const APInt &Zeroable,
+ const X86Subtarget &Subtarget) {
int Size = Mask.size();
unsigned SizeInBits = Size * ScalarSizeInBits;
@@ -10981,11 +11468,11 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
return -1;
}
-static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
int Size = Mask.size();
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
@@ -10994,14 +11481,13 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
unsigned Opcode;
// Try to match shuffle against V1 shift.
- int ShiftAmt = matchVectorShuffleAsShift(
- ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget);
+ int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, 0, Zeroable, Subtarget);
// If V1 failed, try to match shuffle against V2 shift.
if (ShiftAmt < 0) {
- ShiftAmt =
- matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
- Mask, Size, Zeroable, Subtarget);
+ ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, Size, Zeroable, Subtarget);
V = V2;
}
@@ -11018,16 +11504,16 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
// EXTRQ: Extract Len elements from lower half of source, starting at Idx.
// Remainder of lower half result is zero and upper half is all undef.
-static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
- ArrayRef<int> Mask, uint64_t &BitLen,
- uint64_t &BitIdx, const APInt &Zeroable) {
+static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask, uint64_t &BitLen,
+ uint64_t &BitIdx, const APInt &Zeroable) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
- if (!isUndefInRange(Mask, HalfSize, HalfSize))
+ if (!isUndefUpperHalf(Mask))
return false;
// Determine the extraction length from the part of the
@@ -11074,15 +11560,15 @@ static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
// INSERTQ: Extract lowest Len elements from lower half of second source and
// insert over first source, starting at Idx.
// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
-static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
- ArrayRef<int> Mask, uint64_t &BitLen,
- uint64_t &BitIdx) {
+static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask, uint64_t &BitLen,
+ uint64_t &BitIdx) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
// Upper half must be undefined.
- if (!isUndefInRange(Mask, HalfSize, HalfSize))
+ if (!isUndefUpperHalf(Mask))
return false;
for (int Idx = 0; Idx != HalfSize; ++Idx) {
@@ -11140,17 +11626,16 @@ static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
}
/// Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
-static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable, SelectionDAG &DAG) {
uint64_t BitLen, BitIdx;
- if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
+ if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
- if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
+ if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
V2 ? V2 : DAG.getUNDEF(VT),
DAG.getConstant(BitLen, DL, MVT::i8),
@@ -11168,7 +11653,7 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
/// avoid excess shuffling the offset must either be in the bottom lane
/// or at the start of a higher lane. All extended elements must be from
/// the same lane.
-static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
+static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
@@ -11203,6 +11688,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
+ // TODO: Add AnyExt support.
if (Subtarget.hasSSE41()) {
// Not worth offsetting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
@@ -11211,7 +11697,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
- InputV = getExtendInVec(/*Signed*/false, DL, ExtVT, InputV, DAG);
+ InputV = getExtendInVec(ISD::ZERO_EXTEND, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -11234,7 +11720,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
int PSHUFWMask[4] = {1, -1, -1, -1};
- unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
+ unsigned OddEvenOp = (Offset & 1) ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
return DAG.getBitcast(
VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, InputV),
@@ -11253,8 +11739,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(LoIdx, DL, MVT::i8)));
- if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) ||
- !SafeOffset(Offset + 1))
+ if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
return DAG.getBitcast(VT, Lo);
int HiIdx = (Offset + 1) * EltBits;
@@ -11326,7 +11811,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
///
/// The reason we have dedicated lowering for zext-style shuffles is that they
/// are both incredibly common and often quite performance sensitive.
-static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
+static SDValue lowerShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11397,8 +11882,8 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
if (Offset != 0 && Matches < 2)
return SDValue();
- return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG);
+ return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
+ InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
@@ -11482,7 +11967,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
///
/// This is a common pattern that we have especially efficient patterns to lower
/// across all subtarget feature sets.
-static SDValue lowerVectorShuffleAsElementInsertion(
+static SDValue lowerShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11580,10 +12065,10 @@ static SDValue lowerVectorShuffleAsElementInsertion(
/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements.
///
/// This assumes we have AVX2.
-static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
- SDValue V0, int BroadcastIdx,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
+ int BroadcastIdx,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(Subtarget.hasAVX2() &&
"We can only lower integer broadcasts with AVX2!");
@@ -11629,16 +12114,90 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT,
DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}
+/// Test whether this can be lowered with a single SHUFPS instruction.
+///
+/// This is used to disable more specialized lowerings when the shufps lowering
+/// will happen to be efficient.
+static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
+ // This routine only handles 128-bit shufps.
+ assert(Mask.size() == 4 && "Unsupported mask size!");
+ assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
+ assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
+ assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
+ assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
+
+ // To lower with a single SHUFPS we need to have the low half and high half
+ // each requiring a single input.
+ if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
+ return false;
+ if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
+ return false;
+
+ return true;
+}
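
isSingleSHUFPSMask encodes the SHUFPS restriction that the two low result lanes must come from one source and the two high result lanes from one source. A compact standalone version of the same test with two sample masks, for illustration only:

#include <cstdio>
#include <array>

// A 4-lane mask is a single SHUFPS if the two low result lanes draw from one
// input and the two high lanes draw from one input (indices 0-3 = V1, 4-7 = V2).
static bool singleShufps(const std::array<int, 4> &M) {
  if (M[0] >= 0 && M[1] >= 0 && (M[0] < 4) != (M[1] < 4))
    return false;
  if (M[2] >= 0 && M[3] >= 0 && (M[2] < 4) != (M[3] < 4))
    return false;
  return true;
}

int main() {
  std::printf("%d\n", (int)singleShufps({0, 1, 4, 5})); // 1: lo from V1, hi from V2
  std::printf("%d\n", (int)singleShufps({0, 4, 1, 5})); // 0: low half mixes inputs
  return 0;
}
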
+
+/// If we are extracting two 128-bit halves of a vector and shuffling the
+/// result, match that to a 256-bit AVX2 vperm* instruction to avoid a
+/// multi-shuffle lowering.
+static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
+ SDValue N1, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ EVT VT = N0.getValueType();
+ assert((VT.is128BitVector() &&
+ (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&
+ "VPERM* family of shuffles requires 32-bit or 64-bit elements");
+
+ // Check that both sources are extracts of the same source vector.
+ if (!N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ SDValue WideVec = N0.getOperand(0);
+ EVT WideVT = WideVec.getValueType();
+ if (!WideVT.is256BitVector() || !isa<ConstantSDNode>(N0.getOperand(1)) ||
+ !isa<ConstantSDNode>(N1.getOperand(1)))
+ return SDValue();
+
+ // Match extracts of each half of the wide source vector. Commute the shuffle
+ // if the extract of the low half is N1.
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
+ const APInt &ExtIndex0 = N0.getConstantOperandAPInt(1);
+ const APInt &ExtIndex1 = N1.getConstantOperandAPInt(1);
+ if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
+ return SDValue();
+
+ // Final bailout: if the mask is simple, we are better off using an extract
+ // and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps
+ // because that avoids a constant load from memory.
+ if (NumElts == 4 &&
+ (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask)))
+ return SDValue();
+
+ // Extend the shuffle mask with undef elements.
+ NewMask.append(NumElts, -1);
+
+ // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0
+ SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
+ NewMask);
+ // This is free: ymm -> xmm.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
+ DAG.getIntPtrConstant(0, DL));
+}
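
lowerShuffleOfExtractsAsVperm rewrites a shuffle of the two 128-bit halves of one 256-bit source as a single wide shuffle followed by a free ymm-to-xmm extract; the narrow mask already indexes the wide vector's lanes, so it only needs padding with undefs. A small sketch of that mask rewrite, assuming v8f32 halves (illustrative only):

#include <cstdio>
#include <vector>

int main() {
  // shuffle (extract_subvector X, 0), (extract_subvector X, 4), <1,5,2,6>
  //   --> extract_subvector (shuffle X, undef, <1,5,2,6,u,u,u,u>), 0
  // The narrow mask already indexes X's lanes, so it is only padded with
  // undef (-1) entries up to the wide width.
  std::vector<int> Mask = {1, 5, 2, 6};
  unsigned NumElts = Mask.size();
  Mask.insert(Mask.end(), NumElts, -1);
  for (int M : Mask)
    std::printf("%d ", M); // 1 5 2 6 -1 -1 -1 -1
  std::printf("\n");
  return 0;
}
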
+
/// Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
-static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
(Subtarget.hasAVX() && VT.isFloatingPoint()) ||
(Subtarget.hasAVX2() && VT.isInteger())))
@@ -11647,6 +12206,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
// we can only broadcast from a register with AVX2.
unsigned NumElts = Mask.size();
+ unsigned NumEltBits = VT.getScalarSizeInBits();
unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
? X86ISD::MOVDDUP
: X86ISD::VBROADCAST;
@@ -11670,29 +12230,19 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// Go up the chain of (vector) values to find a scalar load that we can
// combine with the broadcast.
+ int BitOffset = BroadcastIdx * NumEltBits;
SDValue V = V1;
for (;;) {
switch (V.getOpcode()) {
case ISD::BITCAST: {
- // Peek through bitcasts as long as BroadcastIdx can be adjusted.
- SDValue VSrc = V.getOperand(0);
- unsigned NumEltBits = V.getScalarValueSizeInBits();
- unsigned NumSrcBits = VSrc.getScalarValueSizeInBits();
- if ((NumEltBits % NumSrcBits) == 0)
- BroadcastIdx *= (NumEltBits / NumSrcBits);
- else if ((NumSrcBits % NumEltBits) == 0 &&
- (BroadcastIdx % (NumSrcBits / NumEltBits)) == 0)
- BroadcastIdx /= (NumSrcBits / NumEltBits);
- else
- break;
- V = VSrc;
+ V = V.getOperand(0);
continue;
}
case ISD::CONCAT_VECTORS: {
- int OperandSize =
- V.getOperand(0).getSimpleValueType().getVectorNumElements();
- V = V.getOperand(BroadcastIdx / OperandSize);
- BroadcastIdx %= OperandSize;
+ int OpBitWidth = V.getOperand(0).getValueSizeInBits();
+ int OpIdx = BitOffset / OpBitWidth;
+ V = V.getOperand(OpIdx);
+ BitOffset %= OpBitWidth;
continue;
}
case ISD::INSERT_SUBVECTOR: {
@@ -11701,11 +12251,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
if (!ConstantIdx)
break;
- int BeginIdx = (int)ConstantIdx->getZExtValue();
- int EndIdx =
- BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
- if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
- BroadcastIdx -= BeginIdx;
+ int EltBitWidth = VOuter.getScalarValueSizeInBits();
+ int Idx = (int)ConstantIdx->getZExtValue();
+ int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
+ int BeginOffset = Idx * EltBitWidth;
+ int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
+ if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
+ BitOffset -= BeginOffset;
V = VInner;
} else {
V = VOuter;
@@ -11715,48 +12267,34 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
}
break;
}
+ assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset");
+ BroadcastIdx = BitOffset / NumEltBits;
- // Ensure the source vector and BroadcastIdx are for a suitable type.
- if (VT.getScalarSizeInBits() != V.getScalarValueSizeInBits()) {
- unsigned NumEltBits = VT.getScalarSizeInBits();
- unsigned NumSrcBits = V.getScalarValueSizeInBits();
- if ((NumSrcBits % NumEltBits) == 0)
- BroadcastIdx *= (NumSrcBits / NumEltBits);
- else if ((NumEltBits % NumSrcBits) == 0 &&
- (BroadcastIdx % (NumEltBits / NumSrcBits)) == 0)
- BroadcastIdx /= (NumEltBits / NumSrcBits);
- else
- return SDValue();
-
- unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
- MVT SrcVT = MVT::getVectorVT(VT.getScalarType(), NumSrcElts);
- V = DAG.getBitcast(SrcVT, V);
- }
+ // Do we need to bitcast the source to retrieve the original broadcast index?
+ bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
- // First, look through bitcast: if the original value has a larger element
- // type than the shuffle, the broadcast element is in essence truncated.
- // Make that explicit to ease folding.
- if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
- if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
- DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
+ // If the original value has a larger element type than the shuffle, the
+ // broadcast element is in essence truncated. Make that explicit to ease
+ // folding.
+ if (BitCastSrc && VT.isInteger())
+ if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast(
+ DL, VT, V, BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
MVT BroadcastVT = VT;
- // Peek through any bitcast (only useful for loads).
- SDValue BC = peekThroughBitcasts(V);
-
// Also check the simpler case, where we can directly reuse the scalar.
- if ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
- (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
+ if (!BitCastSrc &&
+ ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
+ (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0))) {
V = V.getOperand(BroadcastIdx);
// If we can't broadcast from a register, check that the input is a load.
if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
return SDValue();
- } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
+ } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
@@ -11767,10 +12305,11 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// If we are broadcasting a load that is only used by the shuffle
// then we can reduce the vector load to the broadcasted scalar load.
- LoadSDNode *Ld = cast<LoadSDNode>(BC);
+ LoadSDNode *Ld = cast<LoadSDNode>(V);
SDValue BaseAddr = Ld->getOperand(1);
EVT SVT = BroadcastVT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
+ assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
@@ -11779,7 +12318,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
} else if (!BroadcastFromReg) {
// We can't broadcast from a vector register.
return SDValue();
- } else if (BroadcastIdx != 0) {
+ } else if (BitOffset != 0) {
// We can only broadcast from the zero-element of a vector register,
// but it can be advantageous to broadcast from the zero-element of a
// subvector.
@@ -11791,18 +12330,15 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
return SDValue();
// Only broadcast the zero-element of a 128-bit subvector.
- unsigned EltSize = VT.getScalarSizeInBits();
- if (((BroadcastIdx * EltSize) % 128) != 0)
+ if ((BitOffset % 128) != 0)
return SDValue();
- // The shuffle input might have been a bitcast we looked through; look at
- // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
- // later bitcast it to BroadcastVT.
- assert(V.getScalarValueSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
- "Unexpected vector element size");
+ assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
+ "Unexpected bit-offset");
assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
"Unexpected vector size");
- V = extract128BitVector(V, BroadcastIdx, DAG, DL);
+ unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
+ V = extract128BitVector(V, ExtractIdx, DAG, DL);
}
if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector())
@@ -11810,21 +12346,21 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
DAG.getBitcast(MVT::f64, V));
// Bitcast back to the same scalar type as BroadcastVT.
- MVT SrcVT = V.getSimpleValueType();
- if (SrcVT.getScalarType() != BroadcastVT.getScalarType()) {
- assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
+ if (V.getValueType().getScalarType() != BroadcastVT.getScalarType()) {
+ assert(NumEltBits == BroadcastVT.getScalarSizeInBits() &&
"Unexpected vector element size");
- if (SrcVT.isVector()) {
- unsigned NumSrcElts = SrcVT.getVectorNumElements();
- SrcVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
+ MVT ExtVT;
+ if (V.getValueType().isVector()) {
+ unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
+ ExtVT = MVT::getVectorVT(BroadcastVT.getScalarType(), NumSrcElts);
} else {
- SrcVT = BroadcastVT.getScalarType();
+ ExtVT = BroadcastVT.getScalarType();
}
- V = DAG.getBitcast(SrcVT, V);
+ V = DAG.getBitcast(ExtVT, V);
}
// 32-bit targets need to load i64 as a f64 and then bitcast the result.
- if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
+ if (!Subtarget.is64Bit() && V.getValueType() == MVT::i64) {
V = DAG.getBitcast(MVT::f64, V);
unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
@@ -11833,9 +12369,9 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// We only support broadcasting from 128-bit vectors to minimize the
// number of patterns we need to deal with in isel. So extract down to
// 128-bits, removing as many bitcasts as possible.
- if (SrcVT.getSizeInBits() > 128) {
- MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
- 128 / SrcVT.getScalarSizeInBits());
+ if (V.getValueSizeInBits() > 128) {
+ MVT ExtVT = V.getSimpleValueType().getScalarType();
+ ExtVT = MVT::getVectorVT(ExtVT, 128 / ExtVT.getScalarSizeInBits());
V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
V = DAG.getBitcast(ExtVT, V);
}
@@ -11849,11 +12385,10 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// are much smaller to encode than a SHUFPS and an INSERTPS. We can also
// perform INSERTPS if a single V1 element is out of place and all V2
// elements are zeroable.
-static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
- unsigned &InsertPSMask,
- const APInt &Zeroable,
- ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
+ unsigned &InsertPSMask,
+ const APInt &Zeroable,
+ ArrayRef<int> Mask, SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -11938,16 +12473,15 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
return false;
}
-static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
+ ArrayRef<int> Mask, const APInt &Zeroable,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
// Attempt to match the insertps pattern.
unsigned InsertPSMask;
- if (!matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
+ if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG))
return SDValue();
// Insert the V2 element into the desired position.
@@ -11964,7 +12498,7 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsPermuteAndUnpack(
+static SDValue lowerShuffleAsPermuteAndUnpack(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
@@ -12079,19 +12613,18 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(
/// instructions will incur a domain crossing penalty on some chips though so
/// it is better to avoid lowering through this for integer vectors where
/// possible.
-static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. Simulate this by using the
@@ -12116,16 +12649,20 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask[0] < 2 && "We sort V1 to be the first input.");
assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
+ if (Subtarget.hasAVX2())
+ if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+ return Extract;
+
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
@@ -12141,13 +12678,12 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
if (Subtarget.hasSSE41())
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
return V;
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
@@ -12161,19 +12697,18 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// the integer unit to minimize domain crossing penalties. However, for blends
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
-static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
@@ -12193,20 +12728,24 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask[0] < 2 && "We sort V1 to be the first input.");
assert(Mask[1] >= 2 && "We sort V2 to be the second input.");
+ if (Subtarget.hasAVX2())
+ if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+ return Extract;
+
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
@@ -12214,33 +12753,32 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
- if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v2i64, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
}
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, Mask,
+ Subtarget, DAG);
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
@@ -12252,36 +12790,14 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
-/// Test whether this can be lowered with a single SHUFPS instruction.
-///
-/// This is used to disable more specialized lowerings when the shufps lowering
-/// will happen to be efficient.
-static bool isSingleSHUFPSMask(ArrayRef<int> Mask) {
- // This routine only handles 128-bit shufps.
- assert(Mask.size() == 4 && "Unsupported mask size!");
- assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");
- assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");
- assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");
- assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");
-
- // To lower with a single SHUFPS we need to have the low half and high half
- // each requiring a single input.
- if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4))
- return false;
- if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4))
- return false;
-
- return true;
-}
-
/// Lower a vector shuffle using the SHUFPS instruction.
///
/// This is a helper routine dedicated to lowering vector shuffles using SHUFPS.
/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
-static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
@@ -12366,11 +12882,10 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
-static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -12379,8 +12894,8 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
// Use even/odd duplicate instructions for masks that match their pattern.
@@ -12413,29 +12928,32 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
+ if (Subtarget.hasAVX2())
+ if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+ return Extract;
+
// There are special ways we can lower some single-element blends. However, we
// have custom ways we can lower more complex single-element blends below that
// we defer to if both this and BLENDPS fail to match, so restrict this to
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Use INSERTPS if we can complete the shuffle efficiently.
- if (SDValue V =
- lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
+ if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
return V;
if (!isSingleSHUFPSMask(Mask))
- if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute(
- DL, MVT::v4f32, V1, V2, Mask, DAG))
+ if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
+ V2, Mask, DAG))
return BlendPerm;
}
@@ -12449,23 +12967,21 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
return V;
// Otherwise fall back to a SHUFPS lowering strategy.
- return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
+ return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
/// Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
-static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
@@ -12473,16 +12989,16 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
// Straight shuffle of a single input vector. For everything from SSE2
@@ -12501,14 +13017,18 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
+ if (Subtarget.hasAVX2())
+ if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
+ return Extract;
+
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
@@ -12516,29 +13036,28 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
- if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
return V;
// Try to use byte rotation instructions.
  // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
if (Subtarget.hasSSSE3()) {
if (Subtarget.hasVLX())
- if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i32, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
}
@@ -12549,12 +13068,12 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have direct support for blends, we should lower by decomposing into
// a permute. That will be faster than the domain cross.
if (IsBlendSupported)
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask,
+ Subtarget, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2,
+ Mask, Subtarget, DAG))
return Unpack;
}
@@ -12585,7 +13104,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
/// vector, form the analogous 128-bit 8-element Mask.
-static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
+static SDValue lowerV8I16GeneralSingleInputShuffle(
const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
@@ -12617,11 +13136,9 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
array_pod_sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
- int NumLToL =
- std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
+ int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin();
int NumHToL = LoInputs.size() - NumLToL;
- int NumLToH =
- std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
+ int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin();
int NumHToH = HiInputs.size() - NumLToH;
MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
@@ -12730,7 +13247,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
- int ADWord, BDWord;
+ int ADWord = 0, BDWord = 0;
int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
@@ -12825,8 +13342,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
- return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
- DAG);
+ return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
@@ -13084,7 +13600,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
/// blend if only one input is used.
-static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
+static SDValue lowerShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&
@@ -13147,54 +13663,51 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
/// the two inputs, try to interleave them. Otherwise, blend the low and high
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
-static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
if (NumV2Inputs == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
- if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2,
- DAG, Subtarget))
+ if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
+ Subtarget))
return V;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask,
+ Subtarget, DAG))
return Rotate;
// Make a copy of the mask so it can be modified.
SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end());
- return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1,
- MutableMask, Subtarget,
- DAG);
+ return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask,
+ Subtarget, DAG);
}
assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
@@ -13202,19 +13715,19 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
"shuffles.");
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
@@ -13222,50 +13735,54 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// *exact* same predicate.
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
- if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
- if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
- Subtarget))
+ if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
+ Subtarget))
return V;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
if (SDValue BitBlend =
- lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
+ // Try to use byte shift instructions to mask.
+ if (SDValue V = lowerVectorShuffleAsByteShiftMask(
+ DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return V;
+
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
- DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
+ Mask, Subtarget, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
- return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, DAG, V1InUse, V2InUse);
+ return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
+ Mask, Subtarget, DAG);
}
/// Check whether a compaction lowering can be done by dropping even
@@ -13334,9 +13851,9 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
return 0;
}
-static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
@@ -13354,39 +13871,38 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
/// UNPCK to spread the i8 elements across two i16-element vectors, and uses
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
-static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Use dedicated pack instructions for masks that match their pattern.
- if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
- Subtarget))
+ if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
+ Subtarget))
return V;
// Try to use a zext lowering.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
@@ -13394,12 +13910,11 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// For single-input shuffles, there are some nicer lowering tricks we can use.
if (NumV2Elements == 0) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
return V;
// Check whether we can widen this to an i16 shuffle by duplicating bytes.
@@ -13492,13 +14007,17 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
}
- if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, DAG))
+ if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ return V;
+
+ // Try to use byte shift instructions to mask.
+ if (SDValue V = lowerVectorShuffleAsByteShiftMask(
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
@@ -13518,7 +14037,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool V1InUse = false;
bool V2InUse = false;
- SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs(
+ SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
// If both V1 and V2 are in use and we can use a direct blend or an unpack,
@@ -13526,8 +14045,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
- if (SDValue Blend = lowerVectorShuffleAsBlend(
- DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// We can use an unpack to do the blending rather than an or in some
@@ -13538,17 +14057,17 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// FIXME: It might be worth trying to detect if the unpack-feeding
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return Unpack;
// If we have VBMI we can use one VPERM instead of multiple PSHUFBs.
if (Subtarget.hasVBMI() && Subtarget.hasVLX())
- return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG);
// Use PALIGNR+Permute if possible - permute might become PSHUFB but the
// PALIGNR will be cheaper than the second PSHUFB+OR.
- if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute(
+ if (SDValue V = lowerShuffleAsByteRotateAndPermute(
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return V;
}
@@ -13558,13 +14077,12 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ if (SDValue V = lowerShuffleAsElementInsertion(
DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
- if (SDValue BitBlend =
- lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
- return BitBlend;
+ if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return Blend;
// Check whether a compaction lowering can be done. This handles shuffles
// which take every Nth element for some even N. See the helper function for
@@ -13605,8 +14123,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Handle multi-input cases by blending single-input shuffles.
if (NumV2Elements > 0)
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask,
+ Subtarget, DAG);
// The fallback path for single-input shuffles widens this into two v8i16
// vectors with unpacks, shuffles those, and then pulls them back together
@@ -13661,24 +14179,24 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
-static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
switch (VT.SimpleTy) {
case MVT::v2i64:
- return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
- return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
- return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
- return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
- return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
- return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Unimplemented!");
@@ -13690,9 +14208,9 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine just extracts two subvectors, shuffles them independently, and
/// then concatenates them back together. This should work effectively with all
/// AVX vector shuffle types.
-static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
assert(VT.getSizeInBits() >= 256 &&
"Only for 256-bit or wider vector shuffles!");
assert(V1.getSimpleValueType() == VT && "Bad operand type!");
@@ -13816,11 +14334,10 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
/// between splitting the shuffle into 128-bit components and stitching those
/// back together vs. extracting the single-input shuffles and blending those
/// results.
-static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(!V2.isUndef() && "This routine must not be used to lower single-input "
"shuffles as it could then recurse on itself.");
int Size = Mask.size();
@@ -13845,8 +14362,8 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
return true;
};
if (DoBothBroadcast())
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
+ Subtarget, DAG);
// If the inputs all stem from a single 128-bit lane of each input, then we
// split them rather than blending because the split will decompose to
@@ -13860,12 +14377,12 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
if (Mask[i] >= 0)
LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
- return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
// Otherwise, just fall back to decomposed shuffles and a blend. This requires
// that the decomposed single-input shuffles don't end up here.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget,
+ DAG);
}
/// Lower a vector shuffle crossing multiple 128-bit lanes as
@@ -13874,9 +14391,9 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
/// This is mainly for cases where we can have non-repeating permutes
/// in each lane.
///
-/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes,
+/// TODO: This is very similar to lowerShuffleAsLanePermuteAndRepeatedMask,
/// we should investigate merging them.
-static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
+static SDValue lowerShuffleAsLanePermuteAndPermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
@@ -13884,7 +14401,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
int NumEltsPerLane = NumElts / NumLanes;
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
- SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
for (int i = 0; i != NumElts; ++i) {
@@ -13899,10 +14415,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
return SDValue();
SrcLaneMask[DstLane] = SrcLane;
- LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
}
+ // Make sure we set all elements of the lane mask, to avoid undef propagation.
+ SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
+ for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
+ int SrcLane = SrcLaneMask[DstLane];
+ if (0 <= SrcLane)
+ for (int j = 0; j != NumEltsPerLane; ++j) {
+ LaneMask[(DstLane * NumEltsPerLane) + j] =
+ (SrcLane * NumEltsPerLane) + j;
+ }
+ }
+
// If we're only shuffling a single lowest lane and the rest are identity
// then don't bother.
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
@@ -13931,11 +14457,9 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
/// is lower than any other fully general cross-lane shuffle strategy I'm aware
/// of. Special cases for each particular shuffle pattern should be handled
/// prior to trying this lowering.
-static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleAsLanePermuteAndBlend(
+ const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int Size = Mask.size();
@@ -13950,14 +14474,14 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
if (!LaneCrossing[0] || !LaneCrossing[1])
- return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
} else {
bool LaneUsed[2] = {false, false};
for (int i = 0; i < Size; ++i)
if (Mask[i] >= 0)
LaneUsed[(Mask[i] / LaneSize)] = true;
if (!LaneUsed[0] || !LaneUsed[1])
- return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
assert(V2.isUndef() &&
@@ -13981,11 +14505,11 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
}
/// Handle lowering 2-lane 128-bit shuffles.
-static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
if (Subtarget.hasAVX2() && V2.isUndef())
return SDValue();
@@ -14012,8 +14536,8 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// instruction bytes needed to explicitly generate the zero vector.
// Blends are faster and handle all the non-lane-crossing cases.
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return Blend;
// If either input operand is a zero vector, use VPERM2X128 because its mask
@@ -14084,9 +14608,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
/// or two of the lanes of the inputs. The lanes of the input vectors are
/// shuffled in one or two independent shuffles to get the lanes into the
/// position needed by the final shuffle.
-///
-/// FIXME: This should be generalized to 512-bit shuffles.
-static SDValue lowerVectorShuffleByMerging128BitLanes(
+static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget, SelectionDAG &DAG) {
assert(!V2.isUndef() && "This is only useful with multiple inputs.");
@@ -14095,12 +14617,10 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
return SDValue();
int Size = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
int LaneSize = 128 / VT.getScalarSizeInBits();
- int NumLanes = Size / LaneSize;
- assert(NumLanes == 2 && "Only handles 256-bit shuffles.");
-
SmallVector<int, 16> RepeatMask(LaneSize, -1);
- int LaneSrcs[2][2] = { { -1, -1 }, { -1 , -1 } };
+ SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});
// First pass will try to fill in the RepeatMask from lanes that need two
// sources.
@@ -14111,7 +14631,7 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
int M = Mask[(Lane * LaneSize) + i];
if (M < 0)
continue;
- // Determine which of the 4 possible input lanes (2 from each source)
+ // Determine which of the possible input lanes (NumLanes from each source)
// this element comes from. Assign that as one of the sources for this
 // lane. We can assign up to 2 sources for this lane. If we run out of
 // sources we can't do anything.
@@ -14250,54 +14770,30 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
-/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
-/// This allows for fast cases such as subvector extraction/insertion
-/// or shuffling smaller vector types which can lower more efficiently.
-static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- assert((VT.is256BitVector() || VT.is512BitVector()) &&
- "Expected 256-bit or 512-bit vector");
-
- unsigned NumElts = VT.getVectorNumElements();
- unsigned HalfNumElts = NumElts / 2;
- MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
-
- bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
- bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
- if (!UndefLower && !UndefUpper)
- return SDValue();
-
- // Upper half is undef and lower half is whole upper subvector.
- // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- if (UndefUpper &&
- isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
- SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
- DAG.getIntPtrConstant(HalfNumElts, DL));
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
- DAG.getIntPtrConstant(0, DL));
- }
-
- // Lower half is undef and upper half is whole lower subvector.
- // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- if (UndefLower &&
- isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
- SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
- DAG.getIntPtrConstant(HalfNumElts, DL));
- }
+/// If the input shuffle mask results in a vector that is undefined in all upper
+/// or lower half elements and that mask accesses only 2 halves of the
+/// shuffle's operands, return true. A mask of half the width with mask indexes
+/// adjusted to access the extracted halves of the original shuffle operands is
+/// returned in HalfMask. HalfIdx1 and HalfIdx2 return whether the upper or
+/// lower half of each input operand is accessed.
+static bool
+getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
+ int &HalfIdx1, int &HalfIdx2) {
+ assert((Mask.size() == HalfMask.size() * 2) &&
+ "Expected input mask to be twice as long as output");
+
+ // Exactly one half of the result must be undef to allow narrowing.
+ bool UndefLower = isUndefLowerHalf(Mask);
+ bool UndefUpper = isUndefUpperHalf(Mask);
+ if (UndefLower == UndefUpper)
+ return false;
- // If the shuffle only uses two of the four halves of the input operands,
- // then extract them and perform the 'half' shuffle at half width.
- // e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
- int HalfIdx1 = -1, HalfIdx2 = -1;
- SmallVector<int, 8> HalfMask(HalfNumElts);
- unsigned Offset = UndefLower ? HalfNumElts : 0;
+ unsigned HalfNumElts = HalfMask.size();
+ unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0;
+ HalfIdx1 = -1;
+ HalfIdx2 = -1;
for (unsigned i = 0; i != HalfNumElts; ++i) {
- int M = Mask[i + Offset];
+ int M = Mask[i + MaskIndexOffset];
if (M < 0) {
HalfMask[i] = M;
continue;
@@ -14324,42 +14820,27 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
}
// Too many half vectors referenced.
- return SDValue();
+ return false;
}
- assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
-
- // Only shuffle the halves of the inputs when useful.
- int NumLowerHalves =
- (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
- int NumUpperHalves =
- (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
-
- // uuuuXXXX - don't extract uppers just to insert again.
- if (UndefLower && NumUpperHalves != 0)
- return SDValue();
- // XXXXuuuu - don't extract both uppers, instead shuffle and then extract.
- if (UndefUpper && NumUpperHalves == 2)
- return SDValue();
+ return true;
+}
- // AVX2 - XXXXuuuu - always extract lowers.
- if (Subtarget.hasAVX2() && !(UndefUpper && NumUpperHalves == 0)) {
- // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
- if (VT == MVT::v4f64 || VT == MVT::v4i64)
- return SDValue();
- // AVX2 supports variable 32-bit element cross-lane shuffles.
- if (VT == MVT::v8f32 || VT == MVT::v8i32) {
- // XXXXuuuu - don't extract lowers and uppers.
- if (UndefUpper && NumLowerHalves != 0 && NumUpperHalves != 0)
- return SDValue();
- }
- }
+/// Given the output values from getHalfShuffleMask(), create a half width
+/// shuffle of extracted vectors followed by an insert back to full width.
+static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
+ ArrayRef<int> HalfMask, int HalfIdx1,
+ int HalfIdx2, bool UndefLower,
+ SelectionDAG &DAG) {
+ assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
+ assert(V1.getValueType().isSimple() && "Expecting only simple types");
- // AVX512 - XXXXuuuu - always extract lowers.
- if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0))
- return SDValue();
+ MVT VT = V1.getSimpleValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
- auto GetHalfVector = [&](int HalfIdx) {
+ auto getHalfVector = [&](int HalfIdx) {
if (HalfIdx < 0)
return DAG.getUNDEF(HalfVT);
SDValue V = (HalfIdx < 2 ? V1 : V2);
@@ -14368,13 +14849,126 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
DAG.getIntPtrConstant(HalfIdx, DL));
};
- SDValue Half1 = GetHalfVector(HalfIdx1);
- SDValue Half2 = GetHalfVector(HalfIdx2);
+ // ins undef, (shuf (ext V1, HalfIdx1), (ext V2, HalfIdx2), HalfMask), Offset
+ SDValue Half1 = getHalfVector(HalfIdx1);
+ SDValue Half2 = getHalfVector(HalfIdx2);
SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+ unsigned Offset = UndefLower ? HalfNumElts : 0;
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
DAG.getIntPtrConstant(Offset, DL));
}
+/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
+/// This allows for fast cases such as subvector extraction/insertion
+/// or shuffling smaller vector types which can lower more efficiently.
+static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((VT.is256BitVector() || VT.is512BitVector()) &&
+ "Expected 256-bit or 512-bit vector");
+
+ bool UndefLower = isUndefLowerHalf(Mask);
+ if (!UndefLower && !isUndefUpperHalf(Mask))
+ return SDValue();
+
+ assert((!UndefLower || !isUndefUpperHalf(Mask)) &&
+ "Completely undef shuffle mask should have been simplified already");
+
+ // Upper half is undef and lower half is whole upper subvector.
+ // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+ if (!UndefLower &&
+ isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ // Lower half is undef and upper half is whole lower subvector.
+ // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (UndefLower &&
+ isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ }
+
+ int HalfIdx1, HalfIdx2;
+ SmallVector<int, 8> HalfMask(HalfNumElts);
+ if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2))
+ return SDValue();
+
+ assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+ // Only shuffle the halves of the inputs when useful.
+ unsigned NumLowerHalves =
+ (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
+ unsigned NumUpperHalves =
+ (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
+ assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");
+
+ // Determine the larger pattern of undef/halves, then decide if it's worth
+ // splitting the shuffle based on subtarget capabilities and types.
+ unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+ if (!UndefLower) {
+ // XXXXuuuu: no insert is needed.
+ // Always extract lowers when setting lower - these are all free subreg ops.
+ if (NumUpperHalves == 0)
+ return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+ UndefLower, DAG);
+
+ if (NumUpperHalves == 1) {
+ // AVX2 has efficient 32/64-bit element cross-lane shuffles.
+ if (Subtarget.hasAVX2()) {
+ // extract128 + vunpckhps/vshufps, is better than vblend + vpermps.
+ if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
+ !is128BitUnpackShuffleMask(HalfMask) &&
+ (!isSingleSHUFPSMask(HalfMask) ||
+ Subtarget.hasFastVariableShuffle()))
+ return SDValue();
+ // If this is a unary shuffle (assume that the 2nd operand is
+ // canonicalized to undef), then we can use vpermpd. Otherwise, we
+ // are better off extracting the upper half of 1 operand and using a
+ // narrow shuffle.
+ if (EltWidth == 64 && V2.isUndef())
+ return SDValue();
+ }
+ // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
+ if (Subtarget.hasAVX512() && VT.is512BitVector())
+ return SDValue();
+ // Extract + narrow shuffle is better than the wide alternative.
+ return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+ UndefLower, DAG);
+ }
+
+ // Don't extract both uppers, instead shuffle and then extract.
+ assert(NumUpperHalves == 2 && "Half vector count went wrong");
+ return SDValue();
+ }
+
+ // UndefLower - uuuuXXXX: an insert to high half is required if we split this.
+ if (NumUpperHalves == 0) {
+ // AVX2 has efficient 64-bit element cross-lane shuffles.
+ // TODO: Refine to account for unary shuffle, splat, and other masks?
+ if (Subtarget.hasAVX2() && EltWidth == 64)
+ return SDValue();
+ // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
+ if (Subtarget.hasAVX512() && VT.is512BitVector())
+ return SDValue();
+ // Narrow shuffle + insert is better than the wide alternative.
+ return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
+ UndefLower, DAG);
+ }
+
+ // NumUpperHalves != 0: don't bother with extract, shuffle, and then insert.
+ return SDValue();
+}
+
/// Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
@@ -14560,9 +15154,8 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
SubLaneMask);
}
-static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
- unsigned &ShuffleImm,
- ArrayRef<int> Mask) {
+static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
+ unsigned &ShuffleImm, ArrayRef<int> Mask) {
int NumElts = VT.getVectorNumElements();
assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
@@ -14597,14 +15190,14 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
return false;
}
-static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&&
"Unexpected data type for VSHUFPD");
unsigned Immediate = 0;
- if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+ if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
return SDValue();
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
@@ -14615,23 +15208,22 @@ static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
-static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(
- DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2,
+ Mask, Subtarget, DAG))
return Broadcast;
// Use low duplicate instructions for masks that match their pattern.
@@ -14659,29 +15251,33 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
- DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget))
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2,
+ Mask, DAG, Subtarget))
return V;
// Otherwise, fall back.
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
- DAG, Subtarget);
+ return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG,
+ Subtarget);
}
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
- if (SDValue Op =
- lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG))
return Op;
+ // If we have one input in place, then we can permute the other input and
+ // blend the result.
+ if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG);
+
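The "one input in place" fast path added above relies on decomposing a two-input shuffle into a per-operand permute followed by a blend. As a rough standalone illustration of that decomposition (a sketch only; the real lowerShuffleAsDecomposedShuffleBlend also tries blend-of-permute and unpack forms), the bookkeeping looks roughly like:

#include <vector>

struct Decomposed {
  std::vector<int> V1Mask, V2Mask; // per-operand permute masks
  std::vector<bool> UseV2;         // blend selector: true = take V2's lane
};

// Sketch only: split a two-input shuffle mask of width N into a permute mask
// for each operand plus a lane-wise blend selector.
Decomposed decomposeShuffleBlend(const std::vector<int> &Mask) {
  size_t N = Mask.size();
  Decomposed D{std::vector<int>(N, -1), std::vector<int>(N, -1),
               std::vector<bool>(N, false)};
  for (size_t i = 0; i < N; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;
    if (static_cast<size_t>(M) < N) {
      D.V1Mask[i] = M;                        // element comes from V1
    } else {
      D.V2Mask[i] = M - static_cast<int>(N);  // element comes from V2
      D.UseV2[i] = true;                      // blend takes this lane from V2
    }
  }
  return D;
}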
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -14694,52 +15290,51 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// instruction so skip this pattern.
if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
isShuffleMaskInputInPlace(1, Mask))))
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
- return Result;
+ return V;
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
- V1, V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
// If we have AVX2 then we always want to lower with a blend because at v4 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 4-lane 64-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling.
-static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
- if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
if (V2.isUndef()) {
@@ -14763,31 +15358,36 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or VEXPAND.
if (Subtarget.hasVLX()) {
- if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask,
- V1, V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
}
// Try to use PALIGNR.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
return V;
+ // If we have one input in place, then we can permute the other input and
+ // blend the result.
+ if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG);
+
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -14800,35 +15400,34 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// instruction so skip this pattern.
if (!isShuffleMaskInputInPlace(0, Mask) &&
!isShuffleMaskInputInPlace(1, Mask))
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit floating point shuffles.
///
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
-static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane, we have many more
@@ -14849,13 +15448,12 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
return V;
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends.
- return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
+ return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
}
// Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -14875,49 +15473,49 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
// Otherwise, fall back.
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
- DAG, Subtarget);
+ return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
+ DAG, Subtarget);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
return Result;
+
// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask,
- V1, V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
// For non-AVX512, if the mask has 16-bit elements within a lane, try to split,
// since after splitting we get more efficient code using vpunpcklwd and
// vpunpckhwd instructions than with vblend.
if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
- if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG))
return V;
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG);
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 8-lane 32-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling.
-static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -14926,8 +15524,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
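The zero/any-extend check above looks for masks where every Scale-th lane carries consecutive source elements and the remaining lanes are zeroable. A simplified standalone predicate for the zero-extend case (a sketch assuming a single input and no element offset; the real lowerShuffleAsZeroOrAnyExtend is considerably more general) could be:

#include <vector>

// Sketch only: does this mask behave like a zero extension with the given
// scale factor? Lanes encoded as -1 are taken to be zero/undef.
bool looksLikeZeroExtend(const std::vector<int> &Mask, int Scale) {
  for (int i = 0, e = static_cast<int>(Mask.size()); i != e; ++i) {
    int Expected = (i % Scale == 0) ? i / Scale : -1;
    if (Mask[i] >= 0 && Mask[i] != Expected)
      return false; // a live lane neither extends nor stays zero
  }
  return true;
}

For instance, with Scale = 2 the mask {0, -1, 1, -1, 2, -1, 3, -1} passes, which corresponds to zero-extending the low four elements.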
// For non-AVX512, if the mask has 16-bit elements within a lane, try to split
@@ -14935,17 +15533,17 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// vpunpcklwd and vpunpckhwd instructions.
if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
!Subtarget.hasAVX512())
- if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
// If the shuffle mask is repeated in each 128-bit lane we can use more
@@ -14961,30 +15559,29 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
- if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask,
- V1, V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
}
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -15006,31 +15603,30 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
- SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
- CastV1, CastV2, DAG);
+ SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
+ CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v8i32, ShufPS);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Result;
// Otherwise fall back on generic blend lowering.
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
- Mask, Subtarget, DAG);
+ return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 16-lane 16-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling.
-static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -15039,37 +15635,36 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
- if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
- Subtarget))
+ if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
+ Subtarget))
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -15082,12 +15677,12 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are no generalized cross-lane shuffle operations available on i16
// element types.
if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
- if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
return V;
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
- Mask, DAG, Subtarget);
+ return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask,
+ DAG, Subtarget);
}
SmallVector<int, 8> RepeatedMask;
@@ -15095,44 +15690,43 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v16 case.
- return lowerV8I16GeneralSingleInputVectorShuffle(
+ return lowerV8I16GeneralSingleInputShuffle(
DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
}
}
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
- DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2,
+ Zeroable, Subtarget, DAG))
return PSHUFB;
// AVX512BWVL can lower to VPERMW.
if (Subtarget.hasBWI() && Subtarget.hasVLX())
- return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
return Result;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
return V;
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
+ Subtarget, DAG);
}
/// Handle lowering of 32-lane 8-bit integer shuffles.
///
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling.
-static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
@@ -15141,37 +15735,36 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
- if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
- Subtarget))
+ if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
+ Subtarget))
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
@@ -15183,36 +15776,36 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are no generalized cross-lane shuffle operations available on i8
// element types.
if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
- if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
- DAG, Subtarget);
+ return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG,
+ Subtarget);
}
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
- DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
+ Zeroable, Subtarget, DAG))
return PSHUFB;
// AVX512VBMIVL can lower to VPERMB.
if (Subtarget.hasVBMI() && Subtarget.hasVLX())
- return lowerVectorShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
- if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
+ if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
return Result;
// Try to permute the lanes and then use a per-lane permute.
- if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
+ if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
return V;
// Otherwise fall back on generic lowering.
- return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
- Subtarget, DAG);
+ return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
+ Subtarget, DAG);
}
/// High-level routine to lower various 256-bit x86 vector shuffles.
@@ -15220,24 +15813,23 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine either breaks down the specific type of a 256-bit x86 vector
/// shuffle or splits it into two 128-bit shuffles and fuses the results back
/// together based on the available instructions.
-static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
int NumElts = VT.getVectorNumElements();
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
- lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// There is a really nice hard cut-over between AVX1 and AVX2 that means we
@@ -15251,12 +15843,12 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (ElementBits < 32) {
// No floating-point type is available; if we can't use the bit operations
// for masking/blending, then decompose into 128-bit vectors.
- if (SDValue V =
- lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
+ if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return V;
- if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
@@ -15268,17 +15860,17 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
switch (VT.SimpleTy) {
case MVT::v4f64:
- return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
- return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
- return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
- return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
- return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i8:
- return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 256-bit x86 vector type!");
@@ -15286,12 +15878,10 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
/// Try to lower a vector shuffle as a 128-bit shuffles.
-static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(VT.getScalarSizeInBits() == 64 &&
"Unexpected element type size for 128bit shuffle.");
@@ -15388,11 +15978,10 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
}
/// Handle lowering of 8-lane 64-bit floating point shuffles.
-static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -15419,37 +16008,33 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
}
- if (SDValue Shuf128 =
- lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, Zeroable, V1, V2,
- Subtarget, DAG))
+ if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1,
+ V2, Subtarget, DAG))
return Shuf128;
- if (SDValue Unpck =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Unpck;
// Check if the blend happens to exactly fit that of SHUFPD.
- if (SDValue Op =
- lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG))
return Op;
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1,
- V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
- return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// Handle lowering of 16-lane 32-bit floating point shuffles.
-static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -15471,16 +16056,15 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue Unpck =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
- return Unpck;
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
+ return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
// Otherwise, fall back to a SHUFPS sequence.
- return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
+ return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
// If we have a single input shuffle with different shuffle patterns in the
@@ -15492,19 +16076,18 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// If we have AVX512F support, we can use VEXPAND.
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
V1, V2, DAG, Subtarget))
return V;
- return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}
/// Handle lowering of 8-lane 64-bit integer shuffles.
-static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
@@ -15530,47 +16113,44 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
}
- if (SDValue Shuf128 =
- lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, Zeroable,
- V1, V2, Subtarget, DAG))
+ if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1,
+ V2, Subtarget, DAG))
return Shuf128;
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
- if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Try to use PALIGNR.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
- if (SDValue Unpck =
- lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;
// If we have AVX512F support, we can use VEXPAND.
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1,
- V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
- return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// Handle lowering of 16-lane 32-bit integer shuffles.
-static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
@@ -15578,7 +16158,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
@@ -15595,25 +16175,24 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
return V;
}
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use VALIGN.
- if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Try to use byte rotation instructions.
if (Subtarget.hasBWI())
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
// Assume that a single SHUFPS is faster than using a permv shuffle.
@@ -15621,27 +16200,26 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
- SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
- CastV1, CastV2, DAG);
+ SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
+ CastV1, CastV2, DAG);
return DAG.getBitcast(MVT::v16i32, ShufPS);
}
// If we have AVX512F support, we can use VEXPAND.
- if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask,
- V1, V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2,
+ DAG, Subtarget))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
- return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// Handle lowering of 32-lane 16-bit integer shuffles.
-static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
@@ -15650,23 +16228,22 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
if (V2.isUndef()) {
@@ -15675,28 +16252,27 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// As this is a single-input shuffle, the repeated mask should be
// a strictly valid v8i16 mask that we can pass through to the v8i16
// lowering to handle even the v32 case.
- return lowerV8I16GeneralSingleInputVectorShuffle(
+ return lowerV8I16GeneralSingleInputShuffle(
DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG);
}
}
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
- DL, MVT::v32i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2,
+ Zeroable, Subtarget, DAG))
return PSHUFB;
- return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// Handle lowering of 64-lane 8-bit integer shuffles.
-static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const APInt &Zeroable,
- SDValue V1, SDValue V2,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
@@ -15705,37 +16281,36 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Use dedicated unpack instructions for masks that match their pattern.
- if (SDValue V =
- lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
+ if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
return V;
// Use dedicated pack instructions for masks that match their pattern.
- if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
- Subtarget))
+ if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
+ Subtarget))
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
- if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
- DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask,
+ Subtarget, DAG))
return Rotate;
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
- DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
+ Zeroable, Subtarget, DAG))
return PSHUFB;
// VBMI can use VPERMV/VPERMV3 byte shuffles.
if (Subtarget.hasVBMI())
- return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
+ return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
@@ -15743,12 +16318,19 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
return Blend;
+ // Try to simplify this by merging 128-bit lanes to enable a lane-based
+ // shuffle.
+ if (!V2.isUndef())
+ if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
+ DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
+ return Result;
+
// FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
/// High-level routine to lower various 512-bit x86 vector shuffles.
@@ -15756,11 +16338,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine either breaks down the specific type of a 512-bit x86 vector
/// shuffle or splits it into two 256-bit shuffles and fuses the results back
/// together based on the available instructions.
-static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/ basic ISA!");
@@ -15770,18 +16352,18 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
if (NumV2Elements == 1 && Mask[0] >= NumElts)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ if (SDValue Insertion = lowerShuffleAsElementInsertion(
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
if (SDValue V =
- lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// Check for being able to broadcast a single element.
- if (SDValue Broadcast =
- lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
// Dispatch to each element type for lowering. If we don't have support for
@@ -15790,17 +16372,17 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// the requisite ISA extensions for that element type are available.
switch (VT.SimpleTy) {
case MVT::v8f64:
- return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16f32:
- return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i64:
- return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i32:
- return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i16:
- return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v64i8:
- return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 512-bit x86 vector type!");
@@ -15809,7 +16391,7 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Determine if this shuffle can be implemented with a KSHIFT instruction.
// Returns the shift amount if possible or -1 if not. This is a simplified
-// version of matchVectorShuffleAsShift.
+// version of matchShuffleAsShift.
static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
int MaskOffset, const APInt &Zeroable) {
int Size = Mask.size();
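match1BitShuffleAsKSHIFT looks for masks that behave like shifting the whole mask register and filling the vacated lanes with zeros. A stripped-down sketch of one direction of that matching (left shift only, with -1 standing in for lanes that must be zero; the real routine also tries the right-shift direction and consults Zeroable) is:

#include <vector>

// Sketch only: return the left-shift amount that explains this mask, or -1.
// Element i of the result must be source element i - Shift for i >= Shift,
// and zero below that.
int matchShiftAmount(const std::vector<int> &Mask) {
  int Size = static_cast<int>(Mask.size());
  for (int Shift = 1; Shift < Size; ++Shift) {
    bool Matches = true;
    for (int i = 0; i < Size; ++i) {
      int Expected = (i < Shift) ? -1 : i - Shift; // low lanes become zero
      if (Mask[i] >= 0 && Mask[i] != Expected) {
        Matches = false;
        break;
      }
    }
    if (Matches)
      return Shift;
  }
  return -1;
}

For example, the mask {-1, -1, 0, 1} matches with a shift amount of 2.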
@@ -15844,11 +16426,11 @@ static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask,
// There is no dedicated instruction on AVX-512 that shuffles the masks.
// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
// vector, shuffle, and then truncate it back.
-static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- const APInt &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
@@ -16037,15 +16619,14 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// Check for non-undef masks pointing at an undef vector and make the masks
// undef as well. This makes it easier to match the shuffle based solely on
// the mask.
- if (V2IsUndef)
- for (int M : Mask)
- if (M >= NumElements) {
- SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
- for (int &M : NewMask)
- if (M >= NumElements)
- M = -1;
- return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
- }
+ if (V2IsUndef &&
+ any_of(Mask, [NumElements](int M) { return M >= NumElements; })) {
+ SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
+ for (int &M : NewMask)
+ if (M >= NumElements)
+ M = -1;
+ return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+ }
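The rewritten block above canonicalizes masks against an undef second operand in a single pass instead of restarting the loop per offending element. In isolation, the transformation is simply:

#include <vector>

// Sketch only: if the second shuffle operand is undef, rewrite any mask
// element that points into it as -1 (undef) so later matching only has to
// reason about V1.
std::vector<int> canonicalizeUndefV2(std::vector<int> Mask, int NumElements) {
  for (int &M : Mask)
    if (M >= NumElements)
      M = -1;
  return Mask;
}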
// Check for illegal shuffle mask element index values.
int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit;
@@ -16083,8 +16664,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// by obfuscating the operands with bitcasts.
// TODO: Avoid lowering directly from this top-level function: make this
// a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
- if (SDValue Broadcast =
- lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,
+ Subtarget, DAG))
return Broadcast;
MVT NewEltVT = VT.isFloatingPoint()
@@ -16122,26 +16703,21 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
if (canonicalizeShuffleMaskWithCommute(Mask))
return DAG.getCommutedVectorShuffle(*SVOp);
- if (SDValue V =
- lowerVectorShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
+ if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
return V;
// For each vector width, delegate to a specialized lowering routine.
if (VT.is128BitVector())
- return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
- DAG);
+ return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
if (VT.is256BitVector())
- return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
- DAG);
+ return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
if (VT.is512BitVector())
- return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
- DAG);
+ return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
if (Is1BitVector)
- return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
- DAG);
+ return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);
llvm_unreachable("Unimplemented!");
}
@@ -16401,7 +16977,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// this can be done with a mask.
IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
- DAG.getConstant(IdxVal, dl, MVT::i32));
+ DAG.getIntPtrConstant(IdxVal, dl));
}
assert(VecVT.is128BitVector() && "Unexpected vector length");
@@ -16527,10 +17103,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (!isa<ConstantSDNode>(N2))
+
+ auto *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (!N2C || N2C->getAPIntValue().uge(NumElts))
return SDValue();
- auto *N2C = cast<ConstantSDNode>(N2);
- unsigned IdxVal = N2C->getZExtValue();
+ uint64_t IdxVal = N2C->getZExtValue();
bool IsZeroElt = X86::isZeroNode(N1);
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
@@ -16575,13 +17152,21 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
- DAG.getConstant(IdxIn128, dl, MVT::i32));
+ DAG.getIntPtrConstant(IdxIn128, dl));
// Insert the changed part back into the bigger vector
return insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
+ // This will be just movd/movq/movss/movsd.
+ if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode()) &&
+ (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
+ EltVT == MVT::i64)) {
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+ return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+ }
+
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument. SSE41 required for pinsrb.
if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
@@ -16613,7 +17198,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
- bool MinSize = DAG.getMachineFunction().getFunction().optForMinSize();
+ bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
@@ -16663,7 +17248,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
// Insert the 128-bit vector.
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
- assert(OpVT.is128BitVector() && "Expected an SSE type!");
+ assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
+ "Expected an SSE type!");
// Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
if (OpVT == MVT::v4i32)
@@ -16789,35 +17375,9 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
-SDValue
-X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
- const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
-
- // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
- // global base reg.
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod);
-
- auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
-
- SDLoc DL(Op);
- Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
-
- // With PIC, the address is actually $g + Offset.
- if (OpFlag) {
- Result =
- DAG.getNode(ISD::ADD, DL, PtrVT,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
- }
-
- // For symbols that require a load from a stub to get the address, emit the
- // load.
- if (isGlobalStubReference(OpFlag))
- Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()));
-
- return Result;
+SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
SDValue
@@ -16841,35 +17401,67 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
-SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
- const SDLoc &dl, int64_t Offset,
- SelectionDAG &DAG) const {
- // Create the TargetGlobalAddress node, folding in the constant
- // offset if it is legal.
- unsigned char OpFlags = Subtarget.classifyGlobalReference(GV);
+/// Creates target global address or external symbol nodes for calls or
+/// other uses.
+SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
+ bool ForCall) const {
+ // Unpack the global address or external symbol.
+ const SDLoc &dl = SDLoc(Op);
+ const GlobalValue *GV = nullptr;
+ int64_t Offset = 0;
+ const char *ExternalSym = nullptr;
+ if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
+ GV = G->getGlobal();
+ Offset = G->getOffset();
+ } else {
+ const auto *ES = cast<ExternalSymbolSDNode>(Op);
+ ExternalSym = ES->getSymbol();
+ }
+
+ // Calculate some flags for address lowering.
+ const Module &Mod = *DAG.getMachineFunction().getFunction().getParent();
+ unsigned char OpFlags;
+ if (ForCall)
+ OpFlags = Subtarget.classifyGlobalFunctionReference(GV, Mod);
+ else
+ OpFlags = Subtarget.classifyGlobalReference(GV, Mod);
+ bool HasPICReg = isGlobalRelativeToPICBase(OpFlags);
+ bool NeedsLoad = isGlobalStubReference(OpFlags);
+
CodeModel::Model M = DAG.getTarget().getCodeModel();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
- if (OpFlags == X86II::MO_NO_FLAG &&
- X86::isOffsetSuitableForCodeModel(Offset, M)) {
- // A direct static reference to a global.
- Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
- Offset = 0;
+
+ if (GV) {
+ // Create a target global address if this is a global. If possible, fold the
+ // offset into the global address reference. Otherwise, ADD it on later.
+ int64_t GlobalOffset = 0;
+ if (OpFlags == X86II::MO_NO_FLAG &&
+ X86::isOffsetSuitableForCodeModel(Offset, M)) {
+ std::swap(GlobalOffset, Offset);
+ }
+ Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags);
} else {
- Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
+ // If this is not a global address, this must be an external symbol.
+ Result = DAG.getTargetExternalSymbol(ExternalSym, PtrVT, OpFlags);
}
+ // If this is a direct call, avoid the wrapper if we don't need to do any
+ // loads or adds. This allows SDAG ISel to match direct calls.
+ if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
+ return Result;
+
Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
- if (isGlobalRelativeToPICBase(OpFlags)) {
+ if (HasPICReg) {
Result = DAG.getNode(ISD::ADD, dl, PtrVT,
DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
- if (isGlobalStubReference(OpFlags))
+ if (NeedsLoad)
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
@@ -16884,9 +17476,7 @@ SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
- return LowerGlobalAddress(GV, SDLoc(Op), Offset, DAG);
+ return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
}
static SDValue
@@ -17112,9 +17702,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
- if (Subtarget.isTargetKnownWindowsMSVC() ||
- Subtarget.isTargetWindowsItanium() ||
- Subtarget.isTargetWindowsGNU()) {
+ if (Subtarget.isOSWindows()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -17254,7 +17842,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
APInt APIntShiftAmt;
if (isConstantSplat(Amt, APIntShiftAmt)) {
- uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
+ uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
}
@@ -17267,7 +17855,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
"Unexpected funnel shift type!");
// Expand slow SHLD/SHRD cases if we are not optimizing for size.
- bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
if (!OptForSize && Subtarget.isSHLDSlow())
return SDValue();
@@ -17311,6 +17899,70 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, dl));
}
+static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
+ const X86Subtarget &Subtarget) {
+ switch (Opcode) {
+ case ISD::SINT_TO_FP:
+ // TODO: Handle wider types with AVX/AVX512.
+ if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32)
+ return false;
+ // CVTDQ2PS or (V)CVTDQ2PD
+ return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64);
+
+ case ISD::UINT_TO_FP:
+ // TODO: Handle wider types and i64 elements.
+ if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32)
+ return false;
+ // VCVTUDQ2PS or VCVTUDQ2PD
+ return ToVT == MVT::v4f32 || ToVT == MVT::v4f64;
+
+ default:
+ return false;
+ }
+}
+
+/// Given a scalar cast operation that is extracted from a vector, try to
+/// vectorize the cast op followed by extraction. This will avoid an expensive
+/// round-trip between XMM and GPR.
+static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // TODO: This could be enhanced to handle smaller integer types by peeking
+ // through an extend.
+ SDValue Extract = Cast.getOperand(0);
+ MVT DestVT = Cast.getSimpleValueType();
+ if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Extract.getOperand(1)))
+ return SDValue();
+
+ // See if we have a 128-bit vector cast op for this type of cast.
+ SDValue VecOp = Extract.getOperand(0);
+ MVT FromVT = VecOp.getSimpleValueType();
+ unsigned NumEltsInXMM = 128 / FromVT.getScalarSizeInBits();
+ MVT Vec128VT = MVT::getVectorVT(FromVT.getScalarType(), NumEltsInXMM);
+ MVT ToVT = MVT::getVectorVT(DestVT, NumEltsInXMM);
+ if (!useVectorCast(Cast.getOpcode(), Vec128VT, ToVT, Subtarget))
+ return SDValue();
+
+ // If we are extracting from a non-zero element, first shuffle the source
+ // vector to allow extracting from element zero.
+ SDLoc DL(Cast);
+ if (!isNullConstant(Extract.getOperand(1))) {
+ SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1);
+ Mask[0] = Extract.getConstantOperandVal(1);
+ VecOp = DAG.getVectorShuffle(FromVT, DL, VecOp, DAG.getUNDEF(FromVT), Mask);
+ }
+ // If the source vector is wider than 128-bits, extract the low part. Do not
+ // create an unnecessarily wide vector cast op.
+ if (FromVT != Vec128VT)
+ VecOp = extract128BitVector(VecOp, 0, DAG, DL);
+
+ // cast (extelt V, 0) --> extelt (cast (extract_subv V)), 0
+ // cast (extelt V, C) --> extelt (cast (extract_subv (shuffle V, [C...]))), 0
+ SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestVT, VCast,
+ DAG.getIntPtrConstant(0, DL));
+}
+
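// [Editor's illustrative sketch; not part of this import.] One way the
// extract+cast pattern handled by vectorizeExtractedCast() can arise; the
// helper name and the use of an SSE4.1 intrinsic below are assumptions for
// illustration only:
//
//   #include <immintrin.h>
//   float lane2_to_float(__m128i v) {
//     int x = _mm_extract_epi32(v, 2);   // extractelt V, 2
//     return (float)x;                   // sint_to_fp (extractelt V, 2)
//   }
//
// With the helper above, the conversion stays in the XMM domain (shuffle
// lane 2 down to lane 0, cvtdq2ps, then extract lane 0) instead of
// round-tripping through a GPR.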
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
@@ -17318,6 +17970,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
+ return Extract;
+
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
@@ -17371,23 +18026,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
- unsigned ByteSize = SrcVT.getSizeInBits()/8;
+ unsigned ByteSize = SrcVT.getSizeInBits() / 8;
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
- MachineMemOperand *MMO;
+ MachineMemOperand *LoadMMO;
if (FI) {
int SSFI = FI->getIndex();
- MMO = DAG.getMachineFunction().getMachineMemOperand(
+ LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
- MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
+ LoadMMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
}
- SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
- SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
- X86ISD::FILD, DL,
- Tys, Ops, SrcVT, MMO);
+ SDValue FILDOps[] = {Chain, StackSlot};
+ SDValue Result =
+ DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, DL,
+ Tys, FILDOps, SrcVT, LoadMMO);
if (useSSE) {
Chain = Result.getValue(1);
@@ -17397,20 +18052,18 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- unsigned SSFISize = Op.getValueSizeInBits()/8;
+ unsigned SSFISize = Op.getValueSizeInBits() / 8;
int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false);
auto PtrVT = getPointerTy(MF.getDataLayout());
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
- SDValue Ops[] = {
- Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
- };
- MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ SDValue FSTOps[] = {Chain, Result, StackSlot, InFlag};
+ MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
MachineMemOperand::MOStore, SSFISize, SSFISize);
- Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
- Ops, Op.getValueType(), MMO);
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps,
+ Op.getValueType(), StoreMMO);
Result = DAG.getLoad(
Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
@@ -17545,7 +18198,7 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1 << 16, DL, MVT::v2f64);
+ SDValue TWOHW = DAG.getConstantFP((double)(1 << 16), DL, MVT::v2f64);
// Clear upper part of LO, lower HI.
SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
@@ -17680,6 +18333,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
+ if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
+ return Extract;
+
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
@@ -17732,7 +18388,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
- SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Ops[] = { Store, StackSlot };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
@@ -17768,16 +18424,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
-// just return an <SDValue(), SDValue()> pair.
+// just return an SDValue().
// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
-// to i16, i32 or i64, and we lower it to a legal sequence.
-// If lowered to the final integer result we return a <result, SDValue()> pair.
-// Otherwise we lower it to a sequence ending with a FIST, return a
-// <FIST, StackSlot> pair, and the caller is responsible for loading
-// the final integer result from StackSlot.
-std::pair<SDValue,SDValue>
+// to i16, i32 or i64, and we lower it to a legal sequence and return the
+// result.
+SDValue
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool IsSigned, bool IsReplace) const {
+ bool IsSigned) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
@@ -17787,18 +18440,15 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
// f16 must be promoted before using the lowering in this routine.
// fp128 does not use this lowering.
- return std::make_pair(SDValue(), SDValue());
+ return SDValue();
}
// If using FIST to compute an unsigned i64, we'll need some fixup
// to handle values above the maximum signed i64. A FIST is always
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
- bool UnsignedFixup = !IsSigned &&
- DstTy == MVT::i64 &&
- (!Subtarget.is64Bit() ||
- !isScalarFPTypeInSSEReg(TheVT));
+ bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;
- if (!IsSigned && DstTy != MVT::i64 && !Subtarget.hasAVX512()) {
+ if (!IsSigned && DstTy != MVT::i64) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -17809,30 +18459,13 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
- // These are really Legal.
- if (DstTy == MVT::i32 &&
- isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
- return std::make_pair(SDValue(), SDValue());
- if (Subtarget.is64Bit() &&
- DstTy == MVT::i64 &&
- isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
- return std::make_pair(SDValue(), SDValue());
-
// We lower FP->int64 into FISTP64 followed by a load from a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
- unsigned MemSize = DstTy.getSizeInBits()/8;
+ unsigned MemSize = DstTy.getStoreSize();
int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- unsigned Opc;
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
-
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
@@ -17874,9 +18507,10 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
- Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(0x80000000, DL, MVT::i32));
+ Adjust = DAG.getSelect(DL, MVT::i64, Cmp,
+ DAG.getConstant(0, DL, MVT::i64),
+ DAG.getConstant(APInt::getSignMask(64),
+ DL, MVT::i64));
SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
@@ -17884,81 +18518,52 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
}
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
+
// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
- Chain = DAG.getStore(Chain, DL, Value, StackSlot,
- MachinePointerInfo::getFixedStack(MF, SSFI));
- SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
- SDValue Ops[] = {
- Chain, StackSlot, DAG.getValueType(TheVT)
- };
-
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
- MachineMemOperand::MOLoad, MemSize, MemSize);
- Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
+ Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
+ SDVTList Tys = DAG.getVTList(TheVT, MVT::Other);
+ SDValue Ops[] = { Chain, StackSlot };
+
+ unsigned FLDSize = TheVT.getStoreSize();
+ assert(FLDSize <= MemSize && "Stack slot not big enough");
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, FLDSize, FLDSize);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
Chain = Value.getValue(1);
- SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
- StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
- MachineMemOperand::MOStore, MemSize, MemSize);
-
- if (UnsignedFixup) {
-
- // Insert the FIST, load its result as two i32's,
- // and XOR the high i32 with Adjust.
+ // Build the FP_TO_INT*_IN_MEM
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemSize, MemSize);
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
+ DAG.getVTList(MVT::Other),
+ Ops, DstTy, MMO);
- SDValue FistOps[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- FistOps, DstTy, MMO);
+ SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);
- SDValue Low32 =
- DAG.getLoad(MVT::i32, DL, FIST, StackSlot, MachinePointerInfo());
- SDValue HighAddr = DAG.getMemBasePlusOffset(StackSlot, 4, DL);
+ // If we need an unsigned fixup, XOR the result with adjust.
+ if (UnsignedFixup)
+ Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust);
- SDValue High32 =
- DAG.getLoad(MVT::i32, DL, FIST, HighAddr, MachinePointerInfo());
- High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
-
- if (Subtarget.is64Bit()) {
- // Join High32 and Low32 into a 64-bit result.
- // (High32 << 32) | Low32
- Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
- High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
- High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
- DAG.getConstant(32, DL, MVT::i8));
- SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
- return std::make_pair(Result, SDValue());
- }
-
- SDValue ResultOps[] = { Low32, High32 };
-
- SDValue pair = IsReplace
- ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
- : DAG.getMergeValues(ResultOps, DL);
- return std::make_pair(pair, SDValue());
- } else {
- // Build the FP_TO_INT*_IN_MEM
- SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- Ops, DstTy, MMO);
- return std::make_pair(FIST, StackSlot);
- }
+ return Res;
}
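// [Editor's illustrative note; not part of this import.] The unsigned fixup
// above implements fp->u64 on top of a signed FIST, roughly:
//   if (Value < 2^63)  Res = (i64)Value                          (Adjust = 0)
//   else               Res = (i64)(Value - 2^63) ^ 0x8000000000000000
// i.e. the sign bit that was subtracted out before the signed conversion is
// re-inserted by XOR'ing the loaded result with Adjust.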
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- MVT VT = Op->getSimpleValueType(0);
- SDValue In = Op->getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+ SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
+ unsigned Opc = Op.getOpcode();
assert(VT.isVector() && InVT.isVector() && "Expected vector type");
+ assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) &&
+ "Unexpected extension opcode");
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Expected same number of elements");
assert((VT.getVectorElementType() == MVT::i16 ||
@@ -17970,6 +18575,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
InVT.getVectorElementType() == MVT::i32) &&
"Unexpected element type");
+ unsigned ExtendInVecOpc = getOpcode_EXTEND_VECTOR_INREG(Opc);
+
// Custom legalize v8i8->v8i64 on CPUs without avx512bw.
if (InVT == MVT::v8i8) {
if (!ExperimentalVectorWideningLegalization || VT != MVT::v8i64)
@@ -17977,8 +18584,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
In = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op),
MVT::v16i8, In, DAG.getUNDEF(MVT::v8i8));
- // FIXME: This should be ANY_EXTEND_VECTOR_INREG for ANY_EXTEND input.
- return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, VT, In);
+ return DAG.getNode(ExtendInVecOpc, dl, VT, In);
}
if (Subtarget.hasInt256())
@@ -18000,11 +18606,17 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
- SDValue OpLo = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, dl, HalfVT, In);
+ SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In);
+
+ // Short-circuit if we can determine that each 128-bit half is the same value.
+ // Otherwise, this is difficult to match and optimize.
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In))
+ if (hasIdenticalHalvesShuffleMask(Shuf->getMask()))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo);
SDValue ZeroVec = DAG.getConstant(0, dl, InVT);
SDValue Undef = DAG.getUNDEF(InVT);
- bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND;
+ bool NeedZero = Opc == ISD::ZERO_EXTEND;
SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef);
OpHi = DAG.getBitcast(HalfVT, OpHi);
@@ -18179,8 +18791,11 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
// so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
- Res = DAG.getBitcast(MVT::v4i64, Res);
- Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
+ // Scale shuffle mask to avoid bitcasts and help ComputeNumSignBits.
+ SmallVector<int, 64> Mask;
+ int Scale = 64 / OutVT.getScalarSizeInBits();
+ scaleShuffleMask<int>(Scale, ArrayRef<int>({ 0, 2, 1, 3 }), Mask);
+ Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);
if (DstVT.is256BitVector())
return DAG.getBitcast(DstVT, Res);
@@ -18422,12 +19037,12 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
MVT VT = Op.getSimpleValueType();
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ SDLoc dl(Op);
if (VT.isVector()) {
- SDValue Src = Op.getOperand(0);
- SDLoc dl(Op);
-
- if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
+ if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
MVT TruncVT = MVT::v4i1;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
@@ -18447,7 +19062,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
}
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
- if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
+ if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32)));
@@ -18458,19 +19073,34 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
assert(!VT.isVector());
- std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
- IsSigned, /*IsReplace=*/ false);
- SDValue FIST = Vals.first, StackSlot = Vals.second;
- // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
- if (!FIST.getNode())
+ bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);
+
+ if (!IsSigned && Subtarget.hasAVX512()) {
+ // Conversions from f32/f64 should be legal.
+ if (UseSSEReg)
+ return Op;
+
+ // Use default expansion.
+ if (VT == MVT::i64)
+ return SDValue();
+ }
+
+ // Promote i16 to i32 if we can use a SSE operation.
+ if (VT == MVT::i16 && UseSSEReg) {
+ assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
+ SDValue Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
+
+ // If this is an FP_TO_SINT using SSEReg we're done.
+ if (UseSSEReg && IsSigned)
return Op;
- if (StackSlot.getNode())
- // Load the result.
- return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());
+ // Fall back to X87.
+ if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
+ return V;
- // The node is the result.
- return FIST;
+ llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
@@ -18491,7 +19121,7 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
/// implementation, and likely shuffle complexity of the alternate sequence.
static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- bool IsOptimizingSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize();
bool HasFastHOps = Subtarget.hasFastHorizontalOps();
return !IsSingleSource || IsOptimizingSize || HasFastHOps;
}
@@ -18513,16 +19143,11 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
if (!IsFP && !Subtarget.hasSSSE3())
return Op;
- // Defer forming the minimal horizontal op if the vector source has more than
- // the 2 extract element uses that we're matching here. In that case, we might
- // form a horizontal op that includes more than 1 add/sub op.
+ // Extract from a common vector.
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
LHS.getOperand(0) != RHS.getOperand(0) ||
- !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
- return Op;
-
- if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
+ !isa<ConstantSDNode>(LHS.getOperand(1)) ||
!isa<ConstantSDNode>(RHS.getOperand(1)) ||
!shouldUseHorizontalOp(true, DAG, Subtarget))
return Op;
@@ -18540,33 +19165,37 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
}
unsigned LExtIndex = LHS.getConstantOperandVal(1);
unsigned RExtIndex = RHS.getConstantOperandVal(1);
- if (LExtIndex == 1 && RExtIndex == 0 &&
+ if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
(HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
std::swap(LExtIndex, RExtIndex);
- // TODO: This can be extended to handle other adjacent extract pairs.
- if (LExtIndex != 0 || RExtIndex != 1)
+ if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
return Op;
SDValue X = LHS.getOperand(0);
EVT VecVT = X.getValueType();
unsigned BitWidth = VecVT.getSizeInBits();
+ unsigned NumLanes = BitWidth / 128;
+ unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes;
assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
"Not expecting illegal vector widths here");
// Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit
- // equivalent, so extract the 256/512-bit source op to 128-bit.
- // This is free: ymm/zmm -> xmm.
+ // equivalent, so extract the 256/512-bit source op to 128-bit if we can.
SDLoc DL(Op);
- if (BitWidth == 256 || BitWidth == 512)
- X = extract128BitVector(X, 0, DAG, DL);
+ if (BitWidth == 256 || BitWidth == 512) {
+ unsigned LaneIdx = LExtIndex / NumEltsPerLane;
+ X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL);
+ LExtIndex %= NumEltsPerLane;
+ }
// add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
// add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+ // add (extractelt (X, 2), extractelt (X, 3)) --> extractelt (hadd X, X), 1
// sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
- DAG.getIntPtrConstant(0, DL));
+ DAG.getIntPtrConstant(LExtIndex / 2, DL));
}
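// [Editor's illustrative sketch; not part of this import.] With the relaxed
// index checks above, adding any adjacent even/odd pair of lanes can map to a
// horizontal add (subject to the shouldUseHorizontalOp() heuristics), not
// just lanes 0 and 1. For example, assuming clang/gcc vector subscript
// syntax:
//
//   float sum_hi_pair(__m128 v) {
//     return v[2] + v[3];   // --> extractelt (haddps v, v), 1
//   }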
/// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -18732,36 +19361,25 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
}
-// Check whether an OR'd tree is PTEST-able.
-static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG,
- SDValue &X86CC) {
- assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
-
- if (!Subtarget.hasSSE41())
- return SDValue();
-
- if (!Op->hasOneUse())
- return SDValue();
-
- SDNode *N = Op.getNode();
- SDLoc DL(N);
-
+/// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...))
+/// style scalarized (associative) reduction patterns.
+static bool matchBitOpReduction(SDValue Op, ISD::NodeType BinOp,
+ SmallVectorImpl<SDValue> &SrcOps) {
SmallVector<SDValue, 8> Opnds;
- DenseMap<SDValue, unsigned> VecInMap;
- SmallVector<SDValue, 8> VecIns;
+ DenseMap<SDValue, APInt> SrcOpMap;
EVT VT = MVT::Other;
// Recognize a special case where a vector is cast into a wide integer to
// test all 0s.
- Opnds.push_back(N->getOperand(0));
- Opnds.push_back(N->getOperand(1));
+ assert(Op.getOpcode() == unsigned(BinOp) &&
+ "Unexpected bit reduction opcode");
+ Opnds.push_back(Op.getOperand(0));
+ Opnds.push_back(Op.getOperand(1));
for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) {
SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot;
- // BFS traverse all OR'd operands.
- if (I->getOpcode() == ISD::OR) {
+ // BFS traverse all BinOp operands.
+ if (I->getOpcode() == unsigned(BinOp)) {
Opnds.push_back(I->getOperand(0));
Opnds.push_back(I->getOperand(1));
// Re-evaluate the number of nodes to be traversed.
@@ -18771,42 +19389,63 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
// Quit if a non-EXTRACT_VECTOR_ELT
if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return SDValue();
+ return false;
// Quit if without a constant index.
SDValue Idx = I->getOperand(1);
if (!isa<ConstantSDNode>(Idx))
- return SDValue();
+ return false;
- SDValue ExtractedFromVec = I->getOperand(0);
- DenseMap<SDValue, unsigned>::iterator M = VecInMap.find(ExtractedFromVec);
- if (M == VecInMap.end()) {
- VT = ExtractedFromVec.getValueType();
- // Quit if not 128/256-bit vector.
- if (!VT.is128BitVector() && !VT.is256BitVector())
- return SDValue();
+ SDValue Src = I->getOperand(0);
+ DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src);
+ if (M == SrcOpMap.end()) {
+ VT = Src.getValueType();
// Quit if not the same type.
- if (VecInMap.begin() != VecInMap.end() &&
- VT != VecInMap.begin()->first.getValueType())
- return SDValue();
- M = VecInMap.insert(std::make_pair(ExtractedFromVec, 0)).first;
- VecIns.push_back(ExtractedFromVec);
+ if (SrcOpMap.begin() != SrcOpMap.end() &&
+ VT != SrcOpMap.begin()->first.getValueType())
+ return false;
+ unsigned NumElts = VT.getVectorNumElements();
+ APInt EltCount = APInt::getNullValue(NumElts);
+ M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
+ SrcOps.push_back(Src);
}
- M->second |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue();
+ // Quit if element already used.
+ unsigned CIdx = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (M->second[CIdx])
+ return false;
+ M->second.setBit(CIdx);
}
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Not extracted from 128-/256-bit vector.");
+ // Quit if not all elements are used.
+ for (DenseMap<SDValue, APInt>::const_iterator I = SrcOpMap.begin(),
+ E = SrcOpMap.end();
+ I != E; ++I) {
+ if (!I->second.isAllOnesValue())
+ return false;
+ }
- unsigned FullMask = (1U << VT.getVectorNumElements()) - 1U;
+ return true;
+}
- for (DenseMap<SDValue, unsigned>::const_iterator
- I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
- // Quit if not all elements are used.
- if (I->second != FullMask)
- return SDValue();
- }
+// Check whether an OR'd tree is PTEST-able.
+static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG, SDValue &X86CC) {
+ assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
+
+ if (!Subtarget.hasSSE41() || !Op->hasOneUse())
+ return SDValue();
+
+ SmallVector<SDValue, 8> VecIns;
+ if (!matchBitOpReduction(Op, ISD::OR, VecIns))
+ return SDValue();
+ // Quit if not 128/256-bit vector.
+ EVT VT = VecIns[0].getValueType();
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+
+ SDLoc DL(Op);
MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
// Cast all vectors into TestVT for PTEST.
@@ -18822,10 +19461,9 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE,
- DL, MVT::i8);
- return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
- VecIns.back(), VecIns.back());
+ X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE, DL,
+ MVT::i8);
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
}
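// [Editor's illustrative sketch; not part of this import.] The kind of
// scalarized OR-reduction that matchBitOpReduction() recognizes, and that
// LowerVectorAllZeroTest() then turns into a single PTEST on SSE4.1 targets;
// the helper name is an assumption for illustration only:
//
//   #include <immintrin.h>
//   bool all_zero(__m128i v) {
//     return (_mm_extract_epi32(v, 0) | _mm_extract_epi32(v, 1) |
//             _mm_extract_epi32(v, 2) | _mm_extract_epi32(v, 3)) == 0;
//   }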
/// return true if \c Op has a use that doesn't just read flags.
@@ -18963,29 +19601,52 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
if (isNullConstant(Op1))
return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
- if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
- Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
- // Only promote the compare up to I32 if it is a 16 bit operation
- // with an immediate. 16 bit immediates are to be avoided.
- if (Op0.getValueType() == MVT::i16 &&
- ((isa<ConstantSDNode>(Op0) &&
- !cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
- (isa<ConstantSDNode>(Op1) &&
- !cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
- !DAG.getMachineFunction().getFunction().optForMinSize() &&
- !Subtarget.isAtom()) {
+ EVT CmpVT = Op0.getValueType();
+
+ if (CmpVT.isFloatingPoint())
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+
+ assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
+ CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
+
+ // Only promote the compare up to I32 if it is a 16 bit operation
+ // with an immediate. 16 bit immediates are to be avoided.
+ if (CmpVT == MVT::i16 && !Subtarget.isAtom() &&
+ !DAG.getMachineFunction().getFunction().hasMinSize()) {
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ // Don't do this if the immediate can fit in 8-bits.
+ if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
+ (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
- Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
- Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
+ if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
+ // For equality comparisons try to use SIGN_EXTEND if the input was
+ // truncate from something with enough sign bits.
+ if (Op0.getOpcode() == ISD::TRUNCATE) {
+ SDValue In = Op0.getOperand(0);
+ unsigned EffBits =
+ In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
+ if (EffBits <= 16)
+ ExtendOp = ISD::SIGN_EXTEND;
+ } else if (Op1.getOpcode() == ISD::TRUNCATE) {
+ SDValue In = Op1.getOperand(0);
+ unsigned EffBits =
+ In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
+ if (EffBits <= 16)
+ ExtendOp = ISD::SIGN_EXTEND;
+ }
+ }
+
+ CmpVT = MVT::i32;
+ Op0 = DAG.getNode(ExtendOp, dl, CmpVT, Op0);
+ Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
}
- // Use SUB instead of CMP to enable CSE between SUB and CMP.
- SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
- SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
- return SDValue(Sub.getNode(), 1);
}
- assert(Op0.getValueType().isFloatingPoint() && "Unexpected VT!");
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+ // Use SUB instead of CMP to enable CSE between SUB and CMP.
+ SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
+ SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
+ return Sub.getValue(1);
}
/// Convert a comparison if required by the subtarget.
@@ -19146,7 +19807,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
} else {
// Use BT if the immediate can't be encoded in a TEST instruction or we
// are optimizing for size and the immediate won't fit in a byte.
- bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
isPowerOf2_64(AndRHSVal)) {
Src = AndLHS;
@@ -19290,10 +19951,11 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}
-/// Given a simple buildvector constant, return a new vector constant with each
-/// element decremented. If decrementing would result in underflow or this
-/// is not a simple vector constant, return an empty value.
-static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+/// Given a buildvector constant, return a new vector constant with each element
+/// incremented or decremented. If incrementing or decrementing would result in
+/// unsigned overflow or underflow or this is not a simple vector constant,
+/// return an empty value.
+static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
@@ -19308,11 +19970,12 @@ static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
return SDValue();
- // Avoid underflow.
- if (Elt->getAPIntValue().isNullValue())
+ // Avoid overflow/underflow.
+ const APInt &EltC = Elt->getAPIntValue();
+ if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue()))
return SDValue();
- NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
+ NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
}
return DAG.getBuildVector(VT, DL, NewVecC);
@@ -19344,12 +20007,24 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
+ SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
break;
}
+ case ISD::SETUGT: {
+ // If the comparison is against a constant, we can turn this into a setuge.
+ // This is beneficial because materializing a constant 0 for the PCMPEQ is
+ // probably cheaper than XOR+PCMPGT using 2 different vector constants:
+ // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
+ SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true);
+ if (!UGEOp1)
+ return SDValue();
+ Op1 = Op0;
+ Op0 = UGEOp1;
+ break;
+ }
// Psubus is better than flip-sign because it requires no inversion.
case ISD::SETUGE:
std::swap(Op0, Op1);
@@ -19446,10 +20121,6 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
"Value types for source and destination must be the same!");
- // Break 256-bit integer vector compare into smaller ones.
- if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntVSETCC(Op, DAG);
-
// The result is boolean, but operands are int/float
if (VT.getVectorElementType() == MVT::i1) {
// In AVX-512 architecture setcc returns mask with i1 elements,
@@ -19503,6 +20174,27 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ // ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.
+ if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
+ Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
+ ConstantSDNode *C1 = isConstOrConstSplat(Op1);
+ if (C1 && C1->getAPIntValue().isPowerOf2()) {
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;
+
+ SDValue Result = Op0.getOperand(0);
+ Result = DAG.getNode(ISD::SHL, dl, VT, Result,
+ DAG.getConstant(ShiftAmt, dl, VT));
+ Result = DAG.getNode(ISD::SRA, dl, VT, Result,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ return Result;
+ }
+ }
+
+ // Break 256-bit integer vector compare into smaller ones.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntVSETCC(Op, DAG);
+
// If this is a SETNE against the signed minimum value, change it to SETGT.
// If this is a SETNE against the signed maximum value, change it to SETLT.
// which will be swapped to SETGT.
@@ -19530,17 +20222,20 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
TLI.isOperationLegal(ISD::UMIN, VT)) {
// If we have a constant operand, increment/decrement it and change the
// condition to avoid an invert.
- // TODO: This could be extended to handle a non-splat constant by checking
- // that each element of the constant is not the max/null value.
- APInt C;
- if (Cond == ISD::SETUGT && isConstantSplat(Op1, C) && !C.isMaxValue()) {
+ if (Cond == ISD::SETUGT &&
+ ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) {
+ return !C->getAPIntValue().isMaxValue();
+ })) {
// X > C --> X >= (C+1) --> X == umax(X, C+1)
- Op1 = DAG.getConstant(C + 1, dl, VT);
+ Op1 = DAG.getNode(ISD::ADD, dl, VT, Op1, DAG.getConstant(1, dl, VT));
Cond = ISD::SETUGE;
}
- if (Cond == ISD::SETULT && isConstantSplat(Op1, C) && !C.isNullValue()) {
+ if (Cond == ISD::SETULT &&
+ ISD::matchUnaryPredicate(Op1, [](ConstantSDNode *C) {
+ return !C->getAPIntValue().isNullValue();
+ })) {
// X < C --> X <= (C-1) --> X == umin(X, C-1)
- Op1 = DAG.getConstant(C - 1, dl, VT);
+ Op1 = DAG.getNode(ISD::SUB, dl, VT, Op1, DAG.getConstant(1, dl, VT));
Cond = ISD::SETULE;
}
bool Invert = false;
@@ -19826,7 +20521,7 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
break;
case ISD::UADDO:
BaseOp = X86ISD::ADD;
- Cond = X86::COND_B;
+ Cond = isOneConstant(RHS) ? X86::COND_E : X86::COND_B;
break;
case ISD::SSUBO:
BaseOp = X86ISD::SUB;
@@ -19867,6 +20562,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);
SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
+ assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!");
return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
}
@@ -20036,10 +20732,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
+ SDValue CmpZero = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
Zero = DAG.getConstant(0, DL, Op.getValueType());
- return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
+ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, CmpZero);
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
@@ -20111,7 +20807,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
- unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getSimpleValueType();
bool IllegalFPCMov = false;
@@ -20120,7 +20815,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
- Opc == X86ISD::BT) { // FIXME
+ Cmp.getOpcode() == X86ISD::BT) { // FIXME
Cond = Cmp;
AddTest = false;
}
@@ -20193,8 +20888,15 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- // Promote i16 cmovs if it won't prevent folding a load.
- if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
+ // Or finally, promote i8 cmovs if we have CMOV,
+ // or i16 cmovs if it won't prevent folding a load.
+ // FIXME: we should not limit promotion of i8 case to only when the CMOV is
+ // legal, but EmitLoweredSelect() can not deal with these extensions
+ // being inserted between two CMOV's. (in i16 case too TBN)
+ // https://bugs.llvm.org/show_bug.cgi?id=40974
+ if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
+ (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) &&
+ !MayFoldLoad(Op2))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -20453,6 +21155,76 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
+/// Change a vector store into a pair of half-size vector stores.
+static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
+ SDValue StoredVal = Store->getValue();
+ assert((StoredVal.getValueType().is256BitVector() ||
+ StoredVal.getValueType().is512BitVector()) &&
+ "Expecting 256/512-bit op");
+
+ // Splitting volatile memory ops is not allowed unless the operation was not
+ // legal to begin with. We are assuming the input op is legal (this transform
+ // is only used for targets with AVX).
+ if (Store->isVolatile())
+ return SDValue();
+
+ MVT StoreVT = StoredVal.getSimpleValueType();
+ unsigned NumElems = StoreVT.getVectorNumElements();
+ unsigned HalfSize = StoredVal.getValueSizeInBits() / 2;
+ unsigned HalfAlign = (128 == HalfSize ? 16 : 32);
+
+ SDLoc DL(Store);
+ SDValue Value0 = extractSubVector(StoredVal, 0, DAG, DL, HalfSize);
+ SDValue Value1 = extractSubVector(StoredVal, NumElems / 2, DAG, DL, HalfSize);
+ SDValue Ptr0 = Store->getBasePtr();
+ SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, HalfAlign, DL);
+ unsigned Alignment = Store->getAlignment();
+ SDValue Ch0 =
+ DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
+ Alignment, Store->getMemOperand()->getFlags());
+ SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
+ Store->getPointerInfo().getWithOffset(HalfAlign),
+ MinAlign(Alignment, HalfAlign),
+ Store->getMemOperand()->getFlags());
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
+}
+
+/// Scalarize a vector store, bitcasting to StoreVT to determine the scalar
+/// type.
+static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
+ SelectionDAG &DAG) {
+ SDValue StoredVal = Store->getValue();
+ assert(StoreVT.is128BitVector() &&
+ StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");
+ StoredVal = DAG.getBitcast(StoreVT, StoredVal);
+
+ // Splitting volatile memory ops is not allowed unless the operation was not
+ // legal to begin with. We are assuming the input op is legal (this transform
+ // is only used for targets with AVX).
+ if (Store->isVolatile())
+ return SDValue();
+
+ MVT StoreSVT = StoreVT.getScalarType();
+ unsigned NumElems = StoreVT.getVectorNumElements();
+ unsigned ScalarSize = StoreSVT.getStoreSize();
+ unsigned Alignment = Store->getAlignment();
+
+ SDLoc DL(Store);
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ unsigned Offset = i * ScalarSize;
+ SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(), Offset, DL);
+ SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
+ DAG.getIntPtrConstant(i, DL));
+ SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
+ Store->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Alignment, Offset),
+ Store->getMemOperand()->getFlags());
+ Stores.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
+}
+
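// [Editor's illustrative sketch; not part of this import.] The 256-bit
// "store of concatenated ops" case that LowerStore() below now hands to
// splitVectorStore(); the helper name is an assumption for illustration only:
//
//   #include <immintrin.h>
//   void store_concat(__m128 lo, __m128 hi, float *p) {
//     __m256 v = _mm256_set_m128(hi, lo);  // concat_vectors(lo, hi)
//     _mm256_storeu_ps(p, v);              // now emitted as two 16-byte stores
//   }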
static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
@@ -20482,28 +21254,47 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
if (St->isTruncatingStore())
return SDValue();
+ // If this is a 256-bit store of concatenated ops, we are better off splitting
+ // that store into two 128-bit stores. This avoids spurious use of 256-bit ops
+ // and each half can execute independently. Some cores would split the op into
+ // halves anyway, so the concat (vinsertf128) is purely an extra op.
MVT StoreVT = StoredVal.getSimpleValueType();
+ if (StoreVT.is256BitVector()) {
+ SmallVector<SDValue, 4> CatOps;
+ if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
+ return splitVectorStore(St, DAG);
+ return SDValue();
+ }
+
assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
"Unexpected VT");
if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
TargetLowering::TypeWidenVector)
return SDValue();
- // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
- // and store it.
MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
StoreVT.getVectorNumElements() * 2);
StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
DAG.getUNDEF(StoreVT));
- MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
- MVT CastVT = MVT::getVectorVT(StVT, 2);
- StoredVal = DAG.getBitcast(CastVT, StoredVal);
- StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
- DAG.getIntPtrConstant(0, dl));
- return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
- St->getPointerInfo(), St->getAlignment(),
- St->getMemOperand()->getFlags());
+ if (Subtarget.hasSSE2()) {
+ // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+ // and store it.
+ MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+ MVT CastVT = MVT::getVectorVT(StVT, 2);
+ StoredVal = DAG.getBitcast(CastVT, StoredVal);
+ StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
+ DAG.getIntPtrConstant(0, dl));
+
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+ assert(Subtarget.hasSSE1() && "Expected SSE");
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
+ return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
+ St->getMemOperand());
}
// Lower vector extended loads using a shuffle. If SSSE3 is not available we
@@ -20694,13 +21485,13 @@ static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
- SDValue Sext = getExtendInVec(/*Signed*/true, dl, RegVT, SlicedVec, DAG);
+ SDValue Sext = getExtendInVec(ISD::SIGN_EXTEND, dl, RegVT, SlicedVec, DAG);
return DAG.getMergeValues({Sext, TF}, dl);
}
if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 &&
MemVT == MVT::v8i8) {
- SDValue Sext = getExtendInVec(/*Signed*/false, dl, RegVT, SlicedVec, DAG);
+ SDValue Sext = getExtendInVec(ISD::ZERO_EXTEND, dl, RegVT, SlicedVec, DAG);
return DAG.getMergeValues({Sext, TF}, dl);
}
@@ -21240,42 +22031,41 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
SmallVector<SDValue, 8> Elts;
unsigned NumElts = SrcOp->getNumOperands();
- ConstantSDNode *ND;
- switch(Opc) {
+ switch (Opc) {
default: llvm_unreachable("Unknown opcode!");
case X86ISD::VSHLI:
- for (unsigned i=0; i!=NumElts; ++i) {
+ for (unsigned i = 0; i != NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
- ND = cast<ConstantSDNode>(CurrentOp);
+ auto *ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRLI:
- for (unsigned i=0; i!=NumElts; ++i) {
+ for (unsigned i = 0; i != NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
- ND = cast<ConstantSDNode>(CurrentOp);
+ auto *ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
}
break;
case X86ISD::VSRAI:
- for (unsigned i=0; i!=NumElts; ++i) {
+ for (unsigned i = 0; i != NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
if (CurrentOp->isUndef()) {
Elts.push_back(CurrentOp);
continue;
}
- ND = cast<ConstantSDNode>(CurrentOp);
+ auto *ND = cast<ConstantSDNode>(CurrentOp);
const APInt &C = ND->getAPIntValue();
Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
}
@@ -21443,7 +22233,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
DAG.getBitcast(MVT::v8i1, Mask),
DAG.getIntPtrConstant(0, dl));
if (Op.getOpcode() == X86ISD::FSETCCM ||
- Op.getOpcode() == X86ISD::FSETCCM_RND ||
+ Op.getOpcode() == X86ISD::FSETCCM_SAE ||
Op.getOpcode() == X86ISD::VFPCLASSS)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
@@ -21517,11 +22307,31 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
// Helper to detect if the operand is CUR_DIRECTION rounding mode.
auto isRoundModeCurDirection = [](SDValue Rnd) {
- if (!isa<ConstantSDNode>(Rnd))
- return false;
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+ return C->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;
+
+ return false;
+ };
+ auto isRoundModeSAE = [](SDValue Rnd) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd))
+ return C->getZExtValue() == X86::STATIC_ROUNDING::NO_EXC;
- unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
- return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
+ return false;
+ };
+ auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) {
+ RC = C->getZExtValue();
+ if (RC & X86::STATIC_ROUNDING::NO_EXC) {
+ // Clear the NO_EXC bit and check remaining bits.
+ RC ^= X86::STATIC_ROUNDING::NO_EXC;
+ return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT ||
+ RC == X86::STATIC_ROUNDING::TO_NEG_INF ||
+ RC == X86::STATIC_ROUNDING::TO_POS_INF ||
+ RC == X86::STATIC_ROUNDING::TO_ZERO;
+ }
+ }
+
+ return false;
};
SDLoc dl(Op);
@@ -21537,13 +22347,29 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(2);
- if (!isRoundModeCurDirection(Rnd)) {
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
- Op.getOperand(1), Rnd);
- }
+ Op.getOperand(1),
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
}
+ case INTR_TYPE_1OP_SAE: {
+ SDValue Sae = Op.getOperand(2);
+
+ unsigned Opc;
+ if (isRoundModeCurDirection(Sae))
+ Opc = IntrData->Opc0;
+ else if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else
+ return SDValue();
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1));
+ }
case INTR_TYPE_2OP: {
SDValue Src2 = Op.getOperand(2);
@@ -21553,15 +22379,32 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(3);
- if (!isRoundModeCurDirection(Rnd)) {
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
- Op.getOperand(1), Src2, Rnd);
- }
+ Op.getOperand(1), Src2,
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Src2);
}
+ case INTR_TYPE_2OP_SAE: {
+ SDValue Sae = Op.getOperand(3);
+
+ unsigned Opc;
+ if (isRoundModeCurDirection(Sae))
+ Opc = IntrData->Opc0;
+ else if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else
+ return SDValue();
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2));
+ }
case INTR_TYPE_3OP:
case INTR_TYPE_3OP_IMM8: {
SDValue Src1 = Op.getOperand(1);
@@ -21577,11 +22420,13 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd)) {
- return DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src1, Src2, Src3, Rnd);
- }
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
+ return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
+ Src1, Src2, Src3,
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
@@ -21590,44 +22435,45 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case INTR_TYPE_4OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
- case INTR_TYPE_1OP_MASK_RM: {
- SDValue Src = Op.getOperand(1);
- SDValue PassThru = Op.getOperand(2);
- SDValue Mask = Op.getOperand(3);
- SDValue RoundingMode;
- // We always add rounding mode to the Node.
- // If the rounding mode is not specified, we add the
- // "current direction" mode.
- if (Op.getNumOperands() == 4)
- RoundingMode =
- DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
- else
- RoundingMode = Op.getOperand(4);
- assert(IntrData->Opc1 == 0 && "Unexpected second opcode!");
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
- RoundingMode),
- Mask, PassThru, Subtarget, DAG);
- }
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
// We add rounding mode to the Node when
- // - RM Opcode is specified and
- // - RM is not "current direction".
+ // - RC Opcode is specified and
+ // - RC is not "current direction".
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd)) {
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
+ return getVectorMaskingNode(
+ DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(),
+ Src, DAG.getTargetConstant(RC, dl, MVT::i32)),
+ Mask, PassThru, Subtarget, DAG);
+ if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
Mask, PassThru, Subtarget, DAG);
}
+ case INTR_TYPE_1OP_MASK_SAE: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Rnd = Op.getOperand(4);
+
+ unsigned Opc;
+ if (isRoundModeCurDirection(Rnd))
+ Opc = IntrData->Opc0;
+ else if (isRoundModeSAE(Rnd))
+ Opc = IntrData->Opc1;
+ else
+ return SDValue();
+
+ return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src),
+ Mask, PassThru, Subtarget, DAG);
+ }
case INTR_TYPE_SCALAR_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@@ -21641,10 +22487,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op.getNumOperands() == (5U + HasRounding)) {
if (HasRounding) {
SDValue Rnd = Op.getOperand(5);
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
+ return getScalarMaskingNode(
+ DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,
+ DAG.getTargetConstant(RC, dl, MVT::i32)),
+ Mask, passThru, Subtarget, DAG);
if (!isRoundModeCurDirection(Rnd))
- return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, VT, Src1, Src2, Rnd),
- Mask, passThru, Subtarget, DAG);
+ return SDValue();
}
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
Src2),
@@ -21654,123 +22504,138 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(Op.getNumOperands() == (6U + HasRounding) &&
"Unexpected intrinsic form");
SDValue RoundingMode = Op.getOperand(5);
+ unsigned Opc = IntrData->Opc0;
if (HasRounding) {
SDValue Sae = Op.getOperand(6);
- if (!isRoundModeCurDirection(Sae))
- return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, VT, Src1, Src2,
- RoundingMode, Sae),
- Mask, passThru, Subtarget, DAG);
+ if (isRoundModeSAE(Sae))
+ Opc = IntrWithRoundingModeOpcode;
+ else if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
+ return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1,
Src2, RoundingMode),
Mask, passThru, Subtarget, DAG);
}
- case INTR_TYPE_SCALAR_MASK_RM: {
+ case INTR_TYPE_SCALAR_MASK_RND: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
- SDValue Src0 = Op.getOperand(3);
+ SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- // There are 2 kinds of intrinsics in this group:
- // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands
- // (2) With rounding mode and sae - 7 operands.
- if (Op.getNumOperands() == 6) {
- SDValue Sae = Op.getOperand(5);
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
- Sae),
- Mask, Src0, Subtarget, DAG);
- }
- assert(Op.getNumOperands() == 7 && "Unexpected intrinsic form");
- SDValue RoundingMode = Op.getOperand(5);
- SDValue Sae = Op.getOperand(6);
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
- RoundingMode, Sae),
- Mask, Src0, Subtarget, DAG);
+ SDValue Rnd = Op.getOperand(5);
+
+ SDValue NewOp;
+ unsigned RC = 0;
+ if (isRoundModeCurDirection(Rnd))
+ NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
+ else if (isRoundModeSAEToX(Rnd, RC))
+ NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ else
+ return SDValue();
+
+ return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_SCALAR_MASK_SAE: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue passThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ SDValue Sae = Op.getOperand(5);
+ unsigned Opc;
+ if (isRoundModeCurDirection(Sae))
+ Opc = IntrData->Opc0;
+ else if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else
+ return SDValue();
+
+ return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
+ Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
-
- // We specify 2 possible opcodes for intrinsics with rounding modes.
- // First, we check if the intrinsic may have non-default rounding mode,
- // (IntrData->Opc1 != 0), then we check the rounding mode operand.
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
+ SDValue NewOp;
+ if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd)) {
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src1, Src2, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
+ NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ else if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
}
- // TODO: Intrinsics should have fast-math-flags to propagate.
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2),
- Mask, PassThru, Subtarget, DAG);
+ if (!NewOp)
+ NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);
+ return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
}
- case INTR_TYPE_2OP_MASK_RM: {
+ case INTR_TYPE_2OP_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- // We specify 2 possible modes for intrinsics, with/without rounding
- // modes.
- // First, we check if the intrinsic have rounding mode (6 operands),
- // if not, we set rounding mode to "current".
- SDValue Rnd;
- if (Op.getNumOperands() == 6)
- Rnd = Op.getOperand(5);
- else
- Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1, Src2, Rnd),
+
+ unsigned Opc = IntrData->Opc0;
+ if (IntrData->Opc1 != 0) {
+ SDValue Sae = Op.getOperand(5);
+ if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else if (!isRoundModeCurDirection(Sae))
+ return SDValue();
+ }
+
+ return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),
Mask, PassThru, Subtarget, DAG);
}
- case INTR_TYPE_3OP_SCALAR_MASK: {
+ case INTR_TYPE_3OP_SCALAR_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
+ SDValue Sae = Op.getOperand(6);
+ unsigned Opc;
+ if (isRoundModeCurDirection(Sae))
+ Opc = IntrData->Opc0;
+ else if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else
+ return SDValue();
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(6);
- if (!isRoundModeCurDirection(Rnd))
- return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, VT, Src1, Src2, Src3, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
- Src2, Src3),
+ return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case INTR_TYPE_3OP_MASK: {
+ case INTR_TYPE_3OP_MASK_SAE: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
- // We specify 2 possible opcodes for intrinsics with rounding modes.
- // First, we check if the intrinsic may have non-default rounding mode,
- // (IntrData->Opc1 != 0), then we check the rounding mode operand.
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(6);
- if (!isRoundModeCurDirection(Rnd)) {
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src1, Src2, Src3, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
+ unsigned Opc = IntrData->Opc0;
+ if (IntrData->Opc1 != 0) {
+ SDValue Sae = Op.getOperand(6);
+ if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1, Src2, Src3),
+ return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case BLENDV: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+
+ EVT MaskVT = Src3.getValueType().changeVectorElementTypeToInteger();
+ Src3 = DAG.getBitcast(MaskVT, Src3);
+
+ // Reverse the operands to match VSELECT order.
+ return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1);
+ }
case VPERM_2OP : {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
@@ -21783,35 +22648,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// first.
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
- case CVTPD2PS:
- // ISD::FP_ROUND has a second argument that indicates if the truncation
- // does not change the value. Set it to 0 since it can change.
- return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
- DAG.getIntPtrConstant(0, dl));
- case CVTPD2PS_RND_MASK: {
- SDValue Src = Op.getOperand(1);
- SDValue PassThru = Op.getOperand(2);
- SDValue Mask = Op.getOperand(3);
- // We add rounding mode to the Node when
- // - RM Opcode is specified and
- // - RM is not "current direction".
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd)) {
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(),
- Src, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
- }
- assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!");
- // ISD::FP_ROUND has a second argument that indicates if the truncation
- // does not change the value. Set it to 0 since it can change.
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
- DAG.getIntPtrConstant(0, dl)),
- Mask, PassThru, Subtarget, DAG);
- }
case FPCLASSS: {
SDValue Src1 = Op.getOperand(1);
SDValue Imm = Op.getOperand(2);
@@ -21829,24 +22665,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
- SDValue Cmp;
SDValue CC = Op.getOperand(3);
CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
- SDValue Rnd = Op.getOperand(4);
- if (!isRoundModeCurDirection(Rnd))
- Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
- Op.getOperand(2), CC, Rnd);
+ SDValue Sae = Op.getOperand(4);
+ if (isRoundModeSAE(Sae))
+ return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
+ Op.getOperand(2), CC, Sae);
+ if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
//default rounding mode
- if (!Cmp.getNode())
- Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
+ return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC);
-
- return Cmp;
}
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
@@ -21856,12 +22690,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Cmp;
if (IntrData->Opc1 != 0) {
- SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
- Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd);
+ SDValue Sae = Op.getOperand(5);
+ if (isRoundModeSAE(Sae))
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae);
+ else if (!isRoundModeCurDirection(Sae))
+ return SDValue();
}
//default rounding mode
- if(!Cmp.getNode())
+ if (!Cmp.getNode())
Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);
SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(),
@@ -21921,9 +22757,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8));
- else
- FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS,
+ else if (isRoundModeSAE(Sae))
+ FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
+ else
+ return SDValue();
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
@@ -21940,41 +22778,42 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
- if (isAllOnesConstant(Mask)) // return data as is
+ if (ISD::isBuildVectorAllOnes(Mask.getNode())) // return data as is
return Op.getOperand(1);
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- DataToCompress),
- Mask, PassThru, Subtarget, DAG);
+ // Avoid false dependency.
+ if (PassThru.isUndef())
+ PassThru = DAG.getConstant(0, dl, VT);
+
+ return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru,
+ Mask);
}
- case FIXUPIMMS:
- case FIXUPIMMS_MASKZ:
case FIXUPIMM:
- case FIXUPIMM_MASKZ:{
+ case FIXUPIMM_MASKZ: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Imm = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
- SDValue Passthru = (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMMS ) ?
- Src1 : getZeroVector(VT, Subtarget, DAG, dl);
- // We specify 2 possible modes for intrinsics, with/without rounding
- // modes.
- // First, we check if the intrinsic have rounding mode (7 operands),
- // if not, we set rounding mode to "current".
- SDValue Rnd;
- if (Op.getNumOperands() == 7)
- Rnd = Op.getOperand(6);
- else
- Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
- if (IntrData->Type == FIXUPIMM || IntrData->Type == FIXUPIMM_MASKZ)
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1, Src2, Src3, Imm, Rnd),
- Mask, Passthru, Subtarget, DAG);
- else // Scalar - FIXUPIMMS, FIXUPIMMS_MASKZ
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1, Src2, Src3, Imm, Rnd),
- Mask, Passthru, Subtarget, DAG);
+ SDValue Passthru = (IntrData->Type == FIXUPIMM)
+ ? Src1
+ : getZeroVector(VT, Subtarget, DAG, dl);
+
+ unsigned Opc = IntrData->Opc0;
+ if (IntrData->Opc1 != 0) {
+ SDValue Sae = Op.getOperand(6);
+ if (isRoundModeSAE(Sae))
+ Opc = IntrData->Opc1;
+ else if (!isRoundModeCurDirection(Sae))
+ return SDValue();
+ }
+
+ SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);
+
+ if (Opc == X86ISD::VFIXUPIMM || Opc == X86ISD::VFIXUPIMM_SAE)
+ return getVectorMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG);
+
+ return getScalarMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG);
}
case ROUNDP: {
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
@@ -22018,7 +22857,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMergeValues(Results, dl);
}
case CVTPD2PS_MASK:
- case CVTPD2I_MASK:
+ case CVTPD2DQ_MASK:
+ case CVTQQ2PS_MASK:
case TRUNCATE_TO_REG: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@@ -22049,6 +22889,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
PassThru, Mask);
}
+ case CVTNEPS2BF16_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+ // Break false dependency.
+ if (PassThru.isUndef())
+ PassThru = DAG.getConstant(0, dl, PassThru.getValueType());
+
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+ Mask);
+ }
default:
break;
}
@@ -22279,10 +23134,37 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Reg;
if (RegInfo->hasBasePointer(MF))
Reg = RegInfo->getBaseRegister();
- else // This function handles the SP or FP case.
- Reg = RegInfo->getPtrSizedFrameRegister(MF);
+ else { // Handles the SP or FP case.
+ bool CantUseFP = RegInfo->needsStackRealignment(MF);
+ if (CantUseFP)
+ Reg = RegInfo->getPtrSizedStackRegister(MF);
+ else
+ Reg = RegInfo->getPtrSizedFrameRegister(MF);
+ }
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
+
+ case Intrinsic::x86_avx512_vp2intersect_q_512:
+ case Intrinsic::x86_avx512_vp2intersect_q_256:
+ case Intrinsic::x86_avx512_vp2intersect_q_128:
+ case Intrinsic::x86_avx512_vp2intersect_d_512:
+ case Intrinsic::x86_avx512_vp2intersect_d_256:
+ case Intrinsic::x86_avx512_vp2intersect_d_128: {
+ MVT MaskVT = Op.getSimpleValueType();
+
+ SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other);
+ SDLoc DL(Op);
+
+ SDValue Operation =
+ DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs,
+ Op->getOperand(1), Op->getOperand(2));
+
+ SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL,
+ MaskVT, Operation);
+ SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL,
+ MaskVT, Operation);
+ return DAG.getMergeValues({Result0, Result1}, DL);
+ }
}
}
@@ -22296,25 +23178,26 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- EVT MaskVT = Mask.getValueType();
+ EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger();
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
- SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
- SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
- return DAG.getMergeValues(RetOps, dl);
+
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
}
-static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain,
- const X86Subtarget &Subtarget) {
+static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
auto *C = dyn_cast<ConstantSDNode>(ScaleOp);
@@ -22332,17 +23215,18 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
- SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
- SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
- return DAG.getMergeValues(RetOps, dl);
+
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale };
+ SDValue Res = DAG.getTargetMemSDNode<X86MaskedGatherSDNode>(
+ VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return DAG.getMergeValues({ Res, Res.getValue(2) }, dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -22355,8 +23239,6 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (!C)
return SDValue();
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
- SDValue Segment = DAG.getRegister(0, MVT::i32);
unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(),
Src.getSimpleValueType().getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
@@ -22366,10 +23248,13 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
if (Mask.getValueType() != MaskVT)
Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
+
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Src, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- return SDValue(Res, 1);
+ SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale};
+ SDValue Res = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
+ VTs, Ops, dl, MemIntr->getMemoryVT(), MemIntr->getMemOperand());
+ return Res.getValue(1);
}
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -22392,24 +23277,37 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
return SDValue(Res, 0);
}
-/// Handles the lowering of builtin intrinsic that return the value
-/// of the extended control register.
-static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SmallVectorImpl<SDValue> &Results) {
- assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue LO, HI;
+/// Handles the lowering of builtin intrinsics with chain that return their
+/// value into registers EDX:EAX.
+/// If operand SrcReg is a valid register identifier, then operand 2 of N is
+/// copied to SrcReg. The assumption is that SrcReg is an implicit input to
+/// TargetOpcode.
+/// Returns a Glue value which can be used to add extra copy-from-reg if the
+/// expanded intrinsic implicitly defines extra registers (i.e. not just
+/// EDX:EAX).
+static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG,
+ unsigned TargetOpcode,
+ unsigned SrcReg,
+ const X86Subtarget &Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Glue;
- // The ECX register is used to select the index of the XCR register to
- // return.
- SDValue Chain =
- DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2));
- SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain);
+ if (SrcReg) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue);
+ Glue = Chain.getValue(1);
+ }
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue N1Ops[] = {Chain, Glue};
+ SDNode *N1 = DAG.getMachineNode(
+ TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1));
Chain = SDValue(N1, 0);
// Reads the content of XCR and returns it in registers EDX:EAX.
+ SDValue LO, HI;
if (Subtarget.is64Bit()) {
LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
@@ -22420,60 +23318,15 @@ static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
LO.getValue(2));
}
Chain = HI.getValue(1);
+ Glue = HI.getValue(2);
if (Subtarget.is64Bit()) {
- // Merge the two 32-bit values into a 64-bit one..
+ // Merge the two 32-bit values into a 64-bit one.
SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
DAG.getConstant(32, DL, MVT::i8));
Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
Results.push_back(Chain);
- return;
- }
-
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { LO, HI };
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
- Results.push_back(Pair);
- Results.push_back(Chain);
-}
-
-/// Handles the lowering of builtin intrinsics that read performance monitor
-/// counters (x86_rdpmc).
-static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- SmallVectorImpl<SDValue> &Results) {
- assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue LO, HI;
-
- // The ECX register is used to select the index of the performance counter
- // to read.
- SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
- N->getOperand(2));
- SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
-
- // Reads the content of a 64-bit performance counter and returns it in the
- // registers EDX:EAX.
- if (Subtarget.is64Bit()) {
- LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
- HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
- LO.getValue(2));
- } else {
- LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
- HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
- LO.getValue(2));
- }
- Chain = HI.getValue(1);
-
- if (Subtarget.is64Bit()) {
- // The EAX register is loaded with the low-order 32 bits. The EDX register
- // is loaded with the supported high-order bits of the counter.
- SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, DL, MVT::i8));
- Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
- Results.push_back(Chain);
- return;
+ return Glue;
}
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
@@ -22481,6 +23334,7 @@ static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(Chain);
+ return Glue;
}
/// Handles the lowering of builtin intrinsics that read the time stamp counter
@@ -22490,59 +23344,28 @@ static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SmallVectorImpl<SDValue> &Results) {
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
- SDValue LO, HI;
-
// The processor's time-stamp counter (a 64-bit MSR) is stored into the
// EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
// and the EAX register is loaded with the low-order 32 bits.
- if (Subtarget.is64Bit()) {
- LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
- HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
- LO.getValue(2));
- } else {
- LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
- HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
- LO.getValue(2));
- }
- SDValue Chain = HI.getValue(1);
-
- SDValue TSC;
- if (Subtarget.is64Bit()) {
- // The EDX register is loaded with the high-order 32 bits of the MSR, and
- // the EAX register is loaded with the low-order 32 bits.
- TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
- DAG.getConstant(32, DL, MVT::i8));
- TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
- } else {
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
- }
-
- if (Opcode == X86ISD::RDTSCP_DAG) {
- assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
-
- // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
- // the ECX register. Add 'ecx' explicitly to the chain.
- SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
- HI.getValue(2));
-
- Results.push_back(TSC);
- Results.push_back(ecx);
- Results.push_back(ecx.getValue(1));
+ SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
+ /* NoRegister */0, Subtarget,
+ Results);
+ if (Opcode != X86::RDTSCP)
return;
- }
- Results.push_back(TSC);
- Results.push_back(Chain);
+ SDValue Chain = Results[1];
+ // Instruction RDTSCP loads the IA32_TSC_AUX MSR (address C000_0103H) into
+ // the ECX register. Add 'ecx' explicitly to the chain.
+ SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue);
+ Results[1] = ecx;
+ Results.push_back(ecx.getValue(1));
}
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<SDValue, 3> Results;
SDLoc DL(Op);
- getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
+ getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget,
Results);
return DAG.getMergeValues(Results, DL);
}
@@ -22621,6 +23444,22 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return MarkEHRegistrationNode(Op, DAG);
case llvm::Intrinsic::x86_seh_ehguard:
return MarkEHGuard(Op, DAG);
+ case llvm::Intrinsic::x86_rdpkru: {
+ SDLoc dl(Op);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ // Create a RDPKRU node and pass 0 to the ECX parameter.
+ return DAG.getNode(X86ISD::RDPKRU, dl, VTs, Op.getOperand(0),
+ DAG.getConstant(0, dl, MVT::i32));
+ }
+ case llvm::Intrinsic::x86_wrpkru: {
+ SDLoc dl(Op);
+ // Create a WRPKRU node, pass the input to the EAX parameter, and pass 0
+ // to the EDX and ECX parameters.
+ return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(2),
+ DAG.getConstant(0, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32));
+ }
case llvm::Intrinsic::x86_flags_read_u32:
case llvm::Intrinsic::x86_flags_read_u64:
case llvm::Intrinsic::x86_flags_write_u32:
@@ -22630,7 +23469,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCopyImplyingStackAdjustment(true);
// Don't do anything here, we will expand these intrinsics out later
- // during ExpandISelPseudos in EmitInstrWithCustomInserter.
+ // during FinalizeISel in EmitInstrWithCustomInserter.
return SDValue();
}
case Intrinsic::x86_lwpins32:
@@ -22660,8 +23499,28 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
Op->getOperand(3), Op->getOperand(4));
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
- SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC);
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
+ Operation.getValue(1));
+ }
+ case Intrinsic::x86_enqcmd:
+ case Intrinsic::x86_enqcmds: {
+ SDLoc dl(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic!");
+ case Intrinsic::x86_enqcmd:
+ Opcode = X86ISD::ENQCMD;
+ break;
+ case Intrinsic::x86_enqcmds:
+ Opcode = X86ISD::ENQCMDS;
+ break;
+ }
+ SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
+ Op.getOperand(3));
+ SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));
}
}
@@ -22707,7 +23566,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
+ return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
Chain, Subtarget);
}
case SCATTER: {
@@ -22743,15 +23602,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getMergeValues(Results, dl);
}
// Read Performance Monitoring Counters.
- case RDPMC: {
- SmallVector<SDValue, 2> Results;
- getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
- return DAG.getMergeValues(Results, dl);
- }
- // Get Extended Control Register.
+ case RDPMC:
+ // Get Extended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
- getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results);
+
+ // RDPMC uses ECX to select the index of the performance counter to read.
+ // XGETBV uses ECX to select the index of the XCR register to return.
+ // The result is stored into registers EDX:EAX.
+ expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
+ Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// XTEST intrinsics.
@@ -22861,7 +23721,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
- SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
+ SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false);
FuncInfo->setFAIndex(FrameAddrIndex);
}
return DAG.getFrameIndex(FrameAddrIndex, VT);
@@ -23444,10 +24304,6 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
- // Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget.hasInt256())
- return Lower256IntUnary(Op, DAG);
-
assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
"Only scalar CTTZ requires custom lowering");
@@ -23539,22 +24395,48 @@ static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
return split256IntArith(Op, DAG);
}
-static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
+ unsigned Opcode = Op.getOpcode();
if (VT.getScalarType() == MVT::i1) {
SDLoc dl(Op);
- switch (Op.getOpcode()) {
+ switch (Opcode) {
default: llvm_unreachable("Expected saturated arithmetic opcode");
case ISD::UADDSAT:
case ISD::SADDSAT:
- return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
+ // *addsat i1 X, Y --> X | Y
+ return DAG.getNode(ISD::OR, dl, VT, X, Y);
case ISD::USUBSAT:
case ISD::SSUBSAT:
- return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
- DAG.getNOT(dl, Op.getOperand(1), VT));
+ // *subsat i1 X, Y --> X & ~Y
+ return DAG.getNode(ISD::AND, dl, VT, X, DAG.getNOT(dl, Y, VT));
}
}
+ if (VT.is128BitVector()) {
+ // Avoid the generic expansion with min/max if we don't have pminu*/pmaxu*.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), VT);
+ SDLoc DL(Op);
+ if (Opcode == ISD::UADDSAT && !TLI.isOperationLegal(ISD::UMIN, VT)) {
+ // uaddsat X, Y --> (X >u (X + Y)) ? -1 : X + Y
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, X, Y);
+ SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Add, ISD::SETUGT);
+ return DAG.getSelect(DL, VT, Cmp, DAG.getAllOnesConstant(DL, VT), Add);
+ }
+ if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
+ // usubsat X, Y --> (X >u Y) ? X - Y : 0
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
+ SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
+ return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+ }
+ // Use default expansion.
+ return SDValue();
+ }
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
@@ -23886,9 +24768,6 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Signed AVX2 implementation - extend xmm subvectors to ymm.
if (VT == MVT::v32i8 && IsSigned) {
- SDValue Lo = DAG.getIntPtrConstant(0, dl);
- SDValue Hi = DAG.getIntPtrConstant(NumElts / 2, dl);
-
MVT ExVT = MVT::v16i16;
SDValue ALo = extract128BitVector(A, 0, DAG, dl);
SDValue BLo = extract128BitVector(B, 0, DAG, dl);
@@ -23898,8 +24777,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
BLo = DAG.getNode(ExAVX, dl, ExVT, BLo);
AHi = DAG.getNode(ExAVX, dl, ExVT, AHi);
BHi = DAG.getNode(ExAVX, dl, ExVT, BHi);
- Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
- Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
+ SDValue Lo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
+ SDValue Hi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Lo, 8, DAG);
Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Hi, 8, DAG);
@@ -24156,6 +25035,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
APInt APIntShiftAmt;
if (!isConstantSplat(Amt, APIntShiftAmt))
return SDValue();
+
+ // If the shift amount is out of range, return undef.
+ if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
+ return DAG.getUNDEF(VT);
+
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
@@ -24197,8 +25081,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
- return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
+ APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt);
+ return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
@@ -24224,54 +25108,6 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-// If V is a splat value, return the source vector and splat index;
-static SDValue IsSplatVector(SDValue V, int &SplatIdx, SelectionDAG &DAG) {
- V = peekThroughEXTRACT_SUBVECTORs(V);
-
- EVT VT = V.getValueType();
- unsigned Opcode = V.getOpcode();
- switch (Opcode) {
- default: {
- APInt UndefElts;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
- if (DAG.isSplatValue(V, DemandedElts, UndefElts)) {
- // Handle case where all demanded elements are UNDEF.
- if (DemandedElts.isSubsetOf(UndefElts)) {
- SplatIdx = 0;
- return DAG.getUNDEF(VT);
- }
- SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
- return V;
- }
- break;
- }
- case ISD::VECTOR_SHUFFLE: {
- // Check if this is a shuffle node doing a splat.
- // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
- // getTargetVShiftNode currently struggles without the splat source.
- auto *SVN = cast<ShuffleVectorSDNode>(V);
- if (!SVN->isSplat())
- break;
- int Idx = SVN->getSplatIndex();
- int NumElts = V.getValueType().getVectorNumElements();
- SplatIdx = Idx % NumElts;
- return V.getOperand(Idx / NumElts);
- }
- }
-
- return SDValue();
-}
-
-static SDValue GetSplatValue(SDValue V, const SDLoc &dl,
- SelectionDAG &DAG) {
- int SplatIdx;
- if (SDValue SrcVector = IsSplatVector(V, SplatIdx, DAG))
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- SrcVector.getValueType().getScalarType(), SrcVector,
- DAG.getIntPtrConstant(SplatIdx, dl));
- return SDValue();
-}
-
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -24282,7 +25118,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
- if (SDValue BaseShAmt = GetSplatValue(Amt, dl, DAG)) {
+ if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
@@ -25102,24 +25938,45 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
- return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
- else if (OpWidth == 128)
+ return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit();
+ if (OpWidth == 128)
return Subtarget.hasCmpxchg16b();
- else
- return false;
+
+ return false;
}
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
+// TODO: In 32-bit mode, use FISTP when X87 is available?
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
- return needsCmpXchgNb(SI->getValueOperand()->getType());
+ Type *MemType = SI->getValueOperand()->getType();
+
+ bool NoImplicitFloatOps =
+ SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
+ if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+ return false;
+
+ return needsCmpXchgNb(MemType);
}
// Note: this turns large loads into lock cmpxchg8b/16b.
-// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
+// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
- auto PTy = cast<PointerType>(LI->getPointerOperandType());
- return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
- : AtomicExpansionKind::None;
+ Type *MemType = LI->getType();
+
+ // If this is a 64-bit atomic load on a 32-bit target and SSE2 is enabled, we
+ // can use movq to do the load. If we have X87 we can load into an 80-bit
+ // X87 register and store it to a stack temporary.
+ bool NoImplicitFloatOps =
+ LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
+ if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
+ !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ (Subtarget.hasSSE2() || Subtarget.hasX87()))
+ return AtomicExpansionKind::None;
+
+ return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
@@ -25155,6 +26012,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
@@ -25171,13 +26030,20 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
if (MemType->getPrimitiveSizeInBits() > NativeWidth)
return nullptr;
+ // If this is a canonical idempotent atomicrmw w/no uses, we have a better
+ // lowering available in lowerAtomicArith.
+ // TODO: push more cases through this path.
+ if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
+ if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
+ AI->use_empty())
+ return nullptr;
+
auto Builder = IRBuilder<>(AI);
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
// ReleaseAcquire orderings.
auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
- auto Ptr = AI->getPointerOperand();
// Before the load we need a fence. Here is an example lifted from
// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence
@@ -25212,14 +26078,80 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
Builder.CreateCall(MFence, {});
// Finally we can emit the atomic load.
- LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr,
- AI->getType()->getPrimitiveSizeInBits());
+ LoadInst *Loaded =
+ Builder.CreateAlignedLoad(AI->getType(), AI->getPointerOperand(),
+ AI->getType()->getPrimitiveSizeInBits());
Loaded->setAtomic(Order, SSID);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
return Loaded;
}
+/// Emit a locked operation on a stack location which does not change any
+/// memory location, but does involve a lock prefix. Location is chosen to be
+/// a) very likely accessed only by a single thread to minimize cache traffic,
+/// and b) definitely dereferenceable. Returns the new Chain result.
+static SDValue emitLockedStackOp(SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDValue Chain, SDLoc DL) {
+ // Implementation notes:
+ // 1) LOCK prefix creates a full read/write reordering barrier for memory
+ // operations issued by the current processor. As such, the location
+ // referenced is not relevant for the ordering properties of the instruction.
+ // See: Intel® 64 and IA-32 Architectures Software Developer’s Manual,
+ // 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
+ // 2) Using an immediate operand appears to be the best encoding choice
+ // here since it doesn't require an extra register.
+ // 3) OR appears to be very slightly faster than ADD. (Though, the difference
+ // is small enough it might just be measurement noise.)
+ // 4) When choosing offsets, there are several contributing factors:
+ // a) If there's no redzone, we default to TOS. (We could allocate a cache
+ // line aligned stack object to improve this case.)
+ // b) To minimize our chances of introducing a false dependence, we prefer
+ // to offset the stack usage from TOS slightly.
+ // c) To minimize concerns about cross thread stack usage - in particular,
+ // the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
+ // captures state in the TOS frame and accesses it from many threads -
+ // we want to use an offset such that the offset is in a distinct cache
+ // line from the TOS frame.
+ //
+ // For a general discussion of the tradeoffs and benchmark results, see:
+ // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
+
+ auto &MF = DAG.getMachineFunction();
+ auto &TFL = *Subtarget.getFrameLowering();
+ const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0;
+
+ if (Subtarget.is64Bit()) {
+ SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::RSP, MVT::i64), // Base
+ DAG.getTargetConstant(1, DL, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i64), // Index
+ DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i16), // Segment.
+ Zero,
+ Chain};
+ SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
+ MVT::Other, Ops);
+ return SDValue(Res, 1);
+ }
+
+ SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, DL, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(SPOffset, DL, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i16), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
+ MVT::Other, Ops);
+ return SDValue(Res, 1);
+}
+
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -25235,19 +26167,8 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
- SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
- SDValue Ops[] = {
- DAG.getRegister(X86::ESP, MVT::i32), // Base
- DAG.getTargetConstant(1, dl, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i32), // Index
- DAG.getTargetConstant(0, dl, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i32), // Segment.
- Zero,
- Chain
- };
- SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, dl, MVT::Other, Ops);
- return SDValue(Res, 0);
+ SDValue Chain = Op.getOperand(0);
+ return emitLockedStackOp(DAG, Subtarget, Chain, dl);
}
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
@@ -25288,10 +26209,8 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
MVT::i32, cpOut.getValue(2));
SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
- return SDValue();
+ return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
+ cpOut, Success, EFLAGS.getValue(1));
}
// Create MOVMSKB, taking into account whether we need to split for AVX1.
@@ -25703,6 +26622,7 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
/// Lower atomic_load_ops into LOCK-prefixed operations.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
SDValue Chain = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -25717,7 +26637,6 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
// Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
// select LXADD if LOCK_SUB can't be selected.
if (Opc == ISD::ATOMIC_LOAD_SUB) {
- AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
RHS, AN->getMemOperand());
@@ -25727,35 +26646,93 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
return N;
}
+ // Specialized lowering for the canonical form of an idempotent atomicrmw.
+ // The core idea here is that since the memory location isn't actually
+ // changing, all we need is a lowering for the *ordering* impacts of the
+ // atomicrmw. As such, we can choose a different operation and memory
+ // location to minimize impact on other code.
+ if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) {
+ // On X86, the only ordering which actually requires an instruction is
+ // seq_cst that isn't SingleThread; everything else just needs to be
+ // preserved during codegen and then dropped. Note that we expect (but
+ // don't assume) that orderings other than seq_cst and acq_rel have been
+ // canonicalized to a store or load.
+ if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
+ AN->getSyncScopeID() == SyncScope::System) {
+ // Prefer a locked operation against a stack location to minimize cache
+ // traffic. This assumes that stack locations are very likely to be
+ // accessed only by the owning thread.
+ SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
+ assert(!N->hasAnyUseOfValue(0));
+ // NOTE: The getUNDEF is needed to give something for the unused result 0.
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
+ DAG.getUNDEF(VT), NewChain);
+ }
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain);
+ assert(!N->hasAnyUseOfValue(0));
+ // NOTE: The getUNDEF is needed to give something for the unused result 0.
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
+ DAG.getUNDEF(VT), NewChain);
+ }
+
SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
- DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
- return SDValue();
+ // NOTE: The getUNDEF is needed to give something for the unused result 0.
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
+ DAG.getUNDEF(VT), LockOp.getValue(1));
}
-static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
- SDNode *Node = Op.getNode();
+static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ auto *Node = cast<AtomicSDNode>(Op.getNode());
SDLoc dl(Node);
- EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
+ EVT VT = Node->getMemoryVT();
+
+ bool IsSeqCst = Node->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+ bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);
+
+ // If this store is not sequentially consistent and the type is legal
+ // we can just keep it.
+ if (!IsSeqCst && IsTypeLegal)
+ return Op;
+
+ if (VT == MVT::i64 && !IsTypeLegal) {
+ // For illegal i64 atomic_stores, we can try to use MOVQ if SSE2 is enabled.
+ // FIXME: Use movlps with SSE1.
+ // FIXME: Use fist with X87.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ Subtarget.hasSSE2()) {
+ SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ Node->getOperand(2));
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
+ SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
+ Ops, MVT::i64,
+ Node->getMemOperand());
+
+ // If this is a sequentially consistent store, also emit an appropriate
+ // barrier.
+ if (IsSeqCst)
+ Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);
+
+ return Chain;
+ }
+ }
// Convert seq_cst store -> xchg
// Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
- // FIXME: On 32-bit, store -> fist or movq would be more efficient
- // (The only way to get a 16-byte store is cmpxchg16b)
// FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
- if (cast<AtomicSDNode>(Node)->getOrdering() ==
- AtomicOrdering::SequentiallyConsistent ||
- !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
- cast<AtomicSDNode>(Node)->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), Node->getOperand(2),
- cast<AtomicSDNode>(Node)->getMemOperand());
- return Swap.getValue(1);
- }
- // Other atomic stores have a simple pattern.
- return Op;
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ Node->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ Node->getMemOperand());
+ return Swap.getValue(1);
}
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
@@ -25919,7 +26896,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
return SDValue(NewScatter.getNode(), 1);
}
return SDValue();
@@ -25935,7 +26911,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
return SDValue(NewScatter.getNode(), 1);
}
// Custom widen all the operands to avoid promotion.
@@ -25980,7 +26955,6 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
SDValue NewScatter = DAG.getTargetMemSDNode<X86MaskedScatterSDNode>(
VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand());
- DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
return SDValue(NewScatter.getNode(), 1);
}
@@ -25991,8 +26965,28 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
SDValue Mask = N->getMask();
+ MVT MaskVT = Mask.getSimpleValueType();
+ SDValue PassThru = N->getPassThru();
SDLoc dl(Op);
+ // Handle AVX masked loads which don't support passthru other than 0.
+ if (MaskVT.getVectorElementType() != MVT::i1) {
+ // We also allow undef in the isel pattern.
+ if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
+ return Op;
+
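+    // Re-issue the masked load with an all-zeros passthru, which AVX supports
+    // natively, then blend the original passthru back in under the same mask.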
+ SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(),
+ N->getBasePtr(), Mask,
+ getZeroVector(VT, Subtarget, DAG, dl),
+ N->getMemoryVT(), N->getMemOperand(),
+ N->getExtensionType(),
+ N->isExpandingLoad());
+ // Emit a blend.
+ SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad,
+ PassThru);
+ return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
+ }
+
assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
"Expanding masked load is supported on AVX-512 target only!");
@@ -26011,7 +27005,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
// VLX the vector should be widened to 512 bit
unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
- SDValue PassThru = ExtendToType(N->getPassThru(), WideDataVT, DAG);
+ PassThru = ExtendToType(PassThru, WideDataVT, DAG);
// Mask element has to be i1.
assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
@@ -26179,7 +27173,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
- case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG);
+ case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget);
case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
@@ -26272,7 +27266,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UADDSAT:
case ISD::SADDSAT:
case ISD::USUBSAT:
- case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
+ case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
@@ -26301,12 +27295,19 @@ void X86TargetLowering::LowerOperationWrapper(SDNode *N,
if (!Res.getNode())
return;
- assert((N->getNumValues() <= Res->getNumValues()) &&
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (N->getNumValues() == 1) {
+ Results.push_back(Res);
+ return;
+ }
+
+ // If the original node has multiple results, then the return node should
+ // have the same number of results.
+ assert((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
  // Places new result values based on N result number.
- // In some cases (LowerSINT_TO_FP for example) Res has more result values
- // than original node, chain should be dropped(last value).
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
@@ -26319,7 +27320,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default:
+#ifndef NDEBUG
+ dbgs() << "ReplaceNodeResults: ";
+ N->dump(&DAG);
+#endif
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::CTPOP: {
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ // Use a v2i64 if possible.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
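+      // Widen the scalar into a v2i64 so the vector CTPOP lowering can handle
+      // it, rather than expanding the i64 count on the two 32-bit halves.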
+ SDValue Wide =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0));
+ Wide = DAG.getNode(ISD::CTPOP, dl, MVT::v2i64, Wide);
+      // The bit count fits in 32 bits; extract it as an i32 and then
+      // zero-extend to i64. Otherwise we end up extracting bits 63:32 separately.
+ Wide = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Wide);
+ Wide = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Wide,
+ DAG.getIntPtrConstant(0, dl));
+ Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Wide);
+ Results.push_back(Wide);
+ }
+ return;
+ }
case ISD::MUL: {
EVT VT = N->getValueType(0);
assert(VT.isVector() && "Unexpected VT");
@@ -26385,6 +27410,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
+ case ISD::ABS: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ assert(N->getValueType(0) == MVT::i64 &&
+ "Unexpected type (!= i64) on ABS.");
+ MVT HalfT = MVT::i32;
+ SDValue Lo, Hi, Tmp;
+ SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
+
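+    // Expand abs(x) as (x + s) ^ s with s = x >> 63, carried out on the two
+    // 32-bit halves: the sign word Tmp is added with carry, then XORed back in.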
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+ DAG.getConstant(0, dl, HalfT));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+ DAG.getConstant(1, dl, HalfT));
+ Tmp = DAG.getNode(
+ ISD::SRA, dl, HalfT, Hi,
+ DAG.getConstant(HalfT.getSizeInBits() - 1, dl,
+ TLI.getShiftAmountTy(HalfT, DAG.getDataLayout())));
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
+ SDValue(Lo.getNode(), 1));
+ Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
+ Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
+ Results.push_back(Lo);
+ Results.push_back(Hi);
+ return;
+ }
case ISD::SETCC: {
// Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
    // setCC result type is v2i1 because type legalization will end up with
@@ -26557,14 +27607,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND: {
- if (!ExperimentalVectorWideningLegalization)
- return;
-
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&
- (InVT == MVT::v4i16 || InVT == MVT::v4i8)) {
+ (InVT == MVT::v4i16 || InVT == MVT::v4i8) &&
+ getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector) {
+ assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");
// Custom split this so we can extend i8/i16->i32 invec. This is better
// since sign_extend_inreg i8/i16->i64 requires an extend to i32 using
// sra. Then extending from i32 to i64 using pcmpgt. By custom splitting
@@ -26589,16 +27638,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- if ((VT == MVT::v16i32 || VT == MVT::v8i64) && InVT.is128BitVector()) {
+ if (VT == MVT::v16i32 || VT == MVT::v8i64) {
+ if (!InVT.is128BitVector()) {
+ // Not a 128 bit vector, but maybe type legalization will promote
+ // it to 128 bits.
+ if (getTypeAction(*DAG.getContext(), InVT) != TypePromoteInteger)
+ return;
+ InVT = getTypeToTransformTo(*DAG.getContext(), InVT);
+ if (!InVT.is128BitVector())
+ return;
+
+ // Promote the input to 128 bits. Type legalization will turn this into
+ // zext_inreg/sext_inreg.
+ In = DAG.getNode(N->getOpcode(), dl, InVT, In);
+ }
+
// Perform custom splitting instead of the two stage extend we would get
// by default.
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
assert(isTypeLegal(LoVT) && "Split VT not legal?");
- bool IsSigned = N->getOpcode() == ISD::SIGN_EXTEND;
-
- SDValue Lo = getExtendInVec(IsSigned, dl, LoVT, In, DAG);
+ SDValue Lo = getExtendInVec(N->getOpcode(), dl, LoVT, In, DAG);
// We need to shift the input over by half the number of elements.
unsigned NumElts = InVT.getVectorNumElements();
@@ -26608,7 +27669,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
ShufMask[i] = i + HalfNumElts;
SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
- Hi = getExtendInVec(IsSigned, dl, HiVT, Hi, DAG);
+ Hi = getExtendInVec(N->getOpcode(), dl, HiVT, Hi, DAG);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
Results.push_back(Res);
@@ -26735,17 +27796,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- std::pair<SDValue,SDValue> Vals =
- FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
- SDValue FIST = Vals.first, StackSlot = Vals.second;
- if (FIST.getNode()) {
- // Return a load from the stack slot.
- if (StackSlot.getNode())
- Results.push_back(
- DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo()));
- else
- Results.push_back(FIST);
- }
+ if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned))
+ Results.push_back(V);
return;
}
case ISD::SINT_TO_FP: {
@@ -26800,31 +27852,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
- return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
+ return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget,
Results);
case Intrinsic::x86_rdtscp:
- return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
+ return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget,
Results);
case Intrinsic::x86_rdpmc:
- return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
-
+ expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
+ Results);
+ return;
case Intrinsic::x86_xgetbv:
- return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
+ expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
+ Results);
+ return;
}
}
- case ISD::INTRINSIC_WO_CHAIN: {
- if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG))
- Results.push_back(V);
- return;
- }
case ISD::READCYCLECOUNTER: {
- return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
- Results);
+ return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results);
}
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
+ assert((!Regs64bit || Subtarget.hasCmpxchg16b()) &&
+ "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
@@ -26903,6 +27954,66 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(EFLAGS.getValue(1));
return;
}
+ case ISD::ATOMIC_LOAD: {
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
+ auto *Node = cast<AtomicSDNode>(N);
+ if (Subtarget.hasSSE2()) {
+ // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
+        // lower 64 bits.
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ MVT::i64, Node->getMemOperand());
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Res);
+ Results.push_back(Ld.getValue(1));
+ return;
+ }
+ if (Subtarget.hasX87()) {
+ // First load this into an 80-bit X87 register. This will put the whole
+ // integer into the significand.
+ // FIXME: Do we need to glue? See FIXME comment in BuildFILD.
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG,
+ dl, Tys, Ops, MVT::i64,
+ Node->getMemOperand());
+ SDValue Chain = Result.getValue(1);
+ SDValue InFlag = Result.getValue(2);
+
+ // Now store the X87 register to a stack temporary and convert to i64.
+ // This store is not atomic and doesn't need to be.
+ // FIXME: We don't need a stack temporary if the result of the load
+ // is already being stored. We could just directly store there.
+ SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag };
+ Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl,
+ DAG.getVTList(MVT::Other), StoreOps,
+ MVT::i64, MPI, 0 /*Align*/,
+ MachineMemOperand::MOStore);
+
+ // Finally load the value back from the stack temporary and return it.
+ // This load is not atomic and doesn't need to be.
+ // This load will be further type legalized.
+ Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI);
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ return;
+ }
+ }
+ // TODO: Use MOVLPS when SSE1 is available?
+ // Delegate to generic TypeLegalization. Situations we can really handle
+ // should have already been dealt with by AtomicExpandPass.cpp.
+ break;
+ }
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -26914,11 +28025,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
- case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
- }
+
case ISD::BITCAST: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
@@ -27061,19 +28171,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (!ISD::isNON_EXTLoad(N))
return;
auto *Ld = cast<LoadSDNode>(N);
- MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
- SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
- Ld->getPointerInfo(),
- Ld->getAlignment(),
- Ld->getMemOperand()->getFlags());
- SDValue Chain = Res.getValue(1);
- MVT WideVT = MVT::getVectorVT(LdVT, 2);
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
- MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() * 2);
- Res = DAG.getBitcast(CastVT, Res);
+ if (Subtarget.hasSSE2()) {
+ MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+ SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ SDValue Chain = Res.getValue(1);
+ MVT WideVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ Res = DAG.getBitcast(CastVT, Res);
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
+ assert(Subtarget.hasSSE1() && "Expected SSE");
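+    // With only SSE1 available, use a zero-extending 64-bit vector load with a
+    // v4f32 result instead of an integer load.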
+ SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
+ SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
+ SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ MVT::i64, Ld->getMemOperand());
Results.push_back(Res);
- Results.push_back(Chain);
+ Results.push_back(Res.getValue(1));
return;
}
}
@@ -27092,26 +28211,22 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FXOR: return "X86ISD::FXOR";
case X86ISD::FILD: return "X86ISD::FILD";
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
- case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
- case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
- case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
+ case X86ISD::FIST: return "X86ISD::FIST";
+ case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
- case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
- case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
- case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::CMPM: return "X86ISD::CMPM";
- case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND";
+ case X86ISD::CMPM_SAE: return "X86ISD::CMPM_SAE";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCC: return "X86ISD::FSETCC";
case X86ISD::FSETCCM: return "X86ISD::FSETCCM";
- case X86ISD::FSETCCM_RND: return "X86ISD::FSETCCM_RND";
+ case X86ISD::FSETCCM_SAE: return "X86ISD::FSETCCM_SAE";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -27140,12 +28255,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAXS: return "X86ISD::FMAXS";
- case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
- case X86ISD::FMAXS_RND: return "X86ISD::FMAX_RND";
+ case X86ISD::FMAX_SAE: return "X86ISD::FMAX_SAE";
+ case X86ISD::FMAXS_SAE: return "X86ISD::FMAXS_SAE";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FMINS: return "X86ISD::FMINS";
- case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
- case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND";
+ case X86ISD::FMIN_SAE: return "X86ISD::FMIN_SAE";
+ case X86ISD::FMINS_SAE: return "X86ISD::FMINS_SAE";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
@@ -27177,6 +28292,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LAND: return "X86ISD::LAND";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VEXTRACT_STORE: return "X86ISD::VEXTRACT_STORE";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
@@ -27188,11 +28304,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
- case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
- case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
+ case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE";
+ case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS";
+ case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND";
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
+ case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
@@ -27202,6 +28320,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::VSHLV: return "X86ISD::VSHLV";
+ case X86ISD::VSRLV: return "X86ISD::VSRLV";
case X86ISD::VSRAV: return "X86ISD::VSRAV";
case X86ISD::VROTLI: return "X86ISD::VROTLI";
case X86ISD::VROTRI: return "X86ISD::VROTRI";
@@ -27263,11 +28383,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
+ case X86ISD::VFIXUPIMM_SAE: return "X86ISD::VFIXUPIMM_SAE";
case X86ISD::VFIXUPIMMS: return "X86ISD::VFIXUPIMMS";
+ case X86ISD::VFIXUPIMMS_SAE: return "X86ISD::VFIXUPIMMS_SAE";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
- case X86ISD::VRANGE_RND: return "X86ISD::VRANGE_RND";
+ case X86ISD::VRANGE_SAE: return "X86ISD::VRANGE_SAE";
case X86ISD::VRANGES: return "X86ISD::VRANGES";
- case X86ISD::VRANGES_RND: return "X86ISD::VRANGES_RND";
+ case X86ISD::VRANGES_SAE: return "X86ISD::VRANGES_SAE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::PSADBW: return "X86ISD::PSADBW";
@@ -27281,6 +28403,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
+ case X86ISD::RDPKRU: return "X86ISD::RDPKRU";
+ case X86ISD::WRPKRU: return "X86ISD::WRPKRU";
case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
case X86ISD::VPSHA: return "X86ISD::VPSHA";
@@ -27302,17 +28426,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
- case X86ISD::VRNDSCALE_RND: return "X86ISD::VRNDSCALE_RND";
+ case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE";
case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
- case X86ISD::VRNDSCALES_RND: return "X86ISD::VRNDSCALES_RND";
+ case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE";
case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
- case X86ISD::VREDUCE_RND: return "X86ISD::VREDUCE_RND";
+ case X86ISD::VREDUCE_SAE: return "X86ISD::VREDUCE_SAE";
case X86ISD::VREDUCES: return "X86ISD::VREDUCES";
- case X86ISD::VREDUCES_RND: return "X86ISD::VREDUCES_RND";
+ case X86ISD::VREDUCES_SAE: return "X86ISD::VREDUCES_SAE";
case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
- case X86ISD::VGETMANT_RND: return "X86ISD::VGETMANT_RND";
+ case X86ISD::VGETMANT_SAE: return "X86ISD::VGETMANT_SAE";
case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS";
- case X86ISD::VGETMANTS_RND: return "X86ISD::VGETMANTS_RND";
+ case X86ISD::VGETMANTS_SAE: return "X86ISD::VGETMANTS_SAE";
case X86ISD::PCMPESTR: return "X86ISD::PCMPESTR";
case X86ISD::PCMPISTR: return "X86ISD::PCMPISTR";
case X86ISD::XTEST: return "X86ISD::XTEST";
@@ -27323,26 +28447,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::RCP14: return "X86ISD::RCP14";
case X86ISD::RCP14S: return "X86ISD::RCP14S";
case X86ISD::RCP28: return "X86ISD::RCP28";
+ case X86ISD::RCP28_SAE: return "X86ISD::RCP28_SAE";
case X86ISD::RCP28S: return "X86ISD::RCP28S";
+ case X86ISD::RCP28S_SAE: return "X86ISD::RCP28S_SAE";
case X86ISD::EXP2: return "X86ISD::EXP2";
+ case X86ISD::EXP2_SAE: return "X86ISD::EXP2_SAE";
case X86ISD::RSQRT14: return "X86ISD::RSQRT14";
case X86ISD::RSQRT14S: return "X86ISD::RSQRT14S";
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
+ case X86ISD::RSQRT28_SAE: return "X86ISD::RSQRT28_SAE";
case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
+ case X86ISD::RSQRT28S_SAE: return "X86ISD::RSQRT28S_SAE";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
+ case X86ISD::FADDS: return "X86ISD::FADDS";
case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND";
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
+ case X86ISD::FSUBS: return "X86ISD::FSUBS";
case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
+ case X86ISD::FMULS: return "X86ISD::FMULS";
case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::FDIVS: return "X86ISD::FDIVS";
case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
+ case X86ISD::FSQRTS: return "X86ISD::FSQRTS";
case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
- case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
- case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
+ case X86ISD::FGETEXP: return "X86ISD::FGETEXP";
+ case X86ISD::FGETEXP_SAE: return "X86ISD::FGETEXP_SAE";
+ case X86ISD::FGETEXPS: return "X86ISD::FGETEXPS";
+ case X86ISD::FGETEXPS_SAE: return "X86ISD::FGETEXPS_SAE";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
+ case X86ISD::SCALEF_RND: return "X86ISD::SCALEF_RND";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
+ case X86ISD::SCALEFS_RND: return "X86ISD::SCALEFS_RND";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
@@ -27351,23 +28489,27 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
case X86ISD::MCVTTP2SI: return "X86ISD::MCVTTP2SI";
case X86ISD::MCVTTP2UI: return "X86ISD::MCVTTP2UI";
- case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
- case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
+ case X86ISD::CVTTP2SI_SAE: return "X86ISD::CVTTP2SI_SAE";
+ case X86ISD::CVTTP2UI_SAE: return "X86ISD::CVTTP2UI_SAE";
case X86ISD::CVTTS2SI: return "X86ISD::CVTTS2SI";
case X86ISD::CVTTS2UI: return "X86ISD::CVTTS2UI";
- case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
- case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
+ case X86ISD::CVTTS2SI_SAE: return "X86ISD::CVTTS2SI_SAE";
+ case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE";
case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
+ case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P";
+ case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P";
case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT";
+ case X86ISD::SCALAR_SINT_TO_FP: return "X86ISD::SCALAR_SINT_TO_FP";
case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
+ case X86ISD::SCALAR_UINT_TO_FP: return "X86ISD::SCALAR_UINT_TO_FP";
case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
case X86ISD::MCVTPS2PH: return "X86ISD::MCVTPS2PH";
case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
- case X86ISD::CVTPH2PS_RND: return "X86ISD::CVTPH2PS_RND";
+ case X86ISD::CVTPH2PS_SAE: return "X86ISD::CVTPH2PS_SAE";
case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
case X86ISD::MCVTP2SI: return "X86ISD::MCVTP2SI";
@@ -27378,6 +28520,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
+ case X86ISD::CVTNE2PS2BF16: return "X86ISD::CVTNE2PS2BF16";
+ case X86ISD::CVTNEPS2BF16: return "X86ISD::CVTNEPS2BF16";
+ case X86ISD::MCVTNEPS2BF16: return "X86ISD::MCVTNEPS2BF16";
+ case X86ISD::DPBF16PS: return "X86ISD::DPBF16PS";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
case X86ISD::MGATHER: return "X86ISD::MGATHER";
case X86ISD::MSCATTER: return "X86ISD::MSCATTER";
@@ -27393,6 +28539,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::NT_BRIND: return "X86ISD::NT_BRIND";
case X86ISD::UMWAIT: return "X86ISD::UMWAIT";
case X86ISD::TPAUSE: return "X86ISD::TPAUSE";
+  case X86ISD::ENQCMD: return "X86ISD::ENQCMD";
+  case X86ISD::ENQCMDS: return "X86ISD::ENQCMDS";
+ case X86ISD::VP2INTERSECT: return "X86ISD::VP2INTERSECT";
}
return nullptr;
}
@@ -27478,6 +28627,38 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
return true;
}
+bool X86TargetLowering::isBinOp(unsigned Opcode) const {
+ switch (Opcode) {
+ // These are non-commutative binops.
+ // TODO: Add more X86ISD opcodes once we have test coverage.
+ case X86ISD::ANDNP:
+ case X86ISD::PCMPGT:
+ case X86ISD::FMAX:
+ case X86ISD::FMIN:
+ case X86ISD::FANDN:
+ return true;
+ }
+
+ return TargetLoweringBase::isBinOp(Opcode);
+}
+
+bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
+ switch (Opcode) {
+ // TODO: Add more X86ISD opcodes once we have test coverage.
+ case X86ISD::PCMPEQ:
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ:
+ case X86ISD::FMAXC:
+ case X86ISD::FMINC:
+ case X86ISD::FAND:
+ case X86ISD::FOR:
+ case X86ISD::FXOR:
+ return true;
+ }
+
+ return TargetLoweringBase::isCommutativeBinOp(Opcode);
+}
+
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -27713,87 +28894,6 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
return sinkMBB;
}
-static MachineBasicBlock *emitWRPKRU(MachineInstr &MI, MachineBasicBlock *BB,
- const X86Subtarget &Subtarget) {
- DebugLoc dl = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
- // insert input VAL into EAX
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
- .addReg(MI.getOperand(0).getReg());
- // insert zero to ECX
- BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
-
- // insert zero to EDX
- BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::EDX);
-
- // insert WRPKRU instruction
- BuildMI(*BB, MI, dl, TII->get(X86::WRPKRUr));
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-static MachineBasicBlock *emitRDPKRU(MachineInstr &MI, MachineBasicBlock *BB,
- const X86Subtarget &Subtarget) {
- DebugLoc dl = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-
- // insert zero to ECX
- BuildMI(*BB, MI, dl, TII->get(X86::MOV32r0), X86::ECX);
-
- // insert RDPKRU instruction
- BuildMI(*BB, MI, dl, TII->get(X86::RDPKRUr));
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
- .addReg(X86::EAX);
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
- const X86Subtarget &Subtarget,
- unsigned Opc) {
- DebugLoc dl = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- // Address into RAX/EAX, other two args into ECX, EDX.
- unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
- unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
- for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI.getOperand(i));
-
- unsigned ValOps = X86::AddrNumOperands;
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
- .addReg(MI.getOperand(ValOps).getReg());
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
- .addReg(MI.getOperand(ValOps + 1).getReg());
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(Opc));
-
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
-static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
- const X86Subtarget &Subtarget) {
- DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- // Address into RAX/EAX
- unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
- unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
- for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI->getOperand(i));
-
- // The instruction doesn't actually take any operands though.
- BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
-
- MI->eraseFromParent(); // The pseudo is gone now.
- return BB;
-}
-
MachineBasicBlock *
@@ -27823,10 +28923,18 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
unsigned ArgMode = MI.getOperand(7).getImm();
unsigned Align = MI.getOperand(8).getImm();
+ MachineFunction *MF = MBB->getParent();
+
// Memory Reference
assert(MI.hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
- SmallVector<MachineMemOperand *, 1> MMOs(MI.memoperands_begin(),
- MI.memoperands_end());
+
+ MachineMemOperand *OldMMO = MI.memoperands().front();
+
+ // Clone the MMO into two separate MMOs for loading and storing
+ MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand(
+ OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore);
+ MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand(
+ OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad);
// Machine Information
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -27891,7 +28999,6 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- MachineFunction *MF = MBB->getParent();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -27924,7 +29031,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
- .setMemRefs(MMOs);
+ .setMemRefs(LoadOnlyMMO);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -27933,8 +29040,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
- BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
- .addMBB(overflowMBB);
+ BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
+ .addMBB(overflowMBB).addImm(X86::COND_AE);
}
// In offsetMBB, emit code to use the reg_save_area.
@@ -27949,7 +29056,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 16)
.add(Segment)
- .setMemRefs(MMOs);
+ .setMemRefs(LoadOnlyMMO);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -27977,7 +29084,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, UseFPOffset ? 4 : 0)
.add(Segment)
.addReg(NextOffsetReg)
- .setMemRefs(MMOs);
+ .setMemRefs(StoreOnlyMMO);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -27996,7 +29103,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.add(Index)
.addDisp(Disp, 8)
.add(Segment)
- .setMemRefs(MMOs);
+ .setMemRefs(LoadOnlyMMO);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
@@ -28033,7 +29140,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
.addDisp(Disp, 8)
.add(Segment)
.addReg(NextAddrReg)
- .setMemRefs(MMOs);
+ .setMemRefs(StoreOnlyMMO);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
@@ -28091,7 +29198,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
- BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
+ BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(EndMBB).addImm(X86::COND_E);
MBB->addSuccessor(EndMBB);
}
@@ -28371,13 +29478,11 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
// Create the conditional branch instructions.
X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
- unsigned Opc = X86::GetCondBranchFromCond(FirstCC);
- BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB);
+ BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
X86::CondCode SecondCC =
X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
- unsigned Opc2 = X86::GetCondBranchFromCond(SecondCC);
- BuildMI(FirstInsertedMBB, DL, TII->get(Opc2)).addMBB(SinkMBB);
+ BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);
// SinkMBB:
// %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
@@ -28463,20 +29568,21 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineInstr *LastCMOV = &MI;
- MachineBasicBlock::iterator NextMIIt =
- std::next(MachineBasicBlock::iterator(MI));
+ MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI);
// Check for case 1, where there are multiple CMOVs with the same condition
// first. Of the two cases of multiple CMOV lowerings, case 1 reduces the
// number of jumps the most.
if (isCMOVPseudo(MI)) {
- // See if we have a string of CMOVS with the same condition.
+ // See if we have a string of CMOVS with the same condition. Skip over
+ // intervening debug insts.
while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&
(NextMIIt->getOperand(3).getImm() == CC ||
NextMIIt->getOperand(3).getImm() == OppCC)) {
LastCMOV = &*NextMIIt;
++NextMIIt;
+ NextMIIt = skipDebugInstructionsForward(NextMIIt, ThisMBB->end());
}
}
@@ -28508,8 +29614,18 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
SinkMBB->addLiveIn(X86::EFLAGS);
}
+ // Transfer any debug instructions inside the CMOV sequence to the sunk block.
+ auto DbgEnd = MachineBasicBlock::iterator(LastCMOV);
+ auto DbgIt = MachineBasicBlock::iterator(MI);
+ while (DbgIt != DbgEnd) {
+ auto Next = std::next(DbgIt);
+ if (DbgIt->isDebugInstr())
+ SinkMBB->push_back(DbgIt->removeFromParent());
+ DbgIt = Next;
+ }
+
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
- SinkMBB->splice(SinkMBB->begin(), ThisMBB,
+ SinkMBB->splice(SinkMBB->end(), ThisMBB,
std::next(MachineBasicBlock::iterator(LastCMOV)),
ThisMBB->end());
SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
@@ -28522,8 +29638,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
FalseMBB->addSuccessor(SinkMBB);
// Create the conditional branch instruction.
- unsigned Opc = X86::GetCondBranchFromCond(CC);
- BuildMI(ThisMBB, DL, TII->get(Opc)).addMBB(SinkMBB);
+ BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
// SinkMBB:
// %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
@@ -28540,53 +29655,6 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
- MachineBasicBlock *BB) const {
- // Combine the following atomic floating-point modification pattern:
- // a.store(reg OP a.load(acquire), release)
- // Transform them into:
- // OPss (%gpr), %xmm
- // movss %xmm, (%gpr)
- // Or sd equivalent for 64-bit operations.
- unsigned MOp, FOp;
- switch (MI.getOpcode()) {
- default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
- case X86::RELEASE_FADD32mr:
- FOp = X86::ADDSSrm;
- MOp = X86::MOVSSmr;
- break;
- case X86::RELEASE_FADD64mr:
- FOp = X86::ADDSDrm;
- MOp = X86::MOVSDmr;
- break;
- }
- const X86InstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned ValOpIdx = X86::AddrNumOperands;
- unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
- MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(FOp),
- MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
- .addReg(VSrc);
- for (int i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand &Operand = MI.getOperand(i);
- // Clear any kill flags on register operands as we'll create a second
- // instruction using the same address operands.
- if (Operand.isReg())
- Operand.setIsKill(false);
- MIB.add(Operand);
- }
- MachineInstr *FOpMI = MIB;
- MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
- for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.add(MI.getOperand(i));
- MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
- MI.eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
@@ -28652,7 +29720,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
- BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB);
+ BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
@@ -29279,7 +30347,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
.addReg(SSPCopyReg)
.addReg(SSPCopyReg);
- BuildMI(checkSspMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+ BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
checkSspMBB->addSuccessor(sinkMBB);
checkSspMBB->addSuccessor(fallMBB);
@@ -29309,7 +30377,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
.addReg(SSPCopyReg);
// Jump to sink in case PrevSSPReg <= SSPCopyReg.
- BuildMI(fallMBB, DL, TII->get(X86::JBE_1)).addMBB(sinkMBB);
+ BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE);
fallMBB->addSuccessor(sinkMBB);
fallMBB->addSuccessor(fixShadowMBB);
@@ -29332,7 +30400,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
.addImm(8);
// Jump if the result of the shift is zero.
- BuildMI(fixShadowMBB, DL, TII->get(X86::JE_1)).addMBB(sinkMBB);
+ BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
fixShadowMBB->addSuccessor(sinkMBB);
fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
@@ -29367,7 +30435,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
// Jump if the counter is not zero yet.
- BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JNE_1)).addMBB(fixShadowLoopMBB);
+ BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE);
fixShadowLoopMBB->addSuccessor(sinkMBB);
fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
@@ -29512,10 +30580,9 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
- MachineFrameInfo &MFI = MF->getFrameInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
- int FI = MFI.getFunctionContextIndex();
+ int FI = MF->getFrameInfo().getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
@@ -29613,7 +30680,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
.addReg(IReg)
.addImm(LPadList.size());
- BuildMI(DispatchBB, DL, TII->get(X86::JAE_1)).addMBB(TrapBB);
+ BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);
if (Subtarget.is64Bit()) {
unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
@@ -29766,7 +30833,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
+ case X86::CMOV_FR32X:
case X86::CMOV_FR64:
+ case X86::CMOV_FR64X:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
@@ -29821,10 +30890,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return BB;
}
- case X86::RELEASE_FADD32mr:
- case X86::RELEASE_FADD64mr:
- return EmitLoweredAtomicFP(MI, BB);
-
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
@@ -29836,27 +30901,37 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::FP80_TO_INT64_IN_MEM: {
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
- int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
+ int OrigCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FNSTCW16m)), CWFrameIdx);
+ TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
- // Load the old value of the high byte of the control word...
+ // Load the old value of the control word...
unsigned OldCW =
+ MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
+ OrigCWFrameIdx);
+
+    // OR 0b11 into bits 10 and 11. 0b11 is the encoding for round toward zero.
+ unsigned NewCW =
+ MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
+ .addReg(OldCW, RegState::Kill).addImm(0xC00);
+
+ // Extract to 16 bits.
+ unsigned NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
- CWFrameIdx);
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
- // Set the high part to be round to zero...
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
- .addImm(0xC7F);
+ // Prepare memory for FLDCW.
+ int NewCWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
+ NewCWFrameIdx)
+ .addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FLDCW16m)), CWFrameIdx);
-
- // Restore the memory image of control word to original value
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
- .addReg(OldCW);
+ TII->get(X86::FLDCW16m)), NewCWFrameIdx);
// Get the X86 opcode to use.
unsigned Opc;
@@ -29879,26 +30954,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Reload the original control word now.
addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FLDCW16m)), CWFrameIdx);
+ TII->get(X86::FLDCW16m)), OrigCWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
- // Thread synchronization.
- case X86::MONITOR:
- return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
- case X86::MONITORX:
- return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
-
- // Cache line zero
- case X86::CLZERO:
- return emitClzero(&MI, BB, Subtarget);
-
- // PKU feature
- case X86::WRPKRU:
- return emitWRPKRU(MI, BB, Subtarget);
- case X86::RDPKRU:
- return emitRDPKRU(MI, BB, Subtarget);
+
// xbegin
case X86::XBEGIN:
return emitXBegin(MI, BB, Subtarget.getInstrInfo());
@@ -30093,7 +31154,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
Op.getConstantOperandVal(1));
Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
- Known = Known.zextOrTrunc(BitWidth);
+ Known = Known.zextOrTrunc(BitWidth, false);
Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
break;
}
@@ -30150,6 +31211,27 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = Known.trunc(BitWidth);
break;
}
+ case X86ISD::ANDNP: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // ANDNP = (~X & Y);
+ Known.One &= Known2.Zero;
+ Known.Zero |= Known2.One;
+ break;
+ }
+ case X86ISD::FOR: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ Known.Zero &= Known2.Zero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ Known.One |= Known2.One;
+ break;
+ }
case X86ISD::CMOV: {
Known = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
// If we don't know any bits, early out.
@@ -30219,7 +31301,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
- unsigned VTBits = Op.getScalarValueSizeInBits();
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getScalarSizeInBits();
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
case X86ISD::SETCC_CARRY:
@@ -30257,7 +31340,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::VSHLI: {
SDValue Src = Op.getOperand(0);
- APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ const APInt &ShiftVal = Op.getConstantOperandAPInt(1);
if (ShiftVal.uge(VTBits))
return VTBits; // Shifted all bits out --> zero.
unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
@@ -30268,7 +31351,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
case X86ISD::VSRAI: {
SDValue Src = Op.getOperand(0);
- APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ APInt ShiftVal = Op.getConstantOperandAPInt(1);
if (ShiftVal.uge(VTBits - 1))
return VTBits; // Sign splat.
unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
@@ -30284,6 +31367,15 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
// Vector compares return zero/all-bits result values.
return VTBits;
+ case X86ISD::ANDNP: {
+ unsigned Tmp0 =
+ DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp0 == 1) return 1; // Early out.
+ unsigned Tmp1 =
+ DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp0, Tmp1);
+ }
+
case X86ISD::CMOV: {
unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp0 == 1) return 1; // Early out.
@@ -30292,6 +31384,54 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
}
}
+ // Handle target shuffles.
+ // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.
+ if (isTargetShuffle(Opcode)) {
+ bool IsUnary;
+ SmallVector<int, 64> Mask;
+ SmallVector<SDValue, 2> Ops;
+ if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask,
+ IsUnary)) {
+ unsigned NumOps = Ops.size();
+ unsigned NumElts = VT.getVectorNumElements();
+ if (Mask.size() == NumElts) {
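+        // Collect the demanded elements for each shuffle operand; the sign bit
+        // count of the result is the minimum over the demanded operands.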
+ SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ int M = Mask[i];
+ if (M == SM_SentinelUndef) {
+ // For UNDEF elements, we don't know anything about the common state
+ // of the shuffle result.
+ return 1;
+ } else if (M == SM_SentinelZero) {
+ // Zero = all sign bits.
+ continue;
+ }
+ assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
+ "Shuffle index out of range");
+
+ unsigned OpIdx = (unsigned)M / NumElts;
+ unsigned EltIdx = (unsigned)M % NumElts;
+ if (Ops[OpIdx].getValueType() != VT) {
+ // TODO - handle target shuffle ops with different value types.
+ return 1;
+ }
+ DemandedOps[OpIdx].setBit(EltIdx);
+ }
+ unsigned Tmp0 = VTBits;
+ for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
+ if (!DemandedOps[i])
+ continue;
+ unsigned Tmp1 =
+ DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
+ Tmp0 = std::min(Tmp0, Tmp1);
+ }
+ return Tmp0;
+ }
+ }
+ }
+
// Fallback case.
return 1;
}
@@ -30305,12 +31445,11 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const {
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool AllowFloatDomain, bool AllowIntDomain,
- SDValue &V1, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
+static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, const SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget, unsigned &Shuffle,
+ MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
@@ -30322,19 +31461,25 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
return true;
}
- // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
+ // Match against a ANY/ZERO_EXTEND_VECTOR_INREG instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
- bool Match = true;
+ bool MatchAny = true;
+ bool MatchZero = true;
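+      // MatchAny tracks an any_extend pattern (high parts undef); MatchZero
+      // tracks a zero_extend pattern (high parts undef or zero).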
unsigned NumDstElts = NumMaskElts / Scale;
- for (unsigned i = 0; i != NumDstElts && Match; ++i) {
- Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
- Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
+ for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
+ if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
+ MatchAny = MatchZero = false;
+ break;
+ }
+ MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
+ MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
- if (Match) {
+ if (MatchAny || MatchZero) {
+ assert(MatchZero && "Failed to match zext but matched aext?");
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
MVT::getIntegerVT(MaskEltSize);
@@ -30343,10 +31488,9 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
- if (SrcVT.getVectorNumElements() == NumDstElts)
- Shuffle = unsigned(ISD::ZERO_EXTEND);
- else
- Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
+ Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
+ if (SrcVT.getVectorNumElements() != NumDstElts)
+ Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
@@ -30368,7 +31512,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
- if (!Subtarget.hasAVX2() && isTargetShuffleEquivalent(Mask, {0, 0})) {
+ if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
return true;
@@ -30426,29 +31570,18 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Attempt to match against broadcast-from-vector.
- if (Subtarget.hasAVX2()) {
- SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
- if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
- SrcVT = DstVT = MaskVT;
- Shuffle = X86ISD::VBROADCAST;
- return true;
- }
- }
-
return false;
}
// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- const APInt &Zeroable,
- bool AllowFloatDomain,
- bool AllowIntDomain,
- const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &ShuffleVT,
- unsigned &PermuteImm) {
+static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ const APInt &Zeroable,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &ShuffleVT,
+ unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
unsigned InputSizeInBits = MaskVT.getSizeInBits();
unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
@@ -30549,9 +31682,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// FIXME: Add 512-bit support.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
- int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
- MaskScalarSizeInBits, Mask,
- 0, Zeroable, Subtarget);
+ int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
+ Mask, 0, Zeroable, Subtarget);
if (0 < ShiftAmt) {
PermuteImm = (unsigned)ShiftAmt;
return true;
@@ -30564,13 +31696,12 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool AllowFloatDomain, bool AllowIntDomain,
- SDValue &V1, SDValue &V2, const SDLoc &DL,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
- bool IsUnary) {
+static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDValue &V2, const SDLoc &DL,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
+ bool IsUnary) {
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
@@ -30631,7 +31762,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
return false;
}
-static bool matchBinaryPermuteVectorShuffle(
+static bool matchBinaryPermuteShuffle(
MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable,
bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2,
const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget,
@@ -30642,7 +31773,7 @@ static bool matchBinaryPermuteVectorShuffle(
// Attempt to match against PALIGNR byte rotate.
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
- int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
+ int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
@@ -30678,34 +31809,11 @@ static bool matchBinaryPermuteVectorShuffle(
return true;
}
} else {
- // Determine a type compatible with X86ISD::BLENDI.
- ShuffleVT = MaskVT;
- if (Subtarget.hasAVX2()) {
- if (ShuffleVT == MVT::v4i64)
- ShuffleVT = MVT::v8i32;
- else if (ShuffleVT == MVT::v2i64)
- ShuffleVT = MVT::v4i32;
- } else {
- if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
- ShuffleVT = MVT::v8i16;
- else if (ShuffleVT == MVT::v4i64)
- ShuffleVT = MVT::v4f64;
- else if (ShuffleVT == MVT::v8i32)
- ShuffleVT = MVT::v8f32;
- }
-
- if (!ShuffleVT.isFloatingPoint()) {
- int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
- BlendMask =
- scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
- ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
- }
-
V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
+ ShuffleVT = MaskVT;
return true;
}
}
@@ -30715,7 +31823,7 @@ static bool matchBinaryPermuteVectorShuffle(
if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
MaskVT.is128BitVector()) {
if (Zeroable.getBoolValue() &&
- matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+ matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
return true;
@@ -30727,7 +31835,7 @@ static bool matchBinaryPermuteVectorShuffle(
((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
- if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+ if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
Shuffle = X86ISD::SHUFP;
ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
@@ -30784,6 +31892,11 @@ static bool matchBinaryPermuteVectorShuffle(
return false;
}
+static SDValue combineX86ShuffleChainWithExtract(
+ ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget);
+
/// Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
@@ -30841,6 +31954,24 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
bool IsEVEXShuffle =
RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
+ // Attempt to match a subvector broadcast.
+ // shuffle(insert_subvector(undef, sub, 0), undef, 0, 0, 0, 0)
+ if (UnaryShuffle &&
+ (BaseMaskEltSizeInBits == 128 || BaseMaskEltSizeInBits == 256)) {
+ SmallVector<int, 64> BroadcastMask(NumBaseMaskElts, 0);
+ if (isTargetShuffleEquivalent(BaseMask, BroadcastMask)) {
+ SDValue Src = Inputs[0];
+ if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(0).isUndef() &&
+ Src.getOperand(1).getValueSizeInBits() == BaseMaskEltSizeInBits &&
+ MayFoldLoad(Src.getOperand(1)) && isNullConstant(Src.getOperand(2))) {
+ return DAG.getBitcast(RootVT, DAG.getNode(X86ISD::SUBV_BROADCAST, DL,
+ Src.getValueType(),
+ Src.getOperand(1)));
+ }
+ }
+ }
+
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
// Handle 128-bit lane shuffles of 256-bit vectors.
@@ -30894,6 +32025,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
+ // TODO: Should we indicate which domain is preferred if both are allowed?
bool AllowFloatDomain = FloatDomain || (Depth > 3);
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
@@ -30909,8 +32041,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// directly if we don't shuffle the lower element and we shuffle the upper
// (zero) elements within themselves.
if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
- (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
- unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+ (cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() %
+ MaskEltSizeInBits) == 0) {
+ unsigned Scale =
+ cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() /
+ MaskEltSizeInBits;
ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
@@ -30918,10 +32053,35 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
+ // Attempt to match against broadcast-from-vector.
+ // Limit AVX1 to cases where we're loading+broadcasting a scalar element.
+  if ((Subtarget.hasAVX2() || (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) &&
+      (!IsEVEXShuffle || NumRootElts == NumMaskElts)) {
+ SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
+ if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
+ if (V1.getValueType() == MaskVT &&
+ V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ MayFoldLoad(V1.getOperand(0))) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ return SDValue(); // Nothing to do!
+ Res = V1.getOperand(0);
+ Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+ return DAG.getBitcast(RootVT, Res);
+ }
+ if (Subtarget.hasAVX2()) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ return SDValue(); // Nothing to do!
+ Res = DAG.getBitcast(MaskVT, V1);
+ Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
+ return DAG.getBitcast(RootVT, Res);
+ }
+ }
+ }
+
SDValue NewV1 = V1; // Save operand in case early exit happens.
- if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- NewV1, DL, DAG, Subtarget, Shuffle,
- ShuffleSrcVT, ShuffleVT) &&
+ if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
+ DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
@@ -30930,9 +32090,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return DAG.getBitcast(RootVT, Res);
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
- AllowIntDomain, Subtarget, Shuffle,
- ShuffleVT, PermuteImm) &&
+ if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
+ AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
+ PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
@@ -30945,9 +32105,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
SDValue NewV1 = V1; // Save operands in case early exit happens.
SDValue NewV2 = V2;
- if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
- NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
- ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
+ if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
+ NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
@@ -30959,7 +32119,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV1 = V1; // Save operands in case early exit happens.
NewV2 = V2;
- if (matchBinaryPermuteVectorShuffle(
+ if (matchBinaryPermuteShuffle(
MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
@@ -30979,8 +32139,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Annoyingly, SSE4A instructions don't map into the above match helpers.
if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
uint64_t BitLen, BitIdx;
- if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
- Zeroable)) {
+ if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
+ Zeroable)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
@@ -30990,7 +32150,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return DAG.getBitcast(RootVT, Res);
}
- if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+ if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
@@ -31057,6 +32217,13 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return DAG.getBitcast(RootVT, Res);
}
+ // If that failed and either input is extracted then try to combine as a
+ // shuffle with the larger type.
+ if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
+ Inputs, Root, BaseMask, Depth, HasVariableMask, AllowVariableMask,
+ DAG, Subtarget))
+ return WideShuffle;
+
// If we have a dual input lane-crossing shuffle then lower to VPERMV3.
if (AllowVariableMask && !MaskContainsZeros &&
((Subtarget.hasAVX512() &&
@@ -31222,10 +32389,145 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return DAG.getBitcast(RootVT, Res);
}
+ // If that failed and either input is extracted then try to combine as a
+ // shuffle with the larger type.
+ if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
+ Inputs, Root, BaseMask, Depth, HasVariableMask, AllowVariableMask,
+ DAG, Subtarget))
+ return WideShuffle;
+
+ // If we have a dual input shuffle then lower to VPERMV3.
+ if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
+ ((Subtarget.hasAVX512() &&
+ (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasVLX() &&
+ (MaskVT == MVT::v2f64 || MaskVT == MVT::v2i64 || MaskVT == MVT::v4f64 ||
+ MaskVT == MVT::v4i64 || MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 ||
+ MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+ (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX() &&
+ (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16)) ||
+ (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+ (Subtarget.hasVBMI() && Subtarget.hasVLX() &&
+ (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8)))) {
+ SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true);
+ V1 = DAG.getBitcast(MaskVT, V1);
+ V2 = DAG.getBitcast(MaskVT, V2);
+ Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
+ return DAG.getBitcast(RootVT, Res);
+ }
+
// Failed to find any combines.
return SDValue();
}
+// Combine an arbitrary chain of shuffles + extract_subvectors into a single
+// instruction if possible.
+//
+// Wrapper for combineX86ShuffleChain that extends the shuffle mask to a larger
+// type size to attempt to combine:
+// shuffle(extract_subvector(x,c1),extract_subvector(y,c2),m1)
+// -->
+// extract_subvector(shuffle(x,y,m2),0)
+static SDValue combineX86ShuffleChainWithExtract(
+ ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ unsigned NumMaskElts = BaseMask.size();
+ unsigned NumInputs = Inputs.size();
+ if (NumInputs == 0)
+ return SDValue();
+
+ SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
+ SmallVector<unsigned, 4> Offsets(NumInputs, 0);
+
+ // Peek through subvectors.
+ // TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
+ unsigned WideSizeInBits = WideInputs[0].getValueSizeInBits();
+ for (unsigned i = 0; i != NumInputs; ++i) {
+ SDValue &Src = WideInputs[i];
+ unsigned &Offset = Offsets[i];
+ Src = peekThroughBitcasts(Src);
+ EVT BaseVT = Src.getValueType();
+ while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(Src.getOperand(1))) {
+ Offset += Src.getConstantOperandVal(1);
+ Src = Src.getOperand(0);
+ }
+ WideSizeInBits = std::max(WideSizeInBits, Src.getValueSizeInBits());
+ assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
+ "Unexpected subvector extraction");
+ Offset /= BaseVT.getVectorNumElements();
+ Offset *= NumMaskElts;
+ }
+
+  // Bail if we're always extracting from the lowest subvectors;
+ // combineX86ShuffleChain should match this for the current width.
+ if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
+ return SDValue();
+
+ EVT RootVT = Root.getValueType();
+ unsigned RootSizeInBits = RootVT.getSizeInBits();
+ unsigned Scale = WideSizeInBits / RootSizeInBits;
+ assert((WideSizeInBits % RootSizeInBits) == 0 &&
+ "Unexpected subvector extraction");
+
+ // If the src vector types aren't the same, see if we can extend
+ // them to match each other.
+ // TODO: Support different scalar types?
+ EVT WideSVT = WideInputs[0].getValueType().getScalarType();
+ if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
+ return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
+ Op.getValueType().getScalarType() != WideSVT;
+ }))
+ return SDValue();
+
+ for (SDValue &NewInput : WideInputs) {
+ assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 &&
+ "Shuffle vector size mismatch");
+ if (WideSizeInBits > NewInput.getValueSizeInBits())
+ NewInput = widenSubVector(NewInput, false, Subtarget, DAG,
+ SDLoc(NewInput), WideSizeInBits);
+ assert(WideSizeInBits == NewInput.getValueSizeInBits() &&
+ "Unexpected subvector extraction");
+ }
+
+ // Create new mask for larger type.
+ for (unsigned i = 1; i != NumInputs; ++i)
+ Offsets[i] += i * Scale * NumMaskElts;
+
+ SmallVector<int, 64> WideMask(BaseMask.begin(), BaseMask.end());
+ for (int &M : WideMask) {
+ if (M < 0)
+ continue;
+ M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
+ }
+ WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
+
+ // Remove unused/repeated shuffle source ops.
+ resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
+ assert(!WideInputs.empty() && "Shuffle with no inputs detected");
+
+ if (WideInputs.size() > 2)
+ return SDValue();
+
+ // Increase depth for every upper subvector we've peeked through.
+ Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; });
+
+ // Attempt to combine wider chain.
+ // TODO: Can we use a better Root?
+ SDValue WideRoot = WideInputs[0];
+ if (SDValue WideShuffle = combineX86ShuffleChain(
+ WideInputs, WideRoot, WideMask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget)) {
+ WideShuffle =
+ extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
+ return DAG.getBitcast(RootVT, WideShuffle);
+ }
+ return SDValue();
+}
+
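The offset and mask arithmetic above is easier to follow with concrete numbers. Below is a minimal standalone sketch (illustrative only, not part of the patch; the types, mask values and offsets are assumptions chosen for the example, with the narrow input type and the base mask having the same element count) that reproduces the widening of a v4f32 base mask whose two inputs are both extracted from the high 128-bit half of 256-bit sources:

    // Standalone sketch: reproduce the index math on plain integers.
    #include <cstdio>
    #include <vector>

    int main() {
      // Base shuffle: v4f32 mask {0,1,4,5}, where each input is
      // extract_subvector(<8 x float> src, 4), i.e. the high half.
      std::vector<int> BaseMask = {0, 1, 4, 5};
      unsigned NumMaskElts = BaseMask.size(); // 4
      unsigned BaseNumElts = 4;               // elements per narrow input
      unsigned Scale = 2;                     // 256-bit source / 128-bit root
      unsigned Offsets[2] = {4, 4};           // extract element offsets

      for (unsigned i = 0; i != 2; ++i) {
        Offsets[i] = (Offsets[i] / BaseNumElts) * NumMaskElts; // 4 and 4
        if (i != 0)
          Offsets[i] += i * Scale * NumMaskElts;               // input 1: 12
      }

      std::vector<int> WideMask;
      for (int M : BaseMask)
        WideMask.push_back((M % (int)NumMaskElts) +
                           Offsets[M / (int)NumMaskElts]);
      WideMask.resize(Scale * NumMaskElts, -1); // pad upper half with undef

      for (int M : WideMask)
        printf("%d ", M);                       // 4 5 12 13 -1 -1 -1 -1
      printf("\n");
      return 0;
    }

The widened 8-element mask selects elements 4-5 of each wide source, and reading subvector 0 of the wide shuffle result reproduces what the original narrow shuffle produced.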
// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
@@ -31370,19 +32672,10 @@ static SDValue combineX86ShufflesRecursively(
if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG))
return SDValue();
- // TODO - Add support for more than 2 inputs.
- if (2 < OpInputs.size())
- return SDValue();
-
- SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
- SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
-
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {
- if (!Input)
- return -1;
// Attempt to find an existing match.
SDValue InputBC = peekThroughBitcasts(Input);
for (int i = 0, e = Ops.size(); i < e; ++i)
@@ -31398,8 +32691,9 @@ static SDValue combineX86ShufflesRecursively(
return Ops.size() - 1;
};
- int InputIdx0 = AddOp(Input0, SrcOpIndex);
- int InputIdx1 = AddOp(Input1, -1);
+ SmallVector<int, 2> OpInputIdx;
+ for (SDValue OpInput : OpInputs)
+ OpInputIdx.push_back(AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
@@ -31471,13 +32765,9 @@ static SDValue combineX86ShufflesRecursively(
: (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1));
OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);
- if (OpMask[OpIdx] < (int)OpMask.size()) {
- assert(0 <= InputIdx0 && "Unknown target shuffle input");
- OpMaskedIdx += InputIdx0 * MaskWidth;
- } else {
- assert(0 <= InputIdx1 && "Unknown target shuffle input");
- OpMaskedIdx += InputIdx1 * MaskWidth;
- }
+ int InputIdx = OpMask[OpIdx] / (int)OpMask.size();
+ assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input");
+ OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth;
Mask[i] = OpMaskedIdx;
}
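With the per-input index list, each mask value encodes both which input it reads and which lane of that input. A small standalone sketch (illustrative; it assumes the root mask and the op's mask have the same width so the ratio adjustments above drop out, and the Ops positions are made up):

    // Standalone sketch: how a shuffle mask value selects an operand and a
    // lane once all operand masks are concatenated.
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> OpMask = {6, 2, 7, 3}; // 4-elt op, inputs 0..3 and 4..7
      std::vector<int> OpInputIdx = {2, 0};   // where those inputs sit in Ops[]
      unsigned MaskWidth = 8;                 // width of the combined mask

      for (int M : OpMask) {
        int InputIdx = M / (int)OpMask.size(); // 0 = first input, 1 = second
        int Lane     = M % (int)OpMask.size();
        int Combined = OpInputIdx[InputIdx] * MaskWidth + Lane;
        printf("mask %d -> input %d lane %d -> combined index %d\n",
               M, InputIdx, Lane, Combined);
      }
      return 0;
    }

The division picks the input slot and the remainder picks the lane, which is the same OpMask[OpIdx] / OpMask.size() split used above.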
@@ -31493,7 +32783,7 @@ static SDValue combineX86ShufflesRecursively(
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG,
SDLoc(Root));
- // Remove unused shuffle source ops.
+ // Remove unused/repeated shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
assert(!Ops.empty() && "Shuffle with no inputs detected");
@@ -31530,29 +32820,42 @@ static SDValue combineX86ShufflesRecursively(
return Cst;
// We can only combine unary and binary shuffle mask cases.
- if (Ops.size() > 2)
- return SDValue();
+ if (Ops.size() <= 2) {
+ // Minor canonicalization of the accumulated shuffle mask to make it easier
+ // to match below. All this does is detect masks with sequential pairs of
+ // elements, and shrink them to the half-width mask. It does this in a loop
+ // so it will reduce the size of the mask to the minimal width mask which
+ // performs an equivalent shuffle.
+ SmallVector<int, 64> WidenedMask;
+ while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
+ Mask = std::move(WidenedMask);
+ }
+
+ // Canonicalization of binary shuffle masks to improve pattern matching by
+ // commuting the inputs.
+ if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(Ops[0], Ops[1]);
+ }
- // Minor canonicalization of the accumulated shuffle mask to make it easier
- // to match below. All this does is detect masks with sequential pairs of
- // elements, and shrink them to the half-width mask. It does this in a loop
- // so it will reduce the size of the mask to the minimal width mask which
- // performs an equivalent shuffle.
- SmallVector<int, 64> WidenedMask;
- while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
- Mask = std::move(WidenedMask);
+ // Finally, try to combine into a single shuffle instruction.
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget);
}
- // Canonicalization of binary shuffle masks to improve pattern matching by
- // commuting the inputs.
- if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
- ShuffleVectorSDNode::commuteMask(Mask);
- std::swap(Ops[0], Ops[1]);
- }
+ // If that failed and any input is extracted then try to combine as a
+ // shuffle with the larger type.
+ return combineX86ShuffleChainWithExtract(Ops, Root, Mask, Depth,
+ HasVariableMask, AllowVariableMask,
+ DAG, Subtarget);
+}
- // Finally, try to combine into a single shuffle instruction.
- return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
- AllowVariableMask, DAG, Subtarget);
+/// Helper entry wrapper to combineX86ShufflesRecursively.
+static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget);
}
/// Get the PSHUF-style mask from PSHUF node.
@@ -31770,12 +33073,13 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
switch (Opcode) {
case X86ISD::VBROADCAST: {
- // If broadcasting from another shuffle, attempt to simplify it.
- // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
SDValue Src = N.getOperand(0);
SDValue BC = peekThroughBitcasts(Src);
EVT SrcVT = Src.getValueType();
EVT BCVT = BC.getValueType();
+
+ // If broadcasting from another shuffle, attempt to simplify it.
+ // TODO - we really need a general SimplifyDemandedVectorElts mechanism.
if (isTargetShuffle(BC.getOpcode()) &&
VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {
unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();
@@ -31789,6 +33093,71 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
+
+ // broadcast(bitcast(src)) -> bitcast(broadcast(src))
+ // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
+ if (Src.getOpcode() == ISD::BITCAST &&
+ SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
+ VT.getVectorNumElements());
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
+ }
+
+ // Reduce broadcast source vector to lowest 128-bits.
+ if (SrcVT.getSizeInBits() > 128)
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ extract128BitVector(Src, 0, DAG, DL));
+
+ // broadcast(scalar_to_vector(x)) -> broadcast(x).
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+
+ // Share broadcast with the longest vector and extract low subvector (free).
+ for (SDNode *User : Src->uses())
+ if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
+ User->getValueSizeInBits(0) > VT.getSizeInBits()) {
+ return extractSubVector(SDValue(User, 0), 0, DAG, DL,
+ VT.getSizeInBits());
+ }
+
+ return SDValue();
+ }
+ case X86ISD::BLENDI: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+
+ // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.
+ // TODO: Handle MVT::v16i16 repeated blend mask.
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+ MVT SrcVT = N0.getOperand(0).getSimpleValueType();
+ if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ SrcVT.getScalarSizeInBits() >= 32) {
+ unsigned Mask = N.getConstantOperandVal(2);
+ unsigned Size = VT.getVectorNumElements();
+ unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+ unsigned ScaleMask = scaleVectorShuffleBlendMask(Mask, Size, Scale);
+ return DAG.getBitcast(
+ VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
+ N1.getOperand(0),
+ DAG.getConstant(ScaleMask, DL, MVT::i8)));
+ }
+ }
+ return SDValue();
+ }
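The BLENDI narrowing above relies on rescaling the immediate blend mask when each wide lane is split into Scale narrow lanes. A standalone sketch of that rescaling (illustrative; it assumes scaleVectorShuffleBlendMask simply repeats each selected bit Scale times):

    // Standalone sketch: widen an immediate blend mask from 64-bit lanes to
    // 32-bit lanes by repeating each mask bit.
    #include <cstdio>

    unsigned scaleBlendMask(unsigned Mask, unsigned Size, unsigned Scale) {
      unsigned ScaledMask = 0;
      for (unsigned i = 0; i != Size; ++i)
        if (Mask & (1u << i))
          ScaledMask |= ((1u << Scale) - 1) << (i * Scale);
      return ScaledMask;
    }

    int main() {
      // A v4i64 blend with immediate 0b0101 becomes a v8i32 blend with
      // immediate 0b00110011.
      printf("0x%02x\n", scaleBlendMask(0b0101, 4, 2)); // prints 0x33
      return 0;
    }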
+ case X86ISD::VPERMI: {
+ // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
+ // TODO: Remove when we have preferred domains in combineX86ShuffleChain.
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) {
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1);
+ return DAG.getBitcast(VT, Res);
+ }
return SDValue();
}
case X86ISD::PSHUFD:
@@ -32212,8 +33581,22 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
/// Eliminate a redundant shuffle of a horizontal math op.
static SDValue foldShuffleOfHorizOp(SDNode *N) {
- if (N->getOpcode() != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
- return SDValue();
+ unsigned Opcode = N->getOpcode();
+ if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
+ if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
+ return SDValue();
+
+ // For a broadcast, peek through an extract element of index 0 to find the
+ // horizontal op: broadcast (ext_vec_elt HOp, 0)
+ EVT VT = N->getValueType(0);
+ if (Opcode == X86ISD::VBROADCAST) {
+ SDValue SrcOp = N->getOperand(0);
+ if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ SrcOp.getValueType() == MVT::f64 &&
+ SrcOp.getOperand(0).getValueType() == VT &&
+ isNullConstant(SrcOp.getOperand(1)))
+ N = SrcOp.getNode();
+ }
SDValue HOp = N->getOperand(0);
if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
@@ -32224,13 +33607,25 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
// lanes of each operand as:
// v4X32: A[0] + A[1] , A[2] + A[3] , B[0] + B[1] , B[2] + B[3]
// ...similarly for v2f64 and v8i16.
- // TODO: Handle UNDEF operands.
- if (HOp.getOperand(0) != HOp.getOperand(1))
+ if (!HOp.getOperand(0).isUndef() && !HOp.getOperand(1).isUndef() &&
+ HOp.getOperand(0) != HOp.getOperand(1))
return SDValue();
// When the operands of a horizontal math op are identical, the low half of
- // the result is the same as the high half. If the shuffle is also replicating
- // low and high halves, we don't need the shuffle.
+ // the result is the same as the high half. If a target shuffle is also
+ // replicating low and high halves, we don't need the shuffle.
+ if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
+ if (HOp.getScalarValueSizeInBits() == 64) {
+ // movddup (hadd X, X) --> hadd X, X
+ // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
+ assert((HOp.getValueType() == MVT::v2f64 ||
+ HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
+ "Unexpected type for h-op");
+ return HOp;
+ }
+ return SDValue();
+ }
+
// shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
// TODO: Other mask possibilities like {1,1} and {1,0} could be added here,
@@ -32252,14 +33647,51 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
return SDValue();
}
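The new MOVDDUP/VBROADCAST handling above rests on the observation that a horizontal add of an operand with itself already has identical low and high lanes, so duplicating lane 0 afterwards changes nothing. A scalar sketch (illustrative only):

    // Standalone sketch: hadd(X, X) on 2 x f64 already has equal lanes, so a
    // following MOVDDUP (duplicate lane 0) is redundant.
    #include <cstdio>

    int main() {
      double X[2] = {1.5, 2.5};
      // hadd(X, X): lane 0 = X[0]+X[1] (from the first operand),
      //             lane 1 = X[0]+X[1] (from the second operand).
      double HAdd[2] = {X[0] + X[1], X[0] + X[1]};
      double MovDDup[2] = {HAdd[0], HAdd[0]};   // duplicate lane 0
      printf("%g %g vs %g %g\n", HAdd[0], HAdd[1], MovDDup[0], MovDDup[1]);
      return 0;
    }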
+/// If we have a shuffle of AVX/AVX512 (256/512 bit) vectors that only uses the
+/// low half of each source vector and does not set any high half elements in
+/// the destination vector, narrow the shuffle to half its original size.
+static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
+ if (!Shuf->getValueType(0).isSimple())
+ return SDValue();
+ MVT VT = Shuf->getSimpleValueType(0);
+ if (!VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ // See if we can ignore all of the high elements of the shuffle.
+ ArrayRef<int> Mask = Shuf->getMask();
+ if (!isUndefUpperHalf(Mask))
+ return SDValue();
+
+ // Check if the shuffle mask accesses only the low half of each input vector
+ // (half-index output is 0 or 2).
+ int HalfIdx1, HalfIdx2;
+ SmallVector<int, 8> HalfMask(Mask.size() / 2);
+ if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2) ||
+ (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1))
+ return SDValue();
+
+ // Create a half-width shuffle to replace the unnecessarily wide shuffle.
+ // The trick is knowing that all of the insert/extract are actually free
+ // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle
+ // of narrow inputs into a narrow output, and that is always cheaper than
+ // the wide shuffle that we started with.
+ return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
+ Shuf->getOperand(1), HalfMask, HalfIdx1,
+ HalfIdx2, false, DAG);
+}
+
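In mask terms, narrowShuffle fires when the upper half of the output is undef and every used index points into the low half of one of the inputs. A standalone sketch of those two checks on an 8-element mask (illustrative and simplified; the real getHalfShuffleMask additionally records which source halves are used so the half-width shuffle can be built):

    // Standalone sketch: the two mask properties checked before narrowing a
    // 256-bit shuffle, on a v8f32-style mask (inputs occupy 0..7 and 8..15).
    #include <cstdio>
    #include <vector>

    int main() {
      // Picks from the low halves of both inputs; upper output half is undef.
      std::vector<int> Mask = {0, 1, 8, 9, -1, -1, -1, -1};
      unsigned NumElts = Mask.size(), Half = NumElts / 2;

      bool UpperUndef = true;
      for (unsigned i = Half; i != NumElts; ++i)
        UpperUndef &= (Mask[i] < 0);

      bool LowHalvesOnly = true;
      for (unsigned i = 0; i != Half; ++i) {
        int M = Mask[i];
        if (M < 0)
          continue;
        // Halves of the concatenation: 0=lo(V1), 1=hi(V1), 2=lo(V2), 3=hi(V2).
        LowHalvesOnly &= ((M / (int)Half) % 2 == 0);
      }

      printf("upper undef: %d, low halves only: %d -> can narrow: %d\n",
             UpperUndef, LowHalvesOnly, UpperUndef && LowHalvesOnly);
      return 0;
    }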
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N))
+ if (SDValue V = narrowShuffle(Shuf, DAG))
+ return V;
+
+ // If we have legalized the vector types, look for blends of FADD and FSUB
+ // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node.
SDLoc dl(N);
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we have legalized the vector types, look for blends of FADD and FSUB
- // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node.
if (TLI.isTypeLegal(VT)) {
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
return AddSub;
@@ -32328,23 +33760,9 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
}
}
- // Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
- // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
- // consecutive, non-overlapping, and in the right order.
- SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
- Elts.push_back(Elt);
- continue;
- }
- Elts.clear();
- break;
- }
-
- if (Elts.size() == VT.getVectorNumElements())
- if (SDValue LD =
- EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true))
- return LD;
+ // Attempt to combine into a vector load/broadcast.
+ if (SDValue LD = combineToConsecutiveLoads(VT, N, dl, DAG, Subtarget, true))
+ return LD;
// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)
@@ -32365,9 +33783,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// specific PSHUF instruction sequences into their minimal form so that we
// can evaluate how many specialized shuffle instructions are involved in
// a particular chain.
- if (SDValue Res = combineX86ShufflesRecursively(
- {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
// Simplify source operands based on shuffle mask.
@@ -32378,6 +33794,68 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
return SDValue(N, 0);
}
+ // Look for a v2i64/v2f64 VZEXT_MOVL of a node that already produces zeros
+ // in the upper 64 bits.
+ // TODO: Can we generalize this using computeKnownBits.
+ if (N->getOpcode() == X86ISD::VZEXT_MOVL &&
+ (VT == MVT::v2f64 || VT == MVT::v2i64) &&
+ N->getOperand(0).getOpcode() == ISD::BITCAST &&
+ (N->getOperand(0).getOperand(0).getValueType() == MVT::v4f32 ||
+ N->getOperand(0).getOperand(0).getValueType() == MVT::v4i32)) {
+ SDValue In = N->getOperand(0).getOperand(0);
+ switch (In.getOpcode()) {
+ default:
+ break;
+ case X86ISD::CVTP2SI: case X86ISD::CVTP2UI:
+ case X86ISD::MCVTP2SI: case X86ISD::MCVTP2UI:
+ case X86ISD::CVTTP2SI: case X86ISD::CVTTP2UI:
+ case X86ISD::MCVTTP2SI: case X86ISD::MCVTTP2UI:
+ case X86ISD::CVTSI2P: case X86ISD::CVTUI2P:
+ case X86ISD::MCVTSI2P: case X86ISD::MCVTUI2P:
+ case X86ISD::VFPROUND: case X86ISD::VMFPROUND:
+ if (In.getOperand(0).getValueType() == MVT::v2f64 ||
+ In.getOperand(0).getValueType() == MVT::v2i64)
+ return N->getOperand(0); // return the bitcast
+ break;
+ }
+ }
+
+ // Pull subvector inserts into undef through VZEXT_MOVL by making it an
+ // insert into a zero vector. This helps get VZEXT_MOVL closer to
+ // scalar_to_vectors where 256/512 are canonicalized to an insert and a
+ // 128-bit scalar_to_vector. This reduces the number of isel patterns.
+ if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() &&
+ N->getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N->getOperand(0).hasOneUse() &&
+ N->getOperand(0).getOperand(0).isUndef() &&
+ isNullConstant(N->getOperand(0).getOperand(2))) {
+ SDValue In = N->getOperand(0).getOperand(1);
+ SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, In.getValueType(), In);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
+ getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
+ Movl, N->getOperand(0).getOperand(2));
+ }
+
+ // If this a vzmovl of a full vector load, replace it with a vzload, unless
+ // the load is volatile.
+ if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+ ISD::isNormalLoad(N->getOperand(0).getNode())) {
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ if (!LN->isVolatile()) {
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ VT.getVectorElementType(),
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ MachineMemOperand::MOLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ return VZLoad;
+ }
+ }
+
+
// Look for a truncating shuffle to v2i32 of a PMULUDQ where one of the
// operands is an extend from v2i32 to v2i64. Turn it into a pmulld.
// FIXME: This can probably go away once we default to widening legalization.
@@ -32436,6 +33914,22 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Handle special case opcodes.
switch (Opc) {
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ // Multiply by zero.
+ KnownZero = LHSZero | RHSZero;
+ break;
+ }
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA: {
@@ -32443,11 +33937,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Amt = Op.getOperand(1);
MVT AmtVT = Amt.getSimpleValueType();
assert(AmtVT.is128BitVector() && "Unexpected value type");
+
+    // If every use of the shift amount is as an SSE shift amount then we know
+    // that only the bottom 64 bits are ever used.
+ bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) {
+ unsigned UseOpc = Use->getOpcode();
+ return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL ||
+ UseOpc == X86ISD::VSRA) &&
+ Use->getOperand(0) != Amt;
+ });
+
APInt AmtUndef, AmtZero;
unsigned NumAmtElts = AmtVT.getVectorNumElements();
APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2);
if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
- Depth + 1))
+ Depth + 1, AssumeSingleUse))
return true;
LLVM_FALLTHROUGH;
}
@@ -32487,6 +33991,58 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
break;
}
+ case X86ISD::HADD:
+ case X86ISD::HSUB:
+ case X86ISD::FHADD:
+ case X86ISD::FHSUB: {
+ APInt DemandedLHS, DemandedRHS;
+ getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
+
+ APInt LHSUndef, LHSZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, LHSUndef,
+ LHSZero, TLO, Depth + 1))
+ return true;
+ APInt RHSUndef, RHSZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, RHSUndef,
+ RHSZero, TLO, Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::VTRUNC:
+ case X86ISD::VTRUNCS:
+ case X86ISD::VTRUNCUS: {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+ APInt SrcUndef, SrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrc, SrcUndef, SrcZero, TLO,
+ Depth + 1))
+ return true;
+ KnownZero = SrcZero.zextOrTrunc(NumElts);
+ KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+ break;
+ }
+ case X86ISD::BLENDV: {
+ APInt SelUndef, SelZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, SelUndef,
+ SelZero, TLO, Depth + 1))
+ return true;
+
+ // TODO: Use SelZero to adjust LHS/RHS DemandedElts.
+ APInt LHSUndef, LHSZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, LHSUndef,
+ LHSZero, TLO, Depth + 1))
+ return true;
+
+ APInt RHSUndef, RHSZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedElts, RHSUndef,
+ RHSZero, TLO, Depth + 1))
+ return true;
+
+ KnownZero = LHSZero & RHSZero;
+ KnownUndef = LHSUndef & RHSUndef;
+ break;
+ }
case X86ISD::VBROADCAST: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
@@ -32494,7 +34050,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return false;
// Don't bother broadcasting if we just need the 0'th element.
if (DemandedElts == 1) {
- if(Src.getValueType() != VT)
+ if (Src.getValueType() != VT)
Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,
SDLoc(Op));
return TLO.CombineTo(Op, Src);
@@ -32506,8 +34062,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
break;
}
- case X86ISD::PSHUFB: {
- // TODO - simplify other variable shuffle masks.
+ case X86ISD::SUBV_BROADCAST: {
+ // Reduce size of broadcast if we don't need the upper half.
+ unsigned HalfElts = NumElts / 2;
+ if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ SDValue Half = Src;
+ if (SrcVT.getVectorNumElements() != HalfElts) {
+ MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
+ Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
+ }
+
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
+ TLO.DAG, SDLoc(Op),
+ Half.getValueSizeInBits()));
+ }
+ break;
+ }
+ case X86ISD::VPERMV: {
+ SDValue Mask = Op.getOperand(0);
+ APInt MaskUndef, MaskZero;
+ if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PSHUFB:
+ case X86ISD::VPERMV3:
+ case X86ISD::VPERMILPV: {
SDValue Mask = Op.getOperand(1);
APInt MaskUndef, MaskZero;
if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
@@ -32515,6 +34099,106 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
break;
}
+ case X86ISD::VPPERM:
+ case X86ISD::VPERMIL2: {
+ SDValue Mask = Op.getOperand(2);
+ APInt MaskUndef, MaskZero;
+ if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ }
+
+ // For 256/512-bit ops that are 128/256-bit ops glued together, if we do not
+ // demand any of the high elements, then narrow the op to 128/256-bits: e.g.
+ // (op ymm0, ymm1) --> insert undef, (op xmm0, xmm1), 0
+ if ((VT.is256BitVector() || VT.is512BitVector()) &&
+ DemandedElts.lshr(NumElts / 2) == 0) {
+ unsigned SizeInBits = VT.getSizeInBits();
+ unsigned ExtSizeInBits = SizeInBits / 2;
+
+ // See if 512-bit ops only use the bottom 128-bits.
+ if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0)
+ ExtSizeInBits = SizeInBits / 4;
+
+ switch (Opc) {
+ // Zero upper elements.
+ case X86ISD::VZEXT_MOVL: {
+ SDLoc DL(Op);
+ SDValue Ext0 =
+ extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+ SDValue ExtOp =
+ TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0);
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
+ }
+ // Byte shifts by immediate.
+ case X86ISD::VSHLDQ:
+ case X86ISD::VSRLDQ:
+ // Shift by uniform.
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA:
+ // Shift by immediate.
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI: {
+ SDLoc DL(Op);
+ SDValue Ext0 =
+ extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+ SDValue ExtOp =
+ TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1));
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
+ }
+ case X86ISD::VPERMI: {
+ // Simplify PERMPD/PERMQ to extract_subvector.
+ // TODO: This should be done in shuffle combining.
+ if (VT == MVT::v4f64 || VT == MVT::v4i64) {
+ SmallVector<int, 4> Mask;
+ DecodeVPERMMask(NumElts, Op.getConstantOperandVal(1), Mask);
+ if (isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3)) {
+ SDLoc DL(Op);
+ SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128);
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128);
+ return TLO.CombineTo(Op, Insert);
+ }
+ }
+ break;
+ }
+ // Target Shuffles.
+ case X86ISD::PSHUFB:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ // Saturated Packs.
+ case X86ISD::PACKSS:
+ case X86ISD::PACKUS:
+ // Horizontal Ops.
+ case X86ISD::HADD:
+ case X86ISD::HSUB:
+ case X86ISD::FHADD:
+ case X86ISD::FHSUB: {
+ SDLoc DL(Op);
+ MVT ExtVT = VT.getSimpleVT();
+ ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
+ ExtSizeInBits / ExtVT.getScalarSizeInBits());
+ SDValue Ext0 =
+ extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+ SDValue Ext1 =
+ extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
+ SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1);
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
+ }
+ }
}
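The narrowing above is gated on no high elements being demanded, which on an APInt demanded-elements mask is just a logical shift test. A trivial standalone sketch with a plain integer mask (illustrative):

    // Standalone sketch: "only the low half is demanded" as a shift test.
    #include <cstdio>
    #include <cstdint>

    int main() {
      unsigned NumElts = 8;               // e.g. v8i32 (256 bits)
      uint32_t DemandedElts = 0b00000110; // only elements 1 and 2 are used
      bool OnlyLowHalf = (DemandedElts >> (NumElts / 2)) == 0;
      printf("narrow to 128 bits: %d\n", OnlyLowHalf); // prints 1
      return 0;
    }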
// Simplify target shuffles.
@@ -32606,9 +34290,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue RHS = Op.getOperand(1);
// FIXME: Can we bound this better?
APInt DemandedMask = APInt::getLowBitsSet(64, 32);
- if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp,
+ TLO, Depth + 1))
return true;
- if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp,
+ TLO, Depth + 1))
return true;
break;
}
@@ -32727,6 +34413,97 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
break;
}
+ case X86ISD::PEXTRB:
+ case X86ISD::PEXTRW: {
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ MVT VecVT = Vec.getSimpleValueType();
+ unsigned NumVecElts = VecVT.getVectorNumElements();
+
+ if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {
+ unsigned Idx = CIdx->getZExtValue();
+ unsigned VecBitWidth = VecVT.getScalarSizeInBits();
+
+ // If we demand no bits from the vector then we must have demanded
+      // bits from the implicit zext - simplify to zero.
+ APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth);
+ if (DemandedVecBits == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
+
+ APInt KnownUndef, KnownZero;
+ APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx);
+ if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
+ KnownZero, TLO, Depth + 1))
+ return true;
+
+ KnownBits KnownVec;
+ if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts,
+ KnownVec, TLO, Depth + 1))
+ return true;
+
+ Known = KnownVec.zext(BitWidth, true);
+ return false;
+ }
+ break;
+ }
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ MVT VecVT = Vec.getSimpleValueType();
+
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+ unsigned Idx = CIdx->getZExtValue();
+ if (!OriginalDemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+
+ KnownBits KnownVec;
+ APInt DemandedVecElts(OriginalDemandedElts);
+ DemandedVecElts.clearBit(Idx);
+ if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
+ KnownVec, TLO, Depth + 1))
+ return true;
+
+ KnownBits KnownScl;
+ unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+ APInt DemandedSclBits = OriginalDemandedBits.zext(NumSclBits);
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits());
+ Known.One = KnownVec.One & KnownScl.One;
+ Known.Zero = KnownVec.Zero & KnownScl.Zero;
+ return false;
+ }
+ break;
+ }
+ case X86ISD::PACKSS:
+ // PACKSS saturates to MIN/MAX integer values. So if we just want the
+    // sign bit then we can just ask for the source operands' sign bits.
+ // TODO - add known bits handling.
+ if (OriginalDemandedBits.isSignMask()) {
+ APInt DemandedLHS, DemandedRHS;
+ getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS);
+
+ KnownBits KnownLHS, KnownRHS;
+ APInt SignMask = APInt::getSignMask(BitWidth * 2);
+ if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS,
+ KnownLHS, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS,
+ KnownRHS, TLO, Depth + 1))
+ return true;
+ }
+ // TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.
+ break;
+ case X86ISD::PCMPGT:
+ // icmp sgt(0, R) == ashr(R, BitWidth-1).
+ // iff we only need the sign bit then we can use R directly.
+ if (OriginalDemandedBits.isSignMask() &&
+ ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ break;
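Both of the combines above are sign-bit identities: signed saturation (PACKSS) preserves the sign of the source element, and comparing 0 > R produces all-ones exactly when R is negative, i.e. the same information as R's sign bit. A scalar sketch (illustrative only):

    // Standalone sketch: the two sign-bit identities, checked on scalars.
    #include <cstdio>
    #include <cstdint>
    #include <algorithm>

    int main() {
      // PACKSS: signed saturation keeps the sign, so the sign bit of the
      // packed i8 equals the sign bit of the source i16.
      int16_t Src = -20000;
      int8_t Packed = (int8_t)std::clamp<int16_t>(Src, -128, 127); // -> -128
      printf("signs match: %d\n", (Src < 0) == (Packed < 0));      // prints 1

      // PCMPGT: (0 > R) is all-ones exactly when R is negative, the same
      // predicate as ashr(R, 31) for i32.
      int32_t R = -7;
      int32_t CmpLane = (0 > R) ? -1 : 0;
      int32_t AShr = R >> 31;                 // arithmetic shift on negatives
      printf("equal: %d\n", CmpLane == AShr); // prints 1
      return 0;
    }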
case X86ISD::MOVMSK: {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
@@ -32868,29 +34645,42 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+// Helper to peek through bitops/setcc to determine size of source vector.
+// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
+static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
+ switch (Src.getOpcode()) {
+ case ISD::SETCC:
+ return Src.getOperand(0).getValueSizeInBits() == Size;
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR:
+ return checkBitcastSrcVectorSize(Src.getOperand(0), Size) &&
+ checkBitcastSrcVectorSize(Src.getOperand(1), Size);
+ }
+ return false;
+}
+
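The helper above answers one question: did every compare feeding this vXi1 value, possibly combined with AND/OR/XOR, operate on a source vector of the given width? A toy standalone analogue over a hand-rolled expression node (illustrative; the node type and field names are made up):

    // Standalone sketch: recursively confirm all compare leaves read a source
    // of the required width, looking through the bitwise logic ops.
    #include <cstdio>

    struct Node {
      enum { SETCC, AND, OR, XOR } Op;
      unsigned SrcBits;      // only meaningful for SETCC leaves
      const Node *L, *R;     // only meaningful for the logic ops
    };

    bool srcVectorSizeIs(const Node *N, unsigned Size) {
      switch (N->Op) {
      case Node::SETCC:
        return N->SrcBits == Size;
      case Node::AND:
      case Node::OR:
      case Node::XOR:
        return srcVectorSizeIs(N->L, Size) && srcVectorSizeIs(N->R, Size);
      }
      return false;
    }

    int main() {
      Node A{Node::SETCC, 256, nullptr, nullptr};
      Node B{Node::SETCC, 256, nullptr, nullptr};
      Node Or{Node::OR, 0, &A, &B};
      printf("%d\n", srcVectorSizeIs(&Or, 256)); // prints 1
      return 0;
    }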
// Try to match patterns such as
// (i16 bitcast (v16i1 x))
// ->
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the illegal vector is scalarized on subtargets that don't have legal
// vxi1 types.
-static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
+static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
+ const SDLoc &DL,
const X86Subtarget &Subtarget) {
- EVT VT = BitCast.getValueType();
- SDValue N0 = BitCast.getOperand(0);
- EVT VecVT = N0->getValueType(0);
-
- if (!VT.isScalarInteger() || !VecVT.isSimple())
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
return SDValue();
// If the input is a truncate from v16i8 or v32i8 go ahead and use a
// movmskb even with avx512. This will be better than truncating to vXi1 and
// using a kmov. This can especially help KNL if the input is a v16i8/v32i8
// vpcmpeqb/vpcmpgtb.
- bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- (N0.getOperand(0).getValueType() == MVT::v16i8 ||
- N0.getOperand(0).getValueType() == MVT::v32i8 ||
- N0.getOperand(0).getValueType() == MVT::v64i8);
+ bool IsTruncated = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
+ (Src.getOperand(0).getValueType() == MVT::v16i8 ||
+ Src.getOperand(0).getValueType() == MVT::v32i8 ||
+ Src.getOperand(0).getValueType() == MVT::v64i8);
// With AVX512 vxi1 types are legal and we prefer using k-regs.
// MOVMSK is supported in SSE2 or later.
@@ -32908,7 +34698,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
MVT SExtVT;
- switch (VecVT.getSimpleVT().SimpleTy) {
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default:
return SDValue();
case MVT::v2i1:
@@ -32918,10 +34708,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
SExtVT = MVT::v4i32;
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
// sign-extend to a 256-bit operation to avoid truncation.
- if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
- N0->getOperand(0).getValueType().is256BitVector()) {
+ if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
SExtVT = MVT::v4i64;
- }
break;
case MVT::v8i1:
SExtVT = MVT::v8i16;
@@ -32930,9 +34718,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
// 256-bit because the shuffle is cheaper than sign extending the result of
// the compare.
- if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
- (N0->getOperand(0).getValueType().is256BitVector() ||
- N0->getOperand(0).getValueType().is512BitVector())) {
+    // TODO: use checkBitcastSrcVectorSize
+ if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
+ (Src.getOperand(0).getValueType().is256BitVector() ||
+ Src.getOperand(0).getValueType().is512BitVector())) {
SExtVT = MVT::v8i32;
}
break;
@@ -32956,8 +34745,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
return SDValue();
};
- SDLoc DL(BitCast);
- SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, N0);
+ SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
if (SExtVT == MVT::v64i8) {
SDValue Lo, Hi;
@@ -32977,7 +34765,11 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
DAG.getUNDEF(MVT::v8i16));
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
- return DAG.getZExtOrTrunc(V, DL, VT);
+
+ EVT IntVT =
+ EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
+ V = DAG.getZExtOrTrunc(V, DL, IntVT);
+ return DAG.getBitcast(VT, V);
}
// Convert a vXi1 constant build vector to the same width scalar integer.
@@ -33054,12 +34846,10 @@ static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue createMMXBuildVector(SDValue N, SelectionDAG &DAG,
+static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- SDLoc DL(N);
- unsigned NumElts = N.getNumOperands();
-
- auto *BV = cast<BuildVectorSDNode>(N);
+ SDLoc DL(BV);
+ unsigned NumElts = BV->getNumOperands();
SDValue Splat = BV->getSplatValue();
// Build MMX element from integer GPR or SSE float values.
@@ -33107,7 +34897,7 @@ static SDValue createMMXBuildVector(SDValue N, SelectionDAG &DAG,
Ops.append(NumElts, Splat);
} else {
for (unsigned i = 0; i != NumElts; ++i)
- Ops.push_back(CreateMMXElement(N.getOperand(i)));
+ Ops.push_back(CreateMMXElement(BV->getOperand(i)));
}
// Use tree of PUNPCKLs to build up general MMX vector.
@@ -33141,14 +34931,14 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
if (DCI.isBeforeLegalize()) {
- if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
+ SDLoc dl(N);
+ if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))
return V;
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
Subtarget.hasAVX512()) {
- SDLoc dl(N);
N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
N0 = DAG.getBitcast(MVT::v8i1, N0);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
@@ -33159,7 +34949,6 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// type, widen both sides to avoid a trip through memory.
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
Subtarget.hasAVX512()) {
- SDLoc dl(N);
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
Ops[0] = N0;
@@ -33213,7 +35002,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
if (N0.getOpcode() == ISD::BUILD_VECTOR &&
(SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 ||
SrcVT == MVT::v8i8))
- return createMMXBuildVector(N0, DAG, Subtarget);
+ return createMMXBuildVector(cast<BuildVectorSDNode>(N0), DAG, Subtarget);
// Detect bitcasts between element or subvector extraction to x86mmx.
if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
@@ -33297,66 +35086,16 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// Given a select, detect the following pattern:
-// 1: %2 = zext <N x i8> %0 to <N x i32>
-// 2: %3 = zext <N x i8> %1 to <N x i32>
-// 3: %4 = sub nsw <N x i32> %2, %3
-// 4: %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
-// 5: %6 = sub nsw <N x i32> zeroinitializer, %4
-// 6: %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
+// Given a ABS node, detect the following pattern:
+// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
// This is useful as it is the input into a SAD pattern.
-static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
- SDValue &Op1) {
- // Check the condition of the select instruction is greater-than.
- SDValue SetCC = Select->getOperand(0);
- if (SetCC.getOpcode() != ISD::SETCC)
- return false;
- ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
- if (CC != ISD::SETGT && CC != ISD::SETLT)
- return false;
-
- SDValue SelectOp1 = Select->getOperand(1);
- SDValue SelectOp2 = Select->getOperand(2);
-
- // The following instructions assume SelectOp1 is the subtraction operand
- // and SelectOp2 is the negation operand.
- // In the case of SETLT this is the other way around.
- if (CC == ISD::SETLT)
- std::swap(SelectOp1, SelectOp2);
-
- // The second operand of the select should be the negation of the first
- // operand, which is implemented as 0 - SelectOp1.
- if (!(SelectOp2.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
- SelectOp2.getOperand(1) == SelectOp1))
- return false;
-
- // The first operand of SetCC is the first operand of the select, which is the
- // difference between the two input vectors.
- if (SetCC.getOperand(0) != SelectOp1)
- return false;
-
- // In SetLT case, The second operand of the comparison can be either 1 or 0.
- APInt SplatVal;
- if ((CC == ISD::SETLT) &&
- !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
- SplatVal.isOneValue()) ||
- (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
+static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
+ SDValue AbsOp1 = Abs->getOperand(0);
+ if (AbsOp1.getOpcode() != ISD::SUB)
return false;
- // In SetGT case, The second operand of the comparison can be either -1 or 0.
- if ((CC == ISD::SETGT) &&
- !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
- ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
- return false;
-
- // The first operand of the select is the difference between the two input
- // vectors.
- if (SelectOp1.getOpcode() != ISD::SUB)
- return false;
-
- Op0 = SelectOp1.getOperand(0);
- Op1 = SelectOp1.getOperand(1);
+ Op0 = AbsOp1.getOperand(0);
+ Op1 = AbsOp1.getOperand(1);
// Check if the operands of the sub are zero-extended from vectors of i8.
if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
@@ -33476,23 +35215,25 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, DL));
}
-// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
+// Attempt to replace an all_of/any_of/parity style horizontal reduction with a MOVMSK.
static SDValue combineHorizontalPredicateResult(SDNode *Extract,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- // Bail without SSE2 or with AVX512VL (which uses predicate registers).
- if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
+ // Bail without SSE2.
+ if (!Subtarget.hasSSE2())
return SDValue();
EVT ExtractVT = Extract->getValueType(0);
unsigned BitWidth = ExtractVT.getSizeInBits();
if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
- ExtractVT != MVT::i8)
+ ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
return SDValue();
- // Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
+ // Check for OR(any_of)/AND(all_of)/XOR(parity) horizontal reduction patterns.
ISD::NodeType BinOp;
SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
+ if (!Match && ExtractVT == MVT::i1)
+ Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::XOR});
if (!Match)
return SDValue();
@@ -33501,53 +35242,104 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
if (Match.getScalarValueSizeInBits() != BitWidth)
return SDValue();
- // We require AVX2 for PMOVMSKB for v16i16/v32i8;
- unsigned MatchSizeInBits = Match.getValueSizeInBits();
- if (!(MatchSizeInBits == 128 ||
- (MatchSizeInBits == 256 &&
- ((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
- return SDValue();
+ SDValue Movmsk;
+ SDLoc DL(Extract);
+ EVT MatchVT = Match.getValueType();
+ unsigned NumElts = MatchVT.getVectorNumElements();
- // Don't bother performing this for 2-element vectors.
- if (Match.getValueType().getVectorNumElements() <= 2)
- return SDValue();
+ if (ExtractVT == MVT::i1) {
+ // Special case for (pre-legalization) vXi1 reductions.
+ if (NumElts > 32)
+ return SDValue();
+ if (DAG.getTargetLoweringInfo().isTypeLegal(MatchVT)) {
+ // If this is a legal AVX512 predicate type then we can just bitcast.
+ EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ Movmsk = DAG.getBitcast(MovmskVT, Match);
+ } else {
+ // Use combineBitcastvxi1 to create the MOVMSK.
+ if (NumElts == 32 && !Subtarget.hasInt256()) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
+ Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
+ NumElts = 16;
+ }
+ EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
+ }
+ if (!Movmsk)
+ return SDValue();
+ Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, MVT::i32);
+ } else {
+ // Bail with AVX512VL (which uses predicate registers).
+ if (Subtarget.hasVLX())
+ return SDValue();
- // Check that we are extracting a reduction of all sign bits.
- if (DAG.ComputeNumSignBits(Match) != BitWidth)
- return SDValue();
+ unsigned MatchSizeInBits = Match.getValueSizeInBits();
+ if (!(MatchSizeInBits == 128 ||
+ (MatchSizeInBits == 256 && Subtarget.hasAVX())))
+ return SDValue();
- // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
- MVT MaskVT;
- if (64 == BitWidth || 32 == BitWidth)
- MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
- MatchSizeInBits / BitWidth);
- else
- MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+ // Make sure this isn't a vector of 1 element. The perf win from using
+    // MOVMSK diminishes with fewer elements in the reduction, but it is
+ // generally better to get the comparison over to the GPRs as soon as
+ // possible to reduce the number of vector ops.
+ if (Match.getValueType().getVectorNumElements() < 2)
+ return SDValue();
+
+ // Check that we are extracting a reduction of all sign bits.
+ if (DAG.ComputeNumSignBits(Match) != BitWidth)
+ return SDValue();
+
+ if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
+ Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
+ MatchSizeInBits = Match.getValueSizeInBits();
+ }
+
+ // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
+ MVT MaskSrcVT;
+ if (64 == BitWidth || 32 == BitWidth)
+ MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
+ MatchSizeInBits / BitWidth);
+ else
+ MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
+
+ SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
+ Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
+ NumElts = MaskSrcVT.getVectorNumElements();
+ }
+ assert(NumElts <= 32 && "Not expecting more than 32 elements");
- APInt CompareBits;
+ if (BinOp == ISD::XOR) {
+ // parity -> (AND (CTPOP(MOVMSK X)), 1)
+ SDValue Mask = DAG.getConstant(1, DL, MVT::i32);
+ SDValue Result = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Movmsk);
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, Mask);
+ return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
+ }
+
+ SDValue CmpC;
ISD::CondCode CondCode;
if (BinOp == ISD::OR) {
// any_of -> MOVMSK != 0
- CompareBits = APInt::getNullValue(32);
+ CmpC = DAG.getConstant(0, DL, MVT::i32);
CondCode = ISD::CondCode::SETNE;
} else {
// all_of -> MOVMSK == ((1 << NumElts) - 1)
- CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
+ CmpC = DAG.getConstant((1ULL << NumElts) - 1, DL, MVT::i32);
CondCode = ISD::CondCode::SETEQ;
}
- // Perform the select as i32/i64 and then truncate to avoid partial register
- // stalls.
- unsigned ResWidth = std::max(BitWidth, 32u);
- EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
- SDLoc DL(Extract);
- SDValue Zero = DAG.getConstant(0, DL, ResVT);
- SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
- SDValue Res = DAG.getBitcast(MaskVT, Match);
- Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
- Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
- Ones, Zero, CondCode);
- return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
+ // The setcc produces an i8 of 0/1, so extend that to the result width and
+ // negate to get the final 0/-1 mask value.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT SetccVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
+ SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
+ SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
+ SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
+ return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
}
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
@@ -33592,7 +35384,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
// If there was a match, we want Root to be a select that is the root of an
// abs-diff pattern.
- if (!Root || (Root.getOpcode() != ISD::VSELECT))
+ if (!Root || Root.getOpcode() != ISD::ABS)
return SDValue();
// Check whether we have an abs-diff pattern feeding into the select.
@@ -33651,15 +35443,19 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
+ SDValue SrcBC = peekThroughBitcasts(Src);
+
// Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
- if (X86ISD::VBROADCAST == Src.getOpcode() &&
- Src.getOperand(0).getValueType() == VT)
- return Src.getOperand(0);
+ if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
+ SDValue SrcOp = SrcBC.getOperand(0);
+ if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getBitcast(VT, SrcOp);
+ }
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
- if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG))
+ if (!resolveTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
return SDValue();
// Attempt to narrow/widen the shuffle mask to the correct size.
@@ -33704,7 +35500,6 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
: DAG.getConstant(0, dl, VT);
SDValue SrcOp = Ops[SrcIdx / Mask.size()];
- SrcOp = DAG.getBitcast(SrcVT, SrcOp);
SrcIdx = SrcIdx % Mask.size();
// We can only extract other elements from 128-bit vectors and in certain
@@ -33714,6 +35509,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) &&
((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
assert(SrcSVT == VT && "Unexpected extraction type");
+ SrcOp = DAG.getBitcast(SrcVT, SrcOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp,
DAG.getIntPtrConstant(SrcIdx, dl));
}
@@ -33723,6 +35519,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
"Unexpected extraction type");
unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
+ SrcOp = DAG.getBitcast(SrcVT, SrcOp);
SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
DAG.getIntPtrConstant(SrcIdx, dl));
return DAG.getZExtOrTrunc(ExtOp, dl, VT);
@@ -33731,6 +35528,155 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Extracting a scalar FP value from vector element 0 is free, so extract each
+/// operand first, then perform the math as a scalar op.
+static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
+ assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
+ SDValue Vec = ExtElt->getOperand(0);
+ SDValue Index = ExtElt->getOperand(1);
+ EVT VT = ExtElt->getValueType(0);
+ EVT VecVT = Vec.getValueType();
+
+ // TODO: If this is a unary/expensive/expand op, allow extraction from a
+ // non-zero element because the shuffle+scalar op will be cheaper?
+ if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)
+ return SDValue();
+
+ // Vector FP compares don't fit the pattern of FP math ops (propagate, not
+  //    extract, the condition code), so deal with those as a special case.
+ if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) {
+ EVT OpVT = Vec.getOperand(0).getValueType().getScalarType();
+ if (OpVT != MVT::f32 && OpVT != MVT::f64)
+ return SDValue();
+
+ // extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC
+ SDLoc DL(ExtElt);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
+ Vec.getOperand(0), Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
+ Vec.getOperand(1), Index);
+ return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
+ }
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+
+ // Vector FP selects don't fit the pattern of FP math ops (because the
+ // condition has a different type and we have to change the opcode), so deal
+ // with those here.
+ // FIXME: This is restricted to pre type legalization by ensuring the setcc
+ // has i1 elements. If we loosen this we need to convert vector bool to a
+ // scalar bool.
+ if (Vec.getOpcode() == ISD::VSELECT &&
+ Vec.getOperand(0).getOpcode() == ISD::SETCC &&
+ Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
+ Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
+ // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
+ SDLoc DL(ExtElt);
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ Vec.getOperand(0).getValueType().getScalarType(),
+ Vec.getOperand(0), Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ Vec.getOperand(1), Index);
+ SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ Vec.getOperand(2), Index);
+ return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+ }
+
+ // TODO: This switch could include FNEG and the x86-specific FP logic ops
+ // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
+ // missed load folding and fma+fneg combining.
+ switch (Vec.getOpcode()) {
+ case ISD::FMA: // Begin 3 operands
+ case ISD::FMAD:
+ case ISD::FADD: // Begin 2 operands
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FCOPYSIGN:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ case X86ISD::FMAX:
+ case X86ISD::FMIN:
+ case ISD::FABS: // Begin 1 operand
+ case ISD::FSQRT:
+ case ISD::FRINT:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FFLOOR:
+ case X86ISD::FRCP:
+ case X86ISD::FRSQRT: {
+ // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
+ SDLoc DL(ExtElt);
+ SmallVector<SDValue, 4> ExtOps;
+ for (SDValue Op : Vec->ops())
+ ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
+ return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
+ }
+ default:
+ return SDValue();
+ }
+ llvm_unreachable("All opcodes should return within switch");
+}
+
+/// Try to convert a vector reduction sequence composed of binops and shuffles
+/// into horizontal ops.
+static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ if (!Subtarget.hasFastHorizontalOps() && !OptForSize)
+ return SDValue();
+ SDValue Index = ExtElt->getOperand(1);
+ if (!isNullConstant(Index))
+ return SDValue();
+
+ // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros.
+ ISD::NodeType Opc;
+ SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD});
+ if (!Rdx)
+ return SDValue();
+
+ EVT VT = ExtElt->getValueType(0);
+ EVT VecVT = ExtElt->getOperand(0).getValueType();
+ if (VecVT.getScalarType() != VT)
+ return SDValue();
+
+ unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
+ SDLoc DL(ExtElt);
+
+ // 256-bit horizontal instructions operate on 128-bit chunks rather than
+ // across the whole vector, so we need an extract + hop preliminary stage.
+ // This is the only step where the operands of the hop are not the same value.
+ // TODO: We could extend this to handle 512-bit or even longer vectors.
+ if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
+ ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
+ unsigned NumElts = VecVT.getVectorNumElements();
+ SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
+ SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
+ VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2);
+ Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo);
+ }
+ if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
+ !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
+ return SDValue();
+
+ // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0
+ assert(Rdx.getValueType() == VecVT && "Unexpected reduction match");
+ unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
+ for (unsigned i = 0; i != ReductionSteps; ++i)
+ Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+}
+
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
@@ -33741,23 +35687,48 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
return NewOp;
+ SDValue InputVector = N->getOperand(0);
+ SDValue EltIdx = N->getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);
+
+ EVT SrcVT = InputVector.getValueType();
+ EVT VT = N->getValueType(0);
+ SDLoc dl(InputVector);
+ bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
+
+ if (CIdx && CIdx->getAPIntValue().uge(SrcVT.getVectorNumElements()))
+ return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
+
+ // Integer Constant Folding.
+ if (CIdx && VT.isInteger()) {
+ APInt UndefVecElts;
+ SmallVector<APInt, 16> EltBits;
+ unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits();
+ if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts,
+ EltBits, true, false)) {
+ uint64_t Idx = CIdx->getZExtValue();
+ if (UndefVecElts[Idx])
+ return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
+ return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()),
+ dl, VT);
+ }
+ }
+
// TODO - Remove this once we can handle the implicit zero-extension of
// X86ISD::PEXTRW/X86ISD::PEXTRB in:
// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
// combineBasicSADPattern.
- if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ if (IsPextr) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(
+ SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
+ return SDValue(N, 0);
return SDValue();
+ }
if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
- SDValue InputVector = N->getOperand(0);
- SDValue EltIdx = N->getOperand(1);
-
- EVT SrcVT = InputVector.getValueType();
- EVT VT = N->getValueType(0);
- SDLoc dl(InputVector);
-
// Detect mmx extraction of all bits as a i64. It works better as a bitcast.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
@@ -33778,16 +35749,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
}
- if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST &&
- isa<ConstantSDNode>(EltIdx) &&
- isa<ConstantSDNode>(InputVector.getOperand(0))) {
- uint64_t ExtractedElt = N->getConstantOperandVal(1);
- auto *InputC = cast<ConstantSDNode>(InputVector.getOperand(0));
- const APInt &InputValue = InputC->getAPIntValue();
- uint64_t Res = InputValue[ExtractedElt];
- return DAG.getConstant(Res, dl, MVT::i1);
- }
-
// Check whether this extract is the root of a sum of absolute differences
// pattern. This has to be done here because we really want it to happen
// pre-legalization,
@@ -33802,6 +35763,45 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
return MinMax;
+ if (SDValue V = combineReductionToHorizontal(N, DAG, Subtarget))
+ return V;
+
+ if (SDValue V = scalarizeExtEltFP(N, DAG))
+ return V;
+
+ // Attempt to extract a i1 element by using MOVMSK to extract the signbits
+ // and then testing the relevant element.
+ if (CIdx && SrcVT.getScalarType() == MVT::i1) {
+ SmallVector<SDNode *, 16> BoolExtracts;
+ auto IsBoolExtract = [&BoolExtracts](SDNode *Use) {
+ if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Use->getOperand(1)) &&
+ Use->getValueType(0) == MVT::i1) {
+ BoolExtracts.push_back(Use);
+ return true;
+ }
+ return false;
+ };
+ if (all_of(InputVector->uses(), IsBoolExtract) &&
+ BoolExtracts.size() > 1) {
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
+ if (SDValue BC =
+ combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
+ for (SDNode *Use : BoolExtracts) {
+ // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask
+ unsigned MaskIdx = Use->getConstantOperandVal(1);
+ APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
+ SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
+ SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
+ Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
+ DCI.CombineTo(Use, Res);
+ }
+ return SDValue(N, 0);
+ }
+ }
+ }
+
return SDValue();
}
@@ -33825,11 +35825,15 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
assert(CondVT.isVector() && "Vector select expects a vector selector!");
- bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
// Check if the first operand is all zeros and Cond type is vXi1.
// This situation only applies to avx512.
- if (TValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
- CondVT.getVectorElementType() == MVT::i1) {
+ // TODO: Use isNullOrNullSplat() to distinguish constants with undefs?
+ // TODO: Can we assert that both operands are not zeros (because that should
+ // get simplified at node creation time)?
+ bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+ bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+ if (TValIsAllZeros && !FValIsAllZeros && Subtarget.hasAVX512() &&
+ Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
// Vselect cond, op1, op2 = Vselect not(cond), op2, op1
@@ -33844,12 +35848,10 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
- bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
- bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
-
// Try to invert the condition if true value is not all 1s and false value is
- // not all 0s.
- if (!TValIsAllOnes && !FValIsAllZeros &&
+ // not all 0s. Only do this if the condition has one use.
+ bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
+ if (!TValIsAllOnes && !FValIsAllZeros && Cond.hasOneUse() &&
// Check if the selector will be produced by CMPP*/PCMP*.
Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted.
@@ -33907,6 +35909,39 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// If both arms of a vector select are concatenated vectors, split the select,
+/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
+/// vselect Cond, (concat T0, T1), (concat F0, F1) -->
+/// concat (vselect (split Cond), T0, F0), (vselect (split Cond), T1, F1)
+static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode != X86ISD::BLENDV && Opcode != ISD::VSELECT)
+ return SDValue();
+
+ // TODO: Split 512-bit vectors too?
+ EVT VT = N->getValueType(0);
+ if (!VT.is256BitVector())
+ return SDValue();
+
+ // TODO: Split as long as any 2 of the 3 operands are concatenated?
+ SDValue Cond = N->getOperand(0);
+ SDValue TVal = N->getOperand(1);
+ SDValue FVal = N->getOperand(2);
+ SmallVector<SDValue, 4> CatOpsT, CatOpsF;
+ if (!TVal.hasOneUse() || !FVal.hasOneUse() ||
+ !collectConcatOps(TVal.getNode(), CatOpsT) ||
+ !collectConcatOps(FVal.getNode(), CatOpsF))
+ return SDValue();
+
+ auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(Opcode, DL, Ops[1].getValueType(), Ops);
+ };
+ return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { Cond, TVal, FVal },
+ makeBlend, /*CheckBWI*/ false);
+}
+
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
@@ -33973,7 +36008,7 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
-/// This function will also call SimplfiyDemandedBits on already created
+/// This function will also call SimplifyDemandedBits on already created
/// BLENDV to perform additional simplifications.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -34268,6 +36303,42 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
}
+ // AVX512 - Extend select with zero to merge with target shuffle.
+ // select(mask, extract_subvector(shuffle(x)), zero) -->
+ // extract_subvector(select(insert_subvector(mask), shuffle(x), zero))
+ // TODO - support non target shuffles as well.
+ if (Subtarget.hasAVX512() && CondVT.isVector() &&
+ CondVT.getVectorElementType() == MVT::i1) {
+ auto SelectableOp = [&TLI](SDValue Op) {
+ return Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isTargetShuffle(Op.getOperand(0).getOpcode()) &&
+ isNullConstant(Op.getOperand(1)) &&
+ TLI.isTypeLegal(Op.getOperand(0).getValueType()) &&
+ Op.hasOneUse() && Op.getOperand(0).hasOneUse();
+ };
+
+ bool SelectableLHS = SelectableOp(LHS);
+ bool SelectableRHS = SelectableOp(RHS);
+ bool ZeroLHS = ISD::isBuildVectorAllZeros(LHS.getNode());
+ bool ZeroRHS = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if ((SelectableLHS && ZeroRHS) || (SelectableRHS && ZeroLHS)) {
+ EVT SrcVT = SelectableLHS ? LHS.getOperand(0).getValueType()
+ : RHS.getOperand(0).getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ EVT SrcCondVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumSrcElts);
+ LHS = insertSubVector(DAG.getUNDEF(SrcVT), LHS, 0, DAG, DL,
+ VT.getSizeInBits());
+ RHS = insertSubVector(DAG.getUNDEF(SrcVT), RHS, 0, DAG, DL,
+ VT.getSizeInBits());
+ Cond = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcCondVT,
+ DAG.getUNDEF(SrcCondVT), Cond,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS);
+ return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
+ }
+ }
+
if (SDValue V = combineSelectOfTwoConstants(N, DAG))
return V;
@@ -34338,14 +36409,16 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> subus x, C
- // TODO: Handle build_vectors with undef elements.
auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
- return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
+ return (!Op && !Cond) ||
+ (Op && Cond &&
+ Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
};
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
- OpRHS = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), OpRHS);
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+ /*AllowUndefs*/ true)) {
+ OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ OpRHS);
return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
}
@@ -34432,6 +36505,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
return V;
+ if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
+ return V;
+
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
@@ -34715,7 +36791,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
// When legalizing carry, we create carries via add X, -1
// If that comes from an actual carry, via setcc, we use the
// carry directly.
-static SDValue combineCarryThroughADD(SDValue EFLAGS) {
+static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
if (EFLAGS.getOpcode() == X86ISD::ADD) {
if (isAllOnesConstant(EFLAGS.getOperand(1))) {
SDValue Carry = EFLAGS.getOperand(0);
@@ -34728,8 +36804,34 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS) {
Carry = Carry.getOperand(0);
if (Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
- if (Carry.getConstantOperandVal(0) == X86::COND_B)
- return Carry.getOperand(1);
+ // TODO: Merge this code with equivalent in combineAddOrSubToADCOrSBB?
+ uint64_t CarryCC = Carry.getConstantOperandVal(0);
+ SDValue CarryOp1 = Carry.getOperand(1);
+ if (CarryCC == X86::COND_B)
+ return CarryOp1;
+ if (CarryCC == X86::COND_A) {
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp
+ // instruction cannot take an immediate as its first operand.
+ //
+ if (CarryOp1.getOpcode() == X86ISD::SUB &&
+ CarryOp1.getNode()->hasOneUse() &&
+ CarryOp1.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(CarryOp1.getOperand(1))) {
+ SDValue SubCommute =
+ DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),
+ CarryOp1.getOperand(1), CarryOp1.getOperand(0));
+ return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
+ }
+ }
+ // If this is a check of the z flag of an add with 1, switch to the
+ // C flag.
+ if (CarryCC == X86::COND_E &&
+ CarryOp1.getOpcode() == X86ISD::ADD &&
+ isOneConstant(CarryOp1.getOperand(1)))
+ return CarryOp1;
}
}
}
@@ -34744,7 +36846,7 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (CC == X86::COND_B)
- if (SDValue Flags = combineCarryThroughADD(EFLAGS))
+ if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
return Flags;
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
@@ -34763,6 +36865,10 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
+ // cmov X, X, ?, ? --> X
+ if (TrueOp == FalseOp)
+ return TrueOp;
+
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of X86 cond.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
@@ -35044,7 +37150,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
// pmulld is supported since SSE41. It is better to use pmulld
// instead of pmullw+pmulhw, except for subtargets where pmulld is slower than
// the expansion.
- bool OptForMinSize = DAG.getMachineFunction().getFunction().optForMinSize();
+ bool OptForMinSize = DAG.getMachineFunction().getFunction().hasMinSize();
if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
return SDValue();
@@ -35283,8 +37389,8 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
// Use SplitOpsAndApply to handle AVX splitting.
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
- MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
- return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
+ MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
+ return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
};
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
{ DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) },
@@ -35352,7 +37458,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (!MulConstantOptimization)
return SDValue();
// An imul is usually smaller than the alternative sequence.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
@@ -35489,7 +37595,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt Mask = N0.getConstantOperandAPInt(1);
Mask <<= N1C->getAPIntValue();
bool MaskOK = false;
// We can handle cases concerning bit-widening nodes containing setcc_c if
@@ -35638,24 +37744,6 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (N->getOpcode() == ISD::SHL)
- if (SDValue V = combineShiftLeft(N, DAG))
- return V;
-
- if (N->getOpcode() == ISD::SRA)
- if (SDValue V = combineShiftRightArithmetic(N, DAG))
- return V;
-
- if (N->getOpcode() == ISD::SRL)
- if (SDValue V = combineShiftRightLogical(N, DAG, DCI))
- return V;
-
- return SDValue();
-}
-
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -35677,8 +37765,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
// Constant Folding.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
- if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) &&
- (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) &&
+ if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) &&
+ (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) &&
getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -35750,10 +37838,7 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
// Attempt to combine as shuffle.
SDValue Op(N, 0);
- if (SDValue Res =
- combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false,
- /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
return SDValue();
@@ -35766,11 +37851,22 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
X86ISD::VSRL == N->getOpcode()) &&
"Unexpected shift opcode");
EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
// Shift zero -> zero.
- if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
return DAG.getConstant(0, SDLoc(N), VT);
+ // Detect constant shift amounts.
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) {
+ unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);
+ return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,
+ EltBits[0].getZExtValue(), DAG);
+ }
+
APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
@@ -35829,9 +37925,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// We can decode 'whole byte' logical bit shifts as shuffles.
if (LogicalShift && (ShiftVal % 8) == 0) {
SDValue Op(N, 0);
- if (SDValue Res = combineX86ShufflesRecursively(
- {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
@@ -35864,18 +37958,20 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
- assert(
- ((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) ||
- (N->getOpcode() == X86ISD::PINSRW &&
- N->getValueType(0) == MVT::v8i16)) &&
- "Unexpected vector insertion");
+ EVT VT = N->getValueType(0);
+ assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) ||
+ (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16)) &&
+ "Unexpected vector insertion");
+
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ return SDValue(N, 0);
// Attempt to combine PINSRB/PINSRW patterns to a shuffle.
SDValue Op(N, 0);
- if (SDValue Res =
- combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false,
- /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
return SDValue();
@@ -35894,8 +37990,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue CMP0 = N0->getOperand(1);
- SDValue CMP1 = N1->getOperand(1);
+ SDValue CMP0 = N0.getOperand(1);
+ SDValue CMP1 = N1.getOperand(1);
SDLoc DL(N);
// The SETCCs should both refer to the same CMP.
@@ -35987,6 +38083,34 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Match (xor X, -1) -> X.
+// Match extract_subvector(xor X, -1) -> extract_subvector(X).
+// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
+static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
+ V = peekThroughBitcasts(V);
+ if (V.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
+ return V.getOperand(0);
+ if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
+ if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
+ Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
+ Not, V.getOperand(1));
+ }
+ }
+ SmallVector<SDValue, 2> CatOps;
+ if (collectConcatOps(V.getNode(), CatOps)) {
+ for (SDValue &CatOp : CatOps) {
+ SDValue NotCat = IsNOT(CatOp, DAG);
+ if (!NotCat) return SDValue();
+ CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
+ }
+ return SDValue();
+}
+
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND);
@@ -35996,15 +38120,14 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
return SDValue();
SDValue X, Y;
- SDValue N0 = peekThroughBitcasts(N->getOperand(0));
- SDValue N1 = peekThroughBitcasts(N->getOperand(1));
- if (N0.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) {
- X = N0.getOperand(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (SDValue Not = IsNOT(N0, DAG)) {
+ X = Not;
Y = N1;
- } else if (N1.getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) {
- X = N1.getOperand(0);
+ } else if (SDValue Not = IsNOT(N1, DAG)) {
+ X = Not;
Y = N0;
} else
return SDValue();
@@ -36046,7 +38169,7 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
// The type of the truncated inputs.
- if (N0->getOperand(0).getValueType() != VT)
+ if (N0.getOperand(0).getValueType() != VT)
return SDValue();
// The right side has to be a 'trunc' or a constant vector.
@@ -36062,9 +38185,9 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Set N0 and N1 to hold the inputs to the new wide operation.
- N0 = N0->getOperand(0);
+ N0 = N0.getOperand(0);
if (RHSTrunc)
- N1 = N1->getOperand(0);
+ N1 = N1.getOperand(0);
else
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
@@ -36088,34 +38211,35 @@ static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
/// unnecessary moves from SSE to integer registers.
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- unsigned FPOpcode = ISD::DELETED_NODE;
- if (N->getOpcode() == ISD::AND)
- FPOpcode = X86ISD::FAND;
- else if (N->getOpcode() == ISD::OR)
- FPOpcode = X86ISD::FOR;
- else if (N->getOpcode() == ISD::XOR)
- FPOpcode = X86ISD::FXOR;
-
- assert(FPOpcode != ISD::DELETED_NODE &&
- "Unexpected input node for FP logic conversion");
-
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
- ((Subtarget.hasSSE1() && VT == MVT::i32) ||
- (Subtarget.hasSSE2() && VT == MVT::i64))) {
- SDValue N00 = N0.getOperand(0);
- SDValue N10 = N1.getOperand(0);
- EVT N00Type = N00.getValueType();
- EVT N10Type = N10.getValueType();
- if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
- SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
- return DAG.getBitcast(VT, FPLogic);
- }
+
+ if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+ EVT N00Type = N00.getValueType();
+ EVT N10Type = N10.getValueType();
+
+ // Ensure that both types are the same and are legal scalar fp types.
+ if (N00Type != N10Type ||
+ !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
+ (Subtarget.hasSSE2() && N00Type == MVT::f64)))
+ return SDValue();
+
+ unsigned FPOpcode;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected input node for FP logic conversion");
+ case ISD::AND: FPOpcode = X86ISD::FAND; break;
+ case ISD::OR: FPOpcode = X86ISD::FOR; break;
+ case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
}
- return SDValue();
+
+ SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+ return DAG.getBitcast(VT, FPLogic);
}
/// If this is a zero/all-bits result that is bitwise-anded with a low bits
@@ -36371,6 +38495,24 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineParity(N, DAG, Subtarget))
return V;
+ // Match all-of bool scalar reductions into a bitcast/movmsk + cmp.
+ // TODO: Support multiple SrcOps.
+ if (VT == MVT::i1) {
+ SmallVector<SDValue, 2> SrcOps;
+ if (matchBitOpReduction(SDValue(N, 0), ISD::AND, SrcOps) &&
+ SrcOps.size() == 1) {
+ SDLoc dl(N);
+ unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
+ EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
+ if (Mask) {
+ APInt AllBits = APInt::getAllOnesValue(NumElts);
+ return DAG.getSetCC(dl, MVT::i1, Mask,
+ DAG.getConstant(AllBits, dl, MaskVT), ISD::SETEQ);
+ }
+ }
+ }
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -36392,9 +38534,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
// Attempt to recursively combine a bitmask AND with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
- if (SDValue Res = combineX86ShufflesRecursively(
- {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
@@ -36440,6 +38580,52 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
+static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
+ return SDValue();
+
+ SDValue N0 = peekThroughBitcasts(N->getOperand(0));
+ SDValue N1 = peekThroughBitcasts(N->getOperand(1));
+ if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ // On XOP we'll lower to PCMOV so accept one use, otherwise only
+ // do this if either mask has multiple uses already.
+ if (!(Subtarget.hasXOP() || !N0.getOperand(1).hasOneUse() ||
+ !N1.getOperand(1).hasOneUse()))
+ return SDValue();
+
+ // Attempt to extract constant byte masks.
+ APInt UndefElts0, UndefElts1;
+ SmallVector<APInt, 32> EltBits0, EltBits1;
+ if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0,
+ false, false))
+ return SDValue();
+ if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1,
+ false, false))
+ return SDValue();
+
+ for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
+ // TODO - add UNDEF elts support.
+ if (UndefElts0[i] || UndefElts1[i])
+ return SDValue();
+ if (EltBits0[i] != ~EltBits1[i])
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ SDValue X = N->getOperand(0);
+ SDValue Y =
+ DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
+ DAG.getBitcast(VT, N1.getOperand(0)));
+ return DAG.getNode(ISD::OR, DL, VT, X, Y);
+}
+
// Try to match OR(AND(~MASK,X),AND(MASK,Y)) logic pattern.
static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
if (N->getOpcode() != ISD::OR)
@@ -36472,6 +38658,68 @@ static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) {
return true;
}
+// Try to match:
+// (or (and (M, (sub 0, X)), (pandn M, X)))
+// which is a special case of vselect:
+// (vselect M, (sub 0, X), X)
+// Per:
+// http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+// We know that, if fNegate is 0 or 1:
+// (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+//
+// Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+// ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+// ( M ? -X : X) == ((X ^ M ) + (M & 1))
+// This lets us transform our vselect to:
+// (add (xor X, M), (and M, 1))
+// And further to:
+// (sub (xor X, M), M)
+static SDValue combineLogicBlendIntoConditionalNegate(
+ EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
+ SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ EVT MaskVT = Mask.getValueType();
+ assert(MaskVT.isInteger() &&
+ DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
+ "Mask must be zero/all-bits");
+
+ if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
+ return SDValue();
+ if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
+ return SDValue();
+
+ auto IsNegV = [](SDNode *N, SDValue V) {
+ return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
+ ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
+ };
+
+ SDValue V;
+ if (IsNegV(Y.getNode(), X))
+ V = X;
+ else if (IsNegV(X.getNode(), Y))
+ V = Y;
+ else
+ return SDValue();
+
+ SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
+ SDValue SubOp2 = Mask;
+
+ // If the negate was on the false side of the select, then
+ // the operands of the SUB need to be swapped. PR 27251.
+ // This is because the pattern being matched above is
+ // (vselect M, (sub (0, X), X) -> (sub (xor X, M), M)
+ // but if the pattern matched was
+ // (vselect M, X, (sub (0, X))), that is really negation of the pattern
+ // above, -(vselect M, (sub 0, X), X), and therefore the replacement
+ // pattern also needs to be a negation of the replacement pattern above.
+ // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
+ // sub accomplishes the negation of the replacement pattern.
+ if (V == Y)
+ std::swap(SubOp1, SubOp2);
+
+ SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
+ return DAG.getBitcast(VT, Res);
+}
+
// Try to fold:
// (or (and (m, y), (pandn m, x)))
// into:
@@ -36507,55 +38755,10 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
- // Try to match:
- // (or (and (M, (sub 0, X)), (pandn M, X)))
- // which is a special case of vselect:
- // (vselect M, (sub 0, X), X)
- // Per:
- // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
- // We know that, if fNegate is 0 or 1:
- // (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
- //
- // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
- // ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
- // ( M ? -X : X) == ((X ^ M ) + (M & 1))
- // This lets us transform our vselect to:
- // (add (xor X, M), (and M, 1))
- // And further to:
- // (sub (xor X, M), M)
- if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT &&
- DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) {
- auto IsNegV = [](SDNode *N, SDValue V) {
- return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
- ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
- };
- SDValue V;
- if (IsNegV(Y.getNode(), X))
- V = X;
- else if (IsNegV(X.getNode(), Y))
- V = Y;
-
- if (V) {
- SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
- SDValue SubOp2 = Mask;
-
- // If the negate was on the false side of the select, then
- // the operands of the SUB need to be swapped. PR 27251.
- // This is because the pattern being matched above is
- // (vselect M, (sub (0, X), X) -> (sub (xor X, M), M)
- // but if the pattern matched was
- // (vselect M, X, (sub (0, X))), that is really negation of the pattern
- // above, -(vselect M, (sub 0, X), X), and therefore the replacement
- // pattern also needs to be a negation of the replacement pattern above.
- // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
- // sub accomplishes the negation of the replacement pattern.
- if (V == Y)
- std::swap(SubOp1, SubOp2);
-
- SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
- return DAG.getBitcast(VT, Res);
- }
- }
+ // Attempt to combine to conditional negate: (sub (xor X, M), M)
+ if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
+ DAG, Subtarget))
+ return Res;
// PBLENDVB is only available on SSE 4.1.
if (!Subtarget.hasSSE41())
@@ -36665,8 +38868,7 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
// Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
if (RHS->getOpcode() == ISD::OR)
std::swap(LHS, RHS);
- EVT VT = OR->getValueType(0);
- SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+ NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
if (!NewRHS)
return SDValue();
Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
@@ -36702,15 +38904,16 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
return FPLogic;
+ if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
+ return R;
+
if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
return R;
// Attempt to recursively combine an OR of shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
- if (SDValue Res = combineX86ShufflesRecursively(
- {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
@@ -36718,7 +38921,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
- bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+ bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
unsigned Bits = VT.getScalarSizeInBits();
// SHLD/SHRD instructions have lower register pressure, but on some
@@ -36747,14 +38950,14 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
SDValue ShMsk0;
if (ShAmt0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(ShAmt0.getOperand(1)) &&
- ShAmt0.getConstantOperandVal(1) == (Bits - 1)) {
+ ShAmt0.getConstantOperandAPInt(1) == (Bits - 1)) {
ShMsk0 = ShAmt0;
ShAmt0 = ShAmt0.getOperand(0);
}
SDValue ShMsk1;
if (ShAmt1.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(ShAmt1.getOperand(1)) &&
- ShAmt1.getConstantOperandVal(1) == (Bits - 1)) {
+ ShAmt1.getConstantOperandAPInt(1) == (Bits - 1)) {
ShMsk1 = ShAmt1;
ShAmt1 = ShAmt1.getOperand(0);
}
@@ -36765,46 +38968,55 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
ShAmt1 = ShAmt1.getOperand(0);
SDLoc DL(N);
- unsigned Opc = X86ISD::SHLD;
+ unsigned Opc = ISD::FSHL;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
- if (ShAmt0.getOpcode() == ISD::SUB ||
- ShAmt0.getOpcode() == ISD::XOR) {
- Opc = X86ISD::SHRD;
+ if (ShAmt0.getOpcode() == ISD::SUB || ShAmt0.getOpcode() == ISD::XOR) {
+ Opc = ISD::FSHR;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
std::swap(ShMsk0, ShMsk1);
}
- // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
- // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
- // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
- // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
- // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> SHLD( X, Y, C )
- // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> SHRD( X, Y, C )
+ auto GetFunnelShift = [&DAG, &DL, VT, Opc](SDValue Op0, SDValue Op1,
+ SDValue Amt) {
+ if (Opc == ISD::FSHR)
+ std::swap(Op0, Op1);
+ return DAG.getNode(Opc, DL, VT, Op0, Op1,
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Amt));
+ };
+
+ // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> FSHL( X, Y, C )
+ // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> FSHR( Y, X, C )
+ // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHL( X, Y, C )
+ // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> FSHR( Y, X, C )
+ // OR( SHL( X, AND( C, 31 ) ), SRL( Y, AND( 0 - C, 31 ) ) ) -> FSHL( X, Y, C )
+ // OR( SRL( X, AND( C, 31 ) ), SHL( Y, AND( 0 - C, 31 ) ) ) -> FSHR( Y, X, C )
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (auto *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt1Op1.getOperand(1)) &&
+ ShAmt1Op1.getConstantOperandAPInt(1) == (Bits - 1)) {
+ ShMsk1 = ShAmt1Op1;
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ }
if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if ((SumC->getAPIntValue() == Bits ||
(SumC->getAPIntValue() == 0 && ShMsk1)) &&
ShAmt1Op1 == ShAmt0)
- return DAG.getNode(Opc, DL, VT, Op0, Op1,
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1, ShAmt0);
}
} else if (auto *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
auto *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
- return DAG.getNode(Opc, DL, VT,
- N0.getOperand(0), N1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL,
- MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1, ShAmt0);
} else if (ShAmt1.getOpcode() == ISD::XOR) {
SDValue Mask = ShAmt1.getOperand(1);
if (auto *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
- unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
+ unsigned InnerShift = (ISD::FSHL == Opc ? ISD::SRL : ISD::SHL);
SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
ShAmt1Op0 = ShAmt1Op0.getOperand(0);
@@ -36812,15 +39024,13 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
(ShAmt1Op0 == ShAmt0 || ShAmt1Op0 == ShMsk0)) {
if (Op1.getOpcode() == InnerShift &&
isa<ConstantSDNode>(Op1.getOperand(1)) &&
- Op1.getConstantOperandVal(1) == 1) {
- return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ Op1.getConstantOperandAPInt(1) == 1) {
+ return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
}
// Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
Op1.getOperand(0) == Op1.getOperand(1)) {
- return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
- DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ return GetFunnelShift(Op0, Op1.getOperand(0), ShAmt0);
}
}
}
@@ -36862,7 +39072,7 @@ static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
// Make sure the shift amount extracts the sign bit.
if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
- Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
+ Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1))
return SDValue();
// Create a greater-than comparison against -1.
@@ -36915,13 +39125,10 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return SDValue();
// The shift should be smearing the sign bit across each vector element.
- auto *ShiftBV = dyn_cast<BuildVectorSDNode>(Shift.getOperand(1));
- if (!ShiftBV)
- return SDValue();
-
- EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
- auto *ShiftAmt = ShiftBV->getConstantSplatNode();
- if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
+ auto *ShiftAmt =
+ isConstOrConstSplat(Shift.getOperand(1), /*AllowUndefs*/ true);
+ if (!ShiftAmt ||
+ ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1))
return SDValue();
// Create a greater-than comparison against -1. We don't use the more obvious
@@ -37203,15 +39410,35 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
AVGBuilder);
}
- if (Operands[0].getOpcode() == ISD::ADD)
+ // Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
+  // Match the or case only if it's 'add-like' - can be replaced by an add.
+ auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
+ if (ISD::ADD == V.getOpcode()) {
+ Op0 = V.getOperand(0);
+ Op1 = V.getOperand(1);
+ return true;
+ }
+ if (ISD::ZERO_EXTEND != V.getOpcode())
+ return false;
+ V = V.getOperand(0);
+ if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
+ !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
+ return false;
+ Op0 = V.getOperand(0);
+ Op1 = V.getOperand(1);
+ return true;
+ };
+
+ SDValue Op0, Op1;
+ if (FindAddLike(Operands[0], Op0, Op1))
std::swap(Operands[0], Operands[1]);
- else if (Operands[1].getOpcode() != ISD::ADD)
+ else if (!FindAddLike(Operands[1], Op0, Op1))
return SDValue();
- Operands[2] = Operands[1].getOperand(0);
- Operands[1] = Operands[1].getOperand(1);
+ Operands[2] = Op0;
+ Operands[1] = Op1;
// Now we have three operands of two additions. Check that one of them is a
- // constant vector with ones, and the other two are promoted from i8/i16.
+ // constant vector with ones, and the other two can be promoted from i8/i16.
for (int i = 0; i < 3; ++i) {
if (!IsConstVectorInRange(Operands[i], 1, 1))
continue;
@@ -37219,14 +39446,16 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
- if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
- Operands[j].getOperand(0).getValueType() != VT)
- return SDValue();
+ if (Operands[j].getValueType() != VT) {
+ if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+ Operands[j].getOperand(0).getValueType() != VT)
+ return SDValue();
+ Operands[j] = Operands[j].getOperand(0);
+ }
// The pattern is detected, emit X86ISD::AVG instruction(s).
- return SplitOpsAndApply(DAG, Subtarget, DL, VT,
- { Operands[0].getOperand(0),
- Operands[1].getOperand(0) }, AVGBuilder);
+ return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
+ AVGBuilder);
}
return SDValue();
@@ -37246,38 +39475,51 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;
- unsigned AddressSpace = Ld->getAddressSpace();
unsigned Alignment = Ld->getAlignment();
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) ||
(TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
- AddressSpace, Alignment, &Fast) && !Fast))) {
+ *Ld->getMemOperand(), &Fast) &&
+ !Fast))) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
- SDValue Ptr = Ld->getBasePtr();
-
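+ // Split the 256-bit load into two 128-bit halves, 16 bytes apart.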
+ unsigned HalfAlign = 16;
+ SDValue Ptr1 = Ld->getBasePtr();
+ SDValue Ptr2 = DAG.getMemBasePlusOffset(Ptr1, HalfAlign, dl);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- NumElems/2);
+ NumElems / 2);
SDValue Load1 =
- DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
+ DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
Alignment, Ld->getMemOperand()->getFlags());
-
- Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
- SDValue Load2 =
- DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
- Ld->getPointerInfo().getWithOffset(16),
- MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags());
+ SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
+ Ld->getPointerInfo().getWithOffset(HalfAlign),
+ MinAlign(Alignment, HalfAlign),
+ Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Load1.getValue(1),
- Load2.getValue(1));
+ Load1.getValue(1), Load2.getValue(1));
SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
return DCI.CombineTo(N, NewVec, TF, true);
}
+ // Bool vector load - attempt to cast to an integer, as we have good
+ // (vXiY *ext(vXi1 bitcast(iX))) handling.
+ if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() &&
+ RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) {
+ unsigned NumElts = RegVT.getVectorNumElements();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ if (TLI.isTypeLegal(IntVT)) {
+ SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), Alignment,
+ Ld->getMemOperand()->getFlags());
+ SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
+ return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
+ }
+ }
+
return SDValue();
}
@@ -37404,6 +39646,9 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
if (ML->getPassThru().isUndef())
return SDValue();
+ if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))
+ return SDValue();
+
// The new masked load has an undef pass-through operand. The select uses the
// original pass-through operand.
SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
@@ -37434,7 +39679,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
return Blend;
}
- if (Mld->getExtensionType() != ISD::SEXTLOAD)
+ if (Mld->getExtensionType() != ISD::EXTLOAD)
return SDValue();
// Resolve extending loads.
@@ -37504,8 +39749,20 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
Mld->getBasePtr(), NewMask, WidePassThru,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = getExtendInVec(/*Signed*/true, dl, VT, WideLd, DAG);
- return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
+
+ SDValue SlicedVec = DAG.getBitcast(WideVecVT, WideLd);
+ SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i != NumElems; ++i)
+ ShuffleVec[i * SizeRatio] = i;
+
+ // Can't shuffle using an illegal type.
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+ "WideVecVT should be legal");
+ SlicedVec = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
+ DAG.getUNDEF(WideVecVT), ShuffleVec);
+ SlicedVec = DAG.getBitcast(VT, SlicedVec);
+
+ return DCI.CombineTo(N, SlicedVec, WideLd.getValue(1), true);
}
/// If exactly one element of the mask is set for a non-truncating masked store,
@@ -37543,6 +39800,10 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT VT = Mst->getValue().getValueType();
+ EVT StVT = Mst->getMemoryVT();
+ SDLoc dl(Mst);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
if (!Mst->isTruncatingStore()) {
if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
return ScalarStore;
@@ -37551,7 +39812,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
// simplify ops leading up to it. We only demand the MSB of each lane.
SDValue Mask = Mst->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
return SDValue(N, 0);
@@ -37561,20 +39821,25 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
// pattern above, but that pattern will be different. It will either need to
// match setcc more generally or match PCMPGTM later (in tablegen?).
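+ // If the stored value is a one-use truncate and truncating from its source
+ // type to the memory type is legal, fold the truncate into a truncating
+ // masked store.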
+ SDValue Value = Mst->getValue();
+ if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ Mst->getMemoryVT())) {
+ return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
+ Mst->getBasePtr(), Mask,
+ Mst->getMemoryVT(), Mst->getMemOperand(), true);
+ }
+
return SDValue();
}
// Resolve truncating stores.
unsigned NumElems = VT.getVectorNumElements();
- EVT StVT = Mst->getMemoryVT();
- SDLoc dl(Mst);
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getScalarSizeInBits();
unsigned ToSz = StVT.getScalarSizeInBits();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
// are designated for truncate store.
@@ -37644,11 +39909,13 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
+ unsigned Alignment = St->getAlignment();
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -37699,8 +39966,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
StoredVal->ops().slice(32, 32));
Hi = combinevXi1ConstantToInteger(Hi, DAG);
- unsigned Alignment = St->getAlignment();
-
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl);
@@ -37724,30 +39989,48 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
bool Fast;
- unsigned AddressSpace = St->getAddressSpace();
- unsigned Alignment = St->getAlignment();
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- AddressSpace, Alignment, &Fast) &&
+ *St->getMemOperand(), &Fast) &&
!Fast) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
- SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl);
- SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl);
+ return splitVectorStore(St, DAG);
+ }
- SDValue Ptr0 = St->getBasePtr();
- SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl);
+ // Split under-aligned vector non-temporal stores.
+ if (St->isNonTemporal() && StVT == VT && Alignment < VT.getStoreSize()) {
+ // ZMM/YMM nt-stores - either they can be split into a series of shorter
+ // vector stores or the legalizer can scalarize them to use MOVNTI.
+ if (VT.is256BitVector() || VT.is512BitVector()) {
+ unsigned NumElems = VT.getVectorNumElements();
+ if (NumElems < 2)
+ return SDValue();
+ return splitVectorStore(St, DAG);
+ }
+
+ // XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64
+ // to use MOVNTI.
+ if (VT.is128BitVector() && Subtarget.hasSSE2()) {
+ MVT NTVT = Subtarget.hasSSE4A()
+ ? MVT::v2f64
+ : (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
+ return scalarizeVectorStore(St, NTVT, DAG);
+ }
+ }
- SDValue Ch0 =
- DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
- Alignment, St->getMemOperand()->getFlags());
- SDValue Ch1 =
- DAG.getStore(St->getChain(), dl, Value1, Ptr1,
- St->getPointerInfo().getWithOffset(16),
- MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+ // Try to optimize v16i16->v16i8 truncating stores when BWI is not
+ // supported but AVX512F is, by extending to v16i32 and truncating.
+ if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&
+ St->getValue().getOpcode() == ISD::TRUNCATE &&
+ St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
+ TLI.isTruncStoreLegalOrCustom(MVT::v16i32, MVT::v16i8) &&
+ !DCI.isBeforeLegalizeOps()) {
+ SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
+ return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
+ MVT::v16i8, St->getMemOperand());
}
// Optimize trunc store (of multiple scalars) to shuffle and store.
@@ -37763,7 +40046,6 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val =
detectAVX512SSatPattern(St->getValue(), St->getMemoryVT(), Subtarget,
TLI))
@@ -37867,7 +40149,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal =
!Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
- if ((VT.isVector() ||
+ if (((VT.isVector() && !VT.isFloatingPoint()) ||
(VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
@@ -37890,8 +40172,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget.is64Bit() || F64IsLegal) {
- MVT LdVT = (Subtarget.is64Bit() &&
- (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
+ MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getMemOperand());
@@ -37965,7 +40246,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ bool IsCommutative) {
// If either operand is undef, bail out. The binop should be simplified.
if (LHS.isUndef() || RHS.isUndef())
return false;
@@ -37979,51 +40262,83 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
// which is A horizontal-op B.
- // At least one of the operands should be a vector shuffle.
- if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
- RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
- return false;
-
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // TODO - can we make a general helper method that does all of this for us?
+ auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
+ SmallVectorImpl<int> &ShuffleMask) {
+ if (Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (!Op.getOperand(0).isUndef())
+ N0 = Op.getOperand(0);
+ if (!Op.getOperand(1).isUndef())
+ N1 = Op.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
+ ShuffleMask.append(Mask.begin(), Mask.end());
+ return;
+ }
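+    // Otherwise try to decode a target shuffle, optionally peeking through a
+    // low-half EXTRACT_SUBVECTOR and splitting its source into 128-bit halves.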
+ bool UseSubVector = false;
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Op.getOperand(0).getValueType().is256BitVector() &&
+ llvm::isNullConstant(Op.getOperand(1))) {
+ Op = Op.getOperand(0);
+ UseSubVector = true;
+ }
+ bool IsUnary;
+ SmallVector<SDValue, 2> SrcOps;
+ SmallVector<int, 16> SrcShuffleMask;
+ SDValue BC = peekThroughBitcasts(Op);
+ if (isTargetShuffle(BC.getOpcode()) &&
+ getTargetShuffleMask(BC.getNode(), BC.getSimpleValueType(), false,
+ SrcOps, SrcShuffleMask, IsUnary)) {
+ if (!UseSubVector && SrcShuffleMask.size() == NumElts &&
+ SrcOps.size() <= 2) {
+ N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
+ N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
+ ShuffleMask.append(SrcShuffleMask.begin(), SrcShuffleMask.end());
+ }
+ if (UseSubVector && (SrcShuffleMask.size() == (NumElts * 2)) &&
+ SrcOps.size() == 1) {
+ N0 = extract128BitVector(SrcOps[0], 0, DAG, SDLoc(Op));
+ N1 = extract128BitVector(SrcOps[0], NumElts, DAG, SDLoc(Op));
+ ArrayRef<int> Mask = ArrayRef<int>(SrcShuffleMask).slice(0, NumElts);
+ ShuffleMask.append(Mask.begin(), Mask.end());
+ }
+ }
+ };
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
// If LHS is not a shuffle, then pretend it is the identity shuffle:
// LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
// NOTE: A default initialized SDValue represents an UNDEF of type VT.
- unsigned NumElts = VT.getVectorNumElements();
SDValue A, B;
- SmallVector<int, 16> LMask(NumElts);
- if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
- if (!LHS.getOperand(0).isUndef())
- A = LHS.getOperand(0);
- if (!LHS.getOperand(1).isUndef())
- B = LHS.getOperand(1);
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
- llvm::copy(Mask, LMask.begin());
- } else {
- A = LHS;
- for (unsigned i = 0; i != NumElts; ++i)
- LMask[i] = i;
- }
+ SmallVector<int, 16> LMask;
+ GetShuffle(LHS, A, B, LMask);
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 16> RMask(NumElts);
- if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
- if (!RHS.getOperand(0).isUndef())
- C = RHS.getOperand(0);
- if (!RHS.getOperand(1).isUndef())
- D = RHS.getOperand(1);
- ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
- llvm::copy(Mask, RMask.begin());
- } else {
+ SmallVector<int, 16> RMask;
+ GetShuffle(RHS, C, D, RMask);
+
+ // At least one of the operands should be a vector shuffle.
+ unsigned NumShuffles = (LMask.empty() ? 0 : 1) + (RMask.empty() ? 0 : 1);
+ if (NumShuffles == 0)
+ return false;
+
+ if (LMask.empty()) {
+ A = LHS;
+ for (unsigned i = 0; i != NumElts; ++i)
+ LMask.push_back(i);
+ }
+
+ if (RMask.empty()) {
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
- RMask[i] = i;
+ RMask.push_back(i);
}
// If A and B occur in reverse order in RHS, then canonicalize by commuting
@@ -38072,6 +40387,12 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+
+ if (!shouldUseHorizontalOp(LHS == RHS && NumShuffles < 2, DAG, Subtarget))
+ return false;
+
+ LHS = DAG.getBitcast(VT, LHS);
+ RHS = DAG.getBitcast(VT, RHS);
return true;
}
@@ -38088,8 +40409,7 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal add/sub from adds/subs of shuffles.
if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
(Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
- isHorizontalBinOp(LHS, RHS, IsFadd) &&
- shouldUseHorizontalOp(LHS == RHS, DAG, Subtarget))
+ isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd))
return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
return SDValue();
@@ -38105,7 +40425,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
const SDLoc &DL) {
assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
SDValue Src = N->getOperand(0);
- unsigned Opcode = Src.getOpcode();
+ unsigned SrcOpcode = Src.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
@@ -38123,14 +40443,17 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return true;
// See if this is a single use constant which can be constant folded.
- SDValue BC = peekThroughOneUseBitcasts(Op);
- return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
+ // NOTE: We don't peek through bitcasts here because there is currently
+ // no support for constant folding truncate+bitcast+vector_of_constants. So
+ // we'd just end up with a truncate on both operands, which would
+ // get turned back into (truncate (binop)), causing an infinite loop.
+ return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
- return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
+ return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);
};
// Don't combine if the operation has other uses.
@@ -38145,13 +40468,13 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
// In most cases its only worth pre-truncating if we're only facing the cost
// of one truncation.
// i.e. if one of the inputs will constant fold or the input is repeated.
- switch (Opcode) {
+ switch (SrcOpcode) {
case ISD::AND:
case ISD::XOR:
case ISD::OR: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
- if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
+ if (TLI.isOperationLegalOrPromote(SrcOpcode, VT) &&
(Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
@@ -38160,14 +40483,15 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
case ISD::MUL:
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
- if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
- !TLI.isOperationLegal(Opcode, SrcVT))
+ if (SrcVT.getScalarType() == MVT::i64 &&
+ TLI.isOperationLegal(SrcOpcode, VT) &&
+ !TLI.isOperationLegal(SrcOpcode, SrcVT))
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
- if (TLI.isOperationLegal(Opcode, VT) &&
+ if (TLI.isOperationLegal(SrcOpcode, VT) &&
(Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
return TruncateArithmetic(Op0, Op1);
break;
@@ -38177,7 +40501,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
// truncatable to avoid interfering with combineSubToSubus.
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
- if (TLI.isOperationLegal(Opcode, VT) &&
+ if (TLI.isOperationLegal(SrcOpcode, VT) &&
(Op0 == Op1 || (IsFreeTruncation(Op0) && IsFreeTruncation(Op1))))
return TruncateArithmetic(Op0, Op1);
break;
@@ -38188,36 +40512,19 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
}
/// Truncate using ISD::AND mask and X86ISD::PACKUS.
+/// e.g. trunc <8 x i32> X to <8 x i16> -->
+/// MaskX = X & 0xffff (clear high bits to prevent saturation)
+/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
- EVT InSVT = InVT.getVectorElementType();
EVT OutVT = N->getValueType(0);
- EVT OutSVT = OutVT.getVectorElementType();
-
- // Split a long vector into vectors of legal type and mask to unset all bits
- // that won't appear in the result to prevent saturation.
- // TODO - we should be doing this at the maximum legal size but this is
- // causing regressions where we're concatenating back to max width just to
- // perform the AND and then extracting back again.....
- unsigned NumSubRegs = InVT.getSizeInBits() / 128;
- unsigned NumSubRegElts = 128 / InSVT.getSizeInBits();
- EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts);
- SmallVector<SDValue, 8> SubVecs(NumSubRegs);
-
- APInt Mask =
- APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits());
- SDValue MaskVal = DAG.getConstant(Mask, DL, SubRegVT);
-
- for (unsigned i = 0; i < NumSubRegs; i++) {
- SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In,
- DAG.getIntPtrConstant(i * NumSubRegElts, DL));
- SubVecs[i] = DAG.getNode(ISD::AND, DL, SubRegVT, Sub, MaskVal);
- }
- In = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, SubVecs);
+ APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
+ OutVT.getScalarSizeInBits());
+ In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
}
@@ -38580,16 +40887,23 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
+ unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
+
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
- auto VT = Op->getValueType(0);
+ EVT VT = Op->getValueType(0);
+ // Make sure the element size doesn't change.
+ if (VT.getScalarSizeInBits() != ScalarSize)
+ return SDValue();
+
if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
if (!SVOp->getOperand(1).isUndef())
return SDValue();
if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
- return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
- SVOp->getMask());
+ if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
+ return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+ SVOp->getMask());
return SDValue();
}
unsigned Opc = Op.getOpcode();
@@ -38601,19 +40915,17 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (!InsVector.isUndef())
return SDValue();
if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
- NegInsVal, Op.getOperand(2));
+ if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+ NegInsVal, Op.getOperand(2));
return SDValue();
}
if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
return SDValue();
- SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
- if (!Op1.getValueType().isFloatingPoint())
- return SDValue();
-
- SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op0 = Op.getOperand(0);
// For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
// masks. For FSUB, we have to check if constant bits of Op0 are sign bit
@@ -38625,7 +40937,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
SmallVector<APInt, 16> EltBits;
// Extract constant bits and see if they are all sign bit masks. Ignore the
// undef elements.
- if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+ if (getTargetConstantBitsFromNode(Op1, ScalarSize,
UndefElts, EltBits,
/* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false)) {
@@ -38922,13 +41234,12 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
if (Subtarget.useSoftFloat())
return SDValue();
- // TODO: If an operand is already known to be a NaN or not a NaN, this
- // should be an optional swap and FMAX/FMIN.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = N->getValueType(0);
- if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
- (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
- (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
+ if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
+ (Subtarget.hasSSE2() && VT == MVT::f64) ||
+ (VT.isVector() && TLI.isTypeLegal(VT))))
return SDValue();
SDValue Op0 = N->getOperand(0);
@@ -38941,13 +41252,20 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
+ // If one of the operands is known non-NaN, use the native min/max instructions
+ // with the non-NaN input as the second operand.
+ if (DAG.isKnownNeverNaN(Op1))
+ return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
+ if (DAG.isKnownNeverNaN(Op0))
+ return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
+
// If we have to respect NaN inputs, this takes at least 3 instructions.
// Favor a library call when operating on a scalar and minimizing code size.
- if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize())
+ if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
- EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType(
- DAG.getDataLayout(), *DAG.getContext(), VT);
+ EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ VT);
// There are 4 possibilities involving NaN inputs, and these are the required
// outputs:
@@ -38987,6 +41305,69 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
KnownZero, DCI))
return SDValue(N, 0);
+ // Convert a full vector load into vzload when not all bits are needed.
+ SDValue In = N->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+ if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+ ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+ assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ // Unless the load is volatile.
+ if (!LN->isVolatile()) {
+ SDLoc dl(N);
+ unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+ MVT MemVT = MVT::getIntegerVT(NumBits);
+ MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+ SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+ DAG.getBitcast(InVT, VZLoad));
+ DCI.CombineTo(N, Convert);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+
+ // Convert a full vector load into vzload when not all bits are needed.
+ SDValue In = N->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+ if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
+ ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
+ assert(InVT.is128BitVector() && "Expected 128-bit input vector");
+ LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
+ // Unless the load is volatile.
+ if (!LN->isVolatile()) {
+ SDLoc dl(N);
+ unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
+ MVT MemVT = MVT::getFloatingPointVT(NumBits);
+ MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
+ SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other);
+ SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
+ SDValue VZLoad =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, MemVT,
+ LN->getPointerInfo(),
+ LN->getAlignment(),
+ LN->getMemOperand()->getFlags());
+ SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
+ DAG.getBitcast(InVT, VZLoad));
+ DCI.CombineTo(N, Convert);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
+
return SDValue();
}
@@ -39005,18 +41386,14 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
return DAG.getConstant(0, SDLoc(N), VT);
// Turn ANDNP back to AND if input is inverted.
- if (VT.isVector() && N->getOperand(0).getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllOnes(N->getOperand(0).getOperand(1).getNode())) {
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- N->getOperand(0).getOperand(0), N->getOperand(1));
- }
+ if (SDValue Not = IsNOT(N->getOperand(0), DAG))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not),
+ N->getOperand(1));
// Attempt to recursively combine a bitmask ANDNP with shuffles.
if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
SDValue Op(N, 0);
- if (SDValue Res = combineX86ShufflesRecursively(
- {Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
return Res;
}
@@ -39039,18 +41416,24 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
// Try to combine sext_in_reg of a cmov of constants by extending the constants.
static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
+ assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
+
+ EVT DstVT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
- if (ExtraVT != MVT::i16)
+ if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
return SDValue();
- // Look through single use any_extends.
- if (N0.getOpcode() == ISD::ANY_EXTEND && N0.hasOneUse())
+ // Look through single use any_extends / truncs.
+ SDValue IntermediateBitwidthOp;
+ if ((N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
+ N0.hasOneUse()) {
+ IntermediateBitwidthOp = N0;
N0 = N0.getOperand(0);
+ }
// See if we have a single use cmov.
if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse())
@@ -39066,21 +41449,37 @@ static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
- // If we looked through an any_extend above, add one to the constants.
- if (N0.getValueType() != VT) {
- CMovOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp0);
- CMovOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, VT, CMovOp1);
+ // If we looked through an any_extend/trunc above, add one to the constants.
+ if (IntermediateBitwidthOp) {
+ unsigned IntermediateOpc = IntermediateBitwidthOp.getOpcode();
+ CMovOp0 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp0);
+ CMovOp1 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp1);
+ }
+
+ CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp0, N1);
+ CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp1, N1);
+
+ EVT CMovVT = DstVT;
+ // We do not want i16 CMOV's. Promote to i32 and truncate afterwards.
+ if (DstVT == MVT::i16) {
+ CMovVT = MVT::i32;
+ CMovOp0 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp0);
+ CMovOp1 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp1);
}
- CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp0, N1);
- CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, CMovOp1, N1);
+ SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1,
+ N0.getOperand(2), N0.getOperand(3));
- return DAG.getNode(X86ISD::CMOV, DL, VT, CMovOp0, CMovOp1,
- N0.getOperand(2), N0.getOperand(3));
+ if (CMovVT != DstVT)
+ CMov = DAG.getNode(ISD::TRUNCATE, DL, DstVT, CMov);
+
+ return CMov;
}
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
+ assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
+
if (SDValue V = combineSextInRegCmov(N, DAG))
return V;
@@ -39336,6 +41735,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
return SDValue();
unsigned Opcode = N->getOpcode();
+ // TODO - add ANY_EXTEND support.
if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND)
return SDValue();
if (!DCI.isBeforeLegalizeOps())
@@ -39382,13 +41782,13 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
auto ExtendVecSize = [&DAG](const SDLoc &DL, SDValue N, unsigned Size) {
- EVT InVT = N.getValueType();
- EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
- Size / InVT.getScalarSizeInBits());
- SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
- DAG.getUNDEF(InVT));
+ EVT SrcVT = N.getValueType();
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ Size / SrcVT.getScalarSizeInBits());
+ SmallVector<SDValue, 8> Opnds(Size / SrcVT.getSizeInBits(),
+ DAG.getUNDEF(SrcVT));
Opnds[0] = N;
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Opnds);
};
// If target-size is less than 128-bits, extend to a type that would extend
@@ -39410,8 +41810,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
(VT.is256BitVector() && Subtarget.hasAVX()) ||
(VT.is512BitVector() && Subtarget.useAVX512Regs())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
- Opcode = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
- : ISD::ZERO_EXTEND_VECTOR_INREG;
+ Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);
return DAG.getNode(Opcode, DL, VT, ExOp);
}
@@ -39421,9 +41820,7 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
- unsigned IROpc = Opcode == ISD::SIGN_EXTEND ? ISD::SIGN_EXTEND_VECTOR_INREG
- : ISD::ZERO_EXTEND_VECTOR_INREG;
-
+ unsigned IROpc = getOpcode_EXTEND_VECTOR_INREG(Opcode);
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
@@ -39457,7 +41854,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
// Only do this combine with AVX512 for vector extends.
- if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC)
+ if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC)
return SDValue();
// Only combine legal element types.
@@ -39473,7 +41870,7 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
// Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since
// that's the only integer compares with we have.
- ISD::CondCode CC = cast<CondCodeSDNode>(N0->getOperand(2))->get();
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
if (ISD::isUnsignedIntSetCC(CC))
return SDValue();
@@ -39629,6 +42026,10 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
if (!NegVal)
return SDValue();
+ // FIXME: Should we bitcast instead?
+ if (NegVal.getValueType() != VT)
+ return SDValue();
+
unsigned NewOpcode;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
@@ -39705,6 +42106,20 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
return R;
+ // TODO: Combine with any target/faux shuffle.
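+ // If the PACKUS sources already have their upper bits clear, the pack is
+ // just a concatenation, so zext(packus(x,y)) -> concat(x,y).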
+ if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 &&
+ VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ unsigned NumSrcElts = N00.getValueType().getVectorNumElements();
+ unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
+ APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
+ if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
+ (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
+ return concatSubVectors(N00, N01, VT, NumSrcElts * 2, DAG, dl, 128);
+ }
+ }
+
return SDValue();
}
@@ -39734,9 +42149,14 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
if (isNullConstant(Y) && !IsOrXorXorCCZero)
return SDValue();
- // Bail out if we know that this is not really just an oversized integer.
- if (peekThroughBitcasts(X).getValueType() == MVT::f128 ||
- peekThroughBitcasts(Y).getValueType() == MVT::f128)
+ // Don't perform this combine if constructing the vector will be expensive.
+ auto IsVectorBitCastCheap = [](SDValue X) {
+ X = peekThroughBitcasts(X);
+ return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
+ X.getOpcode() == ISD::LOAD;
+ };
+ if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
+ !IsOrXorXorCCZero)
return SDValue();
// TODO: Use PXOR + PTEST for SSE4.1 or later?
@@ -39873,66 +42293,44 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
SDValue Src = N->getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT VT = N->getSimpleValueType(0);
+ unsigned NumBits = VT.getScalarSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
// Perform constant folding.
if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
- assert(VT== MVT::i32 && "Unexpected result type");
+ assert(VT == MVT::i32 && "Unexpected result type");
APInt Imm(32, 0);
for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
- SDValue In = Src.getOperand(Idx);
- if (!In.isUndef() &&
- cast<ConstantSDNode>(In)->getAPIntValue().isNegative())
+ if (!Src.getOperand(Idx).isUndef() &&
+ Src.getConstantOperandAPInt(Idx).isNegative())
Imm.setBit(Idx);
}
return DAG.getConstant(Imm, SDLoc(N), VT);
}
// Look through int->fp bitcasts that don't change the element width.
- if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse() &&
- SrcVT.isFloatingPoint() &&
- Src.getOperand(0).getValueType() ==
- EVT(SrcVT).changeVectorElementTypeToInteger())
- Src = Src.getOperand(0);
+ unsigned EltWidth = SrcVT.getScalarSizeInBits();
+ if (Src.getOpcode() == ISD::BITCAST &&
+ Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
+ return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));
+
+ // Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk results
+ // with scalar comparisons.
+ if (SDValue NotSrc = IsNOT(Src, DAG)) {
+ SDLoc DL(N);
+ APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
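+    // MOVMSK only produces NumElts result bits, so only invert those.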
+ NotSrc = DAG.getBitcast(SrcVT, NotSrc);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc),
+ DAG.getConstant(NotMask, DL, VT));
+ }
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ APInt DemandedMask(APInt::getAllOnesValue(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
- // Combine (movmsk (setne (and X, (1 << C)), 0)) -> (movmsk (X << C)).
- // Only do this when the setcc input and output types are the same and the
- // setcc and the 'and' node have a single use.
- // FIXME: Support 256-bits with AVX1. The movmsk is split, but the and isn't.
- APInt SplatVal;
- if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
- Src.getOperand(0).getValueType() == Src.getValueType() &&
- cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETNE &&
- ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
- Src.getOperand(0).getOpcode() == ISD::AND) {
- SDValue And = Src.getOperand(0);
- if (And.hasOneUse() &&
- ISD::isConstantSplatVector(And.getOperand(1).getNode(), SplatVal) &&
- SplatVal.isPowerOf2()) {
- MVT VT = Src.getSimpleValueType();
- unsigned BitWidth = VT.getScalarSizeInBits();
- unsigned ShAmt = BitWidth - SplatVal.logBase2() - 1;
- SDLoc DL(And);
- SDValue X = And.getOperand(0);
- // If the element type is i8, we need to bitcast to i16 to use a legal
- // shift. If we wait until lowering we end up with an extra and to bits
- // from crossing the 8-bit elements, but we don't care about that here.
- if (VT.getVectorElementType() == MVT::i8) {
- VT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
- X = DAG.getBitcast(VT, X);
- }
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
- DAG.getConstant(ShAmt, DL, VT));
- SDValue Cast = DAG.getBitcast(SrcVT, Shl);
- return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), N->getValueType(0), Cast);
- }
- }
-
return SDValue();
}
@@ -40065,8 +42463,7 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
- if (BuildVectorSDNode *BV =
- dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(0).getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
@@ -40088,6 +42485,41 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
return SDValue();
}
+/// If we are converting a value to floating-point, try to replace scalar
+/// truncate of an extracted vector element with a bitcast. This tries to keep
+/// the sequence on XMM registers rather than moving between vector and GPRs.
+static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
+ // TODO: This is currently only used by combineSIntToFP, but it is generalized
+ // to allow being called by any similar cast opcode.
+ // TODO: Consider merging this into lowering: vectorizeExtractedCast().
+ SDValue Trunc = N->getOperand(0);
+ if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ SDValue ExtElt = Trunc.getOperand(0);
+ if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isNullConstant(ExtElt.getOperand(1)))
+ return SDValue();
+
+ EVT TruncVT = Trunc.getValueType();
+ EVT SrcVT = ExtElt.getValueType();
+ unsigned DestWidth = TruncVT.getSizeInBits();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ if (SrcWidth % DestWidth != 0)
+ return SDValue();
+
+ // inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)
+ EVT SrcVecVT = ExtElt.getOperand(0).getValueType();
+ unsigned VecWidth = SrcVecVT.getSizeInBits();
+ unsigned NumElts = VecWidth / DestWidth;
+ EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts);
+ SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0));
+ SDLoc DL(N);
+ SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT,
+ BitcastVec, ExtElt.getOperand(1));
+ return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt);
+}
+
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = N->getOperand(0);
@@ -40181,6 +42613,10 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return FILDChain;
}
}
+
+ if (SDValue V = combineToFPTruncExtElt(N, DAG))
+ return V;
+
return SDValue();
}
@@ -40267,13 +42703,13 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
onlyZeroFlagUsed(SDValue(N, 0))) {
- EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
- unsigned ShAmt = Op.getConstantOperandVal(1);
- if (ShAmt < BitWidth) { // Avoid undefined shifts.
+ const APInt &ShAmt = Op.getConstantOperandAPInt(1);
+ if (ShAmt.ult(BitWidth)) { // Avoid undefined shifts.
+ unsigned MaskBits = BitWidth - ShAmt.getZExtValue();
APInt Mask = Op.getOpcode() == ISD::SRL
- ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
- : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
+ ? APInt::getHighBitsSet(BitWidth, MaskBits)
+ : APInt::getLowBitsSet(BitWidth, MaskBits);
if (Mask.isSignedIntN(32)) {
Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
DAG.getConstant(Mask, dl, VT));
@@ -40283,7 +42719,6 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
}
}
-
// Look for a truncate with a single use.
if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
return SDValue();
@@ -40337,8 +42772,42 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
return Op.getValue(1);
}
+static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
+ "Expected X86ISD::ADD or X86ISD::SUB");
+
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ MVT VT = LHS.getSimpleValueType();
+ unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;
+
+ // If we don't use the flag result, simplify back to a generic ADD/SUB.
+ if (!N->hasAnyUseOfValue(1)) {
+ SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
+ return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
+ }
+
+ // Fold any similar generic ADD/SUB opcodes to reuse this node.
+ auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+ SDValue Ops[] = {N0, N1};
+ SDVTList VTs = DAG.getVTList(N->getValueType(0));
+ if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+ SDValue Op(N, 0);
+ if (Negate)
+ Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
+ DCI.CombineTo(GenericAddSub, Op);
+ }
+ };
+ MatchGeneric(LHS, RHS, false);
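+  // For X86ISD::SUB, a generic sub(RHS,LHS) is the negation of this node's result.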
+ MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+
+ return SDValue();
+}
+
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
- if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
+ if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
MVT VT = N->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs,
@@ -40346,6 +42815,15 @@ static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
Flags);
}
+ // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)
+ // iff the flag result is dead.
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) &&
+ !N->hasAnyUseOfValue(1))
+ return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), Op0.getOperand(0),
+ Op0.getOperand(1), N->getOperand(2));
+
return SDValue();
}
@@ -40372,7 +42850,7 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, Res1, CarryOut);
}
- if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
+ if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
MVT VT = N->getSimpleValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs,
@@ -40468,7 +42946,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// Do not flip "e > c", where "c" is a constant, because Cmp instruction
// cannot take an immediate as its first operand.
//
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
!isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
@@ -40575,8 +43053,8 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
// Madd vector size is half of the original vector size
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
- MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
- return DAG.getNode(X86ISD::VPMADDWD, DL, VT, Ops);
+ MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
+ return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
};
auto BuildPMADDWD = [&](SDValue Mul) {
@@ -40631,10 +43109,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
return SDValue();
// We know N is a reduction add, which means one of its operands is a phi.
- // To match SAD, we need the other operand to be a vector select.
- if (Op0.getOpcode() != ISD::VSELECT)
+ // To match SAD, we need the other operand to be a ABS.
+ if (Op0.getOpcode() != ISD::ABS)
std::swap(Op0, Op1);
- if (Op0.getOpcode() != ISD::VSELECT)
+ if (Op0.getOpcode() != ISD::ABS)
return SDValue();
auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
@@ -40673,7 +43151,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
Op0 = BuildPSADBW(SadOp0, SadOp1);
// It's possible we have a sad on the other side too.
- if (Op1.getOpcode() == ISD::VSELECT &&
+ if (Op1.getOpcode() == ISD::ABS &&
detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
Op1 = BuildPSADBW(SadOp0, SadOp1);
}
@@ -40815,39 +43293,6 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
PMADDBuilder);
}
-// Try to turn (add (umax X, C), -C) into (psubus X, C)
-static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasSSE2())
- return SDValue();
-
- EVT VT = N->getValueType(0);
-
- // psubus is available in SSE2 for i8 and i16 vectors.
- if (!VT.isVector() || VT.getVectorNumElements() < 2 ||
- !isPowerOf2_32(VT.getVectorNumElements()) ||
- !(VT.getVectorElementType() == MVT::i8 ||
- VT.getVectorElementType() == MVT::i16))
- return SDValue();
-
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- if (Op0.getOpcode() != ISD::UMAX)
- return SDValue();
-
- // The add should have a constant that is the negative of the max.
- // TODO: Handle build_vectors with undef elements.
- auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
- return Max->getAPIntValue() == (-Op->getAPIntValue());
- };
- if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT))
- return SDValue();
-
- SDLoc DL(N);
- return DAG.getNode(ISD::USUBSAT, DL, VT, Op0.getOperand(0),
- Op0.getOperand(1));
-}
-
// Attempt to turn this pattern into PMADDWD.
// (mul (add (zext (build_vector)), (zext (build_vector))),
// (add (zext (build_vector)), (zext (build_vector)))
@@ -40957,12 +43402,12 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
ArrayRef<SDValue> Ops) {
// Shrink by adding truncate nodes and let DAGCombine fold with the
// sources.
- EVT InVT = Ops[0].getValueType();
- assert(InVT.getScalarType() == MVT::i16 &&
+ EVT OpVT = Ops[0].getValueType();
+ assert(OpVT.getScalarType() == MVT::i16 &&
"Unexpected scalar element type");
- assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
+ assert(OpVT == Ops[1].getValueType() && "Operands' types mismatch");
EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
- InVT.getVectorNumElements() / 2);
+ OpVT.getVectorNumElements() / 2);
return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
};
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
@@ -40990,8 +43435,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal adds from adds of shuffles.
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true) &&
- shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
+ Subtarget.hasSSSE3() &&
+ isHorizontalBinOp(Op0, Op1, DAG, Subtarget, true)) {
auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
@@ -41003,9 +43448,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineIncDecVector(N, DAG))
return V;
- if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget))
- return V;
-
return combineAddOrSubToADCOrSBB(N, DAG);
}
@@ -41110,7 +43552,7 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
// X-Y -> X+~Y+1, saving one register.
if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
isa<ConstantSDNode>(Op1.getOperand(1))) {
- APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
+ const APInt &XorC = Op1.getConstantOperandAPInt(1);
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
Op1.getOperand(0),
@@ -41124,8 +43566,8 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
VT == MVT::v8i32) &&
- Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false) &&
- shouldUseHorizontalOp(Op0 == Op1, DAG, Subtarget)) {
+ Subtarget.hasSSSE3() &&
+ isHorizontalBinOp(Op0, Op1, DAG, Subtarget, false)) {
auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
@@ -41159,6 +43601,149 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Helper that combines an array of subvector ops as if they were the operands
+/// of an ISD::CONCAT_VECTORS node, but may have come from another source (e.g.
+/// ISD::INSERT_SUBVECTOR). The ops are assumed to be of the same type.
+static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
+
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ if (llvm::all_of(Ops, [](SDValue Op) {
+ return ISD::isBuildVectorAllZeros(Op.getNode());
+ }))
+ return getZeroVector(VT, Subtarget, DAG, DL);
+
+ SDValue Op0 = Ops[0];
+
+ // Fold subvector loads into one.
+ // If needed, look through bitcasts to get to the load.
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
+ bool Fast;
+ const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstLd->getMemOperand(), &Fast) &&
+ Fast) {
+ if (SDValue Ld =
+ EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
+ return Ld;
+ }
+ }
+
+ // Repeated subvectors.
+ if (llvm::all_of(Ops, [Op0](SDValue Op) { return Op == Op0; })) {
+ // If this broadcast/subv_broadcast is inserted into both halves, use a
+ // larger broadcast/subv_broadcast.
+ if (Op0.getOpcode() == X86ISD::VBROADCAST ||
+ Op0.getOpcode() == X86ISD::SUBV_BROADCAST)
+ return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
+
+ // concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
+ if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
+ (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
+ Op0.getOperand(0),
+ DAG.getIntPtrConstant(0, DL)));
+
+ // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
+ if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ (Subtarget.hasAVX2() ||
+ (VT.getScalarSizeInBits() >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
+ Op0.getOperand(0).getValueType() == VT.getScalarType())
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
+ }
+
+ bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });
+
+ // Repeated opcode.
+ // TODO - combineX86ShufflesRecursively should handle shuffle concatenation
+ // but it currently struggles with different vector widths.
+ if (llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op.getOpcode() == Op0.getOpcode();
+ })) {
+ unsigned NumOps = Ops.size();
+ switch (Op0.getOpcode()) {
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFD:
+ if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
+ Subtarget.hasInt256() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
+ SmallVector<SDValue, 2> Src;
+ for (unsigned i = 0; i != NumOps; ++i)
+ Src.push_back(Ops[i].getOperand(0));
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Src),
+ Op0.getOperand(1));
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::VPERMILPI:
+ // TODO - add support for vXf64/vXi64 shuffles.
+ if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
+ Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
+ SmallVector<SDValue, 2> Src;
+ for (unsigned i = 0; i != NumOps; ++i)
+ Src.push_back(DAG.getBitcast(MVT::v4f32, Ops[i].getOperand(0)));
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f32, Src);
+ Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
+ Op0.getOperand(1));
+ return DAG.getBitcast(VT, Res);
+ }
+ break;
+ case X86ISD::PACKUS:
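+      // concat(packus(a,b),packus(c,d)) -> packus(concat(a,c),concat(b,d)),
+      // since 256-bit PACKUS operates on each 128-bit lane independently.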
+ if (NumOps == 2 && VT.is256BitVector() && Subtarget.hasInt256()) {
+ SmallVector<SDValue, 2> LHS, RHS;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ LHS.push_back(Ops[i].getOperand(0));
+ RHS.push_back(Ops[i].getOperand(1));
+ }
+ MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS),
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS));
+ }
+ break;
+ }
+ }
+
+ // If we're inserting all zeros into the upper half, change this to
+ // an insert into an all zeros vector. We will match this to a move
+ // with implicit upper bit zeroing during isel.
+ if (Ops.size() == 2 && ISD::isBuildVectorAllZeros(Ops[1].getNode()))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ getZeroVector(VT, Subtarget, DAG, DL), Ops[0],
+ DAG.getIntPtrConstant(0, DL));
+
+ return SDValue();
+}
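A minimal sketch of the repeated-subvector fold handled above, assuming AVX2 and
that the pattern reaches the DAG as a concat of two identical broadcasts (the
function name and intrinsics are illustrative only, not part of the patch):

    #include <immintrin.h>

    // Both 128-bit halves are the same X86ISD::VBROADCAST, so
    // combineConcatVectorOps should be able to widen it to a single 256-bit
    // broadcast instead of a broadcast followed by an insert.
    __m256 splat_across_halves(const float *p) {
      __m128 Lo = _mm_broadcast_ss(p);   // 128-bit broadcast
      return _mm256_set_m128(Lo, Lo);    // concat_vectors(broadcast, broadcast)
    }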
+
+static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Don't do anything for i1 vectors.
+ if (VT.getVectorElementType() == MVT::i1)
+ return SDValue();
+
+ if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {
+ SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
+ if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
+ DCI, Subtarget))
+ return R;
+ }
+
+ return SDValue();
+}
+
static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -41173,19 +43758,23 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
- unsigned IdxVal = N->getConstantOperandVal(2);
+ uint64_t IdxVal = N->getConstantOperandVal(2);
MVT SubVecVT = SubVec.getSimpleValueType();
- if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
- // Inserting zeros into zeros is a nop.
- if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
- return getZeroVector(OpVT, Subtarget, DAG, dl);
+ if (Vec.isUndef() && SubVec.isUndef())
+ return DAG.getUNDEF(OpVT);
+
+ // Inserting undefs/zeros into zeros/undefs is a zero vector.
+ if ((Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())) &&
+ (SubVec.isUndef() || ISD::isBuildVectorAllZeros(SubVec.getNode())))
+ return getZeroVector(OpVT, Subtarget, DAG, dl);
+ if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
// If we're inserting into a zero vector and then into a larger zero vector,
// just insert into the larger zero vector directly.
if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
- unsigned Idx2Val = SubVec.getConstantOperandVal(2);
+ uint64_t Idx2Val = SubVec.getConstantOperandVal(2);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
SubVec.getOperand(1),
@@ -41197,30 +43786,16 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
// least as large as the original insertion. Just insert the original
// subvector into a zero vector.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
- SubVec.getConstantOperandVal(1) == 0 &&
+ SubVec.getConstantOperandAPInt(1) == 0 &&
SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue Ins = SubVec.getOperand(0);
- if (Ins.getConstantOperandVal(2) == 0 &&
+ if (Ins.getConstantOperandAPInt(2) == 0 &&
ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits())
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
getZeroVector(OpVT, Subtarget, DAG, dl),
Ins.getOperand(1), N->getOperand(2));
}
-
- // If we're inserting a bitcast into zeros, rewrite the insert and move the
- // bitcast to the other side. This helps with detecting zero extending
- // during isel.
- // TODO: Is this useful for other indices than 0?
- if (!IsI1Vector && SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
- MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
- unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
- MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
- SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
- DAG.getBitcast(NewVT, Vec),
- SubVec.getOperand(0), N->getOperand(2));
- return DAG.getBitcast(OpVT, Insert);
- }
}
// Stop here if this is an i1 vector.
@@ -41248,77 +43823,92 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
}
}
- // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
- // load:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr + 16), Elts/2)
- // --> load32 addr
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr + 32), Elts/2)
- // --> load64 addr
- // or a 16-byte or 32-byte broadcast:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr), Elts/2)
- // --> X86SubVBroadcast(load16 addr)
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr), Elts/2)
- // --> X86SubVBroadcast(load32 addr)
+ // Match concat_vector style patterns.
+ SmallVector<SDValue, 2> SubVectorOps;
+ if (collectConcatOps(N, SubVectorOps))
+ if (SDValue Fold =
+ combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
+ return Fold;
+
+ // If we are inserting into both halves of the vector, the starting vector
+ // should be undef. If it isn't, make it so. Only do this if the early insert
+ // has no other uses.
+ // TODO: Should this be a generic DAG combine?
+ // TODO: Why doesn't SimplifyDemandedVectorElts catch this?
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- if (isNullConstant(Vec.getOperand(2))) {
- SDValue SubVec2 = Vec.getOperand(1);
- // If needed, look through bitcasts to get to the load.
- if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
- bool Fast;
- unsigned Alignment = FirstLd->getAlignment();
- unsigned AS = FirstLd->getAddressSpace();
- const X86TargetLowering *TLI = Subtarget.getTargetLowering();
- if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- OpVT, AS, Alignment, &Fast) && Fast) {
- SDValue Ops[] = {SubVec2, SubVec};
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG,
- Subtarget, false))
- return Ld;
- }
- }
- // If lower/upper loads are the same and there's no other use of the lower
- // load, then splat the loaded value with a broadcast.
- if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2)))
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) && Vec.hasOneUse())
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
-
- // If this is subv_broadcast insert into both halves, use a larger
- // subv_broadcast.
- if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2)
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
- SubVec.getOperand(0));
-
- // If we're inserting all zeros into the upper half, change this to
- // an insert into an all zeros vector. We will match this to a move
- // with implicit upper bit zeroing during isel.
- if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
- getZeroVector(OpVT, Subtarget, DAG, dl), SubVec2,
- Vec.getOperand(2));
+ OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
+ isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
+ Vec.hasOneUse()) {
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
+ Vec.getOperand(1), Vec.getOperand(2));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
+ N->getOperand(2));
+ }
- // If we are inserting into both halves of the vector, the starting
- // vector should be undef. If it isn't, make it so. Only do this if the
- // the early insert has no other uses.
- // TODO: Should this be a generic DAG combine?
- if (!Vec.getOperand(0).isUndef() && Vec.hasOneUse()) {
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
- SubVec2, Vec.getOperand(2));
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec,
- N->getOperand(2));
+ // If this is a broadcast insert into an upper undef, use a larger broadcast.
+ if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
+ return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));
- }
- }
+ return SDValue();
+}
+
+/// If we are extracting a subvector of a vector select and the select condition
+/// is composed of concatenated vectors, try to narrow the select width. This
+/// is a common pattern for AVX1 integer code because 256-bit selects may be
+/// legal, but there is almost no integer math/logic available for 256-bit.
+/// This function should only be called with legal types (otherwise, the calls
+/// to get simple value types will assert).
+static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
+ SDValue Sel = peekThroughBitcasts(Ext->getOperand(0));
+ SmallVector<SDValue, 4> CatOps;
+ if (Sel.getOpcode() != ISD::VSELECT ||
+ !collectConcatOps(Sel.getOperand(0).getNode(), CatOps))
+ return SDValue();
+
+ // Note: We assume simple value types because this should only be called with
+ // legal operations/types.
+ // TODO: This can be extended to handle extraction to 256-bits.
+ MVT VT = Ext->getSimpleValueType(0);
+ if (!VT.is128BitVector())
+ return SDValue();
+
+ MVT SelCondVT = Sel.getOperand(0).getSimpleValueType();
+ if (!SelCondVT.is256BitVector() && !SelCondVT.is512BitVector())
+ return SDValue();
+
+ MVT WideVT = Ext->getOperand(0).getSimpleValueType();
+ MVT SelVT = Sel.getSimpleValueType();
+ assert((SelVT.is256BitVector() || SelVT.is512BitVector()) &&
+ "Unexpected vector type with legal operations");
+
+ unsigned SelElts = SelVT.getVectorNumElements();
+ unsigned CastedElts = WideVT.getVectorNumElements();
+ unsigned ExtIdx = cast<ConstantSDNode>(Ext->getOperand(1))->getZExtValue();
+ if (SelElts % CastedElts == 0) {
+ // The select has the same or more (narrower) elements than the extract
+ // operand. The extraction index gets scaled by that factor.
+ ExtIdx *= (SelElts / CastedElts);
+ } else if (CastedElts % SelElts == 0) {
+ // The select has fewer (wider) elements than the extract operand. Make sure
+ // that the extraction index can be divided evenly.
+ unsigned IndexDivisor = CastedElts / SelElts;
+ if (ExtIdx % IndexDivisor != 0)
+ return SDValue();
+ ExtIdx /= IndexDivisor;
+ } else {
+ llvm_unreachable("Element count of simple vector types are not divisible?");
}
- return SDValue();
+ unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();
+ unsigned NarrowElts = SelElts / NarrowingFactor;
+ MVT NarrowSelVT = MVT::getVectorVT(SelVT.getVectorElementType(), NarrowElts);
+ SDLoc DL(Ext);
+ SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
+ SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
+ SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
+ SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
+ return DAG.getBitcast(VT, NarrowSel);
}
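A hedged sketch of the narrowing performed above (value types and the extraction
index are chosen purely for illustration):

    // before: 128-bit extract of a 256-bit integer select whose condition is
    //         a concatenation of two 128-bit compares
    //   t0 = v8i32 vselect (concat_vectors C0, C1), T, F
    //   t1 = v4i32 extract_subvector t0, 4
    // after:  the select itself is narrowed to the extracted half
    //   t1 = v4i32 vselect C1, (extract_subvector T, 4), (extract_subvector F, 4)
    // which keeps the select at a width where AVX1 integer ops are legal.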
static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
@@ -41334,7 +43924,10 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
// Capture the original wide type in the likely case that we need to bitcast
// back to this type.
- EVT VT = N->getValueType(0);
+ if (!N->getValueType(0).isSimple())
+ return SDValue();
+
+ MVT VT = N->getSimpleValueType(0);
EVT WideVecVT = N->getOperand(0).getValueType();
SDValue WideVec = peekThroughBitcasts(N->getOperand(0));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -41360,65 +43953,102 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- MVT OpVT = N->getSimpleValueType(0);
+ if (SDValue V = narrowExtractedVectorSelect(N, DAG))
+ return V;
+
SDValue InVec = N->getOperand(0);
unsigned IdxVal = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
if (ISD::isBuildVectorAllZeros(InVec.getNode()))
- return getZeroVector(OpVT, Subtarget, DAG, SDLoc(N));
+ return getZeroVector(VT, Subtarget, DAG, SDLoc(N));
if (ISD::isBuildVectorAllOnes(InVec.getNode())) {
- if (OpVT.getScalarType() == MVT::i1)
- return DAG.getConstant(1, SDLoc(N), OpVT);
- return getOnesVector(OpVT, DAG, SDLoc(N));
+ if (VT.getScalarType() == MVT::i1)
+ return DAG.getConstant(1, SDLoc(N), VT);
+ return getOnesVector(VT, DAG, SDLoc(N));
}
if (InVec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getBuildVector(
- OpVT, SDLoc(N),
- InVec.getNode()->ops().slice(IdxVal, OpVT.getVectorNumElements()));
+ VT, SDLoc(N),
+ InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));
+
+ // Try to move vector bitcast after extract_subv by scaling extraction index:
+ // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+ // TODO: Move this to DAGCombiner::visitEXTRACT_SUBVECTOR
+ if (InVec.getOpcode() == ISD::BITCAST &&
+ InVec.getOperand(0).getValueType().isVector()) {
+ SDValue SrcOp = InVec.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned DestNumElts = InVec.getValueType().getVectorNumElements();
+ if ((DestNumElts % SrcNumElts) == 0) {
+ unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+ if ((VT.getVectorNumElements() % DestSrcRatio) == 0) {
+ unsigned NewExtNumElts = VT.getVectorNumElements() / DestSrcRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NewExtNumElts);
+ if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ SrcOp, NewIndex);
+ return DAG.getBitcast(VT, NewExtract);
+ }
+ }
+ }
+ }
+
+ // If we're extracting from a broadcast then we're better off just
+ // broadcasting to the smaller type directly, assuming this is the only use.
+ // As it's a broadcast we don't care about the extraction index.
+ if (InVec.getOpcode() == X86ISD::VBROADCAST && InVec.hasOneUse() &&
+ InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits())
+ return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0));
// If we're extracting the lowest subvector and we're the only user,
// we may be able to perform this with a smaller vector width.
if (IdxVal == 0 && InVec.hasOneUse()) {
unsigned InOpcode = InVec.getOpcode();
- if (OpVT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
+ if (VT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
// v2f64 CVTDQ2PD(v4i32).
if (InOpcode == ISD::SINT_TO_FP &&
InVec.getOperand(0).getValueType() == MVT::v4i32) {
- return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), OpVT, InVec.getOperand(0));
+ return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0));
+ }
+ // v2f64 CVTUDQ2PD(v4i32).
+ if (InOpcode == ISD::UINT_TO_FP &&
+ InVec.getOperand(0).getValueType() == MVT::v4i32) {
+ return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0));
}
// v2f64 CVTPS2PD(v4f32).
if (InOpcode == ISD::FP_EXTEND &&
InVec.getOperand(0).getValueType() == MVT::v4f32) {
- return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
+ return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0));
}
}
- if ((InOpcode == ISD::ZERO_EXTEND || InOpcode == ISD::SIGN_EXTEND) &&
- OpVT.is128BitVector() &&
- InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
- unsigned ExtOp =
- InOpcode == ISD::ZERO_EXTEND ? ISD::ZERO_EXTEND_VECTOR_INREG
- : ISD::SIGN_EXTEND_VECTOR_INREG;
- return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
- }
- if ((InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ if ((InOpcode == ISD::ANY_EXTEND ||
+ InOpcode == ISD::ANY_EXTEND_VECTOR_INREG ||
+ InOpcode == ISD::ZERO_EXTEND ||
+ InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InOpcode == ISD::SIGN_EXTEND ||
InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
- OpVT.is128BitVector() &&
+ VT.is128BitVector() &&
InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
- return DAG.getNode(InOpcode, SDLoc(N), OpVT, InVec.getOperand(0));
+ unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode);
+ return DAG.getNode(ExtOp, SDLoc(N), VT, InVec.getOperand(0));
}
- if (InOpcode == ISD::BITCAST) {
- // TODO - do this for target shuffles in general.
- SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
- if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
- SDLoc DL(N);
- SDValue SubPSHUFB =
- DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
- extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
- extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
- return DAG.getBitcast(OpVT, SubPSHUFB);
- }
+ if (InOpcode == ISD::VSELECT &&
+ InVec.getOperand(0).getValueType().is256BitVector() &&
+ InVec.getOperand(1).getValueType().is256BitVector() &&
+ InVec.getOperand(2).getValueType().is256BitVector()) {
+ SDLoc DL(N);
+ SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128);
+ SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128);
+ SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128);
+ return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);
}
}
@@ -41428,6 +44058,7 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
+ SDLoc DL(N);
// If this is a scalar to vector to v1i1 from an AND with 1, bypass the and.
// This occurs frequently in our masked scalar intrinsic code and our
@@ -41436,7 +44067,7 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
if (C->getAPIntValue().isOneValue())
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
Src.getOperand(0));
// Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
@@ -41445,8 +44076,17 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
if (C->isNullValue())
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT,
- Src.getOperand(0), Src.getOperand(1));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
+ Src.getOperand(1));
+
+ // Reduce v2i64 to v4i32 if we don't need the upper bits.
+ // TODO: Move to DAGCombine?
+ if (VT == MVT::v2i64 && Src.getOpcode() == ISD::ANY_EXTEND &&
+ Src.getValueType() == MVT::i64 && Src.hasOneUse() &&
+ Src.getOperand(0).getScalarValueSizeInBits() <= 32)
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+ DAG.getAnyExtOrTrunc(Src.getOperand(0), DL, MVT::i32)));
return SDValue();
}
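A hedged sketch of the v2i64 -> v4i32 reduction added above (node names are
illustrative):

    //   t0 = i64 any_extend X            ; X is at most 32 bits wide
    //   t1 = v2i64 scalar_to_vector t0
    // becomes
    //   t1 = v2i64 bitcast (v4i32 scalar_to_vector (i32 any_ext/trunc X))
    // so isel can typically use a 32-bit vmovd-style insertion; the discarded
    // upper bits were undefined anyway because the extend was an any_extend.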
@@ -41483,6 +44123,56 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue In = N->getOperand(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Try to merge vector loads and extend_inreg to an extload.
+ if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
+ In.hasOneUse()) {
+ auto *Ld = cast<LoadSDNode>(In);
+ if (!Ld->isVolatile()) {
+ MVT SVT = In.getSimpleValueType().getVectorElementType();
+ ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG
+ ? ISD::SEXTLOAD
+ : ISD::ZEXTLOAD;
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getVectorNumElements());
+ if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
+ SDValue Load =
+ DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
+ Ld->getPointerInfo(), MemVT, Ld->getAlignment(),
+ Ld->getMemOperand()->getFlags());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
+ return Load;
+ }
+ }
+ }
+
+ // Disabling for widening legalization for now. We can enable if we find a
+ // case that needs it. Otherwise it can be deleted when we switch to
+ // widening legalization.
+ if (ExperimentalVectorWideningLegalization)
+ return SDValue();
+
+ // Combine (ext_invec (ext_invec X)) -> (ext_invec X)
+ if (In.getOpcode() == N->getOpcode() &&
+ TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getOperand(0).getValueType()))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
+
+ // Attempt to combine as a shuffle.
+ // TODO: SSE41 support
+ if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) {
+ SDValue Op(N, 0);
+ if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
+ if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
+ return Res;
+ }
+
+ return SDValue();
+}
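A hedged illustration of the extload merge at the top of combineExtInVec,
assuming SSE4.1 so the sign-extending vector load is legal (pmovsxdq can fold
the load):

    //   t0 = v4i32 load <ptr>
    //   t1 = v2i64 sign_extend_vector_inreg t0
    // becomes a single extending load with MemVT = v2i32,
    //   t1 = v2i64 sextload <ptr>, v2i32
    // and all chain users of the original load are rewired to the new load's
    // chain via ReplaceAllUsesOfValueWith.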
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -41494,6 +44184,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PEXTRW:
case X86ISD::PEXTRB:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
+ case ISD::CONCAT_VECTORS:
+ return combineConcatVectors(N, DAG, DCI, Subtarget);
case ISD::INSERT_SUBVECTOR:
return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::EXTRACT_SUBVECTOR:
@@ -41506,19 +44198,21 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::CMP: return combineCMP(N, DAG);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
+ case X86ISD::ADD:
+ case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
case X86ISD::SBB: return combineSBB(N, DAG);
case X86ISD::ADC: return combineADC(N, DAG, DCI);
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL: return combineShift(N, DAG, DCI, Subtarget);
+ case ISD::SHL: return combineShiftLeft(N, DAG);
+ case ISD::SRA: return combineShiftRightArithmetic(N, DAG);
+ case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI);
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
- case ISD::STORE: return combineStore(N, DAG, Subtarget);
+ case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
@@ -41535,13 +44229,21 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
- case X86ISD::CVTSI2P:
+ case X86ISD::CVTSI2P:
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
+ case X86ISD::CVTP2SI:
+ case X86ISD::CVTP2UI:
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI: return combineCVTP2I_CVTTP2I(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return combineExtInVec(N, DAG, DCI,
+ Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
@@ -41624,11 +44326,15 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
- // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
- // we have specializations to turn 32-bit multiply into LEA or other ops.
+ // TODO: Almost no 8-bit ops are desirable because they have no actual
+ // size/speed advantages vs. 32-bit ops, but they do have a major
+ // potential disadvantage by causing partial register stalls.
+ //
+ // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
+ // we have specializations to turn 32-bit multiply/shl into LEA or other ops.
// Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
// check for a constant operand to the multiply.
- if (Opc == ISD::MUL && VT == MVT::i8)
+ if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
return false;
// i16 instruction encodings are longer and some i16 instructions are slow,
@@ -41642,6 +44348,7 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::SHL:
+ case ISD::SRA:
case ISD::SRL:
case ISD::SUB:
case ISD::ADD:
@@ -41717,6 +44424,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
case ISD::ANY_EXTEND:
break;
case ISD::SHL:
+ case ISD::SRA:
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
@@ -41889,6 +44597,40 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
return false;
}
+static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
+ X86::CondCode Cond = StringSwitch<X86::CondCode>(Constraint)
+ .Case("{@cca}", X86::COND_A)
+ .Case("{@ccae}", X86::COND_AE)
+ .Case("{@ccb}", X86::COND_B)
+ .Case("{@ccbe}", X86::COND_BE)
+ .Case("{@ccc}", X86::COND_B)
+ .Case("{@cce}", X86::COND_E)
+ .Case("{@ccz}", X86::COND_E)
+ .Case("{@ccg}", X86::COND_G)
+ .Case("{@ccge}", X86::COND_GE)
+ .Case("{@ccl}", X86::COND_L)
+ .Case("{@ccle}", X86::COND_LE)
+ .Case("{@ccna}", X86::COND_BE)
+ .Case("{@ccnae}", X86::COND_B)
+ .Case("{@ccnb}", X86::COND_AE)
+ .Case("{@ccnbe}", X86::COND_A)
+ .Case("{@ccnc}", X86::COND_AE)
+ .Case("{@ccne}", X86::COND_NE)
+ .Case("{@ccnz}", X86::COND_NE)
+ .Case("{@ccng}", X86::COND_LE)
+ .Case("{@ccnge}", X86::COND_L)
+ .Case("{@ccnl}", X86::COND_GE)
+ .Case("{@ccnle}", X86::COND_G)
+ .Case("{@ccno}", X86::COND_NO)
+ .Case("{@ccnp}", X86::COND_P)
+ .Case("{@ccns}", X86::COND_NS)
+ .Case("{@cco}", X86::COND_O)
+ .Case("{@ccp}", X86::COND_P)
+ .Case("{@ccs}", X86::COND_S)
+ .Default(X86::COND_INVALID);
+ return Cond;
+}
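A hedged usage sketch of the flag-output constraints parsed above, as they would
appear in source code (the variable names and the asm body are illustrative
only):

    static inline int is_zero_after_test(int x) {
      int zf;
      asm("testl %1, %1" : "=@ccz"(zf) : "r"(x));
      // "{@ccz}" maps to X86::COND_E; LowerAsmOutputForConstraint below copies
      // EFLAGS, emits the corresponding SETcc, and zero-extends the result.
      return zf;
    }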
+
/// Given a constraint letter, return the type of constraint for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(StringRef Constraint) const {
@@ -41949,7 +44691,8 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
return C_RegisterClass;
}
}
- }
+ } else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
+ return C_Other;
return TargetLowering::getConstraintType(Constraint);
}
@@ -42120,6 +44863,32 @@ LowerXConstraint(EVT ConstraintVT) const {
return TargetLowering::LowerXConstraint(ConstraintVT);
}
+// Lower @cc targets via setcc.
+SDValue X86TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) const {
+ X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
+ if (Cond == X86::COND_INVALID)
+ return SDValue();
+ // Check that return type is valid.
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+ OpInfo.ConstraintVT.getSizeInBits() < 8)
+ report_fatal_error("Flag output operand is of invalid type");
+
+ // Get EFLAGS register. Only update chain when copyfrom is glued.
+ if (Flag.getNode()) {
+ Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag);
+ Chain = Flag.getValue(1);
+ } else
+ Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);
+ // Extract CC code.
+ SDValue CC = getSETCC(Cond, Flag, DL, DAG);
+ // Extend to 32-bits
+ SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
+
+ return Result;
+}
+
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -42229,8 +44998,13 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'i': {
// Literal immediates are always ok.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
- // Widen to 64 bits here to get it sign extended.
- Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64);
+ bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
+ BooleanContent BCont = getBooleanContents(MVT::i64);
+ ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
+ : ISD::SIGN_EXTEND;
+ int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
+ : CST->getSExtValue();
+ Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
break;
}
@@ -42242,40 +45016,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
- GlobalAddressSDNode *GA = nullptr;
- int64_t Offset = 0;
-
- // Match either (GA), (GA+C), (GA+C1+C2), etc.
- while (1) {
- if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
- Offset += GA->getOffset();
- break;
- } else if (Op.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- Offset += C->getZExtValue();
- Op = Op.getOperand(0);
- continue;
- }
- } else if (Op.getOpcode() == ISD::SUB) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- Offset += -C->getZExtValue();
- Op = Op.getOperand(0);
- continue;
- }
- }
-
- // Otherwise, this isn't something we can handle, reject it.
- return;
- }
-
- const GlobalValue *GV = GA->getGlobal();
- // If we require an extra load to get this address, as in PIC mode, we
- // can't accept it.
- if (isGlobalStubReference(Subtarget.classifyGlobalReference(GV)))
- return;
-
- Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
- GA->getValueType(0), Offset);
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
+ // If we require an extra load to get this address, as in PIC mode, we
+ // can't accept it.
+ if (isGlobalStubReference(
+ Subtarget.classifyGlobalReference(GA->getGlobal())))
+ return;
break;
}
}
@@ -42307,6 +45053,18 @@ static bool isFRClass(const TargetRegisterClass &RC) {
RC.hasSuperClassEq(&X86::VR512RegClass);
}
+/// Check if \p RC is a mask register class.
+/// I.e., VK* or one of its variants.
+static bool isVKClass(const TargetRegisterClass &RC) {
+ return RC.hasSuperClassEq(&X86::VK1RegClass) ||
+ RC.hasSuperClassEq(&X86::VK2RegClass) ||
+ RC.hasSuperClassEq(&X86::VK4RegClass) ||
+ RC.hasSuperClassEq(&X86::VK8RegClass) ||
+ RC.hasSuperClassEq(&X86::VK16RegClass) ||
+ RC.hasSuperClassEq(&X86::VK32RegClass) ||
+ RC.hasSuperClassEq(&X86::VK64RegClass);
+}
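A hedged sketch, assuming AVX-512, of inline asm that lands in one of the VK*
mask classes listed above (whether via the "k" constraint handled nearby or an
explicit "{k1}"-style register name, which the isVKClass() fix-up further down
is meant to adjust by value type); the code is illustrative only:

    static inline unsigned short invert_mask(unsigned short m) {
      unsigned short r;
      asm("knotw %1, %0" : "=k"(r) : "k"(m));  // "k" selects an AVX-512 mask reg
      return r;
    }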
+
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
@@ -42317,25 +45075,31 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
+ // 'A' means [ER]AX + [ER]DX.
+ case 'A':
+ if (Subtarget.is64Bit())
+ return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
+ assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
+ "Expecting 64, 32 or 16 bit subtarget");
+ return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
+
// TODO: Slight differences here in allocation order and leaving
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
case 'k':
if (Subtarget.hasAVX512()) {
- // Only supported in AVX512 or later.
- switch (VT.SimpleTy) {
- default: break;
- case MVT::i32:
- return std::make_pair(0U, &X86::VK32RegClass);
- case MVT::i16:
- return std::make_pair(0U, &X86::VK16RegClass);
- case MVT::i8:
- return std::make_pair(0U, &X86::VK8RegClass);
- case MVT::i1:
+ if (VT == MVT::i1)
return std::make_pair(0U, &X86::VK1RegClass);
- case MVT::i64:
+ if (VT == MVT::i8)
+ return std::make_pair(0U, &X86::VK8RegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::VK16RegClass);
+ }
+ if (Subtarget.hasBWI()) {
+ if (VT == MVT::i32)
+ return std::make_pair(0U, &X86::VK32RegClass);
+ if (VT == MVT::i64)
return std::make_pair(0U, &X86::VK64RegClass);
- }
}
break;
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
@@ -42403,7 +45167,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
- if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
+ if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::FR32XRegClass);
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
@@ -42431,12 +45195,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v4f64:
if (VConstraint && Subtarget.hasVLX())
return std::make_pair(0U, &X86::VR256XRegClass);
- return std::make_pair(0U, &X86::VR256RegClass);
+ if (Subtarget.hasAVX())
+ return std::make_pair(0U, &X86::VR256RegClass);
+ break;
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
- return std::make_pair(0U, &X86::VR512RegClass);
+ if (!Subtarget.hasAVX512()) break;
+ if (VConstraint)
+ return std::make_pair(0U, &X86::VR512RegClass);
+ return std::make_pair(0U, &X86::VR512_0_15RegClass);
}
break;
}
@@ -42457,25 +45226,27 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(X86::XMM0, &X86::VR128RegClass);
case 'k':
// This register class doesn't allocate k0 for masked vector operation.
- if (Subtarget.hasAVX512()) { // Only supported in AVX512.
- switch (VT.SimpleTy) {
- default: break;
- case MVT::i32:
- return std::make_pair(0U, &X86::VK32WMRegClass);
- case MVT::i16:
- return std::make_pair(0U, &X86::VK16WMRegClass);
- case MVT::i8:
- return std::make_pair(0U, &X86::VK8WMRegClass);
- case MVT::i1:
+ if (Subtarget.hasAVX512()) {
+ if (VT == MVT::i1)
return std::make_pair(0U, &X86::VK1WMRegClass);
- case MVT::i64:
+ if (VT == MVT::i8)
+ return std::make_pair(0U, &X86::VK8WMRegClass);
+ if (VT == MVT::i16)
+ return std::make_pair(0U, &X86::VK16WMRegClass);
+ }
+ if (Subtarget.hasBWI()) {
+ if (VT == MVT::i32)
+ return std::make_pair(0U, &X86::VK32WMRegClass);
+ if (VT == MVT::i64)
return std::make_pair(0U, &X86::VK64WMRegClass);
- }
}
break;
}
}
+ if (parseConstraintCode(Constraint) != X86::COND_INVALID)
+ return std::make_pair(0U, &X86::GR32RegClass);
+
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
@@ -42505,14 +45276,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (StringRef("{flags}").equals_lower(Constraint))
return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);
- // 'A' means [ER]AX + [ER]DX.
- if (Constraint == "A") {
- if (Subtarget.is64Bit())
- return std::make_pair(X86::RAX, &X86::GR64_ADRegClass);
- assert((Subtarget.is32Bit() || Subtarget.is16Bit()) &&
- "Expecting 64, 32 or 16 bit subtarget");
- return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
- }
+ // dirflag -> DF
+ if (StringRef("{dirflag}").equals_lower(Constraint))
+ return std::make_pair(X86::DF, &X86::DFCCRRegClass);
+
+ // fpsr -> FPSW
+ if (StringRef("{fpsr}").equals_lower(Constraint))
+ return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);
+
return Res;
}
@@ -42561,20 +45332,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Size == 64 && !is64Bit) {
// Model GCC's behavior here and select a fixed pair of 32-bit
// registers.
- switch (Res.first) {
- case X86::EAX:
+ switch (DestReg) {
+ case X86::RAX:
return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
- case X86::EDX:
+ case X86::RDX:
return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
- case X86::ECX:
+ case X86::RCX:
return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
- case X86::EBX:
+ case X86::RBX:
return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
- case X86::ESI:
+ case X86::RSI:
return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
- case X86::EDI:
+ case X86::RDI:
return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
- case X86::EBP:
+ case X86::RBP:
return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
default:
return std::make_pair(0, nullptr);
@@ -42594,13 +45365,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f32 || VT == MVT::i32)
- Res.second = &X86::FR32RegClass;
+ Res.second = &X86::FR32XRegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
- Res.second = &X86::FR64RegClass;
- else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT))
- Res.second = &X86::VR128RegClass;
- else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT))
- Res.second = &X86::VR256RegClass;
+ Res.second = &X86::FR64XRegClass;
+ else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
+ Res.second = &X86::VR128XRegClass;
+ else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
+ Res.second = &X86::VR256XRegClass;
else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
Res.second = &X86::VR512RegClass;
else {
@@ -42608,6 +45379,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.first = 0;
Res.second = nullptr;
}
+ } else if (isVKClass(*Class)) {
+ if (VT == MVT::i1)
+ Res.second = &X86::VK1RegClass;
+ else if (VT == MVT::i8)
+ Res.second = &X86::VK8RegClass;
+ else if (VT == MVT::i16)
+ Res.second = &X86::VK16RegClass;
+ else if (VT == MVT::i32)
+ Res.second = &X86::VK32RegClass;
+ else if (VT == MVT::i64)
+ Res.second = &X86::VK64RegClass;
+ else {
+ // Type mismatch and not a clobber: return an error.
+ Res.first = 0;
+ Res.second = nullptr;
+ }
}
return Res;
@@ -42660,7 +45447,7 @@ void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in X86MachineFunctionInfo.
X86MachineFunctionInfo *AFI =
- Entry->getParent()->getInfo<X86MachineFunctionInfo>();
+ Entry->getParent()->getInfo<X86MachineFunctionInfo>();
AFI->setIsSplitCSR(true);
}
@@ -42688,9 +45475,9 @@ void X86TargetLowering::insertCopiesSplitCSR(
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction().hasFnAttribute(
- Attribute::NoUnwind) &&
- "Function should be nounwind in insertCopiesSplitCSR!");
+ assert(
+ Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
@@ -42709,7 +45496,8 @@ bool X86TargetLowering::supportSwiftError() const {
/// Returns the name of the symbol used to emit stack probes or the empty
/// string if not applicable.
-StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
+StringRef
+X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
// If the function specifically requests stack probes, emit them.
if (MF.getFunction().hasFnAttribute("probe-stack"))
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 910acd80e8b8..e0be03bc3f9d 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1,9 +1,8 @@
//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -78,15 +77,6 @@ namespace llvm {
/// Same as call except it adds the NoTrack prefix.
NT_CALL,
- /// This operation implements the lowering for readcyclecounter.
- RDTSC_DAG,
-
- /// X86 Read Time-Stamp Counter and Processor ID.
- RDTSCP_DAG,
-
- /// X86 Read Performance Monitoring Counters.
- RDPMC_DAG,
-
/// X86 compare and logical compare instructions.
CMP, COMI, UCOMI,
@@ -110,13 +100,12 @@ namespace llvm {
FSETCC,
/// X86 FP SETCC, similar to above, but with output as an i1 mask and
- /// with optional rounding mode.
- FSETCCM, FSETCCM_RND,
+ /// and a version with SAE.
+ FSETCCM, FSETCCM_SAE,
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
- /// flag operand produced by a CMP or TEST instruction. It also writes a
- /// flag result.
+ /// flag operand produced by a CMP or TEST instruction.
CMOV,
/// X86 conditional branches. Operand 0 is the chain operand, operand 1
@@ -204,28 +193,29 @@ namespace llvm {
/// Dynamic (non-constant condition) vector blend where only the sign bits
/// of the condition elements are used. This is used to enforce that the
/// condition mask is not valid for generic VSELECT optimizations. This
- /// can also be used to implement the intrinsics.
+ /// is also used to implement the intrinsics.
+ /// Operands are in VSELECT order: MASK, TRUE, FALSE
BLENDV,
/// Combined add and sub on an FP vector.
ADDSUB,
// FP vector ops with rounding mode.
- FADD_RND, FADDS_RND,
- FSUB_RND, FSUBS_RND,
- FMUL_RND, FMULS_RND,
- FDIV_RND, FDIVS_RND,
- FMAX_RND, FMAXS_RND,
- FMIN_RND, FMINS_RND,
- FSQRT_RND, FSQRTS_RND,
+ FADD_RND, FADDS, FADDS_RND,
+ FSUB_RND, FSUBS, FSUBS_RND,
+ FMUL_RND, FMULS, FMULS_RND,
+ FDIV_RND, FDIVS, FDIVS_RND,
+ FMAX_SAE, FMAXS_SAE,
+ FMIN_SAE, FMINS_SAE,
+ FSQRT_RND, FSQRTS, FSQRTS_RND,
// FP vector get exponent.
- FGETEXP_RND, FGETEXPS_RND,
+ FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
// Extract Normalized Mantissas.
- VGETMANT, VGETMANT_RND, VGETMANTS, VGETMANTS_RND,
+ VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
// FP Scale.
- SCALEF,
- SCALEFS,
+ SCALEF, SCALEF_RND,
+ SCALEFS, SCALEFS_RND,
// Unsigned Integer average.
AVG,
@@ -300,10 +290,10 @@ namespace llvm {
VMTRUNC, VMTRUNCUS, VMTRUNCS,
// Vector FP extend.
- VFPEXT, VFPEXT_RND, VFPEXTS_RND,
+ VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
// Vector FP round.
- VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
+ VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
// Masked version of above. Used for v2f64->v4f32.
// SRC, PASSTHRU, MASK
@@ -315,10 +305,8 @@ namespace llvm {
// Vector shift elements
VSHL, VSRL, VSRA,
- // Vector variable shift right arithmetic.
- // Unlike ISD::SRA, in case shift count greater then element size
- // use sign bit to fill destination data element.
- VSRAV,
+ // Vector variable shift
+ VSHLV, VSRLV, VSRAV,
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
@@ -343,8 +331,8 @@ namespace llvm {
/// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
CMPM,
- // Vector comparison with rounding mode for FP values
- CMPM_RND,
+ // Vector comparison with SAE for FP values
+ CMPM_SAE,
// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL, UMUL,
@@ -419,16 +407,16 @@ namespace llvm {
// Bitwise ternary logic.
VPTERNLOG,
// Fix Up Special Packed Float32/64 values.
- VFIXUPIMM,
- VFIXUPIMMS,
+ VFIXUPIMM, VFIXUPIMM_SAE,
+ VFIXUPIMMS, VFIXUPIMMS_SAE,
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
- VRANGE, VRANGE_RND, VRANGES, VRANGES_RND,
+ VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
// Reduce - Perform Reduction Transformation on scalar\packed FP.
- VREDUCE, VREDUCE_RND, VREDUCES, VREDUCES_RND,
+ VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
// Also used by the legacy (V)ROUND intrinsics where we mask out the
// scaling part of the immediate.
- VRNDSCALE, VRNDSCALE_RND, VRNDSCALES, VRNDSCALES_RND,
+ VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
// Tests Types Of a FP Values for packed types.
VFPCLASS,
// Tests Types Of a FP Values for scalar types.
@@ -499,6 +487,7 @@ namespace llvm {
// Convert Unsigned/Integer to Floating-Point Value with rounding mode.
SINT_TO_FP_RND, UINT_TO_FP_RND,
+ SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
// Vector float/double to signed/unsigned integer.
@@ -507,9 +496,9 @@ namespace llvm {
CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
// Vector float/double to signed/unsigned integer with truncation.
- CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
+ CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
// Scalar float/double to signed/unsigned integer with truncation.
- CVTTS2SI, CVTTS2UI, CVTTS2SI_RND, CVTTS2UI_RND,
+ CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
// Vector signed/unsigned integer to float/double.
CVTSI2P, CVTUI2P,
@@ -517,6 +506,20 @@ namespace llvm {
// Masked versions of above. Used for v2f64->v4f32.
// SRC, PASSTHRU, MASK
MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
+ MCVTSI2P, MCVTUI2P,
+
+ // Vector float to bfloat16.
+ // Convert TWO packed single data to one packed BF16 data
+ CVTNE2PS2BF16,
+ // Convert packed single data to packed BF16 data
+ CVTNEPS2BF16,
+ // Masked version of above.
+ // SRC, PASSTHRU, MASK
+ MCVTNEPS2BF16,
+
+ // Dot product of BF16 pairs to be accumulated into
+ // packed single precision.
+ DPBF16PS,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
@@ -547,6 +550,12 @@ namespace llvm {
// indicate whether it is valid in CF.
RDSEED,
+ // Protection keys
+ // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
+ // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
+ // value for ECX.
+ RDPKRU, WRPKRU,
+
// SSE42 string comparisons.
// These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
// will emit one or two instructions based on which results are used. If
@@ -560,10 +569,11 @@ namespace llvm {
XTEST,
// ERI instructions.
- RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
+ RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
+ RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
// Conversions between float and half-float.
- CVTPS2PH, CVTPH2PS, CVTPH2PS_RND,
+ CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
// Masked version of above.
// SRC, RND, PASSTHRU, MASK
@@ -578,6 +588,12 @@ namespace llvm {
// User level wait
UMWAIT, TPAUSE,
+ // Enqueue Stores Instructions
+ ENQCMD, ENQCMDS,
+
+ // For avx512-vp2intersect
+ VP2INTERSECT,
+
// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
@@ -592,6 +608,9 @@ namespace llvm {
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
+ // extract_vector_elt, store.
+ VEXTRACT_STORE,
+
// Store FP control world into i16 memory.
FNSTCW16m,
@@ -599,29 +618,33 @@ namespace llvm {
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
/// has two inputs (token chain and address) and two outputs (int value
- /// and token chain).
- FP_TO_INT16_IN_MEM,
- FP_TO_INT32_IN_MEM,
- FP_TO_INT64_IN_MEM,
+ /// and token chain). Memory VT specifies the type to store to.
+ FP_TO_INT_IN_MEM,
/// This instruction implements SINT_TO_FP with the
/// integer source in memory and FP reg result. This corresponds to the
- /// X86::FILD*m instructions. It has three inputs (token chain, address,
- /// and source type) and two outputs (FP value and token chain). FILD_FLAG
- /// also produces a flag).
+ /// X86::FILD*m instructions. It has two inputs (token chain and address)
+ /// and two outputs (FP value and token chain). FILD_FLAG also produces a
+ /// flag. The integer source type is specified by the memory VT.
FILD,
FILD_FLAG,
+ /// This instruction implements a fp->int store from FP stack
+ /// slots. This corresponds to the fist instruction. It takes a
+ /// chain operand, value to store, address, and glue. The memory VT
+ /// specifies the type to store as.
+ FIST,
+
/// This instruction implements an extending load to FP stack slots.
/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
- /// operand, ptr to load from, and a ValueType node indicating the type
- /// to load to.
+ /// operand, and ptr to load from. The memory VT specifies the type to
+ /// load from.
FLD,
- /// This instruction implements a truncating store to FP stack
+ /// This instruction implements a truncating store from FP stack
/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
- /// chain operand, value to store, address, and a ValueType to store it
- /// as.
+ /// chain operand, value to store, address, and glue. The memory VT
+ /// specifies the type to store as.
FST,
/// This instruction grabs the address of the next argument
@@ -708,7 +731,7 @@ namespace llvm {
/// target-independent logic.
EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override;
+ const AttributeList &FuncAttributes) const override;
/// Returns true if it's safe to use load / store of the
/// specified type to expand memcpy / memset inline. This is mostly true
@@ -721,7 +744,8 @@ namespace llvm {
/// Returns true if the target allows unaligned memory accesses of the
/// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
- bool *Fast) const override;
+ MachineMemOperand::Flags Flags,
+ bool *Fast) const override;
/// Provide custom lowering hooks for some operations.
///
@@ -775,7 +799,11 @@ namespace llvm {
/// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
- bool mergeStoresAfterLegalization() const override { return true; }
+ /// Do not merge vector stores after legalization because that may conflict
+ /// with x86-specific store splitting optimizations.
+ bool mergeStoresAfterLegalization(EVT MemVT) const override {
+ return !MemVT.isVector();
+ }
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const SelectionDAG &DAG) const override;
@@ -812,7 +840,10 @@ namespace llvm {
bool hasAndNot(SDValue Y) const override;
- bool preferShiftsToClearExtremeBits(SDValue Y) const override;
+ bool shouldFoldConstantShiftPairToMask(const SDNode *N,
+ CombineLevel Level) const override;
+
+ bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
bool
shouldTransformSignedTruncationCheck(EVT XVT,
@@ -832,6 +863,12 @@ namespace llvm {
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
+ bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return false;
+ return true;
+ }
+
bool shouldSplatInsEltVarIndex(EVT VT) const override;
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
@@ -841,11 +878,6 @@ namespace llvm {
/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
MVT hasFastEqualityCompare(unsigned NumBits) const override;
- /// Allow multiple load pairs per block for smaller and faster code.
- unsigned getMemcmpEqZeroLoadsPerBlock() const override {
- return 2;
- }
-
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -881,6 +913,8 @@ namespace llvm {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
+
SDValue unwrapAddress(SDValue N) const override;
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
@@ -918,6 +952,11 @@ namespace llvm {
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ /// Handle Lowering flag assembly outputs.
+ SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
+ const AsmOperandInfo &Constraint,
+ SelectionDAG &DAG) const override;
+
/// Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
@@ -956,6 +995,12 @@ namespace llvm {
bool isVectorShiftByScalarCheap(Type *Ty) const override;
+ /// Add x86-specific opcodes to the default list.
+ bool isBinOp(unsigned Opcode) const override;
+
+ /// Returns true if the opcode is a commutative binary operation.
+ bool isCommutativeBinOp(unsigned Opcode) const override;
+
/// Return true if it's free to truncate a value of
/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
/// register EAX to i16 by referencing its sub-register AX.
@@ -1001,7 +1046,8 @@ namespace llvm {
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT,
+ bool ForCodeSize) const override;
/// Targets can use this to indicate that they only support *some*
/// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
@@ -1063,6 +1109,17 @@ namespace llvm {
/// supported.
bool shouldScalarizeBinop(SDValue) const override;
+ /// Extract of a scalar FP value from index 0 of a vector is free.
+ bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
+ EVT EltVT = VT.getScalarType();
+ return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
+ }
+
+ /// Overflow nodes should get combined/lowered to optimal instructions
+ /// (they should allow eliminating explicit compares by getting flags from
+ /// math ops).
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
+
bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace more than 2 scalar stores, there will be a reduction
@@ -1070,7 +1127,9 @@ namespace llvm {
return NumElem > 2;
}
- bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
+ bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const override;
/// Intel processors have a unified instruction and data cache
const char * getClearCacheBuiltinName() const override {
@@ -1105,7 +1164,7 @@ namespace llvm {
bool useStackGuardXorFP() const override;
void insertSSPDeclarations(Module &M) const override;
Value *getSDagStackGuard(const Module &M) const override;
- Value *getSSPStackGuardCheck(const Module &M) const override;
+ Function *getSSPStackGuardCheck(const Module &M) const override;
SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL) const override;
@@ -1221,9 +1280,7 @@ namespace llvm {
unsigned getAddressSpace(void) const;
- std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool isSigned,
- bool isReplace) const;
+ SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -1234,12 +1291,15 @@ namespace llvm {
const unsigned char OpFlags = 0) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
- int64_t Offset, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ /// Creates target global address or external symbol nodes for calls or
+ /// other uses.
+ SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
+ bool ForCall) const;
+
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
@@ -1568,10 +1628,10 @@ namespace llvm {
void scaleShuffleMask(int Scale, ArrayRef<T> Mask,
SmallVectorImpl<T> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
- int NumElts = Mask.size();
- ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
+ size_t NumElts = Mask.size();
+ ScaledMask.assign(NumElts * Scale, -1);
- for (int i = 0; i != NumElts; ++i) {
+ for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
// Repeat sentinel values in every mask element.
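A hedged illustration of scaleShuffleMask with Scale == 2 (mask values
invented): every index M expands to { 2*M, 2*M+1 } while sentinel values are
simply repeated, e.g.

    //   Mask       = {  1, -1,  0,  3 }
    //   ScaledMask = {  2,  3, -1, -1,  0,  1,  6,  7 }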
diff --git a/lib/Target/X86/X86IndirectBranchTracking.cpp b/lib/Target/X86/X86IndirectBranchTracking.cpp
index 7c00c9260d15..04e8b2231fec 100644
--- a/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -1,9 +1,8 @@
//===---- X86IndirectBranchTracking.cpp - Enables CET IBT mechanism -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -58,7 +57,7 @@ private:
/// The function will not add it if one already exists.
/// It will add ENDBR32 or ENDBR64 opcode, depending on the target.
/// \returns true if the ENDBR was added and false otherwise.
- bool addENDBR(MachineBasicBlock &MBB) const;
+ bool addENDBR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
};
} // end anonymous namespace
@@ -69,20 +68,31 @@ FunctionPass *llvm::createX86IndirectBranchTrackingPass() {
return new X86IndirectBranchTrackingPass();
}
-bool X86IndirectBranchTrackingPass::addENDBR(MachineBasicBlock &MBB) const {
+bool X86IndirectBranchTrackingPass::addENDBR(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
assert(TII && "Target instruction info was not initialized");
assert((X86::ENDBR64 == EndbrOpcode || X86::ENDBR32 == EndbrOpcode) &&
"Unexpected Endbr opcode");
- auto MI = MBB.begin();
- // If the MBB is empty or the first instruction is not ENDBR,
- // add the ENDBR instruction to the beginning of the MBB.
- if (MI == MBB.end() || EndbrOpcode != MI->getOpcode()) {
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(EndbrOpcode));
- NumEndBranchAdded++;
+ // If I is at the end of the MBB or the instruction at I is not ENDBR,
+ // insert an ENDBR instruction at the location of I.
+ if (I == MBB.end() || I->getOpcode() != EndbrOpcode) {
+ BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(EndbrOpcode));
+ ++NumEndBranchAdded;
return true;
}
+ return false;
+}
+bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
+ if (!MOp.isGlobal())
+ return false;
+ auto *CalleeFn = dyn_cast<Function>(MOp.getGlobal());
+ if (!CalleeFn)
+ return false;
+ AttributeList Attrs = CalleeFn->getAttributes();
+ if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice))
+ return true;
return false;
}
@@ -108,14 +118,21 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
!MF.getFunction().hasLocalLinkage()) &&
!MF.getFunction().doesNoCfCheck()) {
auto MBB = MF.begin();
- Changed |= addENDBR(*MBB);
+ Changed |= addENDBR(*MBB, MBB->begin());
}
- for (auto &MBB : MF)
+ for (auto &MBB : MF) {
// Find all basic blocks whose address was taken (for example
// by an indirect jump) and add an ENDBR instruction.
if (MBB.hasAddressTaken())
- Changed |= addENDBR(MBB);
-
+ Changed |= addENDBR(MBB, MBB.begin());
+
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ if (!I->isCall())
+ continue;
+ if (IsCallReturnTwice(I->getOperand(0)))
+ Changed |= addENDBR(MBB, std::next(I));
+ }
+ }
return Changed;
}
diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp
index 30b46a09ef0f..02ae73706a34 100644
--- a/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/lib/Target/X86/X86InsertPrefetch.cpp
@@ -1,9 +1,8 @@
//===------- X86InsertPrefetch.cpp - Insert cache prefetch hints ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,7 +33,8 @@ using namespace sampleprof;
static cl::opt<std::string>
PrefetchHintsFile("prefetch-hints-file",
- cl::desc("Path to the prefetch hints profile."),
+ cl::desc("Path to the prefetch hints profile. See also "
+ "-x86-discriminate-memops"),
cl::Hidden);
namespace {
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 49e9e924887a..cd1b06365971 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -1,9 +1,8 @@
//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,7 +73,9 @@ defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
-let SchedRW = [WriteEMMS] in
+let SchedRW = [WriteEMMS],
+ Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 7423cb85acd2..54eddeacaa17 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -1,9 +1,8 @@
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,10 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// Corresponding mask register class.
RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
+ // Corresponding mask register pair class.
+ RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
+ !cast<RegisterOperand>("VK" # NumElts # "Pair"));
+
// Corresponding write-mask register class.
RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
@@ -95,10 +98,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
- // A vector type of the same width with element type i32. This is used to
- // create the canonical constant zero node ImmAllZerosV.
- ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
- dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+ dag ImmAllZerosV = (VT immAllZerosV);
string ZSuffix = !if (!eq (Size, 128), "Z128",
!if (!eq (Size, 256), "Z256", "Z"));
@@ -277,10 +277,9 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS,
- bit IsCommutable = 0> :
+ dag RHS> :
AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
- RHS, IsCommutable, 0, IsCommutable, X86selects>;
+ RHS, 0, 0, 0, X86selects>;
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
@@ -365,7 +364,7 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
list<dag> Pattern,
list<dag> MaskingPattern,
bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
+ let isCommutable = IsCommutable in {
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
@@ -375,6 +374,7 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
MaskingPattern>, EVEX_K;
+ }
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
@@ -392,38 +392,11 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, bit IsCommutable = 0> :
+ dag RHS, dag RHS_su, bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS), IsCommutable>;
-
-multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
- dag Outs, dag Ins, string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm> :
- AVX512_maskable_custom_cmp<O, F, Outs,
- Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
- AttSrcAsm, IntelSrcAsm, [], []>;
-
-// This multiclass generates the unconditional/non-masking, the masking and
-// the zero-masking variant of the vector instruction. In the masking case, the
-// perserved vector elements come from a new dummy input operand tied to $dst.
-multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
- dag Outs, dag Ins, string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm,
- dag RHS, dag MaskedRHS,
- bit IsCommutable = 0, SDNode Select = vselect> :
- AVX512_maskable_custom<O, F, Outs, Ins,
- !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
- !con((ins _.KRCWM:$mask), Ins),
- OpcodeStr, AttSrcAsm, IntelSrcAsm,
- [(set _.RC:$dst, RHS)],
- [(set _.RC:$dst,
- (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
- [(set _.RC:$dst,
- (Select _.KRCWM:$mask, MaskedRHS,
- _.ImmAllZerosV))],
- "$src0 = $dst", IsCommutable>;
+ (and _.KRCWM:$mask, RHS_su), IsCommutable>;
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
@@ -451,8 +424,8 @@ def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
(ins VK8WM:$mask), "",
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
- (bc_v8i64 (v16i32 immAllOnesV)),
- (bc_v8i64 (v16i32 immAllZerosV))))]>;
+ (v8i64 immAllOnesV),
+ (v8i64 immAllZerosV)))]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -753,6 +726,7 @@ defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
+let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -1378,15 +1352,15 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX512] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
- def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+ def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZm addr:$src)>;
}
let Predicates = [HasVLX] in {
// 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZ128m addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
@@ -1397,12 +1371,30 @@ let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ128m addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWZ256m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWZ256m addr:$src)>;
}
+let Predicates = [HasBWI] in {
+ // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
+ // This means we'll encounter truncated i32 loads; match that here.
+ def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWZm addr:$src)>;
+ def : Pat<(v32i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWZm addr:$src)>;
+ def : Pat<(v32i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWZm addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
@@ -1464,7 +1456,7 @@ def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (bc_v16f32 (v16i32 immAllZerosV))),
+ (v16f32 immAllZerosV)),
(VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
@@ -1481,7 +1473,7 @@ def : Pat<(vselect VK16WM:$mask,
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
- (bc_v8f64 (v16i32 immAllZerosV))),
+ (v8f64 immAllZerosV)),
(VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
@@ -1489,7 +1481,7 @@ def : Pat<(vselect VK8WM:$mask,
(VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
- (bc_v8i64 (v16i32 immAllZerosV))),
+ (v8i64 immAllZerosV)),
(VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
@@ -1517,7 +1509,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
- (bc_v8f32 (v8i32 immAllZerosV))),
+ (v8f32 immAllZerosV)),
(VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
@@ -1566,7 +1558,7 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (bc_v4f64 (v8i32 immAllZerosV))),
+ (v4f64 immAllZerosV)),
(VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
@@ -1574,7 +1566,7 @@ def : Pat<(vselect VK4WM:$mask,
(VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (bc_v4i64 (v8i32 immAllZerosV))),
+ (v4i64 immAllZerosV)),
(VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
@@ -1599,7 +1591,7 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
- (bc_v16f32 (v16i32 immAllZerosV))),
+ (v16f32 immAllZerosV)),
(VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
@@ -1616,7 +1608,7 @@ def : Pat<(vselect VK16WM:$mask,
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
- (bc_v8f64 (v16i32 immAllZerosV))),
+ (v8f64 immAllZerosV)),
(VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
@@ -1624,7 +1616,7 @@ def : Pat<(vselect VK8WM:$mask,
(VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
- (bc_v8i64 (v16i32 immAllZerosV))),
+ (v8i64 immAllZerosV)),
(VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
@@ -2031,96 +2023,86 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
+ PatFrag OpNode_su, PatFrag OpNodeSAE_su,
X86FoldableSchedWrite sched> {
defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc)>, EVEX_4V, Sched<[sched]>;
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
- (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "$src2, $src1", "$src1, $src2",
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
- imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
+ imm:$cc),
+ (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
+ imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (OpNodeRnd (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc,
- (i32 FROUND_NO_EXC))>,
- EVEX_4V, EVEX_B, Sched<[sched]>;
- // Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
- (outs VK1:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
- Sched<[sched]>, NotMemoryFoldable;
- let mayLoad = 1 in
- defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc">,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
-
- defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
- EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
- }// let isAsmParserOnly = 1, hasSideEffects = 0
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
+ (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc),
+ (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)>,
+ EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in {
let isCommutable = 1 in
def rr : AVX512Ii8<0xC2, MRMSrcReg,
- (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", _.Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
+ !strconcat("vcmp", _.Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
imm:$cc))]>,
- EVEX_4V, Sched<[sched]>;
+ EVEX_4V, VEX_LIG, Sched<[sched]>;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
- (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", _.Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ !strconcat("vcmp", _.Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
imm:$cc))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
+ EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
+def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpms node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+
let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedSingle in
- defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
+ defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
+ X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XSIi8Base;
let ExeDomain = SSEPackedDouble in
- defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
+ defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
+ X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _,
- bit IsCommutable> {
+ PatFrag OpNode_su, X86FoldableSchedWrite sched,
+ X86VectorVTInfo _, bit IsCommutable> {
let isCommutable = IsCommutable in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
@@ -2139,22 +2121,23 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
+ (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmk : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
+ (OpNode_su (_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+ PatFrag OpNode_su,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
bit IsCommutable> :
- avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
+ avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched, _, IsCommutable> {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
@@ -2169,7 +2152,7 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
+ (OpNode_su (_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))]>,
EVEX_4V, EVEX_K, EVEX_B,
@@ -2177,33 +2160,34 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86SchedWriteWidths sched,
+ PatFrag OpNode_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd,
bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
- PatFrag OpNode, X86SchedWriteWidths sched,
+ PatFrag OpNode, PatFrag OpNode_su,
+ X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo,
Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.ZMM,
VTInfo.info512, IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.YMM,
VTInfo.info256, IsCommutable>, EVEX_V256;
- defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, OpNode_su, sched.XMM,
VTInfo.info128, IsCommutable>, EVEX_V128;
}
}
@@ -2216,59 +2200,69 @@ def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(setcc node:$src1, node:$src2, SETGT)>;
+def X86pcmpeqm_c_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86pcmpeqm_c node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+def X86pcmpgtm_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86pcmpgtm node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, X86pcmpeqm_c_su,
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
+defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
+defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
+defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
+defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, X86pcmpgtm_su,
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86FoldableSchedWrite sched,
+ PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
+ X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> {
let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
- !strconcat("vpcmp${cc}", Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
cond)))]>,
EVEX_4V, Sched<[sched]>;
def rmi : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
- !strconcat("vpcmp${cc}", Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (_.KVT
(Frag:$cc
(_.VT _.RC:$src1),
@@ -2278,67 +2272,36 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
let isCommutable = 1 in
def rrik : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
- AVX512ICC:$cc),
- !strconcat("vpcmp${cc}", Suffix,
- "\t{$src2, $src1, $dst {${mask}}|",
- "$dst {${mask}}, $src1, $src2}"),
+ u8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (_.KVT (Frag:$cc (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- cond))))]>,
+ (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched]>;
def rmik : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
- AVX512ICC:$cc),
- !strconcat("vpcmp${cc}", Suffix,
- "\t{$src2, $src1, $dst {${mask}}|",
- "$dst {${mask}}, $src1, $src2}"),
+ u8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
(_.KVT
- (Frag:$cc
+ (Frag_su:$cc
(_.VT _.RC:$src1),
(_.VT (_.LdFrag addr:$src2)),
cond))))]>,
EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
- // Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
- "$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
- let mayLoad = 1 in
- def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
- !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
- "$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
- def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
- (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
- u8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst {${mask}}|",
- "$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
- let mayLoad = 1 in
- def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
- u8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst {${mask}}|",
- "$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
- }
-
def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmi")
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
+ (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmik")
_.KRCWM:$mask, _.RC:$src1, addr:$src2,
@@ -2346,15 +2309,17 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
}
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86FoldableSchedWrite sched,
+ PatFrag Frag_su, PatFrag CommFrag,
+ PatFrag CommFrag_su, X86FoldableSchedWrite sched,
X86VectorVTInfo _, string Name> :
- avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
+ avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched, _, Name> {
def rmib : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
- AVX512ICC:$cc),
- !strconcat("vpcmp${cc}", Suffix,
- "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
- "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
+ u8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
+ "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (_.KVT (Frag:$cc
(_.VT _.RC:$src1),
(X86VBroadcast
@@ -2363,45 +2328,25 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmibk : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
- _.ScalarMemOp:$src2, AVX512ICC:$cc),
- !strconcat("vpcmp${cc}", Suffix,
- "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
- "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
+ _.ScalarMemOp:$src2, u8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
[(set _.KRC:$dst, (and _.KRCWM:$mask,
- (_.KVT (Frag:$cc
+ (_.KVT (Frag_su:$cc
(_.VT _.RC:$src1),
(X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
cond))))]>,
EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
- // Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
- def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
- u8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
- "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
- def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
- (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
- _.ScalarMemOp:$src2, u8imm:$cc),
- !strconcat("vpcmp", Suffix,
- "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
- "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
- }
-
def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(_.VT _.RC:$src1), cond)),
(!cast<Instruction>(Name#_.ZSuffix#"rmib")
_.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
def : Pat<(and _.KRCWM:$mask,
- (_.KVT (CommFrag:$cc (X86VBroadcast
+ (_.KVT (CommFrag_su:$cc (X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
(_.VT _.RC:$src1), cond))),
(!cast<Instruction>(Name#_.ZSuffix#"rmibk")
@@ -2410,32 +2355,34 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
}
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86SchedWriteWidths sched,
+ PatFrag Frag_su, PatFrag CommFrag,
+ PatFrag CommFrag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
- VTInfo.info512, NAME>, EVEX_V512;
+ defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
- VTInfo.info256, NAME>, EVEX_V256;
- defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
- VTInfo.info128, NAME>, EVEX_V128;
+ defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
- PatFrag CommFrag, X86SchedWriteWidths sched,
+ PatFrag Frag_su, PatFrag CommFrag,
+ PatFrag CommFrag_su, X86SchedWriteWidths sched,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
- VTInfo.info512, NAME>, EVEX_V512;
+ defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
- VTInfo.info256, NAME>, EVEX_V256;
- defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
- VTInfo.info128, NAME>, EVEX_V128;
+ defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
+ defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
+ sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
}
}
@@ -2459,6 +2406,12 @@ def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
+def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
@@ -2466,12 +2419,24 @@ def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
+def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
+
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;
+def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm>;
+
// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(setcc node:$src1, node:$src2, node:$cc), [{
@@ -2479,93 +2444,91 @@ def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
+def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (setcc node:$src1, node:$src2, node:$cc), [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
+}], X86pcmpm_imm_commute>;
+
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
-defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
-defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
EVEX_CD8<8, CD8VF>;
-defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i32_info,
HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
+ X86pcmpm_commute, X86pcmpm_commute_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
+ X86pcmpum_commute, X86pcmpum_commute_su,
SchedWriteVecALU, avx512vl_i64_info,
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpm node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
+ (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
+
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc), 1>,
- Sched<[sched]>;
+ (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ 1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
- (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (X86cmpm (_.VT _.RC:$src1),
- (_.VT (_.LdFrag addr:$src2)),
- imm:$cc)>,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc",
+ (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ imm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
+ imm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, ${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- imm:$cc)>,
+ imm:$cc),
+ (X86cmpm_su (_.VT _.RC:$src1),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
- // Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched]>, NotMemoryFoldable;
-
- let mayLoad = 1 in {
- defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
-
- defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $cc">,
- EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
- }
- }
// Patterns for selecting with loads in other operand.
def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
@@ -2573,9 +2536,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
imm:$cc)>;
- def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
- (_.VT _.RC:$src1),
- CommutableCMPCC:$cc)),
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
+ (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
imm:$cc)>;
@@ -2585,10 +2548,10 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
imm:$cc)>;
- def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
- (_.ScalarLdFrag addr:$src2)),
- (_.VT _.RC:$src1),
- CommutableCMPCC:$cc)),
+ def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)),
+ (_.VT _.RC:$src1),
+ CommutableCMPCC:$cc)),
(!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
imm:$cc)>;
@@ -2597,24 +2560,14 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// comparison code form (VCMP[EQ/LT/LE/...]
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
- "vcmp${cc}"#_.Suffix,
- "{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (X86cmpmRnd (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- imm:$cc,
- (i32 FROUND_NO_EXC))>,
+ (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $cc",
+ (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
+ (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ imm:$cc)>,
EVEX_B, Sched<[sched]>;
-
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
- (outs _.KRC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
- "vcmp"#_.Suffix,
- "$cc, {sae}, $src2, $src1",
- "$src1, $src2, {sae}, $cc">,
- EVEX_B, Sched<[sched]>, NotMemoryFoldable;
- }
}
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
@@ -2647,16 +2600,27 @@ let Predicates = [HasAVX512] in {
// ----------------------------------------------------------------
// FPClass
+
+def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vfpclasss node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
+def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vfpclass node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
//handle fpclass instruction mask = op(reg_scalar,imm)
// op(mem_scalar,imm)
-multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
Predicate prd> {
let Predicates = [prd], ExeDomain = _.ExeDomain in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+ [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 imm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
@@ -2664,7 +2628,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
+ (X86Vfpclasss_su (_.VT _.RC:$src1),
(i32 imm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
@@ -2672,15 +2636,15 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
- (OpNode _.ScalarIntMemCPat:$src1,
- (i32 imm:$src2)))]>,
+ (X86Vfpclasss _.ScalarIntMemCPat:$src1,
+ (i32 imm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
- (OpNode _.ScalarIntMemCPat:$src1,
+ (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -2689,14 +2653,14 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
// fpclass(reg_vec, mem_vec, imm)
// fpclass(reg_vec, broadcast(eltVt), imm)
-multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
- string mem, string broadcast>{
+ string mem>{
let ExeDomain = _.ExeDomain in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+ [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 imm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
@@ -2704,85 +2668,103 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr##_.Suffix#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
- (OpNode (_.VT _.RC:$src1),
+ (X86Vfpclass_su (_.VT _.RC:$src1),
(i32 imm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##mem#
+ OpcodeStr##_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set _.KRC:$dst,(OpNode
+ [(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##mem#
+ OpcodeStr##_.Suffix#"{"#mem#"}"#
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
+ [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+ OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst|$dst, ${src1}"
##_.BroadcastStr##", $src2}",
- [(set _.KRC:$dst,(OpNode
+ [(set _.KRC:$dst,(X86Vfpclass
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+ OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
_.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
- [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
+ [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))]>,
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+
+ // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
+ // the memory form.
+ def : InstAlias<OpcodeStr#_.Suffix#mem#
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (!cast<Instruction>(NAME#"rr")
+ _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
+ def : InstAlias<OpcodeStr#_.Suffix#mem#
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ (!cast<Instruction>(NAME#"rrk")
+ _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
+ def : InstAlias<OpcodeStr#_.Suffix#mem#
+ "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
+ _.BroadcastStr#", $src2}",
+ (!cast<Instruction>(NAME#"rmb")
+ _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
+ def : InstAlias<OpcodeStr#_.Suffix#mem#
+ "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
+ "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
+ (!cast<Instruction>(NAME#"rmbk")
+ _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
- bits<8> opc, SDNode OpNode,
- X86SchedWriteWidths sched, Predicate prd,
- string broadcast>{
+ bits<8> opc, X86SchedWriteWidths sched,
+ Predicate prd>{
let Predicates = [prd] in {
- defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
- _.info512, "{z}", broadcast>, EVEX_V512;
+ defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
+ _.info512, "z">, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
- defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
- _.info128, "{x}", broadcast>, EVEX_V128;
- defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
- _.info256, "{y}", broadcast>, EVEX_V256;
+ defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
+ _.info128, "x">, EVEX_V128;
+ defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
+ _.info256, "y">, EVEX_V256;
}
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
- bits<8> opcScalar, SDNode VecOpNode,
- SDNode ScalarOpNode, X86SchedWriteWidths sched,
+ bits<8> opcScalar, X86SchedWriteWidths sched,
Predicate prd> {
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
- VecOpNode, sched, prd, "{l}">,
+ sched, prd>,
EVEX_CD8<32, CD8VF>;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
- VecOpNode, sched, prd, "{q}">,
+ sched, prd>,
EVEX_CD8<64, CD8VF> , VEX_W;
- defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
- sched.Scl, f32x_info, prd>,
+ defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
+ sched.Scl, f32x_info, prd>, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
- defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
- sched.Scl, f64x_info, prd>,
+ defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
+ sched.Scl, f64x_info, prd>, VEX_LIG,
EVEX_CD8<64, CD8VT1>, VEX_W;
}
-defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
- X86Vfpclasss, SchedWriteFCmp, HasDQI>,
- AVX512AIi8Base, EVEX;
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
+ HasDQI>, AVX512AIi8Base, EVEX;
//-----------------------------------------------------------------
// Mask register copy, including
@@ -3039,26 +3021,24 @@ defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, xor, KXORWrr>;
// Mask unpacking
-multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
- RegisterClass KRCSrc, X86FoldableSchedWrite sched,
+multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
+ X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
- def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
- (ins KRC:$src1, KRC:$src2),
+ def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
+ (ins Src.KRC:$src1, Src.KRC:$src2),
"kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
VEX_4V, VEX_L, Sched<[sched]>;
- def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
- (!cast<Instruction>(NAME##rr)
- (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
- (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
+ def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
+ (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
}
}
-defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
+defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
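kunpckbw/kunpckwd/kunpckdq concatenate two narrower mask registers into one wider mask, which is what the concat_vectors pattern above selects. A small C illustration with the standard AVX512F intrinsic; which operand ends up in the upper half is assumed from the usual KUNPCKBW definition rather than taken from this patch:

#include <immintrin.h>

/* Build one 16-bit mask from two 8-bit masks; the first argument is assumed
   to supply the upper byte and the second the lower byte. */
__mmask16 concat_masks(__mmask16 hi, __mmask16 lo) {
    return _mm512_kunpackb(hi, lo);
}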
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -3118,7 +3098,8 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShu
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
// Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
-multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
+multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
+ string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
@@ -3130,8 +3111,8 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (Frag (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2)))),
+ (Frag_su (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2)))),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr#"Zrrk")
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
@@ -3141,7 +3122,7 @@ multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
}
// Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
-multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
+multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
@@ -3154,9 +3135,9 @@ def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
(Frag.OperandTransform $cc)), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2),
- cond)))),
+ (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2),
+ cond)))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3165,7 +3146,8 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
}
// Same as above, but for fp types which don't use PatFrags.
-multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
+multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su,
+ string InstStr,
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
@@ -3177,8 +3159,8 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
imm:$cc), Narrow.KRC)>;
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
- (OpNode (Narrow.VT Narrow.RC:$src1),
- (Narrow.VT Narrow.RC:$src2), imm:$cc))),
+ (OpNode_su (Narrow.VT Narrow.RC:$src1),
+ (Narrow.VT Narrow.RC:$src2), imm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@@ -3190,65 +3172,65 @@ let Predicates = [HasAVX512, NoVLX] in {
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQQ", v2i64x_info, v8i64_info>;
}
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
- defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v8f32x_info, v16f32_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPS", v4f32x_info, v16f32_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v4f64x_info, v8f64_info>;
+ defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, X86cmpm_su, "VCMPPD", v2f64x_info, v8f64_info>;
}
let Predicates = [HasBWI, NoVLX] in {
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, X86pcmpgtm_su, "VPCMPGTW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, X86pcmpeqm_c_su, "VPCMPEQW", v8i16x_info, v32i16_info>;
}
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}
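The masked variants in these lowering patterns fold an explicit and of a pre-existing mask with a compare result into the write-masked form of the widened 512-bit compare. In intrinsic terms that corresponds to the _mm512_mask_cmp* family; a hypothetical example for illustration (the *_su fragments themselves are internal pattern predicates with no user-visible counterpart):

#include <immintrin.h>

/* Only lanes enabled in k take part in the compare; disabled lanes yield 0
   in the result, matching the (and mask, (cmp a, b)) form above. */
__mmask16 masked_gt(__mmask16 k, __m512i a, __m512i b) {
    return _mm512_mask_cmpgt_epi32_mask(k, a, b);
}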
// Mask setting all 0s or 1s
@@ -3394,15 +3376,15 @@ multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
string EVEX2VEXOvrd, bit NoRMPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
- _.info512.AlignedLdFrag, masked_load_aligned512,
+ _.info512.AlignedLdFrag, masked_load_aligned,
Sched.ZMM, "", NoRMPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
- _.info256.AlignedLdFrag, masked_load_aligned256,
+ _.info256.AlignedLdFrag, masked_load_aligned,
Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
- _.info128.AlignedLdFrag, masked_load_aligned128,
+ _.info128.AlignedLdFrag, masked_load_aligned,
Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
}
}
@@ -3414,15 +3396,15 @@ multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
- masked_load_unaligned, Sched.ZMM, "",
+ masked_load, Sched.ZMM, "",
NoRMPattern, SelectOprr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
- masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
+ masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
NoRMPattern, SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
- masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
+ masked_load, Sched.XMM, EVEX2VEXOvrd,
NoRMPattern, SelectOprr>, EVEX_V128;
}
}
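The per-size masked_load_unaligned/masked_load_aligned{128,256,512} fragments are collapsed here into single masked_load and masked_load_aligned fragments. The operation being matched is the ordinary AVX-512 masked load; shown in C with the standard intrinsics, purely as an illustration of the semantics:

#include <immintrin.h>

/* Unaligned masked loads: lanes with a clear mask bit either keep the
   corresponding element of src (merge) or become zero (zero-masking). */
__m512 load_merge(__m512 src, __mmask16 k, const float *p) {
    return _mm512_mask_loadu_ps(src, k, p);
}

__m512 load_zero(__mmask16 k, const float *p) {
    return _mm512_maskz_loadu_ps(k, p);
}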
@@ -3488,14 +3470,14 @@ multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
string EVEX2VEXOvrd, bit NoMRPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
- masked_store_unaligned, Sched.ZMM, "",
+ masked_store, Sched.ZMM, "",
NoMRPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
- masked_store_unaligned, Sched.YMM,
+ masked_store, Sched.YMM,
EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
- masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
+ masked_store, Sched.XMM, EVEX2VEXOvrd,
NoMRPattern>, EVEX_V128;
}
}
@@ -3506,15 +3488,15 @@ multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
string EVEX2VEXOvrd, bit NoMRPattern = 0> {
let Predicates = [prd] in
defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
- masked_store_aligned512, Sched.ZMM, "",
+ masked_store_aligned, Sched.ZMM, "",
NoMRPattern>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
- masked_store_aligned256, Sched.YMM,
+ masked_store_aligned, Sched.YMM,
EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
- masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
+ masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
NoMRPattern>, EVEX_V128;
}
}
@@ -3609,7 +3591,7 @@ def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
"", []>, Sched<[WriteFStoreY]>;
}
-def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
VK8), VR512:$src)>;
@@ -3621,7 +3603,7 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
- (bc_v8i64 (v16i32 immAllZerosV)),
+ (v8i64 immAllZerosV),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
@@ -3761,75 +3743,6 @@ let Predicates = [HasVLX] in {
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
-multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
- X86VectorVTInfo To, X86VectorVTInfo Cast> {
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (extract_subvector
- (From.VT From.RC:$src), (iPTR 0)))),
- To.RC:$src0)),
- (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
- Cast.RC:$src0, Cast.KRCWM:$mask,
- (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
-
- def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
- (bitconvert
- (To.VT (extract_subvector
- (From.VT From.RC:$src), (iPTR 0)))),
- Cast.ImmAllZerosV)),
- (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
- Cast.KRCWM:$mask,
- (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
-}
-
-
-let Predicates = [HasVLX] in {
-// A masked extract from the first 128-bits of a 256-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
-
-// A masked extract from the first 128-bits of a 512-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
-
-// A masked extract from the first 256-bits of a 512-bit vector can be
-// implemented with masked move.
-defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
-defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
-defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
-}
-
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
@@ -3858,19 +3771,10 @@ def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src)
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))]>,
EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
-def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
- EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))]>,
EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
-def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
- EVEX, VEX_W, Sched<[WriteVecStore]>,
- EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
@@ -3881,11 +3785,6 @@ def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src)
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))]>,
EVEX, Sched<[WriteVecMoveFromGpr]>;
-
-def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
- EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move doubleword from xmm register to r/m32
@@ -3938,6 +3837,11 @@ def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
(VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
+let Predicates = [HasAVX512] in {
+ def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
+ (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
+}
+
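X86vextractstore64 represents storing the low 64 bits of a 128-bit vector, and the new pattern lets it be selected as the EVEX-encoded vmovq store. The SSE2 intrinsic below performs the same operation and is only meant to illustrate the semantics:

#include <immintrin.h>

/* Store only the low 64 bits of a 128-bit vector (a MOVQ-style store). */
void store_low64(__m128i v, __m128i *p) {
    _mm_storel_epi64(p, v);
}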
// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
@@ -3946,11 +3850,6 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))]>,
EVEX, Sched<[WriteVecMoveToGpr]>;
-def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
- (ins i32mem:$dst, FR32X:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
- EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
// Move Quadword Int to Packed Quadword Int
@@ -3974,7 +3873,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar<string asm, SDNode OpNode,
+multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
X86VectorVTInfo _> {
let Predicates = [HasAVX512, OptForSize] in
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
@@ -3999,11 +3898,18 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(_.VT _.RC:$src0))))],
_.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
- let canFoldAsLoad = 1, isReMaterializable = 1 in
- def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+ let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+ [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
_.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
+  // The _alt version uses the FR32/FR64 register class.
+ let isCodeGenOnly = 1 in
+ def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+ _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
+ }
let mayLoad = 1, hasSideEffects = 0 in {
let Constraints = "$src0 = $dst" in
def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
@@ -4023,16 +3929,16 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
EVEX, Sched<[WriteFStore]>;
let mayStore = 1, hasSideEffects = 0 in
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
- (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
+ (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
NotMemoryFoldable;
}
-defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
-defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
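With the vzload_frag parameter, the register-class form of the scalar load (rm) now produces a full XMM value with the upper elements zeroed, while the old FR32/FR64 form survives as the isCodeGenOnly rm_alt variant. The zero-extending behavior is the same one the plain SSE scalar load exposes, for example:

#include <immintrin.h>

/* A scalar load fills element 0 and zeroes elements 1..3, i.e. the
   vector-with-zeroed-upper form the new VMOVSSZrm pattern matches. */
__m128 load_scalar(const float *p) {
    return _mm_load_ss(p);   /* { *p, 0, 0, 0 } */
}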
@@ -4070,7 +3976,7 @@ def : Pat<(masked_store
(iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
- (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+ _.info128.RC:$src)>;
}
@@ -4085,7 +3991,7 @@ def : Pat<(masked_store
(iPTR 0))), addr:$dst, Mask),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
- (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+ _.info128.RC:$src)>;
}
@@ -4105,13 +4011,13 @@ def : Pat<(masked_store
(iPTR 0))), addr:$dst, Mask512),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
- (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+ _.info128.RC:$src)>;
// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
- (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+ _.info128.RC:$src)>;
}
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
@@ -4119,8 +4025,7 @@ multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
- (_.info512.VT (bitconvert
- (v16i32 immAllZerosV))))),
+ _.info512.ImmAllZerosV)),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
@@ -4145,8 +4050,7 @@ multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
- (_.info512.VT (bitconvert
- (v16i32 immAllZerosV))))),
+ _.info512.ImmAllZerosV)),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
@@ -4175,8 +4079,7 @@ multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask512,
- (_.info512.VT (bitconvert
- (v16i32 immAllZerosV))))),
+ _.info512.ImmAllZerosV)),
(iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
@@ -4194,7 +4097,7 @@ def : Pat<(_.info128.VT (extract_subvector
// AVX512Vl patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
- (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
+ _.info128.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#rmkz)
(COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
addr:$srcAddr)>;
@@ -4383,15 +4286,6 @@ let Predicates = [HasAVX512, OptForSize] in {
(v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
- (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
- def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
- (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
(v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
@@ -4400,17 +4294,6 @@ let Predicates = [HasAVX512, OptForSize] in {
(SUBREG_TO_REG (i32 0),
(v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
-
- def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
- (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
- def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
- (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
}
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
@@ -4426,79 +4309,27 @@ let Predicates = [HasAVX512, OptForSpeed] in {
(v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
(v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
(i8 3))), sub_xmm)>;
-
- def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
- (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
- (i8 1))), sub_xmm)>;
- def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
- (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
- (i8 0xf))), sub_xmm)>;
}
let Predicates = [HasAVX512] in {
-
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
- def : Pat<(v4f32 (X86vzload addr:$src)),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
-
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
- def : Pat<(v2f64 (X86vzload addr:$src)),
- (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (VMOVSSZrm addr:$src)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (VMOVSDZrm addr:$src)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
- def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
- def : Pat<(v8f32 (X86vzload addr:$src)),
+ def : Pat<(v8f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
- def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
- def : Pat<(v4f64 (X86vzload addr:$src)),
+ def : Pat<(v4f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// Represent the same patterns above but in the form they appear for
// 512-bit types
- def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
- def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
+ def : Pat<(v16f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
- def : Pat<(v16f32 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
- def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
- def : Pat<(v8f64 (X86vzload addr:$src)),
+ def : Pat<(v8f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
-
- // Extract and store.
- def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
- addr:$dst),
- (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
@@ -4517,47 +4348,47 @@ let Predicates = [HasAVX512] in {
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(VMOV64toPQIZrr GR64:$src)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
-
- def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
-
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
- (VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
+ def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v8i32 (X86vzload addr:$src)),
+ def : Pat<(v8i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
(VMOVZPQILo2PQIZrr VR128X:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)),
+ def : Pat<(v2i64 (X86vzload64 addr:$src)),
(VMOVQI2PQIZrm addr:$src)>;
- def : Pat<(v4i64 (X86vzload addr:$src)),
+ def : Pat<(v4i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
- // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
- def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
-
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
- def : Pat<(v16i32 (X86vzload addr:$src)),
+ def : Pat<(v16i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
- def : Pat<(v8i64 (X86vzload addr:$src)),
+ def : Pat<(v8i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
+
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v2f64 (VMOVZPQILo2PQIZrr
+ (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
+ sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v2i64 (VMOVZPQILo2PQIZrr
+ (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
+ sub_xmm)>;
+
+ def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v2f64 (VMOVZPQILo2PQIZrr
+ (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
+ sub_xmm)>;
+ def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v2i64 (VMOVZPQILo2PQIZrr
+ (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
+ sub_xmm)>;
}
//===----------------------------------------------------------------------===//
@@ -4686,7 +4517,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
- IsCommutable>, AVX512BIBase, EVEX_4V,
+ IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4922,7 +4753,7 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
- IsCommutable>,
+ IsCommutable, IsCommutable>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
@@ -5458,16 +5289,14 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (VecNode _.RC:$src1, _.RC:$src2,
- (i32 FROUND_CURRENT)))>,
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (VecNode _.RC:$src1,
- _.ScalarIntMemCPat:$src2,
- (i32 FROUND_CURRENT)))>,
+ _.ScalarIntMemCPat:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -5495,7 +5324,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 imm:$rc)), IsCommutable>,
+ (i32 timm:$rc))>,
EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
@@ -5534,23 +5363,22 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_NO_EXC))>, EVEX_B,
- Sched<[sched]>;
+ (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
+ EVEX_B, Sched<[sched]>;
}
}
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode VecNode, X86SchedWriteSizes sched,
- bit IsCommutable> {
+ SDNode VecNode, SDNode RndNode,
+ X86SchedWriteSizes sched, bit IsCommutable> {
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
sched.PS.Scl, IsCommutable>,
- avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
sched.PS.Scl, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
sched.PD.Scl, IsCommutable>,
- avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
sched.PD.Scl, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
@@ -5565,17 +5393,17 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
-defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86fadds, X86faddRnds,
SchedWriteFAddSizes, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmuls, X86fmulRnds,
SchedWriteFMulSizes, 1>;
-defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubs, X86fsubRnds,
SchedWriteFAddSizes, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivs, X86fdivRnds,
SchedWriteFDivSizes, 0>;
-defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
+defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
SchedWriteFCmpSizes, 0>;
-defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
+defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
SchedWriteFCmpSizes, 0>;
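The renamed node operands separate the plain scalar operation (X86fadds and friends), the static-rounding variant (the Rnds nodes, which carry an explicit rounding-mode immediate), and the suppress-all-exceptions variant (the SAEs nodes). At the source level these correspond to the _round_ intrinsics, shown here with the packed 512-bit forms; the exact flag combinations accepted are taken from the usual intrinsic definitions and are an assumption here:

#include <immintrin.h>

/* Embedded rounding: round toward zero with exceptions suppressed. */
__m512 add_rz(__m512 a, __m512 b) {
    return _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}

/* SAE only: min/max have no rounding to control, just exception suppression. */
__m512 min_sae(__m512 a, __m512 b) {
    return _mm512_min_round_ps(a, b, _MM_FROUND_NO_EXC);
}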
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
@@ -5618,13 +5446,13 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
- bit IsKZCommutable = IsCommutable> {
+ bit IsKCommutable = IsCommutable> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
- IsKZCommutable>,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
+ IsKCommutable, IsKCommutable>,
EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in {
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -5651,18 +5479,18 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
- (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
+ (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNodeRnd,
+ SDPatternOperator OpNodeSAE,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
+ (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
EVEX_4V, EVEX_B, Sched<[sched]>;
}
@@ -5731,10 +5559,10 @@ defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
SchedWriteFCmpSizes, 0>,
- avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
+ avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
SchedWriteFCmpSizes, 0>,
- avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
+ avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
SchedWriteFCmpSizes, 1>;
@@ -5750,71 +5578,25 @@ defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
SchedWriteFLogicSizes, 1>;
-let Predicates = [HasVLX,HasDQI] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
- def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
- def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
- def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
- (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
- FR64X)>;
-
- def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
- def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
- def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
- def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
- (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
- FR32X)>;
-}
-
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
EVEX_4V, Sched<[sched]>;
defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))),
- (i32 FROUND_CURRENT))>,
+ (_.ScalarLdFrag addr:$src2))))>,
EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -5825,332 +5607,139 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
Sched<[sched]>;
defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
- (i32 FROUND_CURRENT))>,
+ (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeScal,
X86SchedWriteWidths sched> {
- defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
- avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
+ defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
- avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
+ defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
- defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
- avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
- EVEX_4V,EVEX_CD8<32, CD8VT1>;
- defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
- avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
- EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+ defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
+ X86scalefsRnd, sched.Scl>,
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
+ X86scalefsRnd, sched.Scl>,
+ EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
- defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
+ defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
EVEX_V128, EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
+ defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
+ defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
+ defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
-defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
+defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
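vscalefps/vscalefpd multiply each element of the first source by two raised to the floor of the corresponding element of the second source (special values aside). A scalar reference and the packed AVX512F intrinsic, for illustration only:

#include <immintrin.h>
#include <math.h>

/* Reference for a single lane, ignoring NaN/infinity special cases. */
static float scalef_ref(float a, float b) {
    return a * exp2f(floorf(b));
}

/* vscalefps: per-lane a[i] * 2^floor(b[i]). */
__m512 scalef(__m512 a, __m512 b) {
    return _mm512_scalef_ps(a, b);
}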
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
string Name> {
- let ExeDomain = _.ExeDomain in {
- let isCommutable = 1 in
+ // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
+ // There are just too many permutations due to commutability and bitcasts.
+ let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
+ (null_frag), (null_frag), 1>,
EVEX_4V, Sched<[sched]>;
+ let mayLoad = 1 in
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
- _.ImmAllZerosV)>,
+ (null_frag), (null_frag)>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-
- // Patterns for compare with 0 that just use the same source twice.
- def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
- (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
- _.RC:$src, _.RC:$src))>;
-
- def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
- (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
- _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
-multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in
+ let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
- (OpNode (and _.RC:$src1,
- (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))),
- _.ImmAllZerosV)>,
+ (null_frag), (null_frag)>,
EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-// Use 512bit version to implement 128/256 bit in case NoVLX.
-multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
- X86VectorVTInfo _, string Name> {
- def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
- _.ImmAllZerosV)),
- (_.KVT (COPY_TO_REGCLASS
- (!cast<Instruction>(Name # "Zrr")
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src1, _.SubRegIdx),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src2, _.SubRegIdx)),
- _.KRC))>;
-
- def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (and _.RC:$src1, _.RC:$src2),
- _.ImmAllZerosV))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(Name # "Zrrk")
- (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src1, _.SubRegIdx),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src2, _.SubRegIdx)),
- _.KRC)>;
-
- def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
- (_.KVT (COPY_TO_REGCLASS
- (!cast<Instruction>(Name # "Zrr")
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src, _.SubRegIdx),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src, _.SubRegIdx)),
- _.KRC))>;
-
- def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(Name # "Zrrk")
- (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src, _.SubRegIdx),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src, _.SubRegIdx)),
- _.KRC)>;
-}
-
-multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
- X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
+multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched,
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
+ defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
+ avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
- defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
- }
- let Predicates = [HasAVX512, NoVLX] in {
- defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
- defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
+ defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
+ avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
+ defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
+ avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
}
}
-multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
+multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
- defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
+ defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
avx512vl_i32_info>;
- defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
+ defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
avx512vl_i64_info>, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
- PatFrag OpNode, X86SchedWriteWidths sched> {
+ X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
- defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
+ defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
- defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
+ defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
v64i8_info, NAME#"B">, EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
- defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
+ defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
- defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
+ defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
- defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
+ defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
v32i8x_info, NAME#"B">, EVEX_V256;
- defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
+ defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
v16i8x_info, NAME#"B">, EVEX_V128;
}
-
- let Predicates = [HasBWI, NoVLX] in {
- defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
- defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
- defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
- defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
- }
}
-// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
-// as commutable here because we already canonicalized all zeros vectors to the
-// RHS during lowering.
-def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
- (setcc node:$src1, node:$src2, SETEQ)>;
-def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
- (setcc node:$src1, node:$src2, SETNE)>;
-
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
- PatFrag OpNode, X86SchedWriteWidths sched> :
- avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
- avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
+ X86SchedWriteWidths sched> :
+ avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
+ avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
-defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
+defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
SchedWriteVecLogic>, T8PD;
-defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
+defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
SchedWriteVecLogic>, T8XS;
-
-multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
- X86VectorVTInfo _,
- X86VectorVTInfo AndInfo> {
- def : Pat<(_.KVT (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
-
- def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV))),
- (!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
- _.RC:$src2)>;
-
- def : Pat<(_.KVT (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1,
- (AndInfo.LdFrag addr:$src2)))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
-
- def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1,
- (AndInfo.LdFrag addr:$src2)))),
- _.ImmAllZerosV))),
- (!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
- addr:$src2)>;
-}
-
-// Patterns to use 512-bit instructions when 128/256 are not available.
-multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
- X86VectorVTInfo _,
- X86VectorVTInfo AndInfo,
- X86VectorVTInfo ExtendInfo> {
- def : Pat<(_.KVT (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV)),
- (_.KVT (COPY_TO_REGCLASS
- (!cast<Instruction>(InstrStr#"rr")
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src1, _.SubRegIdx),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src2, _.SubRegIdx)),
- _.KRC))>;
-
- def : Pat<(_.KVT (and _.KRC:$mask,
- (OpNode (bitconvert
- (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
- _.ImmAllZerosV))),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(InstrStr#"rrk")
- (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src1, _.SubRegIdx),
- (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
- _.RC:$src2, _.SubRegIdx)),
- _.KRC)>;
-}
-
-multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
- Predicate prd,
- AVX512VLVectorVTInfo CmpInfo,
- AVX512VLVectorVTInfo AndInfo> {
-let Predicates = [prd, HasVLX] in {
- defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
- CmpInfo.info128, AndInfo.info128>;
- defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
- CmpInfo.info256, AndInfo.info256>;
-}
-let Predicates = [prd] in {
- defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
- CmpInfo.info512, AndInfo.info512>;
-}
-
-let Predicates = [prd, NoVLX] in {
- defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
- CmpInfo.info128, AndInfo.info128,
- CmpInfo.info512>;
- defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
- CmpInfo.info256, AndInfo.info256,
- CmpInfo.info512>;
-}
-}
-
-multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
- defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
- avx512vl_i8_info, avx512vl_i16_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
- avx512vl_i8_info, avx512vl_i32_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
- avx512vl_i8_info, avx512vl_i64_info>;
-
- defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
- avx512vl_i16_info, avx512vl_i8_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
- avx512vl_i16_info, avx512vl_i32_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
- avx512vl_i16_info, avx512vl_i64_info>;
-
- defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
- avx512vl_i32_info, avx512vl_i8_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
- avx512vl_i32_info, avx512vl_i16_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
- avx512vl_i32_info, avx512vl_i64_info>;
-
- defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
- avx512vl_i64_info, avx512vl_i8_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
- avx512vl_i64_info, avx512vl_i16_info>;
- defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
- avx512vl_i64_info, avx512vl_i32_info>;
-}
-
-defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
-defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
-
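For context, vptestm sets a mask bit for each element where the AND of its two sources is non-zero, and vptestnm where that AND is zero; that is exactly what the removed X86pcmpnem/X86pcmpeqm PatFrags expressed as a setcc of the AND against all-zeros. A minimal C sketch of the instruction semantics, assuming an AVX-512F-capable compiler (the intrinsic names come from immintrin.h and are not part of this change):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i a = _mm512_set1_epi32(0x0F);
    __m512i b = _mm512_set1_epi32(0xF0);

    /* vptestmd: bit i of the mask is set when (a[i] & b[i]) != 0 */
    __mmask16 m  = _mm512_test_epi32_mask(a, b);   /* 0x0000 here */
    /* vptestnmd: bit i of the mask is set when (a[i] & b[i]) == 0 */
    __mmask16 nm = _mm512_testn_epi32_mask(a, b);  /* 0xFFFF here */

    printf("testm=0x%04x testnm=0x%04x\n", (unsigned)m, (unsigned)nm);
    return 0;
}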
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
@@ -6427,86 +6016,23 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
}
}
-defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
- avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
+ avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
-defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
- avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
+ avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
-defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
+ avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
-defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
-defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
-// Special handing for handling VPSRAV intrinsics.
-multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
- list<Predicate> p> {
- let Predicates = p in {
- def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
- (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
- _.RC:$src2)>;
- def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
- (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
- _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
- _.KRC:$mask, _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
- _.RC:$src1, _.RC:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
- _.RC:$src1, addr:$src2)>;
- }
-}
-
-multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
- list<Predicate> p> :
- avx512_var_shift_int_lowering<InstrStr, _, p> {
- let Predicates = p in {
- def : Pat<(_.VT (X86vsrav _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
- _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
- _.RC:$src0)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
- _.KRC:$mask, _.RC:$src1, addr:$src2)>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1,
- (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
- _.ImmAllZerosV)),
- (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
- _.RC:$src1, addr:$src2)>;
- }
-}
-
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
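The switch from the generic shl/sra/srl selectors to the target-specific X86vshlv/X86vsrav/X86vsrlv nodes is presumably about out-of-range per-element shift counts: the hardware defines them (VPSLLVD/VPSRLVD produce zero, VPSRAVD replicates the sign bit), while the generic nodes leave oversized counts undefined. A small C illustration of that hardware behaviour via AVX2 intrinsics, assuming a -mavx2 build (names from immintrin.h, not from this change):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m256i v   = _mm256_set1_epi32(-16);   /* 0xFFFFFFF0         */
    __m256i cnt = _mm256_set1_epi32(40);    /* out-of-range count */

    /* vpsllvd / vpsrlvd: counts >= 32 yield 0 in that element */
    __m256i shl = _mm256_sllv_epi32(v, cnt);
    __m256i srl = _mm256_srlv_epi32(v, cnt);
    /* vpsravd: counts >= 32 replicate the sign bit (all ones here) */
    __m256i sra = _mm256_srav_epi32(v, cnt);

    printf("sllv=%d srlv=%d srav=%d\n",
           _mm256_extract_epi32(shl, 0),
           _mm256_extract_epi32(srl, 0),
           _mm256_extract_epi32(sra, 0));   /* prints: sllv=0 srlv=0 srav=-1 */
    return 0;
}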
// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 + v4i32/v8i32 when VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
@@ -6827,17 +6353,20 @@ let Predicates = [HasAVX512] in {
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+
+ // VMOVLPD patterns
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}
let SchedRW = [WriteFStore] in {
+let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt
- (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
- (bc_v2f64 (v4f32 VR128X:$src))),
- (iPTR 0))), addr:$dst)]>,
- EVEX, EVEX_CD8<32, CD8VT2>;
+ []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovhpd\t{$src, $dst|$dst, $src}",
@@ -6845,12 +6374,11 @@ def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
(iPTR 0))), addr:$dst)]>,
EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
- (iPTR 0))), addr:$dst)]>,
- EVEX, EVEX_CD8<32, CD8VT2>;
+ []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
"vmovlpd\t{$src, $dst|$dst, $src}",
@@ -6903,7 +6431,7 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
@@ -6978,7 +6506,7 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
1, 1, vselect, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
@@ -7056,7 +6584,7 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
+ (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
1, 1, vselect, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
@@ -7132,7 +6660,7 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
!strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
!if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
Sched<[SchedWriteFMA.Scl]>;
}// isCodeGenOnly = 1
@@ -7151,7 +6679,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3)))),
(set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
- _.FRC:$src3, (i32 imm:$rc)))), 0>;
+ _.FRC:$src3, (i32 timm:$rc)))), 0>;
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
@@ -7159,7 +6687,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
(_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
- _.FRC:$src1, (i32 imm:$rc)))), 1>;
+ _.FRC:$src1, (i32 timm:$rc)))), 1>;
// One pattern uses 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
@@ -7169,7 +6697,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
(set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
_.FRC:$src1, _.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
- _.FRC:$src2, (i32 imm:$rc)))), 1>;
+ _.FRC:$src2, (i32 timm:$rc)))), 1>;
}
}
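The asm-string fix above makes the register-only EVEX_RC form actually print its embedded rounding operand, and the change from imm:$rc to timm:$rc treats the rounding control as a TargetConstant. From the programmer's side that operand is what the *_round_* intrinsics expose; a short C sketch assuming AVX-512F (intrinsic and macro names from immintrin.h, not from this change):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = _mm_set_ss(1.0f), b = _mm_set_ss(3.0f), c = _mm_set_ss(1e-8f);

    /* vfmadd213ss with the {ru-sae} embedded-rounding operand:
       compute a*b+c rounded toward +infinity, suppressing exceptions. */
    __m128 r = _mm_fmadd_round_ss(a, b, c,
                                  _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);

    printf("%.10f\n", _mm_cvtss_f32(r));
    return 0;
}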
@@ -7333,62 +6861,62 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src3, (i32 imm:$rc)))))),
+ _.FRC:$src3, (i32 timm:$rc)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (i32 imm:$rc)))))),
+ (i32 timm:$rc)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src3, (i32 imm:$rc)),
+ _.FRC:$src3, (i32 timm:$rc)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (i32 imm:$rc)),
+ (i32 timm:$rc)),
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
(!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- _.FRC:$src3, (i32 imm:$rc)),
+ _.FRC:$src3, (i32 timm:$rc)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects VK1WM:$mask,
(RndOp _.FRC:$src2, _.FRC:$src3,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
- (i32 imm:$rc)),
+ (i32 timm:$rc)),
(_.EltVT ZeroFP)))))),
(!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
VR128X:$src1, VK1WM:$mask,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
- (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
}
}
@@ -7468,44 +6996,44 @@ defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//
-multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
+multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
RegisterClass SrcRC, X86VectorVTInfo DstVT,
- X86MemOperand x86memop, PatFrag ld_frag, string asm> {
- let hasSideEffects = 0 in {
+ X86MemOperand x86memop, PatFrag ld_frag, string asm,
+ string mem> {
+ let hasSideEffects = 0, isCodeGenOnly = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- EVEX_4V, Sched<[sched]>;
+ EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
(ins DstVT.FRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
- let isCodeGenOnly = 1 in {
- def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
- (ins DstVT.RC:$src1, SrcRC:$src2),
- !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set DstVT.RC:$dst,
- (OpNode (DstVT.VT DstVT.RC:$src1),
- SrcRC:$src2,
- (i32 FROUND_CURRENT)))]>,
- EVEX_4V, Sched<[sched]>;
-
- def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
- (ins DstVT.RC:$src1, x86memop:$src2),
- !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set DstVT.RC:$dst,
- (OpNode (DstVT.VT DstVT.RC:$src1),
- (ld_frag addr:$src2),
- (i32 FROUND_CURRENT)))]>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
- }//isCodeGenOnly = 1
+ def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, SrcRC:$src2),
+ !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
+ EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+
+ def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
+ (ins DstVT.RC:$src1, x86memop:$src2),
+ asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set DstVT.RC:$dst,
+ (OpNode (DstVT.VT DstVT.RC:$src1),
+ (ld_frag addr:$src2)))]>,
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
+ DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
X86FoldableSchedWrite sched, RegisterClass SrcRC,
- X86VectorVTInfo DstVT, string asm> {
+ X86VectorVTInfo DstVT, string asm,
+ string mem> {
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
(ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
!strconcat(asm,
@@ -7513,37 +7041,44 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
[(set DstVT.RC:$dst,
(OpNode (DstVT.VT DstVT.RC:$src1),
SrcRC:$src2,
- (i32 imm:$rc)))]>,
- EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
+ (i32 timm:$rc)))]>,
+ EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
+ def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
+ (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
+ DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}
-multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
+multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
RegisterClass SrcRC, X86VectorVTInfo DstVT,
- X86MemOperand x86memop, PatFrag ld_frag, string asm> {
- defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, string mem> {
+ defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
- ld_frag, asm>, VEX_LIG;
+ ld_frag, asm, mem>, VEX_LIG;
}
let Predicates = [HasAVX512] in {
-defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
- v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
+defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
+ WriteCvtI2SS, GR32,
+ v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
- v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
+defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
+ WriteCvtI2SS, GR64,
+ v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
- v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
- XD, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
- v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
+defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
+ v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l">,
+ XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
+ WriteCvtI2SD, GR64,
+ v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+ (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+ (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -7563,23 +7098,26 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
-defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
+defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
+ WriteCvtI2SS, GR32,
v4f32x_info, i32mem, loadi32,
- "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
- v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
+ "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
+ WriteCvtI2SS, GR64,
+ v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
- i32mem, loadi32, "cvtusi2sd{l}">,
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
+ i32mem, loadi32, "cvtusi2sd", "l">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
- v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
+ WriteCvtI2SD, GR64,
+ v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+ (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
+ (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -7608,8 +7146,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
- string aliasStr,
- bit CodeGenOnly = 1> {
+ string aliasStr> {
let Predicates = [HasAVX512] in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -7617,34 +7154,23 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
+ [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC,
Sched<[sched]>;
- let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
(SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
-
- def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
- def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
- (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
} // Predicates = [HasAVX512]
-}
-multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
- X86VectorVTInfo DstVT, SDNode OpNode,
- SDNode OpNodeRnd,
- X86FoldableSchedWrite sched, string asm,
- string aliasStr> :
- avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
- let Predicates = [HasAVX512] in {
- def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
- SrcVT.IntScalarMemOp:$src), 0, "att">;
- } // Predicates = [HasAVX512]
+ def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
+ def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
+ (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
+ def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
+ SrcVT.IntScalarMemOp:$src), 0, "att">;
}
// Convert float/double to signed/unsigned int 32/64
@@ -7654,10 +7180,10 @@ defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
@@ -7666,10 +7192,10 @@ defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
@@ -7760,19 +7286,18 @@ def : Pat<(v2f64 (X86Movsd
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeInt, SDNode OpNodeRnd,
- X86FoldableSchedWrite sched, string aliasStr,
- bit CodeGenOnly = 1>{
+ SDNode OpNodeInt, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched, string aliasStr>{
let Predicates = [HasAVX512] in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
- EVEX, Sched<[sched]>;
+ EVEX, VEX_LIG, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
- EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
@@ -7781,63 +7306,49 @@ let Predicates = [HasAVX512] in {
EVEX, VEX_LIG, Sched<[sched]>;
def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
- (i32 FROUND_NO_EXC)))]>,
- EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
- let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
+ [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
+ EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
(ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
+} //HasAVX512
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
(!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
-} //HasAVX512
-}
-
-multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
- X86VectorVTInfo _SrcRC,
- X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeInt, SDNode OpNodeRnd,
- X86FoldableSchedWrite sched,
- string aliasStr> :
- avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
- aliasStr, 0> {
-let Predicates = [HasAVX512] in {
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
_SrcRC.IntScalarMemOp:$src), 0, "att">;
}
-}
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
- fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
+ fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
- fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
+ fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
@@ -7851,15 +7362,13 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2),
- (i32 FROUND_CURRENT)))>,
+ (_Src.VT _Src.RC:$src2)))>,
EVEX_4V, VEX_LIG, Sched<[sched]>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT _Src.ScalarIntMemCPat:$src2),
- (i32 FROUND_CURRENT)))>,
+ (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -7878,14 +7387,13 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86VectorVTInfo _Src, SDNode OpNodeRnd,
+ X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (_.VT (OpNodeRnd (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2),
- (i32 FROUND_NO_EXC)))>,
+ (_.VT (OpNodeSAE (_.VT _.RC:$src1),
+ (_Src.VT _Src.RC:$src2)))>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
@@ -7897,34 +7405,36 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+ (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
EVEX_4V, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
- SDNode OpNodeRnd, X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
+ SDNode OpNode, SDNode OpNodeRnd,
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _src, X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
}
}
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeSAE,
X86FoldableSchedWrite sched,
X86VectorVTInfo _src, X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
- avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
+ avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
EVEX_CD8<32, CD8VT1>, XS;
}
}
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
- X86froundRnd, WriteCvtSD2SS, f64x_info,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f64x_info,
f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
- X86fpextRnd, WriteCvtSS2SD, f32x_info,
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f32x_info,
f64x_info>;
def : Pat<(f64 (fpextend FR32X:$src)),
@@ -7934,14 +7444,6 @@ def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
-def : Pat<(f64 (extloadf32 addr:$src)),
- (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX512, OptForSize]>;
-
-def : Pat<(f64 (extloadf32 addr:$src)),
- (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
- Requires<[HasAVX512, OptForSpeed]>;
-
def : Pat<(f32 (fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
@@ -7970,7 +7472,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86FoldableSchedWrite sched,
string Broadcast = _.BroadcastStr,
string Alias = "", X86MemOperand MemOp = _Src.MemOp,
- RegisterClass MaskRC = _.KRCWM> {
+ RegisterClass MaskRC = _.KRCWM,
+ dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src),
@@ -7989,12 +7492,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
(ins MaskRC:$mask, MemOp:$src),
OpcodeStr#Alias, "$src", "$src",
- (_.VT (OpNode (_Src.VT
- (_Src.LdFrag addr:$src)))),
- (vselect MaskRC:$mask,
- (_.VT (OpNode (_Src.VT
- (_Src.LdFrag addr:$src)))),
- _.RC:$src0),
+ LdDAG,
+ (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
vselect, "$src0 = $dst">,
EVEX, Sched<[sched.Folded]>;
@@ -8019,13 +7518,12 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86VectorVTInfo _Src, SDNode OpNodeRnd,
+ X86VectorVTInfo _Src, SDNode OpNodeSAE,
X86FoldableSchedWrite sched> {
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
- (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
- (i32 FROUND_NO_EXC)))>,
+ (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
EVEX, EVEX_B, Sched<[sched]>;
}
@@ -8036,23 +7534,34 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
"$rc, $src", "$src, $rc",
- (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
+// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
+multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNode,
+ X86FoldableSchedWrite sched,
+ string Broadcast = _.BroadcastStr,
+ string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+ RegisterClass MaskRC = _.KRCWM>
+ : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
+ MemOp, MaskRC,
+ (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
+
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
fpextend, sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
- X86vfpextRnd, sched.ZMM>, EVEX_V512;
+ X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
sched.YMM>, EVEX_V256;
}
}
@@ -8060,7 +7569,7 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
@@ -8068,18 +7577,49 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86vfpround,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
-
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
}
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
}
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
@@ -8087,20 +7627,66 @@ defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
-def : Pat<(v8f64 (extloadv8f32 addr:$src)),
- (VCVTPS2PDZrm addr:$src)>;
+let Predicates = [HasAVX512] in {
+ def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
+ (VCVTPD2PSZrr VR512:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+ VR256X:$src0),
+ (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+ v8f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
+
+ def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
+ (VCVTPD2PSZrm addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+ VR256X:$src0),
+ (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+ v8f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2PSZrmb addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask,
+ (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+ (v8f32 VR256X:$src0)),
+ (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK8WM:$mask,
+ (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
+ v8f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
+}
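The pattern block above lets a plain fpround, optionally wrapped in a vselect, select the unmasked, merge-masked and zero-masked forms of VCVTPD2PSZ directly. In intrinsic terms those are the three conversion flavours shown below; a brief C sketch assuming AVX-512F (intrinsics from immintrin.h, not from this change):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512d d    = _mm512_set1_pd(1.5);
    __m256  src0 = _mm256_set1_ps(-1.0f);
    float out[8];

    /* vcvtpd2ps: unmasked rounding of 8 doubles to 8 floats            */
    __m256 plain = _mm512_cvtpd_ps(d);
    /* vcvtpd2ps {k}: lanes with a clear mask bit keep src0's value     */
    __m256 merge = _mm512_mask_cvtpd_ps(src0, 0x0F, d);
    /* vcvtpd2ps {k}{z}: lanes with a clear mask bit are zeroed         */
    __m256 zero  = _mm512_maskz_cvtpd_ps(0x0F, d);

    _mm256_storeu_ps(out, merge);
    printf("merge: lane0=%f lane7=%f\n", out[0], out[7]);  /* 1.5, -1.0 */
    _mm256_storeu_ps(out, zero);
    printf("zero:  lane0=%f lane7=%f\n", out[0], out[7]);  /* 1.5,  0.0 */
    (void)plain;
    return 0;
}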
let Predicates = [HasVLX] in {
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
- (VCVTPD2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
- (VCVTPD2PSZ128rm addr:$src)>;
- def : Pat<(v2f64 (extloadv2f32 addr:$src)),
- (VCVTPS2PDZ128rm addr:$src)>;
- def : Pat<(v4f64 (extloadv4f32 addr:$src)),
- (VCVTPS2PDZ256rm addr:$src)>;
+ def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
+ (VCVTPD2PSZ256rr VR256X:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+ VR128X:$src0),
+ (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+ v4f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+ (VCVTPD2PSZ256rm addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+ VR128X:$src0),
+ (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+ v4f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ (VCVTPD2PSZ256rmb addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask,
+ (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ VR128X:$src0),
+ (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(vselect VK4WM:$mask,
+ (v4f32 (fpround (v4f64 (X86VBroadcast (loadf64 addr:$src))))),
+ v4f32x_info.ImmAllZerosV),
+ (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
@@ -8142,7 +7728,11 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
- OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
+ OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+ (v2f64 (OpNode128 (bc_v4i32
+ (v2i64
+ (scalar_to_vector (loadi64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -8167,12 +7757,12 @@ multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode OpNodeSAE, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
- OpNodeRnd, sched.ZMM>, EVEX_V512;
+ OpNodeSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
@@ -8201,12 +7791,12 @@ multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched> {
+ SDNode OpNodeSAE, X86SchedWriteWidths sched> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
sched.ZMM>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
- OpNodeRnd, sched.ZMM>, EVEX_V512;
+ OpNodeSAE, sched.ZMM>, EVEX_V512;
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
@@ -8218,16 +7808,49 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
-
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
}
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Doubleword
@@ -8249,16 +7872,47 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
VK2WM>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256;
-
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
}
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, f64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, f64mem:$src), 0, "att">;
}
// Convert Double to Signed/Unsigned Quadword
@@ -8325,7 +7979,11 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32
+ (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -8343,7 +8001,11 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
+ sched.XMM, "{1to2}", "", f64mem, VK2WM,
+ (v2i64 (OpNode (bc_v4f32
+ (v2f64
+ (scalar_to_vector (loadf64 addr:$src))))))>,
+ EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
sched.YMM>, EVEX_V256;
}
@@ -8351,8 +8013,7 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Convert Signed/Unsigned Quadword to Float
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNode128, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
+ SDNode OpNodeRnd, X86SchedWriteWidths sched> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
sched.ZMM>,
@@ -8364,22 +8025,57 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
- sched.XMM, "{1to2}", "{x}">, EVEX_V128,
- NotEVEX2VEXConvertible;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
+ sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+ EVEX_V128, NotEVEX2VEXConvertible;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
sched.YMM, "{1to4}", "{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
-
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
- def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
- def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
- (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
}
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
}
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
@@ -8390,19 +8086,19 @@ defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
- X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
+ X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
XS, EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
- X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
+ X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
- X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
+ X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
- X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
+ X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
@@ -8446,19 +8142,19 @@ defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
- X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
- X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
+ X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
- X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
- X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
+ X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
@@ -8469,67 +8165,15 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
EVEX_CD8<64, CD8VF>;
-let Predicates = [HasAVX512] in {
- def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
- (VCVTTPS2DQZrr VR512:$src)>;
- def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
- (VCVTTPS2DQZrm addr:$src)>;
-
- def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
- (VCVTTPS2UDQZrr VR512:$src)>;
- def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
- (VCVTTPS2UDQZrm addr:$src)>;
-
- def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
- (VCVTTPD2DQZrr VR512:$src)>;
- def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
- (VCVTTPD2DQZrm addr:$src)>;
-
- def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
- (VCVTTPD2UDQZrr VR512:$src)>;
- def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
- (VCVTTPD2UDQZrm addr:$src)>;
-}
-
let Predicates = [HasVLX] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
- (VCVTTPS2DQZ128rr VR128X:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
- (VCVTTPS2DQZ128rm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
- (VCVTTPS2UDQZ128rr VR128X:$src)>;
- def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
- (VCVTTPS2UDQZ128rm addr:$src)>;
-
- def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
- (VCVTTPS2DQZ256rr VR256X:$src)>;
- def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
- (VCVTTPS2DQZ256rm addr:$src)>;
-
- def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
- (VCVTTPS2UDQZ256rr VR256X:$src)>;
- def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
- (VCVTTPS2UDQZ256rm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
- (VCVTTPD2DQZ256rr VR256X:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
- (VCVTTPD2DQZ256rm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
- (VCVTTPD2UDQZ256rr VR256X:$src)>;
- def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
- (VCVTTPD2UDQZ256rm addr:$src)>;
-
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
// patterns have been disabled with null_frag.
def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
@@ -8647,72 +8291,64 @@ let Predicates = [HasVLX] in {
(VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasDQI] in {
- def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
- (VCVTTPS2QQZrr VR256X:$src)>;
- def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
- (VCVTTPS2QQZrm addr:$src)>;
-
- def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
- (VCVTTPS2UQQZrr VR256X:$src)>;
- def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
- (VCVTTPS2UQQZrm addr:$src)>;
-
- def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
- (VCVTTPD2QQZrr VR512:$src)>;
- def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
- (VCVTTPD2QQZrm addr:$src)>;
-
- def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
- (VCVTTPD2UQQZrr VR512:$src)>;
- def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
- (VCVTTPD2UQQZrm addr:$src)>;
-}
-
let Predicates = [HasDQI, HasVLX] in {
- def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
- (VCVTTPS2QQZ256rr VR128X:$src)>;
- def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
- (VCVTTPS2QQZ256rm addr:$src)>;
-
- def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
- (VCVTTPS2UQQZ256rr VR128X:$src)>;
- def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
- (VCVTTPS2UQQZ256rm addr:$src)>;
-
- def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
- (VCVTTPD2QQZ128rr VR128X:$src)>;
- def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
- (VCVTTPD2QQZ128rm addr:$src)>;
-
- def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
- (VCVTTPD2UQQZ128rr VR128X:$src)>;
- def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
- (VCVTTPD2UQQZ128rm addr:$src)>;
-
- def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
- (VCVTTPD2QQZ256rr VR256X:$src)>;
- def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
- (VCVTTPD2QQZ256rm addr:$src)>;
-
- def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
- (VCVTTPD2UQQZ256rr VR256X:$src)>;
- def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
- (VCVTTPD2UQQZ256rm addr:$src)>;
+ def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+ (VCVTPS2QQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
+ (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+ (VCVTPS2UQQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
+ (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+ (VCVTTPS2QQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
+ (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
+ (VCVTTPS2UQQZ128rm addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
+ (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2i64 (vselect VK2WM:$mask,
+ (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
+ v2i64x_info.ImmAllZerosV)),
+ (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
+def : Pat<(v8i32 (X86cvttp2ui (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
+def : Pat<(v4i32 (X86cvttp2ui (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
-def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
+def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
@@ -8738,80 +8374,117 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
-let Predicates = [HasAVX512, HasVLX] in {
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
- (VCVTPD2DQZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
- (VCVTPD2DQZ128rm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
- (VCVTPD2UDQZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
- (VCVTTPD2DQZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
- (VCVTTPD2DQZ128rm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
- (VCVTTPD2UDQZ128rr VR128X:$src)>;
-
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (VCVTDQ2PDZ128rm addr:$src)>;
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+let Predicates = [HasVLX] in {
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
-
- def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (VCVTUDQ2PDZ128rm addr:$src)>;
- def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (vselect VK2WM:$mask,
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
+ (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2f64 (vselect VK2WM:$mask,
+ (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ v2f64x_info.ImmAllZerosV)),
+ (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTUDQ2PDZ128rm addr:$src)>;
-}
-
-let Predicates = [HasAVX512] in {
- def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
- (VCVTPD2PSZrm addr:$src)>;
- def : Pat<(v8f64 (extloadv8f32 addr:$src)),
- (VCVTPS2PDZrm addr:$src)>;
+ def : Pat<(v2f64 (vselect VK2WM:$mask,
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ VR128X:$src0)),
+ (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(v2f64 (vselect VK2WM:$mask,
+ (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
+ v2f64x_info.ImmAllZerosV)),
+ (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI, HasVLX] in {
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
+ // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))),
(VCVTQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))),
+ (VCVTQQ2PSZ128rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+ (VCVTQQ2PSZ128rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (v4f32 VR128X:$src0), VK2WM:$mask),
+ (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))),
(VCVTUQQ2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))),
+ (VCVTUQQ2PSZ128rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))))),
+ (VCVTUQQ2PSZ128rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (v4f32 VR128X:$src0), VK2WM:$mask),
+ (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ v4f32x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI, NoVLX] in {
-def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
+def : Pat<(v2i64 (X86cvttp2si (v2f64 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
-def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
+def : Pat<(v4i64 (X86cvttp2si (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
-def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
+def : Pat<(v4i64 (X86cvttp2si (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
-def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
+def : Pat<(v2i64 (X86cvttp2ui (v2f64 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
-def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
+def : Pat<(v4i64 (X86cvttp2ui (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
-def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
+def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
@@ -8870,8 +8543,7 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
(ins _src.RC:$src), "vcvtph2ps",
"{sae}, $src", "$src, {sae}",
- (X86cvtph2psRnd (_src.VT _src.RC:$src),
- (i32 FROUND_NO_EXC))>,
+ (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
T8PD, EVEX_B, Sched<[sched]>;
}
@@ -8890,9 +8562,7 @@ let Predicates = [HasVLX] in {
EVEX_CD8<32, CD8VH>;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
- (VCVTPH2PSZ128rm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(VCVTPH2PSZ128rm addr:$src)>;
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
(v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
@@ -9055,12 +8725,12 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
- EVEX_4V, Sched<[sched]>;
+ EVEX_4V, VEX_LIG, Sched<[sched]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2)>, EVEX_4V,
+ _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -9129,47 +8799,45 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode OpNode, X86FoldableSchedWrite sched> {
+ SDNode OpNode, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_CURRENT))>,
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
Sched<[sched]>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_NO_EXC))>, EVEX_B,
- Sched<[sched]>;
+ (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
+ EVEX_B, Sched<[sched]>;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
- (i32 FROUND_CURRENT))>,
+ (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched> {
- defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
- EVEX_CD8<32, CD8VT1>;
- defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
- EVEX_CD8<64, CD8VT1>, VEX_W;
+ SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
+ defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
+ sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
+ sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
let Predicates = [HasERI] in {
- defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
- T8PD, EVEX_4V;
- defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
+ defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
+ SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
+ defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}
-defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
+defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
@@ -9178,42 +8846,40 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
+ (OpNode (_.VT _.RC:$src))>,
Sched<[sched]>;
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT
- (bitconvert (_.LdFrag addr:$src))),
- (i32 FROUND_CURRENT))>,
+ (bitconvert (_.LdFrag addr:$src))))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
(OpNode (_.VT
- (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- (i32 FROUND_CURRENT))>, EVEX_B,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
-multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
- (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
+ (OpNode (_.VT _.RC:$src))>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86SchedWriteWidths sched> {
+ SDNode OpNodeSAE, X86SchedWriteWidths sched> {
defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
- avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
+ avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
- avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
+ avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
@@ -9221,24 +8887,32 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
- defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
- EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
- EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
- EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
- EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
+ sched.XMM>,
+ EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
+ sched.YMM>,
+ EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
+ defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
+ sched.XMM>,
+ EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
+ sched.YMM>,
+ EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
let Predicates = [HasERI] in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
- defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
- defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
-}
-defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
- avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
+ defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
+ SchedWriteFRsqrt>, EVEX;
+ defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
+ SchedWriteFRcp>, EVEX;
+ defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
+ SchedWriteFAdd>, EVEX;
+}
+defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
+ SchedWriteFRnd>,
+ avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
SchedWriteFRnd>, EVEX;
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
@@ -9246,7 +8920,7 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
- (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
+ (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
@@ -9312,23 +8986,21 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (X86fsqrtRnds (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (i32 FROUND_CURRENT))>,
+ (X86fsqrts (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2))>,
Sched<[sched]>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (X86fsqrtRnds (_.VT _.RC:$src1),
- _.ScalarIntMemCPat:$src2,
- (i32 FROUND_CURRENT))>,
+ (X86fsqrts (_.VT _.RC:$src1),
+ _.ScalarIntMemCPat:$src2)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(X86fsqrtRnds (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$rc))>,
+ (i32 timm:$rc))>,
EVEX_B, EVEX_RC, Sched<[sched]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
@@ -9383,8 +9055,8 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
- (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
+ (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 imm:$src3)))>, EVEX_B,
Sched<[sched]>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -9410,50 +9082,26 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasAVX512] in {
- def : Pat<(ffloor _.FRC:$src),
- (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
- _.FRC:$src, (i32 0x9)))>;
- def : Pat<(fceil _.FRC:$src),
- (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
- _.FRC:$src, (i32 0xa)))>;
- def : Pat<(ftrunc _.FRC:$src),
+ def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
- _.FRC:$src, (i32 0xb)))>;
- def : Pat<(frint _.FRC:$src),
- (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
- _.FRC:$src, (i32 0x4)))>;
- def : Pat<(fnearbyint _.FRC:$src),
- (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
- _.FRC:$src, (i32 0xc)))>;
+ _.FRC:$src1, imm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
- def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
- (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
- addr:$src, (i32 0x9)))>;
- def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
- (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
- addr:$src, (i32 0xa)))>;
- def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
+ def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
- addr:$src, (i32 0xb)))>;
- def : Pat<(frint (_.ScalarLdFrag addr:$src)),
- (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
- addr:$src, (i32 0x4)))>;
- def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
- (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
- addr:$src, (i32 0xc)))>;
+ addr:$src1, imm:$src2))>;
}
}
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFRnd.Scl, f32x_info>,
- AVX512AIi8Base, EVEX_4V,
+ AVX512AIi8Base, EVEX_4V, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
SchedWriteFRnd.Scl, f64x_info>,
- VEX_W, AVX512AIi8Base, EVEX_4V,
+ VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
@@ -9481,32 +9129,6 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
-multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
- X86VectorVTInfo _, PatLeaf ZeroFP,
- bits<8> ImmV, Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
- (OpNode (extractelt _.VT:$src2, (iPTR 0))),
- (extractelt _.VT:$dst, (iPTR 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
- _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
- (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
- VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
- }
-}
-
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
- v4f32x_info, fp32imm0, 0x01, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
- v4f32x_info, fp32imm0, 0x02, HasAVX512>;
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
- v2f64x_info, fp64imm0, 0x01, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
- v2f64x_info, fp64imm0, 0x02, HasAVX512>;
-
//-------------------------------------------------
// Integer truncate and extend operations
@@ -9966,26 +9588,14 @@ multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
let Predicates = [HasVLX, HasBWI] in {
def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
}
// 512-bit patterns
@@ -10007,41 +9617,6 @@ multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
}
}
-multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> :
- AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
- let Predicates = [HasVLX, HasBWI] in {
- def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))),
- (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>;
- }
-
- let Predicates = [HasVLX] in {
- def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))),
- (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>;
-
- def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))),
- (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>;
- }
-
- // 512-bit patterns
- let Predicates = [HasBWI] in {
- def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))),
- (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>;
- }
- let Predicates = [HasAVX512] in {
- def : Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))),
- (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>;
- def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))),
- (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>;
-
- def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))),
- (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>;
-
- def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))),
- (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>;
- }
-}
-
-
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
SDNode InVecOp> :
AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
@@ -10051,103 +9626,62 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
+ def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
+ def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
+ def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
+ def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
+ def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
// 512-bit patterns
let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
- def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
-defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
@@ -10155,22 +9689,8 @@ defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
(VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
-def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
+def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
(VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
-def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
- (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
-}
-
-// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
-// ext+trunc aggresively making it impossible to legalize the DAG to this
-// pattern directly.
-let Predicates = [HasAVX512, NoBWI] in {
-def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
- (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
-def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
- (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
-def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
- (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
}
//===----------------------------------------------------------------------===//
@@ -10457,7 +9977,7 @@ multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
- (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
+ (null_frag)>, AVX5128IBase,
Sched<[sched]>;
let mayStore = 1, hasSideEffects = 0 in
@@ -10479,6 +9999,13 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
(!cast<Instruction>(Name#_.ZSuffix##mrk)
addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
+
+ def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
+ def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ _.KRCWM:$mask, _.RC:$src)>;
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -10512,13 +10039,12 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr, X86FoldableSchedWrite sched> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
- (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
+ (null_frag)>, AVX5128IBase,
Sched<[sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
- (_.VT (X86expand (_.VT (bitconvert
- (_.LdFrag addr:$src1)))))>,
+ (null_frag)>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -10537,6 +10063,13 @@ multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
(_.VT _.RC:$src0))),
(!cast<Instruction>(Name#_.ZSuffix##rmk)
_.RC:$src0, _.KRCWM:$mask, addr:$src)>;
+
+ def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrk)
+ _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
+ def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
+ (!cast<Instruction>(Name#_.ZSuffix##rrkz)
+ _.KRCWM:$mask, _.RC:$src)>;
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
@@ -10603,18 +10136,17 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
- (i32 imm:$src2),
- (i32 FROUND_NO_EXC))>,
+ (i32 imm:$src2))>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
_.info512>,
- avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
+ avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
sched.ZMM, _.info512>, EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
@@ -10733,8 +10265,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$src3),
- (i32 FROUND_NO_EXC))>,
+ (i32 imm:$src3))>,
EVEX_B, Sched<[sched]>;
}
@@ -10748,17 +10279,16 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i32 imm:$src3),
- (i32 FROUND_NO_EXC))>,
+ (i32 imm:$src3))>,
EVEX_B, Sched<[sched]>;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
let Predicates = [prd] in {
defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
- avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
+ avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
EVEX_V512;
}
@@ -10802,267 +10332,64 @@ multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
+ SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
let Predicates = [prd] in {
defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
- avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
+ avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
}
}
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
- SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
+ SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
- opcPs, OpNode, OpNodeRnd, sched, prd>,
+ opcPs, OpNode, OpNodeSAE, sched, prd>,
EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
- opcPd, OpNode, OpNodeRnd, sched, prd>,
+ opcPd, OpNode, OpNodeSAE, sched, prd>,
EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
- X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
+ X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
- X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
+ X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
- X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
+ X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, EVEX;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
- 0x50, X86VRange, X86VRangeRnd,
+ 0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
- 0x50, X86VRange, X86VRangeRnd,
+ 0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
- f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
+ f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
- 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
+ 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
- 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
+ 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
- 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
+ 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
- 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
+ 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
- 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
+ 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
-
-multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
- // Register
- def : Pat<(_.VT (ffloor _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0x9))>;
- def : Pat<(_.VT (fnearbyint _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0xC))>;
- def : Pat<(_.VT (fceil _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0xA))>;
- def : Pat<(_.VT (frint _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0x4))>;
- def : Pat<(_.VT (ftrunc _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0xB))>;
-
- // Merge-masking
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
- // Zero-masking
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
- // Load
- def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0xB))>;
-
- // Merge-masking + load
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
- // Zero-masking + load
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
- // Broadcast load
- def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0xB))>;
-
- // Merge-masking + broadcast load
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
- // Zero-masking + broadcast load
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-}
-
-let Predicates = [HasAVX512] in {
- defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
- defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
-}
-
-let Predicates = [HasVLX] in {
- defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
- defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
- defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
- defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
-}
-
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _,
@@ -11544,9 +10871,9 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
-def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPZ128rm addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
@@ -11554,21 +10881,21 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
(v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
- (bitconvert (v4i32 immAllZerosV))),
+ immAllZerosV),
(VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
- (bitconvert (v4i32 immAllZerosV))),
+ immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
(v2f64 VR128X:$src0)),
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
-def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
- (bitconvert (v4i32 immAllZerosV))),
+def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
+ immAllZerosV),
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
}
@@ -12067,39 +11394,39 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
- def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
+ def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
- def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
+ def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
- def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
+ def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
- def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
+ def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
(VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
let Predicates = [HasAVX512, NoVLX] in {
- def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
@@ -12107,28 +11434,28 @@ let Predicates = [HasAVX512, NoVLX] in {
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(i8 15)), sub_xmm)>;
- def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
- def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
- def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(i8 15)), sub_ymm)>;
- def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
(EXTRACT_SUBREG
(VPTERNLOGQZrri
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
@@ -12138,22 +11465,22 @@ let Predicates = [HasAVX512, NoVLX] in {
}
let Predicates = [HasVLX] in {
- def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
+ def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
(VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
- def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
+ def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
@@ -12161,58 +11488,55 @@ let Predicates = [HasVLX] in {
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
-multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
X86VectorVTInfo TblVT>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (TblVT.VT _.RC:$src3),
- (i32 imm:$src4),
- (i32 FROUND_CURRENT))>, Sched<[sched]>;
+ (X86VFixupimm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (TblVT.VT _.RC:$src3),
+ (i32 imm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
- (i32 imm:$src4),
- (i32 FROUND_CURRENT))>,
+ (X86VFixupimm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
+ (i32 imm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
"$src2, ${src3}"##_.BroadcastStr##", $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
- (i32 imm:$src4),
- (i32 FROUND_CURRENT))>,
+ (X86VFixupimm (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
+ (i32 imm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, X86VectorVTInfo TblVT>{
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _, X86VectorVTInfo TblVT>
+ : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (TblVT.VT _.RC:$src3),
- (i32 imm:$src4),
- (i32 FROUND_NO_EXC))>,
+ (X86VFixupimmSAE (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (TblVT.VT _.RC:$src3),
+ (i32 imm:$src4))>,
EVEX_B, Sched<[sched]>;
}
}
-multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _,
X86VectorVTInfo _src3VT> {
let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
@@ -12220,30 +11544,27 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (_src3VT.VT _src3VT.RC:$src3),
- (i32 imm:$src4),
- (i32 FROUND_CURRENT))>, Sched<[sched]>;
+ (X86VFixupimms (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_src3VT.VT _src3VT.RC:$src3),
+ (i32 imm:$src4))>, Sched<[sched]>;
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
"$src2, $src3, {sae}, $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (_src3VT.VT _src3VT.RC:$src3),
- (i32 imm:$src4),
- (i32 FROUND_NO_EXC))>,
+ (X86VFixupimmSAEs (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_src3VT.VT _src3VT.RC:$src3),
+ (i32 imm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (_src3VT.VT (scalar_to_vector
- (_src3VT.ScalarLdFrag addr:$src3))),
- (i32 imm:$src4),
- (i32 FROUND_CURRENT))>,
+ (X86VFixupimms (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_src3VT.VT (scalar_to_vector
+ (_src3VT.ScalarLdFrag addr:$src3))),
+ (i32 imm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -12252,25 +11573,23 @@ multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _Vec,
AVX512VLVectorVTInfo _Tbl> {
let Predicates = [HasAVX512] in
- defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
- _Vec.info512, _Tbl.info512>,
- avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
+ defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
_Vec.info512, _Tbl.info512>, AVX512AIi8Base,
EVEX_4V, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
+ defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
_Vec.info128, _Tbl.info128>, AVX512AIi8Base,
EVEX_4V, EVEX_V128;
- defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
+ defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
_Vec.info256, _Tbl.info256>, AVX512AIi8Base,
EVEX_4V, EVEX_V256;
}
}
-defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
+defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
-defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
+defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
@@ -12331,6 +11650,12 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
_.FRC:$src)))),
(!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
(_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
+ def : Pat<(MoveNode
+ (_.VT VR128X:$dst),
+ (_.VT (scalar_to_vector
+ (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src))))),
+ (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
@@ -12344,6 +11669,16 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
(_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
+ def : Pat<(MoveNode (_.VT VR128X:$src1),
+ (scalar_to_vector
+ (X86selects VK1WM:$mask,
+ (Op (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src2)),
+ _.FRC:$src0))),
+ (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
+ (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
+ VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// extracted masked scalar math op with insert via movss
def : Pat<(MoveNode (_.VT VR128X:$src1),
@@ -12355,6 +11690,13 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
(!cast<I>("V"#OpcPrefix#Zrr_Intkz)
VK1WM:$mask, _.VT:$src1,
(_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
+ def : Pat<(MoveNode (_.VT VR128X:$src1),
+ (scalar_to_vector
+ (X86selects VK1WM:$mask,
+ (Op (_.EltVT
+ (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
+ (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
}
}
@@ -12380,26 +11722,6 @@ multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
-multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
- SDNode Move, X86VectorVTInfo _,
- bits<8> ImmV> {
- let Predicates = [HasAVX512] in {
- def : Pat<(_.VT (Move _.VT:$dst,
- (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
- (i32 ImmV))>;
- }
-}
-
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
- v4f32x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
- v4f32x_info, 0x02>;
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
- v2f64x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
- v2f64x_info, 0x02>;
-
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//
@@ -12612,12 +11934,19 @@ defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
+def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
+ (X86Vpshufbitqmb node:$src1, node:$src2), [{
+ return N->hasOneUse();
+}]>;
+
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
(ins VTI.RC:$src1, VTI.RC:$src2),
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+ (VTI.VT VTI.RC:$src2)),
+ (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
Sched<[sched]>;
defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
@@ -12625,6 +11954,8 @@ multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
"vpshufbitqmb",
"$src2, $src1", "$src1, $src2",
(X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+ (VTI.VT (VTI.LdFrag addr:$src2))),
+ (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
(VTI.VT (VTI.LdFrag addr:$src2)))>,
EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
Sched<[sched.Folded, sched.ReadAfterFold]>;
@@ -12720,13 +12051,13 @@ defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
"v4fmaddss", "$src3, $src2", "$src2, $src3",
- []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
+ []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
Sched<[SchedWriteFMA.Scl.Folded]>;
defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
"v4fnmaddss", "$src3, $src2", "$src2, $src3",
- []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
+ []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
Sched<[SchedWriteFMA.Scl.Folded]>;
}
@@ -12749,3 +12080,196 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
Sched<[SchedWriteFMA.ZMM.Folded]>;
}
+let hasSideEffects = 0 in {
+ let mayStore = 1 in
+ def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
+ let mayLoad = 1 in
+ def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// VP2INTERSECT
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vp2intersect_modes<X86VectorVTInfo _> {
+ def rr : I<0x68, MRMSrcReg,
+ (outs _.KRPC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ !strconcat("vp2intersect", _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRPC:$dst, (X86vp2intersect
+ _.RC:$src1, (_.VT _.RC:$src2)))]>,
+ EVEX_4V, T8XD;
+
+ def rm : I<0x68, MRMSrcMem,
+ (outs _.KRPC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2),
+ !strconcat("vp2intersect", _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRPC:$dst, (X86vp2intersect
+ _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
+ EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>;
+
+ def rmb : I<0x68, MRMSrcMem,
+ (outs _.KRPC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
+ ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
+ [(set _.KRPC:$dst, (X86vp2intersect
+ _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>,
+ EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+}
+
+multiclass avx512_vp2intersect<AVX512VLVectorVTInfo _> {
+ let Predicates = [HasAVX512, HasVP2INTERSECT] in
+ defm Z : avx512_vp2intersect_modes<_.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
+ defm Z256 : avx512_vp2intersect_modes<_.info256>, EVEX_V256;
+ defm Z128 : avx512_vp2intersect_modes<_.info128>, EVEX_V128;
+ }
+}
+
+defm VP2INTERSECTD : avx512_vp2intersect<avx512vl_i32_info>;
+defm VP2INTERSECTQ : avx512_vp2intersect<avx512vl_i64_info>, VEX_W;
+
+multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched,
+ AVX512VLVectorVTInfo _SrcVTInfo,
+ AVX512VLVectorVTInfo _DstVTInfo,
+ SDNode OpNode, Predicate prd,
+ bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
+ _SrcVTInfo.info512, _DstVTInfo.info512,
+ _SrcVTInfo.info512, IsCommutable>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ let Predicates = [HasVLX, prd] in {
+ defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
+ _SrcVTInfo.info256, _DstVTInfo.info256,
+ _SrcVTInfo.info256, IsCommutable>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
+ _SrcVTInfo.info128, _DstVTInfo.info128,
+ _SrcVTInfo.info128, IsCommutable>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ }
+}
+
+defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
+                                     SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
+ avx512vl_f32_info, avx512vl_i16_info,
+ X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
+
+// Truncate Float to BFloat16
+multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasBF16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
+ X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasBF16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
+ null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
+ VK4WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
+ X86cvtneps2bf16,
+ sched.YMM, "{1to8}", "{y}">, EVEX_V256;
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
+ f128mem:$src), 0, "intel">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
+ f256mem:$src), 0, "intel">;
+ }
+}
+
+defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
+ SchedWriteCvtPD2PS>, T8XS,
+ EVEX_CD8<32, CD8VF>;
+
+let Predicates = [HasBF16, HasVLX] in {
+ // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
+ (VCVTNEPS2BF16Z128rr VR128X:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
+ (VCVTNEPS2BF16Z128rm addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
+ (X86VBroadcast (loadf32 addr:$src))))),
+ (VCVTNEPS2BF16Z128rmb addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (v8i16 VR128X:$src0), VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ v8i16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, X86VectorVTInfo src_v> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ EVEX_4V;
+
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (src_v.VT (bitconvert
+ (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr,
+ !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr),
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
+ EVEX_B, EVEX_4V;
+
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _,
+ AVX512VLVectorVTInfo src_v, Predicate prd> {
+ let Predicates = [prd] in {
+ defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
+ src_v.info512>, EVEX_V512;
+ }
+ let Predicates = [HasVLX, prd] in {
+ defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
+ src_v.info256>, EVEX_V256;
+ defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
+ src_v.info128>, EVEX_V128;
+ }
+}
+
+defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
+ avx512vl_f32_info, avx512vl_i32_info,
+ HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index cb5a4e5b5d41..e52635f8d48b 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1,9 +1,8 @@
//===-- X86InstrArithmetic.td - Integer Arithmetic Instrs --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -195,19 +194,22 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+// NOTE: These are order specific, we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
// Register-Integer Signed Integer Multiply
-def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>,
- Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
Sched<[WriteIMul16Imm]>, OpSize16;
+def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>,
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -220,26 +222,20 @@ def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
Sched<[WriteIMul32Imm]>, OpSize32;
-def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
- Sched<[WriteIMul64Imm]>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
Sched<[WriteIMul64Imm]>;
+def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
+ (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
+ Sched<[WriteIMul64Imm]>;
// Memory-Integer Signed Integer Multiply
-def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
- (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -247,12 +243,12 @@ def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(X86smul_flag (loadi16 addr:$src1),
i16immSExt8:$src2))]>,
Sched<[WriteIMul16Imm.Folded]>, OpSize16;
-def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
- (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul32Imm.Folded]>, OpSize32;
+def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
+ (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+ "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,13 +256,12 @@ def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(X86smul_flag (loadi32 addr:$src1),
i32immSExt8:$src2))]>,
Sched<[WriteIMul32Imm.Folded]>, OpSize32;
-def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
- (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (loadi64 addr:$src1),
- i64immSExt32:$src2))]>,
- Sched<[WriteIMul64Imm.Folded]>;
+def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
+ (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+ "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -274,6 +269,13 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (loadi64 addr:$src1),
i64immSExt8:$src2))]>,
Sched<[WriteIMul64Imm.Folded]>;
+def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
+ (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+ "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (loadi64 addr:$src1),
+ i64immSExt32:$src2))]>,
+ Sched<[WriteIMul64Imm.Folded]>;
} // Defs = [EFLAGS]
// unsigned division/remainder
@@ -436,11 +438,10 @@ def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-let CodeSize = 2 in
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
@@ -484,11 +485,10 @@ let Predicates = [UseIncDec, In64BitMode] in {
} // CodeSize = 2, SchedRW
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-let CodeSize = 2 in
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
[(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
[(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
@@ -605,16 +605,16 @@ def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
- Imm8, i8imm, imm8_su, i8imm, invalid_node,
+ Imm8, i8imm, relocImm8_su, i8imm, invalid_node,
0, OpSizeFixed, 0>;
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
- Imm16, i16imm, imm16_su, i16i8imm, i16immSExt8_su,
+ Imm16, i16imm, relocImm16_su, i16i8imm, i16immSExt8_su,
1, OpSize16, 0>;
def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
- Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su,
+ Imm32, i32imm, relocImm32_su, i32i8imm, i32immSExt8_su,
1, OpSize32, 0>;
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
- Imm32S, i64i32imm, i64immSExt32_su, i64i8imm, i64immSExt8_su,
+ Imm32S, i64i32imm, i64relocImmSExt32_su, i64i8imm, i64immSExt8_su,
1, OpSizeFixed, 1>;
/// ITy - This instruction base class takes the type info for the instruction.
@@ -924,11 +924,12 @@ class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
string mnemonic, Format RegMRM, Format MemMRM,
SDNode opnodeflag, SDNode opnode,
- bit CommutableRR, bit ConvertibleToThreeAddress> {
+ bit CommutableRR, bit ConvertibleToThreeAddress,
+ bit ConvertibleToThreeAddressRR> {
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = CommutableRR in {
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
@@ -1169,16 +1170,16 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
- X86and_flag, and, 1, 0>;
+ X86and_flag, and, 1, 0, 0>;
defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
- X86or_flag, or, 1, 0>;
+ X86or_flag, or, 1, 0, 0>;
defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
- X86xor_flag, xor, 1, 0>;
+ X86xor_flag, xor, 1, 0, 0>;
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
- X86add_flag, add, 1, 1>;
+ X86add_flag, add, 1, 1, 1>;
let isCompare = 1 in {
defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
- X86sub_flag, sub, 0, 0>;
+ X86sub_flag, sub, 0, 1, 0>;
}
// Arithmetic.
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index dcce7b9951f2..50aed98112c3 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -1,9 +1,8 @@
//===-- X86InstrBuilder.h - Functions to aid building x86 insts -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index f5494fc0b13f..099f6aa8d8bb 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -1,9 +1,8 @@
//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,99 +13,94 @@
// CMOV instructions.
-multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
- PatLeaf CondNode> {
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- isCommutable = 1, SchedRW = [Sched] in {
- def NAME#16rr
- : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
- [(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
- TB, OpSize16;
- def NAME#32rr
- : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
- [(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
- TB, OpSize32;
- def NAME#64rr
- :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
- [(set GR64:$dst,
- (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
- }
-
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- SchedRW = [Sched.Folded, Sched.ReadAfterFold] in {
- def NAME#16rm
- : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
- [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- CondNode, EFLAGS))]>, TB, OpSize16;
- def NAME#32rm
- : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- CondNode, EFLAGS))]>, TB, OpSize32;
- def NAME#64rm
- :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
- !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
- [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- CondNode, EFLAGS))]>, TB;
- } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
-} // end multiclass
+let isCodeGenOnly = 1, ForceDisassemble = 1 in {
+let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ isCommutable = 1, SchedRW = [WriteCMOV] in {
+ def CMOV16rr
+ : I<0x40, MRMSrcRegCC, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, ccode:$cond),
+ "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst,
+ (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>,
+ TB, OpSize16;
+ def CMOV32rr
+ : I<0x40, MRMSrcRegCC, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, ccode:$cond),
+ "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst,
+ (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>,
+ TB, OpSize32;
+ def CMOV64rr
+ :RI<0x40, MRMSrcRegCC, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, ccode:$cond),
+ "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst,
+ (X86cmov GR64:$src1, GR64:$src2, imm:$cond, EFLAGS))]>, TB;
+}
+let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold] in {
+ def CMOV16rm
+ : I<0x40, MRMSrcMemCC, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2, ccode:$cond),
+ "cmov${cond}{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ imm:$cond, EFLAGS))]>, TB, OpSize16;
+ def CMOV32rm
+ : I<0x40, MRMSrcMemCC, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2, ccode:$cond),
+ "cmov${cond}{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ imm:$cond, EFLAGS))]>, TB, OpSize32;
+ def CMOV64rm
+ :RI<0x40, MRMSrcMemCC, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2, ccode:$cond),
+ "cmov${cond}{q}\t{$src2, $dst|$dst, $src2}",
+ [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+ imm:$cond, EFLAGS))]>, TB;
+} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // isCodeGenOnly = 1, ForceDisassemble = 1
-// Conditional Moves.
-defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>;
-defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>;
-defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>;
-defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>;
-defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>;
-defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>;
-defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
-defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
-defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>;
-defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>;
-defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>;
-defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>;
-defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>;
-defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>;
-defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>;
-defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>;
+// SetCC instructions.
+let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
+ def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
+ "set${cond}\t$dst",
+ [(set GR8:$dst, (X86setcc imm:$cond, EFLAGS))]>,
+ TB, Sched<[WriteSETCC]>;
+ def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond),
+ "set${cond}\t$dst",
+ [(store (X86setcc imm:$cond, EFLAGS), addr:$dst)]>,
+ TB, Sched<[WriteSETCCStore]>;
+} // Uses = [EFLAGS]
+multiclass CMOV_SETCC_Aliases<string Cond, int CC> {
+ def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
+ (CMOV16rr GR16:$dst, GR16:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
+ (CMOV16rm GR16:$dst, i16mem:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
+ (CMOV32rr GR32:$dst, GR32:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
+ (CMOV32rm GR32:$dst, i32mem:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
+ (CMOV64rr GR64:$dst, GR64:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
+ (CMOV64rm GR64:$dst, i64mem:$src, CC), 0>;
-// SetCC instructions.
-multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
- let Uses = [EFLAGS] in {
- def r : I<opc, MRMXr, (outs GR8:$dst), (ins),
- !strconcat(Mnemonic, "\t$dst"),
- [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
- TB, Sched<[WriteSETCC]>;
- def m : I<opc, MRMXm, (outs), (ins i8mem:$dst),
- !strconcat(Mnemonic, "\t$dst"),
- [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
- TB, Sched<[WriteSETCCStore]>;
- } // Uses = [EFLAGS]
+ def : InstAlias<"set"#Cond#"\t$dst", (SETCCr GR8:$dst, CC), 0>;
+ def : InstAlias<"set"#Cond#"\t$dst", (SETCCm i8mem:$dst, CC), 0>;
}
-defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
-defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
-defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
-defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
-defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
-defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
-defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
-defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
-defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
-defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
-defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
-defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
-defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
-defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
-defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
-defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+defm : CMOV_SETCC_Aliases<"o" , 0>;
+defm : CMOV_SETCC_Aliases<"no", 1>;
+defm : CMOV_SETCC_Aliases<"b" , 2>;
+defm : CMOV_SETCC_Aliases<"ae", 3>;
+defm : CMOV_SETCC_Aliases<"e" , 4>;
+defm : CMOV_SETCC_Aliases<"ne", 5>;
+defm : CMOV_SETCC_Aliases<"be", 6>;
+defm : CMOV_SETCC_Aliases<"a" , 7>;
+defm : CMOV_SETCC_Aliases<"s" , 8>;
+defm : CMOV_SETCC_Aliases<"ns", 9>;
+defm : CMOV_SETCC_Aliases<"p" , 10>;
+defm : CMOV_SETCC_Aliases<"np", 11>;
+defm : CMOV_SETCC_Aliases<"l" , 12>;
+defm : CMOV_SETCC_Aliases<"ge", 13>;
+defm : CMOV_SETCC_Aliases<"le", 14>;
+defm : CMOV_SETCC_Aliases<"g" , 15>;
// SALC is an undocumented instruction. Information for this instruction can be found
// here http://www.rcollins.org/secrets/opcodes/SALC.html
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 394dca8e7817..efaccdc9ee96 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1,9 +1,8 @@
//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,11 +19,6 @@ def GetLo32XForm : SDNodeXForm<imm, [{
return getI32Imm((uint32_t)N->getZExtValue(), SDLoc(N));
}]>;
-def GetLo8XForm : SDNodeXForm<imm, [{
- // Transformation function: get the low 8 bits.
- return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
-}]>;
-
//===----------------------------------------------------------------------===//
// Random Pseudo Instructions.
@@ -360,7 +354,7 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
// this happens, it is great. However, if we are left with an 8-bit sbb and an
// and, we might as well just match it as a setb.
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
- (SETBr)>;
+ (SETCCr (i8 2))>;
// Patterns to give priority when both inputs are zero so that we don't use
// an immediate for the RHS.
@@ -574,8 +568,14 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;
- defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
- defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
+ let Predicates = [NoAVX512] in {
+ defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
+ defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
+ }
+ let Predicates = [HasAVX512] in {
+ defm _FR32X : CMOVrr_PSEUDO<FR32X, f32>;
+ defm _FR64X : CMOVrr_PSEUDO<FR64X, f64>;
+ }
let Predicates = [NoVLX] in {
defm _VR128 : CMOVrr_PSEUDO<VR128, v2i64>;
defm _VR256 : CMOVrr_PSEUDO<VR256, v4i64>;
@@ -712,6 +712,32 @@ def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
"{$src2, $dst|$dst, $src2}"),
[(set EFLAGS, (Op addr:$dst, GR64:$src2))]>, LOCK;
+// NOTE: These are order specific, we want the mi8 forms to be listed
+// first so that they are slightly preferred to the mi forms.
+def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
+ !strconcat(mnemonic, "{w}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>,
+ OpSize16, LOCK;
+
+def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
+ !strconcat(mnemonic, "{l}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>,
+ OpSize32, LOCK;
+
+def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
+ ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
+ ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
+ !strconcat(mnemonic, "{q}\t",
+ "{$src2, $dst|$dst, $src2}"),
+ [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>,
+ LOCK;
+
def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
@@ -742,30 +768,6 @@ def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
"{$src2, $dst|$dst, $src2}"),
[(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))]>,
LOCK;
-
-def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
- !strconcat(mnemonic, "{w}\t",
- "{$src2, $dst|$dst, $src2}"),
- [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>,
- OpSize16, LOCK;
-
-def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
- !strconcat(mnemonic, "{l}\t",
- "{$src2, $dst|$dst, $src2}"),
- [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>,
- OpSize32, LOCK;
-
-def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
- ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
- ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
- !strconcat(mnemonic, "{q}\t",
- "{$src2, $dst|$dst, $src2}"),
- [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>,
- LOCK;
}
}
@@ -868,7 +870,7 @@ let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
- SchedRW = [WriteCMPXCHGRMW] in {
+ Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
}
@@ -892,8 +894,9 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem>;
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
- SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1, isPseudo = 1,
- Constraints = "$ebx_save = $dst", usesCustomInserter = 1 in {
+ Predicates = [HasCmpxchg8b], SchedRW = [WriteCMPXCHGRMW],
+ isCodeGenOnly = 1, isPseudo = 1, Constraints = "$ebx_save = $dst",
+ usesCustomInserter = 1 in {
def LCMPXCHG8B_SAVE_EBX :
I<0, Pseudo, (outs GR32:$dst),
(ins i64mem:$ptr, GR32:$ebx_input, GR32:$ebx_save),
@@ -904,14 +907,14 @@ def LCMPXCHG8B_SAVE_EBX :
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW] in {
+ Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW] in {
defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b",
X86cas16, i128mem>, REX_W;
}
// Same as LCMPXCHG8B_SAVE_RBX but for the 16 Bytes variant.
let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
- Predicates = [HasCmpxchg16b], SchedRW = [WriteCMPXCHGRMW],
+ Predicates = [HasCmpxchg16b,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst",
usesCustomInserter = 1 in {
def LCMPXCHG16B_SAVE_RBX :
@@ -1001,28 +1004,31 @@ defm : RELEASE_BINOP_MI<"OR", or>;
defm : RELEASE_BINOP_MI<"XOR", xor>;
defm : RELEASE_BINOP_MI<"SUB", sub>;
-// Same as above, but for floating-point.
-// FIXME: imm version.
-// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
+// Atomic load + floating point patterns.
// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
-let usesCustomInserter = 1, SchedRW = [WriteMicrocoded] in {
-multiclass RELEASE_FP_BINOP_MI<SDNode op> {
- def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
- "#BINOP "#NAME#"32mr PSEUDO!",
- [(atomic_store_32 addr:$dst,
- (i32 (bitconvert (op
- (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
- FR32:$src))))]>, Requires<[HasSSE1]>;
- def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
- "#BINOP "#NAME#"64mr PSEUDO!",
- [(atomic_store_64 addr:$dst,
- (i64 (bitconvert (op
- (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
- FR64:$src))))]>, Requires<[HasSSE2]>;
+multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
+ def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+ (!cast<Instruction>(Name#"SSrm") FR32:$src1, addr:$src2)>,
+ Requires<[UseSSE1]>;
+ def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+ (!cast<Instruction>("V"#Name#"SSrm") FR32:$src1, addr:$src2)>,
+ Requires<[UseAVX]>;
+ def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
+ (!cast<Instruction>("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>,
+ Requires<[HasAVX512]>;
+
+ def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+ (!cast<Instruction>(Name#"SDrm") FR64:$src1, addr:$src2)>,
+ Requires<[UseSSE1]>;
+ def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+ (!cast<Instruction>("V"#Name#"SDrm") FR64:$src1, addr:$src2)>,
+ Requires<[UseAVX]>;
+ def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
+ (!cast<Instruction>("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>,
+ Requires<[HasAVX512]>;
}
-defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
+defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
// FIXME: Add fsub, fmul, fdiv, ...
-}
multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
dag dag64> {
@@ -1083,6 +1089,35 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
+// Floating point loads/stores.
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+ (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+ (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
+def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
+ (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+ (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+ (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
+def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
+ (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+ (MOVSSrm_alt addr:$src)>, Requires<[UseSSE1]>;
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+ (VMOVSSrm_alt addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
+ (VMOVSSZrm_alt addr:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+ (MOVSDrm_alt addr:$src)>, Requires<[UseSSE2]>;
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+ (VMOVSDrm_alt addr:$src)>, Requires<[UseAVX]>;
+def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
+ (VMOVSDZrm_alt addr:$src)>, Requires<[HasAVX512]>;
+
//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
//===----------------------------------------------------------------------===//
@@ -1241,37 +1276,23 @@ def : Pat<(X86cmp GR32:$src1, 0),
def : Pat<(X86cmp GR64:$src1, 0),
(TEST64rr GR64:$src1, GR64:$src1)>;
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ X86::CondCode CC = static_cast<X86::CondCode>(N->getZExtValue());
+ return CurDAG->getTargetConstant(X86::GetOppositeBranchCondition(CC),
+ SDLoc(N), MVT::i8);
+}]>;
+
// Conditional moves with folded loads with operands swapped and conditions
// inverted.
-multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
- Instruction Inst64> {
- let Predicates = [HasCMov] in {
- def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
- (Inst16 GR16:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
- (Inst32 GR32:$src2, addr:$src1)>;
- def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
- (Inst64 GR64:$src2, addr:$src1)>;
- }
+let Predicates = [HasCMov] in {
+ def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, imm:$cond, EFLAGS),
+ (CMOV16rm GR16:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
+ def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, imm:$cond, EFLAGS),
+ (CMOV32rm GR32:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
+ def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, imm:$cond, EFLAGS),
+ (CMOV64rm GR64:$src2, addr:$src1, (inv_cond_XFORM imm:$cond))>;
}
-defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
-defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
-defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
-defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
-defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
-defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
-defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
-defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
-defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
-defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
-defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
-defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
-defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
-defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
-defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
-defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
-
// zextload bool -> zextload byte
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
@@ -1298,14 +1319,16 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
+// NOTE: The extloadi64i32 pattern needs to be first as it will try to form
+// 32-bit loads for 4 byte aligned i8/i16 loads.
+def : Pat<(extloadi64i32 addr:$src),
+ (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
def : Pat<(extloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
-def : Pat<(extloadi64i32 addr:$src),
- (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
@@ -1351,6 +1374,8 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+def : Pat<(i64 (and (anyext def32:$src), 0x00000000FFFFFFFF)),
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
//===----------------------------------------------------------------------===//
// Pattern match OR as ADD
@@ -1377,9 +1402,12 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
// Try this before selecting to OR.
let SchedRW = [WriteALU] in {
-let isConvertibleToThreeAddress = 1,
+let isConvertibleToThreeAddress = 1, isPseudo = 1,
Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
let isCommutable = 1 in {
+def ADD8rr_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "", // orb/addb REG, REG
+ [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>;
def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"", // orw/addw REG, REG
[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
@@ -1394,6 +1422,10 @@ def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
+def ADD8ri_DB : I<0, Pseudo,
+ (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "", // orb/addb REG, imm8
+ [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>;
def ADD16ri8_DB : I<0, Pseudo,
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"", // orw/addw REG, imm8
@@ -1483,6 +1515,13 @@ def : Pat<(add GR64:$src1, 128),
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
(SUB64mi8 addr:$dst, -128)>;
+def : Pat<(X86add_flag_nocf GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(X86add_flag_nocf GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(X86add_flag_nocf GR64:$src1, 128),
+ (SUB64ri8 GR64:$src1, -128)>;
+
// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
def : Pat<(add GR64:$src1, 0x0000000080000000),
@@ -1490,6 +1529,9 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
+ (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+
// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
// least 32 bits of leading zeros. If in addition the last 32 bits can be
@@ -1504,7 +1546,7 @@ def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(i64 0),
(AND32ri8
(EXTRACT_SUBREG GR64:$src, sub_32bit),
- (i32 (GetLo8XForm imm:$imm))),
+ (i32 (GetLo32XForm imm:$imm))),
sub_32bit)>;
def : Pat<(and GR64:$src, i64immZExt32:$imm),
@@ -1714,40 +1756,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
-// Helper imms to check if a mask doesn't change significant shift/rotate bits.
-def immShift8 : ImmLeaf<i8, [{
- return countTrailingOnes<uint64_t>(Imm) >= 3;
+def shiftMask8 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 3);
}]>;
-def immShift16 : ImmLeaf<i8, [{
- return countTrailingOnes<uint64_t>(Imm) >= 4;
+
+def shiftMask16 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 4);
}]>;
-def immShift32 : ImmLeaf<i8, [{
- return countTrailingOnes<uint64_t>(Imm) >= 5;
+
+def shiftMask32 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 5);
}]>;
-def immShift64 : ImmLeaf<i8, [{
- return countTrailingOnes<uint64_t>(Imm) >= 6;
+
+def shiftMask64 : PatFrag<(ops node:$lhs), (and node:$lhs, imm), [{
+ return isUnneededShiftMask(N, 6);
}]>;
+
// Shift amount is implicitly masked.
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
// (shift x (and y, 31)) ==> (shift x, y)
- def : Pat<(frag GR8:$src1, (and CL, immShift32)),
+ def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
(!cast<Instruction>(name # "8rCL") GR8:$src1)>;
- def : Pat<(frag GR16:$src1, (and CL, immShift32)),
+ def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
(!cast<Instruction>(name # "16rCL") GR16:$src1)>;
- def : Pat<(frag GR32:$src1, (and CL, immShift32)),
+ def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
(!cast<Instruction>(name # "32rCL") GR32:$src1)>;
- def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
+ def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask32 CL)), addr:$dst),
(!cast<Instruction>(name # "8mCL") addr:$dst)>;
- def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
+ def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask32 CL)), addr:$dst),
(!cast<Instruction>(name # "16mCL") addr:$dst)>;
- def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+ def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
(!cast<Instruction>(name # "32mCL") addr:$dst)>;
// (shift x (and y, 63)) ==> (shift x, y)
- def : Pat<(frag GR64:$src1, (and CL, immShift64)),
+ def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
(!cast<Instruction>(name # "64rCL") GR64:$src1)>;
- def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
+ def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
(!cast<Instruction>(name # "64mCL") addr:$dst)>;
}
@@ -1763,23 +1808,23 @@ defm : MaskedShiftAmountPats<sra, "SAR">;
// not tracking flags for these nodes.
multiclass MaskedRotateAmountPats<SDNode frag, string name> {
// (rot x (and y, BitWidth - 1)) ==> (rot x, y)
- def : Pat<(frag GR8:$src1, (and CL, immShift8)),
+ def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
(!cast<Instruction>(name # "8rCL") GR8:$src1)>;
- def : Pat<(frag GR16:$src1, (and CL, immShift16)),
+ def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
(!cast<Instruction>(name # "16rCL") GR16:$src1)>;
- def : Pat<(frag GR32:$src1, (and CL, immShift32)),
+ def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
(!cast<Instruction>(name # "32rCL") GR32:$src1)>;
- def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift8)), addr:$dst),
+ def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask8 CL)), addr:$dst),
(!cast<Instruction>(name # "8mCL") addr:$dst)>;
- def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift16)), addr:$dst),
+ def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask16 CL)), addr:$dst),
(!cast<Instruction>(name # "16mCL") addr:$dst)>;
- def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+ def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
(!cast<Instruction>(name # "32mCL") addr:$dst)>;
// (rot x (and y, 63)) ==> (rot x, y)
- def : Pat<(frag GR64:$src1, (and CL, immShift64)),
+ def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
(!cast<Instruction>(name # "64rCL") GR64:$src1)>;
- def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
+ def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
(!cast<Instruction>(name # "64mCL") addr:$dst)>;
}
@@ -1790,13 +1835,13 @@ defm : MaskedRotateAmountPats<rotr, "ROR">;
// Double shift amount is implicitly masked.
multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
// (shift x (and y, 31)) ==> (shift x, y)
- def : Pat<(frag GR16:$src1, GR16:$src2, (and CL, immShift32)),
+ def : Pat<(frag GR16:$src1, GR16:$src2, (shiftMask32 CL)),
(!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
- def : Pat<(frag GR32:$src1, GR32:$src2, (and CL, immShift32)),
+ def : Pat<(frag GR32:$src1, GR32:$src2, (shiftMask32 CL)),
(!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
// (shift x (and y, 63)) ==> (shift x, y)
- def : Pat<(frag GR64:$src1, GR64:$src2, (and CL, immShift64)),
+ def : Pat<(frag GR64:$src1, GR64:$src2, (shiftMask64 CL)),
(!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
}
@@ -1805,57 +1850,57 @@ defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
let Predicates = [HasBMI2] in {
let AddedComplexity = 1 in {
- def : Pat<(sra GR32:$src1, (and GR8:$src2, immShift32)),
+ def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
(SARX32rr GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra GR64:$src1, (and GR8:$src2, immShift64)),
+ def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
(SARX64rr GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR32:$src1, (and GR8:$src2, immShift32)),
+ def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
(SHRX32rr GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl GR64:$src1, (and GR8:$src2, immShift64)),
+ def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
(SHRX64rr GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR32:$src1, (and GR8:$src2, immShift32)),
+ def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
(SHLX32rr GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl GR64:$src1, (and GR8:$src2, immShift64)),
+ def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
(SHLX64rr GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
- def : Pat<(sra (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
(SARX32rm addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(sra (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
(SARX64rm addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
(SHRX32rm addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(srl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
(SHRX64rm addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi32 addr:$src1), (and GR8:$src2, immShift32)),
+ def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
(SHLX32rm addr:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(shl (loadi64 addr:$src1), (and GR8:$src2, immShift64)),
+ def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
(SHLX64rm addr:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
@@ -1864,7 +1909,7 @@ let Predicates = [HasBMI2] in {
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
Instruction BTS, Instruction BTC,
- ImmLeaf ImmShift> {
+ PatFrag ShiftMask> {
def : Pat<(and RC:$src1, (rotl -2, GR8:$src2)),
(BTR RC:$src1,
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
@@ -1876,20 +1921,20 @@ multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
// Similar to above, but removing unneeded masking of the shift amount.
- def : Pat<(and RC:$src1, (rotl -2, (and GR8:$src2, ImmShift))),
+ def : Pat<(and RC:$src1, (rotl -2, (ShiftMask GR8:$src2))),
(BTR RC:$src1,
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(or RC:$src1, (shl 1, (and GR8:$src2, ImmShift))),
+ def : Pat<(or RC:$src1, (shl 1, (ShiftMask GR8:$src2))),
(BTS RC:$src1,
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
- def : Pat<(xor RC:$src1, (shl 1, (and GR8:$src2, ImmShift))),
+ def : Pat<(xor RC:$src1, (shl 1, (ShiftMask GR8:$src2))),
(BTC RC:$src1,
(INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
-defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, immShift16>;
-defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, immShift32>;
-defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, immShift64>;
+defm : one_bit_patterns<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>;
+defm : one_bit_patterns<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>;
+defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
// (anyext (setcc_carry)) -> (setcc_carry)
@@ -1974,8 +2019,6 @@ def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
// sub reg, relocImm
def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
(SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
-def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt32_su:$src2),
- (SUB64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index a7c7aaab2285..f82e80965b7c 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -1,9 +1,8 @@
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -71,35 +70,40 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
}
// Conditional Branches.
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
- multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)]>;
- let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
- def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
- []>, OpSize16, TB;
- def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
- []>, TB, OpSize32;
- }
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump],
+ isCodeGenOnly = 1, ForceDisassemble = 1 in {
+ def JCC_1 : Ii8PCRel <0x70, AddCCFrm, (outs),
+ (ins brtarget8:$dst, ccode:$cond),
+ "j${cond}\t$dst",
+ [(X86brcond bb:$dst, imm:$cond, EFLAGS)]>;
+ let hasSideEffects = 0 in {
+ def JCC_2 : Ii16PCRel<0x80, AddCCFrm, (outs),
+ (ins brtarget16:$dst, ccode:$cond),
+ "j${cond}\t$dst",
+ []>, OpSize16, TB;
+ def JCC_4 : Ii32PCRel<0x80, AddCCFrm, (outs),
+ (ins brtarget32:$dst, ccode:$cond),
+ "j${cond}\t$dst",
+ []>, TB, OpSize32;
}
}
-defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
-defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
-defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
-defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
-defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
-defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
-defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
-defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
-defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
-defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
-defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
-defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
-defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
-defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
-defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
-defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
+def : InstAlias<"jo\t$dst", (JCC_1 brtarget8:$dst, 0), 0>;
+def : InstAlias<"jno\t$dst", (JCC_1 brtarget8:$dst, 1), 0>;
+def : InstAlias<"jb\t$dst", (JCC_1 brtarget8:$dst, 2), 0>;
+def : InstAlias<"jae\t$dst", (JCC_1 brtarget8:$dst, 3), 0>;
+def : InstAlias<"je\t$dst", (JCC_1 brtarget8:$dst, 4), 0>;
+def : InstAlias<"jne\t$dst", (JCC_1 brtarget8:$dst, 5), 0>;
+def : InstAlias<"jbe\t$dst", (JCC_1 brtarget8:$dst, 6), 0>;
+def : InstAlias<"ja\t$dst", (JCC_1 brtarget8:$dst, 7), 0>;
+def : InstAlias<"js\t$dst", (JCC_1 brtarget8:$dst, 8), 0>;
+def : InstAlias<"jns\t$dst", (JCC_1 brtarget8:$dst, 9), 0>;
+def : InstAlias<"jp\t$dst", (JCC_1 brtarget8:$dst, 10), 0>;
+def : InstAlias<"jnp\t$dst", (JCC_1 brtarget8:$dst, 11), 0>;
+def : InstAlias<"jl\t$dst", (JCC_1 brtarget8:$dst, 12), 0>;
+def : InstAlias<"jge\t$dst", (JCC_1 brtarget8:$dst, 13), 0>;
+def : InstAlias<"jle\t$dst", (JCC_1 brtarget8:$dst, 14), 0>;
+def : InstAlias<"jg\t$dst", (JCC_1 brtarget8:$dst, 15), 0>;
// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index c24d6d5b8df1..06e605fe5db2 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -1,9 +1,8 @@
//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,11 +28,11 @@ let hasSideEffects = 0 in {
let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
def CDQE : RI<0x98, RawFrm, (outs), (ins),
- "{cltq|cdqe}", []>, Sched<[WriteALU]>;
+ "{cltq|cdqe}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
def CQO : RI<0x99, RawFrm, (outs), (ins),
- "{cqto|cqo}", []>, Sched<[WriteALU]>;
+ "{cqto|cqo}", []>, Sched<[WriteALU]>, Requires<[In64BitMode]>;
}
// Sign/Zero extenders
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 1a8e529431af..0cca71bdc431 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -1,9 +1,8 @@
//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -237,7 +236,8 @@ multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
}
-let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
+ hasSideEffects = 0 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
@@ -263,8 +263,7 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may be not implemented yet we allow the routines doing the actual commute
// transformation to decide if one or another instruction is commutable or not.
-let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
- hasSideEffects = 0 in
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
diff --git a/lib/Target/X86/X86InstrFMA3Info.cpp b/lib/Target/X86/X86InstrFMA3Info.cpp
index def732a2dd00..25bbdddb7a21 100644
--- a/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -1,9 +1,8 @@
//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,7 +56,7 @@ using namespace llvm;
#define FMA3GROUP_SCALAR(Name, Attrs) \
FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
- FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs)
#define FMA3GROUP_FULL(Name, Attrs) \
FMA3GROUP_PACKED(Name, Attrs) \
@@ -159,11 +158,9 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
// FMA 231 instructions have an opcode of 0xB6-0xBF
unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
- auto I = std::lower_bound(Table.begin(), Table.end(), Opcode,
- [FormIndex](const X86InstrFMA3Group &Group,
- unsigned Opcode) {
- return Group.Opcodes[FormIndex] < Opcode;
- });
+ auto I = partition_point(Table, [=](const X86InstrFMA3Group &Group) {
+ return Group.Opcodes[FormIndex] < Opcode;
+ });
assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
"Couldn't find FMA3 opcode!");
return I;
diff --git a/lib/Target/X86/X86InstrFMA3Info.h b/lib/Target/X86/X86InstrFMA3Info.h
index 6eec1db98bf8..7fa6f5917862 100644
--- a/lib/Target/X86/X86InstrFMA3Info.h
+++ b/lib/Target/X86/X86InstrFMA3Info.h
@@ -1,9 +1,8 @@
//===- X86InstrFMA3Info.h - X86 FMA3 Instruction Information ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 5912a3199613..2ec6d50f9702 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -1,9 +1,8 @@
//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,18 +16,13 @@
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
- SDTCisVT<1, f80>]>;
-def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
-def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
-def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
- SDTCisVT<2, OtherVT>]>;
+def SDTX86Fld : SDTypeProfile<1, 1, [SDTCisFP<0>,
+ SDTCisPtrTy<1>]>;
+def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>,
+ SDTCisPtrTy<1>]>;
+def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
+def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
-def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -42,17 +36,71 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
+def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist,
+ [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+ SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
-def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
+def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def X86fstf64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+def X86fstf80 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
+}]>;
+
+def X86fldf32 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def X86fldf64 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+def X86fldf80 : PatFrag<(ops node:$ptr), (X86fld node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::f80;
+}]>;
+
+def X86fild16 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def X86fild32 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def X86fild64 : PatFrag<(ops node:$ptr), (X86fild node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fildflag64 : PatFrag<(ops node:$ptr), (X86fildflag node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86fist node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr),
+ (X86fp_to_mem node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def X86fp_to_i32mem : PatFrag<(ops node:$val, node:$ptr),
+ (X86fp_to_mem node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def X86fp_to_i64mem : PatFrag<(ops node:$val, node:$ptr),
+ (X86fp_to_mem node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//
@@ -74,7 +122,9 @@ def fpimmneg1 : FPImmLeaf<fAny, [{
}]>;
// Some 'special' instructions - expanded after instruction selection.
-let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+// Clobbers EFLAGS due to OR instruction used internally.
+// FIXME: Can we model this in SelectionDAG?
+let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [EFLAGS] in {
def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
@@ -139,7 +189,6 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
-let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
@@ -176,8 +225,10 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
+let mayLoad = 1 in
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
@@ -185,52 +236,53 @@ def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
- (OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
+ (OpNode RFP32:$src1, (X86fild16 addr:$src2))),
(set RFP32:$dst,
- (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
+ (OpNode (X86fild16 addr:$src2), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
- (OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
+ (OpNode RFP32:$src1, (X86fild32 addr:$src2))),
(set RFP32:$dst,
- (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
+ (OpNode (X86fild32 addr:$src2), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
- (OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
+ (OpNode RFP64:$src1, (X86fild16 addr:$src2))),
(set RFP64:$dst,
- (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
+ (OpNode (X86fild16 addr:$src2), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
- (OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
+ (OpNode RFP64:$src1, (X86fild32 addr:$src2))),
(set RFP64:$dst,
- (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
+ (OpNode (X86fild32 addr:$src2), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
- (OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
+ (OpNode RFP80:$src1, (X86fild16 addr:$src2))),
(set RFP80:$dst,
- (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
+ (OpNode (X86fild16 addr:$src2), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
- (OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
+ (OpNode RFP80:$src1, (X86fild32 addr:$src2))),
(set RFP80:$dst,
- (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
+ (OpNode (X86fild32 addr:$src2), RFP80:$src1)))]>;
+let mayLoad = 1 in
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
-} // mayLoad = 1, hasSideEffects = 1
}
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
// FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
// resources.
let hasNoSchedulingInfo = 1 in {
@@ -258,42 +310,42 @@ defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
} // Defs = [FPSW]
class FPST0rInst<Format fp, string asm>
- : FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
+ : FPI<0xD8, fp, (outs), (ins RSTi:$op), asm>;
class FPrST0Inst<Format fp, string asm>
- : FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
+ : FPI<0xDC, fp, (outs), (ins RSTi:$op), asm>;
class FPrST0PInst<Format fp, string asm>
- : FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
+ : FPI<0xDE, fp, (outs), (ins RSTi:$op), asm>;
// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
-let SchedRW = [WriteFAdd] in {
-def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t$op">;
-def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
-def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t$op">;
-def SUBR_FST0r : FPST0rInst <MRM5r, "fsubr\t$op">;
-def SUB_FrST0 : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
-def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
-def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
-def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
-def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
+let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in {
+def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t{$op, %st|st, $op}">;
+def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st, $op|$op, st}">;
+def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t{%st, $op|$op, st}">;
+def SUBR_FST0r : FPST0rInst <MRM5r, "fsubr\t{$op, %st|st, $op}">;
+def SUB_FrST0 : FPrST0Inst <MRM5r, "fsub{r}\t{%st, $op|$op, st}">;
+def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t{%st, $op|$op, st}">;
+def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t{$op, %st|st, $op}">;
+def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st, $op|$op, st}">;
+def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t{%st, $op|$op, st}">;
} // SchedRW
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
-let SchedRW = [WriteFMul] in {
-def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t$op">;
-def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
-def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t$op">;
+let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in {
+def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t{$op, %st|st, $op}">;
+def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st, $op|$op, st}">;
+def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t{%st, $op|$op, st}">;
} // SchedRW
-let SchedRW = [WriteFDiv] in {
-def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t$op">;
-def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
-def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
-def DIV_FST0r : FPST0rInst <MRM6r, "fdiv\t$op">;
-def DIVR_FrST0 : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
-def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
+let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in {
+def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t{$op, %st|st, $op}">;
+def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st, $op|$op, st}">;
+def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t{%st, $op|$op, st}">;
+def DIV_FST0r : FPST0rInst <MRM6r, "fdiv\t{$op, %st|st, $op}">;
+def DIVR_FrST0 : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st, $op|$op, st}">;
+def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t{%st, $op|$op, st}">;
} // SchedRW
// Unary operations.
@@ -307,7 +359,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}
-let Defs = [FPSW] in {
+let Defs = [FPSW], Uses = [FPCW] in {
let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
@@ -335,7 +387,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
-let SchedRW = [WriteFComLd] in {
+let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
@@ -398,32 +450,31 @@ defm CMOVNP : FPCMov<X86_COND_NP>;
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
-def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
- "fcmovb\t{$op, %st(0)|st(0), $op}">;
-def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
- "fcmovbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
- "fcmove\t{$op, %st(0)|st(0), $op}">;
-def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
- "fcmovu\t{$op, %st(0)|st(0), $op}">;
-def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
- "fcmovnb\t{$op, %st(0)|st(0), $op}">;
-def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
- "fcmovnbe\t{$op, %st(0)|st(0), $op}">;
-def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
- "fcmovne\t{$op, %st(0)|st(0), $op}">;
-def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
- "fcmovnu\t{$op, %st(0)|st(0), $op}">;
+def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RSTi:$op),
+ "fcmovb\t{$op, %st|st, $op}">;
+def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RSTi:$op),
+ "fcmovbe\t{$op, %st|st, $op}">;
+def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RSTi:$op),
+ "fcmove\t{$op, %st|st, $op}">;
+def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RSTi:$op),
+ "fcmovu\t{$op, %st|st, $op}">;
+def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RSTi:$op),
+ "fcmovnb\t{$op, %st|st, $op}">;
+def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RSTi:$op),
+ "fcmovnbe\t{$op, %st|st, $op}">;
+def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RSTi:$op),
+ "fcmovne\t{$op, %st|st, $op}">;
+def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RSTi:$op),
+ "fcmovnu\t{$op, %st|st, $op}">;
} // Predicates = [HasCMov]
} // SchedRW
// Floating point loads & stores.
-let SchedRW = [WriteLoad] in {
+let SchedRW = [WriteLoad], Uses = [FPCW] in {
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
-let isReMaterializable = 1 in
- def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
[(set RFP80:$dst, (loadf80 addr:$src))]>;
@@ -435,26 +486,26 @@ def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
- [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
+ [(set RFP32:$dst, (X86fild16 addr:$src))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
- [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
+ [(set RFP32:$dst, (X86fild32 addr:$src))]>;
def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
- [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
+ [(set RFP32:$dst, (X86fild64 addr:$src))]>;
def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
- [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
+ [(set RFP64:$dst, (X86fild16 addr:$src))]>;
def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
- [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
+ [(set RFP64:$dst, (X86fild32 addr:$src))]>;
def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
- [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
+ [(set RFP64:$dst, (X86fild64 addr:$src))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
- [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
+ [(set RFP80:$dst, (X86fild16 addr:$src))]>;
def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
- [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
+ [(set RFP80:$dst, (X86fild32 addr:$src))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
- [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+ [(set RFP80:$dst, (X86fild64 addr:$src))]>;
} // SchedRW
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStore], Uses = [FPCW] in {
def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
[(store RFP32:$src, addr:$op)]>;
def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
@@ -489,9 +540,9 @@ def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
-} // SchedRW
+} // SchedRW, Uses = [FPCW]
-let mayLoad = 1, SchedRW = [WriteLoad] in {
+let mayLoad = 1, SchedRW = [WriteLoad], Uses = [FPCW] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
@@ -499,7 +550,7 @@ def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
@@ -513,7 +564,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's a FPStack op.
-let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
+let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
@@ -534,22 +585,22 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
}
// FP Stack manipulation instructions.
-let SchedRW = [WriteMove] in {
-def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
-def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
-def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
-def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
+let SchedRW = [WriteMove], Uses = [FPCW] in {
+def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
+def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
+def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
+def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
}
// Floating point constant loads.
-let isReMaterializable = 1, SchedRW = [WriteZero] in {
+let SchedRW = [WriteZero], Uses = [FPCW] in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
@@ -564,13 +615,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
-let SchedRW = [WriteFLD0] in
+let SchedRW = [WriteFLD0], Uses = [FPCW] in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
-let SchedRW = [WriteFLD1] in
+let SchedRW = [WriteFLD1], Uses = [FPCW] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
-let SchedRW = [WriteFLDC], Defs = [FPSW] in {
+let SchedRW = [WriteFLDC], Uses = [FPCW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
@@ -579,7 +630,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
-let SchedRW = [WriteFCom] in {
+let SchedRW = [WriteFCom], Uses = [FPCW] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
@@ -591,37 +642,37 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
-let Defs = [EFLAGS, FPSW] in {
-let Predicates = [FPStackf32, HasCMov] in
-def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
- [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
-let Predicates = [FPStackf64, HasCMov] in
-def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
- [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
-let Predicates = [HasCMov] in
+let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
+def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>,
+ Requires<[FPStackf32, HasCMov]>;
+def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+ [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>,
+ Requires<[FPStackf64, HasCMov]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
- [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
+ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>,
+ Requires<[HasCMov]>;
}
-let Defs = [FPSW], Uses = [ST0] in {
+let Defs = [FPSW], Uses = [ST0, FPCW] in {
def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i)
- (outs), (ins RST:$reg), "fucom\t$reg">;
+ (outs), (ins RSTi:$reg), "fucom\t$reg">;
def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop
- (outs), (ins RST:$reg), "fucomp\t$reg">;
+ (outs), (ins RSTi:$reg), "fucomp\t$reg">;
def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) with ST(1), pop, pop
(outs), (ins), "fucompp">;
}
-let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
+let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in {
def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i)
- (outs), (ins RST:$reg), "fucomi\t$reg">;
+ (outs), (ins RSTi:$reg), "fucomi\t{$reg, %st|st, $reg}">;
def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop
- (outs), (ins RST:$reg), "fucompi\t$reg">;
-}
+ (outs), (ins RSTi:$reg), "fucompi\t{$reg, %st|st, $reg}">;
-let Defs = [EFLAGS, FPSW] in {
-def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
-def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
+def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RSTi:$reg),
+ "fcomi\t{$reg, %st|st, $reg}">;
+def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RSTi:$reg),
+ "fcompi\t{$reg, %st|st, $reg}">;
}
} // SchedRW
@@ -631,12 +682,12 @@ let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
-let Defs = [FPSW] in
+let Defs = [FPSW], Uses = [FPCW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)]>;
} // SchedRW
-let Defs = [FPSW], mayLoad = 1 in
+let Defs = [FPSW,FPCW], mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
Sched<[WriteLoad]>;
@@ -645,8 +696,8 @@ def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
-def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
-def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
+def FFREE : FPI<0xDD, MRM0r, (outs), (ins RSTi:$reg), "ffree\t$reg">;
+def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RSTi:$reg), "ffreep\t$reg">;
// Clear exceptions
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
@@ -695,21 +746,17 @@ def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
//===----------------------------------------------------------------------===//
// Required for RET of f32 / f64 / f80 values.
-def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
-def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
-def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
+def : Pat<(X86fldf32 addr:$src), (LD_Fp32m addr:$src)>;
+def : Pat<(X86fldf64 addr:$src), (LD_Fp64m addr:$src)>;
+def : Pat<(X86fldf80 addr:$src), (LD_Fp80m addr:$src)>;
// Required for CALL which return f32 / f64 / f80 values.
-def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
-def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
- RFP64:$src)>;
-def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
- RFP80:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
- RFP80:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
- RFP80:$src)>;
+def : Pat<(X86fstf32 RFP32:$src, addr:$op), (ST_Fp32m addr:$op, RFP32:$src)>;
+def : Pat<(X86fstf32 RFP64:$src, addr:$op), (ST_Fp64m32 addr:$op, RFP64:$src)>;
+def : Pat<(X86fstf64 RFP64:$src, addr:$op), (ST_Fp64m addr:$op, RFP64:$src)>;
+def : Pat<(X86fstf32 RFP80:$src, addr:$op), (ST_Fp80m32 addr:$op, RFP80:$src)>;
+def : Pat<(X86fstf64 RFP80:$src, addr:$op), (ST_Fp80m64 addr:$op, RFP80:$src)>;
+def : Pat<(X86fstf80 RFP80:$src, addr:$op), (ST_FpP80m addr:$op, RFP80:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
@@ -720,7 +767,11 @@ def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
// Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>;
+
+// Used to conv. between f80 and i64 for i64 atomic loads.
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>;
+def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
diff --git a/lib/Target/X86/X86InstrFoldTables.cpp b/lib/Target/X86/X86InstrFoldTables.cpp
index 7d31cfab4137..d42fec3770c7 100644
--- a/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/lib/Target/X86/X86InstrFoldTables.cpp
@@ -1,9 +1,8 @@
//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,17 @@ using namespace llvm;
// tables that would be incorrect. The manual review process allows us a chance
// to catch these before they become observable bugs.
static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
+ { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
+ { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
+ { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
+ { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
+ { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
+ { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
+ { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
+ { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
+ { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
+ { X86::ADD8ri_DB, X86::ADD8mi, TB_NO_REVERSE },
+ { X86::ADD8rr_DB, X86::ADD8mr, TB_NO_REVERSE },
{ X86::ADC16ri, X86::ADC16mi, 0 },
{ X86::ADC16ri8, X86::ADC16mi8, 0 },
{ X86::ADC16rr, X86::ADC16mr, 0 },
@@ -48,22 +58,13 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
{ X86::ADC8rr, X86::ADC8mr, 0 },
{ X86::ADD16ri, X86::ADD16mi, 0 },
{ X86::ADD16ri8, X86::ADD16mi8, 0 },
- { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
- { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
{ X86::ADD16rr, X86::ADD16mr, 0 },
- { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
{ X86::ADD32ri, X86::ADD32mi, 0 },
{ X86::ADD32ri8, X86::ADD32mi8, 0 },
- { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
- { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
{ X86::ADD32rr, X86::ADD32mr, 0 },
- { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
{ X86::ADD64ri32, X86::ADD64mi32, 0 },
- { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
{ X86::ADD64ri8, X86::ADD64mi8, 0 },
- { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
{ X86::ADD64rr, X86::ADD64mr, 0 },
- { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
{ X86::ADD8ri, X86::ADD8mi, 0 },
{ X86::ADD8ri8, X86::ADD8mi8, 0 },
{ X86::ADD8rr, X86::ADD8mr, 0 },
@@ -247,7 +248,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
{ X86::XOR64rr, X86::XOR64mr, 0 },
{ X86::XOR8ri, X86::XOR8mi, 0 },
{ X86::XOR8ri8, X86::XOR8mi8, 0 },
- { X86::XOR8rr, X86::XOR8mr, 0 }
+ { X86::XOR8rr, X86::XOR8mr, 0 },
};
static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
@@ -305,9 +306,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
- { X86::MOVPQIto64rr, X86::MOVPQI2QImr, TB_FOLDED_STORE },
- { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
- { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE },
+ { X86::MOVPQIto64rr, X86::MOVPQI2QImr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::MOVSDto64rr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::MOVSS2DIrr, X86::MOVSSmr, TB_FOLDED_STORE },
{ X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
{ X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
{ X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
@@ -321,22 +322,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
{ X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
{ X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
- { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
- { X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
- { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
- { X86::SETBr, X86::SETBm, TB_FOLDED_STORE },
- { X86::SETEr, X86::SETEm, TB_FOLDED_STORE },
- { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE },
- { X86::SETGr, X86::SETGm, TB_FOLDED_STORE },
- { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE },
- { X86::SETLr, X86::SETLm, TB_FOLDED_STORE },
- { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE },
- { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE },
- { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE },
- { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE },
- { X86::SETOr, X86::SETOm, TB_FOLDED_STORE },
- { X86::SETPr, X86::SETPm, TB_FOLDED_STORE },
- { X86::SETSr, X86::SETSm, TB_FOLDED_STORE },
+ { X86::SETCCr, X86::SETCCm, TB_FOLDED_STORE },
{ X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
{ X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
{ X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
@@ -403,12 +389,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
{ X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE },
{ X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
{ X86::VMOVPDI2DIrr, X86::VMOVPDI2DImr, TB_FOLDED_STORE },
- { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE },
- { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr, TB_FOLDED_STORE },
- { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE },
- { X86::VMOVSDto64rr, X86::VMOVSDto64mr, TB_FOLDED_STORE },
- { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE },
- { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVSDto64Zrr, X86::VMOVSDZmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVSDto64rr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VMOVSS2DIZrr, X86::VMOVSSZmr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIrr, X86::VMOVSSmr, TB_FOLDED_STORE },
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
{ X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
{ X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
@@ -544,14 +530,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::MOV16rr, X86::MOV16rm, 0 },
{ X86::MOV32rr, X86::MOV32rm, 0 },
{ X86::MOV64rr, X86::MOV64rm, 0 },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
- { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
+ { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
+ { X86::MOV64toSDrr, X86::MOVSDrm_alt, TB_NO_REVERSE },
{ X86::MOV8rr, X86::MOV8rm, 0 },
{ X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
{ X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
{ X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
- { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
+ { X86::MOVDI2SSrr, X86::MOVSSrm_alt, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
{ X86::MOVDQUrr, X86::MOVDQUrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
@@ -628,7 +614,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::SQRTSSr, X86::SQRTSSm, 0 },
{ X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
{ X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
- // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
{ X86::TZCNT16rr, X86::TZCNT16rm, 0 },
{ X86::TZCNT32rr, X86::TZCNT32rm, 0 },
{ X86::TZCNT64rr, X86::TZCNT64rm, 0 },
@@ -663,7 +648,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE },
{ X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0 },
{ X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE },
- { X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0 },
+ { X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0 },
{ X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrm, 0 },
{ X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE },
{ X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
@@ -671,6 +656,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rm, 0 },
{ X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrm, 0 },
{ X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
+ { X86::VCVTNEPS2BF16Z128rr, X86::VCVTNEPS2BF16Z128rm, 0 },
+ { X86::VCVTNEPS2BF16Z256rr, X86::VCVTNEPS2BF16Z256rm, 0 },
+ { X86::VCVTNEPS2BF16Zrr, X86::VCVTNEPS2BF16Zrm, 0 },
{ X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
{ X86::VCVTPD2DQZ128rr, X86::VCVTPD2DQZ128rm, 0 },
{ X86::VCVTPD2DQZ256rr, X86::VCVTPD2DQZ256rm, 0 },
@@ -830,10 +818,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VGETMANTPSZ128rri, X86::VGETMANTPSZ128rmi, 0 },
{ X86::VGETMANTPSZ256rri, X86::VGETMANTPSZ256rmi, 0 },
{ X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
- { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
- { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
- { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
- { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
+ { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
+ { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE },
+ { X86::VMOV64toSDZrr, X86::VMOVSDZrm_alt, TB_NO_REVERSE },
+ { X86::VMOV64toSDrr, X86::VMOVSDrm_alt, TB_NO_REVERSE },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
@@ -851,8 +839,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
{ X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
- { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
- { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
+ { X86::VMOVDI2SSZrr, X86::VMOVSSZrm_alt, 0 },
+ { X86::VMOVDI2SSrr, X86::VMOVSSrm_alt, 0 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
@@ -1206,6 +1194,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
};
static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
+ { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
+ { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
+ { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
+ { X86::ADD8rr_DB, X86::ADD8rm, TB_NO_REVERSE },
{ X86::ADC16rr, X86::ADC16rm, 0 },
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
@@ -1213,11 +1205,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::ADCX32rr, X86::ADCX32rm, 0 },
{ X86::ADCX64rr, X86::ADCX64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
- { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
{ X86::ADD32rr, X86::ADD32rm, 0 },
- { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
{ X86::ADD64rr, X86::ADD64rm, 0 },
- { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
{ X86::ADD8rr, X86::ADD8rm, 0 },
{ X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
{ X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
@@ -1247,54 +1236,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
{ X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
{ X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
- { X86::CMOVA16rr, X86::CMOVA16rm, 0 },
- { X86::CMOVA32rr, X86::CMOVA32rm, 0 },
- { X86::CMOVA64rr, X86::CMOVA64rm, 0 },
- { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 },
- { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 },
- { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 },
- { X86::CMOVB16rr, X86::CMOVB16rm, 0 },
- { X86::CMOVB32rr, X86::CMOVB32rm, 0 },
- { X86::CMOVB64rr, X86::CMOVB64rm, 0 },
- { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
- { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
- { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
- { X86::CMOVE16rr, X86::CMOVE16rm, 0 },
- { X86::CMOVE32rr, X86::CMOVE32rm, 0 },
- { X86::CMOVE64rr, X86::CMOVE64rm, 0 },
- { X86::CMOVG16rr, X86::CMOVG16rm, 0 },
- { X86::CMOVG32rr, X86::CMOVG32rm, 0 },
- { X86::CMOVG64rr, X86::CMOVG64rm, 0 },
- { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 },
- { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 },
- { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 },
- { X86::CMOVL16rr, X86::CMOVL16rm, 0 },
- { X86::CMOVL32rr, X86::CMOVL32rm, 0 },
- { X86::CMOVL64rr, X86::CMOVL64rm, 0 },
- { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 },
- { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 },
- { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 },
- { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 },
- { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 },
- { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 },
- { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 },
- { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 },
- { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 },
- { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 },
- { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 },
- { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 },
- { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 },
- { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 },
- { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 },
- { X86::CMOVO16rr, X86::CMOVO16rm, 0 },
- { X86::CMOVO32rr, X86::CMOVO32rm, 0 },
- { X86::CMOVO64rr, X86::CMOVO64rm, 0 },
- { X86::CMOVP16rr, X86::CMOVP16rm, 0 },
- { X86::CMOVP32rr, X86::CMOVP32rm, 0 },
- { X86::CMOVP64rr, X86::CMOVP64rm, 0 },
- { X86::CMOVS16rr, X86::CMOVS16rm, 0 },
- { X86::CMOVS32rr, X86::CMOVS32rm, 0 },
- { X86::CMOVS64rr, X86::CMOVS64rm, 0 },
+ { X86::CMOV16rr, X86::CMOV16rm, 0 },
+ { X86::CMOV32rr, X86::CMOV32rm, 0 },
+ { X86::CMOV64rr, X86::CMOV64rm, 0 },
{ X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
{ X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
{ X86::CMPSDrr, X86::CMPSDrm, 0 },
@@ -1421,6 +1365,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, TB_NO_REVERSE },
{ X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
{ X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
+ { X86::MOVSDrr, X86::MOVLPDrm, TB_NO_REVERSE },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
@@ -1576,7 +1521,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
{ X86::SUBSSrr, X86::SUBSSrm, 0 },
{ X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
- // FIXME: TEST*rr -> swapped operand of TEST *mr.
{ X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
{ X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
@@ -1697,6 +1641,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmkz, 0 },
{ X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmkz, 0 },
{ X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmkz, 0 },
+ { X86::VCVTNE2PS2BF16Z128rr, X86::VCVTNE2PS2BF16Z128rm, 0 },
+ { X86::VCVTNE2PS2BF16Z256rr, X86::VCVTNE2PS2BF16Z256rm, 0 },
+ { X86::VCVTNE2PS2BF16Zrr, X86::VCVTNE2PS2BF16Zrm, 0 },
+ { X86::VCVTNEPS2BF16Z128rrkz, X86::VCVTNEPS2BF16Z128rmkz, 0 },
+ { X86::VCVTNEPS2BF16Z256rrkz, X86::VCVTNEPS2BF16Z256rmkz, 0 },
+ { X86::VCVTNEPS2BF16Zrrkz, X86::VCVTNEPS2BF16Zrmkz, 0 },
{ X86::VCVTPD2DQZ128rrkz, X86::VCVTPD2DQZ128rmkz, 0 },
{ X86::VCVTPD2DQZ256rrkz, X86::VCVTPD2DQZ256rmkz, 0 },
{ X86::VCVTPD2DQZrrkz, X86::VCVTPD2DQZrmkz, 0 },
@@ -2030,6 +1980,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMOVDQU8Zrrkz, X86::VMOVDQU8Zrmkz, TB_NO_REVERSE },
{ X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
{ X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
+ { X86::VMOVSDZrr, X86::VMOVLPDZ128rm, TB_NO_REVERSE },
+ { X86::VMOVSDrr, X86::VMOVLPDrm, TB_NO_REVERSE },
{ X86::VMOVSHDUPZ128rrkz, X86::VMOVSHDUPZ128rmkz, 0 },
{ X86::VMOVSHDUPZ256rrkz, X86::VMOVSHDUPZ256rmkz, 0 },
{ X86::VMOVSHDUPZrrkz, X86::VMOVSHDUPZrmkz, 0 },
@@ -2072,6 +2024,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
{ X86::VORPSZrr, X86::VORPSZrm, 0 },
{ X86::VORPSrr, X86::VORPSrm, 0 },
+ { X86::VP2INTERSECTDZ128rr, X86::VP2INTERSECTDZ128rm, 0 },
+ { X86::VP2INTERSECTDZ256rr, X86::VP2INTERSECTDZ256rm, 0 },
+ { X86::VP2INTERSECTDZrr, X86::VP2INTERSECTDZrm, 0 },
+ { X86::VP2INTERSECTQZ128rr, X86::VP2INTERSECTQZ128rm, 0 },
+ { X86::VP2INTERSECTQZ256rr, X86::VP2INTERSECTQZ256rm, 0 },
+ { X86::VP2INTERSECTQZrr, X86::VP2INTERSECTQZrm, 0 },
{ X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
{ X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
{ X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
@@ -3074,6 +3032,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmk, 0 },
{ X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmk, 0 },
{ X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmk, 0 },
+ { X86::VCVTNE2PS2BF16Z128rrkz, X86::VCVTNE2PS2BF16Z128rmkz, 0 },
+ { X86::VCVTNE2PS2BF16Z256rrkz, X86::VCVTNE2PS2BF16Z256rmkz, 0 },
+ { X86::VCVTNE2PS2BF16Zrrkz, X86::VCVTNE2PS2BF16Zrmkz, 0 },
+ { X86::VCVTNEPS2BF16Z128rrk, X86::VCVTNEPS2BF16Z128rmk, 0 },
+ { X86::VCVTNEPS2BF16Z256rrk, X86::VCVTNEPS2BF16Z256rmk, 0 },
+ { X86::VCVTNEPS2BF16Zrrk, X86::VCVTNEPS2BF16Zrmk, 0 },
{ X86::VCVTPD2DQZ128rrk, X86::VCVTPD2DQZ128rmk, 0 },
{ X86::VCVTPD2DQZ256rrk, X86::VCVTPD2DQZ256rmk, 0 },
{ X86::VCVTPD2DQZrrk, X86::VCVTPD2DQZrmk, 0 },
@@ -3162,6 +3126,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
{ X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDPBF16PSZ128r, X86::VDPBF16PSZ128m, 0 },
+ { X86::VDPBF16PSZ256r, X86::VDPBF16PSZ256m, 0 },
+ { X86::VDPBF16PSZr, X86::VDPBF16PSZm, 0 },
{ X86::VEXP2PDZrk, X86::VEXP2PDZmk, 0 },
{ X86::VEXP2PSZrk, X86::VEXP2PSZmk, 0 },
{ X86::VEXPANDPDZ128rrk, X86::VEXPANDPDZ128rmk, TB_NO_REVERSE },
@@ -4376,6 +4343,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
{ X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
{ X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
+ { X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0 },
+ { X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0 },
+ { X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0 },
{ X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
{ X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
@@ -4389,6 +4359,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
{ X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VDPBF16PSZ128rk, X86::VDPBF16PSZ128mk, 0 },
+ { X86::VDPBF16PSZ128rkz, X86::VDPBF16PSZ128mkz, 0 },
+ { X86::VDPBF16PSZ256rk, X86::VDPBF16PSZ256mk, 0 },
+ { X86::VDPBF16PSZ256rkz, X86::VDPBF16PSZ256mkz, 0 },
+ { X86::VDPBF16PSZrk, X86::VDPBF16PSZmk, 0 },
+ { X86::VDPBF16PSZrkz, X86::VDPBF16PSZmkz, 0 },
{ X86::VFIXUPIMMPDZ128rrik, X86::VFIXUPIMMPDZ128rmik, 0 },
{ X86::VFIXUPIMMPDZ128rrikz, X86::VFIXUPIMMPDZ128rmikz, 0 },
{ X86::VFIXUPIMMPDZ256rrik, X86::VFIXUPIMMPDZ256rmik, 0 },
@@ -5315,9 +5291,7 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
}
#endif
- const X86MemoryFoldTableEntry *Data = std::lower_bound(Table.begin(),
- Table.end(),
- RegOp);
+ const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
if (Data != Table.end() && Data->KeyOp == RegOp &&
!(Data->Flags & TB_NO_FORWARD))
return Data;
@@ -5404,7 +5378,7 @@ static ManagedStatic<X86MemUnfoldTable> MemUnfoldTable;
const X86MemoryFoldTableEntry *
llvm::lookupUnfoldTable(unsigned MemOp) {
auto &Table = MemUnfoldTable->Table;
- auto I = std::lower_bound(Table.begin(), Table.end(), MemOp);
+ auto I = llvm::lower_bound(Table, MemOp);
if (I != Table.end() && I->KeyOp == MemOp)
return &*I;
return nullptr;
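As a side note on the two lookup hunks above: llvm::lower_bound (from llvm/ADT/STLExtras.h) is a range-based wrapper around std::lower_bound, so the rewritten lookups are behavior-preserving. A minimal sketch of the idiom follows, using a hypothetical Entry table rather than the real fold-table types:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"

// Hypothetical table entry; the real tables key on X86 opcode enum values.
struct Entry { unsigned KeyOp; unsigned MemOp; };
// std::lower_bound only needs "element < key" for this form of lookup.
bool operator<(const Entry &E, unsigned Key) { return E.KeyOp < Key; }

const Entry *lookup(llvm::ArrayRef<Entry> Table, unsigned RegOp) {
  // Equivalent to std::lower_bound(Table.begin(), Table.end(), RegOp),
  // assuming Table is sorted by KeyOp.
  const Entry *I = llvm::lower_bound(Table, RegOp);
  return (I != Table.end() && I->KeyOp == RegOp) ? I : nullptr;
}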
diff --git a/lib/Target/X86/X86InstrFoldTables.h b/lib/Target/X86/X86InstrFoldTables.h
index 90016baead96..419baf98f61d 100644
--- a/lib/Target/X86/X86InstrFoldTables.h
+++ b/lib/Target/X86/X86InstrFoldTables.h
@@ -1,9 +1,8 @@
//===-- X86InstrFoldTables.h - X86 Instruction Folding Tables ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 47d4719d3060..e8f0d937dff4 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -1,9 +1,8 @@
//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -27,10 +26,13 @@ def RawFrmDst : Format<5>;
def RawFrmDstSrc : Format<6>;
def RawFrmImm8 : Format<7>;
def RawFrmImm16 : Format<8>;
+def AddCCFrm : Format<9>;
def MRMDestMem : Format<32>;
def MRMSrcMem : Format<33>;
def MRMSrcMem4VOp3 : Format<34>;
def MRMSrcMemOp4 : Format<35>;
+def MRMSrcMemCC : Format<36>;
+def MRMXmCC: Format<38>;
def MRMXm : Format<39>;
def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
@@ -39,6 +41,8 @@ def MRMDestReg : Format<48>;
def MRMSrcReg : Format<49>;
def MRMSrcReg4VOp3 : Format<50>;
def MRMSrcRegOp4 : Format<51>;
+def MRMSrcRegCC : Format<52>;
+def MRMXrCC: Format<54>;
def MRMXr : Format<55>;
def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
@@ -206,13 +210,10 @@ class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
-class VEX_W { bits<2> VEX_WPrefix = 1; }
-class VEX_WIG { bits<2> VEX_WPrefix = 2; }
+class VEX_W { bit HasVEX_W = 1; }
+class VEX_WIG { bit IgnoresVEX_W = 1; }
// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
-// FIXME: We should consider adding separate bits for VEX_WIG and the extra
-// part of W1X. This would probably simplify the tablegen emitters and
-// the TSFlags creation below.
-class VEX_W1X { bits<2> VEX_WPrefix = 3; }
+class VEX_W1X { bit HasVEX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
@@ -296,7 +297,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
- bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit HasVEX_W = 0; // Does this inst set the VEX_W field?
+ bit IgnoresVEX_W = 0; // Does this inst ignore VEX_W field?
+ bit EVEX_W1_VEX_W0 = 0; // This EVEX inst with VEX.W==1 can become a VEX
+ // instruction with VEX.W == 0.
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
@@ -311,11 +315,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
  bit hasNoTrackPrefix = 0; // Does this inst have a 0x3E (NoTrack) prefix?
- bits<2> EVEX_LL;
- let EVEX_LL{0} = hasVEX_L;
- let EVEX_LL{1} = hasEVEX_L2;
// Vector size in bytes.
- bits<7> VectSize = !shl(16, EVEX_LL);
+ bits<7> VectSize = !if(hasEVEX_L2, 64, !if(hasVEX_L, 32, 16));
// The scaling factor for AVX512's compressed displacement is either
// - the size of a power-of-two number of elements or
@@ -355,7 +356,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{29-28} = OpEncBits;
let TSFlags{37-30} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
- let TSFlags{38} = VEX_WPrefix{0};
+ let TSFlags{38} = HasVEX_W;
let TSFlags{39} = hasVEX_4V;
let TSFlags{40} = hasVEX_L;
let TSFlags{41} = hasEVEX_K;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 11a27ba90586..096cc27861ca 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1,9 +1,8 @@
//===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -100,8 +99,10 @@ def X86insertps : SDNode<"X86ISD::INSERTPS",
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
-def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>,
@@ -127,21 +128,31 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT",
def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
- SDTCisSameSizeAs<0, 1>]>>;
+ SDTCisOpSmallerThanOp<0, 1>]>>;
-def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND",
+def X86frounds : SDNode<"X86ISD::VFPROUNDS",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
+ SDTCisSameAs<0, 1>,
+ SDTCVecEltisVT<2, f64>,
+ SDTCisSameSizeAs<0, 2>]>>;
+
+def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f64>,
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
-def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>,
+def X86fpexts : SDNode<"X86ISD::VFPEXTS",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f32>,
- SDTCisSameSizeAs<0, 2>,
- SDTCisVT<3, i32>]>>;
+ SDTCisSameSizeAs<0, 2>]>>;
+def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTCisSameAs<0, 1>,
+ SDTCVecEltisVT<2, f32>,
+ SDTCisSameSizeAs<0, 2>]>>;
def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
@@ -164,25 +175,14 @@ def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
-def X86CmpMaskCCRound :
- SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
- SDTCisVec<1>, SDTCisFP<1>, SDTCisSameAs<2, 1>,
- SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
- SDTCisVT<4, i32>]>;
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
-def X86CmpMaskCCScalarRound :
- SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
- SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
-
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
-// Hack to make CMPM commutable in tablegen patterns for load folding.
-def X86cmpm_c : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPCommutative]>;
-def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmSAE : SDNode<"X86ISD::CMPM_SAE", X86CmpMaskCC>;
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
-def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>;
+def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>;
def X86phminpos: SDNode<"X86ISD::PHMINPOS",
SDTypeProfile<1, 1, [SDTCisVT<0, v8i16>, SDTCisVT<1, v8i16>]>>;
@@ -198,6 +198,8 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<0>]>;
+def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
+def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
@@ -299,25 +301,15 @@ def SDTFPBinOpImm: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisVT<3, i32>]>;
-def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisVT<3, i32>,
- SDTCisVT<4, i32>]>;
-def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisInt<3>,
- SDTCisSameSizeAs<0, 3>,
- SDTCisSameNumEltsAs<0, 3>,
- SDTCisVT<4, i32>,
- SDTCisVT<5, i32>]>;
-def SDTFPUnaryOpImm: SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
+def SDTFPTernaryOpImm: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisInt<3>,
+ SDTCisSameSizeAs<0, 3>,
+ SDTCisSameNumEltsAs<0, 3>,
+ SDTCisVT<4, i32>]>;
+def SDTFPUnaryOpImm: SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisSameAs<0,1>,
SDTCisVT<2, i32>]>;
-def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>,
- SDTCisVT<3, i32>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
@@ -373,11 +365,23 @@ def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
-def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2OpFP>;
-def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2OpFP>;
-
-def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2OpFP>;
-def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2OpFP>;
+def X86Movsd : SDNode<"X86ISD::MOVSD",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v2f64>,
+ SDTCisVT<1, v2f64>,
+ SDTCisVT<2, v2f64>]>>;
+def X86Movss : SDNode<"X86ISD::MOVSS",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>>;
def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisVec<1>, SDTCisInt<1>,
@@ -421,16 +425,18 @@ def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
-def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>;
-def X86VFixupimmScalar : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRound>;
+def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImm>;
+def X86VFixupimmSAE : SDNode<"X86ISD::VFIXUPIMM_SAE", SDTFPTernaryOpImm>;
+def X86VFixupimms : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImm>;
+def X86VFixupimmSAEs : SDNode<"X86ISD::VFIXUPIMMS_SAE", SDTFPTernaryOpImm>;
def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImm>;
-def X86VRangeRnd : SDNode<"X86ISD::VRANGE_RND", SDTFPBinOpImmRound>;
+def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>;
def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
-def X86VReduceRnd : SDNode<"X86ISD::VREDUCE_RND", SDTFPUnaryOpImmRound>;
+def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
-def X86VRndScaleRnd: SDNode<"X86ISD::VRNDSCALE_RND", SDTFPUnaryOpImmRound>;
+def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>;
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
-def X86VGetMantRnd : SDNode<"X86ISD::VGETMANT_RND", SDTFPUnaryOpImmRound>;
+def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>;
def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
SDTCisFP<1>,
@@ -448,27 +454,42 @@ def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
+def X86Blendv : SDNode<"X86ISD::BLENDV",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<2, 3>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameSizeAs<0, 1>]>>;
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
+def X86fadds : SDNode<"X86ISD::FADDS", SDTFPBinOp>;
def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fsubs : SDNode<"X86ISD::FSUBS", SDTFPBinOp>;
def X86fsubRnds : SDNode<"X86ISD::FSUBS_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fmuls : SDNode<"X86ISD::FMULS", SDTFPBinOp>;
def X86fmulRnds : SDNode<"X86ISD::FMULS_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+def X86fdivs : SDNode<"X86ISD::FDIVS", SDTFPBinOp>;
def X86fdivRnds : SDNode<"X86ISD::FDIVS_RND", SDTFPBinOpRound>;
-def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
-def X86fmaxRnds : SDNode<"X86ISD::FMAXS_RND", SDTFPBinOpRound>;
-def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
-def X86fminRnds : SDNode<"X86ISD::FMINS_RND", SDTFPBinOpRound>;
-def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
-def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>;
+def X86fmaxSAE : SDNode<"X86ISD::FMAX_SAE", SDTFPBinOp>;
+def X86fmaxSAEs : SDNode<"X86ISD::FMAXS_SAE", SDTFPBinOp>;
+def X86fminSAE : SDNode<"X86ISD::FMIN_SAE", SDTFPBinOp>;
+def X86fminSAEs : SDNode<"X86ISD::FMINS_SAE", SDTFPBinOp>;
+def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOp>;
+def X86scalefRnd : SDNode<"X86ISD::SCALEF_RND", SDTFPBinOpRound>;
+def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOp>;
+def X86scalefsRnd: SDNode<"X86ISD::SCALEFS_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
+def X86fsqrts : SDNode<"X86ISD::FSQRTS", SDTFPBinOp>;
def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
-def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
-def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>;
+def X86fgetexp : SDNode<"X86ISD::FGETEXP", SDTFPUnaryOp>;
+def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>;
+def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
+def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
@@ -484,6 +505,10 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
+def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisVec<1>, SDTCisSameAs<1, 2>]>>;
+
def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
@@ -500,27 +525,36 @@ def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
-def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
-def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
-def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>;
+def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOp>;
+def X86rsqrt28SAE: SDNode<"X86ISD::RSQRT28_SAE", SDTFPUnaryOp>;
+def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOp>;
+def X86rcp28SAE : SDNode<"X86ISD::RCP28_SAE", SDTFPUnaryOp>;
+def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOp>;
+def X86exp2SAE : SDNode<"X86ISD::EXP2_SAE", SDTFPUnaryOp>;
def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
-def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>;
-def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>;
+def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOp>;
+def X86rsqrt28SAEs : SDNode<"X86ISD::RSQRT28S_SAE", SDTFPBinOp>;
+def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOp>;
+def X86rcp28SAEs : SDNode<"X86ISD::RCP28S_SAE", SDTFPBinOp>;
def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>;
def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>;
def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImm>;
-def X86RangesRnd : SDNode<"X86ISD::VRANGES_RND", SDTFPBinOpImmRound>;
-def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>;
-def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>;
-def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>;
-
-def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
- [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
-def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
- [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
+def X86RangesSAE : SDNode<"X86ISD::VRANGES_SAE", SDTFPBinOpImm>;
+def X86RndScalesSAE : SDNode<"X86ISD::VRNDSCALES_SAE", SDTFPBinOpImm>;
+def X86ReducesSAE : SDNode<"X86ISD::VREDUCES_SAE", SDTFPBinOpImm>;
+def X86GetMantsSAE : SDNode<"X86ISD::VGETMANTS_SAE", SDTFPBinOpImm>;
+
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>,
+ SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<0, 3>]>, []>;
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisVec<1>,
+ SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<0, 3>]>, []>;
// vpshufbitqmb
def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
@@ -529,6 +563,8 @@ def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
SDTCVecEltisVT<0,i1>,
SDTCisSameNumEltsAs<0,1>]>>;
+def SDTintToFP: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>,
SDTCisVT<3, i32>]>;
@@ -550,13 +586,15 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisVT<2, i32>]>;
// Scalar
+def X86SintToFp : SDNode<"X86ISD::SCALAR_SINT_TO_FP", SDTintToFP>;
def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
+def X86UintToFp : SDNode<"X86ISD::SCALAR_UINT_TO_FP", SDTintToFP>;
def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>;
def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>;
-def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>;
-def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>;
+def X86cvtts2IntSAE : SDNode<"X86ISD::CVTTS2SI_SAE", SDTSFloatToInt>;
+def X86cvtts2UIntSAE : SDNode<"X86ISD::CVTTS2UI_SAE", SDTSFloatToInt>;
def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>;
def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>;
@@ -566,8 +604,8 @@ def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
// Vector with rounding mode
// cvtt fp-to-int stuff
-def X86cvttp2siRnd : SDNode<"X86ISD::CVTTP2SI_RND", SDTFloatToIntRnd>;
-def X86cvttp2uiRnd : SDNode<"X86ISD::CVTTP2UI_RND", SDTFloatToIntRnd>;
+def X86cvttp2siSAE : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>;
+def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>;
def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>;
def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>;
@@ -590,6 +628,13 @@ def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+// Masked versions of above
+def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisInt<1>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>;
def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisFP<1>,
SDTCisSameSizeAs<0, 1>,
@@ -597,6 +642,9 @@ def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>;
+def X86VMSintToFP : SDNode<"X86ISD::MCVTSI2P", SDTMVintToFP>;
+def X86VMUintToFP : SDNode<"X86ISD::MCVTUI2P", SDTMVintToFP>;
+
def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>;
def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
@@ -607,10 +655,9 @@ def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>]> >;
-def X86cvtph2psRnd : SDNode<"X86ISD::CVTPH2PS_RND",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, i16>,
- SDTCisVT<2, i32>]> >;
+def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>]> >;
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
@@ -623,17 +670,35 @@ def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
SDTCisSameAs<0, 3>,
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<1, 4>]> >;
-def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
- SDTCisOpSmallerThanOp<1, 0>,
- SDTCisVT<2, i32>]>>;
+ SDTCisOpSmallerThanOp<1, 0>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
SDTCisOpSmallerThanOp<0, 1>,
SDTCisVT<2, i32>]>>;
+// cvt fp to bfloat16
+def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<1,2>]>>;
+def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>>;
+def X86cvtneps2bf16 : SDNode<"X86ISD::CVTNEPS2BF16",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>]>>;
+def X86dpbf16ps : SDNode<"X86ISD::DPBF16PS",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTCisSameAs<0,1>,
+ SDTCVecEltisVT<2, i32>,
+ SDTCisSameAs<2,3>]>>;
+
// galois field arithmetic
def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
@@ -653,18 +718,8 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
SDNPWantRoot, SDNPWantParent]>;
-def ssmem : Operand<v4f32> {
- let PrintMethod = "printf32mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
- let ParserMatchClass = X86Mem32AsmOperand;
- let OperandType = "OPERAND_MEMORY";
-}
-def sdmem : Operand<v2f64> {
- let PrintMethod = "printf64mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
- let ParserMatchClass = X86Mem64AsmOperand;
- let OperandType = "OPERAND_MEMORY";
-}
+def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
//===----------------------------------------------------------------------===//
// SSE pattern fragments
@@ -695,9 +750,9 @@ def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
// 128-/256-/512-bit extload pattern fragments
-def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
-def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
-def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
+def extloadv2f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv8f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
// Like 'store', but always requires vector size alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -884,15 +939,20 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+def X86vzload32 : PatFrag<(ops node:$src),
+ (X86vzld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
+}]>;
-def vzload_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzload node:$src)))>;
+def X86vzload64 : PatFrag<(ops node:$src),
+ (X86vzld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
+
+def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr),
+ (X86vextractst node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
def fp32imm0 : PatLeaf<(f32 fpimm), [{
@@ -903,20 +963,6 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
-def I8Imm : SDNodeXForm<imm, [{
- // Transformation function: get the low 8 bits.
- return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
-}]>;
-
-def FROUND_NO_EXC : PatLeaf<(i32 8)>;
-def FROUND_CURRENT : PatLeaf<(i32 4)>;
-
-// BYTE_imm - Transform bit immediates into byte immediates.
-def BYTE_imm : SDNodeXForm<imm, [{
- // Transformation function: imm >> 3
- return getI32Imm(N->getZExtValue() >> 3, SDLoc(N));
-}]>;
-
// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
// to VEXTRACTF128/VEXTRACTI128 imm.
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
@@ -943,8 +989,10 @@ def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
- node:$index), [{}],
- EXTRACT_get_vextract128_imm>;
+ node:$index), [{
+ // Index 0 can be handled via extract_subreg.
+ return !isNullConstant(N->getOperand(1));
+}], EXTRACT_get_vextract128_imm>;
def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index),
@@ -954,8 +1002,10 @@ def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
def vextract256_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
- node:$index), [{}],
- EXTRACT_get_vextract256_imm>;
+ node:$index), [{
+ // Index 0 can be handled via extract_subreg.
+ return !isNullConstant(N->getOperand(1));
+}], EXTRACT_get_vextract256_imm>;
def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index),
@@ -963,70 +1013,46 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
node:$index), [{}],
INSERT_get_vinsert256_imm>;
-def X86mload : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_load node:$src1, node:$src2, node:$src3), [{
+def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_ld node:$src1, node:$src2, node:$src3), [{
return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}]>;
-def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mload node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mload node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
-}]>;
-
-def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mload node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
-}]>;
-
-def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
- cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+ // Use the node type to determine the size the alignment needs to match.
+ // We can't use memory VT because type widening changes the node VT, but
+ // not the memory VT.
+ auto *Ld = cast<MaskedLoadSDNode>(N);
+ return Ld->getAlignment() >= Ld->getValueType(0).getStoreSize();
}]>;
def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_load node:$src1, node:$src2, node:$src3), [{
+ (masked_ld node:$src1, node:$src2, node:$src3), [{
return cast<MaskedLoadSDNode>(N)->isExpandingLoad();
}]>;
// Masked store fragments.
// X86mstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail).
-def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_st node:$src1, node:$src2, node:$src3), [{
return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
(!cast<MaskedStoreSDNode>(N)->isCompressingStore());
}]>;
-def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mstore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mstore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
-}]>;
-
-def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mstore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
-}]>;
-
-def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
- (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
+ // Use the node type to determine the size the alignment needs to match.
+ // We can't use memory VT because type widening changes the node VT, but
+ // not the memory VT.
+ auto *St = cast<MaskedStoreSDNode>(N);
+ return St->getAlignment() >= St->getOperand(1).getValueType().getStoreSize();
}]>;
def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (masked_st node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->isCompressingStore();
}]>;
@@ -1034,7 +1060,7 @@ def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
// X86mtruncstore can't be implemented in core DAG files because some targets
// don't support vector types (llvm-tblgen will fail)
def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (masked_st node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;
def masked_truncstorevi8 :
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ab14ee7fadf2..dbe45356c42b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -220,16 +219,22 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return true;
case X86::MOV32rm:
case X86::MOVSSrm:
- case X86::VMOVSSZrm:
+ case X86::MOVSSrm_alt:
case X86::VMOVSSrm:
+ case X86::VMOVSSrm_alt:
+ case X86::VMOVSSZrm:
+ case X86::VMOVSSZrm_alt:
case X86::KMOVDkm:
MemBytes = 4;
return true;
case X86::MOV64rm:
case X86::LD_Fp64m:
case X86::MOVSDrm:
+ case X86::MOVSDrm_alt:
case X86::VMOVSDrm:
+ case X86::VMOVSDrm_alt:
case X86::VMOVSDZrm:
+ case X86::VMOVSDZrm_alt:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::KMOVQkm:
@@ -483,9 +488,10 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::MOV16rm:
case X86::MOV32rm:
case X86::MOV64rm:
- case X86::LD_Fp64m:
case X86::MOVSSrm:
+ case X86::MOVSSrm_alt:
case X86::MOVSDrm:
+ case X86::MOVSDrm_alt:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
@@ -493,7 +499,9 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::MOVDQArm:
case X86::MOVDQUrm:
case X86::VMOVSSrm:
+ case X86::VMOVSSrm_alt:
case X86::VMOVSDrm:
+ case X86::VMOVSDrm_alt:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
@@ -510,7 +518,9 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::MMX_MOVQ64rm:
// AVX-512
case X86::VMOVSSZrm:
+ case X86::VMOVSSZrm_alt:
case X86::VMOVSDZrm:
+ case X86::VMOVSDZrm_alt:
case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVAPDZrm:
@@ -590,96 +600,12 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return true;
}
-bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MachineBasicBlock::iterator E = MBB.end();
-
- // For compile time consideration, if we are not able to determine the
- // safety after visiting 4 instructions in each direction, we will assume
- // it's not safe.
- MachineBasicBlock::iterator Iter = I;
- for (unsigned i = 0; Iter != E && i < 4; ++i) {
- bool SeenDef = false;
- for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
- MachineOperand &MO = Iter->getOperand(j);
- if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
- SeenDef = true;
- if (!MO.isReg())
- continue;
- if (MO.getReg() == X86::EFLAGS) {
- if (MO.isUse())
- return false;
- SeenDef = true;
- }
- }
-
- if (SeenDef)
- // This instruction defines EFLAGS, no need to look any further.
- return true;
- ++Iter;
- // Skip over debug instructions.
- while (Iter != E && Iter->isDebugInstr())
- ++Iter;
- }
-
-  // It is safe to clobber EFLAGS at the end of a block if no successor has it
- // live in.
- if (Iter == E) {
- for (MachineBasicBlock *S : MBB.successors())
- if (S->isLiveIn(X86::EFLAGS))
- return false;
- return true;
- }
-
- MachineBasicBlock::iterator B = MBB.begin();
- Iter = I;
- for (unsigned i = 0; i < 4; ++i) {
- // If we make it to the beginning of the block, it's safe to clobber
- // EFLAGS iff EFLAGS is not live-in.
- if (Iter == B)
- return !MBB.isLiveIn(X86::EFLAGS);
-
- --Iter;
- // Skip over debug instructions.
- while (Iter != B && Iter->isDebugInstr())
- --Iter;
-
- bool SawKill = false;
- for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
- MachineOperand &MO = Iter->getOperand(j);
- // A register mask may clobber EFLAGS, but we should still look for a
- // live EFLAGS def.
- if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
- SawKill = true;
- if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
- if (MO.isDef()) return MO.isDead();
- if (MO.isKill()) SawKill = true;
- }
- }
-
- if (SawKill)
- // This instruction kills EFLAGS and doesn't redefine it, so
- // there's no need to look further.
- return true;
- }
-
- // Conservative answer.
- return false;
-}
-
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr &Orig,
const TargetRegisterInfo &TRI) const {
- bool ClobbersEFLAGS = false;
- for (const MachineOperand &MO : Orig.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
- ClobbersEFLAGS = true;
- break;
- }
- }
-
+ bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
// The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
// effects.
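A brief aside on the simplified reMaterialize hunk above: the hand-rolled operand scan is replaced by MachineInstr::modifiesRegister, which, when given a TargetRegisterInfo, should also account for register-mask clobbers. A minimal sketch of the call, assuming the in-tree API at this revision; the helper name is made up for illustration:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h" // generated X86 register enum (X86::EFLAGS)

// Hypothetical helper mirroring the clobber check done before rematerialization.
static bool clobbersEFLAGS(const llvm::MachineInstr &MI,
                           const llvm::TargetRegisterInfo &TRI) {
  // Covers explicit defs of EFLAGS as well as regmask clobbers.
  return MI.modifiesRegister(llvm::X86::EFLAGS, &TRI);
}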
@@ -796,11 +722,10 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
- LiveVariables *LV) const {
+ LiveVariables *LV, bool Is8BitOp) const {
// We handle 8-bit adds and various 16-bit opcodes in the switch below.
- bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
+ assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
*RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
"Unexpected type for LEA transform");
@@ -830,7 +755,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned Src = MI.getOperand(1).getReg();
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
- unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
+ unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
@@ -842,19 +767,23 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
+ case X86::SHL8ri:
case X86::SHL16ri: {
unsigned ShAmt = MI.getOperand(2).getImm();
MIB.addReg(0).addImm(1ULL << ShAmt)
.addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
break;
}
+ case X86::INC8r:
case X86::INC16r:
addRegOffset(MIB, InRegLEA, true, 1);
break;
+ case X86::DEC8r:
case X86::DEC16r:
addRegOffset(MIB, InRegLEA, true, -1);
break;
case X86::ADD8ri:
+ case X86::ADD8ri_DB:
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
@@ -862,6 +791,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
case X86::ADD8rr:
+ case X86::ADD8rr_DB:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
unsigned Src2 = MI.getOperand(2).getReg();
@@ -948,9 +878,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *NewMI = nullptr;
bool Is64Bit = Subtarget.is64Bit();
+ bool Is8BitOp = false;
unsigned MIOpc = MI.getOpcode();
switch (MIOpc) {
- default: return nullptr;
+ default: llvm_unreachable("Unreachable!");
case X86::SHL64ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
@@ -1000,12 +931,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
+ case X86::SHL8ri:
+ Is8BitOp = true;
+ LLVM_FALLTHROUGH;
case X86::SHL16ri: {
assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
return nullptr;
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
}
case X86::INC64r:
case X86::INC32r: {
@@ -1029,8 +963,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
NewMI = addOffset(MIB, 1);
break;
}
- case X86::INC16r:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
case X86::DEC64r:
case X86::DEC32r: {
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
@@ -1054,8 +986,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
+ case X86::DEC8r:
+ case X86::INC8r:
+ Is8BitOp = true;
+ LLVM_FALLTHROUGH;
case X86::DEC16r:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+ case X86::INC16r:
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
@@ -1094,9 +1031,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::ADD8rr:
+ case X86::ADD8rr_DB:
+ Is8BitOp = true;
+ LLVM_FALLTHROUGH;
case X86::ADD16rr:
case X86::ADD16rr_DB:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -1130,11 +1070,59 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::ADD8ri:
+ case X86::ADD8ri_DB:
+ Is8BitOp = true;
+ LLVM_FALLTHROUGH;
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
+ return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ case X86::SUB8ri:
+ case X86::SUB16ri8:
+ case X86::SUB16ri:
+ /// FIXME: Support these similar to ADD8ri/ADD16ri*.
+ return nullptr;
+ case X86::SUB32ri8:
+ case X86::SUB32ri: {
+ int64_t Imm = MI.getOperand(2).getImm();
+ if (!isInt<32>(-Imm))
+ return nullptr;
+
+ assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
+ unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
+
+ bool isKill;
+ unsigned SrcReg;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
+ SrcReg, isKill, ImplicitOp, LV))
+ return nullptr;
+
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .addReg(SrcReg, getKillRegState(isKill));
+ if (ImplicitOp.getReg() != 0)
+ MIB.add(ImplicitOp);
+
+ NewMI = addOffset(MIB, -Imm);
+ break;
+ }
+
+ case X86::SUB64ri8:
+ case X86::SUB64ri32: {
+ int64_t Imm = MI.getOperand(2).getImm();
+ if (!isInt<32>(-Imm))
+ return nullptr;
+
+ assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
+
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
+ get(X86::LEA64r)).add(Dest).add(Src);
+ NewMI = addOffset(MIB, -Imm);
+ break;
+ }
+
case X86::VMOVDQU8Z128rmk:
case X86::VMOVDQU8Z256rmk:
case X86::VMOVDQU8Zrmk:
@@ -1522,7 +1510,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VBLENDPDrri:
case X86::VBLENDPSrri:
// If we're optimizing for size, try to use MOVSD/MOVSS.
- if (MI.getParent()->getParent()->getFunction().optForSize()) {
+ if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
unsigned Mask, Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
@@ -1548,47 +1536,90 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VPBLENDWrri:
case X86::VPBLENDDYrri:
case X86::VPBLENDWYrri:{
- unsigned Mask;
+ int8_t Mask;
switch (MI.getOpcode()) {
default: llvm_unreachable("Unreachable!");
- case X86::BLENDPDrri: Mask = 0x03; break;
- case X86::BLENDPSrri: Mask = 0x0F; break;
- case X86::PBLENDWrri: Mask = 0xFF; break;
- case X86::VBLENDPDrri: Mask = 0x03; break;
- case X86::VBLENDPSrri: Mask = 0x0F; break;
- case X86::VBLENDPDYrri: Mask = 0x0F; break;
- case X86::VBLENDPSYrri: Mask = 0xFF; break;
- case X86::VPBLENDDrri: Mask = 0x0F; break;
- case X86::VPBLENDWrri: Mask = 0xFF; break;
- case X86::VPBLENDDYrri: Mask = 0xFF; break;
- case X86::VPBLENDWYrri: Mask = 0xFF; break;
+ case X86::BLENDPDrri: Mask = (int8_t)0x03; break;
+ case X86::BLENDPSrri: Mask = (int8_t)0x0F; break;
+ case X86::PBLENDWrri: Mask = (int8_t)0xFF; break;
+ case X86::VBLENDPDrri: Mask = (int8_t)0x03; break;
+ case X86::VBLENDPSrri: Mask = (int8_t)0x0F; break;
+ case X86::VBLENDPDYrri: Mask = (int8_t)0x0F; break;
+ case X86::VBLENDPSYrri: Mask = (int8_t)0xFF; break;
+ case X86::VPBLENDDrri: Mask = (int8_t)0x0F; break;
+ case X86::VPBLENDWrri: Mask = (int8_t)0xFF; break;
+ case X86::VPBLENDDYrri: Mask = (int8_t)0xFF; break;
+ case X86::VPBLENDWYrri: Mask = (int8_t)0xFF; break;
}
// Only the least significant bits of Imm are used.
- unsigned Imm = MI.getOperand(3).getImm() & Mask;
+ // Using int8_t to ensure it will be sign extended to the int64_t that
+ // setImm takes in order to match isel behavior.
+ int8_t Imm = MI.getOperand(3).getImm() & Mask;
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(Mask ^ Imm);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::INSERTPSrr:
+ case X86::VINSERTPSrr:
+ case X86::VINSERTPSZrr: {
+ unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
+ unsigned ZMask = Imm & 15;
+ unsigned DstIdx = (Imm >> 4) & 3;
+ unsigned SrcIdx = (Imm >> 6) & 3;
+
+ // We can commute insertps if we zero 2 of the elements, the insertion is
+ // "inline" and we don't override the insertion with a zero.
+ if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
+ countPopulation(ZMask) == 2) {
+ unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
+ assert(AltIdx < 4 && "Illegal insertion index");
+ unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(AltImm);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ return nullptr;
+ }
case X86::MOVSDrr:
case X86::MOVSSrr:
case X86::VMOVSDrr:
case X86::VMOVSSrr:{
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
- assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
+ if (Subtarget.hasSSE41()) {
+ unsigned Mask, Opc;
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
+ case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
+ case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+ case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ }
- unsigned Mask, Opc;
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
- case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
- case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
- case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
}
+ // Convert to SHUFPD.
+ assert(MI.getOpcode() == X86::MOVSDrr &&
+ "Can only commute MOVSDrr without SSE4.1");
+
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.setDesc(get(Opc));
- WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ WorkingMI.setDesc(get(X86::SHUFPDrri));
+ WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ case X86::SHUFPDrri: {
+ // Commute to MOVSD.
+ assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(X86::MOVSDrr));
+ WorkingMI.RemoveOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -1657,7 +1688,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Flip permute source immediate.
// Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
// Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
- unsigned Imm = MI.getOperand(3).getImm() & 0xFF;
+ int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
@@ -1686,76 +1717,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
- case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
- case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
- case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
- case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
- case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
- case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
- case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
- case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
- case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
- case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
- case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
- case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
- case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
- case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
- case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
- unsigned Opc;
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
- case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
- case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
- case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
- case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
- case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
- case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
- case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
- case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
- case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
- case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
- case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
- case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
- case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
- case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
- case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
- case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
- case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
- case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
- case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
- case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
- case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
- case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
- case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
- case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
- case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
- case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
- case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
- case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
- case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
- case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
- case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
- case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
- case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
- case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
- case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
- case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
- case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
- case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
- case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
- case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
- case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
- case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
- case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
- case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
- case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
- case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
- case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
- }
+ case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr: {
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.setDesc(get(Opc));
+ unsigned OpNo = MI.getDesc().getNumOperands() - 1;
+ X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
+ WorkingMI.getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -1879,7 +1845,6 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
// regardless of the FMA opcode. The FMA opcode is adjusted later.
if (SrcOpIdx1 == CommuteAnyOperandIndex ||
SrcOpIdx2 == CommuteAnyOperandIndex) {
- unsigned CommutableOpIdx1 = SrcOpIdx1;
unsigned CommutableOpIdx2 = SrcOpIdx2;
// At least one of operands to be commuted is not specified and
@@ -1895,6 +1860,8 @@ X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
// CommutableOpIdx2 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx1.
unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
+
+ unsigned CommutableOpIdx1;
for (CommutableOpIdx1 = LastCommutableVecOp;
CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
// Just ignore and skip the k-mask operand.
@@ -1946,28 +1913,43 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
case X86::VCMPPDZ128rri:
case X86::VCMPPSZ128rri:
case X86::VCMPPDZ256rri:
- case X86::VCMPPSZ256rri: {
+ case X86::VCMPPSZ256rri:
+ case X86::VCMPPDZrrik:
+ case X86::VCMPPSZrrik:
+ case X86::VCMPPDZ128rrik:
+ case X86::VCMPPSZ128rrik:
+ case X86::VCMPPDZ256rrik:
+ case X86::VCMPPSZ256rrik: {
+ unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;
+
// Float comparison can be safely commuted for
// Ordered/Unordered/Equal/NotEqual tests
- unsigned Imm = MI.getOperand(3).getImm() & 0x7;
+ unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
switch (Imm) {
case 0x00: // EQUAL
case 0x03: // UNORDERED
case 0x04: // NOT EQUAL
case 0x07: // ORDERED
- // The indices of the commutable operands are 1 and 2.
+ // The indices of the commutable operands are 1 and 2 (or 2 and 3
+ // when masked).
// Assign them to the returned operand indices here.
- return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
+ 2 + OpOffset);
}
return false;
}
- case X86::MOVSDrr:
case X86::MOVSSrr:
- case X86::VMOVSDrr:
- case X86::VMOVSSrr:
+ // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
+ // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
+ // AVX implies sse4.1.
if (Subtarget.hasSSE41())
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return false;
+ case X86::SHUFPDrri:
+ // We can commute this to MOVSD.
+ if (MI.getOperand(3).getImm() == 0x02)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ return false;
case X86::MOVHLPSrr:
case X86::UNPCKHPDrr:
case X86::VMOVHLPSrr:
@@ -2089,125 +2071,33 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
return false;
}
-X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
- switch (BrOpc) {
+X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
- case X86::JE_1: return X86::COND_E;
- case X86::JNE_1: return X86::COND_NE;
- case X86::JL_1: return X86::COND_L;
- case X86::JLE_1: return X86::COND_LE;
- case X86::JG_1: return X86::COND_G;
- case X86::JGE_1: return X86::COND_GE;
- case X86::JB_1: return X86::COND_B;
- case X86::JBE_1: return X86::COND_BE;
- case X86::JA_1: return X86::COND_A;
- case X86::JAE_1: return X86::COND_AE;
- case X86::JS_1: return X86::COND_S;
- case X86::JNS_1: return X86::COND_NS;
- case X86::JP_1: return X86::COND_P;
- case X86::JNP_1: return X86::COND_NP;
- case X86::JO_1: return X86::COND_O;
- case X86::JNO_1: return X86::COND_NO;
- }
-}
-
-/// Return condition code of a SET opcode.
-X86::CondCode X86::getCondFromSETOpc(unsigned Opc) {
- switch (Opc) {
+ case X86::JCC_1:
+ return static_cast<X86::CondCode>(
+ MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
+ }
+}
+
+/// Return condition code of a SETCC opcode.
+X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
- case X86::SETAr: case X86::SETAm: return X86::COND_A;
- case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
- case X86::SETBr: case X86::SETBm: return X86::COND_B;
- case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
- case X86::SETEr: case X86::SETEm: return X86::COND_E;
- case X86::SETGr: case X86::SETGm: return X86::COND_G;
- case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
- case X86::SETLr: case X86::SETLm: return X86::COND_L;
- case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
- case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
- case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
- case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
- case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
- case X86::SETOr: case X86::SETOm: return X86::COND_O;
- case X86::SETPr: case X86::SETPm: return X86::COND_P;
- case X86::SETSr: case X86::SETSm: return X86::COND_S;
+ case X86::SETCCr: case X86::SETCCm:
+ return static_cast<X86::CondCode>(
+ MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
}
}
/// Return condition code of a CMov opcode.
-X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
- switch (Opc) {
+X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
- case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
- case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
- return X86::COND_A;
- case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
- case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
- return X86::COND_AE;
- case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
- case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
- return X86::COND_B;
- case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
- case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
- return X86::COND_BE;
- case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
- case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
- return X86::COND_E;
- case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
- case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
- return X86::COND_G;
- case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
- case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
- return X86::COND_GE;
- case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
- case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
- return X86::COND_L;
- case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
- case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
- return X86::COND_LE;
- case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
- case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
- return X86::COND_NE;
- case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
- case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
- return X86::COND_NO;
- case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
- case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
- return X86::COND_NP;
- case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
- case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
- return X86::COND_NS;
- case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
- case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
- return X86::COND_O;
- case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
- case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
- return X86::COND_P;
- case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
- case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
- return X86::COND_S;
- }
-}
-
-unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
- switch (CC) {
- default: llvm_unreachable("Illegal condition code!");
- case X86::COND_E: return X86::JE_1;
- case X86::COND_NE: return X86::JNE_1;
- case X86::COND_L: return X86::JL_1;
- case X86::COND_LE: return X86::JLE_1;
- case X86::COND_G: return X86::JG_1;
- case X86::COND_GE: return X86::JGE_1;
- case X86::COND_B: return X86::JB_1;
- case X86::COND_BE: return X86::JBE_1;
- case X86::COND_A: return X86::JA_1;
- case X86::COND_AE: return X86::JAE_1;
- case X86::COND_S: return X86::JS_1;
- case X86::COND_NS: return X86::JNS_1;
- case X86::COND_P: return X86::JP_1;
- case X86::COND_NP: return X86::JNP_1;
- case X86::COND_O: return X86::JO_1;
- case X86::COND_NO: return X86::JNO_1;
+ case X86::CMOV16rr: case X86::CMOV32rr: case X86::CMOV64rr:
+ case X86::CMOV16rm: case X86::CMOV32rm: case X86::CMOV64rm:
+ return static_cast<X86::CondCode>(
+ MI.getOperand(MI.getDesc().getNumOperands() - 1).getImm());
}
}
@@ -2293,78 +2183,18 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
return std::make_pair(CC, NeedSwap);
}
-/// Return a set opcode for the given condition and
-/// whether it has memory operand.
-unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
- static const uint16_t Opc[16][2] = {
- { X86::SETAr, X86::SETAm },
- { X86::SETAEr, X86::SETAEm },
- { X86::SETBr, X86::SETBm },
- { X86::SETBEr, X86::SETBEm },
- { X86::SETEr, X86::SETEm },
- { X86::SETGr, X86::SETGm },
- { X86::SETGEr, X86::SETGEm },
- { X86::SETLr, X86::SETLm },
- { X86::SETLEr, X86::SETLEm },
- { X86::SETNEr, X86::SETNEm },
- { X86::SETNOr, X86::SETNOm },
- { X86::SETNPr, X86::SETNPm },
- { X86::SETNSr, X86::SETNSm },
- { X86::SETOr, X86::SETOm },
- { X86::SETPr, X86::SETPm },
- { X86::SETSr, X86::SETSm }
- };
-
- assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
- return Opc[CC][HasMemoryOperand ? 1 : 0];
-}
-
-/// Return a cmov opcode for the given condition,
-/// register size in bytes, and operand type.
-unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
- bool HasMemoryOperand) {
- static const uint16_t Opc[32][3] = {
- { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
- { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
- { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
- { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
- { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
- { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
- { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
- { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
- { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
- { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
- { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
- { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
- { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
- { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
- { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
- { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
- { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
- { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
- { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
- { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
- { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
- { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
- { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
- { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
- { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
- { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
- { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
- { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
- { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
- { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
- { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
- { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
- };
+/// Return a setcc opcode based on whether it has a memory operand.
+unsigned X86::getSETOpc(bool HasMemoryOperand) {
+ return HasMemoryOperand ? X86::SETCCr : X86::SETCCm;
+}
- assert(CC < 16 && "Can only handle standard cond codes");
- unsigned Idx = HasMemoryOperand ? 16+CC : CC;
+/// Return a cmov opcode for the given register size in bytes, and operand type.
+unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
switch(RegBytes) {
default: llvm_unreachable("Illegal register size!");
- case 2: return Opc[Idx][0];
- case 4: return Opc[Idx][1];
- case 8: return Opc[Idx][2];
+ case 2: return HasMemoryOperand ? X86::CMOV16rm : X86::CMOV16rr;
+ case 4: return HasMemoryOperand ? X86::CMOV32rm : X86::CMOV32rr;
+  case 8: return HasMemoryOperand ? X86::CMOV64rm : X86::CMOV64rr;
}
}
@@ -2490,7 +2320,7 @@ void X86InstrInfo::replaceBranchWithTailCall(
if (!I->isBranch())
assert(0 && "Can't find the branch to replace!");
- X86::CondCode CC = X86::getCondFromBranchOpc(I->getOpcode());
+ X86::CondCode CC = X86::getCondFromBranch(*I);
assert(BranchCond.size() == 1);
if (CC != BranchCond[0].getImm())
continue;
@@ -2597,13 +2427,13 @@ bool X86InstrInfo::AnalyzeBranchImpl(
}
// Handle conditional branches.
- X86::CondCode BranchCode = X86::getCondFromBranchOpc(I->getOpcode());
+ X86::CondCode BranchCode = X86::getCondFromBranch(*I);
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
// In practice we should never have an undef eflags operand, if we do
// abort here as we are not prepared to preserve the flag.
- if (I->getOperand(1).isUndef())
+ if (I->findRegisterUseOperand(X86::EFLAGS)->isUndef())
return true;
// Working from the bottom, handle the first conditional branch.
@@ -2629,11 +2459,11 @@ bool X86InstrInfo::AnalyzeBranchImpl(
// Which is a bit more efficient.
// We conditionally jump to the fall-through block.
BranchCode = GetOppositeBranchCondition(BranchCode);
- unsigned JNCC = GetCondBranchFromCond(BranchCode);
MachineBasicBlock::iterator OldInst = I;
- BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
- .addMBB(UnCondBrIter->getOperand(0).getMBB());
+ BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JCC_1))
+ .addMBB(UnCondBrIter->getOperand(0).getMBB())
+ .addImm(BranchCode);
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
.addMBB(TargetBB);
@@ -2798,7 +2628,7 @@ unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
if (I->isDebugInstr())
continue;
if (I->getOpcode() != X86::JMP_1 &&
- X86::getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
+ X86::getCondFromBranch(*I) == X86::COND_INVALID)
break;
// Remove the branch.
I->eraseFromParent();
@@ -2837,9 +2667,9 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
switch (CC) {
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
++Count;
- BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
++Count;
break;
case X86::COND_E_AND_NP:
@@ -2850,14 +2680,13 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
"body is a fall-through.");
}
// Synthesize COND_E_AND_NP with two branches.
- BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
++Count;
- BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
++Count;
break;
default: {
- unsigned Opc = GetCondBranchFromCond(CC);
- BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
++Count;
}
}
@@ -2880,7 +2709,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
if (Cond.size() != 1)
return false;
// We cannot do the composite conditions, at least not in SSA form.
- if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
+ if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
return false;
// Check register classes.
@@ -2915,10 +2744,12 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
assert(Cond.size() == 1 && "Invalid Cond array");
- unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
- TRI.getRegSizeInBits(RC) / 8,
- false /*HasMemoryOperand*/);
- BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
+ unsigned Opc = X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
+ false /*HasMemoryOperand*/);
+ BuildMI(MBB, I, DL, get(Opc), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg)
+ .addImm(Cond[0].getImm());
}
/// Test if the given register is a physical h register.
@@ -2984,22 +2815,22 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
return X86::MMX_MOVD64to64rr;
}
- // SrcReg(FR32) -> DestReg(GR32)
- // SrcReg(GR32) -> DestReg(FR32)
+ // SrcReg(VR128) -> DestReg(GR32)
+ // SrcReg(GR32) -> DestReg(VR128)
if (X86::GR32RegClass.contains(DestReg) &&
- X86::FR32XRegClass.contains(SrcReg))
- // Copy from a FR32 register to a GR32 register.
- return HasAVX512 ? X86::VMOVSS2DIZrr :
- HasAVX ? X86::VMOVSS2DIrr :
- X86::MOVSS2DIrr;
+ X86::VR128XRegClass.contains(SrcReg))
+ // Copy from a VR128 register to a GR32 register.
+ return HasAVX512 ? X86::VMOVPDI2DIZrr :
+ HasAVX ? X86::VMOVPDI2DIrr :
+ X86::MOVPDI2DIrr;
- if (X86::FR32XRegClass.contains(DestReg) &&
+ if (X86::VR128XRegClass.contains(DestReg) &&
X86::GR32RegClass.contains(SrcReg))
- // Copy from a GR32 register to a FR32 register.
- return HasAVX512 ? X86::VMOVDI2SSZrr :
- HasAVX ? X86::VMOVDI2SSrr :
- X86::MOVDI2SSrr;
+    // Copy from a GR32 register to a VR128 register.
+ return HasAVX512 ? X86::VMOVDI2PDIZrr :
+ HasAVX ? X86::VMOVDI2PDIrr :
+ X86::MOVDI2PDIrr;
return 0;
}
@@ -3129,22 +2960,38 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::MOV32rm : X86::MOV32mr;
if (X86::FR32XRegClass.hasSubClassEq(RC))
return load ?
- (HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
- (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ (HasAVX512 ? X86::VMOVSSZrm_alt :
+ HasAVX ? X86::VMOVSSrm_alt :
+ X86::MOVSSrm_alt) :
+ (HasAVX512 ? X86::VMOVSSZmr :
+ HasAVX ? X86::VMOVSSmr :
+ X86::MOVSSmr);
if (X86::RFP32RegClass.hasSubClassEq(RC))
return load ? X86::LD_Fp32m : X86::ST_Fp32m;
if (X86::VK32RegClass.hasSubClassEq(RC)) {
assert(STI.hasBWI() && "KMOVD requires BWI");
return load ? X86::KMOVDkm : X86::KMOVDmk;
}
+  // All of these mask pair classes have the same spill size, so the same kind
+  // of kmov instructions can be used with all of them.
+ if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
+ X86::VK16PAIRRegClass.hasSubClassEq(RC))
+ return load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
llvm_unreachable("Unknown 4-byte regclass");
case 8:
if (X86::GR64RegClass.hasSubClassEq(RC))
return load ? X86::MOV64rm : X86::MOV64mr;
if (X86::FR64XRegClass.hasSubClassEq(RC))
return load ?
- (HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
- (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ (HasAVX512 ? X86::VMOVSDZrm_alt :
+ HasAVX ? X86::VMOVSDrm_alt :
+ X86::MOVSDrm_alt) :
+ (HasAVX512 ? X86::VMOVSDZmr :
+ HasAVX ? X86::VMOVSDmr :
+ X86::MOVSDmr);
if (X86::VR64RegClass.hasSubClassEq(RC))
return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
if (X86::RFP64RegClass.hasSubClassEq(RC))
@@ -3219,7 +3066,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
}
bool X86InstrInfo::getMemOperandWithOffset(
- MachineInstr &MemOp, MachineOperand *&BaseOp, int64_t &Offset,
+ const MachineInstr &MemOp, const MachineOperand *&BaseOp, int64_t &Offset,
const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
@@ -3572,25 +3419,39 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: return X86::COND_INVALID;
- case X86::LZCNT16rr: case X86::LZCNT16rm:
- case X86::LZCNT32rr: case X86::LZCNT32rm:
- case X86::LZCNT64rr: case X86::LZCNT64rm:
+ case X86::NEG8r:
+ case X86::NEG16r:
+ case X86::NEG32r:
+ case X86::NEG64r:
+ return X86::COND_AE;
+ case X86::LZCNT16rr:
+ case X86::LZCNT32rr:
+ case X86::LZCNT64rr:
return X86::COND_B;
- case X86::POPCNT16rr:case X86::POPCNT16rm:
- case X86::POPCNT32rr:case X86::POPCNT32rm:
- case X86::POPCNT64rr:case X86::POPCNT64rm:
+ case X86::POPCNT16rr:
+ case X86::POPCNT32rr:
+ case X86::POPCNT64rr:
return X86::COND_E;
- case X86::TZCNT16rr: case X86::TZCNT16rm:
- case X86::TZCNT32rr: case X86::TZCNT32rm:
- case X86::TZCNT64rr: case X86::TZCNT64rm:
+ case X86::TZCNT16rr:
+ case X86::TZCNT32rr:
+ case X86::TZCNT64rr:
return X86::COND_B;
- case X86::BSF16rr: case X86::BSF16rm:
- case X86::BSF32rr: case X86::BSF32rm:
- case X86::BSF64rr: case X86::BSF64rm:
- case X86::BSR16rr: case X86::BSR16rm:
- case X86::BSR32rr: case X86::BSR32rm:
- case X86::BSR64rr: case X86::BSR64rm:
+ case X86::BSF16rr:
+ case X86::BSF32rr:
+ case X86::BSF64rr:
+ case X86::BSR16rr:
+ case X86::BSR32rr:
+ case X86::BSR64rr:
return X86::COND_E;
+ case X86::BLSI32rr:
+ case X86::BLSI64rr:
+ return X86::COND_AE;
+ case X86::BLSR32rr:
+ case X86::BLSR64rr:
+ case X86::BLSMSK32rr:
+ case X86::BLSMSK64rr:
+ return X86::COND_B;
+ // TODO: TBM instructions.
}
}
@@ -3602,7 +3463,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
int CmpValue,
const MachineRegisterInfo *MRI) const {
// Check whether we can replace SUB with CMP.
- unsigned NewOpcode = 0;
switch (CmpInstr.getOpcode()) {
default: break;
case X86::SUB64ri32:
@@ -3623,6 +3483,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
// There is no use of the destination register, we can replace SUB with CMP.
+ unsigned NewOpcode = 0;
switch (CmpInstr.getOpcode()) {
default: llvm_unreachable("Unreachable!");
case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
@@ -3746,7 +3607,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// If we are done with the basic block, we need to check whether EFLAGS is
// live-out.
bool IsSafe = false;
- SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
+ SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
for (++I; I != E; ++I) {
const MachineInstr &Instr = *I;
@@ -3763,17 +3624,14 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// EFLAGS is used by this instruction.
X86::CondCode OldCC = X86::COND_INVALID;
- bool OpcIsSET = false;
if (IsCmpZero || IsSwapped) {
// We decode the condition code from opcode.
if (Instr.isBranch())
- OldCC = X86::getCondFromBranchOpc(Instr.getOpcode());
+ OldCC = X86::getCondFromBranch(Instr);
else {
- OldCC = X86::getCondFromSETOpc(Instr.getOpcode());
- if (OldCC != X86::COND_INVALID)
- OpcIsSET = true;
- else
- OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
+ OldCC = X86::getCondFromSETCC(Instr);
+ if (OldCC == X86::COND_INVALID)
+ OldCC = X86::getCondFromCMov(Instr);
}
if (OldCC == X86::COND_INVALID) return false;
}
@@ -3818,24 +3676,10 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
}
if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
- // Synthesize the new opcode.
- bool HasMemoryOperand = Instr.hasOneMemOperand();
- unsigned NewOpc;
- if (Instr.isBranch())
- NewOpc = GetCondBranchFromCond(ReplacementCC);
- else if(OpcIsSET)
- NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand);
- else {
- unsigned DstReg = Instr.getOperand(0).getReg();
- const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
- NewOpc = getCMovFromCond(ReplacementCC, TRI->getRegSizeInBits(*DstRC)/8,
- HasMemoryOperand);
- }
-
// Push the MachineInstr to OpsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// instructions will be modified.
- OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
+ OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
}
if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
// It is safe to remove CmpInstr if EFLAGS is updated again or killed.
@@ -3876,21 +3720,17 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
}
// Make sure Sub instruction defines EFLAGS and mark the def live.
- unsigned i = 0, e = Sub->getNumOperands();
- for (; i != e; ++i) {
- MachineOperand &MO = Sub->getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
- MO.setIsDead(false);
- break;
- }
- }
- assert(i != e && "Unable to locate a def EFLAGS operand");
+ MachineOperand *FlagDef = Sub->findRegisterDefOperand(X86::EFLAGS);
+ assert(FlagDef && "Unable to locate a def EFLAGS operand");
+ FlagDef->setIsDead(false);
CmpInstr.eraseFromParent();
// Modify the condition code of instructions in OpsToUpdate.
- for (auto &Op : OpsToUpdate)
- Op.first->setDesc(get(Op.second));
+ for (auto &Op : OpsToUpdate) {
+ Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
+ .setImm(Op.second);
+ }
return true;
}
@@ -4128,6 +3968,20 @@ static bool expandNOVLXStore(MachineInstrBuilder &MIB,
return true;
}
+
+static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
+ MIB->setDesc(Desc);
+ int64_t ShiftAmt = MIB->getOperand(2).getImm();
+ // Temporarily remove the immediate so we can add another source register.
+ MIB->RemoveOperand(2);
+ // Add the register. Don't copy the kill flag if there is one.
+ MIB.addReg(MIB->getOperand(1).getReg(),
+ getUndefRegState(MIB->getOperand(1).isUndef()));
+ // Add back the immediate.
+ MIB.addImm(ShiftAmt);
+ return true;
+}
+
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -4193,6 +4047,12 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MIB.addReg(SrcReg, RegState::ImplicitDefine);
return true;
}
+ if (MI.getOpcode() == X86::AVX512_256_SET0) {
+ // No VLX so we must reference a zmm.
+ unsigned ZReg =
+ TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(ZReg);
+ }
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
}
case X86::V_SETALLONES:
@@ -4282,6 +4142,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::XOR64_FP:
case X86::XOR32_FP:
return expandXorFP(MIB, *this);
+ case X86::SHLDROT32ri: return expandSHXDROT(MIB, get(X86::SHLD32rri8));
+ case X86::SHLDROT64ri: return expandSHXDROT(MIB, get(X86::SHLD64rri8));
+ case X86::SHRDROT32ri: return expandSHXDROT(MIB, get(X86::SHRD32rri8));
+ case X86::SHRDROT64ri: return expandSHXDROT(MIB, get(X86::SHRD64rri8));
+ case X86::ADD8rr_DB: MIB->setDesc(get(X86::OR8rr)); break;
+ case X86::ADD16rr_DB: MIB->setDesc(get(X86::OR16rr)); break;
+ case X86::ADD32rr_DB: MIB->setDesc(get(X86::OR32rr)); break;
+ case X86::ADD64rr_DB: MIB->setDesc(get(X86::OR64rr)); break;
+ case X86::ADD8ri_DB: MIB->setDesc(get(X86::OR8ri)); break;
+ case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break;
+ case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break;
+ case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
+ case X86::ADD16ri8_DB: MIB->setDesc(get(X86::OR16ri8)); break;
+ case X86::ADD32ri8_DB: MIB->setDesc(get(X86::OR32ri8)); break;
+ case X86::ADD64ri8_DB: MIB->setDesc(get(X86::OR64ri8)); break;
}
return false;
}
@@ -4303,7 +4178,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
/// FIXME: This should be turned into a TSFlags.
///
static bool hasPartialRegUpdate(unsigned Opcode,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget,
+ bool ForLoadFold = false) {
switch (Opcode) {
case X86::CVTSI2SSrr:
case X86::CVTSI2SSrm:
@@ -4313,6 +4189,9 @@ static bool hasPartialRegUpdate(unsigned Opcode,
case X86::CVTSI2SDrm:
case X86::CVTSI642SDrr:
case X86::CVTSI642SDrm:
+    // Load folding won't affect the undef register update since the input is
+ // a GPR.
+ return !ForLoadFold;
case X86::CVTSD2SSrr:
case X86::CVTSD2SSrm:
case X86::CVTSS2SDrr:
@@ -4389,7 +4268,7 @@ unsigned X86InstrInfo::getPartialRegUpdateClearance(
// Return true for any instruction that copies the high bits of the first source
// operand into the unused high bits of the destination operand.
-static bool hasUndefRegUpdate(unsigned Opcode) {
+static bool hasUndefRegUpdate(unsigned Opcode, bool ForLoadFold = false) {
switch (Opcode) {
case X86::VCVTSI2SSrr:
case X86::VCVTSI2SSrm:
@@ -4407,38 +4286,6 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTSI642SDrm:
case X86::VCVTSI642SDrr_Int:
case X86::VCVTSI642SDrm_Int:
- case X86::VCVTSD2SSrr:
- case X86::VCVTSD2SSrm:
- case X86::VCVTSD2SSrr_Int:
- case X86::VCVTSD2SSrm_Int:
- case X86::VCVTSS2SDrr:
- case X86::VCVTSS2SDrm:
- case X86::VCVTSS2SDrr_Int:
- case X86::VCVTSS2SDrm_Int:
- case X86::VRCPSSr:
- case X86::VRCPSSr_Int:
- case X86::VRCPSSm:
- case X86::VRCPSSm_Int:
- case X86::VROUNDSDr:
- case X86::VROUNDSDm:
- case X86::VROUNDSDr_Int:
- case X86::VROUNDSDm_Int:
- case X86::VROUNDSSr:
- case X86::VROUNDSSm:
- case X86::VROUNDSSr_Int:
- case X86::VROUNDSSm_Int:
- case X86::VRSQRTSSr:
- case X86::VRSQRTSSr_Int:
- case X86::VRSQRTSSm:
- case X86::VRSQRTSSm_Int:
- case X86::VSQRTSSr:
- case X86::VSQRTSSr_Int:
- case X86::VSQRTSSm:
- case X86::VSQRTSSm_Int:
- case X86::VSQRTSDr:
- case X86::VSQRTSDr_Int:
- case X86::VSQRTSDm:
- case X86::VSQRTSDm_Int:
// AVX-512
case X86::VCVTSI2SSZrr:
case X86::VCVTSI2SSZrm:
@@ -4453,7 +4300,6 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTSI2SDZrr:
case X86::VCVTSI2SDZrm:
case X86::VCVTSI2SDZrr_Int:
- case X86::VCVTSI2SDZrrb_Int:
case X86::VCVTSI2SDZrm_Int:
case X86::VCVTSI642SDZrr:
case X86::VCVTSI642SDZrm:
@@ -4479,6 +4325,42 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTUSI642SDZrr_Int:
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
+    // Load folding won't affect the undef register update since the input is
+ // a GPR.
+ return !ForLoadFold;
+ case X86::VCVTSD2SSrr:
+ case X86::VCVTSD2SSrm:
+ case X86::VCVTSD2SSrr_Int:
+ case X86::VCVTSD2SSrm_Int:
+ case X86::VCVTSS2SDrr:
+ case X86::VCVTSS2SDrm:
+ case X86::VCVTSS2SDrr_Int:
+ case X86::VCVTSS2SDrm_Int:
+ case X86::VRCPSSr:
+ case X86::VRCPSSr_Int:
+ case X86::VRCPSSm:
+ case X86::VRCPSSm_Int:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSDm:
+ case X86::VROUNDSDr_Int:
+ case X86::VROUNDSDm_Int:
+ case X86::VROUNDSSr:
+ case X86::VROUNDSSm:
+ case X86::VROUNDSSr_Int:
+ case X86::VROUNDSSm_Int:
+ case X86::VRSQRTSSr:
+ case X86::VRSQRTSSr_Int:
+ case X86::VRSQRTSSm:
+ case X86::VRSQRTSSm_Int:
+ case X86::VSQRTSSr:
+ case X86::VSQRTSSr_Int:
+ case X86::VSQRTSSm:
+ case X86::VSQRTSSm_Int:
+ case X86::VSQRTSDr:
+ case X86::VSQRTSDr_Int:
+ case X86::VSQRTSDm:
+ case X86::VSQRTSDm_Int:
+ // AVX-512
case X86::VCVTSD2SSZrr:
case X86::VCVTSD2SSZrr_Int:
case X86::VCVTSD2SSZrrb_Int:
@@ -4759,7 +4641,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if (Size <= RCSize && 4 <= Align) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && 4 <= Align) {
int PtrOffset = SrcIdx * 4;
unsigned NewImm = (DstIdx << 4) | ZMask;
unsigned NewOpCode =
@@ -4783,7 +4665,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
- if (Size <= RCSize && 8 <= Align) {
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && 8 <= Align) {
unsigned NewOpCode =
(MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
(MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
@@ -4794,13 +4676,29 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
}
}
break;
- };
+ case X86::UNPCKLPDrr:
+ // If we won't be able to fold this to the memory form of UNPCKL, use
+ // MOVHPD instead. Done as custom because we can't have this in the load
+ // table twice.
+ if (OpNum == 2) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
+ if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) {
+ MachineInstr *NewMI =
+ FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
+ return NewMI;
+ }
+ }
+ break;
+ }
return nullptr;
}
-static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
- if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
+ MachineInstr &MI) {
+ if (!hasUndefRegUpdate(MI.getOpcode(), /*ForLoadFold*/true) ||
!MI.getOperand(1).isReg())
return false;
@@ -4828,15 +4726,15 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// For CPUs that favor the register form of a call or push,
// do not fold loads into calls or pushes, unless optimizing for size
// aggressively.
- if (isSlowTwoMemOps && !MF.getFunction().optForMinSize() &&
+ if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
(MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r ||
MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r ||
MI.getOpcode() == X86::PUSH64r))
return nullptr;
// Avoid partial and undef register update stalls unless optimizing for size.
- if (!MF.getFunction().optForSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+ if (!MF.getFunction().hasOptSize() &&
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -4899,6 +4797,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
&RI, MF);
unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if (Size < RCSize) {
+ // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
@@ -4937,9 +4836,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
bool HasDef = MI.getDesc().getNumDefs();
- unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
- unsigned Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
- unsigned Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
+ Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+ Register Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
+ Register Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
bool Tied1 =
0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
bool Tied2 =
@@ -4997,14 +4896,15 @@ MachineInstr *
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt,
- int FrameIndex, LiveIntervals *LIS) const {
+ int FrameIndex, LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
// Check switch flag
if (NoFusing)
return nullptr;
// Avoid partial and undef register update stalls unless optimizing for size.
- if (!MF.getFunction().optForSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+ if (!MF.getFunction().hasOptSize() &&
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -5073,7 +4973,9 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
unsigned RegSize = TRI.getRegSizeInBits(*RC);
- if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) &&
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
+ Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
+ Opc == X86::VMOVSSZrm_alt) &&
RegSize > 32) {
// These instructions only load 32 bits, we can't fold them if the
// destination register is wider than 32 bits (4 bytes), and its user
@@ -5087,6 +4989,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
+ case X86::VCMPSSZrr_Intk:
case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
@@ -5124,7 +5027,9 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
}
}
- if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) &&
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
+ Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
+ Opc == X86::VMOVSDZrm_alt) &&
RegSize > 64) {
// These instructions only load 64 bits, we can't fold them if the
// destination register is wider than 64 bits (8 bytes), and its user
@@ -5138,6 +5043,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
+ case X86::VCMPSDZrr_Intk:
case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
@@ -5203,8 +5109,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
if (NoFusing) return nullptr;
// Avoid partial and undef register update stalls unless optimizing for size.
- if (!MF.getFunction().optForSize() &&
- (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+ if (!MF.getFunction().hasOptSize() &&
+ (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/true) ||
shouldPreventUndefRegUpdateMemFold(MF, MI)))
return nullptr;
@@ -5359,10 +5265,7 @@ extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
} else {
// Clone the MMO and unset the store flag.
LoadMMOs.push_back(MF.getMachineMemOperand(
- MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOStore,
- MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
- MMO->getSyncScopeID(), MMO->getOrdering(),
- MMO->getFailureOrdering()));
+ MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
}
}
@@ -5383,10 +5286,7 @@ extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
} else {
// Clone the MMO and unset the load flag.
StoreMMOs.push_back(MF.getMachineMemOperand(
- MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOLoad,
- MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
- MMO->getSyncScopeID(), MMO->getOrdering(),
- MMO->getFailureOrdering()));
+ MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
}
}
@@ -5668,7 +5568,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::LD_Fp64m:
case X86::LD_Fp80m:
case X86::MOVSSrm:
+ case X86::MOVSSrm_alt:
case X86::MOVSDrm:
+ case X86::MOVSDrm_alt:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::MOVAPSrm:
@@ -5679,7 +5581,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVDQUrm:
// AVX load instructions
case X86::VMOVSSrm:
+ case X86::VMOVSSrm_alt:
case X86::VMOVSDrm:
+ case X86::VMOVSDrm_alt:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
@@ -5694,7 +5598,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQUYrm:
// AVX512 load instructions
case X86::VMOVSSZrm:
+ case X86::VMOVSSZrm_alt:
case X86::VMOVSDZrm:
+ case X86::VMOVSDZrm_alt:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
case X86::VMOVAPSZ128rm_NOVLX:
@@ -5745,7 +5651,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::LD_Fp64m:
case X86::LD_Fp80m:
case X86::MOVSSrm:
+ case X86::MOVSSrm_alt:
case X86::MOVSDrm:
+ case X86::MOVSDrm_alt:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::MOVAPSrm:
@@ -5756,7 +5664,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVDQUrm:
// AVX load instructions
case X86::VMOVSSrm:
+ case X86::VMOVSSrm_alt:
case X86::VMOVSDrm:
+ case X86::VMOVSDrm_alt:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
@@ -5771,7 +5681,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQUYrm:
// AVX512 load instructions
case X86::VMOVSSZrm:
+ case X86::VMOVSSZrm_alt:
case X86::VMOVSDZrm:
+ case X86::VMOVSDZrm_alt:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
case X86::VMOVAPSZ128rm_NOVLX:
@@ -5943,7 +5855,9 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr },
{ X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
{ X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
+ { X86::MOVSDrm_alt,X86::MOVSDrm_alt,X86::MOVQI2PQIrm },
{ X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
+ { X86::MOVSSrm_alt,X86::MOVSSrm_alt,X86::MOVDI2PDIrm },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
@@ -5973,7 +5887,9 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr },
{ X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
{ X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
+ { X86::VMOVSDrm_alt,X86::VMOVSDrm_alt,X86::VMOVQI2PQIrm },
{ X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
+ { X86::VMOVSSrm_alt,X86::VMOVSSrm_alt,X86::VMOVDI2PDIrm },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
@@ -6012,13 +5928,17 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
{ X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
{ X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
+ { X86::VMOVSDZrm_alt, X86::VMOVSDZrm_alt, X86::VMOVQI2PQIZrm },
{ X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
+ { X86::VMOVSSZrm_alt, X86::VMOVSSZrm_alt, X86::VMOVDI2PDIZrm },
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
{ X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
{ X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
{ X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
+ { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rr, X86::VPBROADCASTQZ128r },
+ { X86::VMOVDDUPZ128rm, X86::VMOVDDUPZ128rm, X86::VPBROADCASTQZ128m },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
{ X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
@@ -6109,6 +6029,8 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
{ X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
+ { X86::VMOVDDUPrm, X86::VMOVDDUPrm, X86::VPBROADCASTQrm},
+ { X86::VMOVDDUPrr, X86::VMOVDDUPrr, X86::VPBROADCASTQrr},
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
{ X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
@@ -6128,6 +6050,19 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
};
+static const uint16_t ReplaceableInstrsFP[][3] = {
+ //PackedSingle PackedDouble
+ { X86::MOVLPSrm, X86::MOVLPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::MOVHPSrm, X86::MOVHPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::MOVHPSmr, X86::MOVHPDmr, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVLPSrm, X86::VMOVLPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSrm, X86::VMOVHPDrm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSmr, X86::VMOVHPDmr, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVLPSZ128rm, X86::VMOVLPDZ128rm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSZ128rm, X86::VMOVHPDZ128rm, X86::INSTRUCTION_LIST_END },
+ { X86::VMOVHPSZ128mr, X86::VMOVHPDZ128mr, X86::INSTRUCTION_LIST_END },
+};
+
static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
@@ -6368,7 +6303,7 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
};
// NOTE: These should only be used by the custom domain methods.
-static const uint16_t ReplaceableCustomInstrs[][3] = {
+static const uint16_t ReplaceableBlendInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
{ X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
@@ -6377,7 +6312,7 @@ static const uint16_t ReplaceableCustomInstrs[][3] = {
{ X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
{ X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
};
-static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
+static const uint16_t ReplaceableBlendAVX2Instrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
{ X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
@@ -6552,6 +6487,8 @@ uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
MI.getOperand(2).getSubReg() == 0)
return 0x6;
return 0;
+ case X86::SHUFPDrri:
+ return 0x6;
}
return 0;
}
@@ -6571,9 +6508,9 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
unsigned NewImm = Imm;
- const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
+ const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
if (!table)
- table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+ table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
if (Domain == 1) { // PackedSingle
AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
@@ -6583,7 +6520,7 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
if (Subtarget.hasAVX2()) {
// If we are already VPBLENDW use that, else use VPBLENDD.
if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
- table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
+ table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
}
} else {
@@ -6672,6 +6609,18 @@ bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
// We must always return true for MOVHLPSrr.
if (Opcode == X86::MOVHLPSrr)
return true;
+ break;
+ case X86::SHUFPDrri: {
+ if (Domain == 1) {
+ unsigned Imm = MI.getOperand(3).getImm();
+ unsigned NewImm = 0x44;
+ if (Imm & 1) NewImm |= 0x0a;
+ if (Imm & 2) NewImm |= 0xa0;
+ MI.getOperand(3).setImm(NewImm);
+ MI.setDesc(get(X86::SHUFPSrri));
+ }
+ return true;
+ }
}
return false;
}
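
The SHUFPDrri case added in the hunk above moves a packed-double shuffle into the single-FP domain by rewriting its 2-bit immediate into the 8-bit SHUFPS form. A minimal standalone sketch of that remapping, with a hypothetical helper name that is not part of the patch:

  #include <cassert>

  // SHUFPD picks one 64-bit lane per source with bits 0 and 1 of its
  // immediate. SHUFPS picks four 32-bit lanes with four 2-bit fields, so the
  // equivalent mask selects lane pair {0,1} or {2,3} from each source.
  static unsigned shufpdImmToShufpsImm(unsigned Imm) {
    unsigned NewImm = 0x44;       // lanes {0,1} of src1 and {0,1} of src2
    if (Imm & 1) NewImm |= 0x0a;  // take lanes {2,3} of src1 instead
    if (Imm & 2) NewImm |= 0xa0;  // take lanes {2,3} of src2 instead
    return NewImm;
  }

  int main() {
    assert(shufpdImmToShufpsImm(0) == 0x44);
    assert(shufpdImmToShufpsImm(1) == 0x4e);
    assert(shufpdImmToShufpsImm(2) == 0xe4);
    assert(shufpdImmToShufpsImm(3) == 0xee);
    return 0;
  }
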
@@ -6691,6 +6640,8 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
validDomains = 0xe;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+ } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
+ validDomains = 0x6;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
    // Insert/extract instructions should only affect the domain if AVX2
// is enabled.
@@ -6730,6 +6681,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
"256-bit vector operations only available in AVX2");
table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
}
+ if (!table) { // try the FP table
+ table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
+ assert((!table || Domain < 3) &&
+ "Can only select PackedSingle or PackedDouble");
+ }
if (!table) { // try the other table
assert(Subtarget.hasAVX2() &&
"256-bit insert/extract only available in AVX2");
@@ -7140,6 +7096,20 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::PADDWrr:
case X86::PADDDrr:
case X86::PADDQrr:
+ case X86::PMULLWrr:
+ case X86::PMULLDrr:
+ case X86::PMAXSBrr:
+ case X86::PMAXSDrr:
+ case X86::PMAXSWrr:
+ case X86::PMAXUBrr:
+ case X86::PMAXUDrr:
+ case X86::PMAXUWrr:
+ case X86::PMINSBrr:
+ case X86::PMINSDrr:
+ case X86::PMINSWrr:
+ case X86::PMINUBrr:
+ case X86::PMINUDrr:
+ case X86::PMINUWrr:
case X86::VPANDrr:
case X86::VPANDYrr:
case X86::VPANDDZ128rr:
@@ -7243,6 +7213,78 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VPMULLQZ128rr:
case X86::VPMULLQZ256rr:
case X86::VPMULLQZrr:
+ case X86::VPMAXSBrr:
+ case X86::VPMAXSBYrr:
+ case X86::VPMAXSBZ128rr:
+ case X86::VPMAXSBZ256rr:
+ case X86::VPMAXSBZrr:
+ case X86::VPMAXSDrr:
+ case X86::VPMAXSDYrr:
+ case X86::VPMAXSDZ128rr:
+ case X86::VPMAXSDZ256rr:
+ case X86::VPMAXSDZrr:
+ case X86::VPMAXSQZ128rr:
+ case X86::VPMAXSQZ256rr:
+ case X86::VPMAXSQZrr:
+ case X86::VPMAXSWrr:
+ case X86::VPMAXSWYrr:
+ case X86::VPMAXSWZ128rr:
+ case X86::VPMAXSWZ256rr:
+ case X86::VPMAXSWZrr:
+ case X86::VPMAXUBrr:
+ case X86::VPMAXUBYrr:
+ case X86::VPMAXUBZ128rr:
+ case X86::VPMAXUBZ256rr:
+ case X86::VPMAXUBZrr:
+ case X86::VPMAXUDrr:
+ case X86::VPMAXUDYrr:
+ case X86::VPMAXUDZ128rr:
+ case X86::VPMAXUDZ256rr:
+ case X86::VPMAXUDZrr:
+ case X86::VPMAXUQZ128rr:
+ case X86::VPMAXUQZ256rr:
+ case X86::VPMAXUQZrr:
+ case X86::VPMAXUWrr:
+ case X86::VPMAXUWYrr:
+ case X86::VPMAXUWZ128rr:
+ case X86::VPMAXUWZ256rr:
+ case X86::VPMAXUWZrr:
+ case X86::VPMINSBrr:
+ case X86::VPMINSBYrr:
+ case X86::VPMINSBZ128rr:
+ case X86::VPMINSBZ256rr:
+ case X86::VPMINSBZrr:
+ case X86::VPMINSDrr:
+ case X86::VPMINSDYrr:
+ case X86::VPMINSDZ128rr:
+ case X86::VPMINSDZ256rr:
+ case X86::VPMINSDZrr:
+ case X86::VPMINSQZ128rr:
+ case X86::VPMINSQZ256rr:
+ case X86::VPMINSQZrr:
+ case X86::VPMINSWrr:
+ case X86::VPMINSWYrr:
+ case X86::VPMINSWZ128rr:
+ case X86::VPMINSWZ256rr:
+ case X86::VPMINSWZrr:
+ case X86::VPMINUBrr:
+ case X86::VPMINUBYrr:
+ case X86::VPMINUBZ128rr:
+ case X86::VPMINUBZ256rr:
+ case X86::VPMINUBZrr:
+ case X86::VPMINUDrr:
+ case X86::VPMINUDYrr:
+ case X86::VPMINUDZ128rr:
+ case X86::VPMINUDZ256rr:
+ case X86::VPMINUDZrr:
+ case X86::VPMINUQZ128rr:
+ case X86::VPMINUQZ256rr:
+ case X86::VPMINUQZrr:
+ case X86::VPMINUWrr:
+ case X86::VPMINUWYrr:
+ case X86::VPMINUWZ128rr:
+ case X86::VPMINUWZ256rr:
+ case X86::VPMINUWZrr:
// Normal min/max instructions are not commutative because of NaN and signed
// zero semantics, but these are. Thus, there's no need to check for global
// relaxed math; the instructions themselves have the properties we need.
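
The comment above is the justification for adding the integer PMIN/PMAX opcodes: unlike the FP MIN/MAX family they are insensitive to operand order. A small host-side illustration of the FP asymmetry, written with SSE intrinsics and independent of the patch itself:

  #include <immintrin.h>
  #include <cmath>
  #include <cstdio>

  int main() {
    // MINSS returns the second source operand when either input is NaN
    // (and when both inputs are zeros of either sign), so order matters.
    __m128 nan = _mm_set_ss(std::nanf(""));
    __m128 one = _mm_set_ss(1.0f);
    float a = _mm_cvtss_f32(_mm_min_ss(nan, one)); // 1.0f (second operand)
    float b = _mm_cvtss_f32(_mm_min_ss(one, nan)); // NaN  (second operand)
    std::printf("min(NaN,1)=%f  min(1,NaN)=%f\n", a, b);
    return 0;
  }
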
@@ -7698,7 +7740,7 @@ bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
// Does the function use a red zone? If it does, then we can't risk messing
// with the stack.
- if (!F.hasFnAttribute(Attribute::NoRedZone)) {
+ if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) {
// It could have a red zone. If it does, then we don't want to touch it.
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (!X86FI || X86FI->getUsesRedZone())
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 159cb50afc5c..13ca17139494 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -1,9 +1,8 @@
//===-- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,62 +35,24 @@ enum AsmComments {
AC_EVEX_2_VEX = MachineInstr::TAsmComments
};
-// X86 specific condition code. These correspond to X86_*_COND in
-// X86InstrInfo.td. They must be kept in synch.
-enum CondCode {
- COND_A = 0,
- COND_AE = 1,
- COND_B = 2,
- COND_BE = 3,
- COND_E = 4,
- COND_G = 5,
- COND_GE = 6,
- COND_L = 7,
- COND_LE = 8,
- COND_NE = 9,
- COND_NO = 10,
- COND_NP = 11,
- COND_NS = 12,
- COND_O = 13,
- COND_P = 14,
- COND_S = 15,
- LAST_VALID_COND = COND_S,
-
- // Artificial condition codes. These are used by AnalyzeBranch
- // to indicate a block terminated with two conditional branches that together
- // form a compound condition. They occur in code using FCMP_OEQ or FCMP_UNE,
- // which can't be represented on x86 with a single condition. These
- // are never used in MachineInstrs and are inverses of one another.
- COND_NE_OR_P,
- COND_E_AND_NP,
-
- COND_INVALID
-};
-
-// Turn condition code into conditional branch opcode.
-unsigned GetCondBranchFromCond(CondCode CC);
-
/// Return a pair of condition code for the given predicate and whether
/// the instruction operands should be swapped to match the condition code.
std::pair<CondCode, bool> getX86ConditionCode(CmpInst::Predicate Predicate);
-/// Return a set opcode for the given condition and whether it has
-/// a memory operand.
-unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
+/// Return a setcc opcode based on whether it has a memory operand.
+unsigned getSETOpc(bool HasMemoryOperand = false);
-/// Return a cmov opcode for the given condition, register size in
-/// bytes, and operand type.
-unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
- bool HasMemoryOperand = false);
+/// Return a cmov opcode for the given register size in bytes, and operand type.
+unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false);
-// Turn jCC opcode into condition code.
-CondCode getCondFromBranchOpc(unsigned Opc);
+// Turn jCC instruction into condition code.
+CondCode getCondFromBranch(const MachineInstr &MI);
-// Turn setCC opcode into condition code.
-CondCode getCondFromSETOpc(unsigned Opc);
+// Turn setCC instruction into condition code.
+CondCode getCondFromSETCC(const MachineInstr &MI);
-// Turn CMov opcode into condition code.
-CondCode getCondFromCMovOpc(unsigned Opc);
+// Turn CMov instruction into condition code.
+CondCode getCondFromCMov(const MachineInstr &MI);
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
@@ -327,7 +288,8 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
+ bool getMemOperandWithOffset(const MachineInstr &LdSt,
+ const MachineOperand *&BaseOp,
int64_t &Offset,
const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
@@ -388,7 +350,8 @@ public:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
- LiveIntervals *LIS = nullptr) const override;
+ LiveIntervals *LIS = nullptr,
+ VirtRegMap *VRM = nullptr) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
@@ -453,7 +416,10 @@ public:
/// conservative. If it cannot definitely determine the safety after visiting
/// a few instructions in each direction it assumes it's not safe.
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const {
+ return MBB.computeRegisterLiveness(&RI, X86::EFLAGS, I, 4) ==
+ MachineBasicBlock::LQR_Dead;
+ }
/// True if MI has a condition code def, e.g. EFLAGS, that is
/// not marked dead.
@@ -590,7 +556,8 @@ private:
MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineInstr &MI,
- LiveVariables *LV) const;
+ LiveVariables *LV,
+ bool Is8BitOp) const;
/// Handles memory folding for special case instructions, for instance those
/// requiring custom manipulation of the address.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e53f83baa3c6..8e05dd8ec5c1 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,9 +1,8 @@
//===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -64,6 +63,10 @@ def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>;
def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+def SDTX86rdpkru : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDTX86wrpkru : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>,
SDTCisVT<2, i8>]>;
def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -124,6 +127,9 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86ENQCMD : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>, SDTCisSameAs<1, 2>]>;
+
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
[SDNPHasChain,SDNPSideEffect]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
@@ -152,6 +158,11 @@ def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand,
def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand,
[SDNPHasChain, SDNPSideEffect]>;
+def X86rdpkru : SDNode<"X86ISD::RDPKRU", SDTX86rdpkru,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
@@ -206,13 +217,6 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad]>;
-def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
@@ -306,6 +310,11 @@ def X86tpause : SDNode<"X86ISD::TPAUSE",
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
+def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD,
+ [SDNPHasChain, SDNPSideEffect]>;
+def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD,
+ [SDNPHasChain, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -371,37 +380,35 @@ def anymem : X86MemOperand<"printanymem">;
// restrict to only unsized memory.
def opaquemem : X86MemOperand<"printopaquemem">;
-def i8mem : X86MemOperand<"printi8mem", X86Mem8AsmOperand>;
-def i16mem : X86MemOperand<"printi16mem", X86Mem16AsmOperand>;
-def i32mem : X86MemOperand<"printi32mem", X86Mem32AsmOperand>;
-def i64mem : X86MemOperand<"printi64mem", X86Mem64AsmOperand>;
-def i128mem : X86MemOperand<"printi128mem", X86Mem128AsmOperand>;
-def i256mem : X86MemOperand<"printi256mem", X86Mem256AsmOperand>;
-def i512mem : X86MemOperand<"printi512mem", X86Mem512AsmOperand>;
-def f32mem : X86MemOperand<"printf32mem", X86Mem32AsmOperand>;
-def f64mem : X86MemOperand<"printf64mem", X86Mem64AsmOperand>;
-def f80mem : X86MemOperand<"printf80mem", X86Mem80AsmOperand>;
-def f128mem : X86MemOperand<"printf128mem", X86Mem128AsmOperand>;
-def f256mem : X86MemOperand<"printf256mem", X86Mem256AsmOperand>;
-def f512mem : X86MemOperand<"printf512mem", X86Mem512AsmOperand>;
-
-def v512mem : X86VMemOperand<VR512, "printf512mem", X86Mem512AsmOperand>;
+def i8mem : X86MemOperand<"printbytemem", X86Mem8AsmOperand>;
+def i16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
+def i32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
+def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
+def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
+def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand>;
+def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
+def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
+def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
// Gather mem operands
-def vx64mem : X86VMemOperand<VR128, "printi64mem", X86Mem64_RC128Operand>;
-def vx128mem : X86VMemOperand<VR128, "printi128mem", X86Mem128_RC128Operand>;
-def vx256mem : X86VMemOperand<VR128, "printi256mem", X86Mem256_RC128Operand>;
-def vy128mem : X86VMemOperand<VR256, "printi128mem", X86Mem128_RC256Operand>;
-def vy256mem : X86VMemOperand<VR256, "printi256mem", X86Mem256_RC256Operand>;
-
-def vx64xmem : X86VMemOperand<VR128X, "printi64mem", X86Mem64_RC128XOperand>;
-def vx128xmem : X86VMemOperand<VR128X, "printi128mem", X86Mem128_RC128XOperand>;
-def vx256xmem : X86VMemOperand<VR128X, "printi256mem", X86Mem256_RC128XOperand>;
-def vy128xmem : X86VMemOperand<VR256X, "printi128mem", X86Mem128_RC256XOperand>;
-def vy256xmem : X86VMemOperand<VR256X, "printi256mem", X86Mem256_RC256XOperand>;
-def vy512xmem : X86VMemOperand<VR256X, "printi512mem", X86Mem512_RC256XOperand>;
-def vz256mem : X86VMemOperand<VR512, "printi256mem", X86Mem256_RC512Operand>;
-def vz512mem : X86VMemOperand<VR512, "printi512mem", X86Mem512_RC512Operand>;
+def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand>;
+def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand>;
+def vx256mem : X86VMemOperand<VR128, "printymmwordmem", X86Mem256_RC128Operand>;
+def vy128mem : X86VMemOperand<VR256, "printxmmwordmem", X86Mem128_RC256Operand>;
+def vy256mem : X86VMemOperand<VR256, "printymmwordmem", X86Mem256_RC256Operand>;
+
+def vx64xmem : X86VMemOperand<VR128X, "printqwordmem", X86Mem64_RC128XOperand>;
+def vx128xmem : X86VMemOperand<VR128X, "printxmmwordmem", X86Mem128_RC128XOperand>;
+def vx256xmem : X86VMemOperand<VR128X, "printymmwordmem", X86Mem256_RC128XOperand>;
+def vy128xmem : X86VMemOperand<VR256X, "printxmmwordmem", X86Mem128_RC256XOperand>;
+def vy256xmem : X86VMemOperand<VR256X, "printymmwordmem", X86Mem256_RC256XOperand>;
+def vy512xmem : X86VMemOperand<VR256X, "printzmmwordmem", X86Mem512_RC256XOperand>;
+def vz256mem : X86VMemOperand<VR512, "printymmwordmem", X86Mem256_RC512Operand>;
+def vz512mem : X86VMemOperand<VR512, "printzmmwordmem", X86Mem512_RC512Operand>;
// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
// of a plain GPR, so that it doesn't potentially require a REX prefix.
@@ -409,7 +416,7 @@ def ptr_rc_norex : PointerLikeRegClass<2>;
def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
def i8mem_NOREX : Operand<iPTR> {
- let PrintMethod = "printi8mem";
+ let PrintMethod = "printbytemem";
let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm,
SEGMENT_REG);
let ParserMatchClass = X86Mem8AsmOperand;
@@ -424,7 +431,7 @@ def ptr_rc_tailcall : PointerLikeRegClass<4>;
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved registers are popped.
def i32mem_TC : Operand<i32> {
- let PrintMethod = "printi32mem";
+ let PrintMethod = "printdwordmem";
let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall,
i32imm, SEGMENT_REG);
let ParserMatchClass = X86Mem32AsmOperand;
@@ -435,7 +442,7 @@ def i32mem_TC : Operand<i32> {
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved registers are popped.
def i64mem_TC : Operand<i64> {
- let PrintMethod = "printi64mem";
+ let PrintMethod = "printqwordmem";
let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
ptr_rc_tailcall, i32imm, SEGMENT_REG);
let ParserMatchClass = X86Mem64AsmOperand;
@@ -603,24 +610,10 @@ def offset64_32 : X86MemOffsOperand<i64imm, "printMemOffs32",
def offset64_64 : X86MemOffsOperand<i64imm, "printMemOffs64",
X86MemOffs64_64AsmOperand>;
-def SSECC : Operand<i8> {
- let PrintMethod = "printSSEAVXCC";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def AVXCC : Operand<i8> {
- let PrintMethod = "printSSEAVXCC";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def AVX512ICC : Operand<i8> {
- let PrintMethod = "printSSEAVXCC";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def XOPCC : Operand<i8> {
- let PrintMethod = "printXOPCC";
- let OperandType = "OPERAND_IMMEDIATE";
+def ccode : Operand<i8> {
+ let PrintMethod = "printCondCode";
+ let OperandNamespace = "X86";
+ let OperandType = "OPERAND_COND_CODE";
}
class ImmSExtAsmOperandClass : AsmOperandClass {
@@ -640,7 +633,8 @@ def AVX512RCOperand : AsmOperandClass {
}
def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
- let OperandType = "OPERAND_IMMEDIATE";
+ let OperandNamespace = "X86";
+ let OperandType = "OPERAND_ROUNDING_CONTROL";
let ParserMatchClass = AVX512RCOperand;
}
@@ -718,6 +712,14 @@ def u8imm : Operand<i8> {
let OperandType = "OPERAND_IMMEDIATE";
}
+// 16-bit immediate but only 8-bits are significant and they are unsigned.
+// Used by BT instructions.
+def i16u8imm : Operand<i16> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
// 32-bit immediate but only 8-bits are significant and they are unsigned.
// Used by some SSE/AVX instructions that use intrinsics.
def i32u8imm : Operand<i32> {
@@ -726,6 +728,14 @@ def i32u8imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
+// 64-bit immediate but only 8-bits are significant and they are unsigned.
+// Used by BT instructions.
+def i64u8imm : Operand<i64> {
+ let PrintMethod = "printU8Imm";
+ let ParserMatchClass = ImmUnsignedi8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
// 64-bits but only 32 bits are significant, and those bits are treated as being
// pc relative.
def i64i32imm_pcrel : Operand<i64> {
@@ -747,6 +757,33 @@ def lea64mem : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
+let RenderMethod = "addMaskPairOperands" in {
+ def VK1PairAsmOperand : AsmOperandClass { let Name = "VK1Pair"; }
+ def VK2PairAsmOperand : AsmOperandClass { let Name = "VK2Pair"; }
+ def VK4PairAsmOperand : AsmOperandClass { let Name = "VK4Pair"; }
+ def VK8PairAsmOperand : AsmOperandClass { let Name = "VK8Pair"; }
+ def VK16PairAsmOperand : AsmOperandClass { let Name = "VK16Pair"; }
+}
+
+def VK1Pair : RegisterOperand<VK1PAIR, "printVKPair"> {
+ let ParserMatchClass = VK1PairAsmOperand;
+}
+
+def VK2Pair : RegisterOperand<VK2PAIR, "printVKPair"> {
+ let ParserMatchClass = VK2PairAsmOperand;
+}
+
+def VK4Pair : RegisterOperand<VK4PAIR, "printVKPair"> {
+ let ParserMatchClass = VK4PairAsmOperand;
+}
+
+def VK8Pair : RegisterOperand<VK8PAIR, "printVKPair"> {
+ let ParserMatchClass = VK8PairAsmOperand;
+}
+
+def VK16Pair : RegisterOperand<VK16PAIR, "printVKPair"> {
+ let ParserMatchClass = VK16PairAsmOperand;
+}
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -833,6 +870,8 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
+def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">;
def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
@@ -894,8 +933,10 @@ def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
+def HasCmpxchg8b : Predicate<"Subtarget->hasCmpxchg8b()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
+def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
@@ -928,12 +969,12 @@ def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
- def OptForSize : Predicate<"MF->getFunction().optForSize()">;
- def OptForMinSize : Predicate<"MF->getFunction().optForMinSize()">;
- def OptForSpeed : Predicate<"!MF->getFunction().optForSize()">;
+ def OptForSize : Predicate<"MF->getFunction().hasOptSize()">;
+ def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">;
+ def OptForSpeed : Predicate<"!MF->getFunction().hasOptSize()">;
def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
- "MF->getFunction().optForSize()">;
- def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().optForSize() || "
+ "MF->getFunction().hasOptSize()">;
+ def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().hasOptSize() || "
"!Subtarget->hasSSE41()">;
}
@@ -959,22 +1000,22 @@ include "X86InstrFormats.td"
// X86 specific condition code. These correspond to CondCode in
// X86InstrInfo.h. They must be kept in synch.
-def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
-def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_O : PatLeaf<(i8 0)>;
+def X86_COND_NO : PatLeaf<(i8 1)>;
def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
-def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_AE : PatLeaf<(i8 3)>; // alt. COND_NC
def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
-def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
-def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
-def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
-def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
-def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
-def X86_COND_NO : PatLeaf<(i8 10)>;
+def X86_COND_NE : PatLeaf<(i8 5)>; // alt. COND_NZ
+def X86_COND_BE : PatLeaf<(i8 6)>; // alt. COND_NA
+def X86_COND_A : PatLeaf<(i8 7)>; // alt. COND_NBE
+def X86_COND_S : PatLeaf<(i8 8)>;
+def X86_COND_NS : PatLeaf<(i8 9)>;
+def X86_COND_P : PatLeaf<(i8 10)>; // alt. COND_PE
def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
-def X86_COND_NS : PatLeaf<(i8 12)>;
-def X86_COND_O : PatLeaf<(i8 13)>;
-def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
-def X86_COND_S : PatLeaf<(i8 15)>;
+def X86_COND_L : PatLeaf<(i8 12)>; // alt. COND_NGE
+def X86_COND_GE : PatLeaf<(i8 13)>; // alt. COND_NL
+def X86_COND_LE : PatLeaf<(i8 14)>; // alt. COND_NG
+def X86_COND_G : PatLeaf<(i8 15)>; // alt. COND_NLE
def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
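
The renumbered X86_COND_* leaves above now line up with the hardware condition-code encoding (O=0 through G=15), so a condition value can be combined with a base opcode by simple arithmetic. A hedged sketch of that arithmetic, using plain x86 encoding facts rather than any code from this patch:

  #include <cassert>
  #include <cstdint>

  // Condition codes in hardware-encoding order, matching the PatLeaf values
  // above (O=0, NO=1, B=2, AE=3, E=4, NE=5, BE=6, A=7, S=8, NS=9, P=10,
  // NP=11, L=12, GE=13, LE=14, G=15).
  enum CondCode : uint8_t { O, NO, B, AE, E, NE, BE, A, S, NS, P, NP, L, GE, LE, G };

  // With that ordering, a conditional opcode is a base opcode plus the code.
  static uint8_t jccRel8Opcode(CondCode CC) { return 0x70 + CC; }  // Jcc rel8
  static uint8_t setccOpcode(CondCode CC)   { return 0x90 + CC; }  // 0F 9x SETcc
  static uint8_t cmovccOpcode(CondCode CC)  { return 0x40 + CC; }  // 0F 4x CMOVcc

  int main() {
    assert(jccRel8Opcode(E) == 0x74);  // JE rel8
    assert(setccOpcode(NE) == 0x95);   // SETNE r/m8 (after the 0F prefix)
    assert(cmovccOpcode(B) == 0x42);   // CMOVB     (after the 0F prefix)
    return 0;
  }
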
@@ -1007,16 +1048,13 @@ def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
// Eventually, it would be nice to allow ConstantHoisting to merge constants
// globally for potentially added savings.
//
-def imm8_su : PatLeaf<(i8 relocImm), [{
+def relocImm8_su : PatLeaf<(i8 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def imm16_su : PatLeaf<(i16 relocImm), [{
+def relocImm16_su : PatLeaf<(i16 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def imm32_su : PatLeaf<(i32 relocImm), [{
- return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
+def relocImm32_su : PatLeaf<(i32 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
@@ -1121,7 +1159,19 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+// We can treat an i8/i16 extending load to i64 as a 32-bit load if it's known
+// to be 4 byte aligned or better.
+def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType != ISD::EXTLOAD)
+ return false;
+ if (LD->getMemoryVT() == MVT::i32)
+ return true;
+
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+}]>;
// An 'and' node with a single use.
@@ -1517,16 +1567,16 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(store (i8 imm8_su:$src), addr:$dst)]>;
+ [(store (i8 relocImm8_su:$src), addr:$dst)]>;
def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(store (i16 imm16_su:$src), addr:$dst)]>, OpSize16;
+ [(store (i16 relocImm16_su:$src), addr:$dst)]>, OpSize16;
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(store (i32 imm32_su:$src), addr:$dst)]>, OpSize32;
+ [(store (i32 relocImm32_su:$src), addr:$dst)]>, OpSize32;
def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64immSExt32_su:$src, addr:$dst)]>,
+ [(store i64relocImmSExt32_su:$src, addr:$dst)]>,
Requires<[In64BitMode]>;
} // SchedRW
@@ -1773,36 +1823,36 @@ let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
}
let SchedRW = [WriteBitTest] in {
-def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16u8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>,
+ [(set EFLAGS, (X86bt GR16:$src1, imm:$src2))]>,
OpSize16, TB;
-def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32u8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>,
+ [(set EFLAGS, (X86bt GR32:$src1, imm:$src2))]>,
OpSize32, TB;
-def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64u8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR64:$src1, imm:$src2))]>, TB;
} // SchedRW
// Note that these instructions aren't slow because that only applies when the
// other operand is in a register. When it's an immediate, bt is still fast.
let SchedRW = [WriteBitTestImmLd] in {
-def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1),
- i16immSExt8:$src2))]>,
+ imm:$src2))]>,
OpSize16, TB;
-def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi32 addr:$src1),
- i32immSExt8:$src2))]>,
+ imm:$src2))]>,
OpSize32, TB;
-def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
- i64immSExt8:$src2))]>, TB,
+ imm:$src2))]>, TB,
Requires<[In64BitMode]>;
} // SchedRW
@@ -1832,20 +1882,20 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
}
let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
Requires<[In64BitMode]>;
}
@@ -1875,24 +1925,24 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
}
let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
-def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>,
OpSize32, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", []>,
OpSize16, TB;
-def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", []>,
OpSize32, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
Requires<[In64BitMode]>;
}
@@ -1922,20 +1972,20 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
}
let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
} // SchedRW
let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
Requires<[In64BitMode]>;
}
@@ -2090,12 +2140,13 @@ def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
- "cmpxchg8b\t$dst", []>, TB;
+ "cmpxchg8b\t$dst", []>, TB, Requires<[HasCmpxchg8b]>;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+// NOTE: In64BitMode check needed for the AssemblerPredicate.
def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
"cmpxchg16b\t$dst", []>,
- TB, Requires<[HasCmpxchg16b, In64BitMode]>;
+ TB, Requires<[HasCmpxchg16b,In64BitMode]>;
} // SchedRW, mayLoad, mayStore, hasSideEffects
@@ -2388,6 +2439,11 @@ def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
return hasNoCarryFlagUses(SDValue(N, 1));
}]>;
+def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86and_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
let Predicates = [HasBMI] in {
// FIXME: patterns for the load versions are not implemented
def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2406,12 +2462,20 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
// Versions to match flag producing ops.
- // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
- // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSR32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSR64rr GR64:$src)>;
+
def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
(BLSMSK32rr GR32:$src)>;
def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
(BLSMSK64rr GR64:$src)>;
+
+ def : Pat<(and_flag_nocf GR32:$src, (ineg GR32:$src)),
+ (BLSI32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (ineg GR64:$src)),
+ (BLSI64rr GR64:$src)>;
}
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2653,16 +2717,12 @@ defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W;
// MONITORX/MWAITX Instructions
//
let SchedRW = [ WriteSystem ] in {
- let usesCustomInserter = 1 in {
- def MONITORX : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_monitorx addr:$src1, GR32:$src2, GR32:$src3)]>,
- Requires<[ HasMWAITX ]>;
- }
-
- let Uses = [ EAX, ECX, EDX ] in {
- def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
- TB, Requires<[ HasMWAITX ]>;
- }
+ let Uses = [ EAX, ECX, EDX ] in
+ def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+ TB, Requires<[ HasMWAITX, Not64BitMode ]>;
+ let Uses = [ RAX, ECX, EDX ] in
+ def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+ TB, Requires<[ HasMWAITX, In64BitMode ]>;
let Uses = [ ECX, EAX, EBX ] in {
def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
@@ -2676,9 +2736,9 @@ def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>,
def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>,
Requires<[ In64BitMode ]>;
-def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>,
+def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>,
Requires<[ Not64BitMode ]>;
-def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
+def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>,
Requires<[ In64BitMode ]>;
//===----------------------------------------------------------------------===//
@@ -2738,21 +2798,50 @@ def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
} // SchedRW
//===----------------------------------------------------------------------===//
+// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
+//
+let SchedRW = [WriteStore], Defs = [EFLAGS] in {
+ def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ "enqcmd\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
+ T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+ def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ "enqcmd\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
+ T8XD, AdSize32, Requires<[HasENQCMD]>;
+ def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ "enqcmd\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
+ T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+
+ def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ "enqcmds\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
+ T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+ def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ "enqcmds\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
+ T8XS, AdSize32, Requires<[HasENQCMD]>;
+ def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ "enqcmds\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
+ T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
// CLZERO Instruction
//
let SchedRW = [WriteSystem] in {
let Uses = [EAX] in
- def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
- TB, Requires<[HasCLZERO]>;
-
- let usesCustomInserter = 1 in {
- def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
- [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
- }
+ def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+ TB, Requires<[HasCLZERO, Not64BitMode]>;
+ let Uses = [RAX] in
+ def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+ TB, Requires<[HasCLZERO, In64BitMode]>;
} // SchedRW
-def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
-def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
+def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
@@ -2812,8 +2901,6 @@ let Predicates = [HasTBM] in {
(TZMSK64rr GR64:$src)>;
// Patterns to match flag producing ops.
- // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
- // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
(BLCI32rr GR32:$src)>;
def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
@@ -2825,6 +2912,11 @@ let Predicates = [HasTBM] in {
def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
(BLCI64rr GR64:$src)>;
+ def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (BLCIC32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (BLCIC64rr GR64:$src)>;
+
def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
(BLCMSK32rr GR32:$src)>;
def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
@@ -2849,6 +2941,11 @@ let Predicates = [HasTBM] in {
(T1MSKC32rr GR32:$src)>;
def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
(T1MSKC64rr GR64:$src)>;
+
+ def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (TZMSK32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (TZMSK64rr GR64:$src)>;
} // HasTBM
//===----------------------------------------------------------------------===//
@@ -3231,39 +3328,39 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
- def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
- (Inst RST:$op), EmitAlias>;
- def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
+ def : InstAlias<!strconcat(Mnemonic, "\t$op"),
+ (Inst RSTi:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, "\t{%st, %st|st, st}"),
(Inst ST0), EmitAlias>;
}
-defm : FpUnaryAlias<"fadd", ADD_FST0r>;
+defm : FpUnaryAlias<"fadd", ADD_FST0r, 0>;
defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
-defm : FpUnaryAlias<"fsub", SUB_FST0r>;
-defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
-defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
-defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
-defm : FpUnaryAlias<"fmul", MUL_FST0r>;
-defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
-defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
-defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
-defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
-defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0, 0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0, 0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r, 0>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0, 0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0, 0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0, 0>;
defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
-defm : FpUnaryAlias<"fcompi", COM_FIPr>;
-defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr, 0>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr, 0>;
-// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
+// Handle "f{mulp,addp} $op, %st(0)" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
-def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
-def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
-def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
-def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
-def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
-def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
+def : InstAlias<"faddp\t{$op, %st|st, $op}", (ADD_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fmulp\t{$op, %st|st, $op}", (MUL_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{|r}p\t{$op, %st|st, $op}", (SUBR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{r|}p\t{$op, %st|st, $op}", (SUB_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{|r}p\t{$op, %st|st, $op}", (DIVR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{r|}p\t{$op, %st|st, $op}", (DIV_FPrST0 RSTi:$op), 0>;
def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f3357170576..57835b1a256a 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,9 +1,8 @@
//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -153,7 +152,9 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
-let SchedRW = [WriteEMMS] in
+let SchedRW = [WriteEMMS],
+ Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//
@@ -544,7 +545,7 @@ let Predicates = [HasMMX, HasSSE1] in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
- Sched<[WriteVecInsert]>;
+ Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td
index c1a8cc7c5fbf..f7d931510fe2 100644
--- a/lib/Target/X86/X86InstrMPX.td
+++ b/lib/Target/X86/X86InstrMPX.td
@@ -1,9 +1,8 @@
//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrSGX.td b/lib/Target/X86/X86InstrSGX.td
index 488cc4438076..747f5aa86653 100644
--- a/lib/Target/X86/X86InstrSGX.td
+++ b/lib/Target/X86/X86InstrSGX.td
@@ -1,9 +1,8 @@
//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e2bcd18ce660..7d0a5b87baf4 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1,9 +1,8 @@
//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -22,6 +21,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
Domain d, X86FoldableSchedWrite sched,
bit Is2Addr = 1> {
+let isCodeGenOnly = 1 in {
let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
@@ -37,6 +37,7 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
+}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
@@ -44,7 +45,7 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
ValueType VT, string asm, Operand memopr,
ComplexPattern mem_cpat, Domain d,
X86FoldableSchedWrite sched, bit Is2Addr = 1> {
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
@@ -224,16 +225,29 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
}
// Loading from memory, automatically zeroing the upper bits.
-multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_pat, string OpcodeStr, Domain d> {
- def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
+ PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
+ Domain d> {
+ def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))], d>,
+ [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
- def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))], d>,
+ [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
Sched<[WriteFLoad]>;
+
+ // _alt version uses FR32/FR64 register class.
+ let isCodeGenOnly = 1 in {
+ def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))], d>,
+ VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
+ def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))], d>,
+ Sched<[WriteFLoad]>;
+ }
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
@@ -242,49 +256,25 @@ defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
SSEPackedDouble, "MOVSD", UseSSE2>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
- defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
+ defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
SSEPackedSingle>, XS;
- defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
+ defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
SSEPackedDouble>, XD;
}
// Patterns
let Predicates = [UseAVX] in {
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
- def : Pat<(v4f32 (X86vzload addr:$src)),
- (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
-
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzload addr:$src)),
- (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (VMOVSSrm addr:$src)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (VMOVSDrm addr:$src)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
- def : Pat<(v8f32 (X86vzload addr:$src)),
+ def : Pat<(v8f32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
- def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+ def : Pat<(v4f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
- def : Pat<(v4f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
-
- // Extract and store.
- def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
- addr:$dst),
- (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
}
let Predicates = [UseAVX, OptForSize] in {
@@ -304,59 +294,24 @@ let Predicates = [UseAVX, OptForSize] in {
(SUBREG_TO_REG (i32 0),
(v4i32 (VMOVSSrr (v4i32 (V_SET0)),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
-
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2f64 (VMOVSDrr (v2f64 (V_SET0)),
- (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
- sub_xmm)>;
- def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2i64 (VMOVSDrr (v2i64 (V_SET0)),
- (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
- sub_xmm)>;
}
-let Predicates = [UseSSE1] in {
- let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSS to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
- }
-
- // MOVSSrm already zeros the high parts of the register.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
- def : Pat<(v4f32 (X86vzload addr:$src)),
- (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
-
- // Extract and store.
- def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
+let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
+// Move scalar to XMM zero-extended, zeroing a VR128 then do a
+// MOVSS to the lower bits.
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}
-let Predicates = [UseSSE2] in {
- // MOVSDrm already zeros the high parts of the register.
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- def : Pat<(v2f64 (X86vzload addr:$src)),
- (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
-}
-
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VMOVSSrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
-def : InstAlias<"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- (VMOVSDrr_REV VR128L:$dst, VR128:$src1, VR128H:$src2), 0>;
+let Predicates = [UseSSE2] in
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (MOVSDrm addr:$src)>;
+
+let Predicates = [UseSSE1] in
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (MOVSSrm addr:$src)>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
@@ -504,25 +459,6 @@ let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
} // SchedRW
} // Predicate
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
- (VMOVAPSrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
- (VMOVAPDrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
- (VMOVUPSrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
- (VMOVUPDrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovaps\t{$src, $dst|$dst, $src}",
- (VMOVAPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovapd\t{$src, $dst|$dst, $src}",
- (VMOVAPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovups\t{$src, $dst|$dst, $src}",
- (VMOVUPSYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovupd\t{$src, $dst|$dst, $src}",
- (VMOVUPDYrr_REV VR256L:$dst, VR256H:$src), 0>;
-
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovaps.s\t{$src, $dst|$dst, $src}",
(VMOVAPSrr_REV VR128:$dst, VR128:$src), 0>;
@@ -700,10 +636,10 @@ defm MOVL : sse12_mov_hilo_packed<0x12, X86Movsd, "movlp">;
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>,
+ []>,
VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
@@ -711,10 +647,10 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)]>,
VEX, VEX_WIG;
}// UseAVX
+let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>;
+ []>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
@@ -722,16 +658,19 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
} // SchedRW
let Predicates = [UseSSE1] in {
- // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
- (iPTR 0))), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
-
// This pattern helps select MOVLPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
- def : Pat<(X86Shufp (loadv4f32 addr:$src2), VR128:$src1, (i8 -28)),
+ def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
+ (i8 -28)),
(MOVLPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(v4f32 (X86vzload64 addr:$src)),
+ (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
+ def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
+ (MOVLPSmr addr:$dst, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -744,24 +683,20 @@ let SchedRW = [WriteFStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
let Predicates = [UseAVX] in {
+let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt
- (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (bc_v2f64 (v4f32 VR128:$src))),
- (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
+ []>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
} // UseAVX
+let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (extractelt
- (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (bc_v2f64 (v4f32 VR128:$src))),
- (iPTR 0))), addr:$dst)]>;
+ []>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
@@ -775,19 +710,31 @@ let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
+ (VMOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
+
+ // MOVLPD patterns
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
+ (VMOVLPDrm VR128:$src1, addr:$src2)>;
}
let Predicates = [UseSSE1] in {
// This pattern helps select MOVHPS on SSE1 only targets. With SSE2 we'll
// end up with a movsd or blend instead of shufp.
// No need for aligned load, we're only loading 64-bits.
- def : Pat<(X86Movlhps VR128:$src1, (loadv4f32 addr:$src2)),
+ def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
+
+ def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
+ addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE2] in {
@@ -798,11 +745,24 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
(v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
+
+ // MOVLPD patterns
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
+ // Use MOVLPD to load into the low bits from a full vector unless we can use
+ // BLENDPD.
+ def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
}
//===----------------------------------------------------------------------===//
@@ -847,13 +807,16 @@ let Constraints = "$src1 = $dst" in {
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, X86FoldableSchedWrite sched> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
- Sched<[sched]>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
- Sched<[sched.Folded]>;
+ string asm, string mem, X86FoldableSchedWrite sched,
+ SchedRead Int2Fpu = ReadDefault> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
+ Sched<[sched, Int2Fpu]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ mem#"\t{$src, $dst|$dst, $src}",
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
+ Sched<[sched.Folded]>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
@@ -872,74 +835,55 @@ let hasSideEffects = 0 in {
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm,
+ X86MemOperand x86memop, string asm, string mem,
X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Predicates = [UseAVX] in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
- Sched<[sched]>;
+ Sched<[sched, ReadDefault, ReadInt2Fpu]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // hasSideEffects = 0
}
-let Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si", "cvttss2si",
WriteCvtSS2I>,
XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si", "cvttss2si",
WriteCvtSS2I>,
XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si", "cvttsd2si",
WriteCvtSD2I>,
XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si", "cvttsd2si",
WriteCvtSD2I>,
XD, VEX, VEX_W, VEX_LIG;
-
-def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
}
+
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}",
+let isCodeGenOnly = 1 in {
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
WriteCvtI2SS>, XS, VEX_4V, VEX_LIG;
-defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}",
+defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}",
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
-defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}",
+defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG;
+} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
- def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
- def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0, "att">;
-
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -959,52 +903,32 @@ let Predicates = [UseAVX] in {
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
+let isCodeGenOnly = 1 in {
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si", "cvttss2si",
WriteCvtSS2I>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}",
+ "cvttss2si", "cvttss2si",
WriteCvtSS2I>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si", "cvttsd2si",
WriteCvtSD2I>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}",
+ "cvttsd2si", "cvttsd2si",
WriteCvtSD2I>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
- WriteCvtI2SS>, XS;
+ "cvtsi2ss", "cvtsi2ss{l}",
+ WriteCvtI2SS, ReadInt2Fpu>, XS;
defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- WriteCvtI2SS>, XS, REX_W;
+ "cvtsi2ss", "cvtsi2ss{q}",
+ WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
- WriteCvtI2SD>, XD;
+ "cvtsi2sd", "cvtsi2sd{l}",
+ WriteCvtI2SD, ReadInt2Fpu>, XD;
defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- WriteCvtI2SD>, XD, REX_W;
-
-def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SIrr GR32:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SIrr GR32:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0, "att">;
-def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0, "att">;
-def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0, "att">;
-def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0, "att">;
-
-def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
- (CVTSI2SSrm FR64:$dst, i32mem:$src), 0, "att">;
-def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
- (CVTSI2SDrm FR64:$dst, i32mem:$src), 0, "att">;
+ "cvtsi2sd", "cvtsi2sd{q}",
+ WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W;
+} // isCodeGenOnly = 1
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -1025,20 +949,20 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, X86MemOperand x86memop,
- string asm, X86FoldableSchedWrite sched,
+ string asm, string mem, X86FoldableSchedWrite sched,
bit Is2Addr = 1> {
let hasSideEffects = 0 in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- []>, Sched<[sched]>;
+ []>, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
let mayLoad = 1 in
def rm_Int : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
- !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ asm#"{"#mem#"}\t{$src2, $dst|$dst, $src2}",
+ asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -1057,48 +981,73 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
-let isCodeGenOnly = 1 in {
- let Predicates = [UseAVX] in {
- defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss{l}", WriteCvtI2SS, 0>, XS, VEX_4V;
- defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss{q}", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_W;
- defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd{l}", WriteCvtI2SD, 0>, XD, VEX_4V;
- defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd{q}", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_W;
- }
- let Constraints = "$src1 = $dst" in {
- defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2ss{l}", WriteCvtI2SS>, XS;
- defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2ss{q}", WriteCvtI2SS>, XS, REX_W;
- defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- i32mem, "cvtsi2sd{l}", WriteCvtI2SD>, XD;
- defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- i64mem, "cvtsi2sd{q}", WriteCvtI2SD>, XD, REX_W;
- }
-} // isCodeGenOnly = 1
+let Predicates = [UseAVX] in {
+defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG;
+defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W;
+defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
+defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W;
+}
+let Constraints = "$src1 = $dst" in {
+ defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS;
+ defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W;
+ defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
+ defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W;
+}
+
+def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (VCVTSI2SSrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
+def : InstAlias<"vcvtsi2ss{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (VCVTSI642SSrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
+def : InstAlias<"vcvtsi2sd{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (VCVTSI2SDrr_Int VR128:$dst, VR128:$src1, GR32:$src2), 0, "att">;
+def : InstAlias<"vcvtsi2sd{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (VCVTSI642SDrr_Int VR128:$dst, VR128:$src1, GR64:$src2), 0, "att">;
+
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SSrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDrm_Int VR128:$dst, VR128:$src1, i32mem:$src), 0, "att">;
+
+def : InstAlias<"cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
+ (CVTSI2SSrr_Int VR128:$dst, GR32:$src), 0, "att">;
+def : InstAlias<"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ (CVTSI642SSrr_Int VR128:$dst, GR64:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrr_Int VR128:$dst, GR32:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ (CVTSI642SDrr_Int VR128:$dst, GR64:$src), 0, "att">;
+
+def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
+ (CVTSI2SSrm_Int VR128:$dst, i32mem:$src), 0, "att">;
+def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
+ (CVTSI2SDrm_Int VR128:$dst, i32mem:$src), 0, "att">;
/// SSE 1 Only
// Aliases for intrinsics
-let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
- WriteCvtSS2I>, XS, VEX;
+ WriteCvtSS2I>, XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I>,
- XS, VEX, VEX_W;
+ XS, VEX, VEX_LIG, VEX_W;
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
- WriteCvtSS2I>, XD, VEX;
+ WriteCvtSS2I>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I>,
- XD, VEX, VEX_W;
+ XD, VEX, VEX_LIG, VEX_W;
}
defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
ssmem, sse_load_f32, "cvttss2si",
@@ -1112,7 +1061,40 @@ defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSD2I>, XD, REX_W;
-} // isCodeGenOnly = 1
+
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (VCVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
+
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SIrm_Int GR32:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SIrm_Int GR32:$dst, f64mem:$src), 0, "att">;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSS2SI64rm_Int GR64:$dst, f32mem:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ (CVTTSD2SI64rm_Int GR64:$dst, f64mem:$src), 0, "att">;
let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
@@ -1143,7 +1125,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
SSEPackedSingle, WriteCvtI2PS>,
PS, Requires<[UseSSE2]>;
-let Predicates = [UseAVX] in {
+// AVX aliases
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1160,8 +1142,8 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0, "att">;
-}
+// SSE aliases
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
(CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0, "att">;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1182,7 +1164,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only
// Convert scalar double to scalar single
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1200,6 +1182,7 @@ def : Pat<(f32 (fpround FR64:$src)),
(VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
Requires<[UseAVX]>;
+let isCodeGenOnly = 1 in {
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fpround FR64:$src))]>,
@@ -1209,42 +1192,41 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
[(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
XD, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSD2SS.Folded]>;
+}
-let isCodeGenOnly = 1 in {
def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
- XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+ (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
+ XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
- VR128:$src1, sse_load_f64:$src2))]>,
- XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+ [(set VR128:$dst,
+ (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>,
+ XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>,
+ (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>;
def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
- VR128:$src1, sse_load_f64:$src2))]>,
+ [(set VR128:$dst,
+ (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>,
XD, Requires<[UseSSE2]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
}
-} // isCodeGenOnly = 1
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1257,51 +1239,36 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
XS, VEX_4V, VEX_LIG, VEX_WIG,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>;
-}
+} // isCodeGenOnly = 1, hasSideEffects = 0
def : Pat<(f64 (fpextend FR32:$src)),
(VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
- Requires<[UseAVX, OptForSpeed]>;
-
+let isCodeGenOnly = 1 in {
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fpextend FR32:$src))]>,
XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (extloadf32 addr:$src))]>,
+ [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
XS, Requires<[UseSSE2, OptForSize]>,
Sched<[WriteCvtSS2SD.Folded]>;
+} // isCodeGenOnly = 1
-// extload f32 -> f64. This matches load+fpextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fpextend, we have to match it
-// explicitly here.
-def : Pat<(fpextend (loadf32 addr:$src)),
- (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
-
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let hasSideEffects = 0 in {
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_WIG,
+ []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>,
+ []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
@@ -1316,7 +1283,7 @@ def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
[]>, XS, Requires<[UseSSE2]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
}
-} // isCodeGenOnly = 1
+} // hasSideEffects = 0
// Patterns used for matching (v)cvtsi2ss, (v)cvtsi2sd, (v)cvtsd2ss and
// (v)cvtss2sd intrinsic sequences from clang which produce unnecessary
@@ -1476,15 +1443,11 @@ def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
// XMM only
-def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
Sched<[WriteCvtPD2ILd]>, VEX_WIG;
-def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQrm VR128:$dst, f128mem:$src), 0, "intel">;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
@@ -1497,12 +1460,13 @@ def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
-def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
-def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
}
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
+def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
+
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1540,17 +1504,6 @@ def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src)
Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
}
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (VCVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
- (VCVTTPS2DQrm addr:$src)>;
- def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
- (VCVTTPS2DQYrr VR256:$src)>;
- def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
- (VCVTTPS2DQYrm addr:$src)>;
-}
-
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1562,39 +1515,23 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(v4i32 (X86cvttp2si (memopv4f32 addr:$src))))]>,
Sched<[WriteCvtPS2ILd]>;
-let Predicates = [UseSSE2] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
- (CVTTPS2DQrm addr:$src)>;
-}
-
-let Predicates = [HasAVX, NoVLX] in
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+let Predicates = [HasAVX, NoVLX] in {
+// XMM only
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
-
-// The assembler can recognize rr 256-bit instructions by seeing a ymm
-// register, but the same isn't true when using memory operands instead.
-// Provide other assembly rr and rm forms to address this explicitly.
-
-// XMM only
-def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
-
-let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv2f64 addr:$src))))]>,
VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
-def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0, "intel">;
// YMM only
-let Predicates = [HasAVX, NoVLX] in {
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1605,11 +1542,12 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
-}
-def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
+} // Predicates = [HasAVX, NoVLX]
+
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0, "intel">;
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0, "att">;
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
@@ -1618,21 +1556,6 @@ let Predicates = [HasAVX, NoVLX] in {
(VCVTTPD2DQYrm addr:$src)>;
}
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
- (VCVTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
- (VCVTPD2DQrm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
- (VCVTTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
- (VCVTTPD2DQrm addr:$src)>;
-} // Predicates = [HasAVX, NoVLX]
-
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1644,21 +1567,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
(v4i32 (X86cvttp2si (memopv2f64 addr:$src))))]>,
Sched<[WriteCvtPD2ILd]>;
-let Predicates = [UseSSE2] in {
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
- (CVTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
- (CVTPD2DQrm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
- (CVTTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
- (CVTTPD2DQrm addr:$src)>;
-} // Predicates = [UseSSE2]
-
// Convert packed single to packed double
let Predicates = [HasAVX, NoVLX] in {
// SSE2 instructions without OpSize prefix
@@ -1697,7 +1605,10 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+ (v2f64 (X86VSintToFP
+ (bc_v4i32
+ (v2i64 (scalar_to_vector
+ (loadi64 addr:$src)))))))]>,
VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1721,7 +1632,10 @@ let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+ (v2f64 (X86VSintToFP
+ (bc_v4i32
+ (v2i64 (scalar_to_vector
+ (loadi64 addr:$src)))))))]>,
Sched<[WriteCvtI2PDLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1731,17 +1645,13 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// AVX register conversion intrinsics
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (VCVTDQ2PDrm addr:$src)>;
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
// SSE2 register conversion intrinsics
let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
- (CVTDQ2PDrm addr:$src)>;
- def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(CVTDQ2PDrm addr:$src)>;
} // Predicates = [UseSSE2]
@@ -1749,38 +1659,31 @@ let Predicates = [UseSSE2] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
-let Predicates = [HasAVX, NoVLX] in
+let Predicates = [HasAVX, NoVLX] in {
+// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
-
-// XMM only
-def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
-let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
-def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSrm VR128:$dst, f128mem:$src), 0, "intel">;
-// YMM only
-let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (fpround VR256:$src))]>,
+ [(set VR128:$dst, (X86vfpround VR256:$src))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))]>,
+ [(set VR128:$dst, (X86vfpround (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
-}
-def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
+} // Predicates = [HasAVX, NoVLX]
+
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSrr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSYrm VR128:$dst, f256mem:$src), 0, "intel">;
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0, "att">;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
@@ -1791,28 +1694,11 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>,
Sched<[WriteCvtPD2PS.Folded]>;
-// AVX 256-bit register conversion intrinsics
-// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
-// whenever possible to avoid declaring two versions of each one.
-
let Predicates = [HasAVX, NoVLX] in {
- // Match fpround and fpextend for 128/256-bit conversions
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
- (VCVTPD2PSrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
- (VCVTPD2PSrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
- // Match fpround and fpextend for 128 conversions
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
- (CVTPD2PSrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
- (CVTPD2PSrm addr:$src)>;
+ def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
+ (VCVTPD2PSYrr VR256:$src)>;
+ def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+ (VCVTPD2PSYrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -1821,94 +1707,80 @@ let Predicates = [UseSSE2] in {
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, SDNode OpNode, ValueType VT,
- PatFrag ld_frag, string asm, string asm_alt,
+ SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string asm,
X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
Sched<[sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
-
- // Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched]>, NotMemoryFoldable;
- let mayLoad = 1 in
- def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
- }
}
-let ExeDomain = SSEPackedSingle in
-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
- "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
-let ExeDomain = SSEPackedDouble in
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
- "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
-
-let Constraints = "$src1 = $dst" in {
+let isCodeGenOnly = 1 in {
let ExeDomain = SSEPackedSingle in
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
- "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PS.Scl>, XS;
+ defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SchedWriteFCmpSizes.PS.Scl>, XS, VEX_4V, VEX_LIG, VEX_WIG;
let ExeDomain = SSEPackedDouble in
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
- "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SchedWriteFCmpSizes.PD.Scl>, XD;
+ defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SchedWriteFCmpSizes.PD.Scl>,
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
+
+ let Constraints = "$src1 = $dst" in {
+ let ExeDomain = SSEPackedSingle in
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmps, f32, loadf32,
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SchedWriteFCmpSizes.PS.Scl>, XS;
+ let ExeDomain = SSEPackedDouble in
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmps, f64, loadf64,
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SchedWriteFCmpSizes.PD.Scl>, XD;
+ }
}
-multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
+multiclass sse12_cmp_scalar_int<Operand memop,
Intrinsic Int, string asm, X86FoldableSchedWrite sched,
ComplexPattern mem_cpat> {
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, CC:$cc), asm,
+ (ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, memop:$src, CC:$cc), asm,
+ (ins VR128:$src1, memop:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-let isCodeGenOnly = 1 in {
- // Aliases to match intrinsics which expect XMM operand(s).
+// Aliases to match intrinsics which expect XMM operand(s).
+let ExeDomain = SSEPackedSingle in
+defm VCMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
+ "cmpss\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
+ XS, VEX_4V, VEX_LIG, VEX_WIG;
+let ExeDomain = SSEPackedDouble in
+defm VCMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
+ "cmpsd\t{$cc, $src, $src1, $dst|$dst, $src1, $src, $cc}",
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
+let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
- defm VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS, VEX_4V;
+ defm CMPSS : sse12_cmp_scalar_int<ssmem, int_x86_sse_cmp_ss,
+ "cmpss\t{$cc, $src, $dst|$dst, $src, $cc}",
+ SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
let ExeDomain = SSEPackedDouble in
- defm VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
- XD, VEX_4V;
- let Constraints = "$src1 = $dst" in {
- let ExeDomain = SSEPackedSingle in
- defm CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- SchedWriteFCmpSizes.PS.Scl, sse_load_f32>, XS;
- let ExeDomain = SSEPackedDouble in
- defm CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
-}
+ defm CMPSD : sse12_cmp_scalar_int<sdmem, int_x86_sse2_cmp_sd,
+ "cmpsd\t{$cc, $src, $dst|$dst, $src, $cc}",
+ SchedWriteFCmpSizes.PD.Scl, sse_load_f64>, XD;
}
@@ -1962,14 +1834,14 @@ let Defs = [EFLAGS] in {
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_WIG;
+ sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_WIG;
+ sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_WIG;
+ sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_WIG;
+ sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", WriteFCom>, PS;
@@ -1998,56 +1870,38 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, ValueType VT, string asm,
- string asm_alt, X86FoldableSchedWrite sched,
+ ValueType VT, string asm,
+ X86FoldableSchedWrite sched,
Domain d, PatFrag ld_frag> {
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
Sched<[sched]>;
def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
-
- // Accept explicit immediate argument form instead of comparison code.
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched]>, NotMemoryFoldable;
- let mayLoad = 1 in
- def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
- }
}
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
- "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
- "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
- "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
- "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
- "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, memopv4f32>, PS;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
- "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
}
@@ -2111,12 +1965,14 @@ let Predicates = [UseSSE1] in {
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
- X86FoldableSchedWrite sched, Domain d> {
+ X86FoldableSchedWrite sched, Domain d,
+ bit IsCommutable = 0> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+ let isCommutable = IsCommutable in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -2148,7 +2004,7 @@ let Constraints = "$src1 = $dst" in {
memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+ memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
}
//===----------------------------------------------------------------------===//
@@ -2238,6 +2094,13 @@ let Predicates = [HasAVX1Only] in {
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
+let Predicates = [UseSSE2] in {
+ // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+ (v2f64 (nonvolatile_load addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+}
+
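
(For context, a hedged illustration of the new UseSSE2 pattern above, written with SSE2 intrinsics; it is not part of the commit. UNPCKLPD only consumes the low element of its second operand, so when that operand is an under-aligned load the same value can be produced with MOVHPD's plain 8-byte load, which carries no 16-byte alignment requirement.)

  #include <emmintrin.h>
  #include <cstdio>

  // unpcklpd x, y computes <x[0], y[0]>; when y comes straight from memory,
  // movhpd yields the identical result from an 8-byte, alignment-free load.
  static __m128d via_unpcklo(__m128d x, const double *p) {
    return _mm_unpacklo_pd(x, _mm_loadu_pd(p));   // reads 16 bytes
  }
  static __m128d via_movhpd(__m128d x, const double *p) {
    return _mm_loadh_pd(x, p);                    // reads only 8 bytes
  }

  int main() {
    double mem[2] = {1.0, 2.0};
    __m128d x = _mm_set_pd(4.0, 3.0);             // x = <3.0, 4.0>
    double a[2], b[2];
    _mm_storeu_pd(a, via_unpcklo(x, mem));
    _mm_storeu_pd(b, via_movhpd(x, mem));
    std::printf("%g %g | %g %g\n", a[0], a[1], b[0], b[1]);  // 3 1 | 3 1
    return 0;
  }
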
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//
@@ -2523,99 +2386,6 @@ let Predicates = [HasAVX1Only] in {
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
-let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VXORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (VANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
-
- def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VXORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (VANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
-}
-
-let Predicates = [UseSSE1] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (ANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (ORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (XORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
- def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
- (COPY_TO_REGCLASS
- (v4f32 (ANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
- (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
- FR32)>;
-}
-
-let Predicates = [UseSSE2] in {
- // Use packed logical operations for scalar ops.
- def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (ANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (ORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (XORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
- def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
- (COPY_TO_REGCLASS
- (v2f64 (ANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
- (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
- FR64)>;
-}
-
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
(VPANDrr VR128:$src1, VR128:$src2)>;
@@ -2908,7 +2678,8 @@ let isCodeGenOnly = 1 in {
// patterns we have to try to match.
multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
ValueType VT, ValueType EltTy,
- RegisterClass RC, Predicate BasePredicate> {
+ RegisterClass RC, PatFrag ld_frag,
+ Predicate BasePredicate> {
let Predicates = [BasePredicate] in {
// extracted scalar math op with insert via movss/movsd
def : Pat<(VT (Move (VT VR128:$dst),
@@ -2917,6 +2688,11 @@ multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
RC:$src))))),
(!cast<Instruction>(OpcPrefix#rr_Int) VT:$dst,
(VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
+ def : Pat<(VT (Move (VT VR128:$dst),
+ (VT (scalar_to_vector
+ (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
+ (ld_frag addr:$src)))))),
+ (!cast<Instruction>(OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
}
// Repeat for AVX versions of the instructions.
@@ -2928,18 +2704,23 @@ multiclass scalar_math_patterns<SDNode Op, string OpcPrefix, SDNode Move,
RC:$src))))),
(!cast<Instruction>("V"#OpcPrefix#rr_Int) VT:$dst,
(VT (COPY_TO_REGCLASS RC:$src, VR128)))>;
+ def : Pat<(VT (Move (VT VR128:$dst),
+ (VT (scalar_to_vector
+ (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))),
+ (ld_frag addr:$src)))))),
+ (!cast<Instruction>("V"#OpcPrefix#rm_Int) VT:$dst, addr:$src)>;
}
}
-defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
-defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
-defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
-defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, UseSSE1>;
+defm : scalar_math_patterns<fadd, "ADDSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<fsub, "SUBSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<fmul, "MULSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
+defm : scalar_math_patterns<fdiv, "DIVSS", X86Movss, v4f32, f32, FR32, loadf32, UseSSE1>;
-defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
-defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
-defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
-defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, UseSSE2>;
+defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
+defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
@@ -2956,7 +2737,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType ScalarVT, X86MemOperand x86memop,
Operand intmemop, SDNode OpNode, Domain d,
X86FoldableSchedWrite sched, Predicate target> {
- let hasSideEffects = 0 in {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
[(set RC:$dst, (OpNode RC:$src1))], d>, Sched<[sched]>,
@@ -2967,8 +2748,9 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (OpNode (load addr:$src1)))], d>,
Sched<[sched.Folded]>,
Requires<[target, OptForSize]>;
+ }
- let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
+ let hasSideEffects = 0, Constraints = "$src1 = $dst", ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
Sched<[sched]>;
@@ -2977,7 +2759,6 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), []>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
- }
}
@@ -3022,7 +2803,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType ScalarVT, X86MemOperand x86memop,
Operand intmemop, SDNode OpNode, Domain d,
X86FoldableSchedWrite sched, Predicate target> {
- let hasSideEffects = 0 in {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[], d>, Sched<[sched]>;
@@ -3030,7 +2811,8 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[], d>, Sched<[sched.Folded, sched.ReadAfterFold]>;
- let isCodeGenOnly = 1, ExeDomain = d in {
+ }
+ let hasSideEffects = 0, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3041,7 +2823,6 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
- }
// We don't want to fold scalar loads into these instructions unless
// optimizing for size. This is because the folded instruction will have a
@@ -3197,23 +2978,6 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
}
}
-multiclass scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
- ValueType VT, bits<8> ImmV,
- Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
- }
-
- // Repeat for AVX versions of the instructions.
- let Predicates = [UseAVX] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
- }
-}
-
defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
@@ -3388,16 +3152,20 @@ def : Pat<(X86MFence), (MFENCE)>;
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//
+let mayLoad=1, hasSideEffects=1 in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
+let mayStore=1, hasSideEffects=1 in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
+let mayLoad=1, hasSideEffects=1 in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
TB, Sched<[WriteLDMXCSR]>;
+let mayStore=1, hasSideEffects=1 in
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
TB, Sched<[WriteSTMXCSR]>;
@@ -3529,17 +3297,6 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
} // ExeDomain = SSEPackedInt
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
- (VMOVDQArr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovdqa\t{$src, $dst|$dst, $src}",
- (VMOVDQAYrr_REV VR256L:$dst, VR256H:$src), 0>;
-def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
- (VMOVDQUrr_REV VR128L:$dst, VR128H:$src), 0>;
-def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
- (VMOVDQUYrr_REV VR256L:$dst, VR256H:$src), 0>;
-
// Reversed version with ".s" suffix for GAS compatibility.
def : InstAlias<"vmovdqa.s\t{$src, $dst|$dst, $src}",
(VMOVDQArr_REV VR128:$dst, VR128:$src), 0>;
@@ -4118,7 +3875,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
- Sched<[WriteVecInsert]>;
+ Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, u8imm:$src3),
@@ -4138,7 +3895,7 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>,
- PD, VEX, Sched<[WriteVecExtract]>;
+ PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4148,7 +3905,7 @@ def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
// Insert
let Predicates = [HasAVX, NoBWI] in
-defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V;
+defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
defm PINSRW : sse2_pinsrw, PD;
@@ -4279,19 +4036,11 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
[(set FR32:$dst, (bitconvert GR32:$src))]>,
VEX, Sched<[WriteVecMoveFromGpr]>;
- def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
- VEX, Sched<[WriteVecLoad]>;
def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>,
Sched<[WriteVecMoveFromGpr]>;
- def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
- Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
//===---------------------------------------------------------------------===//
@@ -4353,32 +4102,15 @@ def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
// Bitcast FR64 <-> GR64
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
- let Predicates = [UseAVX] in
- def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX, Sched<[WriteVecLoad]>;
def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))]>,
VEX, Sched<[WriteVecMoveToGpr]>;
- def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
- VEX, Sched<[WriteVecStore]>;
- def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- Sched<[WriteVecLoad]>;
def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))]>,
Sched<[WriteVecMoveToGpr]>;
- def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>,
- Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
//===---------------------------------------------------------------------===//
@@ -4389,18 +4121,10 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>,
VEX, Sched<[WriteVecMoveToGpr]>;
- def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>,
- VEX, Sched<[WriteVecStore]>;
def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>,
Sched<[WriteVecMoveToGpr]>;
- def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>,
- Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
let Predicates = [UseAVX] in {
@@ -4410,28 +4134,14 @@ let Predicates = [UseAVX] in {
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(VMOV64toPQIrr GR64:$src)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIrr GR64:$src)), sub_xmm)>;
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
- def : Pat<(v8i32 (X86vzload addr:$src)),
+ def : Pat<(v8i32 (X86vzload32 addr:$src)),
(SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
- // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIrr GR32:$src)), sub_xmm)>;
}
let Predicates = [UseSSE2] in {
@@ -4442,11 +4152,7 @@ let Predicates = [UseSSE2] in {
(MOV64toPQIrr GR64:$src)>;
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
- (MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
+ def : Pat<(v4i32 (X86vzload32 addr:$src)),
(MOVDI2PDIrm addr:$src)>;
}
@@ -4508,32 +4214,26 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
}
-// Aliases to help the assembler pick two byte VEX encodings by swapping the
-// operands relative to the normal instructions to use VEX.R instead of VEX.B.
-def : InstAlias<"vmovq\t{$src, $dst|$dst, $src}",
- (VMOVPQI2QIrr VR128L:$dst, VR128H:$src), 0>;
-
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
(VMOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
let Predicates = [UseAVX] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ def : Pat<(v2i64 (X86vzload64 addr:$src)),
(VMOVQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)),
- (VMOVQI2PQIrm addr:$src)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
- def : Pat<(v4i64 (X86vzload addr:$src)),
+ def : Pat<(v4i64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
+
+ def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
+ (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
+
+ def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
+ (MOVPQI2QImr addr:$dst, VR128:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -4560,6 +4260,19 @@ let Predicates = [UseSSE2] in {
(MOVZPQILo2PQIrr VR128:$src)>;
}
+let Predicates = [UseAVX] in {
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v2f64 (VMOVZPQILo2PQIrr
+ (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
+ sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v2i64 (VMOVZPQILo2PQIrr
+ (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
+ sub_xmm)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
@@ -4667,17 +4380,17 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+ def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
let Predicates = [UseSSE3] in {
// No need for aligned memory as this only loads 64-bits.
- def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+ def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
(MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+ def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
(MOVDDUPrm addr:$src)>;
}
@@ -5130,15 +4843,12 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
//===---------------------------------------------------------------------===//
let SchedRW = [WriteSystem] in {
-let usesCustomInserter = 1 in {
-def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
- Requires<[HasSSE3]>;
-}
-
let Uses = [EAX, ECX, EDX] in
-def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
- TB, Requires<[HasSSE3]>;
+def MONITOR32rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
+ TB, Requires<[HasSSE3, Not64BitMode]>;
+let Uses = [RAX, ECX, EDX] in
+def MONITOR64rrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>,
+ TB, Requires<[HasSSE3, In64BitMode]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
@@ -5148,13 +4858,14 @@ def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;
-def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>,
+def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITOR32rrr)>,
Requires<[Not64BitMode]>;
-def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
+def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITOR64rrr)>,
Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
+// NOTE: Any Extend is promoted to Zero Extend in X86ISelDAGToDAG.cpp
//===----------------------------------------------------------------------===//
multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
@@ -5202,71 +4913,38 @@ defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem, NoVLX>;
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem, NoVLX>;
-// Patterns that we also need for any_extend.
-// Any_extend_vector_inreg is currently legalized to zero_extend_vector_inreg.
-multiclass SS41I_pmovx_avx2_patterns_base<string OpcPrefix, SDNode ExtOp> {
- // Register-Register patterns
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
- (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
- }
-
- let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
- (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
-
- def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
- (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
- }
-
- // AVX2 Register-Memory patterns
- let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- }
-
- let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-
- def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- }
-}
-
// AVX2 Patterns
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
- SDNode ExtOp, SDNode InVecOp> :
- SS41I_pmovx_avx2_patterns_base<OpcPrefix, ExtOp> {
-
+ SDNode ExtOp, SDNode InVecOp> {
// Register-Register patterns
+ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+ def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
+ (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
+ }
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8i32 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
def : Pat<(v4i64 (InVecOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
+ (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
def : Pat<(v4i64 (InVecOp (v8i16 VR128:$src))),
(!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
+ (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
}
// Simple Register-Memory patterns
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+
+ def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
+
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
@@ -5284,38 +4962,31 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
// AVX2 Register-Memory patterns
let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+
def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
- def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
+ def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
(!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+
def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
+ def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
(!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
- def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
+ def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
}
}
defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", sext, sext_invec>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", zext, zext_invec>;
-defm : SS41I_pmovx_avx2_patterns_base<"VPMOVZX", anyext>;
// SSE4.1/AVX patterns.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
@@ -5361,9 +5032,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
@@ -5371,19 +5040,13 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
(!cast<I>(OpcPrefix#BQrm) addr:$src)>;
@@ -5391,18 +5054,14 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WDrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
- (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
(!cast<I>(OpcPrefix#WQrm) addr:$src)>;
@@ -5411,9 +5070,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
@@ -5451,7 +5108,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
}
let Predicates = [HasAVX, NoBWI] in
- defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -5475,7 +5132,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
}
let Predicates = [HasAVX, NoBWI] in
- defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -5548,18 +5205,6 @@ let ExeDomain = SSEPackedSingle in {
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
-// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
-def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
- imm:$src2))),
- addr:$dst),
- (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasAVX]>;
-def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
- imm:$src2))),
- addr:$dst),
- (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[UseSSE41]>;
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//
@@ -5573,7 +5218,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
- Sched<[WriteVecInsert]>;
+ Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5586,7 +5231,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX, NoBWI] in
- defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
@@ -5599,7 +5244,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
- Sched<[WriteVecInsert]>;
+ Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i32mem:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5625,7 +5270,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
- Sched<[WriteVecInsert]>;
+ Sched<[WriteVecInsert, ReadDefault, ReadInt2Fpu]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i64mem:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5647,6 +5292,7 @@ let Constraints = "$src1 = $dst" in
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ let isCommutable = 1 in
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5853,7 +5499,7 @@ let Predicates = [HasAVX, NoVLX] in {
VEX, VEX_L, VEX_WIG;
}
}
-let Predicates = [HasAVX, NoAVX512] in {
+let Predicates = [UseAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
VEX_4V, VEX_LIG, VEX_WIG;
@@ -5862,141 +5508,17 @@ let Predicates = [HasAVX, NoAVX512] in {
}
let Predicates = [UseAVX] in {
- def : Pat<(ffloor FR32:$src),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
- def : Pat<(f32 (fnearbyint FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
- def : Pat<(f32 (fceil FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
- def : Pat<(f32 (frint FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
- def : Pat<(f32 (ftrunc FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
-
- def : Pat<(f64 (ffloor FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
- def : Pat<(f64 (fnearbyint FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
- def : Pat<(f64 (fceil FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
- def : Pat<(f64 (frint FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
- def : Pat<(f64 (ftrunc FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
+ def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
- def : Pat<(ffloor (loadf32 addr:$src)),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0x9))>;
- def : Pat<(f32 (fnearbyint (loadf32 addr:$src))),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xC))>;
- def : Pat<(f32 (fceil (loadf32 addr:$src))),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xA))>;
- def : Pat<(f32 (frint (loadf32 addr:$src))),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0x4))>;
- def : Pat<(f32 (ftrunc (loadf32 addr:$src))),
- (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
-
- def : Pat<(f64 (ffloor (loadf64 addr:$src))),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0x9))>;
- def : Pat<(f64 (fnearbyint (loadf64 addr:$src))),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xC))>;
- def : Pat<(f64 (fceil (loadf64 addr:$src))),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xA))>;
- def : Pat<(f64 (frint (loadf64 addr:$src))),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0x4))>;
- def : Pat<(f64 (ftrunc (loadf64 addr:$src))),
- (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (ffloor VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0xB))>;
-
- def : Pat<(v8f32 (ffloor VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0x9))>;
- def : Pat<(v8f32 (fnearbyint VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0xC))>;
- def : Pat<(v8f32 (fceil VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0xA))>;
- def : Pat<(v8f32 (frint VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0x4))>;
- def : Pat<(v8f32 (ftrunc VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0xB))>;
-
- def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0x9))>;
- def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0xC))>;
- def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0xA))>;
- def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0x4))>;
- def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0xB))>;
-
- def : Pat<(v4f64 (ffloor VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0x9))>;
- def : Pat<(v4f64 (fnearbyint VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0xC))>;
- def : Pat<(v4f64 (fceil VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0xA))>;
- def : Pat<(v4f64 (frint VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0x4))>;
- def : Pat<(v4f64 (ftrunc VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0xB))>;
-
- def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0x9))>;
- def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0xC))>;
- def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0xA))>;
- def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0x4))>;
- def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0xB))>;
+ def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
+ (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
+ (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
}
let ExeDomain = SSEPackedSingle in
@@ -6013,108 +5535,19 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
- def : Pat<(ffloor FR32:$src),
- (ROUNDSSr FR32:$src, (i32 0x9))>;
- def : Pat<(f32 (fnearbyint FR32:$src)),
- (ROUNDSSr FR32:$src, (i32 0xC))>;
- def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr FR32:$src, (i32 0xA))>;
- def : Pat<(f32 (frint FR32:$src)),
- (ROUNDSSr FR32:$src, (i32 0x4))>;
- def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr FR32:$src, (i32 0xB))>;
-
- def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0x9))>;
- def : Pat<(f64 (fnearbyint FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0xC))>;
- def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0xA))>;
- def : Pat<(f64 (frint FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0x4))>;
- def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr FR64:$src, (i32 0xB))>;
+ def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
+ (ROUNDSSr FR32:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
+ (ROUNDSDr FR64:$src1, imm:$src2)>;
}
let Predicates = [UseSSE41, OptForSize] in {
- def : Pat<(ffloor (loadf32 addr:$src)),
- (ROUNDSSm addr:$src, (i32 0x9))>;
- def : Pat<(f32 (fnearbyint (loadf32 addr:$src))),
- (ROUNDSSm addr:$src, (i32 0xC))>;
- def : Pat<(f32 (fceil (loadf32 addr:$src))),
- (ROUNDSSm addr:$src, (i32 0xA))>;
- def : Pat<(f32 (frint (loadf32 addr:$src))),
- (ROUNDSSm addr:$src, (i32 0x4))>;
- def : Pat<(f32 (ftrunc (loadf32 addr:$src))),
- (ROUNDSSm addr:$src, (i32 0xB))>;
-
- def : Pat<(f64 (ffloor (loadf64 addr:$src))),
- (ROUNDSDm addr:$src, (i32 0x9))>;
- def : Pat<(f64 (fnearbyint (loadf64 addr:$src))),
- (ROUNDSDm addr:$src, (i32 0xC))>;
- def : Pat<(f64 (fceil (loadf64 addr:$src))),
- (ROUNDSDm addr:$src, (i32 0xA))>;
- def : Pat<(f64 (frint (loadf64 addr:$src))),
- (ROUNDSDm addr:$src, (i32 0x4))>;
- def : Pat<(f64 (ftrunc (loadf64 addr:$src))),
- (ROUNDSDm addr:$src, (i32 0xB))>;
+ def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
+ (ROUNDSSm addr:$src1, imm:$src2)>;
+ def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
+ (ROUNDSDm addr:$src1, imm:$src2)>;
}
-let Predicates = [UseSSE41] in {
- def : Pat<(v4f32 (ffloor VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v4f32 (ffloor (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0xB))>;
-}
-
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSS", X86Movss,
- v4f32, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSS", X86Movss,
- v4f32, 0x02, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSD", X86Movsd,
- v2f64, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSD", X86Movsd,
- v2f64, 0x02, UseSSE41>;
-
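
(Reference note, not part of the commit: the deleted patterns above hard-code a handful of ROUNDSS/ROUNDSD round-control immediates which, after this change, are supplied through X86VRndScale's immediate operand instead. Below is a minimal C++ summary of that mapping, with the values taken from the deleted lines; the bit-field comments follow the usual SSE4.1 round-control encoding. The two scalar_unary_math_imm_patterns defms removed last used the mode-only values 0x01/0x02, i.e. the same rounding directions without the exception-mask bit.)

  #include <cstdint>

  // imm8 layout: bits 1:0 = rounding direction, bit 2 = use the MXCSR mode
  // instead, bit 3 = suppress the precision (inexact) exception.
  enum RoundImm : uint8_t {
    kFloor     = 0x9,  // toward -inf, inexact suppressed   (ffloor)
    kCeil      = 0xA,  // toward +inf, inexact suppressed   (fceil)
    kTrunc     = 0xB,  // toward zero, inexact suppressed   (ftrunc)
    kNearbyInt = 0xC,  // current MXCSR mode, suppressed    (fnearbyint)
    kRint      = 0x4   // current MXCSR mode, may raise it  (frint)
  };
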
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
@@ -6449,6 +5882,72 @@ def BlendCommuteImm8 : SDNodeXForm<imm, [{
return getI8Imm(Imm ^ 0xff, SDLoc(N));
}]>;
+// Turn a 4-bit blendi immediate to 8-bit for use with pblendw.
+def BlendScaleImm4 : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ uint8_t NewImm = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Imm & (1 << i))
+ NewImm |= 0x3 << (i * 2);
+ }
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 8-bit for use with pblendw.
+def BlendScaleImm2 : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ uint8_t NewImm = 0;
+ for (unsigned i = 0; i != 2; ++i) {
+ if (Imm & (1 << i))
+ NewImm |= 0xf << (i * 4);
+ }
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 4-bit for use with pblendd.
+def BlendScaleImm2to4 : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ uint8_t NewImm = 0;
+ for (unsigned i = 0; i != 2; ++i) {
+ if (Imm & (1 << i))
+ NewImm |= 0x3 << (i * 2);
+ }
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
+// Turn a 4-bit blendi immediate to 8-bit for use with pblendw and invert it.
+def BlendScaleCommuteImm4 : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ uint8_t NewImm = 0;
+ for (unsigned i = 0; i != 4; ++i) {
+ if (Imm & (1 << i))
+ NewImm |= 0x3 << (i * 2);
+ }
+ return getI8Imm(NewImm ^ 0xff, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 8-bit for use with pblendw and invert it.
+def BlendScaleCommuteImm2 : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ uint8_t NewImm = 0;
+ for (unsigned i = 0; i != 2; ++i) {
+ if (Imm & (1 << i))
+ NewImm |= 0xf << (i * 4);
+ }
+ return getI8Imm(NewImm ^ 0xff, SDLoc(N));
+}]>;
+
+// Turn a 2-bit blendi immediate to 4-bit for use with pblendd and invert it.
+def BlendScaleCommuteImm2to4 : SDNodeXForm<imm, [{
+ uint8_t Imm = N->getZExtValue();
+ uint8_t NewImm = 0;
+ for (unsigned i = 0; i != 2; ++i) {
+ if (Imm & (1 << i))
+ NewImm |= 0x3 << (i * 2);
+ }
+ return getI8Imm(NewImm ^ 0xf, SDLoc(N));
+}]>;
+
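
(The six BlendScale* transforms above all perform the same lane-widening arithmetic: each bit of a narrow blendi immediate is expanded to cover the corresponding lanes of a finer-grained pblendw/pblendd, and the Commute variants additionally invert the result. A standalone C++ sketch of that computation, not part of the commit and with illustrative names:)

  #include <cstdint>
  #include <cstdio>

  // Widen an n-bit blend immediate so each source lane maps to 'scale'
  // lanes of the wider blend (e.g. one v4i32 blend lane -> two v8i16 lanes).
  static uint8_t scaleBlendImm(uint8_t imm, unsigned n, unsigned scale) {
    uint8_t wide = 0;
    const unsigned laneMask = (1u << scale) - 1;  // 0x3 for scale 2, 0xf for 4
    for (unsigned i = 0; i != n; ++i)
      if (imm & (1u << i))
        wide |= laneMask << (i * scale);
    return wide;
  }

  int main() {
    // BlendScaleImm4: v4i32 mask 0b1010 -> pblendw mask 0b11001100.
    std::printf("%#x\n", scaleBlendImm(0xA, 4, 2));                     // 0xcc
    // BlendScaleCommuteImm2: scale a v2i64 mask to pblendw, then invert.
    std::printf("%#x\n", (unsigned)(scaleBlendImm(0x1, 2, 4) ^ 0xff));  // 0xf0
    return 0;
  }
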
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -6559,6 +6058,42 @@ let Predicates = [HasAVX2] in {
VEX_4V, VEX_L, VEX_WIG;
}
+// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
+// ExecutionDomainFixPass will cleanup domains later on.
+let Predicates = [HasAVX1Only] in {
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
+ (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
+ (VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
+ (VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
+
+// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
+// it from becoming movsd via commuting under optsize.
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
+
+def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
+ (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
+ (VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
+ (VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
+
+// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
+// it from becoming movss via commuting under optsize.
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
+ (VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
+ (VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+}
+
defm BLENDPS : SS41I_blend_rmi<0x0C, "blendps", X86Blendi, v4f32,
VR128, memop, f128mem, 1, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>;
@@ -6569,6 +6104,24 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
VR128, memop, i128mem, 1, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>;
+let Predicates = [UseSSE41] in {
+// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
+// it from becoming movss via commuting under optsize.
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
+ (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
+def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
+
+def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
+ (PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
+ (PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+}
+
// For insertion into the zero index (low half) of a 256-bit vector, it is
// more efficient to generate a blend with immediate instead of an insert*128.
let Predicates = [HasAVX] in {
@@ -6580,18 +6133,25 @@ def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
(VBLENDPSYrri VR256:$src1,
(INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv4f64 addr:$src2), (v2f64 VR128:$src1), (iPTR 0)),
+ (VBLENDPDYrmi (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xc)>;
+def : Pat<(insert_subvector (loadv8f32 addr:$src2), (v4f32 VR128:$src1), (iPTR 0)),
+ (VBLENDPSYrmi (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}
-/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
-multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
- RegisterClass RC, X86MemOperand x86memop,
- PatFrag mem_frag, Intrinsic IntId,
- X86FoldableSchedWrite sched> {
+/// SS41I_quaternary_avx - AVX SSE 4.1 with 4 operators
+multiclass SS41I_quaternary_avx<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT,
+ PatFrag mem_frag, SDNode OpNode,
+ X86FoldableSchedWrite sched> {
def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ [(set RC:$dst, (VT (OpNode RC:$src3, RC:$src2, RC:$src1)))],
SSEPackedInt>, TAPD, VEX_4V,
Sched<[sched]>;
@@ -6600,8 +6160,8 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1, (mem_frag addr:$src2),
- RC:$src3))], SSEPackedInt>, TAPD, VEX_4V,
+ (OpNode RC:$src3, (mem_frag addr:$src2),
+ RC:$src1))], SSEPackedInt>, TAPD, VEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
@@ -6612,68 +6172,47 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
-defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
- load, int_x86_sse41_blendvpd,
- SchedWriteFVarBlend.XMM>;
-defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
- loadv4f64, int_x86_avx_blendv_pd_256,
- SchedWriteFVarBlend.YMM>, VEX_L;
+defm VBLENDVPD : SS41I_quaternary_avx<0x4B, "vblendvpd", VR128, f128mem,
+ v2f64, loadv2f64, X86Blendv,
+ SchedWriteFVarBlend.XMM>;
+defm VBLENDVPDY : SS41I_quaternary_avx<0x4B, "vblendvpd", VR256, f256mem,
+ v4f64, loadv4f64, X86Blendv,
+ SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
- load, int_x86_sse41_blendvps,
- SchedWriteFVarBlend.XMM>;
-defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
- loadv8f32, int_x86_avx_blendv_ps_256,
- SchedWriteFVarBlend.YMM>, VEX_L;
+defm VBLENDVPS : SS41I_quaternary_avx<0x4A, "vblendvps", VR128, f128mem,
+ v4f32, loadv4f32, X86Blendv,
+ SchedWriteFVarBlend.XMM>;
+defm VBLENDVPSY : SS41I_quaternary_avx<0x4A, "vblendvps", VR256, f256mem,
+ v8f32, loadv8f32, X86Blendv,
+ SchedWriteFVarBlend.YMM>, VEX_L;
} // ExeDomain = SSEPackedSingle
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- load, int_x86_sse41_pblendvb,
- SchedWriteVarBlend.XMM>;
+defm VPBLENDVB : SS41I_quaternary_avx<0x4C, "vpblendvb", VR128, i128mem,
+ v16i8, loadv16i8, X86Blendv,
+ SchedWriteVarBlend.XMM>;
}
let Predicates = [HasAVX2] in {
-defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- load, int_x86_avx2_pblendvb,
- SchedWriteVarBlend.YMM>, VEX_L;
+defm VPBLENDVBY : SS41I_quaternary_avx<0x4C, "vpblendvb", VR256, i256mem,
+ v32i8, loadv32i8, X86Blendv,
+ SchedWriteVarBlend.YMM>, VEX_L;
}
let Predicates = [HasAVX] in {
- def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
- (v16i8 VR128:$src2))),
- (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
- def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
- (v4i32 VR128:$src2))),
+ def : Pat<(v4i32 (X86Blendv (v4i32 VR128:$mask), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
(VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
- def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
- (v4f32 VR128:$src2))),
- (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
- def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
- (v2i64 VR128:$src2))),
- (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
- def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
- (v2f64 VR128:$src2))),
+ def : Pat<(v2i64 (X86Blendv (v2i64 VR128:$mask), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
(VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
- def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
- (v8i32 VR256:$src2))),
+ def : Pat<(v8i32 (X86Blendv (v8i32 VR256:$mask), (v8i32 VR256:$src1),
+ (v8i32 VR256:$src2))),
(VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
- (v8f32 VR256:$src2))),
- (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
- (v4i64 VR256:$src2))),
- (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
- (v4f64 VR256:$src2))),
+ def : Pat<(v4i64 (X86Blendv (v4i64 VR256:$mask), (v4i64 VR256:$src1),
+ (v4i64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
}
-let Predicates = [HasAVX2] in {
- def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
- (v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-}
-
// Prefer a movss or movsd over a blendps when optimizing for size. These were
// changed to use blends because blends have better throughput on Sandy Bridge
// and Haswell, but movs[s/d] are 1-2 byte shorter instructions.
@@ -6708,17 +6247,6 @@ let Predicates = [HasAVX, OptForSpeed] in {
(v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
(i8 3))), sub_xmm)>;
-
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
- (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)),
- (i8 1))), sub_xmm)>;
- def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
- (SUBREG_TO_REG (i32 0),
- (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
- (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)),
- (i8 0xf))), sub_xmm)>;
}
// Prefer a movss or movsd over a blendps when optimizing for size. These were
@@ -6747,16 +6275,17 @@ let Predicates = [UseSSE41, OptForSpeed] in {
}
-/// SS41I_ternary_int - SSE 4.1 ternary operator
+/// SS41I_ternary - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- X86MemOperand x86memop, Intrinsic IntId,
- X86FoldableSchedWrite sched> {
+ multiclass SS41I_ternary<bits<8> opc, string OpcodeStr, ValueType VT,
+ PatFrag mem_frag, X86MemOperand x86memop,
+ SDNode OpNode, X86FoldableSchedWrite sched> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
"\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ [(set VR128:$dst,
+ (VT (OpNode XMM0, VR128:$src2, VR128:$src1)))]>,
Sched<[sched]>;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
@@ -6764,20 +6293,19 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
!strconcat(OpcodeStr,
"\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst,
- (IntId VR128:$src1,
- (mem_frag addr:$src2), XMM0))]>,
+ (OpNode XMM0, (mem_frag addr:$src2), VR128:$src1))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memop, f128mem,
- int_x86_sse41_blendvpd, SchedWriteFVarBlend.XMM>;
+defm BLENDVPD : SS41I_ternary<0x15, "blendvpd", v2f64, memopv2f64, f128mem,
+ X86Blendv, SchedWriteFVarBlend.XMM>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memop, f128mem,
- int_x86_sse41_blendvps, SchedWriteFVarBlend.XMM>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memop, i128mem,
- int_x86_sse41_pblendvb, SchedWriteVarBlend.XMM>;
+defm BLENDVPS : SS41I_ternary<0x14, "blendvps", v4f32, memopv4f32, f128mem,
+ X86Blendv, SchedWriteFVarBlend.XMM>;
+defm PBLENDVB : SS41I_ternary<0x10, "pblendvb", v16i8, memopv16i8, i128mem,
+ X86Blendv, SchedWriteVarBlend.XMM>;
// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
@@ -6794,20 +6322,11 @@ def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
(PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
let Predicates = [UseSSE41] in {
- def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
- (v16i8 VR128:$src2))),
- (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
- (v4i32 VR128:$src2))),
+ def : Pat<(v4i32 (X86Blendv (v4i32 XMM0), (v4i32 VR128:$src1),
+ (v4i32 VR128:$src2))),
(BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
- (v4f32 VR128:$src2))),
- (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
- (v2i64 VR128:$src2))),
- (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
- def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
- (v2f64 VR128:$src2))),
+ def : Pat<(v2i64 (X86Blendv (v2i64 XMM0), (v2i64 VR128:$src1),
+ (v2i64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
}
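
An illustrative sketch, separate from the patch itself: the X86Blendv node used in the patterns above models BLENDVPS/BLENDVPD/PBLENDVB, which pick each element from the second source when the sign (most significant) bit of the matching mask element is set. That is why an all-ones/all-zeros select mask can be lowered to these instructions directly. Assumes SSE4.1 and <immintrin.h>; the file name is hypothetical.

// blendv_demo.cpp : illustration only, not part of this patch.
// BLENDVPS selects a lane from the second source when the sign bit of the
// corresponding mask element is set, otherwise from the first source.
// Build (hypothetical): clang++ -msse4.1 -O2 blendv_demo.cpp
#include <immintrin.h>
#include <climits>
#include <cstdio>

int main() {
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_setr_ps(10.0f, 20.0f, 30.0f, 40.0f);
  // Sign bit set in lanes 1 and 3 selects those lanes from b.
  __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0, INT_MIN, 0, INT_MIN));

  __m128 r = _mm_blendv_ps(a, b, mask);

  float out[4];
  _mm_storeu_ps(out, r);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 20 3 40
}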
@@ -7451,17 +6970,6 @@ def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
"vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L;
-let Predicates = [HasAVX2, NoVLX] in {
-def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
- (VBROADCASTI128 addr:$src)>;
-def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
- (VBROADCASTI128 addr:$src)>;
-def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
- (VBROADCASTI128 addr:$src)>;
-def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
- (VBROADCASTI128 addr:$src)>;
-}
-
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF128 addr:$src)>;
@@ -7469,7 +6977,9 @@ def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
(VBROADCASTF128 addr:$src)>;
}
-let Predicates = [HasAVX1Only] in {
+// NOTE: We're using FP instructions here, but execution domain fixing can
+// convert to integer when profitable.
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTF128 addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcast (loadv4i32 addr:$src))),
@@ -7765,12 +7275,10 @@ let Predicates = [HasF16C, NoVLX] in {
WriteCvtPS2PHYSt>, VEX_L;
// Pattern match vcvtph2ps of a scalar i64 load.
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
- (VCVTPH2PSrm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+ def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
+ (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTPH2PSrm addr:$src)>;
def : Pat<(store (f64 (extractelt
@@ -7835,6 +7343,7 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
(commuteXForm imm:$src3))>;
}
+let Predicates = [HasAVX2] in {
defm VPBLENDD : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v4i32,
SchedWriteBlend.XMM, VR128, i128mem,
BlendCommuteImm4>;
@@ -7842,28 +7351,26 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L;
-// For insertion into the zero index (low half) of a 256-bit vector, it is
-// more efficient to generate a blend with immediate instead of an insert*128.
-let Predicates = [HasAVX2] in {
-def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
- (VPBLENDDYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
-def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
- (VPBLENDDYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
-def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
- (VPBLENDDYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
-def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
- (VPBLENDDYrri VR256:$src1,
- (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
- VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
+ (VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
+ (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
+def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
+ (VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
+
+def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
+ (VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
+def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
+ (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
+def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
+ (VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
}
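
A sketch of what the BlendScaleImm* transforms are presumed to do here, illustrative only and not part of the patch: a v4i64 blend carries one mask bit per qword, but VPBLENDD works on dwords, so each bit has to be doubled into two adjacent bits (and, for the commuted memory forms, the scaled mask inverted). The helper name below is hypothetical; plain C++.

// blend_imm_scale.cpp : illustration only, not part of this patch.
// Expands an n-bit element blend mask to 2n bits by doubling each bit,
// mirroring how a qword blend immediate is rewritten for a dword blend.
#include <cstdint>
#include <cstdio>

// Hypothetical helper: duplicate each of the low n bits of imm into two bits.
static uint8_t scaleBlendImm(uint8_t imm, unsigned n) {
  uint8_t out = 0;
  for (unsigned i = 0; i < n; ++i)
    if (imm & (1u << i))
      out |= 0x3u << (2 * i);
  return out;
}

int main() {
  // A v4i64 blend mask of 0b0101 selects qwords 0 and 2 from the second
  // source; as a v8i32 (VPBLENDD) mask that becomes 0b00110011 = 0x33.
  std::printf("0x%02x\n", scaleBlendImm(0x5, 4));                      // 0x33
  // The commuted memory form would additionally invert the scaled mask.
  std::printf("0x%02x\n", (uint8_t)(~scaleBlendImm(0x5, 4) & 0xff));   // 0xcc
}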
-let Predicates = [HasAVX1Only] in {
+// For insertion into the zero index (low half) of a 256-bit vector, it is
+// more efficient to generate a blend with immediate instead of an insert*128.
+// NOTE: We're using FP instructions here, but execution domain fixing should
+// take care of using integer instructions when profitable.
+let Predicates = [HasAVX] in {
def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
(VBLENDPSYrri VR256:$src1,
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
@@ -7880,6 +7387,19 @@ def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
(VBLENDPSYrri VR256:$src1,
(INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
VR128:$src2, sub_xmm), 0xf)>;
+
+def : Pat<(insert_subvector (loadv8i32 addr:$src2), (v4i32 VR128:$src1), (iPTR 0)),
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0)),
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)),
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
+def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)),
+ (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src1, sub_xmm), addr:$src2, 0xf0)>;
}
//===----------------------------------------------------------------------===//
@@ -7930,9 +7450,9 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
let Predicates = [HasAVX2, NoVLX] in {
// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
(VPBROADCASTQYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
@@ -7952,9 +7472,15 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWrm addr:$src)>;
def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
}
@@ -8038,7 +7564,7 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPrr VR128:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+ def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
(VMOVDDUPrm addr:$src)>;
}
@@ -8236,19 +7762,14 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT, string BlendStr, ValueType ZeroVT> {
// masked store
- def: Pat<(X86mstore (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
+ def: Pat<(masked_store (VT RC:$src), addr:$ptr, (MaskVT RC:$mask)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
// masked load
- def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
+ def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
- def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask),
- (VT (bitconvert (ZeroVT immAllZerosV))))),
+ def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
+ (VT immAllZerosV))),
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
- def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
- (!cast<Instruction>(BlendStr#"rr")
- RC:$src0,
- (VT (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)),
- RC:$mask)>;
}
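
Illustrative only, not part of the patch: the masked_load/masked_store patterns above map straight onto VMASKMOVPS and friends, and a masked load writes zero to the lanes whose mask bit is clear, which is why the undef and all-zeros passthru forms need no extra blend. A small sketch with the AVX intrinsics, assuming <immintrin.h>; the file name and build line are hypothetical.

// maskload_demo.cpp : illustration only, not part of this patch.
// VMASKMOVPS loads only the lanes whose mask element has its sign bit set
// and writes zero to the remaining lanes.
// Build (hypothetical): clang++ -mavx -O2 maskload_demo.cpp
#include <immintrin.h>
#include <climits>
#include <cstdio>

int main() {
  alignas(16) const float src[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  // Load lanes 0 and 3 only (the sign bit of the mask element selects a lane).
  __m128i mask = _mm_setr_epi32(INT_MIN, 0, 0, INT_MIN);

  __m128 v = _mm_maskload_ps(src, mask);

  float out[4];
  _mm_storeu_ps(out, v);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 0 0 4
}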
let Predicates = [HasAVX] in {
defm : maskmov_lowering<"VMASKMOVPS", VR128, v4f32, v4i32, "VBLENDVPS", v4i32>;
@@ -8275,21 +7796,6 @@ let Predicates = [HasAVX2] in {
// Provide a fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
-let Predicates = [HasAVX2, NoVLX] in {
-def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
- (VINSERTI128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v2i64 VR128:$src), 1)>;
-def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
- (VINSERTI128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v4i32 VR128:$src), 1)>;
-def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
- (VINSERTI128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v8i16 VR128:$src), 1)>;
-def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
- (VINSERTI128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
- (v16i8 VR128:$src), 1)>;
-}
-
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
@@ -8299,7 +7805,9 @@ def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
(v4f32 VR128:$src), 1)>;
}
-let Predicates = [HasAVX1Only] in {
+// NOTE: We're using FP instructions here, but execution domain fixing can
+// convert to integer when profitable.
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
(v2i64 VR128:$src), 1)>;
@@ -8350,20 +7858,11 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX2, NoVLX] in {
- defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
- defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
- defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
- defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
- defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
-
- def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
- (VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
- (VPSRAVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
- (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
- (VPSRAVDYrm VR256:$src1, addr:$src2)>;
+ defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
+ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
+ defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
+ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
+ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}
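
Illustrative only, not part of the patch: the switch from generic shl/srl/sra to X86vshlv/X86vsrlv/X86vsrav presumably reflects that VPSLLVD and friends have fully defined behavior for out-of-range counts (logical shifts produce zero, arithmetic shifts fill with the sign bit), something the generic nodes do not promise. A small sketch with the AVX2 intrinsic, assuming <immintrin.h>; the file name and build line are hypothetical.

// varshift_demo.cpp : illustration only, not part of this patch.
// VPSLLVD shifts each dword by its own count; counts >= 32 yield zero
// rather than being undefined, which is the behavior the X86-specific
// selection nodes capture.
// Build (hypothetical): clang++ -mavx2 -O2 varshift_demo.cpp
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  __m128i vals   = _mm_setr_epi32(1, 1, 1, 1);
  __m128i counts = _mm_setr_epi32(0, 4, 31, 40);  // last count is out of range

  __m128i r = _mm_sllv_epi32(vals, counts);

  alignas(16) int32_t out[4];
  _mm_store_si128((__m128i *)out, r);
  // expect: 1 16 -2147483648 0  (1 << 31 is INT32_MIN as a dword; count 40 -> 0)
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
}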
//===----------------------------------------------------------------------===//
@@ -8393,7 +7892,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
VEX, VEX_L, Sched<[WriteLoad]>;
}
-let Predicates = [UseAVX2] in {
+let Predicates = [HasAVX2] in {
let mayLoad = 1, hasSideEffects = 0, Constraints
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
in {
diff --git a/lib/Target/X86/X86InstrSVM.td b/lib/Target/X86/X86InstrSVM.td
index 2dc6e8b43667..82c8e74156b2 100644
--- a/lib/Target/X86/X86InstrSVM.td
+++ b/lib/Target/X86/X86InstrSVM.td
@@ -1,9 +1,8 @@
//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 7cd63a6dd820..9d974b716dda 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -1,9 +1,8 @@
//===-- X86InstrShiftRotate.td - Shift and Rotate Instrs ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,11 +30,11 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (shl GR64:$src1, CL))]>;
} // Uses = [CL], SchedRW
+let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>,
@@ -473,17 +472,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 relocImm:$src2)))]>;
def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, OpSize16;
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 relocImm:$src2)))]>,
+ OpSize16;
def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>, OpSize32;
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 relocImm:$src2)))]>,
+ OpSize32;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, u8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 relocImm:$src2)))]>;
// Rotate by 1
def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -586,16 +587,16 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
// Rotate by 1
def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 7)))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 15)))]>, OpSize16;
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize16;
def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 31)))]>, OpSize32;
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>, OpSize32;
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 63)))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
} // Constraints = "$src = $dst", SchedRW
let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in {
@@ -634,18 +635,18 @@ def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
// Rotate by 1
def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst)]>,
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
OpSize16;
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst)]>,
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
OpSize32;
def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
- [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)]>,
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
Requires<[In64BitMode]>;
} // SchedRW
@@ -807,13 +808,54 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
} // Defs = [EFLAGS]
+// Use the opposite rotate if it allows us to use the rotate by 1 instruction.
+def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>;
+def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
+def : Pat<(rotl GR32:$src1, (i8 31)), (ROR32r1 GR32:$src1)>;
+def : Pat<(rotl GR64:$src1, (i8 63)), (ROR64r1 GR64:$src1)>;
+def : Pat<(rotr GR8:$src1, (i8 7)), (ROL8r1 GR8:$src1)>;
+def : Pat<(rotr GR16:$src1, (i8 15)), (ROL16r1 GR16:$src1)>;
+def : Pat<(rotr GR32:$src1, (i8 31)), (ROL32r1 GR32:$src1)>;
+def : Pat<(rotr GR64:$src1, (i8 63)), (ROL64r1 GR64:$src1)>;
+
+def : Pat<(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst),
+ (ROR8m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst),
+ (ROR16m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst),
+ (ROR32m1 addr:$dst)>;
+def : Pat<(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst),
+ (ROR64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
+def : Pat<(store (rotr (loadi8 addr:$dst), (i8 7)), addr:$dst),
+ (ROL8m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi16 addr:$dst), (i8 15)), addr:$dst),
+ (ROL16m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
+ (ROL32m1 addr:$dst)>;
+def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
+ (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
+
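
These patterns rest on the identity that rotating an N-bit value left by N-1 equals rotating it right by 1 (and vice versa), which lets the immediate-free rotate-by-1 encodings, one byte shorter than the imm8 forms, be used. A quick standalone check in C++, illustrative only and not part of the patch; the file name is hypothetical.

// rot_identity.cpp : illustration only, not part of this patch.
// Verifies rotl(x, 7) == rotr(x, 1) for every 8-bit value.
#include <cstdint>
#include <cstdio>

static uint8_t rotl8(uint8_t x, unsigned n) { return (uint8_t)((x << n) | (x >> (8 - n))); }
static uint8_t rotr8(uint8_t x, unsigned n) { return (uint8_t)((x >> n) | (x << (8 - n))); }

int main() {
  for (unsigned v = 0; v < 256; ++v)
    if (rotl8((uint8_t)v, 7) != rotr8((uint8_t)v, 1))
      return std::puts("mismatch"), 1;
  std::puts("rotl by 7 == rotr by 1 for all 8-bit values");
}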
// Sandy Bridge and newer Intel processors support faster rotates using
// SHLD to avoid a partial flag update on the normal rotate instructions.
-let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
- def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
- (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
- def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
- (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+// Use a pseudo so that TwoInstructionPass and register allocation will see
+// this as a unary instruction.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
+ Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteSHDrri],
+ Constraints = "$src1 = $dst" in {
+ def SHLDROT32ri : I<0, Pseudo, (outs GR32:$dst),
+ (ins GR32:$src1, u8imm:$shamt), "",
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$shamt)))]>;
+ def SHLDROT64ri : I<0, Pseudo, (outs GR64:$dst),
+ (ins GR64:$src1, u8imm:$shamt), "",
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$shamt)))]>;
+
+ def SHRDROT32ri : I<0, Pseudo, (outs GR32:$dst),
+ (ins GR32:$src1, u8imm:$shamt), "",
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$shamt)))]>;
+ def SHRDROT64ri : I<0, Pseudo, (outs GR64:$dst),
+ (ins GR64:$src1, u8imm:$shamt), "",
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
}
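
Illustrative only, not part of the patch: SHLD with the same register for both sources is a left rotate, since shld(a, a, n) = (a << n) | (a >> (32 - n)), which is why the pseudos above can later be expanded to SHLD/SHRD while still looking unary to the passes named in the comment. A plain C++ check of the identity; the file name is hypothetical.

// shld_rotate.cpp : illustration only, not part of this patch.
// shld(dst, src, n) = (dst << n) | (src >> (32 - n)); with dst == src this
// is exactly a 32-bit rotate left.
#include <cstdint>
#include <cstdio>

static uint32_t shld32(uint32_t dst, uint32_t src, unsigned n) {
  return (dst << n) | (src >> (32 - n));   // n assumed in 1..31
}
static uint32_t rotl32(uint32_t x, unsigned n) {
  return (x << n) | (x >> (32 - n));       // n assumed in 1..31
}

int main() {
  uint32_t x = 0x12345678u;
  for (unsigned n = 1; n < 32; ++n)
    if (shld32(x, x, n) != rotl32(x, n))
      return std::puts("mismatch"), 1;
  std::puts("shld(x, x, n) == rotl(x, n) for n = 1..31");
}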
def ROT32L2R_imm8 : SDNodeXForm<imm, [{
@@ -871,19 +913,29 @@ let Predicates = [HasBMI2] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
+ def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri GR32:$src, imm:$shamt)>;
+ def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri GR64:$src, imm:$shamt)>;
+
def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
(RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
(RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
}
+ def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi addr:$src, imm:$shamt)>;
+ def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi addr:$src, imm:$shamt)>;
+
def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
(RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
(RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
- // immedidate shift, i.e. the following code is considered better
+ // immediate shift, i.e. the following code is considered better
//
// mov %edi, %esi
// shl $imm, %esi
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 35ee00b9e016..7050e1917494 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -1,9 +1,8 @@
//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,10 +14,10 @@
let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
- def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
+def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", []>, TB;
let Defs = [RAX, RCX, RDX] in
- def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
+def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
// CPU flow control instructions
@@ -411,7 +410,7 @@ let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
let Defs = [RAX, RDX], Uses = [ECX] in
- def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
+def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", []>, OpSize16, TB;
@@ -588,18 +587,13 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
//==-----------------------------------------------------------------------===//
// PKU - enable protection key
-let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
- def WRPKRU : PseudoI<(outs), (ins GR32:$src),
- [(int_x86_wrpkru GR32:$src)]>;
- def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
- [(set GR32:$dst, (int_x86_rdpkru))]>;
-}
-
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
- def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
+ def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru",
+ [(set EAX, (X86rdpkru ECX)), (implicit EDX)]>, TB;
let Uses = [EAX, ECX, EDX] in
- def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
+ def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru",
+ [(X86wrpkru EAX, EDX, ECX)]>, TB;
} // SchedRW
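
Illustrative only, not part of the patch: with these patterns RDPKRU/WRPKRU are selected directly from the intrinsics instead of going through custom-inserted pseudos; the instructions move the protection-key rights register through EAX with ECX and EDX fixed at zero. A minimal sketch, assuming the usual <immintrin.h> wrappers _rdpkru_u32() and _wrpkru() are available and that the CPU and OS actually enable protection keys; the file name and build line are hypothetical.

// pkru_demo.cpp : illustration only, not part of this patch.
// Reads PKRU and writes the same value back. On hardware or kernels
// without PKU these instructions fault, so treat this purely as a sketch.
// Build (hypothetical): clang++ -mpku -O2 pkru_demo.cpp
#include <immintrin.h>
#include <cstdio>

int main() {
  unsigned int pkru = _rdpkru_u32();   // RDPKRU: PKRU -> EAX (ECX must be 0)
  std::printf("PKRU = 0x%08x\n", pkru);
  _wrpkru(pkru);                       // WRPKRU: EAX -> PKRU (ECX = EDX = 0)
}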
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index 10c6eef78639..fc0da845299f 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -1,9 +1,8 @@
//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 06a438ebfcad..37bc4ce2e053 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -1,9 +1,8 @@
//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
index c417dc99b84d..e98843bd3ae3 100644
--- a/lib/Target/X86/X86InstrVecCompiler.td
+++ b/lib/Target/X86/X86InstrVecCompiler.td
@@ -1,9 +1,8 @@
//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -99,76 +98,6 @@ defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
-multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
- RegisterClass RC, ValueType DstTy,
- ValueType SrcTy, SubRegIndex SubIdx> {
- def : Pat<(alignedstore (DstTy (extract_subvector
- (SrcTy RC:$src), (iPTR 0))), addr:$dst),
- (!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
- (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
-
- def : Pat<(store (DstTy (extract_subvector
- (SrcTy RC:$src), (iPTR 0))), addr:$dst),
- (!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
- (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
- defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
- defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
- defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
-}
-
-let Predicates = [HasVLX] in {
- // Special patterns for storing subvector extracts of lower 128-bits
- // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
- sub_xmm>;
- defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
- sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
- v4i64, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
- v8i32, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
- v16i16, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
- v32i8, sub_xmm>;
-
- // Special patterns for storing subvector extracts of lower 128-bits of 512.
- // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
- sub_xmm>;
- defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
- sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
- v8i64, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
- v16i32, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
- v32i16, sub_xmm>;
- defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
- v64i8, sub_xmm>;
-
- // Special patterns for storing subvector extracts of lower 256-bits of 512.
- // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
- sub_ymm>;
- defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
- sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
- v8i64, sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
- v16i32, sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
- v32i16, sub_ymm>;
- defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
- v64i8, sub_ymm>;
-}
-
// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
@@ -176,7 +105,7 @@ multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, ValueType ZeroTy,
SubRegIndex SubIdx> {
- def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
+ def : Pat<(DstTy (insert_subvector immAllZerosV,
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
(SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
@@ -398,7 +327,7 @@ let Predicates = [HasBWI, HasDQI] in {
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}
-let Predicates = [HasBWI, HasVLX] in {
+let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
@@ -487,7 +416,7 @@ def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
(XORPSrr VR128:$src1, VR128:$src2)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
def : Pat<(f128 (X86fand VR128:$src1, (loadf128 addr:$src2))),
(VANDPSrm VR128:$src1, f128mem:$src2)>;
@@ -507,3 +436,24 @@ def : Pat<(f128 (X86fxor VR128:$src1, (loadf128 addr:$src2))),
def : Pat<(f128 (X86fxor VR128:$src1, VR128:$src2)),
(VXORPSrr VR128:$src1, VR128:$src2)>;
}
+
+let Predicates = [HasVLX] in {
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(f128 (X86fand VR128X:$src1, (loadf128 addr:$src2))),
+ (VANDPSZ128rm VR128X:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fand VR128X:$src1, VR128X:$src2)),
+ (VANDPSZ128rr VR128X:$src1, VR128X:$src2)>;
+
+def : Pat<(f128 (X86for VR128X:$src1, (loadf128 addr:$src2))),
+ (VORPSZ128rm VR128X:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86for VR128X:$src1, VR128X:$src2)),
+ (VORPSZ128rr VR128X:$src1, VR128X:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128X:$src1, (loadf128 addr:$src2))),
+ (VXORPSZ128rm VR128X:$src1, f128mem:$src2)>;
+
+def : Pat<(f128 (X86fxor VR128X:$src1, VR128X:$src2)),
+ (VXORPSZ128rr VR128X:$src1, VR128X:$src2)>;
+}
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 9d810a675e3b..66ca78556b82 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -1,9 +1,8 @@
//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -247,36 +246,22 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, XOPCC:$cc),
- !strconcat("vpcom${cc}", Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (ins VR128:$src1, VR128:$src2, u8imm:$cc),
+ !strconcat("vpcom", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
imm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
- !strconcat("vpcom${cc}", Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$cc),
+ !strconcat("vpcom", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)),
imm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, u8imm:$src3),
- !strconcat("vpcom", Suffix,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
- let mayLoad = 1 in
- def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
- !strconcat("vpcom", Suffix,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>,
- NotMemoryFoldable;
- }
}
def : Pat<(OpNode (load addr:$src2),
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index c20336387b2d..892a083f4d1a 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- X86InstructionSelector.cpp -----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -419,18 +418,22 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV32rm : X86::MOV32mr;
if (X86::VECRRegBankID == RB.getID())
- return Isload ? (HasAVX512 ? X86::VMOVSSZrm
- : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm)
- : (HasAVX512 ? X86::VMOVSSZmr
- : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
+ HasAVX ? X86::VMOVSSrm_alt :
+ X86::MOVSSrm_alt)
+ : (HasAVX512 ? X86::VMOVSSZmr :
+ HasAVX ? X86::VMOVSSmr :
+ X86::MOVSSmr);
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
if (X86::VECRRegBankID == RB.getID())
- return Isload ? (HasAVX512 ? X86::VMOVSDZrm
- : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm)
- : (HasAVX512 ? X86::VMOVSDZmr
- : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
+ HasAVX ? X86::VMOVSDrm_alt :
+ X86::MOVSDrm_alt)
+ : (HasAVX512 ? X86::VMOVSDZmr :
+ HasAVX ? X86::VMOVSDmr :
+ X86::MOVSDmr);
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
if (Alignment >= 16)
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -513,10 +516,22 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
LLT Ty = MRI.getType(DefReg);
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+ assert(I.hasOneMemOperand());
auto &MemOp = **I.memoperands_begin();
- if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
- LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
- return false;
+ if (MemOp.isAtomic()) {
+ // Note: for unordered operations, we rely on the fact that the appropriate MMO
+ // is already on the instruction we're mutating, and thus we don't need to
+ // make any changes. So long as we select an opcode which is capable of
+ // loading or storing the appropriate size atomically, the rest of the
+ // backend is required to respect the MMO state.
+ if (!MemOp.isUnordered()) {
+ LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
+ return false;
+ }
+ if (MemOp.getAlignment() < Ty.getSizeInBits()/8) {
+ LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
+ return false;
+ }
}
unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
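
Illustrative only, not part of the patch: the check above lets unordered atomic loads and stores through as ordinary MOVs provided the access is naturally aligned, because an aligned MOV is already single-copy atomic on x86 while an under-aligned access could be split. A rough C++ illustration of the property being relied on (relaxed standing in here for the IR's weakest atomic ordering); the file name is hypothetical.

// atomic_mov.cpp : illustration only, not part of this patch.
// A naturally aligned atomic load on x86-64 needs nothing more than a
// plain 8-byte MOV; the selector only has to reject accesses with real
// ordering constraints or insufficient alignment.
#include <atomic>
#include <cstdint>
#include <cstdio>

std::atomic<uint64_t> counter{0};   // 8-byte aligned by default

uint64_t read_counter() {
  // Atomicity here comes from the natural alignment, not from any
  // special instruction or fence.
  return counter.load(std::memory_order_relaxed);
}

int main() {
  counter.store(42, std::memory_order_relaxed);
  std::printf("%llu\n", (unsigned long long)read_counter());
}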
@@ -936,7 +951,6 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
bool SwapArgs;
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
(CmpInst::Predicate)I.getOperand(1).getPredicate());
- unsigned OpSet = X86::getSETFromCond(CC);
unsigned LHS = I.getOperand(2).getReg();
unsigned RHS = I.getOperand(3).getReg();
@@ -970,7 +984,7 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I,
.addReg(RHS);
MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(OpSet), I.getOperand(0).getReg());
+ TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC);
constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);
@@ -991,8 +1005,8 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
static const uint16_t SETFOpcTable[2][3] = {
- {X86::SETEr, X86::SETNPr, X86::AND8rr},
- {X86::SETNEr, X86::SETPr, X86::OR8rr}};
+ {X86::COND_E, X86::COND_NP, X86::AND8rr},
+ {X86::COND_NE, X86::COND_P, X86::OR8rr}};
const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
default:
@@ -1032,9 +1046,9 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
unsigned FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
unsigned FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SETFOpc[0]), FlagReg1);
+ TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]);
MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
- TII.get(SETFOpc[1]), FlagReg2);
+ TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]);
MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
TII.get(SETFOpc[2]), ResultReg)
.addReg(FlagReg1)
@@ -1052,7 +1066,6 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
bool SwapArgs;
std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
- unsigned Opc = X86::getSETFromCond(CC);
if (SwapArgs)
std::swap(LhsReg, RhsReg);
@@ -1064,7 +1077,7 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
.addReg(RhsReg);
MachineInstr &Set =
- *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opc), ResultReg);
+ *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC);
constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
I.eraseFromParent();
@@ -1409,8 +1422,8 @@ bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
*BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
.addReg(CondReg)
.addImm(1);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JNE_1))
- .addMBB(DestMBB);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
+ .addMBB(DestMBB).addImm(X86::COND_NE);
constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);
@@ -1530,15 +1543,14 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
const static struct ShiftEntry {
unsigned SizeInBits;
- unsigned CReg;
unsigned OpLSHR;
unsigned OpASHR;
unsigned OpSHL;
} OpTable[] = {
- {8, X86::CL, X86::SHR8rCL, X86::SAR8rCL, X86::SHL8rCL}, // i8
- {16, X86::CX, X86::SHR16rCL, X86::SAR16rCL, X86::SHL16rCL}, // i16
- {32, X86::ECX, X86::SHR32rCL, X86::SAR32rCL, X86::SHL32rCL}, // i32
- {64, X86::RCX, X86::SHR64rCL, X86::SAR64rCL, X86::SHL64rCL} // i64
+ {8, X86::SHR8rCL, X86::SAR8rCL, X86::SHL8rCL}, // i8
+ {16, X86::SHR16rCL, X86::SAR16rCL, X86::SHL16rCL}, // i16
+ {32, X86::SHR32rCL, X86::SAR32rCL, X86::SHL32rCL}, // i32
+ {64, X86::SHR64rCL, X86::SAR64rCL, X86::SHL64rCL} // i64
};
if (DstRB.getID() != X86::GPRRegBankID)
@@ -1551,7 +1563,6 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
if (ShiftEntryIt == std::end(OpTable))
return false;
- unsigned CReg = ShiftEntryIt->CReg;
unsigned Opcode = 0;
switch (I.getOpcode()) {
case TargetOpcode::G_SHL:
@@ -1570,16 +1581,11 @@ bool X86InstructionSelector::selectShift(MachineInstr &I,
unsigned Op0Reg = I.getOperand(1).getReg();
unsigned Op1Reg = I.getOperand(2).getReg();
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
- ShiftEntryIt->CReg)
- .addReg(Op1Reg);
+ assert(MRI.getType(Op1Reg).getSizeInBits() == 8);
- // The shift instruction uses X86::CL. If we defined a super-register
- // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
- if (CReg != X86::CL)
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::KILL),
- X86::CL)
- .addReg(CReg, RegState::Kill);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
+ X86::CL)
+ .addReg(Op1Reg);
MachineInstr &ShiftInst =
*BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
@@ -1608,8 +1614,8 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
"Arguments and return value types must match");
- const RegisterBank &RegRB = *RBI.getRegBank(DstReg, MRI, TRI);
- if (RegRB.getID() != X86::GPRRegBankID)
+ const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
+ if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
return false;
const static unsigned NumTypes = 4; // i8, i16, i32, i64
@@ -1707,7 +1713,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
const DivRemEntry &TypeEntry = *OpEntryIt;
const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
- const TargetRegisterClass *RegRC = getRegClass(RegTy, RegRB);
+ const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
!RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp
index 28940754a203..8f74a8fe041d 100644
--- a/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/lib/Target/X86/X86InterleavedAccess.cpp
@@ -1,9 +1,8 @@
//===- X86InterleavedAccess.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -194,7 +193,7 @@ void X86InterleavedAccessGroup::decompose(
// Decompose the load instruction.
LoadInst *LI = cast<LoadInst>(VecInst);
- Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *VecBaseTy, *VecBasePtrTy;
Value *VecBasePtr;
unsigned int NumLoads = NumSubVectors;
// In the case of stride 3 with a vector of 32 elements load the information
@@ -202,18 +201,22 @@ void X86InterleavedAccessGroup::decompose(
// [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
if (VecLength == 768 || VecLength == 1536) {
- Type *VecTran =
- VectorType::get(Type::getInt8Ty(LI->getContext()), 16)->getPointerTo();
- VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTran);
+ VecBaseTy = VectorType::get(Type::getInt8Ty(LI->getContext()), 16);
+ VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
+ VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
NumLoads = NumSubVectors * (VecLength / 384);
- } else
+ } else {
+ VecBaseTy = SubVecTy;
+ VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
+ }
// Generate N loads of T type.
for (unsigned i = 0; i < NumLoads; i++) {
// TODO: Support inbounds GEP.
- Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i));
+ Value *NewBasePtr =
+ Builder.CreateGEP(VecBaseTy, VecBasePtr, Builder.getInt32(i));
Instruction *NewLoad =
- Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment());
+ Builder.CreateAlignedLoad(VecBaseTy, NewBasePtr, LI->getAlignment());
DecomposedVectors.push_back(NewLoad);
}
}
@@ -416,7 +419,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
}
reorderSubVector(VT, TransposedMatrix, VecOut, makeArrayRef(Concat, 16),
- NumOfElm, 4, Builder);
+ NumOfElm, 4, Builder);
}
// createShuffleStride returns shuffle mask of size N.
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 151e1b9136c4..40141d894629 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1,9 +1,8 @@
//===-- X86IntrinsicsInfo.h - X86 Intrinsics ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,21 +19,22 @@
namespace llvm {
enum IntrinsicType : uint16_t {
+ CVTNEPS2BF16_MASK,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
INTR_TYPE_3OP_IMM8,
- CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
- CVTPD2PS, CVTPD2PS_MASK, CVTPD2PS_RND_MASK,
- INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
- INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
- INTR_TYPE_3OP_MASK,
- IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
- INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
+ CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV,
+ CVTPD2PS_MASK,
+ INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE,
+ INTR_TYPE_1OP_MASK_SAE, INTR_TYPE_2OP_MASK_SAE, INTR_TYPE_3OP_MASK_SAE,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_2OP_MASK,
+ IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_SAE,
+ INTR_TYPE_SCALAR_MASK_RND,
+ INTR_TYPE_3OP_SCALAR_MASK_SAE,
COMPRESS_EXPAND_IN_REG,
- TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2I_MASK,
+ TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
- FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
- FIXUPIMMS_MASKZ, GATHER_AVX2,
+ FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2,
ROUNDP, ROUNDS
};
@@ -64,47 +64,47 @@ struct IntrinsicData {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithChain[] = {
- X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, X86::VGATHERDPSrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, X86::VPGATHERDQrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, X86::VPGATHERDQYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, X86::VPGATHERQDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, X86::VPGATHERQDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, X86::VGATHERQPDrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, X86::VGATHERQPSrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, X86::VPGATHERQQrm, 0),
- X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, X86::VPGATHERQQYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, 0, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, 0, 0),
- X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_dps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3div8_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
@@ -115,30 +115,30 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
- X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
- X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
@@ -249,47 +249,47 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpd_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpi_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dpq_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_dps_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpd_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpi_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qpq_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatter_qps_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv2_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv4_si, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scatterdiv8_si, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv2_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv4_si, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scattersiv8_si, SCATTER, 0, 0),
- X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_dps_512, SCATTER, X86::VSCATTERDPSZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_qpd_512, SCATTER, X86::VSCATTERQPDZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, X86::VSCATTERQPDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, X86::VPSCATTERQQZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, X86::VSCATTERQPDZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, X86::VPSCATTERQQZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, X86::VSCATTERQPSZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, X86::VPSCATTERQDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, X86::VSCATTERQPSZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, X86::VPSCATTERQDZ256mr, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_dps_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpd_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv2_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv2_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
X86::VSCATTERPF1DPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
@@ -298,24 +298,24 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86::VSCATTERPF1QPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
X86::VSCATTERPF1QPSm),
- X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, X86::VSCATTERDPDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, X86::VPSCATTERDQZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, X86::VSCATTERDPDZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, X86::VPSCATTERDQZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, X86::VSCATTERDPSZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
- X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),
- X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_di, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0),
+ X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdseed_16, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdseed_32, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
- X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
- X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
- X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
+ X86_INTRINSIC_DATA(rdtsc, RDTSC, X86::RDTSC, 0),
+ X86_INTRINSIC_DATA(rdtscp, RDTSC, X86::RDTSCP, 0),
+ X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};
@@ -340,9 +340,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(addcarry_64, ADX, X86ISD::ADC, X86ISD::ADD),
X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
+ X86_INTRINSIC_DATA(avx_blendv_pd_256, BLENDV, X86ISD::BLENDV, 0),
+ X86_INTRINSIC_DATA(avx_blendv_ps_256, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
- X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
+ X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
@@ -369,6 +371,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx2_pblendvb, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -389,10 +394,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
@@ -405,39 +410,45 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_cmp_pd_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
+ X86_INTRINSIC_DATA(avx512_cmp_pd_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE),
X86_INTRINSIC_DATA(avx512_cmp_ps_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_cmp_ps_256, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_RND),
- X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
- X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
- X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
- X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_RND),
- X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
- X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_RND),
- X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cmp_ps_512, CMP_MASK_CC, X86ISD::CMPM, X86ISD::CMPM_SAE),
+ X86_INTRINSIC_DATA(avx512_conflict_d_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_conflict_d_256, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_conflict_d_512, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_conflict_q_128, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_conflict_q_256, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_conflict_q_512, INTR_TYPE_1OP, X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_2OP, X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_2OP, X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_2OP, X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_2OP, X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_2OP, X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_2OP, X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_dbpsadbw_128, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_dbpsadbw_256, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
- X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
- X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
+ X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
@@ -448,80 +459,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_kadd_d, INTR_TYPE_2OP, X86ISD::KADD, 0),
X86_INTRINSIC_DATA(avx512_kadd_q, INTR_TYPE_2OP, X86ISD::KADD, 0),
X86_INTRINSIC_DATA(avx512_kadd_w, INTR_TYPE_2OP, X86ISD::KADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FADDS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FADDS_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FADDS, X86ISD::FADDS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FADDS, X86ISD::FADDS_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC,
- X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+ X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
- X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+ X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
- X86_INTRINSIC_DATA(avx512_mask_compress_b_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_b_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_b_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG,
+ X86_INTRINSIC_DATA(avx512_mask_compress, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_w_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_w_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_compress_w_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::COMPRESS, 0),
- X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
- X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
- X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
- X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2I_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK,
X86ISD::CVTP2SI, X86ISD::MCVTP2SI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, CVTPD2PS_MASK,
X86ISD::VFPROUND, X86ISD::VMFPROUND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_RND_MASK,
- ISD::FP_ROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2I_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, CVTPD2DQ_MASK,
X86ISD::CVTP2UI, X86ISD::MCVTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2UI, 0),
@@ -539,8 +502,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK,
- ISD::FP_EXTEND, X86ISD::VFPEXT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK,
@@ -559,164 +522,116 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK,
X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTSI2P, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::VFPROUNDS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::VFPEXTS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2I_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, CVTQQ2PS_MASK,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, CVTPD2DQ_MASK,
X86ISD::CVTTP2SI, X86ISD::MCVTTP2SI),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2I_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, CVTPD2DQ_MASK,
X86ISD::CVTTP2UI, X86ISD::MCVTTP2UI),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2SI, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTTP2UI, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTUI2P, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
- X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FDIVS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FDIVS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_b_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_b_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_b_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_d_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_q_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_q_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_w_128, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_w_256, COMPRESS_EXPAND_IN_REG,
- X86ISD::EXPAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_expand_w_512, COMPRESS_EXPAND_IN_REG,
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, CVTQQ2PS_MASK,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FDIVS, X86ISD::FDIVS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FDIVS, X86ISD::FDIVS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_expand, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
- X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
- X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
- X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMMS, X86ISD::VFIXUPIMMS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_ps_512, FIXUPIMM, X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_sd, FIXUPIMM, X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_fixupimm_ss, FIXUPIMM, X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0),
X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FGETEXPS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FGETEXPS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_SAE,
X86ISD::VGETMANT, 0),
- X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_SAE,
X86ISD::VGETMANT, 0),
- X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK,
- X86ISD::VGETMANT, X86ISD::VGETMANT_RND),
- X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK_SAE,
X86ISD::VGETMANT, 0),
- X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK,
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK_SAE,
X86ISD::VGETMANT, 0),
- X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK,
- X86ISD::VGETMANT, X86ISD::VGETMANT_RND),
- X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK,
- X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND),
- X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK,
- X86ISD::VGETMANTS, X86ISD::VGETMANTS_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK,
- X86ISD::FMAXS, X86ISD::FMAXS_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK,
- X86ISD::FMAXS, X86ISD::FMAXS_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK,
- X86ISD::FMINS, X86ISD::FMINS_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
- X86ISD::FMINS, X86ISD::FMINS_RND),
- X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMULS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMULS_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK_SAE,
+ X86ISD::VGETMANTS, X86ISD::VGETMANTS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_SAE,
+ X86ISD::VGETMANTS, X86ISD::VGETMANTS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMAXS, X86ISD::FMAXS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMAXS, X86ISD::FMAXS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMINS, X86ISD::FMINS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMINS, X86ISD::FMINS_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMULS, X86ISD::FMULS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMULS, X86ISD::FMULS_RND),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, TRUNCATE_TO_REG,
@@ -737,10 +652,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, TRUNCATE_TO_REG,
@@ -749,10 +660,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
ISD::TRUNCATE, X86ISD::VMTRUNC),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, TRUNCATE_TO_REG,
X86ISD::VTRUNC, X86ISD::VMTRUNC),
- X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
- ISD::TRUNCATE, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, TRUNCATE_TO_REG,
X86ISD::VTRUNCS, X86ISD::VMTRUNCS),
X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, TRUNCATE_TO_REG,
@@ -825,62 +732,62 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
- X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
- X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
- X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
- X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, 0),
- X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK, X86ISD::VRANGE, X86ISD::VRANGE_RND),
- X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_RND),
- X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_RND),
- X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
- X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
- X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, X86ISD::VREDUCE_RND),
- X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
- X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, 0),
- X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK, X86ISD::VREDUCE, X86ISD::VREDUCE_RND),
- X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_RND),
- X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_RND),
- X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
- X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
- X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_RND),
- X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
- X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, 0),
- X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_RND),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, X86ISD::VRANGE_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_SAE, X86ISD::VRANGE, X86ISD::VRANGE_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VRANGES, X86ISD::VRANGES_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK,
- X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_RND),
+ X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK,
- X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_RND),
- X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK,
X86ISD::SCALEF, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK,
X86ISD::SCALEF, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_scalef_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, X86ISD::SCALEF_RND),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK,
X86ISD::SCALEF, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_ps_128, INTR_TYPE_2OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK,
X86ISD::SCALEF, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_ps_256, INTR_TYPE_2OP_MASK_RM,
- X86ISD::SCALEF, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
- X86ISD::SCALEF, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::SCALEFS, 0),
- X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::SCALEFS, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSQRTS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSQRTS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSUBS_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSUBS_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, X86ISD::SCALEF_RND),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK,
+ X86ISD::SCALEFS, X86ISD::SCALEFS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK,
+ X86ISD::SCALEFS, X86ISD::SCALEFS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSQRTS, X86ISD::FSQRTS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSQRTS, X86ISD::FSQRTS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSUBS, X86ISD::FSUBS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSUBS, X86ISD::FSUBS_RND),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
X86ISD::CVTPH2PS, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK,
- X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_SAE),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
X86ISD::CVTPS2PH, X86ISD::MCVTPS2PH),
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, CVTPS2PH_MASK,
@@ -893,28 +800,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_512, FIXUPIMM_MASKZ,
- X86ISD::VFIXUPIMM, 0),
+ X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_256, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ps_512, FIXUPIMM_MASKZ,
- X86ISD::VFIXUPIMM, 0),
- X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMMS_MASKZ,
- X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
- X86ISD::VFIXUPIMMS, 0),
+ X86ISD::VFIXUPIMM, X86ISD::VFIXUPIMM_SAE),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_sd, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
+ X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMM_MASKZ,
+ X86ISD::VFIXUPIMMS, X86ISD::VFIXUPIMMS_SAE),
- X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP, X86ISD::FMAX, X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_min_ps_512, INTR_TYPE_2OP, X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_max_pd_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
+ X86_INTRINSIC_DATA(avx512_max_ps_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
+ X86_INTRINSIC_DATA(avx512_min_pd_512, INTR_TYPE_2OP_SAE, X86ISD::FMIN, X86ISD::FMIN_SAE),
+ X86_INTRINSIC_DATA(avx512_min_ps_512, INTR_TYPE_2OP_SAE, X86ISD::FMIN, X86ISD::FMIN_SAE),
X86_INTRINSIC_DATA(avx512_mul_pd_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mul_ps_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_pavg_b_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx512_pavg_w_512, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -943,11 +852,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
@@ -971,11 +880,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -990,10 +899,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
+ X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
+ X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
+ X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
@@ -1002,14 +911,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
+  X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
+ X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_sub_pd_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_sub_ps_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512_uitofp_round, INTR_TYPE_1OP, ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
@@ -1071,6 +982,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ // bfloat16
+ X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_128, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_256, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_512, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_256, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_512, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_128, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+ X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+ X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+ X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
@@ -1111,6 +1032,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtsd2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
X86_INTRINSIC_DATA(sse2_cvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtsd2ss, INTR_TYPE_2OP, X86ISD::VFPROUNDS, 0),
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0),
@@ -1123,6 +1045,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1156,8 +1080,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse3_hadd_ps, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(sse3_hsub_pd, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
+ X86_INTRINSIC_DATA(sse41_blendvpd, BLENDV, X86ISD::BLENDV, 0),
+ X86_INTRINSIC_DATA(sse41_blendvps, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse41_pblendvb, BLENDV, X86ISD::BLENDV, 0),
X86_INTRINSIC_DATA(sse41_phminposuw, INTR_TYPE_1OP, X86ISD::PHMINPOS, 0),
X86_INTRINSIC_DATA(sse41_round_pd, ROUNDP, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(sse41_round_ps, ROUNDP, X86ISD::VRNDSCALE, 0),
@@ -1200,14 +1127,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP,
X86ISD::GF2P8MULB, 0),
- X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
- X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
- X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
- X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
- X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
- X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
- X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
- X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpermil2pd, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2pd_256, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
X86_INTRINSIC_DATA(xop_vpermil2ps, INTR_TYPE_4OP, X86ISD::VPERMIL2, 0),
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index 4a49fa68dd06..00fb1b573858 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -134,9 +133,15 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
// Shifts and SDIV
getActionDefinitionsBuilder(
- {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
- .legalFor({s8, s16, s32})
- .clampScalar(0, s8, s32);
+ {G_SDIV, G_SREM, G_UDIV, G_UREM})
+ .legalFor({s8, s16, s32})
+ .clampScalar(0, s8, s32);
+
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR})
+ .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
+ .clampScalar(0, s8, s32)
+ .clampScalar(1, s8, s8);
}
// Control-flow
@@ -236,12 +241,19 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
.clampScalar(1, s32, s64)
.widenScalarToNextPow2(1);
- // Shifts and SDIV
+ // Divisions
getActionDefinitionsBuilder(
- {G_SHL, G_LSHR, G_ASHR, G_SDIV, G_SREM, G_UDIV, G_UREM})
+ {G_SDIV, G_SREM, G_UDIV, G_UREM})
.legalFor({s8, s16, s32, s64})
.clampScalar(0, s8, s64);
+ // Shifts
+ getActionDefinitionsBuilder(
+ {G_SHL, G_LSHR, G_ASHR})
+ .legalFor({{s8, s8}, {s16, s8}, {s32, s8}, {s64, s8}})
+ .clampScalar(0, s8, s64)
+ .clampScalar(1, s8, s8);
+
// Merge/Unmerge
setAction({G_MERGE_VALUES, s128}, Legal);
setAction({G_UNMERGE_VALUES, 1, s128}, Legal);
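For reference, a minimal standalone sketch of what the split shift rules express (plain C++, not the GlobalISel API; clampWidth is a simplified stand-in for clampScalar): the shifted value is clamped to the target's legal scalar widths, while the shift amount is always narrowed to s8 to match the CL-based shift encoding.

    #include <algorithm>
    #include <iostream>

    // Simplified stand-in: widths below Min are widened, widths above Max are
    // narrowed, and everything in between is already legal.
    static unsigned clampWidth(unsigned Bits, unsigned Min, unsigned Max) {
      return std::min(std::max(Bits, Min), Max);
    }

    int main() {
      unsigned ValueBits = clampWidth(128, 8, 64); // operand 0: clamped to s8..s64
      unsigned AmountBits = clampWidth(32, 8, 8);  // operand 1: always forced to s8
      std::cout << ValueBits << " " << AmountBits << "\n"; // prints "64 8"
    }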
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
index 135950a95f84..d21707b9ab9b 100644
--- a/lib/Target/X86/X86LegalizerInfo.h
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -1,10 +1,9 @@
//===- X86LegalizerInfo.h ------------------------------------------*- C++
//-*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2816f8c62bfb..b1fefaa84be4 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86InstComments.h"
+#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86AsmPrinter.h"
@@ -101,9 +100,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo(),
- EnablePrintSchedInfo &&
- !(Inst.getFlags() & X86::NO_SCHED_INFO));
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}
@@ -438,7 +435,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.addOperand(MaybeMCOp.getValue());
// Handle a few special cases to eliminate operand modifiers.
-ReSimplify:
switch (OutMI.getOpcode()) {
case X86::LEA64_32r:
case X86::LEA64r:
@@ -554,11 +550,6 @@ ReSimplify:
case X86::TAILJMPd64:
Opcode = X86::JMP_1;
goto SetTailJmpOpcode;
- case X86::TAILJMPd_CC:
- case X86::TAILJMPd64_CC:
- Opcode = X86::GetCondBranchFromCond(
- static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
- goto SetTailJmpOpcode;
SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
@@ -568,6 +559,17 @@ ReSimplify:
break;
}
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPd64_CC: {
+ MCOperand Saved = OutMI.getOperand(0);
+ MCOperand Saved2 = OutMI.getOperand(1);
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::JCC_1);
+ OutMI.addOperand(Saved);
+ OutMI.addOperand(Saved2);
+ break;
+ }
+
case X86::DEC16r:
case X86::DEC32r:
case X86::INC16r:
@@ -586,19 +588,6 @@ ReSimplify:
}
break;
- // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
- // this with an ugly goto in case the resultant OR uses EAX and needs the
- // short form.
- case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
- case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
- case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
- case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
- case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
- case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
- case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
- case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
- case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
-
// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.
@@ -694,16 +683,9 @@ ReSimplify:
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
const MachineInstr &MI) {
-
- bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
+ bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
-
- bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
-
- MCContext &context = OutStreamer->getContext();
-
- if (needsPadding)
- EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ MCContext &Ctx = OutStreamer->getContext();
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
@@ -721,51 +703,86 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
llvm_unreachable("unexpected opcode");
}
- MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
- const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);
-
- MCInst LEA;
- if (is64Bits) {
- LEA.setOpcode(X86::LEA64r);
- LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest
- LEA.addOperand(MCOperand::createReg(X86::RIP)); // base
- LEA.addOperand(MCOperand::createImm(1)); // scale
- LEA.addOperand(MCOperand::createReg(0)); // index
- LEA.addOperand(MCOperand::createExpr(symRef)); // disp
- LEA.addOperand(MCOperand::createReg(0)); // seg
- } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
- LEA.setOpcode(X86::LEA32r);
- LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
- LEA.addOperand(MCOperand::createReg(X86::EBX)); // base
- LEA.addOperand(MCOperand::createImm(1)); // scale
- LEA.addOperand(MCOperand::createReg(0)); // index
- LEA.addOperand(MCOperand::createExpr(symRef)); // disp
- LEA.addOperand(MCOperand::createReg(0)); // seg
+ const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
+ MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
+
+  // As of binutils 2.32, ld reports a bogus TLS relaxation error when the GD/LD
+  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
+  // relaxed to IE/LE (binutils PR24784). Work around the bug by using the GOT
+  // only when GOTPCRELX is enabled.
+ // TODO Delete the workaround when GOTPCRELX becomes commonplace.
+ bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
+ Ctx.getAsmInfo()->canRelaxRelocations();
+
+ if (Is64Bits) {
+ bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
+ if (NeedsPadding)
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
+ .addReg(X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Sym)
+ .addReg(0));
+ const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
+ if (NeedsPadding) {
+ if (!UseGot)
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
+ EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
+ }
+ if (UseGot) {
+ const MCExpr *Expr = MCSymbolRefExpr::create(
+ TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
+ EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Expr)
+ .addReg(0));
+ } else {
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::CALL64pcrel32)
+ .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
+ MCSymbolRefExpr::VK_PLT, Ctx)));
+ }
} else {
- LEA.setOpcode(X86::LEA32r);
- LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
- LEA.addOperand(MCOperand::createReg(0)); // base
- LEA.addOperand(MCOperand::createImm(1)); // scale
- LEA.addOperand(MCOperand::createReg(X86::EBX)); // index
- LEA.addOperand(MCOperand::createExpr(symRef)); // disp
- LEA.addOperand(MCOperand::createReg(0)); // seg
- }
- EmitAndCountInstruction(LEA);
+ if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
+ EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
+ .addReg(X86::EAX)
+ .addReg(0)
+ .addImm(1)
+ .addReg(X86::EBX)
+ .addExpr(Sym)
+ .addReg(0));
+ } else {
+ EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
+ .addReg(X86::EAX)
+ .addReg(X86::EBX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Sym)
+ .addReg(0));
+ }
- if (needsPadding) {
- EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
- EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
- EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
+ const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
+ if (UseGot) {
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
+ EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
+ .addReg(X86::EBX)
+ .addImm(1)
+ .addReg(0)
+ .addExpr(Expr)
+ .addReg(0));
+ } else {
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::CALLpcrel32)
+ .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
+ MCSymbolRefExpr::VK_PLT, Ctx)));
+ }
}
-
- StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
- MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
- const MCSymbolRefExpr *tlsRef =
- MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context);
-
- EmitAndCountInstruction(
- MCInstBuilder(is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
- .addExpr(tlsRef));
}
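A rough illustration of the call-site decision made above (standalone C++; the assembly strings describe only the 64-bit case and are illustrative): the GOT-indirect call is emitted only when the module requests GOT calls and the assembler can produce relaxable GOTPCRELX relocations; otherwise the classic PLT call is kept so linkers affected by the bug noted above never see the GOTPCREL form.

    #include <iostream>

    // Illustrative only: choose how __tls_get_addr is called in the 64-bit
    // GD/LD sequences, mirroring the UseGot condition above.
    static const char *tlsGetAddrCall(bool RtLibUseGOT, bool CanRelaxRelocations) {
      bool UseGot = RtLibUseGOT && CanRelaxRelocations;
      return UseGot ? "call *__tls_get_addr@GOTPCREL(%rip)"
                    : "call __tls_get_addr@PLT";
    }

    int main() {
      std::cout << tlsGetAddrCall(true, false) << "\n"; // no GOTPCRELX: keep the PLT call
      std::cout << tlsGetAddrCall(true, true) << "\n";  // GOTPCRELX available: go through the GOT
    }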
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
@@ -778,7 +795,7 @@ static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
unsigned NopSize;
unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
- Opc = IndexReg = Displacement = SegmentReg = 0;
+ IndexReg = Displacement = SegmentReg = 0;
BaseReg = X86::RAX;
ScaleVal = 1;
switch (NumBytes) {
@@ -963,6 +980,7 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
MI.addOperand(MaybeOperand.getValue());
+ OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
OutStreamer->EmitInstruction(MI, getSubtargetInfo());
}
@@ -1374,7 +1392,8 @@ PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
MBB = MBB->getPrevNode();
MBBI = MBB->end();
}
- return --MBBI;
+ --MBBI;
+ return MBBI;
}
static const Constant *getConstantFromPool(const MachineInstr &MI,
@@ -1668,6 +1687,77 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TLS_base_addr64:
return LowerTlsAddr(MCInstLowering, *MI);
+  // Loading/storing mask pairs requires two kmov operations. The second of these
+  // needs a 2-byte displacement relative to the specified address (with a 32-bit
+  // spill size). Pairs of 1-bit through 16-bit masks all use the same spill size;
+  // they are all stored using MASKPAIR16STORE and loaded using MASKPAIR16LOAD.
+ //
+ // The displacement value might wrap around in theory, thus the asserts in both
+ // cases.
+ case X86::MASKPAIR16LOAD: {
+ int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
+ assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+ const X86RegisterInfo *RI =
+ MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+ unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+ // Load the first mask register
+ MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
+ MIB.addReg(Reg0);
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+ MIB.addOperand(Op.getValue());
+ }
+ EmitAndCountInstruction(MIB);
+
+ // Load the second mask register of the pair
+ MIB = MCInstBuilder(X86::KMOVWkm);
+ MIB.addReg(Reg1);
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp) {
+ MIB.addImm(Disp + 2);
+ } else {
+ auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
+ MIB.addOperand(Op.getValue());
+ }
+ }
+ EmitAndCountInstruction(MIB);
+ return;
+ }
+
+ case X86::MASKPAIR16STORE: {
+ int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
+ assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
+ const X86RegisterInfo *RI =
+ MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ unsigned Reg = MI->getOperand(X86::AddrNumOperands).getReg();
+ unsigned Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
+ unsigned Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
+
+ // Store the first mask register
+ MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
+ MIB.addReg(Reg0);
+ EmitAndCountInstruction(MIB);
+
+ // Store the second mask register of the pair
+ MIB = MCInstBuilder(X86::KMOVWmk);
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp) {
+ MIB.addImm(Disp + 2);
+ } else {
+ auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
+ MIB.addOperand(Op.getValue());
+ }
+ }
+ MIB.addReg(Reg1);
+ EmitAndCountInstruction(MIB);
+ return;
+ }
+
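The displacement arithmetic shared by both cases can be sketched in isolation (standalone C++, not LLVM code): a mask pair spills as two consecutive 16-bit halves, so the second KMOVW simply adds 2 bytes to the first half's displacement.

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Illustrative only: displacement of the second 16-bit half of a spilled
    // mask pair, guarded the same way as the asserts above.
    static int64_t secondHalfDisp(int64_t Disp) {
      assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
      return Disp + 2;
    }

    int main() {
      std::cout << secondHalfDisp(16) << "\n"; // halves live at disp 16 and 18
    }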
case X86::MOVPC32r: {
// This is a pseudo op for a two instruction sequence with a label, which
// looks like:
@@ -1861,8 +1951,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -1934,8 +2023,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -1966,8 +2054,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -1984,8 +2071,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -2002,7 +2088,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CF = dyn_cast<ConstantFP>(C)) {
CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
}
}
break;
@@ -2099,7 +2185,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << "]";
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int l = 0; l != NumLanes; ++l) {
@@ -2111,7 +2197,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
}
}
break;
@@ -2198,14 +2284,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
printConstant(C, CS);
}
CS << "]";
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
}
}
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment))
- TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO);
// Stackmap shadows cannot include branch targets, so we can count the bytes
   // in a call towards the shadow, but must ensure that no thread returns
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
index 5433033671f3..05f846bfb219 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index e1183bd14796..d7e535598d81 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp
index 5c09597d0442..c6da4b09dd60 100644
--- a/lib/Target/X86/X86MacroFusion.cpp
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -1,9 +1,8 @@
//===- X86MacroFusion.cpp - X86 Macro Fusion ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,59 +18,29 @@
using namespace llvm;
-/// Check if the instr pair, FirstMI and SecondMI, should be fused
-/// together. Given SecondMI, when FirstMI is unspecified, then check if
-/// SecondMI may be part of a fused pair at all.
-static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
- const TargetSubtargetInfo &TSI,
- const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
- const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
- // Check if this processor supports macro-fusion.
- if (!ST.hasMacroFusion())
- return false;
+namespace {
- enum {
- FuseTest,
- FuseCmp,
- FuseInc
- } FuseKind;
+// The classification for the first instruction.
+enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
- unsigned FirstOpcode = FirstMI
- ? FirstMI->getOpcode()
- : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
+// The classification for the second instruction (jump).
+enum class JumpKind {
+ // JE, JL, JG and variants.
+ ELG,
+ // JA, JB and variants.
+ AB,
+ // JS, JP, JO and variants.
+ SPO,
+ // Not a fusable jump.
+ Invalid,
+};
- switch (SecondOpcode) {
- default:
- return false;
- case X86::JE_1:
- case X86::JNE_1:
- case X86::JL_1:
- case X86::JLE_1:
- case X86::JG_1:
- case X86::JGE_1:
- FuseKind = FuseInc;
- break;
- case X86::JB_1:
- case X86::JBE_1:
- case X86::JA_1:
- case X86::JAE_1:
- FuseKind = FuseCmp;
- break;
- case X86::JS_1:
- case X86::JNS_1:
- case X86::JP_1:
- case X86::JNP_1:
- case X86::JO_1:
- case X86::JNO_1:
- FuseKind = FuseTest;
- break;
- }
+} // namespace
- switch (FirstOpcode) {
+static FirstInstrKind classifyFirst(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
default:
- return false;
+ return FirstInstrKind::Invalid;
case X86::TEST8rr:
case X86::TEST16rr:
case X86::TEST32rr:
@@ -84,6 +53,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::TEST16mr:
case X86::TEST32mr:
case X86::TEST64mr:
+ return FirstInstrKind::Test;
case X86::AND16ri:
case X86::AND16ri8:
case X86::AND16rm:
@@ -99,7 +69,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::AND8ri:
case X86::AND8rm:
case X86::AND8rr:
- return true;
+ return FirstInstrKind::And;
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP16rm:
@@ -119,6 +89,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::CMP8rm:
case X86::CMP8rr:
case X86::CMP8mr:
+ return FirstInstrKind::Cmp;
case X86::ADD16ri:
case X86::ADD16ri8:
case X86::ADD16ri8_DB:
@@ -141,8 +112,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD8ri:
+ case X86::ADD8ri_DB:
case X86::ADD8rm:
case X86::ADD8rr:
+ case X86::ADD8rr_DB:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB16rm:
@@ -158,7 +131,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::SUB8ri:
case X86::SUB8rm:
case X86::SUB8rr:
- return FuseKind == FuseCmp || FuseKind == FuseInc;
+ return FirstInstrKind::ALU;
case X86::INC16r:
case X86::INC32r:
case X86::INC64r:
@@ -167,10 +140,87 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::DEC32r:
case X86::DEC64r:
case X86::DEC8r:
- return FuseKind == FuseInc;
- case X86::INSTRUCTION_LIST_END:
- return true;
+ return FirstInstrKind::IncDec;
+ }
+}
+
+static JumpKind classifySecond(const MachineInstr &MI) {
+ X86::CondCode CC = X86::getCondFromBranch(MI);
+ if (CC == X86::COND_INVALID)
+ return JumpKind::Invalid;
+
+ switch (CC) {
+ default:
+ return JumpKind::Invalid;
+ case X86::COND_E:
+ case X86::COND_NE:
+ case X86::COND_L:
+ case X86::COND_LE:
+ case X86::COND_G:
+ case X86::COND_GE:
+ return JumpKind::ELG;
+ case X86::COND_B:
+ case X86::COND_BE:
+ case X86::COND_A:
+ case X86::COND_AE:
+ return JumpKind::AB;
+ case X86::COND_S:
+ case X86::COND_NS:
+ case X86::COND_P:
+ case X86::COND_NP:
+ case X86::COND_O:
+ case X86::COND_NO:
+ return JumpKind::SPO;
+ }
+}
+
+/// Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);
+
+ // Check if this processor supports any kind of fusion.
+ if (!(ST.hasBranchFusion() || ST.hasMacroFusion()))
+ return false;
+
+ const JumpKind BranchKind = classifySecond(SecondMI);
+
+ if (BranchKind == JumpKind::Invalid)
+ return false; // Second cannot be fused with anything.
+
+ if (FirstMI == nullptr)
+ return true; // We're only checking whether Second can be fused at all.
+
+ const FirstInstrKind TestKind = classifyFirst(*FirstMI);
+
+ if (ST.hasBranchFusion()) {
+ // Branch fusion can merge CMP and TEST with all conditional jumps.
+ return (TestKind == FirstInstrKind::Cmp ||
+ TestKind == FirstInstrKind::Test);
+ }
+
+ if (ST.hasMacroFusion()) {
+ // Macro Fusion rules are a bit more complex. See Agner Fog's
+ // Microarchitecture table 9.2 "Instruction Fusion".
+ switch (TestKind) {
+ case FirstInstrKind::Test:
+ case FirstInstrKind::And:
+ return true;
+ case FirstInstrKind::Cmp:
+ case FirstInstrKind::ALU:
+ return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB;
+ case FirstInstrKind::IncDec:
+ return BranchKind == JumpKind::ELG;
+ case FirstInstrKind::Invalid:
+ return false;
+ }
}
+
+ llvm_unreachable("unknown branch fusion type");
}
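The rewritten predicate reduces to a small compatibility matrix between the two classifications; a minimal standalone sketch of that matrix (plain C++, mirroring the switch above, not the LLVM API):

    #include <iostream>

    enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
    enum class JumpKind { ELG, AB, SPO, Invalid };

    // TEST/AND fuse with any fusable jump, CMP/ALU only with ELG/AB,
    // INC/DEC only with ELG, and everything else never fuses.
    static bool macroFuses(FirstInstrKind First, JumpKind Branch) {
      if (Branch == JumpKind::Invalid)
        return false;
      switch (First) {
      case FirstInstrKind::Test:
      case FirstInstrKind::And:
        return true;
      case FirstInstrKind::Cmp:
      case FirstInstrKind::ALU:
        return Branch == JumpKind::ELG || Branch == JumpKind::AB;
      case FirstInstrKind::IncDec:
        return Branch == JumpKind::ELG;
      case FirstInstrKind::Invalid:
        return false;
      }
      return false;
    }

    int main() {
      std::cout << macroFuses(FirstInstrKind::Cmp, JumpKind::SPO) << "\n";  // 0: CMP + JS does not fuse
      std::cout << macroFuses(FirstInstrKind::Test, JumpKind::SPO) << "\n"; // 1: TEST + JS does
    }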
namespace llvm {
diff --git a/lib/Target/X86/X86MacroFusion.h b/lib/Target/X86/X86MacroFusion.h
index 97ef1d6d3b61..d4ae54f657a5 100644
--- a/lib/Target/X86/X86MacroFusion.h
+++ b/lib/Target/X86/X86MacroFusion.h
@@ -1,9 +1,8 @@
//===- X86MacroFusion.h - X86 Macro Fusion --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index b56d02b6bfb6..7f75598b0655 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -1,9 +1,8 @@
//===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -569,11 +568,8 @@ MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI,
unsigned VReg,
int64_t AddrDispShift) {
DIExpression *Expr = const_cast<DIExpression *>(MI.getDebugExpression());
-
if (AddrDispShift != 0)
- Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift,
- DIExpression::NoDeref,
- DIExpression::WithStackValue);
+ Expr = DIExpression::prepend(Expr, DIExpression::StackValue, AddrDispShift);
// Replace DBG_VALUE instruction with modified version.
MachineBasicBlock *MBB = MI.getParent();
@@ -701,7 +697,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
// Remove redundant address calculations. Do it only for -Os/-Oz since only
// a code size gain is expected from this part of the pass.
- if (MF.getFunction().optForSize())
+ if (MF.getFunction().hasOptSize())
Changed |= removeRedundantAddrCalc(LEAs);
}
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 85b9aecc2106..af974c805c36 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -1,9 +1,8 @@
//===-------- X86PadShortFunction.cpp - pad short functions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -98,7 +97,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- if (MF.getFunction().optForSize())
+ if (MF.getFunction().hasOptSize())
return false;
if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
@@ -113,14 +112,11 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
- MachineBasicBlock *MBB;
- unsigned int Cycles = 0;
-
// Pad the identified basic blocks with NOOPs
for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
I != ReturnBBs.end(); ++I) {
- MBB = I->first;
- Cycles = I->second;
+ MachineBasicBlock *MBB = I->first;
+ unsigned Cycles = I->second;
if (Cycles < Threshold) {
// BB ends in a return. Skip over any DBG_VALUE instructions
diff --git a/lib/Target/X86/X86PfmCounters.td b/lib/Target/X86/X86PfmCounters.td
index a1a4210b5ebf..5610f4bc8873 100644
--- a/lib/Target/X86/X86PfmCounters.td
+++ b/lib/Target/X86/X86PfmCounters.td
@@ -1,9 +1,8 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
index 355291916ee8..78fede3dcde2 100644
--- a/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- X86RegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -160,7 +159,7 @@ const RegisterBankInfo::InstructionMapping &
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- auto Opc = MI.getOpcode();
+ unsigned Opc = MI.getOpcode();
// Try the default logic for non-generic instructions that are either copies
// or already have some operands assigned to banks.
@@ -174,17 +173,22 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_ADD:
case TargetOpcode::G_SUB:
case TargetOpcode::G_MUL:
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
return getSameOperandsMapping(MI, false);
- break;
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
return getSameOperandsMapping(MI, true);
- break;
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR: {
+ unsigned NumOperands = MI.getNumOperands();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ auto Mapping = getValueMapping(getPartialMappingIdx(Ty, false), 3);
+ return getInstructionMapping(DefaultMappingID, 1, Mapping, NumOperands);
+
+ }
default:
break;
}
diff --git a/lib/Target/X86/X86RegisterBankInfo.h b/lib/Target/X86/X86RegisterBankInfo.h
index e227880427f3..c1f3001c6180 100644
--- a/lib/Target/X86/X86RegisterBankInfo.h
+++ b/lib/Target/X86/X86RegisterBankInfo.h
@@ -1,9 +1,8 @@
//===- X86RegisterBankInfo ---------------------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/X86/X86RegisterBanks.td b/lib/Target/X86/X86RegisterBanks.td
index 6d17cd53a0c1..74c515850ab1 100644
--- a/lib/Target/X86/X86RegisterBanks.td
+++ b/lib/Target/X86/X86RegisterBanks.td
@@ -1,9 +1,8 @@
 //=- X86RegisterBank.td - Describe the X86 Banks ---------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 55842a4a2091..2e2f1f9e438a 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -164,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
+ case X86::VR512_0_15RegClassID:
case X86::VR512RegClassID:
// Don't return a super-class that would shrink the spill size.
// That can happen with the vector and float classes.
@@ -216,6 +216,21 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
}
}
+bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // Prevent rewriting a copy where the destination size is larger than the
+ // input size. See PR41619.
+  // FIXME: Should this be factored into the base implementation somehow?
+ if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
+ SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
+ return false;
+
+ return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
+ SrcRC, SrcSubReg);
+}
+
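A minimal standalone sketch of the size rule the new hook enforces (plain C++, illustrative names only): a rewrite is rejected whenever the copy's destination would read more bits than the replacement source defines.

    #include <iostream>

    // Illustrative only: rewriting a full 64-bit copy to read a source that
    // only defines the low 32 bits would use undefined upper bits (PR41619).
    static bool mayRewriteCopySrc(unsigned DefBits, unsigned SrcBits) {
      return SrcBits >= DefBits;
    }

    int main() {
      std::cout << mayRewriteCopySrc(64, 32) << "\n"; // 0: the case the hook now rejects
      std::cout << mayRewriteCopySrc(32, 32) << "\n"; // 1: same-width rewrites are still allowed
    }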
const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
@@ -497,6 +512,9 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const X86FrameLowering *TFI = getFrameLowering(MF);
+ // Set the floating point control register as reserved.
+ Reserved.set(X86::FPCW);
+
// Set the stack-pointer register and its aliases as reserved.
for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
++I)
@@ -747,7 +765,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
-unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const X86FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
@@ -760,3 +778,12 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
}
+
+unsigned
+X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+ unsigned StackReg = getStackRegister();
+ if (Subtarget.isTarget64BitILP32())
+ StackReg = getX86SubSuperRegister(StackReg, 32);
+ return StackReg;
+}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 29401dadead0..b82920898069 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -1,9 +1,8 @@
//===-- X86RegisterInfo.h - X86 Register Information Impl -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -50,7 +49,7 @@ private:
unsigned BasePtr;
public:
- X86RegisterInfo(const Triple &TT);
+ explicit X86RegisterInfo(const Triple &TT);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -75,6 +74,11 @@ public:
getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const override;
+ bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const override;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
const TargetRegisterClass *
@@ -129,15 +133,16 @@ public:
RegScavenger *RS = nullptr) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const;
- unsigned getStackRegister() const { return StackPtr; }
- unsigned getBaseRegister() const { return BasePtr; }
+ unsigned getPtrSizedStackRegister(const MachineFunction &MF) const;
+ Register getStackRegister() const { return StackPtr; }
+ Register getBaseRegister() const { return BasePtr; }
/// Returns physical register used as frame pointer.
/// This will always returns the frame pointer register, contrary to
/// getFrameRegister() which returns the "base pointer" in situations
/// involving a stack, frame and base pointer.
- unsigned getFramePtr() const { return FramePtr; }
+ Register getFramePtr() const { return FramePtr; }
   // FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
};
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index aa20273f89ab..0528b90c1fd5 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -1,9 +1,8 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,6 +28,8 @@ let Namespace = "X86" in {
def sub_32bit : SubRegIndex<32>;
def sub_xmm : SubRegIndex<128>;
def sub_ymm : SubRegIndex<256>;
+ def sub_mask_0 : SubRegIndex<-1>;
+ def sub_mask_1 : SubRegIndex<-1, -1>;
}
//===----------------------------------------------------------------------===//
@@ -278,7 +279,7 @@ def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
-def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
+def ST0 : X86Reg<"st", 0>, DwarfRegNum<[33, 12, 11]>;
def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
@@ -288,7 +289,10 @@ def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
-def FPSW : X86Reg<"fpsw", 0>;
+def FPSW : X86Reg<"fpsr", 0>;
+
+// Floating-point control word
+def FPCW : X86Reg<"fpcr", 0>;
// Status flags register.
//
@@ -539,6 +543,9 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
let isAllocatable = 0;
}
+// Helper to allow %st to print as %st(0) when its encoded in the instruction.
+def RSTi : RegisterOperand<RST, "printSTiRegOperand">;
+
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
@@ -547,17 +554,6 @@ def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
-// Special classes that help the assembly parser choose some alternate
-// instructions to favor 2-byte VEX encodings.
-def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
- 128, (sequence "XMM%u", 0, 7)>;
-def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
- 128, (sequence "XMM%u", 8, 15)>;
-def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
- 256, (sequence "YMM%u", 0, 7)>;
-def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
- 256, (sequence "YMM%u", 8, 15)>;
-
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
@@ -576,6 +572,10 @@ def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
+// Represents the lower 16 registers that have VEX/legacy encodable subregs.
+def VR512_0_15 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
+ 512, (sequence "ZMM%u", 0, 15)>;
+
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
@@ -596,6 +596,16 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
+// Mask register pairs
+def KPAIRS : RegisterTuples<[sub_mask_0, sub_mask_1],
+ [(add K0, K2, K4, K6), (add K1, K3, K5, K7)]>;
+
+def VK1PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK2PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK4PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK8PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+def VK16PAIR : RegisterClass<"X86", [untyped], 16, (add KPAIRS)> {let Size = 32;}
+
def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp
index 08994cccb21e..b435b22e8ac7 100644
--- a/lib/Target/X86/X86RetpolineThunks.cpp
+++ b/lib/Target/X86/X86RetpolineThunks.cpp
@@ -1,9 +1,8 @@
//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td
index 971a50196e45..7574e4b8f896 100755
--- a/lib/Target/X86/X86SchedBroadwell.td
+++ b/lib/Target/X86/X86SchedBroadwell.td
@@ -1,9 +1,8 @@
//=- X86SchedBroadwell.td - X86 Broadwell Scheduling ---------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,6 +81,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 6>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -159,7 +160,6 @@ defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
defm : BWWriteResPair<WriteCMOV, [BWPort06], 1>; // Conditional move.
-defm : BWWriteResPair<WriteCMOV2, [BWPort06,BWPort0156], 2, [1,1], 2>; // // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [BWPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [BWPort06]>; // Setcc.
@@ -186,7 +186,7 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
// Integer shifts and rotates.
defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-defm : BWWriteResPair<WriteRotate, [BWPort06], 2, [2], 2>;
+defm : BWWriteResPair<WriteRotate, [BWPort06], 1, [1], 1>;
defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
@@ -732,10 +732,10 @@ def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> {
}
def: InstRW<[BWWriteResGroup20], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
- ADC8i8, SBB8i8)>;
-def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri",
- "SBB8ri",
- "SET(A|BE)r")>;
+ ADC8i8, SBB8i8,
+ ADC16i16, SBB16i16,
+ ADC32i32, SBB32i32,
+ ADC64i32, SBB64i32)>;
def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> {
let Latency = 2;
@@ -814,7 +814,6 @@ def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[BWWriteResGroup38], (instrs CALL64pcrel32)>;
-def: InstRW<[BWWriteResGroup38], (instregex "SET(A|BE)m")>;
def BWWriteResGroup39 : SchedWriteRes<[BWPort0,BWPort1]> {
let Latency = 4;
@@ -890,8 +889,7 @@ def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[BWWriteResGroup47], (instregex "(V?)PCMPGTQ(Y?)rr",
- "MUL_(FPrST0|FST0r|FrST0)")>;
+def: InstRW<[BWWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> {
let Latency = 5;
@@ -965,6 +963,7 @@ def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> {
}
def: InstRW<[BWWriteResGroup59], (instrs CVTPS2PDrm, VCVTPS2PDrm,
CVTSS2SDrm, VCVTSS2SDrm,
+ CVTSS2SDrm_Int, VCVTSS2SDrm_Int,
VPSLLVQrm,
VPSRLVQrm)>;
@@ -1103,6 +1102,14 @@ def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
def: InstRW<[BWWriteResGroup87], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
+def BWWriteResGroup87_1 : SchedWriteRes<[BWPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[BWWriteResGroup87_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+ ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
@@ -1592,4 +1599,140 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>;
def: InstRW<[WriteZero], (instrs CLC)>;
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Haswell and Broadwell Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def BWWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def BWWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[BWWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def BWWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[BWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+ VXORPDrr)>;
+
+def BWWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[BWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def BWWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def BWWriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def BWWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def BWWriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
+ VPSUBDYrr,
+ VPSUBQYrr,
+ VPSUBWYrr,
+ VPCMPGTBYrr,
+ VPCMPGTDYrr,
+ VPCMPGTWYrr)>;
+
+def BWWritePCMPGTQ : SchedWriteRes<[BWPort0]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def BWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [BWWritePCMPGTQ]>
+]>;
+def : InstRW<[BWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+ VPCMPGTQYrr)>;
+
+
+// CMOVs that use both the Z and C flags require an extra uop.
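+// (COND_A, "above", is taken when CF = 0 and ZF = 0; COND_BE, "below or equal",
+// is taken when CF = 1 or ZF = 1. Both conditions read the two flags, e.g.
+// "cmova %ecx, %eax", unlike single-flag conditions such as COND_E.)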
+def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [1,1];
+ let NumMicroOps = 2;
+}
+
+def BWWriteCMOVA_CMOVBErm : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
+ let Latency = 7;
+ let ResourceCycles = [1,1,1];
+ let NumMicroOps = 3;
+}
+
+def BWCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [BWWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def BWCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [BWWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[BWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[BWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both the Z and C flags require an extra uop.
+def BWWriteSETA_SETBEr : SchedWriteRes<[BWPort06,BWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [1,1];
+ let NumMicroOps = 2;
+}
+
+def BWWriteSETA_SETBEm : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,1,1];
+ let NumMicroOps = 4;
+}
+
+def BWSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [BWWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def BWSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [BWWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[BWSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[BWSETA_SETBErm], (instrs SETCCm)>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 06a32fb0b1cd..284d1567c5c6 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -1,9 +1,8 @@
//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -87,6 +86,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -151,7 +152,7 @@ defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>;
// Integer shifts and rotates.
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
-defm : HWWriteResPair<WriteRotate, [HWPort06], 2, [2], 2>;
+defm : HWWriteResPair<WriteRotate, [HWPort06], 1, [1], 1>;
defm : HWWriteResPair<WriteRotateCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
// SHLD/SHRD.
@@ -164,7 +165,6 @@ defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
defm : HWWriteResPair<WriteCMOV, [HWPort06,HWPort0156], 2, [1,1], 2>; // Conditional move.
-defm : HWWriteResPair<WriteCMOV2, [HWPort06,HWPort0156], 3, [1,2], 3>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [HWPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [HWPort06]>; // Setcc.
def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
@@ -1126,7 +1126,6 @@ def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> {
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup35], (instrs CWD, JCXZ, JECXZ, JRCXZ)>;
-def: InstRW<[HWWriteResGroup35], (instregex "SET(A|BE)r")>;
def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 7;
@@ -1172,7 +1171,6 @@ def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup45], (instrs CALL64pcrel32)>;
-def: InstRW<[HWWriteResGroup45], (instregex "SET(A|BE)m")>;
def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
let Latency = 8;
@@ -1182,6 +1180,14 @@ def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
def: InstRW<[HWWriteResGroup46], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
+def HWWriteResGroup46_1 : SchedWriteRes<[HWPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[HWWriteResGroup46_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+ ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
@@ -1391,8 +1397,8 @@ def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm,
- CVTSD2SSrm,
- VCVTSD2SSrm)>;
+ CVTSD2SSrm, CVTSD2SSrm_Int,
+ VCVTSD2SSrm, VCVTSD2SSrm_Int)>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
@@ -1442,8 +1448,7 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> {
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr",
- "MUL_(FPrST0|FST0r|FrST0)")>;
+def: InstRW<[HWWriteResGroup89], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
let Latency = 11;
@@ -1847,4 +1852,170 @@ def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm,
def: InstRW<[WriteZero], (instrs CLC)>;
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports under certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Haswell and Broadwell Pipeline" > "Register allocation and
+// renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def HWWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def HWWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[HWWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def HWWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[HWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+ VXORPDrr)>;
+
+def HWWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[HWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def HWWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def HWWriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def HWWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def HWWriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomALUY], (instrs VPSUBBYrr,
+ VPSUBDYrr,
+ VPSUBQYrr,
+ VPSUBWYrr,
+ VPCMPGTBYrr,
+ VPCMPGTDYrr,
+ VPCMPGTWYrr)>;
+
+def HWWritePCMPGTQ : SchedWriteRes<[HWPort0]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def HWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [HWWritePCMPGTQ]>
+]>;
+def : InstRW<[HWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+ VPCMPGTQYrr)>;
+
+
+// The 0x83 ADC/SBB opcodes have special support for immediate 0, requiring only
+// a single uop. This does not apply to the GR8 encoding, and it only applies to
+// the 8-bit immediate form, since using a larger immediate for 0 would be silly.
+// Unfortunately, this optimization does not apply to the AX/EAX/RAX short
+// encodings that we convert to in MCInstLowering, so we exclude AX/EAX/RAX here
+// since we schedule before that point.
+// TODO: Should we disable using the short encodings on these CPUs?
+def HWFastADC0 : MCSchedPredicate<
+ CheckAll<[
+ CheckImmOperand<2, 0>, // Second MCOperand is Imm and has value 0.
+ CheckNot<CheckRegOperand<1, AX>>, // First MCOperand is not register AX
+ CheckNot<CheckRegOperand<1, EAX>>, // First MCOperand is not register EAX
+ CheckNot<CheckRegOperand<1, RAX>> // First MCOperand is not register RAX
+ ]>
+>;
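+
+// For example (assuming the usual ADC64ri8 selection for an 8-bit immediate):
+//   adcq $0, %rcx    # imm == 0 and the destination is not RAX, so HWFastADC0
+//                    # matches and the instruction is modeled as a single uop
+//   adcq $0, %rax    # excluded because it is later converted to the short
+//                    # ADC64i32 encoding; it keeps the default WriteADC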
+
+def HWWriteADC0 : SchedWriteRes<[HWPort06]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def HWWriteADC : SchedWriteVariant<[
+ SchedVar<HWFastADC0, [HWWriteADC0]>,
+ SchedVar<NoSchedPred, [WriteADC]>
+]>;
+
+def : InstRW<[HWWriteADC], (instrs ADC16ri8, ADC32ri8, ADC64ri8,
+ SBB16ri8, SBB32ri8, SBB64ri8)>;
+
+// CMOVs that use both the Z and C flags require an extra uop.
+def HWWriteCMOVA_CMOVBErr : SchedWriteRes<[HWPort06,HWPort0156]> {
+ let Latency = 3;
+ let ResourceCycles = [1,2];
+ let NumMicroOps = 3;
+}
+
+def HWWriteCMOVA_CMOVBErm : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
+ let Latency = 8;
+ let ResourceCycles = [1,1,2];
+ let NumMicroOps = 4;
+}
+
+def HWCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [HWWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def HWCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [HWWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[HWCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[HWCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both the Z and C flags require an extra uop.
+def HWWriteSETA_SETBEr : SchedWriteRes<[HWPort06,HWPort0156]> {
+ let Latency = 2;
+ let ResourceCycles = [1,1];
+ let NumMicroOps = 2;
+}
+
+def HWWriteSETA_SETBEm : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,1,1];
+ let NumMicroOps = 4;
+}
+
+def HWSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [HWWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def HWSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [HWWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[HWSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[HWSETA_SETBErm], (instrs SETCCm)>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86SchedPredicates.td b/lib/Target/X86/X86SchedPredicates.td
index 1c7f24375f61..41bd776648f7 100644
--- a/lib/Target/X86/X86SchedPredicates.td
+++ b/lib/Target/X86/X86SchedPredicates.td
@@ -1,9 +1,8 @@
//===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -61,3 +60,27 @@ def IsThreeOperandsLEABody :
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
TIIPredicate<"isThreeOperandsLEA", IsThreeOperandsLEABody>;
+
+// A predicate to check for COND_A and COND_BE CMOVs which have an extra uop
+// on recent Intel CPUs.
+def IsCMOVArr_Or_CMOVBErr : CheckAny<[
+ CheckImmOperand_s<3, "X86::COND_A">,
+ CheckImmOperand_s<3, "X86::COND_BE">
+]>;
+
+def IsCMOVArm_Or_CMOVBErm : CheckAny<[
+ CheckImmOperand_s<7, "X86::COND_A">,
+ CheckImmOperand_s<7, "X86::COND_BE">
+]>;
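+// (In both forms the condition code is the last MCOperand: index 3 for the
+// register form (dst, src1, src2, cond), index 7 for the memory form, where the
+// folded load adds the usual five address operands.)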
+
+// A predicate to check for COND_A and COND_BE SETCCs which have an extra uop
+// on recent Intel CPUs.
+def IsSETAr_Or_SETBEr : CheckAny<[
+ CheckImmOperand_s<1, "X86::COND_A">,
+ CheckImmOperand_s<1, "X86::COND_BE">
+]>;
+
+def IsSETAm_Or_SETBEm : CheckAny<[
+ CheckImmOperand_s<5, "X86::COND_A">,
+ CheckImmOperand_s<5, "X86::COND_BE">
+]>;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 9dbf0976989f..d40bdf728a48 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -1,9 +1,8 @@
//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -77,6 +76,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -159,7 +160,6 @@ defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
defm : SBWriteResPair<WriteCMOV, [SBPort05,SBPort015], 2, [1,1], 2>; // Conditional move.
-defm : SBWriteResPair<WriteCMOV2, [SBPort05,SBPort015], 3, [2,1], 3>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [SBPort5,SBPort05], 3, [2,1], 3>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SBPort05]>; // Setcc.
def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
@@ -615,13 +615,6 @@ def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr,
MMX_PSIGNDrr,
MMX_PSIGNWrr)>;
-def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>;
-
def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -705,12 +698,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;
-def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
- let Latency = 5;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-
def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
let Latency = 5;
let NumMicroOps = 1;
@@ -772,13 +759,6 @@ def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
}
def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;
-def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
- let Latency = 3;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup43], (instregex "SET(A|BE)m")>;
-
def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
let Latency = 5;
let NumMicroOps = 4;
@@ -1148,6 +1128,12 @@ def SBWriteFZeroIdiom : SchedWriteVariant<[
def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
VXORPDrr)>;
+def SBWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicX]>
@@ -1166,10 +1152,68 @@ def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTWrr, VPCMPGTWrr)>;
+def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
- SchedVar<NoSchedPred, [SBWriteResGroup30]>
+ SchedVar<NoSchedPred, [SBWritePCMPGTQ]>
]>;
def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
+// CMOVs that use both the Z and C flags require an extra uop.
+def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05,SBPort015]> {
+ let Latency = 3;
+ let ResourceCycles = [2,1];
+ let NumMicroOps = 3;
+}
+
+def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> {
+ let Latency = 8;
+ let ResourceCycles = [1,2,1];
+ let NumMicroOps = 4;
+}
+
+def SBCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SBWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def SBCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SBWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both the Z and C flags require an extra uop.
+def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,2];
+ let NumMicroOps = 4;
+}
+
+def SBSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def SBSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 2c9eb7516085..8f3e4ae62d53 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -1,9 +1,8 @@
//=- X86SchedSkylake.td - X86 Skylake Client Scheduling ------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -81,6 +80,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -157,7 +158,6 @@ defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
defm : SKLWriteResPair<WriteCMOV, [SKLPort06], 1, [1], 1>; // Conditional move.
-defm : SKLWriteResPair<WriteCMOV2, [SKLPort06], 2, [2], 2>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [SKLPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SKLPort06]>; // Setcc.
def : WriteRes<WriteSETCCStore, [SKLPort06,SKLPort4,SKLPort237]> {
@@ -183,7 +183,7 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>;
// Integer shifts and rotates.
defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
defm : SKLWriteResPair<WriteShiftCL, [SKLPort06], 3, [3], 3>;
-defm : SKLWriteResPair<WriteRotate, [SKLPort06], 2, [2], 2>;
+defm : SKLWriteResPair<WriteRotate, [SKLPort06], 1, [1], 1>;
defm : SKLWriteResPair<WriteRotateCL, [SKLPort06], 3, [3], 3>;
// SHLD/SHRD.
@@ -659,8 +659,7 @@ def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr",
- "VPBLENDD(Y?)rri",
- "(V?)PSUB(B|D|Q|W)(Y?)rr")>;
+ "VPBLENDD(Y?)rri")>;
def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
let Latency = 1;
@@ -698,13 +697,6 @@ def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
def: InstRW<[SKLWriteResGroup14], (instrs FDECSTP,
MMX_MOVDQ2Qrr)>;
-def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>;
-
def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -735,9 +727,10 @@ def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
}
def: InstRW<[SKLWriteResGroup23], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
- ADC8i8, SBB8i8)>;
-def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri",
- "SBB8ri")>;
+ ADC8i8, SBB8i8,
+ ADC16i16, SBB16i16,
+ ADC32i32, SBB32i32,
+ ADC64i32, SBB64i32)>;
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
let Latency = 2;
@@ -776,8 +769,7 @@ def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
let ResourceCycles = [1];
}
def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
- "VPBROADCAST(B|W)rr",
- "(V?)PCMPGTQ(Y?)rr")>;
+ "VPBROADCAST(B|W)rr")>;
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
let Latency = 3;
@@ -839,13 +831,6 @@ def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
}
def: InstRW<[SKLWriteResGroup43], (instrs FNSTSWm)>;
-def SKLWriteResGroup44 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
- let Latency = 3;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKLWriteResGroup44], (instregex "SET(A|BE)m")>;
-
def SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
@@ -1183,6 +1168,14 @@ def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06
def: InstRW<[SKLWriteResGroup100], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
+def SKLWriteResGroup100_1 : SchedWriteRes<[SKLPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SKLWriteResGroup100_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+ ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
@@ -1747,4 +1740,150 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>;
def: InstRW<[WriteZero], (instrs CLC)>;
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports under certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SKLWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def SKLWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[SKLWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def SKLWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[SKLWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr,
+ VXORPDrr)>;
+
+def SKLWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[SKLWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
+
+def SKLWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;
+
+def SKLWriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomLogicY], (instrs VPXORYrr)>;
+
+def SKLWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def SKLWriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+ VPCMPGTDYrr,
+ VPCMPGTWYrr)>;
+
+def SKLWritePSUB : SchedWriteRes<[SKLPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def SKLWriteVZeroIdiomPSUB : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SKLWritePSUB]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr,
+ PSUBDrr, VPSUBDrr,
+ PSUBQrr, VPSUBQrr,
+ PSUBWrr, VPSUBWrr,
+ VPSUBBYrr,
+ VPSUBDYrr,
+ VPSUBQYrr,
+ VPSUBWYrr)>;
+
+def SKLWritePCMPGTQ : SchedWriteRes<[SKLPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def SKLWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SKLWritePCMPGTQ]>
+]>;
+def : InstRW<[SKLWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+ VPCMPGTQYrr)>;
+
+
+// CMOVs that use both the Z and C flags require an extra uop.
+def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def SKLWriteCMOVA_CMOVBErm : SchedWriteRes<[SKLPort23,SKLPort06]> {
+ let Latency = 7;
+ let ResourceCycles = [1,2];
+ let NumMicroOps = 3;
+}
+
+def SKLCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKLWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def SKLCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKLWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[SKLCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[SKLCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both the Z and C flags require an extra uop.
+def SKLWriteSETA_SETBEr : SchedWriteRes<[SKLPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def SKLWriteSETA_SETBEm : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,2];
+ let NumMicroOps = 4;
+}
+
+def SKLSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKLWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def SKLSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKLWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[SKLSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[SKLSETA_SETBErm], (instrs SETCCm)>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td
index ec8e4db02d8a..58caf1dacfcb 100755
--- a/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1,9 +1,8 @@
//=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -81,6 +80,8 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 6>;
def : ReadAdvance<ReadAfterVecYLd, 7>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -158,7 +159,6 @@ defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
-defm : SKXWriteResPair<WriteCMOV2, [SKXPort06], 2, [2], 2>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [SKXPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SKXPort06]>; // Setcc.
def : WriteRes<WriteSETCCStore, [SKXPort06,SKXPort4,SKXPort237]> {
@@ -176,7 +176,7 @@ defm : X86WriteRes<WriteBitTestSetRegLd, [SKXPort0156,SKXPort23], 5, [1,1], 2>;
// Integer shifts and rotates.
defm : SKXWriteResPair<WriteShift, [SKXPort06], 1>;
defm : SKXWriteResPair<WriteShiftCL, [SKXPort06], 3, [3], 3>;
-defm : SKXWriteResPair<WriteRotate, [SKXPort06], 2, [2], 2>;
+defm : SKXWriteResPair<WriteRotate, [SKXPort06], 1, [1], 1>;
defm : SKXWriteResPair<WriteRotateCL, [SKXPort06], 3, [3], 3>;
// SHLD/SHRD.
@@ -680,8 +680,7 @@ def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
"VPBLENDMD(Z128|Z256)rr",
"VPBLENDMQ(Z128|Z256)rr",
"VPBLENDMW(Z128|Z256)rr",
- "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr",
- "(V?)PSUB(B|D|Q|W)rr",
+ "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
"VPTERNLOGD(Z|Z128|Z256)rri",
"VPTERNLOGQ(Z|Z128|Z256)rri")>;
@@ -722,13 +721,6 @@ def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
def: InstRW<[SKXWriteResGroup14], (instrs FDECSTP,
MMX_MOVDQ2Qrr)>;
-def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>;
-
def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -759,9 +751,10 @@ def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
}
def: InstRW<[SKXWriteResGroup23], (instrs CWD,
JCXZ, JECXZ, JRCXZ,
- ADC8i8, SBB8i8)>;
-def: InstRW<[SKXWriteResGroup23], (instregex "ADC8ri",
- "SBB8ri")>;
+ ADC8i8, SBB8i8,
+ ADC16i16, SBB16i16,
+ ADC32i32, SBB32i32,
+ ADC64i32, SBB64i32)>;
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
let Latency = 2;
@@ -834,7 +827,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
"VPCMPD(Z|Z128|Z256)rri",
"VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
"VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
- "(V?)PCMPGTQ(Y?)rr",
"VPCMPQ(Z|Z128|Z256)rri",
"VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
"VPCMPW(Z|Z128|Z256)rri",
@@ -900,13 +892,6 @@ def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
}
def: InstRW<[SKXWriteResGroup45], (instrs FNSTSWm)>;
-def SKXWriteResGroup46 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
- let Latency = 3;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKXWriteResGroup46], (instregex "SET(A|BE)m")>;
-
def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
@@ -1446,6 +1431,14 @@ def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06
def: InstRW<[SKXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
"ROR(8|16|32|64)m(1|i)")>;
+def SKXWriteResGroup107_1 : SchedWriteRes<[SKXPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+ ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
@@ -2463,4 +2456,171 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>;
def: InstRW<[WriteZero], (instrs CLC)>;
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports under certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def SKXWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def SKXWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def SKXWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+ XORPDrr, VXORPDrr,
+ VXORPSZ128rr,
+ VXORPDZ128rr)>;
+
+def SKXWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VXORPSZ256rr, VXORPDZ256rr)>;
+
+def SKXWriteFZeroIdiomZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicZ]>
+]>;
+def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
+
+def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+ VPXORDZ128rr, VPXORQZ128rr)>;
+
+def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
+ VPXORDZ256rr, VPXORQZ256rr)>;
+
+def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicZ]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
+
+def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+ VPCMPGTDYrr,
+ VPCMPGTWYrr)>;
+
+def SKXWritePSUB : SchedWriteRes<[SKXPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SKXWritePSUB]>
+]>;
+
+def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
+ PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
+ PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
+ PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
+ VPSUBBYrr, VPSUBBZ256rr,
+ VPSUBDYrr, VPSUBDZ256rr,
+ VPSUBQYrr, VPSUBQZ256rr,
+ VPSUBWYrr, VPSUBWZ256rr,
+ VPSUBBZrr,
+ VPSUBDZrr,
+ VPSUBQZrr,
+ VPSUBWZrr)>;
+def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [SKXWritePCMPGTQ]>
+]>;
+def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+ VPCMPGTQYrr)>;
+
+
+// CMOVs that use both the Z and C flags require an extra uop.
+def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def SKXWriteCMOVA_CMOVBErm : SchedWriteRes<[SKXPort23,SKXPort06]> {
+ let Latency = 7;
+ let ResourceCycles = [1,2];
+ let NumMicroOps = 3;
+}
+
+def SKXCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SKXWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def SKXCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SKXWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[SKXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[SKXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both the Z and C flags require an extra uop.
+def SKXWriteSETA_SETBEr : SchedWriteRes<[SKXPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def SKXWriteSETA_SETBEm : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,2];
+ let NumMicroOps = 4;
+}
+
+def SKXSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SKXWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def SKXSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SKXWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[SKXSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[SKXSETA_SETBErm], (instrs SETCCm)>;
+
} // SchedModel
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 25aa83f96d3a..55ca85ec1e3d 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -1,9 +1,8 @@
//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,6 +17,12 @@ def ReadAfterVecLd : SchedRead;
def ReadAfterVecXLd : SchedRead;
def ReadAfterVecYLd : SchedRead;
+// Instructions that move data between general purpose registers and vector
+// registers may be subject to extra latency due to data bypass delays.
+// This SchedRead describes a bypass delay caused by data being moved from the
+// integer unit to the floating point unit.
+def ReadInt2Fpu : SchedRead;
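+// (Illustration, not an exhaustive list: a GPR-to-vector transfer such as
+// "vmovd %eax, %xmm0" is the kind of instruction whose integer operand would be
+// read through ReadInt2Fpu; the per-instruction wiring is done in the
+// instruction and scheduler definitions.)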
+
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;
@@ -158,7 +163,6 @@ defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV : X86SchedWritePair; // Conditional move.
-defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 1589ff2ef402..b0334655de7e 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -1,9 +1,8 @@
//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,6 +46,8 @@ def : ReadAdvance<ReadAfterVecLd, 3>;
def : ReadAdvance<ReadAfterVecXLd, 3>;
def : ReadAdvance<ReadAfterVecYLd, 3>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
@@ -112,7 +113,6 @@ defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[1
defm : X86WriteResPairUnsupported<WriteCRC32>;
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [AtomPort01]>;
@@ -740,7 +740,7 @@ def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
let ResourceCycles = [45];
}
-def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
+def : InstRW<[AtomWrite01_45], (instrs MONITOR32rrr, MONITOR64rrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
diff --git a/lib/Target/X86/X86ScheduleBdVer2.td b/lib/Target/X86/X86ScheduleBdVer2.td
index 5798e1b2671b..8cc01c3acece 100644
--- a/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/lib/Target/X86/X86ScheduleBdVer2.td
@@ -1,9 +1,8 @@
//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -209,7 +208,10 @@ multiclass __pdWriteResPair<X86FoldableSchedWrite SchedRW,
!add(Lat, LoadLat),
!if(!and(!empty(Res), !eq(LoadRes, 1)),
[],
- !listconcat([LoadRes], Res)),
+ !listconcat([LoadRes],
+ !if(!empty(Res),
+ !listsplat(1, !size(ExePorts)),
+ Res))),
!add(UOps, LoadUOps)>;
}
@@ -218,7 +220,7 @@ multiclass PdWriteResExPair<X86FoldableSchedWrite SchedRW,
list<int> Res = [], int UOps = 1,
int LoadUOps = 0> {
defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
- /*LoadLat*/4, /*LoadRes*/1, LoadUOps>;
+ /*LoadLat*/4, /*LoadRes*/3, LoadUOps>;
}
multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
@@ -226,15 +228,15 @@ multiclass PdWriteResXMMPair<X86FoldableSchedWrite SchedRW,
list<int> Res = [], int UOps = 1,
int LoadUOps = 0> {
defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
- /*LoadLat*/5, /*LoadRes*/1, LoadUOps>;
+ /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}
multiclass PdWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts, int Lat,
- list<int> Res, int UOps = 2,
+ list<int> Res = [], int UOps = 2,
int LoadUOps = 0> {
defm : __pdWriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
- /*LoadLat*/5, /*LoadRes*/2, LoadUOps>;
+ /*LoadLat*/5, /*LoadRes*/3, LoadUOps>;
}
//===----------------------------------------------------------------------===//
@@ -251,6 +253,11 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 5>;
+// Transfers from the int domain to the ivec domain incur an additional latency
+// of 8..10cy.
+// Reference: Agner Fog, Microarchitecture, "AMD Bulldozer, Piledriver, Steamroller
+// and Excavator pipeline", "Data delay between different execution domains"
+def : ReadAdvance<ReadInt2Fpu, -10>;
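+// (A negative ReadAdvance delays the operand read, i.e. it adds the bypass
+// penalty on top of the producer's latency for reads through ReadInt2Fpu.)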
+
// A folded store needs a cycle on the PdStore for the store data.
def : WriteRes<WriteRMW, [PdStore]>;
@@ -258,15 +265,15 @@ def : WriteRes<WriteRMW, [PdStore]>;
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; }
+def : WriteRes<WriteLoad, [PdLoad]> { let Latency = 5; let ResourceCycles = [2]; }
def : WriteRes<WriteStore, [PdStore]>;
def : WriteRes<WriteStoreNT, [PdStore]>;
-def : WriteRes<WriteMove, [PdEX01]>;
+def : WriteRes<WriteMove, [PdEX01]> { let ResourceCycles = [2]; }
// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [PdLoad]> { let Latency = 5; }
-def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; }
+def : WriteRes<WriteSTMXCSR, [PdStore]> { let NumMicroOps = 2; let ResourceCycles = [18]; }
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
@@ -300,6 +307,7 @@ def : InstRW<[PdWriteXLAT], (instrs XLAT)>;
def PdWriteLARrr : SchedWriteRes<[PdEX01]> {
let Latency = 184;
+ let ResourceCycles = [375];
let NumMicroOps = 45;
}
def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
@@ -307,22 +315,31 @@ def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr",
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
-def : WriteRes<WriteNop, [PdEX01]>;
+def : WriteRes<WriteNop, [PdEX01]> { let ResourceCycles = [2]; }
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResExPair<WriteALU, [PdEX01]>;
+defm : PdWriteResExPair<WriteALU, [PdEX01], 1, [2]>;
+
+def PdWriteALURMW : SchedWriteRes<[PdLoad, PdEX01, PdStore]> {
+ let Latency = 6;
+ let ResourceCycles = [3, 2, 1];
+ let NumMicroOps = 1;
+}
+def : SchedAlias<WriteALURMW, PdWriteALURMW>;
def PdWriteLXADD : SchedWriteRes<[PdEX01]> {
let Latency = 6;
+ let ResourceCycles = [88];
let NumMicroOps = 4;
}
def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>;
def PdWriteBMI1 : SchedWriteRes<[PdEX01]> {
let Latency = 2;
+ let ResourceCycles = [2];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1],
@@ -332,8 +349,9 @@ def : InstRW<[PdWriteBMI1],
BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr,
TZMSK32rr, TZMSK64rr)>;
-def PdWriteBMI1m : SchedWriteRes<[PdEX01]> {
+def PdWriteBMI1m : SchedWriteRes<[PdLoad, PdEX01]> {
let Latency = 6;
+ let ResourceCycles = [3, 3];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteBMI1m],
@@ -345,26 +363,34 @@ def : InstRW<[PdWriteBMI1m],
defm : PdWriteResExPair<WriteADC, [PdEX01], 1, [2]>;
-defm : PdWriteRes<WriteBSWAP32, [PdEX1]>;
-defm : PdWriteRes<WriteBSWAP64, [PdEX1]>;
-defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [], 5>;
-defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [], 2>;
-defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
+def PdWriteADCSBB64ri32 : SchedWriteRes<[PdEX01]> {
+ let ResourceCycles = [3];
+}
+def : InstRW<[PdWriteADCSBB64ri32], (instrs ADC64ri32, SBB64ri32)>;
+
+defm : PdWriteRes<WriteBSWAP32, [PdEX01]>;
+defm : PdWriteRes<WriteBSWAP64, [PdEX01]>;
+defm : PdWriteRes<WriteCMPXCHG, [PdEX1], 3, [3], 5>;
+defm : PdWriteRes<WriteCMPXCHGRMW, [PdEX1, PdStore, PdLoad], 3, [44, 1, 1], 2>;
+defm : PdWriteRes<WriteXCHG, [PdEX1], 1, [], 2>;
def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> {
let Latency = 3;
+ let ResourceCycles = [3];
let NumMicroOps = 3;
}
def : InstRW<[PdWriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> {
let Latency = 3;
+ let ResourceCycles = [23];
let NumMicroOps = 5;
}
def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>;
def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> {
let Latency = 3;
+ let ResourceCycles = [21];
let NumMicroOps = 6;
}
def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
@@ -372,42 +398,40 @@ def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm],
def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> {
let Latency = 3;
+ let ResourceCycles = [26];
let NumMicroOps = 18;
}
def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> {
let Latency = 3;
+ let ResourceCycles = [69];
let NumMicroOps = 22;
}
def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>;
-def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> {
- let Latency = 2;
- let NumMicroOps = 2;
-}
-def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>;
-
def PdWriteXADD : SchedWriteRes<[PdEX1]> {
- let Latency = 2;
- let NumMicroOps = 4;
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
}
def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>;
def PdWriteXADDm : SchedWriteRes<[PdEX1]> {
-let Latency = 6;
-let NumMicroOps = 4;
+ let Latency = 6;
+ let ResourceCycles = [20];
+ let NumMicroOps = 4;
}
def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>;
-defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4>;
-defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [], 2>;
-defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [], 2>;
-defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4>;
-defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4>;
-defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [], 1, 1>;
-defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4>;
-defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 4]>;
+defm : PdWriteResExPair<WriteIMul8, [PdEX1, PdMul], 4, [1, 4]>;
+defm : PdWriteResExPair<WriteIMul16, [PdEX1, PdMul], 4, [1, 5], 2>;
+defm : PdWriteResExPair<WriteIMul16Imm, [PdEX1, PdMul], 5, [1, 5], 2>;
+defm : PdWriteResExPair<WriteIMul16Reg, [PdEX1, PdMul], 4, [1, 2]>;
+defm : PdWriteResExPair<WriteIMul32, [PdEX1, PdMul], 4, [1, 4]>;
+defm : PdWriteResExPair<WriteIMul32Imm, [PdEX1, PdMul], 4, [1, 2], 1, 1>;
+defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
+defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
@@ -422,36 +446,48 @@ defm : PdWriteResExPair<WriteIDiv16, [PdEX1, PdDiv], 15, [1, 17],
defm : PdWriteResExPair<WriteIDiv32, [PdEX1, PdDiv], 14, [1, 25], 2>;
defm : PdWriteResExPair<WriteIDiv64, [PdEX1, PdDiv], 14, [1, 14], 2>;
-defm : PdWriteResExPair<WriteCRC32, [PdEX01], 3, [4], 3>;
+defm : PdWriteResExPair<WriteCRC32, [PdEX01], 2, [4], 3>;
def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> {
let Latency = 5;
- let ResourceCycles = [4];
+ let ResourceCycles = [10];
let NumMicroOps = 5;
}
def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>;
def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> {
let Latency = 6;
- let ResourceCycles = [4];
+ let ResourceCycles = [12];
let NumMicroOps = 7;
}
def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>;
def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> {
let Latency = 10;
- let ResourceCycles = [4];
+ let ResourceCycles = [17];
let NumMicroOps = 11;
}
def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>;
defm : PdWriteResExPair<WriteCMOV, [PdEX01]>; // Conditional move.
-defm : PdWriteResExPair<WriteCMOV2, [PdEX01], 1, [], 1, 1>; // Conditional (CF + ZF flag) move.
-def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm,
- CMOVGE16rm, CMOVGE32rm, CMOVGE64rm,
- CMOVL16rm, CMOVL32rm, CMOVL64rm,
- CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>;
+def PdWriteCMOVm : SchedWriteRes<[PdLoad, PdEX01]> {
+ let Latency = 5;
+ let ResourceCycles = [3, 3];
+ let NumMicroOps = 2;
+}
+
+def PdWriteCMOVmVar : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_BE">>, [PdWriteCMOVm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_A">>, [PdWriteCMOVm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_L">>, [PdWriteCMOVm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_GE">>, [PdWriteCMOVm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_LE">>, [PdWriteCMOVm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<7, "X86::COND_G">>, [PdWriteCMOVm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[PdWriteCMOVmVar], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
defm : PdWriteRes<WriteFCMOV, [PdFPU0, PdFPFMA]>; // x87 conditional move.
@@ -462,107 +498,143 @@ def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> {
let ResourceCycles = [2];
let NumMicroOps = 2;
}
-def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm,
- SETLEm, SETLm)>;
-defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [], 2>;
+def PdSETGEmSETGmSETLEmSETLm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_GE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_G">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_LE">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
+ SchedVar<MCSchedPredicate<CheckImmOperand_s<5, "X86::COND_L">>, [PdWriteSETGEmSETGmSETLEmSETLm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+def : InstRW<[PdSETGEmSETGmSETLEmSETLm], (instrs SETCCm)>;
+
+defm : PdWriteRes<WriteLAHFSAHF, [PdEX01], 2, [4], 2>;
-def WriteLAHF : SchedWriteRes<[PdEX01]> {
+def PdWriteLAHF : SchedWriteRes<[PdEX01]> {
let Latency = 2;
+ let ResourceCycles = [4];
let NumMicroOps = 4;
}
-def : InstRW<[WriteLAHF], (instrs LAHF)>;
+def : InstRW<[PdWriteLAHF], (instrs LAHF)>;
-def WriteSAHF : SchedWriteRes<[PdEX01]> {
+def PdWriteSAHF : SchedWriteRes<[PdEX01]> {
let Latency = 2;
+ let ResourceCycles = [2];
let NumMicroOps = 2;
}
-def : InstRW<[WriteSAHF], (instrs SAHF)>;
+def : InstRW<[PdWriteSAHF], (instrs SAHF)>;
+
+defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [2], 1>;
+defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [2, 3], 1>;
+defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [7, 2], 7>;
+defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [2], 2>;
+defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
+defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
-defm : PdWriteRes<WriteBitTest, [PdEX01], 1, [1], 1>;
-defm : PdWriteRes<WriteBitTestImmLd, [PdEX01, PdLoad], 5, [1, 1], 1>;
-defm : PdWriteRes<WriteBitTestRegLd, [PdEX01, PdLoad], 5, [1, 1], 7>;
-defm : PdWriteRes<WriteBitTestSet, [PdEX01], 2, [1], 2>;
-defm : PdWriteRes<WriteBitTestSetImmLd, [PdEX01, PdLoad], 6, [1, 1], 4>;
-defm : PdWriteRes<WriteBitTestSetImmRMW, [PdEX01, PdLoad], 6, [1, 1], 4>;
-defm : PdWriteRes<WriteBitTestSetRegLd, [PdEX01, PdLoad], 6, [1, 1], 10>;
-defm : PdWriteRes<WriteBitTestSetRegRMW, [PdEX01, PdLoad], 6, [1, 1], 10>;
+def PdWriteBTSIm : SchedWriteRes<[PdEX01, PdLoad]> {
+ let Latency = 7;
+ let ResourceCycles = [42, 1];
+ let NumMicroOps = 4;
+}
+def : SchedAlias<WriteBitTestSetImmRMW, PdWriteBTSIm>;
+def PdWriteBTSRm : SchedWriteRes<[PdEX01, PdLoad]> {
+ let Latency = 7;
+ let ResourceCycles = [44, 1];
+ let NumMicroOps = 10;
+}
+def : SchedAlias<WriteBitTestSetRegRMW, PdWriteBTSRm>;
// This is for simple LEAs with one or two input operands.
// FIXME: SAGU 3-operand LEA
def : WriteRes<WriteLEA, [PdEX01]> { let NumMicroOps = 2; }
// Bit counts.
-defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [4], 6, 2>;
-defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [4], 7, 2>;
-defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4>;
-defm : PdWriteResExPair<WriteLZCNT, [PdEX01], 2, [], 2>;
-defm : PdWriteResExPair<WriteTZCNT, [PdEX01], 2, [2], 2>;
+defm : PdWriteResExPair<WriteBSF, [PdEX01], 3, [6], 6, 2>;
+defm : PdWriteResExPair<WriteBSR, [PdEX01], 4, [8], 7, 2>;
+defm : PdWriteResExPair<WritePOPCNT, [PdEX01], 4, [4]>;
+defm : PdWriteResExPair<WriteLZCNT, [PdEX0], 2, [2], 2>;
+defm : PdWriteResExPair<WriteTZCNT, [PdEX0], 2, [2], 2>;
// BMI1 BEXTR, BMI2 BZHI
-defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [], 2>;
-defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [], 2>;
+defm : PdWriteResExPair<WriteBEXTR, [PdEX01], 2, [2], 2>;
+defm : PdWriteResExPair<WriteBLS, [PdEX01], 2, [2], 2>;
defm : PdWriteResExPair<WriteBZHI, [PdEX01]>;
+def PdWriteBEXTRI : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let ResourceCycles = [4];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBEXTRI], (instrs BEXTRI32ri, BEXTRI64ri)>;
+
+def PdWriteBEXTRIm : SchedWriteRes<[PdEX01]> {
+ let Latency = 2;
+ let ResourceCycles = [5];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteBEXTRIm], (instrs BEXTRI32mi, BEXTRI64mi)>;
+
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResExPair<WriteShift, [PdEX01]>;
+defm : PdWriteResExPair<WriteShift, [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteShiftCL, [PdEX01]>;
-defm : PdWriteResExPair<WriteRotate, [PdEX01]>;
+defm : PdWriteResExPair<WriteRotate, [PdEX01], 1, [2]>;
defm : PdWriteResExPair<WriteRotateCL, [PdEX01]>;
def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> {
let Latency = 12;
+ let ResourceCycles = [24];
let NumMicroOps = 26;
}
def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>;
def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> {
let Latency = 12;
+ let ResourceCycles = [23];
let NumMicroOps = 23;
}
def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>;
def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> {
let Latency = 11;
+ let ResourceCycles = [22];
let NumMicroOps = 24;
}
def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>;
def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> {
let Latency = 10;
+ let ResourceCycles = [20];
let NumMicroOps = 22;
}
def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>;
def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> {
let Latency = 10;
+ let ResourceCycles = [19];
let NumMicroOps = 19;
}
def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>;
-def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> {
+def PdWriteRCL3264rCL : SchedWriteRes<[PdEX01]> {
let Latency = 7;
+ let ResourceCycles = [14];
let NumMicroOps = 17;
}
-def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>;
+def : InstRW<[PdWriteRCL3264rCL], (instrs RCL32rCL, RCL64rCL)>;
-def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> {
+def PdWriteRCR3264rCL : SchedWriteRes<[PdEX01]> {
let Latency = 7;
+ let ResourceCycles = [13];
let NumMicroOps = 16;
}
-def : InstRW<[PdWriteRCR64rCL], (instrs RCR64rCL)>;
-
-def PdWriteRCR32rCL : SchedWriteRes<[PdEX01]> {
- let Latency = 7;
- let NumMicroOps = 16;
-}
-def : InstRW<[PdWriteRCR32rCL ], (instrs RCR32rCL)>;
+def : InstRW<[PdWriteRCR3264rCL], (instrs RCR32rCL, RCR64rCL)>;
def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> {
let Latency = 7;
+ let ResourceCycles = [14];
let NumMicroOps = 15;
}
def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
@@ -570,31 +642,35 @@ def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>;
def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> {
let Latency = 9;
+ let ResourceCycles = [18];
let NumMicroOps = 20;
}
def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>;
def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> {
let Latency = 11;
+ let ResourceCycles = [21];
let NumMicroOps = 21;
}
def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>;
def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> {
let Latency = 8;
+ let ResourceCycles = [15];
let NumMicroOps = 16;
}
def : InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>;
def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> {
let Latency = 13;
+ let ResourceCycles = [25];
let NumMicroOps = 25;
}
def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>;
// SHLD/SHRD.
-defm : PdWriteRes<WriteSHDrri, [PdEX01], 4, [6], 6>;
-defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 4, [8], 7>;
+defm : PdWriteRes<WriteSHDrri, [PdEX01], 3, [6], 6>;
+defm : PdWriteRes<WriteSHDrrcl, [PdEX01], 3, [8], 7>;
def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
let Latency = 3;
@@ -604,8 +680,8 @@ def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> {
def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>;
def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> {
- let Latency = 4;
- let ResourceCycles = [8];
+ let Latency = 3;
+ let ResourceCycles = [6];
let NumMicroOps = 7;
}
def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL,
@@ -623,19 +699,20 @@ defm : PdWriteRes<WriteFLD0, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLD1, [PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFLDC, [PdFPU1, PdFPSTO], 3>;
-defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5>;
-defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5>;
-defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [], 2>;
+defm : PdWriteRes<WriteFLoad, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteFLoadX, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteFLoadY, [PdLoad, PdFPU01, PdFPFMA], 5, [3, 1, 3], 2>;
-defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [1, 1, 2]>;
-defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [2, 2, 4], 2>;
+defm : PdWriteRes<WriteFMaskedLoad, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 1, 4]>;
+defm : PdWriteRes<WriteFMaskedLoadY, [PdLoad, PdFPU01, PdFPFMA], 6, [3, 2, 4], 2>;
-defm : PdWriteRes<WriteFStore, [PdStore, PdFPU1, PdFPSTO], 2>;
-defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU1, PdFPSTO]>;
-defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+defm : PdWriteRes<WriteFStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
+defm : PdWriteRes<WriteFStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
+defm : PdWriteRes<WriteFStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [1, 36, 2], 4>;
-def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
+def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU23, PdFPSTO]> {
let Latency = 2;
+ let ResourceCycles = [1, 3, 1];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>;
@@ -649,33 +726,41 @@ defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
-defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 4], 18>;
-defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 4], 34>;
+defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
+defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
-defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA]>;
+defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
defm : PdWriteRes<WriteFMoveY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
defm : PdWriteRes<WriteEMMS, [PdFPU01, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFAdd, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAddX, [PdFPU0, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFAddY, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
+def PdWriteX87Add: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
+ let Latency = 5;
+ let ResourceCycles = [3, 1, 10];
+}
+def : InstRW<[PdWriteX87Add], (instrs ADD_FI16m, ADD_FI32m, ADD_F32m, ADD_F64m,
+ SUB_FI16m, SUB_FI32m, SUB_F32m, SUB_F64m,
+ SUBR_FI16m, SUBR_FI32m, SUBR_F32m, SUBR_F64m)>;
+
defm : PdWriteResXMMPair<WriteFAdd64, [PdFPU0, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFAdd64X, [PdFPU0, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFAdd64Y, [PdFPU0, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : PdWriteResXMMPair<WriteFCmp, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmpX, [PdFPU0, PdFPFMA], 2>;
-defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFCmpY, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : PdWriteResXMMPair<WriteFCmp64, [PdFPU0, PdFPFMA], 2>;
defm : PdWriteResXMMPair<WriteFCmp64X, [PdFPU0, PdFPFMA], 2>;
-defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFCmp64Y, [PdFPU0, PdFPFMA], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : PdWriteResXMMPair<WriteFCom, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
@@ -690,29 +775,35 @@ def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>;
defm : PdWriteResXMMPair<WriteFMul, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMulX, [PdFPU1, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFMulY, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
+def PdWriteX87Mul: SchedWriteRes<[PdLoad, PdFPU1, PdFPFMA]> {
+ let Latency = 5;
+ let ResourceCycles = [3, 1, 10];
+}
+def : InstRW<[PdWriteX87Mul], (instrs MUL_FI16m, MUL_FI32m, MUL_F32m, MUL_F64m)>;
+
defm : PdWriteResXMMPair<WriteFMul64, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResXMMPair<WriteFMul64X, [PdFPU1, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFMul64Y, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
-defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5>;
-defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 1]>;
+defm : PdWriteResXMMPair<WriteFMA, [PdFPU, PdFPFMA], 5, [1, 3]>;
+defm : PdWriteResXMMPair<WriteFMAX, [PdFPU, PdFPFMA], 5, [1, 3]>;
+defm : PdWriteResYMMPair<WriteFMAY, [PdFPU, PdFPFMA], 5, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
-defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 3], 15, 2>;
+defm : PdWriteResXMMPair<WriteDPPD, [PdFPU1, PdFPFMA], 15, [1, 10], 15, 2>;
-defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 3], 16, 2>;
-defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 6], /*or 29*/ 25, 4>;
+defm : PdWriteResXMMPair<WriteDPPS, [PdFPU1, PdFPFMA], 25, [1, 14], 16, 2>;
+defm : PdWriteResYMMPair<WriteDPPSY, [PdFPU1, PdFPFMA], 27, [2, 25], /*or 29*/ 25, 4>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> {
- let Latency = 25;
- let ResourceCycles = [1, 3];
+ let Latency = 27;
+ let ResourceCycles = [1, 14];
let NumMicroOps = 17;
}
def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>;
@@ -722,118 +813,140 @@ defm : PdWriteResXMMPair<WriteFRcpX, [PdFPU1, PdFPFMA], 5>;
defm : PdWriteResYMMPair<WriteFRcpY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5>;
+defm : PdWriteResXMMPair<WriteFRsqrt, [PdFPU1, PdFPFMA], 5, [1, 2]>;
defm : PdWriteResXMMPair<WriteFRsqrtX, [PdFPU1, PdFPFMA], 5>;
-defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 1]>;
+defm : PdWriteResYMMPair<WriteFRsqrtY, [PdFPU1, PdFPFMA], 5, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+defm : PdWriteResXMMPair<WriteFDiv, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFDivX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFDivY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
-defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 19]>;
-defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 38]>;
+def PdWriteX87Div: SchedWriteRes<[PdLoad, PdFPU0, PdFPFMA]> {
+ let Latency = 9;
+ let ResourceCycles = [3, 1, 18];
+}
+def : InstRW<[PdWriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
+ DIVR_FI16m, DIVR_FI32m,
+ DIV_F32m, DIV_F64m,
+ DIVR_F32m, DIVR_F64m)>;
+
+defm : PdWriteResXMMPair<WriteFDiv64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFDiv64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFDiv64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 21]>;
-defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 21]>;
-defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 42]>;
+defm : PdWriteResXMMPair<WriteFSqrt, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFSqrtX, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFSqrtY, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 27]>;
-defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 27]>;
-defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 54]>;
+defm : PdWriteResXMMPair<WriteFSqrt64, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResXMMPair<WriteFSqrt64X, [PdFPU1, PdFPFMA], 9, [1, 9]>;
+defm : PdWriteResYMMPair<WriteFSqrt64Y, [PdFPU1, PdFPFMA], 9, [2, 18]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
-defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 35]>;
-defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA]>;
+defm : PdWriteResXMMPair<WriteFSqrt80, [PdFPU1, PdFPFMA], 1, [1, 18]>;
+defm : PdWriteResXMMPair<WriteFSign, [PdFPU1, PdFPFMA], 1, [1, 4]>;
-defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResXMMPair<WriteFRnd, [PdFPU1, PdFPSTO], 4, []>;
defm : PdWriteResYMMPair<WriteFRndY, [PdFPU1, PdFPSTO], 4, [2, 1], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
-def PdWriteVFRCZ : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteVFRCZP : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+ let Latency = 10;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[PdWriteVFRCZP], (instrs VFRCZPDrr, VFRCZPSrr)>;
+
+def PdWriteVFRCZS : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 10;
+ let ResourceCycles = [10, 1];
let NumMicroOps = 2;
}
-def : InstRW<[PdWriteVFRCZ], (instrs VFRCZPDrr, VFRCZPSrr,
- VFRCZSDrr, VFRCZSSrr)>;
+def : InstRW<[PdWriteVFRCZS], (instrs VFRCZSDrr, VFRCZSSrr)>;
def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 15;
- let NumMicroOps = 2;
+ let ResourceCycles = [2, 1];
+ let NumMicroOps = 3;
}
def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm,
VFRCZSDrm, VFRCZSSrm)>;
def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 10;
- let ResourceCycles = [2, 1];
+ let ResourceCycles = [3, 1];
let NumMicroOps = 4;
}
def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>;
def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> {
let Latency = 15;
- let ResourceCycles = [2, 1];
+ let ResourceCycles = [4, 1];
let NumMicroOps = 8;
}
def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>;
-defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2>;
+defm : PdWriteResXMMPair<WriteFLogic, [PdFPU01, PdFPFMA], 2, [1, 2]>;
defm : PdWriteResYMMPair<WriteFLogicY, [PdFPU01, PdFPFMA], 2, [2, 2]>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : PdWriteResXMMPair<WriteFTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
-defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : PdWriteResYMMPair<WriteFTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [4, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
-defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2>;
-defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : PdWriteResXMMPair<WriteFShuffle, [PdFPU01, PdFPFMA], 2, [1, 2]>;
+defm : PdWriteResYMMPair<WriteFShuffleY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 7;
+ let ResourceCycles = [1, 3];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>;
-defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 4]>;
-defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 6], 2>;
+defm : PdWriteResXMMPair<WriteFVarShuffle, [PdFPU01, PdFPFMA], 3, [1, 2]>;
+defm : PdWriteResYMMPair<WriteFVarShuffleY, [PdFPU01, PdFPFMA], 3, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
-defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2>;
-defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 2], 2>;
+defm : PdWriteResXMMPair<WriteFBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>;
+defm : PdWriteResYMMPair<WriteFBlendY, [PdFPU01, PdFPFMA], 2, [2, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
-defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 4]>;
-defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 6], 2>;
+defm : PdWriteResXMMPair<WriteFVarBlend, [PdFPU01, PdFPFMA], 2, [1, 3]>;
+defm : PdWriteResYMMPair<WriteFVarBlendY, [PdFPU01, PdFPFMA], 2, [2, 4], 2>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
-defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [], 2>;
+defm : PdWriteResXMMPair<WriteFShuffle256, [PdFPU01, PdFPFMA], 2, [1, 3], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 2;
+ let ResourceCycles = [1, 2];
}
def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>;
def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 7;
+ let ResourceCycles = [1, 4];
let NumMicroOps = 2;
}
def : InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>;
def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 4;
+ let ResourceCycles = [1, 6];
let NumMicroOps = 8;
}
def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>;
def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> {
let Latency = 8; // 4 + 4
+ let ResourceCycles = [1, 8];
let NumMicroOps = 10;
}
def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
@@ -842,99 +955,100 @@ def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>;
// Conversions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtSS2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
-defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU1, PdFPSTO], 4>;
-defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : PdWriteResXMMPair<WriteCvtPS2I, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtPS2IY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU1, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtSD2I, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdEX0], 13, [], 2>;
-defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU1, PdFPSTO], 8, [], 2>;
-defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : PdWriteResXMMPair<WriteCvtPD2I, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2IY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 6;
let NumMicroOps = 2;
}
def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>;
// FIXME: f+3 ST, LD+STC latency
-defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU1, PdFPSTO], 4, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtI2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..
-defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU1, PdFPSTO], 4>;
-defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU1, PdFPSTO], 4, [2, 1]>;
+defm : PdWriteResXMMPair<WriteCvtI2PS, [PdFPU0, PdFPCVT, PdFPSTO], 4>;
+defm : PdWriteResYMMPair<WriteCvtI2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU1, PdFPSTO], 4, [], 2>;
+defm : PdWriteResXMMPair<WriteCvtI2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [], 2>;
// FIXME: .Folded version is one NumMicroOp *less*..
-def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 13;
+ let ResourceCycles = [1, 3, 1];
let NumMicroOps = 2;
}
-def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>;
+def : InstRW<[PdWriteCVTSI642SDrr_CVTSI642SSrr_CVTSI2SDr_CVTSI2SSrr], (instrs CVTSI642SDrr, CVTSI642SSrr, CVTSI2SDrr, CVTSI2SSrr)>;
-defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
-defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : PdWriteResXMMPair<WriteCvtI2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtI2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
-defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResXMMPair<WriteCvtSS2SD, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
-defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU1, PdFPSTO], 8, [], 2>;
-defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 1>;
+defm : PdWriteResXMMPair<WriteCvtPS2PD, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPS2PDY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 1>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU1, PdFPSTO], 4>;
+defm : PdWriteResXMMPair<WriteCvtSD2SS, [PdFPU0, PdFPCVT, PdFPSTO], 4, [1, 2, 1]>;
-defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU1, PdFPSTO], 8, [], 2>;
-defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : PdWriteResXMMPair<WriteCvtPD2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [], 2>;
+defm : PdWriteResYMMPair<WriteCvtPD2PSY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-def WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 6;
let NumMicroOps = 2;
}
-def : InstRW<[WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
+def : InstRW<[PdWriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr,
MMX_CVTPI2PDirr)>;
-def WriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU1, PdFPSTO]> {
+def PdWriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU0, PdFPCVT, PdFPSTO]> {
let Latency = 4;
let NumMicroOps = 2;
}
-def : InstRW<[WriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
+def : InstRW<[PdWriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>;
-defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU1, PdFPSTO], 8, [], 2, 1>;
-defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU1, PdFPSTO], 8, [2, 1], 4, 3>;
+defm : PdWriteResXMMPair<WriteCvtPH2PS, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2, 1>;
+defm : PdWriteResYMMPair<WriteCvtPH2PSY, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 4, 3>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
-defm : PdWriteRes<WriteCvtPS2PH, [PdFPU1, PdFPSTO], 8, [], 2>;
-defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU1, PdFPSTO, PdFPFMA], 8, [2, 1, 1], 4>;
+defm : PdWriteRes<WriteCvtPS2PH, [PdFPU0, PdFPCVT, PdFPSTO], 8, [1, 2, 1], 2>;
+defm : PdWriteRes<WriteCvtPS2PHY, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA], 8, [1, 2, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
-defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU1, PdFPSTO, PdStore], 4, [], 3>;
-defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU1, PdFPSTO, PdFPFMA, PdStore], 4, [2, 1, 1, 1], 4>;
+defm : PdWriteRes<WriteCvtPS2PHSt, [PdFPU0, PdFPCVT, PdFPSTO, PdStore], 4, [1, 2, 1, 1], 3>;
+defm : PdWriteRes<WriteCvtPS2PHYSt, [PdFPU0, PdFPCVT, PdFPSTO, PdFPFMA, PdStore], 4, [1, 2, 1, 1, 1], 4>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5>;
-defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5>;
-defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [], 2>;
+defm : PdWriteRes<WriteVecLoad, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteVecLoadX, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 3]>;
+defm : PdWriteRes<WriteVecLoadY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 3], 2>;
-defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5>;
-defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5>;
+defm : PdWriteRes<WriteVecLoadNT, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 1, 4]>;
+defm : PdWriteRes<WriteVecLoadNTY, [PdLoad, PdFPU01, PdFPMAL], 5, [3, 2, 4]>;
-defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [1, 1, 2]>;
-defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
+defm : PdWriteRes<WriteVecMaskedLoad, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 1, 2]>;
+defm : PdWriteRes<WriteVecMaskedLoadY, [PdLoad, PdFPU01, PdFPMAL], 6, [3, 2, 4], 2>;
-defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU1, PdFPSTO], 2>;
-defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU1, PdFPSTO]>;
-defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU1, PdFPSTO], 1, [], 4>;
+defm : PdWriteRes<WriteVecStore, [PdStore, PdFPU23, PdFPSTO], 2, [1, 3, 1]>;
+defm : PdWriteRes<WriteVecStoreX, [PdStore, PdFPU23, PdFPSTO], 1, [1, 3, 1]>;
+defm : PdWriteRes<WriteVecStoreY, [PdStore, PdFPU23, PdFPSTO], 1, [2, 36, 2], 4>;
def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> {
let NumMicroOps = 8;
@@ -948,24 +1062,33 @@ defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1,
defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;
defm : PdWriteRes<WriteVecMoveY, [PdFPU01, PdFPMAL], 2, [2, 2], 2>;
-defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 10>;
-defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 10, [], 2>;
+def PdWriteMOVDQArr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+}
+def : InstRW<[PdWriteMOVDQArr], (instrs MOVDQArr)>;
+
+def PdWriteMOVQ2DQrr : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 4;
+}
+def : InstRW<[PdWriteMOVQ2DQrr], (instrs MMX_MOVQ2DQrr)>;
+
+defm : PdWriteRes<WriteVecMoveToGpr, [PdFPU0, PdFPFMA, PdEX0], 11>;
+defm : PdWriteRes<WriteVecMoveFromGpr, [PdFPU01, PdFPFMA], 11, [1, 2], 2>;
defm : PdWriteResXMMPair<WriteVecALU, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecALUX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
-defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3>;
-defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
+defm : PdWriteResXMMPair<WriteVecShiftX, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
-defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecShiftImm, [PdFPU01, PdFPMAL], 2, [1, 2]>;
+defm : PdWriteResXMMPair<WriteVecShiftImmX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
@@ -978,55 +1101,67 @@ defm : PdWriteResXMMPair<WritePMULLD, [PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
-def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> {
+def PdWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPMMA, PdFPMAL]> {
let Latency = 4;
- let ResourceCycles = [2, 1, 2, 1];
}
-def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
- VPMACSSDQLrr)>;
+def : InstRW<[PdWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr,
+ VPMACSSDQLrr)>;
-defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 2], 9>;
+defm : PdWriteResXMMPair<WriteMPSAD, [PdFPU0, PdFPMMA], 9, [1, 4], 8>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
-defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [], 2>;
-defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [], 2>;
+def PdWriteVMPSADBW : SchedWriteRes<[PdFPU0, PdFPMMA]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 4];
+ let NumMicroOps = 10;
+}
+def : InstRW<[PdWriteVMPSADBW], (instrs VMPSADBWrri)>;
+
+defm : PdWriteResXMMPair<WritePSADBW, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
+defm : PdWriteResXMMPair<WritePSADBWX, [PdFPU01, PdFPMAL], 4, [1, 2], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : PdWriteResXMMPair<WritePHMINPOS, [PdFPU0, PdFPMAL], 4, [], 2>;
-defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 1]>;
+defm : PdWriteResXMMPair<WriteShuffle, [PdFPU01, PdFPMAL], 2, [1, 2]>;
+defm : PdWriteResXMMPair<WriteShuffleX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
+defm : PdWriteResYMMPair<WriteShuffleY, [PdFPU01, PdFPMAL], 2, [1, 4]>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
-defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 4]>;
-defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarShuffle, [PdFPU01, PdFPMAL], 3, [1, 2]>;
+defm : PdWriteResXMMPair<WriteVarShuffleX, [PdFPU01, PdFPMAL], 3, [1, 3]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
+def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 2;
+ let ResourceCycles = [1, 3];
+}
+def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;
+
defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 4]>;
+defm : PdWriteResXMMPair<WriteVarBlend, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : PdWriteResXMMPair<WriteVecLogic, [PdFPU01, PdFPMAL], 2>;
-defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WriteVecLogicX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : PdWriteResXMMPair<WriteVecTest, [PdFPU0, PdFPFMA, PdEX0], 1, [], 2>;
-defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 2, 1], 4, 2>;
+defm : PdWriteResYMMPair<WriteVecTestY, [PdFPU01, PdFPFMA, PdEX0], 1, [2, 4, 1], 4, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : PdWriteResXMMPair<WriteShuffle256, [PdFPU01, PdFPMAL]>;
defm : PdWriteResXMMPair<WriteVarShuffle256, [PdFPU01, PdFPMAL]>;
-defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3>;
+defm : PdWriteResXMMPair<WriteVarVecShift, [PdFPU01, PdFPMAL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
@@ -1034,14 +1169,15 @@ defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [], 2>;
-defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [], 2>;
+defm : PdWriteRes<WriteVecInsert, [PdFPU01, PdFPMAL], 2, [1, 3], 2>;
+defm : PdWriteRes<WriteVecInsertLd, [PdFPU01, PdFPMAL, PdLoad], 6, [1, 4, 3], 2>;
-defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
-defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [], 2>;
+defm : PdWriteRes<WriteVecExtract, [PdFPU0, PdFPFMA, PdEX0], 12, [1, 3, 1], 2>;
+defm : PdWriteRes<WriteVecExtractSt, [PdFPU1, PdFPSTO, PdStore], 13, [2, 1, 1], 2>;
def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
let Latency = 3;
+ let ResourceCycles = [1, 3];
}
def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
@@ -1049,19 +1185,19 @@ def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>;
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 14, [1, 2, 1], 7, 1>;
-defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 6, [1, 2, 1], 7, 2>;
+defm : PdWriteResXMMPair<WritePCmpIStrI, [PdFPU1, PdFPFMA, PdEX0], 11, [1, 6, 1], 7, 1>;
+defm : PdWriteResXMMPair<WritePCmpIStrM, [PdFPU1, PdFPFMA, PdEX0], 7, [1, 8, 1], 7, 2>;
-defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 15, [1, 2, 6, 4, 1, 1], 27, 1>;
-defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 2, 6, 4, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrI, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 14, [1, 10, 10, 10, 1, 1], 27, 1>;
+defm : PdWriteResXMMPair<WritePCmpEStrM, [PdFPU1, PdStore, PdLoad, PdFPMAL, PdFPFMA, PdEX0], 10, [1, 10, 10, 10, 1, 1], 27, 1>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 10, [], 2>;
+defm : PdWriteRes<WriteFMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
-defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 13, [], 2>;
+defm : PdWriteRes<WriteVecMOVMSK, [PdFPU0, PdFPFMA, PdEX0], 12, [], 2>;
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
// defm : X86WriteResUnsupported<WriteVecMOVMSKZ>;
@@ -1079,12 +1215,12 @@ defm : PdWriteResXMMPair<WriteAESDecEnc, [PdFPU0, PdFPMMA], 9, [], 2>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [], 3, 1>;
-defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [2, 1], 8, 2>;
+defm : PdWriteResXMMPair<WriteFHAdd, [PdFPU0, PdFPFMA], 11, [1, 5], 3, 1>;
+defm : PdWriteResYMMPair<WriteFHAddY, [PdFPU0, PdFPFMA], 11, [1, 8], 8, 2>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
-defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [], 3, 1>;
-defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2>;
+defm : PdWriteResXMMPair<WritePHAdd, [PdFPU01, PdFPMAL], 5, [1, 4], 3, 1>;
+defm : PdWriteResXMMPair<WritePHAddX, [PdFPU01, PdFPMAL], 2, [1, 2]>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
@@ -1106,10 +1242,11 @@ def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm,
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [], 5, 1>;
+defm : PdWriteResXMMPair<WriteCLMul, [PdFPU0, PdFPMMA], 12, [1, 7], 5, 1>;
def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> {
- let Latency = 13;
+ let Latency = 12;
+ let ResourceCycles = [1, 7];
let NumMicroOps = 6;
}
def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
@@ -1120,9 +1257,15 @@ def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>;
def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> {
let Latency = 3;
- let ResourceCycles = [1, 4];
+ let ResourceCycles = [1, 2];
+}
+def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ)>;
+
+def PdWriteINSERTQI : SchedWriteRes<[PdFPU01, PdFPMAL]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 3];
}
-def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
+def : InstRW<[PdWriteINSERTQI], (instrs INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td
index 33a6b01546d7..2d26232b4132 100644
--- a/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/lib/Target/X86/X86ScheduleBtVer2.td
@@ -1,9 +1,8 @@
//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -109,6 +108,11 @@ def : ReadAdvance<ReadAfterVecLd, 5>;
def : ReadAdvance<ReadAfterVecXLd, 5>;
def : ReadAdvance<ReadAfterVecYLd, 5>;
+/// "Additional 6 cycle transfer operation which moves a floating point
+/// operation input value from the integer unit to the floating point unit."
+/// Reference: AMDfam16h SOG (Appendix A "Instruction Latencies", Section A.2).
+def : ReadAdvance<ReadInt2Fpu, -6>;
+
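The negative ReadAdvance above is how the 6-cycle integer-to-FPU transfer penalty is folded into dependent latencies: the scheduler roughly subtracts the ReadAdvance value from the producer's write latency, so a value of -6 lengthens the dependency by six cycles. A minimal standalone sketch of that arithmetic follows; effectiveLatency() is a hypothetical helper for illustration only, not an LLVM API.

// Hypothetical sketch of how a negative ReadAdvance lengthens a dependency.
// effectiveLatency() is an illustrative helper, not part of LLVM.
#include <algorithm>
#include <cstdio>

static int effectiveLatency(int writeLatency, int readAdvance) {
  // The MC scheduler roughly computes max(0, WriteLatency - ReadAdvance).
  return std::max(0, writeLatency - readAdvance);
}

int main() {
  // A 1-cycle GPR producer feeding an int->fpu consumer with ReadInt2Fpu = -6
  // is observed 7 cycles later, matching the "additional 6 cycle transfer".
  std::printf("%d\n", effectiveLatency(/*writeLatency=*/1, /*readAdvance=*/-6));
}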
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
@@ -174,6 +178,8 @@ multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
}
}
+// Instructions that have local forwarding disabled have an extra +1cy latency.
+
// A folded store needs a cycle on the SAGU for the store data,
// most RMW instructions don't need an extra uop.
defm : X86WriteRes<WriteRMW, [JSAGU], 1, [1], 0>;
@@ -215,7 +221,6 @@ defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
-defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
@@ -262,14 +267,13 @@ defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 3; }
def : WriteRes<WriteStore, [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
-// FIXME: These are copy and pasted from WriteLoad/Store.
-def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 3; }
def : WriteRes<WriteSTMXCSR, [JSAGU]>;
// Treat misc copies as a move.
@@ -400,8 +404,8 @@ defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
-defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
-defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
+defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 3, [1, 4], 3>; // +1cy latency.
+defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 4, [2, 6], 6>; // +1cy latency.
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
@@ -425,12 +429,13 @@ defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-// FIXME: f+3 ST, LD+STC latency
-defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SS, [JFPU1, JSTC], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SSLd, [JLAGU, JFPU1, JSTC], 9, [1,1,1], 1>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SD, [JFPU1, JSTC], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd, [JLAGU, JFPU1, JSTC], 9, [1,1,1], 1>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
@@ -487,11 +492,11 @@ defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
-defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 2>; // +1cy latency.
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
-defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 2>; // +1cy latency.
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
@@ -540,7 +545,7 @@ defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
-defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecInsertLd, [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
@@ -575,10 +580,10 @@ defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU01, JVALU, JFPU0, JVIMUL], 3, [1,
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
-defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
-defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
-defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 4>; // +1cy latency.
+defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 4, [2,2], 2>; // +1cy latency.
+defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 2>; // +1cy latency.
defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index fcaff7cf810f..34c251a5c5bb 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -1,9 +1,8 @@
//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,6 +52,8 @@ def : ReadAdvance<ReadAfterVecLd, 3>;
def : ReadAdvance<ReadAfterVecXLd, 3>;
def : ReadAdvance<ReadAfterVecYLd, 3>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
@@ -130,7 +131,6 @@ defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
-defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td
index a866f843106b..65f6d89df610 100644
--- a/lib/Target/X86/X86ScheduleZnver1.td
+++ b/lib/Target/X86/X86ScheduleZnver1.td
@@ -1,9 +1,8 @@
//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -95,6 +94,8 @@ def : ReadAdvance<ReadAfterVecLd, 8>;
def : ReadAdvance<ReadAfterVecXLd, 8>;
def : ReadAdvance<ReadAfterVecYLd, 8>;
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
// The Integer PRF for Zen is 168 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: "Software Optimization Guide for AMD Family 17h Processors"
@@ -214,7 +215,6 @@ defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
defm : ZnWriteResPair<WriteCMOV, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteCMOV2, [ZnALU], 1>;
def : WriteRes<WriteSETCC, [ZnALU]>;
def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 008a9ec2ba3c..50690953eef5 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,24 +43,6 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
return false;
}
-namespace {
-
-// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT
-// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is
-// always smaller than the block size).
-struct RepMovsRepeats {
- RepMovsRepeats(uint64_t Size) : Size(Size) {}
-
- uint64_t Count() const { return Size / UBytes(); }
- uint64_t BytesLeft() const { return Size % UBytes(); }
- uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }
-
- const uint64_t Size;
- MVT AVT = MVT::i8;
-};
-
-} // namespace
-
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
SDValue Size, unsigned Align, bool isVolatile,
@@ -201,98 +182,137 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
return Chain;
}
-SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
- SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
- // This requires the copy size to be a constant, preferably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- const X86Subtarget &Subtarget =
- DAG.getMachineFunction().getSubtarget<X86Subtarget>();
- if (!ConstantSize)
- return SDValue();
- RepMovsRepeats Repeats(ConstantSize->getZExtValue());
- if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
+/// Emit a single REP MOVS{B,W,D,Q} instruction.
+static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size, MVT AVT) {
+ const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
+ const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
+ const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
+ const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
+
+ SDValue InFlag;
+ Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
+ return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
+}
+
+/// Emit a single REP MOVSB instruction for a particular constant size.
+static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size) {
+ return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
+ DAG.getIntPtrConstant(Size, dl), MVT::i8);
+}
+
+/// Returns the best type to use with repmovs depending on alignment.
+static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
+ uint64_t Align) {
+ assert((Align != 0) && "Align is normalized");
+ assert(isPowerOf2_64(Align) && "Align is a power of 2");
+ switch (Align) {
+ case 1:
+ return MVT::i8;
+ case 2:
+ return MVT::i16;
+ case 4:
+ return MVT::i32;
+ default:
+ return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
+ }
+}
+
+/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
+/// a constant size memory copy. In some cases where we know REP MOVS is
+/// inefficient we return an empty SDValue so the calling code can either
+/// generate a load/store sequence or call the runtime memcpy function.
+static SDValue emitConstantSizeRepmov(
+ SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
+ unsigned Align, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
+
+  /// TODO: Revisit next line: big copies with ERMSB on march >= haswell are
+  /// very efficient.
+ if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
- /// If not DWORD aligned, it is more efficient to call the library. However
- /// if calling the library is not allowed (AlwaysInline), then soldier on as
- /// the code generated here is better than the long load-store sequence we
- /// would otherwise get.
+ /// If we have enhanced repmovs we use it.
+ if (Subtarget.hasERMSB())
+ return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
+
+ assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
+ /// We assume runtime memcpy will do a better job for unaligned copies when
+ /// ERMS is not present.
if (!AlwaysInline && (Align & 3) != 0)
return SDValue();
+ const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
+ const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
+ const uint64_t BlockCount = Size / BlockBytes;
+ const uint64_t BytesLeft = Size % BlockBytes;
+ SDValue RepMovs =
+ emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
+ DAG.getIntPtrConstant(BlockCount, dl), BlockType);
+
+ /// RepMov can process the whole length.
+ if (BytesLeft == 0)
+ return RepMovs;
+
+ assert(BytesLeft && "We have leftover at this point");
+
+  /// In case we optimize for size, we use repmovsb even if it's less efficient
+  /// so we can save the loads/stores of the leftover.
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
+ return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
+
+ // Handle the last 1 - 7 bytes.
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ unsigned Offset = Size - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ Results.push_back(DAG.getMemcpy(
+ Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
+ DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile,
+ /*AlwaysInline*/ true, /*isTailCall*/ false,
+ DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
+}
+
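As a quick illustration of the decomposition emitConstantSizeRepmov performs, here is a self-contained sketch in ordinary C++ (not the SelectionDAG code above; the example inputs are hypothetical): for a 1003-byte copy with 4-byte alignment it would pick 32-bit blocks, emit one REP MOVSD of 250 elements, and leave 3 bytes for the trailing inline memcpy.

// Standalone sketch of the block/remainder split; simplified to at most
// 4-byte blocks, whereas getOptimalRepmovsType may also pick i64 on x86-64.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Size = 1003, Align = 4;         // hypothetical example inputs
  const uint64_t BlockBytes = Align >= 4 ? 4
                            : Align == 2 ? 2
                            : 1;
  const uint64_t BlockCount = Size / BlockBytes; // 250 REP MOVSD iterations
  const uint64_t BytesLeft  = Size % BlockBytes; // 3 bytes for the tail copy
  std::printf("%llu blocks of %llu bytes, %llu bytes left over\n",
              (unsigned long long)BlockCount, (unsigned long long)BlockBytes,
              (unsigned long long)BytesLeft);
}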
+SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
+ SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
// If to a segment-relative address space, use the default lowering.
- if (DstPtrInfo.getAddrSpace() >= 256 ||
- SrcPtrInfo.getAddrSpace() >= 256)
+ if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
return SDValue();
- // If the base register might conflict with our physical registers, bail out.
+ // If the base registers conflict with our physical registers, use the default
+ // lowering.
const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
X86::ECX, X86::ESI, X86::EDI};
if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
- // If the target has enhanced REPMOVSB, then it's at least as fast to use
- // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
- // BytesLeft.
- if (!Subtarget.hasERMSB() && !(Align & 1)) {
- if (Align & 2)
- // WORD aligned
- Repeats.AVT = MVT::i16;
- else if (Align & 4)
- // DWORD aligned
- Repeats.AVT = MVT::i32;
- else
- // QWORD aligned
- Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
-
- if (Repeats.BytesLeft() > 0 &&
- DAG.getMachineFunction().getFunction().optForMinSize()) {
- // When aggressively optimizing for size, avoid generating the code to
- // handle BytesLeft.
- Repeats.AVT = MVT::i8;
- }
- }
-
- bool Use64BitRegs = Subtarget.isTarget64BitLP64();
- SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
- DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
- Src, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
- SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
+ const X86Subtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<X86Subtarget>();
- SmallVector<SDValue, 4> Results;
- Results.push_back(RepMovs);
- if (Repeats.BytesLeft()) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = Repeats.Size - Repeats.BytesLeft();
- EVT DstVT = Dst.getValueType();
- EVT SrcVT = Src.getValueType();
- EVT SizeVT = Size.getValueType();
- Results.push_back(DAG.getMemcpy(Chain, dl,
- DAG.getNode(ISD::ADD, dl, DstVT, Dst,
- DAG.getConstant(Offset, dl,
- DstVT)),
- DAG.getNode(ISD::ADD, dl, SrcVT, Src,
- DAG.getConstant(Offset, dl,
- SrcVT)),
- DAG.getConstant(Repeats.BytesLeft(), dl,
- SizeVT),
- Align, isVolatile, AlwaysInline, false,
- DstPtrInfo.getWithOffset(Offset),
- SrcPtrInfo.getWithOffset(Offset)));
- }
+ // Handle constant sizes.
+ if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
+ return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Size.getValueType(), Align, isVolatile,
+ AlwaysInline, DstPtrInfo, SrcPtrInfo);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
+ return SDValue();
}
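
For reference, the constant-size path above splits the copy into a REP MOVS portion plus a residual inline memcpy for the last 1-7 bytes, with the element width chosen from the pointer alignment. A minimal standalone sketch of that split (plain C++ with a hypothetical helper name, not the LLVM lowering itself):

#include <cstdint>
#include <cstdio>

// Illustrative sketch only: with ERMSB, or when optimizing for minimum size,
// the lowering above simply uses REP MOVSB and there is no tail to handle.
struct RepMovSplit {
  uint64_t ElementBytes; // operand size used by REP MOVS
  uint64_t Count;        // iteration count loaded into RCX/ECX
  uint64_t BytesLeft;    // residual 1-7 bytes copied inline afterwards
};

static RepMovSplit splitConstantCopy(uint64_t Size, uint64_t Align,
                                     bool Is64Bit, bool HasERMSB) {
  uint64_t Elt = 1; // default: REP MOVSB
  if (!HasERMSB && Align % 2 == 0) {
    if (Align % 8 == 0 && Is64Bit)
      Elt = 8; // QWORD aligned
    else if (Align % 4 == 0)
      Elt = 4; // DWORD aligned
    else
      Elt = 2; // WORD aligned
  }
  return {Elt, Size / Elt, Size % Elt};
}

int main() {
  // 1027 bytes at 8-byte alignment -> 128 QWORD iterations plus a 3-byte tail.
  RepMovSplit S =
      splitConstantCopy(1027, 8, /*Is64Bit=*/true, /*HasERMSB=*/false);
  std::printf("elt=%llu count=%llu tail=%llu\n",
              (unsigned long long)S.ElementBytes,
              (unsigned long long)S.Count,
              (unsigned long long)S.BytesLeft);
  return 0;
}
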
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index f4a285a5f916..0f2d979f91e3 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 720be8afa62c..a202fc63637b 100644
--- a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -1,9 +1,8 @@
//===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index b08c31935d28..296341517579 100644
--- a/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -1,9 +1,8 @@
//===-- X86ShuffleDecodeConstantPool.h - X86 shuffle decode -----*-C++-*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index a729161a1beb..40f5dbe57e4b 100644
--- a/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -1,9 +1,8 @@
//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -123,10 +122,7 @@ namespace {
class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
public:
- X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) {
- initializeX86SpeculativeLoadHardeningPassPass(
- *PassRegistry::getPassRegistry());
- }
+ X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
StringRef getPassName() const override {
return "X86 speculative load hardening";
@@ -661,7 +657,7 @@ X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
// jmpq *%rax
// ```
// We still want to harden the edge to `L1`.
- if (X86::getCondFromBranchOpc(MI.getOpcode()) == X86::COND_INVALID) {
+ if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
Info.CondBrs.clear();
Info.UncondBr = &MI;
continue;
@@ -752,7 +748,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
for (X86::CondCode Cond : Conds) {
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
- auto CMovOp = X86::getCMovFromCond(Cond, PredStateSizeInBytes);
+ auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
// Note that we intentionally use an empty debug location so that
@@ -760,7 +756,8 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
TII->get(CMovOp), UpdatedStateReg)
.addReg(CurStateReg)
- .addReg(PS->PoisonReg);
+ .addReg(PS->PoisonReg)
+ .addImm(Cond);
// If this is the last cmov and the EFLAGS weren't originally
// live-in, mark them as killed.
if (!LiveEFLAGS && Cond == Conds.back())
@@ -789,7 +786,7 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
int &SuccCount = SuccCounts[&Succ];
- X86::CondCode Cond = X86::getCondFromBranchOpc(CondBr->getOpcode());
+ X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
UncondCodeSeq.push_back(Cond);
@@ -1177,12 +1174,13 @@ X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
// Now cmov over the predicate if the comparison wasn't equal.
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
- auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+ auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
auto CMovI =
BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
.addReg(PS->InitialReg)
- .addReg(PS->PoisonReg);
+ .addReg(PS->PoisonReg)
+ .addImm(X86::COND_NE);
CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
@@ -1963,6 +1961,14 @@ void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
LLVM_DEBUG(
dbgs() << " Skipping hardening base of explicit stack frame load: ";
MI.dump(); dbgs() << "\n");
+ } else if (BaseMO.getReg() == X86::RSP) {
+ // Some idempotent atomic operations are lowered directly to a locked OR
+ // with 0 to the top of the stack (or slightly offset from the top), which
+ // uses an explicit RSP register as the base.
+ assert(IndexMO.getReg() == X86::NoRegister &&
+ "Explicit RSP access with dynamic index!");
+ LLVM_DEBUG(
+ dbgs() << " Cannot harden base of explicit RSP offset in a load!");
} else if (BaseMO.getReg() == X86::RIP ||
BaseMO.getReg() == X86::NoRegister) {
// For both RIP-relative addressed loads or absolute loads, we cannot
@@ -2464,7 +2470,7 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
// If we have no red zones or if the function returns twice (possibly without
// using the `ret` instruction) like setjmp, we need to save the expected
// return address prior to the call.
- if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone) ||
+ if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
MF.exposesReturnsTwice()) {
// If we don't have red zones, we need to compute the expected return
// address prior to the call and store it in a register that lives across
@@ -2546,12 +2552,13 @@ void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
// Now conditionally update the predicate state we just extracted if we ended
// up at a different return address than expected.
int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
- auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+ auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
.addReg(NewStateReg, RegState::Kill)
- .addReg(PS->PoisonReg);
+ .addReg(PS->PoisonReg)
+ .addImm(X86::COND_NE);
CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
++NumInstsInserted;
LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0c9ce8802e1b..d5bb56603df9 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -1,9 +1,8 @@
//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
+#include "X86MacroFusion.h"
#include "X86RegisterBankInfo.h"
#include "X86Subtarget.h"
#include "MCTargetDesc/X86BaseInfo.h"
@@ -176,10 +176,13 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
if (TM.shouldAssumeDSOLocal(M, GV))
return X86II::MO_NO_FLAG;
+ // Functions on COFF can be non-DSO local for two reasons:
+ // - They are marked dllimport
+ // - They are extern_weak, and a stub is needed
if (isTargetCOFF()) {
- assert(GV->hasDLLImportStorageClass() &&
- "shouldAssumeDSOLocal gave inconsistent answer");
- return X86II::MO_DLLIMPORT;
+ if (GV->hasDLLImportStorageClass())
+ return X86II::MO_DLLIMPORT;
+ return X86II::MO_COFFSTUB;
}
const Function *F = dyn_cast_or_null<Function>(GV);
@@ -367,3 +370,8 @@ const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
+
+void X86Subtarget::getPostRAMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(createX86MacroFusionDAGMutation());
+}
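
getPostRAMutations is the hook the generic post-RA machine scheduler queries for target-specific DAG mutations; the addition above registers the X86 macro-fusion mutation there. A bare-bones sketch of the interface being implemented (an illustrative skeleton only, not the actual X86MacroFusion code):

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"

using namespace llvm;

namespace {
// A no-op mutation skeleton: a real mutation (such as macro fusion) walks the
// scheduling DAG here and adds edges or clusters before scheduling starts.
class DummyPostRAMutation : public ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    (void)DAG; // e.g. inspect DAG->SUnits and cluster fusable pairs
  }
};
} // end anonymous namespace
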
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index b1103f823e7f..24ccc9cb7843 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -1,9 +1,8 @@
//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,6 +88,9 @@ protected:
/// True if the processor supports X87 instructions.
bool HasX87 = false;
+ /// True if the processor supports CMPXCHG8B.
+ bool HasCmpxchg8b = false;
+
/// True if this processor has NOPL instruction
/// (generally pentium pro+).
bool HasNOPL = false;
@@ -295,6 +297,9 @@ protected:
/// True if the processor supports macrofusion.
bool HasMacroFusion = false;
+ /// True if the processor supports branch fusion.
+ bool HasBranchFusion = false;
+
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB = false;
@@ -348,9 +353,18 @@ protected:
/// Processor has AVX-512 Vector Neural Network Instructions
bool HasVNNI = false;
+ /// Processor has AVX-512 bfloat16 floating-point extensions
+ bool HasBF16 = false;
+
+ /// Processor supports ENQCMD instructions
+ bool HasENQCMD = false;
+
/// Processor has AVX-512 Bit Algorithms instructions
bool HasBITALG = false;
+ /// Processor has AVX-512 vp2intersect instructions
+ bool HasVP2INTERSECT = false;
+
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX = false;
@@ -388,6 +402,12 @@ protected:
/// Try harder to combine to horizontal vector ops if they are fast.
bool HasFastHorizontalOps = false;
+ /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
+ bool HasFastScalarShiftMasks = false;
+
+ /// Prefer a left/right vector logical shifts pair over a shift+and pair.
+ bool HasFastVectorShiftMasks = false;
+
/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
bool UseRetpolineIndirectCalls = false;
@@ -547,6 +567,7 @@ public:
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
bool hasX87() const { return HasX87; }
+ bool hasCmpxchg8b() const { return HasCmpxchg8b; }
bool hasNOPL() const { return HasNOPL; }
// SSE codegen depends on cmovs, and all SSE1+ processors support them.
// All 64-bit processors support cmov.
@@ -621,7 +642,7 @@ public:
int getGatherOverhead() const { return GatherOverhead; }
int getScatterOverhead() const { return ScatterOverhead; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
- bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+ bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
@@ -638,7 +659,10 @@ public:
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasFastBEXTR() const { return HasFastBEXTR; }
bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
+ bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
+ bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
bool hasMacroFusion() const { return HasMacroFusion; }
+ bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
@@ -657,6 +681,8 @@ public:
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
+ bool hasBF16() const { return HasBF16; }
+ bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
bool hasBITALG() const { return HasBITALG; }
bool hasMPX() const { return HasMPX; }
bool hasSHSTK() const { return HasSHSTK; }
@@ -669,6 +695,7 @@ public:
bool hasSGX() const { return HasSGX; }
bool threewayBranchProfitable() const { return ThreewayBranchProfitable; }
bool hasINVPCID() const { return HasINVPCID; }
+ bool hasENQCMD() const { return HasENQCMD; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;
@@ -744,10 +771,6 @@ public:
return TargetTriple.isWindowsMSVCEnvironment();
}
- bool isTargetKnownWindowsMSVC() const {
- return TargetTriple.isKnownWindowsMSVCEnvironment();
- }
-
bool isTargetWindowsCoreCLR() const {
return TargetTriple.isWindowsCoreCLREnvironment();
}
@@ -834,11 +857,11 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
- // TODO: Update the regression tests and return true.
- bool supportPrintSchedInfo() const override { return false; }
-
bool enableEarlyIfConversion() const override;
+ void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
+ &Mutations) const override;
+
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index afcb49dc2263..0cbf13899a29 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
#include "X86TargetMachine.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "TargetInfo/X86TargetInfo.h"
#include "X86.h"
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -70,9 +71,10 @@ extern "C" void LLVMInitializeX86Target() {
initializeFixupBWInstPassPass(PR);
initializeEvexToVexInstPassPass(PR);
initializeFixupLEAPassPass(PR);
- initializeShadowCallStackPass(PR);
+ initializeFPSPass(PR);
initializeX86CallFrameOptimizationPass(PR);
initializeX86CmovConverterPassPass(PR);
+ initializeX86ExpandPseudoPass(PR);
initializeX86ExecutionDomainFixPass(PR);
initializeX86DomainReassignmentPass(PR);
initializeX86AvoidSFBPassPass(PR);
@@ -194,7 +196,7 @@ static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM,
bool JIT, bool Is64Bit) {
if (CM) {
if (*CM == CodeModel::Tiny)
- report_fatal_error("Target does not support the tiny CodeModel");
+ report_fatal_error("Target does not support the tiny CodeModel", false);
return *CM;
}
if (JIT)
@@ -357,6 +359,13 @@ public:
return DAG;
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createX86MacroFusionDAGMutation());
+ return DAG;
+ }
+
void addIRPasses() override;
bool addInstSelector() override;
bool addIRTranslator() override;
@@ -371,6 +380,8 @@ public:
void addPreEmitPass() override;
void addPreEmitPass2() override;
void addPreSched2() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};
class X86ExecutionDomainFix : public ExecutionDomainFix {
@@ -490,7 +501,6 @@ void X86PassConfig::addPreEmitPass() {
addPass(createBreakFalseDeps());
}
- addPass(createShadowCallStackPass());
addPass(createX86IndirectBranchTrackingPass());
if (UseVZeroUpper)
@@ -512,6 +522,13 @@ void X86PassConfig::addPreEmitPass2() {
// correct CFA calculation rule where needed by inserting appropriate CFI
// instructions.
const Triple &TT = TM->getTargetTriple();
- if (!TT.isOSDarwin() && !TT.isOSWindows())
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
+ if (!TT.isOSDarwin() &&
+ (!TT.isOSWindows() ||
+ MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
addPass(createCFIInstrInserter());
}
+
+std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
+ return getStandardCSEConfigForOpt(TM->getOptLevel());
+}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index f5b45da0c3dc..b999e2e86af6 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -1,9 +1,8 @@
//===-- X86TargetMachine.h - Define TargetMachine for the X86 ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 505c4fa07b77..92e0779c2e74 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- X86TargetObjectFile.cpp - X86 Object Info -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index d045094edb1e..13d7b4ad70d6 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- X86TargetObjectFile.h - X86 Object Info -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 36929a4f5439..3dc59aeb263e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -1651,17 +1650,77 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry SSE2CostTbl[] = {
- { ISD::SETCC, MVT::v2i64, 8 },
- { ISD::SETCC, MVT::v4i32, 1 },
- { ISD::SETCC, MVT::v8i16, 1 },
- { ISD::SETCC, MVT::v16i8, 1 },
+ unsigned ExtraCost = 0;
+ if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
+ // Some vector comparison predicates cost extra instructions.
+ if (MTy.isVector() &&
+ !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
+ (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
+ ST->hasBWI())) {
+ switch (cast<CmpInst>(I)->getPredicate()) {
+ case CmpInst::Predicate::ICMP_NE:
+ // xor(cmpeq(x,y),-1)
+ ExtraCost = 1;
+ break;
+ case CmpInst::Predicate::ICMP_SGE:
+ case CmpInst::Predicate::ICMP_SLE:
+ // xor(cmpgt(x,y),-1)
+ ExtraCost = 1;
+ break;
+ case CmpInst::Predicate::ICMP_ULT:
+ case CmpInst::Predicate::ICMP_UGT:
+ // cmpgt(xor(x,signbit),xor(y,signbit))
+ // xor(cmpeq(pmaxu(x,y),x),-1)
+ ExtraCost = 2;
+ break;
+ case CmpInst::Predicate::ICMP_ULE:
+ case CmpInst::Predicate::ICMP_UGE:
+ if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) ||
+ (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) {
+ // cmpeq(psubus(x,y),0)
+ // cmpeq(pminu(x,y),x)
+ ExtraCost = 1;
+ } else {
+ // xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1)
+ ExtraCost = 3;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ static const CostTblEntry AVX512BWCostTbl[] = {
+ { ISD::SETCC, MVT::v32i16, 1 },
+ { ISD::SETCC, MVT::v64i8, 1 },
+
+ { ISD::SELECT, MVT::v32i16, 1 },
+ { ISD::SELECT, MVT::v64i8, 1 },
};
- static const CostTblEntry SSE42CostTbl[] = {
- { ISD::SETCC, MVT::v2f64, 1 },
- { ISD::SETCC, MVT::v4f32, 1 },
- { ISD::SETCC, MVT::v2i64, 1 },
+ static const CostTblEntry AVX512CostTbl[] = {
+ { ISD::SETCC, MVT::v8i64, 1 },
+ { ISD::SETCC, MVT::v16i32, 1 },
+ { ISD::SETCC, MVT::v8f64, 1 },
+ { ISD::SETCC, MVT::v16f32, 1 },
+
+ { ISD::SELECT, MVT::v8i64, 1 },
+ { ISD::SELECT, MVT::v16i32, 1 },
+ { ISD::SELECT, MVT::v8f64, 1 },
+ { ISD::SELECT, MVT::v16f32, 1 },
+ };
+
+ static const CostTblEntry AVX2CostTbl[] = {
+ { ISD::SETCC, MVT::v4i64, 1 },
+ { ISD::SETCC, MVT::v8i32, 1 },
+ { ISD::SETCC, MVT::v16i16, 1 },
+ { ISD::SETCC, MVT::v32i8, 1 },
+
+ { ISD::SELECT, MVT::v4i64, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v8i32, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v16i16, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v32i8, 1 }, // pblendvb
};
static const CostTblEntry AVX1CostTbl[] = {
@@ -1672,50 +1731,83 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
{ ISD::SETCC, MVT::v8i32, 4 },
{ ISD::SETCC, MVT::v16i16, 4 },
{ ISD::SETCC, MVT::v32i8, 4 },
+
+ { ISD::SELECT, MVT::v4f64, 1 }, // vblendvpd
+ { ISD::SELECT, MVT::v8f32, 1 }, // vblendvps
+ { ISD::SELECT, MVT::v4i64, 1 }, // vblendvpd
+ { ISD::SELECT, MVT::v8i32, 1 }, // vblendvps
+ { ISD::SELECT, MVT::v16i16, 3 }, // vandps + vandnps + vorps
+ { ISD::SELECT, MVT::v32i8, 3 }, // vandps + vandnps + vorps
};
- static const CostTblEntry AVX2CostTbl[] = {
- { ISD::SETCC, MVT::v4i64, 1 },
- { ISD::SETCC, MVT::v8i32, 1 },
- { ISD::SETCC, MVT::v16i16, 1 },
- { ISD::SETCC, MVT::v32i8, 1 },
+ static const CostTblEntry SSE42CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 1 },
+ { ISD::SETCC, MVT::v4f32, 1 },
+ { ISD::SETCC, MVT::v2i64, 1 },
};
- static const CostTblEntry AVX512CostTbl[] = {
- { ISD::SETCC, MVT::v8i64, 1 },
- { ISD::SETCC, MVT::v16i32, 1 },
- { ISD::SETCC, MVT::v8f64, 1 },
- { ISD::SETCC, MVT::v16f32, 1 },
+ static const CostTblEntry SSE41CostTbl[] = {
+ { ISD::SELECT, MVT::v2f64, 1 }, // blendvpd
+ { ISD::SELECT, MVT::v4f32, 1 }, // blendvps
+ { ISD::SELECT, MVT::v2i64, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v4i32, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v8i16, 1 }, // pblendvb
+ { ISD::SELECT, MVT::v16i8, 1 }, // pblendvb
};
- static const CostTblEntry AVX512BWCostTbl[] = {
- { ISD::SETCC, MVT::v32i16, 1 },
- { ISD::SETCC, MVT::v64i8, 1 },
+ static const CostTblEntry SSE2CostTbl[] = {
+ { ISD::SETCC, MVT::v2f64, 2 },
+ { ISD::SETCC, MVT::f64, 1 },
+ { ISD::SETCC, MVT::v2i64, 8 },
+ { ISD::SETCC, MVT::v4i32, 1 },
+ { ISD::SETCC, MVT::v8i16, 1 },
+ { ISD::SETCC, MVT::v16i8, 1 },
+
+ { ISD::SELECT, MVT::v2f64, 3 }, // andpd + andnpd + orpd
+ { ISD::SELECT, MVT::v2i64, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v4i32, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v8i16, 3 }, // pand + pandn + por
+ { ISD::SELECT, MVT::v16i8, 3 }, // pand + pandn + por
+ };
+
+ static const CostTblEntry SSE1CostTbl[] = {
+ { ISD::SETCC, MVT::v4f32, 2 },
+ { ISD::SETCC, MVT::f32, 1 },
+
+ { ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
};
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasSSE42())
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return LT.first * (ExtraCost + Entry->Cost);
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
+ return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
+ return LT.first * (ExtraCost + Entry->Cost);
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+ return LT.first * (ExtraCost + Entry->Cost);
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
@@ -1784,6 +1876,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
{ ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
{ ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
+ { ISD::UADDSAT, MVT::v16i32, 3 }, // not + pminud + paddd
+ { ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq
+ { ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq
+ { ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -1825,6 +1921,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::SSUBSAT, MVT::v32i8, 1 },
{ ISD::UADDSAT, MVT::v16i16, 1 },
{ ISD::UADDSAT, MVT::v32i8, 1 },
+ { ISD::UADDSAT, MVT::v8i32, 3 }, // not + pminud + paddd
{ ISD::USUBSAT, MVT::v16i16, 1 },
{ ISD::USUBSAT, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
@@ -1861,6 +1958,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v8i32, 8 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
@@ -1885,6 +1983,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
};
static const CostTblEntry SSE42CostTbl[] = {
{ ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
+ { ISD::UADDSAT, MVT::v4i32, 3 }, // not + pminud + paddd
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
@@ -1945,14 +2044,23 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
};
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
- { ISD::BITREVERSE, MVT::i64, 14 }
+ { ISD::BITREVERSE, MVT::i64, 14 },
+ { ISD::SADDO, MVT::i64, 1 },
+ { ISD::UADDO, MVT::i64, 1 },
};
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
{ ISD::BITREVERSE, MVT::i32, 14 },
{ ISD::BITREVERSE, MVT::i16, 14 },
- { ISD::BITREVERSE, MVT::i8, 11 }
+ { ISD::BITREVERSE, MVT::i8, 11 },
+ { ISD::SADDO, MVT::i32, 1 },
+ { ISD::SADDO, MVT::i16, 1 },
+ { ISD::SADDO, MVT::i8, 1 },
+ { ISD::UADDO, MVT::i32, 1 },
+ { ISD::UADDO, MVT::i16, 1 },
+ { ISD::UADDO, MVT::i8, 1 },
};
+ Type *OpTy = RetTy;
unsigned ISD = ISD::DELETED_NODE;
switch (IID) {
default:
@@ -1987,11 +2095,23 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::sqrt:
ISD = ISD::FSQRT;
break;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ // SSUBO has the same costs, so don't duplicate.
+ ISD = ISD::SADDO;
+ OpTy = RetTy->getContainedType(0);
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ // USUBO has the same costs, so don't duplicate.
+ ISD = ISD::UADDO;
+ OpTy = RetTy->getContainedType(0);
+ break;
}
if (ISD != ISD::DELETED_NODE) {
// Legalize the type.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy);
MVT MTy = LT.second;
// Attempt to lookup cost.
@@ -2226,6 +2346,9 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned Alignment,
unsigned AddressSpace) {
+ bool IsLoad = (Instruction::Load == Opcode);
+ bool IsStore = (Instruction::Store == Opcode);
+
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
@@ -2233,10 +2356,9 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
- VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
- if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) ||
- (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) ||
- !isPowerOf2_32(NumElem)) {
+ VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
+ if ((IsLoad && !isLegalMaskedLoad(SrcVTy)) ||
+ (IsStore && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) {
// Scalarization
int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
int ScalarCompareCost = getCmpSelInstrCost(
@@ -2244,8 +2366,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
int BranchCost = getCFInstrCost(Instruction::Br);
int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
- int ValueSplitCost = getScalarizationOverhead(
- SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
+ int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
@@ -2259,8 +2380,8 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
- Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) +
- getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr);
+ Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, 0, nullptr) +
+ getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr);
else if (LT.second.getVectorNumElements() > NumElem) {
VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
@@ -2268,11 +2389,13 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
// Expanding requires fill mask with zeroes
Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
}
+
+ // Pre-AVX512: each maskmov load costs ~2 and each maskmov store costs ~8.
if (!ST->hasAVX512())
- return Cost + LT.first*4; // Each maskmov costs 4
+ return Cost + LT.first * (IsLoad ? 2 : 8);
// AVX-512 masked load/store is cheaper
- return Cost+LT.first;
+ return Cost + LT.first;
}
int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -2281,7 +2404,7 @@ int X86TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
- unsigned NumVectorInstToHideOverhead = 10;
+ const unsigned NumVectorInstToHideOverhead = 10;
// Cost modeling of Strided Access Computation is hidden by the indexing
// modes of X86 regardless of the stride value. We don't believe that there
@@ -2369,6 +2492,48 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
return LT.first * Entry->Cost;
}
+ static const CostTblEntry AVX2BoolReduction[] = {
+ { ISD::AND, MVT::v16i16, 2 }, // vpmovmskb + cmp
+ { ISD::AND, MVT::v32i8, 2 }, // vpmovmskb + cmp
+ { ISD::OR, MVT::v16i16, 2 }, // vpmovmskb + cmp
+ { ISD::OR, MVT::v32i8, 2 }, // vpmovmskb + cmp
+ };
+
+ static const CostTblEntry AVX1BoolReduction[] = {
+ { ISD::AND, MVT::v4i64, 2 }, // vmovmskpd + cmp
+ { ISD::AND, MVT::v8i32, 2 }, // vmovmskps + cmp
+ { ISD::AND, MVT::v16i16, 4 }, // vextractf128 + vpand + vpmovmskb + cmp
+ { ISD::AND, MVT::v32i8, 4 }, // vextractf128 + vpand + vpmovmskb + cmp
+ { ISD::OR, MVT::v4i64, 2 }, // vmovmskpd + cmp
+ { ISD::OR, MVT::v8i32, 2 }, // vmovmskps + cmp
+ { ISD::OR, MVT::v16i16, 4 }, // vextractf128 + vpor + vpmovmskb + cmp
+ { ISD::OR, MVT::v32i8, 4 }, // vextractf128 + vpor + vpmovmskb + cmp
+ };
+
+ static const CostTblEntry SSE2BoolReduction[] = {
+ { ISD::AND, MVT::v2i64, 2 }, // movmskpd + cmp
+ { ISD::AND, MVT::v4i32, 2 }, // movmskps + cmp
+ { ISD::AND, MVT::v8i16, 2 }, // pmovmskb + cmp
+ { ISD::AND, MVT::v16i8, 2 }, // pmovmskb + cmp
+ { ISD::OR, MVT::v2i64, 2 }, // movmskpd + cmp
+ { ISD::OR, MVT::v4i32, 2 }, // movmskps + cmp
+ { ISD::OR, MVT::v8i16, 2 }, // pmovmskb + cmp
+ { ISD::OR, MVT::v16i8, 2 }, // pmovmskb + cmp
+ };
+
+ // Handle bool allof/anyof patterns.
+ if (ValTy->getVectorElementType()->isIntegerTy(1)) {
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy))
+ return LT.first * Entry->Cost;
+ }
+
return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise);
}
@@ -2390,15 +2555,37 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
// We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
// and use that as the cost.
- static const CostTblEntry SSE42CostTblPairWise[] = {
+ static const CostTblEntry SSE1CostTblPairWise[] = {
+ {ISD::FMINNUM, MVT::v4f32, 4},
+ };
+
+ static const CostTblEntry SSE2CostTblPairWise[] = {
{ISD::FMINNUM, MVT::v2f64, 3},
+ {ISD::SMIN, MVT::v2i64, 6},
+ {ISD::UMIN, MVT::v2i64, 8},
+ {ISD::SMIN, MVT::v4i32, 6},
+ {ISD::UMIN, MVT::v4i32, 8},
+ {ISD::SMIN, MVT::v8i16, 4},
+ {ISD::UMIN, MVT::v8i16, 6},
+ {ISD::SMIN, MVT::v16i8, 8},
+ {ISD::UMIN, MVT::v16i8, 6},
+ };
+
+ static const CostTblEntry SSE41CostTblPairWise[] = {
{ISD::FMINNUM, MVT::v4f32, 2},
- {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
- {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
+ {ISD::SMIN, MVT::v2i64, 9},
+ {ISD::UMIN, MVT::v2i64, 10},
{ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
{ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
{ISD::SMIN, MVT::v8i16, 2},
{ISD::UMIN, MVT::v8i16, 2},
+ {ISD::SMIN, MVT::v16i8, 3},
+ {ISD::UMIN, MVT::v16i8, 3},
+ };
+
+ static const CostTblEntry SSE42CostTblPairWise[] = {
+ {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
+ {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6"
};
static const CostTblEntry AVX1CostTblPairWise[] = {
@@ -2411,8 +2598,16 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
{ISD::UMIN, MVT::v4i32, 1},
{ISD::SMIN, MVT::v8i16, 1},
{ISD::UMIN, MVT::v8i16, 1},
+ {ISD::SMIN, MVT::v16i8, 2},
+ {ISD::UMIN, MVT::v16i8, 2},
+ {ISD::SMIN, MVT::v4i64, 7},
+ {ISD::UMIN, MVT::v4i64, 7},
{ISD::SMIN, MVT::v8i32, 3},
{ISD::UMIN, MVT::v8i32, 3},
+ {ISD::SMIN, MVT::v16i16, 3},
+ {ISD::UMIN, MVT::v16i16, 3},
+ {ISD::SMIN, MVT::v32i8, 3},
+ {ISD::UMIN, MVT::v32i8, 3},
};
static const CostTblEntry AVX2CostTblPairWise[] = {
@@ -2435,15 +2630,37 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
{ISD::UMIN, MVT::v16i32, 1},
};
- static const CostTblEntry SSE42CostTblNoPairWise[] = {
+ static const CostTblEntry SSE1CostTblNoPairWise[] = {
+ {ISD::FMINNUM, MVT::v4f32, 4},
+ };
+
+ static const CostTblEntry SSE2CostTblNoPairWise[] = {
{ISD::FMINNUM, MVT::v2f64, 3},
+ {ISD::SMIN, MVT::v2i64, 6},
+ {ISD::UMIN, MVT::v2i64, 8},
+ {ISD::SMIN, MVT::v4i32, 6},
+ {ISD::UMIN, MVT::v4i32, 8},
+ {ISD::SMIN, MVT::v8i16, 4},
+ {ISD::UMIN, MVT::v8i16, 6},
+ {ISD::SMIN, MVT::v16i8, 8},
+ {ISD::UMIN, MVT::v16i8, 6},
+ };
+
+ static const CostTblEntry SSE41CostTblNoPairWise[] = {
{ISD::FMINNUM, MVT::v4f32, 3},
- {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
- {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
+ {ISD::SMIN, MVT::v2i64, 9},
+ {ISD::UMIN, MVT::v2i64, 11},
{ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5"
{ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8"
{ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5"
{ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8"
+ {ISD::SMIN, MVT::v16i8, 3},
+ {ISD::UMIN, MVT::v16i8, 3},
+ };
+
+ static const CostTblEntry SSE42CostTblNoPairWise[] = {
+ {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8"
+ {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6"
};
static const CostTblEntry AVX1CostTblNoPairWise[] = {
@@ -2456,8 +2673,16 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
{ISD::UMIN, MVT::v4i32, 1},
{ISD::SMIN, MVT::v8i16, 1},
{ISD::UMIN, MVT::v8i16, 1},
+ {ISD::SMIN, MVT::v16i8, 2},
+ {ISD::UMIN, MVT::v16i8, 2},
+ {ISD::SMIN, MVT::v4i64, 7},
+ {ISD::UMIN, MVT::v4i64, 7},
{ISD::SMIN, MVT::v8i32, 2},
{ISD::UMIN, MVT::v8i32, 2},
+ {ISD::SMIN, MVT::v16i16, 2},
+ {ISD::UMIN, MVT::v16i16, 2},
+ {ISD::SMIN, MVT::v32i8, 2},
+ {ISD::UMIN, MVT::v32i8, 2},
};
static const CostTblEntry AVX2CostTblNoPairWise[] = {
@@ -2496,6 +2721,18 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
if (ST->hasSSE42())
if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
} else {
if (ST->hasAVX512())
if (const auto *Entry =
@@ -2513,6 +2750,18 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
if (ST->hasSSE42())
if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
return LT.first * Entry->Cost;
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
}
return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned);
@@ -2864,26 +3113,106 @@ bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
}
bool X86TTIImpl::canMacroFuseCmp() {
- return ST->hasMacroFusion();
+ return ST->hasMacroFusion() || ST->hasBranchFusion();
}
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
+ if (!ST->hasAVX())
+ return false;
+
// The backend can't handle a single element vector.
if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
return false;
Type *ScalarTy = DataTy->getScalarType();
- int DataWidth = isa<PointerType>(ScalarTy) ?
- DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
- return ((DataWidth == 32 || DataWidth == 64) && ST->hasAVX()) ||
- ((DataWidth == 8 || DataWidth == 16) && ST->hasBWI());
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 32 || IntWidth == 64 ||
+ ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI());
}
bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
return isLegalMaskedLoad(DataType);
}
+bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+ // The only supported nontemporal loads are for aligned vectors of 16 or 32
+ // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2
+ // (the equivalent stores only require AVX).
+ if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
+ return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2();
+
+ return false;
+}
+
+bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) {
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+
+ // SSE4A supports nontemporal stores of float and double at arbitrary
+ // alignment.
+ if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy()))
+ return true;
+
+ // Besides the SSE4A subtarget exception above, only aligned stores are
+ // available nontemporally on any other subtarget, and only for power-of-2
+ // store sizes of 4..32 bytes.
+ if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
+ !isPowerOf2_32(DataSize))
+ return false;
+
+ // 32-byte vector nontemporal stores are supported by AVX (the equivalent
+ // loads require AVX2).
+ if (DataSize == 32)
+ return ST->hasAVX();
+ else if (DataSize == 16)
+ return ST->hasSSE1();
+ return true;
+}
+
+bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
+ if (!isa<VectorType>(DataTy))
+ return false;
+
+ if (!ST->hasAVX512())
+ return false;
+
+ // The backend can't handle a single element vector.
+ if (DataTy->getVectorNumElements() == 1)
+ return false;
+
+ Type *ScalarTy = DataTy->getVectorElementType();
+
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 32 || IntWidth == 64 ||
+ ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2());
+}
+
+bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
+ return isLegalMaskedExpandLoad(DataTy);
+}
+
bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
+ // Some CPUs have better gather performance than others.
+ // TODO: Remove the explicit ST->hasAVX512()? That would mean we would only
+ // enable gather with a -march.
+ if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2())))
+ return false;
+
// This function is called now in two cases: from the Loop Vectorizer
// and from the Scalarizer.
// When the Loop Vectorizer asks about legality of the feature,
@@ -2902,14 +3231,17 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
return false;
}
Type *ScalarTy = DataTy->getScalarType();
- int DataWidth = isa<PointerType>(ScalarTy) ?
- DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
+ if (ScalarTy->isPointerTy())
+ return true;
- // Some CPUs have better gather performance than others.
- // TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only
- // enable gather with a -march.
- return (DataWidth == 32 || DataWidth == 64) &&
- (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
+ if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+ return true;
+
+ if (!ScalarTy->isIntegerTy())
+ return false;
+
+ unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+ return IntWidth == 32 || IntWidth == 64;
}
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
@@ -2938,44 +3270,51 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
- // FIXME: This is likely too limiting as it will include subtarget features
- // that we might not care about for inlining, but it is conservatively
- // correct.
- return (CallerBits & CalleeBits) == CalleeBits;
+ FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
+ FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
+ return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
}
-const X86TTIImpl::TTI::MemCmpExpansionOptions *
-X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
- // Only enable vector loads for equality comparison.
- // Right now the vector version is not as fast, see #33329.
- static const auto ThreeWayOptions = [this]() {
- TTI::MemCmpExpansionOptions Options;
- if (ST->is64Bit()) {
- Options.LoadSizes.push_back(8);
- }
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
- return Options;
- }();
- static const auto EqZeroOptions = [this]() {
- TTI::MemCmpExpansionOptions Options;
+bool X86TTIImpl::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+ if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ return false;
+
+ // If we get here, we know the target features match. If one function
+ // considers 512-bit vectors legal and the other does not, consider them
+ // incompatible.
+ // FIXME: Look at the arguments and only consider 512-bit or larger vectors?
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ return TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
+ TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs();
+}
+
+X86TTIImpl::TTI::MemCmpExpansionOptions
+X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = 2;
+ if (IsZeroCmp) {
+ // Only enable vector loads for equality comparison. Right now the vector
+ // version is not as fast for a three-way compare (see #33329).
// TODO: enable AVX512 when the DAG is ready.
// if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
- if (ST->hasAVX2()) Options.LoadSizes.push_back(32);
- if (ST->hasSSE2()) Options.LoadSizes.push_back(16);
- if (ST->is64Bit()) {
- Options.LoadSizes.push_back(8);
- }
- Options.LoadSizes.push_back(4);
- Options.LoadSizes.push_back(2);
- Options.LoadSizes.push_back(1);
+ const unsigned PreferredWidth = ST->getPreferVectorWidth();
+ if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32);
+ if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
// All GPR and vector loads can be unaligned. SIMD compare requires integer
// vectors (SSE2/AVX2).
Options.AllowOverlappingLoads = true;
- return Options;
- }();
- return IsZeroCmp ? &EqZeroOptions : &ThreeWayOptions;
+ }
+ if (ST->is64Bit()) {
+ Options.LoadSizes.push_back(8);
+ }
+ Options.LoadSizes.push_back(4);
+ Options.LoadSizes.push_back(2);
+ Options.LoadSizes.push_back(1);
+ return Options;
}
bool X86TTIImpl::enableInterleavedAccessVectorization() {
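
With the rewritten hook above, equality-only memcmp expansion can use 32-byte loads (AVX2, preferred vector width permitting) and 16-byte loads (SSE2), and may finish with an overlapping load. A small standalone sketch of how a constant length could be covered by the enabled load sizes under that overlap rule (an illustrative helper, not LLVM's MemCmpExpansion pass):

#include <cstdint>
#include <cstdio>
#include <vector>

struct LoadChunk { uint64_t Offset, Size; };

// Cover Len bytes with the enabled load sizes, largest first, optionally
// ending with one overlapping load as AllowOverlappingLoads permits for
// equality-only comparisons.
static std::vector<LoadChunk> coverLength(uint64_t Len,
                                          const std::vector<uint64_t> &Sizes,
                                          bool AllowOverlapping) {
  std::vector<LoadChunk> Chunks;
  uint64_t Off = 0;
  for (uint64_t S : Sizes) { // Sizes sorted largest-first, e.g. {16, 8, 4, 2, 1}
    while (Len - Off >= S) {
      Chunks.push_back({Off, S});
      Off += S;
    }
    if (Off == Len)
      break;
    // One overlapping load of size S ending exactly at Len replaces the whole
    // remaining tail, avoiding a chain of smaller loads.
    if (AllowOverlapping && Off > 0 && Len >= S) {
      Chunks.push_back({Len - S, S});
      break;
    }
  }
  return Chunks;
}

int main() {
  // A 19-byte equality compare with SSE2: one 16-byte load at offset 0 and one
  // overlapping 16-byte load at offset 3, instead of 16 + 2 + 1.
  for (const LoadChunk &C : coverLength(19, {16, 8, 4, 2, 1}, true))
    std::printf("load %llu bytes at offset %llu\n",
                (unsigned long long)C.Size, (unsigned long long)C.Offset);
  return 0;
}
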
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index 1637592c81f8..25d9c33eb16d 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -36,6 +35,64 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
+ const FeatureBitset InlineFeatureIgnoreList = {
+ // This indicates that the CPU is 64-bit capable, not that we are in 64-bit
+ // mode.
+ X86::Feature64Bit,
+
+ // These features don't have any intrinsics or ABI effect.
+ X86::FeatureNOPL,
+ X86::FeatureCMPXCHG16B,
+ X86::FeatureLAHFSAHF,
+
+ // Codegen control options.
+ X86::FeatureFast11ByteNOP,
+ X86::FeatureFast15ByteNOP,
+ X86::FeatureFastBEXTR,
+ X86::FeatureFastHorizontalOps,
+ X86::FeatureFastLZCNT,
+ X86::FeatureFastPartialYMMorZMMWrite,
+ X86::FeatureFastScalarFSQRT,
+ X86::FeatureFastSHLDRotate,
+ X86::FeatureFastScalarShiftMasks,
+ X86::FeatureFastVectorShiftMasks,
+ X86::FeatureFastVariableShuffle,
+ X86::FeatureFastVectorFSQRT,
+ X86::FeatureLEAForSP,
+ X86::FeatureLEAUsesAG,
+ X86::FeatureLZCNTFalseDeps,
+ X86::FeatureBranchFusion,
+ X86::FeatureMacroFusion,
+ X86::FeatureMergeToThreeWayBranch,
+ X86::FeaturePadShortFunctions,
+ X86::FeaturePOPCNTFalseDeps,
+ X86::FeatureSSEUnalignedMem,
+ X86::FeatureSlow3OpsLEA,
+ X86::FeatureSlowDivide32,
+ X86::FeatureSlowDivide64,
+ X86::FeatureSlowIncDec,
+ X86::FeatureSlowLEA,
+ X86::FeatureSlowPMADDWD,
+ X86::FeatureSlowPMULLD,
+ X86::FeatureSlowSHLD,
+ X86::FeatureSlowTwoMemOps,
+ X86::FeatureSlowUAMem16,
+
+ // Perf-tuning flags.
+ X86::FeatureHasFastGather,
+ X86::FeatureSlowUAMem32,
+
+ // Based on whether the user set the -mprefer-vector-width command line option.
+ X86::FeaturePrefer256Bit,
+
+ // CPU name enums. These just follow the CPU string.
+ X86::ProcIntelAtom,
+ X86::ProcIntelGLM,
+ X86::ProcIntelGLP,
+ X86::ProcIntelSLM,
+ X86::ProcIntelTRM,
+ };
+
public:
explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -129,14 +186,21 @@ public:
bool canMacroFuseCmp();
bool isLegalMaskedLoad(Type *DataType);
bool isLegalMaskedStore(Type *DataType);
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment);
+ bool isLegalNTStore(Type *DataType, unsigned Alignment);
bool isLegalMaskedGather(Type *DataType);
bool isLegalMaskedScatter(Type *DataType);
+ bool isLegalMaskedExpandLoad(Type *DataType);
+ bool isLegalMaskedCompressStore(Type *DataType);
bool hasDivRemOp(Type *DataType, bool IsSigned);
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const;
+ bool areFunctionArgsABICompatible(const Function *Caller,
+ const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
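
The new InlineFeatureIgnoreList above feeds areInlineCompatible in the .cpp change earlier: tuning-only feature bits are masked out of both functions before requiring the caller's feature set to cover the callee's. A minimal sketch of that subset check using std::bitset as an illustrative stand-in for FeatureBitset:

#include <bitset>
#include <cstdio>

constexpr std::size_t NumFeatures = 8; // illustrative size only

// The caller may inline the callee only if, after dropping tuning-only bits,
// every feature the callee relies on is also available in the caller.
static bool areInlineCompatible(std::bitset<NumFeatures> Caller,
                                std::bitset<NumFeatures> Callee,
                                std::bitset<NumFeatures> IgnoreList) {
  std::bitset<NumFeatures> RealCaller = Caller & ~IgnoreList;
  std::bitset<NumFeatures> RealCallee = Callee & ~IgnoreList;
  return (RealCaller & RealCallee) == RealCallee;
}

int main() {
  // Bit 0: an ISA feature (e.g. AVX2), bit 1: a tuning flag (e.g. FastBEXTR).
  std::bitset<NumFeatures> Caller("0001"), Callee("0011"), Ignore("0010");
  // Without the ignore list the tuning bit would block inlining; with it,
  // only the ISA bit matters and inlining is allowed.
  std::printf("%s\n", areInlineCompatible(Caller, Callee, Ignore)
                          ? "compatible"
                          : "incompatible");
  return 0;
}
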
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index f882b760927c..a07d2f20acab 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -1,9 +1,8 @@
//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp
index d298aaa97ecd..9e499db1d7ee 100644
--- a/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -1,9 +1,8 @@
//===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,10 +84,6 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
unsigned AmountReg = MI->getOperand(0).getReg();
MachineInstr *Def = MRI->getUniqueVRegDef(AmountReg);
- // Look through copies.
- while (Def && Def->isCopy() && Def->getOperand(1).isReg())
- Def = MRI->getUniqueVRegDef(Def->getOperand(1).getReg());
-
if (!Def ||
(Def->getOpcode() != X86::MOV32ri && Def->getOpcode() != X86::MOV64ri) ||
!Def->getOperand(1).isImm())
@@ -210,15 +205,18 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
return;
}
+ // These two variables differ on x32, which is a 64-bit target with a
+ // 32-bit alloca.
bool Is64Bit = STI->is64Bit();
+ bool Is64BitAlloca = MI->getOpcode() == X86::WIN_ALLOCA_64;
assert(SlotSize == 4 || SlotSize == 8);
- unsigned RegA = (SlotSize == 8) ? X86::RAX : X86::EAX;
switch (L) {
- case TouchAndSub:
+ case TouchAndSub: {
assert(Amount >= SlotSize);
// Use a push to touch the top of the stack.
+ unsigned RegA = Is64Bit ? X86::RAX : X86::EAX;
BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(RegA, RegState::Undef);
Amount -= SlotSize;
@@ -227,15 +225,18 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
// Fall through to make any remaining adjustment.
LLVM_FALLTHROUGH;
+ }
case Sub:
assert(Amount > 0);
if (Amount == SlotSize) {
// Use push to save size.
+ unsigned RegA = Is64Bit ? X86::RAX : X86::EAX;
BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
.addReg(RegA, RegState::Undef);
} else {
// Sub.
- BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64Bit, Amount)), StackPtr)
+ BuildMI(*MBB, I, DL,
+ TII->get(getSubOpcode(Is64BitAlloca, Amount)), StackPtr)
.addReg(StackPtr)
.addImm(Amount);
}
@@ -243,16 +244,17 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
case Probe:
if (!NoStackArgProbe) {
// The probe lowering expects the amount in RAX/EAX.
+ unsigned RegA = Is64BitAlloca ? X86::RAX : X86::EAX;
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), RegA)
.addReg(MI->getOperand(0).getReg());
// Do the probe.
STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
- /*InPrologue=*/false);
+ /*InProlog=*/false);
} else {
// Sub
- BuildMI(*MBB, I, DL, TII->get(Is64Bit ? X86::SUB64rr : X86::SUB32rr),
- StackPtr)
+ BuildMI(*MBB, I, DL,
+ TII->get(Is64BitAlloca ? X86::SUB64rr : X86::SUB32rr), StackPtr)
.addReg(StackPtr)
.addReg(MI->getOperand(0).getReg());
}
@@ -262,18 +264,10 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
unsigned AmountReg = MI->getOperand(0).getReg();
MI->eraseFromParent();
- // Delete the definition of AmountReg, possibly walking a chain of copies.
- for (;;) {
- if (!MRI->use_empty(AmountReg))
- break;
- MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg);
- if (!AmountDef)
- break;
- if (AmountDef->isCopy() && AmountDef->getOperand(1).isReg())
- AmountReg = AmountDef->getOperand(1).isReg();
- AmountDef->eraseFromParent();
- break;
- }
+ // Delete the definition of AmountReg.
+ if (MRI->use_empty(AmountReg))
+ if (MachineInstr *AmountDef = MRI->getUniqueVRegDef(AmountReg))
+ AmountDef->eraseFromParent();
}
bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
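A note on the X86WinAllocaExpander hunk above: the added comment is the key point, since on x32 the subtarget reports 64-bit (so the stack-touching pushes use RAX/PUSH64r) while the alloca and the stack-pointer subtraction are 32-bit (WIN_ALLOCA_32, SUB32ri). The following is a minimal standalone sketch of that selection logic, not the LLVM API; the opcode strings are illustrative only.

#include <cstdio>

// Is64Bit mirrors STI->is64Bit(); Is64BitAlloca mirrors the WIN_ALLOCA_64 check.
static const char *pushInsn(bool Is64Bit) {
  // Touching the top of the stack follows the target's native push width.
  return Is64Bit ? "push64 rax" : "push32 eax";
}

static const char *subInsn(bool Is64BitAlloca) {
  // Adjusting the stack pointer follows the width of the alloca instead.
  return Is64BitAlloca ? "sub64 rsp, imm" : "sub32 esp, imm";
}

int main() {
  bool Is64Bit = true, Is64BitAlloca = false; // the x32 combination
  std::printf("%s / %s\n", pushInsn(Is64Bit), subInsn(Is64BitAlloca));
  return 0;
}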
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 185deda97c1f..f68d17d7256d 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -1,9 +1,8 @@
//===-- X86WinEHState - Insert EH state updates for win32 exceptions ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,9 +40,7 @@ class WinEHStatePass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHStatePass() : FunctionPass(ID) {
- initializeWinEHStatePassPass(*PassRegistry::getPassRegistry());
- }
+ WinEHStatePass() : FunctionPass(ID) { }
bool runOnFunction(Function &Fn) override;
@@ -87,15 +84,15 @@ private:
StructType *EHLinkRegistrationTy = nullptr;
StructType *CXXEHRegistrationTy = nullptr;
StructType *SEHRegistrationTy = nullptr;
- Constant *SetJmp3 = nullptr;
- Constant *CxxLongjmpUnwind = nullptr;
+ FunctionCallee SetJmp3 = nullptr;
+ FunctionCallee CxxLongjmpUnwind = nullptr;
// Per-function state
EHPersonality Personality = EHPersonality::Unknown;
Function *PersonalityFn = nullptr;
bool UseStackGuard = false;
int ParentBaseState;
- Constant *SehLongjmpUnwind = nullptr;
+ FunctionCallee SehLongjmpUnwind = nullptr;
Constant *Cookie = nullptr;
/// The stack allocation containing all EH data, including the link in the
@@ -304,7 +301,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
CxxLongjmpUnwind = TheModule->getOrInsertFunction(
"__CxxLongjmpUnwind",
FunctionType::get(VoidTy, Int8PtrType, /*isVarArg=*/false));
- cast<Function>(CxxLongjmpUnwind->stripPointerCasts())
+ cast<Function>(CxxLongjmpUnwind.getCallee()->stripPointerCasts())
->setCallingConv(CallingConv::X86_StdCall);
} else if (Personality == EHPersonality::MSVC_X86SEH) {
// If _except_handler4 is in use, some additional guard checks and prologue
@@ -357,7 +354,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
UseStackGuard ? "_seh_longjmp_unwind4" : "_seh_longjmp_unwind",
FunctionType::get(Type::getVoidTy(TheModule->getContext()), Int8PtrType,
/*isVarArg=*/false));
- cast<Function>(SehLongjmpUnwind->stripPointerCasts())
+ cast<Function>(SehLongjmpUnwind.getCallee()->stripPointerCasts())
->setCallingConv(CallingConv::X86_StdCall);
} else {
llvm_unreachable("unexpected personality function");
@@ -412,7 +409,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo());
auto AI = Trampoline->arg_begin();
Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++};
- CallInst *Call = Builder.CreateCall(CastPersonality, Args);
+ CallInst *Call = Builder.CreateCall(TargetFuncTy, CastPersonality, Args);
// Can't use musttail due to prototype mismatch, but we can use tail.
Call->setTailCall(true);
// Set inreg so we pass it in EAX.
@@ -433,7 +430,7 @@ void WinEHStatePass::linkExceptionRegistration(IRBuilder<> &Builder,
// Next = [fs:00]
Constant *FSZero =
Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
- Value *Next = Builder.CreateLoad(FSZero);
+ Value *Next = Builder.CreateLoad(LinkTy->getPointerTo(), FSZero);
Builder.CreateStore(Next, Builder.CreateStructGEP(LinkTy, Link, 0));
// [fs:00] = Link
Builder.CreateStore(Link, FSZero);
@@ -448,8 +445,8 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
}
Type *LinkTy = getEHLinkRegistrationType();
// [fs:00] = Link->Next
- Value *Next =
- Builder.CreateLoad(Builder.CreateStructGEP(LinkTy, Link, 0));
+ Value *Next = Builder.CreateLoad(LinkTy->getPointerTo(),
+ Builder.CreateStructGEP(LinkTy, Link, 0));
Constant *FSZero =
Constant::getNullValue(LinkTy->getPointerTo()->getPointerTo(257));
Builder.CreateStore(Next, FSZero);
@@ -472,11 +469,11 @@ void WinEHStatePass::rewriteSetJmpCallSite(IRBuilder<> &Builder, Function &F,
SmallVector<Value *, 3> OptionalArgs;
if (Personality == EHPersonality::MSVC_CXX) {
- OptionalArgs.push_back(CxxLongjmpUnwind);
+ OptionalArgs.push_back(CxxLongjmpUnwind.getCallee());
OptionalArgs.push_back(State);
OptionalArgs.push_back(emitEHLSDA(Builder, &F));
} else if (Personality == EHPersonality::MSVC_X86SEH) {
- OptionalArgs.push_back(SehLongjmpUnwind);
+ OptionalArgs.push_back(SehLongjmpUnwind.getCallee());
OptionalArgs.push_back(State);
if (UseStackGuard)
OptionalArgs.push_back(Cookie);
@@ -767,7 +764,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
if (!CS)
continue;
if (CS.getCalledValue()->stripPointerCasts() !=
- SetJmp3->stripPointerCasts())
+ SetJmp3.getCallee()->stripPointerCasts())
continue;
SetJmp3CallSites.push_back(CS);
@@ -782,9 +779,9 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
IRBuilder<> Builder(CS.getInstruction());
Value *State;
if (InCleanup) {
- Value *StateField =
- Builder.CreateStructGEP(nullptr, RegNode, StateFieldIndex);
- State = Builder.CreateLoad(StateField);
+ Value *StateField = Builder.CreateStructGEP(RegNode->getAllocatedType(),
+ RegNode, StateFieldIndex);
+ State = Builder.CreateLoad(Builder.getInt32Ty(), StateField);
} else {
State = Builder.getInt32(getStateForCallSite(BlockColors, FuncInfo, CS));
}
@@ -794,7 +791,7 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
void WinEHStatePass::insertStateNumberStore(Instruction *IP, int State) {
IRBuilder<> Builder(IP);
- Value *StateField =
- Builder.CreateStructGEP(nullptr, RegNode, StateFieldIndex);
+ Value *StateField = Builder.CreateStructGEP(RegNode->getAllocatedType(),
+ RegNode, StateFieldIndex);
Builder.CreateStore(Builder.getInt32(State), StateField);
}
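The X86WinEHState.cpp hunks above track two LLVM 9 API migrations that recur throughout this import: Module::getOrInsertFunction now returns a FunctionCallee (so call sites go through getCallee()), and IRBuilder loads and struct GEPs take an explicit pointee type. A minimal sketch of both, assuming a throwaway module; the names "demo", "ext_fn" and "f" are placeholders, not from this diff.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  IRBuilder<> B(Ctx);

  // getOrInsertFunction hands back a {FunctionType*, Value*} pair.
  FunctionCallee Callee = M.getOrInsertFunction(
      "ext_fn", FunctionType::get(B.getVoidTy(), B.getInt8PtrTy(),
                                  /*isVarArg=*/false));
  // The raw callee (possibly a bitcast constant) is reached via getCallee(),
  // which is what the stripPointerCasts() call sites above now do.
  Value *Raw = Callee.getCallee();
  (void)Raw;

  // Typed IRBuilder calls: the loaded/indexed type is spelled out explicitly.
  Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                 GlobalValue::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);
  AllocaInst *Slot = B.CreateAlloca(B.getInt32Ty());
  Value *Loaded = B.CreateLoad(B.getInt32Ty(), Slot);
  (void)Loaded;
  B.CreateRetVoid();
  return 0;
}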
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index faf66e5944ab..ff3d41fd5274 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -1,9 +1,8 @@
//===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -12,6 +11,7 @@
///
//===----------------------------------------------------------------------===//
+#include "TargetInfo/XCoreTargetInfo.h"
#include "XCore.h"
#include "XCoreRegisterInfo.h"
#include "llvm/MC/MCContext.h"
@@ -768,10 +768,6 @@ MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(
return Fail;
}
-namespace llvm {
- Target &getTheXCoreTarget();
-}
-
static MCDisassembler *createXCoreDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
index b03c1852281d..d231e0981324 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
@@ -1,9 +1,8 @@
//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
index a0b480026469..4f0940323505 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
@@ -1,9 +1,8 @@
//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -13,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
-#define LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
+#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
+#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
@@ -44,4 +43,4 @@ private:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
+#endif // LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREINSTPRINTER_H
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 3178a4edbb3b..ae19e2a78eec 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -1,9 +1,8 @@
//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 39581e424e8c..b1dd247f8468 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -1,9 +1,8 @@
//===-- XCoreMCAsmInfo.h - XCore asm properties ----------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 805f1c18b609..877f38e22f9b 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -1,9 +1,8 @@
//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,8 +11,9 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/XCoreMCTargetDesc.h"
-#include "InstPrinter/XCoreInstPrinter.h"
+#include "MCTargetDesc/XCoreInstPrinter.h"
#include "MCTargetDesc/XCoreMCAsmInfo.h"
+#include "TargetInfo/XCoreTargetInfo.h"
#include "XCoreTargetStreamer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCDwarf.h"
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 1dc384fadf69..3e56302f4add 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -1,9 +1,8 @@
//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,8 +17,6 @@ namespace llvm {
class Target;
-Target &getTheXCoreTarget();
-
} // end namespace llvm
// Defines symbolic names for XCore registers. This defines a mapping from
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index 41f4078cc328..5604f29db3e9 100644
--- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -1,14 +1,12 @@
//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-#include "XCore.h"
-#include "llvm/IR/Module.h"
+#include "TargetInfo/XCoreTargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.h b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.h
new file mode 100644
index 000000000000..35f05f22e4ce
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.h
@@ -0,0 +1,20 @@
+//===-- XCoreTargetInfo.h - XCore Target Implementation ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_XCORE_TARGETINFO_XCORETARGETINFO_H
+#define LLVM_LIB_TARGET_XCORE_TARGETINFO_XCORETARGETINFO_H
+
+namespace llvm {
+
+class Target;
+
+Target &getTheXCoreTarget();
+
+}
+
+#endif // LLVM_LIB_TARGET_XCORE_TARGETINFO_XCORETARGETINFO_H
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index ba6ca843671e..b7b86be9ab51 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -1,9 +1,8 @@
//===-- XCore.h - Top-level interface for XCore representation --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 04a1dd5e95be..a97b3dd1d0a2 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -1,9 +1,8 @@
//===-- XCore.td - Describe the XCore Target Machine -------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 916bca6392de..9f615b9e7741 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -1,9 +1,8 @@
//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,7 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/XCoreInstPrinter.h"
+#include "MCTargetDesc/XCoreInstPrinter.h"
+#include "TargetInfo/XCoreTargetInfo.h"
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMCInstLower.h"
@@ -67,11 +67,9 @@ namespace {
}
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) override;
+ const char *ExtraCode, raw_ostream &O) override;
void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
void EmitGlobalVariable(const GlobalVariable *GV) override;
@@ -216,7 +214,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
MO.getMBB()->getSymbol()->print(O, MAI);
break;
case MachineOperand::MO_GlobalAddress:
- getSymbol(MO.getGlobal())->print(O, MAI);
+ PrintSymbolOperand(MO, O);
break;
case MachineOperand::MO_ConstantPoolIndex:
O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
@@ -233,8 +231,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
// Print the operand if there is no operand modifier.
if (!ExtraCode || !ExtraCode[0]) {
printOperand(MI, OpNo, O);
@@ -242,13 +239,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
// Otherwise fallback on the default implementation.
- return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
}
-bool XCoreAsmPrinter::
-PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+bool XCoreAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum,
+ const char *ExtraCode,
+ raw_ostream &O) {
if (ExtraCode && ExtraCode[0]) {
return true; // Unknown modifier.
}
diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td
index e149e6d9ec20..aec109b83fa2 100644
--- a/lib/Target/XCore/XCoreCallingConv.td
+++ b/lib/Target/XCore/XCoreCallingConv.td
@@ -1,9 +1,8 @@
//===- XCoreCallingConv.td - Calling Conventions for XCore -*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for XCore architecture.
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index fff8a66d0e75..5066407c74aa 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -1,9 +1,8 @@
//===-- XCoreFrameLowering.cpp - Frame info for XCore Target --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index e98e9cda11db..95c3a2973033 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -1,9 +1,8 @@
//===-- XCoreFrameLowering.h - Frame info for XCore Target ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index 4b10e71be03d..e433d21c59b7 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -1,9 +1,8 @@
//===-- XCoreFrameToArgsOffsetElim.cpp ----------------------------*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 1688c38efc1d..5fd9e23258b0 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -1,9 +1,8 @@
//===-- XCoreISelDAGToDAG.cpp - A dag to dag inst selector for XCore ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 75d7ae7048a1..072278d9fc46 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1,9 +1,8 @@
//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -407,23 +406,16 @@ static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
return Known.countMinTrailingZeros() >= 2;
}
-SDValue XCoreTargetLowering::
-LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+SDValue XCoreTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ LLVMContext &Context = *DAG.getContext();
LoadSDNode *LD = cast<LoadSDNode>(Op);
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Unexpected extension type");
assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT");
- if (allowsMisalignedMemoryAccesses(LD->getMemoryVT(),
- LD->getAddressSpace(),
- LD->getAlignment()))
- return SDValue();
- auto &TD = DAG.getDataLayout();
- unsigned ABIAlignment = TD.getABITypeAlignment(
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
- // Leave aligned load alone.
- if (LD->getAlignment() >= ABIAlignment)
+ if (allowsMemoryAccess(Context, DAG.getDataLayout(), LD->getMemoryVT(),
+ *LD->getMemOperand()))
return SDValue();
SDValue Chain = LD->getChain();
@@ -470,7 +462,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
// Lower to a call to __misaligned_load(BasePtr).
- Type *IntPtrTy = TD.getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(Context);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -490,23 +482,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}
-SDValue XCoreTargetLowering::
-LowerSTORE(SDValue Op, SelectionDAG &DAG) const
-{
+SDValue XCoreTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ LLVMContext &Context = *DAG.getContext();
StoreSDNode *ST = cast<StoreSDNode>(Op);
assert(!ST->isTruncatingStore() && "Unexpected store type");
assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT");
- if (allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
- ST->getAddressSpace(),
- ST->getAlignment())) {
- return SDValue();
- }
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
- ST->getMemoryVT().getTypeForEVT(*DAG.getContext()));
- // Leave aligned store alone.
- if (ST->getAlignment() >= ABIAlignment) {
+
+ if (allowsMemoryAccess(Context, DAG.getDataLayout(), ST->getMemoryVT(),
+ *ST->getMemOperand()))
return SDValue();
- }
+
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
SDValue Value = ST->getValue();
@@ -515,7 +500,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
if (ST->getAlignment() == 2) {
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
- DAG.getConstant(16, dl, MVT::i32));
+ DAG.getConstant(16, dl, MVT::i32));
SDValue StoreLow = DAG.getTruncStore(
Chain, dl, Low, BasePtr, ST->getPointerInfo(), MVT::i16,
/* Alignment = */ 2, ST->getMemOperand()->getFlags());
@@ -528,7 +513,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
}
// Lower to a call to __misaligned_store(BasePtr, Value).
- Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(Context);
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -541,7 +526,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain).setCallee(
- CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ CallingConv::C, Type::getVoidTy(Context),
DAG.getExternalSymbol("__misaligned_store",
getPointerTy(DAG.getDataLayout())),
std::move(Args));
@@ -1009,6 +994,27 @@ LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+MachineMemOperand::Flags
+XCoreTargetLowering::getMMOFlags(const Instruction &I) const {
+ // Because of how we convert atomic_load and atomic_store to normal loads and
+ // stores in the DAG, we need to ensure that the MMOs are marked volatile
+ // since DAGCombine hasn't been updated to account for atomic, but
+ // non-volatile, loads. (See D57601)
+ if (auto *SI = dyn_cast<StoreInst>(&I))
+ if (SI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ if (LI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
+ if (AI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
+ if (AI->isAtomic())
+ return MachineMemOperand::MOVolatile;
+ return MachineMemOperand::MONone;
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1772,11 +1778,10 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::STORE: {
// Replace unaligned store of unaligned load with memmove.
- StoreSDNode *ST = cast<StoreSDNode>(N);
+ StoreSDNode *ST = cast<StoreSDNode>(N);
if (!DCI.isBeforeLegalize() ||
- allowsMisalignedMemoryAccesses(ST->getMemoryVT(),
- ST->getAddressSpace(),
- ST->getAlignment()) ||
+ allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ ST->getMemoryVT(), *ST->getMemOperand()) ||
ST->isVolatile() || ST->isIndexed()) {
break;
}
@@ -1785,12 +1790,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
assert((StoreBits % 8) == 0 &&
"Store size in bits must be a multiple of 8");
- unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
- ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
unsigned Alignment = ST->getAlignment();
- if (Alignment >= ABIAlignment) {
- break;
- }
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ST->getValue())) {
if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() &&
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 7a99389e54a7..b4f25feda7fe 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -1,9 +1,8 @@
//===-- XCoreISelLowering.h - XCore DAG Lowering Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -189,6 +188,8 @@ namespace llvm {
SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
+ MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
+
// Inline asm support
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 379cc39aa617..deb899ddb1af 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -1,9 +1,8 @@
//===-- XCoreInstrFormats.td - XCore Instruction Formats ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index b0de048672df..bbad8e354586 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- XCoreInstrInfo.cpp - XCore Instruction Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 9d9ee33ce222..b9621f136589 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -1,9 +1,8 @@
//===-- XCoreInstrInfo.h - XCore Instruction Information --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index b87ba6548962..18f02e1d80f0 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -1,9 +1,8 @@
//===-- XCoreInstrInfo.td - Target Description for XCore ---*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 7455cd997ad6..a18fb28f2fe9 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -1,9 +1,8 @@
//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
index 21270192b234..cd28fa5cd144 100644
--- a/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -1,9 +1,8 @@
//===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h
index abcb80fcf766..0eaa84ef736b 100644
--- a/lib/Target/XCore/XCoreMCInstLower.h
+++ b/lib/Target/XCore/XCoreMCInstLower.h
@@ -1,9 +1,8 @@
//===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index b7b0daab9806..0b4fcffbc655 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -1,9 +1,8 @@
//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 6c05ab3f10df..aebe11b15b54 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,9 +1,8 @@
//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index e119d9555f9d..3752274e2cdf 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===-- XCoreRegisterInfo.cpp - XCore Register Information ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -284,7 +283,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += StackSize;
- unsigned FrameReg = getFrameRegister(MF);
+ Register FrameReg = getFrameRegister(MF);
// Special handling of DBG_VALUE instructions.
if (MI.isDebugValue()) {
@@ -322,7 +321,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
-unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+Register XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const XCoreFrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 2e9fd98ed34f..35a42e1a1457 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -1,9 +1,8 @@
//===-- XCoreRegisterInfo.h - XCore Register Information Impl ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,7 +43,7 @@ public:
RegScavenger *RS = nullptr) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameRegister(const MachineFunction &MF) const override;
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 6694b2882aca..d9502939bae3 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -1,9 +1,8 @@
//===-- XCoreRegisterInfo.td - XCore Register defs ---------*- tablegen -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 646309e02de8..c86756e345a9 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -1,9 +1,8 @@
//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 7cd0d8216e91..5dcef08391c9 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -1,9 +1,8 @@
//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 99ad2c88504f..ffeb0862c945 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -1,9 +1,8 @@
//===-- XCoreSubtarget.cpp - XCore Subtarget Information ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index ed9936ebf2b8..68139da9d1d0 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -1,9 +1,8 @@
//===-- XCoreSubtarget.h - Define Subtarget for the XCore -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 2aa9932e2465..2a8cd6b657b7 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- XCoreTargetMachine.cpp - Define TargetMachine for XCore -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
#include "XCoreTargetMachine.h"
#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "TargetInfo/XCoreTargetInfo.h"
#include "XCore.h"
#include "XCoreTargetObjectFile.h"
#include "XCoreTargetTransformInfo.h"
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 965b9b2c4d65..9c3bdcf78f9c 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -1,9 +1,8 @@
//===-- XCoreTargetMachine.h - Define TargetMachine for XCore ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index c60a262e719c..fe743b28b4b4 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -1,9 +1,8 @@
//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 5eb423a7435e..fd172c55919f 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -1,9 +1,8 @@
//===-- XCoreTargetObjectFile.h - XCore Object Info -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index 3563dbc5cb7b..3543fc52ea7f 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -1,9 +1,8 @@
//===-- XCoreTargetStreamer.h - XCore Target Streamer ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h
index aa068b333425..3fecaaa59722 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -1,9 +1,8 @@
//===-- XCoreTargetTransformInfo.h - XCore specific TTI ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Testing/Support/Annotations.cpp b/lib/Testing/Support/Annotations.cpp
new file mode 100644
index 000000000000..09c572011d36
--- /dev/null
+++ b/lib/Testing/Support/Annotations.cpp
@@ -0,0 +1,95 @@
+//===--- Annotations.cpp - Annotated source code for unit tests --*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Testing/Support/Annotations.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Crash if the assertion fails, printing the message and testcase.
+// More elegant error handling isn't needed for unit tests.
+static void require(bool Assertion, const char *Msg, llvm::StringRef Code) {
+ if (!Assertion) {
+ llvm::errs() << "Annotated testcase: " << Msg << "\n" << Code << "\n";
+ llvm_unreachable("Annotated testcase assertion failed!");
+ }
+}
+
+Annotations::Annotations(llvm::StringRef Text) {
+ auto Require = [Text](bool Assertion, const char *Msg) {
+ require(Assertion, Msg, Text);
+ };
+ llvm::Optional<llvm::StringRef> Name;
+ llvm::SmallVector<std::pair<llvm::StringRef, size_t>, 8> OpenRanges;
+
+ Code.reserve(Text.size());
+ while (!Text.empty()) {
+ if (Text.consume_front("^")) {
+ Points[Name.getValueOr("")].push_back(Code.size());
+ Name = llvm::None;
+ continue;
+ }
+ if (Text.consume_front("[[")) {
+ OpenRanges.emplace_back(Name.getValueOr(""), Code.size());
+ Name = llvm::None;
+ continue;
+ }
+ Require(!Name, "$name should be followed by ^ or [[");
+ if (Text.consume_front("]]")) {
+ Require(!OpenRanges.empty(), "unmatched ]]");
+ Range R;
+ R.Begin = OpenRanges.back().second;
+ R.End = Code.size();
+ Ranges[OpenRanges.back().first].push_back(R);
+ OpenRanges.pop_back();
+ continue;
+ }
+ if (Text.consume_front("$")) {
+ Name = Text.take_while(llvm::isAlnum);
+ Text = Text.drop_front(Name->size());
+ continue;
+ }
+ Code.push_back(Text.front());
+ Text = Text.drop_front();
+ }
+ Require(!Name, "unterminated $name");
+ Require(OpenRanges.empty(), "unmatched [[");
+}
+
+size_t Annotations::point(llvm::StringRef Name) const {
+ auto I = Points.find(Name);
+ require(I != Points.end() && I->getValue().size() == 1,
+ "expected exactly one point", Code);
+ return I->getValue()[0];
+}
+
+std::vector<size_t> Annotations::points(llvm::StringRef Name) const {
+ auto P = Points.lookup(Name);
+ return {P.begin(), P.end()};
+}
+
+Annotations::Range Annotations::range(llvm::StringRef Name) const {
+ auto I = Ranges.find(Name);
+ require(I != Ranges.end() && I->getValue().size() == 1,
+ "expected exactly one range", Code);
+ return I->getValue()[0];
+}
+
+std::vector<Annotations::Range>
+Annotations::ranges(llvm::StringRef Name) const {
+ auto R = Ranges.lookup(Name);
+ return {R.begin(), R.end()};
+}
+
+llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &O,
+ const llvm::Annotations::Range &R) {
+ return O << llvm::formatv("[{0}, {1})", R.Begin, R.End);
+}
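For context on the new Annotations helper added above, a rough usage sketch follows; the annotated string and variable names are invented for illustration, and the behavior shown follows the syntax the file implements (^ marks a point, [[...]] a range, $name labels the next point or range).

#include "llvm/Support/raw_ostream.h"
#include "llvm/Testing/Support/Annotations.h"

using namespace llvm;

int main() {
  Annotations Source("int $decl^x = [[1 + 2]];");

  // code() is the text with every annotation marker stripped out.
  StringRef Code = Source.code();            // "int x = 1 + 2;"
  size_t Decl = Source.point("decl");        // offset of 'x' within Code
  Annotations::Range Init = Source.range();  // half-open range covering "1 + 2"

  outs() << Code << "\n"
         << Decl << " [" << Init.Begin << ", " << Init.End << ")\n";
  return 0;
}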
diff --git a/lib/Testing/Support/Error.cpp b/lib/Testing/Support/Error.cpp
index 5692cdfcdf7b..a5f8f9b47b3f 100644
--- a/lib/Testing/Support/Error.cpp
+++ b/lib/Testing/Support/Error.cpp
@@ -1,9 +1,8 @@
//===- llvm/Testing/Support/Error.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/TextAPI/ELF/ELFStub.cpp b/lib/TextAPI/ELF/ELFStub.cpp
index 248a078a2404..f8463497093b 100644
--- a/lib/TextAPI/ELF/ELFStub.cpp
+++ b/lib/TextAPI/ELF/ELFStub.cpp
@@ -1,9 +1,8 @@
//===- ELFStub.cpp --------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------------===/
diff --git a/lib/TextAPI/ELF/TBEHandler.cpp b/lib/TextAPI/ELF/TBEHandler.cpp
index b621829d9358..cb597d8896e8 100644
--- a/lib/TextAPI/ELF/TBEHandler.cpp
+++ b/lib/TextAPI/ELF/TBEHandler.cpp
@@ -1,9 +1,8 @@
//===- TBEHandler.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------------===/
diff --git a/lib/TextAPI/MachO/Architecture.cpp b/lib/TextAPI/MachO/Architecture.cpp
new file mode 100644
index 000000000000..a66a982fa153
--- /dev/null
+++ b/lib/TextAPI/MachO/Architecture.cpp
@@ -0,0 +1,77 @@
+//===- Architecture.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the architecture helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+namespace llvm {
+namespace MachO {
+
+Architecture getArchitectureFromCpuType(uint32_t CPUType, uint32_t CPUSubType) {
+#define ARCHINFO(Arch, Type, Subtype) \
+ if (CPUType == (Type) && \
+ (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) == (Subtype)) \
+ return AK_##Arch;
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+
+ return AK_unknown;
+}
+
+Architecture getArchitectureFromName(StringRef Name) {
+ return StringSwitch<Architecture>(Name)
+#define ARCHINFO(Arch, Type, Subtype) .Case(#Arch, AK_##Arch)
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+ .Default(AK_unknown);
+}
+
+StringRef getArchitectureName(Architecture Arch) {
+ switch (Arch) {
+#define ARCHINFO(Arch, Type, Subtype) \
+ case AK_##Arch: \
+ return #Arch;
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+ case AK_unknown:
+ return "unknown";
+ }
+
+ // Appease some compilers that cannot figure out that this is a fully covered
+ // switch statement.
+ return "unknown";
+}
+
+std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch) {
+ switch (Arch) {
+#define ARCHINFO(Arch, Type, Subtype) \
+ case AK_##Arch: \
+ return std::make_pair(Type, Subtype);
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+ case AK_unknown:
+ return std::make_pair(0, 0);
+ }
+
+ // Appease some compilers that cannot figure out that this is a fully covered
+ // switch statement.
+ return std::make_pair(0, 0);
+}
+
+raw_ostream &operator<<(raw_ostream &OS, Architecture Arch) {
+ OS << getArchitectureName(Arch);
+ return OS;
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
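
Editorial note: a hedged sketch of how the helpers above compose (not part of this patch); "arm64" is just an illustrative name drawn from Architecture.def:

  #include "llvm/TextAPI/MachO/Architecture.h"
  #include "llvm/Support/raw_ostream.h"

  void architectureExample() {
    using namespace llvm::MachO;
    Architecture Arch = getArchitectureFromName("arm64");
    if (Arch == AK_unknown)
      return;                                          // name not listed in Architecture.def
    llvm::outs() << getArchitectureName(Arch) << "\n"; // round-trips to "arm64"
    std::pair<uint32_t, uint32_t> CPU = getCPUTypeFromArchitecture(Arch);
    (void)CPU;                                         // {CPU_TYPE, CPU_SUBTYPE} pair
  }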
diff --git a/lib/TextAPI/MachO/ArchitectureSet.cpp b/lib/TextAPI/MachO/ArchitectureSet.cpp
new file mode 100644
index 000000000000..c589671199b7
--- /dev/null
+++ b/lib/TextAPI/MachO/ArchitectureSet.cpp
@@ -0,0 +1,69 @@
+//===- ArchitectureSet.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the architecture set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+
+namespace llvm {
+namespace MachO {
+
+ArchitectureSet::ArchitectureSet(const std::vector<Architecture> &Archs)
+ : ArchitectureSet() {
+ for (auto Arch : Archs) {
+ if (Arch == AK_unknown)
+ continue;
+ set(Arch);
+ }
+}
+
+size_t ArchitectureSet::count() const {
+ // popcnt
+ size_t Cnt = 0;
+ for (unsigned i = 0; i < sizeof(ArchSetType) * 8; ++i)
+ if (ArchSet & (1U << i))
+ ++Cnt;
+ return Cnt;
+}
+
+ArchitectureSet::operator std::string() const {
+ if (empty())
+ return "[(empty)]";
+
+ std::string result;
+ auto size = count();
+ for (auto arch : *this) {
+ result.append(getArchitectureName(arch));
+ size -= 1;
+ if (size)
+ result.append(" ");
+ }
+ return result;
+}
+
+ArchitectureSet::operator std::vector<Architecture>() const {
+ std::vector<Architecture> archs;
+ for (auto arch : *this) {
+ if (arch == AK_unknown)
+ continue;
+ archs.emplace_back(arch);
+ }
+ return archs;
+}
+
+void ArchitectureSet::print(raw_ostream &os) const { os << std::string(*this); }
+
+raw_ostream &operator<<(raw_ostream &os, ArchitectureSet set) {
+ set.print(os);
+ return os;
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
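
Editorial note: a hedged sketch showing the vector constructor, count(), and operator<< defined above in use (not part of this patch):

  #include "llvm/TextAPI/MachO/ArchitectureSet.h"
  #include "llvm/Support/raw_ostream.h"
  #include <vector>

  void architectureSetExample() {
    using namespace llvm::MachO;
    ArchitectureSet Archs(std::vector<Architecture>{AK_armv7, AK_arm64});
    llvm::outs() << Archs << "\n";  // space-separated names, e.g. "armv7 arm64"
    size_t N = Archs.count();       // 2: one bit per architecture in the set
    (void)N;
  }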
diff --git a/lib/TextAPI/MachO/InterfaceFile.cpp b/lib/TextAPI/MachO/InterfaceFile.cpp
new file mode 100644
index 000000000000..54ba8cc31267
--- /dev/null
+++ b/lib/TextAPI/MachO/InterfaceFile.cpp
@@ -0,0 +1,81 @@
+//===- InterfaceFile.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Interface File.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include <iomanip>
+#include <sstream>
+
+namespace llvm {
+namespace MachO {
+namespace detail {
+template <typename C>
+typename C::iterator addEntry(C &Container, StringRef InstallName) {
+ auto I = partition_point(Container, [=](const InterfaceFileRef &O) {
+ return O.getInstallName() < InstallName;
+ });
+ if (I != Container.end() && I->getInstallName() == InstallName)
+ return I;
+
+ return Container.emplace(I, InstallName);
+}
+} // end namespace detail.
+
+void InterfaceFile::addAllowableClient(StringRef Name,
+ ArchitectureSet Architectures) {
+ auto Client = detail::addEntry(AllowableClients, Name);
+ Client->addArchitectures(Architectures);
+}
+
+void InterfaceFile::addReexportedLibrary(StringRef InstallName,
+ ArchitectureSet Architectures) {
+ auto Lib = detail::addEntry(ReexportedLibraries, InstallName);
+ Lib->addArchitectures(Architectures);
+}
+
+void InterfaceFile::addUUID(Architecture Arch, StringRef UUID) {
+ auto I = partition_point(UUIDs,
+ [=](const std::pair<Architecture, std::string> &O) {
+ return O.first < Arch;
+ });
+
+ if (I != UUIDs.end() && Arch == I->first) {
+ I->second = UUID;
+ return;
+ }
+
+ UUIDs.emplace(I, Arch, UUID);
+ return;
+}
+
+void InterfaceFile::addUUID(Architecture Arch, uint8_t UUID[16]) {
+ std::stringstream Stream;
+ for (unsigned i = 0; i < 16; ++i) {
+ if (i == 4 || i == 6 || i == 8 || i == 10)
+ Stream << '-';
+ Stream << std::setfill('0') << std::setw(2) << std::uppercase << std::hex
+ << static_cast<int>(UUID[i]);
+ }
+ addUUID(Arch, Stream.str());
+}
+
+void InterfaceFile::addSymbol(SymbolKind Kind, StringRef Name,
+ ArchitectureSet Archs, SymbolFlags Flags) {
+ Name = copyString(Name);
+ auto result = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr);
+ if (result.second)
+ result.first->second = new (Allocator) Symbol{Kind, Name, Archs, Flags};
+ else
+ result.first->second->addArchitectures(Archs);
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
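
Editorial note: a hedged sketch of the raw-byte addUUID overload above (not part of this patch); the byte values are arbitrary and only the first two are set:

  #include "llvm/TextAPI/MachO/InterfaceFile.h"

  void interfaceFileExample() {
    using namespace llvm::MachO;
    InterfaceFile File;
    uint8_t Raw[16] = {0x12, 0x34};  // remaining bytes zero-initialized
    // Formatted into canonical 8-4-4-4-12 uppercase hex before being stored:
    // "12340000-0000-0000-0000-000000000000".
    File.addUUID(AK_arm64, Raw);
  }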
diff --git a/lib/TextAPI/MachO/PackedVersion.cpp b/lib/TextAPI/MachO/PackedVersion.cpp
new file mode 100644
index 000000000000..8405aba90ed6
--- /dev/null
+++ b/lib/TextAPI/MachO/PackedVersion.cpp
@@ -0,0 +1,113 @@
+//===- PackedVersion.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Mach-O packed version.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace MachO {
+
+bool PackedVersion::parse32(StringRef Str) {
+ Version = 0;
+
+ if (Str.empty())
+ return false;
+
+ SmallVector<StringRef, 3> Parts;
+ SplitString(Str, Parts, ".");
+
+ if (Parts.size() > 3)
+ return false;
+
+ unsigned long long Num;
+ if (getAsUnsignedInteger(Parts[0], 10, Num))
+ return false;
+
+ if (Num > UINT16_MAX)
+ return false;
+
+ Version = Num << 16;
+
+ for (unsigned i = 1, ShiftNum = 8; i < Parts.size(); ++i, ShiftNum -= 8) {
+ if (getAsUnsignedInteger(Parts[i], 10, Num))
+ return false;
+
+ if (Num > UINT8_MAX)
+ return false;
+
+ Version |= (Num << ShiftNum);
+ }
+
+ return true;
+}
+
+std::pair<bool, bool> PackedVersion::parse64(StringRef Str) {
+ bool Truncated = false;
+ Version = 0;
+
+ if (Str.empty())
+ return std::make_pair(false, Truncated);
+
+ SmallVector<StringRef, 5> Parts;
+ SplitString(Str, Parts, ".");
+
+ if (Parts.size() > 5)
+ return std::make_pair(false, Truncated);
+
+ unsigned long long Num;
+ if (getAsUnsignedInteger(Parts[0], 10, Num))
+ return std::make_pair(false, Truncated);
+
+ if (Num > 0xFFFFFFULL)
+ return std::make_pair(false, Truncated);
+
+ if (Num > 0xFFFFULL) {
+ Num = 0xFFFFULL;
+ Truncated = true;
+ }
+ Version = Num << 16;
+
+ for (unsigned i = 1, ShiftNum = 8; i < Parts.size() && i < 3;
+ ++i, ShiftNum -= 8) {
+ if (getAsUnsignedInteger(Parts[i], 10, Num))
+ return std::make_pair(false, Truncated);
+
+ if (Num > 0x3FFULL)
+ return std::make_pair(false, Truncated);
+
+ if (Num > 0xFFULL) {
+ Num = 0xFFULL;
+ Truncated = true;
+ }
+ Version |= (Num << ShiftNum);
+ }
+
+ if (Parts.size() > 3)
+ Truncated = true;
+
+ return std::make_pair(true, Truncated);
+}
+
+void PackedVersion::print(raw_ostream &OS) const {
+ OS << format("%d", getMajor());
+ if (getMinor() || getSubminor())
+ OS << format(".%d", getMinor());
+ if (getSubminor())
+ OS << format(".%d", getSubminor());
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
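
Editorial note: a hedged sketch of the 16.8.8 packing implemented by parse32 above (not part of this patch):

  #include "llvm/TextAPI/MachO/PackedVersion.h"
  #include "llvm/Support/raw_ostream.h"

  void packedVersionExample() {
    using namespace llvm::MachO;
    PackedVersion V;
    if (!V.parse32("1.2.3"))
      return;                      // rejected: malformed or out-of-range component
    // The packed value is now 0x00010203 (major 1, minor 2, subminor 3).
    V.print(llvm::outs());         // prints "1.2.3", omitting trailing zero components
    llvm::outs() << "\n";
  }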
diff --git a/lib/TextAPI/MachO/Symbol.cpp b/lib/TextAPI/MachO/Symbol.cpp
new file mode 100644
index 000000000000..731b264f6082
--- /dev/null
+++ b/lib/TextAPI/MachO/Symbol.cpp
@@ -0,0 +1,49 @@
+//===- Symbol.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the Symbol.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/MachO/Symbol.h"
+#include <string>
+
+namespace llvm {
+namespace MachO {
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void Symbol::dump(raw_ostream &OS) const {
+ std::string Result;
+ if (isUndefined())
+ Result += "(undef) ";
+ if (isWeakDefined())
+ Result += "(weak-def) ";
+ if (isWeakReferenced())
+ Result += "(weak-ref) ";
+ if (isThreadLocalValue())
+ Result += "(tlv) ";
+ switch (Kind) {
+ case SymbolKind::GlobalSymbol:
+ Result += Name.str();
+ break;
+ case SymbolKind::ObjectiveCClass:
+ Result += "(ObjC Class) " + Name.str();
+ break;
+ case SymbolKind::ObjectiveCClassEHType:
+ Result += "(ObjC Class EH) " + Name.str();
+ break;
+ case SymbolKind::ObjectiveCInstanceVariable:
+ Result += "(ObjC IVar) " + Name.str();
+ break;
+ }
+ OS << Result;
+}
+#endif
+
+} // end namespace MachO.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/TextAPIContext.h b/lib/TextAPI/MachO/TextAPIContext.h
new file mode 100644
index 000000000000..3df40f09f7f7
--- /dev/null
+++ b/lib/TextAPI/MachO/TextAPIContext.h
@@ -0,0 +1,33 @@
+//===- TextAPIContext.h ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the YAML Context for the TextAPI Reader/Writer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_MACHO_CONTEXT_H
+#define LLVM_TEXTAPI_MACHO_CONTEXT_H
+
+#include "llvm/Support/MemoryBuffer.h"
+#include <string>
+
+namespace llvm {
+namespace MachO {
+
+enum FileType : unsigned;
+
+struct TextAPIContext {
+ std::string ErrorMessage;
+ std::string Path;
+ FileType FileKind;
+};
+
+} // end namespace MachO.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_MACHO_CONTEXT_H
diff --git a/lib/TextAPI/MachO/TextStub.cpp b/lib/TextAPI/MachO/TextStub.cpp
new file mode 100644
index 000000000000..799ebdc883ab
--- /dev/null
+++ b/lib/TextAPI/MachO/TextStub.cpp
@@ -0,0 +1,660 @@
+//===- TextStub.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the text stub file reader/writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TextAPIContext.h"
+#include "TextStubCommon.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+#include "llvm/TextAPI/MachO/TextAPIReader.h"
+#include "llvm/TextAPI/MachO/TextAPIWriter.h"
+#include <algorithm>
+#include <set>
+
+// clang-format off
+/*
+
+ YAML Format specification.
+
+ The TBD v1 format only supports two-level namespace libraries and is by
+ definition application extension safe.
+
+--- # the tag !tapi-tbd-v1 is optional and
+ # shouldn't be emitted to support older linkers.
+archs: [ armv7, armv7s, arm64 ] # the list of architecture slices that are
+ # supported by this file.
+platform: ios # Specifies the platform (macosx, ios, etc)
+install-name: /u/l/libfoo.dylib #
+current-version: 1.2.3 # Optional: defaults to 1.0
+compatibility-version: 1.0 # Optional: defaults to 1.0
+swift-version: 0 # Optional: defaults to 0
+objc-constraint: none # Optional: defaults to none
+exports: # List of export sections
+...
+
+Each export section is defined as follows:
+
+ - archs: [ arm64 ] # the list of architecture slices
+ allowed-clients: [ client ] # Optional: List of clients
+ re-exports: [ ] # Optional: List of re-exports
+ symbols: [ _sym ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-ivars: [] # Optional: List of Objective C Instance
+ # Variables
+ weak-def-symbols: [] # Optional: List of weak defined symbols
+ thread-local-symbols: [] # Optional: List of thread local symbols
+*/
+
+/*
+
+ YAML Format specification.
+
+--- !tapi-tbd-v2
+archs: [ armv7, armv7s, arm64 ] # the list of architecture slices that are
+ # supported by this file.
+uuids: [ armv7:... ] # Optional: List of architecture and UUID pairs.
+platform: ios # Specifies the platform (macosx, ios, etc)
+flags: [] # Optional:
+install-name: /u/l/libfoo.dylib #
+current-version: 1.2.3 # Optional: defaults to 1.0
+compatibility-version: 1.0 # Optional: defaults to 1.0
+swift-version: 0 # Optional: defaults to 0
+objc-constraint: retain_release # Optional: defaults to retain_release
+parent-umbrella: # Optional:
+exports: # List of export sections
+...
+undefineds: # List of undefineds sections
+...
+
+Each export section is defined as follows:
+
+- archs: [ arm64 ] # the list of architecture slices
+ allowed-clients: [ client ] # Optional: List of clients
+ re-exports: [ ] # Optional: List of re-exports
+ symbols: [ _sym ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-ivars: [] # Optional: List of Objective C Instance
+ # Variables
+ weak-def-symbols: [] # Optional: List of weak defined symbols
+ thread-local-symbols: [] # Optional: List of thread local symbols
+
+Each undefineds section is defined as follows:
+- archs: [ arm64 ] # the list of architecture slices
+ symbols: [ _sym ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-ivars: [] # Optional: List of Objective C Instance Variables
+ weak-ref-symbols: [] # Optional: List of weak defined symbols
+*/
+
+/*
+
+ YAML Format specification.
+
+--- !tapi-tbd-v3
+archs: [ armv7, armv7s, arm64 ] # the list of architecture slices that are
+ # supported by this file.
+uuids: [ armv7:... ] # Optional: List of architecture and UUID pairs.
+platform: ios # Specifies the platform (macosx, ios, etc)
+flags: [] # Optional:
+install-name: /u/l/libfoo.dylib #
+current-version: 1.2.3 # Optional: defaults to 1.0
+compatibility-version: 1.0 # Optional: defaults to 1.0
+swift-abi-version: 0 # Optional: defaults to 0
+objc-constraint: retain_release # Optional: defaults to retain_release
+parent-umbrella: # Optional:
+exports: # List of export sections
+...
+undefineds: # List of undefineds sections
+...
+
+Each export section is defined as follows:
+
+- archs: [ arm64 ] # the list of architecture slices
+ allowed-clients: [ client ] # Optional: List of clients
+ re-exports: [ ] # Optional: List of re-exports
+ symbols: [ _sym ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-eh-types: [] # Optional: List of Objective-C classes
+ # with EH
+ objc-ivars: [] # Optional: List of Objective C Instance
+ # Variables
+ weak-def-symbols: [] # Optional: List of weak defined symbols
+ thread-local-symbols: [] # Optional: List of thread local symbols
+
+Each undefineds section is defined as follows:
+- archs: [ arm64 ] # the list of architecture slices
+ symbols: [ _sym ] # Optional: List of symbols
+ objc-classes: [] # Optional: List of Objective-C classes
+ objc-eh-types: [] # Optional: List of Objective-C classes
+ # with EH
+ objc-ivars: [] # Optional: List of Objective C Instance Variables
+ weak-ref-symbols: [] # Optional: List of weak defined symbols
+*/
+// clang-format on
+
+using namespace llvm;
+using namespace llvm::yaml;
+using namespace llvm::MachO;
+
+namespace {
+struct ExportSection {
+ std::vector<Architecture> Architectures;
+ std::vector<FlowStringRef> AllowableClients;
+ std::vector<FlowStringRef> ReexportedLibraries;
+ std::vector<FlowStringRef> Symbols;
+ std::vector<FlowStringRef> Classes;
+ std::vector<FlowStringRef> ClassEHs;
+ std::vector<FlowStringRef> IVars;
+ std::vector<FlowStringRef> WeakDefSymbols;
+ std::vector<FlowStringRef> TLVSymbols;
+};
+
+struct UndefinedSection {
+ std::vector<Architecture> Architectures;
+ std::vector<FlowStringRef> Symbols;
+ std::vector<FlowStringRef> Classes;
+ std::vector<FlowStringRef> ClassEHs;
+ std::vector<FlowStringRef> IVars;
+ std::vector<FlowStringRef> WeakRefSymbols;
+};
+
+// clang-format off
+enum TBDFlags : unsigned {
+ None = 0U,
+ FlatNamespace = 1U << 0,
+ NotApplicationExtensionSafe = 1U << 1,
+ InstallAPI = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/InstallAPI),
+};
+// clang-format on
+} // end anonymous namespace.
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture)
+LLVM_YAML_IS_SEQUENCE_VECTOR(ExportSection)
+LLVM_YAML_IS_SEQUENCE_VECTOR(UndefinedSection)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<ExportSection> {
+ static void mapping(IO &IO, ExportSection &Section) {
+ const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+ assert((!Ctx || (Ctx && Ctx->FileKind != FileType::Invalid)) &&
+ "File type is not set in YAML context");
+
+ IO.mapRequired("archs", Section.Architectures);
+ if (Ctx->FileKind == FileType::TBD_V1)
+ IO.mapOptional("allowed-clients", Section.AllowableClients);
+ else
+ IO.mapOptional("allowable-clients", Section.AllowableClients);
+ IO.mapOptional("re-exports", Section.ReexportedLibraries);
+ IO.mapOptional("symbols", Section.Symbols);
+ IO.mapOptional("objc-classes", Section.Classes);
+ if (Ctx->FileKind == FileType::TBD_V3)
+ IO.mapOptional("objc-eh-types", Section.ClassEHs);
+ IO.mapOptional("objc-ivars", Section.IVars);
+ IO.mapOptional("weak-def-symbols", Section.WeakDefSymbols);
+ IO.mapOptional("thread-local-symbols", Section.TLVSymbols);
+ }
+};
+
+template <> struct MappingTraits<UndefinedSection> {
+ static void mapping(IO &IO, UndefinedSection &Section) {
+ const auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+ assert((!Ctx || (Ctx && Ctx->FileKind != FileType::Invalid)) &&
+ "File type is not set in YAML context");
+
+ IO.mapRequired("archs", Section.Architectures);
+ IO.mapOptional("symbols", Section.Symbols);
+ IO.mapOptional("objc-classes", Section.Classes);
+ if (Ctx->FileKind == FileType::TBD_V3)
+ IO.mapOptional("objc-eh-types", Section.ClassEHs);
+ IO.mapOptional("objc-ivars", Section.IVars);
+ IO.mapOptional("weak-ref-symbols", Section.WeakRefSymbols);
+ }
+};
+
+template <> struct ScalarBitSetTraits<TBDFlags> {
+ static void bitset(IO &IO, TBDFlags &Flags) {
+ IO.bitSetCase(Flags, "flat_namespace", TBDFlags::FlatNamespace);
+ IO.bitSetCase(Flags, "not_app_extension_safe",
+ TBDFlags::NotApplicationExtensionSafe);
+ IO.bitSetCase(Flags, "installapi", TBDFlags::InstallAPI);
+ }
+};
+
+template <> struct MappingTraits<const InterfaceFile *> {
+ struct NormalizedTBD {
+ explicit NormalizedTBD(IO &IO) {}
+ NormalizedTBD(IO &IO, const InterfaceFile *&File) {
+ Architectures = File->getArchitectures();
+ UUIDs = File->uuids();
+ Platform = File->getPlatform();
+ InstallName = File->getInstallName();
+ CurrentVersion = PackedVersion(File->getCurrentVersion());
+ CompatibilityVersion = PackedVersion(File->getCompatibilityVersion());
+ SwiftABIVersion = File->getSwiftABIVersion();
+ ObjCConstraint = File->getObjCConstraint();
+
+ Flags = TBDFlags::None;
+ if (!File->isApplicationExtensionSafe())
+ Flags |= TBDFlags::NotApplicationExtensionSafe;
+
+ if (!File->isTwoLevelNamespace())
+ Flags |= TBDFlags::FlatNamespace;
+
+ if (File->isInstallAPI())
+ Flags |= TBDFlags::InstallAPI;
+
+ ParentUmbrella = File->getParentUmbrella();
+
+ std::set<ArchitectureSet> ArchSet;
+ for (const auto &Library : File->allowableClients())
+ ArchSet.insert(Library.getArchitectures());
+
+ for (const auto &Library : File->reexportedLibraries())
+ ArchSet.insert(Library.getArchitectures());
+
+ std::map<const Symbol *, ArchitectureSet> SymbolToArchSet;
+ for (const auto *Symbol : File->exports()) {
+ auto Architectures = Symbol->getArchitectures();
+ SymbolToArchSet[Symbol] = Architectures;
+ ArchSet.insert(Architectures);
+ }
+
+ for (auto Architectures : ArchSet) {
+ ExportSection Section;
+ Section.Architectures = Architectures;
+
+ for (const auto &Library : File->allowableClients())
+ if (Library.getArchitectures() == Architectures)
+ Section.AllowableClients.emplace_back(Library.getInstallName());
+
+ for (const auto &Library : File->reexportedLibraries())
+ if (Library.getArchitectures() == Architectures)
+ Section.ReexportedLibraries.emplace_back(Library.getInstallName());
+
+ for (const auto &SymArch : SymbolToArchSet) {
+ if (SymArch.second != Architectures)
+ continue;
+
+ const auto *Symbol = SymArch.first;
+ switch (Symbol->getKind()) {
+ case SymbolKind::GlobalSymbol:
+ if (Symbol->isWeakDefined())
+ Section.WeakDefSymbols.emplace_back(Symbol->getName());
+ else if (Symbol->isThreadLocalValue())
+ Section.TLVSymbols.emplace_back(Symbol->getName());
+ else
+ Section.Symbols.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCClass:
+ if (File->getFileType() != FileType::TBD_V3)
+ Section.Classes.emplace_back(
+ copyString("_" + Symbol->getName().str()));
+ else
+ Section.Classes.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCClassEHType:
+ if (File->getFileType() != FileType::TBD_V3)
+ Section.Symbols.emplace_back(
+ copyString("_OBJC_EHTYPE_$_" + Symbol->getName().str()));
+ else
+ Section.ClassEHs.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCInstanceVariable:
+ if (File->getFileType() != FileType::TBD_V3)
+ Section.IVars.emplace_back(
+ copyString("_" + Symbol->getName().str()));
+ else
+ Section.IVars.emplace_back(Symbol->getName());
+ break;
+ }
+ }
+ llvm::sort(Section.Symbols.begin(), Section.Symbols.end());
+ llvm::sort(Section.Classes.begin(), Section.Classes.end());
+ llvm::sort(Section.ClassEHs.begin(), Section.ClassEHs.end());
+ llvm::sort(Section.IVars.begin(), Section.IVars.end());
+ llvm::sort(Section.WeakDefSymbols.begin(),
+ Section.WeakDefSymbols.end());
+ llvm::sort(Section.TLVSymbols.begin(), Section.TLVSymbols.end());
+ Exports.emplace_back(std::move(Section));
+ }
+
+ ArchSet.clear();
+ SymbolToArchSet.clear();
+
+ for (const auto *Symbol : File->undefineds()) {
+ auto Architectures = Symbol->getArchitectures();
+ SymbolToArchSet[Symbol] = Architectures;
+ ArchSet.insert(Architectures);
+ }
+
+ for (auto Architectures : ArchSet) {
+ UndefinedSection Section;
+ Section.Architectures = Architectures;
+
+ for (const auto &SymArch : SymbolToArchSet) {
+ if (SymArch.second != Architectures)
+ continue;
+
+ const auto *Symbol = SymArch.first;
+ switch (Symbol->getKind()) {
+ case SymbolKind::GlobalSymbol:
+ if (Symbol->isWeakReferenced())
+ Section.WeakRefSymbols.emplace_back(Symbol->getName());
+ else
+ Section.Symbols.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCClass:
+ if (File->getFileType() != FileType::TBD_V3)
+ Section.Classes.emplace_back(
+ copyString("_" + Symbol->getName().str()));
+ else
+ Section.Classes.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCClassEHType:
+ if (File->getFileType() != FileType::TBD_V3)
+ Section.Symbols.emplace_back(
+ copyString("_OBJC_EHTYPE_$_" + Symbol->getName().str()));
+ else
+ Section.ClassEHs.emplace_back(Symbol->getName());
+ break;
+ case SymbolKind::ObjectiveCInstanceVariable:
+ if (File->getFileType() != FileType::TBD_V3)
+ Section.IVars.emplace_back(
+ copyString("_" + Symbol->getName().str()));
+ else
+ Section.IVars.emplace_back(Symbol->getName());
+ break;
+ }
+ }
+ llvm::sort(Section.Symbols.begin(), Section.Symbols.end());
+ llvm::sort(Section.Classes.begin(), Section.Classes.end());
+ llvm::sort(Section.ClassEHs.begin(), Section.ClassEHs.end());
+ llvm::sort(Section.IVars.begin(), Section.IVars.end());
+ llvm::sort(Section.WeakRefSymbols.begin(),
+ Section.WeakRefSymbols.end());
+ Undefineds.emplace_back(std::move(Section));
+ }
+ }
+
+ const InterfaceFile *denormalize(IO &IO) {
+ auto Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+ assert(Ctx);
+
+ auto *File = new InterfaceFile;
+ File->setPath(Ctx->Path);
+ File->setFileType(Ctx->FileKind);
+ for (auto &ID : UUIDs)
+ File->addUUID(ID.first, ID.second);
+ File->setPlatform(Platform);
+ File->setArchitectures(Architectures);
+ File->setInstallName(InstallName);
+ File->setCurrentVersion(CurrentVersion);
+ File->setCompatibilityVersion(CompatibilityVersion);
+ File->setSwiftABIVersion(SwiftABIVersion);
+ File->setObjCConstraint(ObjCConstraint);
+ File->setParentUmbrella(ParentUmbrella);
+
+ if (Ctx->FileKind == FileType::TBD_V1) {
+ File->setTwoLevelNamespace();
+ File->setApplicationExtensionSafe();
+ } else {
+ File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
+ File->setApplicationExtensionSafe(
+ !(Flags & TBDFlags::NotApplicationExtensionSafe));
+ File->setInstallAPI(Flags & TBDFlags::InstallAPI);
+ }
+
+ for (const auto &Section : Exports) {
+ for (const auto &Library : Section.AllowableClients)
+ File->addAllowableClient(Library, Section.Architectures);
+ for (const auto &Library : Section.ReexportedLibraries)
+ File->addReexportedLibrary(Library, Section.Architectures);
+
+ for (const auto &Symbol : Section.Symbols) {
+ if (Ctx->FileKind != FileType::TBD_V3 &&
+ Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType,
+ Symbol.value.drop_front(15), Section.Architectures);
+ else
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+ Section.Architectures);
+ }
+ for (auto &Symbol : Section.Classes) {
+ auto Name = Symbol.value;
+ if (Ctx->FileKind != FileType::TBD_V3)
+ Name = Name.drop_front();
+ File->addSymbol(SymbolKind::ObjectiveCClass, Name,
+ Section.Architectures);
+ }
+ for (auto &Symbol : Section.ClassEHs)
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol,
+ Section.Architectures);
+ for (auto &Symbol : Section.IVars) {
+ auto Name = Symbol.value;
+ if (Ctx->FileKind != FileType::TBD_V3)
+ Name = Name.drop_front();
+ File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
+ Section.Architectures);
+ }
+ for (auto &Symbol : Section.WeakDefSymbols)
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+ Section.Architectures, SymbolFlags::WeakDefined);
+ for (auto &Symbol : Section.TLVSymbols)
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+ Section.Architectures, SymbolFlags::ThreadLocalValue);
+ }
+
+ for (const auto &Section : Undefineds) {
+ for (auto &Symbol : Section.Symbols) {
+ if (Ctx->FileKind != FileType::TBD_V3 &&
+ Symbol.value.startswith("_OBJC_EHTYPE_$_"))
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType,
+ Symbol.value.drop_front(15), Section.Architectures,
+ SymbolFlags::Undefined);
+ else
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+ Section.Architectures, SymbolFlags::Undefined);
+ }
+ for (auto &Symbol : Section.Classes) {
+ auto Name = Symbol.value;
+ if (Ctx->FileKind != FileType::TBD_V3)
+ Name = Name.drop_front();
+ File->addSymbol(SymbolKind::ObjectiveCClass, Name,
+ Section.Architectures, SymbolFlags::Undefined);
+ }
+ for (auto &Symbol : Section.ClassEHs)
+ File->addSymbol(SymbolKind::ObjectiveCClassEHType, Symbol,
+ Section.Architectures, SymbolFlags::Undefined);
+ for (auto &Symbol : Section.IVars) {
+ auto Name = Symbol.value;
+ if (Ctx->FileKind != FileType::TBD_V3)
+ Name = Name.drop_front();
+ File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, Name,
+ Section.Architectures, SymbolFlags::Undefined);
+ }
+ for (auto &Symbol : Section.WeakRefSymbols)
+ File->addSymbol(SymbolKind::GlobalSymbol, Symbol,
+ Section.Architectures,
+ SymbolFlags::Undefined | SymbolFlags::WeakReferenced);
+ }
+
+ return File;
+ }
+
+ llvm::BumpPtrAllocator Allocator;
+ StringRef copyString(StringRef String) {
+ if (String.empty())
+ return {};
+
+ void *Ptr = Allocator.Allocate(String.size(), 1);
+ memcpy(Ptr, String.data(), String.size());
+ return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+ }
+
+ std::vector<Architecture> Architectures;
+ std::vector<UUID> UUIDs;
+ PlatformKind Platform{PlatformKind::unknown};
+ StringRef InstallName;
+ PackedVersion CurrentVersion;
+ PackedVersion CompatibilityVersion;
+ SwiftVersion SwiftABIVersion{0};
+ ObjCConstraintType ObjCConstraint{ObjCConstraintType::None};
+ TBDFlags Flags{TBDFlags::None};
+ StringRef ParentUmbrella;
+ std::vector<ExportSection> Exports;
+ std::vector<UndefinedSection> Undefineds;
+ };
+
+ static void mapping(IO &IO, const InterfaceFile *&File) {
+ auto *Ctx = reinterpret_cast<TextAPIContext *>(IO.getContext());
+ assert((!Ctx || !IO.outputting() ||
+ (Ctx && Ctx->FileKind != FileType::Invalid)) &&
+ "File type is not set in YAML context");
+ MappingNormalization<NormalizedTBD, const InterfaceFile *> Keys(IO, File);
+
+ // Probe the file type when reading.
+ if (!IO.outputting()) {
+ if (IO.mapTag("!tapi-tbd-v2", false))
+ Ctx->FileKind = FileType::TBD_V2;
+ else if (IO.mapTag("!tapi-tbd-v3", false))
+ Ctx->FileKind = FileType::TBD_V3;
+ else if (IO.mapTag("!tapi-tbd-v1", false) ||
+ IO.mapTag("tag:yaml.org,2002:map", false))
+ Ctx->FileKind = FileType::TBD_V1;
+ else {
+ IO.setError("unsupported file type");
+ return;
+ }
+ }
+
+ // Set file type when writing.
+ if (IO.outputting()) {
+ switch (Ctx->FileKind) {
+ default:
+ llvm_unreachable("unexpected file type");
+ case FileType::TBD_V1:
+ // Don't write the tag into the .tbd file for TBD v1.
+ break;
+ case FileType::TBD_V2:
+ IO.mapTag("!tapi-tbd-v2", true);
+ break;
+ case FileType::TBD_V3:
+ IO.mapTag("!tapi-tbd-v3", true);
+ break;
+ }
+ }
+
+ IO.mapRequired("archs", Keys->Architectures);
+ if (Ctx->FileKind != FileType::TBD_V1)
+ IO.mapOptional("uuids", Keys->UUIDs);
+ IO.mapRequired("platform", Keys->Platform);
+ if (Ctx->FileKind != FileType::TBD_V1)
+ IO.mapOptional("flags", Keys->Flags, TBDFlags::None);
+ IO.mapRequired("install-name", Keys->InstallName);
+ IO.mapOptional("current-version", Keys->CurrentVersion,
+ PackedVersion(1, 0, 0));
+ IO.mapOptional("compatibility-version", Keys->CompatibilityVersion,
+ PackedVersion(1, 0, 0));
+ if (Ctx->FileKind != FileType::TBD_V3)
+ IO.mapOptional("swift-version", Keys->SwiftABIVersion, SwiftVersion(0));
+ else
+ IO.mapOptional("swift-abi-version", Keys->SwiftABIVersion,
+ SwiftVersion(0));
+ IO.mapOptional("objc-constraint", Keys->ObjCConstraint,
+ (Ctx->FileKind == FileType::TBD_V1)
+ ? ObjCConstraintType::None
+ : ObjCConstraintType::Retain_Release);
+ if (Ctx->FileKind != FileType::TBD_V1)
+ IO.mapOptional("parent-umbrella", Keys->ParentUmbrella, StringRef());
+ IO.mapOptional("exports", Keys->Exports);
+ if (Ctx->FileKind != FileType::TBD_V1)
+ IO.mapOptional("undefineds", Keys->Undefineds);
+ }
+};
+
+template <>
+struct DocumentListTraits<std::vector<const MachO::InterfaceFile *>> {
+ static size_t size(IO &IO, std::vector<const MachO::InterfaceFile *> &Seq) {
+ return Seq.size();
+ }
+ static const InterfaceFile *&
+ element(IO &IO, std::vector<const InterfaceFile *> &Seq, size_t Index) {
+ if (Index >= Seq.size())
+ Seq.resize(Index + 1);
+ return Seq[Index];
+ }
+};
+
+} // end namespace yaml.
+
+namespace MachO {
+static void DiagHandler(const SMDiagnostic &Diag, void *Context) {
+ auto *File = static_cast<TextAPIContext *>(Context);
+ SmallString<1024> Message;
+ raw_svector_ostream S(Message);
+
+ SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), File->Path,
+ Diag.getLineNo(), Diag.getColumnNo(), Diag.getKind(),
+ Diag.getMessage(), Diag.getLineContents(),
+ Diag.getRanges(), Diag.getFixIts());
+
+ NewDiag.print(nullptr, S);
+ File->ErrorMessage = ("malformed file\n" + Message).str();
+}
+
+Expected<std::unique_ptr<InterfaceFile>>
+TextAPIReader::get(std::unique_ptr<MemoryBuffer> InputBuffer) {
+ TextAPIContext Ctx;
+ Ctx.Path = InputBuffer->getBufferIdentifier();
+ yaml::Input YAMLIn(InputBuffer->getBuffer(), &Ctx, DiagHandler, &Ctx);
+
+ // Fill vector with interface file objects created by parsing the YAML file.
+ std::vector<const InterfaceFile *> Files;
+ YAMLIn >> Files;
+
+ auto File = std::unique_ptr<InterfaceFile>(
+ const_cast<InterfaceFile *>(Files.front()));
+
+ if (YAMLIn.error())
+ return make_error<StringError>(Ctx.ErrorMessage, YAMLIn.error());
+
+ return std::move(File);
+}
+
+Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File) {
+ TextAPIContext Ctx;
+ Ctx.Path = File.getPath();
+ Ctx.FileKind = File.getFileType();
+ llvm::yaml::Output YAMLOut(OS, &Ctx, /*WrapColumn=*/80);
+
+ std::vector<const InterfaceFile *> Files;
+ Files.emplace_back(&File);
+
+ // Stream out yaml.
+ YAMLOut << Files;
+
+ return Error::success();
+}
+
+} // end namespace MachO.
+} // end namespace llvm.
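
Editorial note: a hedged round-trip sketch using only the entry points defined above (not part of this patch). The untagged document is treated as TBD v1 by the mapping logic earlier in this file; the install-name path is illustrative:

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/TextAPI/MachO/TextAPIReader.h"
  #include "llvm/TextAPI/MachO/TextAPIWriter.h"

  llvm::Error roundTripExample() {
    static const char TBD[] = "---\n"
                              "archs: [ arm64 ]\n"
                              "platform: ios\n"
                              "install-name: /usr/lib/libfoo.dylib\n"
                              "...\n";
    auto Buffer = llvm::MemoryBuffer::getMemBuffer(TBD, "libfoo.tbd");
    auto File = llvm::MachO::TextAPIReader::get(std::move(Buffer));
    if (!File)
      return File.takeError();   // parse error, message built by DiagHandler
    return llvm::MachO::TextAPIWriter::writeToStream(llvm::outs(), **File);
  }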
diff --git a/lib/TextAPI/MachO/TextStubCommon.cpp b/lib/TextAPI/MachO/TextStubCommon.cpp
new file mode 100644
index 000000000000..00382cd24573
--- /dev/null
+++ b/lib/TextAPI/MachO/TextStubCommon.cpp
@@ -0,0 +1,178 @@
+//===- TextStubCommon.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements common Text Stub YAML mappings.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TextStubCommon.h"
+#include "TextAPIContext.h"
+
+using namespace llvm::MachO;
+
+namespace llvm {
+namespace yaml {
+
+void ScalarTraits<FlowStringRef>::output(const FlowStringRef &Value, void *Ctx,
+ raw_ostream &OS) {
+ ScalarTraits<StringRef>::output(Value, Ctx, OS);
+}
+StringRef ScalarTraits<FlowStringRef>::input(StringRef Value, void *Ctx,
+ FlowStringRef &Out) {
+ return ScalarTraits<StringRef>::input(Value, Ctx, Out.value);
+}
+QuotingType ScalarTraits<FlowStringRef>::mustQuote(StringRef Name) {
+ return ScalarTraits<StringRef>::mustQuote(Name);
+}
+
+void ScalarEnumerationTraits<ObjCConstraintType>::enumeration(
+ IO &IO, ObjCConstraintType &Constraint) {
+ IO.enumCase(Constraint, "none", ObjCConstraintType::None);
+ IO.enumCase(Constraint, "retain_release", ObjCConstraintType::Retain_Release);
+ IO.enumCase(Constraint, "retain_release_for_simulator",
+ ObjCConstraintType::Retain_Release_For_Simulator);
+ IO.enumCase(Constraint, "retain_release_or_gc",
+ ObjCConstraintType::Retain_Release_Or_GC);
+ IO.enumCase(Constraint, "gc", ObjCConstraintType::GC);
+}
+
+void ScalarTraits<PlatformKind>::output(const PlatformKind &Value, void *,
+ raw_ostream &OS) {
+ switch (Value) {
+ default:
+ llvm_unreachable("unexpected platform");
+ break;
+ case PlatformKind::macOS:
+ OS << "macosx";
+ break;
+ case PlatformKind::iOS:
+ OS << "ios";
+ break;
+ case PlatformKind::watchOS:
+ OS << "watchos";
+ break;
+ case PlatformKind::tvOS:
+ OS << "tvos";
+ break;
+ case PlatformKind::bridgeOS:
+ OS << "bridgeos";
+ break;
+ }
+}
+StringRef ScalarTraits<PlatformKind>::input(StringRef Scalar, void *,
+ PlatformKind &Value) {
+ Value = StringSwitch<PlatformKind>(Scalar)
+ .Case("macosx", PlatformKind::macOS)
+ .Case("ios", PlatformKind::iOS)
+ .Case("watchos", PlatformKind::watchOS)
+ .Case("tvos", PlatformKind::tvOS)
+ .Case("bridgeos", PlatformKind::bridgeOS)
+ .Default(PlatformKind::unknown);
+
+ if (Value == PlatformKind::unknown)
+ return "unknown platform";
+ return {};
+}
+QuotingType ScalarTraits<PlatformKind>::mustQuote(StringRef) {
+ return QuotingType::None;
+}
+
+void ScalarBitSetTraits<ArchitectureSet>::bitset(IO &IO,
+ ArchitectureSet &Archs) {
+#define ARCHINFO(arch, type, subtype) \
+ IO.bitSetCase(Archs, #arch, 1U << static_cast<int>(AK_##arch));
+#include "llvm/TextAPI/MachO/Architecture.def"
+#undef ARCHINFO
+}
+
+void ScalarTraits<Architecture>::output(const Architecture &Value, void *,
+ raw_ostream &OS) {
+ OS << Value;
+}
+StringRef ScalarTraits<Architecture>::input(StringRef Scalar, void *,
+ Architecture &Value) {
+ Value = getArchitectureFromName(Scalar);
+ return {};
+}
+QuotingType ScalarTraits<Architecture>::mustQuote(StringRef) {
+ return QuotingType::None;
+}
+
+void ScalarTraits<PackedVersion>::output(const PackedVersion &Value, void *,
+ raw_ostream &OS) {
+ OS << Value;
+}
+StringRef ScalarTraits<PackedVersion>::input(StringRef Scalar, void *,
+ PackedVersion &Value) {
+ if (!Value.parse32(Scalar))
+ return "invalid packed version string.";
+ return {};
+}
+QuotingType ScalarTraits<PackedVersion>::mustQuote(StringRef) {
+ return QuotingType::None;
+}
+
+void ScalarTraits<SwiftVersion>::output(const SwiftVersion &Value, void *,
+ raw_ostream &OS) {
+ switch (Value) {
+ case 1:
+ OS << "1.0";
+ break;
+ case 2:
+ OS << "1.1";
+ break;
+ case 3:
+ OS << "2.0";
+ break;
+ case 4:
+ OS << "3.0";
+ break;
+ default:
+ OS << (unsigned)Value;
+ break;
+ }
+}
+StringRef ScalarTraits<SwiftVersion>::input(StringRef Scalar, void *,
+ SwiftVersion &Value) {
+ Value = StringSwitch<SwiftVersion>(Scalar)
+ .Case("1.0", 1)
+ .Case("1.1", 2)
+ .Case("2.0", 3)
+ .Case("3.0", 4)
+ .Default(0);
+ if (Value != SwiftVersion(0))
+ return {};
+
+ if (Scalar.getAsInteger(10, Value))
+ return "invalid Swift ABI version.";
+
+ return StringRef();
+}
+QuotingType ScalarTraits<SwiftVersion>::mustQuote(StringRef) {
+ return QuotingType::None;
+}
+
+void ScalarTraits<UUID>::output(const UUID &Value, void *, raw_ostream &OS) {
+ OS << Value.first << ": " << Value.second;
+}
+StringRef ScalarTraits<UUID>::input(StringRef Scalar, void *, UUID &Value) {
+ auto Split = Scalar.split(':');
+ auto Arch = Split.first.trim();
+ auto UUID = Split.second.trim();
+ if (UUID.empty())
+ return "invalid uuid string pair";
+ Value.first = getArchitectureFromName(Arch);
+ Value.second = UUID;
+ return {};
+}
+QuotingType ScalarTraits<UUID>::mustQuote(StringRef) {
+ return QuotingType::Single;
+}
+
+} // end namespace yaml.
+} // end namespace llvm.
diff --git a/lib/TextAPI/MachO/TextStubCommon.h b/lib/TextAPI/MachO/TextStubCommon.h
new file mode 100644
index 000000000000..c4dd1075b1c8
--- /dev/null
+++ b/lib/TextAPI/MachO/TextStubCommon.h
@@ -0,0 +1,81 @@
+//===- TextStubCommon.h ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines common Text Stub YAML mappings.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_TEXT_STUB_COMMON_H
+#define LLVM_TEXTAPI_TEXT_STUB_COMMON_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/TextAPI/MachO/Architecture.h"
+#include "llvm/TextAPI/MachO/ArchitectureSet.h"
+#include "llvm/TextAPI/MachO/InterfaceFile.h"
+#include "llvm/TextAPI/MachO/PackedVersion.h"
+
+using UUID = std::pair<llvm::MachO::Architecture, std::string>;
+
+LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, FlowStringRef)
+LLVM_YAML_STRONG_TYPEDEF(uint8_t, SwiftVersion)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(UUID)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(FlowStringRef)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarTraits<FlowStringRef> {
+ static void output(const FlowStringRef &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, FlowStringRef &);
+ static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarEnumerationTraits<MachO::ObjCConstraintType> {
+ static void enumeration(IO &, MachO::ObjCConstraintType &);
+};
+
+template <> struct ScalarTraits<MachO::PlatformKind> {
+ static void output(const MachO::PlatformKind &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, MachO::PlatformKind &);
+ static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarBitSetTraits<MachO::ArchitectureSet> {
+ static void bitset(IO &, MachO::ArchitectureSet &);
+};
+
+template <> struct ScalarTraits<MachO::Architecture> {
+ static void output(const MachO::Architecture &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, MachO::Architecture &);
+ static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarTraits<MachO::PackedVersion> {
+ static void output(const MachO::PackedVersion &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, MachO::PackedVersion &);
+ static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarTraits<SwiftVersion> {
+ static void output(const SwiftVersion &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, SwiftVersion &);
+ static QuotingType mustQuote(StringRef);
+};
+
+template <> struct ScalarTraits<UUID> {
+ static void output(const UUID &, void *, raw_ostream &);
+ static StringRef input(StringRef, void *, UUID &);
+ static QuotingType mustQuote(StringRef);
+};
+
+} // end namespace yaml.
+} // end namespace llvm.
+
+#endif // LLVM_TEXTAPI_TEXT_STUB_COMMON_H
diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index c5a28d4f1c08..0b406cc531a4 100644
--- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -1,9 +1,8 @@
//===- DlltoolDriver.cpp - dlltool.exe-compatible driver ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -109,7 +108,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
}
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
- llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+ llvm::errs() << "ignoring unknown argument: " << Arg->getAsString(Args)
+ << "\n";
if (!Args.hasArg(OPT_d)) {
llvm::errs() << "no definition file specified\n";
diff --git a/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 64f4fe423f25..18ab6637305e 100644
--- a/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -1,9 +1,8 @@
//===- LibDriver.cpp - lib.exe-compatible driver --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,8 +13,12 @@
#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Object/ArchiveWriter.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/WindowsMachineFlag.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
@@ -97,6 +100,47 @@ static std::string findInputFile(StringRef File, ArrayRef<StringRef> Paths) {
return "";
}
+static void fatalOpenError(llvm::Error E, Twine File) {
+ if (!E)
+ return;
+ handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EIB) {
+ llvm::errs() << "error opening '" << File << "': " << EIB.message() << '\n';
+ exit(1);
+ });
+}
+
+static void doList(opt::InputArgList& Args) {
+ // lib.exe prints the contents of the first archive file.
+ std::unique_ptr<MemoryBuffer> B;
+ for (auto *Arg : Args.filtered(OPT_INPUT)) {
+ // Create or open the archive object.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MaybeBuf =
+ MemoryBuffer::getFile(Arg->getValue(), -1, false);
+ fatalOpenError(errorCodeToError(MaybeBuf.getError()), Arg->getValue());
+
+ if (identify_magic(MaybeBuf.get()->getBuffer()) == file_magic::archive) {
+ B = std::move(MaybeBuf.get());
+ break;
+ }
+ }
+
+ // lib.exe doesn't print an error if no .lib files are passed.
+ if (!B)
+ return;
+
+ Error Err = Error::success();
+ object::Archive Archive(B.get()->getMemBufferRef(), Err);
+ fatalOpenError(std::move(Err), B->getBufferIdentifier());
+
+ for (auto &C : Archive.children(Err)) {
+ Expected<StringRef> NameOrErr = C.getName();
+ fatalOpenError(NameOrErr.takeError(), B->getBufferIdentifier());
+ StringRef Name = NameOrErr.get();
+ llvm::outs() << Name << '\n';
+ }
+ fatalOpenError(std::move(Err), B->getBufferIdentifier());
+}
+
int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);
@@ -119,7 +163,8 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
return 1;
}
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
- llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+ llvm::errs() << "ignoring unknown argument: " << Arg->getAsString(Args)
+ << "\n";
// Handle /help
if (Args.hasArg(OPT_help)) {
@@ -131,8 +176,25 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
if (!Args.hasArgNoClaim(OPT_INPUT))
return 0;
+ if (Args.hasArg(OPT_lst)) {
+ doList(Args);
+ return 0;
+ }
+
std::vector<StringRef> SearchPaths = getSearchPaths(&Args, Saver);
+ COFF::MachineTypes LibMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+ std::string LibMachineSource;
+ if (auto *Arg = Args.getLastArg(OPT_machine)) {
+ LibMachine = getMachineType(Arg->getValue());
+ if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ llvm::errs() << "unknown /machine: arg " << Arg->getValue() << '\n';
+ return 1;
+ }
+ LibMachineSource =
+ std::string(" (from '/machine:") + Arg->getValue() + "' flag)";
+ }
+
// Create a NewArchiveMember for each input file.
std::vector<NewArchiveMember> Members;
for (auto *Arg : Args.filtered(OPT_INPUT)) {
@@ -158,11 +220,95 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
<< ": not a COFF object, bitcode or resource file\n";
return 1;
}
+
+ // Check that all input files have the same machine type.
+ // Mixing normal objects and LTO bitcode files is fine as long as they
+ // have the same machine type.
+ // Doing this here duplicates the header parsing work that writeArchive()
+ // below does, but it's not a lot of work and it's a bit awkward to do
+ // in writeArchive() which needs to support many tools, can't assume the
+ // input is COFF, and doesn't have a good way to report errors.
+ COFF::MachineTypes FileMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+ if (Magic == file_magic::coff_object) {
+ std::error_code EC;
+ object::COFFObjectFile Obj(*MOrErr->Buf, EC);
+ if (EC) {
+ llvm::errs() << Arg->getValue() << ": failed to open: " << EC.message()
+ << '\n';
+ return 1;
+ }
+ uint16_t Machine = Obj.getMachine();
+ if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
+ Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
+ Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
+ Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
+ llvm::errs() << Arg->getValue() << ": unknown machine: " << Machine
+ << '\n';
+ return 1;
+ }
+ FileMachine = static_cast<COFF::MachineTypes>(Machine);
+ } else if (Magic == file_magic::bitcode) {
+ Expected<std::string> TripleStr = getBitcodeTargetTriple(*MOrErr->Buf);
+ if (!TripleStr) {
+ llvm::errs() << Arg->getValue()
+ << ": failed to get target triple from bitcode\n";
+ return 1;
+ }
+ switch (Triple(*TripleStr).getArch()) {
+ case Triple::x86:
+ FileMachine = COFF::IMAGE_FILE_MACHINE_I386;
+ break;
+ case Triple::x86_64:
+ FileMachine = COFF::IMAGE_FILE_MACHINE_AMD64;
+ break;
+ case Triple::arm:
+ FileMachine = COFF::IMAGE_FILE_MACHINE_ARMNT;
+ break;
+ case Triple::aarch64:
+ FileMachine = COFF::IMAGE_FILE_MACHINE_ARM64;
+ break;
+ default:
+ llvm::errs() << Arg->getValue() << ": unknown arch in target triple "
+ << *TripleStr << '\n';
+ return 1;
+ }
+ }
+
+ // FIXME: Once lld-link rejects multiple resource .obj files:
+ // Call convertResToCOFF() on .res files and add the resulting
+ // COFF file to the .lib output instead of adding the .res file, and remove
+ // this check. See PR42180.
+ if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ LibMachine = FileMachine;
+ LibMachineSource = std::string(" (inferred from earlier file '") +
+ Arg->getValue() + "')";
+ } else if (LibMachine != FileMachine) {
+ llvm::errs() << Arg->getValue() << ": file machine type "
+ << machineToStr(FileMachine)
+ << " conflicts with library machine type "
+ << machineToStr(LibMachine) << LibMachineSource << '\n';
+ return 1;
+ }
+ }
+
Members.emplace_back(std::move(*MOrErr));
}
// Create an archive file.
std::string OutputPath = getOutputPath(&Args, Members[0]);
+ // llvm-lib uses relative paths for both regular and thin archives, unlike
+ // standard GNU ar, which only uses relative paths for thin archives and
+ // basenames for regular archives.
+ for (NewArchiveMember &Member : Members) {
+ if (sys::path::is_relative(Member.MemberName)) {
+ Expected<std::string> PathOrErr =
+ computeArchiveRelativePath(OutputPath, Member.MemberName);
+ if (PathOrErr)
+ Member.MemberName = Saver.save(*PathOrErr);
+ }
+ }
+
if (Error E =
writeArchive(OutputPath, Members,
/*WriteSymtab=*/true, object::Archive::K_GNU,
diff --git a/lib/ToolDrivers/llvm-lib/Options.td b/lib/ToolDrivers/llvm-lib/Options.td
index dd41952b7878..7863196126a8 100644
--- a/lib/ToolDrivers/llvm-lib/Options.td
+++ b/lib/ToolDrivers/llvm-lib/Options.td
@@ -3,27 +3,33 @@ include "llvm/Option/OptParser.td"
// lib.exe accepts options starting with either a dash or a slash.
// Flag that takes no arguments.
-class F<string name> : Flag<["/", "-", "-?"], name>;
+class F<string name> : Flag<["/", "-", "/?", "-?"], name>;
// Flag that takes one argument after ":".
class P<string name, string help> :
- Joined<["/", "-", "-?"], name#":">, HelpText<help>;
+ Joined<["/", "-", "/?", "-?"], name#":">, HelpText<help>;
def libpath: P<"libpath", "Object file search path">;
+
+// Can't be called "list" since that's a keyword.
+def lst : F<"list">, HelpText<"List contents of .lib file on stdout">;
def out : P<"out", "Path to file to write output">;
def llvmlibthin : F<"llvmlibthin">,
HelpText<"Make .lib point to .obj files instead of copying their contents">;
+def machine: P<"machine", "Specify target platform">;
+
def help : F<"help">;
-def help_q : Flag<["/?", "-?"], "">, Alias<help>;
+
+// /?? and -?? must be before /? and -? to not confuse lib/Options.
+def help_q : Flag<["/??", "-??", "/?", "-?"], "">, Alias<help>;
//==============================================================================
// The flags below do nothing. They are defined only for lib.exe compatibility.
//==============================================================================
-class QF<string name> : Joined<["/", "-", "-?"], name#":">;
+class QF<string name> : Joined<["/", "-", "/?", "-?"], name#":">;
def ignore : QF<"ignore">;
-def machine: QF<"machine">;
def nologo : F<"nologo">;
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index c795866ec0f2..06222d7e7e44 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1,9 +1,8 @@
//===- AggressiveInstCombine.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index f3c8bde9f8ff..44e1c45664e7 100644
--- a/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -1,9 +1,8 @@
//===- AggressiveInstCombineInternal.h --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 8289b2d68f8a..7c5767912fd3 100644
--- a/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -1,9 +1,8 @@
//===- TruncInstCombine.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp
index 359876627fce..1fb0a114d0c7 100644
--- a/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -1,9 +1,8 @@
//===- CoroCleanup.cpp - Coroutine Cleanup Pass ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass lowers all remaining coroutine intrinsics.
@@ -50,7 +49,7 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
Builder.SetInsertPoint(SubFn);
auto *FramePtr = Builder.CreateBitCast(FrameRaw, FramePtrTy);
auto *Gep = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
- auto *Load = Builder.CreateLoad(Gep);
+ auto *Load = Builder.CreateLoad(FrameTy->getElementType(Index), Gep);
SubFn->replaceAllUsesWith(Load);
}
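
The CreateLoad call above now names the loaded type explicitly instead of deriving it from the pointer operand. A minimal sketch of that pattern, not part of the patch, assuming FrameTy (a StructType *), FramePtr, an unsigned Index, and an insertion point InsertPt are in scope:

    IRBuilder<> B(InsertPt);
    // Address of frame field Index, then a load whose result type is spelled out.
    Value *Addr = B.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
    Value *Val  = B.CreateLoad(FrameTy->getElementType(Index), Addr);
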
diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp
index ac47a06281a5..692697d6f32e 100644
--- a/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -1,9 +1,8 @@
//===- CoroEarly.cpp - Coroutine Early Function Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass lowers coroutine intrinsics that hide the details of the exact
@@ -98,7 +97,7 @@ void Lowerer::lowerCoroDone(IntrinsicInst *II) {
Builder.SetInsertPoint(II);
auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy);
auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0);
- auto *Load = Builder.CreateLoad(Gep);
+ auto *Load = Builder.CreateLoad(FrameTy, Gep);
auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
II->replaceAllUsesWith(Cond);
@@ -114,7 +113,7 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
StructType *FrameTy = StructType::create(C, "NoopCoro.Frame");
auto *FramePtrTy = FrameTy->getPointerTo();
auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
- /*IsVarArgs=*/false);
+ /*isVarArg=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
FrameTy->setBody({FnPtrTy, FnPtrTy});
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index 58f952b54f3a..6707aa1c827d 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -1,9 +1,8 @@
//===- CoroElide.cpp - Coroutine Frame Allocation Elision Pass ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass replaces dynamic allocation of coroutine frame with alloca and
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index 4cb0a52961cc..58bf22bee29b 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1,9 +1,8 @@
//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains classes used to discover if for a particular value
@@ -53,7 +52,7 @@ public:
}
size_t blockToIndex(BasicBlock *BB) const {
- auto *I = std::lower_bound(V.begin(), V.end(), BB);
+ auto *I = llvm::lower_bound(V, BB);
assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
return I - V.begin();
}
@@ -379,7 +378,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
StructType *FrameTy = StructType::create(C, Name);
auto *FramePtrTy = FrameTy->getPointerTo();
auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
- /*IsVarArgs=*/false);
+ /*isVarArg=*/false);
auto *FnPtrTy = FnTy->getPointerTo();
// Figure out how wide should be an integer type storing the suspend index.
@@ -403,6 +402,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
if (CurrentDef == Shape.PromiseAlloca)
continue;
+ uint64_t Count = 1;
Type *Ty = nullptr;
if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
Ty = AI->getAllocatedType();
@@ -414,11 +414,18 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
Padder.addType(PaddingTy);
}
}
+ if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+ Count = CI->getValue().getZExtValue();
+ else
+ report_fatal_error("Coroutines cannot handle non static allocas yet");
} else {
Ty = CurrentDef->getType();
}
S.setFieldIndex(Types.size());
- Types.push_back(Ty);
+ if (Count == 1)
+ Types.push_back(Ty);
+ else
+ Types.push_back(ArrayType::get(Ty, Count));
Padder.addType(Ty);
}
FrameTy->setBody(Types);
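
The hunk above sizes a frame slot for allocas with a constant array size by wrapping the element type in an array. A hedged sketch of that choice, assuming AI is an AllocaInst * in scope:

    uint64_t Count = 1;
    if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
      Count = CI->getZExtValue();
    else
      report_fatal_error("Coroutines cannot handle non static allocas yet");
    Type *Slot = AI->getAllocatedType();
    if (Count != 1)
      Slot = ArrayType::get(Slot, Count); // one field holding all elements
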
@@ -471,11 +478,12 @@ static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
//
static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
+ LLVMContext &C = CB->getContext();
IRBuilder<> Builder(CB->getNextNode());
- PointerType *FramePtrTy = Shape.FrameTy->getPointerTo();
+ StructType *FrameTy = Shape.FrameTy;
+ PointerType *FramePtrTy = FrameTy->getPointerTo();
auto *FramePtr =
cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
- Type *FrameTy = FramePtrTy->getElementType();
Value *CurrentValue = nullptr;
BasicBlock *CurrentBlock = nullptr;
@@ -492,17 +500,41 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
if (Shape.PromiseAlloca)
Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField);
+ // Create a GEP with the given index into the coroutine frame for the original
+ // value Orig. Appends an extra 0 index for array-allocas, preserving the
+ // original type.
+ auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
+ SmallVector<Value *, 3> Indices = {
+ ConstantInt::get(Type::getInt32Ty(C), 0),
+ ConstantInt::get(Type::getInt32Ty(C), Index),
+ };
+
+ if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
+ if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ auto Count = CI->getValue().getZExtValue();
+ if (Count > 1) {
+ Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
+ }
+ } else {
+ report_fatal_error("Coroutines cannot handle non static allocas yet");
+ }
+ }
+
+ return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
+ };
+
// Create a load instruction to reload the spilled value from the coroutine
// frame.
auto CreateReload = [&](Instruction *InsertBefore) {
assert(Index && "accessing unassigned field number");
Builder.SetInsertPoint(InsertBefore);
- auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index,
- CurrentValue->getName() +
- Twine(".reload.addr"));
+
+ auto *G = GetFramePointer(Index, CurrentValue);
+ G->setName(CurrentValue->getName() + Twine(".reload.addr"));
+
return isa<AllocaInst>(CurrentValue)
? G
- : Builder.CreateLoad(G,
+ : Builder.CreateLoad(FrameTy->getElementType(Index), G,
CurrentValue->getName() + Twine(".reload"));
};
@@ -589,8 +621,8 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
// If we found any allocas, replace all of their remaining uses with Geps.
for (auto &P : Allocas) {
- auto *G =
- Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, P.second);
+ auto *G = GetFramePointer(P.second, P.first);
+
// We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) here,
// as we are changing location of the instruction.
G->takeName(P.first);
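
GetFramePointer above appends a trailing zero index so that array-typed frame fields still hand back a pointer to a single element. A small illustrative sketch, assuming Builder, FrameTy, FramePtr, a context C, a field index Index, and a flag FieldIsArray are in scope:

    SmallVector<Value *, 3> Indices = {
        ConstantInt::get(Type::getInt32Ty(C), 0),
        ConstantInt::get(Type::getInt32Ty(C), Index)};
    if (FieldIsArray) // step into the array so the original element type is kept
      Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
    Value *Addr = Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
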
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index 9a8cc5a2591c..5e19d7642e38 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -1,9 +1,8 @@
//===-- CoroInstr.h - Coroutine Intrinsics Instruction Wrappers -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines classes that make it really easy to deal with intrinsic
diff --git a/lib/Transforms/Coroutines/CoroInternal.h b/lib/Transforms/Coroutines/CoroInternal.h
index 8e690d649cf5..441c8a20f1f3 100644
--- a/lib/Transforms/Coroutines/CoroInternal.h
+++ b/lib/Transforms/Coroutines/CoroInternal.h
@@ -1,9 +1,8 @@
//===- CoroInternal.h - Internal Coroutine interfaces ---------*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Common definitions/declarations used internally by coroutine lowering passes.
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 9eeceb217ba8..5458e70ff16a 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1,9 +1,8 @@
//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass builds the coroutine frame and outlines resume and destroy parts
@@ -94,7 +93,7 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
auto *FrameTy = Shape.FrameTy;
auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
- auto *Index = Builder.CreateLoad(GepIndex, "index");
+ auto *Index = Builder.CreateLoad(Shape.getIndexType(), GepIndex, "index");
auto *Switch =
Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
Shape.ResumeSwitch = Switch;
@@ -230,7 +229,8 @@ static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr,
Builder.SetInsertPoint(OldSwitchBB->getTerminator());
auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr,
0, 0, "ResumeFn.addr");
- auto *Load = Builder.CreateLoad(GepIndex);
+ auto *Load = Builder.CreateLoad(
+ Shape.FrameTy->getElementType(coro::Shape::ResumeField), GepIndex);
auto *NullPtr =
ConstantPointerNull::get(cast<PointerType>(Load->getType()));
auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
@@ -777,6 +777,8 @@ static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) {
}
static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
+ EliminateUnreachableBlocks(F);
+
coro::Shape Shape(F);
if (!Shape.CoroBegin)
return;
@@ -828,6 +830,7 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
// split.
static void prepareForSplit(Function &F, CallGraph &CG) {
Module &M = *F.getParent();
+ LLVMContext &Context = F.getContext();
#ifndef NDEBUG
Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
assert(DevirtFn && "coro.devirt.trigger function not found");
@@ -842,10 +845,12 @@ static void prepareForSplit(Function &F, CallGraph &CG) {
// call void %1(i8* null)
coro::LowererBase Lowerer(M);
Instruction *InsertPt = F.getEntryBlock().getTerminator();
- auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext()));
+ auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(Context));
auto *DevirtFnAddr =
Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
- auto *IndirectCall = CallInst::Create(DevirtFnAddr, Null, "", InsertPt);
+ FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Context),
+ {Type::getInt8PtrTy(Context)}, false);
+ auto *IndirectCall = CallInst::Create(FnTy, DevirtFnAddr, Null, "", InsertPt);
// Update CG graph with an indirect call we just added.
CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
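
The CallInst::Create change above passes the callee's FunctionType explicitly rather than letting it be recovered from the pointer's pointee type. A hedged sketch of the same form, assuming a context Ctx, a callee pointer CalleePtr, an i8* argument Arg, and an insertion point InsertPt:

    FunctionType *FnTy = FunctionType::get(Type::getVoidTy(Ctx),
                                           {Type::getInt8PtrTy(Ctx)},
                                           /*isVarArg=*/false);
    CallInst *Call = CallInst::Create(FnTy, CalleePtr, {Arg}, "", InsertPt);
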
@@ -861,7 +866,7 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
LLVMContext &C = M.getContext();
auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
- /*IsVarArgs=*/false);
+ /*isVarArg=*/false);
Function *DevirtFn =
Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
CORO_DEVIRT_TRIGGER_FN, &M);
@@ -941,7 +946,12 @@ struct CoroSplit : public CallGraphSCCPass {
char CoroSplit::ID = 0;
-INITIALIZE_PASS(
+INITIALIZE_PASS_BEGIN(
+ CoroSplit, "coro-split",
+ "Split coroutine into a set of functions driving its state machine", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(
CoroSplit, "coro-split",
"Split coroutine into a set of functions driving its state machine", false,
false)
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index cf84f916e24b..a581d1d21169 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -1,9 +1,8 @@
//===- Coroutines.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -177,15 +176,15 @@ static void buildCGN(CallGraph &CG, CallGraphNode *Node) {
// Look for calls by this function.
for (Instruction &I : instructions(F))
- if (CallSite CS = CallSite(cast<Value>(&I))) {
- const Function *Callee = CS.getCalledFunction();
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
// Indirect calls of intrinsics are not allowed so no need to check.
// We can be more precise here by using TargetArg returned by
// Intrinsic::isLeaf.
- Node->addCalledFunction(CS, CG.getCallsExternalNode());
+ Node->addCalledFunction(Call, CG.getCallsExternalNode());
else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, CG.getOrInsertFunction(Callee));
+ Node->addCalledFunction(Call, CG.getOrInsertFunction(Callee));
}
}
diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp
index 07138718ce2c..c50805692b98 100644
--- a/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -1,9 +1,8 @@
//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,8 +31,17 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
-PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) {
- InlineFunctionInfo IFI;
+PreservedAnalyses AlwaysInlinerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ // Add inline assumptions during code generation.
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+
SmallSetVector<CallSite, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
@@ -146,11 +154,20 @@ InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
Function *Callee = CS.getCalledFunction();
// Only inline direct calls to functions with always-inline attributes
- // that are viable for inlining. FIXME: We shouldn't even get here for
- // declarations.
- if (Callee && !Callee->isDeclaration() &&
- CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
- return InlineCost::getAlways("always inliner");
+ // that are viable for inlining.
+ if (!Callee)
+ return InlineCost::getNever("indirect call");
+
+ // FIXME: We shouldn't even get here for declarations.
+ if (Callee->isDeclaration())
+ return InlineCost::getNever("no definition");
+
+ if (!CS.hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getNever("no alwaysinline attribute");
+
+ auto IsViable = isInlineViable(*Callee);
+ if (!IsViable)
+ return InlineCost::getNever(IsViable.message);
- return InlineCost::getNever("always inliner");
+ return InlineCost::getAlways("always inliner");
}
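
The rewrite above relies on isInlineViable returning a result that both converts to bool and carries a message, so every rejection can name its reason. A minimal usage sketch, assuming Callee is a defined Function &:

    InlineResult Viable = isInlineViable(Callee);
    if (!Viable)
      errs() << "cannot inline: " << Viable.message << "\n";
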
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 4663de0b049e..95a9f31cced3 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -1,9 +1,8 @@
//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,11 +58,13 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -243,6 +244,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
const AttributeList &CallPAL = CS.getAttributes();
+ IRBuilder<NoFolder> IRB(Call);
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -261,10 +263,11 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+ auto *Idx =
+ IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i));
// TODO: Tell AA about the new values?
- Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
+ Args.push_back(IRB.CreateLoad(STy->getElementType(i), Idx,
+ Idx->getName() + ".val"));
ArgAttrVec.push_back(AttributeSet());
}
} else if (!I->use_empty()) {
@@ -294,13 +297,13 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
- V->getName() + ".idx", Call);
+ V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx");
Ops.clear();
}
// Since we're replacing a load make sure we take the alignment
// of the previous load.
- LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
+ LoadInst *newLoad =
+ IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
newLoad->setAlignment(OrigLoad->getAlignment());
// Transfer the AA info too.
AAMDNodes AAInfo;
@@ -476,9 +479,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
return NF;
}
-/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
-/// all callees pass in a valid pointer for the specified function argument.
-static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
+/// Return true if we can prove that all callers pass in a valid pointer for the
+/// specified function argument.
+static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
@@ -490,7 +493,7 @@ static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
+ if (!isDereferenceablePointer(CS.getArgument(ArgNo), Ty, DL))
return false;
}
return true;
@@ -563,8 +566,8 @@ static void markIndicesSafe(const IndicesVector &ToMark,
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
- AAResults &AAR, unsigned MaxElements) {
+static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
+ unsigned MaxElements) {
using GEPIndicesSet = std::set<IndicesVector>;
// Quick exit for unused arguments
@@ -586,9 +589,6 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
//
// This set will contain all sets of indices that are loaded in the entry
// block, and thus are safe to unconditionally load in the caller.
- //
- // This optimization is also safe for InAlloca parameters, because it verifies
- // that the address isn't captured.
GEPIndicesSet SafeToUnconditionallyLoad;
// This set contains all the sets of indices that we are planning to promote.
@@ -596,9 +596,28 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
+
+ if (ByValTy)
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+ // Whenever a new underlying type for the operand is found, make sure it's
+ // consistent with the GEPs and loads we've already seen and, if necessary,
+ // use it to see if all incoming pointers are valid (which implies the 0-index
+ // is safe).
+ Type *BaseTy = ByValTy;
+ auto UpdateBaseTy = [&](Type *NewBaseTy) {
+ if (BaseTy)
+ return BaseTy == NewBaseTy;
+
+ BaseTy = NewBaseTy;
+ if (allCallersPassValidPointerForArgument(Arg, BaseTy)) {
+ assert(SafeToUnconditionallyLoad.empty());
+ SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
+ }
+
+ return true;
+ };
+
// First, iterate the entry block and mark loads of (geps of) arguments as
// safe.
BasicBlock &EntryBlock = Arg->getParent()->front();
@@ -621,6 +640,9 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// right away, can't promote this argument at all.
return false;
+ if (!UpdateBaseTy(GEP->getSourceElementType()))
+ return false;
+
// Indices checked out, mark them as safe
markIndicesSafe(Indices, SafeToUnconditionallyLoad);
Indices.clear();
@@ -628,6 +650,11 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
} else if (V == Arg) {
// Direct loads are equivalent to a GEP with a single 0 index.
markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+
+ if (BaseTy && LI->getType() != BaseTy)
+ return false;
+
+ BaseTy = LI->getType();
}
}
@@ -645,6 +672,9 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
+
+ if (!UpdateBaseTy(LI->getType()))
+ return false;
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UR)) {
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
@@ -653,10 +683,12 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just do increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
- MaxElements);
+ return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements);
}
+ if (!UpdateBaseTy(GEP->getSourceElementType()))
+ return false;
+
// Ensure that all of the indices are constants.
for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
++i)
@@ -853,6 +885,11 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
if (F->isVarArg())
return nullptr;
+ // Don't transform functions that receive inallocas, as the transformation may
+ // not be safe depending on calling convention.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ return nullptr;
+
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument *, 16> PointerArgs;
for (Argument &I : F->args())
@@ -911,8 +948,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
// If this is a byval argument, and if the aggregate type is small, just
// pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
+ // packed or if we can prove the padding bytes are never accessed.
bool isSafeToPromote =
PtrArg->hasByValAttr() &&
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
@@ -963,8 +999,9 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
}
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
- MaxElements))
+ Type *ByValTy =
+ PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
+ if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
ArgsToPromote.insert(PtrArg);
}
@@ -1101,7 +1138,9 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
CallGraphNode *NewCalleeNode =
CG.getOrInsertFunction(NewCS.getCalledFunction());
CallGraphNode *CallerNode = CG[Caller];
- CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ CallerNode->replaceCallEdge(*cast<CallBase>(OldCS.getInstruction()),
+ *cast<CallBase>(NewCS.getInstruction()),
+ NewCalleeNode);
};
const TargetTransformInfo &TTI =
diff --git a/lib/Transforms/IPO/Attributor.cpp b/lib/Transforms/IPO/Attributor.cpp
new file mode 100644
index 000000000000..2a52c6b9b4ad
--- /dev/null
+++ b/lib/Transforms/IPO/Attributor.cpp
@@ -0,0 +1,1690 @@
+//===- Attributor.cpp - Module-wide attribute deduction -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an interprocedural pass that deduces and/or propagates
+// attributes. This is done in an abstract interpretation style fixpoint
+// iteration. See the Attributor.h file comment and the class descriptions in
+// that file for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "attributor"
+
+STATISTIC(NumFnWithExactDefinition,
+          "Number of functions with exact definitions");
+STATISTIC(NumFnWithoutExactDefinition,
+          "Number of functions without exact definitions");
+STATISTIC(NumAttributesTimedOut,
+ "Number of abstract attributes timed out before fixpoint");
+STATISTIC(NumAttributesValidFixpoint,
+ "Number of abstract attributes in a valid fixpoint state");
+STATISTIC(NumAttributesManifested,
+ "Number of abstract attributes manifested in IR");
+STATISTIC(NumFnNoUnwind, "Number of functions marked nounwind");
+
+STATISTIC(NumFnUniqueReturned, "Number of functions with a unique return value");
+STATISTIC(NumFnKnownReturns, "Number of functions with known return values");
+STATISTIC(NumFnArgumentReturned,
+ "Number of function arguments marked returned");
+STATISTIC(NumFnNoSync, "Number of functions marked nosync");
+STATISTIC(NumFnNoFree, "Number of functions marked nofree");
+STATISTIC(NumFnReturnedNonNull,
+ "Number of function return values marked nonnull");
+STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
+STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
+STATISTIC(NumFnWillReturn, "Number of functions marked willreturn");
+
+// TODO: Determine a good default value.
+//
+// In the LLVM test suite and SPEC2006, 32 does not seem to induce compile time overheads
+// (when run with the first 5 abstract attributes). The results also indicate
+// that we never reach 32 iterations but always find a fixpoint sooner.
+//
+// This will become more involved once we perform two interleaved fixpoint
+// iterations: bottom-up and top-down.
+static cl::opt<unsigned>
+ MaxFixpointIterations("attributor-max-iterations", cl::Hidden,
+ cl::desc("Maximal number of fixpoint iterations."),
+ cl::init(32));
+
+static cl::opt<bool> DisableAttributor(
+ "attributor-disable", cl::Hidden,
+ cl::desc("Disable the attributor inter-procedural deduction pass."),
+ cl::init(true));
+
+static cl::opt<bool> VerifyAttributor(
+ "attributor-verify", cl::Hidden,
+ cl::desc("Verify the Attributor deduction and "
+ "manifestation of attributes -- may issue false-positive errors"),
+ cl::init(false));
+
+/// Logic operators for the change status enum class.
+///
+///{
+ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) {
+ return l == ChangeStatus::CHANGED ? l : r;
+}
+ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) {
+ return l == ChangeStatus::UNCHANGED ? l : r;
+}
+///}
+
+/// Helper to adjust the statistics.
+static void bookkeeping(AbstractAttribute::ManifestPosition MP,
+ const Attribute &Attr) {
+ if (!AreStatisticsEnabled())
+ return;
+
+ if (!Attr.isEnumAttribute())
+ return;
+ switch (Attr.getKindAsEnum()) {
+ case Attribute::NoUnwind:
+ NumFnNoUnwind++;
+ return;
+ case Attribute::Returned:
+ NumFnArgumentReturned++;
+ return;
+ case Attribute::NoSync:
+ NumFnNoSync++;
+ break;
+ case Attribute::NoFree:
+ NumFnNoFree++;
+ break;
+ case Attribute::NonNull:
+ switch (MP) {
+ case AbstractAttribute::MP_RETURNED:
+ NumFnReturnedNonNull++;
+ break;
+ case AbstractAttribute::MP_ARGUMENT:
+ NumFnArgumentNonNull++;
+ break;
+ case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ NumCSArgumentNonNull++;
+ break;
+ default:
+ break;
+ }
+ break;
+ case Attribute::WillReturn:
+ NumFnWillReturn++;
+ break;
+ default:
+ return;
+ }
+}
+
+template <typename StateTy>
+using followValueCB_t = std::function<bool(Value *, StateTy &State)>;
+template <typename StateTy>
+using visitValueCB_t = std::function<void(Value *, StateTy &State)>;
+
+/// Recursively visit all values that might become \p InitV at some point. This
+/// will be done by looking through cast instructions, selects, phis, and calls
+/// with the "returned" attribute. The callback \p FollowValueCB is asked before
+/// a potential origin value is looked at. If no \p FollowValueCB is passed, a
+/// default one is used that will make sure we visit every value only once. Once
+/// we cannot look through the value any further, the callback \p VisitValueCB
+/// is invoked and passed the current value and the \p State. To limit how much
+/// effort is invested, we will never visit more than \p MaxValues values.
+template <typename StateTy>
+static bool genericValueTraversal(
+ Value *InitV, StateTy &State, visitValueCB_t<StateTy> &VisitValueCB,
+ followValueCB_t<StateTy> *FollowValueCB = nullptr, int MaxValues = 8) {
+
+ SmallPtrSet<Value *, 16> Visited;
+ followValueCB_t<bool> DefaultFollowValueCB = [&](Value *Val, bool &) {
+ return Visited.insert(Val).second;
+ };
+
+ if (!FollowValueCB)
+ FollowValueCB = &DefaultFollowValueCB;
+
+ SmallVector<Value *, 16> Worklist;
+ Worklist.push_back(InitV);
+
+ int Iteration = 0;
+ do {
+ Value *V = Worklist.pop_back_val();
+
+ // Check if we should process the current value. To prevent endless
+ // recursion keep a record of the values we followed!
+ if (!(*FollowValueCB)(V, State))
+ continue;
+
+ // Make sure we limit the compile time for complex expressions.
+ if (Iteration++ >= MaxValues)
+ return false;
+
+ // Explicitly look through calls with a "returned" attribute if we do
+ // not have a pointer as stripPointerCasts only works on them.
+ if (V->getType()->isPointerTy()) {
+ V = V->stripPointerCasts();
+ } else {
+ CallSite CS(V);
+ if (CS && CS.getCalledFunction()) {
+ Value *NewV = nullptr;
+ for (Argument &Arg : CS.getCalledFunction()->args())
+ if (Arg.hasReturnedAttr()) {
+ NewV = CS.getArgOperand(Arg.getArgNo());
+ break;
+ }
+ if (NewV) {
+ Worklist.push_back(NewV);
+ continue;
+ }
+ }
+ }
+
+ // Look through select instructions, visit both potential values.
+ if (auto *SI = dyn_cast<SelectInst>(V)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ // Look through phi nodes, visit all operands.
+ if (auto *PHI = dyn_cast<PHINode>(V)) {
+ Worklist.append(PHI->op_begin(), PHI->op_end());
+ continue;
+ }
+
+ // Once a leaf is reached we inform the user through the callback.
+ VisitValueCB(V, State);
+ } while (!Worklist.empty());
+
+ // All values have been visited.
+ return true;
+}
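
A brief usage sketch for the helper above, not part of the patch: count the distinct leaf values some Value *V may originate from, discarding the count if the walk was cut short by the MaxValues limit.

    unsigned NumLeaves = 0;
    visitValueCB_t<unsigned> CountLeaf = [](Value *, unsigned &N) { ++N; };
    bool Complete = genericValueTraversal(V, NumLeaves, CountLeaf);
    if (!Complete)
      NumLeaves = 0; // only a lower bound, so do not trust it
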
+
+/// Helper to identify the correct offset into an attribute list.
+static unsigned getAttrIndex(AbstractAttribute::ManifestPosition MP,
+ unsigned ArgNo = 0) {
+ switch (MP) {
+ case AbstractAttribute::MP_ARGUMENT:
+ case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ return ArgNo + AttributeList::FirstArgIndex;
+ case AbstractAttribute::MP_FUNCTION:
+ return AttributeList::FunctionIndex;
+ case AbstractAttribute::MP_RETURNED:
+ return AttributeList::ReturnIndex;
+ }
+ llvm_unreachable("Unknown manifest position!");
+}
+
+/// Return true if \p New is equal or worse than \p Old.
+static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
+ if (!Old.isIntAttribute())
+ return true;
+
+ return Old.getValueAsInt() >= New.getValueAsInt();
+}
+
+/// Return true if the information provided by \p Attr was added to the
+/// attribute list \p Attrs. This is only the case if it was not already present
+/// in \p Attrs at the position describe by \p MP and \p ArgNo.
+static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
+ AttributeList &Attrs,
+ AbstractAttribute::ManifestPosition MP,
+ unsigned ArgNo = 0) {
+ unsigned AttrIdx = getAttrIndex(MP, ArgNo);
+
+ if (Attr.isEnumAttribute()) {
+ Attribute::AttrKind Kind = Attr.getKindAsEnum();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+ if (Attr.isStringAttribute()) {
+ StringRef Kind = Attr.getKindAsString();
+ if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ return false;
+ Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ return true;
+ }
+
+ llvm_unreachable("Expected enum or string attribute!");
+}
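
addIfNotExistent builds on the immutable AttributeList interface; a hedged sketch of that same interface used directly, assuming F is a Function & whose first argument should become nonnull:

    LLVMContext &Ctx = F.getContext();
    AttributeList AL = F.getAttributes();
    AL = AL.addAttribute(Ctx, AttributeList::FirstArgIndex,
                         Attribute::get(Ctx, Attribute::NonNull));
    F.setAttributes(AL); // AttributeList is immutable, so write the copy back
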
+
+ChangeStatus AbstractAttribute::update(Attributor &A) {
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ if (getState().isAtFixpoint())
+ return HasChanged;
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Update: " << *this << "\n");
+
+ HasChanged = updateImpl(A);
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Update " << HasChanged << " " << *this
+ << "\n");
+
+ return HasChanged;
+}
+
+ChangeStatus AbstractAttribute::manifest(Attributor &A) {
+ assert(getState().isValidState() &&
+ "Attempted to manifest an invalid state!");
+ assert(getAssociatedValue() &&
+ "Attempted to manifest an attribute without associated value!");
+
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ SmallVector<Attribute, 4> DeducedAttrs;
+ getDeducedAttributes(DeducedAttrs);
+
+ Function &ScopeFn = getAnchorScope();
+ LLVMContext &Ctx = ScopeFn.getContext();
+ ManifestPosition MP = getManifestPosition();
+
+ AttributeList Attrs;
+ SmallVector<unsigned, 4> ArgNos;
+
+ // In the following some generic code that will manifest attributes in
+ // DeducedAttrs if they improve the current IR. Due to the different
+ // annotation positions we use the underlying AttributeList interface.
+ // Note that MP_CALL_SITE_ARGUMENT can annotate multiple locations.
+
+ switch (MP) {
+ case MP_ARGUMENT:
+ ArgNos.push_back(cast<Argument>(getAssociatedValue())->getArgNo());
+ Attrs = ScopeFn.getAttributes();
+ break;
+ case MP_FUNCTION:
+ case MP_RETURNED:
+ ArgNos.push_back(0);
+ Attrs = ScopeFn.getAttributes();
+ break;
+ case MP_CALL_SITE_ARGUMENT: {
+ CallSite CS(&getAnchoredValue());
+ for (unsigned u = 0, e = CS.getNumArgOperands(); u != e; u++)
+ if (CS.getArgOperand(u) == getAssociatedValue())
+ ArgNos.push_back(u);
+ Attrs = CS.getAttributes();
+ }
+ }
+
+ for (const Attribute &Attr : DeducedAttrs) {
+ for (unsigned ArgNo : ArgNos) {
+ if (!addIfNotExistent(Ctx, Attr, Attrs, MP, ArgNo))
+ continue;
+
+ HasChanged = ChangeStatus::CHANGED;
+ bookkeeping(MP, Attr);
+ }
+ }
+
+ if (HasChanged == ChangeStatus::UNCHANGED)
+ return HasChanged;
+
+ switch (MP) {
+ case MP_ARGUMENT:
+ case MP_FUNCTION:
+ case MP_RETURNED:
+ ScopeFn.setAttributes(Attrs);
+ break;
+ case MP_CALL_SITE_ARGUMENT:
+ CallSite(&getAnchoredValue()).setAttributes(Attrs);
+ }
+
+ return HasChanged;
+}
+
+Function &AbstractAttribute::getAnchorScope() {
+ Value &V = getAnchoredValue();
+ if (isa<Function>(V))
+ return cast<Function>(V);
+ if (isa<Argument>(V))
+ return *cast<Argument>(V).getParent();
+ if (isa<Instruction>(V))
+ return *cast<Instruction>(V).getFunction();
+ llvm_unreachable("No scope for anchored value found!");
+}
+
+const Function &AbstractAttribute::getAnchorScope() const {
+ return const_cast<AbstractAttribute *>(this)->getAnchorScope();
+}
+
+/// -----------------------NoUnwind Function Attribute--------------------------
+
+struct AANoUnwindFunction : AANoUnwind, BooleanState {
+
+ AANoUnwindFunction(Function &F, InformationCache &InfoCache)
+ : AANoUnwind(F, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ /// {
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ /// }
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nounwind" : "may-unwind";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AANoUnwind::isAssumedNoUnwind().
+ bool isAssumedNoUnwind() const override { return getAssumed(); }
+
+ /// See AANoUnwind::isKnownNoUnwind().
+ bool isKnownNoUnwind() const override { return getKnown(); }
+};
+
+ChangeStatus AANoUnwindFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ auto Opcodes = {
+ (unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
+ (unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
+
+ for (unsigned Opcode : Opcodes) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ if (!I->mayThrow())
+ continue;
+
+ auto *NoUnwindAA = A.getAAFor<AANoUnwind>(*this, *I);
+
+ if (!NoUnwindAA || !NoUnwindAA->isAssumedNoUnwind()) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+ }
+ return ChangeStatus::UNCHANGED;
+}
+
+/// --------------------- Function Return Values -------------------------------
+
+/// "Attribute" that collects all potential returned values and the return
+/// instructions that they arise from.
+///
+/// If there is a unique returned value R, the manifest method will:
+/// - mark R with the "returned" attribute, if R is an argument.
+class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState {
+
+ /// Mapping of values potentially returned by the associated function to the
+ /// return instructions that might return them.
+ DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> ReturnedValues;
+
+ /// State flags
+ ///
+ ///{
+ bool IsFixed;
+ bool IsValidState;
+ bool HasOverdefinedReturnedCalls;
+ ///}
+
+ /// Collect values that could become \p V in the set \p Values, each mapped to
+ /// \p ReturnInsts.
+ void collectValuesRecursively(
+ Attributor &A, Value *V, SmallPtrSetImpl<ReturnInst *> &ReturnInsts,
+ DenseMap<Value *, SmallPtrSet<ReturnInst *, 2>> &Values) {
+
+ visitValueCB_t<bool> VisitValueCB = [&](Value *Val, bool &) {
+ assert(!isa<Instruction>(Val) ||
+ &getAnchorScope() == cast<Instruction>(Val)->getFunction());
+ Values[Val].insert(ReturnInsts.begin(), ReturnInsts.end());
+ };
+
+ bool UnusedBool;
+ bool Success = genericValueTraversal(V, UnusedBool, VisitValueCB);
+
+    // If we aborted the above traversal, we haven't seen all the values.
+    // Consequently, we cannot know if the information we would derive is
+    // accurate, so we give up early.
+ if (!Success)
+ indicatePessimisticFixpoint();
+ }
+
+public:
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAReturnedValuesImpl(Function &F, InformationCache &InfoCache)
+ : AAReturnedValues(F, InfoCache) {
+ // We do not have an associated argument yet.
+ AssociatedVal = nullptr;
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // Reset the state.
+ AssociatedVal = nullptr;
+ IsFixed = false;
+ IsValidState = true;
+ HasOverdefinedReturnedCalls = false;
+ ReturnedValues.clear();
+
+ Function &F = cast<Function>(getAnchoredValue());
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+ // Look through all arguments, if one is marked as returned we are done.
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasReturnedAttr()) {
+
+ auto &ReturnInstSet = ReturnedValues[&Arg];
+ for (Instruction *RI : OpcodeInstMap[Instruction::Ret])
+ ReturnInstSet.insert(cast<ReturnInst>(RI));
+
+ indicateOptimisticFixpoint();
+ return;
+ }
+ }
+
+ // If no argument was marked as returned we look at all return instructions
+ // and collect potentially returned values.
+ for (Instruction *RI : OpcodeInstMap[Instruction::Ret]) {
+ SmallPtrSet<ReturnInst *, 1> RISet({cast<ReturnInst>(RI)});
+ collectValuesRecursively(A, cast<ReturnInst>(RI)->getReturnValue(), RISet,
+ ReturnedValues);
+ }
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override;
+
+ /// See AbstractAttribute::getState(...).
+ AbstractState &getState() override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ const AbstractState &getState() const override { return *this; }
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// Return the number of potential return values, -1 if unknown.
+ size_t getNumReturnValues() const {
+ return isValidState() ? ReturnedValues.size() : -1;
+ }
+
+ /// Return an assumed unique return value if a single candidate is found. If
+ /// there cannot be one, return a nullptr. If it is not clear yet, return the
+ /// Optional::NoneType.
+ Optional<Value *> getAssumedUniqueReturnValue() const;
+
+ /// See AbstractState::checkForallReturnedValues(...).
+ bool
+ checkForallReturnedValues(std::function<bool(Value &)> &Pred) const override;
+
+ /// Pretty print the attribute similar to the IR representation.
+ const std::string getAsStr() const override;
+
+ /// See AbstractState::isAtFixpoint().
+ bool isAtFixpoint() const override { return IsFixed; }
+
+ /// See AbstractState::isValidState().
+ bool isValidState() const override { return IsValidState; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...).
+ void indicateOptimisticFixpoint() override {
+ IsFixed = true;
+ IsValidState &= true;
+ }
+ void indicatePessimisticFixpoint() override {
+ IsFixed = true;
+ IsValidState = false;
+ }
+};
+
+ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ // Bookkeeping.
+ assert(isValidState());
+ NumFnKnownReturns++;
+
+ // Check if we have an assumed unique return value that we could manifest.
+ Optional<Value *> UniqueRV = getAssumedUniqueReturnValue();
+
+ if (!UniqueRV.hasValue() || !UniqueRV.getValue())
+ return Changed;
+
+ // Bookkeeping.
+ NumFnUniqueReturned++;
+
+ // If the assumed unique return value is an argument, annotate it.
+ if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
+ AssociatedVal = UniqueRVArg;
+ Changed = AbstractAttribute::manifest(A) | Changed;
+ }
+
+ return Changed;
+}
+
+const std::string AAReturnedValuesImpl::getAsStr() const {
+ return (isAtFixpoint() ? "returns(#" : "may-return(#") +
+ (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
+}
+
+Optional<Value *> AAReturnedValuesImpl::getAssumedUniqueReturnValue() const {
+ // If checkForallReturnedValues provides a unique value, ignoring potential
+ // undef values that can also be present, it is assumed to be the actual
+ // return value and forwarded to the caller of this method. If there are
+ // multiple, a nullptr is returned indicating there cannot be a unique
+ // returned value.
+ Optional<Value *> UniqueRV;
+
+ std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+ // If we found a second returned value and neither the current nor the saved
+ // one is an undef, there is no unique returned value. Undefs are special
+ // since we can pretend they have any value.
+ if (UniqueRV.hasValue() && UniqueRV != &RV &&
+ !(isa<UndefValue>(RV) || isa<UndefValue>(UniqueRV.getValue()))) {
+ UniqueRV = nullptr;
+ return false;
+ }
+
+ // Do not overwrite a value with an undef.
+ if (!UniqueRV.hasValue() || !isa<UndefValue>(RV))
+ UniqueRV = &RV;
+
+ return true;
+ };
+
+ if (!checkForallReturnedValues(Pred))
+ UniqueRV = nullptr;
+
+ return UniqueRV;
+}
+
+bool AAReturnedValuesImpl::checkForallReturnedValues(
+ std::function<bool(Value &)> &Pred) const {
+ if (!isValidState())
+ return false;
+
+ // Check all returned values but ignore call sites as long as we have not
+ // encountered an overdefined one during an update.
+ for (auto &It : ReturnedValues) {
+ Value *RV = It.first;
+
+ ImmutableCallSite ICS(RV);
+ if (ICS && !HasOverdefinedReturnedCalls)
+ continue;
+
+ if (!Pred(*RV))
+ return false;
+ }
+
+ return true;
+}
+
+ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
+
+ // Check if we know of any values returned by the associated function,
+ // if not, we are done.
+ if (getNumReturnValues() == 0) {
+ indicateOptimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ // Check if any of the returned values is a call site we can refine.
+ decltype(ReturnedValues) AddRVs;
+ bool HasCallSite = false;
+
+ // Look at all returned call sites.
+ for (auto &It : ReturnedValues) {
+ SmallPtrSet<ReturnInst *, 2> &ReturnInsts = It.second;
+ Value *RV = It.first;
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Potentially returned value " << *RV
+ << "\n");
+
+ // Only call sites can change during an update, ignore the rest.
+ CallSite RetCS(RV);
+ if (!RetCS)
+ continue;
+
+ // For now, any call site we see will prevent us from directly fixing the
+ // state. However, if the information on the callees is fixed, the call
+ // sites will be removed and we will fix the information for this state.
+ HasCallSite = true;
+
+    // Try to find an assumed unique return value for the called function.
+ auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV);
+ if (!RetCSAA) {
+ HasOverdefinedReturnedCalls = true;
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV
+ << ") with " << (RetCSAA ? "invalid" : "no")
+ << " associated state\n");
+ continue;
+ }
+
+    // Try to find an assumed unique return value for the called function.
+ Optional<Value *> AssumedUniqueRV = RetCSAA->getAssumedUniqueReturnValue();
+
+ // If no assumed unique return value was found due to the lack of
+ // candidates, we may need to resolve more calls (through more update
+ // iterations) or the called function will not return. Either way, we simply
+ // stick with the call sites as return values. Because there were not
+ // multiple possibilities, we do not treat it as overdefined.
+ if (!AssumedUniqueRV.hasValue())
+ continue;
+
+ // If multiple, non-refinable values were found, there cannot be a unique
+ // return value for the called function. The returned call is overdefined!
+ if (!AssumedUniqueRV.getValue()) {
+ HasOverdefinedReturnedCalls = true;
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site has multiple "
+ "potentially returned values\n");
+ continue;
+ }
+
+ LLVM_DEBUG({
+ bool UniqueRVIsKnown = RetCSAA->isAtFixpoint();
+ dbgs() << "[AAReturnedValues] Returned call site "
+ << (UniqueRVIsKnown ? "known" : "assumed")
+ << " unique return value: " << *AssumedUniqueRV << "\n";
+ });
+
+ // The assumed unique return value.
+ Value *AssumedRetVal = AssumedUniqueRV.getValue();
+
+ // If the assumed unique return value is an argument, lookup the matching
+ // call site operand and recursively collect new returned values.
+ // If it is not an argument, it is just put into the set of returned values
+ // as we would have already looked through casts, phis, and similar values.
+ if (Argument *AssumedRetArg = dyn_cast<Argument>(AssumedRetVal))
+ collectValuesRecursively(A,
+ RetCS.getArgOperand(AssumedRetArg->getArgNo()),
+ ReturnInsts, AddRVs);
+ else
+ AddRVs[AssumedRetVal].insert(ReturnInsts.begin(), ReturnInsts.end());
+ }
+
+ // Keep track of any change to trigger updates on dependent attributes.
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+
+ for (auto &It : AddRVs) {
+ assert(!It.second.empty() && "Entry does not add anything.");
+ auto &ReturnInsts = ReturnedValues[It.first];
+ for (ReturnInst *RI : It.second)
+ if (ReturnInsts.insert(RI).second) {
+ LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
+ << *It.first << " => " << *RI << "\n");
+ Changed = ChangeStatus::CHANGED;
+ }
+ }
+
+ // If there is no call site in the returned values we are done.
+ if (!HasCallSite) {
+ indicateOptimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ return Changed;
+}
+
+/// ------------------------ NoSync Function Attribute -------------------------
+
+struct AANoSyncFunction : AANoSync, BooleanState {
+
+ AANoSyncFunction(Function &F, InformationCache &InfoCache)
+ : AANoSync(F, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ /// {
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ /// }
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nosync" : "may-sync";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AANoSync::isAssumedNoSync()
+ bool isAssumedNoSync() const override { return getAssumed(); }
+
+ /// See AANoSync::isKnownNoSync()
+ bool isKnownNoSync() const override { return getKnown(); }
+
+  /// Helper function used to determine whether an instruction is a non-relaxed
+  /// atomic, i.e. an atomic instruction whose ordering is stronger than
+  /// unordered or monotonic.
+ static bool isNonRelaxedAtomic(Instruction *I);
+
+ /// Helper function used to determine whether an instruction is volatile.
+ static bool isVolatile(Instruction *I);
+
+  /// Helper function used to check whether an intrinsic (memcpy, memmove,
+  /// memset) is nosync.
+ static bool isNoSyncIntrinsic(Instruction *I);
+};
+
+bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) {
+ if (!I->isAtomic())
+ return false;
+
+ AtomicOrdering Ordering;
+ switch (I->getOpcode()) {
+ case Instruction::AtomicRMW:
+ Ordering = cast<AtomicRMWInst>(I)->getOrdering();
+ break;
+ case Instruction::Store:
+ Ordering = cast<StoreInst>(I)->getOrdering();
+ break;
+ case Instruction::Load:
+ Ordering = cast<LoadInst>(I)->getOrdering();
+ break;
+ case Instruction::Fence: {
+ auto *FI = cast<FenceInst>(I);
+ if (FI->getSyncScopeID() == SyncScope::SingleThread)
+ return false;
+ Ordering = FI->getOrdering();
+ break;
+ }
+ case Instruction::AtomicCmpXchg: {
+ AtomicOrdering Success = cast<AtomicCmpXchgInst>(I)->getSuccessOrdering();
+ AtomicOrdering Failure = cast<AtomicCmpXchgInst>(I)->getFailureOrdering();
+    // Only if both orderings are relaxed can the operation be treated as relaxed.
+ // Otherwise it is non-relaxed.
+ if (Success != AtomicOrdering::Unordered &&
+ Success != AtomicOrdering::Monotonic)
+ return true;
+ if (Failure != AtomicOrdering::Unordered &&
+ Failure != AtomicOrdering::Monotonic)
+ return true;
+ return false;
+ }
+ default:
+ llvm_unreachable(
+ "New atomic operations need to be known in the attributor.");
+ }
+
+ // Relaxed.
+ if (Ordering == AtomicOrdering::Unordered ||
+ Ordering == AtomicOrdering::Monotonic)
+ return false;
+ return true;
+}
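+
To illustrate the predicate above with a concrete case (an assumption-laden sketch, LI being some atomic LoadInst *): monotonic and unordered loads are treated as relaxed and therefore do not defeat nosync, while anything stronger does.

    bool Relaxed = LI->isAtomic() &&
                   (LI->getOrdering() == AtomicOrdering::Monotonic ||
                    LI->getOrdering() == AtomicOrdering::Unordered);
    bool NonRelaxed = LI->isAtomic() && !Relaxed; // e.g. acquire or seq_cst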
+
+/// Checks if an intrinsic is nosync. Currently only checks mem* intrinsics.
+/// FIXME: We should improve the handling of intrinsics.
+bool AANoSyncFunction::isNoSyncIntrinsic(Instruction *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+    /// Element-wise atomic memory intrinsics can only be unordered,
+    /// and are therefore nosync.
+ case Intrinsic::memset_element_unordered_atomic:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ return true;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ if (!cast<MemIntrinsic>(II)->isVolatile())
+ return true;
+ return false;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
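+
+// Illustrative note (assumption): a non-volatile
+// `call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)`
+// is treated as nosync here, whereas the same call with the volatile flag set
+// to `i1 true` is not.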
+
+bool AANoSyncFunction::isVolatile(Instruction *I) {
+ assert(!ImmutableCallSite(I) && !isa<CallBase>(I) &&
+ "Calls should not be checked here");
+
+ switch (I->getOpcode()) {
+ case Instruction::AtomicRMW:
+ return cast<AtomicRMWInst>(I)->isVolatile();
+ case Instruction::Store:
+ return cast<StoreInst>(I)->isVolatile();
+ case Instruction::Load:
+ return cast<LoadInst>(I)->isVolatile();
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(I)->isVolatile();
+ default:
+ return false;
+ }
+}
+
+ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ /// We are looking for volatile instructions or non-relaxed atomics.
+ /// FIXME: We should improve the handling of intrinsics.
+ for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) {
+ ImmutableCallSite ICS(I);
+ auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
+
+ if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
+ continue;
+
+ if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
+ !ICS.hasFnAttr(Attribute::NoSync)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ if (ICS)
+ continue;
+
+ if (!isVolatile(I) && !isNonRelaxedAtomic(I))
+ continue;
+
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+ auto Opcodes = {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call};
+
+ for (unsigned Opcode : Opcodes) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ // At this point we handled all read/write effects and they are all
+ // nosync, so they can be skipped.
+ if (I->mayReadOrWriteMemory())
+ continue;
+
+ ImmutableCallSite ICS(I);
+
+ // non-convergent and readnone imply nosync.
+ if (!ICS.isConvergent())
+ continue;
+
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ No-Free Attributes ----------------------------
+
+struct AANoFreeFunction : AbstractAttribute, BooleanState {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AANoFreeFunction(Function &F, InformationCache &InfoCache)
+ : AbstractAttribute(F, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ ///{
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ ///}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nofree" : "may-free";
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::getAttrKind().
+ Attribute::AttrKind getAttrKind() const override { return ID; }
+
+ /// Return true if "nofree" is assumed.
+ bool isAssumedNoFree() const { return getAssumed(); }
+
+ /// Return true if "nofree" is known.
+ bool isKnownNoFree() const { return getKnown(); }
+
+ /// The identifier used by the Attributor for this class of attributes.
+ static constexpr Attribute::AttrKind ID = Attribute::NoFree;
+};
+
+ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+ for (unsigned Opcode :
+ {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call}) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+
+ auto ICS = ImmutableCallSite(I);
+ auto *NoFreeAA = A.getAAFor<AANoFreeFunction>(*this, *I);
+
+ if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) &&
+ !ICS.hasFnAttr(Attribute::NoFree)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+ }
+ return ChangeStatus::UNCHANGED;
+}
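+
+// Illustrative summary (not upstream text): a function stays assumed "nofree"
+// as long as every call/invoke/callbr in it either already carries the nofree
+// attribute or has an associated AANoFreeFunction that is still assumed
+// nofree; a single offending call site drops it to the pessimistic state.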
+
+/// ------------------------ NonNull Argument Attribute ------------------------
+struct AANonNullImpl : AANonNull, BooleanState {
+
+ AANonNullImpl(Value &V, InformationCache &InfoCache)
+ : AANonNull(V, InfoCache) {}
+
+ AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
+ InformationCache &InfoCache)
+ : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+
+ /// See AbstractAttribute::getState()
+ /// {
+ AbstractState &getState() override { return *this; }
+ const AbstractState &getState() const override { return *this; }
+ /// }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr() const override {
+ return getAssumed() ? "nonnull" : "may-null";
+ }
+
+ /// See AANonNull::isAssumedNonNull().
+ bool isAssumedNonNull() const override { return getAssumed(); }
+
+ /// See AANonNull::isKnownNonNull().
+ bool isKnownNonNull() const override { return getKnown(); }
+
+ /// Generate a predicate that checks if a given value is assumed nonnull.
+ /// The generated function returns true if a value satisfies any of the
+ /// following conditions:
+ /// (i) A value is known nonzero (i.e., nonnull).
+ /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
+ /// true.
+ std::function<bool(Value &)> generatePredicate(Attributor &);
+};
+
+std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
+ // FIXME: The `AAReturnedValues` should provide the predicate with the
+ // `ReturnInst` vector as well such that we can use the control flow sensitive
+ // version of `isKnownNonZero`. This should fix `test11` in
+ // `test/Transforms/FunctionAttrs/nonnull.ll`
+
+ std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+ if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout()))
+ return true;
+
+ auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV);
+
+ ImmutableCallSite ICS(&RV);
+
+ if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) &&
+ (!ICS || !ICS.hasRetAttr(Attribute::NonNull)))
+ return false;
+
+ return true;
+ };
+
+ return Pred;
+}
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned : AANonNullImpl {
+
+ AANonNullReturned(Function &F, InformationCache &InfoCache)
+ : AANonNullImpl(F, InfoCache) {}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Function &F = getAnchorScope();
+
+ // Already nonnull.
+ if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+ Attribute::NonNull))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ auto *AARetVal = A.getAAFor<AAReturnedValues>(*this, F);
+ if (!AARetVal) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ std::function<bool(Value &)> Pred = this->generatePredicate(A);
+ if (!AARetVal->checkForallReturnedValues(Pred)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ return ChangeStatus::UNCHANGED;
+}
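+
+// Illustrative note (assumption): if every returned value is, for instance,
+// the address of an ordinary (non-extern_weak) global, `isKnownNonZero`
+// already proves it nonnull and the predicate succeeds without consulting a
+// separate AANonNull.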
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument : AANonNullImpl {
+
+ AANonNullArgument(Argument &A, InformationCache &InfoCache)
+ : AANonNullImpl(A, InfoCache) {}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ Argument *Arg = cast<Argument>(getAssociatedValue());
+ if (Arg->hasNonNullAttr())
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+/// NonNull attribute for a call site argument.
+struct AANonNullCallSiteArgument : AANonNullImpl {
+
+ /// See AANonNullImpl::AANonNullImpl(...).
+ AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
+ InformationCache &InfoCache)
+ : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
+ ArgNo(ArgNo) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ CallSite CS(&getAnchoredValue());
+ if (isKnownNonZero(getAssociatedValue(),
+ getAnchorScope().getParent()->getDataLayout()) ||
+ CS.paramHasAttr(ArgNo, getAttrKind()))
+ indicateOptimisticFixpoint();
+ }
+
+ /// See AbstractAttribute::updateImpl(Attributor &A).
+ ChangeStatus updateImpl(Attributor &A) override;
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override {
+ return MP_CALL_SITE_ARGUMENT;
+ };
+
+ // Return argument index of associated value.
+ int getArgNo() const { return ArgNo; }
+
+private:
+ unsigned ArgNo;
+};
+ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+ Argument &Arg = cast<Argument>(getAnchoredValue());
+
+ unsigned ArgNo = Arg.getArgNo();
+
+ // Callback function
+ std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
+ assert(CS && "Sanity check: Call site was not initialized properly!");
+
+ auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+
+ // Check that NonNullAA is AANonNullCallSiteArgument.
+ if (NonNullAA) {
+ ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
+ if (ICS && CS.getInstruction() == ICS.getInstruction())
+ return NonNullAA->isAssumedNonNull();
+ return false;
+ }
+
+ if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+ return true;
+
+ Value *V = CS.getArgOperand(ArgNo);
+ if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+ return true;
+
+ return false;
+ };
+ if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ return ChangeStatus::UNCHANGED;
+}
+
+ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
+ // NOTE: Never look at the argument of the callee in this method.
+ // If we do this, "nonnull" is always deduced because of the assumption.
+
+ Value &V = *getAssociatedValue();
+
+ auto *NonNullAA = A.getAAFor<AANonNull>(*this, V);
+
+ if (!NonNullAA || !NonNullAA->isAssumedNonNull()) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ------------------------ Will-Return Attributes ----------------------------
+
+struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAWillReturnImpl(Function &F, InformationCache &InfoCache)
+ : AAWillReturn(F, InfoCache) {}
+
+ /// See AAWillReturn::isKnownWillReturn().
+ bool isKnownWillReturn() const override { return getKnown(); }
+
+ /// See AAWillReturn::isAssumedWillReturn().
+ bool isAssumedWillReturn() const override { return getAssumed(); }
+
+ /// See AbstractAttribute::getState(...).
+ AbstractState &getState() override { return *this; }
+
+ /// See AbstractAttribute::getState(...).
+ const AbstractState &getState() const override { return *this; }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ return getAssumed() ? "willreturn" : "may-noreturn";
+ }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+
+ /// See AbstractAttribute::AbstractAttribute(...).
+ AAWillReturnFunction(Function &F, InformationCache &InfoCache)
+ : AAWillReturnImpl(F, InfoCache) {}
+
+ /// See AbstractAttribute::getManifestPosition().
+ ManifestPosition getManifestPosition() const override {
+ return MP_FUNCTION;
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override;
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override;
+};
+
+// Helper function that checks whether a function has any cycle.
+// TODO: Replace with more efficient code.
+bool containsCycle(Function &F) {
+ SmallPtrSet<BasicBlock *, 32> Visited;
+
+ // Traverse BB by dfs and check whether successor is already visited.
+ for (BasicBlock *BB : depth_first(&F)) {
+ Visited.insert(BB);
+ for (auto *SuccBB : successors(BB)) {
+ if (Visited.count(SuccBB))
+ return true;
+ }
+ }
+ return false;
+}
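+
+// Note (not in the upstream comment): since any already-visited successor is
+// reported, CFG join points in acyclic code (e.g. the tail block of an
+// if/else diamond) are flagged too, so the check is conservative; that is
+// safe for its pessimistic use in AAWillReturnFunction::initialize below.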
+
+// Helper function that checks whether the function has a loop which might
+// become an endless loop.
+// FIXME: Any cycle is regarded as endless loop for now.
+// We have to allow some patterns.
+bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+
+void AAWillReturnFunction::initialize(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ if (containsPossiblyEndlessLoop(F))
+ indicatePessimisticFixpoint();
+}
+
+ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
+ Function &F = getAnchorScope();
+
+ // The map from instruction opcodes to those instructions in the function.
+ auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+ for (unsigned Opcode :
+ {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+ (unsigned)Instruction::Call}) {
+ for (Instruction *I : OpcodeInstMap[Opcode]) {
+ auto ICS = ImmutableCallSite(I);
+
+ if (ICS.hasFnAttr(Attribute::WillReturn))
+ continue;
+
+ auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
+ if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+
+ auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+
+ // FIXME: (i) Prohibit any recursion for now.
+ // (ii) AANoRecurse isn't implemented yet so currently any call is
+ // regarded as having recursion.
+ // Code below should be
+ // if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
+ if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
+ indicatePessimisticFixpoint();
+ return ChangeStatus::CHANGED;
+ }
+ }
+ }
+
+ return ChangeStatus::UNCHANGED;
+}
+
+/// ----------------------------------------------------------------------------
+/// Attributor
+/// ----------------------------------------------------------------------------
+
+bool Attributor::checkForAllCallSites(Function &F,
+ std::function<bool(CallSite)> &Pred,
+ bool RequireAllCallSites) {
+ // We can try to determine information from the call sites. However, this is
+ // only possible if all call sites are known, which requires the function to
+ // have internal linkage.
+ if (RequireAllCallSites && !F.hasInternalLinkage()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Attributor: Function " << F.getName()
+ << " has no internal linkage, hence not all call sites are known\n");
+ return false;
+ }
+
+ for (const Use &U : F.uses()) {
+
+ CallSite CS(U.getUser());
+ if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+ if (!RequireAllCallSites)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
+ << " is an invalid use of " << F.getName() << "\n");
+ return false;
+ }
+
+ if (Pred(CS))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
+ << *CS.getInstruction() << "\n");
+ return false;
+ }
+
+ return true;
+}
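+
+// Illustrative note (assumption): a function with internal linkage that is
+// only ever called directly, e.g. a C `static` helper, satisfies the check
+// above, so the predicate is evaluated on every one of its call sites; taking
+// the function's address makes that use a non-callee use and, with
+// RequireAllCallSites set, the walk gives up.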
+
+ChangeStatus Attributor::run() {
+ // Initialize all abstract attributes.
+ for (AbstractAttribute *AA : AllAbstractAttributes)
+ AA->initialize(*this);
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
+ << AllAbstractAttributes.size()
+ << " abstract attributes.\n");
+
+ // Now that all abstract attributes are collected and initialized we start
+ // the abstract analysis.
+
+ unsigned IterationCounter = 1;
+
+ SmallVector<AbstractAttribute *, 64> ChangedAAs;
+ SetVector<AbstractAttribute *> Worklist;
+ Worklist.insert(AllAbstractAttributes.begin(), AllAbstractAttributes.end());
+
+ do {
+ LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
+ << ", Worklist size: " << Worklist.size() << "\n");
+
+ // Add all abstract attributes that are potentially dependent on one that
+ // changed to the work list.
+ for (AbstractAttribute *ChangedAA : ChangedAAs) {
+ auto &QuerriedAAs = QueryMap[ChangedAA];
+ Worklist.insert(QuerriedAAs.begin(), QuerriedAAs.end());
+ }
+
+ // Reset the changed set.
+ ChangedAAs.clear();
+
+ // Update all abstract attributes in the work list and record the ones that
+ // changed.
+ for (AbstractAttribute *AA : Worklist)
+ if (AA->update(*this) == ChangeStatus::CHANGED)
+ ChangedAAs.push_back(AA);
+
+ // Reset the work list and repopulate with the changed abstract attributes.
+ // Note that dependent ones are added above.
+ Worklist.clear();
+ Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
+
+ } while (!Worklist.empty() && ++IterationCounter < MaxFixpointIterations);
+
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
+ << IterationCounter << "/" << MaxFixpointIterations
+ << " iterations\n");
+
+ bool FinishedAtFixpoint = Worklist.empty();
+
+ // Reset abstract attributes not settled in a sound fixpoint by now. This
+ // happens when we stopped the fixpoint iteration early. Note that only the
+ // ones marked as "changed" *and* the ones transitively depending on them
+ // need to be reverted to a pessimistic state. Others might not be in a
+ // fixpoint state but we can use the optimistic results for them anyway.
+ SmallPtrSet<AbstractAttribute *, 32> Visited;
+ for (unsigned u = 0; u < ChangedAAs.size(); u++) {
+ AbstractAttribute *ChangedAA = ChangedAAs[u];
+ if (!Visited.insert(ChangedAA).second)
+ continue;
+
+ AbstractState &State = ChangedAA->getState();
+ if (!State.isAtFixpoint()) {
+ State.indicatePessimisticFixpoint();
+
+ NumAttributesTimedOut++;
+ }
+
+ auto &QuerriedAAs = QueryMap[ChangedAA];
+ ChangedAAs.append(QuerriedAAs.begin(), QuerriedAAs.end());
+ }
+
+ LLVM_DEBUG({
+ if (!Visited.empty())
+ dbgs() << "\n[Attributor] Finalized " << Visited.size()
+ << " abstract attributes.\n";
+ });
+
+ unsigned NumManifested = 0;
+ unsigned NumAtFixpoint = 0;
+ ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
+ for (AbstractAttribute *AA : AllAbstractAttributes) {
+ AbstractState &State = AA->getState();
+
+ // If a fixpoint was not reached already, we can now take the
+ // optimistic state. This is correct because we enforced a pessimistic one
+ // on abstract attributes that were transitively dependent on a changed one
+ // already above.
+ if (!State.isAtFixpoint())
+ State.indicateOptimisticFixpoint();
+
+ // If the state is invalid, we do not try to manifest it.
+ if (!State.isValidState())
+ continue;
+
+ // Manifest the state and record if we changed the IR.
+ ChangeStatus LocalChange = AA->manifest(*this);
+ ManifestChange = ManifestChange | LocalChange;
+
+ NumAtFixpoint++;
+ NumManifested += (LocalChange == ChangeStatus::CHANGED);
+ }
+
+ (void)NumManifested;
+ (void)NumAtFixpoint;
+ LLVM_DEBUG(dbgs() << "\n[Attributor] Manifested " << NumManifested
+ << " arguments while " << NumAtFixpoint
+ << " were in a valid fixpoint state\n");
+
+ // If verification is requested, this run finished at a fixpoint, and the IR
+ // was changed, then we re-run the whole fixpoint analysis, starting with a
+ // re-initialization of the attributes. This re-run should not result in an IR
+ // change. Though, the (virtual) state of attributes at the end of the re-run
+ // might be more optimistic than the known state or the IR state if the better
+ // state cannot be manifested.
+ if (VerifyAttributor && FinishedAtFixpoint &&
+ ManifestChange == ChangeStatus::CHANGED) {
+ VerifyAttributor = false;
+ ChangeStatus VerifyStatus = run();
+ if (VerifyStatus != ChangeStatus::UNCHANGED)
+ llvm_unreachable(
+ "Attributor verification failed, re-run did result in an IR change "
+ "even after a fixpoint was reached in the original run. (False "
+ "positives possible!)");
+ VerifyAttributor = true;
+ }
+
+ NumAttributesManifested += NumManifested;
+ NumAttributesValidFixpoint += NumAtFixpoint;
+
+ return ManifestChange;
+}
+
+void Attributor::identifyDefaultAbstractAttributes(
+ Function &F, InformationCache &InfoCache,
+ DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist) {
+
+ // Every function can be nounwind.
+ registerAA(*new AANoUnwindFunction(F, InfoCache));
+
+ // Every function might be marked "nosync"
+ registerAA(*new AANoSyncFunction(F, InfoCache));
+
+ // Every function might be "no-free".
+ registerAA(*new AANoFreeFunction(F, InfoCache));
+
+ // Return attributes are only appropriate if the return type is non-void.
+ Type *ReturnType = F.getReturnType();
+ if (!ReturnType->isVoidTy()) {
+ // Argument attribute "returned" --- Create only one per function even
+ // though it is an argument attribute.
+ if (!Whitelist || Whitelist->count(AAReturnedValues::ID))
+ registerAA(*new AAReturnedValuesImpl(F, InfoCache));
+
+ // Every function with pointer return type might be marked nonnull.
+ if (ReturnType->isPointerTy() &&
+ (!Whitelist || Whitelist->count(AANonNullReturned::ID)))
+ registerAA(*new AANonNullReturned(F, InfoCache));
+ }
+
+ // Every argument with pointer type might be marked nonnull.
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy())
+ registerAA(*new AANonNullArgument(Arg, InfoCache));
+ }
+
+ // Every function might be "will-return".
+ registerAA(*new AAWillReturnFunction(F, InfoCache));
+
+ // Walk all instructions to find more attribute opportunities and also
+ // interesting instructions that might be queried by abstract attributes
+ // during their initialization or update.
+ auto &ReadOrWriteInsts = InfoCache.FuncRWInstsMap[&F];
+ auto &InstOpcodeMap = InfoCache.FuncInstOpcodeMap[&F];
+
+ for (Instruction &I : instructions(&F)) {
+ bool IsInterestingOpcode = false;
+
+ // To allow easy access to all instructions in a function with a given
+ // opcode we store them in the InfoCache. As not all opcodes are interesting
+ // to concrete attributes we only cache the ones identified as interesting in
+ // the following switch.
+ // Note: There are no concrete attributes now so this is initially empty.
+ switch (I.getOpcode()) {
+ default:
+ assert((!ImmutableCallSite(&I)) && (!isa<CallBase>(&I)) &&
+ "New call site/base instruction type needs to be known int the "
+ "attributor.");
+ break;
+ case Instruction::Call:
+ case Instruction::CallBr:
+ case Instruction::Invoke:
+ case Instruction::CleanupRet:
+ case Instruction::CatchSwitch:
+ case Instruction::Resume:
+ case Instruction::Ret:
+ IsInterestingOpcode = true;
+ }
+ if (IsInterestingOpcode)
+ InstOpcodeMap[I.getOpcode()].push_back(&I);
+ if (I.mayReadOrWriteMemory())
+ ReadOrWriteInsts.push_back(&I);
+
+ CallSite CS(&I);
+ if (CS && CS.getCalledFunction()) {
+ for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+ if (!CS.getArgument(i)->getType()->isPointerTy())
+ continue;
+
+ // Call site argument attribute "non-null".
+ registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i);
+ }
+ }
+ }
+}
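+
+// Illustrative note (assumption): for `define i32* @f(i32* %p)` this seeds
+// AANoUnwindFunction, AANoSyncFunction, AANoFreeFunction, AAWillReturnFunction,
+// an AAReturnedValuesImpl plus AANonNullReturned for the pointer return value,
+// an AANonNullArgument for %p, and, for each direct call in the body, an
+// AANonNullCallSiteArgument per pointer argument.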
+
+/// Helpers to ease debugging through output streams and print calls.
+///
+///{
+raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
+ return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS,
+ AbstractAttribute::ManifestPosition AP) {
+ switch (AP) {
+ case AbstractAttribute::MP_ARGUMENT:
+ return OS << "arg";
+ case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+ return OS << "cs_arg";
+ case AbstractAttribute::MP_FUNCTION:
+ return OS << "fn";
+ case AbstractAttribute::MP_RETURNED:
+ return OS << "fn_ret";
+ }
+ llvm_unreachable("Unknown attribute position!");
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
+ return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
+ AA.print(OS);
+ return OS;
+}
+
+void AbstractAttribute::print(raw_ostream &OS) const {
+ OS << "[" << getManifestPosition() << "][" << getAsStr() << "]["
+ << AnchoredVal.getName() << "]";
+}
+///}
+
+/// ----------------------------------------------------------------------------
+/// Pass (Manager) Boilerplate
+/// ----------------------------------------------------------------------------
+
+static bool runAttributorOnModule(Module &M) {
+ if (DisableAttributor)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "[Attributor] Run on module with " << M.size()
+ << " functions.\n");
+
+ // Create an Attributor and initially empty information cache that is filled
+ // while we identify default attribute opportunities.
+ Attributor A;
+ InformationCache InfoCache;
+
+ for (Function &F : M) {
+ // TODO: Not all attributes require an exact definition. Find a way to
+ // enable deduction for some but not all attributes in case the
+ // definition might be changed at runtime, see also
+ // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
+ // TODO: We could always determine abstract attributes and if sufficient
+ // information was found we could duplicate the functions that do not
+ // have an exact definition.
+ if (!F.hasExactDefinition()) {
+ NumFnWithoutExactDefinition++;
+ continue;
+ }
+
+ // For now we ignore naked and optnone functions.
+ if (F.hasFnAttribute(Attribute::Naked) ||
+ F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+
+ NumFnWithExactDefinition++;
+
+ // Populate the Attributor with abstract attribute opportunities in the
+ // function and the information cache with IR information.
+ A.identifyDefaultAbstractAttributes(F, InfoCache);
+ }
+
+ return A.run() == ChangeStatus::CHANGED;
+}
+
+PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (runAttributorOnModule(M)) {
+ // FIXME: Think about passes we will preserve and add them here.
+ return PreservedAnalyses::none();
+ }
+ return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct AttributorLegacyPass : public ModulePass {
+ static char ID;
+
+ AttributorLegacyPass() : ModulePass(ID) {
+ initializeAttributorLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ return runAttributorOnModule(M);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // FIXME: Think about passes we will preserve and add them here.
+ AU.setPreservesCFG();
+ }
+};
+
+} // end anonymous namespace
+
+Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
+
+char AttributorLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
+ "Deduce and propagate attributes", false, false)
+INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
+ "Deduce and propagate attributes", false, false)
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
index 05fc3dd6950c..6b68aa90c567 100644
--- a/lib/Transforms/IPO/BarrierNoopPass.cpp
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -1,9 +1,8 @@
//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/BlockExtractor.cpp b/lib/Transforms/IPO/BlockExtractor.cpp
index ff5ee817da49..6c365f3f3cbe 100644
--- a/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/lib/Transforms/IPO/BlockExtractor.cpp
@@ -1,9 +1,8 @@
//===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+
using namespace llvm;
#define DEBUG_TYPE "block-extractor"
@@ -36,22 +36,48 @@ static cl::opt<std::string> BlockExtractorFile(
cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
cl::desc("Erase the existing functions"),
cl::Hidden);
-
namespace {
class BlockExtractor : public ModulePass {
- SmallVector<BasicBlock *, 16> Blocks;
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
bool EraseFunctions;
- SmallVector<std::pair<std::string, std::string>, 32> BlocksByName;
+ /// Map a function name to groups of blocks.
+ SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
+ BlocksByName;
+
+ void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract) {
+ for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
+ GroupsOfBlocksToExtract) {
+ SmallVector<BasicBlock *, 16> NewGroup;
+ NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end());
+ GroupsOfBlocks.emplace_back(NewGroup);
+ }
+ if (!BlockExtractorFile.empty())
+ loadFile();
+ }
public:
static char ID;
BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
bool EraseFunctions)
- : ModulePass(ID), Blocks(BlocksToExtract.begin(), BlocksToExtract.end()),
- EraseFunctions(EraseFunctions) {
- if (!BlockExtractorFile.empty())
- loadFile();
+ : ModulePass(ID), EraseFunctions(EraseFunctions) {
+ // We want one group per element of the input list.
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
+ for (BasicBlock *BB : BlocksToExtract) {
+ SmallVector<BasicBlock *, 16> NewGroup;
+ NewGroup.push_back(BB);
+ MassagedGroupsOfBlocks.push_back(NewGroup);
+ }
+ init(MassagedGroupsOfBlocks);
}
+
+ BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions)
+ : ModulePass(ID), EraseFunctions(EraseFunctions) {
+ init(GroupsOfBlocksToExtract);
+ }
+
BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {}
bool runOnModule(Module &M) override;
@@ -70,6 +96,12 @@ ModulePass *llvm::createBlockExtractorPass(
const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
return new BlockExtractor(BlocksToExtract, EraseFunctions);
}
+ModulePass *llvm::createBlockExtractorPass(
+ const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
+ &GroupsOfBlocksToExtract,
+ bool EraseFunctions) {
+ return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions);
+}
/// Gets all of the blocks specified in the input file.
void BlockExtractor::loadFile() {
@@ -82,8 +114,17 @@ void BlockExtractor::loadFile() {
Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
for (const auto &Line : Lines) {
- auto FBPair = Line.split(' ');
- BlocksByName.push_back({FBPair.first, FBPair.second});
+ SmallVector<StringRef, 4> LineSplit;
+ Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ if (LineSplit.empty())
+ continue;
+ SmallVector<StringRef, 4> BBNames;
+ LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ if (BBNames.empty())
+ report_fatal_error("Missing bbs name");
+ BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}});
}
}
@@ -130,33 +171,46 @@ bool BlockExtractor::runOnModule(Module &M) {
}
// Get all the blocks specified in the input file.
+ unsigned NextGroupIdx = GroupsOfBlocks.size();
+ GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
for (const auto &BInfo : BlocksByName) {
Function *F = M.getFunction(BInfo.first);
if (!F)
report_fatal_error("Invalid function name specified in the input file");
- auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
- return BB.getName().equals(BInfo.second);
- });
- if (Res == F->end())
- report_fatal_error("Invalid block name specified in the input file");
- Blocks.push_back(&*Res);
+ for (const auto &BBInfo : BInfo.second) {
+ auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
+ return BB.getName().equals(BBInfo);
+ });
+ if (Res == F->end())
+ report_fatal_error("Invalid block name specified in the input file");
+ GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
+ }
+ ++NextGroupIdx;
}
- // Extract basic blocks.
- for (BasicBlock *BB : Blocks) {
- // Check if the module contains BB.
- if (BB->getParent()->getParent() != &M)
- report_fatal_error("Invalid basic block");
- LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
- << BB->getParent()->getName() << ":" << BB->getName()
- << "\n");
- SmallVector<BasicBlock *, 2> BlocksToExtractVec;
- BlocksToExtractVec.push_back(BB);
- if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- BlocksToExtractVec.push_back(II->getUnwindDest());
- CodeExtractor(BlocksToExtractVec).extractCodeRegion();
- ++NumExtracted;
- Changed = true;
+ // Extract each group of basic blocks.
+ for (auto &BBs : GroupsOfBlocks) {
+ SmallVector<BasicBlock *, 32> BlocksToExtractVec;
+ for (BasicBlock *BB : BBs) {
+ // Check if the module contains BB.
+ if (BB->getParent()->getParent() != &M)
+ report_fatal_error("Invalid basic block");
+ LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
+ << BB->getParent()->getName() << ":" << BB->getName()
+ << "\n");
+ BlocksToExtractVec.push_back(BB);
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
+ BlocksToExtractVec.push_back(II->getUnwindDest());
+ ++NumExtracted;
+ Changed = true;
+ }
+ Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion();
+ if (F)
+ LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
+ << "' in: " << F->getName() << '\n');
+ else
+ LLVM_DEBUG(dbgs() << "Failed to extract for group '"
+ << (*BBs.begin())->getName() << "'\n");
}
// Erase the functions.
diff --git a/lib/Transforms/IPO/CalledValuePropagation.cpp b/lib/Transforms/IPO/CalledValuePropagation.cpp
index de62cfc0c1db..20cb3213628e 100644
--- a/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -1,9 +1,8 @@
//===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 81f3634eaf28..ad877ae1786c 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -1,9 +1,8 @@
//===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,6 +90,16 @@ static unsigned getAlignment(GlobalVariable *GV) {
return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
+static bool
+isUnmergeableGlobal(GlobalVariable *GV,
+ const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) {
+ // Only process constants with initializers in the default address space.
+ return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+ GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV);
+}
+
enum class CanMerge { No, Yes };
static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
@@ -155,11 +164,7 @@ static bool mergeConstants(Module &M) {
continue;
}
- // Only process constants with initializers in the default address space.
- if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
- // Don't touch values marked with attribute(used).
- UsedGlobals.count(GV))
+ if (isUnmergeableGlobal(GV, UsedGlobals))
continue;
// This transformation is legal for weak ODR globals in the sense it
@@ -197,11 +202,7 @@ static bool mergeConstants(Module &M) {
GVI != E; ) {
GlobalVariable *GV = &*GVI++;
- // Only process constants with initializers in the default address space.
- if (!GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
- // Don't touch values marked with attribute(used).
- UsedGlobals.count(GV))
+ if (isUnmergeableGlobal(GV, UsedGlobals))
continue;
// We can only replace constant with local linkage.
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index 666f6cc37bfd..e30b33aa4872 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -1,9 +1,8 @@
//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,10 +105,10 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
}
LLVMContext &Ctx = M.getContext();
- Constant *C = M.getOrInsertFunction(
+ FunctionCallee C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
- Function *F = dyn_cast<Function>(C);
+ Function *F = dyn_cast<Function>(C.getCallee());
// Take over the existing function. The frontend emits a weak stub so that the
// linker knows about the symbol; this pass replaces the function body.
F->deleteBody();
@@ -133,9 +132,9 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
BasicBlock *TrapBB = BasicBlock::Create(Ctx, "fail", F);
IRBuilder<> IRBFail(TrapBB);
- Constant *CFICheckFailFn = M.getOrInsertFunction(
- "__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
- Type::getInt8PtrTy(Ctx));
+ FunctionCallee CFICheckFailFn =
+ M.getOrInsertFunction("__cfi_check_fail", Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
IRBFail.CreateBr(ExitBB);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index cb30e8f46a54..968a13110b16 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -1,9 +1,8 @@
//===- DeadArgumentElimination.cpp - Eliminate dead arguments -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -939,7 +938,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
Args, OpBundles, "", Call->getParent());
} else {
- NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ NewCS = CallInst::Create(NFTy, NF, Args, OpBundles, "", Call);
cast<CallInst>(NewCS.getInstruction())
->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp
index d5fef59286dd..fc52db562c62 100644
--- a/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -1,9 +1,8 @@
//===- ElimAvailExtern.cpp - DCE unreachable internal functions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index a744d7f2d2d9..f77b528fc42d 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -1,9 +1,8 @@
//===-- ExtractGV.cpp - Global Value extraction pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 4dc1529ddbf5..b38cb6d0ed3f 100644
--- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -1,9 +1,8 @@
//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -58,6 +57,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
.Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
.Case("sanitize_memory", Attribute::SanitizeMemory)
.Case("sanitize_thread", Attribute::SanitizeThread)
+ .Case("sanitize_memtag", Attribute::SanitizeMemTag)
.Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
.Case("ssp", Attribute::StackProtect)
.Case("sspreq", Attribute::StackProtectReq)
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 4e2a82b56eec..5ccd8bc4b0fb 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,9 +1,8 @@
//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -76,6 +76,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
+STATISTIC(NumNoFree, "Number of functions marked as nofree");
// FIXME: This is disabled by default to avoid exposing security vulnerabilities
// in C/C++ code compiled by clang:
@@ -89,6 +90,10 @@ static cl::opt<bool> DisableNoUnwindInference(
"disable-nounwind-inference", cl::Hidden,
cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
+static cl::opt<bool> DisableNoFreeInference(
+ "disable-nofree-inference", cl::Hidden,
+ cl::desc("Stop inferring nofree attribute during function-attrs pass"));
+
namespace {
using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -256,12 +261,15 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
}
}
+ // If the SCC contains both functions that read and functions that write, then
+ // we cannot add readonly attributes.
+ if (ReadsMemory && WritesMemory)
+ return false;
+
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
bool MadeChange = false;
- assert(!(ReadsMemory && WritesMemory) &&
- "Function marked read-only and write-only");
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
@@ -1228,6 +1236,25 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
return true;
}
+/// Helper for NoFree inference predicate InstrBreaksAttribute.
+static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
+ CallSite CS(&I);
+ if (!CS)
+ return false;
+
+ Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ return true;
+
+ if (Callee->doesNotFreeMemory())
+ return false;
+
+ if (SCCNodes.count(Callee) > 0)
+ return false;
+
+ return true;
+}
+
/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions. Currently it
/// does:
@@ -1281,6 +1308,29 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
},
/* RequiresExactDefinition= */ true});
+ if (!DisableNoFreeInference)
+ // Request to infer nofree attribute for all the functions in the SCC if
+ // every callsite within the SCC does not directly or indirectly free
+ // memory (except for calls to functions within the SCC). Note that nofree
+ // attribute suffers from derefinement - results may change depending on
+ // how functions are optimized. Thus it can be inferred only from exact
+ // definitions.
+ AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+ Attribute::NoFree,
+ // Skip functions known not to free memory.
+ [](const Function &F) { return F.doesNotFreeMemory(); },
+ // Instructions that break non-deallocating assumption.
+ [SCCNodes](Instruction &I) {
+ return InstrBreaksNoFree(I, SCCNodes);
+ },
+ [](Function &F) {
+ LLVM_DEBUG(dbgs()
+ << "Adding nofree attr to fn " << F.getName() << "\n");
+ F.setDoesNotFreeMemory();
+ ++NumNoFree;
+ },
+ /* RequiresExactDefinition= */ true});
+
// Perform all the requested attribute inference actions.
return AI.run(SCCNodes);
}
@@ -1301,7 +1351,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
return false;
Function *F = *SCCNodes.begin();
- if (!F || F->isDeclaration() || F->doesNotRecurse())
+ if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
return false;
// If all of the calls in F are identifiable and are to norecurse functions, F
@@ -1323,7 +1373,8 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
}
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+static bool deriveAttrsInPostOrder(SCCNodeSet &SCCNodes,
+ AARGetterT &&AARGetter,
bool HasUnknownCall) {
bool Changed = false;
@@ -1367,8 +1418,7 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
bool HasUnknownCall = false;
for (LazyCallGraph::Node &N : C) {
Function &F = N.getFunction();
- if (F.hasFnAttribute(Attribute::OptimizeNone) ||
- F.hasFnAttribute(Attribute::Naked)) {
+ if (F.hasOptNone() || F.hasFnAttribute(Attribute::Naked)) {
// Treat any function we're trying not to optimize as if it were an
// indirect call and omit it from the node set used below.
HasUnknownCall = true;
@@ -1441,8 +1491,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
bool ExternalNode = false;
for (CallGraphNode *I : SCC) {
Function *F = I->getFunction();
- if (!F || F->hasFnAttribute(Attribute::OptimizeNone) ||
- F->hasFnAttribute(Attribute::Naked)) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
// External node or function we're trying not to optimize - we both avoid
// transform them and avoid leveraging information they provide.
ExternalNode = true;
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 1223a23512ed..62c7fbd07223 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -1,9 +1,8 @@
//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -778,9 +777,7 @@ void llvm::computeDeadSymbols(
if (!VI)
return;
- // We need to make sure all variants of the symbol are scanned, alias can
- // make one (but not all) alive.
- if (llvm::all_of(VI.getSummaryList(),
+ if (llvm::any_of(VI.getSummaryList(),
[](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
return S->isLive();
}))
@@ -820,12 +817,18 @@ void llvm::computeDeadSymbols(
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
for (auto &Summary : VI.getSummaryList()) {
- GlobalValueSummary *Base = Summary->getBaseObject();
- // Set base value live in case it is an alias.
- Base->setLive(true);
- for (auto Ref : Base->refs())
+ if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
+ // If this is an alias, visit the aliasee VI to ensure that all copies
+ // are marked live and it is added to the worklist for further
+ // processing of its references.
+ visit(AS->getAliaseeVI());
+ continue;
+ }
+
+ Summary->setLive(true);
+ for (auto Ref : Summary->refs())
visit(Ref);
- if (auto *FS = dyn_cast<FunctionSummary>(Base))
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
for (auto Call : FS->calls())
visit(Call.first);
}
@@ -847,14 +850,16 @@ void llvm::computeDeadSymbolsWithConstProp(
bool ImportEnabled) {
computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
if (ImportEnabled) {
- Index.propagateConstants(GUIDPreservedSymbols);
+ Index.propagateAttributes(GUIDPreservedSymbols);
} else {
- // If import is disabled we should drop read-only attribute
+ // If import is disabled we should drop read/write-only attribute
// from all summaries to prevent internalization.
for (auto &P : Index)
for (auto &S : P.second.SummaryList)
- if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
+ if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get())) {
GVS->setReadOnly(false);
+ GVS->setWriteOnly(false);
+ }
}
}
@@ -973,12 +978,15 @@ void llvm::thinLTOResolvePrevailingInModule(
// changed to enable this for aliases.
llvm_unreachable("Expected GV to be converted");
} else {
- // If the original symbols has global unnamed addr and linkonce_odr linkage,
- // it should be an auto hide symbol. Add hidden visibility to the symbol to
- // preserve the property.
- if (GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr() &&
- NewLinkage == GlobalValue::WeakODRLinkage)
+ // If all copies of the original symbol had global unnamed addr and
+ // linkonce_odr linkage, it should be an auto hide symbol. In that case
+ // the thin link would have marked it as CanAutoHide. Add hidden visibility
+ // to the symbol to preserve the property.
+ if (NewLinkage == GlobalValue::WeakODRLinkage &&
+ GS->second->canAutoHide()) {
+ assert(GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr());
GV.setVisibility(GlobalValue::HiddenVisibility);
+ }
LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
<< "` from " << GV.getLinkage() << " to " << NewLinkage
@@ -1047,9 +1055,10 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
ValueToValueMapTy VMap;
Function *NewFn = CloneFunction(Fn, VMap);
- // Clone should use the original alias's linkage and name, and we ensure
- // all uses of alias instead use the new clone (casted if necessary).
+ // Clone should use the original alias's linkage, visibility and name, and we
+ // ensure all uses of alias instead use the new clone (casted if necessary).
NewFn->setLinkage(GA->getLinkage());
+ NewFn->setVisibility(GA->getVisibility());
GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType()));
NewFn->takeName(GA);
return NewFn;
@@ -1057,7 +1066,7 @@ static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
// Internalize values that we marked with specific attribute
// in processGlobalForThinLTO.
-static void internalizeImmutableGVs(Module &M) {
+static void internalizeGVsAfterImport(Module &M) {
for (auto &GV : M.globals())
// Skip GVs which have been converted to declarations
// by dropDeadSymbols.
@@ -1190,7 +1199,7 @@ Expected<bool> FunctionImporter::importFunctions(
NumImportedModules++;
}
- internalizeImmutableGVs(DestModule);
+ internalizeGVsAfterImport(DestModule);
NumImportedFunctions += (ImportedCount - ImportedGVCount);
NumImportedGlobalVars += ImportedGVCount;
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 34de87433367..86b7f3e49ee6 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -1,9 +1,8 @@
//===-- GlobalDCE.cpp - DCE unreachable internal functions ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3005aafd06b1..c4fb3ce77f6e 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1,9 +1,8 @@
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -730,7 +729,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(
- GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
+ GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
+ NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -906,9 +906,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// Replace the cmp X, 0 with a use of the bool value.
// Sink the load to where the compare was, if atomic rules allow us to.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+ Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
+ InitBool->getName() + ".val", false, 0,
LI->getOrdering(), LI->getSyncScopeID(),
- LI->isUnordered() ? (Instruction*)ICI : LI);
+ LI->isUnordered() ? (Instruction *)ICI : LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
@@ -1041,7 +1042,8 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
}
// Insert a load from the global, and use it instead of the malloc.
- Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
+ Value *NL =
+ new LoadInst(GV->getValueType(), GV, GV->getName() + ".val", InsertPt);
U->replaceUsesOfWith(Alloc, NL);
}
}
@@ -1164,10 +1166,10 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
// This is a scalarized version of the load from the global. Just create
// a new Load of the scalarized global.
- Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
- InsertedScalarizedValues,
- PHIsToRewrite),
- LI->getName()+".f"+Twine(FieldNo), LI);
+ Value *V = GetHeapSROAValue(LI->getOperand(0), FieldNo,
+ InsertedScalarizedValues, PHIsToRewrite);
+ Result = new LoadInst(V->getType()->getPointerElementType(), V,
+ LI->getName() + ".f" + Twine(FieldNo), LI);
} else {
PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
@@ -1357,7 +1359,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Within the NullPtrBlock, we need to emit a comparison and branch for each
// pointer, because some may be null while others are not.
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
- Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+ Value *GVVal =
+ new LoadInst(cast<GlobalVariable>(FieldGlobals[i])->getValueType(),
+ FieldGlobals[i], "tmp", NullPtrBlock);
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()));
BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
@@ -1650,6 +1654,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
for(auto *GVe : GVs){
DIGlobalVariable *DGV = GVe->getVariable();
DIExpression *E = GVe->getExpression();
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ unsigned SizeInOctets =
+ DL.getTypeAllocSizeInBits(NewGV->getType()->getElementType()) / 8;
// It is expected that the address of global optimized variable is on
// top of the stack. After optimization, value of that variable will
@@ -1660,10 +1667,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// DW_OP_deref DW_OP_constu <ValMinus>
// DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
SmallVector<uint64_t, 12> Ops = {
- dwarf::DW_OP_deref, dwarf::DW_OP_constu, ValMinus,
- dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
+ dwarf::DW_OP_deref_size, SizeInOctets,
+ dwarf::DW_OP_constu, ValMinus,
+ dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
dwarf::DW_OP_plus};
- E = DIExpression::prependOpcodes(E, Ops, DIExpression::WithStackValue);
+ bool WithStackValue = true;
+ E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
DIGlobalVariableExpression *DGVE =
DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
NewGV->addDebugInfo(DGVE);
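To make the updated debug expression concrete, here is a small standalone sketch (plain C++, not LLVM code) of the value it lets a debugger reconstruct. It assumes, as the surrounding code suggests, that ValMinus is the difference between the two values the original global could hold and ValInit is its initial value; the new DW_OP_deref_size reads only SizeInOctets bytes of the shrunken boolean global before the multiply/add.

#include <cassert>
#include <cstdint>

// original = ValInit + loaded_bool * ValMinus, which is what the opcode list
//   DW_OP_deref_size <octets>, DW_OP_constu ValMinus, DW_OP_mul,
//   DW_OP_constu ValInit, DW_OP_plus
// evaluates to on the DWARF expression stack.
static uint64_t recoverOriginalValue(bool StoredOther, uint64_t ValInit,
                                     uint64_t ValMinus) {
  return ValInit + (StoredOther ? ValMinus : 0);
}

int main() {
  const uint64_t ValInit = 10, ValOther = 42;
  const uint64_t ValMinus = ValOther - ValInit; // assumed meaning of ValMinus
  assert(recoverOriginalValue(false, ValInit, ValMinus) == ValInit);
  assert(recoverOriginalValue(true, ValInit, ValMinus) == ValOther);
  return 0;
}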
@@ -1701,7 +1710,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
- StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
+ LI->getName() + ".b", false, 0,
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
@@ -1717,8 +1727,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
- LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
- LI->getOrdering(), LI->getSyncScopeID(), LI);
+ LoadInst *NLI =
+ new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
+ false, 0, LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1970,7 +1981,12 @@ static bool processInternalGlobal(
}
if (GS.StoredType <= GlobalStatus::InitializerStored) {
LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
- GV->setConstant(true);
+
+ // Don't actually mark a global constant if it's atomic because atomic loads
+ // are implemented by a trivial cmpxchg in some edge-cases and that usually
+ // requires write access to the variable even if it's not actually changed.
+ if (GS.Ordering == AtomicOrdering::NotAtomic)
+ GV->setConstant(true);
// Clean up any obviously simplifiable users now.
CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
@@ -2084,21 +2100,21 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
- // There can be at most one attribute set with a nest attribute.
- unsigned NestIndex;
- if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
- return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
+static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
+ Attribute::AttrKind A) {
+ unsigned AttrIndex;
+ if (Attrs.hasAttrSomewhere(A, &AttrIndex))
+ return Attrs.removeAttribute(C, AttrIndex, A);
return Attrs;
}
-static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
+ F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallSite CS(cast<Instruction>(U));
- CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
+ CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
}
}
@@ -2113,13 +2129,6 @@ static bool hasChangeableCC(Function *F) {
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
- // Don't break the invariant that the inalloca parameter is the only parameter
- // passed in memory.
- // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
- // alloca it uses to the entry block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
- return false;
-
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change CC of the function that either has musttail calls, or is a
@@ -2281,6 +2290,17 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
if (!F->hasLocalLinkage())
continue;
+ // If we have an inalloca parameter that we can safely remove the
+ // inalloca attribute from, do so. This unlocks optimizations that
+ // wouldn't be safe in the presence of inalloca.
+ // FIXME: We should also hoist alloca affected by this to the entry
+ // block if possible.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F->hasAddressTaken()) {
+ RemoveAttribute(F, Attribute::InAlloca);
+ Changed = true;
+ }
+
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(*F);
@@ -2289,8 +2309,8 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
- (isValidCandidateForColdCC(*F, GetBFI, AllCallsCold) &&
- TTI.useColdCCForColdCall(*F))) {
+ (TTI.useColdCCForColdCall(*F) &&
+ isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
F->setCallingConv(CallingConv::Cold);
changeCallSitesToColdCC(F);
Changed = true;
@@ -2313,7 +2333,7 @@ OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveNestAttribute(F);
+ RemoveAttribute(F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
@@ -2808,46 +2828,20 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
-/// the code so we only look for a function with a single basic block, where
-/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
-/// other side-effect free instructions.
-static bool cxxDtorIsEmpty(const Function &Fn,
- SmallPtrSet<const Function *, 8> &CalledFunctions) {
+/// the code so we simply check for 'ret'.
+static bool cxxDtorIsEmpty(const Function &Fn) {
// FIXME: We could eliminate C++ destructors if they're readonly/readnone and
// nounwind, but that doesn't seem worth doing.
if (Fn.isDeclaration())
return false;
- if (++Fn.begin() != Fn.end())
- return false;
-
- const BasicBlock &EntryBlock = Fn.getEntryBlock();
- for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
- I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore debug intrinsics.
- if (isa<DbgInfoIntrinsic>(CI))
- continue;
-
- const Function *CalledFn = CI->getCalledFunction();
-
- if (!CalledFn)
- return false;
-
- SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
-
- // Don't treat recursive functions as empty.
- if (!NewCalledFunctions.insert(CalledFn).second)
- return false;
-
- if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
- return false;
- } else if (isa<ReturnInst>(*I))
- return true; // We're done.
- else if (I->mayHaveSideEffects())
- return false; // Destructor with side effects, bail.
+ for (auto &I : Fn.getEntryBlock()) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (isa<ReturnInst>(I))
+ return true;
+ break;
}
-
return false;
}
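A minimal standalone illustration of the simplified check (toy C++ over a list of instruction names, not LLVM IR): walk the entry block, skip debug intrinsics, and report the destructor as empty only if the first real instruction is a return.

#include <iostream>
#include <string>
#include <vector>

static bool dtorLooksEmpty(const std::vector<std::string> &EntryBlock) {
  for (const std::string &Inst : EntryBlock) {
    if (Inst == "dbg") // stands in for isa<DbgInfoIntrinsic>
      continue;
    return Inst == "ret"; // the first meaningful instruction decides
  }
  return false;
}

int main() {
  std::cout << dtorLooksEmpty({"dbg", "ret"}) << ' '    // 1: effectively empty
            << dtorLooksEmpty({"call", "ret"}) << '\n'; // 0: does real work
}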
@@ -2879,11 +2873,7 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
Function *DtorFn =
dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
- if (!DtorFn)
- continue;
-
- SmallPtrSet<const Function *, 8> CalledFunctions;
- if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
+ if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
continue;
// Just remove the call.
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index 792f4b3052a3..060043a40b89 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -1,9 +1,8 @@
//===- GlobalSplit.cpp - global variable splitter -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index 924a7d5fbd9c..ab1a9a79cad6 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -1,16 +1,28 @@
//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-// Outline cold regions to a separate function.
-// TODO: Update BFI and BPI
-// TODO: Add all the outlined functions to a separate section.
-//
+///
+/// \file
+/// The goal of hot/cold splitting is to improve the memory locality of code.
+/// The splitting pass does this by identifying cold blocks and moving them into
+/// separate functions.
+///
+/// When the splitting pass finds a cold block (referred to as "the sink"), it
+/// grows a maximal cold region around that block. The maximal region contains
+/// all blocks (post-)dominated by the sink [*]. In theory, these blocks are as
+/// cold as the sink. Once a region is found, it's split out of the original
+/// function provided it's profitable to do so.
+///
+/// [*] In practice, there is some added complexity because some blocks are not
+/// safe to extract.
+///
+/// TODO: Use the PM to get domtrees, and preserve BFI/BPI.
+/// TODO: Reorder outlined functions.
+///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/PostOrderIterator.h"
@@ -53,7 +65,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -69,16 +80,12 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);
static cl::opt<int>
- MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
- cl::desc("Code size threshold for outlining within a "
- "single BB (as a multiple of TCC_Basic)"));
+ SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Base penalty for splitting cold code (as a "
+ "multiple of TCC_Basic)"));
namespace {
-struct PostDomTree : PostDomTreeBase<BasicBlock> {
- PostDomTree(Function &F) { recalculate(F); }
-};
-
/// A sequence of basic blocks.
///
/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
@@ -101,13 +108,14 @@ bool blockEndsInUnreachable(const BasicBlock &BB) {
bool unlikelyExecuted(BasicBlock &BB) {
// Exception handling blocks are unlikely executed.
- if (BB.isEHPad())
+ if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
return true;
- // The block is cold if it calls/invokes a cold function.
+ // The block is cold if it calls/invokes a cold function. However, do not
+ // mark sanitizer traps as cold.
for (Instruction &I : BB)
if (auto CS = CallSite(&I))
- if (CS.hasFnAttr(Attribute::Cold))
+ if (CS.hasFnAttr(Attribute::Cold) && !CS->getMetadata("nosanitize"))
return true;
// The block is cold if it has an unreachable terminator, unless it's
@@ -125,38 +133,39 @@ bool unlikelyExecuted(BasicBlock &BB) {
/// Check whether it's safe to outline \p BB.
static bool mayExtractBlock(const BasicBlock &BB) {
- return !BB.hasAddressTaken() && !BB.isEHPad();
-}
-
-/// Check whether \p Region is profitable to outline.
-static bool isProfitableToOutline(const BlockSequence &Region,
- TargetTransformInfo &TTI) {
- if (Region.size() > 1)
- return true;
-
- int Cost = 0;
- const BasicBlock &BB = *Region[0];
- for (const Instruction &I : BB) {
- if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
- continue;
-
- Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
-
- if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
- return true;
- }
- return false;
+ // EH pads are unsafe to outline because doing so breaks EH type tables. It
+ // follows that invoke instructions cannot be extracted, because CodeExtractor
+ // requires unwind destinations to be within the extraction region.
+ //
+ // Resumes that are not reachable from a cleanup landing pad are considered to
+ // be unreachable. It's not safe to split them out either.

+ auto Term = BB.getTerminator();
+ return !BB.hasAddressTaken() && !BB.isEHPad() && !isa<InvokeInst>(Term) &&
+ !isa<ResumeInst>(Term);
}
-/// Mark \p F cold. Return true if it's changed.
-static bool markEntireFunctionCold(Function &F) {
- assert(!F.hasFnAttribute(Attribute::OptimizeNone) && "Can't mark this cold");
+/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
+/// If \p UpdateEntryCount is true (set when this is a new split function and
+/// module has profile data), set entry count to 0 to ensure treated as cold.
+/// Return true if the function is changed.
+static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
+ assert(!F.hasOptNone() && "Can't mark this cold");
bool Changed = false;
+ if (!F.hasFnAttribute(Attribute::Cold)) {
+ F.addFnAttr(Attribute::Cold);
+ Changed = true;
+ }
if (!F.hasFnAttribute(Attribute::MinSize)) {
F.addFnAttr(Attribute::MinSize);
Changed = true;
}
- // TODO: Move this function into a cold section.
+ if (UpdateEntryCount) {
+ // Set the entry count to 0 to ensure it is placed in the unlikely text
+ // section when function sections are enabled.
+ F.setEntryCount(0);
+ Changed = true;
+ }
+
return Changed;
}
@@ -165,24 +174,24 @@ public:
HotColdSplitting(ProfileSummaryInfo *ProfSI,
function_ref<BlockFrequencyInfo *(Function &)> GBFI,
function_ref<TargetTransformInfo &(Function &)> GTTI,
- std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
- : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
+ std::function<OptimizationRemarkEmitter &(Function &)> *GORE,
+ function_ref<AssumptionCache *(Function &)> LAC)
+ : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE), LookupAC(LAC) {}
bool run(Module &M);
private:
+ bool isFunctionCold(const Function &F) const;
bool shouldOutlineFrom(const Function &F) const;
- bool outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT,
- OptimizationRemarkEmitter &ORE);
+ bool outlineColdRegions(Function &F, bool HasProfileSummary);
Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
- OptimizationRemarkEmitter &ORE, unsigned Count);
- SmallPtrSet<const Function *, 2> OutlinedFunctions;
+ OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC, unsigned Count);
ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
function_ref<TargetTransformInfo &(Function &)> GetTTI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
+ function_ref<AssumptionCache *(Function &)> LookupAC;
};
class HotColdSplittingLegacyPass : public ModulePass {
@@ -193,10 +202,10 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
}
bool runOnModule(Module &M) override;
@@ -204,59 +213,141 @@ public:
} // end anonymous namespace
-// Returns false if the function should not be considered for hot-cold split
-// optimization.
-bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
- // Do not try to outline again from an already outlined cold function.
- if (OutlinedFunctions.count(&F))
- return false;
+/// Check whether \p F is inherently cold.
+bool HotColdSplitting::isFunctionCold(const Function &F) const {
+ if (F.hasFnAttribute(Attribute::Cold))
+ return true;
- if (F.size() <= 2)
- return false;
+ if (F.getCallingConv() == CallingConv::Cold)
+ return true;
- // TODO: Consider only skipping functions marked `optnone` or `cold`.
+ if (PSI->isFunctionEntryCold(&F))
+ return true;
- if (F.hasAddressTaken())
- return false;
+ return false;
+}
+// Returns false if the function should not be considered for hot-cold split
+// optimization.
+bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
if (F.hasFnAttribute(Attribute::AlwaysInline))
return false;
if (F.hasFnAttribute(Attribute::NoInline))
return false;
- if (F.getCallingConv() == CallingConv::Cold)
+ if (F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeThread) ||
+ F.hasFnAttribute(Attribute::SanitizeMemory))
return false;
- if (PSI->isFunctionEntryCold(&F))
- return false;
return true;
}
+/// Get the benefit score of outlining \p Region.
+static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
+ TargetTransformInfo &TTI) {
+ // Sum up the code size costs of non-terminator instructions. Tight coupling
+ // with \ref getOutliningPenalty is needed to model the costs of terminators.
+ int Benefit = 0;
+ for (BasicBlock *BB : Region)
+ for (Instruction &I : BB->instructionsWithoutDebug())
+ if (&I != BB->getTerminator())
+ Benefit +=
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+ return Benefit;
+}
+
+/// Get the penalty score for outlining \p Region.
+static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
+ unsigned NumInputs, unsigned NumOutputs) {
+ int Penalty = SplittingThreshold;
+ LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n");
+
+ // If the splitting threshold is set at or below zero, skip the usual
+ // profitability check.
+ if (SplittingThreshold <= 0)
+ return Penalty;
+
+ // The typical code size cost for materializing an argument for the outlined
+ // call.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
+ const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
+ Penalty += CostForArgMaterialization * NumInputs;
+
+ // The typical code size cost for an output alloca, its associated store, and
+ // its associated reload.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputs;
+
+ // Find the number of distinct exit blocks for the region. Use a conservative
+ // check to determine whether control returns from the region.
+ bool NoBlocksReturn = true;
+ SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion;
+ for (BasicBlock *BB : Region) {
+ // If a block has no successors, only assume it does not return if it's
+ // unreachable.
+ if (succ_empty(BB)) {
+ NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator());
+ continue;
+ }
+
+ for (BasicBlock *SuccBB : successors(BB)) {
+ if (find(Region, SuccBB) == Region.end()) {
+ NoBlocksReturn = false;
+ SuccsOutsideRegion.insert(SuccBB);
+ }
+ }
+ }
+
+ // Apply a `noreturn` bonus.
+ if (NoBlocksReturn) {
+ LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
+ << " non-returning terminators\n");
+ Penalty -= Region.size();
+ }
+
+ // Apply a penalty for having more than one successor outside of the region.
+ // This penalty accounts for the switch needed in the caller.
+ if (!SuccsOutsideRegion.empty()) {
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
+ << " non-region successors\n");
+ Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
+ }
+
+ return Penalty;
+}
+
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
DominatorTree &DT,
BlockFrequencyInfo *BFI,
TargetTransformInfo &TTI,
OptimizationRemarkEmitter &ORE,
+ AssumptionCache *AC,
unsigned Count) {
assert(!Region.empty());
// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
- /* BPI */ nullptr, /* AllowVarArgs */ false,
+ /* BPI */ nullptr, AC, /* AllowVarArgs */ false,
/* AllowAlloca */ false,
/* Suffix */ "cold." + std::to_string(Count));
+ // Perform a simple cost/benefit analysis to decide whether or not to permit
+ // splitting.
SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);
-
- // Do not extract regions that have live exit variables.
- if (Outputs.size() > 0) {
- LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
+ int OutliningBenefit = getOutliningBenefit(Region, TTI);
+ int OutliningPenalty =
+ getOutliningPenalty(Region, Inputs.size(), Outputs.size());
+ LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
+ << ", penalty = " << OutliningPenalty << "\n");
+ if (OutliningBenefit <= OutliningPenalty)
return nullptr;
- }
- // TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
Function *OrigF = Region[0]->getParent();
if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();
@@ -269,9 +360,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
}
CI->setIsNoInline();
- // Try to make the outlined code as small as possible on the assumption
- // that it's cold.
- markEntireFunctionCold(*OutF);
+ markFunctionCold(*OutF, BFI != nullptr);
LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
ORE.emit([&]() {
@@ -298,6 +387,8 @@ using BlockTy = std::pair<BasicBlock *, unsigned>;
namespace {
/// A maximal outlining region. This contains all blocks post-dominated by a
/// sink block, the sink block itself, and all blocks dominated by the sink.
+/// If sink-predecessors and sink-successors cannot be extracted in one region,
+/// the static constructor returns a list of suitable extraction regions.
class OutliningRegion {
/// A list of (block, score) pairs. A block's score is non-zero iff it's a
/// viable sub-region entry point. Blocks with higher scores are better entry
@@ -312,12 +403,9 @@ class OutliningRegion {
/// Whether the entire function is cold.
bool EntireFunctionCold = false;
- /// Whether or not \p BB could be the entry point of an extracted region.
- static bool isViableEntryPoint(BasicBlock &BB) { return !BB.isEHPad(); }
-
/// If \p BB is a viable entry point, return \p Score. Return 0 otherwise.
static unsigned getEntryPointScore(BasicBlock &BB, unsigned Score) {
- return isViableEntryPoint(BB) ? Score : 0;
+ return mayExtractBlock(BB) ? Score : 0;
}
/// These scores should be lower than the score for predecessor blocks,
@@ -333,21 +421,23 @@ public:
OutliningRegion(OutliningRegion &&) = default;
OutliningRegion &operator=(OutliningRegion &&) = default;
- static OutliningRegion create(BasicBlock &SinkBB, const DominatorTree &DT,
- const PostDomTree &PDT) {
- OutliningRegion ColdRegion;
-
+ static std::vector<OutliningRegion> create(BasicBlock &SinkBB,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT) {
+ std::vector<OutliningRegion> Regions;
SmallPtrSet<BasicBlock *, 4> RegionBlocks;
+ Regions.emplace_back();
+ OutliningRegion *ColdRegion = &Regions.back();
+
auto addBlockToRegion = [&](BasicBlock *BB, unsigned Score) {
RegionBlocks.insert(BB);
- ColdRegion.Blocks.emplace_back(BB, Score);
- assert(RegionBlocks.size() == ColdRegion.Blocks.size() && "Duplicate BB");
+ ColdRegion->Blocks.emplace_back(BB, Score);
};
// The ancestor farthest-away from SinkBB, and also post-dominated by it.
unsigned SinkScore = getEntryPointScore(SinkBB, ScoreForSinkBlock);
- ColdRegion.SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
+ ColdRegion->SuggestedEntryPoint = (SinkScore > 0) ? &SinkBB : nullptr;
unsigned BestScore = SinkScore;
// Visit SinkBB's ancestors using inverse DFS.
@@ -360,8 +450,8 @@ public:
// If the predecessor is cold and has no predecessors, the entire
// function must be cold.
if (SinkPostDom && pred_empty(&PredBB)) {
- ColdRegion.EntireFunctionCold = true;
- return ColdRegion;
+ ColdRegion->EntireFunctionCold = true;
+ return Regions;
}
// If SinkBB does not post-dominate a predecessor, do not mark the
@@ -376,7 +466,7 @@ public:
// considered as entry points before the sink block.
unsigned PredScore = getEntryPointScore(PredBB, PredIt.getPathLength());
if (PredScore > BestScore) {
- ColdRegion.SuggestedEntryPoint = &PredBB;
+ ColdRegion->SuggestedEntryPoint = &PredBB;
BestScore = PredScore;
}
@@ -384,9 +474,19 @@ public:
++PredIt;
}
- // Add SinkBB to the cold region. It's considered as an entry point before
- // any sink-successor blocks.
- addBlockToRegion(&SinkBB, SinkScore);
+ // If the sink can be added to the cold region, do so. It's considered as
+ // an entry point before any sink-successor blocks.
+ //
+ // Otherwise, split cold sink-successor blocks using a separate region.
+ // This satisfies the requirement that all extraction blocks other than the
+ // first have predecessors within the extraction region.
+ if (mayExtractBlock(SinkBB)) {
+ addBlockToRegion(&SinkBB, SinkScore);
+ } else {
+ Regions.emplace_back();
+ ColdRegion = &Regions.back();
+ BestScore = 0;
+ }
// Find all successors of SinkBB dominated by SinkBB using DFS.
auto SuccIt = ++df_begin(&SinkBB);
@@ -407,7 +507,7 @@ public:
unsigned SuccScore = getEntryPointScore(SuccBB, ScoreForSuccBlock);
if (SuccScore > BestScore) {
- ColdRegion.SuggestedEntryPoint = &SuccBB;
+ ColdRegion->SuggestedEntryPoint = &SuccBB;
BestScore = SuccScore;
}
@@ -415,7 +515,7 @@ public:
++SuccIt;
}
- return ColdRegion;
+ return Regions;
}
/// Whether this region has nothing to extract.
@@ -461,11 +561,7 @@ public:
};
} // namespace
-bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
- BlockFrequencyInfo *BFI,
- TargetTransformInfo &TTI,
- DominatorTree &DT, PostDomTree &PDT,
- OptimizationRemarkEmitter &ORE) {
+bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
bool Changed = false;
// The set of cold blocks.
@@ -479,17 +575,28 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
// the first region to contain a block.
ReversePostOrderTraversal<Function *> RPOT(&F);
+ // Calculate domtrees lazily. This reduces compile-time significantly.
+ std::unique_ptr<DominatorTree> DT;
+ std::unique_ptr<PostDominatorTree> PDT;
+
+ // Calculate BFI lazily (it's only used to query ProfileSummaryInfo). This
+ // reduces compile-time significantly. TODO: When we *do* use BFI, we should
+ // be able to salvage its domtrees instead of recomputing them.
+ BlockFrequencyInfo *BFI = nullptr;
+ if (HasProfileSummary)
+ BFI = GetBFI(F);
+
+ TargetTransformInfo &TTI = GetTTI(F);
+ OptimizationRemarkEmitter &ORE = (*GetORE)(F);
+ AssumptionCache *AC = LookupAC(F);
+
// Find all cold regions.
for (BasicBlock *BB : RPOT) {
- // Skip blocks which can't be outlined.
- if (!mayExtractBlock(*BB))
- continue;
-
// This block is already part of some outlining region.
if (ColdBlocks.count(BB))
continue;
- bool Cold = PSI.isColdBlock(BB, BFI) ||
+ bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
(EnableStaticAnalyis && unlikelyExecuted(*BB));
if (!Cold)
continue;
@@ -499,28 +606,35 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
BB->dump();
});
- auto Region = OutliningRegion::create(*BB, DT, PDT);
- if (Region.empty())
- continue;
+ if (!DT)
+ DT = make_unique<DominatorTree>(F);
+ if (!PDT)
+ PDT = make_unique<PostDominatorTree>(F);
- if (Region.isEntireFunctionCold()) {
- LLVM_DEBUG(dbgs() << "Entire function is cold\n");
- return markEntireFunctionCold(F);
- }
+ auto Regions = OutliningRegion::create(*BB, *DT, *PDT);
+ for (OutliningRegion &Region : Regions) {
+ if (Region.empty())
+ continue;
- // If this outlining region intersects with another, drop the new region.
- //
- // TODO: It's theoretically possible to outline more by only keeping the
- // largest region which contains a block, but the extra bookkeeping to do
- // this is tricky/expensive.
- bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
- return !ColdBlocks.insert(Block.first).second;
- });
- if (RegionsOverlap)
- continue;
+ if (Region.isEntireFunctionCold()) {
+ LLVM_DEBUG(dbgs() << "Entire function is cold\n");
+ return markFunctionCold(F);
+ }
+
+ // If this outlining region intersects with another, drop the new region.
+ //
+ // TODO: It's theoretically possible to outline more by only keeping the
+ // largest region which contains a block, but the extra bookkeeping to do
+ // this is tricky/expensive.
+ bool RegionsOverlap = any_of(Region.blocks(), [&](const BlockTy &Block) {
+ return !ColdBlocks.insert(Block.first).second;
+ });
+ if (RegionsOverlap)
+ continue;
- OutliningWorklist.emplace_back(std::move(Region));
- ++NumColdRegionsFound;
+ OutliningWorklist.emplace_back(std::move(Region));
+ ++NumColdRegionsFound;
+ }
}
// Outline single-entry cold regions, splitting up larger regions as needed.
@@ -529,26 +643,17 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
OutliningRegion Region = OutliningWorklist.pop_back_val();
assert(!Region.empty() && "Empty outlining region in worklist");
do {
- BlockSequence SubRegion = Region.takeSingleEntrySubRegion(DT);
- if (!isProfitableToOutline(SubRegion, TTI)) {
- LLVM_DEBUG({
- dbgs() << "Skipping outlining; not profitable to outline\n";
- SubRegion[0]->dump();
- });
- continue;
- }
-
+ BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
LLVM_DEBUG({
dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
for (BasicBlock *BB : SubRegion)
BB->dump();
});
- Function *Outlined =
- extractColdRegion(SubRegion, DT, BFI, TTI, ORE, OutlinedFunctionID);
+ Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC,
+ OutlinedFunctionID);
if (Outlined) {
++OutlinedFunctionID;
- OutlinedFunctions.insert(Outlined);
Changed = true;
}
} while (!Region.empty());
@@ -559,20 +664,31 @@ bool HotColdSplitting::outlineColdRegions(Function &F, ProfileSummaryInfo &PSI,
bool HotColdSplitting::run(Module &M) {
bool Changed = false;
- OutlinedFunctions.clear();
- for (auto &F : M) {
+ bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr);
+ for (auto It = M.begin(), End = M.end(); It != End; ++It) {
+ Function &F = *It;
+
+ // Do not touch declarations.
+ if (F.isDeclaration())
+ continue;
+
+ // Do not modify `optnone` functions.
+ if (F.hasOptNone())
+ continue;
+
+ // Detect inherently cold functions and mark them as such.
+ if (isFunctionCold(F)) {
+ Changed |= markFunctionCold(F);
+ continue;
+ }
+
if (!shouldOutlineFrom(F)) {
LLVM_DEBUG(llvm::dbgs() << "Skipping " << F.getName() << "\n");
continue;
}
+
LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
- DominatorTree DT(F);
- PostDomTree PDT(F);
- PDT.recalculate(F);
- BlockFrequencyInfo *BFI = GetBFI(F);
- TargetTransformInfo &TTI = GetTTI(F);
- OptimizationRemarkEmitter &ORE = (*GetORE)(F);
- Changed |= outlineColdRegions(F, *PSI, BFI, TTI, DT, PDT, ORE);
+ Changed |= outlineColdRegions(F, HasProfileSummary);
}
return Changed;
}
@@ -594,17 +710,21 @@ bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
ORE.reset(new OptimizationRemarkEmitter(&F));
return *ORE.get();
};
+ auto LookupAC = [this](Function &F) -> AssumptionCache * {
+ if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+ return ACT->lookupAssumptionCache(F);
+ return nullptr;
+ };
- return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
+ return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M);
}
PreservedAnalyses
HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&FAM](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
+ auto LookupAC = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
};
auto GBFI = [&FAM](Function &F) {
@@ -625,7 +745,7 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
+ if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
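As a rough restatement of the cost model introduced by the getOutliningBenefit and getOutliningPenalty helpers above, the following standalone C++ sketch (not the LLVM implementation; the constants simply mirror the TCC_Basic-relative values used in the pass) shows when a region would be considered worth splitting.

#include <iostream>

struct RegionStats {
  int NonTerminatorCost;  // summed code-size cost of the region's instructions
  int NumInputs;          // values that must be passed into the outlined call
  int NumOutputs;         // values live-out of the region
  int NumBlocks;          // blocks in the region
  int ExitsOutsideRegion; // distinct successors outside the region
  bool NoBlockReturns;    // every block is unreachable or stays in the region
};

static bool worthSplitting(const RegionStats &R, int Threshold = 2) {
  int Benefit = R.NonTerminatorCost;
  int Penalty = Threshold;               // base penalty (hotcoldsplit-threshold)
  Penalty += 1 * R.NumInputs;            // materialize each argument at the call
  Penalty += 3 * R.NumOutputs;           // output alloca + store + reload
  if (R.NoBlockReturns)
    Penalty -= R.NumBlocks;              // "noreturn" bonus
  if (R.ExitsOutsideRegion > 0)
    Penalty += R.ExitsOutsideRegion - 1; // switch needed in the caller
  return Benefit > Penalty;              // the pass bails when Benefit <= Penalty
}

int main() {
  RegionStats Tiny{2, 1, 0, 1, 1, false};
  RegionStats Cold{40, 2, 1, 5, 1, true};
  std::cout << worthSplitting(Tiny) << ' ' << worthSplitting(Cold) << '\n'; // 0 1
}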
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 7d55ebecbf92..7dc4d9ee9e34 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -1,9 +1,8 @@
//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,32 +61,55 @@ static bool PropagateConstantsIntoArguments(Function &F) {
// Ignore blockaddress uses.
if (isa<BlockAddress>(UR)) continue;
- // Used by a non-instruction, or not the callee of a function, do not
- // transform.
- if (!isa<CallInst>(UR) && !isa<InvokeInst>(UR))
+ // If no abstract call site was created we did not understand the use, bail.
+ AbstractCallSite ACS(&U);
+ if (!ACS)
return false;
- CallSite CS(cast<Instruction>(UR));
- if (!CS.isCallee(&U))
+ // Mismatched argument count is undefined behavior. Simply bail out to avoid
+ // handling of such situations below (avoiding asserts/crashes).
+ unsigned NumActualArgs = ACS.getNumArgOperands();
+ if (F.isVarArg() ? ArgumentConstants.size() > NumActualArgs
+ : ArgumentConstants.size() != NumActualArgs)
return false;
// Check out all of the potentially constant arguments. Note that we don't
// inspect varargs here.
- CallSite::arg_iterator AI = CS.arg_begin();
Function::arg_iterator Arg = F.arg_begin();
- for (unsigned i = 0, e = ArgumentConstants.size(); i != e;
- ++i, ++AI, ++Arg) {
+ for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) {
// If this argument is known non-constant, ignore it.
if (ArgumentConstants[i].second)
continue;
- Constant *C = dyn_cast<Constant>(*AI);
+ Value *V = ACS.getCallArgOperand(i);
+ Constant *C = dyn_cast_or_null<Constant>(V);
+
+ // Mismatched argument type is undefined behavior. Simply bail out to avoid
+ // handling of such situations below (avoiding asserts/crashes).
+ if (C && Arg->getType() != C->getType())
+ return false;
+
+ // We can only propagate thread independent values through callbacks.
+ // This is different to direct/indirect call sites because for them we
+ // know the thread executing the caller and callee is the same. For
+ // callbacks this is not guaranteed, thus a thread dependent value could
+ // be different for the caller and callee, making it invalid to propagate.
+ if (C && ACS.isCallbackCall() && C->isThreadDependent()) {
+ // Argument became non-constant. If all arguments are non-constant now,
+ // give up on this function.
+ if (++NumNonconstant == ArgumentConstants.size())
+ return false;
+
+ ArgumentConstants[i].second = true;
+ continue;
+ }
+
if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
} else if (C && ArgumentConstants[i].first == C) {
// Still the constant value we think it is.
- } else if (*AI == &*Arg) {
+ } else if (V == &*Arg) {
// Ignore recursive calls passing argument down.
} else {
// Argument became non-constant. If all arguments are non-constant now,
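The per-argument logic above amounts to a small three-state merge across call sites. A minimal sketch of that idea (plain C++; it ignores the vararg, recursive-self-pass, and thread-dependent callback cases handled in the real code):

#include <iostream>
#include <optional>
#include <vector>

struct ArgState {
  std::optional<int> Constant; // the single constant seen so far, if any
  bool Overdefined = false;    // proven non-constant
};

static void mergeCallSite(std::vector<ArgState> &Args,
                          const std::vector<std::optional<int>> &Actuals) {
  for (size_t I = 0; I < Args.size() && I < Actuals.size(); ++I) {
    if (Args[I].Overdefined)
      continue;                       // already known non-constant
    if (!Actuals[I]) {
      Args[I].Overdefined = true;     // non-constant actual argument
    } else if (!Args[I].Constant) {
      Args[I].Constant = *Actuals[I]; // first constant seen
    } else if (*Args[I].Constant != *Actuals[I]) {
      Args[I].Overdefined = true;     // two different constants seen
    }
  }
}

int main() {
  std::vector<ArgState> Args(2);
  mergeCallSite(Args, {1, 7});
  mergeCallSite(Args, {1, 9});
  // Argument 0 can be replaced by the constant 1; argument 1 cannot.
  std::cout << (Args[0].Constant && !Args[0].Overdefined) << ' '
            << Args[1].Overdefined << '\n';
}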
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 973382e2b097..34db75dd8b03 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -1,9 +1,8 @@
//===-- IPO.cpp -----------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,6 +45,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLowerTypeTestsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerLegacyPassPass(Registry);
+ initializeAttributorLegacyPassPass(Registry);
initializePostOrderFunctionAttrsLegacyPassPass(Registry);
initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
initializePruneEHPass(Registry);
diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 470f97b8ba61..7f5511e008e1 100644
--- a/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -1,9 +1,8 @@
//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -26,7 +25,7 @@ static bool inferAllPrototypeAttributes(Module &M,
for (Function &F : M.functions())
// We only infer things using the prototype and the name; we don't need
// definitions.
- if (F.isDeclaration() && !F.hasFnAttribute((Attribute::OptimizeNone)))
+ if (F.isDeclaration() && !F.hasOptNone())
Changed |= inferLibFuncAttributes(F, TLI);
return Changed;
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 82bba1e5c93b..efb71b73cbb7 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -1,9 +1,8 @@
//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -69,9 +68,9 @@ public:
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
- /*GetBFI=*/None, PSI,
- RemarksEnabled ? &ORE : nullptr);
+ return llvm::getInlineCost(
+ cast<CallBase>(*CS.getInstruction()), Params, TTI, GetAssumptionCache,
+ /*GetBFI=*/None, PSI, RemarksEnabled ? &ORE : nullptr);
}
bool runOnSCC(CallGraphSCC &SCC) override;
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 66a6f80f31e4..945f8affae6e 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -1,9 +1,8 @@
//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -672,7 +671,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
setInlineRemark(CS, "trivially dead");
- CG[Caller]->removeCallEdgeFor(CS);
+ CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
Instr->eraseFromParent();
++NumCallsDeleted;
} else {
@@ -974,7 +973,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ if (F.hasOptNone()) {
setInlineRemark(Calls[i].first, "optnone attribute");
continue;
}
@@ -1006,8 +1005,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
auto GetInlineCost = [&](CallSite CS) {
Function &Callee = *CS.getCalledFunction();
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
- PSI, &ORE);
+ bool RemarksEnabled =
+ Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
+ DEBUG_TYPE);
+ return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
+ CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
+ RemarksEnabled ? &ORE : nullptr);
};
// Now process as many calls as we have within this caller in the sequence.
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index a6542d28dfd8..2e269604e379 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -1,9 +1,8 @@
//===-- Internalize.cpp - Mark functions internal -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,11 +27,11 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include <fstream>
-#include <set>
using namespace llvm;
#define DEBUG_TYPE "internalize"
@@ -73,18 +72,15 @@ private:
void LoadFile(StringRef Filename) {
// Load the APIFile...
- std::ifstream In(Filename.data());
- if (!In.good()) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+ MemoryBuffer::getFile(Filename);
+ if (!Buf) {
errs() << "WARNING: Internalize couldn't load file '" << Filename
<< "'! Continuing as if it's empty.\n";
return; // Just continue as if the file were empty
}
- while (In) {
- std::string Symbol;
- In >> Symbol;
- if (!Symbol.empty())
- ExternalNames.insert(Symbol);
- }
+ for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
+ ExternalNames.insert(*I);
}
};
} // end anonymous namespace
@@ -114,7 +110,7 @@ bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
}
bool InternalizePass::maybeInternalize(
- GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
+ GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) {
if (Comdat *C = GV.getComdat()) {
if (ExternalComdats.count(C))
return false;
@@ -141,7 +137,7 @@ bool InternalizePass::maybeInternalize(
// If GV is part of a comdat and is externally visible, keep track of its
// comdat so that we don't internalize any of its members.
void InternalizePass::checkComdatVisibility(
- GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
+ GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) {
Comdat *C = GV.getComdat();
if (!C)
return;
@@ -158,7 +154,7 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
collectUsedGlobalVariables(M, Used, false);
// Collect comdat visibility information for the module.
- std::set<const Comdat *> ExternalComdats;
+ DenseSet<const Comdat *> ExternalComdats;
if (!M.getComdatSymbolTable().empty()) {
for (Function &F : M)
checkComdatVisibility(F, ExternalComdats);
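For reference, the new LoadFile behaviour is roughly the following (a standalone sketch using std::ifstream and std::getline rather than MemoryBuffer and line_iterator; the file name is just an example): one preserved symbol per line, blank lines skipped, and a warning, but no failure, when the file cannot be read.

#include <fstream>
#include <iostream>
#include <set>
#include <string>

static void loadPreservedSymbols(const std::string &Path,
                                 std::set<std::string> &Names) {
  std::ifstream In(Path);
  if (!In) {
    std::cerr << "WARNING: couldn't load '" << Path
              << "'! Continuing as if it's empty.\n";
    return;
  }
  std::string Line;
  while (std::getline(In, Line))
    if (!Line.empty()) // like line_iterator with SkipBlanks enabled
      Names.insert(Line);
}

int main() {
  std::set<std::string> Keep;
  loadPreservedSymbols("api-symbols.txt", Keep); // hypothetical file name
  std::cout << Keep.size() << " symbols preserved\n";
}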
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 733235d45a09..91c7b5f5f135 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -1,9 +1,8 @@
//===- LoopExtractor.cpp - Extract each loop into a new function ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -51,6 +51,7 @@ namespace {
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<AssumptionCacheTracker>();
}
};
}
@@ -139,7 +140,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (ShouldExtractLoop) {
if (NumLoops == 0) return Changed;
--NumLoops;
- CodeExtractor Extractor(DT, *L);
+ AssumptionCache *AC = nullptr;
+ if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
+ AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent());
+ CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC);
if (Extractor.extractCodeRegion() != nullptr) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index 87c65db09517..f7371284f47e 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1,9 +1,8 @@
//===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -549,10 +548,10 @@ ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
}
void LowerTypeTestsModule::allocateByteArrays() {
- std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
- [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
- return BAI1.BitSize > BAI2.BitSize;
- });
+ llvm::stable_sort(ByteArrayInfos,
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
@@ -619,7 +618,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
}
Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
- Value *Byte = B.CreateLoad(ByteAddr);
+ Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);
Value *ByteAndMask =
B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
@@ -1553,11 +1552,10 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
// Order the sets of indices by size. The GlobalLayoutBuilder works best
// when given small index sets first.
- std::stable_sort(
- TypeMembers.begin(), TypeMembers.end(),
- [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
- return O1.size() < O2.size();
- });
+ llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
+ const std::set<uint64_t> &O2) {
+ return O1.size() < O2.size();
+ });
// Create a GlobalLayoutBuilder and provide it with index sets as layout
// fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
@@ -1693,6 +1691,14 @@ void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
}
bool LowerTypeTestsModule::lower() {
+ // If only some of the modules were split, we cannot correctly perform
+ // this transformation. We already checked for the presence of type tests
+ // with partially split modules during the thin link, and would have emitted
+ // an error if any were found, so here we can simply return.
+ if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
+ return false;
+
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *ICallBranchFunnelFunc =
@@ -1702,13 +1708,6 @@ bool LowerTypeTestsModule::lower() {
!ExportSummary && !ImportSummary)
return false;
- // If only some of the modules were split, we cannot correctly handle
- // code that contains type tests.
- if (TypeTestFunc && !TypeTestFunc->use_empty() &&
- ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
- (ImportSummary && ImportSummary->partiallySplitLTOUnits())))
- report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test");
-
if (ImportSummary) {
if (TypeTestFunc) {
for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
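The createBitSetTest hunk above only makes the load type explicit, but the check it emits is easy to state in isolation. A toy standalone version (plain C++, simplified): index a shared byte array at the bit offset and test one bit with the set's mask, which is how several small bitsets can share one array.

#include <cstdint>
#include <iostream>
#include <vector>

// The emitted IR is roughly: gep i8, ByteArray, BitOffset ; load i8 ;
// and <BitMask> ; icmp ne 0.
static bool testBit(const std::vector<uint8_t> &ByteArray, uint64_t BitOffset,
                    uint8_t BitMask) {
  return (ByteArray[BitOffset] & BitMask) != 0;
}

int main() {
  std::vector<uint8_t> Bytes = {0x01, 0x03, 0x00};
  std::cout << testBit(Bytes, 1, 0x02) << ' '
            << testBit(Bytes, 2, 0x02) << '\n'; // 1 0
}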
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 11efe95b10d4..3a08069dcd4a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -1,9 +1,8 @@
//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -190,8 +189,6 @@ public:
void replaceBy(Function *G) const {
F = G;
}
-
- void release() { F = nullptr; }
};
/// MergeFunctions finds functions which will generate identical machine code,
@@ -281,8 +278,8 @@ private:
// Replace G with an alias to F (deleting function G)
void writeAlias(Function *F, Function *G);
- // Replace G with an alias to F if possible, or a thunk to F if
- // profitable. Returns false if neither is the case.
+ // Replace G with an alias to F if possible, or a thunk to F if possible.
+ // Returns false if neither is the case.
bool writeThunkOrAlias(Function *F, Function *G);
/// Replace function F with function G in the function tree.
@@ -383,6 +380,11 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
}
#endif
+/// Check whether \p F is eligible for function merging.
+static bool isEligibleForMerging(Function &F) {
+ return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
+}
+
bool MergeFunctions::runOnModule(Module &M) {
if (skipModule(M))
return false;
@@ -394,17 +396,12 @@ bool MergeFunctions::runOnModule(Module &M) {
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
HashedFuncs;
for (Function &Func : M) {
- if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) {
+ if (isEligibleForMerging(Func)) {
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
}
}
- std::stable_sort(
- HashedFuncs.begin(), HashedFuncs.end(),
- [](const std::pair<FunctionComparator::FunctionHash, Function *> &a,
- const std::pair<FunctionComparator::FunctionHash, Function *> &b) {
- return a.first < b.first;
- });
+ llvm::stable_sort(HashedFuncs, less_first());
auto S = HashedFuncs.begin();
for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
@@ -654,12 +651,16 @@ void MergeFunctions::filterInstsUnrelatedToPDI(
LLVM_DEBUG(dbgs() << " }\n");
}
-// Don't merge tiny functions using a thunk, since it can just end up
-// making the function larger.
-static bool isThunkProfitable(Function * F) {
+/// Whether this function may be replaced by a forwarding thunk.
+static bool canCreateThunkFor(Function *F) {
+ if (F->isVarArg())
+ return false;
+
+ // Don't merge tiny functions using a thunk, since it can just end up
+ // making the function larger.
if (F->size() == 1) {
if (F->front().size() <= 2) {
- LLVM_DEBUG(dbgs() << "isThunkProfitable: " << F->getName()
+ LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
<< " is too small to bother creating a thunk for\n");
return false;
}
@@ -695,6 +696,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
} else {
NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
G->getAddressSpace(), "", G->getParent());
+ NewG->setComdat(G->getComdat());
BB = BasicBlock::Create(F->getContext(), "", NewG);
}
@@ -787,7 +789,7 @@ bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
writeAlias(F, G);
return true;
}
- if (isThunkProfitable(F)) {
+ if (canCreateThunkFor(F)) {
writeThunk(F, G);
return true;
}
@@ -802,9 +804,9 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// Both writeThunkOrAlias() calls below must succeed, either because we can
// create aliases for G and NewF, or because a thunk for F is profitable.
// F here has the same signature as NewF below, so that's what we check.
- if (!isThunkProfitable(F) && (!canCreateAliasFor(F) || !canCreateAliasFor(G))) {
+ if (!canCreateThunkFor(F) &&
+ (!canCreateAliasFor(F) || !canCreateAliasFor(G)))
return;
- }
// Make them both thunks to the same internal function.
Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
@@ -944,25 +946,7 @@ void MergeFunctions::remove(Function *F) {
// For each instruction used by the value, remove() the function that contains
// the instruction. This should happen right before a call to RAUW.
void MergeFunctions::removeUsers(Value *V) {
- std::vector<Value *> Worklist;
- Worklist.push_back(V);
- SmallPtrSet<Value*, 8> Visited;
- Visited.insert(V);
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
-
- for (User *U : V->users()) {
- if (Instruction *I = dyn_cast<Instruction>(U)) {
- remove(I->getFunction());
- } else if (isa<GlobalValue>(U)) {
- // do nothing
- } else if (Constant *C = dyn_cast<Constant>(U)) {
- for (User *UU : C->users()) {
- if (!Visited.insert(UU).second)
- Worklist.push_back(UU);
- }
- }
- }
- }
+ for (User *U : V->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ remove(I->getFunction());
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index da214a1d3b44..733782e8764d 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -1,9 +1,8 @@
//===- PartialInlining.cpp - Inline parts of functions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -182,11 +181,11 @@ struct FunctionOutliningMultiRegionInfo {
// Container for outline regions
struct OutlineRegionInfo {
- OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
+ OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
BasicBlock *EntryBlock, BasicBlock *ExitBlock,
BasicBlock *ReturnBlock)
- : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
- ReturnBlock(ReturnBlock) {}
+ : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
+ ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
SmallVector<BasicBlock *, 8> Region;
BasicBlock *EntryBlock;
BasicBlock *ExitBlock;
@@ -200,10 +199,12 @@ struct PartialInlinerImpl {
PartialInlinerImpl(
std::function<AssumptionCache &(Function &)> *GetAC,
+ function_ref<AssumptionCache *(Function &)> LookupAC,
std::function<TargetTransformInfo &(Function &)> *GTTI,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
ProfileSummaryInfo *ProfSI)
- : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
+ : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
+ GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
bool run(Module &M);
// Main part of the transformation that calls helper functions to find
@@ -223,9 +224,11 @@ struct PartialInlinerImpl {
// Two constructors, one for single region outlining, the other for
// multi-region outlining.
FunctionCloner(Function *F, FunctionOutliningInfo *OI,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC);
FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC);
~FunctionCloner();
// Prepare for function outlining: making sure there is only
@@ -261,11 +264,13 @@ struct PartialInlinerImpl {
std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
OptimizationRemarkEmitter &ORE;
+ function_ref<AssumptionCache *(Function &)> LookupAC;
};
private:
int NumPartialInlining = 0;
std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
+ function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
std::function<TargetTransformInfo &(Function &)> *GetTTI;
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
ProfileSummaryInfo *PSI;
@@ -366,12 +371,17 @@ struct PartialInlinerLegacyPass : public ModulePass {
return ACT->getAssumptionCache(F);
};
+ auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
+ return ACT->lookupAssumptionCache(F);
+ };
+
std::function<TargetTransformInfo &(Function &)> GetTTI =
[&TTIWP](Function &F) -> TargetTransformInfo & {
return TTIWP->getTTI(F);
};
- return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI)
+ return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
+ &GetTTI, NoneType::None, PSI)
.run(M);
}
};
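The new LookupAssumptionCache callback differs from GetAssumptionCache in that it may return null when no cache has been computed yet (ACT->lookupAssumptionCache above and FAM.getCachedResult later in this file behave that way), and the CodeExtractor callers in this patch must tolerate that. A small standard-library sketch of the "lookup, possibly absent" wiring, with a made-up cache type and key:

    #include <map>

    struct AssumptionCacheStub {}; // hypothetical stand-in for AssumptionCache

    int main() {
      std::map<int, AssumptionCacheStub> Caches;
      // Returns the cached entry if present, otherwise nullptr; the caller must
      // be prepared for a null result instead of forcing computation.
      auto Lookup = [&Caches](int FuncId) -> AssumptionCacheStub * {
        auto It = Caches.find(FuncId);
        return It == Caches.end() ? nullptr : &It->second;
      };
      return Lookup(42) == nullptr ? 0 : 1;
    }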
@@ -525,7 +535,6 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
// assert(ReturnBlock && "ReturnBlock is NULL somehow!");
FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
- RegInfo.Region = DominateVector;
OutliningInfo->ORI.push_back(RegInfo);
#ifndef NDEBUG
if (TracePartialInlining) {
@@ -763,8 +772,13 @@ bool PartialInlinerImpl::shouldPartialInline(
Function *Caller = CS.getCaller();
auto &CalleeTTI = (*GetTTI)(*Callee);
- InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
- *GetAssumptionCache, GetBFI, PSI, &ORE);
+ bool RemarksEnabled =
+ Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
+ DEBUG_TYPE);
+ assert(Call && "invalid callsite for partial inline");
+ InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
+ CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
+ RemarksEnabled ? &ORE : nullptr);
if (IC.isAlways()) {
ORE.emit([&]() {
@@ -798,7 +812,7 @@ bool PartialInlinerImpl::shouldPartialInline(
const DataLayout &DL = Caller->getParent()->getDataLayout();
// The savings of eliminating the call:
- int NonWeightedSavings = getCallsiteCost(CS, DL);
+ int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL);
BlockFrequency NormWeightedSavings(NonWeightedSavings);
// Weighted saving is smaller than weighted cost, return false
@@ -855,12 +869,12 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
continue;
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- InlineCost += getCallsiteCost(CallSite(CI), DL);
+ InlineCost += getCallsiteCost(*CI, DL);
continue;
}
if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- InlineCost += getCallsiteCost(CallSite(II), DL);
+ InlineCost += getCallsiteCost(*II, DL);
continue;
}
@@ -949,8 +963,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap(
}
PartialInlinerImpl::FunctionCloner::FunctionCloner(
- Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
- : OrigFunc(F), ORE(ORE) {
+ Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
// Clone the function, so that we can hack away on it.
@@ -973,8 +988,9 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
PartialInlinerImpl::FunctionCloner::FunctionCloner(
Function *F, FunctionOutliningMultiRegionInfo *OI,
- OptimizationRemarkEmitter &ORE)
- : OrigFunc(F), ORE(ORE) {
+ OptimizationRemarkEmitter &ORE,
+ function_ref<AssumptionCache *(Function &)> LookupAC)
+ : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
// Clone the function, so that we can hack away on it.
@@ -1112,7 +1128,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
- ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
+ ClonedFuncBFI.get(), &BPI,
+ LookupAC(*RegionInfo.EntryBlock->getParent()),
+ /* AllowVarargs */ false);
CE.findInputsOutputs(Inputs, Outputs, Sinks);
@@ -1194,7 +1212,7 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
// Extract the body of the if.
Function *OutlinedFunc =
CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
- ClonedFuncBFI.get(), &BPI,
+ ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
/* AllowVarargs */ true)
.extractCodeRegion();
@@ -1258,7 +1276,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
computeOutliningColdRegionsInfo(F, ORE);
if (OMRI) {
- FunctionCloner Cloner(F, OMRI.get(), ORE);
+ FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
#ifndef NDEBUG
if (TracePartialInlining) {
@@ -1291,7 +1309,7 @@ std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
if (!OI)
return {false, nullptr};
- FunctionCloner Cloner(F, OI.get(), ORE);
+ FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
Cloner.NormalizeReturnBlock();
Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
@@ -1485,6 +1503,10 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
return FAM.getResult<AssumptionAnalysis>(F);
};
+ auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
+ return FAM.getCachedResult<AssumptionAnalysis>(F);
+ };
+
std::function<BlockFrequencyInfo &(Function &)> GetBFI =
[&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
@@ -1497,7 +1519,8 @@ PreservedAnalyses PartialInlinerPass::run(Module &M,
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI)
+ if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
+ {GetBFI}, PSI)
.run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9764944dc332..3ea77f08fd3c 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -1,9 +1,8 @@
//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,6 +30,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
@@ -39,9 +39,13 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
using namespace llvm;
@@ -50,14 +54,6 @@ static cl::opt<bool>
cl::ZeroOrMore, cl::desc("Run Partial inlinining pass"));
static cl::opt<bool>
- RunLoopVectorization("vectorize-loops", cl::Hidden,
- cl::desc("Run the Loop vectorization passes"));
-
-static cl::opt<bool>
-RunSLPVectorization("vectorize-slp", cl::Hidden,
- cl::desc("Run the SLP vectorization passes"));
-
-static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
cl::init(false), cl::Hidden,
cl::desc("Run GVN instead of Early CSE after vectorization passes"));
@@ -73,12 +69,6 @@ RunLoopRerolling("reroll-loops", cl::Hidden,
static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
cl::desc("Run the NewGVN pass"));
-static cl::opt<bool>
-RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
- cl::init(true), cl::Hidden,
- cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
- "vectorizer instead of before"));
-
// Experimental option to use CFL-AA
enum class CFLAAType { None, Steensgaard, Andersen, Both };
static cl::opt<CFLAAType>
@@ -104,23 +94,13 @@ static cl::opt<bool>
EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
cl::desc("Enable preparation for ThinLTO."));
+static cl::opt<bool>
+ EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden,
+ cl::desc("Enable performing ThinLTO."));
+
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false), cl::Hidden,
cl::desc("Enable hot-cold splitting pass"));
-
-static cl::opt<bool> RunPGOInstrGen(
- "profile-generate", cl::init(false), cl::Hidden,
- cl::desc("Enable PGO instrumentation."));
-
-static cl::opt<std::string>
- PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden,
- cl::desc("Specify the path of profile data file."));
-
-static cl::opt<std::string> RunPGOInstrUse(
- "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
- cl::desc("Enable use phase of PGO instrumentation and specify the path "
- "of profile data file"));
-
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
@@ -134,10 +114,6 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
-static cl::opt<bool> EnableEarlyCSEMemSSA(
- "enable-earlycse-memssa", cl::init(true), cl::Hidden,
- cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)"));
-
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN hoisting pass (default = off)"));
@@ -156,10 +132,21 @@ static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
cl::desc("Enable the GVN sinking pass (default = off)"));
+// This option is used to simplify testing of SampleFDO optimizations for
+// profile loading.
static cl::opt<bool>
EnableCHR("enable-chr", cl::init(true), cl::Hidden,
cl::desc("Enable control height reduction optimization (CHR)"));
+cl::opt<bool> FlattenedProfileUsed(
+ "flattened-profile-used", cl::init(false), cl::Hidden,
+ cl::desc("Indicate the sample profile being used is flattened, i.e., "
+ "no inline hierachy exists in the profile. "));
+
+cl::opt<bool> EnableOrderFileInstrumentation(
+ "enable-order-file-instrumentation", cl::init(false), cl::Hidden,
+ cl::desc("Enable order file instrumentation (default = off)"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -167,19 +154,26 @@ PassManagerBuilder::PassManagerBuilder() {
Inliner = nullptr;
DisableUnrollLoops = false;
SLPVectorize = RunSLPVectorization;
- LoopVectorize = RunLoopVectorization;
+ LoopVectorize = EnableLoopVectorization;
+ LoopsInterleaved = EnableLoopInterleaving;
RerollLoops = RunLoopRerolling;
NewGVN = RunNewGVN;
+ LicmMssaOptCap = SetLicmMssaOptCap;
+ LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
DisableGVNLoadPRE = false;
+ ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
VerifyInput = false;
VerifyOutput = false;
MergeFunctions = false;
PrepareForLTO = false;
- EnablePGOInstrGen = RunPGOInstrGen;
- PGOInstrGen = PGOOutputFile;
- PGOInstrUse = RunPGOInstrUse;
+ EnablePGOInstrGen = false;
+ EnablePGOCSInstrGen = false;
+ EnablePGOCSInstrUse = false;
+ PGOInstrGen = "";
+ PGOInstrUse = "";
+ PGOSampleUse = "";
PrepareForThinLTO = EnablePrepareForThinLTO;
- PerformThinLTO = false;
+ PerformThinLTO = EnablePerformThinLTO;
DivergentTarget = false;
}
@@ -272,13 +266,19 @@ void PassManagerBuilder::populateFunctionPassManager(
}
// Do PGO instrumentation generation or use pass as the option specified.
-void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
- if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
+void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
+ bool IsCS = false) {
+ if (IsCS) {
+ if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse)
+ return;
+ } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty())
return;
+
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
+ // We will not do this inlining for context-sensitive PGO (when IsCS is true).
if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner &&
- PGOSampleUse.empty()) {
+ PGOSampleUse.empty() && !IsCS) {
// Create preinline pass. We construct an InlineParams object and specify
// the threshold here to avoid the command line options of the regular
// inliner to influence pre-inlining. The only fields of InlineParams we
@@ -296,22 +296,23 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
}
- if (EnablePGOInstrGen) {
- MPM.add(createPGOInstrumentationGenLegacyPass());
+ if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) {
+ MPM.add(createPGOInstrumentationGenLegacyPass(IsCS));
// Add the profile lowering pass.
InstrProfOptions Options;
if (!PGOInstrGen.empty())
Options.InstrProfileOutput = PGOInstrGen;
Options.DoCounterPromotion = true;
+ Options.UseBFIInPromotion = IsCS;
MPM.add(createLoopRotatePass());
- MPM.add(createInstrProfilingLegacyPass(Options));
+ MPM.add(createInstrProfilingLegacyPass(Options, IsCS));
}
if (!PGOInstrUse.empty())
- MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+ MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS));
// Indirect call promotion that promotes intra-module targets only.
// For ThinLTO this is done earlier due to interactions with globalopt
// for imported functions. We don't run this at -O0.
- if (OptLevel > 0)
+ if (OptLevel > 0 && !IsCS)
MPM.add(
createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
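A minimal usage sketch of the new context-sensitive knobs, assuming a driver that owns the module and legacy pass manager; only the field names visible in the constructor changes above are relied on, and the profile path is a hypothetical example:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    // Sketch: request context-sensitive PGO instrumentation from the legacy
    // pipeline builder.
    static void buildCSPGOPipeline(llvm::legacy::PassManager &MPM) {
      llvm::PassManagerBuilder PMB;
      PMB.OptLevel = 2;
      PMB.EnablePGOCSInstrGen = true;           // makes addPGOInstrPasses(MPM, /*IsCS*/ true)
                                                // emit the CS instrumentation after inlining
      PMB.PGOInstrGen = "csprofile.profraw";    // hypothetical raw-profile output path
      PMB.populateModulePassManager(MPM);
    }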
@@ -320,7 +321,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Start of function pass.
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies
+ MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
if (EnableGVNHoist)
MPM.add(createGVNHoistPass());
if (EnableGVNSink) {
@@ -359,7 +360,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
}
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
- MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
if (EnableSimpleLoopUnswitch)
MPM.add(createSimpleLoopUnswitchLegacyPass());
else
@@ -378,8 +379,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (EnableLoopInterchange)
MPM.add(createLoopInterchangePass()); // Interchange loops
- MPM.add(createSimpleLoopUnrollPass(OptLevel,
- DisableUnrollLoops)); // Unroll small loops
+ // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.
@@ -403,14 +405,12 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass());
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
if (RerollLoops)
MPM.add(createLoopRerollPass());
- if (!RunSLPAfterLoopVectorization && SLPVectorize)
- MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
@@ -419,15 +419,23 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
addExtensionsToPM(EP_Peephole, MPM);
if (EnableCHR && OptLevel >= 3 &&
- (!PGOInstrUse.empty() || !PGOSampleUse.empty()))
+ (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen))
MPM.add(createControlHeightReductionLegacyPass());
}
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
+ // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link
+ // is handled separately, so just check this is not the ThinLTO post-link.
+ bool DefaultOrPreLinkPipeline = !PerformThinLTO;
+
if (!PGOSampleUse.empty()) {
MPM.add(createPruneEHPass());
- MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
+ // In ThinLTO mode, when a flattened profile is used, all the available
+ // profile information is annotated in the PreLink phase, so there is
+ // no need to load the profile again in the PostLink phase.
+ if (!(FlattenedProfileUsed && PerformThinLTO))
+ MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
}
// Allow forcing function attributes as a debugging and tuning aid.
@@ -508,6 +516,10 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createCalledValuePropagationPass());
+
+ // Infer attributes on declarations, call sites, arguments, etc.
+ MPM.add(createAttributorLegacyPass());
+
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
// Promote any localized global vars.
MPM.add(createPromoteMemoryToRegisterPass());
@@ -523,9 +535,14 @@ void PassManagerBuilder::populateModulePassManager(
// profile annotation in backend more difficult.
// PGO instrumentation is added during the compile phase for ThinLTO, do
// not run it a second time
- if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
+ if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
+ // Create profile COMDAT variables. The lld linker wants to see all variables
+ // before the LTO/ThinLTO link since it needs to resolve symbols/comdats.
+ if (!PerformThinLTO && EnablePGOCSInstrGen)
+ MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen));
+
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
// for the entire SCC pass run below.
@@ -567,6 +584,17 @@ void PassManagerBuilder::populateModulePassManager(
// and saves running remaining passes on the eliminated functions.
MPM.add(createEliminateAvailableExternallyPass());
+ // CSFDO instrumentation and use pass. Don't invoke this for the LTO or
+ // ThinLTO prepare passes -- the actual pass will be invoked after all
+ // inlining is performed.
+ // This needs to run after COMDAT variables have been eliminated
+ // (i.e. after EliminateAvailableExternallyPass).
+ if (!(PrepareForLTO || PrepareForThinLTO))
+ addPGOInstrPasses(MPM, /* IsCS */ true);
+
+ if (EnableOrderFileInstrumentation)
+ MPM.add(createInstrOrderFilePass());
+
MPM.add(createReversePostOrderFunctionAttrsPass());
// The inliner performs some kind of dead code elimination as it goes,
@@ -605,7 +633,7 @@ void PassManagerBuilder::populateModulePassManager(
// later might get benefit of no-alias assumption in clone loop.
if (UseLoopVersioningLICM) {
MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
- MPM.add(createLICMPass()); // Hoist loop invariants
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
// We add a fresh GlobalsModRef run at this point. This is particularly
@@ -640,7 +668,7 @@ void PassManagerBuilder::populateModulePassManager(
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
MPM.add(createLoopDistributePass());
- MPM.add(createLoopVectorizePass(DisableUnrollLoops, !LoopVectorize));
+ MPM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
// Eliminate loads by forwarding stores from the previous iteration to loads
// of the current iteration.
@@ -662,7 +690,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createEarlyCSEPass());
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
- MPM.add(createLICMPass());
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
@@ -675,7 +703,7 @@ void PassManagerBuilder::populateModulePassManager(
// before SLP vectorization.
MPM.add(createCFGSimplificationPass(1, true, true, false, true));
- if (RunSLPAfterLoopVectorization && SLPVectorize) {
+ if (SLPVectorize) {
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
if (OptLevel > 1 && ExtraVectorizerPasses) {
MPM.add(createEarlyCSEPass());
@@ -692,8 +720,9 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopUnrollAndJamPass(OptLevel));
}
- MPM.add(createLoopUnrollPass(OptLevel,
- DisableUnrollLoops)); // Unroll small loops
+ // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
if (!DisableUnrollLoops) {
// LoopUnroll may generate some redundancy to clean up.
@@ -703,7 +732,7 @@ void PassManagerBuilder::populateModulePassManager(
// unrolled loop is a inner loop, then the prologue will be inside the
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
- MPM.add(createLICMPass());
+ MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
}
MPM.add(createWarnMissedTransformationsPass());
@@ -722,6 +751,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createConstantMergePass()); // Merge dup global constants
}
+ // See comment in the new PM for justification of scheduling splitting at
+ // this stage (\ref buildModuleSimplificationPipeline).
+ if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
+ MPM.add(createHotColdSplittingPass());
+
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
@@ -738,9 +772,6 @@ void PassManagerBuilder::populateModulePassManager(
// flattening of blocks.
MPM.add(createDivRemPairsPass());
- if (EnableHotColdSplit)
- MPM.add(createHotColdSplittingPass());
-
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
MPM.add(createCFGSimplificationPass());
@@ -793,6 +824,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
PM.add(createCalledValuePropagationPass());
+
+ // Infer attributes on declarations, call sites, arguments, etc.
+ PM.add(createAttributorLegacyPass());
}
// Infer attributes about definitions. The readnone attribute in particular is
@@ -842,6 +876,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createPruneEHPass()); // Remove dead EH info.
+ // CSFDO instrumentation and use pass.
+ addPGOInstrPasses(PM, /* IsCS */ true);
+
// Optimize globals again if we ran the inliner.
if (RunInliner)
PM.add(createGlobalOptimizerPass());
@@ -859,11 +896,16 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Break up allocas
PM.add(createSROAPass());
- // Run a few AA driven optimizations here and now, to cleanup the code.
+ // LTO provides additional opportunities for tailcall elimination due to
+ // link-time inlining, and visibility of nocapture attribute.
+ PM.add(createTailCallEliminationPass());
+
+ // Infer attributes on declarations, call sites, arguments, etc.
PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
+ // Run a few AA driven optimizations here and now, to cleanup the code.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
- PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
@@ -878,11 +920,13 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
if (EnableLoopInterchange)
PM.add(createLoopInterchangePass());
- PM.add(createSimpleLoopUnrollPass(OptLevel,
- DisableUnrollLoops)); // Unroll small loops
+ // Unroll small loops
+ PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
PM.add(createLoopVectorizePass(true, !LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops));
+ PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
+ ForgetAllSCEVInLoopUnroll));
PM.add(createWarnMissedTransformationsPass());
@@ -896,9 +940,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createBitTrackingDCEPass());
// More scalar chains could be vectorized due to more alias information
- if (RunSLPAfterLoopVectorization)
- if (SLPVectorize)
- PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (SLPVectorize)
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
// After vectorization, assume intrinsics may tell us more about pointer
// alignments.
@@ -913,6 +956,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
void PassManagerBuilder::addLateLTOOptimizationPasses(
legacy::PassManagerBase &PM) {
+ // See comment in the new PM for justification of scheduling splitting at
+ // this stage (\ref buildLTODefaultPipeline).
+ if (EnableHotColdSplit)
+ PM.add(createHotColdSplittingPass());
+
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -968,6 +1016,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (VerifyInput)
PM.add(createVerifierPass());
+ addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM);
+
if (OptLevel != 0)
addLTOOptimizationPasses(PM);
else {
@@ -989,6 +1039,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
+ addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM);
+
if (VerifyOutput)
PM.add(createVerifierPass());
}
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index ae586c017471..cb3915dfb678 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -1,9 +1,8 @@
//===- PruneEH.cpp - Pass which deletes unused exception handlers ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -204,7 +203,8 @@ static bool SimplifyFunction(Function *F, CallGraph &CG) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
if (CallInst *CI = dyn_cast<CallInst>(I++))
- if (CI->doesNotReturn() && !isa<UnreachableInst>(I)) {
+ if (CI->doesNotReturn() && !CI->isMustTailCall() &&
+ !isa<UnreachableInst>(I)) {
// This call calls a function that cannot return. Insert an
// unreachable instruction after it and simplify the code. Do this
// by splitting the BB, adding the unreachable, then deleting the
@@ -242,12 +242,12 @@ static void DeleteBasicBlock(BasicBlock *BB, CallGraph &CG) {
break;
}
- if (auto CS = CallSite (&*I)) {
- const Function *Callee = CS.getCalledFunction();
+ if (auto *Call = dyn_cast<CallBase>(&*I)) {
+ const Function *Callee = Call->getCalledFunction();
if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
- CGN->removeCallEdgeFor(CS);
+ CGN->removeCallEdgeFor(*Call);
else if (!Callee->isIntrinsic())
- CGN->removeCallEdgeFor(CS);
+ CGN->removeCallEdgeFor(*Call);
}
if (!I->use_empty())
diff --git a/lib/Transforms/IPO/SCCP.cpp b/lib/Transforms/IPO/SCCP.cpp
index d2c34abfc132..7be3608bd2ec 100644
--- a/lib/Transforms/IPO/SCCP.cpp
+++ b/lib/Transforms/IPO/SCCP.cpp
@@ -79,6 +79,7 @@ char IPSCCPLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 9f123c2b875e..877d20e72ffc 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -1,9 +1,8 @@
//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -319,6 +318,14 @@ protected:
/// Optimization Remark Emitter used to emit diagnostic remarks.
OptimizationRemarkEmitter *ORE = nullptr;
+
+ // Information recorded when we declined to inline a call site because we
+ // determined it is too cold; accumulated for each callee function.
+ // Initially this is just the entry count.
+ struct NotInlinedProfileInfo {
+ uint64_t entryCount;
+ };
+ DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -745,8 +752,9 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
// when cost exceeds threshold without checking all IRs in the callee.
// The actual cost does not matter because we only check isNever() to
// see if it is legal to inline the callsite.
- InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
- None, nullptr, nullptr);
+ InlineCost Cost =
+ getInlineCost(cast<CallBase>(*I), Params, GetTTI(*CalledFunction), GetAC,
+ None, nullptr, nullptr);
if (Cost.isNever()) {
ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
<< "incompatible inlining");
@@ -779,6 +787,8 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
bool SampleProfileLoader::inlineHotFunctions(
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+
+ DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
bool LocalChanged = false;
@@ -791,6 +801,8 @@ bool SampleProfileLoader::inlineHotFunctions(
if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
Candidates.push_back(&I);
+ if (FS->getEntrySamples() > 0)
+ localNotInlinedCallSites.try_emplace(&I, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
}
@@ -823,6 +835,9 @@ bool SampleProfileLoader::inlineHotFunctions(
if (CalleeFunctionName == F.getName())
continue;
+ if (!callsiteIsHot(FS, PSI))
+ continue;
+
const char *Reason = "Callee function not available";
auto R = SymbolMap.find(CalleeFunctionName);
if (R != SymbolMap.end() && R->getValue() &&
@@ -836,8 +851,10 @@ bool SampleProfileLoader::inlineHotFunctions(
PromotedInsns.insert(I);
// If profile mismatches, we should not attempt to inline DI.
if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(DI))
+ inlineCallInstruction(DI)) {
+ localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ }
} else {
LLVM_DEBUG(dbgs()
<< "\nFailed to promote indirect call to "
@@ -846,8 +863,10 @@ bool SampleProfileLoader::inlineHotFunctions(
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(I))
+ if (inlineCallInstruction(I)) {
+ localNotInlinedCallSites.erase(I);
LocalChanged = true;
+ }
} else if (IsThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
@@ -859,6 +878,18 @@ bool SampleProfileLoader::inlineHotFunctions(
break;
}
}
+
+ // Accumulate not-inlined callsite information into notInlinedCallInfo.
+ for (const auto &Pair : localNotInlinedCallSites) {
+ Instruction *I = Pair.getFirst();
+ Function *Callee = CallSite(I).getCalledFunction();
+ if (!Callee || Callee->isDeclaration())
+ continue;
+ const FunctionSamples *FS = Pair.getSecond();
+ auto pair =
+ notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
+ pair.first->second.entryCount += FS->getEntrySamples();
+ }
return Changed;
}
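The accumulation loop above uses DenseMap::try_emplace so the first declined callsite seeds a zeroed entry and later declines for the same callee just bump its count. The same idiom with the standard library (the callee name and sample counts are made up):

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    int main() {
      std::unordered_map<const char *, uint64_t> NotInlinedEntryCount;
      const char *Callee = "cold_callee"; // hypothetical callee
      for (uint64_t EntrySamples : {10u, 32u}) {
        // try_emplace inserts {Callee, 0} only if absent and returns the slot
        // either way, so the count can be accumulated unconditionally.
        auto It = NotInlinedEntryCount.try_emplace(Callee, 0).first;
        It->second += EntrySamples;
      }
      assert(NotInlinedEntryCount[Callee] == 42);
      return 0;
    }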
@@ -1299,10 +1330,10 @@ void SampleProfileLoader::propagateWeights(Function &F) {
annotateValueSite(*I.getParent()->getParent()->getParent(), I,
SortedCallTargets, Sum, IPVK_IndirectCallTarget,
SortedCallTargets.size());
- } else if (!dyn_cast<IntrinsicInst>(&I)) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(BlockWeights[BB]);
- I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ } else if (!isa<IntrinsicInst>(&I)) {
+ I.setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(
+ {static_cast<uint32_t>(BlockWeights[BB])}));
}
}
}
@@ -1568,8 +1599,9 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
return false;
PSI = _PSI;
- if (M.getProfileSummary() == nullptr)
- M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
+ if (M.getProfileSummary(/* IsCS */ false) == nullptr)
+ M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
+ ProfileSummary::PSK_Sample);
// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
@@ -1601,6 +1633,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
clearFunctionData();
retval |= runOnFunction(F, AM);
}
+
+ // Account for cold calls not inlined....
+ for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
+ notInlinedCallInfo)
+ updateProfileCallee(pair.first, pair.second.entryCount);
+
return retval;
}
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 3c3c5dd19d1f..106db3c8bd9d 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -1,9 +1,8 @@
//===-- StripDeadPrototypes.cpp - Remove unused function declarations ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index c9afb060a91a..67a473612fc1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -1,9 +1,8 @@
//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
index ba4efb3ff60d..45fd432fd721 100644
--- a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
+++ b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp
@@ -1,9 +1,8 @@
//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 510ecb516dc2..24c476376c14 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -1,9 +1,8 @@
//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -418,34 +417,53 @@ void splitAndWriteThinLTOBitcode(
}
}
-// Returns whether this module needs to be split because splitting is
-// enabled and it uses type metadata.
-bool requiresSplit(Module &M) {
- // First check if the LTO Unit splitting has been enabled.
+// Check if the LTO Unit splitting has been enabled.
+bool enableSplitLTOUnit(Module &M) {
bool EnableSplitLTOUnit = false;
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("EnableSplitLTOUnit")))
EnableSplitLTOUnit = MD->getZExtValue();
- if (!EnableSplitLTOUnit)
- return false;
+ return EnableSplitLTOUnit;
+}
- // Module only needs to be split if it contains type metadata.
+// Returns whether this module contains any type metadata.
+bool hasTypeMetadata(Module &M) {
for (auto &GO : M.global_objects()) {
if (GO.hasMetadata(LLVMContext::MD_type))
return true;
}
-
return false;
}
void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
- // Split module if splitting is enabled and it contains any type metadata.
- if (requiresSplit(M))
- return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+ std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;
+ // See if this module has any type metadata. If so, we try to split it
+ // or at least promote type ids to enable WPD.
+ if (hasTypeMetadata(M)) {
+ if (enableSplitLTOUnit(M))
+ return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+ // Promote type ids as needed for index-based WPD.
+ std::string ModuleId = getUniqueModuleId(&M);
+ if (!ModuleId.empty()) {
+ promoteTypeIds(M, ModuleId);
+ // Need to rebuild the index so that it contains type metadata
+ // for the newly promoted type ids.
+ // FIXME: Probably should not bother building the index at all
+ // in the caller of writeThinLTOBitcode (which does so via the
+ // ModuleSummaryIndexAnalysis pass), since we have to rebuild it
+ // anyway whenever there is type metadata (here or in
+ // splitAndWriteThinLTOBitcode). Just always build it once via the
+ // buildModuleSummaryIndex when Module(s) are ready.
+ ProfileSummaryInfo PSI(M);
+ NewIndex = llvm::make_unique<ModuleSummaryIndex>(
+ buildModuleSummaryIndex(M, nullptr, &PSI));
+ Index = NewIndex.get();
+ }
+ }
- // Otherwise we can just write it out as a regular module.
+ // Write it out as an unsplit ThinLTO module.
// Save the module hash produced for the full bitcode, which will
// be used in the backends, and use that in the minimized bitcode
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 48bd0cda759d..6b6dd6194e17 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1,9 +1,8 @@
//===- WholeProgramDevirt.cpp - Whole program virtual call optimization ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -882,7 +881,7 @@ void DevirtModule::tryICallBranchFunnel(
}
BasicBlock *BB = BasicBlock::Create(M.getContext(), "", JT, nullptr);
- Constant *Intr =
+ Function *Intr =
Intrinsic::getDeclaration(&M, llvm::Intrinsic::icall_branch_funnel, {});
auto *CI = CallInst::Create(Intr, JTArgs, "", BB);
@@ -921,9 +920,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
NewArgs.push_back(Int8PtrTy);
for (Type *T : CS.getFunctionType()->params())
NewArgs.push_back(T);
- PointerType *NewFT = PointerType::getUnqual(
+ FunctionType *NewFT =
FunctionType::get(CS.getFunctionType()->getReturnType(), NewArgs,
- CS.getFunctionType()->isVarArg()));
+ CS.getFunctionType()->isVarArg());
+ PointerType *NewFTPtr = PointerType::getUnqual(NewFT);
IRBuilder<> IRB(CS.getInstruction());
std::vector<Value *> Args;
@@ -933,10 +933,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
CallSite NewCS;
if (CS.isCall())
- NewCS = IRB.CreateCall(IRB.CreateBitCast(JT, NewFT), Args);
+ NewCS = IRB.CreateCall(NewFT, IRB.CreateBitCast(JT, NewFTPtr), Args);
else
NewCS = IRB.CreateInvoke(
- IRB.CreateBitCast(JT, NewFT),
+ NewFT, IRB.CreateBitCast(JT, NewFTPtr),
cast<InvokeInst>(CS.getInstruction())->getNormalDest(),
cast<InvokeInst>(CS.getInstruction())->getUnwindDest(), Args);
NewCS.setCallingConv(CS.getCallingConv());
@@ -1183,7 +1183,7 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Value *Addr =
B.CreateGEP(Int8Ty, B.CreateBitCast(Call.VTable, Int8PtrTy), Byte);
if (RetType->getBitWidth() == 1) {
- Value *Bits = B.CreateLoad(Addr);
+ Value *Bits = B.CreateLoad(Int8Ty, Addr);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
@@ -1495,8 +1495,10 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
// The type of the function in the declaration is irrelevant because every
// call site will cast it to the correct type.
- auto *SingleImpl = M.getOrInsertFunction(
- Res.SingleImplName, Type::getVoidTy(M.getContext()));
+ Constant *SingleImpl =
+ cast<Constant>(M.getOrInsertFunction(Res.SingleImplName,
+ Type::getVoidTy(M.getContext()))
+ .getCallee());
// This is the import phase so we should not be exporting anything.
bool IsExported = false;
@@ -1538,8 +1540,12 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
}
if (Res.TheKind == WholeProgramDevirtResolution::BranchFunnel) {
- auto *JT = M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
- Type::getVoidTy(M.getContext()));
+ // The type of the function is irrelevant, because it's bitcast at calls
+ // anyhow.
+ Constant *JT = cast<Constant>(
+ M.getOrInsertFunction(getGlobalName(Slot, {}, "branch_funnel"),
+ Type::getVoidTy(M.getContext()))
+ .getCallee());
bool IsExported = false;
applyICallBranchFunnel(SlotInfo, JT, IsExported);
assert(!IsExported);
@@ -1557,23 +1563,20 @@ void DevirtModule::removeRedundantTypeTests() {
}
bool DevirtModule::run() {
+ // If only some of the modules were split, we cannot correctly perform
+ // this transformation. We already checked for the presence of type tests
+ // with partially split modules during the thin link, and would have emitted
+ // an error if any were found, so here we can simply return.
+ if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+ (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
+ return false;
+
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
- // If only some of the modules were split, we cannot correctly handle
- // code that contains type tests or type checked loads.
- if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
- (ImportSummary && ImportSummary->partiallySplitLTOUnits())) {
- if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
- (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
- report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test "
- "or llvm.type.checked.load");
- return false;
- }
-
// Normally if there are no users of the devirtualization intrinsics in the
// module, this pass has nothing to do. But if we are exporting, we also need
// to handle any users that appear only in the function summaries.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6e196bfdbd25..ba15b023f2a3 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,9 +1,8 @@
//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -823,6 +822,47 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
return nullptr;
}
+/// Wrapping flags may allow combining constants separated by an extend.
+static Instruction *foldNoWrapAdd(BinaryOperator &Add,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
+ Type *Ty = Add.getType();
+ Constant *Op1C;
+ if (!match(Op1, m_Constant(Op1C)))
+ return nullptr;
+
+ // Try this match first because it results in an add in the narrow type.
+ // (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
+ Value *X;
+ const APInt *C1, *C2;
+ if (match(Op1, m_APInt(C1)) &&
+ match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
+ C1->isNegative() && C1->sge(-C2->sext(C1->getBitWidth()))) {
+ Constant *NewC =
+ ConstantInt::get(X->getType(), *C2 + C1->trunc(C2->getBitWidth()));
+ return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
+ }
+
+ // More general combining of constants in the wide type.
+ // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
+ Constant *NarrowC;
+ if (match(Op0, m_OneUse(m_SExt(m_NSWAdd(m_Value(X), m_Constant(NarrowC)))))) {
+ Constant *WideC = ConstantExpr::getSExt(NarrowC, Ty);
+ Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+ Value *WideX = Builder.CreateSExt(X, Ty);
+ return BinaryOperator::CreateAdd(WideX, NewC);
+ }
+ // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
+ if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_Constant(NarrowC)))))) {
+ Constant *WideC = ConstantExpr::getZExt(NarrowC, Ty);
+ Constant *NewC = ConstantExpr::getAdd(WideC, Op1C);
+ Value *WideX = Builder.CreateZExt(X, Ty);
+ return BinaryOperator::CreateAdd(WideX, NewC);
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
Constant *Op1C;
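The narrow-type variant of the new fold, (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1))), only fires when C1 is negative and no larger in magnitude than the extended C2, so the narrow add keeps its no-unsigned-wrap guarantee. A quick arithmetic check of that algebra with i8/i32 stand-ins (the values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t X = 10, C2 = 20;   // X + C2 known not to wrap (the "nuw" part)
      const int32_t C1 = -5;           // negative, and C1 >= -sext(C2)
      const uint32_t Before = uint32_t(uint8_t(X + C2)) + uint32_t(C1);
      const uint8_t NewC = uint8_t(C2 + uint8_t(C1));     // C2 + trunc(C1)
      const uint32_t After = uint32_t(uint8_t(X + NewC)); // zext (X + NewC)
      assert(Before == After && "both sides compute 25");
      return 0;
    }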
@@ -832,7 +872,14 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
return NV;
- Value *X, *Y;
+ Value *X;
+ Constant *Op00C;
+
+ // add (sub C1, X), C2 --> sub (add C1, C2), X
+ if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
+ return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);
+
+ Value *Y;
// add (sub X, Y), -1 --> add (not Y), X
if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
@@ -852,6 +899,11 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
if (!match(Op1, m_APInt(C)))
return nullptr;
+ // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
+ const APInt *C2;
+ if (match(Op0, m_Or(m_Value(), m_APInt(C2))) && *C2 == -*C)
+ return BinaryOperator::CreateXor(Op0, ConstantInt::get(Add.getType(), *C2));
+
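The (X | C2) + C --> (X | C2) ^ C2 rewrite (with C2 == -C) works because every bit of C2 is already set in X | C2, so adding C, i.e. subtracting C2, just clears those bits, which is exactly what the xor does. A brute-force spot check over a few constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t C : {1u, 3u, 16u, 0x80000000u})
        for (uint32_t X : {0u, 5u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
          const uint32_t C2 = 0u - C; // the fold requires C2 == -C
          assert(((X | C2) + C) == ((X | C2) ^ C2));
        }
      return 0;
    }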
if (C->isSignMask()) {
// If wrapping is not allowed, then the addition must set the sign bit:
// X + (signmask) --> X | signmask
@@ -866,19 +918,10 @@ Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) {
// Is this add the last step in a convoluted sext?
// add(zext(xor i16 X, -32768), -32768) --> sext X
Type *Ty = Add.getType();
- const APInt *C2;
if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
return CastInst::Create(Instruction::SExt, X, Ty);
- // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
- if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) &&
- C->isNegative() && C->sge(-C2->sext(C->getBitWidth()))) {
- Constant *NewC =
- ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth()));
- return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty);
- }
-
if (C->isOneValue() && Op0->hasOneUse()) {
// add (sext i1 X), 1 --> zext (not X)
// TODO: The smallest IR representation is (select X, 0, 1), and that would
@@ -1032,6 +1075,28 @@ static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
return BinaryOperator::CreateNot(NotMask, I.getName());
}
+static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
+ Type *Ty = I.getType();
+ auto getUAddSat = [&]() {
+ return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
+ };
+
+ // add (umin X, ~Y), Y --> uaddsat X, Y
+ Value *X, *Y;
+ if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
+ m_Deferred(Y))))
+ return CallInst::Create(getUAddSat(), { X, Y });
+
+ // add (umin X, ~C), C --> uaddsat X, C
+ const APInt *C, *NotC;
+ if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
+ *C == ~*NotC)
+ return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
+
+ return nullptr;
+}
+
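The first pattern, add (umin X, ~Y), Y --> uaddsat X, Y, is the classic branch-free unsigned saturating add: clamping X to ~Y before the add guarantees the sum cannot exceed the all-ones value. A quick check of the identity with 32-bit unsigned values:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 7u, 0xFFFFFFF0u, 0xFFFFFFFFu})
        for (uint32_t Y : {0u, 1u, 100u, 0xFFFFFFF0u, 0xFFFFFFFFu}) {
          const uint64_t Wide = uint64_t(X) + Y;                            // reference result
          const uint32_t Sat = Wide > 0xFFFFFFFFu ? 0xFFFFFFFFu : uint32_t(Wide);
          assert(std::min(X, ~Y) + Y == Sat);                               // the folded form
        }
      return 0;
    }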
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1051,6 +1116,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *X = foldAddWithConstant(I))
return X;
+ if (Instruction *X = foldNoWrapAdd(I, Builder))
+ return X;
+
// FIXME: This should be moved into the above helper function to allow these
// transforms for general constant or constant splat vectors.
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
@@ -1119,6 +1187,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(RHS, A);
}
+ // Canonicalize sext to zext for better value tracking potential.
+ // add A, sext(B) --> sub A, zext(B)
+ if (match(&I, m_c_Add(m_Value(A), m_OneUse(m_SExt(m_Value(B))))) &&
+ B->getType()->isIntOrIntVectorTy(1))
+ return BinaryOperator::CreateSub(A, Builder.CreateZExt(B, Ty));
+
// A + -B --> A - B
if (match(RHS, m_Neg(m_Value(B))))
return BinaryOperator::CreateSub(LHS, B);
@@ -1128,7 +1202,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (A + 1) + ~B --> A - B
// ~B + (A + 1) --> A - B
- if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))))
+ // (~B + A) + 1 --> A - B
+ // (A + ~B) + 1 --> A - B
+ if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))) ||
+ match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
return BinaryOperator::CreateSub(A, B);
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
@@ -1225,6 +1302,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Instruction *V = canonicalizeLowbitMask(I, Builder))
return V;
+ if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
+ return SatAdd;
+
return Changed ? &I : nullptr;
}
@@ -1500,6 +1580,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (match(Op1, m_OneUse(m_Add(m_Value(X), m_One()))))
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Op0);
+ // Y - ~X --> (X + 1) + Y
+ if (match(Op1, m_OneUse(m_Not(m_Value(X))))) {
+ return BinaryOperator::CreateAdd(
+ Builder.CreateAdd(Op0, ConstantInt::get(I.getType(), 1)), X);
+ }
+
if (Constant *C = dyn_cast<Constant>(Op0)) {
bool IsNegate = match(C, m_ZeroInt());
Value *X;
@@ -1532,8 +1618,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Instruction *R = foldOpIntoPhi(I, PN))
return R;
- // C-(X+C2) --> (C-C2)-X
Constant *C2;
+
+ // C-(C2-X) --> X+(C-C2)
+ if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+ return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
+
+ // C-(X+C2) --> (C-C2)-X
if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
}
@@ -1626,9 +1717,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Builder.CreateNot(Y, Y->getName() + ".not"));
// 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
- if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
- C->isNotMinSignedValue() && !C->isOneValue())
- return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
+ // TODO: This could be extended to match arbitrary vector constants.
+ const APInt *DivC;
+ if (match(Op0, m_Zero()) && match(Op1, m_SDiv(m_Value(X), m_APInt(DivC))) &&
+ !DivC->isMinSignedValue() && *DivC != 1) {
+ Constant *NegDivC = ConstantInt::get(I.getType(), -(*DivC));
+ Instruction *BO = BinaryOperator::CreateSDiv(X, NegDivC);
+ BO->setIsExact(cast<BinaryOperator>(Op1)->isExact());
+ return BO;
+ }
// 0 - (X << Y) -> (-X << Y) when X is freely negatable.
if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero()))
@@ -1745,6 +1842,49 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+/// This eliminates floating-point negation in either 'fneg(X)' or
+/// 'fsub(-0.0, X)' form by combining into a constant operand.
+static Instruction *foldFNegIntoConstant(Instruction &I) {
+ Value *X;
+ Constant *C;
+
+ // Fold negation into constant operand. This is limited with one-use because
+ // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
+ // -(X * C) --> X * (-C)
+ // FIXME: It's arguable whether these should be m_OneUse or not. The current
+ // belief is that the FNeg allows for better reassociation opportunities.
+ if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(X / C) --> X / (-C)
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
+ return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
+ // -(C / X) --> (-C) / X
+ if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
+ return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitFNeg(UnaryOperator &I) {
+ Value *Op = I.getOperand(0);
+
+ if (Value *V = SimplifyFNegInst(Op, I.getFastMathFlags(),
+ SQ.getWithInstruction(&I)))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *X = foldFNegIntoConstant(I))
+ return X;
+
+ Value *X, *Y;
+
+ // If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
+ if (I.hasNoSignedZeros() &&
+ match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
+ return BinaryOperator::CreateFSubFMF(Y, X, &I);
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (Value *V = SimplifyFSubInst(I.getOperand(0), I.getOperand(1),
I.getFastMathFlags(),
@@ -1760,21 +1900,12 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
return BinaryOperator::CreateFNegFMF(Op1, &I);
+ if (Instruction *X = foldFNegIntoConstant(I))
+ return X;
+
Value *X, *Y;
Constant *C;
- // Fold negation into constant operand. This is limited with one-use because
- // fneg is assumed better for analysis and cheaper in codegen than fmul/fdiv.
- // -(X * C) --> X * (-C)
- if (match(&I, m_FNeg(m_OneUse(m_FMul(m_Value(X), m_Constant(C))))))
- return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
- // -(X / C) --> X / (-C)
- if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Value(X), m_Constant(C))))))
- return BinaryOperator::CreateFDivFMF(X, ConstantExpr::getFNeg(C), &I);
- // -(C / X) --> (-C) / X
- if (match(&I, m_FNeg(m_OneUse(m_FDiv(m_Constant(C), m_Value(X))))))
- return BinaryOperator::CreateFDivFMF(ConstantExpr::getFNeg(C), X, &I);
-
// If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
// Canonicalize to fadd to make analysis easier.
// This can also help codegen because fadd is commutative.
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 404c2ad7e6e7..2b9859b602f4 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1,9 +1,8 @@
//===- InstCombineAndOrXor.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -968,7 +967,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
// Can it be decomposed into icmp eq (X & Mask), 0 ?
if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
Pred, X, UnsetBitsMask,
- /*LookThruTrunc=*/false) &&
+ /*LookThroughTrunc=*/false) &&
Pred == ICmpInst::ICMP_EQ)
return true;
// Is it icmp eq (X & Mask), 0 already?
@@ -1022,6 +1021,36 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
CxtI.getName() + ".simplified");
}
+/// Reduce a pair of compares that check if a value has exactly 1 bit set.
+static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
+ InstCombiner::BuilderTy &Builder) {
+ // Handle 'and' / 'or' commutation: make the equality check the first operand.
+ if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
+ std::swap(Cmp0, Cmp1);
+ else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ)
+ std::swap(Cmp0, Cmp1);
+
+ // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
+ CmpInst::Predicate Pred0, Pred1;
+ Value *X;
+ if (JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
+ match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
+ m_SpecificInt(2))) &&
+ Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
+ Value *CtPop = Cmp1->getOperand(0);
+ return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1));
+ }
+ // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
+ if (!JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
+ match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
+ m_SpecificInt(1))) &&
+ Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
+ Value *CtPop = Cmp1->getOperand(0);
+ return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1));
+ }
+ return nullptr;
+}
+
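A scalar view of the ctpop reduction above (an illustrative sketch assuming C++20 std::popcount, not part of the patch): for the 'and' form, a nonzero value with fewer than two set bits is exactly a value with one set bit, and the 'or' form is its negation.

    #include <bit>
    #include <cstdint>
    bool isPow2Long(uint32_t x)  { return x != 0 && std::popcount(x) < 2; }
    bool isPow2Short(uint32_t x) { return std::popcount(x) == 1; }
    // isPow2Long(x) == isPow2Short(x) for every x.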
/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction &CxtI) {
@@ -1064,6 +1093,9 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
return V;
+ if (Value *V = foldIsPowerOf2(LHS, RHS, true /* JoinedByAnd */, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
@@ -1259,6 +1291,52 @@ Value *InstCombiner::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd)
return nullptr;
}
+/// This is a limited reassociation for a special case (see above) where we are
+/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
+/// This could be handled more generally in '-reassociation', but it seems like
+/// an unlikely pattern for a large number of logic ops and fcmps.
+static Instruction *reassociateFCmps(BinaryOperator &BO,
+ InstCombiner::BuilderTy &Builder) {
+ Instruction::BinaryOps Opcode = BO.getOpcode();
+ assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ "Expecting and/or op for fcmp transform");
+
+ // There are 4 commuted variants of the pattern. Canonicalize operands of this
+ // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
+ Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X;
+ FCmpInst::Predicate Pred;
+ if (match(Op1, m_FCmp(Pred, m_Value(), m_AnyZeroFP())))
+ std::swap(Op0, Op1);
+
+ // Match inner binop and the predicate for combining 2 NAN checks into 1.
+ BinaryOperator *BO1;
+ FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
+ : FCmpInst::FCMP_UNO;
+ if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
+ !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode)
+ return nullptr;
+
+ // The inner logic op must have a matching fcmp operand.
+ Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y;
+ if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+ Pred != NanPred || X->getType() != Y->getType())
+ std::swap(BO10, BO11);
+
+ if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
+ Pred != NanPred || X->getType() != Y->getType())
+ return nullptr;
+
+ // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
+ // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
+ Value *NewFCmp = Builder.CreateFCmp(Pred, X, Y);
+ if (auto *NewFCmpInst = dyn_cast<FCmpInst>(NewFCmp)) {
+ // Intersect FMF from the 2 source fcmps.
+ NewFCmpInst->copyIRFlags(Op0);
+ NewFCmpInst->andIRFlags(BO10);
+ }
+ return BinaryOperator::Create(Opcode, NewFCmp, BO11);
+}
+
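For reference, 'fcmp ord X, 0.0' tests that X is not NaN and 'fcmp ord X, Y' tests that neither operand is NaN, so the reassociation above is ordinary boolean regrouping (illustrative C++ sketch with invented names):

    #include <cmath>
    bool before(double x, double y, bool z) { return !std::isnan(x) && (!std::isnan(y) && z); }
    bool after(double x, double y, bool z)  { return (!std::isnan(x) && !std::isnan(y)) && z; }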
/// Match De Morgan's Laws:
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
@@ -1619,6 +1697,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
// of X and OP behaves well when given trunc(C1) and X.
+    // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
switch (Op0I->getOpcode()) {
default:
break;
@@ -1629,7 +1708,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
case Instruction::Sub:
Value *X;
ConstantInt *C1;
- if (match(Op0I, m_c_BinOp(m_ZExt(m_Value(X)), m_ConstantInt(C1)))) {
+ // TODO: The one use restrictions could be relaxed a little if the AND
+ // is going to be removed.
+ if (match(Op0I, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))),
+ m_ConstantInt(C1))))) {
if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
Value *BinOp;
@@ -1747,6 +1829,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *Res = foldLogicOfFCmps(LHS, RHS, true))
return replaceInstUsesWith(I, Res);
+ if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+ return FoldedFCmps;
+
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
return CastedAnd;
@@ -1820,14 +1905,18 @@ static Instruction *matchRotate(Instruction &Or) {
// First, find an or'd pair of opposite shifts with the same shifted operand:
// or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
- Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+ BinaryOperator *Or0, *Or1;
+ if (!match(Or.getOperand(0), m_BinOp(Or0)) ||
+ !match(Or.getOperand(1), m_BinOp(Or1)))
+ return nullptr;
+
Value *ShVal, *ShAmt0, *ShAmt1;
if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
!match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
return nullptr;
- auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
- auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+ BinaryOperator::BinaryOps ShiftOpcode0 = Or0->getOpcode();
+ BinaryOperator::BinaryOps ShiftOpcode1 = Or1->getOpcode();
if (ShiftOpcode0 == ShiftOpcode1)
return nullptr;
@@ -1842,6 +1931,13 @@ static Instruction *matchRotate(Instruction &Or) {
match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
return X;
+ // Similar to above, but the shift amount may be extended after masking,
+ // so return the extended value as the parameter for the intrinsic.
+ if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
+ match(R, m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
+ m_SpecificInt(Mask))))
+ return L;
+
return nullptr;
};
@@ -2083,6 +2179,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
return V;
+ if (Value *V = foldIsPowerOf2(LHS, RHS, false /* JoinedByAnd */, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (!LHSC || !RHSC)
return nullptr;
@@ -2412,6 +2511,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *Res = foldLogicOfFCmps(LHS, RHS, false))
return replaceInstUsesWith(I, Res);
+ if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
+ return FoldedFCmps;
+
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
return CastedOr;
diff --git a/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
new file mode 100644
index 000000000000..5f37a00f56cf
--- /dev/null
+++ b/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -0,0 +1,159 @@
+//===- InstCombineAtomicRMW.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for atomic rmw instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "InstCombineInternal.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+namespace {
+/// Return true if and only if the given instruction does not modify the memory
+/// location referenced.  Note that an idempotent atomicrmw may still have
+/// ordering effects on nearby instructions, or be volatile.
+/// TODO: Common w/ the version in AtomicExpandPass, and change the term used.
+/// Idempotent is confusing in this context.
+bool isIdempotentRMW(AtomicRMWInst& RMWI) {
+ if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+ switch(RMWI.getOperation()) {
+ case AtomicRMWInst::FAdd: // -0.0
+ return CF->isZero() && CF->isNegative();
+ case AtomicRMWInst::FSub: // +0.0
+ return CF->isZero() && !CF->isNegative();
+ default:
+ return false;
+ };
+
+ auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+ if(!C)
+ return false;
+
+ switch(RMWI.getOperation()) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ case AtomicRMWInst::Min:
+ return C->isMaxValue(true);
+ case AtomicRMWInst::Max:
+ return C->isMinValue(true);
+ case AtomicRMWInst::UMin:
+ return C->isMaxValue(false);
+ case AtomicRMWInst::UMax:
+ return C->isMinValue(false);
+ default:
+ return false;
+ }
+}
+
+/// Return true if the given instruction always produces a value in memory
+/// equivalent to its value operand.
+bool isSaturating(AtomicRMWInst& RMWI) {
+ if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+ switch(RMWI.getOperation()) {
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
+ return CF->isNaN();
+ default:
+ return false;
+ };
+
+ auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+ if(!C)
+ return false;
+
+ switch(RMWI.getOperation()) {
+ default:
+ return false;
+ case AtomicRMWInst::Xchg:
+ return true;
+ case AtomicRMWInst::Or:
+ return C->isAllOnesValue();
+ case AtomicRMWInst::And:
+ return C->isZero();
+ case AtomicRMWInst::Min:
+ return C->isMinValue(true);
+ case AtomicRMWInst::Max:
+ return C->isMaxValue(true);
+ case AtomicRMWInst::UMin:
+ return C->isMinValue(false);
+ case AtomicRMWInst::UMax:
+ return C->isMaxValue(false);
+ };
+}
+}
+
+Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+
+  // Volatile RMWs perform a load and a store; we cannot replace this by just a
+ // load or just a store. We chose not to canonicalize out of general paranoia
+ // about user expectations around volatile.
+ if (RMWI.isVolatile())
+ return nullptr;
+
+ // Any atomicrmw op which produces a known result in memory can be
+ // replaced w/an atomicrmw xchg.
+ if (isSaturating(RMWI) &&
+ RMWI.getOperation() != AtomicRMWInst::Xchg) {
+ RMWI.setOperation(AtomicRMWInst::Xchg);
+ return &RMWI;
+ }
+
+ AtomicOrdering Ordering = RMWI.getOrdering();
+ assert(Ordering != AtomicOrdering::NotAtomic &&
+ Ordering != AtomicOrdering::Unordered &&
+ "AtomicRMWs don't make sense with Unordered or NotAtomic");
+
+  // Any atomicrmw xchg with no uses can be converted to an atomic store if the
+ // ordering is compatible.
+ if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
+ RMWI.use_empty()) {
+ if (Ordering != AtomicOrdering::Release &&
+ Ordering != AtomicOrdering::Monotonic)
+ return nullptr;
+ auto *SI = new StoreInst(RMWI.getValOperand(),
+ RMWI.getPointerOperand(), &RMWI);
+ SI->setAtomic(Ordering, RMWI.getSyncScopeID());
+ SI->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+ return eraseInstFromFunction(RMWI);
+ }
+
+ if (!isIdempotentRMW(RMWI))
+ return nullptr;
+
+  // We chose to canonicalize all idempotent operations to a single
+  // operation code and constant.  This makes it easier for the rest of the
+  // optimizer to match.  The choices of or w/0 and fadd w/-0.0 are
+ // arbitrary.
+ if (RMWI.getType()->isIntegerTy() &&
+ RMWI.getOperation() != AtomicRMWInst::Or) {
+ RMWI.setOperation(AtomicRMWInst::Or);
+ RMWI.setOperand(1, ConstantInt::get(RMWI.getType(), 0));
+ return &RMWI;
+ } else if (RMWI.getType()->isFloatingPointTy() &&
+ RMWI.getOperation() != AtomicRMWInst::FAdd) {
+ RMWI.setOperation(AtomicRMWInst::FAdd);
+ RMWI.setOperand(1, ConstantFP::getNegativeZero(RMWI.getType()));
+ return &RMWI;
+ }
+
+ // Check if the required ordering is compatible with an atomic load.
+ if (Ordering != AtomicOrdering::Acquire &&
+ Ordering != AtomicOrdering::Monotonic)
+ return nullptr;
+
+ LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
+ Load->setAtomic(Ordering, RMWI.getSyncScopeID());
+ Load->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+ return Load;
+}
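The net effect of visitAtomicRMWInst on an idempotent operation can be pictured with std::atomic (an illustrative C++ sketch, not part of the patch): an RMW that leaves memory unchanged, such as 'or' with 0, observes the same value as a plain atomic load when the ordering permits the replacement.

    #include <atomic>
    int viaIdempotentRMW(std::atomic<int> &a) {
      return a.fetch_or(0, std::memory_order_acquire);  // read-modify-write that changes nothing
    }
    int viaLoad(std::atomic<int> &a) {
      return a.load(std::memory_order_acquire);         // same observed value
    }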
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index aeb25d530d71..4b3333affa72 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1,19 +1,19 @@
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This file implements the visitCall and visitInvoke functions.
+// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -23,12 +23,12 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -58,6 +58,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
@@ -121,6 +122,15 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
return MI;
}
+ // If we have a store to a location which is known constant, we can conclude
+ // that the store must be storing the constant value (else the memory
+ // wouldn't be constant), and this must be a noop.
+ if (AA->pointsToConstantMemory(MI->getDest())) {
+    // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
@@ -173,7 +183,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
- LoadInst *L = Builder.CreateLoad(Src);
+ LoadInst *L = Builder.CreateLoad(IntType, Src);
// Alignment from the mem intrinsic will be better, so use it.
L->setAlignment(CopySrcAlign);
if (CopyMD)
@@ -219,6 +229,15 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
return MI;
}
+ // If we have a store to a location which is known constant, we can conclude
+ // that the store must be storing the constant value (else the memory
+ // wouldn't be constant), and this must be a noop.
+ if (AA->pointsToConstantMemory(MI->getDest())) {
+    // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
+ return MI;
+ }
+
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -523,7 +542,8 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
return Builder.CreateAShr(Vec, ShiftVec);
}
-static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
+static Value *simplifyX86pack(IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder, bool IsSigned) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
Type *ResTy = II.getType();
@@ -534,167 +554,58 @@ static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
Type *ArgTy = Arg0->getType();
unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
- unsigned NumDstElts = ResTy->getVectorNumElements();
unsigned NumSrcElts = ArgTy->getVectorNumElements();
- assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
+ assert(ResTy->getVectorNumElements() == (2 * NumSrcElts) &&
+ "Unexpected packing types");
- unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
- assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
+ unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
+ assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
"Unexpected packing types");
// Constant folding.
- auto *Cst0 = dyn_cast<Constant>(Arg0);
- auto *Cst1 = dyn_cast<Constant>(Arg1);
- if (!Cst0 || !Cst1)
+ if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
return nullptr;
- SmallVector<Constant *, 32> Vals;
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
- unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
- auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
- auto *COp = Cst->getAggregateElement(SrcIdx);
- if (COp && isa<UndefValue>(COp)) {
- Vals.push_back(UndefValue::get(ResTy->getScalarType()));
- continue;
- }
-
- auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
- if (!CInt)
- return nullptr;
-
- APInt Val = CInt->getValue();
- assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
- "Unexpected constant bitwidth");
-
- if (IsSigned) {
- // PACKSS: Truncate signed value with signed saturation.
- // Source values less than dst minint are saturated to minint.
- // Source values greater than dst maxint are saturated to maxint.
- if (Val.isSignedIntN(DstScalarSizeInBits))
- Val = Val.trunc(DstScalarSizeInBits);
- else if (Val.isNegative())
- Val = APInt::getSignedMinValue(DstScalarSizeInBits);
- else
- Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
- } else {
- // PACKUS: Truncate signed value with unsigned saturation.
- // Source values less than zero are saturated to zero.
- // Source values greater than dst maxuint are saturated to maxuint.
- if (Val.isIntN(DstScalarSizeInBits))
- Val = Val.trunc(DstScalarSizeInBits);
- else if (Val.isNegative())
- Val = APInt::getNullValue(DstScalarSizeInBits);
- else
- Val = APInt::getAllOnesValue(DstScalarSizeInBits);
- }
-
- Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
- }
- }
-
- return ConstantVector::get(Vals);
-}
-
-// Replace X86-specific intrinsics with generic floor-ceil where applicable.
-static Value *simplifyX86round(IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- ConstantInt *Arg = nullptr;
- Intrinsic::ID IntrinsicID = II.getIntrinsicID();
-
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
- if (!Arg)
- return nullptr;
- unsigned RoundControl = Arg->getZExtValue();
-
- Arg = nullptr;
- unsigned SAE = 0;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
- else
- SAE = 4;
- if (!SAE) {
- if (!Arg)
- return nullptr;
- SAE = Arg->getZExtValue();
+ // Clamp Values - signed/unsigned both use signed clamp values, but they
+ // differ on the min/max values.
+ APInt MinValue, MaxValue;
+ if (IsSigned) {
+ // PACKSS: Truncate signed value with signed saturation.
+ // Source values less than dst minint are saturated to minint.
+ // Source values greater than dst maxint are saturated to maxint.
+ MinValue =
+ APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+ MaxValue =
+ APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
+ } else {
+ // PACKUS: Truncate signed value with unsigned saturation.
+ // Source values less than zero are saturated to zero.
+ // Source values greater than dst maxuint are saturated to maxuint.
+ MinValue = APInt::getNullValue(SrcScalarSizeInBits);
+ MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
}
- if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
- return nullptr;
+ auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
+ auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
+ Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
+ Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
+ Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
+ Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
- Value *Src, *Dst, *Mask;
- bool IsScalar = false;
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- IsScalar = true;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Mask = II.getArgOperand(3);
- Value *Zero = Constant::getNullValue(Mask->getType());
- Mask = Builder.CreateAnd(Mask, 1);
- Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
- Dst = II.getArgOperand(2);
- } else
- Dst = II.getArgOperand(0);
- Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
- } else {
- Src = II.getArgOperand(0);
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
- Dst = II.getArgOperand(2);
- Mask = II.getArgOperand(3);
- } else {
- Dst = Src;
- Mask = ConstantInt::getAllOnesValue(
- Builder.getIntNTy(Src->getType()->getVectorNumElements()));
- }
+ // Shuffle clamped args together at the lane level.
+ SmallVector<unsigned, 32> PackMask;
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+ PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
+ for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
+ PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
}
+ auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
- Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
- Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
- if (!IsScalar) {
- if (auto *C = dyn_cast<Constant>(Mask))
- if (C->isAllOnesValue())
- return Res;
- auto *MaskTy = VectorType::get(
- Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
- Mask = Builder.CreateBitCast(Mask, MaskTy);
- unsigned Width = Src->getType()->getVectorNumElements();
- if (MaskTy->getVectorNumElements() > Width) {
- uint32_t Indices[4];
- for (unsigned i = 0; i != Width; ++i)
- Indices[i] = i;
- Mask = Builder.CreateShuffleVector(Mask, Mask,
- makeArrayRef(Indices, Width));
- }
- return Builder.CreateSelect(Mask, Res, Dst);
- }
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
- Res = Builder.CreateSelect(Mask, Res, Dst);
- Dst = II.getArgOperand(0);
- }
- return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
+ // Truncate to dst size.
+ return Builder.CreateTrunc(Shuffle, ResTy);
}
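The rewritten simplifyX86pack mirrors the scalar semantics of PACKSS/PACKUS: clamp each wide element to the destination range, then truncate. A per-lane sketch (illustrative only, assuming C++17 std::clamp):

    #include <algorithm>
    #include <cstdint>
    int8_t packssLane(int16_t v) {    // signed saturation
      return (int8_t)std::clamp<int16_t>(v, INT8_MIN, INT8_MAX);
    }
    uint8_t packusLane(int16_t v) {   // unsigned saturation of a signed source
      return (uint8_t)std::clamp<int16_t>(v, 0, UINT8_MAX);
    }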
static Value *simplifyX86movmsk(const IntrinsicInst &II,
@@ -711,43 +622,44 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
if (!ArgTy->isVectorTy())
return nullptr;
- if (auto *C = dyn_cast<Constant>(Arg)) {
- // Extract signbits of the vector input and pack into integer result.
- APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
- for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
- auto *COp = C->getAggregateElement(I);
- if (!COp)
- return nullptr;
- if (isa<UndefValue>(COp))
- continue;
-
- auto *CInt = dyn_cast<ConstantInt>(COp);
- auto *CFp = dyn_cast<ConstantFP>(COp);
- if (!CInt && !CFp)
- return nullptr;
-
- if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
- Result.setBit(I);
- }
- return Constant::getIntegerValue(ResTy, Result);
- }
+ // Expand MOVMSK to compare/bitcast/zext:
+ // e.g. PMOVMSKB(v16i8 x):
+ // %cmp = icmp slt <16 x i8> %x, zeroinitializer
+ // %int = bitcast <16 x i1> %cmp to i16
+ // %res = zext i16 %int to i32
+ unsigned NumElts = ArgTy->getVectorNumElements();
+ Type *IntegerVecTy = VectorType::getInteger(cast<VectorType>(ArgTy));
+ Type *IntegerTy = Builder.getIntNTy(NumElts);
+
+ Value *Res = Builder.CreateBitCast(Arg, IntegerVecTy);
+ Res = Builder.CreateICmpSLT(Res, Constant::getNullValue(IntegerVecTy));
+ Res = Builder.CreateBitCast(Res, IntegerTy);
+ Res = Builder.CreateZExtOrTrunc(Res, ResTy);
+ return Res;
+}
- // Look for a sign-extended boolean source vector as the argument to this
- // movmsk. If the argument is bitcast, look through that, but make sure the
- // source of that bitcast is still a vector with the same number of elements.
- // TODO: We can also convert a bitcast with wider elements, but that requires
- // duplicating the bool source sign bits to match the number of elements
- // expected by the movmsk call.
- Arg = peekThroughBitcast(Arg);
- Value *X;
- if (Arg->getType()->isVectorTy() &&
- Arg->getType()->getVectorNumElements() == ArgTy->getVectorNumElements() &&
- match(Arg, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
- // call iM movmsk(sext <N x i1> X) --> zext (bitcast <N x i1> X to iN) to iM
- unsigned NumElts = X->getType()->getVectorNumElements();
- Type *ScalarTy = Type::getIntNTy(Arg->getContext(), NumElts);
- Value *BC = Builder.CreateBitCast(X, ScalarTy);
- return Builder.CreateZExtOrTrunc(BC, ResTy);
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *CarryIn = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ Value *Op2 = II.getArgOperand(2);
+ Type *RetTy = II.getType();
+ Type *OpTy = Op1->getType();
+ assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+ RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+ "Unexpected types for x86 addcarry");
+
+ // If carry-in is zero, this is just an unsigned add with overflow.
+ if (match(CarryIn, m_ZeroInt())) {
+ Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+ { Op1, Op2 });
+ // The types have to be adjusted to match the x86 call types.
+ Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+ Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+ Builder.getInt8Ty());
+ Value *Res = UndefValue::get(RetTy);
+ Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+ return Builder.CreateInsertValue(Res, UAddResult, 1);
}
return nullptr;
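With a known-zero carry-in, the addcarry intrinsic above reduces to an ordinary unsigned add-with-overflow, matching the {i8, i64} repacking in the code. A scalar sketch (illustrative only; __builtin_add_overflow is a GCC/Clang builtin):

    #include <cstdint>
    struct AddCarry64 { uint8_t carryOut; uint64_t sum; };
    AddCarry64 addcarryZeroCarryIn(uint64_t a, uint64_t b) {
      uint64_t sum;
      bool overflow = __builtin_add_overflow(a, b, &sum);  // unsigned add with overflow flag
      return { (uint8_t)overflow, sum };
    }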
@@ -892,7 +804,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
Value *Args[] = {Op0, CILength, CIIndex};
Module *M = II.getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
return Builder.CreateCall(F, Args);
}
}
@@ -993,7 +905,7 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
Value *Args[] = {Op0, Op1, CILength, CIIndex};
Module *M = II.getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
return Builder.CreateCall(F, Args);
}
@@ -1134,82 +1046,42 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
}
-/// Decode XOP integer vector comparison intrinsics.
-static Value *simplifyX86vpcom(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder,
- bool IsSigned) {
- if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
- uint64_t Imm = CInt->getZExtValue() & 0x7;
- VectorType *VecTy = cast<VectorType>(II.getType());
- CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
-
- switch (Imm) {
- case 0x0:
- Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
- break;
- case 0x1:
- Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
- break;
- case 0x2:
- Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
- break;
- case 0x3:
- Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
- break;
- case 0x4:
- Pred = ICmpInst::ICMP_EQ; break;
- case 0x5:
- Pred = ICmpInst::ICMP_NE; break;
- case 0x6:
- return ConstantInt::getSigned(VecTy, 0); // FALSE
- case 0x7:
- return ConstantInt::getSigned(VecTy, -1); // TRUE
- }
-
- if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
- II.getArgOperand(1)))
- return Builder.CreateSExtOrTrunc(Cmp, VecTy);
- }
- return nullptr;
-}
+// TODO, Obvious Missing Transforms:
+// * Narrow width by halves excluding zero/undef lanes
+Value *InstCombiner::simplifyMaskedLoad(IntrinsicInst &II) {
+ Value *LoadPtr = II.getArgOperand(0);
+ unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
-static bool maskIsAllOneOrUndef(Value *Mask) {
- auto *ConstMask = dyn_cast<Constant>(Mask);
- if (!ConstMask)
- return false;
- if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
- return true;
- for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
- ++I) {
- if (auto *MaskElt = ConstMask->getAggregateElement(I))
- if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
- continue;
- return false;
- }
- return true;
-}
-
-static Value *simplifyMaskedLoad(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
- Value *LoadPtr = II.getArgOperand(0);
- unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
- return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
+ if (maskIsAllOneOrUndef(II.getArgOperand(2)))
+ return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+
+ // If we can unconditionally load from this address, replace with a
+ // load/select idiom. TODO: use DT for context sensitive query
+ if (isDereferenceableAndAlignedPointer(LoadPtr, II.getType(), Alignment,
+ II.getModule()->getDataLayout(),
+ &II, nullptr)) {
+ Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
return nullptr;
}
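The dereferenceable-pointer case above rewrites a masked load as an unconditional load followed by a per-lane select. A scalar model of that idiom (illustrative sketch with invented names, not part of the patch):

    #include <array>
    std::array<int, 4> maskedLoadModel(const std::array<int, 4> &mem,
                                       const std::array<bool, 4> &mask,
                                       std::array<int, 4> passthru) {
      std::array<int, 4> loaded = mem;           // safe unconditional load
      for (int i = 0; i < 4; ++i)
        if (!mask[i]) loaded[i] = passthru[i];   // select passthru where the mask is off
      return loaded;
    }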
-static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane -> store
+// * Narrow width by halves excluding zero/undef lanes
+Instruction *InstCombiner::simplifyMaskedStore(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
if (ConstMask->isNullValue())
- return IC.eraseInstFromFunction(II);
+ return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
if (ConstMask->isAllOnesValue()) {
@@ -1218,14 +1090,57 @@ static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
}
+ // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
+ APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+ APInt UndefElts(DemandedElts.getBitWidth(), 0);
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
+ DemandedElts, UndefElts)) {
+ II.setOperand(0, V);
+ return &II;
+ }
+
return nullptr;
}
-static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
- // If the mask is all zeros, return the "passthru" argument of the gather.
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
- if (ConstMask && ConstMask->isNullValue())
- return IC.replaceInstUsesWith(II, II.getArgOperand(3));
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane load -> load
+// * Dereferenceable address & few lanes -> scalarize speculative load/selects
+// * Adjacent vector addresses -> masked.load
+// * Narrow width by halves excluding zero/undef lanes
+// * Vector splat address w/known mask -> scalar load
+// * Vector incrementing address -> vector masked load
+Instruction *InstCombiner::simplifyMaskedGather(IntrinsicInst &II) {
+ return nullptr;
+}
+
+// TODO, Obvious Missing Transforms:
+// * Single constant active lane -> store
+// * Adjacent vector addresses -> masked.store
+// * Narrow store width by halves excluding zero/undef lanes
+// * Vector splat address w/known mask -> scalar store
+// * Vector incrementing address -> vector masked store
+Instruction *InstCombiner::simplifyMaskedScatter(IntrinsicInst &II) {
+ auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+ if (!ConstMask)
+ return nullptr;
+
+ // If the mask is all zeros, a scatter does nothing.
+ if (ConstMask->isNullValue())
+ return eraseInstFromFunction(II);
+
+ // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
+ APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+ APInt UndefElts(DemandedElts.getBitWidth(), 0);
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0),
+ DemandedElts, UndefElts)) {
+ II.setOperand(0, V);
+ return &II;
+ }
+ if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1),
+ DemandedElts, UndefElts)) {
+ II.setOperand(1, V);
+ return &II;
+ }
return nullptr;
}
@@ -1264,25 +1179,41 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
return cast<Instruction>(Result);
}
-static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
- // If the mask is all zeros, a scatter does nothing.
- auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
- if (ConstMask && ConstMask->isNullValue())
- return IC.eraseInstFromFunction(II);
-
- return nullptr;
-}
-
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
assert((II.getIntrinsicID() == Intrinsic::cttz ||
II.getIntrinsicID() == Intrinsic::ctlz) &&
"Expected cttz or ctlz intrinsic");
+ bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
Value *Op0 = II.getArgOperand(0);
+ Value *X;
+ // ctlz(bitreverse(x)) -> cttz(x)
+ // cttz(bitreverse(x)) -> ctlz(x)
+ if (match(Op0, m_BitReverse(m_Value(X)))) {
+ Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
+ Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
+ return CallInst::Create(F, {X, II.getArgOperand(1)});
+ }
+
+ if (IsTZ) {
+ // cttz(-x) -> cttz(x)
+ if (match(Op0, m_Neg(m_Value(X)))) {
+ II.setOperand(0, X);
+ return &II;
+ }
+
+ // cttz(abs(x)) -> cttz(x)
+ // cttz(nabs(x)) -> cttz(x)
+ Value *Y;
+ SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
+ if (SPF == SPF_ABS || SPF == SPF_NABS) {
+ II.setOperand(0, X);
+ return &II;
+ }
+ }
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
- bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
: Known.countMaxLeadingZeros();
unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
@@ -1328,6 +1259,14 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
assert(II.getIntrinsicID() == Intrinsic::ctpop &&
"Expected ctpop intrinsic");
Value *Op0 = II.getArgOperand(0);
+ Value *X;
+ // ctpop(bitreverse(x)) -> ctpop(x)
+ // ctpop(bswap(x)) -> ctpop(x)
+ if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X)))) {
+ II.setOperand(0, X);
+ return &II;
+ }
+
// FIXME: Try to simplify vectors of integers.
auto *IT = dyn_cast<IntegerType>(Op0->getType());
if (!IT)
@@ -1513,7 +1452,7 @@ static Value *simplifyNeonVld1(const IntrinsicInst &II,
auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
PointerType::get(II.getType(), 0));
- return Builder.CreateAlignedLoad(BCastInst, Alignment);
+ return Builder.CreateAlignedLoad(II.getType(), BCastInst, Alignment);
}
// Returns true iff the 2 intrinsics have the same operands, limiting the
@@ -1827,8 +1766,18 @@ static Instruction *canonicalizeConstantArg0ToArg1(CallInst &Call) {
return nullptr;
}
+Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
+ WithOverflowInst *WO = cast<WithOverflowInst>(II);
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
+ WO->getRHS(), *WO, OperationResult, OverflowResult))
+ return CreateOverflowTuple(WO, OperationResult, OverflowResult);
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
-/// instructions. For normal calls, it allows visitCallSite to do the heavy
+/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
@@ -1845,10 +1794,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
- if (!II) return visitCallSite(&CI);
+ if (!II) return visitCallBase(CI);
- // Intrinsics cannot occur in an invoke, so handle them here instead of in
- // visitCallSite.
+ // Intrinsics cannot occur in an invoke or a callbr, so handle them here
+ // instead of in visitCallBase.
if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
bool Changed = false;
@@ -1908,6 +1857,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
+ // For vector result intrinsics, use the generic demanded vector support.
+ if (II->getType()->isVectorTy()) {
+ auto VWidth = II->getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+ if (V != II)
+ return replaceInstUsesWith(*II, V);
+ return II;
+ }
+ }
+
if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
return I;
@@ -1918,12 +1879,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
};
- switch (II->getIntrinsicID()) {
+ Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
default: break;
case Intrinsic::objectsize:
- if (ConstantInt *N =
- lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
- return replaceInstUsesWith(CI, N);
+ if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+ return replaceInstUsesWith(CI, V);
return nullptr;
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
@@ -1940,15 +1901,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::masked_load:
- if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
+ if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
break;
case Intrinsic::masked_store:
- return simplifyMaskedStore(*II, *this);
+ return simplifyMaskedStore(*II);
case Intrinsic::masked_gather:
- return simplifyMaskedGather(*II, *this);
+ return simplifyMaskedGather(*II);
case Intrinsic::masked_scatter:
- return simplifyMaskedScatter(*II, *this);
+ return simplifyMaskedScatter(*II);
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
@@ -1982,33 +1943,62 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::fshl:
case Intrinsic::fshr: {
- const APInt *SA;
- if (match(II->getArgOperand(2), m_APInt(SA))) {
- Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
- unsigned BitWidth = SA->getBitWidth();
- uint64_t ShiftAmt = SA->urem(BitWidth);
- assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
- // Normalize to funnel shift left.
- if (II->getIntrinsicID() == Intrinsic::fshr)
- ShiftAmt = BitWidth - ShiftAmt;
-
- // fshl(X, 0, C) -> shl X, C
- // fshl(X, undef, C) -> shl X, C
- if (match(Op1, m_Zero()) || match(Op1, m_Undef()))
- return BinaryOperator::CreateShl(
- Op0, ConstantInt::get(II->getType(), ShiftAmt));
-
- // fshl(0, X, C) -> lshr X, (BW-C)
- // fshl(undef, X, C) -> lshr X, (BW-C)
- if (match(Op0, m_Zero()) || match(Op0, m_Undef()))
- return BinaryOperator::CreateLShr(
- Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt));
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ Type *Ty = II->getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ Constant *ShAmtC;
+ if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
+ !isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) {
+ // Canonicalize a shift amount constant operand to modulo the bit-width.
+ Constant *WidthC = ConstantInt::get(Ty, BitWidth);
+ Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
+ if (ModuloC != ShAmtC) {
+ II->setArgOperand(2, ModuloC);
+ return II;
+ }
+ assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
+ ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
+ "Shift amount expected to be modulo bitwidth");
+
+ // Canonicalize funnel shift right by constant to funnel shift left. This
+ // is not entirely arbitrary. For historical reasons, the backend may
+ // recognize rotate left patterns but miss rotate right patterns.
+ if (IID == Intrinsic::fshr) {
+ // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
+ Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
+ Module *Mod = II->getModule();
+ Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
+ return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
+ }
+ assert(IID == Intrinsic::fshl &&
+ "All funnel shifts by simple constants should go left");
+
+ // fshl(X, 0, C) --> shl X, C
+ // fshl(X, undef, C) --> shl X, C
+ if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
+ return BinaryOperator::CreateShl(Op0, ShAmtC);
+
+ // fshl(0, X, C) --> lshr X, (BW-C)
+ // fshl(undef, X, C) --> lshr X, (BW-C)
+ if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
+ return BinaryOperator::CreateLShr(Op1,
+ ConstantExpr::getSub(WidthC, ShAmtC));
+
+ // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
+ if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
+ Module *Mod = II->getModule();
+ Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
+ return CallInst::Create(Bswap, { Op0 });
+ }
}
+ // Left or right might be masked.
+ if (SimplifyDemandedInstructionBits(*II))
+ return &CI;
+
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
// so only the low bits of the shift amount are demanded if the bitwidth is
// a power-of-2.
- unsigned BitWidth = II->getType()->getScalarSizeInBits();
if (!isPowerOf2_32(BitWidth))
break;
APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
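The 'fshl i16 X, X, 8 --> bswap' reduction above holds because a 16-bit rotate by 8 swaps the two bytes. A quick check (illustrative only; __builtin_bswap16 is a GCC/Clang builtin):

    #include <cstdint>
    uint16_t rot16By8(uint16_t x) { return (uint16_t)((x << 8) | (x >> 8)); }  // fshl i16 x, x, 8
    uint16_t bswap16(uint16_t x)  { return __builtin_bswap16(x); }             // llvm.bswap.i16
    // rot16By8(x) == bswap16(x) for every 16-bit value x.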
@@ -2018,7 +2008,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
+ case Intrinsic::sadd_with_overflow: {
+ if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+ return I;
+ if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+ return I;
+
+ // Given 2 constant operands whose sum does not overflow:
+ // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
+ // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
+ Value *X;
+ const APInt *C0, *C1;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ bool IsSigned = IID == Intrinsic::sadd_with_overflow;
+ bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
+ : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
+ if (HasNWAdd && match(Arg1, m_APInt(C1))) {
+ bool Overflow;
+ APInt NewC =
+ IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
+ if (!Overflow)
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantInt::get(Arg1->getType(), NewC)));
+ }
+ break;
+ }
+
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
@@ -2026,16 +2043,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
case Intrinsic::usub_with_overflow:
+ if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+ return I;
+ break;
+
case Intrinsic::ssub_with_overflow: {
- OverflowCheckFlavor OCF =
- IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
- assert(OCF != OCF_INVALID && "unexpected!");
+ if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+ return I;
- Value *OperationResult = nullptr;
- Constant *OverflowResult = nullptr;
- if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
- *II, OperationResult, OverflowResult))
- return CreateOverflowTuple(II, OperationResult, OverflowResult);
+ Constant *C;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ // Given a constant C that is not the minimum signed value
+ // for an integer of a given bit width:
+ //
+ // ssubo X, C -> saddo X, -C
+ if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
+ Value *NegVal = ConstantExpr::getNeg(C);
+ // Build a saddo call that is equivalent to the discovered
+ // ssubo call.
+ return replaceInstUsesWith(
+ *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
+ Arg0, NegVal));
+ }
break;
}
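The ssubo-to-saddo rewrite above relies on x - C and x + (-C) having the same value and the same signed-overflow behavior whenever C is not the minimum signed value, where negating C would itself overflow. A scalar sketch (illustrative only, using GCC/Clang overflow builtins):

    #include <cstdint>
    // Precondition for both to be well defined and equivalent: c != INT32_MIN.
    bool ssubOverflow(int32_t x, int32_t c, int32_t &r)    { return __builtin_sub_overflow(x, c, &r); }
    bool saddNegOverflow(int32_t x, int32_t c, int32_t &r) { return __builtin_add_overflow(x, -c, &r); }
    // Under that precondition the two agree on both the result and the overflow flag.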
@@ -2047,39 +2077,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
LLVM_FALLTHROUGH;
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat: {
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- Intrinsic::ID IID = II->getIntrinsicID();
+ SaturatingInst *SI = cast<SaturatingInst>(II);
+ Type *Ty = SI->getType();
+ Value *Arg0 = SI->getLHS();
+ Value *Arg1 = SI->getRHS();
// Make use of known overflow information.
- OverflowResult OR;
- switch (IID) {
- default:
- llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::uadd_sat:
- OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
- if (OR == OverflowResult::NeverOverflows)
- return BinaryOperator::CreateNUWAdd(Arg0, Arg1);
- if (OR == OverflowResult::AlwaysOverflows)
- return replaceInstUsesWith(*II,
- ConstantInt::getAllOnesValue(II->getType()));
- break;
- case Intrinsic::usub_sat:
- OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
- if (OR == OverflowResult::NeverOverflows)
- return BinaryOperator::CreateNUWSub(Arg0, Arg1);
- if (OR == OverflowResult::AlwaysOverflows)
- return replaceInstUsesWith(*II,
- ConstantInt::getNullValue(II->getType()));
- break;
- case Intrinsic::sadd_sat:
- if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
- return BinaryOperator::CreateNSWAdd(Arg0, Arg1);
- break;
- case Intrinsic::ssub_sat:
- if (willNotOverflowSignedSub(Arg0, Arg1, *II))
- return BinaryOperator::CreateNSWSub(Arg0, Arg1);
- break;
+ OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
+ Arg0, Arg1, SI);
+ switch (OR) {
+ case OverflowResult::MayOverflow:
+ break;
+ case OverflowResult::NeverOverflows:
+ if (SI->isSigned())
+ return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
+ else
+ return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
+ case OverflowResult::AlwaysOverflowsLow: {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
+ return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
+ }
+ case OverflowResult::AlwaysOverflowsHigh: {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
+ return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
+ }
}
// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
@@ -2101,7 +2124,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APInt NewVal;
bool IsUnsigned =
IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
- if (Other->getIntrinsicID() == II->getIntrinsicID() &&
+ if (Other->getIntrinsicID() == IID &&
match(Arg1, m_APInt(Val)) &&
match(Other->getArgOperand(0), m_Value(X)) &&
match(Other->getArgOperand(1), m_APInt(Val2))) {
@@ -2136,7 +2159,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return I;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
- Intrinsic::ID IID = II->getIntrinsicID();
Value *X, *Y;
if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
(Arg0->hasOneUse() || Arg1->hasOneUse())) {
@@ -2266,8 +2288,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *ExtSrc;
if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
- Value *NarrowII =
- Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), ExtSrc, II);
+ Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
return new FPExtInst(NarrowII, II->getType());
}
break;
@@ -2302,7 +2323,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
&DT) >= 16) {
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr);
+ return new LoadInst(II->getType(), Ptr);
}
break;
case Intrinsic::ppc_vsx_lxvw4x:
@@ -2310,7 +2331,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Turn PPC VSX loads into normal loads.
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr, Twine(""), false, 1);
+ return new LoadInst(II->getType(), Ptr, Twine(""), false, 1);
}
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
@@ -2338,7 +2359,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->getType()->getVectorNumElements());
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(VTy));
- Value *Load = Builder.CreateLoad(Ptr);
+ Value *Load = Builder.CreateLoad(VTy, Ptr);
return new FPExtInst(Load, II->getType());
}
break;
@@ -2348,7 +2369,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
&DT) >= 32) {
Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
- return new LoadInst(Ptr);
+ return new LoadInst(II->getType(), Ptr);
}
break;
case Intrinsic::ppc_qpx_qvstfs:
@@ -2499,22 +2520,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::x86_sse41_round_ps:
- case Intrinsic::x86_sse41_round_pd:
- case Intrinsic::x86_avx_round_ps_256:
- case Intrinsic::x86_avx_round_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_128:
- case Intrinsic::x86_avx512_mask_rndscale_ps_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_512:
- case Intrinsic::x86_avx512_mask_rndscale_pd_128:
- case Intrinsic::x86_avx512_mask_rndscale_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_pd_512:
- case Intrinsic::x86_avx512_mask_rndscale_ss:
- case Intrinsic::x86_avx512_mask_rndscale_sd:
- if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -2620,7 +2625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Arg1 = II->getArgOperand(1);
Value *V;
- switch (II->getIntrinsicID()) {
+ switch (IID) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_avx512_add_ps_512:
case Intrinsic::x86_avx512_add_pd_512:
@@ -2664,7 +2669,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
Value *V;
- switch (II->getIntrinsicID()) {
+ switch (IID) {
default: llvm_unreachable("Case stmts out of sync!");
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
@@ -2706,44 +2711,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
}
}
- LLVM_FALLTHROUGH;
-
- // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
- case Intrinsic::x86_avx512_mask_max_ss_round:
- case Intrinsic::x86_avx512_mask_min_ss_round:
- case Intrinsic::x86_avx512_mask_max_sd_round:
- case Intrinsic::x86_avx512_mask_min_sd_round:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_min_ss:
- case Intrinsic::x86_sse_max_ss:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_min_sd:
- case Intrinsic::x86_sse2_max_sd:
- case Intrinsic::x86_xop_vfrcz_ss:
- case Intrinsic::x86_xop_vfrcz_sd: {
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- }
- break;
- }
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- unsigned VWidth = II->getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
- if (V != II)
- return replaceInstUsesWith(*II, V);
- return II;
- } else if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
break;
- }
// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
@@ -2860,7 +2828,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:
- if (Value *V = simplifyX86pack(*II, true))
+ if (Value *V = simplifyX86pack(*II, Builder, true))
return replaceInstUsesWith(*II, V);
break;
@@ -2870,7 +2838,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512:
- if (Value *V = simplifyX86pack(*II, false))
+ if (Value *V = simplifyX86pack(*II, Builder, false))
return replaceInstUsesWith(*II, V);
break;
@@ -3168,19 +3136,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return nullptr;
break;
- case Intrinsic::x86_xop_vpcomb:
- case Intrinsic::x86_xop_vpcomd:
- case Intrinsic::x86_xop_vpcomq:
- case Intrinsic::x86_xop_vpcomw:
- if (Value *V = simplifyX86vpcom(*II, Builder, true))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::x86_xop_vpcomub:
- case Intrinsic::x86_xop_vpcomud:
- case Intrinsic::x86_xop_vpcomuq:
- case Intrinsic::x86_xop_vpcomuw:
- if (Value *V = simplifyX86vpcom(*II, Builder, false))
+ case Intrinsic::x86_addcarry_32:
+ case Intrinsic::x86_addcarry_64:
+ if (Value *V = simplifyX86addcarry(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -3296,8 +3254,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
// Check for constant LHS & RHS - in this case we just simplify.
- bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
- II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
+ bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
+ IID == Intrinsic::aarch64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
@@ -3374,7 +3332,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APFloat Significand = frexp(C->getValueAPF(), Exp,
APFloat::rmNearestTiesToEven);
- if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
+ if (IID == Intrinsic::amdgcn_frexp_mant) {
return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
Significand));
}
@@ -3559,7 +3517,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
- bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
+ bool Signed = IID == Intrinsic::amdgcn_sbfe;
if (!CWidth || !COffset)
break;
@@ -3587,15 +3545,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::amdgcn_exp:
case Intrinsic::amdgcn_exp_compr: {
- ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (!En) // Illegal.
- break;
-
+ ConstantInt *En = cast<ConstantInt>(II->getArgOperand(1));
unsigned EnBits = En->getZExtValue();
if (EnBits == 0xf)
break; // All inputs enabled.
- bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
+ bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
bool Changed = false;
for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
@@ -3680,13 +3635,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::amdgcn_icmp:
case Intrinsic::amdgcn_fcmp: {
- const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
- if (!CC)
- break;
-
+ const ConstantInt *CC = cast<ConstantInt>(II->getArgOperand(2));
// Guard against invalid arguments.
int64_t CCVal = CC->getZExtValue();
- bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
+ bool IsInteger = IID == Intrinsic::amdgcn_icmp;
if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
(!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
@@ -3709,7 +3661,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// register (which contains the bitmask of live threads). So a
// comparison that always returns true is the same as a read of the
// EXEC register.
- Value *NewF = Intrinsic::getDeclaration(
+ Function *NewF = Intrinsic::getDeclaration(
II->getModule(), Intrinsic::read_register, II->getType());
Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
MDNode *MD = MDNode::get(II->getContext(), MDArgs);
@@ -3804,8 +3756,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
} else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
break;
- Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
- SrcLHS->getType());
+ Function *NewF =
+ Intrinsic::getDeclaration(II->getModule(), NewIID,
+ { II->getType(),
+ SrcLHS->getType() });
Value *Args[] = { SrcLHS, SrcRHS,
ConstantInt::get(CC->getType(), SrcPred) };
CallInst *NewCall = Builder.CreateCall(NewF, Args);
@@ -3833,11 +3787,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::amdgcn_update_dpp: {
Value *Old = II->getArgOperand(0);
- auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
- auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
- auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
- if (!BC || !RM || !BM ||
- BC->isZeroValue() ||
+ auto BC = cast<ConstantInt>(II->getArgOperand(5));
+ auto RM = cast<ConstantInt>(II->getArgOperand(3));
+ auto BM = cast<ConstantInt>(II->getArgOperand(4));
+ if (BC->isZeroValue() ||
RM->getZExtValue() != 0xF ||
BM->getZExtValue() != 0xF ||
isa<UndefValue>(Old))
@@ -3847,6 +3800,37 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->setOperand(0, UndefValue::get(Old->getType()));
return II;
}
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane: {
+ // A constant value is trivially uniform.
+ if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
+ return replaceInstUsesWith(*II, C);
+
+ // The rest of these may not be safe if the exec mask differs between the
+ // def and the use.
+ Value *Src = II->getArgOperand(0);
+ Instruction *SrcInst = dyn_cast<Instruction>(Src);
+ if (SrcInst && SrcInst->getParent() != II->getParent())
+ break;
+
+ // readfirstlane (readfirstlane x) -> readfirstlane x
+ // readlane (readfirstlane x), y -> readfirstlane x
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readfirstlane>()))
+ return replaceInstUsesWith(*II, Src);
+
+ if (IID == Intrinsic::amdgcn_readfirstlane) {
+ // readfirstlane (readlane x, y) -> readlane x, y
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>()))
+ return replaceInstUsesWith(*II, Src);
+ } else {
+ // readlane (readlane x, y), y -> readlane x, y
+ if (match(Src, m_Intrinsic<Intrinsic::amdgcn_readlane>(
+ m_Value(), m_Specific(II->getArgOperand(1)))))
+ return replaceInstUsesWith(*II, Src);
+ }
+
+ break;
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
@@ -3870,14 +3854,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
+ if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) {
// If there is a stackrestore below this one, remove this one.
- if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ if (II2->getIntrinsicID() == Intrinsic::stackrestore)
return eraseInstFromFunction(CI);
// Bail if we cross over an intrinsic with side effects, such as
// llvm.stacksave, llvm.read_register, or llvm.setjmp.
- if (II->mayHaveSideEffects()) {
+ if (II2->mayHaveSideEffects()) {
CannotRemove = true;
break;
}
@@ -3920,16 +3904,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
// the InstCombineIRInserter object.
- Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
+ FunctionType *AssumeIntrinsicTy = II->getFunctionType();
+ Value *AssumeIntrinsic = II->getCalledValue();
+ Value *A, *B;
if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
- Builder.CreateCall(AssumeIntrinsic, A, II->getName());
- Builder.CreateCall(AssumeIntrinsic, B, II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
return eraseInstFromFunction(*II);
}
// assume(!(a || b)) -> assume(!a); assume(!b);
if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
- Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
- Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
+ Builder.CreateNot(A), II->getName());
+ Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
+ Builder.CreateNot(B), II->getName());
return eraseInstFromFunction(*II);
}
@@ -4036,7 +4024,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
}
- return visitCallSite(II);
+ return visitCallBase(*II);
}
// Fence instruction simplification
@@ -4051,12 +4039,17 @@ Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
// InvokeInst simplification
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
- return visitCallSite(&II);
+ return visitCallBase(II);
+}
+
+// CallBrInst simplification
+Instruction *InstCombiner::visitCallBrInst(CallBrInst &CBI) {
+ return visitCallBase(CBI);
}
/// If this cast does not affect the value passed through the varargs area, we
/// can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallSite CS,
+static bool isSafeToEliminateVarargsCast(const CallBase &Call,
const DataLayout &DL,
const CastInst *const CI,
const int ix) {
@@ -4068,18 +4061,20 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// TODO: This is probably something which should be expanded to all
// intrinsics since the entire point of intrinsics is that
// they are understandable by the optimizer.
- if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
+ if (isStatepoint(&Call) || isGCRelocate(&Call) || isGCResult(&Call))
return false;
// The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
- if (!CS.isByValOrInAllocaArgument(ix))
+ if (!Call.isByValOrInAllocaArgument(ix))
return true;
Type* SrcTy =
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
- Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ Type *DstTy = Call.isByValArgument(ix)
+ ? Call.getParamByValType(ix)
+ : cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
@@ -4096,7 +4091,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
- LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+ LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
@@ -4182,10 +4177,10 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
return nullptr;
}
-/// Improvements for call and invoke instructions.
-Instruction *InstCombiner::visitCallSite(CallSite CS) {
- if (isAllocLikeFn(CS.getInstruction(), &TLI))
- return visitAllocSite(*CS.getInstruction());
+/// Improvements for call, callbr and invoke instructions.
+Instruction *InstCombiner::visitCallBase(CallBase &Call) {
+ if (isAllocLikeFn(&Call, &TLI))
+ return visitAllocSite(Call);
bool Changed = false;
@@ -4195,52 +4190,50 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
- for (Value *V : CS.args()) {
+ for (Value *V : Call.args()) {
if (V->getType()->isPointerTy() &&
- !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
- isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
+ !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
+ isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
ArgNos.push_back(ArgNo);
ArgNo++;
}
- assert(ArgNo == CS.arg_size() && "sanity check");
+ assert(ArgNo == Call.arg_size() && "sanity check");
if (!ArgNos.empty()) {
- AttributeList AS = CS.getAttributes();
- LLVMContext &Ctx = CS.getInstruction()->getContext();
+ AttributeList AS = Call.getAttributes();
+ LLVMContext &Ctx = Call.getContext();
AS = AS.addParamAttribute(Ctx, ArgNos,
Attribute::get(Ctx, Attribute::NonNull));
- CS.setAttributes(AS);
+ Call.setAttributes(AS);
Changed = true;
}
// If the callee is a pointer to a function, attempt to move any casts to the
- // arguments of the call/invoke.
- Value *Callee = CS.getCalledValue();
- if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
+ // arguments of the call/callbr/invoke.
+ Value *Callee = Call.getCalledValue();
+ if (!isa<Function>(Callee) && transformConstExprCastCall(Call))
return nullptr;
if (Function *CalleeF = dyn_cast<Function>(Callee)) {
// Remove the convergent attr on calls when the callee is not convergent.
- if (CS.isConvergent() && !CalleeF->isConvergent() &&
+ if (Call.isConvergent() && !CalleeF->isConvergent() &&
!CalleeF->isIntrinsic()) {
- LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
- << CS.getInstruction() << "\n");
- CS.setNotConvergent();
- return CS.getInstruction();
+ LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
+ << "\n");
+ Call.setNotConvergent();
+ return &Call;
}
// If the call and callee calling conventions don't match, this call must
// be unreachable, as the call is undefined.
- if (CalleeF->getCallingConv() != CS.getCallingConv() &&
+ if (CalleeF->getCallingConv() != Call.getCallingConv() &&
// Only do this for calls to a function with a body. A prototype may
// not actually end up matching the implementation's calling conv for a
// variety of reasons (e.g. it may be written in assembly).
!CalleeF->isDeclaration()) {
- Instruction *OldCall = CS.getInstruction();
- new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
- OldCall);
+ Instruction *OldCall = &Call;
+ CreateNonTerminatorUnreachable(OldCall);
// If OldCall does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!OldCall->getType()->isVoidTy())
@@ -4248,40 +4241,35 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (isa<CallInst>(OldCall))
return eraseInstFromFunction(*OldCall);
- // We cannot remove an invoke, because it would change the CFG, just
- // change the callee to a null pointer.
- cast<InvokeInst>(OldCall)->setCalledFunction(
- Constant::getNullValue(CalleeF->getType()));
+ // We cannot remove an invoke or a callbr, because it would change the
+ // CFG, just change the callee to a null pointer.
+ cast<CallBase>(OldCall)->setCalledFunction(
+ CalleeF->getFunctionType(),
+ Constant::getNullValue(CalleeF->getType()));
return nullptr;
}
}
if ((isa<ConstantPointerNull>(Callee) &&
- !NullPointerIsDefined(CS.getInstruction()->getFunction())) ||
+ !NullPointerIsDefined(Call.getFunction())) ||
isa<UndefValue>(Callee)) {
- // If CS does not return void then replaceAllUsesWith undef.
+ // If Call does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!CS.getInstruction()->getType()->isVoidTy())
- replaceInstUsesWith(*CS.getInstruction(),
- UndefValue::get(CS.getInstruction()->getType()));
+ if (!Call.getType()->isVoidTy())
+ replaceInstUsesWith(Call, UndefValue::get(Call.getType()));
- if (isa<InvokeInst>(CS.getInstruction())) {
- // Can't remove an invoke because we cannot change the CFG.
+ if (Call.isTerminator()) {
+ // Can't remove an invoke or callbr because we cannot change the CFG.
return nullptr;
}
- // This instruction is not reachable, just remove it. We insert a store to
- // undef so that we know that this code is not reachable, despite the fact
- // that we can't modify the CFG here.
- new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
- CS.getInstruction());
-
- return eraseInstFromFunction(*CS.getInstruction());
+ // This instruction is not reachable, just remove it.
+ CreateNonTerminatorUnreachable(&Call);
+ return eraseInstFromFunction(Call);
}
if (IntrinsicInst *II = findInitTrampoline(Callee))
- return transformCallThroughTrampoline(CS, II);
+ return transformCallThroughTrampoline(Call, *II);
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
@@ -4289,39 +4277,48 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
- for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
- E = CS.arg_end(); I != E; ++I, ++ix) {
+ for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
+ I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
*I = CI->getOperand(0);
+
+ // Update the byval type to match the argument type.
+ if (Call.isByValArgument(ix)) {
+ Call.removeParamAttr(ix, Attribute::ByVal);
+ Call.addParamAttr(
+ ix, Attribute::getWithByValType(
+ Call.getContext(),
+ CI->getOperand(0)->getType()->getPointerElementType()));
+ }
Changed = true;
}
}
}
- if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+ if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
// Inline asm calls cannot throw - mark them 'nounwind'.
- CS.setDoesNotThrow();
+ Call.setDoesNotThrow();
Changed = true;
}
// Try to optimize the call if possible, we require DataLayout for most of
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
- if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return eraseInstFromFunction(*I);
}
- return Changed ? CS.getInstruction() : nullptr;
+ return Changed ? &Call : nullptr;
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
-/// the arguments of the call/invoke.
-bool InstCombiner::transformConstExprCastCall(CallSite CS) {
- auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+/// the arguments of the call/callbr/invoke.
+bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
+ auto *Callee = dyn_cast<Function>(Call.getCalledValue()->stripPointerCasts());
if (!Callee)
return false;
@@ -4335,11 +4332,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// prototype with the exception of pointee types. The code below doesn't
// implement that, so we can't do this transform.
// TODO: Do the transform if it only requires adding pointer casts.
- if (CS.isMustTailCall())
+ if (Call.isMustTailCall())
return false;
- Instruction *Caller = CS.getInstruction();
- const AttributeList &CallerPAL = CS.getAttributes();
+ Instruction *Caller = &Call;
+ const AttributeList &CallerPAL = Call.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
@@ -4370,20 +4367,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Attribute not compatible with transformed value.
}
- // If the callsite is an invoke instruction, and the return value is used by
- // a PHI node in a successor, we cannot change the return type of the call
- // because there is no place to put the cast instruction (without breaking
- // the critical edge). Bail out in this case.
- if (!Caller->use_empty())
+ // If the callbase is an invoke/callbr instruction, and the return value is
+ // used by a PHI node in a successor, we cannot change the return type of
+ // the call because there is no place to put the cast instruction (without
+ // breaking the critical edge). Bail out in this case.
+ if (!Caller->use_empty()) {
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
for (User *U : II->users())
if (PHINode *PN = dyn_cast<PHINode>(U))
if (PN->getParent() == II->getNormalDest() ||
PN->getParent() == II->getUnwindDest())
return false;
+ // FIXME: Be conservative for callbr to avoid a quadratic search.
+ if (isa<CallBrInst>(Caller))
+ return false;
+ }
}
- unsigned NumActualArgs = CS.arg_size();
+ unsigned NumActualArgs = Call.arg_size();
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
// Prevent us turning:
@@ -4398,7 +4399,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
- CallSite::arg_iterator AI = CS.arg_begin();
+ auto AI = Call.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
@@ -4410,7 +4411,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
.overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
- if (CS.isInAllocaArgument(i))
+ if (Call.isInAllocaArgument(i))
return false; // Cannot transform to and from inalloca.
// If the parameter is passed as a byval argument, then we have to have a
@@ -4420,7 +4421,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
- Type *CurElTy = ActTy->getPointerElementType();
+ Type *CurElTy = Call.getParamByValType(i);
if (DL.getTypeAllocSize(CurElTy) !=
DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
@@ -4435,7 +4436,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the callee is just a declaration, don't change the varargsness of the
// call. We don't want to introduce a varargs call where one doesn't
// already exist.
- PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
+ PointerType *APTy = cast<PointerType>(Call.getCalledValue()->getType());
if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
return false;
@@ -4474,7 +4475,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// with the existing attributes. Wipe out any problematic attributes.
RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
- AI = CS.arg_begin();
+ LLVMContext &Ctx = Call.getContext();
+ AI = Call.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
@@ -4484,7 +4486,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Args.push_back(NewArg);
// Add any parameter attributes.
- ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+ if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
+ AttrBuilder AB(CallerPAL.getParamAttributes(i));
+ AB.addByValAttr(NewArg->getType()->getPointerElementType());
+ ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
+ } else
+ ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
}
// If the function takes more arguments than the call was taking, add them
@@ -4523,45 +4530,50 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
"missing argument attributes");
- LLVMContext &Ctx = Callee->getContext();
AttributeList NewCallerPAL = AttributeList::get(
Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ Call.getOperandBundlesAsDefs(OpBundles);
- CallSite NewCS;
+ CallBase *NewCall;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
- II->getUnwindDest(), Args, OpBundles);
+ NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args, OpBundles);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
+ NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
+ CBI->getIndirectDests(), Args, OpBundles);
} else {
- NewCS = Builder.CreateCall(Callee, Args, OpBundles);
- cast<CallInst>(NewCS.getInstruction())
- ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
+ NewCall = Builder.CreateCall(Callee, Args, OpBundles);
+ cast<CallInst>(NewCall)->setTailCallKind(
+ cast<CallInst>(Caller)->getTailCallKind());
}
- NewCS->takeName(Caller);
- NewCS.setCallingConv(CS.getCallingConv());
- NewCS.setAttributes(NewCallerPAL);
+ NewCall->takeName(Caller);
+ NewCall->setCallingConv(Call.getCallingConv());
+ NewCall->setAttributes(NewCallerPAL);
// Preserve the weight metadata for the new call instruction. The metadata
// is used by SamplePGO to check callsite's hotness.
uint64_t W;
if (Caller->extractProfTotalWeight(W))
- NewCS->setProfWeight(W);
+ NewCall->setProfWeight(W);
// Insert a cast of the return type as necessary.
- Instruction *NC = NewCS.getInstruction();
+ Instruction *NC = NewCall;
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
- // If this is an invoke instruction, we should insert it after the first
- // non-phi, instruction in the normal successor block.
+ // If this is an invoke/callbr instruction, we should insert it after the
+ // first non-phi instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
+ BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
+ InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call.
InsertNewInstBefore(NC, *Caller);
@@ -4590,23 +4602,20 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
Instruction *
-InstCombiner::transformCallThroughTrampoline(CallSite CS,
- IntrinsicInst *Tramp) {
- Value *Callee = CS.getCalledValue();
- PointerType *PTy = cast<PointerType>(Callee->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- AttributeList Attrs = CS.getAttributes();
+InstCombiner::transformCallThroughTrampoline(CallBase &Call,
+ IntrinsicInst &Tramp) {
+ Value *Callee = Call.getCalledValue();
+ Type *CalleeTy = Callee->getType();
+ FunctionType *FTy = Call.getFunctionType();
+ AttributeList Attrs = Call.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return nullptr;
- assert(Tramp &&
- "transformCallThroughTrampoline called with incorrect CallSite.");
-
- Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
- FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
+ Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
+ FunctionType *NestFTy = NestF->getFunctionType();
AttributeList NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
@@ -4628,22 +4637,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
}
if (NestTy) {
- Instruction *Caller = CS.getInstruction();
std::vector<Value*> NewArgs;
std::vector<AttributeSet> NewArgAttrs;
- NewArgs.reserve(CS.arg_size() + 1);
- NewArgAttrs.reserve(CS.arg_size());
+ NewArgs.reserve(Call.arg_size() + 1);
+ NewArgAttrs.reserve(Call.arg_size());
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
{
unsigned ArgNo = 0;
- CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ auto I = Call.arg_begin(), E = Call.arg_end();
do {
if (ArgNo == NestArgNo) {
// Add the chain argument and attributes.
- Value *NestVal = Tramp->getArgOperand(2);
+ Value *NestVal = Tramp.getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
@@ -4705,24 +4713,30 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
Attrs.getRetAttributes(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ Call.getOperandBundlesAsDefs(OpBundles);
Instruction *NewCaller;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCaller = InvokeInst::Create(NewCallee,
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
+ NewCaller = InvokeInst::Create(NewFTy, NewCallee,
II->getNormalDest(), II->getUnwindDest(),
NewArgs, OpBundles);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
+ NewCaller =
+ CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
+ CBI->getIndirectDests(), NewArgs, OpBundles);
+ cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
+ cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
} else {
- NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
+ NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
cast<CallInst>(NewCaller)->setTailCallKind(
- cast<CallInst>(Caller)->getTailCallKind());
+ cast<CallInst>(Call).getTailCallKind());
cast<CallInst>(NewCaller)->setCallingConv(
- cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(Call).getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
- NewCaller->setDebugLoc(Caller->getDebugLoc());
+ NewCaller->setDebugLoc(Call.getDebugLoc());
return NewCaller;
}
@@ -4731,9 +4745,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Replace the trampoline call with a direct call. Since there is no 'nest'
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
- Constant *NewCallee =
- NestF->getType() == PTy ? NestF :
- ConstantExpr::getBitCast(NestF, PTy);
- CS.setCalledFunction(NewCallee);
- return CS.getInstruction();
+ Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
+ Call.setCalledFunction(FTy, NewCallee);
+ return &Call;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 1201ac196ec0..2c9ba203fbf3 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1,9 +1,8 @@
//===- InstCombineCasts.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1373,10 +1372,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If we know that the value being extended is positive, we can use a zext
// instead.
KnownBits Known = computeKnownBits(Src, 0, &CI);
- if (Known.isNonNegative()) {
- Value *ZExt = Builder.CreateZExt(Src, DestTy);
- return replaceInstUsesWith(CI, ZExt);
- }
+ if (Known.isNonNegative())
+ return CastInst::Create(Instruction::ZExt, Src, DestTy);
// Try to extend the entire expression tree to the wide destination type.
if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
@@ -1618,12 +1615,20 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
return CastInst::CreateFPCast(ExactResult, Ty);
}
}
+ }
- // (fptrunc (fneg x)) -> (fneg (fptrunc x))
- Value *X;
- if (match(OpI, m_FNeg(m_Value(X)))) {
+ // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+ Value *X;
+ Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
+ if (Op && Op->hasOneUse()) {
+ if (match(Op, m_FNeg(m_Value(X)))) {
Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
- return BinaryOperator::CreateFNegFMF(InnerTrunc, OpI);
+
+ // FIXME: Once we're sure that unary FNeg optimizations are on par with
+ // binary FNeg, this should always return a unary operator.
+ if (isa<BinaryOperator>(Op))
+ return BinaryOperator::CreateFNegFMF(InnerTrunc, Op);
+ return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
}
}
@@ -1657,8 +1662,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &FPT) {
II->getIntrinsicID(), Ty);
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCI = CallInst::Create(Overload, { InnerTrunc }, OpBundles,
- II->getName());
+ CallInst *NewCI =
+ CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
NewCI->copyFastMathFlags(II);
return NewCI;
}
@@ -2167,7 +2172,7 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
SmallSetVector<PHINode *, 4> OldPhiNodes;
// Find all of the A->B casts and PHI nodes.
- // We need to inpect all related PHI nodes, but PHIs can be cyclic, so
+ // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
// OldPhiNodes is used to track all known PHI nodes, before adding a new
// PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
PhiWorklist.push_back(PN);
@@ -2242,20 +2247,43 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
}
}
+ // Traverse all accumulated PHI nodes and process their users,
+ // which are Stores and BitCasts. Without this processing, NewPHI
+ // nodes could be replicated and could lead to extra moves generated
+ // after DeSSA.
// If there is a store with type B, change it to type A.
- for (User *U : PN->users()) {
- auto *SI = dyn_cast<StoreInst>(U);
- if (SI && SI->isSimple() && SI->getOperand(0) == PN) {
- Builder.SetInsertPoint(SI);
- auto *NewBC =
- cast<BitCastInst>(Builder.CreateBitCast(NewPNodes[PN], SrcTy));
- SI->setOperand(0, NewBC);
- Worklist.Add(SI);
- assert(hasStoreUsersOnly(*NewBC));
+
+
+ // Replace users of BitCast B->A with NewPHI. These will help
+ // later to get rid of a closure formed by OldPHI nodes.
+ Instruction *RetVal = nullptr;
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (User *V : OldPN->users()) {
+ if (auto *SI = dyn_cast<StoreInst>(V)) {
+ if (SI->isSimple() && SI->getOperand(0) == OldPN) {
+ Builder.SetInsertPoint(SI);
+ auto *NewBC =
+ cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+ SI->setOperand(0, NewBC);
+ Worklist.Add(SI);
+ assert(hasStoreUsersOnly(*NewBC));
+ }
+ }
+ else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+ // Verify it's a B->A cast.
+ Type *TyB = BCI->getOperand(0)->getType();
+ Type *TyA = BCI->getType();
+ if (TyA == DestTy && TyB == SrcTy) {
+ Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+ if (BCI == &CI)
+ RetVal = I;
+ }
+ }
}
}
- return replaceInstUsesWith(CI, NewPNodes[PN]);
+ return RetVal;
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -2310,7 +2338,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
- return GetElementPtrInst::CreateInBounds(Src, Idxs);
+ return GetElementPtrInst::CreateInBounds(SrcPTy->getElementType(), Src,
+ Idxs);
}
}
@@ -2355,11 +2384,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
// Otherwise, see if our source is an insert. If so, then use the scalar
- // component directly.
- if (InsertElementInst *IEI =
- dyn_cast<InsertElementInst>(CI.getOperand(0)))
- return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
- DestTy);
+ // component directly:
+ // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
+ if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
+ return new BitCastInst(InsElt->getOperand(1), DestTy);
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b5bbb09935e2..3a4283ae5406 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1,9 +1,8 @@
//===- InstCombineCompares.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -704,7 +703,10 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
continue;
if (auto *CI = dyn_cast<CastInst>(Val)) {
- NewInsts[CI] = NewInsts[CI->getOperand(0)];
+ // Don't get rid of the intermediate variable here; the store can grow
+ // the map which will invalidate the reference to the input value.
+ Value *V = NewInsts[CI->getOperand(0)];
+ NewInsts[CI] = V;
continue;
}
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
@@ -1292,8 +1294,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// use the sadd_with_overflow intrinsic to efficiently compute both the
// result and the overflow bit.
Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
- Value *F = Intrinsic::getDeclaration(I.getModule(),
- Intrinsic::sadd_with_overflow, NewType);
+ Function *F = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::sadd_with_overflow, NewType);
InstCombiner::BuilderTy &Builder = IC.Builder;
@@ -1315,14 +1317,16 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+// Handle icmp pred X, 0
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
CmpInst::Predicate Pred = Cmp.getPredicate();
- Value *X = Cmp.getOperand(0);
+ if (!match(Cmp.getOperand(1), m_Zero()))
+ return nullptr;
- if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
+ // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+ if (Pred == ICmpInst::ICMP_SGT) {
Value *A, *B;
- SelectPatternResult SPR = matchSelectPattern(X, A, B);
+ SelectPatternResult SPR = matchSelectPattern(Cmp.getOperand(0), A, B);
if (SPR.Flavor == SPF_SMIN) {
if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
return new ICmpInst(Pred, B, Cmp.getOperand(1));
@@ -1330,6 +1334,20 @@ Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
return new ICmpInst(Pred, A, Cmp.getOperand(1));
}
}
+
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
+ Value *X, *Y;
+ if (match(Cmp.getOperand(0), m_URem(m_Value(X), m_Value(Y))) &&
+ ICmpInst::isEquality(Pred)) {
+ KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+ KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return new ICmpInst(Pred, X, Cmp.getOperand(1));
+ }
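// [Editorial note; not part of the upstream change.] Why this is sound: a
// value with at most one bit set is either 0 or a power of two, and a
// divisor with at least two bits set can never divide a power of two, so
// urem(%x, %y) == 0 is only possible when %x == 0. For example, if %x is
// known to be 0 or 16 and %y is known to have two or more bits set:
//   icmp eq (urem i8 %x, %y), 0  -->  icmp eq i8 %x, 0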
+
return nullptr;
}
@@ -1624,20 +1642,43 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
BinaryOperator *And,
const APInt &C1) {
+ bool isICMP_NE = Cmp.getPredicate() == ICmpInst::ICMP_NE;
+
// For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
// TODO: We canonicalize to the longer form for scalars because we have
// better analysis/folds for icmp, and codegen may be better with icmp.
- if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
- C1.isNullValue() && match(And->getOperand(1), m_One()))
+ if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
+ match(And->getOperand(1), m_One()))
return new TruncInst(And->getOperand(0), Cmp.getType());
const APInt *C2;
- if (!match(And->getOperand(1), m_APInt(C2)))
+ Value *X;
+ if (!match(And, m_And(m_Value(X), m_APInt(C2))))
return nullptr;
+ // Don't perform the following transforms if the AND has multiple uses
if (!And->hasOneUse())
return nullptr;
+ if (Cmp.isEquality() && C1.isNullValue()) {
+ // Restrict this fold to single-use 'and' (PR10267).
+ // Replace (and X, (1 << size(X)-1) != 0) with X s< 0
+ if (C2->isSignMask()) {
+ Constant *Zero = Constant::getNullValue(X->getType());
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(NewPred, X, Zero);
+ }
+
+ // Restrict this fold only for single-use 'and' (PR10267).
+ // ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two.
+ if ((~(*C2) + 1).isPowerOf2()) {
+ Constant *NegBOC =
+ ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(NewPred, X, NegBOC);
+ }
+ }
+
// If the LHS is an 'and' of a truncate and we can widen the and/compare to
// the input width without changing the value produced, eliminate the cast:
//
@@ -1772,13 +1813,22 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
ConstantInt::get(V->getType(), 1));
}
- // X | C == C --> X <=u C
- // X | C != C --> X >u C
- // iff C+1 is a power of 2 (C is a bitmask of the low bits)
- if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
- (C + 1).isPowerOf2()) {
- Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
- return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
+ Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1);
+ if (Cmp.isEquality() && Cmp.getOperand(1) == OrOp1) {
+ // X | C == C --> X <=u C
+ // X | C != C --> X >u C
+ // iff C+1 is a power of 2 (C is a bitmask of the low bits)
+ if ((C + 1).isPowerOf2()) {
+ Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ return new ICmpInst(Pred, OrOp0, OrOp1);
+ }
+ // More general: are all bits outside of a mask constant set or not set?
+ // X | C == C --> (X & ~C) == 0
+ // X | C != C --> (X & ~C) != 0
+ if (Or->hasOneUse()) {
+ Value *A = Builder.CreateAnd(OrOp0, ~C);
+ return new ICmpInst(Pred, A, ConstantInt::getNullValue(OrOp0->getType()));
+ }
}
if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
@@ -1799,8 +1849,8 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
// Are we using xors to bitwise check for a pair of (in)equalities? Convert to
// a shorter form that has more potential to be folded even further.
Value *X1, *X2, *X3, *X4;
- if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
- match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
+ if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
+ match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
// ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
// ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
@@ -1994,6 +2044,27 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
And, Constant::getNullValue(ShType));
}
+ // Simplify 'shl' inequality test into 'and' equality test.
+ if (Cmp.isUnsigned() && Shl->hasOneUse()) {
+ // (X l<< C2) u<=/u> C1 iff C1+1 is power of two -> X & (~C1 l>> C2) ==/!= 0
+ if ((C + 1).isPowerOf2() &&
+ (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)) {
+ Value *And = Builder.CreateAnd(X, (~C).lshr(ShiftAmt->getZExtValue()));
+ return new ICmpInst(Pred == ICmpInst::ICMP_ULE ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_NE,
+ And, Constant::getNullValue(ShType));
+ }
+ // (X l<< C2) u</u>= C1 iff C1 is power of two -> X & (-C1 l>> C2) ==/!= 0
+ if (C.isPowerOf2() &&
+ (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)) {
+ Value *And =
+ Builder.CreateAnd(X, (~(C - 1)).lshr(ShiftAmt->getZExtValue()));
+ return new ICmpInst(Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_NE,
+ And, Constant::getNullValue(ShType));
+ }
+ }
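// [Editorial illustration; not part of the upstream change.] A concrete
// instance of the first rewrite above, for i8 with C2 = 4 and C1 = 15:
//   icmp ule (shl i8 %x, 4), 15  -->  icmp eq (and i8 %x, 15), 0
// ~C1 = 0xF0 and lshr(0xF0, 4) = 0x0F; the shifted value can only be <= 15
// when the low four bits of %x (which land in the result's high nibble) are
// all zero.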
+
// Transform (icmp pred iM (shl iM %v, N), C)
// -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
// Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
@@ -2313,6 +2384,16 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
const APInt &C) {
Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
ICmpInst::Predicate Pred = Cmp.getPredicate();
+ const APInt *C2;
+ APInt SubResult;
+
+ // (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
+ if (match(X, m_APInt(C2)) &&
+ ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
+ (Cmp.isSigned() && Sub->hasNoSignedWrap())) &&
+ !subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
+ return new ICmpInst(Cmp.getSwappedPredicate(), Y,
+ ConstantInt::get(Y->getType(), SubResult));
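// [Editorial illustration; not part of the upstream change.] Example of the
// swap fold above, with i8, C2 = 10 and C = 3:
//   icmp ult (sub nuw i8 10, %y), 3  -->  icmp ugt i8 %y, 7
// nuw guarantees %y u<= 10, so 10 - %y u< 3 holds exactly for %y in
// {8, 9, 10}, i.e. %y u> 7 where 7 = C2 - C.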
// The following transforms are only worth it if the only user of the subtract
// is the icmp.
@@ -2337,7 +2418,6 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
}
- const APInt *C2;
if (!match(X, m_APInt(C2)))
return nullptr;
@@ -2482,20 +2562,76 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
// the entire original Cmp can be simplified to a false.
Value *Cond = Builder.getFalse();
if (TrueWhenLessThan)
- Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT,
+ OrigLHS, OrigRHS));
if (TrueWhenEqual)
- Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ,
+ OrigLHS, OrigRHS));
if (TrueWhenGreaterThan)
- Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
+ Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT,
+ OrigLHS, OrigRHS));
return replaceInstUsesWith(Cmp, Cond);
}
return nullptr;
}
-Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
- BitCastInst *Bitcast,
- const APInt &C) {
+static Instruction *foldICmpBitCast(ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
+ if (!Bitcast)
+ return nullptr;
+
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op1 = Cmp.getOperand(1);
+ Value *BCSrcOp = Bitcast->getOperand(0);
+
+ // Make sure the bitcast doesn't change the number of vector elements.
+ if (Bitcast->getSrcTy()->getScalarSizeInBits() ==
+ Bitcast->getDestTy()->getScalarSizeInBits()) {
+ // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
+ Value *X;
+ if (match(BCSrcOp, m_SIToFP(m_Value(X)))) {
+ // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0
+ // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0
+ // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
+ // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
+ match(Op1, m_Zero()))
+ return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+
+ // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
+ if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
+ return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
+
+ // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
+ if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
+ return new ICmpInst(Pred, X,
+ ConstantInt::getAllOnesValue(X->getType()));
+ }
+
+ // Zero-equality checks are preserved through unsigned floating-point casts:
+ // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
+ // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
+ if (match(BCSrcOp, m_UIToFP(m_Value(X))))
+ if (Cmp.isEquality() && match(Op1, m_Zero()))
+ return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+ }
+
+ // Test to see if the operands of the icmp are casted versions of other
+ // values. If the ptr->ptr cast can be stripped off both arguments, do so.
+ if (Bitcast->getType()->isPointerTy() &&
+ (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
+ // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
+ // so eliminate it as well.
+ if (auto *BC2 = dyn_cast<BitCastInst>(Op1))
+ Op1 = BC2->getOperand(0);
+
+ Op1 = Builder.CreateBitCast(Op1, BCSrcOp->getType());
+ return new ICmpInst(Pred, BCSrcOp, Op1);
+ }
+
// Folding: icmp <pred> iN X, C
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
// and C is a splat of a K-bit pattern
@@ -2503,28 +2639,28 @@ Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
// Into:
// %E = extractelement <M x iK> %vec, i32 C'
// icmp <pred> iK %E, trunc(C)
- if (!Bitcast->getType()->isIntegerTy() ||
+ const APInt *C;
+ if (!match(Cmp.getOperand(1), m_APInt(C)) ||
+ !Bitcast->getType()->isIntegerTy() ||
!Bitcast->getSrcTy()->isIntOrIntVectorTy())
return nullptr;
- Value *BCIOp = Bitcast->getOperand(0);
- Value *Vec = nullptr; // 1st vector arg of the shufflevector
- Constant *Mask = nullptr; // Mask arg of the shufflevector
- if (match(BCIOp,
+ Value *Vec;
+ Constant *Mask;
+ if (match(BCSrcOp,
m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
// Check whether every element of Mask is the same constant
if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
- auto *VecTy = cast<VectorType>(BCIOp->getType());
+ auto *VecTy = cast<VectorType>(BCSrcOp->getType());
auto *EltTy = cast<IntegerType>(VecTy->getElementType());
- auto Pred = Cmp.getPredicate();
- if (C.isSplat(EltTy->getBitWidth())) {
+ if (C->isSplat(EltTy->getBitWidth())) {
// Fold the icmp based on the value of C
// If C is M copies of an iK sized bit pattern,
// then:
// => %E = extractelement <N x iK> %vec, i32 Elem
// icmp <pred> iK %SplatVal, <pattern>
Value *Extract = Builder.CreateExtractElement(Vec, Elem);
- Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
+ Value *NewC = ConstantInt::get(EltTy, C->trunc(EltTy->getBitWidth()));
return new ICmpInst(Pred, Extract, NewC);
}
}
@@ -2606,13 +2742,9 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
return I;
}
- if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
- if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)))
+ if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, II, *C))
return I;
- }
-
- if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
- return I;
return nullptr;
}
@@ -2711,24 +2843,6 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
if (C == *BOC && C.isPowerOf2())
return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
BO, Constant::getNullValue(RHS->getType()));
-
- // Don't perform the following transforms if the AND has multiple uses
- if (!BO->hasOneUse())
- break;
-
- // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (BOC->isSignMask()) {
- Constant *Zero = Constant::getNullValue(BOp0->getType());
- auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
- return new ICmpInst(NewPred, BOp0, Zero);
- }
-
- // ((X & ~7) == 0) --> X < 8
- if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
- Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
- auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
- return new ICmpInst(NewPred, BOp0, NegBOC);
- }
}
break;
}
@@ -2756,14 +2870,10 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
return nullptr;
}
-/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
-Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
- const APInt &C) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
- if (!II || !Cmp.isEquality())
- return nullptr;
-
- // Handle icmp {eq|ne} <intrinsic>, Constant.
+/// Fold an equality icmp with LLVM intrinsic and constant operand.
+Instruction *InstCombiner::foldICmpEqIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
switch (II->getIntrinsicID()) {
@@ -2823,6 +2933,65 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
return nullptr;
}
+/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+ IntrinsicInst *II,
+ const APInt &C) {
+ if (Cmp.isEquality())
+ return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
+
+ Type *Ty = II->getType();
+ unsigned BitWidth = C.getBitWidth();
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ctlz: {
+ // ctlz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX < 0b00010000
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ unsigned Num = C.getLimitedValue();
+ APInt Limit = APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_ULT,
+ II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+ }
+
+ // ctlz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX > 0b00011111
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+ C.uge(1) && C.ule(BitWidth)) {
+ unsigned Num = C.getLimitedValue();
+ APInt Limit = APInt::getLowBitsSet(BitWidth, BitWidth - Num);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_UGT,
+ II->getArgOperand(0), ConstantInt::get(Ty, Limit));
+ }
+ break;
+ }
+ case Intrinsic::cttz: {
+ // Limit to one use to ensure we don't increase instruction count.
+ if (!II->hasOneUse())
+ return nullptr;
+
+ // cttz(0bXXXXXXXX) > 3 -> 0bXXXXXXXX & 0b00001111 == 0
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && C.ult(BitWidth)) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue() + 1);
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ,
+ Builder.CreateAnd(II->getArgOperand(0), Mask),
+ ConstantInt::getNullValue(Ty));
+ }
+
+ // cttz(0bXXXXXXXX) < 3 -> 0bXXXXXXXX & 0b00000111 != 0
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT &&
+ C.uge(1) && C.ule(BitWidth)) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, C.getLimitedValue());
+ return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE,
+ Builder.CreateAnd(II->getArgOperand(0), Mask),
+ ConstantInt::getNullValue(Ty));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return nullptr;
+}
+
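Editor's note: the ctlz/cttz range comparisons above reduce to plain limits and masks. The following brute-force i8 check is a standalone illustration, not part of the patch; ctlz(0) and cttz(0) are taken as 8, matching the is-zero-undef=false semantics.

#include <cassert>
#include <cstdint>

static unsigned ctlz8(uint8_t x) { unsigned n = 0; for (int b = 7; b >= 0 && !((x >> b) & 1); --b) ++n; return n; }
static unsigned cttz8(uint8_t x) { unsigned n = 0; for (int b = 0; b <= 7 && !((x >> b) & 1); ++b) ++n; return n; }

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned c = 0; c < 8; ++c) {
      // ctlz(x) u> c  <=>  x u< (1 << (8 - c - 1))
      assert((ctlz8(x) > c) == (x < (1u << (8 - c - 1))));
      // cttz(x) u> c  <=>  (x & low_bits(c + 1)) == 0
      assert((cttz8(x) > c) == ((x & ((1u << (c + 1)) - 1)) == 0));
    }
    for (unsigned c = 1; c <= 8; ++c) {
      // ctlz(x) u< c  <=>  x u> low_bits(8 - c)
      assert((ctlz8(x) < c) == (x > ((1u << (8 - c)) - 1)));
      // cttz(x) u< c  <=>  (x & low_bits(c)) != 0
      assert((cttz8(x) < c) == ((x & ((1u << c) - 1)) != 0));
    }
  }
}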
/// Handle icmp with constant (but not simple integer constant) RHS.
Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2983,6 +3152,10 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
// x s> x & (-1 >> y) -> x s> (-1 >> y)
if (X != I.getOperand(0)) // X must be on LHS of comparison!
return nullptr; // Ignore the other case.
+ if (!match(M, m_Constant())) // Cannot do this fold with a non-constant mask.
+ return nullptr;
+ if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+ return nullptr;
DstPred = ICmpInst::Predicate::ICMP_SGT;
break;
case ICmpInst::Predicate::ICMP_SGE:
@@ -3009,6 +3182,10 @@ static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
// x s<= x & (-1 >> y) -> x s<= (-1 >> y)
if (X != I.getOperand(0)) // X must be on LHS of comparison!
return nullptr; // Ignore the other case.
+ if (!match(M, m_Constant())) // Cannot do this fold with a non-constant mask.
+ return nullptr;
+ if (!match(M, m_NonNegative())) // Must not have any -1 vector elements.
+ return nullptr;
DstPred = ICmpInst::Predicate::ICMP_SLE;
break;
default:
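Editor's note: the two new guards in the hunks above reject masks with -1 elements because the signed fold is unsound for them. A small scalar i8 counterexample in C++, purely illustrative:

#include <cassert>
#include <cstdint>

int main() {
  // With M == -1 the pre-fold compare is trivially false, but the post-fold
  // compare is not, so rewriting would change the result.
  int8_t X = 5, M = -1;                      // M stands in for (-1 >> y) with y == 0
  bool Before = X > int8_t(X & M);           // x s> (x & -1)  ==  x s> x  ==  false
  bool After  = X > M;                       // x s> -1        ==  true when x == 5
  assert(!Before && After);                  // hence the m_NonNegative() requirement
}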
@@ -3093,6 +3270,64 @@ foldICmpWithTruncSignExtendedVal(ICmpInst &I,
return T1;
}
+// Given pattern:
+// icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+// we should move shifts to the same hand of 'and', i.e. rewrite as
+// icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x)
+// We are only interested in opposite logical shifts here.
+// If we can, we want to end up creating 'lshr' shift.
+static Value *
+foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
+ InstCombiner::BuilderTy &Builder) {
+ if (!I.isEquality() || !match(I.getOperand(1), m_Zero()) ||
+ !I.getOperand(0)->hasOneUse())
+ return nullptr;
+
+ auto m_AnyLogicalShift = m_LogicalShift(m_Value(), m_Value());
+ auto m_AnyLShr = m_LShr(m_Value(), m_Value());
+
+ // Look for an 'and' of two (opposite) logical shifts.
+ // Pick the single-use shift as XShift.
+ Value *XShift, *YShift;
+ if (!match(I.getOperand(0),
+ m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
+ m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+ return nullptr;
+
+ // If YShift is a single-use 'lshr', swap the shifts around.
+ if (match(YShift, m_OneUse(m_AnyLShr)))
+ std::swap(XShift, YShift);
+
+ // The shifts must be in opposite directions.
+ Instruction::BinaryOps XShiftOpcode =
+ cast<BinaryOperator>(XShift)->getOpcode();
+ if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+ return nullptr; // Do not care about same-direction shifts here.
+
+ Value *X, *XShAmt, *Y, *YShAmt;
+ match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
+ match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+
+ // Can we fold (XShAmt+YShAmt) ?
+ Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
+ SQ.getWithInstruction(&I));
+ if (!NewShAmt)
+ return nullptr;
+ // Is the new shift amount smaller than the bit width?
+ // FIXME: could also rely on ConstantRange.
+ unsigned BitWidth = X->getType()->getScalarSizeInBits();
+ if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+ APInt(BitWidth, BitWidth))))
+ return nullptr;
+ // All good, we can do this fold. The shift is the same that was for X.
+ Value *T0 = XShiftOpcode == Instruction::BinaryOps::LShr
+ ? Builder.CreateLShr(X, NewShAmt)
+ : Builder.CreateShl(X, NewShAmt);
+ Value *T1 = Builder.CreateAnd(T0, Y);
+ return Builder.CreateICmp(I.getPredicate(), T1,
+ Constant::getNullValue(X->getType()));
+}
+
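Editor's note: the rewrite above collapses two opposite logical shifts into a single shift by (Q+K) on the x side. A brute-force i8 check of the zero-test equivalence (illustration only; both shl/lshr and lshr/shl pairings are covered, and the (Q+K) u< bitwidth side condition is respected):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned q = 0; q < 8; ++q)
        for (unsigned k = 0; q + k < 8; ++k) {
          uint8_t A = uint8_t(x << q) & uint8_t(y >> k);    // (x shl Q) & (y lshr K)
          uint8_t B = uint8_t(x << (q + k)) & uint8_t(y);   // (x shl (Q+K)) & y
          assert((A == 0) == (B == 0));                     // eq/ne 0 is preserved
          uint8_t C = uint8_t(x >> q) & uint8_t(y << k);    // the opposite pairing
          uint8_t D = uint8_t(x >> (q + k)) & uint8_t(y);
          assert((C == 0) == (D == 0));
        }
}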
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3448,6 +3683,9 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
return replaceInstUsesWith(I, V);
+ if (Value *V = foldShiftIntoShiftInAnotherHandOfAndInICmp(I, SQ, Builder))
+ return replaceInstUsesWith(I, V);
+
return nullptr;
}
@@ -3688,6 +3926,30 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
match(Op1, m_BitReverse(m_Value(B)))))
return new ICmpInst(Pred, A, B);
+ // Canonicalize checking for a power-of-2-or-zero value:
+ // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
+ // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
+ if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
+ m_Deferred(A)))) ||
+ !match(Op1, m_ZeroInt()))
+ A = nullptr;
+
+ // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
+ // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
+ if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
+ A = Op1;
+ else if (match(Op1,
+ m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
+ A = Op0;
+
+ if (A) {
+ Type *Ty = A->getType();
+ CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
+ return Pred == ICmpInst::ICMP_EQ
+ ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2))
+ : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
+ }
+
return nullptr;
}
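Editor's note: both idioms canonicalized above are the classic power-of-two-or-zero tests, which is why they become ctpop(A) u< 2 (or u> 1 for the ne form). A standalone i8 check, not part of the patch:

#include <cassert>
#include <cstdint>

static unsigned popcount8(uint8_t x) { unsigned n = 0; while (x) { n += x & 1; x >>= 1; } return n; }

int main() {
  for (unsigned a = 0; a < 256; ++a) {
    uint8_t A = uint8_t(a);
    bool PowerOfTwoOrZero = popcount8(A) < 2;                  // ctpop(A) u< 2
    assert(((A & uint8_t(A - 1)) == 0) == PowerOfTwoOrZero);   // (A & (A-1)) == 0
    assert(((A & uint8_t(-a)) == A) == PowerOfTwoOrZero);      // (A & -A) == A
  }
}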
@@ -3698,7 +3960,6 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
Value *LHSCIOp = LHSCI->getOperand(0);
Type *SrcTy = LHSCIOp->getType();
Type *DestTy = LHSCI->getType();
- Value *RHSCIOp;
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
@@ -3740,7 +4001,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
// Not an extension from the same type?
- RHSCIOp = CI->getOperand(0);
+ Value *RHSCIOp = CI->getOperand(0);
if (RHSCIOp->getType() != LHSCIOp->getType())
return nullptr;
@@ -3813,104 +4074,83 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
return BinaryOperator::CreateNot(Result);
}
-bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
- Value *RHS, Instruction &OrigI,
- Value *&Result, Constant *&Overflow) {
+static bool isNeutralValue(Instruction::BinaryOps BinaryOp, Value *RHS) {
+ switch (BinaryOp) {
+ default:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
+ case Instruction::Sub:
+ return match(RHS, m_Zero());
+ case Instruction::Mul:
+ return match(RHS, m_One());
+ }
+}
+
+OverflowResult InstCombiner::computeOverflow(
+ Instruction::BinaryOps BinaryOp, bool IsSigned,
+ Value *LHS, Value *RHS, Instruction *CxtI) const {
+ switch (BinaryOp) {
+ default:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
+ if (IsSigned)
+ return computeOverflowForSignedAdd(LHS, RHS, CxtI);
+ else
+ return computeOverflowForUnsignedAdd(LHS, RHS, CxtI);
+ case Instruction::Sub:
+ if (IsSigned)
+ return computeOverflowForSignedSub(LHS, RHS, CxtI);
+ else
+ return computeOverflowForUnsignedSub(LHS, RHS, CxtI);
+ case Instruction::Mul:
+ if (IsSigned)
+ return computeOverflowForSignedMul(LHS, RHS, CxtI);
+ else
+ return computeOverflowForUnsignedMul(LHS, RHS, CxtI);
+ }
+}
+
+bool InstCombiner::OptimizeOverflowCheck(
+ Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS,
+ Instruction &OrigI, Value *&Result, Constant *&Overflow) {
if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
- auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
- Result = OpResult;
- Overflow = OverflowVal;
- if (ReuseName)
- Result->takeName(&OrigI);
- return true;
- };
-
// If the overflow check was an add followed by a compare, the insertion point
// may be pointing to the compare. We want to insert the new instructions
// before the add in case there are uses of the add between the add and the
// compare.
Builder.SetInsertPoint(&OrigI);
- switch (OCF) {
- case OCF_INVALID:
- llvm_unreachable("bad overflow check kind!");
-
- case OCF_UNSIGNED_ADD: {
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
- if (OR == OverflowResult::NeverOverflows)
- return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
- true);
-
- if (OR == OverflowResult::AlwaysOverflows)
- return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
-
- // Fall through uadd into sadd
- LLVM_FALLTHROUGH;
- }
- case OCF_SIGNED_ADD: {
- // X + 0 -> {X, false}
- if (match(RHS, m_Zero()))
- return SetResult(LHS, Builder.getFalse(), false);
-
- // We can strength reduce this signed add into a regular add if we can prove
- // that it will never overflow.
- if (OCF == OCF_SIGNED_ADD)
- if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
- true);
- break;
- }
-
- case OCF_UNSIGNED_SUB:
- case OCF_SIGNED_SUB: {
- // X - 0 -> {X, false}
- if (match(RHS, m_Zero()))
- return SetResult(LHS, Builder.getFalse(), false);
-
- if (OCF == OCF_SIGNED_SUB) {
- if (willNotOverflowSignedSub(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
- true);
- } else {
- if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
- true);
- }
- break;
- }
-
- case OCF_UNSIGNED_MUL: {
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
- if (OR == OverflowResult::NeverOverflows)
- return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
- true);
- if (OR == OverflowResult::AlwaysOverflows)
- return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
- LLVM_FALLTHROUGH;
+ if (isNeutralValue(BinaryOp, RHS)) {
+ Result = LHS;
+ Overflow = Builder.getFalse();
+ return true;
}
- case OCF_SIGNED_MUL:
- // X * undef -> undef
- if (isa<UndefValue>(RHS))
- return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
-
- // X * 0 -> {0, false}
- if (match(RHS, m_Zero()))
- return SetResult(RHS, Builder.getFalse(), false);
-
- // X * 1 -> {X, false}
- if (match(RHS, m_One()))
- return SetResult(LHS, Builder.getFalse(), false);
- if (OCF == OCF_SIGNED_MUL)
- if (willNotOverflowSignedMul(LHS, RHS, OrigI))
- return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
- true);
- break;
+ switch (computeOverflow(BinaryOp, IsSigned, LHS, RHS, &OrigI)) {
+ case OverflowResult::MayOverflow:
+ return false;
+ case OverflowResult::AlwaysOverflowsLow:
+ case OverflowResult::AlwaysOverflowsHigh:
+ Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+ Result->takeName(&OrigI);
+ Overflow = Builder.getTrue();
+ return true;
+ case OverflowResult::NeverOverflows:
+ Result = Builder.CreateBinOp(BinaryOp, LHS, RHS);
+ Result->takeName(&OrigI);
+ Overflow = Builder.getFalse();
+ if (auto *Inst = dyn_cast<Instruction>(Result)) {
+ if (IsSigned)
+ Inst->setHasNoSignedWrap();
+ else
+ Inst->setHasNoUnsignedWrap();
+ }
+ return true;
}
- return false;
+ llvm_unreachable("Unexpected overflow result");
}
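Editor's note: this rewrite replaces the removed OverflowCheckFlavor switch with a generic (opcode, signedness) pair plus the ValueTracking computeOverflow* helpers; roughly, OCF_UNSIGNED_ADD becomes (Add, false), OCF_SIGNED_ADD becomes (Add, true), and likewise for Sub and Mul. The isNeutralValue() early-out rests on a simple fact that can be checked directly (illustration only):

#include <cassert>

int main() {
  // x + 0, x - 0 and x * 1 can never overflow and always return the LHS,
  // so the overflow check folds to {LHS, false} for any operand width.
  for (int x = -128; x <= 127; ++x)
    assert(x + 0 == x && x - 0 == x && x * 1 == x);
}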
/// Recognize and process idiom involving test for multiplication
@@ -4084,8 +4324,8 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulA = Builder.CreateZExt(A, MulType);
if (WidthB < MulWidth)
MulB = Builder.CreateZExt(B, MulType);
- Value *F = Intrinsic::getDeclaration(I.getModule(),
- Intrinsic::umul_with_overflow, MulType);
+ Function *F = Intrinsic::getDeclaration(
+ I.getModule(), Intrinsic::umul_with_overflow, MulType);
CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
@@ -4881,61 +5121,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return New;
}
- // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
- Value *X;
- if (match(Op0, m_BitCast(m_SIToFP(m_Value(X))))) {
- // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0
- // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0
- // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
- // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
- if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
- Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
- match(Op1, m_Zero()))
- return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
-
- // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
- if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
- return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
-
- // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
- if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
- return new ICmpInst(Pred, X, ConstantInt::getAllOnesValue(X->getType()));
- }
-
- // Zero-equality checks are preserved through unsigned floating-point casts:
- // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
- // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
- if (match(Op0, m_BitCast(m_UIToFP(m_Value(X)))))
- if (I.isEquality() && match(Op1, m_Zero()))
- return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
-
- // Test to see if the operands of the icmp are casted versions of other
- // values. If the ptr->ptr cast can be stripped off both arguments, we do so
- // now.
- if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (Op0->getType()->isPointerTy() &&
- (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
- // We keep moving the cast from the left operand over to the right
- // operand, where it can often be eliminated completely.
- Op0 = CI->getOperand(0);
-
- // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
- // so eliminate it as well.
- if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
- Op1 = CI2->getOperand(0);
-
- // If Op1 is a constant, we can fold the cast into the constant.
- if (Op0->getType() != Op1->getType()) {
- if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
- Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
- } else {
- // Otherwise, cast the RHS right before the icmp
- Op1 = Builder.CreateBitCast(Op1, Op0->getType());
- }
- }
- return new ICmpInst(I.getPredicate(), Op0, Op1);
- }
- }
+ if (Instruction *Res = foldICmpBitCast(I, Builder))
+ return Res;
if (isa<CastInst>(Op0)) {
// Handle the special case of: icmp (cast bool to X), <cst>
@@ -4984,8 +5171,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
isa<IntegerType>(A->getType())) {
Value *Result;
Constant *Overflow;
- if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
- Overflow)) {
+ if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
+ *AddI, Result, Overflow)) {
replaceInstUsesWith(*AddI, Result);
return replaceInstUsesWith(I, Overflow);
}
@@ -5411,6 +5598,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return replaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
+ Type *OpType = Op0->getType();
+ assert(OpType == Op1->getType() && "fcmp with different-typed operands?");
if (Op0 == Op1) {
switch (Pred) {
default: break;
@@ -5420,7 +5609,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_UNE: // True if unordered or not equal
// Canonicalize these to be 'fcmp uno %X, 0.0'.
I.setPredicate(FCmpInst::FCMP_UNO);
- I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ I.setOperand(1, Constant::getNullValue(OpType));
return &I;
case FCmpInst::FCMP_ORD: // True if ordered (no nans)
@@ -5429,7 +5618,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_OLE: // True if ordered and less than or equal
// Canonicalize these to be 'fcmp ord %X, 0.0'.
I.setPredicate(FCmpInst::FCMP_ORD);
- I.setOperand(1, Constant::getNullValue(Op0->getType()));
+ I.setOperand(1, Constant::getNullValue(OpType));
return &I;
}
}
@@ -5438,15 +5627,20 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// then canonicalize the operand to 0.0.
if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI)) {
- I.setOperand(0, ConstantFP::getNullValue(Op0->getType()));
+ I.setOperand(0, ConstantFP::getNullValue(OpType));
return &I;
}
if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI)) {
- I.setOperand(1, ConstantFP::getNullValue(Op0->getType()));
+ I.setOperand(1, ConstantFP::getNullValue(OpType));
return &I;
}
}
+ // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
+ Value *X, *Y;
+ if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y))))
+ return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
+
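Editor's note: swapping the predicate after dropping a pair of fneg operands preserves the result even for unordered inputs, since fneg never turns a NaN into a non-NaN or vice versa. A small check with doubles, purely illustrative:

#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {-2.0, -0.0, 0.0, 1.5, NAN};
  for (double X : Vals)
    for (double Y : Vals) {
      assert(((-X) < (-Y)) == (X > Y));        // fcmp olt (fneg X), (fneg Y) == fcmp ogt X, Y
      assert(std::isnan(-X) == std::isnan(X)); // NaN-ness is unchanged by negation
    }
}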
// Test if the FCmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
@@ -5465,7 +5659,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
// fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP())) {
- I.setOperand(1, ConstantFP::getNullValue(Op1->getType()));
+ I.setOperand(1, ConstantFP::getNullValue(OpType));
return &I;
}
@@ -5505,12 +5699,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (Instruction *R = foldFabsWithFcmpZero(I))
return R;
- Value *X, *Y;
if (match(Op0, m_FNeg(m_Value(X)))) {
- // fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
- if (match(Op1, m_FNeg(m_Value(Y))))
- return new FCmpInst(I.getSwappedPredicate(), X, Y, "", &I);
-
// fcmp pred (fneg X), C --> fcmp swap(pred) X, -C
Constant *C;
if (match(Op1, m_Constant(C))) {
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2de41bd5bef5..434b0d591215 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -1,9 +1,8 @@
//===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,13 +52,14 @@ namespace llvm {
class APInt;
class AssumptionCache;
-class CallSite;
+class BlockFrequencyInfo;
class DataLayout;
class DominatorTree;
class GEPOperator;
class GlobalVariable;
class LoopInfo;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class TargetLibraryInfo;
class User;
@@ -185,40 +185,6 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
return false;
}
-/// Specific patterns of overflow check idioms that we match.
-enum OverflowCheckFlavor {
- OCF_UNSIGNED_ADD,
- OCF_SIGNED_ADD,
- OCF_UNSIGNED_SUB,
- OCF_SIGNED_SUB,
- OCF_UNSIGNED_MUL,
- OCF_SIGNED_MUL,
-
- OCF_INVALID
-};
-
-/// Returns the OverflowCheckFlavor corresponding to a overflow_with_op
-/// intrinsic.
-static inline OverflowCheckFlavor
-IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
- switch (ID) {
- default:
- return OCF_INVALID;
- case Intrinsic::uadd_with_overflow:
- return OCF_UNSIGNED_ADD;
- case Intrinsic::sadd_with_overflow:
- return OCF_SIGNED_ADD;
- case Intrinsic::usub_with_overflow:
- return OCF_UNSIGNED_SUB;
- case Intrinsic::ssub_with_overflow:
- return OCF_SIGNED_SUB;
- case Intrinsic::umul_with_overflow:
- return OCF_UNSIGNED_MUL;
- case Intrinsic::smul_with_overflow:
- return OCF_SIGNED_MUL;
- }
-}
-
/// Some binary operators require special handling to avoid poison and undefined
/// behavior. If a constant vector has undef elements, replace those undefs with
/// identity constants if possible because those are always safe to execute.
@@ -306,6 +272,8 @@ private:
const DataLayout &DL;
const SimplifyQuery SQ;
OptimizationRemarkEmitter &ORE;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
// Optional analyses. When non-null, these can both be used to do better
// combining and will be updated to reflect any changes.
@@ -317,11 +285,11 @@ public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, const DataLayout &DL,
- LoopInfo *LI)
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
- DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
+ DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
/// Run the combiner over the entire worklist until it is empty.
///
@@ -345,6 +313,7 @@ public:
// I - Change was made, I is still valid, I may be dead though
// otherwise - Change was made, replace I with returned instruction
//
+ Instruction *visitFNeg(UnaryOperator &I);
Instruction *visitAdd(BinaryOperator &I);
Instruction *visitFAdd(BinaryOperator &I);
Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty);
@@ -394,6 +363,7 @@ public:
Instruction *visitSelectInst(SelectInst &SI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
+ Instruction *visitCallBrInst(CallBrInst &CBI);
Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
Instruction *visitPHINode(PHINode &PN);
@@ -403,6 +373,7 @@ public:
Instruction *visitFree(CallInst &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
+ Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitFenceInst(FenceInst &FI);
Instruction *visitSwitchInst(SwitchInst &SI);
@@ -464,16 +435,22 @@ private:
/// operation in OperationResult and result of the overflow check in
/// OverflowResult, and return true. If no simplification is possible,
/// returns false.
- bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ bool OptimizeOverflowCheck(Instruction::BinaryOps BinaryOp, bool IsSigned,
+ Value *LHS, Value *RHS,
Instruction &CtxI, Value *&OperationResult,
Constant *&OverflowResult);
- Instruction *visitCallSite(CallSite CS);
+ Instruction *visitCallBase(CallBase &Call);
Instruction *tryOptimizeCall(CallInst *CI);
- bool transformConstExprCastCall(CallSite CS);
- Instruction *transformCallThroughTrampoline(CallSite CS,
- IntrinsicInst *Tramp);
-
+ bool transformConstExprCastCall(CallBase &Call);
+ Instruction *transformCallThroughTrampoline(CallBase &Call,
+ IntrinsicInst &Tramp);
+
+ Value *simplifyMaskedLoad(IntrinsicInst &II);
+ Instruction *simplifyMaskedStore(IntrinsicInst &II);
+ Instruction *simplifyMaskedGather(IntrinsicInst &II);
+ Instruction *simplifyMaskedScatter(IntrinsicInst &II);
+
/// Transform (zext icmp) to bitwise / integer operations in order to
/// eliminate it.
///
@@ -592,6 +569,8 @@ private:
Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
Value *getSelectCondition(Value *A, Value *B);
+ Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+
public:
/// Inserts an instruction \p New before instruction \p Old
///
@@ -647,6 +626,16 @@ public:
return InsertValueInst::Create(Struct, Result, 0);
}
+ /// Create and insert the idiom we use to indicate a block is unreachable
+ /// without having to rewrite the CFG from within InstCombine.
+ void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
+ auto &Ctx = InsertAt->getContext();
+ new StoreInst(ConstantInt::getTrue(Ctx),
+ UndefValue::get(Type::getInt1PtrTy(Ctx)),
+ InsertAt);
+ }
+
+
/// Combiner aware instruction erasure.
///
/// When dealing with an instruction that has side effects or produces a void
@@ -703,7 +692,7 @@ public:
}
OverflowResult computeOverflowForSignedMul(const Value *LHS,
- const Value *RHS,
+ const Value *RHS,
const Instruction *CxtI) const {
return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
}
@@ -731,6 +720,10 @@ public:
return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT);
}
+ OverflowResult computeOverflow(
+ Instruction::BinaryOps BinaryOp, bool IsSigned,
+ Value *LHS, Value *RHS, Instruction *CxtI) const;
+
/// Maximum size of array considered when transforming.
uint64_t MaxArraySizeForCombine;
@@ -802,8 +795,7 @@ private:
Value *simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DmaskIdx = -1,
- int TFCIdx = -1);
+ int DmaskIdx = -1);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0);
@@ -868,8 +860,6 @@ private:
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
ConstantInt *C);
- Instruction *foldICmpBitCastConstant(ICmpInst &Cmp, BitCastInst *Bitcast,
- const APInt &C);
Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc,
const APInt &C);
Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
@@ -904,7 +894,10 @@ private:
Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
BinaryOperator *BO,
const APInt &C);
- Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C);
+ Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
+ const APInt &C);
+ Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
+ const APInt &C);
// Helpers of visitSelectInst().
Instruction *foldSelectExtConst(SelectInst &Sel);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 76ab614090fa..054fb7da09a2 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1,9 +1,8 @@
//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -89,29 +88,29 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (auto CS = CallSite(I)) {
+ if (auto *Call = dyn_cast<CallBase>(I)) {
// If this is the function being called then we treat it like a load and
// ignore it.
- if (CS.isCallee(&U))
+ if (Call->isCallee(&U))
continue;
- unsigned DataOpNo = CS.getDataOperandNo(&U);
- bool IsArgOperand = CS.isArgOperand(&U);
+ unsigned DataOpNo = Call->getDataOperandNo(&U);
+ bool IsArgOperand = Call->isArgOperand(&U);
// Inalloca arguments are clobbered by the call.
- if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
+ if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
return false;
// If this is a readonly/readnone call site, then we know it is just a
// load (but one that potentially returns the value itself), so we can
// ignore it if we know that the value isn't captured.
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
+ if (Call->onlyReadsMemory() &&
+ (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
continue;
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- if (IsArgOperand && CS.isByValArgument(DataOpNo))
+ if (IsArgOperand && Call->isByValArgument(DataOpNo))
continue;
}
@@ -213,8 +212,8 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
Value *NullIdx = Constant::getNullValue(IdxTy);
Value *Idx[2] = {NullIdx, NullIdx};
- Instruction *GEP =
- GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
+ Instruction *GEP = GetElementPtrInst::CreateInBounds(
+ NewTy, New, Idx, New->getName() + ".sub");
IC.InsertNewInstBefore(GEP, *It);
// Now make everything use the getelementptr instead of the original
@@ -299,7 +298,7 @@ void PointerReplacer::replace(Instruction *I) {
if (auto *LT = dyn_cast<LoadInst>(I)) {
auto *V = getReplacement(LT->getPointerOperand());
assert(V && "Operand not replaced");
- auto *NewI = new LoadInst(V);
+ auto *NewI = new LoadInst(I->getType(), V);
NewI->takeName(LT);
IC.InsertNewInstWith(NewI, *LT);
IC.replaceInstUsesWith(*LT, NewI);
@@ -466,7 +465,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
- NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+ NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
@@ -631,7 +630,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// infinite loop).
if (!Ty->isIntegerTy() && Ty->isSized() &&
DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
- DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
+ DL.typeSizeEqualsStoreSize(Ty) &&
!DL.isNonIntegralPointerType(Ty) &&
!isMinMaxWithLoads(
peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
@@ -725,7 +724,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
Name + ".elt");
auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
- auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
+ auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
+ EltAlign, Name + ".unpack");
// Propagate AA metadata. It'll still be valid on the narrowed load.
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
@@ -775,8 +775,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
};
auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
Name + ".elt");
- auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
- Name + ".unpack");
+ auto *L = IC.Builder.CreateAlignedLoad(
+ AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
AAMDNodes AAMD;
LI.getAAMetadata(AAMD);
L->setAAMetadata(AAMD);
@@ -1064,12 +1064,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
- LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
- SI->getOperand(1)->getName()+".val");
- LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
- SI->getOperand(2)->getName()+".val");
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
+ DL, SI) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
+ DL, SI)) {
+ LoadInst *V1 =
+ Builder.CreateLoad(LI.getType(), SI->getOperand(1),
+ SI->getOperand(1)->getName() + ".val");
+ LoadInst *V2 =
+ Builder.CreateLoad(LI.getType(), SI->getOperand(2),
+ SI->getOperand(2)->getName() + ".val");
assert(LI.isUnordered() && "implied by above");
V1->setAlignment(Align);
V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
@@ -1436,6 +1440,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
}
+ // If we have a store to a location which is known constant, we can conclude
+ // that the store must be storing the constant value (else the memory
+ // wouldn't be constant), and this must be a noop.
+ if (AA->pointsToConstantMemory(Ptr))
+ return eraseInstFromFunction(SI);
+
// Do really simple DSE, to catch cases where there are several consecutive
// stores to the same location, separated by a few arithmetic operations. This
// situation often occurs with bitfield accesses.
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7e99f3e4e500..cc753ce05313 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1,9 +1,8 @@
//===- InstCombineMulDivRem.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -375,11 +374,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return BinaryOperator::CreateFMulFMF(X, ConstantExpr::getFNeg(C), &I);
// Sink negation: -X * Y --> -(X * Y)
- if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))))
+ // But don't transform constant expressions because there's an inverse fold.
+ if (match(Op0, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op0))
return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op1, &I), &I);
// Sink negation: Y * -X --> -(X * Y)
- if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))))
+ // But don't transform constant expressions because there's an inverse fold.
+ if (match(Op1, m_OneUse(m_FNeg(m_Value(X)))) && !isa<ConstantExpr>(Op1))
return BinaryOperator::CreateFNegFMF(Builder.CreateFMulFMF(X, Op0, &I), &I);
// fabs(X) * fabs(X) -> X * X
@@ -431,6 +432,14 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ Value *Z;
+ if (match(&I, m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))),
+ m_Value(Z)))) {
+ // Sink division: (X / Y) * Z --> (X * Z) / Y
+ Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
+ return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+ }
+
// sqrt(X) * sqrt(Y) -> sqrt(X * Y)
// nnan disallows the possibility of returning a number if both operands are
// negative (in that case, we should return NaN).
@@ -442,6 +451,45 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return replaceInstUsesWith(I, Sqrt);
}
+ // Like the similar transform in instsimplify, this requires 'nsz' because
+ // sqrt(-0.0) = -0.0, and -0.0 * -0.0 does not simplify to -0.0.
+ if (I.hasNoNaNs() && I.hasNoSignedZeros() && Op0 == Op1 &&
+ Op0->hasNUses(2)) {
+ // Peek through fdiv to find squaring of square root:
+ // (X / sqrt(Y)) * (X / sqrt(Y)) --> (X * X) / Y
+ if (match(Op0, m_FDiv(m_Value(X),
+ m_Intrinsic<Intrinsic::sqrt>(m_Value(Y))))) {
+ Value *XX = Builder.CreateFMulFMF(X, X, &I);
+ return BinaryOperator::CreateFDivFMF(XX, Y, &I);
+ }
+ // (sqrt(Y) / X) * (sqrt(Y) / X) --> Y / (X * X)
+ if (match(Op0, m_FDiv(m_Intrinsic<Intrinsic::sqrt>(m_Value(Y)),
+ m_Value(X)))) {
+ Value *XX = Builder.CreateFMulFMF(X, X, &I);
+ return BinaryOperator::CreateFDivFMF(Y, XX, &I);
+ }
+ }
+
+ // exp(X) * exp(Y) -> exp(X + Y)
+ // Match as long as at least one of exp has only one use.
+ if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+ Value *Exp = Builder.CreateUnaryIntrinsic(Intrinsic::exp, XY, &I);
+ return replaceInstUsesWith(I, Exp);
+ }
+
+ // exp2(X) * exp2(Y) -> exp2(X + Y)
+ // Match as long as at least one of exp2 has only one use.
+ if (match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) &&
+ match(Op1, m_Intrinsic<Intrinsic::exp2>(m_Value(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Value *XY = Builder.CreateFAddFMF(X, Y, &I);
+ Value *Exp2 = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, XY, &I);
+ return replaceInstUsesWith(I, Exp2);
+ }
+
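Editor's note: the exp and exp2 folds above rely on the identity exp2(X) * exp2(Y) == exp2(X + Y), which only holds approximately in floating point; that is why the new instructions copy the original's fast-math flags via CreateFAddFMF and CreateUnaryIntrinsic. A quick numeric spot check with a small relative tolerance (illustration only):

#include <cassert>
#include <cmath>

int main() {
  const double Vals[] = {-3.5, 0.0, 1.25, 10.0};
  for (double X : Vals)
    for (double Y : Vals) {
      double Lhs = std::exp2(X) * std::exp2(Y);
      double Rhs = std::exp2(X + Y);
      assert(std::fabs(Lhs - Rhs) <= 1e-12 * std::fabs(Rhs));
    }
}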
// (X*Y) * X => (X*X) * Y where Y != X
// The purpose is two-fold:
// 1) to form a power expression (of X).
@@ -576,7 +624,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
return false;
- APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
if (IsSigned)
APInt::sdivrem(C1, C2, Quotient, Remainder);
else
@@ -613,7 +661,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (X / C1) / C2 -> X / (C1*C2)
if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
- APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, Product));
@@ -621,7 +669,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
- APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
// (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
@@ -645,7 +693,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
*C1 != C1->getBitWidth() - 1) ||
(!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) {
- APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+ APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
APInt C1Shifted = APInt::getOneBitSet(
C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
@@ -977,6 +1025,10 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return BinaryOperator::CreateNeg(Op0);
+ // X / INT_MIN --> X == INT_MIN
+ if (match(Op1, m_SignMask()))
+ return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
+
const APInt *Op1C;
if (match(Op1, m_APInt(Op1C))) {
// sdiv exact X, C --> ashr exact X, log2(C)
@@ -1001,22 +1053,25 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor);
return new SExtInst(NarrowOp, Op0->getType());
}
- }
- if (Constant *RHS = dyn_cast<Constant>(Op1)) {
- // X/INT_MIN -> X == INT_MIN
- if (RHS->isMinSignedValue())
- return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType());
-
- // -X/C --> X/-C provided the negation doesn't overflow.
- Value *X;
- if (match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
- auto *BO = BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(RHS));
+ // -X / C --> X / -C (if the negation doesn't overflow).
+ // TODO: This could be enhanced to handle arbitrary vector constants by
+ // checking if all elements are not the min-signed-val.
+ if (!Op1C->isMinSignedValue() &&
+ match(Op0, m_NSWSub(m_Zero(), m_Value(X)))) {
+ Constant *NegC = ConstantInt::get(I.getType(), -(*Op1C));
+ Instruction *BO = BinaryOperator::CreateSDiv(X, NegC);
BO->setIsExact(I.isExact());
return BO;
}
}
+ // -X / Y --> -(X / Y)
+ Value *Y;
+ if (match(&I, m_SDiv(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
+ return BinaryOperator::CreateNSWNeg(
+ Builder.CreateSDiv(X, Y, I.getName(), I.isExact()));
+
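Editor's note: two of the sdiv rewrites above can be confirmed exhaustively on i8 (standalone sketch, not part of the patch); C++ integer division truncates toward zero exactly like sdiv, and the srem fold added further down is the analogous statement for remainders.

#include <cassert>

int main() {
  for (int x = -128; x <= 127; ++x) {
    // X sdiv INT_MIN  -->  zext (X == INT_MIN): only INT_MIN itself divides to 1.
    assert(x / -128 == (x == -128 ? 1 : 0));
    // -X sdiv Y  -->  -(X sdiv Y): the sub is 'nsw', so X is never INT_MIN and
    // the negation cannot wrap.
    if (x == -128)
      continue;
    for (int y = -128; y <= 127; ++y)
      if (y != 0)
        assert((-x) / y == -(x / y));
  }
}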
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a udiv.
APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
@@ -1161,7 +1216,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
IRBuilder<> B(&I);
IRBuilder<>::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(I.getFastMathFlags());
- AttributeList Attrs = CallSite(Op0).getCalledFunction()->getAttributes();
+ AttributeList Attrs =
+ cast<CallBase>(Op0)->getCalledFunction()->getAttributes();
Value *Res = emitUnaryFloatFnCall(X, &TLI, LibFunc_tan, LibFunc_tanf,
LibFunc_tanl, B, Attrs);
if (IsCot)
@@ -1305,6 +1361,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
}
+ // -X srem Y --> -(X srem Y)
+ Value *X, *Y;
+ if (match(&I, m_SRem(m_OneUse(m_NSWSub(m_Zero(), m_Value(X))), m_Value(Y))))
+ return BinaryOperator::CreateNSWNeg(Builder.CreateSRem(X, Y));
+
// If the sign bits of both operands are zero (i.e. we can prove they are
// unsigned inputs), turn this into a urem.
APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits()));
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 7603cf4d7958..5820ab726637 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -1,9 +1,8 @@
//===- InstCombinePHI.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -596,7 +595,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
- LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment);
+ LoadInst *NewLI =
+ new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment);
unsigned KnownIDs[] = {
LLVMContext::MD_tbaa,
@@ -1004,6 +1004,11 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
!isa<ConstantInt>(UserI->getOperand(1)))
return nullptr;
+ // Bail on out of range shifts.
+ unsigned SizeInBits = UserI->getType()->getScalarSizeInBits();
+ if (cast<ConstantInt>(UserI->getOperand(1))->getValue().uge(SizeInBits))
+ return nullptr;
+
unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index faf58a08976d..aefaf5af1750 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1,9 +1,8 @@
//===- InstCombineSelect.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -293,6 +292,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
return nullptr;
// If this is a cast from the same type, merge.
+ Value *Cond = SI.getCondition();
+ Type *CondTy = Cond->getType();
if (TI->getNumOperands() == 1 && TI->isCast()) {
Type *FIOpndTy = FI->getOperand(0)->getType();
if (TI->getOperand(0)->getType() != FIOpndTy)
@@ -300,7 +301,6 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
- Type *CondTy = SI.getCondition()->getType();
if (CondTy->isVectorTy()) {
if (!FIOpndTy->isVectorTy())
return nullptr;
@@ -327,12 +327,24 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// Fold this by inserting a select from the input values.
Value *NewSI =
- Builder.CreateSelect(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName() + ".v", &SI);
+ Builder.CreateSelect(Cond, TI->getOperand(0), FI->getOperand(0),
+ SI.getName() + ".v", &SI);
return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
+ // Cond ? -X : -Y --> -(Cond ? X : Y)
+ Value *X, *Y;
+ if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) &&
+ (TI->hasOneUse() || FI->hasOneUse())) {
+ Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI);
+ // TODO: Remove the hack for the binop form when the unary op is optimized
+ // properly with all IR passes.
+ if (TI->getOpcode() != Instruction::FNeg)
+ return BinaryOperator::CreateFNegFMF(NewSel, cast<BinaryOperator>(TI));
+ return UnaryOperator::CreateFNeg(NewSel);
+ }
+
// Only handle binary operators (including two-operand getelementptr) with
// one-use here. As with the cast case above, it may be possible to relax the
// one-use constraint, but that needs be examined carefully since it may not
@@ -374,13 +386,12 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
// If the select condition is a vector, the operands of the original select's
// operands also must be vectors. This may not be the case for getelementptr
// for example.
- if (SI.getCondition()->getType()->isVectorTy() &&
- (!OtherOpT->getType()->isVectorTy() ||
- !OtherOpF->getType()->isVectorTy()))
+ if (CondTy->isVectorTy() && (!OtherOpT->getType()->isVectorTy() ||
+ !OtherOpF->getType()->isVectorTy()))
return nullptr;
// If we reach here, they do have operations in common.
- Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+ Value *NewSI = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,
SI.getName() + ".v", &SI);
Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
@@ -521,6 +532,46 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
}
/// We want to turn:
+/// (select (icmp sgt X, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1
+/// (select (icmp slt X, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0
+/// into:
+/// ashr (X, Y)
+static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+ if (!CmpRHS->getType()->isIntOrIntVectorTy())
+ return nullptr;
+
+ Value *X, *Y;
+ unsigned Bitwidth = CmpRHS->getType()->getScalarSizeInBits();
+ if ((Pred != ICmpInst::ICMP_SGT ||
+ !match(CmpRHS,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, -1)))) &&
+ (Pred != ICmpInst::ICMP_SLT ||
+ !match(CmpRHS,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, 0)))))
+ return nullptr;
+
+ // Canonicalize so that ashr is in FalseVal.
+ if (Pred == ICmpInst::ICMP_SLT)
+ std::swap(TrueVal, FalseVal);
+
+ if (match(TrueVal, m_LShr(m_Value(X), m_Value(Y))) &&
+ match(FalseVal, m_AShr(m_Specific(X), m_Specific(Y))) &&
+ match(CmpLHS, m_Specific(X))) {
+ const auto *Ashr = cast<Instruction>(FalseVal);
+ // if lshr is not exact and ashr is, this new ashr must not be exact.
+ bool IsExact = Ashr->isExact() && cast<Instruction>(TrueVal)->isExact();
+ return Builder.CreateAShr(X, Y, IC->getName(), IsExact);
+ }
+
+ return nullptr;
+}
+
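Editor's note: the select-of-shifts fold works because a non-negative value is shifted identically by lshr and ashr, while a negative value makes the compare pick the ashr arm anyway. An i8 brute-force check with C = -1 (illustrative; it assumes the usual arithmetic behaviour of >> on negative signed values, which is guaranteed since C++20):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int y = 0; y < 8; ++y) {
      uint8_t Lshr = uint8_t(uint8_t(x) >> y);   // logical shift
      uint8_t Ashr = uint8_t(int8_t(x) >> y);    // arithmetic shift
      uint8_t Sel  = (x > -1) ? Lshr : Ashr;     // (x s> -1) ? lshr : ashr
      assert(Sel == Ashr);                       // --> ashr (x, y)
    }
}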
+/// We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
/// (or (shl (and X, C1), C3), Y)
@@ -623,11 +674,7 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
return Builder.CreateOr(V, Y);
}
-/// Transform patterns such as: (a > b) ? a - b : 0
-/// into: ((a > b) ? a : b) - b)
-/// This produces a canonical max pattern that is more easily recognized by the
-/// backend and converted into saturated subtraction instructions if those
-/// exist.
+/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
/// There are 8 commuted/swapped variants of this pattern.
/// TODO: Also support a - UMIN(a,b) patterns.
static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
@@ -669,11 +716,73 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
if (!TrueVal->hasOneUse())
return nullptr;
- // All checks passed, convert to canonical unsigned saturated subtraction
- // form: sub(max()).
- // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
- Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
- return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+ // (a > b) ? a - b : 0 -> usub.sat(a, b)
+ // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+ Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);
+ if (IsNegative)
+ Result = Builder.CreateNeg(Result);
+ return Result;
+}
+
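Editor's note: the new form emits @llvm.usub.sat directly instead of the max-then-sub canonicalization described in the deleted comment. Below is a reference model on i8 showing that the old form, the new intrinsic and the negated variant all agree (not part of the patch):

#include <cassert>
#include <cstdint>

static uint8_t usub_sat(uint8_t a, uint8_t b) { return a > b ? uint8_t(a - b) : uint8_t(0); }

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t OldForm = uint8_t(((a > b) ? a : b) - b);              // ((a > b) ? a : b) - b
      assert(OldForm == usub_sat(uint8_t(a), uint8_t(b)));           // == usub.sat(a, b)
      uint8_t Neg = uint8_t((a > b) ? uint8_t(b - a) : uint8_t(0));  // (a > b) ? b - a : 0
      assert(Neg == uint8_t(-usub_sat(uint8_t(a), uint8_t(b))));     // == -usub.sat(a, b)
    }
}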
+static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
+ InstCombiner::BuilderTy &Builder) {
+ if (!Cmp->hasOneUse())
+ return nullptr;
+
+ // Match unsigned saturated add with constant.
+ Value *Cmp0 = Cmp->getOperand(0);
+ Value *Cmp1 = Cmp->getOperand(1);
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ Value *X;
+ const APInt *C, *CmpC;
+ if (Pred == ICmpInst::ICMP_ULT &&
+ match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
+ match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
+ // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
+ return Builder.CreateBinaryIntrinsic(
+ Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
+ }
+
+ // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+ // There are 8 commuted variants.
+ // Canonicalize -1 (saturated result) to true value of the select. Just
+ // swapping the compare operands is legal, because the selected value is the
+ // same in case of equality, so we can interchange u< and u<=.
+ if (match(FVal, m_AllOnes())) {
+ std::swap(TVal, FVal);
+ std::swap(Cmp0, Cmp1);
+ }
+ if (!match(TVal, m_AllOnes()))
+ return nullptr;
+
+ // Canonicalize predicate to 'ULT'.
+ if (Pred == ICmpInst::ICMP_UGT) {
+ Pred = ICmpInst::ICMP_ULT;
+ std::swap(Cmp0, Cmp1);
+ }
+ if (Pred != ICmpInst::ICMP_ULT)
+ return nullptr;
+
+ // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
+ Value *Y;
+ if (match(Cmp0, m_Not(m_Value(X))) &&
+ match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
+ // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+ // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)
+ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
+ }
+ // The 'not' op may be included in the sum but not the compare.
+ X = Cmp0;
+ Y = Cmp1;
+ if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
+ // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
+ // (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
+ BinaryOperator *BO = cast<BinaryOperator>(FVal);
+ return Builder.CreateBinaryIntrinsic(
+ Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
+ }
+
+ return nullptr;
}
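Editor's note: the constant-based and 'not'-based patterns matched above are both disguised unsigned saturating adds. An exhaustive i8 check against a reference model of @llvm.uadd.sat (illustration only):

#include <cassert>
#include <cstdint>

static uint8_t uadd_sat(uint8_t x, uint8_t y) {
  unsigned s = unsigned(x) + unsigned(y);
  return s > 0xFF ? uint8_t(0xFF) : uint8_t(s);
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c = 0; c < 256; ++c) {
      // (X u< ~C) ? (X + C) : -1  -->  uadd.sat(X, C)
      uint8_t Sel = (x < uint8_t(~c)) ? uint8_t(x + c) : uint8_t(0xFF);
      assert(Sel == uadd_sat(uint8_t(x), uint8_t(c)));
      // (~X u< Y) ? -1 : (X + Y)  -->  uadd.sat(X, Y), with Y = C here
      uint8_t Sel2 = (uint8_t(~x) < c) ? uint8_t(0xFF) : uint8_t(x + c);
      assert(Sel2 == uadd_sat(uint8_t(x), uint8_t(c)));
    }
}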
/// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
@@ -1043,12 +1152,18 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectICmpLshrAshr(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
return Changed ? &SI : nullptr;
}
@@ -1496,6 +1611,43 @@ static Instruction *foldSelectCmpXchg(SelectInst &SI) {
return nullptr;
}
+static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
+ Value *Y,
+ InstCombiner::BuilderTy &Builder) {
+ assert(SelectPatternResult::isMinOrMax(SPF) && "Expected min/max pattern");
+ bool IsUnsigned = SPF == SelectPatternFlavor::SPF_UMIN ||
+ SPF == SelectPatternFlavor::SPF_UMAX;
+ // TODO: If InstSimplify could fold all cases where C2 <= C1, we could change
+ // the constant value check to an assert.
+ Value *A;
+ const APInt *C1, *C2;
+ if (IsUnsigned && match(X, m_NUWAdd(m_Value(A), m_APInt(C1))) &&
+ match(Y, m_APInt(C2)) && C2->uge(*C1) && X->hasNUses(2)) {
+ // umin (add nuw A, C1), C2 --> add nuw (umin A, C2 - C1), C1
+ // umax (add nuw A, C1), C2 --> add nuw (umax A, C2 - C1), C1
+ Value *NewMinMax = createMinMax(Builder, SPF, A,
+ ConstantInt::get(X->getType(), *C2 - *C1));
+ return BinaryOperator::CreateNUW(BinaryOperator::Add, NewMinMax,
+ ConstantInt::get(X->getType(), *C1));
+ }
+
+ if (!IsUnsigned && match(X, m_NSWAdd(m_Value(A), m_APInt(C1))) &&
+ match(Y, m_APInt(C2)) && X->hasNUses(2)) {
+ bool Overflow;
+ APInt Diff = C2->ssub_ov(*C1, Overflow);
+ if (!Overflow) {
+ // smin (add nsw A, C1), C2 --> add nsw (smin A, C2 - C1), C1
+ // smax (add nsw A, C1), C2 --> add nsw (smax A, C2 - C1), C1
+ Value *NewMinMax = createMinMax(Builder, SPF, A,
+ ConstantInt::get(X->getType(), Diff));
+ return BinaryOperator::CreateNSW(BinaryOperator::Add, NewMinMax,
+ ConstantInt::get(X->getType(), *C1));
+ }
+ }
+
+ return nullptr;
+}
+
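Editor's note: moving the add after the min/max is just "add the same constant to both arguments of the min", and the nuw/nsw preconditions keep that reassociation exact. Brute-force confirmation of the unsigned umin case on i8 (illustrative; the umax and signed smin/smax cases follow the same argument):

#include <algorithm>
#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned C1 = 0; C1 + A < 256; ++C1)        // 'nuw': A + C1 must not wrap
      for (unsigned C2 = C1; C2 < 256; ++C2) {       // and C2 u>= C1
        unsigned Before = std::min(A + C1, C2);      // umin (add nuw A, C1), C2
        unsigned After  = std::min(A, C2 - C1) + C1; // add nuw (umin A, C2 - C1), C1
        assert(Before == After);
      }
}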
/// Reduce a sequence of min/max with a common operand.
static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
Value *RHS,
@@ -1757,37 +1909,55 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// NOTE: if we wanted to, this is where to detect MIN/MAX
}
+ }
- // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
- // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
- // also require nnan because we do not want to unintentionally change the
- // sign of a NaN value.
- Value *X = FCI->getOperand(0);
- FCmpInst::Predicate Pred = FCI->getPredicate();
- if (match(FCI->getOperand(1), m_AnyZeroFP()) && FCI->hasNoNaNs()) {
- // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
- // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
- if ((X == FalseVal && Pred == FCmpInst::FCMP_OLE &&
- match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) ||
- (X == TrueVal && Pred == FCmpInst::FCMP_OGT &&
- match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(X))))) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
- return replaceInstUsesWith(SI, Fabs);
- }
- // With nsz:
- // (X < +/-0.0) ? -X : X --> fabs(X)
- // (X <= +/-0.0) ? -X : X --> fabs(X)
- // (X > +/-0.0) ? X : -X --> fabs(X)
- // (X >= +/-0.0) ? X : -X --> fabs(X)
- if (FCI->hasNoSignedZeros() &&
- ((X == FalseVal && match(TrueVal, m_FNeg(m_Specific(X))) &&
- (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE)) ||
- (X == TrueVal && match(FalseVal, m_FNeg(m_Specific(X))) &&
- (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE)))) {
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, FCI);
- return replaceInstUsesWith(SI, Fabs);
- }
- }
+ // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
+ // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
+ // also require nnan because we do not want to unintentionally change the
+ // sign of a NaN value.
+ // FIXME: These folds should test/propagate FMF from the select, not the
+ // fsub or fneg.
+ // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
+ Instruction *FSub;
+ if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
+ match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
+ match(TrueVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
+ (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, FSub);
+ return replaceInstUsesWith(SI, Fabs);
+ }
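+  // For example (illustrative IR), with no-NaNs on the fsub the pattern above
+  // rewrites
+  //   %cmp = fcmp ole float %x, 0.0
+  //   %sub = fsub nnan float 0.0, %x
+  //   %sel = select i1 %cmp, float %sub, float %x
+  // into
+  //   %sel = call nnan float @llvm.fabs.f32(float %x)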
+ // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
+ if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
+ match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) &&
+ match(FalseVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
+ (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, FSub);
+ return replaceInstUsesWith(SI, Fabs);
+ }
+ // With nnan and nsz:
+ // (X < +/-0.0) ? -X : X --> fabs(X)
+ // (X <= +/-0.0) ? -X : X --> fabs(X)
+ Instruction *FNeg;
+ if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
+ match(TrueVal, m_FNeg(m_Specific(FalseVal))) &&
+ match(TrueVal, m_Instruction(FNeg)) &&
+ FNeg->hasNoNaNs() && FNeg->hasNoSignedZeros() &&
+ (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
+ Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, FNeg);
+ return replaceInstUsesWith(SI, Fabs);
+ }
+ // With nnan and nsz:
+ // (X > +/-0.0) ? X : -X --> fabs(X)
+ // (X >= +/-0.0) ? X : -X --> fabs(X)
+ if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
+ match(FalseVal, m_FNeg(m_Specific(TrueVal))) &&
+ match(FalseVal, m_Instruction(FNeg)) &&
+ FNeg->hasNoNaNs() && FNeg->hasNoSignedZeros() &&
+ (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
+ Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) {
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, FNeg);
+ return replaceInstUsesWith(SI, Fabs);
}
// See if we are selecting two values based on a comparison of the two values.
@@ -1895,11 +2065,27 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *I = moveNotAfterMinMax(RHS, LHS))
return I;
+ if (Instruction *I = moveAddAfterMinMax(SPF, LHS, RHS, Builder))
+ return I;
+
if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
return I;
}
}
+ // Canonicalize select of FP values where NaN and -0.0 are not valid as
+ // minnum/maxnum intrinsics.
+ if (isa<FPMathOperator>(SI) && SI.hasNoNaNs() && SI.hasNoSignedZeros()) {
+ Value *X, *Y;
+ if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))
+ return replaceInstUsesWith(
+ SI, Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI));
+
+ if (match(&SI, m_OrdFMin(m_Value(X), m_Value(Y))))
+ return replaceInstUsesWith(
+ SI, Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI));
+ }
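+  // For example (illustrative IR), a select carrying nnan/nsz that implements
+  // an ordered float min
+  //   %cmp = fcmp olt float %x, %y
+  //   %sel = select nnan nsz i1 %cmp, float %x, float %y
+  // becomes
+  //   %sel = call nnan nsz float @llvm.minnum.f32(float %x, float %y)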
+
// See if we can fold the select into a phi node if the condition is a select.
if (auto *PN = dyn_cast<PHINode>(SI.getCondition()))
// The true/false values have to be live in the PHI predecessor's blocks.
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index c562d45a9e2b..c821292400cd 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1,9 +1,8 @@
//===- InstCombineShifts.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,51 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+// Given pattern:
+// (x shiftopcode Q) shiftopcode K
+// we should rewrite it as
+// x shiftopcode (Q+K) iff (Q+K) u< bitwidth(x)
+// This is valid for any shift kind, but the two shifts must be identical.
+static Instruction *
+reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0,
+ const SimplifyQuery &SQ) {
+ // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1
+ Value *X, *ShAmt1, *ShAmt0;
+ Instruction *Sh1;
+ if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)),
+ m_Instruction(Sh1)),
+ m_Value(ShAmt0))))
+ return nullptr;
+
+ // The shift opcodes must be identical.
+ Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode();
+ if (ShiftOpcode != Sh1->getOpcode())
+ return nullptr;
+ // Can we fold (ShAmt0+ShAmt1) ?
+ Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1,
+ SQ.getWithInstruction(Sh0));
+ if (!NewShAmt)
+ return nullptr; // Did not simplify.
+ // Is the new shift amount smaller than the bit width?
+ // FIXME: could also rely on ConstantRange.
+ unsigned BitWidth = X->getType()->getScalarSizeInBits();
+ if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+ APInt(BitWidth, BitWidth))))
+ return nullptr;
+ // All good, we can do this fold.
+ BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt);
+ // If both of the original shifts had the same flag set, preserve the flag.
+ if (ShiftOpcode == Instruction::BinaryOps::Shl) {
+ NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() &&
+ Sh1->hasNoUnsignedWrap());
+ NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() &&
+ Sh1->hasNoSignedWrap());
+ } else {
+ NewShift->setIsExact(Sh0->isExact() && Sh1->isExact());
+ }
+ return NewShift;
+}
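+// For example (illustrative IR), two same-direction shifts by constants
+// reassociate when the combined amount stays below the bit width:
+//   %t = shl i32 %x, 3
+//   %r = shl i32 %t, 5
+// becomes
+//   %r = shl i32 %x, 8
+// since 8 u< 32; nuw/nsw (or exact) survive only if both original shifts had
+// them.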
+
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
assert(Op0->getType() == Op1->getType());
@@ -39,6 +83,10 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
+ if (Instruction *NewShift =
+ reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ))
+ return NewShift;
+
  // (C1 shift (A add C2)) -> ((C1 shift C2) shift A)
// iff A and C2 are both positive.
Value *A;
@@ -313,35 +361,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// If this is a bitwise operator or add with a constant RHS we might be able
// to pull it through a shift.
static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
- BinaryOperator *BO,
- const APInt &C) {
- bool IsValid = true; // Valid only for And, Or Xor,
- bool HighBitSet = false; // Transform ifhigh bit of constant set?
-
+ BinaryOperator *BO) {
switch (BO->getOpcode()) {
- default: IsValid = false; break; // Do not perform transform!
+ default:
+ return false; // Do not perform transform!
case Instruction::Add:
- IsValid = Shift.getOpcode() == Instruction::Shl;
- break;
+ return Shift.getOpcode() == Instruction::Shl;
case Instruction::Or:
case Instruction::Xor:
- HighBitSet = false;
- break;
case Instruction::And:
- HighBitSet = true;
- break;
+ return true;
}
-
- // If this is a signed shift right, and the high bit is modified
- // by the logical operation, do not perform the transformation.
- // The HighBitSet boolean indicates the value of the high bit of
- // the constant which would cause it to be modified for this
- // operation.
- //
- if (IsValid && Shift.getOpcode() == Instruction::AShr)
- IsValid = C.isNegative() == HighBitSet;
-
- return IsValid;
}
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
@@ -508,7 +538,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// shift is the only use, we can pull it out of the shift.
const APInt *Op0C;
if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
- if (canShiftBinOpWithConstantRHS(I, Op0BO, *Op0C)) {
+ if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(Op0BO->getOperand(1)), Op1);
@@ -552,7 +582,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
const APInt *C;
if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
match(TBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, TBO, *C)) {
+ canShiftBinOpWithConstantRHS(I, TBO)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(TBO->getOperand(1)), Op1);
@@ -571,7 +601,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
const APInt *C;
if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
match(FBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, FBO, *C)) {
+ canShiftBinOpWithConstantRHS(I, FBO)) {
Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
cast<Constant>(FBO->getOperand(1)), Op1);
@@ -601,6 +631,8 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+
const APInt *ShAmtAPInt;
if (match(Op1, m_APInt(ShAmtAPInt))) {
unsigned ShAmt = ShAmtAPInt->getZExtValue();
@@ -689,6 +721,12 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
}
+ // (1 << (C - x)) -> ((1 << C) >> x) if C is bitwidth - 1
+ if (match(Op0, m_One()) &&
+ match(Op1, m_Sub(m_SpecificInt(BitWidth - 1), m_Value(X))))
+ return BinaryOperator::CreateLShr(
+ ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
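+  // For example (illustrative IR), for i32 the fold above rewrites
+  //   %sub = sub i32 31, %x
+  //   %shl = shl i32 1, %sub
+  // into
+  //   %shl = lshr i32 -2147483648, %x   ; i.e. 0x80000000 >> %x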
+
return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9bf87d024607..e0d85c4b49ae 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1,9 +1,8 @@
//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -366,10 +365,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownBits InputKnown(SrcBitWidth);
if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
return I;
- Known = InputKnown.zextOrTrunc(BitWidth);
- // Any top bits are known to be zero.
- if (BitWidth > SrcBitWidth)
- Known.Zero.setBitsFrom(SrcBitWidth);
+ assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
+ Known = InputKnown.zextOrTrunc(BitWidth,
+ true /* ExtendedBitsAreKnownZero */);
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
@@ -967,26 +965,16 @@ InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
+///
+/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
+/// struct returns.
Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
APInt DemandedElts,
- int DMaskIdx,
- int TFCIdx) {
+ int DMaskIdx) {
unsigned VWidth = II->getType()->getVectorNumElements();
if (VWidth == 1)
return nullptr;
- // Need to change to new instruction format
- ConstantInt *TFC = nullptr;
- bool TFELWEEnabled = false;
- if (TFCIdx > 0) {
- TFC = dyn_cast<ConstantInt>(II->getArgOperand(TFCIdx));
- TFELWEEnabled = TFC->getZExtValue() & 0x1 // TFE
- || TFC->getZExtValue() & 0x2; // LWE
- }
-
- if (TFELWEEnabled)
- return nullptr; // TFE not yet supported
-
ConstantInt *NewDMask = nullptr;
if (DMaskIdx < 0) {
@@ -994,10 +982,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
// below.
DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
} else {
- ConstantInt *DMask = dyn_cast<ConstantInt>(II->getArgOperand(DMaskIdx));
- if (!DMask)
- return nullptr; // non-constant dmask is not supported by codegen
-
+ ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
// Mask off values that are undefined because the dmask doesn't cover them
@@ -1018,8 +1003,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
}
- // TODO: Handle 3 vectors when supported in code gen.
- unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countPopulation());
+ unsigned NewNumElts = DemandedElts.countPopulation();
if (!NewNumElts)
return UndefValue::get(II->getType());
@@ -1035,13 +1019,12 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
getIntrinsicInfoTableEntries(IID, Table);
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+ // Validate function argument and return types, extracting overloaded types
+ // along the way.
FunctionType *FTy = II->getCalledFunction()->getFunctionType();
SmallVector<Type *, 6> OverloadTys;
- Intrinsic::matchIntrinsicType(FTy->getReturnType(), TableRef, OverloadTys);
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Intrinsic::matchIntrinsicType(FTy->getParamType(i), TableRef, OverloadTys);
+ Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys);
- // Get the new return type overload of the intrinsic.
Module *M = II->getParent()->getParent()->getParent();
Type *EltTy = II->getType()->getVectorElementType();
Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
@@ -1184,6 +1167,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (I->getOpcode()) {
default: break;
+ case Instruction::GetElementPtr: {
+ // The LangRef requires that struct geps have all constant indices. As
+ // such, we can't convert any operand to partial undef.
+ auto mayIndexStructType = [](GetElementPtrInst &GEP) {
+ for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
+ I != E; I++)
+ if (I.isStruct())
+        return true;
+ return false;
+ };
+ if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
+ break;
+
+ // Conservatively track the demanded elements back through any vector
+ // operands we may have. We know there must be at least one, or we
+ // wouldn't have a vector result to get here. Note that we intentionally
+ // merge the undef bits here since gepping with either an undef base or
+ // index results in undef.
+ for (unsigned i = 0; i < I->getNumOperands(); i++) {
+ if (isa<UndefValue>(I->getOperand(i))) {
+ // If the entire vector is undefined, just return this info.
+ UndefElts = EltMask;
+ return nullptr;
+ }
+ if (I->getOperand(i)->getType()->isVectorTy()) {
+ APInt UndefEltsOp(VWidth, 0);
+ simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
+ UndefElts |= UndefEltsOp;
+ }
+ }
+
+ break;
+ }
case Instruction::InsertElement: {
// If this is a variable index, we don't know which element it overwrites.
// demand exactly the same input as we produce.
@@ -1430,6 +1446,30 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
if (!II) break;
switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_gather: // fallthrough
+ case Intrinsic::masked_load: {
+ // Subtlety: If we load from a pointer, the pointer must be valid
+ // regardless of whether the element is demanded. Doing otherwise risks
+ // segfaults which didn't exist in the original program.
+ APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
+ DemandedPassThrough(DemandedElts);
+ if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
+ for (unsigned i = 0; i < VWidth; i++) {
+ Constant *CElt = CV->getAggregateElement(i);
+ if (CElt->isNullValue())
+ DemandedPtrs.clearBit(i);
+ else if (CElt->isAllOnesValue())
+ DemandedPassThrough.clearBit(i);
+ }
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ simplifyAndSetOp(II, 0, DemandedPtrs, UndefElts2);
+ simplifyAndSetOp(II, 3, DemandedPassThrough, UndefElts3);
+
+    // An output element is undefined only if the corresponding element of
+    // both sources is undefined.
+ // TODO: can strengthen via mask as well.
+ UndefElts = UndefElts2 & UndefElts3;
+ break;
+ }
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
// The instructions for these intrinsics are speced to zero upper bits not
@@ -1639,8 +1679,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts);
default: {
if (getAMDGPUImageDMaskIntrinsic(II->getIntrinsicID()))
- return simplifyAMDGCNMemoryIntrinsicDemanded(
- II, DemandedElts, 0, II->getNumArgOperands() - 2);
+ return simplifyAMDGCNMemoryIntrinsicDemanded(II, DemandedElts, 0);
break;
}
@@ -1667,5 +1706,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts &= UndefElts2;
}
+ // If we've proven all of the lanes undef, return an undef value.
+ // TODO: Intersect w/demanded lanes
+ if (UndefElts.isAllOnesValue())
+    return UndefValue::get(I->getType());
+
return MadeChange ? I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 0ad1fc0e791f..dc9abdd7f47a 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1,9 +1,8 @@
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -663,18 +662,17 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
return true;
}
-// Turn a chain of inserts that splats a value into a canonical insert + shuffle
-// splat. That is:
-// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
-// shufflevector(insertelt(X, %k, 0), undef, zero)
-static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
- // We are interested in the last insert in a chain. So, if this insert
- // has a single user, and that user is an insert, bail.
+/// Turn a chain of inserts that splats a value into an insert + shuffle:
+/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
+/// shufflevector(insertelt(X, %k, 0), undef, zero)
+static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
+ // We are interested in the last insert in a chain. So if this insert has a
+ // single user and that user is an insert, bail.
if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
return nullptr;
- VectorType *VT = cast<VectorType>(InsElt.getType());
- int NumElements = VT->getNumElements();
+ auto *VecTy = cast<VectorType>(InsElt.getType());
+ unsigned NumElements = VecTy->getNumElements();
// Do not try to do this for a one-element vector, since that's a nop,
// and will cause an inf-loop.
@@ -706,24 +704,66 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
CurrIE = NextIE;
}
- // Make sure we've seen an insert into every element.
- if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+ // If this is just a single insertelement (not a sequence), we are done.
+ if (FirstIE == &InsElt)
return nullptr;
- // All right, create the insert + shuffle.
- Instruction *InsertFirst;
- if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
- InsertFirst = FirstIE;
- else
- InsertFirst = InsertElementInst::Create(
- UndefValue::get(VT), SplatVal,
- ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0),
- "", &InsElt);
+ // If we are not inserting into an undef vector, make sure we've seen an
+ // insert into every element.
+ // TODO: If the base vector is not undef, it might be better to create a splat
+ // and then a select-shuffle (blend) with the base vector.
+ if (!isa<UndefValue>(FirstIE->getOperand(0)))
+ if (any_of(ElementPresent, [](bool Present) { return !Present; }))
+ return nullptr;
+
+ // Create the insert + shuffle.
+ Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
+ UndefValue *UndefVec = UndefValue::get(VecTy);
+ Constant *Zero = ConstantInt::get(Int32Ty, 0);
+ if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
+ FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
- Constant *ZeroMask = ConstantAggregateZero::get(
- VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
+ // Splat from element 0, but replace absent elements with undef in the mask.
+ SmallVector<Constant *, 16> Mask(NumElements, Zero);
+ for (unsigned i = 0; i != NumElements; ++i)
+ if (!ElementPresent[i])
+ Mask[i] = UndefValue::get(Int32Ty);
- return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
+ return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
+}
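+// For example (illustrative IR), a full chain of inserts of the same scalar
+//   %i0 = insertelement <4 x i32> undef, i32 %k, i32 0
+//   %i1 = insertelement <4 x i32> %i0, i32 %k, i32 1
+//   %i2 = insertelement <4 x i32> %i1, i32 %k, i32 2
+//   %i3 = insertelement <4 x i32> %i2, i32 %k, i32 3
+// becomes
+//   %i0 = insertelement <4 x i32> undef, i32 %k, i32 0
+//   %s  = shufflevector <4 x i32> %i0, <4 x i32> undef, <4 x i32> zeroinitializer
+// and any lane that was never written gets an undef mask element instead of 0.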
+
+/// Try to fold an insert element into an existing splat shuffle by changing
+/// the shuffle's mask to include the index of this insert element.
+static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
+ // Check if the vector operand of this insert is a canonical splat shuffle.
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+ if (!Shuf || !Shuf->isZeroEltSplat())
+ return nullptr;
+
+ // Check for a constant insertion index.
+ uint64_t IdxC;
+ if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
+ return nullptr;
+
+ // Check if the splat shuffle's input is the same as this insert's scalar op.
+ Value *X = InsElt.getOperand(1);
+ Value *Op0 = Shuf->getOperand(0);
+ if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
+ return nullptr;
+
+ // Replace the shuffle mask element at the index of this insert with a zero.
+ // For example:
+ // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
+ // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+ unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
+ Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
+ Constant *Zero = ConstantInt::getNullValue(I32Ty);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
+
+ Constant *NewMask = ConstantVector::get(NewMaskVec);
+ return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
}
/// If we have an insertelement instruction feeding into another insertelement
@@ -864,30 +904,28 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
return replaceInstUsesWith(IE, V);
- // Inserting an undef or into an undefined place, remove this.
- if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
- replaceInstUsesWith(IE, VecOp);
+ // If the vector and scalar are both bitcast from the same element type, do
+ // the insert in that source type followed by bitcast.
+ Value *VecSrc, *ScalarSrc;
+ if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
+ match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
+ (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
+ VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
+ VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
+ // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
+ // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
+ Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
+ return new BitCastInst(NewInsElt, IE.getType());
+ }
// If the inserted element was extracted from some other vector and both
- // indexes are constant, try to turn this into a shuffle.
+ // indexes are valid constants, try to turn this into a shuffle.
uint64_t InsertedIdx, ExtractedIdx;
Value *ExtVecOp;
if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
- m_ConstantInt(ExtractedIdx)))) {
- unsigned NumInsertVectorElts = IE.getType()->getNumElements();
- unsigned NumExtractVectorElts = ExtVecOp->getType()->getVectorNumElements();
- if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract.
- return replaceInstUsesWith(IE, VecOp);
-
- if (InsertedIdx >= NumInsertVectorElts) // Out of range insert.
- return replaceInstUsesWith(IE, UndefValue::get(IE.getType()));
-
- // If we are extracting a value from a vector, then inserting it right
- // back into the same place, just use the input vector.
- if (ExtVecOp == VecOp && ExtractedIdx == InsertedIdx)
- return replaceInstUsesWith(IE, VecOp);
-
+ m_ConstantInt(ExtractedIdx))) &&
+ ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
// TODO: Looking at the user(s) to determine if this insert is a
// fold-to-shuffle opportunity does not match the usual instcombine
// constraints. We should decide if the transform is worthy based only
@@ -943,11 +981,12 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
return NewInsElt;
- // Turn a sequence of inserts that broadcasts a scalar into a single
- // insert + shufflevector.
- if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
+ if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
return Broadcast;
+ if (Instruction *Splat = foldInsEltIntoSplat(IE))
+ return Splat;
+
return nullptr;
}
@@ -1172,7 +1211,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
SmallVector<Value*, 8> NewOps;
bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- Value *V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ Value *V;
+ // Recursively call evaluateInDifferentElementOrder on vector arguments
+ // as well. E.g. GetElementPtr may have scalar operands even if the
+ // return value is a vector, so we need to examine the operand type.
+ if (I->getOperand(i)->getType()->isVectorTy())
+ V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ else
+ V = I->getOperand(i);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}
@@ -1337,6 +1383,41 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
return NewBO;
}
+/// If we have an insert of a scalar to a non-zero element of an undefined
+/// vector and then shuffle that value, that's the same as inserting to the zero
+/// element and shuffling. Splatting from the zero element is recognized as the
+/// canonical form of splat.
+static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
+ Constant *Mask = Shuf.getMask();
+ Value *X;
+ uint64_t IndexC;
+
+ // Match a shuffle that is a splat to a non-zero element.
+ if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
+ m_ConstantInt(IndexC)))) ||
+ !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
+ return nullptr;
+
+ // Insert into element 0 of an undef vector.
+ UndefValue *UndefVec = UndefValue::get(Shuf.getType());
+ Constant *Zero = Builder.getInt32(0);
+ Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
+
+ // Splat from element 0. Any mask element that is undefined remains undefined.
+ // For example:
+ // shuf (inselt undef, X, 2), undef, <2,2,undef>
+ // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
+ unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
+ NewMask[i] = Mask->getAggregateElement(i);
+
+ return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
+}
+
/// Try to fold shuffles that are the equivalent of a vector select.
static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
InstCombiner::BuilderTy &Builder,
@@ -1344,6 +1425,15 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
if (!Shuf.isSelect())
return nullptr;
+ // Canonicalize to choose from operand 0 first.
+ unsigned NumElts = Shuf.getType()->getVectorNumElements();
+ if (Shuf.getMaskValue(0) >= (int)NumElts) {
+ // TODO: Can we assert that both operands of a shuffle-select are not undef
+    // (otherwise, it would have been folded by instsimplify)?
+ Shuf.commute();
+ return &Shuf;
+ }
+
if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
return I;
@@ -1499,6 +1589,11 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
return nullptr;
+ // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
+ // then combining may result in worse codegen.
+ if (!Op0->hasOneUse())
+ return nullptr;
+
// We are extracting a subvector from a shuffle. Remove excess elements from
// the 1st shuffle mask to eliminate the extract.
//
@@ -1588,6 +1683,72 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
return nullptr;
}
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+ // Match the operands as identity with padding (also known as concatenation
+ // with undef) shuffles of the same source type. The backend is expected to
+ // recreate these concatenations from a shuffle of narrow operands.
+ auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+ auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+ if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
+ !Shuffle1 || !Shuffle1->isIdentityWithPadding())
+ return nullptr;
+
+ // We limit this transform to power-of-2 types because we expect that the
+ // backend can convert the simplified IR patterns to identical nodes as the
+ // original IR.
+ // TODO: If we can verify the same behavior for arbitrary types, the
+ // power-of-2 checks can be removed.
+ Value *X = Shuffle0->getOperand(0);
+ Value *Y = Shuffle1->getOperand(0);
+ if (X->getType() != Y->getType() ||
+ !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
+ !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
+ !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
+ isa<UndefValue>(X) || isa<UndefValue>(Y))
+ return nullptr;
+ assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
+ isa<UndefValue>(Shuffle1->getOperand(1)) &&
+ "Unexpected operand for identity shuffle");
+
+ // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
+ // operands directly by adjusting the shuffle mask to account for the narrower
+ // types:
+ // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+ int NarrowElts = X->getType()->getVectorNumElements();
+ int WideElts = Shuffle0->getType()->getVectorNumElements();
+ assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+ Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+ SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+ SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+ for (int i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] == -1)
+ continue;
+
+ // If this shuffle is choosing an undef element from 1 of the sources, that
+ // element is undef.
+ if (Mask[i] < WideElts) {
+ if (Shuffle0->getMaskValue(Mask[i]) == -1)
+ continue;
+ } else {
+ if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
+ continue;
+ }
+
+ // If this shuffle is choosing from the 1st narrow op, the mask element is
+ // the same. If this shuffle is choosing from the 2nd narrow op, the mask
+ // element is offset down to adjust for the narrow vector widths.
+ if (Mask[i] < WideElts) {
+ assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
+ NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
+ } else {
+ assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
+ NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
+ }
+ }
+ return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
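+// For example (illustrative IR), with <2 x i32> sources widened to <4 x i32>
+//   %wx = shufflevector <2 x i32> %x, <2 x i32> undef,
+//                       <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+//   %wy = shufflevector <2 x i32> %y, <2 x i32> undef,
+//                       <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+//   %r  = shufflevector <4 x i32> %wx, <4 x i32> %wy,
+//                       <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+// becomes a single shuffle of the narrow sources
+//   %r  = shufflevector <2 x i32> %x, <2 x i32> %y,
+//                       <4 x i32> <i32 0, i32 1, i32 2, i32 3>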
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
@@ -1595,36 +1756,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
return replaceInstUsesWith(SVI, V);
- if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
- return I;
-
- if (Instruction *I = narrowVectorSelect(SVI, Builder))
- return I;
-
+  // Canonicalize shuffle(x, x, mask) -> shuffle(x, undef, mask')
+  // Canonicalize shuffle(undef, x, mask) -> shuffle(x, undef, mask').
unsigned VWidth = SVI.getType()->getVectorNumElements();
- APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
- if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
- if (V != &SVI)
- return replaceInstUsesWith(SVI, V);
- return &SVI;
- }
-
- if (Instruction *I = foldIdentityExtractShuffle(SVI))
- return I;
-
- // This transform has the potential to lose undef knowledge, so it is
- // intentionally placed after SimplifyDemandedVectorElts().
- if (Instruction *I = foldShuffleWithInsert(SVI))
- return I;
-
+ unsigned LHSWidth = LHS->getType()->getVectorNumElements();
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- unsigned LHSWidth = LHS->getType()->getVectorNumElements();
- bool MadeChange = false;
-
- // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
- // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
// Remap any references to RHS to use LHS.
SmallVector<Constant*, 16> Elts;
@@ -1646,11 +1783,36 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SVI.setOperand(0, SVI.getOperand(1));
SVI.setOperand(1, UndefValue::get(RHS->getType()));
SVI.setOperand(2, ConstantVector::get(Elts));
- LHS = SVI.getOperand(0);
- RHS = SVI.getOperand(1);
- MadeChange = true;
+ return &SVI;
}
+ if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
+ return I;
+
+ if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
+ return I;
+
+ if (Instruction *I = narrowVectorSelect(SVI, Builder))
+ return I;
+
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ if (V != &SVI)
+ return replaceInstUsesWith(SVI, V);
+ return &SVI;
+ }
+
+ if (Instruction *I = foldIdentityExtractShuffle(SVI))
+ return I;
+
+ // These transforms have the potential to lose undef knowledge, so they are
+ // intentionally placed after SimplifyDemandedVectorElts().
+ if (Instruction *I = foldShuffleWithInsert(SVI))
+ return I;
+ if (Instruction *I = foldIdentityPaddedShuffles(SVI))
+ return I;
+
if (VWidth == LHSWidth) {
// Analyze the shuffle, are the LHS or RHS and identity shuffles?
bool isLHSID, isRHSID;
@@ -1695,6 +1857,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// +-----------+-----------+-----------+-----------+
// Index range [6,10): ^-----------^ Needs an extra shuffle.
// Target type i40: ^--------------^ Won't work, bail.
+ bool MadeChange = false;
if (isShuffleExtractingFromLHS(SVI, Mask)) {
Value *V = LHS;
unsigned MaskElems = Mask.size();
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index be7d43bbcf2c..385f4926b845 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1,9 +1,8 @@
//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,14 +46,17 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -221,6 +223,11 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
return !Overflow;
}
+static bool hasNoUnsignedWrap(BinaryOperator &I) {
+ OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+ return OBO && OBO->hasNoUnsignedWrap();
+}
+
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. We preserve fast-math flags when applicable as they can be
/// preserved.
@@ -327,14 +334,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- if (MaintainNoSignedWrap(I, B, C) &&
+ bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
+ bool IsNSW = MaintainNoSignedWrap(I, B, C);
+
+ ClearSubclassDataAfterReassociation(I);
+
+ if (IsNUW)
+ I.setHasNoUnsignedWrap(true);
+
+ if (IsNSW &&
(!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
// Note: this is only valid because SimplifyBinOp doesn't look at
// the operands to Op0.
- I.clearSubclassOptionalData();
I.setHasNoSignedWrap(true);
- } else {
- ClearSubclassDataAfterReassociation(I);
}
Changed = true;
@@ -419,8 +431,14 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
- BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B);
- if (isa<FPMathOperator>(NewBO)) {
+ bool IsNUW = hasNoUnsignedWrap(I) &&
+ hasNoUnsignedWrap(*Op0) &&
+ hasNoUnsignedWrap(*Op1);
+ BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
+ BinaryOperator::CreateNUW(Opcode, A, B) :
+ BinaryOperator::Create(Opcode, A, B);
+
+ if (isa<FPMathOperator>(NewBO)) {
FastMathFlags Flags = I.getFastMathFlags();
Flags &= Op0->getFastMathFlags();
Flags &= Op1->getFastMathFlags();
@@ -433,6 +451,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
+ if (IsNUW)
+ I.setHasNoUnsignedWrap(true);
Changed = true;
continue;
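+      // For example (illustrative IR), when every add carries nuw the
+      // reassociated form keeps it:
+      //   %x = add nuw i8 %a, 3
+      //   %y = add nuw i8 %b, 5
+      //   %r = add nuw i8 %x, %y
+      // can become
+      //   %t = add nuw i8 %a, %b
+      //   %r = add nuw i8 %t, 8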
@@ -570,32 +590,44 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
++NumFactor;
SimplifiedInst->takeName(&I);
- // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
- // TODO: Check for NUW.
+ // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
bool HasNSW = false;
- if (isa<OverflowingBinaryOperator>(&I))
+ bool HasNUW = false;
+ if (isa<OverflowingBinaryOperator>(&I)) {
HasNSW = I.hasNoSignedWrap();
+ HasNUW = I.hasNoUnsignedWrap();
+ }
- if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS))
+ if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
HasNSW &= LOBO->hasNoSignedWrap();
+ HasNUW &= LOBO->hasNoUnsignedWrap();
+ }
- if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS))
+ if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
HasNSW &= ROBO->hasNoSignedWrap();
+ HasNUW &= ROBO->hasNoUnsignedWrap();
+ }
- // We can propagate 'nsw' if we know that
- // %Y = mul nsw i16 %X, C
- // %Z = add nsw i16 %Y, %X
- // =>
- // %Z = mul nsw i16 %X, C+1
- //
- // iff C+1 isn't INT_MIN
const APInt *CInt;
if (TopLevelOpcode == Instruction::Add &&
- InnerOpcode == Instruction::Mul)
- if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
- BO->setHasNoSignedWrap(HasNSW);
+ InnerOpcode == Instruction::Mul) {
+ // We can propagate 'nsw' if we know that
+ // %Y = mul nsw i16 %X, C
+ // %Z = add nsw i16 %Y, %X
+ // =>
+ // %Z = mul nsw i16 %X, C+1
+ //
+ // iff C+1 isn't INT_MIN
+ if (match(V, m_APInt(CInt))) {
+ if (!CInt->isMinSignedValue())
+ BO->setHasNoSignedWrap(HasNSW);
+ }
+
+ // nuw can be propagated with any constant or nuw value.
+ BO->setHasNoUnsignedWrap(HasNUW);
+ }
}
}
}
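+  // For example (illustrative IR), factoring an add of a nuw mul keeps nuw:
+  //   %y = mul nuw i8 %x, 5
+  //   %z = add nuw i8 %y, %x
+  // can become
+  //   %z = mul nuw i8 %x, 6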
@@ -922,8 +954,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// If the InVal is an invoke at the end of the pred block, then we can't
// insert a computation after it without breaking the edge.
- if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
- if (II->getParent() == NonConstBB)
+ if (isa<InvokeInst>(InVal))
+ if (cast<Instruction>(InVal)->getParent() == NonConstBB)
return nullptr;
// If the incoming non-constant value is in I's block, we will remove one
@@ -1376,7 +1408,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
LHS->hasOneUse() && RHS->hasOneUse() &&
- cast<ShuffleVectorInst>(LHS)->isConcat()) {
+ cast<ShuffleVectorInst>(LHS)->isConcat() &&
+ cast<ShuffleVectorInst>(RHS)->isConcat()) {
// This transform does not have the speculative execution constraint as
// below because the shuffle is a concatenation. The new binops are
// operating on exactly the same elements as the existing binop.
@@ -1415,6 +1448,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
return createBinOpShuffle(V1, V2, Mask);
}
+ // If both arguments of a commutative binop are select-shuffles that use the
+ // same mask with commuted operands, the shuffles are unnecessary.
+ if (Inst.isCommutative() &&
+ match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
+ match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
+ m_Specific(Mask)))) {
+ auto *LShuf = cast<ShuffleVectorInst>(LHS);
+ auto *RShuf = cast<ShuffleVectorInst>(RHS);
+ // TODO: Allow shuffles that contain undefs in the mask?
+ // That is legal, but it reduces undef knowledge.
+ // TODO: Allow arbitrary shuffles by shuffling after binop?
+ // That might be legal, but we have to deal with poison.
+ if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
+ RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+ // Example:
+ // LHS = shuffle V1, V2, <0, 5, 6, 3>
+ // RHS = shuffle V2, V1, <0, 5, 6, 3>
+ // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
+ Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
+ NewBO->copyIRFlags(&Inst);
+ return NewBO;
+ }
+ }
+
// If one argument is a shuffle within one vector and the other is a constant,
// try moving the shuffle after the binary operation. This canonicalization
// intends to move shuffles closer to other shuffles and binops closer to
@@ -1557,6 +1614,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
+ // For vector geps, use the generic demanded vector support.
+ if (GEP.getType()->isVectorTy()) {
+ auto VWidth = GEP.getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
+ UndefElts)) {
+ if (V != &GEP)
+ return replaceInstUsesWith(GEP, V);
+ return &GEP;
+ }
+
+ // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
+ // possible (decide on canonical form for pointer broadcast), 3) exploit
+ // undef elements to decrease demanded bits
+ }
+
Value *PtrOp = GEP.getOperand(0);
// Eliminate unneeded casts for indices, and replace indices which displace
@@ -1755,9 +1829,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// put NewSrc at same location as %src
Builder.SetInsertPoint(cast<Instruction>(PtrOp));
auto *NewSrc = cast<GetElementPtrInst>(
- Builder.CreateGEP(SO0, GO1, Src->getName()));
+ Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
NewSrc->setIsInBounds(Src->isInBounds());
- auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1});
+ auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
NewGEP->setIsInBounds(GEP.isInBounds());
return NewGEP;
}
@@ -1881,6 +1955,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (StrippedPtr != PtrOp) {
bool HasZeroPointerIndex = false;
+ Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+
if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
HasZeroPointerIndex = C->isZero();
@@ -1894,11 +1970,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (HasZeroPointerIndex) {
if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
// GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
- if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+ if (CATy->getElementType() == StrippedPtrEltTy) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
GetElementPtrInst *Res = GetElementPtrInst::Create(
- StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
+ StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1911,7 +1987,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
}
- if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) {
+ if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
// GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
if (CATy->getElementType() == XATy->getElementType()) {
// -> GEP [10 x i8]* X, i32 0, ...
@@ -1934,11 +2010,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(
- nullptr, StrippedPtr, Idx, GEP.getName())
- : Builder.CreateGEP(nullptr, StrippedPtr, Idx,
- GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+ Idx, GEP.getName())
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEPType);
}
}
@@ -1947,17 +2024,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Transform things like:
// %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
- Type *SrcEltTy = StrippedPtrTy->getElementType();
- if (SrcEltTy->isArrayTy() &&
- DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) ==
+ if (StrippedPtrEltTy->isArrayTy() &&
+ DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
DL.getTypeAllocSize(GEPEltType)) {
Type *IdxType = DL.getIndexType(GEPType);
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
Value *NewGEP =
GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
GEP.getName())
- : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+ GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
@@ -1967,11 +2044,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (GEPEltType->isSized() && SrcEltTy->isSized()) {
+ if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
- uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1990,9 +2067,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP may not be "inbounds".
Value *NewGEP =
GEP.isInBounds() && NSW
- ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
- GEP.getName())
- : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx,
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+ NewIdx, GEP.getName())
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
@@ -2006,13 +2083,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (GEPEltType->isSized() && SrcEltTy->isSized() &&
- SrcEltTy->isArrayTy()) {
+ if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
+ StrippedPtrEltTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
uint64_t ArrayEltSize =
- DL.getTypeAllocSize(SrcEltTy->getArrayElementType());
+ DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2032,11 +2109,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *IndTy = DL.getIndexType(GEPType);
Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
- Value *NewGEP = GEP.isInBounds() && NSW
- ? Builder.CreateInBoundsGEP(
- SrcEltTy, StrippedPtr, Off, GEP.getName())
- : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off,
- GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+ Off, GEP.getName())
+ : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEPType);
@@ -2084,8 +2162,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// constructing an AddrSpaceCastInst
Value *NGEP =
GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]})
- : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]});
+ ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
+ : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
NGEP->takeName(&GEP);
// Preserve GEP address space to satisfy users
@@ -2132,8 +2210,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP =
GEP.isInBounds()
- ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices)
- : Builder.CreateGEP(nullptr, SrcOp, NewIndices);
+ ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
+ : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
if (NGEP->getType() == GEPType)
return replaceInstUsesWith(GEP, NGEP);
@@ -2159,7 +2237,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
if (BasePtrOffset.ule(AllocSize)) {
return GetElementPtrInst::CreateInBounds(
- PtrOp, makeArrayRef(Ops).slice(1), GEP.getName());
+ GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
+ GEP.getName());
}
}
}
@@ -2296,8 +2375,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::objectsize) {
- ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI,
- /*MustSucceed=*/true);
+ Value *Result =
+ lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
replaceInstUsesWith(*I, Result);
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
@@ -2426,9 +2505,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
- // Insert a new store to null because we cannot modify the CFG here.
- Builder.CreateStore(ConstantInt::getTrue(FI.getContext()),
- UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+ // Leave a marker since we can't modify the CFG here.
+ CreateNonTerminatorUnreachable(&FI);
return eraseInstFromFunction(FI);
}
@@ -2618,53 +2696,28 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
makeArrayRef(exti, exte));
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
- // We're extracting from an intrinsic, see if we're the only user, which
- // allows us to simplify multiple result intrinsics to simpler things that
- // just get one value.
- if (II->hasOneUse()) {
- // Check if we're grabbing the overflow bit or the result of a 'with
- // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
+ // We're extracting from an overflow intrinsic, see if we're the only user,
+ // which allows us to simplify multiple result intrinsics to simpler
+ // things that just get one value.
+ if (WO->hasOneUse()) {
+ // Check if we're grabbing only the result of a 'with overflow' intrinsic
// and replace it with a traditional binary instruction.
- switch (II->getIntrinsicID()) {
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- replaceInstUsesWith(*II, UndefValue::get(II->getType()));
- eraseInstFromFunction(*II);
- return BinaryOperator::CreateAdd(LHS, RHS);
- }
-
- // If the normal result of the add is dead, and the RHS is a constant,
- // we can transform this into a range comparison.
- // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
- if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
- return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
- ConstantExpr::getNot(CI));
- break;
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- replaceInstUsesWith(*II, UndefValue::get(II->getType()));
- eraseInstFromFunction(*II);
- return BinaryOperator::CreateSub(LHS, RHS);
- }
- break;
- case Intrinsic::umul_with_overflow:
- case Intrinsic::smul_with_overflow:
- if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- replaceInstUsesWith(*II, UndefValue::get(II->getType()));
- eraseInstFromFunction(*II);
- return BinaryOperator::CreateMul(LHS, RHS);
- }
- break;
- default:
- break;
+ if (*EV.idx_begin() == 0) {
+ Instruction::BinaryOps BinOp = WO->getBinaryOp();
+ Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
+ replaceInstUsesWith(*WO, UndefValue::get(WO->getType()));
+ eraseInstFromFunction(*WO);
+ return BinaryOperator::Create(BinOp, LHS, RHS);
}
+
+ // If the normal result of the add is dead, and the RHS is a constant,
+ // we can transform this into a range comparison.
+ // overflow = uadd a, -4 --> overflow = icmp ugt a, 3
+ if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
+ return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
+ ConstantExpr::getNot(CI));
}
}
if (LoadInst *L = dyn_cast<LoadInst>(Agg))
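The range-comparison fold kept above, overflow = uadd a, -4 --> overflow = icmp ugt a, 3, is plain modular arithmetic: adding the unsigned encoding of -4 (0xFFFFFFFC) wraps exactly when a >= 4, and ConstantExpr::getNot(-4) is 3. A small standalone check of that equivalence (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t A = 0; A < 256; ++A) {
    // uadd.with.overflow(A, -4) sets its overflow bit iff A + 0xFFFFFFFC wraps
    // past 2^32 - 1 ...
    bool Overflow = A + 0xFFFFFFFCull > 0xFFFFFFFFull;
    // ... which is exactly icmp ugt A, ~(-4) == 3.
    bool Cmp = A > 3;
    assert(Overflow == Cmp);
  }
  return 0;
}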
@@ -2687,7 +2740,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
Builder.SetInsertPoint(L);
Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
L->getPointerOperand(), Indices);
- Instruction *NL = Builder.CreateLoad(GEP);
+ Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
// Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
AAMDNodes Nodes;
@@ -3065,9 +3118,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
I->isTerminator())
return false;
- // Do not sink alloca instructions out of the entry block.
- if (isa<AllocaInst>(I) && I->getParent() ==
- &DestBlock->getParent()->getEntryBlock())
+ // Do not sink static or dynamic alloca instructions. Static allocas must
+ // remain in the entry block, and dynamic allocas must not be sunk in between
+ // a stacksave / stackrestore pair, which would incorrectly shorten its
+ // lifetime.
+ if (isa<AllocaInst>(I))
return false;
// Do not sink into catchswitch blocks.
@@ -3093,13 +3148,35 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
++NumSunkInst;
// Also sink all related debug uses from the source basic block. Otherwise we
- // get debug use before the def.
- SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+ // get debug use before the def. Attempt to salvage debug uses first, to
+ // maximise the range over which variables have a location. If we cannot salvage, then
+ // mark the location undef: we know it was supposed to receive a new location
+ // here, but that computation has been sunk.
+ SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
findDbgUsers(DbgUsers, I);
- for (auto *DII : DbgUsers) {
+ for (auto *DII : reverse(DbgUsers)) {
if (DII->getParent() == SrcBlock) {
- DII->moveBefore(&*InsertPos);
- LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+ // dbg.value is in the same basic block as the sunk inst, see if we can
+ // salvage it. Clone a new copy of the instruction: on success we need
+ // both salvaged and unsalvaged copies.
+ SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
+ cast<DbgVariableIntrinsic>(DII->clone())};
+
+ if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
+ // We are unable to salvage: sink the cloned dbg.value, and mark the
+ // original as undef, terminating any earlier variable location.
+ LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+ TmpUser[0]->insertBefore(&*InsertPos);
+ Value *Undef = UndefValue::get(I->getType());
+ DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+ ValueAsMetadata::get(Undef)));
+ } else {
+ // We successfully salvaged: place the salvaged dbg.value in the
+ // original location, and move the unmodified dbg.value to sink with
+ // the sunk inst.
+ TmpUser[0]->insertBefore(DII);
+ DII->moveBefore(&*InsertPos);
+ }
}
}
return true;
@@ -3294,7 +3371,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
if (isInstructionTriviallyDead(Inst, TLI)) {
++NumDeadInst;
LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- salvageDebugInfo(*Inst);
+ if (!salvageDebugInfo(*Inst))
+ replaceDbgUsesWithUndef(Inst);
Inst->eraseFromParent();
MadeIRChange = true;
continue;
@@ -3407,7 +3485,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
static bool combineInstructionsOverFunction(
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
LoopInfo *LI = nullptr) {
auto &DL = F.getParent()->getDataLayout();
ExpensiveCombines |= EnableExpensiveCombines;
@@ -3437,8 +3516,8 @@ static bool combineInstructionsOverFunction(
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
- InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA,
- AC, TLI, DT, ORE, DL, LI);
+ InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
+ AC, TLI, DT, ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
if (!IC.run())
@@ -3458,8 +3537,15 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- ExpensiveCombines, LI))
+ BFI, PSI, ExpensiveCombines, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3483,6 +3569,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3499,9 +3587,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
// Optional analyses.
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BlockFrequencyInfo *BFI =
+ (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- ExpensiveCombines, LI);
+ BFI, PSI, ExpensiveCombines, LI);
}
char InstructionCombiningPass::ID = 0;
@@ -3514,6 +3608,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index f1558c75cb90..6821e214e921 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1,9 +1,8 @@
//===- AddressSanitizer.cpp - memory error detector -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -25,7 +25,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/IR/Argument.h"
@@ -72,6 +71,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
@@ -94,9 +94,6 @@ static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kDynamicShadowSentinel =
std::numeric_limits<uint64_t>::max();
-static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64;
static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF; // < 2G.
static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
@@ -112,6 +109,7 @@ static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
+static const uint64_t kEmscriptenShadowOffset = 0;
static const uint64_t kMyriadShadowScale = 5;
static const uint64_t kMyriadMemoryOffset32 = 0x80000000ULL;
@@ -275,6 +273,16 @@ static cl::opt<bool> ClInvalidPointerPairs(
cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
cl::init(false));
+static cl::opt<bool> ClInvalidPointerCmp(
+ "asan-detect-invalid-pointer-cmp",
+ cl::desc("Instrument <, <=, >, >= with pointer operands"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerSub(
+ "asan-detect-invalid-pointer-sub",
+ cl::desc("Instrument - operations with pointer operands"), cl::Hidden,
+ cl::init(false));
+
static cl::opt<unsigned> ClRealignStack(
"asan-realign-stack",
cl::desc("Realign stack to the value of this flag (power of two)"),
@@ -311,10 +319,10 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale",
cl::desc("scale of asan shadow mapping"),
cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClMappingOffset(
- "asan-mapping-offset",
- cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden,
- cl::init(0));
+static cl::opt<uint64_t>
+ ClMappingOffset("asan-mapping-offset",
+ cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"),
+ cl::Hidden, cl::init(0));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
@@ -393,87 +401,6 @@ STATISTIC(NumOptimizedAccessesToStackVar,
namespace {
-/// Frontend-provided metadata for source location.
-struct LocationMetadata {
- StringRef Filename;
- int LineNo = 0;
- int ColumnNo = 0;
-
- LocationMetadata() = default;
-
- bool empty() const { return Filename.empty(); }
-
- void parse(MDNode *MDN) {
- assert(MDN->getNumOperands() == 3);
- MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
- Filename = DIFilename->getString();
- LineNo =
- mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
- ColumnNo =
- mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
- }
-};
-
-/// Frontend-provided metadata for global variables.
-class GlobalsMetadata {
-public:
- struct Entry {
- LocationMetadata SourceLoc;
- StringRef Name;
- bool IsDynInit = false;
- bool IsBlacklisted = false;
-
- Entry() = default;
- };
-
- GlobalsMetadata() = default;
-
- void reset() {
- inited_ = false;
- Entries.clear();
- }
-
- void init(Module &M) {
- assert(!inited_);
- inited_ = true;
- NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals) return;
- for (auto MDN : Globals->operands()) {
- // Metadata node contains the global and the fields of "Entry".
- assert(MDN->getNumOperands() == 5);
- auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
- // The optimizer may optimize away a global entirely.
- if (!V) continue;
- auto *StrippedV = V->stripPointerCasts();
- auto *GV = dyn_cast<GlobalVariable>(StrippedV);
- if (!GV) continue;
- // We can already have an entry for GV if it was merged with another
- // global.
- Entry &E = Entries[GV];
- if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
- E.SourceLoc.parse(Loc);
- if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
- E.Name = Name->getString();
- ConstantInt *IsDynInit =
- mdconst::extract<ConstantInt>(MDN->getOperand(3));
- E.IsDynInit |= IsDynInit->isOne();
- ConstantInt *IsBlacklisted =
- mdconst::extract<ConstantInt>(MDN->getOperand(4));
- E.IsBlacklisted |= IsBlacklisted->isOne();
- }
- }
-
- /// Returns metadata entry for a given global.
- Entry get(GlobalVariable *G) const {
- auto Pos = Entries.find(G);
- return (Pos != Entries.end()) ? Pos->second : Entry();
- }
-
-private:
- bool inited_ = false;
- DenseMap<GlobalVariable *, Entry> Entries;
-};
-
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) ADD-or-OR Offset.
/// If InGlobal is true, then
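The mapping rule quoted in the trailing context above is easy to work through by hand. A minimal standalone sketch of the "ADD" flavour, using the default scale of 3 and kDefaultShadowOffset64 from earlier in this file (the sample address is illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Scale = 3;                // default shadow scale: 8 app bytes per shadow byte
  const uint64_t Offset = 1ull << 44;      // kDefaultShadowOffset64
  const uint64_t Addr = 0x7fff12345678ull; // arbitrary application address
  const uint64_t Shadow = (Addr >> Scale) + Offset; // shadow = (mem >> Scale) ADD Offset
  printf("shadow(0x%llx) = 0x%llx\n",
         (unsigned long long)Addr, (unsigned long long)Shadow);
  return 0;
}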
@@ -499,7 +426,6 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 ||
TargetTriple.getArch() == Triple::ppc64le;
bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
- bool IsX86 = TargetTriple.getArch() == Triple::x86;
bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
bool IsMIPS32 = TargetTriple.isMIPS32();
bool IsMIPS64 = TargetTriple.isMIPS64();
@@ -508,6 +434,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsWindows = TargetTriple.isOSWindows();
bool IsFuchsia = TargetTriple.isOSFuchsia();
bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+ bool IsEmscripten = TargetTriple.isOSEmscripten();
ShadowMapping Mapping;
@@ -526,10 +453,11 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
else if (IsNetBSD)
Mapping.Offset = kNetBSD_ShadowOffset32;
else if (IsIOS)
- // If we're targeting iOS and x86, the binary is built for iOS simulator.
- Mapping.Offset = IsX86 ? kIOSSimShadowOffset32 : kIOSShadowOffset32;
+ Mapping.Offset = kDynamicShadowSentinel;
else if (IsWindows)
Mapping.Offset = kWindowsShadowOffset32;
+ else if (IsEmscripten)
+ Mapping.Offset = kEmscriptenShadowOffset;
else if (IsMyriad) {
uint64_t ShadowOffset = (kMyriadMemoryOffset32 + kMyriadMemorySize32 -
(kMyriadMemorySize32 >> Mapping.Scale));
@@ -566,10 +494,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
} else if (IsMIPS64)
Mapping.Offset = kMIPS64_ShadowOffset64;
else if (IsIOS)
- // If we're targeting iOS and x86, the binary is built for iOS simulator.
- // We are using dynamic shadow offset on the 64-bit devices.
- Mapping.Offset =
- IsX86_64 ? kIOSSimShadowOffset64 : kDynamicShadowSentinel;
+ Mapping.Offset = kDynamicShadowSentinel;
else if (IsAArch64)
Mapping.Offset = kAArch64_ShadowOffset64;
else
@@ -607,27 +532,53 @@ static size_t RedzoneSizeForScale(int MappingScale) {
namespace {
-/// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public FunctionPass {
- // Pass identification, replacement for typeid
+/// Module analysis for getting various metadata about the module.
+class ASanGlobalsMetadataWrapperPass : public ModulePass {
+public:
static char ID;
- explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false)
- : FunctionPass(ID), UseAfterScope(UseAfterScope || ClUseAfterScope) {
- this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
- this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ?
- ClEnableKasan : CompileKernel;
- initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
+ ASanGlobalsMetadataWrapperPass() : ModulePass(ID) {
+ initializeASanGlobalsMetadataWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ GlobalsMD = GlobalsMetadata(M);
+ return false;
}
StringRef getPassName() const override {
- return "AddressSanitizerFunctionPass";
+ return "ASanGlobalsMetadataWrapperPass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.setPreservesAll();
+ }
+
+ GlobalsMetadata &getGlobalsMD() { return GlobalsMD; }
+
+private:
+ GlobalsMetadata GlobalsMD;
+};
+
+char ASanGlobalsMetadataWrapperPass::ID = 0;
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer {
+ AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseAfterScope = false)
+ : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) {
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel =
+ ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+ C = &(M.getContext());
+ LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ TargetTriple = Triple(M.getTargetTriple());
+
+ Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
}
uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
@@ -672,14 +623,10 @@ struct AddressSanitizer : public FunctionPass {
Value *SizeArgument, uint32_t Exp);
void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool runOnFunction(Function &F) override;
+ bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
void markEscapedLocalAllocas(Function &F);
- bool doInitialization(Module &M) override;
- bool doFinalization(Module &M) override;
-
- DominatorTree &getDominatorTree() const { return *DT; }
private:
friend struct FunctionStackPoisoner;
@@ -715,36 +662,68 @@ private:
bool UseAfterScope;
Type *IntptrTy;
ShadowMapping Mapping;
- DominatorTree *DT;
- Function *AsanHandleNoReturnFunc;
- Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
+ FunctionCallee AsanHandleNoReturnFunc;
+ FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
Constant *AsanShadowGlobal;
// These arrays are indexed by AccessIsWrite, Experiment and log2(AccessSize).
- Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];
- Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+ FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes];
+ FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
// These arrays are indexed by AccessIsWrite and Experiment.
- Function *AsanErrorCallbackSized[2][2];
- Function *AsanMemoryAccessCallbackSized[2][2];
+ FunctionCallee AsanErrorCallbackSized[2][2];
+ FunctionCallee AsanMemoryAccessCallbackSized[2][2];
- Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
+ FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
InlineAsm *EmptyAsm;
Value *LocalDynamicShadow = nullptr;
GlobalsMetadata GlobalsMD;
DenseMap<const AllocaInst *, bool> ProcessedAllocas;
};
-class AddressSanitizerModule : public ModulePass {
+class AddressSanitizerLegacyPass : public FunctionPass {
public:
- // Pass identification, replacement for typeid
static char ID;
- explicit AddressSanitizerModule(bool CompileKernel = false,
- bool Recover = false,
- bool UseGlobalsGC = true,
- bool UseOdrIndicator = false)
- : ModulePass(ID), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ explicit AddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseAfterScope = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
+ UseAfterScope(UseAfterScope) {
+ initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "AddressSanitizerFunctionPass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ GlobalsMetadata &GlobalsMD =
+ getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover,
+ UseAfterScope);
+ return ASan.instrumentFunction(F, TLI);
+ }
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseAfterScope;
+};
+
+class ModuleAddressSanitizer {
+public:
+ ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseGlobalsGC = true, bool UseOdrIndicator = false)
+ : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
// Enable aliases as they should have no downside with ODR indicators.
UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
@@ -759,10 +738,15 @@ public:
this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
this->CompileKernel =
ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+ C = &(M.getContext());
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ TargetTriple = Triple(M.getTargetTriple());
+ Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
}
- bool runOnModule(Module &M) override;
- StringRef getPassName() const override { return "AddressSanitizerModule"; }
+ bool instrumentModule(Module &);
private:
void initializeCallbacks(Module &M);
@@ -810,19 +794,54 @@ private:
LLVMContext *C;
Triple TargetTriple;
ShadowMapping Mapping;
- Function *AsanPoisonGlobals;
- Function *AsanUnpoisonGlobals;
- Function *AsanRegisterGlobals;
- Function *AsanUnregisterGlobals;
- Function *AsanRegisterImageGlobals;
- Function *AsanUnregisterImageGlobals;
- Function *AsanRegisterElfGlobals;
- Function *AsanUnregisterElfGlobals;
+ FunctionCallee AsanPoisonGlobals;
+ FunctionCallee AsanUnpoisonGlobals;
+ FunctionCallee AsanRegisterGlobals;
+ FunctionCallee AsanUnregisterGlobals;
+ FunctionCallee AsanRegisterImageGlobals;
+ FunctionCallee AsanUnregisterImageGlobals;
+ FunctionCallee AsanRegisterElfGlobals;
+ FunctionCallee AsanUnregisterElfGlobals;
Function *AsanCtorFunction = nullptr;
Function *AsanDtorFunction = nullptr;
};
+class ModuleAddressSanitizerLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ explicit ModuleAddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseGlobalGC = true,
+ bool UseOdrIndicator = false)
+ : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover),
+ UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator) {
+ initializeModuleAddressSanitizerLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "ModuleAddressSanitizer"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ GlobalsMetadata &GlobalsMD =
+ getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover,
+ UseGlobalGC, UseOdrIndicator);
+ return ASanModule.instrumentModule(M);
+ }
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseGlobalGC;
+ bool UseOdrIndicator;
+};
+
// Stack poisoning does not play well with exception handling.
// When an exception is thrown, we essentially bypass the code
// that unpoisons the stack. This is why the run-time library has
@@ -846,11 +865,11 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
- Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
- *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
- Function *AsanSetShadowFunc[0x100] = {};
- Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
- Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc;
+ FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+ AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ FunctionCallee AsanSetShadowFunc[0x100] = {};
+ FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc;
+ FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
struct AllocaPoisonCall {
@@ -861,6 +880,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
};
SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec;
SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec;
+ bool HasUntracedLifetimeIntrinsic = false;
SmallVector<AllocaInst *, 1> DynamicAllocaVec;
SmallVector<IntrinsicInst *, 1> StackRestoreVec;
@@ -876,13 +896,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
- : F(F),
- ASan(ASan),
- DIB(*F.getParent(), /*AllowUnresolved*/ false),
- C(ASan.C),
- IntptrTy(ASan.IntptrTy),
- IntptrPtrTy(PointerType::get(IntptrTy, 0)),
- Mapping(ASan.Mapping),
+ : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
+ C(ASan.C), IntptrTy(ASan.IntptrTy),
+ IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
StackAlignment(1 << Mapping.Scale),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
@@ -899,6 +915,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
initializeCallbacks(*F.getParent());
+ if (HasUntracedLifetimeIntrinsic) {
+ // If there are lifetime intrinsics which couldn't be traced back to an
+ // alloca, we may not know exactly when a variable enters scope, and
+ // therefore should "fail safe" by not poisoning them.
+ StaticAllocaPoisonCallVec.clear();
+ DynamicAllocaPoisonCallVec.clear();
+ }
+
processDynamicAllocas();
processStaticAllocas();
@@ -950,8 +974,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
DynamicAreaOffset);
}
- IRB.CreateCall(AsanAllocasUnpoisonFunc,
- {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr});
+ IRB.CreateCall(
+ AsanAllocasUnpoisonFunc,
+ {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr});
}
// Unpoison dynamic allocas redzones.
@@ -1018,8 +1043,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
!ConstantInt::isValueValidForType(IntptrTy, SizeValue))
return;
// Find alloca instruction that corresponds to llvm.lifetime argument.
- AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
- if (!AI || !ASan.isInterestingAlloca(*AI))
+ AllocaInst *AI =
+ llvm::findAllocaForValue(II.getArgOperand(1), AllocaForValue);
+ if (!AI) {
+ HasUntracedLifetimeIntrinsic = true;
+ return;
+ }
+ // We're interested only in allocas we can handle.
+ if (!ASan.isInterestingAlloca(*AI))
return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
@@ -1042,16 +1073,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// ---------------------- Helpers.
void initializeCallbacks(Module &M);
- bool doesDominateAllExits(const Instruction *I) const {
- for (auto Ret : RetVec) {
- if (!ASan.getDominatorTree().dominates(I, Ret)) return false;
- }
- return true;
- }
-
- /// Finds alloca where the value comes from.
- AllocaInst *findAllocaForValue(Value *V);
-
// Copies bytes from ShadowBytes into shadow memory for indexes where
// ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
// ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
@@ -1074,16 +1095,111 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
} // end anonymous namespace
-char AddressSanitizer::ID = 0;
+void LocationMetadata::parse(MDNode *MDN) {
+ assert(MDN->getNumOperands() == 3);
+ MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+ Filename = DIFilename->getString();
+ LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
+ ColumnNo =
+ mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
+}
+
+// FIXME: It would be cleaner to attach the relevant metadata directly to the
+// globals we want to sanitize and to read it on each pass over a function,
+// instead of reading module-level metadata up front.
+GlobalsMetadata::GlobalsMetadata(Module &M) {
+ NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+ if (!Globals)
+ return;
+ for (auto MDN : Globals->operands()) {
+ // Metadata node contains the global and the fields of "Entry".
+ assert(MDN->getNumOperands() == 5);
+ auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
+ // The optimizer may optimize away a global entirely.
+ if (!V)
+ continue;
+ auto *StrippedV = V->stripPointerCasts();
+ auto *GV = dyn_cast<GlobalVariable>(StrippedV);
+ if (!GV)
+ continue;
+ // We can already have an entry for GV if it was merged with another
+ // global.
+ Entry &E = Entries[GV];
+ if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
+ E.SourceLoc.parse(Loc);
+ if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
+ E.Name = Name->getString();
+ ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3));
+ E.IsDynInit |= IsDynInit->isOne();
+ ConstantInt *IsBlacklisted =
+ mdconst::extract<ConstantInt>(MDN->getOperand(4));
+ E.IsBlacklisted |= IsBlacklisted->isOne();
+ }
+}
+
+AnalysisKey ASanGlobalsMetadataAnalysis::Key;
+
+GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ return GlobalsMetadata(M);
+}
+
+AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover,
+ bool UseAfterScope)
+ : CompileKernel(CompileKernel), Recover(Recover),
+ UseAfterScope(UseAfterScope) {}
+
+PreservedAnalyses AddressSanitizerPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ Module &M = *F.getParent();
+ if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
+ const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope);
+ if (Sanitizer.instrumentFunction(F, TLI))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+ }
+
+ report_fatal_error(
+ "The ASanGlobalsMetadataAnalysis is required to run before "
+ "AddressSanitizer can run");
+ return PreservedAnalyses::all();
+}
+
+ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel,
+ bool Recover,
+ bool UseGlobalGC,
+ bool UseOdrIndicator)
+ : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+ UseOdrIndicator(UseOdrIndicator) {}
+
+PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
+ AnalysisManager<Module> &AM) {
+ GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
+ ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover,
+ UseGlobalGC, UseOdrIndicator);
+ if (Sanitizer.instrumentModule(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
+ "Read metadata to mark which globals should be instrumented "
+ "when running ASan.",
+ false, true)
+
+char AddressSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
- AddressSanitizer, "asan",
+ AddressSanitizerLegacyPass, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
- AddressSanitizer, "asan",
+ AddressSanitizerLegacyPass, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
@@ -1091,24 +1207,22 @@ FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
bool Recover,
bool UseAfterScope) {
assert(!CompileKernel || Recover);
- return new AddressSanitizer(CompileKernel, Recover, UseAfterScope);
+ return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope);
}
-char AddressSanitizerModule::ID = 0;
+char ModuleAddressSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS(
- AddressSanitizerModule, "asan-module",
+ ModuleAddressSanitizerLegacyPass, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass",
false, false)
-ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
- bool Recover,
- bool UseGlobalsGC,
- bool UseOdrIndicator) {
+ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass(
+ bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator) {
assert(!CompileKernel || Recover);
- return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC,
- UseOdrIndicator);
+ return new ModuleAddressSanitizerLegacyPass(CompileKernel, Recover,
+ UseGlobalsGC, UseOdrIndicator);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1312,11 +1426,24 @@ static bool isPointerOperand(Value *V) {
// This is a rough heuristic; it may cause both false positives and
// false negatives. The proper implementation requires cooperation with
// the frontend.
-static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
+static bool isInterestingPointerComparison(Instruction *I) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
- if (!Cmp->isRelational()) return false;
- } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (BO->getOpcode() != Instruction::Sub) return false;
+ if (!Cmp->isRelational())
+ return false;
+ } else {
+ return false;
+ }
+ return isPointerOperand(I->getOperand(0)) &&
+ isPointerOperand(I->getOperand(1));
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerSubtraction(Instruction *I) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (BO->getOpcode() != Instruction::Sub)
+ return false;
} else {
return false;
}
@@ -1328,13 +1455,16 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have initializer
// at all, we assume it has dynamic initializer (in other TU).
+ //
+ // FIXME: Metadata should be attached directly to the global instead
+ // of being added to llvm.asan.globals.
return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
}
void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
Instruction *I) {
IRBuilder<> IRB(I);
- Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
+ FunctionCallee F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
for (Value *&i : Param) {
if (i->getType()->isPointerTy())
@@ -1392,7 +1522,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
IRBuilder<> IRB(InsertBefore);
InstrumentedAddress =
- IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
Granularity, ElemTypeSize, IsWrite, SizeArgument,
UseCalls, Exp);
@@ -1553,7 +1683,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
Value *ShadowValue =
- IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+ IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1ULL << Mapping.Scale;
@@ -1612,7 +1742,7 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
}
}
-void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
+void ModuleAddressSanitizer::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
IRBuilder<> IRB(&GlobalInit.front(),
@@ -1628,7 +1758,7 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
CallInst::Create(AsanUnpoisonGlobals, "", RI);
}
-void AddressSanitizerModule::createInitializerPoisonCalls(
+void ModuleAddressSanitizer::createInitializerPoisonCalls(
Module &M, GlobalValue *ModuleName) {
GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
if (!GV)
@@ -1653,10 +1783,12 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
}
}
-bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
+bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = G->getValueType();
LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+ // FIXME: Metadata should be attached directly to the global instead
+ // of being added to llvm.asan.globals.
if (GlobalsMD.get(G).IsBlacklisted) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
@@ -1768,7 +1900,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// On Mach-O platforms, we emit global metadata in a separate section of the
// binary in order to allow the linker to properly dead strip. This is only
// supported on recent versions of ld64.
-bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
+bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
if (!TargetTriple.isOSBinFormatMachO())
return false;
@@ -1782,7 +1914,7 @@ bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
return false;
}
-StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
+StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
switch (TargetTriple.getObjectFormat()) {
case Triple::COFF: return ".ASAN$GL";
case Triple::ELF: return "asan_globals";
@@ -1792,52 +1924,39 @@ StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
llvm_unreachable("unsupported object format");
}
-void AddressSanitizerModule::initializeCallbacks(Module &M) {
+void ModuleAddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Declare our poisoning and unpoisoning functions.
- AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy));
- AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanUnpoisonGlobalsName, IRB.getVoidTy()));
- AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+ AsanPoisonGlobals =
+ M.getOrInsertFunction(kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy);
+ AsanUnpoisonGlobals =
+ M.getOrInsertFunction(kAsanUnpoisonGlobalsName, IRB.getVoidTy());
// Declare functions that register/unregister globals.
- AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy));
- AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy));
- AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+ AsanRegisterGlobals = M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanUnregisterGlobals = M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
// Declare the functions that find globals in a shared object and then invoke
// the (un)register function on them.
- AsanRegisterImageGlobals =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
- AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage);
-
- AsanUnregisterImageGlobals =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
- AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
+ AsanRegisterImageGlobals = M.getOrInsertFunction(
+ kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+ AsanUnregisterImageGlobals = M.getOrInsertFunction(
+ kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
- AsanRegisterElfGlobals = checkSanitizerInterfaceFunction(
+ AsanRegisterElfGlobals =
M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy));
- AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage);
-
- AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction(
+ IntptrTy, IntptrTy, IntptrTy);
+ AsanUnregisterElfGlobals =
M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, IntptrTy));
- AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);
+ IntptrTy, IntptrTy, IntptrTy);
}
// Put the metadata and the instrumented global in the same group. This ensures
// that the metadata is discarded if the instrumented global is discarded.
-void AddressSanitizerModule::SetComdatForGlobalMetadata(
+void ModuleAddressSanitizer::SetComdatForGlobalMetadata(
GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
Module &M = *G->getParent();
Comdat *C = G->getComdat();
@@ -1875,7 +1994,7 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(
// Create a separate metadata global and put it in the appropriate ASan
// global registration section.
GlobalVariable *
-AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
+ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer,
StringRef OriginalName) {
auto Linkage = TargetTriple.isOSBinFormatMachO()
? GlobalVariable::InternalLinkage
@@ -1887,7 +2006,7 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
return Metadata;
}
-IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
+IRBuilder<> ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
AsanDtorFunction =
Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
@@ -1896,7 +2015,7 @@ IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) {
return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
}
-void AddressSanitizerModule::InstrumentGlobalsCOFF(
+void ModuleAddressSanitizer::InstrumentGlobalsCOFF(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers) {
assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -1920,7 +2039,7 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF(
}
}
-void AddressSanitizerModule::InstrumentGlobalsELF(
+void ModuleAddressSanitizer::InstrumentGlobalsELF(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers,
const std::string &UniqueModuleId) {
@@ -1979,7 +2098,7 @@ void AddressSanitizerModule::InstrumentGlobalsELF(
IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
}
-void AddressSanitizerModule::InstrumentGlobalsMachO(
+void ModuleAddressSanitizer::InstrumentGlobalsMachO(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers) {
assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -2036,7 +2155,7 @@ void AddressSanitizerModule::InstrumentGlobalsMachO(
{IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
}
-void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
+void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
ArrayRef<Constant *> MetadataInitializers) {
assert(ExtendedGlobals.size() == MetadataInitializers.size());
@@ -2070,9 +2189,9 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(
// redzones and inserts this function into llvm.global_ctors.
// Sets *CtorComdat to true if the global registration code emitted into the
// asan constructor is comdat-compatible.
-bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) {
+bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
+ bool *CtorComdat) {
*CtorComdat = false;
- GlobalsMD.init(M);
SmallVector<GlobalVariable *, 16> GlobalsToChange;
@@ -2115,6 +2234,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
+ // FIXME: Metadata should be attached directly to the global instead
+ // of being added to llvm.asan.globals.
auto MD = GlobalsMD.get(G);
StringRef NameForGlobal = G->getName();
// Create string holding the global name (use global name from metadata
@@ -2271,7 +2392,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool
return true;
}
-int AddressSanitizerModule::GetAsanVersion(const Module &M) const {
+int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
int LongSize = M.getDataLayout().getPointerSizeInBits();
bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
int Version = 8;
@@ -2281,12 +2402,7 @@ int AddressSanitizerModule::GetAsanVersion(const Module &M) const {
return Version;
}
-bool AddressSanitizerModule::runOnModule(Module &M) {
- C = &(M.getContext());
- int LongSize = M.getDataLayout().getPointerSizeInBits();
- IntptrTy = Type::getIntNTy(*C, LongSize);
- TargetTriple = Triple(M.getTargetTriple());
- Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
+bool ModuleAddressSanitizer::instrumentModule(Module &M) {
initializeCallbacks(M);
if (CompileKernel)
@@ -2346,51 +2462,49 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
Args2.push_back(ExpType);
Args1.push_back(ExpType);
}
- AsanErrorCallbackSized[AccessIsWrite][Exp] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args2, false)));
+ AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
- AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args2, false)));
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ M.getOrInsertFunction(
kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args1, false)));
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
- FunctionType::get(IRB.getVoidTy(), Args1, false)));
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
}
}
}
const std::string MemIntrinCallbackPrefix =
CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
- AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
-
- AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy()));
-
- AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy));
- AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy));
+ AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+
+ AsanHandleNoReturnFunc =
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy());
+
+ AsanPtrCmpFunction =
+ M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanPtrSubFunction =
+ M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy);
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -2400,25 +2514,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
ArrayType::get(IRB.getInt8Ty(), 0));
}
-// virtual
-bool AddressSanitizer::doInitialization(Module &M) {
- // Initialize the private fields. No one has accessed them before.
- GlobalsMD.init(M);
-
- C = &(M.getContext());
- LongSize = M.getDataLayout().getPointerSizeInBits();
- IntptrTy = Type::getIntNTy(*C, LongSize);
- TargetTriple = Triple(M.getTargetTriple());
-
- Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
- return true;
-}
-
-bool AddressSanitizer::doFinalization(Module &M) {
- GlobalsMD.reset();
- return false;
-}
-
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -2428,7 +2523,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
- Function *AsanInitFunction =
+ FunctionCallee AsanInitFunction =
declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
@@ -2460,7 +2555,7 @@ void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
} else {
Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
kAsanShadowMemoryDynamicAddress, IntptrTy);
- LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress);
+ LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
}
}
@@ -2492,7 +2587,8 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
}
}
-bool AddressSanitizer::runOnFunction(Function &F) {
+bool AddressSanitizer::instrumentFunction(Function &F,
+ const TargetLibraryInfo *TLI) {
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
if (F.getName().startswith("__asan_")) return false;
@@ -2511,7 +2607,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
FunctionStateRAII CleanupObj(this);
@@ -2532,8 +2627,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
bool IsWrite;
unsigned Alignment;
uint64_t TypeSize;
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@@ -2557,8 +2650,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
continue; // We've seen this temp in the current BB.
}
}
- } else if (ClInvalidPointerPairs &&
- isInterestingPointerComparisonOrSubtraction(&Inst)) {
+ } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) &&
+ isInterestingPointerComparison(&Inst)) ||
+ ((ClInvalidPointerPairs || ClInvalidPointerSub) &&
+ isInterestingPointerSubtraction(&Inst))) {
PointerComparisonsOrSubtracts.push_back(&Inst);
continue;
} else if (isa<MemIntrinsic>(Inst)) {
@@ -2569,7 +2664,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
+ if (CS.doesNotReturn() && !CS->getMetadata("nosanitize"))
+ NoReturnCalls.push_back(CS.getInstruction());
}
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
@@ -2606,7 +2702,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
FunctionStackPoisoner FSP(F, *this);
bool ChangedStack = FSP.runOnFunction();
- // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
+ // We must unpoison the stack before NoReturn calls (throw, _exit, etc).
// See e.g. https://github.com/google/sanitizers/issues/37
for (auto CI : NoReturnCalls) {
IRBuilder<> IRB(CI);
@@ -2643,20 +2739,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
std::string Suffix = itostr(i);
- AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
- IntptrTy));
- AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
+ AsanStackMallocFunc[i] = M.getOrInsertFunction(
+ kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy);
+ AsanStackFreeFunc[i] =
M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, IntptrTy));
+ IRB.getVoidTy(), IntptrTy, IntptrTy);
}
if (ASan.UseAfterScope) {
- AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
- IntptrTy, IntptrTy));
- AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
- IntptrTy, IntptrTy));
+ AsanPoisonStackMemoryFunc = M.getOrInsertFunction(
+ kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanUnpoisonStackMemoryFunc = M.getOrInsertFunction(
+ kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
}
for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
@@ -2664,15 +2757,13 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
Name << kAsanSetShadowPrefix;
Name << std::setw(2) << std::setfill('0') << std::hex << Val;
AsanSetShadowFunc[Val] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy));
+ M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy);
}
- AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
- AsanAllocasUnpoisonFunc =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
+ AsanAllocaPoisonFunc = M.getOrInsertFunction(
+ kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanAllocasUnpoisonFunc = M.getOrInsertFunction(
+ kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
}
void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
@@ -2958,7 +3049,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
Value *FakeStack;
Value *LocalStackBase;
Value *LocalStackBaseAlloca;
- bool Deref;
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
if (DoStackMalloc) {
LocalStackBaseAlloca =
@@ -2969,9 +3060,9 @@ void FunctionStackPoisoner::processStaticAllocas() {
// void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize);
Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal(
kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty());
- Value *UseAfterReturnIsEnabled =
- IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUseAfterReturn),
- Constant::getNullValue(IRB.getInt32Ty()));
+ Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE(
+ IRB.CreateLoad(IRB.getInt32Ty(), OptionDetectUseAfterReturn),
+ Constant::getNullValue(IRB.getInt32Ty()));
Instruction *Term =
SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false);
IRBuilder<> IRBIf(Term);
@@ -2999,7 +3090,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack);
IRB.SetCurrentDebugLocation(EntryDebugLocation);
IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca);
- Deref = true;
+ DIExprFlags |= DIExpression::DerefBefore;
} else {
// void *FakeStack = nullptr;
// void *LocalStackBase = alloca(LocalStackSize);
@@ -3007,14 +3098,13 @@ void FunctionStackPoisoner::processStaticAllocas() {
LocalStackBase =
DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
LocalStackBaseAlloca = LocalStackBase;
- Deref = false;
}
// Replace Alloca instructions with base+offset.
for (const auto &Desc : SVD) {
AllocaInst *AI = Desc.AI;
- replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, Deref,
- Desc.Offset, DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, DIExprFlags,
+ Desc.Offset);
Value *NewAllocaPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
@@ -3105,7 +3195,7 @@ void FunctionStackPoisoner::processStaticAllocas() {
FakeStack,
ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
Value *SavedFlagPtr = IRBPoison.CreateLoad(
- IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+ IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
IRBPoison.CreateStore(
Constant::getNullValue(IRBPoison.getInt8Ty()),
IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
@@ -3145,41 +3235,6 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
// variable may go in and out of scope several times, e.g. in loops).
// (3) if we poisoned at least one %alloca in a function,
// unpoison the whole stack frame at function exit.
-
-AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
- // We're interested only in allocas we can handle.
- return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
- // See if we've already calculated (or started to calculate) alloca for a
- // given value.
- AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end()) return I->second;
- // Store 0 while we're calculating alloca for value V to avoid
- // infinite recursion if the value references itself.
- AllocaForValue[V] = nullptr;
- AllocaInst *Res = nullptr;
- if (CastInst *CI = dyn_cast<CastInst>(V))
- Res = findAllocaForValue(CI->getOperand(0));
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : PN->incoming_values()) {
- // Allow self-referencing phi-nodes.
- if (IncValue == PN) continue;
- AllocaInst *IncValueAI = findAllocaForValue(IncValue);
- // AI for incoming values should exist and should all be equal.
- if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
- return nullptr;
- Res = IncValueAI;
- }
- } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
- Res = findAllocaForValue(EP->getPointerOperand());
- } else {
- LLVM_DEBUG(dbgs() << "Alloca search canceled on unknown instruction: " << *V
- << "\n");
- }
- if (Res) AllocaForValue[V] = Res;
- return Res;
-}
-
void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
IRBuilder<> IRB(AI);
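The AddressSanitizer hunks above reflect two mechanical API migrations carried by this import: Module::getOrInsertFunction() now returns a FunctionCallee (so the checkSanitizerInterfaceFunction() wrappers are dropped), and IRBuilder loads spell out the loaded type instead of deriving it from the pointer operand. The following standalone sketch is not part of the imported sources; emitRuntimeCheck and the runtime symbol __example_check are hypothetical names used only to illustrate the two idioms.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Declare a hypothetical void __example_check(intptr_t) runtime hook and emit
// a call to it with a value loaded from Addr (assumed to point at an
// intptr-sized slot).
static CallInst *emitRuntimeCheck(Module &M, IRBuilder<> &IRB, Value *Addr) {
  Type *IntptrTy = M.getDataLayout().getIntPtrType(M.getContext());
  // FunctionCallee carries both the callee and its FunctionType, so no
  // cast-and-check dance is needed after getOrInsertFunction().
  FunctionCallee Check =
      M.getOrInsertFunction("__example_check", IRB.getVoidTy(), IntptrTy);
  // Loads now name their result type explicitly (groundwork for opaque
  // pointers), matching the CreateLoad(IntptrTy, ...) changes above.
  Value *Slot = IRB.CreateLoad(IntptrTy, Addr);
  return IRB.CreateCall(Check, {Slot});
}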
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index a0c78e0468c6..4dc9b611c156 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -1,9 +1,8 @@
//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -143,8 +142,9 @@ static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
ScalarEvolution &SE) {
const DataLayout &DL = F.getParent()->getDataLayout();
- ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(),
- /*RoundToAlign=*/true);
+ ObjectSizeOpts EvalOpts;
+ EvalOpts.RoundToAlign = true;
+ ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
// check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
// touching instructions
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
index e178ef386e68..971e00041762 100644
--- a/lib/Transforms/Instrumentation/CFGMST.h
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -1,9 +1,8 @@
//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -196,11 +195,10 @@ public:
// Sort CFG edges based on their weight.
void sortEdgesByWeight() {
- std::stable_sort(AllEdges.begin(), AllEdges.end(),
- [](const std::unique_ptr<Edge> &Edge1,
- const std::unique_ptr<Edge> &Edge2) {
- return Edge1->Weight > Edge2->Weight;
- });
+ llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
+ const std::unique_ptr<Edge> &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
}
// Traverse all the edges and compute the Minimum Weight Spanning Tree
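The CFGMST change above swaps std::stable_sort on begin()/end() iterators for the range-based llvm::stable_sort helper from llvm/ADT/STLExtras.h. A minimal standalone sketch of the same call, assuming a WeightedEdge stand-in for the CFGMST Edge class (hypothetical type, not from the diff):

#include "llvm/ADT/STLExtras.h"
#include <cstdint>
#include <memory>
#include <vector>

struct WeightedEdge { uint64_t Weight; };

// Heavier edges first; llvm::stable_sort takes the container directly.
static void sortEdgesByWeight(
    std::vector<std::unique_ptr<WeightedEdge>> &AllEdges) {
  llvm::stable_sort(AllEdges, [](const std::unique_ptr<WeightedEdge> &A,
                                 const std::unique_ptr<WeightedEdge> &B) {
    return A->Weight > B->Weight;
  });
}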
diff --git a/lib/Transforms/Instrumentation/CGProfile.cpp b/lib/Transforms/Instrumentation/CGProfile.cpp
index cdcd01726906..358abab3cceb 100644
--- a/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -1,9 +1,8 @@
//===-- CGProfile.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 1ada0b713092..3f4f9bc7145d 100644
--- a/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1,9 +1,8 @@
//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -547,19 +546,25 @@ static std::set<Value *> getBaseValues(Value *V,
static bool
checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
DenseSet<Instruction *> &Unhoistables,
- DenseSet<Instruction *> *HoistStops) {
+ DenseSet<Instruction *> *HoistStops,
+ DenseMap<Instruction *, bool> &Visited) {
assert(InsertPoint && "Null InsertPoint");
if (auto *I = dyn_cast<Instruction>(V)) {
+ if (Visited.count(I)) {
+ return Visited[I];
+ }
assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
if (Unhoistables.count(I)) {
// Don't hoist if they are not to be hoisted.
+ Visited[I] = false;
return false;
}
if (DT.dominates(I, InsertPoint)) {
// We are already above the insert point. Stop here.
if (HoistStops)
HoistStops->insert(I);
+ Visited[I] = true;
return true;
}
// We aren't above the insert point; check if we can hoist it above the
@@ -569,7 +574,8 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
DenseSet<Instruction *> OpsHoistStops;
bool AllOpsHoisted = true;
for (Value *Op : I->operands()) {
- if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
+ Visited)) {
AllOpsHoisted = false;
break;
}
@@ -578,9 +584,11 @@ checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
if (HoistStops)
HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ Visited[I] = true;
return true;
}
}
+ Visited[I] = false;
return false;
}
// Non-instructions are considered hoistable.
@@ -893,8 +901,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
++it;
continue;
}
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
- DT, Unhoistables, nullptr);
+ DT, Unhoistables, nullptr, Visited);
if (!IsHoistable) {
CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
ORE.emit([&]() {
@@ -913,8 +922,9 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
InsertPoint = getBranchInsertPoint(RI);
CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
if (RI.HasBranch && InsertPoint != Branch) {
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
- DT, Unhoistables, nullptr);
+ DT, Unhoistables, nullptr, Visited);
if (!IsHoistable) {
// If the branch isn't hoistable, drop the selects in the entry
// block, preferring the branch, which makes the branch the hoist
@@ -945,15 +955,17 @@ void CHR::checkScopeHoistable(CHRScope *Scope) {
if (RI.HasBranch) {
assert(!DT.dominates(Branch, InsertPoint) &&
"Branch can't be already above the hoist point");
+ DenseMap<Instruction *, bool> Visited;
assert(checkHoistValue(Branch->getCondition(), InsertPoint,
- DT, Unhoistables, nullptr) &&
+ DT, Unhoistables, nullptr, Visited) &&
"checkHoistValue for branch");
}
for (auto *SI : Selects) {
assert(!DT.dominates(SI, InsertPoint) &&
"SI can't be already above the hoist point");
+ DenseMap<Instruction *, bool> Visited;
assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
- Unhoistables, nullptr) &&
+ Unhoistables, nullptr, Visited) &&
"checkHoistValue for selects");
}
CHR_DEBUG(dbgs() << "Result\n");
@@ -1054,7 +1066,8 @@ static bool shouldSplit(Instruction *InsertPoint,
assert(InsertPoint && "Null InsertPoint");
// If any of Bases isn't hoistable to the hoist point, split.
for (Value *V : ConditionValues) {
- if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+ DenseMap<Instruction *, bool> Visited;
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
return true; // Not hoistable, split.
}
@@ -1383,8 +1396,9 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
"Must be truthy or falsy");
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
// Note checkHoistValue fills in HoistStops.
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
- Unhoistables, &HoistStops);
+ Unhoistables, &HoistStops, Visited);
assert(IsHoistable && "Must be hoistable");
(void)(IsHoistable); // Unused in release build
IsHoisted = true;
@@ -1394,8 +1408,9 @@ void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
"Must be true or false biased");
// Note checkHoistValue fills in HoistStops.
+ DenseMap<Instruction *, bool> Visited;
bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
- Unhoistables, &HoistStops);
+ Unhoistables, &HoistStops, Visited);
assert(IsHoistable && "Must be hoistable");
(void)(IsHoistable); // Unused in release build
IsHoisted = true;
@@ -1417,7 +1432,7 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
SmallVectorImpl<CHRScope *> &Output) {
Output.resize(Input.size());
llvm::copy(Input, Output.begin());
- std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+ llvm::stable_sort(Output, CHRScopeSorter);
}
// Return true if V is already hoisted or was hoisted (along with its operands)
@@ -1425,7 +1440,8 @@ void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
HoistStopMapTy &HoistStopMap,
DenseSet<Instruction *> &HoistedSet,
- DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<PHINode *> &TrivialPHIs,
+ DominatorTree &DT) {
auto IT = HoistStopMap.find(R);
assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
DenseSet<Instruction *> &HoistStops = IT->second;
@@ -1445,8 +1461,21 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
// Already hoisted, return.
return;
assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ assert(DT.getNode(I->getParent()) && "DT must contain I's block");
+ assert(DT.getNode(HoistPoint->getParent()) &&
+ "DT must contain HoistPoint block");
+ if (DT.dominates(I, HoistPoint))
+ // We are already above the hoist point. Stop here. This may be necessary
+ // when multiple scopes would independently hoist the same instruction.
+ // Since an outer (dominating) scope would hoist it to its entry before an
+ // inner (dominated) scope would hoist it to its own entry, the inner scope
+ // may see the instruction already hoisted, in which case it is potentially
+ // wrong for the inner scope to hoist it again and could cause bad IR (a
+ // non-dominating def); it is safe to skip hoisting it instead because it
+ // already sits in a block that dominates the inner scope.
+ return;
for (Value *Op : I->operands()) {
- hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
}
I->moveBefore(HoistPoint);
HoistedSet.insert(I);
@@ -1457,7 +1486,8 @@ static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
// Hoist the dependent condition values of the branches and the selects in the
// scope to the insert point.
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
- DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<PHINode *> &TrivialPHIs,
+ DominatorTree &DT) {
DenseSet<Instruction *> HoistedSet;
for (const RegInfo &RI : Scope->CHRRegions) {
Region *R = RI.R;
@@ -1466,7 +1496,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
- HoistedSet, TrivialPHIs);
+ HoistedSet, TrivialPHIs, DT);
}
for (SelectInst *SI : RI.Selects) {
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
@@ -1474,7 +1504,7 @@ static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
if (!(IsTrueBiased || IsFalseBiased))
continue;
hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
- HoistedSet, TrivialPHIs);
+ HoistedSet, TrivialPHIs, DT);
}
}
}
@@ -1708,7 +1738,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
#endif
// Hoist the conditional values of the branches/selects.
- hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
#ifndef NDEBUG
assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
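The ControlHeightReduction changes above thread a DenseMap<Instruction *, bool> Visited cache through checkHoistValue, so a value reached through several operand chains is evaluated once per query instead of once per path. Below is a simplified, LLVM-free analogue of that memoized recursion; Node, Hoistable, and checkHoistable are illustrative names, and std::unordered_map stands in for DenseMap.

#include <unordered_map>
#include <vector>

struct Node {
  bool Hoistable;                 // stands in for the per-instruction checks
  std::vector<const Node *> Ops;  // operands / children
};

static bool checkHoistable(const Node *N,
                           std::unordered_map<const Node *, bool> &Visited) {
  auto It = Visited.find(N);
  if (It != Visited.end())
    return It->second;            // already decided for this node
  Visited[N] = false;             // provisional entry also guards against cycles
  if (!N->Hoistable)
    return false;                 // stays cached as false
  for (const Node *Op : N->Ops)
    if (!checkHoistable(Op, Visited))
      return false;
  return Visited[N] = true;       // record and return the final answer
}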
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 4c3c6c9added..2279c1bcb6a8 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1,9 +1,8 @@
//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -333,6 +332,8 @@ class DataFlowSanitizer : public ModulePass {
Constant *RetvalTLS;
void *(*GetArgTLSPtr)();
void *(*GetRetvalTLSPtr)();
+ FunctionType *GetArgTLSTy;
+ FunctionType *GetRetvalTLSTy;
Constant *GetArgTLS;
Constant *GetRetvalTLS;
Constant *ExternalShadowMask;
@@ -342,13 +343,13 @@ class DataFlowSanitizer : public ModulePass {
FunctionType *DFSanSetLabelFnTy;
FunctionType *DFSanNonzeroLabelFnTy;
FunctionType *DFSanVarargWrapperFnTy;
- Constant *DFSanUnionFn;
- Constant *DFSanCheckedUnionFn;
- Constant *DFSanUnionLoadFn;
- Constant *DFSanUnimplementedFn;
- Constant *DFSanSetLabelFn;
- Constant *DFSanNonzeroLabelFn;
- Constant *DFSanVarargWrapperFn;
+ FunctionCallee DFSanUnionFn;
+ FunctionCallee DFSanCheckedUnionFn;
+ FunctionCallee DFSanUnionLoadFn;
+ FunctionCallee DFSanUnimplementedFn;
+ FunctionCallee DFSanSetLabelFn;
+ FunctionCallee DFSanNonzeroLabelFn;
+ FunctionCallee DFSanVarargWrapperFn;
MDNode *ColdCallWeights;
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
@@ -436,6 +437,7 @@ public:
}
void visitOperandShadowInst(Instruction &I);
+ void visitUnaryOperator(UnaryOperator &UO);
void visitBinaryOperator(BinaryOperator &BO);
void visitCastInst(CastInst &CI);
void visitCmpInst(CmpInst &CI);
@@ -581,17 +583,17 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
if (GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
ArgTLS = nullptr;
+ GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
GetArgTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
- PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ArgTLSTy), false)));
+ PointerType::getUnqual(GetArgTLSTy));
}
if (GetRetvalTLSPtr) {
RetvalTLS = nullptr;
+ GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
GetRetvalTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
- PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ShadowTy), false)));
+ PointerType::getUnqual(GetRetvalTLSTy));
}
ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
@@ -678,8 +680,8 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
StringRef FName) {
FunctionType *FTT = getTrampolineFunctionType(FT);
- Constant *C = Mod->getOrInsertFunction(FName, FTT);
- Function *F = dyn_cast<Function>(C);
+ FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
+ Function *F = dyn_cast<Function>(C.getCallee());
if (F && F->isDeclaration()) {
F->setLinkage(GlobalValue::LinkOnceODRLinkage);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
@@ -687,7 +689,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
Function::arg_iterator AI = F->arg_begin(); ++AI;
for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
Args.push_back(&*AI);
- CallInst *CI = CallInst::Create(&*F->arg_begin(), Args, "", BB);
+ CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
ReturnInst *RI;
if (FT->getReturnType()->isVoidTy())
RI = ReturnInst::Create(*Ctx, BB);
@@ -704,7 +706,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
&*std::prev(F->arg_end()), RI);
}
- return C;
+ return cast<Constant>(C.getCallee());
}
bool DataFlowSanitizer::runOnModule(Module &M) {
@@ -726,35 +728,51 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
ExternalShadowMask =
Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
- DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
- if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
- F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
- F->addParamAttr(0, Attribute::ZExt);
- F->addParamAttr(1, Attribute::ZExt);
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadNone);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+ DFSanUnionFn =
+ Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
}
- DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy);
- if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) {
- F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
- F->addParamAttr(0, Attribute::ZExt);
- F->addParamAttr(1, Attribute::ZExt);
+
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadNone);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+ DFSanCheckedUnionFn =
+ Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
}
- DFSanUnionLoadFn =
- Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
- if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
- F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
- F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ DFSanUnionLoadFn =
+ Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
}
DFSanUnimplementedFn =
Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
- DFSanSetLabelFn =
- Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy);
- if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) {
- F->addParamAttr(0, Attribute::ZExt);
+ {
+ AttributeList AL;
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ DFSanSetLabelFn =
+ Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
}
DFSanNonzeroLabelFn =
Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
@@ -765,13 +783,13 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
SmallPtrSet<Function *, 2> FnsWithNativeABI;
for (Function &i : M) {
if (!i.isIntrinsic() &&
- &i != DFSanUnionFn &&
- &i != DFSanCheckedUnionFn &&
- &i != DFSanUnionLoadFn &&
- &i != DFSanUnimplementedFn &&
- &i != DFSanSetLabelFn &&
- &i != DFSanNonzeroLabelFn &&
- &i != DFSanVarargWrapperFn)
+ &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
FnsToInstrument.push_back(&i);
}
@@ -982,7 +1000,7 @@ Value *DFSanFunction::getArgTLSPtr() {
return ArgTLSPtr = DFS.ArgTLS;
IRBuilder<> IRB(&F->getEntryBlock().front());
- return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {});
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
}
Value *DFSanFunction::getRetvalTLS() {
@@ -992,12 +1010,14 @@ Value *DFSanFunction::getRetvalTLS() {
return RetvalTLSPtr = DFS.RetvalTLS;
IRBuilder<> IRB(&F->getEntryBlock().front());
- return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {});
+ return RetvalTLSPtr =
+ IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
}
Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
IRBuilder<> IRB(Pos);
- return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx);
+ return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
+ getArgTLSPtr(), 0, Idx);
}
Value *DFSanFunction::getShadow(Value *V) {
@@ -1015,7 +1035,8 @@ Value *DFSanFunction::getShadow(Value *V) {
DFS.ArgTLS ? &*F->getEntryBlock().begin()
: cast<Instruction>(ArgTLSPtr)->getNextNode();
IRBuilder<> IRB(ArgTLSPos);
- Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos));
+ Shadow =
+ IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
break;
}
case DataFlowSanitizer::IA_Args: {
@@ -1165,15 +1186,15 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
const auto i = AllocaShadowMap.find(AI);
if (i != AllocaShadowMap.end()) {
IRBuilder<> IRB(Pos);
- return IRB.CreateLoad(i->second);
+ return IRB.CreateLoad(DFS.ShadowTy, i->second);
}
}
uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
- SmallVector<Value *, 2> Objs;
+ SmallVector<const Value *, 2> Objs;
GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
bool AllConstants = true;
- for (Value *Obj : Objs) {
+ for (const Value *Obj : Objs) {
if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
continue;
if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
@@ -1190,7 +1211,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
case 0:
return DFS.ZeroShadow;
case 1: {
- LoadInst *LI = new LoadInst(ShadowAddr, "", Pos);
+ LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
LI->setAlignment(ShadowAlign);
return LI;
}
@@ -1198,8 +1219,9 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> IRB(Pos);
Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
ConstantInt::get(DFS.IntptrTy, 1));
- return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),
- IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);
+ return combineShadows(
+ IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
}
}
if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
@@ -1218,7 +1240,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> IRB(Pos);
Value *WideAddr =
IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
- Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *WideShadow =
+ IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
@@ -1251,7 +1274,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> NextIRB(NextBB);
WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
ConstantInt::get(DFS.IntptrTy, 1));
- Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);
+ Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
+ WideAddr, ShadowAlign);
ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
LastBr->setSuccessor(0, NextBB);
LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
@@ -1375,6 +1399,10 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
}
+void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
+ visitOperandShadowInst(UO);
+}
+
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
visitOperandShadowInst(BO);
}
@@ -1470,7 +1498,7 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
auto *MTI = cast<MemTransferInst>(
- IRB.CreateCall(I.getCalledValue(),
+ IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
{DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
if (ClPreserveAlignment) {
MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
@@ -1513,7 +1541,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
// Calls to this function are synthesized in wrappers, and we shouldn't
// instrument them.
- if (F == DFSF.DFS.DFSanVarargWrapperFn)
+ if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
return;
IRBuilder<> IRB(CS.getInstruction());
@@ -1546,9 +1574,9 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
std::string CustomFName = "__dfsw_";
CustomFName += F->getName();
- Constant *CustomF = DFSF.DFS.Mod->getOrInsertFunction(
+ FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
CustomFName, CustomFn.TransformedType);
- if (Function *CustomFn = dyn_cast<Function>(CustomF)) {
+ if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
CustomFn->copyAttributesFrom(F);
// Custom functions returning non-void will write to the return label.
@@ -1628,7 +1656,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
if (!FT->getReturnType()->isVoidTy()) {
- LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
+ LoadInst *LabelLoad =
+ IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
DFSF.setShadow(CustomCI, LabelLoad);
}
@@ -1666,7 +1695,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
IRBuilder<> NextIRB(Next);
- LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS());
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
DFSF.SkipInsts.insert(LI);
DFSF.setShadow(CS.getInstruction(), LI);
DFSF.NonZeroChecks.push_back(LI);
@@ -1706,10 +1735,10 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
- NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(),
- Args);
+ NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
+ II->getUnwindDest(), Args);
} else {
- NewCS = IRB.CreateCall(Func, Args);
+ NewCS = IRB.CreateCall(NewFT, Func, Args);
}
NewCS.setCallingConv(CS.getCallingConv());
NewCS.setAttributes(CS.getAttributes().removeAttributes(
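The DataFlowSanitizer hunks above convert the runtime-helper declarations to FunctionCallee and attach their attributes via an AttributeList passed to getOrInsertFunction(), instead of dyn_cast'ing the returned constant to Function and mutating it. A rough standalone sketch of that pattern, assuming the attribute set of the __dfsan_union declaration above (declareUnionHelper and __example_union are hypothetical names):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static FunctionCallee declareUnionHelper(Module &M, Type *ShadowTy) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FnTy =
      FunctionType::get(ShadowTy, {ShadowTy, ShadowTy}, /*isVarArg=*/false);
  AttributeList AL;
  AL = AL.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::NoUnwind);
  AL = AL.addAttribute(Ctx, AttributeList::FunctionIndex, Attribute::ReadNone);
  AL = AL.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
  AL = AL.addParamAttribute(Ctx, 0, Attribute::ZExt);
  AL = AL.addParamAttribute(Ctx, 1, Attribute::ZExt);
  // The attributes ride along with the declaration, so the FunctionCallee can
  // be used directly without inspecting the underlying Constant.
  return M.getOrInsertFunction("__example_union", FnTy, AL);
}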
diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
deleted file mode 100644
index db438e78ded9..000000000000
--- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ /dev/null
@@ -1,900 +0,0 @@
-//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of EfficiencySanitizer, a family of performance tuners
-// that detects multiple performance issues via separate sub-tools.
-//
-// The instrumentation phase is straightforward:
-// - Take action on every memory access: either inlined instrumentation,
-// or inserted calls to our run-time library.
-// - Optimizations may apply to avoid instrumenting some of the accesses.
-// - Turn mem{set,cpy,move} intrinsics into library calls.
-// The rest is handled by the run-time library.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "esan"
-
-// The tool type must be just one of these ClTool* options, as the tools
-// cannot be combined due to shadow memory constraints.
-static cl::opt<bool>
- ClToolCacheFrag("esan-cache-frag", cl::init(false),
- cl::desc("Detect data cache fragmentation"), cl::Hidden);
-static cl::opt<bool>
- ClToolWorkingSet("esan-working-set", cl::init(false),
- cl::desc("Measure the working set size"), cl::Hidden);
-// Each new tool will get its own opt flag here.
-// These are converted to EfficiencySanitizerOptions for use
-// in the code.
-
-static cl::opt<bool> ClInstrumentLoadsAndStores(
- "esan-instrument-loads-and-stores", cl::init(true),
- cl::desc("Instrument loads and stores"), cl::Hidden);
-static cl::opt<bool> ClInstrumentMemIntrinsics(
- "esan-instrument-memintrinsics", cl::init(true),
- cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
-static cl::opt<bool> ClInstrumentFastpath(
- "esan-instrument-fastpath", cl::init(true),
- cl::desc("Instrument fastpath"), cl::Hidden);
-static cl::opt<bool> ClAuxFieldInfo(
- "esan-aux-field-info", cl::init(true),
- cl::desc("Generate binary with auxiliary struct field information"),
- cl::Hidden);
-
-// Experiments show that the performance difference can be 2x or more,
-// and accuracy loss is typically negligible, so we turn this on by default.
-static cl::opt<bool> ClAssumeIntraCacheLine(
- "esan-assume-intra-cache-line", cl::init(true),
- cl::desc("Assume each memory access touches just one cache line, for "
- "better performance but with a potential loss of accuracy."),
- cl::Hidden);
-
-STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
-STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
-STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
-STATISTIC(NumAccessesWithIrregularSize,
- "Number of accesses with a size outside our targeted callout sizes");
-STATISTIC(NumIgnoredStructs, "Number of ignored structs");
-STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
-STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
-STATISTIC(NumAssumedIntraCacheLine,
- "Number of accesses assumed to be intra-cache-line");
-
-static const uint64_t EsanCtorAndDtorPriority = 0;
-static const char *const EsanModuleCtorName = "esan.module_ctor";
-static const char *const EsanModuleDtorName = "esan.module_dtor";
-static const char *const EsanInitName = "__esan_init";
-static const char *const EsanExitName = "__esan_exit";
-
-// We need to specify the tool to the runtime earlier than
-// the ctor is called in some cases, so we set a global variable.
-static const char *const EsanWhichToolName = "__esan_which_tool";
-
-// We must keep these Shadow* constants consistent with the esan runtime.
-// FIXME: Try to place these shadow constants, the names of the __esan_*
-// interface functions, and the ToolType enum into a header shared between
-// llvm and compiler-rt.
-struct ShadowMemoryParams {
- uint64_t ShadowMask;
- uint64_t ShadowOffs[3];
-};
-
-static const ShadowMemoryParams ShadowParams47 = {
- 0x00000fffffffffffull,
- {
- 0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
- }};
-
-static const ShadowMemoryParams ShadowParams40 = {
- 0x0fffffffffull,
- {
- 0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
- }};
-
-// This array is indexed by the ToolType enum.
-static const int ShadowScale[] = {
- 0, // ESAN_None.
- 2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
- 6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
-};
-
-// MaxStructCounterNameSize is a soft size limit to avoid insanely long
-// names for those extremely large structs.
-static const unsigned MaxStructCounterNameSize = 512;
-
-namespace {
-
-static EfficiencySanitizerOptions
-OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
- if (ClToolCacheFrag)
- Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
- else if (ClToolWorkingSet)
- Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;
-
- // Direct opt invocation with no params will have the default ESAN_None.
- // We run the default tool in that case.
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
- Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
-
- return Options;
-}
-
-/// EfficiencySanitizer: instrument each module to find performance issues.
-class EfficiencySanitizer : public ModulePass {
-public:
- EfficiencySanitizer(
- const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
- : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
- StringRef getPassName() const override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnModule(Module &M) override;
- static char ID;
-
-private:
- bool initOnModule(Module &M);
- void initializeCallbacks(Module &M);
- bool shouldIgnoreStructType(StructType *StructTy);
- void createStructCounterName(
- StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
- void createCacheFragAuxGV(
- Module &M, const DataLayout &DL, StructType *StructTy,
- GlobalVariable *&TypeNames, GlobalVariable *&Offsets, GlobalVariable *&Size);
- GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
- Constant *UnitName);
- Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
- void createDestructor(Module &M, Constant *ToolInfoArg);
- bool runOnFunction(Function &F, Module &M);
- bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
- bool instrumentMemIntrinsic(MemIntrinsic *MI);
- bool instrumentGetElementPtr(Instruction *I, Module &M);
- bool insertCounterUpdate(Instruction *I, StructType *StructTy,
- unsigned CounterIdx);
- unsigned getFieldCounterIdx(StructType *StructTy) {
- return 0;
- }
- unsigned getArrayCounterIdx(StructType *StructTy) {
- return StructTy->getNumElements();
- }
- unsigned getStructCounterSize(StructType *StructTy) {
- // The struct counter array includes:
- // - one counter for each struct field,
- // - one counter for the struct access within an array.
- return (StructTy->getNumElements()/*field*/ + 1/*array*/);
- }
- bool shouldIgnoreMemoryAccess(Instruction *I);
- int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
- Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
- bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
- Value *Addr, unsigned Alignment);
- // Each tool has its own fastpath routine:
- bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
- Value *Addr, unsigned Alignment);
- bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
- Value *Addr, unsigned Alignment);
-
- EfficiencySanitizerOptions Options;
- LLVMContext *Ctx;
- Type *IntptrTy;
- // Our slowpath involves callouts to the runtime library.
- // Access sizes are powers of two: 1, 2, 4, 8, 16.
- static const size_t NumberOfAccessSizes = 5;
- Function *EsanAlignedLoad[NumberOfAccessSizes];
- Function *EsanAlignedStore[NumberOfAccessSizes];
- Function *EsanUnalignedLoad[NumberOfAccessSizes];
- Function *EsanUnalignedStore[NumberOfAccessSizes];
- // For irregular sizes of any alignment:
- Function *EsanUnalignedLoadN, *EsanUnalignedStoreN;
- Function *MemmoveFn, *MemcpyFn, *MemsetFn;
- Function *EsanCtorFunction;
- Function *EsanDtorFunction;
- // Remember the counter variable for each struct type to avoid
- // recomputing the variable name later during instrumentation.
- std::map<Type *, GlobalVariable *> StructTyMap;
- ShadowMemoryParams ShadowParams;
-};
-} // namespace
-
-char EfficiencySanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(
- EfficiencySanitizer, "esan",
- "EfficiencySanitizer: finds performance issues.", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- EfficiencySanitizer, "esan",
- "EfficiencySanitizer: finds performance issues.", false, false)
-
-StringRef EfficiencySanitizer::getPassName() const {
- return "EfficiencySanitizer";
-}
-
-void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
-ModulePass *
-llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
- return new EfficiencySanitizer(Options);
-}
-
-void EfficiencySanitizer::initializeCallbacks(Module &M) {
- IRBuilder<> IRB(M.getContext());
- // Initialize the callbacks.
- for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
- const unsigned ByteSize = 1U << Idx;
- std::string ByteSizeStr = utostr(ByteSize);
- // We'll inline the most common (i.e., aligned and frequent sizes)
- // load + store instrumentation: these callouts are for the slowpath.
- SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
- EsanAlignedLoad[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
- EsanAlignedStore[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
- EsanUnalignedLoad[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
- EsanUnalignedStore[Idx] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- }
- EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemmoveFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemcpyFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemsetFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy));
-}
-
-bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
- if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
- return true;
- return false;
-}
-
-void EfficiencySanitizer::createStructCounterName(
- StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
- // Append NumFields and field type ids to avoid struct conflicts
- // with the same name but different fields.
- if (StructTy->hasName())
- NameStr += StructTy->getName();
- else
- NameStr += "struct.anon";
- // We allow the actual size of the StructCounterName to be larger than
- // MaxStructCounterNameSize and append $NumFields and at least one
- // field type id.
- // Append $NumFields.
- NameStr += "$";
- Twine(StructTy->getNumElements()).toVector(NameStr);
- // Append struct field type ids in the reverse order.
- for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
- NameStr += "$";
- Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
- if (NameStr.size() >= MaxStructCounterNameSize)
- break;
- }
- if (StructTy->isLiteral()) {
- // End with $ for literal struct.
- NameStr += "$";
- }
-}
-
-// Create global variables with auxiliary information (e.g., struct field size,
-// offset, and type name) for better user report.
-void EfficiencySanitizer::createCacheFragAuxGV(
- Module &M, const DataLayout &DL, StructType *StructTy,
- GlobalVariable *&TypeName, GlobalVariable *&Offset,
- GlobalVariable *&Size) {
- auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- auto *Int32Ty = Type::getInt32Ty(*Ctx);
- // FieldTypeName.
- auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
- TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
- GlobalVariable::InternalLinkage, nullptr);
- SmallVector<Constant *, 16> TypeNameVec;
- // FieldOffset.
- auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
- Offset = new GlobalVariable(M, OffsetArrayTy, true,
- GlobalVariable::InternalLinkage, nullptr);
- SmallVector<Constant *, 16> OffsetVec;
- // FieldSize
- auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
- Size = new GlobalVariable(M, SizeArrayTy, true,
- GlobalVariable::InternalLinkage, nullptr);
- SmallVector<Constant *, 16> SizeVec;
- for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
- Type *Ty = StructTy->getElementType(i);
- std::string Str;
- raw_string_ostream StrOS(Str);
- Ty->print(StrOS);
- TypeNameVec.push_back(
- ConstantExpr::getPointerCast(
- createPrivateGlobalForString(M, StrOS.str(), true),
- Int8PtrTy));
- OffsetVec.push_back(
- ConstantInt::get(Int32Ty,
- DL.getStructLayout(StructTy)->getElementOffset(i)));
- SizeVec.push_back(ConstantInt::get(Int32Ty,
- DL.getTypeAllocSize(Ty)));
- }
- TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
- Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
- Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
-}
-
-// Create the global variable for the cache-fragmentation tool.
-GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
- Module &M, const DataLayout &DL, Constant *UnitName) {
- assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);
-
- auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
- auto *Int32Ty = Type::getInt32Ty(*Ctx);
- auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
- auto *Int64Ty = Type::getInt64Ty(*Ctx);
- auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
- // This structure should be kept consistent with the StructInfo struct
- // in the runtime library.
- // struct StructInfo {
- // const char *StructName;
- // u32 Size;
- // u32 NumFields;
- // u32 *FieldOffset; // auxiliary struct field info.
- // u32 *FieldSize; // auxiliary struct field info.
- // const char **FieldTypeName; // auxiliary struct field info.
- // u64 *FieldCounters;
- // u64 *ArrayCounter;
- // };
- auto *StructInfoTy =
- StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
- Int8PtrPtrTy, Int64PtrTy, Int64PtrTy);
- auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
- // This structure should be kept consistent with the CacheFragInfo struct
- // in the runtime library.
- // struct CacheFragInfo {
- // const char *UnitName;
- // u32 NumStructs;
- // StructInfo *Structs;
- // };
- auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy);
-
- std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
- unsigned NumStructs = 0;
- SmallVector<Constant *, 16> Initializers;
-
- for (auto &StructTy : Vec) {
- if (shouldIgnoreStructType(StructTy)) {
- ++NumIgnoredStructs;
- continue;
- }
- ++NumStructs;
-
- // StructName.
- SmallString<MaxStructCounterNameSize> CounterNameStr;
- createStructCounterName(StructTy, CounterNameStr);
- GlobalVariable *StructCounterName = createPrivateGlobalForString(
- M, CounterNameStr, /*AllowMerging*/true);
-
- // Counters.
- // We create the counter array with StructCounterName and weak linkage
- // so that the structs with the same name and layout from different
- // compilation units will be merged into one.
- auto *CounterArrayTy = ArrayType::get(Int64Ty,
- getStructCounterSize(StructTy));
- GlobalVariable *Counters =
- new GlobalVariable(M, CounterArrayTy, false,
- GlobalVariable::WeakAnyLinkage,
- ConstantAggregateZero::get(CounterArrayTy),
- CounterNameStr);
-
- // Remember the counter variable for each struct type.
- StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));
-
- // We pass the field type name array, offset array, and size array to
- // the runtime for better reporting.
- GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
- if (ClAuxFieldInfo)
- createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);
-
- Constant *FieldCounterIdx[2];
- FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
- FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
- getFieldCounterIdx(StructTy));
- Constant *ArrayCounterIdx[2];
- ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
- ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
- getArrayCounterIdx(StructTy));
- Initializers.push_back(ConstantStruct::get(
- StructInfoTy,
- ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
- ConstantInt::get(Int32Ty,
- DL.getStructLayout(StructTy)->getSizeInBytes()),
- ConstantInt::get(Int32Ty, StructTy->getNumElements()),
- Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy)
- : ConstantExpr::getPointerCast(Offset, Int32PtrTy),
- Size == nullptr ? ConstantPointerNull::get(Int32PtrTy)
- : ConstantExpr::getPointerCast(Size, Int32PtrTy),
- TypeName == nullptr
- ? ConstantPointerNull::get(Int8PtrPtrTy)
- : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
- ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
- FieldCounterIdx),
- ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
- ArrayCounterIdx)));
- }
- // Structs.
- Constant *StructInfo;
- if (NumStructs == 0) {
- StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
- } else {
- auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
- StructInfo = ConstantExpr::getPointerCast(
- new GlobalVariable(M, StructInfoArrayTy, false,
- GlobalVariable::InternalLinkage,
- ConstantArray::get(StructInfoArrayTy, Initializers)),
- StructInfoPtrTy);
- }
-
- auto *CacheFragInfoGV = new GlobalVariable(
- M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
- ConstantStruct::get(CacheFragInfoTy, UnitName,
- ConstantInt::get(Int32Ty, NumStructs), StructInfo));
- return CacheFragInfoGV;
-}
-
-// Create the tool-specific argument passed to EsanInit and EsanExit.
-Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
- const DataLayout &DL) {
- // This structure contains tool-specific information about each compilation
- // unit (module) and is passed to the runtime library.
- GlobalVariable *ToolInfoGV = nullptr;
-
- auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- // Compilation unit name.
- auto *UnitName = ConstantExpr::getPointerCast(
- createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
- Int8PtrTy);
-
- // Create the tool-specific variable.
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
- ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);
-
- if (ToolInfoGV != nullptr)
- return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);
-
- // Create the null pointer if no tool-specific variable created.
- return ConstantPointerNull::get(Int8PtrTy);
-}
-
-void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
- PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
- false),
- GlobalValue::InternalLinkage,
- EsanModuleDtorName, &M);
- ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
- IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
- Function *EsanExit = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
- Int8PtrTy));
- EsanExit->setLinkage(Function::ExternalLinkage);
- IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
- appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
-}
-
-bool EfficiencySanitizer::initOnModule(Module &M) {
-
- Triple TargetTriple(M.getTargetTriple());
- if (TargetTriple.isMIPS64())
- ShadowParams = ShadowParams40;
- else
- ShadowParams = ShadowParams47;
-
- Ctx = &M.getContext();
- const DataLayout &DL = M.getDataLayout();
- IRBuilder<> IRB(M.getContext());
- IntegerType *OrdTy = IRB.getInt32Ty();
- PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
- IntptrTy = DL.getIntPtrType(M.getContext());
- // Create the variable passed to EsanInit and EsanExit.
- Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);
- // Constructor
- // We specify the tool type both in the EsanWhichToolName global
- // and as an arg to the init routine as a sanity check.
- std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
- /*InitArgs=*/{
- ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
- ToolInfoArg});
- appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);
-
- createDestructor(M, ToolInfoArg);
-
- new GlobalVariable(M, OrdTy, true,
- GlobalValue::WeakAnyLinkage,
- ConstantInt::get(OrdTy,
- static_cast<int>(Options.ToolType)),
- EsanWhichToolName);
-
- return true;
-}
-
-Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
- // Shadow = ((App & Mask) + Offs) >> Scale
- Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
- uint64_t Offs;
- int Scale = ShadowScale[Options.ToolType];
- if (Scale <= 2)
- Offs = ShadowParams.ShadowOffs[Scale];
- else
- Offs = ShadowParams.ShadowOffs[0] << Scale;
- Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
- if (Scale > 0)
- Shadow = IRB.CreateLShr(Shadow, Scale);
- return Shadow;
-}
-
-bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
- // We'd like to know about cache fragmentation in vtable accesses and
- // constant data references, so we do not currently ignore anything.
- return false;
- } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
- // TODO: the instrumentation disturbs the data layout on the stack, so we
- // may want to add an option to ignore stack references (if we can
- // distinguish them) to reduce overhead.
- }
- // TODO(bruening): future tools will be returning true for some cases.
- return false;
-}
-
-bool EfficiencySanitizer::runOnModule(Module &M) {
- bool Res = initOnModule(M);
- initializeCallbacks(M);
- for (auto &F : M) {
- Res |= runOnFunction(F, M);
- }
- return Res;
-}
-
-bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
- // This is required to prevent instrumenting the call to __esan_init from
- // within the module constructor.
- if (&F == EsanCtorFunction)
- return false;
- SmallVector<Instruction *, 8> LoadsAndStores;
- SmallVector<Instruction *, 8> MemIntrinCalls;
- SmallVector<Instruction *, 8> GetElementPtrs;
- bool Res = false;
- const DataLayout &DL = M.getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- for (auto &BB : F) {
- for (auto &Inst : BB) {
- if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
- isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
- !shouldIgnoreMemoryAccess(&Inst))
- LoadsAndStores.push_back(&Inst);
- else if (isa<MemIntrinsic>(Inst))
- MemIntrinCalls.push_back(&Inst);
- else if (isa<GetElementPtrInst>(Inst))
- GetElementPtrs.push_back(&Inst);
- else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
- maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
- }
- }
-
- if (ClInstrumentLoadsAndStores) {
- for (auto Inst : LoadsAndStores) {
- Res |= instrumentLoadOrStore(Inst, DL);
- }
- }
-
- if (ClInstrumentMemIntrinsics) {
- for (auto Inst : MemIntrinCalls) {
- Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
- }
- }
-
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
- for (auto Inst : GetElementPtrs) {
- Res |= instrumentGetElementPtr(Inst, M);
- }
- }
-
- return Res;
-}
-
-bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
- const DataLayout &DL) {
- IRBuilder<> IRB(I);
- bool IsStore;
- Value *Addr;
- unsigned Alignment;
- if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
- IsStore = false;
- Alignment = Load->getAlignment();
- Addr = Load->getPointerOperand();
- } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
- IsStore = true;
- Alignment = Store->getAlignment();
- Addr = Store->getPointerOperand();
- } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- IsStore = true;
- Alignment = 0;
- Addr = RMW->getPointerOperand();
- } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
- IsStore = true;
- Alignment = 0;
- Addr = Xchg->getPointerOperand();
- } else
- llvm_unreachable("Unsupported mem access type");
-
- Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
- const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
- Value *OnAccessFunc = nullptr;
-
- // Convert 0 to the default alignment.
- if (Alignment == 0)
- Alignment = DL.getPrefTypeAlignment(OrigTy);
-
- if (IsStore)
- NumInstrumentedStores++;
- else
- NumInstrumentedLoads++;
- int Idx = getMemoryAccessFuncIndex(Addr, DL);
- if (Idx < 0) {
- OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
- IRB.CreateCall(OnAccessFunc,
- {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- ConstantInt::get(IntptrTy, TypeSizeBytes)});
- } else {
- if (ClInstrumentFastpath &&
- instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
- NumFastpaths++;
- return true;
- }
- if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
- OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
- else
- OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
- IRB.CreateCall(OnAccessFunc,
- IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
- }
- return true;
-}
-
-// It's simplest to replace the memset/memmove/memcpy intrinsics with
-// calls that the runtime library intercepts.
-// Our pass is late enough that calls should not turn back into intrinsics.
-bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
- IRBuilder<> IRB(MI);
- bool Res = false;
- if (isa<MemSetInst>(MI)) {
- IRB.CreateCall(
- MemsetFn,
- {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
- MI->eraseFromParent();
- Res = true;
- } else if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall(
- isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
- {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
- MI->eraseFromParent();
- Res = true;
- } else
- llvm_unreachable("Unsupported mem intrinsic type");
- return Res;
-}
-
-bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
- GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
- bool Res = false;
- if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
- ++NumIgnoredGEPs;
- return false;
- }
- Type *SourceTy = GepInst->getSourceElementType();
- StructType *StructTy = nullptr;
- ConstantInt *Idx;
- // Check if GEP calculates address from a struct array.
- if (isa<StructType>(SourceTy)) {
- StructTy = cast<StructType>(SourceTy);
- Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
- if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
- !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
- Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
- }
-  // Iterate over all indices (except the first and the last) within each GEP
-  // instruction, looking for possible nested struct field address calculations.
- for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
- SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
- GepInst->idx_begin() + i);
- Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
- unsigned CounterIdx = 0;
- if (isa<ArrayType>(Ty)) {
- ArrayType *ArrayTy = cast<ArrayType>(Ty);
- StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
- if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
- continue;
- // The last counter for struct array access.
- CounterIdx = getArrayCounterIdx(StructTy);
- } else if (isa<StructType>(Ty)) {
- StructTy = cast<StructType>(Ty);
- if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
- continue;
- // Get the StructTy's subfield index.
- Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
- assert(Idx->getSExtValue() >= 0 &&
- Idx->getSExtValue() < StructTy->getNumElements());
- CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
- }
- Res |= insertCounterUpdate(I, StructTy, CounterIdx);
- }
- if (Res)
- ++NumInstrumentedGEPs;
- else
- ++NumIgnoredGEPs;
- return Res;
-}
-
-bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
- StructType *StructTy,
- unsigned CounterIdx) {
- GlobalVariable *CounterArray = StructTyMap[StructTy];
- if (CounterArray == nullptr)
- return false;
- IRBuilder<> IRB(I);
- Constant *Indices[2];
- // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
- // http://llvm.org/docs/GetElementPtr.html.
- // The first index of the GEP instruction steps through the first operand,
- // i.e., the array itself.
- Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
- // The second index is the index within the array.
- Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
- Constant *Counter =
- ConstantExpr::getGetElementPtr(
- ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
- CounterArray, Indices);
- Value *Load = IRB.CreateLoad(Counter);
- IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
- Counter);
- return true;
-}
-
-int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
- const DataLayout &DL) {
- Type *OrigPtrTy = Addr->getType();
- Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
- assert(OrigTy->isSized());
- // The size is always a multiple of 8.
- uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
- if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
- TypeSizeBytes != 8 && TypeSizeBytes != 16) {
- // Irregular sizes do not have per-size call targets.
- NumAccessesWithIrregularSize++;
- return -1;
- }
- size_t Idx = countTrailingZeros(TypeSizeBytes);
- assert(Idx < NumberOfAccessSizes);
- return Idx;
-}
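// For reference, countTrailingZeros maps the supported access sizes onto
// indices 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4, one per call-target slot.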
-
-bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
- const DataLayout &DL, bool IsStore,
- Value *Addr, unsigned Alignment) {
- if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
- return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
- } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
- return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
- }
- return false;
-}
-
-bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
- const DataLayout &DL,
- Value *Addr,
- unsigned Alignment) {
- // Do nothing.
- return true; // Return true to avoid slowpath instrumentation.
-}
-
-bool EfficiencySanitizer::instrumentFastpathWorkingSet(
- Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
- assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
- IRBuilder<> IRB(I);
- Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
- const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
- // Bail to the slowpath if the access might touch multiple cache lines.
- // An access aligned to its size is guaranteed to be intra-cache-line.
- // getMemoryAccessFuncIndex has already ruled out a size larger than 16
- // and thus larger than a cache line for platforms this tool targets
- // (and our shadow memory setup assumes 64-byte cache lines).
- assert(TypeSize <= 128);
- if (!(TypeSize == 8 ||
- (Alignment % (TypeSize / 8)) == 0)) {
- if (ClAssumeIntraCacheLine)
- ++NumAssumedIntraCacheLine;
- else
- return false;
- }
-
- // We inline instrumentation to set the corresponding shadow bits for
- // each cache line touched by the application. Here we handle a single
- // load or store where we've already ruled out the possibility that it
- // might touch more than one cache line and thus we simply update the
- // shadow memory for a single cache line.
- // Our shadow memory model is fine with races when manipulating shadow values.
- // We generate the following code:
- //
- // const char BitMask = 0x81;
- // char *ShadowAddr = appToShadow(AppAddr);
- // if ((*ShadowAddr & BitMask) != BitMask)
-  //     *ShadowAddr |= BitMask;
- //
- Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
- Value *ShadowPtr = appToShadow(AddrPtr, IRB);
- Type *ShadowTy = IntegerType::get(*Ctx, 8U);
- Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
- // The bottom bit is used for the current sampling period's working set.
- // The top bit is used for the total working set. We set both on each
- // memory access, if they are not already set.
- Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B
-
- Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
- // The AND and CMP will be turned into a TEST instruction by the compiler.
- Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
- Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
- // FIXME: do I need to call SetCurrentDebugLocation?
- IRB.SetInsertPoint(CmpTerm);
- // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
- // which are used by the runtime library.
- Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
- IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
- IRB.SetInsertPoint(I);
-
- return true;
-}
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9af64ed332cd..59950ffc4e9a 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -1,9 +1,8 @@
//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -103,11 +102,11 @@ private:
std::vector<Regex> &Regexes);
// Get pointers to the functions in the runtime library.
- Constant *getStartFileFunc();
- Constant *getEmitFunctionFunc();
- Constant *getEmitArcsFunc();
- Constant *getSummaryInfoFunc();
- Constant *getEndFileFunc();
+ FunctionCallee getStartFileFunc();
+ FunctionCallee getEmitFunctionFunc();
+ FunctionCallee getEmitArcsFunc();
+ FunctionCallee getSummaryInfoFunc();
+ FunctionCallee getEndFileFunc();
// Add the function to write out all our counters to the global destructor
// list.
@@ -648,7 +647,7 @@ void GCOVProfiler::AddFlushBeforeForkAndExec() {
for (auto I : ForkAndExecs) {
IRBuilder<> Builder(I);
FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
- Constant *GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
+ FunctionCallee GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
Builder.CreateCall(GCOVFlush);
I->getParent()->splitBasicBlock(I);
}
@@ -811,14 +810,14 @@ bool GCOVProfiler::emitProfileArcs() {
auto It = EdgeToCounter.find({Pred, &BB});
assert(It != EdgeToCounter.end());
const unsigned Edge = It->second;
- Value *EdgeCounter =
- BuilderForPhi.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
+ Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, Edge);
Phi->addIncoming(EdgeCounter, Pred);
}
// Skip phis, landingpads.
IRBuilder<> Builder(&*BB.getFirstInsertionPt());
- Value *Count = Builder.CreateLoad(Phi);
+ Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Phi);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Phi);
@@ -827,9 +826,9 @@ bool GCOVProfiler::emitProfileArcs() {
auto It = EdgeToCounter.find({&BB, nullptr});
assert(It != EdgeToCounter.end());
const unsigned Edge = It->second;
- Value *Counter =
- Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge);
- Value *Count = Builder.CreateLoad(Counter);
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, Edge);
+ Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter);
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
}
@@ -864,7 +863,7 @@ bool GCOVProfiler::emitProfileArcs() {
// Initialize the environment and register the local writeout and flush
// functions.
- Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
Builder.CreateCall(GCOVInit, {WriteoutF, FlushF});
Builder.CreateRetVoid();
@@ -874,22 +873,21 @@ bool GCOVProfiler::emitProfileArcs() {
return Result;
}
-Constant *GCOVProfiler::getStartFileFunc() {
+FunctionCallee GCOVProfiler::getStartFileFunc() {
Type *Args[] = {
Type::getInt8PtrTy(*Ctx), // const char *orig_filename
Type::getInt8PtrTy(*Ctx), // const char version[4]
Type::getInt32Ty(*Ctx), // uint32_t checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- auto *Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy);
- if (Function *FunRes = dyn_cast<Function>(Res))
- if (auto AK = TLI->getExtAttrForI32Param(false))
- FunRes->addParamAttr(2, AK);
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(*Ctx, 2, AK);
+ FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
return Res;
-
}
-Constant *GCOVProfiler::getEmitFunctionFunc() {
+FunctionCallee GCOVProfiler::getEmitFunctionFunc() {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
@@ -898,36 +896,34 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
Type::getInt32Ty(*Ctx), // uint32_t cfg_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- auto *Res = M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
- if (Function *FunRes = dyn_cast<Function>(Res))
- if (auto AK = TLI->getExtAttrForI32Param(false)) {
- FunRes->addParamAttr(0, AK);
- FunRes->addParamAttr(2, AK);
- FunRes->addParamAttr(3, AK);
- FunRes->addParamAttr(4, AK);
- }
- return Res;
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false)) {
+ AL = AL.addParamAttribute(*Ctx, 0, AK);
+ AL = AL.addParamAttribute(*Ctx, 2, AK);
+ AL = AL.addParamAttribute(*Ctx, 3, AK);
+ AL = AL.addParamAttribute(*Ctx, 4, AK);
+ }
+ return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
}
-Constant *GCOVProfiler::getEmitArcsFunc() {
+FunctionCallee GCOVProfiler::getEmitArcsFunc() {
Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
- auto *Res = M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
- if (Function *FunRes = dyn_cast<Function>(Res))
- if (auto AK = TLI->getExtAttrForI32Param(false))
- FunRes->addParamAttr(0, AK);
- return Res;
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(*Ctx, 0, AK);
+ return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
}
-Constant *GCOVProfiler::getSummaryInfoFunc() {
+FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
}
-Constant *GCOVProfiler::getEndFileFunc() {
+FunctionCallee GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
}
@@ -947,11 +943,11 @@ Function *GCOVProfiler::insertCounterWriteout(
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
IRBuilder<> Builder(BB);
- Constant *StartFile = getStartFileFunc();
- Constant *EmitFunction = getEmitFunctionFunc();
- Constant *EmitArcs = getEmitArcsFunc();
- Constant *SummaryInfo = getSummaryInfoFunc();
- Constant *EndFile = getEndFileFunc();
+ FunctionCallee StartFile = getStartFileFunc();
+ FunctionCallee EmitFunction = getEmitFunctionFunc();
+ FunctionCallee EmitArcs = getEmitArcsFunc();
+ FunctionCallee SummaryInfo = getSummaryInfoFunc();
+ FunctionCallee EndFile = getEndFileFunc();
NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CUNodes) {
@@ -1088,22 +1084,32 @@ Function *GCOVProfiler::insertCounterWriteout(
PHINode *IV =
Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
IV->addIncoming(Builder.getInt32(0), BB);
- auto *FileInfoPtr =
- Builder.CreateInBoundsGEP(FileInfoArrayGV, {Builder.getInt32(0), IV});
- auto *StartFileCallArgsPtr = Builder.CreateStructGEP(FileInfoPtr, 0);
+ auto *FileInfoPtr = Builder.CreateInBoundsGEP(
+ FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
+ auto *StartFileCallArgsPtr =
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0);
auto *StartFileCall = Builder.CreateCall(
StartFile,
- {Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 0)),
- Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 1)),
- Builder.CreateLoad(Builder.CreateStructGEP(StartFileCallArgsPtr, 2))});
+ {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 0)),
+ Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 1)),
+ Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 2))});
if (auto AK = TLI->getExtAttrForI32Param(false))
StartFileCall->addParamAttr(2, AK);
auto *NumCounters =
- Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 1));
+ Builder.CreateLoad(FileInfoTy->getElementType(1),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1));
auto *EmitFunctionCallArgsArray =
- Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 2));
+ Builder.CreateLoad(FileInfoTy->getElementType(2),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2));
auto *EmitArcsCallArgsArray =
- Builder.CreateLoad(Builder.CreateStructGEP(FileInfoPtr, 3));
+ Builder.CreateLoad(FileInfoTy->getElementType(3),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3));
auto *EnterCounterLoopCond =
Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
@@ -1111,16 +1117,26 @@ Function *GCOVProfiler::insertCounterWriteout(
Builder.SetInsertPoint(CounterLoopHeader);
auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
- auto *EmitFunctionCallArgsPtr =
- Builder.CreateInBoundsGEP(EmitFunctionCallArgsArray, {JV});
+ auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
+ EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
auto *EmitFunctionCall = Builder.CreateCall(
EmitFunction,
- {Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 0)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 1)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 2)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 3)),
- Builder.CreateLoad(
- Builder.CreateStructGEP(EmitFunctionCallArgsPtr, 4))});
+ {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 0)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 1)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 2)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(3),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 3)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(4),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr,
+ 4))});
if (auto AK = TLI->getExtAttrForI32Param(false)) {
EmitFunctionCall->addParamAttr(0, AK);
EmitFunctionCall->addParamAttr(2, AK);
@@ -1128,11 +1144,15 @@ Function *GCOVProfiler::insertCounterWriteout(
EmitFunctionCall->addParamAttr(4, AK);
}
auto *EmitArcsCallArgsPtr =
- Builder.CreateInBoundsGEP(EmitArcsCallArgsArray, {JV});
+ Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
auto *EmitArcsCall = Builder.CreateCall(
EmitArcs,
- {Builder.CreateLoad(Builder.CreateStructGEP(EmitArcsCallArgsPtr, 0)),
- Builder.CreateLoad(Builder.CreateStructGEP(EmitArcsCallArgsPtr, 1))});
+ {Builder.CreateLoad(
+ EmitArcsCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0)),
+ Builder.CreateLoad(EmitArcsCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy,
+ EmitArcsCallArgsPtr, 1))});
if (auto AK = TLI->getExtAttrForI32Param(false))
EmitArcsCall->addParamAttr(0, AK);
auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
@@ -1172,7 +1192,7 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
// Write out the current counters.
- Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
assert(WriteoutF && "Need to create the writeout function first!");
IRBuilder<> Builder(Entry);
diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index d04c2b76288f..90a9f4955a4b 100644
--- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1,9 +1,8 @@
//===- HWAddressSanitizer.cpp - detector of memory bugs via tagged addressing -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
/// based on tagged addressing.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -21,6 +21,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -125,10 +126,10 @@ static cl::opt<bool> ClEnableKhwasan(
// is accessed. The shadow mapping looks like:
// Shadow = (Mem >> scale) + offset
-static cl::opt<unsigned long long> ClMappingOffset(
- "hwasan-mapping-offset",
- cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"), cl::Hidden,
- cl::init(0));
+static cl::opt<uint64_t>
+ ClMappingOffset("hwasan-mapping-offset",
+ cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
+ cl::Hidden, cl::init(0));
static cl::opt<bool>
ClWithIfunc("hwasan-with-ifunc",
@@ -148,42 +149,46 @@ static cl::opt<bool>
"in a thread-local ring buffer"),
cl::Hidden, cl::init(true));
static cl::opt<bool>
- ClCreateFrameDescriptions("hwasan-create-frame-descriptions",
- cl::desc("create static frame descriptions"),
- cl::Hidden, cl::init(true));
-
-static cl::opt<bool>
ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
cl::desc("instrument memory intrinsics"),
cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClInstrumentLandingPads("hwasan-instrument-landing-pads",
+ cl::desc("instrument landing pads"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
+ cl::desc("inline all checks"),
+ cl::Hidden, cl::init(false));
+
namespace {
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
-class HWAddressSanitizer : public FunctionPass {
+class HWAddressSanitizer {
public:
- // Pass identification, replacement for typeid.
- static char ID;
-
- explicit HWAddressSanitizer(bool CompileKernel = false, bool Recover = false)
- : FunctionPass(ID) {
+ explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
+ bool Recover = false) {
this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
ClEnableKhwasan : CompileKernel;
- }
- StringRef getPassName() const override { return "HWAddressSanitizer"; }
+ initializeModule(M);
+ }
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
+ bool sanitizeFunction(Function &F);
+ void initializeModule(Module &M);
void initializeCallbacks(Module &M);
+ Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
void untagPointerOperand(Instruction *I, Value *Addr);
- Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB);
- void instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+ Value *shadowBase();
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore);
void instrumentMemIntrinsic(MemIntrinsic *MI);
@@ -193,11 +198,15 @@ public:
Value **MaybeMask);
bool isInterestingAlloca(const AllocaInst &AI);
- bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag);
+ bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
- bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ bool instrumentStack(
+ SmallVectorImpl<AllocaInst *> &Allocas,
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ Value *readRegister(IRBuilder<> &IRB, StringRef Name);
+ bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
Value *getStackBaseTag(IRBuilder<> &IRB);
Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
@@ -205,31 +214,14 @@ public:
Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
- Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+ void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
private:
LLVMContext *C;
std::string CurModuleUniqueId;
Triple TargetTriple;
- Function *HWAsanMemmove, *HWAsanMemcpy, *HWAsanMemset;
-
- // Frame description is a way to pass names/sizes of local variables
- // to the run-time w/o adding extra executable code in every function.
- // We do this by creating a separate section with {PC,Descr} pairs and passing
- // the section beg/end to __hwasan_init_frames() at module init time.
- std::string createFrameString(ArrayRef<AllocaInst*> Allocas);
- void createFrameGlobal(Function &F, const std::string &FrameString);
- // Get the section name for frame descriptions. Currently ELF-only.
- const char *getFrameSection() { return "__hwasan_frames"; }
- const char *getFrameSectionBeg() { return "__start___hwasan_frames"; }
- const char *getFrameSectionEnd() { return "__stop___hwasan_frames"; }
- GlobalVariable *createFrameSectionBound(Module &M, Type *Ty,
- const char *Name) {
- auto GV = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
- nullptr, Name);
- GV->setVisibility(GlobalValue::HiddenVisibility);
- return GV;
- }
+ FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
+ FunctionCallee HWAsanHandleVfork;
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) + Offset.
@@ -253,48 +245,95 @@ private:
Type *IntptrTy;
Type *Int8PtrTy;
Type *Int8Ty;
+ Type *Int32Ty;
bool CompileKernel;
bool Recover;
Function *HwasanCtorFunction;
- Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
- Function *HwasanMemoryAccessCallbackSized[2];
+ FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
+ FunctionCallee HwasanMemoryAccessCallbackSized[2];
- Function *HwasanTagMemoryFunc;
- Function *HwasanGenerateTagFunc;
- Function *HwasanThreadEnterFunc;
+ FunctionCallee HwasanTagMemoryFunc;
+ FunctionCallee HwasanGenerateTagFunc;
+ FunctionCallee HwasanThreadEnterFunc;
Constant *ShadowGlobal;
Value *LocalDynamicShadow = nullptr;
+ Value *StackBaseTag = nullptr;
GlobalValue *ThreadPtrGlobal = nullptr;
};
+class HWAddressSanitizerLegacyPass : public FunctionPass {
+public:
+ // Pass identification, replacement for typeid.
+ static char ID;
+
+ explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {}
+
+ StringRef getPassName() const override { return "HWAddressSanitizer"; }
+
+ bool doInitialization(Module &M) override {
+ HWASan = llvm::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+ return true;
+ }
+
+ bool runOnFunction(Function &F) override {
+ return HWASan->sanitizeFunction(F);
+ }
+
+ bool doFinalization(Module &M) override {
+ HWASan.reset();
+ return false;
+ }
+
+private:
+ std::unique_ptr<HWAddressSanitizer> HWASan;
+ bool CompileKernel;
+ bool Recover;
+};
+
} // end anonymous namespace
-char HWAddressSanitizer::ID = 0;
+char HWAddressSanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
- HWAddressSanitizer, "hwasan",
+ HWAddressSanitizerLegacyPass, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
false)
INITIALIZE_PASS_END(
- HWAddressSanitizer, "hwasan",
+ HWAddressSanitizerLegacyPass, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
false)
-FunctionPass *llvm::createHWAddressSanitizerPass(bool CompileKernel,
- bool Recover) {
+FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
+ bool Recover) {
assert(!CompileKernel || Recover);
- return new HWAddressSanitizer(CompileKernel, Recover);
+ return new HWAddressSanitizerLegacyPass(CompileKernel, Recover);
+}
+
+HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
+ : CompileKernel(CompileKernel), Recover(Recover) {}
+
+PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ HWAddressSanitizer HWASan(M, CompileKernel, Recover);
+ bool Modified = false;
+ for (Function &F : M)
+ Modified |= HWASan.sanitizeFunction(F);
+ if (Modified)
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
}
/// Module-level initialization.
///
/// inserts a call to __hwasan_init to the module's constructor list.
-bool HWAddressSanitizer::doInitialization(Module &M) {
+void HWAddressSanitizer::initializeModule(Module &M) {
LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
auto &DL = M.getDataLayout();
@@ -308,47 +347,35 @@ bool HWAddressSanitizer::doInitialization(Module &M) {
IntptrTy = IRB.getIntPtrTy(DL);
Int8PtrTy = IRB.getInt8PtrTy();
Int8Ty = IRB.getInt8Ty();
+ Int32Ty = IRB.getInt32Ty();
HwasanCtorFunction = nullptr;
if (!CompileKernel) {
std::tie(HwasanCtorFunction, std::ignore) =
- createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName,
- kHwasanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
- Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
- HwasanCtorFunction->setComdat(CtorComdat);
- appendToGlobalCtors(M, HwasanCtorFunction, 0, HwasanCtorFunction);
-
- // Create a zero-length global in __hwasan_frame so that the linker will
- // always create start and stop symbols.
- //
- // N.B. If we ever start creating associated metadata in this pass this
- // global will need to be associated with the ctor.
- Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
- auto GV =
- new GlobalVariable(M, Int8Arr0Ty, /*isConstantGlobal*/ true,
- GlobalVariable::PrivateLinkage,
- Constant::getNullValue(Int8Arr0Ty), "__hwasan");
- GV->setSection(getFrameSection());
- GV->setComdat(CtorComdat);
- appendToCompilerUsed(M, GV);
-
- IRBuilder<> IRBCtor(HwasanCtorFunction->getEntryBlock().getTerminator());
- IRBCtor.CreateCall(
- declareSanitizerInitFunction(M, "__hwasan_init_frames",
- {Int8PtrTy, Int8PtrTy}),
- {createFrameSectionBound(M, Int8Ty, getFrameSectionBeg()),
- createFrameSectionBound(M, Int8Ty, getFrameSectionEnd())});
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kHwasanModuleCtorName, kHwasanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ Ctor->setComdat(CtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
}
- if (!TargetTriple.isAndroid())
- appendToCompilerUsed(
- M, ThreadPtrGlobal = new GlobalVariable(
- M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel));
-
- return true;
+ if (!TargetTriple.isAndroid()) {
+ Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
+ auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
+ GlobalValue::ExternalLinkage, nullptr,
+ "__hwasan_tls", nullptr,
+ GlobalVariable::InitialExecTLSModel);
+ appendToCompilerUsed(M, GV);
+ return GV;
+ });
+ ThreadPtrGlobal = cast<GlobalVariable>(C);
+ }
}
void HWAddressSanitizer::initializeCallbacks(Module &M) {
@@ -357,44 +384,55 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
const std::string EndingStr = Recover ? "_noabort" : "";
- HwasanMemoryAccessCallbackSized[AccessIsWrite] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false)));
+ HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + TypeStr +
itostr(1ULL << AccessSizeIndex) + EndingStr,
- FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false)));
+ FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
}
}
- HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy));
- HwasanGenerateTagFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty));
+ HwasanTagMemoryFunc = M.getOrInsertFunction(
+ "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
+ HwasanGenerateTagFunc =
+ M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
- if (Mapping.InGlobal)
- ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
- ArrayType::get(IRB.getInt8Ty(), 0));
+ ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
+ ArrayType::get(IRB.getInt8Ty(), 0));
const std::string MemIntrinCallbackPrefix =
CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
- HWAsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- HWAsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
- HWAsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
-
- HwasanThreadEnterFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy()));
+ HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+
+ HWAsanHandleVfork =
+ M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
+
+ HwasanThreadEnterFunc =
+ M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy());
+}
+
+Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
+ // An empty inline asm with input reg == output reg.
+ // An opaque no-op cast, basically.
+ InlineAsm *Asm = InlineAsm::get(
+ FunctionType::get(Int8PtrTy, {ShadowGlobal->getType()}, false),
+ StringRef(""), StringRef("=r,0"),
+ /*hasSideEffects=*/false);
+ return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
}
Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
@@ -403,18 +441,12 @@ Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
return nullptr;
if (Mapping.InGlobal) {
- // An empty inline asm with input reg == output reg.
- // An opaque pointer-to-int cast, basically.
- InlineAsm *Asm = InlineAsm::get(
- FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false),
- StringRef(""), StringRef("=r,0"),
- /*hasSideEffects=*/false);
- return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+ return getDynamicShadowIfunc(IRB);
} else {
Value *GlobalDynamicAddress =
IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
- kHwasanShadowMemoryDynamicAddress, IntptrTy);
- return IRB.CreateLoad(GlobalDynamicAddress);
+ kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
+ return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
}
}
@@ -506,29 +538,44 @@ void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
}
-Value *HWAddressSanitizer::memToShadow(Value *Mem, Type *Ty, IRBuilder<> &IRB) {
+Value *HWAddressSanitizer::shadowBase() {
+ if (LocalDynamicShadow)
+ return LocalDynamicShadow;
+ return ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, Mapping.Offset),
+ Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
// Mem >> Scale
Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
if (Mapping.Offset == 0)
- return Shadow;
+ return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
// (Mem >> Scale) + Offset
- Value *ShadowBase;
- if (LocalDynamicShadow)
- ShadowBase = LocalDynamicShadow;
- else
- ShadowBase = ConstantInt::get(Ty, Mapping.Offset);
- return IRB.CreateAdd(Shadow, ShadowBase);
+ return IRB.CreateGEP(Int8Ty, shadowBase(), Shadow);
}
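// Hedged sketch of the address computation the updated memToShadow() emits:
// shadow = shadow_base + (mem >> Scale).  The standalone helper and parameter
// names are illustrative; in the pass, Scale comes from Mapping.Scale and the
// base pointer from shadowBase().
unsigned char *memToShadowSketch(unsigned long long Mem,
                                 unsigned char *ShadowBase, unsigned Scale) {
  return ShadowBase + (Mem >> Scale); // one shadow byte covers (1 << Scale) app bytes
}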
-void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
+void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore) {
+ const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
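  // Worked example of the encoding on the line above (same bit layout assumed):
  // a 4-byte write with Recover == false encodes as 0*0x20 + 1*0x10 + 2 = 0x12,
  // since the access size is 1 << AccessSizeIndex bytes.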
IRBuilder<> IRB(InsertBefore);
+
+ if (!ClInlineAllChecks && TargetTriple.isAArch64() &&
+ TargetTriple.isOSBinFormatELF() && !Recover) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+ IRB.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::hwasan_check_memaccess),
+ {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+ return;
+ }
+
+ Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
IRB.getInt8Ty());
Value *AddrLong = untagPointer(IRB, PtrLong);
- Value *ShadowLong = memToShadow(AddrLong, PtrLong->getType(), IRB);
- Value *MemTag = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, Int8PtrTy));
+ Value *Shadow = memToShadow(AddrLong, IRB);
+ Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
@@ -540,11 +587,35 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
}
Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover,
+ SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(CheckTerm);
- const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
+ Value *OutOfShortGranuleTagRange =
+ IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
+ Instruction *CheckFailTerm =
+ SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
+ MDBuilder(*C).createBranchWeights(1, 100000));
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
+ PtrLowBits = IRB.CreateAdd(
+ PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
+ Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
+ SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000),
+ nullptr, nullptr, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
+ InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
+ Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
+ Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
+ SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000),
+ nullptr, nullptr, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(CheckFailTerm);
InlineAsm *Asm;
switch (TargetTriple.getArch()) {
case Triple::x86_64:
@@ -568,6 +639,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite,
report_fatal_error("unsupported architecture");
}
IRB.CreateCall(Asm, PtrLong);
+ if (Recover)
+ cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}
void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
@@ -610,7 +683,6 @@ bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
return false; //FIXME
IRBuilder<> IRB(I);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (isPowerOf2_64(TypeSize) &&
(TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
(Alignment >= (1UL << Mapping.Scale) || Alignment == 0 ||
@@ -618,13 +690,14 @@ bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
if (ClInstrumentWithCalls) {
IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
- AddrLong);
+ IRB.CreatePointerCast(Addr, IntptrTy));
} else {
- instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I);
+ instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I);
}
} else {
IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
- {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)});
+ {IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(IntptrTy, TypeSize / 8)});
}
untagPointerOperand(I, Addr);
@@ -644,27 +717,33 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
}
bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
- Value *Tag) {
- size_t Size = (getAllocaSizeInBytes(*AI) + Mapping.getAllocaAlignment() - 1) &
- ~(Mapping.getAllocaAlignment() - 1);
+ Value *Tag, size_t Size) {
+ size_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
if (ClInstrumentWithCalls) {
IRB.CreateCall(HwasanTagMemoryFunc,
{IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
- ConstantInt::get(IntptrTy, Size)});
+ ConstantInt::get(IntptrTy, AlignedSize)});
} else {
size_t ShadowSize = Size >> Mapping.Scale;
- Value *ShadowPtr = IRB.CreateIntToPtr(
- memToShadow(IRB.CreatePointerCast(AI, IntptrTy), AI->getType(), IRB),
- Int8PtrTy);
+ Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
// If this memset is not inlined, it will be intercepted in the hwasan
// runtime library. That's OK, because the interceptor skips the checks if
// the address is in the shadow region.
// FIXME: the interceptor is not as fast as real memset. Consider lowering
// llvm.memset right here into either a sequence of stores, or a call to
// hwasan_tag_memory.
- IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+ if (ShadowSize)
+ IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+ if (Size != AlignedSize) {
+ IRB.CreateStore(
+ ConstantInt::get(Int8Ty, Size % Mapping.getAllocaAlignment()),
+ IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
+ IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
+ Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
+ AlignedSize - 1));
+ }
}
return true;
}
@@ -674,10 +753,16 @@ static unsigned RetagMask(unsigned AllocaNo) {
// x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
// masks.
// The list does not include the value 255, which is used for UAR.
- static unsigned FastMasks[] = {
- 0, 1, 2, 3, 4, 6, 7, 8, 12, 14, 15, 16, 24,
- 28, 30, 31, 32, 48, 56, 60, 62, 63, 64, 96, 112, 120,
- 124, 126, 127, 128, 192, 224, 240, 248, 252, 254};
+ //
+ // Because we are more likely to use earlier elements of this list than later
+ // ones, it is sorted in increasing order of probability of collision with a
+ // mask allocated (temporally) nearby. The program that generated this list
+ // can be found at:
+ // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
+ static unsigned FastMasks[] = {0, 128, 64, 192, 32, 96, 224, 112, 240,
+ 48, 16, 120, 248, 56, 24, 8, 124, 252,
+ 60, 28, 12, 4, 126, 254, 62, 30, 14,
+ 6, 2, 127, 63, 31, 15, 7, 3, 1};
return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
}
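// Hedged sketch of how a RetagMask() value is applied, following the
// "x = x ^ (mask << 56)" identity described in the comment above; the helper
// and its standalone form are for illustration only, not part of the pass.
#include <cstdint>
uint64_t retagPointerSketch(uint64_t TaggedPtr, unsigned Mask) {
  // Mask is expected to come from RetagMask(AllocaNo); XOR-ing it into the
  // top byte swaps the pointer tag and still fits a single AArch64 EOR.
  return TaggedPtr ^ (uint64_t(Mask) << 56);
}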
@@ -688,6 +773,8 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
if (ClGenerateTagsWithCalls)
return getNextTagWithCall(IRB);
+ if (StackBaseTag)
+ return StackBaseTag;
// FIXME: use addressofreturnaddress (but implement it in aarch64 backend
// first).
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
@@ -763,7 +850,8 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
Value *SlotPtr = IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x30),
+ IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+ IRB.CreateCall(ThreadPointerFunc), 0x30),
Ty->getPointerTo(0));
return SlotPtr;
}
@@ -774,45 +862,21 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
return nullptr;
}
-// Creates a string with a description of the stack frame (set of Allocas).
-// The string is intended to be human readable.
-// The current form is: Size1 Name1; Size2 Name2; ...
-std::string
-HWAddressSanitizer::createFrameString(ArrayRef<AllocaInst *> Allocas) {
- std::ostringstream Descr;
- for (auto AI : Allocas)
- Descr << getAllocaSizeInBytes(*AI) << " " << AI->getName().str() << "; ";
- return Descr.str();
-}
-
-// Creates a global in the frame section which consists of two pointers:
-// the function PC and the frame string constant.
-void HWAddressSanitizer::createFrameGlobal(Function &F,
- const std::string &FrameString) {
- Module &M = *F.getParent();
- auto DescrGV = createPrivateGlobalForString(M, FrameString, true);
- auto PtrPairTy = StructType::get(F.getType(), DescrGV->getType());
- auto GV = new GlobalVariable(
- M, PtrPairTy, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
- ConstantStruct::get(PtrPairTy, (Constant *)&F, (Constant *)DescrGV),
- "__hwasan");
- GV->setSection(getFrameSection());
- appendToCompilerUsed(M, GV);
-  // Put GV into F's Comdat so that if F is deleted GV can be deleted too.
- if (auto Comdat =
- GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
- GV->setComdat(Comdat);
-}
+void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
+ if (!Mapping.InTls) {
+ LocalDynamicShadow = getDynamicShadowNonTls(IRB);
+ return;
+ }
-Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
- bool WithFrameRecord) {
- if (!Mapping.InTls)
- return getDynamicShadowNonTls(IRB);
+ if (!WithFrameRecord && TargetTriple.isAndroid()) {
+ LocalDynamicShadow = getDynamicShadowIfunc(IRB);
+ return;
+ }
Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
assert(SlotPtr);
- Instruction *ThreadLong = IRB.CreateLoad(SlotPtr);
+ Instruction *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
Function *F = IRB.GetInsertBlock()->getParent();
if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
@@ -826,7 +890,7 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
// FIXME: This should call a new runtime function with a custom calling
// convention to avoid needing to spill all arguments here.
IRB.CreateCall(HwasanThreadEnterFunc);
- LoadInst *ReloadThreadLong = IRB.CreateLoad(SlotPtr);
+ LoadInst *ReloadThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
@@ -840,15 +904,21 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
if (WithFrameRecord) {
+ StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
// Prepare ring buffer data.
- auto PC = IRB.CreatePtrToInt(F, IntptrTy);
+ Value *PC;
+ if (TargetTriple.getArch() == Triple::aarch64)
+ PC = readRegister(IRB, "pc");
+ else
+ PC = IRB.CreatePtrToInt(F, IntptrTy);
auto GetStackPointerFn =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress);
Value *SP = IRB.CreatePtrToInt(
IRB.CreateCall(GetStackPointerFn,
{Constant::getNullValue(IRB.getInt32Ty())}),
IntptrTy);
- // Mix SP and PC. TODO: also add the tag to the mix.
+ // Mix SP and PC.
// Assumptions:
// PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
// SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
@@ -879,16 +949,38 @@ Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB,
// Get shadow base address by aligning RecordPtr up.
// Note: this is not correct if the pointer is already aligned.
// Runtime library will make sure this never happens.
- Value *ShadowBase = IRB.CreateAdd(
+ LocalDynamicShadow = IRB.CreateAdd(
IRB.CreateOr(
ThreadLongMaybeUntagged,
ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
- return ShadowBase;
+ LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *ReadRegister =
+ Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
+ MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
+ Value *Args[] = {MetadataAsValue::get(*C, MD)};
+ return IRB.CreateCall(ReadRegister, Args);
+}
+
+bool HWAddressSanitizer::instrumentLandingPads(
+ SmallVectorImpl<Instruction *> &LandingPadVec) {
+ for (auto *LP : LandingPadVec) {
+ IRBuilder<> IRB(LP->getNextNode());
+ IRB.CreateCall(
+ HWAsanHandleVfork,
+ {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
+ : "sp")});
+ }
+ return true;
}
bool HWAddressSanitizer::instrumentStack(
SmallVectorImpl<AllocaInst *> &Allocas,
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
@@ -913,14 +1005,22 @@ bool HWAddressSanitizer::instrumentStack(
U.set(Replacement);
}
- tagAlloca(IRB, AI, Tag);
+ for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
+ DIExpression *OldExpr = DDI->getExpression();
+ DIExpression *NewExpr = DIExpression::append(
+ OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
+ DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
+ }
+
+ size_t Size = getAllocaSizeInBytes(*AI);
+ tagAlloca(IRB, AI, Tag, Size);
for (auto RI : RetVec) {
IRB.SetInsertPoint(RI);
// Re-tag alloca memory with the special UAR tag.
Value *Tag = getUARTag(IRB, StackTag);
- tagAlloca(IRB, AI, Tag);
+ tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getAllocaAlignment()));
}
}
@@ -943,7 +1043,7 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
!AI.isSwiftError());
}
-bool HWAddressSanitizer::runOnFunction(Function &F) {
+bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (&F == HwasanCtorFunction)
return false;
@@ -955,15 +1055,12 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 16> ToInstrument;
SmallVector<AllocaInst*, 8> AllocasToInstrument;
SmallVector<Instruction*, 8> RetVec;
+ SmallVector<Instruction*, 8> LandingPadVec;
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
for (auto &BB : F) {
for (auto &Inst : BB) {
if (ClInstrumentStack)
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
- // Realign all allocas. We don't want small uninteresting allocas to
- // hide in instrumented alloca's padding.
- if (AI->getAlignment() < Mapping.getAllocaAlignment())
- AI->setAlignment(Mapping.getAllocaAlignment());
- // Instrument some of them.
if (isInterestingAlloca(*AI))
AllocasToInstrument.push_back(AI);
continue;
@@ -973,6 +1070,13 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
isa<CleanupReturnInst>(Inst))
RetVec.push_back(&Inst);
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
+ if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
+ AllocaDeclareMap[Alloca].push_back(DDI);
+
+ if (ClInstrumentLandingPads && isa<LandingPadInst>(Inst))
+ LandingPadVec.push_back(&Inst);
+
Value *MaybeMask = nullptr;
bool IsWrite;
unsigned Alignment;
@@ -984,33 +1088,93 @@ bool HWAddressSanitizer::runOnFunction(Function &F) {
}
}
- if (AllocasToInstrument.empty() && ToInstrument.empty())
- return false;
+ initializeCallbacks(*F.getParent());
- if (ClCreateFrameDescriptions && !AllocasToInstrument.empty())
- createFrameGlobal(F, createFrameString(AllocasToInstrument));
+ if (!LandingPadVec.empty())
+ instrumentLandingPads(LandingPadVec);
- initializeCallbacks(*F.getParent());
+ if (AllocasToInstrument.empty() && ToInstrument.empty())
+ return false;
assert(!LocalDynamicShadow);
Instruction *InsertPt = &*F.getEntryBlock().begin();
IRBuilder<> EntryIRB(InsertPt);
- LocalDynamicShadow = emitPrologue(EntryIRB,
- /*WithFrameRecord*/ ClRecordStackHistory &&
- !AllocasToInstrument.empty());
+ emitPrologue(EntryIRB,
+ /*WithFrameRecord*/ ClRecordStackHistory &&
+ !AllocasToInstrument.empty());
bool Changed = false;
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
+ Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
+ StackTag);
+ }
+
+ // Pad and align each of the allocas that we instrumented to stop small
+ // uninteresting allocas from hiding in instrumented alloca's padding and so
+ // that we have enough space to store real tags for short granules.
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+ for (AllocaInst *AI : AllocasToInstrument) {
+ uint64_t Size = getAllocaSizeInBytes(*AI);
+ uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
+ AI->setAlignment(std::max(AI->getAlignment(), 16u));
+ if (Size != AlignedSize) {
+ Type *AllocatedType = AI->getAllocatedType();
+ if (AI->isArrayAllocation()) {
+ uint64_t ArraySize =
+ cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+ AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+ }
+ Type *TypeWithPadding = StructType::get(
+ AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+ auto *NewAI = new AllocaInst(
+ TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+ NewAI->takeName(AI);
+ NewAI->setAlignment(AI->getAlignment());
+ NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(AI->isSwiftError());
+ NewAI->copyMetadata(*AI);
+ auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+ AI->replaceAllUsesWith(Bitcast);
+ AllocaToPaddedAllocaMap[AI] = NewAI;
+ }
+ }
+
+ if (!AllocaToPaddedAllocaMap.empty()) {
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation()))
+ if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
+ DVI->setArgOperand(
+ 0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI)));
+ for (auto &P : AllocaToPaddedAllocaMap)
+ P.first->eraseFromParent();
+ }
+
+ // If we split the entry block, move any allocas that were originally in the
+ // entry block back into the entry block so that they aren't treated as
+ // dynamic allocas.
+ if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
+ InsertPt = &*F.getEntryBlock().begin();
+ for (auto II = EntryIRB.GetInsertBlock()->begin(),
+ IE = EntryIRB.GetInsertBlock()->end();
+ II != IE;) {
+ Instruction *I = &*II++;
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ I->moveBefore(InsertPt);
+ }
}
for (auto Inst : ToInstrument)
Changed |= instrumentMemAccess(Inst);
LocalDynamicShadow = nullptr;
+ StackBaseTag = nullptr;
return Changed;
}
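// Editorial sketch (not part of the patch): the padding computed by the loop in
// sanitizeFunction() above ("Pad and align each of the allocas..."), factored
// into a hypothetical helper. The 16-byte granule is an assumption standing in
// for Mapping.getAllocaAlignment(); the helper name is illustrative only.
static uint64_t getPaddedAllocaSize(uint64_t SizeInBytes) {
  const uint64_t Granule = 16;                       // assumed tag granule size
  return (SizeInBytes + Granule - 1) / Granule * Granule; // e.g. 20 -> 32
}
// For a 20-byte alloca this yields 32, so the pass wraps the original type in
// { OriginalTy, [12 x i8] } and redirects all uses to the padded alloca, which
// both keeps small allocas out of the padding and leaves room for short-granule
// tags.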
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 58436c8560ad..c7371f567ff3 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -1,9 +1,8 @@
//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -239,7 +238,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
<< " Target_func: " << Target << "\n");
- if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
+ if (ICPInvokeOnly && isa<CallInst>(Inst)) {
LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
@@ -247,7 +246,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
});
break;
}
- if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
+ if (ICPCallOnly && isa<InvokeInst>(Inst)) {
LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
@@ -311,10 +310,10 @@ Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst,
promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights);
if (AttachProfToDirectCall) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(Count);
MDBuilder MDB(NewInst->getContext());
- NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ NewInst->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
}
using namespace ore;
@@ -394,9 +393,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
}
bool Changed = false;
for (auto &F : M) {
- if (F.isDeclaration())
- continue;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.isDeclaration() || F.hasOptNone())
continue;
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
diff --git a/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
new file mode 100644
index 000000000000..a2c1ddfd279e
--- /dev/null
+++ b/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -0,0 +1,211 @@
+//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+#define DEBUG_TYPE "instrorderfile"
+
+static cl::opt<std::string> ClOrderFileWriteMapping(
+ "orderfile-write-mapping", cl::init(""),
+ cl::desc(
+ "Dump functions and their MD5 hash to deobfuscate profile data"),
+ cl::Hidden);
+
+namespace {
+
+// We need a global bitmap to tell if a function is executed. We also
+// need a global variable to save the order of functions. We can use a
+// fixed-size buffer that saves the MD5 hash of the function. We need
+// a global variable to save the index into the buffer.
+
+std::mutex MappingMutex;
+
+struct InstrOrderFile {
+private:
+ GlobalVariable *OrderFileBuffer;
+ GlobalVariable *BufferIdx;
+ GlobalVariable *BitMap;
+ ArrayType *BufferTy;
+ ArrayType *MapTy;
+
+public:
+ InstrOrderFile() {}
+
+ void createOrderFileData(Module &M) {
+ LLVMContext &Ctx = M.getContext();
+ int NumFunctions = 0;
+ for (Function &F : M) {
+ if (!F.isDeclaration())
+ NumFunctions++;
+ }
+
+ BufferTy =
+ ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
+ Type *IdxTy = Type::getInt32Ty(Ctx);
+ MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
+
+ // Create the global variables.
+ std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
+ OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(BufferTy), SymbolName);
+ Triple TT = Triple(M.getTargetTriple());
+ OrderFileBuffer->setSection(
+ getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
+
+ std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
+ BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(IdxTy), IndexName);
+
+ std::string BitMapName = "bitmap_0";
+ BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(MapTy), BitMapName);
+ }
+
+ // Generate the code sequence in the entry block of each function to
+ // update the buffer.
+ void generateCodeSequence(Module &M, Function &F, int FuncId) {
+ if (!ClOrderFileWriteMapping.empty()) {
+ std::lock_guard<std::mutex> LogLock(MappingMutex);
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC, llvm::sys::fs::F_Append);
+ if (EC) {
+ report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
+ " to save mapping file for order file instrumentation\n");
+ } else {
+ std::stringstream stream;
+ stream << std::hex << MD5Hash(F.getName());
+ std::string singleLine = "MD5 " + stream.str() + " " +
+ std::string(F.getName()) + '\n';
+ OS << singleLine;
+ }
+ }
+
+ BasicBlock *OrigEntry = &F.getEntryBlock();
+
+ LLVMContext &Ctx = M.getContext();
+ IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+ IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+
+ // Create a new entry block for instrumentation. We will check the bitmap
+ // in this basic block.
+ BasicBlock *NewEntry =
+ BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
+ IRBuilder<> entryB(NewEntry);
+ // Create a basic block for updating the circular buffer.
+ BasicBlock *UpdateOrderFileBB =
+ BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
+ IRBuilder<> updateB(UpdateOrderFileBB);
+
+ // Check the bitmap; if it is already 1, do nothing.
+ // Otherwise, set the bit, grab the index, update the buffer.
+ Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, FuncId)};
+ Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
+ LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
+ entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
+ Value *IsNotExecuted =
+ entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
+ entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
+
+ // Fill up UpdateOrderFileBB: grab the index, update the buffer!
+ Value *IdxVal = updateB.CreateAtomicRMW(
+ AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
+ AtomicOrdering::SequentiallyConsistent);
+ // We need to wrap around the index to fit it inside the buffer.
+ Value *WrappedIdx = updateB.CreateAnd(
+ IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
+ Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
+ Value *BufferAddr =
+ updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
+ updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
+ BufferAddr);
+ updateB.CreateBr(OrigEntry);
+ }
+
+ bool run(Module &M) {
+ createOrderFileData(M);
+
+ int FuncId = 0;
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ generateCodeSequence(M, F, FuncId);
+ ++FuncId;
+ }
+
+ return true;
+ }
+
+}; // End of InstrOrderFile struct
+
+class InstrOrderFileLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ InstrOrderFileLegacyPass() : ModulePass(ID) {
+ initializeInstrOrderFileLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+} // End anonymous namespace
+
+bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ return InstrOrderFile().run(M);
+}
+
+PreservedAnalyses
+InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (InstrOrderFile().run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
+ "Instrumentation for Order File", false, false)
+INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
+ "Instrumentation for Order File", false, false)
+
+char InstrOrderFileLegacyPass::ID = 0;
+
+ModulePass *llvm::createInstrOrderFilePass() {
+ return new InstrOrderFileLegacyPass();
+}
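// Editorial sketch (not part of the patch): the run-time effect of the entry
// instrumentation emitted by generateCodeSequence() above. The global names
// below are illustrative stand-ins for the real order-file buffer, index and
// bitmap symbols created in createOrderFileData().
extern unsigned char OrderFileBitmap[];      // one byte per defined function
extern unsigned OrderFileBufferIdx;          // monotonically increasing slot index
extern unsigned long long OrderFileBuffer[]; // circular buffer of MD5 hashes
static void recordFirstExecution(int FuncId, unsigned long long MD5OfName,
                                 unsigned BufferMask) {
  if (OrderFileBitmap[FuncId])               // "order_file_entry": already seen
    return;
  OrderFileBitmap[FuncId] = 1;
  unsigned Slot = OrderFileBufferIdx++;      // atomic RMW add in the emitted IR
  OrderFileBuffer[Slot & BufferMask] = MD5OfName; // "order_file_set" block
}
// With -orderfile-write-mapping=<file>, the pass also appends a line of the
// form "MD5 <hex-hash> <name>" per instrumented function at compile time, which
// is what ties the hashes stored in the buffer back to function names.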
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 15b94388cbe5..63c2b8078967 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1,9 +1,8 @@
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
@@ -148,8 +149,8 @@ public:
static char ID;
InstrProfilingLegacyPass() : ModulePass(ID) {}
- InstrProfilingLegacyPass(const InstrProfOptions &Options)
- : ModulePass(ID), InstrProf(Options) {}
+ InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
+ : ModulePass(ID), InstrProf(Options, IsCS) {}
StringRef getPassName() const override {
return "Frontend instrumentation-based coverage lowering";
@@ -187,7 +188,7 @@ public:
SSA.AddAvailableValue(PH, Init);
}
- void doExtraRewritesBeforeFinalDeletion() const override {
+ void doExtraRewritesBeforeFinalDeletion() override {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
Instruction *InsertPos = InsertPts[i];
@@ -196,6 +197,7 @@ public:
// block.
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+ Type *Ty = LiveInValue->getType();
IRBuilder<> Builder(InsertPos);
if (AtomicCounterUpdatePromoted)
// atomic update currently can only be promoted across the current
@@ -203,7 +205,7 @@ public:
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
AtomicOrdering::SequentiallyConsistent);
else {
- LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
+ LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
auto *NewStore = Builder.CreateStore(NewVal, Addr);
@@ -232,9 +234,9 @@ class PGOCounterPromoter {
public:
PGOCounterPromoter(
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
- Loop &CurLoop, LoopInfo &LI)
+ Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
: LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
- LI(LI) {
+ LI(LI), BFI(BFI) {
SmallVector<BasicBlock *, 8> LoopExitBlocks;
SmallPtrSet<BasicBlock *, 8> BlockSet;
@@ -263,6 +265,20 @@ public:
SSAUpdater SSA(&NewPHIs);
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+ // If BFI is set, we will use it to guide the promotions.
+ if (BFI) {
+ auto *BB = Cand.first->getParent();
+ auto InstrCount = BFI->getBlockProfileCount(BB);
+ if (!InstrCount)
+ continue;
+ auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
+ // If the average loop trip count is not greater than 1.5, we skip
+ // promotion.
+ if (PreheaderCount &&
+ (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
+ continue;
+ }
+
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
L.getLoopPreheader(), ExitBlocks,
InsertPts, LoopToCandidates, LI);
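// Editorial sketch (not part of the patch): the trip-count heuristic used just
// above, pulled into a hypothetical helper to make the integer arithmetic
// explicit.
static bool shouldSkipPromotion(uint64_t PreheaderCount, uint64_t InstrCount) {
  // Average trip count is InstrCount / PreheaderCount; promotion is skipped
  // when it is <= 1.5, written without floating point as 3*Preheader >= 2*Instr.
  return PreheaderCount * 3 >= InstrCount * 2;
}
// Example: PreheaderCount = 100, InstrCount = 140 (trip count 1.4) -> skip;
//          PreheaderCount = 100, InstrCount = 160 (trip count 1.6) -> promote.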
@@ -312,6 +328,11 @@ private:
SmallVector<BasicBlock *, 8> ExitingBlocks;
LP->getExitingBlocks(ExitingBlocks);
+
+ // If BFI is set, we do more aggressive promotions based on BFI.
+ if (BFI)
+ return (unsigned)-1;
+
// Not considered speculative.
if (ExitingBlocks.size() == 1)
return MaxNumOfPromotionsPerLoop;
@@ -343,6 +364,7 @@ private:
SmallVector<Instruction *, 8> InsertPts;
Loop &L;
LoopInfo &LI;
+ BlockFrequencyInfo *BFI;
};
} // end anonymous namespace
@@ -365,8 +387,9 @@ INITIALIZE_PASS_END(
"Frontend instrumentation-based coverage lowering.", false, false)
ModulePass *
-llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
- return new InstrProfilingLegacyPass(Options);
+llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
+ bool IsCS) {
+ return new InstrProfilingLegacyPass(Options, IsCS);
}
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
@@ -415,6 +438,13 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
LoopInfo LI(DT);
DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ if (Options.UseBFIInPromotion) {
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ BPI.reset(new BranchProbabilityInfo(*F, LI, TLI));
+ BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
+ }
+
for (const auto &LoadStore : PromotionCandidates) {
auto *CounterLoad = LoadStore.first;
auto *CounterStore = LoadStore.second;
@@ -430,7 +460,7 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
// Do a post-order traversal of the loops so that counter updates can be
// iteratively hoisted outside the loop nest.
for (auto *Loop : llvm::reverse(Loops)) {
- PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
+ PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
Promoter.run(&TotalCountersPromoted);
}
}
@@ -509,13 +539,16 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
return true;
}
-static Constant *getOrInsertValueProfilingCall(Module &M,
- const TargetLibraryInfo &TLI,
- bool IsRange = false) {
+static FunctionCallee
+getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
+ bool IsRange = false) {
LLVMContext &Ctx = M.getContext();
auto *ReturnTy = Type::getVoidTy(M.getContext());
- Constant *Res;
+ AttributeList AL;
+ if (auto AK = TLI.getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(M.getContext(), 2, AK);
+
if (!IsRange) {
Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
@@ -523,8 +556,8 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
};
auto *ValueProfilingCallTy =
FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
- Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
- ValueProfilingCallTy);
+ return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy, AL);
} else {
Type *RangeParamTypes[] = {
#define VALUE_RANGE_PROF 1
@@ -534,15 +567,9 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
};
auto *ValueRangeProfilingCallTy =
FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
- Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
- ValueRangeProfilingCallTy);
+ return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
+ ValueRangeProfilingCallTy, AL);
}
-
- if (Function *FunRes = dyn_cast<Function>(Res)) {
- if (auto AK = TLI.getExtAttrForI32Param(false))
- FunRes->addParamAttr(2, AK);
- }
- return Res;
}
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
@@ -601,13 +628,15 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
IRBuilder<> Builder(Inc);
uint64_t Index = Inc->getIndex()->getZExtValue();
- Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
+ Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
+ Counters, 0, Index);
if (Options.Atomic || AtomicCounterUpdateAll) {
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
AtomicOrdering::Monotonic);
} else {
- Value *Load = Builder.CreateLoad(Addr, "pgocount");
+ Value *IncStep = Inc->getStep();
+ Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
auto *Count = Builder.CreateAdd(Load, Inc->getStep());
auto *Store = Builder.CreateStore(Count, Addr);
if (isCounterPromotionEnabled())
@@ -678,32 +707,14 @@ static inline bool shouldRecordFunctionAddr(Function *F) {
return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
-static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
- InstrProfIncrementInst *Inc) {
- if (!needsComdatForCounter(F, M))
- return nullptr;
-
- // COFF format requires a COMDAT section to have a key symbol with the same
- // name. The linker targeting COFF also requires that the COMDAT
- // a section is associated to must precede the associating section. For this
- // reason, we must choose the counter var's name as the name of the comdat.
- StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
- ? getInstrProfCountersVarPrefix()
- : getInstrProfComdatPrefix());
- return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
-}
-
-static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
+static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
// Don't do this for Darwin. compiler-rt uses linker magic.
- if (Triple(M.getTargetTriple()).isOSDarwin())
+ if (TT.isOSDarwin())
return false;
-
// Use linker script magic to get data/cnts/name start/end.
- if (Triple(M.getTargetTriple()).isOSLinux() ||
- Triple(M.getTargetTriple()).isOSFreeBSD() ||
- Triple(M.getTargetTriple()).isOSNetBSD() ||
- Triple(M.getTargetTriple()).isOSFuchsia() ||
- Triple(M.getTargetTriple()).isPS4CPU())
+ if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
+ TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
+ TT.isOSWindows())
return false;
return true;
@@ -720,13 +731,37 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
PD = It->second;
}
- // Move the name variable to the right section. Place them in a COMDAT group
- // if the associated function is a COMDAT. This will make sure that
- // only one copy of counters of the COMDAT function will be emitted after
- // linking.
+ // Match the linkage and visibility of the name global, except on COFF, where
+ // the linkage must be local and consequentially the visibility must be
+ // default.
Function *Fn = Inc->getParent()->getParent();
- Comdat *ProfileVarsComdat = nullptr;
- ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
+ GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
+ GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+ if (TT.isOSBinFormatCOFF()) {
+ Linkage = GlobalValue::InternalLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
+
+ // Move the name variable to the right section. Place them in a COMDAT group
+ // if the associated function is a COMDAT. This will make sure that only one
+ // copy of counters of the COMDAT function will be emitted after linking. Keep
+ // in mind that this pass may run before the inliner, so we need to create a
+ // new comdat group for the counters and profiling data. If we use the comdat
+ // of the parent function, that will result in relocations against discarded
+ // sections.
+ Comdat *Cmdt = nullptr;
+ GlobalValue::LinkageTypes CounterLinkage = Linkage;
+ if (needsComdatForCounter(*Fn, *M)) {
+ StringRef CmdtPrefix = getInstrProfComdatPrefix();
+ if (TT.isOSBinFormatCOFF()) {
+ // For COFF, the comdat group name must be the name of a symbol in the
+ // group. Use the counter variable name, and upgrade its linkage to
+ // something externally visible, like linkonce_odr.
+ CmdtPrefix = getInstrProfCountersVarPrefix();
+ CounterLinkage = GlobalValue::LinkOnceODRLinkage;
+ }
+ Cmdt = M->getOrInsertComdat(getVarName(Inc, CmdtPrefix));
+ }
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
LLVMContext &Ctx = M->getContext();
@@ -734,20 +769,21 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// Create the counters variable.
auto *CounterPtr =
- new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
+ new GlobalVariable(*M, CounterTy, false, Linkage,
Constant::getNullValue(CounterTy),
getVarName(Inc, getInstrProfCountersVarPrefix()));
- CounterPtr->setVisibility(NamePtr->getVisibility());
+ CounterPtr->setVisibility(Visibility);
CounterPtr->setSection(
getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
CounterPtr->setAlignment(8);
- CounterPtr->setComdat(ProfileVarsComdat);
+ CounterPtr->setComdat(Cmdt);
+ CounterPtr->setLinkage(CounterLinkage);
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
// Allocate statically the array of pointers to value profile nodes for
// the current function.
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
- if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
+ if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
uint64_t NS = 0;
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
NS += PD.NumValueSites[Kind];
@@ -755,14 +791,14 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
auto *ValuesVar =
- new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
+ new GlobalVariable(*M, ValuesTy, false, Linkage,
Constant::getNullValue(ValuesTy),
getVarName(Inc, getInstrProfValuesVarPrefix()));
- ValuesVar->setVisibility(NamePtr->getVisibility());
+ ValuesVar->setVisibility(Visibility);
ValuesVar->setSection(
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(8);
- ValuesVar->setComdat(ProfileVarsComdat);
+ ValuesVar->setComdat(Cmdt);
ValuesPtrExpr =
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
}
@@ -789,13 +825,13 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
};
- auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
+ auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
ConstantStruct::get(DataTy, DataVals),
getVarName(Inc, getInstrProfDataVarPrefix()));
- Data->setVisibility(NamePtr->getVisibility());
+ Data->setVisibility(Visibility);
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
- Data->setComdat(ProfileVarsComdat);
+ Data->setComdat(Cmdt);
PD.RegionCounters = CounterPtr;
PD.DataVar = Data;
@@ -820,7 +856,7 @@ void InstrProfiling::emitVNodes() {
// For now only support this on platforms that do
// not require runtime registration to discover
// named section start/end.
- if (needsRuntimeRegistrationOfSectionRange(*M))
+ if (needsRuntimeRegistrationOfSectionRange(TT))
return;
size_t TotalNS = 0;
@@ -881,6 +917,10 @@ void InstrProfiling::emitNameData() {
NamesSize = CompressedNameStr.size();
NamesVar->setSection(
getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
+ // On COFF, it's important to reduce the alignment down to 1 to prevent the
+ // linker from inserting padding before the start of the names section or
+ // between names entries.
+ NamesVar->setAlignment(1);
UsedVars.push_back(NamesVar);
for (auto *NamePtr : ReferencedNames)
@@ -888,7 +928,7 @@ void InstrProfiling::emitNameData() {
}
void InstrProfiling::emitRegistration() {
- if (!needsRuntimeRegistrationOfSectionRange(*M))
+ if (!needsRuntimeRegistrationOfSectionRange(TT))
return;
// Construct the function.
@@ -929,7 +969,7 @@ void InstrProfiling::emitRegistration() {
bool InstrProfiling::emitRuntimeHook() {
// We expect the linker to be invoked with -u<hook_var> flag for linux,
// for which case there is no need to emit the user function.
- if (Triple(M->getTargetTriple()).isOSLinux())
+ if (TT.isOSLinux())
return false;
// If the module's provided its own runtime, we don't need to do anything.
@@ -950,11 +990,11 @@ bool InstrProfiling::emitRuntimeHook() {
if (Options.NoRedZone)
User->addFnAttr(Attribute::NoRedZone);
User->setVisibility(GlobalValue::HiddenVisibility);
- if (Triple(M->getTargetTriple()).supportsCOMDAT())
+ if (TT.supportsCOMDAT())
User->setComdat(M->getOrInsertComdat(User->getName()));
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
- auto *Load = IRB.CreateLoad(Var);
+ auto *Load = IRB.CreateLoad(Int32Ty, Var);
IRB.CreateRet(Load);
// Mark the user variable as used so that it isn't stripped out.
@@ -968,23 +1008,13 @@ void InstrProfiling::emitUses() {
}
void InstrProfiling::emitInitialization() {
- StringRef InstrProfileOutput = Options.InstrProfileOutput;
-
- if (!InstrProfileOutput.empty()) {
- // Create variable for profile name.
- Constant *ProfileNameConst =
- ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
- GlobalVariable *ProfileNameVar = new GlobalVariable(
- *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
- ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
- if (TT.supportsCOMDAT()) {
- ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
- ProfileNameVar->setComdat(M->getOrInsertComdat(
- StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
- }
- }
-
- Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+ // Create ProfileFileName variable. Don't do this for the
+ // context-sensitive instrumentation lowering: This lowering is after
+ // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
+ // have already created the variable before LTO/ThinLTO linking.
+ if (!IsCS)
+ createProfileFileNameVar(*M, Options.InstrProfileOutput);
+ Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
if (!RegisterF)
return;
@@ -1000,8 +1030,7 @@ void InstrProfiling::emitInitialization() {
// Add the basic block and the necessary calls.
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
- if (RegisterF)
- IRB.CreateCall(RegisterF, {});
+ IRB.CreateCall(RegisterF, {});
IRB.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index c3e323613c70..f56a1bd91b89 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -1,9 +1,8 @@
//===-- Instrumentation.cpp - TransformUtils Infrastructure ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,10 +24,12 @@ using namespace llvm;
/// Moves I before IP. Returns new insert point.
static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) {
// If I is IP, move the insert point down.
- if (I == IP)
- return ++IP;
- // Otherwise, move I before IP and return IP.
- I->moveBefore(&*IP);
+ if (I == IP) {
+ ++IP;
+ } else {
+ // Otherwise, move I before IP and return IP.
+ I->moveBefore(&*IP);
+ }
return IP;
}
@@ -101,8 +102,8 @@ Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
/// initializeInstrumentation - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeInstrumentation(PassRegistry &Registry) {
- initializeAddressSanitizerPass(Registry);
- initializeAddressSanitizerModulePass(Registry);
+ initializeAddressSanitizerLegacyPassPass(Registry);
+ initializeModuleAddressSanitizerLegacyPassPass(Registry);
initializeBoundsCheckingLegacyPassPass(Registry);
initializeControlHeightReductionLegacyPassPass(Registry);
initializeGCOVProfilerLegacyPassPass(Registry);
@@ -110,13 +111,13 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
initializePGOMemOPSizeOptLegacyPassPass(Registry);
+ initializeInstrOrderFileLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
initializeMemorySanitizerLegacyPassPass(Registry);
- initializeHWAddressSanitizerPass(Registry);
+ initializeHWAddressSanitizerLegacyPassPass(Registry);
initializeThreadSanitizerLegacyPassPass(Registry);
initializeSanitizerCoverageModulePass(Registry);
initializeDataFlowSanitizerPass(Registry);
- initializeEfficiencySanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 4eb758c69c58..892a6a26da91 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -1,9 +1,8 @@
//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,8 +67,7 @@ namespace llvm {
/// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
/// spanning tree.
MaximumSpanningTree(EdgeWeights &EdgeVector) {
-
- std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
+ llvm::stable_sort(EdgeVector, EdgeWeightCompare());
// Create spanning tree, Forest contains a special data structure
// that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index e6573af2077d..b25cbed1bb02 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1,9 +1,8 @@
//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -144,6 +143,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -248,6 +248,13 @@ static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
cl::desc("exact handling of relational integer ICmp"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClHandleLifetimeIntrinsics(
+ "msan-handle-lifetime-intrinsics",
+ cl::desc(
+ "when possible, poison scoped variables at the beginning of the scope "
+ "(slower, but more precise)"),
+ cl::Hidden, cl::init(true));
+
// When compiling the Linux kernel, we sometimes see false positives related to
// MSan being unable to understand that inline assembly calls may initialize
// local variables.
@@ -305,22 +312,23 @@ static cl::opt<bool> ClWithComdat("msan-with-comdat",
// These options allow to specify custom memory map parameters
// See MemoryMapParams for details.
-static cl::opt<unsigned long long> ClAndMask("msan-and-mask",
- cl::desc("Define custom MSan AndMask"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClAndMask("msan-and-mask",
+ cl::desc("Define custom MSan AndMask"),
+ cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClXorMask("msan-xor-mask",
- cl::desc("Define custom MSan XorMask"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
+ cl::desc("Define custom MSan XorMask"),
+ cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClShadowBase("msan-shadow-base",
- cl::desc("Define custom MSan ShadowBase"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
+ cl::desc("Define custom MSan ShadowBase"),
+ cl::Hidden, cl::init(0));
-static cl::opt<unsigned long long> ClOriginBase("msan-origin-base",
- cl::desc("Define custom MSan OriginBase"),
- cl::Hidden, cl::init(0));
+static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
+ cl::desc("Define custom MSan OriginBase"),
+ cl::Hidden, cl::init(0));
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
static const char *const kMsanInitName = "__msan_init";
namespace {
@@ -454,17 +462,16 @@ namespace {
/// the module.
class MemorySanitizer {
public:
- MemorySanitizer(Module &M, int TrackOrigins = 0, bool Recover = false,
- bool EnableKmsan = false) {
+ MemorySanitizer(Module &M, MemorySanitizerOptions Options) {
this->CompileKernel =
- ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : EnableKmsan;
+ ClEnableKmsan.getNumOccurrences() > 0 ? ClEnableKmsan : Options.Kernel;
if (ClTrackOrigins.getNumOccurrences() > 0)
this->TrackOrigins = ClTrackOrigins;
else
- this->TrackOrigins = this->CompileKernel ? 2 : TrackOrigins;
+ this->TrackOrigins = this->CompileKernel ? 2 : Options.TrackOrigins;
this->Recover = ClKeepGoing.getNumOccurrences() > 0
? ClKeepGoing
- : (this->CompileKernel | Recover);
+ : (this->CompileKernel | Options.Recover);
initializeModule(M);
}
@@ -536,41 +543,42 @@ private:
bool CallbacksInitialized = false;
/// The run-time callback to print a warning.
- Value *WarningFn;
+ FunctionCallee WarningFn;
// These arrays are indexed by log2(AccessSize).
- Value *MaybeWarningFn[kNumberOfAccessSizes];
- Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
+ FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+ FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
/// Run-time helper that generates a new origin value for a stack
/// allocation.
- Value *MsanSetAllocaOrigin4Fn;
+ FunctionCallee MsanSetAllocaOrigin4Fn;
/// Run-time helper that poisons stack on function entry.
- Value *MsanPoisonStackFn;
+ FunctionCallee MsanPoisonStackFn;
/// Run-time helper that records a store (or any event) of an
/// uninitialized value and returns an updated origin id encoding this info.
- Value *MsanChainOriginFn;
+ FunctionCallee MsanChainOriginFn;
/// MSan runtime replacements for memmove, memcpy and memset.
- Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+ FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
/// KMSAN callback for task-local function argument shadow.
- Value *MsanGetContextStateFn;
+ StructType *MsanContextStateTy;
+ FunctionCallee MsanGetContextStateFn;
/// Functions for poisoning/unpoisoning local variables
- Value *MsanPoisonAllocaFn, *MsanUnpoisonAllocaFn;
+ FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
/// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
/// pointers.
- Value *MsanMetadataPtrForLoadN, *MsanMetadataPtrForStoreN;
- Value *MsanMetadataPtrForLoad_1_8[4];
- Value *MsanMetadataPtrForStore_1_8[4];
- Value *MsanInstrumentAsmStoreFn;
+ FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
+ FunctionCallee MsanMetadataPtrForLoad_1_8[4];
+ FunctionCallee MsanMetadataPtrForStore_1_8[4];
+ FunctionCallee MsanInstrumentAsmStoreFn;
/// Helper to choose between different MsanMetadataPtrXxx().
- Value *getKmsanShadowOriginAccessFn(bool isStore, int size);
+ FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
/// Memory map parameters used in application-to-shadow calculation.
const MemoryMapParams *MapParams;
@@ -586,6 +594,8 @@ private:
/// An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
+
+ Function *MsanCtorFunction;
};
/// A legacy function pass for msan instrumentation.
@@ -595,10 +605,8 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
// Pass identification, replacement for typeid.
static char ID;
- MemorySanitizerLegacyPass(int TrackOrigins = 0, bool Recover = false,
- bool EnableKmsan = false)
- : FunctionPass(ID), TrackOrigins(TrackOrigins), Recover(Recover),
- EnableKmsan(EnableKmsan) {}
+ MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
+ : FunctionPass(ID), Options(Options) {}
StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -612,16 +620,14 @@ struct MemorySanitizerLegacyPass : public FunctionPass {
bool doInitialization(Module &M) override;
Optional<MemorySanitizer> MSan;
- int TrackOrigins;
- bool Recover;
- bool EnableKmsan;
+ MemorySanitizerOptions Options;
};
} // end anonymous namespace
PreservedAnalyses MemorySanitizerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
- MemorySanitizer Msan(*F.getParent(), TrackOrigins, Recover, EnableKmsan);
+ MemorySanitizer Msan(*F.getParent(), Options);
if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -637,10 +643,9 @@ INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
"MemorySanitizer: detects uninitialized reads.", false,
false)
-FunctionPass *llvm::createMemorySanitizerLegacyPassPass(int TrackOrigins,
- bool Recover,
- bool CompileKernel) {
- return new MemorySanitizerLegacyPass(TrackOrigins, Recover, CompileKernel);
+FunctionPass *
+llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
+ return new MemorySanitizerLegacyPass(Options);
}
/// Create a non-const global initialized with the given string.
@@ -675,18 +680,15 @@ void MemorySanitizer::createKernelApi(Module &M) {
IRB.getInt32Ty());
// Requests the per-task context state (kmsan_context_state*) from the
// runtime library.
+ MsanContextStateTy = StructType::get(
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
+ IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+ OriginTy);
MsanGetContextStateFn = M.getOrInsertFunction(
- "__msan_get_context_state",
- PointerType::get(
- StructType::get(ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
- ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
- ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
- ArrayType::get(IRB.getInt64Ty(),
- kParamTLSSize / 8), /* va_arg_origin */
- IRB.getInt64Ty(),
- ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
- OriginTy),
- 0));
+ "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
PointerType::get(IRB.getInt32Ty(), 0));
@@ -821,8 +823,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
CallbacksInitialized = true;
}
-Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
- Value **Fns =
+FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
+ int size) {
+ FunctionCallee *Fns =
isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
switch (size) {
case 1:
@@ -839,6 +842,8 @@ Value *MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore, int size) {
}
/// Module-level initialization.
+///
+/// Inserts a call to __msan_init into the module's constructor list.
void MemorySanitizer::initializeModule(Module &M) {
auto &DL = M.getDataLayout();
@@ -913,7 +918,22 @@ void MemorySanitizer::initializeModule(Module &M) {
OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
if (!CompileKernel) {
- getOrCreateInitFunction(M, kMsanInitName);
+ std::tie(MsanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kMsanModuleCtorName, kMsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ if (!ClWithComdat) {
+ appendToGlobalCtors(M, Ctor, 0);
+ return;
+ }
+ Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+ Ctor->setComdat(MsanCtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
if (TrackOrigins)
M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
@@ -932,7 +952,7 @@ void MemorySanitizer::initializeModule(Module &M) {
}
bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
- MSan.emplace(M, TrackOrigins, Recover, EnableKmsan);
+ MSan.emplace(M, Options);
return true;
}
@@ -1011,6 +1031,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
: Shadow(S), Origin(O), OrigIns(I) {}
};
SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+ bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
+ SmallSet<AllocaInst *, 16> AllocaSet;
+ SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
SmallVector<StoreInst *, 16> StoreList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
@@ -1076,7 +1099,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
Value *GEP =
- i ? IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;
+ i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
CurrentAlignment = kMinOriginAlignment;
}
@@ -1104,7 +1127,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
- Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
+ FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
Value *ConvertedShadow2 = IRB.CreateZExt(
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
IRB.CreateCall(Fn, {ConvertedShadow2,
@@ -1186,7 +1209,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
- Value *Fn = MS.MaybeWarningFn[SizeIndex];
+ FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
Value *ConvertedShadow2 =
IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
@@ -1221,20 +1244,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
Constant *Zero = IRB.getInt32(0);
- MS.ParamTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(0)}, "param_shadow");
- MS.RetvalTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(1)}, "retval_shadow");
- MS.VAArgTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(2)}, "va_arg_shadow");
- MS.VAArgOriginTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(3)}, "va_arg_origin");
- MS.VAArgOverflowSizeTLS = IRB.CreateGEP(
- ContextState, {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
- MS.ParamOriginTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(5)}, "param_origin");
+ MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(0)}, "param_shadow");
+ MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(1)}, "retval_shadow");
+ MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+ MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(3)}, "va_arg_origin");
+ MS.VAArgOverflowSizeTLS =
+ IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+ MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(5)}, "param_origin");
MS.RetvalOriginTLS =
- IRB.CreateGEP(ContextState, {Zero, IRB.getInt32(6)}, "retval_origin");
+ IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(6)}, "retval_origin");
return ret;
}
@@ -1265,6 +1290,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VAHelper->finalizeInstrumentation();
+ // Poison llvm.lifetime.start intrinsics, if we haven't fallen back to
+ // instrumenting only allocas.
+ if (InstrumentLifetimeStart) {
+ for (auto Item : LifetimeStartList) {
+ instrumentAlloca(*Item.second, Item.first);
+ AllocaSet.erase(Item.second);
+ }
+ }
+ // Poison the allocas for which we didn't instrument the corresponding
+ // lifetime intrinsics.
+ for (AllocaInst *AI : AllocaSet)
+ instrumentAlloca(*AI);
+
bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
InstrumentationList.size() + StoreList.size() >
(unsigned)ClInstrumentationWithCallThreshold;
@@ -1381,7 +1419,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
}
OriginPtr =
- IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0));
+ IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
}
return std::make_pair(ShadowPtr, OriginPtr);
}
@@ -1393,7 +1431,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
int Size = DL.getTypeStoreSize(ShadowTy);
- Value *Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+ FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
Value *AddrCast =
IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
if (Getter) {
@@ -1598,8 +1636,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// ParamTLS overflow.
*ShadowPtr = getCleanShadow(V);
} else {
- *ShadowPtr =
- EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
+ *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
+ kShadowTLSAlignment);
}
}
LLVM_DEBUG(dbgs()
@@ -1607,7 +1645,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins && !Overflow) {
Value *OriginPtr =
getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
- setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
+ setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
} else {
setOrigin(A, getCleanOrigin());
}
@@ -1738,7 +1776,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (PropagateShadow) {
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -1752,7 +1791,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
if (PropagateShadow) {
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
- setOrigin(&I, IRB.CreateAlignedLoad(OriginPtr, OriginAlignment));
+ setOrigin(
+ &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
} else {
setOrigin(&I, getCleanOrigin());
}
@@ -1903,7 +1943,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *S1S2 = IRB.CreateAnd(S1, S2);
Value *V1S2 = IRB.CreateAnd(V1, S2);
Value *S1V2 = IRB.CreateAnd(S1, V2);
- setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
setOriginForNaryOp(I);
}
@@ -1925,7 +1965,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *S1S2 = IRB.CreateAnd(S1, S2);
Value *V1S2 = IRB.CreateAnd(V1, S2);
Value *S1V2 = IRB.CreateAnd(S1, V2);
- setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+ setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
setOriginForNaryOp(I);
}
@@ -2070,6 +2110,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SC.Done(&I);
}
+ void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
+
// Handle multiplication by constant.
//
// Handle a special case of multiplication by constant that may have one or
@@ -2432,7 +2474,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned Alignment = 1;
std::tie(ShadowPtr, OriginPtr) =
getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld"));
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
} else {
setShadow(&I, getCleanShadow(&I));
}
@@ -2442,7 +2485,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins) {
if (PropagateShadow)
- setOrigin(&I, IRB.CreateLoad(OriginPtr));
+ setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
else
setOrigin(&I, getCleanOrigin());
}
@@ -2519,6 +2562,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return false;
}
+ void handleLifetimeStart(IntrinsicInst &I) {
+ if (!PoisonStack)
+ return;
+ DenseMap<Value *, AllocaInst *> AllocaForValue;
+ AllocaInst *AI =
+ llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
+ if (!AI)
+ InstrumentLifetimeStart = false;
+ LifetimeStartList.push_back(std::make_pair(&I, AI));
+ }
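// Editorial note (not part of the patch): if findAllocaForValue() cannot map a
// lifetime.start operand back to a single alloca (for example, a pointer that
// is a PHI over several different allocas), InstrumentLifetimeStart is cleared
// and the pass falls back to poisoning every collected alloca at its definition
// instead of at the start of its lexical scope; see the hunk above where
// LifetimeStartList and AllocaSet are consumed after
// VAHelper->finalizeInstrumentation().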
+
void handleBswap(IntrinsicInst &I) {
IRBuilder<> IRB(&I);
Value *Op = I.getArgOperand(0);
@@ -2650,7 +2704,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
: Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
Value *V1 = I.getOperand(0);
Value *V2 = I.getOperand(1);
- Value *Shift = IRB.CreateCall(I.getCalledValue(),
+ Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
{IRB.CreateBitCast(S1, V1->getType()), V2});
Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
setShadow(&I, IRB.CreateOr(Shift, S2Conv));
@@ -2660,6 +2714,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Get an X86_MMX-sized vector type.
Type *getMMXVectorTy(unsigned EltSizeInBits) {
const unsigned X86_MMXSizeInBits = 64;
+ assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
+ "Illegal MMX vector element size");
return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
X86_MMXSizeInBits / EltSizeInBits);
}
@@ -2825,9 +2881,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr");
- Value *Origin =
- MS.TrackOrigins ? IRB.CreateLoad(OriginPtr) : getCleanOrigin();
+ Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
+ Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
+ : getCleanOrigin();
insertShadowCheck(Shadow, Origin, &I);
}
@@ -2901,7 +2957,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Origin = IRB.CreateSelect(
IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
- getOrigin(PassThru), IRB.CreateLoad(OriginPtr));
+ getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
setOrigin(&I, Origin);
} else {
@@ -2911,9 +2967,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return true;
}
+ // Instrument BMI / BMI2 intrinsics.
+ // All of these intrinsics are Z = I(X, Y)
+ // where the types of all operands and the result match, and are either i32 or i64.
+ // The following instrumentation happens to work for all of them:
+ // Sz = I(Sx, Y) | (sext (Sy != 0))
+ void handleBmiIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Type *ShadowTy = getShadowTy(&I);
+
+ // If any bit of the mask operand is poisoned, then the whole thing is.
+ Value *SMask = getShadow(&I, 1);
+ SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
+ ShadowTy);
+ // Apply the same intrinsic to the shadow of the first operand.
+ Value *S = IRB.CreateCall(I.getCalledFunction(),
+ {getShadow(&I, 0), I.getOperand(1)});
+ S = IRB.CreateOr(SMask, S);
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
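// Editorial example (not part of the patch), illustrating the rule above for a
// hypothetical llvm.x86.bmi.pdep.32 call with made-up values:
//   Sx = 0b0011 (the two low bits of the source X are poisoned)
//   Y  = 0b1010 (mask, fully initialized, so Sy = 0)
//   Sz = pdep(Sx, Y) | sext(Sy != 0) = pdep(0b0011, 0b1010) | 0 = 0b1010
// i.e. exactly the destination bits that receive poisoned source bits become
// poisoned; had any bit of Y been poisoned, the sext term would have turned the
// whole result shadow into all-ones.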
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ handleLifetimeStart(I);
+ break;
case Intrinsic::bswap:
handleBswap(I);
break;
@@ -3127,6 +3206,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorComparePackedIntrinsic(I);
break;
+ case Intrinsic::x86_bmi_bextr_32:
+ case Intrinsic::x86_bmi_bextr_64:
+ case Intrinsic::x86_bmi_bzhi_32:
+ case Intrinsic::x86_bmi_bzhi_64:
+ case Intrinsic::x86_bmi_pdep_32:
+ case Intrinsic::x86_bmi_pdep_64:
+ case Intrinsic::x86_bmi_pext_32:
+ case Intrinsic::x86_bmi_pext_64:
+ handleBmiIntrinsic(I);
+ break;
+
case Intrinsic::is_constant:
// The result of llvm.is.constant() is always defined.
setShadow(&I, getCleanShadow(&I));
@@ -3143,21 +3233,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
assert(!I.getMetadata("nosanitize"));
- assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+ assert((CS.isCall() || CS.isInvoke() || CS.isCallBr()) &&
+ "Unknown type of CallSite");
+ if (CS.isCallBr() || (CS.isCall() && cast<CallInst>(&I)->isInlineAsm())) {
+ // For inline asm (either a call to an asm function, or a callbr instruction),
+ // do the usual thing: check argument shadow and mark all outputs as
+ // clean. Note that any side effects of the inline asm that are not
+ // immediately visible in its constraints are not handled.
+ if (ClHandleAsmConservative && MS.CompileKernel)
+ visitAsmInstruction(I);
+ else
+ visitInstruction(I);
+ return;
+ }
if (CS.isCall()) {
CallInst *Call = cast<CallInst>(&I);
-
- // For inline asm, do the usual thing: check argument shadow and mark all
- // outputs as clean. Note that any side effects of the inline asm that are
- // not immediately visible in its constraints are not handled.
- if (Call->isInlineAsm()) {
- if (ClHandleAsmConservative && MS.CompileKernel)
- visitAsmInstruction(I);
- else
- visitInstruction(I);
- return;
- }
-
assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
// We are going to insert code that relies on the fact that the callee
@@ -3264,12 +3354,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
"Could not find insertion point for retval shadow load");
}
IRBuilder<> IRBAfter(&*NextInsn);
- Value *RetvalShadow =
- IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter),
- kShadowTLSAlignment, "_msret");
+ Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
+ getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter),
+ kShadowTLSAlignment, "_msret");
setShadow(&I, RetvalShadow);
if (MS.TrackOrigins)
- setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+ setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy,
+ getOriginPtrForRetval(IRBAfter)));
}
bool isAMustTailRetVal(Value *RetVal) {
@@ -3330,7 +3421,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
StackDescription.str());
}
- void instrumentAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall(MS.MsanPoisonStackFn,
{IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
@@ -3352,7 +3443,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
- void instrumentAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
Value *Descr = getLocalVarDescription(I);
if (PoisonStack) {
IRB.CreateCall(MS.MsanPoisonAllocaFn,
@@ -3364,10 +3455,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
}
- void visitAllocaInst(AllocaInst &I) {
- setShadow(&I, getCleanShadow(&I));
- setOrigin(&I, getCleanOrigin());
- IRBuilder<> IRB(I.getNextNode());
+ void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
+ if (!InsPoint)
+ InsPoint = &I;
+ IRBuilder<> IRB(InsPoint->getNextNode());
const DataLayout &DL = F.getParent()->getDataLayout();
uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
@@ -3375,9 +3466,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Len = IRB.CreateMul(Len, I.getArraySize());
if (MS.CompileKernel)
- instrumentAllocaKmsan(I, IRB, Len);
+ poisonAllocaKmsan(I, IRB, Len);
else
- instrumentAllocaUserspace(I, IRB, Len);
+ poisonAllocaUserspace(I, IRB, Len);
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ // We'll get to this alloca later unless it's poisoned at the corresponding
+ // llvm.lifetime.start.
+ AllocaSet.insert(&I);
}
void visitSelectInst(SelectInst& I) {
@@ -3409,7 +3508,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
D = CreateAppToShadowCast(IRB, D);
// Result shadow if condition shadow is 1.
- Sa1 = IRB.CreateOr(IRB.CreateXor(C, D), IRB.CreateOr(Sc, Sd));
+ Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
}
Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
setShadow(&I, Sa);
@@ -3525,10 +3624,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
/// Get the number of output arguments returned by pointers.
- int getNumOutputArgs(InlineAsm *IA, CallInst *CI) {
+ int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
int NumRetOutputs = 0;
int NumOutputs = 0;
- Type *RetTy = dyn_cast<Value>(CI)->getType();
+ Type *RetTy = dyn_cast<Value>(CB)->getType();
if (!RetTy->isVoidTy()) {
// Register outputs are returned via the CallInst return value.
StructType *ST = dyn_cast_or_null<StructType>(RetTy);
@@ -3568,24 +3667,24 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// corresponding CallInst has nO+nI+1 operands (the last operand is the
// function to be called).
const DataLayout &DL = F.getParent()->getDataLayout();
- CallInst *CI = dyn_cast<CallInst>(&I);
+ CallBase *CB = dyn_cast<CallBase>(&I);
IRBuilder<> IRB(&I);
- InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- int OutputArgs = getNumOutputArgs(IA, CI);
+ InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue());
+ int OutputArgs = getNumOutputArgs(IA, CB);
// The last operand of a CallInst is the function itself.
- int NumOperands = CI->getNumOperands() - 1;
+ int NumOperands = CB->getNumOperands() - 1;
// Check input arguments. Doing so before unpoisoning output arguments, so
// that we won't overwrite uninit values before checking them.
for (int i = OutputArgs; i < NumOperands; i++) {
- Value *Operand = CI->getOperand(i);
+ Value *Operand = CB->getOperand(i);
instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
}
// Unpoison output arguments. This must happen before the actual InlineAsm
// call, so that the shadow for memory published in the asm() statement
// remains valid.
for (int i = 0; i < OutputArgs; i++) {
- Value *Operand = CI->getOperand(i);
+ Value *Operand = CB->getOperand(i);
instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
}
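
As a rough illustration of the operand layout this hunk relies on (outputs first, then inputs, with the callee operand already excluded from NumOperands), the sketch below only mirrors the index ranges of the two loops; the counts are made up.

    #include <cstdio>

    int main() {
      int NumOutputs = 2, NumInputs = 3;
      int NumOperands = NumOutputs + NumInputs; // the callee operand is already dropped
      // Inputs are checked first, while their shadow is still intact...
      for (int i = NumOutputs; i < NumOperands; i++)
        printf("check shadow of input operand %d\n", i);
      // ...then outputs are unpoisoned before the asm itself runs.
      for (int i = 0; i < NumOutputs; i++)
        printf("unpoison output operand %d\n", i);
    }
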
@@ -3817,7 +3916,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgOverflowSize =
+ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
VAArgOverflowSize);
@@ -3836,11 +3936,13 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRBuilder<> IRB(OrigInst->getNextNode());
Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 16)),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
unsigned Alignment = 16;
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -3851,11 +3953,13 @@ struct VarArgAMD64Helper : public VarArgHelper {
if (MS.TrackOrigins)
IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
Alignment, AMD64FpEndOffset);
+ Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, 8)),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
+ PointerType::get(OverflowArgAreaPtrTy, 0));
+ Value *OverflowArgAreaPtr =
+ IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
@@ -3957,7 +4061,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
VAArgSize);
@@ -3974,10 +4078,12 @@ struct VarArgMIPS64Helper : public VarArgHelper {
CallInst *OrigInst = VAStartInstrumentationList[i];
IRBuilder<> IRB(OrigInst->getNextNode());
Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *RegSaveAreaPtrPtr =
IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
unsigned Alignment = 8;
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -4127,7 +4233,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, offset)),
Type::getInt64PtrTy(*MS.C));
- return IRB.CreateLoad(SaveAreaPtrPtr);
+ return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
}
// Retrieve a va_list field of 'int' size.
@@ -4137,7 +4243,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
ConstantInt::get(MS.IntptrTy, offset)),
Type::getInt32PtrTy(*MS.C));
- Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+ Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
}
@@ -4148,7 +4254,8 @@ struct VarArgAArch64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgOverflowSize =
+ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize =
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
VAArgOverflowSize);
@@ -4391,7 +4498,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
assert(!VAArgSize && !VAArgTLSCopy &&
"finalizeInstrumentation called twice");
IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
- VAArgSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
VAArgSize);
@@ -4408,10 +4515,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
CallInst *OrigInst = VAStartInstrumentationList[i];
IRBuilder<> IRB(OrigInst->getNextNode());
Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
Value *RegSaveAreaPtrPtr =
IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
- Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
unsigned Alignment = 8;
std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
@@ -4458,6 +4567,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
}
bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+ if (!CompileKernel && (&F == MsanCtorFunction))
+ return false;
MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index f043325f5bba..6fec3c9c79ee 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1,9 +1,8 @@
//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -48,7 +47,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "CFGMST.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -66,6 +64,7 @@
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -107,6 +106,7 @@
#include "llvm/Support/JamCRC.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
@@ -133,6 +133,19 @@ STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
+STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOSelectInsts,
+ "Number of select instruction instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOMemIntrinsics,
+ "Number of mem intrinsics instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
+STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
+STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
+STATISTIC(NumOfCSPGOFunc,
+ "Number of functions having valid profile counts in CSPGO.");
+STATISTIC(NumOfCSPGOMismatch,
+ "Number of functions having mismatch profile in CSPGO.");
+STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
// Command line option to specify the file to read profile from. This is
// mainly used for testing.
@@ -384,7 +397,8 @@ class PGOInstrumentationGenLegacyPass : public ModulePass {
public:
static char ID;
- PGOInstrumentationGenLegacyPass() : ModulePass(ID) {
+ PGOInstrumentationGenLegacyPass(bool IsCS = false)
+ : ModulePass(ID), IsCS(IsCS) {
initializePGOInstrumentationGenLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -392,6 +406,8 @@ public:
StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
private:
+ // Whether this is context-sensitive instrumentation.
+ bool IsCS;
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -404,8 +420,8 @@ public:
static char ID;
// Provide the profile filename as the parameter.
- PGOInstrumentationUseLegacyPass(std::string Filename = "")
- : ModulePass(ID), ProfileFileName(std::move(Filename)) {
+ PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
+ : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
initializePGOInstrumentationUseLegacyPassPass(
@@ -416,14 +432,38 @@ public:
private:
std::string ProfileFileName;
+ // Whether this is a context-sensitive instrumentation use.
+ bool IsCS;
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
+class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
+public:
+ static char ID;
+ StringRef getPassName() const override {
+ return "PGOInstrumentationGenCreateVarPass";
+ }
+ PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
+ : ModulePass(ID), InstrProfileOutput(CSInstrName) {
+ initializePGOInstrumentationGenCreateVarLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+private:
+ bool runOnModule(Module &M) override {
+ createProfileFileNameVar(M, InstrProfileOutput);
+ createIRLevelProfileFlagVar(M, true);
+ return false;
+ }
+ std::string InstrProfileOutput;
+};
+
} // end anonymous namespace
char PGOInstrumentationGenLegacyPass::ID = 0;
@@ -435,8 +475,8 @@ INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
"PGO instrumentation.", false, false)
-ModulePass *llvm::createPGOInstrumentationGenLegacyPass() {
- return new PGOInstrumentationGenLegacyPass();
+ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
+ return new PGOInstrumentationGenLegacyPass(IsCS);
}
char PGOInstrumentationUseLegacyPass::ID = 0;
@@ -445,11 +485,25 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
"Read PGO instrumentation profile.", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
"Read PGO instrumentation profile.", false, false)
-ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) {
- return new PGOInstrumentationUseLegacyPass(Filename.str());
+ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
+ bool IsCS) {
+ return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
+}
+
+char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
+
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
+ "pgo-instr-gen-create-var",
+ "Create PGO instrumentation version variable for CSPGO.", false,
+ false)
+
+ModulePass *
+llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
+ return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
}
namespace {
@@ -490,6 +544,12 @@ struct BBInfo {
const std::string infoString() const {
return (Twine("Index=") + Twine(Index)).str();
}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
};
// This class implements the CFG edges. Note the CFG can be a multi-graph.
@@ -497,6 +557,9 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
private:
Function &F;
+ // Whether this is context-sensitive instrumentation.
+ bool IsCS;
+
// A map that stores the Comdat group in function F.
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
@@ -516,6 +579,10 @@ public:
// The Minimum Spanning Tree of function CFG.
CFGMST<Edge, BBInfo> MST;
+ // Collect all the BBs that will be instrumented, and store them in
+ // InstrumentBBs.
+ void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
+
// Given an edge, find the BB that will be instrumented.
// Return nullptr if there is no BB to be instrumented.
BasicBlock *getInstrBB(Edge *E);
@@ -536,15 +603,23 @@ public:
Function &Func,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFI = nullptr)
- : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1),
- SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) {
+ BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
+ : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
+ ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
+ MST(F, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
MIVisitor.countMemIntrinsics(Func);
- NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
- NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
- ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+ if (!IsCS) {
+ NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ NumOfPGOBB += MST.BBInfos.size();
+ ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
+ } else {
+ NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ NumOfCSPGOBB += MST.BBInfos.size();
+ }
ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
FuncName = getPGOFuncName(F);
@@ -553,28 +628,17 @@ public:
renameComdatFunction();
LLVM_DEBUG(dumpInfo("after CFGMST"));
- NumOfPGOBB += MST.BBInfos.size();
for (auto &E : MST.AllEdges) {
if (E->Removed)
continue;
- NumOfPGOEdge++;
+ IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
if (!E->InMST)
- NumOfPGOInstrument++;
+ IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
}
if (CreateGlobalVar)
FuncNameVar = createPGOFuncNameVar(F, FuncName);
}
-
- // Return the number of profile counters needed for the function.
- unsigned getNumCounters() {
- unsigned NumCounters = 0;
- for (auto &E : this->MST.AllEdges) {
- if (!E->InMST && !E->Removed)
- NumCounters++;
- }
- return NumCounters + SIVisitor.getNumOfSelectInsts();
- }
};
} // end anonymous namespace
@@ -598,9 +662,17 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
}
}
JC.update(Indexes);
+
+ // Hash format for context sensitive profile. Reserve 4 bits for other
+ // information.
FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
(uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
+ //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
(uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ // Reserve bits 60-63 for other information.
+ FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ if (IsCS)
+ NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
<< " CRC = " << JC.getCRC()
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
@@ -681,6 +753,36 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
}
}
+// Collect all the BBs that will be instrumented and return them in
+// InstrumentBBs, and set up InEdges/OutEdges for UseBBInfo.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
+ std::vector<BasicBlock *> &InstrumentBBs) {
+ // Use a worklist as we will update the vector during the iteration.
+ std::vector<Edge *> EdgeList;
+ EdgeList.reserve(MST.AllEdges.size());
+ for (auto &E : MST.AllEdges)
+ EdgeList.push_back(E.get());
+
+ for (auto &E : EdgeList) {
+ BasicBlock *InstrBB = getInstrBB(E);
+ if (InstrBB)
+ InstrumentBBs.push_back(InstrBB);
+ }
+
+ // Set up InEdges/OutEdges for all BBs.
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ BBInfo &SrcInfo = getBBInfo(SrcBB);
+ BBInfo &DestInfo = getBBInfo(DestBB);
+ SrcInfo.addOutEdge(E.get());
+ DestInfo.addInEdge(E.get());
+ }
+}
+
// Given a CFG E to be instrumented, find which BB to place the instrumented
// code. The function will split the critical edge if necessary.
template <class Edge, class BBInfo>
@@ -696,46 +798,64 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
if (DestBB == nullptr)
return SrcBB;
+ auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
+ // There are basic blocks (such as catchswitch) that cannot be instrumented.
+ // If the returned first insertion point is the end of BB, skip this BB.
+ if (BB->getFirstInsertionPt() == BB->end())
+ return nullptr;
+ return BB;
+ };
+
// Instrument the SrcBB if it has a single successor,
// otherwise, the DestBB if this is not a critical edge.
Instruction *TI = SrcBB->getTerminator();
if (TI->getNumSuccessors() <= 1)
- return SrcBB;
+ return canInstrument(SrcBB);
if (!E->IsCritical)
- return DestBB;
+ return canInstrument(DestBB);
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ if (!InstrBB) {
+ LLVM_DEBUG(
+ dbgs() << "Fail to split critical edge: not instrument this edge.\n");
+ return nullptr;
+ }
// For a critical edge, we have to split. Instrument the newly
// created BB.
- NumOfPGOSplit++;
+ IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
<< " --> " << getBBInfo(DestBB).Index << "\n");
- unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
- BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
- assert(InstrBB && "Critical edge is not split");
-
+ // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
+ MST.addEdge(SrcBB, InstrBB, 0);
+ // Second one: Add new edge of InstrBB->DestBB.
+ Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
+ NewEdge1.InMST = true;
E->Removed = true;
- return InstrBB;
+
+ return canInstrument(InstrBB);
}
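
The decision order in getInstrBB above, stripped down to plain flags, looks roughly like the sketch below; it ignores the DestBB == nullptr case and the canInstrument filtering, so it is a simplification of the logic rather than the real code.

    #include <cstdio>

    enum Placement { UseSrcBB, UseDestBB, UseSplitBlock, SkipEdge };

    // Mirrors the order of checks above: single-successor source blocks are
    // instrumented directly, non-critical edges in the destination block, and
    // critical edges only after a successful split.
    static Placement pickInstrBlock(unsigned SrcSuccessors, bool IsCritical,
                                    bool SplitSucceeded) {
      if (SrcSuccessors <= 1)
        return UseSrcBB;
      if (!IsCritical)
        return UseDestBB;
      return SplitSucceeded ? UseSplitBlock : SkipEdge;
    }

    int main() {
      printf("%d\n", pickInstrBlock(2, /*IsCritical=*/true, /*SplitSucceeded=*/true));
    }
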
// Visit all edges and instrument the edges not in MST, and do value profiling.
// Critical edges will be split.
static void instrumentOneFunc(
Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
- std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ bool IsCS) {
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
+
FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
- BFI);
- unsigned NumCounters = FuncInfo.getNumCounters();
+ BFI, IsCS);
+ std::vector<BasicBlock *> InstrumentBBs;
+ FuncInfo.getInstrumentBBs(InstrumentBBs);
+ unsigned NumCounters =
+ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
- for (auto &E : FuncInfo.MST.AllEdges) {
- BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
- if (!InstrBB)
- continue;
-
+ for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
"Cannot get the Instrumentation point");
@@ -831,6 +951,18 @@ struct UseBBInfo : public BBInfo {
return BBInfo::infoString();
return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
}
+
+ // Add an OutEdge and update the edge count.
+ void addOutEdge(PGOUseEdge *E) {
+ OutEdges.push_back(E);
+ UnknownCountOutEdge++;
+ }
+
+ // Add an InEdge and update the edge count.
+ void addInEdge(PGOUseEdge *E) {
+ InEdges.push_back(E);
+ UnknownCountInEdge++;
+ }
};
} // end anonymous namespace
@@ -853,10 +985,10 @@ public:
PGOUseFunc(Function &Func, Module *Modu,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFIin = nullptr)
+ BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
: F(Func), M(Modu), BFI(BFIin),
- FuncInfo(Func, ComdatMembers, false, BPI, BFIin),
- FreqAttr(FFA_Normal) {}
+ FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
+ FreqAttr(FFA_Normal), IsCS(IsCS) {}
// Read counts for the instrumented BB from profile.
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
@@ -929,8 +1061,11 @@ private:
// Function hotness info derived from profile.
FuncFreqAttr FreqAttr;
- // Find the Instrumented BB and set the value.
- void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+ // Whether to use the context-sensitive profile.
+ bool IsCS;
+
+ // Find the Instrumented BB and set the value. Return false on error.
+ bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
// Set the edge counter value for the unknown edge -- there should be only
// one unknown edge.
@@ -959,41 +1094,64 @@ private:
} // end anonymous namespace
// Visit all the edges and assign the count value for the instrumented
-// edges and the BB.
-void PGOUseFunc::setInstrumentedCounts(
+// edges and the BB. Return false on error.
+bool PGOUseFunc::setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile) {
- assert(FuncInfo.getNumCounters() == CountFromProfile.size());
- // Use a worklist as we will update the vector during the iteration.
- std::vector<PGOUseEdge *> WorkList;
- for (auto &E : FuncInfo.MST.AllEdges)
- WorkList.push_back(E.get());
+ std::vector<BasicBlock *> InstrumentBBs;
+ FuncInfo.getInstrumentBBs(InstrumentBBs);
+ unsigned NumCounters =
+ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+ // The number of counters here should match the number of counters
+ // in the profile. Return false if they mismatch.
+ if (NumCounters != CountFromProfile.size()) {
+ return false;
+ }
+ // Set the profile count for the instrumented BBs.
uint32_t I = 0;
- for (auto &E : WorkList) {
- BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
- if (!InstrBB)
- continue;
+ for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
- if (!E->Removed) {
- getBBInfo(InstrBB).setBBInfoCount(CountValue);
- E->setEdgeCount(CountValue);
- continue;
- }
-
- // Need to add two new edges.
- BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
- BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
- // Add new edge of SrcBB->InstrBB.
- PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
- NewEdge.setEdgeCount(CountValue);
- // Add new edge of InstrBB->DestBB.
- PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
- NewEdge1.setEdgeCount(CountValue);
- NewEdge1.InMST = true;
- getBBInfo(InstrBB).setBBInfoCount(CountValue);
+ UseBBInfo &Info = getBBInfo(InstrBB);
+ Info.setBBInfoCount(CountValue);
}
ProfileCountSize = CountFromProfile.size();
CountPosition = I;
+
+ // Set the edge count and update the count of unknown edges for BBs.
+ auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
+ E->setEdgeCount(Value);
+ this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ this->getBBInfo(E->DestBB).UnknownCountInEdge--;
+ };
+
+ // Set the profile count for the instrumented edges. There are edges that are
+ // not in the MST but are not instrumented. We need to set their edge count
+ // value so that we can populate the profile counts later.
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed || E->InMST)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+
+ // If only one out-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
+ setEdgeCount(E.get(), SrcInfo.CountValue);
+ else {
+ const BasicBlock *DestBB = E->DestBB;
+ UseBBInfo &DestInfo = getBBInfo(DestBB);
+ // If only one in-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
+ setEdgeCount(E.get(), DestInfo.CountValue);
+ }
+ if (E->CountValid)
+ continue;
+ // E's count should have been set from the profile. If not, this means E skips
+ // the instrumentation. We set the count to 0.
+ setEdgeCount(E.get(), 0);
+ }
+ return true;
}
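
The single-out-edge inference used in this hunk can be pictured with toy structures; PGOUseEdge and UseBBInfo are replaced below by minimal stand-ins, so this is only a sketch of the rule, not the pass's data model.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct EdgeCount { uint64_t Count = 0; bool Valid = false; };
    struct BlockCount {
      uint64_t Count = 0;
      bool Valid = false;
      std::vector<EdgeCount *> OutEdges;
    };

    int main() {
      EdgeCount E;          // an edge whose counter was not read from the profile
      BlockCount B;
      B.Count = 42;         // instrumented block count taken from the profile
      B.Valid = true;
      B.OutEdges.push_back(&E);
      // Rule from the pass: a block with a known count and a single out-edge
      // forces that edge's count; an edge still unknown afterwards defaults to 0.
      if (B.Valid && B.OutEdges.size() == 1) {
        E.Count = B.Count;
        E.Valid = true;
      }
      printf("edge count = %llu\n", (unsigned long long)E.Count); // prints 42
    }
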
// Set the count value for the unknown edge. There should be one and only one
@@ -1022,23 +1180,31 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
auto Err = IPE.get();
bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+ << FuncInfo.FuncName << ": ");
if (Err == instrprof_error::unknown_function) {
- NumOfPGOMissing++;
+ IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
} else if (Err == instrprof_error::hash_mismatch ||
Err == instrprof_error::malformed) {
- NumOfPGOMismatch++;
+ IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
SkipWarning =
NoPGOWarnMismatch ||
(NoPGOWarnMismatchComdat &&
(F.hasComdat() ||
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
}
+ LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
if (SkipWarning)
return;
- std::string Msg = IPE.message() + std::string(" ") + F.getName().str();
+ std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
+ std::string(" Hash = ") +
+ std::to_string(FuncInfo.FunctionHash);
+
Ctx.diagnose(
DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
});
@@ -1047,7 +1213,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
ProfileRecord = std::move(Result.get());
std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
- NumOfPGOFunc++;
+ IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
uint64_t ValueSum = 0;
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
@@ -1061,34 +1227,23 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros)
getBBInfo(nullptr).UnknownCountOutEdge = 2;
getBBInfo(nullptr).UnknownCountInEdge = 2;
- setInstrumentedCounts(CountFromProfile);
- ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+ if (!setInstrumentedCounts(CountFromProfile)) {
+ LLVM_DEBUG(
+ dbgs() << "Inconsistent number of counts, skipping this function");
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M->getName().data(),
+ Twine("Inconsistent number of counts in ") + F.getName().str()
+ + Twine(": the profile may be stale or there is a function name collision."),
+ DS_Warning));
+ return false;
+ }
+ ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
return true;
}
// Populate the counters from instrumented BBs to all BBs.
// In the end of this operation, all BBs should have a valid count value.
void PGOUseFunc::populateCounters() {
- // First set up Count variable for all BBs.
- for (auto &E : FuncInfo.MST.AllEdges) {
- if (E->Removed)
- continue;
-
- const BasicBlock *SrcBB = E->SrcBB;
- const BasicBlock *DestBB = E->DestBB;
- UseBBInfo &SrcInfo = getBBInfo(SrcBB);
- UseBBInfo &DestInfo = getBBInfo(DestBB);
- SrcInfo.OutEdges.push_back(E.get());
- DestInfo.InEdges.push_back(E.get());
- SrcInfo.UnknownCountOutEdge++;
- DestInfo.UnknownCountInEdge++;
-
- if (!E->CountValid)
- continue;
- DestInfo.UnknownCountInEdge--;
- SrcInfo.UnknownCountOutEdge--;
- }
-
bool Changes = true;
unsigned NumPasses = 0;
while (Changes) {
@@ -1167,7 +1322,8 @@ void PGOUseFunc::populateCounters() {
// Assign the scaled count values to the BB with multiple out edges.
void PGOUseFunc::setBranchWeights() {
// Generate MD_prof metadata for every branch instruction.
- LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n");
+ LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
+ << " IsCS=" << IsCS << "\n");
for (auto &BB : F) {
Instruction *TI = BB.getTerminator();
if (TI->getNumSuccessors() < 2)
@@ -1175,6 +1331,7 @@ void PGOUseFunc::setBranchWeights() {
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
isa<IndirectBrInst>(TI)))
continue;
+
if (getBBInfo(&BB).CountValue == 0)
continue;
@@ -1282,7 +1439,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
Type *Int64Ty = Builder.getInt64Ty();
Type *I8PtrTy = Builder.getInt8PtrTy();
Value *Length = MI.getLength();
- assert(!dyn_cast<ConstantInt>(Length));
+ assert(!isa<ConstantInt>(Length));
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
{ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
@@ -1325,8 +1482,14 @@ void PGOUseFunc::annotateValueSites() {
annotateValueSites(Kind);
}
+static const char *ValueProfKindDescr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
// Annotate the instructions for a specific value kind.
void PGOUseFunc::annotateValueSites(uint32_t Kind) {
+ assert(Kind <= IPVK_Last);
unsigned ValueSiteIndex = 0;
auto &ValueSites = FuncInfo.ValueSites[Kind];
unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
@@ -1334,8 +1497,10 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
auto &Ctx = M->getContext();
Ctx.diagnose(DiagnosticInfoPGOProfile(
M->getName().data(),
- Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) +
- " in " + F.getName().str(),
+ Twine("Inconsistent number of value sites for ") +
+ Twine(ValueProfKindDescr[Kind]) +
+ Twine(" profiling in \"") + F.getName().str() +
+ Twine("\", possibly due to the use of a stale profile."),
DS_Warning));
return;
}
@@ -1352,24 +1517,6 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
}
}
-// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
-// aware this is an ir_level profile so it can set the version flag.
-static void createIRLevelProfileFlagVariable(Module &M) {
- Type *IntTy64 = Type::getInt64Ty(M.getContext());
- uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
- auto IRLevelVersionVariable = new GlobalVariable(
- M, IntTy64, true, GlobalVariable::ExternalLinkage,
- Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)),
- INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
- IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
- Triple TT(M.getTargetTriple());
- if (!TT.supportsCOMDAT())
- IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage);
- else
- IRLevelVersionVariable->setComdat(M.getOrInsertComdat(
- StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
-}
-
// Collect the set of members for each Comdat in module M and store
// in ComdatMembers.
static void collectComdatMembers(
@@ -1390,8 +1537,11 @@ static void collectComdatMembers(
static bool InstrumentAllFunctions(
Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
- function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
- createIRLevelProfileFlagVariable(M);
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+ // For the context-sensitive instrumentation, we should have a separate pass
+ // (before LTO/ThinLTO linking) to create these variables.
+ if (!IsCS)
+ createIRLevelProfileFlagVar(M, /* IsCS */ false);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1400,11 +1550,18 @@ static bool InstrumentAllFunctions(
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
- instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
+ instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
}
return true;
}
+PreservedAnalyses
+PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
+ createProfileFileNameVar(M, CSInstrName);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true);
+ return PreservedAnalyses::all();
+}
+
bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
@@ -1415,7 +1572,7 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
auto LookupBFI = [this](Function &F) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return InstrumentAllFunctions(M, LookupBPI, LookupBFI);
+ return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
}
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
@@ -1429,7 +1586,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
- if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI))
+ if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1438,7 +1595,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
- function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
@@ -1459,6 +1616,9 @@ static bool annotateAllFunctions(
StringRef("Cannot get PGOReader")));
return false;
}
+ if (!PGOReader->hasCSIRLevelProfile() && IsCS)
+ return false;
+
// TODO: might need to change the warning once the clang option is finalized.
if (!PGOReader->isIRLevelProfile()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
@@ -1478,7 +1638,7 @@ static bool annotateAllFunctions(
// Split indirectbr critical edges here before computing the MST rather than
// later in getInstrBB() to avoid invalidating it.
SplitIndirectBrCriticalEdges(F, BPI, BFI);
- PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
+ PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
bool AllZeros = false;
if (!Func.readCounters(PGOReader.get(), AllZeros))
continue;
@@ -1526,7 +1686,10 @@ static bool annotateAllFunctions(
}
}
}
- M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext()));
+ M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+ IsCS ? ProfileSummary::PSK_CSInstr
+ : ProfileSummary::PSK_Instr);
+
// Set function hotness attribute from the profile.
// We have to apply these attributes at the end because their presence
// can affect the BranchProbabilityInfo of any callers, resulting in an
@@ -1545,9 +1708,10 @@ static bool annotateAllFunctions(
}
PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
- std::string RemappingFilename)
+ std::string RemappingFilename,
+ bool IsCS)
: ProfileFileName(std::move(Filename)),
- ProfileRemappingFileName(std::move(RemappingFilename)) {
+ ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
if (!PGOTestProfileRemappingFile.empty())
@@ -1567,7 +1731,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
};
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
- LookupBPI, LookupBFI))
+ LookupBPI, LookupBFI, IsCS))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
@@ -1584,7 +1748,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
- return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI);
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
+ IsCS);
}
static std::string getSimpleNodeName(const BasicBlock *Node) {
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 2c71e75dadcc..188f95b4676b 100644
--- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -1,9 +1,8 @@
//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,12 +19,12 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
diff --git a/lib/Transforms/Instrumentation/PoisonChecking.cpp b/lib/Transforms/Instrumentation/PoisonChecking.cpp
new file mode 100644
index 000000000000..81d92e724c7d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -0,0 +1,357 @@
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit. That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef. There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language. If you're looking for something to see if your C/C++ contains
+// UB, this is not it.
+//
+// The rewritten semantics of each instruction will include the following
+// components:
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+// state of each input to whether the dynamic output of the instruction
+// produces poison.
+// 3) A flag validation rule which validates any poison producing flags on the
+// instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+// execute undefined behavior given the poison state of its inputs.
+//
+// At the moment, the UB detection is done in a best effort manner; that is,
+// the resulting code may produce a false negative result (not report UB when
+// it actually exists according to the LangRef spec), but should never produce
+// a false positive (report UB where it doesn't exist). The intention is to
+// eventually support a "strict" mode which never dynamically reports a false
+// negative at the cost of rejecting some valid inputs to translation.
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+// from the LangRef.
+// - Validating the output of an IR fuzzer to ensure that all programs produced
+// are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+// status of an input/IR pair is the same before and after an optimization
+// transform.
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+// exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+// - Most validation rules are not yet implemented for instructions with
+// poison-relevant flags. At the moment, only nsw/nuw on add/sub are supported.
+// - UB which is control dependent on a branch on poison is not yet
+// reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled. As such,
+// storing poison to memory and then reloading it will cause a false negative
+// as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled. At the
+// moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled. In particular, the optimizer's freedom to pick
+// concrete values for undef bits so as to maximize potential for producing
+// poison is not modeled.
+//
+//===----------------------------------------------------------------------===//
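
As a rough, standalone illustration of components 1-3 above for a single add with nsw, the sketch below uses plain C++ and __builtin_add_overflow (a GCC/Clang builtin) to model the overflow check; it is not the IR this pass actually emits.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t A = INT32_MAX, B = 1;
      bool PoisonA = false, PoisonB = false;            // dynamic poison state of the inputs
      int32_t R;
      bool Overflow = __builtin_add_overflow(A, B, &R); // 1) the original add, 3) nsw validation
      bool PoisonR = PoisonA || PoisonB || Overflow;    // 2) propagation rule for the result
      // 4) a check rule would call a trap handler if a poisoned value reached an
      //    operation with UB on poison; here we just report the state.
      printf("R = %d, poison = %d\n", R, PoisonR);
    }
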
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+ cl::init(false),
+ cl::desc("Check that returns are non-poison (for testing)"));
+
+
+static bool isConstantFalse(Value* V) {
+ assert(V->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(V))
+ return CI->isZero();
+ return false;
+}
+
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+ if (Ops.size() == 0)
+ return B.getFalse();
+ unsigned i = 0;
+ for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
+ if (i == Ops.size())
+ return B.getFalse();
+ Value *Accum = Ops[i++];
+ for (; i < Ops.size(); i++)
+ if (!isConstantFalse(Ops[i]))
+ Accum = B.CreateOr(Accum, Ops[i]);
+ return Accum;
+}
+
+static void generatePoisonChecksForBinOp(Instruction &I,
+ SmallVector<Value*, 2> &Checks) {
+ assert(isa<BinaryOperator>(I));
+
+ IRBuilder<> B(&I);
+ Value *LHS = I.getOperand(0);
+ Value *RHS = I.getOperand(1);
+ switch (I.getOpcode()) {
+ default:
+ return;
+ case Instruction::Add: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Sub: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Mul: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::UDiv: {
+ if (I.isExact()) {
+ auto *Check =
+ B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
+ ConstantInt::get(LHS->getType(), 0));
+ Checks.push_back(Check);
+ }
+ break;
+ }
+ case Instruction::SDiv: {
+ if (I.isExact()) {
+ auto *Check =
+ B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
+ ConstantInt::get(LHS->getType(), 0));
+ Checks.push_back(Check);
+ }
+ break;
+ }
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl: {
+ Value *ShiftCheck =
+ B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
+ ConstantInt::get(RHS->getType(),
+ LHS->getType()->getScalarSizeInBits()));
+ Checks.push_back(ShiftCheck);
+ break;
+ }
+ };
+}
+
+static Value* generatePoisonChecks(Instruction &I) {
+ IRBuilder<> B(&I);
+ SmallVector<Value*, 2> Checks;
+ if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
+ generatePoisonChecksForBinOp(I, Checks);
+
+ // Handle non-binops separately
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case Instruction::ExtractElement: {
+ Value *Vec = I.getOperand(0);
+ if (Vec->getType()->getVectorIsScalable())
+ break;
+ Value *Idx = I.getOperand(1);
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
+ Value *Check =
+ B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+ ConstantInt::get(Idx->getType(), NumElts));
+ Checks.push_back(Check);
+ break;
+ }
+ case Instruction::InsertElement: {
+ Value *Vec = I.getOperand(0);
+ if (Vec->getType()->getVectorIsScalable())
+ break;
+ Value *Idx = I.getOperand(2);
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
+ Value *Check =
+ B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+ ConstantInt::get(Idx->getType(), NumElts));
+ Checks.push_back(Check);
+ break;
+ }
+ };
+ return buildOrChain(B, Checks);
+}
+
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+ auto Itr = ValToPoison.find(V);
+ if (Itr != ValToPoison.end())
+ return Itr->second;
+ if (isa<Constant>(V)) {
+ return ConstantInt::getFalse(V->getContext());
+ }
+ // Return false for unknown values - this implements a non-strict mode where
+ // unhandled IR constructs are simply considered to never produce poison. At
+ // some point in the future, we probably want a "strict mode" for testing if
+ // nothing else.
+ return ConstantInt::getFalse(V->getContext());
+}
+
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ if (CI->isAllOnesValue())
+ return;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ M->getOrInsertFunction("__poison_checker_assert",
+ Type::getVoidTy(M->getContext()),
+ Type::getInt1Ty(M->getContext()));
+ Function *TrapFunc = M->getFunction("__poison_checker_assert");
+ B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ CreateAssert(B, B.CreateNot(Cond));
+}
+
+static bool rewrite(Function &F) {
+ auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+ DenseMap<Value *, Value *> ValToPoison;
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ auto *NewPHI = PHINode::Create(Int1Ty,
+ OldPHI->getNumIncomingValues());
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+ NewPHI->addIncoming(UndefValue::get(Int1Ty),
+ OldPHI->getIncomingBlock(i));
+ NewPHI->insertBefore(OldPHI);
+ ValToPoison[OldPHI] = NewPHI;
+ }
+
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB) {
+ if (isa<PHINode>(I)) continue;
+
+ IRBuilder<> B(cast<Instruction>(&I));
+
+ // Note: There are many more sources of documented UB, but this pass only
+ // attempts to find UB triggered by propagation of poison.
+ if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+
+ if (LocalCheck)
+ if (auto *RI = dyn_cast<ReturnInst>(&I))
+ if (RI->getNumOperands() != 0) {
+ Value *Op = RI->getOperand(0);
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+ }
+
+ SmallVector<Value*, 4> Checks;
+ if (propagatesFullPoison(&I))
+ for (Value *V : I.operands())
+ Checks.push_back(getPoisonFor(ValToPoison, V));
+
+ if (auto *Check = generatePoisonChecks(I))
+ Checks.push_back(Check);
+ ValToPoison[&I] = buildOrChain(B, Checks);
+ }
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ if (!ValToPoison.count(OldPHI))
+ continue; // skip the newly inserted phis
+ auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+ auto *OldVal = OldPHI->getIncomingValue(i);
+ NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+ }
+ }
+ return true;
+}
+
+
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ bool Changed = false;
+ for (auto &F : M)
+ Changed |= rewrite(F);
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+/* Major TODO Items:
+ - Control dependent poison UB
+ - Strict mode - (i.e. must analyze every operand)
+ - Poison through memory
+ - Function ABIs
+ - Full coverage of intrinsics, etc. (ouch)
+
+ Instructions w/Unclear Semantics:
+ - shufflevector - It would seem reasonable for an out-of-bounds mask element
+ to produce poison, but the LangRef does not state this.
+ - and/or - It would seem reasonable for poison to propagate from both
+ arguments, but the LangRef doesn't state this, and propagatesFullPoison doesn't
+ include these two.
+ - all binary ops w/vector operands - The likely interpretation would be that
+ any element overflowing should produce poison for the entire result, but
+ the LangRef does not state this.
+ - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
+ strange that only certain flags should be documented as producing poison.
+
+ Cases of clear poison semantics not yet implemented:
+ - Exact flags on ashr/lshr produce poison
+ - NSW/NUW flags on shl produce poison
+ - Inbounds flag on getelementptr produces poison
+ - fptosi/fptoui (out of bounds input) produce poison
+ - Scalable vector types for insertelement/extractelement
+ - Floating point binary ops w/fmf nnan/noinfs flags produce poison
+ */
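(Editorial sketch, not part of the imported patch.) generatePoisonChecks(), called above, is expected to return an i1 "poison flag" for an instruction. For an add carrying the nsw flag, one plausible way to compute that flag with LLVM's IRBuilder is via the llvm.sadd.with.overflow intrinsic; the helper name below is illustrative only:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Editorial sketch: the overflow bit of llvm.sadd.with.overflow acts as the
// poison flag for an 'add nsw' with operands LHS and RHS.
static Value *emitNSWAddPoisonFlag(IRBuilder<> &B, Value *LHS, Value *RHS) {
  Module *M = B.GetInsertBlock()->getModule();
  // Declare (or reuse) llvm.sadd.with.overflow for the operand type.
  Function *SAddOv = Intrinsic::getDeclaration(
      M, Intrinsic::sadd_with_overflow, {LHS->getType()});
  Value *Pair = B.CreateCall(SAddOv, {LHS, RHS});
  // Element 1 of the returned {iN, i1} pair is the overflow bit; if it is
  // set, the nsw add would have produced poison.
  return B.CreateExtractValue(Pair, 1, "nsw.poison");
}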
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 0ba8d5765e8c..ca0cb4bdbe84 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -1,9 +1,8 @@
//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,7 +61,10 @@ static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
-static const char *const SanCovModuleCtorName = "sancov.module_ctor";
+static const char *const SanCovModuleCtorTracePcGuardName =
+ "sancov.module_ctor_trace_pc_guard";
+static const char *const SanCovModuleCtor8bitCountersName =
+ "sancov.module_ctor_8bit_counters";
static const uint64_t SanCtorAndDtorPriority = 2;
static const char *const SanCovTracePCGuardName =
@@ -210,8 +212,9 @@ private:
void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
bool IsLeafFunc = true);
- Function *CreateInitCallsForSections(Module &M, const char *InitFunctionName,
- Type *Ty, const char *Section);
+ Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+ const char *InitFunctionName, Type *Ty,
+ const char *Section);
std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
Type *Ty);
@@ -223,13 +226,13 @@ private:
std::string getSectionName(const std::string &Section) const;
std::string getSectionStart(const std::string &Section) const;
std::string getSectionEnd(const std::string &Section) const;
- Function *SanCovTracePCIndir;
- Function *SanCovTracePC, *SanCovTracePCGuard;
- Function *SanCovTraceCmpFunction[4];
- Function *SanCovTraceConstCmpFunction[4];
- Function *SanCovTraceDivFunction[2];
- Function *SanCovTraceGepFunction;
- Function *SanCovTraceSwitchFunction;
+ FunctionCallee SanCovTracePCIndir;
+ FunctionCallee SanCovTracePC, SanCovTracePCGuard;
+ FunctionCallee SanCovTraceCmpFunction[4];
+ FunctionCallee SanCovTraceConstCmpFunction[4];
+ FunctionCallee SanCovTraceDivFunction[2];
+ FunctionCallee SanCovTraceGepFunction;
+ FunctionCallee SanCovTraceSwitchFunction;
GlobalVariable *SanCovLowestStack;
InlineAsm *EmptyAsm;
Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
@@ -270,24 +273,25 @@ SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
// Account for the fact that on windows-msvc __start_* symbols actually
// point to a uint64_t before the start of the array.
auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
- auto GEP = IRB.CreateGEP(SecStartI8Ptr,
+ auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
ConstantInt::get(IntptrTy, sizeof(uint64_t)));
return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
}
Function *SanitizerCoverageModule::CreateInitCallsForSections(
- Module &M, const char *InitFunctionName, Type *Ty,
+ Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
const char *Section) {
auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
auto SecStart = SecStartEnd.first;
auto SecEnd = SecStartEnd.second;
Function *CtorFunc;
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+ M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+ assert(CtorFunc->getName() == CtorName);
if (TargetTriple.supportsCOMDAT()) {
// Use comdat to dedup CtorFunc.
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+ CtorFunc->setComdat(M.getOrInsertComdat(CtorName));
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
} else {
appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
@@ -329,77 +333,74 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Int16Ty = IRB.getInt16Ty();
Int8Ty = IRB.getInt8Ty();
- SanCovTracePCIndir = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
+ SanCovTracePCIndir =
+ M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
+ // Make sure smaller parameters are zero-extended to i64 as required by the
+ // x86_64 ABI.
+ AttributeList SanCovTraceCmpZeroExtAL;
+ if (TargetTriple.getArch() == Triple::x86_64) {
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
+ }
+
SanCovTraceCmpFunction[0] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty()));
- SanCovTraceCmpFunction[1] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceCmp2, VoidTy, IRB.getInt16Ty(),
- IRB.getInt16Ty()));
- SanCovTraceCmpFunction[2] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceCmp4, VoidTy, IRB.getInt32Ty(),
- IRB.getInt32Ty()));
+ M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt8Ty(), IRB.getInt8Ty());
+ SanCovTraceCmpFunction[1] =
+ M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt16Ty(), IRB.getInt16Ty());
+ SanCovTraceCmpFunction[2] =
+ M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt32Ty(), IRB.getInt32Ty());
SanCovTraceCmpFunction[3] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty));
-
- SanCovTraceConstCmpFunction[0] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp1, VoidTy, Int8Ty, Int8Ty));
- SanCovTraceConstCmpFunction[1] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp2, VoidTy, Int16Ty, Int16Ty));
- SanCovTraceConstCmpFunction[2] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp4, VoidTy, Int32Ty, Int32Ty));
+ M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction(
+ SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty);
+ SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction(
+ SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty);
+ SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction(
+ SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty);
SanCovTraceConstCmpFunction[3] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty));
-
- SanCovTraceDivFunction[0] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceDiv4, VoidTy, IRB.getInt32Ty()));
+ M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ {
+ AttributeList AL;
+ if (TargetTriple.getArch() == Triple::x86_64)
+ AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceDivFunction[0] =
+ M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
+ }
SanCovTraceDivFunction[1] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceDiv8, VoidTy, Int64Ty));
+ M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty);
SanCovTraceGepFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceGep, VoidTy, IntptrTy));
+ M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
SanCovTraceSwitchFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy));
+ M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
Constant *SanCovLowestStackConstant =
M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
- SanCovLowestStack = cast<GlobalVariable>(SanCovLowestStackConstant);
+ SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+ if (!SanCovLowestStack) {
+ C->emitError(StringRef("'") + SanCovLowestStackName +
+ "' should not be declared by the user");
+ return true;
+ }
SanCovLowestStack->setThreadLocalMode(
GlobalValue::ThreadLocalMode::InitialExecTLSModel);
if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy));
- // Make sure smaller parameters are zero-extended to i64 as required by the
- // x86_64 ABI.
- if (TargetTriple.getArch() == Triple::x86_64) {
- for (int i = 0; i < 3; i++) {
- SanCovTraceCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
- SanCovTraceCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
- SanCovTraceConstCmpFunction[i]->addParamAttr(0, Attribute::ZExt);
- SanCovTraceConstCmpFunction[i]->addParamAttr(1, Attribute::ZExt);
- }
- SanCovTraceDivFunction[0]->addParamAttr(0, Attribute::ZExt);
- }
-
-
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
/*hasSideEffects=*/true);
- SanCovTracePC = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTracePCName, VoidTy));
- SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTracePCGuardName, VoidTy, Int32PtrTy));
+ SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+ SanCovTracePCGuard =
+ M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
for (auto &F : M)
runOnFunction(F);
@@ -407,14 +408,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Function *Ctor = nullptr;
if (FunctionGuardArray)
- Ctor = CreateInitCallsForSections(M, SanCovTracePCGuardInitName, Int32PtrTy,
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+ SanCovTracePCGuardInitName, Int32PtrTy,
SanCovGuardsSectionName);
if (Function8bitCounterArray)
- Ctor = CreateInitCallsForSections(M, SanCov8bitCountersInitName, Int8PtrTy,
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+ SanCov8bitCountersInitName, Int8PtrTy,
SanCovCountersSectionName);
if (Ctor && Options.PCTable) {
auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
- Function *InitFunction = declareSanitizerInitFunction(
+ FunctionCallee InitFunction = declareSanitizerInitFunction(
M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
@@ -458,12 +461,12 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
const DominatorTree *DT,
const PostDominatorTree *PDT,
const SanitizerCoverageOptions &Options) {
- // Don't insert coverage for unreachable blocks: we will never call
- // __sanitizer_cov() for them, so counting them in
+ // Don't insert coverage for blocks containing nothing but unreachable: we
+ // will never call __sanitizer_cov() for them, so counting them in
// NumberOfInstrumentedBlocks() might complicate calculation of code coverage
// percentage. Also, unreachable instructions frequently have no debug
// locations.
- if (isa<UnreachableInst>(BB->getTerminator()))
+ if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime()))
return false;
// Don't insert coverage into blocks without a valid insertion point
@@ -484,6 +487,37 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
&& !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
}
+
+// Returns true iff From->To is a backedge.
+// A twist here is that we treat From->To as a backedge if
+// * To dominates From or
+// * To->UniqueSuccessor dominates From
+static bool IsBackEdge(BasicBlock *From, BasicBlock *To,
+ const DominatorTree *DT) {
+ if (DT->dominates(To, From))
+ return true;
+ if (auto Next = To->getUniqueSuccessor())
+ if (DT->dominates(Next, From))
+ return true;
+ return false;
+}
+
+// Prunes uninteresting Cmp instrumentation:
+// * CMP instructions that feed into loop backedge branch.
+//
+// Note that Cmp pruning is controlled by the same flag as the
+// BB pruning.
+static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
+ const SanitizerCoverageOptions &Options) {
+ if (!Options.NoPrune)
+ if (CMP->hasOneUse())
+ if (auto BR = dyn_cast<BranchInst>(CMP->user_back()))
+ for (BasicBlock *B : BR->successors())
+ if (IsBackEdge(BR->getParent(), B, DT))
+ return false;
+ return true;
+}
+
bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (F.empty())
return false;
@@ -508,7 +542,7 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
return false;
if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
- SplitAllCriticalEdges(F);
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
SmallVector<Instruction *, 8> IndirCalls;
SmallVector<BasicBlock *, 16> BlocksToInstrument;
SmallVector<Instruction *, 8> CmpTraceTargets;
@@ -532,8 +566,9 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
IndirCalls.push_back(&Inst);
}
if (Options.TraceCmp) {
- if (isa<ICmpInst>(&Inst))
- CmpTraceTargets.push_back(&Inst);
+ if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst))
+ if (IsInterestingCmp(CMP, DT, Options))
+ CmpTraceTargets.push_back(&Inst);
if (isa<SwitchInst>(&Inst))
SwitchTraceTargets.push_back(&Inst);
}
@@ -797,9 +832,9 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
if (Options.Inline8bitCounters) {
auto CounterPtr = IRB.CreateGEP(
- Function8bitCounterArray,
+ Function8bitCounterArray->getValueType(), Function8bitCounterArray,
{ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
- auto Load = IRB.CreateLoad(CounterPtr);
+ auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
auto Store = IRB.CreateStore(Inc, CounterPtr);
SetNoSanitizeMetadata(Load);
@@ -812,7 +847,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
auto FrameAddrPtr =
IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
- auto LowestStack = IRB.CreateLoad(SanCovLowestStack);
+ auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack);
auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack);
auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
IRBuilder<> ThenIRB(ThenTerm);
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 077364e15c4f..5be13fa745cb 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -1,9 +1,8 @@
//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,25 +110,26 @@ private:
Type *IntptrTy;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
- Function *TsanFuncEntry;
- Function *TsanFuncExit;
- Function *TsanIgnoreBegin;
- Function *TsanIgnoreEnd;
+ FunctionCallee TsanFuncEntry;
+ FunctionCallee TsanFuncExit;
+ FunctionCallee TsanIgnoreBegin;
+ FunctionCallee TsanIgnoreEnd;
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
- Function *TsanRead[kNumberOfAccessSizes];
- Function *TsanWrite[kNumberOfAccessSizes];
- Function *TsanUnalignedRead[kNumberOfAccessSizes];
- Function *TsanUnalignedWrite[kNumberOfAccessSizes];
- Function *TsanAtomicLoad[kNumberOfAccessSizes];
- Function *TsanAtomicStore[kNumberOfAccessSizes];
- Function *TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1][kNumberOfAccessSizes];
- Function *TsanAtomicCAS[kNumberOfAccessSizes];
- Function *TsanAtomicThreadFence;
- Function *TsanAtomicSignalFence;
- Function *TsanVptrUpdate;
- Function *TsanVptrLoad;
- Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+ FunctionCallee TsanRead[kNumberOfAccessSizes];
+ FunctionCallee TsanWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicStore[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1]
+ [kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicCAS[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicThreadFence;
+ FunctionCallee TsanAtomicSignalFence;
+ FunctionCallee TsanVptrUpdate;
+ FunctionCallee TsanVptrLoad;
+ FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
Function *TsanCtorFunction;
};
@@ -189,14 +189,14 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
// Initialize the callbacks.
- TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- TsanFuncExit = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy()));
- TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy()));
- TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_ignore_thread_end", Attr, IRB.getVoidTy()));
+ TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
+ IRB.getVoidTy(), IRB.getInt8PtrTy());
+ TsanFuncExit =
+ M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy());
+ TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr,
+ IRB.getVoidTy());
+ TsanIgnoreEnd =
+ M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const unsigned ByteSize = 1U << i;
@@ -204,32 +204,30 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
std::string ByteSizeStr = utostr(ByteSize);
std::string BitSizeStr = utostr(BitSize);
SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
- TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy());
SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
- TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy());
SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
- TsanUnalignedRead[i] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanUnalignedRead[i] = M.getOrInsertFunction(
+ UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
- TsanUnalignedWrite[i] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
+ TsanUnalignedWrite[i] = M.getOrInsertFunction(
+ UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
- TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy));
+ TsanAtomicLoad[i] =
+ M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy);
SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
- TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy));
+ TsanAtomicStore[i] = M.getOrInsertFunction(
+ AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy);
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -252,34 +250,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
else
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
- TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy));
+ TsanAtomicRMW[op][i] =
+ M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy);
}
SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
"_compare_exchange_val");
- TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy));
+ TsanAtomicCAS[i] = M.getOrInsertFunction(AtomicCASName, Attr, Ty, PtrTy, Ty,
+ Ty, OrdTy, OrdTy);
}
- TsanVptrUpdate = checkSanitizerInterfaceFunction(
+ TsanVptrUpdate =
M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy()));
- TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
- TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy));
- TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy));
-
- MemmoveFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemcpyFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy));
- MemsetFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
+ TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr,
+ IRB.getVoidTy(), IRB.getInt8PtrTy());
+ TsanAtomicThreadFence = M.getOrInsertFunction("__tsan_atomic_thread_fence",
+ Attr, IRB.getVoidTy(), OrdTy);
+ TsanAtomicSignalFence = M.getOrInsertFunction("__tsan_atomic_signal_fence",
+ Attr, IRB.getVoidTy(), OrdTy);
+
+ MemmoveFn =
+ M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemcpyFn =
+ M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemsetFn =
+ M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
}
ThreadSanitizer::ThreadSanitizer(Module &M) {
@@ -291,7 +289,9 @@ ThreadSanitizer::ThreadSanitizer(Module &M) {
/*InitArgs=*/{},
// This callback is invoked when the functions are created the first
// time. Hook them into the global ctors list in that case:
- [&](Function *Ctor, Function *) { appendToGlobalCtors(M, Ctor, 0); });
+ [&](Function *Ctor, FunctionCallee) {
+ appendToGlobalCtors(M, Ctor, 0);
+ });
}
static bool isVtableAccess(Instruction *I) {
@@ -559,7 +559,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
: cast<LoadInst>(I)->getAlignment();
Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
- Value *OnAccessFunc = nullptr;
+ FunctionCallee OnAccessFunc = nullptr;
if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
else
@@ -659,7 +659,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
- Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+ FunctionCallee F = TsanAtomicRMW[RMWI->getOperation()][Idx];
if (!F)
return false;
const unsigned ByteSize = 1U << Idx;
@@ -706,8 +706,9 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
I->eraseFromParent();
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
- Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ?
- TsanAtomicSignalFence : TsanAtomicThreadFence;
+ FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread
+ ? TsanAtomicSignalFence
+ : TsanAtomicThreadFence;
CallInst *C = CallInst::Create(F, Args);
ReplaceInstWithInst(I, C);
}
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 7f6b157304a3..e1e95cd6a407 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -1,9 +1,8 @@
//===- ARCRuntimeEntryPoints.h - ObjC ARC Optimization ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,7 +33,7 @@
namespace llvm {
-class Constant;
+class Function;
class LLVMContext;
namespace objcarc {
@@ -70,7 +69,7 @@ public:
RetainAutoreleaseRV = nullptr;
}
- Constant *get(ARCRuntimeEntryPointKind kind) {
+ Function *get(ARCRuntimeEntryPointKind kind) {
assert(TheModule != nullptr && "Not initialized.");
switch (kind) {
@@ -106,33 +105,33 @@ private:
Module *TheModule = nullptr;
/// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
- Constant *AutoreleaseRV = nullptr;
+ Function *AutoreleaseRV = nullptr;
/// Declaration for ObjC runtime function objc_release.
- Constant *Release = nullptr;
+ Function *Release = nullptr;
/// Declaration for ObjC runtime function objc_retain.
- Constant *Retain = nullptr;
+ Function *Retain = nullptr;
/// Declaration for ObjC runtime function objc_retainBlock.
- Constant *RetainBlock = nullptr;
+ Function *RetainBlock = nullptr;
/// Declaration for ObjC runtime function objc_autorelease.
- Constant *Autorelease = nullptr;
+ Function *Autorelease = nullptr;
/// Declaration for objc_storeStrong().
- Constant *StoreStrong = nullptr;
+ Function *StoreStrong = nullptr;
/// Declaration for objc_retainAutoreleasedReturnValue().
- Constant *RetainRV = nullptr;
+ Function *RetainRV = nullptr;
/// Declaration for objc_retainAutorelease().
- Constant *RetainAutorelease = nullptr;
+ Function *RetainAutorelease = nullptr;
/// Declaration for objc_retainAutoreleaseReturnValue().
- Constant *RetainAutoreleaseRV = nullptr;
+ Function *RetainAutoreleaseRV = nullptr;
- Constant *getIntrinsicEntryPoint(Constant *&Decl, Intrinsic::ID IntID) {
+ Function *getIntrinsicEntryPoint(Function *&Decl, Intrinsic::ID IntID) {
if (Decl)
return Decl;
diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h
index 9ade14c1177a..2fa07cfb32c0 100644
--- a/lib/Transforms/ObjCARC/BlotMapVector.h
+++ b/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -1,9 +1,8 @@
//===- BlotMapVector.h - A MapVector with the blot operation ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4bd5fd1acd4c..e8f8fb6f3a7c 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -1,9 +1,8 @@
//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
index 0f13b02c806f..ed89c8c8fc89 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -1,9 +1,8 @@
//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- C++ -*-----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index c30aaebd0f4d..f4da51650a7d 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -1,9 +1,8 @@
//===-- ObjCARC.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index 751c8f30e814..d465630800b9 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -1,9 +1,8 @@
//===- ObjCARC.h - ObjC ARC Optimization --------------*- C++ -*-----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 8d3ef8fde534..b341dd807508 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index abe2871c0b8f..36aa513ec554 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -46,6 +45,10 @@ using namespace llvm::objcarc;
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
+static cl::opt<unsigned> MaxBBSize("arc-contract-max-bb-size", cl::Hidden,
+ cl::desc("Maximum basic block size to discover the dominance relation of "
+ "two instructions in the same basic block"), cl::init(65535));
+
//===----------------------------------------------------------------------===//
// Declarations
//===----------------------------------------------------------------------===//
@@ -140,7 +143,7 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
cast<CallInst>(Retain)->setCalledFunction(Decl);
LLVM_DEBUG(dbgs() << "New: " << *Retain << "\n");
@@ -189,7 +192,7 @@ bool ObjCARCContract::contractAutorelease(
" Retain: "
<< *Retain << "\n");
- Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
+ Function *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
: ARCRuntimeEntryPointKind::RetainAutorelease);
Retain->setCalledFunction(Decl);
@@ -314,8 +317,8 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
/// Create a call instruction with the correct funclet token. Should be used
/// instead of calling CallInst::Create directly.
static CallInst *
-createCallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
- Instruction *InsertBefore,
+createCallInst(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore,
const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
SmallVector<OperandBundleDef, 1> OpBundles;
if (!BlockColors.empty()) {
@@ -326,7 +329,15 @@ createCallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr,
OpBundles.emplace_back("funclet", EHPad);
}
- return CallInst::Create(Func, Args, OpBundles, NameStr, InsertBefore);
+ return CallInst::Create(FTy, Func, Args, OpBundles, NameStr, InsertBefore);
+}
+
+static CallInst *
+createCallInst(FunctionCallee Func, ArrayRef<Value *> Args, const Twine &NameStr,
+ Instruction *InsertBefore,
+ const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
+ return createCallInst(Func.getFunctionType(), Func.getCallee(), Args, NameStr,
+ InsertBefore, BlockColors);
}
/// Attempt to merge an objc_release with a store, load, and objc_retain to form
@@ -409,7 +420,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
CallInst *StoreStrong = createCallInst(Decl, Args, "", Store, BlockColors);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -432,102 +443,100 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(
}
bool ObjCARCContract::tryToPeepholeInstruction(
- Function &F, Instruction *Inst, inst_iterator &Iter,
- SmallPtrSetImpl<Instruction *> &DependingInsts,
- SmallPtrSetImpl<const BasicBlock *> &Visited,
- bool &TailOkForStoreStrongs,
- const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
- // Only these library routines return their argument. In particular,
- // objc_retainBlock does not necessarily return its argument.
+ Function &F, Instruction *Inst, inst_iterator &Iter,
+ SmallPtrSetImpl<Instruction *> &DependingInsts,
+ SmallPtrSetImpl<const BasicBlock *> &Visited, bool &TailOkForStoreStrongs,
+ const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
ARCInstKind Class = GetBasicARCInstKind(Inst);
- switch (Class) {
- case ARCInstKind::FusedRetainAutorelease:
- case ARCInstKind::FusedRetainAutoreleaseRV:
+ switch (Class) {
+ case ARCInstKind::FusedRetainAutorelease:
+ case ARCInstKind::FusedRetainAutoreleaseRV:
+ return false;
+ case ARCInstKind::Autorelease:
+ case ARCInstKind::AutoreleaseRV:
+ return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
+ case ARCInstKind::Retain:
+ // Attempt to convert retains to retainrvs if they are next to function
+ // calls.
+ if (!optimizeRetainCall(F, Inst))
return false;
- case ARCInstKind::Autorelease:
- case ARCInstKind::AutoreleaseRV:
- return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
- case ARCInstKind::Retain:
- // Attempt to convert retains to retainrvs if they are next to function
- // calls.
- if (!optimizeRetainCall(F, Inst))
- return false;
- // If we succeed in our optimization, fall through.
- LLVM_FALLTHROUGH;
- case ARCInstKind::RetainRV:
- case ARCInstKind::ClaimRV: {
- // If we're compiling for a target which needs a special inline-asm
- // marker to do the return value optimization, insert it now.
- if (!RVInstMarker)
- return false;
- BasicBlock::iterator BBI = Inst->getIterator();
- BasicBlock *InstParent = Inst->getParent();
-
- // Step up to see if the call immediately precedes the RV call.
- // If it's an invoke, we have to cross a block boundary. And we have
- // to carefully dodge no-op instructions.
- do {
- if (BBI == InstParent->begin()) {
- BasicBlock *Pred = InstParent->getSinglePredecessor();
- if (!Pred)
- goto decline_rv_optimization;
- BBI = Pred->getTerminator()->getIterator();
- break;
- }
- --BBI;
- } while (IsNoopInstruction(&*BBI));
-
- if (&*BBI == GetArgRCIdentityRoot(Inst)) {
- LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
- "optimization.\n");
- Changed = true;
- InlineAsm *IA = InlineAsm::get(
- FunctionType::get(Type::getVoidTy(Inst->getContext()),
- /*isVarArg=*/false),
- RVInstMarker->getString(),
- /*Constraints=*/"", /*hasSideEffects=*/true);
-
- createCallInst(IA, None, "", Inst, BlockColors);
- }
- decline_rv_optimization:
+ // If we succeed in our optimization, fall through.
+ LLVM_FALLTHROUGH;
+ case ARCInstKind::RetainRV:
+ case ARCInstKind::ClaimRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the return value optimization, insert it now.
+ if (!RVInstMarker)
return false;
- }
- case ARCInstKind::InitWeak: {
- // objc_initWeak(p, null) => *p = null
- CallInst *CI = cast<CallInst>(Inst);
- if (IsNullOrUndef(CI->getArgOperand(1))) {
- Value *Null =
- ConstantPointerNull::get(cast<PointerType>(CI->getType()));
- Changed = true;
- new StoreInst(Null, CI->getArgOperand(0), CI);
-
- LLVM_DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
- << " New = " << *Null << "\n");
-
- CI->replaceAllUsesWith(Null);
- CI->eraseFromParent();
+ BasicBlock::iterator BBI = Inst->getIterator();
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator()->getIterator();
+ break;
}
- return true;
+ --BBI;
+ } while (IsNoopInstruction(&*BBI));
+
+ if (&*BBI == GetArgRCIdentityRoot(Inst)) {
+ LLVM_DEBUG(dbgs() << "Adding inline asm marker for the return value "
+ "optimization.\n");
+ Changed = true;
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RVInstMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+
+ createCallInst(IA, None, "", Inst, BlockColors);
}
- case ARCInstKind::Release:
- // Try to form an objc store strong from our release. If we fail, there is
- // nothing further to do below, so continue.
- tryToContractReleaseIntoStoreStrong(Inst, Iter, BlockColors);
- return true;
- case ARCInstKind::User:
- // Be conservative if the function has any alloca instructions.
- // Technically we only care about escaping alloca instructions,
- // but this is sufficient to handle some interesting cases.
- if (isa<AllocaInst>(Inst))
- TailOkForStoreStrongs = false;
- return true;
- case ARCInstKind::IntrinsicUser:
- // Remove calls to @llvm.objc.clang.arc.use(...).
- Inst->eraseFromParent();
- return true;
- default:
- return true;
+ decline_rv_optimization:
+ return false;
+ }
+ case ARCInstKind::InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (IsNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+
+ LLVM_DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
+ << " New = " << *Null << "\n");
+
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
}
+ return true;
+ }
+ case ARCInstKind::Release:
+ // Try to form an objc store strong from our release. If we fail, there is
+ // nothing further to do below, so continue.
+ tryToContractReleaseIntoStoreStrong(Inst, Iter, BlockColors);
+ return true;
+ case ARCInstKind::User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ return true;
+ case ARCInstKind::IntrinsicUser:
+ // Remove calls to @llvm.objc.clang.arc.use(...).
+ Inst->eraseFromParent();
+ return true;
+ default:
+ return true;
+ }
}
//===----------------------------------------------------------------------===//
@@ -568,6 +577,24 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// reduces register pressure.
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Cache the basic block size.
+ DenseMap<const BasicBlock *, unsigned> BBSizeMap;
+
+ // A lambda that lazily computes the size of a basic block and determines
+ // whether the size exceeds MaxBBSize.
+ auto IsLargeBB = [&](const BasicBlock *BB) {
+ unsigned BBSize;
+ auto I = BBSizeMap.find(BB);
+
+ if (I != BBSizeMap.end())
+ BBSize = I->second;
+ else
+ BBSize = BBSizeMap[BB] = BB->size();
+
+ return BBSize > MaxBBSize;
+ };
+
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E;) {
Instruction *Inst = &*I++;
@@ -585,7 +612,7 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// and such; to do the replacement, the argument must have type i8*.
// Function for replacing uses of Arg dominated by Inst.
- auto ReplaceArgUses = [Inst, this](Value *Arg) {
+ auto ReplaceArgUses = [Inst, IsLargeBB, this](Value *Arg) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
return;
@@ -597,6 +624,17 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Use &U = *UI++;
unsigned OperandNo = U.getOperandNo();
+ // Don't replace the uses if Inst and the user belong to the same basic
+ // block and the size of the basic block is large. We don't want to call
+ // DominatorTree::dominate in that case. We can remove this check if we
+ // can use OrderedBasicBlock to compute the dominance relation between
+ // two instructions, but that's not currently possible since it doesn't
+ // recompute the instruction ordering when new instructions are inserted
+ into the basic block.
+ if (Inst->getParent() == cast<Instruction>(U.getUser())->getParent() &&
+ IsLargeBB(Inst->getParent()))
+ continue;
+
// If the call's return value dominates a use of the call's argument
// value, rewrite the use to use the return value. We check for
// reachability here because an unreachable call is considered to
@@ -737,15 +775,8 @@ bool ObjCARCContract::doInitialization(Module &M) {
EP.init(&M);
// Initialize RVInstMarker.
- RVInstMarker = nullptr;
- if (NamedMDNode *NMD =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
- if (NMD->getNumOperands() == 1) {
- const MDNode *N = NMD->getOperand(0);
- if (N->getNumOperands() == 1)
- if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
- RVInstMarker = S;
- }
+ const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
+ RVInstMarker = dyn_cast_or_null<MDString>(M.getModuleFlag(MarkerKey));
return false;
}
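(Editorial sketch, not part of the imported patch.) The doInitialization() hunk above now reads RVInstMarker from a module flag instead of the old named-metadata node. For illustration only, a frontend could publish that flag roughly as follows; the marker string is a placeholder, not taken from this commit:

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Editorial sketch: record the retainAutoreleasedReturnValue marker as a
// module flag so ObjCARCContract can retrieve it via M.getModuleFlag().
static void setRVInstMarker(Module &M) {
  // "marker-asm-string" is a placeholder for the target-specific inline-asm
  // marker a real frontend would emit.
  M.addModuleFlag(Module::Error,
                  "clang.arc.retainAutoreleasedReturnValueMarker",
                  MDString::get(M.getContext(), "marker-asm-string"));
}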
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 6a345ef56e1b..04e98d8f5577 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 9a02174556fc..6653ff0bb91a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -1,9 +1,8 @@
//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,11 @@ using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-opts"
+static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
+ cl::Hidden,
+ cl::desc("Maximum number of ptr states the optimizer keeps track of"),
+ cl::init(4095));
+
/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
/// @{
@@ -220,6 +224,10 @@ namespace {
return !PerPtrTopDown.empty();
}
+ unsigned top_down_ptr_list_size() const {
+ return std::distance(top_down_ptr_begin(), top_down_ptr_end());
+ }
+
using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
using const_bottom_up_ptr_iterator =
decltype(PerPtrBottomUp)::const_iterator;
@@ -238,6 +246,10 @@ namespace {
return !PerPtrBottomUp.empty();
}
+ unsigned bottom_up_ptr_list_size() const {
+ return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
+ }
+
/// Mark this block as being an entry block, which has one path from the
/// entry by definition.
void SetAsEntry() { TopDownPathCount = 1; }
@@ -481,6 +493,10 @@ namespace {
/// A flag indicating whether this optimization pass should run.
bool Run;
+ /// A flag indicating whether the optimization that removes or moves
+ /// retain/release pairs should be performed.
+ bool DisableRetainReleasePairing = false;
+
/// Flags which determine whether each of the interesting runtime functions
/// is in fact used in the current function.
unsigned UsedInThisFunction;
@@ -642,7 +658,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
"Old = "
<< *RetainRV << "\n");
- Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -691,7 +707,7 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
<< *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
+ Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
AutoreleaseRVCI->setCalledFunction(NewDecl);
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = ARCInstKind::Autorelease;
@@ -744,6 +760,19 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
+ // Some of the ARC calls can be deleted if their arguments are global
+ // variables that are inert in ARC.
+ if (IsNoopOnGlobal(Class)) {
+ Value *Opnd = Inst->getOperand(0);
+ if (auto *GV = dyn_cast<GlobalVariable>(Opnd->stripPointerCasts()))
+ if (GV->hasAttribute("objc_arc_inert")) {
+ if (!Inst->getType()->isVoidTy())
+ Inst->replaceAllUsesWith(Opnd);
+ Inst->eraseFromParent();
+ continue;
+ }
+ }
+
switch (Class) {
default: break;
@@ -830,7 +859,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Create the declaration lazily.
LLVMContext &C = Inst->getContext();
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
Call);
NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
@@ -849,7 +878,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// For functions which can never be passed stack arguments, add
// a tail keyword.
- if (IsAlwaysTail(Class)) {
+ if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
Changed = true;
LLVM_DEBUG(
dbgs() << "Adding tail keyword to function since it can never be "
@@ -1273,6 +1302,13 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+
+ // Bail out if the number of pointers being tracked becomes too large so
+ // that this pass can complete in a reasonable amount of time.
+ if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
+ DisableRetainReleasePairing = true;
+ return false;
+ }
}
// If there's a predecessor with an invoke, visit the invoke as if it were
@@ -1395,6 +1431,13 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n");
NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates);
+
+ // Bail out if the number of pointers being tracked becomes too large so
+ // that this pass can complete in a reasonable amount of time.
+ if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
+ DisableRetainReleasePairing = true;
+ return false;
+ }
}
LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
@@ -1501,13 +1544,19 @@ bool ObjCARCOpt::Visit(Function &F,
// Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
- for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder))
+ for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+ if (DisableRetainReleasePairing)
+ return false;
+ }
// Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- for (BasicBlock *BB : llvm::reverse(PostOrder))
+ for (BasicBlock *BB : llvm::reverse(PostOrder)) {
TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
+ if (DisableRetainReleasePairing)
+ return false;
+ }
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -1528,7 +1577,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
Call->setDoesNotThrow();
Call->setTailCall();
@@ -1541,7 +1590,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
@@ -1877,7 +1926,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == ARCInstKind::LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -1906,7 +1955,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == ARCInstKind::LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
+ Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2003,6 +2052,9 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
// Analyze the CFG of the function, and all instructions.
bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+ if (DisableRetainReleasePairing)
+ return false;
+
// Transform.
bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
Releases,
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 3004fffb9745..c6138edba95a 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -1,9 +1,8 @@
//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 1276f564a022..8fd842fd42d6 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -1,9 +1,8 @@
//===- ProvenanceAnalysis.h - ObjC ARC Optimization -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 870a5f600fd8..b768f7973b87 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -1,9 +1,8 @@
//===- ProvenanceAnalysisEvaluator.cpp - ObjC ARC Optimization ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index 8a7b6a74fae2..3243481dee0d 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -1,9 +1,8 @@
//===- PtrState.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h
index f5b9b853d8e3..66614c06cb79 100644
--- a/lib/Transforms/ObjCARC/PtrState.h
+++ b/lib/Transforms/ObjCARC/PtrState.h
@@ -1,9 +1,8 @@
//===- PtrState.h - ARC State for a Ptr -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index b0602d96798c..7f7460c5746a 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -1,9 +1,8 @@
//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,9 +19,11 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/PostDominators.h"
@@ -30,7 +31,6 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -136,7 +136,7 @@ class AggressiveDeadCodeElimination {
SmallPtrSet<const Metadata *, 32> AliveScopes;
/// Set of blocks not known to have live terminators.
- SmallPtrSet<BasicBlock *, 16> BlocksWithDeadTerminators;
+ SmallSetVector<BasicBlock *, 16> BlocksWithDeadTerminators;
/// The set of blocks which we have determined whose control
/// dependence sources must be live and which have not had
@@ -390,7 +390,7 @@ void AggressiveDeadCodeElimination::markLive(Instruction *I) {
// Mark the containing block live
auto &BBInfo = *Info.Block;
if (BBInfo.Terminator == I) {
- BlocksWithDeadTerminators.erase(BBInfo.BB);
+ BlocksWithDeadTerminators.remove(BBInfo.BB);
// For live terminators, mark destination blocks
// live to preserve these control flow edges.
if (!BBInfo.UnconditionalBranch)
@@ -479,10 +479,14 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
// which currently have dead terminators that are control
// dependence sources of a block which is in NewLiveBlocks.
+ const SmallPtrSet<BasicBlock *, 16> BWDT{
+ BlocksWithDeadTerminators.begin(),
+ BlocksWithDeadTerminators.end()
+ };
SmallVector<BasicBlock *, 32> IDFBlocks;
ReverseIDFCalculator IDFs(PDT);
IDFs.setDefiningBlocks(NewLiveBlocks);
- IDFs.setLiveInBlocks(BlocksWithDeadTerminators);
+ IDFs.setLiveInBlocks(BWDT);
IDFs.calculate(IDFBlocks);
NewLiveBlocks.clear();
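
The ADCE hunk above switches BlocksWithDeadTerminators from a SmallPtrSet to a SmallSetVector so the pass iterates it in a deterministic order, and builds a temporary SmallPtrSet (BWDT) only where the IDF calculator's setLiveInBlocks still takes one. Below is a minimal sketch of that pattern in standard C++ with hypothetical names; it is not the LLVM ADT implementation.

// Minimal sketch (standard C++ rather than LLVM ADTs): keep blocks in an
// insertion-ordered, duplicate-free container so iteration is deterministic,
// and materialize a throwaway hash set only at the call site that needs one.
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// A tiny stand-in for llvm::SetVector: vector for order, set for membership.
class OrderedSet {
  std::vector<std::string> Order;
  std::unordered_set<std::string> Members;
public:
  bool insert(const std::string &V) {
    if (!Members.insert(V).second) return false;
    Order.push_back(V);
    return true;
  }
  void remove(const std::string &V) {
    if (Members.erase(V))
      Order.erase(std::find(Order.begin(), Order.end(), V));
  }
  const std::vector<std::string> &values() const { return Order; }
};

// An API that, like IDFCalculator::setLiveInBlocks, wants an unordered set.
static void consumeUnordered(const std::unordered_set<std::string> &S) {
  std::cout << S.size() << " live-in blocks\n";
}

int main() {
  OrderedSet DeadTerminatorBlocks;
  DeadTerminatorBlocks.insert("bb3");
  DeadTerminatorBlocks.insert("bb1");
  DeadTerminatorBlocks.insert("bb3");   // duplicate, ignored

  // Deterministic order for the pass's own processing.
  for (const auto &BB : DeadTerminatorBlocks.values())
    std::cout << "process " << BB << '\n';

  // Temporary unordered copy only where the callee requires it.
  const std::unordered_set<std::string> View(
      DeadTerminatorBlocks.values().begin(),
      DeadTerminatorBlocks.values().end());
  consumeUnordered(View);
}
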
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 0830ff5dd042..de9a62e88c27 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -1,10 +1,9 @@
//===----------------------- AlignmentFromAssumptions.cpp -----------------===//
// Set Load/Store Alignments From Assumptions
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index d3c9b9a270aa..9bd387c33e80 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -1,9 +1,8 @@
//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,14 +41,17 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
"Trivializing a non-integer value?");
// Initialize the worklist with eligible direct users.
+ SmallPtrSet<Instruction *, 16> Visited;
SmallVector<Instruction *, 16> WorkList;
for (User *JU : I->users()) {
// If all bits of a user are demanded, then we know that nothing below that
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
if (J && J->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(J).isAllOnesValue())
+ !DB.getDemandedBits(J).isAllOnesValue()) {
+ Visited.insert(J);
WorkList.push_back(J);
+ }
// Note that we need to check for non-int types above before asking for
// demanded bits. Normally, the only way to reach an instruction with an
@@ -62,7 +64,6 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
}
// DFS through subsequent users while tracking visits to avoid cycles.
- SmallPtrSet<Instruction *, 16> Visited;
while (!WorkList.empty()) {
Instruction *J = WorkList.pop_back_val();
@@ -73,13 +74,11 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// 1. llvm.assume demands its operand, so trivializing can't change it.
// 2. range metadata only applies to memory accesses which demand all bits.
- Visited.insert(J);
-
for (User *KU : J->users()) {
// If all bits of a user are demanded, then we know that nothing below
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
- if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
+ if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
!DB.getDemandedBits(K).isAllOnesValue())
WorkList.push_back(K);
}
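
The BDCE hunk above moves the Visited bookkeeping from pop time to push time: a user is inserted into Visited the moment it is queued, and the Visited.insert(K).second test doubles as the duplicate check, so no instruction can sit on the worklist twice. A minimal plain-C++ sketch of that worklist discipline follows, with an invented def-use graph.

// Minimal sketch (plain C++): mark a node visited when it is pushed, not when
// it is popped, so the same user is never queued twice. The graph is invented.
#include <iostream>
#include <map>
#include <unordered_set>
#include <vector>

int main() {
  // Users[v] = values that use v (a value may be reachable along many paths).
  std::map<int, std::vector<int>> Users = {
      {1, {2, 3}}, {2, {4}}, {3, {4}}, {4, {}}};

  std::unordered_set<int> Visited;
  std::vector<int> WorkList;

  // Seed with direct users of node 1, marking them visited as they are pushed.
  for (int U : Users[1])
    if (Visited.insert(U).second)
      WorkList.push_back(U);

  while (!WorkList.empty()) {
    int J = WorkList.back();
    WorkList.pop_back();
    std::cout << "visit " << J << '\n';        // node 4 is printed only once
    for (int K : Users[J])
      if (Visited.insert(K).second)            // push-time de-duplication
        WorkList.push_back(K);
  }
}
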
diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp
index a806d6faed60..3519b000a33f 100644
--- a/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -1,9 +1,8 @@
//===- CallSiteSplitting.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -184,6 +183,9 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
}
static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
+ if (CS.isConvergent() || CS.cannotDuplicate())
+ return false;
+
// FIXME: As of now we handle only CallInst. InvokeInst could be handled
// without too much effort.
Instruction *Instr = CS.getInstruction();
@@ -367,7 +369,7 @@ static void splitCallSite(
assert(Splits.size() == 2 && "Expected exactly 2 splits!");
for (unsigned i = 0; i < Splits.size(); i++) {
Splits[i]->getTerminator()->eraseFromParent();
- DTU.deleteEdge(Splits[i], TailBB);
+ DTU.applyUpdatesPermissive({{DominatorTree::Delete, Splits[i], TailBB}});
}
// Erase the tail block once done with musttail patching
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index beac0d967a98..98243a23f1ef 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -1,9 +1,8 @@
//===- ConstantHoisting.cpp - Prepare code for expensive constants --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
@@ -61,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -112,11 +113,10 @@ public:
if (ConstHoistWithBlockFrequency)
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
- void releaseMemory() override { Impl.releaseMemory(); }
-
private:
ConstantHoistingPass Impl;
};
@@ -129,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
@@ -151,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
ConstHoistWithBlockFrequency
? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
: nullptr,
- Fn.getEntryBlock());
+ Fn.getEntryBlock(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
if (MadeChange) {
LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -211,6 +213,9 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
// in the dominator tree from Entry to 'BB'.
SmallPtrSet<BasicBlock *, 16> Candidates;
for (auto BB : BBs) {
+ // Ignore unreachable basic blocks.
+ if (!DT.isReachableFromEntry(BB))
+ continue;
Path.clear();
// Walk up the dominator tree until Entry or another BB in BBs
// is reached. Insert the nodes on the way to the Path.
@@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
ConstCandVecType::iterator &MaxCostItr) {
unsigned NumUses = 0;
- if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+ bool OptForSize = Entry->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+ if (!OptForSize || std::distance(S,E) > 100) {
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
NumUses += ConstCand->Uses.size();
if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
@@ -640,8 +647,8 @@ void ConstantHoistingPass::findBaseConstants(GlobalVariable *BaseGV) {
ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
// Sort the constants by value and type. This invalidates the mapping!
- std::stable_sort(ConstCandVec.begin(), ConstCandVec.end(),
- [](const ConstantCandidate &LHS, const ConstantCandidate &RHS) {
+ llvm::stable_sort(ConstCandVec, [](const ConstantCandidate &LHS,
+ const ConstantCandidate &RHS) {
if (LHS.ConstInt->getType() != RHS.ConstInt->getType())
return LHS.ConstInt->getType()->getBitWidth() <
RHS.ConstInt->getType()->getBitWidth();
@@ -824,7 +831,9 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
for (auto const &ConstInfo : ConstInfoVec) {
SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
- assert(!IPSet.empty() && "IPSet is empty");
+ // We can have an empty set if the function contains unreachable blocks.
+ if (IPSet.empty())
+ continue;
unsigned UsesNum = 0;
unsigned ReBasesNum = 0;
@@ -917,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCastInst() const {
/// Optimize expensive integer constants in the given function.
bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
DominatorTree &DT, BlockFrequencyInfo *BFI,
- BasicBlock &Entry) {
+ BasicBlock &Entry, ProfileSummaryInfo *PSI) {
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
this->DL = &Fn.getParent()->getDataLayout();
this->Ctx = &Fn.getContext();
this->Entry = &Entry;
+ this->PSI = PSI;
// Collect all constant candidates.
collectConstantCandidates(Fn);
@@ -948,6 +958,8 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
// Cleanup dead instructions.
deleteDeadCastInst();
+ cleanup();
+
return MadeChange;
}
@@ -958,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
auto BFI = ConstHoistWithBlockFrequency
? &AM.getResult<BlockFrequencyAnalysis>(F)
: nullptr;
- if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
+ auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 51032b0625f8..770321c740a0 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -1,9 +1,8 @@
//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index d0105701c73f..89497177524f 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -1,9 +1,8 @@
//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -27,7 +27,6 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -64,8 +63,10 @@ STATISTIC(NumUDivs, "Number of udivs whose width was decreased");
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumOverflows, "Number of overflow checks removed");
+STATISTIC(NumSaturating,
+ "Number of saturating arithmetics converted to normal arithmetics");
-static cl::opt<bool> DontProcessAdds("cvp-dont-process-adds", cl::init(true));
+static cl::opt<bool> DontAddNoWrapFlags("cvp-dont-add-nowrap-flags", cl::init(false));
namespace {
@@ -307,11 +308,11 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
/// that cannot fire no matter what the incoming edge can safely be removed. If
/// a case fires on every incoming edge then the entire switch can be removed
/// and replaced with a branch to the case destination.
-static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
+static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
DominatorTree *DT) {
DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
- Value *Cond = SI->getCondition();
- BasicBlock *BB = SI->getParent();
+ Value *Cond = I->getCondition();
+ BasicBlock *BB = I->getParent();
// If the condition was defined in same block as the switch then LazyValueInfo
// currently won't say anything useful about it, though in theory it could.
@@ -328,67 +329,72 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
for (auto *Succ : successors(BB))
SuccessorsCount[Succ]++;
- for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
- ConstantInt *Case = CI->getCaseValue();
-
- // Check to see if the switch condition is equal to/not equal to the case
- // value on every incoming edge, equal/not equal being the same each time.
- LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- // Is the switch condition equal to the case value?
- LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
- Cond, Case, *PI,
- BB, SI);
- // Give up on this case if nothing is known.
- if (Value == LazyValueInfo::Unknown) {
- State = LazyValueInfo::Unknown;
- break;
+ { // Scope for SwitchInstProfUpdateWrapper. It must not live during
+ // ConstantFoldTerminator() as the underlying SwitchInst can be changed.
+ SwitchInstProfUpdateWrapper SI(*I);
+
+ for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ ConstantInt *Case = CI->getCaseValue();
+
+ // Check to see if the switch condition is equal to/not equal to the case
+ // value on every incoming edge, equal/not equal being the same each time.
+ LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ // Is the switch condition equal to the case value?
+ LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+ Cond, Case, *PI,
+ BB, SI);
+ // Give up on this case if nothing is known.
+ if (Value == LazyValueInfo::Unknown) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+
+ // If this was the first edge to be visited, record that all other edges
+ // need to give the same result.
+ if (PI == PB) {
+ State = Value;
+ continue;
+ }
+
+ // If this case is known to fire for some edges and known not to fire for
+ // others then there is nothing we can do - give up.
+ if (Value != State) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
}
- // If this was the first edge to be visited, record that all other edges
- // need to give the same result.
- if (PI == PB) {
- State = Value;
+ if (State == LazyValueInfo::False) {
+ // This case never fires - remove it.
+ BasicBlock *Succ = CI->getCaseSuccessor();
+ Succ->removePredecessor(BB);
+ CI = SI.removeCase(CI);
+ CE = SI->case_end();
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
+ ++NumDeadCases;
+ Changed = true;
+ if (--SuccessorsCount[Succ] == 0)
+ DTU.applyUpdatesPermissive({{DominatorTree::Delete, BB, Succ}});
continue;
}
-
- // If this case is known to fire for some edges and known not to fire for
- // others then there is nothing we can do - give up.
- if (Value != State) {
- State = LazyValueInfo::Unknown;
+ if (State == LazyValueInfo::True) {
+ // This case always fires. Arrange for the switch to be turned into an
+ // unconditional branch by replacing the switch condition with the case
+ // value.
+ SI->setCondition(Case);
+ NumDeadCases += SI->getNumCases();
+ Changed = true;
break;
}
- }
- if (State == LazyValueInfo::False) {
- // This case never fires - remove it.
- BasicBlock *Succ = CI->getCaseSuccessor();
- Succ->removePredecessor(BB);
- CI = SI->removeCase(CI);
- CE = SI->case_end();
-
- // The condition can be modified by removePredecessor's PHI simplification
- // logic.
- Cond = SI->getCondition();
-
- ++NumDeadCases;
- Changed = true;
- if (--SuccessorsCount[Succ] == 0)
- DTU.deleteEdge(BB, Succ);
- continue;
- }
- if (State == LazyValueInfo::True) {
- // This case always fires. Arrange for the switch to be turned into an
- // unconditional branch by replacing the switch condition with the case
- // value.
- SI->setCondition(Case);
- NumDeadCases += SI->getNumCases();
- Changed = true;
- break;
+ // Increment the case iterator since we didn't delete it.
+ ++CI;
}
-
- // Increment the case iterator since we didn't delete it.
- ++CI;
}
if (Changed)
@@ -399,56 +405,48 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI,
return Changed;
}
-// See if we can prove that the given overflow intrinsic will not overflow.
-static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) {
- using OBO = OverflowingBinaryOperator;
- auto NoWrap = [&] (Instruction::BinaryOps BinOp, unsigned NoWrapKind) {
- Value *RHS = II->getOperand(1);
- ConstantRange RRange = LVI->getConstantRange(RHS, II->getParent(), II);
- ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
- BinOp, RRange, NoWrapKind);
- // As an optimization, do not compute LRange if we do not need it.
- if (NWRegion.isEmptySet())
- return false;
- Value *LHS = II->getOperand(0);
- ConstantRange LRange = LVI->getConstantRange(LHS, II->getParent(), II);
- return NWRegion.contains(LRange);
- };
- switch (II->getIntrinsicID()) {
- default:
- break;
- case Intrinsic::uadd_with_overflow:
- return NoWrap(Instruction::Add, OBO::NoUnsignedWrap);
- case Intrinsic::sadd_with_overflow:
- return NoWrap(Instruction::Add, OBO::NoSignedWrap);
- case Intrinsic::usub_with_overflow:
- return NoWrap(Instruction::Sub, OBO::NoUnsignedWrap);
- case Intrinsic::ssub_with_overflow:
- return NoWrap(Instruction::Sub, OBO::NoSignedWrap);
- }
- return false;
+// See if we can prove that the given binary op intrinsic will not overflow.
+static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
+ ConstantRange LRange = LVI->getConstantRange(
+ BO->getLHS(), BO->getParent(), BO);
+ ConstantRange RRange = LVI->getConstantRange(
+ BO->getRHS(), BO->getParent(), BO);
+ ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+ BO->getBinaryOp(), RRange, BO->getNoWrapKind());
+ return NWRegion.contains(LRange);
}
-static void processOverflowIntrinsic(IntrinsicInst *II) {
- IRBuilder<> B(II);
- Value *NewOp = nullptr;
- switch (II->getIntrinsicID()) {
- default:
- llvm_unreachable("Unexpected instruction.");
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- NewOp = B.CreateAdd(II->getOperand(0), II->getOperand(1), II->getName());
- break;
- case Intrinsic::usub_with_overflow:
- case Intrinsic::ssub_with_overflow:
- NewOp = B.CreateSub(II->getOperand(0), II->getOperand(1), II->getName());
- break;
+static void processOverflowIntrinsic(WithOverflowInst *WO) {
+ IRBuilder<> B(WO);
+ Value *NewOp = B.CreateBinOp(
+ WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), WO->getName());
+ // Constant-folding could have happened.
+ if (auto *Inst = dyn_cast<Instruction>(NewOp)) {
+ if (WO->isSigned())
+ Inst->setHasNoSignedWrap();
+ else
+ Inst->setHasNoUnsignedWrap();
}
+
+ Value *NewI = B.CreateInsertValue(UndefValue::get(WO->getType()), NewOp, 0);
+ NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(WO->getContext()), 1);
+ WO->replaceAllUsesWith(NewI);
+ WO->eraseFromParent();
++NumOverflows;
- Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0);
- NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1);
- II->replaceAllUsesWith(NewI);
- II->eraseFromParent();
+}
+
+static void processSaturatingInst(SaturatingInst *SI) {
+ BinaryOperator *BinOp = BinaryOperator::Create(
+ SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ BinOp->setDebugLoc(SI->getDebugLoc());
+ if (SI->isSigned())
+ BinOp->setHasNoSignedWrap();
+ else
+ BinOp->setHasNoUnsignedWrap();
+
+ SI->replaceAllUsesWith(BinOp);
+ SI->eraseFromParent();
+ ++NumSaturating;
}
/// Infer nonnull attributes for the arguments at the specified callsite.
@@ -456,13 +454,44 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
- if (auto *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
- if (willNotOverflow(II, LVI)) {
- processOverflowIntrinsic(II);
+ if (auto *WO = dyn_cast<WithOverflowInst>(CS.getInstruction())) {
+ if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
+ processOverflowIntrinsic(WO);
+ return true;
+ }
+ }
+
+ if (auto *SI = dyn_cast<SaturatingInst>(CS.getInstruction())) {
+ if (SI->getType()->isIntegerTy() && willNotOverflow(SI, LVI)) {
+ processSaturatingInst(SI);
return true;
}
}
+ // Deopt bundle operands are intended to capture state with minimal
+ // perturbance of the code otherwise. If we can find a constant value for
+ // any such operand and remove a use of the original value, that's
+ // desirable since it may allow further optimization of that value (e.g. via
+ // single use rules in instcombine). Since deopt uses tend to,
+ // idiomatically, appear along rare conditional paths, it's reasonably likely
+ // we may have a conditional fact with which LVI can fold.
+ if (auto DeoptBundle = CS.getOperandBundle(LLVMContext::OB_deopt)) {
+ bool Progress = false;
+ for (const Use &ConstU : DeoptBundle->Inputs) {
+ Use &U = const_cast<Use&>(ConstU);
+ Value *V = U.get();
+ if (V->getType()->isVectorTy()) continue;
+ if (isa<Constant>(V)) continue;
+
+ Constant *C = LVI->getConstant(V, CS.getParent(), CS.getInstruction());
+ if (!C) continue;
+ U.set(C);
+ Progress = true;
+ }
+ if (Progress)
+ return true;
+ }
+
for (Value *V : CS.args()) {
PointerType *Type = dyn_cast<PointerType>(V->getType());
// Try to mark pointer typed parameters as non-null. We skip the
@@ -512,7 +541,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// Find the smallest power of two bitwidth that's sufficient to hold Instr's
// operands.
auto OrigWidth = Instr->getType()->getIntegerBitWidth();
- ConstantRange OperandRange(OrigWidth, /*isFullset=*/false);
+ ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false);
for (Value *Operand : Instr->operands()) {
OperandRange = OperandRange.unionWith(
LVI->getConstantRange(Operand, Instr->getParent()));
@@ -603,55 +632,42 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
return true;
}
-static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
+static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
using OBO = OverflowingBinaryOperator;
- if (DontProcessAdds)
+ if (DontAddNoWrapFlags)
return false;
- if (AddOp->getType()->isVectorTy())
+ if (BinOp->getType()->isVectorTy())
return false;
- bool NSW = AddOp->hasNoSignedWrap();
- bool NUW = AddOp->hasNoUnsignedWrap();
+ bool NSW = BinOp->hasNoSignedWrap();
+ bool NUW = BinOp->hasNoUnsignedWrap();
if (NSW && NUW)
return false;
- BasicBlock *BB = AddOp->getParent();
+ BasicBlock *BB = BinOp->getParent();
- Value *LHS = AddOp->getOperand(0);
- Value *RHS = AddOp->getOperand(1);
+ Value *LHS = BinOp->getOperand(0);
+ Value *RHS = BinOp->getOperand(1);
- ConstantRange LRange = LVI->getConstantRange(LHS, BB, AddOp);
-
- // Initialize RRange only if we need it. If we know that guaranteed no wrap
- // range for the given LHS range is empty don't spend time calculating the
- // range for the RHS.
- Optional<ConstantRange> RRange;
- auto LazyRRange = [&] () {
- if (!RRange)
- RRange = LVI->getConstantRange(RHS, BB, AddOp);
- return RRange.getValue();
- };
+ ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp);
+ ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp);
bool Changed = false;
if (!NUW) {
ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- BinaryOperator::Add, LRange, OBO::NoUnsignedWrap);
- if (!NUWRange.isEmptySet()) {
- bool NewNUW = NUWRange.contains(LazyRRange());
- AddOp->setHasNoUnsignedWrap(NewNUW);
- Changed |= NewNUW;
- }
+ BinOp->getOpcode(), RRange, OBO::NoUnsignedWrap);
+ bool NewNUW = NUWRange.contains(LRange);
+ BinOp->setHasNoUnsignedWrap(NewNUW);
+ Changed |= NewNUW;
}
if (!NSW) {
ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
- BinaryOperator::Add, LRange, OBO::NoSignedWrap);
- if (!NSWRange.isEmptySet()) {
- bool NewNSW = NSWRange.contains(LazyRRange());
- AddOp->setHasNoSignedWrap(NewNSW);
- Changed |= NewNSW;
- }
+ BinOp->getOpcode(), RRange, OBO::NoSignedWrap);
+ bool NewNSW = NSWRange.contains(LRange);
+ BinOp->setHasNoSignedWrap(NewNSW);
+ Changed |= NewNSW;
}
return Changed;
@@ -725,7 +741,8 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
break;
case Instruction::Add:
- BBChanged |= processAdd(cast<BinaryOperator>(II), LVI);
+ case Instruction::Sub:
+ BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
break;
}
}
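
The rewritten willNotOverflow above asks LazyValueInfo for constant ranges of both operands and checks the left range against the guaranteed no-wrap region derived from the right one; processOverflowIntrinsic and processSaturatingInst then lower the intrinsic to a plain binary operator carrying the matching nuw/nsw flag. The sketch below uses plain C++ rather than the ConstantRange API to show the interval reasoning for the unsigned-add case; the bit width and the ranges are made-up examples.

// Minimal sketch (plain C++, not llvm::ConstantRange): if the maximum possible
// LHS + RHS still fits in the type, the overflow/saturating intrinsic can be
// replaced by an ordinary add.
#include <cstdint>
#include <iostream>

struct URange { uint64_t Lo, Hi; };   // inclusive unsigned range

static bool unsignedAddCannotWrap(URange L, URange R, unsigned BitWidth) {
  const uint64_t Max = (BitWidth == 64) ? UINT64_MAX
                                        : ((uint64_t)1 << BitWidth) - 1;
  // Worst case is Hi + Hi; if that fits, no pair of in-range values wraps.
  return L.Hi <= Max - R.Hi;
}

int main() {
  // Suppose value analysis proved LHS in [0, 10] and RHS in [0, 20] for i8.
  URange L{0, 10}, R{0, 20};
  if (unsignedAddCannotWrap(L, R, 8))
    std::cout << "safe: lower the i8 overflow intrinsic to a plain add nuw\n";
  else
    std::cout << "might wrap: keep the intrinsic\n";
}
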
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 4c964e6e888c..479e0ed74074 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -1,9 +1,8 @@
//===- DCE.cpp - Code to perform dead code elimination --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 469930ca6a19..a81645745b48 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1,9 +1,8 @@
//===- DeadStoreElimination.cpp - Fast Dead Store Elimination -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,8 +28,8 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -57,6 +56,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -98,9 +98,8 @@ using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
- InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering,
- SmallSetVector<Value *, 16> *ValueSet = nullptr) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB,
+ SmallSetVector<const Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -136,8 +135,8 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
}
if (ValueSet) ValueSet->remove(DeadInst);
- InstrOrdering->erase(DeadInst);
IOL.erase(DeadInst);
+ OBB.eraseInstruction(DeadInst);
if (NewIter == DeadInst->getIterator())
NewIter = DeadInst->eraseFromParent();
@@ -657,8 +656,7 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
static bool handleFree(CallInst *F, AliasAnalysis *AA,
MemoryDependenceResults *MD, DominatorTree *DT,
const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering) {
+ InstOverlapIntervalsTy &IOL, OrderedBasicBlock &OBB) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -692,7 +690,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
// DCE instructions only used to calculate that store.
BasicBlock::iterator BBI(Dependency);
- deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, InstrOrdering);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, OBB);
++NumFastStores;
MadeChange = true;
@@ -715,7 +713,7 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
/// the DeadStackObjects set. If so, they become live because the location is
/// being loaded.
static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
- SmallSetVector<Value *, 16> &DeadStackObjects,
+ SmallSetVector<const Value *, 16> &DeadStackObjects,
const DataLayout &DL, AliasAnalysis *AA,
const TargetLibraryInfo *TLI,
const Function *F) {
@@ -728,12 +726,12 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
// If the kill pointer can be easily reduced to an alloca, don't bother doing
// extraneous AA queries.
if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
- DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
+ DeadStackObjects.remove(UnderlyingPointer);
return;
}
// Remove objects that could alias LoadedLoc.
- DeadStackObjects.remove_if([&](Value *I) {
+ DeadStackObjects.remove_if([&](const Value *I) {
// See if the loaded location could alias the stack location.
MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI, F));
return !AA->isNoAlias(StackLoc, LoadedLoc);
@@ -747,15 +745,15 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
/// store i32 1, i32* %A
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
- MemoryDependenceResults *MD,
- const TargetLibraryInfo *TLI,
- InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering) {
+ MemoryDependenceResults *MD,
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL,
+ OrderedBasicBlock &OBB) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
// function.
- SmallSetVector<Value*, 16> DeadStackObjects;
+ SmallSetVector<const Value*, 16> DeadStackObjects;
// Find all of the alloca'd pointers in the entry block.
BasicBlock &Entry = BB.getParent()->front();
@@ -784,12 +782,12 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// If we find a store, check to see if it points into a dead stack value.
if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
// See through pointer-to-pointer bitcasts
- SmallVector<Value *, 4> Pointers;
+ SmallVector<const Value *, 4> Pointers;
GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
- for (Value *Pointer : Pointers)
+ for (const Value *Pointer : Pointers)
if (!DeadStackObjects.count(Pointer)) {
AllDead = false;
break;
@@ -800,7 +798,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
LLVM_DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: "
<< *Dead << "\n Objects: ";
- for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+ for (SmallVectorImpl<const Value *>::iterator I =
+ Pointers.begin(),
E = Pointers.end();
I != E; ++I) {
dbgs() << **I;
@@ -810,7 +809,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
<< '\n');
// DCE instructions only used to calculate that store.
- deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, OBB,
+ &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -821,7 +821,8 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
if (isInstructionTriviallyDead(&*BBI, TLI)) {
LLVM_DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
<< *&*BBI << '\n');
- deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, OBB,
+ &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -847,7 +848,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// If the call might load from any of our allocas, then any store above
// the call is live.
- DeadStackObjects.remove_if([&](Value *I) {
+ DeadStackObjects.remove_if([&](const Value *I) {
// See if the call site touches the value.
return isRefSet(AA->getModRefInfo(
Call, I, getPointerSize(I, DL, *TLI, BB.getParent())));
@@ -946,7 +947,9 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
Value *Indices[1] = {
ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+ EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(),
EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+ NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
EarlierIntrinsic->setDest(NewDestGEP);
EarlierOffset = EarlierOffset + OffsetMoved;
}
@@ -1025,7 +1028,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
InstOverlapIntervalsTy &IOL,
- DenseMap<Instruction*, size_t> *InstrOrdering) {
+ OrderedBasicBlock &OBB) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -1041,7 +1044,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
<< *DepLoad << "\n STORE: " << *SI << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
++NumRedundantStores;
return true;
}
@@ -1059,7 +1062,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, OBB);
++NumRedundantStores;
return true;
}
@@ -1073,11 +1076,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
- // FIXME: Maybe change this to use some abstraction like OrderedBasicBlock?
- // The current OrderedBasicBlock can't deal with mutation at the moment.
- size_t LastThrowingInstIndex = 0;
- DenseMap<Instruction*, size_t> InstrOrdering;
- size_t InstrIndex = 1;
+ OrderedBasicBlock OBB(&BB);
+ Instruction *LastThrowing = nullptr;
// A map of interval maps representing partially-overwritten value parts.
InstOverlapIntervalsTy IOL;
@@ -1086,7 +1086,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(&*BBI, TLI)) {
- MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, &InstrOrdering);
+ MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, OBB);
// Increment BBI after handleFree has potentially deleted instructions.
// This ensures we maintain a valid iterator.
++BBI;
@@ -1095,10 +1095,8 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *Inst = &*BBI++;
- size_t CurInstNumber = InstrIndex++;
- InstrOrdering.insert(std::make_pair(Inst, CurInstNumber));
if (Inst->mayThrow()) {
- LastThrowingInstIndex = CurInstNumber;
+ LastThrowing = Inst;
continue;
}
@@ -1107,13 +1105,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
continue;
// eliminateNoopStore will update in iterator, if necessary.
- if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) {
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, OBB)) {
MadeChange = true;
continue;
}
// If we find something that writes memory, get its memory dependence.
- MemDepResult InstDep = MD->getDependency(Inst);
+ MemDepResult InstDep = MD->getDependency(Inst, &OBB);
// Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
@@ -1158,9 +1156,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If the underlying object is a non-escaping memory allocation, any store
// to it is dead along the unwind edge. Otherwise, we need to preserve
// the store.
- size_t DepIndex = InstrOrdering.lookup(DepWrite);
- assert(DepIndex && "Unexpected instruction");
- if (DepIndex <= LastThrowingInstIndex) {
+ if (LastThrowing && OBB.dominates(DepWrite, LastThrowing)) {
const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
if (!IsStoreDeadOnUnwind) {
@@ -1191,12 +1187,12 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
<< "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
++NumFastStores;
MadeChange = true;
// We erased DepWrite; start over.
- InstDep = MD->getDependency(Inst);
+ InstDep = MD->getDependency(Inst, &OBB);
continue;
} else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
((OR == OW_Begin &&
@@ -1215,12 +1211,17 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
auto *Earlier = dyn_cast<StoreInst>(DepWrite);
auto *Later = dyn_cast<StoreInst>(Inst);
if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(
+ Earlier->getValueOperand()->getType()) &&
Later && isa<ConstantInt>(Later->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(
+ Later->getValueOperand()->getType()) &&
memoryIsNotModifiedBetween(Earlier, Later, AA)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
// b) the later store is fully contained in the earlier one and
// c) they both have a constant value
+ // d) none of the two stores need padding
// Merge the two stores, replacing the earlier store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
@@ -1264,14 +1265,11 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
++NumModifiedStores;
// Remove earlier, wider, store
- size_t Idx = InstrOrdering.lookup(DepWrite);
- InstrOrdering.erase(DepWrite);
- InstrOrdering.insert(std::make_pair(SI, Idx));
+ OBB.replaceInstruction(DepWrite, SI);
// Delete the old stores and now-dead instructions that feed them.
- deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
- &InstrOrdering);
+ deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, OBB);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, OBB);
MadeChange = true;
// We erased DepWrite and Inst (Loc); start over.
@@ -1306,7 +1304,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, &InstrOrdering);
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, OBB);
return MadeChange;
}
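
The DSE hunk above retires the hand-rolled InstrOrdering map and LastThrowingInstIndex counter in favor of OrderedBasicBlock, which numbers instructions, answers intra-block ordering queries, and is patched up via eraseInstruction and replaceInstruction as the pass mutates the block. Below is a minimal stand-in for that ordering cache in plain C++; the strings standing in for instructions and the method names are invented for illustration.

// Minimal sketch (plain C++) of an intra-block ordering cache: number the
// instructions once, answer "does A come before B" from the numbering, and
// patch the numbering when an instruction is erased or replaced.
#include <cassert>
#include <map>
#include <string>
#include <vector>

class OrderedBlock {
  std::map<std::string, unsigned> Pos;
public:
  explicit OrderedBlock(const std::vector<std::string> &Insts) {
    unsigned N = 0;
    for (const auto &I : Insts) Pos[I] = N++;
  }
  // True if A appears strictly before B in the block.
  bool comesBefore(const std::string &A, const std::string &B) const {
    return Pos.at(A) < Pos.at(B);
  }
  void eraseInstruction(const std::string &I) { Pos.erase(I); }
  void replaceInstruction(const std::string &Old, const std::string &New) {
    auto It = Pos.find(Old);
    assert(It != Pos.end() && "replacing an unknown instruction");
    Pos[New] = It->second;   // the replacement keeps the old slot
    Pos.erase(It);
  }
};

int main() {
  OrderedBlock OBB({"store1", "call_may_throw", "store2", "ret"});
  // An earlier store that precedes a potentially-throwing call stays visible
  // on the unwind path, so DSE has to keep it unless the object never escapes.
  bool StoreVisibleOnUnwind = OBB.comesBefore("store1", "call_may_throw");
  (void)StoreVisibleOnUnwind;
}
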
diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp
index ffcf34f1cf7a..876681b4f9de 100644
--- a/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -1,9 +1,8 @@
//===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 1f09979b3382..f1f075257020 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1,9 +1,8 @@
//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,6 +75,16 @@ STATISTIC(NumDSE, "Number of trivial dead stores removed");
DEBUG_COUNTER(CSECounter, "early-cse",
"Controls which instructions are removed");
+static cl::opt<unsigned> EarlyCSEMssaOptCap(
+ "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
+ cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
+ "for faster compile. Caps the MemorySSA clobbering calls."));
+
+static cl::opt<bool> EarlyCSEDebugHash(
+ "earlycse-debug-hash", cl::init(false), cl::Hidden,
+ cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
+ "function is well-behaved w.r.t. its isEqual predicate"));
+
//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//
@@ -126,7 +135,33 @@ template <> struct DenseMapInfo<SimpleValue> {
} // end namespace llvm
-unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+/// Match a 'select' including an optional 'not's of the condition.
+static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
+ Value *&B,
+ SelectPatternFlavor &Flavor) {
+ // Return false if V is not even a select.
+ if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
+ return false;
+
+ // Look through a 'not' of the condition operand by swapping A/B.
+ Value *CondNot;
+ if (match(Cond, m_Not(m_Value(CondNot)))) {
+ Cond = CondNot;
+ std::swap(A, B);
+ }
+
+ // Set flavor if we find a match, or set it to unknown otherwise; in
+ // either case, return true to indicate that this is a select we can
+ // process.
+ if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
+ Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
+ else
+ Flavor = SPF_UNKNOWN;
+
+ return true;
+}
+
+static unsigned getHashValueImpl(SimpleValue Val) {
Instruction *Inst = Val.Inst;
// Hash in all of the operands as pointers.
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
@@ -139,32 +174,56 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
}
if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+ // Compares can be commuted by swapping the comparands and
+ // updating the predicate. Choose the form that has the
+ // comparands in sorted order, or in the case of a tie, the
+ // one with the lower predicate.
Value *LHS = CI->getOperand(0);
Value *RHS = CI->getOperand(1);
CmpInst::Predicate Pred = CI->getPredicate();
- if (Inst->getOperand(0) > Inst->getOperand(1)) {
+ CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
+ if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
std::swap(LHS, RHS);
- Pred = CI->getSwappedPredicate();
+ Pred = SwappedPred;
}
return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
}
- // Hash min/max/abs (cmp + select) to allow for commuted operands.
- // Min/max may also have non-canonical compare predicate (eg, the compare for
- // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
- // compare.
- Value *A, *B;
- SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
- // TODO: We should also detect FP min/max.
- if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
- SPF == SPF_UMIN || SPF == SPF_UMAX) {
- if (A > B)
+ // Hash general selects to allow matching commuted true/false operands.
+ SelectPatternFlavor SPF;
+ Value *Cond, *A, *B;
+ if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
+ // Hash min/max/abs (cmp + select) to allow for commuted operands.
+ // Min/max may also have non-canonical compare predicate (eg, the compare for
+ // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
+ // compare.
+ // TODO: We should also detect FP min/max.
+ if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
+ SPF == SPF_UMIN || SPF == SPF_UMAX) {
+ if (A > B)
+ std::swap(A, B);
+ return hash_combine(Inst->getOpcode(), SPF, A, B);
+ }
+ if (SPF == SPF_ABS || SPF == SPF_NABS) {
+ // ABS/NABS always puts the input in A and its negation in B.
+ return hash_combine(Inst->getOpcode(), SPF, A, B);
+ }
+
+ // Hash general selects to allow matching commuted true/false operands.
+
+ // If we do not have a compare as the condition, just hash in the condition.
+ CmpInst::Predicate Pred;
+ Value *X, *Y;
+ if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
+ return hash_combine(Inst->getOpcode(), Cond, A, B);
+
+ // Similar to cmp normalization (above) - canonicalize the predicate value:
+ // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
+ if (CmpInst::getInversePredicate(Pred) < Pred) {
+ Pred = CmpInst::getInversePredicate(Pred);
std::swap(A, B);
- return hash_combine(Inst->getOpcode(), SPF, A, B);
- }
- if (SPF == SPF_ABS || SPF == SPF_NABS) {
- // ABS/NABS always puts the input in A and its negation in B.
- return hash_combine(Inst->getOpcode(), SPF, A, B);
+ }
+ return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
}
if (CastInst *CI = dyn_cast<CastInst>(Inst))
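
The getHashValueImpl change above canonicalizes commuted compares before hashing: the operands and predicate are swapped whenever std::tie orders the swapped form lower, so icmp slt X, Y and icmp sgt Y, X land in the same bucket. Below is a minimal plain-C++ sketch of that canonicalization using a two-predicate toy compare; the hash itself and the Cmp struct are invented for illustration.

// Minimal sketch (plain C++): canonicalize a commuted comparison before
// hashing so the two equivalent spellings produce the same key.
#include <cstddef>
#include <functional>
#include <iostream>
#include <tuple>
#include <utility>

enum Pred { LT, GT };                       // a two-predicate toy ICmp
static Pred swapped(Pred P) { return P == LT ? GT : LT; }

struct Cmp { const void *LHS; const void *RHS; Pred P; };

static std::size_t hashCmp(Cmp C) {
  Pred SwappedP = swapped(C.P);
  if (std::tie(C.LHS, C.P) > std::tie(C.RHS, SwappedP)) {
    std::swap(C.LHS, C.RHS);                // canonical operand order
    C.P = SwappedP;                         // keep the meaning intact
  }
  std::size_t H = std::hash<const void *>()(C.LHS);
  H = H * 31 + std::hash<const void *>()(C.RHS);
  return H * 31 + static_cast<std::size_t>(C.P);
}

int main() {
  int X, Y;
  Cmp A{&X, &Y, LT};                        // X < Y
  Cmp B{&Y, &X, GT};                        // Y > X, the same comparison
  std::cout << (hashCmp(A) == hashCmp(B) ? "hashes match\n" : "bug\n");
}
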
@@ -179,8 +238,7 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
IVI->getOperand(1),
hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
- assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
- isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+ assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
isa<ShuffleVectorInst>(Inst)) &&
"Invalid/unknown instruction");
@@ -191,7 +249,19 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}
-bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+#ifndef NDEBUG
+ // If -earlycse-debug-hash was specified, return a constant -- this
+ // will force all hashing to collide, so we'll exhaustively search
+ // the table for a match, and the assertion in isEqual will fire if
+ // there's a bug causing equal keys to hash differently.
+ if (EarlyCSEDebugHash)
+ return 0;
+#endif
+ return getHashValueImpl(Val);
+}
+
+static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
if (LHS.isSentinel() || RHS.isSentinel())
@@ -227,26 +297,68 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
// Min/max/abs can occur with commuted operands, non-canonical predicates,
// and/or non-canonical operands.
- Value *LHSA, *LHSB;
- SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
- // TODO: We should also detect FP min/max.
- if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
- LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
- LSPF == SPF_ABS || LSPF == SPF_NABS) {
- Value *RHSA, *RHSB;
- SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
+ // Selects can be non-trivially equivalent via inverted conditions and swaps.
+ SelectPatternFlavor LSPF, RSPF;
+ Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
+ if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
+ matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
if (LSPF == RSPF) {
- // Abs results are placed in a defined order by matchSelectPattern.
- if (LSPF == SPF_ABS || LSPF == SPF_NABS)
+ // TODO: We should also detect FP min/max.
+ if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
+ LSPF == SPF_UMIN || LSPF == SPF_UMAX)
+ return ((LHSA == RHSA && LHSB == RHSB) ||
+ (LHSA == RHSB && LHSB == RHSA));
+
+ if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
+ // Abs results are placed in a defined order by matchSelectPattern.
return LHSA == RHSA && LHSB == RHSB;
- return ((LHSA == RHSA && LHSB == RHSB) ||
- (LHSA == RHSB && LHSB == RHSA));
+ }
+
+ // select Cond, A, B <--> select not(Cond), B, A
+ if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
+ return true;
+ }
+
+ // If the true/false operands are swapped and the conditions are compares
+ // with inverted predicates, the selects are equal:
+ // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
+ //
+ // This also handles patterns with a double-negation in the sense of not +
+ // inverse, because we looked through a 'not' in the matching function and
+ // swapped A/B:
+ // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
+ //
+ // This intentionally does NOT handle patterns with a double-negation in
+ // the sense of not + not, because doing so could result in values comparing
+ // as equal that hash differently in the min/max/abs cases like:
+ // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
+ // ^ hashes as min                   ^ would not hash as min
+ // In the context of the EarlyCSE pass, however, such cases never reach
+ // this code, as we simplify the double-negation before hashing the second
+ // select (and so still succeed at CSEing them).
+ if (LHSA == RHSB && LHSB == RHSA) {
+ CmpInst::Predicate PredL, PredR;
+ Value *X, *Y;
+ if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
+ match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
+ CmpInst::getInversePredicate(PredL) == PredR)
+ return true;
}
}
return false;
}
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+ // These comparisons are nontrivial, so assert that equality implies
+ // hash equality (DenseMap demands this as an invariant).
+ bool Result = isEqualImpl(LHS, RHS);
+ assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
+ getHashValueImpl(LHS) == getHashValueImpl(RHS));
+ return Result;
+}
+
//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//
@@ -419,6 +531,7 @@ public:
bool run();
private:
+ unsigned ClobberCounter = 0;
// Almost a POD, but needs to call the constructors for the scoped hash
// tables so that a new scope gets pushed on. These are RAII so that the
// scope gets popped when the NodeScope is destroyed.
@@ -608,36 +721,11 @@ private:
MSSA->verifyMemorySSA();
// Removing a store here can leave MemorySSA in an unoptimized state by
// creating MemoryPhis that have identical arguments and by creating
- // MemoryUses whose defining access is not an actual clobber. We handle the
- // phi case eagerly here. The non-optimized MemoryUse case is lazily
- // updated by MemorySSA getClobberingMemoryAccess.
- if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
- // Optimize MemoryPhi nodes that may become redundant by having all the
- // same input values once MA is removed.
- SmallSetVector<MemoryPhi *, 4> PhisToCheck;
- SmallVector<MemoryAccess *, 8> WorkQueue;
- WorkQueue.push_back(MA);
- // Process MemoryPhi nodes in FIFO order using a ever-growing vector since
- // we shouldn't be processing that many phis and this will avoid an
- // allocation in almost all cases.
- for (unsigned I = 0; I < WorkQueue.size(); ++I) {
- MemoryAccess *WI = WorkQueue[I];
-
- for (auto *U : WI->users())
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
- PhisToCheck.insert(MP);
-
- MSSAUpdater->removeMemoryAccess(WI);
-
- for (MemoryPhi *MP : PhisToCheck) {
- MemoryAccess *FirstIn = MP->getIncomingValue(0);
- if (llvm::all_of(MP->incoming_values(),
- [=](Use &In) { return In == FirstIn; }))
- WorkQueue.push_back(MP);
- }
- PhisToCheck.clear();
- }
- }
+ // MemoryUses whose defining access is not an actual clobber. The phi case
+ // is handled by MemorySSA when passing OptimizePhis = true to
+ // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
+ // by MemorySSA's getClobberingMemoryAccess.
+ MSSAUpdater->removeMemoryAccess(Inst, true);
}
};
@@ -688,8 +776,13 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
// LaterInst, if LaterDef dominates EarlierInst then it can't occur between
// EarlierInst and LaterInst and neither can any other write that potentially
// clobbers LaterInst.
- MemoryAccess *LaterDef =
- MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+ MemoryAccess *LaterDef;
+ if (ClobberCounter < EarlyCSEMssaOptCap) {
+ LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+ ClobberCounter++;
+ } else
+ LaterDef = LaterMA->getDefiningAccess();
+
return MSSA->dominates(LaterDef, EarlierMA);
}
@@ -1117,7 +1210,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// At the moment, we don't remove ordered stores, but do remove
// unordered atomic stores. There's no special requirement (for
// unordered atomics) about removing atomic stores only in favor of
- // other atomic stores since we we're going to execute the non-atomic
+ // other atomic stores since we were going to execute the non-atomic
// one anyway and the atomic one might never have become visible.
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
@@ -1184,8 +1277,7 @@ bool EarlyCSE::run() {
CurrentGeneration, DT.getRootNode(),
DT.getRootNode()->begin(), DT.getRootNode()->end()));
- // Save the current generation.
- unsigned LiveOutGeneration = CurrentGeneration;
+ assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
// Process the stack.
while (!nodesToProcess.empty()) {
@@ -1217,9 +1309,6 @@ bool EarlyCSE::run() {
}
} // while (!nodes...)
- // Reset the current generation.
- CurrentGeneration = LiveOutGeneration;
-
return Changed;
}
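
The EarlyCSE changes above rest on an invariant that is easy to break: any two SimpleValues that isEqual() reports equal must also hash identically, and the new -earlycse-debug-hash option checks this by collapsing every key into a single bucket so the assertion in isEqual() gets exercised against the whole table. As a rough illustration of the same debugging trick outside of LLVM (the Key type, DebugHash flag, and std::unordered_set harness below are invented for the sketch, not part of the patch), forcing the hash to a constant makes every lookup walk the bucket through the equality predicate, where the assertion cross-checks hashing against equality:

  #include <cassert>
  #include <cstddef>
  #include <cstdlib>
  #include <functional>
  #include <string>
  #include <unordered_set>

  // Debug switch standing in for -earlycse-debug-hash: when set, every key
  // hashes to the same bucket, so every lookup compares against all existing
  // keys and the assertion in isEqual() cross-checks hashing vs. equality.
  static bool DebugHash = false;

  struct Key { std::string Text; };

  static std::size_t hashValueImpl(const Key &K) {
    return std::hash<std::string>()(K.Text);
  }

  static std::size_t getHashValue(const Key &K) {
    if (DebugHash)
      return 0; // force collisions, making the table search exhaustive
    return hashValueImpl(K);
  }

  static bool isEqualImpl(const Key &L, const Key &R) { return L.Text == R.Text; }

  static bool isEqual(const Key &L, const Key &R) {
    bool Result = isEqualImpl(L, R);
    // Equal keys must hash identically or the hash table misbehaves.
    assert(!Result || hashValueImpl(L) == hashValueImpl(R));
    return Result;
  }

  struct KeyHash {
    std::size_t operator()(const Key &K) const { return getHashValue(K); }
  };
  struct KeyEq {
    bool operator()(const Key &L, const Key &R) const { return isEqual(L, R); }
  };

  int main() {
    DebugHash = true; // run every insertion through the equality predicate
    std::unordered_set<Key, KeyHash, KeyEq> Table;
    Table.insert({"add %a, %b"});
    Table.insert({"add %a, %b"}); // duplicate, found despite the constant hash
    return Table.size() == 1 ? EXIT_SUCCESS : EXIT_FAILURE;
  }

The sketch only demonstrates the mechanism; the patch wires the same idea into DenseMapInfo<SimpleValue> so that a key pair which compares equal but hashes differently trips the assertion during testing.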
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 117b19fb8a42..31670b1464e4 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -1,9 +1,8 @@
//===- FlattenCFGPass.cpp - CFG Flatten Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index f2828e80bc58..4f83e869b303 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -1,9 +1,8 @@
//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -148,10 +147,10 @@ void Float2IntPass::seen(Instruction *I, ConstantRange R) {
// Helper - get a range representing a poison value.
ConstantRange Float2IntPass::badRange() {
- return ConstantRange(MaxIntegerBW + 1, true);
+ return ConstantRange::getFull(MaxIntegerBW + 1);
}
ConstantRange Float2IntPass::unknownRange() {
- return ConstantRange(MaxIntegerBW + 1, false);
+ return ConstantRange::getEmpty(MaxIntegerBW + 1);
}
ConstantRange Float2IntPass::validateRange(ConstantRange R) {
if (R.getBitWidth() > MaxIntegerBW + 1)
@@ -195,12 +194,13 @@ void Float2IntPass::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
// Path terminated cleanly - use the type of the integer input to seed
// the analysis.
unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
- auto Input = ConstantRange(BW, true);
+ auto Input = ConstantRange::getFull(BW);
auto CastOp = (Instruction::CastOps)I->getOpcode();
seen(I, validateRange(Input.castOp(CastOp, MaxIntegerBW+1)));
continue;
}
+ case Instruction::FNeg:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
@@ -241,6 +241,15 @@ void Float2IntPass::walkForwards() {
case Instruction::SIToFP:
llvm_unreachable("Should have been handled in walkForwards!");
+ case Instruction::FNeg:
+ Op = [](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 1 && "FNeg is a unary operator!");
+ unsigned Size = Ops[0].getBitWidth();
+ auto Zero = ConstantRange(APInt::getNullValue(Size));
+ return Zero.sub(Ops[0]);
+ };
+ break;
+
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
@@ -427,7 +436,7 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
} else if (Instruction *VI = dyn_cast<Instruction>(V)) {
NewOperands.push_back(convert(VI, ToTy));
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+ APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false);
bool Exact;
CF->getValueAPF().convertToInteger(Val,
APFloat::rmNearestTiesToEven,
@@ -467,6 +476,10 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy);
break;
+ case Instruction::FNeg:
+ NewV = IRB.CreateNeg(NewOperands[0], I->getName());
+ break;
+
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
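
The FNeg support added above computes the result range as zero minus the operand's range. A minimal sketch of that interval step, with an invented Range struct instead of ConstantRange (which additionally models wrapping, bit widths, and full/empty sets), assuming ordinary signed arithmetic with no overflow:

  #include <cassert>
  #include <cstdint>

  // Invented stand-in for ConstantRange: a closed interval [Lo, Hi] over
  // int64_t, assuming no wrapping or overflow.
  struct Range {
    int64_t Lo, Hi;
  };

  // Interval subtraction: [a, b] - [c, d] = [a - d, b - c].
  static Range sub(Range L, Range R) { return {L.Lo - R.Hi, L.Hi - R.Lo}; }

  // FNeg is modelled the same way the patch does it: 0 - range(operand).
  static Range negRange(Range Op) {
    Range Zero{0, 0};
    return sub(Zero, Op);
  }

  int main() {
    Range R = negRange({-3, 10});
    assert(R.Lo == -10 && R.Hi == 3);
    return 0;
  }

Under those assumptions, negating [a, b] yields [-b, -a], which is exactly what feeding the operand range into a subtraction from the zero range produces.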
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 9861948c8297..1a02e9d33f49 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1,9 +1,8 @@
//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -46,8 +46,8 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -330,36 +330,15 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
e.type = EI->getType();
e.opcode = 0;
- IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
- // EI might be an extract from one of our recognised intrinsics. If it
- // is we'll synthesize a semantically equivalent expression instead on
- // an extract value expression.
- switch (I->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- e.opcode = Instruction::Add;
- break;
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- e.opcode = Instruction::Sub;
- break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- e.opcode = Instruction::Mul;
- break;
- default:
- break;
- }
-
- if (e.opcode != 0) {
- // Intrinsic recognized. Grab its args to finish building the expression.
- assert(I->getNumArgOperands() == 2 &&
- "Expect two args for recognised intrinsics.");
- e.varargs.push_back(lookupOrAdd(I->getArgOperand(0)));
- e.varargs.push_back(lookupOrAdd(I->getArgOperand(1)));
- return e;
- }
+ WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
+ if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
+ // EI is an extract from one of our with.overflow intrinsics. Synthesize
+ // a semantically equivalent expression instead of an extract value
+ // expression.
+ e.opcode = WO->getBinaryOp();
+ e.varargs.push_back(lookupOrAdd(WO->getLHS()));
+ e.varargs.push_back(lookupOrAdd(WO->getRHS()));
+ return e;
}
// Not a recognised intrinsic. Fall back to producing an extract value
@@ -513,6 +492,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
switch (I->getOpcode()) {
case Instruction::Call:
return lookupOrAddCall(cast<CallInst>(I));
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -544,6 +524,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
+ case Instruction::AddrSpaceCast:
case Instruction::BitCast:
case Instruction::Select:
case Instruction::ExtractElement:
@@ -879,11 +860,12 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
const DataLayout &DL = LI->getModule()->getDataLayout();
+ Instruction *DepInst = DepInfo.getInst();
if (DepInfo.isClobber()) {
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
- if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
// Can't forward from non-atomic to atomic without violating memory model.
if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
@@ -899,7 +881,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// load i32* P
// load i8* (P+1)
// if we have this, replace the later with an extraction from the former.
- if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
+ if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
// Can't forward from non-atomic to atomic without violating memory model.
@@ -916,7 +898,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
- if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
if (Address && !LI->isAtomic()) {
int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
@@ -930,8 +912,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
LLVM_DEBUG(
// fast print dep, using operator<< on instruction is too slow.
dbgs() << "GVN: load "; LI->printAsOperand(dbgs());
- Instruction *I = DepInfo.getInst();
- dbgs() << " is clobbered by " << *I << '\n';);
+ dbgs() << " is clobbered by " << *DepInst << '\n';);
if (ORE->allowExtraAnalysis(DEBUG_TYPE))
reportMayClobberedLoad(LI, DepInfo, DT, ORE);
@@ -939,8 +920,6 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
}
assert(DepInfo.isDef() && "follows from above");
- Instruction *DepInst = DepInfo.getInst();
-
// Loading the allocation -> undef.
if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
// Loading immediately after lifetime begin -> undef.
@@ -959,9 +938,8 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// Reject loads and stores that are to the same address but are of
// different types if we have to. If the stored value is larger or equal to
// the loaded value, we can reuse it.
- if (S->getValueOperand()->getType() != LI->getType() &&
- !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), DL))
+ if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), LI->getType(),
+ DL))
return false;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -976,8 +954,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the types mismatch and we can't handle it, reject reuse of the load.
// If the stored value is larger or equal to the loaded value, we can reuse
// it.
- if (LD->getType() != LI->getType() &&
- !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+ if (!canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1132,6 +1109,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
}
+ // FIXME: Can we support the fallthrough edge?
+ if (isa<CallBrInst>(Pred->getTerminator())) {
+ LLVM_DEBUG(
+ dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+
if (LoadBB->isEHPad()) {
LLVM_DEBUG(
dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
@@ -1220,9 +1205,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Instructions that have been inserted in predecessor(s) to materialize
// the load address do not retain their original debug locations. Doing
// so could lead to confusing (but correct) source attributions.
- // FIXME: How do we retain source locations without causing poor debugging
- // behavior?
- I->setDebugLoc(DebugLoc());
+ if (const DebugLoc &DL = I->getDebugLoc())
+ I->setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
// FIXME: We really _ought_ to insert these value numbers into their
// parent's availability map. However, in doing so, we risk getting into
@@ -1235,10 +1219,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
BasicBlock *UnavailablePred = PredLoad.first;
Value *LoadPtr = PredLoad.second;
- auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre",
- LI->isVolatile(), LI->getAlignment(),
- LI->getOrdering(), LI->getSyncScopeID(),
- UnavailablePred->getTerminator());
+ auto *NewLoad =
+ new LoadInst(LI->getType(), LoadPtr, LI->getName() + ".pre",
+ LI->isVolatile(), LI->getAlignment(), LI->getOrdering(),
+ LI->getSyncScopeID(), UnavailablePred->getTerminator());
NewLoad->setDebugLoc(LI->getDebugLoc());
// Transfer the old load's AA tags to the new load.
@@ -2168,8 +2152,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
return false;
// We don't currently value number ANY inline asm calls.
- if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
- if (CallI->isInlineAsm())
+ if (auto *CallB = dyn_cast<CallBase>(CurInst))
+ if (CallB->isInlineAsm())
return false;
uint32_t ValNo = VN.lookup(CurInst);
@@ -2252,6 +2236,11 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
if (isa<IndirectBrInst>(PREPred->getTerminator()))
return false;
+ // Don't do PRE across callbr.
+ // FIXME: Can we do this across the fallthrough edge?
+ if (isa<CallBrInst>(PREPred->getTerminator()))
+ return false;
+
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
@@ -2479,8 +2468,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) {
PHINode &Phi = cast<PHINode>(*II);
- Phi.setIncomingValue(Phi.getBasicBlockIndex(P),
- UndefValue::get(Phi.getType()));
+ Phi.setIncomingValueForBlock(P, UndefValue::get(Phi.getType()));
if (MD)
MD->invalidateCachedPointerInfo(&Phi);
}
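
One idea worth calling out in the GVN hunk above: an extractvalue of field 0 from an add/sub/mul with.overflow intrinsic is keyed with the underlying binary opcode and its two operands, so it collapses to the same value number as the plain arithmetic instruction. The toy table below is an invented stand-in for GVN's Expression/ValueTable machinery (names and numbering scheme are made up for the sketch) and only shows the effect of choosing identical keys for both forms:

  #include <cassert>
  #include <cstdint>
  #include <map>
  #include <string>
  #include <tuple>

  // Invented toy value-numbering table: an expression is (opcode, lhs#, rhs#),
  // and structurally identical expressions receive the same number.
  using Expr = std::tuple<std::string, uint32_t, uint32_t>;

  struct ValueTable {
    std::map<Expr, uint32_t> Numbering;
    uint32_t Next = 3; // pretend 1 and 2 are already taken by %a and %b

    uint32_t lookupOrAdd(const Expr &E) {
      auto It = Numbering.find(E);
      if (It != Numbering.end())
        return It->second;
      return Numbering[E] = Next++;
    }
  };

  int main() {
    ValueTable VT;
    const uint32_t A = 1, B = 2; // value numbers of %a and %b

    // %sum = add i32 %a, %b
    uint32_t PlainAdd = VT.lookupOrAdd(Expr{"add", A, B});

    // %wo = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    // %v  = extractvalue { i32, i1 } %wo, 0
    // The patch keys the extract of field 0 as the same "add" expression...
    uint32_t OverflowAdd = VT.lookupOrAdd(Expr{"add", A, B});

    // ...so both forms end up with one value number and are CSE candidates.
    assert(PlainAdd == OverflowAdd);
    return 0;
  }

With identical keys, GVN's usual leader-based replacement can then treat the two values as redundant.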
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index 76a42d7fe750..7614599653c4 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -1,9 +1,8 @@
//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -703,7 +702,7 @@ private:
// Vector of PHIs contains PHIs for different instructions.
// Sort the args according to their VNs, such that identical
// instructions are together.
- std::stable_sort(CHIs.begin(), CHIs.end(), cmpVN);
+ llvm::stable_sort(CHIs, cmpVN);
auto TI = BB->getTerminator();
auto B = CHIs.begin();
// [PreIt, PHIIt) form a range of CHIs which have identical VNs.
diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp
index 1df5f5400c14..054025755c69 100644
--- a/lib/Transforms/Scalar/GVNSink.cpp
+++ b/lib/Transforms/Scalar/GVNSink.cpp
@@ -1,9 +1,8 @@
//===- GVNSink.cpp - sink expressions into successors ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -442,6 +441,7 @@ public:
break;
case Instruction::Call:
case Instruction::Invoke:
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -714,6 +714,15 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
// FIXME: If any of these fail, we should partition up the candidates to
// try and continue making progress.
Instruction *I0 = NewInsts[0];
+
+ // If all instructions that are going to participate don't have the same
+ // number of operands, we can't do any useful PHI analysis for all operands.
+ auto hasDifferentNumOperands = [&I0](Instruction *I) {
+ return I->getNumOperands() != I0->getNumOperands();
+ };
+ if (any_of(NewInsts, hasDifferentNumOperands))
+ return None;
+
for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
if (PHI.areAllIncomingValuesSame())
@@ -791,10 +800,7 @@ unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
--LRI;
}
- std::stable_sort(
- Candidates.begin(), Candidates.end(),
- [](const SinkingInstructionCandidate &A,
- const SinkingInstructionCandidate &B) { return A > B; });
+ llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
: Candidates) dbgs()
<< " " << C << "\n";);
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index efc204d4f74b..e14f44bb7069 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -1,9 +1,8 @@
//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,11 @@ static cl::opt<unsigned> FrequentBranchThreshold(
"it is considered frequently taken"),
cl::init(1000));
+static cl::opt<bool>
+ WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
+ cl::desc("Whether or not we should widen guards "
+ "expressed as branches by widenable conditions"),
+ cl::init(true));
namespace {
@@ -93,6 +97,10 @@ static Value *getCondition(Instruction *I) {
"Bad guard intrinsic?");
return GI->getArgOperand(0);
}
+ if (isGuardAsWidenableBranch(I)) {
+ auto *Cond = cast<BranchInst>(I)->getCondition();
+ return cast<BinaryOperator>(Cond)->getOperand(0);
+ }
return cast<BranchInst>(I)->getCondition();
}
@@ -133,12 +141,12 @@ class GuardWideningImpl {
/// guards.
DenseSet<Instruction *> WidenedGuards;
- /// Try to eliminate guard \p Guard by widening it into an earlier dominating
- /// guard. \p DFSI is the DFS iterator on the dominator tree that is
- /// currently visiting the block containing \p Guard, and \p GuardsPerBlock
+ /// Try to eliminate instruction \p Instr by widening it into an earlier
+ /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
+ /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
/// maps BasicBlocks to the set of guards seen in that block.
- bool eliminateGuardViaWidening(
- Instruction *Guard, const df_iterator<DomTreeNode *> &DFSI,
+ bool eliminateInstrViaWidening(
+ Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsPerBlock, bool InvertCondition = false);
@@ -162,28 +170,25 @@ class GuardWideningImpl {
static StringRef scoreTypeToString(WideningScore WS);
- /// Compute the score for widening the condition in \p DominatedGuard
- /// (contained in \p DominatedGuardLoop) into \p DominatingGuard (contained in
- /// \p DominatingGuardLoop). If \p InvertCond is set, then we widen the
+ /// Compute the score for widening the condition in \p DominatedInstr
+ /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
/// inverted condition of the dominating guard.
- WideningScore computeWideningScore(Instruction *DominatedGuard,
- Loop *DominatedGuardLoop,
+ WideningScore computeWideningScore(Instruction *DominatedInstr,
Instruction *DominatingGuard,
- Loop *DominatingGuardLoop,
bool InvertCond);
/// Helper to check if \p V can be hoisted to \p InsertPos.
- bool isAvailableAt(Value *V, Instruction *InsertPos) {
- SmallPtrSet<Instruction *, 8> Visited;
+ bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
+ SmallPtrSet<const Instruction *, 8> Visited;
return isAvailableAt(V, InsertPos, Visited);
}
- bool isAvailableAt(Value *V, Instruction *InsertPos,
- SmallPtrSetImpl<Instruction *> &Visited);
+ bool isAvailableAt(const Value *V, const Instruction *InsertPos,
+ SmallPtrSetImpl<const Instruction *> &Visited) const;
/// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
/// isAvailableAt returned true.
- void makeAvailableAt(Value *V, Instruction *InsertPos);
+ void makeAvailableAt(Value *V, Instruction *InsertPos) const;
/// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
/// to generate an expression computing the logical AND of \p Cond0 and (\p
@@ -200,23 +205,23 @@ class GuardWideningImpl {
/// pre-existing instruction in the IR that computes the result of this range
/// check.
class RangeCheck {
- Value *Base;
- ConstantInt *Offset;
- Value *Length;
+ const Value *Base;
+ const ConstantInt *Offset;
+ const Value *Length;
ICmpInst *CheckInst;
public:
- explicit RangeCheck(Value *Base, ConstantInt *Offset, Value *Length,
- ICmpInst *CheckInst)
+ explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
+ const Value *Length, ICmpInst *CheckInst)
: Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}
- void setBase(Value *NewBase) { Base = NewBase; }
- void setOffset(ConstantInt *NewOffset) { Offset = NewOffset; }
+ void setBase(const Value *NewBase) { Base = NewBase; }
+ void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }
- Value *getBase() const { return Base; }
- ConstantInt *getOffset() const { return Offset; }
+ const Value *getBase() const { return Base; }
+ const ConstantInt *getOffset() const { return Offset; }
const APInt &getOffsetValue() const { return getOffset()->getValue(); }
- Value *getLength() const { return Length; };
+ const Value *getLength() const { return Length; };
ICmpInst *getCheckInst() const { return CheckInst; }
void print(raw_ostream &OS, bool PrintTypes = false) {
@@ -238,19 +243,19 @@ class GuardWideningImpl {
/// append them to \p Checks. Returns true on success, may clobber \c Checks
/// on failure.
bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
- SmallPtrSet<Value *, 8> Visited;
+ SmallPtrSet<const Value *, 8> Visited;
return parseRangeChecks(CheckCond, Checks, Visited);
}
bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
- SmallPtrSetImpl<Value *> &Visited);
+ SmallPtrSetImpl<const Value *> &Visited);
/// Combine the checks in \p Checks into a smaller set of checks and append
/// them into \p CombinedChecks. Return true on success (i.e. all of checks
/// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
/// and \p CombinedChecks on success and on failure.
bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
- SmallVectorImpl<RangeCheck> &CombinedChecks);
+ SmallVectorImpl<RangeCheck> &CombinedChecks) const;
/// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
/// computing only one of the two expressions?
@@ -266,8 +271,16 @@ class GuardWideningImpl {
void widenGuard(Instruction *ToWiden, Value *NewCondition,
bool InvertCondition) {
Value *Result;
- widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result,
+ widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
InvertCondition);
+ Value *WidenableCondition = nullptr;
+ if (isGuardAsWidenableBranch(ToWiden)) {
+ auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
+ WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
+ }
+ if (WidenableCondition)
+ Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
+ "guard.chk", ToWiden);
setCondition(ToWiden, Result);
}
@@ -285,6 +298,14 @@ public:
};
}
+static bool isSupportedGuardInstruction(const Instruction *Insn) {
+ if (isGuard(Insn))
+ return true;
+ if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
+ return true;
+ return false;
+}
+
bool GuardWideningImpl::run() {
DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
bool Changed = false;
@@ -304,20 +325,20 @@ bool GuardWideningImpl::run() {
auto &CurrentList = GuardsInBlock[BB];
for (auto &I : *BB)
- if (isGuard(&I))
+ if (isSupportedGuardInstruction(&I))
CurrentList.push_back(cast<Instruction>(&I));
for (auto *II : CurrentList)
- Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock);
+ Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
if (WidenFrequentBranches && BPI)
if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
if (BI->isConditional()) {
// If one of branches of a conditional is likely taken, try to
// eliminate it.
if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
- Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock);
+ Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock);
else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
- Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock,
+ Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock,
/*InvertCondition*/true);
}
}
@@ -326,7 +347,7 @@ bool GuardWideningImpl::run() {
for (auto *I : EliminatedGuardsAndBranches)
if (!WidenedGuards.count(I)) {
assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
- if (isGuard(I))
+ if (isSupportedGuardInstruction(I))
eliminateGuard(I);
else {
assert(isa<BranchInst>(I) &&
@@ -338,19 +359,18 @@ bool GuardWideningImpl::run() {
return Changed;
}
-bool GuardWideningImpl::eliminateGuardViaWidening(
- Instruction *GuardInst, const df_iterator<DomTreeNode *> &DFSI,
+bool GuardWideningImpl::eliminateInstrViaWidening(
+ Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
GuardsInBlock, bool InvertCondition) {
// Ignore trivial true or false conditions. These instructions will be
// trivially eliminated by any cleanup pass. Do not erase them because other
// guards can possibly be widened into them.
- if (isa<ConstantInt>(getCondition(GuardInst)))
+ if (isa<ConstantInt>(getCondition(Instr)))
return false;
Instruction *BestSoFar = nullptr;
auto BestScoreSoFar = WS_IllegalOrNegative;
- auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent());
// In the set of dominating guards, find the one we can merge GuardInst with
// for the most profit.
@@ -358,12 +378,13 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
auto *CurBB = DFSI.getPath(i)->getBlock();
if (!BlockFilter(CurBB))
break;
- auto *CurLoop = LI.getLoopFor(CurBB);
assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;
auto I = GuardsInCurBB.begin();
- auto E = GuardsInCurBB.end();
+ auto E = Instr->getParent() == CurBB
+ ? std::find(GuardsInCurBB.begin(), GuardsInCurBB.end(), Instr)
+ : GuardsInCurBB.end();
#ifndef NDEBUG
{
@@ -379,21 +400,11 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
}
#endif
- assert((i == (e - 1)) == (GuardInst->getParent() == CurBB) && "Bad DFS?");
-
- if (i == (e - 1) && CurBB->getTerminator() != GuardInst) {
- // Corner case: make sure we're only looking at guards strictly dominating
- // GuardInst when visiting GuardInst->getParent().
- auto NewEnd = std::find(I, E, GuardInst);
- assert(NewEnd != E && "GuardInst not in its own block?");
- E = NewEnd;
- }
+ assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");
for (auto *Candidate : make_range(I, E)) {
- auto Score =
- computeWideningScore(GuardInst, GuardInstLoop, Candidate, CurLoop,
- InvertCondition);
- LLVM_DEBUG(dbgs() << "Score between " << *getCondition(GuardInst)
+ auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
+ LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
<< " and " << *getCondition(Candidate) << " is "
<< scoreTypeToString(Score) << "\n");
if (Score > BestScoreSoFar) {
@@ -404,42 +415,45 @@ bool GuardWideningImpl::eliminateGuardViaWidening(
}
if (BestScoreSoFar == WS_IllegalOrNegative) {
- LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *GuardInst << "\n");
+ LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
return false;
}
- assert(BestSoFar != GuardInst && "Should have never visited same guard!");
- assert(DT.dominates(BestSoFar, GuardInst) && "Should be!");
+ assert(BestSoFar != Instr && "Should have never visited same guard!");
+ assert(DT.dominates(BestSoFar, Instr) && "Should be!");
- LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar
+ LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
<< " with score " << scoreTypeToString(BestScoreSoFar)
<< "\n");
- widenGuard(BestSoFar, getCondition(GuardInst), InvertCondition);
+ widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
auto NewGuardCondition = InvertCondition
- ? ConstantInt::getFalse(GuardInst->getContext())
- : ConstantInt::getTrue(GuardInst->getContext());
- setCondition(GuardInst, NewGuardCondition);
- EliminatedGuardsAndBranches.push_back(GuardInst);
+ ? ConstantInt::getFalse(Instr->getContext())
+ : ConstantInt::getTrue(Instr->getContext());
+ setCondition(Instr, NewGuardCondition);
+ EliminatedGuardsAndBranches.push_back(Instr);
WidenedGuards.insert(BestSoFar);
return true;
}
-GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
- Instruction *DominatedGuard, Loop *DominatedGuardLoop,
- Instruction *DominatingGuard, Loop *DominatingGuardLoop, bool InvertCond) {
+GuardWideningImpl::WideningScore
+GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
+ Instruction *DominatingGuard,
+ bool InvertCond) {
+ Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
+ Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
bool HoistingOutOfLoop = false;
- if (DominatingGuardLoop != DominatedGuardLoop) {
+ if (DominatingGuardLoop != DominatedInstrLoop) {
// Be conservative and don't widen into a sibling loop. TODO: If the
// sibling is colder, we should consider allowing this.
if (DominatingGuardLoop &&
- !DominatingGuardLoop->contains(DominatedGuardLoop))
+ !DominatingGuardLoop->contains(DominatedInstrLoop))
return WS_IllegalOrNegative;
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(getCondition(DominatedGuard), DominatingGuard))
+ if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -450,7 +464,7 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// here. TODO: evaluate cost model for spurious deopt
// NOTE: As written, this also lets us hoist right over another guard which
// is essentially just another spelling for control flow.
- if (isWideningCondProfitable(getCondition(DominatedGuard),
+ if (isWideningCondProfitable(getCondition(DominatedInstr),
getCondition(DominatingGuard), InvertCond))
return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
@@ -462,7 +476,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
// throw, etc...). That choice appears arbitrary.
auto MaybeHoistingOutOfIf = [&]() {
auto *DominatingBlock = DominatingGuard->getParent();
- auto *DominatedBlock = DominatedGuard->getParent();
+ auto *DominatedBlock = DominatedInstr->getParent();
+ if (isGuardAsWidenableBranch(DominatingGuard))
+ DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);
// Same Block?
if (DominatedBlock == DominatingBlock)
@@ -478,8 +494,9 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}
-bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
- SmallPtrSetImpl<Instruction *> &Visited) {
+bool GuardWideningImpl::isAvailableAt(
+ const Value *V, const Instruction *Loc,
+ SmallPtrSetImpl<const Instruction *> &Visited) const {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
return true;
@@ -499,7 +516,7 @@ bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
[&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
}
-void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) {
+void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
auto *Inst = dyn_cast<Instruction>(V);
if (!Inst || DT.dominates(Inst, Loc))
return;
@@ -597,7 +614,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
bool GuardWideningImpl::parseRangeChecks(
Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
- SmallPtrSetImpl<Value *> &Visited) {
+ SmallPtrSetImpl<const Value *> &Visited) {
if (!Visited.insert(CheckCond).second)
return true;
@@ -616,7 +633,7 @@ bool GuardWideningImpl::parseRangeChecks(
IC->getPredicate() != ICmpInst::ICMP_UGT))
return false;
- Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
+ const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
if (IC->getPredicate() == ICmpInst::ICMP_UGT)
std::swap(CmpLHS, CmpRHS);
@@ -669,13 +686,13 @@ bool GuardWideningImpl::parseRangeChecks(
bool GuardWideningImpl::combineRangeChecks(
SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
- SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) {
+ SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
unsigned OldCount = Checks.size();
while (!Checks.empty()) {
// Pick all of the range checks with a specific base and length, and try to
// merge them.
- Value *CurrentBase = Checks.front().getBase();
- Value *CurrentLength = Checks.front().getLength();
+ const Value *CurrentBase = Checks.front().getBase();
+ const Value *CurrentLength = Checks.front().getLength();
SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;
@@ -704,8 +721,8 @@ bool GuardWideningImpl::combineRangeChecks(
// Note: std::sort should not invalidate the ChecksStart iterator.
- ConstantInt *MinOffset = CurrentChecks.front().getOffset(),
- *MaxOffset = CurrentChecks.back().getOffset();
+ const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
+ const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();
unsigned BitWidth = MaxOffset->getValue().getBitWidth();
if ((MaxOffset->getValue() - MinOffset->getValue())
@@ -800,6 +817,31 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
return PA;
}
+PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+ Function &F = *L.getHeader()->getParent();
+ BranchProbabilityInfo *BPI = nullptr;
+ if (WidenFrequentBranches)
+ BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F);
+
+ BasicBlock *RootBB = L.getLoopPredecessor();
+ if (!RootBB)
+ RootBB = L.getHeader();
+ auto BlockFilter = [&](BasicBlock *BB) {
+ return BB == RootBB || L.contains(BB);
+ };
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI,
+ AR.DT.getNode(RootBB),
+ BlockFilter).run())
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
namespace {
struct GuardWideningLegacyPass : public FunctionPass {
static char ID;
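
Much of the GuardWidening diff above teaches the pass to treat a conditional branch on and(%cond, %widenable.cond) as a guard: getCondition() returns only the real condition (operand 0 of the and), and widenGuard() folds the new check into that condition before re-appending the widenable condition as the outermost and (the "guard.chk" instruction the patch creates). The string-based toy below is invented for illustration and ignores condition inversion and the real IRBuilder calls; it only sketches the shape of the rewritten branch condition:

  #include <cassert>
  #include <string>

  // Invented toy model of a guard expressed as a widenable branch, where the
  // branch condition is and(RealCond, WidenableCond). getCondition() returns
  // only the real part, and widening ANDs a new check into it while keeping
  // the widenable condition outermost, mirroring the "guard.chk" AND.
  struct WidenableBranchGuard {
    std::string RealCond;      // e.g. "icmp ult %i, %n"
    std::string WidenableCond; // llvm.experimental.widenable.condition() result

    std::string getCondition() const { return RealCond; }

    void widen(const std::string &NewCheck) {
      RealCond = "and(" + RealCond + ", " + NewCheck + ")";
    }

    std::string branchCondition() const {
      return "and(" + RealCond + ", " + WidenableCond + ")";
    }
  };

  int main() {
    WidenableBranchGuard G{"icmp ult %i, %n", "%wc"};
    G.widen("icmp ult %j, %m");
    assert(G.branchCondition() ==
           "and(and(icmp ult %i, %n, icmp ult %j, %m), %wc)");
    return 0;
  }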
diff --git a/lib/Transforms/Scalar/IVUsersPrinter.cpp b/lib/Transforms/Scalar/IVUsersPrinter.cpp
index 807593379283..e2022aba97c4 100644
--- a/lib/Transforms/Scalar/IVUsersPrinter.cpp
+++ b/lib/Transforms/Scalar/IVUsersPrinter.cpp
@@ -1,9 +1,8 @@
//===- IVUsersPrinter.cpp - Induction Variable Users Printer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 48d8e457ba7c..f9fc698a4a9b 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1,9 +1,8 @@
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +43,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -101,7 +102,7 @@ static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
-enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl };
+enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
static cl::opt<ReplaceExitVal> ReplaceExitValue(
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
@@ -109,6 +110,8 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
clEnumValN(OnlyCheapRepl, "cheap",
"only replace exit value when the cost is cheap"),
+ clEnumValN(NoHardUse, "noharduse",
+ "only replace exit values when loop def likely dead"),
clEnumValN(AlwaysRepl, "always",
"always replace exit value whenever possible")));
@@ -141,13 +144,15 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ bool optimizeLoopExits(Loop *L);
bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
bool rewriteFirstIterationLoopExitValues(Loop *L);
bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
- bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+ const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);
bool sinkUnusedInvariants(Loop *L);
@@ -218,7 +223,9 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
-/// common dominator for the incoming blocks.
+/// common dominator for the incoming blocks. A nullptr can be returned if no
+/// viable location is found: it may happen if User is a PHI and Def only comes
+/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
DominatorTree *DT, LoopInfo *LI) {
PHINode *PHI = dyn_cast<PHINode>(User);
@@ -231,6 +238,10 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
continue;
BasicBlock *InsertBB = PHI->getIncomingBlock(i);
+
+ if (!DT->isReachableFromEntry(InsertBB))
+ continue;
+
if (!InsertPt) {
InsertPt = InsertBB->getTerminator();
continue;
@@ -238,7 +249,11 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
InsertPt = InsertBB->getTerminator();
}
- assert(InsertPt && "Missing phi operand");
+
+ // If we have skipped all inputs, it means that Def only comes to Phi from
+ // unreachable blocks.
+ if (!InsertPt)
+ return nullptr;
auto *DefI = dyn_cast<Instruction>(Def);
if (!DefI)
@@ -621,8 +636,12 @@ bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Computing the value outside of the loop brings no benefit if it is
// definitely used inside the loop in a way which can not be optimized
- // away.
- if (!isa<SCEVConstant>(ExitValue) && hasHardUserWithinLoop(L, Inst))
+ // away. Avoid doing so unless we know we have a value which computes
+ // the ExitValue already. TODO: This should be merged into SCEV
+ // expander to leverage its knowledge of existing expressions.
+ if (ReplaceExitValue != AlwaysRepl &&
+ !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
+ hasHardUserWithinLoop(L, Inst))
continue;
bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
@@ -707,8 +726,6 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
- auto *LoopHeader = L->getHeader();
- assert(LoopHeader && "Invalid loop");
bool MadeAnyChanges = false;
for (auto *ExitBB : ExitBlocks) {
@@ -719,11 +736,13 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
IncomingValIdx != E; ++IncomingValIdx) {
auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
- // We currently only support loop exits from loop header. If the
- // incoming block is not loop header, we need to recursively check
- // all conditions starting from loop header are loop invariants.
- // Additional support might be added in the future.
- if (IncomingBB != LoopHeader)
+ // Can we prove that the exit must run on the first iteration if it
+ // runs at all? (i.e. early exits are fine for our purposes, but
+ // traces which lead to this exit being taken on the 2nd iteration
+ // aren't.) Note that this is about whether the exit branch is
+ // executed, not about whether it is taken.
+ if (!L->getLoopLatch() ||
+ !DT->dominates(IncomingBB, L->getLoopLatch()))
continue;
// Get condition that leads to the exit path.
@@ -744,8 +763,8 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
- // Only deal with PHIs.
- if (!ExitVal)
+ // Only deal with PHIs in the loop header.
+ if (!ExitVal || ExitVal->getParent() != L->getHeader())
continue;
// If ExitVal is a PHI on the loop header, then we know its
@@ -755,7 +774,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
assert(LoopPreheader && "Invalid loop");
int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
if (PreheaderIdx != -1) {
- assert(ExitVal->getParent() == LoopHeader &&
+ assert(ExitVal->getParent() == L->getHeader() &&
"ExitVal must be in loop header");
MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
@@ -1022,24 +1041,13 @@ protected:
} // end anonymous namespace
-/// Perform a quick domtree based check for loop invariance assuming that V is
-/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this
-/// purpose.
-static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
- Instruction *Inst = dyn_cast<Instruction>(V);
- if (!Inst)
- return true;
-
- return DT->properlyDominates(Inst->getParent(), L->getHeader());
-}
-
Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
bool IsSigned, Instruction *Use) {
// Set the debug location and conservative insertion point.
IRBuilder<> Builder(Use);
// Hoist the insertion point into loop preheaders as far as possible.
for (const Loop *L = LI->getLoopFor(Use->getParent());
- L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
+ L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
L = L->getParentLoop())
Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
@@ -1305,13 +1313,15 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
return {AddRec, ExtKind};
}
-/// This IV user cannot be widen. Replace this use of the original narrow IV
+/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return;
LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
<< *DU.NarrowUse << "\n");
- IRBuilder<> Builder(
- getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
+ IRBuilder<> Builder(InsertPt);
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}
@@ -1348,8 +1358,10 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
// Widen the compare instruction.
- IRBuilder<> Builder(
- getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI));
+ auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
+ if (!InsertPt)
+ return false;
+ IRBuilder<> Builder(InsertPt);
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
// Widen the other operand of the compare, if necessary.
@@ -1977,41 +1989,10 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
-/// Return true if this loop's backedge taken count expression can be safely and
-/// cheaply expanded into an instruction sequence that can be used by
-/// linearFunctionTestReplace.
-///
-/// TODO: This fails for pointer-type loop counters with greater than one byte
-/// strides, consequently preventing LFTR from running. For the purpose of LFTR
-/// we could skip this check in the case that the LFTR loop counter (chosen by
-/// FindLoopCounter) is also pointer type. Instead, we could directly convert
-/// the loop test to an inequality test by checking the target data's alignment
-/// of element types (given that the initial pointer value originates from or is
-/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
-/// However, we don't yet have a strong motivation for converting loop tests
-/// into inequality tests.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
- SCEVExpander &Rewriter) {
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- BackedgeTakenCount->isZero())
- return false;
-
- if (!L->getExitingBlock())
- return false;
-
- // Can't rewrite non-branch yet.
- if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
- return false;
-
- if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
- return false;
-
- return true;
-}
-
-/// Return the loop header phi IFF IncV adds a loop invariant value to the phi.
-static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+/// Given a Value which is hoped to be part of an add recurrence in the given
+/// loop, return the associated Phi node if so. Otherwise, return null. Note
+/// that this is less general than SCEV's AddRec checking.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
return nullptr;
@@ -2031,7 +2012,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
- if (isLoopInvariant(IncI->getOperand(1), L, DT))
+ if (L->isLoopInvariant(IncI->getOperand(1)))
return Phi;
return nullptr;
}
@@ -2041,32 +2022,40 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
if (Phi && Phi->getParent() == L->getHeader()) {
- if (isLoopInvariant(IncI->getOperand(0), L, DT))
+ if (L->isLoopInvariant(IncI->getOperand(0)))
return Phi;
}
return nullptr;
}
-/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
-static ICmpInst *getLoopTest(Loop *L) {
- assert(L->getExitingBlock() && "expected loop exit");
-
- BasicBlock *LatchBlock = L->getLoopLatch();
- // Don't bother with LFTR if the loop is not properly simplified.
- if (!LatchBlock)
- return nullptr;
-
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- assert(BI && "expected exit branch");
+/// Whether the current loop exit test is based on this value. Currently this
+/// is limited to a direct use in the loop condition.
+static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ // TODO: Allow non-icmp loop test.
+ if (!ICmp)
+ return false;
- return dyn_cast<ICmpInst>(BI->getCondition());
+ // TODO: Allow indirect use.
+ return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}
/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
-static bool needsLFTR(Loop *L, DominatorTree *DT) {
+static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
+ assert(L->getLoopLatch() && "Must be in simplified form");
+
+ // Avoid converting a constant or loop invariant test back to a runtime
+ // test. This is critical for when SCEV's cached ExitCount is less precise
+ // than the current IR (such as after we've proven a particular exit is
+ // actually dead and thus the BE count never reaches our ExitCount.)
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (L->isLoopInvariant(BI->getCondition()))
+ return false;
+
// Do LFTR to simplify the exit condition to an ICMP.
- ICmpInst *Cond = getLoopTest(L);
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
return true;
@@ -2078,15 +2067,15 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Look for a loop invariant RHS
Value *LHS = Cond->getOperand(0);
Value *RHS = Cond->getOperand(1);
- if (!isLoopInvariant(RHS, L, DT)) {
- if (!isLoopInvariant(LHS, L, DT))
+ if (!L->isLoopInvariant(RHS)) {
+ if (!L->isLoopInvariant(LHS))
return true;
std::swap(LHS, RHS);
}
// Look for a simple IV counter LHS
PHINode *Phi = dyn_cast<PHINode>(LHS);
if (!Phi)
- Phi = getLoopPhiForCounter(LHS, L, DT);
+ Phi = getLoopPhiForCounter(LHS, L);
if (!Phi)
return true;
@@ -2098,7 +2087,49 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) {
// Do LFTR if the exit condition's IV is *not* a simple counter.
Value *IncV = Phi->getIncomingValue(Idx);
- return Phi != getLoopPhiForCounter(IncV, L, DT);
+ return Phi != getLoopPhiForCounter(IncV, L);
+}
+
+/// Return true if undefined behavior would provably be executed on the path to
+/// OnPathTo if Root produced a poison result. Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison. This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist. Note that a false result conveys no information.
+static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+ Instruction *OnPathTo,
+ DominatorTree *DT) {
+ // Basic approach is to assume Root is poison, propagate poison forward
+ // through all users we can easily track, and then check whether any of those
+ // users are provably UB and must execute before our exiting block might
+ // exit.
+
+ // The set of all recursive users we've visited (which are assumed to all be
+ // poison because of said visit)
+ SmallSet<const Value *, 16> KnownPoison;
+ SmallVector<const Instruction*, 16> Worklist;
+ Worklist.push_back(Root);
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+
+ // If we know this must trigger UB on a path leading to our target.
+ if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
+ return true;
+
+ // If we can't analyze propagation through this instruction, just skip it
+ // and transitive users. Safe as false is a conservative result.
+ if (!propagatesFullPoison(I) && I != Root)
+ continue;
+
+ if (KnownPoison.insert(I).second)
+ for (const User *User : I->users())
+ Worklist.push_back(cast<Instruction>(User));
+ }
+
+ // Might be non-UB, or might have a path we couldn't prove must execute on
+ // the way to the exiting bb.
+ return false;
}
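// Illustrative usage only (a sketch, not taken from this change): a caller
// holding an increment instruction IncV and an exiting block ExitingBB could
// gate a new use of IncV on this helper:
//
//   if (mustExecuteUBIfPoisonOnPathTo(IncV, ExitingBB->getTerminator(), DT)) {
//     // Any poison produced by IncV already reaches guaranteed UB before the
//     // exit test runs, so adding a control-equivalent use of IncV there
//     // cannot introduce UB that did not previously exist.
//   }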
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
@@ -2157,46 +2188,62 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
return true;
}
-/// Find an affine IV in canonical form.
+/// Return true if the given phi is a "counter" in L. A counter is an
+/// add recurrence (of integer or pointer type) with an arbitrary start, and a
+/// step of 1. Note that L must have exactly one latch.
+static bool isLoopCounter(PHINode* Phi, Loop *L,
+ ScalarEvolution *SE) {
+ assert(Phi->getParent() == L->getHeader());
+ assert(L->getLoopLatch());
+
+ if (!SE->isSCEVable(Phi->getType()))
+ return false;
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+ if (!AR || AR->getLoop() != L || !AR->isAffine())
+ return false;
+
+ const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!Step || !Step->isOne())
+ return false;
+
+ int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+ return (getLoopPhiForCounter(IncV, L) == Phi);
+}
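// For illustration (a sketch of the expected IR shape, not taken from this
// change), a phi accepted by isLoopCounter looks like:
//
//   header:
//     %iv = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
//     ...
//   latch:
//     %iv.next = add i64 %iv, 1
//     br i1 %cond, label %header, label %exit
//
// SE->getSCEV(%iv) is then the affine add recurrence {%start,+,1}, its step is
// the constant 1, and the latch incoming value %iv.next maps back to %iv via
// getLoopPhiForCounter, so each check above succeeds.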
+
+/// Search the loop header for a loop counter (an add rec w/ step of one)
+/// suitable for use by LFTR. If multiple counters are available, select the
+/// "best" one based profitable heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
-///
-/// Currently only valid for LFTR. See the comments on hasConcreteDef below.
-///
-/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
-///
-/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
-/// This is difficult in general for SCEV because of potential overflow. But we
-/// could at least handle constant BECounts.
-static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
+ const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
- Value *Cond =
- cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+ Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
PHINode *BestPhi = nullptr;
const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
- assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+ assert(LatchBlock && "Must be in simplified form");
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
- if (!SE->isSCEVable(Phi->getType()))
+ if (!isLoopCounter(Phi, L, SE))
continue;
// Avoid comparing an integer IV against a pointer Limit.
if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
continue;
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
- if (!AR || AR->getLoop() != L || !AR->isAffine())
- continue;
-
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+
// AR may be a pointer type, while BECount is an integer type.
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
@@ -2204,28 +2251,30 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
continue;
- const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
- if (!Step || !Step->isOne())
- continue;
-
- int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
- Value *IncV = Phi->getIncomingValue(LatchIdx);
- if (getLoopPhiForCounter(IncV, L, DT) != Phi)
- continue;
-
// Avoid reusing a potentially undef value to compute other values that may
// have originally had a concrete definition.
if (!hasConcreteDef(Phi)) {
// We explicitly allow unknown phis as long as they are already used by
- // the loop test. In this case we assume that performing LFTR could not
- // increase the number of undef users.
- if (ICmpInst *Cond = getLoopTest(L)) {
- if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) &&
- Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
- continue;
- }
- }
+ // the loop exit test. This is legal since performing LFTR could not
+ // increase the number of undef users.
+ Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
+ if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
+ !isLoopExitTestBasedOn(IncPhi, ExitingBB))
+ continue;
}
+
+ // Avoid introducing undefined behavior due to poison which didn't exist in
+ // the original program. (Annoyingly, the rules for poison and undef
+ // propagation are distinct, so this does NOT cover the undef case above.)
+ // We have to ensure that we don't introduce UB by introducing a use on an
+ // iteration where said IV produces poison. Our strategy here differs for
+ // pointers and integer IVs. For integers, we strip and reinfer as needed,
+ // see code in linearFunctionTestReplace. For pointers, we restrict
+ // transforms as there is no good way to reinfer inbounds once lost.
+ if (!Phi->getType()->isIntegerTy() &&
+ !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
+ continue;
+
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2251,47 +2300,49 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
-/// Help linearFunctionTestReplace by generating a value that holds the RHS of
-/// the new loop test.
-static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+/// Insert an IR expression which computes the value held by the IV IndVar
+/// (which must be a loop counter w/ unit stride) after the backedge of loop L
+/// is taken ExitCount times.
+static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
+ const SCEV *ExitCount, bool UsePostInc, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ assert(isLoopCounter(IndVar, L, SE));
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
const SCEV *IVInit = AR->getStart();
- // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
- // finds a valid pointer IV. Sign extend BECount in order to materialize a
+ // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
+ // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
// GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
// the existing GEPs whenever possible.
- if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) {
+ if (IndVar->getType()->isPointerTy() &&
+ !ExitCount->getType()->isPointerTy()) {
// IVOffset will be the new GEP offset that is interpreted by GEP as a
- // signed value. IVCount on the other hand represents the loop trip count,
+ // signed value. ExitCount on the other hand represents the loop trip count,
// which is an unsigned value. FindLoopCounter only allows induction
// variables that have a positive unit stride of one. This means we don't
// have to handle the case of negative offsets (yet) and just need to zero
- // extend IVCount.
+ // extend ExitCount.
Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
- const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy);
+ const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
+ if (UsePostInc)
+ IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
// Expand the code for the iteration count.
assert(SE->isLoopInvariant(IVOffset, L) &&
"Computed iteration count is not loop invariant!");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
- Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
- Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
- assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
// We could handle pointer IVs other than i8*, but we need to compensate for
- // gep index scaling. See canExpandBackedgeTakenCount comments.
+ // gep index scaling.
assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
- cast<PointerType>(GEPBase->getType())
+ cast<PointerType>(IndVar->getType())
->getElementType())->isOne() &&
"unit stride pointer IV must be i8*");
- IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
- return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");
+ const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
} else {
- // In any other case, convert both IVInit and IVCount to integers before
+ // In any other case, convert both IVInit and ExitCount to integers before
// comparing. This may result in SCEV expansion of pointers, but in practice
// SCEV will fold the pointer arithmetic away as such:
// BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
@@ -2299,35 +2350,40 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// Valid Cases: (1) both integers is most common; (2) both may be pointers
// for simple memset-style loops.
//
- // IVInit integer and IVCount pointer would only occur if a canonical IV
+ // IVInit integer and ExitCount pointer would only occur if a canonical IV
// were generated on top of case #2, which is not expected.
- const SCEV *IVLimit = nullptr;
- // For unit stride, IVCount = Start + BECount with 2's complement overflow.
- // For non-zero Start, compute IVCount here.
- if (AR->getStart()->isZero())
- IVLimit = IVCount;
- else {
- assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
- const SCEV *IVInit = AR->getStart();
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ // For unit stride, IVCount = Start + ExitCount with 2's complement
+ // overflow.
+
+ // For integer IVs, truncate the IV before computing IVInit + BECount,
+ // unless we know a priori that the limit must be a constant when evaluated
+ // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
+ // of the IV in the loop over a (potentially) expensive expansion of the
+ // widened exit count add(zext(add)) expression.
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(ExitCount->getType())) {
+ if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
+ ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
+ else
+ IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
+ }
- // For integer IVs, truncate the IV before computing IVInit + BECount.
- if (SE->getTypeSizeInBits(IVInit->getType())
- > SE->getTypeSizeInBits(IVCount->getType()))
- IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+ const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
+
+ if (UsePostInc)
+ IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
- IVLimit = SE->getAddExpr(IVInit, IVCount);
- }
// Expand the code for the iteration count.
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
- IRBuilder<> Builder(BI);
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
// Ensure that we generate the same type as IndVar, or a smaller integer
// type. In the presence of null pointer values, we have an integer type
// SCEV expression (IVInit) for a pointer type IV value (IndVar).
- Type *LimitTy = IVCount->getType()->isPointerTy() ?
- IndVar->getType() : IVCount->getType();
+ Type *LimitTy = ExitCount->getType()->isPointerTy() ?
+ IndVar->getType() : ExitCount->getType();
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
}
}
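// A worked example of the limit computed above (illustrative numbers, not
// taken from this change): for a unit-stride counter with IVInit = 0 and
// ExitCount = 9 (the backedge is taken 9 times before this exit is taken),
// the pre-increment IV equals 9 when the exit fires, so
// IVLimit = IVInit + ExitCount = 9; with UsePostInc the comparison uses the
// incremented IV, so one more is added and IVLimit = 10.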
@@ -2338,51 +2394,70 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
-linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
+ const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
+ assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+ assert(isLoopCounter(IndVar, L, SE));
+ Instruction * const IncVar =
+ cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
- // Initialize CmpIndVar and IVCount to their preincremented values.
+ // Initialize CmpIndVar to the preincremented IV.
Value *CmpIndVar = IndVar;
- const SCEV *IVCount = BackedgeTakenCount;
-
- assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+ bool UsePostInc = false;
// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
- if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount = SE->getAddExpr(BackedgeTakenCount,
- SE->getOne(BackedgeTakenCount->getType()));
- // The BackedgeTaken expression contains the number of times that the
- // backedge branches to the loop header. This is one less than the
- // number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ if (ExitingBB == L->getLoopLatch()) {
+ // For pointer IVs, we chose not to strip inbounds, which requires us not
+ // to add a potentially UB-introducing use. We need to either a) show
+ // the loop test we're modifying is already in post-inc form, or b) show
+ // that adding a use must not introduce UB.
+ bool SafeToPostInc =
+ IndVar->getType()->isIntegerTy() ||
+ isLoopExitTestBasedOn(IncVar, ExitingBB) ||
+ mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
+ if (SafeToPostInc) {
+ UsePostInc = true;
+ CmpIndVar = IncVar;
+ }
}
- Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ // It may be necessary to drop nowrap flags on the incrementing instruction
+ // if either LFTR moves from a pre-inc check to a post-inc check (in which
+ // case the increment might have previously been poison on the last iteration
+ // only) or if LFTR switches to a different IV that was previously dynamically
+ // dead (and as such may be arbitrarily poison). We remove any nowrap flags
+ // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
+ // check), because the pre-inc addrec flags may be adopted from the original
+ // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
+ // TODO: This handling is inaccurate for one case: If we switch to a
+ // dynamically dead IV that wraps on the first loop iteration only, which is
+ // not covered by the post-inc addrec. (If the new IV was not dynamically
+ // dead, it could not be poison on the first iteration in the first place.)
+ if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
+ if (BO->hasNoUnsignedWrap())
+ BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
+ if (BO->hasNoSignedWrap())
+ BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
+ }
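+ // Example of the flag adjustment above (illustrative, not taken from this
+ // change): if the increment is "%iv.next = add nuw nsw i64 %iv, 1" but SCEV
+ // only proves <nsw> for the post-increment add recurrence, the nuw flag is
+ // cleared here while nsw is kept, since only flags SCEV re-derives for the
+ // post-inc form are known to remain valid after the rewrite.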
+
+ Value *ExitCnt = genLoopLimit(
+ IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
assert(ExitCnt->getType()->isPointerTy() ==
IndVar->getType()->isPointerTy() &&
"genLoopLimit missed a cast");
// Insert a new icmp_ne or icmp_eq instruction before the branch.
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
P = ICmpInst::ICMP_NE;
else
P = ICmpInst::ICMP_EQ;
- LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
- << " LHS:" << *CmpIndVar << '\n'
- << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
- << "\n"
- << " RHS:\t" << *ExitCnt << "\n"
- << " IVCount:\t" << *IVCount << "\n");
-
IRBuilder<> Builder(BI);
// The new loop exit condition should reuse the debug location of the
@@ -2390,67 +2465,58 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
- // LFTR can ignore IV overflow and truncate to the width of
- // BECount. This avoids materializing the add(zext(add)) expression.
+ // For integer IVs, if we evaluated the limit in the narrower bitwidth to
+ // avoid the expensive expansion of the limit expression in the wider type,
+ // emit a truncate to narrow the IV to the ExitCount type. This is safe
+ // since we know (from the exit count bitwidth), that we can't self-wrap in
+ // the narrower type.
unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
if (CmpIndVarSize > ExitCntSize) {
- const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- const SCEV *ARStart = AR->getStart();
- const SCEV *ARStep = AR->getStepRecurrence(*SE);
- // For constant IVCount, avoid truncation.
- if (isa<SCEVConstant>(ARStart) && isa<SCEVConstant>(IVCount)) {
- const APInt &Start = cast<SCEVConstant>(ARStart)->getAPInt();
- APInt Count = cast<SCEVConstant>(IVCount)->getAPInt();
- // Note that the post-inc value of BackedgeTakenCount may have overflowed
- // above such that IVCount is now zero.
- if (IVCount != BackedgeTakenCount && Count == 0) {
- Count = APInt::getMaxValue(Count.getBitWidth()).zext(CmpIndVarSize);
- ++Count;
- }
- else
- Count = Count.zext(CmpIndVarSize);
- APInt NewLimit;
- if (cast<SCEVConstant>(ARStep)->getValue()->isNegative())
- NewLimit = Start - Count;
- else
- NewLimit = Start + Count;
- ExitCnt = ConstantInt::get(CmpIndVar->getType(), NewLimit);
-
- LLVM_DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
+ assert(!CmpIndVar->getType()->isPointerTy() &&
+ !ExitCnt->getType()->isPointerTy());
+
+ // Before resorting to actually inserting the truncate, use the same
+ // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
+ // the other side of the comparison instead. We still evaluate the limit
+ // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
+ // a truncate within the loop.
+ bool Extended = false;
+ const SCEV *IV = SE->getSCEV(CmpIndVar);
+ const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+ ExitCnt->getType());
+ const SCEV *ZExtTrunc =
+ SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
+
+ if (ZExtTrunc == IV) {
+ Extended = true;
+ ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+ "wide.trip.count");
} else {
- // We try to extend trip count first. If that doesn't work we truncate IV.
- // Zext(trunc(IV)) == IV implies equivalence of the following two:
- // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
- // one of the two holds, extend the trip count, otherwise we truncate IV.
- bool Extended = false;
- const SCEV *IV = SE->getSCEV(CmpIndVar);
- const SCEV *ZExtTrunc =
- SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
- ExitCnt->getType()),
- CmpIndVar->getType());
-
- if (ZExtTrunc == IV) {
+ const SCEV *SExtTrunc =
+ SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
+ if (SExtTrunc == IV) {
Extended = true;
- ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+ ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
"wide.trip.count");
- } else {
- const SCEV *SExtTrunc =
- SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
- ExitCnt->getType()),
- CmpIndVar->getType());
- if (SExtTrunc == IV) {
- Extended = true;
- ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
- "wide.trip.count");
- }
}
-
- if (!Extended)
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
- "lftr.wideiv");
}
+
+ if (Extended) {
+ bool Discard;
+ L->makeLoopInvariant(ExitCnt, Discard);
+ } else
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
}
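// The extension trick above, illustrated (assumed widths, not taken from this
// change): suppose CmpIndVar is an i64 counter whose SCEV satisfies
// zext(trunc(IV to i32) to i64) == IV, i.e. the IV is known to stay within
// [0, 2^32). Then for any 32-bit limit L, trunc(IV) == L holds exactly when
// IV == zext(L), so the limit is widened once outside the loop
// (wide.trip.count) instead of truncating the IV on every iteration.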
+ LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
+ << "\n"
+ << " RHS:\t" << *ExitCnt << "\n"
+ << "ExitCount:\t" << *ExitCount << "\n"
+ << " was: " << *BI->getCondition() << "\n");
+
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
@@ -2558,6 +2624,111 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
return MadeAnyChanges;
}
+bool IndVarSimplify::optimizeLoopExits(Loop *L) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Form an expression for the maximum exit count possible for this loop. We
+ // merge the max and exact information to approximate a version of
+ // getMaxBackedgeTakenInfo which isn't restricted to just constants.
+ // TODO: factor this out as a version of getMaxBackedgeTakenCount which
+ // isn't guaranteed to return a constant.
+ SmallVector<const SCEV*, 4> ExitCounts;
+ const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(MaxConstEC))
+ ExitCounts.push_back(MaxConstEC);
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (!isa<SCEVCouldNotCompute>(ExitCount)) {
+ assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
+ "We should only have known counts for exiting blocks that "
+ "dominate latch!");
+ ExitCounts.push_back(ExitCount);
+ }
+ }
+ if (ExitCounts.empty())
+ return false;
+ const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
+
+ bool Changed = false;
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+
+ // If already constant, nothing to do.
+ if (isa<Constant>(BI->getCondition()))
+ continue;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+
+ // If we know we'd exit on the first iteration, rewrite the exit to
+ // reflect this. This does not imply the loop must exit through this
+ // exit; there may be an earlier one taken on the first iteration.
+ // TODO: Given we know the backedge can't be taken, we should go ahead
+ // and break it. Or at least, kill all the header phis and simplify.
+ if (ExitCount->isZero()) {
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
+ ConstantInt::getFalse(OldCond->getType());
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ Changed = true;
+ continue;
+ }
+
+ // If we end up with a pointer exit count, bail.
+ if (!ExitCount->getType()->isIntegerTy() ||
+ !MaxExitCount->getType()->isIntegerTy())
+ return false;
+
+ Type *WiderType =
+ SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
+ ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
+ MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
+ assert(MaxExitCount->getType() == ExitCount->getType());
+
+ // Can we prove that some other exit must be taken strictly before this
+ // one? TODO: handle cases where ule is known, and equality is covered
+ // by a dominating exit
+ if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
+ MaxExitCount, ExitCount)) {
+ bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
+ auto *OldCond = BI->getCondition();
+ auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
+ ConstantInt::getTrue(OldCond->getType());
+ BI->setCondition(NewCond);
+ if (OldCond->use_empty())
+ DeadInsts.push_back(OldCond);
+ Changed = true;
+ continue;
+ }
+
+ // TODO: If we can prove that the exiting iteration is equal to the exit
+ // count for this exit and that no previous exit opportunities exist within
+ // the loop, then we can discharge all other exits. (May fall out of
+ // previous TODO.)
+
+ // TODO: If we can't prove any relation between our exit count and the
+ // loop's exit count, but taking this exit doesn't require actually running
+ // the loop (i.e. no side effects, no computed values used in exit), then
+ // we can replace the exit test with a loop invariant test which exits on
+ // the first iteration.
+ }
+ return Changed;
+}
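// A small example of the dominating-exit fold above (illustrative source, not
// taken from this change):
//
//   for (int i = 0; i < 100; i++) {   // latch exit, exit count 99
//     if (i == 7) break;              // earlier exit, exit count 7
//     ...
//   }
//
// MaxExitCount is umin(7, 99) = 7, which is provably ULT the latch exit's
// count of 99, so another exit must be taken strictly first; the latch
// condition is rewritten to the "stay in the loop" constant and, if now
// unused, the old condition is queued for deletion.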
+
//===----------------------------------------------------------------------===//
// IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//
@@ -2614,23 +2785,60 @@ bool IndVarSimplify::run(Loop *L) {
// Eliminate redundant IV cycles.
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+ Changed |= optimizeLoopExits(L);
+
// If we have a trip count expression, rewrite the loop's exit condition
- // using it. We can currently only handle loops with a single exit.
- if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
- needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
- if (IndVar) {
+ // using it.
+ if (!DisableLFTR) {
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
+ // Can't rewrite non-branch yet.
+ if (!isa<BranchInst>(ExitingBB->getTerminator()))
+ continue;
+
+ // If our exiting block exits multiple loops, we can only rewrite the
+ // innermost one. Otherwise, we're changing how many times the innermost
+ // loop runs before it exits.
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
+
+ if (!needsLFTR(L, ExitingBB))
+ continue;
+
+ const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ continue;
+
+ // This was handled above, but as we form SCEVs, we can sometimes refine
+ // existing ones; this allows exit counts to be folded to zero which
+ // weren't when optimizeLoopExits saw them. Arguably, we should iterate
+ // until stable to handle cases like this better.
+ if (ExitCount->isZero())
+ continue;
+
+ PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
+ if (!IndVar)
+ continue;
+
+ // Avoid high cost expansions. Note: This heuristic is questionable in
+ // that our definition of "high cost" is not exactly principled.
+ if (Rewriter.isHighCostExpansion(ExitCount, L))
+ continue;
+
// Check preconditions for proper SCEVExpander operation. SCEV does not
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
- // pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops,
- // while LoopPassManager only forces the current loop to be simplified.
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead
+ // any pass that uses the SCEVExpander must do it. This does not work
+ // well for loop passes because SCEVExpander makes assumptions about
+ // all loops, while LoopPassManager only forces the current loop to be
+ // simplified.
//
// FIXME: SCEV expansion has no way to bail out, so the caller must
// explicitly check any assumptions made by SCEV. Brittle.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
if (!AR || AR->getLoop()->getLoopPreheader())
- Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Changed |= linearFunctionTestReplace(L, ExitingBB,
+ ExitCount, IndVar,
Rewriter);
}
}
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 1c701bbee185..997d68838152 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1,9 +1,8 @@
//===- InductiveRangeCheckElimination.cpp - -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -116,6 +115,11 @@ static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
cl::Hidden, cl::init(true));
+static cl::opt<bool> AllowNarrowLatchCondition(
+ "irce-allow-narrow-latch", cl::Hidden, cl::init(true),
+ cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
+ "with narrow latch condition."));
+
static const char *ClonedLoopTag = "irce.loop.clone";
#define DEBUG_TYPE "irce"
@@ -532,12 +536,6 @@ class LoopConstrainer {
Optional<const SCEV *> HighLimit;
};
- // A utility function that does a `replaceUsesOfWith' on the incoming block
- // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
- // incoming block list with `ReplaceBy'.
- static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
- BasicBlock *ReplaceBy);
-
// Compute a safe set of limits for the main loop to run in -- effectively the
// intersection of `Range' and the iteration space of the original loop.
// Return None if unable to compute the set of subranges.
@@ -639,13 +637,6 @@ public:
} // end anonymous namespace
-void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
- BasicBlock *ReplaceBy) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingBlock(i) == Block)
- PN->setIncomingBlock(i, ReplaceBy);
-}
-
/// Given a loop with a decreasing induction variable, is it possible to
/// safely calculate the bounds of a new loop using the given Predicate.
static bool isSafeDecreasingBound(const SCEV *Start,
@@ -868,7 +859,7 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
assert(!StepCI->isZero() && "Zero step?");
bool IsIncreasing = !StepCI->isNegative();
- bool IsSignedPredicate = ICmpInst::isSigned(Pred);
+ bool IsSignedPredicate;
const SCEV *StartNext = IndVarBase->getStart();
const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE));
const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
@@ -1045,11 +1036,23 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE,
return Result;
}
+/// If the type of \p S matches with \p Ty, return \p S. Otherwise, return
+/// signed or unsigned extension of \p S to type \p Ty.
+static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
+ bool Signed) {
+ return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
+}
+
Optional<LoopConstrainer::SubRanges>
LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
- if (Range.getType() != Ty)
+ auto *RTy = cast<IntegerType>(Range.getType());
+
+ // We only support wide range checks and narrow latches.
+ if (!AllowNarrowLatchCondition && RTy != Ty)
+ return None;
+ if (RTy->getBitWidth() < Ty->getBitWidth())
return None;
LoopConstrainer::SubRanges Result;
@@ -1057,8 +1060,10 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
// I think we can be more aggressive here and make this nuw / nsw if the
// addition that feeds into the icmp for the latch's terminating branch is nuw
// / nsw. In any case, a wrapping 2's complement addition is safe.
- const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
- const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+ const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
+ RTy, SE, IsSignedPredicate);
+ const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
+ SE, IsSignedPredicate);
bool Increasing = MainLoopStructure.IndVarIncreasing;
@@ -1068,7 +1073,7 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
- const SCEV *One = SE.getOne(Ty);
+ const SCEV *One = SE.getOne(RTy);
if (Increasing) {
Smallest = Start;
Greatest = End;
@@ -1257,6 +1262,13 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
bool IsSignedPredicate = LS.IsSignedPredicate;
IRBuilder<> B(PreheaderJump);
+ auto *RangeTy = Range.getBegin()->getType();
+ auto NoopOrExt = [&](Value *V) {
+ if (V->getType() == RangeTy)
+ return V;
+ return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+ : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+ };
// EnterLoopCond - is it okay to start executing this `LS'?
Value *EnterLoopCond = nullptr;
@@ -1264,15 +1276,16 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
Increasing
? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
: (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- EnterLoopCond = B.CreateICmp(Pred, LS.IndVarStart, ExitSubloopAt);
+ Value *IndVarStart = NoopOrExt(LS.IndVarStart);
+ EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
PreheaderJump->eraseFromParent();
LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
B.SetInsertPoint(LS.LatchBr);
- Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, LS.IndVarBase,
- ExitSubloopAt);
+ Value *IndVarBase = NoopOrExt(LS.IndVarBase);
+ Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
Value *CondForBranch = LS.LatchBrExitIdx == 1
? TakeBackedgeLoopCond
@@ -1285,7 +1298,8 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// IterationsLeft - are there any more iterations left, given the original
// upper bound on the induction variable? If not, we branch to the "real"
// exit.
- Value *IterationsLeft = B.CreateICmp(Pred, LS.IndVarBase, LS.LoopExitAt);
+ Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
+ Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
BranchInst *BranchToContinuation =
@@ -1304,15 +1318,14 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
}
- RRI.IndVarEnd = PHINode::Create(LS.IndVarBase->getType(), 2, "indvar.end",
+ RRI.IndVarEnd = PHINode::Create(IndVarBase->getType(), 2, "indvar.end",
BranchToContinuation);
- RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader);
- RRI.IndVarEnd->addIncoming(LS.IndVarBase, RRI.ExitSelector);
+ RRI.IndVarEnd->addIncoming(IndVarStart, Preheader);
+ RRI.IndVarEnd->addIncoming(IndVarBase, RRI.ExitSelector);
// The latch exit now has a branch from `RRI.ExitSelector' instead of
// `LS.Latch'. The PHI nodes need to be updated to reflect that.
- for (PHINode &PN : LS.LatchExit->phis())
- replacePHIBlock(&PN, LS.Latch, RRI.ExitSelector);
+ LS.LatchExit->replacePhiUsesWith(LS.Latch, RRI.ExitSelector);
return RRI;
}
@@ -1322,9 +1335,8 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
const LoopConstrainer::RewrittenRangeInfo &RRI) const {
unsigned PHIIndex = 0;
for (PHINode &PN : LS.Header->phis())
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
- if (PN.getIncomingBlock(i) == ContinuationBlock)
- PN.setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
+ PN.setIncomingValueForBlock(ContinuationBlock,
+ RRI.PHIValuesAtPseudoExit[PHIIndex++]);
LS.IndVarStart = RRI.IndVarEnd;
}
@@ -1335,9 +1347,7 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
BranchInst::Create(LS.Header, Preheader);
- for (PHINode &PN : LS.Header->phis())
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
- replacePHIBlock(&PN, OldPreheader, Preheader);
+ LS.Header->replacePhiUsesWith(OldPreheader, Preheader);
return Preheader;
}
@@ -1393,7 +1403,7 @@ bool LoopConstrainer::run() {
SubRanges SR = MaybeSR.getValue();
bool Increasing = MainLoopStructure.IndVarIncreasing;
IntegerType *IVTy =
- cast<IntegerType>(MainLoopStructure.IndVarBase->getType());
+ cast<IntegerType>(Range.getBegin()->getType());
SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
Instruction *InsertPt = OriginalPreheader->getTerminator();
@@ -1534,7 +1544,7 @@ bool LoopConstrainer::run() {
// This function canonicalizes the loop into Loop-Simplify and LCSSA forms.
auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) {
formLCSSARecursively(*L, DT, &LI, &SE);
- simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr, true);
// Pre/post loops are slow paths, we do not need to perform any loop
// optimizations on them.
if (!IsOriginalLoop)
@@ -1556,6 +1566,12 @@ Optional<InductiveRangeCheck::Range>
InductiveRangeCheck::computeSafeIterationSpace(
ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
bool IsLatchSigned) const {
+ // We can deal with the case when the types of the latch check and the range
+ // checks don't match, as long as the latch check is narrower.
+ auto *IVType = cast<IntegerType>(IndVar->getType());
+ auto *RCType = cast<IntegerType>(getBegin()->getType());
+ if (IVType->getBitWidth() > RCType->getBitWidth())
+ return None;
// IndVar is of the form "A + B * I" (where "I" is the canonical induction
// variable, that may or may not exist as a real llvm::Value in the loop) and
// this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1579,8 +1595,9 @@ InductiveRangeCheck::computeSafeIterationSpace(
if (!IndVar->isAffine())
return None;
- const SCEV *A = IndVar->getStart();
- const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+ const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
+ const SCEVConstant *B = dyn_cast<SCEVConstant>(
+ NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
if (!B)
return None;
assert(!B->isZero() && "Recurrence with zero step?");
@@ -1591,7 +1608,7 @@ InductiveRangeCheck::computeSafeIterationSpace(
return None;
assert(!D->getValue()->isZero() && "Recurrence with zero step?");
- unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ unsigned BitWidth = RCType->getBitWidth();
const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
// Subtract Y from X so that it does not go through border of the IV
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index fbbc09eb487f..5f0e2001c73d 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1,9 +1,8 @@
//===- InferAddressSpace.cpp - --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,7 +148,9 @@ class InferAddressSpaces : public FunctionPass {
public:
static char ID;
- InferAddressSpaces() : FunctionPass(ID) {}
+ InferAddressSpaces() :
+ FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
+ InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -218,13 +219,17 @@ static bool isAddressExpression(const Value &V) {
if (!isa<Operator>(V))
return false;
- switch (cast<Operator>(V).getOpcode()) {
+ const Operator &Op = cast<Operator>(V);
+ switch (Op.getOpcode()) {
case Instruction::PHI:
+ assert(Op.getType()->isPointerTy());
+ return true;
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
- case Instruction::Select:
return true;
+ case Instruction::Select:
+ return Op.getType()->isPointerTy();
default:
return false;
}
@@ -548,10 +553,17 @@ static Value *cloneConstantExprWithNewAddressSpace(
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
IsNew = true;
NewOperands.push_back(cast<Constant>(NewOperand));
- } else {
- // Otherwise, reuses the old operand.
- NewOperands.push_back(Operand);
+ continue;
}
+ if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
+ if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
+ CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+ IsNew = true;
+ NewOperands.push_back(cast<Constant>(NewOperand));
+ continue;
+ }
+ // Otherwise, reuse the old operand.
+ NewOperands.push_back(Operand);
}
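// For illustration (assumed IR shape, not taken from this change): given a
// constant expression operand such as
//   getelementptr (float, float* addrspacecast (float addrspace(3)* @g
//                  to float*), i64 4)
// the recursion above first rewrites the nested addrspacecast operand, which
// then lets the enclosing GEP constant expression be rebuilt in the inferred
// address space instead of being reused unchanged.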
// If !IsNew, we will replace the Value with itself. However, replaced values
@@ -621,9 +633,12 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- FlatAddrSpace = TTI.getFlatAddressSpace();
- if (FlatAddrSpace == UninitializedAddressSpace)
- return false;
+
+ if (FlatAddrSpace == UninitializedAddressSpace) {
+ FlatAddrSpace = TTI.getFlatAddressSpace();
+ if (FlatAddrSpace == UninitializedAddressSpace)
+ return false;
+ }
// Collects all flat address expressions in postorder.
std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
@@ -991,8 +1006,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
}
// Otherwise, replaces the use with flat(NewV).
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- BasicBlock::iterator InsertPos = std::next(I->getIterator());
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Don't create a copy of the original addrspacecast.
+ if (U == V && isa<AddrSpaceCastInst>(V))
+ continue;
+
+ BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
@@ -1015,6 +1034,6 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
return true;
}
-FunctionPass *llvm::createInferAddressSpacesPass() {
- return new InferAddressSpaces();
+FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
+ return new InferAddressSpaces(AddressSpace);
}
diff --git a/lib/Transforms/Scalar/InstSimplifyPass.cpp b/lib/Transforms/Scalar/InstSimplifyPass.cpp
index 05cd48d83267..6616364ab203 100644
--- a/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -1,9 +1,8 @@
//===- InstSimplifyPass.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 48de56a02834..b86bf2fefbe5 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1,9 +1,8 @@
//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -38,7 +38,6 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -103,6 +102,12 @@ static cl::opt<bool> PrintLVIAfterJumpThreading(
cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false),
cl::Hidden);
+static cl::opt<bool> ThreadAcrossLoopHeaders(
+ "jump-threading-across-loop-headers",
+ cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
+ cl::init(false), cl::Hidden);
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
@@ -369,7 +374,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
- FindLoopHeaders(F);
+ if (!ThreadAcrossLoopHeaders)
+ FindLoopHeaders(F);
bool EverChanged = false;
bool Changed;
@@ -1056,7 +1062,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
Condition = IB->getAddress()->stripPointerCasts();
Preference = WantBlockAddress;
} else {
- return false; // Must be an invoke.
+ return false; // Must be an invoke or callbr.
}
// Run constant folding to see if we can reduce the condition to a simple
@@ -1092,7 +1098,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
<< "' folding undef terminator: " << *BBTerm << '\n');
BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);
BBTerm->eraseFromParent();
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
@@ -1143,7 +1149,9 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1;
BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove);
ToRemoveSucc->removePredecessor(BB, true);
- BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ BranchInst *UncondBr =
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ UncondBr->setDebugLoc(CondBr->getDebugLoc());
CondBr->eraseFromParent();
if (CondCmp->use_empty())
CondCmp->eraseFromParent();
@@ -1160,7 +1168,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
ConstantInt::getFalse(CondCmp->getType());
ReplaceFoldableUses(CondCmp, CI);
}
- DTU->deleteEdgeRelaxed(BB, ToRemoveSucc);
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Delete, BB, ToRemoveSucc}});
return true;
}
@@ -1172,7 +1181,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- TryToUnfoldSelect(SI, BB);
+ if (TryToUnfoldSelect(SI, BB))
+ return true;
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
@@ -1245,9 +1255,10 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1);
BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 1 : 0);
RemoveSucc->removePredecessor(BB);
- BranchInst::Create(KeepSucc, BI);
+ BranchInst *UncondBI = BranchInst::Create(KeepSucc, BI);
+ UncondBI->setDebugLoc(BI->getDebugLoc());
BI->eraseFromParent();
- DTU->deleteEdgeRelaxed(BB, RemoveSucc);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
return true;
}
CurrentBB = CurrentPred;
@@ -1429,7 +1440,9 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Add all the unavailable predecessors to the PredsToSplit list.
for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
- if (isa<IndirectBrInst>(P->getTerminator()))
+ // Same for CallBr.
+ if (isa<IndirectBrInst>(P->getTerminator()) ||
+ isa<CallBrInst>(P->getTerminator()))
return false;
if (!AvailablePredSet.count(P))
@@ -1446,11 +1459,11 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- LoadInst *NewVal =
- new LoadInst(LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
- LoadI->getName() + ".pr", false, LoadI->getAlignment(),
- LoadI->getOrdering(), LoadI->getSyncScopeID(),
- UnavailablePred->getTerminator());
+ LoadInst *NewVal = new LoadInst(
+ LoadI->getType(), LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
+ LoadI->getName() + ".pr", false, LoadI->getAlignment(),
+ LoadI->getOrdering(), LoadI->getSyncScopeID(),
+ UnavailablePred->getTerminator());
NewVal->setDebugLoc(LoadI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
@@ -1474,8 +1487,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LoadI) {
for (pred_iterator PI = PB; PI != PE; ++PI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
- std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(P, (Value*)nullptr));
+ llvm::lower_bound(AvailablePreds, std::make_pair(P, (Value *)nullptr));
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
@@ -1601,7 +1613,6 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
Constant *OnlyVal = nullptr;
Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
- unsigned PredWithKnownDest = 0;
for (const auto &PredValue : PredValues) {
BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
@@ -1638,12 +1649,10 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
OnlyVal = MultipleVal;
}
- // We know where this predecessor is going.
- ++PredWithKnownDest;
-
// If the predecessor ends with an indirect goto, we can't change its
- // destination.
- if (isa<IndirectBrInst>(Pred->getTerminator()))
+ // destination. Same for CallBr.
+ if (isa<IndirectBrInst>(Pred->getTerminator()) ||
+ isa<CallBrInst>(Pred->getTerminator()))
continue;
PredToDestList.push_back(std::make_pair(Pred, DestBB));
@@ -1657,7 +1666,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// not thread. By doing so, we do not need to duplicate the current block and
// also miss potential opportunities in case we don't/can't duplicate.
if (OnlyDest && OnlyDest != MultipleDestSentinel) {
- if (PredWithKnownDest == (size_t)pred_size(BB)) {
+ if (BB->hasNPredecessors(PredToDestList.size())) {
bool SeenFirstBranchToOnlyDest = false;
std::vector <DominatorTree::UpdateType> Updates;
Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1);
@@ -1674,7 +1683,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
Instruction *Term = BB->getTerminator();
BranchInst::Create(OnlyDest, Term);
Term->eraseFromParent();
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
// If the condition is now dead due to the removal of the old terminator,
// erase it.
@@ -1976,8 +1985,14 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
BasicBlock::iterator BI = BB->begin();
- for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
- ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+ // Clone the phi nodes of BB into NewBB. The resulting phi nodes are trivial,
+ // since NewBB only has one predecessor, but SSAUpdater might need to rewrite
+ // the operand of the cloned phi.
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ PHINode *NewPN = PHINode::Create(PN->getType(), 1, PN->getName(), NewBB);
+ NewPN->addIncoming(PN->getIncomingValueForBlock(PredBB), PredBB);
+ ValueMapping[PN] = NewPN;
+ }
// Clone the non-phi instructions of BB into NewBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
@@ -2016,9 +2031,9 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
// Enqueue required DT updates.
- DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB},
- {DominatorTree::Insert, PredBB, NewBB},
- {DominatorTree::Delete, PredBB, BB}});
+ DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, SuccBB},
+ {DominatorTree::Insert, PredBB, NewBB},
+ {DominatorTree::Delete, PredBB, BB}});
// If there were values defined in BB that are used outside the block, then we
// now have to update all uses of the value to use either the original value,
@@ -2112,7 +2127,7 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
return NewBBs[0];
}
@@ -2385,7 +2400,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
++NumDupes;
return true;
@@ -2421,8 +2436,8 @@ void JumpThreadingPass::UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
// The select is now dead.
SI->eraseFromParent();
- DTU->applyUpdates({{DominatorTree::Insert, NewBB, BB},
- {DominatorTree::Insert, Pred, NewBB}});
+ DTU->applyUpdatesPermissive({{DominatorTree::Insert, NewBB, BB},
+ {DominatorTree::Insert, Pred, NewBB}});
// Update any other PHI nodes in BB.
for (BasicBlock::iterator BI = BB->begin();
@@ -2599,7 +2614,7 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
Updates.push_back({DominatorTree::Delete, BB, Succ});
Updates.push_back({DominatorTree::Insert, SplitBB, Succ});
}
- DTU->applyUpdates(Updates);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
return false;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index d204654c3915..d9dda4cef2d2 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -1,9 +1,8 @@
//===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -107,17 +107,29 @@ static cl::opt<int>
LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
cl::desc("How many instruction to cross product using AA"));
-// Experimental option to allow imprecision in LICM (use MemorySSA cap) in
-// pathological cases, in exchange for faster compile. This is to be removed
-// if MemorySSA starts to address the same issue. This flag applies only when
-// LICM uses MemorySSA instead on AliasSetTracker. When the flag is disabled
-// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which
-// gets perfect accuracy. When flag is enabled, LICM will call into MemorySSA's
-// getDefiningAccess, which may not be precise, since optimizeUses is capped.
-static cl::opt<bool> EnableLicmCap(
- "enable-licm-cap", cl::init(false), cl::Hidden,
- cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in "
- "pathological cases, in exchange for faster compile"));
+// Experimental option to allow imprecision in LICM in pathological cases, in
+// exchange for faster compile. This is to be removed if MemorySSA starts to
+// address the same issue. This flag applies only when LICM uses MemorySSA
+// instead of AliasSetTracker. LICM calls MemorySSAWalker's
+// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
+// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
+// which may not be precise, since optimizeUses is capped. The result is
+// correct, but we may not get as "far up" as possible to get which access is
+// clobbering the one queried.
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
+ "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
+ cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
+ "for faster compile. Caps the MemorySSA clobbering calls."));
+
+// Experimentally, memory promotion carries less importance than sinking and
+// hoisting. Limit when we do promotion when using MemorySSA, in order to save
+// compile time.
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
+ "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
+ cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
+ "effect. When MSSA in LICM is enabled, then this is the maximum "
+ "number of accesses allowed to be present in a loop in order to "
+ "enable memory promotion."));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
@@ -128,8 +140,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
- bool FreeInLoop);
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const Loop *CurLoop,
@@ -140,7 +151,8 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AliasAnalysis *AA);
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop);
+ Loop *CurLoop,
+ SinkAndHoistLICMFlags &Flags);
static Instruction *CloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
@@ -149,7 +161,8 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
- ICFLoopSafetyInfo &SafetyInfo);
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater *MSSAU);
namespace {
struct LoopInvariantCodeMotion {
@@ -160,17 +173,29 @@ struct LoopInvariantCodeMotion {
OptimizationRemarkEmitter *ORE, bool DeleteAST);
ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; }
+ LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap)
+ : LicmMssaOptCap(LicmMssaOptCap),
+ LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
private:
ASTrackerMapTy LoopToAliasSetMap;
+ unsigned LicmMssaOptCap;
+ unsigned LicmMssaNoAccForPromotionCap;
std::unique_ptr<AliasSetTracker>
collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA);
+ std::unique_ptr<AliasSetTracker>
+ collectAliasInfoForLoopWithMSSA(Loop *L, AliasAnalysis *AA,
+ MemorySSAUpdater *MSSAU);
};
struct LegacyLICMPass : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LegacyLICMPass() : LoopPass(ID) {
+ LegacyLICMPass(
+ unsigned LicmMssaOptCap = SetLicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
+ : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
}
@@ -219,8 +244,16 @@ struct LegacyLICMPass : public LoopPass {
using llvm::Pass::doFinalization;
bool doFinalization() override {
- assert(LICM.getLoopToAliasSetMap().empty() &&
+ auto &AliasSetMap = LICM.getLoopToAliasSetMap();
+ // All loops in the AliasSetMap should be cleaned up already. The only case
+ // where we fail to do so is if an outer loop gets deleted before LICM
+ // visits it.
+ assert(all_of(AliasSetMap,
+ [](LoopInvariantCodeMotion::ASTrackerMapTy::value_type &KV) {
+ return !KV.first->getParentLoop();
+ }) &&
"Didn't free loop alias sets");
+ AliasSetMap.clear();
return false;
}
@@ -252,7 +285,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");
- LoopInvariantCodeMotion LICM;
+ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
AR.MSSA, ORE, true))
return PreservedAnalyses::all();
@@ -261,6 +294,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -276,6 +311,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
+Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
+ unsigned LicmMssaNoAccForPromotionCap) {
+ return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+}
/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple
@@ -293,12 +332,31 @@ bool LoopInvariantCodeMotion::runOnLoop(
std::unique_ptr<AliasSetTracker> CurAST;
std::unique_ptr<MemorySSAUpdater> MSSAU;
+ bool NoOfMemAccTooLarge = false;
+ unsigned LicmMssaOptCounter = 0;
+
if (!MSSA) {
LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
CurAST = collectAliasInfoForLoop(L, LI, AA);
} else {
- LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA. Promotion disabled.\n");
+ LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+
+ unsigned AccessCapCount = 0;
+ for (auto *BB : L->getBlocks()) {
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses) {
+ (void)MA;
+ AccessCapCount++;
+ if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
+ NoOfMemAccTooLarge = true;
+ break;
+ }
+ }
+ }
+ if (NoOfMemAccTooLarge)
+ break;
+ }
}
// Get the preheader block to move instructions into...
@@ -317,13 +375,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
// that we are guaranteed to see definitions before we see uses. This allows
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
- //
+ SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter,
+ LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*IsSink=*/true};
if (L->hasDedicatedExits())
Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
+ Flags.IsSink = false;
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -332,7 +393,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
// make sure we catch that. An additional load may be generated in the
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
- if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
+ !NoOfMemAccTooLarge) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -344,38 +406,45 @@ bool LoopInvariantCodeMotion::runOnLoop(
if (!HasCatchSwitch) {
SmallVector<Instruction *, 8> InsertPts;
+ SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
- for (BasicBlock *ExitBlock : ExitBlocks)
+ if (MSSAU)
+ MSSAInsertPts.reserve(ExitBlocks.size());
+ for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ if (MSSAU)
+ MSSAInsertPts.push_back(nullptr);
+ }
PredIteratorCache PIC;
bool Promoted = false;
- if (CurAST.get()) {
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L,
- CurAST.get(), &SafetyInfo, ORE);
- }
+ // Build an AST using MSSA.
+ if (!CurAST.get())
+ CurAST = collectAliasInfoForLoopWithMSSA(L, AA, MSSAU.get());
+
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSet &AS : *CurAST) {
+ // We can promote this alias set if it has a store, if it is a "Must"
+ // alias set, if the pointer is loop invariant, and if we are not
+ // eliminating any volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ !L->isLoopInvariant(AS.begin()->getValue()))
+ continue;
+
+ assert(
+ !AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+
+ SmallSetVector<Value *, 8> PointerMustAliases;
+ for (const auto &ASI : AS)
+ PointerMustAliases.insert(ASI.getValue());
+
+ Promoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+ DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
}
- // FIXME: Promotion initially disabled when using MemorySSA.
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -399,7 +468,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
- if (CurAST.get() && L->getParentLoop() && !DeleteAST)
+ if (!MSSAU.get() && CurAST.get() && L->getParentLoop() && !DeleteAST)
LoopToAliasSetMap[L] = std::move(CurAST);
if (MSSAU.get() && VerifyMemorySSA)
@@ -420,6 +489,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
TargetTransformInfo *TTI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
@@ -463,9 +533,10 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
//
bool FreeInLoop = false;
if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
+ ORE) &&
!I.mayHaveSideEffects()) {
- if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) {
+ if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
@@ -718,6 +789,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
@@ -770,7 +842,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
+ ORE) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
@@ -808,13 +881,18 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
- using namespace PatternMatch;
- if (((I.use_empty() &&
- match(&I, m_Intrinsic<Intrinsic::invariant_start>())) ||
- isGuard(&I)) &&
+ auto IsInvariantStart = [&](Instruction &I) {
+ using namespace PatternMatch;
+ return I.use_empty() &&
+ match(&I, m_Intrinsic<Intrinsic::invariant_start>());
+ };
+ auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
+ return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
+ SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
+ };
+ if ((IsInvariantStart(I) || isGuard(&I)) &&
CurLoop->hasLoopInvariantOperands(&I) &&
- SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
- SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
+ MustExecuteWithoutWritesBefore(I)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, ORE);
HoistedInstructions.push_back(&I);
@@ -867,7 +945,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LICM rehoisting to "
<< HoistPoint->getParent()->getName()
<< ": " << *I << "\n");
- moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
+ moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU);
HoistPoint = I;
Changed = true;
}
@@ -897,8 +975,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
Loop *CurLoop) {
Value *Addr = LI->getOperand(0);
const DataLayout &DL = LI->getModule()->getDataLayout();
- const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
- cast<PointerType>(Addr->getType())->getElementType());
+ const uint32_t LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
// if the type is i8 addrspace(x)*, we know this is the type of
// llvm.invariant.start operand
@@ -945,16 +1022,15 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
-/// concerns such as aliasing or speculation safety.
+/// concerns such as aliasing or speculation safety.
bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
- return (isa<LoadInst>(I) || isa<StoreInst>(I) ||
- isa<CallInst>(I) || isa<FenceInst>(I) ||
- isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
- isa<ExtractElementInst>(I) || isa<ShuffleVectorInst>(I) ||
- isa<ExtractValueInst>(I) || isa<InsertValueInst>(I));
+ return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
+ isa<FenceInst>(I) || isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I));
}
/// Return true if all of the alias sets within this AST are known not to
/// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop.
@@ -997,12 +1073,15 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
MemorySSAUpdater *MSSAU,
bool TargetExecutesOncePerLoop,
+ SinkAndHoistLICMFlags *Flags,
OptimizationRemarkEmitter *ORE) {
// If we don't understand the instruction, bail early.
if (!isHoistableAndSinkableInst(I))
return false;
MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr;
+ if (MSSA)
+ assert(Flags != nullptr && "Flags cannot be null.");
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
@@ -1029,7 +1108,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop);
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, *Flags);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1074,7 +1153,8 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
CurAST, CurLoop, AA);
else
Invalidated = pointerInvalidatedByLoopWithMSSA(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop);
+ MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop,
+ *Flags);
if (Invalidated)
return false;
}
@@ -1133,13 +1213,46 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
} else { // MSSAU
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- if (!EnableLicmCap) {
- auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- if (MSSA->isLiveOnEntryDef(Source) ||
- !CurLoop->contains(Source->getBlock()))
- return true;
- }
- return false;
+ // If there are more accesses than the Promotion cap, give up, we're not
+ // walking a list that long.
+ if (Flags->NoOfMemAccTooLarge)
+ return false;
+ // Check store only if there's still "quota" to check clobber.
+ if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap)
+ return false;
+ // If there are interfering Uses (i.e. their defining access is in the
+ // loop), or ordered loads (stored as Defs!), don't move this store.
+ // Could do better here, but this is conservatively correct.
+ // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+ // moving accesses. Can also extend to dominating uses.
+ auto *SIMD = MSSA->getMemoryAccess(SI);
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
+ for (const auto &MA : *Accesses)
+ if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+ auto *MD = MU->getDefiningAccess();
+ if (!MSSA->isLiveOnEntryDef(MD) &&
+ CurLoop->contains(MD->getBlock()))
+ return false;
+ // Disable hoisting past potentially interfering loads. Optimized
+ // Uses may point to an access outside the loop, as getClobbering
+ // checks the previous iteration when walking the backedge.
+ // FIXME: More precise: no Uses that alias SI.
+ if (!Flags->IsSink && !MSSA->dominates(SIMD, MU))
+ return false;
+ } else if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+ (void)LI; // Silence warning.
+ assert(!LI->isUnordered() && "Expected unordered load");
+ return false;
+ }
+ }
+
+ auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
+ Flags->LicmMssaOptCounter++;
+ // If there are no clobbering Defs in the loop, store is safe to hoist.
+ return MSSA->isLiveOnEntryDef(Source) ||
+ !CurLoop->contains(Source->getBlock());
}
}
@@ -1233,7 +1346,7 @@ static Instruction *CloneInstructionInExitBlock(
// Sinking call-sites need to be handled differently from other
// instructions. The cloned call-site needs a funclet bundle operand
- // appropriate for it's location in the CFG.
+ // appropriate for its location in the CFG.
SmallVector<OperandBundleDef, 1> OpBundles;
for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
BundleIdx != BundleEnd; ++BundleIdx) {
@@ -1310,10 +1423,15 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
}
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
- ICFLoopSafetyInfo &SafetyInfo) {
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater *MSSAU) {
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest.getParent());
I.moveBefore(&Dest);
+ if (MSSAU)
+ if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&I)))
+ MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::End);
}
static Instruction *sinkThroughTriviallyReplaceablePHI(
@@ -1426,8 +1544,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
- MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE,
- bool FreeInLoop) {
+ MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -1441,7 +1558,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
++NumSunk;
// Iterate over users to be ready for actual sinking. Replace users via
- // unrechable blocks with undef and make all user PHIs trivially replcable.
+ // unreachable blocks with undef and make all user PHIs trivially replaceable.
SmallPtrSet<Instruction *, 8> VisitedUsers;
for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
auto *User = cast<Instruction>(*UI);
@@ -1549,25 +1666,15 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
- moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
+ moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU);
else
// Move the new node to the destination block, before its terminator.
- moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
- if (MSSAU) {
- // If moving, I just moved a load or store, so update MemorySSA.
- MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&I));
- if (OldMemAcc)
- MSSAU->moveToPlace(OldMemAcc, Dest, MemorySSA::End);
- }
+ moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU);
- // Do not retain debug locations when we are moving instructions to different
- // basic blocks, because we want to avoid jumpy line tables. Calls, however,
- // need to retain their debug locs because they may be inlined.
- // FIXME: How do we retain source locations without causing poor debugging
- // behavior?
- if (!isa<CallInst>(I))
- I.setDebugLoc(DebugLoc());
+ // Apply line 0 debug locations when we are moving instructions to different
+ // basic blocks because we want to avoid jumpy line tables.
+ if (const DebugLoc &DL = I.getDebugLoc())
+ I.setDebugLoc(DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
if (isa<LoadInst>(I))
++NumMovedLoads;
@@ -1611,8 +1718,10 @@ class LoopPromoter : public LoadAndStorePromoter {
const SmallSetVector<Value *, 8> &PointerMustAliases;
SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
SmallVectorImpl<Instruction *> &LoopInsertPts;
+ SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
AliasSetTracker &AST;
+ MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
int Alignment;
@@ -1639,15 +1748,16 @@ public:
LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
const SmallSetVector<Value *, 8> &PMA,
SmallVectorImpl<BasicBlock *> &LEB,
- SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
- AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- bool UnorderedAtomic, const AAMDNodes &AATags,
- ICFLoopSafetyInfo &SafetyInfo)
+ SmallVectorImpl<Instruction *> &LIP,
+ SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
+ AliasSetTracker &ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
+ DebugLoc dl, int alignment, bool UnorderedAtomic,
+ const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
- LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(std::move(dl)), Alignment(alignment),
- UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo)
- {}
+ LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
+ PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
+ Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
+ SafetyInfo(SafetyInfo) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -1659,7 +1769,7 @@ public:
return PointerMustAliases.count(Ptr);
}
- void doExtraRewritesBeforeFinalDeletion() const override {
+ void doExtraRewritesBeforeFinalDeletion() override {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@@ -1677,6 +1787,21 @@ public:
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
+
+ if (MSSAU) {
+ MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+ MemoryAccess *NewMemAcc;
+ if (!MSSAInsertPoint) {
+ NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+ } else {
+ NewMemAcc =
+ MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
+ }
+ MSSAInsertPts[i] = NewMemAcc;
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ // FIXME: true for safety, false may still be correct.
+ }
}
}
@@ -1687,6 +1812,8 @@ public:
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
AST.deleteValue(I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
}
};
@@ -1723,10 +1850,11 @@ bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) {
bool llvm::promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &PointerMustAliases,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
- SmallVectorImpl<Instruction *> &InsertPts, PredIteratorCache &PIC,
+ SmallVectorImpl<Instruction *> &InsertPts,
+ SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, ICFLoopSafetyInfo *SafetyInfo,
- OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
CurAST != nullptr && SafetyInfo != nullptr &&
@@ -1827,9 +1955,21 @@ bool llvm::promoteLoopAccessesToScalars(
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
- if (!DereferenceableInPH)
- DereferenceableInPH = isSafeToExecuteUnconditionally(
- *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator());
+ unsigned InstAlignment = Load->getAlignment();
+ if (!InstAlignment)
+ InstAlignment =
+ MDL.getABITypeAlignment(Load->getType());
+
+ // Note that proving a load safe to speculate requires proving
+ // sufficient alignment at the target location. Proving it guaranteed
+ // to execute does as well. Thus we can increase our guaranteed
+ // alignment as well.
+ if (!DereferenceableInPH || (InstAlignment > Alignment))
+ if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo,
+ ORE, Preheader->getTerminator())) {
+ DereferenceableInPH = true;
+ Alignment = std::max(Alignment, InstAlignment);
+ }
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
@@ -1875,8 +2015,8 @@ bool llvm::promoteLoopAccessesToScalars(
// deref info through it.
if (!DereferenceableInPH) {
DereferenceableInPH = isDereferenceableAndAlignedPointer(
- Store->getPointerOperand(), Store->getAlignment(), MDL,
- Preheader->getTerminator(), DT);
+ Store->getPointerOperand(), Store->getValueOperand()->getType(),
+ Store->getAlignment(), MDL, Preheader->getTerminator(), DT);
}
} else
return false; // Not a load or store.
@@ -1900,6 +2040,14 @@ bool llvm::promoteLoopAccessesToScalars(
if (SawUnorderedAtomic && SawNotAtomic)
return false;
+ // If we're inserting an atomic load in the preheader, we must be able to
+ // lower it. We're only guaranteed to be able to lower naturally aligned
+ // atomics.
+ auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
+ if (SawUnorderedAtomic &&
+ Alignment < MDL.getTypeStoreSize(SomePtrElemType))
+ return false;
+
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
return false;
@@ -1943,13 +2091,14 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, PIC, *CurAST, *LI, DL, Alignment,
- SawUnorderedAtomic, AATags, *SafetyInfo);
+ InsertPts, MSSAInsertPts, PIC, *CurAST, MSSAU, *LI, DL,
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
- SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ SomePtr->getType()->getPointerElementType(), SomePtr,
+ SomePtr->getName() + ".promoted", Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
@@ -1958,13 +2107,23 @@ bool llvm::promoteLoopAccessesToScalars(
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
+ MemoryAccess *PreheaderLoadMemoryAccess;
+ if (MSSAU) {
+ PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU->insertUse(NewMemUse);
+ }
+
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, nullptr);
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
return true;
}
@@ -2017,6 +2176,15 @@ LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
return CurAST;
}
+std::unique_ptr<AliasSetTracker>
+LoopInvariantCodeMotion::collectAliasInfoForLoopWithMSSA(
+ Loop *L, AliasAnalysis *AA, MemorySSAUpdater *MSSAU) {
+ auto *MSSA = MSSAU->getMemorySSA();
+ auto CurAST = make_unique<AliasSetTracker>(*AA, MSSA, L);
+ CurAST->addAllInstructionsInLoopUsingMSSA();
+ return CurAST;
+}
+
/// Simple analysis hook. Clone alias set info.
///
void LegacyLICMPass::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To,
@@ -2095,15 +2263,49 @@ static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
}
static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
- Loop *CurLoop) {
- MemoryAccess *Source;
- // See declaration of EnableLicmCap for usage details.
- if (EnableLicmCap)
- Source = MU->getDefiningAccess();
- else
- Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
- return !MSSA->isLiveOnEntryDef(Source) &&
- CurLoop->contains(Source->getBlock());
+ Loop *CurLoop,
+ SinkAndHoistLICMFlags &Flags) {
+ // For hoisting, use the walker to determine safety
+ if (!Flags.IsSink) {
+ MemoryAccess *Source;
+ // See declaration of SetLicmMssaOptCap for usage details.
+ if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap)
+ Source = MU->getDefiningAccess();
+ else {
+ Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
+ Flags.LicmMssaOptCounter++;
+ }
+ return !MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock());
+ }
+
+ // For sinking, we'd need to check all Defs below this use. The getClobbering
+ // call will look on the backedge of the loop, but will check aliasing with
+ // the instructions on the previous iteration.
+ // For example:
+ // for (i ... )
+ // load a[i] ( Use (LoE)
+ // store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
+ // i++;
+ // The load sees no clobbering inside the loop, as the backedge alias check
+ // does phi translation, and will check aliasing against store a[i-1].
+ // However sinking the load outside the loop, below the store is incorrect.
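+ // In plain C terms (an illustrative restatement of the example above, no new
+ // semantics), the pattern is roughly:
+ //   for (i = 0; i < n; i++) {
+ //     x = a[i];      // the MemoryUse being queried
+ //     a[i] = x + 1;  // a MemoryDef later in the same iteration
+ //   }
+ // The backedge check compares the load against the previous iteration's
+ // store (a[i-1]) and finds no clobber, yet sinking the load below this
+ // iteration's store would reorder them incorrectly.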
+
+ // For now, only sink if there are no Defs in the loop, and the existing ones
+ // precede the use and are in the same block.
+ // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
+ // needs PostDominatorTreeAnalysis.
+ // FIXME: More precise: no Defs that alias this Use.
+ if (Flags.NoOfMemAccTooLarge)
+ return true;
+ for (auto *BB : CurLoop->getBlocks())
+ if (auto *Accesses = MSSA->getBlockDefs(BB))
+ for (const auto &MA : *Accesses)
+ if (const auto *MD = dyn_cast<MemoryDef>(&MA))
+ if (MU->getBlock() != MD->getBlock() ||
+ !MSSA->locallyDominates(MD, MU))
+ return true;
+ return false;
}
/// Little predicate that returns true if the specified basic block is in
diff --git a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
index a64c99117d64..1c3ff1a61b7e 100644
--- a/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
+++ b/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
@@ -1,9 +1,8 @@
//===- LoopAccessAnalysisPrinter.cpp - Loop Access Analysis Printer --------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 3b41b5d96c86..1fcf1315a177 100644
--- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -1,9 +1,8 @@
//===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -313,7 +312,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
IRBuilder<> Builder(MemI);
Module *M = BB->getParent()->getParent();
Type *I32 = Type::getInt32Ty(BB->getContext());
- Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+ Function *PrefetchFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::prefetch);
Builder.CreateCall(
PrefetchFunc,
{PrefPtrValue,
@@ -333,4 +333,3 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
return MadeChange;
}
-
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index d412025d7e94..8371367e24e7 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -1,9 +1,8 @@
//===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index d797c9dc9e72..f45e5fd0f50b 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -1,9 +1,8 @@
//===- LoopDistribute.cpp - Loop Distribution Pass ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -767,8 +766,14 @@ public:
"cannot isolate unsafe dependencies");
}
- // Don't distribute the loop if we need too many SCEV run-time checks.
+ // Don't distribute the loop if we need too many SCEV run-time checks, or
+ // any at all if inserting them would be illegal.
const SCEVUnionPredicate &Pred = LAI->getPSE().getUnionPredicate();
+ if (LAI->hasConvergentOp() && !Pred.isAlwaysTrue()) {
+ return fail("RuntimeCheckWithConvergent",
+ "may not insert runtime check with convergent operation");
+ }
+
if (Pred.getComplexity() > (IsForced.getValueOr(false)
? PragmaDistributeSCEVCheckThreshold
: DistributeSCEVCheckThreshold))
@@ -796,7 +801,14 @@ public:
auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition,
RtPtrChecking);
+ if (LAI->hasConvergentOp() && !Checks.empty()) {
+ return fail("RuntimeCheckWithConvergent",
+ "may not insert runtime check with convergent operation");
+ }
+
if (!Pred.isAlwaysTrue() || !Checks.empty()) {
+ assert(!LAI->hasConvergentOp() && "inserting illegal loop versioning");
+
MDNode *OrigLoopID = L->getLoopID();
LLVM_DEBUG(dbgs() << "\nPointers:\n");
diff --git a/lib/Transforms/Scalar/LoopFuse.cpp b/lib/Transforms/Scalar/LoopFuse.cpp
new file mode 100644
index 000000000000..0bc2bcff2ae1
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopFuse.cpp
@@ -0,0 +1,1215 @@
+//===- LoopFuse.cpp - Loop Fusion Pass ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the loop fusion pass.
+/// The implementation is largely based on the following document:
+///
+/// Code Transformations to Augment the Scope of Loop Fusion in a
+/// Production Compiler
+/// Christopher Mark Barton
+/// MSc Thesis
+/// https://webdocs.cs.ualberta.ca/~amaral/thesis/ChristopherBartonMSc.pdf
+///
+/// The general approach taken is to collect sets of control flow equivalent
+/// loops and test whether they can be fused. The necessary conditions for
+/// fusion are:
+/// 1. The loops must be adjacent (there cannot be any statements between
+/// the two loops).
+/// 2. The loops must be conforming (they must execute the same number of
+/// iterations).
+/// 3. The loops must be control flow equivalent (if one loop executes, the
+/// other is guaranteed to execute).
+/// 4. There cannot be any negative distance dependencies between the loops.
+/// If all of these conditions are satisfied, it is safe to fuse the loops.
+///
+/// This implementation creates FusionCandidates that represent the loop and the
+/// necessary information needed by fusion. It then operates on the fusion
+/// candidates, first confirming that the candidate is eligible for fusion. The
+/// candidates are then collected into control flow equivalent sets, sorted in
+/// dominance order. Each set of control flow equivalent candidates is then
+/// traversed, attempting to fuse pairs of candidates in the set. If all
+/// requirements for fusion are met, the two candidates are fused, creating a
+/// new (fused) candidate which is then added back into the set to consider for
+/// additional fusion.
+///
+/// This implementation currently does not make any modifications to remove
+/// conditions for fusion. Code transformations to make loops conform to each of
+/// the conditions for fusion are discussed in more detail in the document
+/// above. These can be added to the current implementation in the future.
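+///
+/// A minimal sketch of loops meeting the conditions above (illustrative only,
+/// assuming a[], b[], c[] do not otherwise alias):
+///
+///   for (i = 0; i < N; ++i)
+///     a[i] = b[i] + 1;
+///   for (j = 0; j < N; ++j)
+///     c[j] = a[j] * 2;
+///
+/// The loops are adjacent, conforming, and control flow equivalent, and the
+/// only cross-loop dependence (a[j]) has distance zero, so they may be fused:
+///
+///   for (i = 0; i < N; ++i) {
+///     a[i] = b[i] + 1;
+///     c[i] = a[i] * 2;
+///   }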
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopFuse.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-fusion"
+
+STATISTIC(FuseCounter, "Count number of loop fusions performed");
+STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
+STATISTIC(InvalidPreheader, "Loop has invalid preheader");
+STATISTIC(InvalidHeader, "Loop has invalid header");
+STATISTIC(InvalidExitingBlock, "Loop has invalid exiting blocks");
+STATISTIC(InvalidExitBlock, "Loop has invalid exit block");
+STATISTIC(InvalidLatch, "Loop has invalid latch");
+STATISTIC(InvalidLoop, "Loop is invalid");
+STATISTIC(AddressTakenBB, "Basic block has address taken");
+STATISTIC(MayThrowException, "Loop may throw an exception");
+STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
+STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
+STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
+STATISTIC(InvalidTripCount,
+ "Loop does not have invariant backedge taken count");
+STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
+STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
+STATISTIC(NonAdjacent, "Candidates are not adjacent");
+STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
+
+enum FusionDependenceAnalysisChoice {
+ FUSION_DEPENDENCE_ANALYSIS_SCEV,
+ FUSION_DEPENDENCE_ANALYSIS_DA,
+ FUSION_DEPENDENCE_ANALYSIS_ALL,
+};
+
+static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
+ "loop-fusion-dependence-analysis",
+ cl::desc("Which dependence analysis should loop fusion use?"),
+ cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, "scev",
+ "Use the scalar evolution interface"),
+ clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, "da",
+ "Use the dependence analysis interface"),
+ clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, "all",
+ "Use all available analyses")),
+ cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+ VerboseFusionDebugging("loop-fusion-verbose-debug",
+ cl::desc("Enable verbose debugging for Loop Fusion"),
+ cl::Hidden, cl::init(false), cl::ZeroOrMore);
+#endif
+
+/// This class is used to represent a candidate for loop fusion. When it is
+/// constructed, it checks the conditions for loop fusion to ensure that it
+/// represents a valid candidate. It caches several parts of a loop that are
+/// used throughout loop fusion (e.g., loop preheader, loop header, etc) instead
+/// of continually querying the underlying Loop to retrieve these values. It is
+/// assumed these will not change throughout loop fusion.
+///
+/// The invalidate method should be used to indicate that the FusionCandidate is
+/// no longer a valid candidate for fusion. Similarly, the isValid() method can
+/// be used to ensure that the FusionCandidate is still valid for fusion.
+struct FusionCandidate {
+ /// Cache of parts of the loop used throughout loop fusion. These should not
+ /// need to change throughout the analysis and transformation.
+ /// These parts are cached to avoid repeatedly looking up in the Loop class.
+
+ /// Preheader of the loop this candidate represents
+ BasicBlock *Preheader;
+ /// Header of the loop this candidate represents
+ BasicBlock *Header;
+ /// Blocks in the loop that exit the loop
+ BasicBlock *ExitingBlock;
+ /// The successor block of this loop (where the exiting blocks go to)
+ BasicBlock *ExitBlock;
+ /// Latch of the loop
+ BasicBlock *Latch;
+ /// The loop that this fusion candidate represents
+ Loop *L;
+ /// Vector of instructions in this loop that read from memory
+ SmallVector<Instruction *, 16> MemReads;
+ /// Vector of instructions in this loop that write to memory
+ SmallVector<Instruction *, 16> MemWrites;
+ /// Are all of the members of this fusion candidate still valid
+ bool Valid;
+
+ /// Dominator and PostDominator trees are needed for the
+ /// FusionCandidateCompare function, required by FusionCandidateSet to
+ /// determine where the FusionCandidate should be inserted into the set. These
+ /// are used to establish ordering of the FusionCandidates based on dominance.
+ const DominatorTree *DT;
+ const PostDominatorTree *PDT;
+
+ FusionCandidate(Loop *L, const DominatorTree *DT,
+ const PostDominatorTree *PDT)
+ : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
+ ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
+ Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
+
+ // Walk over all blocks in the loop and check for conditions that may
+ // prevent fusion. For each block, walk over all instructions and collect
+ // the memory reads and writes. If any instructions that prevent fusion are
+ // found, invalidate this object and return.
+ for (BasicBlock *BB : L->blocks()) {
+ if (BB->hasAddressTaken()) {
+ AddressTakenBB++;
+ invalidate();
+ return;
+ }
+
+ for (Instruction &I : *BB) {
+ if (I.mayThrow()) {
+ MayThrowException++;
+ invalidate();
+ return;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ if (SI->isVolatile()) {
+ ContainsVolatileAccess++;
+ invalidate();
+ return;
+ }
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->isVolatile()) {
+ ContainsVolatileAccess++;
+ invalidate();
+ return;
+ }
+ }
+ if (I.mayWriteToMemory())
+ MemWrites.push_back(&I);
+ if (I.mayReadFromMemory())
+ MemReads.push_back(&I);
+ }
+ }
+ }
+
+ /// Check if all members of the class are valid.
+ bool isValid() const {
+ return Preheader && Header && ExitingBlock && ExitBlock && Latch && L &&
+ !L->isInvalid() && Valid;
+ }
+
+ /// Verify that all members are in sync with the Loop object.
+ void verify() const {
+ assert(isValid() && "Candidate is not valid!!");
+ assert(!L->isInvalid() && "Loop is invalid!");
+ assert(Preheader == L->getLoopPreheader() && "Preheader is out of sync");
+ assert(Header == L->getHeader() && "Header is out of sync");
+ assert(ExitingBlock == L->getExitingBlock() &&
+ "Exiting Blocks is out of sync");
+ assert(ExitBlock == L->getExitBlock() && "Exit block is out of sync");
+ assert(Latch == L->getLoopLatch() && "Latch is out of sync");
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ dbgs() << "\tPreheader: " << (Preheader ? Preheader->getName() : "nullptr")
+ << "\n"
+ << "\tHeader: " << (Header ? Header->getName() : "nullptr") << "\n"
+ << "\tExitingBB: "
+ << (ExitingBlock ? ExitingBlock->getName() : "nullptr") << "\n"
+ << "\tExitBB: " << (ExitBlock ? ExitBlock->getName() : "nullptr")
+ << "\n"
+ << "\tLatch: " << (Latch ? Latch->getName() : "nullptr") << "\n";
+ }
+#endif
+
+private:
+ // This is only used internally for now, to clear the MemWrites and MemReads
+ // lists and set Valid to false. I can't envision other uses of this right
+ // now, since once FusionCandidates are put into the FusionCandidateSet they
+ // are immutable. Thus, any time we need to change/update a FusionCandidate,
+ // we must create a new one and insert it into the FusionCandidateSet to
+ // ensure the FusionCandidateSet remains ordered correctly.
+ void invalidate() {
+ MemWrites.clear();
+ MemReads.clear();
+ Valid = false;
+ }
+};
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const FusionCandidate &FC) {
+ if (FC.isValid())
+ OS << FC.Preheader->getName();
+ else
+ OS << "<Invalid>";
+
+ return OS;
+}
+
+struct FusionCandidateCompare {
+ /// Comparison functor to sort two Control Flow Equivalent fusion candidates
+ /// into dominance order.
+ /// If LHS dominates RHS and RHS post-dominates LHS, return true;
+ /// If RHS dominates LHS and LHS post-dominates RHS, return false;
+ bool operator()(const FusionCandidate &LHS,
+ const FusionCandidate &RHS) const {
+ const DominatorTree *DT = LHS.DT;
+
+ // Do not save PDT to local variable as it is only used in asserts and thus
+ // will trigger an unused variable warning if building without asserts.
+ assert(DT && LHS.PDT && "Expecting valid dominator tree");
+
+ // Do this compare first so if LHS == RHS, function returns false.
+ if (DT->dominates(RHS.Preheader, LHS.Preheader)) {
+ // RHS dominates LHS
+ // Verify LHS post-dominates RHS
+ assert(LHS.PDT->dominates(LHS.Preheader, RHS.Preheader));
+ return false;
+ }
+
+ if (DT->dominates(LHS.Preheader, RHS.Preheader)) {
+ // Verify RHS Postdominates LHS
+ assert(LHS.PDT->dominates(RHS.Preheader, LHS.Preheader));
+ return true;
+ }
+
+ // If LHS does not dominate RHS and RHS does not dominate LHS then there is
+ // no dominance relationship between the two FusionCandidates. Thus, they
+ // should not be in the same set together.
+ llvm_unreachable(
+ "No dominance relationship between these fusion candidates!");
+ }
+};
+
+namespace {
+using LoopVector = SmallVector<Loop *, 4>;
+
+// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
+// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
+// dominates FC1 and FC1 post-dominates FC0.
+// std::set was chosen because we want a sorted data structure with stable
+// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
+// loops by moving intervening code around. When this intervening code contains
+// loops, those loops will be moved also. The corresponding FusionCandidates
+// will also need to be moved accordingly. As this is done, having stable
+// iterators will simplify the logic. Similarly, having an efficient insert that
+// keeps the FusionCandidateSet sorted will also simplify the implementation.
+using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
+using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+} // namespace
+
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const FusionCandidateSet &CandSet) {
+ for (auto IT : CandSet)
+ OS << IT << "\n";
+
+ return OS;
+}
+
+#if !defined(NDEBUG)
+static void
+printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
+ dbgs() << "Fusion Candidates: \n";
+ for (const auto &CandidateSet : FusionCandidates) {
+ dbgs() << "*** Fusion Candidate Set ***\n";
+ dbgs() << CandidateSet;
+ dbgs() << "****************************\n";
+ }
+}
+#endif
+
+/// Collect all loops in function at the same nest level, starting at the
+/// outermost level.
+///
+/// This data structure collects all loops at the same nest level for a
+/// given function (specified by the LoopInfo object). It starts at the
+/// outermost level.
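+///
+/// A minimal traversal sketch (mirroring how LoopFuser::fuseLoops below uses
+/// this class), walking one nesting level at a time:
+///
+///   LoopDepthTree LDT(LI);
+///   while (!LDT.empty()) {
+///     for (const LoopVector &LV : LDT) {
+///       // examine the loops on this level
+///     }
+///     LDT.descend();
+///   }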
+struct LoopDepthTree {
+ using LoopsOnLevelTy = SmallVector<LoopVector, 4>;
+ using iterator = LoopsOnLevelTy::iterator;
+ using const_iterator = LoopsOnLevelTy::const_iterator;
+
+ LoopDepthTree(LoopInfo &LI) : Depth(1) {
+ if (!LI.empty())
+ LoopsOnLevel.emplace_back(LoopVector(LI.rbegin(), LI.rend()));
+ }
+
+ /// Test whether a given loop has been removed from the function, and thus is
+ /// no longer valid.
+ bool isRemovedLoop(const Loop *L) const { return RemovedLoops.count(L); }
+
+ /// Record that a given loop has been removed from the function and is no
+ /// longer valid.
+ void removeLoop(const Loop *L) { RemovedLoops.insert(L); }
+
+ /// Descend the tree to the next (inner) nesting level
+ void descend() {
+ LoopsOnLevelTy LoopsOnNextLevel;
+
+ for (const LoopVector &LV : *this)
+ for (Loop *L : LV)
+ if (!isRemovedLoop(L) && L->begin() != L->end())
+ LoopsOnNextLevel.emplace_back(LoopVector(L->begin(), L->end()));
+
+ LoopsOnLevel = LoopsOnNextLevel;
+ RemovedLoops.clear();
+ Depth++;
+ }
+
+ bool empty() const { return size() == 0; }
+ size_t size() const { return LoopsOnLevel.size() - RemovedLoops.size(); }
+ unsigned getDepth() const { return Depth; }
+
+ iterator begin() { return LoopsOnLevel.begin(); }
+ iterator end() { return LoopsOnLevel.end(); }
+ const_iterator begin() const { return LoopsOnLevel.begin(); }
+ const_iterator end() const { return LoopsOnLevel.end(); }
+
+private:
+ /// Set of loops that have been removed from the function and are no longer
+ /// valid.
+ SmallPtrSet<const Loop *, 8> RemovedLoops;
+
+ /// Depth of the current level, starting at 1 (outermost loops).
+ unsigned Depth;
+
+ /// Vector of loops at the current depth level that have the same parent loop
+ LoopsOnLevelTy LoopsOnLevel;
+};
+
+#ifndef NDEBUG
+static void printLoopVector(const LoopVector &LV) {
+ dbgs() << "****************************\n";
+ for (auto L : LV)
+ printLoop(*L, dbgs());
+ dbgs() << "****************************\n";
+}
+#endif
+
+static void reportLoopFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1,
+ OptimizationRemarkEmitter &ORE) {
+ using namespace ore;
+ ORE.emit(
+ OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
+ << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
+ << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
+}
+
+struct LoopFuser {
+private:
+ // Sets of control flow equivalent fusion candidates for a given nest level.
+ FusionCandidateCollection FusionCandidates;
+
+ LoopDepthTree LDT;
+ DomTreeUpdater DTU;
+
+ LoopInfo &LI;
+ DominatorTree &DT;
+ DependenceInfo &DI;
+ ScalarEvolution &SE;
+ PostDominatorTree &PDT;
+ OptimizationRemarkEmitter &ORE;
+
+public:
+ LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
+ ScalarEvolution &SE, PostDominatorTree &PDT,
+ OptimizationRemarkEmitter &ORE, const DataLayout &DL)
+ : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
+ DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
+
+ /// This is the main entry point for loop fusion. It will traverse the
+ /// specified function and collect candidate loops to fuse, starting at the
+ /// outermost nesting level and working inwards.
+ bool fuseLoops(Function &F) {
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LI.print(dbgs());
+ }
+#endif
+
+ LLVM_DEBUG(dbgs() << "Performing Loop Fusion on function " << F.getName()
+ << "\n");
+ bool Changed = false;
+
+ while (!LDT.empty()) {
+ LLVM_DEBUG(dbgs() << "Got " << LDT.size() << " loop sets for depth "
+ << LDT.getDepth() << "\n";);
+
+ for (const LoopVector &LV : LDT) {
+ assert(LV.size() > 0 && "Empty loop set was built!");
+
+ // Skip singleton loop sets as they do not offer fusion opportunities on
+ // this level.
+ if (LV.size() == 1)
+ continue;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LLVM_DEBUG({
+ dbgs() << " Visit loop set (#" << LV.size() << "):\n";
+ printLoopVector(LV);
+ });
+ }
+#endif
+
+ collectFusionCandidates(LV);
+ Changed |= fuseCandidates();
+ }
+
+ // Finished analyzing candidates at this level.
+ // Descend to the next level and clear all of the candidates currently
+ // collected. Note that it will not be possible to fuse any of the
+ // existing candidates with new candidates because the new candidates will
+ // be at a different nest level and thus not be control flow equivalent
+ // with all of the candidates collected so far.
+ LLVM_DEBUG(dbgs() << "Descend one level!\n");
+ LDT.descend();
+ FusionCandidates.clear();
+ }
+
+ if (Changed)
+ LLVM_DEBUG(dbgs() << "Function after Loop Fusion: \n"; F.dump(););
+
+#ifndef NDEBUG
+ assert(DT.verify());
+ assert(PDT.verify());
+ LI.verify(DT);
+ SE.verify();
+#endif
+
+ LLVM_DEBUG(dbgs() << "Loop Fusion complete\n");
+ return Changed;
+ }
+
+private:
+ /// Determine if two fusion candidates are control flow equivalent.
+ ///
+ /// Two fusion candidates are control flow equivalent if when one executes,
+ /// the other is guaranteed to execute. This is determined using dominators
+ /// and post-dominators: if A dominates B and B post-dominates A then A and B
+ /// are control-flow equivalent.
+ bool isControlFlowEquivalent(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
+
+ if (DT.dominates(FC0.Preheader, FC1.Preheader))
+ return PDT.dominates(FC1.Preheader, FC0.Preheader);
+
+ if (DT.dominates(FC1.Preheader, FC0.Preheader))
+ return PDT.dominates(FC0.Preheader, FC1.Preheader);
+
+ return false;
+ }
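+
+ // A minimal illustration with hypothetical loops (not part of the pass):
+ //
+ //   for (int i = 0; i < N; ++i) { /* L0 */ }
+ //   for (int i = 0; i < N; ++i) { /* L1 */ }
+ //
+ // Here L0's preheader dominates L1's and L1's preheader post-dominates
+ // L0's, so the pair is control flow equivalent. If L1 were guarded, e.g.
+ //
+ //   for (int i = 0; i < N; ++i) { /* L0 */ }
+ //   if (cond)
+ //     for (int i = 0; i < N; ++i) { /* L1 */ }
+ //
+ // then L1's preheader no longer post-dominates L0's and the pair is
+ // rejected.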
+
+ /// Determine if a fusion candidate (representing a loop) is eligible for
+ /// fusion. Note that this only checks whether a single loop can be fused - it
+ /// does not check whether it is *legal* to fuse two loops together.
+ bool eligibleForFusion(const FusionCandidate &FC) const {
+ if (!FC.isValid()) {
+ LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
+ if (!FC.Preheader)
+ InvalidPreheader++;
+ if (!FC.Header)
+ InvalidHeader++;
+ if (!FC.ExitingBlock)
+ InvalidExitingBlock++;
+ if (!FC.ExitBlock)
+ InvalidExitBlock++;
+ if (!FC.Latch)
+ InvalidLatch++;
+ if (FC.L->isInvalid())
+ InvalidLoop++;
+
+ return false;
+ }
+
+ // Require ScalarEvolution to be able to determine a trip count.
+ if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
+ LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
+ << " trip count not computable!\n");
+ InvalidTripCount++;
+ return false;
+ }
+
+ if (!FC.L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
+ << " is not in simplified form!\n");
+ NotSimplifiedForm++;
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Iterate over all loops in the given loop set and identify the loops that
+ /// are eligible for fusion. Place all eligible fusion candidates into Control
+ /// Flow Equivalent sets, sorted by dominance.
+ void collectFusionCandidates(const LoopVector &LV) {
+ for (Loop *L : LV) {
+ FusionCandidate CurrCand(L, &DT, &PDT);
+ if (!eligibleForFusion(CurrCand))
+ continue;
+
+ // Go through each set in FusionCandidates and determine if CurrCand is
+ // control flow equivalent with the first candidate in that set. If it is,
+ // insert CurrCand into that set. If not, go on to the next set.
+ // If no suitable set is found, start a new set and add it to
+ // FusionCandidates.
+ bool FoundSet = false;
+
+ for (auto &CurrCandSet : FusionCandidates) {
+ if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
+ CurrCandSet.insert(CurrCand);
+ FoundSet = true;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+ << " to existing candidate set\n");
+#endif
+ break;
+ }
+ }
+ if (!FoundSet) {
+ // No set was found. Create a new set and add it to FusionCandidates.
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+#endif
+ FusionCandidateSet NewCandSet;
+ NewCandSet.insert(CurrCand);
+ FusionCandidates.push_back(NewCandSet);
+ }
+ NumFusionCandidates++;
+ }
+ }
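+
+ // Worked sketch with hypothetical loops: for eligible loops L0 and L1 in
+ // straight-line code followed by L2 nested under an `if`, L0 starts the
+ // first set, L1 is control flow equivalent with L0 and joins it, and L2 is
+ // not, so the result is the two candidate sets {L0, L1} and {L2}. Only the
+ // first set can yield a fusion on this nesting level.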
+
+ /// Determine if it is beneficial to fuse two loops.
+ ///
+ /// For now, this method simply returns true because we want to fuse as much
+ /// as possible (primarily to test the pass). This method will evolve, over
+ /// time, to add heuristics for profitability of fusion.
+ bool isBeneficialFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) {
+ return true;
+ }
+
+ /// Determine if two fusion candidates have the same trip count (i.e., they
+ /// execute the same number of iterations).
+ ///
+ /// Note that for now this method simply returns a boolean value because there
+ /// are no mechanisms in loop fusion to handle different trip counts. In the
+ /// future, this behaviour can be extended to adjust one of the loops to make
+ /// the trip counts equal (e.g., loop peeling). When this is added, this
+ /// interface may need to change to return more information than just a
+ /// boolean value.
+ bool identicalTripCounts(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
+ if (isa<SCEVCouldNotCompute>(TripCount0)) {
+ UncomputableTripCount++;
+ LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
+ return false;
+ }
+
+ const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
+ if (isa<SCEVCouldNotCompute>(TripCount1)) {
+ UncomputableTripCount++;
+ LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
+ << *TripCount1 << " are "
+ << (TripCount0 == TripCount1 ? "identical" : "different")
+ << "\n");
+
+ return (TripCount0 == TripCount1);
+ }
+
+ /// Walk each set of control flow equivalent fusion candidates and attempt to
+ /// fuse them. This does a single linear traversal of all candidates in the
+ /// set. The conditions for legal fusion are checked at this point. If a pair
+ /// of fusion candidates passes all legality checks, they are fused together
+ /// and a new fusion candidate is created and added to the FusionCandidateSet.
+ /// The original fusion candidates are then removed, as they are no longer
+ /// valid.
+ bool fuseCandidates() {
+ bool Fused = false;
+ LLVM_DEBUG(printFusionCandidates(FusionCandidates));
+ for (auto &CandidateSet : FusionCandidates) {
+ if (CandidateSet.size() < 2)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
+ << CandidateSet << "\n");
+
+ for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
+ assert(!LDT.isRemovedLoop(FC0->L) &&
+ "Should not have removed loops in CandidateSet!");
+ auto FC1 = FC0;
+ for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
+ assert(!LDT.isRemovedLoop(FC1->L) &&
+ "Should not have removed loops in CandidateSet!");
+
+ LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
+ dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
+
+ FC0->verify();
+ FC1->verify();
+
+ if (!identicalTripCounts(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
+ "counts. Not fusing.\n");
+ NonEqualTripCount++;
+ continue;
+ }
+
+ if (!isAdjacent(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs()
+ << "Fusion candidates are not adjacent. Not fusing.\n");
+ NonAdjacent++;
+ continue;
+ }
+
+ // For now we skip fusing if the second candidate has any instructions
+ // in the preheader. This is done because we currently do not have the
+ // safety checks to determine if it is safe to move the preheader of
+ // the second candidate past the body of the first candidate. Once
+ // these checks are added, this condition can be removed.
+ if (!isEmptyPreheader(*FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+ "preheader. Not fusing.\n");
+ NonEmptyPreheader++;
+ continue;
+ }
+
+ if (!dependencesAllowFusion(*FC0, *FC1)) {
+ LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+ continue;
+ }
+
+ bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
+ LLVM_DEBUG(dbgs()
+ << "\tFusion appears to be "
+ << (BeneficialToFuse ? "" : "un") << "profitable!\n");
+ if (!BeneficialToFuse)
+ continue;
+
+ // All analysis has completed and has determined that fusion is legal
+ // and profitable. At this point, start transforming the code and
+ // perform fusion.
+
+ LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
+ << *FC1 << "\n");
+
+ // Report fusion to the Optimization Remarks.
+ // Note this needs to be done *before* performFusion because
+ // performFusion will change the original loops, making it not
+ // possible to identify them after fusion is complete.
+ reportLoopFusion(*FC0, *FC1, ORE);
+
+ FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
+ FusedCand.verify();
+ assert(eligibleForFusion(FusedCand) &&
+ "Fused candidate should be eligible for fusion!");
+
+ // Notify the loop-depth-tree that these loops are not valid objects
+ // anymore.
+ LDT.removeLoop(FC1->L);
+
+ CandidateSet.erase(FC0);
+ CandidateSet.erase(FC1);
+
+ auto InsertPos = CandidateSet.insert(FusedCand);
+
+ assert(InsertPos.second &&
+ "Unable to insert TargetCandidate in CandidateSet!");
+
+ // Reset FC0 and FC1 to the new (fused) candidate. Subsequent iterations
+ // of the FC1 loop will attempt to fuse the new (fused) loop with the
+ // remaining candidates in the current candidate set.
+ FC0 = FC1 = InsertPos.first;
+
+ LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
+ << "\n");
+
+ Fused = true;
+ }
+ }
+ }
+ return Fused;
+ }
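+
+ // Illustration of the traversal above (hypothetical candidates): for a
+ // candidate set {L0, L1, L2} in which every check passes, L0 and L1 are
+ // fused first, both are erased from the set, and FC0/FC1 are reset to the
+ // fused loop. The next iteration of the FC1 loop then tries to fuse the
+ // fused loop with L2, so a single candidate set can greedily collapse into
+ // one loop when legality allows.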
+
+ /// Rewrite all additive recurrences in a SCEV to use a new loop.
+ class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
+ public:
+ AddRecLoopReplacer(ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
+ bool UseMax = true)
+ : SCEVRewriteVisitor(SE), Valid(true), UseMax(UseMax), OldL(OldL),
+ NewL(NewL) {}
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ const Loop *ExprL = Expr->getLoop();
+ SmallVector<const SCEV *, 2> Operands;
+ if (ExprL == &OldL) {
+ Operands.append(Expr->op_begin(), Expr->op_end());
+ return SE.getAddRecExpr(Operands, &NewL, Expr->getNoWrapFlags());
+ }
+
+ if (OldL.contains(ExprL)) {
+ bool Pos = SE.isKnownPositive(Expr->getStepRecurrence(SE));
+ if (!UseMax || !Pos || !Expr->isAffine()) {
+ Valid = false;
+ return Expr;
+ }
+ return visit(Expr->getStart());
+ }
+
+ for (const SCEV *Op : Expr->operands())
+ Operands.push_back(visit(Op));
+ return SE.getAddRecExpr(Operands, ExprL, Expr->getNoWrapFlags());
+ }
+
+ bool wasValidSCEV() const { return Valid; }
+
+ private:
+ bool Valid, UseMax;
+ const Loop &OldL, &NewL;
+ };
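+
+ // Example of the rewrite above in SCEV-style notation (illustrative values):
+ // an access function {%base,+,4}<OldL>, i.e. a pointer advanced by 4 bytes
+ // per iteration of OldL, becomes {%base,+,4}<NewL>, so expressions from the
+ // two loops can be compared over a single loop. Recurrences of loops nested
+ // inside OldL are replaced by their start value when the step is known
+ // positive and the recurrence is affine (with UseMax set); otherwise the
+ // rewrite is marked invalid via Valid.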
+
+ /// Return false if the access functions of \p I0 and \p I1 could cause
+ /// a negative dependence.
+ bool accessDiffIsPositive(const Loop &L0, const Loop &L1, Instruction &I0,
+ Instruction &I1, bool EqualIsInvalid) {
+ Value *Ptr0 = getLoadStorePointerOperand(&I0);
+ Value *Ptr1 = getLoadStorePointerOperand(&I1);
+ if (!Ptr0 || !Ptr1)
+ return false;
+
+ const SCEV *SCEVPtr0 = SE.getSCEVAtScope(Ptr0, &L0);
+ const SCEV *SCEVPtr1 = SE.getSCEVAtScope(Ptr1, &L1);
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << " Access function check: " << *SCEVPtr0 << " vs "
+ << *SCEVPtr1 << "\n");
+#endif
+ AddRecLoopReplacer Rewriter(SE, L0, L1);
+ SCEVPtr0 = Rewriter.visit(SCEVPtr0);
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << " Access function after rewrite: " << *SCEVPtr0
+ << " [Valid: " << Rewriter.wasValidSCEV() << "]\n");
+#endif
+ if (!Rewriter.wasValidSCEV())
+ return false;
+
+ // TODO: isKnownPredicate doesn't work well when one SCEV is loop carried (by
+ // L0) and the other is not. We could check if it is monotone and test
+ // the beginning and end value instead.
+
+ BasicBlock *L0Header = L0.getHeader();
+ auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+ if (!AddRec)
+ return false;
+ return !DT.dominates(L0Header, AddRec->getLoop()->getHeader()) &&
+ !DT.dominates(AddRec->getLoop()->getHeader(), L0Header);
+ };
+ if (SCEVExprContains(SCEVPtr1, HasNonLinearDominanceRelation))
+ return false;
+
+ ICmpInst::Predicate Pred =
+ EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
+ bool IsAlwaysGE = SE.isKnownPredicate(Pred, SCEVPtr0, SCEVPtr1);
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << " Relation: " << *SCEVPtr0
+ << (IsAlwaysGE ? " >= " : " may < ") << *SCEVPtr1
+ << "\n");
+#endif
+ return IsAlwaysGE;
+ }
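+
+ // Worked example with hypothetical accesses: for a store to A[i] in L0 and
+ // a load of A[i-1] in L1, the rewritten access functions satisfy
+ // A[i] >= A[i-1] for all i, so the check succeeds and fusion remains
+ // possible. For a load of A[i+1] in L1 instead, iteration i of the fused
+ // loop would read an element that the original L0 only writes in iteration
+ // i+1 (a negative dependence), and the SGE/SGT predicate cannot be proven.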
+
+ /// Return true if the dependences between @p I0 (in @p FC0) and @p I1 (in
+ /// @p FC1) allow fusing the two candidate loops. The dependence analyses
+ /// specified by @p DepChoice are used to determine this.
+ bool dependencesAllowFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1, Instruction &I0,
+ Instruction &I1, bool AnyDep,
+ FusionDependenceAnalysisChoice DepChoice) {
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LLVM_DEBUG(dbgs() << "Check dep: " << I0 << " vs " << I1 << " : "
+ << DepChoice << "\n");
+ }
+#endif
+ switch (DepChoice) {
+ case FUSION_DEPENDENCE_ANALYSIS_SCEV:
+ return accessDiffIsPositive(*FC0.L, *FC1.L, I0, I1, AnyDep);
+ case FUSION_DEPENDENCE_ANALYSIS_DA: {
+ auto DepResult = DI.depends(&I0, &I1, true);
+ if (!DepResult)
+ return true;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging) {
+ LLVM_DEBUG(dbgs() << "DA res: "; DepResult->dump(dbgs());
+ dbgs() << " [#l: " << DepResult->getLevels() << "][Ordered: "
+ << (DepResult->isOrdered() ? "true" : "false")
+ << "]\n");
+ LLVM_DEBUG(dbgs() << "DepResult Levels: " << DepResult->getLevels()
+ << "\n");
+ }
+#endif
+
+ if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
+ LLVM_DEBUG(
+ dbgs() << "TODO: Implement pred/succ dependence handling!\n");
+
+ // TODO: Can we actually use the dependence info analysis here?
+ return false;
+ }
+
+ case FUSION_DEPENDENCE_ANALYSIS_ALL:
+ return dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
+ FUSION_DEPENDENCE_ANALYSIS_SCEV) ||
+ dependencesAllowFusion(FC0, FC1, I0, I1, AnyDep,
+ FUSION_DEPENDENCE_ANALYSIS_DA);
+ }
+
+ llvm_unreachable("Unknown fusion dependence analysis choice!");
+ }
+
+ /// Perform a dependence check and return whether @p FC0 and @p FC1 can be fused.
+ bool dependencesAllowFusion(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) {
+ LLVM_DEBUG(dbgs() << "Check if " << FC0 << " can be fused with " << FC1
+ << "\n");
+ assert(FC0.L->getLoopDepth() == FC1.L->getLoopDepth());
+ assert(DT.dominates(FC0.Preheader, FC1.Preheader));
+
+ for (Instruction *WriteL0 : FC0.MemWrites) {
+ for (Instruction *WriteL1 : FC1.MemWrites)
+ if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ for (Instruction *ReadL1 : FC1.MemReads)
+ if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *ReadL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ }
+
+ for (Instruction *WriteL1 : FC1.MemWrites) {
+ for (Instruction *WriteL0 : FC0.MemWrites)
+ if (!dependencesAllowFusion(FC0, FC1, *WriteL0, *WriteL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ for (Instruction *ReadL0 : FC0.MemReads)
+ if (!dependencesAllowFusion(FC0, FC1, *ReadL0, *WriteL1,
+ /* AnyDep */ false,
+ FusionDependenceAnalysis)) {
+ InvalidDependencies++;
+ return false;
+ }
+ }
+
+ // Walk through all uses in FC1. For each use, find the reaching def. If the
+ // def is located in FC0 then it is not safe to fuse.
+ for (BasicBlock *BB : FC1.L->blocks())
+ for (Instruction &I : *BB)
+ for (auto &Op : I.operands())
+ if (Instruction *Def = dyn_cast<Instruction>(Op))
+ if (FC0.L->contains(Def->getParent())) {
+ InvalidDependencies++;
+ return false;
+ }
+
+ return true;
+ }
+
+ /// Determine if the exit block of \p FC0 is the preheader of \p FC1. In this
+ /// case, there is no code in between the two fusion candidates, thus making
+ /// them adjacent.
+ bool isAdjacent(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
+ return FC0.ExitBlock == FC1.Preheader;
+ }
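+
+ // For example, after loop-simplify two back-to-back loops with no code
+ // between them typically leave the first loop's dedicated exit block
+ // doubling as the second loop's preheader, which is exactly the shape
+ // accepted here.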
+
+ bool isEmptyPreheader(const FusionCandidate &FC) const {
+ return FC.Preheader->size() == 1;
+ }
+
+ /// Fuse two fusion candidates, creating a new fused loop.
+ ///
+ /// This method contains the mechanics of fusing two loops, represented by \p
+ /// FC0 and \p FC1. It is assumed that \p FC0 dominates \p FC1 and \p FC1
+ /// postdominates \p FC0 (making them control flow equivalent). It also
+ /// assumes that the other conditions for fusion have been met: adjacent,
+ /// identical trip counts, and no negative distance dependencies exist that
+ /// would prevent fusion. Thus, there is no checking for these conditions in
+ /// this method.
+ ///
+ /// Fusion is performed by rewiring the CFG to update successor blocks of the
+ /// components of the two loops. Specifically, the following changes are done:
+ ///
+ /// 1. The preheader of \p FC1 is removed as it is no longer necessary
+ /// (because it is currently only a single statement block).
+ /// 2. The latch of \p FC0 is modified to jump to the header of \p FC1.
+ /// 3. The latch of \p FC1 is modified to jump to the header of \p FC0.
+ /// 4. All blocks from \p FC1 are removed from FC1 and added to FC0.
+ ///
+ /// All of these modifications are done with dominator tree updates, thus
+ /// keeping the dominator (and post dominator) information up-to-date.
+ ///
+ /// This can be improved in the future by actually merging blocks during
+ /// fusion. For example, the preheader of \p FC1 can be merged with the
+ /// preheader of \p FC0. This would allow loops with more than a single
+ /// statement in the preheader to be fused. Similarly, the latch blocks of the
+ /// two loops could also be fused into a single block. This will require
+ /// analysis to prove it is safe to move the contents of the block past
+ /// existing code, which currently has not been implemented.
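+ ///
+ /// A rough sketch of the rewiring (block names refer to the candidates; the
+ /// shape is illustrative only):
+ ///
+ ///   before: FC0.Latch -> FC0.Header, FC0.ExitingBlock -> FC1.Preheader,
+ ///           FC1.Preheader -> FC1.Header, FC1.Latch -> FC1.Header
+ ///   after:  FC0.Latch -> FC1.Header, FC0.ExitingBlock -> FC1.Header,
+ ///           FC1.Latch -> FC0.Header (the fused backedge),
+ ///           and FC1.Preheader is deleted.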
+ Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
+ assert(FC0.isValid() && FC1.isValid() &&
+ "Expecting valid fusion candidates");
+
+ LLVM_DEBUG(dbgs() << "Fusion Candidate 0: \n"; FC0.dump();
+ dbgs() << "Fusion Candidate 1: \n"; FC1.dump(););
+
+ assert(FC1.Preheader == FC0.ExitBlock);
+ assert(FC1.Preheader->size() == 1 &&
+ FC1.Preheader->getSingleSuccessor() == FC1.Header);
+
+ // Remember the phi nodes originally in the header of FC0 in order to rewire
+ // them later. However, this is only necessary if the new loop carried
+ // values might not dominate the exiting branch. We do not generally test
+ // whether this is the case; we simply insert intermediate phi nodes, but we
+ // need to make sure these intermediate phi nodes have different
+ // predecessors. To this end, we filter the special case where the exiting
+ // block is the latch block of the first loop. Nothing needs to be done
+ // anyway as all loop carried values dominate the latch and thereby also the
+ // exiting branch.
+ SmallVector<PHINode *, 8> OriginalFC0PHIs;
+ if (FC0.ExitingBlock != FC0.Latch)
+ for (PHINode &PHI : FC0.Header->phis())
+ OriginalFC0PHIs.push_back(&PHI);
+
+ // Replace incoming blocks for header PHIs first.
+ FC1.Preheader->replaceSuccessorsPhiUsesWith(FC0.Preheader);
+ FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
+
+ // Then modify the control flow and update DT and PDT.
+ SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
+
+ // The old exiting block of the first loop (FC0) has to jump to the header
+ // of the second as we need to execute the code in the second header block
+ // regardless of the trip count. That is, if the trip count is 0, so the
+ // back edge is never taken, we still have to execute both loop headers,
+ // especially (but not only!) if the second is a do-while style loop.
+ // However, doing so might invalidate the phi nodes of the first loop as
+ // the new values only need to dominate their latch and not the exiting
+ // predicate. To remedy this potential problem we always introduce phi
+ // nodes in the header of the second loop later that select the loop carried
+ // value, if the second header was reached through an old latch of the
+ // first, or undef otherwise. This is sound as exiting the first implies the
+ // second will exit too, __without__ taking the back-edge (their
+ // trip-counts are equal, after all).
+ // KB: Would this sequence be simpler if we just made FC0.ExitingBlock go
+ // to FC1.Header? I think this is basically what the three sequences are
+ // trying to accomplish; however, doing this directly in the CFG may mean
+ // the DT/PDT becomes invalid.
+ FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
+ FC1.Header);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
+
+ // The pre-header of L1 is not necessary anymore.
+ assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
+ FC1.Preheader->getTerminator()->eraseFromParent();
+ new UnreachableInst(FC1.Preheader->getContext(), FC1.Preheader);
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Delete, FC1.Preheader, FC1.Header));
+
+ // Move the phi nodes from the second loop's header block to the first's.
+ while (PHINode *PHI = dyn_cast<PHINode>(&FC1.Header->front())) {
+ if (SE.isSCEVable(PHI->getType()))
+ SE.forgetValue(PHI);
+ if (PHI->hasNUsesOrMore(1))
+ PHI->moveBefore(&*FC0.Header->getFirstInsertionPt());
+ else
+ PHI->eraseFromParent();
+ }
+
+ // Introduce new phi nodes in the second loop header to ensure
+ // exiting the first and jumping to the header of the second does not break
+ // the SSA property of the phis originally in the first loop. See also the
+ // comment above.
+ Instruction *L1HeaderIP = &FC1.Header->front();
+ for (PHINode *LCPHI : OriginalFC0PHIs) {
+ int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch);
+ assert(L1LatchBBIdx >= 0 &&
+ "Expected loop carried value to be rewired at this point!");
+
+ Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx);
+
+ PHINode *L1HeaderPHI = PHINode::Create(
+ LCV->getType(), 2, LCPHI->getName() + ".afterFC0", L1HeaderIP);
+ L1HeaderPHI->addIncoming(LCV, FC0.Latch);
+ L1HeaderPHI->addIncoming(UndefValue::get(LCV->getType()),
+ FC0.ExitingBlock);
+
+ LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI);
+ }
+
+ // Replace latch terminator destinations.
+ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
+ FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
+
+ // If FC0.Latch and FC0.ExitingBlock are the same then we have already
+ // performed the updates above.
+ if (FC0.Latch != FC0.ExitingBlock)
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(
+ DominatorTree::Insert, FC0.Latch, FC1.Header));
+
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC0.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert,
+ FC1.Latch, FC0.Header));
+ TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
+ FC1.Latch, FC1.Header));
+
+ // Update DT/PDT
+ DTU.applyUpdates(TreeUpdates);
+
+ LI.removeBlock(FC1.Preheader);
+ DTU.deleteBB(FC1.Preheader);
+ DTU.flush();
+
+ // Is there a way to keep SE up-to-date so we don't need to forget the loops
+ // and rebuild the information in subsequent passes of fusion?
+ SE.forgetLoop(FC1.L);
+ SE.forgetLoop(FC0.L);
+
+ // Merge the loops.
+ SmallVector<BasicBlock *, 8> Blocks(FC1.L->block_begin(),
+ FC1.L->block_end());
+ for (BasicBlock *BB : Blocks) {
+ FC0.L->addBlockEntry(BB);
+ FC1.L->removeBlockFromLoop(BB);
+ if (LI.getLoopFor(BB) != FC1.L)
+ continue;
+ LI.changeLoopFor(BB, FC0.L);
+ }
+ while (!FC1.L->empty()) {
+ const auto &ChildLoopIt = FC1.L->begin();
+ Loop *ChildLoop = *ChildLoopIt;
+ FC1.L->removeChildLoop(ChildLoopIt);
+ FC0.L->addChildLoop(ChildLoop);
+ }
+
+ // Delete the now empty loop L1.
+ LI.erase(FC1.L);
+
+#ifndef NDEBUG
+ assert(!verifyFunction(*FC0.Header->getParent(), &errs()));
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+ assert(PDT.verify());
+ LI.verify(DT);
+ SE.verify();
+#endif
+
+ FuseCounter++;
+
+ LLVM_DEBUG(dbgs() << "Fusion done:\n");
+
+ return FC0.L;
+ }
+};
+
+struct LoopFuseLegacy : public FunctionPass {
+
+ static char ID;
+
+ LoopFuseLegacy() : FunctionPass(ID) {
+ initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<DependenceAnalysisWrapperPass>();
+
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+ return LF.fuseLoops(F);
+ }
+};
+
+PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &DI = AM.getResult<DependenceAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
+ bool Changed = LF.fuseLoops(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
+char LoopFuseLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
+
+FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
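+
+// Usage sketch (not part of the pass): the legacy registration above makes
+// the pass available to opt as -loop-fusion; under the new pass manager it
+// would be invoked as `opt -passes=loop-fusion`, assuming LoopFusePass is
+// registered under that name in PassRegistry.def (not visible in this diff).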
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index fbffa1920a84..e561494f19cf 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1,9 +1,8 @@
//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -51,12 +51,12 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -87,8 +87,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <cassert>
@@ -120,6 +120,7 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
+ OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
public:
@@ -127,8 +128,9 @@ public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const DataLayout *DL)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
+ const DataLayout *DL,
+ OptimizationRemarkEmitter &ORE)
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
bool runOnLoop(Loop *L);
@@ -221,7 +223,12 @@ public:
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+ // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
+
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
return LIR.runOnLoop(L);
}
@@ -243,7 +250,19 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
- LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+ // FIXME: This should probably be optional rather than required.
+ if (!ORE)
+ report_fatal_error(
+ "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
+ "at a higher level");
+
+ LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
+ *ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@@ -285,7 +304,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
// Determine if code size heuristics need to be applied.
ApplyCodeSizeHeuristics =
- L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
+ L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
@@ -313,9 +332,10 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
SmallVector<BasicBlock *, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
- LLVM_DEBUG(dbgs() << "loop-idiom Scanning: F["
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()
- << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
+ << "] Countable Loop %" << CurLoop->getHeader()->getName()
+ << "\n");
bool MadeChange = false;
@@ -430,7 +450,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
// turned into a memset of i8 -1, assuming that all the consecutive bytes
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
- Value *SplatValue = isBytewiseValue(StoredVal);
+ Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
// Note: memset and memset_pattern on unordered-atomic is yet not supported
@@ -607,7 +627,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Constant *FirstPatternValue = nullptr;
if (For == ForMemset::Yes)
- FirstSplatValue = isBytewiseValue(FirstStoredVal);
+ FirstSplatValue = isBytewiseValue(FirstStoredVal, *DL);
else
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
@@ -640,7 +660,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
Constant *SecondPatternValue = nullptr;
if (For == ForMemset::Yes)
- SecondSplatValue = isBytewiseValue(SecondStoredVal);
+ SecondSplatValue = isBytewiseValue(SecondStoredVal, *DL);
else
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
@@ -860,7 +880,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
- Value *SplatValue = isBytewiseValue(StoredVal);
+ Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
if (!SplatValue)
@@ -931,9 +951,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Module *M = TheStore->getModule();
StringRef FuncName = "memset_pattern16";
- Value *MSP =
- M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntPtr);
+ FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(),
+ Int8PtrTy, Int8PtrTy, IntPtr);
inferLibFuncAttributes(M, FuncName, *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
@@ -952,6 +971,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
<< "\n");
NewCall->setDebugLoc(TheStore->getDebugLoc());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
+ NewCall->getDebugLoc(), Preheader)
+ << "Transformed loop-strided store into a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() function";
+ });
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
for (auto *I : Stores)
@@ -1084,6 +1111,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
<< " from store ptr=" << *StoreEv << " at: " << *SI
<< "\n");
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
+ NewCall->getDebugLoc(), Preheader)
+ << "Formed a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() function";
+ });
+
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI);
@@ -1109,6 +1144,11 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
}
bool LoopIdiomRecognize::runOnNoncountableLoop() {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
+ << CurLoop->getHeader()->getParent()->getName()
+ << "] Noncountable Loop %"
+ << CurLoop->getHeader()->getName() << "\n");
+
return recognizePopcount() || recognizeAndInsertFFS();
}
@@ -1462,9 +1502,15 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
- if (CurLoop->getHeader()->size() != IdiomCanonicalSize &&
+
+ // @llvm.dbg intrinsics don't count as they have no semantic effect.
+ auto InstWithoutDebugIt = CurLoop->getHeader()->instructionsWithoutDebug();
+ uint32_t HeaderSize =
+ std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
+
+ if (HeaderSize != IdiomCanonicalSize &&
TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
- TargetTransformInfo::TCC_Basic)
+ TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
@@ -1529,7 +1575,7 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+ Function *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
@@ -1543,7 +1589,7 @@ static CallInst *createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val,
Type *Tys[] = {Val->getType()};
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent();
- Value *Func = Intrinsic::getDeclaration(M, IID, Tys);
+ Function *Func = Intrinsic::getDeclaration(M, IID, Tys);
CallInst *CI = IRBuilder.CreateCall(Func, Ops);
CI->setDebugLoc(DL);
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 6f7dc2429c09..31191b52895c 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -1,9 +1,8 @@
//===- LoopInstSimplify.cpp - Loop Instruction Simplification Pass --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -234,6 +233,8 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 766e39b439a0..9a42365adc1b 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1,9 +1,8 @@
//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1265,9 +1264,7 @@ bool LoopInterchangeTransform::transform() {
}
void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
- BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
- BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
- InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
+ SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI);
}
/// \brief Move all instructions except the terminator from FromBB right before
@@ -1280,17 +1277,6 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
FromBB->getTerminator()->getIterator());
}
-static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred,
- BasicBlock *NewPred) {
- for (PHINode &PHI : CurrBlock->phis()) {
- unsigned Num = PHI.getNumIncomingValues();
- for (unsigned i = 0; i < Num; ++i) {
- if (PHI.getIncomingBlock(i) == OldPred)
- PHI.setIncomingBlock(i, NewPred);
- }
- }
-}
-
/// Update BI to jump to NewBB instead of OldBB. Records updates to
/// the dominator tree in DTUpdates, if DT should be preserved.
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
@@ -1313,8 +1299,41 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
}
// Move Lcssa PHIs to the right place.
-static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
- BasicBlock *OuterLatch) {
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
+ BasicBlock *InnerLatch, BasicBlock *OuterHeader,
+ BasicBlock *OuterLatch, BasicBlock *OuterExit) {
+
+ // Deal with LCSSA PHI nodes in the exit block of the inner loop that are
+ // defined either in the header or the latch. Those blocks will become the
+ // header and latch of the new outer loop, and the only possible users can be
+ // PHI nodes in the exit block of the loop nest or in the outer loop header
+ // (reduction PHIs; in that case, the incoming value must be defined in the
+ // inner loop header). We can just replace the user with the incoming value
+ // and remove the PHI.
+ for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
+ assert(P.getNumIncomingValues() == 1 &&
+ "Only loops with a single exit are supported!");
+
+ // Incoming values are currently guaranteed to be instructions.
+ auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
+ // Skip phis with incoming values from the inner loop body, excluding the
+ // header and latch.
+ if (IncI->getParent() != InnerLatch && IncI->getParent() != InnerHeader)
+ continue;
+
+ assert(all_of(P.users(),
+ [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
+ return (cast<PHINode>(U)->getParent() == OuterHeader &&
+ IncI->getParent() == InnerHeader) ||
+ cast<PHINode>(U)->getParent() == OuterExit;
+ }) &&
+ "Can only replace phis iff the uses are in the loop nest exit or "
+ "the incoming value is defined in the inner header (it will "
+ "dominate all loop blocks after interchanging)");
+ P.replaceAllUsesWith(IncI);
+ P.eraseFromParent();
+ }
+
SmallVector<PHINode *, 8> LcssaInnerExit;
for (PHINode &P : InnerExit->phis())
LcssaInnerExit.push_back(&P);
@@ -1327,35 +1346,43 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch,
// If a PHI node has users outside of InnerExit, it has a use outside the
// interchanged loop and we have to preserve it. We move these to
// InnerLatch, which will become the new exit block for the innermost
- // loop after interchanging. For PHIs only used in InnerExit, we can just
- // replace them with the incoming value.
- for (PHINode *P : LcssaInnerExit) {
- bool hasUsersOutside = false;
- for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
- auto *Usr = cast<Instruction>(U.getUser());
- if (Usr->getParent() != InnerExit) {
- hasUsersOutside = true;
- continue;
- }
- U.set(P->getIncomingValueForBlock(InnerLatch));
- }
- if (hasUsersOutside)
- P->moveBefore(InnerLatch->getFirstNonPHI());
- else
- P->eraseFromParent();
- }
+ // loop after interchanging.
+ for (PHINode *P : LcssaInnerExit)
+ P->moveBefore(InnerLatch->getFirstNonPHI());
// If the inner loop latch contains LCSSA PHIs, those come from a child loop
// and we have to move them to the new inner latch.
for (PHINode *P : LcssaInnerLatch)
P->moveBefore(InnerExit->getFirstNonPHI());
+ // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
+ // incoming values from the outer latch or header, we have to add a new PHI
+ // in the inner loop latch, which became the exit block of the outer loop,
+ // after interchanging.
+ if (OuterExit) {
+ for (PHINode &P : OuterExit->phis()) {
+ if (P.getNumIncomingValues() != 1)
+ continue;
+ // Skip Phis with incoming values not defined in the outer loop's header
+ // or latch. Also skip incoming phis defined in the latch. Those should
+ // already have been updated.
+ auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
+ if (!I || ((I->getParent() != OuterLatch || isa<PHINode>(I)) &&
+ I->getParent() != OuterHeader))
+ continue;
+
+ PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
+ NewPhi->setIncomingValue(0, P.getIncomingValue(0));
+ NewPhi->setIncomingBlock(0, OuterLatch);
+ NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
+ P.setIncomingValue(0, NewPhi);
+ }
+ }
+
// Now adjust the incoming blocks for the LCSSA PHIs.
// For PHIs moved from Inner's exit block, we need to replace Inner's latch
// with the new latch.
- updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch);
+ InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
}
bool LoopInterchangeTransform::adjustLoopBranches() {
@@ -1374,9 +1401,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
// preheaders do not satisfy those conditions.
if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
!OuterLoopPreHeader->getUniquePredecessor())
- OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true);
+ OuterLoopPreHeader =
+ InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
if (InnerLoopPreHeader == OuterLoop->getHeader())
- InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true);
+ InnerLoopPreHeader =
+ InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
// Adjust the loop preheader
BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
@@ -1422,8 +1451,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
InnerLoopHeaderSuccessor, DTUpdates);
// Adjust reduction PHI's now that the incoming block has changed.
- updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader,
- OuterLoopHeader);
+ InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
+ OuterLoopHeader);
updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
OuterLoopPreHeader, DTUpdates);
@@ -1452,10 +1481,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
OuterLoopPreHeader);
- moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch);
+ moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
+ OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock());
// For PHIs in the exit block of the outer loop, outer's latch has been
// replaced by Inners'.
- updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch);
+ OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
@@ -1482,10 +1512,10 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
}
// Update the incoming blocks for moved PHI nodes.
- updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader);
- updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch);
- updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader);
- updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch);
+ OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
+ OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
+ InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
+ InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
return true;
}
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 19bd9ebcc15b..2b3d5e0ce9b7 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -1,9 +1,8 @@
//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,10 +29,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -54,6 +57,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
@@ -159,8 +163,9 @@ namespace {
class LoadEliminationForLoop {
public:
LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
- DominatorTree *DT)
- : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo* PSI)
+ : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
/// Look through the loop-carried and loop-independent dependences in
/// this loop and find store->load dependences.
@@ -428,9 +433,9 @@ public:
auto *PH = L->getLoopPreheader();
Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
PH->getTerminator());
- Value *Initial =
- new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
- Cand.Load->getAlignment(), PH->getTerminator());
+ Value *Initial = new LoadInst(
+ Cand.Load->getType(), InitialPtr, "load_initial",
+ /* isVolatile */ false, Cand.Load->getAlignment(), PH->getTerminator());
PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
&L->getHeader()->front());
@@ -529,7 +534,17 @@ public:
}
if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
- if (L->getHeader()->getParent()->optForSize()) {
+ if (LAI.hasConvergentOp()) {
+ LLVM_DEBUG(dbgs() << "Versioning is needed but not allowed with "
+ "convergent calls\n");
+ return false;
+ }
+
+ auto *HeaderBB = L->getHeader();
+ auto *F = HeaderBB->getParent();
+ bool OptForSize = F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+ if (OptForSize) {
LLVM_DEBUG(
dbgs() << "Versioning is needed but not allowed when optimizing "
"for size.\n");
@@ -572,6 +587,8 @@ private:
LoopInfo *LI;
const LoopAccessInfo &LAI;
DominatorTree *DT;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
PredicatedScalarEvolution PSE;
};
@@ -579,6 +596,7 @@ private:
static bool
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
// Build up a worklist of inner-loops to transform to avoid iterator
// invalidation.
@@ -597,7 +615,7 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
bool Changed = false;
for (Loop *L : Worklist) {
// The actual work is performed by LoadEliminationForLoop.
- LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+ LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
Changed |= LEL.processLoop();
}
return Changed;
@@ -622,10 +640,14 @@ public:
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
// Process each loop nest in the function.
return eliminateLoadsAcrossLoops(
- F, LI, DT,
+ F, LI, DT, BFI, PSI,
[&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
@@ -638,6 +660,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
};
@@ -653,6 +677,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
FunctionPass *llvm::createLoopLoadEliminationPass() {
@@ -668,12 +694,18 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+ MemorySSA *MSSA = EnableMSSALoopDependency
+ ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
- F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI,
- SE, TLI, TTI, nullptr};
+ F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
diff --git a/lib/Transforms/Scalar/LoopPassManager.cpp b/lib/Transforms/Scalar/LoopPassManager.cpp
index 774ad7b945a0..f3bfbd3564ab 100644
--- a/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -1,9 +1,8 @@
//===- LoopPassManager.cpp - Loop pass management -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
index 5983c804c0c1..507a1e251ca6 100644
--- a/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -1,9 +1,8 @@
//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -179,6 +178,7 @@
#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -194,6 +194,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#define DEBUG_TYPE "loop-predication"
@@ -222,24 +223,31 @@ static cl::opt<float> LatchExitProbabilityScale(
cl::desc("scale factor for the latch probability. Value should be greater "
"than 1. Lower values are ignored"));
+static cl::opt<bool> PredicateWidenableBranchGuards(
+ "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden,
+ cl::desc("Whether or not we should predicate guards "
+ "expressed as widenable branches to deoptimize blocks"),
+ cl::init(true));
+
namespace {
-class LoopPredication {
- /// Represents an induction variable check:
- /// icmp Pred, <induction variable>, <loop invariant limit>
- struct LoopICmp {
- ICmpInst::Predicate Pred;
- const SCEVAddRecExpr *IV;
- const SCEV *Limit;
- LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
- const SCEV *Limit)
- : Pred(Pred), IV(IV), Limit(Limit) {}
- LoopICmp() {}
- void dump() {
- dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
- << ", Limit = " << *Limit << "\n";
- }
- };
+/// Represents an induction variable check:
+/// icmp Pred, <induction variable>, <loop invariant limit>
+struct LoopICmp {
+ ICmpInst::Predicate Pred;
+ const SCEVAddRecExpr *IV;
+ const SCEV *Limit;
+ LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV,
+ const SCEV *Limit)
+ : Pred(Pred), IV(IV), Limit(Limit) {}
+ LoopICmp() {}
+ void dump() {
+ dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV
+ << ", Limit = " << *Limit << "\n";
+ }
+};
+class LoopPredication {
+ AliasAnalysis *AA;
ScalarEvolution *SE;
BranchProbabilityInfo *BPI;
@@ -249,58 +257,53 @@ class LoopPredication {
LoopICmp LatchCheck;
bool isSupportedStep(const SCEV* Step);
- Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
- return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
- ICI->getOperand(1));
- }
- Optional<LoopICmp> parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
- Value *RHS);
-
+ Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
Optional<LoopICmp> parseLoopLatchICmp();
- bool CanExpand(const SCEV* S);
- Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
- ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
- Instruction *InsertAt);
+  /// Return an insertion point suitable for inserting a safe-to-speculate
+  /// instruction whose only user will be 'User', which has operands 'Ops'. A
+  /// trivial result would be at the User itself, but we try to return a
+ /// loop invariant location if possible.
+ Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
+ /// Same as above, *except* that this uses the SCEV definition of invariant
+ /// which is that an expression *can be made* invariant via SCEVExpander.
+  /// Thus, this version is only suitable for finding an insert point to be
+ /// passed to SCEVExpander!
+ Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops);
+
+ /// Return true if the value is known to produce a single fixed value across
+ /// all iterations on which it executes. Note that this does not imply
+  /// speculation safety. That must be established separately.
+ bool isLoopInvariantValue(const SCEV* S);
+
+ Value *expandCheck(SCEVExpander &Expander, Instruction *Guard,
+ ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
- IRBuilder<> &Builder);
+ Instruction *Guard);
Optional<Value *> widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck,
LoopICmp RangeCheck,
SCEVExpander &Expander,
- IRBuilder<> &Builder);
+ Instruction *Guard);
Optional<Value *> widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck,
LoopICmp RangeCheck,
SCEVExpander &Expander,
- IRBuilder<> &Builder);
+ Instruction *Guard);
+ unsigned collectChecks(SmallVectorImpl<Value *> &Checks, Value *Condition,
+ SCEVExpander &Expander, Instruction *Guard);
bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
-
+ bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
// If the loop always exits through another block in the loop, we should not
// predicate based on the latch check. For example, the latch check can be a
  // very coarse-grained check and there can be more fine-grained exit checks
// within the loop. We identify such unprofitable loops through BPI.
bool isLoopProfitableToPredicate();
- // When the IV type is wider than the range operand type, we can still do loop
- // predication, by generating SCEVs for the range and latch that are of the
- // same type. We achieve this by generating a SCEV truncate expression for the
- // latch IV. This is done iff truncation of the IV is a safe operation,
- // without loss of information.
- // Another way to achieve this is by generating a wider type SCEV for the
- // range check operand, however, this needs a more involved check that
- // operands do not overflow. This can lead to loss of information when the
- // range operand is of the form: add i32 %offset, %iv. We need to prove that
- // sext(x + y) is same as sext(x) + sext(y).
- // This function returns true if we can safely represent the IV type in
- // the RangeCheckType without loss of information.
- bool isSafeToTruncateWideIVType(Type *RangeCheckType);
- // Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
- // so.
- Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
-
public:
- LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
- : SE(SE), BPI(BPI){};
+ LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+ BranchProbabilityInfo *BPI)
+ : AA(AA), SE(SE), BPI(BPI){};
bool runOnLoop(Loop *L);
};
@@ -322,7 +325,8 @@ public:
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
BranchProbabilityInfo &BPI =
getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- LoopPredication LP(SE, &BPI);
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ LoopPredication LP(AA, SE, &BPI);
return LP.runOnLoop(L);
}
};
@@ -348,16 +352,19 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
- LoopPredication LP(&AR.SE, BPI);
+ LoopPredication LP(&AR.AA, &AR.SE, BPI);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
-Optional<LoopPredication::LoopICmp>
-LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
- Value *RHS) {
+Optional<LoopICmp>
+LoopPredication::parseLoopICmp(ICmpInst *ICI) {
+ auto Pred = ICI->getPredicate();
+ auto *LHS = ICI->getOperand(0);
+ auto *RHS = ICI->getOperand(1);
+
const SCEV *LHSS = SE->getSCEV(LHS);
if (isa<SCEVCouldNotCompute>(LHSS))
return None;
@@ -380,42 +387,98 @@ LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
}
Value *LoopPredication::expandCheck(SCEVExpander &Expander,
- IRBuilder<> &Builder,
+ Instruction *Guard,
ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, Instruction *InsertAt) {
- // TODO: we can check isLoopEntryGuardedByCond before emitting the check
-
+ const SCEV *RHS) {
Type *Ty = LHS->getType();
assert(Ty == RHS->getType() && "expandCheck operands have different types?");
- if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
- return Builder.getTrue();
+ if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
+ IRBuilder<> Builder(Guard);
+ if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
+ return Builder.getTrue();
+ if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
+ LHS, RHS))
+ return Builder.getFalse();
+ }
- Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
- Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+ Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS}));
+ Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS}));
+ IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV}));
return Builder.CreateICmp(Pred, LHSV, RHSV);
}
-Optional<LoopPredication::LoopICmp>
-LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
+
+// Returns true if it's safe to truncate the IV to RangeCheckType.
+// When the IV type is wider than the range operand type, we can still do loop
+// predication, by generating SCEVs for the range and latch that are of the
+// same type. We achieve this by generating a SCEV truncate expression for the
+// latch IV. This is done iff truncation of the IV is a safe operation,
+// without loss of information.
+// Another way to achieve this is by generating a wider type SCEV for the
+// range check operand, however, this needs a more involved check that
+// operands do not overflow. This can lead to loss of information when the
+// range operand is of the form: add i32 %offset, %iv. We need to prove that
+// sext(x + y) is same as sext(x) + sext(y).
+// This function returns true if we can safely represent the IV type in
+// the RangeCheckType without loss of information.
+static bool isSafeToTruncateWideIVType(const DataLayout &DL,
+ ScalarEvolution &SE,
+ const LoopICmp LatchCheck,
+ Type *RangeCheckType) {
+ if (!EnableIVTruncation)
+ return false;
+ assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()) >
+ DL.getTypeSizeInBits(RangeCheckType) &&
+ "Expected latch check IV type to be larger than range check operand "
+ "type!");
+ // The start and end values of the IV should be known. This is to guarantee
+ // that truncating the wide type will not lose information.
+ auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
+ auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
+ if (!Limit || !Start)
+ return false;
+ // This check makes sure that the IV does not change sign during loop
+ // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
+ // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
+ // IV wraps around, and the truncation of the IV would lose the range of
+ // iterations between 2^32 and 2^64.
+ bool Increasing;
+ if (!SE.isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
+ return false;
+ // The active bits should be less than the bits in the RangeCheckType. This
+ // guarantees that truncating the latch check to RangeCheckType is a safe
+ // operation.
+ auto RangeCheckTypeBitSize = DL.getTypeSizeInBits(RangeCheckType);
+ return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
+ Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
+}
+
+
+// Return a LoopICmp describing a latch check equivalent to LatchCheck but with
+// the requested type if safe to do so. May involve the use of a new IV.
+static Optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL,
+ ScalarEvolution &SE,
+ const LoopICmp LatchCheck,
+ Type *RangeCheckType) {
auto *LatchType = LatchCheck.IV->getType();
if (RangeCheckType == LatchType)
return LatchCheck;
// For now, bail out if latch type is narrower than range type.
- if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
+ if (DL.getTypeSizeInBits(LatchType) < DL.getTypeSizeInBits(RangeCheckType))
return None;
- if (!isSafeToTruncateWideIVType(RangeCheckType))
+ if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType))
return None;
// We can now safely identify the truncated version of the IV and limit for
// RangeCheckType.
LoopICmp NewLatchCheck;
NewLatchCheck.Pred = LatchCheck.Pred;
NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
- SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
+ SE.getTruncateExpr(LatchCheck.IV, RangeCheckType));
if (!NewLatchCheck.IV)
return None;
- NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
+ NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType);
LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType
<< "can be represented as range check type:"
<< *RangeCheckType << "\n");
@@ -428,13 +491,66 @@ bool LoopPredication::isSupportedStep(const SCEV* Step) {
return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
}
-bool LoopPredication::CanExpand(const SCEV* S) {
- return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+Instruction *LoopPredication::findInsertPt(Instruction *Use,
+ ArrayRef<Value*> Ops) {
+ for (Value *Op : Ops)
+ if (!L->isLoopInvariant(Op))
+ return Use;
+ return Preheader->getTerminator();
+}
+
+Instruction *LoopPredication::findInsertPt(Instruction *Use,
+ ArrayRef<const SCEV*> Ops) {
+ // Subtlety: SCEV considers things to be invariant if the value produced is
+ // the same across iterations. This is not the same as being able to
+ // evaluate outside the loop, which is what we actually need here.
+ for (const SCEV *Op : Ops)
+ if (!SE->isLoopInvariant(Op, L) ||
+ !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE))
+ return Use;
+ return Preheader->getTerminator();
+}
+
+bool LoopPredication::isLoopInvariantValue(const SCEV* S) {
+ // Handling expressions which produce invariant results, but *haven't* yet
+ // been removed from the loop serves two important purposes.
+ // 1) Most importantly, it resolves a pass ordering cycle which would
+  // otherwise need us to iterate licm, loop-predication, and either
+ // loop-unswitch or loop-peeling to make progress on examples with lots of
+ // predicable range checks in a row. (Since, in the general case, we can't
+ // hoist the length checks until the dominating checks have been discharged
+ // as we can't prove doing so is safe.)
+ // 2) As a nice side effect, this exposes the value of peeling or unswitching
+ // much more obviously in the IR. Otherwise, the cost modeling for other
+ // transforms would end up needing to duplicate all of this logic to model a
+ // check which becomes predictable based on a modeled peel or unswitch.
+ //
+ // The cost of doing so in the worst case is an extra fill from the stack in
+ // the loop to materialize the loop invariant test value instead of checking
+  // against the original IV which is presumably in a register inside the loop.
+  // Such cases are presumably rare, and hint at missing opportunities for
+ // other passes.
+
+ if (SE->isLoopInvariant(S, L))
+    // Note: This is the SCEV variant, so the original Value* may be within the
+ // loop even though SCEV has proven it is loop invariant.
+ return true;
+
+  // Handle a particularly important case which SCEV doesn't yet know about:
+  // range checks on arrays with immutable lengths.
+ // TODO: This should be sunk inside SCEV.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+ if (const auto *LI = dyn_cast<LoadInst>(U->getValue()))
+ if (LI->isUnordered() && L->hasLoopInvariantOperands(LI))
+ if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
+ LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ return true;
+ return false;
}
Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
- LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
- SCEVExpander &Expander, IRBuilder<> &Builder) {
+ LoopICmp LatchCheck, LoopICmp RangeCheck,
+ SCEVExpander &Expander, Instruction *Guard) {
auto *Ty = RangeCheck.IV->getType();
// Generate the widened condition for the forward loop:
// guardStart u< guardLimit &&
@@ -446,40 +562,61 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
const SCEV *GuardLimit = RangeCheck.Limit;
const SCEV *LatchStart = LatchCheck.IV->getStart();
const SCEV *LatchLimit = LatchCheck.Limit;
+ // Subtlety: We need all the values to be *invariant* across all iterations,
+ // but we only need to check expansion safety for those which *aren't*
+ // already guaranteed to dominate the guard.
+ if (!isLoopInvariantValue(GuardStart) ||
+ !isLoopInvariantValue(GuardLimit) ||
+ !isLoopInvariantValue(LatchStart) ||
+ !isLoopInvariantValue(LatchLimit)) {
+ LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+ return None;
+ }
+ if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
+ !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
+ LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+ return None;
+ }
// guardLimit - guardStart + latchStart - 1
const SCEV *RHS =
SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
- if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
- !CanExpand(LatchLimit) || !CanExpand(RHS)) {
- LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
- return None;
- }
auto LimitCheckPred =
ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n");
LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n");
LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n");
-
- Instruction *InsertAt = Preheader->getTerminator();
+
auto *LimitCheck =
- expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS, InsertAt);
- auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred,
- GuardStart, GuardLimit, InsertAt);
+ expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS);
+ auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
+ GuardStart, GuardLimit);
+ IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
- LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck,
- SCEVExpander &Expander, IRBuilder<> &Builder) {
+ LoopICmp LatchCheck, LoopICmp RangeCheck,
+ SCEVExpander &Expander, Instruction *Guard) {
auto *Ty = RangeCheck.IV->getType();
const SCEV *GuardStart = RangeCheck.IV->getStart();
const SCEV *GuardLimit = RangeCheck.Limit;
+ const SCEV *LatchStart = LatchCheck.IV->getStart();
const SCEV *LatchLimit = LatchCheck.Limit;
- if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
- !CanExpand(LatchLimit)) {
+ // Subtlety: We need all the values to be *invariant* across all iterations,
+ // but we only need to check expansion safety for those which *aren't*
+ // already guaranteed to dominate the guard.
+ if (!isLoopInvariantValue(GuardStart) ||
+ !isLoopInvariantValue(GuardLimit) ||
+ !isLoopInvariantValue(LatchStart) ||
+ !isLoopInvariantValue(LatchLimit)) {
+ LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
+ return None;
+ }
+ if (!isSafeToExpandAt(LatchStart, Guard, *SE) ||
+ !isSafeToExpandAt(LatchLimit, Guard, *SE)) {
LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
return None;
}
@@ -497,22 +634,35 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
// guardStart u< guardLimit &&
// latchLimit <pred> 1.
// See the header comment for reasoning of the checks.
- Instruction *InsertAt = Preheader->getTerminator();
auto LimitCheckPred =
ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred);
- auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT,
- GuardStart, GuardLimit, InsertAt);
- auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit,
- SE->getOne(Ty), InsertAt);
+ auto *FirstIterationCheck = expandCheck(Expander, Guard,
+ ICmpInst::ICMP_ULT,
+ GuardStart, GuardLimit);
+ auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
+ SE->getOne(Ty));
+ IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
+static void normalizePredicate(ScalarEvolution *SE, Loop *L,
+ LoopICmp& RC) {
+ // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the
+ // ULT/UGE form for ease of handling by our caller.
+ if (ICmpInst::isEquality(RC.Pred) &&
+ RC.IV->getStepRecurrence(*SE)->isOne() &&
+ SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit))
+ RC.Pred = RC.Pred == ICmpInst::ICMP_NE ?
+ ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+}
+
+
/// If ICI can be widened to a loop invariant condition emits the loop
/// invariant condition in the loop preheader and return it, otherwise
/// returns None.
Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
SCEVExpander &Expander,
- IRBuilder<> &Builder) {
+ Instruction *Guard) {
LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
LLVM_DEBUG(ICI->dump());
@@ -545,7 +695,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
return None;
}
auto *Ty = RangeCheckIV->getType();
- auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
+ auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty);
if (!CurrLatchCheckOpt) {
LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check "
"corresponding to range type: "
@@ -566,34 +716,27 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
if (Step->isOne())
return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck,
- Expander, Builder);
+ Expander, Guard);
else {
assert(Step->isAllOnesValue() && "Step should be -1!");
return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck,
- Expander, Builder);
+ Expander, Guard);
}
}
-bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
- SCEVExpander &Expander) {
- LLVM_DEBUG(dbgs() << "Processing guard:\n");
- LLVM_DEBUG(Guard->dump());
-
- TotalConsidered++;
-
- IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
-
+unsigned LoopPredication::collectChecks(SmallVectorImpl<Value *> &Checks,
+ Value *Condition,
+ SCEVExpander &Expander,
+ Instruction *Guard) {
+ unsigned NumWidened = 0;
  // The guard condition is expected to be of the form:
// cond1 && cond2 && cond3 ...
// Iterate over subconditions looking for icmp conditions which can be
  // widened across loop iterations. As conditions are widened, remember the
  // resulting list of subconditions in the Checks vector.
- SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+ SmallVector<Value *, 4> Worklist(1, Condition);
SmallPtrSet<Value *, 4> Visited;
-
- SmallVector<Value *, 4> Checks;
-
- unsigned NumWidened = 0;
+ Value *WideableCond = nullptr;
do {
Value *Condition = Worklist.pop_back_val();
if (!Visited.insert(Condition).second)
@@ -607,8 +750,16 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
continue;
}
+ if (match(Condition,
+ m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+ // Pick any, we don't care which
+ WideableCond = Condition;
+ continue;
+ }
+
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
- if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+ if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
+ Guard)) {
Checks.push_back(NewRangeCheck.getValue());
NumWidened++;
continue;
@@ -617,28 +768,70 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
// Save the condition as is if we can't widen it
Checks.push_back(Condition);
- } while (Worklist.size() != 0);
+ } while (!Worklist.empty());
+  // At the moment, our matching logic for widenable conditions implicitly
+ // assumes we preserve the form: (br (and Cond, WC())). FIXME
+  // Note that if there were multiple calls to widenable condition in the
+ // traversal, we only need to keep one, and which one is arbitrary.
+ if (WideableCond)
+ Checks.push_back(WideableCond);
+ return NumWidened;
+}
+
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+ SCEVExpander &Expander) {
+ LLVM_DEBUG(dbgs() << "Processing guard:\n");
+ LLVM_DEBUG(Guard->dump());
+ TotalConsidered++;
+ SmallVector<Value *, 4> Checks;
+ unsigned NumWidened = collectChecks(Checks, Guard->getOperand(0), Expander,
+ Guard);
+ if (NumWidened == 0)
+ return false;
+
+ TotalWidened += NumWidened;
+
+ // Emit the new guard condition
+ IRBuilder<> Builder(findInsertPt(Guard, Checks));
+ Value *AllChecks = Builder.CreateAnd(Checks);
+ auto *OldCond = Guard->getOperand(0);
+ Guard->setOperand(0, AllChecks);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+
+ LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ return true;
+}
+
+bool LoopPredication::widenWidenableBranchGuardConditions(
+ BranchInst *BI, SCEVExpander &Expander) {
+ assert(isGuardAsWidenableBranch(BI) && "Must be!");
+ LLVM_DEBUG(dbgs() << "Processing guard:\n");
+ LLVM_DEBUG(BI->dump());
+
+ TotalConsidered++;
+ SmallVector<Value *, 4> Checks;
+ unsigned NumWidened = collectChecks(Checks, BI->getCondition(),
+ Expander, BI);
if (NumWidened == 0)
return false;
TotalWidened += NumWidened;
// Emit the new guard condition
- Builder.SetInsertPoint(Guard);
- Value *LastCheck = nullptr;
- for (auto *Check : Checks)
- if (!LastCheck)
- LastCheck = Check;
- else
- LastCheck = Builder.CreateAnd(LastCheck, Check);
- Guard->setOperand(0, LastCheck);
+ IRBuilder<> Builder(findInsertPt(BI, Checks));
+ Value *AllChecks = Builder.CreateAnd(Checks);
+ auto *OldCond = BI->getCondition();
+ BI->setCondition(AllChecks);
+ assert(isGuardAsWidenableBranch(BI) &&
+ "Stopped being a guard after transform?");
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
}
-Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
+Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
using namespace PatternMatch;
BasicBlock *LoopLatch = L->getLoopLatch();
@@ -647,27 +840,30 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
return None;
}
- ICmpInst::Predicate Pred;
- Value *LHS, *RHS;
- BasicBlock *TrueDest, *FalseDest;
-
- if (!match(LoopLatch->getTerminator(),
- m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest,
- FalseDest))) {
+ auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+ if (!BI || !BI->isConditional()) {
LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n");
return None;
}
- assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
- "One of the latch's destinations must be the header");
- if (TrueDest != L->getHeader())
- Pred = ICmpInst::getInversePredicate(Pred);
-
- auto Result = parseLoopICmp(Pred, LHS, RHS);
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ assert(
+ (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) &&
+ "One of the latch's destinations must be the header");
+
+ auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICI) {
+ LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n");
+ return None;
+ }
+ auto Result = parseLoopICmp(ICI);
if (!Result) {
LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
return None;
}
+ if (TrueDest != L->getHeader())
+ Result->Pred = ICmpInst::getInversePredicate(Result->Pred);
+
// Check affine first, so if it's not we don't try to compute the step
// recurrence.
if (!Result->IV->isAffine()) {
@@ -692,49 +888,22 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
}
};
+ normalizePredicate(SE, L, *Result);
if (IsUnsupportedPredicate(Step, Result->Pred)) {
LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
<< ")!\n");
return None;
}
+
return Result;
}
-// Returns true if its safe to truncate the IV to RangeCheckType.
-bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
- if (!EnableIVTruncation)
- return false;
- assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
- DL->getTypeSizeInBits(RangeCheckType) &&
- "Expected latch check IV type to be larger than range check operand "
- "type!");
- // The start and end values of the IV should be known. This is to guarantee
- // that truncating the wide type will not lose information.
- auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
- auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
- if (!Limit || !Start)
- return false;
- // This check makes sure that the IV does not change sign during loop
- // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
- // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
- // IV wraps around, and the truncation of the IV would lose the range of
- // iterations between 2^32 and 2^64.
- bool Increasing;
- if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
- return false;
- // The active bits should be less than the bits in the RangeCheckType. This
- // guarantees that truncating the latch check to RangeCheckType is a safe
- // operation.
- auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
- return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
- Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
-}
bool LoopPredication::isLoopProfitableToPredicate() {
if (SkipProfitabilityChecks || !BPI)
return true;
- SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 8> ExitEdges;
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
L->getExitEdges(ExitEdges);
// If there is only one exiting edge in the loop, it is always profitable to
// predicate the loop.
@@ -795,7 +964,12 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
// There is nothing to do if the module doesn't use guards
auto *GuardDecl =
M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
- if (!GuardDecl || GuardDecl->use_empty())
+ bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
+ auto *WCDecl = M->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+ bool HasWidenableConditions =
+ PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty();
+ if (!HasIntrinsicGuards && !HasWidenableConditions)
return false;
DL = &M->getDataLayout();
@@ -819,12 +993,18 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
// Collect all the guards into a vector and process later, so as not
// to invalidate the instruction iterator.
SmallVector<IntrinsicInst *, 4> Guards;
- for (const auto BB : L->blocks())
+ SmallVector<BranchInst *, 4> GuardsAsWidenableBranches;
+ for (const auto BB : L->blocks()) {
for (auto &I : *BB)
if (isGuard(&I))
Guards.push_back(cast<IntrinsicInst>(&I));
+ if (PredicateWidenableBranchGuards &&
+ isGuardAsWidenableBranch(BB->getTerminator()))
+ GuardsAsWidenableBranches.push_back(
+ cast<BranchInst>(BB->getTerminator()));
+ }
- if (Guards.empty())
+ if (Guards.empty() && GuardsAsWidenableBranches.empty())
return false;
SCEVExpander Expander(*SE, *DL, "loop-predication");
@@ -832,6 +1012,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
bool Changed = false;
for (auto *Guard : Guards)
Changed |= widenGuardConditions(Guard, Expander);
+ for (auto *Guard : GuardsAsWidenableBranches)
+ Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
return Changed;
}
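
The truncation-safety rule added above (isSafeToTruncateWideIVType) boils down to an active-bits comparison: the wide latch IV may be checked in the narrower range-check type only when both the known start and limit constants need fewer bits than the narrow type provides. Below is a minimal standalone sketch of that rule, stated over plain integers instead of SCEV constants and APInt; the helper names (activeBits, safeToTruncate) are illustrative only and are not LLVM APIs.

#include <cassert>
#include <cstdint>

// Number of bits needed to represent V, mirroring APInt::getActiveBits():
// position of the highest set bit plus one, and 0 for V == 0.
static unsigned activeBits(uint64_t V) {
  unsigned Bits = 0;
  for (; V != 0; V >>= 1)
    ++Bits;
  return Bits;
}

// Truncating the wide IV to a RangeCheckBits-wide type is modeled as safe
// only when both the start and the limit stay strictly below that width,
// so no loop iterations are lost to wrapping after truncation.
static bool safeToTruncate(uint64_t Start, uint64_t Limit,
                           unsigned RangeCheckBits) {
  return activeBits(Start) < RangeCheckBits &&
         activeBits(Limit) < RangeCheckBits;
}

int main() {
  // An i64 IV running from 5 to 1000 fits comfortably in an i32 range check.
  assert(safeToTruncate(5, 1000, 32));
  // A limit of 2^40 cannot be represented once the IV is truncated to i32.
  assert(!safeToTruncate(5, 1ULL << 40, 32));
  return 0;
}

The sketch deliberately ignores signedness and the monotonicity requirement that the real code establishes via isMonotonicPredicate; it only models the final active-bits comparison.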
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 9a99e5925572..166b57f20b43 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1,9 +1,8 @@
//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -892,12 +891,22 @@ bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
if (!ADR)
return false;
+
+  // Check that the first root is evenly spaced relative to the base.
unsigned N = DRS.Roots.size() + 1;
const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
return false;
+  // Check that the remaining roots are evenly spaced.
+ for (unsigned i = 1; i < N - 1; ++i) {
+ const SCEV *NewStepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[i]),
+ SE->getSCEV(DRS.Roots[i-1]));
+ if (NewStepSCEV != StepSCEV)
+ return false;
+ }
+
return true;
}
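
The LoopReroll change above tightens validateRootSet: it is no longer enough for the first root to sit one step away from the base recurrence; every consecutive pair of roots must be separated by that same step. The following standalone sketch models only the newly added consecutive-spacing loop, using plain integers rather than SCEV expressions; rootsEvenlySpaced is an illustrative name, not an LLVM function, and the existing check that the base recurrence advances by Step * N is left out.

#include <cassert>
#include <cstddef>
#include <vector>

// Reject a candidate root set unless every consecutive pair of roots is
// separated by the same stride that separates the base from the first root.
static bool rootsEvenlySpaced(long Base, const std::vector<long> &Roots) {
  if (Roots.empty())
    return false;
  const long Step = Roots[0] - Base;
  for (std::size_t I = 1; I < Roots.size(); ++I)
    if (Roots[I] - Roots[I - 1] != Step)
      return false;
  return true;
}

int main() {
  assert(rootsEvenlySpaced(0, {4, 8, 12}));  // uniform stride of 4
  assert(!rootsEvenlySpaced(0, {4, 8, 20})); // last root breaks the stride
  return 0;
}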
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index fd22128f7fe6..e009947690af 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -1,9 +1,8 @@
//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,7 +54,10 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
if (AR.MSSA && VerifyMemorySSA)
AR.MSSA->verifyMemorySSA();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
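
The LoopRotation change above is analysis bookkeeping rather than a change to rotation itself: when the MemorySSA-based loop pipeline is enabled, the pass now reports MemorySSAAnalysis as preserved so the pass manager does not invalidate and recompute it. A rough standalone model of that idea is sketched below; the tag strings and helper names are invented for illustration and are not the LLVM pass-manager API.

#include <cstdio>
#include <set>
#include <string>

using PreservedSet = std::set<std::string>;

// Baseline set of analyses a loop pass usually promises to keep valid.
static PreservedSet basicLoopPassPreserved() {
  return {"LoopAnalysis", "DominatorTreeAnalysis", "ScalarEvolutionAnalysis"};
}

// Model of the patched behaviour: only advertise MemorySSA as preserved when
// the MSSA-based loop pipeline is actually in use.
static PreservedSet rotatePreserved(bool EnableMSSALoopDependency) {
  PreservedSet PA = basicLoopPassPreserved();
  if (EnableMSSALoopDependency)
    PA.insert("MemorySSAAnalysis"); // spare downstream passes a recompute
  return PA;
}

int main() {
  for (const std::string &Tag : rotatePreserved(/*EnableMSSALoopDependency=*/true))
    std::printf("preserved: %s\n", Tag.c_str());
  return 0;
}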
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 2e5927f9a068..046f4c8af492 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -1,9 +1,8 @@
//===--------- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -29,7 +29,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -42,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "loop-simplifycfg"
static cl::opt<bool> EnableTermFolding("enable-loop-simplifycfg-term-folding",
- cl::init(false));
+ cl::init(true));
STATISTIC(NumTerminatorsFolded,
"Number of terminators folded to unconditional branches");
@@ -80,6 +79,36 @@ static BasicBlock *getOnlyLiveSuccessor(BasicBlock *BB) {
return nullptr;
}
+/// Removes \p BB from all loops from [FirstLoop, LastLoop) in the parent chain.
+static void removeBlockFromLoops(BasicBlock *BB, Loop *FirstLoop,
+ Loop *LastLoop = nullptr) {
+ assert((!LastLoop || LastLoop->contains(FirstLoop->getHeader())) &&
+ "First loop is supposed to be inside of last loop!");
+ assert(FirstLoop->contains(BB) && "Must be a loop block!");
+ for (Loop *Current = FirstLoop; Current != LastLoop;
+ Current = Current->getParentLoop())
+ Current->removeBlockFromLoop(BB);
+}
+
+/// Find the innermost loop that contains at least one block from \p BBs and
+/// contains the header of loop \p L.
+static Loop *getInnermostLoopFor(SmallPtrSetImpl<BasicBlock *> &BBs,
+ Loop &L, LoopInfo &LI) {
+ Loop *Innermost = nullptr;
+ for (BasicBlock *BB : BBs) {
+ Loop *BBL = LI.getLoopFor(BB);
+ while (BBL && !BBL->contains(L.getHeader()))
+ BBL = BBL->getParentLoop();
+ if (BBL == &L)
+ BBL = BBL->getParentLoop();
+ if (!BBL)
+ continue;
+ if (!Innermost || BBL->getLoopDepth() > Innermost->getLoopDepth())
+ Innermost = BBL;
+ }
+ return Innermost;
+}
+
namespace {
/// Helper class that can turn branches and switches with constant conditions
/// into unconditional branches.
@@ -90,6 +119,9 @@ private:
DominatorTree &DT;
ScalarEvolution &SE;
MemorySSAUpdater *MSSAU;
+ LoopBlocksDFS DFS;
+ DomTreeUpdater DTU;
+ SmallVector<DominatorTree::UpdateType, 16> DTUpdates;
// Whether or not the current loop has irreducible CFG.
bool HasIrreducibleCFG = false;
@@ -175,7 +207,6 @@ private:
/// Fill all information about status of blocks and exits of the current loop
/// if constant folding of all branches will be done.
void analyze() {
- LoopBlocksDFS DFS(&L);
DFS.perform(&LI);
assert(DFS.isComplete() && "DFS is expected to be finished");
@@ -208,12 +239,13 @@ private:
// folding. Only handle blocks from current loop: branches in child loops
// are skipped because if they can be folded, they should be folded during
// the processing of child loops.
- if (TheOnlySucc && LI.getLoopFor(BB) == &L)
+ bool TakeFoldCandidate = TheOnlySucc && LI.getLoopFor(BB) == &L;
+ if (TakeFoldCandidate)
FoldCandidates.push_back(BB);
// Handle successors.
for (BasicBlock *Succ : successors(BB))
- if (!TheOnlySucc || TheOnlySucc == Succ) {
+ if (!TakeFoldCandidate || TheOnlySucc == Succ) {
if (L.contains(Succ))
LiveLoopBlocks.insert(Succ);
else
@@ -229,8 +261,10 @@ private:
// Now, all exit blocks that are not marked as live are dead.
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
+ SmallPtrSet<BasicBlock *, 8> UniqueDeadExits;
for (auto *ExitBlock : ExitBlocks)
- if (!LiveExitBlocks.count(ExitBlock))
+ if (!LiveExitBlocks.count(ExitBlock) &&
+ UniqueDeadExits.insert(ExitBlock).second)
DeadExitBlocks.push_back(ExitBlock);
// Whether or not the edge From->To will still be present in graph after the
@@ -239,7 +273,7 @@ private:
if (!LiveLoopBlocks.count(From))
return false;
BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(From);
- return !TheOnlySucc || TheOnlySucc == To;
+ return !TheOnlySucc || TheOnlySucc == To || LI.getLoopFor(From) != &L;
};
// The loop will not be destroyed if its latch is live.
@@ -317,14 +351,10 @@ private:
// Construct split preheader and the dummy switch to thread edges from it to
// dead exits.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
BasicBlock *Preheader = L.getLoopPreheader();
- BasicBlock *NewPreheader = Preheader->splitBasicBlock(
- Preheader->getTerminator(),
- Twine(Preheader->getName()).concat("-split"));
- DTU.deleteEdge(Preheader, L.getHeader());
- DTU.insertEdge(NewPreheader, L.getHeader());
- DTU.insertEdge(Preheader, NewPreheader);
+ BasicBlock *NewPreheader = llvm::SplitBlock(
+ Preheader, Preheader->getTerminator(), &DT, &LI, MSSAU);
+
IRBuilder<> Builder(Preheader->getTerminator());
SwitchInst *DummySwitch =
Builder.CreateSwitch(Builder.getInt32(0), NewPreheader);
@@ -343,75 +373,106 @@ private:
}
assert(DummyIdx != 0 && "Too many dead exits!");
DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB);
- DTU.insertEdge(Preheader, BB);
+ DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
- OuterLoop->addBasicBlockToLoop(NewPreheader, LI);
-
// When we break dead edges, the outer loop may become unreachable from
// the current loop. We need to fix loop info accordingly. For this, we
// find the most nested loop that still contains L and remove L from all
// loops that are inside of it.
- Loop *StillReachable = nullptr;
- for (BasicBlock *BB : LiveExitBlocks) {
- Loop *BBL = LI.getLoopFor(BB);
- if (BBL && BBL->contains(L.getHeader()))
- if (!StillReachable ||
- BBL->getLoopDepth() > StillReachable->getLoopDepth())
- StillReachable = BBL;
- }
+ Loop *StillReachable = getInnermostLoopFor(LiveExitBlocks, L, LI);
// Okay, our loop is no longer in the outer loop (and maybe not in some of
// its parents as well). Make the fixup.
if (StillReachable != OuterLoop) {
LI.changeLoopFor(NewPreheader, StillReachable);
- for (Loop *NotContaining = OuterLoop; NotContaining != StillReachable;
- NotContaining = NotContaining->getParentLoop()) {
- NotContaining->removeBlockFromLoop(NewPreheader);
- for (auto *BB : L.blocks())
- NotContaining->removeBlockFromLoop(BB);
- }
+ removeBlockFromLoops(NewPreheader, OuterLoop, StillReachable);
+ for (auto *BB : L.blocks())
+ removeBlockFromLoops(BB, OuterLoop, StillReachable);
OuterLoop->removeChildLoop(&L);
if (StillReachable)
StillReachable->addChildLoop(&L);
else
LI.addTopLevelLoop(&L);
+
+ // Some values from loops in [OuterLoop, StillReachable) could be used
+ // in the current loop. Now it is not their child anymore, so such uses
+ // require LCSSA Phis.
+ Loop *FixLCSSALoop = OuterLoop;
+ while (FixLCSSALoop->getParentLoop() != StillReachable)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+ assert(FixLCSSALoop && "Should be a loop!");
+ // We need all DT updates to be done before forming LCSSA.
+ DTU.applyUpdates(DTUpdates);
+ if (MSSAU)
+ MSSAU->applyUpdates(DTUpdates, DT);
+ DTUpdates.clear();
+ formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE);
}
}
+
+ if (MSSAU) {
+ // Clear all updates now. Facilitates deletes that follow.
+ DTU.applyUpdates(DTUpdates);
+ MSSAU->applyUpdates(DTUpdates, DT);
+ DTUpdates.clear();
+ if (VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+ }
}
/// Delete loop blocks that have become unreachable after folding. Make all
/// relevant updates to DT and LI.
void deleteDeadLoopBlocks() {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (MSSAU) {
- SmallPtrSet<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
- DeadLoopBlocks.end());
+ SmallSetVector<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
+ DeadLoopBlocks.end());
MSSAU->removeBlocks(DeadLoopBlocksSet);
}
+
+ // The function LI.erase has some invariants that need to be preserved when
+ // it tries to remove a loop which is not the top-level loop. In particular,
+  // it requires the loop's preheader to be strictly inside the loop's parent.
+  // We cannot just remove blocks one by one, because after removing the
+  // preheader we may break this invariant for the dead loop. So we detach and
+  // erase all dead loops beforehand.
+ for (auto *BB : DeadLoopBlocks)
+ if (LI.isLoopHeader(BB)) {
+ assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
+ Loop *DL = LI.getLoopFor(BB);
+ if (DL->getParentLoop()) {
+ for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop())
+ for (auto *BB : DL->getBlocks())
+ PL->removeBlockFromLoop(BB);
+ DL->getParentLoop()->removeChildLoop(DL);
+ LI.addTopLevelLoop(DL);
+ }
+ LI.erase(DL);
+ }
+
for (auto *BB : DeadLoopBlocks) {
assert(BB != L.getHeader() &&
"Header of the current loop cannot be dead!");
LLVM_DEBUG(dbgs() << "Deleting dead loop block " << BB->getName()
<< "\n");
- if (LI.isLoopHeader(BB)) {
- assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!");
- LI.erase(LI.getLoopFor(BB));
- }
LI.removeBlock(BB);
- DeleteDeadBlock(BB, &DTU);
- ++NumLoopBlocksDeleted;
}
+
+ DetatchDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true);
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
+ for (auto *BB : DeadLoopBlocks)
+ DTU.deleteBB(BB);
+
+ NumLoopBlocksDeleted += DeadLoopBlocks.size();
}
  /// Constant-fold terminators of blocks accumulated in FoldCandidates into the
/// unconditional branches.
void foldTerminators() {
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-
for (BasicBlock *BB : FoldCandidates) {
assert(LI.getLoopFor(BB) == &L && "Should be a loop block!");
BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB);
@@ -453,7 +514,7 @@ private:
Term->eraseFromParent();
for (auto *DeadSucc : DeadSuccessors)
- DTU.deleteEdge(BB, DeadSucc);
+ DTUpdates.push_back({DominatorTree::Delete, BB, DeadSucc});
++NumTerminatorsFolded;
}
@@ -463,15 +524,18 @@ public:
ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT,
ScalarEvolution &SE,
MemorySSAUpdater *MSSAU)
- : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU) {}
+ : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU), DFS(&L),
+ DTU(DT, DomTreeUpdater::UpdateStrategy::Eager) {}
bool run() {
assert(L.getLoopLatch() && "Should be single latch!");
// Collect all available information about status of blocks after constant
// folding.
analyze();
+ BasicBlock *Header = L.getHeader();
+ (void)Header;
- LLVM_DEBUG(dbgs() << "In function " << L.getHeader()->getParent()->getName()
+ LLVM_DEBUG(dbgs() << "In function " << Header->getParent()->getName()
<< ": ");
if (HasIrreducibleCFG) {
@@ -483,7 +547,7 @@ public:
if (FoldCandidates.empty()) {
LLVM_DEBUG(
dbgs() << "No constant terminator folding candidates found in loop "
- << L.getHeader()->getName() << "\n");
+ << Header->getName() << "\n");
return false;
}
@@ -491,8 +555,7 @@ public:
if (DeleteCurrentLoop) {
LLVM_DEBUG(
dbgs()
- << "Give up constant terminator folding in loop "
- << L.getHeader()->getName()
+ << "Give up constant terminator folding in loop " << Header->getName()
<< ": we don't currently support deletion of the current loop.\n");
return false;
}
@@ -503,8 +566,7 @@ public:
L.getNumBlocks()) {
LLVM_DEBUG(
dbgs() << "Give up constant terminator folding in loop "
- << L.getHeader()->getName()
- << ": we don't currently"
+ << Header->getName() << ": we don't currently"
" support blocks that are not dead, but will stop "
"being a part of the loop after constant-folding.\n");
return false;
@@ -515,8 +577,7 @@ public:
LLVM_DEBUG(dump());
LLVM_DEBUG(dbgs() << "Constant-folding " << FoldCandidates.size()
- << " terminators in loop " << L.getHeader()->getName()
- << "\n");
+ << " terminators in loop " << Header->getName() << "\n");
// Make the actual transforms.
handleDeadExits();
@@ -524,20 +585,36 @@ public:
if (!DeadLoopBlocks.empty()) {
LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size()
- << " dead blocks in loop " << L.getHeader()->getName()
- << "\n");
+ << " dead blocks in loop " << Header->getName() << "\n");
deleteDeadLoopBlocks();
+ } else {
+ // If we didn't do updates inside deleteDeadLoopBlocks, do them here.
+ DTU.applyUpdates(DTUpdates);
+ DTUpdates.clear();
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
#ifndef NDEBUG
// Make sure that we have preserved all data structures after the transform.
- DT.verify();
- assert(DT.isReachableFromEntry(L.getHeader()));
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full) &&
+ "DT broken after transform!");
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast) &&
+ "DT broken after transform!");
+#endif
+ assert(DT.isReachableFromEntry(Header));
LI.verify(DT);
#endif
return true;
}
+
+ bool foldingBreaksCurrentLoop() const {
+ return DeleteCurrentLoop;
+ }
};
} // namespace
@@ -545,7 +622,8 @@ public:
/// branches.
static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
ScalarEvolution &SE,
- MemorySSAUpdater *MSSAU) {
+ MemorySSAUpdater *MSSAU,
+ bool &IsLoopDeleted) {
if (!EnableTermFolding)
return false;
@@ -555,7 +633,9 @@ static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI,
return false;
ConstantTerminatorFoldingImpl BranchFolder(L, LI, DT, SE, MSSAU);
- return BranchFolder.run();
+ bool Changed = BranchFolder.run();
+ IsLoopDeleted = Changed && BranchFolder.foldingBreaksCurrentLoop();
+ return Changed;
}
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
@@ -587,11 +667,15 @@ static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT,
}
static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
- ScalarEvolution &SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution &SE, MemorySSAUpdater *MSSAU,
+ bool &isLoopDeleted) {
bool Changed = false;
// Constant-fold terminators with known constant conditions.
- Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU);
+ Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU, isLoopDeleted);
+
+ if (isLoopDeleted)
+ return true;
// Eliminate unconditional branches by merging blocks into their predecessors.
Changed |= mergeBlocksIntoPredecessors(L, DT, LI, MSSAU);
@@ -604,15 +688,23 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI,
PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
+ LPMUpdater &LPMU) {
Optional<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency && AR.MSSA)
MSSAU = MemorySSAUpdater(AR.MSSA);
+ bool DeleteCurrentLoop = false;
if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DeleteCurrentLoop))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ if (DeleteCurrentLoop)
+ LPMU.markLoopAsDeleted(L, "loop-simplifycfg");
+
+ auto PA = getLoopPassPreservedAnalyses();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
@@ -623,7 +715,7 @@ public:
initializeLoopSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &) override {
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipLoop(L))
return false;
@@ -637,8 +729,13 @@ public:
if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
}
- return simplifyLoopCFG(*L, DT, LI, SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ bool DeleteCurrentLoop = false;
+ bool Changed = simplifyLoopCFG(
+ *L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DeleteCurrentLoop);
+ if (DeleteCurrentLoop)
+ LPM.markLoopAsDeleted(*L);
+ return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
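
A recurring theme in the LoopSimplifyCFG rework above is switching from eager dominator-tree edits (one DomTreeUpdater call per folded edge) to a queue of updates that is applied in batches once a whole phase of CFG surgery is finished. The sketch below models that pattern with a stand-in updater class; DomTreeUpdater and DominatorTree::UpdateType are the real LLVM names, but FakeDomTreeUpdater, CFGUpdate, and the block-name strings here are purely illustrative.

#include <cstdio>
#include <string>
#include <vector>

enum class UpdateKind { Insert, Delete };

struct CFGUpdate {
  UpdateKind Kind;
  std::string From, To;
};

// Stand-in for DomTreeUpdater: consumes a whole batch of edge changes at once
// instead of rebuilding dominator information after every single edit.
struct FakeDomTreeUpdater {
  void applyUpdates(const std::vector<CFGUpdate> &Updates) {
    for (const CFGUpdate &U : Updates)
      std::printf("%s edge %s -> %s\n",
                  U.Kind == UpdateKind::Insert ? "insert" : "delete",
                  U.From.c_str(), U.To.c_str());
  }
};

int main() {
  std::vector<CFGUpdate> Pending;
  // Queue the edge changes while terminators are being folded...
  Pending.push_back({UpdateKind::Delete, "loop.latch", "dead.exit"});
  Pending.push_back({UpdateKind::Insert, "preheader", "dead.exit"});

  // ...and apply them in one go once the CFG has reached its final shape.
  FakeDomTreeUpdater DTU;
  DTU.applyUpdates(Pending);
  Pending.clear();
  return 0;
}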
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 2f7ad2126ed3..975452e13f09 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -1,9 +1,8 @@
//===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -291,10 +290,9 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
ColdLoopBBs.push_back(B);
LoopBlockNumber[B] = ++i;
}
- std::stable_sort(ColdLoopBBs.begin(), ColdLoopBBs.end(),
- [&](BasicBlock *A, BasicBlock *B) {
- return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
- });
+ llvm::stable_sort(ColdLoopBBs, [&](BasicBlock *A, BasicBlock *B) {
+ return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
+ });
  // Traverse the preheader's instructions in reverse order because if A depends
  // on B (A appears after B), A needs to be sunk first before B can be
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 773ffb9df0a2..59a387a186b8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1,9 +1,8 @@
//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -116,6 +115,7 @@
#include <cstdlib>
#include <iterator>
#include <limits>
+#include <numeric>
#include <map>
#include <utility>
@@ -155,11 +155,19 @@ static cl::opt<bool> FilterSameScaledReg(
cl::desc("Narrow LSR search space by filtering non-optimal formulae"
" with the same ScaledReg and Scale"));
+static cl::opt<bool> EnableBackedgeIndexing(
+ "lsr-backedge-indexing", cl::Hidden, cl::init(true),
+ cl::desc("Enable the generation of cross iteration indexed memops"));
+
static cl::opt<unsigned> ComplexityLimit(
"lsr-complexity-limit", cl::Hidden,
cl::init(std::numeric_limits<uint16_t>::max()),
cl::desc("LSR search space complexity limit"));
+static cl::opt<unsigned> SetupCostDepthLimit(
+ "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
+ cl::desc("The limit on recursion depth for LSRs setup cost"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1007,10 +1015,15 @@ namespace {
/// This class is used to measure and compare candidate formulae.
class Cost {
+ const Loop *L = nullptr;
+ ScalarEvolution *SE = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
TargetTransformInfo::LSRCost C;
public:
- Cost() {
+ Cost() = delete;
+ Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI) :
+ L(L), SE(&SE), TTI(&TTI) {
C.Insns = 0;
C.NumRegs = 0;
C.AddRecCost = 0;
@@ -1021,7 +1034,7 @@ public:
C.ScaleCost = 0;
}
- bool isLess(Cost &Other, const TargetTransformInfo &TTI);
+ bool isLess(Cost &Other);
void Lose();
@@ -1040,12 +1053,9 @@ public:
return C.NumRegs == ~0u;
}
- void RateFormula(const TargetTransformInfo &TTI,
- const Formula &F,
+ void RateFormula(const Formula &F,
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
@@ -1053,17 +1063,11 @@ public:
void dump() const;
private:
- void RateRegister(const SCEV *Reg,
- SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- const TargetTransformInfo &TTI);
- void RatePrimaryRegister(const SCEV *Reg,
+ void RateRegister(const Formula &F, const SCEV *Reg,
+ SmallPtrSetImpl<const SCEV *> &Regs);
+ void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSetImpl<const SCEV *> *LoserRegs,
- const TargetTransformInfo &TTI);
+ SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
/// An operand value in an instruction which is to be replaced with some
@@ -1208,19 +1212,36 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr);
+static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
+ if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
+ return 1;
+ if (Depth == 0)
+ return 0;
+ if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
+ return getSetupCost(S->getStart(), Depth - 1);
+ if (auto S = dyn_cast<SCEVCastExpr>(Reg))
+ return getSetupCost(S->getOperand(), Depth - 1);
+ if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
+ return std::accumulate(S->op_begin(), S->op_end(), 0,
+ [&](unsigned i, const SCEV *Reg) {
+ return i + getSetupCost(Reg, Depth - 1);
+ });
+ if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
+ return getSetupCost(S->getLHS(), Depth - 1) +
+ getSetupCost(S->getRHS(), Depth - 1);
+ return 0;
+}
+
/// Tally up interesting quantities from the given register.
-void Cost::RateRegister(const SCEV *Reg,
- SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- const TargetTransformInfo &TTI) {
+void Cost::RateRegister(const Formula &F, const SCEV *Reg,
+ SmallPtrSetImpl<const SCEV *> &Regs) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
// If this is an addrec for another loop, it should be an invariant
// with respect to L since L is the innermost loop (at least
// for now LSR only handles innermost loops).
if (AR->getLoop() != L) {
// If the AddRec exists, consider it's register free and leave it alone.
- if (isExistingPhi(AR, SE))
+ if (isExistingPhi(AR, *SE))
return;
// It is bad to allow LSR for current loop to add induction variables
@@ -1236,16 +1257,24 @@ void Cost::RateRegister(const SCEV *Reg,
}
unsigned LoopCost = 1;
- if (TTI.shouldFavorPostInc()) {
- const SCEV *LoopStep = AR->getStepRecurrence(SE);
- if (isa<SCEVConstant>(LoopStep)) {
- // Check if a post-indexed load/store can be used.
- if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
- TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+ if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
+ TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
+
+ // If the step size matches the base offset, we could use pre-indexed
+ // addressing.
+ if (TTI->shouldFavorBackedgeIndex(L)) {
+ if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
+ if (Step->getAPInt() == F.BaseOffset)
+ LoopCost = 0;
+ }
+
+ if (TTI->shouldFavorPostInc()) {
+ const SCEV *LoopStep = AR->getStepRecurrence(*SE);
+ if (isa<SCEVConstant>(LoopStep)) {
const SCEV *LoopStart = AR->getStart();
if (!isa<SCEVConstant>(LoopStart) &&
- SE.isLoopInvariant(LoopStart, L))
- LoopCost = 0;
+ SE->isLoopInvariant(LoopStart, L))
+ LoopCost = 0;
}
}
}
@@ -1255,7 +1284,7 @@ void Cost::RateRegister(const SCEV *Reg,
// TODO: The non-affine case isn't precisely modeled here.
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
- RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
+ RateRegister(F, AR->getOperand(1), Regs);
if (isLoser())
return;
}
@@ -1265,43 +1294,34 @@ void Cost::RateRegister(const SCEV *Reg,
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
- if (!isa<SCEVUnknown>(Reg) &&
- !isa<SCEVConstant>(Reg) &&
- !(isa<SCEVAddRecExpr>(Reg) &&
- (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
- isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
- ++C.SetupCost;
+ C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
+  // Ensure we don't, even with the recursion limit, produce invalid costs.
+ C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
- SE.hasComputableLoopEvolution(Reg, L);
+ SE->hasComputableLoopEvolution(Reg, L);
}
/// Record this register in the set. If we haven't seen it before, rate
/// it. Optional LoserRegs provides a way to declare any formula that refers to
/// one of those regs an instant loser.
-void Cost::RatePrimaryRegister(const SCEV *Reg,
+void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
- SmallPtrSetImpl<const SCEV *> *LoserRegs,
- const TargetTransformInfo &TTI) {
+ SmallPtrSetImpl<const SCEV *> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
Lose();
return;
}
if (Regs.insert(Reg).second) {
- RateRegister(Reg, Regs, L, SE, DT, TTI);
+ RateRegister(F, Reg, Regs);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
-void Cost::RateFormula(const TargetTransformInfo &TTI,
- const Formula &F,
+void Cost::RateFormula(const Formula &F,
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
@@ -1314,7 +1334,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
+ RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
if (isLoser())
return;
}
@@ -1323,7 +1343,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
Lose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
+ RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
if (isLoser())
return;
}
@@ -1334,11 +1354,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Do not count the base and a possible second register if the target
// allows to fold 2 registers.
C.NumBaseAdds +=
- NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
C.NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
- C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
+ C.ScaleCost += getScalingFactorCost(*TTI, LU, F, *L);
// Tally up the non-zero immediates.
for (const LSRFixup &Fixup : LU.Fixups) {
@@ -1353,7 +1373,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Check with target if this offset with this instruction is
// specifically not supported.
if (LU.Kind == LSRUse::Address && Offset != 0 &&
- !isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+ !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
C.NumBaseAdds++;
}
@@ -1366,7 +1386,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
// additional instruction (at least fill).
- unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+ unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1;
if (C.NumRegs > TTIRegNum) {
// Cost already exceeded TTIRegNum, then only newly added register can add
// new instructions.
@@ -1386,7 +1406,8 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
//
// For {-10, +, 1}:
// i = i + 1;
- if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && !TTI.canMacroFuseCmp())
+ if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
+ !TTI->canMacroFuseCmp())
C.Insns++;
// Each new AddRec adds 1 instruction to calculation.
C.Insns += (C.AddRecCost - PrevAddRecCost);
@@ -1410,11 +1431,11 @@ void Cost::Lose() {
}
/// Choose the lower cost.
-bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
+bool Cost::isLess(Cost &Other) {
if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
C.Insns != Other.C.Insns)
return C.Insns < Other.C.Insns;
- return TTI.isLSRCostLess(C, Other.C);
+ return TTI->isLSRCostLess(C, Other.C);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1888,8 +1909,11 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
+ AssumptionCache &AC;
+ TargetLibraryInfo &LibInfo;
const TargetTransformInfo &TTI;
Loop *const L;
+ bool FavorBackedgeIndex = false;
bool Changed = false;
/// This is the insert position that the current loop's induction variable
@@ -1910,7 +1934,7 @@ class LSRInstance {
SmallSetVector<Type *, 4> Types;
/// The list of interesting uses.
- SmallVector<LSRUse, 16> Uses;
+ mutable SmallVector<LSRUse, 16> Uses;
/// Track which uses use which register candidates.
RegUseTracker RegUses;
@@ -2025,7 +2049,8 @@ class LSRInstance {
public:
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
- LoopInfo &LI, const TargetTransformInfo &TTI);
+ LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
+ TargetLibraryInfo &LibInfo);
bool getChanged() const { return Changed; }
@@ -2804,7 +2829,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSetImpl<Instruction*> &Users,
- ScalarEvolution &SE, const TargetTransformInfo &TTI) {
+ ScalarEvolution &SE) {
if (StressIVChain)
return true;
@@ -3064,7 +3089,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
+ ChainUsersVec[UsersIdx].FarUsers, SE))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@@ -3078,7 +3103,7 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+
for (const IVInc &Inc : Chain) {
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -3100,7 +3125,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
- IncOffset, /*HaseBaseReg=*/false))
+ IncOffset, /*HasBaseReg=*/false))
return false;
return true;
@@ -3210,6 +3235,9 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
}
void LSRInstance::CollectFixupsAndInitialFormulae() {
+ BranchInst *ExitBranch = nullptr;
+ bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &LibInfo);
+
for (const IVStrideUse &U : IU) {
Instruction *UserInst = U.getUser();
// Skip IV users that are part of profitable IV Chains.
@@ -3239,6 +3267,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// equality icmps, thanks to IndVarSimplify.
if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
if (CI->isEquality()) {
+          // If CI can be saved on some targets (e.g. replaced inside a hardware
+          // loop on PowerPC), there is no need to generate initial formulae for it.
+ if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
+ continue;
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
@@ -3738,10 +3770,11 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
- const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
- for (int64_t Offset : Worklist) {
+
+ auto GenerateOffset = [&](const SCEV *G, int64_t Offset) {
Formula F = Base;
F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
+
if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
LU.AccessTy, F)) {
// Add the offset to the base register.
@@ -3761,7 +3794,35 @@ void LSRInstance::GenerateConstantOffsetsImpl(
(void)InsertFormula(LU, LUIdx, F);
}
+ };
+
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+
+ // With constant offsets and constant steps, we can generate pre-inc
+ // accesses by having the offset equal the step. So, for access #0 with a
+ // step of 8, we generate a G - 8 base which would require the first access
+ // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
+ // for itself and hopefully becomes the base for other accesses. This means
+  // that a single pre-indexed access can be generated to become the new
+ // base pointer for each iteration of the loop, resulting in no extra add/sub
+ // instructions for pointer updating.
+ if (FavorBackedgeIndex && LU.Kind == LSRUse::Address) {
+ if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
+ if (auto *StepRec =
+ dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
+ const APInt &StepInt = StepRec->getAPInt();
+ int64_t Step = StepInt.isNegative() ?
+ StepInt.getSExtValue() : StepInt.getZExtValue();
+
+ for (int64_t Offset : Worklist) {
+ Offset -= Step;
+ GenerateOffset(G, Offset);
+ }
+ }
+ }
}
+ for (int64_t Offset : Worklist)
+ GenerateOffset(G, Offset);
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
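
The comment above describes the backedge-indexing trick: for an address recurrence {G,+,Step}, also proposing Offset - Step yields a (G - Step) base, so the very first access can be written pre-indexed and the pointer update comes for free on each iteration. A self-contained sketch of that offset-generation shape in plain standard C++ (the names are illustrative, not the pass's API):

    #include <cstdint>
    #include <functional>
    #include <vector>

    // First propose the pre-indexed candidates (Offset - Step), then the
    // original offsets, mirroring the two loops in the hunk above.
    void proposeOffsets(int64_t Step, const std::vector<int64_t> &Worklist,
                        const std::function<void(int64_t)> &GenerateOffset) {
      for (int64_t Offset : Worklist)
        GenerateOffset(Offset - Step);
      for (int64_t Offset : Worklist)
        GenerateOffset(Offset);
    }

With Step = 8 and a worklist of {0}, this proposes -8 and then 0, i.e. exactly the (G - 8) base described in the comment.
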
@@ -3968,9 +4029,27 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
- if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
- for (const SCEV *&BaseReg : F.BaseRegs)
- BaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+ // Sometimes SCEV is able to prove zero during ext transform. It may
+ // happen if SCEV did not do all possible transforms while creating the
+ // initial node (maybe due to depth limitations), but it can do them while
+ // taking ext.
+ if (F.ScaledReg) {
+ const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+ if (NewScaledReg->isZero())
+ continue;
+ F.ScaledReg = NewScaledReg;
+ }
+ bool HasZeroBaseReg = false;
+ for (const SCEV *&BaseReg : F.BaseRegs) {
+ const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+ if (NewBaseReg->isZero()) {
+ HasZeroBaseReg = true;
+ break;
+ }
+ BaseReg = NewBaseReg;
+ }
+ if (HasZeroBaseReg)
+ continue;
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
@@ -4067,11 +4146,17 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
    // Conservatively examine offsets between this orig reg and a few selected
// other orig regs.
+ int64_t First = Imms.begin()->first;
+ int64_t Last = std::prev(Imms.end())->first;
+ // Compute (First + Last) / 2 without overflow using the fact that
+    // First + Last = 2 * (First & Last) + (First ^ Last).
+ int64_t Avg = (First & Last) + ((First ^ Last) >> 1);
+ // If the result is negative and First is odd and Last even (or vice versa),
+ // we rounded towards -inf. Add 1 in that case, to round towards 0.
+ Avg = Avg + ((First ^ Last) & ((uint64_t)Avg >> 63));
ImmMapTy::const_iterator OtherImms[] = {
- Imms.begin(), std::prev(Imms.end()),
- Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
- 2)
- };
+ Imms.begin(), std::prev(Imms.end()),
+ Imms.lower_bound(Avg)};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
if (M == J || M == JE) continue;
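
The Avg computation above avoids forming First + Last, which could overflow int64_t, by using the identity a + b == 2*(a & b) + (a ^ b) and then nudging the floored result toward zero. A small self-contained check of the trick (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Overflow-free midpoint of two int64_t values, rounded toward zero,
    // exactly as Avg is computed in the hunk above.
    static int64_t midpointTowardZero(int64_t First, int64_t Last) {
      int64_t Avg = (First & Last) + ((First ^ Last) >> 1); // floor((First + Last) / 2)
      Avg += (First ^ Last) & ((uint64_t)Avg >> 63);        // +1 if negative and parities differ
      return Avg;
    }

    int main() {
      assert(midpointTowardZero(-7, 2) == -2);                               // same as (-7 + 2) / 2 in C++
      assert(midpointTowardZero(INT64_MAX, INT64_MAX - 2) == INT64_MAX - 1); // naive sum would overflow
      return 0;
    }
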
@@ -4249,9 +4334,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// avoids the need to recompute this information across formulae using the
// same bad AddRec. Passing LoserRegs is also essential unless we remove
// the corresponding bad register from the Regs set.
- Cost CostF;
+ Cost CostF(L, SE, TTI);
Regs.clear();
- CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
+ CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
// by uses within other loops that have some non-trivial address mode or
@@ -4282,10 +4367,10 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Formula &Best = LU.Formulae[P.first->second];
- Cost CostBest;
+ Cost CostBest(L, SE, TTI);
Regs.clear();
- CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
- if (CostF.isLess(CostBest, TTI))
+ CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
+ if (CostF.isLess(CostBest))
std::swap(F, Best);
LLVM_DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
@@ -4357,7 +4442,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
- NewF.BaseOffset += C->getValue()->getSExtValue();
+ //FIXME: Formulas should store bitwidth to do wrapping properly.
+ // See PR41034.
+ NewF.BaseOffset += (uint64_t)C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
@@ -4400,7 +4487,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
- if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
LLVM_DEBUG(
@@ -4533,12 +4620,13 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
// If the new register numbers are the same, choose the Formula with
// less Cost.
- Cost CostFA, CostFB;
+ Cost CostFA(L, SE, TTI);
+ Cost CostFB(L, SE, TTI);
Regs.clear();
- CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU);
+ CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
Regs.clear();
- CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU);
- return CostFA.isLess(CostFB, TTI);
+ CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
+ return CostFA.isLess(CostFB);
};
bool Any = false;
@@ -4824,7 +4912,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
ReqRegs.insert(S);
SmallPtrSet<const SCEV *, 16> NewRegs;
- Cost NewCost;
+ Cost NewCost(L, SE, TTI);
for (const Formula &F : LU.Formulae) {
// Ignore formulae which may not be ideal in terms of register reuse of
// ReqRegs. The formula should use all required registers before
@@ -4848,8 +4936,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
- if (NewCost.isLess(SolutionCost, TTI)) {
+ NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
+ if (NewCost.isLess(SolutionCost)) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
@@ -4858,9 +4946,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
- dbgs() << ".\n Regs:"; for (const SCEV *S
- : NewRegs) dbgs()
- << ' ' << *S;
+ dbgs() << ".\nRegs:\n";
+ for (const SCEV *S : NewRegs) dbgs()
+ << "- " << *S << "\n";
dbgs() << '\n');
SolutionCost = NewCost;
@@ -4875,9 +4963,9 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
- Cost SolutionCost;
+ Cost SolutionCost(L, SE, TTI);
SolutionCost.Lose();
- Cost CurCost;
+ Cost CurCost(L, SE, TTI);
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
Workspace.reserve(Uses.size());
@@ -5215,6 +5303,7 @@ void LSRInstance::RewriteForPHI(
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ bool needUpdateFixups = false;
BasicBlock *BB = PN->getIncomingBlock(i);
// If this is a critical edge, split the edge so that we do not insert
@@ -5233,7 +5322,7 @@ void LSRInstance::RewriteForPHI(
NewBB = SplitCriticalEdge(BB, Parent,
CriticalEdgeSplittingOptions(&DT, &LI)
.setMergeIdenticalEdges()
- .setDontDeleteUselessPHIs());
+ .setKeepOneInputPHIs());
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI);
@@ -5253,6 +5342,8 @@ void LSRInstance::RewriteForPHI(
e = PN->getNumIncomingValues();
BB = NewBB;
i = PN->getBasicBlockIndex(BB);
+
+ needUpdateFixups = true;
}
}
}
@@ -5277,6 +5368,44 @@ void LSRInstance::RewriteForPHI(
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
+
+ // If LSR splits critical edge and phi node has other pending
+ // fixup operands, we need to update those pending fixups. Otherwise
+ // formulae will not be implemented completely and some instructions
+ // will not be eliminated.
+ if (needUpdateFixups) {
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
+ for (LSRFixup &Fixup : Uses[LUIdx].Fixups)
+ // If fixup is supposed to rewrite some operand in the phi
+ // that was just updated, it may be already moved to
+ // another phi node. Such fixup requires update.
+ if (Fixup.UserInst == PN) {
+ // Check if the operand we try to replace still exists in the
+ // original phi.
+ bool foundInOriginalPHI = false;
+ for (const auto &val : PN->incoming_values())
+ if (val == Fixup.OperandValToReplace) {
+ foundInOriginalPHI = true;
+ break;
+ }
+
+ // If fixup operand found in original PHI - nothing to do.
+ if (foundInOriginalPHI)
+ continue;
+
+ // Otherwise it might be moved to another PHI and requires update.
+ // If fixup operand not found in any of the incoming blocks that
+ // means we have already rewritten it - nothing to do.
+ for (const auto &Block : PN->blocks())
+ for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
+ ++I) {
+ PHINode *NewPN = cast<PHINode>(I);
+ for (const auto &val : NewPN->incoming_values())
+ if (val == Fixup.OperandValToReplace)
+ Fixup.UserInst = NewPN;
+ }
+ }
+ }
}
}
@@ -5360,8 +5489,11 @@ void LSRInstance::ImplementSolution(
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
- const TargetTransformInfo &TTI)
- : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) {
+ const TargetTransformInfo &TTI, AssumptionCache &AC,
+ TargetLibraryInfo &LibInfo)
+ : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), LibInfo(LibInfo), TTI(TTI), L(L),
+ FavorBackedgeIndex(EnableBackedgeIndexing &&
+ TTI.shouldFavorBackedgeIndex(L)) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
@@ -5556,6 +5688,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
@@ -5566,11 +5700,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
DominatorTree &DT, LoopInfo &LI,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ AssumptionCache &AC,
+ TargetLibraryInfo &LibInfo) {
+
bool Changed = false;
// Run the main LSR transformation.
- Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged();
+ Changed |= LSRInstance(L, IU, SE, DT, LI, TTI, AC, LibInfo).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -5601,14 +5738,17 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());
- return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
+ auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, LibInfo);
}
PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
- AR.DT, AR.LI, AR.TTI))
+ AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
diff --git a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index da46210b6fdd..86891eb451bb 100644
--- a/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -1,9 +1,8 @@
//===- LoopUnrollAndJam.cpp - Loop unroll and jam pass --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -295,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, OptLevel, None, None, None, None, None, None);
+ L, SE, TTI, nullptr, nullptr, OptLevel,
+ None, None, None, None, None, None);
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 38b80f48ed0e..2fa7436213dd 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1,9 +1,8 @@
//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,7 +23,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -56,6 +57,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
#include <cassert>
@@ -69,6 +71,12 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
+cl::opt<bool> llvm::ForgetSCEVInLoopUnroll(
+ "forget-scev-loop-unroll", cl::init(false), cl::Hidden,
+ cl::desc("Forget everything in SCEV when doing LoopUnroll, instead of just"
+             " the current top-most loop. This is sometimes preferred to reduce"
+ " compile time."));
+
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The cost threshold for loop unrolling"));
@@ -166,7 +174,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
@@ -199,9 +208,12 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
TTI.getUnrollingPreferences(L, SE, UP);
// Apply size attributes
- if (L->getHeader()->getParent()->optForSize()) {
+ bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+ if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
+ UP.MaxPercentThresholdBoost = 100;
}
// Apply any user values specified by cl::opt
@@ -964,8 +976,10 @@ bool llvm::computeUnrollCount(
static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
- OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
- bool OnlyWhenForced, Optional<unsigned> ProvidedCount,
+ OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ bool PreserveLCSSA, int OptLevel,
+ bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
Optional<bool> ProvidedAllowPeeling) {
@@ -986,15 +1000,19 @@ static LoopUnrollResult tryToUnrollLoop(
if (OnlyWhenForced && !(TM & TM_Enable))
return LoopUnrollResult::Unmodified;
+ bool OptForSize = L->getHeader()->getParent()->hasOptSize();
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+ L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedAllowPeeling);
- // Exit early if unrolling is disabled.
- if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
+
+ // Exit early if unrolling is disabled. For OptForSize, we pick the loop size
+ // as threshold later on.
+ if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) &&
+ !OptForSize)
return LoopUnrollResult::Unmodified;
SmallPtrSet<const Value *, 32> EphValues;
@@ -1009,6 +1027,12 @@ static LoopUnrollResult tryToUnrollLoop(
<< " instructions.\n");
return LoopUnrollResult::Unmodified;
}
+
+ // When optimizing for size, use LoopSize as threshold, to (fully) unroll
+ // loops, if it does not increase code size.
+ if (OptForSize)
+ UP.Threshold = std::max(UP.Threshold, LoopSize);
+
if (NumInlineCandidates != 0) {
LLVM_DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return LoopUnrollResult::Unmodified;
@@ -1081,8 +1105,10 @@ static LoopUnrollResult tryToUnrollLoop(
// Unroll the loop.
Loop *RemainderLoop = nullptr;
LoopUnrollResult UnrollResult = UnrollLoop(
- L, UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
- UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ L,
+ {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
+ UseUpperBound, MaxOrZero, TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+ ForgetAllSCEV},
LI, &SE, &DT, &AC, &ORE, PreserveLCSSA, &RemainderLoop);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
@@ -1132,6 +1158,11 @@ public:
/// metadata are considered. All other loops are skipped.
bool OnlyWhenForced;
+ /// If false, when SCEV is invalidated, only forget everything in the
+  /// top-most loop of the loop being processed (call forgetTopMostLoop).
+ /// Otherwise, forgetAllLoops and rebuild when needed next.
+ bool ForgetAllSCEV;
+
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1140,15 +1171,16 @@ public:
Optional<bool> ProvidedAllowPeeling;
LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false,
- Optional<unsigned> Threshold = None,
+ bool ForgetAllSCEV = false, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None,
Optional<bool> AllowPeeling = None)
: LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced),
- ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold),
- ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime),
- ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) {
+ ForgetAllSCEV(ForgetAllSCEV), ProvidedCount(std::move(Count)),
+ ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
+ ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound),
+ ProvidedAllowPeeling(AllowPeeling) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -1171,9 +1203,10 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
- ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
- ProvidedUpperBound, ProvidedAllowPeeling);
+ L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
+ PreserveLCSSA, OptLevel, OnlyWhenForced,
+ ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
@@ -1203,14 +1236,14 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
- int Threshold, int Count, int AllowPartial,
- int Runtime, int UpperBound,
+ bool ForgetAllSCEV, int Threshold, int Count,
+ int AllowPartial, int Runtime, int UpperBound,
int AllowPeeling) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
return new LoopUnroll(
- OptLevel, OnlyWhenForced,
+ OptLevel, OnlyWhenForced, ForgetAllSCEV,
Threshold == -1 ? None : Optional<unsigned>(Threshold),
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
@@ -1219,8 +1252,10 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
AllowPeeling == -1 ? None : Optional<bool>(AllowPeeling));
}
-Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) {
- return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
+ bool ForgetAllSCEV) {
+ return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
+ 0, 0, 0, 0);
}
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
@@ -1250,8 +1285,9 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ /*BFI*/ nullptr, /*PSI*/ nullptr,
/*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
- /*Count*/ None,
+ ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ false) != LoopUnrollResult::Unmodified;
@@ -1352,6 +1388,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
ProfileSummaryInfo *PSI =
MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
bool Changed = false;
@@ -1361,7 +1399,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// will simplify all loops, regardless of whether anything end up being
// unrolled.
for (auto &L : LI) {
- Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, false /* PreserveLCSSA */);
+ Changed |=
+ simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
}
@@ -1387,9 +1426,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// The API here is quite complex to call and we allow to select some
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
- &L, DT, &LI, SE, TTI, AC, ORE,
+ &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
- /*Count*/ None,
+ UnrollOpts.ForgetSCEV, /*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 4a089dfa7dbf..b5b8e720069c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1,9 +1,8 @@
//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -658,7 +657,7 @@ bool LoopUnswitch::processCurrentLoop() {
}
// Do not do non-trivial unswitch while optimizing for size.
- // FIXME: Use Function::optForSize().
+ // FIXME: Use Function::hasOptSize().
if (OptimizeForSize ||
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
return false;
@@ -1405,8 +1404,8 @@ static void RemoveFromWorklist(Instruction *I,
/// When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
static void ReplaceUsesOfWith(Instruction *I, Value *V,
- std::vector<Instruction*> &Worklist,
- Loop *L, LPPassManager *LPM) {
+ std::vector<Instruction *> &Worklist, Loop *L,
+ LPPassManager *LPM, MemorySSAUpdater *MSSAU) {
LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");
// Add uses to the worklist, which may be dead now.
@@ -1420,8 +1419,11 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
LPM->deleteSimpleAnalysisValue(I, L);
RemoveFromWorklist(I, Worklist);
I->replaceAllUsesWith(V);
- if (!I->mayHaveSideEffects())
+ if (!I->mayHaveSideEffects()) {
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
+ }
++NumSimplify;
}
@@ -1548,8 +1550,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
ConstantInt::getTrue(Context), NewSISucc);
// Release the PHI operands for this edge.
for (PHINode &PN : NewSISucc->phis())
- PN.setIncomingValue(PN.getBasicBlockIndex(Switch),
- UndefValue::get(PN.getType()));
+ PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType()));
// Tell the domtree about the new block. We don't fully update the
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of
@@ -1596,7 +1597,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// 'false'. TODO: update the domtree properly so we can pass it here.
if (Value *V = SimplifyInstruction(I, DL))
if (LI->replacementPreservesLCSSAForm(I, V)) {
- ReplaceUsesOfWith(I, V, Worklist, L, LPM);
+ ReplaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get());
continue;
}
@@ -1616,7 +1617,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Resolve any single entry PHI nodes in Succ.
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
- ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+ ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM,
+ MSSAU.get());
// If Succ has any successors with PHI nodes, update them to have
// entries coming from Pred instead of Succ.
diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 83861b98fbd8..896dd8bcb922 100644
--- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -1,9 +1,8 @@
//===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -357,14 +356,22 @@ bool LoopVersioningLICM::legalLoopMemoryAccesses() {
/// 1) Check all load store in loop body are non atomic & non volatile.
/// 2) Check function call safety, by ensuring its not accessing memory.
/// 3) Loop body shouldn't have any may throw instruction.
+/// 4) Loop body shouldn't have any convergent or noduplicate instructions.
bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) {
assert(I != nullptr && "Null instruction found!");
// Check function call safety
- if (auto *Call = dyn_cast<CallBase>(I))
+ if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (Call->isConvergent() || Call->cannotDuplicate()) {
+ LLVM_DEBUG(dbgs() << " Convergent call site found.\n");
+ return false;
+ }
+
if (!AA->doesNotAccessMemory(Call)) {
LLVM_DEBUG(dbgs() << " Unsafe call site found.\n");
return false;
}
+ }
+
  // Avoid loops with possibility of throw
if (I->mayThrow()) {
LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n");
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index c165c5ece95c..e076424d9042 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -1,9 +1,8 @@
//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Cmp = CXI->getCompareOperand();
Value *Val = CXI->getNewValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
@@ -45,7 +44,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
- LoadInst *Orig = Builder.CreateLoad(Ptr);
+ LoadInst *Orig = Builder.CreateLoad(Val->getType(), Ptr);
Value *Res = nullptr;
switch (RMWI->getOperation()) {
@@ -87,6 +86,12 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Val),
Orig, Val);
break;
+ case AtomicRMWInst::FAdd:
+ Res = Builder.CreateFAdd(Orig, Val);
+ break;
+ case AtomicRMWInst::FSub:
+ Res = Builder.CreateFSub(Orig, Val);
+ break;
}
Builder.CreateStore(Res, Ptr);
RMWI->replaceAllUsesWith(Orig);
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 68bfa0030395..0d67c0d740ec 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -1,9 +1,8 @@
//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 4867b33d671f..9489e01774d6 100644
--- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -1,9 +1,8 @@
//===- LowerGuardIntrinsic.cpp - Lower the guard intrinsic ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/LowerWidenableCondition.cpp b/lib/Transforms/Scalar/LowerWidenableCondition.cpp
new file mode 100644
index 000000000000..5342f2ddcb6b
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerWidenableCondition.cpp
@@ -0,0 +1,85 @@
+//===- LowerWidenableCondition.cpp - Lower the guard intrinsic ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.widenable.condition intrinsic to default value
+// which is i1 true.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
+
+using namespace llvm;
+
+namespace {
+struct LowerWidenableConditionLegacyPass : public FunctionPass {
+ static char ID;
+ LowerWidenableConditionLegacyPass() : FunctionPass(ID) {
+ initializeLowerWidenableConditionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+};
+}
+
+static bool lowerWidenableCondition(Function &F) {
+ // Check if we can cheaply rule out the possibility of not having any work to
+ // do.
+ auto *WCDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_widenable_condition));
+ if (!WCDecl || WCDecl->use_empty())
+ return false;
+
+ using namespace llvm::PatternMatch;
+ SmallVector<CallInst *, 8> ToLower;
+ for (auto &I : instructions(F))
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+ ToLower.push_back(cast<CallInst>(&I));
+
+ if (ToLower.empty())
+ return false;
+
+ for (auto *CI : ToLower) {
+ CI->replaceAllUsesWith(ConstantInt::getTrue(CI->getContext()));
+ CI->eraseFromParent();
+ }
+ return true;
+}
+
+bool LowerWidenableConditionLegacyPass::runOnFunction(Function &F) {
+ return lowerWidenableCondition(F);
+}
+
+char LowerWidenableConditionLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerWidenableConditionLegacyPass, "lower-widenable-condition",
+ "Lower the widenable condition to default true value", false,
+ false)
+
+Pass *llvm::createLowerWidenableConditionPass() {
+ return new LowerWidenableConditionLegacyPass();
+}
+
+PreservedAnalyses LowerWidenableConditionPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (lowerWidenableCondition(F))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
+}
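
For context, the new pass can be scheduled like any other legacy function pass. A minimal sketch, assuming its createLowerWidenableConditionPass() declaration (presumably added to llvm/Transforms/Scalar.h elsewhere in this import) is visible:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Replace every llvm.experimental.widenable.condition call in M with i1 true.
    void runLowerWidenableCondition(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createLowerWidenableConditionPass());
      PM.run(M);
    }
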
diff --git a/lib/Transforms/Scalar/MakeGuardsExplicit.cpp b/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
index 1ba3994eba0e..789232e0f5ce 100644
--- a/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
+++ b/lib/Transforms/Scalar/MakeGuardsExplicit.cpp
@@ -1,9 +1,8 @@
//===- MakeGuardsExplicit.cpp - Turn guard intrinsics into guard branches -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ced923d6973d..5a055139be4f 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1,9 +1,8 @@
//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -279,8 +278,8 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
unsigned Alignment, Instruction *Inst) {
int64_t End = Start+Size;
- range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
- [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
+ range_iterator I = partition_point(
+ Ranges, [=](const MemsetRange &O) { return O.End < Start; });
// We now know that I == E, in which case we didn't find anything to merge
// with, or that Start <= I->End. If End < I->Start or I == E, then we need
@@ -413,7 +412,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
- Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+ Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
ByteVal = StoredByte;
if (ByteVal != StoredByte)
@@ -750,7 +749,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// byte at a time like "0" or "-1" or any width, as well as things like
// 0xA0A0A0A0 and 0.0.
auto *V = SI->getOperand(0);
- if (Value *ByteVal = isBytewiseValue(V)) {
+ if (Value *ByteVal = isBytewiseValue(V, DL)) {
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
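
The isBytewiseValue(..., DL) calls above ask whether a stored value is a byte splat, i.e. every byte of the value is identical, so a run of such stores can be merged into one memset. A tiny illustration of the property for 32-bit integer constants (a hedged helper for exposition only, not the LLVM API; it ignores the floating-point cases such as 0.0 that the real analysis also handles):

    #include <cstdint>
    #include <optional>

    // Returns the repeated byte if every byte of V is identical, otherwise nullopt.
    std::optional<uint8_t> splatByte(uint32_t V) {
      uint8_t B = V & 0xff;
      return V == B * 0x01010101u ? std::optional<uint8_t>(B) : std::nullopt;
    }
    // splatByte(0x00000000) -> 0x00, splatByte(0xFFFFFFFF) -> 0xFF,
    // splatByte(0xA0A0A0A0) -> 0xA0, splatByte(0x12345678) -> nullopt.
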
@@ -1135,8 +1134,10 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
Value *MemsetLen = Builder.CreateSelect(
Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
- Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
- MemsetLen, Align);
+ Builder.CreateMemSet(
+ Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
+ SrcSize),
+ MemSet->getOperand(1), MemsetLen, Align);
MD->removeInstruction(MemSet);
MemSet->eraseFromParent();
@@ -1228,7 +1229,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
// If copying from a constant, try to turn the memcpy into a memset.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
+ if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
+ M->getModule()->getDataLayout())) {
IRBuilder<> Builder(M);
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
M->getDestAlignment(), false);
diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp
index 69fd8b163a07..3d047a193267 100644
--- a/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/lib/Transforms/Scalar/MergeICmps.cpp
@@ -1,9 +1,8 @@
//===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,29 +10,54 @@
// later typically inlined as a chain of efficient hardware comparisons). This
// typically benefits c++ member or nonmember operator==().
//
-// The basic idea is to replace a larger chain of integer comparisons loaded
-// from contiguous memory locations into a smaller chain of such integer
+// The basic idea is to replace a longer chain of integer comparisons loaded
+// from contiguous memory locations into a shorter chain of larger integer
// comparisons. Benefits are double:
// - There are fewer jumps, and therefore fewer opportunities for mispredictions
// and I-cache misses.
// - Code size is smaller, both because jumps are removed and because the
// encoding of a 2*n byte compare is smaller than that of two n-byte
// compares.
-
+//
+// Example:
+//
+// struct S {
+// int a;
+// char b;
+// char c;
+// uint16_t d;
+// bool operator==(const S& o) const {
+// return a == o.a && b == o.b && c == o.c && d == o.d;
+// }
+// };
+//
+// Is optimized as :
+//
+// bool S::operator==(const S& o) const {
+// return memcmp(this, &o, 8) == 0;
+// }
+//
+// Which will later be expanded (ExpandMemCmp) as a single 8-bytes icmp.
+//
//===----------------------------------------------------------------------===//
-#include <algorithm>
-#include <numeric>
-#include <utility>
-#include <vector>
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -50,76 +74,109 @@ static bool isSimpleLoadOrStore(const Instruction *I) {
return false;
}
-// A BCE atom.
+// A BCE atom "Binary Compare Expression Atom" represents an integer load
+// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
+// at the top.
struct BCEAtom {
- BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
-
- const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
-
+ BCEAtom() = default;
+ BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
+ : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
+
+ BCEAtom(const BCEAtom &) = delete;
+ BCEAtom &operator=(const BCEAtom &) = delete;
+
+ BCEAtom(BCEAtom &&that) = default;
+ BCEAtom &operator=(BCEAtom &&that) {
+ if (this == &that)
+ return *this;
+ GEP = that.GEP;
+ LoadI = that.LoadI;
+ BaseId = that.BaseId;
+ Offset = std::move(that.Offset);
+ return *this;
+ }
+
+ // We want to order BCEAtoms by (Base, Offset). However we cannot use
+ // the pointer values for Base because these are non-deterministic.
+ // To make sure that the sort order is stable, we first assign to each atom
+ // base value an index based on its order of appearance in the chain of
+ // comparisons. We call this index `BaseOrdering`. For example, for:
+ // b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
+ // | block 1 | | block 2 | | block 3 |
+ // b gets assigned index 0 and a index 1, because b appears as LHS in block 1,
+ // which is before block 2.
+ // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable.
bool operator<(const BCEAtom &O) const {
- assert(Base() && "invalid atom");
- assert(O.Base() && "invalid atom");
- // Just ordering by (Base(), Offset) is sufficient. However because this
- // means that the ordering will depend on the addresses of the base
- // values, which are not reproducible from run to run. To guarantee
- // stability, we use the names of the values if they exist; we sort by:
- // (Base.getName(), Base(), Offset).
- const int NameCmp = Base()->getName().compare(O.Base()->getName());
- if (NameCmp == 0) {
- if (Base() == O.Base()) {
- return Offset.slt(O.Offset);
- }
- return Base() < O.Base();
- }
- return NameCmp < 0;
+ return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
}
- GetElementPtrInst *GEP;
- LoadInst *LoadI;
+ GetElementPtrInst *GEP = nullptr;
+ LoadInst *LoadI = nullptr;
+ unsigned BaseId = 0;
APInt Offset;
};
+// A class that assigns increasing ids to values in the order in which they are
+// seen. See comment in `BCEAtom::operator<()``.
+class BaseIdentifier {
+public:
+ // Returns the id for value `Base`, after assigning one if `Base` has not been
+ // seen before.
+ int getBaseId(const Value *Base) {
+ assert(Base && "invalid base");
+ const auto Insertion = BaseToIndex.try_emplace(Base, Order);
+ if (Insertion.second)
+ ++Order;
+ return Insertion.first->second;
+ }
+
+private:
+ unsigned Order = 1;
+ DenseMap<const Value*, int> BaseToIndex;
+};
+
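
To make the ordering scheme concrete: ids are handed out in order of first appearance, so sorting atoms by (BaseId, Offset) no longer depends on pointer values. A self-contained analogue of BaseIdentifier in plain standard C++ (same idea, not the pass's class):

    #include <cassert>
    #include <map>

    struct FirstSeenIds {
      int getId(const void *Base) {
        auto It = Ids.emplace(Base, Next); // inserts only if Base is new
        if (It.second)
          ++Next;
        return It.first->second;
      }
      std::map<const void *, int> Ids;
      int Next = 1; // the patch's BaseIdentifier also starts its counter at 1
    };

    int main() {
      int a, b, c; // stand-ins for three distinct base values
      FirstSeenIds Ids;
      assert(Ids.getId(&b) == 1); // b appears first, so it gets the first id
      assert(Ids.getId(&c) == 2);
      assert(Ids.getId(&a) == 3);
      assert(Ids.getId(&b) == 1); // revisiting b reuses its id
      return 0;
    }
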
// If this value is a load from a constant offset w.r.t. a base address, and
// there are no other users of the load or address, returns the base address and
// the offset.
-BCEAtom visitICmpLoadOperand(Value *const Val) {
- BCEAtom Result;
- if (auto *const LoadI = dyn_cast<LoadInst>(Val)) {
- LLVM_DEBUG(dbgs() << "load\n");
- if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
- LLVM_DEBUG(dbgs() << "used outside of block\n");
- return {};
- }
- // Do not optimize atomic loads to non-atomic memcmp
- if (!LoadI->isSimple()) {
- LLVM_DEBUG(dbgs() << "volatile or atomic\n");
- return {};
- }
- Value *const Addr = LoadI->getOperand(0);
- if (auto *const GEP = dyn_cast<GetElementPtrInst>(Addr)) {
- LLVM_DEBUG(dbgs() << "GEP\n");
- if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
- LLVM_DEBUG(dbgs() << "used outside of block\n");
- return {};
- }
- const auto &DL = GEP->getModule()->getDataLayout();
- if (!isDereferenceablePointer(GEP, DL)) {
- LLVM_DEBUG(dbgs() << "not dereferenceable\n");
- // We need to make sure that we can do comparison in any order, so we
- // require memory to be unconditionnally dereferencable.
- return {};
- }
- Result.Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
- if (GEP->accumulateConstantOffset(DL, Result.Offset)) {
- Result.GEP = GEP;
- Result.LoadI = LoadI;
- }
- }
+BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
+ auto *const LoadI = dyn_cast<LoadInst>(Val);
+ if (!LoadI)
+ return {};
+ LLVM_DEBUG(dbgs() << "load\n");
+ if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
+ LLVM_DEBUG(dbgs() << "used outside of block\n");
+ return {};
+ }
+ // Do not optimize atomic loads to non-atomic memcmp
+ if (!LoadI->isSimple()) {
+ LLVM_DEBUG(dbgs() << "volatile or atomic\n");
+ return {};
}
- return Result;
+ Value *const Addr = LoadI->getOperand(0);
+ auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
+ if (!GEP)
+ return {};
+ LLVM_DEBUG(dbgs() << "GEP\n");
+ if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
+ LLVM_DEBUG(dbgs() << "used outside of block\n");
+ return {};
+ }
+ const auto &DL = GEP->getModule()->getDataLayout();
+ if (!isDereferenceablePointer(GEP, LoadI->getType(), DL)) {
+ LLVM_DEBUG(dbgs() << "not dereferenceable\n");
+ // We need to make sure that we can do comparison in any order, so we
+    // require memory to be unconditionally dereferenceable.
+ return {};
+ }
+ APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
+ if (!GEP->accumulateConstantOffset(DL, Offset))
+ return {};
+ return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
+ Offset);
}
-// A basic block with a comparison between two BCE atoms.
+// A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
+// example at the top.
// The block might do extra work besides the atom comparison, in which case
// doesOtherWork() returns true. Under some conditions, the block can be
// split into the atom comparison part and the "other work" part
@@ -133,13 +190,11 @@ class BCECmpBlock {
BCECmpBlock() {}
BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
- : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
+ : Lhs_(std::move(L)), Rhs_(std::move(R)), SizeBits_(SizeBits) {
if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
}
- bool IsValid() const {
- return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr;
- }
+ bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
// Assert the block is consistent: If valid, it should also have
// non-null members besides Lhs_ and Rhs_.
@@ -160,19 +215,19 @@ class BCECmpBlock {
// Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
// instructions in the block.
- bool canSplit(AliasAnalysis *AA) const;
+ bool canSplit(AliasAnalysis &AA) const;
 // Return true if all the relevant instructions in the BCE-cmp-block can
// be sunk below this instruction. By doing this, we know we can separate the
// BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
// block.
bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
- AliasAnalysis *AA) const;
+ AliasAnalysis &AA) const;
// We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
// instructions. Split the old block and move all non-BCE-cmp-insts into the
// new parent block.
- void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
+ void split(BasicBlock *NewParent, AliasAnalysis &AA) const;
// The basic block where this comparison happens.
BasicBlock *BB = nullptr;
@@ -191,7 +246,7 @@ private:
bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
DenseSet<Instruction *> &BlockInsts,
- AliasAnalysis *AA) const {
+ AliasAnalysis &AA) const {
 // If this instruction has side effects and it's in the middle of the BCE cmp block
// instructions, then bail for now.
if (Inst->mayHaveSideEffects()) {
@@ -201,9 +256,9 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// Disallow stores that might alias the BCE operands
MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
- if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
- isModSet(AA->getModRefInfo(Inst, RLoc)))
- return false;
+ if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
+ isModSet(AA.getModRefInfo(Inst, RLoc)))
+ return false;
}
// Make sure this instruction does not use any of the BCE cmp block
// instructions as operand.
@@ -214,7 +269,7 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
return true;
}
-void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
+void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
llvm::SmallVector<Instruction *, 4> OtherInsts;
@@ -234,7 +289,7 @@ void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
}
}
-bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
+bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
DenseSet<Instruction *> BlockInsts(
{Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
for (Instruction &Inst : *BB) {
@@ -265,7 +320,8 @@ bool BCECmpBlock::doesOtherWork() const {
// Visit the given comparison. If this is a comparison between two valid
// BCE atoms, returns the comparison.
BCECmpBlock visitICmp(const ICmpInst *const CmpI,
- const ICmpInst::Predicate ExpectedPredicate) {
+ const ICmpInst::Predicate ExpectedPredicate,
+ BaseIdentifier &BaseId) {
// The comparison can only be used once:
// - For intermediate blocks, as a branch condition.
// - For the final block, as an incoming value for the Phi.
@@ -275,25 +331,27 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI,
LLVM_DEBUG(dbgs() << "cmp has several uses\n");
return {};
}
- if (CmpI->getPredicate() == ExpectedPredicate) {
- LLVM_DEBUG(dbgs() << "cmp "
- << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
- << "\n");
- auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0));
- if (!Lhs.Base()) return {};
- auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1));
- if (!Rhs.Base()) return {};
- const auto &DL = CmpI->getModule()->getDataLayout();
- return BCECmpBlock(std::move(Lhs), std::move(Rhs),
- DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
- }
- return {};
+ if (CmpI->getPredicate() != ExpectedPredicate)
+ return {};
+ LLVM_DEBUG(dbgs() << "cmp "
+ << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
+ << "\n");
+ auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
+ if (!Lhs.BaseId)
+ return {};
+ auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
+ if (!Rhs.BaseId)
+ return {};
+ const auto &DL = CmpI->getModule()->getDataLayout();
+ return BCECmpBlock(std::move(Lhs), std::move(Rhs),
+ DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
}
// Visit the given comparison block. If this is a comparison between two valid
// BCE atoms, returns the comparison.
BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
- const BasicBlock *const PhiBlock) {
+ const BasicBlock *const PhiBlock,
+ BaseIdentifier &BaseId) {
if (Block->empty()) return {};
auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
if (!BranchI) return {};
@@ -306,7 +364,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
auto *const CmpI = dyn_cast<ICmpInst>(Val);
if (!CmpI) return {};
LLVM_DEBUG(dbgs() << "icmp\n");
- auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ);
+ auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
Result.CmpI = CmpI;
Result.BranchI = BranchI;
return Result;
@@ -323,7 +381,8 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
auto Result = visitICmp(
- CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE);
+ CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ BaseId);
Result.CmpI = CmpI;
Result.BranchI = BranchI;
return Result;
@@ -332,47 +391,41 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
}
static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
- BCECmpBlock &Comparison) {
+ BCECmpBlock &&Comparison) {
LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
<< "': Found cmp of " << Comparison.SizeBits()
- << " bits between " << Comparison.Lhs().Base() << " + "
+ << " bits between " << Comparison.Lhs().BaseId << " + "
<< Comparison.Lhs().Offset << " and "
- << Comparison.Rhs().Base() << " + "
+ << Comparison.Rhs().BaseId << " + "
<< Comparison.Rhs().Offset << "\n");
LLVM_DEBUG(dbgs() << "\n");
- Comparisons.push_back(Comparison);
+ Comparisons.push_back(std::move(Comparison));
}
// A chain of comparisons.
class BCECmpChain {
public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis *AA);
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis &AA);
- int size() const { return Comparisons_.size(); }
+ int size() const { return Comparisons_.size(); }
#ifdef MERGEICMPS_DOT_ON
void dump() const;
#endif // MERGEICMPS_DOT_ON
- bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
+ bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ DomTreeUpdater &DTU);
- private:
+private:
static bool IsContiguous(const BCECmpBlock &First,
const BCECmpBlock &Second) {
- return First.Lhs().Base() == Second.Lhs().Base() &&
- First.Rhs().Base() == Second.Rhs().Base() &&
+ return First.Lhs().BaseId == Second.Lhs().BaseId &&
+ First.Rhs().BaseId == Second.Rhs().BaseId &&
First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
}
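IsContiguous is the merge criterion: the second comparison must read the bytes immediately following the first one, on both the left and the right side, relative to the same interned bases. The same test over simplified records (a sketch; field names are invented for illustration):

    #include <cstdint>

    struct CmpSketch {
      unsigned LhsBase, RhsBase; // interned base ids of both operands
      int64_t LhsOff, RhsOff;    // byte offsets from those bases
      int SizeBits;              // width of the compared value, in bits
    };

    // Two comparisons can fold into one memcmp when the second reads the
    // bytes directly after the first, on both sides.
    static bool isContiguous(const CmpSketch &First, const CmpSketch &Second) {
      return First.LhsBase == Second.LhsBase && First.RhsBase == Second.RhsBase &&
             First.LhsOff + First.SizeBits / 8 == Second.LhsOff &&
             First.RhsOff + First.SizeBits / 8 == Second.RhsOff;
    }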
- // Merges the given comparison blocks into one memcmp block and update
- // branches. Comparisons are assumed to be continguous. If NextBBInChain is
- // null, the merged block will link to the phi block.
- void mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
- BasicBlock *const NextBBInChain, PHINode &Phi,
- const TargetLibraryInfo *const TLI, AliasAnalysis *AA);
-
PHINode &Phi_;
std::vector<BCECmpBlock> Comparisons_;
// The original entry block (before sorting);
@@ -380,16 +433,17 @@ class BCECmpChain {
};
BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis *AA)
+ AliasAnalysis &AA)
: Phi_(Phi) {
assert(!Blocks.empty() && "a chain should have at least one block");
// Now look inside blocks to check for BCE comparisons.
std::vector<BCECmpBlock> Comparisons;
+ BaseIdentifier BaseId;
for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
BasicBlock *const Block = Blocks[BlockIdx];
assert(Block && "invalid block");
BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
- Block, Phi.getParent());
+ Block, Phi.getParent(), BaseId);
Comparison.BB = Block;
if (!Comparison.IsValid()) {
LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
@@ -411,13 +465,13 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
// chain before sorting. Unless we can abort the chain at this point
// and start anew.
//
- // NOTE: we only handle block with single predecessor for now.
+ // NOTE: we only handle blocks with a single predecessor for now.
if (Comparison.canSplit(AA)) {
LLVM_DEBUG(dbgs()
<< "Split initial block '" << Comparison.BB->getName()
<< "' that does extra work besides compare\n");
Comparison.RequireSplit = true;
- enqueueBlock(Comparisons, Comparison);
+ enqueueBlock(Comparisons, std::move(Comparison));
} else {
LLVM_DEBUG(dbgs()
<< "ignoring initial block '" << Comparison.BB->getName()
@@ -450,7 +504,7 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
// We could still merge bb1 and bb2 though.
return;
}
- enqueueBlock(Comparisons, Comparison);
+ enqueueBlock(Comparisons, std::move(Comparison));
}
// It is possible we have no suitable comparison to merge.
@@ -466,9 +520,11 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
#endif // MERGEICMPS_DOT_ON
// Reorder blocks by LHS. We can do that without changing the
 // semantics because we are only accessing dereferenceable memory.
- llvm::sort(Comparisons_, [](const BCECmpBlock &a, const BCECmpBlock &b) {
- return a.Lhs() < b.Lhs();
- });
+ llvm::sort(Comparisons_,
+ [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
+ return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
+ std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
+ });
#ifdef MERGEICMPS_DOT_ON
errs() << "AFTER REORDERING:\n\n";
dump();
@@ -498,162 +554,205 @@ void BCECmpChain::dump() const {
}
#endif // MERGEICMPS_DOT_ON
-bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
- AliasAnalysis *AA) {
- // First pass to check if there is at least one merge. If not, we don't do
- // anything and we keep analysis passes intact.
- {
- bool AtLeastOneMerged = false;
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
- AtLeastOneMerged = true;
- break;
+namespace {
+
+// A class to compute the name of a set of merged basic blocks.
+// This is optimized for the common case of no block names.
+class MergedBlockName {
+ // Storage for the uncommon case of several named blocks.
+ SmallString<16> Scratch;
+
+public:
+ explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
+ : Name(makeName(Comparisons)) {}
+ const StringRef Name;
+
+private:
+ StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) {
+ assert(!Comparisons.empty() && "no basic block");
+ // Fast path: only one block, or no names at all.
+ if (Comparisons.size() == 1)
+ return Comparisons[0].BB->getName();
+ const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
+ [](int i, const BCECmpBlock &Cmp) {
+ return i + Cmp.BB->getName().size();
+ });
+ if (size == 0)
+ return StringRef("", 0);
+
+ // Slow path: at least two blocks, at least one block with a name.
+ Scratch.clear();
+ // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
+ // separators.
+ Scratch.reserve(size + Comparisons.size() - 1);
+ const auto append = [this](StringRef str) {
+ Scratch.append(str.begin(), str.end());
+ };
+ append(Comparisons[0].BB->getName());
+ for (int I = 1, E = Comparisons.size(); I < E; ++I) {
+ const BasicBlock *const BB = Comparisons[I].BB;
+ if (!BB->getName().empty()) {
+ append("+");
+ append(BB->getName());
}
}
- if (!AtLeastOneMerged) return false;
+ return StringRef(Scratch);
}
+};
+} // namespace
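MergedBlockName only exists to build a readable name such as "bb1+bb4" for the merged block, with a fast path for the single-block and all-unnamed cases and a reserve-then-append slow path. A rough standalone equivalent over a vector of names (std::string instead of SmallString; mergedName is a placeholder name):

    #include <cstddef>
    #include <numeric>
    #include <string>
    #include <vector>

    static std::string mergedName(const std::vector<std::string> &Names) {
      // Fast path: a single block keeps its own name.
      if (Names.size() == 1)
        return Names[0];
      const std::size_t Total = std::accumulate(
          Names.begin(), Names.end(), std::size_t{0},
          [](std::size_t N, const std::string &S) { return N + S.size(); });
      // Fast path: no block is named at all.
      if (Total == 0)
        return std::string();
      // Slow path: reserve room for the names plus '+' separators, then append.
      std::string Result;
      Result.reserve(Total + Names.size() - 1);
      Result += Names[0];
      for (std::size_t I = 1; I < Names.size(); ++I) {
        if (!Names[I].empty()) {
          Result += '+';
          Result += Names[I];
        }
      }
      return Result;
    }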
+
+// Merges the given contiguous comparison blocks into one memcmp block.
+static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
+ BasicBlock *const InsertBefore,
+ BasicBlock *const NextCmpBlock,
+ PHINode &Phi, const TargetLibraryInfo &TLI,
+ AliasAnalysis &AA, DomTreeUpdater &DTU) {
+ assert(!Comparisons.empty() && "merging zero comparisons");
+ LLVMContext &Context = NextCmpBlock->getContext();
+ const BCECmpBlock &FirstCmp = Comparisons[0];
+
+ // Create a new cmp block before next cmp block.
+ BasicBlock *const BB =
+ BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
+ NextCmpBlock->getParent(), InsertBefore);
+ IRBuilder<> Builder(BB);
+ // Add the GEPs from the first BCECmpBlock.
+ Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
+ Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
+
+ Value *IsEqual = nullptr;
+ LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
+ << BB->getName() << "\n");
+ if (Comparisons.size() == 1) {
+ LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
+ Value *const LhsLoad =
+ Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
+ Value *const RhsLoad =
+ Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
+ // There are no blocks to merge, just do the comparison.
+ IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
+ } else {
+ // If there is one block that requires splitting, we do it now, i.e.
+ // just before we know we will collapse the chain. The instructions
+ // can be executed before any of the instructions in the chain.
+ const auto ToSplit =
+ std::find_if(Comparisons.begin(), Comparisons.end(),
+ [](const BCECmpBlock &B) { return B.RequireSplit; });
+ if (ToSplit != Comparisons.end()) {
+ LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
+ ToSplit->split(BB, AA);
+ }
- // Remove phi references to comparison blocks, they will be rebuilt as we
- // merge the blocks.
- for (const auto &Comparison : Comparisons_) {
- Phi_.removeIncomingValue(Comparison.BB, false);
- }
+ const unsigned TotalSizeBits = std::accumulate(
+ Comparisons.begin(), Comparisons.end(), 0u,
+ [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
- // If entry block is part of the chain, we need to make the first block
- // of the chain the new entry block of the function.
- BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock();
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (Entry == Comparisons_[I].BB) {
- BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "",
- Entry->getParent(), Entry);
- BranchInst::Create(Entry, NEntryBB);
- break;
- }
+ // Create memcmp() == 0.
+ const auto &DL = Phi.getModule()->getDataLayout();
+ Value *const MemCmpCall = emitMemCmp(
+ Lhs, Rhs,
+ ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
+ DL, &TLI);
+ IsEqual = Builder.CreateICmpEQ(
+ MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
}
- // Point the predecessors of the chain to the first comparison block (which is
- // the new entry point) and update the entry block of the chain.
- if (EntryBlock_ != Comparisons_[0].BB) {
- EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB);
- EntryBlock_ = Comparisons_[0].BB;
+ BasicBlock *const PhiBB = Phi.getParent();
+ // Add a branch to the next basic block in the chain.
+ if (NextCmpBlock == PhiBB) {
+ // Continue to phi, passing it the comparison result.
+ Builder.CreateBr(PhiBB);
+ Phi.addIncoming(IsEqual, BB);
+ DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
+ } else {
+ // Continue to the next block if equal, otherwise exit to the phi block.
+ Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
+ Phi.addIncoming(ConstantInt::getFalse(Context), BB);
+ DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
+ {DominatorTree::Insert, BB, PhiBB}});
}
+ return BB;
+}
+
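Semantically, mergeComparisons turns a chain of field-by-field equality tests over adjacent, unconditionally dereferenceable bytes into one memcmp call; the short-circuiting of the original chain can be dropped precisely because every byte is known to be safely readable. A hand-written before/after sketch (struct S is hypothetical, and its layout is assumed to have no padding between the fields):

    #include <cstring>

    struct S { int a; int b; int c; };

    // Before: conceptually one comparison block per field, chained by branches.
    bool equalByFields(const S &X, const S &Y) {
      return X.a == Y.a && X.b == Y.b && X.c == Y.c;
    }

    // After: the pass emits the equivalent of a single memcmp over the
    // contiguous bytes (TotalSizeBits / 8 in the code above).
    bool equalByMemcmp(const S &X, const S &Y) {
      return std::memcmp(&X, &Y, sizeof(int) * 3) == 0;
    }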
+bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ DomTreeUpdater &DTU) {
+ assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
+ // First pass to check if there is at least one merge. If not, we don't do
+ // anything and we keep analysis passes intact.
+ const auto AtLeastOneMerged = [this]() {
+ for (size_t I = 1; I < Comparisons_.size(); ++I) {
+ if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
+ return true;
+ }
+ return false;
+ };
+ if (!AtLeastOneMerged())
+ return false;
- // Effectively merge blocks.
+ LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
+ << EntryBlock_->getName() << "\n");
+
+ // Effectively merge blocks. We go in the reverse direction from the phi block
+ // so that the next block is always available to branch to.
+ const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
+ BasicBlock *InsertBefore,
+ BasicBlock *Next) {
+ return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
+ InsertBefore, Next, Phi_, TLI, AA, DTU);
+ };
int NumMerged = 1;
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) {
+ BasicBlock *NextCmpBlock = Phi_.getParent();
+ for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
+ if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
+ LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
+ << " into " << Comparisons_[I + 1].BB->getName()
+ << "\n");
++NumMerged;
} else {
- // Merge all previous comparisons and start a new merge block.
- mergeComparisons(
- makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged),
- Comparisons_[I].BB, Phi_, TLI, AA);
+ NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
NumMerged = 1;
}
}
- mergeComparisons(makeArrayRef(Comparisons_)
- .slice(Comparisons_.size() - NumMerged, NumMerged),
- nullptr, Phi_, TLI, AA);
-
- return true;
-}
-
-void BCECmpChain::mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
- BasicBlock *const NextBBInChain,
- PHINode &Phi,
- const TargetLibraryInfo *const TLI,
- AliasAnalysis *AA) {
- assert(!Comparisons.empty());
- const auto &FirstComparison = *Comparisons.begin();
- BasicBlock *const BB = FirstComparison.BB;
- LLVMContext &Context = BB->getContext();
-
- if (Comparisons.size() >= 2) {
- // If there is one block that requires splitting, we do it now, i.e.
- // just before we know we will collapse the chain. The instructions
- // can be executed before any of the instructions in the chain.
- auto C = std::find_if(Comparisons.begin(), Comparisons.end(),
- [](const BCECmpBlock &B) { return B.RequireSplit; });
- if (C != Comparisons.end())
- C->split(EntryBlock_, AA);
-
- LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n");
- const auto TotalSize =
- std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
- [](int Size, const BCECmpBlock &C) {
- return Size + C.SizeBits();
- }) /
- 8;
-
- // Incoming edges do not need to be updated, and both GEPs are already
- // computing the right address, we just need to:
- // - replace the two loads and the icmp with the memcmp
- // - update the branch
- // - update the incoming values in the phi.
- FirstComparison.BranchI->eraseFromParent();
- FirstComparison.CmpI->eraseFromParent();
- FirstComparison.Lhs().LoadI->eraseFromParent();
- FirstComparison.Rhs().LoadI->eraseFromParent();
-
- IRBuilder<> Builder(BB);
- const auto &DL = Phi.getModule()->getDataLayout();
- Value *const MemCmpCall = emitMemCmp(
- FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
- ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
- Builder, DL, TLI);
- Value *const MemCmpIsZero = Builder.CreateICmpEQ(
- MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
+ // Insert the entry block for the new chain before the old entry block.
+ // If the old entry block was the function entry, this ensures that the new
+ // entry can become the function entry.
+ NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
+
+ // Replace the original cmp chain with the new cmp chain by pointing all
+ // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
+ // blocks in the old chain unreachable.
+ while (!pred_empty(EntryBlock_)) {
+ BasicBlock* const Pred = *pred_begin(EntryBlock_);
+ LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
+ << "\n");
+ Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
+ DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
+ {DominatorTree::Insert, Pred, NextCmpBlock}});
+ }
- // Add a branch to the next basic block in the chain.
- if (NextBBInChain) {
- Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent());
- Phi.addIncoming(ConstantInt::getFalse(Context), BB);
- } else {
- Builder.CreateBr(Phi.getParent());
- Phi.addIncoming(MemCmpIsZero, BB);
- }
+ // If the old cmp chain was the function entry, we need to update the function
+ // entry.
+ const bool ChainEntryIsFnEntry =
+ (EntryBlock_ == &EntryBlock_->getParent()->getEntryBlock());
+ if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
+ LLVM_DEBUG(dbgs() << "Changing function entry from "
+ << EntryBlock_->getName() << " to "
+ << NextCmpBlock->getName() << "\n");
+ DTU.getDomTree().setNewRoot(NextCmpBlock);
+ DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
+ }
+ EntryBlock_ = nullptr;
- // Delete merged blocks.
- for (size_t I = 1; I < Comparisons.size(); ++I) {
- BasicBlock *CBB = Comparisons[I].BB;
- CBB->replaceAllUsesWith(BB);
- CBB->eraseFromParent();
- }
- } else {
- assert(Comparisons.size() == 1);
- // There are no blocks to merge, but we still need to update the branches.
- LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
- if (NextBBInChain) {
- if (FirstComparison.BranchI->isConditional()) {
- LLVM_DEBUG(dbgs() << "conditional -> conditional\n");
- // Just update the "true" target, the "false" target should already be
- // the phi block.
- assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent());
- FirstComparison.BranchI->setSuccessor(0, NextBBInChain);
- Phi.addIncoming(ConstantInt::getFalse(Context), BB);
- } else {
- LLVM_DEBUG(dbgs() << "unconditional -> conditional\n");
- // Replace the unconditional branch by a conditional one.
- FirstComparison.BranchI->eraseFromParent();
- IRBuilder<> Builder(BB);
- Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain,
- Phi.getParent());
- Phi.addIncoming(FirstComparison.CmpI, BB);
- }
- } else {
- if (FirstComparison.BranchI->isConditional()) {
- LLVM_DEBUG(dbgs() << "conditional -> unconditional\n");
- // Replace the conditional branch by an unconditional one.
- FirstComparison.BranchI->eraseFromParent();
- IRBuilder<> Builder(BB);
- Builder.CreateBr(Phi.getParent());
- Phi.addIncoming(FirstComparison.CmpI, BB);
- } else {
- LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n");
- Phi.addIncoming(FirstComparison.CmpI, BB);
- }
- }
+ // Delete merged blocks. This also removes incoming values in phi.
+ SmallVector<BasicBlock *, 16> DeadBlocks;
+ for (auto &Cmp : Comparisons_) {
+ LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
+ DeadBlocks.push_back(Cmp.BB);
}
+ DeleteDeadBlocks(DeadBlocks, &DTU);
+
+ Comparisons_.clear();
+ return true;
}
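The loop in simplify() walks the sorted comparisons from the back, growing a run while IsContiguous holds and emitting the run as one merged block whenever it breaks, plus a final flush for the run that starts at the entry; going backwards guarantees the successor block already exists when a run is emitted. A generic sketch of just that grouping logic (names and signature are illustrative):

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Visit maximal runs of "contiguous" elements, last run first.
    // Assumes Items is non-empty, as the pass does (it requires >= 2 blocks).
    template <typename T>
    static void forEachRunReversed(
        const std::vector<T> &Items,
        const std::function<bool(const T &, const T &)> &Contiguous,
        const std::function<void(std::size_t First, std::size_t Num)> &Emit) {
      std::size_t NumMerged = 1;
      for (int I = static_cast<int>(Items.size()) - 2; I >= 0; --I) {
        if (Contiguous(Items[I], Items[I + 1])) {
          ++NumMerged;
        } else {
          Emit(static_cast<std::size_t>(I) + 1, NumMerged);
          NumMerged = 1;
        }
      }
      Emit(0, NumMerged); // the run that begins at the entry block
    }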
std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
@@ -691,8 +790,8 @@ std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
return Blocks;
}
-bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
- AliasAnalysis *AA) {
+bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ DomTreeUpdater &DTU) {
LLVM_DEBUG(dbgs() << "processPhi()\n");
if (Phi.getNumIncomingValues() <= 1) {
LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
@@ -757,24 +856,54 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
return false;
}
- return CmpChain.simplify(TLI, AA);
+ return CmpChain.simplify(TLI, AA, DTU);
}
-class MergeICmps : public FunctionPass {
- public:
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, AliasAnalysis &AA,
+ DominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n");
+
+ // We only try merging comparisons if the target wants to expand memcmp later.
+ // The rationale is to avoid turning small chains into memcmp calls.
+ if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true))
+ return false;
+
+ // If we don't have memcmp available, we can't emit calls to it.
+ if (!TLI.has(LibFunc_memcmp))
+ return false;
+
+ DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr,
+ DomTreeUpdater::UpdateStrategy::Eager);
+
+ bool MadeChange = false;
+
+ for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+ // A Phi operation is always first in a basic block.
+ if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+ MadeChange |= processPhi(*Phi, TLI, AA, DTU);
+ }
+
+ return MadeChange;
+}
+
+class MergeICmpsLegacyPass : public FunctionPass {
+public:
static char ID;
- MergeICmps() : FunctionPass(ID) {
- initializeMergeICmpsPass(*PassRegistry::getPassRegistry());
+ MergeICmpsLegacyPass() : FunctionPass(ID) {
+ initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F)) return false;
const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto PA = runImpl(F, &TLI, &TTI, AA);
- return !PA.areAllPreserved();
+ // MergeICmps does not need the DominatorTree, but we update it if it's
+ // already available.
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);
}
private:
@@ -782,46 +911,35 @@ class MergeICmps : public FunctionPass {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
-
- PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, AliasAnalysis *AA);
};
-PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
- AliasAnalysis *AA) {
- LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
-
- // We only try merging comparisons if the target wants to expand memcmp later.
- // The rationale is to avoid turning small chains into memcmp calls.
- if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
-
- // If we don't have memcmp avaiable we can't emit calls to it.
- if (!TLI->has(LibFunc_memcmp))
- return PreservedAnalyses::all();
-
- bool MadeChange = false;
-
- for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
- // A Phi operation is always first in a basic block.
- if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
- MadeChange |= processPhi(*Phi, TLI, AA);
- }
-
- if (MadeChange) return PreservedAnalyses::none();
- return PreservedAnalyses::all();
-}
+} // namespace
-} // namespace
-
-char MergeICmps::ID = 0;
-INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
+char MergeICmpsLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
+INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps",
"Merge contiguous icmps into a memcmp", false, false)
-Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
+Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); }
+
+PreservedAnalyses MergeICmpsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT);
+ if (!MadeChanges)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index ee21feca8d2c..30645f4400e3 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -1,9 +1,8 @@
//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 7106ea216ad6..94436b55752a 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -1,9 +1,8 @@
//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -427,8 +426,8 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
RHS = Builder.CreateMul(
RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
}
- GetElementPtrInst *NewGEP =
- cast<GetElementPtrInst>(Builder.CreateGEP(Candidate, RHS));
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
+ Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
NewGEP->setIsInBounds(GEP->isInBounds());
NewGEP->takeName(GEP);
return NewGEP;
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 7cbb0fe70f82..08ac2b666fce 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -1,9 +1,8 @@
//===- NewGVN.cpp - Global Value Numbering Pass ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1167,9 +1166,9 @@ const Expression *NewGVN::createExpression(Instruction *I) const {
SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
- } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
+ } else if (auto *CI = dyn_cast<CastInst>(I)) {
Value *V =
- SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);
+ SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<GetElementPtrInst>(I)) {
@@ -1815,39 +1814,13 @@ NewGVN::performSymbolicPHIEvaluation(ArrayRef<ValPair> PHIOps,
const Expression *
NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const {
if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
- auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
- unsigned Opcode = 0;
- // EI might be an extract from one of our recognised intrinsics. If it
- // is we'll synthesize a semantically equivalent expression instead on
- // an extract value expression.
- switch (II->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- Opcode = Instruction::Add;
- break;
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- Opcode = Instruction::Sub;
- break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- Opcode = Instruction::Mul;
- break;
- default:
- break;
- }
-
- if (Opcode != 0) {
- // Intrinsic recognized. Grab its args to finish building the
- // expression.
- assert(II->getNumArgOperands() == 2 &&
- "Expect two args for recognised intrinsics.");
- return createBinaryExpression(Opcode, EI->getType(),
- II->getArgOperand(0),
- II->getArgOperand(1), I);
- }
- }
+ auto *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand());
+ if (WO && EI->getNumIndices() == 1 && *EI->idx_begin() == 0)
+ // EI is an extract from one of our with.overflow intrinsics. Synthesize
+ // a semantically equivalent expression instead of an extract value
+ // expression.
+ return createBinaryExpression(WO->getBinaryOp(), EI->getType(),
+ WO->getLHS(), WO->getRHS(), I);
}
return createAggregateValueExpression(I);
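The NewGVN change above relies on the fact that element 0 of a *.with.overflow intrinsic's result is just the wrapping arithmetic result, so it can be value-numbered as the corresponding plain binary operation. At the C level the equivalence looks like this (a sketch using the GCC/Clang __builtin_add_overflow builtin; unsigned arithmetic is used so the wrap-around is well defined):

    unsigned viaOverflowIntrinsic(unsigned A, unsigned B) {
      unsigned Sum;
      // Element 0 of the {sum, overflow-bit} pair is the wrapping sum itself.
      (void)__builtin_add_overflow(A, B, &Sum);
      return Sum;
    }

    unsigned viaPlainAdd(unsigned A, unsigned B) {
      // ...so GVN may treat it as equal to a plain add of the same operands.
      return A + B;
    }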
@@ -2011,12 +1984,14 @@ NewGVN::performSymbolicEvaluation(Value *V,
E = performSymbolicLoadEvaluation(I);
break;
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
E = createExpression(I);
break;
case Instruction::ICmp:
case Instruction::FCmp:
E = performSymbolicCmpEvaluation(I);
break;
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -2122,7 +2097,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
PredicateToUsers[PBranch->Condition].insert(I);
- else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
+ else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
PredicateToUsers[PAssume->Condition].insert(I);
}
@@ -2524,9 +2499,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
// For switches, propagate the case values into the case
// destinations.
- // Remember how many outgoing edges there are to every successor.
- SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
-
Value *SwitchCond = SI->getCondition();
Value *CondEvaluated = findConditionEquivalence(SwitchCond);
// See if we were able to turn this switch statement into a constant.
@@ -2547,7 +2519,6 @@ void NewGVN::processOutgoingEdges(Instruction *TI, BasicBlock *B) {
} else {
for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
BasicBlock *TargetBlock = SI->getSuccessor(i);
- ++SwitchEdges[TargetBlock];
updateReachableEdge(B, TargetBlock);
}
}
@@ -3503,7 +3474,7 @@ bool NewGVN::runGVN() {
"BB containing ToErase deleted unexpectedly!");
ToErase->eraseFromParent();
}
- Changed |= !InstructionsToErase.empty();
+ Changed |= !InstructionsToErase.empty();
// Delete all unreachable blocks.
auto UnreachableBlockPred = [&](const BasicBlock &BB) {
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 05ea9144f66c..039123218544 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -1,9 +1,8 @@
//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index fd2eb85fd7bf..b544f0a39ea8 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -1,9 +1,8 @@
//===- PlaceSafepoints.cpp - Place GC Safepoints --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,7 +55,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -179,19 +177,18 @@ struct PlaceSafepoints : public FunctionPass {
// callers job.
static void
InsertSafepointPoll(Instruction *InsertBefore,
- std::vector<CallSite> &ParsePointsNeeded /*rval*/,
+ std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI);
-static bool needsStatepoint(const CallSite &CS, const TargetLibraryInfo &TLI) {
- if (callsGCLeafFunction(CS, TLI))
+static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) {
+ if (callsGCLeafFunction(Call, TLI))
return false;
- if (CS.isCall()) {
- CallInst *call = cast<CallInst>(CS.getInstruction());
- if (call->isInlineAsm())
+ if (auto *CI = dyn_cast<CallInst>(Call)) {
+ if (CI->isInlineAsm())
return false;
}
- return !(isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS));
+ return !(isStatepoint(Call) || isGCRelocate(Call) || isGCResult(Call));
}
/// Returns true if this loop is known to contain a call safepoint which
@@ -217,14 +214,14 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
BasicBlock *Current = Pred;
while (true) {
for (Instruction &I : *Current) {
- if (auto CS = CallSite(&I))
+ if (auto *Call = dyn_cast<CallBase>(&I))
// Note: Technically, needing a safepoint isn't quite the right
// condition here. We should instead be checking if the target method
// has an
// unconditional poll. In practice, this is only a theoretical concern
// since we don't have any methods with conditional-only safepoint
// polls.
- if (needsStatepoint(CS, TLI))
+ if (needsStatepoint(Call, TLI))
return true;
}
@@ -360,9 +357,8 @@ bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
/// Returns true if an entry safepoint is not required before this callsite in
/// the caller function.
-static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) {
- Instruction *Inst = CS.getInstruction();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+static bool doesNotRequireEntrySafepointBefore(CallBase *Call) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
switch (II->getIntrinsicID()) {
case Intrinsic::experimental_gc_statepoint:
case Intrinsic::experimental_patchpoint_void:
@@ -424,8 +420,8 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
// which can grow the stack by an unbounded amount. This isn't required
// for GC semantics per se, but is a common requirement for languages
// which detect stack overflow via guard pages and then throw exceptions.
- if (auto CS = CallSite(Cursor)) {
- if (doesNotRequireEntrySafepointBefore(CS))
+ if (auto *Call = dyn_cast<CallBase>(Cursor)) {
+ if (doesNotRequireEntrySafepointBefore(Call))
continue;
break;
}
@@ -500,7 +496,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
DT.recalculate(F);
SmallVector<Instruction *, 16> PollsNeeded;
- std::vector<CallSite> ParsePointNeeded;
+ std::vector<CallBase *> ParsePointNeeded;
if (enableBackedgeSafepoints(F)) {
// Construct a pass manager to run the LoopPass backedge logic. We
@@ -589,7 +585,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
// Now that we've identified all the needed safepoint poll locations, insert
// safepoint polls themselves.
for (Instruction *PollLocation : PollsNeeded) {
- std::vector<CallSite> RuntimeCalls;
+ std::vector<CallBase *> RuntimeCalls;
InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
ParsePointNeeded.insert(ParsePointNeeded.end(), RuntimeCalls.begin(),
RuntimeCalls.end());
@@ -622,7 +618,7 @@ INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
static void
InsertSafepointPoll(Instruction *InsertBefore,
- std::vector<CallSite> &ParsePointsNeeded /*rval*/,
+ std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI) {
BasicBlock *OrigBB = InsertBefore->getParent();
Module *M = InsertBefore->getModule();
@@ -687,7 +683,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
// These are likely runtime calls. Should we assert that via calling
// convention or something?
- ParsePointsNeeded.push_back(CallSite(CI));
+ ParsePointsNeeded.push_back(CI);
}
assert(ParsePointsNeeded.size() <= Calls.size());
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index cb893eab1654..fa8c9e2a5fe4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1,9 +1,8 @@
//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -267,12 +266,16 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
/// Replace 0-X with X*-1.
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
+ assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) &&
+ "Expected a Negate!");
+ // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
+ unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0;
Type *Ty = Neg->getType();
Constant *NegOne = Ty->isIntOrIntVectorTy() ?
ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
- BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
- Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
+ BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg);
+ Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op.
Res->takeName(Neg);
Neg->replaceAllUsesWith(Res);
Res->setDebugLoc(Neg->getDebugLoc());
@@ -445,8 +448,10 @@ using RepeatedValue = std::pair<Value*, APInt>;
/// that have all uses inside the expression (i.e. only used by non-leaf nodes
/// of the expression) if it can turn them into binary operators of the right
/// type and thus make the expression bigger.
-static bool LinearizeExprTree(BinaryOperator *I,
+static bool LinearizeExprTree(Instruction *I,
SmallVectorImpl<RepeatedValue> &Ops) {
+ assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
+ "Expected a UnaryOperator or BinaryOperator!");
LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits();
unsigned Opcode = I->getOpcode();
@@ -463,7 +468,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// with their weights, representing a certain number of paths to the operator.
// If an operator occurs in the worklist multiple times then we found multiple
// ways to get to it.
- SmallVector<std::pair<BinaryOperator*, APInt>, 8> Worklist; // (Op, Weight)
+ SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight)
Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1)));
bool Changed = false;
@@ -490,10 +495,10 @@ static bool LinearizeExprTree(BinaryOperator *I,
SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
#endif
while (!Worklist.empty()) {
- std::pair<BinaryOperator*, APInt> P = Worklist.pop_back_val();
+ std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
I = P.first; // We examine the operands of this binary operator.
- for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands.
+ for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
APInt Weight = P.second; // Number of paths to this operand.
LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
@@ -573,14 +578,14 @@ static bool LinearizeExprTree(BinaryOperator *I,
// If this is a multiply expression, turn any internal negations into
// multiplies by -1 so they can be reassociated.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op))
- if ((Opcode == Instruction::Mul && match(BO, m_Neg(m_Value()))) ||
- (Opcode == Instruction::FMul && match(BO, m_FNeg(m_Value())))) {
+ if (Instruction *Tmp = dyn_cast<Instruction>(Op))
+ if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
+ (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
LLVM_DEBUG(dbgs()
<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
- BO = LowerNegateToMultiply(BO);
- LLVM_DEBUG(dbgs() << *BO << '\n');
- Worklist.push_back(std::make_pair(BO, Weight));
+ Tmp = LowerNegateToMultiply(Tmp);
+ LLVM_DEBUG(dbgs() << *Tmp << '\n');
+ Worklist.push_back(std::make_pair(Tmp, Weight));
Changed = true;
continue;
}
@@ -862,6 +867,8 @@ static Value *NegateValue(Value *V, Instruction *BI,
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
continue;
+ bool FoundCatchSwitch = false;
+
BasicBlock::iterator InsertPt;
if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
@@ -869,10 +876,30 @@ static Value *NegateValue(Value *V, Instruction *BI,
} else {
InsertPt = ++InstInput->getIterator();
}
- while (isa<PHINode>(InsertPt)) ++InsertPt;
+
+ const BasicBlock *BB = InsertPt->getParent();
+
+ // Make sure we don't move anything before PHIs or exception
+ // handling pads.
+ while (InsertPt != BB->end() && (isa<PHINode>(InsertPt) ||
+ InsertPt->isEHPad())) {
+ if (isa<CatchSwitchInst>(InsertPt))
+ // A catchswitch cannot have anything in the block except
+ // itself and PHIs. We'll bail out below.
+ FoundCatchSwitch = true;
+ ++InsertPt;
+ }
} else {
InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
}
+
+ // We found a catchswitch in the block where we want to move the
+ // neg. We cannot move anything into that block. Bail and just
+ // create the neg before BI, as if we hadn't found an existing
+ // neg.
+ if (FoundCatchSwitch)
+ break;
+
TheNeg->moveBefore(&*InsertPt);
if (TheNeg->getOpcode() == Instruction::Sub) {
TheNeg->setHasNoUnsignedWrap(false);
@@ -1329,8 +1356,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
// So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
// than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
// "z" in the order of X-Y-Z is better than any other orders.
- std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(),
- [](XorOpnd *LHS, XorOpnd *RHS) {
+ llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) {
return LHS->getSymbolicRank() < RHS->getSymbolicRank();
});
@@ -1687,8 +1713,7 @@ static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
 // below our minimum of '4'.
assert(FactorPowerSum >= 4);
- std::stable_sort(Factors.begin(), Factors.end(),
- [](const Factor &LHS, const Factor &RHS) {
+ llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) {
return LHS.Power > RHS.Power;
});
return true;
@@ -1801,7 +1826,7 @@ Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
return V;
ValueEntry NewEntry = ValueEntry(getRank(V), V);
- Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
+ Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry);
return nullptr;
}
@@ -2001,7 +2026,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) {
/// instructions is not allowed.
void ReassociatePass::OptimizeInst(Instruction *I) {
// Only consider operations that we understand.
- if (!isa<BinaryOperator>(I))
+ if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I))
return;
if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1)))
@@ -2066,7 +2091,8 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
I = NI;
}
}
- } else if (I->getOpcode() == Instruction::FSub) {
+ } else if (I->getOpcode() == Instruction::FNeg ||
+ I->getOpcode() == Instruction::FSub) {
if (ShouldBreakUpSubtract(I)) {
Instruction *NI = BreakUpSubtract(I, RedoInsts);
RedoInsts.insert(I);
@@ -2075,7 +2101,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
} else if (match(I, m_FNeg(m_Value()))) {
// Otherwise, this is a negation. See if the operand is a multiply tree
// and if this is not an inner node of a multiply tree.
- if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
+ Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) :
+ I->getOperand(0);
+ if (isReassociableOp(Op, Instruction::FMul) &&
(!I->hasOneUse() ||
!isReassociableOp(I->user_back(), Instruction::FMul))) {
// If the negate was simplified, revisit the users to see if we can
@@ -2142,7 +2170,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
// positions maintained (and so the compiler is deterministic). Note that
// this sorts so that the highest ranking values end up at the beginning of
// the vector.
- std::stable_sort(Ops.begin(), Ops.end());
+ llvm::stable_sort(Ops);
// Now that we have the expression tree in a convenient
// sorted form, optimize it globally if possible.
@@ -2218,8 +2246,15 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
if (std::less<Value *>()(Op1, Op0))
std::swap(Op0, Op1);
auto it = PairMap[Idx].find({Op0, Op1});
- if (it != PairMap[Idx].end())
- Score += it->second;
+ if (it != PairMap[Idx].end()) {
+ // Functions like BreakUpSubtract() can erase the Values we're using
+ // as keys and create new Values after we built the PairMap. There's a
+ // small chance that the new nodes can have the same address as
+ // something already in the table. We shouldn't accumulate the stored
+ // score in that case as it refers to the wrong Value.
+ if (it->second.isValid())
+ Score += it->second.Score;
+ }
unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
if (Score > Max || (Score == Max && MaxRank < BestRank)) {
@@ -2288,9 +2323,15 @@ ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
std::swap(Op0, Op1);
if (!Visited.insert({Op0, Op1}).second)
continue;
- auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, 1});
- if (!res.second)
- ++res.first->second;
+ auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}});
+ if (!res.second) {
+ // If either key value has been erased then we've got the same
+ // address by coincidence. That can't happen here because nothing is
+ // erasing values but it can happen by the time we're querying the
+ // map.
+ assert(res.first->second.isValid() && "WeakVH invalidated");
+ ++res.first->second.Score;
+ }
}
}
}
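The PairMap change guards against a lifetime hazard: Values used as keys can be erased after the map is built, and a later allocation may reuse the same address, so each entry also carries handles that know whether the original operands are still alive, and the cached score is only trusted when they are. A rough standalone analogue of that check, using std::weak_ptr where the pass uses WeakVH (all names here are invented, and the owning shared_ptrs are assumed to live elsewhere):

    #include <map>
    #include <memory>
    #include <utility>

    struct PairScoreSketch {
      std::weak_ptr<int> Op0, Op1; // weak handles to the original operands
      unsigned Score = 0;
      // Only trust Score while both original operands are still alive.
      bool isValid() const { return !Op0.expired() && !Op1.expired(); }
    };

    using KeySketch = std::pair<const int *, const int *>;

    unsigned lookupScore(const std::map<KeySketch, PairScoreSketch> &M,
                         const KeySketch &K) {
      const auto It = M.find(K);
      // A reused address can collide with a stale entry; ignore it then.
      if (It != M.end() && It->second.isValid())
        return It->second.Score;
      return 0;
    }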
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 018feb035a4f..3296322e00d5 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -1,9 +1,8 @@
//===- Reg2Mem.cpp - Convert registers to allocas -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 42d7ed5bc534..c358258d24cf 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1,9 +1,8 @@
//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,18 +25,17 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -286,9 +284,9 @@ struct PartiallyConstructedSafepointRecord {
} // end anonymous namespace
-static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
Optional<OperandBundleUse> DeoptBundle =
- CS.getOperandBundle(LLVMContext::OB_deopt);
+ Call->getOperandBundle(LLVMContext::OB_deopt);
if (!DeoptBundle.hasValue()) {
assert(AllowStatepointWithNoDeoptInfo &&
@@ -370,14 +368,11 @@ static std::string suffixed_name_or(Value *V, StringRef Suffix,
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
-static void
-analyzeParsePointLiveness(DominatorTree &DT,
- GCPtrLivenessData &OriginalLivenessData, CallSite CS,
- PartiallyConstructedSafepointRecord &Result) {
- Instruction *Inst = CS.getInstruction();
-
+static void analyzeParsePointLiveness(
+ DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
+ PartiallyConstructedSafepointRecord &Result) {
StatepointLiveSetTy LiveSet;
- findLiveSetAtInst(Inst, OriginalLivenessData, LiveSet);
+ findLiveSetAtInst(Call, OriginalLivenessData, LiveSet);
if (PrintLiveSet) {
dbgs() << "Live Variables:\n";
@@ -385,7 +380,7 @@ analyzeParsePointLiveness(DominatorTree &DT,
dbgs() << " " << V->getName() << " " << *V << "\n";
}
if (PrintLiveSetSize) {
- dbgs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
+ dbgs() << "Safepoint For: " << Call->getCalledValue()->getName() << "\n";
dbgs() << "Number live values: " << LiveSet.size() << "\n";
}
Result.LiveSet = LiveSet;
@@ -1178,7 +1173,7 @@ findBasePointers(const StatepointLiveSetTy &live,
/// Find the required based pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
- CallSite CS,
+ CallBase *Call,
PartiallyConstructedSafepointRecord &result) {
MapVector<Value *, Value *> PointerToBase;
findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
@@ -1200,11 +1195,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
- CallSite CS,
+ CallBase *Call,
PartiallyConstructedSafepointRecord &result);
static void recomputeLiveInValues(
- Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
// again. The old values are still live and will help it stabilize quickly.
@@ -1307,7 +1302,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
// Lazily populated map from input types to the canonicalized form mentioned
// in the comment above. This should probably be cached somewhere more
// broadly.
- DenseMap<Type*, Value*> TypeToDeclMap;
+ DenseMap<Type *, Function *> TypeToDeclMap;
for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
@@ -1318,7 +1313,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
Type *Ty = LiveVariables[i]->getType();
if (!TypeToDeclMap.count(Ty))
TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
- Value *GCRelocateDecl = TypeToDeclMap[Ty];
+ Function *GCRelocateDecl = TypeToDeclMap[Ty];
// only specify a debug name if we can give a useful one
CallInst *Reloc = Builder.CreateCall(
@@ -1399,16 +1394,16 @@ public:
} // end anonymous namespace
-static StringRef getDeoptLowering(CallSite CS) {
+static StringRef getDeoptLowering(CallBase *Call) {
const char *DeoptLowering = "deopt-lowering";
- if (CS.hasFnAttr(DeoptLowering)) {
- // FIXME: CallSite has a *really* confusing interface around attributes
+ if (Call->hasFnAttr(DeoptLowering)) {
+ // FIXME: Calls have a *really* confusing interface around attributes
// with values.
- const AttributeList &CSAS = CS.getAttributes();
+ const AttributeList &CSAS = Call->getAttributes();
if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
.getValueAsString();
- Function *F = CS.getCalledFunction();
+ Function *F = Call->getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
}
@@ -1416,7 +1411,7 @@ static StringRef getDeoptLowering(CallSite CS) {
}
static void
-makeStatepointExplicitImpl(const CallSite CS, /* to replace */
+makeStatepointExplicitImpl(CallBase *Call, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
const SmallVectorImpl<Value *> &LiveVariables,
PartiallyConstructedSafepointRecord &Result,
@@ -1427,19 +1422,18 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// immediately before the previous instruction under the assumption that all
// arguments will be available here. We can't insert afterwards since we may
// be replacing a terminator.
- Instruction *InsertBefore = CS.getInstruction();
- IRBuilder<> Builder(InsertBefore);
+ IRBuilder<> Builder(Call);
ArrayRef<Value *> GCArgs(LiveVariables);
uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
uint32_t NumPatchBytes = 0;
uint32_t Flags = uint32_t(StatepointFlags::None);
- ArrayRef<Use> CallArgs(CS.arg_begin(), CS.arg_end());
- ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(CS);
+ ArrayRef<Use> CallArgs(Call->arg_begin(), Call->arg_end());
+ ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(Call);
ArrayRef<Use> TransitionArgs;
if (auto TransitionBundle =
- CS.getOperandBundle(LLVMContext::OB_gc_transition)) {
+ Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
Flags |= uint32_t(StatepointFlags::GCTransition);
TransitionArgs = TransitionBundle->Inputs;
}
@@ -1450,21 +1444,21 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
bool IsDeoptimize = false;
StatepointDirectives SD =
- parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ parseStatepointDirectivesFromAttrs(Call->getAttributes());
if (SD.NumPatchBytes)
NumPatchBytes = *SD.NumPatchBytes;
if (SD.StatepointID)
StatepointID = *SD.StatepointID;
// Pass through the requested lowering if any. The default is live-through.
- StringRef DeoptLowering = getDeoptLowering(CS);
+ StringRef DeoptLowering = getDeoptLowering(Call);
if (DeoptLowering.equals("live-in"))
Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
else {
assert(DeoptLowering.equals("live-through") && "Unsupported value!");
}
- Value *CallTarget = CS.getCalledValue();
+ Value *CallTarget = Call->getCalledValue();
if (Function *F = dyn_cast<Function>(CallTarget)) {
if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
// Calls to llvm.experimental.deoptimize are lowered to calls to the
@@ -1481,8 +1475,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// calls to @llvm.experimental.deoptimize with different argument types in
// the same module. This is fine -- we assume the frontend knew what it
// was doing when generating this kind of IR.
- CallTarget =
- F->getParent()->getOrInsertFunction("__llvm_deoptimize", FTy);
+ CallTarget = F->getParent()
+ ->getOrInsertFunction("__llvm_deoptimize", FTy)
+ .getCallee();
IsDeoptimize = true;
}
@@ -1490,57 +1485,56 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Create the statepoint given all the arguments
Instruction *Token = nullptr;
- if (CS.isCall()) {
- CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
- CallInst *Call = Builder.CreateGCStatepointCall(
+ if (auto *CI = dyn_cast<CallInst>(Call)) {
+ CallInst *SPCall = Builder.CreateGCStatepointCall(
StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
- Call->setTailCallKind(ToReplace->getTailCallKind());
- Call->setCallingConv(ToReplace->getCallingConv());
+ SPCall->setTailCallKind(CI->getTailCallKind());
+ SPCall->setCallingConv(CI->getCallingConv());
// Currently we will fail on parameter attributes and on certain
     // function attributes. If we can handle this set of attributes, we
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
+ SPCall->setAttributes(legalizeCallAttributes(CI->getAttributes()));
- Token = Call;
+ Token = SPCall;
// Put the following gc_result and gc_relocate calls immediately after the
     // old call (which we're about to delete)
- assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
- Builder.SetInsertPoint(ToReplace->getNextNode());
- Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
+ assert(CI->getNextNode() && "Not a terminator, must have next!");
+ Builder.SetInsertPoint(CI->getNextNode());
+ Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
} else {
- InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
+ auto *II = cast<InvokeInst>(Call);
// Insert the new invoke into the old block. We'll remove the old one in a
// moment at which point this will become the new terminator for the
// original block.
- InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
- StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
- ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
- GCArgs, "statepoint_token");
+ InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
+ StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
+ II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
+ "statepoint_token");
- Invoke->setCallingConv(ToReplace->getCallingConv());
+ SPInvoke->setCallingConv(II->getCallingConv());
// Currently we will fail on parameter attributes and on certain
     // function attributes. If we can handle this set of attributes, we
// set up function attrs directly on statepoint and return attrs later for
// gc_result intrinsic.
- Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
+ SPInvoke->setAttributes(legalizeCallAttributes(II->getAttributes()));
- Token = Invoke;
+ Token = SPInvoke;
// Generate gc relocates in exceptional path
- BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
+ BasicBlock *UnwindBlock = II->getUnwindDest();
assert(!isa<PHINode>(UnwindBlock->begin()) &&
UnwindBlock->getUniquePredecessor() &&
"can't safely insert in this block!");
Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
- Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+ Builder.SetCurrentDebugLocation(II->getDebugLoc());
// Attach exceptional gc relocates to the landingpad.
Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
@@ -1551,7 +1545,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
Builder);
// Generate gc relocates and returns for normal block
- BasicBlock *NormalDest = ToReplace->getNormalDest();
+ BasicBlock *NormalDest = II->getNormalDest();
assert(!isa<PHINode>(NormalDest->begin()) &&
NormalDest->getUniquePredecessor() &&
"can't safely insert in this block!");
@@ -1568,16 +1562,15 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// transform the tail-call like structure to a call to a void function
// followed by unreachable to get better codegen.
Replacements.push_back(
- DeferredReplacement::createDeoptimizeReplacement(CS.getInstruction()));
+ DeferredReplacement::createDeoptimizeReplacement(Call));
} else {
Token->setName("statepoint_token");
- if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
- StringRef Name =
- CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
- CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+ if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
+ StringRef Name = Call->hasName() ? Call->getName() : "";
+ CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
GCResult->setAttributes(
AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
- CS.getAttributes().getRetAttributes()));
+ Call->getAttributes().getRetAttributes()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1586,10 +1579,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// after the live sets have been made explicit in the IR, and we no longer
// have raw pointers to worry about.
Replacements.emplace_back(
- DeferredReplacement::createRAUW(CS.getInstruction(), GCResult));
+ DeferredReplacement::createRAUW(Call, GCResult));
} else {
- Replacements.emplace_back(
- DeferredReplacement::createDelete(CS.getInstruction()));
+ Replacements.emplace_back(DeferredReplacement::createDelete(Call));
}
}
@@ -1606,7 +1598,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// WARNING: Does not do any fixup to adjust users of the original live
 // values. That's the caller's responsibility.
static void
-makeStatepointExplicit(DominatorTree &DT, CallSite CS,
+makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
PartiallyConstructedSafepointRecord &Result,
std::vector<DeferredReplacement> &Replacements) {
const auto &LiveSet = Result.LiveSet;
@@ -1625,7 +1617,7 @@ makeStatepointExplicit(DominatorTree &DT, CallSite CS,
assert(LiveVec.size() == BaseVec.size());
// Do the actual rewriting and delete the old statepoint
- makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
+ makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements);
}
// Helper function for the relocationViaAlloca.
@@ -1636,7 +1628,7 @@ makeStatepointExplicit(DominatorTree &DT, CallSite CS,
// for sanity checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
- DenseMap<Value *, Value *> &AllocaMap,
+ DenseMap<Value *, AllocaInst *> &AllocaMap,
DenseSet<Value *> &VisitedLiveValues) {
for (User *U : GCRelocs) {
GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
@@ -1671,7 +1663,7 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
// "insertRelocationStores" but works for rematerialized values.
static void insertRematerializationStores(
const RematerializedValueMapTy &RematerializedValues,
- DenseMap<Value *, Value *> &AllocaMap,
+ DenseMap<Value *, AllocaInst *> &AllocaMap,
DenseSet<Value *> &VisitedLiveValues) {
for (auto RematerializedValuePair: RematerializedValues) {
Instruction *RematerializedValue = RematerializedValuePair.first;
@@ -1704,7 +1696,7 @@ static void relocationViaAlloca(
#endif
// TODO-PERF: change data structures, reserve
- DenseMap<Value *, Value *> AllocaMap;
+ DenseMap<Value *, AllocaInst *> AllocaMap;
SmallVector<AllocaInst *, 200> PromotableAllocas;
   // Used later to check that we have enough allocas to store all values
std::size_t NumRematerializedValues = 0;
@@ -1774,7 +1766,7 @@ static void relocationViaAlloca(
SmallVector<AllocaInst *, 64> ToClobber;
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
- AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+ AllocaInst *Alloca = Pair.second;
// This value was relocated
if (VisitedLiveValues.count(Def)) {
@@ -1806,7 +1798,7 @@ static void relocationViaAlloca(
// Update use with load allocas and add store for gc_relocated.
for (auto Pair : AllocaMap) {
Value *Def = Pair.first;
- Value *Alloca = Pair.second;
+ AllocaInst *Alloca = Pair.second;
     // We pre-record the uses of allocas so that we don't have to worry about
     // later updates that change the user information.
@@ -1834,13 +1826,15 @@ static void relocationViaAlloca(
PHINode *Phi = cast<PHINode>(Use);
for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
if (Def == Phi->getIncomingValue(i)) {
- LoadInst *Load = new LoadInst(
- Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+ LoadInst *Load =
+ new LoadInst(Alloca->getAllocatedType(), Alloca, "",
+ Phi->getIncomingBlock(i)->getTerminator());
Phi->setIncomingValue(i, Load);
}
}
} else {
- LoadInst *Load = new LoadInst(Alloca, "", Use);
+ LoadInst *Load =
+ new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
Use->replaceUsesOfWith(Def, Load);
}
}
@@ -1893,25 +1887,25 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
/// Insert holders so that each Value is obviously live through the entire
/// lifetime of the call.
-static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
+static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
SmallVectorImpl<CallInst *> &Holders) {
if (Values.empty())
// No values to hold live, might as well not insert the empty holder
return;
- Module *M = CS.getInstruction()->getModule();
+ Module *M = Call->getModule();
// Use a dummy vararg function to actually hold the values live
- Function *Func = cast<Function>(M->getOrInsertFunction(
- "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
- if (CS.isCall()) {
+ FunctionCallee Func = M->getOrInsertFunction(
+ "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
+ if (isa<CallInst>(Call)) {
// For call safepoints insert dummy calls right after safepoint
- Holders.push_back(CallInst::Create(Func, Values, "",
- &*++CS.getInstruction()->getIterator()));
+ Holders.push_back(
+ CallInst::Create(Func, Values, "", &*++Call->getIterator()));
return;
}
   // For invoke safepoints insert dummy calls both in normal and
// exceptional destination blocks
- auto *II = cast<InvokeInst>(CS.getInstruction());
+ auto *II = cast<InvokeInst>(Call);
Holders.push_back(CallInst::Create(
Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
Holders.push_back(CallInst::Create(
@@ -1919,7 +1913,7 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
}
static void findLiveReferences(
- Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
+ Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
GCPtrLivenessData OriginalLivenessData;
computeLiveInValues(DT, F, OriginalLivenessData);
@@ -2022,7 +2016,7 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh
 // to relocate. Remove these values from the live set, rematerialize them after
// statepoint and record them in "Info" structure. Note that similar to
// relocated values we don't do any user adjustments here.
-static void rematerializeLiveValues(CallSite CS,
+static void rematerializeLiveValues(CallBase *Call,
PartiallyConstructedSafepointRecord &Info,
TargetTransformInfo &TTI) {
const unsigned int ChainLengthThreshold = 10;
@@ -2076,7 +2070,7 @@ static void rematerializeLiveValues(CallSite CS,
// For invokes we need to rematerialize each chain twice - for normal and
// for unwind basic blocks. Model this by multiplying cost by two.
- if (CS.isInvoke()) {
+ if (isa<InvokeInst>(Call)) {
Cost *= 2;
}
// If it's too expensive - skip it
@@ -2144,14 +2138,14 @@ static void rematerializeLiveValues(CallSite CS,
// Different cases for calls and invokes. For invokes we need to clone
// instructions both on normal and unwind path.
- if (CS.isCall()) {
- Instruction *InsertBefore = CS.getInstruction()->getNextNode();
+ if (isa<CallInst>(Call)) {
+ Instruction *InsertBefore = Call->getNextNode();
assert(InsertBefore);
Instruction *RematerializedValue = rematerializeChain(
InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
- InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
+ auto *Invoke = cast<InvokeInst>(Call);
Instruction *NormalInsertBefore =
&*Invoke->getNormalDest()->getFirstInsertionPt();
@@ -2176,25 +2170,25 @@ static void rematerializeLiveValues(CallSite CS,
static bool insertParsePoints(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
- SmallVectorImpl<CallSite> &ToUpdate) {
+ SmallVectorImpl<CallBase *> &ToUpdate) {
#ifndef NDEBUG
// sanity check the input
- std::set<CallSite> Uniqued;
+ std::set<CallBase *> Uniqued;
Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
- for (CallSite CS : ToUpdate)
- assert(CS.getInstruction()->getFunction() == &F);
+ for (CallBase *Call : ToUpdate)
+ assert(Call->getFunction() == &F);
#endif
// When inserting gc.relocates for invokes, we need to be able to insert at
// the top of the successor blocks. See the comment on
   // normalizeForInvokeSafepoint on exactly what is needed. Note that this step
// may restructure the CFG.
- for (CallSite CS : ToUpdate) {
- if (!CS.isInvoke())
+ for (CallBase *Call : ToUpdate) {
+ auto *II = dyn_cast<InvokeInst>(Call);
+ if (!II)
continue;
- auto *II = cast<InvokeInst>(CS.getInstruction());
normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
}
@@ -2207,17 +2201,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// actual safepoint insertion as arguments. This ensures reference operands
// in the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
- for (CallSite CS : ToUpdate) {
+ for (CallBase *Call : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
- for (Value *Arg : GetDeoptBundleOperands(CS)) {
+ for (Value *Arg : GetDeoptBundleOperands(Call)) {
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
- insertUseHolderAfter(CS, DeoptValues, Holders);
+ insertUseHolderAfter(Call, DeoptValues, Holders);
}
SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
@@ -2319,7 +2313,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
for (size_t i = 0; i < Records.size(); i++)
makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
- ToUpdate.clear(); // prevent accident use of invalid CallSites
+  ToUpdate.clear(); // prevent accidental use of invalid calls.
for (auto &PR : Replacements)
PR.doReplacement();
@@ -2384,7 +2378,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
return !Records.empty();
}
-// Handles both return values and arguments for Functions and CallSites.
+// Handles both return values and arguments for Functions and calls.
template <typename AttrHolder>
static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
unsigned Index) {
@@ -2476,12 +2470,13 @@ static void stripNonValidDataFromBody(Function &F) {
stripInvalidMetadataFromInstruction(I);
- if (CallSite CS = CallSite(&I)) {
- for (int i = 0, e = CS.arg_size(); i != e; i++)
- if (isa<PointerType>(CS.getArgument(i)->getType()))
- RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex);
- if (isa<PointerType>(CS.getType()))
- RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ for (int i = 0, e = Call->arg_size(); i != e; i++)
+ if (isa<PointerType>(Call->getArgOperand(i)->getType()))
+ RemoveNonValidAttrAtIndex(Ctx, *Call,
+ i + AttributeList::FirstArgIndex);
+ if (isa<PointerType>(Call->getType()))
+ RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex);
}
}
@@ -2526,12 +2521,11 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");
auto NeedsRewrite = [&TLI](Instruction &I) {
- if (ImmutableCallSite CS = ImmutableCallSite(&I))
- return !callsGCLeafFunction(CS, TLI) && !isStatepoint(CS);
+ if (const auto *Call = dyn_cast<CallBase>(&I))
+ return !callsGCLeafFunction(Call, TLI) && !isStatepoint(Call);
return false;
};
-
// Delete any unreachable statepoints so that we don't have unrewritten
// statepoints surviving this pass. This makes testing easier and the
// resulting IR less confusing to human readers.
@@ -2543,7 +2537,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
   // Gather all the statepoints which need to be rewritten. Be careful to only
// consider those in reachable code since we need to ask dominance queries
// when rewriting. We'll delete the unreachable ones in a moment.
- SmallVector<CallSite, 64> ParsePointNeeded;
+ SmallVector<CallBase *, 64> ParsePointNeeded;
for (Instruction &I : instructions(F)) {
// TODO: only the ones with the flag set!
if (NeedsRewrite(I)) {
@@ -2553,7 +2547,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
// isReachableFromEntry() returns true.
assert(DT.isReachableFromEntry(I.getParent()) &&
"no unreachable blocks expected");
- ParsePointNeeded.push_back(CallSite(&I));
+ ParsePointNeeded.push_back(cast<CallBase>(&I));
}
}
@@ -2602,6 +2596,33 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
}
}
+ // Nasty workaround - The base computation code in the main algorithm doesn't
+ // consider the fact that a GEP can be used to convert a scalar to a vector.
+ // The right fix for this is to integrate GEPs into the base rewriting
+ // algorithm properly, this is just a short term workaround to prevent
+ // crashes by canonicalizing such GEPs into fully vector GEPs.
+ for (Instruction &I : instructions(F)) {
+ if (!isa<GetElementPtrInst>(I))
+ continue;
+
+ unsigned VF = 0;
+ for (unsigned i = 0; i < I.getNumOperands(); i++)
+ if (I.getOperand(i)->getType()->isVectorTy()) {
+ assert(VF == 0 ||
+ VF == I.getOperand(i)->getType()->getVectorNumElements());
+ VF = I.getOperand(i)->getType()->getVectorNumElements();
+ }
+
+ // It's the vector to scalar traversal through the pointer operand which
+ // confuses base pointer rewriting, so limit ourselves to that case.
+ if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
+ IRBuilder<> B(&I);
+ auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
+ I.setOperand(0, Splat);
+ MadeChange = true;
+ }
+ }
+
MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
return MadeChange;
}
@@ -2786,11 +2807,10 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
}
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
- CallSite CS,
+ CallBase *Call,
PartiallyConstructedSafepointRecord &Info) {
- Instruction *Inst = CS.getInstruction();
StatepointLiveSetTy Updated;
- findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+ findLiveSetAtInst(Call, RevisedLivenessData, Updated);
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
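The RewriteStatepointsForGC changes above are mostly one mechanical migration: helpers that used to take a CallSite now take a CallBase *, and the CS.getInstruction() hop disappears because a CallBase is itself the Instruction. A minimal sketch of the pattern, illustrative only and not part of the patch; the helper name below is made up, and it relies only on APIs present in this tree (CallBase is declared in llvm/IR/InstrTypes.h):

  // Hypothetical helper showing the CallSite -> CallBase idiom used above.
  #include "llvm/IR/InstrTypes.h"  // llvm::CallBase
  #include "llvm/IR/Instruction.h"

  using namespace llvm;

  // Old style: if (CallSite CS = CallSite(&I)) { ... CS.getCalledValue(); }
  // New style: dyn_cast to CallBase and use the instruction directly.
  static bool isIndirectCallLike(Instruction &I) {
    if (auto *Call = dyn_cast<CallBase>(&I))
      return !Call->getCalledFunction(); // callee not statically known
    return false;
  }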
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 2f6ed05c023b..4093e50ce899 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1,9 +1,8 @@
//===- SCCP.cpp - Sparse Conditional Constant Propagation -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -210,11 +210,11 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// TrackedRetVals - If we are tracking arguments into and the return
/// value out of a function, it will have an entry in this map, indicating
/// what the known return value for the function is.
- DenseMap<Function *, LatticeVal> TrackedRetVals;
+ MapVector<Function *, LatticeVal> TrackedRetVals;
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
/// that return multiple values.
- DenseMap<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;
+ MapVector<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;
/// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
/// represented here for efficient lookup.
@@ -372,7 +372,7 @@ public:
}
/// getTrackedRetVals - Get the inferred return value map.
- const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
+ const MapVector<Function*, LatticeVal> &getTrackedRetVals() {
return TrackedRetVals;
}
@@ -614,6 +614,7 @@ private:
void visitCastInst(CastInst &I);
void visitSelectInst(SelectInst &I);
+ void visitUnaryOperator(Instruction &I);
void visitBinaryOperator(Instruction &I);
void visitCmpInst(CmpInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
@@ -639,6 +640,11 @@ private:
visitTerminator(II);
}
+ void visitCallBrInst (CallBrInst &CBI) {
+ visitCallSite(&CBI);
+ visitTerminator(CBI);
+ }
+
void visitCallSite (CallSite CS);
void visitResumeInst (ResumeInst &I) { /*returns void*/ }
void visitUnreachableInst(UnreachableInst &I) { /*returns void*/ }
@@ -734,6 +740,13 @@ void SCCPSolver::getFeasibleSuccessors(Instruction &TI,
return;
}
+ // In case of callbr, we pessimistically assume that all successors are
+ // feasible.
+ if (isa<CallBrInst>(&TI)) {
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -825,7 +838,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
// If we are tracking the return value of this function, merge it in.
if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
- DenseMap<Function*, LatticeVal>::iterator TFRVI =
+ MapVector<Function*, LatticeVal>::iterator TFRVI =
TrackedRetVals.find(F);
if (TFRVI != TrackedRetVals.end()) {
mergeInValue(TFRVI->second, F, getValueState(ResultOp));
@@ -958,6 +971,29 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
markOverdefined(&I);
}
+// Handle Unary Operators.
+void SCCPSolver::visitUnaryOperator(Instruction &I) {
+ LatticeVal V0State = getValueState(I.getOperand(0));
+
+ LatticeVal &IV = ValueState[&I];
+ if (IV.isOverdefined()) return;
+
+ if (V0State.isConstant()) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), V0State.getConstant());
+
+ // op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return (void)markConstant(IV, &I, C);
+ }
+
+ // If something is undef, wait for it to resolve.
+ if (!V0State.isOverdefined())
+ return;
+
+ markOverdefined(&I);
+}
+
// Handle Binary Operators.
void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal V1State = getValueState(I.getOperand(0));
@@ -1232,7 +1268,7 @@ CallOverdefined:
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
- canConstantFoldCallTo(CS, F)) {
+ canConstantFoldCallTo(cast<CallBase>(CS.getInstruction()), F)) {
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
@@ -1253,7 +1289,8 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) {
+ if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()), F,
+ Operands, TLI)) {
// call -> undef.
if (isa<UndefValue>(C))
return;
@@ -1315,7 +1352,7 @@ CallOverdefined:
mergeInValue(getStructValueState(I, i), I,
TrackedMultipleRetVals[std::make_pair(F, i)]);
} else {
- DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
+ MapVector<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
if (TFRVI == TrackedRetVals.end())
goto CallOverdefined; // Not tracking this callee.
@@ -1472,6 +1509,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
else
markOverdefined(&I);
return true;
+ case Instruction::FNeg:
+ break; // fneg undef -> undef
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -1598,6 +1637,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
case Instruction::Call:
case Instruction::Invoke:
+ case Instruction::CallBr:
// There are two reasons a call can have an undef result
// 1. It could be tracked.
// 2. It could be constant-foldable.
@@ -2070,12 +2110,22 @@ bool llvm::runIPSCCP(
// If we have forced an edge for an indeterminate value, then force the
// terminator to fold to that edge.
forceIndeterminateEdge(I, Solver);
- bool Folded = ConstantFoldTerminator(I->getParent(),
+ BasicBlock *InstBB = I->getParent();
+ bool Folded = ConstantFoldTerminator(InstBB,
/*DeleteDeadConditions=*/false,
/*TLI=*/nullptr, &DTU);
assert(Folded &&
"Expect TermInst on constantint or blockaddress to be folded");
(void) Folded;
+ // If we folded the terminator to an unconditional branch to another
+ // dead block, replace it with Unreachable, to avoid trying to fold that
+ // branch again.
+ BranchInst *BI = cast<BranchInst>(InstBB->getTerminator());
+ if (BI && BI->isUnconditional() &&
+ !Solver.isBlockExecutable(BI->getSuccessor(0))) {
+ InstBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(InstBB->getContext(), InstBB);
+ }
}
// Mark dead BB for deletion.
DTU.deleteBB(DeadBB);
@@ -2109,7 +2159,7 @@ bool llvm::runIPSCCP(
// whether other functions are optimizable.
SmallVector<ReturnInst*, 8> ReturnsToZap;
- const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
+ const MapVector<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (const auto &I : RV) {
Function *F = I.first;
if (I.second.isOverdefined() || F->getReturnType()->isVoidTy())
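One non-mechanical detail in the SCCP change above: TrackedRetVals and TrackedMultipleRetVals move from DenseMap to MapVector, which preserves insertion order, so the loop over getTrackedRetVals() in runIPSCCP visits functions deterministically across runs. A tiny standalone sketch of the difference, illustrative only and not part of the patch:

  #include "llvm/ADT/MapVector.h"

  static void orderExample() {
    llvm::MapVector<int, int> MV;
    MV.insert({42, 1});
    MV.insert({7, 2});
    // Visits (42,1) then (7,2): MapVector iterates in insertion order, while
    // a DenseMap walk over the same keys depends on the hash table layout.
    for (auto &KV : MV)
      (void)KV.second;
  }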
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index eab77cf4cda9..33f90d0b01e4 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1,9 +1,8 @@
//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -222,13 +221,6 @@ public:
} // end anonymous namespace
-namespace llvm {
-
-template <typename T> struct isPodLike;
-template <> struct isPodLike<Slice> { static const bool value = true; };
-
-} // end namespace llvm
-
/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
@@ -721,6 +713,13 @@ private:
return Base::visitBitCastInst(BC);
}
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ if (ASC.use_empty())
+ return markAsDead(ASC);
+
+ return Base::visitAddrSpaceCastInst(ASC);
+ }
+
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
@@ -784,7 +783,10 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- const DataLayout &DL = LI.getModule()->getDataLayout();
+ if (LI.isVolatile() &&
+ LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&LI);
+
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -796,7 +798,10 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
- const DataLayout &DL = SI.getModule()->getDataLayout();
+ if (SI.isVolatile() &&
+ SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&SI);
+
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
@@ -831,6 +836,11 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // Don't replace this with a store with a different address space. TODO:
+ // Use a store with the casted new alloca?
+ if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
+ return PI.setAborted(&II);
+
insertUse(II, Offset, Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
@@ -850,6 +860,13 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
+ // Don't replace this with a load/store with a different address space.
+ // TODO: Use a store with the casted new alloca?
+ if (II.isVolatile() &&
+ (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
+ II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
+ return PI.setAborted(&II);
+
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
@@ -957,7 +974,7 @@ private:
if (!GEP->hasAllZeroIndices())
return GEP;
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
- !isa<SelectInst>(I)) {
+ !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
return I;
}
@@ -1173,12 +1190,16 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN) {
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN.getParent();
unsigned MaxAlign = 0;
+ uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
+ APInt MaxSize(APWidth, 0);
bool HaveLoad = false;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1197,15 +1218,15 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
if (BBI->mayWriteToMemory())
return false;
+ uint64_t Size = DL.getTypeStoreSizeInBits(LI->getType());
MaxAlign = std::max(MaxAlign, LI->getAlignment());
+ MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
HaveLoad = true;
}
if (!HaveLoad)
return false;
- const DataLayout &DL = PN.getModule()->getDataLayout();
-
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
@@ -1227,7 +1248,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (isSafeToLoadUnconditionally(InVal, MaxAlign, DL, TI))
+ if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
continue;
return false;
@@ -1239,15 +1260,14 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
static void speculatePHINodeLoads(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
- Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
+ LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
+ Type *LoadTy = SomeLoad->getType();
IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
// Get the AA tags and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ.
- LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
-
AAMDNodes AATags;
SomeLoad->getAAMetadata(AATags);
unsigned Align = SomeLoad->getAlignment();
@@ -1278,7 +1298,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
- InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
+ LoadTy, InVal,
+ (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;
Load->setAlignment(Align);
if (AATags)
@@ -1317,9 +1338,11 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) {
// Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
- if (!isSafeToLoadUnconditionally(TValue, LI->getAlignment(), DL, LI))
+ if (!isSafeToLoadUnconditionally(TValue, LI->getType(), LI->getAlignment(),
+ DL, LI))
return false;
- if (!isSafeToLoadUnconditionally(FValue, LI->getAlignment(), DL, LI))
+ if (!isSafeToLoadUnconditionally(FValue, LI->getType(), LI->getAlignment(),
+ DL, LI))
return false;
}
@@ -1338,10 +1361,10 @@ static void speculateSelectInstLoads(SelectInst &SI) {
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
- LoadInst *TL =
- IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
- LoadInst *FL =
- IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
+ LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+ LI->getName() + ".sroa.speculate.load.true");
+ LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+ LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
// Transfer alignment and AA info if present.
@@ -1379,8 +1402,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
- NamePrefix + "sroa_idx");
+ return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(),
+ BasePtr, Indices, NamePrefix + "sroa_idx");
}
/// Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1569,7 +1592,14 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
- Type *TargetTy = PointerTy->getPointerElementType();
+ PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
+ Type *TargetTy = TargetPtrTy->getElementType();
+
+  // As `addrspacecast` may be involved, `Ptr` (the storage pointer) may have a
+  // different address space from the expected `PointerTy` (the pointer to be
+  // used). Adjust the pointer type based on the original storage pointer.
+ auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
+ PointerTy = TargetTy->getPointerTo(AS);
do {
// First fold any existing GEPs into the offset.
@@ -1599,7 +1629,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
OffsetBasePtr = Ptr;
// If we also found a pointer of the right type, we're done.
if (P->getType() == PointerTy)
- return P;
+ break;
}
// Stash this pointer if we've found an i8*.
@@ -1638,8 +1668,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
- if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
+ if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
+ Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
+ TargetPtrTy,
+ NamePrefix + "sroa_cast");
+ }
return Ptr;
}
@@ -2418,14 +2451,16 @@ private:
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
@@ -2469,7 +2504,8 @@ private:
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy()))) {
- LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(),
LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags);
@@ -2505,9 +2541,9 @@ private:
}
} else {
Type *LTy = TargetTy->getPointerTo(AS);
- LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
- getSliceAlign(TargetTy),
- LI.isVolatile(), LI.getName());
+ LoadInst *NewLI = IRB.CreateAlignedLoad(
+ TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(TargetTy),
+ LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags);
if (LI.isVolatile())
@@ -2524,8 +2560,7 @@ private:
"Only integer type loads and stores are split");
assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
"Split load isn't smaller than original load");
- assert(LI.getType()->getIntegerBitWidth() ==
- DL.getTypeStoreSizeInBits(LI.getType()) &&
+ assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
@@ -2533,8 +2568,8 @@ private:
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
- Value *Placeholder =
- new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
+ Value *Placeholder = new LoadInst(
+ LI.getType(), UndefValue::get(LI.getType()->getPointerTo(AS)));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
@@ -2565,7 +2600,8 @@ private:
V = convertValue(DL, IRB, V, SliceTy);
// Mix in the existing elements.
- Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
V = insertVector(IRB, Old, V, BeginIndex, "vec");
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2581,8 +2617,8 @@ private:
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
@@ -2619,8 +2655,7 @@ private:
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
- assert(V->getType()->getIntegerBitWidth() ==
- DL.getTypeStoreSizeInBits(V->getType()) &&
+ assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
@@ -2731,15 +2766,26 @@ private:
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
+
+ const bool CanContinue = [&]() {
+ if (VecTy || IntTy)
+ return true;
+ if (BeginOffset > NewAllocaBeginOffset ||
+ EndOffset < NewAllocaEndOffset)
+ return false;
+ auto *C = cast<ConstantInt>(II.getLength());
+ if (C->getBitWidth() > 64)
+ return false;
+ const auto Len = C->getZExtValue();
+ auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
+ auto *SrcTy = VectorType::get(Int8Ty, Len);
+ return canConvertValue(DL, SrcTy, AllocaTy) &&
+ DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy));
+ }();
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
- if (!VecTy && !IntTy &&
- (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
- SliceSize != DL.getTypeStoreSize(AllocaTy) ||
- !AllocaTy->isSingleValueType() ||
- !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
- DL.getTypeSizeInBits(ScalarTy) % 8 != 0)) {
+ if (!CanContinue) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
@@ -2774,8 +2820,8 @@ private:
if (NumElements > 1)
Splat = getVectorSplat(Splat, NumElements);
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
@@ -2787,8 +2833,8 @@ private:
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaBeginOffset)) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, V, Offset, "insert");
@@ -2948,18 +2994,18 @@ private:
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
+ Type *OtherTy;
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
- OtherPtrTy = VecTy->getElementType();
+ OtherTy = VecTy->getElementType();
else
- OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
-
- OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
+ OtherTy = VectorType::get(VecTy->getElementType(), NumElements);
} else if (IntTy && !IsWholeAlloca) {
- OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
+ OtherTy = SubIntTy;
} else {
- OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
+ OtherTy = NewAllocaTy;
}
+ OtherPtrTy = OtherTy->getPointerTo(OtherAS);
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
@@ -2973,28 +3019,30 @@ private:
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
- Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
- Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
+ Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "load");
Src = convertValue(DL, IRB, Src, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
- LoadInst *Load = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
- "copyload");
+ LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
+ II.isVolatile(), "copyload");
if (AATags)
Load->setAAMetadata(AATags);
Src = Load;
}
if (VecTy && !IsWholeAlloca && IsDest) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
- Value *Old =
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
+ Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+ NewAI.getAlignment(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
@@ -3031,7 +3079,10 @@ private:
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
- Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+ // Lifetime intrinsics always expect an i8* so directly get such a pointer
+ // for the new alloca slice.
+ Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+ Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
@@ -3072,8 +3123,9 @@ private:
continue;
}
- assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
+ isa<PHINode>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I));
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(cast<Instruction>(U));
@@ -3297,8 +3349,8 @@ private:
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
- IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
+ IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
+ LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3342,7 +3394,7 @@ private:
Value *ExtractValue =
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
- IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
+ IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
if (AATags)
@@ -3374,6 +3426,11 @@ private:
return false;
}
+ bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ enqueueUsers(ASC);
+ return false;
+ }
+
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
enqueueUsers(GEPI);
return false;
@@ -3792,6 +3849,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
auto AS = LI->getPointerAddressSpace();
auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
+ PartTy,
getAdjustedPtr(IRB, DL, BasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
@@ -3933,6 +3991,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
IRB.SetInsertPoint(LI);
auto AS = LI->getPointerAddressSpace();
PLoad = IRB.CreateAlignedLoad(
+ PartTy,
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
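Much of the SROA churn above follows one IRBuilder API shift: loads are now created with the loaded type spelled out (CreateLoad / CreateAlignedLoad take a Type * first) rather than derived from the pointer's pointee type. A minimal before/after sketch, assuming only the CreateAlignedLoad signature already used in the hunks above; not part of the change itself:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  static Value *loadWholeAlloca(IRBuilder<> &B, AllocaInst &AI) {
    // Before: B.CreateAlignedLoad(&AI, AI.getAlignment(), "load");
    // After:  pass the element type explicitly, as the rewrites above do.
    return B.CreateAlignedLoad(AI.getAllocatedType(), &AI, AI.getAlignment(),
                               "load");
  }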
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 976daf4c78c2..869cf00e0a89 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -1,9 +1,8 @@
//===-- Scalar.cpp --------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,6 +62,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeJumpThreadingPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
+ initializeLoopFuseLegacyPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
initializeLoopDeletionLegacyPassPass(Registry);
initializeLoopAccessLegacyAnalysisPass(Registry);
@@ -81,8 +81,9 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
+ initializeLowerWidenableConditionLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
- initializeMergeICmpsPass(Registry);
+ initializeMergeICmpsLegacyPassPass(Registry);
initializeMergedLoadStoreMotionLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 5eb3fdab6d5c..2ee1a3a95f2a 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -1,9 +1,8 @@
//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,6 +124,18 @@ struct ICmpSplitter {
ICmpInst &ICI;
};
+// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
+// a unary operator like UO called Name with operand X.
+struct UnarySplitter {
+ UnarySplitter(UnaryOperator &uo) : UO(uo) {}
+
+ Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
+ return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
+ }
+
+ UnaryOperator &UO;
+};
+
 // BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
@@ -174,6 +185,7 @@ public:
bool visitSelectInst(SelectInst &SI);
bool visitICmpInst(ICmpInst &ICI);
bool visitFCmpInst(FCmpInst &FCI);
+ bool visitUnaryOperator(UnaryOperator &UO);
bool visitBinaryOperator(BinaryOperator &BO);
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
bool visitCastInst(CastInst &CI);
@@ -188,11 +200,12 @@ private:
Scatterer scatter(Instruction *Point, Value *V);
void gather(Instruction *Op, const ValueVector &CV);
bool canTransferMetadata(unsigned Kind);
- void transferMetadata(Instruction *Op, const ValueVector &CV);
+ void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout,
const DataLayout &DL);
bool finish();
+ template<typename T> bool splitUnary(Instruction &, const T &);
template<typename T> bool splitBinary(Instruction &, const T &);
bool splitCall(CallInst &CI);
@@ -246,14 +259,13 @@ Value *Scatterer::operator[](unsigned I) {
return CV[I];
IRBuilder<> Builder(BB, BBI);
if (PtrTy) {
+ Type *ElTy = PtrTy->getElementType()->getVectorElementType();
if (!CV[0]) {
- Type *Ty =
- PointerType::get(PtrTy->getElementType()->getVectorElementType(),
- PtrTy->getAddressSpace());
- CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
+ Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
+ CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
}
if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
+ CV[I] = Builder.CreateConstGEP1_32(ElTy, CV[0], I,
V->getName() + ".i" + Twine(I));
} else {
// Search through a chain of InsertElementInsts looking for element I.
@@ -349,7 +361,7 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
- transferMetadata(Op, CV);
+ transferMetadataAndIRFlags(Op, CV);
// If we already have a scattered form of Op (created from ExtractElements
// of Op itself), replace them with the new form.
@@ -385,7 +397,8 @@ bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
-void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
+ const ValueVector &CV) {
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
Op->getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
@@ -393,6 +406,7 @@ void ScalarizerVisitor::transferMetadata(Instruction *Op, const ValueVector &CV)
for (const auto &MD : MDs)
if (canTransferMetadata(MD.first))
New->setMetadata(MD.first, MD.second);
+ New->copyIRFlags(Op);
if (Op->getDebugLoc() && !New->getDebugLoc())
New->setDebugLoc(Op->getDebugLoc());
}
@@ -410,8 +424,7 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
// Check that we're dealing with full-byte elements.
Layout.ElemTy = Layout.VecTy->getElementType();
- if (DL.getTypeSizeInBits(Layout.ElemTy) !=
- DL.getTypeStoreSizeInBits(Layout.ElemTy))
+ if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
return false;
if (Alignment)
@@ -422,6 +435,26 @@ bool ScalarizerVisitor::getVectorLayout(Type *Ty, unsigned Alignment,
return true;
}
+// Scalarize one-operand instruction I, using Split(Builder, X, Name)
+// to create an instruction like I with operand X and name Name.
+template<typename Splitter>
+bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
+ VectorType *VT = dyn_cast<VectorType>(I.getType());
+ if (!VT)
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ IRBuilder<> Builder(&I);
+ Scatterer Op = scatter(&I, I.getOperand(0));
+ assert(Op.size() == NumElems && "Mismatched unary operation");
+ ValueVector Res;
+ Res.resize(NumElems);
+ for (unsigned Elem = 0; Elem < NumElems; ++Elem)
+ Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
+ gather(&I, Res);
+ return true;
+}
+
// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
@@ -554,6 +587,10 @@ bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
return splitBinary(FCI, FCmpSplitter(FCI));
}
+bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
+ return splitUnary(UO, UnarySplitter(UO));
+}
+
bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
return splitBinary(BO, BinarySplitter(BO));
}
@@ -744,7 +781,8 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
Res.resize(NumElems);
for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
+ Res[I] = Builder.CreateAlignedLoad(Layout.VecTy->getElementType(), Ptr[I],
+ Layout.getElemAlign(I),
LI.getName() + ".i" + Twine(I));
gather(&LI, Res);
return true;
@@ -773,7 +811,7 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
unsigned Align = Layout.getElemAlign(I);
Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
}
- transferMetadata(&SI, Stores);
+ transferMetadataAndIRFlags(&SI, Stores);
return true;
}
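The unary scalarization added above reuses the splitter pattern already used for the binary and compare cases: a small functor captures the original instruction and builds one scalar clone per lane, while splitUnary only walks the lanes and names the results. A minimal sketch of that shape, using plain C++ stand-ins (int for llvm::Value *, std::function for the splitter) rather than the real IRBuilder types:

#include <cstddef>
#include <functional>
#include <string>
#include <vector>

using Elem = int;                                        // stand-in for llvm::Value *
using UnarySplitFn = std::function<Elem(Elem, const std::string &)>;

// Mirrors ScalarizerVisitor::splitUnary: one result lane per operand lane,
// each named <base>.i<lane>, with per-lane construction delegated to the functor.
std::vector<Elem> splitUnaryLanes(const std::vector<Elem> &Op,
                                  const std::string &Base,
                                  const UnarySplitFn &Split) {
  std::vector<Elem> Res(Op.size());
  for (std::size_t I = 0; I != Op.size(); ++I)
    Res[I] = Split(Op[I], Base + ".i" + std::to_string(I));
  return Res;
}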
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 967f4a42a8fb..f6a12fb13142 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1,9 +1,8 @@
//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 5a67178cef37..aeac6f548b32 100644
--- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1,9 +1,8 @@
///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -181,14 +180,9 @@ static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
BasicBlock &UnswitchedSucc,
BasicBlock &NormalSucc) {
IRBuilder<> IRB(&BB);
- Value *Cond = Invariants.front();
- for (Value *Invariant :
- make_range(std::next(Invariants.begin()), Invariants.end()))
- if (Direction)
- Cond = IRB.CreateOr(Cond, Invariant);
- else
- Cond = IRB.CreateAnd(Cond, Invariant);
-
+
+ Value *Cond = Direction ? IRB.CreateOr(Invariants) :
+ IRB.CreateAnd(Invariants);
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
@@ -268,7 +262,8 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
/// loops reachable and need to move the current loop up the loop nest or even
/// to an entirely separate nest.
static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
- DominatorTree &DT, LoopInfo &LI) {
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU) {
// If the loop is already at the top level, we can't hoist it anywhere.
Loop *OldParentL = L.getParentLoop();
if (!OldParentL)
@@ -329,7 +324,8 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// unswitching it is possible to get new non-dedicated exits out of parent
// loop so let's conservatively form dedicated exit blocks and figure out
// if we can optimize later.
- formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA*/ true);
+ formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
+ /*PreserveLCSSA*/ true);
}
}
@@ -536,7 +532,10 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
// If this was full unswitching, we may have changed the nesting relationship
// for this loop so hoist it to its correct parent if needed.
if (FullUnswitch)
- hoistLoopToNewParent(L, *NewPH, DT, LI);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
++NumTrivial;
@@ -590,11 +589,13 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
ExitCaseIndices.push_back(Case.getCaseIndex());
}
BasicBlock *DefaultExitBB = nullptr;
+ SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
+ SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
if (!L.contains(SI.getDefaultDest()) &&
areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) &&
- !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator()))
+ !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) {
DefaultExitBB = SI.getDefaultDest();
- else if (ExitCaseIndices.empty())
+ } else if (ExitCaseIndices.empty())
return false;
LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
@@ -618,8 +619,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Store the exit cases into a separate data structure and remove them from
// the switch.
- SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases;
+ SmallVector<std::tuple<ConstantInt *, BasicBlock *,
+ SwitchInstProfUpdateWrapper::CaseWeightOpt>,
+ 4> ExitCases;
ExitCases.reserve(ExitCaseIndices.size());
+ SwitchInstProfUpdateWrapper SIW(SI);
// We walk the case indices backwards so that we remove the last case first
// and don't disrupt the earlier indices.
for (unsigned Index : reverse(ExitCaseIndices)) {
@@ -629,9 +633,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
if (!ExitL || ExitL->contains(OuterL))
OuterL = ExitL;
// Save the value of this case.
- ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()});
+ auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
+ ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
// Delete the unswitched cases.
- SI.removeCase(CaseI);
+ SIW.removeCase(CaseI);
}
if (SE) {
@@ -669,6 +674,7 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// Now add the unswitched switch.
auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
+ SwitchInstProfUpdateWrapper NewSIW(*NewSI);
// Rewrite the IR for the unswitched basic blocks. This requires two steps.
// First, we split any exit blocks with remaining in-loop predecessors. Then
@@ -696,9 +702,9 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
}
// Note that we must use a reference in the for loop so that we update the
// container.
- for (auto &CasePair : reverse(ExitCases)) {
+ for (auto &ExitCase : reverse(ExitCases)) {
// Grab a reference to the exit block in the pair so that we can update it.
- BasicBlock *ExitBB = CasePair.second;
+ BasicBlock *ExitBB = std::get<1>(ExitCase);
// If this case is the last edge into the exit block, we can simply reuse it
// as it will no longer be a loop exit. No mapping necessary.
@@ -720,27 +726,39 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
/*FullUnswitch*/ true);
}
// Update the case pair to point to the split block.
- CasePair.second = SplitExitBB;
+ std::get<1>(ExitCase) = SplitExitBB;
}
// Now add the unswitched cases. We do this in reverse order as we built them
// in reverse order.
- for (auto CasePair : reverse(ExitCases)) {
- ConstantInt *CaseVal = CasePair.first;
- BasicBlock *UnswitchedBB = CasePair.second;
+ for (auto &ExitCase : reverse(ExitCases)) {
+ ConstantInt *CaseVal = std::get<0>(ExitCase);
+ BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
- NewSI->addCase(CaseVal, UnswitchedBB);
+ NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
}
// If the default was unswitched, re-point it and add explicit cases for
// entering the loop.
if (DefaultExitBB) {
- NewSI->setDefaultDest(DefaultExitBB);
+ NewSIW->setDefaultDest(DefaultExitBB);
+ NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
// We removed all the exit cases, so we just copy the cases to the
// unswitched switch.
- for (auto Case : SI.cases())
- NewSI->addCase(Case.getCaseValue(), NewPH);
+ for (const auto &Case : SI.cases())
+ NewSIW.addCase(Case.getCaseValue(), NewPH,
+ SIW.getSuccessorWeight(Case.getSuccessorIndex()));
+ } else if (DefaultCaseWeight) {
+ // We have to set the branch weight of the default case.
+ uint64_t SW = *DefaultCaseWeight;
+ for (const auto &Case : SI.cases()) {
+ auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
+ assert(W &&
+ "case weight must be defined as default case weight is defined");
+ SW += *W;
+ }
+ NewSIW.setSuccessorWeight(0, SW);
}
// If we ended up with a common successor for every path through the switch
@@ -762,10 +780,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
continue;
}
CommonSuccBB->removePredecessor(BB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
}
// Now nuke the switch and replace it with a direct branch.
- SI.eraseFromParent();
+ SIW.eraseFromParent();
BranchInst::Create(CommonSuccBB, BB);
} else if (DefaultExitBB) {
assert(SI.getNumCases() > 0 &&
@@ -775,8 +793,11 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// being simple and keeping the number of edges from this switch to
// successors the same, and avoiding any PHI update complexity.
auto LastCaseI = std::prev(SI.case_end());
+
SI.setDefaultDest(LastCaseI->getCaseSuccessor());
- SI.removeCase(LastCaseI);
+ SIW.setSuccessorWeight(
+ 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
+ SIW.removeCase(LastCaseI);
}
// Walk the unswitched exit blocks and the unswitched split blocks and update
@@ -789,9 +810,8 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
}
for (auto SplitUnswitchedPair : SplitExitBBMap) {
- auto *UnswitchedBB = SplitUnswitchedPair.second;
- DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedBB});
- DTUpdates.push_back({DT.Insert, OldPH, UnswitchedBB});
+ DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
+ DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
}
DT.applyUpdates(DTUpdates);
@@ -805,7 +825,10 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// We may have changed the nesting relationship for this loop so hoist it to
// its correct parent if needed.
- hoistLoopToNewParent(L, *NewPH, DT, LI);
+ hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
++NumTrivial;
++NumSwitches;
@@ -848,6 +871,10 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
// Check if there are any side-effecting instructions (e.g. stores, calls,
// volatile loads) in the part of the loop that the code *would* execute
// without unswitching.
+ if (MSSAU) // Possible early exit with MSSA
+ if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
+ if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
+ return Changed;
if (llvm::any_of(*CurrentBB,
[](Instruction &I) { return I.mayHaveSideEffects(); }))
return Changed;
@@ -1066,7 +1093,7 @@ static BasicBlock *buildClonedLoopBlocks(
continue;
ClonedSuccBB->removePredecessor(ClonedParentBB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
}
// Replace the cloned branch with an unconditional branch to the cloned
@@ -1436,8 +1463,8 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
// Remove all MemorySSA in the dead blocks
if (MSSAU) {
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
- DeadBlocks.end());
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
+ DeadBlocks.end());
MSSAU->removeBlocks(DeadBlockSet);
}
@@ -1455,7 +1482,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
MemorySSAUpdater *MSSAU) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet;
// Start with loop/exit blocks and get a transitive closure of reachable dead
// blocks.
@@ -1712,10 +1739,9 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
// Sort the exits in ascending loop depth, we'll work backwards across these
// to process them inside out.
- std::stable_sort(ExitsInLoops.begin(), ExitsInLoops.end(),
- [&](BasicBlock *LHS, BasicBlock *RHS) {
- return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
- });
+ llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
+ return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
+ });
// We'll build up a set for each exit loop.
SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
@@ -2075,7 +2101,7 @@ static void unswitchNontrivialInvariants(
"Only one possible unswitched block for a branch!");
BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
UnswitchedSuccBB->removePredecessor(ParentBB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
} else {
// Note that we actually want to remove the parent block as a predecessor
@@ -2090,7 +2116,7 @@ static void unswitchNontrivialInvariants(
for (auto &Case : NewSI->cases())
Case.getCaseSuccessor()->removePredecessor(
ParentBB,
- /*DontDeleteUselessPHIs*/ true);
+ /*KeepOneInputPHIs*/ true);
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
@@ -2236,7 +2262,7 @@ static void unswitchNontrivialInvariants(
// introduced new, non-dedicated exits. At least try to re-form dedicated
// exits for these loops. This may fail if they couldn't have dedicated
// exits to start with.
- formDedicatedExitBlocks(&UpdateL, &DT, &LI, /*PreserveLCSSA*/ true);
+ formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
};
// For non-child cloned loops and hoisted loops, we just need to update LCSSA
@@ -2526,7 +2552,7 @@ unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
// We can only consider fully loop-invariant switch conditions as we need
// to completely eliminate the switch after unswitching.
if (!isa<Constant>(SI->getCondition()) &&
- L.isLoopInvariant(SI->getCondition()))
+ L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
UnswitchCandidates.push_back({SI, {SI->getCondition()}});
continue;
}
@@ -2852,7 +2878,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
// Historically this pass has had issues with the dominator tree so verify it
// in asserts builds.
assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast));
- return getLoopPassPreservedAnalyses();
+
+ auto PA = getLoopPassPreservedAnalyses();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
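The branch-weight bookkeeping above keeps profile data consistent while cases are peeled out of the switch: when the default destination stays in the loop, the default edge of the new outer switch ends up carrying the old default weight plus the weights of every case that remained in the loop. A hedged sketch of just that arithmetic, with std::optional<uint64_t> standing in for SwitchInstProfUpdateWrapper::CaseWeightOpt:

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

using CaseWeightOpt = std::optional<uint64_t>;

// New weight of the default edge on the unswitched outer switch when the
// default case itself was not unswitched (the "else if (DefaultCaseWeight)"
// branch above).
uint64_t defaultEdgeWeight(CaseWeightOpt DefaultCaseWeight,
                           const std::vector<CaseWeightOpt> &RemainingCaseWeights) {
  assert(DefaultCaseWeight && "only reached when a default weight exists");
  uint64_t SW = *DefaultCaseWeight;
  for (const CaseWeightOpt &W : RemainingCaseWeights) {
    assert(W && "case weights must exist whenever the default weight exists");
    SW += *W;
  }
  return SW;
}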
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b7b1db76b492..4544975a4887 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -1,9 +1,8 @@
//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c99da8f0737a..90f3a2aa46e1 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -1,9 +1,8 @@
//===-- Sink.cpp - Code Sinking -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index c0f75ddddbe0..c13fb3e04516 100644
--- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -1,9 +1,8 @@
//===- SpeculateAroundPHIs.cpp --------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -68,6 +67,14 @@ isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT,
return false;
}
+ if (auto CS = ImmutableCallSite(UI)) {
+ if (CS.isConvergent() || CS.cannotDuplicate()) {
+ LLVM_DEBUG(dbgs() << " Unsafe: convergent "
+ "callsite cannot be duplicated: " << *UI << '\n');
+ return false;
+ }
+ }
+
// FIXME: This check is much too conservative. We're not going to move these
// instructions onto new dynamic paths through the program unless there is
// a call instruction between the use and the PHI node. And memory isn't
diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp
index f5e1dd6ed850..f9d027eb4a3b 100644
--- a/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -1,9 +1,8 @@
//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -241,6 +240,7 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
+ case Instruction::FNeg:
case Instruction::ICmp:
case Instruction::FCmp:
return TTI.getUserCost(I);
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index b5089b006bdd..a58c32cc5894 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -1,9 +1,8 @@
//===- StraightLineStrengthReduce.cpp - -----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -683,9 +682,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
// Canonicalize bump to pointer size.
Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
if (InBounds)
- Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
+ Reduced = Builder.CreateInBoundsGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+ Basis.Ins, Bump);
else
- Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
+ Reduced = Builder.CreateGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
+ Basis.Ins, Bump);
}
break;
}
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0db762d846f2..e5400676c7e8 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1,9 +1,8 @@
//===- StructurizeCFG.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -63,6 +62,11 @@ static cl::opt<bool> ForceSkipUniformRegions(
cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
cl::init(false));
+static cl::opt<bool>
+ RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
+ cl::desc("Allow relaxed uniform region checks"),
+ cl::init(false));
+
// Definition of the complex types used in this pass.
using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -624,11 +628,8 @@ void StructurizeCFG::setPhiValues() {
if (!Dominator.resultIsRememberedBlock())
Updater.AddAvailableValue(Dominator.result(), Undef);
- for (BasicBlock *FI : From) {
- int Idx = Phi->getBasicBlockIndex(FI);
- assert(Idx != -1);
- Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI));
- }
+ for (BasicBlock *FI : From)
+ Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
}
DeletedPhis.erase(To);
@@ -937,6 +938,11 @@ void StructurizeCFG::rebuildSSA() {
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
const LegacyDivergenceAnalysis &DA) {
+ // Whether all sub-regions are uniform.
+ bool SubRegionsAreUniform = true;
+ // Count of how many direct children are conditional.
+ unsigned ConditionalDirectChildren = 0;
+
for (auto E : R->elements()) {
if (!E->isSubRegion()) {
auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
@@ -945,6 +951,10 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
if (!DA.isUniform(Br))
return false;
+
+ // One of our direct children is conditional.
+ ConditionalDirectChildren++;
+
LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
<< " has uniform terminator\n");
} else {
@@ -962,12 +972,25 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
if (!Br || !Br->isConditional())
continue;
- if (!Br->getMetadata(UniformMDKindID))
- return false;
+ if (!Br->getMetadata(UniformMDKindID)) {
+ // Early exit if we cannot have relaxed uniform regions.
+ if (!RelaxedUniformRegions)
+ return false;
+
+ SubRegionsAreUniform = false;
+ break;
+ }
}
}
}
- return true;
+
+ // Our region is uniform if:
+ // 1. All conditional branches that are direct children are uniform (checked
+ // above).
+ // 2. And either:
+ // a. All sub-regions are uniform.
+ // b. There is at most one conditional branch among the direct children.
+ return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
}
/// Run the transformation for each region found
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0f6db21f73b6..f0b79079d817 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -1,9 +1,8 @@
//===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,6 +55,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -69,7 +69,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
@@ -341,7 +340,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
// being loaded from.
const DataLayout &DL = L->getModule()->getDataLayout();
if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
- !isSafeToLoadUnconditionally(L->getPointerOperand(),
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
L->getAlignment(), DL, L))
return false;
}
@@ -679,7 +678,7 @@ static bool eliminateRecursiveTailCall(
BB->getInstList().erase(Ret); // Remove return.
BB->getInstList().erase(CI); // Remove call.
- DTU.insertEdge(BB, OldEntry);
+ DTU.applyUpdates({{DominatorTree::Insert, BB, OldEntry}});
++NumEliminated;
return true;
}
diff --git a/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 80f761e53774..707adf46d1f4 100644
--- a/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -1,9 +1,8 @@
//===- LoopTransformWarning.cpp - ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,7 +92,7 @@ PreservedAnalyses
WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
// Do not warn about not applied transformations if optimizations are
// disabled.
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasOptNone())
return PreservedAnalyses::all();
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 364878dc588d..01912297324a 100644
--- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -1,9 +1,8 @@
//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -63,7 +62,7 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
for (size_t i = 0; i < NumVars; i++)
Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
- std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
+ llvm::stable_sort(Vars, CompareVars);
ASanStackFrameLayout Layout;
Layout.Granularity = Granularity;
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 564537af0c2a..ee0973002c47 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -1,9 +1,8 @@
//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -209,7 +208,7 @@ static bool addDiscriminators(Function &F) {
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
- auto NewDIL = DIL->setBaseDiscriminator(Discriminator);
+ auto NewDIL = DIL->cloneWithBaseDiscriminator(Discriminator);
if (!NewDIL) {
LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
<< DIL->getFilename() << ":" << DIL->getLine() << ":"
@@ -246,7 +245,7 @@ static bool addDiscriminators(Function &F) {
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
unsigned Discriminator = ++LDM[L];
- auto NewDIL = CurrentDIL->setBaseDiscriminator(Discriminator);
+ auto NewDIL = CurrentDIL->cloneWithBaseDiscriminator(Discriminator);
if (!NewDIL) {
LLVM_DEBUG(dbgs()
<< "Could not encode discriminator: "
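The discriminator comment in this file notes that only the lowest 7 bits are kept so the value still fits in a single ULEB128 byte. An illustrative, standalone encoder (not LLVM's own implementation) showing why 0-127 is the one-byte range:

#include <cstdint>
#include <vector>

// Minimal ULEB128 encoder: 7 payload bits per byte, high bit set on every
// byte except the last. Any value <= 0x7f therefore encodes in one byte,
// which is why the pass masks discriminators to their lowest 7 bits.
std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Bytes;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                     // continuation bit
    Bytes.push_back(Byte);
  } while (Value != 0);
  return Bytes;
}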
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 7da768252fc1..5fa371377c85 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1,9 +1,8 @@
//===- BasicBlockUtils.cpp - BasicBlock Utilities --------------------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -26,7 +26,6 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -39,6 +38,8 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
@@ -48,30 +49,20 @@
using namespace llvm;
-void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
- SmallVector<BasicBlock *, 1> BBs = {BB};
- DeleteDeadBlocks(BBs, DTU);
-}
-
-void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
- DomTreeUpdater *DTU) {
-#ifndef NDEBUG
- // Make sure that all predecessors of each dead block is also dead.
- SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
- assert(Dead.size() == BBs.size() && "Duplicating blocks?");
- for (auto *BB : Dead)
- for (BasicBlock *Pred : predecessors(BB))
- assert(Dead.count(Pred) && "All predecessors must be dead!");
-#endif
+#define DEBUG_TYPE "basicblock-utils"
- SmallVector<DominatorTree::UpdateType, 4> Updates;
+void llvm::DetatchDeadBlocks(
+ ArrayRef<BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs) {
for (auto *BB : BBs) {
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
+ SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
for (BasicBlock *Succ : successors(BB)) {
- Succ->removePredecessor(BB);
- if (DTU)
- Updates.push_back({DominatorTree::Delete, BB, Succ});
+ Succ->removePredecessor(BB, KeepOneInputPHIs);
+ if (Updates && UniqueSuccessors.insert(Succ).second)
+ Updates->push_back({DominatorTree::Delete, BB, Succ});
}
// Zap all the instructions in the block.
@@ -92,8 +83,29 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
"The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
}
+}
+
+void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+ DeleteDeadBlocks({BB}, DTU, KeepOneInputPHIs);
+}
+
+void llvm::DeleteDeadBlocks(ArrayRef <BasicBlock *> BBs, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+#ifndef NDEBUG
+ // Make sure that all predecessors of each dead block are also dead.
+ SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
+ assert(Dead.size() == BBs.size() && "Duplicating blocks?");
+ for (auto *BB : Dead)
+ for (BasicBlock *Pred : predecessors(BB))
+ assert(Dead.count(Pred) && "All predecessors must be dead!");
+#endif
+
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ DetatchDeadBlocks(BBs, DTU ? &Updates : nullptr, KeepOneInputPHIs);
+
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
for (BasicBlock *BB : BBs)
if (DTU)
@@ -102,6 +114,28 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
BB->eraseFromParent();
}
+bool llvm::EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU,
+ bool KeepOneInputPHIs) {
+ df_iterator_default_set<BasicBlock*> Reachable;
+
+ // Mark all reachable blocks.
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
+
+ // Collect all dead blocks.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(&*I)) {
+ BasicBlock *BB = &*I;
+ DeadBlocks.push_back(BB);
+ }
+
+ // Delete the dead blocks.
+ DeleteDeadBlocks(DeadBlocks, DTU, KeepOneInputPHIs);
+
+ return !DeadBlocks.empty();
+}
+
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,
MemoryDependenceResults *MemDep) {
if (!isa<PHINode>(BB->begin())) return;
@@ -160,6 +194,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
if (IncValue == &PN)
return false;
+ LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
+ << PredBB->getName() << "\n");
+
// Begin by getting rid of unneeded PHIs.
SmallVector<AssertingVH<Value>, 4> IncomingValues;
if (isa<PHINode>(BB->front())) {
@@ -175,11 +212,19 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
Updates.reserve(1 + (2 * succ_size(BB)));
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ // Add insert edges first. Experimentally, for the particular case of two
+ // blocks that can be merged, with a single successor and single predecessor
+ // respectively, it is beneficial to have all insert updates first. Deleting
+ // edges first may lead to unreachable blocks, followed by inserting edges
+ // making the blocks reachable again. Such DT updates lead to high compile
+ // times. We add inserts before deletes here to reduce compile time.
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ // This successor of BB may already have PredBB as a predecessor.
+ if (llvm::find(successors(PredBB), *I) == succ_end(PredBB))
+ Updates.push_back({DominatorTree::Insert, PredBB, *I});
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
Updates.push_back({DominatorTree::Delete, BB, *I});
- Updates.push_back({DominatorTree::Insert, PredBB, *I});
- }
+ Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
if (MSSAU)
@@ -227,7 +272,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
isa<UnreachableInst>(BB->getTerminator()) &&
"The successor list of BB isn't empty before "
"applying corresponding DTU updates.");
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(BB);
}
@@ -534,7 +579,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
- BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
+ // Splitting the predecessors of a loop header creates a preheader block.
+ if (LI && LI->isLoopHeader(BB))
+ // Using the loop start line number prevents debuggers from stepping into the
+ // loop body for this instruction.
+ BI->setDebugLoc(LI->getLoopFor(BB)->getStartLoc());
+ else
+ BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -543,6 +594,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// all BlockAddress uses would need to be updated.
assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
"Cannot split an edge from an IndirectBrInst");
+ assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from a CallBrInst");
Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
}
@@ -711,7 +764,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
UncondBranch->eraseFromParent();
if (DTU)
- DTU->deleteEdge(Pred, BB);
+ DTU->applyUpdates({{DominatorTree::Delete, Pred, BB}});
return cast<ReturnInst>(NewRet);
}
@@ -720,18 +773,23 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ BasicBlock *ThenBlock) {
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
Instruction *CheckTerm;
- if (Unreachable)
- CheckTerm = new UnreachableInst(C, ThenBlock);
- else
- CheckTerm = BranchInst::Create(Tail, ThenBlock);
- CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ bool CreateThenBlock = (ThenBlock == nullptr);
+ if (CreateThenBlock) {
+ ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
+ } else
+ CheckTerm = ThenBlock->getTerminator();
BranchInst *HeadNewTerm =
BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
@@ -746,7 +804,10 @@ Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
DT->changeImmediateDominator(Child, NewNode);
// Head dominates ThenBlock.
- DT->addNewBlock(ThenBlock, Head);
+ if (CreateThenBlock)
+ DT->addNewBlock(ThenBlock, Head);
+ else
+ DT->changeImmediateDominator(ThenBlock, Head);
}
}
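The MergeBlockIntoPredecessor comment above explains why the dominator-tree updates are queued with all insertions before any deletions: deleting first can make blocks transiently unreachable, and the incremental updater then pays heavily to make them reachable again. A rough sketch of the ordering only, with ints standing in for BasicBlock * and a plain struct for DominatorTree::UpdateType:

#include <algorithm>
#include <vector>

enum class UpdateKind { Insert, Delete };
struct Update { UpdateKind Kind; int From, To; };   // ints stand in for blocks

// Build the update list for merging BB into PredBB: insert PredBB->Succ for
// every successor PredBB does not already reach, then delete the BB->Succ
// edges, then delete the PredBB->BB edge itself.
std::vector<Update> buildMergeUpdates(int PredBB, int BB,
                                      const std::vector<int> &SuccsOfBB,
                                      const std::vector<int> &SuccsOfPredBB) {
  std::vector<Update> Updates;
  for (int Succ : SuccsOfBB)
    if (std::find(SuccsOfPredBB.begin(), SuccsOfPredBB.end(), Succ) ==
        SuccsOfPredBB.end())
      Updates.push_back({UpdateKind::Insert, PredBB, Succ});
  for (int Succ : SuccsOfBB)
    Updates.push_back({UpdateKind::Delete, BB, Succ});
  Updates.push_back({UpdateKind::Delete, PredBB, BB});
  return Updates;
}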
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index fafc9aaba5c9..f5e4b53f6d97 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -1,9 +1,8 @@
//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -49,10 +49,14 @@ namespace {
bool runOnFunction(Function &F) override {
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
+ auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+ auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
unsigned N =
- SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI, nullptr, PDT));
NumBroken += N;
return N > 0;
}
@@ -145,6 +149,14 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// it in this generic function.
if (DestBB->isEHPad()) return nullptr;
+ // Don't split the non-fallthrough edge from a callbr.
+ if (isa<CallBrInst>(TI) && SuccNum > 0)
+ return nullptr;
+
+ if (Options.IgnoreUnreachableDests &&
+ isa<UnreachableInst>(DestBB->getFirstNonPHIOrDbgOrLifetime()))
+ return nullptr;
+
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
TIBB->getName() + "." + DestBB->getName() + "_crit_edge");
@@ -189,7 +201,7 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
if (TI->getSuccessor(i) != DestBB) continue;
// Remove an entry for TIBB from DestBB phi nodes.
- DestBB->removePredecessor(TIBB, Options.DontDeleteUselessPHIs);
+ DestBB->removePredecessor(TIBB, Options.KeepOneInputPHIs);
// We found another edge to DestBB, go to NewBB instead.
TI->setSuccessor(i, NewBB);
@@ -198,16 +210,17 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
// If we have nothing to update, just return.
auto *DT = Options.DT;
+ auto *PDT = Options.PDT;
auto *LI = Options.LI;
auto *MSSAU = Options.MSSAU;
if (MSSAU)
MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
DestBB, NewBB, {TIBB}, Options.MergeIdenticalEdges);
- if (!DT && !LI)
+ if (!DT && !PDT && !LI)
return NewBB;
- if (DT) {
+ if (DT || PDT) {
// Update the DominatorTree.
// ---> NewBB -----\
// / V
@@ -223,7 +236,10 @@ llvm::SplitCriticalEdge(Instruction *TI, unsigned SuccNum,
if (llvm::find(successors(TIBB), DestBB) == succ_end(TIBB))
Updates.push_back({DominatorTree::Delete, TIBB, DestBB});
- DT->applyUpdates(Updates);
+ if (DT)
+ DT->applyUpdates(Updates);
+ if (PDT)
+ PDT->applyUpdates(Updates);
}
// Update LoopInfo if it is around.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 3466dedd3236..27f110e24f9c 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1,9 +1,8 @@
//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
using namespace llvm;
@@ -121,6 +121,13 @@ static bool setNonLazyBind(Function &F) {
return true;
}
+static bool setDoesNotFreeMemory(Function &F) {
+ if (F.hasFnAttribute(Attribute::NoFree))
+ return false;
+ F.addFnAttr(Attribute::NoFree);
+ return true;
+}
+
bool llvm::inferLibFuncAttributes(Module *M, StringRef Name,
const TargetLibraryInfo &TLI) {
Function *F = M->getFunction(Name);
@@ -136,6 +143,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
bool Changed = false;
+ if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI))
+ Changed |= setDoesNotFreeMemory(F);
+
if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT())
Changed |= setNonLazyBind(F);
@@ -790,95 +800,76 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
-Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_strlen))
+static Value *emitLibCall(LibFunc TheLibFunc, Type *ReturnType,
+ ArrayRef<Type *> ParamTypes,
+ ArrayRef<Value *> Operands, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI,
+ bool IsVaArgs = false) {
+ if (!TLI->has(TheLibFunc))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- StringRef StrlenName = TLI->getName(LibFunc_strlen);
- LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction(StrlenName, DL.getIntPtrType(Context),
- B.getInt8PtrTy());
- inferLibFuncAttributes(M, StrlenName, *TLI);
- CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), StrlenName);
- if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ StringRef FuncName = TLI->getName(TheLibFunc);
+ FunctionType *FuncType = FunctionType::get(ReturnType, ParamTypes, IsVaArgs);
+ FunctionCallee Callee = M->getOrInsertFunction(FuncName, FuncType);
+ inferLibFuncAttributes(M, FuncName, *TLI);
+ CallInst *CI = B.CreateCall(Callee, Operands, FuncName);
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
-
return CI;
}
-Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_strchr))
- return nullptr;
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(LibFunc_strlen, DL.getIntPtrType(Context),
+ B.getInt8PtrTy(), castToCStr(Ptr, B), B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
- StringRef StrChrName = TLI->getName(LibFunc_strchr);
+Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
- Constant *StrChr =
- M->getOrInsertFunction(StrChrName, I8Ptr, I8Ptr, I32Ty);
- inferLibFuncAttributes(M, StrChrName, *TLI);
- CallInst *CI = B.CreateCall(
- StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, StrChrName);
- if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
+ return emitLibCall(LibFunc_strchr, I8Ptr, {I8Ptr, I32Ty},
+ {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, B, TLI);
}
Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_strncmp))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef StrNCmpName = TLI->getName(LibFunc_strncmp);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction(StrNCmpName, B.getInt32Ty(),
- B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, StrNCmpName, *TLI);
- CallInst *CI = B.CreateCall(
- StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, StrNCmpName);
-
- if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
+ return emitLibCall(
+ LibFunc_strncmp, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
}
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc_strcpy))
- return nullptr;
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
+ {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
- inferLibFuncAttributes(M, Name, *TLI);
- CallInst *CI =
- B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
- if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
+ return emitLibCall(LibFunc_stpcpy, I8Ptr, {I8Ptr, I8Ptr},
+ {castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
}
Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
- const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc_strncpy))
- return nullptr;
+ const TargetLibraryInfo *TLI) {
+ Type *I8Ptr = B.getInt8PtrTy();
+ return emitLibCall(LibFunc_strncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+ {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
+Value *llvm::emitStpNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
- Len->getType());
- inferLibFuncAttributes(M, Name, *TLI);
- CallInst *CI = B.CreateCall(
- StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, Name);
- if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
- return CI;
+ return emitLibCall(LibFunc_stpncpy, I8Ptr, {I8Ptr, I8Ptr, Len->getType()},
+ {castToCStr(Dst, B), castToCStr(Src, B), Len}, B, TLI);
}
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
@@ -892,57 +883,115 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCpy = M->getOrInsertFunction(
+ FunctionCallee MemCpy = M->getOrInsertFunction(
"__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
DL.getIntPtrType(Context));
Dst = castToCStr(Dst, B);
Src = castToCStr(Src, B);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
- if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(MemCpy.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memchr))
- return nullptr;
-
- Module *M = B.GetInsertBlock()->getModule();
- StringRef MemChrName = TLI->getName(LibFunc_memchr);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction(MemChrName, B.getInt8PtrTy(),
- B.getInt8PtrTy(), B.getInt32Ty(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MemChrName, *TLI);
- CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, MemChrName);
-
- if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
-
- return CI;
+ return emitLibCall(
+ LibFunc_memchr, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr, B), Val, Len}, B, TLI);
}
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc_memcmp))
- return nullptr;
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ return emitLibCall(
+ LibFunc_memcmp, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
- Module *M = B.GetInsertBlock()->getModule();
- StringRef MemCmpName = TLI->getName(LibFunc_memcmp);
+Value *llvm::emitBCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction(MemCmpName, B.getInt32Ty(),
- B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
- inferLibFuncAttributes(M, MemCmpName, *TLI);
- CallInst *CI = B.CreateCall(
- MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, MemCmpName);
-
- if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
- CI->setCallingConv(F->getCallingConv());
+ return emitLibCall(
+ LibFunc_bcmp, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context)},
+ {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, B, TLI);
+}
- return CI;
+Value *llvm::emitMemCCpy(Value *Ptr1, Value *Ptr2, Value *Val, Value *Len,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ return emitLibCall(
+ LibFunc_memccpy, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), Len->getType()},
+ {Ptr1, Ptr2, Val, Len}, B, TLI);
+}
+
+Value *llvm::emitSNPrintf(Value *Dest, Value *Size, Value *Fmt,
+ ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Value *, 8> Args{castToCStr(Dest, B), Size, castToCStr(Fmt, B)};
+ Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ return emitLibCall(LibFunc_snprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy()},
+ Args, B, TLI, /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitSPrintf(Value *Dest, Value *Fmt,
+ ArrayRef<Value *> VariadicArgs, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ SmallVector<Value *, 8> Args{castToCStr(Dest, B), castToCStr(Fmt, B)};
+ Args.insert(Args.end(), VariadicArgs.begin(), VariadicArgs.end());
+ return emitLibCall(LibFunc_sprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy()}, Args, B, TLI,
+ /*IsVaArgs=*/true);
+}
+
+Value *llvm::emitStrCat(Value *Dest, Value *Src, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strcat, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy()},
+ {castToCStr(Dest, B), castToCStr(Src, B)}, B, TLI);
+}
+
+Value *llvm::emitStrLCpy(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strlcpy, Size->getType(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrLCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strlcat, Size->getType(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitStrNCat(Value *Dest, Value *Src, Value *Size, IRBuilder<> &B,
+ const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_strncat, B.getInt8PtrTy(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), Size->getType()},
+ {castToCStr(Dest, B), castToCStr(Src, B), Size}, B, TLI);
+}
+
+Value *llvm::emitVSNPrintf(Value *Dest, Value *Size, Value *Fmt, Value *VAList,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ return emitLibCall(
+ LibFunc_vsnprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), Size->getType(), B.getInt8PtrTy(), VAList->getType()},
+ {castToCStr(Dest, B), Size, castToCStr(Fmt, B), VAList}, B, TLI);
+}
+
+Value *llvm::emitVSPrintf(Value *Dest, Value *Fmt, Value *VAList,
+ IRBuilder<> &B, const TargetLibraryInfo *TLI) {
+ return emitLibCall(LibFunc_vsprintf, B.getInt32Ty(),
+ {B.getInt8PtrTy(), B.getInt8PtrTy(), VAList->getType()},
+ {castToCStr(Dest, B), castToCStr(Fmt, B), VAList}, B, TLI);
}
/// Append a suffix to the function name according to the type of 'Op'.
@@ -966,8 +1015,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
assert((Name != "") && "Must specify Name to emitUnaryFloatFnCall");
Module *M = B.GetInsertBlock()->getModule();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType());
+ FunctionCallee Callee =
+ M->getOrInsertFunction(Name, Op->getType(), Op->getType());
CallInst *CI = B.CreateCall(Callee, Op, Name);
// The incoming attribute set may have come from a speculatable intrinsic, but
@@ -976,7 +1025,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
CI->setAttributes(Attrs.removeAttribute(B.getContext(),
AttributeList::FunctionIndex,
Attribute::Speculatable));
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1009,11 +1059,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
appendTypeSuffix(Op1, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getModule();
- Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
- Op2->getType());
+ FunctionCallee Callee = M->getOrInsertFunction(
+ Name, Op1->getType(), Op1->getType(), Op2->getType());
CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
- if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1026,7 +1077,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef PutCharName = TLI->getName(LibFunc_putchar);
- Value *PutChar = M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
+ FunctionCallee PutChar =
+ M->getOrInsertFunction(PutCharName, B.getInt32Ty(), B.getInt32Ty());
inferLibFuncAttributes(M, PutCharName, *TLI);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
@@ -1035,7 +1087,8 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
"chari"),
PutCharName);
- if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(PutChar.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
@@ -1047,11 +1100,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef PutsName = TLI->getName(LibFunc_puts);
- Value *PutS =
+ FunctionCallee PutS =
M->getOrInsertFunction(PutsName, B.getInt32Ty(), B.getInt8PtrTy());
inferLibFuncAttributes(M, PutsName, *TLI);
CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), PutsName);
- if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(PutS.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
}
@@ -1063,15 +1117,16 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutcName = TLI->getName(LibFunc_fputc);
- Constant *F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(), B.getInt32Ty(),
- File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutcName, B.getInt32Ty(),
+ B.getInt32Ty(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutcName, *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
"chari");
CallInst *CI = B.CreateCall(F, {Char, File}, FPutcName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1083,14 +1138,15 @@ Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutcUnlockedName = TLI->getName(LibFunc_fputc_unlocked);
- Constant *F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
- B.getInt32Ty(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutcUnlockedName, B.getInt32Ty(),
+ B.getInt32Ty(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutcUnlockedName, *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari");
CallInst *CI = B.CreateCall(F, {Char, File}, FPutcUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1102,13 +1158,14 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutsName = TLI->getName(LibFunc_fputs);
- Constant *F = M->getOrInsertFunction(
- FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
+ B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutsName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1120,13 +1177,14 @@ Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked);
- Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
- B.getInt8PtrTy(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(),
+ B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FPutsUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, FPutsUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1139,7 +1197,7 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc_fwrite);
- Constant *F = M->getOrInsertFunction(
+ FunctionCallee F = M->getOrInsertFunction(
FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
@@ -1149,7 +1207,8 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
B.CreateCall(F, {castToCStr(Ptr, B), Size,
ConstantInt::get(DL.getIntPtrType(Context), 1), File});
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1162,12 +1221,13 @@ Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL,
Module *M = B.GetInsertBlock()->getModule();
StringRef MallocName = TLI->getName(LibFunc_malloc);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
- DL.getIntPtrType(Context));
+ FunctionCallee Malloc = M->getOrInsertFunction(MallocName, B.getInt8PtrTy(),
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(M, MallocName, *TLI);
CallInst *CI = B.CreateCall(Malloc, Num, MallocName);
- if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts()))
+ if (const Function *F =
+ dyn_cast<Function>(Malloc.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1182,12 +1242,13 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
StringRef CallocName = TLI.getName(LibFunc_calloc);
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- Value *Calloc = M->getOrInsertFunction(CallocName, Attrs, B.getInt8PtrTy(),
- PtrType, PtrType);
+ FunctionCallee Calloc = M->getOrInsertFunction(
+ CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
inferLibFuncAttributes(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
- if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
+ if (const auto *F =
+ dyn_cast<Function>(Calloc.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return CI;
@@ -1202,7 +1263,7 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked);
- Constant *F = M->getOrInsertFunction(
+ FunctionCallee F = M->getOrInsertFunction(
FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
@@ -1210,7 +1271,8 @@ Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
inferLibFuncAttributes(M, FWriteUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1222,13 +1284,14 @@ Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getModule();
StringRef FGetCUnlockedName = TLI->getName(LibFunc_fgetc_unlocked);
- Constant *F =
- M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(), File->getType());
+ FunctionCallee F = M->getOrInsertFunction(FGetCUnlockedName, B.getInt32Ty(),
+ File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(M, FGetCUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, File, FGetCUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1240,14 +1303,15 @@ Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File,
Module *M = B.GetInsertBlock()->getModule();
StringRef FGetSUnlockedName = TLI->getName(LibFunc_fgets_unlocked);
- Constant *F =
+ FunctionCallee F =
M->getOrInsertFunction(FGetSUnlockedName, B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt32Ty(), File->getType());
inferLibFuncAttributes(M, FGetSUnlockedName, *TLI);
CallInst *CI =
B.CreateCall(F, {castToCStr(Str, B), Size, File}, FGetSUnlockedName);
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
@@ -1261,7 +1325,7 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked);
- Constant *F = M->getOrInsertFunction(
+ FunctionCallee F = M->getOrInsertFunction(
FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
@@ -1269,7 +1333,8 @@ Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File,
inferLibFuncAttributes(M, FReadUnlockedName, *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File});
- if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ if (const Function *Fn =
+ dyn_cast<Function>(F.getCallee()->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
return CI;
}
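The BuildLibCalls.cpp hunks above all follow one migration: getOrInsertFunction() now hands back a FunctionCallee (the function type plus the callee value) instead of a bare Value*/Constant*, so the callee has to be reached through getCallee() before stripPointerCasts(). The following is a minimal sketch of that pattern, not part of the patch; the helper name emitExampleLibCall and the callee name "example_func" are purely illustrative.

// Illustrative sketch only; emitExampleLibCall and "example_func" are
// hypothetical names, not symbols introduced by this patch.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static CallInst *emitExampleLibCall(Module &M, IRBuilder<> &B, Value *Arg) {
  // getOrInsertFunction now returns a FunctionCallee instead of a raw
  // Value*/Constant*.
  FunctionCallee Callee =
      M.getOrInsertFunction("example_func", B.getInt32Ty(), B.getInt8PtrTy());
  CallInst *CI = B.CreateCall(Callee, Arg, "example_call");
  // Reach the callee value through getCallee() before stripping casts,
  // mirroring the updated helpers above.
  if (const Function *F =
          dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
    CI->setCallingConv(F->getCallingConv());
  return CI;
}

The getCallee()->stripPointerCasts() step recurs in every helper above because getOrInsertFunction may return a bitcast of an existing declaration whose type does not match, in which case the underlying Function is only reachable through the cast.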
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index e7828af648a9..df299f673f65 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -1,9 +1,8 @@
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp
index e58ddcf34667..f04d76e70c0d 100644
--- a/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -1,9 +1,8 @@
//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -367,8 +366,9 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
CastInst **RetBitCast) {
assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted");
- // Set the called function of the call site to be the given callee.
- CS.setCalledFunction(Callee);
+ // Set the called function of the call site to be the given callee (but don't
+ // change the type).
+ cast<CallBase>(CS.getInstruction())->setCalledOperand(Callee);
// Since the call site will no longer be direct, we must clear metadata that
// is only appropriate for indirect calls. This includes !prof and !callees
@@ -412,6 +412,15 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,
// Remove any incompatible attributes for the argument.
AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
+
+ // If byval is used, this must be a pointer type, and the byval type must
+ // match the element type. Update it if present.
+ if (ArgAttrs.getByValType()) {
+ Type *NewTy = Callee->getParamByValType(ArgNo);
+ ArgAttrs.addByValAttr(
+ NewTy ? NewTy : cast<PointerType>(FormalTy)->getElementType());
+ }
+
NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
AttributeChanged = true;
} else
diff --git a/lib/Transforms/Utils/CanonicalizeAliases.cpp b/lib/Transforms/Utils/CanonicalizeAliases.cpp
index cf41fd2e14c0..455fcbb1cf98 100644
--- a/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -1,9 +1,8 @@
//===- CanonicalizeAliases.cpp - ThinLTO Support: Canonicalize Aliases ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 8f8c601f5f13..1026c9d37038 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -1,9 +1,8 @@
//===- CloneFunction.cpp - Clone a function into another function ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,13 +15,13 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
@@ -740,12 +739,12 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
const Twine &NameSuffix, LoopInfo *LI,
DominatorTree *DT,
SmallVectorImpl<BasicBlock *> &Blocks) {
- assert(OrigLoop->getSubLoops().empty() &&
- "Loop to be cloned cannot have inner loop");
Function *F = OrigLoop->getHeader()->getParent();
Loop *ParentLoop = OrigLoop->getParentLoop();
+ DenseMap<Loop *, Loop *> LMap;
Loop *NewLoop = LI->AllocateLoop();
+ LMap[OrigLoop] = NewLoop;
if (ParentLoop)
ParentLoop->addChildLoop(NewLoop);
else
@@ -765,14 +764,36 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
// Update DominatorTree.
DT->addNewBlock(NewPH, LoopDomBB);
+ for (Loop *CurLoop : OrigLoop->getLoopsInPreorder()) {
+ Loop *&NewLoop = LMap[CurLoop];
+ if (!NewLoop) {
+ NewLoop = LI->AllocateLoop();
+
+ // Establish the parent/child relationship.
+ Loop *OrigParent = CurLoop->getParentLoop();
+ assert(OrigParent && "Could not find the original parent loop");
+ Loop *NewParentLoop = LMap[OrigParent];
+ assert(NewParentLoop && "Could not find the new parent loop");
+
+ NewParentLoop->addChildLoop(NewLoop);
+ }
+ }
+
for (BasicBlock *BB : OrigLoop->getBlocks()) {
+ Loop *CurLoop = LI->getLoopFor(BB);
+ Loop *&NewLoop = LMap[CurLoop];
+ assert(NewLoop && "Expecting new loop to be allocated");
+
BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F);
VMap[BB] = NewBB;
// Update LoopInfo.
NewLoop->addBasicBlockToLoop(NewBB, *LI);
+ if (BB == CurLoop->getHeader())
+ NewLoop->moveToHeader(NewBB);
- // Add DominatorTree node. After seeing all blocks, update to correct IDom.
+ // Add DominatorTree node. After seeing all blocks, update to correct
+ // IDom.
DT->addNewBlock(NewBB, NewPH);
Blocks.push_back(NewBB);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 659993aa5478..7ddf59becba9 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -1,9 +1,8 @@
//===- CloneModule.cpp - Clone an entire module ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 25d4ae583ecc..fa6d3f8ae873 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -1,9 +1,8 @@
//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,6 +20,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -44,6 +44,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -67,6 +68,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::PatternMatch;
using ProfileCount = Function::ProfileCount;
#define DEBUG_TYPE "code-extractor"
@@ -207,6 +209,9 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
llvm_unreachable("Repeated basic blocks in extraction input");
}
+ LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
+ << '\n');
+
for (auto *BB : Result) {
if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
return {};
@@ -224,9 +229,11 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
// the subgraph which is being extracted.
for (auto *PBB : predecessors(BB))
if (!Result.count(PBB)) {
- LLVM_DEBUG(
- dbgs() << "No blocks in this region may have entries from "
- "outside the region except for the first block!\n");
+ LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
+ "outside the region except for the first block!\n"
+ << "Problematic source BB: " << BB->getName() << "\n"
+ << "Problematic destination BB: " << PBB->getName()
+ << "\n");
return {};
}
}
@@ -236,18 +243,20 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, bool AllowVarArgs,
- bool AllowAlloca, std::string Suffix)
+ BranchProbabilityInfo *BPI, AssumptionCache *AC,
+ bool AllowVarArgs, bool AllowAlloca,
+ std::string Suffix)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AllowVarArgs(AllowVarArgs),
+ BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
Suffix(Suffix) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
- BranchProbabilityInfo *BPI, std::string Suffix)
+ BranchProbabilityInfo *BPI, AssumptionCache *AC,
+ std::string Suffix)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), AllowVarArgs(false),
+ BPI(BPI), AC(AC), AllowVarArgs(false),
Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
/* AllowVarArgs */ false,
/* AllowAlloca */ false)),
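The two constructors above gain an AssumptionCache * parameter, which is used later in moveCodeToFunction to unregister @llvm.assume calls that move into the outlined function. Below is a minimal usage sketch, not taken from the patch; the helper name extractRegion and the ".extracted" suffix are illustrative, and Blocks, DT, BFI, BPI and AC are assumed to be set up by the caller.

// Usage sketch only; names and the suffix are placeholders.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;

static Function *extractRegion(ArrayRef<BasicBlock *> Blocks,
                               DominatorTree &DT, BlockFrequencyInfo *BFI,
                               BranchProbabilityInfo *BPI,
                               AssumptionCache *AC) {
  // AC is the new parameter; it lets the extractor drop @llvm.assume calls
  // that are moved into the outlined function from the caller's cache.
  CodeExtractor CE(Blocks, &DT, /*AggregateArgs=*/false, BFI, BPI, AC,
                   /*AllowVarArgs=*/false, /*AllowAlloca=*/false,
                   ".extracted");
  return CE.isEligible() ? CE.extractCodeRegion() : nullptr;
}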
@@ -325,7 +334,7 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
if (dyn_cast<Constant>(MemAddr))
break;
Value *Base = MemAddr->stripInBoundsConstantOffsets();
- if (!dyn_cast<AllocaInst>(Base) || Base == AI)
+ if (!isa<AllocaInst>(Base) || Base == AI)
return false;
break;
}
@@ -401,11 +410,74 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
return CommonExitBlock;
}
+// Find the pair of lifetime markers for address 'Addr' that are either
+// defined inside the outline region or can legally be shrinkwrapped into the
+// outline region. If there are no other untracked uses of the address, return
+// the markers if found; otherwise return an empty LifetimeMarkerInfo.
+CodeExtractor::LifetimeMarkerInfo
+CodeExtractor::getLifetimeMarkers(Instruction *Addr,
+ BasicBlock *ExitBlock) const {
+ LifetimeMarkerInfo Info;
+
+ for (User *U : Addr->users()) {
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // Do not handle the case where Addr has multiple start markers.
+ if (Info.LifeStart)
+ return {};
+ Info.LifeStart = IntrInst;
+ }
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (Info.LifeEnd)
+ return {};
+ Info.LifeEnd = IntrInst;
+ }
+ continue;
+ }
+    // If there is an untracked use of the address, bail.
+ if (!definedInRegion(Blocks, U))
+ return {};
+ }
+
+ if (!Info.LifeStart || !Info.LifeEnd)
+ return {};
+
+ Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
+ Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
+ // Do legality check.
+ if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
+ !isLegalToShrinkwrapLifetimeMarkers(Addr))
+ return {};
+
+ // Check to see if we have a place to do hoisting, if not, bail.
+ if (Info.HoistLifeEnd && !ExitBlock)
+ return {};
+
+ return Info;
+}
+
void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
BasicBlock *&ExitBlock) const {
Function *Func = (*Blocks.begin())->getParent();
ExitBlock = getCommonExitBlock(Blocks);
+ auto moveOrIgnoreLifetimeMarkers =
+ [&](const LifetimeMarkerInfo &LMI) -> bool {
+ if (!LMI.LifeStart)
+ return false;
+ if (LMI.SinkLifeStart) {
+ LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
+ << "\n");
+ SinkCands.insert(LMI.LifeStart);
+ }
+ if (LMI.HoistLifeEnd) {
+ LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
+ HoistCands.insert(LMI.LifeEnd);
+ }
+ return true;
+ };
+
for (BasicBlock &BB : *Func) {
if (Blocks.count(&BB))
continue;
@@ -414,95 +486,52 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
if (!AI)
continue;
- // Find the pair of life time markers for address 'Addr' that are either
- // defined inside the outline region or can legally be shrinkwrapped into
- // the outline region. If there are not other untracked uses of the
- // address, return the pair of markers if found; otherwise return a pair
- // of nullptr.
- auto GetLifeTimeMarkers =
- [&](Instruction *Addr, bool &SinkLifeStart,
- bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
- Instruction *LifeStart = nullptr, *LifeEnd = nullptr;
-
- for (User *U : Addr->users()) {
- IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
- if (IntrInst) {
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
- // Do not handle the case where AI has multiple start markers.
- if (LifeStart)
- return std::make_pair<Instruction *>(nullptr, nullptr);
- LifeStart = IntrInst;
- }
- if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
- if (LifeEnd)
- return std::make_pair<Instruction *>(nullptr, nullptr);
- LifeEnd = IntrInst;
- }
- continue;
- }
- // Find untracked uses of the address, bail.
- if (!definedInRegion(Blocks, U))
- return std::make_pair<Instruction *>(nullptr, nullptr);
- }
-
- if (!LifeStart || !LifeEnd)
- return std::make_pair<Instruction *>(nullptr, nullptr);
-
- SinkLifeStart = !definedInRegion(Blocks, LifeStart);
- HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
- // Do legality Check.
- if ((SinkLifeStart || HoistLifeEnd) &&
- !isLegalToShrinkwrapLifetimeMarkers(Addr))
- return std::make_pair<Instruction *>(nullptr, nullptr);
-
- // Check to see if we have a place to do hoisting, if not, bail.
- if (HoistLifeEnd && !ExitBlock)
- return std::make_pair<Instruction *>(nullptr, nullptr);
-
- return std::make_pair(LifeStart, LifeEnd);
- };
-
- bool SinkLifeStart = false, HoistLifeEnd = false;
- auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);
-
- if (Markers.first) {
- if (SinkLifeStart)
- SinkCands.insert(Markers.first);
+ LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(AI, ExitBlock);
+ bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
+ if (Moved) {
+ LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
SinkCands.insert(AI);
- if (HoistLifeEnd)
- HoistCands.insert(Markers.second);
continue;
}
- // Follow the bitcast.
- Instruction *MarkerAddr = nullptr;
+ // Follow any bitcasts.
+ SmallVector<Instruction *, 2> Bitcasts;
+ SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
for (User *U : AI->users()) {
if (U->stripInBoundsConstantOffsets() == AI) {
- SinkLifeStart = false;
- HoistLifeEnd = false;
Instruction *Bitcast = cast<Instruction>(U);
- Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
- if (Markers.first) {
- MarkerAddr = Bitcast;
+ LifetimeMarkerInfo LMI = getLifetimeMarkers(Bitcast, ExitBlock);
+ if (LMI.LifeStart) {
+ Bitcasts.push_back(Bitcast);
+ BitcastLifetimeInfo.push_back(LMI);
continue;
}
}
// Found unknown use of AI.
if (!definedInRegion(Blocks, U)) {
- MarkerAddr = nullptr;
+ Bitcasts.clear();
break;
}
}
- if (MarkerAddr) {
- if (SinkLifeStart)
- SinkCands.insert(Markers.first);
- if (!definedInRegion(Blocks, MarkerAddr))
- SinkCands.insert(MarkerAddr);
- SinkCands.insert(AI);
- if (HoistLifeEnd)
- HoistCands.insert(Markers.second);
+ // Either no bitcasts reference the alloca or there are unknown uses.
+ if (Bitcasts.empty())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
+ SinkCands.insert(AI);
+ for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
+ Instruction *BitcastAddr = Bitcasts[I];
+ const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
+ assert(LMI.LifeStart &&
+ "Unsafe to sink bitcast without lifetime markers");
+ moveOrIgnoreLifetimeMarkers(LMI);
+ if (!definedInRegion(Blocks, BitcastAddr)) {
+ LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
+ << "\n");
+ SinkCands.insert(BitcastAddr);
+ }
}
}
}
@@ -780,6 +809,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::NoBuiltin:
case Attribute::NoCapture:
case Attribute::NoReturn:
+ case Attribute::NoSync:
case Attribute::None:
case Attribute::NonNull:
case Attribute::ReadNone:
@@ -792,8 +822,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::StructRet:
case Attribute::SwiftError:
case Attribute::SwiftSelf:
+ case Attribute::WillReturn:
case Attribute::WriteOnly:
case Attribute::ZExt:
+ case Attribute::ImmArg:
case Attribute::EndAttrKinds:
continue;
// Those attributes should be safe to propagate to the extracted function.
@@ -803,6 +835,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::InlineHint:
case Attribute::MinSize:
case Attribute::NoDuplicate:
+ case Attribute::NoFree:
case Attribute::NoImplicitFloat:
case Attribute::NoInline:
case Attribute::NonLazyBind:
@@ -817,6 +850,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::SanitizeMemory:
case Attribute::SanitizeThread:
case Attribute::SanitizeHWAddress:
+ case Attribute::SanitizeMemTag:
case Attribute::SpeculativeLoadHardening:
case Attribute::StackProtect:
case Attribute::StackProtectReq:
@@ -845,7 +879,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Instruction *TI = newFunction->begin()->getTerminator();
GetElementPtrInst *GEP = GetElementPtrInst::Create(
StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
- RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
+ RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
+ "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = &*AI++;
@@ -880,6 +915,88 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
return newFunction;
}
+/// Erase lifetime.start markers which reference inputs to the extraction
+/// region, and insert the referenced memory into \p LifetimesStart.
+///
+/// The extraction region is defined by a set of blocks (\p Blocks), and a set
+/// of allocas which will be moved from the caller function into the extracted
+/// function (\p SunkAllocas).
+static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
+ const SetVector<Value *> &SunkAllocas,
+ SetVector<Value *> &LifetimesStart) {
+ for (BasicBlock *BB : Blocks) {
+ for (auto It = BB->begin(), End = BB->end(); It != End;) {
+ auto *II = dyn_cast<IntrinsicInst>(&*It);
+ ++It;
+ if (!II || !II->isLifetimeStartOrEnd())
+ continue;
+
+ // Get the memory operand of the lifetime marker. If the underlying
+ // object is a sunk alloca, or is otherwise defined in the extraction
+ // region, the lifetime marker must not be erased.
+ Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
+ if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ LifetimesStart.insert(Mem);
+ II->eraseFromParent();
+ }
+ }
+}
+
+/// Insert lifetime start/end markers surrounding the call to the new function
+/// for objects defined in the caller.
+static void insertLifetimeMarkersSurroundingCall(
+ Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
+ CallInst *TheCall) {
+ LLVMContext &Ctx = M->getContext();
+ auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
+ auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
+ Instruction *Term = TheCall->getParent()->getTerminator();
+
+  // The memory argument to a lifetime marker must be an i8*. Cache any
+  // bitcasts needed to satisfy this requirement so they may be reused.
+ DenseMap<Value *, Value *> Bitcasts;
+
+ // Emit lifetime markers for the pointers given in \p Objects. Insert the
+ // markers before the call if \p InsertBefore, and after the call otherwise.
+ auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
+ bool InsertBefore) {
+ for (Value *Mem : Objects) {
+ assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
+ TheCall->getFunction()) &&
+ "Input memory not defined in original function");
+ Value *&MemAsI8Ptr = Bitcasts[Mem];
+ if (!MemAsI8Ptr) {
+ if (Mem->getType() == Int8PtrTy)
+ MemAsI8Ptr = Mem;
+ else
+ MemAsI8Ptr =
+ CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
+ }
+
+ auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
+ if (InsertBefore)
+ Marker->insertBefore(TheCall);
+ else
+ Marker->insertBefore(Term);
+ }
+ };
+
+ if (!LifetimesStart.empty()) {
+ auto StartFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
+ insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
+ }
+
+ if (!LifetimesEnd.empty()) {
+ auto EndFn = llvm::Intrinsic::getDeclaration(
+ M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
+ insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
+ }
+}
+
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
/// the call instruction, splitting any PHI nodes in the header block as
/// necessary.
@@ -897,11 +1014,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
CallInst *call = nullptr;
// Add inputs as params, or to be filled into the struct
- for (Value *input : inputs)
+ unsigned ArgNo = 0;
+ SmallVector<unsigned, 1> SwiftErrorArgs;
+ for (Value *input : inputs) {
if (AggregateArgs)
StructValues.push_back(input);
- else
+ else {
params.push_back(input);
+ if (input->isSwiftError())
+ SwiftErrorArgs.push_back(ArgNo);
+ }
+ ++ArgNo;
+ }
// Create allocas for the outputs
for (Value *output : outputs) {
@@ -957,13 +1081,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
}
codeReplacer->getInstList().push_back(call);
+ // Set swifterror parameter attributes.
+ for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
+ call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+ newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
+ }
+
Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
unsigned FirstOut = inputs.size();
if (!AggregateArgs)
std::advance(OutputArgBegin, inputs.size());
// Reload the outputs passed in by reference.
- Function::arg_iterator OAI = OutputArgBegin;
for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
Value *Output = nullptr;
if (AggregateArgs) {
@@ -977,7 +1106,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
} else {
Output = ReloadOutputs[i];
}
- LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+ LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
+ outputs[i]->getName() + ".reload");
Reloads.push_back(load);
codeReplacer->getInstList().push_back(load);
std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
@@ -986,40 +1116,6 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
if (!Blocks.count(inst->getParent()))
inst->replaceUsesOfWith(outputs[i], load);
}
-
- // Store to argument right after the definition of output value.
- auto *OutI = dyn_cast<Instruction>(outputs[i]);
- if (!OutI)
- continue;
-
- // Find proper insertion point.
- BasicBlock::iterator InsertPt;
- // In case OutI is an invoke, we insert the store at the beginning in the
- // 'normal destination' BB. Otherwise we insert the store right after OutI.
- if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
- InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
- else if (auto *Phi = dyn_cast<PHINode>(OutI))
- InsertPt = Phi->getParent()->getFirstInsertionPt();
- else
- InsertPt = std::next(OutI->getIterator());
-
- assert(OAI != newFunction->arg_end() &&
- "Number of output arguments should match "
- "the amount of defined values");
- if (AggregateArgs) {
- Value *Idx[2];
- Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
- Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), &*InsertPt);
- new StoreInst(outputs[i], GEP, &*InsertPt);
- // Since there should be only one struct argument aggregating
- // all the output values, we shouldn't increment OAI, which always
- // points to the struct argument, in this case.
- } else {
- new StoreInst(outputs[i], &*OAI, &*InsertPt);
- ++OAI;
- }
}
// Now we can emit a switch statement using the call as a value.
@@ -1075,6 +1171,50 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
}
}
+  // Store the arguments right after the definition of the output value.
+  // This should be done after creating the exit stubs to ensure that the
+  // invoke result restore will be placed in the outlined function.
+ Function::arg_iterator OAI = OutputArgBegin;
+ for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+ auto *OutI = dyn_cast<Instruction>(outputs[i]);
+ if (!OutI)
+ continue;
+
+ // Find proper insertion point.
+ BasicBlock::iterator InsertPt;
+ // In case OutI is an invoke, we insert the store at the beginning in the
+ // 'normal destination' BB. Otherwise we insert the store right after OutI.
+ if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
+ InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
+ else if (auto *Phi = dyn_cast<PHINode>(OutI))
+ InsertPt = Phi->getParent()->getFirstInsertionPt();
+ else
+ InsertPt = std::next(OutI->getIterator());
+
+ Instruction *InsertBefore = &*InsertPt;
+ assert((InsertBefore->getFunction() == newFunction ||
+ Blocks.count(InsertBefore->getParent())) &&
+ "InsertPt should be in new function");
+ assert(OAI != newFunction->arg_end() &&
+ "Number of output arguments should match "
+ "the amount of defined values");
+ if (AggregateArgs) {
+ Value *Idx[2];
+ Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+ Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
+ InsertBefore);
+ new StoreInst(outputs[i], GEP, InsertBefore);
+ // Since there should be only one struct argument aggregating
+ // all the output values, we shouldn't increment OAI, which always
+ // points to the struct argument, in this case.
+ } else {
+ new StoreInst(outputs[i], &*OAI, InsertBefore);
+ ++OAI;
+ }
+ }
+
// Now that we've done the deed, simplify the switch instruction.
Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
switch (NumExitBlocks) {
@@ -1119,6 +1259,10 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
break;
}
+  // Insert lifetime markers around the reloads of any output values. The
+  // allocas that the output values are stored in are only in use in the
+  // codeRepl block.
+ insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
+
return call;
}
@@ -1133,6 +1277,13 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
// Insert this basic block into the new function
newBlocks.push_back(Block);
+
+ // Remove @llvm.assume calls that were moved to the new function from the
+ // old function's assumption cache.
+ if (AC)
+ for (auto &I : *Block)
+ if (match(&I, m_Intrinsic<Intrinsic::assume>()))
+ AC->unregisterAssumption(cast<CallInst>(&I));
}
}
@@ -1181,71 +1332,6 @@ void CodeExtractor::calculateNewCallTerminatorWeights(
MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
}
-/// Scan the extraction region for lifetime markers which reference inputs.
-/// Erase these markers. Return the inputs which were referenced.
-///
-/// The extraction region is defined by a set of blocks (\p Blocks), and a set
-/// of allocas which will be moved from the caller function into the extracted
-/// function (\p SunkAllocas).
-static SetVector<Value *>
-eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
- const SetVector<Value *> &SunkAllocas) {
- SetVector<Value *> InputObjectsWithLifetime;
- for (BasicBlock *BB : Blocks) {
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- auto *II = dyn_cast<IntrinsicInst>(&*It);
- ++It;
- if (!II || !II->isLifetimeStartOrEnd())
- continue;
-
- // Get the memory operand of the lifetime marker. If the underlying
- // object is a sunk alloca, or is otherwise defined in the extraction
- // region, the lifetime marker must not be erased.
- Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
- if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
- continue;
-
- InputObjectsWithLifetime.insert(Mem);
- II->eraseFromParent();
- }
- }
- return InputObjectsWithLifetime;
-}
-
-/// Insert lifetime start/end markers surrounding the call to the new function
-/// for objects defined in the caller.
-static void insertLifetimeMarkersSurroundingCall(
- Module *M, const SetVector<Value *> &InputObjectsWithLifetime,
- CallInst *TheCall) {
- if (InputObjectsWithLifetime.empty())
- return;
-
- LLVMContext &Ctx = M->getContext();
- auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
- auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
- auto LifetimeStartFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
- auto LifetimeEndFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
- for (Value *Mem : InputObjectsWithLifetime) {
- assert((!isa<Instruction>(Mem) ||
- cast<Instruction>(Mem)->getFunction() == TheCall->getFunction()) &&
- "Input memory not defined in original function");
- Value *MemAsI8Ptr = nullptr;
- if (Mem->getType() == Int8PtrTy)
- MemAsI8Ptr = Mem;
- else
- MemAsI8Ptr =
- CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
-
- auto StartMarker =
- CallInst::Create(LifetimeStartFn, {NegativeOne, MemAsI8Ptr});
- StartMarker->insertBefore(TheCall);
- auto EndMarker = CallInst::Create(LifetimeEndFn, {NegativeOne, MemAsI8Ptr});
- EndMarker->insertAfter(TheCall);
- }
-}
-
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@@ -1348,10 +1434,24 @@ Function *CodeExtractor::extractCodeRegion() {
// Find inputs to, outputs from the code region.
findInputsOutputs(inputs, outputs, SinkingCands);
- // Now sink all instructions which only have non-phi uses inside the region
- for (auto *II : SinkingCands)
- cast<Instruction>(II)->moveBefore(*newFuncRoot,
- newFuncRoot->getFirstInsertionPt());
+ // Now sink all instructions which only have non-phi uses inside the region.
+ // Group the allocas at the start of the block, so that any bitcast uses of
+ // the allocas are well-defined.
+ AllocaInst *FirstSunkAlloca = nullptr;
+ for (auto *II : SinkingCands) {
+ if (auto *AI = dyn_cast<AllocaInst>(II)) {
+ AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
+ if (!FirstSunkAlloca)
+ FirstSunkAlloca = AI;
+ }
+ }
+ assert((SinkingCands.empty() || FirstSunkAlloca) &&
+ "Did not expect a sink candidate without any allocas");
+ for (auto *II : SinkingCands) {
+ if (!isa<AllocaInst>(II)) {
+ cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
+ }
+ }
if (!HoistingCands.empty()) {
auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
@@ -1361,11 +1461,11 @@ Function *CodeExtractor::extractCodeRegion() {
}
// Collect objects which are inputs to the extraction region and also
- // referenced by lifetime start/end markers within it. The effects of these
+ // referenced by lifetime start markers within it. The effects of these
// markers must be replicated in the calling function to prevent the stack
// coloring pass from merging slots which store input objects.
- ValueSet InputObjectsWithLifetime =
- eraseLifetimeMarkersOnInputs(Blocks, SinkingCands);
+ ValueSet LifetimesStart;
+ eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
// Construct new function based on inputs/outputs & add allocas for all defs.
Function *newFunction =
@@ -1388,8 +1488,8 @@ Function *CodeExtractor::extractCodeRegion() {
// Replicate the effects of any lifetime start/end markers which referenced
// input objects in the extraction region by placing markers around the call.
- insertLifetimeMarkersSurroundingCall(oldFunction->getParent(),
- InputObjectsWithLifetime, TheCall);
+ insertLifetimeMarkersSurroundingCall(
+ oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
// Propagate personality info to the new function if there is one.
if (oldFunction->hasPersonalityFn())
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index 4e7da7d0449f..069a86f6ab33 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -1,9 +1,8 @@
//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 975b363859a9..5f53d794fe8a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -1,9 +1,8 @@
//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -73,7 +72,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Value *&V = Loads[PN->getIncomingBlock(i)];
if (!V) {
// Insert the load into the predecessor block
- V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+ VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
}
PN->setIncomingValue(i, V);
@@ -81,7 +81,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
} else {
// If this is a normal instruction, just insert a load.
- Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+ Value *V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
+ VolatileLoads, U);
U->replaceUsesOfWith(&I, V);
}
}
@@ -142,7 +143,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
/* empty */; // Don't insert before PHI nodes or landingpad instrs.
- Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt);
+ Value *V =
+ new LoadInst(P->getType(), Slot, P->getName() + ".reload", &*InsertPt);
P->replaceAllUsesWith(V);
// Delete PHI.
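The DemoteRegToStack.cpp hunks above move to the LoadInst constructors that take the loaded type explicitly instead of deriving it from the pointer operand's pointee type. A small sketch of that pattern follows, not from the patch; reloadDemotedValue, Slot and InsertBefore are placeholders.

// Sketch only; assumes 'Slot' is an alloca created earlier for 'I' and
// 'InsertBefore' is a valid insertion point in the same function.
#include "llvm/IR/Instructions.h"
using namespace llvm;

static Value *reloadDemotedValue(Instruction &I, AllocaInst *Slot,
                                 Instruction *InsertBefore) {
  // The result type is now passed explicitly as the first argument rather
  // than being read off the pointer operand.
  return new LoadInst(I.getType(), Slot, I.getName() + ".reload",
                      /*isVolatile=*/false, InsertBefore);
}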
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 569ea58a3047..4aa40eeadda4 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -1,9 +1,8 @@
//===- EntryExitInstrumenter.cpp - Function Entry/Exit Instrumentation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,7 +30,7 @@ static void insertCall(Function &CurFn, StringRef Func,
Func == "__mcount" ||
Func == "_mcount" ||
Func == "__cyg_profile_func_enter_bare") {
- Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
+ FunctionCallee Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C));
CallInst *Call = CallInst::Create(Fn, "", InsertionPt);
Call->setDebugLoc(DL);
return;
@@ -40,7 +39,7 @@ static void insertCall(Function &CurFn, StringRef Func,
if (Func == "__cyg_profile_func_enter" || Func == "__cyg_profile_func_exit") {
Type *ArgTypes[] = {Type::getInt8PtrTy(C), Type::getInt8PtrTy(C)};
- Constant *Fn = M.getOrInsertFunction(
+ FunctionCallee Fn = M.getOrInsertFunction(
Func, FunctionType::get(Type::getVoidTy(C), ArgTypes, false));
Instruction *RetAddr = CallInst::Create(
diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp
index 762a374c135c..914babeb6829 100644
--- a/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -1,9 +1,8 @@
//===- EscapeEnumerator.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,7 @@
#include "llvm/IR/Module.h"
using namespace llvm;
-static Constant *getDefaultPersonalityFn(Module *M) {
+static FunctionCallee getDefaultPersonalityFn(Module *M) {
LLVMContext &C = M->getContext();
Triple T(M->getTargetTriple());
EHPersonality Pers = getDefaultEHPersonality(T);
@@ -69,8 +68,8 @@ IRBuilder<> *EscapeEnumerator::Next() {
BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C));
if (!F.hasPersonalityFn()) {
- Constant *PersFn = getDefaultPersonalityFn(F.getParent());
- F.setPersonalityFn(PersFn);
+ FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent());
+ F.setPersonalityFn(cast<Constant>(PersFn.getCallee()));
}
if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index e875cd686b00..0e203f4e075d 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -1,9 +1,8 @@
//===- Evaluator.cpp - LLVM IR evaluator ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -175,6 +174,34 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
}
+/// Apply 'Func' to Ptr. If this returns nullptr, introspect the pointer's
+/// type and walk down through the initial elements to obtain additional
+/// pointers to try. Returns the first non-null return value from Func, or
+/// nullptr if the type can't be introspected further.
+static Constant *
+evaluateBitcastFromPtr(Constant *Ptr, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ std::function<Constant *(Constant *)> Func) {
+ Constant *Val;
+ while (!(Val = Func(Ptr))) {
+ // If Ty is a struct, we can convert the pointer to the struct
+ // into a pointer to its first member.
+ // FIXME: This could be extended to support arrays as well.
+ Type *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+ if (!isa<StructType>(Ty))
+ break;
+
+ IntegerType *IdxTy = IntegerType::get(Ty->getContext(), 32);
+ Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+ Constant *const IdxList[] = {IdxZero, IdxZero};
+
+ Ptr = ConstantExpr::getGetElementPtr(Ty, Ptr, IdxList);
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
+ Ptr = FoldedPtr;
+ }
+ return Val;
+}
+
static Constant *getInitializer(Constant *C) {
auto *GV = dyn_cast<GlobalVariable>(C);
return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr;
@@ -185,8 +212,14 @@ static Constant *getInitializer(Constant *C) {
Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
- if (I != MutatedMemory.end()) return I->second;
+ auto findMemLoc = [this](Constant *Ptr) {
+ DenseMap<Constant *, Constant *>::const_iterator I =
+ MutatedMemory.find(Ptr);
+ return I != MutatedMemory.end() ? I->second : nullptr;
+ };
+
+ if (Constant *Val = findMemLoc(P))
+ return Val;
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
@@ -204,13 +237,17 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
break;
// Handle a constantexpr bitcast.
case Instruction::BitCast:
- Constant *Val = getVal(CE->getOperand(0));
- auto MM = MutatedMemory.find(Val);
- auto *I = (MM != MutatedMemory.end()) ? MM->second
- : getInitializer(CE->getOperand(0));
- if (I)
+ // We're evaluating a load through a pointer that was bitcast to a
+ // different type. See if the "from" pointer has recently been stored.
+ // If it hasn't, we may still be able to find a stored pointer by
+ // introspecting the type.
+ Constant *Val =
+ evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, findMemLoc);
+ if (!Val)
+ Val = getInitializer(CE->getOperand(0));
+ if (Val)
return ConstantFoldLoadThroughBitcast(
- I, P->getType()->getPointerElementType(), DL);
+ Val, P->getType()->getPointerElementType(), DL);
break;
}
}
@@ -330,37 +367,26 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
<< "Attempting to resolve bitcast on constant ptr.\n");
// If we're evaluating a store through a bitcast, then we need
// to pull the bitcast off the pointer type and push it onto the
- // stored value.
- Ptr = CE->getOperand(0);
-
- Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-
- // In order to push the bitcast onto the stored value, a bitcast
- // from NewTy to Val's type must be legal. If it's not, we can try
- // introspecting NewTy to find a legal conversion.
- Constant *NewVal;
- while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) {
- // If NewTy is a struct, we can convert the pointer to the struct
- // into a pointer to its first member.
- // FIXME: This could be extended to support arrays as well.
- if (StructType *STy = dyn_cast<StructType>(NewTy)) {
- NewTy = STy->getTypeAtIndex(0U);
-
- IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
- Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
- Constant * const IdxList[] = {IdxZero, IdxZero};
-
- Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
- if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
- Ptr = FoldedPtr;
-
- // If we can't improve the situation by introspecting NewTy,
- // we have to give up.
- } else {
- LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
- "evaluate.\n");
- return false;
+ // stored value. In order to push the bitcast onto the stored value,
+ // a bitcast from the pointer's element type to Val's type must be
+ // legal. If it's not, we can try introspecting the type to find a
+ // legal conversion.
+
+ auto castValTy = [&](Constant *P) -> Constant * {
+ Type *Ty = cast<PointerType>(P->getType())->getElementType();
+ if (Constant *FV = ConstantFoldLoadThroughBitcast(Val, Ty, DL)) {
+ Ptr = P;
+ return FV;
}
+ return nullptr;
+ };
+
+ Constant *NewVal =
+ evaluateBitcastFromPtr(CE->getOperand(0), DL, TLI, castValTy);
+ if (!NewVal) {
+ LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
+ return false;
}
Val = NewVal;
@@ -541,7 +567,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) {
+ if (Constant *C = ConstantFoldCall(cast<CallBase>(CS.getInstruction()),
+ Callee, Formals, TLI)) {
InstResult = castCallResultIfNeeded(CS.getCalledValue(), C);
if (!InstResult)
return false;
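The Evaluator.cpp hunks above factor the struct-introspection walk into evaluateBitcastFromPtr and reuse it for both load and store evaluation. A rough usage sketch of that shape, assuming the pre-opaque-pointer API visible above; the lookup table and wrapper name are hypothetical:

    // Try the pointer directly, then let the helper descend through leading
    // struct members (via {0, 0} GEPs) until the lookup succeeds or the type
    // cannot be introspected further.
    static Constant *lookupThroughBitcast(Constant *Ptr, const DataLayout &DL,
                                          const TargetLibraryInfo *TLI,
                                          DenseMap<Constant *, Constant *> &Known) {
      auto Find = [&](Constant *P) -> Constant * {
        auto It = Known.find(P);
        return It != Known.end() ? It->second : nullptr;
      };
      return evaluateBitcastFromPtr(Ptr, DL, TLI, Find);
    }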
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index d9778f4a1fb7..0c52e6f3703b 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -1,9 +1,8 @@
//===- FlattenCFG.cpp - Code to perform CFG flattening --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index a717d9b72819..a9b28754c8e9 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -1,9 +1,8 @@
//===- FunctionComparator.h - Function Comparator -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,6 +113,19 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
for (; LI != LE && RI != RE; ++LI, ++RI) {
Attribute LA = *LI;
Attribute RA = *RI;
+ if (LA.isTypeAttribute() && RA.isTypeAttribute()) {
+ if (LA.getKindAsEnum() != RA.getKindAsEnum())
+ return cmpNumbers(LA.getKindAsEnum(), RA.getKindAsEnum());
+
+ Type *TyL = LA.getValueAsType();
+ Type *TyR = RA.getValueAsType();
+ if (TyL && TyR)
+ return cmpTypes(TyL, TyR);
+
+ // Two pointers, at least one null, so the comparison result is
+ // independent of the value of a real pointer.
+ return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);
+ }
if (LA < RA)
return -1;
if (RA < LA)
@@ -557,31 +569,20 @@ int FunctionComparator::cmpOperations(const Instruction *L,
}
if (const CmpInst *CI = dyn_cast<CmpInst>(L))
return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
- if (const CallInst *CI = dyn_cast<CallInst>(L)) {
- if (int Res = cmpNumbers(CI->getCallingConv(),
- cast<CallInst>(R)->getCallingConv()))
+ if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
+ auto CSR = CallSite(const_cast<Instruction *>(R));
+ if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
return Res;
- if (int Res =
- cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
return Res;
- if (int Res = cmpOperandBundlesSchema(CI, R))
- return Res;
- return cmpRangeMetadata(
- CI->getMetadata(LLVMContext::MD_range),
- cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
- }
- if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
- if (int Res = cmpNumbers(II->getCallingConv(),
- cast<InvokeInst>(R)->getCallingConv()))
+ if (int Res = cmpOperandBundlesSchema(L, R))
return Res;
- if (int Res =
- cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
- return Res;
- if (int Res = cmpOperandBundlesSchema(II, R))
- return Res;
- return cmpRangeMetadata(
- II->getMetadata(LLVMContext::MD_range),
- cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ if (const CallInst *CI = dyn_cast<CallInst>(L))
+ if (int Res = cmpNumbers(CI->getTailCallKind(),
+ cast<CallInst>(R)->getTailCallKind()))
+ return Res;
+ return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
+ R->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index a9772e31da50..c9cc0990f237 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -1,9 +1,8 @@
//===- lib/Transforms/Utils/FunctionImportUtils.cpp - Importing utilities -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -130,7 +129,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// definitions upon import, so that they are available for inlining
// and/or optimization, but are turned into declarations later
// during the EliminateAvailableExternally pass.
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
// An imported external declaration stays external.
return SGV->getLinkage();
@@ -159,7 +158,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// equivalent, so the issue described above for weak_any does not exist,
// and the definition can be imported. It can be treated similarly
// to an imported externally visible global value.
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
return GlobalValue::ExternalLinkage;
@@ -177,7 +176,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
// If we are promoting the local to global scope, it is handled
// similarly to a normal externally visible global.
if (DoPromote) {
- if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+ if (doImportAsDefinition(SGV) && !isa<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
return GlobalValue::ExternalLinkage;
@@ -230,11 +229,11 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
}
- // Mark read-only variables which can be imported with specific attribute.
- // We can't internalize them now because IRMover will fail to link variable
- // definitions to their external declarations during ThinLTO import. We'll
- // internalize read-only variables later, after import is finished.
- // See internalizeImmutableGVs.
+ // Mark read/write-only variables which can be imported with specific
+ // attribute. We can't internalize them now because IRMover will fail
+ // to link variable definitions to their external declarations during
+ // ThinLTO import. We'll internalize read-only variables later, after
+ // import is finished. See internalizeGVsAfterImport.
//
// If global value dead stripping is not enabled in summary then
// propagateConstants hasn't been run. We can't internalize GV
@@ -242,13 +241,16 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) {
const auto &SL = VI.getSummaryList();
auto *GVS = SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
- if (GVS && GVS->isReadOnly())
+ // At this stage "maybe" is "definitely"
+ if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly()))
cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
}
bool DoPromote = false;
if (GV.hasLocalLinkage() &&
((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+ // Save the original name string before we rename GV below.
+ auto Name = GV.getName().str();
// Once we change the name or linkage it is difficult to determine
// again whether we should promote since shouldPromoteLocalToGlobal needs
// to locate the summary (based on GUID from name and linkage). Therefore,
@@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
GV.setLinkage(getLinkage(&GV, DoPromote));
if (!GV.hasLocalLinkage())
GV.setVisibility(GlobalValue::HiddenVisibility);
+
+ // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+ // for later renaming as well. This is required for COFF.
+ if (const auto *C = GV.getComdat())
+ if (C->getName() == Name)
+ RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
} else
GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
@@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
processGlobalForThinLTO(SF);
for (GlobalAlias &GA : M.aliases())
processGlobalForThinLTO(GA);
+
+ // Replace any COMDATS that required renaming (because the COMDAT leader was
+ // promoted and renamed).
+ if (!RenamedComdats.empty())
+ for (auto &GO : M.global_objects())
+ if (auto *C = GO.getComdat()) {
+ auto Replacement = RenamedComdats.find(C);
+ if (Replacement != RenamedComdats.end())
+ GO.setComdat(Replacement->second);
+ }
}
bool FunctionImportGlobalProcessing::run() {
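The FunctionImportUtils.cpp hunks above record every COMDAT whose leader gets renamed during promotion and later retarget affected global objects to the replacement, since COFF requires the COMDAT name to match its leader. A compact sketch of that two-step pattern; the map and function names are assumptions:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Step 1: while renaming the leader GV, remember the COMDAT mapping.
    static void recordRenamedComdat(GlobalValue &GV, StringRef OldName, Module &M,
                                    DenseMap<const Comdat *, Comdat *> &Renamed) {
      if (const Comdat *C = GV.getComdat())
        if (C->getName() == OldName)
          Renamed.try_emplace(C, M.getOrInsertComdat(GV.getName()));
    }

    // Step 2: after all renames, point affected global objects at the new COMDAT.
    static void applyRenamedComdats(Module &M,
                                    DenseMap<const Comdat *, Comdat *> &Renamed) {
      for (GlobalObject &GO : M.global_objects())
        if (const Comdat *C = GO.getComdat()) {
          auto It = Renamed.find(C);
          if (It != Renamed.end())
            GO.setComdat(It->second);
        }
    }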
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index ff6970db47da..a2942869130d 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -1,9 +1,8 @@
//===-- GlobalStatus.cpp - Compute status info for globals -----------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/GuardUtils.cpp b/lib/Transforms/Utils/GuardUtils.cpp
index 08de0a4c53e9..34c32d9c0c98 100644
--- a/lib/Transforms/Utils/GuardUtils.cpp
+++ b/lib/Transforms/Utils/GuardUtils.cpp
@@ -1,9 +1,8 @@
//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Utils that are used to perform transformations related to guards and their
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index 02482c550321..8041e66e6c4c 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -1,9 +1,8 @@
//===-- ImportedFunctionsInliningStats.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Generating inliner statistics for imported functions, mostly useful for
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 623fe91a5a60..a7f0f7ac5d61 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -1,9 +1,8 @@
//===- InlineFunction.cpp - Code to perform function inlining -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,16 +84,10 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(true), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-llvm::InlineResult llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
- AAResults *CalleeAAR,
- bool InsertLifetime) {
- return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
-}
-
-llvm::InlineResult llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
AAResults *CalleeAAR,
bool InsertLifetime) {
- return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
+ return InlineFunction(CallSite(CB), IFI, CalleeAAR, InsertLifetime);
}
namespace {
@@ -1042,11 +1035,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
SmallSetVector<const Argument *, 4> NAPtrArgs;
for (const Value *V : PtrArgs) {
- SmallVector<Value *, 4> Objects;
- GetUnderlyingObjects(const_cast<Value*>(V),
- Objects, DL, /* LI = */ nullptr);
+ SmallVector<const Value *, 4> Objects;
+ GetUnderlyingObjects(V, Objects, DL, /* LI = */ nullptr);
- for (Value *O : Objects)
+ for (const Value *O : Objects)
ObjSet.insert(O);
}
@@ -1216,14 +1208,14 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
- Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
+ auto *NewCall = dyn_cast<CallBase>(VMI->second);
if (!NewCall)
continue;
// We do not treat intrinsic calls like real function calls because we
// expect them to become inline code; do not add an edge for an intrinsic.
- CallSite CS = CallSite(NewCall);
- if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+ if (NewCall->getCalledFunction() &&
+ NewCall->getCalledFunction()->isIntrinsic())
continue;
// Remember that this call site got inlined for the client of
@@ -1236,19 +1228,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// destination. This can also happen if the call graph node of the caller
// was just unnecessarily imprecise.
if (!I->second->getFunction())
- if (Function *F = CallSite(NewCall).getCalledFunction()) {
+ if (Function *F = NewCall->getCalledFunction()) {
// Indirect call site resolved to direct call.
- CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+ CallerNode->addCalledFunction(NewCall, CG[F]);
continue;
}
- CallerNode->addCalledFunction(CallSite(NewCall), I->second);
+ CallerNode->addCalledFunction(NewCall, I->second);
}
// Update the call graph by deleting the edge from Callee to Caller. We must
// do this after the loop above in case Caller and Callee are the same.
- CallerNode->removeCallEdgeFor(CS);
+ CallerNode->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
}
static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
@@ -1353,6 +1345,44 @@ static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();
}
+/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
+/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
+static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
+ LLVMContext &Ctx,
+ DenseMap<const MDNode *, MDNode *> &IANodes) {
+ auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
+ return DebugLoc::get(OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(),
+ IA);
+}
+
+/// Returns the LoopID for a loop which has been cloned from another
+/// function for inlining with the new inlined-at start and end locs.
+static MDNode *inlineLoopID(const MDNode *OrigLoopId, DILocation *InlinedAt,
+ LLVMContext &Ctx,
+ DenseMap<const MDNode *, MDNode *> &IANodes) {
+ assert(OrigLoopId && OrigLoopId->getNumOperands() > 0 &&
+ "Loop ID needs at least one operand");
+ assert(OrigLoopId && OrigLoopId->getOperand(0).get() == OrigLoopId &&
+ "Loop ID should refer to itself");
+
+ // Save space for the self-referential LoopID.
+ SmallVector<Metadata *, 4> MDs = {nullptr};
+
+ for (unsigned i = 1; i < OrigLoopId->getNumOperands(); ++i) {
+ Metadata *MD = OrigLoopId->getOperand(i);
+ // Update the DILocations to encode the inlined-at metadata.
+ if (DILocation *DL = dyn_cast<DILocation>(MD))
+ MDs.push_back(inlineDebugLoc(DL, InlinedAt, Ctx, IANodes));
+ else
+ MDs.push_back(MD);
+ }
+
+ MDNode *NewLoopID = MDNode::getDistinct(Ctx, MDs);
+ // Insert the self-referential LoopID.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ return NewLoopID;
+}
+
/// Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
@@ -1378,10 +1408,17 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
+ // Loop metadata needs to be updated so that the start and end locs
+ // reference inlined-at locations.
+ if (MDNode *LoopID = BI->getMetadata(LLVMContext::MD_loop)) {
+ MDNode *NewLoopID =
+ inlineLoopID(LoopID, InlinedAtNode, BI->getContext(), IANodes);
+ BI->setMetadata(LLVMContext::MD_loop, NewLoopID);
+ }
+
if (DebugLoc DL = BI->getDebugLoc()) {
- auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(),
- IANodes);
- auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA);
+ DebugLoc IDL =
+ inlineDebugLoc(DL, InlinedAtNode, BI->getContext(), IANodes);
BI->setDebugLoc(IDL);
continue;
}
@@ -1448,47 +1485,45 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
CalleeEntryCount.getCount() < 1)
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
- uint64_t CallCount =
+ int64_t CallCount =
std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
CalleeEntryCount.getCount());
-
- for (auto const &Entry : VMap)
- if (isa<CallInst>(Entry.first))
- if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
- CI->updateProfWeight(CallCount, CalleeEntryCount.getCount());
- for (BasicBlock &BB : *Callee)
- // No need to update the callsite if it is pruned during inlining.
- if (VMap.count(&BB))
- for (Instruction &I : BB)
- if (CallInst *CI = dyn_cast<CallInst>(&I))
- CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount,
- CalleeEntryCount.getCount());
+ updateProfileCallee(Callee, -CallCount, &VMap);
}
-/// Update the entry count of callee after inlining.
-///
-/// The callsite's block count is subtracted from the callee's function entry
-/// count.
-static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
- Instruction *CallInst, Function *Callee,
- ProfileSummaryInfo *PSI) {
- // If the callee has a original count of N, and the estimated count of
- // callsite is M, the new callee count is set to N - M. M is estimated from
- // the caller's entry count, its entry block frequency and the block frequency
- // of the callsite.
+void llvm::updateProfileCallee(
+ Function *Callee, int64_t entryDelta,
+ const ValueMap<const Value *, WeakTrackingVH> *VMap) {
auto CalleeCount = Callee->getEntryCount();
- if (!CalleeCount.hasValue() || !PSI)
- return;
- auto CallCount = PSI->getProfileCount(CallInst, CallerBFI);
- if (!CallCount.hasValue())
+ if (!CalleeCount.hasValue())
return;
+
+ uint64_t priorEntryCount = CalleeCount.getCount();
+ uint64_t newEntryCount;
+
// Since CallSiteCount is an estimate, it could exceed the original callee
- // count and has to be set to 0.
- if (CallCount.getValue() > CalleeCount.getCount())
- CalleeCount.setCount(0);
+  // count and has to be set to 0, so guard against underflow.
+ if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
+ newEntryCount = 0;
else
- CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue());
- Callee->setEntryCount(CalleeCount);
+ newEntryCount = priorEntryCount + entryDelta;
+
+ Callee->setEntryCount(newEntryCount);
+
+  // A non-null VMap means this update happens during inlining.
+ if (VMap) {
+ uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
+ for (auto const &Entry : *VMap)
+ if (isa<CallInst>(Entry.first))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ CI->updateProfWeight(cloneEntryCount, priorEntryCount);
+ }
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (!VMap || VMap->count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(newEntryCount, priorEntryCount);
}
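The hunk above folds the old updateCalleeCount logic into updateProfileCallee, which takes a signed entry-count delta. A small worked example of the arithmetic, with hypothetical numbers (callee entry count 1000, estimated call-site count 400):

    // The inliner passes a negative delta for the call site it just inlined:
    updateProfileCallee(Callee, /*entryDelta=*/-400, &VMap);
    // priorEntryCount = 1000, newEntryCount = max(1000 - 400, 0) = 600.
    // Call sites cloned into the caller (reachable through VMap) are rescaled
    // by cloneEntryCount / priorEntryCount = 400 / 1000, while call sites left
    // in the out-of-line callee body are rescaled by 600 / 1000, so the two
    // copies together preserve the original call frequencies.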
/// This function inlines the called function into the basic block of the
@@ -1507,6 +1542,10 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
assert(TheCall->getParent() && TheCall->getFunction()
&& "Instruction not in function!");
+ // FIXME: we don't inline callbr yet.
+ if (isa<CallBrInst>(TheCall))
+ return false;
+
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
@@ -1684,8 +1723,6 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall,
IFI.PSI, IFI.CallerBFI);
- // Update the profile count of callee.
- updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI);
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
@@ -1734,6 +1771,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Instruction *NewI = nullptr;
if (isa<CallInst>(I))
NewI = CallInst::Create(cast<CallInst>(I), OpDefs, I);
+ else if (isa<CallBrInst>(I))
+ NewI = CallBrInst::Create(cast<CallBrInst>(I), OpDefs, I);
else
NewI = InvokeInst::Create(cast<InvokeInst>(I), OpDefs, I);
@@ -1817,8 +1856,7 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Move any dbg.declares describing the allocas into the entry basic block.
DIBuilder DIB(*Caller->getParent());
for (auto &AI : IFI.StaticAllocas)
- replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0,
- DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::ApplyOffset, 0);
}
SmallVector<Value*,4> VarArgsToForward;
@@ -1869,10 +1907,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add VarArgs to existing parameters.
SmallVector<Value *, 6> Params(CI->arg_operands());
Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
- CallInst *NewCI =
- CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction()
- : CI->getCalledValue(),
- Params, "", CI);
+ CallInst *NewCI = CallInst::Create(
+ CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
NewCI->setDebugLoc(CI->getDebugLoc());
NewCI->setAttributes(Attrs);
NewCI->setCallingConv(CI->getCallingConv());
@@ -2038,6 +2074,8 @@ llvm::InlineResult llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
Instruction *NewInst;
if (CS.isCall())
NewInst = CallInst::Create(cast<CallInst>(I), OpBundles, I);
+ else if (CS.isCallBr())
+ NewInst = CallBrInst::Create(cast<CallBrInst>(I), OpBundles, I);
else
NewInst = InvokeInst::Create(cast<InvokeInst>(I), OpBundles, I);
NewInst->takeName(I);
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 003721f2b939..6c4fc1ceb991 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -1,9 +1,8 @@
//===- InstructionNamer.cpp - Give anonymous instructions names -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 4a359b99bebd..9082049c82da 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -1,9 +1,8 @@
//===-- IntegerDivision.cpp - Expand integer division ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 53d444b309d5..29e7c5260f46 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -1,9 +1,8 @@
//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,11 +31,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -198,6 +199,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
continue;
}
+ // If we added a single PHI, it must dominate all uses and we can directly
+ // rename it.
+ if (AddedPHIs.size() == 1) {
+ // Tell the VHs that the uses changed. This updates SCEV's caches.
+ // We might call ValueIsRAUWd multiple times for the same value.
+ if (UseToRewrite->get()->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
+ UseToRewrite->set(AddedPHIs[0]);
+ continue;
+ }
+
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UseToRewrite);
}
@@ -211,9 +223,12 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
BasicBlock *UserBB = DVI->getParent();
if (InstBB == UserBB || L->contains(UserBB))
continue;
- // We currently only handle debug values residing in blocks where we have
- // inserted a PHI instruction.
- if (Value *V = SSAUpdate.FindValueForBlock(UserBB))
+ // We currently only handle debug values residing in blocks that were
+ // traversed while rewriting the uses. If we inserted just a single PHI,
+ // we will handle all relevant debug values.
+ Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
+ : SSAUpdate.FindValueForBlock(UserBB);
+ if (V)
DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
}
@@ -306,6 +321,12 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE) {
bool Changed = false;
+#ifdef EXPENSIVE_CHECKS
+ // Verify all sub-loops are in LCSSA form already.
+ for (Loop *SubLoop: L)
+ assert(SubLoop->isRecursivelyLCSSAForm(DT, *LI) && "Subloop not in LCSSA!");
+#endif
+
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
if (ExitBlocks.empty())
@@ -325,6 +346,10 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, put them into the worklist to rewrite those uses.
for (BasicBlock *BB : BlocksDominatingExits) {
+ // Skip blocks that are part of any sub-loops, they must be in LCSSA
+ // already.
+ if (LI->getLoopFor(BB) != &L)
+ continue;
for (Instruction &I : *BB) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
@@ -419,6 +444,8 @@ struct LCSSAWrapperPass : public FunctionPass {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
// This is needed to perform LCSSA verification inside LPPassManager
AU.addRequired<LCSSAVerificationPass>();
@@ -462,5 +489,9 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
+  // BPI maps terminators to probabilities; since we don't modify the CFG, no
+ // updates are needed to preserve it.
+ PA.preserve<BranchProbabilityAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index e1592c867636..8c67d1dc6eb3 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -1,9 +1,8 @@
//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 499e611acb57..39b6b889f91c 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1,9 +1,8 @@
//===- Local.cpp - Functions to perform local transformations -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -49,7 +49,6 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -92,6 +91,10 @@ using namespace llvm::PatternMatch;
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
+// Max recursion depth for collectBitParts used when detecting bswap and
+// bitreverse idioms
+static const unsigned BitPartRecursionMaxDepth = 64;
+
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -129,7 +132,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Builder.CreateBr(Destination);
BI->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, OldDest);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, OldDest}});
return true;
}
@@ -205,7 +208,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
i = SI->removeCase(i);
e = SI->case_end();
if (DTU)
- DTU->deleteEdgeRelaxed(ParentBB, DefaultDest);
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Delete, ParentBB, DefaultDest}});
continue;
}
@@ -253,7 +257,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (DeleteDeadConditions)
RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI);
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
@@ -331,7 +335,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
return true;
}
}
@@ -416,8 +420,8 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
return C->isNullValue() || isa<UndefValue>(C);
- if (CallSite CS = CallSite(I))
- if (isMathLibCallNoop(CS, TLI))
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (isMathLibCallNoop(Call, TLI))
return true;
return false;
@@ -430,7 +434,7 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
bool llvm::RecursivelyDeleteTriviallyDeadInstructions(
Value *V, const TargetLibraryInfo *TLI, MemorySSAUpdater *MSSAU) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI))
+ if (!I || !isInstructionTriviallyDead(I, TLI))
return false;
SmallVector<Instruction*, 16> DeadInsts;
@@ -665,7 +669,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
if (DTU)
- DTU->deleteEdgeRelaxed(Pred, BB);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, Pred, BB}});
}
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
@@ -734,7 +738,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
isa<UnreachableInst>(PredBB->getTerminator()) &&
"The successor list of PredBB isn't empty before "
"applying corresponding DTU updates.");
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(PredBB);
// Recalculation of DomTree is needed when updating a forward DomTree and
// the Entry BB is replaced.
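Most of the Local.cpp hunks above replace deleteEdgeRelaxed and the ForceRemoveDuplicates flag with DomTreeUpdater::applyUpdatesPermissive. A minimal sketch of the recurring pattern, assuming Pred, BB and Succ are the blocks around a folded edge:

    // Collect CFG edge changes and hand them to the permissive updater, which
    // tolerates duplicate and no-op entries.
    static void recordFoldedEdge(DomTreeUpdater *DTU, BasicBlock *Pred,
                                 BasicBlock *BB, BasicBlock *Succ) {
      SmallVector<DominatorTree::UpdateType, 4> Updates;
      Updates.push_back({DominatorTree::Delete, Pred, BB});
      Updates.push_back({DominatorTree::Insert, Pred, Succ});
      if (DTU)
        DTU->applyUpdatesPermissive(Updates);
    }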
@@ -997,6 +1001,18 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
}
}
+ // We cannot fold the block if it's a branch to an already present callbr
+ // successor because that creates duplicate successors.
+ for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ if (auto *CBI = dyn_cast<CallBrInst>((*I)->getTerminator())) {
+ if (Succ == CBI->getDefaultDest())
+ return false;
+ for (unsigned i = 0, e = CBI->getNumIndirectDests(); i != e; ++i)
+ if (Succ == CBI->getIndirectDest(i))
+ return false;
+ }
+ }
+
LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB);
SmallVector<DominatorTree::UpdateType, 32> Updates;
@@ -1064,7 +1080,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
"applying corresponding DTU updates.");
if (DTU) {
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
DTU->deleteBB(BB);
} else {
BB->eraseFromParent(); // Delete the old basic block.
@@ -1272,6 +1288,19 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
return false;
}
+/// Produce a DebugLoc to use for each dbg.declare/inst pair that is promoted
+/// to a dbg.value. Because no machine insts can come from debug intrinsics,
+/// only the scope and inlinedAt are significant. Zero line numbers are used in
+/// case this DebugLoc leaks into any adjacent instructions.
+static DebugLoc getDebugValueLoc(DbgVariableIntrinsic *DII, Instruction *Src) {
+ // Original dbg.declare must have a location.
+ DebugLoc DeclareLoc = DII->getDebugLoc();
+ MDNode *Scope = DeclareLoc.getScope();
+ DILocation *InlinedAt = DeclareLoc.getInlinedAt();
+ // Produce an unknown location with the correct scope / inlinedAt fields.
+ return DebugLoc::get(0, 0, Scope, InlinedAt);
+}
+
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
@@ -1280,9 +1309,11 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
auto *DIVar = DII->getVariable();
assert(DIVar && "Missing variable");
auto *DIExpr = DII->getExpression();
- Value *DV = SI->getOperand(0);
+ Value *DV = SI->getValueOperand();
+
+ DebugLoc NewLoc = getDebugValueLoc(DII, SI);
- if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) {
+ if (!valueCoversEntireFragment(DV->getType(), DII)) {
// FIXME: If storing to a part of the variable described by the dbg.declare,
// then we want to insert a dbg.value for the corresponding fragment.
LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
@@ -1292,14 +1323,12 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
// know nothing about the variable's content.
DV = UndefValue::get(DV->getType());
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
- SI);
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
return;
}
if (!LdStHasDebugValue(DIVar, DIExpr, SI))
- Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(),
- SI);
+ Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
@@ -1322,12 +1351,14 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
return;
}
+ DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
+
// We are now tracking the loaded value instead of the address. In the
// future if multi-location support is added to the IR, it might be
// preferable to keep tracking both the loaded value and the original
// address in case the alloca can not be elided.
Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
- LI, DIVar, DIExpr, DII->getDebugLoc(), (Instruction *)nullptr);
+ LI, DIVar, DIExpr, NewLoc, (Instruction *)nullptr);
DbgValue->insertAfter(LI);
}
@@ -1354,12 +1385,13 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
BasicBlock *BB = APN->getParent();
auto InsertionPt = BB->getFirstInsertionPt();
+ DebugLoc NewLoc = getDebugValueLoc(DII, nullptr);
+
// The block may be a catchswitch block, which does not have a valid
// insertion point.
// FIXME: Insert dbg.value markers in the successors when appropriate.
if (InsertionPt != BB->end())
- Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, DII->getDebugLoc(),
- &*InsertionPt);
+ Builder.insertDbgValueIntrinsic(APN, DIVar, DIExpr, NewLoc, &*InsertionPt);
}
/// Determine whether this alloca is either a VLA or an array.
@@ -1414,10 +1446,11 @@ bool llvm::LowerDbgDeclare(Function &F) {
// This is a call by-value or some other instruction that takes a
// pointer to the variable. Insert a *value* intrinsic that describes
// the variable by dereferencing the alloca.
+ DebugLoc NewLoc = getDebugValueLoc(DDI, nullptr);
auto *DerefExpr =
DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref);
- DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr,
- DDI->getDebugLoc(), CI);
+ DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, NewLoc,
+ CI);
}
}
DDI->eraseFromParent();
@@ -1519,14 +1552,14 @@ void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Instruction *InsertBefore, DIBuilder &Builder,
- bool DerefBefore, int Offset, bool DerefAfter) {
+ uint8_t DIExprFlags, int Offset) {
auto DbgAddrs = FindDbgAddrUses(Address);
for (DbgVariableIntrinsic *DII : DbgAddrs) {
DebugLoc Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
assert(DIVar && "Missing variable");
- DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter);
+ DIExpr = DIExpression::prepend(DIExpr, DIExprFlags, Offset);
// Insert llvm.dbg.declare immediately before InsertBefore, and remove old
// llvm.dbg.declare.
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
@@ -1538,10 +1571,10 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
- DIBuilder &Builder, bool DerefBefore,
- int Offset, bool DerefAfter) {
+ DIBuilder &Builder, uint8_t DIExprFlags,
+ int Offset) {
return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
- DerefBefore, Offset, DerefAfter);
+ DIExprFlags, Offset);
}
static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
@@ -1594,120 +1627,119 @@ bool llvm::salvageDebugInfo(Instruction &I) {
if (DbgUsers.empty())
return false;
- auto &M = *I.getModule();
- auto &DL = M.getDataLayout();
+ return salvageDebugInfoForDbgValues(I, DbgUsers);
+}
+
+bool llvm::salvageDebugInfoForDbgValues(
+ Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
auto &Ctx = I.getContext();
auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); };
- auto doSalvage = [&](DbgVariableIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) {
- auto *DIExpr = DII->getExpression();
- if (!Ops.empty()) {
- // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
- // are implicitly pointing out the value as a DWARF memory location
- // description.
- bool WithStackValue = isa<DbgValueInst>(DII);
- DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
- }
+ for (auto *DII : DbgUsers) {
+ // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
+ // are implicitly pointing out the value as a DWARF memory location
+ // description.
+ bool StackValue = isa<DbgValueInst>(DII);
+
+ DIExpression *DIExpr =
+ salvageDebugInfoImpl(I, DII->getExpression(), StackValue);
+
+ // salvageDebugInfoImpl should fail on examining the first element of
+ // DbgUsers, or none of them.
+ if (!DIExpr)
+ return false;
+
DII->setOperand(0, wrapMD(I.getOperand(0)));
DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
+ }
+
+ return true;
+}
+
+DIExpression *llvm::salvageDebugInfoImpl(Instruction &I,
+ DIExpression *SrcDIExpr,
+ bool WithStackValue) {
+ auto &M = *I.getModule();
+ auto &DL = M.getDataLayout();
+
+ // Apply a vector of opcodes to the source DIExpression.
+ auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
+ DIExpression *DIExpr = SrcDIExpr;
+ if (!Ops.empty()) {
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ }
+ return DIExpr;
};
- auto applyOffset = [&](DbgVariableIntrinsic *DII, uint64_t Offset) {
+ // Apply the given offset to the source DIExpression.
+ auto applyOffset = [&](uint64_t Offset) -> DIExpression * {
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
- doSalvage(DII, Ops);
+ return doSalvage(Ops);
};
- auto applyOps = [&](DbgVariableIntrinsic *DII,
- std::initializer_list<uint64_t> Opcodes) {
+ // initializer-list helper for applying operators to the source DIExpression.
+ auto applyOps =
+ [&](std::initializer_list<uint64_t> Opcodes) -> DIExpression * {
SmallVector<uint64_t, 8> Ops(Opcodes);
- doSalvage(DII, Ops);
+ return doSalvage(Ops);
};
if (auto *CI = dyn_cast<CastInst>(&I)) {
- if (!CI->isNoopCast(DL))
- return false;
-
- // No-op casts are irrelevant for debug info.
- MetadataAsValue *CastSrc = wrapMD(I.getOperand(0));
- for (auto *DII : DbgUsers) {
- DII->setOperand(0, CastSrc);
- LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
- }
- return true;
+ // No-op casts and zexts are irrelevant for debug info.
+ if (CI->isNoopCast(DL) || isa<ZExtInst>(&I))
+ return SrcDIExpr;
+ return nullptr;
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
unsigned BitWidth =
M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace());
- // Rewrite a constant GEP into a DIExpression. Since we are performing
- // arithmetic to compute the variable's *value* in the DIExpression, we
- // need to mark the expression with a DW_OP_stack_value.
+ // Rewrite a constant GEP into a DIExpression.
APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset))
- for (auto *DII : DbgUsers)
- applyOffset(DII, Offset.getSExtValue());
- return true;
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
+ return applyOffset(Offset.getSExtValue());
+ } else {
+ return nullptr;
+ }
} else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
// Rewrite binary operations with constant integer operands.
auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1));
if (!ConstInt || ConstInt->getBitWidth() > 64)
- return false;
+ return nullptr;
uint64_t Val = ConstInt->getSExtValue();
- for (auto *DII : DbgUsers) {
- switch (BI->getOpcode()) {
- case Instruction::Add:
- applyOffset(DII, Val);
- break;
- case Instruction::Sub:
- applyOffset(DII, -int64_t(Val));
- break;
- case Instruction::Mul:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
- break;
- case Instruction::SDiv:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
- break;
- case Instruction::SRem:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
- break;
- case Instruction::Or:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
- break;
- case Instruction::And:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
- break;
- case Instruction::Xor:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
- break;
- case Instruction::Shl:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
- break;
- case Instruction::LShr:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
- break;
- case Instruction::AShr:
- applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
- break;
- default:
- // TODO: Salvage constants from each kind of binop we know about.
- return false;
- }
+ switch (BI->getOpcode()) {
+ case Instruction::Add:
+ return applyOffset(Val);
+ case Instruction::Sub:
+ return applyOffset(-int64_t(Val));
+ case Instruction::Mul:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul});
+ case Instruction::SDiv:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_div});
+ case Instruction::SRem:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod});
+ case Instruction::Or:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_or});
+ case Instruction::And:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_and});
+ case Instruction::Xor:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor});
+ case Instruction::Shl:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl});
+ case Instruction::LShr:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shr});
+ case Instruction::AShr:
+ return applyOps({dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra});
+ default:
+ // TODO: Salvage constants from each kind of binop we know about.
+ return nullptr;
}
- return true;
- } else if (isa<LoadInst>(&I)) {
- MetadataAsValue *AddrMD = wrapMD(I.getOperand(0));
- for (auto *DII : DbgUsers) {
- // Rewrite the load into DW_OP_deref.
- auto *DIExpr = DII->getExpression();
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);
- DII->setOperand(0, AddrMD);
- DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr));
- LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
- }
- return true;
+ // *Not* to do: we should not attempt to salvage load instructions,
+ // because the validity and lifetime of a dbg.value containing
+ // DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
}
- return false;
+ return nullptr;
}
/// A replacement for a dbg.value expression.
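salvageDebugInfoImpl above now returns the rewritten DIExpression instead of mutating each debug user in place. As a rough illustration of what salvaging a single add produces, with a hypothetical helper and a constant offset of 16:

    // Salvaging "%y = add i64 %x, 16" for a dbg.value user of %y: describe %y
    // in terms of %x by appending the constant offset, plus DW_OP_stack_value
    // because a dbg.value describes a value rather than a memory location.
    static DIExpression *salvageAddOf16(DIExpression *Expr) {
      SmallVector<uint64_t, 8> Ops;
      DIExpression::appendOffset(Ops, 16);  // emits DW_OP_plus_uconst, 16
      return DIExpression::prependOpcodes(Expr, Ops, /*StackValue=*/true);
    }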
@@ -1849,21 +1881,10 @@ bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To,
return None;
bool Signed = *Signedness == DIBasicType::Signedness::Signed;
-
- if (!Signed) {
- // In the unsigned case, assume that a debugger will initialize the
- // high bits to 0 and do a no-op conversion.
- return Identity(DII);
- } else {
- // In the signed case, the high bits are given by sign extension, i.e:
- // (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1)
- // Calculate the high bits and OR them together with the low bits.
- SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu,
- (ToBits - 1), dwarf::DW_OP_shr,
- dwarf::DW_OP_lit0, dwarf::DW_OP_not,
- dwarf::DW_OP_mul, dwarf::DW_OP_or});
- return DIExpression::appendToStack(DII.getExpression(), Ops);
- }
+ dwarf::TypeKind TK = Signed ? dwarf::DW_ATE_signed : dwarf::DW_ATE_unsigned;
+ SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_LLVM_convert, ToBits, TK,
+ dwarf::DW_OP_LLVM_convert, FromBits, TK});
+ return DIExpression::appendToStack(DII.getExpression(), Ops);
};
return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt);
}
@@ -1894,10 +1915,14 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
}
unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
- bool PreserveLCSSA, DomTreeUpdater *DTU) {
+ bool PreserveLCSSA, DomTreeUpdater *DTU,
+ MemorySSAUpdater *MSSAU) {
BasicBlock *BB = I->getParent();
std::vector <DominatorTree::UpdateType> Updates;
+ if (MSSAU)
+ MSSAU->changeToUnreachable(I);
+
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
if (DTU)
@@ -1928,7 +1953,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
++NumInstrsRemoved;
}
if (DTU)
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
return NumInstrsRemoved;
}
@@ -1937,8 +1962,8 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
SmallVector<OperandBundleDef, 1> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
- "", II);
+ CallInst *NewCall = CallInst::Create(
+ II->getFunctionType(), II->getCalledValue(), Args, OpBundles, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -1956,7 +1981,7 @@ static void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr) {
UnwindDestBB->removePredecessor(BB);
II->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDestBB}});
}
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
@@ -1981,8 +2006,9 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
// can potentially be avoided with a cleverer API design that we do not have
// as of this time.
- InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
- InvokeArgs, OpBundles, CI->getName(), BB);
+ InvokeInst *II =
+ InvokeInst::Create(CI->getFunctionType(), CI->getCalledValue(), Split,
+ UnwindEdge, InvokeArgs, OpBundles, CI->getName(), BB);
II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -2052,7 +2078,7 @@ static bool markAliveBlocks(Function &F,
Changed = true;
break;
}
- if (CI->doesNotReturn()) {
+ if (CI->doesNotReturn() && !CI->isMustTailCall()) {
// If we found a call to a no-return function, insert an unreachable
// instruction after it. Make sure there isn't *already* one there
// though.
@@ -2102,7 +2128,8 @@ static bool markAliveBlocks(Function &F,
UnwindDestBB->removePredecessor(II->getParent());
II->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, UnwindDestBB);
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Delete, BB, UnwindDestBB}});
} else
changeToCall(II, DTU);
Changed = true;
@@ -2191,7 +2218,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) {
TI->replaceAllUsesWith(NewTI);
TI->eraseFromParent();
if (DTU)
- DTU->deleteEdgeRelaxed(BB, UnwindDest);
+ DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, UnwindDest}});
}
/// removeUnreachableBlocks - Remove blocks that are not reachable, even
@@ -2211,7 +2238,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
assert(Reachable.size() < F.size());
NumRemoved += F.size()-Reachable.size();
- SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+ SmallSetVector<BasicBlock *, 8> DeadBlockSet;
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
auto *BB = &*I;
if (Reachable.count(BB))
@@ -2256,7 +2283,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
}
if (DTU) {
- DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+ DTU->applyUpdatesPermissive(Updates);
bool Deleted = false;
for (auto *BB : DeadBlockSet) {
if (DTU->isBBPendingDeletion(BB))
@@ -2450,12 +2477,12 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
}
-bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
+bool llvm::callsGCLeafFunction(const CallBase *Call,
const TargetLibraryInfo &TLI) {
// Check if the function is specifically marked as a gc leaf function.
- if (CS.hasFnAttr("gc-leaf-function"))
+ if (Call->hasFnAttr("gc-leaf-function"))
return true;
- if (const Function *F = CS.getCalledFunction()) {
+ if (const Function *F = Call->getCalledFunction()) {
if (F->hasFnAttribute("gc-leaf-function"))
return true;
@@ -2469,7 +2496,7 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS,
// marked as 'gc-leaf-function.' All available Libcalls are
// GC-leaf.
LibFunc LF;
- if (TLI.getLibFunc(CS, LF)) {
+ if (TLI.getLibFunc(ImmutableCallSite(Call), LF)) {
return TLI.has(LF);
}
@@ -2530,13 +2557,13 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
BasicBlock *BB) {
// Since we are moving the instructions out of its basic block, we do not
// retain their original debug locations (DILocations) and debug intrinsic
- // instructions (dbg.values).
+ // instructions.
//
// Doing so would degrade the debugging experience and adversely affect the
// accuracy of profiling information.
//
// Currently, when hoisting the instructions, we take the following actions:
- // - Remove their dbg.values.
+ // - Remove their debug intrinsic instructions.
// - Set their debug locations to the values from the insertion point.
//
// As per PR39141 (comment #8), the more fundamental reason why the dbg.values
@@ -2554,7 +2581,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
I->dropUnknownNonDebugMetadata();
if (I->isUsedByMetadata())
dropDebugUsers(*I);
- if (isa<DbgVariableIntrinsic>(I)) {
+ if (isa<DbgInfoIntrinsic>(I)) {
// Remove DbgInfo Intrinsics.
II = I->eraseFromParent();
continue;
@@ -2613,7 +2640,7 @@ struct BitPart {
/// does not invalidate internal references (std::map instead of DenseMap).
static const Optional<BitPart> &
collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
- std::map<Value *, Optional<BitPart>> &BPS) {
+ std::map<Value *, Optional<BitPart>> &BPS, int Depth) {
auto I = BPS.find(V);
if (I != BPS.end())
return I->second;
@@ -2621,13 +2648,19 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
auto &Result = BPS[V] = None;
auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ // Prevent stack overflow by limiting the recursion depth
+ if (Depth == BitPartRecursionMaxDepth) {
+ LLVM_DEBUG(dbgs() << "collectBitParts max recursion depth reached.\n");
+ return Result;
+ }
+
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is an or instruction, it may be an inner node of the bswap.
if (I->getOpcode() == Instruction::Or) {
auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!A || !B)
return Result;
@@ -2660,7 +2693,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
@@ -2692,7 +2725,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
return Result;
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
Result = Res;
@@ -2707,7 +2740,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// If this is a zext instruction zero extend the result.
if (I->getOpcode() == Instruction::ZExt) {
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
- MatchBitReversals, BPS);
+ MatchBitReversals, BPS, Depth + 1);
if (!Res)
return Result;
@@ -2769,7 +2802,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
// Try to find all the pieces corresponding to the bswap.
std::map<Value *, Optional<BitPart>> BPS;
- auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
+ auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS, 0);
if (!Res)
return false;
auto &BitProvenance = Res->Provenance;
@@ -2883,3 +2916,41 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
return true;
}
}
+
+using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
+AllocaInst *llvm::findAllocaForValue(Value *V,
+ AllocaForValueMapTy &AllocaForValue) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI;
+ // See if we've already calculated (or started to calculate) alloca for a
+ // given value.
+ AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
+ if (I != AllocaForValue.end())
+ return I->second;
+ // Store 0 while we're calculating alloca for value V to avoid
+ // infinite recursion if the value references itself.
+ AllocaForValue[V] = nullptr;
+ AllocaInst *Res = nullptr;
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ Res = findAllocaForValue(CI->getOperand(0), AllocaForValue);
+ else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ for (Value *IncValue : PN->incoming_values()) {
+ // Allow self-referencing phi-nodes.
+ if (IncValue == PN)
+ continue;
+ AllocaInst *IncValueAI = findAllocaForValue(IncValue, AllocaForValue);
+ // AI for incoming values should exist and should all be equal.
+ if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
+ return nullptr;
+ Res = IncValueAI;
+ }
+ } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
+ Res = findAllocaForValue(EP->getPointerOperand(), AllocaForValue);
+ } else {
+ LLVM_DEBUG(dbgs() << "Alloca search cancelled on unknown instruction: "
+ << *V << "\n");
+ }
+ if (Res)
+ AllocaForValue[V] = Res;
+ return Res;
+}
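The helper added above lets callers walk a cast/PHI/GEP chain back to its unique underlying alloca with memoization. A minimal usage sketch follows, assuming the matching declaration is exported from llvm/Transforms/Utils/Local.h; the wrapper name uniqueAllocaBehind is illustrative only and not part of the patch.

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Sketch only: map one pointer back to its unique underlying alloca, or
// return nullptr when the search is ambiguous or hits an unknown instruction.
static AllocaInst *uniqueAllocaBehind(Value *Ptr) {
  // The map memoizes in-progress results so self-referencing PHIs terminate;
  // a pass issuing many queries would normally share a single map.
  DenseMap<Value *, AllocaInst *> AllocaForValue;
  return findAllocaForValue(Ptr, AllocaForValue);
}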
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index 41f14a834617..37389a695b45 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -1,9 +1,8 @@
//===----------------- LoopRotationUtils.cpp -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,6 +16,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
@@ -28,7 +28,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -296,7 +295,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Begin by walking OrigHeader and populating ValueMap with an entry for
// each Instruction.
BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
- ValueToValueMapTy ValueMap;
+ ValueToValueMapTy ValueMap, ValueMapMSSA;
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
@@ -375,6 +374,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (auto *II = dyn_cast<IntrinsicInst>(C))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
+ // MemorySSA cares whether the cloned instruction was inserted or not, and
+ // not whether it can be remapped to a simplified value.
+ ValueMapMSSA[Inst] = C;
}
}
@@ -392,10 +394,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
LoopEntryBranch->eraseFromParent();
// Update MemorySSA before the rewrite call below changes the 1:1
- // instruction:cloned_instruction_or_value mapping in ValueMap.
+ // instruction:cloned_instruction_or_value mapping.
if (MSSAU) {
- ValueMap[OrigHeader] = OrigPreheader;
- MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap);
+ ValueMapMSSA[OrigHeader] = OrigPreheader;
+ MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
+ ValueMapMSSA);
}
SmallVector<PHINode*, 2> InsertedPHIs;
@@ -463,9 +466,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (BasicBlock *ExitPred : ExitPreds) {
// We only need to split loop exit edges.
Loop *PredLoop = LI->getLoopFor(ExitPred);
- if (!PredLoop || PredLoop->contains(Exit))
- continue;
- if (isa<IndirectBrInst>(ExitPred->getTerminator()))
+ if (!PredLoop || PredLoop->contains(Exit) ||
+ ExitPred->getTerminator()->isIndirectTerminator())
continue;
SplitLatchEdge |= L->getLoopLatch() == ExitPred;
BasicBlock *ExitSplit = SplitCriticalEdge(
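The rotation change above (and the LoopSimplify changes below) replace isa<IndirectBrInst> checks with Instruction::isIndirectTerminator(), which also rejects callbr terminators. A hedged sketch of that predicate, separate from the patch itself; the helper name is illustrative:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sketch only: an edge leaving Pred can be split unless Pred ends in an
// indirect terminator (indirectbr or callbr), mirroring the checks above.
static bool canSplitEdgeFrom(const BasicBlock *Pred) {
  return !Pred->getTerminator()->isIndirectTerminator();
}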
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 380f4fca54d9..7e6da02d5707 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -1,9 +1,8 @@
//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,9 @@
// to transform the loop and make these guarantees. Client code should check
// that these conditions are true before relying on them.
//
+// Similar complications arise from callbr instructions, particularly in
+// asm-goto where blockaddress expressions are used.
+//
// Note that the simplifycfg pass will clean up blocks which are split out but
// end up being unnecessary, so usage of this pass should not pessimize
// generated code.
@@ -46,13 +48,15 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -67,6 +71,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -115,7 +120,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
/// preheader insertion and analysis updating.
///
BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
- LoopInfo *LI, bool PreserveLCSSA) {
+ LoopInfo *LI, MemorySSAUpdater *MSSAU,
+ bool PreserveLCSSA) {
BasicBlock *Header = L->getHeader();
// Compute the set of predecessors of the loop that are not in the loop.
@@ -124,10 +130,11 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
PI != PE; ++PI) {
BasicBlock *P = *PI;
if (!L->contains(P)) { // Coming in from outside the loop?
- // If the loop is branched to from an indirect branch, we won't
+ // If the loop is branched to from an indirect terminator, we won't
// be able to fully transform the loop, because it prohibits
// edge splitting.
- if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
+ if (P->getTerminator()->isIndirectTerminator())
+ return nullptr;
// Keep track of it.
OutsideBlocks.push_back(P);
@@ -137,7 +144,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
// Split out the loop pre-header.
BasicBlock *PreheaderBB;
PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
- LI, nullptr, PreserveLCSSA);
+ LI, MSSAU, PreserveLCSSA);
if (!PreheaderBB)
return nullptr;
@@ -217,7 +224,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, bool PreserveLCSSA,
- AssumptionCache *AC) {
+ AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
// Don't try to separate loops without a preheader.
if (!Preheader)
return nullptr;
@@ -236,8 +243,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) != PN ||
!L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirectbr edges.
- if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+ // We can't split indirect control flow edges.
+ if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
return nullptr;
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
@@ -251,7 +258,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
SE->forgetLoop(L);
BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
- DT, LI, nullptr, PreserveLCSSA);
+ DT, LI, MSSAU, PreserveLCSSA);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -314,7 +321,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Split edges to exit blocks from the inner loop, if they emerged in the
// process of separating the outer one.
- formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
if (PreserveLCSSA) {
// Fix LCSSA form for L. Some values, which previously were only used inside
@@ -339,7 +346,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
/// and have that block branch to the loop header. This ensures that loops
/// have exactly one backedge.
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
- DominatorTree *DT, LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
@@ -358,8 +366,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
BasicBlock *P = *I;
- // Indirectbr edges cannot be split, so we must fail if we find one.
- if (isa<IndirectBrInst>(P->getTerminator()))
+ // Indirect edges cannot be split, so we must fail if we find one.
+ if (P->getTerminator()->isIndirectTerminator())
return nullptr;
if (P != Preheader) BackedgeBlocks.push_back(P);
@@ -439,9 +447,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
if (!LoopMD)
LoopMD = TI->getMetadata(LoopMDKind);
TI->setMetadata(LoopMDKind, nullptr);
- for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
- if (TI->getSuccessor(Op) == Header)
- TI->setSuccessor(Op, BEBlock);
+ TI->replaceSuccessorWith(Header, BEBlock);
}
BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
@@ -454,6 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Update dominator information
DT->splitBlock(BEBlock);
+ if (MSSAU)
+ MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
+ BEBlock);
+
return BEBlock;
}
@@ -461,8 +471,11 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
bool Changed = false;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
ReprocessLoop:
// Check to see that no blocks (other than the header) in this loop have
@@ -489,11 +502,15 @@ ReprocessLoop:
// Zap the dead pred's terminator and replace it with unreachable.
Instruction *TI = P->getTerminator();
- changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
+ /*DTU=*/nullptr, MSSAU);
Changed = true;
}
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// If there are exiting blocks with branches on undef, resolve the undef in
// the direction which will exit the loop. This will help simplify loop
// trip count computations.
@@ -518,7 +535,7 @@ ReprocessLoop:
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
- Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
if (Preheader)
Changed = true;
}
@@ -527,9 +544,12 @@ ReprocessLoop:
// predecessors that are inside of the loop. This check guarantees that the
// loop preheader/header will dominate the exit blocks. If the exit block has
// predecessors from outside of the loop, split the edge now.
- if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA))
+ if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
Changed = true;
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
BasicBlock *LoopLatch = L->getLoopLatch();
@@ -538,8 +558,8 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (Loop *OuterL =
- separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) {
+ if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
+ PreserveLCSSA, AC, MSSAU)) {
++NumNested;
// Enqueue the outer loop as it should be processed next in our
// depth-first nest walk.
@@ -556,11 +576,14 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI);
+ LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
if (LoopLatch)
Changed = true;
}
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
// Scan over the PHI nodes in the loop header. Since they now have only two
@@ -618,9 +641,9 @@ ReprocessLoop:
Instruction *Inst = &*I++;
if (Inst == CI)
continue;
- if (!L->makeLoopInvariant(Inst, AnyInvariant,
- Preheader ? Preheader->getTerminator()
- : nullptr)) {
+ if (!L->makeLoopInvariant(
+ Inst, AnyInvariant,
+ Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
AllInvariant = false;
break;
}
@@ -637,7 +660,7 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI))
+ if (!FoldBranchToCommonDest(BI, MSSAU))
continue;
// Success. The block is now dead, so remove it from the loop,
@@ -657,11 +680,16 @@ ReprocessLoop:
DT->changeImmediateDominator(Child, Node->getIDom());
}
DT->eraseNode(ExitingBlock);
+ if (MSSAU) {
+ SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+ ExitBlockSet.insert(ExitingBlock);
+ MSSAU->removeBlocks(ExitBlockSet);
+ }
BI->getSuccessor(0)->removePredecessor(
- ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
BI->getSuccessor(1)->removePredecessor(
- ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA);
+ ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
ExitingBlock->eraseFromParent();
}
}
@@ -672,12 +700,15 @@ ReprocessLoop:
if (Changed && SE)
SE->forgetTopmostLoop(L);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
return Changed;
}
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- bool PreserveLCSSA) {
+ MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
bool Changed = false;
#ifndef NDEBUG
@@ -705,7 +736,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
- AC, PreserveLCSSA);
+ AC, MSSAU, PreserveLCSSA);
return Changed;
}
@@ -737,6 +768,9 @@ namespace {
AU.addPreservedID(LCSSAID);
AU.addPreserved<DependenceAnalysisWrapperPass>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+ if (EnableMSSALoopDependency)
+ AU.addPreserved<MemorySSAWrapperPass>();
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -768,12 +802,21 @@ bool LoopSimplify::runOnFunction(Function &F) {
ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ MemorySSA *MSSA = nullptr;
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (EnableMSSALoopDependency) {
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAAnalysis) {
+ MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+ }
+ }
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA);
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
#ifndef NDEBUG
if (PreserveLCSSA) {
@@ -794,9 +837,10 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
// Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
- // after simplifying the loops.
+ // after simplifying the loops. MemorySSA is not preserved either.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
+ Changed |=
+ simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
@@ -809,6 +853,12 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
PA.preserve<SCEVAA>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<DependenceAnalysis>();
+  // BPI maps conditional terminators to probabilities. LoopSimplify can insert
+ // blocks, but it does so only by splitting existing blocks and edges. This
+ // results in the interesting property that all new terminators inserted are
+ // unconditional branches which do not appear in BPI. All deletions are
+ // handled via ValueHandle callbacks w/in BPI.
+ PA.preserve<BranchProbabilityAnalysis>();
return PA;
}
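For code outside this file, the net effect of the LoopSimplify changes is one extra parameter: simplifyLoop (and InsertPreheaderForLoop) now takes an optional MemorySSAUpdater. A hedged sketch of the updated calling convention, mirroring the legacy-pass wiring above; the header location and the driver function are assumptions:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/LoopSimplify.h" // assumed home of simplifyLoop's declaration
#include <memory>

using namespace llvm;

// Sketch only: simplify every top-level loop, threading MemorySSA through
// when it is available and passing nullptr otherwise (the old behaviour).
static bool simplifyAllLoops(LoopInfo &LI, DominatorTree &DT,
                             ScalarEvolution *SE, AssumptionCache *AC,
                             MemorySSA *MSSA, bool PreserveLCSSA) {
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = make_unique<MemorySSAUpdater>(MSSA); // llvm::make_unique in this tree
  bool Changed = false;
  for (Loop *L : LI)
    Changed |= simplifyLoop(L, &DT, &LI, SE, AC, MSSAU.get(), PreserveLCSSA);
  return Changed;
}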
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index da7ed2bd1652..e39ade523714 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1,9 +1,8 @@
//===-- UnrollLoop.cpp - Loop unrolling utilities -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,6 +44,8 @@ using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
+STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
+ "conditional latch (completely or otherwise)");
static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
@@ -94,66 +95,6 @@ void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
}
}
-/// Folds a basic block into its predecessor if it only has one predecessor, and
-/// that predecessor only has one successor.
-/// The LoopInfo Analysis that is passed will be kept consistent.
-BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI,
- ScalarEvolution *SE,
- DominatorTree *DT) {
- // Merge basic blocks into their predecessor if there is only one distinct
- // pred, and if there is only one distinct successor of the predecessor, and
- // if there are no PHI nodes.
- BasicBlock *OnlyPred = BB->getSinglePredecessor();
- if (!OnlyPred) return nullptr;
-
- if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
- return nullptr;
-
- LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
- << OnlyPred->getName() << "\n");
-
- // Resolve any PHI nodes at the start of the block. They are all
- // guaranteed to have exactly one entry if they exist, unless there are
- // multiple duplicate (but guaranteed to be equal) entries for the
- // incoming edges. This occurs when there are multiple edges from
- // OnlyPred to OnlySucc.
- FoldSingleEntryPHINodes(BB);
-
- // Delete the unconditional branch from the predecessor...
- OnlyPred->getInstList().pop_back();
-
- // Make all PHI nodes that referred to BB now refer to Pred as their
- // source...
- BB->replaceAllUsesWith(OnlyPred);
-
- // Move all definitions in the successor to the predecessor...
- OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
- // OldName will be valid until erased.
- StringRef OldName = BB->getName();
-
- // Erase the old block and update dominator info.
- if (DT)
- if (DomTreeNode *DTN = DT->getNode(BB)) {
- DomTreeNode *PredDTN = DT->getNode(OnlyPred);
- SmallVector<DomTreeNode *, 8> Children(DTN->begin(), DTN->end());
- for (auto *DI : Children)
- DT->changeImmediateDominator(DI, PredDTN);
-
- DT->eraseNode(BB);
- }
-
- LI->removeBlock(BB);
-
- // Inherit predecessor's name if it exists...
- if (!OldName.empty() && !OnlyPred->hasName())
- OnlyPred->setName(OldName);
-
- BB->eraseFromParent();
-
- return OnlyPred;
-}
-
/// Check if unrolling created a situation where we need to insert phi nodes to
/// preserve LCSSA form.
/// \param Blocks is a vector of basic blocks representing unrolled loop.
@@ -332,12 +273,11 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
///
/// If RemainderLoop is non-null, it will receive the remainder loop (if
/// required and not fully unrolled).
-LoopUnrollResult llvm::UnrollLoop(
- Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime,
- bool AllowExpensiveTripCount, bool PreserveCondBr, bool PreserveOnlyFirst,
- unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
- LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) {
+LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE,
+ bool PreserveLCSSA, Loop **RemainderLoop) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
@@ -357,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop(
return LoopUnrollResult::Unmodified;
}
- // The current loop unroll pass can only unroll loops with a single latch
+ // The current loop unroll pass can unroll loops with a single latch or header
// that's a conditional branch exiting the loop.
// FIXME: The implementation can be extended to work with more complicated
// cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
+ BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
- if (!BI || BI->isUnconditional()) {
- // The loop-rotate pass can be helpful to avoid this in many cases.
+ // FIXME: Support loops without conditional latch and multiple exiting blocks.
+ if (!BI ||
+ (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
+ L->getExitingBlock() != Header))) {
+ LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
+ "branch in the latch or header.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
+ auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
+ return BI->isConditional() && BI->getSuccessor(S1) == Header &&
+ !L->contains(BI->getSuccessor(S2));
+ };
+
+ // If we have a conditional latch, it must exit the loop.
+ if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
+ !CheckLatchSuccessors(1, 0)) {
LLVM_DEBUG(
- dbgs()
- << " Can't unroll; loop not terminated by a conditional branch.\n");
+ dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
}
- auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
- return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+ auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
+ return HeaderBI && HeaderBI->isConditional() &&
+ L->contains(HeaderBI->getSuccessor(S1)) &&
+ !L->contains(HeaderBI->getSuccessor(S2));
};
- if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
- LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
- " exiting the loop can be unrolled\n");
+ // If we do not have a conditional latch, the header must exit the loop.
+ if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
+ !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
+ LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
return LoopUnrollResult::Unmodified;
}
@@ -389,28 +347,28 @@ LoopUnrollResult llvm::UnrollLoop(
return LoopUnrollResult::Unmodified;
}
- if (TripCount != 0)
- LLVM_DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
- if (TripMultiple != 1)
- LLVM_DEBUG(dbgs() << " Trip Multiple = " << TripMultiple << "\n");
+ if (ULO.TripCount != 0)
+ LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n");
+ if (ULO.TripMultiple != 1)
+ LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n");
// Effectively "DCE" unrolled iterations that are beyond the tripcount
// and will never be executed.
- if (TripCount != 0 && Count > TripCount)
- Count = TripCount;
+ if (ULO.TripCount != 0 && ULO.Count > ULO.TripCount)
+ ULO.Count = ULO.TripCount;
// Don't enter the unroll code if there is nothing to do.
- if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+ if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) {
LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
return LoopUnrollResult::Unmodified;
}
- assert(Count > 0);
- assert(TripMultiple > 0);
- assert(TripCount == 0 || TripCount % TripMultiple == 0);
+ assert(ULO.Count > 0);
+ assert(ULO.TripMultiple > 0);
+ assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0);
// Are we eliminating the loop control altogether?
- bool CompletelyUnroll = Count == TripCount;
+ bool CompletelyUnroll = ULO.Count == ULO.TripCount;
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
std::vector<BasicBlock*> OriginalLoopBlocks = L->getBlocks();
@@ -429,24 +387,29 @@ LoopUnrollResult llvm::UnrollLoop(
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
// flag is specified.
- bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+ bool RuntimeTripCount =
+ (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime);
- assert((!RuntimeTripCount || !PeelCount) &&
+ assert((!RuntimeTripCount || !ULO.PeelCount) &&
"Did not expect runtime trip-count unrolling "
"and peeling for the same loop");
bool Peeled = false;
- if (PeelCount) {
- Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+ if (ULO.PeelCount) {
+ Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Successful peeling may result in a change in the loop preheader/trip
// counts. If we later unroll the loop, we want these to be updated.
if (Peeled) {
- BasicBlock *ExitingBlock = L->getExitingBlock();
+ // According to our guards and profitability checks the only
+      // meaningful exit should be the latch block. Other exits go to deopt,
+ // so we do not worry about them.
+ BasicBlock *ExitingBlock = L->getLoopLatch();
assert(ExitingBlock && "Loop without exiting block?");
+ assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
Preheader = L->getLoopPreheader();
- TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+ ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+ ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
}
@@ -459,7 +422,7 @@ LoopUnrollResult llvm::UnrollLoop(
for (auto &I : *BB)
if (auto CS = CallSite(&I))
HasConvergent |= CS.isConvergent();
- assert((!HasConvergent || TripMultiple % Count == 0) &&
+ assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) &&
"Unroll count must divide trip multiple if loop contains a "
"convergent operation.");
});
@@ -468,11 +431,12 @@ LoopUnrollResult llvm::UnrollLoop(
UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
: isEpilogProfitable(L);
- if (RuntimeTripCount && TripMultiple % Count != 0 &&
- !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- EpilogProfitability, UnrollRemainder, LI, SE,
- DT, AC, PreserveLCSSA, RemainderLoop)) {
- if (Force)
+ if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 &&
+ !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount,
+ EpilogProfitability, ULO.UnrollRemainder,
+ ULO.ForgetAllSCEV, LI, SE, DT, AC,
+ PreserveLCSSA, RemainderLoop)) {
+ if (ULO.Force)
RuntimeTripCount = false;
else {
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
@@ -483,35 +447,35 @@ LoopUnrollResult llvm::UnrollLoop(
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
- if (TripCount != 0) {
- BreakoutTrip = TripCount % Count;
- TripMultiple = 0;
+ if (ULO.TripCount != 0) {
+ BreakoutTrip = ULO.TripCount % ULO.Count;
+ ULO.TripMultiple = 0;
} else {
// Figure out what multiple to use.
- BreakoutTrip = TripMultiple =
- (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
+ BreakoutTrip = ULO.TripMultiple =
+ (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple);
}
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
- << " with trip count " << TripCount << "!\n");
+ << " with trip count " << ULO.TripCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
L->getHeader())
<< "completely unrolled loop with "
- << NV("UnrollCount", TripCount) << " iterations";
+ << NV("UnrollCount", ULO.TripCount) << " iterations";
});
- } else if (PeelCount) {
+ } else if (ULO.PeelCount) {
LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName()
- << " with iteration count " << PeelCount << "!\n");
+ << " with iteration count " << ULO.PeelCount << "!\n");
if (ORE)
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
L->getHeader())
- << " peeled loop by " << NV("PeelCount", PeelCount)
+ << " peeled loop by " << NV("PeelCount", ULO.PeelCount)
<< " iterations";
});
} else {
@@ -519,24 +483,25 @@ LoopUnrollResult llvm::UnrollLoop(
OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
L->getHeader());
return Diag << "unrolled loop by a factor of "
- << NV("UnrollCount", Count);
+ << NV("UnrollCount", ULO.Count);
};
LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by "
- << Count);
- if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
+ << ULO.Count);
+ if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) {
LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
if (ORE)
ORE->emit([&]() {
return DiagBuilder() << " with a breakout at trip "
<< NV("BreakoutTrip", BreakoutTrip);
});
- } else if (TripMultiple != 1) {
- LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (ULO.TripMultiple != 1) {
+ LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch");
if (ORE)
ORE->emit([&]() {
- return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)
- << " trips per branch";
+ return DiagBuilder()
+ << " with " << NV("TripMultiple", ULO.TripMultiple)
+ << " trips per branch";
});
} else if (RuntimeTripCount) {
LLVM_DEBUG(dbgs() << " with run-time trip count");
@@ -555,11 +520,24 @@ LoopUnrollResult llvm::UnrollLoop(
// and if something changes inside them then any of outer loops may also
// change. When we forget outermost loop, we also forget all contained loops
// and this is what we need here.
- if (SE)
- SE->forgetTopmostLoop(L);
+ if (SE) {
+ if (ULO.ForgetAllSCEV)
+ SE->forgetAllLoops();
+ else
+ SE->forgetTopmostLoop(L);
+ }
- bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
- BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
+ bool ContinueOnTrue;
+ bool LatchIsExiting = BI->isConditional();
+ BasicBlock *LoopExit = nullptr;
+ if (LatchIsExiting) {
+ ContinueOnTrue = L->contains(BI->getSuccessor(0));
+ LoopExit = BI->getSuccessor(ContinueOnTrue);
+ } else {
+ NumUnrolledWithHeader++;
+ ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
+ LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
+ }
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
@@ -569,11 +547,23 @@ LoopUnrollResult llvm::UnrollLoop(
OrigPHINode.push_back(cast<PHINode>(I));
}
- std::vector<BasicBlock*> Headers;
- std::vector<BasicBlock*> Latches;
+ std::vector<BasicBlock *> Headers;
+ std::vector<BasicBlock *> HeaderSucc;
+ std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);
+ if (!LatchIsExiting) {
+ auto *Term = cast<BranchInst>(Header->getTerminator());
+ if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
+ assert(L->contains(Term->getSuccessor(0)));
+ HeaderSucc.push_back(Term->getSuccessor(0));
+ } else {
+ assert(L->contains(Term->getSuccessor(1)));
+ HeaderSucc.push_back(Term->getSuccessor(1));
+ }
+ }
+
// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
// exit.
@@ -599,7 +589,7 @@ LoopUnrollResult llvm::UnrollLoop(
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
if (const DILocation *DIL = I.getDebugLoc()) {
- auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
if (NewDIL)
I.setDebugLoc(NewDIL.getValue());
else
@@ -608,7 +598,7 @@ LoopUnrollResult llvm::UnrollLoop(
<< DIL->getFilename() << " Line: " << DIL->getLine());
}
- for (unsigned It = 1; It != Count; ++It) {
+ for (unsigned It = 1; It != ULO.Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
NewLoops[L] = L;
@@ -663,6 +653,13 @@ LoopUnrollResult llvm::UnrollLoop(
if (*BB == LatchBlock)
Latches.push_back(New);
+ // Keep track of the successor of the new header in the current iteration.
+ for (auto *Pred : predecessors(*BB))
+ if (Pred == Header) {
+ HeaderSucc.push_back(New);
+ break;
+ }
+
NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
@@ -699,8 +696,7 @@ LoopUnrollResult llvm::UnrollLoop(
if (CompletelyUnroll) {
PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
Header->getInstList().erase(PN);
- }
- else if (Count > 1) {
+ } else if (ULO.Count > 1) {
Value *InVal = PN->removeIncomingValue(LatchBlock, false);
// If this value was defined in the loop, take the value defined by the
// last iteration of the loop.
@@ -713,39 +709,11 @@ LoopUnrollResult llvm::UnrollLoop(
}
}
- // Now that all the basic blocks for the unrolled iterations are in place,
- // set up the branches to connect them.
- for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
- // The original branch was replicated in each unrolled iteration.
- BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
-
- // The branch destination.
- unsigned j = (i + 1) % e;
- BasicBlock *Dest = Headers[j];
- bool NeedConditional = true;
-
- if (RuntimeTripCount && j != 0) {
- NeedConditional = false;
- }
-
- // For a complete unroll, make the last iteration end with a branch
- // to the exit block.
- if (CompletelyUnroll) {
- if (j == 0)
- Dest = LoopExit;
- // If using trip count upper bound to completely unroll, we need to keep
- // the conditional branch except the last one because the loop may exit
- // after any iteration.
- assert(NeedConditional &&
- "NeedCondition cannot be modified by both complete "
- "unrolling and runtime unrolling");
- NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
- } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- NeedConditional = false;
- }
-
+ auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
+ ArrayRef<BasicBlock *> NextBlocks,
+ BasicBlock *CurrentHeader,
+ bool NeedConditional) {
+ auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
// Update the conditional branch's successor for the following
// iteration.
@@ -753,9 +721,9 @@ LoopUnrollResult llvm::UnrollLoop(
} else {
// Remove phi operands at this loop exit
if (Dest != LoopExit) {
- BasicBlock *BB = Latches[i];
- for (BasicBlock *Succ: successors(BB)) {
- if (Succ == Headers[i])
+ BasicBlock *BB = Src;
+ for (BasicBlock *Succ : successors(BB)) {
+ if (Succ == CurrentHeader)
continue;
for (PHINode &Phi : Succ->phis())
Phi.removeIncomingValue(BB, false);
@@ -765,13 +733,97 @@ LoopUnrollResult llvm::UnrollLoop(
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
}
+ };
+
+ // Now that all the basic blocks for the unrolled iterations are in place,
+ // set up the branches to connect them.
+ if (LatchIsExiting) {
+ // Set up latches to branch to the new header in the unrolled iterations or
+ // the loop exit for the last latch in a fully unrolled loop.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
+ // For a complete unroll, make the last iteration end with a branch
+ // to the exit block.
+ if (CompletelyUnroll) {
+ if (j == 0)
+ Dest = LoopExit;
+ // If using trip count upper bound to completely unroll, we need to keep
+ // the conditional branch except the last one because the loop may exit
+ // after any iteration.
+ assert(NeedConditional &&
+ "NeedCondition cannot be modified by both complete "
+ "unrolling and runtime unrolling");
+ NeedConditional =
+ (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
+ } else if (j != BreakoutTrip &&
+ (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+ }
+
+ setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
+ }
+ } else {
+ // Setup headers to branch to their new successors in the unrolled
+ // iterations.
+ for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = HeaderSucc[i];
+ bool NeedConditional = true;
+
+ if (RuntimeTripCount && j != 0)
+ NeedConditional = false;
+
+ if (CompletelyUnroll)
+ // We cannot drop the conditional branch for the last condition, as we
+ // may have to execute the loop body depending on the condition.
+ NeedConditional = j == 0 || ULO.PreserveCondBr;
+ else if (j != BreakoutTrip &&
+ (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
+ NeedConditional = false;
+
+ setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
+ }
+
+ // Set up latches to branch to the new header in the unrolled iterations or
+ // the loop exit for the last latch in a fully unrolled loop.
+
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ // The original branch was replicated in each unrolled iteration.
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+
+ // The branch destination.
+ unsigned j = (i + 1) % e;
+ BasicBlock *Dest = Headers[j];
+
+ // When completely unrolling, the last latch becomes unreachable.
+ if (CompletelyUnroll && j == 0)
+ new UnreachableInst(Term->getContext(), Term);
+ else
+ // Replace the conditional branch with an unconditional one.
+ BranchInst::Create(Dest, Term);
+
+ Term->eraseFromParent();
+ }
}
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
// iterations too.
- if (DT && Count > 1) {
+ if (DT && ULO.Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
SmallVector<BasicBlock *, 16> ChildrenToUpdate;
@@ -781,7 +833,9 @@ LoopUnrollResult llvm::UnrollLoop(
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom;
- if (BB == LatchBlock) {
+ BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
+ auto &TermBlocks = LatchIsExiting ? Latches : Headers;
+ if (BB == TermBlock) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
// Since the latch is always at the bottom of the loop, if the latch
@@ -789,11 +843,13 @@ LoopUnrollResult llvm::UnrollLoop(
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
- NewIDom = Latches.back();
- for (BasicBlock *IterLatch : Latches) {
- Instruction *Term = IterLatch->getTerminator();
+ // For loops exiting from the header, we limit the supported loops
+ // to have a single exiting block.
+ NewIDom = TermBlocks.back();
+ for (BasicBlock *Iter : TermBlocks) {
+ Instruction *Term = Iter->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
- NewIDom = IterLatch;
+ NewIDom = Iter;
break;
}
}
@@ -810,14 +866,20 @@ LoopUnrollResult llvm::UnrollLoop(
}
assert(!DT || !UnrollVerifyDomtree ||
- DT->verify(DominatorTree::VerificationLevel::Fast));
+ DT->verify(DominatorTree::VerificationLevel::Fast));
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
- BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
- if (Term->isUnconditional()) {
+ BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
+ assert((Term ||
+ (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
+ "Need a branch as terminator, except when fully unrolling with "
+ "unconditional latch");
+ if (Term && Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
// Dest has been folded into Fold. Update our worklists accordingly.
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
@@ -829,8 +891,8 @@ LoopUnrollResult llvm::UnrollLoop(
// At this point, the code is well formed. We now simplify the unrolled loop,
// doing constant propagation and dead code elimination as we go.
- simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE,
- DT, AC);
+ simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI,
+ SE, DT, AC);
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
@@ -878,11 +940,11 @@ LoopUnrollResult llvm::UnrollLoop(
// TODO: That potentially might be compile-time expensive. We should try
// to fix the loop-simplified form incrementally.
- simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
+ simplifyLoop(OuterL, DT, LI, SE, AC, nullptr, PreserveLCSSA);
} else {
// Simplify loops for which we might've broken loop-simplify form.
for (Loop *SubLoop : LoopsToSimplify)
- simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
+ simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA);
}
}
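The signature change above folds roughly a dozen positional flags into a single UnrollLoopOptions bag. A hedged sketch of a caller migrated to the new form; every ULO member set here appears in the hunks above, while the wrapper, its name, and the chosen values are illustrative only:

#include "llvm/Transforms/Utils/UnrollLoop.h" // assumed home of UnrollLoopOptions

using namespace llvm;

// Sketch only: partially unroll L by a factor of four, allowing a runtime
// remainder loop when the trip count is not known at compile time.
static LoopUnrollResult unrollByFour(Loop *L, LoopInfo *LI, ScalarEvolution *SE,
                                     DominatorTree *DT, AssumptionCache *AC,
                                     OptimizationRemarkEmitter *ORE,
                                     bool PreserveLCSSA) {
  UnrollLoopOptions ULO;
  ULO.Count = 4;
  ULO.TripCount = 0;               // unknown trip count
  ULO.TripMultiple = 1;
  ULO.PeelCount = 0;
  ULO.Force = false;
  ULO.AllowRuntime = true;         // permit a runtime remainder loop
  ULO.AllowExpensiveTripCount = false;
  ULO.PreserveCondBr = false;
  ULO.PreserveOnlyFirst = false;
  ULO.UnrollRemainder = false;
  ULO.ForgetAllSCEV = false;       // forget only the topmost loop in SCEV
  return UnrollLoop(L, ULO, LI, SE, DT, AC, ORE, PreserveLCSSA,
                    /*RemainderLoop=*/nullptr);
}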
diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index e26762639c13..ff49d83f25c5 100644
--- a/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -1,9 +1,8 @@
//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -198,8 +197,8 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
if (TripMultiple == 1 || TripMultiple % Count != 0) {
if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false,
/*UseEpilogRemainder*/ true,
- UnrollRemainder, LI, SE, DT, AC, true,
- EpilogueLoop)) {
+ UnrollRemainder, /*ForgetAllSCEV*/ false,
+ LI, SE, DT, AC, true, EpilogueLoop)) {
LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
@@ -301,7 +300,7 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))
if (const DILocation *DIL = I.getDebugLoc()) {
- auto NewDIL = DIL->cloneWithDuplicationFactor(Count);
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
if (NewDIL)
I.setDebugLoc(NewDIL.getValue());
else
@@ -539,12 +538,14 @@ LoopUnrollResult llvm::UnrollAndJamLoop(
MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end());
MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end());
MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end());
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
while (!MergeBlocks.empty()) {
BasicBlock *BB = *MergeBlocks.begin();
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) {
BasicBlock *Dest = Term->getSuccessor(0);
- if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {
+ BasicBlock *Fold = Dest->getUniquePredecessor();
+ if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
// Don't remove BB and add Fold as they are the same BB
assert(Fold == BB);
(void)Fold;
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 151a285af4e9..005306cf1898 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -1,9 +1,8 @@
//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -62,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+ "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+ cl::desc("Allow peeling of loops with multiple deopt exits."));
+
// Designates that a Phi is estimated to become invariant after an "infinite"
// number of loop iterations (i.e. only may become an invariant if the loop is
// fully unrolled).
@@ -74,6 +77,22 @@ bool llvm::canPeel(Loop *L) {
if (!L->isLoopSimplifyForm())
return false;
+ if (UnrollPeelMultiDeoptExit) {
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+
+ if (!Exits.empty()) {
+ // Latch's terminator is a conditional branch, Latch is exiting and
+      // all non-Latch exits end in a deoptimize call.
+ const BasicBlock *Latch = L->getLoopLatch();
+ const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+ return T && T->isConditional() && L->isLoopExiting(Latch) &&
+ all_of(Exits, [](const BasicBlock *BB) {
+ return BB->getTerminatingDeoptimizeCall();
+ });
+ }
+ }
+
// Only peel loops that contain a single exit
if (!L->getExitingBlock() || !L->getUniqueExitBlock())
return false;
@@ -363,41 +382,89 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
unsigned IterNumber, unsigned AvgIters,
uint64_t &PeeledHeaderWeight) {
+ if (!PeeledHeaderWeight)
+ return;
// FIXME: Pick a more realistic distribution.
// Currently the proportion of weight we assign to the fall-through
// side of the branch drops linearly with the iteration number, and we use
// a 0.9 fudge factor to make the drop-off less sharp...
- if (PeeledHeaderWeight) {
- uint64_t FallThruWeight =
- PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
- uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
- PeeledHeaderWeight -= ExitWeight;
-
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
- : MDB.createBranchWeights(FallThruWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ uint64_t FallThruWeight =
+ PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+ uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+ PeeledHeaderWeight -= ExitWeight;
+
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+ : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+}
+
+/// Initialize the weights.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param AvgIters The average number of iterations we expect the loop to have.
+/// \param[out] ExitWeight The # of times the edge from Latch to Exit is taken.
+/// \param[out] CurHeaderWeight The # of times the header is executed.
+static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ unsigned AvgIters, uint64_t &ExitWeight,
+ uint64_t &CurHeaderWeight) {
+ uint64_t TrueWeight, FalseWeight;
+ if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+ return;
+ unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+ ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+  // The # of times the loop body executes is the sum of the # of times the
+  // exit edge is taken and the # of times the backedges are taken.
+ CurHeaderWeight = TrueWeight + FalseWeight;
+}
+
+/// Update the weights of the original Latch block after peeling off all iterations.
+///
+/// \param Header The header block.
+/// \param LatchBR The latch branch.
+/// \param ExitWeight The weight of the edge from Latch to Exit block.
+/// \param CurHeaderWeight The # of times the header is executed.
+static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ uint64_t ExitWeight, uint64_t CurHeaderWeight) {
+ // Adjust the branch weights on the loop exit.
+ if (!ExitWeight)
+ return;
+
+ // The backedge count is the difference of current header weight and
+ // current loop exit weight. If the current header weight is smaller than
+ // the current loop exit weight, we mark the loop backedge weight as 1.
+ uint64_t BackEdgeWeight = 0;
+ if (ExitWeight < CurHeaderWeight)
+ BackEdgeWeight = CurHeaderWeight - ExitWeight;
+ else
+ BackEdgeWeight = 1;
+ MDBuilder MDB(LatchBR->getContext());
+ unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
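Taken together, initBranchWeights, updateBranchWeights and fixupBranchWeights redistribute the latch's !prof weights across the peeled copies and the remaining loop. A minimal standalone sketch of that arithmetic with illustrative profile numbers (plain C++, no LLVM types):

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical latch profile: header executed 100 times, exit taken 10
  // times, i.e. roughly 10 iterations per loop entry.
  uint64_t ExitWeight = 10, CurHeaderWeight = 100;
  uint64_t PeeledHeaderWeight = CurHeaderWeight;
  unsigned AvgIters = 10, PeelCount = 2;

  for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
    // Same formula as updateBranchWeights: the fall-through share drops
    // linearly with the iteration number, damped by the 0.9 fudge factor.
    uint64_t FallThru =
        PeeledHeaderWeight * ((float)(AvgIters - Iter) / AvgIters * 0.9);
    uint64_t Exit = PeeledHeaderWeight - FallThru;
    PeeledHeaderWeight -= Exit;
    std::printf("peeled iter %u: fallthru=%llu exit=%llu\n", Iter,
                (unsigned long long)FallThru, (unsigned long long)Exit);
  }

  // Same rule as fixupBranchWeights for the loop that remains.
  uint64_t BackEdgeWeight =
      ExitWeight < CurHeaderWeight ? CurHeaderWeight - ExitWeight : 1;
  std::printf("remaining loop: backedge=%llu exit=%llu\n",
              (unsigned long long)BackEdgeWeight,
              (unsigned long long)ExitWeight);
}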
/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
/// peeled off.
-/// \param Exit The exit block of the original loop.
+/// \param ExitEdges The exit edges of the original loop.
/// \param[out] NewBlocks A list of the blocks in the newly created clone
/// \param[out] VMap The value map between the loop and the new clone.
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
-static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
- BasicBlock *InsertBot, BasicBlock *Exit,
- SmallVectorImpl<BasicBlock *> &NewBlocks,
- LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI) {
+static void cloneLoopBlocks(
+ Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
+ SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *> > &ExitEdges,
+ SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
@@ -443,9 +510,11 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// iteration (for every other iteration)
BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
- LatchBR->setSuccessor(HeaderIdx, InsertBot);
- LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+ for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx)
+ if (LatchBR->getSuccessor(idx) == Header) {
+ LatchBR->setSuccessor(idx, InsertBot);
+ break;
+ }
if (DT)
DT->changeImmediateDominator(InsertBot, NewLatch);
@@ -476,14 +545,14 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// we've just created. Note that this must happen *after* the incoming
// values are adjusted, since the value going out of the latch may also be
// a value coming into the header.
- for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
- PHINode *PHI = cast<PHINode>(I);
- Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
- Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
- if (LatchInst && L->contains(LatchInst))
- LatchVal = VMap[LatchVal];
- PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
- }
+ for (auto Edge : ExitEdges)
+ for (PHINode &PHI : Edge.second->phis()) {
+ Value *LatchVal = PHI.getIncomingValueForBlock(Edge.first);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ LatchVal = VMap[LatchVal];
+ PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
+ }
// LastValueMap is updated with the values for the current loop
// which are used the next time this function is called.
@@ -512,7 +581,20 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
BasicBlock *Header = L->getHeader();
BasicBlock *PreHeader = L->getLoopPreheader();
BasicBlock *Latch = L->getLoopLatch();
- BasicBlock *Exit = L->getUniqueExitBlock();
+ SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
+ L->getExitEdges(ExitEdges);
+
+ DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+ if (DT) {
+ assert(L->hasDedicatedExits() && "No dedicated exits?");
+ for (auto Edge : ExitEdges) {
+ if (ExitIDom.count(Edge.second))
+ continue;
+ BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+ assert(L->contains(BB) && "IDom is not in a loop");
+ ExitIDom[Edge.second] = BB;
+ }
+ }
Function *F = Header->getParent();
@@ -577,16 +659,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// newly created branches.
BranchInst *LatchBR =
cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
- unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-
- uint64_t TrueWeight, FalseWeight;
uint64_t ExitWeight = 0, CurHeaderWeight = 0;
- if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
- ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
- // The # of times the loop body executes is the sum of the exit block
- // weight and the # of times the backedges are taken.
- CurHeaderWeight = TrueWeight + FalseWeight;
- }
+ initBranchWeights(Header, LatchBR, PeelCount, ExitWeight, CurHeaderWeight);
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
@@ -602,8 +676,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
else
CurHeaderWeight = 1;
- cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
- NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+ cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
+ LoopBlocks, VMap, LVMap, DT, LI);
// Remap to use values from the current iteration instead of the
// previous one.
@@ -614,7 +688,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// latter is the first cloned loop body, as original PreHeader dominates
// the original loop body.
if (Iter == 0)
- DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+ for (auto Exit : ExitIDom)
+ DT->changeImmediateDominator(Exit.first,
+ cast<BasicBlock>(LVMap[Exit.second]));
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
@@ -645,36 +721,22 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
if (LatchInst && L->contains(LatchInst))
NewVal = LVMap[LatchInst];
- PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+ PHI->setIncomingValueForBlock(NewPreHeader, NewVal);
}
- // Adjust the branch weights on the loop exit.
- if (ExitWeight) {
- // The backedge count is the difference of current header weight and
- // current loop exit weight. If the current header weight is smaller than
- // the current loop exit weight, we mark the loop backedge weight as 1.
- uint64_t BackEdgeWeight = 0;
- if (ExitWeight < CurHeaderWeight)
- BackEdgeWeight = CurHeaderWeight - ExitWeight;
- else
- BackEdgeWeight = 1;
- MDBuilder MDB(LatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ fixupBranchWeights(Header, LatchBR, ExitWeight, CurHeaderWeight);
- // If the loop is nested, we changed the parent loop, update SE.
- if (Loop *ParentLoop = L->getParentLoop()) {
- SE->forgetLoop(ParentLoop);
+ if (Loop *ParentLoop = L->getParentLoop())
+ L = ParentLoop;
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
- } else {
- // FIXME: Incrementally update loop-simplify
- simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
- }
+ // We modified the loop, update SE.
+ SE->forgetTopmostLoop(L);
+
+ // Finally, the DomTree must be correct.
+ assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
NumPeeled++;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 00d2fd2fdbac..d22fdb4d52dc 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -1,9 +1,8 @@
//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,11 +124,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Update the existing PHI node operand with the value from the
// new PHI node. How this is done depends on if the existing
// PHI node is in the original loop block, or the exit block.
- if (L->contains(&PN)) {
- PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);
- } else {
+ if (L->contains(&PN))
+ PN.setIncomingValueForBlock(NewPreHeader, NewPN);
+ else
PN.addIncoming(NewPN, PrologExit);
- }
}
}
@@ -265,7 +263,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Update the existing PHI node operand with the value from the new PHI
// node. Corresponding instruction in epilog loop should be PHI.
PHINode *VPN = cast<PHINode>(VMap[&PN]);
- VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
+ VPN->setIncomingValueForBlock(EpilogPreHeader, NewPN);
}
}
@@ -426,10 +424,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
/// is populated with all the loop exit blocks other than the LatchExit block.
-static bool
-canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
- BasicBlock *LatchExit, bool PreserveLCSSA,
- bool UseEpilogRemainder) {
+static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
+ bool PreserveLCSSA,
+ bool UseEpilogRemainder) {
// We currently have some correctness constraints in unrolling a multi-exit
// loop. Check for these below.
@@ -437,11 +434,6 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
// We rely on LCSSA form being preserved when the exit blocks are transformed.
if (!PreserveLCSSA)
return false;
- SmallVector<BasicBlock *, 4> Exits;
- L->getUniqueExitBlocks(Exits);
- for (auto *BB : Exits)
- if (BB != LatchExit)
- OtherExits.push_back(BB);
// TODO: Support multiple exiting blocks jumping to the `LatchExit` when
// UnrollRuntimeMultiExit is true. This will need updating the logic in
@@ -471,9 +463,8 @@ static bool canProfitablyUnrollMultiExitLoop(
bool PreserveLCSSA, bool UseEpilogRemainder) {
#if !defined(NDEBUG)
- SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
- assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
- PreserveLCSSA, UseEpilogRemainder) &&
+ assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
+ UseEpilogRemainder) &&
"Should be safe to unroll before checking profitability!");
#endif
@@ -554,10 +545,10 @@ static bool canProfitablyUnrollMultiExitLoop(
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
bool UseEpilogRemainder,
- bool UnrollRemainder, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- AssumptionCache *AC, bool PreserveLCSSA,
- Loop **ResultLoop) {
+ bool UnrollRemainder, bool ForgetAllSCEV,
+ LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveLCSSA, Loop **ResultLoop) {
LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
LLVM_DEBUG(L->dump());
LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n"
@@ -597,8 +588,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
+ L->getUniqueNonLatchExitBlocks(OtherExits);
bool isMultiExitUnrollingEnabled =
- canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
+ canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
UseEpilogRemainder) &&
canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
UseEpilogRemainder);
@@ -939,23 +931,24 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (OtherExits.size() > 0) {
// Generate dedicated exit blocks for the original loop, to preserve
// LoopSimplifyForm.
- formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
+ formDedicatedExitBlocks(L, DT, LI, nullptr, PreserveLCSSA);
// Generate dedicated exit blocks for the remainder loop if one exists, to
// preserve LoopSimplifyForm.
if (remainderLoop)
- formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
+ formDedicatedExitBlocks(remainderLoop, DT, LI, nullptr, PreserveLCSSA);
}
auto UnrollResult = LoopUnrollResult::Unmodified;
if (remainderLoop && UnrollRemainder) {
LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");
UnrollResult =
- UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,
- /*Force*/ false, /*AllowRuntime*/ false,
- /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
- /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
- /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC,
- /*ORE*/ nullptr, PreserveLCSSA);
+ UnrollLoop(remainderLoop,
+ {/*Count*/ Count - 1, /*TripCount*/ Count - 1,
+ /*Force*/ false, /*AllowRuntime*/ false,
+ /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true,
+ /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1,
+ /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV},
+ LI, SE, DT, AC, /*ORE*/ nullptr, PreserveLCSSA);
}
if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled)
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index a93d1aeb62ef..ec226e65f650 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -1,9 +1,8 @@
//===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,10 +14,12 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -27,7 +28,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DomTreeUpdater.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -47,6 +47,7 @@ using namespace llvm::PatternMatch;
static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced";
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
+ MemorySSAUpdater *MSSAU,
bool PreserveLCSSA) {
bool Changed = false;
@@ -66,6 +67,9 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
if (isa<IndirectBrInst>(PredBB->getTerminator()))
// We cannot rewrite exiting edges from an indirectbr.
return false;
+ if (isa<CallBrInst>(PredBB->getTerminator()))
+ // We cannot rewrite exiting edges from a callbr.
+ return false;
InLoopPredecessors.push_back(PredBB);
} else {
@@ -79,7 +83,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
return false;
auto *NewExitBB = SplitBlockPredecessors(
- BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
+ BB, InLoopPredecessors, ".loopexit", DT, LI, MSSAU, PreserveLCSSA);
if (!NewExitBB)
LLVM_DEBUG(
@@ -217,7 +221,10 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
// When the value is absent it is interpreted as 'attribute set'.
return true;
case 2:
- return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+ if (ConstantInt *IntMD =
+ mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
+ return IntMD->getZExtValue();
+ return true;
}
llvm_unreachable("unexpected number of options");
}
@@ -376,17 +383,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
- if (Enable == true) {
- // 'Forcing' vector width and interleave count to one effectively disables
- // this tranformation.
- if (VectorizeWidth == 1 && InterleaveCount == 1)
- return TM_SuppressedByUser;
- return TM_ForcedByUser;
- }
+ // 'Forcing' vector width and interleave count to one effectively disables
+ // this transformation.
+ if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+ return TM_SuppressedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
return TM_Disable;
+ if (Enable == true)
+ return TM_ForcedByUser;
+
if (VectorizeWidth == 1 && InterleaveCount == 1)
return TM_Disable;
@@ -528,10 +535,9 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
if (DT) {
// Update the dominator tree by informing it about the new edge from the
- // preheader to the exit.
- DTU.insertEdge(Preheader, ExitBlock);
- // Inform the dominator tree about the removed edge.
- DTU.deleteEdge(Preheader, L->getHeader());
+ // preheader to the exit and the removed edge.
+ DTU.applyUpdates({{DominatorTree::Insert, Preheader, ExitBlock},
+ {DominatorTree::Delete, Preheader, L->getHeader()}});
}
// Use a map to unique and a vector to guarantee deterministic ordering.
@@ -578,10 +584,14 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
// dbg.value truncates the range of any dbg.value before the loop where the
// loop used to be. This is particularly important for constant values.
DIBuilder DIB(*ExitBlock->getModule());
+ Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
+ assert(InsertDbgValueBefore &&
+ "There should be a non-PHI instruction in exit block, else these "
+ "instructions will have no parent.");
for (auto *DVI : DeadDebugInst)
- DIB.insertDbgValueIntrinsic(
- UndefValue::get(Builder.getInt32Ty()), DVI->getVariable(),
- DVI->getExpression(), DVI->getDebugLoc(), ExitBlock->getFirstNonPHI());
+ DIB.insertDbgValueIntrinsic(UndefValue::get(Builder.getInt32Ty()),
+ DVI->getVariable(), DVI->getExpression(),
+ DVI->getDebugLoc(), InsertDbgValueBefore);
// Remove the block from the reference counting scheme, so that we can
// delete it freely later.
@@ -611,20 +621,28 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
}
Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
- // Only support loops with a unique exiting block, and a latch.
- if (!L->getExitingBlock())
- return None;
+ // Support loops with an exiting latch where all other existing exits only
+ // deoptimize.
// Get the branch weights for the loop's backedge.
- BranchInst *LatchBR =
- dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
- if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return None;
+ BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
return None;
assert((LatchBR->getSuccessor(0) == L->getHeader() ||
LatchBR->getSuccessor(1) == L->getHeader()) &&
"At least one edge out of the latch must go to the header");
+ SmallVector<BasicBlock *, 4> ExitBlocks;
+ L->getUniqueNonLatchExitBlocks(ExitBlocks);
+ if (any_of(ExitBlocks, [](const BasicBlock *EB) {
+ return !EB->getTerminatingDeoptimizeCall();
+ }))
+ return None;
+
// To estimate the number of times the loop body was executed, we want to
// know the number of times the backedge was taken, vs. the number of times
// we exited the loop.
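The estimate then falls out of the latch's branch weights. A back-of-the-envelope sketch with illustrative numbers (the exact rounding LLVM applies may differ):

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical !prof metadata on the latch branch: backedge taken 990
  // times, exit edge taken 10 times.
  uint64_t BackedgeWeight = 990, ExitWeight = 10;
  // Each entry into the loop exits exactly once, so the body ran roughly
  // (backedge + exit) / exit times per entry.
  uint64_t EstimatedTripCount = (BackedgeWeight + ExitWeight) / ExitWeight;
  std::printf("estimated trip count ~ %llu\n",
              (unsigned long long)EstimatedTripCount); // ~100
}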
@@ -665,16 +683,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-/// Adds a 'fast' flag to floating point operations.
-static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V)) {
- FastMathFlags Flags;
- Flags.setFast();
- cast<Instruction>(V)->setFastMathFlags(Flags);
- }
- return V;
-}
-
Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
RecurrenceDescriptor::MinMaxRecurrenceKind RK,
Value *Left, Value *Right) {
@@ -778,9 +786,9 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
ConstantVector::get(ShuffleMask), "rdx.shuf");
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
- // Floating point operations had to be 'fast' to enable the reduction.
- TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
- TmpVec, Shuf, "bin.rdx"));
+ // The builder propagates its fast-math-flags setting.
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
} else {
assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
"Invalid min/max");
@@ -801,13 +809,9 @@ Value *llvm::createSimpleTargetReduction(
ArrayRef<Value *> RedOps) {
assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
- Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
std::function<Value *()> BuildFunc;
using RD = RecurrenceDescriptor;
RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
- // TODO: Support creating ordered reductions.
- FastMathFlags FMFFast;
- FMFFast.setFast();
switch (Opcode) {
case Instruction::Add:
@@ -827,15 +831,15 @@ Value *llvm::createSimpleTargetReduction(
break;
case Instruction::FAdd:
BuildFunc = [&]() {
- auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+ auto Rdx = Builder.CreateFAddReduce(
+ Constant::getNullValue(Src->getType()->getVectorElementType()), Src);
return Rdx;
};
break;
case Instruction::FMul:
BuildFunc = [&]() {
- auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
- cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
+ Type *Ty = Src->getType()->getVectorElementType();
+ auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
return Rdx;
};
break;
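Both reductions are now seeded with a neutral start value instead of undef: 0.0 for fadd (Constant::getNullValue) and 1.0 for fmul, so the seed cannot change the result. A scalar illustration:

#include <cstdio>

int main() {
  float Src[4] = {2, 3, 4, 5};
  float Add = 0.0f; // neutral start for fadd
  float Mul = 1.0f; // neutral start for fmul
  for (float X : Src) {
    Add += X;
    Mul *= X;
  }
  std::printf("fadd.reduce = %f, fmul.reduce = %f\n", Add, Mul); // 14, 120
}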
@@ -880,6 +884,12 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
TargetTransformInfo::ReductionFlags Flags;
Flags.NoNaN = NoNaN;
+
+ // All ops in the reduction inherit fast-math-flags from the recurrence
+ // descriptor.
+ IRBuilder<>::FastMathFlagGuard FMFGuard(B);
+ B.setFastMathFlags(Desc.getFastMathFlags());
+
switch (RecKind) {
case RD::RK_FloatAdd:
return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
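IRBuilder<>::FastMathFlagGuard is an RAII helper: flags set on the builder apply to every FP instruction it emits until the guard goes out of scope, at which point the previous flags are restored. A hedged sketch of the pattern (assumes the usual LLVM headers; the function and its arguments are illustrative):

#include "llvm/IR/IRBuilder.h"

// Sketch only: FMF would come from the recurrence descriptor.
static void emitWithRecurrenceFMF(llvm::IRBuilder<> &B, llvm::FastMathFlags FMF,
                                  llvm::Value *A, llvm::Value *C) {
  llvm::IRBuilder<>::FastMathFlagGuard Guard(B); // saves current flags
  B.setFastMathFlags(FMF);                       // e.g. reassoc/nnan
  B.CreateFAdd(A, C, "sum");                     // inherits FMF
} // previous flags restored here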
diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp
index abbcd5f9e3b8..a9a480a4b7f9 100644
--- a/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/lib/Transforms/Utils/LoopVersioning.cpp
@@ -1,9 +1,8 @@
//===- LoopVersioning.cpp - Utility to version a loop ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -281,8 +280,9 @@ public:
bool Changed = false;
for (Loop *L : Worklist) {
const LoopAccessInfo &LAI = LAA->getInfo(L);
- if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+ if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
+ (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
LoopVersioning LVer(LAI, L, LI, DT, SE);
LVer.versionLoop();
LVer.annotateLoopWithNoAlias();
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index c852d538b0d1..fe67e191dc62 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -1,9 +1,8 @@
//===- LowerInvoke.cpp - Eliminate Invoke instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -53,7 +52,8 @@ static bool runImpl(Function &F) {
II->getOperandBundlesAsDefs(OpBundles);
// Insert a normal call instruction...
CallInst *NewCall =
- CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II);
+ CallInst::Create(II->getFunctionType(), II->getCalledValue(),
+ CallArgs, OpBundles, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 661b4fa5bcb7..0cc085dc366c 100644
--- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -1,9 +1,8 @@
//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -73,7 +72,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
// Loop Body
Value *SrcGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
Value *DstGEP =
LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -115,7 +114,7 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
: RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
Value *SrcGEP = RBuilder.CreateInBoundsGEP(
OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
- Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = RBuilder.CreateLoad(OpTy, SrcGEP, SrcIsVolatile);
// Cast destination to operand type and store.
PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
@@ -182,7 +181,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
- Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = LoopBuilder.CreateLoad(LoopOpType, SrcGEP, SrcIsVolatile);
Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -235,7 +234,7 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
Value *SrcGEP =
ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
- Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+ Value *Load = ResBuilder.CreateLoad(Int8Type, SrcGEP, SrcIsVolatile);
Value *DstGEP =
ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
@@ -293,6 +292,8 @@ static void createMemMoveLoop(Instruction *InsertBefore,
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
+ Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+
// Create a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
// part (if src < dst).
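This direction test is the standard way to make an element-wise copy safe for overlapping buffers: copy backwards when the source starts below the destination, forwards otherwise. A small standalone model of the two loops being emitted (plain C++, int elements for illustration):

#include <cstdio>

static void copyElements(const int *Src, int *Dst, unsigned N) {
  if (Src < Dst) {
    // Backwards loop: count down so overlapping tail elements are read
    // before they are overwritten.
    for (unsigned I = N; I-- > 0;)
      Dst[I] = Src[I];
  } else {
    // Forward loop.
    for (unsigned I = 0; I < N; ++I)
      Dst[I] = Src[I];
  }
}

int main() {
  int Buf[6] = {1, 2, 3, 4, 5, 6};
  copyElements(Buf, Buf + 1, 5); // overlapping, memmove-style copy
  for (int X : Buf)
    std::printf("%d ", X); // 1 1 2 3 4 5
  std::printf("\n");
}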
@@ -331,9 +332,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
Value *IndexPtr = LoopBuilder.CreateSub(
LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
Value *Element = LoopBuilder.CreateLoad(
- LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
+ "element");
+ LoopBuilder.CreateStore(
+ Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr));
LoopBuilder.CreateCondBr(
LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
ExitBB, LoopBB);
@@ -348,9 +350,10 @@ static void createMemMoveLoop(Instruction *InsertBefore,
IRBuilder<> FwdLoopBuilder(FwdLoopBB);
PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
Value *FwdElement = FwdLoopBuilder.CreateLoad(
- FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ EltTy, FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi),
+ "element");
FwdLoopBuilder.CreateStore(
- FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi));
Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index d019a44fc705..8256e3b5f5af 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -1,9 +1,8 @@
//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,8 +16,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -28,6 +31,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -58,9 +62,8 @@ static bool IsInRanges(const IntRange &R,
// Find the first range whose High field is >= R.High,
// then check if the Low field is <= R.Low. If so, we
// have a Range that covers R.
- auto I = std::lower_bound(
- Ranges.begin(), Ranges.end(), R,
- [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
+ auto I = llvm::lower_bound(
+ Ranges, R, [](IntRange A, IntRange B) { return A.High < B.High; });
return I != Ranges.end() && I->Low <= R.Low;
}
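llvm::lower_bound performs a binary search over the sorted, non-overlapping ranges keyed on High; the element it lands on is the only candidate that can still cover R. The same idea with std::lower_bound and a hypothetical IntRange (plain C++):

#include <algorithm>
#include <cstdio>
#include <vector>

struct IntRange { long Low, High; };

static bool isInRanges(const IntRange &R, const std::vector<IntRange> &Ranges) {
  // Find the first range whose High is >= R.High, then check its Low.
  auto I = std::lower_bound(
      Ranges.begin(), Ranges.end(), R,
      [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
  return I != Ranges.end() && I->Low <= R.Low;
}

int main() {
  std::vector<IntRange> Ranges = {{0, 3}, {10, 20}, {30, 40}};
  std::printf("%d %d\n", isInRanges({12, 15}, Ranges),  // 1: inside [10,20]
              isInRanges({8, 12}, Ranges));             // 0: straddles a gap
}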
@@ -78,6 +81,10 @@ namespace {
bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ }
+
struct CaseRange {
ConstantInt* Low;
ConstantInt* High;
@@ -91,15 +98,18 @@ namespace {
using CaseItr = std::vector<CaseRange>::iterator;
private:
- void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl<BasicBlock*> &DeleteList);
+ void processSwitchInst(SwitchInst *SI,
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI);
BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
ConstantInt *LowerBound, ConstantInt *UpperBound,
Value *Val, BasicBlock *Predecessor,
BasicBlock *OrigBlock, BasicBlock *Default,
const std::vector<IntRange> &UnreachableRanges);
- BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
- BasicBlock *Default);
+ BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val,
+ ConstantInt *LowerBound, ConstantInt *UpperBound,
+ BasicBlock *OrigBlock, BasicBlock *Default);
unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
};
@@ -121,8 +131,12 @@ char LowerSwitch::ID = 0;
// Publicly exposed interface to pass...
char &llvm::LowerSwitchID = LowerSwitch::ID;
-INITIALIZE_PASS(LowerSwitch, "lowerswitch",
- "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_BEGIN(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false)
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
@@ -130,6 +144,17 @@ FunctionPass *llvm::createLowerSwitchPass() {
}
bool LowerSwitch::runOnFunction(Function &F) {
+ LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ AssumptionCache *AC = ACT ? &ACT->getAssumptionCache(F) : nullptr;
+ // Prevent LazyValueInfo from using the DominatorTree as LowerSwitch does not
+ // preserve it and it becomes stale (when available) pretty much immediately.
+ // Currently the DominatorTree is only used by LowerSwitch indirectly via LVI
+ // and computeKnownBits to refine isValidAssumeForContext's results. Given
+ // that the latter can handle some of the simple cases w/o a DominatorTree,
+ // it's easier to refrain from using the tree than to keep it up to date.
+ LVI->disableDT();
+
bool Changed = false;
SmallPtrSet<BasicBlock*, 8> DeleteList;
@@ -143,11 +168,12 @@ bool LowerSwitch::runOnFunction(Function &F) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
Changed = true;
- processSwitchInst(SI, DeleteList);
+ processSwitchInst(SI, DeleteList, AC, LVI);
}
}
for (BasicBlock* BB: DeleteList) {
+ LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
}
@@ -160,10 +186,11 @@ static raw_ostream &operator<<(raw_ostream &O,
const LowerSwitch::CaseVector &C) {
O << "[";
- for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
- E = C.end(); B != E; ) {
- O << *B->Low << " -" << *B->High;
- if (++B != E) O << ", ";
+ for (LowerSwitch::CaseVector::const_iterator B = C.begin(), E = C.end();
+ B != E;) {
+ O << "[" << B->Low->getValue() << ", " << B->High->getValue() << "]";
+ if (++B != E)
+ O << ", ";
}
return O << "]";
@@ -179,8 +206,9 @@ static raw_ostream &operator<<(raw_ostream &O,
/// 2) Removed if subsequent incoming values now share the same case, i.e.,
/// multiple outcome edges are condensed into one. This is necessary to keep the
/// number of phi values equal to the number of branches to SuccBB.
-static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
- unsigned NumMergedCases) {
+static void
+fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
+ const unsigned NumMergedCases = std::numeric_limits<unsigned>::max()) {
for (BasicBlock::iterator I = SuccBB->begin(),
IE = SuccBB->getFirstNonPHI()->getIterator();
I != IE; ++I) {
@@ -222,6 +250,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
BasicBlock *Predecessor, BasicBlock *OrigBlock,
BasicBlock *Default,
const std::vector<IntRange> &UnreachableRanges) {
+ assert(LowerBound && UpperBound && "Bounds must be initialized");
unsigned Size = End - Begin;
if (Size == 1) {
@@ -231,13 +260,12 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
// because the bounds already tell us so.
if (Begin->Low == LowerBound && Begin->High == UpperBound) {
unsigned NumMergedCases = 0;
- if (LowerBound && UpperBound)
- NumMergedCases =
- UpperBound->getSExtValue() - LowerBound->getSExtValue();
+ NumMergedCases = UpperBound->getSExtValue() - LowerBound->getSExtValue();
fixPhis(Begin->BB, OrigBlock, Predecessor, NumMergedCases);
return Begin->BB;
}
- return newLeafBlock(*Begin, Val, OrigBlock, Default);
+ return newLeafBlock(*Begin, Val, LowerBound, UpperBound, OrigBlock,
+ Default);
}
unsigned Mid = Size / 2;
@@ -247,8 +275,8 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");
CaseRange &Pivot = *(Begin + Mid);
- LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -"
- << Pivot.High->getValue() << "\n");
+ LLVM_DEBUG(dbgs() << "Pivot ==> [" << Pivot.Low->getValue() << ", "
+ << Pivot.High->getValue() << "]\n");
// NewLowerBound here should never be the integer minimal value.
// This is because it is computed from a case range that is never
@@ -270,14 +298,10 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
NewUpperBound = LHS.back().High;
}
- LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) {
- dbgs() << LowerBound->getSExtValue();
- } else { dbgs() << "NONE"; } dbgs() << " - "
- << NewUpperBound->getSExtValue() << "\n";
- dbgs() << "RHS Bounds ==> ";
- dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) {
- dbgs() << UpperBound->getSExtValue() << "\n";
- } else { dbgs() << "NONE\n"; });
+ LLVM_DEBUG(dbgs() << "LHS Bounds ==> [" << LowerBound->getSExtValue() << ", "
+ << NewUpperBound->getSExtValue() << "]\n"
+ << "RHS Bounds ==> [" << NewLowerBound->getSExtValue()
+ << ", " << UpperBound->getSExtValue() << "]\n");
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
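switchConvert therefore turns the sorted case ranges into a balanced tree of comparisons, recursing on the halves around the pivot. A standalone sketch of the recursion over plain integers, printing the compare tree instead of building IR (types and values are illustrative):

#include <cstdio>
#include <vector>

struct CaseRange { long Low, High; };

static void convert(const std::vector<CaseRange> &Cases, unsigned Begin,
                    unsigned End, int Depth) {
  unsigned Size = End - Begin;
  if (Size == 1) {
    std::printf("%*sleaf: [%ld, %ld]\n", Depth * 2, "", Cases[Begin].Low,
                Cases[Begin].High);
    return;
  }
  unsigned Mid = Size / 2;
  long Pivot = Cases[Begin + Mid].Low;
  // "Val < Pivot" branches left, otherwise right.
  std::printf("%*sif (Val < %ld)\n", Depth * 2, "", Pivot);
  convert(Cases, Begin, Begin + Mid, Depth + 1);
  std::printf("%*selse\n", Depth * 2, "");
  convert(Cases, Begin + Mid, End, Depth + 1);
}

int main() {
  std::vector<CaseRange> Cases = {{1, 1}, {3, 4}, {7, 7}, {9, 12}};
  convert(Cases, 0, Cases.size(), 0);
}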
@@ -305,9 +329,11 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
/// switch's value == the case's value. If not, then it jumps to the default
/// branch. At this point in the tree, the value can't be another valid case
/// value, so the jump to the "default" branch is warranted.
-BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
- BasicBlock* OrigBlock,
- BasicBlock* Default) {
+BasicBlock *LowerSwitch::newLeafBlock(CaseRange &Leaf, Value *Val,
+ ConstantInt *LowerBound,
+ ConstantInt *UpperBound,
+ BasicBlock *OrigBlock,
+ BasicBlock *Default) {
Function* F = OrigBlock->getParent();
BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
@@ -320,10 +346,14 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
Leaf.Low, "SwitchLeaf");
} else {
// Make range comparison
- if (Leaf.Low->isMinValue(true /*isSigned*/)) {
+ if (Leaf.Low == LowerBound) {
// Val >= Min && Val <= Hi --> Val <= Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High,
"SwitchLeaf");
+ } else if (Leaf.High == UpperBound) {
+ // Val <= Max && Val >= Lo --> Val >= Lo
+ Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SGE, Val, Leaf.Low,
+ "SwitchLeaf");
} else if (Leaf.Low->isZero()) {
// Val >= 0 && Val <= Hi --> Val <=u Hi
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High,
@@ -363,14 +393,20 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
return NewLeaf;
}
-/// Transform simple list of Cases into list of CaseRange's.
+/// Transform simple list of \p SI's cases into list of CaseRange's \p Cases.
+/// \post \p Cases will not contain references to \p SI's default BB.
+/// \returns Number of \p SI's cases that do not reference \p SI's default BB.
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
- unsigned numCmps = 0;
+ unsigned NumSimpleCases = 0;
// Start with "simple" cases
- for (auto Case : SI->cases())
+ for (auto Case : SI->cases()) {
+ if (Case.getCaseSuccessor() == SI->getDefaultDest())
+ continue;
Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
Case.getCaseSuccessor()));
+ ++NumSimpleCases;
+ }
llvm::sort(Cases, CaseCmp());
@@ -396,60 +432,88 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
Cases.erase(std::next(I), Cases.end());
}
- for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
- if (I->Low != I->High)
- // A range counts double, since it requires two compares.
- ++numCmps;
- }
-
- return numCmps;
+ return NumSimpleCases;
}
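Clusterify sorts the non-default cases and then merges neighbours that are contiguous and share a successor into a single CaseRange. A standalone model of the merge step, using small integer ids in place of successor blocks:

#include <cstdio>
#include <vector>

struct CaseRange { long Low, High; int Succ; };

int main() {
  // Already sorted by Low, default-bound cases already dropped.
  std::vector<CaseRange> Cases = {{1, 1, 0}, {2, 2, 0}, {3, 3, 1}, {5, 5, 1}};
  std::vector<CaseRange> Clusters{Cases.front()};
  for (unsigned I = 1; I < Cases.size(); ++I) {
    CaseRange &Last = Clusters.back();
    if (Cases[I].Low == Last.High + 1 && Cases[I].Succ == Last.Succ)
      Last.High = Cases[I].High;    // extend the existing cluster
    else
      Clusters.push_back(Cases[I]); // start a new cluster
  }
  for (const CaseRange &C : Clusters)
    std::printf("[%ld, %ld] -> succ%d\n", C.Low, C.High, C.Succ);
  // [1, 2] -> succ0   [3, 3] -> succ1   [5, 5] -> succ1
}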
/// Replace the specified switch instruction with a sequence of chained if-then
/// insts in a balanced binary search.
void LowerSwitch::processSwitchInst(SwitchInst *SI,
- SmallPtrSetImpl<BasicBlock*> &DeleteList) {
- BasicBlock *CurBlock = SI->getParent();
- BasicBlock *OrigBlock = CurBlock;
- Function *F = CurBlock->getParent();
+ SmallPtrSetImpl<BasicBlock *> &DeleteList,
+ AssumptionCache *AC, LazyValueInfo *LVI) {
+ BasicBlock *OrigBlock = SI->getParent();
+ Function *F = OrigBlock->getParent();
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
// Don't handle unreachable blocks. If there are successors with phis, this
// would leave them behind with missing predecessors.
- if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
- CurBlock->getSinglePredecessor() == CurBlock) {
- DeleteList.insert(CurBlock);
+ if ((OrigBlock != &F->getEntryBlock() && pred_empty(OrigBlock)) ||
+ OrigBlock->getSinglePredecessor() == OrigBlock) {
+ DeleteList.insert(OrigBlock);
return;
}
+ // Prepare cases vector.
+ CaseVector Cases;
+ const unsigned NumSimpleCases = Clusterify(Cases, SI);
+ LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+ << ". Total non-default cases: " << NumSimpleCases
+ << "\nCase clusters: " << Cases << "\n");
+
// If there is only the default destination, just branch.
- if (!SI->getNumCases()) {
- BranchInst::Create(Default, CurBlock);
+ if (Cases.empty()) {
+ BranchInst::Create(Default, OrigBlock);
+ // Remove all but one of the references from Default's PHIs to OrigBlock.
+ fixPhis(Default, OrigBlock, OrigBlock);
SI->eraseFromParent();
return;
}
- // Prepare cases vector.
- CaseVector Cases;
- unsigned numCmps = Clusterify(Cases, SI);
- LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
- << ". Total compares: " << numCmps << "\n");
- LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n");
- (void)numCmps;
-
ConstantInt *LowerBound = nullptr;
ConstantInt *UpperBound = nullptr;
- std::vector<IntRange> UnreachableRanges;
+ bool DefaultIsUnreachableFromSwitch = false;
if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
// Make the bounds tightly fitted around the case value range, because we
// know that the value passed to the switch must be exactly one of the case
// values.
- assert(!Cases.empty());
LowerBound = Cases.front().Low;
UpperBound = Cases.back().High;
+ DefaultIsUnreachableFromSwitch = true;
+ } else {
+ // Constraining the range of the value being switched over helps eliminate
+ // unreachable BBs and minimize the number of `add` instructions
+ // newLeafBlock ends up emitting. Running CorrelatedValuePropagation after
+ // LowerSwitch isn't as good, and also much more expensive in terms of
+ // compile time for the following reasons:
+ // 1. it processes many kinds of instructions, not just switches;
+ // 2. even if limited to icmp instructions only, it will have to process
+ // roughly C icmp's per switch, where C is the number of cases in the
+ // switch, while LowerSwitch only needs to call LVI once per switch.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
+ // TODO Shouldn't this create a signed range?
+ ConstantRange KnownBitsRange =
+ ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
+ const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
+ ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
+ // We delegate removal of unreachable non-default cases to other passes. In
+ // the unlikely event that some of them survived, we just conservatively
+ // maintain the invariant that all the cases lie between the bounds. This
+ // may, however, still render the default case effectively unreachable.
+ APInt Low = Cases.front().Low->getValue();
+ APInt High = Cases.back().High->getValue();
+ APInt Min = APIntOps::smin(ValRange.getSignedMin(), Low);
+ APInt Max = APIntOps::smax(ValRange.getSignedMax(), High);
+
+ LowerBound = ConstantInt::get(SI->getContext(), Min);
+ UpperBound = ConstantInt::get(SI->getContext(), Max);
+ DefaultIsUnreachableFromSwitch = (Min + (NumSimpleCases - 1) == Max);
+ }
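The effect of the computed bounds is easiest to see on numbers: when the KnownBits/LVI range and the case values together span exactly NumSimpleCases values, every reachable value hits a case and the default is effectively dead. A small arithmetic sketch with illustrative values:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical result of intersecting the KnownBits range with LVI.
  long RangeMin = 0, RangeMax = 3;
  // Case values present on the switch (already clustered and sorted).
  long CaseLow = 0, CaseHigh = 3;
  unsigned NumSimpleCases = 4; // cases 0, 1, 2, 3

  long Min = std::min(RangeMin, CaseLow);
  long Max = std::max(RangeMax, CaseHigh);
  // Same test as above: the bounds span exactly NumSimpleCases values,
  // so every representable value hits a case and the default is dead.
  bool DefaultIsUnreachable = (Min + (NumSimpleCases - 1) == Max);
  std::printf("bounds [%ld, %ld], default unreachable: %d\n", Min, Max,
              DefaultIsUnreachable);
}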
+
+ std::vector<IntRange> UnreachableRanges;
+ if (DefaultIsUnreachableFromSwitch) {
DenseMap<BasicBlock *, unsigned> Popularity;
unsigned MaxPop = 0;
BasicBlock *PopSucc = nullptr;
@@ -496,8 +560,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
#endif
// As the default block in the switch is unreachable, update the PHI nodes
- // (remove the entry to the default block) to reflect this.
- Default->removePredecessor(OrigBlock);
+ // (remove all of the references to the default block) to reflect this.
+ const unsigned NumDefaultEdges = SI->getNumCases() + 1 - NumSimpleCases;
+ for (unsigned I = 0; I < NumDefaultEdges; ++I)
+ Default->removePredecessor(OrigBlock);
// Use the most popular block as the new default, reducing the number of
// cases.
@@ -510,7 +576,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// If there are no cases left, just branch.
if (Cases.empty()) {
- BranchInst::Create(Default, CurBlock);
+ BranchInst::Create(Default, OrigBlock);
SI->eraseFromParent();
// As all the cases have been replaced with a single branch, only keep
// one entry in the PHI nodes.
@@ -518,12 +584,12 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
PopSucc->removePredecessor(OrigBlock);
return;
}
- }
- unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0;
- for (const auto &Case : SI->cases())
- if (Case.getCaseSuccessor() == Default)
- NrOfDefaults++;
+ // If the condition was a PHI node with the switch block as a predecessor,
+ // removing predecessors may have caused the condition to be erased.
+ // Getting the condition value again here protects against that.
+ Val = SI->getCondition();
+ }
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
@@ -537,14 +603,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// If there are entries in any PHI nodes for the default edge, make sure
// to update them as well.
- fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults);
+ fixPhis(Default, OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
BasicBlock *OldDefault = SI->getDefaultDest();
- CurBlock->getInstList().erase(SI);
+ OrigBlock->getInstList().erase(SI);
// If the Default block has no more predecessors just add it to DeleteList.
if (pred_begin(OldDefault) == pred_end(OldDefault))
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 23145e584751..cd2c81b6abc8 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -1,9 +1,8 @@
//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 88d595ee02ab..c0b7edc547fd 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -1,9 +1,8 @@
//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index ae5e72ea4d30..c84beceee191 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -1,9 +1,8 @@
//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,44 +27,24 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F,
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- StructType *EltTy;
+ StructType *EltTy = StructType::get(
+ IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy());
if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
- ArrayType *ATy = cast<ArrayType>(GVCtor->getValueType());
- StructType *OldEltTy = cast<StructType>(ATy->getElementType());
- // Upgrade a 2-field global array type to the new 3-field format if needed.
- if (Data && OldEltTy->getNumElements() < 3)
- EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- IRB.getInt8PtrTy());
- else
- EltTy = OldEltTy;
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
- for (unsigned i = 0; i != n; ++i) {
- auto Ctor = cast<Constant>(Init->getOperand(i));
- if (EltTy != OldEltTy)
- Ctor =
- ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0),
- Ctor->getAggregateElement(1),
- Constant::getNullValue(IRB.getInt8PtrTy()));
- CurrentCtors.push_back(Ctor);
- }
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
}
GVCtor->eraseFromParent();
- } else {
- // Use the new three-field struct if there isn't one already.
- EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
- IRB.getInt8PtrTy());
}
- // Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
+ // Build a 3 field global_ctor entry. We don't take a comdat key.
Constant *CSVals[3];
CSVals[0] = IRB.getInt32(Priority);
CSVals[1] = F;
- // FIXME: Drop support for the two element form in LLVM 4.0.
- if (EltTy->getNumElements() >= 3)
- CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
- : Constant::getNullValue(IRB.getInt8PtrTy());
+ CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
+ : Constant::getNullValue(IRB.getInt8PtrTy());
Constant *RuntimeCtorInit =
ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
@@ -127,36 +106,24 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
appendToUsedList(M, "llvm.compiler.used", Values);
}
-Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
- if (isa<Function>(FuncOrBitcast))
- return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->print(errs());
- errs() << '\n';
- std::string Err;
- raw_string_ostream Stream(Err);
- Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
- report_fatal_error(Err);
-}
-
-Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
- ArrayRef<Type *> InitArgTypes) {
+FunctionCallee
+llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes) {
assert(!InitName.empty() && "Expected init function name");
- Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ return M.getOrInsertFunction(
InitName,
FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
- AttributeList()));
- F->setLinkage(Function::ExternalLinkage);
- return F;
+ AttributeList());
}
-std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
+std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName) {
assert(!InitName.empty() && "Expected init function name");
assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
- Function *InitFunction =
+ FunctionCallee InitFunction =
declareSanitizerInitFunction(M, InitName, InitArgTypes);
Function *Ctor = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -165,20 +132,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
IRB.CreateCall(InitFunction, InitArgs);
if (!VersionCheckName.empty()) {
- Function *VersionCheckFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
- AttributeList()));
+ FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
+ VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
+ AttributeList());
IRB.CreateCall(VersionCheckFunction, {});
}
return std::make_pair(Ctor, InitFunction);
}
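FunctionCallee, which getOrInsertFunction now returns, is a {FunctionType, Callee} pair that CreateCall can consume directly, whether the callee is a plain Function or a bitcast constant. A hedged usage sketch (assumes the usual LLVM headers; the runtime function name is made up for illustration):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Declare (or reuse) "void @__my_rt_init(i32)" and emit a call to it.
static void emitInitCall(llvm::Module &M, llvm::IRBuilder<> &IRB) {
  llvm::FunctionCallee Init = M.getOrInsertFunction(
      "__my_rt_init", IRB.getVoidTy(), IRB.getInt32Ty());
  IRB.CreateCall(Init, {IRB.getInt32(0)});
}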
-std::pair<Function *, Function *>
+std::pair<Function *, FunctionCallee>
llvm::getOrCreateSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
- function_ref<void(Function *, Function *)> FunctionsCreatedCallback,
+ function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
StringRef VersionCheckName) {
assert(!CtorName.empty() && "Expected ctor function name");
@@ -189,7 +155,8 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
- Function *Ctor, *InitFunction;
+ Function *Ctor;
+ FunctionCallee InitFunction;
std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName);
FunctionsCreatedCallback(Ctor, InitFunction);
@@ -208,9 +175,10 @@ Function *llvm::getOrCreateInitFunction(Module &M, StringRef Name) {
}
return F;
}
- Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- Name, AttributeList(), Type::getVoidTy(M.getContext())));
- F->setLinkage(Function::ExternalLinkage);
+ Function *F =
+ cast<Function>(M.getOrInsertFunction(Name, AttributeList(),
+ Type::getVoidTy(M.getContext()))
+ .getCallee());
appendToGlobalCtors(M, F, 0);
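
The hunks above migrate the sanitizer helpers from returning Function * to
FunctionCallee, the new return type of Module::getOrInsertFunction. A minimal
sketch of the calling convention this implies is shown below; it is
illustrative only, not part of the patch, and the symbol name and wrapper
function are hypothetical:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void emitInitCall(Module &M, IRBuilder<> &IRB) {
      // FunctionCallee bundles the callee Value with its FunctionType, so it
      // can be handed straight to CreateCall without any bitcast juggling.
      FunctionCallee Init = M.getOrInsertFunction(
          "__hypothetical_init", // assumed symbol name, for illustration only
          FunctionType::get(IRB.getVoidTy(), /*isVarArg=*/false));
      IRB.CreateCall(Init, {});

      // Only where a real Function * is genuinely required (e.g. for
      // appendToGlobalCtors above) does a caller unwrap the callee.
      if (auto *F = dyn_cast<Function>(Init.getCallee()))
        (void)F;
    }
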
diff --git a/lib/Transforms/Utils/NameAnonGlobals.cpp b/lib/Transforms/Utils/NameAnonGlobals.cpp
index 34dc1cccdd5b..ac8991e9d475 100644
--- a/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -1,9 +1,8 @@
//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
index 585ce6b4c118..bdf24d80bd17 100644
--- a/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -1,9 +1,8 @@
//===-- PredicateInfo.cpp - PredicateInfo Builder--------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------===//
//
@@ -474,7 +473,8 @@ void PredicateInfo::buildPredicateInfo() {
}
for (auto &Assume : AC.assumptions()) {
if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
- processAssume(II, II->getParent(), OpsToRename);
+ if (DT.isReachableFromEntry(II->getParent()))
+ processAssume(II, II->getParent(), OpsToRename);
}
// Now rename all our operations.
renameUses(OpsToRename);
@@ -489,8 +489,10 @@ void PredicateInfo::buildPredicateInfo() {
// tricky (FIXME).
static Function *getCopyDeclaration(Module *M, Type *Ty) {
std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty);
- return cast<Function>(M->getOrInsertFunction(
- Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty)));
+ return cast<Function>(
+ M->getOrInsertFunction(Name,
+ getType(M->getContext(), Intrinsic::ssa_copy, Ty))
+ .getCallee());
}
// Given the renaming stack, make all the operands currently on the stack real
@@ -633,7 +635,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {
// uses in the same instruction do not have a strict sort order
// currently and will be considered equal. We could get rid of the
// stable sort by creating one if we wanted.
- std::stable_sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+ llvm::stable_sort(OrderedUses, Compare);
SmallVector<ValueDFS, 8> RenameStack;
// For each use, sorted into dfs order, push values and replaces uses with
// top of stack, which will represent the reaching def.
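
The stable_sort change above switches to LLVM's range-based wrapper from
llvm/ADT/STLExtras.h, which takes the container itself instead of a begin/end
pair. A small self-contained sketch, not part of the patch, with a made-up
container and comparator:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    static void sortExample() {
      llvm::SmallVector<int, 8> Values = {3, 1, 2};
      // Equivalent to std::stable_sort(Values.begin(), Values.end(), Cmp).
      llvm::stable_sort(Values, [](int A, int B) { return A < B; });
    }
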
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 91e4f4254b3e..d58e1ea574ef 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -1,9 +1,8 @@
//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -114,7 +113,6 @@ struct AllocaInfo {
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
- Value *AllocaPointerVal;
TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares;
void clear() {
@@ -123,7 +121,6 @@ struct AllocaInfo {
OnlyStore = nullptr;
OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- AllocaPointerVal = nullptr;
DbgDeclares.clear();
}
@@ -141,14 +138,12 @@ struct AllocaInfo {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Remember the basic blocks which define new values for the alloca
DefiningBlocks.push_back(SI->getParent());
- AllocaPointerVal = SI->getOperand(0);
OnlyStore = SI;
} else {
LoadInst *LI = cast<LoadInst>(User);
// Otherwise it must be a load instruction, keep track of variable
// reads.
UsingBlocks.push_back(LI->getParent());
- AllocaPointerVal = LI;
}
if (OnlyUsedInOneBlock) {
@@ -254,11 +249,6 @@ struct PromoteMem2Reg {
/// to.
DenseMap<PHINode *, unsigned> PhiToAllocaMap;
- /// If we are updating an AliasSetTracker, then for each alloca that is of
- /// pointer type, we keep track of what to copyValue to the inserted PHI
- /// nodes here.
- std::vector<Value *> PointerAllocaValues;
-
/// For each alloca, we keep track of the dbg.declare intrinsic that
/// describes it, if any, so that we can convert it to a dbg.value
/// intrinsic if the alloca gets promoted.
@@ -367,10 +357,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
Instruction *UserInst = cast<Instruction>(*UI++);
- if (!isa<LoadInst>(UserInst)) {
- assert(UserInst == OnlyStore && "Should only have load/stores");
+ if (UserInst == OnlyStore)
continue;
- }
LoadInst *LI = cast<LoadInst>(UserInst);
// Okay, if we have a load from the alloca, we want to replace it with the
@@ -390,8 +378,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
Info.UsingBlocks.push_back(StoreBB);
continue;
}
- } else if (LI->getParent() != StoreBB &&
- !DT.dominates(StoreBB, LI->getParent())) {
+ } else if (!DT.dominates(StoreBB, LI->getParent())) {
// If the load and store are in different blocks, use BB dominance to
// check their relationships. If the store doesn't dom the use, bail
// out.
@@ -429,14 +416,12 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
DII->eraseFromParent();
- LBI.deleteValue(DII);
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
LBI.deleteValue(Info.OnlyStore);
AI->eraseFromParent();
- LBI.deleteValue(AI);
return true;
}
@@ -488,11 +473,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
unsigned LoadIdx = LBI.getInstructionIndex(LI);
// Find the nearest store that has a lower index than this load.
- StoresByIndexTy::iterator I =
- std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::make_pair(LoadIdx,
- static_cast<StoreInst *>(nullptr)),
- less_first());
+ StoresByIndexTy::iterator I = llvm::lower_bound(
+ StoresByIndex,
+ std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
+ less_first());
if (I == StoresByIndex.begin()) {
if (StoresByIndex.empty())
// If there are no stores, the load takes the undef value.
@@ -535,13 +519,10 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
}
AI->eraseFromParent();
- LBI.deleteValue(AI);
// The alloca's debuginfo can be removed as well.
- for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
+ for (DbgVariableIntrinsic *DII : Info.DbgDeclares)
DII->eraseFromParent();
- LBI.deleteValue(DII);
- }
++NumLocalPromoted;
return true;
@@ -620,8 +601,8 @@ void PromoteMem2Reg::run() {
// dead phi nodes.
// Unique the set of defining blocks for efficient lookup.
- SmallPtrSet<BasicBlock *, 32> DefBlocks;
- DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
+ SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
+ Info.DefiningBlocks.end());
// Determine which blocks the value is live in. These are blocks which lead
// to uses.
@@ -636,10 +617,9 @@ void PromoteMem2Reg::run() {
IDF.setDefiningBlocks(DefBlocks);
SmallVector<BasicBlock *, 32> PHIBlocks;
IDF.calculate(PHIBlocks);
- if (PHIBlocks.size() > 1)
- llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
- });
+ llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
+ return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
+ });
unsigned CurrentVersion = 0;
for (BasicBlock *BB : PHIBlocks)
@@ -751,7 +731,7 @@ void PromoteMem2Reg::run() {
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
- return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
};
llvm::sort(Preds, CompareBBNumbers);
@@ -759,9 +739,8 @@ void PromoteMem2Reg::run() {
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
- SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
- Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i),
- CompareBBNumbers);
+ SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
+ Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
"PHI node has entry for a block which is not a predecessor!");
@@ -825,14 +804,11 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
break;
}
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (LI->getOperand(0) != AI)
- continue;
-
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
// Okay, we found a load before a store to the alloca. It is actually
// live into this block.
- break;
- }
+ if (LI->getOperand(0) == AI)
+ break;
}
}
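
Several hunks above replace explicit std::lower_bound(begin, end, ...) calls
with llvm::lower_bound, the range form from llvm/ADT/STLExtras.h. A minimal
sketch, not part of the patch, with a hypothetical container and key:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    static int firstNotLess(const llvm::SmallVectorImpl<int> &Sorted, int Key) {
      // Same contract as std::lower_bound(Sorted.begin(), Sorted.end(), Key);
      // Sorted must already be ordered by the comparison in use.
      auto It = llvm::lower_bound(Sorted, Key);
      return It == Sorted.end() ? -1 : *It;
    }
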
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 9e5fb0e7172d..bffdd115d940 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -1,9 +1,8 @@
//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -350,8 +349,7 @@ LoadAndStorePromoter(ArrayRef<const Instruction *> Insts,
SSA.Initialize(SomeVal->getType(), BaseName);
}
-void LoadAndStorePromoter::
-run(const SmallVectorImpl<Instruction *> &Insts) const {
+void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
diff --git a/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index 397bac2940a4..917d5e0a1ef0 100644
--- a/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -1,9 +1,8 @@
//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp
index 8c23957ac43e..a1313c77ed77 100644
--- a/lib/Transforms/Utils/SanitizerStats.cpp
+++ b/lib/Transforms/Utils/SanitizerStats.cpp
@@ -1,9 +1,8 @@
//===- SanitizerStats.cpp - Sanitizer statistics gathering ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,8 +56,8 @@ void SanitizerStatReport::create(IRBuilder<> &B, SanitizerStatKind SK) {
FunctionType *StatReportTy =
FunctionType::get(B.getVoidTy(), Int8PtrTy, false);
- Constant *StatReport = M->getOrInsertFunction(
- "__sanitizer_stat_report", StatReportTy);
+ FunctionCallee StatReport =
+ M->getOrInsertFunction("__sanitizer_stat_report", StatReportTy);
auto InitAddr = ConstantExpr::getGetElementPtr(
EmptyModuleStatsTy, ModuleStatsGV,
@@ -98,8 +97,8 @@ void SanitizerStatReport::finish() {
IRBuilder<> B(BB);
FunctionType *StatInitTy = FunctionType::get(VoidTy, Int8PtrTy, false);
- Constant *StatInit = M->getOrInsertFunction(
- "__sanitizer_stat_init", StatInitTy);
+ FunctionCallee StatInit =
+ M->getOrInsertFunction("__sanitizer_stat_init", StatInitTy);
B.CreateCall(StatInit, ConstantExpr::getBitCast(NewModuleStatsGV, Int8PtrTy));
B.CreateRetVoid();
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 03b73954321d..11651d040dc0 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1,9 +1,8 @@
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,8 +25,9 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -66,6 +66,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
@@ -292,9 +293,13 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
/// will be the same as those coming in from ExistPred, an existing predecessor
/// of Succ.
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
- BasicBlock *ExistPred) {
+ BasicBlock *ExistPred,
+ MemorySSAUpdater *MSSAU = nullptr) {
for (PHINode &PN : Succ->phis())
PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
+ if (MSSAU)
+ if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
+ MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
}
/// Compute an abstract "cost" of speculating the given instruction,
@@ -670,7 +675,8 @@ private:
} // end anonymous namespace
-static void EraseTerminatorAndDCECond(Instruction *TI) {
+static void EraseTerminatorAndDCECond(Instruction *TI,
+ MemorySSAUpdater *MSSAU = nullptr) {
Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
@@ -683,7 +689,7 @@ static void EraseTerminatorAndDCECond(Instruction *TI) {
TI->eraseFromParent();
if (Cond)
- RecursivelyDeleteTriviallyDeadInstructions(Cond);
+ RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
}
/// Return true if the specified terminator checks
@@ -858,7 +864,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
return true;
}
- SwitchInst *SI = cast<SwitchInst>(TI);
+ SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
// Okay, TI has cases that are statically dead, prune them away.
SmallPtrSet<Constant *, 16> DeadCases;
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
@@ -867,30 +873,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
- // Collect branch weights into a vector.
- SmallVector<uint32_t, 8> Weights;
- MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
- bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases());
- if (HasWeight)
- for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e;
- ++MD_i) {
- ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(MD_i));
- Weights.push_back(CI->getValue().getZExtValue());
- }
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
if (DeadCases.count(i->getCaseValue())) {
- if (HasWeight) {
- std::swap(Weights[i->getCaseIndex() + 1], Weights.back());
- Weights.pop_back();
- }
i->getCaseSuccessor()->removePredecessor(TI->getParent());
- SI->removeCase(i);
+ SI.removeCase(i);
}
}
- if (HasWeight && Weights.size() >= 2)
- setBranchWeights(SI, Weights);
-
LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
}
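
The hunk above replaces the hand-maintained branch-weight vector with
SwitchInstProfUpdateWrapper, which keeps the !prof branch_weights metadata in
sync as cases are added or removed. A minimal usage sketch, not part of the
patch; the helper function and its arguments are hypothetical:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void dropCase(SwitchInst &SI, ConstantInt *DeadVal) {
      SwitchInstProfUpdateWrapper SIW(SI);
      auto CaseIt = SI.findCaseValue(DeadVal);
      if (CaseIt != SI.case_default()) {
        CaseIt->getCaseSuccessor()->removePredecessor(SI.getParent());
        // Removing through the wrapper also drops the matching weight entry.
        SIW.removeCase(CaseIt);
      }
    }
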
@@ -1266,8 +1255,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
while (isa<DbgInfoIntrinsic>(I2))
I2 = &*BB2_Itr++;
}
+ // FIXME: Can we define a safety predicate for CallBr?
if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
- (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
+ isa<CallBrInst>(I1))
return false;
BasicBlock *BIParent = BI->getParent();
@@ -1350,9 +1341,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
HoistTerminator:
// It may not be possible to hoist an invoke.
+ // FIXME: Can we define a safety predicate for CallBr?
if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
return Changed;
+ // TODO: callbr hoisting currently disabled pending further study.
+ if (isa<CallBrInst>(I1))
+ return Changed;
+
for (BasicBlock *Succ : successors(BB1)) {
for (PHINode &PN : Succ->phis()) {
Value *BB1V = PN.getIncomingValueForBlock(BB1);
@@ -1432,9 +1428,10 @@ HoistTerminator:
static bool canSinkInstructions(
ArrayRef<Instruction *> Insts,
DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
- // Prune out obviously bad instructions to move. Any non-store instruction
- // must have exactly one use, and we check later that use is by a single,
- // common PHI instruction in the successor.
+ // Prune out obviously bad instructions to move. Each instruction must have
+ // exactly zero or one use, and we check later that use is by a single, common
+ // PHI instruction in the successor.
+ bool HasUse = !Insts.front()->user_empty();
for (auto *I : Insts) {
// These instructions may change or break semantics if moved.
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
@@ -1444,13 +1441,14 @@ static bool canSinkInstructions(
// Conservatively return false if I is an inline-asm instruction. Sinking
// and merging inline-asm instructions can potentially create arguments
// that cannot satisfy the inline-asm constraints.
- if (const auto *C = dyn_cast<CallInst>(I))
+ if (const auto *C = dyn_cast<CallBase>(I))
if (C->isInlineAsm())
return false;
- // Everything must have only one use too, apart from stores which
- // have no uses.
- if (!isa<StoreInst>(I) && !I->hasOneUse())
+ // Each instruction must have zero or one use.
+ if (HasUse && !I->hasOneUse())
+ return false;
+ if (!HasUse && !I->user_empty())
return false;
}
@@ -1459,11 +1457,11 @@ static bool canSinkInstructions(
if (!I->isSameOperationAs(I0))
return false;
- // All instructions in Insts are known to be the same opcode. If they aren't
- // stores, check the only user of each is a PHI or in the same block as the
- // instruction, because if a user is in the same block as an instruction
- // we're contemplating sinking, it must already be determined to be sinkable.
- if (!isa<StoreInst>(I0)) {
+ // All instructions in Insts are known to be the same opcode. If they have a
+ // use, check that the only user is a PHI or in the same block as the
+ // instruction, because if a user is in the same block as an instruction we're
+ // contemplating sinking, it must already be determined to be sinkable.
+ if (HasUse) {
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
@@ -1507,7 +1505,7 @@ static bool canSinkInstructions(
// We can't create a PHI from this GEP.
return false;
// Don't create indirect calls! The called value is the final operand.
- if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) {
+ if (isa<CallBase>(I0) && OI == OE - 1) {
// FIXME: if the call was *already* indirect, we should do this.
return false;
}
@@ -1541,7 +1539,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
// it is slightly over-aggressive - it gets confused by commutative instructions
// so double-check it here.
Instruction *I0 = Insts.front();
- if (!isa<StoreInst>(I0)) {
+ if (!I0->user_empty()) {
auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
auto *U = cast<Instruction>(*I->user_begin());
@@ -1599,11 +1597,10 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
I0->andIRFlags(I);
}
- if (!isa<StoreInst>(I0)) {
+ if (!I0->user_empty()) {
// canSinkLastInstruction checked that all instructions were used by
// one and only one PHI node. Find that now, RAUW it to our common
// instruction and nuke it.
- assert(I0->hasOneUse());
auto *PN = cast<PHINode>(*I0->user_begin());
PN->replaceAllUsesWith(I0);
PN->eraseFromParent();
@@ -2203,7 +2200,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
BasicBlock *EdgeBB =
BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
RealDest->getParent(), RealDest);
- BranchInst::Create(RealDest, EdgeBB);
+ BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
+ CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -2539,7 +2537,8 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
+ unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
const unsigned PredCount = pred_size(BB);
@@ -2594,7 +2593,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// unconditionally. We denote all involved instructions except the condition
// as "bonus instructions", and only allow this transformation when the
// number of the bonus instructions we'll need to create when cloning into
- // each predecessor does not exceed a certain threshold.
+ // each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
for (auto I = BB->begin(); Cond != &*I; ++I) {
// Ignore dbg intrinsics.
@@ -2611,7 +2610,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// and Cond.
// Account for the cost of duplicating this instruction into each
- // predecessor.
+ // predecessor.
NumBonusInsts += PredCount;
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
@@ -2750,7 +2749,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
(SuccFalseWeight + SuccTrueWeight) +
PredTrueWeight * SuccFalseWeight);
}
- AddPredecessorToBlock(TrueDest, PredBlock, BB);
+ AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
PBI->setSuccessor(0, TrueDest);
}
if (PBI->getSuccessor(1) == BB) {
@@ -2765,7 +2764,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
// FalseWeight is FalseWeight for PBI * FalseWeight for BI.
NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
}
- AddPredecessorToBlock(FalseDest, PredBlock, BB);
+ AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
PBI->setSuccessor(1, FalseDest);
}
if (NewWeights.size() == 2) {
@@ -2810,12 +2809,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
}
}
// Update PHI Node.
- PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()),
- MergedCond);
+ PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
}
+
+ // PBI is changed to branch to TrueDest below. Remove itself from
+ // potential phis from all other successors.
+ if (MSSAU)
+ MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
+
// Change PBI from Conditional to Unconditional.
BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
- EraseTerminatorAndDCECond(PBI);
+ EraseTerminatorAndDCECond(PBI, MSSAU);
PBI = New_PBI;
}
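
FoldBranchToCommonDest now threads an optional MemorySSAUpdater through to
AddPredecessorToBlock and EraseTerminatorAndDCECond so MemoryPhis stay
consistent. A sketch of how a MemorySSA-preserving caller would use the new
signature; the wrapper function is hypothetical, and callers without MemorySSA
simply pass nullptr, as the SimplifyUncondBranch/SimplifyCondBranch hunks
below do:

    #include "llvm/Analysis/MemorySSA.h"
    #include "llvm/Analysis/MemorySSAUpdater.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static bool foldWithMSSA(BranchInst *BI, MemorySSA &MSSA) {
      MemorySSAUpdater MSSAU(&MSSA);
      // Passing the updater lets the fold patch up MemoryPhis in the common
      // destination instead of leaving MemorySSA stale.
      return FoldBranchToCommonDest(BI, &MSSAU, /*BonusInstThreshold=*/1);
    }
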
@@ -3430,7 +3434,7 @@ static bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
KeepEdge2 = nullptr;
else
Succ->removePredecessor(OldTerm->getParent(),
- /*DontDeleteUselessPHIs=*/true);
+ /*KeepOneInputPHIs=*/true);
}
IRBuilder<> Builder(OldTerm);
@@ -3622,20 +3626,16 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
// the switch to the merge point on the compared value.
BasicBlock *NewBB =
BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
- SmallVector<uint64_t, 8> Weights;
- bool HasWeights = HasBranchWeights(SI);
- if (HasWeights) {
- GetBranchWeights(SI, Weights);
- if (Weights.size() == 1 + SI->getNumCases()) {
- // Split weight for default case to case for "Cst".
- Weights[0] = (Weights[0] + 1) >> 1;
- Weights.push_back(Weights[0]);
-
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- setBranchWeights(SI, MDWeights);
+ {
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ auto W0 = SIW.getSuccessorWeight(0);
+ SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
+ if (W0) {
+ NewW = ((uint64_t(*W0) + 1) >> 1);
+ SIW.setSuccessorWeight(0, *NewW);
}
+ SIW.addCase(Cst, NewBB, NewW);
}
- SI->addCase(Cst, NewBB);
// NewBB branches to the phi block, add the uncond branch and the phi entry.
Builder.SetInsertPoint(NewBB);
@@ -4184,24 +4184,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
Changed = true;
}
} else {
+ Value* Cond = BI->getCondition();
if (BI->getSuccessor(0) == BB) {
+ Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
EraseTerminatorAndDCECond(BI);
} else if (BI->getSuccessor(1) == BB) {
+ Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0));
EraseTerminatorAndDCECond(BI);
Changed = true;
}
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ SwitchInstProfUpdateWrapper SU(*SI);
+ for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
if (i->getCaseSuccessor() != BB) {
++i;
continue;
}
- BB->removePredecessor(SI->getParent());
- i = SI->removeCase(i);
- e = SI->case_end();
+ BB->removePredecessor(SU->getParent());
+ i = SU.removeCase(i);
+ e = SU->case_end();
Changed = true;
}
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
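
In the SimplifyUnreachable hunk above, deleting a conditional branch edge into
an unreachable block now records what that deletion implies: the branch
condition (or its negation) is emitted as an llvm.assume so later passes keep
the information. A minimal sketch of the pattern, not part of the patch; the
helper and its TrueEdgeIsDead flag are hypothetical:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void recordImpossibleEdge(IRBuilder<> &Builder, BranchInst *BI,
                                     bool TrueEdgeIsDead) {
      Value *Cond = BI->getCondition();
      // If the true edge led to unreachable, the condition must be false
      // whenever this point is reached, and vice versa.
      Builder.CreateAssumption(TrueEdgeIsDead ? Builder.CreateNot(Cond) : Cond);
    }
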
@@ -4435,33 +4439,20 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
return true;
}
- SmallVector<uint64_t, 8> Weights;
- bool HasWeight = HasBranchWeights(SI);
- if (HasWeight) {
- GetBranchWeights(SI, Weights);
- HasWeight = (Weights.size() == 1 + SI->getNumCases());
- }
+ if (DeadCases.empty())
+ return false;
- // Remove dead cases from the switch.
+ SwitchInstProfUpdateWrapper SIW(*SI);
for (ConstantInt *DeadCase : DeadCases) {
SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
assert(CaseI != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
- if (HasWeight) {
- std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back());
- Weights.pop_back();
- }
-
// Prune unused values from PHI nodes.
CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
- SI->removeCase(CaseI);
- }
- if (HasWeight && Weights.size() >= 2) {
- SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
- setBranchWeights(SI, MDWeights);
+ SIW.removeCase(CaseI);
}
- return !DeadCases.empty();
+ return true;
}
/// If BB would be eligible for simplification by
@@ -5034,7 +5025,7 @@ SwitchLookupTable::SwitchLookupTable(
ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
- Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
+ Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
GlobalVariable::PrivateLinkage, Initializer,
"switch.table." + FuncName);
Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
@@ -5091,7 +5082,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
Value *GEPIndices[] = {Builder.getInt32(0), Index};
Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
GEPIndices, "switch.gep");
- return Builder.CreateLoad(GEP, "switch.load");
+ return Builder.CreateLoad(
+ cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
+ "switch.load");
}
}
llvm_unreachable("Unknown lookup table kind!");
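
The BuildLookup change is part of the move toward explicitly typed memory
operations: CreateLoad is given the loaded type instead of inferring it from
the pointer's pointee type. A short sketch of the same pattern, not part of
the patch; the wrapper function is hypothetical:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    static Value *loadTableEntry(IRBuilder<> &B, GlobalVariable *Array,
                                 Value *GEP) {
      // Name the element type explicitly; the old one-argument CreateLoad
      // derived it from GEP's pointer type.
      Type *EltTy = cast<ArrayType>(Array->getValueType())->getElementType();
      return B.CreateLoad(EltTy, GEP, "switch.load");
    }
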
@@ -5425,7 +5418,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
// do not delete PHINodes here.
SI->getDefaultDest()->removePredecessor(SI->getParent(),
- /*DontDeleteUselessPHIs=*/true);
+ /*KeepOneInputPHIs=*/true);
}
bool ReturnedEarly = false;
@@ -5533,25 +5526,23 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// Now we have signed numbers that have been shifted so that, given enough
// precision, there are no negative values. Since the rest of the transform
// is bitwise only, we switch now to an unsigned representation.
- uint64_t GCD = 0;
- for (auto &V : Values)
- GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
- // This transform can be done speculatively because it is so cheap - it results
- // in a single rotate operation being inserted. This can only happen if the
- // factor extracted is a power of 2.
- // FIXME: If the GCD is an odd number we can multiply by the multiplicative
- // inverse of GCD and then perform this transform.
+ // This transform can be done speculatively because it is so cheap - it
+ // results in a single rotate operation being inserted.
// FIXME: It's possible that optimizing a switch on powers of two might also
// be beneficial - flag values are often powers of two and we could use a CLZ
// as the key function.
- if (GCD <= 1 || !isPowerOf2_64(GCD))
- // No common divisor found or too expensive to compute key function.
- return false;
- unsigned Shift = Log2_64(GCD);
+ // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
+ // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
+ // less than 64.
+ unsigned Shift = 64;
for (auto &V : Values)
- V = (int64_t)((uint64_t)V >> Shift);
+ Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
+ assert(Shift < 64);
+ if (Shift > 0)
+ for (auto &V : Values)
+ V = (int64_t)((uint64_t)V >> Shift);
if (!isSwitchDense(Values))
// Transform didn't create a dense switch.
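
The ReduceSwitchRange hunk above replaces the GCD computation with a simpler
equivalent: the usable shift is the minimum number of trailing zero bits over
all rebased case values, i.e. log2 of the largest power-of-two factor they
share. A small standalone sketch, not part of the patch, with hypothetical
values:

    #include "llvm/Support/MathExtras.h"
    #include <algorithm>
    #include <cstdint>

    static unsigned commonShift(const uint64_t *Values, unsigned N) {
      unsigned Shift = 64; // countTrailingZeros(0) == 64
      for (unsigned I = 0; I != N; ++I)
        Shift = std::min(Shift, (unsigned)llvm::countTrailingZeros(Values[I]));
      // E.g. {8, 24, 40} -> 3, since every value is a multiple of 8 = 1 << 3.
      return Shift;
    }
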
@@ -5796,7 +5787,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
return requestResimplify();
return false;
}
@@ -5860,7 +5851,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold))
+ if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
return requestResimplify();
// We have a conditional branch to two blocks that are only reachable
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 7faf291e73d9..cbb114f9a47a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1,9 +1,8 @@
//===-- SimplifyIndVar.cpp - Induction variable simplification ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -80,7 +80,8 @@ namespace {
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
- bool eliminateOverflowIntrinsic(CallInst *CI);
+ bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
+ bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
bool eliminateTrunc(TruncInst *TI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
@@ -401,61 +402,29 @@ void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
replaceSRemWithURem(Rem);
}
-bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
- auto *F = CI->getCalledFunction();
- if (!F)
- return false;
-
- typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
- const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned);
- typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
- const SCEV *, Type *, unsigned);
-
- OperationFunctionTy Operation;
- ExtensionFunctionTy Extension;
-
- Instruction::BinaryOps RawOp;
-
- // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we
- // have nuw.
- bool NoSignedOverflow;
-
- switch (F->getIntrinsicID()) {
+static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp,
+ bool Signed, const SCEV *LHS, const SCEV *RHS) {
+ const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags, unsigned);
+ switch (BinOp) {
default:
- return false;
-
- case Intrinsic::sadd_with_overflow:
- Operation = &ScalarEvolution::getAddExpr;
- Extension = &ScalarEvolution::getSignExtendExpr;
- RawOp = Instruction::Add;
- NoSignedOverflow = true;
- break;
-
- case Intrinsic::uadd_with_overflow:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
Operation = &ScalarEvolution::getAddExpr;
- Extension = &ScalarEvolution::getZeroExtendExpr;
- RawOp = Instruction::Add;
- NoSignedOverflow = false;
break;
-
- case Intrinsic::ssub_with_overflow:
+ case Instruction::Sub:
Operation = &ScalarEvolution::getMinusSCEV;
- Extension = &ScalarEvolution::getSignExtendExpr;
- RawOp = Instruction::Sub;
- NoSignedOverflow = true;
break;
-
- case Intrinsic::usub_with_overflow:
- Operation = &ScalarEvolution::getMinusSCEV;
- Extension = &ScalarEvolution::getZeroExtendExpr;
- RawOp = Instruction::Sub;
- NoSignedOverflow = false;
+ case Instruction::Mul:
+ Operation = &ScalarEvolution::getMulExpr;
break;
}
- const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
- const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+ const SCEV *(ScalarEvolution::*Extension)(const SCEV *, Type *, unsigned) =
+ Signed ? &ScalarEvolution::getSignExtendExpr
+ : &ScalarEvolution::getZeroExtendExpr;
+ // Check ext(LHS op RHS) == ext(LHS) op ext(RHS)
auto *NarrowTy = cast<IntegerType>(LHS->getType());
auto *WideTy =
IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
@@ -466,27 +435,32 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
const SCEV *B =
(SE->*Operation)((SE->*Extension)(LHS, WideTy, 0),
(SE->*Extension)(RHS, WideTy, 0), SCEV::FlagAnyWrap, 0);
+ return A == B;
+}
- if (A != B)
+bool SimplifyIndvar::eliminateOverflowIntrinsic(WithOverflowInst *WO) {
+ const SCEV *LHS = SE->getSCEV(WO->getLHS());
+ const SCEV *RHS = SE->getSCEV(WO->getRHS());
+ if (!willNotOverflow(SE, WO->getBinaryOp(), WO->isSigned(), LHS, RHS))
return false;
// Proved no overflow, nuke the overflow check and, if possible, the overflow
// intrinsic as well.
BinaryOperator *NewResult = BinaryOperator::Create(
- RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+ WO->getBinaryOp(), WO->getLHS(), WO->getRHS(), "", WO);
- if (NoSignedOverflow)
+ if (WO->isSigned())
NewResult->setHasNoSignedWrap(true);
else
NewResult->setHasNoUnsignedWrap(true);
SmallVector<ExtractValueInst *, 4> ToDelete;
- for (auto *U : CI->users()) {
+ for (auto *U : WO->users()) {
if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
if (EVI->getIndices()[0] == 1)
- EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+ EVI->replaceAllUsesWith(ConstantInt::getFalse(WO->getContext()));
else {
assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
EVI->replaceAllUsesWith(NewResult);
@@ -498,9 +472,28 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
for (auto *EVI : ToDelete)
EVI->eraseFromParent();
- if (CI->use_empty())
- CI->eraseFromParent();
+ if (WO->use_empty())
+ WO->eraseFromParent();
+
+ return true;
+}
+
+bool SimplifyIndvar::eliminateSaturatingIntrinsic(SaturatingInst *SI) {
+ const SCEV *LHS = SE->getSCEV(SI->getLHS());
+ const SCEV *RHS = SE->getSCEV(SI->getRHS());
+ if (!willNotOverflow(SE, SI->getBinaryOp(), SI->isSigned(), LHS, RHS))
+ return false;
+
+ BinaryOperator *BO = BinaryOperator::Create(
+ SI->getBinaryOp(), SI->getLHS(), SI->getRHS(), SI->getName(), SI);
+ if (SI->isSigned())
+ BO->setHasNoSignedWrap();
+ else
+ BO->setHasNoUnsignedWrap();
+ SI->replaceAllUsesWith(BO);
+ DeadInsts.emplace_back(SI);
+ Changed = true;
return true;
}
@@ -548,20 +541,19 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
if (isa<Instruction>(U) &&
!DT->isReachableFromEntry(cast<Instruction>(U)->getParent()))
continue;
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
- if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) {
- assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
- // If we cannot get rid of trunc, bail.
- if (ICI->isSigned() && !DoesSExtCollapse)
- return false;
- if (ICI->isUnsigned() && !DoesZExtCollapse)
- return false;
- // For equality, either signed or unsigned works.
- ICmpUsers.push_back(ICI);
- } else
- return false;
- } else
+ ICmpInst *ICI = dyn_cast<ICmpInst>(U);
+ if (!ICI) return false;
+ assert(L->contains(ICI->getParent()) && "LCSSA form broken?");
+ if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) &&
+ !(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0))))
return false;
+ // If we cannot get rid of trunc, bail.
+ if (ICI->isSigned() && !DoesSExtCollapse)
+ return false;
+ if (ICI->isUnsigned() && !DoesZExtCollapse)
+ return false;
+ // For equality, either signed or unsigned works.
+ ICmpUsers.push_back(ICI);
}
auto CanUseZExt = [&](ICmpInst *ICI) {
@@ -584,7 +576,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
};
// Replace all comparisons against trunc with comparisons against IV.
for (auto *ICI : ICmpUsers) {
- auto *Op1 = ICI->getOperand(1);
+ bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0));
+ auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1);
Instruction *Ext = nullptr;
// For signed/unsigned predicate, replace the old comparison with comparison
// of immediate IV against sext/zext of the invariant argument. If we can
@@ -593,6 +586,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) {
// TODO: If we see a signed comparison which can be turned into unsigned,
// we can do it here for canonicalization purposes.
ICmpInst::Predicate Pred = ICI->getPredicate();
+ if (IsSwapped) Pred = ICmpInst::getSwappedPredicate(Pred);
if (CanUseZExt(ICI)) {
assert(DoesZExtCollapse && "Unprofitable zext?");
Ext = new ZExtInst(Op1, IVTy, "zext", ICI);
@@ -636,8 +630,12 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
return eliminateSDiv(Bin);
}
- if (auto *CI = dyn_cast<CallInst>(UseInst))
- if (eliminateOverflowIntrinsic(CI))
+ if (auto *WO = dyn_cast<WithOverflowInst>(UseInst))
+ if (eliminateOverflowIntrinsic(WO))
+ return true;
+
+ if (auto *SI = dyn_cast<SaturatingInst>(UseInst))
+ if (eliminateSaturatingIntrinsic(SI))
return true;
if (auto *TI = dyn_cast<TruncInst>(UseInst))
@@ -730,59 +728,31 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {
-
// Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
return false;
- const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
- SCEV::NoWrapFlags, unsigned);
- switch (BO->getOpcode()) {
- default:
+ if (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Sub &&
+ BO->getOpcode() != Instruction::Mul)
return false;
- case Instruction::Add:
- GetExprForBO = &ScalarEvolution::getAddExpr;
- break;
-
- case Instruction::Sub:
- GetExprForBO = &ScalarEvolution::getMinusSCEV;
- break;
-
- case Instruction::Mul:
- GetExprForBO = &ScalarEvolution::getMulExpr;
- break;
- }
-
- unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
- Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
-
bool Changed = false;
- if (!BO->hasNoUnsignedWrap()) {
- const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
- const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
- SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
- SCEV::FlagAnyWrap, 0u);
- if (ExtendAfterOp == OpAfterExtend) {
- BO->setHasNoUnsignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
+ if (!BO->hasNoUnsignedWrap() &&
+ willNotOverflow(SE, BO->getOpcode(), /* Signed */ false, LHS, RHS)) {
+ BO->setHasNoUnsignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
- if (!BO->hasNoSignedWrap()) {
- const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
- const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
- SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
- SCEV::FlagAnyWrap, 0u);
- if (ExtendAfterOp == OpAfterExtend) {
- BO->setHasNoSignedWrap();
- SE->forgetValue(BO);
- Changed = true;
- }
+ if (!BO->hasNoSignedWrap() &&
+ willNotOverflow(SE, BO->getOpcode(), /* Signed */ true, LHS, RHS)) {
+ BO->setHasNoSignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
return Changed;
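
The common thread in the SimplifyIndVar hunks above is willNotOverflow: an
N-bit add/sub/mul cannot wrap exactly when performing the operation on the
operands extended to 2N bits gives the same SCEV as extending the N-bit
result. A plain-integer sketch of that identity, outside of SCEV and not part
of the patch; the helper is hypothetical and only covers the unsigned 8-bit
add case:

    #include <cstdint>

    static bool addWontWrapUnsigned8(uint8_t L, uint8_t R) {
      uint16_t Wide = uint16_t(L) + uint16_t(R);         // op(ext(L), ext(R))
      uint16_t NarrowThenExt = uint16_t(uint8_t(L + R)); // ext(op(L, R))
      return Wide == NarrowThenExt; // equal iff no unsigned wrap occurred
    }
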
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 1bb26caa2af2..e0def81d5eee 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1,9 +1,8 @@
//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,8 +16,10 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
using namespace PatternMatch;
@@ -105,6 +107,12 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
});
}
+static bool callHasFP128Argument(const CallInst *CI) {
+ return any_of(CI->operands(), [](const Use &OI) {
+ return OI->getType()->isFP128Ty();
+ });
+}
+
static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
if (Base < 2 || Base > 36)
// handle special zero base
@@ -334,11 +342,12 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
return ConstantInt::get(CI->getType(), Str1.compare(Str2));
if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
- return B.CreateNeg(
- B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+ return B.CreateNeg(B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+ CI->getType());
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
@@ -398,11 +407,12 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
}
if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x
- return B.CreateNeg(
- B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()));
+ return B.CreateNeg(B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), Str2P, "strcmpload"), CI->getType()));
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
- return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+ return B.CreateZExt(B.CreateLoad(B.getInt8Ty(), Str1P, "strcmpload"),
+ CI->getType());
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
@@ -591,7 +601,8 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B,
// strlen(x) != 0 --> *x != 0
// strlen(x) == 0 --> *x == 0
if (isOnlyUsedInZeroEqualityComparison(CI))
- return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
+ return B.CreateZExt(B.CreateLoad(B.getIntNTy(CharSize), Src, "strlenfirst"),
+ CI->getType());
return nullptr;
}
@@ -735,7 +746,8 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
Value *Result = castToCStr(CI->getArgOperand(0), B);
- Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
+ Result =
+ B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
@@ -773,7 +785,8 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
// It would be really nice to reuse switch lowering here but we can't change
// the CFG at this point.
//
- // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+ // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
+ // != 0
// after bounds check.
if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
unsigned char Max =
@@ -828,27 +841,20 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
}
-Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
- Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
-
- if (LHS == RHS) // memcmp(s,s,x) -> 0
- return Constant::getNullValue(CI->getType());
-
- // Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC)
- return nullptr;
-
- uint64_t Len = LenC->getZExtValue();
+static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
+ uint64_t Len, IRBuilder<> &B,
+ const DataLayout &DL) {
if (Len == 0) // memcmp(s1,s2,0) -> 0
return Constant::getNullValue(CI->getType());
// memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
if (Len == 1) {
- Value *LHSV = B.CreateZExt(B.CreateLoad(castToCStr(LHS, B), "lhsc"),
- CI->getType(), "lhsv");
- Value *RHSV = B.CreateZExt(B.CreateLoad(castToCStr(RHS, B), "rhsc"),
- CI->getType(), "rhsv");
+ Value *LHSV =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), castToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
return B.CreateSub(LHSV, RHSV, "chardiff");
}
@@ -878,12 +884,12 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
if (!LHSV) {
Type *LHSPtrTy =
IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
- LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
+ LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
}
if (!RHSV) {
Type *RHSPtrTy =
IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
- RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
+ RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
}
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
}
@@ -907,10 +913,48 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Ret = 1;
return ConstantInt::get(CI->getType(), Ret);
}
+ return nullptr;
+}
+
+// Most simplifications for memcmp also apply to bcmp.
+Value *LibCallSimplifier::optimizeMemCmpBCmpCommon(CallInst *CI,
+ IRBuilder<> &B) {
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+
+ if (LHS == RHS) // memcmp(s,s,x) -> 0
+ return Constant::getNullValue(CI->getType());
+
+ // Handle constant lengths.
+ if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+ if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
+ LenC->getZExtValue(), B, DL))
+ return Res;
+
+ return nullptr;
+}
+
+Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
+ if (Value *V = optimizeMemCmpBCmpCommon(CI, B))
+ return V;
+
+ // memcmp(x, y, Len) == 0 -> bcmp(x, y, Len) == 0
+ // `bcmp` can be more efficient than memcmp because it only has to know that
+ // there is a difference, not where it is.
+ if (isOnlyUsedInZeroEqualityComparison(CI) && TLI->has(LibFunc_bcmp)) {
+ Value *LHS = CI->getArgOperand(0);
+ Value *RHS = CI->getArgOperand(1);
+ Value *Size = CI->getArgOperand(2);
+ return emitBCmp(LHS, RHS, Size, B, DL, TLI);
+ }
return nullptr;
}
+Value *LibCallSimplifier::optimizeBCmp(CallInst *CI, IRBuilder<> &B) {
+ return optimizeMemCmpBCmpCommon(CI, B);
+}
+
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
// memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
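
The memcmp hunk above adds a rewrite of memcmp calls whose result is only
compared against zero into bcmp, which may be cheaper because it only has to
report whether the buffers differ, not how they order. A minimal sketch of the
emission step, not part of the patch; the wrapper is hypothetical and the
caller is assumed to have already proven the zero-equality-only property:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    using namespace llvm;

    static Value *memcmpToBCmp(CallInst *CI, IRBuilder<> &B,
                               const DataLayout &DL,
                               const TargetLibraryInfo *TLI) {
      if (!TLI->has(LibFunc_bcmp)) // the target must actually provide bcmp
        return nullptr;
      return emitBCmp(CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), B, DL, TLI);
    }
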
@@ -1031,7 +1075,8 @@ static Value *valueHasFloatPrecision(Value *Val) {
/// Shrink double -> float functions.
static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
bool isBinary, bool isPrecise = false) {
- if (!CI->getType()->isDoubleTy())
+ Function *CalleeFn = CI->getCalledFunction();
+ if (!CI->getType()->isDoubleTy() || !CalleeFn)
return nullptr;
// If not all the uses of the function are converted to float, then bail out.
@@ -1051,15 +1096,16 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
if (!V[0] || (isBinary && !V[1]))
return nullptr;
+ StringRef CalleeNm = CalleeFn->getName();
+ AttributeList CalleeAt = CalleeFn->getAttributes();
+ bool CalleeIn = CalleeFn->isIntrinsic();
+
// If call isn't an intrinsic, check that it isn't within a function with the
// same name as the float version of this call, otherwise the result is an
// infinite loop. For example, from MinGW-w64:
//
// float expf(float val) { return (float) exp((double) val); }
- Function *CalleeFn = CI->getCalledFunction();
- StringRef CalleeNm = CalleeFn->getName();
- AttributeList CalleeAt = CalleeFn->getAttributes();
- if (CalleeFn && !CalleeFn->isIntrinsic()) {
+ if (!CalleeIn) {
const Function *Fn = CI->getFunction();
StringRef FnName = Fn->getName();
if (FnName.back() == 'f' &&
@@ -1074,7 +1120,7 @@ static Value *optimizeDoubleFP(CallInst *CI, IRBuilder<> &B,
// g((double) float) -> (double) gf(float)
Value *R;
- if (CalleeFn->isIntrinsic()) {
+ if (CalleeIn) {
Module *M = CI->getModule();
Intrinsic::ID IID = CalleeFn->getIntrinsicID();
Function *Fn = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
@@ -1132,10 +1178,10 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
IRBuilder<> &B) {
if (!isa<FPMathOperator>(Call))
return nullptr;
-
+
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(Call->getFastMathFlags());
-
+
// TODO: Can this be shared to also handle LLVM intrinsics?
Value *X;
switch (Func) {
@@ -1189,7 +1235,8 @@ static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) {
}
/// Use exp{,2}(x * y) for pow(exp{,2}(x), y);
-/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x).
+/// exp2(n * x) for pow(2.0 ** n, x); exp10(x) for pow(10.0, x);
+/// exp2(log2(n) * x) for pow(n, x).
Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
AttributeList Attrs = Pow->getCalledFunction()->getAttributes();
@@ -1276,12 +1323,12 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
APFloat BaseR = APFloat(1.0);
BaseR.convert(BaseF->getSemantics(), APFloat::rmTowardZero, &Ignored);
BaseR = BaseR / *BaseF;
- bool IsInteger = BaseF->isInteger(),
- IsReciprocal = BaseR.isInteger();
+ bool IsInteger = BaseF->isInteger(), IsReciprocal = BaseR.isInteger();
const APFloat *NF = IsReciprocal ? &BaseR : BaseF;
APSInt NI(64, false);
if ((IsInteger || IsReciprocal) &&
- !NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) &&
+ NF->convertToInteger(NI, APFloat::rmTowardZero, &Ignored) ==
+ APFloat::opOK &&
NI > 1 && NI.isPowerOf2()) {
double N = NI.logBase2() * (IsReciprocal ? -1.0 : 1.0);
Value *FMul = B.CreateFMul(Expo, ConstantFP::get(Ty, N), "mul");
@@ -1301,6 +1348,28 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilder<> &B) {
return emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f,
LibFunc_exp10l, B, Attrs);
+ // pow(n, x) -> exp2(log2(n) * x)
+ if (Pow->hasOneUse() && Pow->hasApproxFunc() && Pow->hasNoNaNs() &&
+ Pow->hasNoInfs() && BaseF->isNormal() && !BaseF->isNegative()) {
+ Value *Log = nullptr;
+ if (Ty->isFloatTy())
+ Log = ConstantFP::get(Ty, std::log2(BaseF->convertToFloat()));
+ else if (Ty->isDoubleTy())
+ Log = ConstantFP::get(Ty, std::log2(BaseF->convertToDouble()));
+
+ if (Log) {
+ Value *FMul = B.CreateFMul(Log, Expo, "mul");
+ if (Pow->doesNotAccessMemory()) {
+ return B.CreateCall(Intrinsic::getDeclaration(Mod, Intrinsic::exp2, Ty),
+ FMul, "exp2");
+ } else {
+ if (hasUnaryFloatFn(TLI, Ty, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l))
+ return emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f,
+ LibFunc_exp2l, B, Attrs);
+ }
+ }
+ }
return nullptr;
}
@@ -1364,12 +1433,22 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
return Sqrt;
}
+static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
+ IRBuilder<> &B) {
+ Value *Args[] = {Base, Expo};
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::powi, Base->getType());
+ return B.CreateCall(F, Args);
+}
+
Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
- Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1);
+ Value *Base = Pow->getArgOperand(0);
+ Value *Expo = Pow->getArgOperand(1);
Function *Callee = Pow->getCalledFunction();
StringRef Name = Callee->getName();
Type *Ty = Pow->getType();
+ Module *M = Pow->getModule();
Value *Shrunk = nullptr;
+ bool AllowApprox = Pow->hasApproxFunc();
bool Ignored;
// Bail out if simplifying libcalls to pow() is disabled.
@@ -1382,8 +1461,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// Shrink pow() to powf() if the arguments are single precision,
// unless the result is expected to be double precision.
- if (UnsafeFPShrink &&
- Name == TLI->getName(LibFunc_pow) && hasFloatVersion(Name))
+ if (UnsafeFPShrink && Name == TLI->getName(LibFunc_pow) &&
+ hasFloatVersion(Name))
Shrunk = optimizeBinaryDoubleFP(Pow, B, true);
// Evaluate special cases related to the base.
@@ -1403,7 +1482,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// pow(x, 0.0) -> 1.0
if (match(Expo, m_SpecificFP(0.0)))
- return ConstantFP::get(Ty, 1.0);
+ return ConstantFP::get(Ty, 1.0);
// pow(x, 1.0) -> x
if (match(Expo, m_FPOne()))
@@ -1418,7 +1497,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
// pow(x, n) -> x * x * x * ...
const APFloat *ExpoF;
- if (Pow->isFast() && match(Expo, m_APFloat(ExpoF))) {
+ if (AllowApprox && match(Expo, m_APFloat(ExpoF))) {
// We limit to a max of 7 multiplications, thus the maximum exponent is 32.
// If the exponent is an integer+0.5 we generate a call to sqrt and an
// additional fmul.
@@ -1442,9 +1521,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
if (!Expo2.isInteger())
return nullptr;
- Sqrt =
- getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
- Pow->doesNotAccessMemory(), Pow->getModule(), B, TLI);
+ Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), M, B, TLI);
}
// We will memoize intermediate products of the Addition Chain.
@@ -1467,6 +1545,29 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilder<> &B) {
return FMul;
}
+
+ APSInt IntExpo(32, /*isUnsigned=*/false);
+ // powf(x, n) -> powi(x, n) if n is a constant signed integer value
+ if (ExpoF->isInteger() &&
+ ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
+ APFloat::opOK) {
+ return createPowWithIntegerExponent(
+ Base, ConstantInt::get(B.getInt32Ty(), IntExpo), M, B);
+ }
+ }
+
+ // powf(x, itofp(y)) -> powi(x, y)
+ if (AllowApprox && (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo))) {
+ Value *IntExpo = cast<Instruction>(Expo)->getOperand(0);
+ Value *NewExpo = nullptr;
+ unsigned BitWidth = IntExpo->getType()->getPrimitiveSizeInBits();
+ if (isa<SIToFPInst>(Expo) && BitWidth == 32)
+ NewExpo = IntExpo;
+ else if (BitWidth < 32)
+ NewExpo = isa<SIToFPInst>(Expo) ? B.CreateSExt(IntExpo, B.getInt32Ty())
+ : B.CreateZExt(IntExpo, B.getInt32Ty());
+ if (NewExpo)
+ return createPowWithIntegerExponent(Base, NewExpo, M, B);
}
return Shrunk;
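Also outside the diff: both new powi folds above feed llvm.powi, whose exponent is a 32-bit integer, so the exponent must either be a constant that converts exactly (APFloat::opOK) or come from an itofp of a type no wider than i32, sign- or zero-extended as needed. A standalone C++ sketch of what a powi-style expansion computes (illustrative, not LLVM API):

  // Illustrative only: exponentiation by squaring with an i32-like exponent,
  // the semantics the pow -> powi rewrites rely on (up to rounding).
  #include <cassert>
  #include <cmath>
  #include <cstdint>

  static double PowI(double Base, int32_t Exp) {
    uint64_t E = Exp < 0 ? static_cast<uint64_t>(-static_cast<int64_t>(Exp))
                         : static_cast<uint64_t>(Exp);
    double R = 1.0;
    for (double B = Base; E; E >>= 1, B *= B)
      if (E & 1)
        R *= B;
    return Exp < 0 ? 1.0 / R : R;
  }

  int main() {
    assert(std::fabs(PowI(1.5, 7) - std::pow(1.5, 7.0)) < 1e-12);
    assert(std::fabs(PowI(2.0, -3) - std::pow(2.0, -3.0)) < 1e-15);
    return 0;
  }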
@@ -1504,9 +1605,8 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
One = ConstantExpr::getFPExtend(One, Op->getType());
Module *M = CI->getModule();
- Value *NewCallee =
- M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
- Op->getType(), B.getInt32Ty());
+ FunctionCallee NewCallee = M->getOrInsertFunction(
+ TLI->getName(LdExp), Op->getType(), Op->getType(), B.getInt32Ty());
CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1518,40 +1618,30 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
// If we can shrink the call to a float function rather than a double
// function, do that first.
+ Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
if ((Name == "fmin" || Name == "fmax") && hasFloatVersion(Name))
if (Value *Ret = optimizeBinaryDoubleFP(CI, B))
return Ret;
+ // The LLVM intrinsics minnum/maxnum correspond to fmin/fmax. Canonicalize to
+ // the intrinsics for improved optimization (for example, vectorization).
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves.
+ // From the C standard draft WG14/N1256:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0.0, +0.0) would return +0; however, implementation in software
+ // might be impractical."
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- if (CI->isFast()) {
- // If the call is 'fast', then anything we create here will also be 'fast'.
- FMF.setFast();
- } else {
- // At a minimum, no-nans-fp-math must be true.
- if (!CI->hasNoNaNs())
- return nullptr;
- // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
- // "Ideally, fmax would be sensitive to the sign of zero, for example
- // fmax(-0.0, +0.0) would return +0; however, implementation in software
- // might be impractical."
- FMF.setNoSignedZeros();
- FMF.setNoNaNs();
- }
+ FastMathFlags FMF = CI->getFastMathFlags();
+ FMF.setNoSignedZeros();
B.setFastMathFlags(FMF);
- // We have a relaxed floating-point environment. We can ignore NaN-handling
- // and transform to a compare and select. We do not have to consider errno or
- // exceptions, because fmin/fmax do not have those.
- Value *Op0 = CI->getArgOperand(0);
- Value *Op1 = CI->getArgOperand(1);
- Value *Cmp = Callee->getName().startswith("fmin") ?
- B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
- return B.CreateSelect(Cmp, Op0, Op1);
+ Intrinsic::ID IID = Callee->getName().startswith("fmin") ? Intrinsic::minnum
+ : Intrinsic::maxnum;
+ Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, CI->getType());
+ return B.CreateCall(F, { CI->getArgOperand(0), CI->getArgOperand(1) });
}
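One note on the fmin/fmax change above, outside the diff: the call is now canonicalized to llvm.minnum/llvm.maxnum with the call's fast-math flags plus an unconditional no-signed-zeros, justified by the quoted C wording. A tiny standalone C++ check of that leeway (illustrative):

  // Illustrative only: C lets fmax(-0.0, +0.0) return either zero, so nsz on
  // the replacement intrinsic gives up nothing the libcall guaranteed.
  #include <cmath>
  #include <cstdio>

  int main() {
    double M = std::fmax(-0.0, +0.0);
    std::printf("fmax(-0.0, +0.0) = %g, signbit = %d\n", M, std::signbit(M));
    return 0;
  }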
Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
@@ -1654,13 +1744,13 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
// replace it with the fabs of that factor.
Module *M = Callee->getParent();
Type *ArgType = I->getType();
- Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
if (OtherOp) {
// If we found a non-repeated factor, we still need to get its square
// root. We then multiply that by the value that was simplified out
// of the square root calculation.
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
return B.CreateFMul(FabsCall, SqrtCall);
}
@@ -1728,8 +1818,8 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
}
Module *M = OrigCallee->getParent();
- Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
- ResTy, ArgTy);
+ FunctionCallee Callee =
+ M->getOrInsertFunction(Name, OrigCallee->getAttributes(), ResTy, ArgTy);
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
@@ -1840,8 +1930,8 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
- Intrinsic::cttz, ArgType);
+ Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1854,8 +1944,8 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
// fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
- Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
- Intrinsic::ctlz, ArgType);
+ Function *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::ctlz, ArgType);
Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
V);
@@ -2026,13 +2116,27 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
// arguments.
if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *IPrintFFn =
+ FunctionCallee IPrintFFn =
M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(IPrintFFn);
B.Insert(New);
return New;
}
+
+ // printf(format, ...) -> __small_printf(format, ...) if no 128-bit floating point
+ // arguments.
+ if (TLI->has(LibFunc_small_printf) && !callHasFP128Argument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ auto SmallPrintFFn =
+ M->getOrInsertFunction(TLI->getName(LibFunc_small_printf),
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
return nullptr;
}
@@ -2077,7 +2181,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
}
if (FormatStr[1] == 's') {
- // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+ // sprintf(dest, "%s", str) -> llvm.memcpy(align 1 dest, align 1 str,
+ // strlen(str)+1)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
@@ -2105,13 +2210,27 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
// point arguments.
if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *SIPrintFFn =
+ FunctionCallee SIPrintFFn =
M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(SIPrintFFn);
B.Insert(New);
return New;
}
+
+ // sprintf(str, format, ...) -> __small_sprintf(str, format, ...) if no 128-bit
+ // floating point arguments.
+ if (TLI->has(LibFunc_small_sprintf) && !callHasFP128Argument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ auto SmallSPrintFFn =
+ M->getOrInsertFunction(TLI->getName(LibFunc_small_sprintf),
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallSPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
return nullptr;
}
@@ -2140,7 +2259,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) {
else if (N < FormatStr.size() + 1)
return nullptr;
- // sprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt,
+ // snprintf(dst, size, fmt) -> llvm.memcpy(align 1 dst, align 1 fmt,
// strlen(fmt)+1)
B.CreateMemCpy(
CI->getArgOperand(0), 1, CI->getArgOperand(2), 1,
@@ -2262,13 +2381,27 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
// floating point arguments.
if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- Constant *FIPrintFFn =
+ FunctionCallee FIPrintFFn =
M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
CallInst *New = cast<CallInst>(CI->clone());
New->setCalledFunction(FIPrintFFn);
B.Insert(New);
return New;
}
+
+ // fprintf(stream, format, ...) -> __small_fprintf(stream, format, ...) if no
+ // 128-bit floating point arguments.
+ if (TLI->has(LibFunc_small_fprintf) && !callHasFP128Argument(CI)) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ auto SmallFPrintFFn =
+ M->getOrInsertFunction(TLI->getName(LibFunc_small_fprintf),
+ FT, Callee->getAttributes());
+ CallInst *New = cast<CallInst>(CI->clone());
+ New->setCalledFunction(SmallFPrintFFn);
+ B.Insert(New);
+ return New;
+ }
+
return nullptr;
}
@@ -2288,7 +2421,8 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// If this is writing one byte, turn it into fputc.
// This optimisation is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char");
+ Value *Char = B.CreateLoad(B.getInt8Ty(),
+ castToCStr(CI->getArgOperand(0), B), "char");
Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -2307,7 +2441,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Don't rewrite fputs to fwrite when optimising for size because fwrite
// requires more arguments and thus extra MOVs are required.
- if (CI->getFunction()->optForSize())
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ if (OptForSize)
return nullptr;
// Check if it has any use
@@ -2320,7 +2456,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
return nullptr;
}
- // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+ // fputs(s,F) --> fwrite(s,strlen(s),1,F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len)
return nullptr;
@@ -2367,18 +2503,14 @@ Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) {
}
Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
- // Check for a constant string.
- StringRef Str;
- if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+ if (!CI->use_empty())
return nullptr;
- if (Str.empty() && CI->use_empty()) {
- // puts("") -> putchar('\n')
- Value *Res = emitPutChar(B.getInt32('\n'), B, TLI);
- if (CI->use_empty() || !Res)
- return Res;
- return B.CreateIntCast(Res, CI->getType(), true);
- }
+ // Check for a constant string.
+ // puts("") -> putchar('\n')
+ StringRef Str;
+ if (getConstantStringInfo(CI->getArgOperand(0), Str) && Str.empty())
+ return emitPutChar(B.getInt32('\n'), B, TLI);
return nullptr;
}
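For the reshuffled puts() handling above (outside the diff): the fold now fires only when the result is unused, in which case puts("") and putchar('\n') produce the same single newline. Minimal standalone illustration:

  // Illustrative only: both calls write exactly one '\n' to stdout, so the
  // result-unused puts("") -> putchar('\n') rewrite is observationally equal.
  #include <cstdio>

  int main() {
    std::puts("");      // writes "\n"
    std::putchar('\n'); // writes "\n"
    return 0;
  }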
@@ -2441,6 +2573,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrStr(CI, Builder);
case LibFunc_memchr:
return optimizeMemChr(CI, Builder);
+ case LibFunc_bcmp:
+ return optimizeBCmp(CI, Builder);
case LibFunc_memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc_memcpy:
@@ -2686,9 +2820,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer,
function_ref<void(Instruction *)> Eraser)
- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
@@ -2735,12 +2870,23 @@ void LibCallSimplifier::eraseFromParent(Instruction *I) {
// Fortified Library Call Optimizations
//===----------------------------------------------------------------------===//
-bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
- unsigned ObjSizeOp,
- unsigned SizeOp,
- bool isString) {
- if (CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(SizeOp))
+bool
+FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
+ unsigned ObjSizeOp,
+ Optional<unsigned> SizeOp,
+ Optional<unsigned> StrOp,
+ Optional<unsigned> FlagOp) {
+ // If this function takes a flag argument, the implementation may use it to
+ // perform extra checks. Don't fold into the non-checking variant.
+ if (FlagOp) {
+ ConstantInt *Flag = dyn_cast<ConstantInt>(CI->getArgOperand(*FlagOp));
+ if (!Flag || !Flag->isZero())
+ return false;
+ }
+
+ if (SizeOp && CI->getArgOperand(ObjSizeOp) == CI->getArgOperand(*SizeOp))
return true;
+
if (ConstantInt *ObjSizeCI =
dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) {
if (ObjSizeCI->isMinusOne())
@@ -2748,23 +2894,27 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
// If the object size wasn't -1 (unknown), bail out if we were asked to.
if (OnlyLowerUnknownSize)
return false;
- if (isString) {
- uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+ if (StrOp) {
+ uint64_t Len = GetStringLength(CI->getArgOperand(*StrOp));
// If the length is 0 we don't know how long it is and so we can't
// remove the check.
if (Len == 0)
return false;
return ObjSizeCI->getZExtValue() >= Len;
}
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeOp)))
- return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+
+ if (SizeOp) {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(*SizeOp)))
+ return ObjSizeCI->getZExtValue() >= SizeCI->getZExtValue();
+ }
}
return false;
}
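Context for the reworked isFortifiedCallFoldable, outside the diff: the _chk entry points it folds are what fortified libc headers emit, and the new optional StrOp/FlagOp parameters let string- and flag-carrying variants reuse the same object-size reasoning. A caller-side sketch using the GCC/Clang builtins those headers rely on (illustrative, host-compiler specific):

  // Illustrative only: a fortified copy whose object-size check is provably
  // redundant (sizeof(Src) <= __builtin_object_size(Buf, 0)), i.e. the kind
  // of call the simplifier may lower to a plain memcpy.
  #include <cstdio>

  int main() {
    char Buf[16];
    const char Src[] = "hello";
    __builtin___memcpy_chk(Buf, Src, sizeof(Src),
                           __builtin_object_size(Buf, 0));
    std::printf("%s\n", Buf);
    return 0;
  }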
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
CI->getArgOperand(2));
return CI->getArgOperand(0);
@@ -2774,7 +2924,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
IRBuilder<> &B) {
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
CI->getArgOperand(2));
return CI->getArgOperand(0);
@@ -2786,7 +2936,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
IRBuilder<> &B) {
// TODO: Try foldMallocMemset() here.
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
@@ -2797,8 +2947,6 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc Func) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
const DataLayout &DL = CI->getModule()->getDataLayout();
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
*ObjSize = CI->getArgOperand(2);
@@ -2814,8 +2962,12 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFortifiedCallFoldable(CI, 2, 1, true))
- return emitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
+ if (isFortifiedCallFoldable(CI, 2, None, 1)) {
+ if (Func == LibFunc_strcpy_chk)
+ return emitStrCpy(Dst, Src, B, TLI);
+ else
+ return emitStpCpy(Dst, Src, B, TLI);
+ }
if (OnlyLowerUnknownSize)
return nullptr;
@@ -2838,13 +2990,99 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
IRBuilder<> &B,
LibFunc Func) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret = emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
- return Ret;
+ if (isFortifiedCallFoldable(CI, 3, 2)) {
+ if (Func == LibFunc_strncpy_chk)
+ return emitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+ else
+ return emitStpNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
}
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeMemCCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 4, 3))
+ return emitMemCCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSNPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3, 1, None, 2)) {
+ SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 5, CI->arg_end());
+ return emitSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), VariadicArgs, B, TLI);
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeSPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 2, None, None, 1)) {
+ SmallVector<Value *, 8> VariadicArgs(CI->arg_begin() + 4, CI->arg_end());
+ return emitSPrintf(CI->getArgOperand(0), CI->getArgOperand(3), VariadicArgs,
+ B, TLI);
+ }
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrCatChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 2))
+ return emitStrCat(CI->getArgOperand(0), CI->getArgOperand(1), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCat(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return emitStrLCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrNCatChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return emitStrNCat(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeStrLCpyChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3))
+ return emitStrLCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSNPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 3, 1, None, 2))
+ return emitVSNPrintf(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(4), CI->getArgOperand(5), B, TLI);
+
+ return nullptr;
+}
+
+Value *FortifiedLibCallSimplifier::optimizeVSPrintfChk(CallInst *CI,
+ IRBuilder<> &B) {
+ if (isFortifiedCallFoldable(CI, 2, None, None, 1))
+ return emitVSPrintf(CI->getArgOperand(0), CI->getArgOperand(3),
+ CI->getArgOperand(4), B, TLI);
+
return nullptr;
}
@@ -2892,6 +3130,24 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc_stpncpy_chk:
case LibFunc_strncpy_chk:
return optimizeStrpNCpyChk(CI, Builder, Func);
+ case LibFunc_memccpy_chk:
+ return optimizeMemCCpyChk(CI, Builder);
+ case LibFunc_snprintf_chk:
+ return optimizeSNPrintfChk(CI, Builder);
+ case LibFunc_sprintf_chk:
+ return optimizeSPrintfChk(CI, Builder);
+ case LibFunc_strcat_chk:
+ return optimizeStrCatChk(CI, Builder);
+ case LibFunc_strlcat_chk:
+ return optimizeStrLCat(CI, Builder);
+ case LibFunc_strncat_chk:
+ return optimizeStrNCatChk(CI, Builder);
+ case LibFunc_strlcpy_chk:
+ return optimizeStrLCpyChk(CI, Builder);
+ case LibFunc_vsnprintf_chk:
+ return optimizeVSNPrintfChk(CI, Builder);
+ case LibFunc_vsprintf_chk:
+ return optimizeVSPrintfChk(CI, Builder);
default:
break;
}
diff --git a/lib/Transforms/Utils/SizeOpts.cpp b/lib/Transforms/Utils/SizeOpts.cpp
new file mode 100644
index 000000000000..1519751197d2
--- /dev/null
+++ b/lib/Transforms/Utils/SizeOpts.cpp
@@ -0,0 +1,37 @@
+//===-- SizeOpts.cpp - code size optimization related code ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+using namespace llvm;
+
+static cl::opt<bool> ProfileGuidedSizeOpt(
+ "pgso", cl::Hidden, cl::init(true),
+ cl::desc("Enable the profile guided size optimization. "));
+
+bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ assert(F);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+}
+
+bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ assert(BB);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+}
diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp
index 5db4d2e4df9d..e2c387cb8983 100644
--- a/lib/Transforms/Utils/SplitModule.cpp
+++ b/lib/Transforms/Utils/SplitModule.cpp
@@ -1,9 +1,8 @@
//===- SplitModule.cpp - Split a module into partitions -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp
index ac0b519f4a77..50844cf9d1c5 100644
--- a/lib/Transforms/Utils/StripGCRelocates.cpp
+++ b/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -1,9 +1,8 @@
//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 8956a089a99c..97a4533fabe5 100644
--- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -1,9 +1,8 @@
//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index fd0da79487f1..456724779b43 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -1,9 +1,8 @@
//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index d49b26472548..7f7bdf8a3d6d 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -1,9 +1,8 @@
//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 95416de07439..5272ab6e95d5 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -1,9 +1,8 @@
//===-- Utils.cpp - TransformUtils Infrastructure -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,3 +54,6 @@ void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createPromoteMemoryToRegisterPass());
}
+void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createAddDiscriminatorsPass());
+}
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index 948d9bd5baad..a77bf50fe10b 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -14,13 +14,17 @@ namespace VNCoercion {
/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
+ Type *StoredTy = StoredVal->getType();
+ if (StoredTy == LoadTy)
+ return true;
+
// If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
- StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() || StoredTy->isStructTy() ||
+ StoredTy->isArrayTy())
return false;
- uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
+ uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy);
// The store size must be byte-aligned to support future type casts.
if (llvm::alignTo(StoreSize, 8) != StoreSize)
@@ -31,10 +35,16 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
return false;
// Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
- DL.isNonIntegralPointerType(LoadTy))
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+ DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ // As a special case, allow coercion of memset used to initialize
+ // an array w/null. Despite non-integral pointers not generally having a
+ // specific bit pattern, we do assume null is zero.
+ if (auto *CI = dyn_cast<Constant>(StoredVal))
+ return CI->isNullValue();
return false;
-
+ }
+
return true;
}
@@ -207,11 +217,22 @@ static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI, const DataLayout &DL) {
+ auto *StoredVal = DepSI->getValueOperand();
+
// Cannot handle reading from store of first-class aggregate yet.
- if (DepSI->getValueOperand()->getType()->isStructTy() ||
- DepSI->getValueOperand()->getType()->isArrayTy())
+ if (StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
return -1;
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
+ DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ // Allow casts of zero values to null as a special case
+ auto *CI = dyn_cast<Constant>(StoredVal);
+ if (!CI || !CI->isNullValue())
+ return -1;
+ }
+
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
@@ -228,6 +249,11 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
+ // Don't coerce non-integral pointers to integers or vice versa.
+ if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
+ DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ return -1;
+
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
@@ -264,9 +290,15 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
// If this is memset, we just need to see if the offset is valid in the size
// of the memset..
- if (MI->getIntrinsicID() == Intrinsic::memset)
+ if (MI->getIntrinsicID() == Intrinsic::memset) {
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
+ auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
+ if (!CI || !CI->isZero())
+ return -1;
+ }
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, DL);
+ }
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
@@ -278,7 +310,7 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
- if (!GV || !GV->isConstant())
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return -1;
// See if the access is within the bounds of the transfer.
@@ -287,6 +319,12 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
+ // Don't coerce non-integral pointers to integers or vice versa, and the
+ // memtransfer is implicitly a raw byte code
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ // TODO: Can allow nullptrs from constant zeros
+ return -1;
+
unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
@@ -386,12 +424,12 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
- DestPTy =
- PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+ Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+ Type *DestPTy =
+ PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
- LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
NewLoad->setAlignment(SrcVal->getAlignment());
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 55fff3f3872a..fbc3407c301f 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -1,9 +1,8 @@
//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -914,6 +913,21 @@ void Mapper::remapInstruction(Instruction *I) {
Tys.push_back(TypeMapper->remapType(Ty));
CS.mutateFunctionType(FunctionType::get(
TypeMapper->remapType(I->getType()), Tys, FTy->isVarArg()));
+
+ LLVMContext &C = CS->getContext();
+ AttributeList Attrs = CS.getAttributes();
+ for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+ if (Attrs.hasAttribute(i, Attribute::ByVal)) {
+ Type *Ty = Attrs.getAttribute(i, Attribute::ByVal).getValueAsType();
+ if (!Ty)
+ continue;
+
+ Attrs = Attrs.removeAttribute(C, i, Attribute::ByVal);
+ Attrs = Attrs.addAttribute(
+ C, i, Attribute::getWithByValType(C, TypeMapper->remapType(Ty)));
+ }
+ }
+ CS.setAttributes(Attrs);
return;
}
if (auto *AI = dyn_cast<AllocaInst>(I))
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 9ff18328c219..4273080ddd91 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1,9 +1,8 @@
//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -927,7 +926,7 @@ bool Vectorizer::vectorizeStoreChain(
StoreInst *S0 = cast<StoreInst>(Chain[0]);
// If the vector has an int element, default to int for the whole store.
- Type *StoreTy;
+ Type *StoreTy = nullptr;
for (Instruction *I : Chain) {
StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
if (StoreTy->isIntOrIntVectorTy())
@@ -939,6 +938,7 @@ bool Vectorizer::vectorizeStoreChain(
break;
}
}
+ assert(StoreTy && "Failed to find store type");
unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned AS = S0->getPointerAddressSpace();
@@ -1152,13 +1152,8 @@ bool Vectorizer::vectorizeLoadChain(
vectorizeLoadChain(Chains.second, InstructionsProcessed);
}
- unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
- StackAdjustedAlignment,
- DL, L0, nullptr, &DT);
- if (NewAlign != 0)
- Alignment = NewAlign;
-
- Alignment = NewAlign;
+ Alignment = getOrEnforceKnownAlignment(
+ L0->getPointerOperand(), StackAdjustedAlignment, DL, L0, nullptr, &DT);
}
if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
@@ -1182,7 +1177,7 @@ bool Vectorizer::vectorizeLoadChain(
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
- LoadInst *LI = Builder.CreateAlignedLoad(Bitcast, Alignment);
+ LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment);
propagateMetadata(LI, Chain);
if (VecLoadTy) {
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index b44fe5a52a2f..6ef8dc2d3cd7 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1,9 +1,8 @@
//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,8 @@ using namespace llvm;
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
+extern cl::opt<bool> EnableVPlanPredication;
+
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
@@ -46,6 +47,18 @@ static const unsigned MaxInterleaveFactor = 16;
namespace llvm {
+#ifndef NDEBUG
+static void debugVectorizationFailure(const StringRef DebugMsg,
+ Instruction *I) {
+ dbgs() << "LV: Not vectorizing: " << DebugMsg;
+ if (I != nullptr)
+ dbgs() << " " << *I;
+ else
+ dbgs() << '.';
+ dbgs() << '\n';
+}
+#endif
+
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
StringRef RemarkName,
Loop *TheLoop,
@@ -103,6 +116,25 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
<< "LV: Interleaving disabled by the pass manager\n");
}
+void LoopVectorizeHints::setAlreadyVectorized() {
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
+
+ MDNode *IsVectorizedMD = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.isvectorized"),
+ ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
+ MDNode *LoopID = TheLoop->getLoopID();
+ MDNode *NewLoopID =
+ makePostTransformationMetadata(Context, LoopID,
+ {Twine(Prefix(), "vectorize.").str(),
+ Twine(Prefix(), "interleave.").str()},
+ {IsVectorizedMD});
+ TheLoop->setLoopID(NewLoopID);
+
+ // Update internal cache.
+ IsVectorized.Value = 1;
+}
+
bool LoopVectorizeHints::allowVectorization(
Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
if (getForce() == LoopVectorizeHints::FK_Disabled) {
@@ -230,57 +262,6 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
}
}
-MDNode *LoopVectorizeHints::createHintMetadata(StringRef Name,
- unsigned V) const {
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- Metadata *MDs[] = {
- MDString::get(Context, Name),
- ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Context), V))};
- return MDNode::get(Context, MDs);
-}
-
-bool LoopVectorizeHints::matchesHintMetadataName(MDNode *Node,
- ArrayRef<Hint> HintTypes) {
- MDString *Name = dyn_cast<MDString>(Node->getOperand(0));
- if (!Name)
- return false;
-
- for (auto H : HintTypes)
- if (Name->getString().endswith(H.Name))
- return true;
- return false;
-}
-
-void LoopVectorizeHints::writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
- if (HintTypes.empty())
- return;
-
- // Reserve the first element to LoopID (see below).
- SmallVector<Metadata *, 4> MDs(1);
- // If the loop already has metadata, then ignore the existing operands.
- MDNode *LoopID = TheLoop->getLoopID();
- if (LoopID) {
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
- // If node in update list, ignore old value.
- if (!matchesHintMetadataName(Node, HintTypes))
- MDs.push_back(Node);
- }
- }
-
- // Now, add the missing hints.
- for (auto H : HintTypes)
- MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
-
- // Replace current metadata node with new one.
- LLVMContext &Context = TheLoop->getHeader()->getContext();
- MDNode *NewLoopID = MDNode::get(Context, MDs);
- // Set operand 0 to refer to the loop id itself.
- NewLoopID->replaceOperandWith(0, NewLoopID);
-
- TheLoop->setLoopID(NewLoopID);
-}
-
bool LoopVectorizationRequirements::doesNotMeet(
Function *F, Loop *L, const LoopVectorizeHints &Hints) {
const char *PassName = Hints.vectorizeAnalysisPassName();
@@ -464,6 +445,14 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
+void LoopVectorizationLegality::reportVectorizationFailure(
+ const StringRef DebugMsg, const StringRef OREMsg,
+ const StringRef ORETag, Instruction *I) const {
+ LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
+ ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
+ ORETag, TheLoop, I) << OREMsg);
+}
+
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
// Store the result and return it at the end instead of exiting early, in case
@@ -476,9 +465,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// not supported yet.
auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
if (!Br) {
- LLVM_DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Unsupported basic block terminator",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -488,13 +477,16 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// Check whether the BranchInst is a supported one. Only unconditional
// branches, conditional branches with an outer loop invariant condition or
// backedges are supported.
- if (Br && Br->isConditional() &&
+ // FIXME: We skip these checks when VPlan predication is enabled as we
+ // want to allow divergent branches. This whole check will be removed
+ // once VPlan predication is on by default.
+ if (!EnableVPlanPredication && Br && Br->isConditional() &&
!TheLoop->isLoopInvariant(Br->getCondition()) &&
!LI->isLoopHeader(Br->getSuccessor(0)) &&
!LI->isLoopHeader(Br->getSuccessor(1))) {
- LLVM_DEBUG(dbgs() << "LV: Unsupported conditional branch.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Unsupported conditional branch",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -506,11 +498,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// simple outer loops scenarios with uniform nested loops.
if (!isUniformLoopNest(TheLoop /*loop nest*/,
TheLoop /*context outer loop*/)) {
- LLVM_DEBUG(
- dbgs()
- << "LV: Not vectorizing: Outer loop contains divergent loops.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Outer loop contains divergent loops",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -519,10 +509,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() {
// Check whether we are able to set up outer loop induction.
if (!setupOuterLoopInductions()) {
- LLVM_DEBUG(
- dbgs() << "LV: Not vectorizing: Unsupported outer loop Phi(s).\n");
- ORE->emit(createMissedAnalysis("UnsupportedPhi")
- << "Unsupported outer loop Phi(s)");
+ reportVectorizationFailure("Unsupported outer loop Phi(s)",
+ "Unsupported outer loop Phi(s)",
+ "UnsupportedPhi");
if (DoExtraAnalysis)
Result = false;
else
@@ -627,9 +616,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that this PHI type is allowed.
if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
- << "loop control flow is not understood by vectorizer");
- LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
+ reportVectorizationFailure("Found a non-int non-pointer PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
return false;
}
@@ -647,9 +636,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
- << "control flow not understood by vectorizer");
- LLVM_DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ reportVectorizationFailure("Found an invalid PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood", Phi);
return false;
}
@@ -698,10 +687,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
- << "value that could not be identified as "
- "reduction is used outside the loop");
- LLVM_DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
+ reportVectorizationFailure("Found an unidentified PHI",
+ "value that could not be identified as "
+ "reduction is used outside the loop",
+ "NonReductionValueUsedOutsideLoop", Phi);
return false;
} // end of PHI handling
@@ -728,31 +717,33 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// but it's hard to provide meaningful yet generic advice.
// Also, should this be guarded by allowExtraAnalysis() and/or be part
// of the returned info from isFunctionVectorizable()?
- ORE->emit(createMissedAnalysis("CantVectorizeLibcall", CI)
- << "library call cannot be vectorized. "
- "Try compiling with -fno-math-errno, -ffast-math, "
- "or similar flags");
+ reportVectorizationFailure("Found a non-intrinsic callsite",
+ "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags",
+ "CantVectorizeLibcall", CI);
} else {
- ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
- << "call instruction cannot be vectorized");
+ reportVectorizationFailure("Found a non-intrinsic callsite",
+ "call instruction cannot be vectorized",
+ "CantVectorizeLibcall", CI);
}
- LLVM_DEBUG(
- dbgs() << "LV: Found a non-intrinsic callsite.\n");
return false;
}
- // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
- // second argument is the same (i.e. loop invariant)
- if (CI && hasVectorInstrinsicScalarOpd(
- getVectorIntrinsicIDForCall(CI, TLI), 1)) {
+ // Some intrinsics have scalar arguments, which must be loop invariant in
+ // order for the call to be vectorized.
+ if (CI) {
auto *SE = PSE.getSE();
- if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
- ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
- << "intrinsic instruction cannot be vectorized");
- LLVM_DEBUG(dbgs()
- << "LV: Found unvectorizable intrinsic " << *CI << "\n");
- return false;
- }
+ Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
+ reportVectorizationFailure("Found unvectorizable intrinsic",
+ "intrinsic instruction cannot be vectorized",
+ "CantVectorizeIntrinsic", CI);
+ return false;
+ }
+ }
}
// Check that the instruction return type is vectorizable.
@@ -760,9 +751,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if ((!VectorType::isValidElementType(I.getType()) &&
!I.getType()->isVoidTy()) ||
isa<ExtractElementInst>(I)) {
- ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
- << "instruction return type cannot be vectorized");
- LLVM_DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
+ reportVectorizationFailure("Found unvectorizable type",
+ "instruction return type cannot be vectorized",
+ "CantVectorizeInstructionReturnType", &I);
return false;
}
@@ -770,11 +761,44 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (auto *ST = dyn_cast<StoreInst>(&I)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
- ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
- << "store instruction cannot be vectorized");
+ reportVectorizationFailure("Store instruction cannot be vectorized",
+ "store instruction cannot be vectorized",
+ "CantVectorizeStore", ST);
return false;
}
+ // For nontemporal stores, check that a nontemporal vector version is
+ // supported on the target.
+ if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
+ // Arbitrarily try a vector of 2 elements.
+ Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+ assert(VecTy && "did not find vectorized version of stored type");
+ unsigned Alignment = getLoadStoreAlignment(ST);
+ if (!TTI->isLegalNTStore(VecTy, Alignment)) {
+ reportVectorizationFailure(
+ "nontemporal store instruction cannot be vectorized",
+ "nontemporal store instruction cannot be vectorized",
+ "CantVectorizeNontemporalStore", ST);
+ return false;
+ }
+ }
+
+ } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
+ if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
+ // For nontemporal loads, check that a nontemporal vector version is
+ // supported on the target (arbitrarily try a vector of 2 elements).
+ Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+ assert(VecTy && "did not find vectorized version of load type");
+ unsigned Alignment = getLoadStoreAlignment(LD);
+ if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
+ reportVectorizationFailure(
+ "nontemporal load instruction cannot be vectorized",
+ "nontemporal load instruction cannot be vectorized",
+ "CantVectorizeNontemporalLoad", LD);
+ return false;
+ }
+ }
+
// FP instructions can allow unsafe algebra, thus vectorizable by
// non-IEEE-754 compliant SIMD units.
// This applies to floating-point math operations and calls, not memory
@@ -797,23 +821,27 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
AllowedExit.insert(&I);
continue;
}
- ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
- << "value cannot be used outside the loop");
+ reportVectorizationFailure("Value cannot be used outside the loop",
+ "value cannot be used outside the loop",
+ "ValueUsedOutsideLoop", &I);
return false;
}
} // next instr.
}
if (!PrimaryInduction) {
- LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty()) {
- ORE->emit(createMissedAnalysis("NoInductionVariable")
- << "loop induction variable could not be identified");
+ reportVectorizationFailure("Did not find one integer induction var",
+ "loop induction variable could not be identified",
+ "NoInductionVariable");
return false;
} else if (!WidestIndTy) {
- ORE->emit(createMissedAnalysis("NoIntegerInductionVariable")
- << "integer loop induction variable could not be identified");
+ reportVectorizationFailure("Did not find one integer induction var",
+ "integer loop induction variable could not be identified",
+ "NoIntegerInductionVariable");
return false;
+ } else {
+ LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
}
}
@@ -839,11 +867,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return false;
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
- ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
- << "write to a loop invariant address could not "
- "be vectorized");
- LLVM_DEBUG(
- dbgs() << "LV: Non vectorizable stores to a uniform address\n");
+ reportVectorizationFailure("Stores to a uniform address",
+ "write to a loop invariant address could not be vectorized",
+ "CantVectorizeStoreToLoopInvariantAddress");
return false;
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
@@ -925,8 +951,9 @@ bool LoopVectorizationLegality::blockCanBePredicated(
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
- ORE->emit(createMissedAnalysis("IfConversionDisabled")
- << "if-conversion is disabled");
+ reportVectorizationFailure("If-conversion is disabled",
+ "if-conversion is disabled",
+ "IfConversionDisabled");
return false;
}
@@ -950,21 +977,26 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
for (BasicBlock *BB : TheLoop->blocks()) {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
- ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
- << "loop contains a switch statement");
+ reportVectorizationFailure("Loop contains a switch statement",
+ "loop contains a switch statement",
+ "LoopContainsSwitch", BB->getTerminator());
return false;
}
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
if (!blockCanBePredicated(BB, SafePointes)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
+ reportVectorizationFailure(
+ "Control flow cannot be substituted for a select",
+ "control flow cannot be substituted for a select",
+ "NoCFGForSelect", BB->getTerminator());
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
+ reportVectorizationFailure(
+ "Control flow cannot be substituted for a select",
+ "control flow cannot be substituted for a select",
+ "NoCFGForSelect", BB->getTerminator());
return false;
}
}
@@ -992,9 +1024,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!Lp->getLoopPreheader()) {
- LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("Loop doesn't have a legal pre-header",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1003,8 +1035,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a single backedge.
if (Lp->getNumBackEdges() != 1) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("The loop must have a single backedge",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1013,8 +1046,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// We must have a single exiting block.
if (!Lp->getExitingBlock()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("The loop must have an exiting block",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1025,8 +1059,9 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
// checked at the end of each iteration. With that we can assume that all
// instructions in the loop are executed the same number of times.
if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
- ORE->emit(createMissedAnalysis("CFGNotUnderstood")
- << "loop control flow is not understood by vectorizer");
+ reportVectorizationFailure("The exiting block is not the loop latch",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood");
if (DoExtraAnalysis)
Result = false;
else
@@ -1087,7 +1122,9 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
if (!canVectorizeOuterLoop()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n");
+ reportVectorizationFailure("Unsupported outer loop",
+ "unsupported outer loop",
+ "UnsupportedOuterLoop");
// TODO: Implement DoExtraAnalysis when subsequent legal checks support
// outer loops.
return false;
@@ -1137,10 +1174,9 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
- ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
- << "Too many SCEV assumptions need to be made and checked "
- << "at runtime");
- LLVM_DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
+ reportVectorizationFailure("Too many SCEV checks needed",
+ "Too many SCEV assumptions need to be made and checked at runtime",
+ "TooManySCEVRunTimeChecks");
if (DoExtraAnalysis)
Result = false;
else
@@ -1159,20 +1195,20 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
if (!PrimaryInduction) {
- ORE->emit(createMissedAnalysis("NoPrimaryInduction")
- << "Missing a primary induction variable in the loop, which is "
- << "needed in order to fold tail by masking as required.");
- LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
- << "masking.\n");
+ reportVectorizationFailure(
+ "No primary induction, cannot fold tail by masking",
+ "Missing a primary induction variable in the loop, which is "
+ "needed in order to fold tail by masking as required.",
+ "NoPrimaryInduction");
return false;
}
// TODO: handle reductions when tail is folded by masking.
if (!Reductions.empty()) {
- ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
- << "Cannot fold tail by masking in the presence of reductions.");
- LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
- << "masking.\n");
+ reportVectorizationFailure(
+ "Loop has reductions, cannot fold tail by masking",
+ "Cannot fold tail by masking in the presence of reductions.",
+ "ReductionFoldingTailByMasking");
return false;
}
@@ -1183,10 +1219,10 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
Instruction *UI = cast<Instruction>(U);
if (TheLoop->contains(UI))
continue;
- ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
- << "Cannot fold tail by masking in the presence of live outs.");
- LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
- << "outside user for : " << *UI << '\n');
+ reportVectorizationFailure(
+ "Cannot fold tail by masking, loop has an outside user for",
+ "Cannot fold tail by masking in the presence of live outs.",
+ "LiveOutFoldingTailByMasking", UI);
return false;
}
}
@@ -1198,9 +1234,10 @@ bool LoopVectorizationLegality::canFoldTailByMasking() {
// do not need predication such as the header block.
for (BasicBlock *BB : TheLoop->blocks()) {
if (!blockCanBePredicated(BB, SafePointers)) {
- ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
- << "control flow cannot be substituted for a select");
- LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+ reportVectorizationFailure(
+ "Cannot fold tail by masking as required",
+ "control flow cannot be substituted for a select",
+ "NoCFGForSelect", BB->getTerminator());
return false;
}
}
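The hunks above fold each paired ORE->emit(createMissedAnalysis(...)) and LLVM_DEBUG call into a single reportVectorizationFailure helper. Its definition is not shown in this excerpt; the sketch below is only inferred from the call sites, and the member-function form, the parameter types, and the reuse of createMissedAnalysis are assumptions rather than code taken from the patch:

// Hypothetical sketch, inferred from the call sites above; not part of the patch.
// DebugMsg goes to the -debug stream, OREMsg/ORETag form the missed-optimization
// remark, and the optional I pins the remark to a specific instruction.
void LoopVectorizationLegality::reportVectorizationFailure(
    const StringRef DebugMsg, const StringRef OREMsg,
    const StringRef ORETag, Instruction *I) {
  LLVM_DEBUG(dbgs() << "LV: Not vectorizing: " << DebugMsg << ".\n");
  ORE->emit(createMissedAnalysis(ORETag, I) << OREMsg);
}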
diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 2aa219064299..97077cce83e3 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -1,9 +1,8 @@
//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -172,6 +171,13 @@ struct VectorizationFactor {
unsigned Width;
// Cost of the loop with that width
unsigned Cost;
+
+ // Width 1 means no vectorization, cost 0 means uncomputed cost.
+ static VectorizationFactor Disabled() { return {1, 0}; }
+
+ bool operator==(const VectorizationFactor &rhs) const {
+ return Width == rhs.Width && Cost == rhs.Cost;
+ }
};
/// Planner drives the vectorization process after having passed
@@ -192,11 +198,9 @@ class LoopVectorizationPlanner {
/// The legality analysis.
LoopVectorizationLegality *Legal;
- /// The profitablity analysis.
+ /// The profitability analysis.
LoopVectorizationCostModel &CM;
- using VPlanPtr = std::unique_ptr<VPlan>;
-
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -222,8 +226,9 @@ public:
LoopVectorizationCostModel &CM)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
- /// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor plan(bool OptForSize, unsigned UserVF);
+ /// Plan how to best vectorize, return the best VF and its cost, or None if
+ /// vectorization and interleaving should be avoided up front.
+ Optional<VectorizationFactor> plan(bool OptForSize, unsigned UserVF);
/// Use the VPlan-native path to plan how to best vectorize, return the best
/// VF and its cost.
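For reference, plan() returning None now means that both vectorization and interleaving should be avoided up front, whereas a returned VectorizationFactor::Disabled() (Width 1, Cost 0) still lets the caller consider interleaving on its own. A minimal caller sketch, mirroring the processLoop() hunk later in this same patch (variable names are placeholders, not code lifted verbatim):

// Illustrative use of the new Optional<VectorizationFactor> return value.
Optional<VectorizationFactor> MaybeVF = LVP.plan(OptForSize, UserVF);
VectorizationFactor VF = VectorizationFactor::Disabled(); // Width 1, Cost 0.
unsigned IC = 1;
if (MaybeVF) {
  // A factor was computed; Width may still be 1, meaning "do not vectorize,
  // but interleaving may be worthwhile".
  VF = *MaybeVF;
  IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
}
// If MaybeVF is None, neither vectorization nor interleaving is attempted.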
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index c45dee590b84..46265e3f3e13 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1,9 +1,8 @@
//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,8 +56,10 @@
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPRecipeBuilder.h"
+#include "VPlan.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanHCFGTransforms.h"
+#include "VPlanPredicator.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -86,7 +87,9 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -133,6 +136,7 @@
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
@@ -256,6 +260,13 @@ cl::opt<bool> EnableVPlanNativePath(
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
+// FIXME: Remove this switch once we have divergence analysis. Currently we
+// assume divergent non-backedge branches when this switch is true.
+cl::opt<bool> EnableVPlanPredication(
+ "enable-vplan-predication", cl::init(false), cl::Hidden,
+ cl::desc("Enable VPlan-native vectorization path predicator with "
+ "support for outer loop vectorization."));
+
// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -267,6 +278,13 @@ static cl::opt<bool> VPlanBuildStressTest(
"out right after the build (stress test the VPlan H-CFG construction "
"in the VPlan-native vectorization path)."));
+cl::opt<bool> llvm::EnableLoopInterleaving(
+ "interleave-loops", cl::init(true), cl::Hidden,
+ cl::desc("Enable loop interleaving in Loop vectorization passes"));
+cl::opt<bool> llvm::EnableLoopVectorization(
+ "vectorize-loops", cl::init(true), cl::Hidden,
+ cl::desc("Run the Loop vectorization passes"));
+
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
@@ -311,11 +329,14 @@ static unsigned getReciprocalPredBlockProb() { return 2; }
/// A helper function that adds a 'fast' flag to floating-point operations.
static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V)) {
- FastMathFlags Flags;
- Flags.setFast();
- cast<Instruction>(V)->setFastMathFlags(Flags);
- }
+ if (isa<FPMathOperator>(V))
+ cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
+ return V;
+}
+
+static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
+ if (isa<FPMathOperator>(V))
+ cast<Instruction>(V)->setFastMathFlags(FMF);
return V;
}
@@ -760,7 +781,7 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
const DILocation *DIL = Inst->getDebugLoc();
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
!isa<DbgInfoIntrinsic>(Inst)) {
- auto NewDIL = DIL->cloneWithDuplicationFactor(UF * VF);
+ auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF);
if (NewDIL)
B.SetCurrentDebugLocation(NewDIL.getValue());
else
@@ -836,7 +857,7 @@ public:
AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {}
/// \return An upper bound for the vectorization factor, or None if
- /// vectorization should be avoided up front.
+ /// vectorization and interleaving should be avoided up front.
Optional<unsigned> computeMaxVF(bool OptForSize);
/// \return The most profitable vectorization factor and the cost of that VF.
@@ -1149,6 +1170,18 @@ public:
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
+ /// Estimate cost of an intrinsic call instruction CI if it were vectorized
+ /// with factor VF. Return the cost of the instruction, including
+ /// scalarization overhead if it's needed.
+ unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
+
+ /// Estimate cost of a call instruction CI if it were vectorized with factor
+ /// VF. Return the cost of the instruction, including scalarization overhead
+ /// if it's needed. The flag NeedToScalarize shows if the call needs to be
+ /// scalarized -
+ /// i.e. either vector version isn't available, or is too expensive.
+ unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+
private:
unsigned NumPredStores = 0;
@@ -1201,6 +1234,10 @@ private:
/// element)
unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
+ /// Estimate the overhead of scalarizing an instruction. This is a
+ /// convenience wrapper for the type-based getScalarizationOverhead API.
+ unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
+
/// Returns whether the instruction is a load or store and will be emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -1295,6 +1332,30 @@ private:
DecisionList WideningDecisions;
+ /// Returns true if \p V is expected to be vectorized and it needs to be
+ /// extracted.
+ bool needsExtract(Value *V, unsigned VF) const {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I))
+ return false;
+
+ // Assume we can vectorize V (and hence we need extraction) if the
+ // scalars are not computed yet. This can happen, because it is called
+ // via getScalarizationOverhead from setCostBasedWideningDecision, before
+ // the scalars are collected. That should be a safe assumption in most
+ // cases, because we check if the operands have vectorizable types
+ // beforehand in LoopVectorizationLegality.
+ return Scalars.find(VF) == Scalars.end() ||
+ !isScalarAfterVectorization(I, VF);
+ };
+
+ /// Returns a range containing only operands needing to be extracted.
+ SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
+ unsigned VF) {
+ return SmallVector<Value *, 4>(make_filter_range(
+ Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
+ }
+
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -1372,12 +1433,6 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp,
return false;
}
- if (!Hints.getWidth()) {
- LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n");
- Hints.emitRemarkWithHints();
- return false;
- }
-
if (Hints.getInterleave() > 1) {
// TODO: Interleave support is future work.
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for "
@@ -1447,12 +1502,13 @@ struct LoopVectorize : public FunctionPass {
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
- GetLAA, *ORE);
+ GetLAA, *ORE, PSI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1478,6 +1534,7 @@ struct LoopVectorize : public FunctionPass {
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
};
@@ -2051,7 +2108,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
// A[i] = b; // Member of index 0
// A[i+2] = c; // Member of index 2 (Current instruction)
// Current pointer is pointed to A[i+2], adjust it to A[i].
- NewPtr = Builder.CreateGEP(NewPtr, Builder.getInt32(-Index));
+ NewPtr = Builder.CreateGEP(ScalarTy, NewPtr, Builder.getInt32(-Index));
if (InBounds)
cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);
@@ -2093,8 +2150,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
GroupMask, UndefVec, "wide.masked.vec");
}
else
- NewLoad = Builder.CreateAlignedLoad(NewPtrs[Part],
- Group->getAlignment(), "wide.vec");
+ NewLoad = Builder.CreateAlignedLoad(VecTy, NewPtrs[Part],
+ Group->getAlignment(), "wide.vec");
Group->addMetadata(NewLoad);
NewLoads.push_back(NewLoad);
}
@@ -2239,16 +2296,16 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)));
+ Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(-Part * VF)));
PartPtr->setIsInBounds(InBounds);
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)));
+ Builder.CreateGEP(ScalarDataTy, PartPtr, Builder.getInt32(1 - VF)));
PartPtr->setIsInBounds(InBounds);
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
Mask[Part] = reverseVector(Mask[Part]);
} else {
PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)));
+ Builder.CreateGEP(ScalarDataTy, Ptr, Builder.getInt32(Part * VF)));
PartPtr->setIsInBounds(InBounds);
}
@@ -2305,7 +2362,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
UndefValue::get(DataTy),
"wide.masked.load");
else
- NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
+ NewLI =
+ Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
// Add metadata to the load, but setVectorValue to the reverse shuffle.
addMetadata(NewLI, LI);
@@ -2665,7 +2723,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
return B.CreateGEP(
- nullptr, StartValue,
+ StartValue->getType()->getPointerElementType(), StartValue,
CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(),
&*B.GetInsertPoint())));
}
@@ -2849,26 +2907,42 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
BCResumeVal->addIncoming(EndValue, MiddleBlock);
// Fix the scalar body counter (PHI node).
- unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
-
// The old induction's phi node in the scalar body needs the truncated
// value.
for (BasicBlock *BB : LoopBypassBlocks)
BCResumeVal->addIncoming(II.getStartValue(), BB);
- OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
+ OrigPhi->setIncomingValueForBlock(ScalarPH, BCResumeVal);
}
+ // We need the OrigLoop (scalar loop part) latch terminator to help
+ // produce correct debug info for the middle block BB instructions.
+ // The legality check stage guarantees that the loop will have a single
+ // latch.
+ assert(isa<BranchInst>(OrigLoop->getLoopLatch()->getTerminator()) &&
+ "Scalar loop latch terminator isn't a branch");
+ BranchInst *ScalarLatchBr =
+ cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
+
// Add a check in the middle block to see if we have completed
// all of the iterations in the first vector loop.
// If (N - N%VF) == N, then we *don't* need to run the remainder.
// If tail is to be folded, we know we don't need to run the remainder.
Value *CmpN = Builder.getTrue();
- if (!Cost->foldTailByMasking())
+ if (!Cost->foldTailByMasking()) {
CmpN =
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
CountRoundDown, "cmp.n", MiddleBlock->getTerminator());
- ReplaceInstWithInst(MiddleBlock->getTerminator(),
- BranchInst::Create(ExitBlock, ScalarPH, CmpN));
+
+ // Here we use the same DebugLoc as the scalar loop latch branch instead
+ // of the corresponding compare because they may have ended up with
+ // different line numbers and we want to avoid awkward line stepping while
+ // debugging. E.g. if the compare has a line number inside the loop.
+ cast<Instruction>(CmpN)->setDebugLoc(ScalarLatchBr->getDebugLoc());
+ }
+
+ BranchInst *BrInst = BranchInst::Create(ExitBlock, ScalarPH, CmpN);
+ BrInst->setDebugLoc(ScalarLatchBr->getDebugLoc());
+ ReplaceInstWithInst(MiddleBlock->getTerminator(), BrInst);
// Get ready to start creating new instructions into the vectorized body.
Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt());
@@ -3022,45 +3096,9 @@ static void cse(BasicBlock *BB) {
}
}
-/// Estimate the overhead of scalarizing an instruction. This is a
-/// convenience wrapper for the type-based getScalarizationOverhead API.
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
- const TargetTransformInfo &TTI) {
- if (VF == 1)
- return 0;
-
- unsigned Cost = 0;
- Type *RetTy = ToVectorTy(I->getType(), VF);
- if (!RetTy->isVoidTy() &&
- (!isa<LoadInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()))
- Cost += TTI.getScalarizationOverhead(RetTy, true, false);
-
- // Some targets keep addresses scalar.
- if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
- return Cost;
-
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- SmallVector<const Value *, 4> Operands(CI->arg_operands());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
- else if (!isa<StoreInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()) {
- SmallVector<const Value *, 4> Operands(I->operand_values());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
-
- return Cost;
-}
-
-// Estimate cost of a call instruction CI if it were vectorized with factor VF.
-// Return the cost of the instruction, including scalarization overhead if it's
-// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
-// i.e. either vector version isn't available, or is too expensive.
-static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
- const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI,
- bool &NeedToScalarize) {
+unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
+ unsigned VF,
+ bool &NeedToScalarize) {
Function *F = CI->getCalledFunction();
StringRef FnName = CI->getCalledFunction()->getName();
Type *ScalarRetTy = CI->getType();
@@ -3083,7 +3121,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
+ unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
@@ -3102,12 +3140,8 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
return Cost;
}
-// Estimate cost of an intrinsic call instruction CI if it were vectorized with
-// factor VF. Return the cost of the instruction, including scalarization
-// overhead if it's needed.
-static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
- const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI) {
+unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
+ unsigned VF) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
@@ -3468,7 +3502,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
Start->addIncoming(Incoming, BB);
}
- Phi->setIncomingValue(Phi->getBasicBlockIndex(LoopScalarPreHeader), Start);
+ Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start);
Phi->setName("scalar.recur");
// Finally, fix users of the recurrence outside the loop. The users will need
@@ -3596,14 +3630,23 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = VectorLoopValueMap.getVectorValue(LoopExitInst, 0);
unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
- setDebugLocFromInst(Builder, ReducedPartRdx);
+
+ // The middle block terminator has already been assigned a DebugLoc here (the
+ // OrigLoop's single latch terminator). We want the whole middle block to
+ // appear to execute on this line because: (a) it is all compiler generated,
+ // (b) these instructions are always executed after evaluating the latch
+ // conditional branch, and (c) other passes may add new predecessors which
+ // terminate on this line. This is the easiest way to ensure we don't
+ // accidentally cause an extra step back into the loop while debugging.
+ setDebugLocFromInst(Builder, LoopMiddleBlock->getTerminator());
for (unsigned Part = 1; Part < UF; ++Part) {
Value *RdxPart = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
// Floating point operations had to be 'fast' to enable the reduction.
ReducedPartRdx = addFastMathFlag(
Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxPart,
- ReducedPartRdx, "bin.rdx"));
+ ReducedPartRdx, "bin.rdx"),
+ RdxDesc.getFastMathFlags());
else
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
RdxPart);
@@ -3935,9 +3978,11 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
- auto *NewGEP = GEP->isInBounds()
- ? Builder.CreateInBoundsGEP(Ptr, Indices)
- : Builder.CreateGEP(Ptr, Indices);
+ auto *NewGEP =
+ GEP->isInBounds()
+ ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
+ Indices)
+ : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
VectorLoopValueMap.setVectorValue(&I, Part, NewGEP);
@@ -3955,6 +4000,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
+ case Instruction::FNeg:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::FDiv:
@@ -3965,21 +4011,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- // Just widen binops.
- auto *BinOp = cast<BinaryOperator>(&I);
- setDebugLocFromInst(Builder, BinOp);
+ // Just widen unops and binops.
+ setDebugLocFromInst(Builder, &I);
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
- Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+ SmallVector<Value *, 2> Ops;
+ for (Value *Op : I.operands())
+ Ops.push_back(getOrCreateVectorValue(Op, Part));
- if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
- VecOp->copyIRFlags(BinOp);
+ Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+ if (auto *VecOp = dyn_cast<Instruction>(V))
+ VecOp->copyIRFlags(&I);
// Use this vector value for all users of the original instruction.
VectorLoopValueMap.setVectorValue(&I, Part, V);
- addMetadata(V, BinOp);
+ addMetadata(V, &I);
}
break;
@@ -4088,9 +4135,9 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
assert((UseVectorIntrinsic || !NeedToScalarize) &&
"Instruction should be scalarized elsewhere.");
@@ -4395,6 +4442,13 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
auto *Group = getInterleavedAccessGroup(I);
assert(Group && "Must have a group.");
+ // If the instruction's allocated size doesn't equal its type size, it
+ // requires padding and will be scalarized.
+ auto &DL = I->getModule()->getDataLayout();
+ auto *ScalarTy = getMemInstValueType(I);
+ if (hasIrregularType(ScalarTy, DL, VF))
+ return false;
+
// Check if masking is required.
// A Group may need masking for one of two reasons: it resides in a block that
// needs predication, or it was decided to use masking to deal with gaps.
@@ -4987,6 +5041,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize,
if (LoopCost == 0)
LoopCost = expectedCost(VF).first;
+ assert(LoopCost && "Non-zero loop cost expected");
+
// Clamp the calculated IC to be between the 1 and the max interleave count
// that the target allows.
if (IC > MaxInterleaveCount)
@@ -5314,15 +5370,6 @@ int LoopVectorizationCostModel::computePredInstDiscount(
return true;
};
- // Returns true if an operand that cannot be scalarized must be extracted
- // from a vector. We will account for this scalarization overhead below. Note
- // that the non-void predicated instructions are placed in their own blocks,
- // and their return values are inserted into vectors. Thus, an extract would
- // still be required.
- auto needsExtract = [&](Instruction *I) -> bool {
- return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF);
- };
-
// Compute the expected cost discount from scalarizing the entire expression
// feeding the predicated instruction. We currently only consider expressions
// that are single-use instruction chains.
@@ -5362,7 +5409,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
"Instruction has non-scalar type");
if (canBeScalarized(J))
Worklist.push_back(J);
- else if (needsExtract(J))
+ else if (needsExtract(J, VF))
ScalarCost += TTI.getScalarizationOverhead(
ToVectorTy(J->getType(),VF), false, true);
}
@@ -5484,7 +5531,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
+ Cost += getScalarizationOverhead(I, VF);
// If we have a predicated store, it may not be executed for each vector
// lane. Scale the cost by the probability of executing the predicated
@@ -5636,6 +5683,36 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return VectorizationCostTy(C, TypeNotScalarized);
}
+unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
+ unsigned VF) {
+
+ if (VF == 1)
+ return 0;
+
+ unsigned Cost = 0;
+ Type *RetTy = ToVectorTy(I->getType(), VF);
+ if (!RetTy->isVoidTy() &&
+ (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
+ Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+
+ // Some targets keep addresses scalar.
+ if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+ return Cost;
+
+ // Some targets support efficient element stores.
+ if (isa<StoreInst>(I) && TTI.supportsEfficientVectorElementLoadStore())
+ return Cost;
+
+ // Collect operands to consider.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+
+ // Skip operands that do not require extraction/scalarization and do not incur
+ // any overhead.
+ return Cost + TTI.getOperandsScalarizationOverhead(
+ filterExtractingOperands(Ops, VF), VF);
+}
+
void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
if (VF == 1)
return;
@@ -5876,7 +5953,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// The cost of insertelement and extractelement instructions needed for
// scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
+ Cost += getScalarizationOverhead(I, VF);
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
@@ -5916,6 +5993,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
}
+ case Instruction::FNeg: {
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+ return N * TTI.getArithmeticInstrCost(
+ I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
+ I->getOperand(0));
+ }
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
@@ -5997,16 +6082,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::Call: {
bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+ unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
if (getVectorIntrinsicIDForCall(CI, TLI))
- return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+ return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
return CallCost;
}
default:
// The cost of executing VF copies of the scalar instruction. This opcode
// is unknown. Assume that it is the same as 'mul'.
return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
- getScalarizationOverhead(I, VF, TTI);
+ getScalarizationOverhead(I, VF);
} // end of switch.
}
@@ -6027,10 +6112,13 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
+Pass *createLoopVectorizePass() { return new LoopVectorize(); }
+
Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
bool VectorizeOnlyWhenForced) {
return new LoopVectorize(InterleaveOnlyWhenForced, VectorizeOnlyWhenForced);
@@ -6066,50 +6154,65 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
}
}
+// TODO: we could return a pair of values that specify the max VF and
+// min VF, to be used in `buildVPlans(MinVF, MaxVF)` instead of
+// `buildVPlans(VF, VF)`. We cannot do it because VPLAN at the moment
+// doesn't have a cost model that can choose which plan to execute if
+// more than one is generated.
+static unsigned determineVPlanVF(const unsigned WidestVectorRegBits,
+ LoopVectorizationCostModel &CM) {
+ unsigned WidestType;
+ std::tie(std::ignore, WidestType) = CM.getSmallestAndWidestTypes();
+ return WidestVectorRegBits / WidestType;
+}
+
VectorizationFactor
LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
unsigned UserVF) {
- // Width 1 means no vectorization, cost 0 means uncomputed cost.
- const VectorizationFactor NoVectorization = {1U, 0U};
-
+ unsigned VF = UserVF;
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
if (!OrigLoop->empty()) {
- // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
- // This won't be necessary when UserVF is not required in the VPlan-native
- // path.
- if (VPlanBuildStressTest && !UserVF)
- UserVF = 4;
-
+ // If the user doesn't provide a vectorization factor, determine a
+ // reasonable one.
+ if (!UserVF) {
+ VF = determineVPlanVF(TTI->getRegisterBitWidth(true /* Vector*/), CM);
+ LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
+
+ // Make sure we have a VF > 1 for stress testing.
+ if (VPlanBuildStressTest && VF < 2) {
+ LLVM_DEBUG(dbgs() << "LV: VPlan stress testing: "
+ << "overriding computed VF.\n");
+ VF = 4;
+ }
+ }
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
- assert(UserVF && "Expected UserVF for outer loop vectorization.");
- assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
- buildVPlans(UserVF, UserVF);
+ assert(isPowerOf2_32(VF) && "VF needs to be a power of two");
+ LLVM_DEBUG(dbgs() << "LV: Using " << (UserVF ? "user " : "") << "VF " << VF
+ << " to build VPlans.\n");
+ buildVPlans(VF, VF);
// For VPlan build stress testing, we bail out after VPlan construction.
if (VPlanBuildStressTest)
- return NoVectorization;
+ return VectorizationFactor::Disabled();
- return {UserVF, 0};
+ return {VF, 0};
}
LLVM_DEBUG(
dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the "
"VPlan-native path.\n");
- return NoVectorization;
+ return VectorizationFactor::Disabled();
}
-VectorizationFactor
-LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
+Optional<VectorizationFactor> LoopVectorizationPlanner::plan(bool OptForSize,
+ unsigned UserVF) {
assert(OrigLoop->empty() && "Inner loop expected.");
- // Width 1 means no vectorization, cost 0 means uncomputed cost.
- const VectorizationFactor NoVectorization = {1U, 0U};
Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
- if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
- return NoVectorization;
+ if (!MaybeMaxVF) // Cases that should not to be vectorized nor interleaved.
+ return None;
// Invalidate interleave groups if all blocks of loop will be predicated.
if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
@@ -6129,7 +6232,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
CM.selectUserVectorizationFactor(UserVF);
buildVPlansWithVPRecipes(UserVF, UserVF);
LLVM_DEBUG(printPlans(dbgs()));
- return {UserVF, 0};
+ return {{UserVF, 0}};
}
unsigned MaxVF = MaybeMaxVF.getValue();
@@ -6148,7 +6251,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
buildVPlansWithVPRecipes(1, MaxVF);
LLVM_DEBUG(printPlans(dbgs()));
if (MaxVF == 1)
- return NoVectorization;
+ return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
return CM.selectVectorizationFactor(MaxVF);
@@ -6527,6 +6630,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
case Instruction::FCmp:
case Instruction::FDiv:
case Instruction::FMul:
+ case Instruction::FNeg:
case Instruction::FPExt:
case Instruction::FPToSI:
case Instruction::FPToUI:
@@ -6582,9 +6686,9 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
return UseVectorIntrinsic || !NeedToScalarize;
}
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
@@ -6756,8 +6860,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
}
}
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlanWithVPRecipes(
+VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
// Hold a mapping from predicated instructions to their recipes, in order to
@@ -6772,7 +6875,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = llvm::make_unique<VPlan>(VPBB);
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder);
+ VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
for (Value *V : NeedDef)
Plan->addVPValue(V);
@@ -6881,8 +6984,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
return Plan;
}
-LoopVectorizationPlanner::VPlanPtr
-LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
+VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
@@ -6897,13 +6999,22 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
+ for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
+ Plan->addVF(VF);
+
+ if (EnableVPlanPredication) {
+ VPlanPredicator VPP(*Plan);
+ VPP.predicate();
+
+ // Avoid running transformation to recipes until masked code generation in
+ // VPlan-native path is in place.
+ return Plan;
+ }
+
SmallPtrSet<Instruction *, 1> DeadInstructions;
VPlanHCFGTransforms::VPInstructionsToVPRecipes(
Plan, Legal->getInductionVars(), DeadInstructions);
- for (unsigned VF = Range.Start; VF < Range.End; VF *= 2)
- Plan->addVF(VF);
-
return Plan;
}
@@ -7096,7 +7207,8 @@ static bool processLoopInVPlanNativePath(
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
+ OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
@@ -7109,24 +7221,28 @@ static bool processLoopInVPlanNativePath(
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
// Get user vectorization factor.
- unsigned UserVF = Hints.getWidth();
+ const unsigned UserVF = Hints.getWidth();
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
+ const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
- // code.
- if (VPlanBuildStressTest)
+ // code. Masked vector code generation support will follow soon.
+ // Also, do not attempt to vectorize if no vector code will be produced.
+ if (VPlanBuildStressTest || EnableVPlanPredication ||
+ VectorizationFactor::Disabled() == VF)
return false;
LVP.setBestPlan(VF.Width, 1);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1, LVL,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
&CM);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
@@ -7184,7 +7300,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements(*ORE);
- LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE,
+ LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
&Requirements, &Hints, DB, AC);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
@@ -7192,10 +7308,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->optForSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
@@ -7204,7 +7322,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// pipeline.
if (!L->empty())
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
- ORE, Hints);
+ ORE, BFI, PSI, Hints);
assert(L->empty() && "Inner loop expected.");
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
@@ -7304,14 +7422,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned UserVF = Hints.getWidth();
// Plan how to best vectorize, return the best VF and its cost.
- VectorizationFactor VF = LVP.plan(OptForSize, UserVF);
+ Optional<VectorizationFactor> MaybeVF = LVP.plan(OptForSize, UserVF);
- // Select the interleave count.
- unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
-
- // Get user interleave count.
+ VectorizationFactor VF = VectorizationFactor::Disabled();
+ unsigned IC = 1;
unsigned UserIC = Hints.getInterleave();
+ if (MaybeVF) {
+ VF = *MaybeVF;
+ // Select the interleave count.
+ IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
+ }
+
// Identify the diagnostic messages that should be produced.
std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
bool VectorizeLoop = true, InterleaveLoop = true;
@@ -7330,7 +7452,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizeLoop = false;
}
- if (IC == 1 && UserIC <= 1) {
+ if (!MaybeVF && UserIC > 1) {
+ // Tell the user interleaving was avoided up-front, despite being explicitly
+ // requested.
+ LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and "
+ "interleaving should be avoided up front\n");
+ IntDiagMsg = std::make_pair(
+ "InterleavingAvoided",
+ "Ignoring UserIC, because interleaving was avoided up front");
+ InterleaveLoop = false;
+ } else if (IC == 1 && UserIC <= 1) {
// Tell the user interleaving is not beneficial.
LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
IntDiagMsg = std::make_pair(
@@ -7457,7 +7588,7 @@ bool LoopVectorizePass::runImpl(
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
- OptimizationRemarkEmitter &ORE_) {
+ OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
SE = &SE_;
LI = &LI_;
TTI = &TTI_;
@@ -7469,6 +7600,7 @@ bool LoopVectorizePass::runImpl(
GetLAA = &GetLAA_;
DB = &DB_;
ORE = &ORE_;
+ PSI = PSI_;
// Don't attempt if
// 1. the target claims to have no vector registers, and
@@ -7488,7 +7620,8 @@ bool LoopVectorizePass::runImpl(
// will simplify all loops, regardless of whether anything end up being
// vectorized.
for (auto &L : *LI)
- Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
+ Changed |=
+ simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
@@ -7527,15 +7660,22 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ MemorySSA *MSSA = EnableMSSALoopDependency
+ ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+ : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
bool Changed =
- runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
+ runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
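Earlier in this file the patch also promotes the interleaving and vectorization switches to externally visible options: llvm::EnableLoopInterleaving behind -interleave-loops and llvm::EnableLoopVectorization behind -vectorize-loops (both defaulting to true). Presumably this lets the passes be gated either programmatically or from the command line, e.g. opt -vectorize-loops=false -interleave-loops=false in.ll -S; the exact consumers of these globals are outside this excerpt.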
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2e856a7e6802..27a86c0bca91 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1,9 +1,8 @@
//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,6 +105,10 @@ using namespace slpvectorizer;
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
+cl::opt<bool>
+ llvm::RunSLPVectorization("vectorize-slp", cl::init(false), cl::Hidden,
+ cl::desc("Run the SLP vectorization passes"));
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
@@ -207,6 +210,13 @@ static bool isSplat(ArrayRef<Value *> VL) {
return true;
}
+/// \returns True if \p I is commutative; handles CmpInst as well as other Instructions.
+static bool isCommutative(Instruction *I) {
+ if (auto *IC = dyn_cast<CmpInst>(I))
+ return IC->isCommutative();
+ return I->isCommutative();
+}
+
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
@@ -438,8 +448,9 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- if (hasVectorInstrinsicScalarOpd(ID, 1)) {
- return (CI->getArgOperand(1) == Scalar);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ if (hasVectorInstrinsicScalarOpd(ID, i))
+ return (CI->getArgOperand(i) == Scalar);
}
LLVM_FALLTHROUGH;
}
@@ -474,6 +485,8 @@ namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
+ struct TreeEntry;
+
public:
using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
@@ -517,7 +530,7 @@ public:
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
- int getSpillCost();
+ int getSpillCost() const;
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
@@ -576,7 +589,7 @@ public:
/// the stored value. Otherwise, the size is the width of the largest loaded
/// value reaching V. This method is used by the vectorizer to calculate
/// vectorization factors.
- unsigned getVectorElementSize(Value *V);
+ unsigned getVectorElementSize(Value *V) const;
/// Compute the minimum type sizes required to represent the entries in a
/// vectorizable tree.
@@ -599,13 +612,512 @@ public:
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
- bool isTreeTinyAndNotFullyVectorizable();
+ bool isTreeTinyAndNotFullyVectorizable() const;
OptimizationRemarkEmitter *getORE() { return ORE; }
-private:
- struct TreeEntry;
+ /// This structure holds any data we need about the edges being traversed
+ /// during buildTree_rec(). We keep track of:
+ /// (i) the user TreeEntry index, and
+ /// (ii) the index of the edge.
+ struct EdgeInfo {
+ EdgeInfo() = default;
+ EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
+ : UserTE(UserTE), EdgeIdx(EdgeIdx) {}
+ /// The user TreeEntry.
+ TreeEntry *UserTE = nullptr;
+ /// The operand index of the use.
+ unsigned EdgeIdx = UINT_MAX;
+#ifndef NDEBUG
+ friend inline raw_ostream &operator<<(raw_ostream &OS,
+ const BoUpSLP::EdgeInfo &EI) {
+ EI.dump(OS);
+ return OS;
+ }
+ /// Debug print.
+ void dump(raw_ostream &OS) const {
+ OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
+ << " EdgeIdx:" << EdgeIdx << "}";
+ }
+ LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
+#endif
+ };
+
+ /// A helper data structure to hold the operands of a vector of instructions.
+ /// This supports a fixed vector length for all operand vectors.
+ class VLOperands {
+ /// For each operand we need (i) the value, and (ii) the opcode that it
+ /// would be attached to if the expression was in a left-linearized form.
+ /// This is required to avoid illegal operand reordering.
+ /// For example:
+ /// \verbatim
+ /// 0 Op1
+ /// |/
+ /// Op1 Op2 Linearized + Op2
+ /// \ / ----------> |/
+ /// - -
+ ///
+ /// Op1 - Op2 (0 + Op1) - Op2
+ /// \endverbatim
+ ///
+ /// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
+ ///
+ /// Another way to think of this is to track all the operations across the
+ /// path from the operand all the way to the root of the tree and to
+ /// calculate the operation that corresponds to this path. For example, the
+ /// path from Op2 to the root crosses the RHS of the '-', therefore the
+ /// corresponding operation is a '-' (which matches the one in the
+ /// linearized tree, as shown above).
+ ///
+ /// For lack of a better term, we refer to this operation as Accumulated
+ /// Path Operation (APO).
+ struct OperandData {
+ OperandData() = default;
+ OperandData(Value *V, bool APO, bool IsUsed)
+ : V(V), APO(APO), IsUsed(IsUsed) {}
+ /// The operand value.
+ Value *V = nullptr;
+ /// TreeEntries only allow a single opcode, or an alternate sequence of
+ /// them (e.g., +, -). Therefore, we can safely use a boolean value for the
+ /// APO. It is set to 'true' if 'V' is attached to an inverse operation
+ /// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
+ /// (e.g., Add/Mul)
+ bool APO = false;
+ /// Helper data for the reordering function.
+ bool IsUsed = false;
+ };
+
+ /// During operand reordering, we are trying to select the operand at lane
+ /// that matches best with the operand at the neighboring lane. Our
+ /// selection is based on the type of value we are looking for. For example,
+ /// if the neighboring lane has a load, we need to look for a load that is
+ /// accessing a consecutive address. These strategies are summarized in the
+ /// 'ReorderingMode' enumerator.
+ enum class ReorderingMode {
+ Load, ///< Matching loads to consecutive memory addresses
+ Opcode, ///< Matching instructions based on opcode (same or alternate)
+ Constant, ///< Matching constants
+ Splat, ///< Matching the same instruction multiple times (broadcast)
+ Failed, ///< We failed to create a vectorizable group
+ };
+
+ using OperandDataVec = SmallVector<OperandData, 2>;
+
+ /// A vector of operand vectors.
+ SmallVector<OperandDataVec, 4> OpsVec;
+
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+
+ /// \returns the operand data at \p OpIdx and \p Lane.
+ OperandData &getData(unsigned OpIdx, unsigned Lane) {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// \returns the operand data at \p OpIdx and \p Lane. Const version.
+ const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
+ return OpsVec[OpIdx][Lane];
+ }
+
+ /// Clears the used flag for all entries.
+ void clearUsed() {
+ for (unsigned OpIdx = 0, NumOperands = getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane)
+ OpsVec[OpIdx][Lane].IsUsed = false;
+ }
+
+ /// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
+ void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
+ std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
+ }
+
+ // Search all operands in Ops[*][Lane] for the one that matches best
+ // Ops[OpIdx][LastLane] and return its operand index.
+ // If no good match can be found, return None.
+ Optional<unsigned>
+ getBestOperand(unsigned OpIdx, int Lane, int LastLane,
+ ArrayRef<ReorderingMode> ReorderingModes) {
+ unsigned NumOperands = getNumOperands();
+
+ // The operand of the previous lane at OpIdx.
+ Value *OpLastLane = getData(OpIdx, LastLane).V;
+
+ // Our strategy mode for OpIdx.
+ ReorderingMode RMode = ReorderingModes[OpIdx];
+
+ // The linearized opcode of the operand at OpIdx, Lane.
+ bool OpIdxAPO = getData(OpIdx, Lane).APO;
+
+ const unsigned BestScore = 2;
+ const unsigned GoodScore = 1;
+
+ // The best operand index and its score.
+ // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
+ // are using the score to differentiate between the two.
+ struct BestOpData {
+ Optional<unsigned> Idx = None;
+ unsigned Score = 0;
+ } BestOp;
+
+ // Iterate through all unused operands and look for the best.
+ for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
+ // Get the operand at Idx and Lane.
+ OperandData &OpData = getData(Idx, Lane);
+ Value *Op = OpData.V;
+ bool OpAPO = OpData.APO;
+
+ // Skip already selected operands.
+ if (OpData.IsUsed)
+ continue;
+
+ // Skip if we are trying to move the operand to a position with a
+ // different opcode in the linearized tree form. This would break the
+ // semantics.
+ if (OpAPO != OpIdxAPO)
+ continue;
+
+ // Look for an operand that matches the current mode.
+ switch (RMode) {
+ case ReorderingMode::Load:
+ if (isa<LoadInst>(Op)) {
+ // Figure out which is left and right, so that we can check for
+ // consecutive loads
+ bool LeftToRight = Lane > LastLane;
+ Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
+ Value *OpRight = (LeftToRight) ? Op : OpLastLane;
+ if (isConsecutiveAccess(cast<LoadInst>(OpLeft),
+ cast<LoadInst>(OpRight), DL, SE))
+ BestOp.Idx = Idx;
+ }
+ break;
+ case ReorderingMode::Opcode:
+ // We accept both Instructions and Undefs, but with different scores.
+ if ((isa<Instruction>(Op) && isa<Instruction>(OpLastLane) &&
+ cast<Instruction>(Op)->getOpcode() ==
+ cast<Instruction>(OpLastLane)->getOpcode()) ||
+ (isa<UndefValue>(OpLastLane) && isa<Instruction>(Op)) ||
+ isa<UndefValue>(Op)) {
+ // An instruction has a higher score than an undef.
+ unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
+ }
+ }
+ break;
+ case ReorderingMode::Constant:
+ if (isa<Constant>(Op)) {
+ unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+ if (Score > BestOp.Score) {
+ BestOp.Idx = Idx;
+ BestOp.Score = Score;
+ }
+ }
+ break;
+ case ReorderingMode::Splat:
+ if (Op == OpLastLane)
+ BestOp.Idx = Idx;
+ break;
+ case ReorderingMode::Failed:
+ return None;
+ }
+ }
+
+ if (BestOp.Idx) {
+ getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
+ return BestOp.Idx;
+ }
+ // If we could not find a good match return None.
+ return None;
+ }
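The two-tier scoring used by getBestOperand() for the Opcode and Constant modes (a real match beats an undef, which in turn beats nothing) can be exercised with a minimal standalone sketch; the Candidate type and string kinds below are illustrative stand-ins, not the SLP data structures.

// Sketch of the two-tier scoring used when matching operands by opcode.
#include <cassert>
#include <optional>
#include <string>
#include <vector>

struct Candidate {
  std::string Kind; // e.g. "add", "undef", "load"
  bool Used = false;
};

// Returns the index of the best unused candidate: a matching kind scores 2,
// an undef scores 1, anything else scores 0 and is never selected.
std::optional<unsigned> getBestCandidate(std::vector<Candidate> &Cands,
                                         const std::string &PrevKind) {
  const unsigned BestScore = 2, GoodScore = 1;
  std::optional<unsigned> BestIdx;
  unsigned BestSoFar = 0;
  for (unsigned Idx = 0; Idx != Cands.size(); ++Idx) {
    if (Cands[Idx].Used)
      continue;
    unsigned Score = 0;
    if (Cands[Idx].Kind == PrevKind)
      Score = BestScore;
    else if (Cands[Idx].Kind == "undef")
      Score = GoodScore;
    if (Score > BestSoFar) {
      BestSoFar = Score;
      BestIdx = Idx;
    }
  }
  if (BestIdx)
    Cands[*BestIdx].Used = true; // mirror the IsUsed bookkeeping
  return BestIdx;
}

int main() {
  std::vector<Candidate> Cands = {{"undef"}, {"add"}};
  // The real 'add' wins over the undef even though the undef comes first.
  assert(getBestCandidate(Cands, "add") == 1u);
  return 0;
}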
+
+ /// Helper for reorder(). \returns the lane that we should start reordering
+ /// from. This is the lane with the fewest operands that can be freely
+ /// reordered.
+ unsigned getBestLaneToStartReordering() const {
+ unsigned BestLane = 0;
+ unsigned Min = UINT_MAX;
+ for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
+ ++Lane) {
+ unsigned NumFreeOps = getMaxNumOperandsThatCanBeReordered(Lane);
+ if (NumFreeOps < Min) {
+ Min = NumFreeOps;
+ BestLane = Lane;
+ }
+ }
+ return BestLane;
+ }
+
+ /// \returns the maximum number of operands that are allowed to be reordered
+ /// for \p Lane. This is used as a heuristic for selecting the first lane to
+ /// start operand reordering.
+ unsigned getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
+ unsigned CntTrue = 0;
+ unsigned NumOperands = getNumOperands();
+ // Operands with the same APO can be reordered. We therefore need to count
+ // how many of them we have for each APO, like this: Cnt[APO] = x.
+ // Since we only have two APOs, namely true and false, we can avoid using
+ // a map. Instead we can simply count the number of operands that
+ // correspond to one of them (in this case the 'true' APO), and calculate
+ // the other by subtracting it from the total number of operands.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
+ if (getData(OpIdx, Lane).APO)
+ ++CntTrue;
+ unsigned CntFalse = NumOperands - CntTrue;
+ return std::max(CntTrue, CntFalse);
+ }
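A quick standalone check of the counting trick above, with plain booleans standing in for the per-operand APOs of one lane:

// Sketch of the CntTrue/CntFalse heuristic for one lane.
#include <algorithm>
#include <cassert>
#include <vector>

unsigned maxReorderable(const std::vector<bool> &LaneAPOs) {
  auto CntTrue = static_cast<unsigned>(
      std::count(LaneAPOs.begin(), LaneAPOs.end(), true));
  unsigned CntFalse = static_cast<unsigned>(LaneAPOs.size()) - CntTrue;
  return std::max(CntTrue, CntFalse);
}

int main() {
  // APOs {false, false, true}: two '+' slots and one '-' slot, so at most
  // two operands of this lane can be freely reordered.
  assert(maxReorderable({false, false, true}) == 2u);
  return 0;
}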
+
+ /// Go through the instructions in VL and append their operands.
+ void appendOperandsOfVL(ArrayRef<Value *> VL) {
+ assert(!VL.empty() && "Bad VL");
+ assert((empty() || VL.size() == getNumLanes()) &&
+ "Expected same number of lanes");
+ assert(isa<Instruction>(VL[0]) && "Expected instruction");
+ unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
+ OpsVec.resize(NumOperands);
+ unsigned NumLanes = VL.size();
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ OpsVec[OpIdx].resize(NumLanes);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
+ // Our tree has just 3 nodes: the root and two operands.
+ // It is therefore trivial to get the APO. We only need to check the
+ // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
+ // RHS operand. The LHS operand of both add and sub is never attached
+ // to an inverse operation in the linearized form, so its APO is false.
+ // The RHS operand's APO is true only if VL[Lane] is an inverse operation.
+
+ // Since operand reordering is performed on groups of commutative
+ // operations or alternating sequences (e.g., +, -), we can safely
+ // tell the inverse operations by checking commutativity.
+ bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+ bool APO = (OpIdx == 0) ? false : IsInverseOperation;
+ OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
+ APO, false};
+ }
+ }
+ }
+
+ /// \returns the number of operands.
+ unsigned getNumOperands() const { return OpsVec.size(); }
+
+ /// \returns the number of lanes.
+ unsigned getNumLanes() const { return OpsVec[0].size(); }
+
+ /// \returns the operand value at \p OpIdx and \p Lane.
+ Value *getValue(unsigned OpIdx, unsigned Lane) const {
+ return getData(OpIdx, Lane).V;
+ }
+ /// \returns true if the data structure is empty.
+ bool empty() const { return OpsVec.empty(); }
+
+ /// Clears the data.
+ void clear() { OpsVec.clear(); }
+
+ /// \returns true if there are enough operands identical to \p Op to fill
+ /// the whole vector.
+ /// Note: This modifies the 'IsUsed' flag, so a clearUsed() must follow.
+ bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
+ bool OpAPO = getData(OpIdx, Lane).APO;
+ for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
+ if (Ln == Lane)
+ continue;
+ // This is set to true if we found a candidate for broadcast at lane Ln.
+ bool FoundCandidate = false;
+ for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
+ OperandData &Data = getData(OpI, Ln);
+ if (Data.APO != OpAPO || Data.IsUsed)
+ continue;
+ if (Data.V == Op) {
+ FoundCandidate = true;
+ Data.IsUsed = true;
+ break;
+ }
+ }
+ if (!FoundCandidate)
+ return false;
+ }
+ return true;
+ }
+
+ public:
+ /// Initialize with all the operands of the instruction vector \p RootVL.
+ VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
+ ScalarEvolution &SE)
+ : DL(DL), SE(SE) {
+ // Append all the operands of RootVL.
+ appendOperandsOfVL(RootVL);
+ }
+
+ /// \returns a value vector with the operands across all lanes for the
+ /// operand at \p OpIdx.
+ ValueList getVL(unsigned OpIdx) const {
+ ValueList OpVL(OpsVec[OpIdx].size());
+ assert(OpsVec[OpIdx].size() == getNumLanes() &&
+ "Expected same num of lanes across all operands");
+ for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
+ OpVL[Lane] = OpsVec[OpIdx][Lane].V;
+ return OpVL;
+ }
+
+ // Performs operand reordering for 2 or more operands.
+ // The original operands are in OpsVec[OpIdx][Lane]; they are reordered in
+ // place, and the result can be read back with getVL(OpIdx).
+ void reorder() {
+ unsigned NumOperands = getNumOperands();
+ unsigned NumLanes = getNumLanes();
+ // Each operand has its own mode. We are using this mode to help us select
+ // the instructions for each lane, so that they match best with the ones
+ // we have selected so far.
+ SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);
+
+ // This is a greedy single-pass algorithm. We are going over each lane
+ // once and deciding on the best order right away with no back-tracking.
+ // However, in order to increase its effectiveness, we start with the lane
+ // that has operands that can move the least. For example, given the
+ // following lanes:
+ // Lane 0 : A[0] = B[0] + C[0] // Visited 3rd
+ // Lane 1 : A[1] = C[1] - B[1] // Visited 1st
+ // Lane 2 : A[2] = B[2] + C[2] // Visited 2nd
+ // Lane 3 : A[3] = C[3] - B[3] // Visited 4th
+ // we will start at Lane 1, since the operands of the subtraction cannot
+ // be reordered. Then we will visit the rest of the lanes in a circular
+ // fashion. That is, Lane 2, then Lane 0, and finally Lane 3.
+
+ // Find the first lane that we will start our search from.
+ unsigned FirstLane = getBestLaneToStartReordering();
+
+ // Initialize the modes.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ Value *OpLane0 = getValue(OpIdx, FirstLane);
+ // Keep track if we have instructions with all the same opcode on one
+ // side.
+ if (isa<LoadInst>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Load;
+ else if (isa<Instruction>(OpLane0)) {
+ // Check if OpLane0 should be broadcast.
+ if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
+ ReorderingModes[OpIdx] = ReorderingMode::Splat;
+ else
+ ReorderingModes[OpIdx] = ReorderingMode::Opcode;
+ }
+ else if (isa<Constant>(OpLane0))
+ ReorderingModes[OpIdx] = ReorderingMode::Constant;
+ else if (isa<Argument>(OpLane0))
+ // Our best hope is a Splat. It may save some cost in some cases.
+ ReorderingModes[OpIdx] = ReorderingMode::Splat;
+ else
+ // NOTE: This should be unreachable.
+ ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ }
+
+ // If the initial strategy fails for any of the operand indexes, then we
+ // perform reordering again in a second pass. This helps avoid assigning
+ // high priority to the failed strategy, and should improve reordering for
+ // the non-failed operand indexes.
+ for (int Pass = 0; Pass != 2; ++Pass) {
+ // Skip the second pass if the first pass did not fail.
+ bool StrategyFailed = false;
+ // Mark all operand data as free to use.
+ clearUsed();
+ // We keep the original operand order for the FirstLane, so reorder the
+ // rest of the lanes. We are visiting the nodes in a circular fashion,
+ // using FirstLane as the center point and increasing the radius
+ // distance.
+ for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
+ // Visit the lane on the right and then the lane on the left.
+ for (int Direction : {+1, -1}) {
+ int Lane = FirstLane + Direction * Distance;
+ if (Lane < 0 || Lane >= (int)NumLanes)
+ continue;
+ int LastLane = Lane - Direction;
+ assert(LastLane >= 0 && LastLane < (int)NumLanes &&
+ "Out of bounds");
+ // Look for a good match for each operand.
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ // Search for the operand that best matches OpsVec[OpIdx][LastLane].
+ Optional<unsigned> BestIdx =
+ getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
+ // By not selecting a value, we allow the operands that follow to
+ // select a better matching value. We will get a non-null value in
+ // the next run of getBestOperand().
+ if (BestIdx) {
+ // Swap the current operand with the one returned by
+ // getBestOperand().
+ swap(OpIdx, BestIdx.getValue(), Lane);
+ } else {
+ // We failed to find a best operand, set mode to 'Failed'.
+ ReorderingModes[OpIdx] = ReorderingMode::Failed;
+ // Enable the second pass.
+ StrategyFailed = true;
+ }
+ }
+ }
+ }
+ // Skip second pass if the strategy did not fail.
+ if (!StrategyFailed)
+ break;
+ }
+ }
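The center-out, circular lane order that reorder() walks can be reproduced in isolation; the snippet below (plain C++, hypothetical helper name) regenerates the 4-lane example from the comment above.

// Sketch of the lane visitation order used by reorder(): center-out, circular.
#include <cassert>
#include <initializer_list>
#include <vector>

std::vector<int> visitOrder(int FirstLane, int NumLanes) {
  std::vector<int> Order{FirstLane};
  for (int Distance = 1; Distance != NumLanes; ++Distance)
    for (int Direction : {+1, -1}) {
      int Lane = FirstLane + Direction * Distance;
      if (Lane < 0 || Lane >= NumLanes)
        continue; // fell off either end of the bundle
      Order.push_back(Lane);
    }
  return Order;
}

int main() {
  // Matches the 4-lane example in the comments: start at lane 1,
  // then visit lane 2, lane 0, and finally lane 3.
  assert(visitOrder(1, 4) == (std::vector<int>{1, 2, 0, 3}));
  return 0;
}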
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
+ switch (RMode) {
+ case ReorderingMode::Load:
+ return "Load";
+ case ReorderingMode::Opcode:
+ return "Opcode";
+ case ReorderingMode::Constant:
+ return "Constant";
+ case ReorderingMode::Splat:
+ return "Splat";
+ case ReorderingMode::Failed:
+ return "Failed";
+ }
+ llvm_unreachable("Unimplemented Reordering Type");
+ }
+
+ LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
+ raw_ostream &OS) {
+ return OS << getModeStr(RMode);
+ }
+
+ /// Debug print.
+ LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
+ printMode(RMode, dbgs());
+ }
+
+ friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
+ return printMode(RMode, OS);
+ }
+
+ LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
+ const unsigned Indent = 2;
+ unsigned Cnt = 0;
+ for (const OperandDataVec &OpDataVec : OpsVec) {
+ OS << "Operand " << Cnt++ << "\n";
+ for (const OperandData &OpData : OpDataVec) {
+ OS.indent(Indent) << "{";
+ if (Value *V = OpData.V)
+ OS << *V;
+ else
+ OS << "null";
+ OS << ", APO:" << OpData.APO << "}\n";
+ }
+ OS << "\n";
+ }
+ return OS;
+ }
+
+ /// Debug print.
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
+ };
+
+private:
/// Checks if all users of \p I are part of the vectorization tree.
bool areAllUsersVectorized(Instruction *I) const;
@@ -613,7 +1125,8 @@ private:
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
+ const EdgeInfo &EI);
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a
@@ -631,12 +1144,12 @@ private:
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars.
- int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices);
+ int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices) const;
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
- int getGatherCost(ArrayRef<Value *> VL);
+ int getGatherCost(ArrayRef<Value *> VL) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@@ -648,22 +1161,18 @@ private:
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
- bool isFullyVectorizableTinyTree();
+ bool isFullyVectorizableTinyTree() const;
- /// \reorder commutative operands in alt shuffle if they result in
- /// vectorized code.
- void reorderAltShuffleOperands(const InstructionsState &S,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right);
-
- /// \reorder commutative operands to get better probability of
+ /// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
- void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right);
+ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+ SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right,
+ const DataLayout &DL,
+ ScalarEvolution &SE);
struct TreeEntry {
- TreeEntry(std::vector<TreeEntry> &Container) : Container(Container) {}
+ using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
+ TreeEntry(VecTreeTy &Container) : Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
@@ -696,20 +1205,103 @@ private:
/// to be a pointer and needs to be able to initialize the child iterator.
/// Thus we need a reference back to the container to translate the indices
/// to entries.
- std::vector<TreeEntry> &Container;
+ VecTreeTy &Container;
/// The TreeEntry index containing the user of this entry. We can actually
/// have multiple users so the data structure is not truly a tree.
- SmallVector<int, 1> UserTreeIndices;
+ SmallVector<EdgeInfo, 1> UserTreeIndices;
+
+ /// The index of this treeEntry in VectorizableTree.
+ int Idx = -1;
+
+ private:
+ /// The operands of each instruction in each lane Operands[op_index][lane].
+ /// Note: This helps avoid the replication of the code that performs the
+ /// reordering of operands during buildTree_rec() and vectorizeTree().
+ SmallVector<ValueList, 2> Operands;
+
+ public:
+ /// Set this bundle's \p OpIdx'th operand to \p OpVL.
+ void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
+ ArrayRef<unsigned> ReuseShuffleIndices) {
+ if (Operands.size() < OpIdx + 1)
+ Operands.resize(OpIdx + 1);
+ assert(Operands[OpIdx].size() == 0 && "Already resized?");
+ Operands[OpIdx].resize(Scalars.size());
+ for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
+ Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
+ ? OpVL[ReuseShuffleIndices[Lane]]
+ : OpVL[Lane];
+ }
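A small standalone model of the remapping performed by setOperand() when ReuseShuffleIndices is non-empty; the integer values and index pattern below are illustrative only.

// Sketch: storing one operand bundle per lane, optionally remapping the
// incoming values through reuse-shuffle indices (simplified types).
#include <cassert>
#include <vector>

std::vector<int> setOperandLanes(const std::vector<int> &OpVL,
                                 const std::vector<unsigned> &ReuseIdx,
                                 size_t NumLanes) {
  std::vector<int> Lanes(NumLanes);
  for (size_t Lane = 0; Lane != NumLanes; ++Lane)
    Lanes[Lane] = ReuseIdx.empty() ? OpVL[Lane] : OpVL[ReuseIdx[Lane]];
  return Lanes;
}

int main() {
  std::vector<int> OpVL = {10, 20, 30, 40};
  // Remap the incoming operands so lanes 0/1 reuse value 0 and lanes 2/3 value 1.
  std::vector<int> Lanes = setOperandLanes(OpVL, {0, 0, 1, 1}, 4);
  assert((Lanes == std::vector<int>{10, 10, 20, 20}));
  return 0;
}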
+
+ /// If there is a user TreeEntry, then set its operand.
+ void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
+ ArrayRef<Value *> OpVL,
+ ArrayRef<unsigned> ReuseShuffleIndices) {
+ if (UserTreeIdx.UserTE)
+ UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
+ ReuseShuffleIndices);
+ }
+
+ /// \returns the \p OpIdx operand of this TreeEntry.
+ ValueList &getOperand(unsigned OpIdx) {
+ assert(OpIdx < Operands.size() && "Off bounds");
+ return Operands[OpIdx];
+ }
+
+ /// \return the single \p OpIdx operand.
+ Value *getSingleOperand(unsigned OpIdx) const {
+ assert(OpIdx < Operands.size() && "Off bounds");
+ assert(!Operands[OpIdx].empty() && "No operand available");
+ return Operands[OpIdx][0];
+ }
+
+#ifndef NDEBUG
+ /// Debug printer.
+ LLVM_DUMP_METHOD void dump() const {
+ dbgs() << Idx << ".\n";
+ for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
+ dbgs() << "Operand " << OpI << ":\n";
+ for (const Value *V : Operands[OpI])
+ dbgs().indent(2) << *V << "\n";
+ }
+ dbgs() << "Scalars: \n";
+ for (Value *V : Scalars)
+ dbgs().indent(2) << *V << "\n";
+ dbgs() << "NeedToGather: " << NeedToGather << "\n";
+ dbgs() << "VectorizedValue: ";
+ if (VectorizedValue)
+ dbgs() << *VectorizedValue;
+ else
+ dbgs() << "NULL";
+ dbgs() << "\n";
+ dbgs() << "ReuseShuffleIndices: ";
+ if (ReuseShuffleIndices.empty())
+ dbgs() << "Emtpy";
+ else
+ for (unsigned Idx : ReuseShuffleIndices)
+ dbgs() << Idx << ", ";
+ dbgs() << "\n";
+ dbgs() << "ReorderIndices: ";
+ for (unsigned Idx : ReorderIndices)
+ dbgs() << Idx << ", ";
+ dbgs() << "\n";
+ dbgs() << "UserTreeIndices: ";
+ for (const auto &EInfo : UserTreeIndices)
+ dbgs() << EInfo << ", ";
+ dbgs() << "\n";
+ }
+#endif
};
/// Create a new VectorizableTree entry.
- void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
- ArrayRef<unsigned> ReorderIndices = None) {
- VectorizableTree.emplace_back(VectorizableTree);
- int idx = VectorizableTree.size() - 1;
- TreeEntry *Last = &VectorizableTree[idx];
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
+ const EdgeInfo &UserTreeIdx,
+ ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<unsigned> ReorderIndices = None) {
+ VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree));
+ TreeEntry *Last = VectorizableTree.back().get();
+ Last->Idx = VectorizableTree.size() - 1;
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->NeedToGather = !Vectorized;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
@@ -718,25 +1310,44 @@ private:
if (Vectorized) {
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
- ScalarToTreeEntry[VL[i]] = idx;
+ ScalarToTreeEntry[VL[i]] = Last->Idx;
}
} else {
MustGather.insert(VL.begin(), VL.end());
}
- if (UserTreeIdx >= 0)
+ if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);
- UserTreeIdx = idx;
+
+ Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
+ return Last;
}
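Switching the container to unique_ptr elements keeps TreeEntry addresses stable while VectorizableTree grows, which is what lets newTreeEntry() hand out raw pointers. A minimal sketch of that ownership pattern, with made-up Node/Tree names:

// Sketch: owning container of unique_ptr nodes with stable raw-pointer handles.
#include <cassert>
#include <memory>
#include <vector>

struct Node {
  int Idx = -1;
  int Payload = 0;
};

struct Tree {
  std::vector<std::unique_ptr<Node>> Nodes;

  Node *newNode(int Payload) {
    Nodes.push_back(std::make_unique<Node>());
    Node *Last = Nodes.back().get();
    Last->Idx = static_cast<int>(Nodes.size()) - 1;
    Last->Payload = Payload;
    return Last; // stays valid even if Nodes reallocates later
  }
};

int main() {
  Tree T;
  Node *First = T.newNode(42);
  for (int I = 0; I < 1000; ++I) // force reallocation of the outer vector
    T.newNode(I);
  assert(First->Payload == 42 && First->Idx == 0);
  return 0;
}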
/// -- Vectorization State --
/// Holds all of the tree entries.
- std::vector<TreeEntry> VectorizableTree;
+ TreeEntry::VecTreeTy VectorizableTree;
+
+#ifndef NDEBUG
+ /// Debug printer.
+ LLVM_DUMP_METHOD void dumpVectorizableTree() const {
+ for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
+ VectorizableTree[Id]->dump();
+ dbgs() << "\n";
+ }
+ }
+#endif
TreeEntry *getTreeEntry(Value *V) {
auto I = ScalarToTreeEntry.find(V);
if (I != ScalarToTreeEntry.end())
- return &VectorizableTree[I->second];
+ return VectorizableTree[I->second].get();
+ return nullptr;
+ }
+
+ const TreeEntry *getTreeEntry(Value *V) const {
+ auto I = ScalarToTreeEntry.find(V);
+ if (I != ScalarToTreeEntry.end())
+ return VectorizableTree[I->second].get();
return nullptr;
}
@@ -1246,21 +1857,25 @@ template <> struct GraphTraits<BoUpSLP *> {
/// NodeRef has to be a pointer per the GraphWriter.
using NodeRef = TreeEntry *;
+ using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
+
/// Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
struct ChildIteratorType
- : public iterator_adaptor_base<ChildIteratorType,
- SmallVector<int, 1>::iterator> {
- std::vector<TreeEntry> &VectorizableTree;
+ : public iterator_adaptor_base<
+ ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
+ ContainerTy &VectorizableTree;
- ChildIteratorType(SmallVector<int, 1>::iterator W,
- std::vector<TreeEntry> &VT)
+ ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
+ ContainerTy &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
- NodeRef operator*() { return &VectorizableTree[*I]; }
+ NodeRef operator*() { return I->UserTE; }
};
- static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
+ static NodeRef getEntryNode(BoUpSLP &R) {
+ return R.VectorizableTree[0].get();
+ }
static ChildIteratorType child_begin(NodeRef N) {
return {N->UserTreeIndices.begin(), N->Container};
@@ -1272,7 +1887,19 @@ template <> struct GraphTraits<BoUpSLP *> {
/// For the node iterator we just need to turn the TreeEntry iterator into a
/// TreeEntry* iterator so that it dereferences to NodeRef.
- using nodes_iterator = pointer_iterator<std::vector<TreeEntry>::iterator>;
+ class nodes_iterator {
+ using ItTy = ContainerTy::iterator;
+ ItTy It;
+
+ public:
+ nodes_iterator(const ItTy &It2) : It(It2) {}
+ NodeRef operator*() { return It->get(); }
+ nodes_iterator operator++() {
+ ++It;
+ return *this;
+ }
+ bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
+ };
static nodes_iterator nodes_begin(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.begin());
@@ -1331,11 +1958,11 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
- buildTree_rec(Roots, 0, -1);
+ buildTree_rec(Roots, 0, EdgeInfo());
// Collect the values that we need to extract from the tree.
- for (TreeEntry &EIdx : VectorizableTree) {
- TreeEntry *Entry = &EIdx;
+ for (auto &TEPtr : VectorizableTree) {
+ TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->NeedToGather)
@@ -1393,7 +2020,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
- int UserTreeIdx) {
+ const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
InstructionsState S = getSameOpcode(VL);
@@ -1450,6 +2077,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
+ E->trySetUserTEOperand(UserTreeIdx, VL, None);
return;
}
@@ -1468,8 +2096,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
+ // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (MustGather.count(VL[i])) {
+ if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, UserTreeIdx);
return;
@@ -1548,7 +2177,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@@ -1558,7 +2187,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
@@ -1571,6 +2200,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
++NumOpsWantToKeepOriginalOrder;
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
ReuseShuffleIndicies);
+ // This is a special case, as it does not gather, but at the same time
+ // we are not extending buildTree_rec() towards the operands.
+ ValueList Op0;
+ Op0.assign(VL.size(), VL0->getOperand(0));
+ VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
return;
}
if (!CurrentOrder.empty()) {
@@ -1588,6 +2222,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
++StoredCurrentOrderAndNum->getSecond();
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
StoredCurrentOrderAndNum->getFirst());
+ // This is a special case, as it does not gather, but at the same time
+ // we are not extending buildTree_rec() towards the operands.
+ ValueList Op0;
+ Op0.assign(VL.size(), VL0->getOperand(0));
+ VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
@@ -1693,7 +2332,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1702,7 +2341,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
@@ -1710,10 +2349,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
Type *ComparedTy = VL0->getOperand(0)->getType();
for (unsigned i = 1, e = VL.size(); i < e; ++i) {
CmpInst *Cmp = cast<CmpInst>(VL[i]);
- if (Cmp->getPredicate() != P0 ||
+ if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
@@ -1723,20 +2363,34 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
- for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *j : VL)
- Operands.push_back(cast<Instruction>(j)->getOperand(i));
-
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ ValueList Left, Right;
+ if (cast<CmpInst>(VL0)->isCommutative()) {
+ // Commutative predicate - collect + sort operands of the instructions
+ // so that each side is more likely to have the same opcode.
+ assert(P0 == SwapP0 && "Commutative Predicate mismatch");
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ } else {
+ // Collect operands - commute if it uses the swapped predicate.
+ for (Value *V : VL) {
+ auto *Cmp = cast<CmpInst>(V);
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ if (Cmp->getPredicate() != P0)
+ std::swap(LHS, RHS);
+ Left.push_back(LHS);
+ Right.push_back(RHS);
+ }
}
+
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
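The swapped-predicate handling added above (accept either P0 or its swapped form, then commute the lanes that use the swapped form) can be modelled standalone; the two-value predicate enum and integer operands below are simplifications.

// Sketch: normalizing a bundle of compares to one predicate by commuting lanes
// that use the swapped predicate (hypothetical, simplified types).
#include <cassert>
#include <utility>
#include <vector>

enum class Pred { SLT, SGT };

Pred swapped(Pred P) { return P == Pred::SLT ? Pred::SGT : Pred::SLT; }

struct Cmp {
  Pred P;
  int LHS, RHS; // stand-ins for the IR operands
};

// Collect operands so that every lane reads as "LHS P0 RHS".
void collectOperands(const std::vector<Cmp> &VL, Pred P0,
                     std::vector<int> &Left, std::vector<int> &Right) {
  for (const Cmp &C : VL) {
    assert((C.P == P0 || C.P == swapped(P0)) && "unexpected predicate");
    int L = C.LHS, R = C.RHS;
    if (C.P != P0) // this lane uses the swapped predicate; commute it
      std::swap(L, R);
    Left.push_back(L);
    Right.push_back(R);
  }
}

int main() {
  // Lane 0: 1 < 2.  Lane 1: 4 > 3, which is 3 < 4 after commuting.
  std::vector<Cmp> VL = {{Pred::SLT, 1, 2}, {Pred::SGT, 4, 3}};
  std::vector<int> Left, Right;
  collectOperands(VL, Pred::SLT, Left, Right);
  assert(Left[0] == 1 && Right[0] == 2);
  assert(Left[1] == 3 && Right[1] == 4);
  return 0;
}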
case Instruction::Select:
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -1754,17 +2408,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+ case Instruction::Xor: {
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
- reorderInputsAccordingToOpcode(S.getOpcode(), VL, Left, Right);
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
@@ -1774,10 +2428,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
-
+ }
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
@@ -1815,7 +2469,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
@@ -1823,7 +2477,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
@@ -1837,14 +2491,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
return;
}
case Instruction::Call: {
@@ -1860,9 +2514,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
Function *Int = CI->getCalledFunction();
- Value *A1I = nullptr;
- if (hasVectorInstrinsicScalarOpd(ID, 1))
- A1I = CI->getArgOperand(1);
+ unsigned NumArgs = CI->getNumArgOperands();
+ SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
+ for (unsigned j = 0; j != NumArgs; ++j)
+ if (hasVectorInstrinsicScalarOpd(ID, j))
+ ScalarArgs[j] = CI->getArgOperand(j);
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -1874,16 +2530,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
<< "\n");
return;
}
- // ctlz,cttz and powi are special intrinsics whose second argument
- // should be same in order for them to be vectorized.
- if (hasVectorInstrinsicScalarOpd(ID, 1)) {
- Value *A1J = CI2->getArgOperand(1);
- if (A1I != A1J) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
- << " argument " << A1I << "!=" << A1J << "\n");
- return;
+ // Some intrinsics have scalar arguments, which must be the same in every
+ // call for the bundle to be vectorized.
+ for (unsigned j = 0; j != NumArgs; ++j) {
+ if (hasVectorInstrinsicScalarOpd(ID, j)) {
+ Value *A1J = CI2->getArgOperand(j);
+ if (ScalarArgs[j] != A1J) {
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+ << " argument " << ScalarArgs[j] << "!=" << A1J
+ << "\n");
+ return;
+ }
}
}
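The generalized check (every scalar-argument position must carry the same value in all calls of the bundle, not just argument 1) is easy to model in isolation; the Call struct and argument values below are made up for illustration.

// Sketch: a bundle of calls is only vectorizable if the "scalar" argument
// positions hold the same value in every call.
#include <cassert>
#include <vector>

struct Call {
  std::vector<int> Args;
};

bool scalarArgsMatch(const std::vector<Call> &Bundle,
                     const std::vector<bool> &IsScalarArg) {
  const Call &First = Bundle.front();
  for (size_t J = 0; J != IsScalarArg.size(); ++J) {
    if (!IsScalarArg[J])
      continue;
    for (const Call &C : Bundle)
      if (C.Args[J] != First.Args[J])
        return false; // mismatched scalar operand across the bundle
  }
  return true;
}

int main() {
  // Like powi: argument 1 is scalar and must match across the bundle.
  std::vector<Call> Good = {{{10, 3}}, {{20, 3}}};
  std::vector<Call> Bad  = {{{10, 3}}, {{20, 4}}};
  assert(scalarArgsMatch(Good, {false, true}));
  assert(!scalarArgsMatch(Bad, {false, true}));
  return 0;
}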
// Verify that the bundle operands are identical between the two calls.
@@ -1899,7 +2558,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -1907,11 +2566,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
- case Instruction::ShuffleVector:
+ case Instruction::ShuffleVector: {
// If this is not an alternate sequence of opcode like add-sub
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
@@ -1920,15 +2579,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
- reorderAltShuffleOperands(S, VL, Left, Right);
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
@@ -1938,10 +2597,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
-
+ }
default:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
@@ -2223,6 +2882,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
return ReuseShuffleCost + VecCost - ScalarCost;
}
+ case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -2260,7 +2920,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
ConstantInt *CInt0 = nullptr;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
const Instruction *I = cast<Instruction>(VL[i]);
- ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(1));
+ unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
+ ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
if (!CInt) {
Op2VK = TargetTransformInfo::OK_AnyValue;
Op2VP = TargetTransformInfo::OP_None;
@@ -2413,31 +3074,31 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
}
-bool BoUpSLP::isFullyVectorizableTinyTree() {
+bool BoUpSLP::isFullyVectorizableTinyTree() const {
LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
<< VectorizableTree.size() << " is fully vectorizable .\n");
// We only handle trees of heights 1 and 2.
- if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather)
+ if (VectorizableTree.size() == 1 && !VectorizableTree[0]->NeedToGather)
return true;
if (VectorizableTree.size() != 2)
return false;
// Handle splat and all-constants stores.
- if (!VectorizableTree[0].NeedToGather &&
- (allConstant(VectorizableTree[1].Scalars) ||
- isSplat(VectorizableTree[1].Scalars)))
+ if (!VectorizableTree[0]->NeedToGather &&
+ (allConstant(VectorizableTree[1]->Scalars) ||
+ isSplat(VectorizableTree[1]->Scalars)))
return true;
// Gathering cost would be too much for tiny trees.
- if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+ if (VectorizableTree[0]->NeedToGather || VectorizableTree[1]->NeedToGather)
return false;
return true;
}
-bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
if (VectorizableTree.size() >= MinTreeSize)
@@ -2457,19 +3118,19 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
return true;
}
-int BoUpSLP::getSpillCost() {
+int BoUpSLP::getSpillCost() const {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
- unsigned BundleWidth = VectorizableTree.front().Scalars.size();
+ unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
int Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
Instruction *PrevInst = nullptr;
- for (const auto &N : VectorizableTree) {
- Instruction *Inst = dyn_cast<Instruction>(N.Scalars[0]);
+ for (const auto &TEPtr : VectorizableTree) {
+ Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
if (!Inst)
continue;
@@ -2494,6 +3155,7 @@ int BoUpSLP::getSpillCost() {
});
// Now find the sequence of instructions between PrevInst and Inst.
+ unsigned NumCalls = 0;
BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
PrevInstIt =
PrevInst->getIterator().getReverse();
@@ -2506,16 +3168,19 @@ int BoUpSLP::getSpillCost() {
// Debug information doesn't impact spill cost.
if ((isa<CallInst>(&*PrevInstIt) &&
!isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
- &*PrevInstIt != PrevInst) {
- SmallVector<Type*, 4> V;
- for (auto *II : LiveValues)
- V.push_back(VectorType::get(II->getType(), BundleWidth));
- Cost += TTI->getCostOfKeepingLiveOverCall(V);
- }
+ &*PrevInstIt != PrevInst)
+ NumCalls++;
++PrevInstIt;
}
+ if (NumCalls) {
+ SmallVector<Type*, 4> V;
+ for (auto *II : LiveValues)
+ V.push_back(VectorType::get(II->getType(), BundleWidth));
+ Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
+ }
+
PrevInst = Inst;
}
@@ -2527,10 +3192,10 @@ int BoUpSLP::getTreeCost() {
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
- unsigned BundleWidth = VectorizableTree[0].Scalars.size();
+ unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
- TreeEntry &TE = VectorizableTree[I];
+ TreeEntry &TE = *VectorizableTree[I].get();
// We create duplicate tree entries for gather sequences that have multiple
// uses. However, we should not compute the cost of duplicate sequences.
@@ -2545,10 +3210,11 @@ int BoUpSLP::getTreeCost() {
// existing heuristics based on tree size may yield different results.
//
if (TE.NeedToGather &&
- std::any_of(std::next(VectorizableTree.begin(), I + 1),
- VectorizableTree.end(), [TE](TreeEntry &Entry) {
- return Entry.NeedToGather && Entry.isSame(TE.Scalars);
- }))
+ std::any_of(
+ std::next(VectorizableTree.begin(), I + 1), VectorizableTree.end(),
+ [TE](const std::unique_ptr<TreeEntry> &EntryPtr) {
+ return EntryPtr->NeedToGather && EntryPtr->isSame(TE.Scalars);
+ }))
continue;
int C = getEntryCost(&TE);
@@ -2575,7 +3241,7 @@ int BoUpSLP::getTreeCost() {
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
- auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+ auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto Extend =
@@ -2608,17 +3274,17 @@ int BoUpSLP::getTreeCost() {
}
int BoUpSLP::getGatherCost(Type *Ty,
- const DenseSet<unsigned> &ShuffledIndices) {
+ const DenseSet<unsigned> &ShuffledIndices) const {
int Cost = 0;
for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
if (!ShuffledIndices.count(i))
Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
if (!ShuffledIndices.empty())
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
+ Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
-int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
+int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -2638,221 +3304,19 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
return getGatherCost(VecTy, ShuffledElements);
}
-// Reorder commutative operations in alternate shuffle if the resulting vectors
-// are consecutive loads. This would allow us to vectorize the tree.
-// If we have something like-
-// load a[0] - load b[0]
-// load b[1] + load a[1]
-// load a[2] - load b[2]
-// load a[3] + load b[3]
-// Reordering the second load b[1] load a[1] would allow us to vectorize this
-// code.
-void BoUpSLP::reorderAltShuffleOperands(const InstructionsState &S,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
- // Push left and right operands of binary operation into Left and Right
- for (Value *V : VL) {
- auto *I = cast<Instruction>(V);
- assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
- Left.push_back(I->getOperand(0));
- Right.push_back(I->getOperand(1));
- }
-
- // Reorder if we have a commutative operation and consecutive access
- // are on either side of the alternate instructions.
- for (unsigned j = 0; j < VL.size() - 1; ++j) {
- if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- Instruction *VL1 = cast<Instruction>(VL[j]);
- Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j], Right[j]);
- continue;
- } else if (VL2->isCommutative() &&
- isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- // else unchanged
- }
- }
- if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- Instruction *VL1 = cast<Instruction>(VL[j]);
- Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j], Right[j]);
- continue;
- } else if (VL2->isCommutative() &&
- isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- // else unchanged
- }
- }
- }
-}
-
-// Return true if I should be commuted before adding it's left and right
-// operands to the arrays Left and Right.
-//
-// The vectorizer is trying to either have all elements one side being
-// instruction with the same opcode to enable further vectorization, or having
-// a splat to lower the vectorizing cost.
-static bool shouldReorderOperands(
- int i, unsigned Opcode, Instruction &I, ArrayRef<Value *> Left,
- ArrayRef<Value *> Right, bool AllSameOpcodeLeft, bool AllSameOpcodeRight,
- bool SplatLeft, bool SplatRight, Value *&VLeft, Value *&VRight) {
- VLeft = I.getOperand(0);
- VRight = I.getOperand(1);
- // If we have "SplatRight", try to see if commuting is needed to preserve it.
- if (SplatRight) {
- if (VRight == Right[i - 1])
- // Preserve SplatRight
- return false;
- if (VLeft == Right[i - 1]) {
- // Commuting would preserve SplatRight, but we don't want to break
- // SplatLeft either, i.e. preserve the original order if possible.
- // (FIXME: why do we care?)
- if (SplatLeft && VLeft == Left[i - 1])
- return false;
- return true;
- }
- }
- // Symmetrically handle Right side.
- if (SplatLeft) {
- if (VLeft == Left[i - 1])
- // Preserve SplatLeft
- return false;
- if (VRight == Left[i - 1])
- return true;
- }
-
- Instruction *ILeft = dyn_cast<Instruction>(VLeft);
- Instruction *IRight = dyn_cast<Instruction>(VRight);
-
- // If we have "AllSameOpcodeRight", try to see if the left operands preserves
- // it and not the right, in this case we want to commute.
- if (AllSameOpcodeRight) {
- unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
- if (IRight && RightPrevOpcode == IRight->getOpcode())
- // Do not commute, a match on the right preserves AllSameOpcodeRight
- return false;
- if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
- // We have a match and may want to commute, but first check if there is
- // not also a match on the existing operands on the Left to preserve
- // AllSameOpcodeLeft, i.e. preserve the original order if possible.
- // (FIXME: why do we care?)
- if (AllSameOpcodeLeft && ILeft &&
- cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
- return false;
- return true;
- }
- }
- // Symmetrically handle Left side.
- if (AllSameOpcodeLeft) {
- unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
- if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
- return false;
- if (IRight && LeftPrevOpcode == IRight->getOpcode())
- return true;
- }
- return false;
-}
-
-void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode,
- ArrayRef<Value *> VL,
- SmallVectorImpl<Value *> &Left,
- SmallVectorImpl<Value *> &Right) {
- if (!VL.empty()) {
- // Peel the first iteration out of the loop since there's nothing
- // interesting to do anyway and it simplifies the checks in the loop.
- auto *I = cast<Instruction>(VL[0]);
- Value *VLeft = I->getOperand(0);
- Value *VRight = I->getOperand(1);
- if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
- // Favor having instruction to the right. FIXME: why?
- std::swap(VLeft, VRight);
- Left.push_back(VLeft);
- Right.push_back(VRight);
- }
-
- // Keep track if we have instructions with all the same opcode on one side.
- bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
- bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
- // Keep track if we have one side with all the same value (broadcast).
- bool SplatLeft = true;
- bool SplatRight = true;
-
- for (unsigned i = 1, e = VL.size(); i != e; ++i) {
- Instruction *I = cast<Instruction>(VL[i]);
- assert(((I->getOpcode() == Opcode && I->isCommutative()) ||
- (I->getOpcode() != Opcode && Instruction::isCommutative(Opcode))) &&
- "Can only process commutative instruction");
- // Commute to favor either a splat or maximizing having the same opcodes on
- // one side.
- Value *VLeft;
- Value *VRight;
- if (shouldReorderOperands(i, Opcode, *I, Left, Right, AllSameOpcodeLeft,
- AllSameOpcodeRight, SplatLeft, SplatRight, VLeft,
- VRight)) {
- Left.push_back(VRight);
- Right.push_back(VLeft);
- } else {
- Left.push_back(VLeft);
- Right.push_back(VRight);
- }
- // Update Splat* and AllSameOpcode* after the insertion.
- SplatRight = SplatRight && (Right[i - 1] == Right[i]);
- SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
- AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
- (cast<Instruction>(Left[i - 1])->getOpcode() ==
- cast<Instruction>(Left[i])->getOpcode());
- AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
- (cast<Instruction>(Right[i - 1])->getOpcode() ==
- cast<Instruction>(Right[i])->getOpcode());
- }
-
- // If one operand end up being broadcast, return this operand order.
- if (SplatRight || SplatLeft)
+// Perform operand reordering on the instructions in VL and return the reordered
+// operands in Left and Right.
+void BoUpSLP::reorderInputsAccordingToOpcode(
+ ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
+ SmallVectorImpl<Value *> &Right, const DataLayout &DL,
+ ScalarEvolution &SE) {
+ if (VL.empty())
return;
-
- // Finally check if we can get longer vectorizable chain by reordering
- // without breaking the good operand order detected above.
- // E.g. If we have something like-
- // load a[0] load b[0]
- // load b[1] load a[1]
- // load a[2] load b[2]
- // load a[3] load b[3]
- // Reordering the second load b[1] load a[1] would allow us to vectorize
- // this code and we still retain AllSameOpcode property.
- // FIXME: This load reordering might break AllSameOpcode in some rare cases
- // such as-
- // add a[0],c[0] load b[0]
- // add a[1],c[2] load b[1]
- // b[2] load b[2]
- // add a[3],c[3] load b[3]
- for (unsigned j = 0, e = VL.size() - 1; j < e; ++j) {
- if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- if (isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- }
- }
- if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
- if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- if (isConsecutiveAccess(L, L1, *DL, *SE)) {
- std::swap(Left[j + 1], Right[j + 1]);
- continue;
- }
- }
- }
- // else unchanged
- }
+ VLOperands Ops(VL, DL, SE);
+ // Reorder the operands in place.
+ Ops.reorder();
+ Left = Ops.getVL(0);
+ Right = Ops.getVL(1);
}
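To show the kind of outcome the reordering is after, here is a deliberately simplified standalone toy that pushes a repeated value onto one side of a commutative bundle so that side becomes a broadcast; it mimics only the Splat strategy and is not the VLOperands algorithm itself.

// Toy: push a repeated value to the Right side so it can be a broadcast.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

using Lane = std::pair<std::string, std::string>; // {Left, Right} operands

void favorSplatOnRight(std::vector<Lane> &Lanes, const std::string &SplatVal) {
  for (Lane &L : Lanes)
    if (L.first == SplatVal && L.second != SplatVal)
      std::swap(L.first, L.second); // legal because the op is commutative
}

int main() {
  // Lanes of a commutative op: {a0, x}, {x, a1}, {a2, x}
  std::vector<Lane> Lanes = {{"a0", "x"}, {"x", "a1"}, {"a2", "x"}};
  favorSplatOnRight(Lanes, "x");
  for (const Lane &L : Lanes)
    assert(L.second == "x"); // the Right side is now a broadcast of 'x'
  return 0;
}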
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
@@ -3082,13 +3546,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
continue;
}
- // Prepare the operand vector.
- for (Value *V : E->Scalars)
- Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
-
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeTree(Operands);
+ Value *Vec = vectorizeTree(E->getOperand(i));
NewPhi->addIncoming(Vec, IBB);
}
@@ -3099,7 +3559,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::ExtractElement: {
if (!E->NeedToGather) {
- Value *V = VL0->getOperand(0);
+ Value *V = E->getSingleOperand(0);
if (!E->ReorderIndices.empty()) {
OrdersType Mask;
inversePermutation(E->ReorderIndices, Mask);
@@ -3132,11 +3592,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::ExtractValue: {
if (!E->NeedToGather) {
- LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
+ LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
- LoadInst *V = Builder.CreateAlignedLoad(Ptr, LI->getAlignment());
+ LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlignment());
Value *NewV = propagateMetadata(V, E->Scalars);
if (!E->ReorderIndices.empty()) {
OrdersType Mask;
@@ -3177,13 +3637,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- ValueList INVL;
- for (Value *V : E->Scalars)
- INVL.push_back(cast<Instruction>(V)->getOperand(0));
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *InVec = vectorizeTree(INVL);
+ Value *InVec = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3202,16 +3658,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
case Instruction::FCmp:
case Instruction::ICmp: {
- ValueList LHSV, RHSV;
- for (Value *V : E->Scalars) {
- LHSV.push_back(cast<Instruction>(V)->getOperand(0));
- RHSV.push_back(cast<Instruction>(V)->getOperand(1));
- }
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *L = vectorizeTree(LHSV);
- Value *R = vectorizeTree(RHSV);
+ Value *L = vectorizeTree(E->getOperand(0));
+ Value *R = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3235,31 +3685,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::Select: {
- ValueList TrueVec, FalseVec, CondVec;
- for (Value *V : E->Scalars) {
- CondVec.push_back(cast<Instruction>(V)->getOperand(0));
- TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
- FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
+ setInsertPointAfterBundle(E->Scalars, S);
+
+ Value *Cond = vectorizeTree(E->getOperand(0));
+ Value *True = vectorizeTree(E->getOperand(1));
+ Value *False = vectorizeTree(E->getOperand(2));
+
+ if (E->VectorizedValue) {
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ return E->VectorizedValue;
}
+ Value *V = Builder.CreateSelect(Cond, True, False);
+ if (NeedToShuffleReuses) {
+ V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
+ E->ReuseShuffleIndices, "shuffle");
+ }
+ E->VectorizedValue = V;
+ ++NumVectorInstructions;
+ return V;
+ }
+ case Instruction::FNeg: {
setInsertPointAfterBundle(E->Scalars, S);
- Value *Cond = vectorizeTree(CondVec);
- Value *True = vectorizeTree(TrueVec);
- Value *False = vectorizeTree(FalseVec);
+ Value *Op = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
- Value *V = Builder.CreateSelect(Cond, True, False);
+ Value *V = Builder.CreateUnOp(
+ static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
+ propagateIRFlags(V, E->Scalars, VL0);
+ if (auto *I = dyn_cast<Instruction>(V))
+ V = propagateMetadata(I, E->Scalars);
+
if (NeedToShuffleReuses) {
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
E->VectorizedValue = V;
++NumVectorInstructions;
+
return V;
}
case Instruction::Add:
@@ -3280,21 +3748,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- ValueList LHSVL, RHSVL;
- if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
- reorderInputsAccordingToOpcode(S.getOpcode(), E->Scalars, LHSVL,
- RHSVL);
- else
- for (Value *V : E->Scalars) {
- auto *I = cast<Instruction>(V);
- LHSVL.push_back(I->getOperand(0));
- RHSVL.push_back(I->getOperand(1));
- }
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *LHS = vectorizeTree(LHSVL);
- Value *RHS = vectorizeTree(RHSVL);
+ Value *LHS = vectorizeTree(E->getOperand(0));
+ Value *RHS = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
@@ -3341,7 +3798,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
unsigned Alignment = LI->getAlignment();
- LI = Builder.CreateLoad(VecPtr);
+ LI = Builder.CreateLoad(VecTy, VecPtr);
if (!Alignment) {
Alignment = DL->getABITypeAlignment(ScalarLoadTy);
}
@@ -3367,13 +3824,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = SI->getAlignment();
unsigned AS = SI->getPointerAddressSpace();
- ValueList ScalarStoreValues;
- for (Value *V : E->Scalars)
- ScalarStoreValues.push_back(cast<StoreInst>(V)->getValueOperand());
-
setInsertPointAfterBundle(E->Scalars, S);
- Value *VecValue = vectorizeTree(ScalarStoreValues);
+ Value *VecValue = vectorizeTree(E->getOperand(0));
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
@@ -3400,20 +3853,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::GetElementPtr: {
setInsertPointAfterBundle(E->Scalars, S);
- ValueList Op0VL;
- for (Value *V : E->Scalars)
- Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
-
- Value *Op0 = vectorizeTree(Op0VL);
+ Value *Op0 = vectorizeTree(E->getOperand(0));
std::vector<Value *> OpVecs;
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
- ValueList OpVL;
- for (Value *V : E->Scalars)
- OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
-
- Value *OpVec = vectorizeTree(OpVL);
+ Value *OpVec = vectorizeTree(E->getOperand(j));
OpVecs.push_back(OpVec);
}
@@ -3443,20 +3888,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
ValueList OpVL;
- // ctlz,cttz and powi are special intrinsics whose second argument is
- // a scalar. This argument should not be vectorized.
- if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+ // Some intrinsics have scalar arguments. Such arguments should not be
+ // vectorized.
+ if (hasVectorInstrinsicScalarOpd(IID, j)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(j);
OpVecs.push_back(CEI->getArgOperand(j));
continue;
}
- for (Value *V : E->Scalars) {
- CallInst *CEI = cast<CallInst>(V);
- OpVL.push_back(CEI->getArgOperand(j));
- }
- Value *OpVec = vectorizeTree(OpVL);
+ Value *OpVec = vectorizeTree(E->getOperand(j));
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
}
@@ -3485,7 +3926,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::ShuffleVector: {
- ValueList LHSVL, RHSVL;
assert(S.isAltShuffle() &&
((Instruction::isBinaryOp(S.getOpcode()) &&
Instruction::isBinaryOp(S.getAltOpcode())) ||
@@ -3495,16 +3935,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *LHS, *RHS;
if (Instruction::isBinaryOp(S.getOpcode())) {
- reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
setInsertPointAfterBundle(E->Scalars, S);
- LHS = vectorizeTree(LHSVL);
- RHS = vectorizeTree(RHSVL);
+ LHS = vectorizeTree(E->getOperand(0));
+ RHS = vectorizeTree(E->getOperand(1));
} else {
- ValueList INVL;
- for (Value *V : E->Scalars)
- INVL.push_back(cast<Instruction>(V)->getOperand(0));
setInsertPointAfterBundle(E->Scalars, S);
- LHS = vectorizeTree(INVL);
+ LHS = vectorizeTree(E->getOperand(0));
}
if (E->VectorizedValue) {
@@ -3578,20 +4014,20 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
Builder.SetInsertPoint(&F->getEntryBlock().front());
- auto *VectorRoot = vectorizeTree(&VectorizableTree[0]);
+ auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
// If the vectorized tree can be rewritten in a smaller type, we truncate the
// vectorized root. InstCombine will then rewrite the entire expression. We
// sign extend the extracted values below.
- auto *ScalarRoot = VectorizableTree[0].Scalars[0];
+ auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
if (auto *I = dyn_cast<Instruction>(VectorRoot))
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
- auto BundleWidth = VectorizableTree[0].Scalars.size();
+ auto BundleWidth = VectorizableTree[0]->Scalars.size();
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto *VecTy = VectorType::get(MinTy, BundleWidth);
auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
- VectorizableTree[0].VectorizedValue = Trunc;
+ VectorizableTree[0]->VectorizedValue = Trunc;
}
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
@@ -3687,8 +4123,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
// For each vectorized value:
- for (TreeEntry &EIdx : VectorizableTree) {
- TreeEntry *Entry = &EIdx;
+ for (auto &TEPtr : VectorizableTree) {
+ TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->NeedToGather)
@@ -3721,7 +4157,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
Builder.ClearInsertionPoint();
- return VectorizableTree[0].VectorizedValue;
+ return VectorizableTree[0]->VectorizedValue;
}
void BoUpSLP::optimizeGatherSequence() {
@@ -3767,10 +4203,10 @@ void BoUpSLP::optimizeGatherSequence() {
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
- std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
- [this](const DomTreeNode *A, const DomTreeNode *B) {
- return DT->properlyDominates(A, B);
- });
+ llvm::stable_sort(CSEWorkList,
+ [this](const DomTreeNode *A, const DomTreeNode *B) {
+ return DT->properlyDominates(A, B);
+ });
// Perform O(N^2) search over the gather sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
@@ -3989,7 +4425,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
<< "\n");
return true;
}
- UpIter++;
+ ++UpIter;
}
if (DownIter != LowerEnd) {
if (&*DownIter == I) {
@@ -4003,7 +4439,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
<< "\n");
return true;
}
- DownIter++;
+ ++DownIter;
}
assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
"instruction not found in block");
@@ -4253,7 +4689,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
BS->ScheduleStart = nullptr;
}
-unsigned BoUpSLP::getVectorElementSize(Value *V) {
+unsigned BoUpSLP::getVectorElementSize(Value *V) const {
// If V is a store, just return the width of the stored value without
// traversing the expression tree. This is the common case.
if (auto *Store = dyn_cast<StoreInst>(V))
@@ -4390,7 +4826,7 @@ void BoUpSLP::computeMinimumValueSizes() {
return;
// We only attempt to truncate integer expressions.
- auto &TreeRoot = VectorizableTree[0].Scalars;
+ auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
if (!TreeRootIT)
return;
@@ -4411,8 +4847,8 @@ void BoUpSLP::computeMinimumValueSizes() {
// Collect the scalar values of the vectorizable expression. We will use this
// context to determine which values can be demoted. If we see a truncation,
// we mark it as seeding another demotion.
- for (auto &Entry : VectorizableTree)
- Expr.insert(Entry.Scalars.begin(), Entry.Scalars.end());
+ for (auto &EntryPtr : VectorizableTree)
+ Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
// Ensure the roots of the vectorizable tree don't form a cycle. They must
// have a single external user that is not in the vectorizable tree.
@@ -4746,38 +5182,29 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;
- // Do a quadratic search on all of the given stores in reverse order and find
- // all of the pairs of stores that follow each other.
- SmallVector<unsigned, 16> IndexQueue;
- unsigned E = Stores.size();
- IndexQueue.resize(E - 1);
- for (unsigned I = E; I > 0; --I) {
- unsigned Idx = I - 1;
- // If a store has multiple consecutive store candidates, search Stores
- // array according to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
- // This is because usually pairing with immediate succeeding or preceding
- // candidate create the best chance to find slp vectorization opportunity.
- unsigned Offset = 1;
- unsigned Cnt = 0;
- for (unsigned J = 0; J < E - 1; ++J, ++Offset) {
- if (Idx >= Offset) {
- IndexQueue[Cnt] = Idx - Offset;
- ++Cnt;
- }
- if (Idx + Offset < E) {
- IndexQueue[Cnt] = Idx + Offset;
- ++Cnt;
- }
- }
+ auto &&FindConsecutiveAccess =
+ [this, &Stores, &Heads, &Tails, &ConsecutiveChain] (int K, int Idx) {
+ if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
+ return false;
- for (auto K : IndexQueue) {
- if (isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE)) {
Tails.insert(Stores[Idx]);
Heads.insert(Stores[K]);
ConsecutiveChain[Stores[K]] = Stores[Idx];
+ return true;
+ };
+
+ // Do a quadratic search on all of the given stores in reverse order and find
+ // all of the pairs of stores that follow each other.
+ int E = Stores.size();
+ for (int Idx = E - 1; Idx >= 0; --Idx) {
+ // If a store has multiple consecutive store candidates, search according
+ // to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
+    // This is because pairing with an immediately succeeding or preceding
+    // candidate usually creates the best chance for SLP vectorization.
+ for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset)
+ if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
+ (Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
break;
- }
- }
}
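The rewritten loop above folds the alternating Idx-1, Idx+1, Idx-2, Idx+2, ... probe order into a single bound. Below is a minimal standalone sketch of that order, with a hypothetical findFirstConsecutive helper and a caller-supplied predicate standing in for isConsecutiveAccess; it is an illustration, not part of the patch.

    // Visit candidates around the seed index Idx in the order Idx-1, Idx+1,
    // Idx-2, Idx+2, ... and return the first one the predicate accepts.
    #include <algorithm>
    #include <cstdio>
    #include <functional>

    static int findFirstConsecutive(int Idx, int E,
                                    const std::function<bool(int, int)> &IsConsecutive) {
      // F is just large enough for the offsets to reach both index 0 and E-1.
      for (int Offset = 1, F = std::max(E - Idx, Idx + 1); Offset < F; ++Offset) {
        if (Idx >= Offset && IsConsecutive(Idx - Offset, Idx))
          return Idx - Offset;
        if (Idx + Offset < E && IsConsecutive(Idx + Offset, Idx))
          return Idx + Offset;
      }
      return -1; // No partner found.
    }

    int main() {
      // Toy predicate: K is "consecutive" with Idx when K == Idx + 3.
      auto IsConsecutive = [](int K, int Idx) { return K == Idx + 3; };
      printf("%d\n", findFirstConsecutive(2, 8, IsConsecutive)); // prints 5
    }

As in the patch, the first accepted candidate ends the search for that seed index.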
// For stores that start but don't end a link in the chain:
@@ -5740,6 +6167,9 @@ public:
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
Value *VectorizedTree = nullptr;
+
+ // FIXME: Fast-math-flags should be set based on the instructions in the
+ // reduction (not all of 'fast' are required).
IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
FastMathFlags Unsafe;
Unsafe.setFast();
@@ -5929,10 +6359,14 @@ private:
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
- if (!IsPairwiseReduction)
+ if (!IsPairwiseReduction) {
+ // FIXME: The builder should use an FMF guard. It should not be hard-coded
+ // to 'fast'.
+ assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
return createSimpleTargetReduction(
Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
ReductionData.getFlags(), ReductionOps.back());
+ }
Value *TmpVec = VectorizedValue;
for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
@@ -6256,7 +6690,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
// Sort by type.
- std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);
+ llvm::stable_sort(Incoming, PhiTypeSorterFunc);
// Try to vectorize elements base on their type.
for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
@@ -6297,7 +6731,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
SmallVector<WeakVH, 8> PostProcessInstructions;
SmallDenseSet<Instruction *, 4> KeyNodes;
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second) {
if (it->use_empty() && KeyNodes.count(&*it) > 0 &&
diff --git a/lib/Transforms/Vectorize/VPRecipeBuilder.h b/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 15d38ac9c84c..0ca6a6b93cfd 100644
--- a/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -1,9 +1,8 @@
//===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -30,9 +29,6 @@ class VPRecipeBuilder {
/// Target Library Info.
const TargetLibraryInfo *TLI;
- /// Target Transform Info.
- const TargetTransformInfo *TTI;
-
/// The legality analysis.
LoopVectorizationLegality *Legal;
@@ -105,11 +101,9 @@ public:
public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM, VPBuilder &Builder)
- : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
- Builder(Builder) {}
+ : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
  /// Check if a recipe can be created for \p I within the given VF \p Range.
/// If a recipe can be created, it adds it to \p VPBB.
diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp
index 05a5400beb4e..517d759d7bfc 100644
--- a/lib/Transforms/Vectorize/VPlan.cpp
+++ b/lib/Transforms/Vectorize/VPlan.cpp
@@ -1,9 +1,8 @@
//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -374,10 +373,9 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
- BasicBlock *VectorLatchBB = VectorHeaderBB;
// 1. Make room to generate basic-blocks inside loop body if needed.
- VectorLatchBB = VectorHeaderBB->splitBasicBlock(
+ BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
Loop *L = State->LI->getLoopFor(VectorHeaderBB);
L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
@@ -561,6 +559,19 @@ void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
bumpIndent(1);
OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
bumpIndent(1);
+
+ // Dump the block predicate.
+ const VPValue *Pred = BasicBlock->getPredicate();
+ if (Pred) {
+ OS << " +\n" << Indent << " \"BlockPredicate: ";
+ if (const VPInstruction *PredI = dyn_cast<VPInstruction>(Pred)) {
+ PredI->printAsOperand(OS);
+ OS << " (" << DOT::EscapeString(PredI->getParent()->getName())
+ << ")\\l\"";
+ } else
+ Pred->printAsOperand(OS);
+ }
+
for (const VPRecipeBase &Recipe : *BasicBlock)
Recipe.print(OS, Indent);
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h
index 5c1b4a83c30e..8a06412ad590 100644
--- a/lib/Transforms/Vectorize/VPlan.h
+++ b/lib/Transforms/Vectorize/VPlan.h
@@ -1,9 +1,8 @@
//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -353,6 +352,9 @@ private:
/// Successor selector, null for zero or single successor blocks.
VPValue *CondBit = nullptr;
+ /// Current block predicate - null if the block does not need a predicate.
+ VPValue *Predicate = nullptr;
+
/// Add \p Successor as the last successor to this block.
void appendSuccessor(VPBlockBase *Successor) {
assert(Successor && "Cannot add nullptr successor!");
@@ -491,6 +493,12 @@ public:
void setCondBit(VPValue *CV) { CondBit = CV; }
+ VPValue *getPredicate() { return Predicate; }
+
+ const VPValue *getPredicate() const { return Predicate; }
+
+ void setPredicate(VPValue *Pred) { Predicate = Pred; }
+
/// Set a given VPBlockBase \p Successor as the single successor of this
/// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
/// This VPBlockBase must have no successors.
@@ -521,6 +529,15 @@ public:
appendPredecessor(Pred);
}
+  /// Remove all the predecessors of this block.
+ void clearPredecessors() { Predecessors.clear(); }
+
+  /// Remove all the successors of this block and set its condition bit to null.
+ void clearSuccessors() {
+ Successors.clear();
+ CondBit = nullptr;
+ }
+
/// The method which generates the output IR that correspond to this
/// VPBlockBase, thereby "executing" the VPlan.
virtual void execute(struct VPTransformState *State) = 0;
@@ -1491,6 +1508,41 @@ public:
From->removeSuccessor(To);
To->removePredecessor(From);
}
+
+ /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge.
+ static bool isBackEdge(const VPBlockBase *FromBlock,
+ const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {
+ assert(FromBlock->getParent() == ToBlock->getParent() &&
+ FromBlock->getParent() && "Must be in same region");
+ const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock);
+ const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock);
+ if (!FromLoop || !ToLoop || FromLoop != ToLoop)
+ return false;
+
+ // A back-edge is a branch from the loop latch to its header.
+ return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader();
+ }
+
+  /// Returns true if \p Block is a loop latch.
+ static bool blockIsLoopLatch(const VPBlockBase *Block,
+ const VPLoopInfo *VPLInfo) {
+ if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block))
+ return ParentVPL->isLoopLatch(Block);
+
+ return false;
+ }
+
+  /// Count and return the number of successors of \p PredBlock excluding any
+  /// back-edges.
+ static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock,
+ VPLoopInfo *VPLI) {
+ unsigned Count = 0;
+ for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) {
+ if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI))
+ Count++;
+ }
+ return Count;
+ }
};
class VPInterleavedAccessInfo {
diff --git a/lib/Transforms/Vectorize/VPlanDominatorTree.h b/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 1b81097b6d31..19f5d2c00c60 100644
--- a/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -1,9 +1,8 @@
//===-- VPlanDominatorTree.h ------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 0f42694e193b..df96f67288f1 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -1,9 +1,8 @@
//===-- VPlanHCFGBuilder.cpp ----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -64,7 +63,9 @@ private:
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
void fixPhiNodes();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
+#ifndef NDEBUG
bool isExternalDef(Value *Val);
+#endif
VPValue *getOrCreateVPOperand(Value *IRVal);
void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
@@ -119,6 +120,7 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
return VPBB;
}
+#ifndef NDEBUG
// Return true if \p Val is considered an external definition. An external
// definition is either:
// 1. A Value that is not an Instruction. This will be refined in the future.
@@ -154,6 +156,7 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
// Check whether Instruction definition is in loop body.
return !TheLoop->contains(Inst);
}
+#endif
// Create a new VPValue or retrieve an existing one for the Instruction's
// operand \p IRVal. This function must only be used to create/retrieve VPValues
diff --git a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index 3f11dcb5164d..238ee7e6347c 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -1,9 +1,8 @@
//===-- VPlanHCFGBuilder.h --------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
index 3ad7fc7e7b96..7ed7d21b6caa 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.cpp
@@ -1,9 +1,8 @@
//===-- VPlanHCFGTransforms.cpp - Utility VPlan to VPlan transforms -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanHCFGTransforms.h b/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
index ae549c6871b3..79a23c33184f 100644
--- a/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
+++ b/lib/Transforms/Vectorize/VPlanHCFGTransforms.h
@@ -1,9 +1,8 @@
//===- VPlanHCFGTransforms.h - Utility VPlan to VPlan transforms ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanLoopInfo.h b/lib/Transforms/Vectorize/VPlanLoopInfo.h
index 5c2485fc2145..5208f2d58e2b 100644
--- a/lib/Transforms/Vectorize/VPlanLoopInfo.h
+++ b/lib/Transforms/Vectorize/VPlanLoopInfo.h
@@ -1,9 +1,8 @@
//===-- VPLoopInfo.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanPredicator.cpp b/lib/Transforms/Vectorize/VPlanPredicator.cpp
new file mode 100644
index 000000000000..7a80f3ff80a5
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -0,0 +1,248 @@
+//===-- VPlanPredicator.cpp -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the VPlanPredicator class which contains the public
+/// interfaces to predicate and linearize the VPlan region.
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPlanPredicator.h"
+#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "VPlanPredicator"
+
+using namespace llvm;
+
+// Generate VPInstructions at the beginning of CurrBB that calculate the
+// predicate being propagated from PredBB to CurrBB depending on the edge type
+// between them. For example if:
+// i. PredBB is controlled by predicate %BP, and
+// ii. The edge PredBB->CurrBB is the false edge, controlled by the condition
+// bit value %CBV then this function will generate the following two
+// VPInstructions at the start of CurrBB:
+// %IntermediateVal = not %CBV
+// %FinalVal = and %BP %IntermediateVal
+// It returns %FinalVal.
+VPValue *VPlanPredicator::getOrCreateNotPredicate(VPBasicBlock *PredBB,
+ VPBasicBlock *CurrBB) {
+ VPValue *CBV = PredBB->getCondBit();
+
+ // Set the intermediate value - this is either 'CBV', or 'not CBV'
+ // depending on the edge type.
+ EdgeType ET = getEdgeTypeBetween(PredBB, CurrBB);
+ VPValue *IntermediateVal = nullptr;
+ switch (ET) {
+ case EdgeType::TRUE_EDGE:
+ // CurrBB is the true successor of PredBB - nothing to do here.
+ IntermediateVal = CBV;
+ break;
+
+ case EdgeType::FALSE_EDGE:
+ // CurrBB is the False successor of PredBB - compute not of CBV.
+ IntermediateVal = Builder.createNot(CBV);
+ break;
+ }
+
+ // Now AND intermediate value with PredBB's block predicate if it has one.
+ VPValue *BP = PredBB->getPredicate();
+ if (BP)
+ return Builder.createAnd(BP, IntermediateVal);
+ else
+ return IntermediateVal;
+}
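As a rough illustration of the value getOrCreateNotPredicate builds, the incoming edge predicate reduces to a small boolean formula. The sketch below is standalone and hypothetical (plain bools instead of VPValues, an edgePredicate helper that does not exist in LLVM); it assumes a predecessor with no block predicate behaves as if BP were true, matching the early return above.

    #include <cstdio>

    enum class EdgeType { TRUE_EDGE, FALSE_EDGE };

    // BP: predecessor's block predicate, CBV: its condition bit value.
    static bool edgePredicate(bool BP, bool CBV, EdgeType ET) {
      bool Intermediate = (ET == EdgeType::TRUE_EDGE) ? CBV : !CBV;
      return BP && Intermediate;
    }

    int main() {
      printf("%d\n", edgePredicate(true, false, EdgeType::FALSE_EDGE)); // prints 1
      printf("%d\n", edgePredicate(true, true, EdgeType::FALSE_EDGE));  // prints 0
    }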
+
+// Generate a tree of ORs for all IncomingPredicates in WorkList.
+// Note: This function destroys the original Worklist.
+//
+// P1 P2 P3 P4 P5
+// \ / \ / /
+// OR1 OR2 /
+// \ | /
+// \ +/-+
+// \ / |
+// OR3 |
+// \ |
+// OR4 <- Returns this
+// |
+//
+// The algorithm uses a worklist of predicates as its main data structure.
+// We pop a pair of values from the front (e.g. P1 and P2), generate an OR
+// (in this example OR1), and push it back. In this example the worklist
+// contains {P3, P4, P5, OR1}.
+// The process iterates until we have only one element in the Worklist (OR4).
+// The last element is the root predicate which is returned.
+VPValue *VPlanPredicator::genPredicateTree(std::list<VPValue *> &Worklist) {
+ if (Worklist.empty())
+ return nullptr;
+
+ // The worklist initially contains all the leaf nodes. Initialize the tree
+ // using them.
+ while (Worklist.size() >= 2) {
+ // Pop a pair of values from the front.
+ VPValue *LHS = Worklist.front();
+ Worklist.pop_front();
+ VPValue *RHS = Worklist.front();
+ Worklist.pop_front();
+
+ // Create an OR of these values.
+ VPValue *Or = Builder.createOr(LHS, RHS);
+
+ // Push OR to the back of the worklist.
+ Worklist.push_back(Or);
+ }
+
+ assert(Worklist.size() == 1 && "Expected 1 item in worklist");
+
+ // The root is the last node in the worklist.
+ VPValue *Root = Worklist.front();
+
+ // This root needs to replace the existing block predicate. This is done in
+ // the caller function.
+ return Root;
+}
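To see the worklist reduction in isolation, here is a standalone sketch over plain bools (a hypothetical genOrTree helper, with || standing in for Builder.createOr); it performs the same pop-two/push-one iteration until a single root remains.

    #include <cstdio>
    #include <list>

    static bool genOrTree(std::list<bool> Worklist) {
      if (Worklist.empty())
        return false;
      while (Worklist.size() >= 2) {
        bool LHS = Worklist.front();
        Worklist.pop_front();
        bool RHS = Worklist.front();
        Worklist.pop_front();
        Worklist.push_back(LHS || RHS); // Stand-in for Builder.createOr(LHS, RHS).
      }
      return Worklist.front();
    }

    int main() {
      printf("%d\n", genOrTree({false, false, true, false})); // prints 1
      printf("%d\n", genOrTree({false, false}));              // prints 0
    }

Passing the worklist by value keeps the sketch side-effect free; the patch version deliberately consumes its argument, as noted in the comment.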
+
+// Return whether the edge FromBlock -> ToBlock is a TRUE_EDGE or FALSE_EDGE
+VPlanPredicator::EdgeType
+VPlanPredicator::getEdgeTypeBetween(VPBlockBase *FromBlock,
+ VPBlockBase *ToBlock) {
+ unsigned Count = 0;
+ for (VPBlockBase *SuccBlock : FromBlock->getSuccessors()) {
+ if (SuccBlock == ToBlock) {
+ assert(Count < 2 && "Switch not supported currently");
+ return (Count == 0) ? EdgeType::TRUE_EDGE : EdgeType::FALSE_EDGE;
+ }
+ Count++;
+ }
+
+ llvm_unreachable("Broken getEdgeTypeBetween");
+}
+
+// Generate all predicates needed for CurrBlock by going through its immediate
+// predecessor blocks.
+void VPlanPredicator::createOrPropagatePredicates(VPBlockBase *CurrBlock,
+ VPRegionBlock *Region) {
+ // Blocks that dominate region exit inherit the predicate from the region.
+ // Return after setting the predicate.
+ if (VPDomTree.dominates(CurrBlock, Region->getExit())) {
+ VPValue *RegionBP = Region->getPredicate();
+ CurrBlock->setPredicate(RegionBP);
+ return;
+ }
+
+ // Collect all incoming predicates in a worklist.
+ std::list<VPValue *> IncomingPredicates;
+
+ // Set the builder's insertion point to the top of the current BB
+ VPBasicBlock *CurrBB = cast<VPBasicBlock>(CurrBlock->getEntryBasicBlock());
+ Builder.setInsertPoint(CurrBB, CurrBB->begin());
+
+ // For each predecessor, generate the VPInstructions required for
+  // computing 'BP AND (not) CBV' at the top of CurrBB.
+ // Collect the outcome of this calculation for all predecessors
+ // into IncomingPredicates.
+ for (VPBlockBase *PredBlock : CurrBlock->getPredecessors()) {
+ // Skip back-edges
+ if (VPBlockUtils::isBackEdge(PredBlock, CurrBlock, VPLI))
+ continue;
+
+ VPValue *IncomingPredicate = nullptr;
+ unsigned NumPredSuccsNoBE =
+ VPBlockUtils::countSuccessorsNoBE(PredBlock, VPLI);
+
+  // If there is an unconditional branch to CurrBB, then we don't create
+ // edge predicates. We use the predecessor's block predicate instead.
+ if (NumPredSuccsNoBE == 1)
+ IncomingPredicate = PredBlock->getPredicate();
+ else if (NumPredSuccsNoBE == 2) {
+ // Emit recipes into CurrBlock if required
+ assert(isa<VPBasicBlock>(PredBlock) && "Only BBs have multiple exits");
+ IncomingPredicate =
+ getOrCreateNotPredicate(cast<VPBasicBlock>(PredBlock), CurrBB);
+ } else
+ llvm_unreachable("FIXME: switch statement ?");
+
+ if (IncomingPredicate)
+ IncomingPredicates.push_back(IncomingPredicate);
+ }
+
+ // Logically OR all incoming predicates by building the Predicate Tree.
+ VPValue *Predicate = genPredicateTree(IncomingPredicates);
+
+ // Now update the block's predicate with the new one.
+ CurrBlock->setPredicate(Predicate);
+}
+
+// Generate all predicates needed for Region.
+void VPlanPredicator::predicateRegionRec(VPRegionBlock *Region) {
+ VPBasicBlock *EntryBlock = cast<VPBasicBlock>(Region->getEntry());
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(EntryBlock);
+
+ // Generate edge predicates and append them to the block predicate. RPO is
+ // necessary since the predecessor blocks' block predicate needs to be set
+ // before the current block's block predicate can be computed.
+ for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end())) {
+ // TODO: Handle nested regions once we start generating the same.
+ assert(!isa<VPRegionBlock>(Block) && "Nested region not expected");
+ createOrPropagatePredicates(Block, Region);
+ }
+}
+
+// Linearize the CFG within Region.
+// TODO: Predication and linearization need RPOT for every region.
+// This traversal is expensive. Since predication is not adding new
+// blocks, we should be able to compute RPOT once in predication and
+// reuse it here. This becomes even more important once we have nested
+// regions.
+void VPlanPredicator::linearizeRegionRec(VPRegionBlock *Region) {
+ ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
+ VPBlockBase *PrevBlock = nullptr;
+
+ for (VPBlockBase *CurrBlock : make_range(RPOT.begin(), RPOT.end())) {
+ // TODO: Handle nested regions once we start generating the same.
+ assert(!isa<VPRegionBlock>(CurrBlock) && "Nested region not expected");
+
+ // Linearize control flow by adding an unconditional edge between PrevBlock
+    // and CurrBlock, skipping loop headers and latches so that loop header
+    // predecessors and loop latch successors stay intact.
+ if (PrevBlock && !VPLI->isLoopHeader(CurrBlock) &&
+ !VPBlockUtils::blockIsLoopLatch(PrevBlock, VPLI)) {
+
+ LLVM_DEBUG(dbgs() << "Linearizing: " << PrevBlock->getName() << "->"
+ << CurrBlock->getName() << "\n");
+
+ PrevBlock->clearSuccessors();
+ CurrBlock->clearPredecessors();
+ VPBlockUtils::connectBlocks(PrevBlock, CurrBlock);
+ }
+
+ PrevBlock = CurrBlock;
+ }
+}
+
+// Entry point. The driver function for the predicator.
+void VPlanPredicator::predicate(void) {
+ // Predicate the blocks within Region.
+ predicateRegionRec(cast<VPRegionBlock>(Plan.getEntry()));
+
+  // Linearize the blocks within Region.
+ linearizeRegionRec(cast<VPRegionBlock>(Plan.getEntry()));
+}
+
+VPlanPredicator::VPlanPredicator(VPlan &Plan)
+ : Plan(Plan), VPLI(&(Plan.getVPLoopInfo())) {
+ // FIXME: Predicator is currently computing the dominator information for the
+ // top region. Once we start storing dominator information in a VPRegionBlock,
+ // we can avoid this recalculation.
+ VPDomTree.recalculate(*(cast<VPRegionBlock>(Plan.getEntry())));
+}
diff --git a/lib/Transforms/Vectorize/VPlanPredicator.h b/lib/Transforms/Vectorize/VPlanPredicator.h
new file mode 100644
index 000000000000..692afd2978d5
--- /dev/null
+++ b/lib/Transforms/Vectorize/VPlanPredicator.h
@@ -0,0 +1,74 @@
+//===-- VPlanPredicator.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the VPlanPredicator class which contains the public
+/// interfaces to predicate and linearize the VPlan region.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
+
+#include "LoopVectorizationPlanner.h"
+#include "VPlan.h"
+#include "VPlanDominatorTree.h"
+
+namespace llvm {
+
+class VPlanPredicator {
+private:
+ enum class EdgeType {
+ TRUE_EDGE,
+ FALSE_EDGE,
+ };
+
+ // VPlan being predicated.
+ VPlan &Plan;
+
+ // VPLoopInfo for Plan's HCFG.
+ VPLoopInfo *VPLI;
+
+ // Dominator tree for Plan's HCFG.
+ VPDominatorTree VPDomTree;
+
+ // VPlan builder used to generate VPInstructions for block predicates.
+ VPBuilder Builder;
+
+ /// Get the type of edge from \p FromBlock to \p ToBlock. Returns TRUE_EDGE if
+ /// \p ToBlock is either the unconditional successor or the conditional true
+ /// successor of \p FromBlock and FALSE_EDGE otherwise.
+ EdgeType getEdgeTypeBetween(VPBlockBase *FromBlock, VPBlockBase *ToBlock);
+
+ /// Create and return VPValue corresponding to the predicate for the edge from
+  /// \p PredBB to \p CurrBB.
+ VPValue *getOrCreateNotPredicate(VPBasicBlock *PredBB, VPBasicBlock *CurrBB);
+
+ /// Generate and return the result of ORing all the predicate VPValues in \p
+ /// Worklist.
+ VPValue *genPredicateTree(std::list<VPValue *> &Worklist);
+
+ /// Create or propagate predicate for \p CurrBlock in region \p Region using
+  /// predicate(s) of its predecessor(s).
+ void createOrPropagatePredicates(VPBlockBase *CurrBlock,
+ VPRegionBlock *Region);
+
+ /// Predicate the CFG within \p Region.
+ void predicateRegionRec(VPRegionBlock *Region);
+
+ /// Linearize the CFG within \p Region.
+ void linearizeRegionRec(VPRegionBlock *Region);
+
+public:
+ VPlanPredicator(VPlan &Plan);
+
+ /// Predicate Plan's HCFG.
+ void predicate(void);
+};
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_PREDICATOR_H
diff --git a/lib/Transforms/Vectorize/VPlanSLP.cpp b/lib/Transforms/Vectorize/VPlanSLP.cpp
index ad3a85a6f760..e5ab24e52df6 100644
--- a/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -1,9 +1,8 @@
//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// This file implements SLP analysis based on VPlan. The analysis is based on
diff --git a/lib/Transforms/Vectorize/VPlanValue.h b/lib/Transforms/Vectorize/VPlanValue.h
index b473579b699f..7b6c228c229e 100644
--- a/lib/Transforms/Vectorize/VPlanValue.h
+++ b/lib/Transforms/Vectorize/VPlanValue.h
@@ -1,9 +1,8 @@
//===- VPlanValue.h - Represent Values in Vectorizer Plan -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanVerifier.cpp b/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 054bed4e177f..394b1b93113b 100644
--- a/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -1,9 +1,8 @@
//===-- VPlanVerifier.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/VPlanVerifier.h b/lib/Transforms/Vectorize/VPlanVerifier.h
index d2f99d006a66..7d2b26252172 100644
--- a/lib/Transforms/Vectorize/VPlanVerifier.h
+++ b/lib/Transforms/Vectorize/VPlanVerifier.h
@@ -1,9 +1,8 @@
//===-- VPlanVerifier.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 559ab1968844..6a4f9169c2af 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,9 +1,8 @@
//===-- Vectorize.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/WindowsManifest/WindowsManifestMerger.cpp b/lib/WindowsManifest/WindowsManifestMerger.cpp
index 0a8abed230d4..d092ab493c9b 100644
--- a/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -1,9 +1,8 @@
//===-- WindowsManifestMerger.cpp ------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
diff --git a/lib/XRay/BlockIndexer.cpp b/lib/XRay/BlockIndexer.cpp
index 4dbe2d2717ad..a99a6815f0d1 100644
--- a/lib/XRay/BlockIndexer.cpp
+++ b/lib/XRay/BlockIndexer.cpp
@@ -1,9 +1,8 @@
//===- BlockIndexer.cpp - FDR Block Indexing VIsitor ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/XRay/BlockPrinter.cpp b/lib/XRay/BlockPrinter.cpp
index 0acebee0cbdd..63a60c3c56a3 100644
--- a/lib/XRay/BlockPrinter.cpp
+++ b/lib/XRay/BlockPrinter.cpp
@@ -1,9 +1,8 @@
//===- BlockPrinter.cpp - FDR Block Pretty Printer Implementation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/BlockPrinter.h"
diff --git a/lib/XRay/BlockVerifier.cpp b/lib/XRay/BlockVerifier.cpp
index 5e949ec4e46a..9fb49fa9a860 100644
--- a/lib/XRay/BlockVerifier.cpp
+++ b/lib/XRay/BlockVerifier.cpp
@@ -1,9 +1,8 @@
//===- BlockVerifier.cpp - FDR Block Verifier -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/BlockVerifier.h"
diff --git a/lib/XRay/FDRRecordProducer.cpp b/lib/XRay/FDRRecordProducer.cpp
index 25b3ee8af219..452bc6c55fb8 100644
--- a/lib/XRay/FDRRecordProducer.cpp
+++ b/lib/XRay/FDRRecordProducer.cpp
@@ -1,9 +1,8 @@
//===- FDRRecordProducer.cpp - XRay FDR Mode Record Producer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRRecordProducer.h"
diff --git a/lib/XRay/FDRRecords.cpp b/lib/XRay/FDRRecords.cpp
index 2a40d5e06229..ff315d35417d 100644
--- a/lib/XRay/FDRRecords.cpp
+++ b/lib/XRay/FDRRecords.cpp
@@ -1,9 +1,8 @@
//===- FDRRecords.cpp - XRay Flight Data Recorder Mode Records -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/XRay/FDRTraceExpander.cpp b/lib/XRay/FDRTraceExpander.cpp
index a6e1521da87f..cb7f66bccd7e 100644
--- a/lib/XRay/FDRTraceExpander.cpp
+++ b/lib/XRay/FDRTraceExpander.cpp
@@ -1,9 +1,8 @@
//===- FDRTraceExpander.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRTraceExpander.h"
diff --git a/lib/XRay/FDRTraceWriter.cpp b/lib/XRay/FDRTraceWriter.cpp
index c5224f4be094..f50dc19b4be8 100644
--- a/lib/XRay/FDRTraceWriter.cpp
+++ b/lib/XRay/FDRTraceWriter.cpp
@@ -1,9 +1,8 @@
//===- FDRTraceWriter.cpp - XRay FDR Trace Writer ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/XRay/FileHeaderReader.cpp b/lib/XRay/FileHeaderReader.cpp
index 0b3fb8b6f692..3fb021906a6f 100644
--- a/lib/XRay/FileHeaderReader.cpp
+++ b/lib/XRay/FileHeaderReader.cpp
@@ -1,9 +1,8 @@
//===- FileHeaderReader.cpp - XRay File Header Reader --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FileHeaderReader.h"
diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp
index 9f2b179486f0..fe5e941f7ea6 100644
--- a/lib/XRay/InstrumentationMap.cpp
+++ b/lib/XRay/InstrumentationMap.cpp
@@ -1,9 +1,8 @@
//===- InstrumentationMap.cpp - XRay Instrumentation Map ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -79,9 +78,10 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
"Failed to find XRay instrumentation map.",
std::make_error_code(std::errc::executable_format_error));
- if (I->getContents(Contents))
- return errorCodeToError(
- std::make_error_code(std::errc::executable_format_error));
+ if (Expected<StringRef> E = I->getContents())
+ Contents = *E;
+ else
+ return E.takeError();
RelocMap Relocs;
if (ObjFile.getBinary()->isELF()) {
@@ -172,13 +172,14 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
}
static Error
-loadYAML(int Fd, size_t FileSize, StringRef Filename,
+loadYAML(sys::fs::file_t Fd, size_t FileSize, StringRef Filename,
InstrumentationMap::SledContainer &Sleds,
InstrumentationMap::FunctionAddressMap &FunctionAddresses,
InstrumentationMap::FunctionAddressReverseMap &FunctionIds) {
std::error_code EC;
sys::fs::mapped_file_region MappedFile(
Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ sys::fs::closeFile(Fd);
if (EC)
return make_error<StringError>(
Twine("Failed memory-mapping file '") + Filename + "'.", EC);
@@ -214,9 +215,12 @@ llvm::xray::loadInstrumentationMap(StringRef Filename) {
if (!ObjectFileOrError) {
auto E = ObjectFileOrError.takeError();
// We try to load it as YAML if the ELF load didn't work.
- int Fd;
- if (sys::fs::openFileForRead(Filename, Fd))
+ Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
+ if (!FdOrErr) {
+ // Report the ELF load error if YAML failed.
+ consumeError(FdOrErr.takeError());
return std::move(E);
+ }
uint64_t FileSize;
if (sys::fs::file_size(Filename, FileSize))
@@ -229,7 +233,7 @@ llvm::xray::loadInstrumentationMap(StringRef Filename) {
// From this point on the errors will be only for the YAML parts, so we
// consume the errors at this point.
consumeError(std::move(E));
- if (auto E = loadYAML(Fd, FileSize, Filename, Map.Sleds,
+ if (auto E = loadYAML(*FdOrErr, FileSize, Filename, Map.Sleds,
Map.FunctionAddresses, Map.FunctionIds))
return std::move(E);
} else if (auto E = loadObj(Filename, *ObjectFileOrError, Map.Sleds,
diff --git a/lib/XRay/LogBuilderConsumer.cpp b/lib/XRay/LogBuilderConsumer.cpp
index 88b7d2d728b1..ffb49f9eb4e9 100644
--- a/lib/XRay/LogBuilderConsumer.cpp
+++ b/lib/XRay/LogBuilderConsumer.cpp
@@ -1,9 +1,8 @@
//===- FDRRecordConsumer.h - XRay Flight Data Recorder Mode Records -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRRecordConsumer.h"
diff --git a/lib/XRay/Profile.cpp b/lib/XRay/Profile.cpp
index e8a082884d69..e34b182f2e02 100644
--- a/lib/XRay/Profile.cpp
+++ b/lib/XRay/Profile.cpp
@@ -1,9 +1,8 @@
//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -261,10 +260,9 @@ Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
}
Expected<Profile> loadProfile(StringRef Filename) {
- int Fd;
- if (auto EC = sys::fs::openFileForRead(Filename, Fd))
- return make_error<StringError>(
- Twine("Cannot read profile from '") + Filename + "'", EC);
+ Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
+ if (!FdOrErr)
+ return FdOrErr.takeError();
uint64_t FileSize;
if (auto EC = sys::fs::file_size(Filename, FileSize))
@@ -273,7 +271,9 @@ Expected<Profile> loadProfile(StringRef Filename) {
std::error_code EC;
sys::fs::mapped_file_region MappedFile(
- Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
+ EC);
+ sys::fs::closeFile(*FdOrErr);
if (EC)
return make_error<StringError>(
Twine("Cannot mmap profile '") + Filename + "'", EC);
diff --git a/lib/XRay/RecordInitializer.cpp b/lib/XRay/RecordInitializer.cpp
index f136a1e456b7..78163031a8cc 100644
--- a/lib/XRay/RecordInitializer.cpp
+++ b/lib/XRay/RecordInitializer.cpp
@@ -1,9 +1,8 @@
//===- FDRRecordProducer.cpp - XRay FDR Mode Record Producer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/FDRRecords.h"
diff --git a/lib/XRay/RecordPrinter.cpp b/lib/XRay/RecordPrinter.cpp
index 71ea7d0e969f..32d42104db95 100644
--- a/lib/XRay/RecordPrinter.cpp
+++ b/lib/XRay/RecordPrinter.cpp
@@ -1,9 +1,8 @@
//===- RecordPrinter.cpp - FDR Record Printer -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/RecordPrinter.h"
diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp
index 4f28f3f754c1..b9b67c561c66 100644
--- a/lib/XRay/Trace.cpp
+++ b/lib/XRay/Trace.cpp
@@ -1,9 +1,8 @@
//===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -372,11 +371,9 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
} // namespace
Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
- int Fd;
- if (auto EC = sys::fs::openFileForRead(Filename, Fd)) {
- return make_error<StringError>(
- Twine("Cannot read log from '") + Filename + "'", EC);
- }
+ Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
+ if (!FdOrErr)
+ return FdOrErr.takeError();
uint64_t FileSize;
if (auto EC = sys::fs::file_size(Filename, FileSize)) {
@@ -392,7 +389,9 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
// Map the opened file into memory and use a StringRef to access it later.
std::error_code EC;
sys::fs::mapped_file_region MappedFile(
- Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
+ EC);
+ sys::fs::closeFile(*FdOrErr);
if (EC) {
return make_error<StringError>(
Twine("Cannot read log from '") + Filename + "'", EC);
@@ -462,10 +461,9 @@ Expected<Trace> llvm::xray::loadTrace(const DataExtractor &DE, bool Sort) {
}
if (Sort)
- std::stable_sort(T.Records.begin(), T.Records.end(),
- [&](const XRayRecord &L, const XRayRecord &R) {
- return L.TSC < R.TSC;
- });
+ llvm::stable_sort(T.Records, [&](const XRayRecord &L, const XRayRecord &R) {
+ return L.TSC < R.TSC;
+ });
return std::move(T);
}
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 3832e075a693..942028cad80b 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -1,9 +1,8 @@
//===- BugDriver.cpp - Top-Level BugPoint class implementation ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index bc60ae753548..75f166b21b2c 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -1,9 +1,8 @@
//===- BugDriver.h - Top-Level BugPoint class -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index ef6a214fde20..aab9debf9b59 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -1,9 +1,8 @@
//===- CrashDebugger.cpp - Debug compilation crashes ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 1b86b103d835..40f198b88d1a 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -1,9 +1,8 @@
//===- ExecutionDriver.cpp - Allow execution of LLVM program --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 48f1575c25eb..105702de3f1d 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -1,9 +1,8 @@
//===- ExtractFunction.cpp - Extract a function from Program --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/FindBugs.cpp b/tools/bugpoint/FindBugs.cpp
index a695e875b787..2b1146da9680 100644
--- a/tools/bugpoint/FindBugs.cpp
+++ b/tools/bugpoint/FindBugs.cpp
@@ -1,9 +1,8 @@
//===-- FindBugs.cpp - Run Many Different Optimizations -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/ListReducer.h b/tools/bugpoint/ListReducer.h
index 0f9db022d555..04f2207a31ed 100644
--- a/tools/bugpoint/ListReducer.h
+++ b/tools/bugpoint/ListReducer.h
@@ -1,9 +1,8 @@
//===- ListReducer.h - Trim down list while retaining property --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 375bee7a0d50..1621a51c91d6 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -1,9 +1,8 @@
//===- Miscompilation.cpp - Debug program miscompilations -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -592,9 +591,6 @@ ExtractBlocks(BugDriver &BD,
if (Linker::linkModules(*ProgClone, std::move(Extracted)))
exit(1);
- // Set the new program and delete the old one.
- BD.setNewProgram(std::move(ProgClone));
-
// Update the list of miscompiled functions.
MiscompiledFunctions.clear();
@@ -604,6 +600,9 @@ ExtractBlocks(BugDriver &BD,
MiscompiledFunctions.push_back(NewF);
}
+ // Set the new program and delete the old one.
+ BD.setNewProgram(std::move(ProgClone));
+
return true;
}
@@ -706,8 +705,8 @@ static Expected<bool> TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
if (!Optimized) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
- BD.setNewProgram(std::move(Test));
BD.EmitProgressBitcode(*Test, "pass-error", false);
+ BD.setNewProgram(std::move(Test));
if (Error E = BD.debugOptimizerCrash())
return std::move(E);
return false;
@@ -827,13 +826,14 @@ CleanupAndPrepareModules(BugDriver &BD, std::unique_ptr<Module> Test,
// Add the resolver to the Safe module.
// Prototype: void *getPointerToNamedFunction(const char* Name)
- Constant *resolverFunc = Safe->getOrInsertFunction(
+ FunctionCallee resolverFunc = Safe->getOrInsertFunction(
"getPointerToNamedFunction", Type::getInt8PtrTy(Safe->getContext()),
Type::getInt8PtrTy(Safe->getContext()));
// Use the function we just added to get addresses of functions we need.
for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) {
- if (F->isDeclaration() && !F->use_empty() && &*F != resolverFunc &&
+ if (F->isDeclaration() && !F->use_empty() &&
+ &*F != resolverFunc.getCallee() &&
!F->isIntrinsic() /* ignore intrinsics */) {
Function *TestFn = Test->getFunction(F->getName());
@@ -879,7 +879,8 @@ CleanupAndPrepareModules(BugDriver &BD, std::unique_ptr<Module> Test,
BasicBlock::Create(F->getContext(), "lookupfp", FuncWrapper);
// Check to see if we already looked up the value.
- Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB);
+ Value *CachedVal =
+ new LoadInst(F->getType(), Cache, "fpcache", EntryBB);
Value *IsNull = new ICmpInst(*EntryBB, ICmpInst::ICMP_EQ, CachedVal,
NullPtr, "isNull");
BranchInst::Create(LookupBB, DoCallBB, IsNull, EntryBB);
@@ -911,11 +912,11 @@ CleanupAndPrepareModules(BugDriver &BD, std::unique_ptr<Module> Test,
// Pass on the arguments to the real function, return its result
if (F->getReturnType()->isVoidTy()) {
- CallInst::Create(FuncPtr, Args, "", DoCallBB);
+ CallInst::Create(FuncTy, FuncPtr, Args, "", DoCallBB);
ReturnInst::Create(F->getContext(), DoCallBB);
} else {
CallInst *Call =
- CallInst::Create(FuncPtr, Args, "retval", DoCallBB);
+ CallInst::Create(FuncTy, FuncPtr, Args, "retval", DoCallBB);
ReturnInst::Create(F->getContext(), Call, DoCallBB);
}
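
The Miscompilation.cpp hunks above track two API changes in this import: Module::getOrInsertFunction now returns a FunctionCallee (function type plus callee value) rather than a bare Constant*, and LoadInst / CallInst::Create take the loaded or called type explicitly. A small sketch of the FunctionCallee pattern; the IRBuilder-based caller is illustrative and not taken from the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Declare (or reuse) `void exit(i32)` and emit a call to it. Carrying the
// FunctionCallee keeps the callee's type available even when the returned
// callee is a bitcast constant expression rather than a plain Function.
static void emitExitCall(Module &M, IRBuilder<> &Builder, Value *Status) {
  FunctionCallee Exit = M.getOrInsertFunction(
      "exit", Type::getVoidTy(M.getContext()), Type::getInt32Ty(M.getContext()));
  Builder.CreateCall(Exit, {Status});
}
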
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 64fe675de20c..562de7952388 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -1,9 +1,8 @@
//===- OptimizerDriver.cpp - Allow BugPoint to run passes safely ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 7ba8ea1f16c5..da4244345e3b 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -1,9 +1,8 @@
//===-- ToolRunner.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index ef8551cc669b..dde4ec539cfb 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -1,9 +1,8 @@
//===-- tools/bugpoint/ToolRunner.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index f6b7d08455d4..2d5322a351ad 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -1,9 +1,8 @@
//===- bugpoint.cpp - The LLVM Bugpoint utility ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 2329fb3e87c9..76da843f065e 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -1,9 +1,8 @@
//===-- llc.cpp - Implement the LLVM Native Code Generator ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -133,21 +133,33 @@ static cl::opt<bool> DiscardValueNames(
static cl::list<std::string> IncludeDirs("I", cl::desc("include search path"));
-static cl::opt<bool> PassRemarksWithHotness(
+static cl::opt<bool> RemarksWithHotness(
"pass-remarks-with-hotness",
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
-static cl::opt<unsigned> PassRemarksHotnessThreshold(
- "pass-remarks-hotness-threshold",
- cl::desc("Minimum profile count required for an optimization remark to be output"),
- cl::Hidden);
+static cl::opt<unsigned>
+ RemarksHotnessThreshold("pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for "
+ "an optimization remark to be output"),
+ cl::Hidden);
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
- cl::desc("YAML output filename for pass remarks"),
+ cl::desc("Output filename for pass remarks"),
cl::value_desc("filename"));
+static cl::opt<std::string>
+ RemarksPasses("pass-remarks-filter",
+ cl::desc("Only record optimization remarks from passes whose "
+ "names match the given regular expression"),
+ cl::value_desc("regex"));
+
+static cl::opt<std::string> RemarksFormat(
+ "pass-remarks-format",
+ cl::desc("The format used for serializing remarks (default: YAML)"),
+ cl::value_desc("format"), cl::init("yaml"));
+
namespace {
static ManagedStatic<std::vector<std::string>> RunPassNames;
@@ -302,6 +314,7 @@ int main(int argc, char **argv) {
initializeVectorization(*Registry);
initializeScalarizeMaskedMemIntrinPass(*Registry);
initializeExpandReductionsPass(*Registry);
+ initializeHardwareLoopsPass(*Registry);
// Initialize debugging passes.
initializeScavengerTestPass(*Registry);
@@ -319,24 +332,15 @@ int main(int argc, char **argv) {
llvm::make_unique<LLCDiagnosticHandler>(&HasError));
Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError);
- if (PassRemarksWithHotness)
- Context.setDiagnosticsHotnessRequested(true);
-
- if (PassRemarksHotnessThreshold)
- Context.setDiagnosticsHotnessThreshold(PassRemarksHotnessThreshold);
-
- std::unique_ptr<ToolOutputFile> YamlFile;
- if (RemarksFilename != "") {
- std::error_code EC;
- YamlFile =
- llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
- if (EC) {
- WithColor::error(errs(), argv[0]) << EC.message() << '\n';
- return 1;
- }
- Context.setDiagnosticsOutputFile(
- llvm::make_unique<yaml::Output>(YamlFile->os()));
+ Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
+ setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
+ RemarksFormat, RemarksWithHotness,
+ RemarksHotnessThreshold);
+ if (Error E = RemarksFileOrErr.takeError()) {
+ WithColor::error(errs(), argv[0]) << toString(std::move(E)) << '\n';
+ return 1;
}
+ std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
if (InputLanguage != "" && InputLanguage != "ir" &&
InputLanguage != "mir") {
@@ -351,8 +355,8 @@ int main(int argc, char **argv) {
if (int RetVal = compileModule(argv, Context))
return RetVal;
- if (YamlFile)
- YamlFile->keep();
+ if (RemarksFile)
+ RemarksFile->keep();
return 0;
}
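
The llc.cpp hunks above drop the hand-rolled YAML ToolOutputFile handling in favor of the shared setupOptimizationRemarks helper (declared in llvm/IR/RemarkStreamer.h at this revision). A condensed sketch of the call-and-keep flow, mirrored from the two hunks rather than written from scratch; Context and the Remarks* values are the surrounding cl::opt variables, and includes/error plumbing are elided:

Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
    setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
                             RemarksFormat, RemarksWithHotness,
                             RemarksHotnessThreshold);
if (Error E = RemarksFileOrErr.takeError()) {
  WithColor::error(errs(), "llc") << toString(std::move(E)) << '\n';
  return 1;
}
std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);

// ... run compilation ...

if (RemarksFile)
  RemarksFile->keep(); // only retain the remarks output file on success
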
diff --git a/tools/lli/RemoteJITUtils.h b/tools/lli/RemoteJITUtils.h
index 944881070c70..8e80e73c8082 100644
--- a/tools/lli/RemoteJITUtils.h
+++ b/tools/lli/RemoteJITUtils.h
@@ -1,9 +1,8 @@
//===-- RemoteJITUtils.h - Utilities for remote-JITing with LLI -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,7 +75,7 @@ std::unique_ptr<FDRawChannel> launchRemote();
namespace llvm {
-// ForwardingMM - Adapter to connect MCJIT to Orc's Remote8
+// ForwardingMM - Adapter to connect MCJIT to Orc's Remote
// memory manager.
class ForwardingMemoryManager : public llvm::RTDyldMemoryManager {
public:
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 7e93d31361aa..8c8cd88c9711 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -1,9 +1,8 @@
//===- lli.cpp - LLVM Interpreter / Dynamic compiler ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,18 +83,15 @@ namespace {
cl::desc("Force interpretation: disable JIT"),
cl::init(false));
- cl::opt<JITKind> UseJITKind("jit-kind",
- cl::desc("Choose underlying JIT kind."),
- cl::init(JITKind::MCJIT),
- cl::values(
- clEnumValN(JITKind::MCJIT, "mcjit",
- "MCJIT"),
- clEnumValN(JITKind::OrcMCJITReplacement,
- "orc-mcjit",
- "Orc-based MCJIT replacement"),
- clEnumValN(JITKind::OrcLazy,
- "orc-lazy",
- "Orc-based lazy JIT.")));
+ cl::opt<JITKind> UseJITKind(
+ "jit-kind", cl::desc("Choose underlying JIT kind."),
+ cl::init(JITKind::MCJIT),
+ cl::values(clEnumValN(JITKind::MCJIT, "mcjit", "MCJIT"),
+ clEnumValN(JITKind::OrcMCJITReplacement, "orc-mcjit",
+ "Orc-based MCJIT replacement "
+ "(deprecated)"),
+ clEnumValN(JITKind::OrcLazy, "orc-lazy",
+ "Orc-based lazy JIT.")));
cl::opt<unsigned>
LazyJITCompileThreads("compile-threads",
@@ -173,7 +169,7 @@ namespace {
cl::opt<bool>
EnableCacheManager("enable-cache-manager",
- cl::desc("Use cache manager to save/load mdoules"),
+ cl::desc("Use cache manager to save/load modules"),
cl::init(false));
cl::opt<std::string>
@@ -420,7 +416,8 @@ int main(int argc, char **argv, char * const *envp) {
builder.setEngineKind(ForceInterpreter
? EngineKind::Interpreter
: EngineKind::JIT);
- builder.setUseOrcMCJITReplacement(UseJITKind == JITKind::OrcMCJITReplacement);
+ builder.setUseOrcMCJITReplacement(AcknowledgeORCv1Deprecation,
+ UseJITKind == JITKind::OrcMCJITReplacement);
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
@@ -596,8 +593,8 @@ int main(int argc, char **argv, char * const *envp) {
if (!RemoteMCJIT) {
// If the program doesn't explicitly call exit, we will need the Exit
// function later on to make an explicit call, so get the function now.
- Constant *Exit = Mod->getOrInsertFunction("exit", Type::getVoidTy(Context),
- Type::getInt32Ty(Context));
+ FunctionCallee Exit = Mod->getOrInsertFunction(
+ "exit", Type::getVoidTy(Context), Type::getInt32Ty(Context));
// Run static constructors.
if (!ForceInterpreter) {
@@ -621,19 +618,21 @@ int main(int argc, char **argv, char * const *envp) {
// If the program didn't call exit explicitly, we should call it now.
// This ensures that any atexit handlers get called correctly.
- if (Function *ExitF = dyn_cast<Function>(Exit)) {
- std::vector<GenericValue> Args;
- GenericValue ResultGV;
- ResultGV.IntVal = APInt(32, Result);
- Args.push_back(ResultGV);
- EE->runFunction(ExitF, Args);
- WithColor::error(errs(), argv[0]) << "exit(" << Result << ") returned!\n";
- abort();
- } else {
- WithColor::error(errs(), argv[0])
- << "exit defined with wrong prototype!\n";
- abort();
+ if (Function *ExitF =
+ dyn_cast<Function>(Exit.getCallee()->stripPointerCasts())) {
+ if (ExitF->getFunctionType() == Exit.getFunctionType()) {
+ std::vector<GenericValue> Args;
+ GenericValue ResultGV;
+ ResultGV.IntVal = APInt(32, Result);
+ Args.push_back(ResultGV);
+ EE->runFunction(ExitF, Args);
+ WithColor::error(errs(), argv[0])
+ << "exit(" << Result << ") returned!\n";
+ abort();
+ }
}
+ WithColor::error(errs(), argv[0]) << "exit defined with wrong prototype!\n";
+ abort();
} else {
// else == "if (RemoteMCJIT)"
@@ -664,6 +663,7 @@ int main(int argc, char **argv, char * const *envp) {
// Forward MCJIT's symbol resolution calls to the remote.
static_cast<ForwardingMemoryManager *>(RTDyldMM)->setResolver(
orc::createLambdaResolver(
+ AcknowledgeORCv1Deprecation,
[](const std::string &Name) { return nullptr; },
[&](const std::string &Name) {
if (auto Addr = ExitOnErr(R->getSymbolAddress(Name)))
@@ -762,14 +762,17 @@ int runOrcLazyJIT(const char *ProgName) {
reportError(Err, ProgName);
const auto &TT = MainModule.getModule()->getTargetTriple();
- orc::JITTargetMachineBuilder JTMB =
+ orc::LLLazyJITBuilder Builder;
+
+ Builder.setJITTargetMachineBuilder(
TT.empty() ? ExitOnErr(orc::JITTargetMachineBuilder::detectHost())
- : orc::JITTargetMachineBuilder(Triple(TT));
+ : orc::JITTargetMachineBuilder(Triple(TT)));
if (!MArch.empty())
- JTMB.getTargetTriple().setArchName(MArch);
+ Builder.getJITTargetMachineBuilder()->getTargetTriple().setArchName(MArch);
- JTMB.setCPU(getCPUStr())
+ Builder.getJITTargetMachineBuilder()
+ ->setCPU(getCPUStr())
.addFeatures(getFeatureList())
.setRelocationModel(RelocModel.getNumOccurrences()
? Optional<Reloc::Model>(RelocModel)
@@ -778,12 +781,11 @@ int runOrcLazyJIT(const char *ProgName) {
? Optional<CodeModel::Model>(CMModel)
: None);
- DataLayout DL = ExitOnErr(JTMB.getDefaultDataLayoutForTarget());
+ Builder.setLazyCompileFailureAddr(
+ pointerToJITTargetAddress(exitOnLazyCallThroughFailure));
+ Builder.setNumCompileThreads(LazyJITCompileThreads);
- auto J = ExitOnErr(orc::LLLazyJIT::Create(
- std::move(JTMB), DL,
- pointerToJITTargetAddress(exitOnLazyCallThroughFailure),
- LazyJITCompileThreads));
+ auto J = ExitOnErr(Builder.create());
if (PerModuleLazy)
J->setPartitionFunction(orc::CompileOnDemandLayer::compileWholeModule);
@@ -799,9 +801,10 @@ int runOrcLazyJIT(const char *ProgName) {
return Dump(std::move(TSM), R);
});
J->getMainJITDylib().setGenerator(
- ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(DL)));
+ ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+ J->getDataLayout().getGlobalPrefix())));
- orc::MangleAndInterner Mangle(J->getExecutionSession(), DL);
+ orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
ExitOnErr(CXXRuntimeOverrides.enable(J->getMainJITDylib(), Mangle));
@@ -817,8 +820,10 @@ int runOrcLazyJIT(const char *ProgName) {
IdxToDylib[0] = &J->getMainJITDylib();
for (auto JDItr = JITDylibs.begin(), JDEnd = JITDylibs.end();
JDItr != JDEnd; ++JDItr) {
- IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] =
- &J->createJITDylib(*JDItr);
+ orc::JITDylib *JD = J->getJITDylibByName(*JDItr);
+ if (!JD)
+ JD = &J->createJITDylib(*JDItr);
+ IdxToDylib[JITDylibs.getPosition(JDItr - JITDylibs.begin())] = JD;
}
for (auto EMItr = ExtraModules.begin(), EMEnd = ExtraModules.end();
@@ -861,8 +866,6 @@ int runOrcLazyJIT(const char *ProgName) {
AltEntryThreads.push_back(std::thread([EntryPoint]() { EntryPoint(); }));
}
- J->getExecutionSession().dump(llvm::dbgs());
-
// Run main.
auto MainSym = ExitOnErr(J->lookup("main"));
typedef int (*MainFnPtr)(int, const char *[]);
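
The lli.cpp hunks above move LLLazyJIT construction onto the ORCv2 LLLazyJITBuilder. A condensed sketch of that builder flow, using only calls that appear in the hunks; the header paths, ExitOnErr helper, and fixed thread count are assumptions for a standalone example:

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/Support/Error.h"

using namespace llvm;

static ExitOnError ExitOnErr;

static std::unique_ptr<orc::LLLazyJIT> buildLazyJIT() {
  orc::LLLazyJITBuilder Builder;
  Builder.setJITTargetMachineBuilder(
      ExitOnErr(orc::JITTargetMachineBuilder::detectHost()));
  Builder.setNumCompileThreads(2); // illustrative; lli reads -compile-threads
  auto J = ExitOnErr(Builder.create());
  // Make process symbols visible to JIT'd code, as lli does for its main dylib.
  J->getMainJITDylib().setGenerator(
      ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
          J->getDataLayout().getGlobalPrefix())));
  return J;
}
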
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index 1c453ee0b569..91746d0fab37 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -1,9 +1,8 @@
//===-- llvm-ar.cpp - LLVM archive librarian utility ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -67,7 +66,7 @@ OPTIONS:
const char ArHelp[] = R"(
OVERVIEW: LLVM Archiver
-USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] <archive> [files]
+USAGE: llvm-ar [options] [-]<operation>[modifiers] [relpos] [count] <archive> [files]
llvm-ar -M [<mri-script]
OPTIONS:
@@ -79,6 +78,7 @@ OPTIONS:
--plugin=<string> - Ignored for compatibility
--help - Display available options
--version - Display the version of this program
+ @<file> - read options from <file>
OPERATIONS:
d - delete [files] from the archive
@@ -98,7 +98,9 @@ MODIFIERS:
[i] - put [files] before [relpos] (same as [b])
[l] - ignored for compatibility
[L] - add archive's contents
+ [N] - use instance [count] of name
[o] - preserve original dates
+ [P] - use full names when matching (implied for thin archives)
[s] - create an archive index (cf. ranlib)
[S] - do not build a symbol table
[T] - create a thin archive
@@ -169,16 +171,17 @@ enum ArchiveOperation {
};
// Modifiers to follow operation to vary behavior
-static bool AddAfter = false; ///< 'a' modifier
-static bool AddBefore = false; ///< 'b' modifier
-static bool Create = false; ///< 'c' modifier
-static bool OriginalDates = false; ///< 'o' modifier
-static bool OnlyUpdate = false; ///< 'u' modifier
-static bool Verbose = false; ///< 'v' modifier
-static bool Symtab = true; ///< 's' modifier
-static bool Deterministic = true; ///< 'D' and 'U' modifiers
-static bool Thin = false; ///< 'T' modifier
-static bool AddLibrary = false; ///< 'L' modifier
+static bool AddAfter = false; ///< 'a' modifier
+static bool AddBefore = false; ///< 'b' modifier
+static bool Create = false; ///< 'c' modifier
+static bool OriginalDates = false; ///< 'o' modifier
+static bool CompareFullPath = false; ///< 'P' modifier
+static bool OnlyUpdate = false; ///< 'u' modifier
+static bool Verbose = false; ///< 'v' modifier
+static bool Symtab = true; ///< 's' modifier
+static bool Deterministic = true; ///< 'D' and 'U' modifiers
+static bool Thin = false; ///< 'T' modifier
+static bool AddLibrary = false; ///< 'L' modifier
// Relative Positional Argument (for insert/move). This variable holds
// the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -186,6 +189,11 @@ static bool AddLibrary = false; ///< 'L' modifier
// one variable.
static std::string RelPos;
+// Count parameter for 'N' modifier. This variable specifies which file should
+// match for extract/delete operations when there are multiple matches. This is
+// 1-indexed. A value of 0 is invalid, and implies 'N' is not used.
+static int CountParam = 0;
+
// This variable holds the name of the archive file as given on the
// command line.
static std::string ArchiveName;
@@ -194,6 +202,9 @@ static std::string ArchiveName;
// on the command line.
static std::vector<StringRef> Members;
+// Static buffer to hold StringRefs.
+static BumpPtrAllocator Alloc;
+
// Extract the member filename from the command line for the [relpos] argument
// associated with a, b, and i modifiers
static void getRelPos() {
@@ -203,6 +214,19 @@ static void getRelPos() {
PositionalArgs.erase(PositionalArgs.begin());
}
+// Extract the parameter from the command line for the [count] argument
+// associated with the N modifier
+static void getCountParam() {
+ if (PositionalArgs.empty())
+ fail("Expected [count] for N modifier");
+ auto CountParamArg = StringRef(PositionalArgs[0]);
+ if (CountParamArg.getAsInteger(10, CountParam))
+ fail("Value for [count] must be numeric, got: " + CountParamArg);
+ if (CountParam < 1)
+ fail("Value for [count] must be positive, got: " + CountParamArg);
+ PositionalArgs.erase(PositionalArgs.begin());
+}
+
// Get the archive file name from the command line
static void getArchive() {
if (PositionalArgs.empty())
@@ -295,6 +319,9 @@ static ArchiveOperation parseCommandLine() {
case 'o':
OriginalDates = true;
break;
+ case 'P':
+ CompareFullPath = true;
+ break;
case 's':
Symtab = true;
MaybeJustCreateSymTab = true;
@@ -329,8 +356,13 @@ static ArchiveOperation parseCommandLine() {
case 'U':
Deterministic = false;
break;
+ case 'N':
+ getCountParam();
+ break;
case 'T':
Thin = true;
+ // Thin archives store path names, so P should be forced.
+ CompareFullPath = true;
break;
case 'L':
AddLibrary = true;
@@ -362,11 +394,14 @@ static ArchiveOperation parseCommandLine() {
fail("Only one operation may be specified");
if (NumPositional > 1)
fail("You may only specify one of a, b, and i modifiers");
- if (AddAfter || AddBefore) {
+ if (AddAfter || AddBefore)
if (Operation != Move && Operation != ReplaceOrInsert)
fail("The 'a', 'b' and 'i' modifiers can only be specified with "
"the 'm' or 'r' operations");
- }
+ if (CountParam)
+ if (Operation != Extract && Operation != Delete)
+ fail("The 'N' modifier can only be specified with the 'x' or 'd' "
+ "operations");
if (OriginalDates && Operation != Extract)
fail("The 'o' modifier is only applicable to the 'x' operation");
if (OnlyUpdate && Operation != ReplaceOrInsert)
@@ -430,12 +465,19 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
}
if (C.getParent()->isThin()) {
- outs() << sys::path::parent_path(ArchiveName);
- outs() << '/';
+ if (!sys::path::is_absolute(Name)) {
+ StringRef ParentDir = sys::path::parent_path(ArchiveName);
+ if (!ParentDir.empty())
+ outs() << sys::path::convert_to_slash(ParentDir) << '/';
+ }
}
outs() << Name << "\n";
}
+static StringRef normalizePath(StringRef Path) {
+ return CompareFullPath ? Path : sys::path::filename(Path);
+}
+
// Implement the 'x' operation. This function extracts files back to the file
// system.
static void doExtract(StringRef Name, const object::Archive::Child &C) {
@@ -499,6 +541,7 @@ static void performReadOperation(ArchiveOperation Operation,
fail("extracting from a thin archive is not supported");
bool Filter = !Members.empty();
+ StringMap<int> MemberCount;
{
Error Err = Error::success();
for (auto &C : OldArchive->children(Err)) {
@@ -507,9 +550,13 @@ static void performReadOperation(ArchiveOperation Operation,
StringRef Name = NameOrErr.get();
if (Filter) {
- auto I = find(Members, Name);
+ auto I = find_if(Members, [Name](StringRef Path) {
+ return Name == normalizePath(Path);
+ });
if (I == Members.end())
continue;
+ if (CountParam && ++MemberCount[Name] != CountParam)
+ continue;
Members.erase(I);
}
@@ -545,6 +592,23 @@ static void addChildMember(std::vector<NewArchiveMember> &Members,
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getOldMember(M, Deterministic);
failIfError(NMOrErr.takeError());
+ // If the child member we're trying to add is thin, use the path relative to
+ // the archive it's in, so the file resolves correctly.
+ if (Thin && FlattenArchive) {
+ StringSaver Saver(Alloc);
+ Expected<std::string> FileNameOrErr = M.getName();
+ failIfError(FileNameOrErr.takeError());
+ if (sys::path::is_absolute(*FileNameOrErr)) {
+ NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(*FileNameOrErr));
+ } else {
+ FileNameOrErr = M.getFullName();
+ failIfError(FileNameOrErr.takeError());
+ Expected<std::string> PathOrErr =
+ computeArchiveRelativePath(ArchiveName, *FileNameOrErr);
+ NMOrErr->MemberName = Saver.save(
+ PathOrErr ? *PathOrErr : sys::path::convert_to_slash(*FileNameOrErr));
+ }
+ }
if (FlattenArchive &&
identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
Expected<std::string> FileNameOrErr = M.getFullName();
@@ -568,6 +632,23 @@ static void addMember(std::vector<NewArchiveMember> &Members,
Expected<NewArchiveMember> NMOrErr =
NewArchiveMember::getFile(FileName, Deterministic);
failIfError(NMOrErr.takeError(), FileName);
+ StringSaver Saver(Alloc);
+ // For regular archives, use the basename of the object path for the member
+ // name. For thin archives, use the full relative paths so the file resolves
+ // correctly.
+ if (!Thin) {
+ NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
+ } else {
+ if (sys::path::is_absolute(FileName))
+ NMOrErr->MemberName = Saver.save(sys::path::convert_to_slash(FileName));
+ else {
+ Expected<std::string> PathOrErr =
+ computeArchiveRelativePath(ArchiveName, FileName);
+ NMOrErr->MemberName = Saver.save(
+ PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName));
+ }
+ }
+
if (FlattenArchive &&
identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) {
object::Archive &Lib = readLibrary(FileName);
@@ -581,8 +662,6 @@ static void addMember(std::vector<NewArchiveMember> &Members,
return;
}
}
- // Use the basename of the object path for the member name.
- NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName);
Members.push_back(std::move(*NMOrErr));
}
@@ -597,27 +676,29 @@ enum InsertAction {
static InsertAction computeInsertAction(ArchiveOperation Operation,
const object::Archive::Child &Member,
StringRef Name,
- std::vector<StringRef>::iterator &Pos) {
+ std::vector<StringRef>::iterator &Pos,
+ StringMap<int> &MemberCount) {
if (Operation == QuickAppend || Members.empty())
return IA_AddOldMember;
-
- auto MI = find_if(Members, [Name](StringRef Path) {
- return Name == sys::path::filename(Path);
- });
+ auto MI = find_if(
+ Members, [Name](StringRef Path) { return Name == normalizePath(Path); });
if (MI == Members.end())
return IA_AddOldMember;
Pos = MI;
- if (Operation == Delete)
+ if (Operation == Delete) {
+ if (CountParam && ++MemberCount[Name] != CountParam)
+ return IA_AddOldMember;
return IA_Delete;
+ }
if (Operation == Move)
return IA_MoveOldMember;
if (Operation == ReplaceOrInsert) {
- StringRef PosName = sys::path::filename(RelPos);
+ StringRef PosName = normalizePath(RelPos);
if (!OnlyUpdate) {
if (PosName.empty())
return IA_AddNewMember;
@@ -651,9 +732,10 @@ computeNewArchiveMembers(ArchiveOperation Operation,
std::vector<NewArchiveMember> Ret;
std::vector<NewArchiveMember> Moved;
int InsertPos = -1;
- StringRef PosName = sys::path::filename(RelPos);
+ StringRef PosName = normalizePath(RelPos);
if (OldArchive) {
Error Err = Error::success();
+ StringMap<int> MemberCount;
for (auto &Child : OldArchive->children(Err)) {
int Pos = Ret.size();
Expected<StringRef> NameOrErr = Child.getName();
@@ -669,10 +751,10 @@ computeNewArchiveMembers(ArchiveOperation Operation,
std::vector<StringRef>::iterator MemberI = Members.end();
InsertAction Action =
- computeInsertAction(Operation, Child, Name, MemberI);
+ computeInsertAction(Operation, Child, Name, MemberI, MemberCount);
switch (Action) {
case IA_AddOldMember:
- addChildMember(Ret, Child);
+ addChildMember(Ret, Child, /*FlattenArchive=*/Thin);
break;
case IA_AddNewMember:
addMember(Ret, *MemberI);
@@ -680,13 +762,18 @@ computeNewArchiveMembers(ArchiveOperation Operation,
case IA_Delete:
break;
case IA_MoveOldMember:
- addChildMember(Moved, Child);
+ addChildMember(Moved, Child, /*FlattenArchive=*/Thin);
break;
case IA_MoveNewMember:
addMember(Moved, *MemberI);
break;
}
- if (MemberI != Members.end())
+ // When processing elements with the count param, we need to preserve the
+ // full members list when iterating over all archive members. For
+ // instance, "llvm-ar dN 2 archive.a member.o" should delete the second
+ // file named member.o it sees; we are not done with member.o the first
+ // time we see it in the archive.
+ if (MemberI != Members.end() && !CountParam)
Members.erase(MemberI);
}
failIfError(std::move(Err));
@@ -843,6 +930,8 @@ static int performOperation(ArchiveOperation Operation,
EC = errorToErrorCode(std::move(Err));
failIfError(EC,
"error loading '" + ArchiveName + "': " + EC.message() + "!");
+ if (Archive.isThin())
+ CompareFullPath = true;
performOperation(Operation, &Archive, std::move(Buf.get()), NewMembers);
return 0;
}
@@ -864,7 +953,7 @@ static int performOperation(ArchiveOperation Operation,
}
static void runMRIScript() {
- enum class MRICommand { AddLib, AddMod, Create, Delete, Save, End, Invalid };
+ enum class MRICommand { AddLib, AddMod, Create, CreateThin, Delete, Save, End, Invalid };
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getSTDIN();
failIfError(Buf.getError());
@@ -888,6 +977,7 @@ static void runMRIScript() {
.Case("addlib", MRICommand::AddLib)
.Case("addmod", MRICommand::AddMod)
.Case("create", MRICommand::Create)
+ .Case("createthin", MRICommand::CreateThin)
.Case("delete", MRICommand::Delete)
.Case("save", MRICommand::Save)
.Case("end", MRICommand::End)
@@ -899,7 +989,7 @@ static void runMRIScript() {
{
Error Err = Error::success();
for (auto &Member : Lib.children(Err))
- addChildMember(NewMembers, Member);
+ addChildMember(NewMembers, Member, /*FlattenArchive=*/Thin);
failIfError(std::move(Err));
}
break;
@@ -907,6 +997,9 @@ static void runMRIScript() {
case MRICommand::AddMod:
addMember(NewMembers, Rest);
break;
+ case MRICommand::CreateThin:
+ Thin = true;
+ LLVM_FALLTHROUGH;
case MRICommand::Create:
Create = true;
if (!ArchiveName.empty())
@@ -916,7 +1009,7 @@ static void runMRIScript() {
ArchiveName = Rest;
break;
case MRICommand::Delete: {
- StringRef Name = sys::path::filename(Rest);
+ StringRef Name = normalizePath(Rest);
llvm::erase_if(NewMembers,
[=](NewArchiveMember &M) { return M.MemberName == Name; });
break;
@@ -951,7 +1044,6 @@ static bool handleGenericOption(StringRef arg) {
static int ar_main(int argc, char **argv) {
SmallVector<const char *, 0> Argv(argv, argv + argc);
- BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);
cl::ExpandResponseFiles(Saver, cl::TokenizeGNUCommandLine, Argv);
for (size_t i = 1; i < Argv.size(); ++i) {
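
The llvm-ar hunks above add the 'N' (instance count) and 'P' (full-path matching) modifiers; the [count] argument is parsed with StringRef::getAsInteger, which returns true on failure. A minimal sketch of that parse, with a hypothetical reportAndExit standing in for llvm-ar's fail() helper:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>

// Hypothetical stand-in for llvm-ar's fail() helper.
static void reportAndExit(const llvm::Twine &Msg) {
  llvm::errs() << "error: " << Msg << "\n";
  std::exit(1);
}

static int parseCountParam(llvm::StringRef Arg) {
  int Count = 0;
  // getAsInteger returns true if Arg is not a valid base-10 integer.
  if (Arg.getAsInteger(10, Count))
    reportAndExit("Value for [count] must be numeric, got: " + Arg);
  if (Count < 1)
    reportAndExit("Value for [count] must be positive, got: " + Arg);
  return Count;
}

With the count in place, "llvm-ar dN 2 archive.a member.o" deletes the second archive member named member.o, as described in the comment added to computeNewArchiveMembers above.
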
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index bb4233aa9ba0..234fef907a38 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -1,9 +1,8 @@
//===--- llvm-as.cpp - The low-level LLVM assembler -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,38 +30,43 @@
#include <memory>
using namespace llvm;
+cl::OptionCategory AsCat("llvm-as Options");
+
static cl::opt<std::string> InputFilename(cl::Positional,
cl::desc("<input .llvm file>"),
cl::init("-"));
static cl::opt<std::string> OutputFilename("o",
cl::desc("Override output filename"),
- cl::value_desc("filename"));
+ cl::value_desc("filename"),
+ cl::cat(AsCat));
-static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"));
+static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
+ cl::cat(AsCat));
static cl::opt<bool> DisableOutput("disable-output", cl::desc("Disable output"),
- cl::init(false));
+ cl::init(false), cl::cat(AsCat));
static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
- cl::init(false));
+ cl::init(false), cl::cat(AsCat));
static cl::opt<bool> DumpAsm("d", cl::desc("Print assembly as parsed"),
- cl::Hidden);
+ cl::Hidden, cl::cat(AsCat));
static cl::opt<bool>
DisableVerify("disable-verify", cl::Hidden,
- cl::desc("Do not run verifier on input LLVM (dangerous!)"));
+ cl::desc("Do not run verifier on input LLVM (dangerous!)"),
+ cl::cat(AsCat));
static cl::opt<bool> PreserveBitcodeUseListOrder(
"preserve-bc-uselistorder",
cl::desc("Preserve use-list order when writing LLVM bitcode."),
- cl::init(true), cl::Hidden);
+ cl::init(true), cl::Hidden, cl::cat(AsCat));
static cl::opt<std::string> ClDataLayout("data-layout",
cl::desc("data layout string to use"),
cl::value_desc("layout-string"),
- cl::init(""));
+ cl::init(""), cl::cat(AsCat));
static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
// Infer the output filename if needed.
@@ -110,6 +114,7 @@ static void WriteOutputFile(const Module *M, const ModuleSummaryIndex *Index) {
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
LLVMContext Context;
+ cl::HideUnrelatedOptions(AsCat);
cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n");
// Parse the file now...
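
The llvm-as hunk above tags each tool-specific option with a cl::OptionCategory and calls cl::HideUnrelatedOptions, so -help lists only the tool's own flags plus the generic ones. A small standalone sketch of the same pattern; the tool and option names are illustrative:

#include "llvm/Support/CommandLine.h"
#include <string>

using namespace llvm;

static cl::OptionCategory ToolCat("example-tool Options"); // illustrative name

static cl::opt<std::string> OutputFilename("o",
                                           cl::desc("Override output filename"),
                                           cl::value_desc("filename"),
                                           cl::cat(ToolCat));
static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
                           cl::cat(ToolCat));

int main(int argc, char **argv) {
  // Options not in ToolCat (for example those registered by linked-in
  // libraries) are hidden from -help.
  cl::HideUnrelatedOptions(ToolCat);
  cl::ParseCommandLineOptions(argc, argv, "example .ll -> .bc assembler\n");
  return 0;
}
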
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 789a666cb41a..01cba1f6e3c9 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -1,9 +1,8 @@
//===-- llvm-bcanalyzer.cpp - Bitcode Analyzer --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,22 +26,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitstreamReader.h"
-#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Bitcode/BitcodeAnalyzer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SHA1.h"
-#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include <memory>
using namespace llvm;
static cl::opt<std::string>
- InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
+ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
@@ -53,951 +48,66 @@ static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
static cl::opt<bool> NoHistogram("disable-histogram",
cl::desc("Do not print per-code histogram"));
-static cl::opt<bool>
-NonSymbolic("non-symbolic",
- cl::desc("Emit numeric info in dump even if"
- " symbolic info is available"));
+static cl::opt<bool> NonSymbolic("non-symbolic",
+ cl::desc("Emit numeric info in dump even if"
+ " symbolic info is available"));
static cl::opt<std::string>
- BlockInfoFilename("block-info",
- cl::desc("Use the BLOCK_INFO from the given file"));
+ BlockInfoFilename("block-info",
+ cl::desc("Use the BLOCK_INFO from the given file"));
static cl::opt<bool>
- ShowBinaryBlobs("show-binary-blobs",
- cl::desc("Print binary blobs using hex escapes"));
+ ShowBinaryBlobs("show-binary-blobs",
+ cl::desc("Print binary blobs using hex escapes"));
static cl::opt<std::string> CheckHash(
"check-hash",
cl::desc("Check module hash using the argument as a string table"));
-namespace {
-
-/// CurStreamTypeType - A type for CurStreamType
-enum CurStreamTypeType {
- UnknownBitstream,
- LLVMIRBitstream,
- ClangSerializedASTBitstream,
- ClangSerializedDiagnosticsBitstream,
-};
-
-}
-
-/// GetBlockName - Return a symbolic block name if known, otherwise return
-/// null.
-static const char *GetBlockName(unsigned BlockID,
- const BitstreamBlockInfo &BlockInfo,
- CurStreamTypeType CurStreamType) {
- // Standard blocks for all bitcode files.
- if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
- if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
- return "BLOCKINFO_BLOCK";
- return nullptr;
- }
-
- // Check to see if we have a blockinfo record for this block, with a name.
- if (const BitstreamBlockInfo::BlockInfo *Info =
- BlockInfo.getBlockInfo(BlockID)) {
- if (!Info->Name.empty())
- return Info->Name.c_str();
- }
-
-
- if (CurStreamType != LLVMIRBitstream) return nullptr;
-
- switch (BlockID) {
- default: return nullptr;
- case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: return "OPERAND_BUNDLE_TAGS_BLOCK";
- case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
- case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
- case bitc::PARAMATTR_GROUP_BLOCK_ID: return "PARAMATTR_GROUP_BLOCK_ID";
- case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
- case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
- case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
- case bitc::IDENTIFICATION_BLOCK_ID:
- return "IDENTIFICATION_BLOCK_ID";
- case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
- case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
- case bitc::METADATA_KIND_BLOCK_ID: return "METADATA_KIND_BLOCK";
- case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
- case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID";
- case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
- return "GLOBALVAL_SUMMARY_BLOCK";
- case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
- return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
- case bitc::MODULE_STRTAB_BLOCK_ID: return "MODULE_STRTAB_BLOCK";
- case bitc::STRTAB_BLOCK_ID: return "STRTAB_BLOCK";
- case bitc::SYMTAB_BLOCK_ID: return "SYMTAB_BLOCK";
- }
-}
-
-/// GetCodeName - Return a symbolic code name if known, otherwise return
-/// null.
-static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
- const BitstreamBlockInfo &BlockInfo,
- CurStreamTypeType CurStreamType) {
- // Standard blocks for all bitcode files.
- if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
- if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- switch (CodeID) {
- default: return nullptr;
- case bitc::BLOCKINFO_CODE_SETBID: return "SETBID";
- case bitc::BLOCKINFO_CODE_BLOCKNAME: return "BLOCKNAME";
- case bitc::BLOCKINFO_CODE_SETRECORDNAME: return "SETRECORDNAME";
- }
- }
- return nullptr;
- }
-
- // Check to see if we have a blockinfo record for this record, with a name.
- if (const BitstreamBlockInfo::BlockInfo *Info =
- BlockInfo.getBlockInfo(BlockID)) {
- for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
- if (Info->RecordNames[i].first == CodeID)
- return Info->RecordNames[i].second.c_str();
- }
-
-
- if (CurStreamType != LLVMIRBitstream) return nullptr;
-
-#define STRINGIFY_CODE(PREFIX, CODE) \
- case bitc::PREFIX##_##CODE: \
- return #CODE;
- switch (BlockID) {
- default: return nullptr;
- case bitc::MODULE_BLOCK_ID:
- switch (CodeID) {
- default: return nullptr;
- STRINGIFY_CODE(MODULE_CODE, VERSION)
- STRINGIFY_CODE(MODULE_CODE, TRIPLE)
- STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
- STRINGIFY_CODE(MODULE_CODE, ASM)
- STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
- STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0
- STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
- STRINGIFY_CODE(MODULE_CODE, FUNCTION)
- STRINGIFY_CODE(MODULE_CODE, ALIAS)
- STRINGIFY_CODE(MODULE_CODE, GCNAME)
- STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
- STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
- STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
- STRINGIFY_CODE(MODULE_CODE, HASH)
- }
- case bitc::IDENTIFICATION_BLOCK_ID:
- switch (CodeID) {
- default:
- return nullptr;
- STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
- STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
- }
- case bitc::PARAMATTR_BLOCK_ID:
- switch (CodeID) {
- default: return nullptr;
- // FIXME: Should these be different?
- case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY";
- case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
- }
- case bitc::PARAMATTR_GROUP_BLOCK_ID:
- switch (CodeID) {
- default: return nullptr;
- case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY";
- }
- case bitc::TYPE_BLOCK_ID_NEW:
- switch (CodeID) {
- default: return nullptr;
- STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
- STRINGIFY_CODE(TYPE_CODE, VOID)
- STRINGIFY_CODE(TYPE_CODE, FLOAT)
- STRINGIFY_CODE(TYPE_CODE, DOUBLE)
- STRINGIFY_CODE(TYPE_CODE, LABEL)
- STRINGIFY_CODE(TYPE_CODE, OPAQUE)
- STRINGIFY_CODE(TYPE_CODE, INTEGER)
- STRINGIFY_CODE(TYPE_CODE, POINTER)
- STRINGIFY_CODE(TYPE_CODE, ARRAY)
- STRINGIFY_CODE(TYPE_CODE, VECTOR)
- STRINGIFY_CODE(TYPE_CODE, X86_FP80)
- STRINGIFY_CODE(TYPE_CODE, FP128)
- STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
- STRINGIFY_CODE(TYPE_CODE, METADATA)
- STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
- STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
- STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
- STRINGIFY_CODE(TYPE_CODE, FUNCTION)
- }
-
- case bitc::CONSTANTS_BLOCK_ID:
- switch (CodeID) {
- default: return nullptr;
- STRINGIFY_CODE(CST_CODE, SETTYPE)
- STRINGIFY_CODE(CST_CODE, NULL)
- STRINGIFY_CODE(CST_CODE, UNDEF)
- STRINGIFY_CODE(CST_CODE, INTEGER)
- STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
- STRINGIFY_CODE(CST_CODE, FLOAT)
- STRINGIFY_CODE(CST_CODE, AGGREGATE)
- STRINGIFY_CODE(CST_CODE, STRING)
- STRINGIFY_CODE(CST_CODE, CSTRING)
- STRINGIFY_CODE(CST_CODE, CE_BINOP)
- STRINGIFY_CODE(CST_CODE, CE_CAST)
- STRINGIFY_CODE(CST_CODE, CE_GEP)
- STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
- STRINGIFY_CODE(CST_CODE, CE_SELECT)
- STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
- STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
- STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
- STRINGIFY_CODE(CST_CODE, CE_CMP)
- STRINGIFY_CODE(CST_CODE, INLINEASM)
- STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
- STRINGIFY_CODE(CST_CODE, CE_UNOP)
- case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS";
- STRINGIFY_CODE(CST_CODE, DATA)
- }
- case bitc::FUNCTION_BLOCK_ID:
- switch (CodeID) {
- default: return nullptr;
- STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
- STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
- STRINGIFY_CODE(FUNC_CODE, INST_CAST)
- STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
- STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
- STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
- STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
- STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
- STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
- STRINGIFY_CODE(FUNC_CODE, INST_CMP)
- STRINGIFY_CODE(FUNC_CODE, INST_RET)
- STRINGIFY_CODE(FUNC_CODE, INST_BR)
- STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
- STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
- STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
- STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
- STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
- STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
- STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
- STRINGIFY_CODE(FUNC_CODE, INST_PHI)
- STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
- STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
- STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
- STRINGIFY_CODE(FUNC_CODE, INST_STORE)
- STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
- STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
- STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
- STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
- STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
- STRINGIFY_CODE(FUNC_CODE, INST_CALL)
- STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
- STRINGIFY_CODE(FUNC_CODE, INST_GEP)
- STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
- STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
- STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
- STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
- STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
- STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
- }
- case bitc::VALUE_SYMTAB_BLOCK_ID:
- switch (CodeID) {
- default: return nullptr;
- STRINGIFY_CODE(VST_CODE, ENTRY)
- STRINGIFY_CODE(VST_CODE, BBENTRY)
- STRINGIFY_CODE(VST_CODE, FNENTRY)
- STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
- }
- case bitc::MODULE_STRTAB_BLOCK_ID:
- switch (CodeID) {
- default:
- return nullptr;
- STRINGIFY_CODE(MST_CODE, ENTRY)
- STRINGIFY_CODE(MST_CODE, HASH)
- }
- case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
- case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
- switch (CodeID) {
- default:
- return nullptr;
- STRINGIFY_CODE(FS, PERMODULE)
- STRINGIFY_CODE(FS, PERMODULE_PROFILE)
- STRINGIFY_CODE(FS, PERMODULE_RELBF)
- STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
- STRINGIFY_CODE(FS, COMBINED)
- STRINGIFY_CODE(FS, COMBINED_PROFILE)
- STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
- STRINGIFY_CODE(FS, ALIAS)
- STRINGIFY_CODE(FS, COMBINED_ALIAS)
- STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
- STRINGIFY_CODE(FS, VERSION)
- STRINGIFY_CODE(FS, FLAGS)
- STRINGIFY_CODE(FS, TYPE_TESTS)
- STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
- STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
- STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
- STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
- STRINGIFY_CODE(FS, VALUE_GUID)
- STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
- STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
- STRINGIFY_CODE(FS, TYPE_ID)
- }
- case bitc::METADATA_ATTACHMENT_ID:
- switch(CodeID) {
- default:return nullptr;
- STRINGIFY_CODE(METADATA, ATTACHMENT)
- }
- case bitc::METADATA_BLOCK_ID:
- switch(CodeID) {
- default:return nullptr;
- STRINGIFY_CODE(METADATA, STRING_OLD)
- STRINGIFY_CODE(METADATA, VALUE)
- STRINGIFY_CODE(METADATA, NODE)
- STRINGIFY_CODE(METADATA, NAME)
- STRINGIFY_CODE(METADATA, DISTINCT_NODE)
- STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
- STRINGIFY_CODE(METADATA, LOCATION)
- STRINGIFY_CODE(METADATA, OLD_NODE)
- STRINGIFY_CODE(METADATA, OLD_FN_NODE)
- STRINGIFY_CODE(METADATA, NAMED_NODE)
- STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
- STRINGIFY_CODE(METADATA, SUBRANGE)
- STRINGIFY_CODE(METADATA, ENUMERATOR)
- STRINGIFY_CODE(METADATA, BASIC_TYPE)
- STRINGIFY_CODE(METADATA, FILE)
- STRINGIFY_CODE(METADATA, DERIVED_TYPE)
- STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
- STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
- STRINGIFY_CODE(METADATA, COMPILE_UNIT)
- STRINGIFY_CODE(METADATA, SUBPROGRAM)
- STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
- STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
- STRINGIFY_CODE(METADATA, NAMESPACE)
- STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
- STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
- STRINGIFY_CODE(METADATA, GLOBAL_VAR)
- STRINGIFY_CODE(METADATA, LOCAL_VAR)
- STRINGIFY_CODE(METADATA, EXPRESSION)
- STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
- STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
- STRINGIFY_CODE(METADATA, MODULE)
- STRINGIFY_CODE(METADATA, MACRO)
- STRINGIFY_CODE(METADATA, MACRO_FILE)
- STRINGIFY_CODE(METADATA, STRINGS)
- STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
- STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
- STRINGIFY_CODE(METADATA, INDEX_OFFSET)
- STRINGIFY_CODE(METADATA, INDEX)
- }
- case bitc::METADATA_KIND_BLOCK_ID:
- switch (CodeID) {
- default:
- return nullptr;
- STRINGIFY_CODE(METADATA, KIND)
- }
- case bitc::USELIST_BLOCK_ID:
- switch(CodeID) {
- default:return nullptr;
- case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT";
- case bitc::USELIST_CODE_BB: return "USELIST_CODE_BB";
- }
-
- case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
- switch(CodeID) {
- default: return nullptr;
- case bitc::OPERAND_BUNDLE_TAG: return "OPERAND_BUNDLE_TAG";
- }
- case bitc::STRTAB_BLOCK_ID:
- switch(CodeID) {
- default: return nullptr;
- case bitc::STRTAB_BLOB: return "BLOB";
- }
- case bitc::SYMTAB_BLOCK_ID:
- switch(CodeID) {
- default: return nullptr;
- case bitc::SYMTAB_BLOB: return "BLOB";
- }
- }
-#undef STRINGIFY_CODE
-}
-
-struct PerRecordStats {
- unsigned NumInstances;
- unsigned NumAbbrev;
- uint64_t TotalBits;
-
- PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
-};
-
-struct PerBlockIDStats {
- /// NumInstances - This the number of times this block ID has been seen.
- unsigned NumInstances;
-
- /// NumBits - The total size in bits of all of these blocks.
- uint64_t NumBits;
-
- /// NumSubBlocks - The total number of blocks these blocks contain.
- unsigned NumSubBlocks;
-
- /// NumAbbrevs - The total number of abbreviations.
- unsigned NumAbbrevs;
-
- /// NumRecords - The total number of records these blocks contain, and the
- /// number that are abbreviated.
- unsigned NumRecords, NumAbbreviatedRecords;
-
- /// CodeFreq - Keep track of the number of times we see each code.
- std::vector<PerRecordStats> CodeFreq;
-
- PerBlockIDStats()
- : NumInstances(0), NumBits(0),
- NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {}
-};
-
-static std::map<unsigned, PerBlockIDStats> BlockIDStats;
-
-
-
-/// ReportError - All bitcode analysis errors go through this function, making this a
-/// good place to breakpoint if debugging.
-static bool ReportError(const Twine &Err) {
- WithColor::error() << Err << "\n";
- return true;
-}
-
-static bool decodeMetadataStringsBlob(StringRef Indent,
- ArrayRef<uint64_t> Record,
- StringRef Blob) {
- if (Blob.empty())
- return true;
-
- if (Record.size() != 2)
- return true;
-
- unsigned NumStrings = Record[0];
- unsigned StringsOffset = Record[1];
- outs() << " num-strings = " << NumStrings << " {\n";
-
- StringRef Lengths = Blob.slice(0, StringsOffset);
- SimpleBitstreamCursor R(Lengths);
- StringRef Strings = Blob.drop_front(StringsOffset);
- do {
- if (R.AtEndOfStream())
- return ReportError("bad length");
-
- unsigned Size = R.ReadVBR(6);
- if (Strings.size() < Size)
- return ReportError("truncated chars");
-
- outs() << Indent << " '";
- outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
- outs() << "'\n";
- Strings = Strings.drop_front(Size);
- } while (--NumStrings);
-
- outs() << Indent << " }";
- return false;
-}
-
-static bool decodeBlob(unsigned Code, unsigned BlockID, StringRef Indent,
- ArrayRef<uint64_t> Record, StringRef Blob) {
- if (BlockID != bitc::METADATA_BLOCK_ID)
- return true;
- if (Code != bitc::METADATA_STRINGS)
- return true;
-
- return decodeMetadataStringsBlob(Indent, Record, Blob);
-}
-
-/// ParseBlock - Read a block, updating statistics, etc.
-static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,
- unsigned BlockID, unsigned IndentLevel,
- CurStreamTypeType CurStreamType) {
- std::string Indent(IndentLevel*2, ' ');
- uint64_t BlockBitStart = Stream.GetCurrentBitNo();
-
- // Get the statistics for this BlockID.
- PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
-
- BlockStats.NumInstances++;
-
- // BLOCKINFO is a special part of the stream.
- bool DumpRecords = Dump;
- if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n";
- Optional<BitstreamBlockInfo> NewBlockInfo =
- Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
- if (!NewBlockInfo)
- return ReportError("Malformed BlockInfoBlock");
- BlockInfo = std::move(*NewBlockInfo);
- Stream.JumpToBit(BlockBitStart);
- // It's not really interesting to dump the contents of the blockinfo block.
- DumpRecords = false;
- }
-
- unsigned NumWords = 0;
- if (Stream.EnterSubBlock(BlockID, &NumWords))
- return ReportError("Malformed block record");
-
- // Keep it for later, when we see a MODULE_HASH record
- uint64_t BlockEntryPos = Stream.getCurrentByteNo();
-
- const char *BlockName = nullptr;
- if (DumpRecords) {
- outs() << Indent << "<";
- if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
- outs() << BlockName;
- else
- outs() << "UnknownBlock" << BlockID;
-
- if (NonSymbolic && BlockName)
- outs() << " BlockID=" << BlockID;
-
- outs() << " NumWords=" << NumWords
- << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
- }
-
- SmallVector<uint64_t, 64> Record;
-
- // Keep the offset to the metadata index if seen.
- uint64_t MetadataIndexOffset = 0;
-
- // Read all the records for this block.
- while (1) {
- if (Stream.AtEndOfStream())
- return ReportError("Premature end of bitstream");
-
- uint64_t RecordStartBit = Stream.GetCurrentBitNo();
-
- BitstreamEntry Entry =
- Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-
- switch (Entry.Kind) {
- case BitstreamEntry::Error:
- return ReportError("malformed bitcode file");
- case BitstreamEntry::EndBlock: {
- uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
- BlockStats.NumBits += BlockBitEnd-BlockBitStart;
- if (DumpRecords) {
- outs() << Indent << "</";
- if (BlockName)
- outs() << BlockName << ">\n";
- else
- outs() << "UnknownBlock" << BlockID << ">\n";
- }
- return false;
- }
-
- case BitstreamEntry::SubBlock: {
- uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
- if (ParseBlock(Stream, BlockInfo, Entry.ID, IndentLevel + 1,
- CurStreamType))
- return true;
- ++BlockStats.NumSubBlocks;
- uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
-
- // Don't include subblock sizes in the size of this block.
- BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
- continue;
- }
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- if (Entry.ID == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- ++BlockStats.NumAbbrevs;
- continue;
- }
-
- Record.clear();
-
- ++BlockStats.NumRecords;
-
- StringRef Blob;
- uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
- unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
-
- // Increment the # occurrences of this code.
- if (BlockStats.CodeFreq.size() <= Code)
- BlockStats.CodeFreq.resize(Code+1);
- BlockStats.CodeFreq[Code].NumInstances++;
- BlockStats.CodeFreq[Code].TotalBits +=
- Stream.GetCurrentBitNo()-RecordStartBit;
- if (Entry.ID != bitc::UNABBREV_RECORD) {
- BlockStats.CodeFreq[Code].NumAbbrev++;
- ++BlockStats.NumAbbreviatedRecords;
- }
-
- if (DumpRecords) {
- outs() << Indent << " <";
- if (const char *CodeName =
- GetCodeName(Code, BlockID, BlockInfo, CurStreamType))
- outs() << CodeName;
- else
- outs() << "UnknownCode" << Code;
- if (NonSymbolic && GetCodeName(Code, BlockID, BlockInfo, CurStreamType))
- outs() << " codeid=" << Code;
- const BitCodeAbbrev *Abbv = nullptr;
- if (Entry.ID != bitc::UNABBREV_RECORD) {
- Abbv = Stream.getAbbrev(Entry.ID);
- outs() << " abbrevid=" << Entry.ID;
- }
-
- for (unsigned i = 0, e = Record.size(); i != e; ++i)
- outs() << " op" << i << "=" << (int64_t)Record[i];
-
- // If we found a metadata index, let's verify that we had an offset before
- // and validate its forward reference offset was correct!
- if (BlockID == bitc::METADATA_BLOCK_ID) {
- if (Code == bitc::METADATA_INDEX_OFFSET) {
- if (Record.size() != 2)
- outs() << "(Invalid record)";
- else {
- auto Offset = Record[0] + (Record[1] << 32);
- MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
- }
- }
- if (Code == bitc::METADATA_INDEX) {
- outs() << " (offset ";
- if (MetadataIndexOffset == RecordStartBit)
- outs() << "match)";
- else
- outs() << "mismatch: " << MetadataIndexOffset << " vs "
- << RecordStartBit << ")";
- }
- }
-
- // If we found a module hash, let's verify that it matches!
- if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
- !CheckHash.empty()) {
- if (Record.size() != 5)
- outs() << " (invalid)";
- else {
- // Recompute the hash and compare it to the one in the bitcode
- SHA1 Hasher;
- StringRef Hash;
- Hasher.update(CheckHash);
- {
- int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
- auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
- Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
- Hash = Hasher.result();
- }
- SmallString<20> RecordedHash;
- RecordedHash.resize(20);
- int Pos = 0;
- for (auto &Val : Record) {
- assert(!(Val >> 32) && "Unexpected high bits set");
- RecordedHash[Pos++] = (Val >> 24) & 0xFF;
- RecordedHash[Pos++] = (Val >> 16) & 0xFF;
- RecordedHash[Pos++] = (Val >> 8) & 0xFF;
- RecordedHash[Pos++] = (Val >> 0) & 0xFF;
- }
- if (Hash == RecordedHash)
- outs() << " (match)";
- else
- outs() << " (!mismatch!)";
- }
- }
-
- outs() << "/>";
-
- if (Abbv) {
- for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
- const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
- if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
- continue;
- assert(i + 2 == e && "Array op not second to last");
- std::string Str;
- bool ArrayIsPrintable = true;
- for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
- if (!isPrint(static_cast<unsigned char>(Record[j]))) {
- ArrayIsPrintable = false;
- break;
- }
- Str += (char)Record[j];
- }
- if (ArrayIsPrintable)
- outs() << " record string = '" << Str << "'";
- break;
- }
- }
-
- if (Blob.data() && decodeBlob(Code, BlockID, Indent, Record, Blob)) {
- outs() << " blob data = ";
- if (ShowBinaryBlobs) {
- outs() << "'";
- outs().write_escaped(Blob, /*hex=*/true) << "'";
- } else {
- bool BlobIsPrintable = true;
- for (unsigned i = 0, e = Blob.size(); i != e; ++i)
- if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
- BlobIsPrintable = false;
- break;
- }
-
- if (BlobIsPrintable)
- outs() << "'" << Blob << "'";
- else
- outs() << "unprintable, " << Blob.size() << " bytes.";
- }
- }
-
- outs() << "\n";
- }
-
- // Make sure that we can skip the current record.
- Stream.JumpToBit(CurrentRecordPos);
- Stream.skipRecord(Entry.ID);
- }
-}
-
-static void PrintSize(double Bits) {
- outs() << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32));
-}
-static void PrintSize(uint64_t Bits) {
- outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits,
- (double)Bits/8, (unsigned long)(Bits/32));
+static Error reportError(StringRef Message) {
+ return createStringError(std::errc::illegal_byte_sequence, Message.data());
}
-static CurStreamTypeType ReadSignature(BitstreamCursor &Stream) {
- char Signature[6];
- Signature[0] = Stream.Read(8);
- Signature[1] = Stream.Read(8);
-
- // Autodetect the file contents, if it is one we know.
- if (Signature[0] == 'C' && Signature[1] == 'P') {
- Signature[2] = Stream.Read(8);
- Signature[3] = Stream.Read(8);
- if (Signature[2] == 'C' && Signature[3] == 'H')
- return ClangSerializedASTBitstream;
- } else if (Signature[0] == 'D' && Signature[1] == 'I') {
- Signature[2] = Stream.Read(8);
- Signature[3] = Stream.Read(8);
- if (Signature[2] == 'A' && Signature[3] == 'G')
- return ClangSerializedDiagnosticsBitstream;
- } else {
- Signature[2] = Stream.Read(4);
- Signature[3] = Stream.Read(4);
- Signature[4] = Stream.Read(4);
- Signature[5] = Stream.Read(4);
- if (Signature[0] == 'B' && Signature[1] == 'C' &&
- Signature[2] == 0x0 && Signature[3] == 0xC &&
- Signature[4] == 0xE && Signature[5] == 0xD)
- return LLVMIRBitstream;
- }
- return UnknownBitstream;
-}
-
-static bool openBitcodeFile(StringRef Path,
- std::unique_ptr<MemoryBuffer> &MemBuf,
- BitstreamCursor &Stream,
- CurStreamTypeType &CurStreamType) {
+static Expected<std::unique_ptr<MemoryBuffer>> openBitcodeFile(StringRef Path) {
// Read the input file.
- ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
- MemoryBuffer::getFileOrSTDIN(Path);
- if (std::error_code EC = MemBufOrErr.getError())
- return ReportError(Twine("ReportError reading '") + Path + "': " + EC.message());
- MemBuf = std::move(MemBufOrErr.get());
-
- if (MemBuf->getBufferSize() & 3)
- return ReportError("Bitcode stream should be a multiple of 4 bytes in length");
-
- const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
- const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize();
+ Expected<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
+ errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Path));
+ if (Error E = MemBufOrErr.takeError())
+ return std::move(E);
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
- if (MemBuf->getBufferSize() < BWH_HeaderSize)
- return ReportError("Invalid bitcode wrapper header");
+ std::unique_ptr<MemoryBuffer> MemBuf = std::move(*MemBufOrErr);
- if (Dump) {
- unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
- unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
- unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
- unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
- unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
-
- outs() << "<BITCODE_WRAPPER_HEADER"
- << " Magic=" << format_hex(Magic, 10)
- << " Version=" << format_hex(Version, 10)
- << " Offset=" << format_hex(Offset, 10)
- << " Size=" << format_hex(Size, 10)
- << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
- }
-
- if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
- return ReportError("Invalid bitcode wrapper header");
- }
-
- Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
- CurStreamType = ReadSignature(Stream);
-
- return false;
+ if (MemBuf->getBufferSize() & 3)
+ return reportError(
+ "Bitcode stream should be a multiple of 4 bytes in length");
+ return std::move(MemBuf);
}
-/// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
-static int AnalyzeBitcode() {
- std::unique_ptr<MemoryBuffer> StreamBuffer;
- BitstreamCursor Stream;
- BitstreamBlockInfo BlockInfo;
- CurStreamTypeType CurStreamType;
- if (openBitcodeFile(InputFilename, StreamBuffer, Stream, CurStreamType))
- return true;
- Stream.setBlockInfo(&BlockInfo);
-
- // Read block info from BlockInfoFilename, if specified.
- // The block info must be a top-level block.
- if (!BlockInfoFilename.empty()) {
- std::unique_ptr<MemoryBuffer> BlockInfoBuffer;
- BitstreamCursor BlockInfoCursor;
- CurStreamTypeType BlockInfoStreamType;
- if (openBitcodeFile(BlockInfoFilename, BlockInfoBuffer, BlockInfoCursor,
- BlockInfoStreamType))
- return true;
-
- while (!BlockInfoCursor.AtEndOfStream()) {
- unsigned Code = BlockInfoCursor.ReadCode();
- if (Code != bitc::ENTER_SUBBLOCK)
- return ReportError("Invalid record at top-level in block info file");
-
- unsigned BlockID = BlockInfoCursor.ReadSubBlockID();
- if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- Optional<BitstreamBlockInfo> NewBlockInfo =
- BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
- if (!NewBlockInfo)
- return ReportError("Malformed BlockInfoBlock in block info file");
- BlockInfo = std::move(*NewBlockInfo);
- break;
- }
-
- BlockInfoCursor.SkipBlock();
- }
- }
-
- unsigned NumTopBlocks = 0;
-
- // Parse the top-level structure. We only allow blocks at the top-level.
- while (!Stream.AtEndOfStream()) {
- unsigned Code = Stream.ReadCode();
- if (Code != bitc::ENTER_SUBBLOCK)
- return ReportError("Invalid record at top-level");
-
- unsigned BlockID = Stream.ReadSubBlockID();
-
- if (ParseBlock(Stream, BlockInfo, BlockID, 0, CurStreamType))
- return true;
- ++NumTopBlocks;
- }
-
- if (Dump) outs() << "\n\n";
-
- uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
- // Print a summary of the read file.
- outs() << "Summary of " << InputFilename << ":\n";
- outs() << " Total size: ";
- PrintSize(BufferSizeBits);
- outs() << "\n";
- outs() << " Stream type: ";
- switch (CurStreamType) {
- case UnknownBitstream:
- outs() << "unknown\n";
- break;
- case LLVMIRBitstream:
- outs() << "LLVM IR\n";
- break;
- case ClangSerializedASTBitstream:
- outs() << "Clang Serialized AST\n";
- break;
- case ClangSerializedDiagnosticsBitstream:
- outs() << "Clang Serialized Diagnostics\n";
- break;
- }
- outs() << " # Toplevel Blocks: " << NumTopBlocks << "\n";
- outs() << "\n";
-
- // Emit per-block stats.
- outs() << "Per-block Summary:\n";
- for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
- E = BlockIDStats.end(); I != E; ++I) {
- outs() << " Block ID #" << I->first;
- if (const char *BlockName =
- GetBlockName(I->first, BlockInfo, CurStreamType))
- outs() << " (" << BlockName << ")";
- outs() << ":\n";
-
- const PerBlockIDStats &Stats = I->second;
- outs() << " Num Instances: " << Stats.NumInstances << "\n";
- outs() << " Total Size: ";
- PrintSize(Stats.NumBits);
- outs() << "\n";
- double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
- outs() << " Percent of file: " << format("%2.4f%%", pct) << "\n";
- if (Stats.NumInstances > 1) {
- outs() << " Average Size: ";
- PrintSize(Stats.NumBits/(double)Stats.NumInstances);
- outs() << "\n";
- outs() << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
- << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
- outs() << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
- << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
- outs() << " Tot/Avg Records: " << Stats.NumRecords << "/"
- << Stats.NumRecords/(double)Stats.NumInstances << "\n";
- } else {
- outs() << " Num SubBlocks: " << Stats.NumSubBlocks << "\n";
- outs() << " Num Abbrevs: " << Stats.NumAbbrevs << "\n";
- outs() << " Num Records: " << Stats.NumRecords << "\n";
- }
- if (Stats.NumRecords) {
- double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
- outs() << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
- }
- outs() << "\n";
-
- // Print a histogram of the codes we see.
- if (!NoHistogram && !Stats.CodeFreq.empty()) {
- std::vector<std::pair<unsigned, unsigned> > FreqPairs; // <freq,code>
- for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
- if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
- FreqPairs.push_back(std::make_pair(Freq, i));
- std::stable_sort(FreqPairs.begin(), FreqPairs.end());
- std::reverse(FreqPairs.begin(), FreqPairs.end());
+int main(int argc, char **argv) {
+ InitLLVM X(argc, argv);
+ cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n");
+ ExitOnError ExitOnErr("llvm-bcanalyzer: ");
- outs() << "\tRecord Histogram:\n";
- outs() << "\t\t Count # Bits b/Rec % Abv Record Kind\n";
- for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
- const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
+ std::unique_ptr<MemoryBuffer> MB = ExitOnErr(openBitcodeFile(InputFilename));
+ std::unique_ptr<MemoryBuffer> BlockInfoMB = nullptr;
+ if (!BlockInfoFilename.empty())
+ BlockInfoMB = ExitOnErr(openBitcodeFile(BlockInfoFilename));
- outs() << format("\t\t%7d %9lu",
- RecStats.NumInstances,
- (unsigned long)RecStats.TotalBits);
+ BitcodeAnalyzer BA(MB->getBuffer(),
+ BlockInfoMB ? Optional<StringRef>(BlockInfoMB->getBuffer())
+ : None);
- if (RecStats.NumInstances > 1)
- outs() << format(" %9.1f",
- (double)RecStats.TotalBits/RecStats.NumInstances);
- else
- outs() << " ";
+ BCDumpOptions O(outs());
+ O.Histogram = !NoHistogram;
+ O.Symbolic = !NonSymbolic;
+ O.ShowBinaryBlobs = ShowBinaryBlobs;
- if (RecStats.NumAbbrev)
- outs() <<
- format(" %7.2f",
- (double)RecStats.NumAbbrev/RecStats.NumInstances*100);
- else
- outs() << " ";
+ ExitOnErr(
+ BA.analyze(O, CheckHash.empty() ? None : Optional<StringRef>(CheckHash)));
- outs() << " ";
- if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first,
- BlockInfo, CurStreamType))
- outs() << CodeName << "\n";
- else
- outs() << "UnknownCode" << FreqPairs[i].second << "\n";
- }
- outs() << "\n";
+ if (Dump)
+ outs() << "\n\n";
- }
- }
+ BA.printStats(O, StringRef(InputFilename.getValue()));
return 0;
}
-
-
-int main(int argc, char **argv) {
- InitLLVM X(argc, argv);
- cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n");
- return AnalyzeBitcode();
-}
diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp
index 728e00e7c3c2..f707e3c7ab53 100644
--- a/tools/llvm-cov/CodeCoverage.cpp
+++ b/tools/llvm-cov/CodeCoverage.cpp
@@ -1,9 +1,8 @@
//===- CodeCoverage.cpp - Coverage tool based on profiling instrumentation-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1007,10 +1006,23 @@ int CodeCoverageTool::doReport(int argc, const char **argv,
int CodeCoverageTool::doExport(int argc, const char **argv,
CommandLineParserType commandLineParser) {
+ cl::OptionCategory ExportCategory("Exporting options");
+
+ cl::opt<bool> SkipExpansions("skip-expansions", cl::Optional,
+ cl::desc("Don't export expanded source regions"),
+ cl::cat(ExportCategory));
+
+ cl::opt<bool> SkipFunctions("skip-functions", cl::Optional,
+ cl::desc("Don't export per-function data"),
+ cl::cat(ExportCategory));
+
auto Err = commandLineParser(argc, argv);
if (Err)
return Err;
+ ViewOpts.SkipExpansions = SkipExpansions;
+ ViewOpts.SkipFunctions = SkipFunctions;
+
if (ViewOpts.Format != CoverageViewOptions::OutputFormat::Text &&
ViewOpts.Format != CoverageViewOptions::OutputFormat::Lcov) {
error("Coverage data can only be exported as textual JSON or an "
diff --git a/tools/llvm-cov/CoverageExporter.h b/tools/llvm-cov/CoverageExporter.h
index b226d68813d9..751e55dc0916 100644
--- a/tools/llvm-cov/CoverageExporter.h
+++ b/tools/llvm-cov/CoverageExporter.h
@@ -1,9 +1,8 @@
//===- CoverageExporter.h - Code coverage exporter ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageExporterJson.cpp b/tools/llvm-cov/CoverageExporterJson.cpp
index 22243f8e2c3e..181d428ed9d8 100644
--- a/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/tools/llvm-cov/CoverageExporterJson.cpp
@@ -1,9 +1,8 @@
//===- CoverageExporterJson.cpp - Code coverage export --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,7 +42,14 @@
#include "CoverageExporterJson.h"
#include "CoverageReport.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/JSON.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include <algorithm>
+#include <mutex>
+#include <utility>
/// The semantic version combined as a string.
#define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.0"
@@ -128,13 +134,15 @@ json::Array renderFileSegments(const coverage::CoverageData &FileCoverage,
json::Object renderFile(const coverage::CoverageMapping &Coverage,
const std::string &Filename,
const FileCoverageSummary &FileReport,
- bool ExportSummaryOnly) {
+ const CoverageViewOptions &Options) {
json::Object File({{"filename", Filename}});
- if (!ExportSummaryOnly) {
+ if (!Options.ExportSummaryOnly) {
// Calculate and render detailed coverage information for given file.
auto FileCoverage = Coverage.getCoverageForFile(Filename);
File["segments"] = renderFileSegments(FileCoverage, FileReport);
- File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+ if (!Options.SkipExpansions) {
+ File["expansions"] = renderFileExpansions(FileCoverage, FileReport);
+ }
}
File["summary"] = renderSummary(FileReport);
return File;
@@ -143,11 +151,28 @@ json::Object renderFile(const coverage::CoverageMapping &Coverage,
json::Array renderFiles(const coverage::CoverageMapping &Coverage,
ArrayRef<std::string> SourceFiles,
ArrayRef<FileCoverageSummary> FileReports,
- bool ExportSummaryOnly) {
+ const CoverageViewOptions &Options) {
+ auto NumThreads = Options.NumThreads;
+ if (NumThreads == 0) {
+ NumThreads = std::max(1U, std::min(llvm::heavyweight_hardware_concurrency(),
+ unsigned(SourceFiles.size())));
+ }
+ ThreadPool Pool(NumThreads);
json::Array FileArray;
- for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I)
- FileArray.push_back(renderFile(Coverage, SourceFiles[I], FileReports[I],
- ExportSummaryOnly));
+ std::mutex FileArrayMutex;
+
+ for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I) {
+ auto &SourceFile = SourceFiles[I];
+ auto &FileReport = FileReports[I];
+ Pool.async([&] {
+ auto File = renderFile(Coverage, SourceFile, FileReport, Options);
+ {
+ std::lock_guard<std::mutex> Lock(FileArrayMutex);
+ FileArray.push_back(std::move(File));
+ }
+ });
+ }
+ Pool.wait();
return FileArray;
}
@@ -178,12 +203,22 @@ void CoverageExporterJson::renderRoot(ArrayRef<std::string> SourceFiles) {
FileCoverageSummary Totals = FileCoverageSummary("Totals");
auto FileReports = CoverageReport::prepareFileReports(Coverage, Totals,
SourceFiles, Options);
- auto Export =
- json::Object({{"files", renderFiles(Coverage, SourceFiles, FileReports,
- Options.ExportSummaryOnly)},
- {"totals", renderSummary(Totals)}});
- // Skip functions-level information for summary-only export mode.
- if (!Options.ExportSummaryOnly)
+ auto Files = renderFiles(Coverage, SourceFiles, FileReports, Options);
+ // Sort files in order of their names.
+ std::sort(Files.begin(), Files.end(),
+ [](const json::Value &A, const json::Value &B) {
+ const json::Object *ObjA = A.getAsObject();
+ const json::Object *ObjB = B.getAsObject();
+ assert(ObjA != nullptr && "Value A was not an Object");
+ assert(ObjB != nullptr && "Value B was not an Object");
+ const StringRef FilenameA = ObjA->getString("filename").getValue();
+ const StringRef FilenameB = ObjB->getString("filename").getValue();
+ return FilenameA.compare(FilenameB) < 0;
+ });
+ auto Export = json::Object(
+ {{"files", std::move(Files)}, {"totals", renderSummary(Totals)}});
+ // Skip functions-level information if necessary.
+ if (!Options.ExportSummaryOnly && !Options.SkipFunctions)
Export["functions"] = renderFunctions(Coverage.getCoveredFunctions());
auto ExportArray = json::Array({std::move(Export)});
diff --git a/tools/llvm-cov/CoverageExporterJson.h b/tools/llvm-cov/CoverageExporterJson.h
index c37c86b42be9..c19475005552 100644
--- a/tools/llvm-cov/CoverageExporterJson.h
+++ b/tools/llvm-cov/CoverageExporterJson.h
@@ -1,9 +1,8 @@
//===- CoverageExporterJson.h - Code coverage JSON exporter ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageExporterLcov.cpp b/tools/llvm-cov/CoverageExporterLcov.cpp
index d149ba1a4c87..d9b0c3b0d7a8 100644
--- a/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -1,9 +1,8 @@
//===- CoverageExporterLcov.cpp - Code coverage export --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,7 +82,7 @@ void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
OS << "SF:" << Filename << '\n';
if (!ExportSummaryOnly) {
- renderFunctions(OS, Coverage.getCoveredFunctions());
+ renderFunctions(OS, Coverage.getCoveredFunctions(Filename));
}
renderFunctionSummary(OS, FileReport);
diff --git a/tools/llvm-cov/CoverageExporterLcov.h b/tools/llvm-cov/CoverageExporterLcov.h
index 539b2dacd384..e8a260bf4937 100644
--- a/tools/llvm-cov/CoverageExporterLcov.h
+++ b/tools/llvm-cov/CoverageExporterLcov.h
@@ -1,9 +1,8 @@
//===- CoverageExporterLcov.h - Code coverage lcov exporter ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageFilters.cpp b/tools/llvm-cov/CoverageFilters.cpp
index 4dd0f552c7e0..ca241e386e87 100644
--- a/tools/llvm-cov/CoverageFilters.cpp
+++ b/tools/llvm-cov/CoverageFilters.cpp
@@ -1,9 +1,8 @@
//===- CoverageFilters.cpp - Function coverage mapping filters ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageFilters.h b/tools/llvm-cov/CoverageFilters.h
index 6424ca5a8081..ce56e1607111 100644
--- a/tools/llvm-cov/CoverageFilters.h
+++ b/tools/llvm-cov/CoverageFilters.h
@@ -1,9 +1,8 @@
//===- CoverageFilters.h - Function coverage mapping filters --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageReport.cpp b/tools/llvm-cov/CoverageReport.cpp
index 607a3ceb30cb..82259542c597 100644
--- a/tools/llvm-cov/CoverageReport.cpp
+++ b/tools/llvm-cov/CoverageReport.cpp
@@ -1,9 +1,8 @@
//===- CoverageReport.cpp - Code coverage report -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageReport.h b/tools/llvm-cov/CoverageReport.h
index 4a6527e9fe5d..f9a092f510b5 100644
--- a/tools/llvm-cov/CoverageReport.h
+++ b/tools/llvm-cov/CoverageReport.h
@@ -1,9 +1,8 @@
//===- CoverageReport.h - Code coverage report ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageSummaryInfo.cpp b/tools/llvm-cov/CoverageSummaryInfo.cpp
index 7847a2abf48c..1029f7784040 100644
--- a/tools/llvm-cov/CoverageSummaryInfo.cpp
+++ b/tools/llvm-cov/CoverageSummaryInfo.cpp
@@ -1,9 +1,8 @@
//===- CoverageSummaryInfo.cpp - Coverage summary for function/file -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageSummaryInfo.h b/tools/llvm-cov/CoverageSummaryInfo.h
index 0845e2ce2e77..97beacb26d07 100644
--- a/tools/llvm-cov/CoverageSummaryInfo.h
+++ b/tools/llvm-cov/CoverageSummaryInfo.h
@@ -1,9 +1,8 @@
//===- CoverageSummaryInfo.h - Coverage summary for function/file ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cov/CoverageViewOptions.h b/tools/llvm-cov/CoverageViewOptions.h
index c8a472860027..dde0c692ab05 100644
--- a/tools/llvm-cov/CoverageViewOptions.h
+++ b/tools/llvm-cov/CoverageViewOptions.h
@@ -1,9 +1,8 @@
//===- CoverageViewOptions.h - Code coverage display options -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,6 +34,8 @@ struct CoverageViewOptions {
bool ShowRegionSummary;
bool ShowInstantiationSummary;
bool ExportSummaryOnly;
+ bool SkipExpansions;
+ bool SkipFunctions;
OutputFormat Format;
std::string ShowOutputDirectory;
std::vector<std::string> DemanglerOpts;
diff --git a/tools/llvm-cov/RenderingSupport.h b/tools/llvm-cov/RenderingSupport.h
index 2cfe24919142..0674fbac9a3c 100644
--- a/tools/llvm-cov/RenderingSupport.h
+++ b/tools/llvm-cov/RenderingSupport.h
@@ -1,9 +1,8 @@
//===- RenderingSupport.h - output stream rendering support functions ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-cov/SourceCoverageView.cpp b/tools/llvm-cov/SourceCoverageView.cpp
index cebaf63adb12..616f667e2c84 100644
--- a/tools/llvm-cov/SourceCoverageView.cpp
+++ b/tools/llvm-cov/SourceCoverageView.cpp
@@ -1,9 +1,8 @@
//===- SourceCoverageView.cpp - Code coverage view for source code --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -190,8 +189,8 @@ void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
// We need the expansions and instantiations sorted so we can go through them
// while we iterate lines.
- std::stable_sort(ExpansionSubViews.begin(), ExpansionSubViews.end());
- std::stable_sort(InstantiationSubViews.begin(), InstantiationSubViews.end());
+ llvm::stable_sort(ExpansionSubViews);
+ llvm::stable_sort(InstantiationSubViews);
auto NextESV = ExpansionSubViews.begin();
auto EndESV = ExpansionSubViews.end();
auto NextISV = InstantiationSubViews.begin();
diff --git a/tools/llvm-cov/SourceCoverageView.h b/tools/llvm-cov/SourceCoverageView.h
index e3a2f9e5c0b4..9ae928443651 100644
--- a/tools/llvm-cov/SourceCoverageView.h
+++ b/tools/llvm-cov/SourceCoverageView.h
@@ -1,9 +1,8 @@
//===- SourceCoverageView.h - Code coverage view for source code ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/llvm-cov/SourceCoverageViewHTML.cpp b/tools/llvm-cov/SourceCoverageViewHTML.cpp
index 3f730bb7bc82..e3332245f9c8 100644
--- a/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -1,9 +1,8 @@
//===- SourceCoverageViewHTML.cpp - A html code coverage view -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/llvm-cov/SourceCoverageViewHTML.h b/tools/llvm-cov/SourceCoverageViewHTML.h
index cb41fcaf37b9..9834040008a6 100644
--- a/tools/llvm-cov/SourceCoverageViewHTML.h
+++ b/tools/llvm-cov/SourceCoverageViewHTML.h
@@ -1,9 +1,8 @@
//===- SourceCoverageViewHTML.h - A html code coverage view ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/llvm-cov/SourceCoverageViewText.cpp b/tools/llvm-cov/SourceCoverageViewText.cpp
index aac70baed613..fcabee2ee69d 100644
--- a/tools/llvm-cov/SourceCoverageViewText.cpp
+++ b/tools/llvm-cov/SourceCoverageViewText.cpp
@@ -1,9 +1,8 @@
//===- SourceCoverageViewText.cpp - A text-based code coverage view -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/llvm-cov/SourceCoverageViewText.h b/tools/llvm-cov/SourceCoverageViewText.h
index a46f35cc6495..c8c4632c3b9d 100644
--- a/tools/llvm-cov/SourceCoverageViewText.h
+++ b/tools/llvm-cov/SourceCoverageViewText.h
@@ -1,9 +1,8 @@
//===- SourceCoverageViewText.h - A text-based code coverage view ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/llvm-cov/TestingSupport.cpp b/tools/llvm-cov/TestingSupport.cpp
index 16a1c2665299..3ee318c9c640 100644
--- a/tools/llvm-cov/TestingSupport.cpp
+++ b/tools/llvm-cov/TestingSupport.cpp
@@ -1,9 +1,8 @@
//===- TestingSupport.cpp - Convert objects files into test files --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -70,9 +69,18 @@ int convertForTestingMain(int argc, const char *argv[]) {
uint64_t ProfileNamesAddress = ProfileNames.getAddress();
StringRef CoverageMappingData;
StringRef ProfileNamesData;
- if (CoverageMapping.getContents(CoverageMappingData) ||
- ProfileNames.getContents(ProfileNamesData))
+ if (Expected<StringRef> E = CoverageMapping.getContents())
+ CoverageMappingData = *E;
+ else {
+ consumeError(E.takeError());
+ return 1;
+ }
+ if (Expected<StringRef> E = ProfileNames.getContents())
+ ProfileNamesData = *E;
+ else {
+ consumeError(E.takeError());
return 1;
+ }
int FD;
if (auto Err = sys::fs::openFileForWrite(OutputFilename, FD)) {
diff --git a/tools/llvm-cov/gcov.cpp b/tools/llvm-cov/gcov.cpp
index 7776f2aa9a68..8a00ff64711f 100644
--- a/tools/llvm-cov/gcov.cpp
+++ b/tools/llvm-cov/gcov.cpp
@@ -1,9 +1,8 @@
//===- gcov.cpp - GCOV compatible LLVM coverage tool ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -125,6 +124,11 @@ int gcovMain(int argc, const char *argv[]) {
"(requires -b)"));
cl::alias UncondBranchA("unconditional-branches", cl::aliasopt(UncondBranch));
+ cl::opt<bool> HashFilenames("x", cl::Grouping, cl::init(false),
+ cl::desc("Hash long pathnames"));
+ cl::alias HashFilenamesA("hash-filenames", cl::aliasopt(HashFilenames));
+
+
cl::OptionCategory DebugCat("Internal and debugging options");
cl::opt<bool> DumpGCOV("dump", cl::init(false), cl::cat(DebugCat),
cl::desc("Dump the gcov file to stderr"));
@@ -136,7 +140,8 @@ int gcovMain(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM code coverage tool\n");
GCOV::Options Options(AllBlocks, BranchProb, BranchCount, FuncSummary,
- PreservePaths, UncondBranch, LongNames, NoOutput);
+ PreservePaths, UncondBranch, LongNames, NoOutput,
+ HashFilenames);
for (const auto &SourceFile : SourceFiles)
reportCoverage(SourceFile, ObjectDir, InputGCNO, InputGCDA, DumpGCOV,
diff --git a/tools/llvm-cov/llvm-cov.cpp b/tools/llvm-cov/llvm-cov.cpp
index 4c3b574451c3..172ec9f3cedf 100644
--- a/tools/llvm-cov/llvm-cov.cpp
+++ b/tools/llvm-cov/llvm-cov.cpp
@@ -1,9 +1,8 @@
//===- llvm-cov.cpp - LLVM coverage tool ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cxxdump/Error.cpp b/tools/llvm-cxxdump/Error.cpp
index 54207fad32af..25317820409c 100644
--- a/tools/llvm-cxxdump/Error.cpp
+++ b/tools/llvm-cxxdump/Error.cpp
@@ -1,9 +1,8 @@
//===- Error.cpp - system_error extensions for llvm-cxxdump -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cxxdump/Error.h b/tools/llvm-cxxdump/Error.h
index 7caf6d6447c9..439902fa3803 100644
--- a/tools/llvm-cxxdump/Error.h
+++ b/tools/llvm-cxxdump/Error.h
@@ -1,9 +1,8 @@
//===- Error.h - system_error extensions for llvm-cxxdump -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.cpp b/tools/llvm-cxxdump/llvm-cxxdump.cpp
index 7594066a395d..833312655788 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -1,9 +1,8 @@
//===- llvm-cxxdump.cpp - Dump C++ data in an Object File -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -49,15 +48,20 @@ static void error(std::error_code EC) {
exit(1);
}
-static void error(Error Err) {
- if (!Err)
- return;
+LLVM_ATTRIBUTE_NORETURN static void error(Error Err) {
logAllUnhandledErrors(std::move(Err), WithColor::error(outs()),
"reading file: ");
outs().flush();
exit(1);
}
+template <typename T>
+T unwrapOrError(Expected<T> EO) {
+ if (!EO)
+ error(EO.takeError());
+ return std::move(*EO);
+}
+
} // namespace llvm
static void reportError(StringRef Input, StringRef Message) {
@@ -196,8 +200,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
// Skip virtual or BSS sections.
if (Sec.isBSS() || Sec.isVirtual())
continue;
- StringRef SecContents;
- error(Sec.getContents(SecContents));
+ StringRef SecContents = unwrapOrError(Sec.getContents());
Expected<uint64_t> SymAddressOrErr = Sym.getAddress();
error(errorToErrorCode(SymAddressOrErr.takeError()));
uint64_t SymAddress = *SymAddressOrErr;
@@ -511,7 +514,8 @@ static void dumpArchive(const Archive *Arc) {
else
reportError(Arc->getFileName(), cxxdump_error::unrecognized_file_format);
}
- error(std::move(Err));
+ if (Err)
+ error(std::move(Err));
}
static void dumpInput(StringRef File) {
diff --git a/tools/llvm-cxxdump/llvm-cxxdump.h b/tools/llvm-cxxdump/llvm-cxxdump.h
index daa05cb2ca0a..739cfe481a4b 100644
--- a/tools/llvm-cxxdump/llvm-cxxdump.h
+++ b/tools/llvm-cxxdump/llvm-cxxdump.h
@@ -1,9 +1,8 @@
//===-- llvm-cxxdump.h ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index afc1e4a8d128..9ac8bcf0ff01 100644
--- a/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -1,12 +1,12 @@
//===-- llvm-c++filt.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
@@ -25,7 +25,7 @@ enum Style {
EDG, ///< EDG compiler
GNUv3, ///< GNU C++ v3 ABI
Java, ///< Java (gcj)
- GNAT ///< ADA copiler (gnat)
+ GNAT ///< ADA compiler (gnat)
};
static cl::opt<Style>
Format("format", cl::desc("decoration style"),
@@ -52,31 +52,84 @@ static cl::alias TypesShort("t", cl::desc("alias for --types"),
static cl::list<std::string>
Decorated(cl::Positional, cl::desc("<mangled>"), cl::ZeroOrMore);
-static void demangle(llvm::raw_ostream &OS, const std::string &Mangled) {
+static cl::extrahelp
+ HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
+
+static std::string demangle(llvm::raw_ostream &OS, const std::string &Mangled) {
int Status;
- const char *Decorated = Mangled.c_str();
+ const char *DecoratedStr = Mangled.c_str();
if (StripUnderscore)
- if (Decorated[0] == '_')
- ++Decorated;
- size_t DecoratedLength = strlen(Decorated);
+ if (DecoratedStr[0] == '_')
+ ++DecoratedStr;
+ size_t DecoratedLength = strlen(DecoratedStr);
char *Undecorated = nullptr;
- if (Types || ((DecoratedLength >= 2 && strncmp(Decorated, "_Z", 2) == 0) ||
- (DecoratedLength >= 4 && strncmp(Decorated, "___Z", 4) == 0)))
- Undecorated = itaniumDemangle(Decorated, nullptr, nullptr, &Status);
+ if (Types ||
+ ((DecoratedLength >= 2 && strncmp(DecoratedStr, "_Z", 2) == 0) ||
+ (DecoratedLength >= 4 && strncmp(DecoratedStr, "___Z", 4) == 0)))
+ Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, &Status);
if (!Undecorated &&
- (DecoratedLength > 6 && strncmp(Decorated, "__imp_", 6) == 0)) {
+ (DecoratedLength > 6 && strncmp(DecoratedStr, "__imp_", 6) == 0)) {
OS << "import thunk for ";
- Undecorated = itaniumDemangle(Decorated + 6, nullptr, nullptr, &Status);
+ Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
}
- OS << (Undecorated ? Undecorated : Mangled) << '\n';
- OS.flush();
-
+ std::string Result(Undecorated ? Undecorated : Mangled);
free(Undecorated);
+ return Result;
+}
+
+// Split 'Source' on any character that fails to pass 'IsLegalChar'. The
+// returned vector consists of pairs where 'first' is the delimited word, and
+// 'second' are the delimiters following that word.
+static void SplitStringDelims(
+ StringRef Source,
+ SmallVectorImpl<std::pair<StringRef, StringRef>> &OutFragments,
+ function_ref<bool(char)> IsLegalChar) {
+ // The beginning of the input string.
+ const auto Head = Source.begin();
+
+ // Obtain any leading delimiters.
+ auto Start = std::find_if(Head, Source.end(), IsLegalChar);
+ if (Start != Head)
+ OutFragments.push_back({"", Source.slice(0, Start - Head)});
+
+ // Capture each word and the delimiters following that word.
+ while (Start != Source.end()) {
+ Start = std::find_if(Start, Source.end(), IsLegalChar);
+ auto End = std::find_if_not(Start, Source.end(), IsLegalChar);
+ auto DEnd = std::find_if(End, Source.end(), IsLegalChar);
+ OutFragments.push_back({Source.slice(Start - Head, End - Head),
+ Source.slice(End - Head, DEnd - Head)});
+ Start = DEnd;
+ }
+}
+
+// This returns true if 'C' is a character that can show up in an
+// Itanium-mangled string.
+static bool IsLegalItaniumChar(char C) {
+ // Itanium CXX ABI [External Names]p5.1.1:
+ // '$' and '.' in mangled names are reserved for private implementations.
+ return isalnum(C) || C == '.' || C == '$' || C == '_';
+}
+
+// If 'Split' is true, then 'Mangled' is broken into individual words and each
+// word is demangled. Otherwise, the entire string is treated as a single
+// mangled item. The result is output to 'OS'.
+static void demangleLine(llvm::raw_ostream &OS, StringRef Mangled, bool Split) {
+ std::string Result;
+ if (Split) {
+ SmallVector<std::pair<StringRef, StringRef>, 16> Words;
+ SplitStringDelims(Mangled, Words, IsLegalItaniumChar);
+ for (const auto &Word : Words)
+ Result += demangle(OS, Word.first) + Word.second.str();
+ } else
+ Result = demangle(OS, Mangled);
+ OS << Result << '\n';
+ OS.flush();
}
int main(int argc, char **argv) {
@@ -86,10 +139,10 @@ int main(int argc, char **argv) {
if (Decorated.empty())
for (std::string Mangled; std::getline(std::cin, Mangled);)
- demangle(llvm::outs(), Mangled);
+ demangleLine(llvm::outs(), Mangled, true);
else
for (const auto &Symbol : Decorated)
- demangle(llvm::outs(), Symbol);
+ demangleLine(llvm::outs(), Symbol, false);
return EXIT_SUCCESS;
}
diff --git a/tools/llvm-cxxmap/llvm-cxxmap.cpp b/tools/llvm-cxxmap/llvm-cxxmap.cpp
index 39028cc86723..87d4d06bbc96 100644
--- a/tools/llvm-cxxmap/llvm-cxxmap.cpp
+++ b/tools/llvm-cxxmap/llvm-cxxmap.cpp
@@ -1,9 +1,8 @@
//===- llvm-cxxmap.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-diff/DiffConsumer.cpp b/tools/llvm-diff/DiffConsumer.cpp
index ec189df27521..b797143bde1b 100644
--- a/tools/llvm-diff/DiffConsumer.cpp
+++ b/tools/llvm-diff/DiffConsumer.cpp
@@ -1,9 +1,8 @@
//===-- DiffConsumer.cpp - Difference Consumer ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,8 +12,8 @@
#include "DiffConsumer.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
diff --git a/tools/llvm-diff/DiffConsumer.h b/tools/llvm-diff/DiffConsumer.h
index 82f5ce598b44..6cb8f2eb7eeb 100644
--- a/tools/llvm-diff/DiffConsumer.h
+++ b/tools/llvm-diff/DiffConsumer.h
@@ -1,9 +1,8 @@
//===-- DiffConsumer.h - Difference Consumer --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-diff/DiffLog.cpp b/tools/llvm-diff/DiffLog.cpp
index 50c0c4cff2fc..6484197521f2 100644
--- a/tools/llvm-diff/DiffLog.cpp
+++ b/tools/llvm-diff/DiffLog.cpp
@@ -1,9 +1,8 @@
//===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-diff/DiffLog.h b/tools/llvm-diff/DiffLog.h
index 8f28461afdde..0c8952496155 100644
--- a/tools/llvm-diff/DiffLog.h
+++ b/tools/llvm-diff/DiffLog.h
@@ -1,9 +1,8 @@
//===-- DiffLog.h - Difference Log Builder and accessories ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index acff8bb3e89b..bc93ece86490 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -1,9 +1,8 @@
//===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,7 +67,7 @@ public:
unsigned NewSize = Storage.size() - 1;
if (NewSize) {
// Move the slot at the end to the beginning.
- if (isPodLike<T>::value)
+ if (is_trivially_copyable<T>::value)
Storage[0] = Storage[NewSize];
else
std::swap(Storage[0], Storage[NewSize]);
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
index 7f084a377f0c..da1b6526a6e2 100644
--- a/tools/llvm-diff/DifferenceEngine.h
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -1,9 +1,8 @@
//===-- DifferenceEngine.h - Module comparator ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index e449d6994784..aaf7989e2e3d 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -1,9 +1,8 @@
//===-- llvm-diff.cpp - Module comparator command-line driver ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 8143a2a5a934..3f337b874b16 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -1,9 +1,8 @@
//===-- llvm-dis.cpp - The low-level LLVM disassembler --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-dwarfdump/Statistics.cpp b/tools/llvm-dwarfdump/Statistics.cpp
index 5fe7e8b4615b..f26369b935cb 100644
--- a/tools/llvm-dwarfdump/Statistics.cpp
+++ b/tools/llvm-dwarfdump/Statistics.cpp
@@ -15,14 +15,38 @@ using namespace object;
struct PerFunctionStats {
/// Number of inlined instances of this function.
unsigned NumFnInlined = 0;
- /// Number of variables with location across all inlined instances.
+ /// Number of inlined instances that have abstract origins.
+ unsigned NumAbstractOrigins = 0;
+ /// Number of variables and parameters with location across all inlined
+ /// instances.
unsigned TotalVarWithLoc = 0;
/// Number of constants with location across all inlined instances.
unsigned ConstantMembers = 0;
- /// List of all Variables in this function.
+ /// List of all Variables and parameters in this function.
StringSet<> VarsInFunction;
/// Compile units also cover a PC range, but have this flag set to false.
bool IsFunction = false;
+  /// Whether the function definition has PC addresses (used to detect when
+  /// a function has been inlined everywhere).
+ bool HasPCAddresses = false;
+ /// Function has source location information.
+ bool HasSourceLocation = false;
+ /// Number of function parameters.
+ unsigned NumParams = 0;
+ /// Number of function parameters with source location.
+ unsigned NumParamSourceLocations = 0;
+ /// Number of function parameters with type.
+ unsigned NumParamTypes = 0;
+ /// Number of function parameters with a DW_AT_location.
+ unsigned NumParamLocations = 0;
+ /// Number of variables.
+ unsigned NumVars = 0;
+ /// Number of variables with source location.
+ unsigned NumVarSourceLocations = 0;
+  /// Number of variables with type.
+  unsigned NumVarTypes = 0;
+  /// Number of variables with DW_AT_location.
+ unsigned NumVarLocations = 0;
};
/// Holds accumulated global statistics about DIEs.
@@ -32,7 +56,8 @@ struct GlobalStats {
/// Total number of PC range bytes in each variable's enclosing scope,
/// starting from the first definition of the variable.
unsigned ScopeBytesFromFirstDefinition = 0;
- /// Total number of call site entries (DW_TAG_call_site).
+ /// Total number of call site entries (DW_TAG_call_site) or
+ /// (DW_AT_call_file & DW_AT_call_line).
unsigned CallSiteEntries = 0;
/// Total byte size of concrete functions. This byte size includes
/// inline functions contained in the concrete functions.
@@ -59,11 +84,13 @@ static uint64_t getLowPC(DWARFDie Die) {
/// Collect debug info quality metrics for one DIE.
static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
std::string VarPrefix, uint64_t ScopeLowPC,
- uint64_t BytesInScope,
- uint32_t InlineDepth,
+ uint64_t BytesInScope, uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats) {
bool HasLoc = false;
+ bool HasSrcLoc = false;
+ bool HasType = false;
+ bool IsArtificial = false;
uint64_t BytesCovered = 0;
uint64_t OffsetToFirstDefinition = 0;
@@ -79,6 +106,16 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
return;
}
+ if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
+ Die.findRecursively(dwarf::DW_AT_decl_line))
+ HasSrcLoc = true;
+
+ if (Die.findRecursively(dwarf::DW_AT_type))
+ HasType = true;
+
+ if (Die.find(dwarf::DW_AT_artificial))
+ IsArtificial = true;
+
if (Die.find(dwarf::DW_AT_const_value)) {
// This catches constant members *and* variables.
HasLoc = true;
@@ -125,7 +162,7 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
// By using the variable name + the path through the lexical block tree, the
// keys are consistent across duplicate abstract origins in different CUs.
std::string VarName = StringRef(Die.getName(DINameKind::ShortName));
- FnStats.VarsInFunction.insert(VarPrefix+VarName);
+ FnStats.VarsInFunction.insert(VarPrefix + VarName);
if (BytesInScope) {
FnStats.TotalVarWithLoc += (unsigned)HasLoc;
// Adjust for the fact the variables often start their lifetime in the
@@ -136,16 +173,36 @@ static void collectStatsForDie(DWARFDie Die, std::string FnPrefix,
GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope;
assert(GlobalStats.ScopeBytesCovered <=
GlobalStats.ScopeBytesFromFirstDefinition);
- } else {
+ } else if (Die.getTag() == dwarf::DW_TAG_member) {
FnStats.ConstantMembers++;
+ } else {
+ FnStats.TotalVarWithLoc += (unsigned)HasLoc;
+ }
+ if (!IsArtificial) {
+ if (Die.getTag() == dwarf::DW_TAG_formal_parameter) {
+ FnStats.NumParams++;
+ if (HasType)
+ FnStats.NumParamTypes++;
+ if (HasSrcLoc)
+ FnStats.NumParamSourceLocations++;
+ if (HasLoc)
+ FnStats.NumParamLocations++;
+ } else if (Die.getTag() == dwarf::DW_TAG_variable) {
+ FnStats.NumVars++;
+ if (HasType)
+ FnStats.NumVarTypes++;
+ if (HasSrcLoc)
+ FnStats.NumVarSourceLocations++;
+ if (HasLoc)
+ FnStats.NumVarLocations++;
+ }
}
}
/// Recursively collect debug info quality metrics.
static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
std::string VarPrefix, uint64_t ScopeLowPC,
- uint64_t BytesInScope,
- uint32_t InlineDepth,
+ uint64_t BytesInScope, uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap,
GlobalStats &GlobalStats) {
// Handle any kind of lexical scope.
@@ -164,20 +221,9 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
if (Die.find(dwarf::DW_AT_declaration))
return;
- // Count the function.
- if (!IsBlock) {
- StringRef Name = Die.getName(DINameKind::LinkageName);
- if (Name.empty())
- Name = Die.getName(DINameKind::ShortName);
- FnPrefix = Name;
- // Skip over abstract origins.
- if (Die.find(dwarf::DW_AT_inline))
- return;
- // We've seen an (inlined) instance of this function.
- auto &FnStats = FnStatMap[Name];
- FnStats.NumFnInlined++;
- FnStats.IsFunction = true;
- }
+ // Check for call sites.
+ if (Die.find(dwarf::DW_AT_call_file) && Die.find(dwarf::DW_AT_call_line))
+ GlobalStats.CallSiteEntries++;
// PC Ranges.
auto RangesOrError = Die.getAddressRanges();
@@ -192,6 +238,31 @@ static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix,
BytesInThisScope += Range.HighPC - Range.LowPC;
ScopeLowPC = getLowPC(Die);
+ // Count the function.
+ if (!IsBlock) {
+ StringRef Name = Die.getName(DINameKind::LinkageName);
+ if (Name.empty())
+ Name = Die.getName(DINameKind::ShortName);
+ FnPrefix = Name;
+ // Skip over abstract origins.
+ if (Die.find(dwarf::DW_AT_inline))
+ return;
+ // We've seen an (inlined) instance of this function.
+ auto &FnStats = FnStatMap[Name];
+ if (IsInlinedFunction) {
+ FnStats.NumFnInlined++;
+ if (Die.findRecursively(dwarf::DW_AT_abstract_origin))
+ FnStats.NumAbstractOrigins++;
+ }
+ FnStats.IsFunction = true;
+ if (BytesInThisScope && !IsInlinedFunction)
+ FnStats.HasPCAddresses = true;
+ std::string FnName = StringRef(Die.getName(DINameKind::ShortName));
+ if (Die.findRecursively(dwarf::DW_AT_decl_file) &&
+ Die.findRecursively(dwarf::DW_AT_decl_line))
+ FnStats.HasSourceLocation = true;
+ }
+
if (BytesInThisScope) {
BytesInScope = BytesInThisScope;
if (IsFunction)
@@ -252,29 +323,53 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
GlobalStats GlobalStats;
StringMap<PerFunctionStats> Statistics;
for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units())
- if (DWARFDie CUDie = CU->getUnitDIE(false))
+ if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false))
collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats);
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
/// version.
- unsigned Version = 1;
- unsigned VarTotal = 0;
- unsigned VarUnique = 0;
- unsigned VarWithLoc = 0;
+ unsigned Version = 3;
+ unsigned VarParamTotal = 0;
+ unsigned VarParamUnique = 0;
+ unsigned VarParamWithLoc = 0;
unsigned NumFunctions = 0;
unsigned NumInlinedFunctions = 0;
+ unsigned NumFuncsWithSrcLoc = 0;
+ unsigned NumAbstractOrigins = 0;
+ unsigned ParamTotal = 0;
+ unsigned ParamWithType = 0;
+ unsigned ParamWithLoc = 0;
+ unsigned ParamWithSrcLoc = 0;
+ unsigned VarTotal = 0;
+ unsigned VarWithType = 0;
+ unsigned VarWithSrcLoc = 0;
+ unsigned VarWithLoc = 0;
for (auto &Entry : Statistics) {
PerFunctionStats &Stats = Entry.getValue();
unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined;
+ // Count variables in concrete out-of-line functions and in global scope.
+ if (Stats.HasPCAddresses || !Stats.IsFunction)
+ TotalVars += Stats.VarsInFunction.size();
unsigned Constants = Stats.ConstantMembers;
- VarWithLoc += Stats.TotalVarWithLoc + Constants;
- VarTotal += TotalVars + Constants;
- VarUnique += Stats.VarsInFunction.size();
- LLVM_DEBUG(for (auto &V : Stats.VarsInFunction) llvm::dbgs()
+ VarParamWithLoc += Stats.TotalVarWithLoc + Constants;
+ VarParamTotal += TotalVars;
+ VarParamUnique += Stats.VarsInFunction.size();
+ LLVM_DEBUG(for (auto &V
+ : Stats.VarsInFunction) llvm::dbgs()
<< Entry.getKey() << ": " << V.getKey() << "\n");
NumFunctions += Stats.IsFunction;
+ NumFuncsWithSrcLoc += Stats.HasSourceLocation;
NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined;
+ NumAbstractOrigins += Stats.IsFunction * Stats.NumAbstractOrigins;
+ ParamTotal += Stats.NumParams;
+ ParamWithType += Stats.NumParamTypes;
+ ParamWithLoc += Stats.NumParamLocations;
+ ParamWithSrcLoc += Stats.NumParamSourceLocations;
+ VarTotal += Stats.NumVars;
+ VarWithType += Stats.NumVarTypes;
+ VarWithLoc += Stats.NumVarLocations;
+ VarWithSrcLoc += Stats.NumVarSourceLocations;
}
// Print summary.
@@ -285,20 +380,31 @@ bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(OS, "file", Filename.str());
printDatum(OS, "format", FormatName);
printDatum(OS, "source functions", NumFunctions);
+ printDatum(OS, "source functions with location", NumFuncsWithSrcLoc);
printDatum(OS, "inlined functions", NumInlinedFunctions);
- printDatum(OS, "unique source variables", VarUnique);
- printDatum(OS, "source variables", VarTotal);
- printDatum(OS, "variables with location", VarWithLoc);
+ printDatum(OS, "inlined funcs with abstract origins", NumAbstractOrigins);
+ printDatum(OS, "unique source variables", VarParamUnique);
+ printDatum(OS, "source variables", VarParamTotal);
+ printDatum(OS, "variables with location", VarParamWithLoc);
printDatum(OS, "call site entries", GlobalStats.CallSiteEntries);
printDatum(OS, "scope bytes total",
GlobalStats.ScopeBytesFromFirstDefinition);
printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered);
printDatum(OS, "total function size", GlobalStats.FunctionSize);
printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize);
+ printDatum(OS, "total formal params", ParamTotal);
+ printDatum(OS, "formal params with source location", ParamWithSrcLoc);
+ printDatum(OS, "formal params with type", ParamWithType);
+ printDatum(OS, "formal params with binary location", ParamWithLoc);
+ printDatum(OS, "total vars", VarTotal);
+ printDatum(OS, "vars with source location", VarWithSrcLoc);
+ printDatum(OS, "vars with type", VarWithType);
+ printDatum(OS, "vars with binary location", VarWithLoc);
OS << "}\n";
LLVM_DEBUG(
llvm::dbgs() << "Total Availability: "
- << (int)std::round((VarWithLoc * 100.0) / VarTotal) << "%\n";
+ << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
+ << "%\n";
llvm::dbgs() << "PC Ranges covered: "
<< (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
GlobalStats.ScopeBytesFromFirstDefinition)
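For context on the debug output at the end of the hunk above: availability is reported as a rounded percentage of variables and parameters that carry a location. A minimal standalone C++ sketch of that calculation, with hypothetical counts; the guard against a zero total is added here for safety and is not in the tool's debug path:

    #include <cmath>
    #include <cstdio>

    int main() {
      // Hypothetical counters standing in for VarParamWithLoc / VarParamTotal.
      unsigned VarParamWithLoc = 42, VarParamTotal = 50;
      int Availability =
          VarParamTotal
              ? (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
              : 0;
      std::printf("Total Availability: %d%%\n", Availability); // prints 84%
      return 0;
    }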
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index d9e8e36efe5c..05a7aef67ece 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -1,9 +1,8 @@
//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -93,8 +92,6 @@ namespace {
using namespace cl;
OptionCategory DwarfDumpCategory("Specific Options");
-static opt<bool> Help("h", desc("Alias for -help"), Hidden,
- cat(DwarfDumpCategory));
static list<std::string>
InputFilenames(Positional, desc("<input object files or .dSYM bundles>"),
ZeroOrMore, cat(DwarfDumpCategory));
@@ -142,10 +139,9 @@ static list<std::string>
"-name option can be used instead."),
value_desc("name"), cat(DwarfDumpCategory));
static alias FindAlias("f", desc("Alias for -find."), aliasopt(Find));
-static opt<bool>
- IgnoreCase("ignore-case",
- desc("Ignore case distinctions in when searching by name."),
- value_desc("i"), cat(DwarfDumpCategory));
+static opt<bool> IgnoreCase("ignore-case",
+ desc("Ignore case distinctions when searching."),
+ value_desc("i"), cat(DwarfDumpCategory));
static alias IgnoreCaseAlias("i", desc("Alias for -ignore-case."),
aliasopt(IgnoreCase));
static list<std::string> Name(
@@ -155,17 +151,17 @@ static list<std::string> Name(
"the -regex option <pattern> is interpreted as a regular expression."),
value_desc("pattern"), cat(DwarfDumpCategory));
static alias NameAlias("n", desc("Alias for -name"), aliasopt(Name));
-static opt<unsigned long long> Lookup("lookup",
+static opt<uint64_t>
+ Lookup("lookup",
desc("Lookup <address> in the debug information and print out any "
"available file, function, block and line table details."),
value_desc("address"), cat(DwarfDumpCategory));
static opt<std::string>
- OutputFilename("out-file", cl::init(""),
+ OutputFilename("o", cl::init("-"),
cl::desc("Redirect output to the specified file."),
- cl::value_desc("filename"));
-static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
- aliasopt(OutputFilename),
- cat(DwarfDumpCategory));
+ cl::value_desc("filename"), cat(DwarfDumpCategory));
+static alias OutputFilenameAlias("out-file", desc("Alias for -o."),
+ aliasopt(OutputFilename));
static opt<bool>
UseRegex("regex",
desc("Treat any <pattern> strings as regular expressions when "
@@ -175,14 +171,14 @@ static alias RegexAlias("x", desc("Alias for -regex"), aliasopt(UseRegex));
static opt<bool>
ShowChildren("show-children",
desc("Show a debug info entry's children when selectively "
- "printing with the =<offset> option."),
+ "printing entries."),
cat(DwarfDumpCategory));
static alias ShowChildrenAlias("c", desc("Alias for -show-children."),
aliasopt(ShowChildren));
static opt<bool>
ShowParents("show-parents",
desc("Show a debug info entry's parents when selectively "
- "printing with the =<offset> option."),
+ "printing entries."),
cat(DwarfDumpCategory));
static alias ShowParentsAlias("p", desc("Alias for -show-parents."),
aliasopt(ShowParents));
@@ -192,13 +188,18 @@ static opt<bool>
cat(DwarfDumpCategory));
static alias ShowFormAlias("F", desc("Alias for -show-form."),
aliasopt(ShowForm), cat(DwarfDumpCategory));
-static opt<unsigned> RecurseDepth(
- "recurse-depth",
- desc("Only recurse to a depth of N when displaying debug info entries."),
- cat(DwarfDumpCategory), init(-1U), value_desc("N"));
-static alias RecurseDepthAlias("r", desc("Alias for -recurse-depth."),
- aliasopt(RecurseDepth));
-
+static opt<unsigned>
+ ChildRecurseDepth("recurse-depth",
+ desc("Only recurse to a depth of N when displaying "
+ "children of debug info entries."),
+ cat(DwarfDumpCategory), init(-1U), value_desc("N"));
+static alias ChildRecurseDepthAlias("r", desc("Alias for -recurse-depth."),
+ aliasopt(ChildRecurseDepth));
+static opt<unsigned>
+ ParentRecurseDepth("parent-recurse-depth",
+ desc("Only recurse to a depth of N when displaying "
+ "parents of debug info entries."),
+ cat(DwarfDumpCategory), init(-1U), value_desc("N"));
static opt<bool>
SummarizeTypes("summarize-types",
desc("Abbreviate the description of type unit entries."),
@@ -219,6 +220,8 @@ static opt<bool> Verbose("verbose",
cat(DwarfDumpCategory));
static alias VerboseAlias("v", desc("Alias for -verbose."), aliasopt(Verbose),
cat(DwarfDumpCategory));
+static cl::extrahelp
+ HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
} // namespace
/// @}
//===----------------------------------------------------------------------===//
@@ -233,7 +236,8 @@ static void error(StringRef Prefix, std::error_code EC) {
static DIDumpOptions getDumpOpts() {
DIDumpOptions DumpOpts;
DumpOpts.DumpType = DumpType;
- DumpOpts.RecurseDepth = RecurseDepth;
+ DumpOpts.ChildRecurseDepth = ChildRecurseDepth;
+ DumpOpts.ParentRecurseDepth = ParentRecurseDepth;
DumpOpts.ShowAddresses = !Diff;
DumpOpts.ShowChildren = ShowChildren;
DumpOpts.ShowParents = ShowParents;
@@ -259,19 +263,16 @@ static bool filterArch(ObjectFile &Obj) {
return true;
if (auto *MachO = dyn_cast<MachOObjectFile>(&Obj)) {
- std::string ObjArch =
- Triple::getArchTypeName(MachO->getArchTriple().getArch());
-
for (auto Arch : ArchFilters) {
- // Match name.
- if (Arch == ObjArch)
- return true;
-
// Match architecture number.
unsigned Value;
if (!StringRef(Arch).getAsInteger(0, Value))
if (Value == getCPUType(*MachO))
return true;
+
+ // Match as name.
+ if (MachO->getArchTriple().getArch() == Triple(Arch).getArch())
+ return true;
}
}
return false;
@@ -380,14 +381,19 @@ static void filterByAccelName(ArrayRef<std::string> Names, DWARFContext &DICtx,
/// Handle the --lookup option and dump the DIEs and line info for the given
/// address.
-static bool lookup(DWARFContext &DICtx, uint64_t Address, raw_ostream &OS) {
+/// TODO: the specified Address for the --lookup option could belong to
+/// several different sections (in a not-yet-linked object file).
+/// llvm-dwarfdump needs to handle this: either extend the lookup option
+/// with section information or display all matching entries.
+static bool lookup(ObjectFile &Obj, DWARFContext &DICtx, uint64_t Address,
+ raw_ostream &OS) {
auto DIEsForAddr = DICtx.getDIEsForAddress(Lookup);
if (!DIEsForAddr)
return false;
DIDumpOptions DumpOpts = getDumpOpts();
- DumpOpts.RecurseDepth = 0;
+ DumpOpts.ChildRecurseDepth = 0;
DIEsForAddr.CompileUnit->dump(OS, DumpOpts);
if (DIEsForAddr.FunctionDIE) {
DIEsForAddr.FunctionDIE.dump(OS, 2, DumpOpts);
@@ -395,7 +401,10 @@ static bool lookup(DWARFContext &DICtx, uint64_t Address, raw_ostream &OS) {
DIEsForAddr.BlockDIE.dump(OS, 4, DumpOpts);
}
- if (DILineInfo LineInfo = DICtx.getLineInfoForAddress(Lookup))
+  // TODO: it is necessary to set a proper SectionIndex here.
+  // object::SectionedAddress::UndefSection works only for absolute addresses.
+ if (DILineInfo LineInfo = DICtx.getLineInfoForAddress(
+ {Lookup, object::SectionedAddress::UndefSection}))
LineInfo.dump(OS);
return true;
@@ -414,7 +423,7 @@ static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx, Twine Filename,
// Handle the --lookup option.
if (Lookup)
- return lookup(DICtx, Lookup, OS);
+ return lookup(Obj, DICtx, Lookup, OS);
// Handle the --name option.
if (!Name.empty()) {
@@ -566,11 +575,6 @@ int main(int argc, char **argv) {
"pretty-print DWARF debug information in object files"
" and debug info archives.\n");
- if (Help) {
- PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
- return 0;
- }
-
// FIXME: Audit interactions between these two options and make them
// compatible.
if (Diff && Verbose) {
@@ -579,17 +583,12 @@ int main(int argc, char **argv) {
return 0;
}
- std::unique_ptr<ToolOutputFile> OutputFile;
- if (!OutputFilename.empty()) {
- std::error_code EC;
- OutputFile = llvm::make_unique<ToolOutputFile>(OutputFilename, EC,
- sys::fs::F_None);
- error("Unable to open output file" + OutputFilename, EC);
- // Don't remove output file if we exit with an error.
- OutputFile->keep();
- }
+ std::error_code EC;
+ ToolOutputFile OutputFile(OutputFilename, EC, sys::fs::OF_None);
+ error("Unable to open output file" + OutputFilename, EC);
+ // Don't remove output file if we exit with an error.
+ OutputFile.keep();
- raw_ostream &OS = OutputFile ? OutputFile->os() : outs();
bool OffsetRequested = false;
// Defaults to dumping all sections, unless brief mode is specified in which
@@ -633,15 +632,15 @@ int main(int argc, char **argv) {
if (Verify) {
// If we encountered errors during verify, exit with a non-zero exit status.
if (!all_of(Objects, [&](std::string Object) {
- return handleFile(Object, verifyObjectFile, OS);
+ return handleFile(Object, verifyObjectFile, OutputFile.os());
}))
- exit(1);
+ return 1;
} else if (Statistics)
for (auto Object : Objects)
- handleFile(Object, collectStatsForObjectFile, OS);
+ handleFile(Object, collectStatsForObjectFile, OutputFile.os());
else
for (auto Object : Objects)
- handleFile(Object, dumpObjectFile, OS);
+ handleFile(Object, dumpObjectFile, OutputFile.os());
return EXIT_SUCCESS;
}
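The output handling above now always goes through ToolOutputFile, defaulting to "-" (stdout). A small sketch of that idiom, not part of the patch, with a made-up file name and simplified error handling:

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/ToolOutputFile.h"

    using namespace llvm;

    static void writeReport() {
      std::error_code EC;
      // "report.txt" is illustrative; llvm-dwarfdump passes OutputFilename.
      ToolOutputFile Out("report.txt", EC, sys::fs::OF_None);
      if (EC)
        return; // the tool instead reports the error and exits
      // Keep the file even if we later exit with an error.
      Out.keep();
      Out.os() << "dump goes here\n";
    }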
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 94aaa2f52eb5..300bc0b4bd52 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -1,9 +1,8 @@
//===- llvm-extract.cpp - LLVM function extraction utility ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,86 +33,99 @@
#include <memory>
using namespace llvm;
+cl::OptionCategory ExtractCat("llvm-extract Options");
+
// InputFilename - The filename to read from.
-static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
- cl::init("-"), cl::value_desc("filename"));
+static cl::opt<std::string> InputFilename(cl::Positional,
+ cl::desc("<input bitcode file>"),
+ cl::init("-"),
+ cl::value_desc("filename"));
-static cl::opt<std::string>
-OutputFilename("o", cl::desc("Specify output filename"),
- cl::value_desc("filename"), cl::init("-"));
+static cl::opt<std::string> OutputFilename("o",
+ cl::desc("Specify output filename"),
+ cl::value_desc("filename"),
+ cl::init("-"), cl::cat(ExtractCat));
-static cl::opt<bool>
-Force("f", cl::desc("Enable binary output on terminals"));
+static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),
+ cl::cat(ExtractCat));
-static cl::opt<bool>
-DeleteFn("delete", cl::desc("Delete specified Globals from Module"));
+static cl::opt<bool> DeleteFn("delete",
+ cl::desc("Delete specified Globals from Module"),
+ cl::cat(ExtractCat));
static cl::opt<bool>
- Recursive("recursive",
- cl::desc("Recursively extract all called functions"));
+ Recursive("recursive", cl::desc("Recursively extract all called functions"),
+ cl::cat(ExtractCat));
// ExtractFuncs - The functions to extract from the module.
static cl::list<std::string>
-ExtractFuncs("func", cl::desc("Specify function to extract"),
- cl::ZeroOrMore, cl::value_desc("function"));
+ ExtractFuncs("func", cl::desc("Specify function to extract"),
+ cl::ZeroOrMore, cl::value_desc("function"),
+ cl::cat(ExtractCat));
// ExtractRegExpFuncs - The functions, matched via regular expression, to
// extract from the module.
static cl::list<std::string>
-ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a "
- "regular expression"),
- cl::ZeroOrMore, cl::value_desc("rfunction"));
+ ExtractRegExpFuncs("rfunc",
+ cl::desc("Specify function(s) to extract using a "
+ "regular expression"),
+ cl::ZeroOrMore, cl::value_desc("rfunction"),
+ cl::cat(ExtractCat));
// ExtractBlocks - The blocks to extract from the module.
-static cl::list<std::string>
- ExtractBlocks("bb",
- cl::desc("Specify <function, basic block> pairs to extract"),
- cl::ZeroOrMore, cl::value_desc("function:bb"));
+static cl::list<std::string> ExtractBlocks(
+ "bb", cl::desc("Specify <function, basic block> pairs to extract"),
+ cl::ZeroOrMore, cl::value_desc("function:bb"), cl::cat(ExtractCat));
// ExtractAlias - The alias to extract from the module.
static cl::list<std::string>
-ExtractAliases("alias", cl::desc("Specify alias to extract"),
- cl::ZeroOrMore, cl::value_desc("alias"));
-
+ ExtractAliases("alias", cl::desc("Specify alias to extract"),
+ cl::ZeroOrMore, cl::value_desc("alias"),
+ cl::cat(ExtractCat));
// ExtractRegExpAliases - The aliases, matched via regular expression, to
// extract from the module.
static cl::list<std::string>
-ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a "
- "regular expression"),
- cl::ZeroOrMore, cl::value_desc("ralias"));
+ ExtractRegExpAliases("ralias",
+ cl::desc("Specify alias(es) to extract using a "
+ "regular expression"),
+ cl::ZeroOrMore, cl::value_desc("ralias"),
+ cl::cat(ExtractCat));
// ExtractGlobals - The globals to extract from the module.
static cl::list<std::string>
-ExtractGlobals("glob", cl::desc("Specify global to extract"),
- cl::ZeroOrMore, cl::value_desc("global"));
+ ExtractGlobals("glob", cl::desc("Specify global to extract"),
+ cl::ZeroOrMore, cl::value_desc("global"),
+ cl::cat(ExtractCat));
// ExtractRegExpGlobals - The globals, matched via regular expression, to
// extract from the module...
static cl::list<std::string>
-ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a "
- "regular expression"),
- cl::ZeroOrMore, cl::value_desc("rglobal"));
+ ExtractRegExpGlobals("rglob",
+ cl::desc("Specify global(s) to extract using a "
+ "regular expression"),
+ cl::ZeroOrMore, cl::value_desc("rglobal"),
+ cl::cat(ExtractCat));
-static cl::opt<bool>
-OutputAssembly("S",
- cl::desc("Write output as LLVM assembly"), cl::Hidden);
+static cl::opt<bool> OutputAssembly("S",
+ cl::desc("Write output as LLVM assembly"),
+ cl::Hidden, cl::cat(ExtractCat));
static cl::opt<bool> PreserveBitcodeUseListOrder(
"preserve-bc-uselistorder",
cl::desc("Preserve use-list order when writing LLVM bitcode."),
- cl::init(true), cl::Hidden);
+ cl::init(true), cl::Hidden, cl::cat(ExtractCat));
static cl::opt<bool> PreserveAssemblyUseListOrder(
"preserve-ll-uselistorder",
cl::desc("Preserve use-list order when writing LLVM assembly."),
- cl::init(false), cl::Hidden);
+ cl::init(false), cl::Hidden, cl::cat(ExtractCat));
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
LLVMContext Context;
+ cl::HideUnrelatedOptions(ExtractCat);
cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n");
// Use lazy loading, since we only care about selected global values.
@@ -230,7 +242,7 @@ int main(int argc, char **argv) {
}
// Figure out which BasicBlocks we should extract.
- SmallVector<BasicBlock *, 4> BBs;
+ SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupOfBBs;
for (StringRef StrPair : ExtractBlocks) {
auto BBInfo = StrPair.split(':');
// Get the function.
@@ -242,17 +254,24 @@ int main(int argc, char **argv) {
}
// Do not materialize this function.
GVs.insert(F);
- // Get the basic block.
- auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
- return BB.getName().equals(BBInfo.second);
- });
- if (Res == F->end()) {
- errs() << argv[0] << ": function " << F->getName()
- << " doesn't contain a basic block named '" << BBInfo.second
- << "'!\n";
- return 1;
+ // Get the basic blocks.
+ SmallVector<BasicBlock *, 16> BBs;
+ SmallVector<StringRef, 16> BBNames;
+ BBInfo.second.split(BBNames, ';', /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ for (StringRef BBName : BBNames) {
+ auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
+ return BB.getName().equals(BBName);
+ });
+ if (Res == F->end()) {
+ errs() << argv[0] << ": function " << F->getName()
+ << " doesn't contain a basic block named '" << BBInfo.second
+ << "'!\n";
+ return 1;
+ }
+ BBs.push_back(&*Res);
}
- BBs.push_back(&*Res);
+ GroupOfBBs.push_back(BBs);
}
// Use *argv instead of argv[0] to work around a wrong GCC warning.
@@ -271,10 +290,10 @@ int main(int argc, char **argv) {
ExitOnErr(F->materialize());
for (auto &BB : *F) {
for (auto &I : BB) {
- auto *CI = dyn_cast<CallInst>(&I);
- if (!CI)
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
continue;
- Function *CF = CI->getCalledFunction();
+ Function *CF = CB->getCalledFunction();
if (!CF)
continue;
if (CF->isDeclaration() || GVs.count(CF))
@@ -317,7 +336,7 @@ int main(int argc, char **argv) {
// functions.
if (!ExtractBlocks.empty()) {
legacy::PassManager PM;
- PM.add(createBlockExtractorPass(BBs, true));
+ PM.add(createBlockExtractorPass(GroupOfBBs, true));
PM.run(*M);
}
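A small illustration, not part of the patch, of how a -bb value such as "foo:bb1;bb2" is split by the code above into a function name and a semicolon-separated list of block names:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"

    using namespace llvm;

    static void parseBBOption(StringRef StrPair) {
      // "foo:bb1;bb2" -> ("foo", "bb1;bb2")
      auto BBInfo = StrPair.split(':');
      // "bb1;bb2" -> {"bb1", "bb2"}; empty names are dropped.
      SmallVector<StringRef, 16> BBNames;
      BBInfo.second.split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
      (void)BBNames;
    }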
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index b7a888375b3d..50ba57178d02 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -1,9 +1,8 @@
//===- llvm-link.cpp - Low-level LLVM linker ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index b6facc919b51..585207b25185 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -1,9 +1,8 @@
//===- llvm-lto: a simple command-line program to link modules with LTO ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -158,8 +157,8 @@ static cl::opt<int>
ThinLTOCachePruningInterval("thinlto-cache-pruning-interval",
cl::init(1200), cl::desc("Set ThinLTO cache pruning interval."));
-static cl::opt<unsigned long long>
- ThinLTOCacheMaxSizeBytes("thinlto-cache-max-size-bytes",
+static cl::opt<uint64_t> ThinLTOCacheMaxSizeBytes(
+ "thinlto-cache-max-size-bytes",
cl::desc("Set ThinLTO cache pruning directory maximum size in bytes."));
static cl::opt<int>
@@ -205,6 +204,10 @@ static cl::opt<bool> ListSymbolsOnly(
"list-symbols-only", cl::init(false),
cl::desc("Instead of running LTO, list the symbols in each IR file"));
+static cl::opt<bool> ListDependentLibrariesOnly(
+ "list-dependent-libraries-only", cl::init(false),
+ cl::desc("Instead of running LTO, list the dependent libraries in each IR file"));
+
static cl::opt<bool> SetMergedModule(
"set-merged-module", cl::init(false),
cl::desc("Use the first input module as the merged module"));
@@ -373,6 +376,34 @@ static void listSymbols(const TargetOptions &Options) {
}
}
+static std::unique_ptr<MemoryBuffer> loadFile(StringRef Filename) {
+ ExitOnError ExitOnErr("llvm-lto: error loading file '" + Filename.str() +
+ "': ");
+ return ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Filename)));
+}
+
+static void listDependentLibraries() {
+ for (auto &Filename : InputFilenames) {
+ auto Buffer = loadFile(Filename);
+ std::string E;
+ std::unique_ptr<lto::InputFile> Input(LTOModule::createInputFile(
+ Buffer->getBufferStart(), Buffer->getBufferSize(), Filename.c_str(),
+ E));
+ if (!Input)
+ error(E);
+
+ // List the dependent libraries.
+ outs() << Filename << ":\n";
+ for (size_t I = 0, C = LTOModule::getDependentLibraryCount(Input.get());
+ I != C; ++I) {
+ size_t L = 0;
+ const char *S = LTOModule::getDependentLibrary(Input.get(), I, &L);
+ assert(S);
+ outs() << StringRef(S, L) << "\n";
+ }
+ }
+}
+
/// Create a combined index file from the input IR files and write it.
///
/// This is meant to enable testing of ThinLTO combined index generation,
@@ -450,22 +481,31 @@ std::unique_ptr<ModuleSummaryIndex> loadCombinedIndex() {
return ExitOnErr(getModuleSummaryIndexForFile(ThinLTOIndex));
}
-static std::unique_ptr<Module> loadModule(StringRef Filename,
- LLVMContext &Ctx) {
- SMDiagnostic Err;
- std::unique_ptr<Module> M(parseIRFile(Filename, Err, Ctx));
- if (!M) {
- Err.print("llvm-lto", errs());
- report_fatal_error("Can't load module for file " + Filename);
- }
- maybeVerifyModule(*M);
+static std::unique_ptr<lto::InputFile> loadInputFile(MemoryBufferRef Buffer) {
+ ExitOnError ExitOnErr("llvm-lto: error loading input '" +
+ Buffer.getBufferIdentifier().str() + "': ");
+ return ExitOnErr(lto::InputFile::create(Buffer));
+}
+static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile &File,
+ LLVMContext &CTX) {
+ auto &Mod = File.getSingleBitcodeModule();
+ auto ModuleOrErr = Mod.parseModule(CTX);
+ if (!ModuleOrErr) {
+ handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+ SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(),
+ SourceMgr::DK_Error, EIB.message());
+ Err.print("llvm-lto", errs());
+ });
+ report_fatal_error("Can't load module, abort.");
+ }
+ maybeVerifyModule(**ModuleOrErr);
if (ThinLTOModuleId.getNumOccurrences()) {
if (InputFilenames.size() != 1)
report_fatal_error("Can't override the module id for multiple files");
- M->setModuleIdentifier(ThinLTOModuleId);
+ (*ModuleOrErr)->setModuleIdentifier(ThinLTOModuleId);
}
- return M;
+ return std::move(*ModuleOrErr);
}
static void writeModuleToFile(Module &TheModule, StringRef Filename) {
@@ -563,13 +603,15 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
- auto TheModule = loadModule(Filename, Ctx);
+ auto Buffer = loadFile(Filename);
+ auto Input = loadInputFile(Buffer->getMemBufferRef());
+ auto TheModule = loadModuleFromInput(*Input, Ctx);
// Build a map of module to the GUIDs and summary objects that should
// be written to its index.
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
- ThinGenerator.gatherImportedSummariesForModule(*TheModule, *Index,
- ModuleToSummariesForIndex);
+ ThinGenerator.gatherImportedSummariesForModule(
+ *TheModule, *Index, ModuleToSummariesForIndex, *Input);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
@@ -598,13 +640,16 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
- auto TheModule = loadModule(Filename, Ctx);
+ auto Buffer = loadFile(Filename);
+ auto Input = loadInputFile(Buffer->getMemBufferRef());
+ auto TheModule = loadModuleFromInput(*Input, Ctx);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
OutputName = Filename + ".imports";
}
- OutputName = getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
- ThinGenerator.emitImports(*TheModule, OutputName, *Index);
+ OutputName =
+ getThinLTOOutputFile(OutputName, OldPrefix, NewPrefix);
+ ThinGenerator.emitImports(*TheModule, OutputName, *Index, *Input);
}
}
@@ -622,9 +667,11 @@ private:
auto Index = loadCombinedIndex();
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
- auto TheModule = loadModule(Filename, Ctx);
+ auto Buffer = loadFile(Filename);
+ auto Input = loadInputFile(Buffer->getMemBufferRef());
+ auto TheModule = loadModuleFromInput(*Input, Ctx);
- ThinGenerator.promote(*TheModule, *Index);
+ ThinGenerator.promote(*TheModule, *Index, *Input);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
@@ -653,9 +700,11 @@ private:
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
- auto TheModule = loadModule(Filename, Ctx);
+ auto Buffer = loadFile(Filename);
+ auto Input = loadInputFile(Buffer->getMemBufferRef());
+ auto TheModule = loadModuleFromInput(*Input, Ctx);
- ThinGenerator.crossModuleImport(*TheModule, *Index);
+ ThinGenerator.crossModuleImport(*TheModule, *Index, *Input);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
@@ -684,9 +733,11 @@ private:
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
- auto TheModule = loadModule(Filename, Ctx);
+ auto Buffer = loadFile(Filename);
+ auto Input = loadInputFile(Buffer->getMemBufferRef());
+ auto TheModule = loadModuleFromInput(*Input, Ctx);
- ThinGenerator.internalize(*TheModule, *Index);
+ ThinGenerator.internalize(*TheModule, *Index, *Input);
std::string OutputName = OutputFilename;
if (OutputName.empty()) {
@@ -707,7 +758,9 @@ private:
for (auto &Filename : InputFilenames) {
LLVMContext Ctx;
- auto TheModule = loadModule(Filename, Ctx);
+ auto Buffer = loadFile(Filename);
+ auto Input = loadInputFile(Buffer->getMemBufferRef());
+ auto TheModule = loadModuleFromInput(*Input, Ctx);
ThinGenerator.optimize(*TheModule);
@@ -827,6 +880,11 @@ int main(int argc, char **argv) {
return 0;
}
+ if (ListDependentLibrariesOnly) {
+ listDependentLibraries();
+ return 0;
+ }
+
if (IndexStats) {
printIndexStats();
return 0;
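The loadFile/loadInputFile helpers above follow the ExitOnError idiom: a failed Expected<T> terminates the tool with the given banner, a successful one unwraps to T. A self-contained sketch, with a hypothetical file name:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"

    using namespace llvm;

    static std::unique_ptr<MemoryBuffer> loadOrExit() {
      ExitOnError ExitOnErr("llvm-lto: error loading file 'input.bc': ");
      // getFileOrSTDIN returns ErrorOr<>; errorOrToExpected adapts it so
      // ExitOnErr can either unwrap the buffer or terminate the tool.
      return ExitOnErr(
          errorOrToExpected(MemoryBuffer::getFileOrSTDIN("input.bc")));
    }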
diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp
index 26426367e252..0bd9289dc938 100644
--- a/tools/llvm-lto2/llvm-lto2.cpp
+++ b/tools/llvm-lto2/llvm-lto2.cpp
@@ -1,9 +1,8 @@
//===-- llvm-lto2: test harness for the resolution-based LTO interface ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -92,19 +91,40 @@ static cl::opt<std::string> DefaultTriple(
cl::desc(
"Replace unspecified target triples in input files with this triple"));
+static cl::opt<bool> RemarksWithHotness(
+ "pass-remarks-with-hotness",
+ cl::desc("With PGO, include profile count in optimization remarks"),
+ cl::Hidden);
+
static cl::opt<std::string>
- OptRemarksOutput("pass-remarks-output",
- cl::desc("YAML output file for optimization remarks"));
+ RemarksFilename("pass-remarks-output",
+ cl::desc("Output filename for pass remarks"),
+ cl::value_desc("filename"));
-static cl::opt<bool> OptRemarksWithHotness(
- "pass-remarks-with-hotness",
- cl::desc("Whether to include hotness informations in the remarks.\n"
- "Has effect only if -pass-remarks-output is specified."));
+static cl::opt<std::string>
+ RemarksPasses("pass-remarks-filter",
+ cl::desc("Only record optimization remarks from passes whose "
+ "names match the given regular expression"),
+ cl::value_desc("regex"));
+
+static cl::opt<std::string> RemarksFormat(
+ "pass-remarks-format",
+ cl::desc("The format used for serializing remarks (default: YAML)"),
+ cl::value_desc("format"), cl::init("yaml"));
static cl::opt<std::string>
SamplePGOFile("lto-sample-profile-file",
cl::desc("Specify a SamplePGO profile file"));
+static cl::opt<std::string>
+ CSPGOFile("lto-cspgo-profile-file",
+ cl::desc("Specify a context sensitive PGO profile file"));
+
+static cl::opt<bool>
+ RunCSIRInstr("lto-cspgo-gen",
+ cl::desc("Run PGO context sensitive IR instrumentation"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool>
UseNewPM("use-new-pm",
cl::desc("Run LTO passes using the new pass manager"),
@@ -211,10 +231,14 @@ static int run(int argc, char **argv) {
"Config::addSaveTemps failed");
// Optimization remarks.
- Conf.RemarksFilename = OptRemarksOutput;
- Conf.RemarksWithHotness = OptRemarksWithHotness;
+ Conf.RemarksFilename = RemarksFilename;
+ Conf.RemarksPasses = RemarksPasses;
+ Conf.RemarksWithHotness = RemarksWithHotness;
+ Conf.RemarksFormat = RemarksFormat;
Conf.SampleProfile = SamplePGOFile;
+ Conf.CSIRProfile = CSPGOFile;
+ Conf.RunCSIRInstr = RunCSIRInstr;
// Run a custom pipeline, if asked for.
Conf.OptPipeline = OptPipeline;
@@ -343,6 +367,13 @@ static int dumpSymtab(int argc, char **argv) {
if (TT.isOSBinFormatCOFF())
outs() << "linker opts: " << Input->getCOFFLinkerOpts() << '\n';
+ if (TT.isOSBinFormatELF()) {
+ outs() << "dependent libraries:";
+ for (auto L : Input->getDependentLibraries())
+ outs() << " \"" << L << "\"";
+ outs() << '\n';
+ }
+
std::vector<StringRef> ComdatTable = Input->getComdatTable();
for (const InputFile::Symbol &Sym : Input->symbols()) {
switch (Sym.getVisibility()) {
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index acc5a5f4cab2..e2af2e7f2e32 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -1,9 +1,8 @@
//===- Disassembler.cpp - Disassembler for hex strings --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index 1f18ac075f85..11b685233abc 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -1,9 +1,8 @@
//===- Disassembler.h - Text File Disassembler ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index c0976502f545..ec189c297860 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -1,9 +1,8 @@
//===-- llvm-mc.cpp - Machine Code Hacking Driver ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -402,18 +401,8 @@ int main(int argc, char **argv) {
}
if (!MainFileName.empty())
Ctx.setMainFileName(MainFileName);
- if (GenDwarfForAssembly && DwarfVersion >= 5) {
- // DWARF v5 needs the root file as well as the compilation directory.
- // If we find a '.file 0' directive that will supersede these values.
- MD5 Hash;
- MD5::MD5Result *Cksum =
- (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1);
- Hash.update(Buffer->getBuffer());
- Hash.final(*Cksum);
- Ctx.setMCLineTableRootFile(
- /*CUID=*/0, Ctx.getCompilationDir(),
- !MainFileName.empty() ? MainFileName : InputFilename, Cksum, None);
- }
+ if (GenDwarfForAssembly)
+ Ctx.setGenDwarfRootFile(InputFilename, Buffer->getBuffer());
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
diff --git a/tools/llvm-mca/CodeRegion.cpp b/tools/llvm-mca/CodeRegion.cpp
index 29a27c50c171..bf592f67245e 100644
--- a/tools/llvm-mca/CodeRegion.cpp
+++ b/tools/llvm-mca/CodeRegion.cpp
@@ -1,9 +1,8 @@
//===-------------------------- CodeRegion.cpp -----------------*- C++ -* -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -17,7 +16,12 @@
namespace llvm {
namespace mca {
-bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const {
+CodeRegions::CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {
+ // Create a default region for the input code sequence.
+ Regions.emplace_back(make_unique<CodeRegion>("", SMLoc()));
+}
+
+bool CodeRegion::isLocInRange(SMLoc Loc) const {
if (RangeEnd.isValid() && Loc.getPointer() > RangeEnd.getPointer())
return false;
if (RangeStart.isValid() && Loc.getPointer() < RangeStart.getPointer())
@@ -25,42 +29,88 @@ bool CodeRegion::isLocInRange(llvm::SMLoc Loc) const {
return true;
}
-void CodeRegions::beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
- assert(!Regions.empty() && "Missing Default region");
- const CodeRegion &CurrentRegion = *Regions.back();
- if (CurrentRegion.startLoc().isValid() && !CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
- "Ignoring invalid region start");
- return;
+void CodeRegions::beginRegion(StringRef Description, SMLoc Loc) {
+ if (ActiveRegions.empty()) {
+ // Remove the default region if there is at least one user defined region.
+ // By construction, only the default region has an invalid start location.
+ if (Regions.size() == 1 && !Regions[0]->startLoc().isValid() &&
+ !Regions[0]->endLoc().isValid()) {
+ ActiveRegions[Description] = 0;
+ Regions[0] = make_unique<CodeRegion>(Description, Loc);
+ return;
+ }
+ } else {
+ auto It = ActiveRegions.find(Description);
+ if (It != ActiveRegions.end()) {
+ const CodeRegion &R = *Regions[It->second];
+ if (Description.empty()) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ "found multiple overlapping anonymous regions");
+ SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note,
+ "Previous anonymous region was defined here");
+ FoundErrors = true;
+ return;
+ }
+
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ "overlapping regions cannot have the same name");
+ SM.PrintMessage(R.startLoc(), SourceMgr::DK_Note,
+ "region " + Description + " was previously defined here");
+ FoundErrors = true;
+ return;
+ }
}
- // Remove the default region if there are user defined regions.
- if (!CurrentRegion.startLoc().isValid())
- Regions.erase(Regions.begin());
- addRegion(Description, Loc);
+ ActiveRegions[Description] = Regions.size();
+ Regions.emplace_back(make_unique<CodeRegion>(Description, Loc));
+ return;
}
-void CodeRegions::endRegion(llvm::SMLoc Loc) {
- assert(!Regions.empty() && "Missing Default region");
- CodeRegion &CurrentRegion = *Regions.back();
- if (CurrentRegion.endLoc().isValid()) {
- SM.PrintMessage(Loc, llvm::SourceMgr::DK_Warning,
- "Ignoring invalid region end");
+void CodeRegions::endRegion(StringRef Description, SMLoc Loc) {
+ if (Description.empty()) {
+ // Special case where there is only one user defined region,
+ // and this LLVM-MCA-END directive doesn't provide a region name.
+ // In this case, we assume that the user simply wanted to just terminate
+ // the only active region.
+ if (ActiveRegions.size() == 1) {
+ auto It = ActiveRegions.begin();
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It);
+ return;
+ }
+
+ // Special case where the region end marker applies to the default region.
+ if (ActiveRegions.empty() && Regions.size() == 1 &&
+ !Regions[0]->startLoc().isValid() && !Regions[0]->endLoc().isValid()) {
+ Regions[0]->setEndLocation(Loc);
+ return;
+ }
+ }
+
+ auto It = ActiveRegions.find(Description);
+ if (It != ActiveRegions.end()) {
+ Regions[It->second]->setEndLocation(Loc);
+ ActiveRegions.erase(It);
return;
}
- CurrentRegion.setEndLocation(Loc);
+ FoundErrors = true;
+ SM.PrintMessage(Loc, SourceMgr::DK_Error,
+ "found an invalid region end directive");
+ if (!Description.empty()) {
+ SM.PrintMessage(Loc, SourceMgr::DK_Note,
+ "unable to find an active region named " + Description);
+ } else {
+ SM.PrintMessage(Loc, SourceMgr::DK_Note,
+ "unable to find an active anonymous region");
+ }
}
-void CodeRegions::addInstruction(const llvm::MCInst &Instruction) {
- const llvm::SMLoc &Loc = Instruction.getLoc();
- const auto It =
- std::find_if(Regions.rbegin(), Regions.rend(),
- [Loc](const std::unique_ptr<CodeRegion> &Region) {
- return Region->isLocInRange(Loc);
- });
- if (It != Regions.rend())
- (*It)->addInstruction(Instruction);
+void CodeRegions::addInstruction(const MCInst &Instruction) {
+ SMLoc Loc = Instruction.getLoc();
+ for (UniqueCodeRegion &Region : Regions)
+ if (Region->isLocInRange(Loc))
+ Region->addInstruction(Instruction);
}
} // namespace mca
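To illustrate the pairing rules implemented above: differently named regions may overlap, and each END is matched to an active region by name. A sketch against the tool-internal interface, not part of the patch, with default-constructed locations for brevity:

    #include "CodeRegion.h"
    #include "llvm/Support/SourceMgr.h"

    using namespace llvm;

    static void demo(SourceMgr &SM) {
      mca::CodeRegions Regions(SM);
      Regions.beginRegion("dot-product", SMLoc()); // # LLVM-MCA-BEGIN dot-product
      Regions.beginRegion("inner-loop", SMLoc());  // # LLVM-MCA-BEGIN inner-loop
      Regions.endRegion("inner-loop", SMLoc());    // # LLVM-MCA-END inner-loop
      Regions.endRegion("dot-product", SMLoc());   // # LLVM-MCA-END dot-product
      // Regions.isValid() stays true: every END matched an active region.
    }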
diff --git a/tools/llvm-mca/CodeRegion.h b/tools/llvm-mca/CodeRegion.h
index 867aa18bb4fe..cabb4a5d4484 100644
--- a/tools/llvm-mca/CodeRegion.h
+++ b/tools/llvm-mca/CodeRegion.h
@@ -1,9 +1,8 @@
//===-------------------------- CodeRegion.h -------------------*- C++ -* -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -35,6 +34,7 @@
#define LLVM_TOOLS_LLVM_MCA_CODEREGION_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/SMLoc.h"
@@ -51,7 +51,7 @@ class CodeRegion {
// An optional descriptor for this region.
llvm::StringRef Description;
// Instructions that form this region.
- std::vector<llvm::MCInst> Instructions;
+ llvm::SmallVector<llvm::MCInst, 8> Instructions;
// Source location range.
llvm::SMLoc RangeStart;
llvm::SMLoc RangeEnd;
@@ -79,24 +79,25 @@ public:
llvm::StringRef getDescription() const { return Description; }
};
+class CodeRegionParseError final : public Error {};
+
class CodeRegions {
// A source manager. Used by the tool to generate meaningful warnings.
llvm::SourceMgr &SM;
- std::vector<std::unique_ptr<CodeRegion>> Regions;
-
- // Construct a new region of code guarded by LLVM-MCA comments.
- void addRegion(llvm::StringRef Description, llvm::SMLoc Loc) {
- Regions.emplace_back(llvm::make_unique<CodeRegion>(Description, Loc));
- }
+ using UniqueCodeRegion = std::unique_ptr<CodeRegion>;
+ std::vector<UniqueCodeRegion> Regions;
+ llvm::StringMap<unsigned> ActiveRegions;
+ bool FoundErrors;
CodeRegions(const CodeRegions &) = delete;
CodeRegions &operator=(const CodeRegions &) = delete;
public:
- typedef std::vector<std::unique_ptr<CodeRegion>>::iterator iterator;
- typedef std::vector<std::unique_ptr<CodeRegion>>::const_iterator
- const_iterator;
+ CodeRegions(llvm::SourceMgr &S);
+
+ typedef std::vector<UniqueCodeRegion>::iterator iterator;
+ typedef std::vector<UniqueCodeRegion>::const_iterator const_iterator;
iterator begin() { return Regions.begin(); }
iterator end() { return Regions.end(); }
@@ -104,24 +105,21 @@ public:
const_iterator end() const { return Regions.cend(); }
void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
- void endRegion(llvm::SMLoc Loc);
+ void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
void addInstruction(const llvm::MCInst &Instruction);
llvm::SourceMgr &getSourceMgr() const { return SM; }
- CodeRegions(llvm::SourceMgr &S) : SM(S) {
- // Create a default region for the input code sequence.
- addRegion("Default", llvm::SMLoc());
- }
-
llvm::ArrayRef<llvm::MCInst> getInstructionSequence(unsigned Idx) const {
return Regions[Idx]->getInstructions();
}
bool empty() const {
- return llvm::all_of(Regions, [](const std::unique_ptr<CodeRegion> &Region) {
+ return llvm::all_of(Regions, [](const UniqueCodeRegion &Region) {
return Region->empty();
});
}
+
+ bool isValid() const { return !FoundErrors; }
};
} // namespace mca
diff --git a/tools/llvm-mca/CodeRegionGenerator.cpp b/tools/llvm-mca/CodeRegionGenerator.cpp
index 5bd37adeeae9..c793169e64e0 100644
--- a/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -1,9 +1,8 @@
//===----------------------- CodeRegionGenerator.cpp ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -49,8 +48,7 @@ public:
// We only want to intercept the emission of new instructions.
virtual void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo & /* unused */,
- bool /* unused */) override {
+ const MCSubtargetInfo &/* unused */) override {
Regions.addInstruction(Inst);
}
@@ -88,7 +86,11 @@ void MCACommentConsumer::HandleComment(SMLoc Loc, StringRef CommentText) {
Comment = Comment.drop_front(Position);
if (Comment.consume_front("LLVM-MCA-END")) {
- Regions.endRegion(Loc);
+ // Skip spaces and tabs.
+ Position = Comment.find_first_not_of(" \t");
+ if (Position < Comment.size())
+ Comment = Comment.drop_front(Position);
+ Regions.endRegion(Comment, Loc);
return;
}
@@ -117,7 +119,6 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
MCACommentConsumer CC(Regions);
Lexer.setCommentConsumer(&CC);
- // Create a target-specific parser and perform the parse.
std::unique_ptr<MCTargetAsmParser> TAP(
TheTarget.createMCAsmParser(STI, *Parser, MCII, Opts));
if (!TAP)
@@ -127,7 +128,7 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions() {
Parser->setTargetParser(*TAP);
Parser->Run(false);
- // Get the assembler dialect from the input. llvm-mca will use this as the
+ // Set the assembler dialect from the input. llvm-mca will use this as the
// default dialect when printing reports.
AssemblerDialect = Parser->getAssemblerDialect();
return Regions;
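The END handling above pulls an optional region name out of the comment text. A standalone StringRef sketch of that trimming step, with a hypothetical input:

    #include "llvm/ADT/StringRef.h"

    using namespace llvm;

    static StringRef regionNameAfterEnd(StringRef Comment) {
      // e.g. Comment == "LLVM-MCA-END   inner-loop"
      if (!Comment.consume_front("LLVM-MCA-END"))
        return StringRef();
      size_t Position = Comment.find_first_not_of(" \t");
      if (Position < Comment.size())
        Comment = Comment.drop_front(Position);
      return Comment; // "inner-loop", or "" for an anonymous END
    }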
diff --git a/tools/llvm-mca/CodeRegionGenerator.h b/tools/llvm-mca/CodeRegionGenerator.h
index 892cafb92686..9a10aa2c148b 100644
--- a/tools/llvm-mca/CodeRegionGenerator.h
+++ b/tools/llvm-mca/CodeRegionGenerator.h
@@ -1,9 +1,8 @@
//===----------------------- CodeRegionGenerator.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/PipelinePrinter.cpp b/tools/llvm-mca/PipelinePrinter.cpp
index 18ef45fc2a65..90d468075996 100644
--- a/tools/llvm-mca/PipelinePrinter.cpp
+++ b/tools/llvm-mca/PipelinePrinter.cpp
@@ -1,9 +1,8 @@
//===--------------------- PipelinePrinter.cpp ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/PipelinePrinter.h b/tools/llvm-mca/PipelinePrinter.h
index 456026e12df3..004309cd7b8e 100644
--- a/tools/llvm-mca/PipelinePrinter.h
+++ b/tools/llvm-mca/PipelinePrinter.h
@@ -1,9 +1,8 @@
//===--------------------- PipelinePrinter.h --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
new file mode 100644
index 000000000000..560c6c6e8a33
--- /dev/null
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -0,0 +1,624 @@
+//===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the functionalities used by the BottleneckAnalysis
+/// to report bottleneck info.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Views/BottleneckAnalysis.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MCA/Support.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace mca {
+
+#define DEBUG_TYPE "llvm-mca"
+
+PressureTracker::PressureTracker(const MCSchedModel &Model)
+ : SM(Model),
+ ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
+ ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
+ ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
+ ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
+ computeProcResourceMasks(SM, ProcResID2Mask);
+
+ // Ignore the invalid resource at index zero.
+ unsigned NextResourceUsersIdx = 0;
+ for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+ ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
+ NextResourceUsersIdx += ProcResource.NumUnits;
+ uint64_t ResourceMask = ProcResID2Mask[I];
+ ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
+ }
+
+ ResourceUsers.resize(NextResourceUsersIdx);
+ std::fill(ResourceUsers.begin(), ResourceUsers.end(),
+ std::make_pair<unsigned, unsigned>(~0U, 0U));
+}
+
+void PressureTracker::getResourceUsers(uint64_t ResourceMask,
+ SmallVectorImpl<User> &Users) const {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ unsigned ProcResID = ResIdx2ProcResID[Index];
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+ for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
+ const User U = getResourceUser(ProcResID, I);
+ if (U.second && IPI.find(U.first) != IPI.end())
+ Users.emplace_back(U);
+ }
+}
+
+void PressureTracker::onInstructionDispatched(unsigned IID) {
+ IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
+}
+
+void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
+
+void PressureTracker::handleInstructionIssuedEvent(
+ const HWInstructionIssuedEvent &Event) {
+ unsigned IID = Event.IR.getSourceIndex();
+ using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
+ using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
+ for (const ResourceUse &Use : Event.UsedResources) {
+ const ResourceRef &RR = Use.first;
+ unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
+ Index += countTrailingZeros(RR.second);
+ ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
+ }
+}
+
+void PressureTracker::updateResourcePressureDistribution(
+ uint64_t CumulativeMask) {
+ while (CumulativeMask) {
+ uint64_t Current = CumulativeMask & (-CumulativeMask);
+ unsigned ResIdx = getResourceStateIndex(Current);
+ unsigned ProcResID = ResIdx2ProcResID[ResIdx];
+ uint64_t Mask = ProcResID2Mask[ProcResID];
+
+ if (Mask == Current) {
+ ResourcePressureDistribution[ProcResID]++;
+ CumulativeMask ^= Current;
+ continue;
+ }
+
+ Mask ^= Current;
+ while (Mask) {
+ uint64_t SubUnit = Mask & (-Mask);
+ ResIdx = getResourceStateIndex(SubUnit);
+ ProcResID = ResIdx2ProcResID[ResIdx];
+ ResourcePressureDistribution[ProcResID]++;
+ Mask ^= SubUnit;
+ }
+
+ CumulativeMask ^= Current;
+ }
+}
+
+void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
+ assert(Event.Reason != HWPressureEvent::INVALID &&
+ "Unexpected invalid event!");
+
+ switch (Event.Reason) {
+ default:
+ break;
+
+ case HWPressureEvent::RESOURCES: {
+ const uint64_t ResourceMask = Event.ResourceMask;
+ updateResourcePressureDistribution(Event.ResourceMask);
+
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ const Instruction &IS = *IR.getInstruction();
+ unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
+ if (!BusyResources)
+ continue;
+
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].ResourcePressureCycles++;
+ }
+ break;
+ }
+
+ case HWPressureEvent::REGISTER_DEPS:
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].RegisterPressureCycles++;
+ }
+ break;
+
+ case HWPressureEvent::MEMORY_DEPS:
+ for (const InstRef &IR : Event.AffectedInstructions) {
+ unsigned IID = IR.getSourceIndex();
+ IPI[IID].MemoryPressureCycles++;
+ }
+ }
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
+ const DependencyEdge &DepEdge,
+ MCInstPrinter &MCIP) const {
+ unsigned FromIID = DepEdge.FromIID;
+ unsigned ToIID = DepEdge.ToIID;
+ assert(FromIID < ToIID && "Graph should be acyclic!");
+
+ const DependencyEdge::Dependency &DE = DepEdge.Dep;
+ assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
+
+ OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
+ if (DE.Type == DependencyEdge::DT_REGISTER) {
+ OS << " - REGISTER: ";
+ MCIP.printRegName(OS, DE.ResourceOrRegID);
+ } else if (DE.Type == DependencyEdge::DT_MEMORY) {
+ OS << " - MEMORY";
+ } else {
+ assert(DE.Type == DependencyEdge::DT_RESOURCE &&
+ "Unsupported dependency type!");
+ OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
+ }
+ OS << " - CYCLES: " << DE.Cost << '\n';
+}
+#endif // NDEBUG
+
+void DependencyGraph::initializeRootSet(
+ SmallVectorImpl<unsigned> &RootSet) const {
+ for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+ const DGNode &N = Nodes[I];
+ if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
+ RootSet.emplace_back(I);
+ }
+}
+
+void DependencyGraph::propagateThroughEdges(
+ SmallVectorImpl<unsigned> &RootSet) {
+ SmallVector<unsigned, 8> ToVisit;
+
+ // A critical sequence is computed as the longest path from a node of the
+ // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
+ // composed of nodes with at least one successor, and no predecessors.
+ //
+ // Each node of the graph starts with an initial cost of zero. The cost of
+ // a node is a measure of criticality: the higher the cost, the bigger the
+ // performance impact.
+ //
+ // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
+ // the inner loop selects (i.e. visits) a node N from a set of `unvisited
+ // nodes`, and then propagates the cost of N to all its neighbors.
+ //
+ // The `unvisited nodes` set initially contains all the nodes from the
+ // RootSet. A node N is added to the `unvisited nodes` if all its
+ // predecessors have been visited already.
+ //
+ // For simplicity, every node counts its visited incoming edges in field
+ // `NumVisitedPredecessors`. When that count reaches `NumPredecessors`, the
+ // corresponding node is added to a `ToVisit` set.
+ //
+ // At the end of every iteration of the outer loop, set `ToVisit` becomes our
+ // new `unvisited nodes` set.
+ //
+ // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
+ // is empty. This algorithm works under the assumption that the graph is
+ // acyclic.
+ do {
+ for (unsigned IID : RootSet) {
+ const DGNode &N = Nodes[IID];
+ for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
+ unsigned ToIID = DepEdge.ToIID;
+ DGNode &To = Nodes[ToIID];
+ uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
+ // Check if this is the most expensive incoming edge seen so far. If so,
+ // update the total cost of the destination node (ToIID), as well as its
+ // field `CriticalPredecessor`.
+ if (Cost > To.Cost) {
+ To.CriticalPredecessor = DepEdge;
+ To.Cost = Cost;
+ To.Depth = N.Depth + 1;
+ }
+ To.NumVisitedPredecessors++;
+ if (To.NumVisitedPredecessors == To.NumPredecessors)
+ ToVisit.emplace_back(ToIID);
+ }
+ }
+
+ std::swap(RootSet, ToVisit);
+ ToVisit.clear();
+ } while (!RootSet.empty());
+}
+
+void DependencyGraph::getCriticalSequence(
+ SmallVectorImpl<const DependencyEdge *> &Seq) const {
+ // At this stage, nodes of the graph have already been visited, and costs have
+ // been propagated through the edges (see method `propagateThroughEdges()`).
+
+ // Identify the node N with the highest cost in the graph. By construction,
+ // that node is the last instruction of our critical sequence.
+ // Field N.Depth gives the total length of the sequence.
+ //
+ // To obtain the sequence of critical edges, we simply follow the chain of
+ // critical predecessors starting from node N (DGNode::CriticalPredecessor).
+ const auto It = std::max_element(
+ Nodes.begin(), Nodes.end(),
+ [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
+ unsigned IID = std::distance(Nodes.begin(), It);
+ Seq.resize(Nodes[IID].Depth);
+ for (unsigned I = Seq.size(), E = 0; I > E; --I) {
+ const DGNode &N = Nodes[IID];
+ Seq[I - 1] = &N.CriticalPredecessor;
+ IID = N.CriticalPredecessor.FromIID;
+ }
+}
+
+static void printInstruction(formatted_raw_ostream &FOS,
+ const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+ const MCInst &MCI,
+ bool UseDifferentColor = false) {
+ std::string Instruction;
+ raw_string_ostream InstrStream(Instruction);
+
+ FOS.PadToColumn(14);
+
+ MCIP.printInst(&MCI, InstrStream, "", STI);
+ InstrStream.flush();
+
+ if (UseDifferentColor)
+ FOS.changeColor(raw_ostream::CYAN, true, false);
+ FOS << StringRef(Instruction).ltrim();
+ if (UseDifferentColor)
+ FOS.resetColor();
+}
+
+void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+ SmallVector<const DependencyEdge *, 16> Seq;
+ DG.getCriticalSequence(Seq);
+ if (Seq.empty())
+ return;
+
+ OS << "\nCritical sequence based on the simulation:\n\n";
+
+ const DependencyEdge &FirstEdge = *Seq[0];
+ unsigned FromIID = FirstEdge.FromIID % Source.size();
+ unsigned ToIID = FirstEdge.ToIID % Source.size();
+ bool IsLoopCarried = FromIID >= ToIID;
+
+ formatted_raw_ostream FOS(OS);
+ FOS.PadToColumn(14);
+ FOS << "Instruction";
+ FOS.PadToColumn(58);
+ FOS << "Dependency Information";
+
+ bool HasColors = FOS.has_colors();
+
+ unsigned CurrentIID = 0;
+ if (IsLoopCarried) {
+ FOS << "\n +----< " << FromIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
+ FOS << "\n |\n | < loop carried > \n |";
+ } else {
+ while (CurrentIID < FromIID) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << "\n +----< " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+ CurrentIID++;
+ }
+
+ for (const DependencyEdge *&DE : Seq) {
+ ToIID = DE->ToIID % Source.size();
+ unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
+
+ while (CurrentIID < LastIID) {
+ FOS << "\n | " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ if (CurrentIID == ToIID) {
+ FOS << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+ } else {
+ FOS << "\n |\n | < loop carried > \n |"
+ << "\n +----> " << ToIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
+ }
+ FOS.PadToColumn(58);
+
+ const DependencyEdge::Dependency &Dep = DE->Dep;
+ if (HasColors)
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+ if (Dep.Type == DependencyEdge::DT_REGISTER) {
+ FOS << "## REGISTER dependency: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ MCIP.printRegName(FOS, Dep.ResourceOrRegID);
+ } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
+ FOS << "## MEMORY dependency.";
+ } else {
+ assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
+ "Unsupported dependency type!");
+ FOS << "## RESOURCE interference: ";
+ if (HasColors)
+ FOS.changeColor(raw_ostream::MAGENTA, true, false);
+ FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
+ if (HasColors) {
+ FOS.resetColor();
+ FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+ }
+ FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
+ << "% ]";
+ }
+ if (HasColors)
+ FOS.resetColor();
+ ++CurrentIID;
+ }
+
+ while (CurrentIID < Source.size()) {
+ FOS << "\n " << CurrentIID << ".";
+ printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+ CurrentIID++;
+ }
+
+ FOS << '\n';
+ FOS.flush();
+}
+
+#ifndef NDEBUG
+void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
+ OS << "\nREG DEPS\n";
+ for (const DGNode &Node : Nodes)
+ for (const DependencyEdge &DE : Node.OutgoingEdges)
+ if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
+ dumpDependencyEdge(OS, DE, MCIP);
+
+ OS << "\nMEM DEPS\n";
+ for (const DGNode &Node : Nodes)
+ for (const DependencyEdge &DE : Node.OutgoingEdges)
+ if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
+ dumpDependencyEdge(OS, DE, MCIP);
+
+ OS << "\nRESOURCE DEPS\n";
+ for (const DGNode &Node : Nodes)
+ for (const DependencyEdge &DE : Node.OutgoingEdges)
+ if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
+ dumpDependencyEdge(OS, DE, MCIP);
+}
+#endif // NDEBUG
+
+void DependencyGraph::addDependency(unsigned From, unsigned To,
+ DependencyEdge::Dependency &&Dep) {
+ DGNode &NodeFrom = Nodes[From];
+ DGNode &NodeTo = Nodes[To];
+ SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
+
+ auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
+ return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
+ });
+
+ if (It != Vec.end()) {
+ It->Dep.Cost += Dep.Cost;
+ It->Frequency++;
+ return;
+ }
+
+ DependencyEdge DE = {Dep, From, To, 1};
+ Vec.emplace_back(DE);
+ NodeTo.NumPredecessors++;
+}
+
+BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
+ MCInstPrinter &Printer,
+ ArrayRef<MCInst> S, unsigned NumIter)
+ : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
+ Source(S), Iterations(NumIter), TotalCycles(0),
+ PressureIncreasedBecauseOfResources(false),
+ PressureIncreasedBecauseOfRegisterDependencies(false),
+ PressureIncreasedBecauseOfMemoryDependencies(false),
+ SeenStallCycles(false), BPI() {}
+
+void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
+ unsigned RegID, unsigned Cost) {
+ bool IsLoopCarried = From >= To;
+ unsigned SourceSize = Source.size();
+ if (IsLoopCarried) {
+ Cost *= Iterations / 2;
+ DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
+ DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
+ return;
+ }
+ DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
+}
+
+void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
+ unsigned Cost) {
+ bool IsLoopCarried = From >= To;
+ unsigned SourceSize = Source.size();
+ if (IsLoopCarried) {
+ Cost *= Iterations / 2;
+ DG.addMemoryDep(From, To + SourceSize, Cost);
+ DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
+ return;
+ }
+ DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
+}
+
+void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
+ uint64_t Mask, unsigned Cost) {
+ bool IsLoopCarried = From >= To;
+ unsigned SourceSize = Source.size();
+ if (IsLoopCarried) {
+ Cost *= Iterations / 2;
+ DG.addResourceDep(From, To + SourceSize, Mask, Cost);
+ DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
+ return;
+ }
+ DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
+}
+
+void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
+ const unsigned IID = Event.IR.getSourceIndex();
+ if (Event.Type == HWInstructionEvent::Dispatched) {
+ Tracker.onInstructionDispatched(IID);
+ return;
+ }
+ if (Event.Type == HWInstructionEvent::Executed) {
+ Tracker.onInstructionExecuted(IID);
+ return;
+ }
+
+ if (Event.Type != HWInstructionEvent::Issued)
+ return;
+
+ const Instruction &IS = *Event.IR.getInstruction();
+ unsigned To = IID % Source.size();
+
+ unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
+ uint64_t ResourceMask = IS.getCriticalResourceMask();
+ SmallVector<std::pair<unsigned, unsigned>, 4> Users;
+ while (ResourceMask) {
+ uint64_t Current = ResourceMask & (-ResourceMask);
+ Tracker.getResourceUsers(Current, Users);
+ for (const std::pair<unsigned, unsigned> &U : Users)
+ addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
+ Users.clear();
+ ResourceMask ^= Current;
+ }
+
+ const CriticalDependency &RegDep = IS.getCriticalRegDep();
+ if (RegDep.Cycles) {
+ Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
+ unsigned From = RegDep.IID % Source.size();
+ addRegisterDep(From, To, RegDep.RegID, Cycles);
+ }
+
+ const CriticalDependency &MemDep = IS.getCriticalMemDep();
+ if (MemDep.Cycles) {
+ Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
+ unsigned From = MemDep.IID % Source.size();
+ addMemoryDep(From, To, Cycles);
+ }
+
+ Tracker.handleInstructionIssuedEvent(
+ static_cast<const HWInstructionIssuedEvent &>(Event));
+
+ // Check if this is the last simulated instruction.
+ if (IID == ((Iterations * Source.size()) - 1))
+ DG.finalizeGraph();
+}
+
+void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
+ assert(Event.Reason != HWPressureEvent::INVALID &&
+ "Unexpected invalid event!");
+
+ Tracker.handlePressureEvent(Event);
+
+ switch (Event.Reason) {
+ default:
+ break;
+
+ case HWPressureEvent::RESOURCES:
+ PressureIncreasedBecauseOfResources = true;
+ break;
+ case HWPressureEvent::REGISTER_DEPS:
+ PressureIncreasedBecauseOfRegisterDependencies = true;
+ break;
+ case HWPressureEvent::MEMORY_DEPS:
+ PressureIncreasedBecauseOfMemoryDependencies = true;
+ break;
+ }
+}
+
+void BottleneckAnalysis::onCycleEnd() {
+ ++TotalCycles;
+
+ bool PressureIncreasedBecauseOfDataDependencies =
+ PressureIncreasedBecauseOfRegisterDependencies ||
+ PressureIncreasedBecauseOfMemoryDependencies;
+ if (!PressureIncreasedBecauseOfResources &&
+ !PressureIncreasedBecauseOfDataDependencies)
+ return;
+
+ ++BPI.PressureIncreaseCycles;
+ if (PressureIncreasedBecauseOfRegisterDependencies)
+ ++BPI.RegisterDependencyCycles;
+ if (PressureIncreasedBecauseOfMemoryDependencies)
+ ++BPI.MemoryDependencyCycles;
+ if (PressureIncreasedBecauseOfDataDependencies)
+ ++BPI.DataDependencyCycles;
+ if (PressureIncreasedBecauseOfResources)
+ ++BPI.ResourcePressureCycles;
+ PressureIncreasedBecauseOfResources = false;
+ PressureIncreasedBecauseOfRegisterDependencies = false;
+ PressureIncreasedBecauseOfMemoryDependencies = false;
+}
+
+void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
+ if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
+ OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
+ return;
+ }
+
+ double PressurePerCycle =
+ (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
+ double ResourcePressurePerCycle =
+ (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
+ double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
+ double RegDepPressurePerCycle =
+ (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
+ double MemDepPressurePerCycle =
+ (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
+
+ OS << "\n\nCycles with backend pressure increase [ "
+ << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
+
+ OS << "\nThroughput Bottlenecks: "
+ << "\n Resource Pressure [ "
+ << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
+ << "% ]";
+
+ if (BPI.PressureIncreaseCycles) {
+ ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
+ const MCSchedModel &SM = STI.getSchedModel();
+ for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
+ unsigned ResourceCycles = Distribution[I];
+ if (ResourceCycles) {
+ double Frequency = (double)ResourceCycles * 100 / TotalCycles;
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
+ OS << "\n - " << PRDesc.Name << " [ "
+ << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
+ }
+ }
+ }
+
+ OS << "\n Data Dependencies: [ "
+ << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
+ OS << "\n - Register Dependencies [ "
+ << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
+ << "% ]";
+ OS << "\n - Memory Dependencies [ "
+ << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
+ << "% ]\n";
+}
+
+void BottleneckAnalysis::printView(raw_ostream &OS) const {
+ std::string Buffer;
+ raw_string_ostream TempStream(Buffer);
+ printBottleneckHints(TempStream);
+ TempStream.flush();
+ OS << Buffer;
+ printCriticalSequence(OS);
+}
+
+} // namespace mca.
+} // namespace llvm
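
The cost propagation in propagateThroughEdges above is a longest-path computation over a DAG: nodes are visited in topological order by counting visited predecessors (essentially Kahn's algorithm), and each node remembers its most expensive incoming edge. Below is a minimal standalone sketch of the same idea in plain C++; the field names mirror DGNode, but nothing here depends on the LLVM MCA API, and the structure is a simplified stand-in rather than the actual implementation.

  #include <cstdint>
  #include <utility>
  #include <vector>

  // A simplified node mirroring DGNode's bookkeeping fields.
  struct Node {
    unsigned NumPredecessors = 0;
    unsigned NumVisited = 0;      // visited incoming edges so far
    uint64_t Cost = 0;            // longest-path cost from any root
    unsigned Depth = 0;           // number of edges on that path
    unsigned CriticalPred = ~0u;  // predecessor on the most expensive path
    std::vector<std::pair<unsigned, uint64_t>> Succs; // (successor, edge cost)
  };

  // Propagate costs from the roots (nodes with no predecessors) to the leaves.
  // Assumes the graph is acyclic, as the bottleneck analysis guarantees.
  void propagate(std::vector<Node> &G) {
    std::vector<unsigned> Worklist;
    for (unsigned I = 0, E = G.size(); I < E; ++I)
      if (G[I].NumPredecessors == 0 && !G[I].Succs.empty())
        Worklist.push_back(I);

    std::vector<unsigned> Next;
    while (!Worklist.empty()) {
      for (unsigned From : Worklist) {
        for (auto [To, EdgeCost] : G[From].Succs) {
          Node &Dst = G[To];
          uint64_t Cost = G[From].Cost + EdgeCost;
          if (Cost > Dst.Cost) {          // most expensive incoming edge wins
            Dst.Cost = Cost;
            Dst.Depth = G[From].Depth + 1;
            Dst.CriticalPred = From;
          }
          if (++Dst.NumVisited == Dst.NumPredecessors)
            Next.push_back(To);           // all predecessors visited
        }
      }
      Worklist.swap(Next);
      Next.clear();
    }
  }

With costs in place, getCriticalSequence only has to pick the node with the largest Cost and walk the CriticalPredecessor chain backwards for Depth steps to recover the edge sequence, which is exactly what the code above does with std::max_element.
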
diff --git a/tools/llvm-mca/Views/BottleneckAnalysis.h b/tools/llvm-mca/Views/BottleneckAnalysis.h
new file mode 100644
index 000000000000..7564b1a48206
--- /dev/null
+++ b/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -0,0 +1,341 @@
+//===--------------------- BottleneckAnalysis.h -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements the bottleneck analysis view.
+///
+/// This view internally observes backend pressure increase events in order to
+/// identify problematic data dependencies and processor resource interferences.
+///
+/// Example of bottleneck analysis report for a dot-product on X86 btver2:
+///
+/// Cycles with backend pressure increase [ 40.76% ]
+/// Throughput Bottlenecks:
+/// Resource Pressure [ 39.34% ]
+/// - JFPA [ 39.34% ]
+/// - JFPU0 [ 39.34% ]
+/// Data Dependencies: [ 1.42% ]
+/// - Register Dependencies [ 1.42% ]
+/// - Memory Dependencies [ 0.00% ]
+///
+/// According to the example, backend pressure increased during 40.76% of the
+/// simulated cycles. In particular, the major cause of backend pressure
+/// increases was contention on the floating point adder JFPA, accessible from
+/// pipeline resource JFPU0.
+///
+/// At the end of each cycle, if pressure on the simulated out-of-order buffers
+/// has increased, a backend pressure event is reported.
+/// In particular, this occurs when there is a delta between the number of uOps
+/// dispatched and the number of uOps issued to the underlying pipelines.
+///
+/// The bottleneck analysis view is also responsible for identifying and printing
+/// the most "critical" sequence of dependent instructions according to the
+/// simulated run.
+///
+/// Below is the critical sequence computed for the dot-product example on
+/// btver2:
+///
+/// Instruction Dependency Information
+/// +----< 2. vhaddps %xmm3, %xmm3, %xmm4
+/// |
+/// | < loop carried >
+/// |
+/// | 0. vmulps %xmm0, %xmm0, %xmm2
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+/// +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3
+/// |
+/// | < loop carried >
+/// |
+/// +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 73% ]
+///
+///
+/// The algorithm that computes the critical sequence is very similar to a
+/// critical path analysis.
+///
+/// A dependency graph is used internally to track dependencies between nodes.
+/// Nodes of the graph represent instructions from the input assembly sequence,
+/// and edges of the graph represent data dependencies or processor resource
+/// interferences.
+///
+/// Edges are dynamically 'discovered' by observing instruction state transitions
+/// and backend pressure increase events. Edges are internally ranked based on
+/// their "criticality". A dependency is considered to be critical if it takes a
+/// long time to execute, and if it contributes to backend pressure increases.
+/// Criticality is internally measured in terms of cycles; it is computed for
+/// every edge in the graph as a function of the edge latency and the number of
+/// backend pressure increase cycles contributed by that edge.
+///
+/// At the end of simulation, costs are propagated to nodes through the edges of
+/// the graph, and the most expensive path connecting the root-set (a
+/// set of nodes with no predecessors) to a leaf node is reported as the
+/// critical sequence.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+#define LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
+
+#include "Views/View.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace mca {
+
+class PressureTracker {
+ const MCSchedModel &SM;
+
+ // Resource pressure distribution. There is an element for every processor
+ // resource declared by the scheduling model. Quantities are in cycles.
+ SmallVector<unsigned, 4> ResourcePressureDistribution;
+
+ // Each processor resource is associated with a so-called processor resource
+ // mask. This vector correlates processor resource IDs with processor
+ // resource masks. There is exactly one element for each processor resource
+ // declared by the scheduling model.
+ SmallVector<uint64_t, 4> ProcResID2Mask;
+
+ // Maps processor resource state indices (returned by calls to
+ // `getResourceStateIndex(Mask)`) to processor resource identifiers.
+ SmallVector<unsigned, 4> ResIdx2ProcResID;
+
+ // Maps Processor Resource identifiers to ResourceUsers indices.
+ SmallVector<unsigned, 4> ProcResID2ResourceUsersIndex;
+
+ // Identifies the last user of a processor resource unit.
+ // This vector is updated on every instruction issued event.
+ // There is one entry for every processor resource unit declared by the
+ // processor model. An all_ones value is treated like an invalid instruction
+ // identifier.
+ using User = std::pair<unsigned, unsigned>;
+ SmallVector<User, 4> ResourceUsers;
+
+ struct InstructionPressureInfo {
+ unsigned RegisterPressureCycles;
+ unsigned MemoryPressureCycles;
+ unsigned ResourcePressureCycles;
+ };
+ DenseMap<unsigned, InstructionPressureInfo> IPI;
+
+ void updateResourcePressureDistribution(uint64_t CumulativeMask);
+
+ User getResourceUser(unsigned ProcResID, unsigned UnitID) const {
+ unsigned Index = ProcResID2ResourceUsersIndex[ProcResID];
+ return ResourceUsers[Index + UnitID];
+ }
+
+public:
+ PressureTracker(const MCSchedModel &Model);
+
+ ArrayRef<unsigned> getResourcePressureDistribution() const {
+ return ResourcePressureDistribution;
+ }
+
+ void getResourceUsers(uint64_t ResourceMask,
+ SmallVectorImpl<User> &Users) const;
+
+ unsigned getRegisterPressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.RegisterPressureCycles;
+ }
+
+ unsigned getMemoryPressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.MemoryPressureCycles;
+ }
+
+ unsigned getResourcePressureCycles(unsigned IID) const {
+ assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ const InstructionPressureInfo &Info = IPI.find(IID)->second;
+ return Info.ResourcePressureCycles;
+ }
+
+ const char *resolveResourceName(uint64_t ResourceMask) const {
+ unsigned Index = getResourceStateIndex(ResourceMask);
+ unsigned ProcResID = ResIdx2ProcResID[Index];
+ const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+ return PRDesc.Name;
+ }
+
+ void onInstructionDispatched(unsigned IID);
+ void onInstructionExecuted(unsigned IID);
+
+ void handlePressureEvent(const HWPressureEvent &Event);
+ void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
+};
+
+// A dependency edge.
+struct DependencyEdge {
+ enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE };
+
+ // Dependency edge descriptor.
+ //
+ // It specifies the dependency type, as well as the edge cost in cycles.
+ struct Dependency {
+ DependencyType Type;
+ uint64_t ResourceOrRegID;
+ uint64_t Cost;
+ };
+ Dependency Dep;
+
+ unsigned FromIID;
+ unsigned ToIID;
+
+ // Used by the bottleneck analysis to compute the interference
+ // probability for processor resources.
+ unsigned Frequency;
+};
+
+// A dependency graph used by the bottleneck analysis to describe data
+// dependencies and processor resource interferences between instructions.
+//
+// There is a node (an instance of struct DGNode) for every instruction in the
+// input assembly sequence. Edges of the graph represent dependencies between
+// instructions.
+//
+// Each edge of the graph is associated with a cost value which is used
+// internally to rank dependencies based on their impact on the runtime
+// performance (see field DependencyEdge::Dependency::Cost). In general, the
+// higher the cost of an edge, the higher the impact on performance.
+//
+// The cost of a dependency is a function of both the latency and the number of
+// cycles where the dependency has been seen as critical (i.e. contributing to
+// back-pressure increases).
+//
+// Loop carried dependencies are carefully expanded by the bottleneck analysis
+// to guarantee that the graph stays acyclic. To this end, extra nodes are
+// pre-allocated at construction time to describe instructions from "past and
+// future" iterations. Keeping the graph acyclic reduces the complexity of the
+// algorithm that computes the critical sequence.
+class DependencyGraph {
+ struct DGNode {
+ unsigned NumPredecessors;
+ unsigned NumVisitedPredecessors;
+ uint64_t Cost;
+ unsigned Depth;
+
+ DependencyEdge CriticalPredecessor;
+ SmallVector<DependencyEdge, 8> OutgoingEdges;
+ };
+ SmallVector<DGNode, 16> Nodes;
+
+ DependencyGraph(const DependencyGraph &) = delete;
+ DependencyGraph &operator=(const DependencyGraph &) = delete;
+
+ void addDependency(unsigned From, unsigned To,
+ DependencyEdge::Dependency &&DE);
+
+ void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
+ void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+
+#ifndef NDEBUG
+ void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
+ MCInstPrinter &MCIP) const;
+#endif
+
+public:
+ DependencyGraph(unsigned Size) : Nodes(Size) {}
+
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID,
+ unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_REGISTER, RegID, Cost});
+ }
+
+ void addMemoryDep(unsigned From, unsigned To, unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_MEMORY, /* unused */ 0, Cost});
+ }
+
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask,
+ unsigned Cost) {
+ addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost});
+ }
+
+ // Called by the bottleneck analysis at the end of simulation to propagate
+ // costs through the edges of the graph, and compute a critical path.
+ void finalizeGraph() {
+ SmallVector<unsigned, 16> RootSet;
+ initializeRootSet(RootSet);
+ propagateThroughEdges(RootSet);
+ }
+
+ // Returns a sequence of edges representing the critical sequence based on the
+ // simulated run. It assumes that the graph has already been finalized (i.e.
+ // method `finalizeGraph()` has already been called on this graph).
+ void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const;
+
+#ifndef NDEBUG
+ void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
+#endif
+};
+
+/// A view that reports throughput bottlenecks and the critical sequence.
+class BottleneckAnalysis : public View {
+ const MCSubtargetInfo &STI;
+ MCInstPrinter &MCIP;
+ PressureTracker Tracker;
+ DependencyGraph DG;
+
+ ArrayRef<MCInst> Source;
+ unsigned Iterations;
+ unsigned TotalCycles;
+
+ bool PressureIncreasedBecauseOfResources;
+ bool PressureIncreasedBecauseOfRegisterDependencies;
+ bool PressureIncreasedBecauseOfMemoryDependencies;
+ // True if throughput was affected by dispatch stalls.
+ bool SeenStallCycles;
+
+ struct BackPressureInfo {
+ // Cycles where backpressure increased.
+ unsigned PressureIncreaseCycles;
+ // Cycles where backpressure increased because of pipeline pressure.
+ unsigned ResourcePressureCycles;
+ // Cycles where backpressure increased because of data dependencies.
+ unsigned DataDependencyCycles;
+ // Cycles where backpressure increased because of register dependencies.
+ unsigned RegisterDependencyCycles;
+ // Cycles where backpressure increased because of memory dependencies.
+ unsigned MemoryDependencyCycles;
+ };
+ BackPressureInfo BPI;
+
+ // Used to populate the dependency graph DG.
+ void addRegisterDep(unsigned From, unsigned To, unsigned RegID, unsigned Cy);
+ void addMemoryDep(unsigned From, unsigned To, unsigned Cy);
+ void addResourceDep(unsigned From, unsigned To, uint64_t Mask, unsigned Cy);
+
+ // Prints a bottleneck message to OS.
+ void printBottleneckHints(raw_ostream &OS) const;
+ void printCriticalSequence(raw_ostream &OS) const;
+
+public:
+ BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+ ArrayRef<MCInst> Sequence, unsigned Iterations);
+
+ void onCycleEnd() override;
+ void onEvent(const HWStallEvent &Event) override { SeenStallCycles = true; }
+ void onEvent(const HWPressureEvent &Event) override;
+ void onEvent(const HWInstructionEvent &Event) override;
+
+ void printView(raw_ostream &OS) const override;
+
+#ifndef NDEBUG
+ void dump(raw_ostream &OS, MCInstPrinter &MCIP) const { DG.dump(OS, MCIP); }
+#endif
+};
+
+} // namespace mca
+} // namespace llvm
+
+#endif
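
The class comment above notes that loop-carried dependencies are expanded so that the graph stays acyclic; the .cpp side does this by allocating three copies of the instruction sequence (DG(S.size() * 3)) and remapping any edge whose source index is not smaller than its destination. A rough standalone illustration of that remapping follows, with made-up indices; it shows only what the addRegisterDep/addMemoryDep/addResourceDep helpers in this diff do and makes no further claims about the LLVM MCA internals.

  #include <cstdio>

  // Remap an edge (From, To) over a block of N instructions into the expanded
  // 3*N-node graph, the same way BottleneckAnalysis::addRegisterDep does it.
  void expandEdge(unsigned From, unsigned To, unsigned N) {
    if (From >= To) {
      // Loop-carried: duplicate the edge across consecutive iteration copies.
      std::printf("edge %u -> %u and %u -> %u\n", From, To + N, From + N,
                  To + 2 * N);
      return;
    }
    // Intra-iteration: keep the edge inside the middle copy of the block.
    std::printf("edge %u -> %u\n", From + N, To + N);
  }

  int main() {
    unsigned N = 3;      // a block of 3 instructions, as in the dot-product example
    expandEdge(0, 1, N); // 0 -> 1 becomes 3 -> 4
    expandEdge(2, 0, N); // loop carried: becomes 2 -> 3 and 5 -> 6
  }

Note that the cost of a loop-carried edge is additionally scaled by Iterations / 2 before it is inserted into the graph.
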
diff --git a/tools/llvm-mca/Views/DispatchStatistics.cpp b/tools/llvm-mca/Views/DispatchStatistics.cpp
index 2562c82407bf..557b8ba17b17 100644
--- a/tools/llvm-mca/Views/DispatchStatistics.cpp
+++ b/tools/llvm-mca/Views/DispatchStatistics.cpp
@@ -1,10 +1,9 @@
//===--------------------- DispatchStatistics.cpp ---------------------*- C++
//-*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/DispatchStatistics.h b/tools/llvm-mca/Views/DispatchStatistics.h
index 6679c81efe95..07c0f5a4c68f 100644
--- a/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/tools/llvm-mca/Views/DispatchStatistics.h
@@ -1,9 +1,8 @@
//===--------------------- DispatchStatistics.h -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/InstructionInfoView.cpp b/tools/llvm-mca/Views/InstructionInfoView.cpp
index 5016afb49e44..1fbffa3e5b69 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -1,9 +1,8 @@
//===--------------------- InstructionInfoView.cpp --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -44,6 +43,9 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
unsigned NumMicroOpcodes = SCDesc.NumMicroOps;
unsigned Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
+ // Add extra latency due to delays in the forwarding data paths.
+ Latency += MCSchedModel::getForwardingDelayCycles(
+ STI.getReadAdvanceEntries(SCDesc));
Optional<double> RThroughput =
MCSchedModel::getReciprocalThroughput(STI, SCDesc);
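
With this change, the latency reported by the instruction info view is the scheduling-class latency plus any extra cycles implied by the read-advance (forwarding delay) entries; for example, a scheduling class with a 3-cycle latency and a 1-cycle forwarding delay would now be printed as 4 cycles (illustrative numbers, not taken from any particular scheduling model).
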
diff --git a/tools/llvm-mca/Views/InstructionInfoView.h b/tools/llvm-mca/Views/InstructionInfoView.h
index 3ef95d474490..640d87383436 100644
--- a/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/tools/llvm-mca/Views/InstructionInfoView.h
@@ -1,9 +1,8 @@
//===--------------------- InstructionInfoView.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/RegisterFileStatistics.cpp b/tools/llvm-mca/Views/RegisterFileStatistics.cpp
index 06202bc41421..58736ee0d18c 100644
--- a/tools/llvm-mca/Views/RegisterFileStatistics.cpp
+++ b/tools/llvm-mca/Views/RegisterFileStatistics.cpp
@@ -1,9 +1,8 @@
//===--------------------- RegisterFileStatistics.cpp -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/RegisterFileStatistics.h b/tools/llvm-mca/Views/RegisterFileStatistics.h
index a2c52a668dae..a2273dd48b22 100644
--- a/tools/llvm-mca/Views/RegisterFileStatistics.h
+++ b/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -1,9 +1,8 @@
//===--------------------- RegisterFileStatistics.h -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/ResourcePressureView.cpp b/tools/llvm-mca/Views/ResourcePressureView.cpp
index 6df61840437d..38a2478cf4fe 100644
--- a/tools/llvm-mca/Views/ResourcePressureView.cpp
+++ b/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -1,9 +1,8 @@
//===--------------------- ResourcePressureView.cpp -------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/ResourcePressureView.h b/tools/llvm-mca/Views/ResourcePressureView.h
index 572ce6fe6b70..0fa0b9a36aa3 100644
--- a/tools/llvm-mca/Views/ResourcePressureView.h
+++ b/tools/llvm-mca/Views/ResourcePressureView.h
@@ -1,9 +1,8 @@
//===--------------------- ResourcePressureView.h ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp b/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
index 54eb28f1add9..cb4fbae78039 100644
--- a/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
+++ b/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp
@@ -1,9 +1,8 @@
//===--------------------- RetireControlUnitStatistics.cpp ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 02aa13bc444a..1a4d3dec5c56 100644
--- a/tools/llvm-mca/Views/RetireControlUnitStatistics.h
+++ b/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -1,9 +1,8 @@
//===--------------------- RetireControlUnitStatistics.h --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/SchedulerStatistics.cpp b/tools/llvm-mca/Views/SchedulerStatistics.cpp
index 670f90127f18..bd0ba350ab68 100644
--- a/tools/llvm-mca/Views/SchedulerStatistics.cpp
+++ b/tools/llvm-mca/Views/SchedulerStatistics.cpp
@@ -1,9 +1,8 @@
//===--------------------- SchedulerStatistics.cpp --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -23,7 +22,6 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
: SM(STI.getSchedModel()), LQResourceID(0), SQResourceID(0), NumIssued(0),
NumCycles(0), MostRecentLoadDispatched(~0U),
MostRecentStoreDispatched(~0U),
- IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {
if (SM.hasExtraProcessorInfo()) {
const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo();
@@ -44,9 +42,10 @@ SchedulerStatistics::SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
// In future we should add a new "memory queue" event type, so that we stop
// making assumptions on how LSUnit internally works (See PR39828).
void SchedulerStatistics::onEvent(const HWInstructionEvent &Event) {
- if (Event.Type == HWInstructionEvent::Issued)
- ++NumIssued;
- else if (Event.Type == HWInstructionEvent::Dispatched) {
+ if (Event.Type == HWInstructionEvent::Issued) {
+ const Instruction &Inst = *Event.IR.getInstruction();
+ NumIssued += Inst.getDesc().NumMicroOps;
+ } else if (Event.Type == HWInstructionEvent::Dispatched) {
const Instruction &Inst = *Event.IR.getInstruction();
const unsigned Index = Event.IR.getSourceIndex();
if (LQResourceID && Inst.getDesc().MayLoad &&
@@ -96,29 +95,25 @@ void SchedulerStatistics::updateHistograms() {
BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
}
- IssuedPerCycle[NumIssued]++;
+ IssueWidthPerCycle[NumIssued]++;
NumIssued = 0;
}
void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
OS << "\n\nSchedulers - "
- << "number of cycles where we saw N instructions issued:\n";
+ << "number of cycles where we saw N micro opcodes issued:\n";
OS << "[# issued], [# cycles]\n";
- const auto It =
- std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
- unsigned Index = std::distance(IssuedPerCycle.begin(), It);
-
bool HasColors = OS.has_colors();
- for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
- unsigned IPC = IssuedPerCycle[I];
- if (!IPC)
- continue;
-
- if (I == Index && HasColors)
+ const auto It =
+ std::max_element(IssueWidthPerCycle.begin(), IssueWidthPerCycle.end());
+ for (const std::pair<unsigned, unsigned> &Entry : IssueWidthPerCycle) {
+ unsigned NumIssued = Entry.first;
+ if (NumIssued == It->first && HasColors)
OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
- OS << " " << I << ", " << IPC << " ("
+ unsigned IPC = Entry.second;
+ OS << " " << NumIssued << ", " << IPC << " ("
<< format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
if (HasColors)
OS.resetColor();
diff --git a/tools/llvm-mca/Views/SchedulerStatistics.h b/tools/llvm-mca/Views/SchedulerStatistics.h
index d99a395a726d..32711b4483b4 100644
--- a/tools/llvm-mca/Views/SchedulerStatistics.h
+++ b/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -1,9 +1,8 @@
//===--------------------- SchedulerStatistics.h ----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -63,7 +62,9 @@ class SchedulerStatistics final : public View {
uint64_t CumulativeNumUsedSlots;
};
- std::vector<unsigned> IssuedPerCycle;
+ using Histogram = std::map<unsigned, unsigned>;
+ Histogram IssueWidthPerCycle;
+
std::vector<BufferUsage> Usage;
void updateHistograms();
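
The scheduler statistics view now counts issued micro opcodes (NumIssued += Inst.getDesc().NumMicroOps) and keeps the per-cycle distribution in a std::map keyed by that count, so only buckets that actually occur are stored. The sketch below reproduces that bookkeeping with made-up per-cycle counts; it is a self-contained illustration, not the tool's code, and it passes an explicit comparator to std::max_element because the map's default pair comparison would select the largest key rather than the most frequent bucket.

  #include <algorithm>
  #include <cstdio>
  #include <map>

  int main() {
    // Histogram: number of cycles in which exactly N micro opcodes were issued.
    std::map<unsigned, unsigned> IssueWidthPerCycle;

    // Pretend per-cycle issue counts observed during a simulation.
    const unsigned PerCycle[] = {2, 0, 2, 4, 2, 0};
    for (unsigned NumIssued : PerCycle)
      IssueWidthPerCycle[NumIssued]++;

    // Find the bucket with the highest cycle count.
    const auto It = std::max_element(
        IssueWidthPerCycle.begin(), IssueWidthPerCycle.end(),
        [](const auto &L, const auto &R) { return L.second < R.second; });

    for (const auto &Entry : IssueWidthPerCycle)
      std::printf("%s %u, %u\n", Entry.first == It->first ? "*" : " ",
                  Entry.first, Entry.second);
  }
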
diff --git a/tools/llvm-mca/Views/SummaryView.cpp b/tools/llvm-mca/Views/SummaryView.cpp
index d8ac709e784d..ef5550048f4c 100644
--- a/tools/llvm-mca/Views/SummaryView.cpp
+++ b/tools/llvm-mca/Views/SummaryView.cpp
@@ -1,9 +1,8 @@
//===--------------------- SummaryView.cpp -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -25,11 +24,17 @@ namespace mca {
SummaryView::SummaryView(const MCSchedModel &Model, ArrayRef<MCInst> S,
unsigned Width)
- : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
+ : SM(Model), Source(S), DispatchWidth(Width ? Width : Model.IssueWidth),
+ LastInstructionIdx(0),
TotalCycles(0), NumMicroOps(0),
ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
- ProcResourceMasks(Model.getNumProcResourceKinds()) {
+ ProcResourceMasks(Model.getNumProcResourceKinds()),
+ ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0) {
computeProcResourceMasks(SM, ProcResourceMasks);
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ unsigned Index = getResourceStateIndex(ProcResourceMasks[I]);
+ ResIdx2ProcResID[Index] = I;
+ }
}
void SummaryView::onEvent(const HWInstructionEvent &Event) {
@@ -51,11 +56,8 @@ void SummaryView::onEvent(const HWInstructionEvent &Event) {
NumMicroOps += Desc.NumMicroOps;
for (const std::pair<uint64_t, const ResourceUsage> &RU : Desc.Resources) {
if (RU.second.size()) {
- const auto It = find(ProcResourceMasks, RU.first);
- assert(It != ProcResourceMasks.end() &&
- "Invalid processor resource mask!");
- ProcResourceUsage[std::distance(ProcResourceMasks.begin(), It)] +=
- RU.second.size();
+ unsigned ProcResID = ResIdx2ProcResID[getResourceStateIndex(RU.first)];
+ ProcResourceUsage[ProcResID] += RU.second.size();
}
}
}
@@ -87,5 +89,6 @@ void SummaryView::printView(raw_ostream &OS) const {
TempStream.flush();
OS << Buffer;
}
+
} // namespace mca.
} // namespace llvm
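
In the summary view, the per-event linear find over ProcResourceMasks is replaced by the ResIdx2ProcResID table, the same mask-to-ID mapping the new PressureTracker uses: the table is filled once in the constructor and then indexed by getResourceStateIndex(Mask) on every issued-resource event. The standalone sketch below shows the idea; the bit-position function is a stand-in assumption for illustration only, not the actual llvm::mca::getResourceStateIndex implementation.

  #include <cstdint>
  #include <vector>

  // Stand-in for getResourceStateIndex: position of the highest set bit of a
  // non-zero mask. The exact mapping used by LLVM MCA may differ; what matters
  // is that it is a cheap, collision-free function of the resource mask.
  static unsigned maskToIndex(uint64_t Mask) {
    unsigned Index = 0;
    while (Mask >>= 1)
      ++Index;
    return Index;
  }

  struct ResourceDirectory {
    // One slot per possible bit position; resource 0 is reserved as invalid.
    std::vector<unsigned> ResIdx2ProcResID;

    explicit ResourceDirectory(const std::vector<uint64_t> &ProcResourceMasks)
        : ResIdx2ProcResID(64, 0) {
      for (unsigned I = 1, E = ProcResourceMasks.size(); I < E; ++I)
        ResIdx2ProcResID[maskToIndex(ProcResourceMasks[I])] = I;
    }

    // O(1) translation from a resource mask back to its processor resource ID,
    // instead of a linear find() over ProcResourceMasks on every event.
    unsigned lookup(uint64_t Mask) const {
      return ResIdx2ProcResID[maskToIndex(Mask)];
    }
  };

Under the assumption that each resource mask carries a distinct leading bit, lookup(Mask) recovers the same ID as the removed linear search, in constant time.
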
diff --git a/tools/llvm-mca/Views/SummaryView.h b/tools/llvm-mca/Views/SummaryView.h
index f59fd4233fbe..9be31b7d51bd 100644
--- a/tools/llvm-mca/Views/SummaryView.h
+++ b/tools/llvm-mca/Views/SummaryView.h
@@ -1,9 +1,8 @@
//===--------------------- SummaryView.h ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -46,6 +45,7 @@ class SummaryView : public View {
unsigned TotalCycles;
// The total number of micro opcodes contributed by a block of instructions.
unsigned NumMicroOps;
+
// For each processor resource, this vector stores the cumulative number of
// resource cycles consumed by the analyzed code block.
llvm::SmallVector<unsigned, 8> ProcResourceUsage;
@@ -56,6 +56,9 @@ class SummaryView : public View {
// declared by the scheduling model.
llvm::SmallVector<uint64_t, 8> ProcResourceMasks;
+ // Used to map resource indices to actual processor resource IDs.
+ llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
+
// Compute the reciprocal throughput for the analyzed code block.
// The reciprocal block throughput is computed as the MAX between:
// - NumMicroOps / DispatchWidth
@@ -68,9 +71,9 @@ public:
void onCycleEnd() override { ++TotalCycles; }
void onEvent(const HWInstructionEvent &Event) override;
-
void printView(llvm::raw_ostream &OS) const override;
};
+
} // namespace mca
} // namespace llvm
diff --git a/tools/llvm-mca/Views/TimelineView.cpp b/tools/llvm-mca/Views/TimelineView.cpp
index 7d55bbc99c73..fe3f16ba344c 100644
--- a/tools/llvm-mca/Views/TimelineView.cpp
+++ b/tools/llvm-mca/Views/TimelineView.cpp
@@ -1,9 +1,8 @@
//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \brief
diff --git a/tools/llvm-mca/Views/TimelineView.h b/tools/llvm-mca/Views/TimelineView.h
index ee981800161c..b63b234293cd 100644
--- a/tools/llvm-mca/Views/TimelineView.h
+++ b/tools/llvm-mca/Views/TimelineView.h
@@ -1,9 +1,8 @@
//===--------------------- TimelineView.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \brief
diff --git a/tools/llvm-mca/Views/View.cpp b/tools/llvm-mca/Views/View.cpp
index 6cfb9dd9f394..8e5c34d2d5c2 100644
--- a/tools/llvm-mca/Views/View.cpp
+++ b/tools/llvm-mca/Views/View.cpp
@@ -1,9 +1,8 @@
//===----------------------- View.cpp ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/Views/View.h b/tools/llvm-mca/Views/View.h
index 4b82b0da0d27..3b52511b4d29 100644
--- a/tools/llvm-mca/Views/View.h
+++ b/tools/llvm-mca/Views/View.h
@@ -1,9 +1,8 @@
//===----------------------- View.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/tools/llvm-mca/llvm-mca.cpp b/tools/llvm-mca/llvm-mca.cpp
index 68d63db599d7..b3590b5910ec 100644
--- a/tools/llvm-mca/llvm-mca.cpp
+++ b/tools/llvm-mca/llvm-mca.cpp
@@ -1,9 +1,8 @@
//===-- llvm-mca.cpp - Machine Code Analyzer -------------------*- C++ -* -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,6 +23,7 @@
#include "CodeRegion.h"
#include "CodeRegionGenerator.h"
#include "PipelinePrinter.h"
+#include "Views/BottleneckAnalysis.h"
#include "Views/DispatchStatistics.h"
#include "Views/InstructionInfoView.h"
#include "Views/RegisterFileStatistics.h"
@@ -68,8 +68,9 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
cl::value_desc("filename"));
static cl::opt<std::string>
- ArchName("march", cl::desc("Target architecture. "
- "See -version for available targets"),
+ ArchName("march",
+ cl::desc("Target architecture. "
+ "See -version for available targets"),
cl::cat(ToolOptions));
static cl::opt<std::string>
@@ -101,6 +102,17 @@ static cl::opt<unsigned>
"be used for register mappings"),
cl::cat(ToolOptions), cl::init(0));
+static cl::opt<unsigned>
+ MicroOpQueue("micro-op-queue-size", cl::Hidden,
+ cl::desc("Number of entries in the micro-op queue"),
+ cl::cat(ToolOptions), cl::init(0));
+
+static cl::opt<unsigned>
+ DecoderThroughput("decoder-throughput", cl::Hidden,
+ cl::desc("Maximum throughput from the decoders "
+ "(instructions per cycle)"),
+ cl::cat(ToolOptions), cl::init(0));
+
static cl::opt<bool>
PrintRegisterFileStats("register-file-stats",
cl::desc("Print register file statistics"),
@@ -176,6 +188,11 @@ static cl::opt<bool>
cl::desc("Print all views including hardware statistics"),
cl::cat(ViewOptions), cl::init(false));
+static cl::opt<bool> EnableBottleneckAnalysis(
+ "bottleneck-analysis",
+ cl::desc("Enable bottleneck analysis (disabled by default)"),
+ cl::cat(ViewOptions), cl::init(false));
+
namespace {
const Target *getTarget(const char *ProgName) {
@@ -220,6 +237,7 @@ static void processViewOptions() {
if (EnableAllViews.getNumOccurrences()) {
processOptionImpl(PrintSummaryView, EnableAllViews);
+ processOptionImpl(EnableBottleneckAnalysis, EnableAllViews);
processOptionImpl(PrintResourcePressureView, EnableAllViews);
processOptionImpl(PrintTimelineView, EnableAllViews);
processOptionImpl(PrintInstructionInfoView, EnableAllViews);
@@ -348,6 +366,11 @@ int main(int argc, char **argv) {
return 1;
}
const mca::CodeRegions &Regions = *RegionsOrErr;
+
+ // Early exit if errors were found by the code region parsing logic.
+ if (!Regions.isValid())
+ return 1;
+
if (Regions.empty()) {
WithColor::error() << "no assembly instructions found.\n";
return 1;
@@ -377,18 +400,15 @@ int main(int argc, char **argv) {
const MCSchedModel &SM = STI->getSchedModel();
- unsigned Width = SM.IssueWidth;
- if (DispatchWidth)
- Width = DispatchWidth;
-
// Create an instruction builder.
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get());
// Create a context to control ownership of the pipeline hardware.
mca::Context MCA(*MRI, *STI);
- mca::PipelineOptions PO(Width, RegisterFileSize, LoadQueueSize,
- StoreQueueSize, AssumeNoAlias);
+ mca::PipelineOptions PO(MicroOpQueue, DecoderThroughput, DispatchWidth,
+ RegisterFileSize, LoadQueueSize, StoreQueueSize,
+ AssumeNoAlias, EnableBottleneckAnalysis);
// Number each region in the sequence.
unsigned RegionIdx = 0;
@@ -423,8 +443,8 @@ int main(int argc, char **argv) {
WithColor::error() << IE.Message << '\n';
IP->printInst(&IE.Inst, SS, "", *STI);
SS.flush();
- WithColor::note() << "instruction: " << InstructionStr
- << '\n';
+ WithColor::note()
+ << "instruction: " << InstructionStr << '\n';
})) {
// Default case.
WithColor::error() << toString(std::move(NewE));
@@ -464,7 +484,13 @@ int main(int argc, char **argv) {
mca::PipelinePrinter Printer(*P);
if (PrintSummaryView)
- Printer.addView(llvm::make_unique<mca::SummaryView>(SM, Insts, Width));
+ Printer.addView(
+ llvm::make_unique<mca::SummaryView>(SM, Insts, DispatchWidth));
+
+ if (EnableBottleneckAnalysis) {
+ Printer.addView(llvm::make_unique<mca::BottleneckAnalysis>(
+ *STI, *IP, Insts, S.getNumIterations()));
+ }
if (PrintInstructionInfoView)
Printer.addView(
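The processViewOptions() hunk above routes --all-views into the new bottleneck-analysis flag through processOptionImpl(), which this diff only calls. A minimal sketch of what such a helper plausibly looks like, assuming the cl::opt occurrence/position API used elsewhere in the patch (illustrative only, not part of the vendored sources):

static void processOptionImpl(cl::opt<bool> &O, const cl::opt<bool> &Default) {
  // Inherit the master flag's value unless the per-view flag was given
  // explicitly and appeared later on the command line than the master flag.
  if (!O.getNumOccurrences() || O.getPosition() < Default.getPosition())
    O = Default;
}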
diff --git a/tools/llvm-modextract/llvm-modextract.cpp b/tools/llvm-modextract/llvm-modextract.cpp
index 9fd8340505aa..3adefc5f0d3e 100644
--- a/tools/llvm-modextract/llvm-modextract.cpp
+++ b/tools/llvm-modextract/llvm-modextract.cpp
@@ -1,9 +1,8 @@
//===-- llvm-modextract.cpp - LLVM module extractor utility ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 042e284e8369..aa62e6f0209b 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -1,9 +1,8 @@
//===-- llvm-nm.cpp - Symbol table dumping utility for llvm ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,12 +46,15 @@ using namespace object;
namespace {
enum OutputFormatTy { bsd, sysv, posix, darwin };
+
+cl::OptionCategory NMCat("llvm-nm Options");
+
cl::opt<OutputFormatTy> OutputFormat(
"format", cl::desc("Specify output format"),
cl::values(clEnumVal(bsd, "BSD format"), clEnumVal(sysv, "System V format"),
clEnumVal(posix, "POSIX.2 format"),
clEnumVal(darwin, "Darwin -m format")),
- cl::init(bsd));
+ cl::init(bsd), cl::cat(NMCat));
cl::alias OutputFormat2("f", cl::desc("Alias for --format"),
cl::aliasopt(OutputFormat));
@@ -60,50 +62,53 @@ cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input files>"),
cl::ZeroOrMore);
cl::opt<bool> UndefinedOnly("undefined-only",
- cl::desc("Show only undefined symbols"));
+ cl::desc("Show only undefined symbols"),
+ cl::cat(NMCat));
cl::alias UndefinedOnly2("u", cl::desc("Alias for --undefined-only"),
cl::aliasopt(UndefinedOnly), cl::Grouping);
cl::opt<bool> DynamicSyms("dynamic",
cl::desc("Display the dynamic symbols instead "
- "of normal symbols."));
+ "of normal symbols."),
+ cl::cat(NMCat));
cl::alias DynamicSyms2("D", cl::desc("Alias for --dynamic"),
cl::aliasopt(DynamicSyms), cl::Grouping);
-cl::opt<bool> DefinedOnly("defined-only",
- cl::desc("Show only defined symbols"));
+cl::opt<bool> DefinedOnly("defined-only", cl::desc("Show only defined symbols"),
+ cl::cat(NMCat));
cl::alias DefinedOnly2("U", cl::desc("Alias for --defined-only"),
cl::aliasopt(DefinedOnly), cl::Grouping);
cl::opt<bool> ExternalOnly("extern-only",
cl::desc("Show only external symbols"),
- cl::ZeroOrMore);
+ cl::ZeroOrMore, cl::cat(NMCat));
cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"),
cl::aliasopt(ExternalOnly), cl::Grouping,
cl::ZeroOrMore);
-cl::opt<bool> NoWeakSymbols("no-weak",
- cl::desc("Show only non-weak symbols"));
+cl::opt<bool> NoWeakSymbols("no-weak", cl::desc("Show only non-weak symbols"),
+ cl::cat(NMCat));
cl::alias NoWeakSymbols2("W", cl::desc("Alias for --no-weak"),
cl::aliasopt(NoWeakSymbols), cl::Grouping);
-cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"),
- cl::Grouping);
+cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"), cl::Grouping,
+ cl::cat(NMCat));
cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
- cl::Grouping);
+ cl::Grouping, cl::cat(NMCat));
cl::alias Portability("portability", cl::desc("Alias for --format=posix"),
cl::aliasopt(POSIXFormat), cl::NotHidden);
cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
- cl::Grouping);
+ cl::Grouping, cl::cat(NMCat));
static cl::list<std::string>
ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
- cl::ZeroOrMore);
+ cl::ZeroOrMore, cl::cat(NMCat));
bool ArchAll = false;
cl::opt<bool> PrintFileName(
"print-file-name",
- cl::desc("Precede each symbol with the object file it came from"));
+ cl::desc("Precede each symbol with the object file it came from"),
+ cl::cat(NMCat));
cl::alias PrintFileNameA("A", cl::desc("Alias for --print-file-name"),
cl::aliasopt(PrintFileName), cl::Grouping);
@@ -111,40 +116,52 @@ cl::alias PrintFileNameo("o", cl::desc("Alias for --print-file-name"),
cl::aliasopt(PrintFileName), cl::Grouping);
cl::opt<bool> DebugSyms("debug-syms",
- cl::desc("Show all symbols, even debugger only"));
+ cl::desc("Show all symbols, even debugger only"),
+ cl::cat(NMCat));
cl::alias DebugSymsa("a", cl::desc("Alias for --debug-syms"),
cl::aliasopt(DebugSyms), cl::Grouping);
-cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"));
+cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"),
+ cl::cat(NMCat));
cl::alias NumericSortn("n", cl::desc("Alias for --numeric-sort"),
cl::aliasopt(NumericSort), cl::Grouping);
cl::alias NumericSortv("v", cl::desc("Alias for --numeric-sort"),
cl::aliasopt(NumericSort), cl::Grouping);
-cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"));
+cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"),
+ cl::cat(NMCat));
cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort),
cl::Grouping);
-cl::opt<bool> Demangle("demangle", cl::desc("Demangle C++ symbol names"));
-cl::alias DemangleC("C", cl::desc("Alias for --demangle"), cl::aliasopt(Demangle),
- cl::Grouping);
+cl::opt<bool> Demangle("demangle", cl::ZeroOrMore,
+ cl::desc("Demangle C++ symbol names"), cl::cat(NMCat));
+cl::alias DemangleC("C", cl::desc("Alias for --demangle"),
+ cl::aliasopt(Demangle), cl::Grouping);
+cl::opt<bool> NoDemangle("no-demangle", cl::init(false), cl::ZeroOrMore,
+ cl::desc("Don't demangle symbol names"),
+ cl::cat(NMCat));
-cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"));
+cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"),
+ cl::cat(NMCat));
cl::alias ReverseSortr("r", cl::desc("Alias for --reverse-sort"),
cl::aliasopt(ReverseSort), cl::Grouping);
cl::opt<bool> PrintSize("print-size",
- cl::desc("Show symbol size instead of address"));
+ cl::desc("Show symbol size as well as address"),
+ cl::cat(NMCat));
cl::alias PrintSizeS("S", cl::desc("Alias for --print-size"),
cl::aliasopt(PrintSize), cl::Grouping);
bool MachOPrintSizeWarning = false;
-cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"));
+cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"),
+ cl::cat(NMCat));
cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
- cl::desc("Exclude aliases from output"));
+ cl::desc("Exclude aliases from output"),
+ cl::cat(NMCat));
-cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"));
+cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"),
+ cl::cat(NMCat));
cl::alias ArchiveMaps("M", cl::desc("Alias for --print-armap"),
cl::aliasopt(ArchiveMap), cl::Grouping);
@@ -153,38 +170,45 @@ cl::opt<Radix>
AddressRadix("radix", cl::desc("Radix (o/d/x) for printing symbol Values"),
cl::values(clEnumVal(d, "decimal"), clEnumVal(o, "octal"),
clEnumVal(x, "hexadecimal")),
- cl::init(x));
+ cl::init(x), cl::cat(NMCat));
cl::alias RadixAlias("t", cl::desc("Alias for --radix"),
cl::aliasopt(AddressRadix));
cl::opt<bool> JustSymbolName("just-symbol-name",
- cl::desc("Print just the symbol's name"));
+ cl::desc("Print just the symbol's name"),
+ cl::cat(NMCat));
cl::alias JustSymbolNames("j", cl::desc("Alias for --just-symbol-name"),
cl::aliasopt(JustSymbolName), cl::Grouping);
-// FIXME: This option takes exactly two strings and should be allowed anywhere
-// on the command line. Such that "llvm-nm -s __TEXT __text foo.o" would work.
-// But that does not as the CommandLine Library does not have a way to make
-// this work. For now the "-s __TEXT __text" has to be last on the command
-// line.
-cl::list<std::string> SegSect("s", cl::Positional, cl::ZeroOrMore,
+cl::opt<bool> SpecialSyms("special-syms",
+ cl::desc("No-op. Used for GNU compatibility only"));
+
+cl::list<std::string> SegSect("s", cl::multi_val(2), cl::ZeroOrMore,
+ cl::value_desc("segment section"), cl::Hidden,
cl::desc("Dump only symbols from this segment "
- "and section name, Mach-O only"));
+ "and section name, Mach-O only"),
+ cl::cat(NMCat));
-cl::opt<bool> FormatMachOasHex("x", cl::desc("Print symbol entry in hex, "
- "Mach-O only"), cl::Grouping);
+cl::opt<bool> FormatMachOasHex("x",
+ cl::desc("Print symbol entry in hex, "
+ "Mach-O only"),
+ cl::Grouping, cl::cat(NMCat));
cl::opt<bool> AddDyldInfo("add-dyldinfo",
cl::desc("Add symbols from the dyldinfo not already "
- "in the symbol table, Mach-O only"));
+ "in the symbol table, Mach-O only"),
+ cl::cat(NMCat));
cl::opt<bool> NoDyldInfo("no-dyldinfo",
cl::desc("Don't add any symbols from the dyldinfo, "
- "Mach-O only"));
+ "Mach-O only"),
+ cl::cat(NMCat));
cl::opt<bool> DyldInfoOnly("dyldinfo-only",
cl::desc("Show only symbols from the dyldinfo, "
- "Mach-O only"));
+ "Mach-O only"),
+ cl::cat(NMCat));
cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
- cl::desc("Disable LLVM bitcode reader"));
+ cl::desc("Disable LLVM bitcode reader"),
+ cl::cat(NMCat));
cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
@@ -263,6 +287,8 @@ struct NMSymbol {
uint64_t Size;
char TypeChar;
StringRef Name;
+ StringRef SectionName;
+ StringRef TypeName;
BasicSymbolRef Sym;
// The Sym field above points to the native symbol in the object file,
// for Mach-O when we are creating symbols from the dyld info the above
@@ -316,8 +342,7 @@ static char isSymbolList64Bit(SymbolicFile &Obj) {
}
static StringRef CurrentFilename;
-typedef std::vector<NMSymbol> SymbolListT;
-static SymbolListT SymbolList;
+static std::vector<NMSymbol> SymbolList;
static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
@@ -326,9 +351,10 @@ static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
// the darwin format it produces the same output as darwin's nm(1) -m output
// and when printing Mach-O symbols in hex it produces the same output as
// darwin's nm(1) -x format.
-static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
+static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
char *SymbolAddrStr, const char *printBlanks,
- const char *printDashes, const char *printFormat) {
+ const char *printDashes,
+ const char *printFormat) {
MachO::mach_header H;
MachO::mach_header_64 H_64;
uint32_t Filetype = MachO::MH_OBJECT;
@@ -340,7 +366,7 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
uint64_t NValue = 0;
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
if (Obj.isIR()) {
- uint32_t SymFlags = I->Sym.getFlags();
+ uint32_t SymFlags = S.Sym.getFlags();
if (SymFlags & SymbolRef::SF_Global)
NType |= MachO::N_EXT;
if (SymFlags & SymbolRef::SF_Hidden)
@@ -362,7 +388,7 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
if (SymFlags & SymbolRef::SF_Weak)
NDesc |= MachO::N_WEAK_DEF;
} else {
- DataRefImpl SymDRI = I->Sym.getRawDataRefImpl();
+ DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
if (MachO->is64Bit()) {
H_64 = MachO->MachOObjectFile::getHeader64();
Filetype = H_64.filetype;
@@ -375,11 +401,11 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
NStrx = STE_64.n_strx;
NValue = STE_64.n_value;
} else {
- NType = I->NType;
- NSect = I->NSect;
- NDesc = I->NDesc;
+ NType = S.NType;
+ NSect = S.NSect;
+ NDesc = S.NDesc;
NStrx = 0;
- NValue = I->Address;
+ NValue = S.Address;
}
} else {
H = MachO->MachOObjectFile::getHeader();
@@ -393,42 +419,31 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
NStrx = STE.n_strx;
NValue = STE.n_value;
} else {
- NType = I->NType;
- NSect = I->NSect;
- NDesc = I->NDesc;
+ NType = S.NType;
+ NSect = S.NSect;
+ NDesc = S.NDesc;
NStrx = 0;
- NValue = I->Address;
+ NValue = S.Address;
}
}
}
// If we are printing Mach-O symbols in hex do that and return.
if (FormatMachOasHex) {
- char Str[18] = "";
- format(printFormat, NValue).print(Str, sizeof(Str));
- outs() << Str << ' ';
- format("%02x", NType).print(Str, sizeof(Str));
- outs() << Str << ' ';
- format("%02x", NSect).print(Str, sizeof(Str));
- outs() << Str << ' ';
- format("%04x", NDesc).print(Str, sizeof(Str));
- outs() << Str << ' ';
- format("%08x", NStrx).print(Str, sizeof(Str));
- outs() << Str << ' ';
- outs() << I->Name;
+ outs() << format(printFormat, NValue) << ' '
+ << format("%02x %02x %04x %08x", NType, NSect, NDesc, NStrx) << ' '
+ << S.Name;
if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
outs() << " (indirect for ";
- format(printFormat, NValue).print(Str, sizeof(Str));
- outs() << Str << ' ';
+ outs() << format(printFormat, NValue) << ' ';
StringRef IndirectName;
- if (I->Sym.getRawDataRefImpl().p) {
- if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+ if (S.Sym.getRawDataRefImpl().p) {
+ if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
outs() << "?)";
else
outs() << IndirectName << ")";
- }
- else
- outs() << I->IndirectName << ")";
+ } else
+ outs() << S.IndirectName << ")";
}
outs() << "\n";
return;
@@ -487,9 +502,9 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
break;
}
section_iterator Sec = SectionRef();
- if (I->Sym.getRawDataRefImpl().p) {
+ if (S.Sym.getRawDataRefImpl().p) {
Expected<section_iterator> SecOrErr =
- MachO->getSymbolSection(I->Sym.getRawDataRefImpl());
+ MachO->getSymbolSection(S.Sym.getRawDataRefImpl());
if (!SecOrErr) {
consumeError(SecOrErr.takeError());
outs() << "(?,?) ";
@@ -501,11 +516,12 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
break;
}
} else {
- Sec = I->Section;
+ Sec = S.Section;
}
DataRefImpl Ref = Sec->getRawDataRefImpl();
StringRef SectionName;
- MachO->getSectionName(Ref, SectionName);
+ if (Expected<StringRef> NameOrErr = MachO->getSectionName(Ref))
+ SectionName = *NameOrErr;
StringRef SegmentName = MachO->getSectionFinalSegmentName(Ref);
outs() << "(" << SegmentName << "," << SectionName << ") ";
break;
@@ -541,39 +557,36 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
outs() << "non-external ";
}
- if (Filetype == MachO::MH_OBJECT &&
- (NDesc & MachO::N_NO_DEAD_STRIP) == MachO::N_NO_DEAD_STRIP)
- outs() << "[no dead strip] ";
-
- if (Filetype == MachO::MH_OBJECT &&
- ((NType & MachO::N_TYPE) != MachO::N_UNDF) &&
- (NDesc & MachO::N_SYMBOL_RESOLVER) == MachO::N_SYMBOL_RESOLVER)
- outs() << "[symbol resolver] ";
-
- if (Filetype == MachO::MH_OBJECT &&
- ((NType & MachO::N_TYPE) != MachO::N_UNDF) &&
- (NDesc & MachO::N_ALT_ENTRY) == MachO::N_ALT_ENTRY)
- outs() << "[alt entry] ";
+ if (Filetype == MachO::MH_OBJECT) {
+ if (NDesc & MachO::N_NO_DEAD_STRIP)
+ outs() << "[no dead strip] ";
+ if ((NType & MachO::N_TYPE) != MachO::N_UNDF &&
+ NDesc & MachO::N_SYMBOL_RESOLVER)
+ outs() << "[symbol resolver] ";
+ if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_ALT_ENTRY)
+ outs() << "[alt entry] ";
+ if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_COLD_FUNC)
+ outs() << "[cold func] ";
+ }
if ((NDesc & MachO::N_ARM_THUMB_DEF) == MachO::N_ARM_THUMB_DEF)
outs() << "[Thumb] ";
if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
- outs() << I->Name << " (for ";
+ outs() << S.Name << " (for ";
StringRef IndirectName;
if (MachO) {
- if (I->Sym.getRawDataRefImpl().p) {
- if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+ if (S.Sym.getRawDataRefImpl().p) {
+ if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
outs() << "?)";
else
outs() << IndirectName << ")";
- }
- else
- outs() << I->IndirectName << ")";
+ } else
+ outs() << S.IndirectName << ")";
} else
outs() << "?)";
} else
- outs() << I->Name;
+ outs() << S.Name;
if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
(((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
@@ -635,25 +648,24 @@ static const struct DarwinStabName DarwinStabNames[] = {
{MachO::N_ECOMM, "ECOMM"},
{MachO::N_ECOML, "ECOML"},
{MachO::N_LENG, "LENG"},
- {0, nullptr}};
+};
static const char *getDarwinStabString(uint8_t NType) {
- for (unsigned i = 0; DarwinStabNames[i].Name; i++) {
- if (DarwinStabNames[i].NType == NType)
- return DarwinStabNames[i].Name;
- }
+ for (auto I : makeArrayRef(DarwinStabNames))
+ if (I.NType == NType)
+ return I.Name;
return nullptr;
}
// darwinPrintStab() prints the n_sect, n_desc along with a symbolic name of
// a stab n_type value in a Mach-O file.
-static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
+static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
MachO::nlist_64 STE_64;
MachO::nlist STE;
uint8_t NType;
uint8_t NSect;
uint16_t NDesc;
- DataRefImpl SymDRI = I->Sym.getRawDataRefImpl();
+ DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
if (MachO->is64Bit()) {
STE_64 = MachO->getSymbol64TableEntry(SymDRI);
NType = STE_64.n_type;
@@ -666,16 +678,11 @@ static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
NDesc = STE.n_desc;
}
- char Str[18] = "";
- format("%02x", NSect).print(Str, sizeof(Str));
- outs() << ' ' << Str << ' ';
- format("%04x", NDesc).print(Str, sizeof(Str));
- outs() << Str << ' ';
+ outs() << format(" %02x %04x ", NSect, NDesc);
if (const char *stabString = getDarwinStabString(NType))
- format("%5.5s", stabString).print(Str, sizeof(Str));
+ outs() << format("%5.5s", stabString);
else
- format(" %02x", NType).print(Str, sizeof(Str));
- outs() << Str;
+ outs() << format(" %02x", NType);
}
static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
@@ -780,26 +787,24 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
errs() << "no symbols\n";
}
- for (SymbolListT::iterator I = SymbolList.begin(), E = SymbolList.end();
- I != E; ++I) {
+ for (const NMSymbol &S : SymbolList) {
uint32_t SymFlags;
- std::string Name = I->Name.str();
+ std::string Name = S.Name.str();
MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
if (Demangle) {
- if (Optional<std::string> Opt = demangle(I->Name, MachO))
+ if (Optional<std::string> Opt = demangle(S.Name, MachO))
Name = *Opt;
}
- if (I->Sym.getRawDataRefImpl().p)
- SymFlags = I->Sym.getFlags();
+ if (S.Sym.getRawDataRefImpl().p)
+ SymFlags = S.Sym.getFlags();
else
- SymFlags = I->SymFlags;
+ SymFlags = S.SymFlags;
bool Undefined = SymFlags & SymbolRef::SF_Undefined;
bool Global = SymFlags & SymbolRef::SF_Global;
bool Weak = SymFlags & SymbolRef::SF_Weak;
if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
- (!Global && ExternalOnly) || (SizeSort && !PrintAddress) ||
- (Weak && NoWeakSymbols))
+ (!Global && ExternalOnly) || (Weak && NoWeakSymbols))
continue;
if (PrintFileName)
writeFileName(outs());
@@ -810,32 +815,30 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
continue;
}
- char SymbolAddrStr[18] = "";
- char SymbolSizeStr[18] = "";
+ char SymbolAddrStr[23], SymbolSizeStr[23];
// If the format is SysV or the symbol isn't defined, then print spaces.
- if (OutputFormat == sysv || !symbolIsDefined(*I)) {
+ if (OutputFormat == sysv || !symbolIsDefined(S)) {
if (OutputFormat == posix) {
- format(printFormat, I->Address)
- .print(SymbolAddrStr, sizeof(SymbolAddrStr));
- format(printFormat, I->Size)
- .print(SymbolSizeStr, sizeof(SymbolSizeStr));
+ format(printFormat, S.Address)
+ .print(SymbolAddrStr, sizeof(SymbolAddrStr));
+ format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
} else {
strcpy(SymbolAddrStr, printBlanks);
strcpy(SymbolSizeStr, printBlanks);
}
}
- // Otherwise, print the symbol address and size.
- if (symbolIsDefined(*I)) {
+ if (symbolIsDefined(S)) {
+ // Otherwise, print the symbol address and size.
if (Obj.isIR())
strcpy(SymbolAddrStr, printDashes);
- else if(MachO && I->TypeChar == 'I')
+ else if (MachO && S.TypeChar == 'I')
strcpy(SymbolAddrStr, printBlanks);
else
- format(printFormat, I->Address)
- .print(SymbolAddrStr, sizeof(SymbolAddrStr));
- format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
+ format(printFormat, S.Address)
+ .print(SymbolAddrStr, sizeof(SymbolAddrStr));
+ format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
}
// If OutputFormat is darwin or we are printing Mach-O symbols in hex and
@@ -844,43 +847,36 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
// printing Mach-O symbols in hex and not a Mach-O object fall back to
// OutputFormat bsd (see below).
if ((OutputFormat == darwin || FormatMachOasHex) && (MachO || Obj.isIR())) {
- darwinPrintSymbol(Obj, I, SymbolAddrStr, printBlanks, printDashes,
+ darwinPrintSymbol(Obj, S, SymbolAddrStr, printBlanks, printDashes,
printFormat);
} else if (OutputFormat == posix) {
- outs() << Name << " " << I->TypeChar << " ";
- if (MachO)
- outs() << SymbolAddrStr << " " << "0" /* SymbolSizeStr */ << "\n";
- else
- outs() << SymbolAddrStr << " " << SymbolSizeStr << "\n";
+ outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
+ << (MachO ? "0" : SymbolSizeStr) << "\n";
} else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
if (PrintAddress)
outs() << SymbolAddrStr << ' ';
- if (PrintSize) {
- outs() << SymbolSizeStr;
- outs() << ' ';
- }
- outs() << I->TypeChar;
- if (I->TypeChar == '-' && MachO)
- darwinPrintStab(MachO, I);
+ if (PrintSize)
+ outs() << SymbolSizeStr << ' ';
+ outs() << S.TypeChar;
+ if (S.TypeChar == '-' && MachO)
+ darwinPrintStab(MachO, S);
outs() << " " << Name;
- if (I->TypeChar == 'I' && MachO) {
+ if (S.TypeChar == 'I' && MachO) {
outs() << " (indirect for ";
- if (I->Sym.getRawDataRefImpl().p) {
+ if (S.Sym.getRawDataRefImpl().p) {
StringRef IndirectName;
- if (MachO->getIndirectName(I->Sym.getRawDataRefImpl(), IndirectName))
+ if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
outs() << "?)";
else
outs() << IndirectName << ")";
} else
- outs() << I->IndirectName << ")";
+ outs() << S.IndirectName << ")";
}
outs() << "\n";
} else if (OutputFormat == sysv) {
- std::string PaddedName(Name);
- while (PaddedName.length() < 20)
- PaddedName += " ";
- outs() << PaddedName << "|" << SymbolAddrStr << "| " << I->TypeChar
- << " | |" << SymbolSizeStr << "| |\n";
+ outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "| "
+ << S.TypeChar << " |" << right_justify(S.TypeName, 18) << "|"
+ << SymbolSizeStr << "| |" << S.SectionName << "\n";
}
}
@@ -898,44 +894,35 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
return '?';
}
+ uint8_t Binding = SymI->getBinding();
+ if (Binding == ELF::STB_GNU_UNIQUE)
+ return 'u';
+
+ assert(Binding != ELF::STB_WEAK && "STB_WEAK not tested in calling function");
+ if (Binding != ELF::STB_GLOBAL && Binding != ELF::STB_LOCAL)
+ return '?';
+
elf_section_iterator SecI = *SecIOrErr;
if (SecI != Obj.section_end()) {
- switch (SecI->getType()) {
- case ELF::SHT_PROGBITS:
- case ELF::SHT_DYNAMIC:
- switch (SecI->getFlags()) {
- case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
- return 't';
- case (ELF::SHF_TLS | ELF::SHF_ALLOC | ELF::SHF_WRITE):
- case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
- return 'd';
- case ELF::SHF_ALLOC:
- case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
- case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
- return 'r';
- }
- break;
- case ELF::SHT_NOBITS:
- return 'b';
- case ELF::SHT_INIT_ARRAY:
- case ELF::SHT_FINI_ARRAY:
+ uint32_t Type = SecI->getType();
+ uint64_t Flags = SecI->getFlags();
+ if (Flags & ELF::SHF_EXECINSTR)
return 't';
- }
- }
+ if (Type == ELF::SHT_NOBITS)
+ return 'b';
+ if (Flags & ELF::SHF_ALLOC)
+ return Flags & ELF::SHF_WRITE ? 'd' : 'r';
- if (SymI->getELFType() == ELF::STT_SECTION) {
- Expected<StringRef> Name = SymI->getName();
- if (!Name) {
- consumeError(Name.takeError());
+ StringRef SecName;
+ if (SecI->getName(SecName))
return '?';
- }
- return StringSwitch<char>(*Name)
- .StartsWith(".debug", 'N')
- .StartsWith(".note", 'n')
- .Default('?');
+ if (SecName.startswith(".debug"))
+ return 'N';
+ if (!(Flags & ELF::SHF_WRITE))
+ return 'n';
}
- return 'n';
+ return '?';
}
static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
@@ -967,10 +954,9 @@ static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
section_iterator SecI = *SecIOrErr;
const coff_section *Section = Obj.getCOFFSection(*SecI);
Characteristics = Section->Characteristics;
- StringRef SectionName;
- Obj.getSectionName(Section, SectionName);
- if (SectionName.startswith(".idata"))
- return 'i';
+ if (Expected<StringRef> NameOrErr = Obj.getSectionName(Section))
+ if (NameOrErr->startswith(".idata"))
+ return 'i';
}
switch (Symb.getSectionNumber()) {
@@ -1030,7 +1016,8 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
return 's';
DataRefImpl Ref = Sec->getRawDataRefImpl();
StringRef SectionName;
- Obj.getSectionName(Ref, SectionName);
+ if (Expected<StringRef> NameOrErr = Obj.getSectionName(Ref))
+ SectionName = *NameOrErr;
StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
SegmentName == "__TEXT_EXEC" && SectionName == "__text")
@@ -1074,8 +1061,40 @@ static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
: elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
}
-static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
+// For ELF object files, return the symbol's type name, which is printed in
+// the 'Type' column of the SYSV format output.
+static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) {
+ if (isa<ELFObjectFileBase>(&Obj)) {
+ elf_symbol_iterator SymI(I);
+ return SymI->getELFTypeName();
+ }
+ return "";
+}
+
+// Return the POSIX nm class type tag (single letter), but also set SecName to
+// the symbol's section name, to be used in format=sysv output.
+static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
+ StringRef &SecName) {
uint32_t Symflags = I->getFlags();
+ if (isa<ELFObjectFileBase>(&Obj)) {
+ if (Symflags & object::SymbolRef::SF_Absolute)
+ SecName = "*ABS*";
+ else if (Symflags & object::SymbolRef::SF_Common)
+ SecName = "*COM*";
+ else if (Symflags & object::SymbolRef::SF_Undefined)
+ SecName = "*UND*";
+ else {
+ elf_symbol_iterator SymI(I);
+ Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
+ if (!SecIOrErr) {
+ consumeError(SecIOrErr.takeError());
+ return '?';
+ }
+ elf_section_iterator secT = *SecIOrErr;
+ secT->getName(SecName);
+ }
+ }
+
if ((Symflags & object::SymbolRef::SF_Weak) && !isa<MachOObjectFile>(Obj)) {
char Ret = isObject(Obj, I) ? 'v' : 'w';
return (!(Symflags & object::SymbolRef::SF_Undefined)) ? toupper(Ret) : Ret;
@@ -1103,10 +1122,13 @@ static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
else
Ret = getSymbolNMTypeChar(cast<ELFObjectFileBase>(Obj), I);
- if (Symflags & object::SymbolRef::SF_Global)
- Ret = toupper(Ret);
+ if (!(Symflags & object::SymbolRef::SF_Global))
+ return Ret;
- return Ret;
+ if (Obj.isELF() && ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
+ return Ret;
+
+ return toupper(Ret);
}
// getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"
@@ -1120,7 +1142,8 @@ static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
for (auto &S : Obj->sections()) {
DataRefImpl Ref = S.getRawDataRefImpl();
StringRef SectionName;
- Obj->getSectionName(Ref, SectionName);
+ if (Expected<StringRef> NameOrErr = Obj->getSectionName(Ref))
+ SectionName = *NameOrErr;
StringRef SegmentName = Obj->getSectionFinalSegmentName(Ref);
if (SegmentName == SegSect[0] && SectionName == SegSect[1])
return Nsect;
@@ -1155,9 +1178,7 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
error("File format has no dynamic symbol table", Obj.getFileName());
return;
}
- auto DynSymbols = E->getDynamicSymbolIterators();
- Symbols =
- make_range<basic_symbol_iterator>(DynSymbols.begin(), DynSymbols.end());
+ Symbols = E->getDynamicSymbolIterators();
}
std::string NameBuffer;
raw_string_ostream OS(NameBuffer);
@@ -1186,10 +1207,8 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
NMSymbol S = {};
S.Size = 0;
S.Address = 0;
- if (PrintSize) {
- if (isa<ELFObjectFileBase>(&Obj))
- S.Size = ELFSymbolRef(Sym).getSize();
- }
+ if (isa<ELFObjectFileBase>(&Obj))
+ S.Size = ELFSymbolRef(Sym).getSize();
if (PrintAddress && isa<ObjectFile>(Obj)) {
SymbolRef SymRef(Sym);
Expected<uint64_t> AddressOrErr = SymRef.getAddress();
@@ -1199,12 +1218,15 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
S.Address = *AddressOrErr;
}
- S.TypeChar = getNMTypeChar(Obj, Sym);
- std::error_code EC = Sym.printName(OS);
- if (EC && MachO)
- OS << "bad string index";
- else
- error(EC);
+ S.TypeName = getNMTypeName(Obj, Sym);
+ S.TypeChar = getNMSectionTagAndName(Obj, Sym, S.SectionName);
+ if (Error E = Sym.printName(OS)) {
+ if (MachO) {
+ OS << "bad string index";
+ consumeError(std::move(E));
+ } else
+ error(std::move(E), Obj.getFileName());
+ }
OS << '\0';
S.Sym = Sym;
SymbolList.push_back(S);
@@ -1270,11 +1292,12 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
bool found = false;
bool ReExport = false;
if (!DyldInfoOnly) {
- for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
- if (SymbolList[J].Address == Entry.address() + BaseSegmentAddress &&
- SymbolList[J].Name == Entry.name())
+ for (const NMSymbol &S : SymbolList)
+ if (S.Address == Entry.address() + BaseSegmentAddress &&
+ S.Name == Entry.name()) {
found = true;
- }
+ break;
+ }
}
if (!found) {
NMSymbol S = {};
@@ -1445,7 +1468,6 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
B.NType = MachO::N_EXT | MachO::N_UNDF;
B.NSect = 0;
B.NDesc = 0;
- B.NDesc = 0;
MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
B.IndirectName = StringRef();
B.Name = Entry.symbolName();
@@ -1735,8 +1757,9 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
return;
LLVMContext Context;
- Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(
- BufferOrErr.get()->getMemBufferRef(), NoLLVMBitcode ? nullptr : &Context);
+ LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
+ Expected<std::unique_ptr<Binary>> BinaryOrErr =
+ createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
if (!BinaryOrErr) {
error(BinaryOrErr.takeError(), Filename);
return;
@@ -1770,7 +1793,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
{
Error Err = Error::success();
for (auto &C : A->children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(&Context);
+ Expected<std::unique_ptr<Binary>> ChildOrErr =
+ C.getAsBinary(ContextPtr);
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
error(std::move(E), Filename, C);
@@ -1841,7 +1865,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(&Context);
+ C.getAsBinary(ContextPtr);
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(
ChildOrErr.takeError())) {
@@ -1912,7 +1936,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(&Context);
+ C.getAsBinary(ContextPtr);
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(
ChildOrErr.takeError()))
@@ -1946,10 +1970,8 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
// Either all architectures have been specified or none have been specified
// and this does not contain the host architecture so dump all the slices.
bool moreThanOneArch = UB->getNumberOfObjects() > 1;
- for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
- E = UB->end_objects();
- I != E; ++I) {
- Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
+ for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
+ Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
std::string ArchiveName;
std::string ArchitectureName;
ArchiveName.clear();
@@ -1958,28 +1980,28 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
ObjectFile &Obj = *ObjOrErr.get();
if (PrintFileName) {
if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
- ArchitectureName = I->getArchFlagName();
+ ArchitectureName = O.getArchFlagName();
} else {
if (moreThanOneArch)
outs() << "\n";
outs() << Obj.getFileName();
if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
- outs() << " (for architecture " << I->getArchFlagName() << ")";
+ outs() << " (for architecture " << O.getArchFlagName() << ")";
outs() << ":\n";
}
dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName);
} else if (auto E = isNotObjectErrorInvalidFileType(
ObjOrErr.takeError())) {
error(std::move(E), Filename, moreThanOneArch ?
- StringRef(I->getArchFlagName()) : StringRef());
+ StringRef(O.getArchFlagName()) : StringRef());
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
+ O.getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
- C.getAsBinary(&Context);
+ C.getAsBinary(ContextPtr);
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(
ChildOrErr.takeError()))
@@ -1987,23 +2009,23 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
StringRef(ArchitectureName) : StringRef());
continue;
}
- if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
+ if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
if (PrintFileName) {
ArchiveName = A->getFileName();
- if (isa<MachOObjectFile>(O) && moreThanOneArch)
- ArchitectureName = I->getArchFlagName();
+ if (isa<MachOObjectFile>(F) && moreThanOneArch)
+ ArchitectureName = O.getArchFlagName();
} else {
outs() << "\n" << A->getFileName();
- if (isa<MachOObjectFile>(O)) {
- outs() << "(" << O->getFileName() << ")";
+ if (isa<MachOObjectFile>(F)) {
+ outs() << "(" << F->getFileName() << ")";
if (moreThanOneArch)
- outs() << " (for architecture " << I->getArchFlagName()
+ outs() << " (for architecture " << O.getArchFlagName()
<< ")";
} else
- outs() << ":" << O->getFileName();
+ outs() << ":" << F->getFileName();
outs() << ":\n";
}
- dumpSymbolNamesFromObject(*O, false, ArchiveName, ArchitectureName);
+ dumpSymbolNamesFromObject(*F, false, ArchiveName, ArchitectureName);
}
}
if (Err)
@@ -2011,7 +2033,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else {
consumeError(AOrErr.takeError());
error(Filename + " for architecture " +
- StringRef(I->getArchFlagName()) +
+ StringRef(O.getArchFlagName()) +
" is not a Mach-O file or an archive file",
"Mach-O universal file");
}
@@ -2021,7 +2043,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
WithColor::warning(errs(), ToolName)
- << "sizes with -print-size for Mach-O files are always zero.\n";
+ << "sizes with --print-size for Mach-O files are always zero.\n";
MachOPrintSizeWarning = true;
}
if (!checkMachOAndArchFlags(O, Filename))
@@ -2032,6 +2054,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
+ cl::HideUnrelatedOptions(NMCat);
cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
// llvm-nm only reads binary files.
@@ -2063,13 +2086,17 @@ int main(int argc, char **argv) {
if (InputFilenames.size() > 1)
MultipleFiles = true;
+ // If both --demangle and --no-demangle are specified then pick the last one.
+ if (NoDemangle.getPosition() > Demangle.getPosition())
+ Demangle = !NoDemangle;
+
for (unsigned i = 0; i < ArchFlags.size(); ++i) {
if (ArchFlags[i] == "all") {
ArchAll = true;
} else {
if (!MachOObjectFile::isValidArch(ArchFlags[i]))
error("Unknown architecture named '" + ArchFlags[i] + "'",
- "for the -arch option");
+ "for the --arch option");
}
}
@@ -2078,7 +2105,7 @@ int main(int argc, char **argv) {
"for the -s option");
if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly))
- error("-no-dyldinfo can't be used with -add-dyldinfo or -dyldinfo-only");
+ error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only");
llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
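The --demangle/--no-demangle resolution above leans on cl::opt recording the command-line position of each occurrence, so the later flag wins. A self-contained sketch of the same idiom with invented flag names, assuming only llvm/Support/CommandLine.h:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> Feature("feature", cl::desc("Enable the feature"));
static cl::opt<bool> NoFeature("no-feature", cl::desc("Disable the feature"));

static bool featureEnabled() {
  // getPosition() is 0 for a flag that never appeared, so a lone
  // --no-feature still overrides --feature's default.
  if (NoFeature.getPosition() > Feature.getPosition())
    return !NoFeature; // --no-feature came last: feature off
  return Feature;      // otherwise the --feature value (default false) stands
}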
diff --git a/tools/llvm-objcopy/Buffer.cpp b/tools/llvm-objcopy/Buffer.cpp
index 8044b023aaad..06b2a20a762f 100644
--- a/tools/llvm-objcopy/Buffer.cpp
+++ b/tools/llvm-objcopy/Buffer.cpp
@@ -1,16 +1,16 @@
//===- Buffer.cpp ---------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Buffer.h"
-#include "llvm-objcopy.h"
#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
#include <memory>
namespace llvm {
@@ -18,23 +18,51 @@ namespace objcopy {
Buffer::~Buffer() {}
-void FileBuffer::allocate(size_t Size) {
+static Error createEmptyFile(StringRef FileName) {
+ // Create an empty tempfile and atomically swap it in place with the desired
+ // output file.
+ Expected<sys::fs::TempFile> Temp =
+ sys::fs::TempFile::create(FileName + ".temp-empty-%%%%%%%");
+ return Temp ? Temp->keep(FileName) : Temp.takeError();
+}
+
+Error FileBuffer::allocate(size_t Size) {
+ // When a 0-sized file is requested, skip allocation but defer file
+ // creation/truncation until commit() to avoid side effects if something
+ // happens between allocate() and commit().
+ if (Size == 0) {
+ EmptyFile = true;
+ return Error::success();
+ }
+
Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
FileOutputBuffer::create(getName(), Size, FileOutputBuffer::F_executable);
- handleAllErrors(BufferOrErr.takeError(), [this](const ErrorInfoBase &E) {
- error("failed to open " + getName() + ": " + E.message());
- });
+ // FileOutputBuffer::create() returns an Error that is just a wrapper around
+ // std::error_code. Wrap it in FileError to include the actual filename.
+ if (!BufferOrErr)
+ return createFileError(getName(), BufferOrErr.takeError());
Buf = std::move(*BufferOrErr);
+ return Error::success();
}
-Error FileBuffer::commit() { return Buf->commit(); }
+Error FileBuffer::commit() {
+ if (EmptyFile)
+ return createEmptyFile(getName());
+
+ assert(Buf && "allocate() not called before commit()!");
+ Error Err = Buf->commit();
+ // FileOutputBuffer::commit() returns an Error that is just a wrapper around
+ // std::error_code. Wrap it in FileError to include the actual filename.
+ return Err ? createFileError(getName(), std::move(Err)) : std::move(Err);
+}
uint8_t *FileBuffer::getBufferStart() {
return reinterpret_cast<uint8_t *>(Buf->getBufferStart());
}
-void MemBuffer::allocate(size_t Size) {
+Error MemBuffer::allocate(size_t Size) {
Buf = WritableMemoryBuffer::getNewMemBuffer(Size, getName());
+ return Error::success();
}
Error MemBuffer::commit() { return Error::success(); }
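With allocate() now returning Error and the zero-size case deferred to commit(), callers have to thread errors through both steps. A rough usage sketch, assuming FileBuffer still takes the output path in its constructor (the constructor is not shown in this hunk) and that <cstring> is available for memcpy:

static Error writeBlob(StringRef OutPath, ArrayRef<uint8_t> Data) {
  FileBuffer Out(OutPath);
  if (Error E = Out.allocate(Data.size())) // no file is touched yet if Data is empty
    return E;
  if (!Data.empty())
    memcpy(Out.getBufferStart(), Data.data(), Data.size());
  return Out.commit(); // the empty output file is created only here
}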
diff --git a/tools/llvm-objcopy/Buffer.h b/tools/llvm-objcopy/Buffer.h
index e5b9c5b2d22b..487d5585c364 100644
--- a/tools/llvm-objcopy/Buffer.h
+++ b/tools/llvm-objcopy/Buffer.h
@@ -1,9 +1,8 @@
//===- Buffer.h -------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -28,7 +27,7 @@ class Buffer {
public:
virtual ~Buffer();
- virtual void allocate(size_t Size) = 0;
+ virtual Error allocate(size_t Size) = 0;
virtual uint8_t *getBufferStart() = 0;
virtual Error commit() = 0;
@@ -38,9 +37,12 @@ public:
class FileBuffer : public Buffer {
std::unique_ptr<FileOutputBuffer> Buf;
+ // Indicates that allocate(0) was called, and commit() should create or
+ // truncate a file instead of using a FileOutputBuffer.
+ bool EmptyFile = false;
public:
- void allocate(size_t Size) override;
+ Error allocate(size_t Size) override;
uint8_t *getBufferStart() override;
Error commit() override;
@@ -51,7 +53,7 @@ class MemBuffer : public Buffer {
std::unique_ptr<WritableMemoryBuffer> Buf;
public:
- void allocate(size_t Size) override;
+ Error allocate(size_t Size) override;
uint8_t *getBufferStart() override;
Error commit() override;
diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 6b386d29979c..4ae46851a66f 100644
--- a/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -1,9 +1,8 @@
//===- COFFObjcopy.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,6 +17,8 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Support/Path.h"
#include <cassert>
namespace llvm {
@@ -27,14 +28,104 @@ namespace coff {
using namespace object;
using namespace COFF;
+static bool isDebugSection(const Section &Sec) {
+ return Sec.Name.startswith(".debug");
+}
+
+static uint64_t getNextRVA(const Object &Obj) {
+ if (Obj.getSections().empty())
+ return 0;
+ const Section &Last = Obj.getSections().back();
+ return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize,
+ Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
+}
+
+static uint32_t getCRC32(StringRef Data) {
+ JamCRC CRC;
+ CRC.update(ArrayRef<char>(Data.data(), Data.size()));
+  // The CRC32 value needs to be complemented because JamCRC doesn't
+  // finalize the CRC32 value. It also doesn't negate the initial CRC32 value,
+  // but it starts by default at 0xFFFFFFFF, which is the complement of zero.
+ return ~CRC.getCRC();
+}
+
+static std::vector<uint8_t> createGnuDebugLinkSectionContents(StringRef File) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> LinkTargetOrErr =
+ MemoryBuffer::getFile(File);
+ if (!LinkTargetOrErr)
+ error("'" + File + "': " + LinkTargetOrErr.getError().message());
+ auto LinkTarget = std::move(*LinkTargetOrErr);
+ uint32_t CRC32 = getCRC32(LinkTarget->getBuffer());
+
+ StringRef FileName = sys::path::filename(File);
+ size_t CRCPos = alignTo(FileName.size() + 1, 4);
+ std::vector<uint8_t> Data(CRCPos + 4);
+ memcpy(Data.data(), FileName.data(), FileName.size());
+ support::endian::write32le(Data.data() + CRCPos, CRC32);
+ return Data;
+}
+
+static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
+ uint32_t StartRVA = getNextRVA(Obj);
+
+ std::vector<Section> Sections;
+ Section Sec;
+ Sec.setOwnedContents(createGnuDebugLinkSectionContents(DebugLinkFile));
+ Sec.Name = ".gnu_debuglink";
+ Sec.Header.VirtualSize = Sec.getContents().size();
+ Sec.Header.VirtualAddress = StartRVA;
+ Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize,
+ Obj.IsPE ? Obj.PeHeader.FileAlignment : 1);
+ // Sec.Header.PointerToRawData is filled in by the writer.
+ Sec.Header.PointerToRelocations = 0;
+ Sec.Header.PointerToLinenumbers = 0;
+ // Sec.Header.NumberOfRelocations is filled in by the writer.
+ Sec.Header.NumberOfLinenumbers = 0;
+ Sec.Header.Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA |
+ IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_DISCARDABLE;
+ Sections.push_back(Sec);
+ Obj.addSections(Sections);
+}
+
static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+ // Perform the actual section removals.
+ Obj.removeSections([&Config](const Section &Sec) {
+ // Contrary to --only-keep-debug, --only-section fully removes sections that
+ // aren't mentioned.
+ if (!Config.OnlySection.empty() &&
+ !is_contained(Config.OnlySection, Sec.Name))
+ return true;
+
+ if (Config.StripDebug || Config.StripAll || Config.StripAllGNU ||
+ Config.DiscardMode == DiscardType::All || Config.StripUnneeded) {
+ if (isDebugSection(Sec) &&
+ (Sec.Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) != 0)
+ return true;
+ }
+
+ if (is_contained(Config.ToRemove, Sec.Name))
+ return true;
+
+ return false;
+ });
+
+ if (Config.OnlyKeepDebug) {
+ // For --only-keep-debug, we keep all other sections, but remove their
+ // content. The VirtualSize field in the section header is kept intact.
+ Obj.truncateSections([](const Section &Sec) {
+ return !isDebugSection(Sec) && Sec.Name != ".buildid" &&
+ ((Sec.Header.Characteristics &
+ (IMAGE_SCN_CNT_CODE | IMAGE_SCN_CNT_INITIALIZED_DATA)) != 0);
+ });
+ }
+
// StripAll removes all symbols and thus also removes all relocations.
if (Config.StripAll || Config.StripAllGNU)
- for (Section &Sec : Obj.Sections)
+ for (Section &Sec : Obj.getMutableSections())
Sec.Relocs.clear();
// If we need to do per-symbol removals, initialize the Referenced field.
- if (Config.StripUnneeded || Config.DiscardAll ||
+ if (Config.StripUnneeded || Config.DiscardMode == DiscardType::All ||
!Config.SymbolsToRemove.empty())
if (Error E = Obj.markSymbols())
return E;
@@ -50,47 +141,74 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
// Explicitly removing a referenced symbol is an error.
if (Sym.Referenced)
reportError(Config.OutputFilename,
- make_error<StringError>(
- "not stripping symbol '" + Sym.Name +
- "' because it is named in a relocation.",
- llvm::errc::invalid_argument));
+ createStringError(llvm::errc::invalid_argument,
+ "not stripping symbol '%s' because it is "
+ "named in a relocation",
+ Sym.Name.str().c_str()));
return true;
}
if (!Sym.Referenced) {
// With --strip-unneeded, GNU objcopy removes all unreferenced local
// symbols, and any unreferenced undefined external.
- if (Config.StripUnneeded &&
- (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
- Sym.Sym.SectionNumber == 0))
- return true;
+ // With --strip-unneeded-symbol we strip only specific unreferenced
+ // local symbol instead of removing all of such.
+ if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC ||
+ Sym.Sym.SectionNumber == 0)
+ if (Config.StripUnneeded ||
+ is_contained(Config.UnneededSymbolsToRemove, Sym.Name))
+ return true;
// GNU objcopy keeps referenced local symbols and external symbols
// if --discard-all is set, similar to what --strip-unneeded does,
// but undefined local symbols are kept when --discard-all is set.
- if (Config.DiscardAll && Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+ if (Config.DiscardMode == DiscardType::All &&
+ Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
Sym.Sym.SectionNumber != 0)
return true;
}
return false;
});
+
+ if (!Config.AddGnuDebugLink.empty())
+ addGnuDebugLink(Obj, Config.AddGnuDebugLink);
+
+ if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
+ Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
+ !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
+ !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
+ !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
+ !Config.SymbolsToGlobalize.empty() || !Config.SymbolsToKeep.empty() ||
+ !Config.SymbolsToLocalize.empty() || !Config.SymbolsToWeaken.empty() ||
+ !Config.SymbolsToKeepGlobal.empty() || !Config.SectionsToRename.empty() ||
+ !Config.SetSectionFlags.empty() || !Config.SymbolsToRename.empty() ||
+ Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+ Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
+ Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
+ Config.DiscardMode == DiscardType::Locals ||
+ !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
+ return createStringError(llvm::errc::invalid_argument,
+ "option not supported by llvm-objcopy for COFF");
+ }
+
return Error::success();
}
-void executeObjcopyOnBinary(const CopyConfig &Config,
- object::COFFObjectFile &In, Buffer &Out) {
+Error executeObjcopyOnBinary(const CopyConfig &Config, COFFObjectFile &In,
+ Buffer &Out) {
COFFReader Reader(In);
Expected<std::unique_ptr<Object>> ObjOrErr = Reader.create();
if (!ObjOrErr)
- reportError(Config.InputFilename, ObjOrErr.takeError());
+ return createFileError(Config.InputFilename, ObjOrErr.takeError());
Object *Obj = ObjOrErr->get();
assert(Obj && "Unable to deserialize COFF object");
if (Error E = handleArgs(Config, *Obj))
- reportError(Config.InputFilename, std::move(E));
+ return createFileError(Config.InputFilename, std::move(E));
COFFWriter Writer(*Obj, Out);
if (Error E = Writer.write())
- reportError(Config.OutputFilename, std::move(E));
+ return createFileError(Config.OutputFilename, std::move(E));
+ return Error::success();
}
} // end namespace coff
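The .gnu_debuglink payload built by createGnuDebugLinkSectionContents() is a NUL-terminated file name padded to a 4-byte boundary, followed by the complemented CRC32 in little-endian order. A worked example of reading that layout back, with the file name and offsets chosen purely for illustration:

// For a link target named "foo.debug": 9 bytes + NUL = 10, padded to 12.
std::vector<uint8_t> Payload = createGnuDebugLinkSectionContents("foo.debug");
StringRef Name(reinterpret_cast<const char *>(Payload.data()));  // "foo.debug"
size_t CRCPos = alignTo(Name.size() + 1, 4);                      // 12
uint32_t CRC = support::endian::read32le(Payload.data() + CRCPos);
assert(Payload.size() == CRCPos + 4 && "name + padding + 4-byte CRC");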
diff --git a/tools/llvm-objcopy/COFF/COFFObjcopy.h b/tools/llvm-objcopy/COFF/COFFObjcopy.h
index bf70bd9b4d84..858759e52c4a 100644
--- a/tools/llvm-objcopy/COFF/COFFObjcopy.h
+++ b/tools/llvm-objcopy/COFF/COFFObjcopy.h
@@ -1,9 +1,8 @@
//===- COFFObjcopy.h --------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_TOOLS_OBJCOPY_COFFOBJCOPY_H
namespace llvm {
+class Error;
namespace object {
class COFFObjectFile;
@@ -21,8 +21,8 @@ struct CopyConfig;
class Buffer;
namespace coff {
-void executeObjcopyOnBinary(const CopyConfig &Config,
- object::COFFObjectFile &In, Buffer &Out);
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+ object::COFFObjectFile &In, Buffer &Out);
} // end namespace coff
} // end namespace objcopy
diff --git a/tools/llvm-objcopy/COFF/Object.cpp b/tools/llvm-objcopy/COFF/Object.cpp
index 315d3a778623..b07532c1dc39 100644
--- a/tools/llvm-objcopy/COFF/Object.cpp
+++ b/tools/llvm-objcopy/COFF/Object.cpp
@@ -1,13 +1,13 @@
//===- Object.cpp ---------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Object.h"
+#include "llvm/ADT/DenseSet.h"
#include <algorithm>
namespace llvm {
@@ -26,12 +26,8 @@ void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
void Object::updateSymbols() {
SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
- size_t RawSymIndex = 0;
- for (Symbol &Sym : Symbols) {
+ for (Symbol &Sym : Symbols)
SymbolMap[Sym.UniqueId] = &Sym;
- Sym.RawIndex = RawSymIndex;
- RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
- }
}
const Symbol *Object::findSymbol(size_t UniqueId) const {
@@ -56,15 +52,86 @@ Error Object::markSymbols() {
for (const Relocation &R : Sec.Relocs) {
auto It = SymbolMap.find(R.Target);
if (It == SymbolMap.end())
- return make_error<StringError>("Relocation target " + Twine(R.Target) +
- " not found",
- object_error::invalid_symbol_index);
+ return createStringError(object_error::invalid_symbol_index,
+ "relocation target %zu not found", R.Target);
It->second->Referenced = true;
}
}
return Error::success();
}
+void Object::addSections(ArrayRef<Section> NewSections) {
+ for (Section S : NewSections) {
+ S.UniqueId = NextSectionUniqueId++;
+ Sections.emplace_back(S);
+ }
+ updateSections();
+}
+
+void Object::updateSections() {
+ SectionMap = DenseMap<ssize_t, Section *>(Sections.size());
+ size_t Index = 1;
+ for (Section &S : Sections) {
+ SectionMap[S.UniqueId] = &S;
+ S.Index = Index++;
+ }
+}
+
+const Section *Object::findSection(ssize_t UniqueId) const {
+ auto It = SectionMap.find(UniqueId);
+ if (It == SectionMap.end())
+ return nullptr;
+ return It->second;
+}
+
+void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
+ DenseSet<ssize_t> AssociatedSections;
+ auto RemoveAssociated = [&AssociatedSections](const Section &Sec) {
+ return AssociatedSections.count(Sec.UniqueId) == 1;
+ };
+ do {
+ DenseSet<ssize_t> RemovedSections;
+ Sections.erase(
+ std::remove_if(std::begin(Sections), std::end(Sections),
+ [ToRemove, &RemovedSections](const Section &Sec) {
+ bool Remove = ToRemove(Sec);
+ if (Remove)
+ RemovedSections.insert(Sec.UniqueId);
+ return Remove;
+ }),
+ std::end(Sections));
+ // Remove all symbols referring to the removed sections.
+ AssociatedSections.clear();
+ Symbols.erase(
+ std::remove_if(
+ std::begin(Symbols), std::end(Symbols),
+ [&RemovedSections, &AssociatedSections](const Symbol &Sym) {
+                // If a section was associative to a removed section, remove it
+                // as well, since nothing will include it any more (and we can't
+                // leave it dangling).
+ if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) ==
+ 1)
+ AssociatedSections.insert(Sym.TargetSectionId);
+ return RemovedSections.count(Sym.TargetSectionId) == 1;
+ }),
+ std::end(Symbols));
+ ToRemove = RemoveAssociated;
+ } while (!AssociatedSections.empty());
+ updateSections();
+ updateSymbols();
+}
+
+void Object::truncateSections(function_ref<bool(const Section &)> ToTruncate) {
+ for (Section &Sec : Sections) {
+ if (ToTruncate(Sec)) {
+ Sec.clearContents();
+ Sec.Relocs.clear();
+ Sec.Header.SizeOfRawData = 0;
+ }
+ }
+}
+
} // end namespace coff
} // end namespace objcopy
} // end namespace llvm
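removeSections() above loops until no symbol points at an associative COMDAT section whose target was removed, and truncateSections() keeps headers while dropping contents. A minimal sketch of driving both, with the predicates invented for illustration:

// Drop every DWARF section; symbols and associative COMDAT sections that
// referenced them are cleaned up by the cascade inside removeSections().
Obj.removeSections(
    [](const Section &Sec) { return Sec.Name.startswith(".debug"); });

// Keep the remaining section headers but discard their bytes, as the
// COFF --only-keep-debug handling above does.
Obj.truncateSections(
    [](const Section &Sec) { return !Sec.Name.startswith(".debug"); });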
diff --git a/tools/llvm-objcopy/COFF/Object.h b/tools/llvm-objcopy/COFF/Object.h
index 7531fb4cf39e..21475b068629 100644
--- a/tools/llvm-objcopy/COFF/Object.h
+++ b/tools/llvm-objcopy/COFF/Object.h
@@ -1,9 +1,8 @@
//===- Object.h -------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -12,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -35,15 +35,58 @@ struct Relocation {
struct Section {
object::coff_section Header;
- ArrayRef<uint8_t> Contents;
std::vector<Relocation> Relocs;
StringRef Name;
+ ssize_t UniqueId;
+ size_t Index;
+
+ ArrayRef<uint8_t> getContents() const {
+ if (!OwnedContents.empty())
+ return OwnedContents;
+ return ContentsRef;
+ }
+
+ void setContentsRef(ArrayRef<uint8_t> Data) {
+ OwnedContents.clear();
+ ContentsRef = Data;
+ }
+
+ void setOwnedContents(std::vector<uint8_t> &&Data) {
+ ContentsRef = ArrayRef<uint8_t>();
+ OwnedContents = std::move(Data);
+ }
+
+ void clearContents() {
+ ContentsRef = ArrayRef<uint8_t>();
+ OwnedContents.clear();
+ }
+
+private:
+ ArrayRef<uint8_t> ContentsRef;
+ std::vector<uint8_t> OwnedContents;
+};
+
+struct AuxSymbol {
+ AuxSymbol(ArrayRef<uint8_t> In) {
+ assert(In.size() == sizeof(Opaque));
+ std::copy(In.begin(), In.end(), Opaque);
+ }
+
+ ArrayRef<uint8_t> getRef() const {
+ return ArrayRef<uint8_t>(Opaque, sizeof(Opaque));
+ }
+
+ uint8_t Opaque[sizeof(object::coff_symbol16)];
};
struct Symbol {
object::coff_symbol32 Sym;
StringRef Name;
- ArrayRef<uint8_t> AuxData;
+ std::vector<AuxSymbol> AuxData;
+ StringRef AuxFile;
+ ssize_t TargetSectionId;
+ ssize_t AssociativeComdatTargetSectionId = 0;
+ Optional<size_t> WeakTargetSymbolId;
size_t UniqueId;
size_t RawIndex;
bool Referenced;
@@ -62,7 +105,6 @@ struct Object {
uint32_t BaseOfData = 0; // pe32plus_header lacks this field.
std::vector<object::data_directory> DataDirectories;
- std::vector<Section> Sections;
ArrayRef<Symbol> getSymbols() const { return Symbols; }
// This allows mutating individual Symbols, but not mutating the list
@@ -80,14 +122,35 @@ struct Object {
// all sections.
Error markSymbols();
+ ArrayRef<Section> getSections() const { return Sections; }
+ // This allows mutating individual Sections, but not mutating the list
+ // of symbols itself.
+ iterator_range<std::vector<Section>::iterator> getMutableSections() {
+ return make_range(Sections.begin(), Sections.end());
+ }
+
+ const Section *findSection(ssize_t UniqueId) const;
+
+ void addSections(ArrayRef<Section> NewSections);
+ void removeSections(function_ref<bool(const Section &)> ToRemove);
+ void truncateSections(function_ref<bool(const Section &)> ToTruncate);
+
private:
std::vector<Symbol> Symbols;
DenseMap<size_t, Symbol *> SymbolMap;
size_t NextSymbolUniqueId = 0;
- // Update SymbolMap and RawIndex in each Symbol.
+ std::vector<Section> Sections;
+ DenseMap<ssize_t, Section *> SectionMap;
+
+ ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined.
+
+ // Update SymbolMap.
void updateSymbols();
+
+ // Update SectionMap and Index in each Section.
+ void updateSections();
};
// Copy between coff_symbol16 and coff_symbol32.
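
For reference, the ContentsRef/OwnedContents pair introduced above replaces the old single ArrayRef<uint8_t> Contents field so that a section can either point at the input file's bytes or own a private, modifiable copy. A rough standalone sketch of the same pattern, with the LLVM types swapped for standard-library ones purely for illustration:

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

struct MiniContents {
  // Either a non-owning view of the input file's bytes...
  const uint8_t *RefData = nullptr;
  size_t RefSize = 0;
  // ...or an owned copy produced while editing the object.
  std::vector<uint8_t> Owned;

  // Like Section::getContents(): prefer the owned buffer when present.
  std::pair<const uint8_t *, size_t> get() const {
    if (!Owned.empty())
      return {Owned.data(), Owned.size()};
    return {RefData, RefSize};
  }
  void setRef(const uint8_t *Data, size_t Size) {
    Owned.clear();
    RefData = Data;
    RefSize = Size;
  }
  void setOwned(std::vector<uint8_t> &&Data) {
    RefData = nullptr;
    RefSize = 0;
    Owned = std::move(Data);
  }
  void clear() {
    RefData = nullptr;
    RefSize = 0;
    Owned.clear();
  }
};
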
diff --git a/tools/llvm-objcopy/COFF/Reader.cpp b/tools/llvm-objcopy/COFF/Reader.cpp
index a01768392d7d..1f0ec9fa9691 100644
--- a/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/tools/llvm-objcopy/COFF/Reader.cpp
@@ -1,17 +1,16 @@
//===- Reader.cpp ---------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Reader.h"
#include "Object.h"
-#include "llvm-objcopy.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
@@ -22,6 +21,7 @@ namespace objcopy {
namespace coff {
using namespace object;
+using namespace COFF;
Error COFFReader::readExecutableHeaders(Object &Obj) const {
const dos_header *DH = COFFObj.getDOSHeader();
@@ -59,31 +59,38 @@ Error COFFReader::readExecutableHeaders(Object &Obj) const {
}
Error COFFReader::readSections(Object &Obj) const {
+ std::vector<Section> Sections;
// Section indexing starts from 1.
for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
const coff_section *Sec;
if (auto EC = COFFObj.getSection(I, Sec))
return errorCodeToError(EC);
- Obj.Sections.push_back(Section());
- Section &S = Obj.Sections.back();
+ Sections.push_back(Section());
+ Section &S = Sections.back();
S.Header = *Sec;
- if (auto EC = COFFObj.getSectionContents(Sec, S.Contents))
- return errorCodeToError(EC);
+ ArrayRef<uint8_t> Contents;
+ if (Error E = COFFObj.getSectionContents(Sec, Contents))
+ return E;
+ S.setContentsRef(Contents);
ArrayRef<coff_relocation> Relocs = COFFObj.getRelocations(Sec);
for (const coff_relocation &R : Relocs)
S.Relocs.push_back(R);
- if (auto EC = COFFObj.getSectionName(Sec, S.Name))
- return errorCodeToError(EC);
+ if (Expected<StringRef> NameOrErr = COFFObj.getSectionName(Sec))
+ S.Name = *NameOrErr;
+ else
+ return NameOrErr.takeError();
if (Sec->hasExtendedRelocations())
- return make_error<StringError>("Extended relocations not supported yet",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "extended relocations not supported yet");
}
+ Obj.addSections(Sections);
return Error::success();
}
Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
std::vector<Symbol> Symbols;
Symbols.reserve(COFFObj.getRawNumberOfSymbols());
+ ArrayRef<Section> Sections = Obj.getSections();
for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
if (!SymOrErr)
@@ -101,31 +108,86 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
*reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
return errorCodeToError(EC);
- Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
- assert((Sym.AuxData.size() %
- (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+
+ ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
+ size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
+ assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
+ // The auxiliary symbols are structs of sizeof(coff_symbol16) each.
+ // In the big object format (where symbols are coff_symbol32), each
+ // auxiliary symbol is padded with 2 bytes at the end. Copy each
+ // auxiliary symbol to the Sym.AuxData vector. For file symbols,
+ // the whole range of aux symbols is interpreted as one null-padded
+ // string instead.
+ if (SymRef.isFileRecord())
+ Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
+ AuxData.size())
+ .rtrim('\0');
+ else
+ for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
+ Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
+
+ // Find the unique id of the section
+ if (SymRef.getSectionNumber() <=
+ 0) // Special symbol (undefined/absolute/debug)
+ Sym.TargetSectionId = SymRef.getSectionNumber();
+ else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) <
+ Sections.size())
+ Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId;
+ else
+ return createStringError(object_error::parse_failed,
+ "section number out of range");
+ // For section definitions, check if it is comdat associative, and if
+ // it is, find the target section unique id.
+ const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
+ const coff_aux_weak_external *WE = SymRef.getWeakExternal();
+ if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ int32_t Index = SD->getNumber(IsBigObj);
+ if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size())
+ return createStringError(object_error::parse_failed,
+ "unexpected associative section index");
+ Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId;
+ } else if (WE) {
+ // This is a raw symbol index for now, but store it in the Symbol
+ // until we've added them to the Object, which assigns the final
+ // unique ids.
+ Sym.WeakTargetSymbolId = WE->TagIndex;
+ }
I += 1 + SymRef.getNumberOfAuxSymbols();
}
Obj.addSymbols(Symbols);
return Error::success();
}
-Error COFFReader::setRelocTargets(Object &Obj) const {
+Error COFFReader::setSymbolTargets(Object &Obj) const {
std::vector<const Symbol *> RawSymbolTable;
for (const Symbol &Sym : Obj.getSymbols()) {
RawSymbolTable.push_back(&Sym);
for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
RawSymbolTable.push_back(nullptr);
}
- for (Section &Sec : Obj.Sections) {
+ for (Symbol &Sym : Obj.getMutableSymbols()) {
+ // Convert WeakTargetSymbolId from the original raw symbol index to
+ // a proper unique id.
+ if (Sym.WeakTargetSymbolId) {
+ if (*Sym.WeakTargetSymbolId >= RawSymbolTable.size())
+ return createStringError(object_error::parse_failed,
+ "weak external reference out of range");
+ const Symbol *Target = RawSymbolTable[*Sym.WeakTargetSymbolId];
+ if (Target == nullptr)
+ return createStringError(object_error::parse_failed,
+ "invalid SymbolTableIndex");
+ Sym.WeakTargetSymbolId = Target->UniqueId;
+ }
+ }
+ for (Section &Sec : Obj.getMutableSections()) {
for (Relocation &R : Sec.Relocs) {
if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
- return make_error<StringError>("SymbolTableIndex out of range",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "SymbolTableIndex out of range");
const Symbol *Sym = RawSymbolTable[R.Reloc.SymbolTableIndex];
if (Sym == nullptr)
- return make_error<StringError>("Invalid SymbolTableIndex",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "invalid SymbolTableIndex");
R.Target = Sym->UniqueId;
R.TargetName = Sym->Name;
}
@@ -145,8 +207,8 @@ Expected<std::unique_ptr<Object>> COFFReader::create() const {
Obj->CoffFileHeader = *CFH;
} else {
if (!CBFH)
- return make_error<StringError>("No COFF file header returned",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "no COFF file header returned");
// Only copying the few fields from the bigobj header that we need
// and won't recreate in the end.
Obj->CoffFileHeader.Machine = CBFH->Machine;
@@ -160,7 +222,7 @@ Expected<std::unique_ptr<Object>> COFFReader::create() const {
return std::move(E);
if (Error E = readSymbols(*Obj, IsBigObj))
return std::move(E);
- if (Error E = setRelocTargets(*Obj))
+ if (Error E = setSymbolTargets(*Obj))
return std::move(E);
return std::move(Obj);
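
The aux-symbol handling in readSymbols above copies fixed 18-byte records out of symbol-table slots whose size depends on the object flavour: 18 bytes per slot in regular objects, 20 in big objects, where the trailing 2 bytes of each slot are padding. A standalone sketch of that slicing, using plain integer sizes from the COFF specification instead of the LLVM struct types:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

constexpr size_t Sym16Size = 18; // size of a coff_symbol16 record
constexpr size_t Sym32Size = 20; // size of a coff_symbol32 record

struct MiniAuxSymbol {
  uint8_t Opaque[Sym16Size];
};

std::vector<MiniAuxSymbol> sliceAuxData(const std::vector<uint8_t> &AuxData,
                                        size_t NumAux, bool IsBigObj) {
  size_t SlotSize = IsBigObj ? Sym32Size : Sym16Size;
  assert(AuxData.size() == SlotSize * NumAux);
  std::vector<MiniAuxSymbol> Out;
  Out.reserve(NumAux);
  for (size_t I = 0; I < NumAux; ++I) {
    MiniAuxSymbol A;
    // Only the first 18 bytes of each slot carry payload; in big-object
    // files the trailing 2 bytes per slot are padding and are dropped.
    std::memcpy(A.Opaque, AuxData.data() + I * SlotSize, Sym16Size);
    Out.push_back(A);
  }
  return Out;
}
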
diff --git a/tools/llvm-objcopy/COFF/Reader.h b/tools/llvm-objcopy/COFF/Reader.h
index ca7057d08c9f..ec15369db0b8 100644
--- a/tools/llvm-objcopy/COFF/Reader.h
+++ b/tools/llvm-objcopy/COFF/Reader.h
@@ -1,9 +1,8 @@
//===- Reader.h -------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -29,7 +28,7 @@ class COFFReader {
Error readExecutableHeaders(Object &Obj) const;
Error readSections(Object &Obj) const;
Error readSymbols(Object &Obj, bool IsBigObj) const;
- Error setRelocTargets(Object &Obj) const;
+ Error setSymbolTargets(Object &Obj) const;
public:
explicit COFFReader(const COFFObjectFile &O) : COFFObj(O) {}
diff --git a/tools/llvm-objcopy/COFF/Writer.cpp b/tools/llvm-objcopy/COFF/Writer.cpp
index 385d43b1bae5..f3bb1ce331f2 100644
--- a/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/tools/llvm-objcopy/COFF/Writer.cpp
@@ -1,15 +1,13 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Writer.h"
#include "Object.h"
-#include "llvm-objcopy.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -26,22 +24,75 @@ using namespace object;
using namespace COFF;
Error COFFWriter::finalizeRelocTargets() {
- for (Section &Sec : Obj.Sections) {
+ for (Section &Sec : Obj.getMutableSections()) {
for (Relocation &R : Sec.Relocs) {
const Symbol *Sym = Obj.findSymbol(R.Target);
if (Sym == nullptr)
- return make_error<StringError>("Relocation target " + R.TargetName +
- " (" + Twine(R.Target) +
- ") not found",
- object_error::invalid_symbol_index);
+ return createStringError(object_error::invalid_symbol_index,
+ "relocation target '%s' (%zu) not found",
+ R.TargetName.str().c_str(), R.Target);
R.Reloc.SymbolTableIndex = Sym->RawIndex;
}
}
return Error::success();
}
+Error COFFWriter::finalizeSymbolContents() {
+ for (Symbol &Sym : Obj.getMutableSymbols()) {
+ if (Sym.TargetSectionId <= 0) {
+ // Undefined, or a special kind of symbol. These negative values
+ // are stored in the SectionNumber field, which is unsigned.
+ Sym.Sym.SectionNumber = static_cast<uint32_t>(Sym.TargetSectionId);
+ } else {
+ const Section *Sec = Obj.findSection(Sym.TargetSectionId);
+ if (Sec == nullptr)
+ return createStringError(object_error::invalid_symbol_index,
+ "symbol '%s' points to a removed section",
+ Sym.Name.str().c_str());
+ Sym.Sym.SectionNumber = Sec->Index;
+
+ if (Sym.Sym.NumberOfAuxSymbols == 1 &&
+ Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
+ coff_aux_section_definition *SD =
+ reinterpret_cast<coff_aux_section_definition *>(
+ Sym.AuxData[0].Opaque);
+ uint32_t SDSectionNumber;
+ if (Sym.AssociativeComdatTargetSectionId == 0) {
+ // Not a comdat associative section; just set the Number field to
+ // the number of the section itself.
+ SDSectionNumber = Sec->Index;
+ } else {
+ Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId);
+ if (Sec == nullptr)
+ return createStringError(
+ object_error::invalid_symbol_index,
+ "symbol '%s' is associative to a removed section",
+ Sym.Name.str().c_str());
+ SDSectionNumber = Sec->Index;
+ }
+ // Update the section definition with the new section number.
+ SD->NumberLowPart = static_cast<uint16_t>(SDSectionNumber);
+ SD->NumberHighPart = static_cast<uint16_t>(SDSectionNumber >> 16);
+ }
+ }
+ // Check that we actually have AuxData to match the weak symbol target
+ // we want to set. Only >= 1 would be required, but only == 1 makes sense.
+ if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
+ coff_aux_weak_external *WE =
+ reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
+ const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
+ if (Target == nullptr)
+ return createStringError(object_error::invalid_symbol_index,
+ "symbol '%s' is missing its weak target",
+ Sym.Name.str().c_str());
+ WE->TagIndex = Target->RawIndex;
+ }
+ }
+ return Error::success();
+}
+
void COFFWriter::layoutSections() {
- for (auto &S : Obj.Sections) {
+ for (auto &S : Obj.getMutableSections()) {
if (S.Header.SizeOfRawData > 0)
S.Header.PointerToRawData = FileSize;
FileSize += S.Header.SizeOfRawData; // For executables, this is already
@@ -58,7 +109,7 @@ void COFFWriter::layoutSections() {
}
size_t COFFWriter::finalizeStringTable() {
- for (auto &S : Obj.Sections)
+ for (const auto &S : Obj.getSections())
if (S.Name.size() > COFF::NameSize)
StrTabBuilder.add(S.Name);
@@ -68,8 +119,9 @@ size_t COFFWriter::finalizeStringTable() {
StrTabBuilder.finalize();
- for (auto &S : Obj.Sections) {
+ for (auto &S : Obj.getMutableSections()) {
if (S.Name.size() > COFF::NameSize) {
+ memset(S.Header.Name, 0, sizeof(S.Header.Name));
snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
(int)StrTabBuilder.getOffset(S.Name));
} else {
@@ -89,15 +141,30 @@ size_t COFFWriter::finalizeStringTable() {
template <class SymbolTy>
std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
- size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
- for (const auto &S : Obj.getSymbols())
- SymTabSize += S.AuxData.size();
- return std::make_pair(SymTabSize, sizeof(SymbolTy));
+ size_t RawSymIndex = 0;
+ for (auto &S : Obj.getMutableSymbols()) {
+ // Symbols normally have NumberOfAuxSymbols set correctly already.
+ // For file symbols, we need to know the output file's symbol size to be
+ // able to calculate the number of slots it occupies.
+ if (!S.AuxFile.empty())
+ S.Sym.NumberOfAuxSymbols =
+ alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
+ S.RawIndex = RawSymIndex;
+ RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
+ }
+ return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
}
Error COFFWriter::finalize(bool IsBigObj) {
+ size_t SymTabSize, SymbolSize;
+ std::tie(SymTabSize, SymbolSize) = IsBigObj
+ ? finalizeSymbolTable<coff_symbol32>()
+ : finalizeSymbolTable<coff_symbol16>();
+
if (Error E = finalizeRelocTargets())
return E;
+ if (Error E = finalizeSymbolContents())
+ return E;
size_t SizeOfHeaders = 0;
FileAlignment = 1;
@@ -114,10 +181,10 @@ Error COFFWriter::finalize(bool IsBigObj) {
SizeOfHeaders +=
PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
}
- Obj.CoffFileHeader.NumberOfSections = Obj.Sections.size();
+ Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size();
SizeOfHeaders +=
IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
- SizeOfHeaders += sizeof(coff_section) * Obj.Sections.size();
+ SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size();
SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
Obj.CoffFileHeader.SizeOfOptionalHeader =
@@ -132,8 +199,8 @@ Error COFFWriter::finalize(bool IsBigObj) {
Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
- if (!Obj.Sections.empty()) {
- const Section &S = Obj.Sections.back();
+ if (!Obj.getSections().empty()) {
+ const Section &S = Obj.getSections().back();
Obj.PeHeader.SizeOfImage =
alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
Obj.PeHeader.SectionAlignment);
@@ -145,10 +212,6 @@ Error COFFWriter::finalize(bool IsBigObj) {
}
size_t StrTabSize = finalizeStringTable();
- size_t SymTabSize, SymbolSize;
- std::tie(SymTabSize, SymbolSize) = IsBigObj
- ? finalizeSymbolTable<coff_symbol32>()
- : finalizeSymbolTable<coff_symbol16>();
size_t PointerToSymbolTable = FileSize;
// StrTabSize <= 4 is the size of an empty string table, only consisting
@@ -199,7 +262,7 @@ void COFFWriter::writeHeaders(bool IsBigObj) {
BigObjHeader.unused4 = 0;
// The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
// get the original one instead.
- BigObjHeader.NumberOfSections = Obj.Sections.size();
+ BigObjHeader.NumberOfSections = Obj.getSections().size();
BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
@@ -224,23 +287,24 @@ void COFFWriter::writeHeaders(bool IsBigObj) {
Ptr += sizeof(DD);
}
}
- for (const auto &S : Obj.Sections) {
+ for (const auto &S : Obj.getSections()) {
memcpy(Ptr, &S.Header, sizeof(S.Header));
Ptr += sizeof(S.Header);
}
}
void COFFWriter::writeSections() {
- for (const auto &S : Obj.Sections) {
+ for (const auto &S : Obj.getSections()) {
uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData;
- std::copy(S.Contents.begin(), S.Contents.end(), Ptr);
+ ArrayRef<uint8_t> Contents = S.getContents();
+ std::copy(Contents.begin(), Contents.end(), Ptr);
// For executable sections, pad the remainder of the raw data size with
// 0xcc, which is int3 on x86.
if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) &&
- S.Header.SizeOfRawData > S.Contents.size())
- memset(Ptr + S.Contents.size(), 0xcc,
- S.Header.SizeOfRawData - S.Contents.size());
+ S.Header.SizeOfRawData > Contents.size())
+ memset(Ptr + Contents.size(), 0xcc,
+ S.Header.SizeOfRawData - Contents.size());
Ptr += S.Header.SizeOfRawData;
for (const auto &R : S.Relocs) {
@@ -257,8 +321,23 @@ template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
S.Sym);
Ptr += sizeof(SymbolTy);
- std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
- Ptr += S.AuxData.size();
+ if (!S.AuxFile.empty()) {
+ // For file symbols, just write the string into the aux symbol slots,
+ // assuming that the unwritten parts are initialized to zero in the memory
+ // mapped file.
+ std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
+ Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
+ } else {
+ // For other auxiliary symbols, write their opaque payload into one symbol
+ // table slot each. For big object files, the symbols are larger than the
+ // opaque auxiliary symbol struct and we leave padding at the end of each
+ // entry.
+ for (const AuxSymbol &AuxSym : S.AuxData) {
+ ArrayRef<uint8_t> Ref = AuxSym.getRef();
+ std::copy(Ref.begin(), Ref.end(), Ptr);
+ Ptr += sizeof(SymbolTy);
+ }
+ }
}
if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
// Always write a string table in object files, even an empty one.
@@ -271,7 +350,8 @@ Error COFFWriter::write(bool IsBigObj) {
if (Error E = finalize(IsBigObj))
return E;
- Buf.allocate(FileSize);
+ if (Error E = Buf.allocate(FileSize))
+ return E;
writeHeaders(IsBigObj);
writeSections();
@@ -296,15 +376,14 @@ Error COFFWriter::patchDebugDirectory() {
const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
if (Dir->Size <= 0)
return Error::success();
- for (const auto &S : Obj.Sections) {
+ for (const auto &S : Obj.getSections()) {
if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
Dir->RelativeVirtualAddress <
S.Header.VirtualAddress + S.Header.SizeOfRawData) {
if (Dir->RelativeVirtualAddress + Dir->Size >
S.Header.VirtualAddress + S.Header.SizeOfRawData)
- return make_error<StringError>(
- "Debug directory extends past end of section",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "debug directory extends past end of section");
size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData + Offset;
@@ -320,15 +399,15 @@ Error COFFWriter::patchDebugDirectory() {
return Error::success();
}
}
- return make_error<StringError>("Debug directory not found",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "debug directory not found");
}
Error COFFWriter::write() {
- bool IsBigObj = Obj.Sections.size() > MaxNumberOfSections16;
+ bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16;
if (IsBigObj && Obj.IsPE)
- return make_error<StringError>("Too many sections for executable",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "too many sections for executable");
return write(IsBigObj);
}
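
The reordered finalizeSymbolTable above now also assigns raw symbol indices and sizes a file symbol's auxiliary data by rounding the file-name length up to whole symbol-table slots, which is why it must run before the relocation and aux fixups that consume RawIndex. A small sketch of just the slot arithmetic, with the record sizes written out as plain numbers:

#include <cstddef>
#include <string>

// Auxiliary slots needed to hold a file name, NUL-padded to a whole number of
// symbol records (18 bytes each for coff_symbol16, 20 for coff_symbol32).
size_t auxSlotsForFileName(const std::string &Name, size_t SymbolSize) {
  return (Name.size() + SymbolSize - 1) / SymbolSize; // alignTo / SymbolSize
}

// For example, a 14-character name fits in one 18-byte slot, while a
// 35-character name needs two slots at either record size.
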
diff --git a/tools/llvm-objcopy/COFF/Writer.h b/tools/llvm-objcopy/COFF/Writer.h
index ab66e0cc1134..681a8d5e4a66 100644
--- a/tools/llvm-objcopy/COFF/Writer.h
+++ b/tools/llvm-objcopy/COFF/Writer.h
@@ -1,9 +1,8 @@
//===- Writer.h -------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -31,10 +30,11 @@ class COFFWriter {
size_t SizeOfInitializedData;
StringTableBuilder StrTabBuilder;
+ template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
Error finalizeRelocTargets();
+ Error finalizeSymbolContents();
void layoutSections();
size_t finalizeStringTable();
- template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
Error finalize(bool IsBigObj);
diff --git a/tools/llvm-objcopy/CopyConfig.cpp b/tools/llvm-objcopy/CopyConfig.cpp
index 3737f571ae61..8d6431b3044f 100644
--- a/tools/llvm-objcopy/CopyConfig.cpp
+++ b/tools/llvm-objcopy/CopyConfig.cpp
@@ -1,27 +1,26 @@
//===- CopyConfig.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "CopyConfig.h"
-#include "llvm-objcopy.h"
-#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Object/ELFTypes.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/JamCRC.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/StringSaver.h"
#include <memory>
-#include <string>
namespace llvm {
namespace objcopy {
@@ -93,45 +92,47 @@ public:
StripOptTable() : OptTable(StripInfoTable) {}
};
-enum SectionFlag {
- SecNone = 0,
- SecAlloc = 1 << 0,
- SecLoad = 1 << 1,
- SecNoload = 1 << 2,
- SecReadonly = 1 << 3,
- SecDebug = 1 << 4,
- SecCode = 1 << 5,
- SecData = 1 << 6,
- SecRom = 1 << 7,
- SecMerge = 1 << 8,
- SecStrings = 1 << 9,
- SecContents = 1 << 10,
- SecShare = 1 << 11,
- LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare)
-};
-
} // namespace
static SectionFlag parseSectionRenameFlag(StringRef SectionName) {
return llvm::StringSwitch<SectionFlag>(SectionName)
- .Case("alloc", SectionFlag::SecAlloc)
- .Case("load", SectionFlag::SecLoad)
- .Case("noload", SectionFlag::SecNoload)
- .Case("readonly", SectionFlag::SecReadonly)
- .Case("debug", SectionFlag::SecDebug)
- .Case("code", SectionFlag::SecCode)
- .Case("data", SectionFlag::SecData)
- .Case("rom", SectionFlag::SecRom)
- .Case("merge", SectionFlag::SecMerge)
- .Case("strings", SectionFlag::SecStrings)
- .Case("contents", SectionFlag::SecContents)
- .Case("share", SectionFlag::SecShare)
+ .CaseLower("alloc", SectionFlag::SecAlloc)
+ .CaseLower("load", SectionFlag::SecLoad)
+ .CaseLower("noload", SectionFlag::SecNoload)
+ .CaseLower("readonly", SectionFlag::SecReadonly)
+ .CaseLower("debug", SectionFlag::SecDebug)
+ .CaseLower("code", SectionFlag::SecCode)
+ .CaseLower("data", SectionFlag::SecData)
+ .CaseLower("rom", SectionFlag::SecRom)
+ .CaseLower("merge", SectionFlag::SecMerge)
+ .CaseLower("strings", SectionFlag::SecStrings)
+ .CaseLower("contents", SectionFlag::SecContents)
+ .CaseLower("share", SectionFlag::SecShare)
.Default(SectionFlag::SecNone);
}
-static SectionRename parseRenameSectionValue(StringRef FlagValue) {
+static Expected<SectionFlag>
+parseSectionFlagSet(ArrayRef<StringRef> SectionFlags) {
+ SectionFlag ParsedFlags = SectionFlag::SecNone;
+ for (StringRef Flag : SectionFlags) {
+ SectionFlag ParsedFlag = parseSectionRenameFlag(Flag);
+ if (ParsedFlag == SectionFlag::SecNone)
+ return createStringError(
+ errc::invalid_argument,
+ "unrecognized section flag '%s'. Flags supported for GNU "
+ "compatibility: alloc, load, noload, readonly, debug, code, data, "
+ "rom, share, contents, merge, strings",
+ Flag.str().c_str());
+ ParsedFlags |= ParsedFlag;
+ }
+
+ return ParsedFlags;
+}
+
+static Expected<SectionRename> parseRenameSectionValue(StringRef FlagValue) {
if (!FlagValue.contains('='))
- error("Bad format for --rename-section: missing '='");
+ return createStringError(errc::invalid_argument,
+ "bad format for --rename-section: missing '='");
// Initial split: ".foo" = ".bar,f1,f2,..."
auto Old2New = FlagValue.split('=');
@@ -144,73 +145,210 @@ static SectionRename parseRenameSectionValue(StringRef FlagValue) {
SR.NewName = NameAndFlags[0];
if (NameAndFlags.size() > 1) {
- SectionFlag Flags = SectionFlag::SecNone;
- for (size_t I = 1, Size = NameAndFlags.size(); I < Size; ++I) {
- SectionFlag Flag = parseSectionRenameFlag(NameAndFlags[I]);
- if (Flag == SectionFlag::SecNone)
- error("Unrecognized section flag '" + NameAndFlags[I] +
- "'. Flags supported for GNU compatibility: alloc, load, noload, "
- "readonly, debug, code, data, rom, share, contents, merge, "
- "strings.");
- Flags |= Flag;
- }
-
- SR.NewFlags = 0;
- if (Flags & SectionFlag::SecAlloc)
- *SR.NewFlags |= ELF::SHF_ALLOC;
- if (!(Flags & SectionFlag::SecReadonly))
- *SR.NewFlags |= ELF::SHF_WRITE;
- if (Flags & SectionFlag::SecCode)
- *SR.NewFlags |= ELF::SHF_EXECINSTR;
- if (Flags & SectionFlag::SecMerge)
- *SR.NewFlags |= ELF::SHF_MERGE;
- if (Flags & SectionFlag::SecStrings)
- *SR.NewFlags |= ELF::SHF_STRINGS;
+ Expected<SectionFlag> ParsedFlagSet =
+ parseSectionFlagSet(makeArrayRef(NameAndFlags).drop_front());
+ if (!ParsedFlagSet)
+ return ParsedFlagSet.takeError();
+ SR.NewFlags = *ParsedFlagSet;
}
return SR;
}
+static Expected<SectionFlagsUpdate>
+parseSetSectionFlagValue(StringRef FlagValue) {
+ if (!StringRef(FlagValue).contains('='))
+ return createStringError(errc::invalid_argument,
+ "bad format for --set-section-flags: missing '='");
+
+ // Initial split: ".foo" = "f1,f2,..."
+ auto Section2Flags = StringRef(FlagValue).split('=');
+ SectionFlagsUpdate SFU;
+ SFU.Name = Section2Flags.first;
+
+ // Flags split: "f1" "f2" ...
+ SmallVector<StringRef, 6> SectionFlags;
+ Section2Flags.second.split(SectionFlags, ',');
+ Expected<SectionFlag> ParsedFlagSet = parseSectionFlagSet(SectionFlags);
+ if (!ParsedFlagSet)
+ return ParsedFlagSet.takeError();
+ SFU.NewFlags = *ParsedFlagSet;
+
+ return SFU;
+}
+
+static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
+ // Parse value given with --add-symbol option and create the
+ // new symbol if possible. The value format for --add-symbol is:
+ //
+ // <name>=[<section>:]<value>[,<flags>]
+ //
+ // where:
+ // <name> - symbol name, can be empty string
+ // <section> - optional section name. If not given, an ABS symbol is created
+ // <value> - symbol value, can be decimal or hexadecimal number prefixed
+ // with 0x.
+ // <flags> - optional flags affecting symbol type, binding or visibility:
+ // The following are currently supported:
+ //
+ // global, local, weak, default, hidden, file, section, object,
+ // indirect-function.
+ //
+ // The following flags are ignored and provided for GNU
+ // compatibility only:
+ //
+ // warning, debug, constructor, indirect, synthetic,
+ // unique-object, before=<symbol>.
+ NewSymbolInfo SI;
+ StringRef Value;
+ std::tie(SI.SymbolName, Value) = FlagValue.split('=');
+ if (Value.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --add-symbol, missing '=' after '%s'",
+ SI.SymbolName.str().c_str());
+
+ if (Value.contains(':')) {
+ std::tie(SI.SectionName, Value) = Value.split(':');
+ if (SI.SectionName.empty() || Value.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --add-symbol, missing section name or symbol value");
+ }
+
+ SmallVector<StringRef, 6> Flags;
+ Value.split(Flags, ',');
+ if (Flags[0].getAsInteger(0, SI.Value))
+ return createStringError(errc::invalid_argument, "bad symbol value: '%s'",
+ Flags[0].str().c_str());
+
+ using Functor = std::function<void(void)>;
+ SmallVector<StringRef, 6> UnsupportedFlags;
+ for (size_t I = 1, NumFlags = Flags.size(); I < NumFlags; ++I)
+ static_cast<Functor>(
+ StringSwitch<Functor>(Flags[I])
+ .CaseLower("global", [&SI] { SI.Bind = ELF::STB_GLOBAL; })
+ .CaseLower("local", [&SI] { SI.Bind = ELF::STB_LOCAL; })
+ .CaseLower("weak", [&SI] { SI.Bind = ELF::STB_WEAK; })
+ .CaseLower("default", [&SI] { SI.Visibility = ELF::STV_DEFAULT; })
+ .CaseLower("hidden", [&SI] { SI.Visibility = ELF::STV_HIDDEN; })
+ .CaseLower("file", [&SI] { SI.Type = ELF::STT_FILE; })
+ .CaseLower("section", [&SI] { SI.Type = ELF::STT_SECTION; })
+ .CaseLower("object", [&SI] { SI.Type = ELF::STT_OBJECT; })
+ .CaseLower("function", [&SI] { SI.Type = ELF::STT_FUNC; })
+ .CaseLower("indirect-function",
+ [&SI] { SI.Type = ELF::STT_GNU_IFUNC; })
+ .CaseLower("debug", [] {})
+ .CaseLower("constructor", [] {})
+ .CaseLower("warning", [] {})
+ .CaseLower("indirect", [] {})
+ .CaseLower("synthetic", [] {})
+ .CaseLower("unique-object", [] {})
+ .StartsWithLower("before", [] {})
+ .Default([&] { UnsupportedFlags.push_back(Flags[I]); }))();
+ if (!UnsupportedFlags.empty())
+ return createStringError(errc::invalid_argument,
+ "unsupported flag%s for --add-symbol: '%s'",
+ UnsupportedFlags.size() > 1 ? "s" : "",
+ join(UnsupportedFlags, "', '").c_str());
+ return SI;
+}
+
static const StringMap<MachineInfo> ArchMap{
// Name, {EMachine, 64bit, LittleEndian}
{"aarch64", {ELF::EM_AARCH64, true, true}},
{"arm", {ELF::EM_ARM, false, true}},
{"i386", {ELF::EM_386, false, true}},
{"i386:x86-64", {ELF::EM_X86_64, true, true}},
+ {"mips", {ELF::EM_MIPS, false, false}},
{"powerpc:common64", {ELF::EM_PPC64, true, true}},
- {"sparc", {ELF::EM_SPARC, false, true}},
+ {"riscv:rv32", {ELF::EM_RISCV, false, true}},
+ {"riscv:rv64", {ELF::EM_RISCV, true, true}},
+ {"sparc", {ELF::EM_SPARC, false, false}},
+ {"sparcel", {ELF::EM_SPARC, false, true}},
{"x86-64", {ELF::EM_X86_64, true, true}},
};
-static const MachineInfo &getMachineInfo(StringRef Arch) {
+static Expected<const MachineInfo &> getMachineInfo(StringRef Arch) {
auto Iter = ArchMap.find(Arch);
if (Iter == std::end(ArchMap))
- error("Invalid architecture: '" + Arch + "'");
+ return createStringError(errc::invalid_argument,
+ "invalid architecture: '%s'", Arch.str().c_str());
return Iter->getValue();
}
-static const StringMap<MachineInfo> OutputFormatMap{
+struct TargetInfo {
+ FileFormat Format;
+ MachineInfo Machine;
+};
+
+// FIXME: consolidate with the bfd parsing used by lld.
+static const StringMap<MachineInfo> TargetMap{
// Name, {EMachine, 64bit, LittleEndian}
+ // x86
{"elf32-i386", {ELF::EM_386, false, true}},
- {"elf32-powerpcle", {ELF::EM_PPC, false, true}},
{"elf32-x86-64", {ELF::EM_X86_64, false, true}},
- {"elf64-powerpcle", {ELF::EM_PPC64, true, true}},
{"elf64-x86-64", {ELF::EM_X86_64, true, true}},
+ // Intel MCU
+ {"elf32-iamcu", {ELF::EM_IAMCU, false, true}},
+ // ARM
+ {"elf32-littlearm", {ELF::EM_ARM, false, true}},
+ // ARM AArch64
+ {"elf64-aarch64", {ELF::EM_AARCH64, true, true}},
+ {"elf64-littleaarch64", {ELF::EM_AARCH64, true, true}},
+ // RISC-V
+ {"elf32-littleriscv", {ELF::EM_RISCV, false, true}},
+ {"elf64-littleriscv", {ELF::EM_RISCV, true, true}},
+ // PowerPC
+ {"elf32-powerpc", {ELF::EM_PPC, false, false}},
+ {"elf32-powerpcle", {ELF::EM_PPC, false, true}},
+ {"elf64-powerpc", {ELF::EM_PPC64, true, false}},
+ {"elf64-powerpcle", {ELF::EM_PPC64, true, true}},
+ // MIPS
+ {"elf32-bigmips", {ELF::EM_MIPS, false, false}},
+ {"elf32-ntradbigmips", {ELF::EM_MIPS, false, false}},
+ {"elf32-ntradlittlemips", {ELF::EM_MIPS, false, true}},
+ {"elf32-tradbigmips", {ELF::EM_MIPS, false, false}},
+ {"elf32-tradlittlemips", {ELF::EM_MIPS, false, true}},
+ {"elf64-tradbigmips", {ELF::EM_MIPS, true, false}},
+ {"elf64-tradlittlemips", {ELF::EM_MIPS, true, true}},
+ // SPARC
+ {"elf32-sparc", {ELF::EM_SPARC, false, false}},
+ {"elf32-sparcel", {ELF::EM_SPARC, false, true}},
};
-static const MachineInfo &getOutputFormatMachineInfo(StringRef Format) {
- auto Iter = OutputFormatMap.find(Format);
- if (Iter == std::end(OutputFormatMap))
- error("Invalid output format: '" + Format + "'");
- return Iter->getValue();
+static Expected<TargetInfo>
+getOutputTargetInfoByTargetName(StringRef TargetName) {
+ StringRef OriginalTargetName = TargetName;
+ bool IsFreeBSD = TargetName.consume_back("-freebsd");
+ auto Iter = TargetMap.find(TargetName);
+ if (Iter == std::end(TargetMap))
+ return createStringError(errc::invalid_argument,
+ "invalid output format: '%s'",
+ OriginalTargetName.str().c_str());
+ MachineInfo MI = Iter->getValue();
+ if (IsFreeBSD)
+ MI.OSABI = ELF::ELFOSABI_FREEBSD;
+
+ FileFormat Format;
+ if (TargetName.startswith("elf"))
+ Format = FileFormat::ELF;
+ else
+ // This should never happen because `TargetName` is valid (it certainly
+ // exists in the TargetMap).
+ llvm_unreachable("unknown target prefix");
+
+ return {TargetInfo{Format, MI}};
}
-static void addGlobalSymbolsFromFile(std::vector<std::string> &Symbols,
- StringRef Filename) {
+static Error addSymbolsFromFile(std::vector<NameOrRegex> &Symbols,
+ BumpPtrAllocator &Alloc, StringRef Filename,
+ bool UseRegex) {
+ StringSaver Saver(Alloc);
SmallVector<StringRef, 16> Lines;
auto BufOrErr = MemoryBuffer::getFile(Filename);
if (!BufOrErr)
- reportError(Filename, BufOrErr.getError());
+ return createFileError(Filename, BufOrErr.getError());
BufOrErr.get()->getBuffer().split(Lines, '\n');
for (StringRef Line : Lines) {
@@ -218,14 +356,62 @@ static void addGlobalSymbolsFromFile(std::vector<std::string> &Symbols,
// it's not empty.
auto TrimmedLine = Line.split('#').first.trim();
if (!TrimmedLine.empty())
- Symbols.push_back(TrimmedLine.str());
+ Symbols.emplace_back(Saver.save(TrimmedLine), UseRegex);
}
+
+ return Error::success();
+}
+
+NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) {
+ if (!IsRegex) {
+ Name = Pattern;
+ return;
+ }
+
+ SmallVector<char, 32> Data;
+ R = std::make_shared<Regex>(
+ ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data));
+}
+
+static Error addSymbolsToRenameFromFile(StringMap<StringRef> &SymbolsToRename,
+ BumpPtrAllocator &Alloc,
+ StringRef Filename) {
+ StringSaver Saver(Alloc);
+ SmallVector<StringRef, 16> Lines;
+ auto BufOrErr = MemoryBuffer::getFile(Filename);
+ if (!BufOrErr)
+ return createFileError(Filename, BufOrErr.getError());
+
+ BufOrErr.get()->getBuffer().split(Lines, '\n');
+ size_t NumLines = Lines.size();
+ for (size_t LineNo = 0; LineNo < NumLines; ++LineNo) {
+ StringRef TrimmedLine = Lines[LineNo].split('#').first.trim();
+ if (TrimmedLine.empty())
+ continue;
+
+ std::pair<StringRef, StringRef> Pair = Saver.save(TrimmedLine).split(' ');
+ StringRef NewName = Pair.second.trim();
+ if (NewName.empty())
+ return createStringError(errc::invalid_argument,
+ "%s:%zu: missing new symbol name",
+ Filename.str().c_str(), LineNo + 1);
+ SymbolsToRename.insert({Pair.first, NewName});
+ }
+ return Error::success();
+}
+
+template <class T> static ErrorOr<T> getAsInteger(StringRef Val) {
+ T Result;
+ if (Val.getAsInteger(0, Result))
+ return errc::invalid_argument;
+ return Result;
}
// ParseObjcopyOptions returns the config and sets the input arguments. If a
// help flag is set then ParseObjcopyOptions will print the help message and
// exit.
-DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
+ DriverConfig DC;
ObjcopyOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
llvm::opt::InputArgList InputArgs =
@@ -250,16 +436,18 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
SmallVector<const char *, 2> Positional;
for (auto Arg : InputArgs.filtered(OBJCOPY_UNKNOWN))
- error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+ return createStringError(errc::invalid_argument, "unknown argument '%s'",
+ Arg->getAsString(InputArgs).c_str());
for (auto Arg : InputArgs.filtered(OBJCOPY_INPUT))
Positional.push_back(Arg->getValue());
if (Positional.empty())
- error("No input file specified");
+ return createStringError(errc::invalid_argument, "no input file specified");
if (Positional.size() > 2)
- error("Too many positional arguments");
+ return createStringError(errc::invalid_argument,
+ "too many positional arguments");
CopyConfig Config;
Config.InputFilename = Positional[0];
@@ -267,23 +455,50 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
if (InputArgs.hasArg(OBJCOPY_target) &&
(InputArgs.hasArg(OBJCOPY_input_target) ||
InputArgs.hasArg(OBJCOPY_output_target)))
- error("--target cannot be used with --input-target or --output-target");
+ return createStringError(
+ errc::invalid_argument,
+ "--target cannot be used with --input-target or --output-target");
+ bool UseRegex = InputArgs.hasArg(OBJCOPY_regex);
+ StringRef InputFormat, OutputFormat;
if (InputArgs.hasArg(OBJCOPY_target)) {
- Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
- Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+ InputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
+ OutputFormat = InputArgs.getLastArgValue(OBJCOPY_target);
} else {
- Config.InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
- Config.OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
+ InputFormat = InputArgs.getLastArgValue(OBJCOPY_input_target);
+ OutputFormat = InputArgs.getLastArgValue(OBJCOPY_output_target);
}
- if (Config.InputFormat == "binary") {
+
+ // FIXME: Currently, we ignore the target for non-binary/ihex formats
+ // explicitly specified by the -I option (e.g. -Ielf32-x86-64) and guess the
+ // format by llvm::object::createBinary regardless of the option value.
+ Config.InputFormat = StringSwitch<FileFormat>(InputFormat)
+ .Case("binary", FileFormat::Binary)
+ .Case("ihex", FileFormat::IHex)
+ .Default(FileFormat::Unspecified);
+ if (Config.InputFormat == FileFormat::Binary) {
auto BinaryArch = InputArgs.getLastArgValue(OBJCOPY_binary_architecture);
if (BinaryArch.empty())
- error("Specified binary input without specifiying an architecture");
- Config.BinaryArch = getMachineInfo(BinaryArch);
+ return createStringError(
+ errc::invalid_argument,
+ "specified binary input without specifying an architecture");
+ Expected<const MachineInfo &> MI = getMachineInfo(BinaryArch);
+ if (!MI)
+ return MI.takeError();
+ Config.BinaryArch = *MI;
+ }
+
+ Config.OutputFormat = StringSwitch<FileFormat>(OutputFormat)
+ .Case("binary", FileFormat::Binary)
+ .Case("ihex", FileFormat::IHex)
+ .Default(FileFormat::Unspecified);
+ if (Config.OutputFormat == FileFormat::Unspecified && !OutputFormat.empty()) {
+ Expected<TargetInfo> Target = getOutputTargetInfoByTargetName(OutputFormat);
+ if (!Target)
+ return Target.takeError();
+ Config.OutputFormat = Target->Format;
+ Config.OutputArch = Target->Machine;
}
- if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary")
- Config.OutputArch = getOutputFormatMachineInfo(Config.OutputFormat);
if (auto Arg = InputArgs.getLastArg(OBJCOPY_compress_debug_sections,
OBJCOPY_compress_debug_sections_eq)) {
@@ -297,14 +512,36 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
.Case("zlib", DebugCompressionType::Z)
.Default(DebugCompressionType::None);
if (Config.CompressionType == DebugCompressionType::None)
- error("Invalid or unsupported --compress-debug-sections format: " +
- InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq));
- if (!zlib::isAvailable())
- error("LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress.");
+ return createStringError(
+ errc::invalid_argument,
+ "invalid or unsupported --compress-debug-sections format: %s",
+ InputArgs.getLastArgValue(OBJCOPY_compress_debug_sections_eq)
+ .str()
+ .c_str());
}
+ if (!zlib::isAvailable())
+ return createStringError(
+ errc::invalid_argument,
+ "LLVM was not compiled with LLVM_ENABLE_ZLIB: can not compress");
}
Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink);
+ // The gnu_debuglink's target is expected to not change or else its CRC would
+ // become invalidated and get rejected. We can avoid recalculating the
+ // checksum for every target file inside an archive by precomputing the CRC
+ // here. This prevents a significant amount of I/O.
+ if (!Config.AddGnuDebugLink.empty()) {
+ auto DebugOrErr = MemoryBuffer::getFile(Config.AddGnuDebugLink);
+ if (!DebugOrErr)
+ return createFileError(Config.AddGnuDebugLink, DebugOrErr.getError());
+ auto Debug = std::move(*DebugOrErr);
+ JamCRC CRC;
+ CRC.update(
+ ArrayRef<char>(Debug->getBuffer().data(), Debug->getBuffer().size()));
+ // The CRC32 value needs to be complemented because the JamCRC doesn't
+ // finalize the CRC32 value.
+ Config.GnuDebugLinkCRC32 = ~CRC.getCRC();
+ }
Config.BuildIdLinkDir = InputArgs.getLastArgValue(OBJCOPY_build_id_link_dir);
if (InputArgs.hasArg(OBJCOPY_build_id_link_input))
Config.BuildIdLinkInput =
@@ -314,27 +551,72 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
InputArgs.getLastArgValue(OBJCOPY_build_id_link_output);
Config.SplitDWO = InputArgs.getLastArgValue(OBJCOPY_split_dwo);
Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols);
+ Config.AllocSectionsPrefix =
+ InputArgs.getLastArgValue(OBJCOPY_prefix_alloc_sections);
+ if (auto Arg = InputArgs.getLastArg(OBJCOPY_extract_partition))
+ Config.ExtractPartition = Arg->getValue();
for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
if (!StringRef(Arg->getValue()).contains('='))
- error("Bad format for --redefine-sym");
+ return createStringError(errc::invalid_argument,
+ "bad format for --redefine-sym");
auto Old2New = StringRef(Arg->getValue()).split('=');
if (!Config.SymbolsToRename.insert(Old2New).second)
- error("Multiple redefinition of symbol " + Old2New.first);
+ return createStringError(errc::invalid_argument,
+ "multiple redefinition of symbol '%s'",
+ Old2New.first.str().c_str());
}
+ for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbols))
+ if (Error E = addSymbolsToRenameFromFile(Config.SymbolsToRename, DC.Alloc,
+ Arg->getValue()))
+ return std::move(E);
+
for (auto Arg : InputArgs.filtered(OBJCOPY_rename_section)) {
- SectionRename SR = parseRenameSectionValue(StringRef(Arg->getValue()));
- if (!Config.SectionsToRename.try_emplace(SR.OriginalName, SR).second)
- error("Multiple renames of section " + SR.OriginalName);
+ Expected<SectionRename> SR =
+ parseRenameSectionValue(StringRef(Arg->getValue()));
+ if (!SR)
+ return SR.takeError();
+ if (!Config.SectionsToRename.try_emplace(SR->OriginalName, *SR).second)
+ return createStringError(errc::invalid_argument,
+ "multiple renames of section '%s'",
+ SR->OriginalName.str().c_str());
+ }
+ for (auto Arg : InputArgs.filtered(OBJCOPY_set_section_flags)) {
+ Expected<SectionFlagsUpdate> SFU =
+ parseSetSectionFlagValue(Arg->getValue());
+ if (!SFU)
+ return SFU.takeError();
+ if (!Config.SetSectionFlags.try_emplace(SFU->Name, *SFU).second)
+ return createStringError(
+ errc::invalid_argument,
+ "--set-section-flags set multiple times for section '%s'",
+ SFU->Name.str().c_str());
+ }
+ // Prohibit combinations of --set-section-flags when the section name is used
+ // by --rename-section, either as a source or a destination.
+ for (const auto &E : Config.SectionsToRename) {
+ const SectionRename &SR = E.second;
+ if (Config.SetSectionFlags.count(SR.OriginalName))
+ return createStringError(
+ errc::invalid_argument,
+ "--set-section-flags=%s conflicts with --rename-section=%s=%s",
+ SR.OriginalName.str().c_str(), SR.OriginalName.str().c_str(),
+ SR.NewName.str().c_str());
+ if (Config.SetSectionFlags.count(SR.NewName))
+ return createStringError(
+ errc::invalid_argument,
+ "--set-section-flags=%s conflicts with --rename-section=%s=%s",
+ SR.NewName.str().c_str(), SR.OriginalName.str().c_str(),
+ SR.NewName.str().c_str());
}
for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section))
- Config.ToRemove.push_back(Arg->getValue());
+ Config.ToRemove.emplace_back(Arg->getValue(), UseRegex);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section))
- Config.KeepSection.push_back(Arg->getValue());
+ Config.KeepSection.emplace_back(Arg->getValue(), UseRegex);
for (auto Arg : InputArgs.filtered(OBJCOPY_only_section))
- Config.OnlySection.push_back(Arg->getValue());
+ Config.OnlySection.emplace_back(Arg->getValue(), UseRegex);
for (auto Arg : InputArgs.filtered(OBJCOPY_add_section))
Config.AddSection.push_back(Arg->getValue());
for (auto Arg : InputArgs.filtered(OBJCOPY_dump_section))
@@ -347,27 +629,71 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
+ Config.ExtractMainPartition =
+ InputArgs.hasArg(OBJCOPY_extract_main_partition);
Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
- Config.DiscardAll = InputArgs.hasArg(OBJCOPY_discard_all);
+ if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals))
+ Config.DiscardMode =
+ InputArgs.hasFlag(OBJCOPY_discard_all, OBJCOPY_discard_locals)
+ ? DiscardType::All
+ : DiscardType::Locals;
Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
Config.DecompressDebugSections =
InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
+ if (Config.DiscardMode == DiscardType::All)
+ Config.StripDebug = true;
for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
- Config.SymbolsToLocalize.push_back(Arg->getValue());
+ Config.SymbolsToLocalize.emplace_back(Arg->getValue(), UseRegex);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols))
+ if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol))
- Config.SymbolsToKeepGlobal.push_back(Arg->getValue());
+ Config.SymbolsToKeepGlobal.emplace_back(Arg->getValue(), UseRegex);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols))
- addGlobalSymbolsFromFile(Config.SymbolsToKeepGlobal, Arg->getValue());
+ if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol))
- Config.SymbolsToGlobalize.push_back(Arg->getValue());
+ Config.SymbolsToGlobalize.emplace_back(Arg->getValue(), UseRegex);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols))
+ if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol))
- Config.SymbolsToWeaken.push_back(Arg->getValue());
+ Config.SymbolsToWeaken.emplace_back(Arg->getValue(), UseRegex);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols))
+ if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol))
- Config.SymbolsToRemove.push_back(Arg->getValue());
+ Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegex);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols))
+ if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol))
+ Config.UnneededSymbolsToRemove.emplace_back(Arg->getValue(), UseRegex);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols))
+ if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol))
- Config.SymbolsToKeep.push_back(Arg->getValue());
+ Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegex);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols))
+ if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc,
+ Arg->getValue(), UseRegex))
+ return std::move(E);
+ for (auto Arg : InputArgs.filtered(OBJCOPY_add_symbol)) {
+ Expected<NewSymbolInfo> NSI = parseNewSymbolInfo(Arg->getValue());
+ if (!NSI)
+ return NSI.takeError();
+ Config.SymbolsToAdd.push_back(*NSI);
+ }
+
+ Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
Config.DeterministicArchives = InputArgs.hasFlag(
OBJCOPY_enable_deterministic_archives,
@@ -375,24 +701,60 @@ DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr) {
Config.PreserveDates = InputArgs.hasArg(OBJCOPY_preserve_dates);
+ if (Config.PreserveDates &&
+ (Config.OutputFilename == "-" || Config.InputFilename == "-"))
+ return createStringError(errc::invalid_argument,
+ "--preserve-dates requires a file");
+
+ for (auto Arg : InputArgs)
+ if (Arg->getOption().matches(OBJCOPY_set_start)) {
+ auto EAddr = getAsInteger<uint64_t>(Arg->getValue());
+ if (!EAddr)
+ return createStringError(
+ EAddr.getError(), "bad entry point address: '%s'", Arg->getValue());
+
+ Config.EntryExpr = [EAddr](uint64_t) { return *EAddr; };
+ } else if (Arg->getOption().matches(OBJCOPY_change_start)) {
+ auto EIncr = getAsInteger<int64_t>(Arg->getValue());
+ if (!EIncr)
+ return createStringError(EIncr.getError(),
+ "bad entry point increment: '%s'",
+ Arg->getValue());
+ auto Expr = Config.EntryExpr ? std::move(Config.EntryExpr)
+ : [](uint64_t A) { return A; };
+ Config.EntryExpr = [Expr, EIncr](uint64_t EAddr) {
+ return Expr(EAddr) + *EIncr;
+ };
+ }
+
if (Config.DecompressDebugSections &&
Config.CompressionType != DebugCompressionType::None) {
- error("Cannot specify --compress-debug-sections at the same time as "
- "--decompress-debug-sections at the same time");
+ return createStringError(
+ errc::invalid_argument,
+ "cannot specify both --compress-debug-sections and "
+ "--decompress-debug-sections");
}
if (Config.DecompressDebugSections && !zlib::isAvailable())
- error("LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress.");
+ return createStringError(
+ errc::invalid_argument,
+ "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress");
+
+ if (Config.ExtractPartition && Config.ExtractMainPartition)
+ return createStringError(errc::invalid_argument,
+ "cannot specify --extract-partition together with "
+ "--extract-main-partition");
- DriverConfig DC;
DC.CopyConfigs.push_back(std::move(Config));
- return DC;
+ return std::move(DC);
}
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
// exit.
-DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
+Expected<DriverConfig>
+parseStripOptions(ArrayRef<const char *> ArgsArr,
+ std::function<Error(Error)> ErrorCallback) {
StripOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
llvm::opt::InputArgList InputArgs =
@@ -414,44 +776,65 @@ DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
exit(0);
}
- SmallVector<const char *, 2> Positional;
+ SmallVector<StringRef, 2> Positional;
for (auto Arg : InputArgs.filtered(STRIP_UNKNOWN))
- error("unknown argument '" + Arg->getAsString(InputArgs) + "'");
+ return createStringError(errc::invalid_argument, "unknown argument '%s'",
+ Arg->getAsString(InputArgs).c_str());
for (auto Arg : InputArgs.filtered(STRIP_INPUT))
Positional.push_back(Arg->getValue());
if (Positional.empty())
- error("No input file specified");
+ return createStringError(errc::invalid_argument, "no input file specified");
if (Positional.size() > 1 && InputArgs.hasArg(STRIP_output))
- error("Multiple input files cannot be used in combination with -o");
+ return createStringError(
+ errc::invalid_argument,
+ "multiple input files cannot be used in combination with -o");
CopyConfig Config;
+ bool UseRegexp = InputArgs.hasArg(STRIP_regex);
+ Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
- Config.DiscardAll = InputArgs.hasArg(STRIP_discard_all);
+ if (InputArgs.hasArg(STRIP_discard_all, STRIP_discard_locals))
+ Config.DiscardMode =
+ InputArgs.hasFlag(STRIP_discard_all, STRIP_discard_locals)
+ ? DiscardType::All
+ : DiscardType::Locals;
Config.StripUnneeded = InputArgs.hasArg(STRIP_strip_unneeded);
- Config.StripAll = InputArgs.hasArg(STRIP_strip_all);
+ if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all))
+ Config.StripAll = Arg->getOption().getID() == STRIP_strip_all;
Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu);
-
- if (!Config.StripDebug && !Config.StripUnneeded && !Config.DiscardAll &&
- !Config.StripAllGNU)
- Config.StripAll = true;
+ Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug);
+ Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
for (auto Arg : InputArgs.filtered(STRIP_keep_section))
- Config.KeepSection.push_back(Arg->getValue());
+ Config.KeepSection.emplace_back(Arg->getValue(), UseRegexp);
for (auto Arg : InputArgs.filtered(STRIP_remove_section))
- Config.ToRemove.push_back(Arg->getValue());
+ Config.ToRemove.emplace_back(Arg->getValue(), UseRegexp);
+
+ for (auto Arg : InputArgs.filtered(STRIP_strip_symbol))
+ Config.SymbolsToRemove.emplace_back(Arg->getValue(), UseRegexp);
for (auto Arg : InputArgs.filtered(STRIP_keep_symbol))
- Config.SymbolsToKeep.push_back(Arg->getValue());
+ Config.SymbolsToKeep.emplace_back(Arg->getValue(), UseRegexp);
+
+ if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug &&
+ !Config.StripUnneeded && Config.DiscardMode == DiscardType::None &&
+ !Config.StripAllGNU && Config.SymbolsToRemove.empty())
+ Config.StripAll = true;
+
+ if (Config.DiscardMode == DiscardType::All)
+ Config.StripDebug = true;
Config.DeterministicArchives =
InputArgs.hasFlag(STRIP_enable_deterministic_archives,
STRIP_disable_deterministic_archives, /*default=*/true);
Config.PreserveDates = InputArgs.hasArg(STRIP_preserve_dates);
+ Config.InputFormat = FileFormat::Unspecified;
+ Config.OutputFormat = FileFormat::Unspecified;
DriverConfig DC;
if (Positional.size() == 1) {
@@ -460,14 +843,30 @@ DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr) {
InputArgs.getLastArgValue(STRIP_output, Positional[0]);
DC.CopyConfigs.push_back(std::move(Config));
} else {
- for (const char *Filename : Positional) {
+ StringMap<unsigned> InputFiles;
+ for (StringRef Filename : Positional) {
+ if (InputFiles[Filename]++ == 1) {
+ if (Filename == "-")
+ return createStringError(
+ errc::invalid_argument,
+ "cannot specify '-' as an input file more than once");
+ if (Error E = ErrorCallback(createStringError(
+ errc::invalid_argument, "'%s' was already specified",
+ Filename.str().c_str())))
+ return std::move(E);
+ }
Config.InputFilename = Filename;
Config.OutputFilename = Filename;
DC.CopyConfigs.push_back(Config);
}
}
- return DC;
+ if (Config.PreserveDates && (is_contained(Positional, "-") ||
+ InputArgs.getLastArgValue(STRIP_output) == "-"))
+ return createStringError(errc::invalid_argument,
+ "--preserve-dates requires a file");
+
+ return std::move(DC);
}
} // namespace objcopy
diff --git a/tools/llvm-objcopy/CopyConfig.h b/tools/llvm-objcopy/CopyConfig.h
index 71a2423ae1c8..aff3631a487c 100644
--- a/tools/llvm-objcopy/CopyConfig.h
+++ b/tools/llvm-objcopy/CopyConfig.h
@@ -1,9 +1,8 @@
//===- CopyConfig.h -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,40 +10,110 @@
#define LLVM_TOOLS_LLVM_OBJCOPY_COPY_CONFIG_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
// Necessary for llvm::DebugCompressionType::None
#include "llvm/Target/TargetOptions.h"
-#include <string>
#include <vector>
namespace llvm {
namespace objcopy {
+enum class FileFormat {
+ Unspecified,
+ ELF,
+ Binary,
+ IHex,
+};
+
// This type keeps track of the machine info for various architectures. This
// lets us map architecture names to ELF types and the e_machine value of the
// ELF file.
struct MachineInfo {
+ MachineInfo(uint16_t EM, uint8_t ABI, bool Is64, bool IsLittle)
+ : EMachine(EM), OSABI(ABI), Is64Bit(Is64), IsLittleEndian(IsLittle) {}
+ // Alternative constructor that defaults to NONE for OSABI.
+ MachineInfo(uint16_t EM, bool Is64, bool IsLittle)
+ : MachineInfo(EM, ELF::ELFOSABI_NONE, Is64, IsLittle) {}
+ // Default constructor for unset fields.
+ MachineInfo() : MachineInfo(0, 0, false, false) {}
uint16_t EMachine;
+ uint8_t OSABI;
bool Is64Bit;
bool IsLittleEndian;
};
+// Flags set by --set-section-flags or --rename-section. Interpretation of these
+// is format-specific and not all flags are meaningful for all object file
+// formats. This is a bitmask; many section flags may be set.
+enum SectionFlag {
+ SecNone = 0,
+ SecAlloc = 1 << 0,
+ SecLoad = 1 << 1,
+ SecNoload = 1 << 2,
+ SecReadonly = 1 << 3,
+ SecDebug = 1 << 4,
+ SecCode = 1 << 5,
+ SecData = 1 << 6,
+ SecRom = 1 << 7,
+ SecMerge = 1 << 8,
+ SecStrings = 1 << 9,
+ SecContents = 1 << 10,
+ SecShare = 1 << 11,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ SecShare)
+};
+
struct SectionRename {
StringRef OriginalName;
StringRef NewName;
- Optional<uint64_t> NewFlags;
+ Optional<SectionFlag> NewFlags;
+};
+
+struct SectionFlagsUpdate {
+ StringRef Name;
+ SectionFlag NewFlags;
+};
+
+enum class DiscardType {
+ None, // Default
+ All, // --discard-all (-x)
+ Locals, // --discard-locals (-X)
+};
+
+class NameOrRegex {
+ StringRef Name;
+ // Regex is shared between multiple CopyConfig instances.
+ std::shared_ptr<Regex> R;
+
+public:
+ NameOrRegex(StringRef Pattern, bool IsRegex);
+ bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; }
+ bool operator!=(StringRef S) const { return !operator==(S); }
+};
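
The NameOrRegex constructor body is not part of this hunk, so the following is only an approximation of the matching rule declared by operator== above, using std::regex in place of llvm::Regex: with --regex a pattern is matched as a regular expression against the whole name, otherwise names must compare exactly. The pattern strings are hypothetical.

#include <cassert>
#include <memory>
#include <regex>
#include <string>

struct NameOrRegexSketch {
  std::string Name;
  std::shared_ptr<std::regex> R;

  NameOrRegexSketch(std::string Pattern, bool IsRegex) : Name(std::move(Pattern)) {
    if (IsRegex)
      R = std::make_shared<std::regex>(Name);
  }
  // Regex patterns must match the whole name; plain names compare exactly.
  bool operator==(const std::string &S) const {
    return R ? std::regex_match(S, *R) : Name == S;
  }
};

int main() {
  assert(NameOrRegexSketch(".debug_info", /*IsRegex=*/false) == ".debug_info");
  assert(!(NameOrRegexSketch(".debug", /*IsRegex=*/false) == ".debug_info"));
  assert(NameOrRegexSketch("\\.debug_.*", /*IsRegex=*/true) == ".debug_line");
  return 0;
}
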
+
+struct NewSymbolInfo {
+ StringRef SymbolName;
+ StringRef SectionName;
+ uint64_t Value = 0;
+ uint8_t Type = ELF::STT_NOTYPE;
+ uint8_t Bind = ELF::STB_GLOBAL;
+ uint8_t Visibility = ELF::STV_DEFAULT;
};
// Configuration for copying/stripping a single file.
struct CopyConfig {
// Main input/output options
StringRef InputFilename;
- StringRef InputFormat;
+ FileFormat InputFormat;
StringRef OutputFilename;
- StringRef OutputFormat;
+ FileFormat OutputFormat;
// Only applicable for --input-format=binary
MachineInfo BinaryArch;
@@ -53,33 +122,48 @@ struct CopyConfig {
// Advanced options
StringRef AddGnuDebugLink;
+ // Cached gnu_debuglink's target CRC
+ uint32_t GnuDebugLinkCRC32;
StringRef BuildIdLinkDir;
Optional<StringRef> BuildIdLinkInput;
Optional<StringRef> BuildIdLinkOutput;
+ Optional<StringRef> ExtractPartition;
StringRef SplitDWO;
StringRef SymbolsPrefix;
+ StringRef AllocSectionsPrefix;
+ DiscardType DiscardMode = DiscardType::None;
// Repeated options
std::vector<StringRef> AddSection;
std::vector<StringRef> DumpSection;
- std::vector<StringRef> KeepSection;
- std::vector<StringRef> OnlySection;
- std::vector<StringRef> SymbolsToGlobalize;
- std::vector<StringRef> SymbolsToKeep;
- std::vector<StringRef> SymbolsToLocalize;
- std::vector<StringRef> SymbolsToRemove;
- std::vector<StringRef> SymbolsToWeaken;
- std::vector<StringRef> ToRemove;
- std::vector<std::string> SymbolsToKeepGlobal;
+ std::vector<NewSymbolInfo> SymbolsToAdd;
+ std::vector<NameOrRegex> KeepSection;
+ std::vector<NameOrRegex> OnlySection;
+ std::vector<NameOrRegex> SymbolsToGlobalize;
+ std::vector<NameOrRegex> SymbolsToKeep;
+ std::vector<NameOrRegex> SymbolsToLocalize;
+ std::vector<NameOrRegex> SymbolsToRemove;
+ std::vector<NameOrRegex> UnneededSymbolsToRemove;
+ std::vector<NameOrRegex> SymbolsToWeaken;
+ std::vector<NameOrRegex> ToRemove;
+ std::vector<NameOrRegex> SymbolsToKeepGlobal;
// Map options
StringMap<SectionRename> SectionsToRename;
+ StringMap<SectionFlagsUpdate> SetSectionFlags;
StringMap<StringRef> SymbolsToRename;
+ // ELF entry point address expression. The input parameter is an entry point
+ // address in the input ELF file. The entry address in the output file is
+ // calculated with EntryExpr(input_address), when either --set-start or
+ // --change-start is used.
+ std::function<uint64_t(uint64_t)> EntryExpr;
+
// Boolean options
+ bool AllowBrokenLinks = false;
bool DeterministicArchives = true;
- bool DiscardAll = false;
bool ExtractDWO = false;
+ bool ExtractMainPartition = false;
bool KeepFileSymbols = false;
bool LocalizeHidden = false;
bool OnlyKeepDebug = false;
@@ -101,17 +185,21 @@ struct CopyConfig {
// will contain one or more CopyConfigs.
struct DriverConfig {
SmallVector<CopyConfig, 1> CopyConfigs;
+ BumpPtrAllocator Alloc;
};
// ParseObjcopyOptions returns the config and sets the input arguments. If a
// help flag is set then ParseObjcopyOptions will print the help message and
// exit.
-DriverConfig parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
+Expected<DriverConfig> parseObjcopyOptions(ArrayRef<const char *> ArgsArr);
// ParseStripOptions returns the config and sets the input arguments. If a
// help flag is set then ParseStripOptions will print the help message and
-// exit.
-DriverConfig parseStripOptions(ArrayRef<const char *> ArgsArr);
+// exit. ErrorCallback is used to handle recoverable errors. An Error returned
+// by the callback aborts the parsing and is then returned by this function.
+Expected<DriverConfig>
+parseStripOptions(ArrayRef<const char *> ArgsArr,
+ std::function<Error(Error)> ErrorCallback);
} // namespace objcopy
} // namespace llvm
diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index f5ab8e708267..b366c6e55987 100644
--- a/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -1,9 +1,8 @@
//===- ELFObjcopy.cpp -----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
#include "llvm-objcopy.h"
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -71,6 +71,44 @@ static bool onlyKeepDWOPred(const Object &Obj, const SectionBase &Sec) {
return !isDWOSection(Sec);
}
+uint64_t getNewShfFlags(SectionFlag AllFlags) {
+ uint64_t NewFlags = 0;
+ if (AllFlags & SectionFlag::SecAlloc)
+ NewFlags |= ELF::SHF_ALLOC;
+ if (!(AllFlags & SectionFlag::SecReadonly))
+ NewFlags |= ELF::SHF_WRITE;
+ if (AllFlags & SectionFlag::SecCode)
+ NewFlags |= ELF::SHF_EXECINSTR;
+ if (AllFlags & SectionFlag::SecMerge)
+ NewFlags |= ELF::SHF_MERGE;
+ if (AllFlags & SectionFlag::SecStrings)
+ NewFlags |= ELF::SHF_STRINGS;
+ return NewFlags;
+}
+
+static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
+ uint64_t NewFlags) {
+ // Preserve some flags which should not be dropped when setting flags.
+ // Also, preserve anything OS/processor dependent.
+ const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE |
+ ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
+ ELF::SHF_MASKOS | ELF::SHF_MASKPROC |
+ ELF::SHF_TLS | ELF::SHF_INFO_LINK;
+ return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
+}
+
+static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
+ Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));
+
+ // In GNU objcopy, certain flags promote SHT_NOBITS to SHT_PROGBITS. This rule
+ // may promote more non-ALLOC sections than GNU objcopy, but it is fine as
+ // non-ALLOC SHT_NOBITS sections do not make much sense.
+ if (Sec.Type == SHT_NOBITS &&
+ (!(Sec.Flags & ELF::SHF_ALLOC) ||
+ Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
+ Sec.Type = SHT_PROGBITS;
+}
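
A standalone sketch (plain integers rather than the LLVM bitmask enum) of the mapping getNewShfFlags() applies for a hypothetical --set-section-flags .text=alloc,readonly,code; the point to notice is that SecReadonly acts by omitting SHF_WRITE rather than by adding a flag.

#include <cassert>
#include <cstdint>

// ELF section header flag values (from the ELF spec).
constexpr uint64_t SHF_WRITE = 0x1, SHF_ALLOC = 0x2, SHF_EXECINSTR = 0x4;

// Subset of the SectionFlag bits defined in CopyConfig.h.
enum SectionFlag : uint64_t { SecAlloc = 1 << 0, SecReadonly = 1 << 3, SecCode = 1 << 5 };

// Same logic as the corresponding cases of getNewShfFlags().
static uint64_t toShf(uint64_t Flags) {
  uint64_t New = 0;
  if (Flags & SecAlloc)
    New |= SHF_ALLOC;
  if (!(Flags & SecReadonly))
    New |= SHF_WRITE; // sections stay writable unless marked readonly
  if (Flags & SecCode)
    New |= SHF_EXECINSTR;
  return New;
}

int main() {
  // e.g. --set-section-flags .text=alloc,readonly,code
  assert(toShf(SecAlloc | SecReadonly | SecCode) == (SHF_ALLOC | SHF_EXECINSTR));
  return 0;
}
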
+
static ElfType getOutputElfType(const Binary &Bin) {
// Infer output ELF type from the input ELF object
if (isa<ELFObjectFile<ELF32LE>>(Bin))
@@ -92,12 +130,9 @@ static ElfType getOutputElfType(const MachineInfo &MI) {
return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
}
-static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
- Object &Obj, Buffer &Buf,
- ElfType OutputElfType) {
- if (Config.OutputFormat == "binary") {
- return llvm::make_unique<BinaryWriter>(Obj, Buf);
- }
+static std::unique_ptr<Writer> createELFWriter(const CopyConfig &Config,
+ Object &Obj, Buffer &Buf,
+ ElfType OutputElfType) {
// Depending on the initial ELFT and OutputFormat we need a different Writer.
switch (OutputElfType) {
case ELFT_ELF32LE:
@@ -116,10 +151,27 @@ static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
llvm_unreachable("Invalid output format");
}
+static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
+ Object &Obj, Buffer &Buf,
+ ElfType OutputElfType) {
+ switch (Config.OutputFormat) {
+ case FileFormat::Binary:
+ return llvm::make_unique<BinaryWriter>(Obj, Buf);
+ case FileFormat::IHex:
+ return llvm::make_unique<IHexWriter>(Obj, Buf);
+ default:
+ return createELFWriter(Config, Obj, Buf, OutputElfType);
+ }
+}
+
template <class ELFT>
static Expected<ArrayRef<uint8_t>>
-findBuildID(const object::ELFFile<ELFT> &In) {
- for (const auto &Phdr : unwrapOrError(In.program_headers())) {
+findBuildID(const CopyConfig &Config, const object::ELFFile<ELFT> &In) {
+ auto PhdrsOrErr = In.program_headers();
+ if (auto Err = PhdrsOrErr.takeError())
+ return createFileError(Config.InputFilename, std::move(Err));
+
+ for (const auto &Phdr : *PhdrsOrErr) {
if (Phdr.p_type != PT_NOTE)
continue;
Error Err = Error::success();
@@ -127,58 +179,106 @@ findBuildID(const object::ELFFile<ELFT> &In) {
if (Note.getType() == NT_GNU_BUILD_ID && Note.getName() == ELF_NOTE_GNU)
return Note.getDesc();
if (Err)
- return std::move(Err);
+ return createFileError(Config.InputFilename, std::move(Err));
}
- return createStringError(llvm::errc::invalid_argument,
- "Could not find build ID.");
+
+ return createFileError(
+ Config.InputFilename,
+ createStringError(llvm::errc::invalid_argument,
+ "could not find build ID"));
}
static Expected<ArrayRef<uint8_t>>
-findBuildID(const object::ELFObjectFileBase &In) {
+findBuildID(const CopyConfig &Config, const object::ELFObjectFileBase &In) {
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(&In))
- return findBuildID(*O->getELFFile());
+ return findBuildID(Config, *O->getELFFile());
else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(&In))
- return findBuildID(*O->getELFFile());
+ return findBuildID(Config, *O->getELFFile());
else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(&In))
- return findBuildID(*O->getELFFile());
+ return findBuildID(Config, *O->getELFFile());
else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(&In))
- return findBuildID(*O->getELFFile());
+ return findBuildID(Config, *O->getELFFile());
llvm_unreachable("Bad file format");
}
-static void linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
- StringRef Suffix, ArrayRef<uint8_t> BuildIdBytes) {
+template <class... Ts>
+static Error makeStringError(std::error_code EC, const Twine &Msg, Ts &&... Args) {
+ std::string FullMsg = (EC.message() + ": " + Msg).str();
+ return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
+}
+
+#define MODEL_8 "%%%%%%%%"
+#define MODEL_16 MODEL_8 MODEL_8
+#define MODEL_32 (MODEL_16 MODEL_16)
+
+static Error linkToBuildIdDir(const CopyConfig &Config, StringRef ToLink,
+ StringRef Suffix,
+ ArrayRef<uint8_t> BuildIdBytes) {
SmallString<128> Path = Config.BuildIdLinkDir;
sys::path::append(Path, llvm::toHex(BuildIdBytes[0], /*LowerCase*/ true));
if (auto EC = sys::fs::create_directories(Path))
- error("cannot create build ID link directory " + Path + ": " +
- EC.message());
+ return createFileError(
+ Path.str(),
+ makeStringError(EC, "cannot create build ID link directory"));
sys::path::append(Path,
llvm::toHex(BuildIdBytes.slice(1), /*LowerCase*/ true));
Path += Suffix;
- if (auto EC = sys::fs::create_hard_link(ToLink, Path)) {
- // Hard linking failed, try to remove the file first if it exists.
- if (sys::fs::exists(Path))
- sys::fs::remove(Path);
- EC = sys::fs::create_hard_link(ToLink, Path);
- if (EC)
- error("cannot link " + ToLink + " to " + Path + ": " + EC.message());
+ SmallString<128> TmpPath;
+ // create_hard_link races so we need to link to a temporary path but
+ // we want to make sure that we choose a filename that does not exist.
+ // By using 32 model characters we get 128 bits of entropy. It is
+ // unlikely that this string has ever existed before, much less exists
+ // on this disk or in the current working directory.
+ // Additionally we prepend the original Path for debugging but also
+ // because it ensures that we're linking within a directory on the same
+ // partition on the same device which is critical. It has the added
+ // win of yet further decreasing the odds of a conflict.
+ sys::fs::createUniquePath(Twine(Path) + "-" + MODEL_32 + ".tmp", TmpPath,
+ /*MakeAbsolute*/ false);
+ if (auto EC = sys::fs::create_hard_link(ToLink, TmpPath)) {
+ Path.push_back('\0');
+ return makeStringError(EC, "cannot link '%s' to '%s'", ToLink.data(),
+ Path.data());
+ }
+ // We then atomically rename the link into place, which will just move the
+ // link. If rename fails, something is more seriously wrong, so just return
+ // an error.
+ if (auto EC = sys::fs::rename(TmpPath, Path)) {
+ Path.push_back('\0');
+ return makeStringError(EC, "cannot link '%s' to '%s'", ToLink.data(),
+ Path.data());
+ }
+ // If `Path` was already a hard-link to the same underlying file then the
+ // temp file will be left so we need to remove it. Remove will not cause
+ // an error by default if the file is already gone so just blindly remove
+ // it rather than checking.
+ if (auto EC = sys::fs::remove(TmpPath)) {
+ TmpPath.push_back('\0');
+ return makeStringError(EC, "could not remove '%s'", TmpPath.data());
}
+ return Error::success();
}
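
A standalone sketch (std::filesystem in place of llvm::sys::fs, with a hypothetical helper name) of the link-then-rename pattern used above: create the hard link under a unique temporary name in the same directory, rename it over the final path, then remove any leftover temporary.

#include <filesystem>
#include <random>
#include <string>
#include <system_error>

namespace fs = std::filesystem;

// Hypothetical helper, not LLVM code.
void atomicHardLink(const fs::path &Target, const fs::path &LinkPath) {
  std::random_device Rd;
  fs::path Tmp = LinkPath;
  Tmp += "-" + std::to_string(Rd()) + ".tmp"; // unique-ish name in the same directory
  fs::create_hard_link(Target, Tmp);          // throws on failure
  fs::rename(Tmp, LinkPath);                  // atomically replaces LinkPath
  // POSIX rename() is a no-op when both names already refer to the same file,
  // so the temporary link may survive; remove it without checking.
  std::error_code EC;
  fs::remove(Tmp, EC);
}
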
-static void splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
- StringRef File, ElfType OutputElfType) {
+static Error splitDWOToFile(const CopyConfig &Config, const Reader &Reader,
+ StringRef File, ElfType OutputElfType) {
auto DWOFile = Reader.create();
- DWOFile->removeSections(
- [&](const SectionBase &Sec) { return onlyKeepDWOPred(*DWOFile, Sec); });
- if (Config.OutputArch)
+ auto OnlyKeepDWOPred = [&DWOFile](const SectionBase &Sec) {
+ return onlyKeepDWOPred(*DWOFile, Sec);
+ };
+ if (Error E = DWOFile->removeSections(Config.AllowBrokenLinks,
+ OnlyKeepDWOPred))
+ return E;
+ if (Config.OutputArch) {
DWOFile->Machine = Config.OutputArch.getValue().EMachine;
+ DWOFile->OSABI = Config.OutputArch.getValue().OSABI;
+ }
FileBuffer FB(File);
auto Writer = createWriter(Config, *DWOFile, FB, OutputElfType);
- Writer->finalize();
- Writer->write();
+ if (Error E = Writer->finalize())
+ return E;
+ return Writer->write();
}
static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
@@ -186,9 +286,9 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
for (auto &Sec : Obj.sections()) {
if (Sec.Name == SecName) {
if (Sec.OriginalData.empty())
- return make_error<StringError>("Can't dump section \"" + SecName +
- "\": it has no contents",
- object_error::parse_failed);
+ return createStringError(object_error::parse_failed,
+ "cannot dump section '%s': it has no contents",
+ SecName.str().c_str());
Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
FileOutputBuffer::create(Filename, Sec.OriginalData.size());
if (!BufferOrErr)
@@ -201,149 +301,143 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
return Error::success();
}
}
- return make_error<StringError>("Section not found",
- object_error::parse_failed);
-}
-
-static bool isCompressed(const SectionBase &Section) {
- const char *Magic = "ZLIB";
- return StringRef(Section.Name).startswith(".zdebug") ||
- (Section.OriginalData.size() > strlen(Magic) &&
- !strncmp(reinterpret_cast<const char *>(Section.OriginalData.data()),
- Magic, strlen(Magic))) ||
- (Section.Flags & ELF::SHF_COMPRESSED);
+ return createStringError(object_error::parse_failed, "section '%s' not found",
+ SecName.str().c_str());
}
static bool isCompressable(const SectionBase &Section) {
- return !isCompressed(Section) && isDebugSection(Section) &&
- Section.Name != ".gdb_index";
+ return !(Section.Flags & ELF::SHF_COMPRESSED) &&
+ StringRef(Section.Name).startswith(".debug");
}
static void replaceDebugSections(
- const CopyConfig &Config, Object &Obj, SectionPred &RemovePred,
+ Object &Obj, SectionPred &RemovePred,
function_ref<bool(const SectionBase &)> shouldReplace,
function_ref<SectionBase *(const SectionBase *)> addSection) {
+ // Build a list of the debug sections we are going to replace.
+ // We can't call `addSection` while iterating over sections,
+ // because it would mutate the sections array.
SmallVector<SectionBase *, 13> ToReplace;
- SmallVector<RelocationSection *, 13> RelocationSections;
- for (auto &Sec : Obj.sections()) {
- if (RelocationSection *R = dyn_cast<RelocationSection>(&Sec)) {
- if (shouldReplace(*R->getSection()))
- RelocationSections.push_back(R);
- continue;
- }
-
+ for (auto &Sec : Obj.sections())
if (shouldReplace(Sec))
ToReplace.push_back(&Sec);
- }
- for (SectionBase *S : ToReplace) {
- SectionBase *NewSection = addSection(S);
+ // Build a mapping from original section to a new one.
+ DenseMap<SectionBase *, SectionBase *> FromTo;
+ for (SectionBase *S : ToReplace)
+ FromTo[S] = addSection(S);
- for (RelocationSection *RS : RelocationSections) {
- if (RS->getSection() == S)
- RS->setSection(NewSection);
- }
- }
+ // Now we want to update the target sections of relocation
+ // sections. Also we will update the relocations themselves
+ // to update the symbol references.
+ for (auto &Sec : Obj.sections())
+ Sec.replaceSectionReferences(FromTo);
RemovePred = [shouldReplace, RemovePred](const SectionBase &Sec) {
return shouldReplace(Sec) || RemovePred(Sec);
};
}
-// This function handles the high level operations of GNU objcopy including
-// handling command line options. It's important to outline certain properties
-// we expect to hold of the command line operations. Any operation that "keeps"
-// should keep regardless of a remove. Additionally any removal should respect
-// any previous removals. Lastly whether or not something is removed shouldn't
-// depend a) on the order the options occur in or b) on some opaque priority
-// system. The only priority is that keeps/copies overrule removes.
-static void handleArgs(const CopyConfig &Config, Object &Obj,
- const Reader &Reader, ElfType OutputElfType) {
-
- if (!Config.SplitDWO.empty()) {
- splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType);
- }
- if (Config.OutputArch)
- Obj.Machine = Config.OutputArch.getValue().EMachine;
+static bool isUnneededSymbol(const Symbol &Sym) {
+ return !Sym.Referenced &&
+ (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
+ Sym.Type != STT_SECTION;
+}
+static Error updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
// TODO: update or remove symbols only if there is an option that affects
// them.
- if (Obj.SymbolTable) {
- Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
- if (!Sym.isCommon() &&
- ((Config.LocalizeHidden &&
- (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
- is_contained(Config.SymbolsToLocalize, Sym.Name)))
- Sym.Binding = STB_LOCAL;
-
- // Note: these two globalize flags have very similar names but different
- // meanings:
- //
- // --globalize-symbol: promote a symbol to global
- // --keep-global-symbol: all symbols except for these should be made local
- //
- // If --globalize-symbol is specified for a given symbol, it will be
- // global in the output file even if it is not included via
- // --keep-global-symbol. Because of that, make sure to check
- // --globalize-symbol second.
- if (!Config.SymbolsToKeepGlobal.empty() &&
- !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
- Sym.getShndx() != SHN_UNDEF)
- Sym.Binding = STB_LOCAL;
-
- if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
- Sym.getShndx() != SHN_UNDEF)
- Sym.Binding = STB_GLOBAL;
-
- if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
- Sym.Binding == STB_GLOBAL)
- Sym.Binding = STB_WEAK;
-
- if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
- Sym.getShndx() != SHN_UNDEF)
- Sym.Binding = STB_WEAK;
-
- const auto I = Config.SymbolsToRename.find(Sym.Name);
- if (I != Config.SymbolsToRename.end())
- Sym.Name = I->getValue();
-
- if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
- Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
- });
-
- // The purpose of this loop is to mark symbols referenced by sections
- // (like GroupSection or RelocationSection). This way, we know which
- // symbols are still 'needed' and which are not.
- if (Config.StripUnneeded) {
- for (auto &Section : Obj.sections())
- Section.markSymbols();
- }
+ if (!Obj.SymbolTable)
+ return Error::success();
+
+ Obj.SymbolTable->updateSymbols([&](Symbol &Sym) {
+ // Common and undefined symbols don't make sense as local symbols, and can
+ // even cause crashes if we localize those, so skip them.
+ if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
+ ((Config.LocalizeHidden &&
+ (Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
+ is_contained(Config.SymbolsToLocalize, Sym.Name)))
+ Sym.Binding = STB_LOCAL;
+
+ // Note: these two globalize flags have very similar names but different
+ // meanings:
+ //
+ // --globalize-symbol: promote a symbol to global
+ // --keep-global-symbol: all symbols except for these should be made local
+ //
+ // If --globalize-symbol is specified for a given symbol, it will be
+ // global in the output file even if it is not included via
+ // --keep-global-symbol. Because of that, make sure to check
+ // --globalize-symbol second.
+ if (!Config.SymbolsToKeepGlobal.empty() &&
+ !is_contained(Config.SymbolsToKeepGlobal, Sym.Name) &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_LOCAL;
+
+ if (is_contained(Config.SymbolsToGlobalize, Sym.Name) &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_GLOBAL;
+
+ if (is_contained(Config.SymbolsToWeaken, Sym.Name) &&
+ Sym.Binding == STB_GLOBAL)
+ Sym.Binding = STB_WEAK;
+
+ if (Config.Weaken && Sym.Binding == STB_GLOBAL &&
+ Sym.getShndx() != SHN_UNDEF)
+ Sym.Binding = STB_WEAK;
+
+ const auto I = Config.SymbolsToRename.find(Sym.Name);
+ if (I != Config.SymbolsToRename.end())
+ Sym.Name = I->getValue();
+
+ if (!Config.SymbolsPrefix.empty() && Sym.Type != STT_SECTION)
+ Sym.Name = (Config.SymbolsPrefix + Sym.Name).str();
+ });
+
+ // The purpose of this loop is to mark symbols referenced by sections
+ // (like GroupSection or RelocationSection). This way, we know which
+ // symbols are still 'needed' and which are not.
+ if (Config.StripUnneeded || !Config.UnneededSymbolsToRemove.empty() ||
+ !Config.OnlySection.empty()) {
+ for (auto &Section : Obj.sections())
+ Section.markSymbols();
+ }
- Obj.removeSymbols([&](const Symbol &Sym) {
- if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
- (Config.KeepFileSymbols && Sym.Type == STT_FILE))
- return false;
+ auto RemoveSymbolsPred = [&](const Symbol &Sym) {
+ if (is_contained(Config.SymbolsToKeep, Sym.Name) ||
+ (Config.KeepFileSymbols && Sym.Type == STT_FILE))
+ return false;
- if (Config.DiscardAll && Sym.Binding == STB_LOCAL &&
- Sym.getShndx() != SHN_UNDEF && Sym.Type != STT_FILE &&
- Sym.Type != STT_SECTION)
- return true;
+ if ((Config.DiscardMode == DiscardType::All ||
+ (Config.DiscardMode == DiscardType::Locals &&
+ StringRef(Sym.Name).startswith(".L"))) &&
+ Sym.Binding == STB_LOCAL && Sym.getShndx() != SHN_UNDEF &&
+ Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
+ return true;
- if (Config.StripAll || Config.StripAllGNU)
- return true;
+ if (Config.StripAll || Config.StripAllGNU)
+ return true;
- if (is_contained(Config.SymbolsToRemove, Sym.Name))
- return true;
+ if (is_contained(Config.SymbolsToRemove, Sym.Name))
+ return true;
- if (Config.StripUnneeded && !Sym.Referenced &&
- (Sym.Binding == STB_LOCAL || Sym.getShndx() == SHN_UNDEF) &&
- Sym.Type != STT_FILE && Sym.Type != STT_SECTION)
- return true;
+ if ((Config.StripUnneeded ||
+ is_contained(Config.UnneededSymbolsToRemove, Sym.Name)) &&
+ isUnneededSymbol(Sym))
+ return true;
- return false;
- });
- }
+ // We want to remove undefined symbols if all references have been stripped.
+ if (!Config.OnlySection.empty() && !Sym.Referenced &&
+ Sym.getShndx() == SHN_UNDEF)
+ return true;
+
+ return false;
+ };
+ return Obj.removeSymbols(RemoveSymbolsPred);
+}
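
A standalone sketch of the --keep-global-symbol / --globalize-symbol precedence described in the comment above; it ignores the SHN_UNDEF and common-symbol checks of the real code and uses hypothetical symbol names. Because the globalize check runs second, it wins for a symbol named by both options.

#include <cassert>
#include <set>
#include <string>

enum Binding { Local, Global };

// Simplified: only models the two options discussed above.
static Binding resolve(const std::string &Name, Binding B,
                       const std::set<std::string> &KeepGlobal,
                       const std::set<std::string> &Globalize) {
  if (!KeepGlobal.empty() && !KeepGlobal.count(Name))
    B = Local;  // --keep-global-symbol: everything not listed goes local
  if (Globalize.count(Name))
    B = Global; // --globalize-symbol: checked second, so it wins
  return B;
}

int main() {
  // "foo" is named by both options and therefore stays global.
  assert(resolve("foo", Global, /*KeepGlobal=*/{"bar"}, /*Globalize=*/{"foo"}) == Global);
  // "baz" is named by neither, and --keep-global-symbol demotes it.
  assert(resolve("baz", Global, /*KeepGlobal=*/{"bar"}, /*Globalize=*/{}) == Local);
  return 0;
}
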
+
+static Error replaceAndRemoveSections(const CopyConfig &Config, Object &Obj) {
SectionPred RemovePred = [](const SectionBase &) { return false; };
// Removes:
@@ -383,7 +477,7 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
if (Config.StripSections) {
RemovePred = [RemovePred](const SectionBase &Sec) {
- return RemovePred(Sec) || (Sec.Flags & SHF_ALLOC) == 0;
+ return RemovePred(Sec) || Sec.ParentSegment == nullptr;
};
}
@@ -399,7 +493,7 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
return true;
if (&Sec == Obj.SectionNames)
return false;
- return (Sec.Flags & SHF_ALLOC) == 0;
+ return (Sec.Flags & SHF_ALLOC) == 0 && Sec.ParentSegment == nullptr;
};
if (Config.StripAll)
@@ -410,9 +504,21 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
return false;
if (StringRef(Sec.Name).startswith(".gnu.warning"))
return false;
+ if (Sec.ParentSegment != nullptr)
+ return false;
return (Sec.Flags & SHF_ALLOC) == 0;
};
+ if (Config.ExtractPartition || Config.ExtractMainPartition) {
+ RemovePred = [RemovePred](const SectionBase &Sec) {
+ if (RemovePred(Sec))
+ return true;
+ if (Sec.Type == SHT_LLVM_PART_EHDR || Sec.Type == SHT_LLVM_PART_PHDR)
+ return true;
+ return (Sec.Flags & SHF_ALLOC) != 0 && !Sec.ParentSegment;
+ };
+ }
+
// Explicit copies:
if (!Config.OnlySection.empty()) {
RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
@@ -461,95 +567,210 @@ static void handleArgs(const CopyConfig &Config, Object &Obj,
}
if (Config.CompressionType != DebugCompressionType::None)
- replaceDebugSections(Config, Obj, RemovePred, isCompressable,
+ replaceDebugSections(Obj, RemovePred, isCompressable,
[&Config, &Obj](const SectionBase *S) {
return &Obj.addSection<CompressedSection>(
- *S, Config.CompressionType);
- });
+ *S, Config.CompressionType);
+ });
else if (Config.DecompressDebugSections)
replaceDebugSections(
- Config, Obj, RemovePred,
+ Obj, RemovePred,
[](const SectionBase &S) { return isa<CompressedSection>(&S); },
[&Obj](const SectionBase *S) {
auto CS = cast<CompressedSection>(S);
return &Obj.addSection<DecompressedSection>(*CS);
});
- Obj.removeSections(RemovePred);
+ return Obj.removeSections(Config.AllowBrokenLinks, RemovePred);
+}
- if (!Config.SectionsToRename.empty()) {
+// This function handles the high level operations of GNU objcopy including
+// handling command line options. It's important to outline certain properties
+// we expect to hold of the command line operations. Any operation that "keeps"
+// should keep regardless of a remove. Additionally any removal should respect
+// any previous removals. Lastly whether or not something is removed shouldn't
+// depend a) on the order the options occur in or b) on some opaque priority
+// system. The only priority is that keeps/copies overrule removes.
+static Error handleArgs(const CopyConfig &Config, Object &Obj,
+ const Reader &Reader, ElfType OutputElfType) {
+
+ if (!Config.SplitDWO.empty())
+ if (Error E =
+ splitDWOToFile(Config, Reader, Config.SplitDWO, OutputElfType))
+ return E;
+
+ if (Config.OutputArch) {
+ Obj.Machine = Config.OutputArch.getValue().EMachine;
+ Obj.OSABI = Config.OutputArch.getValue().OSABI;
+ }
+
+ // It is important to remove the sections first. For example, we want to
+ // remove the relocation sections before removing the symbols. That allows
+ // us to avoid reporting inappropriate errors about removing symbols
+ // named in relocations.
+ if (Error E = replaceAndRemoveSections(Config, Obj))
+ return E;
+
+ if (Error E = updateAndRemoveSymbols(Config, Obj))
+ return E;
+
+ if (!Config.SectionsToRename.empty() || !Config.AllocSectionsPrefix.empty()) {
+ DenseSet<SectionBase *> PrefixedSections;
for (auto &Sec : Obj.sections()) {
const auto Iter = Config.SectionsToRename.find(Sec.Name);
if (Iter != Config.SectionsToRename.end()) {
const SectionRename &SR = Iter->second;
Sec.Name = SR.NewName;
- if (SR.NewFlags.hasValue()) {
- // Preserve some flags which should not be dropped when setting flags.
- // Also, preserve anything OS/processor dependant.
- const uint64_t PreserveMask = ELF::SHF_COMPRESSED | ELF::SHF_EXCLUDE |
- ELF::SHF_GROUP | ELF::SHF_LINK_ORDER |
- ELF::SHF_MASKOS | ELF::SHF_MASKPROC |
- ELF::SHF_TLS | ELF::SHF_INFO_LINK;
- Sec.Flags = (Sec.Flags & PreserveMask) |
- (SR.NewFlags.getValue() & ~PreserveMask);
+ if (SR.NewFlags.hasValue())
+ setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
+ }
+
+ // Add a prefix to allocated sections and their relocation sections. This
+ // should be done after renaming the section via Config.SectionsToRename to
+ // imitate the GNU objcopy behavior.
+ if (!Config.AllocSectionsPrefix.empty()) {
+ if (Sec.Flags & SHF_ALLOC) {
+ Sec.Name = (Config.AllocSectionsPrefix + Sec.Name).str();
+ PrefixedSections.insert(&Sec);
+
+ // Rename relocation sections associated to the allocated sections.
+ // For example, if we rename .text to .prefix.text, we also rename
+ // .rel.text to .rel.prefix.text.
+ //
+ // Dynamic relocation sections (SHT_REL[A] with SHF_ALLOC) are handled
+ // above, e.g., .rela.plt is renamed to .prefix.rela.plt, not
+ // .rela.prefix.plt since GNU objcopy does so.
+ } else if (auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec)) {
+ auto *TargetSec = RelocSec->getSection();
+ if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
+ StringRef prefix;
+ switch (Sec.Type) {
+ case SHT_REL:
+ prefix = ".rel";
+ break;
+ case SHT_RELA:
+ prefix = ".rela";
+ break;
+ default:
+ continue;
+ }
+
+ // If the relocation section comes *after* the target section, we
+ // don't add Config.AllocSectionsPrefix because we've already added
+ // the prefix to TargetSec->Name. Otherwise, if the relocation
+ // section comes *before* the target section, we add the prefix.
+ if (PrefixedSections.count(TargetSec)) {
+ Sec.Name = (prefix + TargetSec->Name).str();
+ } else {
+ const auto Iter = Config.SectionsToRename.find(TargetSec->Name);
+ if (Iter != Config.SectionsToRename.end()) {
+ // Both `--rename-section` and `--prefix-alloc-sections` are
+ // given but the target section is not yet renamed.
+ Sec.Name =
+ (prefix + Config.AllocSectionsPrefix + Iter->second.NewName)
+ .str();
+ } else {
+ Sec.Name =
+ (prefix + Config.AllocSectionsPrefix + TargetSec->Name)
+ .str();
+ }
+ }
+ }
}
}
}
}
- if (!Config.AddSection.empty()) {
- for (const auto &Flag : Config.AddSection) {
- std::pair<StringRef, StringRef> SecPair = Flag.split("=");
- StringRef SecName = SecPair.first;
- StringRef File = SecPair.second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFile(File);
- if (!BufOrErr)
- reportError(File, BufOrErr.getError());
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
- ArrayRef<uint8_t> Data(
- reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
- Buf->getBufferSize());
- OwnedDataSection &NewSection =
- Obj.addSection<OwnedDataSection>(SecName, Data);
- if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
- NewSection.Type = SHT_NOTE;
+ if (!Config.SetSectionFlags.empty()) {
+ for (auto &Sec : Obj.sections()) {
+ const auto Iter = Config.SetSectionFlags.find(Sec.Name);
+ if (Iter != Config.SetSectionFlags.end()) {
+ const SectionFlagsUpdate &SFU = Iter->second;
+ setSectionFlagsAndType(Sec, SFU.NewFlags);
+ }
}
}
- if (!Config.DumpSection.empty()) {
- for (const auto &Flag : Config.DumpSection) {
- std::pair<StringRef, StringRef> SecPair = Flag.split("=");
- StringRef SecName = SecPair.first;
- StringRef File = SecPair.second;
- if (Error E = dumpSectionToFile(SecName, File, Obj))
- reportError(Config.InputFilename, std::move(E));
- }
+ for (const auto &Flag : Config.AddSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(File);
+ if (!BufOrErr)
+ return createFileError(File, errorCodeToError(BufOrErr.getError()));
+ std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+ Buf->getBufferSize());
+ OwnedDataSection &NewSection =
+ Obj.addSection<OwnedDataSection>(SecName, Data);
+ if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
+ NewSection.Type = SHT_NOTE;
+ }
+
+ for (const auto &Flag : Config.DumpSection) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ if (Error E = dumpSectionToFile(SecName, File, Obj))
+ return E;
}
if (!Config.AddGnuDebugLink.empty())
- Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink);
+ Obj.addSection<GnuDebugLinkSection>(Config.AddGnuDebugLink,
+ Config.GnuDebugLinkCRC32);
+
+ for (const NewSymbolInfo &SI : Config.SymbolsToAdd) {
+ SectionBase *Sec = Obj.findSection(SI.SectionName);
+ uint64_t Value = Sec ? Sec->Addr + SI.Value : SI.Value;
+ Obj.SymbolTable->addSymbol(
+ SI.SymbolName, SI.Bind, SI.Type, Sec, Value, SI.Visibility,
+ Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
+ }
+
+ if (Config.EntryExpr)
+ Obj.Entry = Config.EntryExpr(Obj.Entry);
+ return Error::success();
}
-void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
- Buffer &Out) {
+static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out,
+ ElfType OutputElfType) {
+ std::unique_ptr<Writer> Writer =
+ createWriter(Config, Obj, Out, OutputElfType);
+ if (Error E = Writer->finalize())
+ return E;
+ return Writer->write();
+}
+
+Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out) {
+ IHexReader Reader(&In);
+ std::unique_ptr<Object> Obj = Reader.create();
+ const ElfType OutputElfType =
+ getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
+ if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+ return E;
+ return writeOutput(Config, *Obj, Out, OutputElfType);
+}
+
+Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out) {
BinaryReader Reader(Config.BinaryArch, &In);
std::unique_ptr<Object> Obj = Reader.create();
// Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
// (-B<arch>).
- const ElfType OutputElfType = getOutputElfType(
- Config.OutputArch ? Config.OutputArch.getValue() : Config.BinaryArch);
- handleArgs(Config, *Obj, Reader, OutputElfType);
- std::unique_ptr<Writer> Writer =
- createWriter(Config, *Obj, Out, OutputElfType);
- Writer->finalize();
- Writer->write();
+ const ElfType OutputElfType =
+ getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
+ if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+ return E;
+ return writeOutput(Config, *Obj, Out, OutputElfType);
}
-void executeObjcopyOnBinary(const CopyConfig &Config,
- object::ELFObjectFileBase &In, Buffer &Out) {
- ELFReader Reader(&In);
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+ object::ELFObjectFileBase &In, Buffer &Out) {
+ ELFReader Reader(&In, Config.ExtractPartition);
std::unique_ptr<Object> Obj = Reader.create();
// Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
const ElfType OutputElfType =
@@ -558,25 +779,36 @@ void executeObjcopyOnBinary(const CopyConfig &Config,
ArrayRef<uint8_t> BuildIdBytes;
if (!Config.BuildIdLinkDir.empty()) {
- BuildIdBytes = unwrapOrError(findBuildID(In));
+ auto BuildIdBytesOrErr = findBuildID(Config, In);
+ if (auto E = BuildIdBytesOrErr.takeError())
+ return E;
+ BuildIdBytes = *BuildIdBytesOrErr;
+
if (BuildIdBytes.size() < 2)
- error("build ID in file '" + Config.InputFilename +
- "' is smaller than two bytes");
+ return createFileError(
+ Config.InputFilename,
+ createStringError(object_error::parse_failed,
+ "build ID is smaller than two bytes"));
}
- if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkInput) {
- linkToBuildIdDir(Config, Config.InputFilename,
- Config.BuildIdLinkInput.getValue(), BuildIdBytes);
- }
- handleArgs(Config, *Obj, Reader, OutputElfType);
- std::unique_ptr<Writer> Writer =
- createWriter(Config, *Obj, Out, OutputElfType);
- Writer->finalize();
- Writer->write();
- if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput) {
- linkToBuildIdDir(Config, Config.OutputFilename,
- Config.BuildIdLinkOutput.getValue(), BuildIdBytes);
- }
+ if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkInput)
+ if (Error E =
+ linkToBuildIdDir(Config, Config.InputFilename,
+ Config.BuildIdLinkInput.getValue(), BuildIdBytes))
+ return E;
+
+ if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
+ return createFileError(Config.InputFilename, std::move(E));
+
+ if (Error E = writeOutput(Config, *Obj, Out, OutputElfType))
+ return createFileError(Config.InputFilename, std::move(E));
+ if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput)
+ if (Error E =
+ linkToBuildIdDir(Config, Config.OutputFilename,
+ Config.BuildIdLinkOutput.getValue(), BuildIdBytes))
+ return createFileError(Config.OutputFilename, std::move(E));
+
+ return Error::success();
}
} // end namespace elf
diff --git a/tools/llvm-objcopy/ELF/ELFObjcopy.h b/tools/llvm-objcopy/ELF/ELFObjcopy.h
index 43f41c00ce5b..e13e237e29c4 100644
--- a/tools/llvm-objcopy/ELF/ELFObjcopy.h
+++ b/tools/llvm-objcopy/ELF/ELFObjcopy.h
@@ -1,9 +1,8 @@
//===- ELFObjcopy.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#define LLVM_TOOLS_OBJCOPY_ELFOBJCOPY_H
namespace llvm {
+class Error;
class MemoryBuffer;
namespace object {
@@ -22,10 +22,12 @@ struct CopyConfig;
class Buffer;
namespace elf {
-void executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
- Buffer &Out);
-void executeObjcopyOnBinary(const CopyConfig &Config,
- object::ELFObjectFileBase &In, Buffer &Out);
+Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out);
+Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out);
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+ object::ELFObjectFileBase &In, Buffer &Out);
} // end namespace elf
} // end namespace objcopy
diff --git a/tools/llvm-objcopy/ELF/Object.cpp b/tools/llvm-objcopy/ELF/Object.cpp
index 3d3e029c09eb..fa696380e17c 100644
--- a/tools/llvm-objcopy/ELF/Object.cpp
+++ b/tools/llvm-objcopy/ELF/Object.cpp
@@ -1,9 +1,8 @@
//===- Object.cpp ---------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,6 +17,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/Path.h"
@@ -25,6 +25,7 @@
#include <cstddef>
#include <cstdint>
#include <iterator>
+#include <unordered_set>
#include <utility>
#include <vector>
@@ -36,8 +37,8 @@ using namespace object;
using namespace ELF;
template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
- uint8_t *B = Buf.getBufferStart();
- B += Obj.ProgramHdrSegment.Offset + Seg.Index * sizeof(Elf_Phdr);
+ uint8_t *B = Buf.getBufferStart() + Obj.ProgramHdrSegment.Offset +
+ Seg.Index * sizeof(Elf_Phdr);
Elf_Phdr &Phdr = *reinterpret_cast<Elf_Phdr *>(B);
Phdr.p_type = Seg.Type;
Phdr.p_flags = Seg.Flags;
@@ -49,15 +50,24 @@ template <class ELFT> void ELFWriter<ELFT>::writePhdr(const Segment &Seg) {
Phdr.p_align = Seg.Align;
}
-void SectionBase::removeSectionReferences(const SectionBase *Sec) {}
-void SectionBase::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {}
+Error SectionBase::removeSectionReferences(
+ bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) {
+ return Error::success();
+}
+
+Error SectionBase::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+ return Error::success();
+}
+
void SectionBase::initialize(SectionTableRef SecTable) {}
void SectionBase::finalize() {}
void SectionBase::markSymbols() {}
+void SectionBase::replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &) {}
template <class ELFT> void ELFWriter<ELFT>::writeShdr(const SectionBase &Sec) {
- uint8_t *B = Buf.getBufferStart();
- B += Sec.HeaderOffset;
+ uint8_t *B = Buf.getBufferStart() + Sec.HeaderOffset;
Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
Shdr.sh_name = Sec.NameIndex;
Shdr.sh_type = Sec.Type;
@@ -113,30 +123,270 @@ template <class ELFT>
void ELFSectionSizer<ELFT>::visit(DecompressedSection &Sec) {}
void BinarySectionWriter::visit(const SectionIndexSection &Sec) {
- error("Cannot write symbol section index table '" + Sec.Name + "' ");
+ error("cannot write symbol section index table '" + Sec.Name + "' ");
}
void BinarySectionWriter::visit(const SymbolTableSection &Sec) {
- error("Cannot write symbol table '" + Sec.Name + "' out to binary");
+ error("cannot write symbol table '" + Sec.Name + "' out to binary");
}
void BinarySectionWriter::visit(const RelocationSection &Sec) {
- error("Cannot write relocation section '" + Sec.Name + "' out to binary");
+ error("cannot write relocation section '" + Sec.Name + "' out to binary");
}
void BinarySectionWriter::visit(const GnuDebugLinkSection &Sec) {
- error("Cannot write '" + Sec.Name + "' out to binary");
+ error("cannot write '" + Sec.Name + "' out to binary");
}
void BinarySectionWriter::visit(const GroupSection &Sec) {
- error("Cannot write '" + Sec.Name + "' out to binary");
+ error("cannot write '" + Sec.Name + "' out to binary");
}
void SectionWriter::visit(const Section &Sec) {
- if (Sec.Type == SHT_NOBITS)
- return;
- uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- llvm::copy(Sec.Contents, Buf);
+ if (Sec.Type != SHT_NOBITS)
+ llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
+}
+
+static bool addressOverflows32bit(uint64_t Addr) {
+ // Sign-extended 32-bit addresses (e.g. 0xFFFFFFFF80000000) are OK.
+ return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
+}
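
A quick standalone check of the rule above, using a copy of addressOverflows32bit(): sign-extended 32-bit values are accepted because the unsigned addition of 0x80000000 wraps them back into the 32-bit range.

#include <cassert>
#include <cstdint>

// Copy of addressOverflows32bit() from the hunk above.
static bool addressOverflows32bit(uint64_t Addr) {
  return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
}

int main() {
  assert(!addressOverflows32bit(0x7FFFFFFFULL));          // plain 32-bit value
  assert(!addressOverflows32bit(0xFFFFFFFF80000000ULL));  // sign-extended, accepted
  assert(addressOverflows32bit(0x100000000ULL));          // genuinely needs >32 bits
  return 0;
}
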
+
+template <class T> static T checkedGetHex(StringRef S) {
+ T Value;
+ bool Fail = S.getAsInteger(16, Value);
+ assert(!Fail);
+ (void)Fail;
+ return Value;
+}
+
+// Fills exactly Len bytes of buffer with hexadecimal characters
+// representing value 'X'.
+template <class T, class Iterator>
+static Iterator utohexstr(T X, Iterator It, size_t Len) {
+ // Fill range with '0'
+ std::fill(It, It + Len, '0');
+
+ for (long I = Len - 1; I >= 0; --I) {
+ unsigned char Mod = static_cast<unsigned char>(X) & 15;
+ *(It + I) = hexdigit(Mod, false);
+ X >>= 4;
+ }
+ assert(X == 0);
+ return It + Len;
+}
+
+uint8_t IHexRecord::getChecksum(StringRef S) {
+ assert((S.size() & 1) == 0);
+ uint8_t Checksum = 0;
+ while (!S.empty()) {
+ Checksum += checkedGetHex<uint8_t>(S.take_front(2));
+ S = S.drop_front(2);
+ }
+ return -Checksum;
+}
+
+IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr,
+ ArrayRef<uint8_t> Data) {
+ IHexLineData Line(getLineLength(Data.size()));
+ assert(Line.size());
+ auto Iter = Line.begin();
+ *Iter++ = ':';
+ Iter = utohexstr(Data.size(), Iter, 2);
+ Iter = utohexstr(Addr, Iter, 4);
+ Iter = utohexstr(Type, Iter, 2);
+ for (uint8_t X : Data)
+ Iter = utohexstr(X, Iter, 2);
+ StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter));
+ Iter = utohexstr(getChecksum(S), Iter, 2);
+ *Iter++ = '\r';
+ *Iter++ = '\n';
+ assert(Iter == Line.end());
+ return Line;
+}
+
+static Error checkRecord(const IHexRecord &R) {
+ switch (R.Type) {
+ case IHexRecord::Data:
+ if (R.HexData.size() == 0)
+ return createStringError(
+ errc::invalid_argument,
+ "zero data length is not allowed for data records");
+ break;
+ case IHexRecord::EndOfFile:
+ break;
+ case IHexRecord::SegmentAddr:
+ // 20-bit segment address. Data length must be 2 bytes
+ // (4 hex characters).
+ if (R.HexData.size() != 4)
+ return createStringError(
+ errc::invalid_argument,
+ "segment address data should be 2 bytes in size");
+ break;
+ case IHexRecord::StartAddr80x86:
+ case IHexRecord::StartAddr:
+ if (R.HexData.size() != 8)
+ return createStringError(errc::invalid_argument,
+ "start address data should be 4 bytes in size");
+ // According to the Intel HEX specification, the '03' record
+ // only specifies the code address within the 20-bit
+ // segmented address space of the 8086/80186. This
+ // means the 12 high-order bits should be zero.
+ if (R.Type == IHexRecord::StartAddr80x86 &&
+ R.HexData.take_front(3) != "000")
+ return createStringError(errc::invalid_argument,
+ "start address exceeds 20 bit for 80x86");
+ break;
+ case IHexRecord::ExtendedAddr:
+ // 16-31 bits of linear base address
+ if (R.HexData.size() != 4)
+ return createStringError(
+ errc::invalid_argument,
+ "extended address data should be 2 bytes in size");
+ break;
+ default:
+ // Unknown record type
+ return createStringError(errc::invalid_argument, "unknown record type: %u",
+ static_cast<unsigned>(R.Type));
+ }
+ return Error::success();
+}
+
+// Checks that IHEX line contains valid characters.
+// This allows converting hexadecimal data to integers
+// without extra verification.
+static Error checkChars(StringRef Line) {
+ assert(!Line.empty());
+ if (Line[0] != ':')
+ return createStringError(errc::invalid_argument,
+ "missing ':' in the beginning of line.");
+
+ for (size_t Pos = 1; Pos < Line.size(); ++Pos)
+ if (hexDigitValue(Line[Pos]) == -1U)
+ return createStringError(errc::invalid_argument,
+ "invalid character at position %zu.", Pos + 1);
+ return Error::success();
+}
+
+Expected<IHexRecord> IHexRecord::parse(StringRef Line) {
+ assert(!Line.empty());
+
+ // ':' + Length + Address + Type + Checksum with empty data ':LLAAAATTCC'
+ if (Line.size() < 11)
+ return createStringError(errc::invalid_argument,
+ "line is too short: %zu chars.", Line.size());
+
+ if (Error E = checkChars(Line))
+ return std::move(E);
+
+ IHexRecord Rec;
+ size_t DataLen = checkedGetHex<uint8_t>(Line.substr(1, 2));
+ if (Line.size() != getLength(DataLen))
+ return createStringError(errc::invalid_argument,
+ "invalid line length %zu (should be %zu)",
+ Line.size(), getLength(DataLen));
+
+ Rec.Addr = checkedGetHex<uint16_t>(Line.substr(3, 4));
+ Rec.Type = checkedGetHex<uint8_t>(Line.substr(7, 2));
+ Rec.HexData = Line.substr(9, DataLen * 2);
+
+ if (getChecksum(Line.drop_front(1)) != 0)
+ return createStringError(errc::invalid_argument, "incorrect checksum.");
+ if (Error E = checkRecord(Rec))
+ return std::move(E);
+ return Rec;
+}
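
To make the ':LLAAAATTCC' layout concrete, here is a well-known, hand-checked Intel HEX data record (the ASCII string "address gap" loaded at offset 0x0010); the standalone sketch below verifies its checksum the same way IHexRecord::getChecksum() does, by summing the byte pairs and requiring the total to be zero modulo 256.

#include <cassert>
#include <cstdint>
#include <string>

// Sums the hex byte pairs of a record (everything after the ':').
static uint8_t byteSum(const std::string &Hex) {
  uint8_t Sum = 0;
  for (std::size_t I = 0; I < Hex.size(); I += 2)
    Sum += static_cast<uint8_t>(std::stoul(Hex.substr(I, 2), nullptr, 16));
  return Sum;
}

int main() {
  // ":0B0010006164647265737320676170A7"
  //   LL   = 0B   -> 11 data bytes
  //   AAAA = 0010 -> load offset 0x0010
  //   TT   = 00   -> data record
  //   data        -> "address gap" in ASCII
  //   CC   = A7   -> two's-complement checksum of all preceding bytes
  assert(byteSum("0B0010006164647265737320676170A7") == 0);
  return 0;
}
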
+
+static uint64_t sectionPhysicalAddr(const SectionBase *Sec) {
+ Segment *Seg = Sec->ParentSegment;
+ if (Seg && Seg->Type != ELF::PT_LOAD)
+ Seg = nullptr;
+ return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset
+ : Sec->Addr;
+}
+
+void IHexSectionWriterBase::writeSection(const SectionBase *Sec,
+ ArrayRef<uint8_t> Data) {
+ assert(Data.size() == Sec->Size);
+ const uint32_t ChunkSize = 16;
+ uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU;
+ while (!Data.empty()) {
+ uint64_t DataSize = std::min<uint64_t>(Data.size(), ChunkSize);
+ if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) {
+ if (Addr > 0xFFFFFU) {
+ // Write extended address record, zeroing segment address
+ // if needed.
+ if (SegmentAddr != 0)
+ SegmentAddr = writeSegmentAddr(0U);
+ BaseAddr = writeBaseAddr(Addr);
+ } else {
+ // We can still remain 16-bit
+ SegmentAddr = writeSegmentAddr(Addr);
+ }
+ }
+ uint64_t SegOffset = Addr - BaseAddr - SegmentAddr;
+ assert(SegOffset <= 0xFFFFU);
+ DataSize = std::min(DataSize, 0x10000U - SegOffset);
+ writeData(0, SegOffset, Data.take_front(DataSize));
+ Addr += DataSize;
+ Data = Data.drop_front(DataSize);
+ }
+}
+
+uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) {
+ assert(Addr <= 0xFFFFFU);
+ uint8_t Data[] = {static_cast<uint8_t>((Addr & 0xF0000U) >> 12), 0};
+ writeData(2, 0, Data);
+ return Addr & 0xF0000U;
+}
+
+uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
+ assert(Addr <= 0xFFFFFFFFU);
+ uint64_t Base = Addr & 0xFFFF0000U;
+ uint8_t Data[] = {static_cast<uint8_t>(Base >> 24),
+ static_cast<uint8_t>((Base >> 16) & 0xFF)};
+ writeData(4, 0, Data);
+ return Base;
+}
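
A standalone sketch of the arithmetic above: for a hypothetical 32-bit address 0x12345678, writeBaseAddr() emits a type 04 (extended linear address) record carrying only the upper 16 bits, and the following data record then uses the remaining 16 bits as its load offset.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Addr = 0x12345678;          // hypothetical physical address of a chunk
  uint64_t Base = Addr & 0xFFFF0000U;  // 0x12340000, as computed by writeBaseAddr()
  uint8_t Data[2] = {static_cast<uint8_t>(Base >> 24),
                     static_cast<uint8_t>((Base >> 16) & 0xFF)};
  assert(Data[0] == 0x12 && Data[1] == 0x34); // payload of the type 04 record
  // The following type 00 data record then carries Addr - Base = 0x5678 as its
  // 16-bit load offset.
  assert(Addr - Base == 0x5678);
  return 0;
}
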
+
+void IHexSectionWriterBase::writeData(uint8_t Type, uint16_t Addr,
+ ArrayRef<uint8_t> Data) {
+ Offset += IHexRecord::getLineLength(Data.size());
+}
+
+void IHexSectionWriterBase::visit(const Section &Sec) {
+ writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
+ writeSection(&Sec, Sec.Data);
+}
+
+void IHexSectionWriterBase::visit(const StringTableSection &Sec) {
+ // Check that sizer has already done its work
+ assert(Sec.Size == Sec.StrTabBuilder.getSize());
+ // We are free to pass an invalid pointer to writeSection as long
+ // as we don't actually write any data. The real writer class has
+ // to override this method.
+ writeSection(&Sec, {nullptr, static_cast<size_t>(Sec.Size)});
+}
+
+void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
+ writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
+ ArrayRef<uint8_t> Data) {
+ IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data);
+ memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size());
+ Offset += HexData.size();
+}
+
+void IHexSectionWriter::visit(const StringTableSection &Sec) {
+ assert(Sec.Size == Sec.StrTabBuilder.getSize());
+ std::vector<uint8_t> Data(Sec.Size);
+ Sec.StrTabBuilder.write(Data.data());
+ writeSection(&Sec, Data);
}
void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
@@ -144,8 +394,7 @@ void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
void SectionWriter::visit(const OwnedDataSection &Sec) {
- uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- llvm::copy(Sec.Data, Buf);
+ llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
}
static const std::vector<uint8_t> ZlibGnuMagic = {'Z', 'L', 'I', 'B'};
@@ -161,8 +410,7 @@ getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
const bool IsGnuDebug = isDataGnuCompressed(Data);
const uint64_t DecompressedSize =
IsGnuDebug
- ? support::endian::read64be(reinterpret_cast<const uint64_t *>(
- Data.data() + ZlibGnuMagic.size()))
+ ? support::endian::read64be(Data.data() + ZlibGnuMagic.size())
: reinterpret_cast<const Elf_Chdr_Impl<ELFT> *>(Data.data())->ch_size;
const uint64_t DecompressedAlign =
IsGnuDebug ? 1
@@ -174,13 +422,6 @@ getDecompressedSizeAndAlignment(ArrayRef<uint8_t> Data) {
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
- uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
-
- if (!zlib::isAvailable()) {
- std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
- return;
- }
-
const size_t DataOffset = isDataGnuCompressed(Sec.OriginalData)
? (ZlibGnuMagic.size() + sizeof(Sec.Size))
: sizeof(Elf_Chdr_Impl<ELFT>);
@@ -194,11 +435,12 @@ void ELFSectionWriter<ELFT>::visit(const DecompressedSection &Sec) {
static_cast<size_t>(Sec.Size)))
reportError(Sec.Name, std::move(E));
+ uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
std::copy(DecompressedContent.begin(), DecompressedContent.end(), Buf);
}
void BinarySectionWriter::visit(const DecompressedSection &Sec) {
- error("Cannot write compressed section '" + Sec.Name + "' ");
+ error("cannot write compressed section '" + Sec.Name + "' ");
}
void DecompressedSection::accept(SectionVisitor &Visitor) const {
@@ -217,15 +459,22 @@ void OwnedDataSection::accept(MutableSectionVisitor &Visitor) {
Visitor.visit(*this);
}
+void OwnedDataSection::appendHexData(StringRef HexData) {
+ assert((HexData.size() & 1) == 0);
+ while (!HexData.empty()) {
+ Data.push_back(checkedGetHex<uint8_t>(HexData.take_front(2)));
+ HexData = HexData.drop_front(2);
+ }
+ Size = Data.size();
+}
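
A minimal standalone sketch (not the patch's checkedGetHex) of the conversion appendHexData performs: each pair of hex characters becomes one byte, so "DEADBEEF" appends the bytes DE AD BE EF.

#include <cstdint>
#include <string>
#include <vector>

// Hypothetical helper mirroring appendHexData's loop over two-character chunks.
std::vector<uint8_t> hexToBytes(const std::string &Hex) {
  std::vector<uint8_t> Bytes;
  for (size_t I = 0; I + 1 < Hex.size(); I += 2)
    Bytes.push_back(
        static_cast<uint8_t>(std::stoul(Hex.substr(I, 2), nullptr, 16)));
  return Bytes; // hexToBytes("DEADBEEF") == {0xDE, 0xAD, 0xBE, 0xEF}
}
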
+
void BinarySectionWriter::visit(const CompressedSection &Sec) {
- error("Cannot write compressed section '" + Sec.Name + "' ");
+ error("cannot write compressed section '" + Sec.Name + "' ");
}
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) {
- uint8_t *Buf = Out.getBufferStart();
- Buf += Sec.Offset;
-
+ uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
if (Sec.CompressionType == DebugCompressionType::None) {
std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf);
return;
@@ -255,12 +504,6 @@ CompressedSection::CompressedSection(const SectionBase &Sec,
DebugCompressionType CompressionType)
: SectionBase(Sec), CompressionType(CompressionType),
DecompressedSize(Sec.OriginalData.size()), DecompressedAlign(Sec.Align) {
-
- if (!zlib::isAvailable()) {
- CompressionType = DebugCompressionType::None;
- return;
- }
-
if (Error E = zlib::compress(
StringRef(reinterpret_cast<const char *>(OriginalData.data()),
OriginalData.size()),
@@ -299,16 +542,16 @@ void CompressedSection::accept(MutableSectionVisitor &Visitor) {
Visitor.visit(*this);
}
-void StringTableSection::addString(StringRef Name) {
- StrTabBuilder.add(Name);
- Size = StrTabBuilder.getSize();
-}
+void StringTableSection::addString(StringRef Name) { StrTabBuilder.add(Name); }
uint32_t StringTableSection::findIndex(StringRef Name) const {
return StrTabBuilder.getOffset(Name);
}
-void StringTableSection::finalize() { StrTabBuilder.finalize(); }
+void StringTableSection::prepareForLayout() {
+ StrTabBuilder.finalize();
+ Size = StrTabBuilder.getSize();
+}
void SectionWriter::visit(const StringTableSection &Sec) {
Sec.StrTabBuilder.write(Out.getBufferStart() + Sec.Offset);
@@ -325,8 +568,7 @@ void StringTableSection::accept(MutableSectionVisitor &Visitor) {
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const SectionIndexSection &Sec) {
uint8_t *Buf = Out.getBufferStart() + Sec.Offset;
- auto *IndexesBuffer = reinterpret_cast<Elf_Word *>(Buf);
- llvm::copy(Sec.Indexes, IndexesBuffer);
+ llvm::copy(Sec.Indexes, reinterpret_cast<Elf_Word *>(Buf));
}
void SectionIndexSection::initialize(SectionTableRef SecTable) {
@@ -355,6 +597,11 @@ static bool isValidReservedSectionIndex(uint16_t Index, uint16_t Machine) {
case SHN_COMMON:
return true;
}
+
+ if (Machine == EM_AMDGPU) {
+ return Index == SHN_AMDGPU_LDS;
+ }
+
if (Machine == EM_HEXAGON) {
switch (Index) {
case SHN_HEXAGON_SCOMMON:
@@ -376,21 +623,17 @@ uint16_t Symbol::getShndx() const {
return SHN_XINDEX;
return DefinedIn->Index;
}
- switch (ShndxType) {
- // This means that we don't have a defined section but we do need to
- // output a legitimate section index.
- case SYMBOL_SIMPLE_INDEX:
+
+ if (ShndxType == SYMBOL_SIMPLE_INDEX) {
+ // This means that we don't have a defined section but we do need to
+ // output a legitimate section index.
return SHN_UNDEF;
- case SYMBOL_ABS:
- case SYMBOL_COMMON:
- case SYMBOL_HEXAGON_SCOMMON:
- case SYMBOL_HEXAGON_SCOMMON_2:
- case SYMBOL_HEXAGON_SCOMMON_4:
- case SYMBOL_HEXAGON_SCOMMON_8:
- case SYMBOL_XINDEX:
- return static_cast<uint16_t>(ShndxType);
}
- llvm_unreachable("Symbol with invalid ShndxType encountered");
+
+ assert(ShndxType == SYMBOL_ABS || ShndxType == SYMBOL_COMMON ||
+ (ShndxType >= SYMBOL_LOPROC && ShndxType <= SYMBOL_HIPROC) ||
+ (ShndxType >= SYMBOL_LOOS && ShndxType <= SYMBOL_HIOS));
+ return static_cast<uint16_t>(ShndxType);
}
bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; }
@@ -404,7 +647,7 @@ void SymbolTableSection::assignIndices() {
void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
SectionBase *DefinedIn, uint64_t Value,
uint8_t Visibility, uint16_t Shndx,
- uint64_t Size) {
+ uint64_t SymbolSize) {
Symbol Sym;
Sym.Name = Name.str();
Sym.Binding = Bind;
@@ -420,21 +663,28 @@ void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
}
Sym.Value = Value;
Sym.Visibility = Visibility;
- Sym.Size = Size;
+ Sym.Size = SymbolSize;
Sym.Index = Symbols.size();
Symbols.emplace_back(llvm::make_unique<Symbol>(Sym));
Size += this->EntrySize;
}
-void SymbolTableSection::removeSectionReferences(const SectionBase *Sec) {
- if (SectionIndexTable == Sec)
+Error SymbolTableSection::removeSectionReferences(
+ bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) {
+ if (ToRemove(SectionIndexTable))
SectionIndexTable = nullptr;
- if (SymbolNames == Sec) {
- error("String table " + SymbolNames->Name +
- " cannot be removed because it is referenced by the symbol table " +
- this->Name);
+ if (ToRemove(SymbolNames)) {
+ if (!AllowBrokenLinks)
+ return createStringError(
+ llvm::errc::invalid_argument,
+ "string table '%s' cannot be removed because it is "
+ "referenced by the symbol table '%s'",
+ SymbolNames->Name.data(), this->Name.data());
+ SymbolNames = nullptr;
}
- removeSymbols([Sec](const Symbol &Sym) { return Sym.DefinedIn == Sec; });
+ return removeSymbols(
+ [ToRemove](const Symbol &Sym) { return ToRemove(Sym.DefinedIn); });
}
void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) {
@@ -446,7 +696,7 @@ void SymbolTableSection::updateSymbols(function_ref<void(Symbol &)> Callable) {
assignIndices();
}
-void SymbolTableSection::removeSymbols(
+Error SymbolTableSection::removeSymbols(
function_ref<bool(const Symbol &)> ToRemove) {
Symbols.erase(
std::remove_if(std::begin(Symbols) + 1, std::end(Symbols),
@@ -454,6 +704,14 @@ void SymbolTableSection::removeSymbols(
std::end(Symbols));
Size = Symbols.size() * EntrySize;
assignIndices();
+ return Error::success();
+}
+
+void SymbolTableSection::replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+ for (std::unique_ptr<Symbol> &Sym : Symbols)
+ if (SectionBase *To = FromTo.lookup(Sym->DefinedIn))
+ Sym->DefinedIn = To;
}
void SymbolTableSection::initialize(SectionTableRef SecTable) {
@@ -467,40 +725,50 @@ void SymbolTableSection::initialize(SectionTableRef SecTable) {
}
void SymbolTableSection::finalize() {
- // Make sure SymbolNames is finalized before getting name indexes.
- SymbolNames->finalize();
-
uint32_t MaxLocalIndex = 0;
- for (auto &Sym : Symbols) {
- Sym->NameIndex = SymbolNames->findIndex(Sym->Name);
+ for (std::unique_ptr<Symbol> &Sym : Symbols) {
+ Sym->NameIndex =
+ SymbolNames == nullptr ? 0 : SymbolNames->findIndex(Sym->Name);
if (Sym->Binding == STB_LOCAL)
MaxLocalIndex = std::max(MaxLocalIndex, Sym->Index);
}
// Now we need to set the Link and Info fields.
- Link = SymbolNames->Index;
+ Link = SymbolNames == nullptr ? 0 : SymbolNames->Index;
Info = MaxLocalIndex + 1;
}
void SymbolTableSection::prepareForLayout() {
- // Add all potential section indexes before file layout so that the section
- // index section has the approprite size.
- if (SectionIndexTable != nullptr) {
- for (const auto &Sym : Symbols) {
- if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE)
- SectionIndexTable->addIndex(Sym->DefinedIn->Index);
- else
- SectionIndexTable->addIndex(SHN_UNDEF);
- }
- }
+ // Reserve a proper amount of space in the section index table so we can
+ // lay out sections correctly. We will fill the table with correct
+ // indexes later in fillShndxTable.
+ if (SectionIndexTable)
+ SectionIndexTable->reserve(Symbols.size());
+
// Add all of our strings to SymbolNames so that SymbolNames has the right
// size before layout is decided.
- for (auto &Sym : Symbols)
- SymbolNames->addString(Sym->Name);
+ // If the symbol names section has been removed, don't try to add strings to
+ // the table.
+ if (SymbolNames != nullptr)
+ for (std::unique_ptr<Symbol> &Sym : Symbols)
+ SymbolNames->addString(Sym->Name);
+}
+
+void SymbolTableSection::fillShndxTable() {
+ if (SectionIndexTable == nullptr)
+ return;
+ // Fill section index table with real section indexes. This function must
+ // be called after assignOffsets.
+ for (const std::unique_ptr<Symbol> &Sym : Symbols) {
+ if (Sym->DefinedIn != nullptr && Sym->DefinedIn->Index >= SHN_LORESERVE)
+ SectionIndexTable->addIndex(Sym->DefinedIn->Index);
+ else
+ SectionIndexTable->addIndex(SHN_UNDEF);
+ }
}
const Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) const {
if (Symbols.size() <= Index)
- error("Invalid symbol index: " + Twine(Index));
+ error("invalid symbol index: " + Twine(Index));
return Symbols[Index].get();
}
@@ -511,11 +779,9 @@ Symbol *SymbolTableSection::getSymbolByIndex(uint32_t Index) {
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const SymbolTableSection &Sec) {
- uint8_t *Buf = Out.getBufferStart();
- Buf += Sec.Offset;
- Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Buf);
+ Elf_Sym *Sym = reinterpret_cast<Elf_Sym *>(Out.getBufferStart() + Sec.Offset);
// Loop though symbols setting each entry of the symbol table.
- for (auto &Symbol : Sec.Symbols) {
+ for (const std::unique_ptr<Symbol> &Symbol : Sec.Symbols) {
Sym->st_name = Symbol->NameIndex;
Sym->st_value = Symbol->Value;
Sym->st_size = Symbol->Size;
@@ -535,16 +801,31 @@ void SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
Visitor.visit(*this);
}
-template <class SymTabType>
-void RelocSectionWithSymtabBase<SymTabType>::removeSectionReferences(
- const SectionBase *Sec) {
- if (Symbols == Sec) {
- error("Symbol table " + Symbols->Name +
- " cannot be removed because it is "
- "referenced by the relocation "
- "section " +
- this->Name);
+Error RelocationSection::removeSectionReferences(
+ bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) {
+ if (ToRemove(Symbols)) {
+ if (!AllowBrokenLinks)
+ return createStringError(
+ llvm::errc::invalid_argument,
+ "symbol table '%s' cannot be removed because it is "
+ "referenced by the relocation section '%s'",
+ Symbols->Name.data(), this->Name.data());
+ Symbols = nullptr;
}
+
+ for (const Relocation &R : Relocations) {
+ if (!R.RelocSymbol->DefinedIn || !ToRemove(R.RelocSymbol->DefinedIn))
+ continue;
+ return createStringError(llvm::errc::invalid_argument,
+ "section '%s' cannot be removed: (%s+0x%" PRIx64
+ ") has relocation against symbol '%s'",
+ R.RelocSymbol->DefinedIn->Name.data(),
+ SecToApplyRel->Name.data(), R.Offset,
+ R.RelocSymbol->Name.c_str());
+ }
+
+ return Error::success();
}
template <class SymTabType>
@@ -609,12 +890,15 @@ void RelocationSection::accept(MutableSectionVisitor &Visitor) {
Visitor.visit(*this);
}
-void RelocationSection::removeSymbols(
+Error RelocationSection::removeSymbols(
function_ref<bool(const Symbol &)> ToRemove) {
for (const Relocation &Reloc : Relocations)
if (ToRemove(*Reloc.RelocSymbol))
- error("not stripping symbol '" + Reloc.RelocSymbol->Name +
- "' because it is named in a relocation");
+ return createStringError(
+ llvm::errc::invalid_argument,
+ "not stripping symbol '%s' because it is named in a relocation",
+ Reloc.RelocSymbol->Name.data());
+ return Error::success();
}
void RelocationSection::markSymbols() {
@@ -622,9 +906,15 @@ void RelocationSection::markSymbols() {
Reloc.RelocSymbol->Referenced = true;
}
+void RelocationSection::replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+ // Update the target section if it was replaced.
+ if (SectionBase *To = FromTo.lookup(SecToApplyRel))
+ SecToApplyRel = To;
+}
+
void SectionWriter::visit(const DynamicRelocationSection &Sec) {
- llvm::copy(Sec.Contents,
- Out.getBufferStart() + Sec.Offset);
+ llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
}
void DynamicRelocationSection::accept(SectionVisitor &Visitor) const {
@@ -635,13 +925,38 @@ void DynamicRelocationSection::accept(MutableSectionVisitor &Visitor) {
Visitor.visit(*this);
}
-void Section::removeSectionReferences(const SectionBase *Sec) {
- if (LinkSection == Sec) {
- error("Section " + LinkSection->Name +
- " cannot be removed because it is "
- "referenced by the section " +
- this->Name);
+Error DynamicRelocationSection::removeSectionReferences(
+ bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
+ if (ToRemove(Symbols)) {
+ if (!AllowBrokenLinks)
+ return createStringError(
+ llvm::errc::invalid_argument,
+ "symbol table '%s' cannot be removed because it is "
+ "referenced by the relocation section '%s'",
+ Symbols->Name.data(), this->Name.data());
+ Symbols = nullptr;
+ }
+
+ // SecToApplyRel contains the section referenced by the sh_info field: the
+ // section to which this relocation section applies. When we remove any
+ // sections we also remove their relocation sections. Since we do that much
+ // earlier, this assert should never be triggered.
+ assert(!SecToApplyRel || !ToRemove(SecToApplyRel));
+ return Error::success();
+}
+
+Error Section::removeSectionReferences(
+ bool AllowBrokenDependency,
+ function_ref<bool(const SectionBase *)> ToRemove) {
+ if (ToRemove(LinkSection)) {
+ if (!AllowBrokenDependency)
+ return createStringError(llvm::errc::invalid_argument,
+ "section '%s' cannot be removed because it is "
+ "referenced by the section '%s'",
+ LinkSection->Name.data(), this->Name.data());
+ LinkSection = nullptr;
}
+ return Error::success();
}
void GroupSection::finalize() {
@@ -649,13 +964,13 @@ void GroupSection::finalize() {
this->Link = SymTab->Index;
}
-void GroupSection::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
- if (ToRemove(*Sym)) {
- error("Symbol " + Sym->Name +
- " cannot be removed because it is "
- "referenced by the section " +
- this->Name + "[" + Twine(this->Index) + "]");
- }
+Error GroupSection::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+ if (ToRemove(*Sym))
+ return createStringError(llvm::errc::invalid_argument,
+ "symbol '%s' cannot be removed because it is "
+ "referenced by the section '%s[%d]'",
+ Sym->Name.data(), this->Name.data(), this->Index);
+ return Error::success();
}
void GroupSection::markSymbols() {
@@ -663,19 +978,26 @@ void GroupSection::markSymbols() {
Sym->Referenced = true;
}
+void GroupSection::replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+ for (SectionBase *&Sec : GroupMembers)
+ if (SectionBase *To = FromTo.lookup(Sec))
+ Sec = To;
+}
+
void Section::initialize(SectionTableRef SecTable) {
- if (Link != ELF::SHN_UNDEF) {
- LinkSection =
- SecTable.getSection(Link, "Link field value " + Twine(Link) +
- " in section " + Name + " is invalid");
- if (LinkSection->Type == ELF::SHT_SYMTAB)
- LinkSection = nullptr;
- }
+ if (Link == ELF::SHN_UNDEF)
+ return;
+ LinkSection =
+ SecTable.getSection(Link, "Link field value " + Twine(Link) +
+ " in section " + Name + " is invalid");
+ if (LinkSection->Type == ELF::SHT_SYMTAB)
+ LinkSection = nullptr;
}
void Section::finalize() { this->Link = LinkSection ? LinkSection->Index : 0; }
-void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
+void GnuDebugLinkSection::init(StringRef File) {
FileName = sys::path::filename(File);
// The format for the .gnu_debuglink starts with the file name and is
// followed by a null terminator and then the CRC32 of the file. The CRC32
@@ -690,31 +1012,21 @@ void GnuDebugLinkSection::init(StringRef File, StringRef Data) {
// establish the order that sections should go in. By using the maximum
// possible offset we cause this section to wind up at the end.
OriginalOffset = std::numeric_limits<uint64_t>::max();
- JamCRC CRC;
- CRC.update(ArrayRef<char>(Data.data(), Data.size()));
- // The CRC32 value needs to be complemented because the JamCRC dosn't
- // finalize the CRC32 value. It also dosn't negate the initial CRC32 value
- // but it starts by default at 0xFFFFFFFF which is the complement of zero.
- CRC32 = ~CRC.getCRC();
}
-GnuDebugLinkSection::GnuDebugLinkSection(StringRef File) : FileName(File) {
- // Read in the file to compute the CRC of it.
- auto DebugOrErr = MemoryBuffer::getFile(File);
- if (!DebugOrErr)
- error("'" + File + "': " + DebugOrErr.getError().message());
- auto Debug = std::move(*DebugOrErr);
- init(File, Debug->getBuffer());
+GnuDebugLinkSection::GnuDebugLinkSection(StringRef File,
+ uint32_t PrecomputedCRC)
+ : FileName(File), CRC32(PrecomputedCRC) {
+ init(File);
}
template <class ELFT>
void ELFSectionWriter<ELFT>::visit(const GnuDebugLinkSection &Sec) {
- auto Buf = Out.getBufferStart() + Sec.Offset;
- char *File = reinterpret_cast<char *>(Buf);
+ unsigned char *Buf = Out.getBufferStart() + Sec.Offset;
Elf_Word *CRC =
reinterpret_cast<Elf_Word *>(Buf + Sec.Size - sizeof(Elf_Word));
*CRC = Sec.CRC32;
- llvm::copy(Sec.FileName, File);
+ llvm::copy(Sec.FileName, Buf);
}
void GnuDebugLinkSection::accept(SectionVisitor &Visitor) const {
@@ -730,7 +1042,7 @@ void ELFSectionWriter<ELFT>::visit(const GroupSection &Sec) {
ELF::Elf32_Word *Buf =
reinterpret_cast<ELF::Elf32_Word *>(Out.getBufferStart() + Sec.Offset);
*Buf++ = Sec.FlagWord;
- for (const auto *S : Sec.GroupMembers)
+ for (SectionBase *S : Sec.GroupMembers)
support::endian::write32<ELFT::TargetEndianness>(Buf++, S->Index);
}
@@ -750,6 +1062,20 @@ static bool sectionWithinSegment(const SectionBase &Section,
// segments and ensures that the section "belongs" to the second segment and
// not the first.
uint64_t SecSize = Section.Size ? Section.Size : 1;
+
+ if (Section.Type == SHT_NOBITS) {
+ if (!(Section.Flags & SHF_ALLOC))
+ return false;
+
+ bool SectionIsTLS = Section.Flags & SHF_TLS;
+ bool SegmentIsTLS = Segment.Type == PT_TLS;
+ if (SectionIsTLS != SegmentIsTLS)
+ return false;
+
+ return Segment.VAddr <= Section.Addr &&
+ Segment.VAddr + Segment.MemSize >= Section.Addr + SecSize;
+ }
+
return Segment.Offset <= Section.OriginalOffset &&
Segment.Offset + Segment.FileSize >= Section.OriginalOffset + SecSize;
}
@@ -781,7 +1107,7 @@ static bool compareSegmentsByPAddr(const Segment *A, const Segment *B) {
return A->Index < B->Index;
}
-void BinaryELFBuilder::initFileHeader() {
+void BasicELFBuilder::initFileHeader() {
Obj->Flags = 0x0;
Obj->Type = ET_REL;
Obj->OSABI = ELFOSABI_NONE;
@@ -791,9 +1117,9 @@ void BinaryELFBuilder::initFileHeader() {
Obj->Version = 1;
}
-void BinaryELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
+void BasicELFBuilder::initHeaderSegment() { Obj->ElfHdrSegment.Index = 0; }
-StringTableSection *BinaryELFBuilder::addStrTab() {
+StringTableSection *BasicELFBuilder::addStrTab() {
auto &StrTab = Obj->addSection<StringTableSection>();
StrTab.Name = ".strtab";
@@ -801,7 +1127,7 @@ StringTableSection *BinaryELFBuilder::addStrTab() {
return &StrTab;
}
-SymbolTableSection *BinaryELFBuilder::addSymTab(StringTableSection *StrTab) {
+SymbolTableSection *BasicELFBuilder::addSymTab(StringTableSection *StrTab) {
auto &SymTab = Obj->addSection<SymbolTableSection>();
SymTab.Name = ".symtab";
@@ -814,6 +1140,11 @@ SymbolTableSection *BinaryELFBuilder::addSymTab(StringTableSection *StrTab) {
return &SymTab;
}
+void BasicELFBuilder::initSections() {
+ for (auto &Section : Obj->sections())
+ Section.initialize(Obj->sections());
+}
+
void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
auto Data = ArrayRef<uint8_t>(
reinterpret_cast<const uint8_t *>(MemBuf->getBufferStart()),
@@ -837,25 +1168,75 @@ void BinaryELFBuilder::addData(SymbolTableSection *SymTab) {
/*Value=*/DataSection.Size, STV_DEFAULT, SHN_ABS, 0);
}
-void BinaryELFBuilder::initSections() {
- for (auto &Section : Obj->sections()) {
- Section.initialize(Obj->sections());
+std::unique_ptr<Object> BinaryELFBuilder::build() {
+ initFileHeader();
+ initHeaderSegment();
+
+ SymbolTableSection *SymTab = addSymTab(addStrTab());
+ initSections();
+ addData(SymTab);
+
+ return std::move(Obj);
+}
+
+// Adds sections from the IHEX data file. The data should have been
+// fully validated by this time.
+void IHexELFBuilder::addDataSections() {
+ OwnedDataSection *Section = nullptr;
+ uint64_t SegmentAddr = 0, BaseAddr = 0;
+ uint32_t SecNo = 1;
+
+ for (const IHexRecord &R : Records) {
+ uint64_t RecAddr;
+ switch (R.Type) {
+ case IHexRecord::Data:
+ // Ignore empty data records
+ if (R.HexData.empty())
+ continue;
+ RecAddr = R.Addr + SegmentAddr + BaseAddr;
+ if (!Section || Section->Addr + Section->Size != RecAddr)
+ // The OriginalOffset field is only used to sort sections properly, so
+ // instead of keeping track of the real offset in the IHEX file, we use
+ // the section number.
+ Section = &Obj->addSection<OwnedDataSection>(
+ ".sec" + std::to_string(SecNo++), RecAddr,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE, SecNo);
+ Section->appendHexData(R.HexData);
+ break;
+ case IHexRecord::EndOfFile:
+ break;
+ case IHexRecord::SegmentAddr:
+ // 20-bit segment address.
+ SegmentAddr = checkedGetHex<uint16_t>(R.HexData) << 4;
+ break;
+ case IHexRecord::StartAddr80x86:
+ case IHexRecord::StartAddr:
+ Obj->Entry = checkedGetHex<uint32_t>(R.HexData);
+ assert(Obj->Entry <= 0xFFFFFU);
+ break;
+ case IHexRecord::ExtendedAddr:
+ // 16-31 bits of linear base address
+ BaseAddr = checkedGetHex<uint16_t>(R.HexData) << 16;
+ break;
+ default:
+ llvm_unreachable("unknown record type");
+ }
}
}
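
To make the address arithmetic above concrete (illustration only, with hypothetical record contents): a type-02 record shifts its value left by 4, a type-04 record shifts its value left by 16, and both are added to each data record's 16-bit address field.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SegmentAddr = 0x1000u << 4; // from a type-02 record with data 0x1000
  uint64_t BaseAddr = 0x0008u << 16;   // from a type-04 record with data 0x0008
  uint16_t RecordAddr = 0x0010;        // address field of a type-00 data record
  uint64_t RecAddr = RecordAddr + SegmentAddr + BaseAddr;
  std::printf("0x%llx\n", (unsigned long long)RecAddr); // prints: 0x90010
}
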
-std::unique_ptr<Object> BinaryELFBuilder::build() {
+std::unique_ptr<Object> IHexELFBuilder::build() {
initFileHeader();
initHeaderSegment();
StringTableSection *StrTab = addStrTab();
- SymbolTableSection *SymTab = addSymTab(StrTab);
+ addSymTab(StrTab);
initSections();
- addData(SymTab);
+ addDataSections();
return std::move(Obj);
}
template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
- for (auto &Parent : Obj.segments()) {
+ for (Segment &Parent : Obj.segments()) {
// Every segment will overlap with itself but we don't want a segment to
// be it's own parent so we avoid that situation.
if (&Child != &Parent && segmentOverlapsSegment(Child, Parent)) {
@@ -870,23 +1251,43 @@ template <class ELFT> void ELFBuilder<ELFT>::setParentSegment(Segment &Child) {
}
}
-template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
+template <class ELFT> void ELFBuilder<ELFT>::findEhdrOffset() {
+ if (!ExtractPartition)
+ return;
+
+ for (const SectionBase &Section : Obj.sections()) {
+ if (Section.Type == SHT_LLVM_PART_EHDR &&
+ Section.Name == *ExtractPartition) {
+ EhdrOffset = Section.Offset;
+ return;
+ }
+ }
+ error("could not find partition named '" + *ExtractPartition + "'");
+}
+
+template <class ELFT>
+void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
uint32_t Index = 0;
- for (const auto &Phdr : unwrapOrError(ElfFile.program_headers())) {
- ArrayRef<uint8_t> Data{ElfFile.base() + Phdr.p_offset,
+ for (const auto &Phdr : unwrapOrError(HeadersFile.program_headers())) {
+ if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize())
+ error("program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) +
+ " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) +
+ " goes past the end of the file");
+
+ ArrayRef<uint8_t> Data{HeadersFile.base() + Phdr.p_offset,
(size_t)Phdr.p_filesz};
Segment &Seg = Obj.addSegment(Data);
Seg.Type = Phdr.p_type;
Seg.Flags = Phdr.p_flags;
- Seg.OriginalOffset = Phdr.p_offset;
- Seg.Offset = Phdr.p_offset;
+ Seg.OriginalOffset = Phdr.p_offset + EhdrOffset;
+ Seg.Offset = Phdr.p_offset + EhdrOffset;
Seg.VAddr = Phdr.p_vaddr;
Seg.PAddr = Phdr.p_paddr;
Seg.FileSize = Phdr.p_filesz;
Seg.MemSize = Phdr.p_memsz;
Seg.Align = Phdr.p_align;
Seg.Index = Index++;
- for (auto &Section : Obj.sections()) {
+ for (SectionBase &Section : Obj.sections()) {
if (sectionWithinSegment(Section, Seg)) {
Seg.addSection(&Section);
if (!Section.ParentSegment ||
@@ -899,8 +1300,9 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
auto &ElfHdr = Obj.ElfHdrSegment;
ElfHdr.Index = Index++;
+ ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset;
- const auto &Ehdr = *ElfFile.getHeader();
+ const auto &Ehdr = *HeadersFile.getHeader();
auto &PrHdr = Obj.ProgramHdrSegment;
PrHdr.Type = PT_PHDR;
PrHdr.Flags = 0;
@@ -908,7 +1310,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
// Whereas this works automatically for ElfHdr, here OriginalOffset is
// always non-zero and to ensure the equation we assign the same value to
// VAddr as well.
- PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = Ehdr.e_phoff;
+ PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = EhdrOffset + Ehdr.e_phoff;
PrHdr.PAddr = 0;
PrHdr.FileSize = PrHdr.MemSize = Ehdr.e_phentsize * Ehdr.e_phnum;
// The spec requires us to naturally align all the fields.
@@ -917,7 +1319,7 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
// Now we do an O(n^2) loop through the segments in order to match up
// segments.
- for (auto &Child : Obj.segments())
+ for (Segment &Child : Obj.segments())
setParentSegment(Child);
setParentSegment(ElfHdr);
setParentSegment(PrHdr);
@@ -925,22 +1327,25 @@ template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
template <class ELFT>
void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
- auto SecTable = Obj.sections();
+ if (GroupSec->Align % sizeof(ELF::Elf32_Word) != 0)
+ error("invalid alignment " + Twine(GroupSec->Align) + " of group section '" +
+ GroupSec->Name + "'");
+ SectionTableRef SecTable = Obj.sections();
auto SymTab = SecTable.template getSectionOfType<SymbolTableSection>(
GroupSec->Link,
- "Link field value " + Twine(GroupSec->Link) + " in section " +
- GroupSec->Name + " is invalid",
- "Link field value " + Twine(GroupSec->Link) + " in section " +
- GroupSec->Name + " is not a symbol table");
- auto Sym = SymTab->getSymbolByIndex(GroupSec->Info);
+ "link field value '" + Twine(GroupSec->Link) + "' in section '" +
+ GroupSec->Name + "' is invalid",
+ "link field value '" + Twine(GroupSec->Link) + "' in section '" +
+ GroupSec->Name + "' is not a symbol table");
+ Symbol *Sym = SymTab->getSymbolByIndex(GroupSec->Info);
if (!Sym)
- error("Info field value " + Twine(GroupSec->Info) + " in section " +
- GroupSec->Name + " is not a valid symbol index");
+ error("info field value '" + Twine(GroupSec->Info) + "' in section '" +
+ GroupSec->Name + "' is not a valid symbol index");
GroupSec->setSymTab(SymTab);
GroupSec->setSymbol(Sym);
if (GroupSec->Contents.size() % sizeof(ELF::Elf32_Word) ||
GroupSec->Contents.empty())
- error("The content of the section " + GroupSec->Name + " is malformed");
+ error("the content of the section " + GroupSec->Name + " is malformed");
const ELF::Elf32_Word *Word =
reinterpret_cast<const ELF::Elf32_Word *>(GroupSec->Contents.data());
const ELF::Elf32_Word *End =
@@ -949,8 +1354,8 @@ void ELFBuilder<ELFT>::initGroupSection(GroupSection *GroupSec) {
for (; Word != End; ++Word) {
uint32_t Index = support::endian::read32<ELFT::TargetEndianness>(Word);
GroupSec->addMember(SecTable.getSection(
- Index, "Group member index " + Twine(Index) + " in section " +
- GroupSec->Name + " is invalid"));
+ Index, "group member index " + Twine(Index) + " in section '" +
+ GroupSec->Name + "' is invalid"));
}
}
@@ -967,31 +1372,31 @@ void ELFBuilder<ELFT>::initSymbolTable(SymbolTableSection *SymTab) {
if (Sym.st_shndx == SHN_XINDEX) {
if (SymTab->getShndxTable() == nullptr)
- error("Symbol '" + Name +
- "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists.");
+ error("symbol '" + Name +
+ "' has index SHN_XINDEX but no SHT_SYMTAB_SHNDX section exists");
if (ShndxData.data() == nullptr) {
const Elf_Shdr &ShndxSec =
*unwrapOrError(ElfFile.getSection(SymTab->getShndxTable()->Index));
ShndxData = unwrapOrError(
ElfFile.template getSectionContentsAsArray<Elf_Word>(&ShndxSec));
if (ShndxData.size() != Symbols.size())
- error("Symbol section index table does not have the same number of "
- "entries as the symbol table.");
+ error("symbol section index table does not have the same number of "
+ "entries as the symbol table");
}
Elf_Word Index = ShndxData[&Sym - Symbols.begin()];
DefSection = Obj.sections().getSection(
Index,
- "Symbol '" + Name + "' has invalid section index " + Twine(Index));
+ "symbol '" + Name + "' has invalid section index " + Twine(Index));
} else if (Sym.st_shndx >= SHN_LORESERVE) {
if (!isValidReservedSectionIndex(Sym.st_shndx, Obj.Machine)) {
error(
- "Symbol '" + Name +
+ "symbol '" + Name +
"' has unsupported value greater than or equal to SHN_LORESERVE: " +
Twine(Sym.st_shndx));
}
} else if (Sym.st_shndx != SHN_UNDEF) {
DefSection = Obj.sections().getSection(
- Sym.st_shndx, "Symbol '" + Name +
+ Sym.st_shndx, "symbol '" + Name +
"' is defined has invalid section index " +
Twine(Sym.st_shndx));
}
@@ -1086,7 +1491,8 @@ SectionBase &ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
default: {
Data = unwrapOrError(ElfFile.getSectionContents(&Shdr));
- if (isDataGnuCompressed(Data) || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
+ StringRef Name = unwrapOrError(ElfFile.getSectionName(&Shdr));
+ if (Name.startswith(".zdebug") || (Shdr.sh_flags & ELF::SHF_COMPRESSED)) {
uint64_t DecompressedSize, DecompressedAlign;
std::tie(DecompressedSize, DecompressedAlign) =
getDecompressedSizeAndAlignment<ELFT>(Data);
@@ -1123,7 +1529,9 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
(Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size);
}
+}
+template <class ELFT> void ELFBuilder<ELFT>::readSections() {
// If a section index table exists we'll need to initialize it before we
// initialize the symbol table because the symbol table might need to
// reference it.
@@ -1157,11 +1565,34 @@ template <class ELFT> void ELFBuilder<ELFT>::readSectionHeaders() {
initGroupSection(GroupSec);
}
}
+
+ uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx;
+ if (ShstrIndex == SHN_XINDEX)
+ ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
+
+ if (ShstrIndex == SHN_UNDEF)
+ Obj.HadShdrs = false;
+ else
+ Obj.SectionNames =
+ Obj.sections().template getSectionOfType<StringTableSection>(
+ ShstrIndex,
+ "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+ " is invalid",
+ "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header " +
+ " is not a string table");
}
template <class ELFT> void ELFBuilder<ELFT>::build() {
- const auto &Ehdr = *ElfFile.getHeader();
+ readSectionHeaders();
+ findEhdrOffset();
+
+ // The ELFFile whose ELF headers and program headers are copied into the
+ // output file. Normally the same as ElfFile, but if we're extracting a
+ // loadable partition it will point to the partition's headers.
+ ELFFile<ELFT> HeadersFile = unwrapOrError(ELFFile<ELFT>::create(toStringRef(
+ {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})));
+ auto &Ehdr = *HeadersFile.getHeader();
Obj.OSABI = Ehdr.e_ident[EI_OSABI];
Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
Obj.Type = Ehdr.e_type;
@@ -1170,25 +1601,8 @@ template <class ELFT> void ELFBuilder<ELFT>::build() {
Obj.Entry = Ehdr.e_entry;
Obj.Flags = Ehdr.e_flags;
- readSectionHeaders();
- readProgramHeaders();
-
- uint32_t ShstrIndex = Ehdr.e_shstrndx;
- if (ShstrIndex == SHN_XINDEX)
- ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
-
- Obj.SectionNames =
- Obj.sections().template getSectionOfType<StringTableSection>(
- ShstrIndex,
- "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) +
- " in elf header " + " is invalid",
- "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) +
- " in elf header " + " is not a string table");
-}
-
-// A generic size function which computes sizes of any random access range.
-template <class R> size_t size(R &&Range) {
- return static_cast<size_t>(std::end(Range) - std::begin(Range));
+ readSections();
+ readProgramHeaders(HeadersFile);
}
Writer::~Writer() {}
@@ -1199,31 +1613,61 @@ std::unique_ptr<Object> BinaryReader::create() const {
return BinaryELFBuilder(MInfo.EMachine, MemBuf).build();
}
+Expected<std::vector<IHexRecord>> IHexReader::parse() const {
+ SmallVector<StringRef, 16> Lines;
+ std::vector<IHexRecord> Records;
+ bool HasSections = false;
+
+ MemBuf->getBuffer().split(Lines, '\n');
+ Records.reserve(Lines.size());
+ for (size_t LineNo = 1; LineNo <= Lines.size(); ++LineNo) {
+ StringRef Line = Lines[LineNo - 1].trim();
+ if (Line.empty())
+ continue;
+
+ Expected<IHexRecord> R = IHexRecord::parse(Line);
+ if (!R)
+ return parseError(LineNo, R.takeError());
+ if (R->Type == IHexRecord::EndOfFile)
+ break;
+ HasSections |= (R->Type == IHexRecord::Data);
+ Records.push_back(*R);
+ }
+ if (!HasSections)
+ return parseError(-1U, "no sections");
+
+ return std::move(Records);
+}
+
+std::unique_ptr<Object> IHexReader::create() const {
+ std::vector<IHexRecord> Records = unwrapOrError(parse());
+ return IHexELFBuilder(Records).build();
+}
+
std::unique_ptr<Object> ELFReader::create() const {
auto Obj = llvm::make_unique<Object>();
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
- ELFBuilder<ELF32LE> Builder(*O, *Obj);
+ ELFBuilder<ELF32LE> Builder(*O, *Obj, ExtractPartition);
Builder.build();
return Obj;
} else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
- ELFBuilder<ELF64LE> Builder(*O, *Obj);
+ ELFBuilder<ELF64LE> Builder(*O, *Obj, ExtractPartition);
Builder.build();
return Obj;
} else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
- ELFBuilder<ELF32BE> Builder(*O, *Obj);
+ ELFBuilder<ELF32BE> Builder(*O, *Obj, ExtractPartition);
Builder.build();
return Obj;
} else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
- ELFBuilder<ELF64BE> Builder(*O, *Obj);
+ ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
Builder.build();
return Obj;
}
- error("Invalid file type");
+ error("invalid file type");
}
template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
- uint8_t *B = Buf.getBufferStart();
- Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(B);
+ Elf_Ehdr &Ehdr = *reinterpret_cast<Elf_Ehdr *>(Buf.getBufferStart());
std::fill(Ehdr.e_ident, Ehdr.e_ident + 16, 0);
Ehdr.e_ident[EI_MAG0] = 0x7f;
Ehdr.e_ident[EI_MAG1] = 'E';
@@ -1247,7 +1691,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
Ehdr.e_phentsize = (Ehdr.e_phnum != 0) ? sizeof(Elf_Phdr) : 0;
Ehdr.e_flags = Obj.Flags;
Ehdr.e_ehsize = sizeof(Elf_Ehdr);
- if (WriteSectionHeaders && size(Obj.sections()) != 0) {
+ if (WriteSectionHeaders && Obj.sections().size() != 0) {
Ehdr.e_shentsize = sizeof(Elf_Shdr);
Ehdr.e_shoff = Obj.SHOffset;
// """
@@ -1256,7 +1700,7 @@ template <class ELFT> void ELFWriter<ELFT>::writeEhdr() {
// number of section header table entries is contained in the sh_size field
// of the section header at index 0.
// """
- auto Shnum = size(Obj.sections()) + 1;
+ auto Shnum = Obj.sections().size() + 1;
if (Shnum >= SHN_LORESERVE)
Ehdr.e_shnum = 0;
else
@@ -1285,17 +1729,17 @@ template <class ELFT> void ELFWriter<ELFT>::writePhdrs() {
}
template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
- uint8_t *B = Buf.getBufferStart() + Obj.SHOffset;
// This reference serves to write the dummy section header at the begining
// of the file. It is not used for anything else
- Elf_Shdr &Shdr = *reinterpret_cast<Elf_Shdr *>(B);
+ Elf_Shdr &Shdr =
+ *reinterpret_cast<Elf_Shdr *>(Buf.getBufferStart() + Obj.SHOffset);
Shdr.sh_name = 0;
Shdr.sh_type = SHT_NULL;
Shdr.sh_flags = 0;
Shdr.sh_addr = 0;
Shdr.sh_offset = 0;
// See writeEhdr for why we do this.
- uint64_t Shnum = size(Obj.sections()) + 1;
+ uint64_t Shnum = Obj.sections().size() + 1;
if (Shnum >= SHN_LORESERVE)
Shdr.sh_size = Shnum;
else
@@ -1309,16 +1753,44 @@ template <class ELFT> void ELFWriter<ELFT>::writeShdrs() {
Shdr.sh_addralign = 0;
Shdr.sh_entsize = 0;
- for (auto &Sec : Obj.sections())
+ for (SectionBase &Sec : Obj.sections())
writeShdr(Sec);
}
template <class ELFT> void ELFWriter<ELFT>::writeSectionData() {
- for (auto &Sec : Obj.sections())
- Sec.accept(*SecWriter);
+ for (SectionBase &Sec : Obj.sections())
+ // Segments are responsible for writing their contents, so only write the
+ // section data if the section is not in a segment. Note that this renders
+ // sections in segments effectively immutable.
+ if (Sec.ParentSegment == nullptr)
+ Sec.accept(*SecWriter);
+}
+
+template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
+ for (Segment &Seg : Obj.segments()) {
+ uint8_t *B = Buf.getBufferStart() + Seg.Offset;
+ assert(Seg.FileSize == Seg.getContents().size() &&
+ "Segment size must match contents size");
+ std::memcpy(B, Seg.getContents().data(), Seg.FileSize);
+ }
+
+ // Iterate over removed sections and overwrite their old data with zeroes.
+ for (auto &Sec : Obj.removedSections()) {
+ Segment *Parent = Sec.ParentSegment;
+ if (Parent == nullptr || Sec.Type == SHT_NOBITS || Sec.Size == 0)
+ continue;
+ uint64_t Offset =
+ Sec.OriginalOffset - Parent->OriginalOffset + Parent->Offset;
+ std::memset(Buf.getBufferStart() + Offset, 0, Sec.Size);
+ }
}
-void Object::removeSections(std::function<bool(const SectionBase &)> ToRemove) {
+template <class ELFT>
+ELFWriter<ELFT>::ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
+ : Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs) {}
+
+Error Object::removeSections(bool AllowBrokenLinks,
+ std::function<bool(const SectionBase &)> ToRemove) {
auto Iter = std::stable_partition(
std::begin(Sections), std::end(Sections), [=](const SecPtr &Sec) {
@@ -1339,32 +1811,55 @@ void Object::removeSections(std::function<bool(const SectionBase &)> ToRemove) {
// Now make sure there are no remaining references to the sections that will
// be removed. Sometimes it is impossible to remove a reference so we emit
// an error here instead.
+ std::unordered_set<const SectionBase *> RemoveSections;
+ RemoveSections.reserve(std::distance(Iter, std::end(Sections)));
for (auto &RemoveSec : make_range(Iter, std::end(Sections))) {
for (auto &Segment : Segments)
Segment->removeSection(RemoveSec.get());
- for (auto &KeepSec : make_range(std::begin(Sections), Iter))
- KeepSec->removeSectionReferences(RemoveSec.get());
+ RemoveSections.insert(RemoveSec.get());
}
- // Now finally get rid of them all togethor.
+
+ // For each section that remains alive, we want to remove the dead references.
+ // This might either update the content of the section (e.g. remove symbols
+ // from the symbol table that belong to a removed section) or trigger an error
+ // if a live section critically depends on a section that is being removed
+ // (e.g. the removed section is referenced by a relocation).
+ for (auto &KeepSec : make_range(std::begin(Sections), Iter)) {
+ if (Error E = KeepSec->removeSectionReferences(AllowBrokenLinks,
+ [&RemoveSections](const SectionBase *Sec) {
+ return RemoveSections.find(Sec) != RemoveSections.end();
+ }))
+ return E;
+ }
+
+ // Transfer removed sections into the Object RemovedSections container for use
+ // later.
+ std::move(Iter, Sections.end(), std::back_inserter(RemovedSections));
+ // Now finally get rid of them all together.
Sections.erase(Iter, std::end(Sections));
+ return Error::success();
}
-void Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
- if (!SymbolTable)
- return;
-
- for (const SecPtr &Sec : Sections)
- Sec->removeSymbols(ToRemove);
+Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
+ if (SymbolTable)
+ for (const SecPtr &Sec : Sections)
+ if (Error E = Sec->removeSymbols(ToRemove))
+ return E;
+ return Error::success();
}
void Object::sortSections() {
- // Put all sections in offset order. Maintain the ordering as closely as
- // possible while meeting that demand however.
- auto CompareSections = [](const SecPtr &A, const SecPtr &B) {
+ // Use stable_sort to maintain the original ordering as closely as possible.
+ llvm::stable_sort(Sections, [](const SecPtr &A, const SecPtr &B) {
+ // Put SHT_GROUP sections first, since group section headers must come
+ // before the sections they contain. This also matches what GNU objcopy
+ // does.
+ if (A->Type != B->Type &&
+ (A->Type == ELF::SHT_GROUP || B->Type == ELF::SHT_GROUP))
+ return A->Type == ELF::SHT_GROUP;
+ // For all other sections, sort by offset order.
return A->OriginalOffset < B->OriginalOffset;
- };
- std::stable_sort(std::begin(this->Sections), std::end(this->Sections),
- CompareSections);
+ });
}
static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
@@ -1382,14 +1877,13 @@ static uint64_t alignToAddr(uint64_t Offset, uint64_t Addr, uint64_t Align) {
// Orders segments such that if x = y->ParentSegment then y comes before x.
static void orderSegments(std::vector<Segment *> &Segments) {
- std::stable_sort(std::begin(Segments), std::end(Segments),
- compareSegmentsByOffset);
+ llvm::stable_sort(Segments, compareSegmentsByOffset);
}
// This function finds a consistent layout for a list of segments starting from
// an Offset. It assumes that Segments have been sorted by OrderSegments and
// returns an Offset one past the end of the last segment.
-static uint64_t LayoutSegments(std::vector<Segment *> &Segments,
+static uint64_t layoutSegments(std::vector<Segment *> &Segments,
uint64_t Offset) {
assert(std::is_sorted(std::begin(Segments), std::end(Segments),
compareSegmentsByOffset));
@@ -1398,20 +1892,20 @@ static uint64_t LayoutSegments(std::vector<Segment *> &Segments,
// then it's acceptable, but not ideal, to simply move it to after the
// segments. So we can simply layout segments one after the other accounting
// for alignment.
- for (auto &Segment : Segments) {
+ for (Segment *Seg : Segments) {
// We assume that segments have been ordered by OriginalOffset and Index
// such that a parent segment will always come before a child segment in
// OrderedSegments. This means that the Offset of the ParentSegment should
// already be set and we can set our offset relative to it.
- if (Segment->ParentSegment != nullptr) {
- auto Parent = Segment->ParentSegment;
- Segment->Offset =
- Parent->Offset + Segment->OriginalOffset - Parent->OriginalOffset;
+ if (Seg->ParentSegment != nullptr) {
+ Segment *Parent = Seg->ParentSegment;
+ Seg->Offset =
+ Parent->Offset + Seg->OriginalOffset - Parent->OriginalOffset;
} else {
- Offset = alignToAddr(Offset, Segment->VAddr, Segment->Align);
- Segment->Offset = Offset;
+ Offset = alignToAddr(Offset, Seg->VAddr, Seg->Align);
+ Seg->Offset = Offset;
}
- Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
+ Offset = std::max(Offset, Seg->Offset + Seg->FileSize);
}
return Offset;
}
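
Illustration only, with made-up numbers: the parent-relative branch above keeps a child segment at the same distance from its parent in the output file as it had in the input.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t ParentOriginalOffset = 0x2000, ChildOriginalOffset = 0x2400;
  uint64_t ParentOffset = 0x1000; // where layout placed the parent in the output
  uint64_t ChildOffset =
      ParentOffset + ChildOriginalOffset - ParentOriginalOffset;
  assert(ChildOffset == 0x1400); // still 0x400 past the parent
  return 0;
}
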
@@ -1448,10 +1942,9 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
}
template <class ELFT> void ELFWriter<ELFT>::initEhdrSegment() {
- auto &ElfHdr = Obj.ElfHdrSegment;
+ Segment &ElfHdr = Obj.ElfHdrSegment;
ElfHdr.Type = PT_PHDR;
ElfHdr.Flags = 0;
- ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
ElfHdr.VAddr = 0;
ElfHdr.PAddr = 0;
ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
@@ -1463,7 +1956,7 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
// so that we know that anytime ->ParentSegment is set that segment has
// already had its offset properly set.
std::vector<Segment *> OrderedSegments;
- for (auto &Segment : Obj.segments())
+ for (Segment &Segment : Obj.segments())
OrderedSegments.push_back(&Segment);
OrderedSegments.push_back(&Obj.ElfHdrSegment);
OrderedSegments.push_back(&Obj.ProgramHdrSegment);
@@ -1472,7 +1965,7 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
// Since the ELF Header (ElfHdrSegment) must be at the start of the file,
// we start at offset 0.
uint64_t Offset = 0;
- Offset = LayoutSegments(OrderedSegments, Offset);
+ Offset = layoutSegments(OrderedSegments, Offset);
Offset = layoutSections(Obj.sections(), Offset);
// If we need to write the section header table out then we need to align the
// Offset so that SHOffset is valid.
@@ -1484,28 +1977,32 @@ template <class ELFT> void ELFWriter<ELFT>::assignOffsets() {
template <class ELFT> size_t ELFWriter<ELFT>::totalSize() const {
// We already have the section header offset so we can calculate the total
// size by just adding up the size of each section header.
- auto NullSectionSize = WriteSectionHeaders ? sizeof(Elf_Shdr) : 0;
- return Obj.SHOffset + size(Obj.sections()) * sizeof(Elf_Shdr) +
- NullSectionSize;
+ if (!WriteSectionHeaders)
+ return Obj.SHOffset;
+ size_t ShdrCount = Obj.sections().size() + 1; // Includes null shdr.
+ return Obj.SHOffset + ShdrCount * sizeof(Elf_Shdr);
}
-template <class ELFT> void ELFWriter<ELFT>::write() {
+template <class ELFT> Error ELFWriter<ELFT>::write() {
+ // Segment data must be written first, so that the ELF header and program
+ // header tables can overwrite it, if covered by a segment.
+ writeSegmentData();
writeEhdr();
writePhdrs();
writeSectionData();
if (WriteSectionHeaders)
writeShdrs();
- if (auto E = Buf.commit())
- reportError(Buf.getName(), errorToErrorCode(std::move(E)));
+ return Buf.commit();
}
-template <class ELFT> void ELFWriter<ELFT>::finalize() {
+template <class ELFT> Error ELFWriter<ELFT>::finalize() {
// It could happen that SectionNames has been removed and yet the user wants
// a section header table output. We need to throw an error if a user tries
// to do that.
if (Obj.SectionNames == nullptr && WriteSectionHeaders)
- error("Cannot write section header table because section header string "
- "table was removed.");
+ return createStringError(llvm::errc::invalid_argument,
+ "cannot write section header table because "
+ "section header string table was removed");
Obj.sortSections();
@@ -1513,8 +2010,8 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
// if we need large indexes or not. We can assign indexes first and check as
// we go to see if we will actully need large indexes.
bool NeedsLargeIndexes = false;
- if (size(Obj.sections()) >= SHN_LORESERVE) {
- auto Sections = Obj.sections();
+ if (Obj.sections().size() >= SHN_LORESERVE) {
+ SectionTableRef Sections = Obj.sections();
NeedsLargeIndexes =
std::any_of(Sections.begin() + SHN_LORESERVE, Sections.end(),
[](const SectionBase &Sec) { return Sec.HasSymbol; });
@@ -1536,9 +2033,12 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
// Since we don't need SectionIndexTable we should remove it and all
// references to it.
if (Obj.SectionIndexTable != nullptr) {
- Obj.removeSections([this](const SectionBase &Sec) {
- return &Sec == Obj.SectionIndexTable;
- });
+ // We do not support sections referring to the section index table.
+ if (Error E = Obj.removeSections(false /*AllowBrokenLinks*/,
+ [this](const SectionBase &Sec) {
+ return &Sec == Obj.SectionIndexTable;
+ }))
+ return E;
}
}
@@ -1567,15 +2067,23 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
if (Obj.SymbolTable != nullptr)
Obj.SymbolTable->prepareForLayout();
+ // Now that all strings are added we want to finalize string table builders,
+ // because that affects section sizes which in turn affects section offsets.
+ for (SectionBase &Sec : Obj.sections())
+ if (auto StrTab = dyn_cast<StringTableSection>(&Sec))
+ StrTab->prepareForLayout();
+
assignOffsets();
- // Finalize SectionNames first so that we can assign name indexes.
- if (Obj.SectionNames != nullptr)
- Obj.SectionNames->finalize();
+ // layoutSections could have modified section indexes, so we need
+ // to fill the index table after assignOffsets.
+ if (Obj.SymbolTable != nullptr)
+ Obj.SymbolTable->fillShndxTable();
+
// Finally now that all offsets and indexes have been set we can finalize any
// remaining issues.
uint64_t Offset = Obj.SHOffset + sizeof(Elf_Shdr);
- for (auto &Section : Obj.sections()) {
+ for (SectionBase &Section : Obj.sections()) {
Section.HeaderOffset = Offset;
Offset += sizeof(Elf_Shdr);
if (WriteSectionHeaders)
@@ -1583,21 +2091,20 @@ template <class ELFT> void ELFWriter<ELFT>::finalize() {
Section.finalize();
}
- Buf.allocate(totalSize());
+ if (Error E = Buf.allocate(totalSize()))
+ return E;
SecWriter = llvm::make_unique<ELFSectionWriter<ELFT>>(Buf);
+ return Error::success();
}
-void BinaryWriter::write() {
- for (auto &Section : Obj.sections()) {
- if ((Section.Flags & SHF_ALLOC) == 0)
- continue;
- Section.accept(*SecWriter);
- }
- if (auto E = Buf.commit())
- reportError(Buf.getName(), errorToErrorCode(std::move(E)));
+Error BinaryWriter::write() {
+ for (auto &Section : Obj.sections())
+ if (Section.Flags & SHF_ALLOC)
+ Section.accept(*SecWriter);
+ return Buf.commit();
}
-void BinaryWriter::finalize() {
+Error BinaryWriter::finalize() {
// TODO: Create a filter range to construct OrderedSegments from so that this
// code can be deduped with assignOffsets above. This should also solve the
// todo below for LayoutSections.
@@ -1606,11 +2113,9 @@ void BinaryWriter::finalize() {
// already had it's offset properly set. We only want to consider the segments
// that will affect layout of allocated sections so we only add those.
std::vector<Segment *> OrderedSegments;
- for (auto &Section : Obj.sections()) {
- if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr) {
+ for (SectionBase &Section : Obj.sections())
+ if ((Section.Flags & SHF_ALLOC) != 0 && Section.ParentSegment != nullptr)
OrderedSegments.push_back(Section.ParentSegment);
- }
- }
// For binary output, we're going to use physical addresses instead of
// virtual addresses, since a binary output is used for cases like ROM
@@ -1622,8 +2127,7 @@ void BinaryWriter::finalize() {
for (Segment *Seg : OrderedSegments)
Seg->PAddr = Seg->VAddr;
- std::stable_sort(std::begin(OrderedSegments), std::end(OrderedSegments),
- compareSegmentsByPAddr);
+ llvm::stable_sort(OrderedSegments, compareSegmentsByPAddr);
// Because we add a ParentSegment for each section we might have duplicate
// segments in OrderedSegments. If there were duplicates then LayoutSegments
@@ -1638,8 +2142,8 @@ void BinaryWriter::finalize() {
// our layout algorithm to proceed as expected while not writing out the gap
// at the start.
if (!OrderedSegments.empty()) {
- auto Seg = OrderedSegments[0];
- auto Sec = Seg->firstSection();
+ Segment *Seg = OrderedSegments[0];
+ const SectionBase *Sec = Seg->firstSection();
auto Diff = Sec->OriginalOffset - Seg->OriginalOffset;
Seg->OriginalOffset += Diff;
// The size needs to be shrunk as well.
@@ -1648,7 +2152,7 @@ void BinaryWriter::finalize() {
// section.
Seg->PAddr += Diff;
uint64_t LowestPAddr = Seg->PAddr;
- for (auto &Segment : OrderedSegments) {
+ for (Segment *Segment : OrderedSegments) {
Segment->Offset = Segment->PAddr - LowestPAddr;
Offset = std::max(Offset, Segment->Offset + Segment->FileSize);
}
@@ -1659,11 +2163,9 @@ void BinaryWriter::finalize() {
// not hold. Then pass such a range to LayoutSections instead of constructing
// AllocatedSections here.
std::vector<SectionBase *> AllocatedSections;
- for (auto &Section : Obj.sections()) {
- if ((Section.Flags & SHF_ALLOC) == 0)
- continue;
- AllocatedSections.push_back(&Section);
- }
+ for (SectionBase &Section : Obj.sections())
+ if (Section.Flags & SHF_ALLOC)
+ AllocatedSections.push_back(&Section);
layoutSections(make_pointee_range(AllocatedSections), Offset);
// Now that every section has been laid out we just need to compute the total
@@ -1671,13 +2173,117 @@ void BinaryWriter::finalize() {
// LayoutSections, because we want to truncate the last segment to the end of
// its last section, to match GNU objcopy's behaviour.
TotalSize = 0;
- for (const auto &Section : AllocatedSections) {
+ for (SectionBase *Section : AllocatedSections)
if (Section->Type != SHT_NOBITS)
TotalSize = std::max(TotalSize, Section->Offset + Section->Size);
- }
- Buf.allocate(TotalSize);
+ if (Error E = Buf.allocate(TotalSize))
+ return E;
SecWriter = llvm::make_unique<BinarySectionWriter>(Buf);
+ return Error::success();
+}
+
+bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs,
+ const SectionBase *Rhs) const {
+ return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) <
+ (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU);
+}
+
+uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) {
+ IHexLineData HexData;
+ uint8_t Data[4] = {};
+ // We don't write entry point record if entry is zero.
+ if (Obj.Entry == 0)
+ return 0;
+
+ if (Obj.Entry <= 0xFFFFFU) {
+ Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF;
+ support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry),
+ support::big);
+ HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data);
+ } else {
+ support::endian::write(Data, static_cast<uint32_t>(Obj.Entry),
+ support::big);
+ HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data);
+ }
+ memcpy(Buf, HexData.data(), HexData.size());
+ return HexData.size();
+}
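
A worked example of the type-03 encoding above (illustration only; the entry value is hypothetical): for an entry point of 0x12345 the record carries CS = 0x1000 and IP = 0x2345, and CS * 16 + IP reconstructs the entry.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Entry = 0x12345;              // hypothetical 20-bit entry point
  uint16_t CS = (Entry & 0xF0000u) >> 4; // 0x1000, real-mode segment
  uint16_t IP = Entry & 0xFFFFu;         // 0x2345, offset within the segment
  assert(CS * 16u + IP == Entry);        // CS:IP resolves back to the entry
  // The record's four data bytes, big endian, are therefore 10 00 23 45.
}
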
+
+uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) {
+ IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {});
+ memcpy(Buf, HexData.data(), HexData.size());
+ return HexData.size();
+}
+
+Error IHexWriter::write() {
+ IHexSectionWriter Writer(Buf);
+ // Write sections.
+ for (const SectionBase *Sec : Sections)
+ Sec->accept(Writer);
+
+ uint64_t Offset = Writer.getBufferOffset();
+ // Write entry point address.
+ Offset += writeEntryPointRecord(Buf.getBufferStart() + Offset);
+ // Write EOF.
+ Offset += writeEndOfFileRecord(Buf.getBufferStart() + Offset);
+ assert(Offset == TotalSize);
+ return Buf.commit();
+}
+
+Error IHexWriter::checkSection(const SectionBase &Sec) {
+ uint64_t Addr = sectionPhysicalAddr(&Sec);
+ if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1))
+ return createStringError(
+ errc::invalid_argument,
+ "Section '%s' address range [0x%llx, 0x%llx] is not 32 bit", Sec.Name.c_str(),
+ Addr, Addr + Sec.Size - 1);
+ return Error::success();
+}
+
+Error IHexWriter::finalize() {
+ bool UseSegments = false;
+ auto ShouldWrite = [](const SectionBase &Sec) {
+ return (Sec.Flags & ELF::SHF_ALLOC) && (Sec.Type != ELF::SHT_NOBITS);
+ };
+ auto IsInPtLoad = [](const SectionBase &Sec) {
+ return Sec.ParentSegment && Sec.ParentSegment->Type == ELF::PT_LOAD;
+ };
+
+ // We can't write 64-bit addresses.
+ if (addressOverflows32bit(Obj.Entry))
+ return createStringError(errc::invalid_argument,
+ "Entry point address 0x%llx overflows 32 bits.",
+ Obj.Entry);
+
+ // If any section we're about to write has a parent segment then we
+ // switch to using physical addresses. Otherwise we
+ // use section virtual addresses.
+ for (auto &Section : Obj.sections())
+ if (ShouldWrite(Section) && IsInPtLoad(Section)) {
+ UseSegments = true;
+ break;
+ }
+
+ for (auto &Section : Obj.sections())
+ if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) {
+ if (Error E = checkSection(Section))
+ return E;
+ Sections.insert(&Section);
+ }
+
+ IHexSectionWriterBase LengthCalc(Buf);
+ for (const SectionBase *Sec : Sections)
+ Sec->accept(LengthCalc);
+
+ // We need space to write section records + StartAddress record
+ // (if the start address is not zero) + EndOfFile record.
+ TotalSize = LengthCalc.getBufferOffset() +
+ (Obj.Entry ? IHexRecord::getLineLength(4) : 0) +
+ IHexRecord::getLineLength(0);
+ if (Error E = Buf.allocate(TotalSize))
+ return E;
+ return Error::success();
}
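
Illustration only: the length formula behind the TotalSize computation in finalize above is getLineLength(N) = 2 * N + 11 + 2 (record characters plus CRLF), evaluated here for the records it reserves space for.

#include <cassert>
#include <cstddef>

constexpr size_t lineLength(size_t DataSize) {
  return DataSize * 2 + 11 + 2; // ":LL" + "AAAA" + "TT" + data + "CC" + CRLF
}

int main() {
  assert(lineLength(4) == 21);  // start-address record (4 data bytes)
  assert(lineLength(0) == 13);  // end-of-file record ":00000001FF\r\n"
  assert(lineLength(16) == 45); // one full 16-byte data record
}
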
template class ELFBuilder<ELF64LE>;
diff --git a/tools/llvm-objcopy/ELF/Object.h b/tools/llvm-objcopy/ELF/Object.h
index e5730cd543ee..f3df93b9662f 100644
--- a/tools/llvm-objcopy/ELF/Object.h
+++ b/tools/llvm-objcopy/ELF/Object.h
@@ -1,9 +1,8 @@
//===- Object.h -------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -18,8 +17,8 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/JamCRC.h"
#include <cstddef>
#include <cstdint>
#include <functional>
@@ -60,6 +59,7 @@ public:
iterator begin() { return iterator(Sections.data()); }
iterator end() { return iterator(Sections.data() + Sections.size()); }
+ size_t size() const { return Sections.size(); }
SectionBase *getSection(uint32_t Index, Twine ErrMsg);
@@ -108,7 +108,7 @@ protected:
Buffer &Out;
public:
- virtual ~SectionWriter(){};
+ virtual ~SectionWriter() = default;
void visit(const Section &Sec) override;
void visit(const OwnedDataSection &Sec) override;
@@ -169,6 +169,8 @@ public:
#define MAKE_SEC_WRITER_FRIEND \
friend class SectionWriter; \
+ friend class IHexSectionWriterBase; \
+ friend class IHexSectionWriter; \
template <class ELFT> friend class ELFSectionWriter; \
template <class ELFT> friend class ELFSectionSizer;
@@ -187,6 +189,118 @@ public:
explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
};
+using IHexLineData = SmallVector<char, 64>;
+
+struct IHexRecord {
+ // Memory address of the record.
+ uint16_t Addr;
+ // Record type (see below).
+ uint16_t Type;
+ // Record data in hexadecimal form.
+ StringRef HexData;
+
+ // Helper method to get the file length of the record,
+ // excluding the trailing newline characters (see getLineLength).
+ static size_t getLength(size_t DataSize) {
+ // :LLAAAATT[DD...DD]CC
+ return DataSize * 2 + 11;
+ }
+
+ // Gets length of line in a file (getLength + CRLF).
+ static size_t getLineLength(size_t DataSize) {
+ return getLength(DataSize) + 2;
+ }
+
+ // Given a type, an address and data, returns a line that can
+ // be written to the output file.
+ static IHexLineData getLine(uint8_t Type, uint16_t Addr,
+ ArrayRef<uint8_t> Data);
+
+ // Parses the line and returns a record if possible.
+ // The line must have leading and trailing whitespace trimmed.
+ static Expected<IHexRecord> parse(StringRef Line);
+
+ // Calculates the checksum of the stringified record representation.
+ // S must NOT contain the leading ':' or any trailing whitespace
+ // characters.
+ static uint8_t getChecksum(StringRef S);
+
+ enum Type {
+ // Contains data and a 16-bit starting address for the data.
+ // The byte count specifies the number of data bytes in the record.
+ Data = 0,
+ // Must occur exactly once per file in the last line of the file.
+ // The data field is empty (thus byte count is 00) and the address
+ // field is typically 0000.
+ EndOfFile = 1,
+ // The data field contains a 16-bit segment base address (thus byte
+ // count is always 02) compatible with 80x86 real mode addressing.
+ // The address field (typically 0000) is ignored. The segment address
+ // from the most recent 02 record is multiplied by 16 and added to each
+ // subsequent data record address to form the physical starting address
+ // for the data. This allows addressing up to one megabyte of address
+ // space.
+ SegmentAddr = 2,
+ // For 80x86 processors, specifies the initial content of the CS:IP
+ // registers. The address field is 0000, the byte count is always 04,
+ // the first two data bytes are the CS value, the latter two are the
+ // IP value.
+ StartAddr80x86 = 3,
+ // Allows for 32 bit addressing (up to 4GiB). The record's address field
+ // is ignored (typically 0000) and its byte count is always 02. The two
+ // data bytes (big endian) specify the upper 16 bits of the 32 bit
+ // absolute address for all subsequent type 00 records
+ ExtendedAddr = 4,
+ // The address field is 0000 (not used) and the byte count is always 04.
+ // The four data bytes represent a 32-bit address value. In the case of
+ // 80386 and higher CPUs, this address is loaded into the EIP register.
+ StartAddr = 5,
+ // We have no other valid types
+ InvalidType = 6
+ };
+};
+
+// Base class for IHexSectionWriter. This class implements the writing
+// algorithm, but doesn't actually write records. It is used for output buffer
+// size calculation in IHexWriter::finalize.
+class IHexSectionWriterBase : public BinarySectionWriter {
+ // 20-bit segment address
+ uint32_t SegmentAddr = 0;
+ // Extended linear address
+ uint32_t BaseAddr = 0;
+
+ // Write segment address corresponding to 'Addr'
+ uint64_t writeSegmentAddr(uint64_t Addr);
+ // Write extended linear (base) address corresponding to 'Addr'
+ uint64_t writeBaseAddr(uint64_t Addr);
+
+protected:
+ // Offset in the output buffer
+ uint64_t Offset = 0;
+
+ void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
+ virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
+
+public:
+ explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}
+
+ uint64_t getBufferOffset() const { return Offset; }
+ void visit(const Section &Sec) final;
+ void visit(const OwnedDataSection &Sec) final;
+ void visit(const StringTableSection &Sec) override;
+ void visit(const DynamicRelocationSection &Sec) final;
+ using BinarySectionWriter::visit;
+};
+
+// Real IHEX section writer
+class IHexSectionWriter : public IHexSectionWriterBase {
+public:
+ IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
+
+ void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
+ void visit(const StringTableSection &Sec) override;
+};
+
class Writer {
protected:
Object &Obj;
@@ -194,8 +308,8 @@ protected:
public:
virtual ~Writer();
- virtual void finalize() = 0;
- virtual void write() = 0;
+ virtual Error finalize() = 0;
+ virtual Error write() = 0;
Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {}
};
@@ -216,6 +330,7 @@ private:
void writePhdrs();
void writeShdrs();
void writeSectionData();
+ void writeSegmentData();
void assignOffsets();
@@ -225,12 +340,11 @@ private:
public:
virtual ~ELFWriter() {}
- bool WriteSectionHeaders = true;
+ bool WriteSectionHeaders;
- void finalize() override;
- void write() override;
- ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
- : Writer(Obj, Buf), WriteSectionHeaders(WSH) {}
+ Error finalize() override;
+ Error write() override;
+ ELFWriter(Object &Obj, Buffer &Buf, bool WSH);
};
class BinaryWriter : public Writer {
@@ -241,11 +355,30 @@ private:
public:
~BinaryWriter() {}
- void finalize() override;
- void write() override;
+ Error finalize() override;
+ Error write() override;
BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
};
+class IHexWriter : public Writer {
+ struct SectionCompare {
+ bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
+ };
+
+ std::set<const SectionBase *, SectionCompare> Sections;
+ size_t TotalSize;
+
+ Error checkSection(const SectionBase &Sec);
+ uint64_t writeEntryPointRecord(uint8_t *Buf);
+ uint64_t writeEndOfFileRecord(uint8_t *Buf);
+
+public:
+ ~IHexWriter() {}
+ Error finalize() override;
+ Error write() override;
+ IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
+};
+
class SectionBase {
public:
std::string Name;
@@ -274,11 +407,16 @@ public:
virtual void initialize(SectionTableRef SecTable);
virtual void finalize();
- virtual void removeSectionReferences(const SectionBase *Sec);
- virtual void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+ // Remove references to these sections. The list of sections must be sorted.
+ virtual Error
+ removeSectionReferences(bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove);
+ virtual Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
virtual void accept(SectionVisitor &Visitor) const = 0;
virtual void accept(MutableSectionVisitor &Visitor) = 0;
virtual void markSymbols();
+ virtual void
+ replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
};
class Segment {
@@ -322,6 +460,8 @@ public:
void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
+
+ ArrayRef<uint8_t> getContents() const { return Contents; }
};
class Section : public SectionBase {
@@ -335,7 +475,8 @@ public:
void accept(SectionVisitor &Visitor) const override;
void accept(MutableSectionVisitor &Visitor) override;
- void removeSectionReferences(const SectionBase *Sec) override;
+ Error removeSectionReferences(bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) override;
void initialize(SectionTableRef SecTable) override;
void finalize() override;
};
@@ -354,6 +495,16 @@ public:
OriginalOffset = std::numeric_limits<uint64_t>::max();
}
+ OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
+ uint64_t SecOff) {
+ Name = SecName.str();
+ Type = ELF::SHT_PROGBITS;
+ Addr = SecAddr;
+ Flags = SecFlags;
+ OriginalOffset = SecOff;
+ }
+
+ void appendHexData(StringRef HexData);
void accept(SectionVisitor &Sec) const override;
void accept(MutableSectionVisitor &Visitor) override;
};
@@ -421,7 +572,7 @@ public:
void addString(StringRef Name);
uint32_t findIndex(StringRef Name) const;
- void finalize() override;
+ void prepareForLayout();
void accept(SectionVisitor &Visitor) const override;
void accept(MutableSectionVisitor &Visitor) override;
@@ -440,10 +591,15 @@ enum SymbolShndxType {
SYMBOL_SIMPLE_INDEX = 0,
SYMBOL_ABS = ELF::SHN_ABS,
SYMBOL_COMMON = ELF::SHN_COMMON,
+ SYMBOL_LOPROC = ELF::SHN_LOPROC,
+ SYMBOL_AMDGPU_LDS = ELF::SHN_AMDGPU_LDS,
SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
+ SYMBOL_HIPROC = ELF::SHN_HIPROC,
+ SYMBOL_LOOS = ELF::SHN_LOOS,
+ SYMBOL_HIOS = ELF::SHN_HIOS,
SYMBOL_XINDEX = ELF::SHN_XINDEX,
};
@@ -474,9 +630,14 @@ private:
public:
virtual ~SectionIndexSection() {}
void addIndex(uint32_t Index) {
- Indexes.push_back(Index);
- Size += 4;
+ assert(Size > 0);
+ Indexes.push_back(Index);
}
+
+ void reserve(size_t NumSymbols) {
+ Indexes.reserve(NumSymbols);
+ Size = NumSymbols * 4;
+ }
void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
void initialize(SectionTableRef SecTable) override;
void finalize() override;
@@ -509,7 +670,7 @@ public:
void addSymbol(Twine Name, uint8_t Bind, uint8_t Type, SectionBase *DefinedIn,
uint64_t Value, uint8_t Visibility, uint16_t Shndx,
- uint64_t Size);
+ uint64_t SymbolSize);
void prepareForLayout();
// An 'empty' symbol table still contains a null symbol.
bool empty() const { return Symbols.size() == 1; }
@@ -517,17 +678,21 @@ public:
SectionIndexTable = ShndxTable;
}
const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
+ void fillShndxTable();
const SectionBase *getStrTab() const { return SymbolNames; }
const Symbol *getSymbolByIndex(uint32_t Index) const;
Symbol *getSymbolByIndex(uint32_t Index);
void updateSymbols(function_ref<void(Symbol &)> Callable);
- void removeSectionReferences(const SectionBase *Sec) override;
+ Error removeSectionReferences(bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) override;
void initialize(SectionTableRef SecTable) override;
void finalize() override;
void accept(SectionVisitor &Visitor) const override;
void accept(MutableSectionVisitor &Visitor) override;
- void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+ Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+ void replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
static bool classof(const SectionBase *S) {
return S->Type == ELF::SHT_SYMTAB;
@@ -567,14 +732,14 @@ public:
// that code between the two symbol table types.
template <class SymTabType>
class RelocSectionWithSymtabBase : public RelocationSectionBase {
- SymTabType *Symbols = nullptr;
void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }
protected:
RelocSectionWithSymtabBase() = default;
+ SymTabType *Symbols = nullptr;
+
public:
- void removeSectionReferences(const SectionBase *Sec) override;
void initialize(SectionTableRef SecTable) override;
void finalize() override;
};
@@ -589,8 +754,12 @@ public:
void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
void accept(SectionVisitor &Visitor) const override;
void accept(MutableSectionVisitor &Visitor) override;
- void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+ Error removeSectionReferences(bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) override;
+ Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
+ void replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
static bool classof(const SectionBase *S) {
if (S->Flags & ELF::SHF_ALLOC)
@@ -624,8 +793,10 @@ public:
void accept(SectionVisitor &) const override;
void accept(MutableSectionVisitor &Visitor) override;
void finalize() override;
- void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
+ Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
void markSymbols() override;
+ void replaceSectionReferences(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) override;
static bool classof(const SectionBase *S) {
return S->Type == ELF::SHT_GROUP;
@@ -662,6 +833,9 @@ public:
void accept(SectionVisitor &) const override;
void accept(MutableSectionVisitor &Visitor) override;
+ Error removeSectionReferences(
+ bool AllowBrokenLinks,
+ function_ref<bool(const SectionBase *)> ToRemove) override;
static bool classof(const SectionBase *S) {
if (!(S->Flags & ELF::SHF_ALLOC))
@@ -677,11 +851,11 @@ private:
StringRef FileName;
uint32_t CRC32;
- void init(StringRef File, StringRef Data);
+ void init(StringRef File);
public:
// If we add this section from an external source we can use this ctor.
- explicit GnuDebugLinkSection(StringRef File);
+ explicit GnuDebugLinkSection(StringRef File, uint32_t PrecomputedCRC);
void accept(SectionVisitor &Visitor) const override;
void accept(MutableSectionVisitor &Visitor) override;
};
@@ -697,21 +871,41 @@ using object::ELFFile;
using object::ELFObjectFile;
using object::OwningBinary;
-class BinaryELFBuilder {
+class BasicELFBuilder {
+protected:
uint16_t EMachine;
- MemoryBuffer *MemBuf;
std::unique_ptr<Object> Obj;
void initFileHeader();
void initHeaderSegment();
StringTableSection *addStrTab();
SymbolTableSection *addSymTab(StringTableSection *StrTab);
- void addData(SymbolTableSection *SymTab);
void initSections();
public:
+ BasicELFBuilder(uint16_t EM)
+ : EMachine(EM), Obj(llvm::make_unique<Object>()) {}
+};
+
+class BinaryELFBuilder : public BasicELFBuilder {
+ MemoryBuffer *MemBuf;
+ void addData(SymbolTableSection *SymTab);
+
+public:
BinaryELFBuilder(uint16_t EM, MemoryBuffer *MB)
- : EMachine(EM), MemBuf(MB), Obj(llvm::make_unique<Object>()) {}
+ : BasicELFBuilder(EM), MemBuf(MB) {}
+
+ std::unique_ptr<Object> build();
+};
+
+class IHexELFBuilder : public BasicELFBuilder {
+ const std::vector<IHexRecord> &Records;
+
+ void addDataSections();
+
+public:
+ IHexELFBuilder(const std::vector<IHexRecord> &Records)
+ : BasicELFBuilder(ELF::EM_386), Records(Records) {}
std::unique_ptr<Object> build();
};
@@ -724,17 +918,23 @@ private:
const ELFFile<ELFT> &ElfFile;
Object &Obj;
+ size_t EhdrOffset = 0;
+ Optional<StringRef> ExtractPartition;
void setParentSegment(Segment &Child);
- void readProgramHeaders();
+ void readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
void initGroupSection(GroupSection *GroupSec);
void initSymbolTable(SymbolTableSection *SymTab);
void readSectionHeaders();
+ void readSections();
+ void findEhdrOffset();
SectionBase &makeSection(const Elf_Shdr &Shdr);
public:
- ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj)
- : ElfFile(*ElfObj.getELFFile()), Obj(Obj) {}
+ ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
+ Optional<StringRef> ExtractPartition)
+ : ElfFile(*ElfObj.getELFFile()), Obj(Obj),
+ ExtractPartition(ExtractPartition) {}
void build();
};
@@ -749,12 +949,36 @@ public:
std::unique_ptr<Object> create() const override;
};
+class IHexReader : public Reader {
+ MemoryBuffer *MemBuf;
+
+ Expected<std::vector<IHexRecord>> parse() const;
+ Error parseError(size_t LineNo, Error E) const {
+ return LineNo == -1U
+ ? createFileError(MemBuf->getBufferIdentifier(), std::move(E))
+ : createFileError(MemBuf->getBufferIdentifier(), LineNo,
+ std::move(E));
+ }
+ template <typename... Ts>
+ Error parseError(size_t LineNo, char const *Fmt, const Ts &... Vals) const {
+ Error E = createStringError(errc::invalid_argument, Fmt, Vals...);
+ return parseError(LineNo, std::move(E));
+ }
+
+public:
+ IHexReader(MemoryBuffer *MB) : MemBuf(MB) {}
+
+ std::unique_ptr<Object> create() const override;
+};
+
class ELFReader : public Reader {
Binary *Bin;
+ Optional<StringRef> ExtractPartition;
public:
std::unique_ptr<Object> create() const override;
- explicit ELFReader(Binary *B) : Bin(B) {}
+ explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
+ : Bin(B), ExtractPartition(ExtractPartition) {}
};
class Object {
@@ -764,6 +988,7 @@ private:
std::vector<SecPtr> Sections;
std::vector<SegPtr> Segments;
+ std::vector<SecPtr> RemovedSections;
public:
template <class T>
@@ -792,6 +1017,7 @@ public:
uint32_t Version;
uint32_t Flags;
+ bool HadShdrs = true;
StringTableSection *SectionNames = nullptr;
SymbolTableSection *SymbolTable = nullptr;
SectionIndexSection *SectionIndexTable = nullptr;
@@ -801,11 +1027,19 @@ public:
ConstRange<SectionBase> sections() const {
return make_pointee_range(Sections);
}
+ SectionBase *findSection(StringRef Name) {
+ auto SecIt =
+ find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
+ return SecIt == Sections.end() ? nullptr : SecIt->get();
+ }
+ SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
+
Range<Segment> segments() { return make_pointee_range(Segments); }
ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
- void removeSections(std::function<bool(const SectionBase &)> ToRemove);
- void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
+ Error removeSections(bool AllowBrokenLinks,
+ std::function<bool(const SectionBase &)> ToRemove);
+ Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
template <class T, class... Ts> T &addSection(Ts &&... Args) {
auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
auto Ptr = Sec.get();
diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
new file mode 100644
index 000000000000..19343b65dd1e
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -0,0 +1,68 @@
+//===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOObjcopy.h"
+#include "../CopyConfig.h"
+#include "MachOReader.h"
+#include "MachOWriter.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+using namespace object;
+
+static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+ if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
+ Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
+ !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
+ !Config.AllocSectionsPrefix.empty() || !Config.AddSection.empty() ||
+ !Config.DumpSection.empty() || !Config.KeepSection.empty() ||
+ !Config.OnlySection.empty() || !Config.SymbolsToGlobalize.empty() ||
+ !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
+ !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
+ !Config.SectionsToRename.empty() || !Config.SymbolsToRename.empty() ||
+ !Config.UnneededSymbolsToRemove.empty() ||
+ !Config.SetSectionFlags.empty() || !Config.ToRemove.empty() ||
+ Config.ExtractDWO || Config.KeepFileSymbols || Config.LocalizeHidden ||
+ Config.PreserveDates || Config.StripDWO || Config.StripNonAlloc ||
+ Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
+ Config.StripDebug || Config.StripUnneeded ||
+ Config.DiscardMode != DiscardType::None ||
+ !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
+ return createStringError(llvm::errc::invalid_argument,
+ "option not supported by llvm-objcopy for MachO");
+ }
+
+ return Error::success();
+}
+
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+ object::MachOObjectFile &In, Buffer &Out) {
+ MachOReader Reader(In);
+ std::unique_ptr<Object> O = Reader.create();
+ if (!O)
+ return createFileError(
+ Config.InputFilename,
+ createStringError(object_error::parse_failed,
+ "unable to deserialize MachO object"));
+
+ if (Error E = handleArgs(Config, *O))
+ return createFileError(Config.InputFilename, std::move(E));
+
+ MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+ if (auto E = Writer.finalize())
+ return E;
+ return Writer.write();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOObjcopy.h b/tools/llvm-objcopy/MachO/MachOObjcopy.h
new file mode 100644
index 000000000000..f34e361db7ea
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -0,0 +1,31 @@
+//===- MachOObjcopy.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+#define LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
+
+namespace llvm {
+class Error;
+
+namespace object {
+class MachOObjectFile;
+class MachOUniversalBinary;
+} // end namespace object
+
+namespace objcopy {
+struct CopyConfig;
+class Buffer;
+
+namespace macho {
+Error executeObjcopyOnBinary(const CopyConfig &Config,
+ object::MachOObjectFile &In, Buffer &Out);
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_TOOLS_OBJCOPY_MACHOOBJCOPY_H
diff --git a/tools/llvm-objcopy/MachO/MachOReader.cpp b/tools/llvm-objcopy/MachO/MachOReader.cpp
new file mode 100644
index 000000000000..d31293034608
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -0,0 +1,241 @@
+//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOReader.h"
+#include "../llvm-objcopy.h"
+#include "Object.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+void MachOReader::readHeader(Object &O) const {
+ O.Header.Magic = MachOObj.getHeader().magic;
+ O.Header.CPUType = MachOObj.getHeader().cputype;
+ O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;
+ O.Header.FileType = MachOObj.getHeader().filetype;
+ O.Header.NCmds = MachOObj.getHeader().ncmds;
+ O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;
+ O.Header.Flags = MachOObj.getHeader().flags;
+}
+
+template <typename SectionType>
+Section constructSectionCommon(SectionType Sec) {
+ Section S;
+ S.Sectname =
+ StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)))
+ .str();
+ S.Segname =
+ StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname))).str();
+ S.Addr = Sec.addr;
+ S.Size = Sec.size;
+ S.Offset = Sec.offset;
+ S.Align = Sec.align;
+ S.RelOff = Sec.reloff;
+ S.NReloc = Sec.nreloc;
+ S.Flags = Sec.flags;
+ S.Reserved1 = Sec.reserved1;
+ S.Reserved2 = Sec.reserved2;
+ S.Reserved3 = 0;
+ return S;
+}
+
+template <typename SectionType> Section constructSection(SectionType Sec);
+
+template <> Section constructSection(MachO::section Sec) {
+ return constructSectionCommon(Sec);
+}
+
+template <> Section constructSection(MachO::section_64 Sec) {
+ Section S = constructSectionCommon(Sec);
+ S.Reserved3 = Sec.reserved3;
+ return S;
+}
+
+// TODO: get rid of reportError and make MachOReader return Expected<> instead.
+template <typename SectionType, typename SegmentType>
+std::vector<Section>
+extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
+ const object::MachOObjectFile &MachOObj,
+ size_t &NextSectionIndex) {
+ auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
+ const SectionType *Curr =
+ reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
+ std::vector<Section> Sections;
+ for (; reinterpret_cast<const void *>(Curr) < End; Curr++) {
+ if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {
+ SectionType Sec;
+ memcpy((void *)&Sec, Curr, sizeof(SectionType));
+ MachO::swapStruct(Sec);
+ Sections.push_back(constructSection(Sec));
+ } else {
+ Sections.push_back(constructSection(*Curr));
+ }
+
+ Section &S = Sections.back();
+
+ Expected<object::SectionRef> SecRef =
+ MachOObj.getSection(NextSectionIndex++);
+ if (!SecRef)
+ reportError(MachOObj.getFileName(), SecRef.takeError());
+
+ if (Expected<ArrayRef<uint8_t>> E =
+ MachOObj.getSectionContents(SecRef->getRawDataRefImpl()))
+ S.Content =
+ StringRef(reinterpret_cast<const char *>(E->data()), E->size());
+ else
+ reportError(MachOObj.getFileName(), E.takeError());
+
+ S.Relocations.reserve(S.NReloc);
+ for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
+ RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
+ RI != RE; ++RI) {
+ RelocationInfo R;
+ R.Symbol = nullptr; // We'll fill this field later.
+ R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
+ R.Scattered = MachOObj.isRelocationScattered(R.Info);
+ S.Relocations.push_back(R);
+ }
+
+ assert(S.NReloc == S.Relocations.size() &&
+ "Incorrect number of relocations");
+ }
+ return Sections;
+}
+
+void MachOReader::readLoadCommands(Object &O) const {
+ // In Mach-O, section indices start from 1.
+ size_t NextSectionIndex = 1;
+ for (auto LoadCmd : MachOObj.load_commands()) {
+ LoadCommand LC;
+ switch (LoadCmd.C.cmd) {
+ case MachO::LC_SEGMENT:
+ LC.Sections = extractSections<MachO::section, MachO::segment_command>(
+ LoadCmd, MachOObj, NextSectionIndex);
+ break;
+ case MachO::LC_SEGMENT_64:
+ LC.Sections =
+ extractSections<MachO::section_64, MachO::segment_command_64>(
+ LoadCmd, MachOObj, NextSectionIndex);
+ break;
+ case MachO::LC_SYMTAB:
+ O.SymTabCommandIndex = O.LoadCommands.size();
+ break;
+ case MachO::LC_DYLD_INFO:
+ case MachO::LC_DYLD_INFO_ONLY:
+ O.DyLdInfoCommandIndex = O.LoadCommands.size();
+ break;
+ }
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
+ case MachO::LCName: \
+ memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \
+ sizeof(MachO::LCStruct)); \
+ if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \
+ MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \
+ LC.Payload = ArrayRef<uint8_t>( \
+ reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \
+ sizeof(MachO::LCStruct), \
+ LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \
+ break;
+
+ switch (LoadCmd.C.cmd) {
+ default:
+ memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,
+ sizeof(MachO::load_command));
+ if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)
+ MachO::swapStruct(LC.MachOLoadCommand.load_command_data);
+ LC.Payload = ArrayRef<uint8_t>(
+ reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +
+ sizeof(MachO::load_command),
+ LoadCmd.C.cmdsize - sizeof(MachO::load_command));
+ break;
+#include "llvm/BinaryFormat/MachO.def"
+ }
+ O.LoadCommands.push_back(std::move(LC));
+ }
+}
+
+template <typename nlist_t>
+SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
+ assert(nlist.n_strx < StrTable.size() &&
+ "n_strx exceeds the size of the string table");
+ SymbolEntry SE;
+ SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
+ SE.n_type = nlist.n_type;
+ SE.n_sect = nlist.n_sect;
+ SE.n_desc = nlist.n_desc;
+ SE.n_value = nlist.n_value;
+ return SE;
+}
+
+void MachOReader::readSymbolTable(Object &O) const {
+ StringRef StrTable = MachOObj.getStringTableData();
+ for (auto Symbol : MachOObj.symbols()) {
+ SymbolEntry SE =
+ (MachOObj.is64Bit()
+ ? constructSymbolEntry(
+ StrTable,
+ MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
+ : constructSymbolEntry(
+ StrTable,
+ MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
+
+ O.SymTable.Symbols.push_back(llvm::make_unique<SymbolEntry>(SE));
+ }
+}
+
+void MachOReader::setSymbolInRelocationInfo(Object &O) const {
+ for (auto &LC : O.LoadCommands)
+ for (auto &Sec : LC.Sections)
+ for (auto &Reloc : Sec.Relocations)
+ if (!Reloc.Scattered) {
+ auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info);
+ Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum);
+ }
+}
+
+void MachOReader::readRebaseInfo(Object &O) const {
+ O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();
+}
+
+void MachOReader::readBindInfo(Object &O) const {
+ O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();
+}
+
+void MachOReader::readWeakBindInfo(Object &O) const {
+ O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();
+}
+
+void MachOReader::readLazyBindInfo(Object &O) const {
+ O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();
+}
+
+void MachOReader::readExportInfo(Object &O) const {
+ O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
+}
+
+std::unique_ptr<Object> MachOReader::create() const {
+ auto Obj = llvm::make_unique<Object>();
+ readHeader(*Obj);
+ readLoadCommands(*Obj);
+ readSymbolTable(*Obj);
+ setSymbolInRelocationInfo(*Obj);
+ readRebaseInfo(*Obj);
+ readBindInfo(*Obj);
+ readWeakBindInfo(*Obj);
+ readLazyBindInfo(*Obj);
+ readExportInfo(*Obj);
+ return Obj;
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
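The reader above repeatedly applies one pattern: copy the on-disk struct, then byte-swap it only when the file's endianness differs from the host's. A minimal standalone illustration of that check and swap for a single 32-bit field (plain C++, not the LLVM helpers used in the patch) might look like:

#include <cstdint>
#include <cstdio>
#include <cstring>

static bool isLittleEndianHost() {
  const uint16_t One = 1;
  uint8_t FirstByte;
  std::memcpy(&FirstByte, &One, 1);
  return FirstByte == 1; // Low byte stored first on little-endian hosts.
}

static uint32_t byteSwap32(uint32_t V) {
  return ((V & 0x000000FFu) << 24) | ((V & 0x0000FF00u) << 8) |
         ((V & 0x00FF0000u) >> 8) | ((V & 0xFF000000u) >> 24);
}

int main() {
  // Pretend these four bytes were read from a big-endian Mach-O file and hold
  // a cmdsize of 0x20.
  const uint8_t FileBytes[4] = {0x00, 0x00, 0x00, 0x20};
  bool FileIsLittleEndian = false;

  uint32_t RawCmdSize;
  std::memcpy(&RawCmdSize, FileBytes, sizeof(RawCmdSize));

  // Swap only when the file and host disagree, mirroring the
  // "isLittleEndian() != IsLittleEndianHost" checks in the reader above.
  uint32_t CmdSize = (FileIsLittleEndian != isLittleEndianHost())
                         ? byteSwap32(RawCmdSize)
                         : RawCmdSize;
  std::printf("cmdsize = 0x%x\n", CmdSize); // 0x20 on either kind of host.
}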
diff --git a/tools/llvm-objcopy/MachO/MachOReader.h b/tools/llvm-objcopy/MachO/MachOReader.h
new file mode 100644
index 000000000000..795e5cc2363d
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOReader.h
@@ -0,0 +1,48 @@
+//===- MachOReader.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOObjcopy.h"
+#include "Object.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+// The hierarchy of readers is responsible for parsing different inputs:
+// raw binaries and regular MachO object files.
+class Reader {
+public:
+ virtual ~Reader(){};
+ virtual std::unique_ptr<Object> create() const = 0;
+};
+
+class MachOReader : public Reader {
+ const object::MachOObjectFile &MachOObj;
+
+ void readHeader(Object &O) const;
+ void readLoadCommands(Object &O) const;
+ void readSymbolTable(Object &O) const;
+ void setSymbolInRelocationInfo(Object &O) const;
+ void readRebaseInfo(Object &O) const;
+ void readBindInfo(Object &O) const;
+ void readWeakBindInfo(Object &O) const;
+ void readLazyBindInfo(Object &O) const;
+ void readExportInfo(Object &O) const;
+
+public:
+ explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}
+
+ std::unique_ptr<Object> create() const override;
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/MachOWriter.cpp b/tools/llvm-objcopy/MachO/MachOWriter.cpp
new file mode 100644
index 000000000000..74200c5aa62a
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -0,0 +1,590 @@
+//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "Object.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+size_t MachOWriter::headerSize() const {
+ return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+}
+
+size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
+
+size_t MachOWriter::symTableSize() const {
+ return O.SymTable.Symbols.size() *
+ (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
+}
+
+size_t MachOWriter::totalSize() const {
+ // Walk from the tail to the head, looking for an appropriate "anchor" from
+ // which to calculate the total size. We assume that every offset is either
+ // valid or 0 (0 indicates that the corresponding part is missing).
+
+ SmallVector<size_t, 7> Ends;
+ if (O.SymTabCommandIndex) {
+ const MachO::symtab_command &SymTabCommand =
+ O.LoadCommands[*O.SymTabCommandIndex]
+ .MachOLoadCommand.symtab_command_data;
+ if (SymTabCommand.symoff) {
+ assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
+ "Incorrect number of symbols");
+ Ends.push_back(SymTabCommand.symoff + symTableSize());
+ }
+ if (SymTabCommand.stroff) {
+ assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
+ "Incorrect string table size");
+ Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
+ }
+ }
+ if (O.DyLdInfoCommandIndex) {
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ if (DyLdInfoCommand.rebase_off) {
+ assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+ "Incorrect rebase opcodes size");
+ Ends.push_back(DyLdInfoCommand.rebase_off + DyLdInfoCommand.rebase_size);
+ }
+ if (DyLdInfoCommand.bind_off) {
+ assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+ "Incorrect bind opcodes size");
+ Ends.push_back(DyLdInfoCommand.bind_off + DyLdInfoCommand.bind_size);
+ }
+ if (DyLdInfoCommand.weak_bind_off) {
+ assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+ "Incorrect weak bind opcodes size");
+ Ends.push_back(DyLdInfoCommand.weak_bind_off +
+ DyLdInfoCommand.weak_bind_size);
+ }
+ if (DyLdInfoCommand.lazy_bind_off) {
+ assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+ "Incorrect lazy bind opcodes size");
+ Ends.push_back(DyLdInfoCommand.lazy_bind_off +
+ DyLdInfoCommand.lazy_bind_size);
+ }
+ if (DyLdInfoCommand.export_off) {
+ assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+ "Incorrect trie size");
+ Ends.push_back(DyLdInfoCommand.export_off + DyLdInfoCommand.export_size);
+ }
+ }
+
+ // Also consider the ends of sections and their relocation entries.
+ for (const auto &LC : O.LoadCommands)
+ for (const auto &S : LC.Sections) {
+ Ends.push_back(S.Offset + S.Size);
+ if (S.RelOff)
+ Ends.push_back(S.RelOff +
+ S.NReloc * sizeof(MachO::any_relocation_info));
+ }
+
+ if (!Ends.empty())
+ return *std::max_element(Ends.begin(), Ends.end());
+
+ // Otherwise, we have only Mach header and load commands.
+ return headerSize() + loadCommandsSize();
+}
+
+void MachOWriter::writeHeader() {
+ MachO::mach_header_64 Header;
+
+ Header.magic = O.Header.Magic;
+ Header.cputype = O.Header.CPUType;
+ Header.cpusubtype = O.Header.CPUSubType;
+ Header.filetype = O.Header.FileType;
+ Header.ncmds = O.Header.NCmds;
+ Header.sizeofcmds = O.Header.SizeOfCmds;
+ Header.flags = O.Header.Flags;
+ Header.reserved = O.Header.Reserved;
+
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(Header);
+
+ auto HeaderSize =
+ Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+ memcpy(B.getBufferStart(), &Header, HeaderSize);
+}
+
+void MachOWriter::updateSymbolIndexes() {
+ uint32_t Index = 0;
+ for (auto &Symbol : O.SymTable.Symbols) {
+ Symbol->Index = Index;
+ Index++;
+ }
+}
+
+void MachOWriter::writeLoadCommands() {
+ uint8_t *Begin = B.getBufferStart() + headerSize();
+ for (const auto &LC : O.LoadCommands) {
+ // Construct a load command.
+ MachO::macho_load_command MLC = LC.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(MLC.segment_command_data);
+ memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
+ Begin += sizeof(MachO::segment_command);
+
+ for (const auto &Sec : LC.Sections)
+ writeSectionInLoadCommand<MachO::section>(Sec, Begin);
+ continue;
+ case MachO::LC_SEGMENT_64:
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(MLC.segment_command_64_data);
+ memcpy(Begin, &MLC.segment_command_64_data,
+ sizeof(MachO::segment_command_64));
+ Begin += sizeof(MachO::segment_command_64);
+
+ for (const auto &Sec : LC.Sections)
+ writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
+ continue;
+ }
+
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
+ case MachO::LCName: \
+ assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \
+ MLC.load_command_data.cmdsize); \
+ if (IsLittleEndian != sys::IsLittleEndianHost) \
+ MachO::swapStruct(MLC.LCStruct##_data); \
+ memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \
+ Begin += sizeof(MachO::LCStruct); \
+ memcpy(Begin, LC.Payload.data(), LC.Payload.size()); \
+ Begin += LC.Payload.size(); \
+ break;
+
+ // Copy the load command as it is.
+ switch (MLC.load_command_data.cmd) {
+ default:
+ assert(sizeof(MachO::load_command) + LC.Payload.size() ==
+ MLC.load_command_data.cmdsize);
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(MLC.load_command_data);
+ memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
+ Begin += sizeof(MachO::load_command);
+ memcpy(Begin, LC.Payload.data(), LC.Payload.size());
+ Begin += LC.Payload.size();
+ break;
+#include "llvm/BinaryFormat/MachO.def"
+ }
+ }
+}
+
+template <typename StructType>
+void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
+ StructType Temp;
+ assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
+ assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
+ "too long section name");
+ memset(&Temp, 0, sizeof(StructType));
+ memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
+ memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
+ Temp.addr = Sec.Addr;
+ Temp.size = Sec.Size;
+ Temp.offset = Sec.Offset;
+ Temp.align = Sec.Align;
+ Temp.reloff = Sec.RelOff;
+ Temp.nreloc = Sec.NReloc;
+ Temp.flags = Sec.Flags;
+ Temp.reserved1 = Sec.Reserved1;
+ Temp.reserved2 = Sec.Reserved2;
+
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(Temp);
+ memcpy(Out, &Temp, sizeof(StructType));
+ Out += sizeof(StructType);
+}
+
+void MachOWriter::writeSections() {
+ for (const auto &LC : O.LoadCommands)
+ for (const auto &Sec : LC.Sections) {
+ if (Sec.isVirtualSection())
+ continue;
+
+ assert(Sec.Offset && "Section offset can not be zero");
+ assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
+ memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
+ Sec.Content.size());
+ for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) {
+ auto RelocInfo = Sec.Relocations[Index];
+ if (!RelocInfo.Scattered) {
+ auto *Info =
+ reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info);
+ Info->r_symbolnum = RelocInfo.Symbol->Index;
+ }
+
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(
+ reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
+ memcpy(B.getBufferStart() + Sec.RelOff +
+ Index * sizeof(MachO::any_relocation_info),
+ &RelocInfo.Info, sizeof(RelocInfo.Info));
+ }
+ }
+}
+
+template <typename NListType>
+void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
+ uint32_t Nstrx) {
+ NListType ListEntry;
+ ListEntry.n_strx = Nstrx;
+ ListEntry.n_type = SE.n_type;
+ ListEntry.n_sect = SE.n_sect;
+ ListEntry.n_desc = SE.n_desc;
+ ListEntry.n_value = SE.n_value;
+
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(ListEntry);
+ memcpy(Out, reinterpret_cast<const char *>(&ListEntry), sizeof(NListType));
+ Out += sizeof(NListType);
+}
+
+void MachOWriter::writeStringTable() {
+ if (!O.SymTabCommandIndex)
+ return;
+ const MachO::symtab_command &SymTabCommand =
+ O.LoadCommands[*O.SymTabCommandIndex]
+ .MachOLoadCommand.symtab_command_data;
+
+ uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
+ StrTableBuilder.write(StrTable);
+}
+
+void MachOWriter::writeSymbolTable() {
+ if (!O.SymTabCommandIndex)
+ return;
+ const MachO::symtab_command &SymTabCommand =
+ O.LoadCommands[*O.SymTabCommandIndex]
+ .MachOLoadCommand.symtab_command_data;
+
+ char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff;
+ for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
+ Iter != End; Iter++) {
+ SymbolEntry *Sym = Iter->get();
+ auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
+
+ if (Is64Bit)
+ writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
+ else
+ writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
+ }
+}
+
+void MachOWriter::writeRebaseInfo() {
+ if (!O.DyLdInfoCommandIndex)
+ return;
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.rebase_off;
+ assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) &&
+ "Incorrect rebase opcodes size");
+ memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size());
+}
+
+void MachOWriter::writeBindInfo() {
+ if (!O.DyLdInfoCommandIndex)
+ return;
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.bind_off;
+ assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) &&
+ "Incorrect bind opcodes size");
+ memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size());
+}
+
+void MachOWriter::writeWeakBindInfo() {
+ if (!O.DyLdInfoCommandIndex)
+ return;
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.weak_bind_off;
+ assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+ "Incorrect weak bind opcodes size");
+ memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size());
+}
+
+void MachOWriter::writeLazyBindInfo() {
+ if (!O.DyLdInfoCommandIndex)
+ return;
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.lazy_bind_off;
+ assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+ "Incorrect lazy bind opcodes size");
+ memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size());
+}
+
+void MachOWriter::writeExportInfo() {
+ if (!O.DyLdInfoCommandIndex)
+ return;
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ char *Out = (char *)B.getBufferStart() + DyLdInfoCommand.export_off;
+ assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) &&
+ "Incorrect export trie size");
+ memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
+}
+
+void MachOWriter::writeTail() {
+ typedef void (MachOWriter::*WriteHandlerType)(void);
+ typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
+ SmallVector<WriteOperation, 7> Queue;
+
+ if (O.SymTabCommandIndex) {
+ const MachO::symtab_command &SymTabCommand =
+ O.LoadCommands[*O.SymTabCommandIndex]
+ .MachOLoadCommand.symtab_command_data;
+ if (SymTabCommand.symoff)
+ Queue.push_back({SymTabCommand.symoff, &MachOWriter::writeSymbolTable});
+ if (SymTabCommand.stroff)
+ Queue.push_back({SymTabCommand.stroff, &MachOWriter::writeStringTable});
+ }
+
+ if (O.DyLdInfoCommandIndex) {
+ const MachO::dyld_info_command &DyLdInfoCommand =
+ O.LoadCommands[*O.DyLdInfoCommandIndex]
+ .MachOLoadCommand.dyld_info_command_data;
+ if (DyLdInfoCommand.rebase_off)
+ Queue.push_back(
+ {DyLdInfoCommand.rebase_off, &MachOWriter::writeRebaseInfo});
+ if (DyLdInfoCommand.bind_off)
+ Queue.push_back({DyLdInfoCommand.bind_off, &MachOWriter::writeBindInfo});
+ if (DyLdInfoCommand.weak_bind_off)
+ Queue.push_back(
+ {DyLdInfoCommand.weak_bind_off, &MachOWriter::writeWeakBindInfo});
+ if (DyLdInfoCommand.lazy_bind_off)
+ Queue.push_back(
+ {DyLdInfoCommand.lazy_bind_off, &MachOWriter::writeLazyBindInfo});
+ if (DyLdInfoCommand.export_off)
+ Queue.push_back(
+ {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
+ }
+
+ llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
+ return LHS.first < RHS.first;
+ });
+
+ for (auto WriteOp : Queue)
+ (this->*WriteOp.second)();
+}
+
+void MachOWriter::updateSizeOfCmds() {
+ auto Size = 0;
+ for (const auto &LC : O.LoadCommands) {
+ auto &MLC = LC.MachOLoadCommand;
+ auto cmd = MLC.load_command_data.cmd;
+
+ switch (cmd) {
+ case MachO::LC_SEGMENT:
+ Size += sizeof(MachO::segment_command) +
+ sizeof(MachO::section) * LC.Sections.size();
+ continue;
+ case MachO::LC_SEGMENT_64:
+ Size += sizeof(MachO::segment_command_64) +
+ sizeof(MachO::section_64) * LC.Sections.size();
+ continue;
+ }
+
+ switch (cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
+ case MachO::LCName: \
+ Size += sizeof(MachO::LCStruct); \
+ break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+ }
+ }
+
+ O.Header.SizeOfCmds = Size;
+}
+
+// Updates the index and the number of local/external/undefined symbols. Here
+// we assume that MLC is an LC_DYSYMTAB and that the nlist entries in the
+// symbol table are already sorted by those types.
+void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
+ uint32_t NumLocalSymbols = 0;
+ auto Iter = O.SymTable.Symbols.begin();
+ auto End = O.SymTable.Symbols.end();
+ for (; Iter != End; Iter++) {
+ if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
+ break;
+
+ NumLocalSymbols++;
+ }
+
+ uint32_t NumExtDefSymbols = 0;
+ for (; Iter != End; Iter++) {
+ if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
+ break;
+
+ NumExtDefSymbols++;
+ }
+
+ MLC.dysymtab_command_data.ilocalsym = 0;
+ MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
+ MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
+ MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
+ MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
+ MLC.dysymtab_command_data.nundefsym =
+ O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
+}
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified.
+Error MachOWriter::layout() {
+ auto SizeOfCmds = loadCommandsSize();
+ auto Offset = headerSize() + SizeOfCmds;
+ O.Header.NCmds = O.LoadCommands.size();
+ O.Header.SizeOfCmds = SizeOfCmds;
+
+ // Lay out sections.
+ for (auto &LC : O.LoadCommands) {
+ uint64_t FileOff = Offset;
+ uint64_t VMSize = 0;
+ uint64_t FileOffsetInSegment = 0;
+ for (auto &Sec : LC.Sections) {
+ if (!Sec.isVirtualSection()) {
+ auto FilePaddingSize =
+ OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
+ Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
+ Sec.Size = Sec.Content.size();
+ FileOffsetInSegment += FilePaddingSize + Sec.Size;
+ }
+
+ VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+ }
+
+ // TODO: Handle the __PAGEZERO segment.
+ auto &MLC = LC.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ MLC.segment_command_data.cmdsize =
+ sizeof(MachO::segment_command) +
+ sizeof(MachO::section) * LC.Sections.size();
+ MLC.segment_command_data.nsects = LC.Sections.size();
+ MLC.segment_command_data.fileoff = FileOff;
+ MLC.segment_command_data.vmsize = VMSize;
+ MLC.segment_command_data.filesize = FileOffsetInSegment;
+ break;
+ case MachO::LC_SEGMENT_64:
+ MLC.segment_command_64_data.cmdsize =
+ sizeof(MachO::segment_command_64) +
+ sizeof(MachO::section_64) * LC.Sections.size();
+ MLC.segment_command_64_data.nsects = LC.Sections.size();
+ MLC.segment_command_64_data.fileoff = FileOff;
+ MLC.segment_command_64_data.vmsize = VMSize;
+ MLC.segment_command_64_data.filesize = FileOffsetInSegment;
+ break;
+ }
+
+ Offset += FileOffsetInSegment;
+ }
+
+ // Lay out relocations.
+ for (auto &LC : O.LoadCommands)
+ for (auto &Sec : LC.Sections) {
+ Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+ Sec.NReloc = Sec.Relocations.size();
+ Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+ }
+
+ // Lay out tail stuff.
+ auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+ for (auto &LC : O.LoadCommands) {
+ auto &MLC = LC.MachOLoadCommand;
+ auto cmd = MLC.load_command_data.cmd;
+ switch (cmd) {
+ case MachO::LC_SYMTAB:
+ MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
+ MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
+ MLC.symtab_command_data.symoff = Offset;
+ Offset += NListSize * MLC.symtab_command_data.nsyms;
+ MLC.symtab_command_data.stroff = Offset;
+ Offset += MLC.symtab_command_data.strsize;
+ break;
+ case MachO::LC_DYSYMTAB: {
+ if (MLC.dysymtab_command_data.ntoc != 0 ||
+ MLC.dysymtab_command_data.nmodtab != 0 ||
+ MLC.dysymtab_command_data.nextrefsyms != 0 ||
+ MLC.dysymtab_command_data.nlocrel != 0 ||
+ MLC.dysymtab_command_data.nextrel != 0)
+ return createStringError(llvm::errc::not_supported,
+ "shared library is not yet supported");
+
+ if (MLC.dysymtab_command_data.nindirectsyms != 0)
+ return createStringError(llvm::errc::not_supported,
+ "indirect symbol table is not yet supported");
+
+ updateDySymTab(MLC);
+ break;
+ }
+ case MachO::LC_SEGMENT:
+ case MachO::LC_SEGMENT_64:
+ case MachO::LC_VERSION_MIN_MACOSX:
+ case MachO::LC_BUILD_VERSION:
+ case MachO::LC_ID_DYLIB:
+ case MachO::LC_LOAD_DYLIB:
+ case MachO::LC_UUID:
+ case MachO::LC_SOURCE_VERSION:
+ // Nothing to update.
+ break;
+ default:
+ // Abort if it's unsupported in order to prevent corrupting the object.
+ return createStringError(llvm::errc::not_supported,
+ "unsupported load command (cmd=0x%x)", cmd);
+ }
+ }
+
+ return Error::success();
+}
+
+void MachOWriter::constructStringTable() {
+ for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
+ StrTableBuilder.add(Sym->Name);
+ StrTableBuilder.finalize();
+}
+
+Error MachOWriter::finalize() {
+ updateSizeOfCmds();
+ constructStringTable();
+
+ if (auto E = layout())
+ return E;
+
+ return Error::success();
+}
+
+Error MachOWriter::write() {
+ if (Error E = B.allocate(totalSize()))
+ return E;
+ memset(B.getBufferStart(), 0, totalSize());
+ writeHeader();
+ updateSymbolIndexes();
+ writeLoadCommands();
+ writeSections();
+ writeTail();
+ return B.commit();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
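MachOWriter::layout() above pads each section's file offset up to a multiple of 1 << Sec.Align before placing it. A rough standalone sketch of that padding arithmetic follows; the helper name below is illustrative, while the patch itself relies on LLVM's OffsetToAlignment:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Returns how many padding bytes are needed to round Value up to the next
// multiple of Align (Align must be a power of two).
static uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Align - (Value & (Align - 1))) & (Align - 1);
}

int main() {
  uint64_t FileOffsetInSegment = 0x123; // Running offset within the segment.
  uint64_t SecAlign = 4;                // Alignment exponent: 1 << 4 == 16.

  uint64_t Pad = offsetToAlignment(FileOffsetInSegment, 1ull << SecAlign);
  uint64_t SecOffset = FileOffsetInSegment + Pad;

  // Prints "pad 13 bytes -> section offset 0x130" (0x130 is 16-byte aligned).
  std::printf("pad %llu bytes -> section offset 0x%llx\n",
              (unsigned long long)Pad, (unsigned long long)SecOffset);
}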
diff --git a/tools/llvm-objcopy/MachO/MachOWriter.h b/tools/llvm-objcopy/MachO/MachOWriter.h
new file mode 100644
index 000000000000..ecf12d62de2c
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -0,0 +1,64 @@
+//===- MachOWriter.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../Buffer.h"
+#include "MachOObjcopy.h"
+#include "Object.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+
+namespace llvm {
+class Error;
+
+namespace objcopy {
+namespace macho {
+
+class MachOWriter {
+ Object &O;
+ bool Is64Bit;
+ bool IsLittleEndian;
+ Buffer &B;
+ StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+
+ size_t headerSize() const;
+ size_t loadCommandsSize() const;
+ size_t symTableSize() const;
+ size_t strTableSize() const;
+
+ void updateDySymTab(MachO::macho_load_command &MLC);
+ void updateSizeOfCmds();
+ void updateSymbolIndexes();
+ void constructStringTable();
+ Error layout();
+
+ void writeHeader();
+ void writeLoadCommands();
+ template <typename StructType>
+ void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out);
+ void writeSections();
+ void writeSymbolTable();
+ void writeStringTable();
+ void writeRebaseInfo();
+ void writeBindInfo();
+ void writeWeakBindInfo();
+ void writeLazyBindInfo();
+ void writeExportInfo();
+ void writeTail();
+
+public:
+ MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B)
+ : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
+
+ size_t totalSize() const;
+ Error finalize();
+ Error write();
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/Object.cpp b/tools/llvm-objcopy/MachO/Object.cpp
new file mode 100644
index 000000000000..264f39c28ed2
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/Object.cpp
@@ -0,0 +1,15 @@
+#include "Object.h"
+#include "../llvm-objcopy.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
+ assert(Index < Symbols.size() && "invalid symbol index");
+ return Symbols[Index].get();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
diff --git a/tools/llvm-objcopy/MachO/Object.h b/tools/llvm-objcopy/MachO/Object.h
new file mode 100644
index 000000000000..ed85fcbc47f7
--- /dev/null
+++ b/tools/llvm-objcopy/MachO/Object.h
@@ -0,0 +1,232 @@
+//===- Object.h - Mach-O object file model ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_OBJECT_H
+#define LLVM_OBJCOPY_MACHO_OBJECT_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+struct MachHeader {
+ uint32_t Magic;
+ uint32_t CPUType;
+ uint32_t CPUSubType;
+ uint32_t FileType;
+ uint32_t NCmds;
+ uint32_t SizeOfCmds;
+ uint32_t Flags;
+ uint32_t Reserved = 0;
+};
+
+struct RelocationInfo;
+struct Section {
+ std::string Sectname;
+ std::string Segname;
+ uint64_t Addr;
+ uint64_t Size;
+ uint32_t Offset;
+ uint32_t Align;
+ uint32_t RelOff;
+ uint32_t NReloc;
+ uint32_t Flags;
+ uint32_t Reserved1;
+ uint32_t Reserved2;
+ uint32_t Reserved3;
+
+ StringRef Content;
+ std::vector<RelocationInfo> Relocations;
+
+ MachO::SectionType getType() const {
+ return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
+ }
+
+ bool isVirtualSection() const {
+ return (getType() == MachO::S_ZEROFILL ||
+ getType() == MachO::S_GB_ZEROFILL ||
+ getType() == MachO::S_THREAD_LOCAL_ZEROFILL);
+ }
+};
+
+struct LoadCommand {
+ // The type MachO::macho_load_command is defined in llvm/BinaryFormat/MachO.h
+ // and it is a union of all the structs corresponding to various load
+ // commands.
+ MachO::macho_load_command MachOLoadCommand;
+
+ // The raw content of the payload of the load command (located right after the
+ // corresponding struct). In some cases it is either empty or can be
+ // copied over without digging into its structure.
+ ArrayRef<uint8_t> Payload;
+
+ // Some load commands can contain (inside the payload) an array of sections,
+ // though the contents of the sections are stored separately. The struct
+ // Section describes only sections' metadata and where to find the
+ // corresponding content inside the binary.
+ std::vector<Section> Sections;
+};
+
+// Symbol information. Fields whose names start with "n_" have the same
+// meaning as the corresponding fields of the nlist structure.
+struct SymbolEntry {
+ std::string Name;
+ uint32_t Index;
+ uint8_t n_type;
+ uint8_t n_sect;
+ uint16_t n_desc;
+ uint64_t n_value;
+};
+
+/// The location of the symbol table inside the binary is described by the
+/// LC_SYMTAB load command.
+struct SymbolTable {
+ std::vector<std::unique_ptr<SymbolEntry>> Symbols;
+
+ const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
+};
+
+/// The location of the string table inside the binary is described by the
+/// LC_SYMTAB load command.
+struct StringTable {
+ std::vector<std::string> Strings;
+};
+
+struct RelocationInfo {
+ const SymbolEntry *Symbol;
+ // True if Info is a scattered_relocation_info.
+ bool Scattered;
+ MachO::any_relocation_info Info;
+};
+
+/// The location of the rebase info inside the binary is described by
+/// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at
+/// an address different from its preferred address. The rebase information is
+/// a stream of byte sized opcodes whose symbolic names start with
+/// REBASE_OPCODE_. Conceptually the rebase information is a table of tuples:
+/// <seg-index, seg-offset, type>
+/// The opcodes are a compressed way to encode the table by only
+/// encoding when a column changes. In addition simple patterns
+/// like "every n'th offset for m times" can be encoded in a few
+/// bytes.
+struct RebaseInfo {
+ // At the moment we do not parse this info (and it is simply copied over),
+ // but the proper support will be added later.
+ ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Dyld binds an image during the loading process
+/// if the image requires any pointers to be initialized to symbols in other
+/// images. The bind information is a stream of byte-sized opcodes whose
+/// symbolic names start with BIND_OPCODE_. Conceptually the bind information is
+/// a table of tuples: <seg-index, seg-offset, type, symbol-library-ordinal,
+/// symbol-name, addend>. The opcodes are a compressed way to encode the table by
+/// only encoding when a column changes. In addition, simple patterns like runs
+/// of pointers initialized to the same value can be encoded in a few bytes.
+struct BindInfo {
+ // At the moment we do not parse this info (and it is simply copied over),
+ // but the proper support will be added later.
+ ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the weak bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Some C++ programs require dyld to unique symbols
+/// so that all images in the process use the same copy of some code/data. This
+/// step is done after binding. The content of the weak_bind info is an opcode
+/// stream like the bind_info, but it is sorted alphabetically by symbol name.
+/// This enables dyld to walk all images with weak binding information in order
+/// and look for collisions. If there are no collisions, dyld does no updating.
+/// That means that some fixups are also encoded in the bind_info. For
+/// instance, all calls to "operator new" are first bound to libstdc++.dylib
+/// using the information in bind_info. Then, if some image overrides operator
+/// new, that is detected when the weak_bind information is processed and the
+/// call to operator new is rebound.
+struct WeakBindInfo {
+ // At the moment we do not parse this info (and it is simply copied over),
+ // but the proper support will be added later.
+ ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the lazy bind info inside the binary is described by the
+/// LC_DYLD_INFO load command. Some uses of external symbols do not need to be
+/// bound immediately; instead they can be lazily bound on first use. The
+/// lazy_bind info contains a stream of BIND opcodes to bind all lazy symbols.
+/// Normally dyld ignores the lazy_bind section when loading an image. Instead,
+/// the static linker arranges for each lazy pointer to initially point to a
+/// helper function which pushes the offset into the lazy_bind area for the
+/// symbol needing to be bound, then jumps to dyld, which simply adds the offset
+/// to lazy_bind_off to get the information on what to bind.
+struct LazyBindInfo {
+ ArrayRef<uint8_t> Opcodes;
+};
+
+/// The location of the export info inside the binary is described by the
+/// LC_DYLD_INFO load command. The symbols exported by a dylib are encoded in a
+/// trie. This is a compact representation that factors out common prefixes. It
+/// also reduces LINKEDIT pages in RAM because it encodes all information (name,
+/// address, flags) in one small, contiguous range. The export area is a stream
+/// of nodes. The first node sequentially is the start node for the trie. Nodes
+/// for a symbol start with a uleb128 that is the length of the exported symbol
+/// information for the string so far. If there is no exported symbol, the node
+/// starts with a zero byte. If there is exported info, it follows the length.
+/// First is a uleb128 containing flags. Normally, it is followed by
+/// a uleb128-encoded offset which is the location of the content named
+/// by the symbol relative to the mach_header for the image. If the flags
+/// contain EXPORT_SYMBOL_FLAGS_REEXPORT, then following the flags is
+/// a uleb128-encoded library ordinal, then a zero-terminated
+/// UTF8 string. If the string is zero length, then the symbol
+/// is re-exported from the specified dylib with the same name.
+/// If the flags contain EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER, then following
+/// the flags are two uleb128s: the stub offset and the resolver offset.
+/// The stub is used by non-lazy pointers. The resolver is used
+/// by lazy pointers and must be called to get the actual address to use.
+/// After the optional exported symbol information is a byte giving
+/// the number of edges (0-255) that this node has leaving it,
+/// followed by each edge.
+/// Each edge is a zero-terminated UTF8 string of the additional characters
+/// in the symbol, followed by a uleb128 offset for the node that
+/// edge points to.
+struct ExportInfo {
+ ArrayRef<uint8_t> Trie;
+};
+
+struct Object {
+ MachHeader Header;
+ std::vector<LoadCommand> LoadCommands;
+
+ SymbolTable SymTable;
+ StringTable StrTable;
+
+ RebaseInfo Rebases;
+ BindInfo Binds;
+ WeakBindInfo WeakBinds;
+ LazyBindInfo LazyBinds;
+ ExportInfo Exports;
+
+ /// The index of LC_SYMTAB load command if present.
+ Optional<size_t> SymTabCommandIndex;
+ /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
+ Optional<size_t> DyLdInfoCommandIndex;
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_OBJECT_H
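For orientation, here is a minimal sketch (not part of the imported patch) of how the in-memory model declared above might be traversed; it assumes the internal header is reachable as "MachO/Object.h" from within the llvm-objcopy sources.

#include "MachO/Object.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::objcopy::macho;

// Walk every load command and print its sections, flagging zero-fill
// (virtual) sections via Section::isVirtualSection() defined above.
static void dumpSections(const Object &O) {
  for (const LoadCommand &LC : O.LoadCommands)
    for (const Section &Sec : LC.Sections)
      outs() << Sec.Segname << "," << Sec.Sectname << " size=" << Sec.Size
             << (Sec.isVirtualSection() ? " (zero-fill)" : "") << "\n";
}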
diff --git a/tools/llvm-objcopy/ObjcopyOpts.td b/tools/llvm-objcopy/ObjcopyOpts.td
index 1f7e64e4091c..5fce4fbde539 100644
--- a/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/tools/llvm-objcopy/ObjcopyOpts.td
@@ -1,13 +1,20 @@
include "llvm/Option/OptParser.td"
multiclass Eq<string name, string help> {
- def NAME : Separate<["--", "-"], name>;
- def NAME #_eq : Joined<["--", "-"], name #"=">,
+ def NAME : Separate<["--"], name>;
+ def NAME #_eq : Joined<["--"], name #"=">,
Alias<!cast<Separate>(NAME)>,
HelpText<help>;
}
-def help : Flag<["-", "--"], "help">;
+def help : Flag<["--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def allow_broken_links
+ : Flag<["--"], "allow-broken-links">,
+ HelpText<"Allow llvm-objcopy to remove sections even if it would leave "
+ "invalid section references. The appropriate sh_link fields "
+ "will be set to zero.">;
defm binary_architecture
: Eq<"binary-architecture", "Used when transforming an architecture-less "
@@ -26,13 +33,13 @@ defm output_target : Eq<"output-target", "Format of the output file">,
Values<"binary">;
def O : JoinedOrSeparate<["-"], "O">, Alias<output_target>;
-def compress_debug_sections : Flag<["--", "-"], "compress-debug-sections">;
+def compress_debug_sections : Flag<["--"], "compress-debug-sections">;
def compress_debug_sections_eq
- : Joined<["--", "-"], "compress-debug-sections=">,
+ : Joined<["--"], "compress-debug-sections=">,
MetaVarName<"[ zlib | zlib-gnu ]">,
HelpText<"Compress DWARF debug sections using specified style. Supported "
"styles: 'zlib-gnu' and 'zlib'">;
-def decompress_debug_sections : Flag<["-", "--"], "decompress-debug-sections">,
+def decompress_debug_sections : Flag<["--"], "decompress-debug-sections">,
HelpText<"Decompress DWARF debug sections.">;
defm split_dwo
: Eq<"split-dwo", "Equivalent to extract-dwo on the input file to "
@@ -40,7 +47,7 @@ defm split_dwo
MetaVarName<"dwo-file">;
def enable_deterministic_archives
- : Flag<["-", "--"], "enable-deterministic-archives">,
+ : Flag<["--"], "enable-deterministic-archives">,
HelpText<"Enable deterministic mode when copying archives (use zero for "
"UIDs, GIDs, and timestamps).">;
def D : Flag<["-"], "D">,
@@ -48,14 +55,14 @@ def D : Flag<["-"], "D">,
HelpText<"Alias for --enable-deterministic-archives">;
def disable_deterministic_archives
- : Flag<["-", "--"], "disable-deterministic-archives">,
+ : Flag<["--"], "disable-deterministic-archives">,
HelpText<"Disable deterministic mode when copying archives (use real "
"values for UIDs, GIDs, and timestamps).">;
def U : Flag<["-"], "U">,
Alias<disable_deterministic_archives>,
HelpText<"Alias for --disable-deterministic-archives">;
-def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+def preserve_dates : Flag<["--"], "preserve-dates">,
HelpText<"Preserve access and modification timestamps">;
def p : Flag<["-"], "p">, Alias<preserve_dates>;
@@ -76,6 +83,16 @@ defm rename_section
defm redefine_symbol
: Eq<"redefine-sym", "Change the name of a symbol old to new">,
MetaVarName<"old=new">;
+defm redefine_symbols
+ : Eq<"redefine-syms",
+ "Reads a list of symbol pairs from <filename> and runs as if "
+ "--redefine-sym=<old>=<new> is set for each one. <filename> "
+ "contains two symbols per line separated with whitespace and may "
+ "contain comments beginning with '#'. Leading and trailing "
+ "whitespace is stripped from each line. May be repeated to read "
+ "symbols from many files.">,
+ MetaVarName<"filename">;
+
defm keep_section : Eq<"keep-section", "Keep <section>">,
MetaVarName<"section">;
defm only_section : Eq<"only-section", "Remove all but <section>">,
@@ -86,39 +103,76 @@ defm add_section
"Make a section named <section> with the contents of <file>.">,
MetaVarName<"section=file">;
-def strip_all
- : Flag<["-", "--"], "strip-all">,
- HelpText<
- "Remove non-allocated sections other than .gnu.warning* sections">;
+defm set_section_flags
+ : Eq<"set-section-flags",
+ "Set section flags for a given section. Flags supported for GNU "
+ "compatibility: alloc, load, noload, readonly, debug, code, data, "
+ "rom, share, contents, merge, strings.">,
+ MetaVarName<"section=flag1[,flag2,...]">;
+
+def strip_all : Flag<["--"], "strip-all">,
+ HelpText<"Remove non-allocated sections outside segments. "
+ ".gnu.warning* sections are not removed">;
def S : Flag<["-"], "S">, Alias<strip_all>;
-def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
+def strip_all_gnu : Flag<["--"], "strip-all-gnu">,
HelpText<"Compatible with GNU objcopy's --strip-all">;
-def strip_debug : Flag<["-", "--"], "strip-debug">,
+def strip_debug : Flag<["--"], "strip-debug">,
HelpText<"Remove all debug information">;
-def strip_dwo : Flag<["-", "--"], "strip-dwo">,
+def g : Flag<["-"], "g">, Alias<strip_debug>,
+ HelpText<"Alias for --strip-debug">;
+def strip_dwo : Flag<["--"], "strip-dwo">,
HelpText<"Remove all DWARF .dwo sections from file">;
-def strip_sections : Flag<["-", "--"], "strip-sections">,
- HelpText<"Remove all section headers">;
-def strip_non_alloc : Flag<["-", "--"], "strip-non-alloc">,
- HelpText<"Remove all non-allocated sections">;
-def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
+def strip_sections
+ : Flag<["--"], "strip-sections">,
+ HelpText<"Remove all section headers and all sections not in segments">;
+def strip_non_alloc
+ : Flag<["--"], "strip-non-alloc">,
+ HelpText<"Remove all non-allocated sections outside segments">;
+def strip_unneeded : Flag<["--"], "strip-unneeded">,
HelpText<"Remove all symbols not needed by relocations">;
+defm strip_unneeded_symbol
+ : Eq<"strip-unneeded-symbol",
+ "Remove symbol <symbol> if it is not needed by relocations">,
+ MetaVarName<"symbol">;
+defm strip_unneeded_symbols
+ : Eq<"strip-unneeded-symbols",
+ "Reads a list of symbols from <filename> and removes them "
+ "if they are not needed by relocations">,
+ MetaVarName<"filename">;
def extract_dwo
- : Flag<["-", "--"], "extract-dwo">,
+ : Flag<["--"], "extract-dwo">,
HelpText<
"Remove all sections that are not DWARF .dwo sections from file">;
+defm extract_partition
+ : Eq<"extract-partition", "Extract named partition from input file">,
+ MetaVarName<"name">;
+def extract_main_partition
+ : Flag<["--"], "extract-main-partition">,
+ HelpText<"Extract main partition from the input file">;
+
def localize_hidden
- : Flag<["-", "--"], "localize-hidden">,
+ : Flag<["--"], "localize-hidden">,
HelpText<
"Mark all symbols that have hidden or internal visibility as local">;
defm localize_symbol : Eq<"localize-symbol", "Mark <symbol> as local">,
MetaVarName<"symbol">;
+defm localize_symbols
+ : Eq<"localize-symbols",
+ "Reads a list of symbols from <filename> and marks them local.">,
+ MetaVarName<"filename">;
+
def L : JoinedOrSeparate<["-"], "L">, Alias<localize_symbol>;
defm globalize_symbol : Eq<"globalize-symbol", "Mark <symbol> as global">,
MetaVarName<"symbol">;
+
+defm globalize_symbols
+ : Eq<"globalize-symbols",
+ "Reads a list of symbols from <filename> and marks them global.">,
+ MetaVarName<"filename">;
+
defm keep_global_symbol
: Eq<"keep-global-symbol",
"Convert all symbols except <symbol> to local. May be repeated to "
@@ -137,23 +191,51 @@ defm keep_global_symbols
defm weaken_symbol : Eq<"weaken-symbol", "Mark <symbol> as weak">,
MetaVarName<"symbol">;
+defm weaken_symbols
+ : Eq<"weaken-symbols",
+ "Reads a list of symbols from <filename> and marks them weak.">,
+ MetaVarName<"filename">;
+
def W : JoinedOrSeparate<["-"], "W">, Alias<weaken_symbol>;
-def weaken : Flag<["-", "--"], "weaken">,
+def weaken : Flag<["--"], "weaken">,
HelpText<"Mark all global symbols as weak">;
+
+def discard_locals : Flag<["--"], "discard-locals">,
+ HelpText<"Remove compiler-generated local symbols, (e.g. "
+ "symbols starting with .L)">;
+def X : Flag<["-"], "X">, Alias<discard_locals>;
+
def discard_all
- : Flag<["-", "--"], "discard-all">,
+ : Flag<["--"], "discard-all">,
HelpText<"Remove all local symbols except file and section symbols">;
def x : Flag<["-"], "x">, Alias<discard_all>;
defm strip_symbol : Eq<"strip-symbol", "Remove symbol <symbol>">,
MetaVarName<"symbol">;
+defm strip_symbols
+ : Eq<"strip-symbols",
+ "Reads a list of symbols from <filename> and removes them.">,
+ MetaVarName<"filename">;
+
def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
MetaVarName<"symbol">;
def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
+
+defm keep_symbols
+ : Eq<"keep-symbols",
+ "Reads a list of symbols from <filename> and runs as if "
+ "--keep-symbol=<symbol> is set for each one. <filename> "
+ "contains one symbol per line and may contain comments beginning with "
+ "'#'. Leading and trailing whitespace is stripped from each line. May "
+ "be repeated to read symbols from many files.">,
+ MetaVarName<"filename">;
+
def only_keep_debug
- : Flag<["-", "--"], "only-keep-debug">,
- HelpText<"Currently ignored. Only for compatibility with GNU objcopy.">;
-def keep_file_symbols : Flag<["-", "--"], "keep-file-symbols">,
+ : Flag<["--"], "only-keep-debug">,
+ HelpText<"Clear sections that would not be stripped by --strip-debug. "
+ "Currently only implemented for COFF.">;
+
+def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
HelpText<"Do not remove file symbols">;
defm dump_section
: Eq<"dump-section",
@@ -163,7 +245,11 @@ defm prefix_symbols
: Eq<"prefix-symbols", "Add <prefix> to the start of every symbol name">,
MetaVarName<"prefix">;
-def version : Flag<["-", "--"], "version">,
+defm prefix_alloc_sections
+ : Eq<"prefix-alloc-sections", "Add <prefix> to the start of every allocated section name">,
+ MetaVarName<"prefix">;
+
+def version : Flag<["--"], "version">,
HelpText<"Print the version and exit.">;
def V : Flag<["-"], "V">, Alias<version>;
defm build_id_link_dir
@@ -178,3 +264,25 @@ defm build_id_link_output
: Eq<"build-id-link-output", "Hard-link the output to <dir>/xx/xxx<suffix> "
"name derived from hex build ID">,
MetaVarName<"suffix">;
+
+def regex
+ : Flag<["--"], "regex">,
+ HelpText<"Permit regular expressions in name comparison">;
+
+defm set_start : Eq<"set-start", "Set the start address to <addr>. Overrides "
+ "any previous --change-start or --adjust-start values.">,
+ MetaVarName<"addr">;
+defm change_start : Eq<"change-start", "Add <incr> to the start address. Can be "
+ "specified multiple times, all values will be applied "
+ "cumulatively.">,
+ MetaVarName<"incr">;
+def adjust_start : JoinedOrSeparate<["--"], "adjust-start">,
+ Alias<change_start>;
+
+defm add_symbol
+ : Eq<"add-symbol", "Add new symbol <name> to .symtab. Accepted flags: "
+ "global, local, weak, default, hidden, file, section, object, "
+ "function, indirect-function. Accepted but ignored for "
+ "compatibility: debug, constructor, warning, indirect, synthetic, "
+ "unique-object, before.">,
+ MetaVarName<"name=[section:]value[,flags]">;
diff --git a/tools/llvm-objcopy/StripOpts.td b/tools/llvm-objcopy/StripOpts.td
index fa98e27e9321..1d06bb3dfb38 100644
--- a/tools/llvm-objcopy/StripOpts.td
+++ b/tools/llvm-objcopy/StripOpts.td
@@ -1,16 +1,23 @@
include "llvm/Option/OptParser.td"
multiclass Eq<string name, string help> {
- def NAME : Separate<["--", "-"], name>;
- def NAME #_eq : Joined<["--", "-"], name #"=">,
+ def NAME : Separate<["--"], name>;
+ def NAME #_eq : Joined<["--"], name #"=">,
Alias<!cast<Separate>(NAME)>,
HelpText<help>;
}
-def help : Flag<["-", "--"], "help">;
+def help : Flag<["--"], "help">;
+def h : Flag<["-"], "h">, Alias<help>;
+
+def allow_broken_links
+ : Flag<["--"], "allow-broken-links">,
+ HelpText<"Allow llvm-strip to remove sections even if it would leave "
+ "invalid section references. The appropriate sh_link fields "
+ "will be set to zero.">;
def enable_deterministic_archives
- : Flag<["-", "--"], "enable-deterministic-archives">,
+ : Flag<["--"], "enable-deterministic-archives">,
HelpText<"Enable deterministic mode when stripping archives (use zero "
"for UIDs, GIDs, and timestamps).">;
def D : Flag<["-"], "D">,
@@ -18,50 +25,72 @@ def D : Flag<["-"], "D">,
HelpText<"Alias for --enable-deterministic-archives">;
def disable_deterministic_archives
- : Flag<["-", "--"], "disable-deterministic-archives">,
+ : Flag<["--"], "disable-deterministic-archives">,
HelpText<"Disable deterministic mode when stripping archives (use real "
"values for UIDs, GIDs, and timestamps).">;
def U : Flag<["-"], "U">,
Alias<disable_deterministic_archives>,
HelpText<"Alias for --disable-deterministic-archives">;
-defm output : Eq<"o", "Write output to <file>">, MetaVarName<"output">;
+def output : JoinedOrSeparate<["-"], "o">, HelpText<"Write output to <file>">;
-def preserve_dates : Flag<["-", "--"], "preserve-dates">,
+def preserve_dates : Flag<["--"], "preserve-dates">,
HelpText<"Preserve access and modification timestamps">;
def p : Flag<["-"], "p">, Alias<preserve_dates>;
-def strip_all
- : Flag<["-", "--"], "strip-all">,
- HelpText<
- "Remove non-allocated sections other than .gnu.warning* sections">;
+def strip_all : Flag<["--"], "strip-all">,
+ HelpText<"Remove non-allocated sections outside segments. "
+ ".gnu.warning* sections are not removed">;
def s : Flag<["-"], "s">, Alias<strip_all>;
+def no_strip_all : Flag<["--"], "no-strip-all">,
+ HelpText<"Disable --strip-all">;
-def strip_all_gnu : Flag<["-", "--"], "strip-all-gnu">,
+def strip_all_gnu : Flag<["--"], "strip-all-gnu">,
HelpText<"Compatible with GNU strip's --strip-all">;
-def strip_debug : Flag<["-", "--"], "strip-debug">,
+def strip_debug : Flag<["--"], "strip-debug">,
HelpText<"Remove debugging symbols only">;
def d : Flag<["-"], "d">, Alias<strip_debug>;
def g : Flag<["-"], "g">, Alias<strip_debug>;
def S : Flag<["-"], "S">, Alias<strip_debug>;
-def strip_unneeded : Flag<["-", "--"], "strip-unneeded">,
+def strip_unneeded : Flag<["--"], "strip-unneeded">,
HelpText<"Remove all symbols not needed by relocations">;
defm remove_section : Eq<"remove-section", "Remove <section>">,
MetaVarName<"section">;
def R : JoinedOrSeparate<["-"], "R">, Alias<remove_section>;
+defm strip_symbol : Eq<"strip-symbol", "Strip <symbol>">,
+ MetaVarName<"symbol">;
+def N : JoinedOrSeparate<["-"], "N">, Alias<strip_symbol>;
+
defm keep_section : Eq<"keep-section", "Keep <section>">,
MetaVarName<"section">;
defm keep_symbol : Eq<"keep-symbol", "Do not remove symbol <symbol>">,
MetaVarName<"symbol">;
+def keep_file_symbols : Flag<["--"], "keep-file-symbols">,
+ HelpText<"Do not remove file symbols">;
+
def K : JoinedOrSeparate<["-"], "K">, Alias<keep_symbol>;
+def only_keep_debug
+ : Flag<["--"], "only-keep-debug">,
+ HelpText<"Clear sections that would not be stripped by --strip-debug. "
+ "Currently only implemented for COFF.">;
+
+def discard_locals : Flag<["--"], "discard-locals">,
+ HelpText<"Remove compiler-generated local symbols, (e.g. "
+ "symbols starting with .L)">;
+def X : Flag<["-"], "X">, Alias<discard_locals>;
+
def discard_all
- : Flag<["-", "--"], "discard-all">,
+ : Flag<["--"], "discard-all">,
HelpText<"Remove all local symbols except file and section symbols">;
def x : Flag<["-"], "x">, Alias<discard_all>;
-def version : Flag<["-", "--"], "version">,
+def regex
+ : Flag<["--"], "regex">,
+ HelpText<"Permit regular expressions in name comparison">;
+
+def version : Flag<["--"], "version">,
HelpText<"Print the version and exit.">;
def V : Flag<["-"], "V">, Alias<version>;
diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp
index fb1ff18b015b..e9372176e43b 100644
--- a/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -1,17 +1,17 @@
//===- llvm-objcopy.cpp ---------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm-objcopy.h"
#include "Buffer.h"
-#include "COFF/COFFObjcopy.h"
#include "CopyConfig.h"
#include "ELF/ELFObjcopy.h"
+#include "COFF/COFFObjcopy.h"
+#include "MachO/MachOObjcopy.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -24,6 +24,7 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
@@ -52,16 +53,23 @@ namespace objcopy {
StringRef ToolName;
LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
- WithColor::error(errs(), ToolName) << Message << ".\n";
- errs().flush();
+ WithColor::error(errs(), ToolName) << Message << "\n";
+ exit(1);
+}
+
+LLVM_ATTRIBUTE_NORETURN void error(Error E) {
+ assert(E);
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ logAllUnhandledErrors(std::move(E), OS);
+ OS.flush();
+ WithColor::error(errs(), ToolName) << Buf;
exit(1);
}
LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, std::error_code EC) {
assert(EC);
- WithColor::error(errs(), ToolName)
- << "'" << File << "': " << EC.message() << ".\n";
- exit(1);
+ error(createFileError(File, EC));
}
LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
@@ -74,6 +82,12 @@ LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Error E) {
exit(1);
}
+ErrorSuccess reportWarning(Error E) {
+ assert(E);
+ WithColor::warning(errs(), ToolName) << toString(std::move(E));
+ return Error::success();
+}
+
} // end namespace objcopy
} // end namespace llvm
@@ -87,10 +101,13 @@ static Error deepWriteArchive(StringRef ArcName,
ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin) {
- Error E =
- writeArchive(ArcName, NewMembers, WriteSymtab, Kind, Deterministic, Thin);
- if (!Thin || E)
- return E;
+ if (Error E = writeArchive(ArcName, NewMembers, WriteSymtab, Kind,
+ Deterministic, Thin))
+ return createFileError(ArcName, std::move(E));
+
+ if (!Thin)
+ return Error::success();
+
for (const NewArchiveMember &Member : NewMembers) {
// Internally, FileBuffer will use the buffer created by
// FileOutputBuffer::create, for regular files (that is the case for
@@ -101,132 +118,212 @@ static Error deepWriteArchive(StringRef ArcName,
// NewArchiveMember still requires them even though writeArchive does not
// write them on disk.
FileBuffer FB(Member.MemberName);
- FB.allocate(Member.Buf->getBufferSize());
+ if (Error E = FB.allocate(Member.Buf->getBufferSize()))
+ return E;
std::copy(Member.Buf->getBufferStart(), Member.Buf->getBufferEnd(),
FB.getBufferStart());
- if (auto E = FB.commit())
+ if (Error E = FB.commit())
return E;
}
return Error::success();
}
+/// The function executeObjcopyOnIHex does the dispatch based on the format
+/// of the output specified by the command line options.
+static Error executeObjcopyOnIHex(const CopyConfig &Config, MemoryBuffer &In,
+ Buffer &Out) {
+ // TODO: support output formats other than ELF.
+ return elf::executeObjcopyOnIHex(Config, In, Out);
+}
+
/// The function executeObjcopyOnRawBinary does the dispatch based on the format
/// of the output specified by the command line options.
-static void executeObjcopyOnRawBinary(const CopyConfig &Config,
- MemoryBuffer &In, Buffer &Out) {
- // TODO: llvm-objcopy should parse CopyConfig.OutputFormat to recognize
- // formats other than ELF / "binary" and invoke
- // elf::executeObjcopyOnRawBinary, macho::executeObjcopyOnRawBinary or
- // coff::executeObjcopyOnRawBinary accordingly.
- return elf::executeObjcopyOnRawBinary(Config, In, Out);
+static Error executeObjcopyOnRawBinary(const CopyConfig &Config,
+ MemoryBuffer &In, Buffer &Out) {
+ switch (Config.OutputFormat) {
+ case FileFormat::ELF:
+ // FIXME: Currently, we call elf::executeObjcopyOnRawBinary even if the
+ // output format is binary/ihex or it's not given. This behavior differs from
+ // GNU objcopy. See https://bugs.llvm.org/show_bug.cgi?id=42171 for details.
+ case FileFormat::Binary:
+ case FileFormat::IHex:
+ case FileFormat::Unspecified:
+ return elf::executeObjcopyOnRawBinary(Config, In, Out);
+ }
+
+ llvm_unreachable("unsupported output format");
}
/// The function executeObjcopyOnBinary does the dispatch based on the format
/// of the input binary (ELF, MachO or COFF).
-static void executeObjcopyOnBinary(const CopyConfig &Config, object::Binary &In,
- Buffer &Out) {
+static Error executeObjcopyOnBinary(const CopyConfig &Config,
+ object::Binary &In, Buffer &Out) {
if (auto *ELFBinary = dyn_cast<object::ELFObjectFileBase>(&In))
return elf::executeObjcopyOnBinary(Config, *ELFBinary, Out);
else if (auto *COFFBinary = dyn_cast<object::COFFObjectFile>(&In))
return coff::executeObjcopyOnBinary(Config, *COFFBinary, Out);
+ else if (auto *MachOBinary = dyn_cast<object::MachOObjectFile>(&In))
+ return macho::executeObjcopyOnBinary(Config, *MachOBinary, Out);
else
- error("Unsupported object file format");
+ return createStringError(object_error::invalid_file_type,
+ "unsupported object file format");
}
-static void executeObjcopyOnArchive(const CopyConfig &Config,
- const Archive &Ar) {
+static Error executeObjcopyOnArchive(const CopyConfig &Config,
+ const Archive &Ar) {
std::vector<NewArchiveMember> NewArchiveMembers;
Error Err = Error::success();
for (const Archive::Child &Child : Ar.children(Err)) {
- Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
- if (!ChildOrErr)
- reportError(Ar.getFileName(), ChildOrErr.takeError());
- Binary *Bin = ChildOrErr->get();
-
Expected<StringRef> ChildNameOrErr = Child.getName();
if (!ChildNameOrErr)
- reportError(Ar.getFileName(), ChildNameOrErr.takeError());
+ return createFileError(Ar.getFileName(), ChildNameOrErr.takeError());
+
+ Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
+ if (!ChildOrErr)
+ return createFileError(Ar.getFileName() + "(" + *ChildNameOrErr + ")",
+ ChildOrErr.takeError());
MemBuffer MB(ChildNameOrErr.get());
- executeObjcopyOnBinary(Config, *Bin, MB);
+ if (Error E = executeObjcopyOnBinary(Config, *ChildOrErr->get(), MB))
+ return E;
Expected<NewArchiveMember> Member =
NewArchiveMember::getOldMember(Child, Config.DeterministicArchives);
if (!Member)
- reportError(Ar.getFileName(), Member.takeError());
+ return createFileError(Ar.getFileName(), Member.takeError());
Member->Buf = MB.releaseMemoryBuffer();
Member->MemberName = Member->Buf->getBufferIdentifier();
NewArchiveMembers.push_back(std::move(*Member));
}
-
if (Err)
- reportError(Config.InputFilename, std::move(Err));
- if (Error E = deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
- Ar.hasSymbolTable(), Ar.kind(),
- Config.DeterministicArchives, Ar.isThin()))
- reportError(Config.OutputFilename, std::move(E));
+ return createFileError(Config.InputFilename, std::move(Err));
+
+ return deepWriteArchive(Config.OutputFilename, NewArchiveMembers,
+ Ar.hasSymbolTable(), Ar.kind(),
+ Config.DeterministicArchives, Ar.isThin());
}
-static void restoreDateOnFile(StringRef Filename,
- const sys::fs::file_status &Stat) {
+static Error restoreStatOnFile(StringRef Filename,
+ const sys::fs::file_status &Stat,
+ bool PreserveDates) {
int FD;
+ // Writing to stdout should not be treated as an error here, just
+ // do not set access/modification times or permissions.
+ if (Filename == "-")
+ return Error::success();
+
if (auto EC =
sys::fs::openFileForWrite(Filename, FD, sys::fs::CD_OpenExisting))
- reportError(Filename, EC);
+ return createFileError(Filename, EC);
+
+ if (PreserveDates)
+ if (auto EC = sys::fs::setLastAccessAndModificationTime(
+ FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
+ return createFileError(Filename, EC);
- if (auto EC = sys::fs::setLastAccessAndModificationTime(
- FD, Stat.getLastAccessedTime(), Stat.getLastModificationTime()))
- reportError(Filename, EC);
+ sys::fs::file_status OStat;
+ if (std::error_code EC = sys::fs::status(FD, OStat))
+ return createFileError(Filename, EC);
+ if (OStat.type() == sys::fs::file_type::regular_file)
+#ifdef _WIN32
+ if (auto EC = sys::fs::setPermissions(
+ Filename, static_cast<sys::fs::perms>(Stat.permissions() &
+ ~sys::fs::getUmask())))
+#else
+ if (auto EC = sys::fs::setPermissions(
+ FD, static_cast<sys::fs::perms>(Stat.permissions() &
+ ~sys::fs::getUmask())))
+#endif
+ return createFileError(Filename, EC);
if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
- reportError(Filename, EC);
+ return createFileError(Filename, EC);
+
+ return Error::success();
}
/// The function executeObjcopy does the higher level dispatch based on the type
/// of input (raw binary, archive or single object file) and takes care of the
/// format-agnostic modifications, i.e. preserving dates.
-static void executeObjcopy(const CopyConfig &Config) {
+static Error executeObjcopy(const CopyConfig &Config) {
sys::fs::file_status Stat;
- if (Config.PreserveDates)
+ if (Config.InputFilename != "-") {
if (auto EC = sys::fs::status(Config.InputFilename, Stat))
- reportError(Config.InputFilename, EC);
+ return createFileError(Config.InputFilename, EC);
+ } else {
+ Stat.permissions(static_cast<sys::fs::perms>(0777));
+ }
- if (Config.InputFormat == "binary") {
- auto BufOrErr = MemoryBuffer::getFile(Config.InputFilename);
+ typedef Error (*ProcessRawFn)(const CopyConfig &, MemoryBuffer &, Buffer &);
+ ProcessRawFn ProcessRaw;
+ switch (Config.InputFormat) {
+ case FileFormat::Binary:
+ ProcessRaw = executeObjcopyOnRawBinary;
+ break;
+ case FileFormat::IHex:
+ ProcessRaw = executeObjcopyOnIHex;
+ break;
+ default:
+ ProcessRaw = nullptr;
+ }
+
+ if (ProcessRaw) {
+ auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Config.InputFilename);
if (!BufOrErr)
- reportError(Config.InputFilename, BufOrErr.getError());
+ return createFileError(Config.InputFilename, BufOrErr.getError());
FileBuffer FB(Config.OutputFilename);
- executeObjcopyOnRawBinary(Config, *BufOrErr->get(), FB);
+ if (Error E = ProcessRaw(Config, *BufOrErr->get(), FB))
+ return E;
} else {
Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
createBinary(Config.InputFilename);
if (!BinaryOrErr)
- reportError(Config.InputFilename, BinaryOrErr.takeError());
+ return createFileError(Config.InputFilename, BinaryOrErr.takeError());
if (Archive *Ar = dyn_cast<Archive>(BinaryOrErr.get().getBinary())) {
- executeObjcopyOnArchive(Config, *Ar);
+ if (Error E = executeObjcopyOnArchive(Config, *Ar))
+ return E;
} else {
FileBuffer FB(Config.OutputFilename);
- executeObjcopyOnBinary(Config, *BinaryOrErr.get().getBinary(), FB);
+ if (Error E = executeObjcopyOnBinary(Config,
+ *BinaryOrErr.get().getBinary(), FB))
+ return E;
}
}
- if (Config.PreserveDates) {
- restoreDateOnFile(Config.OutputFilename, Stat);
- if (!Config.SplitDWO.empty())
- restoreDateOnFile(Config.SplitDWO, Stat);
+ if (Error E =
+ restoreStatOnFile(Config.OutputFilename, Stat, Config.PreserveDates))
+ return E;
+
+ if (!Config.SplitDWO.empty()) {
+ Stat.permissions(static_cast<sys::fs::perms>(0666));
+ if (Error E =
+ restoreStatOnFile(Config.SplitDWO, Stat, Config.PreserveDates))
+ return E;
}
+
+ return Error::success();
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ToolName = argv[0];
- DriverConfig DriverConfig;
- if (sys::path::stem(ToolName).contains("strip"))
- DriverConfig = parseStripOptions(makeArrayRef(argv + 1, argc));
- else
- DriverConfig = parseObjcopyOptions(makeArrayRef(argv + 1, argc));
- for (const CopyConfig &CopyConfig : DriverConfig.CopyConfigs)
- executeObjcopy(CopyConfig);
+ bool IsStrip = sys::path::stem(ToolName).contains("strip");
+ Expected<DriverConfig> DriverConfig =
+ IsStrip ? parseStripOptions(makeArrayRef(argv + 1, argc), reportWarning)
+ : parseObjcopyOptions(makeArrayRef(argv + 1, argc));
+ if (!DriverConfig) {
+ logAllUnhandledErrors(DriverConfig.takeError(),
+ WithColor::error(errs(), ToolName));
+ return 1;
+ }
+ for (const CopyConfig &CopyConfig : DriverConfig->CopyConfigs) {
+ if (Error E = executeObjcopy(CopyConfig)) {
+ logAllUnhandledErrors(std::move(E), WithColor::error(errs(), ToolName));
+ return 1;
+ }
+ }
+
+ return 0;
}
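The refactor above converts the llvm-objcopy helpers from report-and-exit to returning llvm::Error, with main() as the single place that prints diagnostics and sets the exit code. A minimal standalone sketch of that convention (the file-check helper is illustrative, not code from the patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Return an Error instead of exiting; attach the file name so the caller can
// report "<file>: <message>" uniformly.
static Error checkInputExists(StringRef Filename) {
  sys::fs::file_status Stat;
  if (std::error_code EC = sys::fs::status(Filename, Stat))
    return createFileError(Filename, EC);
  return Error::success();
}

int main(int argc, char **argv) {
  if (Error E = checkInputExists(argc > 1 ? argv[1] : "-")) {
    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
    return 1;
  }
  return 0;
}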
diff --git a/tools/llvm-objcopy/llvm-objcopy.h b/tools/llvm-objcopy/llvm-objcopy.h
index d8edf3e29ee0..18a789ca1f83 100644
--- a/tools/llvm-objcopy/llvm-objcopy.h
+++ b/tools/llvm-objcopy/llvm-objcopy.h
@@ -1,9 +1,8 @@
//===- llvm-objcopy.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -20,6 +19,7 @@ namespace llvm {
namespace objcopy {
LLVM_ATTRIBUTE_NORETURN extern void error(Twine Message);
+LLVM_ATTRIBUTE_NORETURN extern void error(Error E);
LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File, Error E);
LLVM_ATTRIBUTE_NORETURN extern void reportError(StringRef File,
std::error_code EC);
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
index 55607ec299be..1ba0a68902c9 100644
--- a/tools/llvm-objdump/COFFDump.cpp
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -1,9 +1,8 @@
//===-- COFFDump.cpp - COFF-specific dumper ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -25,10 +24,10 @@
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
using namespace llvm::Win64EH;
+namespace llvm {
// Returns the name of the unwind code.
static StringRef getUnwindCodeTypeName(uint8_t Code) {
switch(Code) {
@@ -156,70 +155,68 @@ static void printAllUnwindCodes(ArrayRef<UnwindCode> UCs) {
}
// Given a symbol Sym, this function returns its address and section.
-static std::error_code
-resolveSectionAndAddress(const COFFObjectFile *Obj, const SymbolRef &Sym,
- const coff_section *&ResolvedSection,
- uint64_t &ResolvedAddr) {
+static Error resolveSectionAndAddress(const COFFObjectFile *Obj,
+ const SymbolRef &Sym,
+ const coff_section *&ResolvedSection,
+ uint64_t &ResolvedAddr) {
Expected<uint64_t> ResolvedAddrOrErr = Sym.getAddress();
if (!ResolvedAddrOrErr)
- return errorToErrorCode(ResolvedAddrOrErr.takeError());
+ return ResolvedAddrOrErr.takeError();
ResolvedAddr = *ResolvedAddrOrErr;
Expected<section_iterator> Iter = Sym.getSection();
if (!Iter)
- return errorToErrorCode(Iter.takeError());
+ return Iter.takeError();
ResolvedSection = Obj->getCOFFSection(**Iter);
- return std::error_code();
+ return Error::success();
}
// Given a vector of relocations for a section and an offset into this section
// the function returns the symbol used for the relocation at the offset.
-static std::error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
+static Error resolveSymbol(const std::vector<RelocationRef> &Rels,
uint64_t Offset, SymbolRef &Sym) {
for (auto &R : Rels) {
uint64_t Ofs = R.getOffset();
if (Ofs == Offset) {
Sym = *R.getSymbol();
- return std::error_code();
+ return Error::success();
}
}
- return object_error::parse_failed;
+ return make_error<BinaryError>();
}
// Given a vector of relocations for a section and an offset into this section
// the function resolves the symbol used for the relocation at the offset and
// returns the section content and the address inside the content pointed to
// by the symbol.
-static std::error_code
+static Error
getSectionContents(const COFFObjectFile *Obj,
const std::vector<RelocationRef> &Rels, uint64_t Offset,
ArrayRef<uint8_t> &Contents, uint64_t &Addr) {
SymbolRef Sym;
- if (std::error_code EC = resolveSymbol(Rels, Offset, Sym))
- return EC;
+ if (Error E = resolveSymbol(Rels, Offset, Sym))
+ return E;
const coff_section *Section;
- if (std::error_code EC = resolveSectionAndAddress(Obj, Sym, Section, Addr))
- return EC;
- if (std::error_code EC = Obj->getSectionContents(Section, Contents))
- return EC;
- return std::error_code();
+ if (Error E = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+ return E;
+ return Obj->getSectionContents(Section, Contents);
}
// Given a vector of relocations for a section and an offset into this section
// the function returns the name of the symbol used for the relocation at the
// offset.
-static std::error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
- uint64_t Offset, StringRef &Name) {
+static Error resolveSymbolName(const std::vector<RelocationRef> &Rels,
+ uint64_t Offset, StringRef &Name) {
SymbolRef Sym;
- if (std::error_code EC = resolveSymbol(Rels, Offset, Sym))
+ if (Error EC = resolveSymbol(Rels, Offset, Sym))
return EC;
Expected<StringRef> NameOrErr = Sym.getName();
if (!NameOrErr)
- return errorToErrorCode(NameOrErr.takeError());
+ return NameOrErr.takeError();
Name = *NameOrErr;
- return std::error_code();
+ return Error::success();
}
-static void printCOFFSymbolAddress(llvm::raw_ostream &Out,
+static void printCOFFSymbolAddress(raw_ostream &Out,
const std::vector<RelocationRef> &Rels,
uint64_t Offset, uint32_t Disp) {
StringRef Sym;
@@ -469,6 +466,18 @@ static bool getPDataSection(const COFFObjectFile *Obj,
return false;
}
+Error getCOFFRelocationValueString(const COFFObjectFile *Obj,
+ const RelocationRef &Rel,
+ SmallVectorImpl<char> &Result) {
+ symbol_iterator SymI = Rel.getSymbol();
+ Expected<StringRef> SymNameOrErr = SymI->getName();
+ if (!SymNameOrErr)
+ return SymNameOrErr.takeError();
+ StringRef SymName = *SymNameOrErr;
+ Result.append(SymName.begin(), SymName.end());
+ return Error::success();
+}
+
static void printWin64EHUnwindInfo(const Win64EH::UnwindInfo *UI) {
// The casts to int are required in order to output the value as number.
// Without the casts the value would be interpreted as char data (which
@@ -578,7 +587,7 @@ static void printRuntimeFunctionRels(const COFFObjectFile *Obj,
printWin64EHUnwindInfo(UI);
}
-void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
+void printCOFFUnwindInfo(const COFFObjectFile *Obj) {
if (Obj->getMachine() != COFF::IMAGE_FILE_MACHINE_AMD64) {
WithColor::error(errs(), "llvm-objdump")
<< "unsupported image machine type "
@@ -607,7 +616,7 @@ void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
}
}
-void llvm::printCOFFFileHeader(const object::ObjectFile *Obj) {
+void printCOFFFileHeader(const object::ObjectFile *Obj) {
const COFFObjectFile *file = dyn_cast<const COFFObjectFile>(Obj);
printTLSDirectory(file);
printLoadConfiguration(file);
@@ -615,7 +624,7 @@ void llvm::printCOFFFileHeader(const object::ObjectFile *Obj) {
printExportTable(file);
}
-void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
+void printCOFFSymbolTable(const object::COFFImportFile *i) {
unsigned Index = 0;
bool IsCode = i->getCOFFImportHeader()->getType() == COFF::IMPORT_CODE;
@@ -623,7 +632,7 @@ void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
std::string Name;
raw_string_ostream NS(Name);
- Sym.printName(NS);
+ cantFail(Sym.printName(NS));
NS.flush();
outs() << "[" << format("%2d", Index) << "]"
@@ -638,11 +647,11 @@ void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
}
}
-void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
+void printCOFFSymbolTable(const COFFObjectFile *coff) {
for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) {
Expected<COFFSymbolRef> Symbol = coff->getSymbol(SI);
StringRef Name;
- error(errorToErrorCode(Symbol.takeError()));
+ error(Symbol.takeError());
error(coff->getSymbolName(*Symbol, Name));
outs() << "[" << format("%2d", SI) << "]"
@@ -709,3 +718,4 @@ void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
}
}
}
+} // namespace llvm
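The COFF dumper changes above replace std::error_code plumbing with llvm::Error / Expected<T>. A small illustrative sketch of that propagation pattern (the helper name is hypothetical, not from the patch):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::object;

// Propagate failures from Expected<T>-returning accessors with takeError()
// instead of flattening them to a std::error_code.
static Expected<StringRef> symbolNameOrError(const SymbolRef &Sym) {
  Expected<StringRef> NameOrErr = Sym.getName();
  if (!NameOrErr)
    return NameOrErr.takeError();
  return *NameOrErr;
}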
diff --git a/tools/llvm-objdump/ELFDump.cpp b/tools/llvm-objdump/ELFDump.cpp
index b17a15a0d8fc..9c4d67d0f1bd 100644
--- a/tools/llvm-objdump/ELFDump.cpp
+++ b/tools/llvm-objdump/ELFDump.cpp
@@ -1,9 +1,8 @@
//===-- ELFDump.cpp - ELF-specific dumper -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -13,23 +12,22 @@
//===----------------------------------------------------------------------===//
#include "llvm-objdump.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
using namespace llvm::object;
+namespace llvm {
template <class ELFT>
-Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
- typedef ELFFile<ELFT> ELFO;
-
+static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
auto DynamicEntriesOrError = Elf->dynamicEntries();
if (!DynamicEntriesOrError)
return DynamicEntriesOrError.takeError();
- for (const typename ELFO::Elf_Dyn &Dyn : *DynamicEntriesOrError) {
+ for (const typename ELFT::Dyn &Dyn : *DynamicEntriesOrError) {
if (Dyn.d_tag == ELF::DT_STRTAB) {
auto MappedAddrOrError = Elf->toMappedAddr(Dyn.getPtr());
if (!MappedAddrOrError)
@@ -43,7 +41,7 @@ Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
if (!SectionsOrError)
return SectionsOrError.takeError();
- for (const typename ELFO::Elf_Shdr &Sec : *SectionsOrError) {
+ for (const typename ELFT::Shdr &Sec : *SectionsOrError) {
if (Sec.sh_type == ELF::SHT_DYNSYM)
return Elf->getStringTableForSymtab(Sec);
}
@@ -52,40 +50,135 @@ Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> *Elf) {
}
template <class ELFT>
-void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
- auto ProgramHeaderOrError = Elf->program_headers();
- if (!ProgramHeaderOrError)
- report_error(Filename, ProgramHeaderOrError.takeError());
+static Error getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
+ const RelocationRef &RelRef,
+ SmallVectorImpl<char> &Result) {
+ const ELFFile<ELFT> &EF = *Obj->getELFFile();
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
+ auto SecOrErr = EF.getSection(Rel.d.a);
+ if (!SecOrErr)
+ return SecOrErr.takeError();
- auto DynamicEntriesOrError = Elf->dynamicEntries();
- if (!DynamicEntriesOrError)
- report_error(Filename, DynamicEntriesOrError.takeError());
+ int64_t Addend = 0;
+ // If there is no Symbol associated with the relocation, we set the undef
+ // boolean value to 'true'. This will prevent us from calling functions that
+ // require the relocation to be associated with a symbol.
+ //
+ // In SHT_REL case we would need to read the addend from section data.
+ // GNU objdump does not do that and we just follow for simplicity atm.
+ bool Undef = false;
+ if ((*SecOrErr)->sh_type == ELF::SHT_RELA) {
+ const typename ELFT::Rela *ERela = Obj->getRela(Rel);
+ Addend = ERela->r_addend;
+ Undef = ERela->getSymbol(false) == 0;
+ } else if ((*SecOrErr)->sh_type != ELF::SHT_REL) {
+ return make_error<BinaryError>();
+ }
+
+ // Default scheme is to print Target, as well as "+ <addend>" for nonzero
+ // addend. Should be acceptable for all normal purposes.
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
+
+ if (!Undef) {
+ symbol_iterator SI = RelRef.getSymbol();
+ const typename ELFT::Sym *Sym = Obj->getSymbol(SI->getRawDataRefImpl());
+ if (Sym->getType() == ELF::STT_SECTION) {
+ Expected<section_iterator> SymSI = SI->getSection();
+ if (!SymSI)
+ return SymSI.takeError();
+ const typename ELFT::Shdr *SymSec =
+ Obj->getSection((*SymSI)->getRawDataRefImpl());
+ auto SecName = EF.getSectionName(SymSec);
+ if (!SecName)
+ return SecName.takeError();
+ Fmt << *SecName;
+ } else {
+ Expected<StringRef> SymName = SI->getName();
+ if (!SymName)
+ return SymName.takeError();
+ if (Demangle)
+ Fmt << demangle(*SymName);
+ else
+ Fmt << *SymName;
+ }
+ } else {
+ Fmt << "*ABS*";
+ }
+
+ if (Addend != 0)
+ Fmt << (Addend < 0 ? "" : "+") << Addend;
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
+ return Error::success();
+}
+Error getELFRelocationValueString(const ELFObjectFileBase *Obj,
+ const RelocationRef &Rel,
+ SmallVectorImpl<char> &Result) {
+ if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
+ return getRelocationValueString(ELF32LE, Rel, Result);
+ if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
+ return getRelocationValueString(ELF64LE, Rel, Result);
+ if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
+ return getRelocationValueString(ELF32BE, Rel, Result);
+ auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
+ return getRelocationValueString(ELF64BE, Rel, Result);
+}
+
+template <class ELFT>
+static uint64_t getSectionLMA(const ELFFile<ELFT> *Obj,
+ const object::ELFSectionRef &Sec) {
+ auto PhdrRangeOrErr = Obj->program_headers();
+ if (!PhdrRangeOrErr)
+ report_fatal_error(toString(PhdrRangeOrErr.takeError()));
+
+ // Search for a PT_LOAD segment containing the requested section. Use this
+ // segment's p_paddr to calculate the section's LMA.
+ for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
+ if ((Phdr.p_type == ELF::PT_LOAD) && (Phdr.p_vaddr <= Sec.getAddress()) &&
+ (Phdr.p_vaddr + Phdr.p_memsz > Sec.getAddress()))
+ return Sec.getAddress() - Phdr.p_vaddr + Phdr.p_paddr;
+
+ // Return section's VMA if it isn't in a PT_LOAD segment.
+ return Sec.getAddress();
+}
+
+uint64_t getELFSectionLMA(const object::ELFSectionRef &Sec) {
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Sec.getObject()))
+ return getSectionLMA(ELFObj->getELFFile(), Sec);
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Sec.getObject()))
+ return getSectionLMA(ELFObj->getELFFile(), Sec);
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Sec.getObject()))
+ return getSectionLMA(ELFObj->getELFFile(), Sec);
+ const auto *ELFObj = cast<ELF64BEObjectFile>(Sec.getObject());
+ return getSectionLMA(ELFObj->getELFFile(), Sec);
+}
+
+template <class ELFT>
+void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
+ ArrayRef<typename ELFT::Dyn> DynamicEntries =
+ unwrapOrError(Elf->dynamicEntries(), Filename);
outs() << "Dynamic Section:\n";
- for (const auto &Dyn : *DynamicEntriesOrError) {
+ for (const typename ELFT::Dyn &Dyn : DynamicEntries) {
if (Dyn.d_tag == ELF::DT_NULL)
continue;
- StringRef Str = StringRef(Elf->getDynamicTagAsString(Dyn.d_tag));
-
- if (Str.empty()) {
- std::string HexStr = utohexstr(static_cast<uint64_t>(Dyn.d_tag), true);
- outs() << format(" 0x%-19s", HexStr.c_str());
- } else {
- // We use "-21" in order to match GNU objdump's output.
- outs() << format(" %-21s", Str.data());
- }
+ std::string Str = Elf->getDynamicTagAsString(Dyn.d_tag);
+ outs() << format(" %-21s", Str.c_str());
const char *Fmt =
ELFT::Is64Bits ? "0x%016" PRIx64 "\n" : "0x%08" PRIx64 "\n";
- if (Dyn.d_tag == ELF::DT_NEEDED) {
+ if (Dyn.d_tag == ELF::DT_NEEDED || Dyn.d_tag == ELF::DT_RPATH ||
+ Dyn.d_tag == ELF::DT_RUNPATH || Dyn.d_tag == ELF::DT_SONAME ||
+ Dyn.d_tag == ELF::DT_AUXILIARY || Dyn.d_tag == ELF::DT_FILTER) {
Expected<StringRef> StrTabOrErr = getDynamicStrTab(Elf);
if (StrTabOrErr) {
const char *Data = StrTabOrErr.get().data();
outs() << (Data + Dyn.d_un.d_val) << "\n";
continue;
}
- warn(errorToErrorCode(StrTabOrErr.takeError()).message());
+ warn(toString(StrTabOrErr.takeError()));
consumeError(StrTabOrErr.takeError());
}
outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val);
@@ -93,13 +186,11 @@ void printDynamicSection(const ELFFile<ELFT> *Elf, StringRef Filename) {
}
template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
- typedef ELFFile<ELFT> ELFO;
outs() << "Program Header:\n";
auto ProgramHeaderOrError = o->program_headers();
if (!ProgramHeaderOrError)
- report_fatal_error(
- errorToErrorCode(ProgramHeaderOrError.takeError()).message());
- for (const typename ELFO::Elf_Phdr &Phdr : *ProgramHeaderOrError) {
+ report_fatal_error(toString(ProgramHeaderOrError.takeError()));
+ for (const typename ELFT::Phdr &Phdr : *ProgramHeaderOrError) {
switch (Phdr.p_type) {
case ELF::PT_DYNAMIC:
outs() << " DYNAMIC ";
@@ -157,7 +248,86 @@ template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
outs() << "\n";
}
-void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
+template <class ELFT>
+void printSymbolVersionDependency(ArrayRef<uint8_t> Contents,
+ StringRef StrTab) {
+ outs() << "Version References:\n";
+
+ const uint8_t *Buf = Contents.data();
+ while (Buf) {
+ auto *Verneed = reinterpret_cast<const typename ELFT::Verneed *>(Buf);
+ outs() << " required from "
+ << StringRef(StrTab.drop_front(Verneed->vn_file).data()) << ":\n";
+
+ const uint8_t *BufAux = Buf + Verneed->vn_aux;
+ while (BufAux) {
+ auto *Vernaux = reinterpret_cast<const typename ELFT::Vernaux *>(BufAux);
+ outs() << " "
+ << format("0x%08" PRIx32 " ", (uint32_t)Vernaux->vna_hash)
+ << format("0x%02" PRIx16 " ", (uint16_t)Vernaux->vna_flags)
+ << format("%02" PRIu16 " ", (uint16_t)Vernaux->vna_other)
+ << StringRef(StrTab.drop_front(Vernaux->vna_name).data()) << '\n';
+ BufAux = Vernaux->vna_next ? BufAux + Vernaux->vna_next : nullptr;
+ }
+ Buf = Verneed->vn_next ? Buf + Verneed->vn_next : nullptr;
+ }
+}
+
+template <class ELFT>
+void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr,
+ ArrayRef<uint8_t> Contents,
+ StringRef StrTab) {
+ outs() << "Version definitions:\n";
+
+ const uint8_t *Buf = Contents.data();
+ uint32_t VerdefIndex = 1;
+ // sh_info contains the number of entries in the SHT_GNU_verdef section. To
+ // make the index column have consistent width, we should insert blank spaces
+ // according to sh_info.
+ uint16_t VerdefIndexWidth = std::to_string(Shdr.sh_info).size();
+ while (Buf) {
+ auto *Verdef = reinterpret_cast<const typename ELFT::Verdef *>(Buf);
+ outs() << format_decimal(VerdefIndex++, VerdefIndexWidth) << " "
+ << format("0x%02" PRIx16 " ", (uint16_t)Verdef->vd_flags)
+ << format("0x%08" PRIx32 " ", (uint32_t)Verdef->vd_hash);
+
+ const uint8_t *BufAux = Buf + Verdef->vd_aux;
+ uint16_t VerdauxIndex = 0;
+ while (BufAux) {
+ auto *Verdaux = reinterpret_cast<const typename ELFT::Verdaux *>(BufAux);
+ if (VerdauxIndex)
+ outs() << std::string(VerdefIndexWidth + 17, ' ');
+ outs() << StringRef(StrTab.drop_front(Verdaux->vda_name).data()) << '\n';
+ BufAux = Verdaux->vda_next ? BufAux + Verdaux->vda_next : nullptr;
+ ++VerdauxIndex;
+ }
+ Buf = Verdef->vd_next ? Buf + Verdef->vd_next : nullptr;
+ }
+}
+
+template <class ELFT>
+void printSymbolVersionInfo(const ELFFile<ELFT> *Elf, StringRef FileName) {
+ ArrayRef<typename ELFT::Shdr> Sections =
+ unwrapOrError(Elf->sections(), FileName);
+ for (const typename ELFT::Shdr &Shdr : Sections) {
+ if (Shdr.sh_type != ELF::SHT_GNU_verneed &&
+ Shdr.sh_type != ELF::SHT_GNU_verdef)
+ continue;
+
+ ArrayRef<uint8_t> Contents =
+ unwrapOrError(Elf->getSectionContents(&Shdr), FileName);
+ const typename ELFT::Shdr *StrTabSec =
+ unwrapOrError(Elf->getSection(Shdr.sh_link), FileName);
+ StringRef StrTab = unwrapOrError(Elf->getStringTable(StrTabSec), FileName);
+
+ if (Shdr.sh_type == ELF::SHT_GNU_verneed)
+ printSymbolVersionDependency<ELFT>(Contents, StrTab);
+ else
+ printSymbolVersionDefinition<ELFT>(Shdr, Contents, StrTab);
+ }
+}
+
+void printELFFileHeader(const object::ObjectFile *Obj) {
if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
printProgramHeaders(ELFObj->getELFFile());
else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
@@ -168,7 +338,7 @@ void llvm::printELFFileHeader(const object::ObjectFile *Obj) {
printProgramHeaders(ELFObj->getELFFile());
}
-void llvm::printELFDynamicSection(const object::ObjectFile *Obj) {
+void printELFDynamicSection(const object::ObjectFile *Obj) {
if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
@@ -178,3 +348,15 @@ void llvm::printELFDynamicSection(const object::ObjectFile *Obj) {
else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
}
+
+void printELFSymbolVersionInfo(const object::ObjectFile *Obj) {
+ if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+ else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+ else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+ else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
+ printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+}
+} // namespace llvm
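getSectionLMA above derives a section's load address from the PT_LOAD segment that contains it. A tiny compile-time check of that arithmetic, using made-up addresses (not values from the patch):

#include <cstdint>

// Within a containing PT_LOAD: LMA = VMA - p_vaddr + p_paddr.
// Outside any PT_LOAD the section's VMA is used unchanged.
constexpr uint64_t sectionLMA(uint64_t SecVMA, uint64_t PVAddr, uint64_t PPAddr) {
  return SecVMA - PVAddr + PPAddr;
}
static_assert(sectionLMA(0x40100, 0x40000, 0x80000) == 0x80100,
              "hypothetical example values");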
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 5ef7058ec9da..58ff7be4543c 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -1,9 +1,8 @@
//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,83 +55,140 @@ extern "C" {
}
#endif
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
+
+namespace llvm {
+
+cl::OptionCategory MachOCat("llvm-objdump MachO Specific Options");
+
+extern cl::opt<bool> ArchiveHeaders;
+extern cl::opt<bool> Disassemble;
+extern cl::opt<bool> DisassembleAll;
+extern cl::opt<DIDumpType> DwarfDumpType;
+extern cl::list<std::string> FilterSections;
+extern cl::list<std::string> MAttrs;
+extern cl::opt<std::string> MCPU;
+extern cl::opt<bool> NoShowRawInsn;
+extern cl::opt<bool> NoLeadingAddr;
+extern cl::opt<bool> PrintImmHex;
+extern cl::opt<bool> PrivateHeaders;
+extern cl::opt<bool> Relocations;
+extern cl::opt<bool> SectionHeaders;
+extern cl::opt<bool> SectionContents;
+extern cl::opt<bool> SymbolTable;
+extern cl::opt<std::string> TripleName;
+extern cl::opt<bool> UnwindInfo;
+
+cl::opt<bool>
+ FirstPrivateHeader("private-header",
+ cl::desc("Display only the first format specific file "
+ "header"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> ExportsTrie("exports-trie",
+ cl::desc("Display mach-o exported symbols"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> Rebase("rebase", cl::desc("Display mach-o rebasing info"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> Bind("bind", cl::desc("Display mach-o binding info"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> LazyBind("lazy-bind",
+ cl::desc("Display mach-o lazy binding info"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> WeakBind("weak-bind",
+ cl::desc("Display mach-o weak binding info"),
+ cl::cat(MachOCat));
static cl::opt<bool>
- UseDbg("g",
- cl::desc("Print line information from debug info if available"));
+ UseDbg("g", cl::Grouping,
+ cl::desc("Print line information from debug info if available"),
+ cl::cat(MachOCat));
static cl::opt<std::string> DSYMFile("dsym",
- cl::desc("Use .dSYM file for debug info"));
+ cl::desc("Use .dSYM file for debug info"),
+ cl::cat(MachOCat));
static cl::opt<bool> FullLeadingAddr("full-leading-addr",
- cl::desc("Print full leading address"));
+ cl::desc("Print full leading address"),
+ cl::cat(MachOCat));
static cl::opt<bool> NoLeadingHeaders("no-leading-headers",
- cl::desc("Print no leading headers"));
+ cl::desc("Print no leading headers"),
+ cl::cat(MachOCat));
-cl::opt<bool> llvm::UniversalHeaders("universal-headers",
- cl::desc("Print Mach-O universal headers "
- "(requires -macho)"));
+cl::opt<bool> UniversalHeaders("universal-headers",
+ cl::desc("Print Mach-O universal headers "
+ "(requires -macho)"),
+ cl::cat(MachOCat));
cl::opt<bool>
ArchiveMemberOffsets("archive-member-offsets",
cl::desc("Print the offset to each archive member for "
"Mach-O archives (requires -macho and "
- "-archive-headers)"));
-
-cl::opt<bool>
- llvm::IndirectSymbols("indirect-symbols",
- cl::desc("Print indirect symbol table for Mach-O "
- "objects (requires -macho)"));
+ "-archive-headers)"),
+ cl::cat(MachOCat));
-cl::opt<bool>
- llvm::DataInCode("data-in-code",
- cl::desc("Print the data in code table for Mach-O objects "
- "(requires -macho)"));
-
-cl::opt<bool>
- llvm::LinkOptHints("link-opt-hints",
- cl::desc("Print the linker optimization hints for "
- "Mach-O objects (requires -macho)"));
-
-cl::opt<bool>
- llvm::InfoPlist("info-plist",
- cl::desc("Print the info plist section as strings for "
- "Mach-O objects (requires -macho)"));
+cl::opt<bool> IndirectSymbols("indirect-symbols",
+ cl::desc("Print indirect symbol table for Mach-O "
+ "objects (requires -macho)"),
+ cl::cat(MachOCat));
cl::opt<bool>
- llvm::DylibsUsed("dylibs-used",
- cl::desc("Print the shared libraries used for linked "
- "Mach-O files (requires -macho)"));
+ DataInCode("data-in-code",
+ cl::desc("Print the data in code table for Mach-O objects "
+ "(requires -macho)"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> LinkOptHints("link-opt-hints",
+ cl::desc("Print the linker optimization hints for "
+ "Mach-O objects (requires -macho)"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> InfoPlist("info-plist",
+ cl::desc("Print the info plist section as strings for "
+ "Mach-O objects (requires -macho)"),
+ cl::cat(MachOCat));
+
+cl::opt<bool> DylibsUsed("dylibs-used",
+ cl::desc("Print the shared libraries used for linked "
+ "Mach-O files (requires -macho)"),
+ cl::cat(MachOCat));
cl::opt<bool>
- llvm::DylibId("dylib-id",
- cl::desc("Print the shared library's id for the dylib Mach-O "
- "file (requires -macho)"));
+ DylibId("dylib-id",
+ cl::desc("Print the shared library's id for the dylib Mach-O "
+ "file (requires -macho)"),
+ cl::cat(MachOCat));
cl::opt<bool>
- llvm::NonVerbose("non-verbose",
- cl::desc("Print the info for Mach-O objects in "
- "non-verbose or numeric form (requires -macho)"));
+ NonVerbose("non-verbose",
+ cl::desc("Print the info for Mach-O objects in "
+ "non-verbose or numeric form (requires -macho)"),
+ cl::cat(MachOCat));
cl::opt<bool>
- llvm::ObjcMetaData("objc-meta-data",
- cl::desc("Print the Objective-C runtime meta data for "
- "Mach-O files (requires -macho)"));
+ ObjcMetaData("objc-meta-data",
+ cl::desc("Print the Objective-C runtime meta data for "
+ "Mach-O files (requires -macho)"),
+ cl::cat(MachOCat));
-cl::opt<std::string> llvm::DisSymName(
+cl::opt<std::string> DisSymName(
"dis-symname",
- cl::desc("disassemble just this symbol's instructions (requires -macho)"));
+ cl::desc("disassemble just this symbol's instructions (requires -macho)"),
+ cl::cat(MachOCat));
static cl::opt<bool> NoSymbolicOperands(
"no-symbolic-operands",
- cl::desc("do not symbolic operands when disassembling (requires -macho)"));
+ cl::desc("do not symbolic operands when disassembling (requires -macho)"),
+ cl::cat(MachOCat));
static cl::list<std::string>
ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
- cl::ZeroOrMore);
+ cl::ZeroOrMore, cl::cat(MachOCat));
bool ArchAll = false;
@@ -142,7 +198,7 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
const char **McpuDefault,
const Target **ThumbTarget) {
// Figure out the target triple.
- llvm::Triple TT(TripleName);
+ Triple TT(TripleName);
if (TripleName.empty()) {
TT = MachOObj->getArchTriple(McpuDefault);
TripleName = TT.str();
@@ -151,7 +207,7 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
if (TT.getArch() == Triple::arm) {
// We've inferred a 32-bit ARM target from the object file. All MachO CPUs
// that support ARM are also capable of Thumb mode.
- llvm::Triple ThumbTriple = TT;
+ Triple ThumbTriple = TT;
std::string ThumbName = (Twine("thumb") + TT.getArchName().substr(3)).str();
ThumbTriple.setArchName(ThumbName);
ThumbTripleName = ThumbTriple.str();
@@ -180,11 +236,11 @@ struct SymbolSorter {
bool operator()(const SymbolRef &A, const SymbolRef &B) {
Expected<SymbolRef::Type> ATypeOrErr = A.getType();
if (!ATypeOrErr)
- report_error(A.getObject()->getFileName(), ATypeOrErr.takeError());
+ report_error(ATypeOrErr.takeError(), A.getObject()->getFileName());
SymbolRef::Type AType = *ATypeOrErr;
Expected<SymbolRef::Type> BTypeOrErr = B.getType();
if (!BTypeOrErr)
- report_error(B.getObject()->getFileName(), BTypeOrErr.takeError());
+ report_error(BTypeOrErr.takeError(), B.getObject()->getFileName());
SymbolRef::Type BType = *BTypeOrErr;
uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue();
uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue();
@@ -308,11 +364,10 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
std::vector<SymbolRef> &Symbols,
SmallVectorImpl<uint64_t> &FoundFns,
uint64_t &BaseSegmentAddress) {
+ const StringRef FileName = MachOObj->getFileName();
for (const SymbolRef &Symbol : MachOObj->symbols()) {
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(MachOObj->getFileName(), SymName.takeError());
- if (!SymName->startswith("ltmp"))
+ StringRef SymName = unwrapOrError(Symbol.getName(), FileName);
+ if (!SymName.startswith("ltmp"))
Symbols.push_back(Symbol);
}
@@ -342,6 +397,254 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
}
}
+static void printRelocationTargetName(const MachOObjectFile *O,
+ const MachO::any_relocation_info &RE,
+ raw_string_ostream &Fmt) {
+ // Target of a scattered relocation is an address. In the interest of
+ // generating pretty output, scan through the symbol table looking for a
+ // symbol that aligns with that address. If we find one, print it.
+ // Otherwise, we just print the hex address of the target.
+ const StringRef FileName = O->getFileName();
+ if (O->isRelocationScattered(RE)) {
+ uint32_t Val = O->getPlainRelocationSymbolNum(RE);
+
+ for (const SymbolRef &Symbol : O->symbols()) {
+ uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
+ if (Addr != Val)
+ continue;
+ Fmt << unwrapOrError(Symbol.getName(), FileName);
+ return;
+ }
+
+ // If we couldn't find a symbol that this relocation refers to, try
+ // to find a section beginning instead.
+ for (const SectionRef &Section : ToolSectionFilter(*O)) {
+ StringRef Name;
+ uint64_t Addr = Section.getAddress();
+ if (Addr != Val)
+ continue;
+ if (std::error_code EC = Section.getName(Name))
+ report_error(errorCodeToError(EC), O->getFileName());
+ Fmt << Name;
+ return;
+ }
+
+ Fmt << format("0x%x", Val);
+ return;
+ }
+
+ StringRef S;
+ bool isExtern = O->getPlainRelocationExternal(RE);
+ uint64_t Val = O->getPlainRelocationSymbolNum(RE);
+
+ if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
+ Fmt << format("0x%0" PRIx64, Val);
+ return;
+ }
+
+ if (isExtern) {
+ symbol_iterator SI = O->symbol_begin();
+ advance(SI, Val);
+ S = unwrapOrError(SI->getName(), FileName);
+ } else {
+ section_iterator SI = O->section_begin();
+ // Adjust for the fact that sections are 1-indexed.
+ if (Val == 0) {
+ Fmt << "0 (?,?)";
+ return;
+ }
+ uint32_t I = Val - 1;
+ while (I != 0 && SI != O->section_end()) {
+ --I;
+ advance(SI, 1);
+ }
+ if (SI == O->section_end())
+ Fmt << Val << " (?,?)";
+ else
+ SI->getName(S);
+ }
+
+ Fmt << S;
+}
+
+Error getMachORelocationValueString(const MachOObjectFile *Obj,
+ const RelocationRef &RelRef,
+ SmallVectorImpl<char> &Result) {
+ DataRefImpl Rel = RelRef.getRawDataRefImpl();
+ MachO::any_relocation_info RE = Obj->getRelocation(Rel);
+
+ unsigned Arch = Obj->getArch();
+
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
+ unsigned Type = Obj->getAnyRelocationType(RE);
+ bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
+
+ // Determine any addends that should be displayed with the relocation.
+ // These require decoding the relocation type, which is triple-specific.
+
+ // X86_64 has entirely custom relocation types.
+ if (Arch == Triple::x86_64) {
+ switch (Type) {
+ case MachO::X86_64_RELOC_GOT_LOAD:
+ case MachO::X86_64_RELOC_GOT: {
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@GOT";
+ if (IsPCRel)
+ Fmt << "PCREL";
+ break;
+ }
+ case MachO::X86_64_RELOC_SUBTRACTOR: {
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+ // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
+ // X86_64_RELOC_UNSIGNED.
+ // NOTE: Scattered relocations don't exist on x86_64.
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::X86_64_RELOC_UNSIGNED)
+ report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
+ "X86_64_RELOC_SUBTRACTOR.");
+
+ // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
+ // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
+ printRelocationTargetName(Obj, RENext, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RE, Fmt);
+ break;
+ }
+ case MachO::X86_64_RELOC_TLV:
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@TLV";
+ if (IsPCRel)
+ Fmt << "P";
+ break;
+ case MachO::X86_64_RELOC_SIGNED_1:
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-1";
+ break;
+ case MachO::X86_64_RELOC_SIGNED_2:
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-2";
+ break;
+ case MachO::X86_64_RELOC_SIGNED_4:
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-4";
+ break;
+ default:
+ printRelocationTargetName(Obj, RE, Fmt);
+ break;
+ }
+ // X86 and ARM share some relocation types in common.
+ } else if (Arch == Triple::x86 || Arch == Triple::arm ||
+ Arch == Triple::ppc) {
+ // Generic relocation types...
+ switch (Type) {
+ case MachO::GENERIC_RELOC_PAIR: // prints no info
+ return Error::success();
+ case MachO::GENERIC_RELOC_SECTDIFF: {
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+ // X86 sect diff's must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+
+ if (RType != MachO::GENERIC_RELOC_PAIR)
+ report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_SECTDIFF.");
+
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
+ break;
+ }
+ }
+
+ if (Arch == Triple::x86 || Arch == Triple::ppc) {
+ switch (Type) {
+ case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+ // X86 sect diff's must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::GENERIC_RELOC_PAIR)
+ report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_LOCAL_SECTDIFF.");
+
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
+ break;
+ }
+ case MachO::GENERIC_RELOC_TLV: {
+ printRelocationTargetName(Obj, RE, Fmt);
+ Fmt << "@TLV";
+ if (IsPCRel)
+ Fmt << "P";
+ break;
+ }
+ default:
+ printRelocationTargetName(Obj, RE, Fmt);
+ }
+ } else { // ARM-specific relocations
+ switch (Type) {
+ case MachO::ARM_RELOC_HALF:
+ case MachO::ARM_RELOC_HALF_SECTDIFF: {
+ // Half relocations steal a bit from the length field to encode
+ // whether this is an upper16 or a lower16 relocation.
+ bool isUpper = (Obj->getAnyRelocationLength(RE) & 0x1) == 1;
+
+ if (isUpper)
+ Fmt << ":upper16:(";
+ else
+ Fmt << ":lower16:(";
+ printRelocationTargetName(Obj, RE, Fmt);
+
+ DataRefImpl RelNext = Rel;
+ Obj->moveRelocationNext(RelNext);
+ MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
+
+ // ARM half relocs must be followed by a relocation of type
+ // ARM_RELOC_PAIR.
+ unsigned RType = Obj->getAnyRelocationType(RENext);
+ if (RType != MachO::ARM_RELOC_PAIR)
+ report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
+ "ARM_RELOC_HALF");
+
+ // NOTE: The half of the target virtual address is stashed in the
+ // address field of the secondary relocation, but we can't reverse
+ // engineer the constant offset from it without decoding the movw/movt
+ // instruction to find the other half in its immediate field.
+
+ // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
+ // symbol/section pointer of the follow-on relocation.
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
+ Fmt << "-";
+ printRelocationTargetName(Obj, RENext, Fmt);
+ }
+
+ Fmt << ")";
+ break;
+ }
+ default: {
+ printRelocationTargetName(Obj, RE, Fmt);
+ }
+ }
+ }
+ } else
+ printRelocationTargetName(Obj, RE, Fmt);
+
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
+ return Error::success();
+}
+
static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
uint32_t n, uint32_t count,
uint32_t stride, uint64_t addr) {
@@ -389,10 +692,7 @@ static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(O->getFileName(), SymName.takeError());
- outs() << *SymName;
+ outs() << unwrapOrError(Symbol.getName(), O->getFileName());
} else {
outs() << "?";
}
@@ -500,6 +800,7 @@ static void PrintRType(const uint64_t cputype, const unsigned r_type) {
outs() << arm_r_types[r_type];
break;
case MachO::CPU_TYPE_ARM64:
+ case MachO::CPU_TYPE_ARM64_32:
outs() << arm64_r_types[r_type];
break;
default:
@@ -510,9 +811,8 @@ static void PrintRType(const uint64_t cputype, const unsigned r_type) {
static void PrintRLength(const uint64_t cputype, const unsigned r_type,
const unsigned r_length, const bool previous_arm_half){
if (cputype == MachO::CPU_TYPE_ARM &&
- (r_type == llvm::MachO::ARM_RELOC_HALF ||
- r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF ||
- previous_arm_half == true)) {
+ (r_type == MachO::ARM_RELOC_HALF ||
+ r_type == MachO::ARM_RELOC_HALF_SECTDIFF || previous_arm_half == true)) {
if ((r_length & 0x1) == 0)
outs() << "lo/";
else
@@ -573,9 +873,8 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
if (verbose) {
// scattered: address
if ((cputype == MachO::CPU_TYPE_I386 &&
- r_type == llvm::MachO::GENERIC_RELOC_PAIR) ||
- (cputype == MachO::CPU_TYPE_ARM &&
- r_type == llvm::MachO::ARM_RELOC_PAIR))
+ r_type == MachO::GENERIC_RELOC_PAIR) ||
+ (cputype == MachO::CPU_TYPE_ARM && r_type == MachO::ARM_RELOC_PAIR))
outs() << " ";
else
outs() << format("%08x ", (unsigned int)r_address);
@@ -597,29 +896,27 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << format("True 0x%08x", (unsigned int)r_value);
if (previous_sectdiff == false) {
if ((cputype == MachO::CPU_TYPE_ARM &&
- r_type == llvm::MachO::ARM_RELOC_PAIR))
+ r_type == MachO::ARM_RELOC_PAIR))
outs() << format(" half = 0x%04x ", (unsigned int)r_address);
- }
- else if (cputype == MachO::CPU_TYPE_ARM &&
- sectdiff_r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF)
+ } else if (cputype == MachO::CPU_TYPE_ARM &&
+ sectdiff_r_type == MachO::ARM_RELOC_HALF_SECTDIFF)
outs() << format(" other_half = 0x%04x ", (unsigned int)r_address);
if ((cputype == MachO::CPU_TYPE_I386 &&
- (r_type == llvm::MachO::GENERIC_RELOC_SECTDIFF ||
- r_type == llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) ||
+ (r_type == MachO::GENERIC_RELOC_SECTDIFF ||
+ r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) ||
(cputype == MachO::CPU_TYPE_ARM &&
- (sectdiff_r_type == llvm::MachO::ARM_RELOC_SECTDIFF ||
- sectdiff_r_type == llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF ||
- sectdiff_r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF))) {
- previous_sectdiff = true;
- sectdiff_r_type = r_type;
- }
- else {
+ (sectdiff_r_type == MachO::ARM_RELOC_SECTDIFF ||
+ sectdiff_r_type == MachO::ARM_RELOC_LOCAL_SECTDIFF ||
+ sectdiff_r_type == MachO::ARM_RELOC_HALF_SECTDIFF))) {
+ previous_sectdiff = true;
+ sectdiff_r_type = r_type;
+ } else {
previous_sectdiff = false;
sectdiff_r_type = 0;
}
if (cputype == MachO::CPU_TYPE_ARM &&
- (r_type == llvm::MachO::ARM_RELOC_HALF ||
- r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF))
+ (r_type == MachO::ARM_RELOC_HALF ||
+ r_type == MachO::ARM_RELOC_HALF_SECTDIFF))
previous_arm_half = true;
else
previous_arm_half = false;
@@ -635,8 +932,7 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
else {
if (verbose) {
// plain: address
- if (cputype == MachO::CPU_TYPE_ARM &&
- r_type == llvm::MachO::ARM_RELOC_PAIR)
+ if (cputype == MachO::CPU_TYPE_ARM && r_type == MachO::ARM_RELOC_PAIR)
outs() << " ";
else
outs() << format("%08x ", (unsigned int)r_address);
@@ -678,28 +974,27 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
outs() << "False ";
// plain: symbolnum/value
- if (cputype == MachO::CPU_TYPE_ARM &&
- r_type == llvm::MachO::ARM_RELOC_PAIR)
+ if (cputype == MachO::CPU_TYPE_ARM && r_type == MachO::ARM_RELOC_PAIR)
outs() << format("other_half = 0x%04x\n", (unsigned int)r_address);
- else if (cputype == MachO::CPU_TYPE_ARM64 &&
- r_type == llvm::MachO::ARM64_RELOC_ADDEND)
+ else if ((cputype == MachO::CPU_TYPE_ARM64 ||
+ cputype == MachO::CPU_TYPE_ARM64_32) &&
+ r_type == MachO::ARM64_RELOC_ADDEND)
outs() << format("addend = 0x%06x\n", (unsigned int)r_symbolnum);
else {
outs() << format("%d ", r_symbolnum);
- if (r_symbolnum == llvm::MachO::R_ABS)
+ if (r_symbolnum == MachO::R_ABS)
outs() << "R_ABS\n";
else {
// in this case, r_symbolnum is actually a 1-based section number
uint32_t nsects = O->section_end()->getRawDataRefImpl().d.a;
if (r_symbolnum > 0 && r_symbolnum <= nsects) {
- llvm::object::DataRefImpl DRI;
+ object::DataRefImpl DRI;
DRI.d.a = r_symbolnum-1;
StringRef SegName = O->getSectionFinalSegmentName(DRI);
- StringRef SectName;
- if (O->getSectionName(DRI, SectName))
- outs() << "(?,?)\n";
+ if (Expected<StringRef> NameOrErr = O->getSectionName(DRI))
+ outs() << "(" << SegName << "," << *NameOrErr << ")\n";
else
- outs() << "(" << SegName << "," << SectName << ")\n";
+ outs() << "(?,?)\n";
}
else {
outs() << "(?,?)\n";
@@ -708,8 +1003,8 @@ static void PrintRelocationEntries(const MachOObjectFile *O,
}
}
if (cputype == MachO::CPU_TYPE_ARM &&
- (r_type == llvm::MachO::ARM_RELOC_HALF ||
- r_type == llvm::MachO::ARM_RELOC_HALF_SECTDIFF))
+ (r_type == MachO::ARM_RELOC_HALF ||
+ r_type == MachO::ARM_RELOC_HALF_SECTDIFF))
previous_arm_half = true;
else
previous_arm_half = false;
@@ -752,13 +1047,12 @@ static void PrintRelocations(const MachOObjectFile *O, const bool verbose) {
DataRefImpl DRI;
DRI.d.a = J;
const StringRef SegName = O->getSectionFinalSegmentName(DRI);
- StringRef SectName;
- if (O->getSectionName(DRI, SectName))
+ if (Expected<StringRef> NameOrErr = O->getSectionName(DRI))
+ outs() << "Relocation information (" << SegName << "," << *NameOrErr
+ << format(") %u entries", Sec.nreloc);
+ else
outs() << "Relocation information (" << SegName << ",?) "
<< format("%u entries", Sec.nreloc);
- else
- outs() << "Relocation information (" << SegName << ","
- << SectName << format(") %u entries", Sec.nreloc);
outs() << "\naddress pcrel length extern type scattered "
"symbolnum/value\n";
PrintRelocationEntries(O, O->section_rel_begin(DRI),
@@ -773,13 +1067,12 @@ static void PrintRelocations(const MachOObjectFile *O, const bool verbose) {
DataRefImpl DRI;
DRI.d.a = J;
const StringRef SegName = O->getSectionFinalSegmentName(DRI);
- StringRef SectName;
- if (O->getSectionName(DRI, SectName))
+ if (Expected<StringRef> NameOrErr = O->getSectionName(DRI))
+ outs() << "Relocation information (" << SegName << "," << *NameOrErr
+ << format(") %u entries", Sec.nreloc);
+ else
outs() << "Relocation information (" << SegName << ",?) "
<< format("%u entries", Sec.nreloc);
- else
- outs() << "Relocation information (" << SegName << ","
- << SectName << format(") %u entries", Sec.nreloc);
outs() << "\naddress pcrel length extern type scattered "
"symbolnum/value\n";
PrintRelocationEntries(O, O->section_rel_begin(DRI),
@@ -913,7 +1206,16 @@ static void PrintDylibs(MachOObjectFile *O, bool JustId) {
outs() << " current version "
<< ((dl.dylib.current_version >> 16) & 0xffff) << "."
<< ((dl.dylib.current_version >> 8) & 0xff) << "."
- << (dl.dylib.current_version & 0xff) << ")\n";
+ << (dl.dylib.current_version & 0xff);
+ if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB)
+ outs() << ", weak";
+ if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB)
+ outs() << ", reexport";
+ if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB)
+ outs() << ", upward";
+ if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB)
+ outs() << ", lazy";
+ outs() << ")\n";
}
} else {
outs() << "\tBad offset (" << dl.dylib.name << ") for name of ";
@@ -942,18 +1244,13 @@ typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;
static void CreateSymbolAddressMap(MachOObjectFile *O,
SymbolAddressMap *AddrMap) {
// Create a map of symbol addresses to symbol names.
+ const StringRef FileName = O->getFileName();
for (const SymbolRef &Symbol : O->symbols()) {
- Expected<SymbolRef::Type> STOrErr = Symbol.getType();
- if (!STOrErr)
- report_error(O->getFileName(), STOrErr.takeError());
- SymbolRef::Type ST = *STOrErr;
+ SymbolRef::Type ST = unwrapOrError(Symbol.getType(), FileName);
if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
ST == SymbolRef::ST_Other) {
uint64_t Address = Symbol.getValue();
- Expected<StringRef> SymNameOrErr = Symbol.getName();
- if (!SymNameOrErr)
- report_error(O->getFileName(), SymNameOrErr.takeError());
- StringRef SymName = *SymNameOrErr;
+ StringRef SymName = unwrapOrError(Symbol.getName(), FileName);
if (!SymName.startswith(".objc"))
(*AddrMap)[Address] = SymName;
}
@@ -1186,10 +1483,8 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
});
if (Reloc != Relocs.end()) {
symbol_iterator RelocSym = Reloc->second;
- Expected<StringRef> SymName = RelocSym->getName();
- if (!SymName)
- report_error(O->getFileName(), SymName.takeError());
- outs() << "external relocation entry for symbol:" << *SymName << "\n";
+ StringRef SymName = unwrapOrError(RelocSym->getName(), O->getFileName());
+ outs() << "external relocation entry for symbol:" << SymName << "\n";
continue;
}
@@ -1220,8 +1515,8 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
section_type = Sec.flags & MachO::SECTION_TYPE;
}
- StringRef BytesStr;
- Sect->getContents(BytesStr);
+ StringRef BytesStr = unwrapOrError(Sect->getContents(), O->getFileName());
+
const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
switch (section_type) {
@@ -1333,10 +1628,7 @@ static void DumpInitTermPointerSection(MachOObjectFile *O,
});
if (Reloc != Relocs.end()) {
symbol_iterator RelocSym = Reloc->second;
- Expected<StringRef> SymName = RelocSym->getName();
- if (!SymName)
- report_error(O->getFileName(), SymName.takeError());
- outs() << " " << *SymName;
+ outs() << " " << unwrapOrError(RelocSym->getName(), O->getFileName());
} else {
SymbolName = GuessSymbolName(p, AddrMap);
if (SymbolName)
@@ -1438,8 +1730,8 @@ static void DumpSectionContents(StringRef Filename, MachOObjectFile *O,
}
uint32_t section_type = section_flags & MachO::SECTION_TYPE;
- StringRef BytesStr;
- Section.getContents(BytesStr);
+ StringRef BytesStr =
+ unwrapOrError(Section.getContents(), O->getFileName());
const char *sect = reinterpret_cast<const char *>(BytesStr.data());
uint32_t sect_size = BytesStr.size();
uint64_t sect_addr = Section.getAddress();
@@ -1523,8 +1815,8 @@ static void DumpInfoPlistSectionContents(StringRef Filename,
if (SegName == "__TEXT" && SectName == "__info_plist") {
if (!NoLeadingHeaders)
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
- StringRef BytesStr;
- Section.getContents(BytesStr);
+ StringRef BytesStr =
+ unwrapOrError(Section.getContents(), O->getFileName());
const char *sect = reinterpret_cast<const char *>(BytesStr.data());
outs() << format("%.*s", BytesStr.size(), sect) << "\n";
return;
@@ -1609,8 +1901,8 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
// the error message.
if (Disassemble || IndirectSymbols || !FilterSections.empty() || UnwindInfo)
if (Error Err = MachOOF->checkSymbolTable())
- report_error(ArchiveName, FileName, std::move(Err), ArchitectureName);
-
+ report_error(std::move(Err), ArchiveName, FileName, ArchitectureName);
+
if (DisassembleAll) {
for (const SectionRef &Section : MachOOF->sections()) {
StringRef SectName;
@@ -1774,6 +2066,21 @@ static void printCPUType(uint32_t cputype, uint32_t cpusubtype) {
outs() << " cputype CPU_TYPE_ARM64\n";
outs() << " cpusubtype CPU_SUBTYPE_ARM64_ALL\n";
break;
+ case MachO::CPU_SUBTYPE_ARM64E:
+ outs() << " cputype CPU_TYPE_ARM64\n";
+ outs() << " cpusubtype CPU_SUBTYPE_ARM64E\n";
+ break;
+ default:
+ printUnknownCPUType(cputype, cpusubtype);
+ break;
+ }
+ break;
+ case MachO::CPU_TYPE_ARM64_32:
+ switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_ARM64_32_V8:
+ outs() << " cputype CPU_TYPE_ARM64_32\n";
+ outs() << " cpusubtype CPU_SUBTYPE_ARM64_32_V8\n";
+ break;
default:
printUnknownCPUType(cputype, cpusubtype);
break;
@@ -1862,10 +2169,8 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
StringRef ArchitectureName = StringRef()) {
if (print_offset)
outs() << C.getChildOffset() << "\t";
- Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
- if (!ModeOrErr)
- report_error(Filename, C, ModeOrErr.takeError(), ArchitectureName);
- sys::fs::perms Mode = ModeOrErr.get();
+ sys::fs::perms Mode =
+ unwrapOrError(C.getAccessMode(), Filename, C, ArchitectureName);
if (verbose) {
// FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG.
// But there is nothing in sys::fs::perms for S_IFMT or S_IFREG.
@@ -1883,20 +2188,11 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
outs() << format("0%o ", Mode);
}
- Expected<unsigned> UIDOrErr = C.getUID();
- if (!UIDOrErr)
- report_error(Filename, C, UIDOrErr.takeError(), ArchitectureName);
- unsigned UID = UIDOrErr.get();
- outs() << format("%3d/", UID);
- Expected<unsigned> GIDOrErr = C.getGID();
- if (!GIDOrErr)
- report_error(Filename, C, GIDOrErr.takeError(), ArchitectureName);
- unsigned GID = GIDOrErr.get();
- outs() << format("%-3d ", GID);
- Expected<uint64_t> Size = C.getRawSize();
- if (!Size)
- report_error(Filename, C, Size.takeError(), ArchitectureName);
- outs() << format("%5" PRId64, Size.get()) << " ";
+ outs() << format(
+ "%3d/%-3d %5" PRId64 " ",
+ unwrapOrError(C.getUID(), Filename, C, ArchitectureName),
+ unwrapOrError(C.getGID(), Filename, C, ArchitectureName),
+ unwrapOrError(C.getRawSize(), Filename, C, ArchitectureName));
StringRef RawLastModified = C.getRawLastModified();
if (verbose) {
@@ -1919,21 +2215,15 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C,
Expected<StringRef> NameOrErr = C.getName();
if (!NameOrErr) {
consumeError(NameOrErr.takeError());
- Expected<StringRef> NameOrErr = C.getRawName();
- if (!NameOrErr)
- report_error(Filename, C, NameOrErr.takeError(), ArchitectureName);
- StringRef RawName = NameOrErr.get();
- outs() << RawName << "\n";
+ outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName)
+ << "\n";
} else {
StringRef Name = NameOrErr.get();
outs() << Name << "\n";
}
} else {
- Expected<StringRef> NameOrErr = C.getRawName();
- if (!NameOrErr)
- report_error(Filename, C, NameOrErr.takeError(), ArchitectureName);
- StringRef RawName = NameOrErr.get();
- outs() << RawName << "\n";
+ outs() << unwrapOrError(C.getRawName(), Filename, C, ArchitectureName)
+ << "\n";
}
}
@@ -1941,12 +2231,11 @@ static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose,
bool print_offset,
StringRef ArchitectureName = StringRef()) {
Error Err = Error::success();
- ;
for (const auto &C : A->children(Err, false))
printArchiveChild(Filename, C, verbose, print_offset, ArchitectureName);
if (Err)
- report_error(StringRef(), Filename, std::move(Err), ArchitectureName);
+ report_error(std::move(Err), StringRef(), Filename, ArchitectureName);
}
static bool ValidateArchFlags() {
@@ -1970,15 +2259,15 @@ static bool ValidateArchFlags() {
// -arch flags selecting just those slices as specified by them and also parses
// archive files. Then for each individual Mach-O file ProcessMachO() is
// called to process the file based on the command line options.
-void llvm::parseInputMachO(StringRef Filename) {
+void parseInputMachO(StringRef Filename) {
if (!ValidateArchFlags())
return;
// Attempt to open the binary.
Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
if (!BinaryOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError()))
- report_error(Filename, std::move(E));
+ if (Error E = isNotObjectErrorInvalidFileType(BinaryOrErr.takeError()))
+ report_error(std::move(E), Filename);
else
outs() << Filename << ": is not an object file\n";
return;
@@ -1994,8 +2283,8 @@ void llvm::parseInputMachO(StringRef Filename) {
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E));
+ if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(std::move(E), Filename, C);
continue;
}
if (MachOObjectFile *O = dyn_cast<MachOObjectFile>(&*ChildOrErr.get())) {
@@ -2005,7 +2294,7 @@ void llvm::parseInputMachO(StringRef Filename) {
}
}
if (Err)
- report_error(Filename, std::move(Err));
+ report_error(std::move(Err), Filename);
return;
}
if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
@@ -2026,7 +2315,7 @@ void llvm::parseInputMachO(StringRef Filename) {
llvm_unreachable("Input object can't be invalid at this point");
}
-void llvm::parseInputMachO(MachOUniversalBinary *UB) {
+void parseInputMachO(MachOUniversalBinary *UB) {
if (!ValidateArchFlags())
return;
@@ -2055,13 +2344,12 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
ProcessMachO(Filename, MachOOF, "", ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(Filename, StringRef(), std::move(E),
- ArchitectureName);
+ } else if (Error E = isNotObjectErrorInvalidFileType(
+ ObjOrErr.takeError())) {
+ report_error(std::move(E), Filename, StringRef(), ArchitectureName);
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
+ I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename;
if (!ArchitectureName.empty())
@@ -2074,8 +2362,8 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E), ArchitectureName);
+ if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(std::move(E), Filename, C, ArchitectureName);
continue;
}
if (MachOObjectFile *O =
@@ -2083,7 +2371,7 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, O, O->getFileName(), ArchitectureName);
}
if (Err)
- report_error(Filename, std::move(Err));
+ report_error(std::move(Err), Filename);
} else {
consumeError(AOrErr.takeError());
error("Mach-O universal file: " + Filename + " for " +
@@ -2116,11 +2404,11 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
ProcessMachO(Filename, MachOOF);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(Filename, std::move(E));
+ } else if (Error E =
+ isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+ report_error(std::move(E), Filename);
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
+ I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename << "\n";
if (ArchiveHeaders)
@@ -2130,8 +2418,9 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E));
+ if (Error E =
+ isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(std::move(E), Filename, C);
continue;
}
if (MachOObjectFile *O =
@@ -2139,7 +2428,7 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
ProcessMachO(Filename, O, O->getFileName());
}
if (Err)
- report_error(Filename, std::move(Err));
+ report_error(std::move(Err), Filename);
} else {
consumeError(AOrErr.takeError());
error("Mach-O universal file: " + Filename + " for architecture " +
@@ -2164,11 +2453,10 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
ObjectFile &Obj = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
ProcessMachO(Filename, MachOOF, "", ArchitectureName);
- } else if (auto E = isNotObjectErrorInvalidFileType(
- ObjOrErr.takeError())) {
- report_error(StringRef(), Filename, std::move(E), ArchitectureName);
- } else if (Expected<std::unique_ptr<Archive>> AOrErr =
- I->getAsArchive()) {
+ } else if (Error E =
+ isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
+ report_error(std::move(E), StringRef(), Filename, ArchitectureName);
+ } else if (Expected<std::unique_ptr<Archive>> AOrErr = I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename;
if (!ArchitectureName.empty())
@@ -2181,8 +2469,8 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
- if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(Filename, C, std::move(E), ArchitectureName);
+ if (Error E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
+ report_error(std::move(E), Filename, C, ArchitectureName);
continue;
}
if (MachOObjectFile *O =
@@ -2193,7 +2481,7 @@ void llvm::parseInputMachO(MachOUniversalBinary *UB) {
}
}
if (Err)
- report_error(Filename, std::move(Err));
+ report_error(std::move(Err), Filename);
} else {
consumeError(AOrErr.takeError());
error("Mach-O universal file: " + Filename + " for architecture " +
@@ -2308,12 +2596,9 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
}
}
if (reloc_found && isExtern) {
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
- op_info->AddSymbol.Name = name;
+ op_info->AddSymbol.Name =
+ unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
// For i386 extern relocation entries the value in the instruction is
// the offset from the symbol, and value is already set in op_info->Value.
return 1;
@@ -2372,10 +2657,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
op_info->Value -= Pc + Offset + Size;
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
+ const char *name =
+ unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
return 1;
@@ -2412,10 +2695,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
op_info->Value -= Pc + Offset + Size;
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
+ const char *name =
+ unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
unsigned Type = info->O->getAnyRelocationType(RE);
if (Type == MachO::X86_64_RELOC_SUBTRACTOR) {
DataRefImpl RelNext = Rel;
@@ -2429,10 +2710,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
op_info->SubtractSymbol.Name = name;
symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum);
Symbol = *RelocSymNext;
- Expected<StringRef> SymNameNext = Symbol.getName();
- if (!SymNameNext)
- report_error(info->O->getFileName(), SymNameNext.takeError());
- name = SymNameNext->data();
+ name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
}
}
// TODO: add the VariantKinds to op_info->VariantKind for relocation types
@@ -2501,10 +2779,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
}
if (isExtern) {
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
+ const char *name =
+ unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
switch (r_type) {
@@ -2620,10 +2896,9 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// NOTE: Scattered relocations don't exist on arm64.
if (!info->O->getPlainRelocationExternal(RE))
return 0;
- Expected<StringRef> SymName = Reloc->getSymbol()->getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
+ const char *name =
+ unwrapOrError(Reloc->getSymbol()->getName(), info->O->getFileName())
+ .data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
@@ -2749,12 +3024,8 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
- SymbolRef Symbol = *Sym;
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
- return name;
+ return unwrapOrError(Sym->getName(), info->O->getFileName())
+ .data();
}
}
}
@@ -2784,12 +3055,8 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
- SymbolRef Symbol = *Sym;
- Expected<StringRef> SymName = Symbol.getName();
- if (!SymName)
- report_error(info->O->getFileName(), SymName.takeError());
- const char *name = SymName->data();
- return name;
+ return unwrapOrError(Sym->getName(), info->O->getFileName())
+ .data();
}
}
}
@@ -2960,8 +3227,8 @@ static const char *get_pointer_64(uint64_t Address, uint32_t &offset,
S = (*(info->Sections))[SectIdx];
offset = Address - SectAddress;
left = SectSize - offset;
- StringRef SectContents;
- ((*(info->Sections))[SectIdx]).getContents(SectContents);
+ StringRef SectContents = unwrapOrError(
+ ((*(info->Sections))[SectIdx]).getContents(), info->O->getFileName());
return SectContents.data() + offset;
}
}
@@ -3015,10 +3282,7 @@ static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
const char *SymbolName = nullptr;
if (reloc_found && isExtern) {
n_value = Symbol.getValue();
- Expected<StringRef> NameOrError = Symbol.getName();
- if (!NameOrError)
- report_error(info->O->getFileName(), NameOrError.takeError());
- StringRef Name = *NameOrError;
+ StringRef Name = unwrapOrError(Symbol.getName(), info->O->getFileName());
if (!Name.empty()) {
SymbolName = Name.data();
return SymbolName;
@@ -3767,8 +4031,7 @@ walk_pointer_list_64(const char *listname, const SectionRef S,
StringRef SegName = O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
- StringRef BytesStr;
- S.getContents(BytesStr);
+ StringRef BytesStr = unwrapOrError(S.getContents(), O->getFileName());
const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint64_t)) {
@@ -3818,8 +4081,7 @@ walk_pointer_list_32(const char *listname, const SectionRef S,
StringRef SegName = O->getSectionFinalSegmentName(Ref);
outs() << "Contents of (" << SegName << "," << SectName << ") section\n";
- StringRef BytesStr;
- S.getContents(BytesStr);
+ StringRef BytesStr = unwrapOrError(S.getContents(), O->getFileName());
const char *Contents = reinterpret_cast<const char *>(BytesStr.data());
for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint32_t)) {
@@ -6970,32 +7232,78 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
raw_ostream &DebugOut = nulls();
#endif
+ // Try to find debug info and set up the DIContext for it.
std::unique_ptr<DIContext> diContext;
- ObjectFile *DbgObj = MachOOF;
+ std::unique_ptr<Binary> DSYMBinary;
std::unique_ptr<MemoryBuffer> DSYMBuf;
- // Try to find debug info and set up the DIContext for it.
if (UseDbg) {
+ ObjectFile *DbgObj = MachOOF;
+
// A separate DSym file path was specified, parse it as a macho file,
// get the sections and supply it to the section name parsing machinery.
if (!DSYMFile.empty()) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
MemoryBuffer::getFileOrSTDIN(DSYMFile);
if (std::error_code EC = BufOrErr.getError()) {
- report_error(DSYMFile, errorCodeToError(EC));
+ report_error(errorCodeToError(EC), DSYMFile);
return;
}
- Expected<std::unique_ptr<MachOObjectFile>> DbgObjCheck =
- ObjectFile::createMachOObjectFile(BufOrErr.get()->getMemBufferRef());
+ // We need to keep the file alive, because we're replacing DbgObj with it.
+ DSYMBuf = std::move(BufOrErr.get());
- if (Error E = DbgObjCheck.takeError()) {
- report_error(DSYMFile, std::move(E));
+ Expected<std::unique_ptr<Binary>> BinaryOrErr =
+ createBinary(DSYMBuf.get()->getMemBufferRef());
+ if (!BinaryOrErr) {
+ report_error(BinaryOrErr.takeError(), DSYMFile);
return;
}
- DbgObj = DbgObjCheck.get().release();
- // We need to keep the file alive, because we're replacing DbgObj with it.
- DSYMBuf = std::move(BufOrErr.get());
+      // We need to keep the Binary alive with the buffer
+ DSYMBinary = std::move(BinaryOrErr.get());
+
+ if (ObjectFile *O = dyn_cast<ObjectFile>(DSYMBinary.get())) {
+        // This is a Mach-O object file; use it.
+ if (MachOObjectFile *MachDSYM = dyn_cast<MachOObjectFile>(&*O)) {
+ DbgObj = MachDSYM;
+ }
+ else {
+ WithColor::error(errs(), "llvm-objdump")
+ << DSYMFile << " is not a Mach-O file type.\n";
+ return;
+ }
+ }
+ else if (auto UB = dyn_cast<MachOUniversalBinary>(DSYMBinary.get())){
+        // This is a universal binary; find a Mach-O for this architecture.
+ uint32_t CPUType, CPUSubType;
+ const char *ArchFlag;
+ if (MachOOF->is64Bit()) {
+ const MachO::mach_header_64 H_64 = MachOOF->getHeader64();
+ CPUType = H_64.cputype;
+ CPUSubType = H_64.cpusubtype;
+ } else {
+ const MachO::mach_header H = MachOOF->getHeader();
+ CPUType = H.cputype;
+ CPUSubType = H.cpusubtype;
+ }
+ Triple T = MachOObjectFile::getArchTriple(CPUType, CPUSubType, nullptr,
+ &ArchFlag);
+ Expected<std::unique_ptr<MachOObjectFile>> MachDSYM =
+ UB->getObjectForArch(ArchFlag);
+ if (!MachDSYM) {
+ report_error(MachDSYM.takeError(), DSYMFile);
+ return;
+ }
+
+        // We need to keep the Binary alive with the buffer
+ DbgObj = &*MachDSYM.get();
+ DSYMBinary = std::move(*MachDSYM);
+ }
+ else {
+ WithColor::error(errs(), "llvm-objdump")
+ << DSYMFile << " is not a Mach-O or Universal file type.\n";
+ return;
+ }
}
// Setup the DIContext
@@ -7016,10 +7324,9 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
if (SegmentName != DisSegName)
continue;
- StringRef BytesStr;
- Sections[SectIdx].getContents(BytesStr);
- ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
- BytesStr.size());
+ StringRef BytesStr =
+ unwrapOrError(Sections[SectIdx].getContents(), Filename);
+ ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(BytesStr);
uint64_t SectAddress = Sections[SectIdx].getAddress();
bool symbolTableWorked = false;
@@ -7029,17 +7336,13 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
SymbolAddressMap AddrMap;
bool DisSymNameFound = false;
for (const SymbolRef &Symbol : MachOOF->symbols()) {
- Expected<SymbolRef::Type> STOrErr = Symbol.getType();
- if (!STOrErr)
- report_error(MachOOF->getFileName(), STOrErr.takeError());
- SymbolRef::Type ST = *STOrErr;
+ SymbolRef::Type ST =
+ unwrapOrError(Symbol.getType(), MachOOF->getFileName());
if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
ST == SymbolRef::ST_Other) {
uint64_t Address = Symbol.getValue();
- Expected<StringRef> SymNameOrErr = Symbol.getName();
- if (!SymNameOrErr)
- report_error(MachOOF->getFileName(), SymNameOrErr.takeError());
- StringRef SymName = *SymNameOrErr;
+ StringRef SymName =
+ unwrapOrError(Symbol.getName(), MachOOF->getFileName());
AddrMap[Address] = SymName;
if (!DisSymName.empty() && DisSymName == SymName)
DisSymNameFound = true;
@@ -7076,15 +7379,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
- Expected<StringRef> SymNameOrErr = Symbols[SymIdx].getName();
- if (!SymNameOrErr)
- report_error(MachOOF->getFileName(), SymNameOrErr.takeError());
- StringRef SymName = *SymNameOrErr;
-
- Expected<SymbolRef::Type> STOrErr = Symbols[SymIdx].getType();
- if (!STOrErr)
- report_error(MachOOF->getFileName(), STOrErr.takeError());
- SymbolRef::Type ST = *STOrErr;
+ StringRef SymName =
+ unwrapOrError(Symbols[SymIdx].getName(), MachOOF->getFileName());
+ SymbolRef::Type ST =
+ unwrapOrError(Symbols[SymIdx].getType(), MachOOF->getFileName());
if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data)
continue;
@@ -7137,10 +7435,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
uint64_t NextSym = 0;
uint64_t NextSymIdx = SymIdx + 1;
while (Symbols.size() > NextSymIdx) {
- Expected<SymbolRef::Type> STOrErr = Symbols[NextSymIdx].getType();
- if (!STOrErr)
- report_error(MachOOF->getFileName(), STOrErr.takeError());
- SymbolRef::Type NextSymType = *STOrErr;
+ SymbolRef::Type NextSymType = unwrapOrError(
+ Symbols[NextSymIdx].getType(), MachOOF->getFileName());
if (NextSymType == SymbolRef::ST_Function) {
containsNextSym =
Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]);
@@ -7243,7 +7539,7 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// Print debug info.
if (diContext) {
- DILineInfo dli = diContext->getLineInfoForAddress(PC);
+ DILineInfo dli = diContext->getLineInfoForAddress({PC, SectIdx});
// Print valid line info if it changed.
if (dli != lastLine && dli.Line != 0)
outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
@@ -7415,10 +7711,7 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
const RelocationRef &Reloc, uint64_t Addr,
StringRef &Name, uint64_t &Addend) {
if (Reloc.getSymbol() != Obj->symbol_end()) {
- Expected<StringRef> NameOrErr = Reloc.getSymbol()->getName();
- if (!NameOrErr)
- report_error(Obj->getFileName(), NameOrErr.takeError());
- Name = *NameOrErr;
+ Name = unwrapOrError(Reloc.getSymbol()->getName(), Obj->getFileName());
Addend = Addr;
return;
}
@@ -7440,16 +7733,11 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
// Go back one so that SymbolAddress <= Addr.
--Sym;
- auto SectOrErr = Sym->second.getSection();
- if (!SectOrErr)
- report_error(Obj->getFileName(), SectOrErr.takeError());
- section_iterator SymSection = *SectOrErr;
+ section_iterator SymSection =
+ unwrapOrError(Sym->second.getSection(), Obj->getFileName());
if (RelocSection == *SymSection) {
// There's a valid symbol in the same section before this reference.
- Expected<StringRef> NameOrErr = Sym->second.getName();
- if (!NameOrErr)
- report_error(Obj->getFileName(), NameOrErr.takeError());
- Name = *NameOrErr;
+ Name = unwrapOrError(Sym->second.getName(), Obj->getFileName());
Addend = Addr - Sym->first;
return;
}
@@ -7490,9 +7778,8 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
uint32_t PointerSize = Is64 ? sizeof(uint64_t) : sizeof(uint32_t);
uint32_t EntrySize = 3 * PointerSize + 2 * sizeof(uint32_t);
- StringRef Contents;
- CompactUnwind.getContents(Contents);
-
+ StringRef Contents =
+ unwrapOrError(CompactUnwind.getContents(), Obj->getFileName());
SmallVector<CompactUnwindEntry, 4> CompactUnwinds;
// First populate the initial raw offsets, encodings and so on from the entry.
@@ -7633,8 +7920,8 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
outs() << "Contents of __unwind_info section:\n";
- StringRef Contents;
- UnwindInfo.getContents(Contents);
+ StringRef Contents =
+ unwrapOrError(UnwindInfo.getContents(), Obj->getFileName());
ptrdiff_t Pos = 0;
//===----------------------------------
@@ -7801,7 +8088,7 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
}
}
-void llvm::printMachOUnwindInfo(const MachOObjectFile *Obj) {
+void printMachOUnwindInfo(const MachOObjectFile *Obj) {
std::map<uint64_t, SymbolRef> Symbols;
for (const SymbolRef &SymRef : Obj->symbols()) {
// Discard any undefined or absolute symbols. They're not going to take part
@@ -7917,6 +8204,20 @@ static void PrintMachHeader(uint32_t magic, uint32_t cputype,
case MachO::CPU_SUBTYPE_ARM64_ALL:
outs() << " ALL";
break;
+ case MachO::CPU_SUBTYPE_ARM64E:
+ outs() << " E";
+ break;
+ default:
+ outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK);
+ break;
+ }
+ break;
+ case MachO::CPU_TYPE_ARM64_32:
+ outs() << " ARM64_32";
+ switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_ARM64_32_V8:
+ outs() << " V8";
+ break;
default:
outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK);
break;
@@ -9485,7 +9786,8 @@ static void PrintThreadCommand(MachO::thread_command t, const char *Ptr,
begin += count * sizeof(uint32_t);
}
}
- } else if (cputype == MachO::CPU_TYPE_ARM64) {
+ } else if (cputype == MachO::CPU_TYPE_ARM64 ||
+ cputype == MachO::CPU_TYPE_ARM64_32) {
while (begin < end) {
if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
memcpy((char *)&flavor, begin, sizeof(uint32_t));
@@ -9790,12 +10092,12 @@ static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) {
}
}
-void llvm::printMachOFileHeader(const object::ObjectFile *Obj) {
+void printMachOFileHeader(const object::ObjectFile *Obj) {
const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
PrintMachHeader(file, !NonVerbose);
}
-void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) {
+void printMachOLoadCommands(const object::ObjectFile *Obj) {
const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
uint32_t filetype = 0;
uint32_t cputype = 0;
@@ -9817,7 +10119,7 @@ void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) {
// export trie dumping
//===----------------------------------------------------------------------===//
-void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
+void printMachOExportsTrie(const object::MachOObjectFile *Obj) {
uint64_t BaseSegmentAddress = 0;
for (const auto &Command : Obj->load_commands()) {
if (Command.C.cmd == MachO::LC_SEGMENT) {
@@ -9835,7 +10137,7 @@ void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
}
}
Error Err = Error::success();
- for (const llvm::object::ExportEntry &Entry : Obj->exports(Err)) {
+ for (const object::ExportEntry &Entry : Obj->exports(Err)) {
uint64_t Flags = Entry.flags();
bool ReExport = (Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
bool WeakDef = (Flags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
@@ -9889,17 +10191,17 @@ void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
outs() << "\n";
}
if (Err)
- report_error(Obj->getFileName(), std::move(Err));
+ report_error(std::move(Err), Obj->getFileName());
}
//===----------------------------------------------------------------------===//
// rebase table dumping
//===----------------------------------------------------------------------===//
-void llvm::printMachORebaseTable(object::MachOObjectFile *Obj) {
+void printMachORebaseTable(object::MachOObjectFile *Obj) {
outs() << "segment section address type\n";
Error Err = Error::success();
- for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(Err)) {
+ for (const object::MachORebaseEntry &Entry : Obj->rebaseTable(Err)) {
StringRef SegmentName = Entry.segmentName();
StringRef SectionName = Entry.sectionName();
uint64_t Address = Entry.address();
@@ -9910,7 +10212,7 @@ void llvm::printMachORebaseTable(object::MachOObjectFile *Obj) {
Address, Entry.typeName().str().c_str());
}
if (Err)
- report_error(Obj->getFileName(), std::move(Err));
+ report_error(std::move(Err), Obj->getFileName());
}
static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
@@ -9938,12 +10240,12 @@ static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
// bind table dumping
//===----------------------------------------------------------------------===//
-void llvm::printMachOBindTable(object::MachOObjectFile *Obj) {
+void printMachOBindTable(object::MachOObjectFile *Obj) {
// Build table of sections so names can used in final output.
outs() << "segment section address type "
"addend dylib symbol\n";
Error Err = Error::success();
- for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(Err)) {
+ for (const object::MachOBindEntry &Entry : Obj->bindTable(Err)) {
StringRef SegmentName = Entry.segmentName();
StringRef SectionName = Entry.sectionName();
uint64_t Address = Entry.address();
@@ -9962,18 +10264,18 @@ void llvm::printMachOBindTable(object::MachOObjectFile *Obj) {
<< Entry.symbolName() << Attr << "\n";
}
if (Err)
- report_error(Obj->getFileName(), std::move(Err));
+ report_error(std::move(Err), Obj->getFileName());
}
//===----------------------------------------------------------------------===//
// lazy bind table dumping
//===----------------------------------------------------------------------===//
-void llvm::printMachOLazyBindTable(object::MachOObjectFile *Obj) {
+void printMachOLazyBindTable(object::MachOObjectFile *Obj) {
outs() << "segment section address "
"dylib symbol\n";
Error Err = Error::success();
- for (const llvm::object::MachOBindEntry &Entry : Obj->lazyBindTable(Err)) {
+ for (const object::MachOBindEntry &Entry : Obj->lazyBindTable(Err)) {
StringRef SegmentName = Entry.segmentName();
StringRef SectionName = Entry.sectionName();
uint64_t Address = Entry.address();
@@ -9987,18 +10289,18 @@ void llvm::printMachOLazyBindTable(object::MachOObjectFile *Obj) {
<< Entry.symbolName() << "\n";
}
if (Err)
- report_error(Obj->getFileName(), std::move(Err));
+ report_error(std::move(Err), Obj->getFileName());
}
//===----------------------------------------------------------------------===//
// weak bind table dumping
//===----------------------------------------------------------------------===//
-void llvm::printMachOWeakBindTable(object::MachOObjectFile *Obj) {
+void printMachOWeakBindTable(object::MachOObjectFile *Obj) {
outs() << "segment section address "
"type addend symbol\n";
Error Err = Error::success();
- for (const llvm::object::MachOBindEntry &Entry : Obj->weakBindTable(Err)) {
+ for (const object::MachOBindEntry &Entry : Obj->weakBindTable(Err)) {
// Strong symbols don't have a location to update.
if (Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
outs() << " strong "
@@ -10019,7 +10321,7 @@ void llvm::printMachOWeakBindTable(object::MachOObjectFile *Obj) {
<< "\n";
}
if (Err)
- report_error(Obj->getFileName(), std::move(Err));
+ report_error(std::move(Err), Obj->getFileName());
}
// get_dyld_bind_info_symbolname() is used for disassembly and passed an
@@ -10031,16 +10333,66 @@ static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
if (info->bindtable == nullptr) {
info->bindtable = llvm::make_unique<SymbolAddressMap>();
Error Err = Error::success();
- for (const llvm::object::MachOBindEntry &Entry : info->O->bindTable(Err)) {
+ for (const object::MachOBindEntry &Entry : info->O->bindTable(Err)) {
uint64_t Address = Entry.address();
StringRef name = Entry.symbolName();
if (!name.empty())
(*info->bindtable)[Address] = name;
}
if (Err)
- report_error(info->O->getFileName(), std::move(Err));
+ report_error(std::move(Err), info->O->getFileName());
}
auto name = info->bindtable->lookup(ReferenceValue);
return !name.empty() ? name.data() : nullptr;
}
+void printLazyBindTable(ObjectFile *o) {
+ outs() << "Lazy bind table:\n";
+ if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ printMachOLazyBindTable(MachO);
+ else
+ WithColor::error()
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
+}
+
+void printWeakBindTable(ObjectFile *o) {
+ outs() << "Weak bind table:\n";
+ if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ printMachOWeakBindTable(MachO);
+ else
+ WithColor::error()
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
+}
+
+void printExportsTrie(const ObjectFile *o) {
+ outs() << "Exports trie:\n";
+ if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ printMachOExportsTrie(MachO);
+ else
+ WithColor::error()
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
+}
+
+void printRebaseTable(ObjectFile *o) {
+ outs() << "Rebase table:\n";
+ if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ printMachORebaseTable(MachO);
+ else
+ WithColor::error()
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
+}
+
+void printBindTable(ObjectFile *o) {
+ outs() << "Bind table:\n";
+ if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+ printMachOBindTable(MachO);
+ else
+ WithColor::error()
+ << "This operation is only currently supported "
+ "for Mach-O executable files.\n";
+}
+} // namespace llvm
diff --git a/tools/llvm-objdump/WasmDump.cpp b/tools/llvm-objdump/WasmDump.cpp
index 045002cd4b34..da27a4acbb5f 100644
--- a/tools/llvm-objdump/WasmDump.cpp
+++ b/tools/llvm-objdump/WasmDump.cpp
@@ -1,9 +1,8 @@
//===-- WasmDump.cpp - wasm-specific dumper ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -15,14 +14,39 @@
#include "llvm-objdump.h"
#include "llvm/Object/Wasm.h"
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
-void llvm::printWasmFileHeader(const object::ObjectFile *Obj) {
- const WasmObjectFile *File = dyn_cast<const WasmObjectFile>(Obj);
+namespace llvm {
+void printWasmFileHeader(const object::ObjectFile *Obj) {
+ const auto *File = dyn_cast<const WasmObjectFile>(Obj);
outs() << "Program Header:\n";
outs() << "Version: 0x";
outs().write_hex(File->getHeader().Version);
outs() << "\n";
}
+
+Error getWasmRelocationValueString(const WasmObjectFile *Obj,
+ const RelocationRef &RelRef,
+ SmallVectorImpl<char> &Result) {
+ const wasm::WasmRelocation &Rel = Obj->getWasmRelocation(RelRef);
+ symbol_iterator SI = RelRef.getSymbol();
+ std::string FmtBuf;
+ raw_string_ostream Fmt(FmtBuf);
+ if (SI == Obj->symbol_end()) {
+ // Not all wasm relocations have symbols associated with them.
+ // In particular R_WASM_TYPE_INDEX_LEB.
+ Fmt << Rel.Index;
+ } else {
+ Expected<StringRef> SymNameOrErr = SI->getName();
+ if (!SymNameOrErr)
+ return SymNameOrErr.takeError();
+ StringRef SymName = *SymNameOrErr;
+ Result.append(SymName.begin(), SymName.end());
+ }
+ Fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
+ Fmt.flush();
+ Result.append(FmtBuf.begin(), FmtBuf.end());
+ return Error::success();
+}
+} // namespace llvm
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index ba8d3c5b8d5c..58981203c59e 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -1,9 +1,8 @@
//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm-objdump.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
@@ -68,283 +68,298 @@
#include <unordered_map>
#include <utility>
-using namespace llvm;
-using namespace object;
+using namespace llvm::object;
+
+namespace llvm {
+
+cl::OptionCategory ObjdumpCat("llvm-objdump Options");
+
+// MachO specific
+extern cl::OptionCategory MachOCat;
+extern cl::opt<bool> Bind;
+extern cl::opt<bool> DataInCode;
+extern cl::opt<bool> DylibsUsed;
+extern cl::opt<bool> DylibId;
+extern cl::opt<bool> ExportsTrie;
+extern cl::opt<bool> FirstPrivateHeader;
+extern cl::opt<bool> IndirectSymbols;
+extern cl::opt<bool> InfoPlist;
+extern cl::opt<bool> LazyBind;
+extern cl::opt<bool> LinkOptHints;
+extern cl::opt<bool> ObjcMetaData;
+extern cl::opt<bool> Rebase;
+extern cl::opt<bool> UniversalHeaders;
+extern cl::opt<bool> WeakBind;
+
+static cl::opt<uint64_t> AdjustVMA(
+ "adjust-vma",
+ cl::desc("Increase the displayed address by the specified offset"),
+ cl::value_desc("offset"), cl::init(0), cl::cat(ObjdumpCat));
-cl::opt<bool>
- llvm::AllHeaders("all-headers",
- cl::desc("Display all available header information"));
+static cl::opt<bool>
+ AllHeaders("all-headers",
+ cl::desc("Display all available header information"),
+ cl::cat(ObjdumpCat));
static cl::alias AllHeadersShort("x", cl::desc("Alias for --all-headers"),
+ cl::NotHidden, cl::Grouping,
cl::aliasopt(AllHeaders));
-static cl::list<std::string>
-InputFilenames(cl::Positional, cl::desc("<input object files>"),cl::ZeroOrMore);
+static cl::opt<std::string>
+ ArchName("arch-name",
+ cl::desc("Target arch to disassemble for, "
+ "see -version for available targets"),
+ cl::cat(ObjdumpCat));
+
+cl::opt<bool> ArchiveHeaders("archive-headers",
+ cl::desc("Display archive header information"),
+ cl::cat(ObjdumpCat));
+static cl::alias ArchiveHeadersShort("a",
+ cl::desc("Alias for --archive-headers"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(ArchiveHeaders));
+
+cl::opt<bool> Demangle("demangle", cl::desc("Demangle symbols names"),
+ cl::init(false), cl::cat(ObjdumpCat));
+static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(Demangle));
+
+cl::opt<bool> Disassemble(
+ "disassemble",
+ cl::desc("Display assembler mnemonics for the machine instructions"),
+ cl::cat(ObjdumpCat));
+static cl::alias DisassembleShort("d", cl::desc("Alias for --disassemble"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(Disassemble));
+
+cl::opt<bool> DisassembleAll(
+ "disassemble-all",
+ cl::desc("Display assembler mnemonics for the machine instructions"),
+ cl::cat(ObjdumpCat));
+static cl::alias DisassembleAllShort("D",
+ cl::desc("Alias for --disassemble-all"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(DisassembleAll));
-cl::opt<bool>
-llvm::Disassemble("disassemble",
- cl::desc("Display assembler mnemonics for the machine instructions"));
+static cl::list<std::string>
+ DisassembleFunctions("disassemble-functions", cl::CommaSeparated,
+ cl::desc("List of functions to disassemble. "
+ "Accept demangled names when --demangle is "
+ "specified, otherwise accept mangled names"),
+ cl::cat(ObjdumpCat));
+
+static cl::opt<bool> DisassembleZeroes(
+ "disassemble-zeroes",
+ cl::desc("Do not skip blocks of zeroes when disassembling"),
+ cl::cat(ObjdumpCat));
static cl::alias
-Disassembled("d", cl::desc("Alias for --disassemble"),
- cl::aliasopt(Disassemble));
+ DisassembleZeroesShort("z", cl::desc("Alias for --disassemble-zeroes"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(DisassembleZeroes));
-cl::opt<bool>
-llvm::DisassembleAll("disassemble-all",
- cl::desc("Display assembler mnemonics for the machine instructions"));
+static cl::list<std::string>
+ DisassemblerOptions("disassembler-options",
+ cl::desc("Pass target specific disassembler options"),
+ cl::value_desc("options"), cl::CommaSeparated,
+ cl::cat(ObjdumpCat));
static cl::alias
-DisassembleAlld("D", cl::desc("Alias for --disassemble-all"),
- cl::aliasopt(DisassembleAll));
-
-cl::opt<bool> llvm::Demangle("demangle", cl::desc("Demangle symbols names"),
- cl::init(false));
-
-static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
- cl::aliasopt(llvm::Demangle));
+ DisassemblerOptionsShort("M", cl::desc("Alias for --disassembler-options"),
+ cl::NotHidden, cl::Grouping, cl::Prefix,
+ cl::CommaSeparated,
+ cl::aliasopt(DisassemblerOptions));
-static cl::list<std::string>
-DisassembleFunctions("df",
- cl::CommaSeparated,
- cl::desc("List of functions to disassemble"));
-static StringSet<> DisasmFuncsSet;
+cl::opt<DIDumpType> DwarfDumpType(
+ "dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"),
+ cl::values(clEnumValN(DIDT_DebugFrame, "frames", ".debug_frame")),
+ cl::cat(ObjdumpCat));
+
+static cl::opt<bool> DynamicRelocations(
+ "dynamic-reloc",
+ cl::desc("Display the dynamic relocation entries in the file"),
+ cl::cat(ObjdumpCat));
+static cl::alias DynamicRelocationShort("R",
+ cl::desc("Alias for --dynamic-reloc"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(DynamicRelocations));
-cl::opt<bool>
-llvm::Relocations("reloc",
- cl::desc("Display the relocation entries in the file"));
-static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
- cl::NotHidden,
- cl::aliasopt(llvm::Relocations));
+static cl::opt<bool>
+ FaultMapSection("fault-map-section",
+ cl::desc("Display contents of faultmap section"),
+ cl::cat(ObjdumpCat));
-cl::opt<bool>
-llvm::DynamicRelocations("dynamic-reloc",
- cl::desc("Display the dynamic relocation entries in the file"));
-static cl::alias
-DynamicRelocationsd("R", cl::desc("Alias for --dynamic-reloc"),
- cl::aliasopt(DynamicRelocations));
+static cl::opt<bool>
+ FileHeaders("file-headers",
+ cl::desc("Display the contents of the overall file header"),
+ cl::cat(ObjdumpCat));
+static cl::alias FileHeadersShort("f", cl::desc("Alias for --file-headers"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(FileHeaders));
-cl::opt<bool>
- llvm::SectionContents("full-contents",
- cl::desc("Display the content of each section"));
+cl::opt<bool> SectionContents("full-contents",
+ cl::desc("Display the content of each section"),
+ cl::cat(ObjdumpCat));
static cl::alias SectionContentsShort("s",
cl::desc("Alias for --full-contents"),
+ cl::NotHidden, cl::Grouping,
cl::aliasopt(SectionContents));
-cl::opt<bool> llvm::SymbolTable("syms", cl::desc("Display the symbol table"));
-static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
- cl::NotHidden,
- cl::aliasopt(llvm::SymbolTable));
-
-cl::opt<bool>
-llvm::ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
-
-cl::opt<bool>
-llvm::Rebase("rebase", cl::desc("Display mach-o rebasing info"));
-
-cl::opt<bool>
-llvm::Bind("bind", cl::desc("Display mach-o binding info"));
-
-cl::opt<bool>
-llvm::LazyBind("lazy-bind", cl::desc("Display mach-o lazy binding info"));
-
-cl::opt<bool>
-llvm::WeakBind("weak-bind", cl::desc("Display mach-o weak binding info"));
-
-cl::opt<bool>
-llvm::RawClangAST("raw-clang-ast",
- cl::desc("Dump the raw binary contents of the clang AST section"));
+static cl::list<std::string> InputFilenames(cl::Positional,
+ cl::desc("<input object files>"),
+ cl::ZeroOrMore,
+ cl::cat(ObjdumpCat));
static cl::opt<bool>
-MachOOpt("macho", cl::desc("Use MachO specific object file parser"));
-static cl::alias
-MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachOOpt));
-
-cl::opt<std::string>
-llvm::TripleName("triple", cl::desc("Target triple to disassemble for, "
- "see -version for available targets"));
+ PrintLines("line-numbers",
+ cl::desc("Display source line numbers with "
+ "disassembly. Implies disassemble object"),
+ cl::cat(ObjdumpCat));
+static cl::alias PrintLinesShort("l", cl::desc("Alias for --line-numbers"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(PrintLines));
+
+static cl::opt<bool> MachOOpt("macho",
+ cl::desc("Use MachO specific object file parser"),
+ cl::cat(ObjdumpCat));
+static cl::alias MachOm("m", cl::desc("Alias for --macho"), cl::NotHidden,
+ cl::Grouping, cl::aliasopt(MachOOpt));
cl::opt<std::string>
-llvm::MCPU("mcpu",
- cl::desc("Target a specific cpu type (-mcpu=help for details)"),
- cl::value_desc("cpu-name"),
- cl::init(""));
-
-cl::opt<std::string>
-llvm::ArchName("arch-name", cl::desc("Target arch to disassemble for, "
- "see -version for available targets"));
+ MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"), cl::init(""), cl::cat(ObjdumpCat));
+
+cl::list<std::string> MAttrs("mattr", cl::CommaSeparated,
+ cl::desc("Target specific attributes"),
+ cl::value_desc("a1,+a2,-a3,..."),
+ cl::cat(ObjdumpCat));
+
+cl::opt<bool> NoShowRawInsn("no-show-raw-insn",
+ cl::desc("When disassembling "
+ "instructions, do not print "
+ "the instruction bytes."),
+ cl::cat(ObjdumpCat));
+cl::opt<bool> NoLeadingAddr("no-leading-addr",
+ cl::desc("Print no leading address"),
+ cl::cat(ObjdumpCat));
+
+static cl::opt<bool> RawClangAST(
+ "raw-clang-ast",
+ cl::desc("Dump the raw binary contents of the clang AST section"),
+ cl::cat(ObjdumpCat));
cl::opt<bool>
-llvm::SectionHeaders("section-headers", cl::desc("Display summaries of the "
- "headers for each section."));
-static cl::alias
-SectionHeadersShort("headers", cl::desc("Alias for --section-headers"),
- cl::aliasopt(SectionHeaders));
-static cl::alias
-SectionHeadersShorter("h", cl::desc("Alias for --section-headers"),
- cl::aliasopt(SectionHeaders));
-
-cl::list<std::string>
-llvm::FilterSections("section", cl::desc("Operate on the specified sections only. "
- "With -macho dump segment,section"));
-cl::alias
-static FilterSectionsj("j", cl::desc("Alias for --section"),
- cl::aliasopt(llvm::FilterSections));
-
-cl::list<std::string>
-llvm::MAttrs("mattr",
- cl::CommaSeparated,
- cl::desc("Target specific attributes"),
- cl::value_desc("a1,+a2,-a3,..."));
-
-cl::opt<bool>
-llvm::NoShowRawInsn("no-show-raw-insn", cl::desc("When disassembling "
- "instructions, do not print "
- "the instruction bytes."));
-cl::opt<bool>
-llvm::NoLeadingAddr("no-leading-addr", cl::desc("Print no leading address"));
-
-cl::opt<bool>
-llvm::UnwindInfo("unwind-info", cl::desc("Display unwind information"));
-
-static cl::alias
-UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
- cl::aliasopt(UnwindInfo));
-
-cl::opt<bool>
-llvm::PrivateHeaders("private-headers",
- cl::desc("Display format specific file headers"));
-
-cl::opt<bool>
-llvm::FirstPrivateHeader("private-header",
- cl::desc("Display only the first format specific file "
- "header"));
-
-static cl::alias
-PrivateHeadersShort("p", cl::desc("Alias for --private-headers"),
- cl::aliasopt(PrivateHeaders));
-
-cl::opt<bool> llvm::FileHeaders(
- "file-headers",
- cl::desc("Display the contents of the overall file header"));
-
-static cl::alias FileHeadersShort("f", cl::desc("Alias for --file-headers"),
- cl::aliasopt(FileHeaders));
-
-cl::opt<bool>
- llvm::ArchiveHeaders("archive-headers",
- cl::desc("Display archive header information"));
+ Relocations("reloc", cl::desc("Display the relocation entries in the file"),
+ cl::cat(ObjdumpCat));
+static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(Relocations));
-cl::alias
-ArchiveHeadersShort("a", cl::desc("Alias for --archive-headers"),
- cl::aliasopt(ArchiveHeaders));
+cl::opt<bool> PrintImmHex("print-imm-hex",
+ cl::desc("Use hex format for immediate values"),
+ cl::cat(ObjdumpCat));
-cl::opt<bool>
- llvm::PrintImmHex("print-imm-hex",
- cl::desc("Use hex format for immediate values"));
+cl::opt<bool> PrivateHeaders("private-headers",
+ cl::desc("Display format specific file headers"),
+ cl::cat(ObjdumpCat));
+static cl::alias PrivateHeadersShort("p",
+ cl::desc("Alias for --private-headers"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(PrivateHeaders));
-cl::opt<bool> PrintFaultMaps("fault-map-section",
- cl::desc("Display contents of faultmap section"));
+cl::list<std::string>
+ FilterSections("section",
+ cl::desc("Operate on the specified sections only. "
+ "With -macho dump segment,section"),
+ cl::cat(ObjdumpCat));
+static cl::alias FilterSectionsj("j", cl::desc("Alias for --section"),
+ cl::NotHidden, cl::Grouping, cl::Prefix,
+ cl::aliasopt(FilterSections));
+
+cl::opt<bool> SectionHeaders("section-headers",
+ cl::desc("Display summaries of the "
+ "headers for each section."),
+ cl::cat(ObjdumpCat));
+static cl::alias SectionHeadersShort("headers",
+ cl::desc("Alias for --section-headers"),
+ cl::NotHidden,
+ cl::aliasopt(SectionHeaders));
+static cl::alias SectionHeadersShorter("h",
+ cl::desc("Alias for --section-headers"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(SectionHeaders));
-cl::opt<DIDumpType> llvm::DwarfDumpType(
- "dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"),
- cl::values(clEnumValN(DIDT_DebugFrame, "frames", ".debug_frame")));
+static cl::opt<bool>
+ ShowLMA("show-lma",
+ cl::desc("Display LMA column when dumping ELF section headers"),
+ cl::cat(ObjdumpCat));
-cl::opt<bool> PrintSource(
+static cl::opt<bool> PrintSource(
"source",
cl::desc(
- "Display source inlined with disassembly. Implies disassemble object"));
-
-cl::alias PrintSourceShort("S", cl::desc("Alias for -source"),
- cl::aliasopt(PrintSource));
+ "Display source inlined with disassembly. Implies disassemble object"),
+ cl::cat(ObjdumpCat));
+static cl::alias PrintSourceShort("S", cl::desc("Alias for -source"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(PrintSource));
-cl::opt<bool> PrintLines("line-numbers",
- cl::desc("Display source line numbers with "
- "disassembly. Implies disassemble object"));
-
-cl::alias PrintLinesShort("l", cl::desc("Alias for -line-numbers"),
- cl::aliasopt(PrintLines));
-
-cl::opt<unsigned long long>
+static cl::opt<uint64_t>
StartAddress("start-address", cl::desc("Disassemble beginning at address"),
- cl::value_desc("address"), cl::init(0));
-cl::opt<unsigned long long>
- StopAddress("stop-address",
- cl::desc("Stop disassembly at address"),
- cl::value_desc("address"), cl::init(UINT64_MAX));
-
-cl::opt<bool> DisassembleZeroes(
- "disassemble-zeroes",
- cl::desc("Do not skip blocks of zeroes when disassembling"));
-cl::alias DisassembleZeroesShort("z",
- cl::desc("Alias for --disassemble-zeroes"),
- cl::aliasopt(DisassembleZeroes));
+ cl::value_desc("address"), cl::init(0), cl::cat(ObjdumpCat));
+static cl::opt<uint64_t> StopAddress("stop-address",
+ cl::desc("Stop disassembly at address"),
+ cl::value_desc("address"),
+ cl::init(UINT64_MAX), cl::cat(ObjdumpCat));
+
+cl::opt<bool> SymbolTable("syms", cl::desc("Display the symbol table"),
+ cl::cat(ObjdumpCat));
+static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(SymbolTable));
-static StringRef ToolName;
+cl::opt<std::string> TripleName("triple",
+ cl::desc("Target triple to disassemble for, "
+ "see -version for available targets"),
+ cl::cat(ObjdumpCat));
-typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
+cl::opt<bool> UnwindInfo("unwind-info", cl::desc("Display unwind information"),
+ cl::cat(ObjdumpCat));
+static cl::alias UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(UnwindInfo));
-namespace {
-typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+static cl::opt<bool>
+ Wide("wide", cl::desc("Ignored for compatibility with GNU objdump"),
+ cl::cat(ObjdumpCat));
+static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide));
-class SectionFilterIterator {
-public:
- SectionFilterIterator(FilterPredicate P,
- llvm::object::section_iterator const &I,
- llvm::object::section_iterator const &E)
- : Predicate(std::move(P)), Iterator(I), End(E) {
- ScanPredicate();
- }
- const llvm::object::SectionRef &operator*() const { return *Iterator; }
- SectionFilterIterator &operator++() {
- ++Iterator;
- ScanPredicate();
- return *this;
- }
- bool operator!=(SectionFilterIterator const &Other) const {
- return Iterator != Other.Iterator;
- }
+static cl::extrahelp
+ HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
-private:
- void ScanPredicate() {
- while (Iterator != End && !Predicate(*Iterator)) {
- ++Iterator;
- }
- }
- FilterPredicate Predicate;
- llvm::object::section_iterator Iterator;
- llvm::object::section_iterator End;
-};
+static StringSet<> DisasmFuncsSet;
+static StringSet<> FoundSectionSet;
+static StringRef ToolName;
-class SectionFilter {
-public:
- SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
- : Predicate(std::move(P)), Object(O) {}
- SectionFilterIterator begin() {
- return SectionFilterIterator(Predicate, Object.section_begin(),
- Object.section_end());
- }
- SectionFilterIterator end() {
- return SectionFilterIterator(Predicate, Object.section_end(),
- Object.section_end());
- }
+typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
-private:
- FilterPredicate Predicate;
- llvm::object::ObjectFile const &Object;
-};
-SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) {
- return SectionFilter(
- [](llvm::object::SectionRef const &S) {
- if (FilterSections.empty())
- return true;
- llvm::StringRef String;
- std::error_code error = S.getName(String);
- if (error)
- return false;
- return is_contained(FilterSections, String);
- },
- O);
+static bool shouldKeep(object::SectionRef S) {
+ if (FilterSections.empty())
+ return true;
+ StringRef SecName;
+ std::error_code error = S.getName(SecName);
+ if (error)
+ return false;
+ // StringSet does not allow an empty key, so avoid adding sections with
+ // no name (such as the section with index 0) here.
+ if (!SecName.empty())
+ FoundSectionSet.insert(SecName);
+ return is_contained(FilterSections, SecName);
}
+
+SectionFilter ToolSectionFilter(object::ObjectFile const &O) {
+ return SectionFilter([](object::SectionRef S) { return shouldKeep(S); }, O);
}
-void llvm::error(std::error_code EC) {
+void error(std::error_code EC) {
if (!EC)
return;
WithColor::error(errs(), ToolName)
@@ -353,34 +368,39 @@ void llvm::error(std::error_code EC) {
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void llvm::error(Twine Message) {
+void error(Error E) {
+ if (!E)
+ return;
+ WithColor::error(errs(), ToolName) << toString(std::move(E));
+ exit(1);
+}
+
+LLVM_ATTRIBUTE_NORETURN void error(Twine Message) {
WithColor::error(errs(), ToolName) << Message << ".\n";
errs().flush();
exit(1);
}
-void llvm::warn(StringRef Message) {
+void warn(StringRef Message) {
WithColor::warning(errs(), ToolName) << Message << ".\n";
errs().flush();
}
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
- Twine Message) {
- WithColor::error(errs(), ToolName)
- << "'" << File << "': " << Message << ".\n";
- exit(1);
+static void warn(Twine Message) {
+ // Output order between errs() and outs() matters especially for archive
+ // files where the output is per member object.
+ outs().flush();
+ WithColor::warning(errs(), ToolName) << Message << "\n";
+ errs().flush();
}
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
- std::error_code EC) {
- assert(EC);
+LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message) {
WithColor::error(errs(), ToolName)
- << "'" << File << "': " << EC.message() << ".\n";
+ << "'" << File << "': " << Message << ".\n";
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
- llvm::Error E) {
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File) {
assert(E);
std::string Buf;
raw_string_ostream OS(Buf);
@@ -390,10 +410,9 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
- StringRef FileName,
- llvm::Error E,
- StringRef ArchitectureName) {
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
+ StringRef FileName,
+ StringRef ArchitectureName) {
assert(E);
WithColor::error(errs(), ToolName);
if (ArchiveName != "")
@@ -410,25 +429,39 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
- const object::Archive::Child &C,
- llvm::Error E,
- StringRef ArchitectureName) {
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef ArchiveName,
+ const object::Archive::Child &C,
+ StringRef ArchitectureName) {
Expected<StringRef> NameOrErr = C.getName();
 // TODO: if we have an error getting the name then it would be nice to print
 // the index of which archive member this is and/or its offset in the
// archive instead of "???" as the name.
if (!NameOrErr) {
consumeError(NameOrErr.takeError());
- llvm::report_error(ArchiveName, "???", std::move(E), ArchitectureName);
+ report_error(std::move(E), ArchiveName, "???", ArchitectureName);
} else
- llvm::report_error(ArchiveName, NameOrErr.get(), std::move(E),
- ArchitectureName);
+ report_error(std::move(E), ArchiveName, NameOrErr.get(), ArchitectureName);
+}
+
+static void warnOnNoMatchForSections() {
+ SetVector<StringRef> MissingSections;
+ for (StringRef S : FilterSections) {
+ if (FoundSectionSet.count(S))
+ return;
+ // User may specify an unnamed section. Don't warn for it.
+ if (!S.empty())
+ MissingSections.insert(S);
+ }
+
+ // Warn only if no section in FilterSections is matched.
+ for (StringRef S : MissingSections)
+ warn("section '" + S + "' mentioned in a -j/--section option, but not "
+ "found in any input file");
}
static const Target *getTarget(const ObjectFile *Obj = nullptr) {
// Figure out the target triple.
- llvm::Triple TheTriple("unknown-unknown-unknown");
+ Triple TheTriple("unknown-unknown-unknown");
if (TripleName.empty()) {
if (Obj)
TheTriple = Obj->makeTriple();
@@ -459,423 +492,21 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
return TheTarget;
}
-bool llvm::isRelocAddressLess(RelocationRef A, RelocationRef B) {
+bool isRelocAddressLess(RelocationRef A, RelocationRef B) {
return A.getOffset() < B.getOffset();
}
-static std::string demangle(StringRef Name) {
- char *Demangled = nullptr;
- if (Name.startswith("_Z"))
- Demangled = itaniumDemangle(Name.data(), Demangled, nullptr, nullptr);
- else if (Name.startswith("?"))
- Demangled = microsoftDemangle(Name.data(), Demangled, nullptr, nullptr);
-
- if (!Demangled)
- return Name;
-
- std::string Ret = Demangled;
- free(Demangled);
- return Ret;
-}
-
-template <class ELFT>
-static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
- const RelocationRef &RelRef,
- SmallVectorImpl<char> &Result) {
- typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
- typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
- typedef typename ELFObjectFile<ELFT>::Elf_Rela Elf_Rela;
-
- const ELFFile<ELFT> &EF = *Obj->getELFFile();
- DataRefImpl Rel = RelRef.getRawDataRefImpl();
- auto SecOrErr = EF.getSection(Rel.d.a);
- if (!SecOrErr)
- return errorToErrorCode(SecOrErr.takeError());
- const Elf_Shdr *Sec = *SecOrErr;
- auto SymTabOrErr = EF.getSection(Sec->sh_link);
- if (!SymTabOrErr)
- return errorToErrorCode(SymTabOrErr.takeError());
- const Elf_Shdr *SymTab = *SymTabOrErr;
- assert(SymTab->sh_type == ELF::SHT_SYMTAB ||
- SymTab->sh_type == ELF::SHT_DYNSYM);
- auto StrTabSec = EF.getSection(SymTab->sh_link);
- if (!StrTabSec)
- return errorToErrorCode(StrTabSec.takeError());
- auto StrTabOrErr = EF.getStringTable(*StrTabSec);
- if (!StrTabOrErr)
- return errorToErrorCode(StrTabOrErr.takeError());
- StringRef StrTab = *StrTabOrErr;
- int64_t Addend = 0;
- // If there is no Symbol associated with the relocation, we set the undef
- // boolean value to 'true'. This will prevent us from calling functions that
- // requires the relocation to be associated with a symbol.
- bool Undef = false;
- switch (Sec->sh_type) {
- default:
- return object_error::parse_failed;
- case ELF::SHT_REL: {
- // TODO: Read implicit addend from section data.
- break;
- }
- case ELF::SHT_RELA: {
- const Elf_Rela *ERela = Obj->getRela(Rel);
- Addend = ERela->r_addend;
- Undef = ERela->getSymbol(false) == 0;
- break;
- }
- }
- std::string Target;
- if (!Undef) {
- symbol_iterator SI = RelRef.getSymbol();
- const Elf_Sym *symb = Obj->getSymbol(SI->getRawDataRefImpl());
- if (symb->getType() == ELF::STT_SECTION) {
- Expected<section_iterator> SymSI = SI->getSection();
- if (!SymSI)
- return errorToErrorCode(SymSI.takeError());
- const Elf_Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl());
- auto SecName = EF.getSectionName(SymSec);
- if (!SecName)
- return errorToErrorCode(SecName.takeError());
- Target = *SecName;
- } else {
- Expected<StringRef> SymName = symb->getName(StrTab);
- if (!SymName)
- return errorToErrorCode(SymName.takeError());
- if (Demangle)
- Target = demangle(*SymName);
- else
- Target = *SymName;
- }
- } else
- Target = "*ABS*";
-
- // Default scheme is to print Target, as well as "+ <addend>" for nonzero
- // addend. Should be acceptable for all normal purposes.
- std::string FmtBuf;
- raw_string_ostream Fmt(FmtBuf);
- Fmt << Target;
- if (Addend != 0)
- Fmt << (Addend < 0 ? "" : "+") << Addend;
- Fmt.flush();
- Result.append(FmtBuf.begin(), FmtBuf.end());
- return std::error_code();
-}
-
-static std::error_code getRelocationValueString(const ELFObjectFileBase *Obj,
- const RelocationRef &Rel,
- SmallVectorImpl<char> &Result) {
- if (auto *ELF32LE = dyn_cast<ELF32LEObjectFile>(Obj))
- return getRelocationValueString(ELF32LE, Rel, Result);
- if (auto *ELF64LE = dyn_cast<ELF64LEObjectFile>(Obj))
- return getRelocationValueString(ELF64LE, Rel, Result);
- if (auto *ELF32BE = dyn_cast<ELF32BEObjectFile>(Obj))
- return getRelocationValueString(ELF32BE, Rel, Result);
- auto *ELF64BE = cast<ELF64BEObjectFile>(Obj);
- return getRelocationValueString(ELF64BE, Rel, Result);
-}
-
-static std::error_code getRelocationValueString(const COFFObjectFile *Obj,
- const RelocationRef &Rel,
- SmallVectorImpl<char> &Result) {
- symbol_iterator SymI = Rel.getSymbol();
- Expected<StringRef> SymNameOrErr = SymI->getName();
- if (!SymNameOrErr)
- return errorToErrorCode(SymNameOrErr.takeError());
- StringRef SymName = *SymNameOrErr;
- Result.append(SymName.begin(), SymName.end());
- return std::error_code();
-}
-
-static void printRelocationTargetName(const MachOObjectFile *O,
- const MachO::any_relocation_info &RE,
- raw_string_ostream &Fmt) {
- // Target of a scattered relocation is an address. In the interest of
- // generating pretty output, scan through the symbol table looking for a
- // symbol that aligns with that address. If we find one, print it.
- // Otherwise, we just print the hex address of the target.
- if (O->isRelocationScattered(RE)) {
- uint32_t Val = O->getPlainRelocationSymbolNum(RE);
-
- for (const SymbolRef &Symbol : O->symbols()) {
- Expected<uint64_t> Addr = Symbol.getAddress();
- if (!Addr)
- report_error(O->getFileName(), Addr.takeError());
- if (*Addr != Val)
- continue;
- Expected<StringRef> Name = Symbol.getName();
- if (!Name)
- report_error(O->getFileName(), Name.takeError());
- Fmt << *Name;
- return;
- }
-
- // If we couldn't find a symbol that this relocation refers to, try
- // to find a section beginning instead.
- for (const SectionRef &Section : ToolSectionFilter(*O)) {
- std::error_code ec;
-
- StringRef Name;
- uint64_t Addr = Section.getAddress();
- if (Addr != Val)
- continue;
- if ((ec = Section.getName(Name)))
- report_error(O->getFileName(), ec);
- Fmt << Name;
- return;
- }
-
- Fmt << format("0x%x", Val);
- return;
- }
-
- StringRef S;
- bool isExtern = O->getPlainRelocationExternal(RE);
- uint64_t Val = O->getPlainRelocationSymbolNum(RE);
-
- if (O->getAnyRelocationType(RE) == MachO::ARM64_RELOC_ADDEND) {
- Fmt << format("0x%0" PRIx64, Val);
- return;
- }
-
- if (isExtern) {
- symbol_iterator SI = O->symbol_begin();
- advance(SI, Val);
- Expected<StringRef> SOrErr = SI->getName();
- if (!SOrErr)
- report_error(O->getFileName(), SOrErr.takeError());
- S = *SOrErr;
- } else {
- section_iterator SI = O->section_begin();
- // Adjust for the fact that sections are 1-indexed.
- if (Val == 0) {
- Fmt << "0 (?,?)";
- return;
- }
- uint32_t I = Val - 1;
- while (I != 0 && SI != O->section_end()) {
- --I;
- advance(SI, 1);
- }
- if (SI == O->section_end())
- Fmt << Val << " (?,?)";
- else
- SI->getName(S);
- }
-
- Fmt << S;
-}
-
-static std::error_code getRelocationValueString(const WasmObjectFile *Obj,
- const RelocationRef &RelRef,
- SmallVectorImpl<char> &Result) {
- const wasm::WasmRelocation& Rel = Obj->getWasmRelocation(RelRef);
- symbol_iterator SI = RelRef.getSymbol();
- std::string FmtBuf;
- raw_string_ostream Fmt(FmtBuf);
- if (SI == Obj->symbol_end()) {
- // Not all wasm relocations have symbols associated with them.
- // In particular R_WEBASSEMBLY_TYPE_INDEX_LEB.
- Fmt << Rel.Index;
- } else {
- Expected<StringRef> SymNameOrErr = SI->getName();
- if (!SymNameOrErr)
- return errorToErrorCode(SymNameOrErr.takeError());
- StringRef SymName = *SymNameOrErr;
- Result.append(SymName.begin(), SymName.end());
- }
- Fmt << (Rel.Addend < 0 ? "" : "+") << Rel.Addend;
- Fmt.flush();
- Result.append(FmtBuf.begin(), FmtBuf.end());
- return std::error_code();
-}
-
-static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
- const RelocationRef &RelRef,
- SmallVectorImpl<char> &Result) {
- DataRefImpl Rel = RelRef.getRawDataRefImpl();
- MachO::any_relocation_info RE = Obj->getRelocation(Rel);
-
- unsigned Arch = Obj->getArch();
-
- std::string FmtBuf;
- raw_string_ostream Fmt(FmtBuf);
- unsigned Type = Obj->getAnyRelocationType(RE);
- bool IsPCRel = Obj->getAnyRelocationPCRel(RE);
-
- // Determine any addends that should be displayed with the relocation.
- // These require decoding the relocation type, which is triple-specific.
-
- // X86_64 has entirely custom relocation types.
- if (Arch == Triple::x86_64) {
- switch (Type) {
- case MachO::X86_64_RELOC_GOT_LOAD:
- case MachO::X86_64_RELOC_GOT: {
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "@GOT";
- if (IsPCRel)
- Fmt << "PCREL";
- break;
- }
- case MachO::X86_64_RELOC_SUBTRACTOR: {
- DataRefImpl RelNext = Rel;
- Obj->moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
- // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
- // X86_64_RELOC_UNSIGNED.
- // NOTE: Scattered relocations don't exist on x86_64.
- unsigned RType = Obj->getAnyRelocationType(RENext);
- if (RType != MachO::X86_64_RELOC_UNSIGNED)
- report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
- "X86_64_RELOC_SUBTRACTOR.");
-
- // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
- // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
- printRelocationTargetName(Obj, RENext, Fmt);
- Fmt << "-";
- printRelocationTargetName(Obj, RE, Fmt);
- break;
- }
- case MachO::X86_64_RELOC_TLV:
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "@TLV";
- if (IsPCRel)
- Fmt << "P";
- break;
- case MachO::X86_64_RELOC_SIGNED_1:
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "-1";
- break;
- case MachO::X86_64_RELOC_SIGNED_2:
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "-2";
- break;
- case MachO::X86_64_RELOC_SIGNED_4:
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "-4";
- break;
- default:
- printRelocationTargetName(Obj, RE, Fmt);
- break;
- }
- // X86 and ARM share some relocation types in common.
- } else if (Arch == Triple::x86 || Arch == Triple::arm ||
- Arch == Triple::ppc) {
- // Generic relocation types...
- switch (Type) {
- case MachO::GENERIC_RELOC_PAIR: // prints no info
- return std::error_code();
- case MachO::GENERIC_RELOC_SECTDIFF: {
- DataRefImpl RelNext = Rel;
- Obj->moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
- // X86 sect diff's must be followed by a relocation of type
- // GENERIC_RELOC_PAIR.
- unsigned RType = Obj->getAnyRelocationType(RENext);
-
- if (RType != MachO::GENERIC_RELOC_PAIR)
- report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
- "GENERIC_RELOC_SECTDIFF.");
-
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "-";
- printRelocationTargetName(Obj, RENext, Fmt);
- break;
- }
- }
-
- if (Arch == Triple::x86 || Arch == Triple::ppc) {
- switch (Type) {
- case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
- DataRefImpl RelNext = Rel;
- Obj->moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
- // X86 sect diff's must be followed by a relocation of type
- // GENERIC_RELOC_PAIR.
- unsigned RType = Obj->getAnyRelocationType(RENext);
- if (RType != MachO::GENERIC_RELOC_PAIR)
- report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
- "GENERIC_RELOC_LOCAL_SECTDIFF.");
-
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "-";
- printRelocationTargetName(Obj, RENext, Fmt);
- break;
- }
- case MachO::GENERIC_RELOC_TLV: {
- printRelocationTargetName(Obj, RE, Fmt);
- Fmt << "@TLV";
- if (IsPCRel)
- Fmt << "P";
- break;
- }
- default:
- printRelocationTargetName(Obj, RE, Fmt);
- }
- } else { // ARM-specific relocations
- switch (Type) {
- case MachO::ARM_RELOC_HALF:
- case MachO::ARM_RELOC_HALF_SECTDIFF: {
- // Half relocations steal a bit from the length field to encode
- // whether this is an upper16 or a lower16 relocation.
- bool isUpper = (Obj->getAnyRelocationLength(RE) & 0x1) == 1;
-
- if (isUpper)
- Fmt << ":upper16:(";
- else
- Fmt << ":lower16:(";
- printRelocationTargetName(Obj, RE, Fmt);
-
- DataRefImpl RelNext = Rel;
- Obj->moveRelocationNext(RelNext);
- MachO::any_relocation_info RENext = Obj->getRelocation(RelNext);
-
- // ARM half relocs must be followed by a relocation of type
- // ARM_RELOC_PAIR.
- unsigned RType = Obj->getAnyRelocationType(RENext);
- if (RType != MachO::ARM_RELOC_PAIR)
- report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
- "ARM_RELOC_HALF");
-
- // NOTE: The half of the target virtual address is stashed in the
- // address field of the secondary relocation, but we can't reverse
- // engineer the constant offset from it without decoding the movw/movt
- // instruction to find the other half in its immediate field.
-
- // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
- // symbol/section pointer of the follow-on relocation.
- if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
- Fmt << "-";
- printRelocationTargetName(Obj, RENext, Fmt);
- }
-
- Fmt << ")";
- break;
- }
- default: { printRelocationTargetName(Obj, RE, Fmt); }
- }
- }
- } else
- printRelocationTargetName(Obj, RE, Fmt);
-
- Fmt.flush();
- Result.append(FmtBuf.begin(), FmtBuf.end());
- return std::error_code();
-}
-
-static std::error_code getRelocationValueString(const RelocationRef &Rel,
- SmallVectorImpl<char> &Result) {
+static Error getRelocationValueString(const RelocationRef &Rel,
+ SmallVectorImpl<char> &Result) {
const ObjectFile *Obj = Rel.getObject();
if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj))
- return getRelocationValueString(ELF, Rel, Result);
+ return getELFRelocationValueString(ELF, Rel, Result);
if (auto *COFF = dyn_cast<COFFObjectFile>(Obj))
- return getRelocationValueString(COFF, Rel, Result);
+ return getCOFFRelocationValueString(COFF, Rel, Result);
if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj))
- return getRelocationValueString(Wasm, Rel, Result);
+ return getWasmRelocationValueString(Wasm, Rel, Result);
if (auto *MachO = dyn_cast<MachOObjectFile>(Obj))
- return getRelocationValueString(MachO, Rel, Result);
+ return getMachORelocationValueString(MachO, Rel, Result);
llvm_unreachable("unknown object file format");
}
@@ -928,13 +559,15 @@ private:
public:
SourcePrinter() = default;
SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
- symbolize::LLVMSymbolizer::Options SymbolizerOpts(
- DILineInfoSpecifier::FunctionNameKind::None, true, false, false,
- DefaultArch);
+ symbolize::LLVMSymbolizer::Options SymbolizerOpts;
+ SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::None;
+ SymbolizerOpts.Demangle = false;
+ SymbolizerOpts.DefaultArch = DefaultArch;
Symbolizer.reset(new symbolize::LLVMSymbolizer(SymbolizerOpts));
}
virtual ~SourcePrinter() = default;
- virtual void printSourceLine(raw_ostream &OS, uint64_t Address,
+ virtual void printSourceLine(raw_ostream &OS,
+ object::SectionedAddress Address,
StringRef Delimiter = "; ");
};
@@ -949,35 +582,37 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) {
Buffer = std::move(*BufferOrError);
}
// Chomp the file to get lines
- size_t BufferSize = Buffer->getBufferSize();
- const char *BufferStart = Buffer->getBufferStart();
- for (const char *Start = BufferStart, *End = BufferStart;
- End < BufferStart + BufferSize; End++)
- if (*End == '\n' || End == BufferStart + BufferSize - 1 ||
- (*End == '\r' && *(End + 1) == '\n')) {
- LineCache[LineInfo.FileName].push_back(StringRef(Start, End - Start));
- if (*End == '\r')
- End++;
- Start = End + 1;
+ const char *BufferStart = Buffer->getBufferStart(),
+ *BufferEnd = Buffer->getBufferEnd();
+ std::vector<StringRef> &Lines = LineCache[LineInfo.FileName];
+ const char *Start = BufferStart;
+ for (const char *I = BufferStart; I != BufferEnd; ++I)
+ if (*I == '\n') {
+ Lines.emplace_back(Start, I - Start - (BufferStart < I && I[-1] == '\r'));
+ Start = I + 1;
}
+ if (Start < BufferEnd)
+ Lines.emplace_back(Start, BufferEnd - Start);
SourceCache[LineInfo.FileName] = std::move(Buffer);
return true;
}
-void SourcePrinter::printSourceLine(raw_ostream &OS, uint64_t Address,
+void SourcePrinter::printSourceLine(raw_ostream &OS,
+ object::SectionedAddress Address,
StringRef Delimiter) {
if (!Symbolizer)
return;
+
DILineInfo LineInfo = DILineInfo();
- auto ExpectecLineInfo =
- Symbolizer->symbolizeCode(Obj->getFileName(), Address);
- if (!ExpectecLineInfo)
- consumeError(ExpectecLineInfo.takeError());
+ auto ExpectedLineInfo = Symbolizer->symbolizeCode(*Obj, Address);
+ if (!ExpectedLineInfo)
+ consumeError(ExpectedLineInfo.takeError());
else
- LineInfo = *ExpectecLineInfo;
+ LineInfo = *ExpectedLineInfo;
- if ((LineInfo.FileName == "<invalid>") || OldLineInfo.Line == LineInfo.Line ||
- LineInfo.Line == 0)
+ if ((LineInfo.FileName == "<invalid>") || LineInfo.Line == 0 ||
+ ((OldLineInfo.Line == LineInfo.Line) &&
+ (OldLineInfo.FileName == LineInfo.FileName)))
return;
if (PrintLines)
@@ -986,53 +621,79 @@ void SourcePrinter::printSourceLine(raw_ostream &OS, uint64_t Address,
if (SourceCache.find(LineInfo.FileName) == SourceCache.end())
if (!cacheSource(LineInfo))
return;
- auto FileBuffer = SourceCache.find(LineInfo.FileName);
- if (FileBuffer != SourceCache.end()) {
- auto LineBuffer = LineCache.find(LineInfo.FileName);
- if (LineBuffer != LineCache.end()) {
- if (LineInfo.Line > LineBuffer->second.size())
- return;
- // Vector begins at 0, line numbers are non-zero
- OS << Delimiter << LineBuffer->second[LineInfo.Line - 1].ltrim()
- << "\n";
- }
+ auto LineBuffer = LineCache.find(LineInfo.FileName);
+ if (LineBuffer != LineCache.end()) {
+ if (LineInfo.Line > LineBuffer->second.size())
+ return;
+ // Vector begins at 0, line numbers are non-zero
+ OS << Delimiter << LineBuffer->second[LineInfo.Line - 1] << '\n';
}
}
OldLineInfo = LineInfo;
}
+static bool isAArch64Elf(const ObjectFile *Obj) {
+ const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+ return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
+}
+
static bool isArmElf(const ObjectFile *Obj) {
- return (Obj->isELF() &&
- (Obj->getArch() == Triple::aarch64 ||
- Obj->getArch() == Triple::aarch64_be ||
- Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb ||
- Obj->getArch() == Triple::thumb ||
- Obj->getArch() == Triple::thumbeb));
+ const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
+ return Elf && Elf->getEMachine() == ELF::EM_ARM;
+}
+
+static bool hasMappingSymbols(const ObjectFile *Obj) {
+ return isArmElf(Obj) || isAArch64Elf(Obj);
+}
+
+static void printRelocation(const RelocationRef &Rel, uint64_t Address,
+ bool Is64Bits) {
+ StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": ";
+ SmallString<16> Name;
+ SmallString<32> Val;
+ Rel.getTypeName(Name);
+ error(getRelocationValueString(Rel, Val));
+ outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n";
}
class PrettyPrinter {
public:
virtual ~PrettyPrinter() = default;
virtual void printInst(MCInstPrinter &IP, const MCInst *MI,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &OS, StringRef Annot,
- MCSubtargetInfo const &STI, SourcePrinter *SP,
+ ArrayRef<uint8_t> Bytes,
+ object::SectionedAddress Address, raw_ostream &OS,
+ StringRef Annot, MCSubtargetInfo const &STI,
+ SourcePrinter *SP,
std::vector<RelocationRef> *Rels = nullptr) {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address);
- if (!NoLeadingAddr)
- OS << format("%8" PRIx64 ":", Address);
- if (!NoShowRawInsn) {
- OS << "\t";
- dumpBytes(Bytes, OS);
+
+ {
+ formatted_raw_ostream FOS(OS);
+ if (!NoLeadingAddr)
+ FOS << format("%8" PRIx64 ":", Address.Address);
+ if (!NoShowRawInsn) {
+ FOS << ' ';
+ dumpBytes(Bytes, FOS);
+ }
+ FOS.flush();
+ // The output of printInst starts with a tab. Print some spaces so that
+ // the tab has 1 column and advances to the target tab stop.
+ unsigned TabStop = NoShowRawInsn ? 16 : 40;
+ unsigned Column = FOS.getColumn();
+ FOS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
+
+ // The dtor calls flush() to ensure the indent comes before printInst().
}
+
if (MI)
IP.printInst(MI, OS, "", STI);
else
- OS << " <unknown>";
+ OS << "\t<unknown>";
}
};
PrettyPrinter PrettyPrinterInst;
+
class HexagonPrettyPrinter : public PrettyPrinter {
public:
void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -1044,17 +705,17 @@ public:
if (!NoShowRawInsn) {
OS << "\t";
dumpBytes(Bytes.slice(0, 4), OS);
- OS << format("%08" PRIx32, opcode);
+ OS << format("\t%08" PRIx32, opcode);
}
}
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &OS, StringRef Annot,
- MCSubtargetInfo const &STI, SourcePrinter *SP,
+ object::SectionedAddress Address, raw_ostream &OS,
+ StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
std::vector<RelocationRef> *Rels) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, "");
if (!MI) {
- printLead(Bytes, Address, OS);
+ printLead(Bytes, Address.Address, OS);
OS << " <unknown>";
return;
}
@@ -1070,21 +731,15 @@ public:
auto HeadTail = PacketBundle.first.split('\n');
auto Preamble = " { ";
auto Separator = "";
- StringRef Fmt = "\t\t\t%08" PRIx64 ": ";
- std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
- std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
// Hexagon's packets require relocations to be inline rather than
// clustered at the end of the packet.
+ std::vector<RelocationRef>::const_iterator RelCur = Rels->begin();
+ std::vector<RelocationRef>::const_iterator RelEnd = Rels->end();
auto PrintReloc = [&]() -> void {
- while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address)) {
- if (RelCur->getOffset() == Address) {
- SmallString<16> Name;
- SmallString<32> Val;
- RelCur->getTypeName(Name);
- error(getRelocationValueString(*RelCur, Val));
- OS << Separator << format(Fmt.data(), Address) << Name << "\t" << Val
- << "\n";
+ while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) {
+ if (RelCur->getOffset() == Address.Address) {
+ printRelocation(*RelCur, Address.Address, false);
return;
}
++RelCur;
@@ -1096,7 +751,7 @@ public:
Separator = "\n";
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address, "");
- printLead(Bytes, Address, OS);
+ printLead(Bytes, Address.Address, OS);
OS << Preamble;
Preamble = " ";
StringRef Inst;
@@ -1114,7 +769,7 @@ public:
OS << " } " << PacketBundle.second;
PrintReloc();
Bytes = Bytes.slice(4);
- Address += 4;
+ Address.Address += 4;
}
}
};
@@ -1123,14 +778,12 @@ HexagonPrettyPrinter HexagonPrettyPrinterInst;
class AMDGCNPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &OS, StringRef Annot,
- MCSubtargetInfo const &STI, SourcePrinter *SP,
+ object::SectionedAddress Address, raw_ostream &OS,
+ StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
std::vector<RelocationRef> *Rels) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address);
- typedef support::ulittle32_t U32;
-
if (MI) {
SmallString<40> InstStr;
raw_svector_ostream IS(InstStr);
@@ -1144,7 +797,7 @@ public:
// remaining
if (Bytes.size() >= 4) {
OS << format("\t.long 0x%08" PRIx32 " ",
- static_cast<uint32_t>(*reinterpret_cast<const U32*>(Bytes.data())));
+ support::endian::read32<support::little>(Bytes.data()));
OS.indent(42);
} else {
OS << format("\t.byte 0x%02" PRIx8, Bytes[0]);
@@ -1154,20 +807,21 @@ public:
}
}
- OS << format("// %012" PRIX64 ": ", Address);
- if (Bytes.size() >=4) {
- for (auto D : makeArrayRef(reinterpret_cast<const U32*>(Bytes.data()),
- Bytes.size() / sizeof(U32)))
- // D should be explicitly casted to uint32_t here as it is passed
- // by format to snprintf as vararg.
- OS << format("%08" PRIX32 " ", static_cast<uint32_t>(D));
+ OS << format("// %012" PRIX64 ":", Address.Address);
+ if (Bytes.size() >= 4) {
+ // D should be cast to uint32_t here as it is passed by format to
+ // snprintf as vararg.
+ for (uint32_t D : makeArrayRef(
+ reinterpret_cast<const support::little32_t *>(Bytes.data()),
+ Bytes.size() / 4))
+ OS << format(" %08" PRIX32, D);
} else {
- for (unsigned int i = 0; i < Bytes.size(); i++)
- OS << format("%02" PRIX8 " ", Bytes[i]);
+ for (unsigned char B : Bytes)
+ OS << format(" %02" PRIX8, B);
}
if (!Annot.empty())
- OS << "// " << Annot;
+ OS << " // " << Annot;
}
};
AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
@@ -1175,13 +829,13 @@ AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
class BPFPrettyPrinter : public PrettyPrinter {
public:
void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
- uint64_t Address, raw_ostream &OS, StringRef Annot,
- MCSubtargetInfo const &STI, SourcePrinter *SP,
+ object::SectionedAddress Address, raw_ostream &OS,
+ StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP,
std::vector<RelocationRef> *Rels) override {
if (SP && (PrintSource || PrintLines))
SP->printSourceLine(OS, Address);
if (!NoLeadingAddr)
- OS << format("%8" PRId64 ":", Address / 8);
+ OS << format("%8" PRId64 ":", Address.Address / 8);
if (!NoShowRawInsn) {
OS << "\t";
dumpBytes(Bytes, OS);
@@ -1189,7 +843,7 @@ public:
if (MI)
IP.printInst(MI, OS, "", STI);
else
- OS << " <unknown>";
+ OS << "\t<unknown>";
}
};
BPFPrettyPrinter BPFPrettyPrinterInst;
@@ -1227,27 +881,25 @@ addDynamicElfSymbols(const ELFObjectFile<ELFT> *Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
for (auto Symbol : Obj->getDynamicSymbolIterators()) {
uint8_t SymbolType = Symbol.getELFType();
- if (SymbolType != ELF::STT_FUNC || Symbol.getSize() == 0)
+ if (SymbolType == ELF::STT_SECTION)
continue;
- Expected<uint64_t> AddressOrErr = Symbol.getAddress();
- if (!AddressOrErr)
- report_error(Obj->getFileName(), AddressOrErr.takeError());
+ uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj->getFileName());
+ // ELFSymbolRef::getAddress() returns size instead of value for common
+ // symbols which is not desirable for disassembly output. Overriding.
+ if (SymbolType == ELF::STT_COMMON)
+ Address = Obj->getSymbol(Symbol.getRawDataRefImpl())->st_value;
- Expected<StringRef> Name = Symbol.getName();
- if (!Name)
- report_error(Obj->getFileName(), Name.takeError());
- if (Name->empty())
+ StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
+ if (Name.empty())
continue;
- Expected<section_iterator> SectionOrErr = Symbol.getSection();
- if (!SectionOrErr)
- report_error(Obj->getFileName(), SectionOrErr.takeError());
- section_iterator SecI = *SectionOrErr;
+ section_iterator SecI =
+ unwrapOrError(Symbol.getSection(), Obj->getFileName());
if (SecI == Obj->section_end())
continue;
- AllSymbols[*SecI].emplace_back(*AddressOrErr, *Name, SymbolType);
+ AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
}
}
@@ -1285,14 +937,10 @@ static void addPltEntries(const ObjectFile *Obj,
SymbolRef Symbol(PltEntry.first, ElfObj);
uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
- Expected<StringRef> NameOrErr = Symbol.getName();
- if (!NameOrErr)
- report_error(Obj->getFileName(), NameOrErr.takeError());
- if (NameOrErr->empty())
- continue;
- StringRef Name = Saver.save((*NameOrErr + "@plt").str());
-
- AllSymbols[*Plt].emplace_back(PltEntry.second, Name, SymbolType);
+ StringRef Name = unwrapOrError(Symbol.getName(), Obj->getFileName());
+ if (!Name.empty())
+ AllSymbols[*Plt].emplace_back(
+ PltEntry.second, Saver.save((Name + "@plt").str()), SymbolType);
}
}
}
@@ -1301,10 +949,6 @@ static void addPltEntries(const ObjectFile *Obj,
// returns the number of zero bytes that can be skipped when dumping the
// disassembly of the instructions in Buf.
static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
- // When -z or --disassemble-zeroes are given we always disassemble them.
- if (DisassembleZeroes)
- return 0;
-
// Find the number of leading zeroes.
size_t N = 0;
while (N < Buf.size() && !Buf[N])
@@ -1320,108 +964,160 @@ static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
return N & ~0x3;
}
-static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
- if (StartAddress > StopAddress)
- error("Start address should be less than stop address");
-
- const Target *TheTarget = getTarget(Obj);
-
- // Package up features to be passed to target/subtarget
- SubtargetFeatures Features = Obj->getFeatures();
- if (!MAttrs.empty())
- for (unsigned I = 0; I != MAttrs.size(); ++I)
- Features.AddFeature(MAttrs[I]);
-
- std::unique_ptr<const MCRegisterInfo> MRI(
- TheTarget->createMCRegInfo(TripleName));
- if (!MRI)
- report_error(Obj->getFileName(), "no register info for target " +
- TripleName);
-
- // Set up disassembler.
- std::unique_ptr<const MCAsmInfo> AsmInfo(
- TheTarget->createMCAsmInfo(*MRI, TripleName));
- if (!AsmInfo)
- report_error(Obj->getFileName(), "no assembly info for target " +
- TripleName);
- std::unique_ptr<const MCSubtargetInfo> STI(
- TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
- if (!STI)
- report_error(Obj->getFileName(), "no subtarget info for target " +
- TripleName);
- std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
- if (!MII)
- report_error(Obj->getFileName(), "no instruction info for target " +
- TripleName);
- MCObjectFileInfo MOFI;
- MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
- // FIXME: for now initialize MCObjectFileInfo with default values
- MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
-
- std::unique_ptr<MCDisassembler> DisAsm(
- TheTarget->createMCDisassembler(*STI, Ctx));
- if (!DisAsm)
- report_error(Obj->getFileName(), "no disassembler for target " +
- TripleName);
+// Returns a map from sections to their relocations.
+static std::map<SectionRef, std::vector<RelocationRef>>
+getRelocsMap(object::ObjectFile const &Obj) {
+ std::map<SectionRef, std::vector<RelocationRef>> Ret;
+ for (SectionRef Sec : Obj.sections()) {
+ section_iterator Relocated = Sec.getRelocatedSection();
+ if (Relocated == Obj.section_end() || !shouldKeep(*Relocated))
+ continue;
+ std::vector<RelocationRef> &V = Ret[*Relocated];
+ for (const RelocationRef &R : Sec.relocations())
+ V.push_back(R);
+ // Sort relocations by address.
+ llvm::stable_sort(V, isRelocAddressLess);
+ }
+ return Ret;
+}
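
A standalone sketch of the pattern this map enables, in plain C++17 rather than the LLVM API (the Reloc struct and all values are invented): relocations are grouped per target section and sorted by offset, so the disassembly loop can consume them with a single forward cursor.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct Reloc {
  uint64_t Offset;
  std::string Type;
};

int main() {
  // Relocations targeting one section, possibly out of order in the file.
  std::vector<Reloc> Rels{{0x10, "R_X86_64_PC32"}, {0x4, "R_X86_64_PLT32"}};
  std::stable_sort(Rels.begin(), Rels.end(),
                   [](const Reloc &A, const Reloc &B) { return A.Offset < B.Offset; });

  // The disassembly loop keeps one cursor into the sorted vector and prints
  // every relocation that lands inside the instruction just printed.
  auto RelCur = Rels.begin();
  for (uint64_t Index = 0, Size = 4; Index < 0x20; Index += Size) {
    // ... the instruction at Index would be printed here ...
    while (RelCur != Rels.end() && RelCur->Offset < Index + Size) {
      std::cout << std::hex << RelCur->Offset << " " << RelCur->Type << "\n";
      ++RelCur;
    }
  }
}
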
- std::unique_ptr<const MCInstrAnalysis> MIA(
- TheTarget->createMCInstrAnalysis(MII.get()));
+// Used for --adjust-vma to check if the address should be adjusted by the
+// specified value for a given section.
+// For ELF we do not adjust non-allocatable sections, such as debug sections,
+// because they are not loadable.
+// TODO: implement for other file formats.
+static bool shouldAdjustVA(const SectionRef &Section) {
+ const ObjectFile *Obj = Section.getObject();
+ if (isa<object::ELFObjectFileBase>(Obj))
+ return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC;
+ return false;
+}
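
A minimal model of the resulting address adjustment in plain C++ (helper name and addresses invented): allocatable sections get shifted by the --adjust-vma delta, non-allocatable ones keep their original address.

#include <cstdint>
#include <cstdio>

// Printed address under --adjust-vma: allocatable (SHF_ALLOC) sections are
// shifted by the requested delta, non-allocatable ones (e.g. .debug_info)
// are left untouched.
static uint64_t printedAddr(uint64_t VMA, uint64_t Delta, bool Alloc) {
  return Alloc ? VMA + Delta : VMA;
}

int main() {
  std::printf("%#llx\n",
              (unsigned long long)printedAddr(0x401000, 0x10000, true)); // 0x411000
  std::printf("%#llx\n",
              (unsigned long long)printedAddr(0x0, 0x10000, false));     // 0 (.debug_*)
}
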
- int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
- std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
- Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
- if (!IP)
- report_error(Obj->getFileName(), "no instruction printer for target " +
- TripleName);
- IP->setPrintImmHex(PrintImmHex);
- PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
- StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " :
- "\t\t\t%08" PRIx64 ": ";
+typedef std::pair<uint64_t, char> MappingSymbolPair;
+static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols,
+ uint64_t Address) {
+ auto It =
+ partition_point(MappingSymbols, [Address](const MappingSymbolPair &Val) {
+ return Val.first <= Address;
+ });
+ // Return zero for any address before the first mapping symbol; this means
+ // we should use the default disassembly mode, depending on the target.
+ if (It == MappingSymbols.begin())
+ return '\x00';
+ return (It - 1)->second;
+}
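
MappingSymbols is kept sorted by offset, so the partition_point call returns the first entry whose offset exceeds Address, and the entry just before it is the mapping symbol in effect. A standalone sketch of the same lookup in plain C++17 (std::partition_point, invented offsets; not the LLVM API):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

static char kindAt(const std::vector<std::pair<uint64_t, char>> &Syms,
                   uint64_t Addr) {
  // Syms is sorted by offset; partition_point returns the first entry whose
  // offset is greater than Addr, so the previous entry (if any) governs Addr.
  auto It = std::partition_point(
      Syms.begin(), Syms.end(),
      [Addr](const std::pair<uint64_t, char> &S) { return S.first <= Addr; });
  return It == Syms.begin() ? '\0' : (It - 1)->second;
}

int main() {
  // $a at 0x0, $d at 0x20, $a again at 0x28 (section-relative offsets).
  std::vector<std::pair<uint64_t, char>> Syms{{0x0, 'a'}, {0x20, 'd'}, {0x28, 'a'}};
  std::cout << kindAt(Syms, 0x24) << "\n";           // 'd': 0x24 is in the data island
  std::cout << (kindAt(Syms, 0x30) == 'a') << "\n";  // 1: back in ARM code
}
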
- SourcePrinter SP(Obj, TheTarget->getName());
+static uint64_t
+dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
+ const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
+ ArrayRef<MappingSymbolPair> MappingSymbols) {
+ support::endianness Endian =
+ Obj->isLittleEndian() ? support::little : support::big;
+ while (Index < End) {
+ outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+ outs() << "\t";
+ if (Index + 4 <= End) {
+ dumpBytes(Bytes.slice(Index, 4), outs());
+ outs() << "\t.word\t"
+ << format_hex(
+ support::endian::read32(Bytes.data() + Index, Endian), 10);
+ Index += 4;
+ } else if (Index + 2 <= End) {
+ dumpBytes(Bytes.slice(Index, 2), outs());
+ outs() << "\t\t.short\t"
+ << format_hex(
+ support::endian::read16(Bytes.data() + Index, Endian), 6);
+ Index += 2;
+ } else {
+ dumpBytes(Bytes.slice(Index, 1), outs());
+ outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
+ ++Index;
+ }
+ outs() << "\n";
+ if (getMappingSymbolKind(MappingSymbols, Index) != 'd')
+ break;
+ }
+ return Index;
+}
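
The .word/.short lines above are produced by reading the raw bytes in the object's byte order via support::endian::read32/read16. A self-contained equivalent of the 32-bit read, without the LLVM support library (byte values invented for illustration):

#include <cstdint>
#include <iostream>

// Assemble a 32-bit value from raw section bytes in the requested byte order.
static uint32_t read32(const uint8_t *P, bool LittleEndian) {
  uint32_t V = 0;
  for (int I = 0; I < 4; ++I)
    V |= uint32_t(P[I]) << (LittleEndian ? 8 * I : 8 * (3 - I));
  return V;
}

int main() {
  const uint8_t Bytes[] = {0x78, 0x56, 0x34, 0x12};
  std::cout << std::hex << read32(Bytes, true) << "\n";  // 12345678
  std::cout << std::hex << read32(Bytes, false) << "\n"; // 78563412
}
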
- // Create a mapping, RelocSecs = SectionRelocMap[S], where sections
- // in RelocSecs contain the relocations for section S.
- std::error_code EC;
- std::map<SectionRef, SmallVector<SectionRef, 1>> SectionRelocMap;
- for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
- section_iterator Sec2 = Section.getRelocatedSection();
- if (Sec2 != Obj->section_end())
- SectionRelocMap[*Sec2].push_back(Section);
+static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
+ ArrayRef<uint8_t> Bytes) {
+ // Print out data up to 8 bytes at a time in hex and ASCII.
+ uint8_t AsciiData[9] = {'\0'};
+ uint8_t Byte;
+ int NumBytes = 0;
+
+ for (; Index < End; ++Index) {
+ if (NumBytes == 0)
+ outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+ Byte = Bytes.slice(Index)[0];
+ outs() << format(" %02x", Byte);
+ AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
+
+ uint8_t IndentOffset = 0;
+ NumBytes++;
+ if (Index == End - 1 || NumBytes > 8) {
+ // Pad rows shorter than 8 bytes so the ASCII column stays aligned:
+ // 3 columns per missing byte (two hex digits plus the separating space).
+ IndentOffset = 3 * (8 - NumBytes);
+ for (int Excess = NumBytes; Excess < 8; Excess++)
+ AsciiData[Excess] = '\0';
+ NumBytes = 8;
+ }
+ if (NumBytes == 8) {
+ AsciiData[8] = '\0';
+ outs() << std::string(IndentOffset, ' ') << " ";
+ outs() << reinterpret_cast<char *>(AsciiData);
+ outs() << '\n';
+ NumBytes = 0;
+ }
}
+}
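
The same hex-plus-ASCII layout, reduced to a free-standing sketch in plain C++ (function name and data invented): 8 bytes per row, with short final rows padded by 3 columns per missing byte so the ASCII column lines up.

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <vector>

// Print Data as "<addr>: <up to 8 hex bytes>  <ascii>" rows.
static void hexDump(uint64_t Addr, const std::vector<uint8_t> &Data) {
  for (size_t I = 0; I < Data.size(); I += 8) {
    size_t N = std::min<size_t>(8, Data.size() - I);
    std::printf("%8llx:", (unsigned long long)(Addr + I));
    for (size_t J = 0; J < N; ++J)
      std::printf(" %02x", Data[I + J]);
    std::printf("%*s ", int(3 * (8 - N)), "");
    for (size_t J = 0; J < N; ++J)
      std::putchar(std::isprint(Data[I + J]) ? Data[I + J] : '.');
    std::putchar('\n');
  }
}

int main() {
  std::vector<uint8_t> Data{'h', 'e', 'l', 'l', 'o', 0, 1, 2, 3, '!'};
  hexDump(0x1000, Data);
}
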
+
+static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
+ MCContext &Ctx, MCDisassembler *PrimaryDisAsm,
+ MCDisassembler *SecondaryDisAsm,
+ const MCInstrAnalysis *MIA, MCInstPrinter *IP,
+ const MCSubtargetInfo *PrimarySTI,
+ const MCSubtargetInfo *SecondarySTI,
+ PrettyPrinter &PIP,
+ SourcePrinter &SP, bool InlineRelocs) {
+ const MCSubtargetInfo *STI = PrimarySTI;
+ MCDisassembler *DisAsm = PrimaryDisAsm;
+ bool PrimaryIsThumb = false;
+ if (isArmElf(Obj))
+ PrimaryIsThumb = STI->checkFeatures("+thumb-mode");
+
+ std::map<SectionRef, std::vector<RelocationRef>> RelocMap;
+ if (InlineRelocs)
+ RelocMap = getRelocsMap(*Obj);
+ bool Is64Bits = Obj->getBytesInAddress() > 4;
// Create a mapping from virtual address to symbol name. This is used to
// pretty print the symbols while disassembling.
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
SectionSymbolsTy AbsoluteSymbols;
+ const StringRef FileName = Obj->getFileName();
for (const SymbolRef &Symbol : Obj->symbols()) {
- Expected<uint64_t> AddressOrErr = Symbol.getAddress();
- if (!AddressOrErr)
- report_error(Obj->getFileName(), AddressOrErr.takeError());
- uint64_t Address = *AddressOrErr;
-
- Expected<StringRef> Name = Symbol.getName();
- if (!Name)
- report_error(Obj->getFileName(), Name.takeError());
- if (Name->empty())
- continue;
+ uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName);
- Expected<section_iterator> SectionOrErr = Symbol.getSection();
- if (!SectionOrErr)
- report_error(Obj->getFileName(), SectionOrErr.takeError());
+ StringRef Name = unwrapOrError(Symbol.getName(), FileName);
+ if (Name.empty())
+ continue;
uint8_t SymbolType = ELF::STT_NOTYPE;
- if (Obj->isELF())
+ if (Obj->isELF()) {
SymbolType = getElfSymbolType(Obj, Symbol);
+ if (SymbolType == ELF::STT_SECTION)
+ continue;
+ }
- section_iterator SecI = *SectionOrErr;
+ section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
if (SecI != Obj->section_end())
- AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType);
+ AllSymbols[*SecI].emplace_back(Address, Name, SymbolType);
else
- AbsoluteSymbols.emplace_back(Address, *Name, SymbolType);
-
-
+ AbsoluteSymbols.emplace_back(Address, Name, SymbolType);
}
if (AllSymbols.empty() && Obj->isELF())
addDynamicElfSymbols(Obj, AllSymbols);
@@ -1448,31 +1144,28 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
error(ExportEntry.getExportRVA(RVA));
uint64_t VA = COFFObj->getImageBase() + RVA;
- auto Sec = std::upper_bound(
- SectionAddresses.begin(), SectionAddresses.end(), VA,
- [](uint64_t LHS, const std::pair<uint64_t, SectionRef> &RHS) {
- return LHS < RHS.first;
+ auto Sec = partition_point(
+ SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) {
+ return O.first <= VA;
});
- if (Sec != SectionAddresses.begin())
+ if (Sec != SectionAddresses.begin()) {
--Sec;
- else
- Sec = SectionAddresses.end();
-
- if (Sec != SectionAddresses.end())
AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
- else
+ } else
AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE);
}
}
// Sort all the symbols, this allows us to use a simple binary search to find
// a symbol near an address.
+ StringSet<> FoundDisasmFuncsSet;
for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
array_pod_sort(SecSyms.second.begin(), SecSyms.second.end());
array_pod_sort(AbsoluteSymbols.begin(), AbsoluteSymbols.end());
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
- if (!DisassembleAll && (!Section.isText() || Section.isVirtual()))
+ if (FilterSections.empty() && !DisassembleAll &&
+ (!Section.isText() || Section.isVirtual()))
continue;
uint64_t SectionAddr = Section.getAddress();
@@ -1482,25 +1175,23 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Get the list of all the symbols in this section.
SectionSymbolsTy &Symbols = AllSymbols[Section];
- std::vector<uint64_t> DataMappingSymsAddr;
- std::vector<uint64_t> TextMappingSymsAddr;
- if (isArmElf(Obj)) {
+ std::vector<MappingSymbolPair> MappingSymbols;
+ if (hasMappingSymbols(Obj)) {
for (const auto &Symb : Symbols) {
uint64_t Address = std::get<0>(Symb);
StringRef Name = std::get<1>(Symb);
if (Name.startswith("$d"))
- DataMappingSymsAddr.push_back(Address - SectionAddr);
+ MappingSymbols.emplace_back(Address - SectionAddr, 'd');
if (Name.startswith("$x"))
- TextMappingSymsAddr.push_back(Address - SectionAddr);
+ MappingSymbols.emplace_back(Address - SectionAddr, 'x');
if (Name.startswith("$a"))
- TextMappingSymsAddr.push_back(Address - SectionAddr);
+ MappingSymbols.emplace_back(Address - SectionAddr, 'a');
if (Name.startswith("$t"))
- TextMappingSymsAddr.push_back(Address - SectionAddr);
+ MappingSymbols.emplace_back(Address - SectionAddr, 't');
}
}
- llvm::sort(DataMappingSymsAddr);
- llvm::sort(TextMappingSymsAddr);
+ llvm::sort(MappingSymbols);
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// AMDGPU disassembler uses symbolizer for printing labels
@@ -1514,19 +1205,6 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
- // Make a list of all the relocations for this section.
- std::vector<RelocationRef> Rels;
- if (InlineRelocs) {
- for (const SectionRef &RelocSec : SectionRelocMap[Section]) {
- for (const RelocationRef &Reloc : RelocSec.relocations()) {
- Rels.push_back(Reloc);
- }
- }
- }
-
- // Sort relocations by address.
- llvm::sort(Rels, isRelocAddressLess);
-
StringRef SegmentName = "";
if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
DataRefImpl DR = Section.getRawDataRefImpl();
@@ -1546,56 +1224,54 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
- StringRef BytesStr;
- error(Section.getContents(BytesStr));
- ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
- BytesStr.size());
+ ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(
+ unwrapOrError(Section.getContents(), Obj->getFileName()));
+
+ uint64_t VMAAdjustment = 0;
+ if (shouldAdjustVA(Section))
+ VMAAdjustment = AdjustVMA;
uint64_t Size;
uint64_t Index;
bool PrintedSection = false;
-
+ std::vector<RelocationRef> Rels = RelocMap[Section];
std::vector<RelocationRef>::const_iterator RelCur = Rels.begin();
std::vector<RelocationRef>::const_iterator RelEnd = Rels.end();
// Disassemble symbol by symbol.
for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
- uint64_t Start = std::get<0>(Symbols[SI]) - SectionAddr;
- // The end is either the section end or the beginning of the next
- // symbol.
- uint64_t End = (SI == SE - 1)
- ? SectSize
- : std::get<0>(Symbols[SI + 1]) - SectionAddr;
- // Don't try to disassemble beyond the end of section contents.
- if (End > SectSize)
- End = SectSize;
- // If this symbol has the same address as the next symbol, then skip it.
- if (Start >= End)
- continue;
+ std::string SymbolName = std::get<1>(Symbols[SI]).str();
+ if (Demangle)
+ SymbolName = demangle(SymbolName);
- // Check if we need to skip symbol
- // Skip if the symbol's data is not between StartAddress and StopAddress
- if (End + SectionAddr < StartAddress ||
- Start + SectionAddr > StopAddress) {
+ // Skip if --disassemble-functions is not empty and the symbol is not in
+ // the list.
+ if (!DisasmFuncsSet.empty() && !DisasmFuncsSet.count(SymbolName))
continue;
- }
- /// Skip if user requested specific symbols and this is not in the list
- if (!DisasmFuncsSet.empty() &&
- !DisasmFuncsSet.count(std::get<1>(Symbols[SI])))
+ uint64_t Start = std::get<0>(Symbols[SI]);
+ if (Start < SectionAddr || StopAddress <= Start)
continue;
+ else
+ FoundDisasmFuncsSet.insert(SymbolName);
+
+ // The end is the section end, the beginning of the next symbol, or
+ // --stop-address.
+ uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress);
+ if (SI + 1 < SE)
+ End = std::min(End, std::get<0>(Symbols[SI + 1]));
+ if (Start >= End || End <= StartAddress)
+ continue;
+ Start -= SectionAddr;
+ End -= SectionAddr;
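
A worked example of the range computation above, with invented addresses: the end of the range is the smallest of the section end, the next symbol's address, and --stop-address, and both bounds are then rebased to be section-relative.

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t SectionAddr = 0x1000, SectSize = 0x200; // section covers [0x1000, 0x1200)
  uint64_t StopAddress = 0x10f0;                   // --stop-address
  uint64_t Start = 0x1040;                         // this symbol
  uint64_t NextSym = 0x1100;                       // next symbol in the section
  uint64_t End = std::min(SectionAddr + SectSize, StopAddress);
  End = std::min(End, NextSym);
  Start -= SectionAddr;
  End -= SectionAddr;
  std::printf("disassemble [%#llx, %#llx)\n", (unsigned long long)Start,
              (unsigned long long)End); // [0x40, 0xf0)
}
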
if (!PrintedSection) {
PrintedSection = true;
- outs() << "Disassembly of section ";
+ outs() << "\nDisassembly of section ";
if (!SegmentName.empty())
outs() << SegmentName << ",";
- outs() << SectionName << ':';
+ outs() << SectionName << ":\n";
}
- // Stop disassembly at the stop address specified
- if (End + SectionAddr > StopAddress)
- End = StopAddress - SectionAddr;
-
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
// skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
@@ -1615,13 +1291,10 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
outs() << '\n';
if (!NoLeadingAddr)
- outs() << format("%016" PRIx64 " ", SectionAddr + Start);
+ outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ",
+ SectionAddr + Start + VMAAdjustment);
- StringRef SymbolName = std::get<1>(Symbols[SI]);
- if (Demangle)
- outs() << demangle(SymbolName) << ":\n";
- else
- outs() << SymbolName << ":\n";
+ outs() << SymbolName << ":\n";
// Don't print raw contents of a virtual section. A virtual section
// doesn't have any contents in the file.
@@ -1636,143 +1309,82 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
raw_ostream &DebugOut = nulls();
#endif
- for (Index = Start; Index < End; Index += Size) {
- MCInst Inst;
+ // Some targets (like WebAssembly) have a special prelude at the start
+ // of each symbol.
+ DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
+ SectionAddr + Start, DebugOut, CommentStream);
+ Start += Size;
+
+ Index = Start;
+ if (SectionAddr < StartAddress)
+ Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
+
+ // If there is a data/common symbol inside an ELF text section and we are
+ // only disassembling text (applicable to all architectures), we are in a
+ // situation where we must print the data and not disassemble it.
+ if (Obj->isELF() && !DisassembleAll && Section.isText()) {
+ uint8_t SymTy = std::get<2>(Symbols[SI]);
+ if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) {
+ dumpELFData(SectionAddr, Index, End, Bytes);
+ Index = End;
+ }
+ }
- if (Index + SectionAddr < StartAddress ||
- Index + SectionAddr > StopAddress) {
- // skip byte by byte till StartAddress is reached
- Size = 1;
+ bool CheckARMELFData = hasMappingSymbols(Obj) &&
+ std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
+ !DisassembleAll;
+ while (Index < End) {
+ // ARM and AArch64 ELF binaries can interleave data and text in the
+ // same section. We rely on the mapping symbols to understand what we
+ // need to dump. If the data marker falls within a function, the bytes
+ // are emitted as .word/.short/.byte directives.
+ if (CheckARMELFData &&
+ getMappingSymbolKind(MappingSymbols, Index) == 'd') {
+ Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
+ MappingSymbols);
continue;
}
- // AArch64 ELF binaries can interleave data and text in the
- // same section. We rely on the markers introduced to
- // understand what we need to dump. If the data marker is within a
- // function, it is denoted as a word/short etc
- if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
- !DisassembleAll) {
- uint64_t Stride = 0;
-
- auto DAI = std::lower_bound(DataMappingSymsAddr.begin(),
- DataMappingSymsAddr.end(), Index);
- if (DAI != DataMappingSymsAddr.end() && *DAI == Index) {
- // Switch to data.
- while (Index < End) {
- outs() << format("%8" PRIx64 ":", SectionAddr + Index);
- outs() << "\t";
- if (Index + 4 <= End) {
- Stride = 4;
- dumpBytes(Bytes.slice(Index, 4), outs());
- outs() << "\t.word\t";
- uint32_t Data = 0;
- if (Obj->isLittleEndian()) {
- const auto Word =
- reinterpret_cast<const support::ulittle32_t *>(
- Bytes.data() + Index);
- Data = *Word;
- } else {
- const auto Word = reinterpret_cast<const support::ubig32_t *>(
- Bytes.data() + Index);
- Data = *Word;
- }
- outs() << "0x" << format("%08" PRIx32, Data);
- } else if (Index + 2 <= End) {
- Stride = 2;
- dumpBytes(Bytes.slice(Index, 2), outs());
- outs() << "\t\t.short\t";
- uint16_t Data = 0;
- if (Obj->isLittleEndian()) {
- const auto Short =
- reinterpret_cast<const support::ulittle16_t *>(
- Bytes.data() + Index);
- Data = *Short;
- } else {
- const auto Short =
- reinterpret_cast<const support::ubig16_t *>(Bytes.data() +
- Index);
- Data = *Short;
- }
- outs() << "0x" << format("%04" PRIx16, Data);
- } else {
- Stride = 1;
- dumpBytes(Bytes.slice(Index, 1), outs());
- outs() << "\t\t.byte\t";
- outs() << "0x" << format("%02" PRIx8, Bytes.slice(Index, 1)[0]);
- }
- Index += Stride;
- outs() << "\n";
- auto TAI = std::lower_bound(TextMappingSymsAddr.begin(),
- TextMappingSymsAddr.end(), Index);
- if (TAI != TextMappingSymsAddr.end() && *TAI == Index)
- break;
- }
+
+ // When -z or --disassemble-zeroes is given, we always disassemble
+ // the zero bytes. Otherwise we may want to skip them.
+ if (!DisassembleZeroes) {
+ uint64_t MaxOffset = End - Index;
+ // For -reloc: print zero blocks patched by relocations, so that
+ // relocations can be shown in the dump.
+ if (RelCur != RelEnd)
+ MaxOffset = RelCur->getOffset() - Index;
+
+ if (size_t N =
+ countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
+ outs() << "\t\t..." << '\n';
+ Index += N;
+ continue;
}
}
- // If there is a data symbol inside an ELF text section and we are only
- // disassembling text (applicable all architectures),
- // we are in a situation where we must print the data and not
- // disassemble it.
- if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
- !DisassembleAll && Section.isText()) {
- // print out data up to 8 bytes at a time in hex and ascii
- uint8_t AsciiData[9] = {'\0'};
- uint8_t Byte;
- int NumBytes = 0;
-
- for (Index = Start; Index < End; Index += 1) {
- if (((SectionAddr + Index) < StartAddress) ||
- ((SectionAddr + Index) > StopAddress))
- continue;
- if (NumBytes == 0) {
- outs() << format("%8" PRIx64 ":", SectionAddr + Index);
- outs() << "\t";
- }
- Byte = Bytes.slice(Index)[0];
- outs() << format(" %02x", Byte);
- AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
-
- uint8_t IndentOffset = 0;
- NumBytes++;
- if (Index == End - 1 || NumBytes > 8) {
- // Indent the space for less than 8 bytes data.
- // 2 spaces for byte and one for space between bytes
- IndentOffset = 3 * (8 - NumBytes);
- for (int Excess = 8 - NumBytes; Excess < 8; Excess++)
- AsciiData[Excess] = '\0';
- NumBytes = 8;
- }
- if (NumBytes == 8) {
- AsciiData[8] = '\0';
- outs() << std::string(IndentOffset, ' ') << " ";
- outs() << reinterpret_cast<char *>(AsciiData);
- outs() << '\n';
- NumBytes = 0;
- }
+ if (SecondarySTI) {
+ if (getMappingSymbolKind(MappingSymbols, Index) == 'a') {
+ STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI;
+ DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm;
+ } else if (getMappingSymbolKind(MappingSymbols, Index) == 't') {
+ STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI;
+ DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm;
}
}
- if (Index >= End)
- break;
-
- if (size_t N =
- countSkippableZeroBytes(Bytes.slice(Index, End - Index))) {
- outs() << "\t\t..." << '\n';
- Index += N;
- if (Index >= End)
- break;
- }
// Disassemble a real instruction or a data when disassemble all is
// provided
- bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
- SectionAddr + Index, DebugOut,
- CommentStream);
+ MCInst Inst;
+ bool Disassembled = DisAsm->getInstruction(
+ Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut,
+ CommentStream);
if (Size == 0)
Size = 1;
- PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
- Bytes.slice(Index, Size), SectionAddr + Index, outs(), "",
- *STI, &SP, &Rels);
+ PIP.printInst(
+ *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
+ {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
+ "", *STI, &SP, &Rels);
outs() << CommentStream.str();
Comments.clear();
@@ -1791,37 +1403,34 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// N.B. We don't walk the relocations in the relocatable case yet.
auto *TargetSectionSymbols = &Symbols;
if (!Obj->isRelocatableObject()) {
- auto SectionAddress = std::upper_bound(
- SectionAddresses.begin(), SectionAddresses.end(), Target,
- [](uint64_t LHS,
- const std::pair<uint64_t, SectionRef> &RHS) {
- return LHS < RHS.first;
+ auto It = partition_point(
+ SectionAddresses,
+ [=](const std::pair<uint64_t, SectionRef> &O) {
+ return O.first <= Target;
});
- if (SectionAddress != SectionAddresses.begin()) {
- --SectionAddress;
- TargetSectionSymbols = &AllSymbols[SectionAddress->second];
+ if (It != SectionAddresses.begin()) {
+ --It;
+ TargetSectionSymbols = &AllSymbols[It->second];
} else {
TargetSectionSymbols = &AbsoluteSymbols;
}
}
- // Find the first symbol in the section whose offset is less than
+ // Find the last symbol in the section whose offset is less than
// or equal to the target. If there isn't a section that contains
// the target, find the nearest preceding absolute symbol.
- auto TargetSym = std::upper_bound(
- TargetSectionSymbols->begin(), TargetSectionSymbols->end(),
- Target, [](uint64_t LHS,
- const std::tuple<uint64_t, StringRef, uint8_t> &RHS) {
- return LHS < std::get<0>(RHS);
+ auto TargetSym = partition_point(
+ *TargetSectionSymbols,
+ [=](const std::tuple<uint64_t, StringRef, uint8_t> &O) {
+ return std::get<0>(O) <= Target;
});
if (TargetSym == TargetSectionSymbols->begin()) {
TargetSectionSymbols = &AbsoluteSymbols;
- TargetSym = std::upper_bound(
- AbsoluteSymbols.begin(), AbsoluteSymbols.end(),
- Target, [](uint64_t LHS,
- const std::tuple<uint64_t, StringRef, uint8_t> &RHS) {
- return LHS < std::get<0>(RHS);
- });
+ TargetSym = partition_point(
+ AbsoluteSymbols,
+ [=](const std::tuple<uint64_t, StringRef, uint8_t> &O) {
+ return std::get<0>(O) <= Target;
+ });
}
if (TargetSym != TargetSectionSymbols->begin()) {
--TargetSym;
@@ -1838,34 +1447,125 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
outs() << "\n";
// Hexagon does this in pretty printer
- if (Obj->getArch() != Triple::hexagon)
+ if (Obj->getArch() != Triple::hexagon) {
// Print relocation for instruction.
while (RelCur != RelEnd) {
- uint64_t Addr = RelCur->getOffset();
- SmallString<16> Name;
- SmallString<32> Val;
-
+ uint64_t Offset = RelCur->getOffset();
// If this relocation is hidden, skip it.
- if (getHidden(*RelCur) || ((SectionAddr + Addr) < StartAddress)) {
+ if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
continue;
}
- // Stop when rel_cur's address is past the current instruction.
- if (Addr >= Index + Size)
+ // Stop when RelCur's offset is past the current instruction.
+ if (Offset >= Index + Size)
break;
- RelCur->getTypeName(Name);
- error(getRelocationValueString(*RelCur, Val));
- outs() << format(Fmt.data(), SectionAddr + Addr) << Name << "\t"
- << Val << "\n";
+
+ // When --adjust-vma is used, update the address printed.
+ if (RelCur->getSymbol() != Obj->symbol_end()) {
+ Expected<section_iterator> SymSI =
+ RelCur->getSymbol()->getSection();
+ if (SymSI && *SymSI != Obj->section_end() &&
+ shouldAdjustVA(**SymSI))
+ Offset += AdjustVMA;
+ }
+
+ printRelocation(*RelCur, SectionAddr + Offset, Is64Bits);
++RelCur;
}
+ }
+
+ Index += Size;
}
}
}
+ StringSet<> MissingDisasmFuncsSet =
+ set_difference(DisasmFuncsSet, FoundDisasmFuncsSet);
+ for (StringRef MissingDisasmFunc : MissingDisasmFuncsSet.keys())
+ warn("failed to disassemble missing function " + MissingDisasmFunc);
+}
+
+static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
+ const Target *TheTarget = getTarget(Obj);
+
+ // Package up features to be passed to target/subtarget
+ SubtargetFeatures Features = Obj->getFeatures();
+ if (!MAttrs.empty())
+ for (unsigned I = 0; I != MAttrs.size(); ++I)
+ Features.AddFeature(MAttrs[I]);
+
+ std::unique_ptr<const MCRegisterInfo> MRI(
+ TheTarget->createMCRegInfo(TripleName));
+ if (!MRI)
+ report_error(Obj->getFileName(),
+ "no register info for target " + TripleName);
+
+ // Set up disassembler.
+ std::unique_ptr<const MCAsmInfo> AsmInfo(
+ TheTarget->createMCAsmInfo(*MRI, TripleName));
+ if (!AsmInfo)
+ report_error(Obj->getFileName(),
+ "no assembly info for target " + TripleName);
+ std::unique_ptr<const MCSubtargetInfo> STI(
+ TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
+ if (!STI)
+ report_error(Obj->getFileName(),
+ "no subtarget info for target " + TripleName);
+ std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+ if (!MII)
+ report_error(Obj->getFileName(),
+ "no instruction info for target " + TripleName);
+ MCObjectFileInfo MOFI;
+ MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
+ // FIXME: for now initialize MCObjectFileInfo with default values
+ MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
+
+ std::unique_ptr<MCDisassembler> DisAsm(
+ TheTarget->createMCDisassembler(*STI, Ctx));
+ if (!DisAsm)
+ report_error(Obj->getFileName(),
+ "no disassembler for target " + TripleName);
+
+ // If we have an ARM object file, we need a second disassembler, because
+ // ARM CPUs have two different instruction sets: ARM mode and Thumb mode.
+ // We use mapping symbols to switch between the two disassemblers, where
+ // appropriate.
+ std::unique_ptr<MCDisassembler> SecondaryDisAsm;
+ std::unique_ptr<const MCSubtargetInfo> SecondarySTI;
+ if (isArmElf(Obj) && !STI->checkFeatures("+mclass")) {
+ if (STI->checkFeatures("+thumb-mode"))
+ Features.AddFeature("-thumb-mode");
+ else
+ Features.AddFeature("+thumb-mode");
+ SecondarySTI.reset(TheTarget->createMCSubtargetInfo(TripleName, MCPU,
+ Features.getString()));
+ SecondaryDisAsm.reset(TheTarget->createMCDisassembler(*SecondarySTI, Ctx));
+ }
+
+ std::unique_ptr<const MCInstrAnalysis> MIA(
+ TheTarget->createMCInstrAnalysis(MII.get()));
+
+ int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+ std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+ Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
+ if (!IP)
+ report_error(Obj->getFileName(),
+ "no instruction printer for target " + TripleName);
+ IP->setPrintImmHex(PrintImmHex);
+
+ PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
+ SourcePrinter SP(Obj, TheTarget->getName());
+
+ for (StringRef Opt : DisassemblerOptions)
+ if (!IP->applyTargetSpecificCLOption(Opt))
+ error("Unrecognized disassembler option: " + Opt);
+
+ disassembleObject(TheTarget, Obj, Ctx, DisAsm.get(), SecondaryDisAsm.get(),
+ MIA.get(), IP.get(), STI.get(), SecondarySTI.get(), PIP,
+ SP, InlineRelocs);
}
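
For ARM ELF objects this wrapper builds a primary and a secondary disassembler whose features differ only in thumb-mode, and the worker flips between them at '$a'/'$t' mapping symbols. A minimal model of that selection in plain C++ (not the LLVM API; the strings stand in for the two MCDisassembler/MCSubtargetInfo pairs):

#include <iostream>
#include <string>

int main() {
  bool PrimaryIsThumb = false; // what the primary subtarget was created with
  std::string Primary = PrimaryIsThumb ? "thumb" : "arm";
  std::string Secondary = PrimaryIsThumb ? "arm" : "thumb";
  const char Kinds[] = {'a', 't', 'a'}; // mapping symbol kinds seen in order
  for (char Kind : Kinds) {
    // '$a' selects the ARM-mode decoder, '$t' the Thumb-mode one.
    std::string Active = (Kind == 'a') ? (PrimaryIsThumb ? Secondary : Primary)
                                       : (PrimaryIsThumb ? Primary : Secondary);
    std::cout << '$' << Kind << " -> " << Active << " decoder\n";
  }
}
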
-void llvm::printRelocations(const ObjectFile *Obj) {
+void printRelocations(const ObjectFile *Obj) {
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
"%08" PRIx64;
// Regular objdump doesn't print relocations in non-relocatable object
@@ -1873,28 +1573,40 @@ void llvm::printRelocations(const ObjectFile *Obj) {
if (!Obj->isRelocatableObject())
return;
+ // Build a mapping from relocation target to a vector of relocation
+ // sections. Usually, there is only one relocation section for
+ // each relocated section.
+ MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
if (Section.relocation_begin() == Section.relocation_end())
continue;
+ const SectionRef TargetSec = *Section.getRelocatedSection();
+ SecToRelSec[TargetSec].push_back(Section);
+ }
+
+ for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
StringRef SecName;
- error(Section.getName(SecName));
+ error(P.first.getName(SecName));
outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n";
- for (const RelocationRef &Reloc : Section.relocations()) {
- uint64_t Address = Reloc.getOffset();
- SmallString<32> RelocName;
- SmallString<32> ValueStr;
- if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
- continue;
- Reloc.getTypeName(RelocName);
- error(getRelocationValueString(Reloc, ValueStr));
- outs() << format(Fmt.data(), Address) << " " << RelocName << " "
- << ValueStr << "\n";
+
+ for (SectionRef Section : P.second) {
+ for (const RelocationRef &Reloc : Section.relocations()) {
+ uint64_t Address = Reloc.getOffset();
+ SmallString<32> RelocName;
+ SmallString<32> ValueStr;
+ if (Address < StartAddress || Address > StopAddress || getHidden(Reloc))
+ continue;
+ Reloc.getTypeName(RelocName);
+ error(getRelocationValueString(Reloc, ValueStr));
+ outs() << format(Fmt.data(), Address) << " " << RelocName << " "
+ << ValueStr << "\n";
+ }
}
outs() << "\n";
}
}
-void llvm::printDynamicRelocations(const ObjectFile *Obj) {
+void printDynamicRelocations(const ObjectFile *Obj) {
// For the moment, this option is for ELF only
if (!Obj->isELF())
return;
@@ -1911,9 +1623,7 @@ void llvm::printDynamicRelocations(const ObjectFile *Obj) {
outs() << "DYNAMIC RELOCATION RECORDS\n";
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
- for (const SectionRef &Section : DynRelSec) {
- if (Section.relocation_begin() == Section.relocation_end())
- continue;
+ for (const SectionRef &Section : DynRelSec)
for (const RelocationRef &Reloc : Section.relocations()) {
uint64_t Address = Reloc.getOffset();
SmallString<32> RelocName;
@@ -1923,34 +1633,60 @@ void llvm::printDynamicRelocations(const ObjectFile *Obj) {
outs() << format(Fmt.data(), Address) << " " << RelocName << " "
<< ValueStr << "\n";
}
- }
}
-void llvm::printSectionHeaders(const ObjectFile *Obj) {
- outs() << "Sections:\n"
- "Idx Name Size Address Type\n";
+// Returns true if we need to show the LMA column when dumping section headers.
+// We show it only for ELF, and only when at least one section has a VMA that
+// differs from its LMA, or when the --show-lma flag is used.
+static bool shouldDisplayLMA(const ObjectFile *Obj) {
+ if (!Obj->isELF())
+ return false;
+ for (const SectionRef &S : ToolSectionFilter(*Obj))
+ if (S.getAddress() != getELFSectionLMA(S))
+ return true;
+ return ShowLMA;
+}
+
+void printSectionHeaders(const ObjectFile *Obj) {
+ bool HasLMAColumn = shouldDisplayLMA(Obj);
+ if (HasLMAColumn)
+ outs() << "Sections:\n"
+ "Idx Name Size VMA LMA "
+ "Type\n";
+ else
+ outs() << "Sections:\n"
+ "Idx Name Size VMA Type\n";
+
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name;
error(Section.getName(Name));
- uint64_t Address = Section.getAddress();
+ uint64_t VMA = Section.getAddress();
+ if (shouldAdjustVA(Section))
+ VMA += AdjustVMA;
+
uint64_t Size = Section.getSize();
bool Text = Section.isText();
bool Data = Section.isData();
bool BSS = Section.isBSS();
std::string Type = (std::string(Text ? "TEXT " : "") +
(Data ? "DATA " : "") + (BSS ? "BSS" : ""));
- outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n",
- (unsigned)Section.getIndex(), Name.str().c_str(), Size,
- Address, Type.c_str());
+
+ if (HasLMAColumn)
+ outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %016" PRIx64
+ " %s\n",
+ (unsigned)Section.getIndex(), Name.str().c_str(), Size,
+ VMA, getELFSectionLMA(Section), Type.c_str());
+ else
+ outs() << format("%3d %-13s %08" PRIx64 " %016" PRIx64 " %s\n",
+ (unsigned)Section.getIndex(), Name.str().c_str(), Size,
+ VMA, Type.c_str());
}
outs() << "\n";
}
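
Sample rows for the two layouts above, generated from the same format strings (section names, sizes and addresses are invented for illustration):

#include <cstdio>

int main() {
  // Layout without the LMA column:
  std::printf("%3d %-13s %08llx %016llx %s\n", 1, ".text", 0x34ULL, 0x401000ULL,
              "TEXT ");
  // Layout with the LMA column (printed when some section's VMA != LMA):
  std::printf("%3d %-13s %08llx %016llx %016llx %s\n", 2, ".data", 0x10ULL,
              0x20000000ULL, 0x8000000ULL, "DATA ");
}
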
-void llvm::printSectionContents(const ObjectFile *Obj) {
- std::error_code EC;
+void printSectionContents(const ObjectFile *Obj) {
for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
StringRef Name;
- StringRef Contents;
error(Section.getName(Name));
uint64_t BaseAddr = Section.getAddress();
uint64_t Size = Section.getSize();
@@ -1965,7 +1701,7 @@ void llvm::printSectionContents(const ObjectFile *Obj) {
continue;
}
- error(Section.getContents(Contents));
+ StringRef Contents = unwrapOrError(Section.getContents(), Obj->getFileName());
// Dump out the content as hex and printable ascii characters.
for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) {
@@ -1993,8 +1729,8 @@ void llvm::printSectionContents(const ObjectFile *Obj) {
}
}
-void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
- StringRef ArchitectureName) {
+void printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
+ StringRef ArchitectureName) {
outs() << "SYMBOL TABLE:\n";
if (const COFFObjectFile *Coff = dyn_cast<const COFFObjectFile>(O)) {
@@ -2002,41 +1738,24 @@ void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
return;
}
+ const StringRef FileName = O->getFileName();
for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) {
- // Skip printing the special zero symbol when dumping an ELF file.
- // This makes the output consistent with the GNU objdump.
- if (I == O->symbol_begin() && isa<ELFObjectFileBase>(O))
- continue;
-
const SymbolRef &Symbol = *I;
- Expected<uint64_t> AddressOrError = Symbol.getAddress();
- if (!AddressOrError)
- report_error(ArchiveName, O->getFileName(), AddressOrError.takeError(),
- ArchitectureName);
- uint64_t Address = *AddressOrError;
+ uint64_t Address = unwrapOrError(Symbol.getAddress(), ArchiveName, FileName,
+ ArchitectureName);
if ((Address < StartAddress) || (Address > StopAddress))
continue;
- Expected<SymbolRef::Type> TypeOrError = Symbol.getType();
- if (!TypeOrError)
- report_error(ArchiveName, O->getFileName(), TypeOrError.takeError(),
- ArchitectureName);
- SymbolRef::Type Type = *TypeOrError;
+ SymbolRef::Type Type = unwrapOrError(Symbol.getType(), ArchiveName,
+ FileName, ArchitectureName);
uint32_t Flags = Symbol.getFlags();
- Expected<section_iterator> SectionOrErr = Symbol.getSection();
- if (!SectionOrErr)
- report_error(ArchiveName, O->getFileName(), SectionOrErr.takeError(),
- ArchitectureName);
- section_iterator Section = *SectionOrErr;
+ section_iterator Section = unwrapOrError(Symbol.getSection(), ArchiveName,
+ FileName, ArchitectureName);
StringRef Name;
- if (Type == SymbolRef::ST_Debug && Section != O->section_end()) {
+ if (Type == SymbolRef::ST_Debug && Section != O->section_end())
Section->getName(Name);
- } else {
- Expected<StringRef> NameOrErr = Symbol.getName();
- if (!NameOrErr)
- report_error(ArchiveName, O->getFileName(), NameOrErr.takeError(),
- ArchitectureName);
- Name = *NameOrErr;
- }
+ else
+ Name = unwrapOrError(Symbol.getName(), ArchiveName, FileName,
+ ArchitectureName);
bool Global = Flags & SymbolRef::SF_Global;
bool Weak = Flags & SymbolRef::SF_Weak;
@@ -2087,20 +1806,38 @@ void llvm::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
outs() << SectionName;
}
- outs() << '\t';
if (Common || isa<ELFObjectFileBase>(O)) {
uint64_t Val =
Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
- outs() << format("\t %08" PRIx64 " ", Val);
+ outs() << format("\t%08" PRIx64, Val);
}
- if (Hidden)
- outs() << ".hidden ";
+ if (isa<ELFObjectFileBase>(O)) {
+ uint8_t Other = ELFSymbolRef(Symbol).getOther();
+ switch (Other) {
+ case ELF::STV_DEFAULT:
+ break;
+ case ELF::STV_INTERNAL:
+ outs() << " .internal";
+ break;
+ case ELF::STV_HIDDEN:
+ outs() << " .hidden";
+ break;
+ case ELF::STV_PROTECTED:
+ outs() << " .protected";
+ break;
+ default:
+ outs() << format(" 0x%02x", Other);
+ break;
+ }
+ } else if (Hidden) {
+ outs() << " .hidden";
+ }
if (Demangle)
- outs() << demangle(Name) << '\n';
+ outs() << ' ' << demangle(Name) << '\n';
else
- outs() << Name << '\n';
+ outs() << ' ' << Name << '\n';
}
}
@@ -2118,59 +1855,9 @@ static void printUnwindInfo(const ObjectFile *O) {
"for COFF and MachO object files.\n";
}
-void llvm::printExportsTrie(const ObjectFile *o) {
- outs() << "Exports trie:\n";
- if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
- printMachOExportsTrie(MachO);
- else
- WithColor::error(errs(), ToolName)
- << "This operation is only currently supported "
- "for Mach-O executable files.\n";
-}
-
-void llvm::printRebaseTable(ObjectFile *o) {
- outs() << "Rebase table:\n";
- if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
- printMachORebaseTable(MachO);
- else
- WithColor::error(errs(), ToolName)
- << "This operation is only currently supported "
- "for Mach-O executable files.\n";
-}
-
-void llvm::printBindTable(ObjectFile *o) {
- outs() << "Bind table:\n";
- if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
- printMachOBindTable(MachO);
- else
- WithColor::error(errs(), ToolName)
- << "This operation is only currently supported "
- "for Mach-O executable files.\n";
-}
-
-void llvm::printLazyBindTable(ObjectFile *o) {
- outs() << "Lazy bind table:\n";
- if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
- printMachOLazyBindTable(MachO);
- else
- WithColor::error(errs(), ToolName)
- << "This operation is only currently supported "
- "for Mach-O executable files.\n";
-}
-
-void llvm::printWeakBindTable(ObjectFile *o) {
- outs() << "Weak bind table:\n";
- if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
- printMachOWeakBindTable(MachO);
- else
- WithColor::error(errs(), ToolName)
- << "This operation is only currently supported "
- "for Mach-O executable files.\n";
-}
-
/// Dump the raw contents of the __clangast section so the output can be piped
/// into llvm-bcanalyzer.
-void llvm::printRawClangAST(const ObjectFile *Obj) {
+void printRawClangAST(const ObjectFile *Obj) {
if (outs().is_displayed()) {
WithColor::error(errs(), ToolName)
<< "The -raw-clang-ast option will dump the raw binary contents of "
@@ -2197,8 +1884,8 @@ void llvm::printRawClangAST(const ObjectFile *Obj) {
if (!ClangASTSection)
return;
- StringRef ClangASTContents;
- error(ClangASTSection.getValue().getContents(ClangASTContents));
+ StringRef ClangASTContents = unwrapOrError(
+ ClangASTSection.getValue().getContents(), Obj->getFileName());
outs().write(ClangASTContents.data(), ClangASTContents.size());
}
@@ -2234,9 +1921,8 @@ static void printFaultMaps(const ObjectFile *Obj) {
return;
}
- StringRef FaultMapContents;
- error(FaultMapSection.getValue().getContents(FaultMapContents));
-
+ StringRef FaultMapContents =
+ unwrapOrError(FaultMapSection.getValue().getContents(), Obj->getFileName());
FaultMapParser FMP(FaultMapContents.bytes_begin(),
FaultMapContents.bytes_end());
@@ -2246,7 +1932,9 @@ static void printFaultMaps(const ObjectFile *Obj) {
static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
if (O->isELF()) {
printELFFileHeader(O);
- return printELFDynamicSection(O);
+ printELFDynamicSection(O);
+ printELFSymbolVersionInfo(O);
+ return;
}
if (O->isCOFF())
return printCOFFFileHeader(O);
@@ -2267,12 +1955,9 @@ static void printFileHeaders(const ObjectFile *O) {
Triple::ArchType AT = O->getArch();
outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n";
- Expected<uint64_t> StartAddrOrErr = O->getStartAddress();
- if (!StartAddrOrErr)
- report_error(O->getFileName(), StartAddrOrErr.takeError());
+ uint64_t Address = unwrapOrError(O->getStartAddress(), O->getFileName());
StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
- uint64_t Address = StartAddrOrErr.get();
outs() << "start address: "
<< "0x" << format(Fmt.data(), Address) << "\n\n";
}
@@ -2297,22 +1982,9 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
outs() << " ";
- Expected<unsigned> UIDOrErr = C.getUID();
- if (!UIDOrErr)
- report_error(Filename, UIDOrErr.takeError());
- unsigned UID = UIDOrErr.get();
- outs() << format("%d/", UID);
-
- Expected<unsigned> GIDOrErr = C.getGID();
- if (!GIDOrErr)
- report_error(Filename, GIDOrErr.takeError());
- unsigned GID = GIDOrErr.get();
- outs() << format("%-d ", GID);
-
- Expected<uint64_t> Size = C.getRawSize();
- if (!Size)
- report_error(Filename, Size.takeError());
- outs() << format("%6" PRId64, Size.get()) << " ";
+ outs() << format("%d/%d %6" PRId64 " ", unwrapOrError(C.getUID(), Filename),
+ unwrapOrError(C.getGID(), Filename),
+ unwrapOrError(C.getRawSize(), Filename));
StringRef RawLastModified = C.getRawLastModified();
unsigned Seconds;
@@ -2331,10 +2003,7 @@ static void printArchiveChild(StringRef Filename, const Archive::Child &C) {
Expected<StringRef> NameOrErr = C.getName();
if (!NameOrErr) {
consumeError(NameOrErr.takeError());
- Expected<StringRef> RawNameOrErr = C.getRawName();
- if (!RawNameOrErr)
- report_error(Filename, NameOrErr.takeError());
- Name = RawNameOrErr.get();
+ Name = unwrapOrError(C.getRawName(), Filename);
} else {
Name = NameOrErr.get();
}
@@ -2386,7 +2055,7 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
printWeakBindTable(O);
if (RawClangAST)
printRawClangAST(O);
- if (PrintFaultMaps)
+ if (FaultMapSection)
printFaultMaps(O);
if (DwarfDumpType != DIDT_Null) {
std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
@@ -2421,7 +2090,7 @@ static void dumpArchive(const Archive *A) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
- report_error(A->getFileName(), C, std::move(E));
+ report_error(std::move(E), A->getFileName(), C);
continue;
}
if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
@@ -2429,10 +2098,11 @@ static void dumpArchive(const Archive *A) {
else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
dumpObject(I, A, &C);
else
- report_error(A->getFileName(), object_error::invalid_file_type);
+ report_error(errorCodeToError(object_error::invalid_file_type),
+ A->getFileName());
}
if (Err)
- report_error(A->getFileName(), std::move(Err));
+ report_error(std::move(Err), A->getFileName());
}
/// Open file and figure out how to dump it.
@@ -2446,10 +2116,8 @@ static void dumpInput(StringRef file) {
}
// Attempt to open the binary.
- Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(file);
- if (!BinaryOrErr)
- report_error(file, BinaryOrErr.takeError());
- Binary &Binary = *BinaryOrErr.get().getBinary();
+ OwningBinary<Binary> OBinary = unwrapOrError(createBinary(file), file);
+ Binary &Binary = *OBinary.getBinary();
if (Archive *A = dyn_cast<Archive>(&Binary))
dumpArchive(A);
@@ -2458,22 +2126,29 @@ static void dumpInput(StringRef file) {
else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary))
parseInputMachO(UB);
else
- report_error(file, object_error::invalid_file_type);
+ report_error(errorCodeToError(object_error::invalid_file_type), file);
}
+} // namespace llvm
int main(int argc, char **argv) {
+ using namespace llvm;
InitLLVM X(argc, argv);
+ const cl::OptionCategory *OptionFilters[] = {&ObjdumpCat, &MachOCat};
+ cl::HideUnrelatedOptions(OptionFilters);
// Initialize targets and assembly printers/parsers.
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllDisassemblers();
+ InitializeAllTargetInfos();
+ InitializeAllTargetMCs();
+ InitializeAllDisassemblers();
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n");
+ if (StartAddress >= StopAddress)
+ error("start address should be less than stop address");
+
ToolName = argv[0];
// Defaults to a.out if no filenames specified.
@@ -2481,40 +2156,22 @@ int main(int argc, char **argv) {
InputFilenames.push_back("a.out");
if (AllHeaders)
- FileHeaders = PrivateHeaders = Relocations = SectionHeaders = SymbolTable =
- true;
+ ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
+ SectionHeaders = SymbolTable = true;
- if (DisassembleAll || PrintSource || PrintLines)
+ if (DisassembleAll || PrintSource || PrintLines ||
+ (!DisassembleFunctions.empty()))
Disassemble = true;
- if (!Disassemble
- && !Relocations
- && !DynamicRelocations
- && !SectionHeaders
- && !SectionContents
- && !SymbolTable
- && !UnwindInfo
- && !PrivateHeaders
- && !FileHeaders
- && !FirstPrivateHeader
- && !ExportsTrie
- && !Rebase
- && !Bind
- && !LazyBind
- && !WeakBind
- && !RawClangAST
- && !(UniversalHeaders && MachOOpt)
- && !ArchiveHeaders
- && !(IndirectSymbols && MachOOpt)
- && !(DataInCode && MachOOpt)
- && !(LinkOptHints && MachOOpt)
- && !(InfoPlist && MachOOpt)
- && !(DylibsUsed && MachOOpt)
- && !(DylibId && MachOOpt)
- && !(ObjcMetaData && MachOOpt)
- && !(!FilterSections.empty() && MachOOpt)
- && !PrintFaultMaps
- && DwarfDumpType == DIDT_Null) {
+ if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null &&
+ !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST &&
+ !Relocations && !SectionHeaders && !SectionContents && !SymbolTable &&
+ !UnwindInfo && !FaultMapSection &&
+ !(MachOOpt &&
+ (Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie ||
+ FirstPrivateHeader || IndirectSymbols || InfoPlist || LazyBind ||
+ LinkOptHints || ObjcMetaData || Rebase || UniversalHeaders ||
+ WeakBind || !FilterSections.empty()))) {
cl::PrintHelpMessage();
return 2;
}
@@ -2524,5 +2181,7 @@ int main(int argc, char **argv) {
llvm::for_each(InputFilenames, dumpInput);
+ warnOnNoMatchForSections();
+
return EXIT_SUCCESS;
}
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index fe2cb05fe227..e58d4a05c2e6 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -1,8 +1,7 @@
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -19,57 +18,86 @@ namespace llvm {
class StringRef;
namespace object {
- class COFFObjectFile;
- class COFFImportFile;
- class MachOObjectFile;
- class MachOUniversalBinary;
- class ObjectFile;
- class Archive;
- class RelocationRef;
+class COFFObjectFile;
+class COFFImportFile;
+class ELFObjectFileBase;
+class ELFSectionRef;
+class MachOObjectFile;
+class MachOUniversalBinary;
+class RelocationRef;
}
-extern cl::opt<std::string> TripleName;
-extern cl::opt<std::string> ArchName;
-extern cl::opt<std::string> MCPU;
-extern cl::list<std::string> MAttrs;
-extern cl::list<std::string> FilterSections;
-extern cl::opt<bool> AllHeaders;
extern cl::opt<bool> Demangle;
-extern cl::opt<bool> Disassemble;
-extern cl::opt<bool> DisassembleAll;
-extern cl::opt<bool> NoShowRawInsn;
-extern cl::opt<bool> NoLeadingAddr;
-extern cl::opt<bool> PrivateHeaders;
-extern cl::opt<bool> FileHeaders;
-extern cl::opt<bool> FirstPrivateHeader;
-extern cl::opt<bool> ExportsTrie;
-extern cl::opt<bool> Rebase;
-extern cl::opt<bool> Bind;
-extern cl::opt<bool> LazyBind;
-extern cl::opt<bool> WeakBind;
-extern cl::opt<bool> RawClangAST;
-extern cl::opt<bool> UniversalHeaders;
-extern cl::opt<bool> ArchiveHeaders;
-extern cl::opt<bool> IndirectSymbols;
-extern cl::opt<bool> DataInCode;
-extern cl::opt<bool> LinkOptHints;
-extern cl::opt<bool> InfoPlist;
-extern cl::opt<bool> DylibsUsed;
-extern cl::opt<bool> DylibId;
-extern cl::opt<bool> ObjcMetaData;
-extern cl::opt<std::string> DisSymName;
-extern cl::opt<bool> NonVerbose;
-extern cl::opt<bool> Relocations;
-extern cl::opt<bool> DynamicRelocations;
-extern cl::opt<bool> SectionHeaders;
-extern cl::opt<bool> SectionContents;
-extern cl::opt<bool> SymbolTable;
-extern cl::opt<bool> UnwindInfo;
-extern cl::opt<bool> PrintImmHex;
-extern cl::opt<DIDumpType> DwarfDumpType;
+
+typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+
+class SectionFilterIterator {
+public:
+ SectionFilterIterator(FilterPredicate P,
+ llvm::object::section_iterator const &I,
+ llvm::object::section_iterator const &E)
+ : Predicate(std::move(P)), Iterator(I), End(E) {
+ ScanPredicate();
+ }
+ const llvm::object::SectionRef &operator*() const { return *Iterator; }
+ SectionFilterIterator &operator++() {
+ ++Iterator;
+ ScanPredicate();
+ return *this;
+ }
+ bool operator!=(SectionFilterIterator const &Other) const {
+ return Iterator != Other.Iterator;
+ }
+
+private:
+ void ScanPredicate() {
+ while (Iterator != End && !Predicate(*Iterator)) {
+ ++Iterator;
+ }
+ }
+ FilterPredicate Predicate;
+ llvm::object::section_iterator Iterator;
+ llvm::object::section_iterator End;
+};
+
+class SectionFilter {
+public:
+ SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
+ : Predicate(std::move(P)), Object(O) {}
+ SectionFilterIterator begin() {
+ return SectionFilterIterator(Predicate, Object.section_begin(),
+ Object.section_end());
+ }
+ SectionFilterIterator end() {
+ return SectionFilterIterator(Predicate, Object.section_end(),
+ Object.section_end());
+ }
+
+private:
+ FilterPredicate Predicate;
+ llvm::object::ObjectFile const &Object;
+};
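
This is the standard predicate-filtering iterator pair: begin() advances to the first section passing the predicate, operator++ re-scans, and iteration stops once the underlying iterator reaches end(). The same shape as a free-standing sketch over a plain vector (plain C++17, invented data; not the LLVM types):

#include <functional>
#include <iostream>
#include <vector>

// Same shape as SectionFilterIterator above, over a vector<int> for brevity.
class FilterIterator {
  std::function<bool(int)> Pred;
  std::vector<int>::const_iterator Cur, End;
  void scan() {
    while (Cur != End && !Pred(*Cur))
      ++Cur;
  }

public:
  FilterIterator(std::function<bool(int)> P,
                 std::vector<int>::const_iterator C,
                 std::vector<int>::const_iterator E)
      : Pred(std::move(P)), Cur(C), End(E) {
    scan();
  }
  int operator*() const { return *Cur; }
  FilterIterator &operator++() {
    ++Cur;
    scan();
    return *this;
  }
  bool operator!=(const FilterIterator &O) const { return Cur != O.Cur; }
};

int main() {
  std::vector<int> SectionSizes{0, 12, 0, 7};
  auto NonEmpty = [](int S) { return S != 0; };
  FilterIterator I(NonEmpty, SectionSizes.begin(), SectionSizes.end());
  FilterIterator E(NonEmpty, SectionSizes.end(), SectionSizes.end());
  for (; I != E; ++I)
    std::cout << *I << "\n"; // prints 12 and 7, skipping the empty sections
}
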
// Various helper functions.
+SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O);
+
+Error getELFRelocationValueString(const object::ELFObjectFileBase *Obj,
+ const object::RelocationRef &Rel,
+ llvm::SmallVectorImpl<char> &Result);
+Error getCOFFRelocationValueString(const object::COFFObjectFile *Obj,
+ const object::RelocationRef &Rel,
+ llvm::SmallVectorImpl<char> &Result);
+Error getWasmRelocationValueString(const object::WasmObjectFile *Obj,
+ const object::RelocationRef &RelRef,
+ llvm::SmallVectorImpl<char> &Result);
+Error getMachORelocationValueString(const object::MachOObjectFile *Obj,
+ const object::RelocationRef &RelRef,
+ llvm::SmallVectorImpl<char> &Result);
+
+uint64_t getELFSectionLMA(const object::ELFSectionRef& Sec);
+
void error(std::error_code ec);
+void error(Error E);
bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);
void parseInputMachO(StringRef Filename);
void parseInputMachO(object::MachOUniversalBinary *UB);
@@ -82,6 +110,7 @@ void printMachOLazyBindTable(object::MachOObjectFile *O);
void printMachOWeakBindTable(object::MachOObjectFile *O);
void printELFFileHeader(const object::ObjectFile *O);
void printELFDynamicSection(const object::ObjectFile *Obj);
+void printELFSymbolVersionInfo(const object::ObjectFile *Obj);
void printCOFFFileHeader(const object::ObjectFile *O);
void printCOFFSymbolTable(const object::COFFImportFile *I);
void printCOFFSymbolTable(const object::COFFObjectFile *O);
@@ -103,18 +132,20 @@ void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
void warn(StringRef Message);
LLVM_ATTRIBUTE_NORETURN void error(Twine Message);
LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, std::error_code EC);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, llvm::Error E);
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef FileName,
- StringRef ArchiveName,
- llvm::Error E,
- StringRef ArchitectureName
- = StringRef());
-LLVM_ATTRIBUTE_NORETURN void report_error(StringRef ArchiveName,
- const object::Archive::Child &C,
- llvm::Error E,
- StringRef ArchitectureName
- = StringRef());
+LLVM_ATTRIBUTE_NORETURN void report_error(Error E, StringRef File);
+LLVM_ATTRIBUTE_NORETURN void
+report_error(Error E, StringRef FileName, StringRef ArchiveName,
+ StringRef ArchitectureName = StringRef());
+LLVM_ATTRIBUTE_NORETURN void
+report_error(Error E, StringRef ArchiveName, const object::Archive::Child &C,
+ StringRef ArchitectureName = StringRef());
+
+template <typename T, typename... Ts>
+T unwrapOrError(Expected<T> EO, Ts &&... Args) {
+ if (EO)
+ return std::move(*EO);
+ report_error(EO.takeError(), std::forward<Ts>(Args)...);
+}
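
unwrapOrError() is the usual "unwrap an Expected<T> or die" helper: on success it moves the value out, otherwise it forwards the error together with file/archive context to one of the noreturn report_error overloads above. A self-contained model of the pattern using std::optional in place of llvm::Expected (names and values invented):

#include <cstdio>
#include <cstdlib>
#include <optional>
#include <string>
#include <utility>

// Stand-in for report_error(Error, StringRef): print context and exit.
[[noreturn]] static void reportError(const std::string &File,
                                     const std::string &Msg) {
  std::fprintf(stderr, "error: '%s': %s\n", File.c_str(), Msg.c_str());
  std::exit(1);
}

// Stand-in for unwrapOrError: yield the value on success, die with context
// otherwise.
template <typename T>
T unwrapOr(std::optional<T> V, const std::string &File, const std::string &Msg) {
  if (V)
    return std::move(*V);
  reportError(File, Msg);
}

int main() {
  std::optional<int> StartAddr = 0x401000;
  std::printf("start address: %#x\n",
              unwrapOr(StartAddr, "a.out", "no start address"));
}
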
} // end namespace llvm
diff --git a/tools/llvm-pdbutil/BytesOutputStyle.cpp b/tools/llvm-pdbutil/BytesOutputStyle.cpp
index 2b96c8f986aa..162d12c120b4 100644
--- a/tools/llvm-pdbutil/BytesOutputStyle.cpp
+++ b/tools/llvm-pdbutil/BytesOutputStyle.cpp
@@ -1,9 +1,8 @@
//===- BytesOutputStyle.cpp ----------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -341,9 +340,7 @@ static void iterateOneModule(PDBFile &File, LinePrinter &P,
if (ModiStream == kInvalidStreamIndex)
return;
- auto ModStreamData = MappedBlockStream::createIndexedStream(
- File.getMsfLayout(), File.getMsfBuffer(), ModiStream,
- File.getAllocator());
+ auto ModStreamData = File.createIndexedStream(ModiStream);
ModuleDebugStreamRef ModStream(Modi, std::move(ModStreamData));
if (auto EC = ModStream.reload()) {
P.formatLine("Could not parse debug information.");
diff --git a/tools/llvm-pdbutil/BytesOutputStyle.h b/tools/llvm-pdbutil/BytesOutputStyle.h
index aa5342998e56..d3aceb47679e 100644
--- a/tools/llvm-pdbutil/BytesOutputStyle.h
+++ b/tools/llvm-pdbutil/BytesOutputStyle.h
@@ -1,9 +1,8 @@
//===- BytesOutputStyle.h ------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.cpp b/tools/llvm-pdbutil/DumpOutputStyle.cpp
index e4f6aa7f6ec5..962d4cf88a8a 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -1,9 +1,8 @@
//===- DumpOutputStyle.cpp ------------------------------------ *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -14,6 +13,7 @@
#include "MinimalSymbolDumper.h"
#include "MinimalTypeDumper.h"
#include "StreamUtil.h"
+#include "TypeReferenceTracker.h"
#include "llvm-pdbutil.h"
#include "llvm/ADT/STLExtras.h"
@@ -61,7 +61,12 @@ using namespace llvm::msf;
using namespace llvm::pdb;
DumpOutputStyle::DumpOutputStyle(InputFile &File)
- : File(File), P(2, false, outs()) {}
+ : File(File), P(2, false, outs()) {
+ if (opts::dump::DumpTypeRefStats)
+ RefTracker.reset(new TypeReferenceTracker(File));
+}
+
+DumpOutputStyle::~DumpOutputStyle() {}
PDBFile &DumpOutputStyle::getPdb() { return File.pdb(); }
object::COFFObjectFile &DumpOutputStyle::getObj() { return File.obj(); }
@@ -77,6 +82,10 @@ void DumpOutputStyle::printStreamNotPresent(StringRef StreamName) {
}
Error DumpOutputStyle::dump() {
+ // Walk symbols & globals if we are supposed to mark types referenced.
+ if (opts::dump::DumpTypeRefStats)
+ RefTracker->mark();
+
if (opts::dump::DumpSummary) {
if (auto EC = dumpFileSummary())
return EC;
@@ -101,6 +110,12 @@ Error DumpOutputStyle::dump() {
P.NewLine();
}
+ if (opts::dump::DumpTypeStats) {
+ if (auto EC = dumpTypeStats())
+ return EC;
+ P.NewLine();
+ }
+
if (opts::dump::DumpNamedStreams) {
if (auto EC = dumpNamedStreams())
return EC;
@@ -188,6 +203,11 @@ Error DumpOutputStyle::dump() {
return EC;
}
+ if (opts::dump::DumpTypeRefStats) {
+ if (auto EC = dumpTypeRefStats())
+ return EC;
+ }
+
if (opts::dump::DumpSectionHeaders) {
if (auto EC = dumpSectionHeaders())
return EC;
@@ -203,6 +223,8 @@ Error DumpOutputStyle::dump() {
return EC;
}
+ P.NewLine();
+
return Error::success();
}
@@ -293,18 +315,30 @@ static inline std::string formatModuleDetailKind(SymbolKind K) {
return formatSymbolKind(K);
}
+// Get the stats sorted by size, descending.
+std::vector<StatCollection::KindAndStat>
+StatCollection::getStatsSortedBySize() const {
+ std::vector<KindAndStat> SortedStats(Individual.begin(), Individual.end());
+ llvm::stable_sort(SortedStats,
+ [](const KindAndStat &LHS, const KindAndStat &RHS) {
+ return LHS.second.Size > RHS.second.Size;
+ });
+ return SortedStats;
+}
+
template <typename Kind>
static void printModuleDetailStats(LinePrinter &P, StringRef Label,
const StatCollection &Stats) {
P.NewLine();
P.formatLine(" {0}", Label);
AutoIndent Indent(P);
- P.formatLine("{0,40}: {1,7} entries ({2,8} bytes)", "Total",
+ P.formatLine("{0,40}: {1,7} entries ({2,12:N} bytes)", "Total",
Stats.Totals.Count, Stats.Totals.Size);
P.formatLine("{0}", fmt_repeat('-', 74));
- for (const auto &K : Stats.Individual) {
+
+ for (const auto &K : Stats.getStatsSortedBySize()) {
std::string KindName = formatModuleDetailKind(Kind(K.first));
- P.formatLine("{0,40}: {1,7} entries ({2,8} bytes)", KindName,
+ P.formatLine("{0,40}: {1,7} entries ({2,12:N} bytes)", KindName,
K.second.Count, K.second.Size);
}
}
@@ -662,6 +696,35 @@ Error DumpOutputStyle::dumpSymbolStats() {
return Error::success();
}
+Error DumpOutputStyle::dumpTypeStats() {
+ printHeader(P, "Type Record Stats");
+
+ // Iterate the types, categorize by kind, accumulate size stats.
+ StatCollection TypeStats;
+ LazyRandomTypeCollection &Types = File.types();
+ for (Optional<TypeIndex> TI = Types.getFirst(); TI; TI = Types.getNext(*TI)) {
+ CVType Type = Types.getType(*TI);
+ TypeStats.update(uint32_t(Type.kind()), Type.length());
+ }
+
+ P.NewLine();
+ P.formatLine(" Types");
+ AutoIndent Indent(P);
+ P.formatLine("{0,14}: {1,7} entries ({2,12:N} bytes, {3,7} avg)", "Total",
+ TypeStats.Totals.Count, TypeStats.Totals.Size,
+ (double)TypeStats.Totals.Size / TypeStats.Totals.Count);
+ P.formatLine("{0}", fmt_repeat('-', 74));
+
+ for (const auto &K : TypeStats.getStatsSortedBySize()) {
+ P.formatLine("{0,14}: {1,7} entries ({2,12:N} bytes, {3,7} avg)",
+ formatTypeLeafKind(TypeLeafKind(K.first)), K.second.Count,
+ K.second.Size, (double)K.second.Size / K.second.Count);
+ }
+
+ return Error::success();
+}
+
static bool isValidNamespaceIdentifier(StringRef S) {
if (S.empty())
return false;
@@ -806,7 +869,7 @@ Error DumpOutputStyle::dumpUdtStats() {
fmt_align(SizeHeader, AlignStyle::Right, SD));
P.formatLine("{0}", fmt_repeat('-', TableWidth));
- for (const auto &Stat : UdtTargetStats.Individual) {
+ for (const auto &Stat : UdtTargetStats.getStatsSortedBySize()) {
StringRef Label = getUdtStatLabel(Stat.first);
P.formatLine("{0} | {1:N} {2:N}",
fmt_align(Label, AlignStyle::Right, FieldWidth),
@@ -819,12 +882,25 @@ Error DumpOutputStyle::dumpUdtStats() {
fmt_align(UdtStats.Totals.Count, AlignStyle::Right, CD),
fmt_align(UdtStats.Totals.Size, AlignStyle::Right, SD));
P.formatLine("{0}", fmt_repeat('-', TableWidth));
- for (const auto &Stat : NamespacedStats) {
- std::string Label = formatv("namespace '{0}'", Stat.getKey());
+ struct StrAndStat {
+ StringRef Key;
+ StatCollection::Stat Stat;
+ };
+
+ // Print namespace stats in descending order of size.
+ std::vector<StrAndStat> NamespacedStatsSorted;
+ for (const auto &Stat : NamespacedStats)
+ NamespacedStatsSorted.push_back({Stat.getKey(), Stat.second});
+ llvm::stable_sort(NamespacedStatsSorted,
+ [](const StrAndStat &L, const StrAndStat &R) {
+ return L.Stat.Size > R.Stat.Size;
+ });
+ for (const auto &Stat : NamespacedStatsSorted) {
+ std::string Label = formatv("namespace '{0}'", Stat.Key);
P.formatLine("{0} | {1:N} {2:N}",
fmt_align(Label, AlignStyle::Right, FieldWidth),
- fmt_align(Stat.second.Count, AlignStyle::Right, CD),
- fmt_align(Stat.second.Size, AlignStyle::Right, SD));
+ fmt_align(Stat.Stat.Count, AlignStyle::Right, CD),
+ fmt_align(Stat.Stat.Size, AlignStyle::Right, SD));
}
return Error::success();
}
@@ -921,6 +997,10 @@ Error DumpOutputStyle::dumpInlineeLines() {
P.formatLine("{0,+8} | {1,+5} | ", Entry.Header->Inlinee,
fmtle(Entry.Header->SourceLineNum));
Strings.formatFromChecksumsOffset(P, Entry.Header->FileID, true);
+ for (const auto &ExtraFileID : Entry.ExtraFiles) {
+ P.formatLine(" ");
+ Strings.formatFromChecksumsOffset(P, ExtraFileID, true);
+ }
}
P.NewLine();
});
@@ -1011,17 +1091,12 @@ Error DumpOutputStyle::dumpOldFpo(PDBFile &File) {
ExitOnError Err("Error dumping old fpo data:");
auto &Dbi = Err(File.getPDBDbiStream());
- uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::FPO);
- if (Index == kInvalidStreamIndex) {
+ if (!Dbi.hasOldFpoRecords()) {
printStreamNotPresent("FPO");
return Error::success();
}
- std::unique_ptr<MappedBlockStream> OldFpo = File.createIndexedStream(Index);
- BinaryStreamReader Reader(*OldFpo);
- FixedStreamArray<object::FpoData> Records;
- Err(Reader.readArray(Records,
- Reader.bytesRemaining() / sizeof(object::FpoData)));
+ const FixedStreamArray<object::FpoData>& Records = Dbi.getOldFpoRecords();
P.printLine(" RVA | Code | Locals | Params | Prolog | Saved Regs | Use "
"BP | Has SEH | Frame Type");
@@ -1043,18 +1118,12 @@ Error DumpOutputStyle::dumpNewFpo(PDBFile &File) {
ExitOnError Err("Error dumping new fpo data:");
auto &Dbi = Err(File.getPDBDbiStream());
- uint32_t Index = Dbi.getDebugStreamIndex(DbgHeaderType::NewFPO);
- if (Index == kInvalidStreamIndex) {
+ if (!Dbi.hasNewFpoRecords()) {
printStreamNotPresent("New FPO");
return Error::success();
}
- std::unique_ptr<MappedBlockStream> NewFpo = File.createIndexedStream(Index);
-
- DebugFrameDataSubsectionRef FDS;
- if (auto EC = FDS.initialize(*NewFpo))
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Invalid new fpo stream");
+ const DebugFrameDataSubsectionRef& FDS = Dbi.getNewFpoRecords();
P.printLine(" RVA | Code | Locals | Params | Stack | Prolog | Saved Regs "
"| Has SEH | Has C++EH | Start | Program");
@@ -1239,14 +1308,15 @@ static void buildDepSet(LazyRandomTypeCollection &Types,
static void
dumpFullTypeStream(LinePrinter &Printer, LazyRandomTypeCollection &Types,
- uint32_t NumTypeRecords, uint32_t NumHashBuckets,
+ TypeReferenceTracker *RefTracker, uint32_t NumTypeRecords,
+ uint32_t NumHashBuckets,
FixedStreamArray<support::ulittle32_t> HashValues,
TpiStream *Stream, bool Bytes, bool Extras) {
Printer.formatLine("Showing {0:N} records", NumTypeRecords);
uint32_t Width = NumDigits(TypeIndex::FirstNonSimpleIndex + NumTypeRecords);
- MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
+ MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types, RefTracker,
NumHashBuckets, HashValues, Stream);
if (auto EC = codeview::visitTypeStream(Types, V)) {
@@ -1257,12 +1327,13 @@ dumpFullTypeStream(LinePrinter &Printer, LazyRandomTypeCollection &Types,
static void dumpPartialTypeStream(LinePrinter &Printer,
LazyRandomTypeCollection &Types,
+ TypeReferenceTracker *RefTracker,
TpiStream &Stream, ArrayRef<TypeIndex> TiList,
bool Bytes, bool Extras, bool Deps) {
uint32_t Width =
NumDigits(TypeIndex::FirstNonSimpleIndex + Stream.getNumTypeRecords());
- MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types,
+ MinimalTypeDumpVisitor V(Printer, Width + 2, Bytes, Extras, Types, RefTracker,
Stream.getNumHashBuckets(), Stream.getHashValues(),
&Stream);
@@ -1311,12 +1382,12 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
else
continue;
- StringRef Contents;
- if (auto EC = S.getContents(Contents))
- return errorCodeToError(EC);
+ Expected<StringRef> ContentsOrErr = S.getContents();
+ if (!ContentsOrErr)
+ return ContentsOrErr.takeError();
uint32_t Magic;
- BinaryStreamReader Reader(Contents, llvm::support::little);
+ BinaryStreamReader Reader(*ContentsOrErr, llvm::support::little);
if (auto EC = Reader.readInteger(Magic))
return EC;
if (Magic != COFF::DEBUG_SECTION_MAGIC)
@@ -1326,8 +1397,8 @@ Error DumpOutputStyle::dumpTypesFromObjectFile() {
Types.reset(Reader, 100);
if (opts::dump::DumpTypes) {
- dumpFullTypeStream(P, Types, 0, 0, {}, nullptr, opts::dump::DumpTypeData,
- false);
+ dumpFullTypeStream(P, Types, RefTracker.get(), 0, 0, {}, nullptr,
+ opts::dump::DumpTypeData, false);
} else if (opts::dump::DumpTypeExtras) {
auto LocalHashes = LocallyHashedType::hashTypeCollection(Types);
auto GlobalHashes = GloballyHashedType::hashTypeCollection(Types);
@@ -1396,23 +1467,36 @@ Error DumpOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
auto &Types = (StreamIdx == StreamTPI) ? File.types() : File.ids();
+ // Only emit notes about referenced/unreferenced for types.
+ TypeReferenceTracker *MaybeTracker =
+ (StreamIdx == StreamTPI) ? RefTracker.get() : nullptr;
+
// Enable resolving forward decls.
Stream.buildHashMap();
if (DumpTypes || !Indices.empty()) {
if (Indices.empty())
- dumpFullTypeStream(P, Types, Stream.getNumTypeRecords(),
+ dumpFullTypeStream(P, Types, MaybeTracker, Stream.getNumTypeRecords(),
Stream.getNumHashBuckets(), Stream.getHashValues(),
&Stream, DumpBytes, DumpExtras);
else {
std::vector<TypeIndex> TiList(Indices.begin(), Indices.end());
- dumpPartialTypeStream(P, Types, Stream, TiList, DumpBytes, DumpExtras,
- opts::dump::DumpTypeDependents);
+ dumpPartialTypeStream(P, Types, MaybeTracker, Stream, TiList, DumpBytes,
+ DumpExtras, opts::dump::DumpTypeDependents);
}
}
if (DumpExtras) {
P.NewLine();
+
+ P.formatLine("Header Version: {0}",
+ static_cast<uint32_t>(Stream.getTpiVersion()));
+ P.formatLine("Hash Stream Index: {0}", Stream.getTypeHashStreamIndex());
+ P.formatLine("Aux Hash Stream Index: {0}",
+ Stream.getTypeHashStreamAuxIndex());
+ P.formatLine("Hash Key Size: {0}", Stream.getHashKeySize());
+ P.formatLine("Num Hash Buckets: {0}", Stream.getNumHashBuckets());
+
auto IndexOffsets = Stream.getTypeIndexOffsets();
P.formatLine("Type Index Offsets:");
for (const auto &IO : IndexOffsets) {
@@ -1523,6 +1607,34 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() {
return Error::success();
}
+Error DumpOutputStyle::dumpTypeRefStats() {
+ printHeader(P, "Type Reference Statistics");
+ AutoIndent Indent(P);
+
+ // Sum the byte size of all type records, and the size and count of all
+ // referenced records.
+ size_t TotalRecs = File.types().size();
+ size_t RefRecs = 0;
+ size_t TotalBytes = 0;
+ size_t RefBytes = 0;
+ auto &Types = File.types();
+ for (Optional<TypeIndex> TI = Types.getFirst(); TI; TI = Types.getNext(*TI)) {
+ CVType Type = File.types().getType(*TI);
+ TotalBytes += Type.length();
+ if (RefTracker->isTypeReferenced(*TI)) {
+ ++RefRecs;
+ RefBytes += Type.length();
+ }
+ }
+
+ P.formatLine("Records referenced: {0:N} / {1:N} {2:P}", RefRecs, TotalRecs,
+ (double)RefRecs / TotalRecs);
+ P.formatLine("Bytes referenced: {0:N} / {1:N} {2:P}", RefBytes, TotalBytes,
+ (double)RefBytes / TotalBytes);
+
+ return Error::success();
+}
+
Error DumpOutputStyle::dumpGSIRecords() {
printHeader(P, "GSI Records");
diff --git a/tools/llvm-pdbutil/DumpOutputStyle.h b/tools/llvm-pdbutil/DumpOutputStyle.h
index 9b3a85587bde..796cd7a10c36 100644
--- a/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -1,9 +1,8 @@
//===- DumpOutputStyle.h -------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -35,6 +34,7 @@ class COFFObjectFile;
namespace pdb {
class GSIHashTable;
class InputFile;
+class TypeReferenceTracker;
struct StatCollection {
struct Stat {
@@ -49,6 +49,8 @@ struct StatCollection {
}
};
+ using KindAndStat = std::pair<uint32_t, Stat>;
+
void update(uint32_t Kind, uint32_t RecordSize) {
Totals.update(RecordSize);
auto Iter = Individual.try_emplace(Kind, 1, RecordSize);
@@ -57,12 +59,15 @@ struct StatCollection {
}
Stat Totals;
DenseMap<uint32_t, Stat> Individual;
+
+ std::vector<KindAndStat> getStatsSortedBySize() const;
};
class DumpOutputStyle : public OutputStyle {
public:
DumpOutputStyle(InputFile &File);
+ ~DumpOutputStyle() override;
Error dump() override;
@@ -77,6 +82,7 @@ private:
Error dumpStreamSummary();
Error dumpSymbolStats();
Error dumpUdtStats();
+ Error dumpTypeStats();
Error dumpNamedStreams();
Error dumpStringTable();
Error dumpStringTableFromPdb();
@@ -90,6 +96,7 @@ private:
Error dumpNewFpo(PDBFile &File);
Error dumpTpiStream(uint32_t StreamIdx);
Error dumpTypesFromObjectFile();
+ Error dumpTypeRefStats();
Error dumpModules();
Error dumpModuleFiles();
Error dumpModuleSymsForPdb();
@@ -105,6 +112,7 @@ private:
void dumpSectionHeaders(StringRef Label, DbgHeaderType Type);
InputFile &File;
+ std::unique_ptr<TypeReferenceTracker> RefTracker;
LinePrinter P;
SmallVector<StreamInfo, 32> StreamPurposes;
};
diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/tools/llvm-pdbutil/ExplainOutputStyle.cpp
index d16bfa480e1d..94faa0463981 100644
--- a/tools/llvm-pdbutil/ExplainOutputStyle.cpp
+++ b/tools/llvm-pdbutil/ExplainOutputStyle.cpp
@@ -1,9 +1,8 @@
//===- ExplainOutputStyle.cpp --------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/ExplainOutputStyle.h b/tools/llvm-pdbutil/ExplainOutputStyle.h
index 9a497accb812..f405cf615e92 100644
--- a/tools/llvm-pdbutil/ExplainOutputStyle.h
+++ b/tools/llvm-pdbutil/ExplainOutputStyle.h
@@ -1,9 +1,8 @@
//===- ExplainOutputStyle.h ----------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/FormatUtil.cpp b/tools/llvm-pdbutil/FormatUtil.cpp
index f55d478127d6..1a13f383e53c 100644
--- a/tools/llvm-pdbutil/FormatUtil.cpp
+++ b/tools/llvm-pdbutil/FormatUtil.cpp
@@ -1,9 +1,8 @@
//===- FormatUtil.cpp ----------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/FormatUtil.h b/tools/llvm-pdbutil/FormatUtil.h
index 9a003c9285c9..19ce248f9a6f 100644
--- a/tools/llvm-pdbutil/FormatUtil.h
+++ b/tools/llvm-pdbutil/FormatUtil.h
@@ -1,9 +1,8 @@
//===- FormatUtil.h ------------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/InputFile.cpp b/tools/llvm-pdbutil/InputFile.cpp
index 8eb116cf0d80..bd23bfdbe31a 100644
--- a/tools/llvm-pdbutil/InputFile.cpp
+++ b/tools/llvm-pdbutil/InputFile.cpp
@@ -1,9 +1,8 @@
//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -67,17 +66,20 @@ getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
StringRef Name,
BinaryStreamReader &Reader) {
- StringRef SectionName, Contents;
+ StringRef SectionName;
if (Section.getName(SectionName))
return false;
if (SectionName != Name)
return false;
- if (Section.getContents(Contents))
+ Expected<StringRef> ContentsOrErr = Section.getContents();
+ if (!ContentsOrErr) {
+ consumeError(ContentsOrErr.takeError());
return false;
+ }
- Reader = BinaryStreamReader(Contents, support::little);
+ Reader = BinaryStreamReader(*ContentsOrErr, support::little);
uint32_t Magic;
if (Reader.bytesRemaining() < sizeof(uint32_t))
return false;
diff --git a/tools/llvm-pdbutil/InputFile.h b/tools/llvm-pdbutil/InputFile.h
index ee4e651c1e99..f25390c971d0 100644
--- a/tools/llvm-pdbutil/InputFile.h
+++ b/tools/llvm-pdbutil/InputFile.h
@@ -1,9 +1,8 @@
//===- InputFile.h -------------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/LinePrinter.cpp b/tools/llvm-pdbutil/LinePrinter.cpp
index e80a1762450b..280c000bd65f 100644
--- a/tools/llvm-pdbutil/LinePrinter.cpp
+++ b/tools/llvm-pdbutil/LinePrinter.cpp
@@ -1,9 +1,8 @@
//===- LinePrinter.cpp ------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -187,8 +186,7 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
return;
}
- auto S = MappedBlockStream::createIndexedStream(
- File.getMsfLayout(), File.getMsfBuffer(), StreamIdx, File.getAllocator());
+ auto S = File.createIndexedStream(StreamIdx);
if (!S) {
NewLine();
formatLine("Stream {0}: Not present", StreamIdx);
diff --git a/tools/llvm-pdbutil/LinePrinter.h b/tools/llvm-pdbutil/LinePrinter.h
index 09bde28f516a..7ecfae17354f 100644
--- a/tools/llvm-pdbutil/LinePrinter.h
+++ b/tools/llvm-pdbutil/LinePrinter.h
@@ -1,9 +1,8 @@
//===- LinePrinter.h ------------------------------------------ *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -133,8 +132,7 @@ struct AutoIndent {
template <class T>
inline raw_ostream &operator<<(LinePrinter &Printer, const T &Item) {
- Printer.getStream() << Item;
- return Printer.getStream();
+ return Printer.getStream() << Item;
}
enum class PDB_ColorItem {
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 2c7b213b0a9f..e5ae47050678 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -1,9 +1,8 @@
//===- MinimalSymbolDumper.cpp -------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -207,6 +206,7 @@ static std::string formatSourceLanguage(SourceLanguage Lang) {
RETURN_CASE(SourceLanguage, MSIL, "msil");
RETURN_CASE(SourceLanguage, HLSL, "hlsl");
RETURN_CASE(SourceLanguage, D, "d");
+ RETURN_CASE(SourceLanguage, Swift, "swift");
}
return formatUnknownEnum(Lang);
}
@@ -287,21 +287,39 @@ static std::string formatCookieKind(FrameCookieKind Kind) {
return formatUnknownEnum(Kind);
}
-static std::string formatRegisterId(RegisterId Id) {
- switch (Id) {
+static std::string formatRegisterId(RegisterId Id, CPUType Cpu) {
+ if (Cpu == CPUType::ARM64) {
+ switch (Id) {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+ default:
+ break;
+ }
+ } else {
+ switch (Id) {
+#define CV_REGISTERS_X86
#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+ default:
+ break;
+ }
}
return formatUnknownEnum(Id);
}
-static std::string formatRegisterId(uint16_t Reg16) {
- return formatRegisterId(RegisterId(Reg16));
+static std::string formatRegisterId(uint16_t Reg16, CPUType Cpu) {
+ return formatRegisterId(RegisterId(Reg16), Cpu);
}
-static std::string formatRegisterId(ulittle16_t &Reg16) {
- return formatRegisterId(uint16_t(Reg16));
+static std::string formatRegisterId(ulittle16_t &Reg16, CPUType Cpu) {
+ return formatRegisterId(uint16_t(Reg16), Cpu);
}
static std::string formatRange(LocalVariableAddrRange Range) {
@@ -331,7 +349,7 @@ Error MinimalSymbolDumper::visitSymbolBegin(codeview::CVSymbol &Record,
// append to the existing line.
P.formatLine("{0} | {1} [size = {2}]",
fmt_align(Offset, AlignStyle::Right, 6),
- formatSymbolKind(Record.Type), Record.length());
+ formatSymbolKind(Record.kind()), Record.length());
P.Indent();
return Error::success();
}
@@ -562,7 +580,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
P.formatLine("register = {0}, offset = {1}, offset in parent = {2}, has "
"spilled udt = {3}",
- formatRegisterId(Def.Hdr.Register),
+ formatRegisterId(Def.Hdr.Register, CompilationCPU),
int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(),
Def.hasSpilledUDTMember());
P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
@@ -575,7 +593,7 @@ Error MinimalSymbolDumper::visitKnownRecord(
AutoIndent Indent(P, 7);
P.formatLine("register = {0}, may have no name = {1}, range start = "
"{2}, length = {3}",
- formatRegisterId(DefRangeRegister.Hdr.Register),
+ formatRegisterId(DefRangeRegister.Hdr.Register, CompilationCPU),
bool(DefRangeRegister.Hdr.MayHaveNoName),
formatSegmentOffset(DefRangeRegister.Range.ISectStart,
DefRangeRegister.Range.OffsetStart),
@@ -590,7 +608,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
AutoIndent Indent(P, 7);
bool NoName = !!(Def.Hdr.MayHaveNoName == 0);
P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
- formatRegisterId(Def.Hdr.Register), NoName,
+ formatRegisterId(Def.Hdr.Register, CompilationCPU), NoName,
uint32_t(Def.Hdr.OffsetInParent));
P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
formatGaps(P.getIndentLevel() + 9, Def.Gaps));
@@ -617,7 +635,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FC) {
AutoIndent Indent(P, 7);
P.formatLine("code offset = {0}, Register = {1}, kind = {2}, flags = {3}",
- FC.CodeOffset, formatRegisterId(FC.Register),
+ FC.CodeOffset, formatRegisterId(FC.Register, CompilationCPU),
formatCookieKind(FC.CookieKind), FC.Flags);
return Error::success();
}
@@ -631,9 +649,10 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FP) {
FP.BytesOfCalleeSavedRegisters,
formatSegmentOffset(FP.SectionIdOfExceptionHandler,
FP.OffsetOfExceptionHandler));
- P.formatLine("local fp reg = {0}, param fp reg = {1}",
- formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU)),
- formatRegisterId(FP.getParamFramePtrReg(CompilationCPU)));
+ P.formatLine(
+ "local fp reg = {0}, param fp reg = {1}",
+ formatRegisterId(FP.getLocalFramePtrReg(CompilationCPU), CompilationCPU),
+ formatRegisterId(FP.getParamFramePtrReg(CompilationCPU), CompilationCPU));
P.formatLine("flags = {0}",
formatFrameProcedureOptions(P.getIndentLevel() + 9, FP.Flags));
return Error::success();
@@ -650,13 +669,89 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &IS) {
AutoIndent Indent(P, 7);
- auto Bytes = makeArrayRef(IS.AnnotationData);
- StringRef Annotations(reinterpret_cast<const char *>(Bytes.begin()),
- Bytes.size());
-
P.formatLine("inlinee = {0}, parent = {1}, end = {2}", idIndex(IS.Inlinee),
IS.Parent, IS.End);
- P.formatLine("annotations = {0}", toHex(Annotations));
+
+ // Break down the annotation byte code and calculate code and line offsets.
+ // FIXME: It would be helpful if we could look up the initial file and inlinee
+ // lines offset using the inlinee index above.
+ uint32_t CodeOffset = 0;
+ int32_t LineOffset = 0;
+ for (auto &Annot : IS.annotations()) {
+ P.formatLine(" {0}", fmt_align(toHex(Annot.Bytes), AlignStyle::Left, 9));
+
+ auto formatCodeOffset = [&](uint32_t Delta) {
+ CodeOffset += Delta;
+ P.format(" code 0x{0} (+0x{1})", utohexstr(CodeOffset), utohexstr(Delta));
+ };
+ auto formatCodeLength = [&](uint32_t Length) {
+ // Notably, changing the code length does not affect the code offset.
+ P.format(" code end 0x{0} (+0x{1})", utohexstr(CodeOffset + Length),
+ utohexstr(Length));
+ };
+ auto formatLineOffset = [&](int32_t Delta) {
+ LineOffset += Delta;
+ char Sign = Delta > 0 ? '+' : '-';
+ P.format(" line {0} ({1}{2})", LineOffset, Sign, std::abs(Delta));
+ };
+
+ // Use the opcode to interpret the integer values.
+ switch (Annot.OpCode) {
+ case BinaryAnnotationsOpCode::Invalid:
+ break;
+ case BinaryAnnotationsOpCode::CodeOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeOffset:
+ formatCodeOffset(Annot.U1);
+ break;
+ case BinaryAnnotationsOpCode::ChangeLineOffset:
+ formatLineOffset(Annot.S1);
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLength:
+ formatCodeLength(Annot.U1);
+ // Apparently this annotation updates the code offset. It's hard to make
+ // MSVC produce this opcode, but clang uses it, and debuggers seem to use
+ // this interpretation.
+ CodeOffset += Annot.U1;
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
+ formatCodeOffset(Annot.U1);
+ formatLineOffset(Annot.S1);
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
+ formatCodeOffset(Annot.U2);
+ formatCodeLength(Annot.U1);
+ break;
+
+ case BinaryAnnotationsOpCode::ChangeFile: {
+ uint32_t FileOffset = Annot.U1;
+ StringRef Filename = "<unknown>";
+ if (SymGroup) {
+ if (Expected<StringRef> MaybeFile =
+ SymGroup->getNameFromStringTable(FileOffset))
+ Filename = *MaybeFile;
+ else
+ return MaybeFile.takeError();
+ }
+ P.format(" setfile {0} 0x{1}", utohexstr(FileOffset));
+ break;
+ }
+
+ // The rest of these are hard to convince MSVC to emit, so they are not as
+ // well understood.
+ case BinaryAnnotationsOpCode::ChangeCodeOffsetBase:
+ formatCodeOffset(Annot.U1);
+ break;
+ case BinaryAnnotationsOpCode::ChangeLineEndDelta:
+ case BinaryAnnotationsOpCode::ChangeRangeKind:
+ case BinaryAnnotationsOpCode::ChangeColumnStart:
+ case BinaryAnnotationsOpCode::ChangeColumnEnd:
+ P.format(" {0} {1}", Annot.Name, Annot.U1);
+ break;
+ case BinaryAnnotationsOpCode::ChangeColumnEndDelta:
+ P.format(" {0} {1}", Annot.Name, Annot.S1);
+ break;
+ }
+ }
return Error::success();
}
@@ -665,7 +760,8 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
P.format(" `{0}`", Register.Name);
AutoIndent Indent(P, 7);
P.formatLine("register = {0}, type = {1}",
- formatRegisterId(Register.Register), typeIndex(Register.Index));
+ formatRegisterId(Register.Register, CompilationCPU),
+ typeIndex(Register.Index));
return Error::success();
}
@@ -753,9 +849,9 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
RegRelativeSym &RegRel) {
P.format(" `{0}`", RegRel.Name);
AutoIndent Indent(P, 7);
- P.formatLine("type = {0}, register = {1}, offset = {2}",
- typeIndex(RegRel.Type), formatRegisterId(RegRel.Register),
- RegRel.Offset);
+ P.formatLine(
+ "type = {0}, register = {1}, offset = {2}", typeIndex(RegRel.Type),
+ formatRegisterId(RegRel.Register, CompilationCPU), RegRel.Offset);
return Error::success();
}
@@ -780,3 +876,12 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
P.format(" `{0}`", UN.Name);
return Error::success();
}
+
+Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
+ AnnotationSym &Annot) {
+ AutoIndent Indent(P, 7);
+ P.formatLine("addr = {0}", formatSegmentOffset(Annot.Segment, Annot.CodeOffset));
+ P.formatLine("strings = {0}", typesetStringList(P.getIndentLevel() + 9 + 2,
+ Annot.Strings));
+ return Error::success();
+}
diff --git a/tools/llvm-pdbutil/MinimalSymbolDumper.h b/tools/llvm-pdbutil/MinimalSymbolDumper.h
index 033e193cee6c..cdc75c1cfba0 100644
--- a/tools/llvm-pdbutil/MinimalSymbolDumper.h
+++ b/tools/llvm-pdbutil/MinimalSymbolDumper.h
@@ -1,9 +1,8 @@
//===- MinimalSymbolDumper.h ---------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 3f10e8ab8a1e..3fdef085f19e 100644
--- a/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -1,9 +1,8 @@
//===- MinimalTypeDumper.cpp ---------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -11,6 +10,7 @@
#include "FormatUtil.h"
#include "LinePrinter.h"
+#include "TypeReferenceTracker.h"
#include "llvm-pdbutil.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
@@ -222,11 +222,10 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
// formatLine puts the newline at the beginning, so we use formatLine here
// to start a new line, and then individual visit methods use format to
// append to the existing line.
- if (!Hashes) {
- P.formatLine("{0} | {1} [size = {2}]",
- fmt_align(Index, AlignStyle::Right, Width),
- formatTypeLeafKind(Record.Type), Record.length());
- } else {
+ P.formatLine("{0} | {1} [size = {2}",
+ fmt_align(Index, AlignStyle::Right, Width),
+ formatTypeLeafKind(Record.kind()), Record.length());
+ if (Hashes) {
std::string H;
if (Index.toArrayIndex() >= HashValues.size()) {
H = "(not present)";
@@ -242,13 +241,19 @@ Error MinimalTypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) {
else
H = "0x" + utohexstr(Hash) + ", our hash = 0x" + utohexstr(OurHash);
}
- P.formatLine("{0} | {1} [size = {2}, hash = {3}]",
- fmt_align(Index, AlignStyle::Right, Width),
- formatTypeLeafKind(Record.Type), Record.length(), H);
+ P.format(", hash = {0}", H);
}
+ if (RefTracker) {
+ if (RefTracker->isTypeReferenced(Index))
+ P.format(", referenced");
+ else
+ P.format(", unreferenced");
+ }
+ P.format("]");
P.Indent(Width + 3);
return Error::success();
}
+
Error MinimalTypeDumpVisitor::visitTypeEnd(CVType &Record) {
P.Unindent(Width + 3);
if (RecordBytes) {
diff --git a/tools/llvm-pdbutil/MinimalTypeDumper.h b/tools/llvm-pdbutil/MinimalTypeDumper.h
index 8f6bdc6110ae..6bc456d47ac4 100644
--- a/tools/llvm-pdbutil/MinimalTypeDumper.h
+++ b/tools/llvm-pdbutil/MinimalTypeDumper.h
@@ -1,9 +1,8 @@
//===- MinimalTypeDumper.h ------------------------------------ *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,17 +20,19 @@ class LazyRandomTypeCollection;
namespace pdb {
class LinePrinter;
class TpiStream;
+class TypeReferenceTracker;
class MinimalTypeDumpVisitor : public codeview::TypeVisitorCallbacks {
public:
MinimalTypeDumpVisitor(LinePrinter &P, uint32_t Width, bool RecordBytes,
bool Hashes, codeview::LazyRandomTypeCollection &Types,
+ TypeReferenceTracker *RefTracker,
uint32_t NumHashBuckets,
FixedStreamArray<support::ulittle32_t> HashValues,
pdb::TpiStream *Stream)
: P(P), Width(Width), RecordBytes(RecordBytes), Hashes(Hashes),
- Types(Types), NumHashBuckets(NumHashBuckets), HashValues(HashValues),
- Stream(Stream) {}
+ Types(Types), RefTracker(RefTracker), NumHashBuckets(NumHashBuckets),
+ HashValues(HashValues), Stream(Stream) {}
Error visitTypeBegin(codeview::CVType &Record,
codeview::TypeIndex Index) override;
@@ -57,6 +58,7 @@ private:
bool RecordBytes = false;
bool Hashes = false;
codeview::LazyRandomTypeCollection &Types;
+ pdb::TypeReferenceTracker *RefTracker = nullptr;
uint32_t NumHashBuckets;
codeview::TypeIndex CurrentTypeIndex;
FixedStreamArray<support::ulittle32_t> HashValues;
diff --git a/tools/llvm-pdbutil/OutputStyle.h b/tools/llvm-pdbutil/OutputStyle.h
index dfefc25a215e..40b0de8bdf72 100644
--- a/tools/llvm-pdbutil/OutputStyle.h
+++ b/tools/llvm-pdbutil/OutputStyle.h
@@ -1,9 +1,8 @@
//===- OutputStyle.h ------------------------------------------ *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PdbYaml.cpp b/tools/llvm-pdbutil/PdbYaml.cpp
index 3ea333608314..a26241967b5a 100644
--- a/tools/llvm-pdbutil/PdbYaml.cpp
+++ b/tools/llvm-pdbutil/PdbYaml.cpp
@@ -1,9 +1,8 @@
-//===- PdbYAML.cpp -------------------------------------------- *- C++ --*-===//
+//===-- PdbYaml.cpp ------------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -51,6 +50,7 @@ template <> struct ScalarEnumerationTraits<llvm::pdb::PDB_Machine> {
io.enumCase(Value, "SH3DSP", PDB_Machine::SH3DSP);
io.enumCase(Value, "Thumb", PDB_Machine::Thumb);
io.enumCase(Value, "WceMipsV2", PDB_Machine::WceMipsV2);
+ io.enumCase(Value, "Arm64", PDB_Machine::Arm64);
}
};
diff --git a/tools/llvm-pdbutil/PdbYaml.h b/tools/llvm-pdbutil/PdbYaml.h
index 97ba87266cc6..ed6346c2c4db 100644
--- a/tools/llvm-pdbutil/PdbYaml.h
+++ b/tools/llvm-pdbutil/PdbYaml.h
@@ -1,9 +1,8 @@
//===- PdbYAML.h ---------------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp b/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
index bcdecca81aec..cd01a4004819 100644
--- a/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyBuiltinDumper.cpp ---------------------------------- *- C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyBuiltinDumper.h b/tools/llvm-pdbutil/PrettyBuiltinDumper.h
index fb6b0b172e6e..3bdef34c48f8 100644
--- a/tools/llvm-pdbutil/PrettyBuiltinDumper.h
+++ b/tools/llvm-pdbutil/PrettyBuiltinDumper.h
@@ -1,9 +1,8 @@
//===- PrettyBuiltinDumper.h ---------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
index f009f53a3932..b7eccac5988c 100644
--- a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyClassDefinitionDumper.cpp --------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
index 6569a1d304f6..f43c5c11bdfd 100644
--- a/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
+++ b/tools/llvm-pdbutil/PrettyClassDefinitionDumper.h
@@ -1,9 +1,8 @@
//===- PrettyClassDefinitionDumper.h ----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
index a572522c8cd7..a522935e34f1 100644
--- a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyClassLayoutGraphicalDumper.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
index f83f1a6c1b34..8f78b3b503d0 100644
--- a/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
+++ b/tools/llvm-pdbutil/PrettyClassLayoutGraphicalDumper.h
@@ -1,9 +1,8 @@
//===- PrettyClassLayoutGraphicalDumper.h -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index 94a0b2d5e780..cf769ff66472 100644
--- a/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyCompilandDumper.cpp - llvm-pdbutil compiland dumper -*- C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyCompilandDumper.h b/tools/llvm-pdbutil/PrettyCompilandDumper.h
index 1a840e49607c..c83a58672d1a 100644
--- a/tools/llvm-pdbutil/PrettyCompilandDumper.h
+++ b/tools/llvm-pdbutil/PrettyCompilandDumper.h
@@ -1,9 +1,8 @@
//===- PrettyCompilandDumper.h - llvm-pdbutil compiland dumper -*- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyEnumDumper.cpp b/tools/llvm-pdbutil/PrettyEnumDumper.cpp
index f4cbd3f8fa14..9ed5893f252e 100644
--- a/tools/llvm-pdbutil/PrettyEnumDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyEnumDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyEnumDumper.cpp -------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyEnumDumper.h b/tools/llvm-pdbutil/PrettyEnumDumper.h
index c6e65a6d1772..e7c5c1aeb018 100644
--- a/tools/llvm-pdbutil/PrettyEnumDumper.h
+++ b/tools/llvm-pdbutil/PrettyEnumDumper.h
@@ -1,9 +1,8 @@
//===- PrettyEnumDumper.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
index 1270223b1c78..fede031ec0c0 100644
--- a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyExternalSymbolDumper.cpp -------------------------- *- C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
index 6a009862ddd4..58fafe943315 100644
--- a/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
+++ b/tools/llvm-pdbutil/PrettyExternalSymbolDumper.h
@@ -1,9 +1,8 @@
//===- PrettyExternalSymbolDumper.h --------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyFunctionDumper.cpp b/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
index 836ede41054e..b820ca333965 100644
--- a/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyFunctionDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyFunctionDumper.cpp --------------------------------- *- C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -139,7 +138,8 @@ void FunctionDumper::start(const PDBSymbolFunc &Symbol, PointerType Pointer) {
if (Symbol.hasFramePointer()) {
WithColor(Printer, PDB_ColorItem::Register).get()
- << Symbol.getLocalBasePointerRegisterId();
+ << CPURegister{Symbol.getRawSymbol().getPlatform(),
+ Symbol.getLocalBasePointerRegisterId()};
} else {
WithColor(Printer, PDB_ColorItem::Register).get() << "FPO";
}
@@ -229,9 +229,9 @@ void FunctionDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) {
uint32_t TypeId = Symbol.getTypeId();
auto Type = Symbol.getSession().getSymbolById(TypeId);
if (Type)
- Printer << "<unknown-type>";
- else
Type->dump(*this);
+ else
+ Printer << "<unknown-type>";
}
void FunctionDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
diff --git a/tools/llvm-pdbutil/PrettyFunctionDumper.h b/tools/llvm-pdbutil/PrettyFunctionDumper.h
index 1a6f5430ec5a..df62604ac881 100644
--- a/tools/llvm-pdbutil/PrettyFunctionDumper.h
+++ b/tools/llvm-pdbutil/PrettyFunctionDumper.h
@@ -1,9 +1,8 @@
//===- PrettyFunctionDumper.h --------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.cpp b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
index daf3cd45b327..e8f8e5aa62c9 100644
--- a/tools/llvm-pdbutil/PrettyTypeDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypeDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyTypeDumper.cpp - PDBSymDumper type dumper *------------ C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyTypeDumper.h b/tools/llvm-pdbutil/PrettyTypeDumper.h
index 36e586fea7e3..b6539d95bf31 100644
--- a/tools/llvm-pdbutil/PrettyTypeDumper.h
+++ b/tools/llvm-pdbutil/PrettyTypeDumper.h
@@ -1,9 +1,8 @@
//===- PrettyTypeDumper.h - PDBSymDumper implementation for types *- C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyTypedefDumper.cpp b/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
index 2b3f3691ed98..ef73a8cdf9c4 100644
--- a/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyTypedefDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyTypedefDumper.cpp - PDBSymDumper impl for typedefs -- * C++ *-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyTypedefDumper.h b/tools/llvm-pdbutil/PrettyTypedefDumper.h
index 133bbfb7db0e..ad8b3f37dcfd 100644
--- a/tools/llvm-pdbutil/PrettyTypedefDumper.h
+++ b/tools/llvm-pdbutil/PrettyTypedefDumper.h
@@ -1,9 +1,8 @@
//===- PrettyTypedefDumper.h - llvm-pdbutil typedef dumper ---*- C++ ----*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyVariableDumper.cpp b/tools/llvm-pdbutil/PrettyVariableDumper.cpp
index ddac8cf0da4a..6dd7cc384cc9 100644
--- a/tools/llvm-pdbutil/PrettyVariableDumper.cpp
+++ b/tools/llvm-pdbutil/PrettyVariableDumper.cpp
@@ -1,9 +1,8 @@
//===- PrettyVariableDumper.cpp ---------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/PrettyVariableDumper.h b/tools/llvm-pdbutil/PrettyVariableDumper.h
index cacf1ce9577b..65cf5cd2cf55 100644
--- a/tools/llvm-pdbutil/PrettyVariableDumper.h
+++ b/tools/llvm-pdbutil/PrettyVariableDumper.h
@@ -1,9 +1,8 @@
//===- PrettyVariableDumper.h - PDBSymDumper variable dumper ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/StreamUtil.cpp b/tools/llvm-pdbutil/StreamUtil.cpp
index 367d947d25ee..7dfc2beefe78 100644
--- a/tools/llvm-pdbutil/StreamUtil.cpp
+++ b/tools/llvm-pdbutil/StreamUtil.cpp
@@ -1,9 +1,8 @@
//===- StreamUtil.cpp - PDB stream utilities --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/StreamUtil.h b/tools/llvm-pdbutil/StreamUtil.h
index 0e2e80707361..f810f7dc15b4 100644
--- a/tools/llvm-pdbutil/StreamUtil.h
+++ b/tools/llvm-pdbutil/StreamUtil.h
@@ -1,9 +1,8 @@
//===- Streamutil.h - PDB stream utilities ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/TypeReferenceTracker.cpp b/tools/llvm-pdbutil/TypeReferenceTracker.cpp
new file mode 100644
index 000000000000..f184f02e01ee
--- /dev/null
+++ b/tools/llvm-pdbutil/TypeReferenceTracker.cpp
@@ -0,0 +1,160 @@
+//===- TypeReferenceTracker.cpp ------------------------------- *- C++ --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TypeReferenceTracker.h"
+
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+using namespace llvm::codeview;
+
+// LazyRandomTypeCollection doesn't appear to expose the number of records, so
+// just iterate up front to find out.
+static uint32_t getNumRecordsInCollection(LazyRandomTypeCollection &Types) {
+ uint32_t NumTypes = 0;
+ for (Optional<TypeIndex> TI = Types.getFirst(); TI; TI = Types.getNext(*TI))
+ ++NumTypes;
+ return NumTypes;
+}
+
+TypeReferenceTracker::TypeReferenceTracker(InputFile &File)
+ : File(File), Types(File.types()),
+ Ids(File.isPdb() ? &File.ids() : nullptr) {
+ NumTypeRecords = getNumRecordsInCollection(Types);
+ TypeReferenced.resize(NumTypeRecords, false);
+
+ // If this is a PDB, ids are stored separately, so make a separate bit vector.
+ if (Ids) {
+ NumIdRecords = getNumRecordsInCollection(*Ids);
+ IdReferenced.resize(NumIdRecords, false);
+ }
+
+ // Get the TpiStream pointer for forward decl resolution if this is a pdb.
+ // Build the hash map to enable resolving forward decls.
+ if (File.isPdb()) {
+ Tpi = &cantFail(File.pdb().getPDBTpiStream());
+ Tpi->buildHashMap();
+ }
+}
+
+void TypeReferenceTracker::mark() {
+ // Walk type roots:
+ // - globals
+ // - modi symbols
+ // - LF_UDT_MOD_SRC_LINE? VC always links these in.
+ for (SymbolGroup SG : File.symbol_groups()) {
+ if (File.isObj()) {
+ for (const auto &SS : SG.getDebugSubsections()) {
+ // FIXME: Are there other type-referencing subsections? Inlinees?
+ // Probably for IDs.
+ if (SS.kind() != DebugSubsectionKind::Symbols)
+ continue;
+
+ CVSymbolArray Symbols;
+ BinaryStreamReader Reader(SS.getRecordData());
+ cantFail(Reader.readArray(Symbols, Reader.getLength()));
+ for (const CVSymbol &S : Symbols)
+ addTypeRefsFromSymbol(S);
+ }
+ } else if (SG.hasDebugStream()) {
+ for (const CVSymbol &S : SG.getPdbModuleStream().getSymbolArray())
+ addTypeRefsFromSymbol(S);
+ }
+ }
+
+ // Walk globals and mark types referenced from globals.
+ if (File.isPdb() && File.pdb().hasPDBGlobalsStream()) {
+ SymbolStream &SymStream = cantFail(File.pdb().getPDBSymbolStream());
+ GlobalsStream &GS = cantFail(File.pdb().getPDBGlobalsStream());
+ for (uint32_t PubSymOff : GS.getGlobalsTable()) {
+ CVSymbol Sym = SymStream.readRecord(PubSymOff);
+ addTypeRefsFromSymbol(Sym);
+ }
+ }
+
+ // FIXME: Should we walk Ids?
+}
+
+void TypeReferenceTracker::addOneTypeRef(TiRefKind RefKind, TypeIndex RefTI) {
+ // If it's simple or already seen, no need to add to work list.
+ BitVector &TypeOrIdReferenced =
+ (Ids && RefKind == TiRefKind::IndexRef) ? IdReferenced : TypeReferenced;
+ if (RefTI.isSimple() || TypeOrIdReferenced.test(RefTI.toArrayIndex()))
+ return;
+
+ // Otherwise, mark it seen and add it to the work list.
+ TypeOrIdReferenced.set(RefTI.toArrayIndex());
+ RefWorklist.push_back({RefKind, RefTI});
+}
+
+void TypeReferenceTracker::addTypeRefsFromSymbol(const CVSymbol &Sym) {
+ SmallVector<TiReference, 4> DepList;
+ // FIXME: Check for failure.
+ discoverTypeIndicesInSymbol(Sym, DepList);
+ addReferencedTypes(Sym.content(), DepList);
+ markReferencedTypes();
+}
+
+void TypeReferenceTracker::addReferencedTypes(ArrayRef<uint8_t> RecData,
+ ArrayRef<TiReference> DepList) {
+ for (const auto &Ref : DepList) {
+ // FIXME: Report OOB slice instead of truncating.
+ ArrayRef<uint8_t> ByteSlice =
+ RecData.drop_front(Ref.Offset).take_front(4 * Ref.Count);
+ ArrayRef<TypeIndex> TIs(
+ reinterpret_cast<const TypeIndex *>(ByteSlice.data()),
+ ByteSlice.size() / 4);
+
+ // If this is a PDB and this is an item reference, track it in the IPI
+ // bitvector. Otherwise, it's a type ref, or there is only one stream.
+ for (TypeIndex RefTI : TIs)
+ addOneTypeRef(Ref.Kind, RefTI);
+ }
+}
+
+void TypeReferenceTracker::markReferencedTypes() {
+ while (!RefWorklist.empty()) {
+ TiRefKind RefKind;
+ TypeIndex RefTI;
+ std::tie(RefKind, RefTI) = RefWorklist.pop_back_val();
+ Optional<CVType> Rec = (Ids && RefKind == TiRefKind::IndexRef)
+ ? Ids->tryGetType(RefTI)
+ : Types.tryGetType(RefTI);
+ if (!Rec)
+ continue; // FIXME: Report a reference to a non-existent type.
+
+ SmallVector<TiReference, 4> DepList;
+ // FIXME: Check for failure.
+ discoverTypeIndices(*Rec, DepList);
+ addReferencedTypes(Rec->content(), DepList);
+
+ // If this is a tag kind and this is a PDB input, mark the complete type as
+ // referenced.
+ // FIXME: This limitation makes this feature somewhat useless on object file
+ // inputs.
+ if (Tpi) {
+ switch (Rec->kind()) {
+ default:
+ break;
+ case LF_CLASS:
+ case LF_INTERFACE:
+ case LF_STRUCTURE:
+ case LF_UNION:
+ case LF_ENUM:
+ addOneTypeRef(TiRefKind::TypeRef,
+ cantFail(Tpi->findFullDeclForForwardRef(RefTI)));
+ break;
+ }
+ }
+ }
+}
diff --git a/tools/llvm-pdbutil/TypeReferenceTracker.h b/tools/llvm-pdbutil/TypeReferenceTracker.h
new file mode 100644
index 000000000000..8861731ab6ee
--- /dev/null
+++ b/tools/llvm-pdbutil/TypeReferenceTracker.h
@@ -0,0 +1,69 @@
+//===- TypeReferenceTracker.h --------------------------------- *- C++ --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
+#define LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
+
+#include "InputFile.h"
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace pdb {
+
+class TpiStream;
+
+/// Maintains a bit vector to track whether a type was referenced by a symbol
+/// record.
+class TypeReferenceTracker {
+public:
+ TypeReferenceTracker(InputFile &File);
+
+ // Do the work of marking referenced types.
+ void mark();
+
+ // Return true if a symbol record transitively references this type.
+ bool isTypeReferenced(codeview::TypeIndex TI) {
+ return TI.toArrayIndex() <= NumTypeRecords &&
+ TypeReferenced.test(TI.toArrayIndex());
+ }
+
+private:
+ void addTypeRefsFromSymbol(const codeview::CVSymbol &Sym);
+
+ // Mark types on this list as referenced.
+ void addReferencedTypes(ArrayRef<uint8_t> RecData,
+ ArrayRef<codeview::TiReference> Refs);
+
+ // Consume all types on the worklist.
+ void markReferencedTypes();
+
+ void addOneTypeRef(codeview::TiRefKind RefKind, codeview::TypeIndex RefTI);
+
+ InputFile &File;
+ codeview::LazyRandomTypeCollection &Types;
+ codeview::LazyRandomTypeCollection *Ids = nullptr;
+ TpiStream *Tpi = nullptr;
+ BitVector TypeReferenced;
+ BitVector IdReferenced;
+ SmallVector<std::pair<codeview::TiRefKind, codeview::TypeIndex>, 10>
+ RefWorklist;
+ uint32_t NumTypeRecords = 0;
+ uint32_t NumIdRecords = 0;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVMPDBDUMP_TYPEREFERENCETRACKER_H
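For context, here is a minimal usage sketch of the API declared above. It is not part of the patch: the helper name reportUnreferencedTypes and the output format are illustrative, and it assumes llvm-pdbutil has already loaded an InputFile elsewhere.

    #include "InputFile.h"
    #include "TypeReferenceTracker.h"

    #include "llvm/ADT/Optional.h"
    #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
    #include "llvm/Support/raw_ostream.h"

    // Walk every record in the type stream and report the ones that no symbol
    // record transitively references (hypothetical helper, for illustration).
    static void reportUnreferencedTypes(llvm::pdb::InputFile &File) {
      llvm::pdb::TypeReferenceTracker Tracker(File);
      Tracker.mark(); // Seed from symbol records, then flood-fill referenced types.

      llvm::codeview::LazyRandomTypeCollection &Types = File.types();
      for (llvm::Optional<llvm::codeview::TypeIndex> TI = Types.getFirst(); TI;
           TI = Types.getNext(*TI))
        if (!Tracker.isTypeReferenced(*TI))
          llvm::outs() << "unreferenced type index: " << TI->getIndex() << "\n";
    }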
diff --git a/tools/llvm-pdbutil/YAMLOutputStyle.cpp b/tools/llvm-pdbutil/YAMLOutputStyle.cpp
index 62b5c428d410..80b76657facc 100644
--- a/tools/llvm-pdbutil/YAMLOutputStyle.cpp
+++ b/tools/llvm-pdbutil/YAMLOutputStyle.cpp
@@ -1,9 +1,8 @@
//===- YAMLOutputStyle.cpp ------------------------------------ *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -232,10 +231,7 @@ Error YAMLOutputStyle::dumpDbiStream() {
if (ModiStream == kInvalidStreamIndex)
continue;
- auto ModStreamData = msf::MappedBlockStream::createIndexedStream(
- File.getMsfLayout(), File.getMsfBuffer(), ModiStream,
- File.getAllocator());
-
+ auto ModStreamData = File.createIndexedStream(ModiStream);
pdb::ModuleDebugStreamRef ModS(MI, std::move(ModStreamData));
if (auto EC = ModS.reload())
return EC;
diff --git a/tools/llvm-pdbutil/YAMLOutputStyle.h b/tools/llvm-pdbutil/YAMLOutputStyle.h
index a5ad3355d2ab..7a50af1abe3f 100644
--- a/tools/llvm-pdbutil/YAMLOutputStyle.h
+++ b/tools/llvm-pdbutil/YAMLOutputStyle.h
@@ -1,9 +1,8 @@
//===- YAMLOutputStyle.h -------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.cpp b/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 76f61a2a95a7..785a98086791 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -1,9 +1,8 @@
//===- llvm-pdbutil.cpp - Dump debug info from a PDB file -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -463,7 +462,10 @@ cl::opt<bool> DumpSymbolStats(
"sym-stats",
cl::desc("Dump a detailed breakdown of symbol usage/size for each module"),
cl::cat(MsfOptions), cl::sub(DumpSubcommand));
-
+cl::opt<bool> DumpTypeStats(
+ "type-stats",
+ cl::desc("Dump a detailed breakdown of type usage/size"),
+ cl::cat(MsfOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpUdtStats(
"udt-stats",
cl::desc("Dump a detailed breakdown of S_UDT record usage / stats"),
@@ -477,6 +479,11 @@ cl::opt<bool> DumpTypeData(
"type-data",
cl::desc("dump CodeView type record raw bytes from TPI stream"),
cl::cat(TypeOptions), cl::sub(DumpSubcommand));
+cl::opt<bool>
+ DumpTypeRefStats("type-ref-stats",
+ cl::desc("dump statistics on the number and size of types "
+ "transitively referenced by symbol records"),
+ cl::cat(TypeOptions), cl::sub(DumpSubcommand));
cl::opt<bool> DumpTypeExtras("type-extras",
cl::desc("dump type hashes and index offsets"),
@@ -927,7 +934,7 @@ static std::string stringOr(std::string Str, std::string IfEmpty) {
static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
auto Sources = Session.getInjectedSources();
- if (0 == Sources->getChildCount()) {
+ if (!Sources || !Sources->getChildCount()) {
Printer.printLine("There are no injected sources.");
return;
}
@@ -940,9 +947,6 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
std::string VFName = stringOr(IS->getVirtualFileName(), "<null>");
uint32_t CRC = IS->getCrc32();
- std::string CompressionStr;
- llvm::raw_string_ostream Stream(CompressionStr);
- Stream << IS->getCompression();
WithColor(Printer, PDB_ColorItem::Path).get() << File;
Printer << " (";
WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Size;
@@ -961,7 +965,9 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
Printer << ", ";
WithColor(Printer, PDB_ColorItem::Keyword).get() << "compression";
Printer << "=";
- WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Stream.str();
+ dumpPDBSourceCompression(
+ WithColor(Printer, PDB_ColorItem::LiteralValue).get(),
+ IS->getCompression());
if (!opts::pretty::ShowInjectedSourceContent)
continue;
@@ -970,7 +976,12 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
int Indent = Printer.getIndentLevel();
Printer.Unindent(Indent);
- Printer.printLine(IS->getCode());
+ if (IS->getCompression() == PDB_SourceCompression::None)
+ Printer.printLine(IS->getCode());
+ else
+ Printer.formatBinary("Compressed data",
+ arrayRefFromStringRef(IS->getCode()),
+ /*StartOffset=*/0);
// Re-indent back to the original level.
Printer.Indent(Indent);
@@ -1272,12 +1283,7 @@ static void dumpPretty(StringRef Path) {
WithColor(Printer, PDB_ColorItem::SectionHeader).get()
<< "---INJECTED SOURCES---";
AutoIndent Indent1(Printer);
-
- if (ReaderType == PDB_ReaderType::Native)
- Printer.printLine(
- "Injected sources are not supported with the native reader.");
- else
- dumpInjectedSources(Printer, *Session);
+ dumpInjectedSources(Printer, *Session);
}
Printer.NewLine();
@@ -1377,8 +1383,7 @@ static void exportStream() {
<< "' (index " << Index << ") to file " << OutFileName << ".\n";
}
- SourceStream = MappedBlockStream::createIndexedStream(
- File.getMsfLayout(), File.getMsfBuffer(), Index, File.getAllocator());
+ SourceStream = File.createIndexedStream(Index);
auto OutFile = ExitOnErr(
FileOutputBuffer::create(OutFileName, SourceStream->getLength()));
FileBufferByteStream DestStream(std::move(OutFile), llvm::support::little);
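For reference, both options added above (-type-stats under the MSF category and -type-ref-stats under the type category) are attached to the existing dump subcommand, so an invocation would presumably look like the following, where input.pdb is a placeholder:

    llvm-pdbutil dump -type-ref-stats -type-stats input.pdb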
diff --git a/tools/llvm-pdbutil/llvm-pdbutil.h b/tools/llvm-pdbutil/llvm-pdbutil.h
index a57cc51d7fd7..321f41bba7f1 100644
--- a/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -1,9 +1,8 @@
//===- llvm-pdbutil.h ----------------------------------------- *- C++ --*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -141,6 +140,7 @@ extern llvm::cl::opt<bool> DumpSummary;
extern llvm::cl::opt<bool> DumpFpm;
extern llvm::cl::opt<bool> DumpStreams;
extern llvm::cl::opt<bool> DumpSymbolStats;
+extern llvm::cl::opt<bool> DumpTypeStats;
extern llvm::cl::opt<bool> DumpUdtStats;
extern llvm::cl::opt<bool> DumpStreamBlocks;
@@ -156,6 +156,7 @@ extern llvm::cl::opt<bool> DumpTypeData;
extern llvm::cl::opt<bool> DumpTypeExtras;
extern llvm::cl::list<uint32_t> DumpTypeIndex;
extern llvm::cl::opt<bool> DumpTypeDependents;
+extern llvm::cl::opt<bool> DumpTypeRefStats;
extern llvm::cl::opt<bool> DumpSectionHeaders;
extern llvm::cl::opt<bool> DumpIds;
diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp
index c25cbc2b64df..16d3ebe3fcbc 100644
--- a/tools/llvm-profdata/llvm-profdata.cpp
+++ b/tools/llvm-profdata/llvm-profdata.cpp
@@ -1,9 +1,8 @@
//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,8 +26,8 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/WithColor.h"
#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -201,6 +200,32 @@ static bool isFatalError(instrprof_error IPE) {
}
}
+/// Compute the overlap between profile BaseFilename and profile TestFilename,
+/// and store the program-level result in Overlap.
+static void overlapInput(const std::string &BaseFilename,
+ const std::string &TestFilename, WriterContext *WC,
+ OverlapStats &Overlap,
+ const OverlapFuncFilters &FuncFilter,
+ raw_fd_ostream &OS, bool IsCS) {
+ auto ReaderOrErr = InstrProfReader::create(TestFilename);
+ if (Error E = ReaderOrErr.takeError()) {
+ // Skip empty profiles by returning silently.
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ if (IPE != instrprof_error::empty_raw_profile)
+ WC->Err = make_error<InstrProfError>(IPE);
+ return;
+ }
+
+ auto Reader = std::move(ReaderOrErr.get());
+ for (auto &I : *Reader) {
+ OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
+ FuncOverlap.setFuncInfo(I.Name, I.Hash);
+
+ WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
+ FuncOverlap.dump(OS);
+ }
+}
+
/// Load an input into a writer context.
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
WriterContext *WC) {
@@ -226,7 +251,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
auto Reader = std::move(ReaderOrErr.get());
bool IsIRProfile = Reader->isIRLevelProfile();
- if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) {
+ bool HasCSIRProfile = Reader->hasCSIRLevelProfile();
+ if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) {
WC->Err = make_error<StringError>(
"Merge IR generated profile with Clang generated profile.",
std::error_code());
@@ -291,11 +317,6 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
OutputFormat != PF_Text)
exitWithError("Unknown format is specified.");
- std::error_code EC;
- raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
- if (EC)
- exitWithErrorCode(EC, OutputFilename);
-
std::mutex ErrorLock;
SmallSet<instrprof_error, 4> WriterErrorCodes;
@@ -358,6 +379,11 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
WC->ErrWhence);
}
+ std::error_code EC;
+ raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
+ if (EC)
+ exitWithErrorCode(EC, OutputFilename);
+
InstrProfWriter &Writer = Contexts[0]->Writer;
if (OutputFormat == PF_Text) {
if (Error E = Writer.writeText(Output))
@@ -407,12 +433,6 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
StringRef OutputFilename,
ProfileFormat OutputFormat) {
using namespace sampleprof;
- auto WriterOrErr =
- SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
- if (std::error_code EC = WriterOrErr.getError())
- exitWithErrorCode(EC, OutputFilename);
-
- auto Writer = std::move(WriterOrErr.get());
StringMap<FunctionSamples> ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
LLVMContext Context;
@@ -447,6 +467,12 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
}
}
}
+ auto WriterOrErr =
+ SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
+ if (std::error_code EC = WriterOrErr.getError())
+ exitWithErrorCode(EC, OutputFilename);
+
+ auto Writer = std::move(WriterOrErr.get());
Writer->write(ProfileMap);
}
@@ -608,6 +634,65 @@ static int merge_main(int argc, const char *argv[]) {
return 0;
}
+/// Compute the overlap between profile BaseFilename and profile TestFilename.
+static void overlapInstrProfile(const std::string &BaseFilename,
+ const std::string &TestFilename,
+ const OverlapFuncFilters &FuncFilter,
+ raw_fd_ostream &OS, bool IsCS) {
+ std::mutex ErrorLock;
+ SmallSet<instrprof_error, 4> WriterErrorCodes;
+ WriterContext Context(false, ErrorLock, WriterErrorCodes);
+ WeightedFile WeightedInput{BaseFilename, 1};
+ OverlapStats Overlap;
+ Error E = Overlap.accumuateCounts(BaseFilename, TestFilename, IsCS);
+ if (E)
+ exitWithError(std::move(E), "Error in getting profile count sums");
+ if (Overlap.Base.CountSum < 1.0f) {
+ OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
+ exit(0);
+ }
+ if (Overlap.Test.CountSum < 1.0f) {
+ OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
+ exit(0);
+ }
+ loadInput(WeightedInput, nullptr, &Context);
+ overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
+ IsCS);
+ Overlap.dump(OS);
+}
+
+static int overlap_main(int argc, const char *argv[]) {
+ cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
+ cl::desc("<base profile file>"));
+ cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
+ cl::desc("<test profile file>"));
+ cl::opt<std::string> Output("output", cl::value_desc("output"), cl::init("-"),
+ cl::desc("Output file"));
+ cl::alias OutputA("o", cl::desc("Alias for --output"), cl::aliasopt(Output));
+ cl::opt<bool> IsCS("cs", cl::init(false),
+ cl::desc("For context sensitive counts"));
+ cl::opt<unsigned long long> ValueCutoff(
+ "value-cutoff", cl::init(-1),
+ cl::desc(
+ "Function level overlap information for every function in test "
+ "profile with max count value greater then the parameter value"));
+ cl::opt<std::string> FuncNameFilter(
+ "function",
+ cl::desc("Function level overlap information for matching functions"));
+ cl::ParseCommandLineOptions(argc, argv, "LLVM profile data overlap tool\n");
+
+ std::error_code EC;
+ raw_fd_ostream OS(Output.data(), EC, sys::fs::F_Text);
+ if (EC)
+ exitWithErrorCode(EC, Output);
+
+ overlapInstrProfile(BaseFilename, TestFilename,
+ OverlapFuncFilters{ValueCutoff, FuncNameFilter}, OS,
+ IsCS);
+
+ return 0;
+}
+
typedef struct ValueSitesStats {
ValueSitesStats()
: TotalNumValueSites(0), TotalNumValueSitesWithValueProfile(0),
@@ -643,7 +728,7 @@ static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
for (uint32_t V = 0; V < NV; V++) {
OS << "\t[ " << format("%2u", I) << ", ";
if (Symtab == nullptr)
- OS << format("%4u", VD[V].Value);
+ OS << format("%4" PRIu64, VD[V].Value);
else
OS << Symtab->getFuncName(VD[V].Value);
OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
@@ -670,9 +755,10 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
uint32_t TopN, bool ShowIndirectCallTargets,
bool ShowMemOPSizes, bool ShowDetailedSummary,
std::vector<uint32_t> DetailedSummaryCutoffs,
- bool ShowAllFunctions, uint64_t ValueCutoff,
- bool OnlyListBelow, const std::string &ShowFunction,
- bool TextFormat, raw_fd_ostream &OS) {
+ bool ShowAllFunctions, bool ShowCS,
+ uint64_t ValueCutoff, bool OnlyListBelow,
+ const std::string &ShowFunction, bool TextFormat,
+ raw_fd_ostream &OS) {
auto ReaderOrErr = InstrProfReader::create(Filename);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
@@ -709,6 +795,11 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
OS << ":ir\n";
for (const auto &Func : *Reader) {
+ if (Reader->isIRLevelProfile()) {
+ bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
+ if (FuncIsCS != ShowCS)
+ continue;
+ }
bool Show =
ShowAllFunctions || (!ShowFunction.empty() &&
Func.Name.find(ShowFunction) != Func.Name.npos);
@@ -900,6 +991,8 @@ static int show_main(int argc, const char *argv[]) {
cl::value_desc("800000,901000,999999"));
cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
cl::desc("Details for every function"));
+ cl::opt<bool> ShowCS("showcs", cl::init(false),
+ cl::desc("Show context sensitive counts"));
cl::opt<std::string> ShowFunction("function",
cl::desc("Details for matching functions"));
@@ -927,6 +1020,12 @@ static int show_main(int argc, const char *argv[]) {
if (OutputFilename.empty())
OutputFilename = "-";
+ if (!Filename.compare(OutputFilename)) {
+ errs() << sys::path::filename(argv[0])
+ << ": Input file name cannot be the same as the output file name!\n";
+ return 1;
+ }
+
std::error_code EC;
raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
if (EC)
@@ -935,14 +1034,12 @@ static int show_main(int argc, const char *argv[]) {
if (ShowAllFunctions && !ShowFunction.empty())
WithColor::warning() << "-function argument ignored: showing all functions\n";
- std::vector<uint32_t> Cutoffs(DetailedSummaryCutoffs.begin(),
- DetailedSummaryCutoffs.end());
if (ProfileKind == instr)
return showInstrProfile(Filename, ShowCounts, TopNFunctions,
ShowIndirectCallTargets, ShowMemOPSizes,
ShowDetailedSummary, DetailedSummaryCutoffs,
- ShowAllFunctions, ValueCutoff, OnlyListBelow,
- ShowFunction, TextFormat, OS);
+ ShowAllFunctions, ShowCS, ValueCutoff,
+ OnlyListBelow, ShowFunction, TextFormat, OS);
else
return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
ShowFunction, OS);
@@ -959,6 +1056,8 @@ int main(int argc, const char *argv[]) {
func = merge_main;
else if (strcmp(argv[1], "show") == 0)
func = show_main;
+ else if (strcmp(argv[1], "overlap") == 0)
+ func = overlap_main;
if (func) {
std::string Invocation(ProgName.str() + " " + argv[1]);
@@ -973,7 +1072,7 @@ int main(int argc, const char *argv[]) {
<< "USAGE: " << ProgName << " <command> [args...]\n"
<< "USAGE: " << ProgName << " <command> -help\n\n"
<< "See each individual command --help for more details.\n"
- << "Available commands: merge, show\n";
+ << "Available commands: merge, show, overlap\n";
return 0;
}
}
@@ -983,6 +1082,6 @@ int main(int argc, const char *argv[]) {
else
errs() << ProgName << ": Unknown command!\n";
- errs() << "USAGE: " << ProgName << " <merge|show> [args...]\n";
+ errs() << "USAGE: " << ProgName << " <merge|show|overlap> [args...]\n";
return 1;
}
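For reference, overlap_main above registers two positional profile files plus --output/-o, -cs, -value-cutoff, and -function, so typical invocations of the new subcommand would presumably be the following, where the .profdata names are placeholders:

    llvm-profdata overlap base.profdata test.profdata -o overlap.txt
    llvm-profdata overlap -cs -value-cutoff=10000 base.profdata test.profdata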
diff --git a/tools/llvm-readobj/ARMEHABIPrinter.h b/tools/llvm-readobj/ARMEHABIPrinter.h
index 51128f113c4c..11f9d6166a59 100644
--- a/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -1,9 +1,8 @@
//===--- ARMEHABIPrinter.h - ARM EHABI Unwind Information Printer ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -366,6 +365,8 @@ template <typename ET>
ErrorOr<StringRef>
PrinterContext<ET>::FunctionAtAddress(unsigned Section,
uint64_t Address) const {
+ if (!Symtab)
+ return readobj_error::unknown_symbol;
auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
if (!StrTableOrErr)
error(StrTableOrErr.takeError());
@@ -551,13 +552,15 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
const Elf_Shdr *EHT =
FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4);
- if (auto Name = ELF->getSectionName(EHT))
- SW.printString("ExceptionHandlingTable", *Name);
+ if (EHT)
+ if (auto Name = ELF->getSectionName(EHT))
+ SW.printString("ExceptionHandlingTable", *Name);
uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr);
SW.printHex("TableEntryOffset", TableEntryOffset);
- PrintExceptionTable(IT, EHT, TableEntryOffset);
+ if (EHT)
+ PrintExceptionTable(IT, EHT, TableEntryOffset);
}
}
}
diff --git a/tools/llvm-readobj/ARMWinEHPrinter.cpp b/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 4b823b816c35..4de14e2e78d5 100644
--- a/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ARMWinEHPrinter.cpp - Windows on ARM EH Data Printer ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -1095,17 +1094,17 @@ void Decoder::dumpProcedureData(const COFFObjectFile &COFF,
break;
}
-std::error_code Decoder::dumpProcedureData(const COFFObjectFile &COFF) {
+Error Decoder::dumpProcedureData(const COFFObjectFile &COFF) {
for (const auto &Section : COFF.sections()) {
- StringRef SectionName;
- if (std::error_code EC =
- COFF.getSectionName(COFF.getCOFFSection(Section), SectionName))
- return EC;
+ Expected<StringRef> NameOrErr =
+ COFF.getSectionName(COFF.getCOFFSection(Section));
+ if (!NameOrErr)
+ return NameOrErr.takeError();
- if (SectionName.startswith(".pdata"))
+ if (NameOrErr->startswith(".pdata"))
dumpProcedureData(COFF, Section);
}
- return std::error_code();
+ return Error::success();
}
}
}
diff --git a/tools/llvm-readobj/ARMWinEHPrinter.h b/tools/llvm-readobj/ARMWinEHPrinter.h
index e271a1e6fe77..5de7062cb1d7 100644
--- a/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -1,9 +1,8 @@
//===--- ARMWinEHPrinter.h - Windows on ARM Unwind Information Printer ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -157,7 +156,7 @@ public:
Decoder(ScopedPrinter &SW, bool isAArch64) : SW(SW),
OS(SW.getOStream()),
isAArch64(isAArch64) {}
- std::error_code dumpProcedureData(const object::COFFObjectFile &COFF);
+ Error dumpProcedureData(const object::COFFObjectFile &COFF);
};
}
}
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 3e2626dad118..4c2e39dfa3cc 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -1,9 +1,8 @@
//===-- COFFDumper.cpp - COFF-specific dumper -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -44,13 +43,14 @@
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/WindowsResource.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Win64EH.h"
#include "llvm/Support/raw_ostream.h"
@@ -81,8 +81,6 @@ public:
void printFileHeaders() override;
void printSectionHeaders() override;
void printRelocations() override;
- void printSymbols() override;
- void printDynamicSymbols() override;
void printUnwindInfo() override;
void printNeededLibraries() override;
@@ -95,12 +93,16 @@ public:
void printCOFFResources() override;
void printCOFFLoadConfig() override;
void printCodeViewDebugInfo() override;
- void
- mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
- llvm::codeview::MergingTypeTableBuilder &CVTypes) override;
+ void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
+ llvm::codeview::MergingTypeTableBuilder &CVTypes,
+ llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs,
+ llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
+ bool GHash) override;
void printStackMap() const override;
void printAddrsig() override;
private:
+ void printSymbols() override;
+ void printDynamicSymbols() override;
void printSymbol(const SymbolRef &Sym);
void printRelocation(const SectionRef &Section, const RelocationRef &Reloc,
uint64_t Bias = 0);
@@ -568,29 +570,6 @@ static const EnumEntry<uint8_t> FileChecksumKindNames[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, SHA256),
};
-static const EnumEntry<COFF::ResourceTypeID> ResourceTypeNames[]{
- {"kRT_CURSOR (ID 1)", COFF::RID_Cursor},
- {"kRT_BITMAP (ID 2)", COFF::RID_Bitmap},
- {"kRT_ICON (ID 3)", COFF::RID_Icon},
- {"kRT_MENU (ID 4)", COFF::RID_Menu},
- {"kRT_DIALOG (ID 5)", COFF::RID_Dialog},
- {"kRT_STRING (ID 6)", COFF::RID_String},
- {"kRT_FONTDIR (ID 7)", COFF::RID_FontDir},
- {"kRT_FONT (ID 8)", COFF::RID_Font},
- {"kRT_ACCELERATOR (ID 9)", COFF::RID_Accelerator},
- {"kRT_RCDATA (ID 10)", COFF::RID_RCData},
- {"kRT_MESSAGETABLE (ID 11)", COFF::RID_MessageTable},
- {"kRT_GROUP_CURSOR (ID 12)", COFF::RID_Group_Cursor},
- {"kRT_GROUP_ICON (ID 14)", COFF::RID_Group_Icon},
- {"kRT_VERSION (ID 16)", COFF::RID_Version},
- {"kRT_DLGINCLUDE (ID 17)", COFF::RID_DLGInclude},
- {"kRT_PLUGPLAY (ID 19)", COFF::RID_PlugPlay},
- {"kRT_VXD (ID 20)", COFF::RID_VXD},
- {"kRT_ANICURSOR (ID 21)", COFF::RID_AniCursor},
- {"kRT_ANIICON (ID 22)", COFF::RID_AniIcon},
- {"kRT_HTML (ID 23)", COFF::RID_HTML},
- {"kRT_MANIFEST (ID 24)", COFF::RID_Manifest}};
-
template <typename T>
static std::error_code getSymbolAuxData(const COFFObjectFile *Obj,
COFFSymbolRef Symbol,
@@ -613,11 +592,14 @@ void COFFDumper::cacheRelocations() {
RelocMap[Section].push_back(Reloc);
// Sort relocations by address.
- llvm::sort(RelocMap[Section], relocAddressLess);
+ llvm::sort(RelocMap[Section], [](RelocationRef L, RelocationRef R) {
+ return L.getOffset() < R.getOffset();
+ });
}
}
-void COFFDumper::printDataDirectory(uint32_t Index, const std::string &FieldName) {
+void COFFDumper::printDataDirectory(uint32_t Index,
+ const std::string &FieldName) {
const data_directory *Data;
if (Obj->getDataDirectory(Index, Data))
return;
@@ -951,8 +933,7 @@ void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) {
void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
const SectionRef &Section) {
- StringRef SectionContents;
- error(Section.getContents(SectionContents));
+ StringRef SectionContents = unwrapOrError(Section.getContents());
StringRef Data = SectionContents;
SmallVector<StringRef, 10> FunctionNames;
@@ -980,6 +961,11 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
error(consume(Data, SubSectionSize));
ListScope S(W, "Subsection");
+ // Dump the subsection as normal even if the ignore bit is set.
+ if (SubType & SubsectionIgnoreFlag) {
+ W.printHex("IgnoredSubsectionKind", SubType);
+ SubType &= ~SubsectionIgnoreFlag;
+ }
W.printEnum("SubSectionType", SubType, makeArrayRef(SubSectionTypes));
W.printHex("SubSectionSize", SubSectionSize);
@@ -1228,13 +1214,15 @@ void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) {
}
void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
- MergingTypeTableBuilder &CVTypes) {
+ MergingTypeTableBuilder &CVTypes,
+ GlobalTypeTableBuilder &GlobalCVIDs,
+ GlobalTypeTableBuilder &GlobalCVTypes,
+ bool GHash) {
for (const SectionRef &S : Obj->sections()) {
StringRef SectionName;
error(S.getName(SectionName));
if (SectionName == ".debug$T") {
- StringRef Data;
- error(S.getContents(Data));
+ StringRef Data = unwrapOrError(S.getContents());
uint32_t Magic;
error(consume(Data, Magic));
if (Magic != 4)
@@ -1249,9 +1237,18 @@ void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs,
}
SmallVector<TypeIndex, 128> SourceToDest;
Optional<uint32_t> PCHSignature;
- if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
- PCHSignature))
- return error(std::move(EC));
+ if (GHash) {
+ std::vector<GloballyHashedType> Hashes =
+ GloballyHashedType::hashTypes(Types);
+ if (auto EC =
+ mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest,
+ Types, Hashes, PCHSignature))
+ return error(std::move(EC));
+ } else {
+ if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types,
+ PCHSignature))
+ return error(std::move(EC));
+ }
}
}
}
@@ -1261,8 +1258,7 @@ void COFFDumper::printCodeViewTypeSection(StringRef SectionName,
ListScope D(W, "CodeViewTypes");
W.printNumber("Section", SectionName, Obj->getSectionID(Section));
- StringRef Data;
- error(Section.getContents(Data));
+ StringRef Data = unwrapOrError(Section.getContents());
if (opts::CodeViewSubsectionBytes)
W.printBinaryBlock("Data", Data);
@@ -1322,9 +1318,7 @@ void COFFDumper::printSectionHeaders() {
if (opts::SectionData &&
!(Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)) {
- StringRef Data;
- error(Sec.getContents(Data));
-
+ StringRef Data = unwrapOrError(Sec.getContents());
W.printBinaryBlock("SectionData", Data);
}
}
@@ -1398,15 +1392,11 @@ void COFFDumper::printSymbols() {
void COFFDumper::printDynamicSymbols() { ListScope Group(W, "DynamicSymbols"); }
-static ErrorOr<StringRef>
+static Expected<StringRef>
getSectionName(const llvm::object::COFFObjectFile *Obj, int32_t SectionNumber,
const coff_section *Section) {
- if (Section) {
- StringRef SectionName;
- if (std::error_code EC = Obj->getSectionName(Section, SectionName))
- return EC;
- return SectionName;
- }
+ if (Section)
+ return Obj->getSectionName(Section);
if (SectionNumber == llvm::COFF::IMAGE_SYM_DEBUG)
return StringRef("IMAGE_SYM_DEBUG");
if (SectionNumber == llvm::COFF::IMAGE_SYM_ABSOLUTE)
@@ -1431,11 +1421,10 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
if (Obj->getSymbolName(Symbol, SymbolName))
SymbolName = "";
- StringRef SectionName = "";
- ErrorOr<StringRef> Res =
- getSectionName(Obj, Symbol.getSectionNumber(), Section);
- if (Res)
- SectionName = *Res;
+ StringRef SectionName;
+ if (Expected<StringRef> NameOrErr =
+ getSectionName(Obj, Symbol.getSectionNumber(), Section))
+ SectionName = *NameOrErr;
W.printString("Name", SymbolName);
W.printNumber("Value", Symbol.getValue());
@@ -1503,16 +1492,12 @@ void COFFDumper::printSymbol(const SymbolRef &Sym) {
&& Aux->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
const coff_section *Assoc;
StringRef AssocName = "";
- std::error_code EC = Obj->getSection(AuxNumber, Assoc);
- ErrorOr<StringRef> Res = getSectionName(Obj, AuxNumber, Assoc);
- if (Res)
- AssocName = *Res;
- if (!EC)
- EC = Res.getError();
- if (EC) {
- AssocName = "";
+ if (std::error_code EC = Obj->getSection(AuxNumber, Assoc))
error(EC);
- }
+ Expected<StringRef> Res = getSectionName(Obj, AuxNumber, Assoc);
+ if (!Res)
+ error(Res.takeError());
+ AssocName = *Res;
W.printNumber("AssocSection", AssocName, AuxNumber);
}
@@ -1559,7 +1544,8 @@ void COFFDumper::printUnwindInfo() {
case COFF::IMAGE_FILE_MACHINE_ARMNT: {
ARM::WinEH::Decoder Decoder(W, Obj->getMachine() ==
COFF::IMAGE_FILE_MACHINE_ARM64);
- Decoder.dumpProcedureData(*Obj);
+ // TODO Propagate the error.
+ consumeError(Decoder.dumpProcedureData(*Obj));
break;
}
default:
@@ -1581,10 +1567,10 @@ void COFFDumper::printNeededLibraries() {
Libs.push_back(Name);
}
- std::stable_sort(Libs.begin(), Libs.end());
+ llvm::stable_sort(Libs);
for (const auto &L : Libs) {
- outs() << " " << L << "\n";
+ W.startLine() << L << "\n";
}
}
@@ -1674,15 +1660,13 @@ void COFFDumper::printCOFFExports() {
void COFFDumper::printCOFFDirectives() {
for (const SectionRef &Section : Obj->sections()) {
- StringRef Contents;
StringRef Name;
error(Section.getName(Name));
if (Name != ".drectve")
continue;
- error(Section.getContents(Contents));
-
+ StringRef Contents = unwrapOrError(Section.getContents());
W.printString("Directive(s)", Contents);
}
}
@@ -1721,8 +1705,7 @@ void COFFDumper::printCOFFResources() {
if (!Name.startswith(".rsrc"))
continue;
- StringRef Ref;
- error(S.getContents(Ref));
+ StringRef Ref = unwrapOrError(S.getContents());
if ((Name == ".rsrc") || (Name == ".rsrc$01")) {
ResourceSectionRef RSF(Ref);
@@ -1777,7 +1760,8 @@ void COFFDumper::printResourceDirectoryTable(
SmallString<20> IDStr;
raw_svector_ostream OS(IDStr);
if (i < Table.NumberOfNameEntries) {
- ArrayRef<UTF16> RawEntryNameString = unwrapOrError(RSF.getEntryNameString(Entry));
+ ArrayRef<UTF16> RawEntryNameString =
+ unwrapOrError(RSF.getEntryNameString(Entry));
std::vector<UTF16> EndianCorrectedNameString;
if (llvm::sys::IsBigEndianHost) {
EndianCorrectedNameString.resize(RawEntryNameString.size() + 1);
@@ -1793,9 +1777,8 @@ void COFFDumper::printResourceDirectoryTable(
OS << EntryNameString;
} else {
if (Level == "Type") {
- ScopedPrinter Printer(OS);
- Printer.printEnum("", Entry.Identifier.ID,
- makeArrayRef(ResourceTypeNames));
+ OS << ": ";
+ printResourceTypeName(Entry.Identifier.ID, OS);
IDStr = IDStr.slice(0, IDStr.find_first_of(")", 0) + 1);
} else {
OS << ": (ID " << Entry.Identifier.ID << ")";
@@ -1848,18 +1831,16 @@ void COFFDumper::printStackMap() const {
if (StackMapSection == object::SectionRef())
return;
- StringRef StackMapContents;
- StackMapSection.getContents(StackMapContents);
- ArrayRef<uint8_t> StackMapContentsArray(
- reinterpret_cast<const uint8_t*>(StackMapContents.data()),
- StackMapContents.size());
+ StringRef StackMapContents = unwrapOrError(StackMapSection.getContents());
+ ArrayRef<uint8_t> StackMapContentsArray =
+ arrayRefFromStringRef(StackMapContents);
if (Obj->isLittleEndian())
prettyPrintStackMap(
- W, StackMapV2Parser<support::little>(StackMapContentsArray));
+ W, StackMapParser<support::little>(StackMapContentsArray));
else
- prettyPrintStackMap(W,
- StackMapV2Parser<support::big>(StackMapContentsArray));
+ prettyPrintStackMap(
+ W, StackMapParser<support::big>(StackMapContentsArray));
}
void COFFDumper::printAddrsig() {
@@ -1876,15 +1857,13 @@ void COFFDumper::printAddrsig() {
if (AddrsigSection == object::SectionRef())
return;
- StringRef AddrsigContents;
- AddrsigSection.getContents(AddrsigContents);
- ArrayRef<uint8_t> AddrsigContentsArray(
- reinterpret_cast<const uint8_t*>(AddrsigContents.data()),
- AddrsigContents.size());
+ StringRef AddrsigContents = unwrapOrError(AddrsigSection.getContents());
+ ArrayRef<uint8_t> AddrsigContentsArray(AddrsigContents.bytes_begin(),
+ AddrsigContents.size());
ListScope L(W, "Addrsig");
- auto *Cur = reinterpret_cast<const uint8_t *>(AddrsigContents.begin());
- auto *End = reinterpret_cast<const uint8_t *>(AddrsigContents.end());
+ const uint8_t *Cur = AddrsigContents.bytes_begin();
+ const uint8_t *End = AddrsigContents.bytes_end();
while (Cur != End) {
unsigned Size;
const char *Err;
@@ -1905,16 +1884,10 @@ void COFFDumper::printAddrsig() {
}
}
-void llvm::dumpCodeViewMergedTypes(
- ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
- llvm::codeview::MergingTypeTableBuilder &CVTypes) {
- // Flatten it first, then run our dumper on it.
- SmallString<0> TypeBuf;
- CVTypes.ForEachRecord([&](TypeIndex TI, const CVType &Record) {
- TypeBuf.append(Record.RecordData.begin(), Record.RecordData.end());
- });
-
- TypeTableCollection TpiTypes(CVTypes.records());
+void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+ ArrayRef<ArrayRef<uint8_t>> IpiRecords,
+ ArrayRef<ArrayRef<uint8_t>> TpiRecords) {
+ TypeTableCollection TpiTypes(TpiRecords);
{
ListScope S(Writer, "MergedTypeStream");
TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
@@ -1924,7 +1897,7 @@ void llvm::dumpCodeViewMergedTypes(
// Flatten the id stream and print it next. The ID stream refers to names from
// the type stream.
- TypeTableCollection IpiTypes(IDTable.records());
+ TypeTableCollection IpiTypes(IpiRecords);
{
ListScope S(Writer, "MergedIDStream");
TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes);
diff --git a/tools/llvm-readobj/COFFImportDumper.cpp b/tools/llvm-readobj/COFFImportDumper.cpp
index 18010c34f0f3..c9d5e82263db 100644
--- a/tools/llvm-readobj/COFFImportDumper.cpp
+++ b/tools/llvm-readobj/COFFImportDumper.cpp
@@ -1,9 +1,8 @@
//===-- COFFImportDumper.cpp - COFF import library dumper -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -51,7 +50,7 @@ void dumpCOFFImportFile(const COFFImportFile *File, ScopedPrinter &Writer) {
for (const object::BasicSymbolRef &Sym : File->symbols()) {
raw_ostream &OS = Writer.startLine();
OS << "Symbol: ";
- Sym.printName(OS);
+ cantFail(Sym.printName(OS));
OS << "\n";
}
}
diff --git a/tools/llvm-readobj/DwarfCFIEHPrinter.h b/tools/llvm-readobj/DwarfCFIEHPrinter.h
index d91d764c4d0a..7055510ef2f2 100644
--- a/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -1,9 +1,8 @@
//===--- DwarfCFIEHPrinter.h - DWARF-based Unwind Information Printer -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 93254717e921..4e1cb7d544e7 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -1,9 +1,8 @@
//===- ELFDumper.cpp - ELF-specific dumper --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -20,6 +19,7 @@
#include "llvm-readobj.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
@@ -30,6 +30,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
@@ -66,13 +67,14 @@ using namespace llvm;
using namespace llvm::object;
using namespace ELF;
-#define LLVM_READOBJ_ENUM_CASE(ns, enum) \
- case ns::enum: return #enum;
+#define LLVM_READOBJ_ENUM_CASE(ns, enum) \
+ case ns::enum: \
+ return #enum;
-#define ENUM_ENT(enum, altName) \
+#define ENUM_ENT(enum, altName) \
{ #enum, altName, ELF::enum }
-#define ENUM_ENT_1(enum) \
+#define ENUM_ENT_1(enum) \
{ #enum, #enum, ELF::enum }
#define LLVM_READOBJ_PHDR_ENUM(ns, enum) \
@@ -132,14 +134,17 @@ struct DynRegionInfo {
const Type *Start = reinterpret_cast<const Type *>(Addr);
if (!Start)
return {Start, Start};
- if (EntSize != sizeof(Type) || Size % EntSize)
- reportError("Invalid entity size");
+ if (EntSize != sizeof(Type) || Size % EntSize) {
+ // TODO: Add a section index to this warning.
+ reportWarning("invalid section size (" + Twine(Size) +
+ ") or entity size (" + Twine(EntSize) + ")");
+ return {Start, Start};
+ }
return {Start, Start + (Size / EntSize)};
}
};
-template<typename ELFT>
-class ELFDumper : public ObjDumper {
+template <typename ELFT> class ELFDumper : public ObjDumper {
public:
ELFDumper(const object::ELFObjectFile<ELFT> *ObjF, ScopedPrinter &Writer);
@@ -147,13 +152,14 @@ public:
void printSectionHeaders() override;
void printRelocations() override;
void printDynamicRelocations() override;
- void printSymbols() override;
- void printDynamicSymbols() override;
+ void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
+ void printHashSymbols() override;
void printUnwindInfo() override;
void printDynamicTable() override;
void printNeededLibraries() override;
- void printProgramHeaders() override;
+ void printProgramHeaders(bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) override;
void printHashTable() override;
void printGnuHashTable() override;
void printLoadName() override;
@@ -177,6 +183,8 @@ public:
void printELFLinkerOptions() override;
+ const object::ELFObjectFile<ELFT> *getElfObject() const { return ObjF; };
+
private:
std::unique_ptr<DumpStyle<ELFT>> ELFDumperStyle;
@@ -185,24 +193,25 @@ private:
DynRegionInfo checkDRI(DynRegionInfo DRI) {
const ELFFile<ELFT> *Obj = ObjF->getELFFile();
if (DRI.Addr < Obj->base() ||
- (const uint8_t *)DRI.Addr + DRI.Size > Obj->base() + Obj->getBufSize())
+ reinterpret_cast<const uint8_t *>(DRI.Addr) + DRI.Size >
+ Obj->base() + Obj->getBufSize())
error(llvm::object::object_error::parse_failed);
return DRI;
}
DynRegionInfo createDRIFrom(const Elf_Phdr *P, uintX_t EntSize) {
- return checkDRI({ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
+ return checkDRI(
+ {ObjF->getELFFile()->base() + P->p_offset, P->p_filesz, EntSize});
}
DynRegionInfo createDRIFrom(const Elf_Shdr *S) {
- return checkDRI({ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
+ return checkDRI(
+ {ObjF->getELFFile()->base() + S->sh_offset, S->sh_size, S->sh_entsize});
}
- void parseDynamicTable(ArrayRef<const Elf_Phdr *> LoadSegments);
+ void loadDynamicTable(const ELFFile<ELFT> *Obj);
+ void parseDynamicTable();
- void printValue(uint64_t Type, uint64_t Value);
-
- StringRef getDynamicString(uint64_t Offset) const;
StringRef getSymbolVersion(StringRef StrTab, const Elf_Sym *symb,
bool &IsDefault) const;
void LoadVersionMap() const;
@@ -217,7 +226,7 @@ private:
DynRegionInfo DynSymRegion;
DynRegionInfo DynamicTable;
StringRef DynamicStringTable;
- StringRef SOName;
+ StringRef SOName = "<Not found>";
const Elf_Hash *HashTable = nullptr;
const Elf_GnuHash *GnuHashTable = nullptr;
const Elf_Shdr *DotSymtabSec = nullptr;
@@ -226,9 +235,9 @@ private:
StringRef DynSymtabName;
ArrayRef<Elf_Word> ShndxTable;
- const Elf_Shdr *dot_gnu_version_sec = nullptr; // .gnu.version
- const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r
- const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d
+ const Elf_Shdr *SymbolVersionSection = nullptr; // .gnu.version
+ const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r
+ const Elf_Shdr *SymbolVersionDefSection = nullptr; // .gnu.version_d
// Records for each version index the corresponding Verdef or Vernaux entry.
// This is filled the first time LoadVersionMap() is called.
@@ -256,7 +265,18 @@ private:
public:
Elf_Dyn_Range dynamic_table() const {
- return DynamicTable.getAsArrayRef<Elf_Dyn>();
+ // A valid .dynamic section contains an array of entries terminated
+ // with a DT_NULL entry. However, sometimes the section content may
+ // continue past the DT_NULL entry, so to dump the section correctly,
+ // we first find the end of the entries by iterating over them.
+ Elf_Dyn_Range Table = DynamicTable.getAsArrayRef<Elf_Dyn>();
+
+ size_t Size = 0;
+ while (Size < Table.size())
+ if (Table[Size++].getTag() == DT_NULL)
+ break;
+
+ return Table.slice(0, Size);
}
Elf_Sym_Range dynamic_symbols() const {
@@ -271,9 +291,14 @@ public:
void getSectionNameIndex(const Elf_Sym *Symbol, const Elf_Sym *FirstSym,
StringRef &SectionName,
unsigned &SectionIndex) const;
- StringRef getStaticSymbolName(uint32_t Index) const;
+ std::string getStaticSymbolName(uint32_t Index) const;
+ StringRef getSymbolVersionByIndex(StringRef StrTab,
+ uint32_t VersionSymbolIndex,
+ bool &IsDefault) const;
void printSymbolsHelper(bool IsDynamic) const;
+ void printDynamicEntry(raw_ostream &OS, uint64_t Type, uint64_t Value) const;
+
const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; }
const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; }
const Elf_Shdr *getDotAddrsigSec() const { return DotAddrsigSec; }
@@ -283,6 +308,7 @@ public:
const DynRegionInfo &getDynRelaRegion() const { return DynRelaRegion; }
const DynRegionInfo &getDynRelrRegion() const { return DynRelrRegion; }
const DynRegionInfo &getDynPLTRelRegion() const { return DynPLTRelRegion; }
+ const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; }
const Elf_Hash *getHashTable() const { return HashTable; }
const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; }
};
@@ -328,15 +354,25 @@ public:
virtual void printGroupSections(const ELFFile<ELFT> *Obj) = 0;
virtual void printRelocations(const ELFFile<ELFT> *Obj) = 0;
virtual void printSectionHeaders(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSymbols(const ELFFile<ELFT> *Obj) = 0;
- virtual void printDynamicSymbols(const ELFFile<ELFT> *Obj) = 0;
+ virtual void printSymbols(const ELFFile<ELFT> *Obj, bool PrintSymbols,
+ bool PrintDynamicSymbols) = 0;
+ virtual void printHashSymbols(const ELFFile<ELFT> *Obj) {}
+ virtual void printDynamic(const ELFFile<ELFT> *Obj) {}
virtual void printDynamicRelocations(const ELFFile<ELFT> *Obj) = 0;
- virtual void printSymtabMessage(const ELFFile<ELFT> *obj, StringRef Name,
+ virtual void printSymtabMessage(const ELFFile<ELFT> *Obj, StringRef Name,
size_t Offset) {}
virtual void printSymbol(const ELFFile<ELFT> *Obj, const Elf_Sym *Symbol,
const Elf_Sym *FirstSym, StringRef StrTable,
bool IsDynamic) = 0;
- virtual void printProgramHeaders(const ELFFile<ELFT> *Obj) = 0;
+ virtual void printProgramHeaders(const ELFFile<ELFT> *Obj,
+ bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) = 0;
+ virtual void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) = 0;
+ virtual void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) = 0;
+ virtual void printVersionDependencySection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) = 0;
virtual void printHashHistogram(const ELFFile<ELFT> *Obj) = 0;
virtual void printCGProfile(const ELFFile<ELFT> *Obj) = 0;
virtual void printAddrsig(const ELFFile<ELFT> *Obj) = 0;
@@ -351,24 +387,36 @@ private:
};
template <typename ELFT> class GNUStyle : public DumpStyle<ELFT> {
- formatted_raw_ostream OS;
+ formatted_raw_ostream &OS;
public:
TYPEDEF_ELF_TYPES(ELFT)
GNUStyle(ScopedPrinter &W, ELFDumper<ELFT> *Dumper)
- : DumpStyle<ELFT>(Dumper), OS(W.getOStream()) {}
+ : DumpStyle<ELFT>(Dumper),
+ OS(static_cast<formatted_raw_ostream&>(W.getOStream())) {
+ assert (&W.getOStream() == &llvm::fouts());
+ }
void printFileHeaders(const ELFO *Obj) override;
void printGroupSections(const ELFFile<ELFT> *Obj) override;
void printRelocations(const ELFO *Obj) override;
void printSectionHeaders(const ELFO *Obj) override;
- void printSymbols(const ELFO *Obj) override;
- void printDynamicSymbols(const ELFO *Obj) override;
+ void printSymbols(const ELFO *Obj, bool PrintSymbols,
+ bool PrintDynamicSymbols) override;
+ void printHashSymbols(const ELFO *Obj) override;
+ void printDynamic(const ELFFile<ELFT> *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
void printSymtabMessage(const ELFO *Obj, StringRef Name,
size_t Offset) override;
- void printProgramHeaders(const ELFO *Obj) override;
+ void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) override;
+ void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) override;
+ void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) override;
+ void printVersionDependencySection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) override;
void printHashHistogram(const ELFFile<ELFT> *Obj) override;
void printCGProfile(const ELFFile<ELFT> *Obj) override;
void printAddrsig(const ELFFile<ELFT> *Obj) override;
@@ -379,11 +427,11 @@ public:
private:
struct Field {
- StringRef Str;
+ std::string Str;
unsigned Column;
Field(StringRef S, unsigned Col) : Str(S), Column(Col) {}
- Field(unsigned Col) : Str(""), Column(Col) {}
+ Field(unsigned Col) : Column(Col) {}
};
template <typename T, typename TEnum>
@@ -433,6 +481,8 @@ private:
void printRelocHeader(unsigned SType);
void printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
const Elf_Rela &R, bool IsRela);
+ void printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
+ StringRef SymbolName, const Elf_Rela &R, bool IsRela);
void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
StringRef StrTable, bool IsDynamic) override;
std::string getSymbolSectionNdx(const ELFO *Obj, const Elf_Sym *Symbol,
@@ -442,6 +492,8 @@ private:
bool checkoffsets(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
bool checkVMA(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
bool checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec);
+ void printProgramHeaders(const ELFO *Obj);
+ void printSectionMapping(const ELFO *Obj);
};
template <typename ELFT> class LLVMStyle : public DumpStyle<ELFT> {
@@ -456,10 +508,18 @@ public:
void printRelocations(const ELFO *Obj) override;
void printRelocations(const Elf_Shdr *Sec, const ELFO *Obj);
void printSectionHeaders(const ELFO *Obj) override;
- void printSymbols(const ELFO *Obj) override;
- void printDynamicSymbols(const ELFO *Obj) override;
+ void printSymbols(const ELFO *Obj, bool PrintSymbols,
+ bool PrintDynamicSymbols) override;
+ void printDynamic(const ELFFile<ELFT> *Obj) override;
void printDynamicRelocations(const ELFO *Obj) override;
- void printProgramHeaders(const ELFO *Obj) override;
+ void printProgramHeaders(const ELFO *Obj, bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) override;
+ void printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) override;
+ void printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) override;
+ void printVersionDependencySection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) override;
void printHashHistogram(const ELFFile<ELFT> *Obj) override;
void printCGProfile(const ELFFile<ELFT> *Obj) override;
void printAddrsig(const ELFFile<ELFT> *Obj) override;
@@ -471,8 +531,12 @@ public:
private:
void printRelocation(const ELFO *Obj, Elf_Rela Rel, const Elf_Shdr *SymTab);
void printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel);
+ void printSymbols(const ELFO *Obj);
+ void printDynamicSymbols(const ELFO *Obj);
void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
StringRef StrTable, bool IsDynamic) override;
+ void printProgramHeaders(const ELFO *Obj);
+ void printSectionMapping(const ELFO *Obj) {}
ScopedPrinter &W;
};
@@ -516,65 +580,71 @@ std::error_code createELFDumper(const object::ObjectFile *Obj,
// Iterate through the versions needed section, and place each Elf_Vernaux
// in the VersionMap according to its index.
template <class ELFT>
-void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *sec) const {
- unsigned vn_size = sec->sh_size; // Size of section in bytes
- unsigned vn_count = sec->sh_info; // Number of Verneed entries
- const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
- const char *sec_end = sec_start + vn_size;
+void ELFDumper<ELFT>::LoadVersionNeeds(const Elf_Shdr *Sec) const {
+ unsigned VerneedSize = Sec->sh_size; // Size of section in bytes
+ unsigned VerneedEntries = Sec->sh_info; // Number of Verneed entries
+ const uint8_t *VerneedStart = reinterpret_cast<const uint8_t *>(
+ ObjF->getELFFile()->base() + Sec->sh_offset);
+ const uint8_t *VerneedEnd = VerneedStart + VerneedSize;
// The first Verneed entry is at the start of the section.
- const char *p = sec_start;
- for (unsigned i = 0; i < vn_count; i++) {
- if (p + sizeof(Elf_Verneed) > sec_end)
+ const uint8_t *VerneedBuf = VerneedStart;
+ for (unsigned VerneedIndex = 0; VerneedIndex < VerneedEntries;
+ ++VerneedIndex) {
+ if (VerneedBuf + sizeof(Elf_Verneed) > VerneedEnd)
report_fatal_error("Section ended unexpectedly while scanning "
"version needed records.");
- const Elf_Verneed *vn = reinterpret_cast<const Elf_Verneed *>(p);
- if (vn->vn_version != ELF::VER_NEED_CURRENT)
+ const Elf_Verneed *Verneed =
+ reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+ if (Verneed->vn_version != ELF::VER_NEED_CURRENT)
report_fatal_error("Unexpected verneed version");
// Iterate through the Vernaux entries
- const char *paux = p + vn->vn_aux;
- for (unsigned j = 0; j < vn->vn_cnt; j++) {
- if (paux + sizeof(Elf_Vernaux) > sec_end)
+ const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+ for (unsigned VernauxIndex = 0; VernauxIndex < Verneed->vn_cnt;
+ ++VernauxIndex) {
+ if (VernauxBuf + sizeof(Elf_Vernaux) > VerneedEnd)
report_fatal_error("Section ended unexpected while scanning auxiliary "
"version needed records.");
- const Elf_Vernaux *vna = reinterpret_cast<const Elf_Vernaux *>(paux);
- size_t index = vna->vna_other & ELF::VERSYM_VERSION;
- if (index >= VersionMap.size())
- VersionMap.resize(index + 1);
- VersionMap[index] = VersionMapEntry(vna);
- paux += vna->vna_next;
+ const Elf_Vernaux *Vernaux =
+ reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+ size_t Index = Vernaux->vna_other & ELF::VERSYM_VERSION;
+ if (Index >= VersionMap.size())
+ VersionMap.resize(Index + 1);
+ VersionMap[Index] = VersionMapEntry(Vernaux);
+ VernauxBuf += Vernaux->vna_next;
}
- p += vn->vn_next;
+ VerneedBuf += Verneed->vn_next;
}
}
// Iterate through the version definitions, and place each Elf_Verdef
// in the VersionMap according to its index.
template <class ELFT>
-void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *sec) const {
- unsigned vd_size = sec->sh_size; // Size of section in bytes
- unsigned vd_count = sec->sh_info; // Number of Verdef entries
- const char *sec_start = (const char *)ObjF->getELFFile()->base() + sec->sh_offset;
- const char *sec_end = sec_start + vd_size;
+void ELFDumper<ELFT>::LoadVersionDefs(const Elf_Shdr *Sec) const {
+ unsigned VerdefSize = Sec->sh_size; // Size of section in bytes
+ unsigned VerdefEntries = Sec->sh_info; // Number of Verdef entries
+ const uint8_t *VerdefStart = reinterpret_cast<const uint8_t *>(
+ ObjF->getELFFile()->base() + Sec->sh_offset);
+ const uint8_t *VerdefEnd = VerdefStart + VerdefSize;
// The first Verdef entry is at the start of the section.
- const char *p = sec_start;
- for (unsigned i = 0; i < vd_count; i++) {
- if (p + sizeof(Elf_Verdef) > sec_end)
+ const uint8_t *VerdefBuf = VerdefStart;
+ for (unsigned VerdefIndex = 0; VerdefIndex < VerdefEntries; ++VerdefIndex) {
+ if (VerdefBuf + sizeof(Elf_Verdef) > VerdefEnd)
report_fatal_error("Section ended unexpectedly while scanning "
"version definitions.");
- const Elf_Verdef *vd = reinterpret_cast<const Elf_Verdef *>(p);
- if (vd->vd_version != ELF::VER_DEF_CURRENT)
+ const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+ if (Verdef->vd_version != ELF::VER_DEF_CURRENT)
report_fatal_error("Unexpected verdef version");
- size_t index = vd->vd_ndx & ELF::VERSYM_VERSION;
- if (index >= VersionMap.size())
- VersionMap.resize(index + 1);
- VersionMap[index] = VersionMapEntry(vd);
- p += vd->vd_next;
+ size_t Index = Verdef->vd_ndx & ELF::VERSYM_VERSION;
+ if (Index >= VersionMap.size())
+ VersionMap.resize(Index + 1);
+ VersionMap[Index] = VersionMapEntry(Verdef);
+ VerdefBuf += Verdef->vd_next;
}
}
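
For illustration, LoadVersionNeeds and LoadVersionDefs above follow the same pattern: variable-length records chained together by a byte offset to the next record, with a bound check against the end of the section before every read. A minimal self-contained sketch of that pattern (Record is a hypothetical stand-in, not the real Elf_Verneed/Elf_Verdef types):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

namespace {
struct Record {
  uint32_t Payload; // stand-in for the real entry fields
  uint32_t Next;    // byte offset from this record to the next one
};

void walkRecords(const uint8_t *Start, size_t Size, unsigned Count) {
  const uint8_t *End = Start + Size;
  const uint8_t *Buf = Start;
  for (unsigned I = 0; I < Count; ++I) {
    if (Buf + sizeof(Record) > End) { // never read past the section
      std::fprintf(stderr, "section ended unexpectedly\n");
      return;
    }
    Record R;
    std::memcpy(&R, Buf, sizeof(R)); // copy out rather than cast the pointer
    std::printf("record %u: payload %u\n", I, R.Payload);
    Buf += R.Next;                   // follow the chained offset
  }
}
} // namespace

int main() {
  std::vector<uint8_t> Section(2 * sizeof(Record));
  Record A{1, static_cast<uint32_t>(sizeof(Record))}, B{2, 0};
  std::memcpy(Section.data(), &A, sizeof(A));
  std::memcpy(Section.data() + sizeof(Record), &B, sizeof(B));
  walkRecords(Section.data(), Section.size(), 2);
  return 0;
}
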
template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
// If there is no dynamic symtab or version table, there is nothing to do.
- if (!DynSymRegion.Addr || !dot_gnu_version_sec)
+ if (!DynSymRegion.Addr || !SymbolVersionSection)
return;
// Has the VersionMap already been loaded?
@@ -586,243 +656,111 @@ template <class ELFT> void ELFDumper<ELFT>::LoadVersionMap() const {
VersionMap.push_back(VersionMapEntry());
VersionMap.push_back(VersionMapEntry());
- if (dot_gnu_version_d_sec)
- LoadVersionDefs(dot_gnu_version_d_sec);
-
- if (dot_gnu_version_r_sec)
- LoadVersionNeeds(dot_gnu_version_r_sec);
-}
-
-template <typename ELFO, class ELFT>
-static void printVersionSymbolSection(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
- const typename ELFO::Elf_Shdr *Sec,
- ScopedPrinter &W) {
- DictScope SS(W, "Version symbols");
- if (!Sec)
- return;
- StringRef Name = unwrapOrError(Obj->getSectionName(Sec));
- W.printNumber("Section Name", Name, Sec->sh_name);
- W.printHex("Address", Sec->sh_addr);
- W.printHex("Offset", Sec->sh_offset);
- W.printNumber("Link", Sec->sh_link);
-
- const uint8_t *P = (const uint8_t *)Obj->base() + Sec->sh_offset;
- StringRef StrTable = Dumper->getDynamicStringTable();
-
- // Same number of entries in the dynamic symbol table (DT_SYMTAB).
- ListScope Syms(W, "Symbols");
- for (const typename ELFO::Elf_Sym &Sym : Dumper->dynamic_symbols()) {
- DictScope S(W, "Symbol");
- std::string FullSymbolName =
- Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */);
- W.printNumber("Version", *P);
- W.printString("Name", FullSymbolName);
- P += sizeof(typename ELFO::Elf_Half);
- }
-}
-
-static const EnumEntry<unsigned> SymVersionFlags[] = {
- {"Base", "BASE", VER_FLG_BASE},
- {"Weak", "WEAK", VER_FLG_WEAK},
- {"Info", "INFO", VER_FLG_INFO}};
-
-template <typename ELFO, class ELFT>
-static void printVersionDefinitionSection(ELFDumper<ELFT> *Dumper,
- const ELFO *Obj,
- const typename ELFO::Elf_Shdr *Sec,
- ScopedPrinter &W) {
- using VerDef = typename ELFO::Elf_Verdef;
- using VerdAux = typename ELFO::Elf_Verdaux;
-
- DictScope SD(W, "SHT_GNU_verdef");
- if (!Sec)
- return;
-
- // The number of entries in the section SHT_GNU_verdef
- // is determined by DT_VERDEFNUM tag.
- unsigned VerDefsNum = 0;
- for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
- if (Dyn.d_tag == DT_VERDEFNUM) {
- VerDefsNum = Dyn.d_un.d_val;
- break;
- }
- }
-
- const uint8_t *SecStartAddress =
- (const uint8_t *)Obj->base() + Sec->sh_offset;
- const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
- const uint8_t *P = SecStartAddress;
- const typename ELFO::Elf_Shdr *StrTab =
- unwrapOrError(Obj->getSection(Sec->sh_link));
-
- while (VerDefsNum--) {
- if (P + sizeof(VerDef) > SecEndAddress)
- report_fatal_error("invalid offset in the section");
-
- auto *VD = reinterpret_cast<const VerDef *>(P);
- DictScope Def(W, "Definition");
- W.printNumber("Version", VD->vd_version);
- W.printEnum("Flags", VD->vd_flags, makeArrayRef(SymVersionFlags));
- W.printNumber("Index", VD->vd_ndx);
- W.printNumber("Hash", VD->vd_hash);
- W.printString("Name",
- StringRef((const char *)(Obj->base() + StrTab->sh_offset +
- VD->getAux()->vda_name)));
- if (!VD->vd_cnt)
- report_fatal_error("at least one definition string must exist");
- if (VD->vd_cnt > 2)
- report_fatal_error("more than one predecessor is not expected");
-
- if (VD->vd_cnt == 2) {
- const uint8_t *PAux = P + VD->vd_aux + VD->getAux()->vda_next;
- const VerdAux *Aux = reinterpret_cast<const VerdAux *>(PAux);
- W.printString("Predecessor",
- StringRef((const char *)(Obj->base() + StrTab->sh_offset +
- Aux->vda_name)));
- }
-
- P += VD->vd_next;
- }
-}
-
-template <typename ELFO, class ELFT>
-static void printVersionDependencySection(ELFDumper<ELFT> *Dumper,
- const ELFO *Obj,
- const typename ELFO::Elf_Shdr *Sec,
- ScopedPrinter &W) {
- using VerNeed = typename ELFO::Elf_Verneed;
- using VernAux = typename ELFO::Elf_Vernaux;
-
- DictScope SD(W, "SHT_GNU_verneed");
- if (!Sec)
- return;
-
- unsigned VerNeedNum = 0;
- for (const typename ELFO::Elf_Dyn &Dyn : Dumper->dynamic_table()) {
- if (Dyn.d_tag == DT_VERNEEDNUM) {
- VerNeedNum = Dyn.d_un.d_val;
- break;
- }
- }
-
- const uint8_t *SecData = (const uint8_t *)Obj->base() + Sec->sh_offset;
- const typename ELFO::Elf_Shdr *StrTab =
- unwrapOrError(Obj->getSection(Sec->sh_link));
-
- const uint8_t *P = SecData;
- for (unsigned I = 0; I < VerNeedNum; ++I) {
- const VerNeed *Need = reinterpret_cast<const VerNeed *>(P);
- DictScope Entry(W, "Dependency");
- W.printNumber("Version", Need->vn_version);
- W.printNumber("Count", Need->vn_cnt);
- W.printString("FileName",
- StringRef((const char *)(Obj->base() + StrTab->sh_offset +
- Need->vn_file)));
-
- const uint8_t *PAux = P + Need->vn_aux;
- for (unsigned J = 0; J < Need->vn_cnt; ++J) {
- const VernAux *Aux = reinterpret_cast<const VernAux *>(PAux);
- DictScope Entry(W, "Entry");
- W.printNumber("Hash", Aux->vna_hash);
- W.printEnum("Flags", Aux->vna_flags, makeArrayRef(SymVersionFlags));
- W.printNumber("Index", Aux->vna_other);
- W.printString("Name",
- StringRef((const char *)(Obj->base() + StrTab->sh_offset +
- Aux->vna_name)));
- PAux += Aux->vna_next;
- }
- P += Need->vn_next;
- }
-}
-
-template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
- // Dump version symbol section.
- printVersionSymbolSection(this, ObjF->getELFFile(), dot_gnu_version_sec, W);
-
- // Dump version definition section.
- printVersionDefinitionSection(this, ObjF->getELFFile(), dot_gnu_version_d_sec, W);
+ if (SymbolVersionDefSection)
+ LoadVersionDefs(SymbolVersionDefSection);
- // Dump version dependency section.
- printVersionDependencySection(this, ObjF->getELFFile(), dot_gnu_version_r_sec, W);
+ if (SymbolVersionNeedSection)
+ LoadVersionNeeds(SymbolVersionNeedSection);
}
template <typename ELFT>
StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
- const Elf_Sym *symb,
+ const Elf_Sym *Sym,
bool &IsDefault) const {
// This is a dynamic symbol. Look in the GNU symbol version table.
- if (!dot_gnu_version_sec) {
+ if (!SymbolVersionSection) {
// No version table.
IsDefault = false;
- return StringRef("");
+ return "";
}
// Determine the position in the symbol table of this entry.
- size_t entry_index = (reinterpret_cast<uintptr_t>(symb) -
+ size_t EntryIndex = (reinterpret_cast<uintptr_t>(Sym) -
reinterpret_cast<uintptr_t>(DynSymRegion.Addr)) /
sizeof(Elf_Sym);
- // Get the corresponding version index entry
- const Elf_Versym *vs = unwrapOrError(
- ObjF->getELFFile()->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
- size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
+ // Get the corresponding version index entry.
+ const Elf_Versym *Versym =
+ unwrapOrError(ObjF->getELFFile()->template getEntry<Elf_Versym>(
+ SymbolVersionSection, EntryIndex));
+ return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault);
+}
+
+static std::string maybeDemangle(StringRef Name) {
+ return opts::Demangle ? demangle(Name) : Name.str();
+}
+
+template <typename ELFT>
+std::string ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+ StringRef StrTable =
+ unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
+ Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
+ if (Index >= Syms.size())
+ reportError("Invalid symbol index");
+ const Elf_Sym *Sym = &Syms[Index];
+ return maybeDemangle(unwrapOrError(Sym->getName(StrTable)));
+}
+
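
maybeDemangle above applies llvm::demangle only when demangling is requested on the command line. As a rough standalone analogue (not the LLVM implementation, which also understands other mangling schemes), the Itanium ABI interface can be used directly; the Demangle flag here stands in for opts::Demangle:

#include <cstdlib>
#include <cxxabi.h>
#include <iostream>
#include <string>

static std::string maybeDemangleSketch(const std::string &Name, bool Demangle) {
  if (!Demangle)
    return Name;
  int Status = 0;
  char *Buf = abi::__cxa_demangle(Name.c_str(), nullptr, nullptr, &Status);
  if (Status != 0 || !Buf)
    return Name; // not a mangled name (or demangling failed); keep it as-is
  std::string Result(Buf);
  std::free(Buf); // __cxa_demangle allocates the result with malloc
  return Result;
}

int main() {
  std::cout << maybeDemangleSketch("_Z3foov", true) << "\n"; // prints "foo()"
  std::cout << maybeDemangleSketch("main", true) << "\n";    // prints "main"
  return 0;
}
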
+template <typename ELFT>
+StringRef ELFDumper<ELFT>::getSymbolVersionByIndex(StringRef StrTab,
+ uint32_t SymbolVersionIndex,
+ bool &IsDefault) const {
+ size_t VersionIndex = SymbolVersionIndex & VERSYM_VERSION;
// Special markers for unversioned symbols.
- if (version_index == ELF::VER_NDX_LOCAL ||
- version_index == ELF::VER_NDX_GLOBAL) {
+ if (VersionIndex == VER_NDX_LOCAL || VersionIndex == VER_NDX_GLOBAL) {
IsDefault = false;
- return StringRef("");
+ return "";
}
- // Lookup this symbol in the version table
+ // Lookup this symbol in the version table.
LoadVersionMap();
- if (version_index >= VersionMap.size() || VersionMap[version_index].isNull())
+ if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull())
reportError("Invalid version entry");
- const VersionMapEntry &entry = VersionMap[version_index];
+ const VersionMapEntry &Entry = VersionMap[VersionIndex];
- // Get the version name string
- size_t name_offset;
- if (entry.isVerdef()) {
+ // Get the version name string.
+ size_t NameOffset;
+ if (Entry.isVerdef()) {
// The first Verdaux entry holds the name.
- name_offset = entry.getVerdef()->getAux()->vda_name;
- IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN);
+ NameOffset = Entry.getVerdef()->getAux()->vda_name;
+ IsDefault = !(SymbolVersionIndex & VERSYM_HIDDEN);
} else {
- name_offset = entry.getVernaux()->vna_name;
+ NameOffset = Entry.getVernaux()->vna_name;
IsDefault = false;
}
- if (name_offset >= StrTab.size())
+ if (NameOffset >= StrTab.size())
reportError("Invalid string offset");
- return StringRef(StrTab.data() + name_offset);
-}
-
-template <typename ELFT>
-StringRef ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
- StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
- Elf_Sym_Range Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
- if (Index >= Syms.size())
- reportError("Invalid symbol index");
- const Elf_Sym *Sym = &Syms[Index];
- return unwrapOrError(Sym->getName(StrTable));
+ return StrTab.data() + NameOffset;
}
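
getSymbolVersionByIndex above works on the raw SHT_GNU_versym half-words: the low 15 bits (VERSYM_VERSION) select the version index, indices VER_NDX_LOCAL and VER_NDX_GLOBAL mean "unversioned", and the high bit (VERSYM_HIDDEN) selects the name@version form instead of name@@version. A small self-contained sketch of that decoding:

#include <cstdint>
#include <cstdio>

namespace {
constexpr uint16_t VERSYM_VERSION = 0x7fff; // low 15 bits: version index
constexpr uint16_t VERSYM_HIDDEN = 0x8000;  // high bit: non-default version
constexpr uint16_t VER_NDX_LOCAL = 0;
constexpr uint16_t VER_NDX_GLOBAL = 1;

void describe(uint16_t Versym) {
  unsigned Index = Versym & VERSYM_VERSION;
  if (Index == VER_NDX_LOCAL || Index == VER_NDX_GLOBAL) {
    std::printf("0x%04x: unversioned (index %u)\n", (unsigned)Versym, Index);
    return;
  }
  bool IsDefault = !(Versym & VERSYM_HIDDEN);
  std::printf("0x%04x: version index %u, printed as %s\n", (unsigned)Versym,
              Index, IsDefault ? "name@@version" : "name@version");
}
} // namespace

int main() {
  describe(0x0000); // VER_NDX_LOCAL: no version suffix
  describe(0x0002); // default version with index 2 -> name@@version
  describe(0x8002); // hidden version with index 2  -> name@version
  return 0;
}
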
template <typename ELFT>
std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
StringRef StrTable,
bool IsDynamic) const {
- StringRef SymbolName = unwrapOrError(Symbol->getName(StrTable));
+ std::string SymbolName =
+ maybeDemangle(unwrapOrError(Symbol->getName(StrTable)));
+
+ if (SymbolName.empty() && Symbol->getType() == ELF::STT_SECTION) {
+ unsigned SectionIndex;
+ StringRef SectionName;
+ Elf_Sym_Range Syms =
+ unwrapOrError(ObjF->getELFFile()->symbols(DotSymtabSec));
+ getSectionNameIndex(Symbol, Syms.begin(), SectionName, SectionIndex);
+ return SectionName;
+ }
+
if (!IsDynamic)
return SymbolName;
- std::string FullSymbolName(SymbolName);
-
bool IsDefault;
StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault);
if (!Version.empty()) {
- FullSymbolName += (IsDefault ? "@@" : "@");
- FullSymbolName += Version;
+ SymbolName += (IsDefault ? "@@" : "@");
+ SymbolName += Version;
}
- return FullSymbolName;
+ return SymbolName;
}
template <typename ELFT>
@@ -914,6 +852,11 @@ static const EnumEntry<unsigned> ElfOSABI[] = {
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
+static const EnumEntry<unsigned> SymVersionFlags[] = {
+ {"Base", "BASE", VER_FLG_BASE},
+ {"Weak", "WEAK", VER_FLG_WEAK},
+ {"Info", "INFO", VER_FLG_INFO}};
+
static const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
{"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL},
@@ -1103,16 +1046,6 @@ static const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
{"HIDDEN", "HIDDEN", ELF::STV_HIDDEN},
{"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}};
-static const EnumEntry<unsigned> ElfSymbolTypes[] = {
- {"None", "NOTYPE", ELF::STT_NOTYPE},
- {"Object", "OBJECT", ELF::STT_OBJECT},
- {"Function", "FUNC", ELF::STT_FUNC},
- {"Section", "SECTION", ELF::STT_SECTION},
- {"File", "FILE", ELF::STT_FILE},
- {"Common", "COMMON", ELF::STT_COMMON},
- {"TLS", "TLS", ELF::STT_TLS},
- {"GNU_IFunc", "IFUNC", ELF::STT_GNU_IFUNC}};
-
static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
{ "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL }
};
@@ -1205,14 +1138,12 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
// program header type.
switch (Arch) {
case ELF::EM_ARM:
- switch (Type) {
- LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX);
- }
+ switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX); }
break;
case ELF::EM_MIPS:
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
- LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_RTPROC);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
@@ -1233,14 +1164,15 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_EH_FRAME);
LLVM_READOBJ_ENUM_CASE(ELF, PT_SUNW_UNWIND);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
- LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
- default: return "";
+ default:
+ return "";
}
}
@@ -1368,7 +1300,11 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};
@@ -1420,68 +1356,118 @@ static const char *getElfMipsOptionsOdkType(unsigned Odk) {
}
template <typename ELFT>
-ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
- ScopedPrinter &Writer)
- : ObjDumper(Writer), ObjF(ObjF) {
- SmallVector<const Elf_Phdr *, 4> LoadSegments;
- const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+void ELFDumper<ELFT>::loadDynamicTable(const ELFFile<ELFT> *Obj) {
+ // Try to locate the PT_DYNAMIC header.
+ const Elf_Phdr *DynamicPhdr = nullptr;
for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
- if (Phdr.p_type == ELF::PT_DYNAMIC) {
- DynamicTable = createDRIFrom(&Phdr, sizeof(Elf_Dyn));
+ if (Phdr.p_type != ELF::PT_DYNAMIC)
continue;
- }
- if (Phdr.p_type != ELF::PT_LOAD || Phdr.p_filesz == 0)
+ DynamicPhdr = &Phdr;
+ break;
+ }
+
+  // Try to locate the .dynamic section in the section header table.
+ const Elf_Shdr *DynamicSec = nullptr;
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ if (Sec.sh_type != ELF::SHT_DYNAMIC)
continue;
- LoadSegments.push_back(&Phdr);
+ DynamicSec = &Sec;
+ break;
+ }
+
+ // Information in the section header has priority over the information
+ // in a PT_DYNAMIC header.
+ // Ignore sh_entsize and use the expected value for entry size explicitly.
+ // This allows us to dump the dynamic sections with a broken sh_entsize
+ // field.
+ if (DynamicSec) {
+ DynamicTable = checkDRI({ObjF->getELFFile()->base() + DynamicSec->sh_offset,
+ DynamicSec->sh_size, sizeof(Elf_Dyn)});
+ parseDynamicTable();
+ }
+
+ // If we have a PT_DYNAMIC header, we will either check the found dynamic
+ // section or take the dynamic table data directly from the header.
+ if (!DynamicPhdr)
+ return;
+
+ if (DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
+ ObjF->getMemoryBufferRef().getBufferSize())
+ reportError(
+ "PT_DYNAMIC segment offset + size exceeds the size of the file");
+
+ if (!DynamicSec) {
+ DynamicTable = createDRIFrom(DynamicPhdr, sizeof(Elf_Dyn));
+ parseDynamicTable();
+ return;
}
+ StringRef Name = unwrapOrError(Obj->getSectionName(DynamicSec));
+ if (DynamicSec->sh_addr + DynamicSec->sh_size >
+ DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
+ DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
+ reportWarning("The SHT_DYNAMIC section '" + Name +
+ "' is not contained within the "
+ "PT_DYNAMIC segment");
+
+ if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
+ reportWarning("The SHT_DYNAMIC section '" + Name +
+ "' is not at the start of "
+ "PT_DYNAMIC segment");
+}
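
loadDynamicTable above prefers the SHT_DYNAMIC section header but still cross-checks it against the PT_DYNAMIC program header, warning when the section is not fully contained in the segment or does not start at the segment's beginning. The address checks reduce to the small sketch below (the addresses and sizes are made-up values):

#include <cstdint>
#include <cstdio>

namespace {
// True when [SecAddr, SecAddr + SecSize) lies inside [SegAddr, SegAddr + SegSize).
bool sectionInsideSegment(uint64_t SecAddr, uint64_t SecSize, uint64_t SegAddr,
                          uint64_t SegSize) {
  return SecAddr >= SegAddr && SecAddr + SecSize <= SegAddr + SegSize;
}
} // namespace

int main() {
  uint64_t SecAddr = 0x2000, SecSize = 0x100; // hypothetical .dynamic section
  uint64_t SegAddr = 0x2000, SegSize = 0x200; // hypothetical PT_DYNAMIC segment
  if (!sectionInsideSegment(SecAddr, SecSize, SegAddr, SegSize))
    std::puts("warning: SHT_DYNAMIC is not contained within PT_DYNAMIC");
  if (SecAddr != SegAddr)
    std::puts("warning: SHT_DYNAMIC is not at the start of PT_DYNAMIC");
  else
    std::puts("SHT_DYNAMIC starts exactly at PT_DYNAMIC");
  return 0;
}
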
+
+template <typename ELFT>
+ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
+ ScopedPrinter &Writer)
+ : ObjDumper(Writer), ObjF(ObjF) {
+ const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+
for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
switch (Sec.sh_type) {
case ELF::SHT_SYMTAB:
- if (DotSymtabSec != nullptr)
- reportError("Multiple SHT_SYMTAB");
- DotSymtabSec = &Sec;
+ if (!DotSymtabSec)
+ DotSymtabSec = &Sec;
break;
case ELF::SHT_DYNSYM:
- if (DynSymRegion.Size)
- reportError("Multiple SHT_DYNSYM");
- DynSymRegion = createDRIFrom(&Sec);
- // This is only used (if Elf_Shdr present)for naming section in GNU style
- DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
- DynamicStringTable = unwrapOrError(Obj->getStringTableForSymtab(Sec));
+ if (!DynSymRegion.Size) {
+ DynSymRegion = createDRIFrom(&Sec);
+        // This is only used (if the Elf_Shdr is present) for naming the
+        // section in GNU style.
+ DynSymtabName = unwrapOrError(Obj->getSectionName(&Sec));
+
+ if (Expected<StringRef> E = Obj->getStringTableForSymtab(Sec))
+ DynamicStringTable = *E;
+ else
+ warn(E.takeError());
+ }
break;
case ELF::SHT_SYMTAB_SHNDX:
ShndxTable = unwrapOrError(Obj->getSHNDXTable(Sec));
break;
case ELF::SHT_GNU_versym:
- if (dot_gnu_version_sec != nullptr)
- reportError("Multiple SHT_GNU_versym");
- dot_gnu_version_sec = &Sec;
+ if (!SymbolVersionSection)
+ SymbolVersionSection = &Sec;
break;
case ELF::SHT_GNU_verdef:
- if (dot_gnu_version_d_sec != nullptr)
- reportError("Multiple SHT_GNU_verdef");
- dot_gnu_version_d_sec = &Sec;
+ if (!SymbolVersionDefSection)
+ SymbolVersionDefSection = &Sec;
break;
case ELF::SHT_GNU_verneed:
- if (dot_gnu_version_r_sec != nullptr)
- reportError("Multiple SHT_GNU_verneed");
- dot_gnu_version_r_sec = &Sec;
+ if (!SymbolVersionNeedSection)
+ SymbolVersionNeedSection = &Sec;
break;
case ELF::SHT_LLVM_CALL_GRAPH_PROFILE:
- if (DotCGProfileSec != nullptr)
- reportError("Multiple .llvm.call-graph-profile");
- DotCGProfileSec = &Sec;
+ if (!DotCGProfileSec)
+ DotCGProfileSec = &Sec;
break;
case ELF::SHT_LLVM_ADDRSIG:
- if (DotAddrsigSec != nullptr)
- reportError("Multiple .llvm_addrsig");
- DotAddrsigSec = &Sec;
+ if (!DotAddrsigSec)
+ DotAddrsigSec = &Sec;
break;
}
}
- parseDynamicTable(LoadSegments);
+ loadDynamicTable(Obj);
if (opts::Output == opts::GNU)
ELFDumperStyle.reset(new GNUStyle<ELFT>(Writer, this));
@@ -1489,13 +1475,84 @@ ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> *ObjF,
ELFDumperStyle.reset(new LLVMStyle<ELFT>(Writer, this));
}
-template <typename ELFT>
-void ELFDumper<ELFT>::parseDynamicTable(
- ArrayRef<const Elf_Phdr *> LoadSegments) {
- auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * {
+static const char *getTypeString(unsigned Arch, uint64_t Type) {
+#define DYNAMIC_TAG(n, v)
+ switch (Arch) {
+
+ case EM_AARCH64:
+ switch (Type) {
+#define AARCH64_DYNAMIC_TAG(name, value) \
+ case DT_##name: \
+ return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef AARCH64_DYNAMIC_TAG
+ }
+ break;
+
+ case EM_HEXAGON:
+ switch (Type) {
+#define HEXAGON_DYNAMIC_TAG(name, value) \
+ case DT_##name: \
+ return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef HEXAGON_DYNAMIC_TAG
+ }
+ break;
+
+ case EM_MIPS:
+ switch (Type) {
+#define MIPS_DYNAMIC_TAG(name, value) \
+ case DT_##name: \
+ return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef MIPS_DYNAMIC_TAG
+ }
+ break;
+
+ case EM_PPC64:
+ switch (Type) {
+#define PPC64_DYNAMIC_TAG(name, value) \
+ case DT_##name: \
+ return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC64_DYNAMIC_TAG
+ }
+ break;
+ }
+#undef DYNAMIC_TAG
+ switch (Type) {
+// Now handle all dynamic tags except the architecture specific ones
+#define AARCH64_DYNAMIC_TAG(name, value)
+#define MIPS_DYNAMIC_TAG(name, value)
+#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC64_DYNAMIC_TAG(name, value)
+// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
+#define DYNAMIC_TAG_MARKER(name, value)
+#define DYNAMIC_TAG(name, value) \
+ case DT_##name: \
+ return #name;
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef DYNAMIC_TAG
+#undef AARCH64_DYNAMIC_TAG
+#undef MIPS_DYNAMIC_TAG
+#undef HEXAGON_DYNAMIC_TAG
+#undef PPC64_DYNAMIC_TAG
+#undef DYNAMIC_TAG_MARKER
+ default:
+ return "unknown";
+ }
+}
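
getTypeString above relies on the X-macro pattern: DynamicTags.def expands to one case label per tag, and the architecture-specific macros are defined only inside the matching switch so that, say, MIPS-only tag names are emitted only for EM_MIPS. A tiny self-contained miniature of the technique; the inline TAG_LIST and the COMMON_TAG/MIPS_TAG macro names are stand-ins for the real llvm/BinaryFormat/DynamicTags.def interface:

#include <cstdint>
#include <cstdio>

#define TAG_LIST                                                               \
  COMMON_TAG(NEEDED, 1)                                                        \
  COMMON_TAG(SONAME, 14)                                                       \
  MIPS_TAG(MIPS_FLAGS, 0x70000005)

namespace {
enum { EM_MIPS = 8 };

const char *tagName(unsigned Arch, uint64_t Type) {
  // Architecture-specific names first: only expand the MIPS entries here.
  if (Arch == EM_MIPS) {
    switch (Type) {
#define COMMON_TAG(name, value)
#define MIPS_TAG(name, value)                                                  \
  case value:                                                                  \
    return #name;
      TAG_LIST
#undef MIPS_TAG
#undef COMMON_TAG
    }
  }
  // Then the generic names, ignoring the architecture-specific entries.
  switch (Type) {
#define COMMON_TAG(name, value)                                                \
  case value:                                                                  \
    return #name;
#define MIPS_TAG(name, value)
    TAG_LIST
#undef MIPS_TAG
#undef COMMON_TAG
  default:
    return "unknown";
  }
}
} // namespace

int main() {
  std::printf("%s\n", tagName(EM_MIPS, 0x70000005)); // MIPS_FLAGS
  std::printf("%s\n", tagName(0, 0x70000005));       // unknown
  std::printf("%s\n", tagName(0, 14));               // SONAME
  return 0;
}
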
+
+template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
+ auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
auto MappedAddrOrError = ObjF->getELFFile()->toMappedAddr(VAddr);
- if (!MappedAddrOrError)
- report_fatal_error(MappedAddrOrError.takeError());
+ if (!MappedAddrOrError) {
+ reportWarning("Unable to parse DT_" +
+ Twine(getTypeString(
+ ObjF->getELFFile()->getHeader()->e_machine, Tag)) +
+ ": " + llvm::toString(MappedAddrOrError.takeError()));
+ return nullptr;
+ }
return MappedAddrOrError.get();
};
@@ -1505,25 +1562,26 @@ void ELFDumper<ELFT>::parseDynamicTable(
for (const Elf_Dyn &Dyn : dynamic_table()) {
switch (Dyn.d_tag) {
case ELF::DT_HASH:
- HashTable =
- reinterpret_cast<const Elf_Hash *>(toMappedAddr(Dyn.getPtr()));
+ HashTable = reinterpret_cast<const Elf_Hash *>(
+ toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
break;
case ELF::DT_GNU_HASH:
- GnuHashTable =
- reinterpret_cast<const Elf_GnuHash *>(toMappedAddr(Dyn.getPtr()));
+ GnuHashTable = reinterpret_cast<const Elf_GnuHash *>(
+ toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
break;
case ELF::DT_STRTAB:
- StringTableBegin = (const char *)toMappedAddr(Dyn.getPtr());
+ StringTableBegin = reinterpret_cast<const char *>(
+ toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
break;
case ELF::DT_STRSZ:
StringTableSize = Dyn.getVal();
break;
case ELF::DT_SYMTAB:
- DynSymRegion.Addr = toMappedAddr(Dyn.getPtr());
+ DynSymRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
DynSymRegion.EntSize = sizeof(Elf_Sym);
break;
case ELF::DT_RELA:
- DynRelaRegion.Addr = toMappedAddr(Dyn.getPtr());
+ DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_RELASZ:
DynRelaRegion.Size = Dyn.getVal();
@@ -1535,7 +1593,7 @@ void ELFDumper<ELFT>::parseDynamicTable(
SONameOffset = Dyn.getVal();
break;
case ELF::DT_REL:
- DynRelRegion.Addr = toMappedAddr(Dyn.getPtr());
+ DynRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_RELSZ:
DynRelRegion.Size = Dyn.getVal();
@@ -1545,7 +1603,7 @@ void ELFDumper<ELFT>::parseDynamicTable(
break;
case ELF::DT_RELR:
case ELF::DT_ANDROID_RELR:
- DynRelrRegion.Addr = toMappedAddr(Dyn.getPtr());
+ DynRelrRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_RELRSZ:
case ELF::DT_ANDROID_RELRSZ:
@@ -1565,7 +1623,7 @@ void ELFDumper<ELFT>::parseDynamicTable(
Twine((uint64_t)Dyn.getVal()));
break;
case ELF::DT_JMPREL:
- DynPLTRelRegion.Addr = toMappedAddr(Dyn.getPtr());
+ DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_PLTRELSZ:
DynPLTRelRegion.Size = Dyn.getVal();
@@ -1574,8 +1632,8 @@ void ELFDumper<ELFT>::parseDynamicTable(
}
if (StringTableBegin)
DynamicStringTable = StringRef(StringTableBegin, StringTableSize);
- if (SONameOffset)
- SOName = getDynamicString(SONameOffset);
+ if (SONameOffset && SONameOffset < DynamicStringTable.size())
+ SOName = DynamicStringTable.data() + SONameOffset;
}
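
parseDynamicTable above records the dynamic string table from DT_STRTAB/DT_STRSZ and then only resolves offsets (for example DT_SONAME) that actually fall inside it, rather than aborting on bad input. A minimal sketch of that bounds-checked lookup, with a made-up string table; the placeholder strings mirror the output of printDynamicEntry further down:

#include <cstdint>
#include <cstdio>
#include <string>

namespace {
// Returns the NUL-terminated string at Offset, or a diagnostic placeholder
// when the table is missing or the offset lies outside it.
std::string dynamicString(const std::string &StrTab, uint64_t Offset) {
  if (StrTab.empty())
    return "<String table is empty or was not found>";
  if (Offset >= StrTab.size()) {
    char Buf[64];
    std::snprintf(Buf, sizeof(Buf), "<Invalid offset 0x%llx>",
                  (unsigned long long)Offset);
    return Buf;
  }
  return std::string(StrTab.c_str() + Offset);
}
} // namespace

int main() {
  // Hypothetical .dynstr contents: "\0libm.so.6\0libc.so.6\0".
  std::string StrTab("\0libm.so.6\0libc.so.6\0", 21);
  std::printf("DT_NEEDED -> %s\n", dynamicString(StrTab, 1).c_str());
  std::printf("DT_NEEDED -> %s\n", dynamicString(StrTab, 11).c_str());
  std::printf("DT_SONAME -> %s\n", dynamicString(StrTab, 999).c_str());
  return 0;
}
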
template <typename ELFT>
@@ -1593,37 +1651,52 @@ typename ELFDumper<ELFT>::Elf_Relr_Range ELFDumper<ELFT>::dyn_relrs() const {
return DynRelrRegion.getAsArrayRef<Elf_Relr>();
}
-template<class ELFT>
-void ELFDumper<ELFT>::printFileHeaders() {
+template <class ELFT> void ELFDumper<ELFT>::printFileHeaders() {
ELFDumperStyle->printFileHeaders(ObjF->getELFFile());
}
-template<class ELFT>
-void ELFDumper<ELFT>::printSectionHeaders() {
+template <class ELFT> void ELFDumper<ELFT>::printSectionHeaders() {
ELFDumperStyle->printSectionHeaders(ObjF->getELFFile());
}
-template<class ELFT>
-void ELFDumper<ELFT>::printRelocations() {
+template <class ELFT> void ELFDumper<ELFT>::printRelocations() {
ELFDumperStyle->printRelocations(ObjF->getELFFile());
}
-template <class ELFT> void ELFDumper<ELFT>::printProgramHeaders() {
- ELFDumperStyle->printProgramHeaders(ObjF->getELFFile());
+template <class ELFT>
+void ELFDumper<ELFT>::printProgramHeaders(
+ bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
+ ELFDumperStyle->printProgramHeaders(ObjF->getELFFile(), PrintProgramHeaders,
+ PrintSectionMapping);
+}
+
+template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
+ // Dump version symbol section.
+ ELFDumperStyle->printVersionSymbolSection(ObjF->getELFFile(),
+ SymbolVersionSection);
+
+ // Dump version definition section.
+ ELFDumperStyle->printVersionDefinitionSection(ObjF->getELFFile(),
+ SymbolVersionDefSection);
+
+ // Dump version dependency section.
+ ELFDumperStyle->printVersionDependencySection(ObjF->getELFFile(),
+ SymbolVersionNeedSection);
}
template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
ELFDumperStyle->printDynamicRelocations(ObjF->getELFFile());
}
-template<class ELFT>
-void ELFDumper<ELFT>::printSymbols() {
- ELFDumperStyle->printSymbols(ObjF->getELFFile());
+template <class ELFT>
+void ELFDumper<ELFT>::printSymbols(bool PrintSymbols,
+ bool PrintDynamicSymbols) {
+ ELFDumperStyle->printSymbols(ObjF->getELFFile(), PrintSymbols,
+ PrintDynamicSymbols);
}
-template<class ELFT>
-void ELFDumper<ELFT>::printDynamicSymbols() {
- ELFDumperStyle->printDynamicSymbols(ObjF->getELFFile());
+template <class ELFT> void ELFDumper<ELFT>::printHashSymbols() {
+ ELFDumperStyle->printHashSymbols(ObjF->getELFFile());
}
template <class ELFT> void ELFDumper<ELFT>::printHashHistogram() {
@@ -1642,61 +1715,7 @@ template <class ELFT> void ELFDumper<ELFT>::printELFLinkerOptions() {
ELFDumperStyle->printELFLinkerOptions(ObjF->getELFFile());
}
-static const char *getTypeString(unsigned Arch, uint64_t Type) {
-#define DYNAMIC_TAG(n, v)
- switch (Arch) {
- case EM_HEXAGON:
- switch (Type) {
-#define HEXAGON_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef HEXAGON_DYNAMIC_TAG
- }
- break;
-
- case EM_MIPS:
- switch (Type) {
-#define MIPS_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef MIPS_DYNAMIC_TAG
- }
- break;
-
- case EM_PPC64:
- switch(Type) {
-#define PPC64_DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef PPC64_DYNAMIC_TAG
- }
- break;
- }
-#undef DYNAMIC_TAG
- switch (Type) {
-// Now handle all dynamic tags except the architecture specific ones
-#define MIPS_DYNAMIC_TAG(name, value)
-#define HEXAGON_DYNAMIC_TAG(name, value)
-#define PPC64_DYNAMIC_TAG(name, value)
-// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
-#define DYNAMIC_TAG_MARKER(name, value)
-#define DYNAMIC_TAG(name, value) \
- case DT_##name: \
- return #name;
-#include "llvm/BinaryFormat/DynamicTags.def"
-#undef DYNAMIC_TAG
-#undef MIPS_DYNAMIC_TAG
-#undef HEXAGON_DYNAMIC_TAG
-#undef PPC64_DYNAMIC_TAG
-#undef DYNAMIC_TAG_MARKER
- default: return "unknown";
- }
-}
-
-#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
+#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
{ #enum, prefix##_##enum }
static const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
@@ -1724,6 +1743,7 @@ static const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF_1, CONFALT),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, ENDFILTEE),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELDNE),
+ LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELPND),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODIRECT),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, IGNMULDEF),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOKSYMS),
@@ -1776,20 +1796,97 @@ void printFlags(T Value, ArrayRef<EnumEntry<TFlag>> Flags, raw_ostream &OS) {
}
template <class ELFT>
-StringRef ELFDumper<ELFT>::getDynamicString(uint64_t Value) const {
- if (Value >= DynamicStringTable.size())
- reportError("Invalid dynamic string table reference");
- return StringRef(DynamicStringTable.data() + Value);
-}
-
-static void printLibrary(raw_ostream &OS, const Twine &Tag, const Twine &Name) {
- OS << Tag << ": [" << Name << "]";
-}
+void ELFDumper<ELFT>::printDynamicEntry(raw_ostream &OS, uint64_t Type,
+ uint64_t Value) const {
+ const char *ConvChar =
+ (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
+
+ // Handle custom printing of architecture specific tags
+ switch (ObjF->getELFFile()->getHeader()->e_machine) {
+ case EM_AARCH64:
+ switch (Type) {
+ case DT_AARCH64_BTI_PLT:
+ case DT_AARCH64_PAC_PLT:
+ OS << Value;
+ return;
+ default:
+ break;
+ }
+ break;
+ case EM_HEXAGON:
+ switch (Type) {
+ case DT_HEXAGON_VER:
+ OS << Value;
+ return;
+ case DT_HEXAGON_SYMSZ:
+ case DT_HEXAGON_PLT:
+ OS << format(ConvChar, Value);
+ return;
+ default:
+ break;
+ }
+ break;
+ case EM_MIPS:
+ switch (Type) {
+ case DT_MIPS_RLD_VERSION:
+ case DT_MIPS_LOCAL_GOTNO:
+ case DT_MIPS_SYMTABNO:
+ case DT_MIPS_UNREFEXTNO:
+ OS << Value;
+ return;
+ case DT_MIPS_TIME_STAMP:
+ case DT_MIPS_ICHECKSUM:
+ case DT_MIPS_IVERSION:
+ case DT_MIPS_BASE_ADDRESS:
+ case DT_MIPS_MSYM:
+ case DT_MIPS_CONFLICT:
+ case DT_MIPS_LIBLIST:
+ case DT_MIPS_CONFLICTNO:
+ case DT_MIPS_LIBLISTNO:
+ case DT_MIPS_GOTSYM:
+ case DT_MIPS_HIPAGENO:
+ case DT_MIPS_RLD_MAP:
+ case DT_MIPS_DELTA_CLASS:
+ case DT_MIPS_DELTA_CLASS_NO:
+ case DT_MIPS_DELTA_INSTANCE:
+ case DT_MIPS_DELTA_RELOC:
+ case DT_MIPS_DELTA_RELOC_NO:
+ case DT_MIPS_DELTA_SYM:
+ case DT_MIPS_DELTA_SYM_NO:
+ case DT_MIPS_DELTA_CLASSSYM:
+ case DT_MIPS_DELTA_CLASSSYM_NO:
+ case DT_MIPS_CXX_FLAGS:
+ case DT_MIPS_PIXIE_INIT:
+ case DT_MIPS_SYMBOL_LIB:
+ case DT_MIPS_LOCALPAGE_GOTIDX:
+ case DT_MIPS_LOCAL_GOTIDX:
+ case DT_MIPS_HIDDEN_GOTIDX:
+ case DT_MIPS_PROTECTED_GOTIDX:
+ case DT_MIPS_OPTIONS:
+ case DT_MIPS_INTERFACE:
+ case DT_MIPS_DYNSTR_ALIGN:
+ case DT_MIPS_INTERFACE_SIZE:
+ case DT_MIPS_RLD_TEXT_RESOLVE_ADDR:
+ case DT_MIPS_PERF_SUFFIX:
+ case DT_MIPS_COMPACT_SIZE:
+ case DT_MIPS_GP_VALUE:
+ case DT_MIPS_AUX_DYNAMIC:
+ case DT_MIPS_PLTGOT:
+ case DT_MIPS_RWPLT:
+ case DT_MIPS_RLD_MAP_REL:
+ OS << format(ConvChar, Value);
+ return;
+ case DT_MIPS_FLAGS:
+ printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
+ return;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
-template <class ELFT>
-void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
- raw_ostream &OS = W.getOStream();
- const char* ConvChar = (opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
switch (Type) {
case DT_PLTREL:
if (Value == DT_REL) {
@@ -1818,22 +1915,12 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
case DT_VERSYM:
case DT_GNU_HASH:
case DT_NULL:
- case DT_MIPS_BASE_ADDRESS:
- case DT_MIPS_GOTSYM:
- case DT_MIPS_RLD_MAP:
- case DT_MIPS_RLD_MAP_REL:
- case DT_MIPS_PLTGOT:
- case DT_MIPS_OPTIONS:
OS << format(ConvChar, Value);
break;
case DT_RELACOUNT:
case DT_RELCOUNT:
case DT_VERDEFNUM:
case DT_VERNEEDNUM:
- case DT_MIPS_RLD_VERSION:
- case DT_MIPS_LOCAL_GOTNO:
- case DT_MIPS_SYMTABNO:
- case DT_MIPS_UNREFEXTNO:
OS << Value;
break;
case DT_PLTRELSZ:
@@ -1851,24 +1938,30 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
OS << Value << " (bytes)";
break;
case DT_NEEDED:
- printLibrary(OS, "Shared library", getDynamicString(Value));
- break;
case DT_SONAME:
- printLibrary(OS, "Library soname", getDynamicString(Value));
- break;
case DT_AUXILIARY:
- printLibrary(OS, "Auxiliary library", getDynamicString(Value));
- break;
+ case DT_USED:
case DT_FILTER:
- printLibrary(OS, "Filter library", getDynamicString(Value));
- break;
case DT_RPATH:
- case DT_RUNPATH:
- OS << getDynamicString(Value);
- break;
- case DT_MIPS_FLAGS:
- printFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags), OS);
+ case DT_RUNPATH: {
+ const std::map<uint64_t, const char*> TagNames = {
+ {DT_NEEDED, "Shared library"},
+ {DT_SONAME, "Library soname"},
+ {DT_AUXILIARY, "Auxiliary library"},
+ {DT_USED, "Not needed object"},
+ {DT_FILTER, "Filter library"},
+ {DT_RPATH, "Library rpath"},
+ {DT_RUNPATH, "Library runpath"},
+ };
+ OS << TagNames.at(Type) << ": ";
+ if (DynamicStringTable.empty())
+ OS << "<String table is empty or was not found> ";
+ else if (Value < DynamicStringTable.size())
+ OS << "[" << StringRef(DynamicStringTable.data() + Value) << "]";
+ else
+ OS << "<Invalid offset 0x" << utohexstr(Value) << ">";
break;
+ }
case DT_FLAGS:
printFlags(Value, makeArrayRef(ElfDynamicDTFlags), OS);
break;
@@ -1881,14 +1974,9 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
}
}
-template<class ELFT>
-void ELFDumper<ELFT>::printUnwindInfo() {
- const unsigned Machine = ObjF->getELFFile()->getHeader()->e_machine;
- if (Machine == EM_386 || Machine == EM_X86_64) {
- DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
- return Ctx.printUnwindInformation();
- }
- W.startLine() << "UnwindInfo not implemented.\n";
+template <class ELFT> void ELFDumper<ELFT>::printUnwindInfo() {
+ DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
+ Ctx.printUnwindInformation();
}
namespace {
@@ -1898,73 +1986,40 @@ template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
const unsigned Machine = Obj->getHeader()->e_machine;
if (Machine == EM_ARM) {
ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, DotSymtabSec);
- return Ctx.PrintUnwindInformation();
+ Ctx.PrintUnwindInformation();
}
- W.startLine() << "UnwindInfo not implemented.\n";
+ DwarfCFIEH::PrinterContext<ELF32LE> Ctx(W, ObjF);
+ Ctx.printUnwindInformation();
}
} // end anonymous namespace
-template<class ELFT>
-void ELFDumper<ELFT>::printDynamicTable() {
- auto I = dynamic_table().begin();
- auto E = dynamic_table().end();
-
- if (I == E)
- return;
-
- --E;
- while (I != E && E->getTag() == ELF::DT_NULL)
- --E;
- if (E->getTag() != ELF::DT_NULL)
- ++E;
- ++E;
-
- ptrdiff_t Total = std::distance(I, E);
- if (Total == 0)
- return;
-
- raw_ostream &OS = W.getOStream();
- W.startLine() << "DynamicSection [ (" << Total << " entries)\n";
-
- bool Is64 = ELFT::Is64Bits;
-
- W.startLine()
- << " Tag" << (Is64 ? " " : " ") << "Type"
- << " " << "Name/Value\n";
- while (I != E) {
- const Elf_Dyn &Entry = *I;
- uintX_t Tag = Entry.getTag();
- ++I;
- W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, opts::Output != opts::GNU) << " "
- << format("%-21s", getTypeString(ObjF->getELFFile()->getHeader()->e_machine, Tag));
- printValue(Tag, Entry.getVal());
- OS << "\n";
- }
-
- W.startLine() << "]\n";
+template <class ELFT> void ELFDumper<ELFT>::printDynamicTable() {
+ ELFDumperStyle->printDynamic(ObjF->getELFFile());
}
-template<class ELFT>
-void ELFDumper<ELFT>::printNeededLibraries() {
+template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
ListScope D(W, "NeededLibraries");
using LibsTy = std::vector<StringRef>;
LibsTy Libs;
for (const auto &Entry : dynamic_table())
- if (Entry.d_tag == ELF::DT_NEEDED)
- Libs.push_back(getDynamicString(Entry.d_un.d_val));
+ if (Entry.d_tag == ELF::DT_NEEDED) {
+ uint64_t Value = Entry.d_un.d_val;
+ if (Value < DynamicStringTable.size())
+ Libs.push_back(StringRef(DynamicStringTable.data() + Value));
+ else
+ Libs.push_back("<Library name index out of range>");
+ }
- std::stable_sort(Libs.begin(), Libs.end());
+ llvm::stable_sort(Libs);
for (const auto &L : Libs)
- W.startLine() << L << "\n";
+ W.startLine() << L << "\n";
}
-
-template <typename ELFT>
-void ELFDumper<ELFT>::printHashTable() {
+template <typename ELFT> void ELFDumper<ELFT>::printHashTable() {
DictScope D(W, "HashTable");
if (!HashTable)
return;
@@ -1974,8 +2029,7 @@ void ELFDumper<ELFT>::printHashTable() {
W.printList("Chains", HashTable->chains());
}
-template <typename ELFT>
-void ELFDumper<ELFT>::printGnuHashTable() {
+template <typename ELFT> void ELFDumper<ELFT>::printGnuHashTable() {
DictScope D(W, "GnuHashTable");
if (!GnuHashTable)
return;
@@ -1996,8 +2050,7 @@ template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
W.printString("LoadName", SOName);
}
-template <class ELFT>
-void ELFDumper<ELFT>::printAttributes() {
+template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
W.startLine() << "Attributes not implemented.\n";
}
@@ -2486,7 +2539,7 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
unwrapOrError(Obj->getSectionContents(StackMapSection));
prettyPrintStackMap(
- W, StackMapV2Parser<ELFT::TargetEndianness>(StackMapContentsArray));
+ W, StackMapParser<ELFT::TargetEndianness>(StackMapContentsArray));
}
template <class ELFT> void ELFDumper<ELFT>::printGroupSections() {
@@ -2527,7 +2580,8 @@ static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> *Obj) {
ArrayRef<typename ELFT::Shdr> Arr = unwrapOrError(Obj->sections());
if (Arr.empty())
return "65535 (corrupt: out of range)";
- return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) + ")";
+ return to_string(ElfHeader->e_shstrndx) + " (" + to_string(Arr[0].sh_link) +
+ ")";
}
template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
@@ -2599,7 +2653,7 @@ struct GroupMember {
struct GroupSection {
StringRef Name;
- StringRef Signature;
+ std::string Signature;
uint64_t ShName;
uint64_t Index;
uint32_t Link;
@@ -2630,13 +2684,13 @@ std::vector<GroupSection> getGroups(const ELFFile<ELFT> *Obj) {
StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
StringRef Signature = StrTable.data() + Sym->st_name;
- Ret.push_back({Name,
- Signature,
- Sec.sh_name,
+ Ret.push_back({Name,
+ maybeDemangle(Signature),
+ Sec.sh_name,
I - 1,
Sec.sh_link,
Sec.sh_info,
- Data[0],
+ Data[0],
{}});
std::vector<GroupMember> &GM = Ret.back().Members;
@@ -2691,53 +2745,57 @@ template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
template <class ELFT>
void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
const Elf_Rela &R, bool IsRela) {
- std::string Offset, Info, Addend, Value;
- SmallString<32> RelocName;
- StringRef TargetName;
- const Elf_Sym *Sym = nullptr;
- unsigned Width = ELFT::Is64Bits ? 16 : 8;
- unsigned Bias = ELFT::Is64Bits ? 8 : 0;
-
- // First two fields are bit width dependent. The rest of them are after are
- // fixed width.
- Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
- Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
- Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab));
+ const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab));
+ std::string TargetName;
if (Sym && Sym->getType() == ELF::STT_SECTION) {
const Elf_Shdr *Sec = unwrapOrError(
Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable()));
TargetName = unwrapOrError(Obj->getSectionName(Sec));
} else if (Sym) {
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
- TargetName = unwrapOrError(Sym->getName(StrTable));
+ TargetName = this->dumper()->getFullSymbolName(
+ Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */);
}
+ printRelocation(Obj, Sym, TargetName, R, IsRela);
+}
- if (Sym && IsRela) {
- if (R.r_addend < 0)
- Addend = " - ";
- else
- Addend = " + ";
- }
+template <class ELFT>
+void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Sym *Sym,
+ StringRef SymbolName, const Elf_Rela &R,
+ bool IsRela) {
+ // First two fields are bit width dependent. The rest of them are fixed width.
+ unsigned Bias = ELFT::Is64Bits ? 8 : 0;
+ Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
+ unsigned Width = ELFT::Is64Bits ? 16 : 8;
- Offset = to_string(format_hex_no_prefix(R.r_offset, Width));
- Info = to_string(format_hex_no_prefix(R.r_info, Width));
+ Fields[0].Str = to_string(format_hex_no_prefix(R.r_offset, Width));
+ Fields[1].Str = to_string(format_hex_no_prefix(R.r_info, Width));
- int64_t RelAddend = R.r_addend;
- if (IsRela)
- Addend += to_hexString(std::abs(RelAddend), false);
-
- if (Sym)
- Value = to_string(format_hex_no_prefix(Sym->getValue(), Width));
-
- Fields[0].Str = Offset;
- Fields[1].Str = Info;
- Fields[2].Str = RelocName;
- Fields[3].Str = Value;
- Fields[4].Str = TargetName;
- for (auto &field : Fields)
- printField(field);
- OS << Addend;
- OS << "\n";
+ SmallString<32> RelocName;
+ Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
+ Fields[2].Str = RelocName.c_str();
+
+ if (Sym && (!SymbolName.empty() || Sym->getValue() != 0))
+ Fields[3].Str = to_string(format_hex_no_prefix(Sym->getValue(), Width));
+
+ Fields[4].Str = SymbolName;
+ for (const Field &F : Fields)
+ printField(F);
+
+ std::string Addend;
+ if (IsRela) {
+ int64_t RelAddend = R.r_addend;
+ if (!SymbolName.empty()) {
+ if (R.r_addend < 0) {
+ Addend = " - ";
+ RelAddend = std::abs(RelAddend);
+ } else
+ Addend = " + ";
+ }
+
+ Addend += to_hexString(RelAddend, false);
+ }
+ OS << Addend << "\n";
}
template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
@@ -2764,10 +2822,8 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocHeader(unsigned SType) {
template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
bool HasRelocSections = false;
for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
- if (Sec.sh_type != ELF::SHT_REL &&
- Sec.sh_type != ELF::SHT_RELA &&
- Sec.sh_type != ELF::SHT_RELR &&
- Sec.sh_type != ELF::SHT_ANDROID_REL &&
+ if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
+ Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
Sec.sh_type != ELF::SHT_ANDROID_RELA &&
Sec.sh_type != ELF::SHT_ANDROID_RELR)
continue;
@@ -2832,7 +2888,21 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
OS << "\nThere are no relocations in this file.\n";
}
-std::string getSectionTypeString(unsigned Arch, unsigned Type) {
+// Print the offset of a particular section from any one of the ranges:
+// [SHT_LOOS, SHT_HIOS], [SHT_LOPROC, SHT_HIPROC], [SHT_LOUSER, SHT_HIUSER].
+// If 'Type' does not fall within any of those ranges, the hex value of 'Type'
+// is returned, followed by '<unknown>'.
+static std::string getSectionTypeOffsetString(unsigned Type) {
+ if (Type >= SHT_LOOS && Type <= SHT_HIOS)
+ return "LOOS+0x" + to_hexString(Type - SHT_LOOS);
+ else if (Type >= SHT_LOPROC && Type <= SHT_HIPROC)
+ return "LOPROC+0x" + to_hexString(Type - SHT_LOPROC);
+ else if (Type >= SHT_LOUSER && Type <= SHT_HIUSER)
+ return "LOUSER+0x" + to_hexString(Type - SHT_LOUSER);
+ return "0x" + to_hexString(Type) + ": <unknown>";
+}
+
+static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
using namespace ELF;
switch (Arch) {
@@ -2863,10 +2933,10 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "MIPS_REGINFO";
case SHT_MIPS_OPTIONS:
return "MIPS_OPTIONS";
+ case SHT_MIPS_DWARF:
+ return "MIPS_DWARF";
case SHT_MIPS_ABIFLAGS:
return "MIPS_ABIFLAGS";
- case SHT_MIPS_DWARF:
- return "SHT_MIPS_DWARF";
}
break;
}
@@ -2905,6 +2975,10 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "GROUP";
case SHT_SYMTAB_SHNDX:
return "SYMTAB SECTION INDICES";
+ case SHT_ANDROID_REL:
+ return "ANDROID_REL";
+ case SHT_ANDROID_RELA:
+ return "ANDROID_RELA";
case SHT_RELR:
case SHT_ANDROID_RELR:
return "RELR";
@@ -2916,6 +2990,8 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
return "LLVM_CALL_GRAPH_PROFILE";
case SHT_LLVM_ADDRSIG:
return "LLVM_ADDRSIG";
+ case SHT_LLVM_DEPENDENT_LIBRARIES:
+ return "LLVM_DEPENDENT_LIBRARIES";
// FIXME: Parse processor specific GNU attributes
case SHT_GNU_ATTRIBUTES:
return "ATTRIBUTES";
@@ -2928,69 +3004,65 @@ std::string getSectionTypeString(unsigned Arch, unsigned Type) {
case SHT_GNU_versym:
return "VERSYM";
default:
- return "";
+ return getSectionTypeOffsetString(Type);
}
return "";
}
template <class ELFT>
-void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
- size_t SectionIndex = 0;
- std::string Number, Type, Size, Address, Offset, Flags, Link, Info, EntrySize,
- Alignment;
- unsigned Bias;
- unsigned Width;
-
- if (ELFT::Is64Bits) {
- Bias = 0;
- Width = 16;
- } else {
- Bias = 8;
- Width = 8;
- }
+static StringRef getSectionName(const typename ELFT::Shdr &Sec,
+ const ELFObjectFile<ELFT> &ElfObj,
+ ArrayRef<typename ELFT::Shdr> Sections) {
+ const ELFFile<ELFT> &Obj = *ElfObj.getELFFile();
+ uint32_t Index = Obj.getHeader()->e_shstrndx;
+ if (Index == ELF::SHN_XINDEX)
+ Index = Sections[0].sh_link;
+ if (!Index) // no section string table.
+ return "";
+ // TODO: Test a case when the sh_link of the section with index 0 is broken.
+ if (Index >= Sections.size())
+ reportError(ElfObj.getFileName(),
+ createError("section header string table index " +
+ Twine(Index) + " does not exist"));
+ StringRef Data = toStringRef(unwrapOrError(
+ Obj.template getSectionContentsAsArray<uint8_t>(&Sections[Index])));
+ return unwrapOrError(Obj.getSectionName(&Sec, Data));
+}
+template <class ELFT>
+void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
+ unsigned Bias = ELFT::Is64Bits ? 0 : 8;
ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
<< "0x" << to_hexString(Obj->getHeader()->e_shoff, false) << ":\n\n";
OS << "Section Headers:\n";
- Field Fields[11] = {{"[Nr]", 2},
- {"Name", 7},
- {"Type", 25},
- {"Address", 41},
- {"Off", 58 - Bias},
- {"Size", 65 - Bias},
- {"ES", 72 - Bias},
- {"Flg", 75 - Bias},
- {"Lk", 79 - Bias},
- {"Inf", 82 - Bias},
- {"Al", 86 - Bias}};
- for (auto &f : Fields)
- printField(f);
+ Field Fields[11] = {
+ {"[Nr]", 2}, {"Name", 7}, {"Type", 25},
+ {"Address", 41}, {"Off", 58 - Bias}, {"Size", 65 - Bias},
+ {"ES", 72 - Bias}, {"Flg", 75 - Bias}, {"Lk", 79 - Bias},
+ {"Inf", 82 - Bias}, {"Al", 86 - Bias}};
+ for (auto &F : Fields)
+ printField(F);
OS << "\n";
+ const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
+ size_t SectionIndex = 0;
for (const Elf_Shdr &Sec : Sections) {
- Number = to_string(SectionIndex);
- Fields[0].Str = Number;
- Fields[1].Str = unwrapOrError(Obj->getSectionName(&Sec));
- Type = getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
- Fields[2].Str = Type;
- Address = to_string(format_hex_no_prefix(Sec.sh_addr, Width));
- Fields[3].Str = Address;
- Offset = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
- Fields[4].Str = Offset;
- Size = to_string(format_hex_no_prefix(Sec.sh_size, 6));
- Fields[5].Str = Size;
- EntrySize = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
- Fields[6].Str = EntrySize;
- Flags = getGNUFlags(Sec.sh_flags);
- Fields[7].Str = Flags;
- Link = to_string(Sec.sh_link);
- Fields[8].Str = Link;
- Info = to_string(Sec.sh_info);
- Fields[9].Str = Info;
- Alignment = to_string(Sec.sh_addralign);
- Fields[10].Str = Alignment;
+ Fields[0].Str = to_string(SectionIndex);
+ Fields[1].Str = getSectionName(Sec, *ElfObj, Sections);
+ Fields[2].Str =
+ getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
+ Fields[3].Str =
+ to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 16 : 8));
+ Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
+ Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6));
+ Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
+ Fields[7].Str = getGNUFlags(Sec.sh_flags);
+ Fields[8].Str = to_string(Sec.sh_link);
+ Fields[9].Str = to_string(Sec.sh_info);
+ Fields[10].Str = to_string(Sec.sh_addralign);
+
OS.PadToColumn(Fields[0].Column);
OS << "[" << right_justify(Fields[0].Str, 2) << "]";
for (int i = 1; i < 7; i++)
@@ -3043,9 +3115,10 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
case ELF::SHN_COMMON:
return "COM";
case ELF::SHN_XINDEX:
- SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
- Symbol, FirstSym, this->dumper()->getShndxTable()));
- LLVM_FALLTHROUGH;
+ return to_string(
+ format_decimal(unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
+ Symbol, FirstSym, this->dumper()->getShndxTable())),
+ 3));
default:
// Find if:
// Processor specific
@@ -3072,7 +3145,6 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
bool IsDynamic) {
static int Idx = 0;
static bool Dynamic = true;
- size_t Width;
// If this function was called with a different value from IsDynamic
// from last call, happens when we move from dynamic to static symbol
@@ -3081,111 +3153,87 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
Idx = 0;
Dynamic = false;
}
- std::string Num, Name, Value, Size, Binding, Type, Visibility, Section;
- unsigned Bias = 0;
- if (ELFT::Is64Bits) {
- Bias = 8;
- Width = 16;
- } else {
- Bias = 0;
- Width = 8;
- }
+
+ unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias,
31 + Bias, 38 + Bias, 47 + Bias, 51 + Bias};
- Num = to_string(format_decimal(Idx++, 6)) + ":";
- Value = to_string(format_hex_no_prefix(Symbol->st_value, Width));
- Size = to_string(format_decimal(Symbol->st_size, 5));
+ Fields[0].Str = to_string(format_decimal(Idx++, 6)) + ":";
+ Fields[1].Str = to_string(
+ format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
+ Fields[2].Str = to_string(format_decimal(Symbol->st_size, 5));
+
unsigned char SymbolType = Symbol->getType();
if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
- Type = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
+ Fields[3].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
- Type = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
- unsigned Vis = Symbol->getVisibility();
- Binding = printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
- Visibility = printEnum(Vis, makeArrayRef(ElfSymbolVisibilities));
- Section = getSymbolSectionNdx(Obj, Symbol, FirstSym);
- Name = this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
- Fields[0].Str = Num;
- Fields[1].Str = Value;
- Fields[2].Str = Size;
- Fields[3].Str = Type;
- Fields[4].Str = Binding;
- Fields[5].Str = Visibility;
- Fields[6].Str = Section;
- Fields[7].Str = Name;
+ Fields[3].Str = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
+
+ Fields[4].Str =
+ printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
+ Fields[5].Str =
+ printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
+ Fields[6].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+ Fields[7].Str =
+ this->dumper()->getFullSymbolName(Symbol, StrTable, IsDynamic);
for (auto &Entry : Fields)
printField(Entry);
OS << "\n";
}
+
template <class ELFT>
void GNUStyle<ELFT>::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym,
uint32_t Sym, StringRef StrTable,
uint32_t Bucket) {
- std::string Num, Buc, Name, Value, Size, Binding, Type, Visibility, Section;
- unsigned Width, Bias = 0;
- if (ELFT::Is64Bits) {
- Bias = 8;
- Width = 16;
- } else {
- Bias = 0;
- Width = 8;
- }
+ unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias,
34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias};
- Num = to_string(format_decimal(Sym, 5));
- Buc = to_string(format_decimal(Bucket, 3)) + ":";
+ Fields[0].Str = to_string(format_decimal(Sym, 5));
+ Fields[1].Str = to_string(format_decimal(Bucket, 3)) + ":";
const auto Symbol = FirstSym + Sym;
- Value = to_string(format_hex_no_prefix(Symbol->st_value, Width));
- Size = to_string(format_decimal(Symbol->st_size, 5));
+ Fields[2].Str = to_string(
+ format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 18 : 8));
+ Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5));
+
unsigned char SymbolType = Symbol->getType();
if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
- Type = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
+ Fields[4].Str = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
- Type = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
- unsigned Vis = Symbol->getVisibility();
- Binding = printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
- Visibility = printEnum(Vis, makeArrayRef(ElfSymbolVisibilities));
- Section = getSymbolSectionNdx(Obj, Symbol, FirstSym);
- Name = this->dumper()->getFullSymbolName(Symbol, StrTable, true);
- Fields[0].Str = Num;
- Fields[1].Str = Buc;
- Fields[2].Str = Value;
- Fields[3].Str = Size;
- Fields[4].Str = Type;
- Fields[5].Str = Binding;
- Fields[6].Str = Visibility;
- Fields[7].Str = Section;
- Fields[8].Str = Name;
+ Fields[4].Str = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
+
+ Fields[5].Str =
+ printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
+ Fields[6].Str =
+ printEnum(Symbol->getVisibility(), makeArrayRef(ElfSymbolVisibilities));
+ Fields[7].Str = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+ Fields[8].Str = this->dumper()->getFullSymbolName(Symbol, StrTable, true);
+
for (auto &Entry : Fields)
printField(Entry);
OS << "\n";
}
-template <class ELFT> void GNUStyle<ELFT>::printSymbols(const ELFO *Obj) {
- if (opts::DynamicSymbols)
+template <class ELFT>
+void GNUStyle<ELFT>::printSymbols(const ELFO *Obj, bool PrintSymbols,
+ bool PrintDynamicSymbols) {
+ if (!PrintSymbols && !PrintDynamicSymbols)
return;
+ // GNU readelf prints both the .dynsym and .symtab with --symbols.
this->dumper()->printSymbolsHelper(true);
- this->dumper()->printSymbolsHelper(false);
+ if (PrintSymbols)
+ this->dumper()->printSymbolsHelper(false);
}
-template <class ELFT>
-void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
+template <class ELFT> void GNUStyle<ELFT>::printHashSymbols(const ELFO *Obj) {
if (this->dumper()->getDynamicStringTable().empty())
return;
auto StringTable = this->dumper()->getDynamicStringTable();
auto DynSyms = this->dumper()->dynamic_symbols();
- auto GnuHash = this->dumper()->getGnuHashTable();
- auto SysVHash = this->dumper()->getHashTable();
-
- // If no hash or .gnu.hash found, try using symbol table
- if (GnuHash == nullptr && SysVHash == nullptr)
- this->dumper()->printSymbolsHelper(true);
// Try printing .hash
- if (this->dumper()->getHashTable()) {
+ if (auto SysVHash = this->dumper()->getHashTable()) {
OS << "\n Symbol table of .hash for image:\n";
if (ELFT::Is64Bits)
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
@@ -3193,14 +3241,12 @@ void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
OS << "\n";
- uint32_t NBuckets = SysVHash->nbucket;
- uint32_t NChains = SysVHash->nchain;
auto Buckets = SysVHash->buckets();
auto Chains = SysVHash->chains();
- for (uint32_t Buc = 0; Buc < NBuckets; Buc++) {
+ for (uint32_t Buc = 0; Buc < SysVHash->nbucket; Buc++) {
if (Buckets[Buc] == ELF::STN_UNDEF)
continue;
- for (uint32_t Ch = Buckets[Buc]; Ch < NChains; Ch = Chains[Ch]) {
+ for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash->nchain; Ch = Chains[Ch]) {
if (Ch == ELF::STN_UNDEF)
break;
printHashedSymbol(Obj, &DynSyms[0], Ch, StringTable, Buc);
@@ -3209,16 +3255,15 @@ void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
}
// Try printing .gnu.hash
- if (GnuHash) {
+ if (auto GnuHash = this->dumper()->getGnuHashTable()) {
OS << "\n Symbol table of .gnu.hash for image:\n";
if (ELFT::Is64Bits)
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
else
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
OS << "\n";
- uint32_t NBuckets = GnuHash->nbuckets;
auto Buckets = GnuHash->buckets();
- for (uint32_t Buc = 0; Buc < NBuckets; Buc++) {
+ for (uint32_t Buc = 0; Buc < GnuHash->nbuckets; Buc++) {
if (Buckets[Buc] == ELF::STN_UNDEF)
continue;
uint32_t Index = Buckets[Buc];
@@ -3266,8 +3311,8 @@ bool GNUStyle<ELFT>::checkoffsets(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) {
(IsSpecial && Phdr.p_type != ELF::PT_TLS) ? 0 : Sec.sh_size;
if (Sec.sh_offset >= Phdr.p_offset)
return ((Sec.sh_offset + SectionSize <= Phdr.p_filesz + Phdr.p_offset)
- /*only non-zero sized sections at end*/ &&
- (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz));
+ /*only non-zero sized sections at end*/
+ && (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz));
return false;
}
@@ -3302,12 +3347,21 @@ bool GNUStyle<ELFT>::checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec) {
}
template <class ELFT>
+void GNUStyle<ELFT>::printProgramHeaders(
+ const ELFO *Obj, bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) {
+ if (PrintProgramHeaders)
+ printProgramHeaders(Obj);
+
+ // Display the section mapping along with the program headers, unless
+ // -section-mapping is explicitly set to false.
+ if (PrintSectionMapping != cl::BOU_FALSE)
+ printSectionMapping(Obj);
+}
+
+template <class ELFT>
void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
- unsigned Width = ELFT::Is64Bits ? 18 : 10;
- unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
- std::string Type, Offset, VMA, LMA, FileSz, MemSz, Flag, Align;
-
const Elf_Ehdr *Header = Obj->getHeader();
Field Fields[8] = {2, 17, 26, 37 + Bias,
48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias};
@@ -3323,23 +3377,18 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
else
OS << " Type Offset VirtAddr PhysAddr FileSiz "
<< "MemSiz Flg Align\n";
+
+ unsigned Width = ELFT::Is64Bits ? 18 : 10;
+ unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
for (const auto &Phdr : unwrapOrError(Obj->program_headers())) {
- Type = getElfPtType(Header->e_machine, Phdr.p_type);
- Offset = to_string(format_hex(Phdr.p_offset, 8));
- VMA = to_string(format_hex(Phdr.p_vaddr, Width));
- LMA = to_string(format_hex(Phdr.p_paddr, Width));
- FileSz = to_string(format_hex(Phdr.p_filesz, SizeWidth));
- MemSz = to_string(format_hex(Phdr.p_memsz, SizeWidth));
- Flag = printPhdrFlags(Phdr.p_flags);
- Align = to_string(format_hex(Phdr.p_align, 1));
- Fields[0].Str = Type;
- Fields[1].Str = Offset;
- Fields[2].Str = VMA;
- Fields[3].Str = LMA;
- Fields[4].Str = FileSz;
- Fields[5].Str = MemSz;
- Fields[6].Str = Flag;
- Fields[7].Str = Align;
+ Fields[0].Str = getElfPtType(Header->e_machine, Phdr.p_type);
+ Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8));
+ Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width));
+ Fields[3].Str = to_string(format_hex(Phdr.p_paddr, Width));
+ Fields[4].Str = to_string(format_hex(Phdr.p_filesz, SizeWidth));
+ Fields[5].Str = to_string(format_hex(Phdr.p_memsz, SizeWidth));
+ Fields[6].Str = printPhdrFlags(Phdr.p_flags);
+ Fields[7].Str = to_string(format_hex(Phdr.p_align, 1));
for (auto Field : Fields)
printField(Field);
if (Phdr.p_type == ELF::PT_INTERP) {
@@ -3348,7 +3397,12 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
}
OS << "\n";
}
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
OS << "\n Section to Segment mapping:\n Segment Sections...\n";
+ DenseSet<const Elf_Shdr *> BelongsToSegment;
int Phnum = 0;
for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
std::string Sections;
@@ -3363,58 +3417,66 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
Phdr.p_type != ELF::PT_TLS;
if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) &&
checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) &&
- checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL))
+ checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) {
Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " ";
+ BelongsToSegment.insert(&Sec);
+ }
}
OS << Sections << "\n";
OS.flush();
}
+
+ // Display sections that do not belong to a segment.
+ std::string Sections;
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+ if (BelongsToSegment.find(&Sec) == BelongsToSegment.end())
+ Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' ';
+ }
+ if (!Sections.empty()) {
+ OS << " None " << Sections << '\n';
+ OS.flush();
+ }
}
template <class ELFT>
void GNUStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela R,
bool IsRela) {
- SmallString<32> RelocName;
- StringRef SymbolName;
- unsigned Width = ELFT::Is64Bits ? 16 : 8;
- unsigned Bias = ELFT::Is64Bits ? 8 : 0;
- // First two fields are bit width dependent. The rest of them are after are
- // fixed width.
- Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
-
uint32_t SymIndex = R.getSymbol(Obj->isMips64EL());
const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex;
- Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
- SymbolName =
- unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()));
- std::string Addend, Info, Offset, Value;
- Offset = to_string(format_hex_no_prefix(R.r_offset, Width));
- Info = to_string(format_hex_no_prefix(R.r_info, Width));
- Value = to_string(format_hex_no_prefix(Sym->getValue(), Width));
- int64_t RelAddend = R.r_addend;
- if (!SymbolName.empty() && IsRela) {
- if (R.r_addend < 0)
- Addend = " - ";
- else
- Addend = " + ";
- }
+ std::string SymbolName = maybeDemangle(
+ unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable())));
+ printRelocation(Obj, Sym, SymbolName, R, IsRela);
+}
- if (SymbolName.empty() && Sym->getValue() == 0)
- Value = "";
+template <class ELFT> void GNUStyle<ELFT>::printDynamic(const ELFO *Obj) {
+ Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+ if (Table.empty())
+ return;
- if (IsRela)
- Addend += to_string(format_hex_no_prefix(std::abs(RelAddend), 1));
+ const DynRegionInfo &DynamicTableRegion =
+ this->dumper()->getDynamicTableRegion();
+ OS << "Dynamic section at offset "
+ << format_hex(reinterpret_cast<const uint8_t *>(DynamicTableRegion.Addr) -
+ Obj->base(),
+ 1)
+ << " contains " << Table.size() << " entries:\n";
- Fields[0].Str = Offset;
- Fields[1].Str = Info;
- Fields[2].Str = RelocName.c_str();
- Fields[3].Str = Value;
- Fields[4].Str = SymbolName;
- for (auto &Field : Fields)
- printField(Field);
- OS << Addend;
- OS << "\n";
+ bool Is64 = ELFT::Is64Bits;
+ if (Is64)
+ OS << " Tag Type Name/Value\n";
+ else
+ OS << " Tag Type Name/Value\n";
+ for (auto Entry : Table) {
+ uintX_t Tag = Entry.getTag();
+ std::string TypeString = std::string("(") +
+ getTypeString(Obj->getHeader()->e_machine, Tag) +
+ ")";
+ OS << " " << format_hex(Tag, Is64 ? 18 : 10)
+ << format(" %-20s ", TypeString.c_str());
+ this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
+ OS << "\n";
+ }
}
template <class ELFT>
@@ -3427,7 +3489,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
OS << "\n'RELA' relocation section at offset "
<< format_hex(reinterpret_cast<const uint8_t *>(DynRelaRegion.Addr) -
Obj->base(),
- 1) << " contains " << DynRelaRegion.Size << " bytes:\n";
+ 1)
+ << " contains " << DynRelaRegion.Size << " bytes:\n";
printRelocHeader(ELF::SHT_RELA);
for (const Elf_Rela &Rela : this->dumper()->dyn_relas())
printDynamicRelocation(Obj, Rela, true);
@@ -3436,7 +3499,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
OS << "\n'REL' relocation section at offset "
<< format_hex(reinterpret_cast<const uint8_t *>(DynRelRegion.Addr) -
Obj->base(),
- 1) << " contains " << DynRelRegion.Size << " bytes:\n";
+ 1)
+ << " contains " << DynRelRegion.Size << " bytes:\n";
printRelocHeader(ELF::SHT_REL);
for (const Elf_Rel &Rel : this->dumper()->dyn_rels()) {
Elf_Rela Rela;
@@ -3450,7 +3514,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
OS << "\n'RELR' relocation section at offset "
<< format_hex(reinterpret_cast<const uint8_t *>(DynRelrRegion.Addr) -
Obj->base(),
- 1) << " contains " << DynRelrRegion.Size << " bytes:\n";
+ 1)
+ << " contains " << DynRelrRegion.Size << " bytes:\n";
printRelocHeader(ELF::SHT_REL);
Elf_Relr_Range Relrs = this->dumper()->dyn_relrs();
std::vector<Elf_Rela> RelrRelas = unwrapOrError(Obj->decode_relrs(Relrs));
@@ -3462,7 +3527,8 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
OS << "\n'PLT' relocation section at offset "
<< format_hex(reinterpret_cast<const uint8_t *>(DynPLTRelRegion.Addr) -
Obj->base(),
- 1) << " contains " << DynPLTRelRegion.Size << " bytes:\n";
+ 1)
+ << " contains " << DynPLTRelRegion.Size << " bytes:\n";
}
if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
printRelocHeader(ELF::SHT_RELA);
@@ -3480,18 +3546,189 @@ void GNUStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
}
}
+template <class ELFT>
+static void printGNUVersionSectionProlog(formatted_raw_ostream &OS,
+ const Twine &Name, unsigned EntriesNum,
+ const ELFFile<ELFT> *Obj,
+ const typename ELFT::Shdr *Sec) {
+ StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
+ OS << Name << " section '" << SecName << "' "
+ << "contains " << EntriesNum << " entries:\n";
+
+ const typename ELFT::Shdr *SymTab =
+ unwrapOrError(Obj->getSection(Sec->sh_link));
+ StringRef SymTabName = unwrapOrError(Obj->getSectionName(SymTab));
+ OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16)
+ << " Offset: " << format_hex(Sec->sh_offset, 8)
+ << " Link: " << Sec->sh_link << " (" << SymTabName << ")\n";
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) {
+ if (!Sec)
+ return;
+
+ unsigned Entries = Sec->sh_size / sizeof(Elf_Versym);
+ printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec);
+
+ const uint8_t *VersymBuf =
+ reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+ const ELFDumper<ELFT> *Dumper = this->dumper();
+ StringRef StrTable = Dumper->getDynamicStringTable();
+
+ // readelf prints 4 entries per line.
+ for (uint64_t VersymRow = 0; VersymRow < Entries; VersymRow += 4) {
+ OS << " " << format_hex_no_prefix(VersymRow, 3) << ":";
+
+ for (uint64_t VersymIndex = 0;
+ (VersymIndex < 4) && (VersymIndex + VersymRow) < Entries;
+ ++VersymIndex) {
+ const Elf_Versym *Versym =
+ reinterpret_cast<const Elf_Versym *>(VersymBuf);
+ switch (Versym->vs_index) {
+ case 0:
+ OS << " 0 (*local*) ";
+ break;
+ case 1:
+ OS << " 1 (*global*) ";
+ break;
+ default:
+ OS << format("%4x%c", Versym->vs_index & VERSYM_VERSION,
+ Versym->vs_index & VERSYM_HIDDEN ? 'h' : ' ');
+
+ bool IsDefault = true;
+ std::string VersionName = Dumper->getSymbolVersionByIndex(
+ StrTable, Versym->vs_index, IsDefault);
+
+ if (!VersionName.empty())
+ VersionName = "(" + VersionName + ")";
+ else
+ VersionName = "(*invalid*)";
+ OS << left_justify(VersionName, 13);
+ }
+ VersymBuf += sizeof(Elf_Versym);
+ }
+ OS << '\n';
+ }
+ OS << '\n';
+}
+
+static std::string versionFlagToString(unsigned Flags) {
+ if (Flags == 0)
+ return "none";
+
+ std::string Ret;
+ auto AddFlag = [&Ret, &Flags](unsigned Flag, StringRef Name) {
+ if (!(Flags & Flag))
+ return;
+ if (!Ret.empty())
+ Ret += " | ";
+ Ret += Name;
+ Flags &= ~Flag;
+ };
+
+ AddFlag(VER_FLG_BASE, "BASE");
+ AddFlag(VER_FLG_WEAK, "WEAK");
+ AddFlag(VER_FLG_INFO, "INFO");
+ AddFlag(~0, "<unknown>");
+ return Ret;
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) {
+ if (!Sec)
+ return;
+
+ unsigned VerDefsNum = Sec->sh_info;
+ printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec);
+
+ const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+ StringRef StringTable(
+ reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
+ (size_t)StrTabSec->sh_size);
+
+ const uint8_t *VerdefBuf = unwrapOrError(Obj->getSectionContents(Sec)).data();
+ const uint8_t *Begin = VerdefBuf;
+
+ while (VerDefsNum--) {
+ const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+ OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u",
+ VerdefBuf - Begin, (unsigned)Verdef->vd_version,
+ versionFlagToString(Verdef->vd_flags).c_str(),
+ (unsigned)Verdef->vd_ndx, (unsigned)Verdef->vd_cnt);
+
+ const uint8_t *VerdauxBuf = VerdefBuf + Verdef->vd_aux;
+ const Elf_Verdaux *Verdaux =
+ reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+ OS << format(" Name: %s\n",
+ StringTable.drop_front(Verdaux->vda_name).data());
+
+ for (unsigned I = 1; I < Verdef->vd_cnt; ++I) {
+ VerdauxBuf += Verdaux->vda_next;
+ Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+ OS << format(" 0x%04x: Parent %u: %s\n", VerdauxBuf - Begin, I,
+ StringTable.drop_front(Verdaux->vda_name).data());
+ }
+
+ VerdefBuf += Verdef->vd_next;
+ }
+ OS << '\n';
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) {
+ if (!Sec)
+ return;
+
+ unsigned VerneedNum = Sec->sh_info;
+ printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec);
+
+ ArrayRef<uint8_t> SecData = unwrapOrError(Obj->getSectionContents(Sec));
+
+ const Elf_Shdr *StrTabSec = unwrapOrError(Obj->getSection(Sec->sh_link));
+ StringRef StringTable = {
+ reinterpret_cast<const char *>(Obj->base() + StrTabSec->sh_offset),
+ (size_t)StrTabSec->sh_size};
+
+ const uint8_t *VerneedBuf = SecData.data();
+ for (unsigned I = 0; I < VerneedNum; ++I) {
+ const Elf_Verneed *Verneed =
+ reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+
+ OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n",
+ reinterpret_cast<const uint8_t *>(Verneed) - SecData.begin(),
+ (unsigned)Verneed->vn_version,
+ StringTable.drop_front(Verneed->vn_file).data(),
+ (unsigned)Verneed->vn_cnt);
+
+ const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+ for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+ const Elf_Vernaux *Vernaux =
+ reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+
+ OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n",
+ reinterpret_cast<const uint8_t *>(Vernaux) - SecData.begin(),
+ StringTable.drop_front(Vernaux->vna_name).data(),
+ versionFlagToString(Vernaux->vna_flags).c_str(),
+ (unsigned)Vernaux->vna_other);
+ VernauxBuf += Vernaux->vna_next;
+ }
+ VerneedBuf += Verneed->vn_next;
+ }
+ OS << '\n';
+}
+
// Hash histogram shows statistics of how efficient the hash was for the
// dynamic symbol table. The table shows number of hash buckets for different
// lengths of chains as absolute number and percentage of the total buckets.
// Additionally cumulative coverage of symbols for each set of buckets.
template <class ELFT>
void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
-
- const Elf_Hash *HashTable = this->dumper()->getHashTable();
- const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable();
-
// Print histogram for .hash section
- if (HashTable) {
+ if (const Elf_Hash *HashTable = this->dumper()->getHashTable()) {
size_t NBucket = HashTable->nbucket;
size_t NChain = HashTable->nchain;
ArrayRef<Elf_Word> Buckets = HashTable->buckets();
@@ -3535,7 +3772,7 @@ void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
}
// Print histogram for .gnu.hash section
- if (GnuHashTable) {
+ if (const Elf_GnuHash *GnuHashTable = this->dumper()->getGnuHashTable()) {
size_t NBucket = GnuHashTable->nbuckets;
ArrayRef<Elf_Word> Buckets = GnuHashTable->buckets();
unsigned NumSyms = this->dumper()->dynamic_symbols().size();
@@ -3595,6 +3832,24 @@ void GNUStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
OS << "GNUStyle::printAddrsig not implemented\n";
}
+static StringRef getGenericNoteTypeName(const uint32_t NT) {
+ static const struct {
+ uint32_t ID;
+ const char *Name;
+ } Notes[] = {
+ {ELF::NT_VERSION, "NT_VERSION (version)"},
+ {ELF::NT_ARCH, "NT_ARCH (architecture)"},
+ {ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
+ {ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
+ };
+
+ for (const auto &Note : Notes)
+ if (Note.ID == NT)
+ return Note.Name;
+
+ return "";
+}
+
static std::string getGNUNoteTypeName(const uint32_t NT) {
static const struct {
uint32_t ID;
@@ -3649,14 +3904,11 @@ static std::string getAMDNoteTypeName(const uint32_t NT) {
static const struct {
uint32_t ID;
const char *Name;
- } Notes[] = {
- {ELF::NT_AMD_AMDGPU_HSA_METADATA,
- "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
- {ELF::NT_AMD_AMDGPU_ISA,
- "NT_AMD_AMDGPU_ISA (ISA Version)"},
- {ELF::NT_AMD_AMDGPU_PAL_METADATA,
- "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}
- };
+ } Notes[] = {{ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ "NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata)"},
+ {ELF::NT_AMD_AMDGPU_ISA, "NT_AMD_AMDGPU_ISA (ISA Version)"},
+ {ELF::NT_AMD_AMDGPU_PAL_METADATA,
+ "NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata)"}};
for (const auto &Note : Notes)
if (Note.ID == NT)
@@ -3683,6 +3935,16 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
ArrayRef<uint8_t> Data) {
std::string str;
raw_string_ostream OS(str);
+ uint32_t PrData;
+ auto DumpBit = [&](uint32_t Flag, StringRef Name) {
+ if (PrData & Flag) {
+ PrData &= ~Flag;
+ OS << Name;
+ if (PrData)
+ OS << ", ";
+ }
+ };
+
switch (Type) {
default:
OS << format("<application-specific type 0x%x>", Type);
@@ -3701,41 +3963,101 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
if (DataSize)
OS << format(" <corrupt length: 0x%x>", DataSize);
return OS.str();
+ case GNU_PROPERTY_AARCH64_FEATURE_1_AND:
case GNU_PROPERTY_X86_FEATURE_1_AND:
- OS << "X86 features: ";
- if (DataSize != 4 && DataSize != 8) {
+ OS << ((Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) ? "aarch64 feature: "
+ : "x86 feature: ");
+ if (DataSize != 4) {
OS << format("<corrupt length: 0x%x>", DataSize);
return OS.str();
}
- uint64_t CFProtection =
- (DataSize == 4)
- ? support::endian::read32<ELFT::TargetEndianness>(Data.data())
- : support::endian::read64<ELFT::TargetEndianness>(Data.data());
- if (CFProtection == 0) {
- OS << "none";
+ PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
+ if (PrData == 0) {
+ OS << "<None>";
return OS.str();
}
- if (CFProtection & GNU_PROPERTY_X86_FEATURE_1_IBT) {
- OS << "IBT";
- CFProtection &= ~GNU_PROPERTY_X86_FEATURE_1_IBT;
- if (CFProtection)
- OS << ", ";
+ if (Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
+ DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI");
+ DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC");
+ } else {
+ DumpBit(GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK");
}
- if (CFProtection & GNU_PROPERTY_X86_FEATURE_1_SHSTK) {
- OS << "SHSTK";
- CFProtection &= ~GNU_PROPERTY_X86_FEATURE_1_SHSTK;
- if (CFProtection)
- OS << ", ";
+ if (PrData)
+ OS << format("<unknown flags: 0x%x>", PrData);
+ return OS.str();
+ case GNU_PROPERTY_X86_ISA_1_NEEDED:
+ case GNU_PROPERTY_X86_ISA_1_USED:
+ OS << "x86 ISA "
+ << (Type == GNU_PROPERTY_X86_ISA_1_NEEDED ? "needed: " : "used: ");
+ if (DataSize != 4) {
+ OS << format("<corrupt length: 0x%x>", DataSize);
+ return OS.str();
}
- if (CFProtection)
- OS << format("<unknown flags: 0x%llx>", CFProtection);
+ PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
+ if (PrData == 0) {
+ OS << "<None>";
+ return OS.str();
+ }
+ DumpBit(GNU_PROPERTY_X86_ISA_1_CMOV, "CMOV");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_SSE, "SSE");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_SSE2, "SSE2");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_SSE3, "SSE3");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_SSSE3, "SSSE3");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_SSE4_1, "SSE4_1");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_SSE4_2, "SSE4_2");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX, "AVX");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX2, "AVX2");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_FMA, "FMA");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512F, "AVX512F");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512CD, "AVX512CD");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512ER, "AVX512ER");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512PF, "AVX512PF");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512VL, "AVX512VL");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512DQ, "AVX512DQ");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512BW, "AVX512BW");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS, "AVX512_4FMAPS");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW, "AVX512_4VNNIW");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_BITALG, "AVX512_BITALG");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_IFMA, "AVX512_IFMA");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_VBMI, "AVX512_VBMI");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2, "AVX512_VBMI2");
+ DumpBit(GNU_PROPERTY_X86_ISA_1_AVX512_VNNI, "AVX512_VNNI");
+ if (PrData)
+ OS << format("<unknown flags: 0x%x>", PrData);
+ return OS.str();
+ break;
+ case GNU_PROPERTY_X86_FEATURE_2_NEEDED:
+ case GNU_PROPERTY_X86_FEATURE_2_USED:
+ OS << "x86 feature "
+ << (Type == GNU_PROPERTY_X86_FEATURE_2_NEEDED ? "needed: " : "used: ");
+ if (DataSize != 4) {
+ OS << format("<corrupt length: 0x%x>", DataSize);
+ return OS.str();
+ }
+ PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
+ if (PrData == 0) {
+ OS << "<None>";
+ return OS.str();
+ }
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_X86, "x86");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_X87, "x87");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_MMX, "MMX");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_XMM, "XMM");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_YMM, "YMM");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_ZMM, "ZMM");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_FXSR, "FXSR");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVE, "XSAVE");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT, "XSAVEOPT");
+ DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEC, "XSAVEC");
+ if (PrData)
+ OS << format("<unknown flags: 0x%x>", PrData);
return OS.str();
}
}
template <typename ELFT>
-static SmallVector<std::string, 4>
-getGNUPropertyList(ArrayRef<uint8_t> Arr) {
+static SmallVector<std::string, 4> getGNUPropertyList(ArrayRef<uint8_t> Arr) {
using Elf_Word = typename ELFT::Word;
SmallVector<std::string, 4> Properties;
@@ -3770,12 +4092,11 @@ struct GNUAbiTag {
bool IsValid;
};
-template <typename ELFT>
-static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
+template <typename ELFT> static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
typedef typename ELFT::Word Elf_Word;
- ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word*>(Desc.begin()),
- reinterpret_cast<const Elf_Word*>(Desc.end()));
+ ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word *>(Desc.begin()),
+ reinterpret_cast<const Elf_Word *>(Desc.end()));
if (Words.size() < 4)
return {"", "", /*IsValid=*/false};
@@ -3846,24 +4167,13 @@ static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
default:
return {"", ""};
case ELF::NT_AMD_AMDGPU_HSA_METADATA:
- return {"HSA Metadata",
- std::string(reinterpret_cast<const char *>(Desc.data()),
- Desc.size())};
+ return {
+ "HSA Metadata",
+ std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
case ELF::NT_AMD_AMDGPU_ISA:
- return {"ISA Version",
- std::string(reinterpret_cast<const char *>(Desc.data()),
- Desc.size())};
- case ELF::NT_AMD_AMDGPU_PAL_METADATA:
- const uint32_t *PALMetadataBegin =
- reinterpret_cast<const uint32_t *>(Desc.data());
- const uint32_t *PALMetadataEnd = PALMetadataBegin + Desc.size();
- std::vector<uint32_t> PALMetadata(PALMetadataBegin, PALMetadataEnd);
- std::string PALMetadataString;
- auto Error = AMDGPU::PALMD::toString(PALMetadata, PALMetadataString);
- if (Error) {
- return {"PAL Metadata", "Invalid"};
- }
- return {"PAL Metadata", PALMetadataString};
+ return {
+ "ISA Version",
+ std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
}
}
@@ -3877,36 +4187,28 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return {"", ""};
- case ELF::NT_AMDGPU_METADATA:
+ case ELF::NT_AMDGPU_METADATA: {
auto MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
- msgpack::Reader MsgPackReader(MsgPackString);
- auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader);
- if (errorToBool(OptMsgPackNodeOrErr.takeError()))
- return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
- auto &OptMsgPackNode = *OptMsgPackNodeOrErr;
- if (!OptMsgPackNode)
+ msgpack::Document MsgPackDoc;
+ if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
- auto &MsgPackNode = *OptMsgPackNode;
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
- if (!Verifier.verify(*MsgPackNode))
+ if (!Verifier.verify(MsgPackDoc.getRoot()))
return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
std::string HSAMetadataString;
raw_string_ostream StrOS(HSAMetadataString);
- yaml::Output YOut(StrOS);
- YOut << MsgPackNode;
+ MsgPackDoc.toYAML(StrOS);
return {"AMDGPU Metadata", StrOS.str()};
}
+ }
}
template <class ELFT>
void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
- const Elf_Ehdr *e = Obj->getHeader();
- bool IsCore = e->e_type == ELF::ET_CORE;
-
auto PrintHeader = [&](const typename ELFT::Off Offset,
const typename ELFT::Addr Size) {
OS << "Displaying notes found at file offset " << format_hex(Offset, 10)
@@ -3938,12 +4240,16 @@ void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
if (!N.Type.empty())
OS << " " << N.Type << ":\n " << N.Value << '\n';
} else {
- OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
+ StringRef NoteType = getGenericNoteTypeName(Type);
+ if (!NoteType.empty())
+ OS << NoteType;
+ else
+ OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
}
OS << '\n';
};
- if (IsCore) {
+ if (Obj->getHeader()->e_type == ELF::ET_CORE) {
for (const auto &P : unwrapOrError(Obj->program_headers())) {
if (P.p_type != PT_NOTE)
continue;
@@ -3992,7 +4298,10 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
<< format_hex_no_prefix(Parser.getGp(), 8 + Bias) << "\n\n";
OS << " Reserved entries:\n";
- OS << " Address Access Initial Purpose\n";
+ if (ELFT::Is64Bits)
+ OS << " Address Access Initial Purpose\n";
+ else
+ OS << " Address Access Initial Purpose\n";
PrintEntry(Parser.getGotLazyResolver(), "Lazy resolver");
if (Parser.getGotModulePointer())
PrintEntry(Parser.getGotModulePointer(), "Module pointer (GNU extension)");
@@ -4000,7 +4309,10 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
if (!Parser.getLocalEntries().empty()) {
OS << "\n";
OS << " Local entries:\n";
- OS << " Address Access Initial\n";
+ if (ELFT::Is64Bits)
+ OS << " Address Access Initial\n";
+ else
+ OS << " Address Access Initial\n";
for (auto &E : Parser.getLocalEntries())
PrintEntry(&E, "");
}
@@ -4011,7 +4323,11 @@ void GNUStyle<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
if (!Parser.getGlobalEntries().empty()) {
OS << "\n";
OS << " Global entries:\n";
- OS << " Address Access Initial Sym.Val. Type Ndx Name\n";
+ if (ELFT::Is64Bits)
+ OS << " Address Access Initial Sym.Val."
+ << " Type Ndx Name\n";
+ else
+ OS << " Address Access Initial Sym.Val. Type Ndx Name\n";
for (auto &E : Parser.getGlobalEntries()) {
const Elf_Sym *Sym = Parser.getGotSym(&E);
std::string SymName = this->dumper()->getFullSymbolName(
@@ -4045,7 +4361,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
size_t Bias = ELFT::Is64Bits ? 8 : 0;
auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
OS.PadToColumn(2);
- OS << format_hex_no_prefix(Parser.getGotAddress(E), 8 + Bias);
+ OS << format_hex_no_prefix(Parser.getPltAddress(E), 8 + Bias);
OS.PadToColumn(11 + Bias);
OS << format_hex_no_prefix(*E, 8 + Bias);
OS.PadToColumn(20 + 2 * Bias);
@@ -4058,7 +4374,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
OS << " Address Initial Purpose\n";
PrintEntry(Parser.getPltLazyResolver(), "PLT lazy resolver");
if (Parser.getPltModulePointer())
- PrintEntry(Parser.getGotModulePointer(), "Module pointer");
+ PrintEntry(Parser.getPltModulePointer(), "Module pointer");
if (!Parser.getPltEntries().empty()) {
OS << "\n";
@@ -4070,7 +4386,7 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
Sym, this->dumper()->getDynamicStringTable(), false);
OS.PadToColumn(2);
- OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias));
+ OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias));
OS.PadToColumn(11 + Bias);
OS << to_string(format_hex_no_prefix(E, 8 + Bias));
OS.PadToColumn(20 + 2 * Bias);
@@ -4087,21 +4403,21 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
}
template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
- const Elf_Ehdr *e = Obj->getHeader();
+ const Elf_Ehdr *E = Obj->getHeader();
{
DictScope D(W, "ElfHeader");
{
DictScope D(W, "Ident");
- W.printBinary("Magic", makeArrayRef(e->e_ident).slice(ELF::EI_MAG0, 4));
- W.printEnum("Class", e->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
- W.printEnum("DataEncoding", e->e_ident[ELF::EI_DATA],
+ W.printBinary("Magic", makeArrayRef(E->e_ident).slice(ELF::EI_MAG0, 4));
+ W.printEnum("Class", E->e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
+ W.printEnum("DataEncoding", E->e_ident[ELF::EI_DATA],
makeArrayRef(ElfDataEncoding));
- W.printNumber("FileVersion", e->e_ident[ELF::EI_VERSION]);
+ W.printNumber("FileVersion", E->e_ident[ELF::EI_VERSION]);
auto OSABI = makeArrayRef(ElfOSABI);
- if (e->e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
- e->e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
- switch (e->e_machine) {
+ if (E->e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
+ E->e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
+ switch (E->e_machine) {
case ELF::EM_AMDGPU:
OSABI = makeArrayRef(AMDGPUElfOSABI);
break;
@@ -4113,34 +4429,35 @@ template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
break;
}
}
- W.printEnum("OS/ABI", e->e_ident[ELF::EI_OSABI], OSABI);
- W.printNumber("ABIVersion", e->e_ident[ELF::EI_ABIVERSION]);
- W.printBinary("Unused", makeArrayRef(e->e_ident).slice(ELF::EI_PAD));
+ W.printEnum("OS/ABI", E->e_ident[ELF::EI_OSABI], OSABI);
+ W.printNumber("ABIVersion", E->e_ident[ELF::EI_ABIVERSION]);
+ W.printBinary("Unused", makeArrayRef(E->e_ident).slice(ELF::EI_PAD));
}
- W.printEnum("Type", e->e_type, makeArrayRef(ElfObjectFileType));
- W.printEnum("Machine", e->e_machine, makeArrayRef(ElfMachineType));
- W.printNumber("Version", e->e_version);
- W.printHex("Entry", e->e_entry);
- W.printHex("ProgramHeaderOffset", e->e_phoff);
- W.printHex("SectionHeaderOffset", e->e_shoff);
- if (e->e_machine == EM_MIPS)
- W.printFlags("Flags", e->e_flags, makeArrayRef(ElfHeaderMipsFlags),
+ W.printEnum("Type", E->e_type, makeArrayRef(ElfObjectFileType));
+ W.printEnum("Machine", E->e_machine, makeArrayRef(ElfMachineType));
+ W.printNumber("Version", E->e_version);
+ W.printHex("Entry", E->e_entry);
+ W.printHex("ProgramHeaderOffset", E->e_phoff);
+ W.printHex("SectionHeaderOffset", E->e_shoff);
+ if (E->e_machine == EM_MIPS)
+ W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
- else if (e->e_machine == EM_AMDGPU)
- W.printFlags("Flags", e->e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
+ else if (E->e_machine == EM_AMDGPU)
+ W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderAMDGPUFlags),
unsigned(ELF::EF_AMDGPU_MACH));
- else if (e->e_machine == EM_RISCV)
- W.printFlags("Flags", e->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
+ else if (E->e_machine == EM_RISCV)
+ W.printFlags("Flags", E->e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else
- W.printFlags("Flags", e->e_flags);
- W.printNumber("HeaderSize", e->e_ehsize);
- W.printNumber("ProgramHeaderEntrySize", e->e_phentsize);
- W.printNumber("ProgramHeaderCount", e->e_phnum);
- W.printNumber("SectionHeaderEntrySize", e->e_shentsize);
+ W.printFlags("Flags", E->e_flags);
+ W.printNumber("HeaderSize", E->e_ehsize);
+ W.printNumber("ProgramHeaderEntrySize", E->e_phentsize);
+ W.printNumber("ProgramHeaderCount", E->e_phnum);
+ W.printNumber("SectionHeaderEntrySize", E->e_shentsize);
W.printString("SectionHeaderCount", getSectionHeadersNumString(Obj));
- W.printString("StringTableSectionIndex", getSectionHeaderTableIndexString(Obj));
+ W.printString("StringTableSectionIndex",
+ getSectionHeaderTableIndexString(Obj));
}
}
@@ -4185,10 +4502,8 @@ template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
++SectionNumber;
- if (Sec.sh_type != ELF::SHT_REL &&
- Sec.sh_type != ELF::SHT_RELA &&
- Sec.sh_type != ELF::SHT_RELR &&
- Sec.sh_type != ELF::SHT_ANDROID_REL &&
+ if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA &&
+ Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_REL &&
Sec.sh_type != ELF::SHT_ANDROID_RELA &&
Sec.sh_type != ELF::SHT_ANDROID_RELR)
continue;
@@ -4249,7 +4564,7 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
const Elf_Shdr *SymTab) {
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
- StringRef TargetName;
+ std::string TargetName;
const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTab));
if (Sym && Sym->getType() == ELF::STT_SECTION) {
const Elf_Shdr *Sec = unwrapOrError(
@@ -4257,7 +4572,8 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
TargetName = unwrapOrError(Obj->getSectionName(Sec));
} else if (Sym) {
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*SymTab));
- TargetName = unwrapOrError(Sym->getName(StrTable));
+ TargetName = this->dumper()->getFullSymbolName(
+ Sym, StrTable, SymTab->sh_type == SHT_DYNSYM /* IsDynamic */);
}
if (opts::ExpandRelocs) {
@@ -4270,8 +4586,8 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
} else {
raw_ostream &OS = W.startLine();
OS << W.hex(Rel.r_offset) << " " << RelocName << " "
- << (!TargetName.empty() ? TargetName : "-") << " "
- << W.hex(Rel.r_addend) << "\n";
+ << (!TargetName.empty() ? TargetName : "-") << " " << W.hex(Rel.r_addend)
+ << "\n";
}
}
@@ -4280,13 +4596,12 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
- for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
- ++SectionIndex;
-
- StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
-
+ ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
+ const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
+ for (const Elf_Shdr &Sec : Sections) {
+ StringRef Name = getSectionName(Sec, *ElfObj, Sections);
DictScope SectionD(W, "Section");
- W.printNumber("Index", SectionIndex);
+ W.printNumber("Index", ++SectionIndex);
W.printNumber("Name", Name, Sec.sh_name);
W.printHex(
"Type",
@@ -4350,8 +4665,9 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) {
ArrayRef<uint8_t> Data = unwrapOrError(Obj->getSectionContents(&Sec));
- W.printBinaryBlock("SectionData",
- StringRef((const char *)Data.data(), Data.size()));
+ W.printBinaryBlock(
+ "SectionData",
+ StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()));
}
}
}
@@ -4402,6 +4718,15 @@ void LLVMStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
W.printHex("Section", SectionName, SectionIndex);
}
+template <class ELFT>
+void LLVMStyle<ELFT>::printSymbols(const ELFO *Obj, bool PrintSymbols,
+ bool PrintDynamicSymbols) {
+ if (PrintSymbols)
+ printSymbols(Obj);
+ if (PrintDynamicSymbols)
+ printDynamicSymbols(Obj);
+}
+
template <class ELFT> void LLVMStyle<ELFT>::printSymbols(const ELFO *Obj) {
ListScope Group(W, "Symbols");
this->dumper()->printSymbolsHelper(false);
@@ -4413,6 +4738,31 @@ void LLVMStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
this->dumper()->printSymbolsHelper(true);
}
+template <class ELFT> void LLVMStyle<ELFT>::printDynamic(const ELFFile<ELFT> *Obj) {
+ Elf_Dyn_Range Table = this->dumper()->dynamic_table();
+ if (Table.empty())
+ return;
+
+ raw_ostream &OS = W.getOStream();
+ W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n";
+
+ bool Is64 = ELFT::Is64Bits;
+ if (Is64)
+ W.startLine() << " Tag Type Name/Value\n";
+ else
+ W.startLine() << " Tag Type Name/Value\n";
+ for (auto Entry : Table) {
+ uintX_t Tag = Entry.getTag();
+ W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, true) << " "
+ << format("%-21s",
+ getTypeString(Obj->getHeader()->e_machine, Tag));
+ this->dumper()->printDynamicEntry(OS, Tag, Entry.getVal());
+ OS << "\n";
+ }
+
+ W.startLine() << "]\n";
+}
+
template <class ELFT>
void LLVMStyle<ELFT>::printDynamicRelocations(const ELFO *Obj) {
const DynRegionInfo &DynRelRegion = this->dumper()->getDynRelRegion();
@@ -4459,11 +4809,11 @@ template <class ELFT>
void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
- StringRef SymbolName;
+ std::string SymbolName;
uint32_t SymIndex = Rel.getSymbol(Obj->isMips64EL());
const Elf_Sym *Sym = this->dumper()->dynamic_symbols().begin() + SymIndex;
- SymbolName =
- unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable()));
+ SymbolName = maybeDemangle(
+ unwrapOrError(Sym->getName(this->dumper()->getDynamicStringTable())));
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
W.printHex("Offset", Rel.r_offset);
@@ -4473,12 +4823,22 @@ void LLVMStyle<ELFT>::printDynamicRelocation(const ELFO *Obj, Elf_Rela Rel) {
} else {
raw_ostream &OS = W.startLine();
OS << W.hex(Rel.r_offset) << " " << RelocName << " "
- << (!SymbolName.empty() ? SymbolName : "-") << " "
- << W.hex(Rel.r_addend) << "\n";
+ << (!SymbolName.empty() ? SymbolName : "-") << " " << W.hex(Rel.r_addend)
+ << "\n";
}
}
template <class ELFT>
+void LLVMStyle<ELFT>::printProgramHeaders(
+ const ELFO *Obj, bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) {
+ if (PrintProgramHeaders)
+ printProgramHeaders(Obj);
+ if (PrintSectionMapping == cl::BOU_TRUE)
+ printSectionMapping(Obj);
+}
+
+template <class ELFT>
void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
ListScope L(W, "ProgramHeaders");
@@ -4498,6 +4858,125 @@ void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
}
template <class ELFT>
+void LLVMStyle<ELFT>::printVersionSymbolSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) {
+ DictScope SS(W, "Version symbols");
+ if (!Sec)
+ return;
+
+ StringRef SecName = unwrapOrError(Obj->getSectionName(Sec));
+ W.printNumber("Section Name", SecName, Sec->sh_name);
+ W.printHex("Address", Sec->sh_addr);
+ W.printHex("Offset", Sec->sh_offset);
+ W.printNumber("Link", Sec->sh_link);
+
+ const uint8_t *VersymBuf =
+ reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+ const ELFDumper<ELFT> *Dumper = this->dumper();
+ StringRef StrTable = Dumper->getDynamicStringTable();
+
+ // Same number of entries in the dynamic symbol table (DT_SYMTAB).
+ ListScope Syms(W, "Symbols");
+ for (const Elf_Sym &Sym : Dumper->dynamic_symbols()) {
+ DictScope S(W, "Symbol");
+ const Elf_Versym *Versym = reinterpret_cast<const Elf_Versym *>(VersymBuf);
+ std::string FullSymbolName =
+ Dumper->getFullSymbolName(&Sym, StrTable, true /* IsDynamic */);
+ W.printNumber("Version", Versym->vs_index & VERSYM_VERSION);
+ W.printString("Name", FullSymbolName);
+ VersymBuf += sizeof(Elf_Versym);
+ }
+}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printVersionDefinitionSection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) {
+ DictScope SD(W, "SHT_GNU_verdef");
+ if (!Sec)
+ return;
+
+ const uint8_t *SecStartAddress =
+ reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+ const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size;
+ const uint8_t *VerdefBuf = SecStartAddress;
+ const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+
+ unsigned VerDefsNum = Sec->sh_info;
+ while (VerDefsNum--) {
+ if (VerdefBuf + sizeof(Elf_Verdef) > SecEndAddress)
+ // FIXME: report_fatal_error is not a good way to report error. We should
+ // emit a parsing error here and below.
+ report_fatal_error("invalid offset in the section");
+
+ const Elf_Verdef *Verdef = reinterpret_cast<const Elf_Verdef *>(VerdefBuf);
+ DictScope Def(W, "Definition");
+ W.printNumber("Version", Verdef->vd_version);
+ W.printEnum("Flags", Verdef->vd_flags, makeArrayRef(SymVersionFlags));
+ W.printNumber("Index", Verdef->vd_ndx);
+ W.printNumber("Hash", Verdef->vd_hash);
+ W.printString("Name", StringRef(reinterpret_cast<const char *>(
+ Obj->base() + StrTab->sh_offset +
+ Verdef->getAux()->vda_name)));
+ if (!Verdef->vd_cnt)
+ report_fatal_error("at least one definition string must exist");
+ if (Verdef->vd_cnt > 2)
+ report_fatal_error("more than one predecessor is not expected");
+
+ if (Verdef->vd_cnt == 2) {
+ const uint8_t *VerdauxBuf =
+ VerdefBuf + Verdef->vd_aux + Verdef->getAux()->vda_next;
+ const Elf_Verdaux *Verdaux =
+ reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf);
+ W.printString("Predecessor",
+ StringRef(reinterpret_cast<const char *>(
+ Obj->base() + StrTab->sh_offset + Verdaux->vda_name)));
+ }
+ VerdefBuf += Verdef->vd_next;
+ }
+}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printVersionDependencySection(const ELFFile<ELFT> *Obj,
+ const Elf_Shdr *Sec) {
+ DictScope SD(W, "SHT_GNU_verneed");
+ if (!Sec)
+ return;
+
+ const uint8_t *SecData =
+ reinterpret_cast<const uint8_t *>(Obj->base() + Sec->sh_offset);
+ const Elf_Shdr *StrTab = unwrapOrError(Obj->getSection(Sec->sh_link));
+
+ const uint8_t *VerneedBuf = SecData;
+ unsigned VerneedNum = Sec->sh_info;
+ for (unsigned I = 0; I < VerneedNum; ++I) {
+ const Elf_Verneed *Verneed =
+ reinterpret_cast<const Elf_Verneed *>(VerneedBuf);
+ DictScope Entry(W, "Dependency");
+ W.printNumber("Version", Verneed->vn_version);
+ W.printNumber("Count", Verneed->vn_cnt);
+ W.printString("FileName",
+ StringRef(reinterpret_cast<const char *>(
+ Obj->base() + StrTab->sh_offset + Verneed->vn_file)));
+
+ const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux;
+ ListScope L(W, "Entries");
+ for (unsigned J = 0; J < Verneed->vn_cnt; ++J) {
+ const Elf_Vernaux *Vernaux =
+ reinterpret_cast<const Elf_Vernaux *>(VernauxBuf);
+ DictScope Entry(W, "Entry");
+ W.printNumber("Hash", Vernaux->vna_hash);
+ W.printEnum("Flags", Vernaux->vna_flags, makeArrayRef(SymVersionFlags));
+ W.printNumber("Index", Vernaux->vna_other);
+ W.printString("Name",
+ StringRef(reinterpret_cast<const char *>(
+ Obj->base() + StrTab->sh_offset + Vernaux->vna_name)));
+ VernauxBuf += Vernaux->vna_next;
+ }
+ VerneedBuf += Verneed->vn_next;
+ }
+}
+
+template <class ELFT>
void LLVMStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
W.startLine() << "Hash Histogram not implemented!\n";
}
@@ -4542,8 +5021,7 @@ void LLVMStyle<ELFT>::printAddrsig(const ELFFile<ELFT> *Obj) {
}
template <typename ELFT>
-static void printGNUNoteLLVMStyle(uint32_t NoteType,
- ArrayRef<uint8_t> Desc,
+static void printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
ScopedPrinter &W) {
switch (NoteType) {
default:
@@ -4576,8 +5054,6 @@ static void printGNUNoteLLVMStyle(uint32_t NoteType,
template <class ELFT>
void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
ListScope L(W, "Notes");
- const Elf_Ehdr *e = Obj->getHeader();
- bool IsCore = e->e_type == ELF::ET_CORE;
auto PrintHeader = [&](const typename ELFT::Off Offset,
const typename ELFT::Addr Size) {
@@ -4609,11 +5085,16 @@ void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
if (!N.Type.empty())
W.printString(N.Type, N.Value);
} else {
- W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')';
+ StringRef NoteType = getGenericNoteTypeName(Type);
+ if (!NoteType.empty())
+ W.printString("Type", NoteType);
+ else
+ W.printString("Type",
+ "Unknown (" + to_string(format_hex(Type, 10)) + ")");
}
};
- if (IsCore) {
+ if (Obj->getHeader()->e_type == ELF::ET_CORE) {
for (const auto &P : unwrapOrError(Obj->program_headers())) {
if (P.p_type != PT_NOTE)
continue;
diff --git a/tools/llvm-readobj/Error.cpp b/tools/llvm-readobj/Error.cpp
index 03d349440e6b..1010f18a58c8 100644
--- a/tools/llvm-readobj/Error.cpp
+++ b/tools/llvm-readobj/Error.cpp
@@ -1,9 +1,8 @@
//===- Error.cpp - system_error extensions for llvm-readobj -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-readobj/Error.h b/tools/llvm-readobj/Error.h
index f3e24bbe5dbf..f390e1b96f8a 100644
--- a/tools/llvm-readobj/Error.h
+++ b/tools/llvm-readobj/Error.h
@@ -1,9 +1,8 @@
//===- Error.h - system_error extensions for llvm-readobj -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
index 35e4cfcb6b10..32a3866eb2f2 100644
--- a/tools/llvm-readobj/MachODumper.cpp
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -1,9 +1,8 @@
-//===-- MachODump.cpp - Object file dumping utility for llvm --------------===//
+//===- MachODumper.cpp - Object file dumping utility for llvm -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,8 +33,6 @@ public:
void printFileHeaders() override;
void printSectionHeaders() override;
void printRelocations() override;
- void printSymbols() override;
- void printDynamicSymbols() override;
void printUnwindInfo() override;
void printStackMap() const override;
@@ -53,6 +50,8 @@ private:
template<class MachHeader>
void printFileHeaders(const MachHeader &Header);
+ void printSymbols() override;
+ void printDynamicSymbols() override;
void printSymbol(const SymbolRef &Symbol);
void printRelocation(const RelocationRef &Reloc);
@@ -163,6 +162,7 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_ALL),
+ LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64E),
};
static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
@@ -483,15 +483,8 @@ void MachODumper::printSectionHeaders(const MachOObjectFile *Obj) {
}
}
- if (opts::SectionData) {
- bool IsBSS = Section.isBSS();
- if (!IsBSS) {
- StringRef Data;
- error(Section.getContents(Data));
-
- W.printBinaryBlock("SectionData", Data);
- }
- }
+ if (opts::SectionData && !Section.isBSS())
+ W.printBinaryBlock("SectionData", unwrapOrError(Section.getContents()));
}
}
@@ -660,18 +653,16 @@ void MachODumper::printStackMap() const {
if (StackMapSection == object::SectionRef())
return;
- StringRef StackMapContents;
- StackMapSection.getContents(StackMapContents);
- ArrayRef<uint8_t> StackMapContentsArray(
- reinterpret_cast<const uint8_t*>(StackMapContents.data()),
- StackMapContents.size());
+ StringRef StackMapContents = unwrapOrError(StackMapSection.getContents());
+ ArrayRef<uint8_t> StackMapContentsArray =
+ arrayRefFromStringRef(StackMapContents);
if (Obj->isLittleEndian())
prettyPrintStackMap(
- W, StackMapV2Parser<support::little>(StackMapContentsArray));
+ W, StackMapParser<support::little>(StackMapContentsArray));
else
- prettyPrintStackMap(W,
- StackMapV2Parser<support::big>(StackMapContentsArray));
+ prettyPrintStackMap(
+ W, StackMapParser<support::big>(StackMapContentsArray));
}
void MachODumper::printNeededLibraries() {
@@ -695,10 +686,10 @@ void MachODumper::printNeededLibraries() {
}
}
- std::stable_sort(Libs.begin(), Libs.end());
+ llvm::stable_sort(Libs);
for (const auto &L : Libs) {
- outs() << " " << L << "\n";
+ W.startLine() << L << "\n";
}
}
diff --git a/tools/llvm-readobj/ObjDumper.cpp b/tools/llvm-readobj/ObjDumper.cpp
index a725140c9d33..0a9e22c8a71c 100644
--- a/tools/llvm-readobj/ObjDumper.cpp
+++ b/tools/llvm-readobj/ObjDumper.cpp
@@ -1,9 +1,8 @@
//===-- ObjDumper.cpp - Base dumper class -----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -17,8 +16,10 @@
#include "llvm-readobj.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
+#include <map>
namespace llvm {
@@ -32,116 +33,127 @@ static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
W << (isPrint(Start[i]) ? static_cast<char>(Start[i]) : '.');
}
-static Expected<object::SectionRef>
-getSecNameOrIndexAsSecRef(const object::ObjectFile *Obj, StringRef SecName) {
- char *StrPtr;
- long SectionIndex = strtol(SecName.data(), &StrPtr, 10);
- object::SectionRef Section;
- long SecIndex;
- if (Obj->isELF())
- SecIndex = 0;
- else
- SecIndex = 1;
- for (object::SectionRef SecRef : Obj->sections()) {
- if (*StrPtr) {
- StringRef SectionName;
-
- if (std::error_code E = SecRef.getName(SectionName))
- return errorCodeToError(E);
-
- if (SectionName == SecName)
- return SecRef;
- } else if (SecIndex == SectionIndex)
- return SecRef;
+static std::vector<object::SectionRef>
+getSectionRefsByNameOrIndex(const object::ObjectFile *Obj,
+ ArrayRef<std::string> Sections) {
+ std::vector<object::SectionRef> Ret;
+ std::map<std::string, bool> SecNames;
+ std::map<unsigned, bool> SecIndices;
+ unsigned SecIndex;
+ for (StringRef Section : Sections) {
+ if (!Section.getAsInteger(0, SecIndex))
+ SecIndices.emplace(SecIndex, false);
+ else
+ SecNames.emplace(Section, false);
+ }
+ SecIndex = Obj->isELF() ? 0 : 1;
+ for (object::SectionRef SecRef : Obj->sections()) {
+ StringRef SecName;
+ error(SecRef.getName(SecName));
+ auto NameIt = SecNames.find(SecName);
+ if (NameIt != SecNames.end())
+ NameIt->second = true;
+ auto IndexIt = SecIndices.find(SecIndex);
+ if (IndexIt != SecIndices.end())
+ IndexIt->second = true;
+ if (NameIt != SecNames.end() || IndexIt != SecIndices.end())
+ Ret.push_back(SecRef);
SecIndex++;
}
- return make_error<StringError>("invalid section reference",
- object::object_error::parse_failed);
+
+ for (const std::pair<std::string, bool> &S : SecNames)
+ if (!S.second)
+ reportWarning(formatv("could not find section '{0}'", S.first).str());
+ for (std::pair<unsigned, bool> S : SecIndices)
+ if (!S.second)
+ reportWarning(formatv("could not find section {0}", S.first).str());
+
+ return Ret;
}
-void ObjDumper::printSectionAsString(const object::ObjectFile *Obj,
- StringRef SecName) {
- Expected<object::SectionRef> SectionRefOrError =
- getSecNameOrIndexAsSecRef(Obj, SecName);
- if (!SectionRefOrError)
- error(std::move(SectionRefOrError));
- object::SectionRef Section = *SectionRefOrError;
- StringRef SectionName;
-
- if (std::error_code E = Section.getName(SectionName))
- error(E);
- W.startLine() << "String dump of section '" << SectionName << "':\n";
-
- StringRef SectionContent;
- Section.getContents(SectionContent);
-
- const uint8_t *SecContent = SectionContent.bytes_begin();
- const uint8_t *CurrentWord = SecContent;
- const uint8_t *SecEnd = SectionContent.bytes_end();
-
- while (CurrentWord <= SecEnd) {
- size_t WordSize = strnlen(reinterpret_cast<const char *>(CurrentWord),
- SecEnd - CurrentWord);
- if (!WordSize) {
- CurrentWord++;
- continue;
+void ObjDumper::printSectionsAsString(const object::ObjectFile *Obj,
+ ArrayRef<std::string> Sections) {
+ bool First = true;
+ for (object::SectionRef Section :
+ getSectionRefsByNameOrIndex(Obj, Sections)) {
+ StringRef SectionName;
+ error(Section.getName(SectionName));
+ if (!First)
+ W.startLine() << '\n';
+ First = false;
+ W.startLine() << "String dump of section '" << SectionName << "':\n";
+
+ StringRef SectionContent = unwrapOrError(Section.getContents());
+
+ const uint8_t *SecContent = SectionContent.bytes_begin();
+ const uint8_t *CurrentWord = SecContent;
+ const uint8_t *SecEnd = SectionContent.bytes_end();
+
+ while (CurrentWord <= SecEnd) {
+ size_t WordSize = strnlen(reinterpret_cast<const char *>(CurrentWord),
+ SecEnd - CurrentWord);
+ if (!WordSize) {
+ CurrentWord++;
+ continue;
+ }
+ W.startLine() << format("[%6tx] ", CurrentWord - SecContent);
+ printAsPrintable(W.startLine(), CurrentWord, WordSize);
+ W.startLine() << '\n';
+ CurrentWord += WordSize + 1;
}
- W.startLine() << format("[%6tx] ", CurrentWord - SecContent);
- printAsPrintable(W.startLine(), CurrentWord, WordSize);
- W.startLine() << '\n';
- CurrentWord += WordSize + 1;
}
}
-void ObjDumper::printSectionAsHex(const object::ObjectFile *Obj,
- StringRef SecName) {
- Expected<object::SectionRef> SectionRefOrError =
- getSecNameOrIndexAsSecRef(Obj, SecName);
- if (!SectionRefOrError)
- error(std::move(SectionRefOrError));
- object::SectionRef Section = *SectionRefOrError;
- StringRef SectionName;
-
- if (std::error_code E = Section.getName(SectionName))
- error(E);
- W.startLine() << "Hex dump of section '" << SectionName << "':\n";
-
- StringRef SectionContent;
- Section.getContents(SectionContent);
- const uint8_t *SecContent = SectionContent.bytes_begin();
- const uint8_t *SecEnd = SecContent + SectionContent.size();
-
- for (const uint8_t *SecPtr = SecContent; SecPtr < SecEnd; SecPtr += 16) {
- const uint8_t *TmpSecPtr = SecPtr;
- uint8_t i;
- uint8_t k;
-
- W.startLine() << format_hex(SecPtr - SecContent, 10);
- W.startLine() << ' ';
- for (i = 0; TmpSecPtr < SecEnd && i < 4; ++i) {
- for (k = 0; TmpSecPtr < SecEnd && k < 4; k++, TmpSecPtr++) {
- uint8_t Val = *(reinterpret_cast<const uint8_t *>(TmpSecPtr));
- W.startLine() << format_hex_no_prefix(Val, 2);
- }
+void ObjDumper::printSectionsAsHex(const object::ObjectFile *Obj,
+ ArrayRef<std::string> Sections) {
+ bool First = true;
+ for (object::SectionRef Section :
+ getSectionRefsByNameOrIndex(Obj, Sections)) {
+ StringRef SectionName;
+ error(Section.getName(SectionName));
+ if (!First)
+ W.startLine() << '\n';
+ First = false;
+ W.startLine() << "Hex dump of section '" << SectionName << "':\n";
+
+ StringRef SectionContent = unwrapOrError(Section.getContents());
+ const uint8_t *SecContent = SectionContent.bytes_begin();
+ const uint8_t *SecEnd = SecContent + SectionContent.size();
+
+ for (const uint8_t *SecPtr = SecContent; SecPtr < SecEnd; SecPtr += 16) {
+ const uint8_t *TmpSecPtr = SecPtr;
+ uint8_t i;
+ uint8_t k;
+
+ W.startLine() << format_hex(Section.getAddress() + (SecPtr - SecContent),
+ 10);
W.startLine() << ' ';
- }
+ for (i = 0; TmpSecPtr < SecEnd && i < 4; ++i) {
+ for (k = 0; TmpSecPtr < SecEnd && k < 4; k++, TmpSecPtr++) {
+ uint8_t Val = *(reinterpret_cast<const uint8_t *>(TmpSecPtr));
+ W.startLine() << format_hex_no_prefix(Val, 2);
+ }
+ W.startLine() << ' ';
+ }
- // We need to print the correct amount of spaces to match the format.
- // We are adding the (4 - i) last rows that are 8 characters each.
- // Then, the (4 - i) spaces that are in between the rows.
- // Least, if we cut in a middle of a row, we add the remaining characters,
- // which is (8 - (k * 2))
- if (i < 4)
- W.startLine() << format("%*c", (4 - i) * 8 + (4 - i) + (8 - (k * 2)),
- ' ');
-
- TmpSecPtr = SecPtr;
- for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i)
- W.startLine() << (isPrint(TmpSecPtr[i]) ? static_cast<char>(TmpSecPtr[i])
- : '.');
-
- W.startLine() << '\n';
+      // We need to print the correct number of spaces to match the format.
+      // We add the (4 - i) missing groups, which are 8 characters each, then
+      // the (4 - i) spaces that separate them. Lastly, if we stopped in the
+      // middle of a group, we add the characters still missing from it,
+      // which is (8 - (k * 2)).
+ if (i < 4)
+ W.startLine() << format("%*c", (4 - i) * 8 + (4 - i) + (8 - (k * 2)),
+ ' ');
+
+ TmpSecPtr = SecPtr;
+ for (i = 0; TmpSecPtr + i < SecEnd && i < 16; ++i)
+ W.startLine() << (isPrint(TmpSecPtr[i])
+ ? static_cast<char>(TmpSecPtr[i])
+ : '.');
+
+ W.startLine() << '\n';
+ }
}
}
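
The padding arithmetic in printSectionsAsHex above is easier to check with concrete numbers. Below is a minimal, self-contained sketch (no LLVM APIs) that reproduces the column math for one partial 16-byte row, using the same (4 - i) * 8 + (4 - i) + (8 - k * 2) formula:

// Worked example of the hex-dump padding computation above.
// A full line prints 4 groups of 4 bytes: 8 hex characters plus one space per
// group, i.e. 36 columns before the printable-character dump begins.
#include <cstdio>

int main() {
  // Example: the final line of a section has only 6 bytes left. The nested
  // loops above then exit with i == 2 (two groups were started, the second
  // only partially) and k == 2 (bytes printed in that last group).
  unsigned i = 2, k = 2;
  unsigned Printed = (i - 1) * 9 + k * 2 + 1;          // 14 columns emitted
  unsigned Pad = (4 - i) * 8 + (4 - i) + (8 - k * 2);  // 16 + 2 + 4 = 22
  std::printf("emitted %u + padding %u = %u columns\n", Printed, Pad,
              Printed + Pad);                          // 36, as for a full line
  return 0;
}
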
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
index 13de563469ab..aaabfa2ca2e8 100644
--- a/tools/llvm-readobj/ObjDumper.h
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -1,9 +1,8 @@
//===-- ObjDumper.h ---------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -15,6 +14,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
namespace llvm {
namespace object {
@@ -22,8 +22,9 @@ class COFFImportFile;
class ObjectFile;
}
namespace codeview {
+class GlobalTypeTableBuilder;
class MergingTypeTableBuilder;
-}
+} // namespace codeview
class ScopedPrinter;
@@ -35,18 +36,30 @@ public:
virtual void printFileHeaders() = 0;
virtual void printSectionHeaders() = 0;
virtual void printRelocations() = 0;
- virtual void printSymbols() = 0;
- virtual void printDynamicSymbols() = 0;
+ virtual void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) {
+ if (PrintSymbols)
+ printSymbols();
+ if (PrintDynamicSymbols)
+ printDynamicSymbols();
+ }
+ virtual void printProgramHeaders(bool PrintProgramHeaders,
+ cl::boolOrDefault PrintSectionMapping) {
+ if (PrintProgramHeaders)
+ printProgramHeaders();
+ if (PrintSectionMapping == cl::BOU_TRUE)
+ printSectionMapping();
+ }
+
virtual void printUnwindInfo() = 0;
// Only implemented for ELF at this time.
virtual void printDynamicRelocations() { }
virtual void printDynamicTable() { }
virtual void printNeededLibraries() { }
- virtual void printProgramHeaders() { }
virtual void printSectionAsHex(StringRef SectionName) {}
virtual void printHashTable() { }
virtual void printGnuHashTable() { }
+ virtual void printHashSymbols() {}
virtual void printLoadName() {}
virtual void printVersionInfo() {}
virtual void printGroupSections() {}
@@ -76,7 +89,10 @@ public:
virtual void printCodeViewDebugInfo() { }
virtual void
mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs,
- llvm::codeview::MergingTypeTableBuilder &CVTypes) {}
+ llvm::codeview::MergingTypeTableBuilder &CVTypes,
+ llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs,
+ llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
+ bool GHash) {}
// Only implemented for MachO.
virtual void printMachODataInCode() { }
@@ -88,11 +104,19 @@ public:
virtual void printStackMap() const = 0;
- void printSectionAsString(const object::ObjectFile *Obj, StringRef SecName);
- void printSectionAsHex(const object::ObjectFile *Obj, StringRef SecName);
+ void printSectionsAsString(const object::ObjectFile *Obj,
+ ArrayRef<std::string> Sections);
+ void printSectionsAsHex(const object::ObjectFile *Obj,
+ ArrayRef<std::string> Sections);
protected:
ScopedPrinter &W;
+
+private:
+ virtual void printSymbols() {}
+ virtual void printDynamicSymbols() {}
+ virtual void printProgramHeaders() {}
+ virtual void printSectionMapping() {}
};
std::error_code createCOFFDumper(const object::ObjectFile *Obj,
@@ -111,12 +135,16 @@ std::error_code createWasmDumper(const object::ObjectFile *Obj,
ScopedPrinter &Writer,
std::unique_ptr<ObjDumper> &Result);
+std::error_code createXCOFFDumper(const object::ObjectFile *Obj,
+ ScopedPrinter &Writer,
+ std::unique_ptr<ObjDumper> &Result);
+
void dumpCOFFImportFile(const object::COFFImportFile *File,
ScopedPrinter &Writer);
-void dumpCodeViewMergedTypes(
- ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable,
- llvm::codeview::MergingTypeTableBuilder &TypeTable);
+void dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+ ArrayRef<ArrayRef<uint8_t>> IpiRecords,
+ ArrayRef<ArrayRef<uint8_t>> TpiRecords);
} // namespace llvm
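
The ObjDumper.h changes above replace the pure-virtual printSymbols/printDynamicSymbols (and printProgramHeaders) with public dispatchers that forward to private virtual hooks, so callers can request any combination in one call while format-specific dumpers override only the hooks they implement. A minimal sketch of that shape; the class and hook names below are made up, not the real LLVM ones:

// Illustrative sketch of the dispatcher-over-private-virtual-hooks pattern.
#include <iostream>

class DumperBase {
public:
  virtual ~DumperBase() = default;

  // Public entry point: callers ask for any combination in one call.
  virtual void printSymbols(bool Static, bool Dynamic) {
    if (Static)
      printStaticSymbols();
    if (Dynamic)
      printDynamicSymbols();
  }

private:
  // Format-specific dumpers override only the hooks they support.
  virtual void printStaticSymbols() {}
  virtual void printDynamicSymbols() {}
};

class ToyElfDumper : public DumperBase {
private:
  void printStaticSymbols() override { std::cout << "static symbols\n"; }
  void printDynamicSymbols() override { std::cout << "dynamic symbols\n"; }
};

int main() {
  ToyElfDumper D;
  D.printSymbols(/*Static=*/true, /*Dynamic=*/true);
}
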
diff --git a/tools/llvm-readobj/StackMapPrinter.h b/tools/llvm-readobj/StackMapPrinter.h
index 77a054b178a5..ef7575640268 100644
--- a/tools/llvm-readobj/StackMapPrinter.h
+++ b/tools/llvm-readobj/StackMapPrinter.h
@@ -1,9 +1,8 @@
//===-------- StackMapPrinter.h - Pretty-print stackmaps --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -48,24 +47,24 @@ void prettyPrintStackMap(ScopedPrinter &W, const StackMapParserT &SMP) {
OS << " #" << ++LocationIndex << ": ";
switch (Loc.getKind()) {
case StackMapParserT::LocationKind::Register:
- OS << "Register R#" << Loc.getDwarfRegNum() << "\n";
+ OS << "Register R#" << Loc.getDwarfRegNum();
break;
case StackMapParserT::LocationKind::Direct:
- OS << "Direct R#" << Loc.getDwarfRegNum() << " + " << Loc.getOffset()
- << "\n";
+ OS << "Direct R#" << Loc.getDwarfRegNum() << " + " << Loc.getOffset();
break;
case StackMapParserT::LocationKind::Indirect:
OS << "Indirect [R#" << Loc.getDwarfRegNum() << " + " << Loc.getOffset()
- << "]\n";
+ << "]";
break;
case StackMapParserT::LocationKind::Constant:
- OS << "Constant " << Loc.getSmallConstant() << "\n";
+ OS << "Constant " << Loc.getSmallConstant();
break;
case StackMapParserT::LocationKind::ConstantIndex:
OS << "ConstantIndex #" << Loc.getConstantIndex() << " ("
- << SMP.getConstant(Loc.getConstantIndex()).getValue() << ")\n";
+ << SMP.getConstant(Loc.getConstantIndex()).getValue() << ")";
break;
}
+ OS << ", size: " << Loc.getSizeInBytes() << "\n";
}
raw_ostream &OS = W.startLine();
diff --git a/tools/llvm-readobj/WasmDumper.cpp b/tools/llvm-readobj/WasmDumper.cpp
index 79d3db4e2d29..041a9a15bdb6 100644
--- a/tools/llvm-readobj/WasmDumper.cpp
+++ b/tools/llvm-readobj/WasmDumper.cpp
@@ -1,9 +1,8 @@
//===-- WasmDumper.cpp - Wasm-specific object file dumper -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -33,11 +32,25 @@ static const EnumEntry<unsigned> WasmSymbolTypes[] = {
static const EnumEntry<uint32_t> WasmSectionTypes[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SEC_##X }
- ENUM_ENTRY(CUSTOM), ENUM_ENTRY(TYPE), ENUM_ENTRY(IMPORT),
- ENUM_ENTRY(FUNCTION), ENUM_ENTRY(TABLE), ENUM_ENTRY(MEMORY),
- ENUM_ENTRY(GLOBAL), ENUM_ENTRY(EVENT), ENUM_ENTRY(EXPORT),
- ENUM_ENTRY(START), ENUM_ENTRY(ELEM), ENUM_ENTRY(CODE),
- ENUM_ENTRY(DATA),
+ ENUM_ENTRY(CUSTOM), ENUM_ENTRY(TYPE), ENUM_ENTRY(IMPORT),
+ ENUM_ENTRY(FUNCTION), ENUM_ENTRY(TABLE), ENUM_ENTRY(MEMORY),
+ ENUM_ENTRY(GLOBAL), ENUM_ENTRY(EVENT), ENUM_ENTRY(EXPORT),
+ ENUM_ENTRY(START), ENUM_ENTRY(ELEM), ENUM_ENTRY(CODE),
+ ENUM_ENTRY(DATA), ENUM_ENTRY(DATACOUNT),
+#undef ENUM_ENTRY
+};
+
+static const EnumEntry<unsigned> WasmSymbolFlags[] = {
+#define ENUM_ENTRY(X) \
+ { #X, wasm::WASM_SYMBOL_##X }
+ ENUM_ENTRY(BINDING_GLOBAL),
+ ENUM_ENTRY(BINDING_WEAK),
+ ENUM_ENTRY(BINDING_LOCAL),
+ ENUM_ENTRY(VISIBILITY_DEFAULT),
+ ENUM_ENTRY(VISIBILITY_HIDDEN),
+ ENUM_ENTRY(UNDEFINED),
+ ENUM_ENTRY(EXPORTED),
+ ENUM_ENTRY(EXPLICIT_NAME),
#undef ENUM_ENTRY
};
@@ -49,8 +62,6 @@ public:
void printFileHeaders() override;
void printSectionHeaders() override;
void printRelocations() override;
- void printSymbols() override;
- void printDynamicSymbols() override { llvm_unreachable("unimplemented"); }
void printUnwindInfo() override { llvm_unreachable("unimplemented"); }
void printStackMap() const override { llvm_unreachable("unimplemented"); }
@@ -59,6 +70,9 @@ protected:
void printRelocation(const SectionRef &Section, const RelocationRef &Reloc);
private:
+ void printSymbols() override;
+ void printDynamicSymbols() override { llvm_unreachable("unimplemented"); }
+
const WasmObjectFile *Obj;
};
@@ -80,11 +94,11 @@ void WasmDumper::printRelocation(const SectionRef &Section,
bool HasAddend = false;
switch (RelocType) {
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
- case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32:
- case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32:
- case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32:
+ case wasm::R_WASM_MEMORY_ADDR_LEB:
+ case wasm::R_WASM_MEMORY_ADDR_SLEB:
+ case wasm::R_WASM_MEMORY_ADDR_I32:
+ case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_SECTION_OFFSET_I32:
HasAddend = true;
break;
default:
@@ -209,7 +223,19 @@ void WasmDumper::printSymbol(const SymbolRef &Sym) {
WasmSymbol Symbol = Obj->getWasmSymbol(Sym.getRawDataRefImpl());
W.printString("Name", Symbol.Info.Name);
W.printEnum("Type", Symbol.Info.Kind, makeArrayRef(WasmSymbolTypes));
- W.printHex("Flags", Symbol.Info.Flags);
+ W.printFlags("Flags", Symbol.Info.Flags, makeArrayRef(WasmSymbolFlags));
+
+ if (Symbol.Info.Flags & wasm::WASM_SYMBOL_UNDEFINED) {
+ W.printString("ImportName", Symbol.Info.ImportName);
+ W.printString("ImportModule", Symbol.Info.ImportModule);
+ }
+ if (Symbol.Info.Kind != wasm::WASM_SYMBOL_TYPE_DATA) {
+ W.printHex("ElementIndex", Symbol.Info.ElementIndex);
+ } else if (!(Symbol.Info.Flags & wasm::WASM_SYMBOL_UNDEFINED)) {
+ W.printHex("Offset", Symbol.Info.DataRef.Offset);
+ W.printHex("Segment", Symbol.Info.DataRef.Segment);
+ W.printHex("Size", Symbol.Info.DataRef.Size);
+ }
}
} // namespace
@@ -219,7 +245,7 @@ namespace llvm {
std::error_code createWasmDumper(const object::ObjectFile *Obj,
ScopedPrinter &Writer,
std::unique_ptr<ObjDumper> &Result) {
- const WasmObjectFile *WasmObj = dyn_cast<WasmObjectFile>(Obj);
+ const auto *WasmObj = dyn_cast<WasmObjectFile>(Obj);
assert(WasmObj && "createWasmDumper called with non-wasm object");
Result.reset(new WasmDumper(WasmObj, Writer));
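
Replacing W.printHex("Flags", ...) with W.printFlags over the WasmSymbolFlags table means each set bit is printed by name rather than as one raw number. A standalone sketch of that kind of table-driven flag decoding; the entries and bit values below are illustrative, not the real wasm::WASM_SYMBOL_* constants:

// Standalone sketch of name-table flag decoding; values are made up.
#include <cstdint>
#include <iostream>

struct FlagEntry {
  const char *Name;
  uint32_t Value;
};

static const FlagEntry ToyFlags[] = {
    {"BINDING_WEAK", 0x1},
    {"VISIBILITY_HIDDEN", 0x4},
    {"UNDEFINED", 0x10},
    {"EXPORTED", 0x20},
};

static void printFlags(uint32_t Flags) {
  std::cout << "Flags [";
  for (const FlagEntry &E : ToyFlags)
    if (Flags & E.Value)        // print every named bit that is set
      std::cout << "\n  " << E.Name;
  std::cout << "\n]\n";
}

int main() { printFlags(0x14); } // prints VISIBILITY_HIDDEN and UNDEFINED
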
diff --git a/tools/llvm-readobj/Win64EHDumper.cpp b/tools/llvm-readobj/Win64EHDumper.cpp
index f7e56b361542..e64b8f157180 100644
--- a/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/tools/llvm-readobj/Win64EHDumper.cpp
@@ -1,9 +1,8 @@
//===- Win64EHDumper.cpp - Win64 EH Printer ---------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-readobj/Win64EHDumper.h b/tools/llvm-readobj/Win64EHDumper.h
index 772f68bf283f..97458c916bec 100644
--- a/tools/llvm-readobj/Win64EHDumper.h
+++ b/tools/llvm-readobj/Win64EHDumper.h
@@ -1,9 +1,8 @@
//===- Win64EHDumper.h - Win64 EH Printing ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-readobj/WindowsResourceDumper.cpp b/tools/llvm-readobj/WindowsResourceDumper.cpp
index 1f568a963671..13989f696d9d 100644
--- a/tools/llvm-readobj/WindowsResourceDumper.cpp
+++ b/tools/llvm-readobj/WindowsResourceDumper.cpp
@@ -1,9 +1,8 @@
//===-- WindowsResourceDumper.cpp - Windows Resource printer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-readobj/WindowsResourceDumper.h b/tools/llvm-readobj/WindowsResourceDumper.h
index ca6da4046605..6a5878804eb1 100644
--- a/tools/llvm-readobj/WindowsResourceDumper.h
+++ b/tools/llvm-readobj/WindowsResourceDumper.h
@@ -1,9 +1,8 @@
//===- WindowsResourceDumper.h - Windows Resource printer -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvm-readobj/XCOFFDumper.cpp b/tools/llvm-readobj/XCOFFDumper.cpp
new file mode 100644
index 000000000000..6f260f91537f
--- /dev/null
+++ b/tools/llvm-readobj/XCOFFDumper.cpp
@@ -0,0 +1,190 @@
+//===-- XCOFFDumper.cpp - XCOFF dumping utility -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an XCOFF specific dumper for llvm-readobj.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "ObjDumper.h"
+#include "llvm-readobj.h"
+#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+
+class XCOFFDumper : public ObjDumper {
+public:
+ XCOFFDumper(const XCOFFObjectFile &Obj, ScopedPrinter &Writer)
+ : ObjDumper(Writer), Obj(Obj) {}
+
+ void printFileHeaders() override;
+ void printSectionHeaders() override;
+ void printRelocations() override;
+ void printSymbols() override;
+ void printDynamicSymbols() override;
+ void printUnwindInfo() override;
+ void printStackMap() const override;
+ void printNeededLibraries() override;
+
+private:
+ template <typename T> void printSectionHeaders(ArrayRef<T> Sections);
+
+ const XCOFFObjectFile &Obj;
+
+ // Least significant 3 bits are reserved.
+ static constexpr unsigned SectionFlagsReservedMask = 0x7;
+};
+} // anonymous namespace
+
+void XCOFFDumper::printFileHeaders() {
+ DictScope DS(W, "FileHeader");
+ W.printHex("Magic", Obj.getMagic());
+ W.printNumber("NumberOfSections", Obj.getNumberOfSections());
+
+ // Negative timestamp values are reserved for future use.
+ int32_t TimeStamp = Obj.getTimeStamp();
+ if (TimeStamp > 0) {
+ // This handling of the time stamp assumes that the host system's time_t is
+ // compatible with AIX time_t. If a platform is not compatible, the lit
+ // tests will let us know.
+ time_t TimeDate = TimeStamp;
+
+ char FormattedTime[21] = {};
+ size_t BytesWritten =
+ strftime(FormattedTime, 21, "%Y-%m-%dT%H:%M:%SZ", gmtime(&TimeDate));
+ if (BytesWritten)
+ W.printHex("TimeStamp", FormattedTime, TimeStamp);
+ else
+      W.printHex("TimeStamp", TimeStamp);
+ } else {
+ W.printHex("TimeStamp", TimeStamp == 0 ? "None" : "Reserved Value",
+ TimeStamp);
+ }
+
+ // The number of symbol table entries is an unsigned value in 64-bit objects
+ // and a signed value (with negative values being 'reserved') in 32-bit
+ // objects.
+ if (Obj.is64Bit()) {
+ W.printHex("SymbolTableOffset", Obj.getSymbolTableOffset64());
+ W.printNumber("SymbolTableEntries", Obj.getNumberOfSymbolTableEntries64());
+ } else {
+ W.printHex("SymbolTableOffset", Obj.getSymbolTableOffset32());
+ int32_t SymTabEntries = Obj.getRawNumberOfSymbolTableEntries32();
+ if (SymTabEntries >= 0)
+ W.printNumber("SymbolTableEntries", SymTabEntries);
+ else
+ W.printHex("SymbolTableEntries", "Reserved Value", SymTabEntries);
+ }
+
+ W.printHex("OptionalHeaderSize", Obj.getOptionalHeaderSize());
+ W.printHex("Flags", Obj.getFlags());
+
+ // TODO FIXME Add support for the auxiliary header (if any) once
+ // XCOFFObjectFile has the necessary support.
+}
+
+void XCOFFDumper::printSectionHeaders() {
+ if (Obj.is64Bit())
+ printSectionHeaders(Obj.sections64());
+ else
+ printSectionHeaders(Obj.sections32());
+}
+
+void XCOFFDumper::printRelocations() {
+ llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printSymbols() {
+ llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printDynamicSymbols() {
+ llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printUnwindInfo() {
+ llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printStackMap() const {
+ llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+void XCOFFDumper::printNeededLibraries() {
+ llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+}
+
+static const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
+#define ECase(X) \
+ { #X, XCOFF::X }
+ ECase(STYP_PAD), ECase(STYP_DWARF), ECase(STYP_TEXT),
+ ECase(STYP_DATA), ECase(STYP_BSS), ECase(STYP_EXCEPT),
+ ECase(STYP_INFO), ECase(STYP_TDATA), ECase(STYP_TBSS),
+ ECase(STYP_LOADER), ECase(STYP_DEBUG), ECase(STYP_TYPCHK),
+ ECase(STYP_OVRFLO)
+#undef ECase
+};
+
+template <typename T>
+void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
+ ListScope Group(W, "Sections");
+
+ uint16_t Index = 1;
+ for (const T &Sec : Sections) {
+ DictScope SecDS(W, "Section");
+
+ W.printNumber("Index", Index++);
+ W.printString("Name", Sec.getName());
+
+ W.printHex("PhysicalAddress", Sec.PhysicalAddress);
+ W.printHex("VirtualAddress", Sec.VirtualAddress);
+ W.printHex("Size", Sec.SectionSize);
+ W.printHex("RawDataOffset", Sec.FileOffsetToRawData);
+ W.printHex("RelocationPointer", Sec.FileOffsetToRelocationInfo);
+ W.printHex("LineNumberPointer", Sec.FileOffsetToLineNumberInfo);
+
+ // TODO Need to add overflow handling when NumberOfX == _OVERFLOW_MARKER
+ // in 32-bit object files.
+ W.printNumber("NumberOfRelocations", Sec.NumberOfRelocations);
+ W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
+
+ // The most significant 16-bits represent the DWARF section subtype. For
+ // now we just dump the section type flags.
+ uint16_t Flags = Sec.Flags & 0xffffu;
+ if (Flags & SectionFlagsReservedMask)
+ W.printHex("Flags", "Reserved", Flags);
+ else
+ W.printEnum("Type", Flags, makeArrayRef(SectionTypeFlagsNames));
+ }
+
+ if (opts::SectionRelocations)
+ report_fatal_error("Dumping section relocations is unimplemented");
+
+ if (opts::SectionSymbols)
+ report_fatal_error("Dumping symbols is unimplemented");
+
+ if (opts::SectionData)
+ report_fatal_error("Dumping section data is unimplemented");
+}
+
+namespace llvm {
+std::error_code createXCOFFDumper(const object::ObjectFile *Obj,
+ ScopedPrinter &Writer,
+ std::unique_ptr<ObjDumper> &Result) {
+ const XCOFFObjectFile *XObj = dyn_cast<XCOFFObjectFile>(Obj);
+ if (!XObj)
+ return readobj_error::unsupported_obj_file_format;
+
+ Result.reset(new XCOFFDumper(*XObj, Writer));
+ return readobj_error::success;
+}
+} // namespace llvm
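
In XCOFFDumper::printSectionHeaders above, only the low 16 bits of the flags word carry the section type (the high half is the DWARF subtype), and any of the reserved low three bits being set makes the dumper fall back to a raw hex print instead of an enum name. A small standalone sketch of that masking, with made-up flag values:

// Sketch of the XCOFF section-flag masking above; the constants here are
// illustrative stand-ins, not the real XCOFF::SectionTypeFlags values.
#include <cstdint>
#include <cstdio>

static constexpr unsigned ReservedMask = 0x7; // low three bits are reserved

int main() {
  uint32_t RawFlags = 0x00120020;             // high 16 bits: DWARF subtype
  uint16_t Flags = RawFlags & 0xffffu;        // keep only the type flags
  if (Flags & ReservedMask)
    std::printf("Flags: reserved value 0x%x\n", (unsigned)Flags);
  else
    std::printf("Type flags: 0x%x\n", (unsigned)Flags); // printed by name above
  return 0;
}
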
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 81ce7a590364..1bd5bb74bf29 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -1,9 +1,8 @@
//===- llvm-readobj.cpp - Dump contents of an Object File -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "Error.h"
#include "ObjDumper.h"
#include "WindowsResourceDumper.h"
+#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFFImportFile.h"
@@ -39,6 +39,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/WithColor.h"
using namespace llvm;
using namespace llvm::object;
@@ -48,7 +49,7 @@ namespace opts {
cl::desc("<input object files>"),
cl::ZeroOrMore);
- // -all, -a
+ // --all, -a
cl::opt<bool>
All("all",
cl::desc("Equivalent to setting: --file-headers, --program-headers, "
@@ -65,7 +66,7 @@ namespace opts {
cl::alias HeadersShort("e", cl::desc("Alias for --headers"),
cl::aliasopt(Headers));
- // -wide, -W
+ // --wide, -W
cl::opt<bool>
WideOutput("wide", cl::desc("Ignored for compatibility with GNU readelf"),
cl::Hidden);
@@ -73,7 +74,7 @@ namespace opts {
cl::desc("Alias for --wide"),
cl::aliasopt(WideOutput));
- // -file-headers, -file-header, -h
+ // --file-headers, --file-header, -h
cl::opt<bool> FileHeaders("file-headers",
cl::desc("Display file headers "));
cl::alias FileHeadersShort("h", cl::desc("Alias for --file-headers"),
@@ -82,7 +83,7 @@ namespace opts {
cl::desc("Alias for --file-headers"),
cl::aliasopt(FileHeaders));
- // -section-headers, -sections, -S
+ // --section-headers, --sections, -S
// Also -s in llvm-readobj mode.
cl::opt<bool> SectionHeaders("section-headers",
cl::desc("Display all section headers."));
@@ -92,22 +93,27 @@ namespace opts {
cl::desc("Alias for --section-headers"),
cl::aliasopt(SectionHeaders), cl::NotHidden);
- // -section-relocations
- // Also -sr in llvm-readobj mode.
+ // --section-relocations
+ // Also --sr in llvm-readobj mode.
cl::opt<bool> SectionRelocations("section-relocations",
cl::desc("Display relocations for each section shown."));
- // -section-symbols
- // Also -st in llvm-readobj mode.
+ // --section-symbols
+ // Also --st in llvm-readobj mode.
cl::opt<bool> SectionSymbols("section-symbols",
cl::desc("Display symbols for each section shown."));
- // -section-data
- // Also -sd in llvm-readobj mode.
+ // --section-data
+ // Also --sd in llvm-readobj mode.
cl::opt<bool> SectionData("section-data",
cl::desc("Display section data for each section shown."));
- // -relocations, -relocs, -r
+ // --section-mapping
+ cl::opt<cl::boolOrDefault>
+ SectionMapping("section-mapping",
+ cl::desc("Display the section to segment mapping."));
+
+ // --relocations, --relocs, -r
cl::opt<bool> Relocations("relocations",
cl::desc("Display the relocation entries in the file"));
cl::alias RelocationsShort("r", cl::desc("Alias for --relocations"),
@@ -115,36 +121,43 @@ namespace opts {
cl::alias RelocationsGNU("relocs", cl::desc("Alias for --relocations"),
cl::aliasopt(Relocations));
- // -notes, -n
+ // --notes, -n
cl::opt<bool> Notes("notes", cl::desc("Display the ELF notes in the file"));
cl::alias NotesShort("n", cl::desc("Alias for --notes"), cl::aliasopt(Notes));
- // -dyn-relocations
+ // --dyn-relocations
cl::opt<bool> DynRelocs("dyn-relocations",
cl::desc("Display the dynamic relocation entries in the file"));
- // -symbols
+ // --symbols
// Also -s in llvm-readelf mode, or -t in llvm-readobj mode.
- cl::opt<bool> Symbols("symbols",
- cl::desc("Display the symbol table"));
+ cl::opt<bool>
+ Symbols("symbols",
+ cl::desc("Display the symbol table. Also display the dynamic "
+ "symbol table when using GNU output style for ELF"));
cl::alias SymbolsGNU("syms", cl::desc("Alias for --symbols"),
cl::aliasopt(Symbols));
- // -dyn-symbols, -dyn-syms
- // Also -dt in llvm-readobj mode.
+ // --dyn-symbols, --dyn-syms
+ // Also --dt in llvm-readobj mode.
cl::opt<bool> DynamicSymbols("dyn-symbols",
cl::desc("Display the dynamic symbol table"));
cl::alias DynSymsGNU("dyn-syms", cl::desc("Alias for --dyn-symbols"),
cl::aliasopt(DynamicSymbols));
- // -unwind, -u
+ // --hash-symbols
+ cl::opt<bool> HashSymbols(
+ "hash-symbols",
+ cl::desc("Display the dynamic symbols derived from the hash section"));
+
+ // --unwind, -u
cl::opt<bool> UnwindInfo("unwind",
cl::desc("Display unwind information"));
cl::alias UnwindInfoShort("u",
cl::desc("Alias for --unwind"),
cl::aliasopt(UnwindInfo));
- // -dynamic-table, -dynamic, -d
+ // --dynamic-table, --dynamic, -d
cl::opt<bool> DynamicTable("dynamic-table",
cl::desc("Display the ELF .dynamic section table"));
cl::alias DynamicTableShort("d", cl::desc("Alias for --dynamic-table"),
@@ -152,11 +165,11 @@ namespace opts {
cl::alias DynamicTableAlias("dynamic", cl::desc("Alias for --dynamic-table"),
cl::aliasopt(DynamicTable));
- // -needed-libs
+ // --needed-libs
cl::opt<bool> NeededLibraries("needed-libs",
cl::desc("Display the needed libraries"));
- // -program-headers, -segments, -l
+ // --program-headers, --segments, -l
cl::opt<bool> ProgramHeaders("program-headers",
cl::desc("Display ELF program headers"));
cl::alias ProgramHeadersShort("l", cl::desc("Alias for --program-headers"),
@@ -164,149 +177,161 @@ namespace opts {
cl::alias SegmentsAlias("segments", cl::desc("Alias for --program-headers"),
cl::aliasopt(ProgramHeaders));
- // -string-dump, -p
+ // --string-dump, -p
cl::list<std::string> StringDump("string-dump", cl::desc("<number|name>"),
cl::ZeroOrMore);
cl::alias StringDumpShort("p", cl::desc("Alias for --string-dump"),
- cl::aliasopt(StringDump));
+ cl::aliasopt(StringDump), cl::Prefix);
- // -hex-dump, -x
+ // --hex-dump, -x
cl::list<std::string> HexDump("hex-dump", cl::desc("<number|name>"),
cl::ZeroOrMore);
cl::alias HexDumpShort("x", cl::desc("Alias for --hex-dump"),
- cl::aliasopt(HexDump));
+ cl::aliasopt(HexDump), cl::Prefix);
- // -hash-table
+ // --demangle, -C
+ cl::opt<bool> Demangle("demangle",
+ cl::desc("Demangle symbol names in output"));
+ cl::alias DemangleShort("C", cl::desc("Alias for --demangle"),
+ cl::aliasopt(Demangle), cl::NotHidden);
+
+ // --hash-table
cl::opt<bool> HashTable("hash-table",
cl::desc("Display ELF hash table"));
- // -gnu-hash-table
+ // --gnu-hash-table
cl::opt<bool> GnuHashTable("gnu-hash-table",
cl::desc("Display ELF .gnu.hash section"));
- // -expand-relocs
+ // --expand-relocs
cl::opt<bool> ExpandRelocs("expand-relocs",
cl::desc("Expand each shown relocation to multiple lines"));
- // -raw-relr
+ // --raw-relr
cl::opt<bool> RawRelr("raw-relr",
cl::desc("Do not decode relocations in SHT_RELR section, display raw contents"));
- // -codeview
+ // --codeview
cl::opt<bool> CodeView("codeview",
cl::desc("Display CodeView debug information"));
- // -codeview-merged-types
+ // --codeview-merged-types
cl::opt<bool>
CodeViewMergedTypes("codeview-merged-types",
cl::desc("Display the merged CodeView type stream"));
- // -codeview-subsection-bytes
+ // --codeview-ghash
+ cl::opt<bool> CodeViewEnableGHash(
+ "codeview-ghash",
+ cl::desc(
+ "Enable global hashing for CodeView type stream de-duplication"));
+
+ // --codeview-subsection-bytes
cl::opt<bool> CodeViewSubsectionBytes(
"codeview-subsection-bytes",
cl::desc("Dump raw contents of codeview debug sections and records"));
- // -arm-attributes
+ // --arm-attributes
cl::opt<bool> ARMAttributes("arm-attributes",
cl::desc("Display the ARM attributes section"));
- // -mips-plt-got
+ // --mips-plt-got
cl::opt<bool>
MipsPLTGOT("mips-plt-got",
cl::desc("Display the MIPS GOT and PLT GOT sections"));
- // -mips-abi-flags
+ // --mips-abi-flags
cl::opt<bool> MipsABIFlags("mips-abi-flags",
cl::desc("Display the MIPS.abiflags section"));
- // -mips-reginfo
+ // --mips-reginfo
cl::opt<bool> MipsReginfo("mips-reginfo",
cl::desc("Display the MIPS .reginfo section"));
- // -mips-options
+ // --mips-options
cl::opt<bool> MipsOptions("mips-options",
cl::desc("Display the MIPS .MIPS.options section"));
- // -coff-imports
+ // --coff-imports
cl::opt<bool>
COFFImports("coff-imports", cl::desc("Display the PE/COFF import table"));
- // -coff-exports
+ // --coff-exports
cl::opt<bool>
COFFExports("coff-exports", cl::desc("Display the PE/COFF export table"));
- // -coff-directives
+ // --coff-directives
cl::opt<bool>
COFFDirectives("coff-directives",
cl::desc("Display the PE/COFF .drectve section"));
- // -coff-basereloc
+ // --coff-basereloc
cl::opt<bool>
COFFBaseRelocs("coff-basereloc",
cl::desc("Display the PE/COFF .reloc section"));
- // -coff-debug-directory
+ // --coff-debug-directory
cl::opt<bool>
COFFDebugDirectory("coff-debug-directory",
cl::desc("Display the PE/COFF debug directory"));
- // -coff-resources
+ // --coff-resources
cl::opt<bool> COFFResources("coff-resources",
cl::desc("Display the PE/COFF .rsrc section"));
- // -coff-load-config
+ // --coff-load-config
cl::opt<bool>
COFFLoadConfig("coff-load-config",
cl::desc("Display the PE/COFF load config"));
- // -elf-linker-options
+ // --elf-linker-options
cl::opt<bool>
ELFLinkerOptions("elf-linker-options",
cl::desc("Display the ELF .linker-options section"));
- // -macho-data-in-code
+ // --macho-data-in-code
cl::opt<bool>
MachODataInCode("macho-data-in-code",
cl::desc("Display MachO Data in Code command"));
- // -macho-indirect-symbols
+ // --macho-indirect-symbols
cl::opt<bool>
MachOIndirectSymbols("macho-indirect-symbols",
cl::desc("Display MachO indirect symbols"));
- // -macho-linker-options
+ // --macho-linker-options
cl::opt<bool>
MachOLinkerOptions("macho-linker-options",
cl::desc("Display MachO linker options"));
- // -macho-segment
+ // --macho-segment
cl::opt<bool>
MachOSegment("macho-segment",
cl::desc("Display MachO Segment command"));
- // -macho-version-min
+ // --macho-version-min
cl::opt<bool>
MachOVersionMin("macho-version-min",
cl::desc("Display MachO version min command"));
- // -macho-dysymtab
+ // --macho-dysymtab
cl::opt<bool>
MachODysymtab("macho-dysymtab",
cl::desc("Display MachO Dysymtab command"));
- // -stackmap
+ // --stackmap
cl::opt<bool>
PrintStackMap("stackmap",
cl::desc("Display contents of stackmap section"));
- // -version-info, -V
+ // --version-info, -V
cl::opt<bool>
VersionInfo("version-info",
cl::desc("Display ELF version sections (if present)"));
cl::alias VersionInfoShort("V", cl::desc("Alias for -version-info"),
cl::aliasopt(VersionInfo));
- // -elf-section-groups, -section-groups, -g
+ // --elf-section-groups, --section-groups, -g
cl::opt<bool> SectionGroups("elf-section-groups",
cl::desc("Display ELF section group contents"));
cl::alias SectionGroupsAlias("section-groups",
@@ -315,7 +340,7 @@ namespace opts {
cl::alias SectionGroupsShort("g", cl::desc("Alias for -elf-sections-groups"),
cl::aliasopt(SectionGroups));
- // -elf-hash-histogram, -histogram, -I
+ // --elf-hash-histogram, --histogram, -I
cl::opt<bool> HashHistogram(
"elf-hash-histogram",
cl::desc("Display bucket list histogram for hash sections"));
@@ -325,7 +350,7 @@ namespace opts {
cl::desc("Alias for --elf-hash-histogram"),
cl::aliasopt(HashHistogram));
- // -elf-cg-profile
+ // --elf-cg-profile
cl::opt<bool> CGProfile("elf-cg-profile", cl::desc("Display callgraph profile section"));
// -addrsig
@@ -338,16 +363,38 @@ namespace opts {
cl::values(clEnumVal(LLVM, "LLVM default style"),
clEnumVal(GNU, "GNU readelf style")),
cl::init(LLVM));
+
+ cl::extrahelp
+ HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
} // namespace opts
namespace llvm {
LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) {
- errs() << "\nError reading file: " << Msg << ".\n";
- errs().flush();
+ fouts().flush();
+ errs() << "\n";
+ WithColor::error(errs()) << Msg << "\n";
exit(1);
}
+void reportError(StringRef Input, Error Err) {
+ if (Input == "-")
+ Input = "<stdin>";
+ error(createFileError(Input, std::move(Err)));
+}
+
+void reportWarning(Twine Msg) {
+ fouts().flush();
+ errs() << "\n";
+ WithColor::warning(errs()) << Msg << "\n";
+}
+
+void warn(Error Err) {
+ handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
+ reportWarning(EI.message());
+ });
+}
+
void error(Error EC) {
if (!EC)
return;
@@ -361,28 +408,10 @@ void error(std::error_code EC) {
reportError(EC.message());
}
-bool relocAddressLess(RelocationRef a, RelocationRef b) {
- return a.getOffset() < b.getOffset();
-}
-
} // namespace llvm
static void reportError(StringRef Input, std::error_code EC) {
- if (Input == "-")
- Input = "<stdin>";
-
- reportError(Twine(Input) + ": " + EC.message());
-}
-
-static void reportError(StringRef Input, Error Err) {
- if (Input == "-")
- Input = "<stdin>";
- std::string ErrMsg;
- {
- raw_string_ostream ErrStream(ErrMsg);
- logAllUnhandledErrors(std::move(Err), ErrStream, Input + ": ");
- }
- reportError(ErrMsg);
+ reportError(Input, errorCodeToError(EC));
}
static bool isMipsArch(unsigned Arch) {
@@ -399,13 +428,17 @@ static bool isMipsArch(unsigned Arch) {
namespace {
struct ReadObjTypeTableBuilder {
ReadObjTypeTableBuilder()
- : Allocator(), IDTable(Allocator), TypeTable(Allocator) {}
+ : Allocator(), IDTable(Allocator), TypeTable(Allocator),
+ GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {}
llvm::BumpPtrAllocator Allocator;
llvm::codeview::MergingTypeTableBuilder IDTable;
llvm::codeview::MergingTypeTableBuilder TypeTable;
+ llvm::codeview::GlobalTypeTableBuilder GlobalIDTable;
+ llvm::codeview::GlobalTypeTableBuilder GlobalTypeTable;
+ std::vector<OwningBinary<Binary>> Binaries;
};
-}
+} // namespace
static ReadObjTypeTableBuilder CVTypes;
/// Creates a format-specific object file dumper.
@@ -423,25 +456,34 @@ static std::error_code createDumper(const ObjectFile *Obj,
return createMachODumper(Obj, Writer, Result);
if (Obj->isWasm())
return createWasmDumper(Obj, Writer, Result);
+ if (Obj->isXCOFF())
+ return createXCOFFDumper(Obj, Writer, Result);
return readobj_error::unsupported_obj_file_format;
}
/// Dumps the specified object file.
-static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
+static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
+ const Archive *A = nullptr) {
+ std::string FileStr =
+ A ? Twine(A->getFileName() + "(" + Obj->getFileName() + ")").str()
+ : Obj->getFileName().str();
+
std::unique_ptr<ObjDumper> Dumper;
if (std::error_code EC = createDumper(Obj, Writer, Dumper))
- reportError(Obj->getFileName(), EC);
+ reportError(FileStr, EC);
+ Writer.startLine() << "\n";
if (opts::Output == opts::LLVM) {
- Writer.startLine() << "\n";
- Writer.printString("File", Obj->getFileName());
+ Writer.printString("File", FileStr);
Writer.printString("Format", Obj->getFileFormatName());
Writer.printString("Arch", Triple::getArchTypeName(
(llvm::Triple::ArchType)Obj->getArch()));
Writer.printString("AddressSize",
formatv("{0}bit", 8 * Obj->getBytesInAddress()));
Dumper->printLoadName();
+ } else if (opts::Output == opts::GNU && A) {
+ Writer.printString("File", FileStr);
}
if (opts::FileHeaders)
@@ -452,26 +494,22 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
Dumper->printRelocations();
if (opts::DynRelocs)
Dumper->printDynamicRelocations();
- if (opts::Symbols)
- Dumper->printSymbols();
- if (opts::DynamicSymbols)
- Dumper->printDynamicSymbols();
+ if (opts::Symbols || opts::DynamicSymbols)
+ Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols);
+ if (opts::HashSymbols)
+ Dumper->printHashSymbols();
if (opts::UnwindInfo)
Dumper->printUnwindInfo();
if (opts::DynamicTable)
Dumper->printDynamicTable();
if (opts::NeededLibraries)
Dumper->printNeededLibraries();
- if (opts::ProgramHeaders)
- Dumper->printProgramHeaders();
+ if (opts::ProgramHeaders || opts::SectionMapping == cl::BOU_TRUE)
+ Dumper->printProgramHeaders(opts::ProgramHeaders, opts::SectionMapping);
if (!opts::StringDump.empty())
- llvm::for_each(opts::StringDump, [&Dumper, Obj](StringRef SectionName) {
- Dumper->printSectionAsString(Obj, SectionName);
- });
+ Dumper->printSectionsAsString(Obj, opts::StringDump);
if (!opts::HexDump.empty())
- llvm::for_each(opts::HexDump, [&Dumper, Obj](StringRef SectionName) {
- Dumper->printSectionAsHex(Obj, SectionName);
- });
+ Dumper->printSectionsAsHex(Obj, opts::HexDump);
if (opts::HashTable)
Dumper->printHashTable();
if (opts::GnuHashTable)
@@ -525,7 +563,9 @@ static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
if (opts::CodeView)
Dumper->printCodeViewDebugInfo();
if (opts::CodeViewMergedTypes)
- Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable);
+ Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable,
+ CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable,
+ opts::CodeViewEnableGHash);
}
if (Obj->isMachO()) {
if (opts::MachODataInCode)
@@ -552,12 +592,12 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) {
Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
if (!ChildOrErr) {
if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
- reportError(Arc->getFileName(), ChildOrErr.takeError());
+ reportError(Arc->getFileName(), std::move(E));
}
continue;
}
if (ObjectFile *Obj = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
- dumpObject(Obj, Writer);
+ dumpObject(Obj, Writer, Arc);
else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
dumpCOFFImportFile(Imp, Writer);
else
@@ -583,8 +623,8 @@ static void dumpMachOUniversalBinary(const MachOUniversalBinary *UBinary,
}
/// Dumps \a WinRes, Windows Resource (.res) file;
-static void dumpWindowsResourceFile(WindowsResource *WinRes) {
- ScopedPrinter Printer{outs()};
+static void dumpWindowsResourceFile(WindowsResource *WinRes,
+ ScopedPrinter &Printer) {
WindowsRes::Dumper Dumper(WinRes, Printer);
if (auto Err = Dumper.printData())
reportError(WinRes->getFileName(), std::move(Err));
@@ -592,9 +632,7 @@ static void dumpWindowsResourceFile(WindowsResource *WinRes) {
/// Opens \a File and dumps it.
-static void dumpInput(StringRef File) {
- ScopedPrinter Writer(outs());
-
+static void dumpInput(StringRef File, ScopedPrinter &Writer) {
// Attempt to open the binary.
Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(File);
if (!BinaryOrErr)
@@ -611,9 +649,11 @@ static void dumpInput(StringRef File) {
else if (COFFImportFile *Import = dyn_cast<COFFImportFile>(&Binary))
dumpCOFFImportFile(Import, Writer);
else if (WindowsResource *WinRes = dyn_cast<WindowsResource>(&Binary))
- dumpWindowsResourceFile(WinRes);
+ dumpWindowsResourceFile(WinRes, Writer);
else
reportError(File, readobj_error::unrecognized_file_format);
+
+ CVTypes.Binaries.push_back(std::move(*BinaryOrErr));
}
/// Registers aliases that should only be allowed by readobj.
@@ -656,7 +696,7 @@ static void registerReadelfAliases() {
StringRef ArgName = OptEntry.getKey();
cl::Option *Option = OptEntry.getValue();
if (ArgName.size() == 1)
- Option->setFormattingFlag(cl::Grouping);
+ apply(Option, cl::Grouping);
}
}
@@ -699,11 +739,17 @@ int main(int argc, const char *argv[]) {
if (opts::InputFilenames.empty())
opts::InputFilenames.push_back("-");
- llvm::for_each(opts::InputFilenames, dumpInput);
+ ScopedPrinter Writer(fouts());
+ for (const std::string &I : opts::InputFilenames)
+ dumpInput(I, Writer);
if (opts::CodeViewMergedTypes) {
- ScopedPrinter W(outs());
- dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable);
+ if (opts::CodeViewEnableGHash)
+ dumpCodeViewMergedTypes(Writer, CVTypes.GlobalIDTable.records(),
+ CVTypes.GlobalTypeTable.records());
+ else
+ dumpCodeViewMergedTypes(Writer, CVTypes.IDTable.records(),
+ CVTypes.TypeTable.records());
}
return 0;
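
One small but visible change in dumpObject above is the "File" label: for archive members it is now printed as archive(member) rather than just the member name. A trivial standalone sketch of that labeling, using std::string instead of llvm::Twine; the file names are made up:

// Sketch of the archive-member file label built in dumpObject above.
#include <iostream>
#include <string>

static std::string makeFileLabel(const std::string &Member,
                                 const std::string *Archive) {
  // "libfoo.a(bar.o)" when dumping an archive member, otherwise just the path.
  return Archive ? *Archive + "(" + Member + ")" : Member;
}

int main() {
  std::string Ar = "libfoo.a";
  std::cout << makeFileLabel("bar.o", &Ar) << "\n";     // libfoo.a(bar.o)
  std::cout << makeFileLabel("baz.o", nullptr) << "\n"; // baz.o
}
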
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
index 92ed098dc642..0e02da4cb847 100644
--- a/tools/llvm-readobj/llvm-readobj.h
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -1,9 +1,8 @@
//===-- llvm-readobj.h ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,6 +22,9 @@ namespace llvm {
// Various helper functions.
LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
+ void reportError(StringRef Input, Error Err);
+ void reportWarning(Twine Msg);
+ void warn(llvm::Error Err);
void error(std::error_code EC);
void error(llvm::Error EC);
template <typename T> T error(llvm::Expected<T> &&E) {
@@ -44,18 +46,16 @@ namespace llvm {
OS.flush();
reportError(Buf);
}
- bool relocAddressLess(object::RelocationRef A,
- object::RelocationRef B);
} // namespace llvm
namespace opts {
extern llvm::cl::opt<bool> SectionRelocations;
extern llvm::cl::opt<bool> SectionSymbols;
extern llvm::cl::opt<bool> SectionData;
- extern llvm::cl::opt<bool> DynamicSymbols;
extern llvm::cl::opt<bool> ExpandRelocs;
extern llvm::cl::opt<bool> RawRelr;
extern llvm::cl::opt<bool> CodeViewSubsectionBytes;
+ extern llvm::cl::opt<bool> Demangle;
enum OutputStyleTy { LLVM, GNU };
extern llvm::cl::opt<OutputStyleTy> Output;
} // namespace opts
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 975638ed82d1..a7cc1deb8cf6 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -1,9 +1,8 @@
//===-- llvm-rtdyld.cpp - MCJIT Testing Tool ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,9 +29,13 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
+
+#include <future>
#include <list>
using namespace llvm;
@@ -74,6 +77,10 @@ Dylibs("dylib",
cl::desc("Add library."),
cl::ZeroOrMore);
+static cl::list<std::string> InputArgv("args", cl::Positional,
+ cl::desc("<program arguments>..."),
+ cl::ZeroOrMore, cl::PositionalEatsArgs);
+
static cl::opt<std::string>
TripleName("triple", cl::desc("Target triple for disassembler"));
@@ -88,35 +95,28 @@ CheckFiles("check",
cl::desc("File containing RuntimeDyld verifier checks."),
cl::ZeroOrMore);
-// Tracking BUG: 19665
-// http://llvm.org/bugs/show_bug.cgi?id=19665
-//
-// Do not change these options to cl::opt<uint64_t> since this silently breaks
-// argument parsing.
-static cl::opt<unsigned long long>
-PreallocMemory("preallocate",
- cl::desc("Allocate memory upfront rather than on-demand"),
- cl::init(0));
-
-static cl::opt<unsigned long long>
-TargetAddrStart("target-addr-start",
- cl::desc("For -verify only: start of phony target address "
- "range."),
- cl::init(4096), // Start at "page 1" - no allocating at "null".
- cl::Hidden);
-
-static cl::opt<unsigned long long>
-TargetAddrEnd("target-addr-end",
- cl::desc("For -verify only: end of phony target address range."),
- cl::init(~0ULL),
- cl::Hidden);
-
-static cl::opt<unsigned long long>
-TargetSectionSep("target-section-sep",
- cl::desc("For -verify only: Separation between sections in "
- "phony target address space."),
- cl::init(0),
- cl::Hidden);
+static cl::opt<uint64_t>
+ PreallocMemory("preallocate",
+ cl::desc("Allocate memory upfront rather than on-demand"),
+ cl::init(0));
+
+static cl::opt<uint64_t> TargetAddrStart(
+ "target-addr-start",
+ cl::desc("For -verify only: start of phony target address "
+ "range."),
+ cl::init(4096), // Start at "page 1" - no allocating at "null".
+ cl::Hidden);
+
+static cl::opt<uint64_t> TargetAddrEnd(
+ "target-addr-end",
+ cl::desc("For -verify only: end of phony target address range."),
+ cl::init(~0ULL), cl::Hidden);
+
+static cl::opt<uint64_t> TargetSectionSep(
+ "target-section-sep",
+ cl::desc("For -verify only: Separation between sections in "
+ "phony target address space."),
+ cl::init(0), cl::Hidden);
static cl::list<std::string>
SpecificSectionMappings("map-section",
@@ -138,14 +138,50 @@ PrintAllocationRequests("print-alloc-requests",
"manager by RuntimeDyld"),
cl::Hidden);
+ExitOnError ExitOnErr;
+
/* *** */
+using SectionIDMap = StringMap<unsigned>;
+using FileToSectionIDMap = StringMap<SectionIDMap>;
+
+void dumpFileToSectionIDMap(const FileToSectionIDMap &FileToSecIDMap) {
+ for (const auto &KV : FileToSecIDMap) {
+ llvm::dbgs() << "In " << KV.first() << "\n";
+ for (auto &KV2 : KV.second)
+ llvm::dbgs() << " \"" << KV2.first() << "\" -> " << KV2.second << "\n";
+ }
+}
+
+Expected<unsigned> getSectionId(const FileToSectionIDMap &FileToSecIDMap,
+ StringRef FileName, StringRef SectionName) {
+ auto I = FileToSecIDMap.find(FileName);
+ if (I == FileToSecIDMap.end())
+ return make_error<StringError>("No file named " + FileName,
+ inconvertibleErrorCode());
+ auto &SectionIDs = I->second;
+ auto J = SectionIDs.find(SectionName);
+ if (J == SectionIDs.end())
+ return make_error<StringError>("No section named \"" + SectionName +
+ "\" in file " + FileName,
+ inconvertibleErrorCode());
+ return J->second;
+}
+
// A trivial memory manager that doesn't do anything fancy, just uses the
// support library allocation routines directly.
class TrivialMemoryManager : public RTDyldMemoryManager {
public:
- SmallVector<sys::MemoryBlock, 16> FunctionMemory;
- SmallVector<sys::MemoryBlock, 16> DataMemory;
+ struct SectionInfo {
+ SectionInfo(StringRef Name, sys::MemoryBlock MB, unsigned SectionID)
+ : Name(Name), MB(std::move(MB)), SectionID(SectionID) {}
+ std::string Name;
+ sys::MemoryBlock MB;
+ unsigned SectionID = ~0U;
+ };
+
+ SmallVector<SectionInfo, 16> FunctionMemory;
+ SmallVector<SectionInfo, 16> DataMemory;
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
@@ -154,6 +190,11 @@ public:
unsigned SectionID, StringRef SectionName,
bool IsReadOnly) override;
+ /// If non null, records subsequent Name -> SectionID mappings.
+ void setSectionIDsMap(SectionIDMap *SecIDMap) {
+ this->SecIDMap = SecIDMap;
+ }
+
void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) override {
return nullptr;
@@ -171,7 +212,15 @@ public:
if (I != DummyExterns.end())
return JITSymbol(I->second, JITSymbolFlags::Exported);
- return RTDyldMemoryManager::findSymbol(Name);
+ if (auto Sym = RTDyldMemoryManager::findSymbol(Name))
+ return Sym;
+ else if (auto Err = Sym.takeError())
+ ExitOnErr(std::move(Err));
+ else
+ ExitOnErr(make_error<StringError>("Could not find definition for \"" +
+ Name + "\"",
+ inconvertibleErrorCode()));
+ llvm_unreachable("Should have returned or exited by now");
}
void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
@@ -193,7 +242,8 @@ public:
SlabSize = Size;
}
- uint8_t *allocateFromSlab(uintptr_t Size, unsigned Alignment, bool isCode) {
+ uint8_t *allocateFromSlab(uintptr_t Size, unsigned Alignment, bool isCode,
+ StringRef SectionName, unsigned SectionID) {
Size = alignTo(Size, Alignment);
if (CurrentSlabOffset + Size > SlabSize)
report_fatal_error("Can't allocate enough memory. Tune --preallocate");
@@ -201,9 +251,9 @@ public:
uintptr_t OldSlabOffset = CurrentSlabOffset;
sys::MemoryBlock MB((void *)OldSlabOffset, Size);
if (isCode)
- FunctionMemory.push_back(MB);
+ FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
else
- DataMemory.push_back(MB);
+ DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
CurrentSlabOffset += Size;
return (uint8_t*)OldSlabOffset;
}
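
allocateFromSlab above is a plain bump allocator over the preallocated slab: round the request up to its alignment, fail if it would overrun the slab, hand back the current offset, and advance it. A standalone sketch of that bookkeeping with offsets instead of real pointers; the sizes below are made up:

// Standalone sketch of the bump allocation done by allocateFromSlab above.
#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  uint64_t SlabSize = 4096;
  uint64_t Offset = 0;                 // CurrentSlabOffset in the tool
  auto Allocate = [&](uint64_t Size, uint64_t Alignment) -> uint64_t {
    Size = alignTo(Size, Alignment);   // round the request up
    if (Offset + Size > SlabSize) {
      std::fprintf(stderr, "slab exhausted; tune --preallocate\n");
      return ~0ULL;
    }
    uint64_t Old = Offset;             // this is what the caller gets
    Offset += Size;                    // bump past the new section
    return Old;
  };

  std::printf("code section at %llu\n",
              (unsigned long long)Allocate(100, 16)); // 0, size rounded to 112
  std::printf("data section at %llu\n",
              (unsigned long long)Allocate(40, 8));   // 112
  return 0;
}
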
@@ -214,6 +264,7 @@ private:
bool UsePreallocation = false;
uintptr_t SlabSize = 0;
uintptr_t CurrentSlabOffset = 0;
+ SectionIDMap *SecIDMap = nullptr;
};
uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -224,8 +275,12 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
outs() << "allocateCodeSection(Size = " << Size << ", Alignment = "
<< Alignment << ", SectionName = " << SectionName << ")\n";
+ if (SecIDMap)
+ (*SecIDMap)[SectionName] = SectionID;
+
if (UsePreallocation)
- return allocateFromSlab(Size, Alignment, true /* isCode */);
+ return allocateFromSlab(Size, Alignment, true /* isCode */,
+ SectionName, SectionID);
std::error_code EC;
sys::MemoryBlock MB =
@@ -235,7 +290,7 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
EC);
if (!MB.base())
report_fatal_error("MemoryManager allocation failed: " + EC.message());
- FunctionMemory.push_back(MB);
+ FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
@@ -248,8 +303,12 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
outs() << "allocateDataSection(Size = " << Size << ", Alignment = "
<< Alignment << ", SectionName = " << SectionName << ")\n";
+ if (SecIDMap)
+ (*SecIDMap)[SectionName] = SectionID;
+
if (UsePreallocation)
- return allocateFromSlab(Size, Alignment, false /* isCode */);
+ return allocateFromSlab(Size, Alignment, false /* isCode */, SectionName,
+ SectionID);
std::error_code EC;
sys::MemoryBlock MB =
@@ -259,7 +318,7 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
EC);
if (!MB.base())
report_fatal_error("MemoryManager allocation failed: " + EC.message());
- DataMemory.push_back(MB);
+ DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
@@ -368,6 +427,8 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
}
uint64_t Addr = *AddrOrErr;
+ object::SectionedAddress Address;
+
uint64_t Size = P.second;
// If we're not using the debug object, compute the address of the
// symbol in memory (rather than that in the unrelocated object file)
@@ -382,16 +443,20 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
object::section_iterator Sec = *SecOrErr;
StringRef SecName;
Sec->getName(SecName);
+ Address.SectionIndex = Sec->getIndex();
uint64_t SectionLoadAddress =
LoadedObjInfo->getSectionLoadAddress(*Sec);
if (SectionLoadAddress != 0)
Addr += SectionLoadAddress - Sec->getAddress();
- }
+ } else if (auto SecOrErr = Sym.getSection())
+ Address.SectionIndex = SecOrErr.get()->getIndex();
outs() << "Function: " << *Name << ", Size = " << Size
<< ", Addr = " << Addr << "\n";
- DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size);
+ Address.Address = Addr;
+ DILineInfoTable Lines =
+ Context->getLineInfoForAddressRange(Address, Size);
for (auto &D : Lines) {
outs() << " Line info @ " << D.first - Addr << ": "
<< D.second.FileName << ", line:" << D.second.Line << "\n";
@@ -464,9 +529,11 @@ static int executeInput() {
// Invalidate the instruction cache for each loaded function.
for (auto &FM : MemMgr.FunctionMemory) {
+ auto &FM_MB = FM.MB;
+
// Make sure the memory is executable.
// setExecutable will call InvalidateInstructionCache.
- if (auto EC = sys::Memory::protectMappedMemory(FM,
+ if (auto EC = sys::Memory::protectMappedMemory(FM_MB,
sys::Memory::MF_READ |
sys::Memory::MF_EXEC))
ErrorAndExit("unable to mark function executable: '" + EC.message() +
@@ -478,11 +545,13 @@ static int executeInput() {
int (*Main)(int, const char**) =
(int(*)(int,const char**)) uintptr_t(MainAddress);
- const char **Argv = new const char*[2];
+ std::vector<const char *> Argv;
// Use the name of the first input object module as argv[0] for the target.
- Argv[0] = InputFileList[0].c_str();
- Argv[1] = nullptr;
- return Main(1, Argv);
+ Argv.push_back(InputFileList[0].data());
+ for (auto &Arg : InputArgv)
+ Argv.push_back(Arg.data());
+ Argv.push_back(nullptr);
+ return Main(Argv.size() - 1, Argv.data());
}
static int checkAllExpressions(RuntimeDyldChecker &Checker) {
@@ -500,10 +569,10 @@ static int checkAllExpressions(RuntimeDyldChecker &Checker) {
return 0;
}
-void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
+void applySpecificSectionMappings(RuntimeDyld &Dyld,
+ const FileToSectionIDMap &FileToSecIDMap) {
for (StringRef Mapping : SpecificSectionMappings) {
-
size_t EqualsIdx = Mapping.find_first_of("=");
std::string SectionIDStr = Mapping.substr(0, EqualsIdx);
size_t ComaIdx = Mapping.find_first_of(",");
@@ -514,17 +583,10 @@ void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
std::string FileName = SectionIDStr.substr(0, ComaIdx);
std::string SectionName = SectionIDStr.substr(ComaIdx + 1);
+ unsigned SectionID =
+ ExitOnErr(getSectionId(FileToSecIDMap, FileName, SectionName));
- uint64_t OldAddrInt;
- std::string ErrorMsg;
- std::tie(OldAddrInt, ErrorMsg) =
- Checker.getSectionAddr(FileName, SectionName, true);
-
- if (ErrorMsg != "")
- report_fatal_error(ErrorMsg);
-
- void* OldAddr = reinterpret_cast<void*>(static_cast<uintptr_t>(OldAddrInt));
-
+ auto* OldAddr = Dyld.getSectionContent(SectionID).data();
std::string NewAddrStr = Mapping.substr(EqualsIdx + 1);
uint64_t NewAddr;
@@ -532,7 +594,7 @@ void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
report_fatal_error("Invalid section address in mapping '" + Mapping +
"'.");
- Checker.getRTDyld().mapSectionAddress(OldAddr, NewAddr);
+ Dyld.mapSectionAddress(OldAddr, NewAddr);
}
}
@@ -548,21 +610,17 @@ void applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
// (e.g. 1 << 32) to stress-test stubs, GOTs, etc.
//
static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
- TrivialMemoryManager &MemMgr,
- RuntimeDyldChecker &Checker) {
+ RuntimeDyld &Dyld,
+ TrivialMemoryManager &MemMgr) {
// Set up a work list (section addr/size pairs).
- typedef std::list<std::pair<void*, uint64_t>> WorklistT;
+ typedef std::list<const TrivialMemoryManager::SectionInfo*> WorklistT;
WorklistT Worklist;
for (const auto& CodeSection : MemMgr.FunctionMemory)
- Worklist.push_back(std::make_pair(CodeSection.base(), CodeSection.size()));
+ Worklist.push_back(&CodeSection);
for (const auto& DataSection : MemMgr.DataMemory)
- Worklist.push_back(std::make_pair(DataSection.base(), DataSection.size()));
-
- // Apply any section-specific mappings that were requested on the command
- // line.
- applySpecificSectionMappings(Checker);
+ Worklist.push_back(&DataSection);
// Keep an "already allocated" mapping of section target addresses to sizes.
// Sections whose address mappings aren't specified on the command line will
@@ -577,16 +635,16 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
I != E;) {
WorklistT::iterator Tmp = I;
++I;
- auto LoadAddr = Checker.getSectionLoadAddress(Tmp->first);
- if (LoadAddr &&
- *LoadAddr != static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(Tmp->first))) {
+ auto LoadAddr = Dyld.getSectionLoadAddress((*Tmp)->SectionID);
+
+ if (LoadAddr != static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>((*Tmp)->MB.base()))) {
// A section will have a LoadAddr of 0 if it wasn't loaded for whatever
// reason (e.g. zero byte COFF sections). Don't include those sections in
// the allocation map.
- if (*LoadAddr != 0)
- AlreadyAllocated[*LoadAddr] = Tmp->second;
+ if (LoadAddr != 0)
+ AlreadyAllocated[LoadAddr] = (*Tmp)->MB.allocatedSize();
Worklist.erase(Tmp);
}
}
@@ -604,19 +662,20 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
// Process any elements remaining in the worklist.
while (!Worklist.empty()) {
- std::pair<void*, uint64_t> CurEntry = Worklist.front();
+ auto *CurEntry = Worklist.front();
Worklist.pop_front();
uint64_t NextSectionAddr = TargetAddrStart;
for (const auto &Alloc : AlreadyAllocated)
- if (NextSectionAddr + CurEntry.second + TargetSectionSep <= Alloc.first)
+ if (NextSectionAddr + CurEntry->MB.allocatedSize() + TargetSectionSep <=
+ Alloc.first)
break;
else
NextSectionAddr = Alloc.first + Alloc.second + TargetSectionSep;
- AlreadyAllocated[NextSectionAddr] = CurEntry.second;
- Checker.getRTDyld().mapSectionAddress(CurEntry.first, NextSectionAddr);
+ Dyld.mapSectionAddress(CurEntry->MB.base(), NextSectionAddr);
+ AlreadyAllocated[NextSectionAddr] = CurEntry->MB.allocatedSize();
}
// Add dummy symbols to the memory manager.
@@ -688,18 +747,132 @@ static int linkAndVerify() {
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
doPreallocation(MemMgr);
+
+ struct StubID {
+ unsigned SectionID;
+ uint32_t Offset;
+ };
+ using StubInfos = StringMap<StubID>;
+ using StubContainers = StringMap<StubInfos>;
+
+ StubContainers StubMap;
RuntimeDyld Dyld(MemMgr, MemMgr);
Dyld.setProcessAllSections(true);
- RuntimeDyldChecker Checker(Dyld, Disassembler.get(), InstPrinter.get(),
- llvm::dbgs());
+
+ Dyld.setNotifyStubEmitted([&StubMap](StringRef FilePath,
+ StringRef SectionName,
+ StringRef SymbolName, unsigned SectionID,
+ uint32_t StubOffset) {
+ std::string ContainerName =
+ (sys::path::filename(FilePath) + "/" + SectionName).str();
+ StubMap[ContainerName][SymbolName] = {SectionID, StubOffset};
+ });
+
+ auto GetSymbolInfo =
+ [&Dyld, &MemMgr](
+ StringRef Symbol) -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
+ RuntimeDyldChecker::MemoryRegionInfo SymInfo;
+
+ // First get the target address.
+ if (auto InternalSymbol = Dyld.getSymbol(Symbol))
+ SymInfo.setTargetAddress(InternalSymbol.getAddress());
+ else {
+ // Symbol not found in RuntimeDyld. Fall back to external lookup.
+#ifdef _MSC_VER
+ using ExpectedLookupResult =
+ MSVCPExpected<JITSymbolResolver::LookupResult>;
+#else
+ using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
+#endif
+
+ auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
+ auto ResultF = ResultP->get_future();
+
+ MemMgr.lookup(JITSymbolResolver::LookupSet({Symbol}),
+ [=](Expected<JITSymbolResolver::LookupResult> Result) {
+ ResultP->set_value(std::move(Result));
+ });
+
+ auto Result = ResultF.get();
+ if (!Result)
+ return Result.takeError();
+
+ auto I = Result->find(Symbol);
+ assert(I != Result->end() &&
+ "Expected symbol address if no error occurred");
+ SymInfo.setTargetAddress(I->second.getAddress());
+ }
+
+ // Now find the symbol content if possible (otherwise leave content as a
+ // default-constructed StringRef).
+ if (auto *SymAddr = Dyld.getSymbolLocalAddress(Symbol)) {
+ unsigned SectionID = Dyld.getSymbolSectionID(Symbol);
+ if (SectionID != ~0U) {
+ char *CSymAddr = static_cast<char *>(SymAddr);
+ StringRef SecContent = Dyld.getSectionContent(SectionID);
+ uint64_t SymSize = SecContent.size() - (CSymAddr - SecContent.data());
+ SymInfo.setContent(StringRef(CSymAddr, SymSize));
+ }
+ }
+ return SymInfo;
+ };
+
+ auto IsSymbolValid = [&Dyld, GetSymbolInfo](StringRef Symbol) {
+ if (Dyld.getSymbol(Symbol))
+ return true;
+ auto SymInfo = GetSymbolInfo(Symbol);
+ if (!SymInfo) {
+ logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
+ return false;
+ }
+ return SymInfo->getTargetAddress() != 0;
+ };
+
+ FileToSectionIDMap FileToSecIDMap;
+
+ auto GetSectionInfo = [&Dyld, &FileToSecIDMap](StringRef FileName,
+ StringRef SectionName)
+ -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
+ auto SectionID = getSectionId(FileToSecIDMap, FileName, SectionName);
+ if (!SectionID)
+ return SectionID.takeError();
+ RuntimeDyldChecker::MemoryRegionInfo SecInfo;
+ SecInfo.setTargetAddress(Dyld.getSectionLoadAddress(*SectionID));
+ SecInfo.setContent(Dyld.getSectionContent(*SectionID));
+ return SecInfo;
+ };
+
+ auto GetStubInfo = [&Dyld, &StubMap](StringRef StubContainer,
+ StringRef SymbolName)
+ -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
+ if (!StubMap.count(StubContainer))
+ return make_error<StringError>("Stub container not found: " +
+ StubContainer,
+ inconvertibleErrorCode());
+ if (!StubMap[StubContainer].count(SymbolName))
+ return make_error<StringError>("Symbol name " + SymbolName +
+ " in stub container " + StubContainer,
+ inconvertibleErrorCode());
+ auto &SI = StubMap[StubContainer][SymbolName];
+ RuntimeDyldChecker::MemoryRegionInfo StubMemInfo;
+ StubMemInfo.setTargetAddress(Dyld.getSectionLoadAddress(SI.SectionID) +
+ SI.Offset);
+ StubMemInfo.setContent(
+ Dyld.getSectionContent(SI.SectionID).substr(SI.Offset));
+ return StubMemInfo;
+ };
+
+ // We will initialize this below once we have the first object file and can
+ // know the endianness.
+ std::unique_ptr<RuntimeDyldChecker> Checker;
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
- for (auto &Filename : InputFileList) {
+ for (auto &InputFile : InputFileList) {
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
- MemoryBuffer::getFileOrSTDIN(Filename);
+ MemoryBuffer::getFileOrSTDIN(InputFile);
if (std::error_code EC = InputBuffer.getError())
ErrorAndExit("unable to read input: '" + EC.message() + "'");
@@ -717,6 +890,15 @@ static int linkAndVerify() {
ObjectFile &Obj = **MaybeObj;
+ if (!Checker)
+ Checker = llvm::make_unique<RuntimeDyldChecker>(
+ IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo,
+ GetStubInfo, Obj.isLittleEndian() ? support::little : support::big,
+ Disassembler.get(), InstPrinter.get(), dbgs());
+
+ auto FileName = sys::path::filename(InputFile);
+ MemMgr.setSectionIDsMap(&FileToSecIDMap[FileName]);
+
// Load the object file
Dyld.loadObject(Obj);
if (Dyld.hasError()) {
@@ -726,7 +908,8 @@ static int linkAndVerify() {
// Re-map the section addresses into the phony target address space and add
// dummy symbols.
- remapSectionsAndSymbols(TheTriple, MemMgr, Checker);
+ applySpecificSectionMappings(Dyld, FileToSecIDMap);
+ remapSectionsAndSymbols(TheTriple, Dyld, MemMgr);
// Resolve all the relocations we can.
Dyld.resolveRelocations();
@@ -734,7 +917,7 @@ static int linkAndVerify() {
// Register EH frames.
Dyld.registerEHFrames();
- int ErrorCode = checkAllExpressions(Checker);
+ int ErrorCode = checkAllExpressions(*Checker);
if (Dyld.hasError())
ErrorAndExit("RTDyld reported an error applying relocations:\n " +
Dyld.getErrorString());
@@ -752,6 +935,8 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm MC-JIT tool\n");
+ ExitOnErr.setBanner(std::string(argv[0]) + ": ");
+
switch (Action) {
case AC_Execute:
return executeInput();
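
Note on the GetSymbolInfo helper added to linkAndVerify() above: RuntimeDyld's external symbol lookup is callback-based, so the lambda parks the asynchronous result in a promise and blocks on the matching future to hand back a synchronous answer (the patch uses a shared_ptr to the promise because the callback may run later). A minimal standalone sketch of that bridging pattern, illustrative only; the function names and the fixed address below are hypothetical and not part of the patch:

#include <functional>
#include <future>
#include <string>

// Stand-in for the callback-style lookup (MemMgr.lookup in the patch): the
// caller passes a continuation that receives the resolved address later.
static void lookupAsync(const std::string &Name,
                        std::function<void(unsigned long)> OnDone) {
  (void)Name;
  OnDone(0x1000); // hypothetical resolved address
}

// Bridge the callback to a synchronous result, as GetSymbolInfo does.
static unsigned long lookupSync(const std::string &Name) {
  std::promise<unsigned long> ResultP;
  std::future<unsigned long> ResultF = ResultP.get_future();
  lookupAsync(Name,
              [&ResultP](unsigned long Addr) { ResultP.set_value(Addr); });
  return ResultF.get(); // blocks until the callback has run
}
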
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index c29b7a7f7e46..a455bf13fe7b 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -1,9 +1,8 @@
//===- llvm-stress.cpp - Generate random LL files to stress-test LLVM -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -277,7 +276,7 @@ protected:
/// Pick a random type.
Type *pickType() {
- return (getRandom() & 1 ? pickVectorType() : pickScalarType());
+ return (getRandom() & 1) ? pickVectorType() : pickScalarType();
}
/// Pick a random pointer type.
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 9d19f994b739..ea94cf9b69a1 100644
--- a/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -1,9 +1,8 @@
//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,12 +38,17 @@ ClUseSymbolTable("use-symbol-table", cl::init(true),
static cl::opt<FunctionNameKind> ClPrintFunctions(
"functions", cl::init(FunctionNameKind::LinkageName),
- cl::desc("Print function name for a given address:"),
+ cl::desc("Print function name for a given address"), cl::ValueOptional,
cl::values(clEnumValN(FunctionNameKind::None, "none", "omit function name"),
clEnumValN(FunctionNameKind::ShortName, "short",
"print short function name"),
clEnumValN(FunctionNameKind::LinkageName, "linkage",
- "print function linkage name")));
+ "print function linkage name"),
+ // Sentinel value for unspecified value.
+ clEnumValN(FunctionNameKind::LinkageName, "", "")));
+static cl::alias ClPrintFunctionsShort("f", cl::desc("Alias for -functions"),
+ cl::NotHidden, cl::Grouping,
+ cl::aliasopt(ClPrintFunctions));
static cl::opt<bool>
ClUseRelativeAddress("relative-address", cl::init(false),
@@ -54,13 +58,29 @@ static cl::opt<bool>
static cl::opt<bool>
ClPrintInlining("inlining", cl::init(true),
cl::desc("Print all inlined frames for a given address"));
+static cl::alias
+ ClPrintInliningAliasI("i", cl::desc("Alias for -inlining"),
+ cl::NotHidden, cl::aliasopt(ClPrintInlining),
+ cl::Grouping);
+static cl::alias
+ ClPrintInliningAliasInlines("inlines", cl::desc("Alias for -inlining"),
+ cl::NotHidden, cl::aliasopt(ClPrintInlining));
-// -demangle, -C
+// -basenames, -s
+static cl::opt<bool> ClBasenames("basenames", cl::init(false),
+ cl::desc("Strip directory names from paths"));
+static cl::alias ClBasenamesShort("s", cl::desc("Alias for -basenames"),
+ cl::NotHidden, cl::aliasopt(ClBasenames));
+
+// -demangle, -C, -no-demangle
static cl::opt<bool>
ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
static cl::alias
ClDemangleShort("C", cl::desc("Alias for -demangle"),
- cl::NotHidden, cl::aliasopt(ClDemangle));
+ cl::NotHidden, cl::aliasopt(ClDemangle), cl::Grouping);
+static cl::opt<bool>
+ClNoDemangle("no-demangle", cl::init(false),
+ cl::desc("Don't demangle function names"));
static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
cl::desc("Default architecture "
@@ -74,10 +94,9 @@ ClBinaryName("obj", cl::init(""),
static cl::alias
ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
cl::NotHidden, cl::aliasopt(ClBinaryName));
-static cl::alias
-ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
- cl::NotHidden, cl::aliasopt(ClBinaryName));
-
+static cl::alias ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
+ cl::NotHidden, cl::Grouping, cl::Prefix,
+ cl::aliasopt(ClBinaryName));
static cl::opt<std::string>
ClDwpName("dwp", cl::init(""),
@@ -97,7 +116,7 @@ ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
cl::NotHidden, cl::aliasopt(ClPrintAddress));
static cl::alias
ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
- cl::NotHidden, cl::aliasopt(ClPrintAddress));
+ cl::NotHidden, cl::aliasopt(ClPrintAddress), cl::Grouping);
// -pretty-print, -p
static cl::opt<bool>
@@ -105,7 +124,7 @@ static cl::opt<bool>
cl::desc("Make the output more human friendly"));
static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
cl::NotHidden,
- cl::aliasopt(ClPrettyPrint));
+ cl::aliasopt(ClPrettyPrint), cl::Grouping);
static cl::opt<int> ClPrintSourceContextLines(
"print-source-context-lines", cl::init(0),
@@ -114,10 +133,30 @@ static cl::opt<int> ClPrintSourceContextLines(
static cl::opt<bool> ClVerbose("verbose", cl::init(false),
cl::desc("Print verbose line info"));
+// -adjust-vma
+static cl::opt<uint64_t>
+ ClAdjustVMA("adjust-vma", cl::init(0), cl::value_desc("offset"),
+ cl::desc("Add specified offset to object file addresses"));
+
static cl::list<std::string> ClInputAddresses(cl::Positional,
cl::desc("<input addresses>..."),
cl::ZeroOrMore);
+static cl::opt<std::string>
+ ClFallbackDebugPath("fallback-debug-path", cl::init(""),
+ cl::desc("Fallback path for debug binaries."));
+
+static cl::opt<DIPrinter::OutputStyle>
+ ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
+ cl::desc("Specify print style"),
+ cl::values(clEnumValN(DIPrinter::OutputStyle::LLVM, "LLVM",
+ "LLVM default style"),
+ clEnumValN(DIPrinter::OutputStyle::GNU, "GNU",
+ "GNU addr2line style")));
+
+static cl::extrahelp
+ HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
+
template<typename T>
static bool error(Expected<T> &ResOrErr) {
if (ResOrErr)
@@ -127,17 +166,25 @@ static bool error(Expected<T> &ResOrErr) {
return true;
}
-static bool parseCommand(StringRef InputString, bool &IsData,
+enum class Command {
+ Code,
+ Data,
+ Frame,
+};
+
+static bool parseCommand(StringRef InputString, Command &Cmd,
std::string &ModuleName, uint64_t &ModuleOffset) {
const char kDelimiters[] = " \n\r";
ModuleName = "";
if (InputString.consume_front("CODE ")) {
- IsData = false;
+ Cmd = Command::Code;
} else if (InputString.consume_front("DATA ")) {
- IsData = true;
+ Cmd = Command::Data;
+ } else if (InputString.consume_front("FRAME ")) {
+ Cmd = Command::Frame;
} else {
// If no cmd, assume it's CODE.
- IsData = false;
+ Cmd = Command::Code;
}
const char *pos = InputString.data();
// Skip delimiters and parse input filename (if needed).
@@ -167,44 +214,85 @@ static bool parseCommand(StringRef InputString, bool &IsData,
static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
DIPrinter &Printer) {
- bool IsData = false;
+ Command Cmd;
std::string ModuleName;
- uint64_t ModuleOffset = 0;
- if (!parseCommand(StringRef(InputString), IsData, ModuleName, ModuleOffset)) {
+ uint64_t Offset = 0;
+ if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
outs() << InputString;
return;
}
if (ClPrintAddress) {
outs() << "0x";
- outs().write_hex(ModuleOffset);
+ outs().write_hex(Offset);
StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
outs() << Delimiter;
}
- if (IsData) {
- auto ResOrErr = Symbolizer.symbolizeData(ModuleName, ModuleOffset);
+ Offset -= ClAdjustVMA;
+ if (Cmd == Command::Data) {
+ auto ResOrErr = Symbolizer.symbolizeData(
+ ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
+ } else if (Cmd == Command::Frame) {
+ auto ResOrErr = Symbolizer.symbolizeFrame(
+ ModuleName, {Offset, object::SectionedAddress::UndefSection});
+ if (!error(ResOrErr)) {
+ for (DILocal Local : *ResOrErr)
+ Printer << Local;
+ if (ResOrErr->empty())
+ outs() << "??\n";
+ }
} else if (ClPrintInlining) {
- auto ResOrErr =
- Symbolizer.symbolizeInlinedCode(ModuleName, ModuleOffset, ClDwpName);
+ auto ResOrErr = Symbolizer.symbolizeInlinedCode(
+ ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
+ } else if (ClOutputStyle == DIPrinter::OutputStyle::GNU) {
+ // With ClPrintFunctions == FunctionNameKind::LinkageName (default)
+ // and ClUseSymbolTable == true (also default), Symbolizer.symbolizeCode()
+ // may override the name of an inlined function with the name of the topmost
+ // caller function in the inlining chain. This contradicts the existing
+ // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
+ // the topmost function, which suits our needs better.
+ auto ResOrErr = Symbolizer.symbolizeInlinedCode(
+ ModuleName, {Offset, object::SectionedAddress::UndefSection});
+ Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
} else {
- auto ResOrErr =
- Symbolizer.symbolizeCode(ModuleName, ModuleOffset, ClDwpName);
+ auto ResOrErr = Symbolizer.symbolizeCode(
+ ModuleName, {Offset, object::SectionedAddress::UndefSection});
Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
}
- outs() << "\n";
- outs().flush();
+ if (ClOutputStyle == DIPrinter::OutputStyle::LLVM)
+ outs() << "\n";
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
+ bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
+
+ if (IsAddr2Line) {
+ ClDemangle.setInitialValue(false);
+ ClPrintFunctions.setInitialValue(FunctionNameKind::None);
+ ClPrintInlining.setInitialValue(false);
+ ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU);
+ }
+
llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
+ cl::ParseCommandLineOptions(argc, argv, IsAddr2Line ? "llvm-addr2line\n"
+ : "llvm-symbolizer\n");
- cl::ParseCommandLineOptions(argc, argv, "llvm-symbolizer\n");
- LLVMSymbolizer::Options Opts(ClPrintFunctions, ClUseSymbolTable, ClDemangle,
- ClUseRelativeAddress, ClDefaultArch);
+ // If both --demangle and --no-demangle are specified then pick the last one.
+ if (ClNoDemangle.getPosition() > ClDemangle.getPosition())
+ ClDemangle = !ClNoDemangle;
+
+ LLVMSymbolizer::Options Opts;
+ Opts.PrintFunctions = ClPrintFunctions;
+ Opts.UseSymbolTable = ClUseSymbolTable;
+ Opts.Demangle = ClDemangle;
+ Opts.RelativeAddresses = ClUseRelativeAddress;
+ Opts.DefaultArch = ClDefaultArch;
+ Opts.FallbackDebugPath = ClFallbackDebugPath;
+ Opts.DWPName = ClDwpName;
for (const auto &hint : ClDsymHint) {
if (sys::path::extension(hint) == ".dSYM") {
@@ -217,14 +305,17 @@ int main(int argc, char **argv) {
LLVMSymbolizer Symbolizer(Opts);
DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
- ClPrettyPrint, ClPrintSourceContextLines, ClVerbose);
+ ClPrettyPrint, ClPrintSourceContextLines, ClVerbose,
+ ClBasenames, ClOutputStyle);
if (ClInputAddresses.empty()) {
const int kMaxInputStringLength = 1024;
char InputString[kMaxInputStringLength];
- while (fgets(InputString, sizeof(InputString), stdin))
+ while (fgets(InputString, sizeof(InputString), stdin)) {
symbolizeInput(InputString, Symbolizer, Printer);
+ outs().flush();
+ }
} else {
for (StringRef Address : ClInputAddresses)
symbolizeInput(Address, Symbolizer, Printer);
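
The parseCommand changes above extend llvm-symbolizer's stdin grammar: each input line may start with CODE, DATA, or the new FRAME keyword, followed by a module name and an offset (a hypothetical line such as FRAME /tmp/a.out 0x401234), and lines with no keyword fall back to CODE. A standalone sketch of that prefix dispatch, using std::string_view in place of llvm::StringRef, illustrative only:

#include <string_view>

enum class Command { Code, Data, Frame };

static Command parsePrefix(std::string_view &In) {
  auto Consume = [&In](std::string_view Prefix) {
    if (In.substr(0, Prefix.size()) != Prefix)
      return false;
    In.remove_prefix(Prefix.size()); // drop the keyword and trailing space
    return true;
  };
  if (Consume("CODE "))
    return Command::Code;
  if (Consume("DATA "))
    return Command::Data;
  if (Consume("FRAME "))
    return Command::Frame;
  return Command::Code; // no keyword: assume CODE, as the tool does
}
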
diff --git a/tools/llvm-xray/func-id-helper.cpp b/tools/llvm-xray/func-id-helper.cpp
index c2bef6ddfb39..dc821a420c67 100644
--- a/tools/llvm-xray/func-id-helper.cpp
+++ b/tools/llvm-xray/func-id-helper.cpp
@@ -1,9 +1,8 @@
//===- xray-fc-account.cpp: XRay Function Call Accounting Tool ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,7 +29,12 @@ std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const {
return F.str();
}
- if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second)) {
+ object::SectionedAddress ModuleAddress;
+ ModuleAddress.Address = It->second;
+ // TODO: set proper section index here.
+ // object::SectionedAddress::UndefSection works for only absolute addresses.
+ ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection;
+ if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress)) {
auto &DI = *ResOrErr;
if (DI.FunctionName == "<invalid>")
F << "@(" << std::hex << It->second << ")";
@@ -52,7 +56,12 @@ std::string FuncIdConversionHelper::FileLineAndColumn(int32_t FuncId) const {
return "(unknown)";
std::ostringstream F;
- auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second);
+ object::SectionedAddress ModuleAddress;
+ ModuleAddress.Address = It->second;
+ // TODO: set proper section index here.
+ // object::SectionedAddress::UndefSection works for only absolute addresses.
+ ModuleAddress.SectionIndex = object::SectionedAddress::UndefSection;
+ auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, ModuleAddress);
if (!ResOrErr) {
consumeError(ResOrErr.takeError());
return "(unknown)";
diff --git a/tools/llvm-xray/func-id-helper.h b/tools/llvm-xray/func-id-helper.h
index 3e0780d54f90..c6ce198170d5 100644
--- a/tools/llvm-xray/func-id-helper.h
+++ b/tools/llvm-xray/func-id-helper.h
@@ -1,9 +1,8 @@
//===- func-id-helper.h - XRay Function ID Conversion Helpers -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/llvm-xray.cpp b/tools/llvm-xray/llvm-xray.cpp
index e74628f5025f..9ee653e97b2d 100644
--- a/tools/llvm-xray/llvm-xray.cpp
+++ b/tools/llvm-xray/llvm-xray.cpp
@@ -1,9 +1,8 @@
//===- llvm-xray.cpp: XRay Tool Main Program ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/trie-node.h b/tools/llvm-xray/trie-node.h
index e6ba4e215b91..47d4b8f1e78c 100644
--- a/tools/llvm-xray/trie-node.h
+++ b/tools/llvm-xray/trie-node.h
@@ -1,9 +1,8 @@
//===- trie-node.h - XRay Call Stack Data Structure -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-account.cpp b/tools/llvm-xray/xray-account.cpp
index 9985c9adcf6c..2b49a311d7e3 100644
--- a/tools/llvm-xray/xray-account.cpp
+++ b/tools/llvm-xray/xray-account.cpp
@@ -1,9 +1,8 @@
//===- xray-account.h - XRay Function Call Accounting ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -428,9 +427,7 @@ static CommandRegistration Unused(&Account, []() -> Error {
Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
const auto &FunctionAddresses = Map.getFunctionAddresses();
- symbolize::LLVMSymbolizer::Options Opts(
- symbolize::FunctionNameKind::LinkageName, true, true, false, "");
- symbolize::LLVMSymbolizer Symbolizer(Opts);
+ symbolize::LLVMSymbolizer Symbolizer;
llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
FunctionAddresses);
xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls);
diff --git a/tools/llvm-xray/xray-account.h b/tools/llvm-xray/xray-account.h
index 5c457f178166..b63ecc59b71a 100644
--- a/tools/llvm-xray/xray-account.h
+++ b/tools/llvm-xray/xray-account.h
@@ -1,9 +1,8 @@
//===- xray-account.h - XRay Function Call Accounting ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -29,12 +28,11 @@ namespace xray {
class LatencyAccountant {
public:
typedef std::map<int32_t, std::vector<uint64_t>> FunctionLatencyMap;
- typedef std::map<llvm::sys::procid_t, std::pair<uint64_t, uint64_t>>
+ typedef std::map<uint32_t, std::pair<uint64_t, uint64_t>>
PerThreadMinMaxTSCMap;
typedef std::map<uint8_t, std::pair<uint64_t, uint64_t>> PerCPUMinMaxTSCMap;
typedef std::vector<std::pair<int32_t, uint64_t>> FunctionStack;
- typedef std::map<llvm::sys::procid_t, FunctionStack>
- PerThreadFunctionStackMap;
+ typedef std::map<uint32_t, FunctionStack> PerThreadFunctionStackMap;
private:
PerThreadFunctionStackMap PerThreadFunctionStack;
@@ -78,13 +76,6 @@ public:
///
bool accountRecord(const XRayRecord &Record);
- const FunctionStack *getThreadFunctionStack(llvm::sys::procid_t TId) const {
- auto I = PerThreadFunctionStack.find(TId);
- if (I == PerThreadFunctionStack.end())
- return nullptr;
- return &I->second;
- }
-
const PerThreadFunctionStackMap &getPerThreadFunctionStack() const {
return PerThreadFunctionStack;
}
diff --git a/tools/llvm-xray/xray-color-helper.cpp b/tools/llvm-xray/xray-color-helper.cpp
index 78a264b73d8f..c09cad3ba7d2 100644
--- a/tools/llvm-xray/xray-color-helper.cpp
+++ b/tools/llvm-xray/xray-color-helper.cpp
@@ -1,9 +1,8 @@
//===-- xray-graph.cpp: XRay Function Call Graph Renderer -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-color-helper.h b/tools/llvm-xray/xray-color-helper.h
index b2dcf626a65f..0940fc211343 100644
--- a/tools/llvm-xray/xray-color-helper.h
+++ b/tools/llvm-xray/xray-color-helper.h
@@ -1,9 +1,8 @@
//===-- xray-graph.h - XRay Function Call Graph Renderer --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-converter.cpp b/tools/llvm-xray/xray-converter.cpp
index 3f153b99bc93..dfc757e0f276 100644
--- a/tools/llvm-xray/xray-converter.cpp
+++ b/tools/llvm-xray/xray-converter.cpp
@@ -1,9 +1,8 @@
//===- xray-converter.cpp: XRay Trace Conversion --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,7 +17,6 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/JSON.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@@ -242,6 +240,31 @@ StackTrieNode *findOrCreateStackNode(
return CurrentStack;
}
+void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
+ uint32_t TId, uint32_t PId, bool Symbolize,
+ const FuncIdConversionHelper &FuncIdHelper,
+ double EventTimestampUs,
+ const StackTrieNode &StackCursor,
+ StringRef FunctionPhenotype) {
+ OS << " ";
+ if (Version >= 3) {
+ OS << llvm::formatv(
+ R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
+ R"("ts" : "{4:f4}", "sf" : "{5}" })",
+ (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
+ : llvm::to_string(FuncId)),
+ FunctionPhenotype, TId, PId, EventTimestampUs,
+ StackCursor.ExtraData.id);
+ } else {
+ OS << llvm::formatv(
+ R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
+ R"("ts" : "{3:f3}", "sf" : "{4}" })",
+ (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
+ : llvm::to_string(FuncId)),
+ FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
+ }
+}
+
} // namespace
void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
@@ -252,14 +275,18 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
unsigned id_counter = 0;
+ OS << "{\n \"traceEvents\": [";
DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
std::forward_list<StackTrieNode> NodeStore{};
-
- // Create a JSON Array which will hold all trace events.
- json::Array TraceEvents;
+ int loop_count = 0;
for (const auto &R : Records) {
+ if (loop_count++ == 0)
+ OS << "\n";
+ else
+ OS << ",\n";
+
// Chrome trace event format always wants data in micros.
// CyclesPerMicro = CycleHertz / 10^6
// TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
@@ -284,15 +311,8 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// type of B for begin or E for end, thread id, process id,
// timestamp in microseconds, and a stack frame id. The ids are logged
// in an id dictionary after the events.
- TraceEvents.push_back(json::Object({
- {"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
- : llvm::to_string(R.FuncId)},
- {"ph", "B"},
- {"tid", llvm::to_string(R.TId)},
- {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
- {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
- {"sf", llvm::to_string(StackCursor->ExtraData.id)},
- }));
+ writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
+ FuncIdHelper, EventTimestampUs, *StackCursor, "B");
break;
case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT:
@@ -303,51 +323,43 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// (And/Or in loop termination below)
StackTrieNode *PreviousCursor = nullptr;
do {
- TraceEvents.push_back(json::Object({
- {"name", Symbolize
- ? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
- : llvm::to_string(StackCursor->FuncId)},
- {"ph", "E"},
- {"tid", llvm::to_string(R.TId)},
- {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
- {"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
- {"sf", llvm::to_string(StackCursor->ExtraData.id)},
- }));
+ if (PreviousCursor != nullptr) {
+ OS << ",\n";
+ }
+ writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
+ Symbolize, FuncIdHelper, EventTimestampUs,
+ *StackCursor, "E");
PreviousCursor = StackCursor;
StackCursor = StackCursor->Parent;
} while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
break;
}
}
+ OS << "\n ],\n"; // Close the Trace Events array.
+ OS << " "
+ << "\"displayTimeUnit\": \"ns\",\n";
// The stackFrames dictionary substantially reduces size of the output file by
// avoiding repeating the entire call stack of function names for each entry.
- json::Object StackFrames;
- for (const auto &Stack : StacksByStackId) {
- const auto &StackId = Stack.first;
- const auto &StackFunctionNode = Stack.second;
- json::Object::iterator It;
- std::tie(It, std::ignore) = StackFrames.insert({
- llvm::to_string(StackId),
- json::Object{
- {"name",
- Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
- : llvm::to_string(StackFunctionNode->FuncId)}},
- });
-
- if (StackFunctionNode->Parent != nullptr)
- It->second.getAsObject()->insert(
- {"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
+ OS << R"( "stackFrames": {)";
+ int stack_frame_count = 0;
+ for (auto map_iter : StacksByStackId) {
+ if (stack_frame_count++ == 0)
+ OS << "\n";
+ else
+ OS << ",\n";
+ OS << " ";
+ OS << llvm::formatv(
+ R"("{0}" : { "name" : "{1}")", map_iter.first,
+ (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
+ : llvm::to_string(map_iter.second->FuncId)));
+ if (map_iter.second->Parent != nullptr)
+ OS << llvm::formatv(R"(, "parent": "{0}")",
+ map_iter.second->Parent->ExtraData.id);
+ OS << " }";
}
-
- json::Object TraceJSON{
- {"displayTimeUnit", "ns"},
- {"traceEvents", std::move(TraceEvents)},
- {"stackFrames", std::move(StackFrames)},
- };
-
- // Pretty-print the JSON using two spaces for indentations.
- OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
+ OS << "\n }\n"; // Close the stack frames map.
+ OS << "}\n"; // Close the JSON entry.
}
namespace llvm {
@@ -368,9 +380,7 @@ static CommandRegistration Unused(&Convert, []() -> Error {
}
const auto &FunctionAddresses = Map.getFunctionAddresses();
- symbolize::LLVMSymbolizer::Options Opts(
- symbolize::FunctionNameKind::LinkageName, true, true, false, "");
- symbolize::LLVMSymbolizer Symbolizer(Opts);
+ symbolize::LLVMSymbolizer Symbolizer;
llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer,
FunctionAddresses);
llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize);
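
One arithmetic detail from exportAsChromeTraceEventFormat above: the Chrome trace event format wants timestamps in microseconds, and the CyclesPerMicro comment spells out the conversion TSC / (CycleHertz / 10^6) == TSC * 10^6 / CycleHertz. The same calculation as a standalone sketch, illustrative only:

#include <cstdint>

// TSC ticks -> microseconds, given the TSC frequency in Hz (CycleHertz).
static double tscToMicros(uint64_t TSC, uint64_t CycleHertz) {
  // CyclesPerMicro = CycleHertz / 1e6, so
  // TSC / CyclesPerMicro == TSC * 1e6 / CycleHertz.
  return static_cast<double>(TSC) * 1e6 / static_cast<double>(CycleHertz);
}

For example, with a 3 GHz TSC, 3000 ticks come out as exactly 1 microsecond.
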
diff --git a/tools/llvm-xray/xray-converter.h b/tools/llvm-xray/xray-converter.h
index 5f0a3ee298eb..db6d2b1614ee 100644
--- a/tools/llvm-xray/xray-converter.h
+++ b/tools/llvm-xray/xray-converter.h
@@ -1,9 +1,8 @@
//===- xray-converter.h - XRay Trace Conversion ---------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-extract.cpp b/tools/llvm-xray/xray-extract.cpp
index 10fe7d8d6209..7c7d26b5a389 100644
--- a/tools/llvm-xray/xray-extract.cpp
+++ b/tools/llvm-xray/xray-extract.cpp
@@ -1,9 +1,8 @@
//===- xray-extract.cpp: XRay Instrumentation Map Extraction --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -87,9 +86,7 @@ static CommandRegistration Unused(&Extract, []() -> Error {
Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC);
const auto &FunctionAddresses =
InstrumentationMapOrError->getFunctionAddresses();
- symbolize::LLVMSymbolizer::Options Opts(
- symbolize::FunctionNameKind::LinkageName, true, true, false, "");
- symbolize::LLVMSymbolizer Symbolizer(Opts);
+ symbolize::LLVMSymbolizer Symbolizer;
llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer,
FunctionAddresses);
exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper);
diff --git a/tools/llvm-xray/xray-fdr-dump.cpp b/tools/llvm-xray/xray-fdr-dump.cpp
index 389825605b62..81a93cac57c4 100644
--- a/tools/llvm-xray/xray-fdr-dump.cpp
+++ b/tools/llvm-xray/xray-fdr-dump.cpp
@@ -1,9 +1,8 @@
//===- xray-fdr-dump.cpp: XRay FDR Trace Dump Tool ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,10 +35,9 @@ static cl::opt<bool> DumpVerify("verify",
static CommandRegistration Unused(&Dump, []() -> Error {
// Open the file provided.
- int Fd;
- if (auto EC = sys::fs::openFileForRead(DumpInput, Fd))
- return createStringError(EC, "Cannot open file '%s' for read.",
- DumpInput.c_str());
+ auto FDOrErr = sys::fs::openNativeFileForRead(DumpInput);
+ if (!FDOrErr)
+ return FDOrErr.takeError();
uint64_t FileSize;
if (auto EC = sys::fs::file_size(DumpInput, FileSize))
@@ -48,7 +46,9 @@ static CommandRegistration Unused(&Dump, []() -> Error {
std::error_code EC;
sys::fs::mapped_file_region MappedFile(
- Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ *FDOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
+ EC);
+ sys::fs::closeFile(*FDOrErr);
DataExtractor DE(StringRef(MappedFile.data(), MappedFile.size()), true, 8);
uint32_t OffsetPtr = 0;
diff --git a/tools/llvm-xray/xray-graph-diff.cpp b/tools/llvm-xray/xray-graph-diff.cpp
index a22f2a99811d..a514be97f40b 100644
--- a/tools/llvm-xray/xray-graph-diff.cpp
+++ b/tools/llvm-xray/xray-graph-diff.cpp
@@ -1,9 +1,8 @@
//===-- xray-graph-diff.cpp: XRay Function Call Graph Renderer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-graph-diff.h b/tools/llvm-xray/xray-graph-diff.h
index 5abec91d8582..5d12c563f47c 100644
--- a/tools/llvm-xray/xray-graph-diff.h
+++ b/tools/llvm-xray/xray-graph-diff.h
@@ -1,9 +1,8 @@
//===-- xray-graph-diff.h - XRay Graph Diff Renderer ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-graph.cpp b/tools/llvm-xray/xray-graph.cpp
index fe49cca20d57..c09357fcb502 100644
--- a/tools/llvm-xray/xray-graph.cpp
+++ b/tools/llvm-xray/xray-graph.cpp
@@ -1,9 +1,8 @@
//===-- xray-graph.cpp: XRay Function Call Graph Renderer -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -437,9 +436,7 @@ Expected<GraphRenderer> GraphRenderer::Factory::getGraphRenderer() {
const auto &FunctionAddresses = Map.getFunctionAddresses();
- symbolize::LLVMSymbolizer::Options Opts(
- symbolize::FunctionNameKind::LinkageName, true, true, false, "");
- symbolize::LLVMSymbolizer Symbolizer(Opts);
+ symbolize::LLVMSymbolizer Symbolizer;
const auto &Header = Trace.getFileHeader();
llvm::xray::FuncIdConversionHelper FuncIdHelper(InstrMap, Symbolizer,
diff --git a/tools/llvm-xray/xray-graph.h b/tools/llvm-xray/xray-graph.h
index fc7f8bb470f2..23372d40f05e 100644
--- a/tools/llvm-xray/xray-graph.h
+++ b/tools/llvm-xray/xray-graph.h
@@ -1,9 +1,8 @@
//===-- xray-graph.h - XRay Function Call Graph Renderer --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -79,8 +78,7 @@ public:
using FunctionStack = SmallVector<FunctionAttr, 4>;
- using PerThreadFunctionStackMap =
- DenseMap<llvm::sys::procid_t, FunctionStack>;
+ using PerThreadFunctionStackMap = DenseMap<uint32_t, FunctionStack>;
class GraphT : public Graph<FunctionStats, CallStats, int32_t> {
public:
diff --git a/tools/llvm-xray/xray-registry.cpp b/tools/llvm-xray/xray-registry.cpp
index fe58e4deaa1e..e5c253d2e8f1 100644
--- a/tools/llvm-xray/xray-registry.cpp
+++ b/tools/llvm-xray/xray-registry.cpp
@@ -1,9 +1,8 @@
//===- xray-registry.cpp: Implement a command registry. -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-registry.h b/tools/llvm-xray/xray-registry.h
index 6eab016273f5..d6fae78ea53c 100644
--- a/tools/llvm-xray/xray-registry.h
+++ b/tools/llvm-xray/xray-registry.h
@@ -1,9 +1,8 @@
//===- xray-registry.h - Define registry mechanism for commands. ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/llvm-xray/xray-stacks.cpp b/tools/llvm-xray/xray-stacks.cpp
index d3af9e25e6f2..bcfc5cb1f1be 100644
--- a/tools/llvm-xray/xray-stacks.cpp
+++ b/tools/llvm-xray/xray-stacks.cpp
@@ -1,9 +1,8 @@
//===- xray-stacks.cpp: XRay Function Call Stack Accounting ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -634,10 +633,8 @@ public:
Top->ExtraData.TerminalDurations.end(), 0uLL);
{
auto E = std::make_pair(Top, TopSum);
- TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(),
- TopStacksBySum.end(), E,
- greater_second),
- E);
+ TopStacksBySum.insert(
+ llvm::lower_bound(TopStacksBySum, E, greater_second), E);
if (TopStacksBySum.size() == 11)
TopStacksBySum.pop_back();
}
@@ -721,9 +718,7 @@ static CommandRegistration Unused(&Stack, []() -> Error {
"-all-stacks."),
std::make_error_code(std::errc::invalid_argument));
- symbolize::LLVMSymbolizer::Options Opts(
- symbolize::FunctionNameKind::LinkageName, true, true, false, "");
- symbolize::LLVMSymbolizer Symbolizer(Opts);
+ symbolize::LLVMSymbolizer Symbolizer;
FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer,
Map.getFunctionAddresses());
// TODO: Someday, support output to files instead of just directly to
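
The xray-stacks change above swaps a verbose std::lower_bound call for llvm::lower_bound while keeping the same idiom: insert each new (stack, sum) entry into a descending-sorted vector at its lower bound, then drop the tail so only the top ten survive. A minimal standalone sketch of that idiom with std::lower_bound, illustrative only; the vector of plain integers is a stand-in for the pair type used in the tool:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

static void keepTopN(std::vector<uint64_t> &Top, uint64_t Value, size_t N) {
  // Find the first element smaller than Value so the vector stays descending.
  auto It = std::lower_bound(Top.begin(), Top.end(), Value,
                             [](uint64_t A, uint64_t B) { return A > B; });
  Top.insert(It, Value);
  if (Top.size() > N) // keep at most N entries, dropping the smallest
    Top.pop_back();
}
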
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index cfdd2cf1582b..b888605a516c 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -1,9 +1,8 @@
//===- AnalysisWrappers.cpp - Wrappers around non-pass analyses -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/opt/BreakpointPrinter.cpp b/tools/opt/BreakpointPrinter.cpp
index d3f54c034f55..a57a8c43c264 100644
--- a/tools/opt/BreakpointPrinter.cpp
+++ b/tools/opt/BreakpointPrinter.cpp
@@ -1,9 +1,8 @@
//===- BreakpointPrinter.cpp - Breakpoint location printer ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -36,7 +35,7 @@ struct BreakpointPrinter : public ModulePass {
}
} else if (auto *TY = dyn_cast<DIType>(Context)) {
if (!TY->getName().empty()) {
- getContextName(TY->getScope().resolve(), N);
+ getContextName(TY->getScope(), N);
N = N + TY->getName().str() + "::";
}
}
@@ -50,7 +49,7 @@ struct BreakpointPrinter : public ModulePass {
auto *SP = cast_or_null<DISubprogram>(NMD->getOperand(i));
if (!SP)
continue;
- getContextName(SP->getScope().resolve(), Name);
+ getContextName(SP->getScope(), Name);
Name = Name + SP->getName().str();
if (!Name.empty() && Processed.insert(Name).second) {
Out << Name << "\n";
diff --git a/tools/opt/BreakpointPrinter.h b/tools/opt/BreakpointPrinter.h
index 57670e5ee8d8..2877555f852c 100644
--- a/tools/opt/BreakpointPrinter.h
+++ b/tools/opt/BreakpointPrinter.h
@@ -1,9 +1,8 @@
//===- BreakpointPrinter.h - Breakpoint location printer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/opt/Debugify.cpp b/tools/opt/Debugify.cpp
index 3b1effba1592..222cc702bc1f 100644
--- a/tools/opt/Debugify.cpp
+++ b/tools/opt/Debugify.cpp
@@ -1,9 +1,8 @@
//===- Debugify.cpp - Attach synthetic debug info to everything -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/opt/Debugify.h b/tools/opt/Debugify.h
index d1a60c73e723..266f577951ae 100644
--- a/tools/opt/Debugify.h
+++ b/tools/opt/Debugify.h
@@ -1,9 +1,8 @@
//===- Debugify.h - Attach synthetic debug info to everything -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index a8bb12f3e018..611fb20513c9 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -1,9 +1,8 @@
//===- GraphPrinters.cpp - DOT printers for various graph types -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/opt/NewPMDriver.cpp b/tools/opt/NewPMDriver.cpp
index 211a3b151fe1..efe0bec35d72 100644
--- a/tools/opt/NewPMDriver.cpp
+++ b/tools/opt/NewPMDriver.cpp
@@ -1,9 +1,8 @@
//===- NewPMDriver.cpp - Driver for opt with new PM -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -101,19 +100,11 @@ static cl::opt<std::string> OptimizerLastEPPipeline(
"the OptimizerLast extension point into default pipelines"),
cl::Hidden);
-enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
-static cl::opt<PGOKind> PGOKindFlag(
- "pgo-kind", cl::init(NoPGO), cl::Hidden,
- cl::desc("The kind of profile guided optimization"),
- cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
- clEnumValN(InstrGen, "new-pm-pgo-instr-gen-pipeline",
- "Instrument the IR to generate profile."),
- clEnumValN(InstrUse, "new-pm-pgo-instr-use-pipeline",
- "Use instrumented profile to guide PGO."),
- clEnumValN(SampleUse, "new-pm-pgo-sample-use-pipeline",
- "Use sampled profile to guide PGO.")));
-static cl::opt<std::string> ProfileFile(
- "profile-file", cl::desc("Path to the profile."), cl::Hidden);
+extern cl::opt<PGOKind> PGOKindFlag;
+extern cl::opt<std::string> ProfileFile;
+extern cl::opt<CSPGOKind> CSPGOKindFlag;
+extern cl::opt<std::string> CSProfileGenFile;
+
static cl::opt<std::string>
ProfileRemappingFile("profile-remapping-file",
cl::desc("Path to the profile remapping file."),
@@ -231,25 +222,46 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
Optional<PGOOptions> P;
switch (PGOKindFlag) {
case InstrGen:
- P = PGOOptions(ProfileFile, "", "", "", true);
+ P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr);
break;
case InstrUse:
- P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false);
+ P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse);
break;
case SampleUse:
- P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false);
+ P = PGOOptions(ProfileFile, "", ProfileRemappingFile,
+ PGOOptions::SampleUse);
break;
case NoPGO:
if (DebugInfoForProfiling)
- P = PGOOptions("", "", "", "", false, true);
+ P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
+ true);
else
P = None;
- }
+ }
+ if (CSPGOKindFlag != NoCSPGO) {
+ if (P && (P->Action == PGOOptions::IRInstr ||
+ P->Action == PGOOptions::SampleUse))
+ errs() << "CSPGOKind cannot be used with IRInstr or SampleUse";
+ if (CSPGOKindFlag == CSInstrGen) {
+ if (CSProfileGenFile.empty())
+ errs() << "CSInstrGen needs to specify CSProfileGenFile";
+ if (P) {
+ P->CSAction = PGOOptions::CSIRInstr;
+ P->CSProfileGenFile = CSProfileGenFile;
+ } else
+ P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile,
+ PGOOptions::NoAction, PGOOptions::CSIRInstr);
+ } else /* CSPGOKindFlag == CSInstrUse */ {
+ if (!P)
+ errs() << "CSInstrUse needs to be together with InstrUse";
+ P->CSAction = PGOOptions::CSIRUse;
+ }
+ }
PassInstrumentationCallbacks PIC;
StandardInstrumentations SI;
SI.registerCallbacks(PIC);
- PassBuilder PB(TM, P, &PIC);
+ PassBuilder PB(TM, PipelineTuningOptions(), P, &PIC);
registerEPCallbacks(PB, VerifyEachPass, DebugPM);
// Load requested pass plugins and let them register pass builder callbacks
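Context for the hunk above: the driver now reads the shared -pgo-kind and -cspgo-kind flags and folds them into a single optional options value before building the pipeline. Below is a minimal, self-contained sketch of that decision logic; PGOOpts, selectPGO, and the profile file names are made-up stand-ins for the real PGOOptions/Optional types, not the actual LLVM API.

#include <iostream>
#include <optional>
#include <string>

// Hypothetical stand-ins for the PGO kinds and options used by the driver.
enum PGOKind { NoPGO, InstrGen, InstrUse, SampleUse };
enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse };

struct PGOOpts {
  enum Action { NoAction, IRInstr, IRUse, SampleUse_ };
  enum CSAction { NoCSAction, CSIRInstr, CSIRUse };
  Action Act = NoAction;
  CSAction CSAct = NoCSAction;
  std::string ProfileFile, CSProfileGenFile;
};

// Mirrors the shape of the switch in runPassPipeline: pick the base PGO
// action first, then layer the context-sensitive action on top of it.
std::optional<PGOOpts> selectPGO(PGOKind K, CSPGOKind CSK,
                                 const std::string &Profile,
                                 const std::string &CSGenFile) {
  std::optional<PGOOpts> P;
  switch (K) {
  case InstrGen:  P = PGOOpts{PGOOpts::IRInstr,    PGOOpts::NoCSAction, Profile, ""}; break;
  case InstrUse:  P = PGOOpts{PGOOpts::IRUse,      PGOOpts::NoCSAction, Profile, ""}; break;
  case SampleUse: P = PGOOpts{PGOOpts::SampleUse_, PGOOpts::NoCSAction, Profile, ""}; break;
  case NoPGO:     break;                       // leave P empty
  }
  if (CSK == CSInstrGen) {
    if (!P) P = PGOOpts{};
    P->CSAct = PGOOpts::CSIRInstr;             // CS instrumentation stacks on top
    P->CSProfileGenFile = CSGenFile;
  } else if (CSK == CSInstrUse && P) {
    P->CSAct = PGOOpts::CSIRUse;               // only meaningful together with InstrUse
  }
  return P;
}

int main() {
  auto P = selectPGO(InstrUse, CSInstrGen, "default.profdata", "cs.profraw");
  std::cout << (P ? "PGO enabled\n" : "PGO disabled\n");
}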
diff --git a/tools/opt/NewPMDriver.h b/tools/opt/NewPMDriver.h
index 7d74a5777d11..b672c97c9aa3 100644
--- a/tools/opt/NewPMDriver.h
+++ b/tools/opt/NewPMDriver.h
@@ -1,9 +1,8 @@
//===- NewPMDriver.h - Function to drive opt with the new PM ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -40,6 +39,13 @@ enum VerifierKind {
VK_VerifyInAndOut,
VK_VerifyEachPass
};
+enum PGOKind {
+ NoPGO,
+ InstrGen,
+ InstrUse,
+ SampleUse
+};
+enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse };
}
/// Driver function to run the new pass manager over a module.
diff --git a/tools/opt/PassPrinters.cpp b/tools/opt/PassPrinters.cpp
index 310d491c06a5..70da6a43f8d9 100644
--- a/tools/opt/PassPrinters.cpp
+++ b/tools/opt/PassPrinters.cpp
@@ -1,9 +1,8 @@
//===- PassPrinters.cpp - Utilities to print analysis info for passes -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/opt/PassPrinters.h b/tools/opt/PassPrinters.h
index e66f3f457b7a..d4e7a4a97f31 100644
--- a/tools/opt/PassPrinters.h
+++ b/tools/opt/PassPrinters.h
@@ -1,9 +1,8 @@
//=- PassPrinters.h - Utilities to print analysis info for passes -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index 78ede2b72f84..419886d6cc60 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -1,9 +1,8 @@
//===- PrintSCC.cpp - Enumerate SCCs in some key graphs -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index a4967a234d9c..ccf8b073b82b 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -1,9 +1,8 @@
//===- opt.cpp - The LLVM Modular Optimizer -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,6 +33,7 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
@@ -173,18 +173,9 @@ static cl::opt<std::string>
TargetTriple("mtriple", cl::desc("Override target triple for module"));
static cl::opt<bool>
-UnitAtATime("funit-at-a-time",
- cl::desc("Enable IPO. This corresponds to gcc's -funit-at-a-time"),
- cl::init(true));
-
-static cl::opt<bool>
DisableLoopUnrolling("disable-loop-unrolling",
cl::desc("Disable loop unrolling in all relevant passes"),
cl::init(false));
-static cl::opt<bool>
-DisableLoopVectorization("disable-loop-vectorization",
- cl::desc("Disable the loop vectorization pass"),
- cl::init(false));
static cl::opt<bool>
DisableSLPVectorization("disable-slp-vectorization",
@@ -260,21 +251,62 @@ static cl::opt<bool> Coroutines(
cl::desc("Enable coroutine passes."),
cl::init(false), cl::Hidden);
-static cl::opt<bool> PassRemarksWithHotness(
+static cl::opt<bool> RemarksWithHotness(
"pass-remarks-with-hotness",
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
-static cl::opt<unsigned> PassRemarksHotnessThreshold(
- "pass-remarks-hotness-threshold",
- cl::desc("Minimum profile count required for an optimization remark to be output"),
- cl::Hidden);
+static cl::opt<unsigned>
+ RemarksHotnessThreshold("pass-remarks-hotness-threshold",
+ cl::desc("Minimum profile count required for "
+ "an optimization remark to be output"),
+ cl::Hidden);
static cl::opt<std::string>
RemarksFilename("pass-remarks-output",
- cl::desc("YAML output filename for pass remarks"),
+ cl::desc("Output filename for pass remarks"),
cl::value_desc("filename"));
+static cl::opt<std::string>
+ RemarksPasses("pass-remarks-filter",
+ cl::desc("Only record optimization remarks from passes whose "
+ "names match the given regular expression"),
+ cl::value_desc("regex"));
+
+static cl::opt<std::string> RemarksFormat(
+ "pass-remarks-format",
+ cl::desc("The format used for serializing remarks (default: YAML)"),
+ cl::value_desc("format"), cl::init("yaml"));
+
+cl::opt<PGOKind>
+ PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden,
+ cl::desc("The kind of profile guided optimization"),
+ cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
+ clEnumValN(InstrGen, "pgo-instr-gen-pipeline",
+ "Instrument the IR to generate profile."),
+ clEnumValN(InstrUse, "pgo-instr-use-pipeline",
+ "Use instrumented profile to guide PGO."),
+ clEnumValN(SampleUse, "pgo-sample-use-pipeline",
+ "Use sampled profile to guide PGO.")));
+cl::opt<std::string> ProfileFile("profile-file",
+ cl::desc("Path to the profile."), cl::Hidden);
+
+cl::opt<CSPGOKind> CSPGOKindFlag(
+ "cspgo-kind", cl::init(NoCSPGO), cl::Hidden,
+ cl::desc("The kind of context sensitive profile guided optimization"),
+ cl::values(
+ clEnumValN(NoCSPGO, "nocspgo", "Do not use CSPGO."),
+ clEnumValN(
+ CSInstrGen, "cspgo-instr-gen-pipeline",
+ "Instrument (context sensitive) the IR to generate profile."),
+ clEnumValN(
+ CSInstrUse, "cspgo-instr-use-pipeline",
+ "Use instrumented (context sensitive) profile to guide PGO.")));
+cl::opt<std::string> CSProfileGenFile(
+ "cs-profilegen-file",
+ cl::desc("Path to the instrumented context sensitive profile."),
+ cl::Hidden);
+
class OptCustomPassManager : public legacy::PassManager {
DebugifyStatsMap DIStatsMap;
@@ -348,15 +380,16 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
} else {
Builder.Inliner = createAlwaysInlinerLegacyPass();
}
- Builder.DisableUnitAtATime = !UnitAtATime;
Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
DisableLoopUnrolling : OptLevel == 0;
- // This is final, unless there is a #pragma vectorize enable
- if (DisableLoopVectorization)
- Builder.LoopVectorize = false;
- // If option wasn't forced via cmd line (-vectorize-loops, -loop-vectorize)
- else if (!Builder.LoopVectorize)
+  // Check whether loop vectorization was explicitly disabled with
+  // -vectorize-loops=false. That flag controls whether the LoopVectorize pass
+  // actually vectorizes; it is on by default, and if the user turned it off,
+  // leave it off here. A separate flag, -loop-vectorize, controls whether the
+  // pass is added to the pass manager at all; if it is set, the pass is added
+  // and no additional check is done here.
+ if (Builder.LoopVectorize)
Builder.LoopVectorize = OptLevel > 1 && SizeLevel < 2;
// When #pragma vectorize is on for SLP, do the same as above
@@ -369,6 +402,32 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
if (Coroutines)
addCoroutinePassesToExtensionPoints(Builder);
+ switch (PGOKindFlag) {
+ case InstrGen:
+ Builder.EnablePGOInstrGen = true;
+ Builder.PGOInstrGen = ProfileFile;
+ break;
+ case InstrUse:
+ Builder.PGOInstrUse = ProfileFile;
+ break;
+ case SampleUse:
+ Builder.PGOSampleUse = ProfileFile;
+ break;
+ default:
+ break;
+ }
+
+ switch (CSPGOKindFlag) {
+ case CSInstrGen:
+ Builder.EnablePGOCSInstrGen = true;
+ break;
+ case CSInstrUse:
+ Builder.EnablePGOCSInstrUse = true;
+ break;
+ default:
+ break;
+ }
+
Builder.populateFunctionPassManager(FPM);
Builder.populateModulePassManager(MPM);
}
@@ -464,6 +523,7 @@ int main(int argc, char **argv) {
initializeDwarfEHPreparePass(Registry);
initializeSafeStackLegacyPassPass(Registry);
initializeSjLjEHPreparePass(Registry);
+ initializeStackProtectorPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeGlobalMergePass(Registry);
initializeIndirectBrExpandPassPass(Registry);
@@ -475,6 +535,7 @@ int main(int argc, char **argv) {
initializeExpandReductionsPass(Registry);
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
+ initializeHardwareLoopsPass(Registry);
#ifdef LINK_POLLY_INTO_TOOLS
polly::initializePollyPasses(Registry);
@@ -494,24 +555,15 @@ int main(int argc, char **argv) {
if (!DisableDITypeMap)
Context.enableDebugTypeODRUniquing();
- if (PassRemarksWithHotness)
- Context.setDiagnosticsHotnessRequested(true);
-
- if (PassRemarksHotnessThreshold)
- Context.setDiagnosticsHotnessThreshold(PassRemarksHotnessThreshold);
-
- std::unique_ptr<ToolOutputFile> OptRemarkFile;
- if (RemarksFilename != "") {
- std::error_code EC;
- OptRemarkFile =
- llvm::make_unique<ToolOutputFile>(RemarksFilename, EC, sys::fs::F_None);
- if (EC) {
- errs() << EC.message() << '\n';
- return 1;
- }
- Context.setDiagnosticsOutputFile(
- llvm::make_unique<yaml::Output>(OptRemarkFile->os()));
+ Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
+ setupOptimizationRemarks(Context, RemarksFilename, RemarksPasses,
+ RemarksFormat, RemarksWithHotness,
+ RemarksHotnessThreshold);
+ if (Error E = RemarksFileOrErr.takeError()) {
+ errs() << toString(std::move(E)) << '\n';
+ return 1;
}
+ std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
// Load the input module...
std::unique_ptr<Module> M =
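The remarks setup above follows LLVM's Expected<T> idiom: setupOptimizationRemarks either yields the output file or an Error that must be consumed before exiting. A small sketch of the same consumption pattern, assuming only LLVM's Support library is available to link against; parsePort and its message are invented for illustration.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Toy factory in the style of setupOptimizationRemarks: on success it returns
// a value, on failure an Error wrapped inside the Expected.
static Expected<int> parsePort(StringRef S) {
  int Port;
  if (S.getAsInteger(10, Port))
    return createStringError(inconvertibleErrorCode(),
                             "invalid port '%s'", S.str().c_str());
  return Port;
}

int main() {
  Expected<int> PortOrErr = parsePort("80a");
  // takeError() must be checked; an unchecked Expected asserts on destruction.
  if (Error E = PortOrErr.takeError()) {
    errs() << toString(std::move(E)) << '\n';
    return 1;
  }
  outs() << "port: " << *PortOrErr << '\n';
  return 0;
}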
@@ -585,6 +637,11 @@ int main(int argc, char **argv) {
CPUStr = getCPUStr();
FeaturesStr = getFeaturesStr();
Machine = GetTargetMachine(ModuleTriple, CPUStr, FeaturesStr, Options);
+ } else if (ModuleTriple.getArchName() != "unknown" &&
+ ModuleTriple.getArchName() != "") {
+ errs() << argv[0] << ": unrecognized architecture '"
+ << ModuleTriple.getArchName() << "' provided.\n";
+ return 1;
}
std::unique_ptr<TargetMachine> TM(Machine);
@@ -620,7 +677,7 @@ int main(int argc, char **argv) {
// string. Hand off the rest of the functionality to the new code for that
// layer.
return runPassPipeline(argv[0], *M, TM.get(), Out.get(), ThinLinkOut.get(),
- OptRemarkFile.get(), PassPipeline, OK, VK,
+ RemarksFile.get(), PassPipeline, OK, VK,
PreserveAssemblyUseListOrder,
PreserveBitcodeUseListOrder, EmitSummaryIndex,
EmitModuleHash, EnableDebugify)
@@ -856,8 +913,8 @@ int main(int argc, char **argv) {
"the compile-twice option\n";
Out->os() << BOS->str();
Out->keep();
- if (OptRemarkFile)
- OptRemarkFile->keep();
+ if (RemarksFile)
+ RemarksFile->keep();
return 1;
}
Out->os() << BOS->str();
@@ -870,8 +927,8 @@ int main(int argc, char **argv) {
if (!NoOutput || PrintBreakpoints)
Out->keep();
- if (OptRemarkFile)
- OptRemarkFile->keep();
+ if (RemarksFile)
+ RemarksFile->keep();
if (ThinLinkOut)
ThinLinkOut->keep();
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 5b4229e64682..146d10835b8d 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1,9 +1,8 @@
//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1073,8 +1072,9 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
// handle, the target should be refactored to use operands instead of
// modifiers.
//
- // Also, check for instructions which reference the operand multiple times;
- // this implies a constraint we would not honor.
+ // Also, check for instructions which reference the operand multiple times,
+ // if they don't define a custom AsmMatcher: this implies a constraint that
+ // the built-in matching code would not honor.
std::set<std::string> OperandNames;
for (const AsmOperand &Op : AsmOperands) {
StringRef Tok = Op.Token;
@@ -1084,7 +1084,8 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
"' not supported by asm matcher. Mark isCodeGenOnly!");
// Verify that any operand is only mentioned once.
// We reject aliases and ignore instructions for now.
- if (!IsAlias && Tok[0] == '$' && !OperandNames.insert(Tok).second) {
+ if (!IsAlias && TheDef->getValueAsString("AsmMatchConverter").empty() &&
+ Tok[0] == '$' && !OperandNames.insert(Tok).second) {
LLVM_DEBUG({
errs() << "warning: '" << TheDef->getName() << "': "
<< "ignoring instruction with tied operand '"
@@ -1160,8 +1161,9 @@ AsmMatcherInfo::getOperandClass(Record *Rec, int SubOpIdx) {
// use it, else just fall back to the underlying register class.
const RecordVal *R = Rec->getValue("ParserMatchClass");
if (!R || !R->getValue())
- PrintFatalError("Record `" + Rec->getName() +
- "' does not have a ParserMatchClass!\n");
+ PrintFatalError(Rec->getLoc(),
+ "Record `" + Rec->getName() +
+ "' does not have a ParserMatchClass!\n");
if (DefInit *DI= dyn_cast<DefInit>(R->getValue())) {
Record *MatchClass = DI->getDef();
@@ -1473,7 +1475,6 @@ void AsmMatcherInfo::buildInfo() {
for (const auto &Pair : SubtargetFeatures)
LLVM_DEBUG(Pair.second.dump());
#endif // NDEBUG
- assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!");
bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst");
bool ReportMultipleNearMisses =
@@ -1928,10 +1929,11 @@ getConverterOperandID(const std::string &Name,
return ID;
}
-static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
- std::vector<std::unique_ptr<MatchableInfo>> &Infos,
- bool HasMnemonicFirst, bool HasOptionalOperands,
- raw_ostream &OS) {
+static unsigned
+emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
+ std::vector<std::unique_ptr<MatchableInfo>> &Infos,
+ bool HasMnemonicFirst, bool HasOptionalOperands,
+ raw_ostream &OS) {
SmallSetVector<CachedHashString, 16> OperandConversionKinds;
SmallSetVector<CachedHashString, 16> InstructionConversionKinds;
std::vector<std::vector<uint8_t> > ConversionTable;
@@ -2337,6 +2339,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Spit out the operand number lookup function.
OS << OpOS.str();
+
+ return ConversionTable.size();
}
/// emitMatchClassEnumeration - Emit the enumeration for match class kinds.
@@ -2675,7 +2679,7 @@ static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) {
for (const auto &SF : Info.SubtargetFeatures) {
const SubtargetFeatureInfo &SFI = SF.second;
// FIXME: Totally just a placeholder name to get the algorithm working.
- OS << " case " << SFI.getEnumName() << ": return \""
+ OS << " case " << SFI.getEnumBitName() << ": return \""
<< SFI.TheDef->getValueAsString("PredicateName") << "\";\n";
}
OS << " default: return \"(unknown)\";\n";
@@ -2691,7 +2695,10 @@ static std::string GetAliasRequiredFeatures(Record *R,
const AsmMatcherInfo &Info) {
std::vector<Record*> ReqFeatures = R->getValueAsListOfDefs("Predicates");
std::string Result;
- unsigned NumFeatures = 0;
+
+ if (ReqFeatures.empty())
+ return Result;
+
for (unsigned i = 0, e = ReqFeatures.size(); i != e; ++i) {
const SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]);
@@ -2699,15 +2706,12 @@ static std::string GetAliasRequiredFeatures(Record *R,
PrintFatalError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() +
"' is not marked as an AssemblerPredicate!");
- if (NumFeatures)
- Result += '|';
+ if (i)
+ Result += " && ";
- Result += F->getEnumName();
- ++NumFeatures;
+ Result += "Features.test(" + F->getEnumBitName() + ')';
}
- if (NumFeatures > 1)
- Result = '(' + Result + ')';
return Result;
}
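The rewrite above switches the generated alias guard from OR-ing 64-bit feature masks to a conjunction of FeatureBitset::test() calls, which removes the 64-feature ceiling. A stand-alone sketch of what the emitted check boils down to, using std::bitset in place of FeatureBitset; the feature names and bit indices are hypothetical.

#include <bitset>
#include <iostream>

// Stand-ins for the generated Feature_* bit indices (made-up values).
enum : unsigned { Feature_HasSSE2 = 3, Feature_Is64Bit = 17, NumFeatures = 96 };
using FeatureBits = std::bitset<NumFeatures>;

// The emitted alias guard is now "Features.test(A) && Features.test(B) && ..."
// rather than "(Features & Mask) == Mask" on a uint64_t.
static bool aliasApplies(const FeatureBits &Features) {
  return Features.test(Feature_HasSSE2) && Features.test(Feature_Is64Bit);
}

int main() {
  FeatureBits Features;
  Features.set(Feature_HasSSE2);
  std::cout << aliasApplies(Features) << '\n'; // 0: Is64Bit still missing
  Features.set(Feature_Is64Bit);
  std::cout << aliasApplies(Features) << '\n'; // 1
}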
@@ -2763,7 +2767,7 @@ static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
if (!MatchCode.empty())
MatchCode += "else ";
- MatchCode += "if ((Features & " + FeatureMask + ") == "+FeatureMask+")\n";
+ MatchCode += "if (" + FeatureMask + ")\n";
MatchCode += " Mnemonic = \"";
MatchCode += R->getValueAsString("ToMnemonic");
MatchCode += "\";\n";
@@ -2798,7 +2802,7 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
if (Aliases.empty()) return false;
OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
- "uint64_t Features, unsigned VariantID) {\n";
+ "const FeatureBitset &Features, unsigned VariantID) {\n";
OS << " switch (VariantID) {\n";
unsigned VariantCount = Target.getAsmParserVariantCount();
for (unsigned VC = 0; VC != VariantCount; ++VC) {
@@ -2823,7 +2827,9 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
const AsmMatcherInfo &Info, StringRef ClassName,
StringToOffsetTable &StringTable,
- unsigned MaxMnemonicIndex, bool HasMnemonicFirst) {
+ unsigned MaxMnemonicIndex,
+ unsigned MaxFeaturesIndex,
+ bool HasMnemonicFirst) {
unsigned MaxMask = 0;
for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) {
MaxMask |= OMI.OperandMask;
@@ -2832,14 +2838,14 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit the static custom operand parsing table;
OS << "namespace {\n";
OS << " struct OperandMatchEntry {\n";
- OS << " " << getMinimalTypeForEnumBitfield(Info.SubtargetFeatures.size())
- << " RequiredFeatures;\n";
OS << " " << getMinimalTypeForRange(MaxMnemonicIndex)
<< " Mnemonic;\n";
+ OS << " " << getMinimalTypeForRange(MaxMask)
+ << " OperandMask;\n";
OS << " " << getMinimalTypeForRange(std::distance(
Info.Classes.begin(), Info.Classes.end())) << " Class;\n";
- OS << " " << getMinimalTypeForRange(MaxMask)
- << " OperandMask;\n\n";
+ OS << " " << getMinimalTypeForRange(MaxFeaturesIndex)
+ << " RequiredFeaturesIdx;\n\n";
OS << " StringRef getMnemonic() const {\n";
OS << " return StringRef(MnemonicTable + Mnemonic + 1,\n";
OS << " MnemonicTable[Mnemonic]);\n";
@@ -2865,29 +2871,18 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
OS << "static const OperandMatchEntry OperandMatchTable["
<< Info.OperandMatchInfo.size() << "] = {\n";
- OS << " /* Operand List Mask, Mnemonic, Operand Class, Features */\n";
+ OS << " /* Operand List Mnemonic, Mask, Operand Class, Features */\n";
for (const OperandMatchEntry &OMI : Info.OperandMatchInfo) {
const MatchableInfo &II = *OMI.MI;
OS << " { ";
- // Write the required features mask.
- if (!II.RequiredFeatures.empty()) {
- for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) {
- if (i) OS << "|";
- OS << II.RequiredFeatures[i]->getEnumName();
- }
- } else
- OS << "0";
-
// Store a pascal-style length byte in the mnemonic.
std::string LenMnemonic = char(II.Mnemonic.size()) + II.Mnemonic.str();
- OS << ", " << StringTable.GetOrAddStringOffset(LenMnemonic, false)
+ OS << StringTable.GetOrAddStringOffset(LenMnemonic, false)
<< " /* " << II.Mnemonic << " */, ";
- OS << OMI.CI->Name;
-
- OS << ", " << OMI.OperandMask;
+ OS << OMI.OperandMask;
OS << " /* ";
bool printComma = false;
for (int i = 0, e = 31; i !=e; ++i)
@@ -2897,7 +2892,17 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
OS << i;
printComma = true;
}
- OS << " */";
+ OS << " */, ";
+
+ OS << OMI.CI->Name;
+
+ // Write the required features mask.
+ OS << ", AMFBS";
+ if (II.RequiredFeatures.empty())
+ OS << "_None";
+ else
+ for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i)
+ OS << '_' << II.RequiredFeatures[i]->TheDef->getName();
OS << " },\n";
}
@@ -2933,7 +2938,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit code to get the available features.
OS << " // Get the current feature set.\n";
- OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
+ OS << " const FeatureBitset &AvailableFeatures = getAvailableFeatures();\n\n";
OS << " // Get the next operand index.\n";
OS << " unsigned NextOpNum = Operands.size()"
@@ -2967,8 +2972,10 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit check that the required features are available.
OS << " // check if the available features match\n";
+ OS << " const FeatureBitset &RequiredFeatures = "
+ "FeatureBitsets[it->RequiredFeaturesIdx];\n";
OS << " if (!ParseForAllFeatures && (AvailableFeatures & "
- "it->RequiredFeatures) != it->RequiredFeatures)\n";
+ "RequiredFeatures) != RequiredFeatures)\n";
OS << " continue;\n\n";
// Emit check to ensure the operand number matches.
@@ -3034,7 +3041,8 @@ static void emitAsmTiedOperandConstraints(CodeGenTarget &Target,
static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
unsigned VariantCount) {
OS << "static std::string " << Target.getName()
- << "MnemonicSpellCheck(StringRef S, uint64_t FBS, unsigned VariantID) {\n";
+ << "MnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,"
+ << " unsigned VariantID) {\n";
if (!VariantCount)
OS << " return \"\";";
else {
@@ -3055,7 +3063,9 @@ static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
OS << " }\n\n";
OS << " for (auto I = Start; I < End; I++) {\n";
OS << " // Ignore unsupported instructions.\n";
- OS << " if ((FBS & I->RequiredFeatures) != I->RequiredFeatures)\n";
+ OS << " const FeatureBitset &RequiredFeatures = "
+ "FeatureBitsets[I->RequiredFeaturesIdx];\n";
+ OS << " if ((FBS & RequiredFeatures) != RequiredFeatures)\n";
OS << " continue;\n";
OS << "\n";
OS << " StringRef T = I->getMnemonic();\n";
@@ -3103,6 +3113,14 @@ static void emitMatchClassKindNames(std::forward_list<ClassInfo> &Infos,
OS << "#endif // NDEBUG\n";
}
+static std::string
+getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
+ std::string Name = "AMFBS";
+ for (const auto &Feature : FeatureBitset)
+ Name += ("_" + Feature->getName()).str();
+ return Name;
+}
+
void AsmMatcherEmitter::run(raw_ostream &OS) {
CodeGenTarget Target(Records);
Record *AsmParser = Target.getAsmParser();
@@ -3115,10 +3133,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Sort the instruction table using the partial order on classes. We use
// stable_sort to ensure that ambiguous instructions are still
// deterministically ordered.
- std::stable_sort(Info.Matchables.begin(), Info.Matchables.end(),
- [](const std::unique_ptr<MatchableInfo> &a,
- const std::unique_ptr<MatchableInfo> &b){
- return *a < *b;});
+ llvm::stable_sort(
+ Info.Matchables,
+ [](const std::unique_ptr<MatchableInfo> &a,
+ const std::unique_ptr<MatchableInfo> &b) { return *a < *b; });
#ifdef EXPENSIVE_CHECKS
// Verify that the table is sorted and operator < works transitively.
@@ -3174,7 +3192,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "#undef GET_ASSEMBLER_HEADER\n";
OS << " // This should be included into the middle of the declaration of\n";
OS << " // your subclasses implementation of MCTargetAsmParser.\n";
- OS << " uint64_t ComputeAvailableFeatures(const FeatureBitset& FB) const;\n";
+ OS << " FeatureBitset ComputeAvailableFeatures(const FeatureBitset& FB) const;\n";
if (HasOptionalOperands) {
OS << " void convertToMCInst(unsigned Kind, MCInst &Inst, "
<< "unsigned Opcode,\n"
@@ -3192,9 +3210,21 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (ReportMultipleNearMisses)
OS << " SmallVectorImpl<NearMissInfo> *NearMisses,\n";
else
- OS << " uint64_t &ErrorInfo,\n";
+ OS << " uint64_t &ErrorInfo,\n"
+ << " FeatureBitset &MissingFeatures,\n";
OS << " bool matchingInlineAsm,\n"
<< " unsigned VariantID = 0);\n";
+ if (!ReportMultipleNearMisses)
+ OS << " unsigned MatchInstructionImpl(const OperandVector &Operands,\n"
+ << " MCInst &Inst,\n"
+ << " uint64_t &ErrorInfo,\n"
+ << " bool matchingInlineAsm,\n"
+ << " unsigned VariantID = 0) {\n"
+ << " FeatureBitset MissingFeatures;\n"
+ << " return MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,\n"
+ << " matchingInlineAsm, VariantID);\n"
+ << " }\n\n";
+
if (!Info.OperandMatchInfo.empty()) {
OS << " OperandMatchResultTy MatchOperandParserImpl(\n";
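To keep existing callers compiling, the header above also emits a forwarding overload: the old MatchInstructionImpl signature creates a local MissingFeatures bitset and calls the new one. A minimal sketch of that compatibility-shim pattern, with invented names and return codes.

#include <bitset>
#include <cstdint>
#include <iostream>

using FeatureBits = std::bitset<96>;

// New interface: reports which features were missing on failure.
unsigned matchImpl(uint64_t &ErrorInfo, FeatureBits &MissingFeatures) {
  MissingFeatures.set(5); // pretend feature bit 5 was required but absent
  ErrorInfo = 0;
  return 1;               // stands in for Match_MissingFeature
}

// Old interface, kept as a thin shim so existing callers need no changes.
unsigned matchImpl(uint64_t &ErrorInfo) {
  FeatureBits MissingFeatures;
  return matchImpl(ErrorInfo, MissingFeatures);
}

int main() {
  uint64_t EI;
  std::cout << matchImpl(EI) << '\n'; // 1
}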
@@ -3219,7 +3249,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "#undef GET_REGISTER_MATCHER\n\n";
// Emit the subtarget feature enumeration.
- SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
+ SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(
Info.SubtargetFeatures, OS);
// Emit the function to match a register name to number.
@@ -3249,8 +3279,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Generate the convertToMCInst function to convert operands into an MCInst.
// Also, generate the convertToMapAndConstraints function for MS-style inline
// assembly. The latter doesn't actually generate a MCInst.
- emitConvertFuncs(Target, ClassName, Info.Matchables, HasMnemonicFirst,
- HasOptionalOperands, OS);
+ unsigned NumConverters = emitConvertFuncs(Target, ClassName, Info.Matchables,
+ HasMnemonicFirst,
+ HasOptionalOperands, OS);
// Emit the enumeration for classes which participate in matching.
emitMatchClassEnumeration(Target, Info.Classes, OS);
@@ -3300,6 +3331,56 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
StringTable.EmitString(OS);
OS << ";\n\n";
+ std::vector<std::vector<Record *>> FeatureBitsets;
+ for (const auto &MI : Info.Matchables) {
+ if (MI->RequiredFeatures.empty())
+ continue;
+ FeatureBitsets.emplace_back();
+ for (unsigned I = 0, E = MI->RequiredFeatures.size(); I != E; ++I)
+ FeatureBitsets.back().push_back(MI->RequiredFeatures[I]->TheDef);
+ }
+
+ llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+ const std::vector<Record *> &B) {
+ if (A.size() < B.size())
+ return true;
+ if (A.size() > B.size())
+ return false;
+ for (const auto &Pair : zip(A, B)) {
+ if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
+ return true;
+ if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
+ return false;
+ }
+ return false;
+ });
+ FeatureBitsets.erase(
+ std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
+ FeatureBitsets.end());
+ OS << "// Feature bitsets.\n"
+ << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
+ << " AMFBS_None,\n";
+ for (const auto &FeatureBitset : FeatureBitsets) {
+ if (FeatureBitset.empty())
+ continue;
+ OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
+ }
+ OS << "};\n\n"
+ << "const static FeatureBitset FeatureBitsets[] {\n"
+ << " {}, // AMFBS_None\n";
+ for (const auto &FeatureBitset : FeatureBitsets) {
+ if (FeatureBitset.empty())
+ continue;
+ OS << " {";
+ for (const auto &Feature : FeatureBitset) {
+ const auto &I = Info.SubtargetFeatures.find(Feature);
+ assert(I != Info.SubtargetFeatures.end() && "Didn't import predicate?");
+ OS << I->second.getEnumBitName() << ", ";
+ }
+ OS << "},\n";
+ }
+ OS << "};\n\n";
+
// Emit the static match table; unused classes get initialized to 0 which is
// guaranteed to be InvalidMatchClass.
//
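The block above gathers every instruction's required-feature list, orders the lists (shorter first, then by feature name), and drops duplicates with std::unique so each distinct combination is emitted once under a mangled AMFBS_<features> name. The same dedup-and-name pattern in miniature, on plain strings with made-up feature names.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::vector<std::string>> Sets = {
      {"HasAVX"}, {"HasAVX", "Is64Bit"}, {"HasAVX"}, {"Is64Bit"}};

  // Order by size first, then element-wise by name, mirroring the emitter's
  // comparator; equal sets become adjacent so std::unique can drop them.
  std::sort(Sets.begin(), Sets.end(),
            [](const std::vector<std::string> &A,
               const std::vector<std::string> &B) {
              if (A.size() != B.size())
                return A.size() < B.size();
              return A < B; // lexicographic on the names
            });
  Sets.erase(std::unique(Sets.begin(), Sets.end()), Sets.end());

  for (const auto &S : Sets) {
    std::string Name = "AMFBS";           // e.g. AMFBS_HasAVX_Is64Bit
    for (const auto &F : S)
      Name += "_" + F;
    std::cout << Name << '\n';
  }
}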
@@ -3315,10 +3396,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " " << getMinimalTypeForRange(MaxMnemonicIndex)
<< " Mnemonic;\n";
OS << " uint16_t Opcode;\n";
- OS << " " << getMinimalTypeForRange(Info.Matchables.size())
+ OS << " " << getMinimalTypeForRange(NumConverters)
<< " ConvertFn;\n";
- OS << " " << getMinimalTypeForEnumBitfield(Info.SubtargetFeatures.size())
- << " RequiredFeatures;\n";
+ OS << " " << getMinimalTypeForRange(FeatureBitsets.size())
+ << " RequiredFeaturesIdx;\n";
OS << " " << getMinimalTypeForRange(
std::distance(Info.Classes.begin(), Info.Classes.end()))
<< " Classes[" << MaxNumOperands << "];\n";
@@ -3363,13 +3444,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< MI->ConversionFnKind << ", ";
// Write the required features mask.
- if (!MI->RequiredFeatures.empty()) {
- for (unsigned i = 0, e = MI->RequiredFeatures.size(); i != e; ++i) {
- if (i) OS << "|";
- OS << MI->RequiredFeatures[i]->getEnumName();
- }
- } else
- OS << "0";
+ OS << "AMFBS";
+ if (MI->RequiredFeatures.empty())
+ OS << "_None";
+ else
+ for (unsigned i = 0, e = MI->RequiredFeatures.size(); i != e; ++i)
+ OS << '_' << MI->RequiredFeatures[i]->TheDef->getName();
OS << ", { ";
for (unsigned i = 0, e = MI->AsmOperands.size(); i != e; ++i) {
@@ -3394,7 +3474,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (ReportMultipleNearMisses)
OS << " SmallVectorImpl<NearMissInfo> *NearMisses,\n";
else
- OS << " uint64_t &ErrorInfo,\n";
+ OS << " uint64_t &ErrorInfo,\n"
+ << " FeatureBitset &MissingFeatures,\n";
OS << " bool matchingInlineAsm, unsigned VariantID) {\n";
if (!ReportMultipleNearMisses) {
@@ -3409,7 +3490,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Emit code to get the available features.
OS << " // Get the current feature set.\n";
- OS << " uint64_t AvailableFeatures = getAvailableFeatures();\n\n";
+ OS << " const FeatureBitset &AvailableFeatures = getAvailableFeatures();\n\n";
OS << " // Get the instruction mnemonic, which is the first token.\n";
if (HasMnemonicFirst) {
@@ -3433,7 +3514,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " bool HadMatchOtherThanFeatures = false;\n";
OS << " bool HadMatchOtherThanPredicate = false;\n";
OS << " unsigned RetCode = Match_InvalidOperand;\n";
- OS << " uint64_t MissingFeatures = ~0ULL;\n";
+ OS << " MissingFeatures.set();\n";
OS << " // Set ErrorInfo to the operand that mismatches if it is\n";
OS << " // wrong for all instances of the instruction.\n";
OS << " ErrorInfo = ~0ULL;\n";
@@ -3479,9 +3560,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " for (const MatchEntry *it = MnemonicRange.first, "
<< "*ie = MnemonicRange.second;\n";
OS << " it != ie; ++it) {\n";
+ OS << " const FeatureBitset &RequiredFeatures = "
+ "FeatureBitsets[it->RequiredFeaturesIdx];\n";
OS << " bool HasRequiredFeatures =\n";
- OS << " (AvailableFeatures & it->RequiredFeatures) == "
- "it->RequiredFeatures;\n";
+ OS << " (AvailableFeatures & RequiredFeatures) == RequiredFeatures;\n";
OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Trying to match opcode \"\n";
OS << " << MII.getName(it->Opcode) << \"\\n\");\n";
@@ -3640,16 +3722,18 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " if (!HasRequiredFeatures) {\n";
if (!ReportMultipleNearMisses)
OS << " HadMatchOtherThanFeatures = true;\n";
- OS << " uint64_t NewMissingFeatures = it->RequiredFeatures & "
+ OS << " FeatureBitset NewMissingFeatures = RequiredFeatures & "
"~AvailableFeatures;\n";
- OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Missing target features: \"\n";
- OS << " << format_hex(NewMissingFeatures, 18)\n";
- OS << " << \"\\n\");\n";
+ OS << " DEBUG_WITH_TYPE(\"asm-matcher\", dbgs() << \"Missing target features:\";\n";
+ OS << " for (unsigned I = 0, E = NewMissingFeatures.size(); I != E; ++I)\n";
+ OS << " if (NewMissingFeatures[I])\n";
+ OS << " dbgs() << ' ' << I;\n";
+ OS << " dbgs() << \"\\n\");\n";
if (ReportMultipleNearMisses) {
OS << " FeaturesNearMiss = NearMissInfo::getMissedFeature(NewMissingFeatures);\n";
} else {
- OS << " if (countPopulation(NewMissingFeatures) <=\n"
- " countPopulation(MissingFeatures))\n";
+ OS << " if (NewMissingFeatures.count() <=\n"
+ " MissingFeatures.count())\n";
OS << " MissingFeatures = NewMissingFeatures;\n";
OS << " continue;\n";
}
@@ -3804,15 +3888,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " // Okay, we had no match. Try to return a useful error code.\n";
OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n";
OS << " return RetCode;\n\n";
- OS << " // Missing feature matches return which features were missing\n";
- OS << " ErrorInfo = MissingFeatures;\n";
+ OS << " ErrorInfo = 0;\n";
OS << " return Match_MissingFeature;\n";
}
OS << "}\n\n";
if (!Info.OperandMatchInfo.empty())
emitCustomOperandParsing(OS, Target, Info, ClassName, StringTable,
- MaxMnemonicIndex, HasMnemonicFirst);
+ MaxMnemonicIndex, FeatureBitsets.size(),
+ HasMnemonicFirst);
OS << "#endif // GET_MATCHER_IMPLEMENTATION\n\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index a8f191181766..05d81f133505 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -1,9 +1,8 @@
//===- AsmWriterEmitter.cpp - Generate an assembly writer -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -586,11 +585,20 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
O << " case ";
if (!Namespace.empty())
O << Namespace << "::";
- O << AltName << ":\n"
- << " assert(*(AsmStrs" << AltName << "+RegAsmOffset" << AltName
- << "[RegNo-1]) &&\n"
- << " \"Invalid alt name index for register!\");\n"
- << " return AsmStrs" << AltName << "+RegAsmOffset" << AltName
+ O << AltName << ":\n";
+ if (R->isValueUnset("FallbackRegAltNameIndex"))
+ O << " assert(*(AsmStrs" << AltName << "+RegAsmOffset" << AltName
+ << "[RegNo-1]) &&\n"
+ << " \"Invalid alt name index for register!\");\n";
+ else {
+ O << " if (!*(AsmStrs" << AltName << "+RegAsmOffset" << AltName
+ << "[RegNo-1]))\n"
+ << " return getRegisterName(RegNo, ";
+ if (!Namespace.empty())
+ O << Namespace << "::";
+ O << R->getValueAsDef("FallbackRegAltNameIndex")->getName() << ");\n";
+ }
+ O << " return AsmStrs" << AltName << "+RegAsmOffset" << AltName
<< "[RegNo-1];\n";
}
O << " }\n";
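The hunk above lets a register whose alternate-name table entry is empty fall back to another name index (FallbackRegAltNameIndex) instead of asserting. A toy model of the emitted lookup, with invented register names, tables, and indices.

#include <cassert>
#include <iostream>
#include <string>

enum AltNameIndex { NoRegAltName, ABIRegAltName };

// Hypothetical name tables: the ABI alt-name for r1 is intentionally empty.
static const std::string DefaultNames[] = {"r0", "r1"};
static const std::string ABINames[]     = {"zero", ""};

std::string getRegisterName(unsigned RegNo, AltNameIndex Idx) {
  assert(RegNo >= 1 && RegNo <= 2 && "Invalid register number!");
  switch (Idx) {
  case ABIRegAltName:
    // With a fallback index, an empty entry redirects to the fallback table
    // instead of tripping the "Invalid alt name index" assertion.
    if (ABINames[RegNo - 1].empty())
      return getRegisterName(RegNo, NoRegAltName);
    return ABINames[RegNo - 1];
  case NoRegAltName:
    return DefaultNames[RegNo - 1];
  }
  return {};
}

int main() {
  std::cout << getRegisterName(1, ABIRegAltName) << '\n'; // "zero"
  std::cout << getRegisterName(2, ABIRegAltName) << '\n'; // falls back to "r1"
}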
diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp
index 2c19e5d663d6..c26e0e421183 100644
--- a/utils/TableGen/AsmWriterInst.cpp
+++ b/utils/TableGen/AsmWriterInst.cpp
@@ -1,9 +1,8 @@
//===- AsmWriterInst.h - Classes encapsulating a printable inst -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -94,8 +93,10 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
!= std::string::npos) {
AddLiteralString(std::string(1, AsmString[DollarPos+1]));
} else {
- PrintFatalError("Non-supported escaped character found in instruction '" +
- CGI.TheDef->getName() + "'!");
+ PrintFatalError(
+ CGI.TheDef->getLoc(),
+ "Non-supported escaped character found in instruction '" +
+ CGI.TheDef->getName() + "'!");
}
LastEmitted = DollarPos+2;
continue;
@@ -132,15 +133,19 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
// brace.
if (hasCurlyBraces) {
if (VarEnd >= AsmString.size())
- PrintFatalError("Reached end of string before terminating curly brace in '"
- + CGI.TheDef->getName() + "'");
+ PrintFatalError(
+ CGI.TheDef->getLoc(),
+ "Reached end of string before terminating curly brace in '" +
+ CGI.TheDef->getName() + "'");
// Look for a modifier string.
if (AsmString[VarEnd] == ':') {
++VarEnd;
if (VarEnd >= AsmString.size())
- PrintFatalError("Reached end of string before terminating curly brace in '"
- + CGI.TheDef->getName() + "'");
+ PrintFatalError(
+ CGI.TheDef->getLoc(),
+ "Reached end of string before terminating curly brace in '" +
+ CGI.TheDef->getName() + "'");
std::string::size_type ModifierStart = VarEnd;
while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd]))
@@ -148,17 +153,22 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
Modifier = std::string(AsmString.begin()+ModifierStart,
AsmString.begin()+VarEnd);
if (Modifier.empty())
- PrintFatalError("Bad operand modifier name in '"+ CGI.TheDef->getName() + "'");
+ PrintFatalError(CGI.TheDef->getLoc(),
+ "Bad operand modifier name in '" +
+ CGI.TheDef->getName() + "'");
}
if (AsmString[VarEnd] != '}')
- PrintFatalError("Variable name beginning with '{' did not end with '}' in '"
- + CGI.TheDef->getName() + "'");
+ PrintFatalError(
+ CGI.TheDef->getLoc(),
+ "Variable name beginning with '{' did not end with '}' in '" +
+ CGI.TheDef->getName() + "'");
++VarEnd;
}
if (VarName.empty() && Modifier.empty())
- PrintFatalError("Stray '$' in '" + CGI.TheDef->getName() +
- "' asm string, maybe you want $$?");
+ PrintFatalError(CGI.TheDef->getLoc(),
+ "Stray '$' in '" + CGI.TheDef->getName() +
+ "' asm string, maybe you want $$?");
if (VarName.empty()) {
// Just a modifier, pass this into PrintSpecial.
diff --git a/utils/TableGen/AsmWriterInst.h b/utils/TableGen/AsmWriterInst.h
index 708f23cb5b0e..7d88e5a9d037 100644
--- a/utils/TableGen/AsmWriterInst.h
+++ b/utils/TableGen/AsmWriterInst.h
@@ -1,9 +1,8 @@
//===- AsmWriterInst.h - Classes encapsulating a printable inst -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/Attributes.cpp b/utils/TableGen/Attributes.cpp
index 6bfc0ab896f9..6fbc595d7300 100644
--- a/utils/TableGen/Attributes.cpp
+++ b/utils/TableGen/Attributes.cpp
@@ -1,9 +1,8 @@
//===- Attributes.cpp - Generate attributes -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp
index bd596bcb47a8..ccb7f3300dde 100644
--- a/utils/TableGen/CTagsEmitter.cpp
+++ b/utils/TableGen/CTagsEmitter.cpp
@@ -1,9 +1,8 @@
//===- CTagsEmitter.cpp - Generate ctags-compatible index ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index d452031f8850..de5044e24d49 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -1,9 +1,8 @@
//===- CallingConvEmitter.cpp - Generate calling conventions --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -41,11 +40,17 @@ void CallingConvEmitter::run(raw_ostream &O) {
// each other.
for (Record *CC : CCs) {
if (!CC->getValueAsBit("Custom")) {
- O << "static bool " << CC->getName()
- << "(unsigned ValNo, MVT ValVT,\n"
- << std::string(CC->getName().size() + 13, ' ')
- << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
- << std::string(CC->getName().size() + 13, ' ')
+ unsigned Pad = CC->getName().size();
+ if (CC->getValueAsBit("Entry")) {
+ O << "bool llvm::";
+ Pad += 12;
+ } else {
+ O << "static bool ";
+ Pad += 13;
+ }
+ O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n"
+ << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+ << std::string(Pad, ' ')
<< "ISD::ArgFlagsTy ArgFlags, CCState &State);\n";
}
}
@@ -62,12 +67,18 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
ListInit *CCActions = CC->getValueAsListInit("Actions");
Counter = 0;
- O << "\n\nstatic bool " << CC->getName()
- << "(unsigned ValNo, MVT ValVT,\n"
- << std::string(CC->getName().size()+13, ' ')
- << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
- << std::string(CC->getName().size()+13, ' ')
- << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
+ O << "\n\n";
+ unsigned Pad = CC->getName().size();
+ if (CC->getValueAsBit("Entry")) {
+ O << "bool llvm::";
+ Pad += 12;
+ } else {
+ O << "static bool ";
+ Pad += 13;
+ }
+ O << CC->getName() << "(unsigned ValNo, MVT ValVT,\n"
+ << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
+ << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
// Emit all of the actions, in order.
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) {
O << "\n";
@@ -97,7 +108,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << Action->getValueAsString("Predicate");
} else {
errs() << *Action;
- PrintFatalError("Unknown CCPredicateAction!");
+ PrintFatalError(Action->getLoc(), "Unknown CCPredicateAction!");
}
O << ") {\n";
@@ -134,7 +145,8 @@ void CallingConvEmitter::EmitAction(Record *Action,
ListInit *RegList = Action->getValueAsListInit("RegList");
ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList");
if (!ShadowRegList->empty() && ShadowRegList->size() != RegList->size())
- PrintFatalError("Invalid length of list of shadowed registers");
+ PrintFatalError(Action->getLoc(),
+ "Invalid length of list of shadowed registers");
if (RegList->size() == 1) {
O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
@@ -237,7 +249,8 @@ void CallingConvEmitter::EmitAction(Record *Action,
MVT::SimpleValueType DestVT = getValueType(DestTy);
O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n";
if (MVT(DestVT).isFloatingPoint()) {
- PrintFatalError("CCPromoteToUpperBitsInType does not handle floating "
+ PrintFatalError(Action->getLoc(),
+ "CCPromoteToUpperBitsInType does not handle floating "
"point");
} else {
O << IndentStr << "if (ArgFlags.isSExt())\n"
@@ -269,7 +282,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << IndentStr << "return false;\n";
} else {
errs() << *Action;
- PrintFatalError("Unknown CCAction!");
+ PrintFatalError(Action->getLoc(), "Unknown CCAction!");
}
}
}
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 23751a2cbfba..da65763905a8 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -1,9 +1,8 @@
//===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "SubtargetFeatureInfo.h"
+#include "Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Casting.h"
@@ -229,6 +229,14 @@ std::string CodeEmitterGen::getInstructionCase(Record *R,
return Case;
}
+static std::string
+getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
+ std::string Name = "CEFBS";
+ for (const auto &Feature : FeatureBitset)
+ Name += ("_" + Feature->getName()).str();
+ return Name;
+}
+
void CodeEmitterGen::run(raw_ostream &o) {
CodeGenTarget Target(Records);
std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
@@ -327,8 +335,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< "#include <sstream>\n\n";
// Emit the subtarget feature enumeration.
- SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(SubtargetFeatures,
- o);
+ SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures,
+ o);
// Emit the name table for error messages.
o << "#ifndef NDEBUG\n";
@@ -340,35 +348,97 @@ void CodeEmitterGen::run(raw_ostream &o) {
Target.getName(), "MCCodeEmitter", "computeAvailableFeatures",
SubtargetFeatures, o);
+ std::vector<std::vector<Record *>> FeatureBitsets;
+ for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
+ FeatureBitsets.emplace_back();
+ for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) {
+ const auto &I = SubtargetFeatures.find(Predicate);
+ if (I != SubtargetFeatures.end())
+ FeatureBitsets.back().push_back(I->second.TheDef);
+ }
+ }
+
+ llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+ const std::vector<Record *> &B) {
+ if (A.size() < B.size())
+ return true;
+ if (A.size() > B.size())
+ return false;
+ for (const auto &Pair : zip(A, B)) {
+ if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
+ return true;
+ if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
+ return false;
+ }
+ return false;
+ });
+ FeatureBitsets.erase(
+ std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
+ FeatureBitsets.end());
+ o << "#ifndef NDEBUG\n"
+ << "// Feature bitsets.\n"
+ << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
+ << " CEFBS_None,\n";
+ for (const auto &FeatureBitset : FeatureBitsets) {
+ if (FeatureBitset.empty())
+ continue;
+ o << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
+ }
+ o << "};\n\n"
+ << "const static FeatureBitset FeatureBitsets[] {\n"
+ << " {}, // CEFBS_None\n";
+ for (const auto &FeatureBitset : FeatureBitsets) {
+ if (FeatureBitset.empty())
+ continue;
+ o << " {";
+ for (const auto &Feature : FeatureBitset) {
+ const auto &I = SubtargetFeatures.find(Feature);
+ assert(I != SubtargetFeatures.end() && "Didn't import predicate?");
+ o << I->second.getEnumBitName() << ", ";
+ }
+ o << "},\n";
+ }
+ o << "};\n"
+ << "#endif // NDEBUG\n\n";
+
+
// Emit the predicate verifier.
o << "void " << Target.getName()
<< "MCCodeEmitter::verifyInstructionPredicates(\n"
- << " const MCInst &Inst, uint64_t AvailableFeatures) const {\n"
+ << " const MCInst &Inst, const FeatureBitset &AvailableFeatures) const {\n"
<< "#ifndef NDEBUG\n"
- << " static uint64_t RequiredFeatures[] = {\n";
+ << " static " << getMinimalTypeForRange(FeatureBitsets.size())
+ << " RequiredFeaturesRefs[] = {\n";
unsigned InstIdx = 0;
for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
- o << " ";
+ o << " CEFBS";
+ unsigned NumPredicates = 0;
for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) {
const auto &I = SubtargetFeatures.find(Predicate);
- if (I != SubtargetFeatures.end())
- o << I->second.getEnumName() << " | ";
+ if (I != SubtargetFeatures.end()) {
+ o << '_' << I->second.TheDef->getName();
+ NumPredicates++;
+ }
}
- o << "0, // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
+ if (!NumPredicates)
+ o << "_None";
+ o << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
InstIdx++;
}
o << " };\n\n";
o << " assert(Inst.getOpcode() < " << InstIdx << ");\n";
- o << " uint64_t MissingFeatures =\n"
- << " (AvailableFeatures & RequiredFeatures[Inst.getOpcode()]) ^\n"
- << " RequiredFeatures[Inst.getOpcode()];\n"
- << " if (MissingFeatures) {\n"
+ o << " const FeatureBitset &RequiredFeatures = "
+ "FeatureBitsets[RequiredFeaturesRefs[Inst.getOpcode()]];\n";
+ o << " FeatureBitset MissingFeatures =\n"
+ << " (AvailableFeatures & RequiredFeatures) ^\n"
+ << " RequiredFeatures;\n"
+ << " if (MissingFeatures.any()) {\n"
<< " std::ostringstream Msg;\n"
<< " Msg << \"Attempting to emit \" << "
"MCII.getName(Inst.getOpcode()).str()\n"
<< " << \" instruction but the \";\n"
- << " for (unsigned i = 0; i < 8 * sizeof(MissingFeatures); ++i)\n"
- << " if (MissingFeatures & (1ULL << i))\n"
+ << " for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)\n"
+ << " if (MissingFeatures.test(i))\n"
<< " Msg << SubtargetFeatureNames[i] << \" \";\n"
<< " Msg << \"predicate(s) are not met\";\n"
<< " report_fatal_error(Msg.str());\n"
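The emitted verifier now works on FeatureBitset values: the features that are required but unavailable are computed as (Available & Required) ^ Required, and any() decides whether to report. A self-contained sketch of that arithmetic with std::bitset; the bit indices and their meanings are invented.

#include <bitset>
#include <iostream>

constexpr unsigned NumFeatures = 96;      // more than the old 64-bit mask limit
using FeatureBits = std::bitset<NumFeatures>;

int main() {
  FeatureBits Available, Required;
  Available.set(3);                       // hypothetical feature bit 3
  Required.set(3);
  Required.set(70);                       // hypothetical feature bit 70

  // Bits that are required but not available.
  FeatureBits Missing = (Available & Required) ^ Required;
  if (Missing.any()) {
    std::cout << "missing feature bits:";
    for (unsigned I = 0; I != Missing.size(); ++I)
      if (Missing.test(I))
        std::cout << ' ' << I;
    std::cout << '\n';                    // prints: missing feature bits: 70
  }
}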
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 96c90c9cf6bd..c8f710d66a03 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1,9 +1,8 @@
//===- CodeGenDAGPatterns.cpp - Read DAG patterns from .td file -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -68,8 +67,10 @@ static bool berase_if(MachineValueTypeSet &S, Predicate P) {
// inference will apply to each mode separately.
TypeSetByHwMode::TypeSetByHwMode(ArrayRef<ValueTypeByHwMode> VTList) {
- for (const ValueTypeByHwMode &VVT : VTList)
+ for (const ValueTypeByHwMode &VVT : VTList) {
insert(VVT);
+ AddrSpaces.push_back(VVT.PtrAddrSpace);
+ }
}
bool TypeSetByHwMode::isValueTypeByHwMode(bool AllowEmpty) const {
@@ -86,9 +87,13 @@ ValueTypeByHwMode TypeSetByHwMode::getValueTypeByHwMode() const {
assert(isValueTypeByHwMode(true) &&
"The type set has multiple types for at least one HW mode");
ValueTypeByHwMode VVT;
+ auto ASI = AddrSpaces.begin();
+
for (const auto &I : *this) {
MVT T = I.second.empty() ? MVT::Other : *I.second.begin();
VVT.getOrCreateTypeForMode(I.first, T);
+ if (ASI != AddrSpaces.end())
+ VVT.PtrAddrSpace = *ASI++;
}
return VVT;
}
@@ -502,22 +507,14 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
(A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
A.getSizeInBits() < B.getSizeInBits());
};
- auto LE = [](MVT A, MVT B) -> bool {
+ auto LE = [&LT](MVT A, MVT B) -> bool {
// This function is used when removing elements: when a vector is compared
// to a non-vector, it should return false (to avoid removal).
if (A.isVector() != B.isVector())
return false;
- // Note on the < comparison below:
- // X86 has patterns like
- // (set VR128X:$dst, (v16i8 (X86vtrunc (v4i32 VR128X:$src1)))),
- // where the truncated vector is given a type v16i8, while the source
- // vector has type v4i32. They both have the same size in bits.
- // The minimal type in the result is obviously v16i8, and when we remove
- // all types from the source that are smaller-or-equal than v8i16, the
- // only source type would also be removed (since it's equal in size).
- return A.getScalarSizeInBits() <= B.getScalarSizeInBits() ||
- A.getSizeInBits() < B.getSizeInBits();
+ return LT(A, B) || (A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
+ A.getSizeInBits() == B.getSizeInBits());
};
for (unsigned M : Modes) {
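In the hunk above, LE is now defined in terms of LT plus an exact size match, so the two orderings cannot drift apart: A is smaller-or-equal when it is strictly smaller, or when both the scalar size and the total size are equal. A tiny model of that relation on (scalar bits, total bits) pairs; the type names are illustrative only.

#include <iostream>
#include <utility>

using VT = std::pair<unsigned, unsigned>; // {scalar size in bits, total size in bits}

bool LT(VT A, VT B) {
  return A.first < B.first ||
         (A.first == B.first && A.second < B.second);
}
bool LE(VT A, VT B) {
  // "Smaller or equal" = strictly smaller, or identical in both measures.
  return LT(A, B) || (A.first == B.first && A.second == B.second);
}

int main() {
  VT v16i8{8, 128}, v4i32{32, 128};
  std::cout << LT(v16i8, v4i32) << ' ' << LE(v16i8, v4i32) << '\n'; // 1 1
  std::cout << LE(v4i32, v16i8) << '\n';                            // 0
}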
@@ -957,13 +954,33 @@ std::string TreePredicateFn::getPredCode() const {
}
if (isLoad() || isStore() || isAtomic()) {
- StringRef SDNodeName =
- isLoad() ? "LoadSDNode" : isStore() ? "StoreSDNode" : "AtomicSDNode";
+ if (ListInit *AddressSpaces = getAddressSpaces()) {
+ Code += "unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();\n"
+ " if (";
+
+ bool First = true;
+ for (Init *Val : AddressSpaces->getValues()) {
+ if (First)
+ First = false;
+ else
+ Code += " && ";
+
+ IntInit *IntVal = dyn_cast<IntInit>(Val);
+ if (!IntVal) {
+ PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
+ "AddressSpaces element must be integer");
+ }
+
+ Code += "AddrSpace != " + utostr(IntVal->getValue());
+ }
+
+ Code += ")\nreturn false;\n";
+ }
Record *MemoryVT = getMemoryVT();
if (MemoryVT)
- Code += ("if (cast<" + SDNodeName + ">(N)->getMemoryVT() != MVT::" +
+ Code += ("if (cast<MemSDNode>(N)->getMemoryVT() != MVT::" +
MemoryVT->getName() + ") return false;\n")
.str();
}
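With an AddressSpaces list on a PatFrag, the predicate code generated above rejects a memory node unless its address space matches one of the listed values; the emitted guard is a chain of "AddrSpace != N" terms joined with &&. A small sketch of building that guard as a string, with an invented address-space list.

#include <iostream>
#include <string>
#include <vector>

// Builds the body of the emitted predicate for a hypothetical PatFrag with
// AddressSpaces = [3, 5]: reject the node unless AddrSpace is one of them.
std::string buildAddrSpaceCheck(const std::vector<unsigned> &Spaces) {
  std::string Code =
      "unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();\n"
      "if (";
  for (size_t I = 0; I != Spaces.size(); ++I) {
    if (I)
      Code += " && ";
    Code += "AddrSpace != " + std::to_string(Spaces[I]);
  }
  Code += ")\n  return false;\n";
  return Code;
}

int main() {
  std::cout << buildAddrSpaceCheck({3, 5});
  // unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();
  // if (AddrSpace != 3 && AddrSpace != 5)
  //   return false;
}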
@@ -1152,6 +1169,14 @@ Record *TreePredicateFn::getMemoryVT() const {
return nullptr;
return R->getValueAsDef("MemoryVT");
}
+
+ListInit *TreePredicateFn::getAddressSpaces() const {
+ Record *R = getOrigPatFragRecord()->getRecord();
+ if (R->isValueUnset("AddressSpaces"))
+ return nullptr;
+ return R->getValueAsListInit("AddressSpaces");
+}
+
Record *TreePredicateFn::getScalarMemoryVT() const {
Record *R = getOrigPatFragRecord()->getRecord();
if (R->isValueUnset("ScalarMemoryVT"))
@@ -1276,6 +1301,17 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
// PatternToMatch implementation
//
+static bool isImmAllOnesAllZerosMatch(const TreePatternNode *P) {
+ if (!P->isLeaf())
+ return false;
+ DefInit *DI = dyn_cast<DefInit>(P->getLeafValue());
+ if (!DI)
+ return false;
+
+ Record *R = DI->getDef();
+ return R->getName() == "immAllOnesV" || R->getName() == "immAllZerosV";
+}
+
/// getPatternSize - Return the 'size' of this pattern. We want to match large
/// patterns before small ones. This is used to determine the size of a
/// pattern.
@@ -1315,6 +1351,8 @@ static unsigned getPatternSize(const TreePatternNode *P,
Size += 5; // Matches a ConstantSDNode (+3) and a specific value (+2).
else if (Child->getComplexPatternInfo(CGP))
Size += getPatternSize(Child, CGP);
+ else if (isImmAllOnesAllZerosMatch(Child))
+ Size += 4; // Matches a build_vector(+3) and a predicate (+1).
else if (!Child->getPredicateCalls().empty())
++Size;
}
@@ -1408,7 +1446,8 @@ SDTypeConstraint::SDTypeConstraint(Record *R, const CodeGenHwModes &CGH) {
x.SDTCisSameSizeAs_Info.OtherOperandNum =
R->getValueAsInt("OtherOperandNum");
} else {
- PrintFatalError("Unrecognized SDTypeConstraint '" + R->getName() + "'!\n");
+ PrintFatalError(R->getLoc(),
+ "Unrecognized SDTypeConstraint '" + R->getName() + "'!\n");
}
}
@@ -2120,7 +2159,8 @@ static TypeSetByHwMode getImplicitType(Record *R, unsigned ResNo,
}
if (R->getName() == "node" || R->getName() == "srcvalue" ||
- R->getName() == "zero_reg") {
+ R->getName() == "zero_reg" || R->getName() == "immAllOnesV" ||
+ R->getName() == "immAllZerosV" || R->getName() == "undef_tied_input") {
// Placeholder.
return TypeSetByHwMode(); // Unknown.
}
@@ -2425,18 +2465,32 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
}
}
+ // If one or more operands with a default value appear at the end of the
+ // formal operand list for an instruction, we allow them to be overridden
+ // by optional operands provided in the pattern.
+ //
+ // But if an operand B without a default appears at any point after an
+ // operand A with a default, then we don't allow A to be overridden,
+ // because there would be no way to specify whether the next operand in
+ // the pattern was intended to override A or skip it.
+ unsigned NonOverridableOperands = Inst.getNumOperands();
+ while (NonOverridableOperands > 0 &&
+ CDP.operandHasDefault(Inst.getOperand(NonOverridableOperands-1)))
+ --NonOverridableOperands;
+
unsigned ChildNo = 0;
for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
Record *OperandNode = Inst.getOperand(i);
- // If the instruction expects a predicate or optional def operand, we
- // codegen this by setting the operand to it's default value if it has a
- // non-empty DefaultOps field.
- if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
- !CDP.getDefaultOperand(OperandNode).DefaultOps.empty())
+ // If the operand has a default value, do we use it? We must use the
+ // default if we've run out of children of the pattern DAG to consume,
+ // or if the operand is followed by a non-defaulted one.
+ if (CDP.operandHasDefault(OperandNode) &&
+ (i < NonOverridableOperands || ChildNo >= getNumChildren()))
continue;
- // Verify that we didn't run out of provided operands.
+ // If we have run out of child nodes and there _isn't_ a default
+ // value we can use for the next operand, give an error.
if (ChildNo >= getNumChildren()) {
emitTooFewOperandsError(TP, getOperator()->getName(), getNumChildren());
return false;
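// Illustrative sketch, not part of the patch: a hypothetical standalone helper
// showing the same trailing-default scan on a toy operand list, where 'true'
// marks an operand that has a default value. Only the defaulted suffix may be
// overridden by the pattern; a defaulted operand followed by a mandatory one
// always takes its default.
#include <cstddef>
#include <vector>
static std::size_t countNonOverridable(const std::vector<bool> &HasDefault) {
  std::size_t NonOverridable = HasDefault.size();
  while (NonOverridable > 0 && HasDefault[NonOverridable - 1])
    --NonOverridable;
  return NonOverridable;
}
// countNonOverridable({false, true, true}) == 1: both trailing defaults can be
// overridden by extra pattern operands.
// countNonOverridable({true, false}) == 2: the leading defaulted operand is
// always filled in with its default ops.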
@@ -2753,7 +2807,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
// chain.
if (Int.IS.RetVTs.empty())
Operator = getDAGPatterns().get_intrinsic_void_sdnode();
- else if (Int.ModRef != CodeGenIntrinsic::NoMem)
+ else if (Int.ModRef != CodeGenIntrinsic::NoMem || Int.hasSideEffects)
// Has side-effects, requires chain.
Operator = getDAGPatterns().get_intrinsic_w_chain_sdnode();
else // Otherwise, no chain.
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 4be9afdcacd2..2b49a64c3f1d 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -1,9 +1,8 @@
//===- CodeGenDAGPatterns.h - Read DAG patterns from .td file ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -191,6 +190,7 @@ private:
struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
using SetType = MachineValueTypeSet;
+ std::vector<unsigned> AddrSpaces;
TypeSetByHwMode() = default;
TypeSetByHwMode(const TypeSetByHwMode &VTS) = default;
@@ -227,6 +227,15 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
return Map.size() == 1 && Map.begin()->first == DefaultMode;
}
+ bool isPointer() const {
+ return getValueTypeByHwMode().isPointer();
+ }
+
+ unsigned getPtrAddrSpace() const {
+ assert(isPointer());
+ return getValueTypeByHwMode().PtrAddrSpace;
+ }
+
bool insert(const ValueTypeByHwMode &VVT);
bool constrain(const TypeSetByHwMode &VTS);
template <typename Predicate> bool constrain(Predicate P);
@@ -243,6 +252,7 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
bool validate() const;
private:
+ unsigned PtrAddrSpace = std::numeric_limits<unsigned>::max();
/// Intersect two sets. Return true if anything has changed.
bool intersect(SetType &Out, const SetType &In);
};
@@ -583,6 +593,8 @@ public:
/// ValueType record for the memory VT.
Record *getScalarMemoryVT() const;
+ ListInit *getAddressSpaces() const;
+
// If true, indicates that GlobalISel-based C++ code was supplied.
bool hasGISelPredicateCode() const;
std::string getGISelPredicateCode() const;
@@ -1272,6 +1284,11 @@ public:
unsigned allocateScope() { return ++NumScopes; }
+ bool operandHasDefault(Record *Op) const {
+ return Op->isSubClassOf("OperandWithDefaultOps") &&
+ !getDefaultOperand(Op).DefaultOps.empty();
+ }
+
private:
void ParseNodeInfo();
void ParseNodeTransforms();
diff --git a/utils/TableGen/CodeGenHwModes.cpp b/utils/TableGen/CodeGenHwModes.cpp
index 9f88d95275b4..9052cdd2bd3e 100644
--- a/utils/TableGen/CodeGenHwModes.cpp
+++ b/utils/TableGen/CodeGenHwModes.cpp
@@ -1,9 +1,8 @@
//===--- CodeGenHwModes.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Classes to parse and store HW mode information for instruction selection
diff --git a/utils/TableGen/CodeGenHwModes.h b/utils/TableGen/CodeGenHwModes.h
index 36df835d1933..1ff2faaa0e52 100644
--- a/utils/TableGen/CodeGenHwModes.h
+++ b/utils/TableGen/CodeGenHwModes.h
@@ -1,9 +1,8 @@
//===--- CodeGenHwModes.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Classes to parse and store HW mode information for instruction selection.
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 6d06ba2c8b67..2463824469ab 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -1,9 +1,8 @@
//===- CodeGenInstruction.cpp - CodeGen Instruction Class Wrapper ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -34,18 +33,24 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
if (DefInit *Init = dyn_cast<DefInit>(OutDI->getOperator())) {
if (Init->getDef()->getName() != "outs")
- PrintFatalError(R->getName() + ": invalid def name for output list: use 'outs'");
+ PrintFatalError(R->getLoc(),
+ R->getName() +
+ ": invalid def name for output list: use 'outs'");
} else
- PrintFatalError(R->getName() + ": invalid output list: use 'outs'");
+ PrintFatalError(R->getLoc(),
+ R->getName() + ": invalid output list: use 'outs'");
NumDefs = OutDI->getNumArgs();
DagInit *InDI = R->getValueAsDag("InOperandList");
if (DefInit *Init = dyn_cast<DefInit>(InDI->getOperator())) {
if (Init->getDef()->getName() != "ins")
- PrintFatalError(R->getName() + ": invalid def name for input list: use 'ins'");
+ PrintFatalError(R->getLoc(),
+ R->getName() +
+ ": invalid def name for input list: use 'ins'");
} else
- PrintFatalError(R->getName() + ": invalid input list: use 'ins'");
+ PrintFatalError(R->getLoc(),
+ R->getName() + ": invalid input list: use 'ins'");
unsigned MIOperandNo = 0;
std::set<std::string> OperandNames;
@@ -64,7 +69,8 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
DefInit *Arg = dyn_cast<DefInit>(ArgInit);
if (!Arg)
- PrintFatalError("Illegal operand for the '" + R->getName() + "' instruction!");
+ PrintFatalError(R->getLoc(), "Illegal operand for the '" + R->getName() +
+ "' instruction!");
Record *Rec = Arg->getDef();
std::string PrintMethod = "printOperand";
@@ -89,8 +95,9 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
// Verify that MIOpInfo has an 'ops' root value.
if (!isa<DefInit>(MIOpInfo->getOperator()) ||
cast<DefInit>(MIOpInfo->getOperator())->getDef()->getName() != "ops")
- PrintFatalError("Bad value for MIOperandInfo in operand '" + Rec->getName() +
- "'\n");
+ PrintFatalError(R->getLoc(),
+ "Bad value for MIOperandInfo in operand '" +
+ Rec->getName() + "'\n");
// If we have MIOpInfo, then we have #operands equal to number of entries
// in MIOperandInfo.
@@ -108,16 +115,20 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
OperandType = "OPERAND_REGISTER";
} else if (!Rec->isSubClassOf("PointerLikeRegClass") &&
!Rec->isSubClassOf("unknown_class"))
- PrintFatalError("Unknown operand class '" + Rec->getName() +
- "' in '" + R->getName() + "' instruction!");
+ PrintFatalError(R->getLoc(), "Unknown operand class '" + Rec->getName() +
+ "' in '" + R->getName() +
+ "' instruction!");
// Check that the operand has a name and that it's unique.
if (ArgName.empty())
- PrintFatalError("In instruction '" + R->getName() + "', operand #" +
- Twine(i) + " has no name!");
+ PrintFatalError(R->getLoc(), "In instruction '" + R->getName() +
+ "', operand #" + Twine(i) +
+ " has no name!");
if (!OperandNames.insert(ArgName).second)
- PrintFatalError("In instruction '" + R->getName() + "', operand #" +
- Twine(i) + " has the same name as a previous operand!");
+ PrintFatalError(R->getLoc(),
+ "In instruction '" + R->getName() + "', operand #" +
+ Twine(i) +
+ " has the same name as a previous operand!");
OperandList.emplace_back(Rec, ArgName, PrintMethod, EncoderMethod,
OperandNamespace + "::" + OperandType, MIOperandNo,
@@ -139,9 +150,11 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
///
unsigned CGIOperandList::getOperandNamed(StringRef Name) const {
unsigned OpIdx;
- if (hasOperandNamed(Name, OpIdx)) return OpIdx;
- PrintFatalError("'" + TheDef->getName() +
- "' does not have an operand named '$" + Name + "'!");
+ if (hasOperandNamed(Name, OpIdx))
+ return OpIdx;
+ PrintFatalError(TheDef->getLoc(), "'" + TheDef->getName() +
+ "' does not have an operand named '$" +
+ Name + "'!");
}
/// hasOperandNamed - Query whether the instruction has an operand of the
@@ -160,7 +173,8 @@ bool CGIOperandList::hasOperandNamed(StringRef Name, unsigned &OpIdx) const {
std::pair<unsigned,unsigned>
CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
if (Op.empty() || Op[0] != '$')
- PrintFatalError(TheDef->getName() + ": Illegal operand name: '" + Op + "'");
+ PrintFatalError(TheDef->getLoc(),
+ TheDef->getName() + ": Illegal operand name: '" + Op + "'");
std::string OpName = Op.substr(1);
std::string SubOpName;
@@ -170,7 +184,9 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
if (DotIdx != std::string::npos) {
SubOpName = OpName.substr(DotIdx+1);
if (SubOpName.empty())
- PrintFatalError(TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'");
+ PrintFatalError(TheDef->getLoc(),
+ TheDef->getName() +
+ ": illegal empty suboperand name in '" + Op + "'");
OpName = OpName.substr(0, DotIdx);
}
@@ -180,8 +196,11 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
// If one was needed, throw.
if (OperandList[OpIdx].MINumOperands > 1 && !AllowWholeOp &&
SubOpName.empty())
- PrintFatalError(TheDef->getName() + ": Illegal to refer to"
- " whole operand part of complex operand '" + Op + "'");
+ PrintFatalError(TheDef->getLoc(),
+ TheDef->getName() +
+ ": Illegal to refer to"
+ " whole operand part of complex operand '" +
+ Op + "'");
// Otherwise, return the operand.
return std::make_pair(OpIdx, 0U);
@@ -190,7 +209,9 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
// Find the suboperand number involved.
DagInit *MIOpInfo = OperandList[OpIdx].MIOperandInfo;
if (!MIOpInfo)
- PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'");
+ PrintFatalError(TheDef->getLoc(), TheDef->getName() +
+ ": unknown suboperand name in '" +
+ Op + "'");
// Find the operand with the right name.
for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i)
@@ -198,7 +219,9 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
return std::make_pair(OpIdx, i);
// Otherwise, didn't find it!
- PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'");
+ PrintFatalError(TheDef->getLoc(), TheDef->getName() +
+ ": unknown suboperand name in '" + Op +
+ "'");
return std::make_pair(0U, 0U);
}
@@ -354,7 +377,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
isAdd = R->getValueAsBit("isAdd");
isTrap = R->getValueAsBit("isTrap");
canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
- isPredicable = Operands.isPredicable || R->getValueAsBit("isPredicable");
+ isPredicable = !R->getValueAsBit("isUnpredicable") && (
+ Operands.isPredicable || R->getValueAsBit("isPredicable"));
isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
isCommutable = R->getValueAsBit("isCommutable");
isTerminator = R->getValueAsBit("isTerminator");
@@ -377,6 +401,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
mayLoad_Unset = Unset;
mayStore = R->getValueAsBitOrUnset("mayStore", Unset);
mayStore_Unset = Unset;
+ mayRaiseFPException = R->getValueAsBit("mayRaiseFPException");
hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", Unset);
hasSideEffects_Unset = Unset;
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 2e3d2f48a928..bb5b1369649f 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -1,9 +1,8 @@
//===- CodeGenInstruction.h - Instruction Class Wrapper ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -250,6 +249,7 @@ template <typename T> class ArrayRef;
bool mayLoad_Unset : 1;
bool mayStore : 1;
bool mayStore_Unset : 1;
+ bool mayRaiseFPException : 1;
bool isPredicable : 1;
bool isConvertibleToThreeAddress : 1;
bool isCommutable : 1;
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 9487a79c1432..7b74bb07d6e0 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -1,9 +1,8 @@
//===- CodeGenIntrinsic.h - Intrinsic Class Wrapper ------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -124,6 +123,9 @@ struct CodeGenIntrinsic {
/// True if the intrinsic is no-return.
bool isNoReturn;
+ /// True if the intrinsic is will-return.
+ bool isWillReturn;
+
/// True if the intrinsic is cold.
bool isCold;
@@ -137,7 +139,15 @@ struct CodeGenIntrinsic {
// True if the intrinsic is marked as speculatable.
bool isSpeculatable;
- enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone };
+ enum ArgAttribute {
+ NoCapture,
+ Returned,
+ ReadOnly,
+ WriteOnly,
+ ReadNone,
+ ImmArg
+ };
+
std::vector<std::pair<unsigned, ArgAttribute>> ArgumentAttributes;
bool hasProperty(enum SDNP Prop) const {
diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp
index e5b0426cdcc3..b1774b01ba8c 100644
--- a/utils/TableGen/CodeGenMapTable.cpp
+++ b/utils/TableGen/CodeGenMapTable.cpp
@@ -1,9 +1,8 @@
//===- CodeGenMapTable.cpp - Instruction Mapping Table Generator ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// CodeGenMapTable provides functionality for TableGen to create
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 74a2b078dfb3..f87c6d6c945a 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -1,9 +1,8 @@
//===- CodeGenRegisters.cpp - Register and RegisterClass Info -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -740,8 +739,9 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
Record *Type = TypeList[i];
if (!Type->isSubClassOf("ValueType"))
- PrintFatalError("RegTypes list member '" + Type->getName() +
- "' does not derive from the ValueType class!");
+ PrintFatalError(R->getLoc(),
+ "RegTypes list member '" + Type->getName() +
+ "' does not derive from the ValueType class!");
VTs.push_back(getValueTypeByHwMode(Type, RegBank.getHwModes()));
}
assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
@@ -2101,8 +2101,7 @@ void CodeGenRegBank::computeDerivedInfo() {
for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx)
RegUnitSetOrder.push_back(Idx);
- std::stable_sort(RegUnitSetOrder.begin(), RegUnitSetOrder.end(),
- [this](unsigned ID1, unsigned ID2) {
+ llvm::stable_sort(RegUnitSetOrder, [this](unsigned ID1, unsigned ID2) {
return getRegPressureSet(ID1).Units.size() <
getRegPressureSet(ID2).Units.size();
});
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 0f7a025ded10..f04a90f8fde5 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -1,9 +1,8 @@
//===- CodeGenRegisters.h - Register and RegisterClass Info -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 6d259cbb33ee..fd007044a16e 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -1,9 +1,8 @@
//===- CodeGenSchedule.cpp - Scheduling MachineModels ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -369,24 +368,22 @@ processSTIPredicate(STIPredicateFunction &Fn,
[&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) {
unsigned LhsIdx = Opcode2Index[Lhs.first];
unsigned RhsIdx = Opcode2Index[Rhs.first];
- std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
- std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
-
- if (LhsMasks.first != RhsMasks.first) {
- if (LhsMasks.first.countPopulation() <
- RhsMasks.first.countPopulation())
- return true;
- return LhsMasks.first.countLeadingZeros() >
- RhsMasks.first.countLeadingZeros();
- }
-
- if (LhsMasks.second != RhsMasks.second) {
- if (LhsMasks.second.countPopulation() <
- RhsMasks.second.countPopulation())
- return true;
- return LhsMasks.second.countLeadingZeros() >
- RhsMasks.second.countLeadingZeros();
- }
+ const std::pair<APInt, APInt> &LhsMasks = OpcodeMasks[LhsIdx];
+ const std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
+
+ auto LessThan = [](const APInt &Lhs, const APInt &Rhs) {
+ unsigned LhsCountPopulation = Lhs.countPopulation();
+ unsigned RhsCountPopulation = Rhs.countPopulation();
+ return ((LhsCountPopulation < RhsCountPopulation) ||
+ ((LhsCountPopulation == RhsCountPopulation) &&
+ (Lhs.countLeadingZeros() > Rhs.countLeadingZeros())));
+ };
+
+ if (LhsMasks.first != RhsMasks.first)
+ return LessThan(LhsMasks.first, RhsMasks.first);
+
+ if (LhsMasks.second != RhsMasks.second)
+ return LessThan(LhsMasks.second, RhsMasks.second);
return LhsIdx < RhsIdx;
});
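// Illustrative sketch, not part of the patch (requires C++20 <bit>): the same
// ordering applied to plain 8-bit masks by a hypothetical helper. Masks with
// fewer set bits sort first; on a tie, the mask with more leading zeros (its
// highest set bit is lower) sorts first, mirroring the LessThan lambda above.
#include <bit>
#include <cstdint>
static bool lessThanMask(uint8_t L, uint8_t R) {
  unsigned LPop = std::popcount(L), RPop = std::popcount(R);
  if (LPop != RPop)
    return LPop < RPop;
  return std::countl_zero(L) > std::countl_zero(R);
}
// lessThanMask(0x01, 0x03) -> true (fewer bits set)
// lessThanMask(0x03, 0x06) -> true (same popcount, more leading zeros)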
@@ -1936,8 +1933,10 @@ void CodeGenSchedModels::checkCompleteness() {
unsigned SCIdx = getSchedClassIdx(*Inst);
if (!SCIdx) {
if (Inst->TheDef->isValueUnset("SchedRW") && !HadCompleteModel) {
- PrintError("No schedule information for instruction '"
- + Inst->TheDef->getName() + "'");
+ PrintError(Inst->TheDef->getLoc(),
+ "No schedule information for instruction '" +
+ Inst->TheDef->getName() + "' in SchedMachineModel '" +
+ ProcModel.ModelDef->getName() + "'");
Complete = false;
}
continue;
@@ -1955,8 +1954,9 @@ void CodeGenSchedModels::checkCompleteness() {
return R->getValueAsDef("SchedModel") == ProcModel.ModelDef;
});
if (I == InstRWs.end()) {
- PrintError("'" + ProcModel.ModelName + "' lacks information for '" +
- Inst->TheDef->getName() + "'");
+ PrintError(Inst->TheDef->getLoc(), "'" + ProcModel.ModelName +
+ "' lacks information for '" +
+ Inst->TheDef->getName() + "'");
Complete = false;
}
}
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index 87a051b0c05e..c26fb1f97807 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -1,9 +1,8 @@
//===- CodeGenSchedule.h - Scheduling Machine Models ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index bcb653135551..b65e1b6af791 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -1,9 +1,8 @@
//===- CodeGenTarget.cpp - CodeGen Target Class Wrapper -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -21,8 +20,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Timer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
using namespace llvm;
@@ -105,11 +106,18 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v128i16: return "MVT::v128i16";
case MVT::v1i32: return "MVT::v1i32";
case MVT::v2i32: return "MVT::v2i32";
+ case MVT::v3i32: return "MVT::v3i32";
case MVT::v4i32: return "MVT::v4i32";
+ case MVT::v5i32: return "MVT::v5i32";
case MVT::v8i32: return "MVT::v8i32";
case MVT::v16i32: return "MVT::v16i32";
case MVT::v32i32: return "MVT::v32i32";
case MVT::v64i32: return "MVT::v64i32";
+ case MVT::v128i32: return "MVT::v128i32";
+ case MVT::v256i32: return "MVT::v256i32";
+ case MVT::v512i32: return "MVT::v512i32";
+ case MVT::v1024i32: return "MVT::v1024i32";
+ case MVT::v2048i32: return "MVT::v2048i32";
case MVT::v1i64: return "MVT::v1i64";
case MVT::v2i64: return "MVT::v2i64";
case MVT::v4i64: return "MVT::v4i64";
@@ -122,9 +130,18 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v8f16: return "MVT::v8f16";
case MVT::v1f32: return "MVT::v1f32";
case MVT::v2f32: return "MVT::v2f32";
+ case MVT::v3f32: return "MVT::v3f32";
case MVT::v4f32: return "MVT::v4f32";
+ case MVT::v5f32: return "MVT::v5f32";
case MVT::v8f32: return "MVT::v8f32";
case MVT::v16f32: return "MVT::v16f32";
+ case MVT::v32f32: return "MVT::v32f32";
+ case MVT::v64f32: return "MVT::v64f32";
+ case MVT::v128f32: return "MVT::v128f32";
+ case MVT::v256f32: return "MVT::v256f32";
+ case MVT::v512f32: return "MVT::v512f32";
+ case MVT::v1024f32: return "MVT::v1024f32";
+ case MVT::v2048f32: return "MVT::v2048f32";
case MVT::v1f64: return "MVT::v1f64";
case MVT::v2f64: return "MVT::v2f64";
case MVT::v4f64: return "MVT::v4f64";
@@ -174,7 +191,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::iPTR: return "MVT::iPTR";
case MVT::iPTRAny: return "MVT::iPTRAny";
case MVT::Untyped: return "MVT::Untyped";
- case MVT::ExceptRef: return "MVT::ExceptRef";
+ case MVT::exnref: return "MVT::exnref";
default: llvm_unreachable("ILLEGAL VALUE TYPE!");
}
}
@@ -327,6 +344,8 @@ CodeGenSchedModels &CodeGenTarget::getSchedModels() const {
}
void CodeGenTarget::ReadInstructions() const {
+ NamedRegionTimer T("Read Instructions", "Time spent reading instructions",
+ "CodeGenTarget", "CodeGenTarget", TimeRegions);
std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
if (Insts.size() <= 2)
PrintFatalError("No 'Instruction' subclasses defined!");
@@ -492,9 +511,10 @@ ComplexPattern::ComplexPattern(Record *R) {
} else if (PropList[i]->getName() == "SDNPWantParent") {
Properties |= 1 << SDNPWantParent;
} else {
- PrintFatalError("Unsupported SD Node property '" +
- PropList[i]->getName() + "' on ComplexPattern '" +
- R->getName() + "'!");
+ PrintFatalError(R->getLoc(), "Unsupported SD Node property '" +
+ PropList[i]->getName() +
+ "' on ComplexPattern '" + R->getName() +
+ "'!");
}
}
@@ -530,12 +550,14 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC,
CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
TheDef = R;
std::string DefName = R->getName();
+ ArrayRef<SMLoc> DefLoc = R->getLoc();
ModRef = ReadWriteMem;
Properties = 0;
isOverloaded = false;
isCommutative = false;
canThrow = false;
isNoReturn = false;
+ isWillReturn = false;
isCold = false;
isNoDuplicate = false;
isConvergent = false;
@@ -544,7 +566,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
if (DefName.size() <= 4 ||
std::string(DefName.begin(), DefName.begin() + 4) != "int_")
- PrintFatalError("Intrinsic '" + DefName + "' does not start with 'int_'!");
+ PrintFatalError(DefLoc,
+ "Intrinsic '" + DefName + "' does not start with 'int_'!");
EnumName = std::string(DefName.begin()+4, DefName.end());
@@ -566,7 +589,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// Verify it starts with "llvm.".
if (Name.size() <= 5 ||
std::string(Name.begin(), Name.begin() + 5) != "llvm.")
- PrintFatalError("Intrinsic '" + DefName + "'s name does not start with 'llvm.'!");
+ PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+ "'s name does not start with 'llvm.'!");
}
// If TargetPrefix is specified, make sure that Name starts with
@@ -575,13 +599,34 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
if (Name.size() < 6+TargetPrefix.size() ||
std::string(Name.begin() + 5, Name.begin() + 6 + TargetPrefix.size())
!= (TargetPrefix + "."))
- PrintFatalError("Intrinsic '" + DefName + "' does not start with 'llvm." +
- TargetPrefix + ".'!");
+ PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+ "' does not start with 'llvm." +
+ TargetPrefix + ".'!");
}
- // Parse the list of return types.
+ ListInit *RetTypes = R->getValueAsListInit("RetTypes");
+ ListInit *ParamTypes = R->getValueAsListInit("ParamTypes");
+
+ // First collate a list of overloaded types.
std::vector<MVT::SimpleValueType> OverloadedVTs;
- ListInit *TypeList = R->getValueAsListInit("RetTypes");
+ for (ListInit *TypeList : {RetTypes, ParamTypes}) {
+ for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
+ Record *TyEl = TypeList->getElementAsRecord(i);
+ assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
+
+ if (TyEl->isSubClassOf("LLVMMatchType"))
+ continue;
+
+ MVT::SimpleValueType VT = getValueType(TyEl->getValueAsDef("VT"));
+ if (MVT(VT).isOverloaded()) {
+ OverloadedVTs.push_back(VT);
+ isOverloaded = true;
+ }
+ }
+ }
+
+ // Parse the list of return types.
+ ListInit *TypeList = RetTypes;
for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
Record *TyEl = TypeList->getElementAsRecord(i);
assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
@@ -601,21 +646,18 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
} else {
VT = getValueType(TyEl->getValueAsDef("VT"));
}
- if (MVT(VT).isOverloaded()) {
- OverloadedVTs.push_back(VT);
- isOverloaded = true;
- }
// Reject invalid types.
if (VT == MVT::isVoid)
- PrintFatalError("Intrinsic '" + DefName + " has void in result type list!");
+ PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+ " has void in result type list!");
IS.RetVTs.push_back(VT);
IS.RetTypeDefs.push_back(TyEl);
}
// Parse the list of parameter types.
- TypeList = R->getValueAsListInit("ParamTypes");
+ TypeList = ParamTypes;
for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
Record *TyEl = TypeList->getElementAsRecord(i);
assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
@@ -626,7 +668,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
PrintError(R->getLoc(),
"Parameter #" + Twine(i) + " has out of bounds matching "
"number " + Twine(MatchTy));
- PrintFatalError(Twine("ParamTypes is ") + TypeList->getAsString());
+ PrintFatalError(DefLoc,
+ Twine("ParamTypes is ") + TypeList->getAsString());
}
VT = OverloadedVTs[MatchTy];
// It only makes sense to use the extended and truncated vector element
@@ -634,20 +677,16 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// overloaded, all the types can be specified directly.
assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
!TyEl->isSubClassOf("LLVMTruncatedType") &&
- !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
+ !TyEl->isSubClassOf("LLVMScalarOrSameVectorWidth")) ||
VT == MVT::iAny || VT == MVT::vAny) &&
"Expected iAny or vAny type");
} else
VT = getValueType(TyEl->getValueAsDef("VT"));
- if (MVT(VT).isOverloaded()) {
- OverloadedVTs.push_back(VT);
- isOverloaded = true;
- }
-
// Reject invalid types.
if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/)
- PrintFatalError("Intrinsic '" + DefName + " has void in result type list!");
+ PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+ " has void in result type list!");
IS.ParamVTs.push_back(VT);
IS.ParamTypeDefs.push_back(TyEl);
@@ -683,6 +722,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
isConvergent = true;
else if (Property->getName() == "IntrNoReturn")
isNoReturn = true;
+ else if (Property->getName() == "IntrWillReturn")
+ isWillReturn = true;
else if (Property->getName() == "IntrCold")
isCold = true;
else if (Property->getName() == "IntrSpeculatable")
@@ -704,6 +745,9 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
} else if (Property->isSubClassOf("ReadNone")) {
unsigned ArgNo = Property->getValueAsInt("ArgNo");
ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadNone));
+ } else if (Property->isSubClassOf("ImmArg")) {
+ unsigned ArgNo = Property->getValueAsInt("ArgNo");
+ ArgumentAttributes.push_back(std::make_pair(ArgNo, ImmArg));
} else
llvm_unreachable("Unknown property!");
}
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index d2833d5b6a92..1ab2de269c76 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -1,9 +1,8 @@
//===- CodeGenTarget.h - Target Class Wrapper -------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index 62a0ff700725..fb0c6faa5295 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -1,9 +1,8 @@
//===- DAGISelEmitter.cpp - Generate an instruction selector --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index c8e005739460..bebd205ad58f 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -1,9 +1,8 @@
//===- DAGISelMatcher.cpp - Representation of DAG pattern matcher ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -212,6 +211,11 @@ void CheckCondCodeMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
OS.indent(indent) << "CheckCondCode ISD::" << CondCodeName << '\n';
}
+void CheckChild2CondCodeMatcher::printImpl(raw_ostream &OS,
+ unsigned indent) const {
+ OS.indent(indent) << "CheckChild2CondCode ISD::" << CondCodeName << '\n';
+}
+
void CheckValueTypeMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
OS.indent(indent) << "CheckValueType MVT::" << TypeName << '\n';
}
@@ -233,6 +237,16 @@ void CheckFoldableChainNodeMatcher::printImpl(raw_ostream &OS,
OS.indent(indent) << "CheckFoldableChainNode\n";
}
+void CheckImmAllOnesVMatcher::printImpl(raw_ostream &OS,
+ unsigned indent) const {
+ OS.indent(indent) << "CheckAllOnesV\n";
+}
+
+void CheckImmAllZerosVMatcher::printImpl(raw_ostream &OS,
+ unsigned indent) const {
+ OS.indent(indent) << "CheckAllZerosV\n";
+}
+
void EmitIntegerMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
OS.indent(indent) << "EmitInteger " << Val << " VT=" << getEnumName(VT)
<< '\n';
@@ -398,3 +412,12 @@ bool CheckValueTypeMatcher::isContradictoryImpl(const Matcher *M) const {
return false;
}
+bool CheckImmAllOnesVMatcher::isContradictoryImpl(const Matcher *M) const {
+ // AllZeros is contradictory.
+ return isa<CheckImmAllZerosVMatcher>(M);
+}
+
+bool CheckImmAllZerosVMatcher::isContradictoryImpl(const Matcher *M) const {
+ // AllOnes is contradictory.
+ return isa<CheckImmAllOnesVMatcher>(M);
+}
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 9be7295c67d4..0a782e84a372 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -1,9 +1,8 @@
//===- DAGISelMatcher.h - Representation of DAG pattern matcher -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -67,10 +66,13 @@ public:
CheckInteger, // Fail if wrong val.
CheckChildInteger, // Fail if child is wrong val.
CheckCondCode, // Fail if not condcode.
+ CheckChild2CondCode, // Fail if child is wrong condcode.
CheckValueType,
CheckComplexPat,
CheckAndImm,
CheckOrImm,
+ CheckImmAllOnesV,
+ CheckImmAllZerosV,
CheckFoldableChainNode,
    // Node creation/emission.
@@ -122,9 +124,12 @@ public:
case CheckInteger:
case CheckChildInteger:
case CheckCondCode:
+ case CheckChild2CondCode:
case CheckValueType:
case CheckAndImm:
case CheckOrImm:
+ case CheckImmAllOnesV:
+ case CheckImmAllZerosV:
case CheckFoldableChainNode:
return true;
}
@@ -626,6 +631,27 @@ private:
}
};
+/// CheckChild2CondCodeMatcher - This checks to see if the child 2 node is a
+/// CondCodeSDNode with the specified condition, if not it fails to match.
+class CheckChild2CondCodeMatcher : public Matcher {
+ StringRef CondCodeName;
+public:
+ CheckChild2CondCodeMatcher(StringRef condcodename)
+ : Matcher(CheckChild2CondCode), CondCodeName(condcodename) {}
+
+ StringRef getCondCodeName() const { return CondCodeName; }
+
+ static bool classof(const Matcher *N) {
+ return N->getKind() == CheckChild2CondCode;
+ }
+
+private:
+ void printImpl(raw_ostream &OS, unsigned indent) const override;
+ bool isEqualImpl(const Matcher *M) const override {
+ return cast<CheckChild2CondCodeMatcher>(M)->CondCodeName == CondCodeName;
+ }
+};
+
/// CheckValueTypeMatcher - This checks to see if the current node is a
/// VTSDNode with the specified type, if not it fails to match.
class CheckValueTypeMatcher : public Matcher {
@@ -731,6 +757,38 @@ private:
}
};
+/// CheckImmAllOnesVMatcher - This checks if the current node is a build vector
+/// of all ones.
+class CheckImmAllOnesVMatcher : public Matcher {
+public:
+ CheckImmAllOnesVMatcher() : Matcher(CheckImmAllOnesV) {}
+
+ static bool classof(const Matcher *N) {
+ return N->getKind() == CheckImmAllOnesV;
+ }
+
+private:
+ void printImpl(raw_ostream &OS, unsigned indent) const override;
+ bool isEqualImpl(const Matcher *M) const override { return true; }
+ bool isContradictoryImpl(const Matcher *M) const override;
+};
+
+/// CheckImmAllZerosVMatcher - This checks if the current node is a build vector
+/// of all zeros.
+class CheckImmAllZerosVMatcher : public Matcher {
+public:
+ CheckImmAllZerosVMatcher() : Matcher(CheckImmAllZerosV) {}
+
+ static bool classof(const Matcher *N) {
+ return N->getKind() == CheckImmAllZerosV;
+ }
+
+private:
+ void printImpl(raw_ostream &OS, unsigned indent) const override;
+ bool isEqualImpl(const Matcher *M) const override { return true; }
+ bool isContradictoryImpl(const Matcher *M) const override;
+};
+
/// CheckFoldableChainNodeMatcher - This checks to see if the current node
/// (which defines a chain operand) is safe to fold into a larger pattern.
class CheckFoldableChainNodeMatcher : public Matcher {
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 90ca1bff5344..cecbc6cccdff 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -1,9 +1,8 @@
//===- DAGISelMatcherEmitter.cpp - Matcher Emitter ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -259,7 +258,7 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
OS << "\n};";
OS << "\nreturn StringRef(PATTERN_MATCH_TABLE[Index]);";
- OS << "\n}";
+ OS << "\n}\n";
EndEmitFunction(OS);
BeginEmitFunction(OS, "StringRef", "getIncludePathForIndex(unsigned Index)",
@@ -273,7 +272,7 @@ void MatcherTableEmitter::EmitPatternMatchTable(raw_ostream &OS) {
OS << "\n};";
OS << "\nreturn StringRef(INCLUDE_PATH_TABLE[Index]);";
- OS << "\n}";
+ OS << "\n}\n";
EndEmitFunction(OS);
}
@@ -555,6 +554,11 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
<< cast<CheckCondCodeMatcher>(N)->getCondCodeName() << ",\n";
return 2;
+ case Matcher::CheckChild2CondCode:
+ OS << "OPC_CheckChild2CondCode, ISD::"
+ << cast<CheckChild2CondCodeMatcher>(N)->getCondCodeName() << ",\n";
+ return 2;
+
case Matcher::CheckValueType:
OS << "OPC_CheckValueType, MVT::"
<< cast<CheckValueTypeMatcher>(N)->getTypeName() << ",\n";
@@ -597,6 +601,14 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
OS << "OPC_CheckFoldableChainNode,\n";
return 1;
+ case Matcher::CheckImmAllOnesV:
+ OS << "OPC_CheckImmAllOnesV,\n";
+ return 1;
+
+ case Matcher::CheckImmAllZerosV:
+ OS << "OPC_CheckImmAllZerosV,\n";
+ return 1;
+
case Matcher::EmitInteger: {
int64_t Val = cast<EmitIntegerMatcher>(N)->getValue();
OS << "OPC_EmitInteger, "
@@ -996,12 +1008,15 @@ static StringRef getOpcodeString(Matcher::KindTy Kind) {
case Matcher::CheckInteger: return "OPC_CheckInteger"; break;
case Matcher::CheckChildInteger: return "OPC_CheckChildInteger"; break;
case Matcher::CheckCondCode: return "OPC_CheckCondCode"; break;
+ case Matcher::CheckChild2CondCode: return "OPC_CheckChild2CondCode"; break;
case Matcher::CheckValueType: return "OPC_CheckValueType"; break;
case Matcher::CheckComplexPat: return "OPC_CheckComplexPat"; break;
case Matcher::CheckAndImm: return "OPC_CheckAndImm"; break;
case Matcher::CheckOrImm: return "OPC_CheckOrImm"; break;
case Matcher::CheckFoldableChainNode:
return "OPC_CheckFoldableChainNode"; break;
+ case Matcher::CheckImmAllOnesV: return "OPC_CheckImmAllOnesV"; break;
+ case Matcher::CheckImmAllZerosV: return "OPC_CheckImmAllZerosV"; break;
case Matcher::EmitInteger: return "OPC_EmitInteger"; break;
case Matcher::EmitStringInteger: return "OPC_EmitStringInteger"; break;
case Matcher::EmitRegister: return "OPC_EmitRegister"; break;
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 612342ddcddf..8f54beeba65b 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -1,9 +1,8 @@
//===- DAGISelMatcherGen.cpp - Matcher generator --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -278,6 +277,27 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
return;
}
+ if (LeafRec->getName() == "immAllOnesV") {
+ // If this is the root of the dag we're matching, we emit a redundant opcode
+ // check to ensure that this gets folded into the normal top-level
+ // OpcodeSwitch.
+ if (N == Pattern.getSrcPattern()) {
+ const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector"));
+ AddMatcher(new CheckOpcodeMatcher(NI));
+ }
+ return AddMatcher(new CheckImmAllOnesVMatcher());
+ }
+ if (LeafRec->getName() == "immAllZerosV") {
+ // If this is the root of the dag we're matching, we emit a redundant opcode
+ // check to ensure that this gets folded into the normal top-level
+ // OpcodeSwitch.
+ if (N == Pattern.getSrcPattern()) {
+ const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector"));
+ AddMatcher(new CheckOpcodeMatcher(NI));
+ }
+ return AddMatcher(new CheckImmAllZerosVMatcher());
+ }
+
errs() << "Unknown leaf kind: " << *N << "\n";
abort();
}
@@ -671,6 +691,17 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
return;
}
+ if (Def->getName() == "undef_tied_input") {
+ std::array<MVT::SimpleValueType, 1> ResultVTs = {{ N->getSimpleType(0) }};
+ std::array<unsigned, 0> InstOps;
+ auto IDOperandNo = NextRecordedOperandNo++;
+ AddMatcher(new EmitNodeMatcher("TargetOpcode::IMPLICIT_DEF",
+ ResultVTs, InstOps, false, false, false,
+ false, -1, IDOperandNo));
+ ResultOps.push_back(IDOperandNo);
+ return;
+ }
+
// Handle a reference to a register class. This is used
// in COPY_TO_SUBREG instructions.
if (Def->isSubClassOf("RegisterOperand"))
@@ -763,14 +794,27 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
// 'execute always' values. Match up the node operands to the instruction
// operands to do this.
unsigned ChildNo = 0;
+
+ // Similarly to the code in TreePatternNode::ApplyTypeConstraints, count the
+ // number of operands at the end of the list which have default values.
+ // Those can come from the pattern if it provides enough arguments, or be
+ // filled in with the default if the pattern hasn't provided them. But any
+ // operand with a default value _before_ the last mandatory one will be
+  // filled in with its default unconditionally.
+ unsigned NonOverridableOperands = NumFixedOperands;
+ while (NonOverridableOperands > NumResults &&
+ CGP.operandHasDefault(II.Operands[NonOverridableOperands-1].Rec))
+ --NonOverridableOperands;
+
for (unsigned InstOpNo = NumResults, e = NumFixedOperands;
InstOpNo != e; ++InstOpNo) {
// Determine what to emit for this operand.
Record *OperandNode = II.Operands[InstOpNo].Rec;
- if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
- !CGP.getDefaultOperand(OperandNode).DefaultOps.empty()) {
- // This is a predicate or optional def operand; emit the
- // 'default ops' operands.
+ if (CGP.operandHasDefault(OperandNode) &&
+ (InstOpNo < NonOverridableOperands || ChildNo >= N->getNumChildren())) {
+ // This is a predicate or optional def operand which the pattern has not
+ // overridden, or which we aren't letting it override; emit the 'default
+ // ops' operands.
const DAGDefaultOperand &DefaultOp
= CGP.getDefaultOperand(OperandNode);
for (unsigned i = 0, e = DefaultOp.DefaultOps.size(); i != e; ++i)
diff --git a/utils/TableGen/DAGISelMatcherOpt.cpp b/utils/TableGen/DAGISelMatcherOpt.cpp
index 554c7438ce3d..7d51b0769372 100644
--- a/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -1,9 +1,8 @@
//===- DAGISelMatcherOpt.cpp - Optimize a DAG Matcher ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,9 +55,13 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
if (MC->getChildNo() < 4) // Only have CheckChildSame0...3
New = new CheckChildSameMatcher(MC->getChildNo(), CS->getMatchNumber());
- if (CheckIntegerMatcher *CS = dyn_cast<CheckIntegerMatcher>(MC->getNext()))
+ if (CheckIntegerMatcher *CI = dyn_cast<CheckIntegerMatcher>(MC->getNext()))
if (MC->getChildNo() < 5) // Only have CheckChildInteger0...4
- New = new CheckChildIntegerMatcher(MC->getChildNo(), CS->getValue());
+ New = new CheckChildIntegerMatcher(MC->getChildNo(), CI->getValue());
+
+ if (auto *CCC = dyn_cast<CheckCondCodeMatcher>(MC->getNext()))
+ if (MC->getChildNo() == 2) // Only have CheckChild2CondCode
+ New = new CheckChild2CondCodeMatcher(CCC->getCondCodeName());
if (New) {
// Insert the new node.
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index 0db0f55f5ed6..dabcc8f8ed55 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -1,9 +1,8 @@
//===- DFAPacketizerEmitter.cpp - Packetization DFA for a VLIW machine ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index b99a0a973a2c..9e75c7fba77b 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -1,9 +1,8 @@
//===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/ExegesisEmitter.cpp b/utils/TableGen/ExegesisEmitter.cpp
index 208237aca20c..976d5f51776f 100644
--- a/utils/TableGen/ExegesisEmitter.cpp
+++ b/utils/TableGen/ExegesisEmitter.cpp
@@ -1,9 +1,8 @@
//===- ExegesisEmitter.cpp - Generate exegesis target data ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 5134b684c6f9..b39956859fe8 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -1,9 +1,8 @@
///===- FastISelEmitter.cpp - Generate an instruction selector -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 5e621fc0efdd..f5e975d2e5ae 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -1,9 +1,8 @@
//===------------ FixedLenDecoderEmitter.cpp - Decoder Generator ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -17,9 +16,10 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/CachedHashString.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -48,6 +48,12 @@ using namespace llvm;
namespace {
+STATISTIC(NumEncodings, "Number of encodings considered");
+STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info");
+STATISTIC(NumInstructions, "Number of instructions considered");
+STATISTIC(NumEncodingsSupported, "Number of encodings supported");
+STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
+
struct EncodingField {
unsigned Base, Width, Offset;
EncodingField(unsigned B, unsigned W, unsigned O)
@@ -95,6 +101,15 @@ struct EncodingAndInst {
: EncodingDef(EncodingDef), Inst(Inst) {}
};
+struct EncodingIDAndOpcode {
+ unsigned EncodingID;
+ unsigned Opcode;
+
+ EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
+ EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
+ : EncodingID(EncodingID), Opcode(Opcode) {}
+};
+
raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
if (Value.EncodingDef != Value.Inst->TheDef)
OS << Value.EncodingDef->getName() << ":";
@@ -103,6 +118,7 @@ raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
}
class FixedLenDecoderEmitter {
+ RecordKeeper &RK;
std::vector<EncodingAndInst> NumberedEncodings;
public:
@@ -114,7 +130,7 @@ public:
std::string ROK = "MCDisassembler::Success",
std::string RFail = "MCDisassembler::Fail",
std::string L = "")
- : Target(R), PredicateNamespace(std::move(PredicateNamespace)),
+ : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)),
GuardPrefix(std::move(GPrefix)), GuardPostfix(std::move(GPostfix)),
ReturnOK(std::move(ROK)), ReturnFail(std::move(RFail)),
Locals(std::move(L)) {}
@@ -252,10 +268,11 @@ protected:
bool Mixed; // a mixed region contains both set and unset bits
// Map of well-known segment value to the set of uid's with that value.
- std::map<uint64_t, std::vector<unsigned>> FilteredInstructions;
+ std::map<uint64_t, std::vector<EncodingIDAndOpcode>>
+ FilteredInstructions;
// Set of uid's with non-constant segment values.
- std::vector<unsigned> VariableInstructions;
+ std::vector<EncodingIDAndOpcode> VariableInstructions;
// Map of well-known segment value to its delegate.
std::map<unsigned, std::unique_ptr<const FilterChooser>> FilterChooserMap;
@@ -264,7 +281,7 @@ protected:
unsigned NumFiltered;
// Keeps track of the last opcode in the filtered bucket.
- unsigned LastOpcFiltered;
+ EncodingIDAndOpcode LastOpcFiltered;
public:
Filter(Filter &&f);
@@ -274,7 +291,7 @@ public:
unsigned getNumFiltered() const { return NumFiltered; }
- unsigned getSingletonOpc() const {
+ EncodingIDAndOpcode getSingletonOpc() const {
assert(NumFiltered == 1);
return LastOpcFiltered;
}
@@ -341,7 +358,9 @@ protected:
ArrayRef<EncodingAndInst> AllInstructions;
// Vector of uid's for this filter chooser to work on.
- const std::vector<unsigned> &Opcodes;
+ // The first member of the pair is the opcode id being decoded, the second is
+ // the opcode id that should be emitted.
+ const std::vector<EncodingIDAndOpcode> &Opcodes;
// Lookup table for the operand decoding of instructions.
const std::map<unsigned, std::vector<OperandInfo>> &Operands;
@@ -367,7 +386,7 @@ protected:
public:
FilterChooser(ArrayRef<EncodingAndInst> Insts,
- const std::vector<unsigned> &IDs,
+ const std::vector<EncodingIDAndOpcode> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
unsigned BW, const FixedLenDecoderEmitter *E)
: AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
@@ -377,7 +396,7 @@ public:
}
FilterChooser(ArrayRef<EncodingAndInst> Insts,
- const std::vector<unsigned> &IDs,
+ const std::vector<EncodingIDAndOpcode> &IDs,
const std::map<unsigned, std::vector<OperandInfo>> &Ops,
const std::vector<bit_value_t> &ParentFilterBitValues,
const FilterChooser &parent)
@@ -413,6 +432,15 @@ protected:
}
}
+ // Emit the name of the encoding/instruction pair.
+ void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
+ const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
+ const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
+ if (EncodingDef != InstDef)
+ OS << EncodingDef->getName() << ":";
+ OS << InstDef->getName();
+ }
+
// Populates the field of the insn given the start position and the number of
// consecutive bits to scan for.
//
@@ -463,7 +491,7 @@ protected:
// Emits table entries to decode the singleton.
void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
- unsigned Opc) const;
+ EncodingIDAndOpcode Opc) const;
// Emits code to decode the singleton, and then to decode the rest.
void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
@@ -524,13 +552,13 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
assert(StartBit + NumBits - 1 < Owner->BitWidth);
NumFiltered = 0;
- LastOpcFiltered = 0;
+ LastOpcFiltered = {0, 0};
for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
insn_t Insn;
// Populates the insn given the uid.
- Owner->insnWithID(Insn, Owner->Opcodes[i]);
+ Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID);
uint64_t Field;
// Scans the segment for possibly well-specified encoding bits.
@@ -1026,7 +1054,7 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
// 1: Water (the bit value does not affect decoding)
// 2: Island (well-known bit value needed for decoding)
int State = 0;
- int Val = -1;
+ int64_t Val = -1;
for (unsigned i = 0; i < BitWidth; ++i) {
Val = Value(Insn[i]);
@@ -1314,12 +1342,12 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
// Emits table entries to decode the singleton.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
- unsigned Opc) const {
+ EncodingIDAndOpcode Opc) const {
std::vector<unsigned> StartBits;
std::vector<unsigned> EndBits;
std::vector<uint64_t> FieldVals;
insn_t Insn;
- insnWithID(Insn, Opc);
+ insnWithID(Insn, Opc.EncodingID);
// Look for islands of undecoded bits of the singleton.
getIslands(StartBits, EndBits, FieldVals, Insn);
@@ -1327,7 +1355,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
unsigned Size = StartBits.size();
// Emit the predicate table entry if one is needed.
- emitPredicateTableEntry(TableInfo, Opc);
+ emitPredicateTableEntry(TableInfo, Opc.EncodingID);
// Check any additional encoding fields needed.
for (unsigned I = Size; I != 0; --I) {
@@ -1351,10 +1379,11 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
}
// Check for soft failure of the match.
- emitSoftFailTableEntry(TableInfo, Opc);
+ emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
bool HasCompleteDecoder;
- unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc, HasCompleteDecoder);
+ unsigned DIdx =
+ getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
// Produce OPC_Decode or OPC_TryDecode opcode based on the information
// whether the instruction decoder is complete or not. If it is complete
@@ -1367,8 +1396,9 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
// can decode it.
TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode :
MCD::OPC_TryDecode);
+ NumEncodingsSupported++;
uint8_t Buffer[16], *p;
- encodeULEB128(Opc, Buffer);
+ encodeULEB128(Opc.Opcode, Buffer);
for (p = Buffer; *p >= 128 ; ++p)
TableInfo.Table.push_back(*p);
TableInfo.Table.push_back(*p);
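The opcode index is appended to the decoder table in ULEB128 form; the loop above copies every continuation byte (value >= 128) and then the terminating byte. A standalone re-implementation of that encoding, for illustration only (the emitter itself uses llvm::encodeULEB128):

    #include <cstdint>
    #include <vector>

    // 7 value bits per byte, high bit set on all but the last byte.
    // Example: 300 encodes as {0xAC, 0x02}.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value);
      return Out;
    }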
@@ -1394,7 +1424,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
// Emits table entries to decode the singleton, and then to decode the rest.
void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
const Filter &Best) const {
- unsigned Opc = Best.getSingletonOpc();
+ EncodingIDAndOpcode Opc = Best.getSingletonOpc();
// complex singletons need predicate checks from the first singleton
// to refer forward to the variable filterchooser that follows.
@@ -1454,7 +1484,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
std::vector<uint64_t> FieldVals;
insn_t Insn;
- insnWithID(Insn, Opcodes[i]);
+ insnWithID(Insn, Opcodes[i].EncodingID);
// Look for islands of undecoded bits of any instruction.
if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
@@ -1498,7 +1528,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
insn_t insn;
- insnWithID(insn, Opcodes[InsnIndex]);
+ insnWithID(insn, Opcodes[InsnIndex].EncodingID);
for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
switch (bitAttrs[BitIndex]) {
@@ -1717,9 +1747,12 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
dumpStack(errs(), "\t\t");
for (unsigned i = 0; i < Opcodes.size(); ++i) {
- errs() << '\t' << AllInstructions[Opcodes[i]] << " ";
- dumpBits(errs(),
- getBitsField(*AllInstructions[Opcodes[i]].EncodingDef, "Inst"));
+ errs() << '\t';
+ emitNameWithID(errs(), Opcodes[i].EncodingID);
+ errs() << " ";
+ dumpBits(
+ errs(),
+ getBitsField(*AllInstructions[Opcodes[i].EncodingID].EncodingDef, "Inst"));
errs() << '\n';
}
}
@@ -1751,24 +1784,25 @@ static std::string findOperandDecoderMethod(TypedInit *TI) {
return Decoder;
}
-static bool populateInstruction(CodeGenTarget &Target,
- const CodeGenInstruction &CGI, unsigned Opc,
- std::map<unsigned, std::vector<OperandInfo>> &Operands){
+static bool
+populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
+ const CodeGenInstruction &CGI, unsigned Opc,
+ std::map<unsigned, std::vector<OperandInfo>> &Operands) {
const Record &Def = *CGI.TheDef;
// If all the bit positions are not specified; do not decode this instruction.
// We are bound to fail! For proper disassembly, the well-known encoding bits
// of the instruction must be fully specified.
- BitsInit &Bits = getBitsField(Def, "Inst");
+ BitsInit &Bits = getBitsField(EncodingDef, "Inst");
if (Bits.allInComplete()) return false;
std::vector<OperandInfo> InsnOperands;
// If the instruction has specified a custom decoding hook, use that instead
// of trying to auto-generate the decoder.
- StringRef InstDecoder = Def.getValueAsString("DecoderMethod");
+ StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
if (InstDecoder != "") {
- bool HasCompleteInstDecoder = Def.getValueAsBit("hasCompleteDecoder");
+ bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder");
InsnOperands.push_back(OperandInfo(InstDecoder, HasCompleteInstDecoder));
Operands[Opc] = InsnOperands;
return true;
@@ -2144,7 +2178,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " const FeatureBitset& Bits = STI.getFeatureBits();\n"
<< "\n"
<< " const uint8_t *Ptr = DecodeTable;\n"
- << " uint32_t CurFieldValue = 0;\n"
+ << " InsnType CurFieldValue = 0;\n"
<< " DecodeStatus S = MCDisassembler::Success;\n"
<< " while (true) {\n"
<< " ptrdiff_t Loc = Ptr - DecodeTable;\n"
@@ -2189,7 +2223,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " unsigned Len = *++Ptr;\n"
<< " InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
<< " // Decode the field value.\n"
- << " uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
+ << " InsnType ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
<< " Ptr += Len;\n"
<< " // NumToSkip is a plain 24-bit integer.\n"
<< " unsigned NumToSkip = *Ptr++;\n"
@@ -2336,37 +2370,52 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
// Parameterize the decoders based on namespace and instruction width.
const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
NumberedEncodings.reserve(NumberedInstructions.size());
- for (const auto &NumberedInstruction : NumberedInstructions)
+ DenseMap<Record *, unsigned> IndexOfInstruction;
+ for (const auto &NumberedInstruction : NumberedInstructions) {
+ IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size();
NumberedEncodings.emplace_back(NumberedInstruction->TheDef, NumberedInstruction);
+ }
+ for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding"))
+ NumberedEncodings.emplace_back(
+ NumberedAlias,
+ &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
- std::map<std::pair<std::string, unsigned>,
- std::vector<unsigned>> OpcMap;
+ std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
+ OpcMap;
std::map<unsigned, std::vector<OperandInfo>> Operands;
for (unsigned i = 0; i < NumberedEncodings.size(); ++i) {
+ const Record *EncodingDef = NumberedEncodings[i].EncodingDef;
const CodeGenInstruction *Inst = NumberedEncodings[i].Inst;
const Record *Def = Inst->TheDef;
- unsigned Size = Def->getValueAsInt("Size");
+ unsigned Size = EncodingDef->getValueAsInt("Size");
if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
Def->getValueAsBit("isPseudo") ||
Def->getValueAsBit("isAsmParserOnly") ||
- Def->getValueAsBit("isCodeGenOnly"))
+ Def->getValueAsBit("isCodeGenOnly")) {
+ NumEncodingsLackingDisasm++;
continue;
+ }
- StringRef DecoderNamespace = Def->getValueAsString("DecoderNamespace");
+ if (i < NumberedInstructions.size())
+ NumInstructions++;
+ NumEncodings++;
+
+ StringRef DecoderNamespace = EncodingDef->getValueAsString("DecoderNamespace");
if (Size) {
- if (populateInstruction(Target, *Inst, i, Operands)) {
- OpcMap[std::make_pair(DecoderNamespace, Size)].push_back(i);
- }
+ if (populateInstruction(Target, *EncodingDef, *Inst, i, Operands)) {
+ OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back(i, IndexOfInstruction.find(Def)->second);
+ } else
+ NumEncodingsOmitted++;
}
}
DecoderTableInfo TableInfo;
for (const auto &Opc : OpcMap) {
// Emit the decoder for this namespace+width combination.
- ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
- NumberedEncodings.size());
+ ArrayRef<EncodingAndInst> NumberedEncodingsRef(
+ NumberedEncodings.data(), NumberedEncodings.size());
FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
8 * Opc.first.second, this);
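In the run() changes above, every encoding that survives the filters is grouped by (DecoderNamespace, Size) together with the opcode index of the instruction it belongs to; each group then drives one FilterChooser over 8 * Size bits. A rough standalone sketch of that bookkeeping, with illustrative names and values only:

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    struct EncodingIDAndOpcode { unsigned EncodingID, Opcode; };

    int main() {
      // (decoder namespace, byte width) -> encodings handled by that table.
      std::map<std::pair<std::string, unsigned>,
               std::vector<EncodingIDAndOpcode>> OpcMap;
      // Pretend encoding 97 is a 4-byte AdditionalEncoding of opcode 42.
      OpcMap[{"Thumb", 4}].push_back({97, 42});
      // One decoder table is emitted per (namespace, width) key.
      return 0;
    }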
diff --git a/utils/TableGen/GlobalISelEmitter.cpp b/utils/TableGen/GlobalISelEmitter.cpp
index 997ceb12becd..f1c02134198b 100644
--- a/utils/TableGen/GlobalISelEmitter.cpp
+++ b/utils/TableGen/GlobalISelEmitter.cpp
@@ -1,9 +1,8 @@
//===- GlobalISelEmitter.cpp - Generate an instruction selector -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -233,6 +232,23 @@ static std::string explainPredicates(const TreePatternNode *N) {
if (Record *VT = P.getScalarMemoryVT())
Explanation += (" ScalarVT(MemVT)=" + VT->getName()).str();
+ if (ListInit *AddrSpaces = P.getAddressSpaces()) {
+ raw_string_ostream OS(Explanation);
+ OS << " AddressSpaces=[";
+
+ StringRef AddrSpaceSeparator;
+ for (Init *Val : AddrSpaces->getValues()) {
+ IntInit *IntVal = dyn_cast<IntInit>(Val);
+ if (!IntVal)
+ continue;
+
+ OS << AddrSpaceSeparator << IntVal->getValue();
+ AddrSpaceSeparator = ", ";
+ }
+
+ OS << ']';
+ }
+
if (P.isAtomicOrderingMonotonic())
Explanation += " monotonic";
if (P.isAtomicOrderingAcquire())
@@ -298,7 +314,7 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
Predicate.isSignExtLoad() || Predicate.isZeroExtLoad())
continue;
- if (Predicate.isNonTruncStore())
+ if (Predicate.isNonTruncStore() || Predicate.isTruncStore())
continue;
if (Predicate.isLoad() && Predicate.getMemoryVT())
@@ -309,6 +325,12 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
continue;
}
+ if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+ const ListInit *AddrSpaces = Predicate.getAddressSpaces();
+ if (AddrSpaces && !AddrSpaces->empty())
+ continue;
+ }
+
if (Predicate.isAtomic() && Predicate.getMemoryVT())
continue;
@@ -882,12 +904,19 @@ public:
void defineOperand(StringRef SymbolicName, OperandMatcher &OM);
- void defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
- unsigned RendererID, unsigned SubOperandID) {
- assert(ComplexSubOperands.count(SymbolicName) == 0 && "Already defined");
+ Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
+ unsigned RendererID, unsigned SubOperandID) {
+ if (ComplexSubOperands.count(SymbolicName))
+ return failedImport(
+ "Complex suboperand referenced more than once (Operand: " +
+ SymbolicName + ")");
+
ComplexSubOperands[SymbolicName] =
std::make_tuple(ComplexPattern, RendererID, SubOperandID);
+
+ return Error::success();
}
+
Optional<DefinedComplexPatternSubOperand>
getComplexSubOperand(StringRef SymbolicName) const {
const auto &I = ComplexSubOperands.find(SymbolicName);
@@ -1022,6 +1051,7 @@ public:
IPM_AtomicOrderingMMO,
IPM_MemoryLLTSize,
IPM_MemoryVsLLTSize,
+ IPM_MemoryAddressSpace,
IPM_GenericPredicate,
OPM_SameOperand,
OPM_ComplexPattern,
@@ -1507,6 +1537,9 @@ Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
if (OperandIsAPointer)
addPredicate<PointerToAnyOperandMatcher>(OpTyOrNone->get().getSizeInBits());
+ else if (VTy.isPointer())
+ addPredicate<LLTOperandMatcher>(LLT::pointer(VTy.getPtrAddrSpace(),
+ OpTyOrNone->get().getSizeInBits()));
else
addPredicate<LLTOperandMatcher>(*OpTyOrNone);
return Error::success();
@@ -1780,6 +1813,42 @@ public:
}
};
+class MemoryAddressSpacePredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ unsigned MMOIdx;
+ SmallVector<unsigned, 4> AddrSpaces;
+
+public:
+ MemoryAddressSpacePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+ ArrayRef<unsigned> AddrSpaces)
+ : InstructionPredicateMatcher(IPM_MemoryAddressSpace, InsnVarID),
+ MMOIdx(MMOIdx), AddrSpaces(AddrSpaces.begin(), AddrSpaces.end()) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_MemoryAddressSpace;
+ }
+ bool isIdentical(const PredicateMatcher &B) const override {
+ if (!InstructionPredicateMatcher::isIdentical(B))
+ return false;
+ auto *Other = cast<MemoryAddressSpacePredicateMatcher>(&B);
+ return MMOIdx == Other->MMOIdx && AddrSpaces == Other->AddrSpaces;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+ // Encode number of address spaces to expect.
+ << MatchTable::Comment("NumAddrSpace")
+ << MatchTable::IntValue(AddrSpaces.size());
+ for (unsigned AS : AddrSpaces)
+ Table << MatchTable::Comment("AddrSpace") << MatchTable::IntValue(AS);
+
+ Table << MatchTable::LineBreak;
+ }
+};
+
/// Generates code to check that the size of an MMO is less-than, equal-to, or
/// greater than a given LLT.
class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
@@ -3028,7 +3097,8 @@ private:
importExplicitUseRenderer(action_iterator InsertPt, RuleMatcher &Rule,
BuildMIAction &DstMIBuilder,
TreePatternNode *DstChild);
- Error importDefaultOperandRenderers(BuildMIAction &DstMIBuilder,
+ Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
+ BuildMIAction &DstMIBuilder,
DagInit *DefaultOps) const;
Error
importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
@@ -3200,7 +3270,26 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
continue;
}
- // G_LOAD is used for both non-extending and any-extending loads.
+ // An address space check is needed in all contexts if there is one.
+ if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+ if (const ListInit *AddrSpaces = Predicate.getAddressSpaces()) {
+ SmallVector<unsigned, 4> ParsedAddrSpaces;
+
+ for (Init *Val : AddrSpaces->getValues()) {
+ IntInit *IntVal = dyn_cast<IntInit>(Val);
+ if (!IntVal)
+ return failedImport("Address space is not an integer");
+ ParsedAddrSpaces.push_back(IntVal->getValue());
+ }
+
+ if (!ParsedAddrSpaces.empty()) {
+ InsnMatcher.addPredicate<MemoryAddressSpacePredicateMatcher>(
+ 0, ParsedAddrSpaces);
+ }
+ }
+ }
+
+ // G_LOAD is used for both non-extending and any-extending loads.
if (Predicate.isLoad() && Predicate.isNonExtLoad()) {
InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);
@@ -3212,6 +3301,13 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
continue;
}
+ if (Predicate.isStore() && Predicate.isTruncStore()) {
+ // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+ InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+ 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+ continue;
+ }
+
// No check required. We already did it by swapping the opcode.
if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
Predicate.isSignExtLoad())
@@ -3422,9 +3518,12 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
for (unsigned i = 0, e = SrcChild->getNumChildren(); i != e; ++i) {
auto *SubOperand = SrcChild->getChild(i);
- if (!SubOperand->getName().empty())
- Rule.defineComplexSubOperand(SubOperand->getName(),
- SrcChild->getOperator(), RendererID, i);
+ if (!SubOperand->getName().empty()) {
+ if (auto Error = Rule.defineComplexSubOperand(SubOperand->getName(),
+ SrcChild->getOperator(),
+ RendererID, i))
+ return Error;
+ }
}
return Error::success();
@@ -3765,7 +3864,8 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
// end up with too many rendered operands.
if (DstIOperand.Rec->isSubClassOf("OperandWithDefaultOps")) {
DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
- if (auto Error = importDefaultOperandRenderers(DstMIBuilder, DefaultOps))
+ if (auto Error = importDefaultOperandRenderers(
+ InsertPt, M, DstMIBuilder, DefaultOps))
return std::move(Error);
++NumDefaultOps;
continue;
@@ -3790,19 +3890,39 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
}
Error GlobalISelEmitter::importDefaultOperandRenderers(
- BuildMIAction &DstMIBuilder, DagInit *DefaultOps) const {
+ action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+ DagInit *DefaultOps) const {
for (const auto *DefaultOp : DefaultOps->getArgs()) {
+ Optional<LLTCodeGen> OpTyOrNone = None;
+
// Look through ValueType operators.
if (const DagInit *DefaultDagOp = dyn_cast<DagInit>(DefaultOp)) {
if (const DefInit *DefaultDagOperator =
dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
- if (DefaultDagOperator->getDef()->isSubClassOf("ValueType"))
+ if (DefaultDagOperator->getDef()->isSubClassOf("ValueType")) {
+ OpTyOrNone = MVTToLLT(getValueType(
+ DefaultDagOperator->getDef()));
DefaultOp = DefaultDagOp->getArg(0);
+ }
}
}
if (const DefInit *DefaultDefOp = dyn_cast<DefInit>(DefaultOp)) {
- DstMIBuilder.addRenderer<AddRegisterRenderer>(DefaultDefOp->getDef());
+ auto Def = DefaultDefOp->getDef();
+ if (Def->getName() == "undef_tied_input") {
+ unsigned TempRegID = M.allocateTempRegID();
+ M.insertAction<MakeTempRegisterAction>(
+ InsertPt, OpTyOrNone.getValue(), TempRegID);
+ InsertPt = M.insertAction<BuildMIAction>(
+ InsertPt, M.allocateOutputInsnID(),
+ &Target.getInstruction(RK.getDef("IMPLICIT_DEF")));
+ BuildMIAction &IDMIBuilder = *static_cast<BuildMIAction *>(
+ InsertPt->get());
+ IDMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
+ DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
+ } else {
+ DstMIBuilder.addRenderer<AddRegisterRenderer>(Def);
+ }
continue;
}
@@ -4489,8 +4609,7 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
<< ", // " << Record->getName() << "\n";
OS << "};\n\n";
- std::stable_sort(Rules.begin(), Rules.end(), [&](const RuleMatcher &A,
- const RuleMatcher &B) {
+ llvm::stable_sort(Rules, [&](const RuleMatcher &A, const RuleMatcher &B) {
int ScoreA = RuleMatcherScores[A.getRuleID()];
int ScoreB = RuleMatcherScores[B.getRuleID()];
if (ScoreA > ScoreB)
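The GIM_CheckMemoryAddressSpace entries emitted above encode, for one memory operand, the number of allowed address spaces followed by the values themselves; at selection time the predicate passes when that operand's address space appears in the list. A small standalone sketch of the membership test (names are illustrative, not the InstructionSelector API):

    #include <algorithm>
    #include <vector>

    // Succeeds when the memory operand's address space matches one of the
    // values encoded after GIM_CheckMemoryAddressSpace.
    bool matchesAddrSpace(unsigned OperandAddrSpace,
                          const std::vector<unsigned> &Allowed) {
      return std::find(Allowed.begin(), Allowed.end(), OperandAddrSpace) !=
             Allowed.end();
    }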
diff --git a/utils/TableGen/InfoByHwMode.cpp b/utils/TableGen/InfoByHwMode.cpp
index 086e12dafd74..d9662889a5db 100644
--- a/utils/TableGen/InfoByHwMode.cpp
+++ b/utils/TableGen/InfoByHwMode.cpp
@@ -1,9 +1,8 @@
//===--- InfoByHwMode.cpp -------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Classes that implement data parameterized by HW modes for instruction
@@ -39,6 +38,11 @@ ValueTypeByHwMode::ValueTypeByHwMode(Record *R, const CodeGenHwModes &CGH) {
}
}
+ValueTypeByHwMode::ValueTypeByHwMode(Record *R, MVT T) : ValueTypeByHwMode(T) {
+ if (R->isSubClassOf("PtrValueType"))
+ PtrAddrSpace = R->getValueAsInt("AddrSpace");
+}
+
bool ValueTypeByHwMode::operator== (const ValueTypeByHwMode &T) const {
assert(isValid() && T.isValid() && "Invalid type in assignment");
bool Simple = isSimple();
@@ -112,7 +116,7 @@ ValueTypeByHwMode llvm::getValueTypeByHwMode(Record *Rec,
"Record must be derived from ValueType");
if (Rec->isSubClassOf("HwModeSelect"))
return ValueTypeByHwMode(Rec, CGH);
- return ValueTypeByHwMode(llvm::getValueType(Rec));
+ return ValueTypeByHwMode(Rec, llvm::getValueType(Rec));
}
RegSizeInfo::RegSizeInfo(Record *R, const CodeGenHwModes &CGH) {
diff --git a/utils/TableGen/InfoByHwMode.h b/utils/TableGen/InfoByHwMode.h
index 7be4678f271b..9e5cc3d5f2a4 100644
--- a/utils/TableGen/InfoByHwMode.h
+++ b/utils/TableGen/InfoByHwMode.h
@@ -1,9 +1,8 @@
//===--- InfoByHwMode.h -----------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Classes that implement data parameterized by HW modes for instruction
@@ -120,6 +119,7 @@ struct InfoByHwMode {
struct ValueTypeByHwMode : public InfoByHwMode<MVT> {
ValueTypeByHwMode(Record *R, const CodeGenHwModes &CGH);
+ ValueTypeByHwMode(Record *R, MVT T);
ValueTypeByHwMode(MVT T) { Map.insert({DefaultMode,T}); }
ValueTypeByHwMode() = default;
@@ -135,6 +135,11 @@ struct ValueTypeByHwMode : public InfoByHwMode<MVT> {
static StringRef getMVTName(MVT T);
void writeToStream(raw_ostream &OS) const;
void dump() const;
+
+ unsigned PtrAddrSpace = std::numeric_limits<unsigned>::max();
+ bool isPointer() const {
+ return PtrAddrSpace != std::numeric_limits<unsigned>::max();
+ }
};
ValueTypeByHwMode getValueTypeByHwMode(Record *Rec,
diff --git a/utils/TableGen/InstrDocsEmitter.cpp b/utils/TableGen/InstrDocsEmitter.cpp
index 9d50351854ec..91c457ba08fd 100644
--- a/utils/TableGen/InstrDocsEmitter.cpp
+++ b/utils/TableGen/InstrDocsEmitter.cpp
@@ -1,9 +1,8 @@
//===- InstrDocsEmitter.cpp - Opcode Documentation Generator --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 39d9e8526386..2d367f538b71 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -1,9 +1,8 @@
//===- InstrInfoEmitter.cpp - Generate a Instruction Set Desc. --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -77,7 +76,9 @@ private:
std::map<std::vector<Record*>, unsigned> &EL,
const OperandInfoMapTy &OpInfo,
raw_ostream &OS);
- void emitOperandTypesEnum(raw_ostream &OS, const CodeGenTarget &Target);
+ void emitOperandTypeMappings(
+ raw_ostream &OS, const CodeGenTarget &Target,
+ ArrayRef<const CodeGenInstruction *> NumberedInstructions);
void initOperandMapData(
ArrayRef<const CodeGenInstruction *> NumberedInstructions,
StringRef Namespace,
@@ -212,7 +213,7 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
}
/// Initialize data structures for generating operand name mappings.
-///
+///
/// \param Operands [out] A map used to generate the OpName enum with operand
/// names as its keys and operand enum values as its values.
/// \param OperandMap [out] A map for representing the operand name mappings for
@@ -325,8 +326,9 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
/// Generate an enum for all the operand types for this target, under the
/// llvm::TargetNamespace::OpTypes namespace.
/// Operand types are all definitions derived of the Operand Target.td class.
-void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
- const CodeGenTarget &Target) {
+void InstrInfoEmitter::emitOperandTypeMappings(
+ raw_ostream &OS, const CodeGenTarget &Target,
+ ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
StringRef Namespace = Target.getInstNamespace();
std::vector<Record *> Operands = Records.getAllDerivedDefinitions("Operand");
@@ -350,6 +352,69 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
OS << "} // end namespace " << Namespace << "\n";
OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n";
+
+ OS << "#ifdef GET_INSTRINFO_OPERAND_TYPE\n";
+ OS << "#undef GET_INSTRINFO_OPERAND_TYPE\n";
+ OS << "namespace llvm {\n";
+ OS << "namespace " << Namespace << " {\n";
+ OS << "LLVM_READONLY\n";
+ OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
+ if (!NumberedInstructions.empty()) {
+ std::vector<int> OperandOffsets;
+ std::vector<Record *> OperandRecords;
+ int CurrentOffset = 0;
+ for (const CodeGenInstruction *Inst : NumberedInstructions) {
+ OperandOffsets.push_back(CurrentOffset);
+ for (const auto &Op : Inst->Operands) {
+ const DagInit *MIOI = Op.MIOperandInfo;
+ if (!MIOI || MIOI->getNumArgs() == 0) {
+ // Single, anonymous, operand.
+ OperandRecords.push_back(Op.Rec);
+ ++CurrentOffset;
+ } else {
+ for (Init *Arg : make_range(MIOI->arg_begin(), MIOI->arg_end())) {
+ OperandRecords.push_back(cast<DefInit>(Arg)->getDef());
+ ++CurrentOffset;
+ }
+ }
+ }
+ }
+
+ // Emit the table of offsets for the opcode lookup.
+ OS << " const int Offsets[] = {\n";
+ for (int I = 0, E = OperandOffsets.size(); I != E; ++I)
+ OS << " " << OperandOffsets[I] << ",\n";
+ OS << " };\n";
+
+ // Add an entry for the end so that we don't need to special case it below.
+ OperandOffsets.push_back(OperandRecords.size());
+ // Emit the actual operand types in a flat table.
+ OS << " const int OpcodeOperandTypes[] = {\n ";
+ for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) {
+ // We print each Opcode's operands in its own row.
+ if (I == OperandOffsets[CurOffset]) {
+ OS << "\n ";
+ // If there are empty rows, mark them with an empty comment.
+ while (OperandOffsets[++CurOffset] == I)
+ OS << "/**/\n ";
+ }
+ Record *OpR = OperandRecords[I];
+ if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous())
+ OS << "OpTypes::" << OpR->getName();
+ else
+ OS << -1;
+ OS << ", ";
+ }
+ OS << "\n };\n";
+
+ OS << " return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];\n";
+ } else {
+ OS << " llvm_unreachable(\"No instructions defined\");\n";
+ }
+ OS << "}\n";
+ OS << "} // end namespace " << Namespace << "\n";
+ OS << "} // end namespace llvm\n";
+ OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n";
}
void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
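The GET_INSTRINFO_OPERAND_TYPE block generated above reduces an (opcode, operand index) lookup to one offset-table read plus one flat-array index. The emitted function has roughly this shape; the table contents and operand-type names below are made up for illustration:

    #include <cstdint>

    namespace OpTypes { enum OperandType { GPR32, GPR64, simm12 }; } // hypothetical types

    int getOperandType(uint16_t Opcode, uint16_t OpIdx) {
      static const int Offsets[] = {0, 2, 2};            // one entry per opcode
      static const int OpcodeOperandTypes[] = {
          OpTypes::GPR32, OpTypes::simm12,                // opcode 0
          /**/                                            // opcode 1: no operands
          OpTypes::GPR64, -1, OpTypes::simm12,            // opcode 2 (-1: anonymous operand)
      };
      return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];
    }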
@@ -561,7 +626,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
emitOperandNameMappings(OS, Target, NumberedInstructions);
- emitOperandTypesEnum(OS, Target);
+ emitOperandTypeMappings(OS, Target, NumberedInstructions);
emitMCIIHelperMethods(OS, TargetName);
}
@@ -604,6 +669,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.canFoldAsLoad) OS << "|(1ULL<<MCID::FoldableAsLoad)";
if (Inst.mayLoad) OS << "|(1ULL<<MCID::MayLoad)";
if (Inst.mayStore) OS << "|(1ULL<<MCID::MayStore)";
+ if (Inst.mayRaiseFPException) OS << "|(1ULL<<MCID::MayRaiseFPException)";
if (Inst.isPredicable) OS << "|(1ULL<<MCID::Predicable)";
if (Inst.isConvertibleToThreeAddress) OS << "|(1ULL<<MCID::ConvertibleTo3Addr)";
if (Inst.isCommutable) OS << "|(1ULL<<MCID::Commutable)";
@@ -629,13 +695,14 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
// Emit all of the target-specific flags...
BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
if (!TSF)
- PrintFatalError("no TSFlags?");
+ PrintFatalError(Inst.TheDef->getLoc(), "no TSFlags?");
uint64_t Value = 0;
for (unsigned i = 0, e = TSF->getNumBits(); i != e; ++i) {
if (const auto *Bit = dyn_cast<BitInit>(TSF->getBit(i)))
Value |= uint64_t(Bit->getValue()) << i;
else
- PrintFatalError("Invalid TSFlags bit in " + Inst.TheDef->getName());
+ PrintFatalError(Inst.TheDef->getLoc(),
+ "Invalid TSFlags bit in " + Inst.TheDef->getName());
}
OS << ", 0x";
OS.write_hex(Value);
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 049282e5ebfe..979af98f6768 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -1,9 +1,8 @@
//===- IntrinsicEmitter.cpp - Generate intrinsic information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -220,7 +219,8 @@ enum IIT_Info {
IIT_STRUCT6 = 38,
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
- IIT_F128 = 41
+ IIT_F128 = 41,
+ IIT_VEC_ELEMENT = 42
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -259,10 +259,12 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
#endif
static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
- std::vector<unsigned char> &Sig) {
+ unsigned &NextArgCode,
+ std::vector<unsigned char> &Sig,
+ ArrayRef<unsigned char> Mapping) {
if (R->isSubClassOf("LLVMMatchType")) {
- unsigned Number = R->getValueAsInt("Number");
+ unsigned Number = Mapping[R->getValueAsInt("Number")];
assert(Number < ArgCodes.size() && "Invalid matching number!");
if (R->isSubClassOf("LLVMExtendedType"))
Sig.push_back(IIT_EXTEND_ARG);
@@ -270,7 +272,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_TRUNC_ARG);
else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
Sig.push_back(IIT_HALF_VEC_ARG);
- else if (R->isSubClassOf("LLVMVectorSameWidth")) {
+ else if (R->isSubClassOf("LLVMScalarOrSameVectorWidth")) {
Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
Sig.push_back((Number << 3) | ArgCodes[Number]);
MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
@@ -281,18 +283,18 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_PTR_TO_ARG);
else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT);
- unsigned ArgNo = ArgCodes.size();
- ArgCodes.push_back(3 /*vAny*/);
// Encode overloaded ArgNo
- Sig.push_back(ArgNo);
+ Sig.push_back(NextArgCode++);
// Encode LLVMMatchType<Number> ArgNo
Sig.push_back(Number);
return;
} else if (R->isSubClassOf("LLVMPointerToElt"))
Sig.push_back(IIT_PTR_TO_ELT);
+ else if (R->isSubClassOf("LLVMVectorElementType"))
+ Sig.push_back(IIT_VEC_ELEMENT);
else
Sig.push_back(IIT_ARG);
- return Sig.push_back((Number << 3) | ArgCodes[Number]);
+ return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);
}
MVT::SimpleValueType VT = getValueType(R->getValueAsDef("VT"));
@@ -310,8 +312,9 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_ARG);
// Figure out what arg # this is consuming, and remember what kind it was.
- unsigned ArgNo = ArgCodes.size();
- ArgCodes.push_back(Tmp);
+ assert(NextArgCode < ArgCodes.size() && ArgCodes[NextArgCode] == Tmp &&
+ "Invalid or no ArgCode associated with overloaded VT!");
+ unsigned ArgNo = NextArgCode++;
// Encode what sort of argument it must be in the low 3 bits of the ArgNo.
return Sig.push_back((ArgNo << 3) | Tmp);
@@ -329,7 +332,8 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
} else {
Sig.push_back(IIT_PTR);
}
- return EncodeFixedType(R->getValueAsDef("ElTy"), ArgCodes, Sig);
+ return EncodeFixedType(R->getValueAsDef("ElTy"), ArgCodes, NextArgCode, Sig,
+ Mapping);
}
}
@@ -354,6 +358,45 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
EncodeFixedValueType(VT, Sig);
}
+static void UpdateArgCodes(Record *R, std::vector<unsigned char> &ArgCodes,
+ unsigned int &NumInserted,
+ SmallVectorImpl<unsigned char> &Mapping) {
+ if (R->isSubClassOf("LLVMMatchType")) {
+ if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
+ ArgCodes.push_back(3 /*vAny*/);
+ ++NumInserted;
+ }
+ return;
+ }
+
+ unsigned Tmp = 0;
+ switch (getValueType(R->getValueAsDef("VT"))) {
+ default: break;
+ case MVT::iPTR:
+ UpdateArgCodes(R->getValueAsDef("ElTy"), ArgCodes, NumInserted, Mapping);
+ break;
+ case MVT::iPTRAny:
+ ++Tmp;
+ LLVM_FALLTHROUGH;
+ case MVT::vAny:
+ ++Tmp;
+ LLVM_FALLTHROUGH;
+ case MVT::fAny:
+ ++Tmp;
+ LLVM_FALLTHROUGH;
+ case MVT::iAny:
+ ++Tmp;
+ LLVM_FALLTHROUGH;
+ case MVT::Any:
+ unsigned OriginalIdx = ArgCodes.size() - NumInserted;
+ assert(OriginalIdx >= Mapping.size());
+ Mapping.resize(OriginalIdx+1);
+ Mapping[OriginalIdx] = ArgCodes.size();
+ ArgCodes.push_back(Tmp);
+ break;
+ }
+}
+
#if defined(_MSC_VER) && !defined(__clang__)
#pragma optimize("",on)
#endif
@@ -364,6 +407,17 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int,
std::vector<unsigned char> &TypeSig) {
std::vector<unsigned char> ArgCodes;
+ // Add codes for any overloaded result VTs.
+ unsigned int NumInserted = 0;
+ SmallVector<unsigned char, 8> ArgMapping;
+ for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i)
+ UpdateArgCodes(Int.IS.RetTypeDefs[i], ArgCodes, NumInserted, ArgMapping);
+
+ // Add codes for any overloaded operand VTs.
+ for (unsigned i = 0, e = Int.IS.ParamTypeDefs.size(); i != e; ++i)
+ UpdateArgCodes(Int.IS.ParamTypeDefs[i], ArgCodes, NumInserted, ArgMapping);
+
+ unsigned NextArgCode = 0;
if (Int.IS.RetVTs.empty())
TypeSig.push_back(IIT_Done);
else if (Int.IS.RetVTs.size() == 1 &&
@@ -383,11 +437,13 @@ static void ComputeFixedEncoding(const CodeGenIntrinsic &Int,
}
for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i)
- EncodeFixedType(Int.IS.RetTypeDefs[i], ArgCodes, TypeSig);
+ EncodeFixedType(Int.IS.RetTypeDefs[i], ArgCodes, NextArgCode, TypeSig,
+ ArgMapping);
}
for (unsigned i = 0, e = Int.IS.ParamTypeDefs.size(); i != e; ++i)
- EncodeFixedType(Int.IS.ParamTypeDefs[i], ArgCodes, TypeSig);
+ EncodeFixedType(Int.IS.ParamTypeDefs[i], ArgCodes, NextArgCode, TypeSig,
+ ArgMapping);
}
static void printIITEntry(raw_ostream &OS, unsigned char X) {
@@ -489,6 +545,9 @@ struct AttributeComparator {
if (L->isNoReturn != R->isNoReturn)
return R->isNoReturn;
+ if (L->isWillReturn != R->isWillReturn)
+ return R->isWillReturn;
+
if (L->isCold != R->isCold)
return R->isCold;
@@ -505,7 +564,6 @@ struct AttributeComparator {
CodeGenIntrinsic::ModRefBehavior LK = L->ModRef;
CodeGenIntrinsic::ModRefBehavior RK = R->ModRef;
if (LK != RK) return (LK > RK);
-
// Order by argument attributes.
// This is reliable because each side is already sorted internally.
return (L->ArgumentAttributes < R->ArgumentAttributes);
@@ -613,6 +671,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << "Attribute::ReadNone";
addComma = true;
break;
+ case CodeGenIntrinsic::ImmArg:
+ if (addComma)
+ OS << ',';
+ OS << "Attribute::ImmArg";
+ addComma = true;
+ break;
}
++ai;
@@ -624,9 +688,10 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
}
if (!intrinsic.canThrow ||
- intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
- intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate ||
- intrinsic.isConvergent || intrinsic.isSpeculatable) {
+ (intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem && !intrinsic.hasSideEffects) ||
+ intrinsic.isNoReturn || intrinsic.isWillReturn || intrinsic.isCold ||
+ intrinsic.isNoDuplicate || intrinsic.isConvergent ||
+ intrinsic.isSpeculatable) {
OS << " const Attribute::AttrKind Atts[] = {";
bool addComma = false;
if (!intrinsic.canThrow) {
@@ -639,6 +704,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << "Attribute::NoReturn";
addComma = true;
}
+ if (intrinsic.isWillReturn) {
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::WillReturn";
+ addComma = true;
+ }
if (intrinsic.isCold) {
if (addComma)
OS << ",";
@@ -666,6 +737,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
switch (intrinsic.ModRef) {
case CodeGenIntrinsic::NoMem:
+ if (intrinsic.hasSideEffects)
+ break;
if (addComma)
OS << ",";
OS << "Attribute::ReadNone";
@@ -771,8 +844,9 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
BuiltinMap[Ints[i].TargetPrefix];
if (!BIM.insert(std::make_pair(BuiltinName, Ints[i].EnumName)).second)
- PrintFatalError("Intrinsic '" + Ints[i].TheDef->getName() +
- "': duplicate " + CompilerName + " builtin name!");
+ PrintFatalError(Ints[i].TheDef->getLoc(),
+ "Intrinsic '" + Ints[i].TheDef->getName() +
+ "': duplicate " + CompilerName + " builtin name!");
Table.GetOrAddStringOffset(BuiltinName);
}
}
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
index 0358cf26509b..51b1cb093b21 100644
--- a/utils/TableGen/OptParserEmitter.cpp
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -1,9 +1,8 @@
//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/PredicateExpander.cpp b/utils/TableGen/PredicateExpander.cpp
index 2e01b7c3138e..9f7f40db2626 100644
--- a/utils/TableGen/PredicateExpander.cpp
+++ b/utils/TableGen/PredicateExpander.cpp
@@ -1,9 +1,8 @@
//===--------------------- PredicateExpander.cpp --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/utils/TableGen/PredicateExpander.h b/utils/TableGen/PredicateExpander.h
index 0f3ee6867e65..115a81cf123b 100644
--- a/utils/TableGen/PredicateExpander.h
+++ b/utils/TableGen/PredicateExpander.h
@@ -1,9 +1,8 @@
//===--------------------- PredicateExpander.h ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index a363015730f3..3a80d8e5d1c4 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -1,9 +1,8 @@
//===- PseudoLoweringEmitter.cpp - PseudoLowering Generator -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/RISCVCompressInstEmitter.cpp b/utils/TableGen/RISCVCompressInstEmitter.cpp
index e03663b40f8a..e62f528ebc2e 100644
--- a/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/utils/TableGen/RISCVCompressInstEmitter.cpp
@@ -1,9 +1,8 @@
//===- RISCVCompressInstEmitter.cpp - Generator for RISCV Compression -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// RISCVCompressInstEmitter implements a tablegen-driven CompressPat based
// RISCV Instruction Compression mechanism.
@@ -65,6 +64,7 @@
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
+#include <set>
#include <vector>
using namespace llvm;
@@ -253,12 +253,14 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
// Source instructions are non compressed instructions and don't have tied
// operands.
if (IsSource)
- PrintFatalError("Input operands for Inst '" + Inst.TheDef->getName() +
- "' and input Dag operand count mismatch");
+ PrintFatalError(Inst.TheDef->getLoc(),
+ "Input operands for Inst '" + Inst.TheDef->getName() +
+ "' and input Dag operand count mismatch");
// The Dag can't have more arguments than the Instruction.
if (Dag->getNumArgs() > Inst.Operands.size())
- PrintFatalError("Inst '" + Inst.TheDef->getName() +
- "' and Dag operand count mismatch");
+ PrintFatalError(Inst.TheDef->getLoc(),
+ "Inst '" + Inst.TheDef->getName() +
+ "' and Dag operand count mismatch");
// The Instruction might have tied operands so the Dag might have
// a fewer operand count.
@@ -268,8 +270,9 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
--RealCount;
if (Dag->getNumArgs() != RealCount)
- PrintFatalError("Inst '" + Inst.TheDef->getName() +
- "' and Dag operand count mismatch");
+ PrintFatalError(Inst.TheDef->getLoc(),
+ "Inst '" + Inst.TheDef->getName() +
+ "' and Dag operand count mismatch");
return true;
}
@@ -472,7 +475,7 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
SourceOperandMap, DestOperandMap));
}
-static void getReqFeatures(std::map<StringRef, int> &FeaturesMap,
+static void getReqFeatures(std::set<StringRef> &FeaturesSet,
const std::vector<Record *> &ReqFeatures) {
for (auto &R : ReqFeatures) {
StringRef AsmCondString = R->getValueAsString("AssemblerCondString");
@@ -481,11 +484,9 @@ static void getReqFeatures(std::map<StringRef, int> &FeaturesMap,
SmallVector<StringRef, 4> Ops;
SplitString(AsmCondString, Ops, ",");
assert(!Ops.empty() && "AssemblerCondString cannot be empty");
-
for (auto &Op : Ops) {
assert(!Op.empty() && "Empty operator");
- if (FeaturesMap.find(Op) == FeaturesMap.end())
- FeaturesMap[Op] = FeaturesMap.size();
+ FeaturesSet.insert(Op);
}
}
}
@@ -530,7 +531,8 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
bool Compress) {
Record *AsmWriter = Target.getAsmWriter();
if (!AsmWriter->getValueAsInt("PassSubtarget"))
- PrintFatalError("'PassSubtarget' is false. SubTargetInfo object is needed "
+ PrintFatalError(AsmWriter->getLoc(),
+ "'PassSubtarget' is false. SubTargetInfo object is needed "
"for target features.\n");
std::string Namespace = Target.getName();
@@ -540,15 +542,15 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// transformed to a C_ADD or a C_MV. When emitting 'uncompress()' function the
// source and destination are flipped and the sort key needs to change
// accordingly.
- std::stable_sort(CompressPatterns.begin(), CompressPatterns.end(),
- [Compress](const CompressPat &LHS, const CompressPat &RHS) {
- if (Compress)
- return (LHS.Source.TheDef->getName().str() <
- RHS.Source.TheDef->getName().str());
- else
- return (LHS.Dest.TheDef->getName().str() <
- RHS.Dest.TheDef->getName().str());
- });
+ llvm::stable_sort(CompressPatterns,
+ [Compress](const CompressPat &LHS, const CompressPat &RHS) {
+ if (Compress)
+ return (LHS.Source.TheDef->getName().str() <
+ RHS.Source.TheDef->getName().str());
+ else
+ return (LHS.Dest.TheDef->getName().str() <
+ RHS.Dest.TheDef->getName().str());
+ });
// A list of MCOperandPredicates for all operands in use, and the reverse map.
std::vector<const Record *> MCOpPredicates;
@@ -617,9 +619,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
CaseStream.indent(4) << "case " + Namespace + "::" + CurOp + ": {\n";
}
- std::map<StringRef, int> FeaturesMap;
+ std::set<StringRef> FeaturesSet;
// Add CompressPat required features.
- getReqFeatures(FeaturesMap, CompressPat.PatReqFeatures);
+ getReqFeatures(FeaturesSet, CompressPat.PatReqFeatures);
// Add Dest instruction required features.
std::vector<Record *> ReqFeatures;
@@ -627,11 +629,10 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
return R->getValueAsBit("AssemblerMatcherPredicate");
});
- getReqFeatures(FeaturesMap, ReqFeatures);
+ getReqFeatures(FeaturesSet, ReqFeatures);
// Emit checks for all required features.
- for (auto &F : FeaturesMap) {
- StringRef Op = F.first;
+ for (auto &Op : FeaturesSet) {
if (Op[0] == '!')
CondStream.indent(6) << ("!STI.getFeatureBits()[" + Namespace +
"::" + Op.substr(1) + "]")
diff --git a/utils/TableGen/RegisterBankEmitter.cpp b/utils/TableGen/RegisterBankEmitter.cpp
index 879b4162d629..7f6b3931d3de 100644
--- a/utils/TableGen/RegisterBankEmitter.cpp
+++ b/utils/TableGen/RegisterBankEmitter.cpp
@@ -1,9 +1,8 @@
//===- RegisterBankEmitter.cpp - Generate a Register Bank Desc. -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index ded54c828bcd..1b619072c814 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1,9 +1,8 @@
//===- RegisterInfoEmitter.cpp - Generate a Register File Desc. -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/SDNodeProperties.cpp b/utils/TableGen/SDNodeProperties.cpp
index 343febc99d1e..1843a78aa3cf 100644
--- a/utils/TableGen/SDNodeProperties.cpp
+++ b/utils/TableGen/SDNodeProperties.cpp
@@ -1,9 +1,8 @@
//===- SDNodeProperties.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -39,9 +38,9 @@ unsigned llvm::parseSDPatternOperatorProperties(Record *R) {
} else if (Property->getName() == "SDNPVariadic") {
Properties |= 1 << SDNPVariadic;
} else {
- PrintFatalError("Unknown SD Node property '" +
- Property->getName() + "' on node '" +
- R->getName() + "'!");
+ PrintFatalError(R->getLoc(), "Unknown SD Node property '" +
+ Property->getName() + "' on node '" +
+ R->getName() + "'!");
}
}
diff --git a/utils/TableGen/SDNodeProperties.h b/utils/TableGen/SDNodeProperties.h
index a8d4efb5dab0..66a04e63150c 100644
--- a/utils/TableGen/SDNodeProperties.h
+++ b/utils/TableGen/SDNodeProperties.h
@@ -1,9 +1,8 @@
//===- SDNodeProperties.h ---------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/SearchableTableEmitter.cpp b/utils/TableGen/SearchableTableEmitter.cpp
index f98a7c74bf0c..954b63e7253c 100644
--- a/utils/TableGen/SearchableTableEmitter.cpp
+++ b/utils/TableGen/SearchableTableEmitter.cpp
@@ -1,9 +1,8 @@
//===- SearchableTableEmitter.cpp - Generate efficiently searchable tables -==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -142,7 +141,7 @@ private:
bool compareBy(Record *LHS, Record *RHS, const SearchIndex &Index);
bool isIntegral(Init *I) {
- return isa<BitsInit>(I) || isIntrinsic(I);
+ return isa<BitsInit>(I) || isa<CodeInit>(I) || isIntrinsic(I);
}
std::string searchableFieldType(const GenericField &Field, TypeContext Ctx) {
@@ -600,9 +599,10 @@ void SearchableTableEmitter::collectTableEntries(
for (auto &Field : Table.Fields) {
auto TI = dyn_cast<TypedInit>(EntryRec->getValueInit(Field.Name));
if (!TI) {
- PrintFatalError(Twine("Record '") + EntryRec->getName() +
- "' in table '" + Table.Name + "' is missing field '" +
- Field.Name + "'");
+ PrintFatalError(EntryRec->getLoc(),
+ Twine("Record '") + EntryRec->getName() +
+ "' in table '" + Table.Name +
+ "' is missing field '" + Field.Name + "'");
}
if (!Field.RecType) {
Field.RecType = TI->getType();
@@ -611,7 +611,7 @@ void SearchableTableEmitter::collectTableEntries(
if (!Ty)
PrintFatalError(Twine("Field '") + Field.Name + "' of table '" +
Table.Name + "' has incompatible type: " +
- Ty->getAsString() + " vs. " +
+ Field.RecType->getAsString() + " vs. " +
TI->getType()->getAsString());
Field.RecType = Ty;
}
@@ -654,8 +654,8 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
StringRef FilterClass = EnumRec->getValueAsString("FilterClass");
Enum->Class = Records.getClass(FilterClass);
if (!Enum->Class)
- PrintFatalError(Twine("Enum FilterClass '") + FilterClass +
- "' does not exist");
+ PrintFatalError(EnumRec->getLoc(), Twine("Enum FilterClass '") +
+ FilterClass + "' does not exist");
collectEnumEntries(*Enum, NameField, ValueField,
Records.getAllDerivedDefinitions(FilterClass));
@@ -675,9 +675,10 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
if (auto TypeOfVal = TableRec->getValue(("TypeOf_" + FieldName).str())) {
if (!parseFieldType(Table->Fields.back(), TypeOfVal->getValue())) {
- PrintFatalError(Twine("Table '") + Table->Name +
- "' has bad 'TypeOf_" + FieldName + "': " +
- TypeOfVal->getValue()->getAsString());
+ PrintFatalError(TableRec->getLoc(),
+ Twine("Table '") + Table->Name +
+ "' has bad 'TypeOf_" + FieldName +
+ "': " + TypeOfVal->getValue()->getAsString());
}
}
}
@@ -705,8 +706,10 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
Record *TableRec = IndexRec->getValueAsDef("Table");
auto It = TableMap.find(TableRec);
if (It == TableMap.end())
- PrintFatalError(Twine("SearchIndex '") + IndexRec->getName() +
- "' refers to non-existing table '" + TableRec->getName());
+ PrintFatalError(IndexRec->getLoc(),
+ Twine("SearchIndex '") + IndexRec->getName() +
+ "' refers to non-existing table '" +
+ TableRec->getName());
GenericTable &Table = *It->second;
Table.Indices.push_back(parseSearchIndex(
diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h
index 2b8f66a3bf3e..8a826eff311d 100644
--- a/utils/TableGen/SequenceToOffsetTable.h
+++ b/utils/TableGen/SequenceToOffsetTable.h
@@ -1,9 +1,8 @@
//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 731c14bdb9a0..9ce2b3b275c8 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -1,9 +1,8 @@
//===- SubtargetEmitter.cpp - Generate subtarget enumerations -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,9 +73,11 @@ class SubtargetEmitter {
CodeGenSchedModels &SchedModels;
std::string Target;
- void Enumeration(raw_ostream &OS);
- unsigned FeatureKeyValues(raw_ostream &OS);
- unsigned CPUKeyValues(raw_ostream &OS);
+ void Enumeration(raw_ostream &OS, DenseMap<Record *, unsigned> &FeatureMap);
+ unsigned FeatureKeyValues(raw_ostream &OS,
+ const DenseMap<Record *, unsigned> &FeatureMap);
+ unsigned CPUKeyValues(raw_ostream &OS,
+ const DenseMap<Record *, unsigned> &FeatureMap);
void FormItineraryStageString(const std::string &Names,
Record *ItinData, std::string &ItinString,
unsigned &NStages);
@@ -138,7 +139,8 @@ public:
//
// Enumeration - Emit the specified class as an enumeration.
//
-void SubtargetEmitter::Enumeration(raw_ostream &OS) {
+void SubtargetEmitter::Enumeration(raw_ostream &OS,
+ DenseMap<Record *, unsigned> &FeatureMap) {
// Get all records of class and sort
std::vector<Record*> DefList =
Records.getAllDerivedDefinitions("SubtargetFeature");
@@ -147,7 +149,7 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
unsigned N = DefList.size();
if (N == 0)
return;
- if (N > MAX_SUBTARGET_FEATURES)
+ if (N + 1 > MAX_SUBTARGET_FEATURES)
PrintFatalError("Too many subtarget features! Bump MAX_SUBTARGET_FEATURES.");
OS << "namespace " << Target << " {\n";
@@ -162,18 +164,42 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS) {
// Get and emit name
OS << " " << Def->getName() << " = " << i << ",\n";
+
+ // Save the index for this feature.
+ FeatureMap[Def] = i;
}
+ OS << " "
+ << "NumSubtargetFeatures = " << N << "\n";
+
// Close enumeration and namespace
OS << "};\n";
OS << "} // end namespace " << Target << "\n";
}
+static void printFeatureMask(raw_ostream &OS, RecVec &FeatureList,
+ const DenseMap<Record *, unsigned> &FeatureMap) {
+ std::array<uint64_t, MAX_SUBTARGET_WORDS> Mask = {};
+ for (unsigned j = 0, M = FeatureList.size(); j < M; ++j) {
+ unsigned Bit = FeatureMap.lookup(FeatureList[j]);
+ Mask[Bit / 64] |= 1ULL << (Bit % 64);
+ }
+
+ OS << "{ { { ";
+ for (unsigned i = 0; i != Mask.size(); ++i) {
+ OS << "0x";
+ OS.write_hex(Mask[i]);
+ OS << "ULL, ";
+ }
+ OS << "} } }";
+}
+
//
// FeatureKeyValues - Emit data of all the subtarget features. Used by the
// command line.
//
-unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
+unsigned SubtargetEmitter::FeatureKeyValues(
+ raw_ostream &OS, const DenseMap<Record *, unsigned> &FeatureMap) {
// Gather and sort all the features
std::vector<Record*> FeatureList =
Records.getAllDerivedDefinitions("SubtargetFeature");
@@ -204,16 +230,13 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
OS << " { "
<< "\"" << CommandLineName << "\", "
<< "\"" << Desc << "\", "
- << "{ " << Target << "::" << Name << " }, ";
+ << Target << "::" << Name << ", ";
RecVec ImpliesList = Feature->getValueAsListOfDefs("Implies");
- OS << "{";
- for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
- OS << " " << Target << "::" << ImpliesList[j]->getName();
- if (++j < M) OS << ",";
- }
- OS << " } },\n";
+ printFeatureMask(OS, ImpliesList, FeatureMap);
+
+ OS << " },\n";
++NumFeatures;
}
@@ -227,7 +250,9 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
// CPUKeyValues - Emit data of all the subtarget processors. Used by command
// line.
//
-unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
+unsigned
+SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
+ const DenseMap<Record *, unsigned> &FeatureMap) {
// Gather and sort processor information
std::vector<Record*> ProcessorList =
Records.getAllDerivedDefinitions("Processor");
@@ -235,7 +260,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// Begin processor table
OS << "// Sorted (by key) array of values for CPU subtype.\n"
- << "extern const llvm::SubtargetFeatureKV " << Target
+ << "extern const llvm::SubtargetSubTypeKV " << Target
<< "SubTypeKV[] = {\n";
// For each processor
@@ -243,18 +268,16 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
StringRef Name = Processor->getValueAsString("Name");
RecVec FeatureList = Processor->getValueAsListOfDefs("Features");
- // Emit as { "cpu", "description", { f1 , f2 , ... fn } },
- OS << " { "
- << "\"" << Name << "\", "
- << "\"Select the " << Name << " processor\", ";
+ // Emit as { "cpu", feature bit mask, &SchedModel },
+ OS << " { "
+ << "\"" << Name << "\", ";
- OS << "{";
- for (unsigned j = 0, M = FeatureList.size(); j < M;) {
- OS << " " << Target << "::" << FeatureList[j]->getName();
- if (++j < M) OS << ",";
- }
- // The { } is for the "implies" section of this data structure.
- OS << " }, { } },\n";
+ printFeatureMask(OS, FeatureList, FeatureMap);
+
+ // Emit the scheduler model pointer.
+ const std::string &ProcModelName =
+ SchedModels.getModelForProc(Processor).ModelName;
+ OS << ", &" << ProcModelName << " },\n";
}
// End processor table
@@ -1369,33 +1392,6 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
}
//
-// EmitProcessorLookup - generate cpu name to sched model lookup tables.
-//
-void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
- // Gather and sort processor information
- std::vector<Record*> ProcessorList =
- Records.getAllDerivedDefinitions("Processor");
- llvm::sort(ProcessorList, LessRecordFieldName());
-
- // Begin processor->sched model table
- OS << "\n";
- OS << "// Sorted (by key) array of sched model for CPU subtype.\n"
- << "extern const llvm::SubtargetInfoKV " << Target
- << "ProcSchedKV[] = {\n";
- // For each processor
- for (Record *Processor : ProcessorList) {
- StringRef Name = Processor->getValueAsString("Name");
- const std::string &ProcModelName =
- SchedModels.getModelForProc(Processor).ModelName;
-
- // Emit as { "cpu", procinit },
- OS << " { \"" << Name << "\", (const void *)&" << ProcModelName << " },\n";
- }
- // End processor->sched model table
- OS << "};\n";
-}
-
-//
// EmitSchedModel - Emits all scheduling model tables, folding common patterns.
//
void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
@@ -1423,12 +1419,10 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
}
EmitSchedClassTables(SchedTables, OS);
+ OS << "\n#undef DBGFIELD\n";
+
// Emit the processor machine model
EmitProcessorModels(OS);
- // Emit the processor lookup data
- EmitProcessorLookup(OS);
-
- OS << "\n#undef DBGFIELD";
}
static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
@@ -1740,13 +1734,12 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
<< "GenMCSubtargetInfo : public MCSubtargetInfo {\n";
OS << " " << Target << "GenMCSubtargetInfo(const Triple &TT, \n"
<< " StringRef CPU, StringRef FS, ArrayRef<SubtargetFeatureKV> PF,\n"
- << " ArrayRef<SubtargetFeatureKV> PD,\n"
- << " const SubtargetInfoKV *ProcSched,\n"
+ << " ArrayRef<SubtargetSubTypeKV> PD,\n"
<< " const MCWriteProcResEntry *WPR,\n"
<< " const MCWriteLatencyEntry *WL,\n"
<< " const MCReadAdvanceEntry *RA, const InstrStage *IS,\n"
<< " const unsigned *OC, const unsigned *FP) :\n"
- << " MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched,\n"
+ << " MCSubtargetInfo(TT, CPU, FS, PF, PD,\n"
<< " WPR, WL, RA, IS, OC, FP) { }\n\n"
<< " unsigned resolveVariantSchedClass(unsigned SchedClass,\n"
<< " const MCInst *MI, unsigned CPUID) const override {\n"
@@ -1790,8 +1783,10 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "\n#ifdef GET_SUBTARGETINFO_ENUM\n";
OS << "#undef GET_SUBTARGETINFO_ENUM\n\n";
+ DenseMap<Record *, unsigned> FeatureMap;
+
OS << "namespace llvm {\n";
- Enumeration(OS);
+ Enumeration(OS, FeatureMap);
OS << "} // end namespace llvm\n\n";
OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
@@ -1802,12 +1797,12 @@ void SubtargetEmitter::run(raw_ostream &OS) {
#if 0
OS << "namespace {\n";
#endif
- unsigned NumFeatures = FeatureKeyValues(OS);
- OS << "\n";
- unsigned NumProcs = CPUKeyValues(OS);
+ unsigned NumFeatures = FeatureKeyValues(OS, FeatureMap);
OS << "\n";
EmitSchedModel(OS);
OS << "\n";
+ unsigned NumProcs = CPUKeyValues(OS, FeatureMap);
+ OS << "\n";
#if 0
OS << "} // end anonymous namespace\n\n";
#endif
@@ -1828,8 +1823,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
else
OS << "None, ";
OS << '\n'; OS.indent(22);
- OS << Target << "ProcSchedKV, "
- << Target << "WriteProcResTable, "
+ OS << Target << "WriteProcResTable, "
<< Target << "WriteLatencyTable, "
<< Target << "ReadAdvanceTable, ";
OS << '\n'; OS.indent(22);
@@ -1895,8 +1889,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "#include \"llvm/CodeGen/TargetSchedule.h\"\n\n";
OS << "namespace llvm {\n";
OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
- OS << "extern const llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n";
- OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcSchedKV[];\n";
+ OS << "extern const llvm::SubtargetSubTypeKV " << Target << "SubTypeKV[];\n";
OS << "extern const llvm::MCWriteProcResEntry "
<< Target << "WriteProcResTable[];\n";
OS << "extern const llvm::MCWriteLatencyEntry "
@@ -1922,8 +1915,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
else
OS << "None, ";
OS << '\n'; OS.indent(24);
- OS << Target << "ProcSchedKV, "
- << Target << "WriteProcResTable, "
+ OS << Target << "WriteProcResTable, "
<< Target << "WriteLatencyTable, "
<< Target << "ReadAdvanceTable, ";
OS << '\n'; OS.indent(24);
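The new printFeatureMask helper packs each feature's enum index into an array of 64-bit words and prints it as an initializer. A self-contained sketch of the same packing, where NumWords stands in for MAX_SUBTARGET_WORDS and the example values are invented:

#include <array>
#include <cstdint>
#include <vector>

constexpr unsigned NumWords = 4; // stands in for MAX_SUBTARGET_WORDS

// Pack a list of feature bit indices into fixed-width 64-bit words, the
// same layout printFeatureMask emits.
std::array<uint64_t, NumWords> buildMask(const std::vector<unsigned> &Bits) {
  std::array<uint64_t, NumWords> Mask = {};
  for (unsigned Bit : Bits)
    Mask[Bit / 64] |= 1ULL << (Bit % 64);
  return Mask;
}
// buildMask({0, 65}) yields { 0x1, 0x2, 0x0, 0x0 }: bit 0 lands in word 0,
// bit 65 becomes bit 1 of word 1.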
diff --git a/utils/TableGen/SubtargetFeatureInfo.cpp b/utils/TableGen/SubtargetFeatureInfo.cpp
index f9b8853cc117..edf0b4a01c6d 100644
--- a/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -1,9 +1,8 @@
//===- SubtargetFeatureInfo.cpp - Helpers for subtarget features ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -45,20 +44,6 @@ SubtargetFeatureInfo::getAll(const RecordKeeper &Records) {
return SubtargetFeatures;
}
-void SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
- SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
- OS << "// Flags for subtarget features that participate in "
- << "instruction matching.\n";
- OS << "enum SubtargetFeatureFlag : "
- << getMinimalTypeForEnumBitfield(SubtargetFeatures.size()) << " {\n";
- for (const auto &SF : SubtargetFeatures) {
- const SubtargetFeatureInfo &SFI = SF.second;
- OS << " " << SFI.getEnumName() << " = (1ULL << " << SFI.Index << "),\n";
- }
- OS << " Feature_None = 0\n";
- OS << "};\n\n";
-}
-
void SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(
SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
OS << "// Bits for subtarget features that participate in "
@@ -121,9 +106,9 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures(
void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
StringRef TargetName, StringRef ClassName, StringRef FuncName,
SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS) {
- OS << "uint64_t " << TargetName << ClassName << "::\n"
+ OS << "FeatureBitset " << TargetName << ClassName << "::\n"
<< FuncName << "(const FeatureBitset& FB) const {\n";
- OS << " uint64_t Features = 0;\n";
+ OS << " FeatureBitset Features;\n";
for (const auto &SF : SubtargetFeatures) {
const SubtargetFeatureInfo &SFI = SF.second;
@@ -157,7 +142,7 @@ void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
} while (true);
OS << ")\n";
- OS << " Features |= " << SFI.getEnumName() << ";\n";
+ OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n";
}
OS << " return Features;\n";
OS << "}\n\n";
diff --git a/utils/TableGen/SubtargetFeatureInfo.h b/utils/TableGen/SubtargetFeatureInfo.h
index 71e6748c863f..d72f8b93461f 100644
--- a/utils/TableGen/SubtargetFeatureInfo.h
+++ b/utils/TableGen/SubtargetFeatureInfo.h
@@ -1,9 +1,8 @@
-//===- SubtargetFeatureInfo.h - Helpers for subtarget features ------------===//
+//===- SubtargetFeatureInfo.h - Helpers for subtarget features --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -56,13 +55,6 @@ struct SubtargetFeatureInfo {
/// Emit the subtarget feature flag definitions.
///
- /// This version emits the bit value for the feature and is therefore limited
- /// to 64 feature bits.
- static void emitSubtargetFeatureFlagEnumeration(
- SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS);
-
- /// Emit the subtarget feature flag definitions.
- ///
/// This version emits the bit index for the feature and can therefore support
/// more than 64 feature bits.
static void
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index d5b6a3c12647..c485ed2feb7a 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -1,9 +1,8 @@
//===- TableGen.cpp - Top-Level TableGen implementation for LLVM ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,6 +55,12 @@ enum ActionType {
GenExegesis,
};
+namespace llvm {
+/// Storage for TimeRegionsOpt as a global so that backends aren't required to
+/// include CommandLine.h
+bool TimeRegions = false;
+} // end namespace llvm
+
namespace {
cl::opt<ActionType>
Action(cl::desc("Action to perform:"),
@@ -127,6 +132,11 @@ namespace {
Class("class", cl::desc("Print Enum list for this class"),
cl::value_desc("class name"), cl::cat(PrintEnumsCat));
+cl::opt<bool, true>
+ TimeRegionsOpt("time-regions",
+ cl::desc("Time regions of tablegens execution"),
+ cl::location(TimeRegions));
+
bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
switch (Action) {
case PrintRecords:
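The -time-regions option above uses cl::location, so its storage is a plain global (llvm::TimeRegions) that backends can read without including CommandLine.h. A minimal sketch of that pattern with an invented flag name:

#include "llvm/Support/CommandLine.h"

// External storage: any file can declare `extern bool MyFlag;` and test it
// without seeing the cl::opt machinery.
bool MyFlag = false;

static llvm::cl::opt<bool, true>
    MyFlagOpt("my-flag", llvm::cl::desc("Enable the hypothetical behaviour"),
              llvm::cl::location(MyFlag));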
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index f4f2909f8e88..135ec65c0f95 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -1,9 +1,8 @@
//===- TableGenBackends.h - Declarations for LLVM TableGen Backends -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/Types.cpp b/utils/TableGen/Types.cpp
index 04d9e40f6743..a6682da90e6b 100644
--- a/utils/TableGen/Types.cpp
+++ b/utils/TableGen/Types.cpp
@@ -1,9 +1,8 @@
//===- Types.cpp - Helper for the selection of C++ data types. ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/Types.h b/utils/TableGen/Types.h
index d511b7eae6e8..17c7742ccaac 100644
--- a/utils/TableGen/Types.h
+++ b/utils/TableGen/Types.h
@@ -1,9 +1,8 @@
//===- Types.h - Helper for the selection of C++ types. ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index 788f142e125f..365cba5a60ca 100644
--- a/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -1,9 +1,8 @@
//===- WebAssemblyDisassemblerEmitter.cpp - Disassembler tables -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,12 +50,33 @@ void emitWebAssemblyDisassemblerTables(
auto IsStackBased =
StackString &&
reinterpret_cast<const StringInit *>(StackString)->getValue() == "true";
- if (IsStackBased && !CGIP.second) {
- // this picks the first of many typed variants, which is
- // currently the except_ref one, though this shouldn't matter for
- // disassembly purposes.
- CGIP = std::make_pair(I, &CGI);
+ if (!IsStackBased)
+ continue;
+ if (CGIP.second) {
+ // We already have an instruction for this slot, so decide which one
+ // should be the canonical one. This determines which variant gets
+ // printed in a disassembly. We want e.g. "call" not "i32.call", and
+ // "end" when we don't know if its "end_loop" or "end_block" etc.
+ auto IsCanonicalExisting = CGIP.second->TheDef->getValue("IsCanonical")
+ ->getValue()
+ ->getAsString() == "1";
+ // We already have one marked explicitly as canonical, so keep it.
+ if (IsCanonicalExisting)
+ continue;
+ auto IsCanonicalNew =
+ Def.getValue("IsCanonical")->getValue()->getAsString() == "1";
+ // If the new one is explicitly marked as canonical, take it.
+ if (!IsCanonicalNew) {
+ // Neither the existing nor the new instruction is canonical.
+ // Pick the one with the shorter name as a heuristic, though ideally
+ // IsCanonical is always defined for at least one variant, so this
+ // tie-breaker should rarely have to apply.
+ if (CGIP.second->AsmString.size() <= CGI.AsmString.size())
+ continue;
+ }
}
+ // Set this instruction as the one to use.
+ CGIP = std::make_pair(I, &CGI);
}
OS << "#include \"MCTargetDesc/WebAssemblyMCTargetDesc.h\"\n";
OS << "\n";
diff --git a/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/utils/TableGen/WebAssemblyDisassemblerEmitter.h
index 91f820f120a2..60d3d9433eca 100644
--- a/utils/TableGen/WebAssemblyDisassemblerEmitter.h
+++ b/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -1,9 +1,8 @@
//===- WebAssemblyDisassemblerEmitter.h - Disassembler tables ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index 220765f72410..093f220fda5e 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -1,9 +1,8 @@
//===- X86DisassemblerShared.h - Emitter shared header ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 2b5cc1279605..8036aecc4f4b 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -1,9 +1,8 @@
//===- X86DisassemblerTables.cpp - Disassembler tables ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -889,67 +888,44 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o,
}
void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
- const unsigned int tableSize = 16384;
o.indent(i * 2) << "static const uint8_t " CONTEXTS_STR
- "[" << tableSize << "] = {\n";
+ "[" << ATTR_max << "] = {\n";
i++;
- for (unsigned index = 0; index < tableSize; ++index) {
+ for (unsigned index = 0; index < ATTR_max; ++index) {
o.indent(i * 2);
- if (index & ATTR_EVEX) {
- o << "IC_EVEX";
- if (index & ATTR_EVEXL2)
+ if ((index & ATTR_EVEX) || (index & ATTR_VEX) || (index & ATTR_VEXL)) {
+ if (index & ATTR_EVEX)
+ o << "IC_EVEX";
+ else
+ o << "IC_VEX";
+
+ if ((index & ATTR_EVEX) && (index & ATTR_EVEXL2))
o << "_L2";
- else if (index & ATTR_EVEXL)
+ else if (index & ATTR_VEXL)
o << "_L";
+
if (index & ATTR_REXW)
o << "_W";
+
if (index & ATTR_OPSIZE)
o << "_OPSIZE";
else if (index & ATTR_XD)
o << "_XD";
else if (index & ATTR_XS)
o << "_XS";
- if (index & ATTR_EVEXKZ)
- o << "_KZ";
- else if (index & ATTR_EVEXK)
- o << "_K";
- if (index & ATTR_EVEXB)
- o << "_B";
+
+ if ((index & ATTR_EVEX)) {
+ if (index & ATTR_EVEXKZ)
+ o << "_KZ";
+ else if (index & ATTR_EVEXK)
+ o << "_K";
+
+ if (index & ATTR_EVEXB)
+ o << "_B";
+ }
}
- else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
- o << "IC_VEX_L_W_OPSIZE";
- else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_XD))
- o << "IC_VEX_L_W_XD";
- else if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_XS))
- o << "IC_VEX_L_W_XS";
- else if ((index & ATTR_VEXL) && (index & ATTR_REXW))
- o << "IC_VEX_L_W";
- else if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
- o << "IC_VEX_L_OPSIZE";
- else if ((index & ATTR_VEXL) && (index & ATTR_XD))
- o << "IC_VEX_L_XD";
- else if ((index & ATTR_VEXL) && (index & ATTR_XS))
- o << "IC_VEX_L_XS";
- else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
- o << "IC_VEX_W_OPSIZE";
- else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XD))
- o << "IC_VEX_W_XD";
- else if ((index & ATTR_VEX) && (index & ATTR_REXW) && (index & ATTR_XS))
- o << "IC_VEX_W_XS";
- else if (index & ATTR_VEXL)
- o << "IC_VEX_L";
- else if ((index & ATTR_VEX) && (index & ATTR_REXW))
- o << "IC_VEX_W";
- else if ((index & ATTR_VEX) && (index & ATTR_OPSIZE))
- o << "IC_VEX_OPSIZE";
- else if ((index & ATTR_VEX) && (index & ATTR_XD))
- o << "IC_VEX_XD";
- else if ((index & ATTR_VEX) && (index & ATTR_XS))
- o << "IC_VEX_XS";
- else if (index & ATTR_VEX)
- o << "IC_VEX";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XS))
o << "IC_64BIT_REXW_XS";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD))
@@ -1004,12 +980,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const {
else
o << "IC";
- if (index < tableSize - 1)
- o << ",";
- else
- o << " ";
-
- o << " /* " << index << " */";
+ o << ", /* " << index << " */";
o << "\n";
}
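Rather than enumerating every VEX combination by hand, the rewritten loop builds the IC_* context name by appending suffixes for whichever attribute bits are set. A compact, self-contained illustration of that composition, with the attribute bits simplified to booleans:

#include <string>

// Build the instruction-context name from attribute flags, mirroring the
// suffix order used above (family, then L2/L, then W, then operand size).
std::string contextName(bool IsEVEX, bool L2, bool L, bool W, bool OpSize) {
  std::string Name = IsEVEX ? "IC_EVEX" : "IC_VEX";
  if (IsEVEX && L2)
    Name += "_L2";
  else if (L)
    Name += "_L";
  if (W)
    Name += "_W";
  if (OpSize)
    Name += "_OPSIZE";
  return Name;
}
// contextName(false, false, true, true, true) == "IC_VEX_L_W_OPSIZE"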
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index b0ea9c2e8625..63af68b6fbfa 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -1,9 +1,8 @@
//===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index d5dc10ecad25..3df14f40e4a9 100644
--- a/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -1,9 +1,8 @@
//===- utils/TableGen/X86EVEX2VEXTablesEmitter.cpp - X86 backend-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -69,23 +68,6 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
}
// Return true if the 2 BitsInits are equal
-static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) {
- if (B1->getNumBits() != B2->getNumBits())
- PrintFatalError("Comparing two BitsInits with different sizes!");
-
- for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) {
- if (BitInit *Bit1 = dyn_cast<BitInit>(B1->getBit(i))) {
- if (BitInit *Bit2 = dyn_cast<BitInit>(B2->getBit(i))) {
- if (Bit1->getValue() != Bit2->getValue())
- return false;
- } else
- PrintFatalError("Invalid BitsInit bit");
- } else
- PrintFatalError("Invalid BitsInit bit");
- }
- return true;
-}
-
// Calculates the integer value residing in a BitsInit object
static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
uint64_t Value = 0;
@@ -109,26 +91,25 @@ public:
bool operator()(const CodeGenInstruction *VEXInst) {
Record *RecE = EVEXInst->TheDef;
Record *RecV = VEXInst->TheDef;
- uint64_t EVEX_W =
- getValueFromBitsInit(RecE->getValueAsBitsInit("VEX_WPrefix"));
- uint64_t VEX_W =
- getValueFromBitsInit(RecV->getValueAsBitsInit("VEX_WPrefix"));
+ bool EVEX_W = RecE->getValueAsBit("HasVEX_W");
+ bool VEX_W = RecV->getValueAsBit("HasVEX_W");
+ bool VEX_WIG = RecV->getValueAsBit("IgnoresVEX_W");
+ bool EVEX_WIG = RecE->getValueAsBit("IgnoresVEX_W");
+ bool EVEX_W1_VEX_W0 = RecE->getValueAsBit("EVEX_W1_VEX_W0");
if (RecV->getValueAsDef("OpEnc")->getName().str() != "EncVEX" ||
// VEX/EVEX fields
RecV->getValueAsDef("OpPrefix") != RecE->getValueAsDef("OpPrefix") ||
RecV->getValueAsDef("OpMap") != RecE->getValueAsDef("OpMap") ||
RecV->getValueAsBit("hasVEX_4V") != RecE->getValueAsBit("hasVEX_4V") ||
- !equalBitsInits(RecV->getValueAsBitsInit("EVEX_LL"),
- RecE->getValueAsBitsInit("EVEX_LL")) ||
+ RecV->getValueAsBit("hasEVEX_L2") != RecE->getValueAsBit("hasEVEX_L2") ||
+ RecV->getValueAsBit("hasVEX_L") != RecE->getValueAsBit("hasVEX_L") ||
// Match is allowed if either is VEX_WIG, or they match, or EVEX
// is VEX_W1X and VEX is VEX_W0.
- (!(EVEX_W == 2 || VEX_W == 2 || EVEX_W == VEX_W ||
- (EVEX_W == 3 && VEX_W == 0))) ||
+ (!(VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) ||
+ (EVEX_W1_VEX_W0 && EVEX_W && !VEX_W))) ||
// Instruction's format
- RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form") ||
- RecV->getValueAsBit("isAsmParserOnly") !=
- RecE->getValueAsBit("isAsmParserOnly"))
+ RecV->getValueAsDef("Form") != RecE->getValueAsDef("Form"))
return false;
// This is needed for instructions with intrinsic version (_Int).
@@ -150,8 +131,9 @@ public:
} else if (isMemoryOperand(OpRec1) && isMemoryOperand(OpRec2)) {
return false;
} else if (isImmediateOperand(OpRec1) && isImmediateOperand(OpRec2)) {
- if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type"))
+ if (OpRec1->getValueAsDef("Type") != OpRec2->getValueAsDef("Type")) {
return false;
+ }
} else
return false;
}
@@ -207,8 +189,7 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
else if (Inst->TheDef->getValueAsDef("OpEnc")->getName() == "EncEVEX" &&
!Inst->TheDef->getValueAsBit("hasEVEX_K") &&
!Inst->TheDef->getValueAsBit("hasEVEX_B") &&
- getValueFromBitsInit(Inst->TheDef->
- getValueAsBitsInit("EVEX_LL")) != 2 &&
+ !Inst->TheDef->getValueAsBit("hasEVEX_L2") &&
!Inst->TheDef->getValueAsBit("notEVEX2VEXConvertible"))
EVEXInsts.push_back(Inst);
}
@@ -236,17 +217,10 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
continue;
// In case a match is found add new entry to the appropriate table
- switch (getValueFromBitsInit(
- EVEXInst->TheDef->getValueAsBitsInit("EVEX_LL"))) {
- case 0:
- EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0}
- break;
- case 1:
+ if (EVEXInst->TheDef->getValueAsBit("hasVEX_L"))
EVEX2VEX256.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,1}
- break;
- default:
- llvm_unreachable("Instruction's size not fit for the mapping!");
- }
+ else
+ EVEX2VEX128.push_back(std::make_pair(EVEXInst, VEXInst)); // {0,0}
}
// Print both tables
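The new W check accepts a VEX candidate when it ignores W, when both sides agree on W (and the EVEX side does not ignore it), or when the EVEX form is tagged EVEX_W1_VEX_W0. Written out as a standalone predicate for clarity (parameter names mirror the record bits):

// W bits are compatible when the VEX side ignores W, when both sides agree
// on W (and the EVEX side does not ignore it), or when the EVEX record is
// explicitly marked EVEX_W1_VEX_W0.
bool wCompatible(bool EVEX_W, bool VEX_W, bool EVEX_WIG, bool VEX_WIG,
                 bool EVEX_W1_VEX_W0) {
  return VEX_WIG || (!EVEX_WIG && EVEX_W == VEX_W) ||
         (EVEX_W1_VEX_W0 && EVEX_W && !VEX_W);
}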
diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp
index 1ea668643575..2c15e35f234d 100644
--- a/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -1,9 +1,8 @@
//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/TableGenBackend.h"
@@ -62,9 +62,12 @@ const ManualMapEntry ManualMapSet[] = {
{ "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD },
{ "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD },
{ "ADD64rr_DB", "ADD64mr", NO_UNFOLD },
+ { "ADD8ri_DB", "ADD8mi", NO_UNFOLD },
+ { "ADD8rr_DB", "ADD8mr", NO_UNFOLD },
{ "ADD16rr_DB", "ADD16rm", NO_UNFOLD },
{ "ADD32rr_DB", "ADD32rm", NO_UNFOLD },
{ "ADD64rr_DB", "ADD64rm", NO_UNFOLD },
+ { "ADD8rr_DB", "ADD8rm", NO_UNFOLD },
{ "PUSH16r", "PUSH16rmm", UNFOLD },
{ "PUSH32r", "PUSH32rmm", UNFOLD },
{ "PUSH64r", "PUSH64rmm", UNFOLD },
@@ -106,23 +109,23 @@ class X86FoldTablesEmitter {
const CodeGenInstruction *MemInst)
: RegInst(RegInst), MemInst(MemInst) {}
- friend raw_ostream &operator<<(raw_ostream &OS,
- const X86FoldTableEntry &E) {
- OS << "{ X86::" << E.RegInst->TheDef->getName()
- << ", X86::" << E.MemInst->TheDef->getName() << ", ";
+ void print(formatted_raw_ostream &OS) const {
+ OS.indent(2);
+ OS << "{ X86::" << RegInst->TheDef->getName() << ",";
+ OS.PadToColumn(40);
+ OS << "X86::" << MemInst->TheDef->getName() << ",";
+ OS.PadToColumn(75);
- if (E.IsLoad)
+ if (IsLoad)
OS << "TB_FOLDED_LOAD | ";
- if (E.IsStore)
+ if (IsStore)
OS << "TB_FOLDED_STORE | ";
- if (E.CannotUnfold)
+ if (CannotUnfold)
OS << "TB_NO_REVERSE | ";
- if (E.IsAligned)
- OS << "TB_ALIGN_" << E.Alignment << " | ";
+ if (IsAligned)
+ OS << "TB_ALIGN_" << Alignment << " | ";
OS << "0 },\n";
-
- return OS;
}
};
@@ -142,7 +145,7 @@ public:
X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
// run - Generate the 6 X86 memory fold tables.
- void run(raw_ostream &OS);
+ void run(formatted_raw_ostream &OS);
private:
// Decides to which table to add the entry with the given instructions.
@@ -160,21 +163,21 @@ private:
// Print the given table as a static const C++ array of type
// X86MemoryFoldTableEntry.
void printTable(const FoldTable &Table, StringRef TableName,
- raw_ostream &OS) {
+ formatted_raw_ostream &OS) {
OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
<< "[] = {\n";
for (const X86FoldTableEntry &E : Table)
- OS << E;
+ E.print(OS);
- OS << "};\n";
+ OS << "};\n\n";
}
};
// Return true if one of the instruction's operands is an RST register class
static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
- return OpIn.Rec->getName() == "RST";
+ return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
});
}
@@ -345,10 +348,18 @@ public:
MemRec->getValueAsBit("hasLockPrefix") ||
RegRec->getValueAsBit("hasNoTrackPrefix") !=
MemRec->getValueAsBit("hasNoTrackPrefix") ||
- !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"),
- MemRec->getValueAsBitsInit("EVEX_LL")) ||
- !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"),
- MemRec->getValueAsBitsInit("VEX_WPrefix")) ||
+ RegRec->getValueAsBit("hasVEX_L") !=
+ MemRec->getValueAsBit("hasVEX_L") ||
+ RegRec->getValueAsBit("hasEVEX_L2") !=
+ MemRec->getValueAsBit("hasEVEX_L2") ||
+ RegRec->getValueAsBit("ignoresVEX_L") !=
+ MemRec->getValueAsBit("ignoresVEX_L") ||
+ RegRec->getValueAsBit("HasVEX_W") !=
+ MemRec->getValueAsBit("HasVEX_W") ||
+ RegRec->getValueAsBit("IgnoresVEX_W") !=
+ MemRec->getValueAsBit("IgnoresVEX_W") ||
+ RegRec->getValueAsBit("EVEX_W1_VEX_W0") !=
+ MemRec->getValueAsBit("EVEX_W1_VEX_W0") ||
// Instruction's format - The register form's "Form" field should be
// the opposite of the memory form's "Form" field.
!areOppositeForms(RegRec->getValueAsBitsInit("FormBits"),
@@ -421,6 +432,7 @@ private:
(MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) ||
(MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) ||
(MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) ||
+ (MemFormNum == X86Local::MRMXmCC && RegFormNum == X86Local::MRMXrCC) ||
(MemFormNum == X86Local::MRMDestMem &&
RegFormNum == X86Local::MRMDestReg) ||
(MemFormNum == X86Local::MRMSrcMem &&
@@ -428,7 +440,9 @@ private:
(MemFormNum == X86Local::MRMSrcMem4VOp3 &&
RegFormNum == X86Local::MRMSrcReg4VOp3) ||
(MemFormNum == X86Local::MRMSrcMemOp4 &&
- RegFormNum == X86Local::MRMSrcRegOp4))
+ RegFormNum == X86Local::MRMSrcRegOp4) ||
+ (MemFormNum == X86Local::MRMSrcMemCC &&
+ RegFormNum == X86Local::MRMSrcRegCC))
return true;
return false;
@@ -558,7 +572,7 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
return;
}
-void X86FoldTablesEmitter::run(raw_ostream &OS) {
+void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
emitSourceFileHeader("X86 fold tables", OS);
// Holds all memory instructions
@@ -639,7 +653,7 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
&(Target.getInstruction(MemInstIter)), Entry.Strategy);
}
- // Print all tables to raw_ostream OS.
+ // Print all tables.
printTable(Table2Addr, "Table2Addr", OS);
printTable(Table0, "Table0", OS);
printTable(Table1, "Table1", OS);
@@ -650,7 +664,8 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
namespace llvm {
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) {
+void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &o) {
+ formatted_raw_ostream OS(o);
X86FoldTablesEmitter(RK).run(OS);
}
} // namespace llvm
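Switching the fold-table printer to formatted_raw_ostream lets it align the register form, memory form, and flags into fixed columns with PadToColumn. A minimal sketch of that output style (the row contents are invented):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;

// Print one fold-table row with the memory form starting at column 40 and
// the flags at column 75, matching the layout used by print() above.
void printRow(formatted_raw_ostream &OS, StringRef Reg, StringRef Mem) {
  OS.indent(2);
  OS << "{ X86::" << Reg << ",";
  OS.PadToColumn(40);
  OS << "X86::" << Mem << ",";
  OS.PadToColumn(75);
  OS << "0 },\n";
}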
diff --git a/utils/TableGen/X86ModRMFilters.cpp b/utils/TableGen/X86ModRMFilters.cpp
index 1641613aa32d..98e6fb6104d7 100644
--- a/utils/TableGen/X86ModRMFilters.cpp
+++ b/utils/TableGen/X86ModRMFilters.cpp
@@ -1,9 +1,8 @@
//===- X86ModRMFilters.cpp - Disassembler ModR/M filters --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/X86ModRMFilters.h b/utils/TableGen/X86ModRMFilters.h
index b0248e878d07..c77b4c21aec4 100644
--- a/utils/TableGen/X86ModRMFilters.h
+++ b/utils/TableGen/X86ModRMFilters.h
@@ -1,9 +1,8 @@
//===- X86ModRMFilters.h - Disassembler ModR/M filters ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 2f9b428b8cfe..ab8a8855c478 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -1,9 +1,8 @@
//===- X86RecognizableInstr.cpp - Disassembler instruction spec --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,7 +83,8 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
AdSize = byteFromRec(Rec, "AdSizeBits");
HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix");
HasVEX_4V = Rec->getValueAsBit("hasVEX_4V");
- VEX_WPrefix = byteFromRec(Rec,"VEX_WPrefix");
+ HasVEX_W = Rec->getValueAsBit("HasVEX_W");
+ IgnoresVEX_W = Rec->getValueAsBit("IgnoresVEX_W");
IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L");
HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2");
HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
@@ -110,7 +110,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
std::vector<Record*> Predicates = Rec->getValueAsListOfDefs("Predicates");
for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
if (Predicates[i]->getName().find("Not64Bit") != Name.npos ||
- Predicates[i]->getName().find("In32Bit") != Name.npos) {
+ Predicates[i]->getName().find("In32Bit") != Name.npos) {
Is32Bit = true;
break;
}
@@ -164,8 +164,7 @@ InstructionContext RecognizableInstr::insnContext() const {
llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
}
// VEX_L & VEX_W
- if (!EncodeRC && HasVEX_LPrefix && (VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X)) {
+ if (!EncodeRC && HasVEX_LPrefix && HasVEX_W) {
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE);
else if (OpPrefix == X86Local::XS)
@@ -192,9 +191,7 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (!EncodeRC && HasEVEX_L2Prefix &&
- (VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X)) {
+ } else if (!EncodeRC && HasEVEX_L2Prefix && HasVEX_W) {
// EVEX_L2 & VEX_W
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE);
@@ -223,8 +220,7 @@ InstructionContext RecognizableInstr::insnContext() const {
llvm_unreachable("Invalid prefix");
}
}
- else if (VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X) {
+ else if (HasVEX_W) {
// VEX_W
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_W_OPSIZE);
@@ -254,8 +250,7 @@ InstructionContext RecognizableInstr::insnContext() const {
}
/// eof EVEX
} else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) {
- if (HasVEX_LPrefix && (VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X)) {
+ if (HasVEX_LPrefix && HasVEX_W) {
if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_L_W_OPSIZE;
else if (OpPrefix == X86Local::XS)
@@ -270,8 +265,7 @@ InstructionContext RecognizableInstr::insnContext() const {
}
} else if (OpPrefix == X86Local::PD && HasVEX_LPrefix)
insnContext = IC_VEX_L_OPSIZE;
- else if (OpPrefix == X86Local::PD && (VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X))
+ else if (OpPrefix == X86Local::PD && HasVEX_W)
insnContext = IC_VEX_W_OPSIZE;
else if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_OPSIZE;
@@ -279,14 +273,11 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_VEX_L_XS;
else if (HasVEX_LPrefix && OpPrefix == X86Local::XD)
insnContext = IC_VEX_L_XD;
- else if ((VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::XS)
+ else if (HasVEX_W && OpPrefix == X86Local::XS)
insnContext = IC_VEX_W_XS;
- else if ((VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::XD)
+ else if (HasVEX_W && OpPrefix == X86Local::XD)
insnContext = IC_VEX_W_XD;
- else if ((VEX_WPrefix == X86Local::VEX_W1 ||
- VEX_WPrefix == X86Local::VEX_W1X) && OpPrefix == X86Local::PS)
+ else if (HasVEX_W && OpPrefix == X86Local::PS)
insnContext = IC_VEX_W;
else if (HasVEX_LPrefix && OpPrefix == X86Local::PS)
insnContext = IC_VEX_L;
@@ -496,6 +487,13 @@ void RecognizableInstr::emitInstructionSpecifier() {
HANDLE_OPERAND(opcodeModifier)
HANDLE_OPTIONAL(relocation)
break;
+ case X86Local::AddCCFrm:
+ // Operand 1 (optional) is an address or immediate.
+ assert(numPhysicalOperands == 2 &&
+ "Unexpected number of operands for AddCCFrm");
+ HANDLE_OPERAND(relocation)
+ HANDLE_OPERAND(opcodeModifier)
+ break;
case X86Local::MRMDestReg:
// Operand 1 is a register operand in the R/M field.
// - In AVX512 there may be a mask operand here -
@@ -581,6 +579,13 @@ void RecognizableInstr::emitInstructionSpecifier() {
HANDLE_OPERAND(rmRegister)
HANDLE_OPTIONAL(immediate)
break;
+ case X86Local::MRMSrcRegCC:
+ assert(numPhysicalOperands == 3 &&
+ "Unexpected number of operands for MRMSrcRegCC");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(rmRegister)
+ HANDLE_OPERAND(opcodeModifier)
+ break;
case X86Local::MRMSrcMem:
// Operand 1 is a register operand in the Reg/Opcode field.
// Operand 2 is a memory operand (possibly SIB-extended)
@@ -621,6 +626,19 @@ void RecognizableInstr::emitInstructionSpecifier() {
HANDLE_OPERAND(memory)
HANDLE_OPTIONAL(immediate)
break;
+ case X86Local::MRMSrcMemCC:
+ assert(numPhysicalOperands == 3 &&
+ "Unexpected number of operands for MRMSrcMemCC");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(memory)
+ HANDLE_OPERAND(opcodeModifier)
+ break;
+ case X86Local::MRMXrCC:
+ assert(numPhysicalOperands == 2 &&
+ "Unexpected number of operands for MRMXrCC");
+ HANDLE_OPERAND(rmRegister)
+ HANDLE_OPERAND(opcodeModifier)
+ break;
case X86Local::MRMXr:
case X86Local::MRM0r:
case X86Local::MRM1r:
@@ -646,6 +664,12 @@ void RecognizableInstr::emitInstructionSpecifier() {
HANDLE_OPTIONAL(relocation)
HANDLE_OPTIONAL(immediate)
break;
+ case X86Local::MRMXmCC:
+ assert(numPhysicalOperands == 2 &&
+ "Unexpected number of operands for MRMXm");
+ HANDLE_OPERAND(memory)
+ HANDLE_OPERAND(opcodeModifier)
+ break;
case X86Local::MRMXm:
case X86Local::MRM0m:
case X86Local::MRM1m:
@@ -724,12 +748,15 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::RawFrmDstSrc:
case X86Local::RawFrmImm8:
case X86Local::RawFrmImm16:
+ case X86Local::AddCCFrm:
filter = llvm::make_unique<DumbFilter>();
break;
case X86Local::MRMDestReg:
case X86Local::MRMSrcReg:
case X86Local::MRMSrcReg4VOp3:
case X86Local::MRMSrcRegOp4:
+ case X86Local::MRMSrcRegCC:
+ case X86Local::MRMXrCC:
case X86Local::MRMXr:
filter = llvm::make_unique<ModFilter>(true);
break;
@@ -737,6 +764,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::MRMSrcMem:
case X86Local::MRMSrcMem4VOp3:
case X86Local::MRMSrcMemOp4:
+ case X86Local::MRMSrcMemCC:
+ case X86Local::MRMXmCC:
case X86Local::MRMXm:
filter = llvm::make_unique<ModFilter>(false);
break;
@@ -769,23 +798,24 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
assert(opcodeType && "Opcode type not set");
assert(filter && "Filter not set");
- if (Form == X86Local::AddRegFrm) {
- assert(((opcodeToSet & 7) == 0) &&
- "ADDREG_FRM opcode not aligned");
+ if (Form == X86Local::AddRegFrm || Form == X86Local::MRMSrcRegCC ||
+ Form == X86Local::MRMSrcMemCC || Form == X86Local::MRMXrCC ||
+ Form == X86Local::MRMXmCC || Form == X86Local::AddCCFrm) {
+ unsigned Count = Form == X86Local::AddRegFrm ? 8 : 16;
+ assert(((opcodeToSet % Count) == 0) && "ADDREG_FRM opcode not aligned");
uint8_t currentOpcode;
- for (currentOpcode = opcodeToSet;
- currentOpcode < opcodeToSet + 8;
+ for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + Count;
++currentOpcode)
tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter,
UID, Is32Bit, OpPrefix == 0,
IgnoresVEX_L || EncodeRC,
- VEX_WPrefix == X86Local::VEX_WIG, AddressSize);
+ IgnoresVEX_W, AddressSize);
} else {
tables.setTableFields(*opcodeType, insnContext(), opcodeToSet, *filter, UID,
Is32Bit, OpPrefix == 0, IgnoresVEX_L || EncodeRC,
- VEX_WPrefix == X86Local::VEX_WIG, AddressSize);
+ IgnoresVEX_W, AddressSize);
}
#undef MAP
@@ -825,7 +855,9 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("i8mem", TYPE_M)
TYPE("i8imm", TYPE_IMM)
TYPE("u8imm", TYPE_UIMM8)
+ TYPE("i16u8imm", TYPE_UIMM8)
TYPE("i32u8imm", TYPE_UIMM8)
+ TYPE("i64u8imm", TYPE_UIMM8)
TYPE("GR8", TYPE_R8)
TYPE("VR128", TYPE_XMM)
TYPE("VR128X", TYPE_XMM)
@@ -842,16 +874,14 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("f32mem", TYPE_M)
TYPE("ssmem", TYPE_M)
TYPE("RST", TYPE_ST)
+ TYPE("RSTi", TYPE_ST)
TYPE("i128mem", TYPE_M)
TYPE("i256mem", TYPE_M)
TYPE("i512mem", TYPE_M)
TYPE("i64i32imm_pcrel", TYPE_REL)
TYPE("i16imm_pcrel", TYPE_REL)
TYPE("i32imm_pcrel", TYPE_REL)
- TYPE("SSECC", TYPE_IMM3)
- TYPE("XOPCC", TYPE_IMM3)
- TYPE("AVXCC", TYPE_IMM5)
- TYPE("AVX512ICC", TYPE_AVX512ICC)
+ TYPE("ccode", TYPE_IMM)
TYPE("AVX512RC", TYPE_IMM)
TYPE("brtarget32", TYPE_REL)
TYPE("brtarget16", TYPE_REL)
@@ -902,6 +932,11 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("VK32WM", TYPE_VK)
TYPE("VK64", TYPE_VK)
TYPE("VK64WM", TYPE_VK)
+ TYPE("VK1Pair", TYPE_VK_PAIR)
+ TYPE("VK2Pair", TYPE_VK_PAIR)
+ TYPE("VK4Pair", TYPE_VK_PAIR)
+ TYPE("VK8Pair", TYPE_VK_PAIR)
+ TYPE("VK16Pair", TYPE_VK_PAIR)
TYPE("vx64mem", TYPE_MVSIBX)
TYPE("vx128mem", TYPE_MVSIBX)
TYPE("vx256mem", TYPE_MVSIBX)
@@ -931,10 +966,6 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
ENCODING("i16imm", ENCODING_IW)
}
ENCODING("i32i8imm", ENCODING_IB)
- ENCODING("SSECC", ENCODING_IB)
- ENCODING("XOPCC", ENCODING_IB)
- ENCODING("AVXCC", ENCODING_IB)
- ENCODING("AVX512ICC", ENCODING_IB)
ENCODING("AVX512RC", ENCODING_IRC)
ENCODING("i16imm", ENCODING_Iv)
ENCODING("i16i8imm", ENCODING_IB)
@@ -943,7 +974,9 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
ENCODING("i64i8imm", ENCODING_IB)
ENCODING("i8imm", ENCODING_IB)
ENCODING("u8imm", ENCODING_IB)
+ ENCODING("i16u8imm", ENCODING_IB)
ENCODING("i32u8imm", ENCODING_IB)
+ ENCODING("i64u8imm", ENCODING_IB)
// This is not a typo. Instructions like BLENDVPD put
// register IDs in 8-bit immediates nowadays.
ENCODING("FR32", ENCODING_IB)
@@ -964,6 +997,7 @@ OperandEncoding
RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
uint8_t OpSize) {
ENCODING("RST", ENCODING_FP)
+ ENCODING("RSTi", ENCODING_FP)
ENCODING("GR16", ENCODING_RM)
ENCODING("GR32", ENCODING_RM)
ENCODING("GR32orGR64", ENCODING_RM)
@@ -987,6 +1021,11 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_RM)
ENCODING("VK32", ENCODING_RM)
ENCODING("VK64", ENCODING_RM)
+ ENCODING("VK1PAIR", ENCODING_RM)
+ ENCODING("VK2PAIR", ENCODING_RM)
+ ENCODING("VK4PAIR", ENCODING_RM)
+ ENCODING("VK8PAIR", ENCODING_RM)
+ ENCODING("VK16PAIR", ENCODING_RM)
ENCODING("BNDR", ENCODING_RM)
errs() << "Unhandled R/M register encoding " << s << "\n";
llvm_unreachable("Unhandled R/M register encoding");
@@ -1021,6 +1060,11 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_REG)
ENCODING("VK32", ENCODING_REG)
ENCODING("VK64", ENCODING_REG)
+ ENCODING("VK1Pair", ENCODING_REG)
+ ENCODING("VK2Pair", ENCODING_REG)
+ ENCODING("VK4Pair", ENCODING_REG)
+ ENCODING("VK8Pair", ENCODING_REG)
+ ENCODING("VK16Pair", ENCODING_REG)
ENCODING("VK1WM", ENCODING_REG)
ENCODING("VK2WM", ENCODING_REG)
ENCODING("VK4WM", ENCODING_REG)
@@ -1055,6 +1099,11 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_VVVV)
ENCODING("VK32", ENCODING_VVVV)
ENCODING("VK64", ENCODING_VVVV)
+ ENCODING("VK1PAIR", ENCODING_VVVV)
+ ENCODING("VK2PAIR", ENCODING_VVVV)
+ ENCODING("VK4PAIR", ENCODING_VVVV)
+ ENCODING("VK8PAIR", ENCODING_VVVV)
+ ENCODING("VK16PAIR", ENCODING_VVVV)
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
llvm_unreachable("Unhandled VEX.vvvv register encoding");
}
@@ -1128,7 +1177,9 @@ RecognizableInstr::relocationEncodingFromString(const std::string &s,
ENCODING("i64i8imm", ENCODING_IB)
ENCODING("i8imm", ENCODING_IB)
ENCODING("u8imm", ENCODING_IB)
+ ENCODING("i16u8imm", ENCODING_IB)
ENCODING("i32u8imm", ENCODING_IB)
+ ENCODING("i64u8imm", ENCODING_IB)
ENCODING("i64i32imm_pcrel", ENCODING_ID)
ENCODING("i16imm_pcrel", ENCODING_IW)
ENCODING("i32imm_pcrel", ENCODING_ID)
@@ -1166,6 +1217,7 @@ RecognizableInstr::opcodeModifierEncodingFromString(const std::string &s,
ENCODING("GR64", ENCODING_RO)
ENCODING("GR16", ENCODING_Rv)
ENCODING("GR8", ENCODING_RB)
+ ENCODING("ccode", ENCODING_CC)
errs() << "Unhandled opcode modifier encoding " << s << "\n";
llvm_unreachable("Unhandled opcode modifier encoding");
}
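The decode-path change folds the new condition-code forms into the opcode fan-out: forms that embed a register span 8 consecutive opcodes, forms that embed a condition code span 16, and the base opcode must be aligned to that span. A sketch of that fill loop in isolation (setEntry is a stand-in for tables.setTableFields):

#include <cassert>
#include <cstdint>

// Fill every table slot covered by one TableGen instruction: 8 opcodes when
// a register is encoded in the low bits, 16 when a condition code is.
void fillOpcodeRange(uint8_t BaseOpcode, bool EmbedsRegister,
                     void (*setEntry)(uint8_t)) {
  unsigned Count = EmbedsRegister ? 8 : 16;
  assert(BaseOpcode % Count == 0 && "base opcode not aligned to its span");
  for (unsigned Op = BaseOpcode; Op < BaseOpcode + Count; ++Op)
    setEntry(static_cast<uint8_t>(Op));
}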
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index c4d34ee6c80c..b15bef4e1931 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -1,9 +1,8 @@
//===- X86RecognizableInstr.h - Disassembler instruction spec ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,18 +101,21 @@ namespace X86Local {
RawFrmDstSrc = 6,
RawFrmImm8 = 7,
RawFrmImm16 = 8,
+ AddCCFrm = 9,
MRMDestMem = 32,
MRMSrcMem = 33,
MRMSrcMem4VOp3 = 34,
MRMSrcMemOp4 = 35,
- MRMXm = 39,
+ MRMSrcMemCC = 36,
+ MRMXmCC = 38, MRMXm = 39,
MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43,
MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47,
MRMDestReg = 48,
MRMSrcReg = 49,
MRMSrcReg4VOp3 = 50,
MRMSrcRegOp4 = 51,
- MRMXr = 55,
+ MRMSrcRegCC = 52,
+ MRMXrCC = 54, MRMXr = 55,
MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59,
MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63,
#define MAP(from, to) MRM_##from = to,
@@ -140,10 +142,6 @@ namespace X86Local {
enum {
AdSize16 = 1, AdSize32 = 2, AdSize64 = 3
};
-
- enum {
- VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2, VEX_W1X = 3
- };
}
namespace X86Disassembler {
@@ -177,8 +175,10 @@ private:
bool HasREX_WPrefix;
/// The hasVEX_4V field from the record
bool HasVEX_4V;
- /// The VEX_WPrefix field from the record
- uint8_t VEX_WPrefix;
+ /// The HasVEX_WPrefix field from the record
+ bool HasVEX_W;
+ /// The IgnoresVEX_W field from the record
+ bool IgnoresVEX_W;
/// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
bool HasVEX_LPrefix;
/// The ignoreVEX_L field from the record